wolfSSL SSL/TLS library, support up to TLS1.3

Dependents:   CyaSSL-Twitter-OAuth4Tw Example-client-tls-cert TwitterReader TweetTest ... more

Files at this revision

API Documentation at this revision

Comitter:
wolfSSL
Date:
Thu Jun 04 23:57:22 2020 +0000
Parent:
15:117db924cf7c
Child:
17:a5f916481144
Commit message:
wolfSSL 4.4.0

Changed in this revision

src/bio.c Show annotated file Show diff for this revision Revisions of this file
src/crl.c Show annotated file Show diff for this revision Revisions of this file
src/internal.c Show annotated file Show diff for this revision Revisions of this file
src/keys.c Show annotated file Show diff for this revision Revisions of this file
src/ocsp.c Show annotated file Show diff for this revision Revisions of this file
src/sniffer.c Show annotated file Show diff for this revision Revisions of this file
src/ssl.c Show annotated file Show diff for this revision Revisions of this file
src/tls.c Show annotated file Show diff for this revision Revisions of this file
src/tls13.c Show annotated file Show diff for this revision Revisions of this file
src/wolfio.c Show annotated file Show diff for this revision Revisions of this file
user_settings.h Show diff for this revision Revisions of this file
wolfcrypt/src/aes.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/arc4.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/asm.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/asn.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/blake2b.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/blake2s.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/camellia.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/chacha.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/chacha20_poly1305.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/cmac.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/coding.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/compress.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/cpuid.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/cryptocb.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/cryptodev.c Show diff for this revision Revisions of this file
wolfcrypt/src/curve25519.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/curve448.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/des3.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/dh.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/dsa.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/ecc.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/ed25519.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/ed448.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/error.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/evp.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/fe_448.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/fe_low_mem.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/fe_operations.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/fips.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/fips_test.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/ge_448.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/ge_low_mem.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/ge_operations.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/hash.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/hc128.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/hmac.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/idea.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/integer.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/logging.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/md2.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/md4.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/md5.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/memory.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/misc.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/pkcs12.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/pkcs7.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/poly1305.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/pwdbased.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/rabbit.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/random.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/ripemd.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/rsa.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/sha.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/sha256.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/sha3.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/sha512.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/signature.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/sp_arm32.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/sp_arm64.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/sp_armthumb.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/sp_c32.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/sp_c64.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/sp_cortexm.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/sp_dsp32.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/sp_int.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/sp_x86_64.c Show diff for this revision Revisions of this file
wolfcrypt/src/srp.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/tfm.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/wc_dsp.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/wc_encrypt.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/wc_pkcs11.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/wc_port.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/wolfevent.c Show annotated file Show diff for this revision Revisions of this file
wolfcrypt/src/wolfmath.c Show annotated file Show diff for this revision Revisions of this file
wolfssl/callbacks.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/certs_test.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/crl.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/error-ssl.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/include.am Show annotated file Show diff for this revision Revisions of this file
wolfssl/internal.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/ocsp.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/openssl/aes.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/openssl/asn1.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/openssl/asn1t.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/openssl/bio.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/openssl/bn.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/openssl/buffer.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/openssl/conf.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/openssl/crypto.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/openssl/des.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/openssl/dh.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/openssl/dsa.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/openssl/ec.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/openssl/ec25519.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/openssl/ec448.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/openssl/ecdh.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/openssl/ecdsa.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/openssl/ed25519.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/openssl/ed448.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/openssl/engine.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/openssl/err.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/openssl/evp.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/openssl/hmac.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/openssl/include.am Show diff for this revision Revisions of this file
wolfssl/openssl/md4.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/openssl/md5.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/openssl/obj_mac.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/openssl/objects.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/openssl/ocsp.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/openssl/opensslv.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/openssl/ossl_typ.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/openssl/pem.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/openssl/pkcs12.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/openssl/pkcs7.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/openssl/rand.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/openssl/rc4.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/openssl/ripemd.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/openssl/rsa.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/openssl/sha.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/openssl/sha3.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/openssl/ssl.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/openssl/stack.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/openssl/tls1.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/openssl/x509.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/openssl/x509_vfy.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/openssl/x509v3.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/options.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/options.h.in Show annotated file Show diff for this revision Revisions of this file
wolfssl/sniffer.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/sniffer_error.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/sniffer_error.rc Show annotated file Show diff for this revision Revisions of this file
wolfssl/ssl.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/test.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/version.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/aes.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/arc4.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/asn.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/asn_public.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/blake2-impl.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/blake2-int.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/blake2.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/camellia.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/chacha.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/chacha20_poly1305.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/cmac.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/coding.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/compress.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/cpuid.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/cryptocb.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/cryptodev.h Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/curve25519.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/curve448.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/des3.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/dh.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/dsa.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/ecc.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/ed25519.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/ed448.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/error-crypt.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/fe_448.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/fe_operations.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/fips_test.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/ge_448.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/ge_operations.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/hash.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/hc128.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/hmac.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/idea.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/integer.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/logging.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/md2.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/md4.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/md5.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/mem_track.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/memory.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/misc.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/mpi_class.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/mpi_superclass.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/pkcs11.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/pkcs12.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/pkcs7.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/poly1305.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/pwdbased.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/rabbit.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/random.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/ripemd.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/rsa.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/selftest.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/settings.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/sha.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/sha256.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/sha3.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/sha512.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/signature.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/sp.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/sp_int.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/srp.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/tfm.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/types.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/visibility.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/wc_encrypt.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/wc_pkcs11.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/wc_port.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/wolfevent.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfcrypt/wolfmath.h Show annotated file Show diff for this revision Revisions of this file
wolfssl/wolfio.h Show annotated file Show diff for this revision Revisions of this file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/bio.c	Thu Jun 04 23:57:22 2020 +0000
@@ -0,0 +1,1706 @@
+/* bio.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#if !defined(WOLFSSL_BIO_INCLUDED)
+    #ifndef WOLFSSL_IGNORE_FILE_WARN
+        #warning bio.c does not need to be compiled separately from ssl.c
+    #endif
+#else
+
+
+/* Helper function to decode a base64 input
+ *
+ * returns size of resulting buffer on success
+ */
+static int wolfSSL_BIO_BASE64_read(WOLFSSL_BIO* bio, void* buf, int len)
+{
+    word32 frmtSz = len;
+
+    WOLFSSL_ENTER("wolfSSL_BIO_BASE64_read");
+
+    if (Base64_Decode((const byte*)buf, (word32)len, (byte*)buf, &frmtSz) !=0) {
+        WOLFSSL_MSG("Err doing base64 decode");
+        return SSL_FATAL_ERROR;
+    }
+
+    (void)bio;
+    return (int)frmtSz;
+}
+
+
+/* Helper function to read from WOLFSSL_BIO_BIO type
+ *
+ * returns amount in bytes read on success
+ */
+static int wolfSSL_BIO_BIO_read(WOLFSSL_BIO* bio, void* buf, int len)
+{
+    int   sz;
+    char* pt;
+
+    sz = wolfSSL_BIO_nread(bio, &pt, len);
+
+    if (sz > 0) {
+        XMEMCPY(buf, pt, sz);
+    }
+
+    return sz;
+}
+
+
+/* Handles reading from a memory type BIO and advancing the state.
+ *
+ * bio  WOLFSSL_BIO to read from
+ * buf  buffer to put data from bio in
+ * len  amount of data to be read
+ *
+ * returns size read on success
+ */
+static int wolfSSL_BIO_MEMORY_read(WOLFSSL_BIO* bio, void* buf, int len)
+{
+    int   sz;
+    WOLFSSL_ENTER("wolfSSL_BIO_MEMORY_read");
+
+    sz = wolfSSL_BIO_pending(bio);
+    if (sz > 0) {
+        const unsigned char* pt = NULL;
+        int memSz;
+
+        if (sz > len) {
+            sz = len;
+        }
+        memSz = wolfSSL_BIO_get_mem_data(bio, (void*)&pt);
+        if (memSz >= sz && pt != NULL) {
+            byte* tmp;
+
+            XMEMCPY(buf, (void*)pt, sz);
+            if (memSz - sz > 0) {
+                tmp = (byte*)XMALLOC(memSz-sz, bio->heap, DYNAMIC_TYPE_OPENSSL);
+                if (tmp == NULL) {
+                    WOLFSSL_MSG("Memory error");
+                    return WOLFSSL_BIO_ERROR;
+                }
+                XMEMCPY(tmp, (void*)(pt + sz), memSz - sz);
+
+                /* reset internal bio->mem */
+                XFREE(bio->ptr, bio->heap, DYNAMIC_TYPE_OPENSSL);
+                bio->ptr    = tmp;
+                bio->num = memSz-sz;
+                if (bio->mem_buf != NULL) {
+                    bio->mem_buf->data = (char*)bio->ptr;
+                    bio->mem_buf->length = bio->num;
+                }
+            }
+            bio->wrSz  -= sz;
+        }
+        else {
+            WOLFSSL_MSG("Issue with getting bio mem pointer");
+            return 0;
+        }
+    }
+    else {
+        return WOLFSSL_BIO_ERROR;
+    }
+
+    return sz;
+}
+
+#ifndef WOLFCRYPT_ONLY
+/* Helper function to read from WOLFSSL_BIO_SSL type
+ *
+ * returns the number of bytes read on success
+ */
+static int wolfSSL_BIO_SSL_read(WOLFSSL_BIO* bio, void* buf,
+        int len, WOLFSSL_BIO* front)
+{
+    int ret;
+
+    WOLFSSL_ENTER("wolfSSL_BIO_SSL_read");
+
+    /* already got eof, again is error */
+    if ((front == NULL) || front->eof)
+        return WOLFSSL_FATAL_ERROR;
+
+    bio->flags &= ~(WOLFSSL_BIO_FLAG_RETRY); /* default no retry */
+    ret = wolfSSL_read((WOLFSSL*)bio->ptr, buf, len);
+    if (ret == 0)
+        front->eof = 1;
+    else if (ret < 0) {
+        int err = wolfSSL_get_error((WOLFSSL*)bio->ptr, 0);
+        if ( !(err == SSL_ERROR_WANT_READ || err == SSL_ERROR_WANT_WRITE) ) {
+            front->eof = 1;
+        }
+        else {
+            bio->flags |= WOLFSSL_BIO_FLAG_RETRY; /* should retry */
+        }
+    }
+
+    return ret;
+}
+
+static int wolfSSL_BIO_MD_read(WOLFSSL_BIO* bio, void* buf, int sz)
+{
+    int ret = sz;
+
+    if (wolfSSL_EVP_MD_CTX_type((WOLFSSL_EVP_MD_CTX*)bio->ptr) == NID_hmac) {
+        if (wolfSSL_EVP_DigestSignUpdate((WOLFSSL_EVP_MD_CTX*)bio->ptr, buf,
+                        sz) != WOLFSSL_SUCCESS)
+        {
+            ret = WOLFSSL_FATAL_ERROR;
+        }
+    }
+    else {
+        if (wolfSSL_EVP_DigestUpdate((WOLFSSL_EVP_MD_CTX*)bio->ptr, buf, ret)
+                != WOLFSSL_SUCCESS) {
+            ret = WOLFSSL_FATAL_ERROR;
+        }
+    }
+    return ret;
+}
+#endif /* WOLFCRYPT_ONLY */
+
+
+/* Used to read data from a WOLFSSL_BIO structure
+ *
+ * bio  structure to read data from
+ * buf  buffer to hold the result
+ * len  length of buf buffer
+ *
+ * returns the number of bytes read on success
+ */
+int wolfSSL_BIO_read(WOLFSSL_BIO* bio, void* buf, int len)
+{
+    int  ret = 0;
+    WOLFSSL_BIO* front = bio;
+    int  sz  = 0;
+
+    WOLFSSL_ENTER("wolfSSL_BIO_read");
+
+    /* info cb, abort if user returns <= 0*/
+    if (front != NULL && front->infoCb != NULL) {
+        ret = (int)front->infoCb(front, WOLFSSL_BIO_CB_READ, (const char*)buf,
+                                                                     len, 0, 1);
+        if (ret <= 0) {
+            return ret;
+        }
+    }
+
+    /* start at end of list and work backwards */
+    while ((bio != NULL) && (bio->next != NULL)) {
+        bio = bio->next;
+    }
+
+    while (bio != NULL && ret >= 0) {
+        /* check for custom read */
+        if (bio->method && bio->method->readCb) {
+            ret = bio->method->readCb(bio, (char*)buf, len);
+        }
+
+        /* formatting data */
+        if (bio->type == WOLFSSL_BIO_BASE64 && ret > 0 && sz > 0) {
+            ret = wolfSSL_BIO_BASE64_read(bio, buf, sz);
+        }
+
+        /* write BIOs */
+        if (bio && bio->type == WOLFSSL_BIO_BIO) {
+            ret = wolfSSL_BIO_BIO_read(bio, buf, len);
+        }
+
+        if (bio && bio->type == WOLFSSL_BIO_MEMORY) {
+            ret = wolfSSL_BIO_MEMORY_read(bio, buf, len);
+        }
+
+    #ifndef NO_FILESYSTEM
+        if (bio && bio->type == WOLFSSL_BIO_FILE) {
+            ret = (int)XFREAD(buf, 1, len, (XFILE)bio->ptr);
+        }
+    #endif
+
+    #ifndef WOLFCRYPT_ONLY
+        if (bio && bio->type == WOLFSSL_BIO_SSL) {
+            ret = wolfSSL_BIO_SSL_read(bio, buf, len, front);
+        }
+
+        /* data passing through BIO MD wrapper */
+        if (bio && bio->type == WOLFSSL_BIO_MD && ret > 0) {
+            ret = wolfSSL_BIO_MD_read(bio, buf, ret);
+        }
+    #endif
+
+        /* case where front of list is done */
+        if (bio == front) {
+            break; /* at front of list so be done */
+        }
+
+        if (ret > 0) {
+            sz = ret; /* adjust size for formatting */
+        }
+
+        /* previous WOLFSSL_BIO in list working towards head of list */
+        bio = bio->prev;
+    }
+
+    /* info cb, user can override return value */
+    if (front != NULL && front->infoCb != NULL) {
+        ret = (int)front->infoCb(front,
+                                 WOLFSSL_BIO_CB_READ | WOLFSSL_BIO_CB_RETURN,
+                                 (const char*)buf, len, 0, ret);
+    }
+
+    return ret;
+}
+
+
+/* Converts data into base64 output
+ *
+ * returns the resulting buffer size on success.
+ */
+static int wolfSSL_BIO_BASE64_write(WOLFSSL_BIO* bio, const void* data,
+        word32 inLen, byte* out, word32* outLen)
+{
+    byte* tmp = NULL;
+    int ret   = 0;
+
+    WOLFSSL_ENTER("wolfSSL_BIO_BASE64_write");
+
+    if (bio == NULL || data == NULL || out == NULL || outLen == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+#if defined(WOLFSSL_BASE64_ENCODE)
+    tmp = (byte*)XMALLOC(*outLen, bio->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (tmp == NULL) {
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    if ((bio->flags & WOLFSSL_BIO_FLAG_BASE64_NO_NL) ==
+            WOLFSSL_BIO_FLAG_BASE64_NO_NL) {
+        if (Base64_Encode_NoNl((const byte*)data, inLen,
+                tmp, outLen) < 0) {
+            ret = WOLFSSL_FATAL_ERROR;
+        }
+    }
+    else {
+        if (Base64_Encode((const byte*)data, inLen,
+                tmp, outLen) < 0) {
+            ret = WOLFSSL_FATAL_ERROR;
+        }
+    }
+
+    if (ret != WOLFSSL_FATAL_ERROR) {
+        ret = (int) inLen;
+        XMEMCPY(out, tmp, *outLen);
+
+    }
+    XFREE(tmp, bio->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#else
+    (void)bio;
+    (void)data;
+    (void)inLen;
+    (void)out;
+    (void)outLen;
+    (void)tmp;
+    WOLFSSL_MSG("BASE64 encoding not compiled in");
+#endif
+    return ret;
+}
+
+
+#ifndef WOLFCRYPT_ONLY
+/* Helper function for writing to a WOLFSSL_BIO_SSL type
+ *
+ * returns the amount written in bytes on success
+ */
+static int wolfSSL_BIO_SSL_write(WOLFSSL_BIO* bio, const void* data,
+        int len, WOLFSSL_BIO* front)
+{
+    int ret;
+
+    WOLFSSL_ENTER("wolfSSL_BIO_SSL_write");
+
+    if (bio->ptr == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    bio->flags &= ~(WOLFSSL_BIO_FLAG_RETRY); /* default no retry */
+    ret = wolfSSL_write((WOLFSSL*)bio->ptr, data, len);
+    if (ret == 0)
+        front->eof = 1;
+    else if (ret < 0) {
+        int err = wolfSSL_get_error((WOLFSSL*)bio->ptr, 0);
+        if ( !(err == SSL_ERROR_WANT_READ || err == SSL_ERROR_WANT_WRITE) ) {
+            front->eof = 1;
+        }
+        else {
+            bio->flags |= WOLFSSL_BIO_FLAG_RETRY; /* should retry */
+        }
+    }
+    return ret;
+}
+#endif /* WOLFCRYPT_ONLY */
+
+
+/* Writes to a WOLFSSL_BIO_BIO type.
+ *
+ * returns the amount written on success
+ */
+static int wolfSSL_BIO_BIO_write(WOLFSSL_BIO* bio, const void* data,
+        int len)
+{
+    int   sz;
+    char* buf;
+
+    WOLFSSL_ENTER("wolfSSL_BIO_BIO_write");
+
+    /* adding in sanity checks for static analysis tools */
+    if (bio == NULL || data == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    sz = wolfSSL_BIO_nwrite(bio, &buf, len);
+
+    /* test space for write */
+    if (sz <= 0) {
+        WOLFSSL_MSG("No room left to write");
+        return sz;
+    }
+
+    XMEMCPY(buf, data, sz);
+
+    return sz;
+}
+
+
+/* for complete compatibility a bio memory write allocs its own memory
+ * until the application runs out ....
+ *
+ * bio  structure to hold incoming data
+ * data buffer holding the data to be written
+ * len  length of data buffer
+ *
+ * returns the amount of data written on success and WOLFSSL_FAILURE or
+ *         WOLFSSL_BIO_ERROR for failure cases.
+ */
+static int wolfSSL_BIO_MEMORY_write(WOLFSSL_BIO* bio, const void* data,
+        int len)
+{
+    int   sz;
+    const unsigned char* buf;
+
+    WOLFSSL_ENTER("wolfSSL_BIO_MEMORY_write");
+
+    if (bio == NULL || data == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    sz = wolfSSL_BIO_pending(bio);
+    if (sz < 0) {
+        WOLFSSL_MSG("Error getting memory data");
+        return sz;
+    }
+
+    if (bio->ptr == NULL) {
+        bio->ptr = (byte*)XMALLOC(len, bio->heap, DYNAMIC_TYPE_OPENSSL);
+        if (bio->ptr == NULL) {
+            WOLFSSL_MSG("Error on malloc");
+            return WOLFSSL_FAILURE;
+        }
+        bio->num = len;
+        if (bio->mem_buf != NULL) {
+            bio->mem_buf->data = (char*)bio->ptr;
+            bio->mem_buf->length = bio->num;
+        }
+    }
+
+    /* check if will fit in current buffer size */
+    if (wolfSSL_BIO_get_mem_data(bio, (void*)&buf) < 0) {
+        return WOLFSSL_BIO_ERROR;
+    }
+    if (bio->num < sz + len) {
+        bio->ptr = (byte*)XREALLOC(bio->ptr, sz + len, bio->heap,
+            DYNAMIC_TYPE_OPENSSL);
+        if (bio->ptr == NULL) {
+            WOLFSSL_MSG("Error on realloc");
+            return WOLFSSL_FAILURE;
+        }
+        bio->num = sz + len;
+        if (bio->mem_buf != NULL) {
+            bio->mem_buf->data = (char*)bio->ptr;
+            bio->mem_buf->length = bio->num;
+        }
+    }
+
+    XMEMCPY((byte*)bio->ptr + sz, data, len);
+    bio->wrSz += len;
+
+    return len;
+}
+
+
+#ifndef WOLFCRYPT_ONLY
+/* Helper function for writing to a WOLFSSL_BIO_MD type
+ *
+ * returns the amount written in bytes on success (0)
+ */
+static int wolfSSL_BIO_MD_write(WOLFSSL_BIO* bio, const void* data, int len)
+{
+    int ret = 0;
+
+    if (bio == NULL || data == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    if (wolfSSL_EVP_MD_CTX_type((WOLFSSL_EVP_MD_CTX*)bio->ptr) == NID_hmac) {
+        if (wolfSSL_EVP_DigestSignUpdate((WOLFSSL_EVP_MD_CTX*)bio->ptr, data,
+                    len) != WOLFSSL_SUCCESS) {
+            ret = WOLFSSL_BIO_ERROR;
+        }
+    }
+    else {
+        if (wolfSSL_EVP_DigestUpdate((WOLFSSL_EVP_MD_CTX*)bio->ptr, data, len)
+                != WOLFSSL_SUCCESS) {
+            ret =  WOLFSSL_BIO_ERROR;
+        }
+    }
+    return ret;
+}
+#endif /* WOLFCRYPT_ONLY */
+
+
+/* Writes data to a WOLFSSL_BIO structure
+ *
+ * bio  structure to write to
+ * data holds the data to be written
+ * len  length of data buffer
+ *
+ * returns the amount written in bytes on success
+ */
+int wolfSSL_BIO_write(WOLFSSL_BIO* bio, const void* data, int len)
+{
+    int  ret = 0;
+    int  retB64 = 0;
+    WOLFSSL_BIO* front = bio;
+    void*  frmt   = NULL;
+    word32 frmtSz = 0;
+
+    WOLFSSL_ENTER("wolfSSL_BIO_write");
+
+    /* info cb, abort if user returns <= 0*/
+    if (front != NULL && front->infoCb != NULL) {
+        ret = (int)front->infoCb(front, WOLFSSL_BIO_CB_WRITE,
+                (const char*)data, len, 0, 1);
+        if (ret <= 0) {
+            return ret;
+        }
+    }
+
+    while (bio != NULL && ret >= 0) {
+        /* check for custom write */
+        if (bio->method && bio->method->writeCb) {
+            ret = bio->method->writeCb(bio, (const char*)data, len);
+        }
+
+        /* check for formatting */
+        if (bio->type == WOLFSSL_BIO_BASE64) {
+#if defined(WOLFSSL_BASE64_ENCODE)
+            word32 sz = 0;
+
+            if (bio->flags & WOLFSSL_BIO_FLAG_BASE64_NO_NL) {
+                if (Base64_Encode_NoNl((const byte*)data, len, NULL,
+                            &sz) != LENGTH_ONLY_E) {
+                    WOLFSSL_MSG("Error with base 64 get length");
+                    ret = SSL_FATAL_ERROR;
+                }
+            }
+            else {
+                if (Base64_Encode((const byte*)data, len, NULL, &sz) !=
+                    LENGTH_ONLY_E) {
+                    WOLFSSL_MSG("Error with base 64 get length");
+                    ret = SSL_FATAL_ERROR;
+                }
+            }
+
+            if (frmt == NULL && sz > 0 && ret != SSL_FATAL_ERROR) {
+                frmt = (void*)XMALLOC(sz, front->heap,
+                        DYNAMIC_TYPE_TMP_BUFFER);
+                if (frmt == NULL) {
+                    WOLFSSL_MSG("Memory error");
+                    ret = SSL_FATAL_ERROR;
+                }
+                frmtSz = sz;
+            }
+            else if (sz > frmtSz) {
+                frmt = (void*)XREALLOC(frmt, sz, front->heap,
+                        DYNAMIC_TYPE_TMP_BUFFER);
+                if (frmt == NULL) {
+                    WOLFSSL_MSG("Memory error");
+                    ret = SSL_FATAL_ERROR;
+                }
+                /* since frmt already existed then data should point to knew
+                   formatted buffer */
+                data = frmt;
+                len  = frmtSz;
+                frmtSz = sz;
+            }
+#endif /* defined(WOLFSSL_BASE64_ENCODE) */
+
+            if (ret >= 0) {
+                /* change so that data is formatted buffer */
+                retB64 = wolfSSL_BIO_BASE64_write(bio, data, (word32)len,
+                         (byte*)frmt, &frmtSz);
+                data = frmt;
+                len  = frmtSz;
+            }
+        }
+
+        /* write bios */
+        if (bio && bio->type == WOLFSSL_BIO_BIO) {
+            ret = wolfSSL_BIO_BIO_write(bio, data, len);
+        }
+
+        if (bio && bio->type == WOLFSSL_BIO_MEMORY) {
+            ret = wolfSSL_BIO_MEMORY_write(bio, data, len);
+        }
+
+    #ifndef NO_FILESYSTEM
+        if (bio && bio->type == WOLFSSL_BIO_FILE) {
+            ret = (int)XFWRITE(data, 1, len, (XFILE)bio->ptr);
+        }
+    #endif
+
+    #ifndef WOLFCRYPT_ONLY
+        if (bio && bio->type == WOLFSSL_BIO_SSL) {
+            /* already got eof, again is error */
+            if (front->eof) {
+                ret = SSL_FATAL_ERROR;
+            }
+            else {
+                ret = wolfSSL_BIO_SSL_write(bio, data, len, front);
+            }
+        }
+
+        if (bio && bio->type == WOLFSSL_BIO_MD) {
+            if (bio->next != NULL) { /* data passing through MD BIO */
+                ret = wolfSSL_BIO_MD_write(bio, data, len);
+            }
+        }
+    #endif /* WOLFCRYPT_ONLY */
+
+        /* advance to the next bio in list */
+        bio = bio->next;
+    }
+
+    if (frmt != NULL) {
+        XFREE(frmt, front->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+
+    /* info cb, user can override return value */
+    if (front != NULL && front->infoCb != NULL) {
+        ret = (int)front->infoCb(front,
+                                 WOLFSSL_BIO_CB_WRITE | WOLFSSL_BIO_CB_RETURN,
+                                 (const char*)data, 0, 0, ret);
+    }
+
+    if (retB64 != 0)
+        return retB64;
+    else
+        return ret;
+}
+
+
+/* Wrapper for other BIO type functions, expected to grow as OpenSSL compatibility
+ * layer grows.
+ *
+ * return info. specific to the cmd that is passed in.
+ */
+#if defined(OPENSSL_ALL) || defined(OPENSSL_EXTRA)
+long wolfSSL_BIO_ctrl(WOLFSSL_BIO *bio, int cmd, long larg, void *parg)
+{
+    long ret;
+
+    (void)larg; /* not currently used */
+
+    WOLFSSL_ENTER("wolfSSL_BIO_ctrl");
+
+    if (bio && bio->method && bio->method->ctrlCb) {
+        return bio->method->ctrlCb(bio, cmd, larg, parg);
+    }
+
+    switch(cmd) {
+        case BIO_CTRL_PENDING:
+        case BIO_CTRL_WPENDING:
+            ret = (long)wolfSSL_BIO_ctrl_pending(bio);
+            break;
+        case BIO_CTRL_INFO:
+            ret = (long)wolfSSL_BIO_get_mem_data(bio, parg);
+            break;
+        case BIO_CTRL_FLUSH:
+            ret = (long)wolfSSL_BIO_flush(bio);
+            break;
+        case BIO_CTRL_RESET:
+            ret = (long)wolfSSL_BIO_reset(bio);
+            break;
+        default:
+            WOLFSSL_MSG("CMD not yet implemented");
+            ret = WOLFSSL_FAILURE;
+            break;
+    }
+    return ret;
+}
+#endif
+
+
+/* helper function for wolfSSL_BIO_gets
+ * size till a newline is hit
+ * returns the number of bytes including the new line character
+ */
+static int wolfSSL_getLineLength(char* in, int inSz)
+{
+    int i;
+
+    for (i = 0; i < inSz; i++) {
+        if (in[i] == '\n') {
+            return i + 1; /* includes new line character */
+        }
+    }
+
+    return inSz; /* rest of buffer is all one line */
+}
+
+
+/* Gets the next line from bio. Goes until a new line character or end of
+ * buffer is reached.
+ *
+ * bio  the structure to read a new line from
+ * buf  buffer to hold the result
+ * sz   the size of "buf" buffer
+ *
+ * returns the size of the result placed in buf on success and a 0 or negative
+ *         value in an error case.
+ */
+int wolfSSL_BIO_gets(WOLFSSL_BIO* bio, char* buf, int sz)
+{
+    int ret = WOLFSSL_BIO_UNSET;
+
+    WOLFSSL_ENTER("wolfSSL_BIO_gets");
+
+    if (bio == NULL || buf == NULL) {
+        return WOLFSSL_FAILURE;
+    }
+
+    /* not enough space for character plus terminator */
+    if (sz <= 1) {
+        return 0;
+    }
+
+    /* info cb, abort if user returns <= 0*/
+    if (bio->infoCb != NULL) {
+        ret = (int)bio->infoCb(bio, WOLFSSL_BIO_CB_GETS, buf, sz, 0, 1);
+        if (ret <= 0) {
+            return ret;
+        }
+    }
+
+    /* check if is custom method */
+    if (bio->method && bio->method->getsCb) {
+        return bio->method->getsCb(bio, buf, sz);
+    }
+
+    switch (bio->type) {
+#ifndef NO_FILESYSTEM
+        case WOLFSSL_BIO_FILE:
+            if (((XFILE)bio->ptr) == XBADFILE) {
+                return WOLFSSL_BIO_ERROR;
+            }
+
+            #if defined(MICRIUM) || defined(LSR_FS) || defined(EBSNET)
+            WOLFSSL_MSG("XFGETS not ported for this system yet");
+            ret = XFGETS(buf, sz, (XFILE)bio->ptr);
+            #else
+            if (XFGETS(buf, sz, (XFILE)bio->ptr) != NULL) {
+                ret = (int)XSTRLEN(buf);
+            }
+            else {
+                ret = WOLFSSL_BIO_ERROR;
+            }
+            #endif
+            break;
+#endif /* NO_FILESYSTEM */
+        case WOLFSSL_BIO_MEMORY:
+            {
+                const byte* c;
+                int   cSz;
+                cSz = wolfSSL_BIO_pending(bio);
+                if (cSz == 0) {
+                    ret = 0; /* Nothing to read */
+                    buf[0] = '\0';
+                    break;
+                }
+
+                if (wolfSSL_BIO_get_mem_data(bio, (void*)&c) <= 0) {
+                    ret = WOLFSSL_BIO_ERROR;
+                    break;
+                }
+
+                cSz = wolfSSL_getLineLength((char*)c, cSz);
+                /* check case where line was bigger then buffer and buffer
+                 * needs end terminator */
+                if (cSz >= sz) {
+                    cSz = sz - 1;
+                    buf[cSz] = '\0';
+                }
+                else {
+                    /* not minus 1 here because placing terminator after
+                       msg and have checked that sz is large enough */
+                    buf[cSz] = '\0';
+                }
+
+                ret = wolfSSL_BIO_MEMORY_read(bio, (void*)buf, cSz);
+                /* ret is read after the switch statement */
+                break;
+            }
+        case WOLFSSL_BIO_BIO:
+            {
+                char* c;
+                int   cSz;
+                cSz = wolfSSL_BIO_nread0(bio, &c);
+                if (cSz == 0) {
+                    ret = 0; /* Nothing to read */
+                    buf[0] = '\0';
+                    break;
+                }
+
+                cSz = wolfSSL_getLineLength(c, cSz);
+                /* check case where line was bigger then buffer and buffer
+                 * needs end terminator */
+                if (cSz >= sz) {
+                    cSz = sz - 1;
+                    buf[cSz] = '\0';
+                }
+                else {
+                    /* not minus 1 here because placing terminator after
+                       msg and have checked that sz is large enough */
+                    buf[cSz] = '\0';
+                }
+
+                ret = wolfSSL_BIO_nread(bio, &c, cSz);
+                if (ret > 0 && ret < sz) {
+                    XMEMCPY(buf, c, ret);
+                }
+                break;
+            }
+
+#ifndef WOLFCRYPT_ONLY
+        /* call final on hash */
+        case WOLFSSL_BIO_MD:
+            if (wolfSSL_EVP_MD_CTX_size((WOLFSSL_EVP_MD_CTX*)bio->ptr) > sz) {
+                WOLFSSL_MSG("Output buffer was too small for digest");
+                ret = WOLFSSL_FAILURE;
+            }
+            else {
+                unsigned int szOut = 0;
+                ret = wolfSSL_EVP_DigestFinal((WOLFSSL_EVP_MD_CTX*)bio->ptr,
+                        (unsigned char*)buf, &szOut);
+                if (ret == WOLFSSL_SUCCESS) {
+                    ret = szOut;
+                }
+            }
+            break;
+#endif /* WOLFCRYPT_ONLY */
+
+        default:
+            WOLFSSL_MSG("BIO type not supported yet with wolfSSL_BIO_gets");
+    }
+
+    /* info cb, user can override return value */
+    if (bio->infoCb != NULL) {
+        ret = (int)bio->infoCb(bio, WOLFSSL_BIO_CB_GETS | WOLFSSL_BIO_CB_RETURN,
+                               buf, sz, 0, ret);
+    }
+
+    return ret;
+}
+
+
+/* Writes a null terminated string to bio.
+ *
+ * bio  the structure to write to
+ * buf  buffer to holding input string
+ *
+ * returns the size of the result placed in bio on success and a 0 or negative
+ *         value in an error case. -2 is returned if the implementation is not
+ *         supported for the BIO type.
+ */
+int wolfSSL_BIO_puts(WOLFSSL_BIO* bio, const char* buf)
+{
+    int sz;
+
+    if (bio == NULL || buf == NULL) {
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    /* check if is custom method */
+    if (bio->method && bio->method->putsCb) {
+        return bio->method->putsCb(bio, buf);
+    }
+
+    sz = (int)XSTRLEN(buf);
+    if (sz <= 0) {
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    return wolfSSL_BIO_write(bio, buf, sz);
+}
+
+
+/* searches through bio list for a BIO of type "type"
+ * returns NULL on failure to find a given type */
+WOLFSSL_BIO* wolfSSL_BIO_find_type(WOLFSSL_BIO* bio, int type)
+{
+    WOLFSSL_BIO* local = NULL;
+    WOLFSSL_BIO* current;
+
+    WOLFSSL_ENTER("wolfSSL_BIO_find_type");
+
+    if (bio == NULL) {
+        return local;
+    }
+
+    current = bio;
+    while (current != NULL) {
+        if (current->type == type) {
+            WOLFSSL_MSG("Found matching WOLFSSL_BIO type");
+            local = current;
+            break;
+        }
+        current = current->next;
+    }
+
+    return local;
+}
+
+
+/* returns a pointer to the next WOLFSSL_BIO in the chain on success.
+ * If a failure case then NULL is returned */
+WOLFSSL_BIO* wolfSSL_BIO_next(WOLFSSL_BIO* bio)
+{
+    WOLFSSL_ENTER("wolfSSL_BIO_next");
+
+    if (bio == NULL) {
+        WOLFSSL_MSG("Bad argument passed in");
+        return NULL;
+    }
+
+    return bio->next;
+}
+
+/* BIO_wpending returns the number of bytes pending to be written. */
+size_t wolfSSL_BIO_wpending(const WOLFSSL_BIO *bio)
+{
+    WOLFSSL_ENTER("BIO_wpending");
+
+    if (bio == NULL)
+        return 0;
+
+    if (bio->type == WOLFSSL_BIO_MEMORY) {
+        return bio->wrSz;
+    }
+
+    /* type BIO_BIO then check paired buffer */
+    if (bio->type == WOLFSSL_BIO_BIO && bio->pair != NULL) {
+        WOLFSSL_BIO* pair = bio->pair;
+        return pair->wrIdx;
+    }
+
+    return 0;
+}
+
+/* Return the number of pending bytes in read and write buffers */
+size_t wolfSSL_BIO_ctrl_pending(WOLFSSL_BIO *bio)
+{
+    WOLFSSL_ENTER("wolfSSL_BIO_ctrl_pending");
+    if (bio == NULL) {
+        return 0;
+    }
+
+    if (bio->type == WOLFSSL_BIO_MD) {
+        /* MD is a wrapper only get next bio */
+        while (bio->next != NULL) {
+            bio = bio->next;
+            if (bio->type != WOLFSSL_BIO_MD) {
+                break;
+            }
+        }
+    }
+
+#ifndef WOLFCRYPT_ONLY
+    if (bio->type == WOLFSSL_BIO_SSL && bio->ptr != NULL) {
+        return (long)wolfSSL_pending((WOLFSSL*)bio->ptr);
+    }
+#endif
+
+    if (bio->type == WOLFSSL_BIO_MEMORY) {
+        return bio->wrSz;
+    }
+
+    /* type BIO_BIO then check paired buffer */
+    if (bio->type == WOLFSSL_BIO_BIO && bio->pair != NULL) {
+        WOLFSSL_BIO* pair = bio->pair;
+        if (pair->wrIdx > 0 && pair->wrIdx <= pair->rdIdx) {
+            /* in wrap around state where beginning of buffer is being
+             * overwritten */
+            return pair->wrSz - pair->rdIdx + pair->wrIdx;
+        }
+        else {
+            /* simple case where has not wrapped around */
+            return pair->wrIdx - pair->rdIdx;
+        }
+    }
+    return 0;
+}
+
+
+long wolfSSL_BIO_get_mem_ptr(WOLFSSL_BIO *bio, WOLFSSL_BUF_MEM **ptr)
+{
+    WOLFSSL_BIO* front = bio;
+    long ret = WOLFSSL_FAILURE;
+
+    WOLFSSL_ENTER("wolfSSL_BIO_get_mem_ptr");
+
+    if (bio == NULL || ptr == NULL) {
+        return WOLFSSL_FAILURE;
+    }
+
+    /* start at end and work backwards to find a memory BIO in the BIO chain */
+    while ((bio != NULL) && (bio->next != NULL)) {
+        bio = bio->next;
+    }
+
+    while (bio != NULL) {
+
+        if (bio->type == WOLFSSL_BIO_MEMORY) {
+            *ptr = bio->mem_buf;
+            ret = WOLFSSL_SUCCESS;
+        }
+
+        if (bio == front) {
+            break;
+        }
+        bio = bio->prev;
+    }
+
+    return ret;
+}
+
+WOLFSSL_API long wolfSSL_BIO_int_ctrl(WOLFSSL_BIO *bp, int cmd, long larg, int iarg)
+{
+    (void) bp;
+    (void) cmd;
+    (void) larg;
+    (void) iarg;
+    WOLFSSL_STUB("BIO_int_ctrl");
+    return 0;
+}
+
+
+int wolfSSL_BIO_set_write_buf_size(WOLFSSL_BIO *bio, long size)
+{
+    WOLFSSL_ENTER("wolfSSL_BIO_set_write_buf_size");
+
+    if (bio == NULL || bio->type != WOLFSSL_BIO_BIO || size < 0) {
+        return WOLFSSL_FAILURE;
+    }
+
+    /* if already in pair then do not change size */
+    if (bio->pair != NULL) {
+        WOLFSSL_MSG("WOLFSSL_BIO is paired, free from pair before changing");
+        return WOLFSSL_FAILURE;
+    }
+
+    bio->wrSz  = (int)size;
+    if (bio->wrSz < 0) {
+        WOLFSSL_MSG("Unexpected negative size value");
+        return WOLFSSL_FAILURE;
+    }
+
+    if (bio->ptr != NULL) {
+        XFREE(bio->ptr, bio->heap, DYNAMIC_TYPE_OPENSSL);
+    }
+
+    bio->ptr = (byte*)XMALLOC(bio->wrSz, bio->heap, DYNAMIC_TYPE_OPENSSL);
+    if (bio->ptr == NULL) {
+        WOLFSSL_MSG("Memory allocation error");
+        return WOLFSSL_FAILURE;
+    }
+    bio->num = bio->wrSz;
+    bio->wrIdx = 0;
+    bio->rdIdx = 0;
+    if (bio->mem_buf != NULL) {
+        bio->mem_buf->data = (char*)bio->ptr;
+        bio->mem_buf->length = bio->num;
+    }
+
+    return WOLFSSL_SUCCESS;
+}
+
+
+/* Joins two BIO_BIO types. The write of b1 goes to the read of b2 and vice
+ * versa. Creating something similar to a two way pipe.
+ * Reading and writing between the two BIOs is not thread safe, they are
+ * expected to be used by the same thread. */
+int wolfSSL_BIO_make_bio_pair(WOLFSSL_BIO *b1, WOLFSSL_BIO *b2)
+{
+    WOLFSSL_ENTER("wolfSSL_BIO_make_bio_pair");
+
+    if (b1 == NULL || b2 == NULL) {
+        WOLFSSL_LEAVE("wolfSSL_BIO_make_bio_pair", BAD_FUNC_ARG);
+        return WOLFSSL_FAILURE;
+    }
+
+    /* both are expected to be of type BIO and not already paired */
+    if (b1->type != WOLFSSL_BIO_BIO || b2->type != WOLFSSL_BIO_BIO ||
+        b1->pair != NULL || b2->pair != NULL) {
+        WOLFSSL_MSG("Expected type BIO and not already paired");
+        return WOLFSSL_FAILURE;
+    }
+
+    /* set default write size if not already set */
+    if (b1->ptr == NULL && wolfSSL_BIO_set_write_buf_size(b1,
+                            WOLFSSL_BIO_SIZE) != WOLFSSL_SUCCESS) {
+        return WOLFSSL_FAILURE;
+    }
+
+    if (b2->ptr == NULL && wolfSSL_BIO_set_write_buf_size(b2,
+                            WOLFSSL_BIO_SIZE) != WOLFSSL_SUCCESS) {
+        return WOLFSSL_FAILURE;
+    }
+
+    b1->pair = b2;
+    b2->pair = b1;
+
+    return WOLFSSL_SUCCESS;
+}
+
+
+int wolfSSL_BIO_ctrl_reset_read_request(WOLFSSL_BIO *b)
+{
+    WOLFSSL_ENTER("wolfSSL_BIO_ctrl_reset_read_request");
+
+    if (b == NULL || b->type == WOLFSSL_BIO_MEMORY) {
+        return SSL_FAILURE;
+    }
+
+    b->readRq = 0;
+
+    return WOLFSSL_SUCCESS;
+}
+
+
+/* Does not advance read index pointer */
+int wolfSSL_BIO_nread0(WOLFSSL_BIO *bio, char **buf)
+{
+    WOLFSSL_ENTER("wolfSSL_BIO_nread0");
+
+    if (bio == NULL || buf == NULL) {
+        WOLFSSL_MSG("NULL argument passed in");
+        return 0;
+    }
+
+    /* if paired read from pair */
+    if (bio->pair != NULL) {
+        WOLFSSL_BIO* pair = bio->pair;
+
+        /* case where have wrapped around write buffer */
+        *buf = (char*)pair->ptr + pair->rdIdx;
+        if (pair->wrIdx > 0 && pair->rdIdx >= pair->wrIdx) {
+            return pair->wrSz - pair->rdIdx;
+        }
+        else {
+            return pair->wrIdx - pair->rdIdx;
+        }
+    }
+
+    return 0;
+}
+
+
+/* similar to wolfSSL_BIO_nread0 but advances the read index */
+int wolfSSL_BIO_nread(WOLFSSL_BIO *bio, char **buf, int num)
+{
+    int sz = WOLFSSL_BIO_UNSET;
+
+    WOLFSSL_ENTER("wolfSSL_BIO_nread");
+
+    if (bio == NULL || buf == NULL) {
+        WOLFSSL_MSG("NULL argument passed in");
+        return WOLFSSL_FAILURE;
+    }
+
+    if (bio->type == WOLFSSL_BIO_MEMORY) {
+        return SSL_FAILURE;
+    }
+
+    if (bio->pair != NULL) {
+        /* special case if asking to read 0 bytes */
+        if (num == 0) {
+            *buf = (char*)bio->pair->ptr + bio->pair->rdIdx;
+            return 0;
+        }
+
+        /* get amount able to read and set buffer pointer */
+        sz = wolfSSL_BIO_nread0(bio, buf);
+        if (sz == 0) {
+            return WOLFSSL_BIO_ERROR;
+        }
+
+        if (num < sz) {
+            sz = num;
+        }
+        bio->pair->rdIdx += sz;
+
+        /* check if have read to the end of the buffer and need to reset */
+        if (bio->pair->rdIdx == bio->pair->wrSz) {
+            bio->pair->rdIdx = 0;
+            if (bio->pair->wrIdx == bio->pair->wrSz) {
+                bio->pair->wrIdx = 0;
+            }
+        }
+
+        /* check if read up to write index, if so then reset index */
+        if (bio->pair->rdIdx == bio->pair->wrIdx) {
+            bio->pair->rdIdx = 0;
+            bio->pair->wrIdx = 0;
+        }
+    }
+
+    return sz;
+}
+
+
+int wolfSSL_BIO_nwrite(WOLFSSL_BIO *bio, char **buf, int num)
+{
+    int sz = WOLFSSL_BIO_UNSET;
+
+    WOLFSSL_ENTER("wolfSSL_BIO_nwrite");
+
+    if (bio == NULL || buf == NULL) {
+        WOLFSSL_MSG("NULL argument passed in");
+        return 0;
+    }
+
+    if (bio->type != WOLFSSL_BIO_BIO) {
+        return SSL_FAILURE;
+    }
+
+    if (bio->pair != NULL) {
+        if (num == 0) {
+            *buf = (char*)bio->ptr + bio->wrIdx;
+            return 0;
+        }
+
+        if (bio->wrIdx < bio->rdIdx) {
+            /* if wrapped around only write up to read index. In this case
+             * rdIdx is always greater then wrIdx so sz will not be negative. */
+            sz = bio->rdIdx - bio->wrIdx;
+        }
+        else if (bio->rdIdx > 0 && bio->wrIdx == bio->rdIdx) {
+            return WOLFSSL_BIO_ERROR; /* no more room to write */
+        }
+        else {
+            /* write index is past read index so write to end of buffer */
+            sz = bio->wrSz - bio->wrIdx;
+
+            if (sz <= 0) {
+                /* either an error has occurred with write index or it is at the
+                 * end of the write buffer. */
+                if (bio->rdIdx == 0) {
+                    /* no more room, nothing has been read */
+                    return WOLFSSL_BIO_ERROR;
+                }
+
+                bio->wrIdx = 0;
+
+                /* check case where read index is not at 0 */
+                if (bio->rdIdx > 0) {
+                    sz = bio->rdIdx; /* can write up to the read index */
+                }
+                else {
+                    sz = bio->wrSz; /* no restriction other then buffer size */
+                }
+            }
+        }
+
+        if (num < sz) {
+            sz = num;
+        }
+        *buf = (char*)bio->ptr + bio->wrIdx;
+        bio->wrIdx += sz;
+
+        /* if at the end of the buffer and space for wrap around then set
+         * write index back to 0 */
+        if (bio->wrIdx == bio->wrSz && bio->rdIdx > 0) {
+            bio->wrIdx = 0;
+        }
+    }
+
+    return sz;
+}
+
+
+/* Reset BIO to initial state */
+int wolfSSL_BIO_reset(WOLFSSL_BIO *bio)
+{
+    WOLFSSL_ENTER("wolfSSL_BIO_reset");
+
+    if (bio == NULL) {
+        WOLFSSL_MSG("NULL argument passed in");
+        /* -1 is consistent failure even for FILE type */
+        return WOLFSSL_BIO_ERROR;
+    }
+
+    switch (bio->type) {
+        #ifndef NO_FILESYSTEM
+        case WOLFSSL_BIO_FILE:
+            XREWIND((XFILE)bio->ptr);
+            return 0;
+        #endif
+
+        case WOLFSSL_BIO_BIO:
+            bio->rdIdx = 0;
+            bio->wrIdx = 0;
+            return 0;
+
+        case WOLFSSL_BIO_MEMORY:
+            bio->rdIdx = 0;
+            bio->wrIdx = 0;
+            bio->wrSz  = 0;
+            XFREE(bio->ptr, bio->heap, DYNAMIC_TYPE_OPENSSL);
+            bio->ptr = NULL;
+            bio->num = 0;
+            if (bio->mem_buf != NULL) {
+                bio->mem_buf->data = (char*)bio->ptr;
+                bio->mem_buf->length = bio->num;
+            }
+            return 0;
+
+#ifndef WOLFCRYPT_ONLY
+        case WOLFSSL_BIO_MD:
+            if (bio->ptr != NULL) {
+                const WOLFSSL_EVP_MD* md =
+                    wolfSSL_EVP_MD_CTX_md((WOLFSSL_EVP_MD_CTX*)bio->ptr);
+                wolfSSL_EVP_MD_CTX_init((WOLFSSL_EVP_MD_CTX*)bio->ptr);
+                wolfSSL_EVP_DigestInit((WOLFSSL_EVP_MD_CTX*)bio->ptr, md);
+            }
+            return 0;
+#endif /* WOLFCRYPT_ONLY */
+
+        default:
+            WOLFSSL_MSG("Unknown BIO type needs added to reset function");
+    }
+
+    return WOLFSSL_BIO_ERROR;
+}
+
+#ifndef NO_FILESYSTEM
+long wolfSSL_BIO_set_fp(WOLFSSL_BIO *bio, XFILE fp, int c)
+{
+    WOLFSSL_ENTER("wolfSSL_BIO_set_fp");
+
+    if (bio == NULL || fp == XBADFILE) {
+        WOLFSSL_LEAVE("wolfSSL_BIO_set_fp", BAD_FUNC_ARG);
+        return WOLFSSL_FAILURE;
+    }
+
+    if (bio->type != WOLFSSL_BIO_FILE) {
+        return WOLFSSL_FAILURE;
+    }
+
+    bio->shutdown = (byte)c;
+    bio->ptr = (XFILE)fp;
+
+    return WOLFSSL_SUCCESS;
+}
+
+
+long wolfSSL_BIO_get_fp(WOLFSSL_BIO *bio, XFILE* fp)
+{
+    WOLFSSL_ENTER("wolfSSL_BIO_get_fp");
+
+    if (bio == NULL || fp == XBADFILE) {
+        return WOLFSSL_FAILURE;
+    }
+
+    if (bio->type != WOLFSSL_BIO_FILE) {
+        return SSL_FAILURE;
+    }
+
+    *fp = (XFILE)bio->ptr;
+
+    return WOLFSSL_SUCCESS;
+}
+
+/* overwrites file */
+int wolfSSL_BIO_write_filename(WOLFSSL_BIO *bio, char *name)
+{
+    WOLFSSL_ENTER("wolfSSL_BIO_write_filename");
+
+    if (bio == NULL || name == NULL) {
+        return WOLFSSL_FAILURE;
+    }
+
+    if (bio->type == WOLFSSL_BIO_FILE) {
+        if (((XFILE)bio->ptr) != XBADFILE && bio->shutdown == BIO_CLOSE) {
+            XFCLOSE((XFILE)bio->ptr);
+        }
+
+        bio->ptr = XFOPEN(name, "w");
+        if (((XFILE)bio->ptr) == XBADFILE) {
+            return WOLFSSL_FAILURE;
+        }
+        bio->shutdown = BIO_CLOSE;
+
+        return WOLFSSL_SUCCESS;
+    }
+
+    return WOLFSSL_FAILURE;
+}
+
+
+int wolfSSL_BIO_seek(WOLFSSL_BIO *bio, int ofs)
+{
+      WOLFSSL_ENTER("wolfSSL_BIO_seek");
+
+      if (bio == NULL) {
+          return -1;
+      }
+
+      /* offset ofs from beginning of file */
+      if (bio->type == WOLFSSL_BIO_FILE &&
+              XFSEEK((XFILE)bio->ptr, ofs, SEEK_SET) < 0) {
+          return -1;
+      }
+
+      return 0;
+}
+#endif /* NO_FILESYSTEM */
+
+
+long wolfSSL_BIO_set_mem_eof_return(WOLFSSL_BIO *bio, int v)
+{
+      WOLFSSL_ENTER("wolfSSL_BIO_set_mem_eof_return");
+
+      if (bio != NULL) {
+        bio->eof = v;
+      }
+
+      return 0;
+}
+
+int wolfSSL_BIO_get_len(WOLFSSL_BIO *bio)
+{
+    int len;
+#ifndef NO_FILESYSTEM
+    long memSz = 0, curr = 0;
+    XFILE file;
+#endif
+
+    WOLFSSL_ENTER("wolfSSL_BIO_get_len");
+
+    if ((len = wolfSSL_BIO_pending(bio)) > 0) {
+    }
+#ifndef NO_FILESYSTEM
+    else if (bio->type == WOLFSSL_BIO_FILE) {
+        if (wolfSSL_BIO_get_fp(bio, &file) != WOLFSSL_SUCCESS)
+            len = BAD_FUNC_ARG;
+        if (len == 0) {
+            curr = XFTELL(file);
+            if (curr < 0) {
+                len = WOLFSSL_BAD_FILE;
+            }
+            if (XFSEEK(file, 0, XSEEK_END) != 0)
+                len = WOLFSSL_BAD_FILE;
+        }
+        if (len == 0) {
+            memSz = XFTELL(file);
+            if (memSz > MAX_WOLFSSL_FILE_SIZE || memSz < 0)
+                len = WOLFSSL_BAD_FILE;
+        }
+        if (len == 0) {
+            memSz -= curr;
+            len = (int)memSz;
+            if (XFSEEK(file, curr, SEEK_SET) != 0)
+                len = WOLFSSL_BAD_FILE;
+        }
+    }
+#endif
+    return len;
+}
+
+
+void wolfSSL_BIO_set_callback(WOLFSSL_BIO *bio, wolf_bio_info_cb callback_func)
+{
+    WOLFSSL_ENTER("wolfSSL_BIO_set_callback");
+
+    if (bio != NULL) {
+        bio->infoCb = callback_func;
+    }
+}
+
+
+wolf_bio_info_cb wolfSSL_BIO_get_callback(WOLFSSL_BIO *bio)
+{
+    WOLFSSL_ENTER("wolfSSL_BIO_get_callback");
+
+    if (bio != NULL) {
+        return bio->infoCb;
+    }
+
+    return NULL;
+}
+
+
+void wolfSSL_BIO_set_callback_arg(WOLFSSL_BIO *bio, char *arg)
+{
+    WOLFSSL_ENTER("wolfSSL_BIO_set_callback_arg");
+
+    if (bio != NULL) {
+        bio->infoArg = arg;
+    }
+}
+
+
+char* wolfSSL_BIO_get_callback_arg(const WOLFSSL_BIO *bio)
+{
+    WOLFSSL_ENTER("wolfSSL_BIO_get_callback_arg");
+
+    if (bio != NULL) {
+        return bio->infoArg;
+    }
+
+    return NULL;
+}
+
+
+/* store a user pointer in the WOLFSSL_BIO structure */
+void wolfSSL_BIO_set_data(WOLFSSL_BIO* bio, void *ptr)
+{
+    WOLFSSL_ENTER("wolfSSL_BIO_set_data");
+
+    if (bio != NULL) {
+        bio->usrCtx = ptr;
+    }
+}
+
+
+void* wolfSSL_BIO_get_data(WOLFSSL_BIO* bio)
+{
+    WOLFSSL_ENTER("wolfSSL_BIO_get_data");
+
+    if (bio != NULL)
+        return bio->usrCtx;
+
+    WOLFSSL_MSG("WOLFSSL_BIO was null");
+    return NULL;
+}
+
+/* If flag is 0 then blocking is set, if 1 then non blocking.
+ * Always returns 1
+ */
+long wolfSSL_BIO_set_nbio(WOLFSSL_BIO* bio, long on)
+{
+    #ifndef WOLFSSL_DTLS
+    (void)on;
+    #endif
+    WOLFSSL_ENTER("wolfSSL_BIO_set_nbio");
+
+    switch (bio->type) {
+        case WOLFSSL_BIO_SOCKET:
+        #ifdef XFCNTL
+            {
+                int flag = XFCNTL(bio->num, F_GETFL, 0);
+                if (on)
+                    XFCNTL(bio->num, F_SETFL, flag | O_NONBLOCK);
+                else
+                    XFCNTL(bio->num, F_SETFL, flag & ~O_NONBLOCK);
+            }
+        #endif
+            break;
+        case WOLFSSL_BIO_SSL:
+        #ifdef WOLFSSL_DTLS
+            wolfSSL_dtls_set_using_nonblock((WOLFSSL*)bio->ptr, (int)on);
+        #endif
+            break;
+
+        default:
+            WOLFSSL_MSG("Unsupported bio type for non blocking");
+            break;
+    }
+
+    return 1;
+}
+
+
+
+/* creates a new custom WOLFSSL_BIO_METHOD */
+WOLFSSL_BIO_METHOD *wolfSSL_BIO_meth_new(int type, const char *name)
+{
+    WOLFSSL_BIO_METHOD* meth;
+
+    WOLFSSL_ENTER("wolfSSL_BIO_meth_new");
+
+    meth = (WOLFSSL_BIO_METHOD*)XMALLOC(sizeof(WOLFSSL_BIO_METHOD), NULL,
+            DYNAMIC_TYPE_OPENSSL);
+    if (meth == NULL) {
+        WOLFSSL_MSG("Error allocating memory for WOLFSSL_BIO_METHOD");
+        return NULL;
+    }
+    XMEMSET(meth, 0, sizeof(WOLFSSL_BIO_METHOD));
+    meth->type = (byte)type;
+    XSTRNCPY(meth->name, name, MAX_BIO_METHOD_NAME - 1);
+
+    return meth;
+}
+
+
+void wolfSSL_BIO_meth_free(WOLFSSL_BIO_METHOD *biom)
+{
+    WOLFSSL_ENTER("wolfSSL_BIO_meth_free");
+    if (biom) {
+        XFREE(biom, NULL, DYNAMIC_TYPE_OPENSSL);
+    }
+}
+
+
+int wolfSSL_BIO_meth_set_write(WOLFSSL_BIO_METHOD *biom,
+        wolfSSL_BIO_meth_write_cb biom_write)
+{
+    WOLFSSL_ENTER("wolfSSL_BIO_meth_set_write");
+    if (biom) {
+        biom->writeCb = biom_write;
+        return WOLFSSL_SUCCESS;
+    }
+    return WOLFSSL_FAILURE;
+}
+
+
+int wolfSSL_BIO_meth_set_read(WOLFSSL_BIO_METHOD *biom,
+        wolfSSL_BIO_meth_read_cb biom_read)
+{
+    WOLFSSL_ENTER("wolfSSL_BIO_meth_set_read");
+    if (biom) {
+        biom->readCb = biom_read;
+        return WOLFSSL_SUCCESS;
+    }
+    return WOLFSSL_FAILURE;
+}
+
+
+int wolfSSL_BIO_meth_set_puts(WOLFSSL_BIO_METHOD *biom,
+        wolfSSL_BIO_meth_puts_cb biom_puts)
+{
+    WOLFSSL_ENTER("wolfSSL_BIO_meth_set_puts");
+    if (biom) {
+        biom->putsCb = biom_puts;
+        return WOLFSSL_SUCCESS;
+    }
+    return WOLFSSL_FAILURE;
+}
+
+
+int wolfSSL_BIO_meth_set_gets(WOLFSSL_BIO_METHOD *biom,
+        wolfSSL_BIO_meth_gets_cb biom_gets)
+{
+    WOLFSSL_ENTER("wolfSSL_BIO_meth_set_gets");
+    if (biom) {
+        biom->getsCb = biom_gets;
+        return WOLFSSL_SUCCESS;
+    }
+    return WOLFSSL_FAILURE;
+}
+
+
+int wolfSSL_BIO_meth_set_ctrl(WOLFSSL_BIO_METHOD *biom,
+        wolfSSL_BIO_meth_ctrl_get_cb biom_ctrl)
+{
+    WOLFSSL_ENTER("wolfSSL_BIO_meth_set_ctrl");
+    if (biom) {
+        biom->ctrlCb = biom_ctrl;
+        return WOLFSSL_SUCCESS;
+    }
+    return WOLFSSL_FAILURE;
+}
+
+
+int wolfSSL_BIO_meth_set_create(WOLFSSL_BIO_METHOD *biom,
+        wolfSSL_BIO_meth_create_cb biom_create)
+{
+    WOLFSSL_ENTER("wolfSSL_BIO_meth_set_create");
+    if (biom) {
+        biom->createCb = biom_create;
+        return WOLFSSL_SUCCESS;
+    }
+    return WOLFSSL_FAILURE;
+}
+
+
+int wolfSSL_BIO_meth_set_destroy(WOLFSSL_BIO_METHOD *biom,
+        wolfSSL_BIO_meth_destroy_cb biom_destroy)
+{
+    WOLFSSL_STUB("wolfSSL_BIO_meth_set_destroy");
+    if (biom) {
+        biom->freeCb = biom_destroy;
+        return WOLFSSL_SUCCESS;
+    }
+    return WOLFSSL_FAILURE;
+}
+
+
+/* this compatibility function can be used for multiple BIO types */
+int wolfSSL_BIO_get_mem_data(WOLFSSL_BIO* bio, void* p)
+{
+    WOLFSSL_ENTER("wolfSSL_BIO_get_mem_data");
+
+    if (bio == NULL)
+        return WOLFSSL_FATAL_ERROR;
+
+    if (p) {
+        *(byte**)p = (byte*)bio->ptr;
+    }
+
+    return bio->num;
+}
+
+int wolfSSL_BIO_pending(WOLFSSL_BIO* bio)
+{
+    return (int)wolfSSL_BIO_ctrl_pending(bio);
+}
+
+
+int wolfSSL_BIO_flush(WOLFSSL_BIO* bio)
+{
+    /* for wolfSSL no flushing needed */
+    WOLFSSL_ENTER("BIO_flush");
+    (void)bio;
+    return 1;
+}
+#endif /* WOLFSSL_BIO_INCLUDED */
+
--- a/src/crl.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/src/crl.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* crl.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -83,7 +83,7 @@
 
     XMEMCPY(crle->issuerHash, dcrl->issuerHash, CRL_DIGEST_SIZE);
     /* XMEMCPY(crle->crlHash, dcrl->crlHash, CRL_DIGEST_SIZE);
-     *   copy the hash here if needed for optimized comparisons */
+     * copy the hash here if needed for optimized comparisons */
     XMEMCPY(crle->lastDate, dcrl->lastDate, MAX_DATE_SIZE);
     XMEMCPY(crle->nextDate, dcrl->nextDate, MAX_DATE_SIZE);
     crle->lastDateFormat = dcrl->lastDateFormat;
@@ -109,7 +109,7 @@
         }
         XMEMCPY(crle->toBeSigned, buff + dcrl->certBegin, crle->tbsSz);
         XMEMCPY(crle->signature, dcrl->signature, crle->signatureSz);
-    #if !defined(NO_SKID) && defined(CRL_SKID_READY)
+    #ifndef NO_SKID
         crle->extAuthKeyIdSet = dcrl->extAuthKeyIdSet;
         if (crle->extAuthKeyIdSet)
             XMEMCPY(crle->extAuthKeyId, dcrl->extAuthKeyId, KEYID_SIZE);
@@ -121,6 +121,7 @@
     }
 
     (void)verified;
+    (void)heap;
 
     return 0;
 }
@@ -200,17 +201,15 @@
 
     while (crle) {
         if (XMEMCMP(crle->issuerHash, cert->issuerHash, CRL_DIGEST_SIZE) == 0) {
-            int doNextDate = 1;
-
             WOLFSSL_MSG("Found CRL Entry on list");
 
             if (crle->verified == 0) {
-                Signer* ca;
-            #if !defined(NO_SKID) && defined(CRL_SKID_READY)
-                byte extAuthKeyId[KEYID_SIZE]
+                Signer* ca = NULL;
+            #ifndef NO_SKID
+                byte extAuthKeyId[KEYID_SIZE];
             #endif
                 byte issuerHash[CRL_DIGEST_SIZE];
-                byte* tbs = NULL;
+                byte* tbs;
                 word32 tbsSz = crle->tbsSz;
                 byte* sig = NULL;
                 word32 sigSz = crle->signatureSz;
@@ -231,15 +230,15 @@
 
                 XMEMCPY(tbs, crle->toBeSigned, tbsSz);
                 XMEMCPY(sig, crle->signature, sigSz);
-            #if !defined(NO_SKID) && defined(CRL_SKID_READY)
-                XMEMCMPY(extAuthKeyId, crle->extAuthKeyId,
+            #ifndef NO_SKID
+                XMEMCPY(extAuthKeyId, crle->extAuthKeyId,
                                                           sizeof(extAuthKeyId));
             #endif
                 XMEMCPY(issuerHash, crle->issuerHash, sizeof(issuerHash));
 
                 wc_UnLockMutex(&crl->crlLock);
 
-            #if !defined(NO_SKID) && defined(CRL_SKID_READY)
+            #ifndef NO_SKID
                 if (crle->extAuthKeyIdSet)
                     ca = GetCA(crl->cm, extAuthKeyId);
                 if (ca == NULL)
@@ -296,14 +295,12 @@
 
             WOLFSSL_MSG("Checking next date validity");
 
-            #ifdef WOLFSSL_NO_CRL_NEXT_DATE
-                if (crle->nextDateFormat == ASN_OTHER_TYPE)
-                    doNextDate = 0;  /* skip */
-            #endif
-
-            if (doNextDate) {
+        #ifdef WOLFSSL_NO_CRL_NEXT_DATE
+            if (crle->nextDateFormat != ASN_OTHER_TYPE)
+        #endif
+            {
             #ifndef NO_ASN_TIME
-                if (!ValidateDate(crle->nextDate,crle->nextDateFormat, AFTER)) {
+                if (!XVALIDATE_DATE(crle->nextDate,crle->nextDateFormat, AFTER)) {
                     WOLFSSL_MSG("CRL next date is no longer valid");
                     ret = ASN_AFTER_DATE_E;
                 }
@@ -428,7 +425,7 @@
 
 /* Load CRL File of type, WOLFSSL_SUCCESS on ok */
 int BufferLoadCRL(WOLFSSL_CRL* crl, const byte* buff, long sz, int type,
-                  int noVerify)
+                  int verify)
 {
     int          ret = WOLFSSL_SUCCESS;
     const byte*  myBuffer = buff;    /* if DER ok, otherwise switch */
@@ -471,7 +468,7 @@
 
     InitDecodedCRL(dcrl, crl->heap);
     ret = ParseCRL(dcrl, myBuffer, (word32)sz, crl->cm);
-    if (ret != 0 && !(ret == ASN_CRL_NO_SIGNER_E && noVerify)) {
+    if (ret != 0 && !(ret == ASN_CRL_NO_SIGNER_E && verify == NO_VERIFY)) {
         WOLFSSL_MSG("ParseCRL error");
     }
     else {
@@ -516,7 +513,7 @@
     wc_UnLockMutex(&crl->crlLock);
 
     WOLFSSL_LEAVE("wolfSSL_X509_STORE_add_crl", WOLFSSL_SUCCESS);
-    
+
     return WOLFSSL_SUCCESS;
 }
 #endif
@@ -1022,8 +1019,8 @@
             }
         }
 
-        if (!skip && ProcessFile(NULL, name, type, CRL_TYPE, NULL, 0, crl)
-                                                           != WOLFSSL_SUCCESS) {
+        if (!skip && ProcessFile(NULL, name, type, CRL_TYPE, NULL, 0, crl,
+                                 VERIFY) != WOLFSSL_SUCCESS) {
             WOLFSSL_MSG("CRL file load failed, continuing");
         }
 
@@ -1045,8 +1042,7 @@
         pathLen = (word32)XSTRLEN(path);
         pathBuf = (char*)XMALLOC(pathLen+1, crl->heap,DYNAMIC_TYPE_CRL_MONITOR);
         if (pathBuf) {
-            XSTRNCPY(pathBuf, path, pathLen);
-            pathBuf[pathLen] = '\0'; /* Null Terminate */
+            XSTRNCPY(pathBuf, path, pathLen+1);
 
             if (type == WOLFSSL_FILETYPE_PEM) {
                 /* free old path before setting a new one */
--- a/src/internal.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/src/internal.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* internal.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -27,6 +27,24 @@
 
 #include <wolfssl/wolfcrypt/settings.h>
 
+/*
+ * WOLFSSL_SMALL_CERT_VERIFY:
+ *     Verify the certificate signature without using DecodedCert. Doubles up
+ *     on some code but allows smaller peak heap memory usage.
+ *     Cannot be used with WOLFSSL_NONBLOCK_OCSP.
+ * WOLFSSL_ALT_CERT_CHAINS:
+ *     Allows CA's to be presented by peer, but not part of a valid chain.
+ *     Default wolfSSL behavior is to require validation of all presented peer
+ *     certificates. This also allows loading intermediate CA's as trusted
+ *     and ignoring no signer failures for CA's up the chain to root.
+ */
+
+
+#ifdef EXTERNAL_OPTS_OPENVPN
+#error EXTERNAL_OPTS_OPENVPN should not be defined\
+    when building wolfSSL
+#endif
+
 #ifndef WOLFCRYPT_ONLY
 
 #include <wolfssl/internal.h>
@@ -53,13 +71,7 @@
 
 #if defined(DEBUG_WOLFSSL) || defined(SHOW_SECRETS) || \
     defined(CHACHA_AEAD_TEST) || defined(WOLFSSL_SESSION_EXPORT_DEBUG)
-    #if defined(FREESCALE_MQX) || defined(FREESCALE_KSDK_MQX)
-        #if MQX_USE_IO_OLD
-            #include <fio.h>
-        #else
-            #include <nio.h>
-        #endif
-    #else
+    #ifndef NO_STDIO_FILESYSTEM
         #include <stdio.h>
     #endif
 #endif
@@ -105,8 +117,8 @@
 
 #ifndef NO_WOLFSSL_SERVER
     static int DoClientKeyExchange(WOLFSSL* ssl, byte* input, word32*, word32);
-    #if (!defined(NO_RSA) || defined(HAVE_ECC) || defined(HAVE_ED25519)) && \
-                                                !defined(WOLFSSL_NO_CLIENT_AUTH)
+    #if (!defined(NO_RSA) || defined(HAVE_ECC) || defined(HAVE_ED25519) || \
+                        defined(HAVE_ED448)) && !defined(WOLFSSL_NO_CLIENT_AUTH)
         static int DoCertificateVerify(WOLFSSL* ssl, byte*, word32*, word32);
     #endif
     #ifdef WOLFSSL_DTLS
@@ -129,6 +141,7 @@
 #endif
     getRecordLayerHeader,
     getData,
+    verifyEncryptedMessage,
     decryptMessage,
     verifyMessage,
     runProcessingOneMessage
@@ -140,13 +153,13 @@
 
 /* Server random bytes for TLS v1.3 described downgrade protection mechanism. */
 static const byte tls13Downgrade[7] = {
-    0x44, 0x4f, 0x47, 0x4e, 0x47, 0x52, 0x44
+    0x44, 0x4f, 0x57, 0x4e, 0x47, 0x52, 0x44
 };
 #define TLS13_DOWNGRADE_SZ  sizeof(tls13Downgrade)
 
 #endif /* !NO_WOLFSSL_SERVER || !NO_WOLFSSL_CLIENT */
 
-#ifndef NO_OLD_TLS
+#if !defined(NO_OLD_TLS) && !defined(WOLFSSL_AEAD_ONLY)
 static int SSL_hmac(WOLFSSL* ssl, byte* digest, const byte* in, word32 sz,
                     int padSz, int content, int verify);
 
@@ -158,7 +171,11 @@
     int QSH_Init(WOLFSSL* ssl);
 #endif
 
-
+#ifdef WOLFSSL_RENESAS_TSIP_TLS
+    int tsip_useable(const WOLFSSL *ssl);
+    int tsip_generatePremasterSecret();
+    int tsip_generateEncryptPreMasterSecret(WOLFSSL *ssl, byte *out, word32 *outSz);
+#endif
 int IsTLS(const WOLFSSL* ssl)
 {
     if (ssl->version.major == SSLv3_MAJOR && ssl->version.minor >=TLSv1_MINOR)
@@ -195,10 +212,18 @@
         return 0;
     #endif /* WOLFSSL_DTLS */
 
+#ifdef WOLFSSL_TLS13
+    if (isSend)
+        return ssl->encrypt.setup;
+    else
+        return ssl->decrypt.setup;
+#else
     return ssl->keys.encryptionOn;
-}
-
-
+#endif
+}
+
+
+#if defined(WOLFSSL_DTLS) || !defined(WOLFSSL_NO_TLS12)
 /* If SCTP is not enabled returns the state of the dtls option.
  * If SCTP is enabled returns dtls && !sctp. */
 static WC_INLINE int IsDtlsNotSctpMode(WOLFSSL* ssl)
@@ -213,6 +238,7 @@
 
     return result;
 }
+#endif /* DTLS || !WOLFSSL_NO_TLS12 */
 
 
 #ifdef HAVE_QSH
@@ -239,7 +265,6 @@
         /* free struct */
         XFREE(preKey, ssl->heap, DYNAMIC_TYPE_QSH);
     }
-    key = NULL;
 
 
     /* free all of peers QSH keys */
@@ -299,8 +324,8 @@
 
 
 #ifdef HAVE_NTRU
-static WC_RNG* rng;
-static wolfSSL_Mutex* rngMutex;
+static WOLFSSL_GLOBAL WC_RNG* rng;
+static WOLFSSL_GLOBAL wolfSSL_Mutex* rngMutex;
 
 static word32 GetEntropy(unsigned char* out, word32 num_bytes)
 {
@@ -464,7 +489,8 @@
 
 
 /* serializes the key struct for exporting */
-static int ExportKeyState(WOLFSSL* ssl, byte* exp, word32 len, byte ver)
+static int ExportKeyState(WOLFSSL* ssl, byte* exp, word32 len, byte ver,
+        byte small)
 {
     word32 idx = 0;
     byte   sz;
@@ -478,12 +504,12 @@
 
     keys = &(ssl->keys);
 
-    if (DTLS_EXPORT_KEY_SZ > len) {
-        WOLFSSL_MSG("Buffer not large enough for max key struct size");
-        return BUFFER_E;
-    }
-
-    XMEMSET(exp, 0, DTLS_EXPORT_KEY_SZ);
+    if (DTLS_EXPORT_MIN_KEY_SZ > len) {
+        WOLFSSL_MSG("Buffer not large enough for minimum key struct size");
+        return BUFFER_E;
+    }
+
+    XMEMSET(exp, 0, DTLS_EXPORT_MIN_KEY_SZ);
 
     c32toa(keys->peer_sequence_number_hi, exp + idx); idx += OPAQUE32_LEN;
     c32toa(keys->peer_sequence_number_lo, exp + idx); idx += OPAQUE32_LEN;
@@ -514,8 +540,15 @@
     exp[idx++] = keys->encryptionOn;
     exp[idx++] = keys->decryptedCur;
 
+    /* from here on the buffer needs checked because is variable length that
+     * can be larger than DTLS_EXPORT_MIN_KEY_SZ */
     {
         word32 i;
+        if ((OPAQUE16_LEN * 2) + idx +
+                (2 * (WOLFSSL_DTLS_WINDOW_WORDS * OPAQUE32_LEN)) > len) {
+            WOLFSSL_MSG("Buffer not large enough for WOLFSSL_DTLS_WINDOW_WORDS");
+            return BUFFER_E;
+        }
 
         c16toa(WOLFSSL_DTLS_WINDOW_WORDS, exp + idx); idx += OPAQUE16_LEN;
         for (i = 0; i < WOLFSSL_DTLS_WINDOW_WORDS; i++) {
@@ -529,6 +562,11 @@
         }
     }
 
+    if (idx >= len) {
+        WOLFSSL_MSG("Buffer not large enough for truncated hmac flag");
+        return BUFFER_E;
+    }
+
 #ifdef HAVE_TRUNCATED_HMAC
     sz         = ssl->truncated_hmac ? TRUNCATED_HMAC_SZ: ssl->specs.hash_size;
     exp[idx++] = ssl->truncated_hmac;
@@ -536,26 +574,60 @@
     sz         = ssl->specs.hash_size;
     exp[idx++] = 0; /* no truncated hmac */
 #endif
-    exp[idx++] = sz;
-    XMEMCPY(exp + idx, keys->client_write_MAC_secret, sz); idx += sz;
-    XMEMCPY(exp + idx, keys->server_write_MAC_secret, sz); idx += sz;
-
-    sz         = ssl->specs.key_size;
+
+    sz = (small)? 0: sz;
+    if (idx + (sz * 2) + OPAQUE8_LEN > len) {
+        WOLFSSL_MSG("Buffer not large enough for MAC secret");
+        return BUFFER_E;
+    }
+
     exp[idx++] = sz;
-    XMEMCPY(exp + idx, keys->client_write_key, sz); idx += sz;
-    XMEMCPY(exp + idx, keys->server_write_key, sz); idx += sz;
-
-    sz         = ssl->specs.iv_size;
+    if (sz > 0) {
+    #ifndef WOLFSSL_AEAD_ONLY
+        XMEMCPY(exp + idx, keys->client_write_MAC_secret, sz); idx += sz;
+        XMEMCPY(exp + idx, keys->server_write_MAC_secret, sz); idx += sz;
+    #else
+        XMEMSET(exp + idx, 0, sz); idx += sz;
+        XMEMSET(exp + idx, 0, sz); idx += sz;
+    #endif
+    }
+
+    sz = (small)? 0: ssl->specs.key_size;
+    if (idx + (sz * 2) + OPAQUE8_LEN > len) {
+        WOLFSSL_MSG("Buffer not large enough for write key");
+        return BUFFER_E;
+    }
+
     exp[idx++] = sz;
-    XMEMCPY(exp + idx, keys->client_write_IV, sz); idx += sz;
-    XMEMCPY(exp + idx, keys->server_write_IV, sz); idx += sz;
+    if (sz > 0) {
+        XMEMCPY(exp + idx, keys->client_write_key, sz); idx += sz;
+        XMEMCPY(exp + idx, keys->server_write_key, sz); idx += sz;
+    }
+
+    sz = (small)? 0: ssl->specs.iv_size;
+    if (idx + (sz * 2) + OPAQUE8_LEN + AEAD_MAX_EXP_SZ > len) {
+        WOLFSSL_MSG("Buffer not large enough for IVs");
+        return BUFFER_E;
+    }
+
+    exp[idx++] = sz;
+    if (sz > 0) {
+        XMEMCPY(exp + idx, keys->client_write_IV, sz); idx += sz;
+        XMEMCPY(exp + idx, keys->server_write_IV, sz); idx += sz;
+    }
     XMEMCPY(exp + idx, keys->aead_exp_IV, AEAD_MAX_EXP_SZ);
     idx += AEAD_MAX_EXP_SZ;
 
-    sz         = AEAD_MAX_IMP_SZ;
+    sz = (small)? 0: AEAD_MAX_IMP_SZ;
+    if (idx + (sz * 2) + OPAQUE8_LEN > len) {
+        WOLFSSL_MSG("Buffer not large enough for imp IVs");
+        return BUFFER_E;
+    }
     exp[idx++] = sz;
-    XMEMCPY(exp + idx, keys->aead_enc_imp_IV, sz); idx += sz;
-    XMEMCPY(exp + idx, keys->aead_dec_imp_IV, sz); idx += sz;
+    if (sz > 0) {
+        XMEMCPY(exp + idx, keys->aead_enc_imp_IV, sz); idx += sz;
+        XMEMCPY(exp + idx, keys->aead_dec_imp_IV, sz); idx += sz;
+    }
 
     /* DTLS_EXPORT_KEY_SZ is max value. idx size can vary */
     if (idx > DTLS_EXPORT_KEY_SZ) {
@@ -621,6 +693,7 @@
 
     /* check minimum length -- includes byte used for size indicators */
     if (len < DTLS_EXPORT_MIN_KEY_SZ) {
+        WOLFSSL_MSG("Buffer not large enough for minimum expected size");
         return BUFFER_E;
     }
     ato32(exp + idx, &keys->peer_sequence_number_hi); idx += OPAQUE32_LEN;
@@ -695,34 +768,53 @@
     idx++; /* no truncated hmac */
 #endif
     sz = exp[idx++];
-    if (sz > sizeof(keys->client_write_MAC_secret) || sz + idx > len) {
-        return BUFFER_E;
-    }
-    XMEMCPY(keys->client_write_MAC_secret, exp + idx, sz); idx += sz;
-    XMEMCPY(keys->server_write_MAC_secret, exp + idx, sz); idx += sz;
+#ifndef WOLFSSL_AEAD_ONLY
+    if (sz > sizeof(keys->client_write_MAC_secret) || (sz * 2) + idx > len) {
+        WOLFSSL_MSG("Buffer not large enough for MAC import");
+        return BUFFER_E;
+    }
+    if (sz > 0) {
+        XMEMCPY(keys->client_write_MAC_secret, exp + idx, sz); idx += sz;
+        XMEMCPY(keys->server_write_MAC_secret, exp + idx, sz); idx += sz;
+    }
+#else
+    if (sz + idx > len) {
+        return BUFFER_E;
+    }
+    idx += sz; idx += sz;
+#endif
 
     sz = exp[idx++];
-    if (sz > sizeof(keys->client_write_key) || sz + idx > len) {
-        return BUFFER_E;
-    }
-    XMEMCPY(keys->client_write_key, exp + idx, sz); idx += sz;
-    XMEMCPY(keys->server_write_key, exp + idx, sz); idx += sz;
+    if (sz > sizeof(keys->client_write_key) || (sz * 2) + idx > len) {
+        WOLFSSL_MSG("Buffer not large enough for key import");
+        return BUFFER_E;
+    }
+    if (sz > 0) {
+        XMEMCPY(keys->client_write_key, exp + idx, sz); idx += sz;
+        XMEMCPY(keys->server_write_key, exp + idx, sz); idx += sz;
+    }
 
     sz = exp[idx++];
-    if (sz > sizeof(keys->client_write_IV) || sz + idx > len) {
-        return BUFFER_E;
-    }
-    XMEMCPY(keys->client_write_IV, exp + idx, sz); idx += sz;
-    XMEMCPY(keys->server_write_IV, exp + idx, sz); idx += sz;
+    if (sz > sizeof(keys->client_write_IV) || (sz * 2) + idx > len) {
+        WOLFSSL_MSG("Buffer not large enough for write IV import");
+        return BUFFER_E;
+    }
+    if (sz > 0) {
+        XMEMCPY(keys->client_write_IV, exp + idx, sz); idx += sz;
+        XMEMCPY(keys->server_write_IV, exp + idx, sz); idx += sz;
+    }
     XMEMCPY(keys->aead_exp_IV, exp + idx, AEAD_MAX_EXP_SZ);
     idx += AEAD_MAX_EXP_SZ;
 
     sz = exp[idx++];
-    if (sz > sizeof(keys->aead_enc_imp_IV) || sz + idx > len) {
-        return BUFFER_E;
-    }
-    XMEMCPY(keys->aead_enc_imp_IV, exp + idx, sz); idx += sz;
-    XMEMCPY(keys->aead_dec_imp_IV, exp + idx, sz); idx += sz;
+    if (sz > sizeof(keys->aead_enc_imp_IV) || (sz * 2) + idx > len) {
+        WOLFSSL_MSG("Buffer not large enough for imp IV import");
+        return BUFFER_E;
+    }
+    if (sz > 0) {
+        XMEMCPY(keys->aead_enc_imp_IV, exp + idx, sz); idx += sz;
+        XMEMCPY(keys->aead_dec_imp_IV, exp + idx, sz); idx += sz;
+    }
 
     WOLFSSL_LEAVE("ImportKeyState", idx);
     (void)ver;
@@ -1020,6 +1112,7 @@
     return idx;
 }
 
+#ifndef WOLFSSL_SESSION_EXPORT_NOPEER
 static int ExportPeerInfo(WOLFSSL* ssl, byte* exp, word32 len, byte ver)
 {
     int    idx  = 0;
@@ -1059,6 +1152,7 @@
 
     return idx;
 }
+#endif /* !WOLFSSL_SESSION_EXPORT_NOPEER */
 
 
 static int ImportPeerInfo(WOLFSSL* ssl, byte* buf, word32 len, byte ver)
@@ -1074,6 +1168,11 @@
         return BAD_FUNC_ARG;
     }
 
+    if (len == 0) {
+        WOLFSSL_MSG("No peer info sent");
+        return 0;
+    }
+
     if (ssl == NULL || buf == NULL || len < 3 * DTLS_EXPORT_LEN) {
         return BAD_FUNC_ARG;
     }
@@ -1105,6 +1204,64 @@
 }
 
 
+/* WOLFSSL_LOCAL function that serializes the current WOLFSSL session state only
+ * buf is used to hold the serialized WOLFSSL struct and sz is the size of buf
+ * passed in.
+ * On success returns the size of serialized session state.*/
+int wolfSSL_dtls_export_state_internal(WOLFSSL* ssl, byte* buf, word32 sz)
+{
+    int ret;
+    word32 idx      = 0;
+    word32 totalLen = 0;
+
+    WOLFSSL_ENTER("wolfSSL_dtls_export_state_internal");
+
+    if (buf == NULL || ssl == NULL) {
+        WOLFSSL_LEAVE("wolfSSL_dtls_export_state_internal", BAD_FUNC_ARG);
+        return BAD_FUNC_ARG;
+    }
+
+    totalLen += DTLS_EXPORT_LEN * 2; /* 2 protocol bytes and 2 length bytes */
+    /* each of the following have a 2 byte length before data */
+    totalLen += DTLS_EXPORT_LEN + DTLS_EXPORT_MIN_KEY_SZ;
+    if (totalLen > sz) {
+        WOLFSSL_LEAVE("wolfSSL_dtls_export_state_internal", BUFFER_E);
+        return BUFFER_E;
+    }
+
+    buf[idx++] =  (byte)DTLS_EXPORT_STATE_PRO;
+    buf[idx++] = ((byte)DTLS_EXPORT_STATE_PRO & 0xF0) |
+                 ((byte)DTLS_EXPORT_VERSION & 0X0F);
+    idx += DTLS_EXPORT_LEN; /* leave room for total length */
+
+    /* export keys struct and dtls state -- variable length stored in ret */
+    idx += DTLS_EXPORT_LEN; /* leave room for length */
+    if ((ret = ExportKeyState(ssl, buf + idx, sz - idx,
+                                                 DTLS_EXPORT_VERSION, 1)) < 0) {
+        WOLFSSL_LEAVE("wolfSSL_dtls_export_state_internal", ret);
+        return ret;
+    }
+    c16toa((word16)ret, buf + idx - DTLS_EXPORT_LEN); idx += ret;
+
+    /* place total length of exported buffer minus 2 bytes protocol/version */
+    c16toa((word16)(idx - DTLS_EXPORT_LEN), buf + DTLS_EXPORT_LEN);
+
+#ifdef WOLFSSL_SESSION_EXPORT_DEBUG
+    /* if compiled with debug options then print the version, protocol, size */
+    {
+        char debug[256];
+        XSNPRINTF(debug, sizeof(debug), "Exporting DTLS session state\n"
+                   "\tVersion  : %d\n\tProtocol : %02X%01X\n\tLength of: %d\n\n"
+               , (int)DTLS_EXPORT_VERSION, buf[0], (buf[1] >> 4), idx - 2);
+        WOLFSSL_MSG(debug);
+    }
+#endif /* WOLFSSL_SESSION_EXPORT_DEBUG */
+
+    WOLFSSL_LEAVE("wolfSSL_dtls_export_state_internal", idx);
+    return idx;
+}
+
+
 /* WOLFSSL_LOCAL function that serializes the current WOLFSSL session
  * buf is used to hold the serialized WOLFSSL struct and sz is the size of buf
  * passed in.
@@ -1151,7 +1308,7 @@
     /* export keys struct and dtls state -- variable length stored in ret */
     idx += DTLS_EXPORT_LEN; /* leave room for length */
     if ((ret = ExportKeyState(ssl, buf + idx, sz - idx,
-                                                    DTLS_EXPORT_VERSION)) < 0) {
+                                                 DTLS_EXPORT_VERSION, 0)) < 0) {
         WOLFSSL_LEAVE("wolfSSL_dtls_export_internal", ret);
         return ret;
     }
@@ -1168,11 +1325,15 @@
 
     /* export of dtls peer information */
     idx += DTLS_EXPORT_LEN;
+#ifdef WOLFSSL_SESSION_EXPORT_NOPEER
+    ret = 0; /* not saving peer port/ip information */
+#else
     if ((ret = ExportPeerInfo(ssl, buf + idx, sz - idx,
                                                     DTLS_EXPORT_VERSION)) < 0) {
         WOLFSSL_LEAVE("wolfSSL_dtls_export_internal", ret);
         return ret;
     }
+#endif
     c16toa(ret, buf + idx - DTLS_EXPORT_LEN);
     idx += ret;
 
@@ -1196,6 +1357,78 @@
 
 
 /* On success return amount of buffer consumed */
+int wolfSSL_dtls_import_state_internal(WOLFSSL* ssl, byte* buf, word32 sz)
+{
+    word32 idx    = 0;
+    word16 length = 0;
+    int version;
+    int ret;
+
+    WOLFSSL_ENTER("wolfSSL_dtls_import_state_internal");
+    /* check at least enough room for protocol and length */
+    if (sz < DTLS_EXPORT_LEN * 2 || ssl == NULL) {
+        WOLFSSL_LEAVE("wolfSSL_dtls_import_state_internal", BAD_FUNC_ARG);
+        return BAD_FUNC_ARG;
+    }
+
+    if (buf[idx++] !=  (byte)DTLS_EXPORT_STATE_PRO ||
+            (buf[idx] & 0xF0) != ((byte)DTLS_EXPORT_PRO & 0xF0)) {
+        WOLFSSL_MSG("Incorrect protocol");
+        return BAD_FUNC_ARG;
+    }
+    version = buf[idx++] & 0x0F;
+
+    ato16(buf + idx, &length); idx += DTLS_EXPORT_LEN;
+    if (length > sz - DTLS_EXPORT_LEN) { /* subtract 2 for protocol */
+        WOLFSSL_MSG("Buffer size sanity check failed");
+        return BUFFER_E;
+    }
+
+#ifdef WOLFSSL_SESSION_EXPORT_DEBUG
+    /* if compiled with debug options then print the version, protocol, size */
+    {
+        char debug[256];
+        XSNPRINTF(debug, sizeof(debug), "Importing DTLS session state\n"
+                   "\tVersion  : %d\n\tProtocol : %02X%01X\n\tLength of: %d\n\n"
+               , (int)version, buf[0], (buf[1] >> 4), length);
+        WOLFSSL_MSG(debug);
+    }
+#endif /* WOLFSSL_SESSION_EXPORT_DEBUG */
+
+    /* perform sanity checks and extract Options information used */
+    switch (version) {
+        case DTLS_EXPORT_VERSION:
+            break;
+
+        default:
+            WOLFSSL_MSG("Bad export state version");
+            return BAD_FUNC_ARG;
+
+    }
+
+    /* perform sanity checks and extract Keys struct */
+    if (DTLS_EXPORT_LEN + idx > sz) {
+        WOLFSSL_MSG("Import Key struct error");
+        return BUFFER_E;
+    }
+    ato16(buf + idx, &length); idx += DTLS_EXPORT_LEN;
+    if (length > DTLS_EXPORT_KEY_SZ || length + idx > sz) {
+        WOLFSSL_MSG("Import Key struct error");
+        return BUFFER_E;
+    }
+    if ((ret = ImportKeyState(ssl, buf + idx, length, version)) < 0) {
+        WOLFSSL_MSG("Import Key struct error");
+        WOLFSSL_LEAVE("wolfSSL_dtls_import_state_internal", ret);
+        return ret;
+    }
+    idx += ret;
+
+    WOLFSSL_LEAVE("wolfSSL_dtls_import_state_internal", ret);
+    return ret;
+}
+
+
+/* On success return amount of buffer consumed */
 int wolfSSL_dtls_import_internal(WOLFSSL* ssl, byte* buf, word32 sz)
 {
     word32 idx    = 0;
@@ -1214,8 +1447,9 @@
     if (buf[idx++]       !=  (byte)DTLS_EXPORT_PRO ||
        (buf[idx] & 0xF0) != ((byte)DTLS_EXPORT_PRO & 0xF0)) {
         /* don't increment on second idx to next get version */
-        WOLFSSL_MSG("Incorrect protocol");
-        return BAD_FUNC_ARG;
+
+        /* check if importing state only */
+        return wolfSSL_dtls_import_state_internal(ssl, buf, sz);
     }
     version = buf[idx++] & 0x0F;
 
@@ -1349,6 +1583,62 @@
     method->downgrade  = 0;
 }
 
+#if defined(OPENSSL_EXTRA) || defined(WOLFSSL_EITHER_SIDE)
+int InitSSL_Side(WOLFSSL* ssl, word16 side)
+{
+    if (ssl == NULL)
+        return BAD_FUNC_ARG;
+
+    /* set side */
+    ssl->options.side = side;
+
+    /* reset options that are side specific */
+#ifdef HAVE_NTRU
+    if (ssl->options.side == WOLFSSL_CLIENT_END) {
+        ssl->options.haveNTRU = 1;      /* always on client side */
+                                        /* server can turn on by loading key */
+    }
+#endif
+#ifdef HAVE_ECC
+    if (ssl->options.side == WOLFSSL_CLIENT_END) {
+        ssl->options.haveECDSAsig  = 1; /* always on client side */
+        ssl->options.haveECC = 1;       /* server turns on with ECC key cert */
+        ssl->options.haveStaticECC = 1; /* server can turn on by loading key */
+    }
+#elif defined(HAVE_ED25519) || defined(HAVE_ED448)
+    if (ssl->options.side == WOLFSSL_CLIENT_END) {
+        ssl->options.haveECDSAsig  = 1; /* always on client side */
+        ssl->options.haveECC  = 1;      /* server turns on with ECC key cert */
+    }
+#endif
+
+#if defined(HAVE_EXTENDED_MASTER) && !defined(NO_WOLFSSL_CLIENT)
+    if (ssl->options.side == WOLFSSL_CLIENT_END) {
+        if ((ssl->ctx->method->version.major == SSLv3_MAJOR) &&
+             (ssl->ctx->method->version.minor >= TLSv1_MINOR)) {
+            ssl->options.haveEMS = 1;
+        }
+    #ifdef WOLFSSL_DTLS
+        if (ssl->ctx->method->version.major == DTLS_MAJOR)
+            ssl->options.haveEMS = 1;
+    #endif /* WOLFSSL_DTLS */
+    }
+#endif /* HAVE_EXTENDED_MASTER && !NO_WOLFSSL_CLIENT */
+
+#if defined(WOLFSSL_DTLS) && !defined(NO_WOLFSSL_SERVER)
+    if (ssl->options.dtls && ssl->options.side == WOLFSSL_SERVER_END) {
+        int ret;
+        ret = wolfSSL_DTLS_SetCookieSecret(ssl, NULL, 0);
+        if (ret != 0) {
+            WOLFSSL_MSG("DTLS Cookie Secret error");
+            return ret;
+        }
+    }
+#endif /* WOLFSSL_DTLS && !NO_WOLFSSL_SERVER */
+
+    return InitSSL_Suites(ssl);
+}
+#endif /* OPENSSL_EXTRA || WOLFSSL_EITHER_SIDE */
 
 /* Initialize SSL context, return 0 on success */
 int InitSSL_Ctx(WOLFSSL_CTX* ctx, WOLFSSL_METHOD* method, void* heap)
@@ -1380,8 +1670,10 @@
     ctx->minEccKeySz  = MIN_ECCKEY_SZ;
     ctx->eccTempKeySz = ECDHE_SIZE;
 #endif
+#if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
+    ctx->verifyDepth = MAX_CHAIN_DEPTH;
+#endif
 #ifdef OPENSSL_EXTRA
-    ctx->verifyDepth = MAX_CHAIN_DEPTH;
     ctx->cbioFlag = WOLFSSL_CBIO_NONE;
 #endif
 
@@ -1398,6 +1690,15 @@
                 #error Micrium port does not support DTLS session export yet
             #endif
         #endif
+    #elif defined WOLFSSL_UIP
+        ctx->CBIORecv = uIPReceive;
+        ctx->CBIOSend = uIPSend;
+        #ifdef WOLFSSL_DTLS
+        if (method->version.major == DTLS_MAJOR) {
+            ctx->CBIOSendTo = uIPSendTo;
+            ctx->CBIORecvFrom = uIPRecvFrom;
+        }
+        #endif
     #else
         ctx->CBIORecv = EmbedReceive;
         ctx->CBIOSend = EmbedSend;
@@ -1417,6 +1718,12 @@
 #ifdef HAVE_NETX
     ctx->CBIORecv = NetX_Receive;
     ctx->CBIOSend = NetX_Send;
+#elif defined(WOLFSSL_APACHE_MYNEWT) && !defined(WOLFSSL_LWIP)
+    ctx->CBIORecv = Mynewt_Receive;
+    ctx->CBIOSend = Mynewt_Send;
+#elif defined(WOLFSSL_GNRC)
+    ctx->CBIORecv = GNRC_ReceiveFrom;
+    ctx->CBIOSend = GNRC_SendTo;
 #endif
 
 #ifdef HAVE_NTRU
@@ -1430,7 +1737,7 @@
         ctx->haveECC  = 1;             /* server turns on with ECC key cert */
         ctx->haveStaticECC = 1;        /* server can turn on by loading key */
     }
-#elif defined(HAVE_ED25519)
+#elif defined(HAVE_ED25519) || defined(HAVE_ED448)
     if (method->side == WOLFSSL_CLIENT_END) {
         ctx->haveECDSAsig  = 1;        /* always on client side */
         ctx->haveECC  = 1;             /* server turns on with ECC key cert */
@@ -1439,8 +1746,12 @@
 
     ctx->devId = INVALID_DEVID;
 
-#if defined(WOLFSSL_DTLS) && defined(WOLFSSL_SCTP)
-    ctx->dtlsMtuSz = MAX_RECORD_SIZE;
+#if defined(WOLFSSL_DTLS)
+    #ifdef WOLFSSL_SCTP
+        ctx->dtlsMtuSz = MAX_RECORD_SIZE;
+    #elif defined(WOLFSSL_DTLS_MTU)
+        ctx->dtlsMtuSz = MAX_MTU;
+    #endif
 #endif
 
 #ifndef NO_CERTS
@@ -1491,7 +1802,8 @@
 /* In case contexts are held in array and don't want to free actual ctx */
 void SSL_CtxResourceFree(WOLFSSL_CTX* ctx)
 {
-#ifdef HAVE_CERTIFICATE_STATUS_REQUEST_V2
+#if defined(HAVE_CERTIFICATE_STATUS_REQUEST_V2) && \
+                     defined(HAVE_TLS_EXTENSIONS) && !defined(NO_WOLFSSL_SERVER)
     int i;
 #endif
 
@@ -1499,19 +1811,34 @@
     wolfEventQueue_Free(&ctx->event_queue);
 #endif /* HAVE_WOLF_EVENT */
 
+#ifdef WOLFSSL_STATIC_MEMORY
+    if (ctx->onHeap == 1) {
+        XFREE(ctx->method, ctx->heap, DYNAMIC_TYPE_METHOD);
+    }
+    else {
+        XFREE(ctx->method, NULL, DYNAMIC_TYPE_METHOD);
+    }
+#else
     XFREE(ctx->method, ctx->heap, DYNAMIC_TYPE_METHOD);
-    if (ctx->suites)
+#endif
+    ctx->method = NULL;
+    if (ctx->suites) {
         XFREE(ctx->suites, ctx->heap, DYNAMIC_TYPE_SUITES);
+        ctx->suites = NULL;
+    }
 
 #ifndef NO_DH
     XFREE(ctx->serverDH_G.buffer, ctx->heap, DYNAMIC_TYPE_PUBLIC_KEY);
+    ctx->serverDH_G.buffer = NULL;
     XFREE(ctx->serverDH_P.buffer, ctx->heap, DYNAMIC_TYPE_PUBLIC_KEY);
+    ctx->serverDH_P.buffer = NULL;
 #endif /* !NO_DH */
 
 #ifdef SINGLE_THREADED
     if (ctx->rng) {
         wc_FreeRng(ctx->rng);
         XFREE(ctx->rng, ctx->heap, DYNAMIC_TYPE_RNG);
+        ctx->rng = NULL;
     }
 #endif /* SINGLE_THREADED */
 
@@ -1522,12 +1849,14 @@
         if (ctx->ourCert && ctx->ownOurCert) {
             FreeX509(ctx->ourCert);
             XFREE(ctx->ourCert, ctx->heap, DYNAMIC_TYPE_X509);
+            ctx->ourCert = NULL;
         }
     #endif /* KEEP_OUR_CERT */
     FreeDer(&ctx->certChain);
     wolfSSL_CertManagerFree(ctx->cm);
+    ctx->cm = NULL;
     #ifdef OPENSSL_EXTRA
-	/* ctx->cm was free'd so cm of x509 store should now be NULL */
+    /* ctx->cm was free'd so cm of x509 store should now be NULL */
         if (ctx->x509_store_pt != NULL) {
             ctx->x509_store_pt->cm = NULL;
         }
@@ -1566,6 +1895,7 @@
         if (ctx->chainOcspRequest[i]) {
             FreeOcspRequest(ctx->chainOcspRequest[i]);
             XFREE(ctx->chainOcspRequest[i], ctx->heap, DYNAMIC_TYPE_OCSP_REQUEST);
+            ctx->chainOcspRequest[i] = NULL;
         }
     }
 #endif /* HAVE_CERTIFICATE_STATUS_REQUEST_V2 */
@@ -1573,13 +1903,15 @@
 
 #endif /* HAVE_TLS_EXTENSIONS */
 #ifdef OPENSSL_EXTRA
-    if(ctx->alpn_cli_protos)
+    if(ctx->alpn_cli_protos) {
         XFREE((void *)ctx->alpn_cli_protos, NULL, DYNAMIC_TYPE_OPENSSL);
+        ctx->alpn_cli_protos = NULL;
+    }
 #endif
 #ifdef WOLFSSL_STATIC_MEMORY
     if (ctx->heap != NULL) {
 #ifdef WOLFSSL_HEAP_TEST
-        /* avoid derefrencing a test value */
+        /* avoid dereferencing a test value */
         if (ctx->heap != (void*)WOLFSSL_HEAP_TEST)
 #endif
         {
@@ -1593,11 +1925,10 @@
 
 void FreeSSL_Ctx(WOLFSSL_CTX* ctx)
 {
-    int doFree = 0;
-
-    if (wc_LockMutex(&ctx->countMutex) != 0) {
-        WOLFSSL_MSG("Couldn't lock count mutex");
-
+    int refCount;
+
+    /* decrement CTX reference count */
+    if ((refCount = SSL_CTX_RefCount(ctx, -1)) < 0) {
         /* check error state, if mutex error code then mutex init failed but
          * CTX was still malloc'd */
         if (ctx->err == CTX_INIT_MUTEX_E) {
@@ -1606,16 +1937,19 @@
         }
         return;
     }
-    ctx->refCount--;
-    if (ctx->refCount == 0)
-        doFree = 1;
-    wc_UnLockMutex(&ctx->countMutex);
-
-    if (doFree) {
+
+    if (refCount == 0) {
+        void* heap = ctx->heap;
         WOLFSSL_MSG("CTX ref count down to 0, doing full free");
         SSL_CtxResourceFree(ctx);
         wc_FreeMutex(&ctx->countMutex);
-        XFREE(ctx, ctx->heap, DYNAMIC_TYPE_CTX);
+#ifdef WOLFSSL_STATIC_MEMORY
+        if (ctx->onHeap == 0) {
+            heap = NULL;
+        }
+#endif
+        XFREE(ctx, heap, DYNAMIC_TYPE_CTX);
+        (void)heap; /* not used in some builds */
     }
     else {
         (void)ctx;
@@ -1655,7 +1989,7 @@
     ssl->encrypt.chacha = NULL;
     ssl->decrypt.chacha = NULL;
 #endif
-#ifdef HAVE_POLY1305
+#if defined(HAVE_POLY1305) && defined(HAVE_ONE_TIME_AUTH)
     ssl->auth.poly1305 = NULL;
 #endif
     ssl->encrypt.setup = 0;
@@ -1686,18 +2020,23 @@
     XFREE(ssl->encrypt.des3, ssl->heap, DYNAMIC_TYPE_CIPHER);
     XFREE(ssl->decrypt.des3, ssl->heap, DYNAMIC_TYPE_CIPHER);
 #endif
-#ifdef BUILD_AES
+#if defined(BUILD_AES) || defined(BUILD_AESGCM) /* See: InitKeys() in keys.c
+                                                 * on addition of BUILD_AESGCM
+                                                 * check (enc->aes, dec->aes) */
     wc_AesFree(ssl->encrypt.aes);
     wc_AesFree(ssl->decrypt.aes);
-    #if defined(BUILD_AESGCM) || defined(HAVE_AESCCM)
+    #if (defined(BUILD_AESGCM) || defined(HAVE_AESCCM)) && \
+                                                      !defined(WOLFSSL_NO_TLS12)
         XFREE(ssl->decrypt.additional, ssl->heap, DYNAMIC_TYPE_AES_BUFFER);
-        XFREE(ssl->decrypt.nonce, ssl->heap, DYNAMIC_TYPE_AES_BUFFER);
         XFREE(ssl->encrypt.additional, ssl->heap, DYNAMIC_TYPE_AES_BUFFER);
-        XFREE(ssl->encrypt.nonce, ssl->heap, DYNAMIC_TYPE_AES_BUFFER);
     #endif
     XFREE(ssl->encrypt.aes, ssl->heap, DYNAMIC_TYPE_CIPHER);
     XFREE(ssl->decrypt.aes, ssl->heap, DYNAMIC_TYPE_CIPHER);
 #endif
+#ifdef CIPHER_NONCE
+    XFREE(ssl->decrypt.nonce, ssl->heap, DYNAMIC_TYPE_AES_BUFFER);
+    XFREE(ssl->encrypt.nonce, ssl->heap, DYNAMIC_TYPE_AES_BUFFER);
+#endif
 #ifdef HAVE_CAMELLIA
     XFREE(ssl->encrypt.cam, ssl->heap, DYNAMIC_TYPE_CIPHER);
     XFREE(ssl->decrypt.cam, ssl->heap, DYNAMIC_TYPE_CIPHER);
@@ -1714,13 +2053,19 @@
     XFREE(ssl->encrypt.chacha, ssl->heap, DYNAMIC_TYPE_CIPHER);
     XFREE(ssl->decrypt.chacha, ssl->heap, DYNAMIC_TYPE_CIPHER);
 #endif
-#ifdef HAVE_POLY1305
+#if defined(HAVE_POLY1305) && defined(HAVE_ONE_TIME_AUTH)
     XFREE(ssl->auth.poly1305, ssl->heap, DYNAMIC_TYPE_CIPHER);
 #endif
 #ifdef HAVE_IDEA
     XFREE(ssl->encrypt.idea, ssl->heap, DYNAMIC_TYPE_CIPHER);
     XFREE(ssl->decrypt.idea, ssl->heap, DYNAMIC_TYPE_CIPHER);
 #endif
+#if defined(WOLFSSL_TLS13) && defined(HAVE_NULL_CIPHER)
+    wc_HmacFree(ssl->encrypt.hmac);
+    wc_HmacFree(ssl->decrypt.hmac);
+    XFREE(ssl->encrypt.hmac, ssl->heap, DYNAMIC_TYPE_CIPHER);
+    XFREE(ssl->decrypt.hmac, ssl->heap, DYNAMIC_TYPE_CIPHER);
+#endif
 }
 
 
@@ -1735,88 +2080,158 @@
     cs->sig_algo              = INVALID_BYTE;
 }
 
+#if defined(USE_ECDSA_KEYSZ_HASH_ALGO) || (defined(WOLFSSL_TLS13) && \
+                                                              defined(HAVE_ECC))
+static int GetMacDigestSize(byte macAlgo)
+{
+    switch (macAlgo) {
+    #ifndef NO_SHA
+        case sha_mac:
+            return WC_SHA_DIGEST_SIZE;
+    #endif
+    #ifndef NO_SHA256
+        case sha256_mac:
+            return WC_SHA256_DIGEST_SIZE;
+    #endif
+    #ifdef WOLFSSL_SHA384
+        case sha384_mac:
+            return WC_SHA384_DIGEST_SIZE;
+    #endif
+    #ifdef WOLFSSL_SHA512
+        case sha512_mac:
+            return WC_SHA512_DIGEST_SIZE;
+    #endif
+        default:
+            break;
+    }
+    return NOT_COMPILED_IN;
+}
+#endif /* USE_ECDSA_KEYSZ_HASH_ALGO */
+
+static WC_INLINE void AddSuiteHashSigAlgo(Suites* suites, byte macAlgo, byte sigAlgo,
+    int keySz, word16* inOutIdx)
+{
+    int addSigAlgo = 1;
+
+#ifdef USE_ECDSA_KEYSZ_HASH_ALGO
+    if (sigAlgo == ecc_dsa_sa_algo) {
+        int digestSz = GetMacDigestSize(macAlgo);
+        /* do not add sig/algos with digest size larger than key size */
+        if (digestSz <= 0 || (keySz > 0 && digestSz > keySz)) {
+            addSigAlgo = 0;
+        }
+    }
+#else
+    (void)keySz;
+#endif /* USE_ECDSA_KEYSZ_HASH_ALGO */
+
+    if (addSigAlgo) {
+#ifdef WC_RSA_PSS
+        if (sigAlgo == rsa_pss_sa_algo) {
+            /* RSA PSS is sig then mac */
+            suites->hashSigAlgo[*inOutIdx] = sigAlgo;
+            *inOutIdx += 1;
+            suites->hashSigAlgo[*inOutIdx] = macAlgo;
+            *inOutIdx += 1;
+    #ifdef WOLFSSL_TLS13
+            /* Add the certificate algorithm as well */
+            suites->hashSigAlgo[*inOutIdx] = sigAlgo;
+            *inOutIdx += 1;
+            suites->hashSigAlgo[*inOutIdx] = PSS_RSAE_TO_PSS_PSS(macAlgo);
+            *inOutIdx += 1;
+    #endif
+        }
+        else
+#endif
+        {
+            suites->hashSigAlgo[*inOutIdx] = macAlgo;
+            *inOutIdx += 1;
+            suites->hashSigAlgo[*inOutIdx] = sigAlgo;
+            *inOutIdx += 1;
+        }
+    }
+}
+
 void InitSuitesHashSigAlgo(Suites* suites, int haveECDSAsig, int haveRSAsig,
                            int haveAnon, int tls1_2, int keySz)
 {
-    int idx = 0;
+    word16 idx = 0;
 
     (void)tls1_2;
     (void)keySz;
 
-#if defined(HAVE_ECC) || defined(HAVE_ED25519)
+#if defined(HAVE_ECC) || defined(HAVE_ED25519) || defined(HAVE_ED448)
     if (haveECDSAsig) {
-    #ifdef HAVE_ECC
-        #ifdef WOLFSSL_SHA512
-            suites->hashSigAlgo[idx++] = sha512_mac;
-            suites->hashSigAlgo[idx++] = ecc_dsa_sa_algo;
-        #endif
-        #ifdef WOLFSSL_SHA384
-            suites->hashSigAlgo[idx++] = sha384_mac;
-            suites->hashSigAlgo[idx++] = ecc_dsa_sa_algo;
-        #endif
-        #ifndef NO_SHA256
-            suites->hashSigAlgo[idx++] = sha256_mac;
-            suites->hashSigAlgo[idx++] = ecc_dsa_sa_algo;
-        #endif
-        #if !defined(NO_SHA) && (!defined(NO_OLD_TLS) || \
-                                                defined(WOLFSSL_ALLOW_TLS_SHA1))
-            suites->hashSigAlgo[idx++] = sha_mac;
-            suites->hashSigAlgo[idx++] = ecc_dsa_sa_algo;
-        #endif
-    #endif
-        #ifdef HAVE_ED25519
-            suites->hashSigAlgo[idx++] = ED25519_SA_MAJOR;
-            suites->hashSigAlgo[idx++] = ED25519_SA_MINOR;
-        #endif
-    }
-#endif /* HAVE_ECC || HAVE_ED25519 */
+#ifdef HAVE_ECC
+    #ifdef WOLFSSL_SHA512
+        AddSuiteHashSigAlgo(suites, sha512_mac, ecc_dsa_sa_algo, keySz, &idx);
+    #endif
+    #ifdef WOLFSSL_SHA384
+        AddSuiteHashSigAlgo(suites, sha384_mac, ecc_dsa_sa_algo, keySz, &idx);
+    #endif
+    #ifndef NO_SHA256
+        AddSuiteHashSigAlgo(suites, sha256_mac, ecc_dsa_sa_algo, keySz, &idx);
+    #endif
+    #if !defined(NO_SHA) && (!defined(NO_OLD_TLS) || \
+                                            defined(WOLFSSL_ALLOW_TLS_SHA1))
+        AddSuiteHashSigAlgo(suites, sha_mac, ecc_dsa_sa_algo, keySz, &idx);
+    #endif
+#endif
+    #ifdef HAVE_ED25519
+        AddSuiteHashSigAlgo(suites, ED25519_SA_MAJOR, ED25519_SA_MINOR, keySz,
+                                                                          &idx);
+    #endif
+    #ifdef HAVE_ED448
+        AddSuiteHashSigAlgo(suites, ED448_SA_MAJOR, ED448_SA_MINOR, keySz,
+                                                                          &idx);
+    #endif
+    }
+#endif /* HAVE_ECC || HAVE_ED25519 || defined(HAVE_ED448 */
 
     if (haveRSAsig) {
-        #ifdef WC_RSA_PSS
-            if (tls1_2) {
-            #ifdef WOLFSSL_SHA512
-                suites->hashSigAlgo[idx++] = rsa_pss_sa_algo;
-                suites->hashSigAlgo[idx++] = sha512_mac;
-            #endif
-            #ifdef WOLFSSL_SHA384
-                suites->hashSigAlgo[idx++] = rsa_pss_sa_algo;
-                suites->hashSigAlgo[idx++] = sha384_mac;
-            #endif
-            #ifndef NO_SHA256
-                suites->hashSigAlgo[idx++] = rsa_pss_sa_algo;
-                suites->hashSigAlgo[idx++] = sha256_mac;
-            #endif
-            }
-        #endif
+    #ifdef WC_RSA_PSS
+        if (tls1_2) {
         #ifdef WOLFSSL_SHA512
-            suites->hashSigAlgo[idx++] = sha512_mac;
-            suites->hashSigAlgo[idx++] = rsa_sa_algo;
+            AddSuiteHashSigAlgo(suites, sha512_mac, rsa_pss_sa_algo, keySz,
+                                                                          &idx);
         #endif
         #ifdef WOLFSSL_SHA384
-            suites->hashSigAlgo[idx++] = sha384_mac;
-            suites->hashSigAlgo[idx++] = rsa_sa_algo;
+            AddSuiteHashSigAlgo(suites, sha384_mac, rsa_pss_sa_algo, keySz,
+                                                                          &idx);
         #endif
         #ifndef NO_SHA256
-            suites->hashSigAlgo[idx++] = sha256_mac;
-            suites->hashSigAlgo[idx++] = rsa_sa_algo;
-        #endif
-        #if !defined(NO_SHA) && (!defined(NO_OLD_TLS) || \
-                                                defined(WOLFSSL_ALLOW_TLS_SHA1))
-            suites->hashSigAlgo[idx++] = sha_mac;
-            suites->hashSigAlgo[idx++] = rsa_sa_algo;
-        #endif
+            AddSuiteHashSigAlgo(suites, sha256_mac, rsa_pss_sa_algo, keySz,
+                                                                          &idx);
+        #endif
+        }
+    #endif
+    #ifdef WOLFSSL_SHA512
+        AddSuiteHashSigAlgo(suites, sha512_mac, rsa_sa_algo, keySz, &idx);
+    #endif
+    #ifdef WOLFSSL_SHA384
+        AddSuiteHashSigAlgo(suites, sha384_mac, rsa_sa_algo, keySz, &idx);
+    #endif
+    #ifndef NO_SHA256
+        AddSuiteHashSigAlgo(suites, sha256_mac, rsa_sa_algo, keySz, &idx);
+    #endif
+    #ifdef WOLFSSL_SHA224
+        AddSuiteHashSigAlgo(suites, sha224_mac, rsa_sa_algo, keySz, &idx);
+    #endif
+    #if !defined(NO_SHA) && (!defined(NO_OLD_TLS) || \
+                                            defined(WOLFSSL_ALLOW_TLS_SHA1))
+        AddSuiteHashSigAlgo(suites, sha_mac, rsa_sa_algo, keySz, &idx);
+    #endif
     }
 
 #ifdef HAVE_ANON
     if (haveAnon) {
-            suites->hashSigAlgo[idx++] = sha_mac;
-            suites->hashSigAlgo[idx++] = anonymous_sa_algo;
+        AddSuiteHashSigAlgo(suites, sha_mac, anonymous_sa_algo, keySz, &idx);
     }
 #endif
 
     (void)haveAnon;
     (void)haveECDSAsig;
-    suites->hashSigAlgoSz = (word16)idx;
+    suites->hashSigAlgoSz = idx;
 }
 
 void InitSuites(Suites* suites, ProtocolVersion pv, int keySz, word16 haveRSA,
@@ -1888,11 +2303,27 @@
         suites->suites[idx++] = TLS_AES_128_CCM_8_SHA256;
     }
 #endif
+
+#ifdef HAVE_NULL_CIPHER
+    #ifdef BUILD_TLS_SHA256_SHA256
+        if (tls1_3) {
+            suites->suites[idx++] = ECC_BYTE;
+            suites->suites[idx++] = TLS_SHA256_SHA256;
+        }
+    #endif
+
+    #ifdef BUILD_TLS_SHA384_SHA384
+        if (tls1_3) {
+            suites->suites[idx++] = ECC_BYTE;
+            suites->suites[idx++] = TLS_SHA384_SHA384;
+        }
+    #endif
+#endif
 #endif /* WOLFSSL_TLS13 */
 
 #ifndef WOLFSSL_NO_TLS12
 
-#ifndef NO_WOLFSSL_SERVER
+#if !defined(NO_WOLFSSL_SERVER) && !defined(NO_RSA)
     if (side == WOLFSSL_SERVER_END && haveStaticECC) {
         haveRSA = 0;   /* can't do RSA with ECDSA key */
     }
@@ -1906,7 +2337,7 @@
     if (pv.major == DTLS_MAJOR) {
         dtls   = 1;
         tls    = 1;
-        /* May be dead assignments dependant upon configuration */
+        /* May be dead assignments dependent upon configuration */
         (void) dtls;
         (void) tls;
         tls1_2 = pv.minor <= DTLSv1_2_MINOR;
@@ -2316,14 +2747,24 @@
 #endif
 
 #ifdef BUILD_TLS_DHE_RSA_WITH_AES_256_CBC_SHA256
-    if (tls && haveDH && haveRSA) {
+#ifndef WOLFSSL_OLDTLS_SHA2_CIPHERSUITES
+    if (tls1_2 && haveDH && haveRSA)
+#else
+    if (tls && haveDH && haveRSA)
+#endif
+    {
         suites->suites[idx++] = CIPHER_BYTE;
         suites->suites[idx++] = TLS_DHE_RSA_WITH_AES_256_CBC_SHA256;
     }
 #endif
 
 #ifdef BUILD_TLS_DHE_RSA_WITH_AES_128_CBC_SHA256
-    if (tls && haveDH && haveRSA) {
+#ifndef WOLFSSL_OLDTLS_SHA2_CIPHERSUITES
+    if (tls1_2 && haveDH && haveRSA)
+#else
+    if (tls && haveDH && haveRSA)
+#endif
+    {
         suites->suites[idx++] = CIPHER_BYTE;
         suites->suites[idx++] = TLS_DHE_RSA_WITH_AES_128_CBC_SHA256;
     }
@@ -2354,14 +2795,24 @@
 #endif
 
 #ifdef BUILD_TLS_RSA_WITH_AES_256_CBC_SHA256
-    if (tls && haveRSA) {
+#ifndef WOLFSSL_OLDTLS_SHA2_CIPHERSUITES
+    if (tls1_2 && haveRSA)
+#else
+    if (tls && haveRSA)
+#endif
+    {
         suites->suites[idx++] = CIPHER_BYTE;
         suites->suites[idx++] = TLS_RSA_WITH_AES_256_CBC_SHA256;
     }
 #endif
 
 #ifdef BUILD_TLS_RSA_WITH_AES_128_CBC_SHA256
-    if (tls && haveRSA) {
+#ifndef WOLFSSL_OLDTLS_SHA2_CIPHERSUITES
+    if (tls1_2 && haveRSA)
+#else
+    if (tls && haveRSA)
+#endif
+    {
         suites->suites[idx++] = CIPHER_BYTE;
         suites->suites[idx++] = TLS_RSA_WITH_AES_128_CBC_SHA256;
     }
@@ -2410,6 +2861,13 @@
     }
 #endif
 
+#ifdef BUILD_TLS_RSA_WITH_NULL_MD5
+    if (tls && haveRSA) {
+        suites->suites[idx++] = CIPHER_BYTE;
+        suites->suites[idx++] = TLS_RSA_WITH_NULL_MD5;
+    }
+#endif
+
 #ifdef BUILD_TLS_RSA_WITH_NULL_SHA
     if (tls && haveRSA) {
         suites->suites[idx++] = CIPHER_BYTE;
@@ -2418,7 +2876,12 @@
 #endif
 
 #ifdef BUILD_TLS_RSA_WITH_NULL_SHA256
-    if (tls && haveRSA) {
+#ifndef WOLFSSL_OLDTLS_SHA2_CIPHERSUITES
+    if (tls1_2 && haveRSA)
+#else
+    if (tls && haveRSA)
+#endif
+    {
         suites->suites[idx++] = CIPHER_BYTE;
         suites->suites[idx++] = TLS_RSA_WITH_NULL_SHA256;
     }
@@ -2432,28 +2895,48 @@
 #endif
 
 #ifdef BUILD_TLS_DHE_PSK_WITH_AES_256_CBC_SHA384
-    if (tls && haveDH && havePSK) {
+#ifndef WOLFSSL_OLDTLS_SHA2_CIPHERSUITES
+    if (tls1_2 && haveDH && havePSK)
+#else
+    if (tls && haveDH && havePSK)
+#endif
+    {
         suites->suites[idx++] = CIPHER_BYTE;
         suites->suites[idx++] = TLS_DHE_PSK_WITH_AES_256_CBC_SHA384;
     }
 #endif
 
 #ifdef BUILD_TLS_PSK_WITH_AES_256_CBC_SHA384
-    if (tls && havePSK) {
+#ifndef WOLFSSL_OLDTLS_SHA2_CIPHERSUITES
+    if (tls1_2 && havePSK)
+#else
+    if (tls && havePSK)
+#endif
+    {
         suites->suites[idx++] = CIPHER_BYTE;
         suites->suites[idx++] = TLS_PSK_WITH_AES_256_CBC_SHA384;
     }
 #endif
 
 #ifdef BUILD_TLS_DHE_PSK_WITH_AES_128_CBC_SHA256
-    if (tls && haveDH && havePSK) {
+#ifndef WOLFSSL_OLDTLS_SHA2_CIPHERSUITES
+    if (tls1_2 && haveDH && havePSK)
+#else
+    if (tls && haveDH && havePSK)
+#endif
+    {
         suites->suites[idx++] = CIPHER_BYTE;
         suites->suites[idx++] = TLS_DHE_PSK_WITH_AES_128_CBC_SHA256;
     }
 #endif
 
 #ifdef BUILD_TLS_PSK_WITH_AES_128_CBC_SHA256
-    if (tls && havePSK) {
+#ifndef WOLFSSL_OLDTLS_SHA2_CIPHERSUITES
+    if (tls1_2 && havePSK)
+#else
+    if (tls1 && havePSK)
+#endif
+    {
         suites->suites[idx++] = CIPHER_BYTE;
         suites->suites[idx++] = TLS_PSK_WITH_AES_128_CBC_SHA256;
     }
@@ -2481,28 +2964,48 @@
 #endif
 
 #ifdef BUILD_TLS_PSK_WITH_CHACHA20_POLY1305_SHA256
-    if (tls && havePSK) {
+#ifndef WOLFSSL_OLDTLS_SHA2_CIPHERSUITES
+    if (tls1_2 && havePSK)
+#else
+    if (tls && havePSK)
+#endif
+    {
         suites->suites[idx++] = CHACHA_BYTE;
         suites->suites[idx++] = TLS_PSK_WITH_CHACHA20_POLY1305_SHA256;
     }
 #endif
 
 #ifdef BUILD_TLS_ECDHE_PSK_WITH_CHACHA20_POLY1305_SHA256
-    if (tls && havePSK) {
+#ifndef WOLFSSL_OLDTLS_SHA2_CIPHERSUITES
+    if (tls1_2 && havePSK)
+#else
+    if (tls && havePSK)
+#endif
+    {
         suites->suites[idx++] = CHACHA_BYTE;
         suites->suites[idx++] = TLS_ECDHE_PSK_WITH_CHACHA20_POLY1305_SHA256;
     }
 #endif
 
 #ifdef BUILD_TLS_DHE_PSK_WITH_CHACHA20_POLY1305_SHA256
-    if (tls && havePSK) {
+#ifndef WOLFSSL_OLDTLS_SHA2_CIPHERSUITES
+    if (tls1_2 && havePSK)
+#else
+    if (tls && havePSK)
+#endif
+    {
         suites->suites[idx++] = CHACHA_BYTE;
         suites->suites[idx++] = TLS_DHE_PSK_WITH_CHACHA20_POLY1305_SHA256;
     }
 #endif
 
 #ifdef BUILD_TLS_ECDHE_PSK_WITH_AES_128_CBC_SHA256
-    if (tls && havePSK) {
+#ifndef WOLFSSL_OLDTLS_SHA2_CIPHERSUITES
+    if (tls1_2 && havePSK)
+#else
+    if (tls && havePSK)
+#endif
+    {
         suites->suites[idx++] = ECC_BYTE;
         suites->suites[idx++] = TLS_ECDHE_PSK_WITH_AES_128_CBC_SHA256;
     }
@@ -2537,35 +3040,60 @@
 #endif
 
 #ifdef BUILD_TLS_DHE_PSK_WITH_NULL_SHA384
-    if (tls && haveDH && havePSK) {
+#ifndef WOLFSSL_OLDTLS_SHA2_CIPHERSUITES
+    if (tls1_2 && haveDH && havePSK)
+#else
+    if (tls && haveDH && havePSK)
+#endif
+    {
         suites->suites[idx++] = CIPHER_BYTE;
         suites->suites[idx++] = TLS_DHE_PSK_WITH_NULL_SHA384;
     }
 #endif
 
 #ifdef BUILD_TLS_PSK_WITH_NULL_SHA384
-    if (tls && havePSK) {
+#ifndef WOLFSSL_OLDTLS_SHA2_CIPHERSUITES
+    if (tls1_2 && havePSK)
+#else
+    if (tls && havePSK)
+#endif
+    {
         suites->suites[idx++] = CIPHER_BYTE;
         suites->suites[idx++] = TLS_PSK_WITH_NULL_SHA384;
     }
 #endif
 
 #ifdef BUILD_TLS_ECDHE_PSK_WITH_NULL_SHA256
-    if (tls && havePSK) {
+#ifndef WOLFSSL_OLDTLS_SHA2_CIPHERSUITES
+    if (tls1_2 && havePSK)
+#else
+    if (tls && havePSK)
+#endif
+    {
         suites->suites[idx++] = ECC_BYTE;
         suites->suites[idx++] = TLS_ECDHE_PSK_WITH_NULL_SHA256;
     }
 #endif
 
 #ifdef BUILD_TLS_DHE_PSK_WITH_NULL_SHA256
-    if (tls && haveDH && havePSK) {
+#ifndef WOLFSSL_OLDTLS_SHA2_CIPHERSUITES
+    if (tls1_2 && haveDH && havePSK)
+#else
+    if (tls && haveDH && havePSK)
+#endif
+    {
         suites->suites[idx++] = CIPHER_BYTE;
         suites->suites[idx++] = TLS_DHE_PSK_WITH_NULL_SHA256;
     }
 #endif
 
 #ifdef BUILD_TLS_PSK_WITH_NULL_SHA256
-    if (tls && havePSK) {
+#ifndef WOLFSSL_OLDTLS_SHA2_CIPHERSUITES
+    if (tls1_2 && havePSK)
+#else
+    if (tls && havePSK)
+#endif
+    {
         suites->suites[idx++] = CIPHER_BYTE;
         suites->suites[idx++] = TLS_PSK_WITH_NULL_SHA256;
     }
@@ -2613,27 +3141,6 @@
     }
 #endif
 
-#ifdef BUILD_TLS_RSA_WITH_HC_128_B2B256
-    if (!dtls && tls && haveRSA) {
-        suites->suites[idx++] = CIPHER_BYTE;
-        suites->suites[idx++] = TLS_RSA_WITH_HC_128_B2B256;
-    }
-#endif
-
-#ifdef BUILD_TLS_RSA_WITH_AES_128_CBC_B2B256
-    if (tls && haveRSA) {
-        suites->suites[idx++] = CIPHER_BYTE;
-        suites->suites[idx++] = TLS_RSA_WITH_AES_128_CBC_B2B256;
-    }
-#endif
-
-#ifdef BUILD_TLS_RSA_WITH_AES_256_CBC_B2B256
-    if (tls && haveRSA) {
-        suites->suites[idx++] = CIPHER_BYTE;
-        suites->suites[idx++] = TLS_RSA_WITH_AES_256_CBC_B2B256;
-    }
-#endif
-
 #ifdef BUILD_TLS_RSA_WITH_RABBIT_SHA
     if (!dtls && tls && haveRSA) {
         suites->suites[idx++] = CIPHER_BYTE;
@@ -2670,28 +3177,48 @@
 #endif
 
 #ifdef BUILD_TLS_RSA_WITH_CAMELLIA_128_CBC_SHA256
-    if (tls && haveRSA) {
+#ifndef WOLFSSL_OLDTLS_SHA2_CIPHERSUITES
+    if (tls1_2 && haveRSA)
+#else
+    if (tls && haveRSA)
+#endif
+    {
         suites->suites[idx++] = CIPHER_BYTE;
         suites->suites[idx++] = TLS_RSA_WITH_CAMELLIA_128_CBC_SHA256;
     }
 #endif
 
 #ifdef BUILD_TLS_DHE_RSA_WITH_CAMELLIA_128_CBC_SHA256
-    if (tls && haveDH && haveRSA) {
+#ifndef WOLFSSL_OLDTLS_SHA2_CIPHERSUITES
+    if (tls1_2 && haveDH && haveRSA)
+#else
+    if (tls && haveDH && haveRSA)
+#endif
+    {
         suites->suites[idx++] = CIPHER_BYTE;
         suites->suites[idx++] = TLS_DHE_RSA_WITH_CAMELLIA_128_CBC_SHA256;
     }
 #endif
 
 #ifdef BUILD_TLS_RSA_WITH_CAMELLIA_256_CBC_SHA256
-    if (tls && haveRSA) {
+#ifndef WOLFSSL_OLDTLS_SHA2_CIPHERSUITES
+    if (tls1_2 && haveRSA)
+#else
+    if (tls && haveRSA)
+#endif
+    {
         suites->suites[idx++] = CIPHER_BYTE;
         suites->suites[idx++] = TLS_RSA_WITH_CAMELLIA_256_CBC_SHA256;
     }
 #endif
 
 #ifdef BUILD_TLS_DHE_RSA_WITH_CAMELLIA_256_CBC_SHA256
-    if (tls && haveDH && haveRSA) {
+#ifndef WOLFSSL_OLDTLS_SHA2_CIPHERSUITES
+    if (tls1_2 && haveDH && haveRSA)
+#else
+    if (tls && haveDH && haveRSA)
+#endif
+    {
         suites->suites[idx++] = CIPHER_BYTE;
         suites->suites[idx++] = TLS_DHE_RSA_WITH_CAMELLIA_256_CBC_SHA256;
     }
@@ -2708,7 +3235,8 @@
 
     suites->suiteSz = idx;
 
-    InitSuitesHashSigAlgo(suites, haveECDSAsig, haveRSAsig, 0, tls1_2, keySz);
+    InitSuitesHashSigAlgo(suites, haveECDSAsig | haveECC, haveRSAsig | haveRSA,
+                                                              0, tls1_2, keySz);
 }
 
 #if !defined(NO_WOLFSSL_SERVER) || !defined(NO_CERTS) || \
@@ -2724,13 +3252,6 @@
 {
     switch (input[0]) {
         case NEW_SA_MAJOR:
-    #ifdef WC_RSA_PSS
-            /* PSS signatures: 0x080[4-6] */
-            if (input[1] <= sha512_mac) {
-                *hsType   = input[0];
-                *hashAlgo = input[1];
-            }
-    #endif
     #ifdef HAVE_ED25519
             /* ED25519: 0x0807 */
             if (input[1] == ED25519_SA_MINOR) {
@@ -2738,8 +3259,29 @@
                 /* Hash performed as part of sign/verify operation. */
                 *hashAlgo = sha512_mac;
             }
-    #endif
+            else
+    #endif
+    #ifdef HAVE_ED448
             /* ED448: 0x0808 */
+            if (input[1] == ED448_SA_MINOR) {
+                *hsType = ed448_sa_algo;
+                /* Hash performed as part of sign/verify operation. */
+                *hashAlgo = sha512_mac;
+            }
+            else
+    #endif
+    #ifdef WC_RSA_PSS
+            /* PSS PSS signatures: 0x080[9-b] */
+            if (input[1] >= pss_sha256 && input[1] <= pss_sha512) {
+                *hsType   = rsa_pss_pss_algo;
+                *hashAlgo = PSS_PSS_HASH_TO_MAC(input[1]);
+            }
+            else
+    #endif
+            {
+                *hsType   = input[0];
+                *hashAlgo = input[1];
+            }
             break;
         default:
             *hashAlgo = input[0];
@@ -2752,7 +3294,7 @@
 #ifndef WOLFSSL_NO_TLS12
 #if !defined(NO_WOLFSSL_SERVER) || !defined(NO_WOLFSSL_CLIENT)
 #if !defined(NO_DH) || defined(HAVE_ECC) || defined(HAVE_CURVE25519) || \
-                                       (!defined(NO_RSA) && defined(WC_RSA_PSS))
+             defined(HAVE_CURVE448) || (!defined(NO_RSA) && defined(WC_RSA_PSS))
 
 static enum wc_HashType HashAlgoToType(int hashAlgo)
 {
@@ -2769,6 +3311,10 @@
         case sha256_mac:
             return WC_HASH_TYPE_SHA256;
     #endif
+    #ifdef WOLFSSL_SHA224
+        case sha224_mac:
+            return WC_HASH_TYPE_SHA224;
+    #endif
     #if !defined(NO_SHA) && (!defined(NO_OLD_TLS) || \
                              defined(WOLFSSL_ALLOW_TLS_SHA1))
         case sha_mac:
@@ -2794,6 +3340,7 @@
     if (name != NULL) {
         name->name        = name->staticName;
         name->dynamicName = 0;
+        name->sz = 0;
 #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
         XMEMSET(&name->fullName, 0, sizeof(DecodedName));
         XMEMSET(&name->cnEntry,  0, sizeof(WOLFSSL_X509_NAME_ENTRY));
@@ -2809,8 +3356,10 @@
 void FreeX509Name(WOLFSSL_X509_NAME* name, void* heap)
 {
     if (name != NULL) {
-        if (name->dynamicName)
+        if (name->dynamicName) {
             XFREE(name->name, heap, DYNAMIC_TYPE_SUBJECT_CN);
+            name->name = NULL;
+        }
 #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
         {
             int i;
@@ -2824,6 +3373,7 @@
                     XFREE(name->extra[i].data.data, heap, DYNAMIC_TYPE_OPENSSL);
                 }
             }
+            wolfSSL_ASN1_OBJECT_free(&name->cnEntry.object);
         }
 #endif /* OPENSSL_EXTRA || OPENSSL_EXTRA_X509_SMALL */
     }
@@ -2845,6 +3395,10 @@
     InitX509Name(&x509->issuer, 0);
     InitX509Name(&x509->subject, 0);
     x509->dynamicMemory  = (byte)dynamicFlag;
+    #if defined(OPENSSL_EXTRA) || defined(OPENSSL_ALL)
+        x509->refCount = 1;
+        (void)wc_InitMutex(&x509->refMutex);
+    #endif
 }
 
 
@@ -2856,26 +3410,68 @@
 
     FreeX509Name(&x509->issuer, x509->heap);
     FreeX509Name(&x509->subject, x509->heap);
-    if (x509->pubKey.buffer)
+    if (x509->pubKey.buffer) {
         XFREE(x509->pubKey.buffer, x509->heap, DYNAMIC_TYPE_PUBLIC_KEY);
+        x509->pubKey.buffer = NULL;
+    }
     FreeDer(&x509->derCert);
     XFREE(x509->sig.buffer, x509->heap, DYNAMIC_TYPE_SIGNATURE);
+    x509->sig.buffer = NULL;
     #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
         XFREE(x509->authKeyId, x509->heap, DYNAMIC_TYPE_X509_EXT);
+        x509->authKeyId = NULL;
         XFREE(x509->subjKeyId, x509->heap, DYNAMIC_TYPE_X509_EXT);
+        x509->subjKeyId = NULL;
         if (x509->authInfo != NULL) {
             XFREE(x509->authInfo, x509->heap, DYNAMIC_TYPE_X509_EXT);
-        }
+            x509->authInfo = NULL;
+        }
+        #if defined(OPENSSL_ALL) || defined(WOLFSSL_QT)
+        if (x509->authInfoCaIssuer != NULL) {
+            XFREE(x509->authInfoCaIssuer, x509->heap, DYNAMIC_TYPE_X509_EXT);
+        }
+        if (x509->ext_sk != NULL) {
+            wolfSSL_sk_X509_EXTENSION_free(x509->ext_sk);
+        }
+        #endif /* OPENSSL_ALL || WOLFSSL_QT */
+        #ifdef OPENSSL_EXTRA
+        /* Free serialNumber that was set by wolfSSL_X509_get_serialNumber */
+        if (x509->serialNumber != NULL) {
+            wolfSSL_ASN1_INTEGER_free(x509->serialNumber);
+        }
+        #endif
         if (x509->extKeyUsageSrc != NULL) {
             XFREE(x509->extKeyUsageSrc, x509->heap, DYNAMIC_TYPE_X509_EXT);
+            x509->extKeyUsageSrc= NULL;
         }
     #endif /* OPENSSL_EXTRA || OPENSSL_EXTRA_X509_SMALL */
-    if (x509->altNames)
+    #if defined(OPENSSL_ALL)
+        if (x509->algor.algorithm) {
+            wolfSSL_ASN1_OBJECT_free(x509->algor.algorithm);
+            x509->algor.algorithm = NULL;
+        }
+        if (x509->key.algor) {
+            wolfSSL_X509_ALGOR_free(x509->key.algor);
+            x509->key.algor = NULL;
+        }
+        if (x509->key.pkey) {
+            wolfSSL_EVP_PKEY_free(x509->key.pkey);
+            x509->key.pkey = NULL;
+        }
+    #endif /* OPENSSL_ALL */
+    if (x509->altNames) {
         FreeAltNames(x509->altNames, x509->heap);
+        x509->altNames = NULL;
+    }
+
+    #if defined(OPENSSL_EXTRA) || defined(OPENSSL_ALL)
+        wc_FreeMutex(&x509->refMutex);
+    #endif
 }
 
 
 #if !defined(NO_WOLFSSL_SERVER) || !defined(NO_WOLFSSL_CLIENT)
+#if !defined(WOLFSSL_NO_TLS12)
 /* Encode the signature algorithm into buffer.
  *
  * hashalgo  The hash algorithm.
@@ -2898,6 +3494,13 @@
             (void)hashAlgo;
             break;
 #endif
+#ifdef HAVE_ED448
+        case ed448_sa_algo:
+            output[0] = ED448_SA_MAJOR;
+            output[1] = ED448_SA_MINOR;
+            (void)hashAlgo;
+            break;
+#endif
 #ifndef NO_RSA
         case rsa_sa_algo:
             output[0] = hashAlgo;
@@ -2911,11 +3514,11 @@
             break;
     #endif
 #endif
-        /* ED448: 0x0808 */
     }
     (void)hashAlgo;
     (void)output;
 }
+#endif
 
 #if !defined(WOLFSSL_NO_TLS12) && !defined(WOLFSSL_NO_CLIENT_AUTH)
 static void SetDigest(WOLFSSL* ssl, int hashAlgo)
@@ -2951,9 +3554,24 @@
 #endif /* !NO_WOLFSSL_SERVER || !NO_WOLFSSL_CLIENT */
 #endif /* !NO_CERTS */
 
+#if defined(HAVE_ENCRYPT_THEN_MAC) && !defined(WOLFSSL_AEAD_ONLY)
+static word32 MacSize(WOLFSSL* ssl)
+{
+#ifdef HAVE_TRUNCATED_HMAC
+    word32 digestSz = ssl->truncated_hmac ? (byte)TRUNCATED_HMAC_SZ
+                                          : ssl->specs.hash_size;
+#else
+    word32 digestSz = ssl->specs.hash_size;
+#endif
+
+    return digestSz;
+}
+#endif /* HAVE_ENCRYPT_THEN_MAC && !WOLFSSL_AEAD_ONLY */
+
 #ifndef NO_RSA
 #ifndef WOLFSSL_NO_TLS12
-#if !defined(NO_WOLFSSL_SERVER) || !defined(NO_WOLFSSL_CLIENT)
+#if !defined(NO_WOLFSSL_SERVER) || (!defined(NO_WOLFSSL_CLIENT) && \
+                                               !defined(WOLFSSL_NO_CLIENT_AUTH))
 static int TypeHash(int hashAlgo)
 {
     switch (hashAlgo) {
@@ -2969,6 +3587,10 @@
         case sha256_mac:
             return SHA256h;
     #endif
+    #ifdef WOLFSSL_SHA224
+        case sha224_mac:
+            return SHA224h;
+    #endif
     #ifndef NO_SHA
         case sha_mac:
             return SHAh;
@@ -3013,6 +3635,7 @@
 }
 #endif
 
+#if !defined(NO_WOLFSSL_SERVER) || !defined(WOLFSSL_NO_CLIENT_AUTH)
 int RsaSign(WOLFSSL* ssl, const byte* in, word32 inSz, byte* out,
             word32* outSz, int sigAlgo, int hashAlgo, RsaKey* key,
             DerBuffer* keyBufInfo)
@@ -3037,9 +3660,11 @@
 
 #ifdef WOLFSSL_ASYNC_CRYPT
     /* initialize event */
-    ret = wolfSSL_AsyncInit(ssl, &key->asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
-    if (ret != 0)
-        return ret;
+    if (key) {
+        ret = wolfSSL_AsyncInit(ssl, &key->asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+        if (ret != 0)
+            return ret;
+    }
 #endif
 
 #if defined(WC_RSA_PSS)
@@ -3079,7 +3704,7 @@
 
     /* Handle async pending response */
 #ifdef WOLFSSL_ASYNC_CRYPT
-    if (ret == WC_PENDING_E) {
+    if (key && ret == WC_PENDING_E) {
         ret = wolfSSL_AsyncPush(ssl, &key->asyncDev);
     }
 #endif /* WOLFSSL_ASYNC_CRYPT */
@@ -3094,6 +3719,7 @@
 
     return ret;
 }
+#endif
 
 int RsaVerify(WOLFSSL* ssl, byte* in, word32 inSz, byte** out, int sigAlgo,
               int hashAlgo, RsaKey* key, buffer* keyBufInfo)
@@ -3229,19 +3855,32 @@
             ret = ssl->ctx->RsaPssSignCheckCb(ssl, verifySig, sigSz, &out,
                                            TypeHash(hashAlgo), mgf,
                                            keyBuf, keySz, ctx);
+            if (ret > 0) {
+                ret = wc_RsaPSS_CheckPadding(plain, plainSz, out, ret,
+                                             hashType);
+                if (ret != 0)
+                    ret = VERIFY_CERT_ERROR;
+            }
         }
         else
     #endif /* HAVE_PK_CALLBACKS */
         {
             ret = wc_RsaPSS_VerifyInline(verifySig, sigSz, &out, hashType, mgf,
                                          key);
-        }
-
-        if (ret > 0) {
-            ret = wc_RsaPSS_CheckPadding(plain, plainSz, out, ret, hashType);
-            if (ret != 0)
-                ret = VERIFY_CERT_ERROR;
-        }
+            if (ret > 0) {
+    #ifdef HAVE_SELFTEST
+                ret = wc_RsaPSS_CheckPadding(plain, plainSz, out, ret,
+                                             hashType);
+    #else
+                ret = wc_RsaPSS_CheckPadding_ex(plain, plainSz, out, ret,
+                                                hashType, -1,
+                                                mp_count_bits(&key->n));
+    #endif
+                if (ret != 0)
+                    ret = VERIFY_CERT_ERROR;
+            }
+        }
+
     }
     else
 #endif /* WC_RSA_PSS */
@@ -3288,6 +3927,7 @@
 
 #ifndef WOLFSSL_NO_TLS12
 
+#if !defined(NO_WOLFSSL_SERVER) || !defined(WOLFSSL_NO_CLIENT_AUTH)
 int RsaDec(WOLFSSL* ssl, byte* in, word32 inSz, byte** out, word32* outSz,
     RsaKey* key, DerBuffer* keyBufInfo)
 {
@@ -3347,6 +3987,7 @@
 
     return ret;
 }
+#endif /* !NO_WOLFSSL_SERVER) || !WOLFSSL_NO_CLIENT_AUTH */
 
 int RsaEnc(WOLFSSL* ssl, const byte* in, word32 inSz, byte* out, word32* outSz,
     RsaKey* key, buffer* keyBufInfo)
@@ -3430,9 +4071,11 @@
 
 #ifdef WOLFSSL_ASYNC_CRYPT
     /* initialize event */
-    ret = wolfSSL_AsyncInit(ssl, &key->asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
-    if (ret != 0)
-        return ret;
+    if (key) {
+        ret = wolfSSL_AsyncInit(ssl, &key->asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+        if (ret != 0)
+            return ret;
+    }
 #endif
 
 #if defined(HAVE_PK_CALLBACKS)
@@ -3449,7 +4092,7 @@
 
     /* Handle async pending response */
 #ifdef WOLFSSL_ASYNC_CRYPT
-    if (ret == WC_PENDING_E) {
+    if (key && ret == WC_PENDING_E) {
         ret = wolfSSL_AsyncPush(ssl, &key->asyncDev);
     }
 #endif /* WOLFSSL_ASYNC_CRYPT */
@@ -3670,6 +4313,9 @@
     /* make sure the curve is set for TLS */
     if (ret == 0 && key->dp) {
         ssl->ecdhCurveOID = key->dp->oidSum;
+    #if defined(WOLFSSL_TLS13) || defined(HAVE_FFDHE)
+        ssl->namedGroup = 0;
+    #endif
     }
 
     /* Handle async pending response */
@@ -3723,14 +4369,14 @@
     return ret;
 }
 
-/* Sign the data using EdDSA and key using X25519.
+/* Sign the data using EdDSA and key using Ed25519.
  *
  * ssl    SSL object.
  * in     Data or message to sign.
  * inSz   Length of the data.
  * out    Buffer to hold signature.
  * outSz  On entry, size of the buffer. On exit, the size of the signature.
- * key    The private X25519 key data.
+ * key    The private Ed25519 key data.
  * keySz  The length of the private key data in bytes.
  * ctx    The callback context.
  * returns 0 on success, otherwise the value is an error.
@@ -3785,14 +4431,14 @@
     return ret;
 }
 
-/* Verify the data using EdDSA and key using X25519.
+/* Verify the data using EdDSA and key using Ed25519.
  *
  * ssl    SSL object.
  * in     Signature data.
  * inSz   Length of the signature data in bytes.
  * msg    Message to verify.
  * outSz  Length of message in bytes.
- * key    The public X25519 key data.
+ * key    The public Ed25519 key data.
  * keySz  The length of the private key data in bytes.
  * ctx    The callback context.
  * returns 0 on success, otherwise the value is an error.
@@ -3972,6 +4618,9 @@
 
     if (ret == 0) {
         ssl->ecdhCurveOID = ECC_X25519_OID;
+    #if defined(WOLFSSL_TLS13) || defined(HAVE_FFDHE)
+        ssl->namedGroup = 0;
+    #endif
     }
 
     /* Handle async pending response */
@@ -3987,6 +4636,308 @@
 }
 #endif /* HAVE_CURVE25519 */
 
+#ifdef HAVE_ED448
+/* Check whether the key contains a public key.
+ * If not then pull it out of the leaf certificate.
+ *
+ * ssl  SSL/TLS object.
+ * returns MEMORY_E when unable to allocate memory, a parsing error, otherwise
+ * 0 on success.
+ */
+int Ed448CheckPubKey(WOLFSSL* ssl)
+{
+    ed448_key* key = (ed448_key*)ssl->hsKey;
+    int ret = 0;
+
+    /* Public key required for signing. */
+    if (!key->pubKeySet) {
+        DerBuffer* leaf = ssl->buffers.certificate;
+        DecodedCert* cert = (DecodedCert*)XMALLOC(sizeof(*cert), ssl->heap,
+            DYNAMIC_TYPE_DCERT);
+        if (cert == NULL)
+            ret = MEMORY_E;
+
+        if (ret == 0) {
+            InitDecodedCert(cert, leaf->buffer, leaf->length, ssl->heap);
+            ret = DecodeToKey(cert, 0);
+        }
+        if (ret == 0) {
+            ret = wc_ed448_import_public(cert->publicKey, cert->pubKeySize,
+                key);
+        }
+        if (cert != NULL) {
+            FreeDecodedCert(cert);
+            XFREE(cert, ssl->heap, DYNAMIC_TYPE_DCERT);
+        }
+    }
+
+    return ret;
+}
+
+/* Sign the data using EdDSA and key using Ed448.
+ *
+ * ssl    SSL object.
+ * in     Data or message to sign.
+ * inSz   Length of the data.
+ * out    Buffer to hold signature.
+ * outSz  On entry, size of the buffer. On exit, the size of the signature.
+ * key    The private Ed448 key data.
+ * keySz  The length of the private key data in bytes.
+ * ctx    The callback context.
+ * returns 0 on success, otherwise the value is an error.
+ */
+int Ed448Sign(WOLFSSL* ssl, const byte* in, word32 inSz, byte* out,
+              word32* outSz, ed448_key* key, DerBuffer* keyBufInfo)
+{
+    int ret;
+#ifdef HAVE_PK_CALLBACKS
+    const byte* keyBuf = NULL;
+    word32 keySz = 0;
+
+    if (keyBufInfo) {
+        keyBuf = keyBufInfo->buffer;
+        keySz = keyBufInfo->length;
+    }
+#endif
+
+    (void)ssl;
+    (void)keyBufInfo;
+
+    WOLFSSL_ENTER("Ed448Sign");
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+    /* initialize event */
+    ret = wolfSSL_AsyncInit(ssl, &key->asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+    if (ret != 0)
+        return ret;
+#endif
+
+#if defined(HAVE_PK_CALLBACKS)
+    if (ssl->ctx->Ed448SignCb) {
+        void* ctx = wolfSSL_GetEd448SignCtx(ssl);
+        ret = ssl->ctx->Ed448SignCb(ssl, in, inSz, out, outSz, keyBuf, keySz,
+            ctx);
+    }
+    else
+#endif /* HAVE_PK_CALLBACKS */
+    {
+        ret = wc_ed448_sign_msg(in, inSz, out, outSz, key, NULL, 0);
+    }
+
+    /* Handle async pending response */
+#ifdef WOLFSSL_ASYNC_CRYPT
+    if (ret == WC_PENDING_E) {
+        ret = wolfSSL_AsyncPush(ssl, &key->asyncDev);
+    }
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+    WOLFSSL_LEAVE("Ed448Sign", ret);
+
+    return ret;
+}
+
+/* Verify the data using EdDSA and key using Ed448.
+ *
+ * ssl    SSL object.
+ * in     Signature data.
+ * inSz   Length of the signature data in bytes.
+ * msg    Message to verify.
+ * outSz  Length of message in bytes.
+ * key    The public Ed448 key data.
+ * keySz  The length of the private key data in bytes.
+ * ctx    The callback context.
+ * returns 0 on success, otherwise the value is an error.
+ */
+int Ed448Verify(WOLFSSL* ssl, const byte* in, word32 inSz, const byte* msg,
+                word32 msgSz, ed448_key* key, buffer* keyBufInfo)
+{
+    int ret;
+#ifdef HAVE_PK_CALLBACKS
+    const byte* keyBuf = NULL;
+    word32 keySz = 0;
+
+    if (keyBufInfo) {
+        keyBuf = keyBufInfo->buffer;
+        keySz = keyBufInfo->length;
+    }
+#endif
+
+    (void)ssl;
+    (void)keyBufInfo;
+
+    WOLFSSL_ENTER("Ed448Verify");
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+    /* initialize event */
+    ret = wolfSSL_AsyncInit(ssl, &key->asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+    if (ret != 0)
+        return ret;
+#endif
+
+#ifdef HAVE_PK_CALLBACKS
+    if (ssl->ctx->Ed448VerifyCb) {
+        void* ctx = wolfSSL_GetEd448VerifyCtx(ssl);
+        ret = ssl->ctx->Ed448VerifyCb(ssl, in, inSz, msg, msgSz, keyBuf, keySz,
+             &ssl->eccVerifyRes, ctx);
+    }
+    else
+#endif /* HAVE_PK_CALLBACKS  */
+    {
+        ret = wc_ed448_verify_msg(in, inSz, msg, msgSz, &ssl->eccVerifyRes, key,
+            NULL, 0);
+    }
+
+    /* Handle async pending response */
+#ifdef WOLFSSL_ASYNC_CRYPT
+    if (ret == WC_PENDING_E) {
+        ret = wolfSSL_AsyncPush(ssl, &key->asyncDev);
+    }
+    else
+#endif /* WOLFSSL_ASYNC_CRYPT */
+    {
+        ret = (ret != 0 || ssl->eccVerifyRes == 0) ? VERIFY_SIGN_ERROR : 0;
+    }
+
+    WOLFSSL_LEAVE("Ed448Verify", ret);
+
+    return ret;
+}
+#endif /* HAVE_ED448 */
+
+#ifdef HAVE_CURVE448
+#ifdef HAVE_PK_CALLBACKS
+    /* Gets X448 key for shared secret callback testing
+     * Client side: returns peer key
+     * Server side: returns private key
+     */
+    static int X448GetKey(WOLFSSL* ssl, curve448_key** otherKey)
+    {
+        int ret = NO_PEER_KEY;
+        struct curve448_key* tmpKey = NULL;
+
+        if (ssl == NULL || otherKey == NULL) {
+            return BAD_FUNC_ARG;
+        }
+
+        if (ssl->options.side == WOLFSSL_CLIENT_END) {
+            if (!ssl->peerX448Key || !ssl->peerX448KeyPresent) {
+                return NO_PEER_KEY;
+            }
+            tmpKey = (struct curve448_key*)ssl->peerX448Key;
+        }
+        else if (ssl->options.side == WOLFSSL_SERVER_END) {
+            if (!ssl->eccTempKeyPresent) {
+                return NO_PRIVATE_KEY;
+            }
+            tmpKey = (struct curve448_key*)ssl->eccTempKey;
+        }
+
+        if (tmpKey) {
+            *otherKey = (curve448_key *)tmpKey;
+            ret = 0;
+        }
+
+        return ret;
+    }
+#endif /* HAVE_PK_CALLBACKS */
+
+static int X448SharedSecret(WOLFSSL* ssl, curve448_key* priv_key,
+                            curve448_key* pub_key, byte* pubKeyDer,
+                            word32* pubKeySz, byte* out, word32* outlen,
+                            int side)
+{
+    int ret;
+
+    (void)ssl;
+    (void)pubKeyDer;
+    (void)pubKeySz;
+    (void)side;
+
+    WOLFSSL_ENTER("X448SharedSecret");
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+    /* initialize event */
+    ret = wolfSSL_AsyncInit(ssl, &priv_key->asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+    if (ret != 0)
+        return ret;
+#endif
+
+#ifdef HAVE_PK_CALLBACKS
+    if (ssl->ctx->X448SharedSecretCb) {
+        curve448_key* otherKey = NULL;
+
+        ret = X448GetKey(ssl, &otherKey);
+        if (ret == 0) {
+            void* ctx = wolfSSL_GetX448SharedSecretCtx(ssl);
+            ret = ssl->ctx->X448SharedSecretCb(ssl, otherKey, pubKeyDer,
+                pubKeySz, out, outlen, side, ctx);
+        }
+    }
+    else
+#endif
+    {
+        ret = wc_curve448_shared_secret_ex(priv_key, pub_key, out, outlen,
+            EC448_LITTLE_ENDIAN);
+    }
+
+    /* Handle async pending response */
+#ifdef WOLFSSL_ASYNC_CRYPT
+    if (ret == WC_PENDING_E) {
+        ret = wolfSSL_AsyncPush(ssl, &priv_key->asyncDev);
+    }
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+    WOLFSSL_LEAVE("X448SharedSecret", ret);
+
+    return ret;
+}
+
+static int X448MakeKey(WOLFSSL* ssl, curve448_key* key, curve448_key* peer)
+{
+    int ret = 0;
+
+    (void)peer;
+
+    WOLFSSL_ENTER("X448MakeKey");
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+    /* initialize event */
+    ret = wolfSSL_AsyncInit(ssl, &key->asyncDev, WC_ASYNC_FLAG_NONE);
+    if (ret != 0)
+        return ret;
+#endif
+
+#ifdef HAVE_PK_CALLBACKS
+    if (ssl->ctx->X448KeyGenCb) {
+        void* ctx = wolfSSL_GetX448KeyGenCtx(ssl);
+        ret = ssl->ctx->X448KeyGenCb(ssl, key, CURVE448_KEY_SIZE, ctx);
+    }
+    else
+#endif
+    {
+        ret = wc_curve448_make_key(ssl->rng, CURVE448_KEY_SIZE, key);
+    }
+
+    if (ret == 0) {
+        ssl->ecdhCurveOID = ECC_X448_OID;
+    #if defined(WOLFSSL_TLS13) || defined(HAVE_FFDHE)
+        ssl->namedGroup = 0;
+    #endif
+    }
+
+    /* Handle async pending response */
+#ifdef WOLFSSL_ASYNC_CRYPT
+    if (ret == WC_PENDING_E) {
+        ret = wolfSSL_AsyncPush(ssl, &key->asyncDev);
+    }
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+    WOLFSSL_LEAVE("X448MakeKey", ret);
+
+    return ret;
+}
+#endif /* HAVE_CURVE448 */
+
 #if !defined(NO_CERTS) || !defined(NO_PSK)
 #if !defined(NO_DH)
 
@@ -4048,8 +4999,20 @@
     else
 #endif
     {
-        ret = wc_DhAgree(dhKey, agree, agreeSz, priv, privSz, otherPub,
-                otherPubSz);
+#if !defined(HAVE_FIPS) && !defined(HAVE_SELFTEST)
+        ret = wc_DhCheckPubValue(ssl->buffers.serverDH_P.buffer,
+                    ssl->buffers.serverDH_P.length, otherPub, otherPubSz);
+        if (ret != 0) {
+    #ifdef OPENSSL_EXTRA
+            SendAlert(ssl, alert_fatal, illegal_parameter);
+    #endif
+        }
+        else
+#endif
+        {
+            ret = wc_DhAgree(dhKey, agree, agreeSz, priv, privSz, otherPub,
+                    otherPubSz);
+        }
     }
 
     /* Handle async pending response */
@@ -4070,11 +5033,47 @@
 
 
 #ifdef HAVE_PK_CALLBACKS
+int wolfSSL_IsPrivatePkSet(WOLFSSL* ssl)
+{
+    int pkcbset = 0;
+    (void)ssl;
+
+#if defined(HAVE_ECC) || defined(HAVE_ED25519) || defined(HAVE_ED448) || \
+                                                                !defined(NO_RSA)
+    if (0
+    #ifdef HAVE_ECC
+        || (ssl->ctx->EccSignCb != NULL &&
+                                        ssl->buffers.keyType == ecc_dsa_sa_algo)
+    #endif
+    #ifdef HAVE_ED25519
+        || (ssl->ctx->Ed25519SignCb != NULL &&
+                                        ssl->buffers.keyType == ed25519_sa_algo)
+    #endif
+    #ifdef HAVE_ED448
+        || (ssl->ctx->Ed448SignCb != NULL &&
+                                          ssl->buffers.keyType == ed448_sa_algo)
+    #endif
+    #ifndef NO_RSA
+        || (ssl->ctx->RsaSignCb != NULL && ssl->buffers.keyType == rsa_sa_algo)
+        || (ssl->ctx->RsaDecCb != NULL && ssl->buffers.keyType == rsa_kea)
+        #ifdef WC_RSA_PSS
+        || (ssl->ctx->RsaPssSignCb != NULL &&
+                                        ssl->buffers.keyType == rsa_pss_sa_algo)
+        #endif
+    #endif
+    ) {
+        pkcbset = 1;
+    }
+#endif
+    return pkcbset;
+}
+
 int wolfSSL_CTX_IsPrivatePkSet(WOLFSSL_CTX* ctx)
 {
     int pkcbset = 0;
     (void)ctx;
-#if defined(HAVE_ECC) || defined(HAVE_ED25519) || !defined(NO_RSA)
+#if defined(HAVE_ECC) || defined(HAVE_ED25519) || defined(HAVE_ED448) || \
+                                                                !defined(NO_RSA)
     if (0
     #ifdef HAVE_ECC
         || ctx->EccSignCb != NULL
@@ -4082,6 +5081,9 @@
     #ifdef HAVE_ED25519
         || ctx->Ed25519SignCb != NULL
     #endif
+    #ifdef HAVE_ED448
+        || ctx->Ed448SignCb != NULL
+    #endif
     #ifndef NO_RSA
         || ctx->RsaSignCb != NULL
         || ctx->RsaDecCb != NULL
@@ -4097,6 +5099,119 @@
 }
 #endif /* HAVE_PK_CALLBACKS */
 
+
+int InitSSL_Suites(WOLFSSL* ssl)
+{
+    int keySz = 0;
+    byte havePSK = 0;
+    byte haveAnon = 0;
+    byte haveRSA = 0;
+    byte haveMcast = 0;
+
+    (void)haveAnon; /* Squash unused var warnings */
+    (void)haveMcast;
+
+    if (!ssl)
+        return BAD_FUNC_ARG;
+
+#ifndef NO_RSA
+    haveRSA = 1;
+#endif
+#ifndef NO_PSK
+    havePSK = (byte)ssl->options.havePSK;
+#endif /* NO_PSK */
+#ifdef HAVE_ANON
+    haveAnon = (byte)ssl->options.haveAnon;
+#endif /* HAVE_ANON*/
+#ifdef WOLFSSL_MULTICAST
+    haveMcast = (byte)ssl->options.haveMcast;
+#endif /* WOLFSSL_MULTICAST */
+
+#ifdef WOLFSSL_EARLY_DATA
+    if (ssl->options.side == WOLFSSL_SERVER_END)
+        ssl->options.maxEarlyDataSz = ssl->ctx->maxEarlyDataSz;
+#endif
+#if !defined(WOLFSSL_NO_CLIENT_AUTH) && \
+               ((defined(HAVE_ED25519) && !defined(NO_ED25519_CLIENT_AUTH)) || \
+                (defined(HAVE_ED448) && !defined(NO_ED448_CLIENT_AUTH)))
+    ssl->options.cacheMessages = ssl->options.side == WOLFSSL_SERVER_END ||
+                                      ssl->buffers.keyType == ed25519_sa_algo ||
+                                      ssl->buffers.keyType == ed448_sa_algo;
+#endif
+
+#ifndef NO_CERTS
+    keySz = ssl->buffers.keySz;
+#endif
+
+    /* make sure server has DH parms, and add PSK if there, add NTRU too */
+    if (ssl->options.side == WOLFSSL_SERVER_END) {
+        InitSuites(ssl->suites, ssl->version, keySz, haveRSA, havePSK,
+                   ssl->options.haveDH, ssl->options.haveNTRU,
+                   ssl->options.haveECDSAsig, ssl->options.haveECC,
+                   ssl->options.haveStaticECC, ssl->options.side);
+    }
+    else {
+        InitSuites(ssl->suites, ssl->version, keySz, haveRSA, havePSK,
+                   TRUE, ssl->options.haveNTRU,
+                   ssl->options.haveECDSAsig, ssl->options.haveECC,
+                   ssl->options.haveStaticECC, ssl->options.side);
+    }
+
+#if !defined(NO_CERTS) && !defined(WOLFSSL_SESSION_EXPORT)
+    /* make sure server has cert and key unless using PSK, Anon, or
+     * Multicast. This should be true even if just switching ssl ctx */
+    if (ssl->options.side == WOLFSSL_SERVER_END &&
+            !havePSK && !haveAnon && !haveMcast) {
+
+        /* server certificate must be loaded */
+        if (!ssl->buffers.certificate || !ssl->buffers.certificate->buffer) {
+            WOLFSSL_MSG("Server missing certificate");
+            return NO_PRIVATE_KEY;
+        }
+
+        /* allow no private key if using PK callbacks and CB is set */
+    #ifdef HAVE_PK_CALLBACKS
+        if (wolfSSL_CTX_IsPrivatePkSet(ssl->ctx)) {
+            WOLFSSL_MSG("Using PK for server private key");
+        }
+        else
+    #endif
+        if (!ssl->buffers.key || !ssl->buffers.key->buffer) {
+            WOLFSSL_MSG("Server missing private key");
+            return NO_PRIVATE_KEY;
+        }
+    }
+#endif
+
+    return WOLFSSL_SUCCESS;
+}
+
+/* returns new reference count. Arg incr positive=up or negative=down */
+int SSL_CTX_RefCount(WOLFSSL_CTX* ctx, int incr)
+{
+    int refCount;
+
+    if (ctx == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    if (wc_LockMutex(&ctx->countMutex) != 0) {
+        WOLFSSL_MSG("Couldn't lock CTX count mutex");
+        return BAD_MUTEX_E;
+    }
+
+    ctx->refCount += incr;
+    /* make sure refCount is never negative */
+    if (ctx->refCount < 0) {
+        ctx->refCount = 0;
+    }
+    refCount = ctx->refCount;
+
+    wc_UnLockMutex(&ctx->countMutex);
+
+    return refCount;
+}
+
 /* This function inherits a WOLFSSL_CTX's fields into an SSL object.
    It is used during initialization and to switch an ssl's CTX with
    wolfSSL_Set_SSL_CTX.  Requires ssl->suites alloc and ssl-arrays with PSK
@@ -4109,20 +5224,16 @@
    WOLFSSL_SUCCESS return value on success */
 int SetSSL_CTX(WOLFSSL* ssl, WOLFSSL_CTX* ctx, int writeDup)
 {
-    byte havePSK = 0;
-    byte haveAnon = 0;
+    int ret;
     byte newSSL;
-    byte haveRSA = 0;
-    byte haveMcast = 0;
-
-    (void)haveAnon; /* Squash unused var warnings */
-    (void)haveMcast;
 
     if (!ssl || !ctx)
         return BAD_FUNC_ARG;
 
+#ifndef SINGLE_THREADED
     if (ssl->suites == NULL && !writeDup)
         return BAD_FUNC_ARG;
+#endif
 
     newSSL = ssl->ctx == NULL; /* Assign after null check */
 
@@ -4132,20 +5243,6 @@
     }
 #endif
 
-
-#ifndef NO_RSA
-    haveRSA = 1;
-#endif
-#ifndef NO_PSK
-    havePSK = ctx->havePSK;
-#endif /* NO_PSK */
-#ifdef HAVE_ANON
-    haveAnon = ctx->haveAnon;
-#endif /* HAVE_ANON*/
-#ifdef WOLFSSL_MULTICAST
-    haveMcast = ctx->haveMcast;
-#endif /* WOLFSSL_MULTICAST */
-
     /* decrement previous CTX reference count if exists.
      * This should only happen if switching ctxs!*/
     if (!newSSL) {
@@ -4154,12 +5251,11 @@
     }
 
     /* increment CTX reference count */
-    if (wc_LockMutex(&ctx->countMutex) != 0) {
-        WOLFSSL_MSG("Couldn't lock CTX count mutex");
-        return BAD_MUTEX_E;
-    }
-    ctx->refCount++;
-    wc_UnLockMutex(&ctx->countMutex);
+    if ((ret = SSL_CTX_RefCount(ctx, 1)) < 0) {
+        return ret;
+    }
+    ret = WOLFSSL_SUCCESS; /* set default ret */
+
     ssl->ctx     = ctx; /* only for passing to calls, options could change */
     ssl->version = ctx->method->version;
 
@@ -4167,7 +5263,7 @@
     ssl->eccTempKeySz = ctx->eccTempKeySz;
     ssl->ecdhCurveOID = ctx->ecdhCurveOID;
 #endif
-#if defined(HAVE_ECC) || defined(HAVE_ED25519)
+#if defined(HAVE_ECC) || defined(HAVE_ED25519) || defined(HAVE_ED448)
     ssl->pkCurveOID = ctx->pkCurveOID;
 #endif
 
@@ -4191,6 +5287,10 @@
     ssl->options.havePSK   = ctx->havePSK;
     ssl->options.client_psk_cb = ctx->client_psk_cb;
     ssl->options.server_psk_cb = ctx->server_psk_cb;
+#ifdef WOLFSSL_TLS13
+    ssl->options.client_psk_tls13_cb = ctx->client_psk_tls13_cb;
+    ssl->options.server_psk_tls13_cb = ctx->server_psk_tls13_cb;
+#endif
 #endif /* NO_PSK */
 #ifdef WOLFSSL_EARLY_DATA
     if (ssl->options.side == WOLFSSL_SERVER_END)
@@ -4210,7 +5310,7 @@
 #ifdef HAVE_ECC
     ssl->options.minEccKeySz = ctx->minEccKeySz;
 #endif
-#ifdef OPENSSL_EXTRA
+#if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
     ssl->options.verifyDepth = ctx->verifyDepth;
 #endif
 
@@ -4231,6 +5331,10 @@
     ssl->options.groupMessages = ctx->groupMessages;
 
 #ifndef NO_DH
+    #if !defined(WOLFSSL_OLD_PRIME_CHECK) && !defined(HAVE_FIPS) && \
+        !defined(HAVE_SELFTEST)
+        ssl->options.dhKeyTested = ctx->dhKeyTested;
+    #endif
     ssl->buffers.serverDH_P = ctx->serverDH_P;
     ssl->buffers.serverDH_G = ctx->serverDH_G;
 #endif
@@ -4242,14 +5346,18 @@
 #ifdef WOLFSSL_TLS13
     ssl->buffers.certChainCnt = ctx->certChainCnt;
 #endif
-    ssl->buffers.key     = ctx->privateKey;
-    ssl->buffers.keyType = ctx->privateKeyType;
-    ssl->buffers.keySz   = ctx->privateKeySz;
-#endif
-#if !defined(WOLFSSL_NO_CLIENT_AUTH) && defined(HAVE_ED25519) && \
-                                        !defined(NO_ED25519_CLIENT_AUTH)
+    ssl->buffers.key      = ctx->privateKey;
+    ssl->buffers.keyType  = ctx->privateKeyType;
+    ssl->buffers.keyId    = ctx->privateKeyId;
+    ssl->buffers.keySz    = ctx->privateKeySz;
+    ssl->buffers.keyDevId = ctx->privateKeyDevId;
+#endif
+#if !defined(WOLFSSL_NO_CLIENT_AUTH) && \
+               ((defined(HAVE_ED25519) && !defined(NO_ED25519_CLIENT_AUTH)) || \
+                (defined(HAVE_ED448) && !defined(NO_ED448_CLIENT_AUTH)))
     ssl->options.cacheMessages = ssl->options.side == WOLFSSL_SERVER_END ||
-                                        ssl->buffers.keyType == ed25519_sa_algo;
+                                      ssl->buffers.keyType == ed25519_sa_algo ||
+                                      ssl->buffers.keyType == ed448_sa_algo;
 #endif
 
 
@@ -4258,11 +5366,6 @@
 #endif
 
     if (writeDup == 0) {
-        int keySz = 0;
-#ifndef NO_CERTS
-        keySz = ssl->buffers.keySz;
-#endif
-
 #ifndef NO_PSK
         if (ctx->server_hint[0]) {   /* set in CTX */
             XSTRNCPY(ssl->arrays->server_hint, ctx->server_hint,
@@ -4271,49 +5374,21 @@
         }
 #endif /* NO_PSK */
 
-        if (ctx->suites)
+        if (ctx->suites) {
+#ifndef SINGLE_THREADED
             *ssl->suites = *ctx->suites;
-        else
+#else
+            ssl->suites = ctx->suites;
+#endif
+        }
+        else {
             XMEMSET(ssl->suites, 0, sizeof(Suites));
-
-        /* make sure server has DH parms, and add PSK if there, add NTRU too */
-        if (ssl->options.side == WOLFSSL_SERVER_END)
-            InitSuites(ssl->suites, ssl->version, keySz, haveRSA, havePSK,
-                       ssl->options.haveDH, ssl->options.haveNTRU,
-                       ssl->options.haveECDSAsig, ssl->options.haveECC,
-                       ssl->options.haveStaticECC, ssl->options.side);
-        else
-            InitSuites(ssl->suites, ssl->version, keySz, haveRSA, havePSK,
-                       TRUE, ssl->options.haveNTRU,
-                       ssl->options.haveECDSAsig, ssl->options.haveECC,
-                       ssl->options.haveStaticECC, ssl->options.side);
-
-#if !defined(NO_CERTS) && !defined(WOLFSSL_SESSION_EXPORT)
-        /* make sure server has cert and key unless using PSK, Anon, or
-         * Multicast. This should be true even if just switching ssl ctx */
-        if (ssl->options.side == WOLFSSL_SERVER_END &&
-                !havePSK && !haveAnon && !haveMcast) {
-
-            /* server certificate must be loaded */
-            if (!ssl->buffers.certificate || !ssl->buffers.certificate->buffer) {
-                WOLFSSL_MSG("Server missing certificate");
-                return NO_PRIVATE_KEY;
-            }
-
-            /* allow no private key if using PK callbacks and CB is set */
-        #ifdef HAVE_PK_CALLBACKS
-            if (wolfSSL_CTX_IsPrivatePkSet(ctx)) {
-                WOLFSSL_MSG("Using PK for server private key");
-            }
-            else
-        #endif
-            if (!ssl->buffers.key || !ssl->buffers.key->buffer) {
-                WOLFSSL_MSG("Server missing private key");
-                return NO_PRIVATE_KEY;
-            }
-        }
-#endif
-
+        }
+
+        if (ssl->options.side != WOLFSSL_NEITHER_END) {
+            /* Defer initializing suites until accept or connect */
+            ret = InitSSL_Suites(ssl);
+        }
     }  /* writeDup check */
 
 #ifdef WOLFSSL_SESSION_EXPORT
@@ -4329,7 +5404,7 @@
 #endif
     ssl->verifyDepth = ctx->verifyDepth;
 
-    return WOLFSSL_SUCCESS;
+    return ret;
 }
 
 int InitHandshakeHashes(WOLFSSL* ssl)
@@ -4355,27 +5430,42 @@
     ret = wc_InitMd5_ex(&ssl->hsHashes->hashMd5, ssl->heap, ssl->devId);
     if (ret != 0)
         return ret;
+    #if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+        wc_Md5SetFlags(&ssl->hsHashes->hashMd5, WC_HASH_FLAG_WILLCOPY);
+    #endif
 #endif
 #ifndef NO_SHA
     ret = wc_InitSha_ex(&ssl->hsHashes->hashSha, ssl->heap, ssl->devId);
     if (ret != 0)
         return ret;
+    #if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+        wc_ShaSetFlags(&ssl->hsHashes->hashSha, WC_HASH_FLAG_WILLCOPY);
+    #endif
 #endif
 #endif /* !NO_OLD_TLS */
 #ifndef NO_SHA256
     ret = wc_InitSha256_ex(&ssl->hsHashes->hashSha256, ssl->heap, ssl->devId);
     if (ret != 0)
         return ret;
+    #if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+        wc_Sha256SetFlags(&ssl->hsHashes->hashSha256, WC_HASH_FLAG_WILLCOPY);
+    #endif
 #endif
 #ifdef WOLFSSL_SHA384
     ret = wc_InitSha384_ex(&ssl->hsHashes->hashSha384, ssl->heap, ssl->devId);
     if (ret != 0)
         return ret;
+    #if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+        wc_Sha384SetFlags(&ssl->hsHashes->hashSha384, WC_HASH_FLAG_WILLCOPY);
+    #endif
 #endif
 #ifdef WOLFSSL_SHA512
     ret = wc_InitSha512_ex(&ssl->hsHashes->hashSha512, ssl->heap, ssl->devId);
     if (ret != 0)
         return ret;
+    #if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+        wc_Sha512SetFlags(&ssl->hsHashes->hashSha512, WC_HASH_FLAG_WILLCOPY);
+    #endif
 #endif
 
     return ret;
@@ -4401,7 +5491,8 @@
     #ifdef WOLFSSL_SHA512
         wc_Sha512Free(&ssl->hsHashes->hashSha512);
     #endif
-    #if defined(HAVE_ED25519) && !defined(WOLFSSL_NO_CLIENT_AUTH)
+    #if (defined(HAVE_ED25519) || defined(HAVE_ED448)) && \
+                                                !defined(WOLFSSL_NO_CLIENT_AUTH)
         if (ssl->hsHashes->messages != NULL) {
             XFREE(ssl->hsHashes->messages, ssl->heap, DYNAMIC_TYPE_HASHES);
             ssl->hsHashes->messages = NULL;
@@ -4433,7 +5524,7 @@
         WOLFSSL_HEAP_HINT* ssl_hint;
         WOLFSSL_HEAP_HINT* ctx_hint;
 
-        /* avoid derefrencing a test value */
+        /* avoid dereferencing a test value */
     #ifdef WOLFSSL_HEAP_TEST
         if (ctx->heap == (void*)WOLFSSL_HEAP_TEST) {
             ssl->heap = ctx->heap;
@@ -4541,8 +5632,17 @@
 #ifdef HAVE_NETX
     ssl->IOCB_ReadCtx  = &ssl->nxCtx;  /* default NetX IO ctx, same for read */
     ssl->IOCB_WriteCtx = &ssl->nxCtx;  /* and write */
-#endif
-
+#elif defined(WOLFSSL_APACHE_MYNEWT) && !defined(WOLFSSL_LWIP)
+    ssl->mnCtx = mynewt_ctx_new();
+    if(!ssl->mnCtx) {
+        return MEMORY_E;
+    }
+    ssl->IOCB_ReadCtx  = ssl->mnCtx;  /* default Mynewt IO ctx, same for read */
+    ssl->IOCB_WriteCtx = ssl->mnCtx;  /* and write */
+#elif defined (WOLFSSL_GNRC)
+    ssl->IOCB_ReadCtx = ssl->gnrcCtx;
+    ssl->IOCB_WriteCtx = ssl->gnrcCtx;
+#endif
     /* initialize states */
     ssl->options.serverState = NULL_STATE;
     ssl->options.clientState = NULL_STATE;
@@ -4554,10 +5654,18 @@
     ssl->options.buildMsgState = BUILD_MSG_BEGIN;
     ssl->encrypt.state = CIPHER_STATE_BEGIN;
     ssl->decrypt.state = CIPHER_STATE_BEGIN;
+#ifndef NO_DH
+    #if !defined(WOLFSSL_OLD_PRIME_CHECK) && !defined(HAVE_FIPS) && \
+        !defined(HAVE_SELFTEST)
+        ssl->options.dhDoKeyTest = 1;
+    #endif
+#endif
 
 #ifdef WOLFSSL_DTLS
     #ifdef WOLFSSL_SCTP
         ssl->options.dtlsSctp           = ctx->dtlsSctp;
+    #endif
+    #if defined(WOLFSSL_SCTP) || defined(WOLFSSL_DTLS_MTU)
         ssl->dtlsMtuSz                  = ctx->dtlsMtuSz;
         ssl->dtls_expected_rx           = ssl->dtlsMtuSz;
     #else
@@ -4570,11 +5678,13 @@
     ssl->buffers.dtlsCtx.wfd            = -1;
 #endif
 
+#ifndef WOLFSSL_AEAD_ONLY
     #ifndef NO_OLD_TLS
         ssl->hmac = SSL_hmac; /* default to SSLv3 */
     #elif !defined(WOLFSSL_NO_TLS12)
         ssl->hmac = TLS_hmac;
     #endif
+#endif
 
 
     ssl->cipher.ssl = ssl;
@@ -4583,6 +5693,7 @@
     ssl->options.haveEMS = ctx->haveEMS;
 #endif
     ssl->options.useClientOrder = ctx->useClientOrder;
+    ssl->options.mutualAuth = ctx->mutualAuth;
 
 #ifdef WOLFSSL_TLS13
     #ifdef HAVE_SESSION_TICKET
@@ -4615,6 +5726,10 @@
 #endif
 #endif /* HAVE_TLS_EXTENSIONS */
 
+#if defined(HAVE_ENCRYPT_THEN_MAC) && !defined(WOLFSSL_AEAD_ONLY)
+    ssl->options.disallowEncThenMac = ctx->disallowEncThenMac;
+#endif
+
     /* default alert state (none) */
     ssl->alert_history.last_rx.code  = -1;
     ssl->alert_history.last_rx.level = -1;
@@ -4626,6 +5741,7 @@
     ssl->sessionCtxSz = ctx->sessionCtxSz;
     XMEMCPY(ssl->sessionCtx, ctx->sessionCtx, ctx->sessionCtxSz);
     ssl->cbioFlag = ctx->cbioFlag;
+
 #endif
 
     InitCiphers(ssl);
@@ -4642,20 +5758,49 @@
             return MEMORY_E;
         }
         XMEMSET(ssl->arrays, 0, sizeof(Arrays));
+#if defined(WOLFSSL_TLS13) || defined(WOLFSSL_SNIFFER)
+        ssl->arrays->preMasterSz = ENCRYPT_LEN;
         ssl->arrays->preMasterSecret = (byte*)XMALLOC(ENCRYPT_LEN, ssl->heap,
             DYNAMIC_TYPE_SECRET);
         if (ssl->arrays->preMasterSecret == NULL) {
             return MEMORY_E;
         }
         XMEMSET(ssl->arrays->preMasterSecret, 0, ENCRYPT_LEN);
-
-        /* suites */
-        ssl->suites = (Suites*)XMALLOC(sizeof(Suites), ssl->heap,
-                                   DYNAMIC_TYPE_SUITES);
-        if (ssl->suites == NULL) {
-            WOLFSSL_MSG("Suites Memory error");
-            return MEMORY_E;
-        }
+#endif
+
+#ifdef OPENSSL_EXTRA
+    if ((ssl->param = (WOLFSSL_X509_VERIFY_PARAM*)XMALLOC(
+                           sizeof(WOLFSSL_X509_VERIFY_PARAM),
+                           ssl->heap, DYNAMIC_TYPE_OPENSSL)) == NULL) {
+        WOLFSSL_MSG("ssl->param memory error");
+        return MEMORY_E;
+    }
+    XMEMSET(ssl->param, 0, sizeof(WOLFSSL_X509_VERIFY_PARAM));
+#endif
+
+#ifdef SINGLE_THREADED
+        if (ctx->suites == NULL)
+#endif
+        {
+            /* suites */
+            ssl->suites = (Suites*)XMALLOC(sizeof(Suites), ssl->heap,
+                                       DYNAMIC_TYPE_SUITES);
+            if (ssl->suites == NULL) {
+                WOLFSSL_MSG("Suites Memory error");
+                return MEMORY_E;
+            }
+        #ifdef OPENSSL_ALL
+            ssl->suites->stack = NULL;
+        #endif
+#ifdef SINGLE_THREADED
+            ssl->options.ownSuites = 1;
+#endif
+        }
+#ifdef SINGLE_THREADED
+        else {
+            ssl->options.ownSuites = 0;
+        }
+#endif
     }
 
     /* Initialize SSL with the appropriate fields from it's ctx */
@@ -4718,6 +5863,10 @@
 #ifdef HAVE_SECRET_CALLBACK
     ssl->sessionSecretCb  = NULL;
     ssl->sessionSecretCtx = NULL;
+#ifdef WOLFSSL_TLS13
+    ssl->tls13SecretCb  = NULL;
+    ssl->tls13SecretCtx = NULL;
+#endif
 #endif
 
 #ifdef HAVE_SESSION_TICKET
@@ -4746,13 +5895,19 @@
 #endif
 
 #ifdef HAVE_SECURE_RENEGOTIATION
-    /* use secure renegotiation by default (not recommend) */
+    if (ssl->options.side == WOLFSSL_CLIENT_END) {
+        int useSecureReneg = ssl->ctx->useSecureReneg;
+        /* use secure renegotiation by default (not recommend) */
     #ifdef WOLFSSL_SECURE_RENEGOTIATION_ON_BY_DEFAULT
-        ret = wolfSSL_UseSecureRenegotiation(ssl);
-        if (ret != WOLFSSL_SUCCESS)
-            return ret;
-    #endif
-#endif
+        useSecureReneg = 1;
+    #endif
+        if (useSecureReneg) {
+            ret = wolfSSL_UseSecureRenegotiation(ssl);
+            if (ret != WOLFSSL_SUCCESS)
+                return ret;
+            }
+    }
+#endif /* HAVE_SECURE_RENEGOTIATION */
 
     return 0;
 }
@@ -4797,12 +5952,22 @@
             case DYNAMIC_TYPE_ED25519:
                 wc_ed25519_free((ed25519_key*)*pKey);
                 break;
-        #endif /* HAVE_CURVE25519 */
+        #endif /* HAVE_ED25519 */
         #ifdef HAVE_CURVE25519
             case DYNAMIC_TYPE_CURVE25519:
                 wc_curve25519_free((curve25519_key*)*pKey);
                 break;
         #endif /* HAVE_CURVE25519 */
+        #ifdef HAVE_ED448
+            case DYNAMIC_TYPE_ED448:
+                wc_ed448_free((ed448_key*)*pKey);
+                break;
+        #endif /* HAVE_ED448 */
+        #ifdef HAVE_CURVE448
+            case DYNAMIC_TYPE_CURVE448:
+                wc_curve448_free((curve448_key*)*pKey);
+                break;
+        #endif /* HAVE_CURVE448 */
         #ifndef NO_DH
             case DYNAMIC_TYPE_DH:
                 wc_FreeDhKey((DhKey*)*pKey);
@@ -4855,6 +6020,16 @@
             sz = sizeof(curve25519_key);
             break;
     #endif /* HAVE_CURVE25519 */
+    #ifdef HAVE_ED448
+        case DYNAMIC_TYPE_ED448:
+            sz = sizeof(ed448_key);
+            break;
+    #endif /* HAVE_ED448 */
+    #ifdef HAVE_CURVE448
+        case DYNAMIC_TYPE_CURVE448:
+            sz = sizeof(curve448_key);
+            break;
+    #endif /* HAVE_CURVE448 */
     #ifndef NO_DH
         case DYNAMIC_TYPE_DH:
             sz = sizeof(DhKey);
@@ -4868,7 +6043,7 @@
         return NOT_COMPILED_IN;
     }
 
-    /* Allocate memeory for key */
+    /* Allocate memory for key */
     *pKey = XMALLOC(sz, ssl->heap, type);
     if (*pKey == NULL) {
         return MEMORY_E;
@@ -4898,6 +6073,18 @@
             ret = 0;
             break;
     #endif /* HAVE_CURVE25519 */
+    #ifdef HAVE_ED448
+        case DYNAMIC_TYPE_ED448:
+            wc_ed448_init((ed448_key*)*pKey);
+            ret = 0;
+            break;
+    #endif /* HAVE_CURVE448 */
+    #ifdef HAVE_CURVE448
+        case DYNAMIC_TYPE_CURVE448:
+            wc_curve448_init((curve448_key*)*pKey);
+            ret = 0;
+            break;
+    #endif /* HAVE_CURVE448 */
     #ifndef NO_DH
         case DYNAMIC_TYPE_DH:
             ret = wc_InitDhKey_ex((DhKey*)*pKey, ssl->heap, ssl->devId);
@@ -4916,7 +6103,7 @@
 }
 
 #if !defined(NO_RSA) || defined(HAVE_ECC) || defined(HAVE_ED25519) || \
-    defined(HAVE_CURVE25519)
+    defined(HAVE_CURVE25519) || defined(HHAVE_ED448) || defined(HAVE_CURVE448)
 static int ReuseKey(WOLFSSL* ssl, int type, void* pKey)
 {
     int ret = 0;
@@ -4948,6 +6135,18 @@
             ret = wc_curve25519_init((curve25519_key*)pKey);
             break;
     #endif /* HAVE_CURVE25519 */
+    #ifdef HAVE_ED448
+        case DYNAMIC_TYPE_ED448:
+            wc_ed448_free((ed448_key*)pKey);
+            ret = wc_ed448_init((ed448_key*)pKey);
+            break;
+    #endif /* HAVE_CURVE448 */
+    #ifdef HAVE_CURVE448
+        case DYNAMIC_TYPE_CURVE448:
+            wc_curve448_free((curve448_key*)pKey);
+            ret = wc_curve448_init((curve448_key*)pKey);
+            break;
+    #endif /* HAVE_CURVE448 */
     #ifndef NO_DH
         case DYNAMIC_TYPE_DH:
             wc_FreeDhKey((DhKey*)pKey);
@@ -4995,6 +6194,23 @@
 #endif
 }
 
+
+/* Free up all memory used by Suites structure from WOLFSSL */
+void FreeSuites(WOLFSSL* ssl)
+{
+#ifdef SINGLE_THREADED
+    if (ssl->options.ownSuites)
+#endif
+    {
+    #ifdef OPENSSL_ALL
+        wolfSSL_sk_SSL_CIPHER_free(ssl->suites->stack);
+    #endif
+        XFREE(ssl->suites, ssl->heap, DYNAMIC_TYPE_SUITES);
+    }
+    ssl->suites = NULL;
+}
+
+
 /* In case holding SSL object in array and don't want to free actual ssl */
 void SSL_ResourceFree(WOLFSSL* ssl)
 {
@@ -5011,13 +6227,20 @@
         wc_FreeRng(ssl->rng);
         XFREE(ssl->rng, ssl->heap, DYNAMIC_TYPE_RNG);
     }
-    XFREE(ssl->suites, ssl->heap, DYNAMIC_TYPE_SUITES);
+    FreeSuites(ssl);
     FreeHandshakeHashes(ssl);
     XFREE(ssl->buffers.domainName.buffer, ssl->heap, DYNAMIC_TYPE_DOMAIN);
 
     /* clear keys struct after session */
     ForceZero(&ssl->keys, sizeof(Keys));
 
+#ifdef WOLFSSL_TLS13
+    if (ssl->options.tls1_3) {
+        ForceZero(&ssl->clientSecret, sizeof(ssl->clientSecret));
+        ForceZero(&ssl->serverSecret, sizeof(ssl->serverSecret));
+    }
+#endif
+
 #ifndef NO_DH
     if (ssl->buffers.serverDH_Priv.buffer) {
         ForceZero(ssl->buffers.serverDH_Priv.buffer,
@@ -5039,6 +6262,9 @@
     FreeKey(ssl, DYNAMIC_TYPE_RSA, (void**)&ssl->peerRsaKey);
     ssl->peerRsaKeyPresent = 0;
 #endif
+#ifdef WOLFSSL_RENESAS_TSIP_TLS
+    XFREE(ssl->peerTsipEncRsaKeyIndex, ssl->heap, DYNAMIC_TYPE_RSA);
+#endif
     if (ssl->buffers.inputBuffer.dynamicFlag)
         ShrinkInputBuffer(ssl, FORCED_FREE);
     if (ssl->buffers.outputBuffer.dynamicFlag)
@@ -5065,6 +6291,8 @@
     if (ssl->biord != ssl->biowr)        /* only free write if different */
         wolfSSL_BIO_free(ssl->biowr);
     wolfSSL_BIO_free(ssl->biord);        /* always free read bio */
+    ssl->biowr = NULL;
+    ssl->biord = NULL;
 #endif
 #ifdef HAVE_LIBZ
     FreeStreams(ssl);
@@ -5075,25 +6303,36 @@
     FreeKey(ssl, DYNAMIC_TYPE_ECC, (void**)&ssl->peerEccDsaKey);
     ssl->peerEccDsaKeyPresent = 0;
 #endif
-#if defined(HAVE_ECC) || defined(HAVE_CURVE25519)
-    {
-        int dtype;
+#if defined(HAVE_ECC) || defined(HAVE_CURVE25519) ||defined(HAVE_CURVE448)
+    {
+        int dtype = 0;
     #ifdef HAVE_ECC
         dtype = DYNAMIC_TYPE_ECC;
     #endif
     #ifdef HAVE_CURVE25519
+        if (ssl->peerX25519KeyPresent
     #ifdef HAVE_ECC
-        if (ssl->peerX25519KeyPresent ||
-                              ssl->eccTempKeyPresent == DYNAMIC_TYPE_CURVE25519)
+                           || ssl->eccTempKeyPresent == DYNAMIC_TYPE_CURVE25519
     #endif /* HAVE_ECC */
-         {
+           )
+        {
             dtype = DYNAMIC_TYPE_CURVE25519;
-         }
+        }
     #endif /* HAVE_CURVE25519 */
+    #ifdef HAVE_CURVE448
+        if (ssl->peerX448KeyPresent
+    #ifdef HAVE_ECC
+                             || ssl->eccTempKeyPresent == DYNAMIC_TYPE_CURVE448
+    #endif /* HAVE_ECC */
+           )
+        {
+            dtype = DYNAMIC_TYPE_CURVE448;
+        }
+    #endif /* HAVE_CURVE448 */
         FreeKey(ssl, dtype, (void**)&ssl->eccTempKey);
         ssl->eccTempKeyPresent = 0;
     }
-#endif /* HAVE_ECC || HAVE_CURVE25519 */
+#endif /* HAVE_ECC || HAVE_CURVE25519 || HAVE_CURVE448 */
 #ifdef HAVE_CURVE25519
     FreeKey(ssl, DYNAMIC_TYPE_CURVE25519, (void**)&ssl->peerX25519Key);
     ssl->peerX25519KeyPresent = 0;
@@ -5109,6 +6348,21 @@
         }
     #endif
 #endif
+#ifdef HAVE_CURVE448
+    FreeKey(ssl, DYNAMIC_TYPE_CURVE448, (void**)&ssl->peerX448Key);
+    ssl->peerX448KeyPresent = 0;
+#endif
+#ifdef HAVE_ED448
+    FreeKey(ssl, DYNAMIC_TYPE_ED448, (void**)&ssl->peerEd448Key);
+    ssl->peerEd448KeyPresent = 0;
+    #ifdef HAVE_PK_CALLBACKS
+        if (ssl->buffers.peerEd448Key.buffer != NULL) {
+            XFREE(ssl->buffers.peerEd448Key.buffer, ssl->heap,
+                                                            DYNAMIC_TYPE_ED448);
+            ssl->buffers.peerEd448Key.buffer = NULL;
+        }
+    #endif
+#endif
 #ifdef HAVE_PK_CALLBACKS
     #ifdef HAVE_ECC
         XFREE(ssl->buffers.peerEccDsaKey.buffer, ssl->heap, DYNAMIC_TYPE_ECC);
@@ -5127,6 +6381,12 @@
     }
 #endif
 #endif /* HAVE_TLS_EXTENSIONS */
+#if defined(WOLFSSL_APACHE_MYNEWT) && !defined(WOLFSSL_LWIP)
+    if (ssl->mnCtx) {
+        mynewt_ctx_clear(ssl->mnCtx);
+        ssl->mnCtx = NULL;
+    }
+#endif
 #ifdef HAVE_NETX
     if (ssl->nxCtx.nxPacket)
         nx_packet_release(ssl->nxCtx.nxPacket);
@@ -5151,7 +6411,11 @@
         FreeWriteDup(ssl);
     }
 #endif
-
+#ifdef OPENSSL_EXTRA
+    if (ssl->param) {
+        XFREE(ssl->param, ssl->heap, DYNAMIC_TYPE_OPENSSL);
+    }
+#endif
 #if defined(WOLFSSL_TLS13) && defined(WOLFSSL_POST_HANDSHAKE_AUTH)
     while (ssl->certReqCtx != NULL) {
         CertReqCtx* curr = ssl->certReqCtx;
@@ -5197,6 +6461,10 @@
     #endif
     }
 #endif /* WOLFSSL_STATIC_MEMORY */
+#if defined(OPENSSL_ALL) || defined(WOLFSSL_QT)
+    wolfSSL_sk_CIPHER_free(ssl->supportedCiphers);
+    wolfSSL_sk_X509_free(ssl->peerCertChain);
+#endif
 }
 
 /* Free any handshake resources no longer needed */
@@ -5214,15 +6482,31 @@
     if (ssl->buffers.inputBuffer.dynamicFlag)
         ShrinkInputBuffer(ssl, NO_FORCED_FREE);
 
-    /* suites */
-    XFREE(ssl->suites, ssl->heap, DYNAMIC_TYPE_SUITES);
-    ssl->suites = NULL;
-
-    /* hsHashes */
-    FreeHandshakeHashes(ssl);
+#if defined(WOLFSSL_TLS13) && defined(WOLFSSL_POST_HANDSHAKE_AUTH)
+    if (!ssl->options.tls1_3)
+#endif
+    {
+    #ifndef OPENSSL_ALL
+        /* free suites unless using compatibility layer */
+        FreeSuites(ssl);
+    #endif
+        /* hsHashes */
+        FreeHandshakeHashes(ssl);
+    }
 
     /* RNG */
-    if (ssl->specs.cipher_type == stream || ssl->options.tls1_1 == 0) {
+    if (ssl->options.tls1_1 == 0
+#ifndef WOLFSSL_AEAD_ONLY
+        || ssl->specs.cipher_type == stream
+#endif
+#if defined(WOLFSSL_TLS13)
+    #if !defined(WOLFSSL_POST_HANDSHAKE_AUTH)
+        || ssl->options.tls1_3
+    #elif !defined(HAVE_SESSION_TICKET)
+        || (ssl->options.tls1_3 && ssl->options.side == WOLFSSL_SERVER_END)
+    #endif
+#endif
+    ) {
         if (ssl->options.weOwnRng) {
             wc_FreeRng(ssl->rng);
             XFREE(ssl->rng, ssl->heap, DYNAMIC_TYPE_RNG);
@@ -5241,23 +6525,42 @@
     }
 #endif
 
-    /* arrays */
-    if (ssl->options.saveArrays == 0)
-        FreeArrays(ssl, 1);
-
+#if defined(WOLFSSL_TLS13) && defined(WOLFSSL_POST_HANDSHAKE_AUTH) && \
+                                                    defined(HAVE_SESSION_TICKET)
+    if (!ssl->options.tls1_3)
+#endif
+        /* arrays */
+        if (ssl->options.saveArrays == 0)
+            FreeArrays(ssl, 1);
+
+#if defined(WOLFSSL_TLS13) && defined(WOLFSSL_POST_HANDSHAKE_AUTH)
+    if (!ssl->options.tls1_3 || ssl->options.side == WOLFSSL_CLIENT_END)
+#endif
+    {
 #ifndef NO_RSA
-    /* peerRsaKey */
-    FreeKey(ssl, DYNAMIC_TYPE_RSA, (void**)&ssl->peerRsaKey);
-    ssl->peerRsaKeyPresent = 0;
-#endif
+        /* peerRsaKey */
+        FreeKey(ssl, DYNAMIC_TYPE_RSA, (void**)&ssl->peerRsaKey);
+        ssl->peerRsaKeyPresent = 0;
+#endif
+#ifdef HAVE_ECC
+        FreeKey(ssl, DYNAMIC_TYPE_ECC, (void**)&ssl->peerEccDsaKey);
+        ssl->peerEccDsaKeyPresent = 0;
+#endif /* HAVE_ECC */
+#ifdef HAVE_ED25519
+        FreeKey(ssl, DYNAMIC_TYPE_ED25519, (void**)&ssl->peerEd25519Key);
+        ssl->peerEd25519KeyPresent = 0;
+#endif /* HAVE_ED25519 */
+#ifdef HAVE_ED448
+        FreeKey(ssl, DYNAMIC_TYPE_ED448, (void**)&ssl->peerEd448Key);
+        ssl->peerEd448KeyPresent = 0;
+#endif /* HAVE_ED448 */
+    }
 
 #ifdef HAVE_ECC
     FreeKey(ssl, DYNAMIC_TYPE_ECC, (void**)&ssl->peerEccKey);
     ssl->peerEccKeyPresent = 0;
-    FreeKey(ssl, DYNAMIC_TYPE_ECC, (void**)&ssl->peerEccDsaKey);
-    ssl->peerEccDsaKeyPresent = 0;
-#endif
-#if defined(HAVE_ECC) || defined(HAVE_CURVE25519)
+#endif
+#if defined(HAVE_ECC) || defined(HAVE_CURVE25519) || defined(HAVE_CURVE448)
     {
         int dtype;
     #ifdef HAVE_ECC
@@ -5272,14 +6575,28 @@
             dtype = DYNAMIC_TYPE_CURVE25519;
          }
     #endif /* HAVE_CURVE25519 */
+    #ifdef HAVE_CURVE448
+    #ifdef HAVE_ECC
+        if (ssl->peerX448KeyPresent ||
+                                ssl->eccTempKeyPresent == DYNAMIC_TYPE_CURVE448)
+    #endif /* HAVE_ECC */
+         {
+            dtype = DYNAMIC_TYPE_CURVE448;
+         }
+    #endif /* HAVE_CURVE448 */
         FreeKey(ssl, dtype, (void**)&ssl->eccTempKey);
         ssl->eccTempKeyPresent = 0;
     }
-#endif /* HAVE_ECC || HAVE_CURVE25519 */
+#endif /* HAVE_ECC || HAVE_CURVE25519 || HAVE_CURVE448 */
 #ifdef HAVE_CURVE25519
     FreeKey(ssl, DYNAMIC_TYPE_CURVE25519, (void**)&ssl->peerX25519Key);
     ssl->peerX25519KeyPresent = 0;
 #endif
+#ifdef HAVE_CURVE448
+    FreeKey(ssl, DYNAMIC_TYPE_CURVE448, (void**)&ssl->peerX448Key);
+    ssl->peerX448KeyPresent = 0;
+#endif
+
 #ifndef NO_DH
     if (ssl->buffers.serverDH_Priv.buffer) {
         ForceZero(ssl->buffers.serverDH_Priv.buffer,
@@ -5297,10 +6614,15 @@
         ssl->buffers.serverDH_P.buffer = NULL;
     }
 #endif /* !NO_DH */
+
 #ifndef NO_CERTS
     wolfSSL_UnloadCertsKeys(ssl);
 #endif
 #ifdef HAVE_PK_CALLBACKS
+#if defined(WOLFSSL_TLS13) && defined(WOLFSSL_POST_HANDSHAKE_AUTH)
+    if (!ssl->options.tls1_3 || ssl->options.side == WOLFSSL_CLIENT_END)
+#endif
+    {
     #ifdef HAVE_ECC
         XFREE(ssl->buffers.peerEccDsaKey.buffer, ssl->heap, DYNAMIC_TYPE_ECC);
         ssl->buffers.peerEccDsaKey.buffer = NULL;
@@ -5314,6 +6636,11 @@
                                                           DYNAMIC_TYPE_ED25519);
         ssl->buffers.peerEd25519Key.buffer = NULL;
     #endif
+    #ifdef HAVE_ED448
+        XFREE(ssl->buffers.peerEd448Key.buffer, ssl->heap, DYNAMIC_TYPE_ED448);
+        ssl->buffers.peerEd448Key.buffer = NULL;
+    #endif
+    }
 #endif /* HAVE_PK_CALLBACKS */
 
 #ifdef HAVE_QSH
@@ -5329,6 +6656,13 @@
     }
 #endif
 
+#if defined(HAVE_TLS_EXTENSIONS) && !defined(HAVE_SNI) && \
+                    !defined(HAVE_ALPN) && !defined(WOLFSSL_POST_HANDSHAKE_AUTH)
+    /* Some extensions need to be kept for post-handshake querying. */
+    TLSX_FreeAll(ssl->extensions, ssl->heap);
+    ssl->extensions = NULL;
+#endif
+
 #ifdef WOLFSSL_STATIC_MEMORY
     /* when done with handshake decrement current handshake count */
     if (ssl->heap != NULL) {
@@ -5358,7 +6692,7 @@
 void FreeSSL(WOLFSSL* ssl, void* heap)
 {
     if (ssl->ctx) {
-        FreeSSL_Ctx(ssl->ctx); /* will decrement and free underyling CTX if 0 */
+        FreeSSL_Ctx(ssl->ctx); /* will decrement and free underlying CTX if 0 */
     }
     SSL_ResourceFree(ssl);
     XFREE(ssl, heap, DYNAMIC_TYPE_SSL);
@@ -5368,6 +6702,8 @@
 #if !defined(NO_OLD_TLS) || defined(WOLFSSL_DTLS) || \
     ((defined(HAVE_CHACHA) || defined(HAVE_AESCCM) || defined(HAVE_AESGCM)) \
      && defined(HAVE_AEAD))
+
+#if defined(WOLFSSL_DTLS) || !defined(WOLFSSL_NO_TLS12)
 static WC_INLINE void GetSEQIncrement(WOLFSSL* ssl, int verify, word32 seq[2])
 {
     if (verify) {
@@ -5387,6 +6723,7 @@
         }
     }
 }
+#endif /* WOLFSSL_DTLS || !WOLFSSL_NO_TLS12 */
 
 
 #ifdef WOLFSSL_DTLS
@@ -5396,39 +6733,39 @@
         /* Previous epoch case */
         if (ssl->options.haveMcast) {
         #ifdef WOLFSSL_MULTICAST
-            seq[0] = ((ssl->keys.dtls_epoch - 1) << 16) |
+            seq[0] = (((word32)ssl->keys.dtls_epoch - 1) << 16) |
                      (ssl->options.mcastID << 8) |
                      (ssl->keys.dtls_prev_sequence_number_hi & 0xFF);
         #endif
         }
         else
-            seq[0] = ((ssl->keys.dtls_epoch - 1) << 16) |
+            seq[0] = (((word32)ssl->keys.dtls_epoch - 1) << 16) |
                      (ssl->keys.dtls_prev_sequence_number_hi & 0xFFFF);
         seq[1] = ssl->keys.dtls_prev_sequence_number_lo;
     }
     else if (order == PEER_ORDER) {
         if (ssl->options.haveMcast) {
         #ifdef WOLFSSL_MULTICAST
-            seq[0] = (ssl->keys.curEpoch << 16) |
+            seq[0] = ((word32)ssl->keys.curEpoch << 16) |
                      (ssl->keys.curPeerId << 8) |
                      (ssl->keys.curSeq_hi & 0xFF);
         #endif
         }
         else
-            seq[0] = (ssl->keys.curEpoch << 16) |
+            seq[0] = ((word32)ssl->keys.curEpoch << 16) |
                      (ssl->keys.curSeq_hi & 0xFFFF);
         seq[1] = ssl->keys.curSeq_lo; /* explicit from peer */
     }
     else {
         if (ssl->options.haveMcast) {
         #ifdef WOLFSSL_MULTICAST
-            seq[0] = (ssl->keys.dtls_epoch << 16) |
+            seq[0] = ((word32)ssl->keys.dtls_epoch << 16) |
                      (ssl->options.mcastID << 8) |
                      (ssl->keys.dtls_sequence_number_hi & 0xFF);
         #endif
         }
         else
-            seq[0] = (ssl->keys.dtls_epoch << 16) |
+            seq[0] = ((word32)ssl->keys.dtls_epoch << 16) |
                      (ssl->keys.dtls_sequence_number_hi & 0xFFFF);
         seq[1] = ssl->keys.dtls_sequence_number_lo;
     }
@@ -5462,7 +6799,7 @@
 }
 #endif /* WOLFSSL_DTLS */
 
-
+#if defined(WOLFSSL_DTLS) || !defined(WOLFSSL_NO_TLS12)
 static WC_INLINE void WriteSEQ(WOLFSSL* ssl, int verifyOrder, byte* out)
 {
     word32 seq[2] = {0, 0};
@@ -5479,7 +6816,9 @@
     c32toa(seq[0], out);
     c32toa(seq[1], out + OPAQUE32_LEN);
 }
-#endif
+#endif /* WOLFSSL_DTLS || !WOLFSSL_NO_TLS12 */
+#endif /* !NO_OLD_TLS || WOLFSSL_DTLS ||
+        *     ((HAVE_CHACHA || HAVE_AESCCM || HAVE_AESGCM) && HAVE_AEAD) */
 
 #ifdef WOLFSSL_DTLS
 
@@ -5492,7 +6831,7 @@
  * extra space for the headers. */
 DtlsMsg* DtlsMsgNew(word32 sz, void* heap)
 {
-    DtlsMsg* msg = NULL;
+    DtlsMsg* msg;
 
     (void)heap;
     msg = (DtlsMsg*)XMALLOC(sizeof(DtlsMsg), heap, DYNAMIC_TYPE_DTLS_MSG);
@@ -5589,7 +6928,7 @@
             c32to24(msg->sz, msg->msg - DTLS_HANDSHAKE_FRAG_SZ);
         }
 
-        /* if no mesage data, just return */
+        /* if no message data, just return */
         if (fragSz == 0)
             return 0;
 
@@ -5772,8 +7111,12 @@
     DtlsMsg* item;
     int ret = 0;
 
-    if (ssl->dtls_tx_msg_list_sz > DTLS_POOL_SZ)
+    WOLFSSL_ENTER("DtlsMsgPoolSave()");
+
+    if (ssl->dtls_tx_msg_list_sz > DTLS_POOL_SZ) {
+        WOLFSSL_ERROR(DTLS_POOL_SZ_E);
         return DTLS_POOL_SZ_E;
+    }
 
     item = DtlsMsgNew(dataSz, ssl->heap);
 
@@ -5796,6 +7139,7 @@
     else
         ret = MEMORY_E;
 
+    WOLFSSL_LEAVE("DtlsMsgPoolSave()", ret);
     return ret;
 }
 
@@ -5808,6 +7152,7 @@
         ssl->dtls_timeout *= DTLS_TIMEOUT_MULTIPLIER;
         result = 0;
     }
+    WOLFSSL_LEAVE("DtlsMsgPoolTimeout()", result);
     return result;
 }
 
@@ -5816,9 +7161,11 @@
  * value. */
 void DtlsMsgPoolReset(WOLFSSL* ssl)
 {
+    WOLFSSL_ENTER("DtlsMsgPoolReset()");
     if (ssl->dtls_tx_msg_list) {
         DtlsMsgListDelete(ssl->dtls_tx_msg_list, ssl->heap);
         ssl->dtls_tx_msg_list = NULL;
+        ssl->dtls_tx_msg = NULL;
         ssl->dtls_tx_msg_list_sz = 0;
         ssl->dtls_timeout = ssl->dtls_timeout_init;
     }
@@ -5847,9 +7194,27 @@
 int DtlsMsgPoolSend(WOLFSSL* ssl, int sendOnlyFirstPacket)
 {
     int ret = 0;
-    DtlsMsg* pool = ssl->dtls_tx_msg_list;
+    DtlsMsg* pool;
+
+    WOLFSSL_ENTER("DtlsMsgPoolSend()");
+
+    pool = ssl->dtls_tx_msg == NULL ? ssl->dtls_tx_msg_list : ssl->dtls_tx_msg;
 
     if (pool != NULL) {
+        if ((ssl->options.side == WOLFSSL_SERVER_END &&
+             !(ssl->options.acceptState == SERVER_HELLO_DONE ||
+               ssl->options.acceptState == ACCEPT_FINISHED_DONE ||
+               ssl->options.acceptState == ACCEPT_THIRD_REPLY_DONE)) ||
+            (ssl->options.side == WOLFSSL_CLIENT_END &&
+             !(ssl->options.connectState == CLIENT_HELLO_SENT ||
+               ssl->options.connectState == HELLO_AGAIN_REPLY ||
+               ssl->options.connectState == FINISHED_DONE ||
+               ssl->options.connectState == SECOND_REPLY_DONE))) {
+
+            WOLFSSL_ERROR(DTLS_RETX_OVER_TX);
+            ssl->error = DTLS_RETX_OVER_TX;
+            return WOLFSSL_FATAL_ERROR;
+        }
 
         while (pool != NULL) {
             if (pool->seq == 0) {
@@ -5867,8 +7232,10 @@
 
                 WriteSEQ(ssl, epochOrder, dtls->sequence_number);
                 DtlsSEQIncrement(ssl, epochOrder);
-                if ((ret = CheckAvailableSize(ssl, pool->sz)) != 0)
+                if ((ret = CheckAvailableSize(ssl, pool->sz)) != 0) {
+                    WOLFSSL_ERROR(ret);
                     return ret;
+                }
 
                 XMEMCPY(ssl->buffers.outputBuffer.buffer,
                         pool->buf, pool->sz);
@@ -5884,29 +7251,34 @@
                 inputSz = pool->sz;
                 sendSz = inputSz + MAX_MSG_EXTRA;
 
-                if ((ret = CheckAvailableSize(ssl, sendSz)) != 0)
+                if ((ret = CheckAvailableSize(ssl, sendSz)) != 0) {
+                    WOLFSSL_ERROR(ret);
                     return ret;
+                }
 
                 output = ssl->buffers.outputBuffer.buffer +
                          ssl->buffers.outputBuffer.length;
                 sendSz = BuildMessage(ssl, output, sendSz, input, inputSz,
                                       handshake, 0, 0, 0);
-                if (sendSz < 0)
+                if (sendSz < 0) {
+                    WOLFSSL_ERROR(BUILD_MSG_ERROR);
                     return BUILD_MSG_ERROR;
+                }
 
                 ssl->buffers.outputBuffer.length += sendSz;
             }
 
             ret = SendBuffered(ssl);
             if (ret < 0) {
+                WOLFSSL_ERROR(ret);
                 return ret;
             }
 
             /**
-             * on server side, retranmission is being triggered only by sending
+             * on server side, retransmission is being triggered only by sending
              * first message of given flight, in order to trigger client
              * to retransmit its whole flight. Sending the whole previous flight
-             * could lead to retranmission of previous client flight for each
+             * could lead to retransmission of previous client flight for each
              * server message from previous flight. Therefore one message should
              * be enough to do the trick.
              */
@@ -5917,9 +7289,11 @@
             }
             else
                 pool = pool->next;
-        }
-    }
-
+            ssl->dtls_tx_msg = pool;
+        }
+    }
+
+    WOLFSSL_LEAVE("DtlsMsgPoolSend()", ret);
     return ret;
 }
 
@@ -5966,8 +7340,7 @@
 #endif /* WOLFSSL_DTLS */
 
 
-
-
+#ifndef NO_ASN_TIME
 #if defined(USER_TICKS)
 #if 0
     word32 LowResTimer(void)
@@ -6020,6 +7393,15 @@
         return (word32)rtp_get_system_sec();
     }
 
+#elif defined(WOLFSSL_DEOS)
+
+    word32 LowResTimer(void)
+    {
+        const uint32_t systemTickTimeInHz = 1000000 / systemTickInMicroseconds();
+        uint32_t *systemTickPtr = systemTickPointer();
+
+        return (word32) *systemTickPtr/systemTickTimeInHz;
+    }
 
 #elif defined(MICRIUM)
 
@@ -6082,6 +7464,15 @@
         return (unsigned int)(((float)xTaskGetTickCount())/configTICK_RATE_HZ);
     }
 
+#elif defined(FREERTOS)
+
+    #include "task.h"
+
+    unsigned int LowResTimer(void)
+    {
+        return (unsigned int)(((float)xTaskGetTickCount())/configTICK_RATE_HZ);
+    }
+
 #elif defined(FREESCALE_KSDK_BM)
 
     #include "lwip/sys.h" /* lwIP */
@@ -6134,10 +7525,28 @@
         /* returns number of 10ms ticks, so 100 ticks/sec */
         return NU_Retrieve_Clock() / NU_TICKS_PER_SECOND;
     }
+#elif defined(WOLFSSL_APACHE_MYNEWT)
+
+    #include "os/os_time.h"
+    word32 LowResTimer(void)
+    {
+        word32 now;
+        struct os_timeval tv;
+        os_gettimeofday(&tv, NULL);
+        now = (word32)tv.tv_sec;
+        return now;
+    }
+
+#elif defined(WOLFSSL_ZEPHYR)
+
+    word32 LowResTimer(void)
+    {
+        return k_uptime_get() / 1000;
+    }
 
 #else
     /* Posix style time */
-    #ifndef USER_TIME
+    #if !defined(USER_TIME) && !defined(USE_WOLF_TM)
     #include <time.h>
     #endif
 
@@ -6146,16 +7555,18 @@
         return (word32)XTIME(0);
     }
 #endif
-#if !defined(WOLFSSL_NO_CLIENT_AUTH) && defined(HAVE_ED25519) && \
-                                                !defined(NO_ED25519_CLIENT_AUTH)
-/* Store the message for use with CertificateVerify using Ed25519.
+#endif /* !NO_ASN_TIME */
+#if !defined(WOLFSSL_NO_CLIENT_AUTH) && \
+               ((defined(HAVE_ED25519) && !defined(NO_ED25519_CLIENT_AUTH)) || \
+                (defined(HAVE_ED448) && !defined(NO_ED448_CLIENT_AUTH)))
+/* Store the message for use with CertificateVerify using EdDSA.
  *
  * ssl   SSL/TLS object.
  * data  Message to store.
  * sz    Size of message to store.
  * returns MEMORY_E if not able to reallocate, otherwise 0.
  */
-static int Ed25519Update(WOLFSSL* ssl, const byte* data, int sz)
+static int EdDSA_Update(WOLFSSL* ssl, const byte* data, int sz)
 {
     int   ret = 0;
     byte* msgs;
@@ -6176,7 +7587,7 @@
 
     return ret;
 }
-#endif /* HAVE_ED25519 && !WOLFSSL_NO_CLIENT_AUTH */
+#endif /* (HAVE_ED25519 || HAVE_ED448) && !WOLFSSL_NO_CLIENT_AUTH */
 
 #ifndef NO_CERTS
 int HashOutputRaw(WOLFSSL* ssl, const byte* output, int sz)
@@ -6218,9 +7629,10 @@
         if (ret != 0)
             return ret;
     #endif
-    #if !defined(WOLFSSL_NO_CLIENT_AUTH) && defined(HAVE_ED25519) && \
-                                                !defined(NO_ED25519_CLIENT_AUTH)
-        ret = Ed25519Update(ssl, output, sz);
+    #if !defined(WOLFSSL_NO_CLIENT_AUTH) && \
+               ((defined(HAVE_ED25519) && !defined(NO_ED25519_CLIENT_AUTH)) || \
+                (defined(HAVE_ED448) && !defined(NO_ED448_CLIENT_AUTH)))
+        ret = EdDSA_Update(ssl, output, sz);
         if (ret != 0)
             return ret;
     #endif
@@ -6237,6 +7649,9 @@
     int ret = 0;
     const byte* adj;
 
+    if (ssl->hsHashes == NULL)
+        return BAD_FUNC_ARG;
+
     adj = output + RECORD_HEADER_SZ + ivSz;
     sz -= RECORD_HEADER_SZ;
 
@@ -6275,9 +7690,10 @@
         if (ret != 0)
             return ret;
     #endif
-    #if !defined(WOLFSSL_NO_CLIENT_AUTH) && defined(HAVE_ED25519) && \
-                                                !defined(NO_ED25519_CLIENT_AUTH)
-        ret = Ed25519Update(ssl, adj, sz);
+    #if !defined(WOLFSSL_NO_CLIENT_AUTH) && \
+               ((defined(HAVE_ED25519) && !defined(NO_ED25519_CLIENT_AUTH)) || \
+                (defined(HAVE_ED448) && !defined(NO_ED448_CLIENT_AUTH)))
+        ret = EdDSA_Update(ssl, adj, sz);
         if (ret != 0)
             return ret;
     #endif
@@ -6334,9 +7750,10 @@
         if (ret != 0)
             return ret;
     #endif
-    #if !defined(WOLFSSL_NO_CLIENT_AUTH) && defined(HAVE_ED25519) && \
-                                                !defined(NO_ED25519_CLIENT_AUTH)
-        ret = Ed25519Update(ssl, adj, sz);
+    #if !defined(WOLFSSL_NO_CLIENT_AUTH) && \
+               ((defined(HAVE_ED25519) && !defined(NO_ED25519_CLIENT_AUTH)) || \
+                (defined(HAVE_ED448) && !defined(NO_ED448_CLIENT_AUTH)))
+        ret = EdDSA_Update(ssl, adj, sz);
         if (ret != 0)
             return ret;
     #endif
@@ -6395,7 +7812,8 @@
 }
 
 
-#if !defined(WOLFSSL_NO_TLS12) || defined(HAVE_SESSION_TICKET)
+#if !defined(WOLFSSL_NO_TLS12) || (defined(HAVE_SESSION_TICKET) && \
+                                                    !defined(NO_WOLFSSL_SERVER))
 /* add handshake header for message */
 static void AddHandShakeHeader(byte* output, word32 length,
                                word32 fragOffset, word32 fragLength,
@@ -6442,11 +7860,12 @@
     AddRecordHeader(output, length + lengthAdj, handshake, ssl);
     AddHandShakeHeader(output + outputAdj, length, 0, length, type, ssl);
 }
-#endif /* !WOLFSSL_NO_TLS12 || HAVE_SESSION_TICKET */
+#endif /* !WOLFSSL_NO_TLS12 || (HAVE_SESSION_TICKET && !NO_WOLFSSL_SERVER) */
 
 
 #ifndef WOLFSSL_NO_TLS12
-#ifndef NO_CERTS
+#if !defined(NO_CERTS) && (!defined(NO_WOLFSSL_SERVER) || \
+                                               !defined(WOLFSSL_NO_CLIENT_AUTH))
 static void AddFragHeaders(byte* output, word32 fragSz, word32 fragOffset,
                            word32 length, byte type, WOLFSSL* ssl)
 {
@@ -6480,9 +7899,18 @@
 
 retry:
     recvd = ssl->CBIORecv(ssl, (char *)buf, (int)sz, ssl->IOCB_ReadCtx);
-    if (recvd < 0)
+    if (recvd < 0) {
         switch (recvd) {
             case WOLFSSL_CBIO_ERR_GENERAL:        /* general/unknown error */
+                #if defined(OPENSSL_ALL) || defined(WOLFSSL_APACHE_HTTPD)
+                    if (ssl->biord) {
+                        /* If retry and read flags are set, return WANT_READ */
+                        if ((ssl->biord->flags & WOLFSSL_BIO_FLAG_READ) &&
+                            (ssl->biord->flags & WOLFSSL_BIO_FLAG_RETRY)) {
+                            return WANT_READ;
+                        }
+                    }
+                #endif
                 return -1;
 
             case WOLFSSL_CBIO_ERR_WANT_READ:      /* want read, would block */
@@ -6521,21 +7949,24 @@
                 ssl->options.isClosed = 1;
                 return -1;
 
+            case WOLFSSL_CBIO_ERR_TIMEOUT:
             #ifdef WOLFSSL_DTLS
-            case WOLFSSL_CBIO_ERR_TIMEOUT:
                 if (IsDtlsNotSctpMode(ssl) &&
                     !ssl->options.handShakeDone &&
                     DtlsMsgPoolTimeout(ssl) == 0 &&
                     DtlsMsgPoolSend(ssl, 0) == 0) {
 
+                    /* retry read for DTLS during handshake only */
                     goto retry;
                 }
+            #endif
                 return -1;
-            #endif
 
             default:
+                WOLFSSL_MSG("Unexpected recv return code");
                 return recvd;
         }
+    }
 
     return recvd;
 }
@@ -6695,6 +8126,16 @@
         tmp += align - hdrSz;
 #endif
 
+#ifdef WOLFSSL_STATIC_MEMORY
+    /* can be from IO memory pool which does not need copy if same buffer */
+    if (ssl->buffers.outputBuffer.length &&
+            tmp == ssl->buffers.outputBuffer.buffer) {
+        ssl->buffers.outputBuffer.bufferSize =
+            size + ssl->buffers.outputBuffer.length;
+        return 0;
+    }
+#endif
+
     if (ssl->buffers.outputBuffer.length)
         XMEMCPY(tmp, ssl->buffers.outputBuffer.buffer,
                ssl->buffers.outputBuffer.length);
@@ -6759,6 +8200,16 @@
         tmp += align - hdrSz;
 #endif
 
+#ifdef WOLFSSL_STATIC_MEMORY
+    /* can be from IO memory pool which does not need copy if same buffer */
+    if (usedLength && tmp == ssl->buffers.inputBuffer.buffer) {
+        ssl->buffers.inputBuffer.bufferSize = size + usedLength;
+        ssl->buffers.inputBuffer.idx    = 0;
+        ssl->buffers.inputBuffer.length = usedLength;
+        return 0;
+    }
+#endif
+
     if (usedLength)
         XMEMCPY(tmp, ssl->buffers.inputBuffer.buffer +
                     ssl->buffers.inputBuffer.idx, usedLength);
@@ -6845,9 +8296,8 @@
     }
 
 #ifdef WOLFSSL_DTLS
-    if (IsDtlsNotSctpMode(ssl) &&
-        (!DtlsCheckWindow(ssl) ||
-         (ssl->options.handShakeDone && ssl->keys.curEpoch == 0))) {
+    if (IsDtlsNotSctpMode(ssl) && !DtlsCheckWindow(ssl)) {
+            WOLFSSL_LEAVE("GetRecordHeader()", SEQUENCE_ERROR);
             return SEQUENCE_ERROR;
     }
 #endif
@@ -6879,6 +8329,14 @@
             WOLFSSL_MSG("DTLS handshake, skip RH version number check");
         else {
             WOLFSSL_MSG("SSL version error");
+            /* send alert per RFC5246 Appendix E. Backward Compatibility */
+            if (ssl->options.side == WOLFSSL_CLIENT_END) {
+#ifdef WOLFSSL_MYSQL_COMPATIBLE
+                SendAlert(ssl, alert_fatal, wc_protocol_version);
+#else
+                SendAlert(ssl, alert_fatal, protocol_version);
+#endif
+            }
             return VERSION_ERROR;              /* only use requested version */
         }
     }
@@ -6940,8 +8398,10 @@
     word32 idx = *inOutIdx;
 
     *inOutIdx += HANDSHAKE_HEADER_SZ + DTLS_HANDSHAKE_EXTRA;
-    if (*inOutIdx > totalSz)
-        return BUFFER_E;
+    if (*inOutIdx > totalSz) {
+        WOLFSSL_ERROR(BUFFER_E);
+        return BUFFER_E;
+    }
 
     *type = input[idx++];
     c24to32(input + idx, size);
@@ -6957,8 +8417,10 @@
     if (ssl->curRL.pvMajor != ssl->version.major ||
         ssl->curRL.pvMinor != ssl->version.minor) {
 
-        if (*type != client_hello && *type != hello_verify_request)
+        if (*type != client_hello && *type != hello_verify_request) {
+            WOLFSSL_ERROR(VERSION_ERROR);
             return VERSION_ERROR;
+        }
         else {
             WOLFSSL_MSG("DTLS Handshake ignoring hello or verify version");
         }
@@ -7126,7 +8588,8 @@
         REQUIRES_ECC_STATIC,
         REQUIRES_PSK,
         REQUIRES_NTRU,
-        REQUIRES_RSA_SIG
+        REQUIRES_RSA_SIG,
+        REQUIRES_AEAD
     };
 
 
@@ -7197,6 +8660,10 @@
                 return 1;
             break;
             }
+
+        if (requirement == REQUIRES_AEAD)
+            return 1;
+
         }
 #endif /* HAVE_CHACHA */
 
@@ -7204,7 +8671,7 @@
         if (first == ECC_BYTE) {
 
         switch (second) {
-#ifdef HAVE_ECC
+#if defined(HAVE_ECC) || defined(HAVE_CURVE25519) || defined(HAVE_CURVE448)
     #ifndef NO_RSA
         case TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA :
             if (requirement == REQUIRES_RSA)
@@ -7306,34 +8773,46 @@
         case TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256 :
             if (requirement == REQUIRES_ECC)
                 return 1;
+            if (requirement == REQUIRES_AEAD)
+                return 1;
             break;
 
         case TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 :
             if (requirement == REQUIRES_ECC)
                 return 1;
+            if (requirement == REQUIRES_AEAD)
+                return 1;
             break;
 
         case TLS_ECDH_ECDSA_WITH_AES_128_GCM_SHA256 :
             if (requirement == REQUIRES_ECC_STATIC)
                 return 1;
+            if (requirement == REQUIRES_AEAD)
+                return 1;
             break;
 
         case TLS_ECDH_ECDSA_WITH_AES_256_GCM_SHA384 :
             if (requirement == REQUIRES_ECC_STATIC)
                 return 1;
-            break;
-#endif /* HAVE_ECC */
+            if (requirement == REQUIRES_AEAD)
+                return 1;
+            break;
+#endif /* HAVE_ECC || HAVE_CURVE25519 || HAVE_CURVE448 */
 
 #ifndef NO_RSA
-    #ifdef HAVE_ECC
+    #if defined(HAVE_ECC) || defined(HAVE_CURVE25519) || defined(HAVE_CURVE448)
         case TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 :
             if (requirement == REQUIRES_RSA)
                 return 1;
+            if (requirement == REQUIRES_AEAD)
+                return 1;
             break;
 
         case TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 :
             if (requirement == REQUIRES_RSA)
                 return 1;
+            if (requirement == REQUIRES_AEAD)
+                return 1;
             break;
 
         case TLS_ECDH_RSA_WITH_AES_128_GCM_SHA256 :
@@ -7341,6 +8820,8 @@
                 return 1;
             if (requirement == REQUIRES_RSA_SIG)
                 return 1;
+            if (requirement == REQUIRES_AEAD)
+                return 1;
             break;
 
         case TLS_ECDH_RSA_WITH_AES_256_GCM_SHA384 :
@@ -7348,8 +8829,10 @@
                 return 1;
             if (requirement == REQUIRES_RSA_SIG)
                 return 1;
-            break;
-    #endif /* HAVE_ECC */
+            if (requirement == REQUIRES_AEAD)
+                return 1;
+            break;
+    #endif /* HAVE_ECC || HAVE_CURVE25519 || HAVE_CURVE448 */
     #ifdef HAVE_AESCCM
         case TLS_RSA_WITH_AES_128_CCM_8 :
         case TLS_RSA_WITH_AES_256_CCM_8 :
@@ -7357,9 +8840,11 @@
                 return 1;
             if (requirement == REQUIRES_RSA_SIG)
                 return 1;
+            if (requirement == REQUIRES_AEAD)
+                return 1;
             break;
     #endif /* HAVE_AESCCM */
-    #ifdef HAVE_ECC
+    #if defined(HAVE_ECC) || defined(HAVE_CURVE25519) || defined(HAVE_CURVE448)
 
         case TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 :
         case TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384 :
@@ -7374,15 +8859,17 @@
             if (requirement == REQUIRES_ECC_STATIC)
                 return 1;
             break;
-    #endif /* HAVE_ECC */
+    #endif /* HAVE_ECC || HAVE_CURVE25519 || HAVE_CURVE448 */
 #endif /* !NO_RSA */
 
-#ifdef HAVE_ECC
+#if defined(HAVE_ECC) || defined(HAVE_CURVE25519) || defined(HAVE_CURVE448)
         case TLS_ECDHE_ECDSA_WITH_AES_128_CCM :
         case TLS_ECDHE_ECDSA_WITH_AES_128_CCM_8 :
         case TLS_ECDHE_ECDSA_WITH_AES_256_CCM_8 :
             if (requirement == REQUIRES_ECC)
                 return 1;
+            if (requirement == REQUIRES_AEAD)
+                return 1;
             break;
 
         case TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384 :
@@ -7398,7 +8885,7 @@
             if (requirement == REQUIRES_ECC_STATIC)
                 return 1;
             break;
-#endif /* HAVE_ECC */
+#endif /* HAVE_ECC || HAVE_CURVE25519 || HAVE_CURVE448 */
 
 #ifndef NO_PSK
         case TLS_PSK_WITH_AES_128_CCM:
@@ -7407,6 +8894,8 @@
         case TLS_PSK_WITH_AES_256_CCM_8:
             if (requirement == REQUIRES_PSK)
                 return 1;
+            if (requirement == REQUIRES_AEAD)
+                return 1;
             break;
 
         case TLS_DHE_PSK_WITH_AES_128_CCM:
@@ -7415,9 +8904,11 @@
                 return 1;
             if (requirement == REQUIRES_DHE)
                 return 1;
+            if (requirement == REQUIRES_AEAD)
+                return 1;
             break;
 #endif /* !NO_PSK */
-#ifdef HAVE_ECC
+#if defined(HAVE_ECC) || defined(HAVE_CURVE25519) || defined(HAVE_CURVE448)
         case TLS_ECDHE_ECDSA_WITH_NULL_SHA :
             if (requirement == REQUIRES_ECC)
                 return 1;
@@ -7432,7 +8923,15 @@
             if (requirement == REQUIRES_PSK)
                 return 1;
             break;
-#endif /* HAVE_ECC */
+#endif /* HAVE_ECC || HAVE_CURVE25519 || HAVE_CURVE448 */
+
+#if defined(WOLFSSL_TLS13) && defined(HAVE_NULL_CIPHER)
+        case TLS_SHA256_SHA256:
+            break;
+        case TLS_SHA384_SHA384:
+            break;
+#endif
+
         default:
             WOLFSSL_MSG("Unsupported cipher suite, CipherRequires ECC");
             return 0;
@@ -7526,6 +9025,7 @@
                 return 1;
             break;
 
+        case TLS_RSA_WITH_NULL_MD5 :
         case TLS_RSA_WITH_NULL_SHA :
         case TLS_RSA_WITH_NULL_SHA256 :
             if (requirement == REQUIRES_RSA)
@@ -7549,7 +9049,19 @@
 
 #ifndef NO_PSK
         case TLS_PSK_WITH_AES_128_GCM_SHA256 :
+            if (requirement == REQUIRES_PSK)
+                return 1;
+            if (requirement == REQUIRES_AEAD)
+                return 1;
+            break;
+
         case TLS_PSK_WITH_AES_256_GCM_SHA384 :
+            if (requirement == REQUIRES_PSK)
+                return 1;
+            if (requirement == REQUIRES_AEAD)
+                return 1;
+            break;
+
         case TLS_PSK_WITH_AES_128_CBC_SHA256 :
         case TLS_PSK_WITH_AES_256_CBC_SHA384 :
         case TLS_PSK_WITH_AES_128_CBC_SHA :
@@ -7563,6 +9075,14 @@
 
         case TLS_DHE_PSK_WITH_AES_128_GCM_SHA256 :
         case TLS_DHE_PSK_WITH_AES_256_GCM_SHA384 :
+            if (requirement == REQUIRES_DHE)
+                return 1;
+            if (requirement == REQUIRES_PSK)
+                return 1;
+            if (requirement == REQUIRES_AEAD)
+                return 1;
+            break;
+
         case TLS_DHE_PSK_WITH_AES_128_CBC_SHA256 :
         case TLS_DHE_PSK_WITH_AES_256_CBC_SHA384 :
         case TLS_DHE_PSK_WITH_NULL_SHA384 :
@@ -7613,21 +9133,8 @@
             if (requirement == REQUIRES_RSA)
                 return 1;
             break;
-
-        case TLS_RSA_WITH_HC_128_B2B256:
-            if (requirement == REQUIRES_RSA)
-                return 1;
-            break;
 #endif /* NO_HC128 */
 
-#ifdef HAVE_BLAKE2
-        case TLS_RSA_WITH_AES_128_CBC_B2B256:
-        case TLS_RSA_WITH_AES_256_CBC_B2B256:
-            if (requirement == REQUIRES_RSA)
-                return 1;
-            break;
-#endif /* HAVE_BLAKE2 */
-
 #ifndef NO_RABBIT
         case TLS_RSA_WITH_RABBIT_SHA :
             if (requirement == REQUIRES_RSA)
@@ -7639,6 +9146,8 @@
         case TLS_RSA_WITH_AES_256_GCM_SHA384 :
             if (requirement == REQUIRES_RSA)
                 return 1;
+            if (requirement == REQUIRES_AEAD)
+                return 1;
             break;
 
         case TLS_DHE_RSA_WITH_AES_128_GCM_SHA256 :
@@ -7647,6 +9156,8 @@
                 return 1;
             if (requirement == REQUIRES_DHE)
                 return 1;
+            if (requirement == REQUIRES_AEAD)
+                return 1;
             break;
 
 #ifdef HAVE_CAMELLIA
@@ -7688,6 +9199,8 @@
         case TLS_DH_anon_WITH_AES_256_GCM_SHA384:
             if (requirement == REQUIRES_DHE)
                 return 1;
+            if (requirement == REQUIRES_AEAD)
+                return 1;
             break;
 #endif
 #ifdef WOLFSSL_MULTICAST
@@ -7713,7 +9226,7 @@
 
 
 /* Match names with wildcards, each wildcard can represent a single name
-   component or fragment but not mulitple names, i.e.,
+   component or fragment but not multiple names, i.e.,
    *.z.com matches y.z.com but not x.y.z.com
 
    return 1 on success */
@@ -7792,7 +9305,6 @@
     return match;
 }
 
-
 #ifdef OPENSSL_EXTRA
 /* Check that alternative names, if they exists, match the domain.
  * Fail if there are wild patterns and they didn't match.
@@ -7824,8 +9336,10 @@
             break;
         }
         /* No matches and wild pattern match failed. */
-        else if (altName->name[0] == '*' && match == 0)
+        else if (altName->name && altName->len >=1 &&
+                altName->name[0] == '*' && match == 0) {
             match = -1;
+        }
 
         altName = altName->next;
     }
@@ -7862,6 +9376,13 @@
 
     return 0;
 }
+
+int CheckIPAddr(DecodedCert* dCert, char* ipasc)
+{
+    WOLFSSL_MSG("Checking IPAddr");
+
+    return CheckHostName(dCert, ipasc, (size_t)XSTRLEN(ipasc));
+}
 #endif
 
 #ifdef SESSION_CERTS
@@ -7883,6 +9404,8 @@
 #if defined(KEEP_PEER_CERT) || defined(SESSION_CERTS) || \
     defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
 /* Copy parts X509 needs from Decoded cert, 0 on success */
+/* The same DecodedCert cannot be copied to WOLFSSL_X509 twice otherwise the
+ * altNames pointers could be free'd by second x509 still active by first */
 int CopyDecodedToX509(WOLFSSL_X509* x509, DecodedCert* dCert)
 {
     int ret = 0;
@@ -7926,8 +9449,12 @@
     x509->subject.x509 = x509;
 #endif /* OPENSSL_EXTRA || OPENSSL_EXTRA_X509_SMALL */
 #if defined(OPENSSL_ALL) || defined(WOLFSSL_NGINX)
-    XMEMCPY(x509->subject.raw, dCert->subjectRaw, dCert->subjectRawLen);
-    x509->subject.rawLen = dCert->subjectRawLen;
+    x509->subject.rawLen = min(dCert->subjectRawLen, sizeof(x509->subject.raw));
+    XMEMCPY(x509->subject.raw, dCert->subjectRaw, x509->subject.rawLen);
+#ifdef WOLFSSL_CERT_EXT
+    x509->issuer.rawLen = min(dCert->issuerRawLen, sizeof(x509->issuer.raw));
+    XMEMCPY(x509->issuer.raw, dCert->issuerRaw, x509->issuer.rawLen);
+#endif
 #endif
 
     XMEMCPY(x509->serial, dCert->serial, EXTERNAL_SERIAL_SIZE);
@@ -7965,20 +9492,23 @@
     }
 #endif /* WOLFSSL_SEP */
     {
-        int minSz = min(dCert->beforeDateLen, MAX_DATE_SZ);
-        if (minSz > 0) {
-            x509->notBeforeSz = minSz;
-            XMEMCPY(x509->notBefore, dCert->beforeDate, minSz);
-        }
-        else
-            x509->notBeforeSz = 0;
-        minSz = min(dCert->afterDateLen, MAX_DATE_SZ);
-        if (minSz > 0) {
-            x509->notAfterSz = minSz;
-            XMEMCPY(x509->notAfter, dCert->afterDate, minSz);
-        }
-        else
-            x509->notAfterSz = 0;
+        int minSz;
+        if (dCert->beforeDateLen > 0) {
+            minSz = min(dCert->beforeDate[1], MAX_DATE_SZ);
+            x509->notBefore.type = dCert->beforeDate[0];
+            x509->notBefore.length = minSz;
+            XMEMCPY(x509->notBefore.data, &dCert->beforeDate[2], minSz);
+        }
+        else
+            x509->notBefore.length = 0;
+        if (dCert->afterDateLen > 0) {
+            minSz = min(dCert->afterDate[1], MAX_DATE_SZ);
+            x509->notAfter.type = dCert->afterDate[0];
+            x509->notAfter.length = minSz;
+            XMEMCPY(x509->notAfter.data, &dCert->afterDate[2], minSz);
+        }
+        else
+            x509->notAfter.length = 0;
     }
 
     if (dCert->publicKey != NULL && dCert->pubKeySize != 0) {
@@ -7991,6 +9521,28 @@
         }
         else
             ret = MEMORY_E;
+#if defined(OPENSSL_ALL)
+        if (ret == 0) {
+            x509->key.pubKeyOID = dCert->keyOID;
+
+            if (!x509->key.algor) {
+                x509->key.algor = wolfSSL_X509_ALGOR_new();
+            } else {
+                wolfSSL_ASN1_OBJECT_free(x509->key.algor->algorithm);
+            }
+            if (!(x509->key.algor->algorithm =
+                    wolfSSL_OBJ_nid2obj(dCert->keyOID))) {
+                ret = PUBLIC_KEY_E;
+            }
+
+            wolfSSL_EVP_PKEY_free(x509->key.pkey);
+            if (!(x509->key.pkey = wolfSSL_d2i_PUBKEY(NULL,
+                                                      &dCert->publicKey,
+                                                      dCert->pubKeySize))) {
+                ret = PUBLIC_KEY_E;
+            }
+        }
+#endif
     }
 
     if (dCert->signature != NULL && dCert->sigLength != 0 &&
@@ -8005,6 +9557,13 @@
             x509->sig.length = dCert->sigLength;
             x509->sigOID = dCert->signatureOID;
         }
+#if defined(OPENSSL_ALL)
+        wolfSSL_ASN1_OBJECT_free(x509->algor.algorithm);
+        if (!(x509->algor.algorithm =
+                wolfSSL_OBJ_nid2obj(dCert->signatureOID))) {
+            ret = PUBLIC_KEY_E;
+        }
+#endif
     }
 
     /* store cert for potential retrieval */
@@ -8079,6 +9638,19 @@
             ret = MEMORY_E;
         }
     }
+    #if defined(OPENSSL_ALL) || defined(WOLFSSL_QT)
+    if (dCert->extAuthInfoCaIssuer != NULL && dCert->extAuthInfoCaIssuerSz > 0) {
+        x509->authInfoCaIssuer = (byte*)XMALLOC(dCert->extAuthInfoCaIssuerSz, x509->heap,
+                DYNAMIC_TYPE_X509_EXT);
+        if (x509->authInfoCaIssuer != NULL) {
+            XMEMCPY(x509->authInfoCaIssuer, dCert->extAuthInfoCaIssuer, dCert->extAuthInfoCaIssuerSz);
+            x509->authInfoCaIssuerSz = dCert->extAuthInfoCaIssuerSz;
+        }
+        else {
+            ret = MEMORY_E;
+        }
+    }
+    #endif
     x509->basicConstSet = dCert->extBasicConstSet;
     x509->basicConstCrit = dCert->extBasicConstCrit;
     x509->basicConstPlSet = dCert->pathLengthSet;
@@ -8126,10 +9698,10 @@
             ret = MEMORY_E;
         }
     }
-    #ifdef WOLFSSL_SEP
+    #if defined(WOLFSSL_SEP) || defined(WOLFSSL_QT)
         x509->certPolicySet = dCert->extCertPolicySet;
         x509->certPolicyCrit = dCert->extCertPolicyCrit;
-    #endif /* WOLFSSL_SEP */
+    #endif /* WOLFSSL_SEP || WOLFSSL_QT */
     #ifdef WOLFSSL_CERT_EXT
         {
             int i;
@@ -8140,9 +9712,9 @@
         }
     #endif /* WOLFSSL_CERT_EXT */
 #endif /* OPENSSL_EXTRA || OPENSSL_EXTRA_X509_SMALL */
-#if defined(HAVE_ECC) || defined(HAVE_ED25519)
+#if defined(HAVE_ECC) || defined(HAVE_ED25519) || defined(HAVE_ED448)
     x509->pkCurveOID = dCert->pkCurveOID;
-#endif /* HAVE_ECC */
+#endif /* HAVE_ECC || HAVE_CURVE25519 || HAVE_CURVE448 */
 
     return ret;
 }
@@ -8291,34 +9863,343 @@
 #endif /* HAVE_PK_CALLBACKS */
 
 
-typedef struct ProcPeerCertArgs {
-    buffer*      certs;
+#if !defined(NO_WOLFSSL_CLIENT) || !defined(WOLFSSL_NO_CLIENT_AUTH)
+static void DoCertFatalAlert(WOLFSSL* ssl, int ret)
+{
+    int alertWhy;
+    if (ssl == NULL || ret == 0) {
+        return;
+    }
+
+    /* Determine alert reason */
+    alertWhy = bad_certificate;
+    if (ret == ASN_AFTER_DATE_E || ret == ASN_BEFORE_DATE_E) {
+        alertWhy = certificate_expired;
+    }
+#if (defined(OPENSSL_ALL) || defined(WOLFSSL_APACHE_HTTPD))
+    else if (ret == CRL_CERT_REVOKED) {
+        alertWhy = certificate_revoked;
+    }
+#endif
+    else if (ret == NO_PEER_CERT) {
 #ifdef WOLFSSL_TLS13
-    buffer*      exts; /* extentions */
-#endif
-    DecodedCert* dCert;
-    char*  domain;
-    word32 idx;
-    word32 begin;
-    int    totalCerts; /* number of certs in certs buffer */
-    int    count;
-    int    dCertInit;
-    int    certIdx;
-    int    fatal;
-    int    lastErr;
-#ifdef WOLFSSL_ALT_CERT_CHAINS
-    int    lastCaErr;
-#endif
-#ifdef WOLFSSL_TLS13
-    byte   ctxSz;
-#endif
-#ifdef WOLFSSL_TRUST_PEER_CERT
-    byte haveTrustPeer; /* was cert verified by loaded trusted peer cert */
-#endif
-#if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
-    char   untrustedDepth;
-#endif
-} ProcPeerCertArgs;
+        if (ssl->options.tls1_3) {
+            alertWhy = certificate_required;
+        }
+        else
+#endif
+        {
+            alertWhy = handshake_failure;
+        }
+    }
+
+    /* send fatal alert and mark connection closed */
+    SendAlert(ssl, alert_fatal, alertWhy); /* try to send */
+    ssl->options.isClosed = 1;
+}
+
+/* WOLFSSL_ALWAYS_VERIFY_CB: Use verify callback for success or failure cases */
+/* WOLFSSL_VERIFY_CB_ALL_CERTS: Issue callback for all intermediate certificates */
+
+/* Callback is issued for certificate presented in TLS Certificate (11) packet.
+ * The intermediates are done first then peer leaf cert last. Use the
+ * store->error_depth member to determine index (0=peer, >1 intermediates)
+ */
+
+int DoVerifyCallback(WOLFSSL_CERT_MANAGER* cm, WOLFSSL* ssl, int ret,
+                                                        ProcPeerCertArgs* args)
+{
+    int verify_ok = 0, use_cb = 0;
+    void *heap = (ssl != NULL) ? ssl->heap : cm->heap;
+
+    /* Determine if verify was okay */
+    if (ret == 0) {
+        verify_ok = 1;
+    }
+
+    /* Determine if verify callback should be used */
+    if (ret != 0) {
+        if ((ssl != NULL) && (!ssl->options.verifyNone)) {
+            use_cb = 1; /* always report errors */
+        }
+    }
+#ifdef WOLFSSL_ALWAYS_VERIFY_CB
+    /* always use verify callback on peer leaf cert */
+    if (args->certIdx == 0) {
+        use_cb = 1;
+    }
+#endif
+#ifdef WOLFSSL_VERIFY_CB_ALL_CERTS
+    /* perform verify callback on other intermediate certs (not just peer) */
+    if (args->certIdx > 0) {
+        use_cb = 1;
+    }
+#endif
+#if defined(OPENSSL_EXTRA)
+    /* perform domain name check on the peer certificate */
+    if (args->dCertInit && args->dCert && (ssl != NULL) &&
+            ssl->param && ssl->param->hostName[0]) {
+        /* If altNames names is present, then subject common name is ignored */
+        if (args->dCert->altNames != NULL) {
+            if (CheckAltNames(args->dCert, ssl->param->hostName) == 0 ) {
+                if (ret == 0) {
+                    ret = DOMAIN_NAME_MISMATCH;
+                }
+            }
+        }
+        else {
+            if (args->dCert->subjectCN) {
+                if (MatchDomainName(args->dCert->subjectCN,
+                                    args->dCert->subjectCNLen,
+                                    ssl->param->hostName) == 0) {
+                    if (ret == 0) {
+                        ret = DOMAIN_NAME_MISMATCH;
+                    }
+                }
+            }
+        }
+    }
+
+    /* perform IP address check on the peer certificate */
+    if ((args->dCertInit != 0) && (args->dCert != NULL) && (ssl != NULL) &&
+        (ssl->param != NULL) && (XSTRLEN(ssl->param->ipasc) > 0)) {
+        if (CheckIPAddr(args->dCert, ssl->param->ipasc) != 0) {
+            if (ret == 0) {
+                ret = IPADDR_MISMATCH;
+            }
+        }
+    }
+#endif
+    /* if verify callback has been set */
+    if ((use_cb && (ssl != NULL) && ((ssl->verifyCallback != NULL)
+    #ifdef OPENSSL_ALL
+        || (ssl->ctx->verifyCertCb != NULL)
+    #endif
+        ))
+    #ifndef NO_WOLFSSL_CM_VERIFY
+        || (cm->verifyCallback != NULL)
+    #endif
+        ) {
+        int verifyFail = 0;
+    #ifdef WOLFSSL_SMALL_STACK
+        WOLFSSL_X509_STORE_CTX* store;
+        #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
+        WOLFSSL_X509* x509;
+        #endif
+        char* domain = NULL;
+    #else
+        WOLFSSL_X509_STORE_CTX store[1];
+        #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
+        WOLFSSL_X509           x509[1];
+        #endif
+        char domain[ASN_NAME_MAX];
+    #endif
+
+    #ifdef WOLFSSL_SMALL_STACK
+        store = (WOLFSSL_X509_STORE_CTX*)XMALLOC(
+            sizeof(WOLFSSL_X509_STORE_CTX), heap, DYNAMIC_TYPE_X509_STORE);
+        if (store == NULL) {
+            return MEMORY_E;
+        }
+        #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
+        x509 = (WOLFSSL_X509*)XMALLOC(sizeof(WOLFSSL_X509), heap,
+            DYNAMIC_TYPE_X509);
+        if (x509 == NULL) {
+            XFREE(store, heap, DYNAMIC_TYPE_X509);
+            return MEMORY_E;
+        }
+        #endif
+        domain = (char*)XMALLOC(ASN_NAME_MAX, heap, DYNAMIC_TYPE_STRING);
+        if (domain == NULL) {
+            XFREE(store, heap, DYNAMIC_TYPE_X509);
+            #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
+            XFREE(x509, heap, DYNAMIC_TYPE_X509);
+            #endif
+            return MEMORY_E;
+        }
+    #endif /* WOLFSSL_SMALL_STACK */
+
+        XMEMSET(store, 0, sizeof(WOLFSSL_X509_STORE_CTX));
+    #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
+        XMEMSET(x509, 0, sizeof(WOLFSSL_X509));
+    #endif
+        domain[0] = '\0';
+
+        /* build subject CN as string to return in store */
+        if (args->dCertInit && args->dCert && args->dCert->subjectCN) {
+            int subjectCNLen = args->dCert->subjectCNLen;
+            if (subjectCNLen > ASN_NAME_MAX-1)
+                subjectCNLen = ASN_NAME_MAX-1;
+            if (subjectCNLen > 0) {
+                XMEMCPY(domain, args->dCert->subjectCN, subjectCNLen);
+                domain[subjectCNLen] = '\0';
+            }
+        }
+
+        store->error = ret;
+        store->error_depth = args->certIdx;
+        store->discardSessionCerts = 0;
+        store->domain = domain;
+        store->userCtx = (ssl != NULL) ? ssl->verifyCbCtx : cm;
+        store->certs = args->certs;
+        store->totalCerts = args->totalCerts;
+    #if defined(HAVE_EX_DATA) || defined(FORTRESS)
+        if (wolfSSL_CRYPTO_set_ex_data(&store->ex_data, 0, ssl)
+                != WOLFSSL_SUCCESS) {
+            WOLFSSL_MSG("Failed to store ssl context in WOLFSSL_X509_STORE_CTX");
+        }
+    #endif
+
+        if (ssl != NULL) {
+    #if defined(OPENSSL_EXTRA) || defined(HAVE_WEBSERVER)
+            if (ssl->ctx->x509_store_pt != NULL) {
+                store->store = ssl->ctx->x509_store_pt;
+            }
+            else {
+                store->store = &ssl->ctx->x509_store;
+            }
+    #if defined(OPENSSL_EXTRA)
+            store->depth = args->count;
+            store->param = (WOLFSSL_X509_VERIFY_PARAM*)XMALLOC(
+                            sizeof(WOLFSSL_X509_VERIFY_PARAM),
+                            heap, DYNAMIC_TYPE_OPENSSL);
+            if (store->param == NULL) {
+        #ifdef WOLFSSL_SMALL_STACK
+                XFREE(domain, heap, DYNAMIC_TYPE_STRING);
+            #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
+                XFREE(x509, heap, DYNAMIC_TYPE_X509);
+            #endif
+                XFREE(store, heap, DYNAMIC_TYPE_X509_STORE);
+        #endif
+                return MEMORY_E;
+            }
+            XMEMSET(store->param, 0, sizeof(WOLFSSL_X509_VERIFY_PARAM));
+            /* Overwrite with non-default param values in SSL */
+            if (ssl->param) {
+                if (ssl->param->check_time)
+                    store->param->check_time = ssl->param->check_time;
+
+                if (ssl->param->flags)
+                    store->param->flags = ssl->param->flags;
+
+                if (ssl->param->hostName[0])
+                    XMEMCPY(store->param->hostName, ssl->param->hostName,
+                            WOLFSSL_HOST_NAME_MAX);
+
+            }
+    #endif /* defined(OPENSSL_EXTRA) */
+    #endif /* defined(OPENSSL_EXTRA) || defined(HAVE_WEBSERVER)*/
+    #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
+        #ifdef KEEP_PEER_CERT
+            if (args->certIdx == 0) {
+                store->current_cert = &ssl->peerCert; /* use existing X509 */
+            }
+            else
+        #endif
+            {
+                InitX509(x509, 0, heap);
+                if (CopyDecodedToX509(x509, args->dCert) == 0) {
+                    store->current_cert = x509;
+                }
+                else {
+                    FreeX509(x509);
+                }
+            }
+    #endif
+    #ifdef SESSION_CERTS
+            store->sesChain = &ssl->session.chain;
+    #endif
+        }
+    #ifndef NO_WOLFSSL_CM_VERIFY
+        /* non-zero return code indicates failure override */
+        if ((cm != NULL) && (cm->verifyCallback != NULL)) {
+            store->userCtx = cm;
+            if (cm->verifyCallback(verify_ok, store)) {
+                if (ret != 0) {
+                    WOLFSSL_MSG("Verify CM callback overriding error!");
+                    ret = 0;
+                }
+            }
+            else {
+                verifyFail = 1;
+            }
+        }
+    #endif
+
+        if (ssl != NULL) {
+    #ifdef OPENSSL_ALL
+            /* non-zero return code indicates failure override */
+            if (ssl->ctx->verifyCertCb) {
+                if (ssl->ctx->verifyCertCb(store, ssl->ctx->verifyCertCbArg)) {
+                    if (ret != 0) {
+                        WOLFSSL_MSG("Verify Cert callback overriding error!");
+                        ret = 0;
+                    }
+                }
+                else {
+                    verifyFail = 1;
+                }
+            }
+    #endif
+
+            /* non-zero return code indicates failure override */
+            if (ssl->verifyCallback) {
+                if (ssl->verifyCallback(verify_ok, store)) {
+                    if (ret != 0) {
+                        WOLFSSL_MSG("Verify callback overriding error!");
+                        ret = 0;
+                    }
+                }
+                else {
+                    verifyFail = 1;
+                }
+            }
+        }
+
+        if (verifyFail) {
+            /* induce error if one not present */
+            if (ret == 0) {
+                ret = VERIFY_CERT_ERROR;
+            }
+
+            /* mark as verify error */
+            args->verifyErr = 1;
+        }
+    #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
+        if (args->certIdx > 0)
+            FreeX509(x509);
+    #endif
+    #if defined(SESSION_CERTS) && defined(OPENSSL_EXTRA)
+        wolfSSL_sk_X509_free(store->chain);
+        store->chain = NULL;
+    #endif
+    #ifdef SESSION_CERTS
+        if ((ssl != NULL) && (store->discardSessionCerts)) {
+            WOLFSSL_MSG("Verify callback requested discard sess certs");
+            ssl->session.chain.count = 0;
+        #ifdef WOLFSSL_ALT_CERT_CHAINS
+            ssl->session.altChain.count = 0;
+        #endif
+        }
+    #endif /* SESSION_CERTS */
+#ifdef OPENSSL_EXTRA
+        if ((ssl != NULL) && (store->param)) {
+            XFREE(store->param, heap, DYNAMIC_TYPE_OPENSSL);
+        }
+#endif
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(domain, heap, DYNAMIC_TYPE_STRING);
+        #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
+        XFREE(x509, heap, DYNAMIC_TYPE_X509);
+        #endif
+        XFREE(store, heap, DYNAMIC_TYPE_X509_STORE);
+    #endif
+    }
+
+    (void)heap;
+
+    return ret;
+}
 
 static void FreeProcPeerCertArgs(WOLFSSL* ssl, void* pArgs)
 {
@@ -8326,10 +10207,6 @@
 
     (void)ssl;
 
-    if (args->domain) {
-        XFREE(args->domain, ssl->heap, DYNAMIC_TYPE_STRING);
-        args->domain = NULL;
-    }
     if (args->certs) {
         XFREE(args->certs, ssl->heap, DYNAMIC_TYPE_DER);
         args->certs = NULL;
@@ -8350,6 +10227,197 @@
     }
 }
 
+static int ProcessPeerCertParse(WOLFSSL* ssl, ProcPeerCertArgs* args,
+    int certType, int verify, byte** pSubjectHash, int* pAlreadySigner)
+{
+    int ret = 0;
+    buffer* cert;
+    byte* subjectHash = NULL;
+    int alreadySigner = 0;
+#ifdef WOLFSSL_SMALL_CERT_VERIFY
+    int sigRet = 0;
+#endif
+
+    if (ssl == NULL || args == NULL)
+        return BAD_FUNC_ARG;
+
+    /* check to make sure certificate index is valid */
+    if (args->certIdx > args->count)
+        return BUFFER_E;
+
+    /* check if returning from non-blocking OCSP */
+    /* skip this section because cert is already initialized and parsed */
+#ifdef WOLFSSL_NONBLOCK_OCSP
+    if (args->lastErr == OCSP_WANT_READ) {
+        args->lastErr = 0; /* clear error */
+        return 0;
+    }
+#endif
+
+#ifdef WOLFSSL_TRUST_PEER_CERT
+    /* we have trusted peer */
+    if (args->haveTrustPeer) {
+        return 0;
+    }
+#endif
+
+    /* get certificate buffer */
+    cert = &args->certs[args->certIdx];
+
+#ifdef WOLFSSL_SMALL_CERT_VERIFY
+    if (verify == VERIFY) {
+        /* for small cert verify, release decoded cert during signature check to
+            reduce peak memory usage */
+        if (args->dCert != NULL) {
+            if (args->dCertInit) {
+                FreeDecodedCert(args->dCert);
+                args->dCertInit = 0;
+            }
+            XFREE(args->dCert, ssl->heap, DYNAMIC_TYPE_DCERT);
+            args->dCert = NULL;
+        }
+
+        /* perform cert parsing and signature check */
+        sigRet = CheckCertSignature(cert->buffer, cert->length,
+                                         ssl->heap, ssl->ctx->cm);
+        /* fail on errors here after the ParseCertRelative call, so dCert is populated */
+
+        /* verify name only in ParseCertRelative below, signature check done */
+        verify = VERIFY_NAME;
+    }
+#endif /* WOLFSSL_SMALL_CERT_VERIFY */
+
+    /* make sure the decoded cert structure is allocated and initialized */
+    if (!args->dCertInit
+    #ifdef WOLFSSL_SMALL_CERT_VERIFY
+        || args->dCert == NULL
+    #endif
+    ) {
+    #ifdef WOLFSSL_SMALL_CERT_VERIFY
+        if (args->dCert == NULL) {
+            args->dCert = (DecodedCert*)XMALLOC(
+                                 sizeof(DecodedCert), ssl->heap,
+                                 DYNAMIC_TYPE_DCERT);
+            if (args->dCert == NULL) {
+                return MEMORY_E;
+            }
+        }
+    #endif
+
+        InitDecodedCert(args->dCert, cert->buffer, cert->length, ssl->heap);
+
+        args->dCertInit = 1;
+        args->dCert->sigCtx.devId = ssl->devId;
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        args->dCert->sigCtx.asyncCtx = ssl;
+    #endif
+
+    #ifdef HAVE_PK_CALLBACKS
+        /* setup the PK callback context */
+        ret = InitSigPkCb(ssl, &args->dCert->sigCtx);
+        if (ret != 0)
+            return ret;
+    #endif
+    }
+
+    /* Parse Certificate */
+    ret = ParseCertRelative(args->dCert, certType, verify, ssl->ctx->cm);
+    /* perform below checks for date failure cases */
+    if (ret == 0 || ret == ASN_BEFORE_DATE_E || ret == ASN_AFTER_DATE_E) {
+        /* get subject and determine if already loaded */
+    #ifndef NO_SKID
+        if (args->dCert->extAuthKeyIdSet)
+            subjectHash = args->dCert->extSubjKeyId;
+        else
+    #endif
+            subjectHash = args->dCert->subjectHash;
+        alreadySigner = AlreadySigner(ssl->ctx->cm, subjectHash);
+    }
+
+#ifdef WOLFSSL_SMALL_CERT_VERIFY
+    /* get signature check failures from above */
+    if (ret == 0)
+        ret = sigRet;
+#endif
+
+    if (pSubjectHash)
+        *pSubjectHash = subjectHash;
+    if (pAlreadySigner)
+        *pAlreadySigner = alreadySigner;
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+    if (ret == WC_PENDING_E) {
+        ret = wolfSSL_AsyncPush(ssl,
+            args->dCert->sigCtx.asyncDev);
+    }
+#endif
+
+    return ret;
+}
+
+/* Check key sizes for certs. Is redundant check since
+   ProcessBuffer also performs this check. */
+static int ProcessPeerCertCheckKey(WOLFSSL* ssl, ProcPeerCertArgs* args)
+{
+    int ret = 0;
+
+    if (ssl->options.verifyNone) {
+        return ret;
+    }
+
+    switch (args->dCert->keyOID) {
+    #ifndef NO_RSA
+        case RSAk:
+            if (ssl->options.minRsaKeySz < 0 ||
+                    args->dCert->pubKeySize <
+                     (word16)ssl->options.minRsaKeySz) {
+                WOLFSSL_MSG(
+                    "RSA key size in cert chain error");
+                ret = RSA_KEY_SIZE_E;
+            }
+            break;
+    #endif /* !NO_RSA */
+    #ifdef HAVE_ECC
+        case ECDSAk:
+            if (ssl->options.minEccKeySz < 0 ||
+                    args->dCert->pubKeySize <
+                     (word16)ssl->options.minEccKeySz) {
+                WOLFSSL_MSG(
+                    "ECC key size in cert chain error");
+                ret = ECC_KEY_SIZE_E;
+            }
+            break;
+    #endif /* HAVE_ECC */
+    #ifdef HAVE_ED25519
+        case ED25519k:
+            if (ssl->options.minEccKeySz < 0 ||
+                    ED25519_KEY_SIZE < (word16)ssl->options.minEccKeySz) {
+                WOLFSSL_MSG(
+                    "ECC key size in cert chain error");
+                ret = ECC_KEY_SIZE_E;
+            }
+            break;
+    #endif /* HAVE_ED25519 */
+    #ifdef HAVE_ED448
+        case ED448k:
+            if (ssl->options.minEccKeySz < 0 ||
+                    ED448_KEY_SIZE < (word16)ssl->options.minEccKeySz) {
+                WOLFSSL_MSG(
+                    "ECC key size in cert chain error");
+                ret = ECC_KEY_SIZE_E;
+            }
+            break;
+    #endif /* HAVE_ED448 */
+        default:
+            WOLFSSL_MSG("Key size not checked");
+            /* key not being checked for size if not in
+               switch */
+            break;
+    }
+
+    return ret;
+}
+
 int ProcessPeerCerts(WOLFSSL* ssl, byte* input, word32* inOutIdx,
                      word32 totalSz)
 {
@@ -8360,14 +10428,13 @@
     (void)sizeof(args_test);
 #elif defined(WOLFSSL_NONBLOCK_OCSP)
     ProcPeerCertArgs* args = ssl->nonblockarg;
+#elif defined(WOLFSSL_SMALL_STACK)
+    ProcPeerCertArgs* args = NULL;
 #else
     ProcPeerCertArgs  args[1];
 #endif
-
-    buffer* cert;
-#ifdef WOLFSSL_TRUST_PEER_CERT
-    byte haveTrustPeer = 0; /* was cert verified by loaded trusted peer cert */
-#endif
+    byte* subjectHash = NULL;
+    int alreadySigner = 0;
 
     WOLFSSL_ENTER("ProcessPeerCerts");
 
@@ -8388,6 +10455,12 @@
         }
     }
     if (ssl->nonblockarg == NULL) /* new args */
+#elif defined(WOLFSSL_SMALL_STACK)
+    args = (ProcPeerCertArgs*)XMALLOC(
+        sizeof(ProcPeerCertArgs), ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (args == NULL) {
+        ERROR_OUT(MEMORY_E, exit_ppc);
+    }
 #endif
     {
         /* Reset state */
@@ -8422,16 +10495,16 @@
 
                 /* Certificate Request Context */
                 if ((args->idx - args->begin) + OPAQUE8_LEN > totalSz)
-                    return BUFFER_ERROR;
+                    ERROR_OUT(BUFFER_ERROR, exit_ppc);
                 ctxSz = *(input + args->idx);
                 args->idx++;
                 if ((args->idx - args->begin) + ctxSz > totalSz)
-                    return BUFFER_ERROR;
+                    ERROR_OUT(BUFFER_ERROR, exit_ppc);
             #ifndef NO_WOLFSSL_CLIENT
                 /* Must be empty when received from server. */
                 if (ssl->options.side == WOLFSSL_CLIENT_END) {
                     if (ctxSz != 0) {
-                        return INVALID_CERT_CTX_E;
+                        ERROR_OUT(INVALID_CERT_CTX_E, exit_ppc);
                     }
                 }
             #endif
@@ -8440,7 +10513,7 @@
                 if (ssl->options.side == WOLFSSL_SERVER_END) {
                     if (ssl->options.handShakeState != HANDSHAKE_DONE &&
                                                                    ctxSz != 0) {
-                        return INVALID_CERT_CTX_E;
+                        ERROR_OUT(INVALID_CERT_CTX_E, exit_ppc);
                     }
                     else if (ssl->options.handShakeState == HANDSHAKE_DONE) {
                 #ifdef WOLFSSL_POST_HANDSHAKE_AUTH
@@ -8463,18 +10536,27 @@
                         }
                         if (curr == NULL)
                 #endif
-                            return INVALID_CERT_CTX_E;
+                            ERROR_OUT(INVALID_CERT_CTX_E, exit_ppc);
                     }
                 }
             #endif
                 args->idx += ctxSz;
 
+            #ifdef OPENSSL_EXTRA
+                /* allocate buffer for cert extensions */
+                args->exts = (buffer*)XMALLOC(sizeof(buffer) *
+                     (ssl->verifyDepth + 1), ssl->heap, DYNAMIC_TYPE_CERT_EXT);
+                if (args->exts == NULL) {
+                    ERROR_OUT(MEMORY_E, exit_ppc);
+                }
+            #else
                 /* allocate buffer for cert extensions */
                 args->exts = (buffer*)XMALLOC(sizeof(buffer) * MAX_CHAIN_DEPTH,
                                             ssl->heap, DYNAMIC_TYPE_CERT_EXT);
                 if (args->exts == NULL) {
                     ERROR_OUT(MEMORY_E, exit_ppc);
                 }
+            #endif
             }
         #endif
 
@@ -8494,6 +10576,7 @@
             }
             XMEMSET(args->certs, 0, sizeof(buffer) * MAX_CHAIN_DEPTH);
         #endif /* OPENSSL_EXTRA */
+
             /* Certificate List */
             if ((args->idx - args->begin) + OPAQUE24_LEN > totalSz) {
                 ERROR_OUT(BUFFER_ERROR, exit_ppc);
@@ -8513,7 +10596,7 @@
             while (listSz) {
                 word32 certSz;
 
-            #ifdef OPENSSL_EXTRA
+            #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
                 if (args->totalCerts > ssl->verifyDepth) {
                     ssl->peerVerifyRet = X509_V_ERR_CERT_CHAIN_TOO_LONG;
                     ERROR_OUT(MAX_CHAIN_ERROR, exit_ppc);
@@ -8552,12 +10635,14 @@
                 if (ssl->options.tls1_3) {
                     word16 extSz;
 
-                    if ((args->idx - args->begin) + OPAQUE16_LEN > totalSz)
-                        return BUFFER_ERROR;
+                    if ((args->idx - args->begin) + OPAQUE16_LEN > totalSz) {
+                        ERROR_OUT(BUFFER_ERROR, exit_ppc);
+                    }
                     ato16(input + args->idx, &extSz);
                     args->idx += OPAQUE16_LEN;
-                    if ((args->idx - args->begin) + extSz > totalSz)
-                        return BUFFER_ERROR;
+                    if ((args->idx - args->begin) + extSz > totalSz) {
+                        ERROR_OUT(BUFFER_ERROR, exit_ppc);
+                    }
                     /* Store extension data info for later processing. */
                     args->exts[args->totalCerts].length = extSz;
                     args->exts[args->totalCerts].buffer = input + args->idx;
@@ -8565,8 +10650,9 @@
                     listSz -= extSz + OPAQUE16_LEN;
                     ret = TLSX_Parse(ssl, args->exts[args->totalCerts].buffer,
                         args->exts[args->totalCerts].length, certificate, NULL);
-                    if (ret < 0)
-                        return ret;
+                    if (ret < 0) {
+                        ERROR_OUT(ret, exit_ppc);
+                    }
                 }
             #endif
 
@@ -8575,14 +10661,23 @@
             } /* while (listSz) */
 
             args->count = args->totalCerts;
-            args->certIdx = 0;
+            args->certIdx = 0; /* select peer cert (first one) */
+
+            if (args->count == 0 && ssl->options.mutualAuth &&
+                                      ssl->options.side == WOLFSSL_SERVER_END) {
+                ret = NO_PEER_CERT;
+                DoCertFatalAlert(ssl, ret);
+            }
 
             args->dCertInit = 0;
+        #ifndef WOLFSSL_SMALL_CERT_VERIFY
             args->dCert = (DecodedCert*)XMALLOC(sizeof(DecodedCert), ssl->heap,
                                                        DYNAMIC_TYPE_DCERT);
             if (args->dCert == NULL) {
                 ERROR_OUT(MEMORY_E, exit_ppc);
             }
+            XMEMSET(args->dCert, 0, sizeof(DecodedCert));
+        #endif
 
             /* Advance state and proceed */
             ssl->options.asyncState = TLS_ASYNC_BUILD;
@@ -8592,289 +10687,104 @@
         case TLS_ASYNC_BUILD:
         {
             if (args->count > 0) {
-            #ifdef WOLFSSL_TRUST_PEER_CERT
+
+                /* check for trusted peer and get untrustedDepth */
+            #if defined(WOLFSSL_TRUST_PEER_CERT) || defined(OPENSSL_EXTRA)
                 if (args->certIdx == 0) {
-                    /* if using trusted peer certs check before verify chain
-                       and CA test */
+                #ifdef WOLFSSL_TRUST_PEER_CERT
                     TrustedPeerCert* tp;
-
-                    cert = &args->certs[args->certIdx];
-
-                    if (!args->dCertInit) {
-                        InitDecodedCert(args->dCert,
-                            cert->buffer, cert->length, ssl->heap);
-                        args->dCert->sigCtx.devId = ssl->devId; /* setup async dev */
-                    #ifdef WOLFSSL_ASYNC_CRYPT
-                        args->dCert->sigCtx.asyncCtx = ssl;
-                    #endif
-                        args->dCertInit = 1;
-                    #ifdef HAVE_PK_CALLBACKS
-                        ret = InitSigPkCb(ssl, &args->dCert->sigCtx);
-                        if (ret != 0)
-                            goto exit_ppc;
-                    #endif
-                    }
-
-                    ret = ParseCertRelative(args->dCert, CERT_TYPE, 0,
-                                                            ssl->ctx->cm);
-                #ifdef WOLFSSL_ASYNC_CRYPT
-                    if (ret == WC_PENDING_E) {
-                        ret = wolfSSL_AsyncPush(ssl,
-                            args->dCert->sigCtx.asyncDev);
-                        goto exit_ppc;
-                    }
-                #endif
+                    int matchType = WC_MATCH_NAME;
+                #endif
+
+                    ret = ProcessPeerCertParse(ssl, args, CERT_TYPE, NO_VERIFY,
+                        &subjectHash, &alreadySigner);
                     if (ret != 0)
                         goto exit_ppc;
 
-                #ifndef NO_SKID
-                    if (args->dCert->extAuthKeyIdSet) {
-                        tp = GetTrustedPeer(ssl->ctx->cm,
-                                    args->dCert->extSubjKeyId, WC_MATCH_SKID);
-                    }
-                    else { /* if the cert has no SKID try to match by name */
-                        tp = GetTrustedPeer(ssl->ctx->cm,
-                                    args->dCert->subjectHash, WC_MATCH_NAME);
-                    }
-                #else /* NO_SKID */
-                    tp = GetTrustedPeer(ssl->ctx->cm, args->dCert->subjectHash,
-                                                                 WC_MATCH_NAME);
-                #endif /* NO SKID */
+                #ifdef OPENSSL_EXTRA
+                    /* Determine untrusted depth */
+                    if (!alreadySigner && (!args->dCert ||
+                            !args->dCertInit || !args->dCert->selfSigned)) {
+                        args->untrustedDepth = 1;
+                    }
+                #endif
+
+                #ifdef WOLFSSL_TRUST_PEER_CERT
+                    #ifndef NO_SKID
+                    if (args->dCert->extAuthKeyIdSet)
+                        matchType = WC_MATCH_SKID;
+                    #endif
+                    tp = GetTrustedPeer(ssl->ctx->cm, subjectHash, matchType);
                     WOLFSSL_MSG("Checking for trusted peer cert");
 
-                    if (tp == NULL) {
+                    if (tp && MatchTrustedPeer(tp, args->dCert)) {
+                        WOLFSSL_MSG("Found matching trusted peer cert");
+                        args->haveTrustPeer = 1;
+                    }
+                    else if (tp == NULL) {
                         /* no trusted peer cert */
-                        WOLFSSL_MSG("No matching trusted peer cert. "
-                            "Checking CAs");
+                        WOLFSSL_MSG("No matching trusted peer cert. Checking CAs");
+                    }
+                    else {
+                        WOLFSSL_MSG("Trusted peer cert did not match!");
+                    }
+                    if (!args->haveTrustPeer)
+                #endif
+                    {
+                        /* free cert if not trusted peer */
                         FreeDecodedCert(args->dCert);
                         args->dCertInit = 0;
-                    #ifdef OPENSSL_EXTRA
-                        args->untrustedDepth = 1;
-                    #endif
-                    } else if (MatchTrustedPeer(tp, args->dCert)){
-                        WOLFSSL_MSG("Found matching trusted peer cert");
-                        haveTrustPeer = 1;
-                    } else {
-                        WOLFSSL_MSG("Trusted peer cert did not match!");
-                        FreeDecodedCert(args->dCert);
-                        args->dCertInit = 0;
-                    #ifdef OPENSSL_EXTRA
-                        args->untrustedDepth = 1;
-                    #endif
-                    }
-                }
-            #endif /* WOLFSSL_TRUST_PEER_CERT */
-            #ifdef OPENSSL_EXTRA
-                #ifdef WOLFSSL_TRUST_PEER_CERT
-                else
-                #endif
-                if (args->certIdx == 0) {
-                    byte* subjectHash;
-                    cert = &args->certs[args->certIdx];
-
-                    if (!args->dCertInit) {
-                        InitDecodedCert(args->dCert,
-                            cert->buffer, cert->length, ssl->heap);
-                        args->dCert->sigCtx.devId = ssl->devId;
-                    #ifdef WOLFSSL_ASYNC_CRYPT
-                        args->dCert->sigCtx.asyncCtx = ssl;
-                    #endif
-                        args->dCertInit = 1;
-                    #ifdef HAVE_PK_CALLBACKS
-                        ret = InitSigPkCb(ssl, &args->dCert->sigCtx);
-                        if (ret != 0)
-                            goto exit_ppc;
-                    #endif
-                    }
-
-                    ret = ParseCertRelative(args->dCert, CERT_TYPE, 0,
-                                                                  ssl->ctx->cm);
-                #ifdef WOLFSSL_ASYNC_CRYPT
-                    if (ret == WC_PENDING_E) {
-                        ret = wolfSSL_AsyncPush(ssl,
-                            args->dCert->sigCtx.asyncDev);
-                        goto exit_ppc;
-                    }
-                #endif
-                    if (ret != 0) {
-                        goto exit_ppc;
-                    }
-
-                #ifndef NO_SKID
-                    subjectHash = args->dCert->extSubjKeyId;
-                #else
-                    subjectHash = args->dCert->subjectHash;
-                #endif
-                    if (!AlreadySigner(ssl->ctx->cm, subjectHash))
-                        args->untrustedDepth = 1;
-                    FreeDecodedCert(args->dCert);
-                    args->dCertInit = 0;
-                }
-            #endif
-
-                /* verify up to peer's first */
+                    }
+                }
+            #endif /* WOLFSSL_TRUST_PEER_CERT || OPENSSL_EXTRA */
+
+                /* check certificate up to peer's first */
                 /* do not verify chain if trusted peer cert found */
                 while (args->count > 1
                 #ifdef WOLFSSL_TRUST_PEER_CERT
-                    && !haveTrustPeer
+                    && !args->haveTrustPeer
                 #endif /* WOLFSSL_TRUST_PEER_CERT */
                 ) {
-                    byte *subjectHash;
-
+                    int skipAddCA = 0;
+
+                    /* select last certificate */
                     args->certIdx = args->count - 1;
-                    cert = &args->certs[args->certIdx];
-
-                    if (!args->dCertInit) {
-                        InitDecodedCert(args->dCert,
-                            cert->buffer, cert->length, ssl->heap);
-                        args->dCert->sigCtx.devId = ssl->devId; /* setup async dev */
-                    #ifdef WOLFSSL_ASYNC_CRYPT
-                        args->dCert->sigCtx.asyncCtx = ssl;
-                    #endif
-                        args->dCertInit = 1;
-                    #ifdef HAVE_PK_CALLBACKS
-                        ret = InitSigPkCb(ssl, &args->dCert->sigCtx);
-                        if (ret != 0)
-                            goto exit_ppc;
-                    #endif
-                    }
-
-                    /* check if returning from non-blocking OCSP */
-                #ifdef WOLFSSL_NONBLOCK_OCSP
-                    if (args->lastErr != OCSP_WANT_READ)
-                    {
-                #endif
-
-                    ret = ParseCertRelative(args->dCert, CERT_TYPE,
-                                    !ssl->options.verifyNone, ssl->ctx->cm);
+
+                    ret = ProcessPeerCertParse(ssl, args, CERT_TYPE,
+                        !ssl->options.verifyNone ? VERIFY : NO_VERIFY,
+                        &subjectHash, &alreadySigner);
                 #ifdef WOLFSSL_ASYNC_CRYPT
-                    if (ret == WC_PENDING_E) {
-                        ret = wolfSSL_AsyncPush(ssl,
-                            args->dCert->sigCtx.asyncDev);
+                    if (ret == WC_PENDING_E)
                         goto exit_ppc;
-                    }
-                #endif
-
-                #ifndef NO_SKID
-                    subjectHash = args->dCert->extSubjKeyId;
-                #else
-                    subjectHash = args->dCert->subjectHash;
-                #endif
-
-                    /* Check key sizes for certs. Is redundent check since
-                       ProcessBuffer also performs this check. */
-                    if (!ssl->options.verifyNone) {
-                        switch (args->dCert->keyOID) {
-                        #ifndef NO_RSA
-                            case RSAk:
-                                if (ssl->options.minRsaKeySz < 0 ||
-                                        args->dCert->pubKeySize <
-                                         (word16)ssl->options.minRsaKeySz) {
-                                    WOLFSSL_MSG(
-                                        "RSA key size in cert chain error");
-                                    ret = RSA_KEY_SIZE_E;
-                                }
-                                break;
-                        #endif /* !NO_RSA */
-                        #ifdef HAVE_ECC
-                            case ECDSAk:
-                                if (ssl->options.minEccKeySz < 0 ||
-                                        args->dCert->pubKeySize <
-                                         (word16)ssl->options.minEccKeySz) {
-                                    WOLFSSL_MSG(
-                                        "ECC key size in cert chain error");
-                                    ret = ECC_KEY_SIZE_E;
-                                }
-                                break;
-                        #endif /* HAVE_ECC */
-                        #ifdef HAVE_ED25519
-                            case ED25519k:
-                                if (ssl->options.minEccKeySz < 0 ||
-                                        ED25519_KEY_SIZE <
-                                         (word16)ssl->options.minEccKeySz) {
-                                    WOLFSSL_MSG(
-                                        "ECC key size in cert chain error");
-                                    ret = ECC_KEY_SIZE_E;
-                                }
-                                break;
-                        #endif /* HAVE_ED25519 */
-                            default:
-                                WOLFSSL_MSG("Key size not checked");
-                                /* key not being checked for size if not in
-                                   switch */
-                                break;
-                        } /* switch (dCert->keyOID) */
-                    } /* if (!ssl->options.verifyNone) */
+                #endif
+                    if (ret == 0) {
+                        ret = ProcessPeerCertCheckKey(ssl, args);
+                    }
 
                     if (ret == 0 && args->dCert->isCA == 0) {
                         WOLFSSL_MSG("Chain cert is not a CA, not adding as one");
                     }
                     else if (ret == 0 && ssl->options.verifyNone) {
-                        WOLFSSL_MSG("Chain cert not verified by option, not adding as CA");
-                    }
-                    else if (ret == 0 && !AlreadySigner(ssl->ctx->cm, subjectHash)) {
-                        DerBuffer* add = NULL;
-                        ret = AllocDer(&add, cert->length, CA_TYPE, ssl->heap);
-                        if (ret < 0)
-                            goto exit_ppc;
-
-                        WOLFSSL_MSG("Adding CA from chain");
-
-                        XMEMCPY(add->buffer, cert->buffer, cert->length);
-
-                    #if defined(OPENSSL_ALL) || defined(WOLFSSL_NGINX)
-                        if (args->certIdx > args->untrustedDepth)
-                            args->untrustedDepth = (char) args->certIdx + 1;
-                    #endif
-
-                        /* already verified above */
-                        ret = AddCA(ssl->ctx->cm, &add, WOLFSSL_CHAIN_CA, 0);
-                        if (ret == 1) {
-                            ret = 0;   /* WOLFSSL_SUCCESS for external */
-                        }
-
-                    #ifdef WOLFSSL_ALT_CERT_CHAINS
-                        /* if the previous CA cert failed, clear last error */
-                        if (args->lastCaErr != 0) {
-                            WOLFSSL_MSG("Using alternate cert chain");
-                            ssl->options.usingAltCertChain = 1;
-
-                            /* clear last CA fail since CA cert was validated */
-                            args->lastCaErr = 0;
-
-                        #ifdef SESSION_CERTS
-                            AddSessionCertToChain(&ssl->session.altChain,
-                                cert->buffer, cert->length);
-                        #endif /* SESSION_CERTS */
-                        }
-                    #endif
-                    }
-                    else if (ret != 0) {
-                        WOLFSSL_MSG("Failed to verify CA from chain");
-                    #ifdef WOLFSSL_ALT_CERT_CHAINS
-                        if (args->lastCaErr == 0) {
-                            /* store CA error and proceed to next cert */
-                            args->lastCaErr = ret;
-                            ret = 0;
-                        }
-                        else {
-                            args->lastErr = args->lastCaErr;
-                        }
-                    #endif
+                        WOLFSSL_MSG("Chain cert not verified by option, "
+                            "not adding as CA");
+                    }
+                    else if (ret == 0) {
                     #ifdef OPENSSL_EXTRA
-                        ssl->peerVerifyRet = X509_V_ERR_INVALID_CA;
-                    #endif
+                        if (args->certIdx > args->untrustedDepth) {
+                            args->untrustedDepth = (char)args->certIdx + 1;
+                        }
+                    #endif
+
+                        if (alreadySigner) {
+                            WOLFSSL_MSG("Verified CA from chain and already had it");
+                        }
                     }
                     else {
-                        WOLFSSL_MSG("Verified CA from chain and already had it");
-                    }
-
-                #ifdef WOLFSSL_NONBLOCK_OCSP
-                    }
-                    else {
-                        args->lastErr = 0; /* clear last error */
-                    }
-                #endif
+                        WOLFSSL_MSG("Failed to verify CA from chain");
+                    #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
+                        ssl->peerVerifyRet = X509_V_ERR_INVALID_CA;
+                    #endif
+                    }
 
             #if defined(HAVE_OCSP) || defined(HAVE_CRL)
                     if (ret == 0) {
@@ -8927,201 +10837,77 @@
                     }
             #endif /* HAVE_OCSP || HAVE_CRL */
 
-            #if defined(WOLFSSL_VERIFY_CB_ALL_CERTS)
+                    /* Do verify callback */
+                    ret = DoVerifyCallback(ssl->ctx->cm, ssl, ret, args);
+
+                #ifdef WOLFSSL_ALT_CERT_CHAINS
+                    /* For alternate cert chain, its okay for a CA cert to fail
+                        with ASN_NO_SIGNER_E here. The "alternate" certificate
+                        chain mode only requires that the peer certificate
+                        validate to a trusted CA */
+                    if (ret != 0 && args->dCert->isCA) {
+                        if (ret == ASN_NO_SIGNER_E) {
+                            if (!ssl->options.usingAltCertChain) {
+                                WOLFSSL_MSG("Trying alternate cert chain");
+                                ssl->options.usingAltCertChain = 1;
+                            }
+
+                            ret = 0; /* clear error and continue */
+                        }
+
+                        /* do not add to certificate manager */
+                        skipAddCA = 1;
+                    }
+                #endif /* WOLFSSL_ALT_CERT_CHAINS */
+
+                    /* If valid CA then add to Certificate Manager */
+                    if (ret == 0 && args->dCert->isCA &&
+                            !ssl->options.verifyNone && !skipAddCA) {
+                        buffer* cert = &args->certs[args->certIdx];
+
+                        /* Is valid CA */
+                    #if defined(SESSION_CERTS) && defined(WOLFSSL_ALT_CERT_CHAINS)
+                        /* if using alternate chain, store the cert used */
+                        if (ssl->options.usingAltCertChain) {
+                            AddSessionCertToChain(&ssl->session.altChain,
+                                cert->buffer, cert->length);
+                        }
+                    #endif /* SESSION_CERTS && WOLFSSL_ALT_CERT_CHAINS */
+                        if (!alreadySigner) {
+                            DerBuffer* add = NULL;
+                            ret = AllocDer(&add, cert->length, CA_TYPE, ssl->heap);
+                            if (ret < 0)
+                                goto exit_ppc;
+
+                            XMEMCPY(add->buffer, cert->buffer, cert->length);
+
+                            /* CA already verified above in ParseCertRelative */
+                            WOLFSSL_MSG("Adding CA from chain");
+                            ret = AddCA(ssl->ctx->cm, &add, WOLFSSL_CHAIN_CA,
+                                NO_VERIFY);
+                            if (ret == WOLFSSL_SUCCESS) {
+                                ret = 0;
+                            }
+                        }
+                    }
+
+                    /* Handle error codes */
                     if (ret != 0) {
                         if (!ssl->options.verifyNone) {
-                            int why = bad_certificate;
-
-                        if (ret == ASN_AFTER_DATE_E || ret ==
-                                ASN_BEFORE_DATE_E) {
-                            why = certificate_expired;
-                        }
-                        if (ssl->verifyCallback) {
-                            int ok;
-
-                        #ifdef WOLFSSL_SMALL_STACK
-                            WOLFSSL_X509_STORE_CTX* store;
-                            WOLFSSL_X509* x509 = (WOLFSSL_X509*)XMALLOC(
-                                sizeof(WOLFSSL_X509), ssl->heap,
-                                DYNAMIC_TYPE_X509);
-                            if (x509 == NULL) {
-                                ERROR_OUT(MEMORY_E, exit_ppc);
-                            }
-                            store = (WOLFSSL_X509_STORE_CTX*)XMALLOC(
-                                    sizeof(WOLFSSL_X509_STORE_CTX), ssl->heap,
-                                                    DYNAMIC_TYPE_X509_STORE);
-                            if (store == NULL) {
-                                wolfSSL_X509_free(x509);
-                                ERROR_OUT(MEMORY_E, exit_ppc);
-                            }
-                        #else
-                            WOLFSSL_X509_STORE_CTX  store[1];
-                            WOLFSSL_X509 x509[1];
-                        #endif
-
-                            XMEMSET(store, 0, sizeof(WOLFSSL_X509_STORE_CTX));
-
-                            store->error = ret;
-                            store->error_depth = args->certIdx;
-                            store->discardSessionCerts = 0;
-                            store->domain = args->domain;
-                            store->userCtx = ssl->verifyCbCtx;
-                            store->certs = args->certs;
-                            store->totalCerts = args->totalCerts;
-
-                        #if defined(OPENSSL_EXTRA) || defined(HAVE_WEBSERVER)
-                            if (ssl->ctx->x509_store_pt != NULL) {
-                                store->store = ssl->ctx->x509_store_pt;
-                            }
-                            else {
-                                store->store = &ssl->ctx->x509_store;
-                            }
-                        #endif
-                        #if !defined(NO_CERTS)
-                            InitX509(x509, 1, ssl->heap);
-                            #if defined(KEEP_PEER_CERT) || \
-                                defined(SESSION_CERTS)
-                            if (CopyDecodedToX509(x509, args->dCert) == 0) {
-                                store->current_cert = x509;
-                            }
-                            #endif
-                        #endif
-                        #if defined(HAVE_EX_DATA) || defined(HAVE_FORTRESS)
-                            store->ex_data = ssl;
-                        #endif
-                        #ifdef SESSION_CERTS
-                            store->sesChain = &(ssl->session.chain);
-                        #endif
-                            ok = ssl->verifyCallback(0, store);
-                            if (ok) {
-                                WOLFSSL_MSG("Verify callback overriding error!");
-                                ret = 0;
-                            }
-                        #ifndef NO_CERTS
-                            FreeX509(x509);
-                        #endif
-                        #if defined(SESSION_CERTS) && defined(OPENSSL_EXTRA)
-                            wolfSSL_sk_X509_free(store->chain);
-                            store->chain = NULL;
-                        #endif
-                        #ifdef SESSION_CERTS
-                            if (store->discardSessionCerts) {
-                                WOLFSSL_MSG("Verify callback requested discard sess certs");
-                                ssl->session.chain.count = 0;
-                            #ifdef WOLFSSL_ALT_CERT_CHAINS
-                                ssl->session.altChain.count = 0;
-                            #endif
-                            }
-                        #endif /* SESSION_CERTS */
-                        #ifdef WOLFSSL_SMALL_STACK
-                            XFREE(x509, ssl->heap, DYNAMIC_TYPE_X509);
-                            XFREE(store, ssl->heap, DYNAMIC_TYPE_X509_STORE);
-                        #endif
-                        }
-                        if (ret != 0) {
-                            SendAlert(ssl, alert_fatal, why);   /* try to send */
-                            ssl->options.isClosed = 1;
-                        }
-                    }
-
-                    ssl->error = ret;
-                }
-            #ifdef WOLFSSL_ALWAYS_VERIFY_CB
-                else {
-                    if (ssl->verifyCallback) {
-                        int ok;
-
-                    #ifdef WOLFSSL_SMALL_STACK
-                        WOLFSSL_X509_STORE_CTX* store;
-                        WOLFSSL_X509* x509 = (WOLFSSL_X509*)XMALLOC(
-                                sizeof(WOLFSSL_X509), ssl->heap,
-                                DYNAMIC_TYPE_X509);
-                        if (x509 == NULL) {
-                            ERROR_OUT(MEMORY_E, exit_ppc);
-                        }
-                        store = (WOLFSSL_X509_STORE_CTX*)XMALLOC(
-                                    sizeof(WOLFSSL_X509_STORE_CTX), ssl->heap,
-                                                    DYNAMIC_TYPE_X509_STORE);
-                        if (store == NULL) {
-                            wolfSSL_X509_free(x509);
-                            ERROR_OUT(MEMORY_E, exit_ppc);
-                        }
-                    #else
-                        WOLFSSL_X509_STORE_CTX  store[1];
-                        WOLFSSL_X509            x509[1];
-                    #endif
-
-                        XMEMSET(store, 0, sizeof(WOLFSSL_X509_STORE_CTX));
-
-                        store->error = ret;
-                        store->error_depth = args->certIdx;
-                        store->discardSessionCerts = 0;
-                        store->domain = args->domain;
-                        store->userCtx = ssl->verifyCbCtx;
-                        store->certs = args->certs;
-                        store->totalCerts = args->totalCerts;
-
-                    #if defined(OPENSSL_EXTRA) || defined(HAVE_WEBSERVER)
-                        if (ssl->ctx->x509_store_pt != NULL) {
-                            store->store = ssl->ctx->x509_store_pt;
-                        }
-                        else {
-                            store->store = &ssl->ctx->x509_store;
-                        }
-                    #endif
-                    #if !defined(NO_CERTS)
-                        InitX509(x509, 1, ssl->heap);
-                        #if defined(KEEP_PEER_CERT) || defined(SESSION_CERTS)
-                        if (CopyDecodedToX509(x509, args->dCert) == 0) {
-                            store->current_cert = x509;
-                        }
-                        #endif
-                    #endif
-                    #ifdef SESSION_CERTS
-                        store->sesChain = &(ssl->session.chain);
-                    #endif
-                        store->ex_data = ssl;
-
-                        ok = ssl->verifyCallback(1, store);
-                        if (!ok) {
-                            WOLFSSL_MSG("Verify callback overriding valid certificate!");
-                            ret = -1;
-                            ssl->options.isClosed = 1;
-                        }
-                    #ifndef NO_CERTS
-                        FreeX509(x509);
-                    #endif
-                    #if defined(SESSION_CERTS) && defined(OPENSSL_EXTRA)
-                        wolfSSL_sk_X509_free(store->chain);
-                        store->chain = NULL;
-                    #endif
-                    #ifdef SESSION_CERTS
-                        if (store->discardSessionCerts) {
-                            WOLFSSL_MSG("Verify callback requested discard sess certs");
-                            ssl->session.chain.count = 0;
-                        #ifdef WOLFSSL_ALT_CERT_CHAINS
-                            ssl->session.altChain.count = 0;
-                        #endif
-                        }
-                    #endif /* SESSION_CERTS */
-                    #ifdef WOLFSSL_SMALL_STACK
-                        XFREE(store, ssl->heap, DYNAMIC_TYPE_X509_STORE);
-                        XFREE(x509, ssl->heap, DYNAMIC_TYPE_X509);
-                    #endif
-                    }
-                }
-            #endif /* WOLFSSL_ALWAYS_VERIFY_CB */
-        #endif /* WOLFSSL_VERIFY_CB_ALL_CERTS */
-
-                    if (ret != 0 && args->lastErr == 0) {
-                        args->lastErr = ret;   /* save error from last time */
-                        ret = 0; /* reset error */
+                            DoCertFatalAlert(ssl, ret);
+                        }
+                        ssl->error = ret; /* Report SSL error */
+
+                        if (args->lastErr == 0) {
+                            args->lastErr = ret; /* save error from last time */
+                            ret = 0; /* reset error */
+                        }
                     }
 
                     FreeDecodedCert(args->dCert);
                     args->dCertInit = 0;
                     args->count--;
-                } /* while (count > 0 && !haveTrustPeer) */
+                } /* while (count > 0 && !args->haveTrustPeer) */
             } /* if (count > 0) */
 
             /* Check for error */
@@ -9140,56 +10926,44 @@
             if (args->count > 0) {
                 WOLFSSL_MSG("Verifying Peer's cert");
 
+                /* select peer cert (first one) */
                 args->certIdx = 0;
-                cert = &args->certs[args->certIdx];
-
-                if (!args->dCertInit) {
-                    InitDecodedCert(args->dCert,
-                        cert->buffer, cert->length, ssl->heap);
-                    args->dCert->sigCtx.devId = ssl->devId; /* setup async dev */
-                #ifdef WOLFSSL_ASYNC_CRYPT
-                    args->dCert->sigCtx.asyncCtx = ssl;
-                #endif
-                    args->dCertInit = 1;
-                #ifdef HAVE_PK_CALLBACKS
-                    ret = InitSigPkCb(ssl, &args->dCert->sigCtx);
-                    if (ret != 0)
-                        goto exit_ppc;
-                #endif
-                }
-
-            #ifdef WOLFSSL_TRUST_PEER_CERT
-                if (!haveTrustPeer)
-            #endif
-                {
-                    /* only parse if not already present in dCert from above */
-                    ret = ParseCertRelative(args->dCert, CERT_TYPE,
-                                    !ssl->options.verifyNone, ssl->ctx->cm);
-                #ifdef WOLFSSL_ASYNC_CRYPT
-                    if (ret == WC_PENDING_E) {
-                        ret = wolfSSL_AsyncPush(ssl,
-                            args->dCert->sigCtx.asyncDev);
-                        goto exit_ppc;
-                    }
-                #endif
-                }
-
+
+                ret = ProcessPeerCertParse(ssl, args, CERT_TYPE,
+                        !ssl->options.verifyNone ? VERIFY : NO_VERIFY,
+                        &subjectHash, &alreadySigner);
+            #ifdef WOLFSSL_ASYNC_CRYPT
+                if (ret == WC_PENDING_E)
+                    goto exit_ppc;
+            #endif
                 if (ret == 0) {
                     WOLFSSL_MSG("Verified Peer's cert");
-                #ifdef OPENSSL_EXTRA
+                #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
                     ssl->peerVerifyRet = X509_V_OK;
                 #endif
                 #if defined(SESSION_CERTS) && defined(WOLFSSL_ALT_CERT_CHAINS)
+                    /* if using alternate chain, store the cert used */
                     if (ssl->options.usingAltCertChain) {
+                        buffer* cert = &args->certs[args->certIdx];
                         AddSessionCertToChain(&ssl->session.altChain,
                             cert->buffer, cert->length);
                     }
                 #endif /* SESSION_CERTS && WOLFSSL_ALT_CERT_CHAINS */
-                    args->fatal = 0;
+
+                    /* check if fatal error */
+                    if (args->verifyErr) {
+                        args->fatal = 1;
+                        if (ret == 0) {
+                            ret = args->lastErr;
+                        }
+                    }
+                    else {
+                        args->fatal = 0;
+                    }
                 }
                 else if (ret == ASN_PARSE_E || ret == BUFFER_E) {
                     WOLFSSL_MSG("Got Peer cert ASN PARSE or BUFFER ERROR");
-                #ifdef OPENSSL_EXTRA
+                #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
                     SendAlert(ssl, alert_fatal, bad_certificate);
                     ssl->peerVerifyRet = X509_V_ERR_CERT_REJECTED;
                 #endif
@@ -9197,13 +10971,14 @@
                 }
                 else {
                     WOLFSSL_MSG("Failed to verify Peer's cert");
-                #ifdef OPENSSL_EXTRA
+                #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
                     ssl->peerVerifyRet = X509_V_ERR_UNABLE_TO_VERIFY_LEAF_SIGNATURE;
                 #endif
                     if (ssl->verifyCallback) {
                         WOLFSSL_MSG(
                             "\tCallback override available, will continue");
-                        args->fatal = 0;
+                        /* check if fatal error */
+                        args->fatal = (args->verifyErr) ? 1 : 0;
                     }
                     else {
                         WOLFSSL_MSG("\tNo callback override available, fatal");
@@ -9215,25 +10990,29 @@
                 }
 
             #ifdef HAVE_SECURE_RENEGOTIATION
-                if (args->fatal == 0 && ssl->secure_renegotiation
-                               && ssl->secure_renegotiation->enabled) {
+                if (args->fatal == 0 && !IsAtLeastTLSv1_3(ssl->version)
+                                     && ssl->secure_renegotiation
+                                     && ssl->secure_renegotiation->enabled) {
 
                     if (IsEncryptionOn(ssl, 0)) {
                         /* compare against previous time */
-                        if (XMEMCMP(args->dCert->subjectHash,
-                                    ssl->secure_renegotiation->subject_hash,
-                                    WC_SHA_DIGEST_SIZE) != 0) {
-                            WOLFSSL_MSG(
-                                "Peer sent different cert during scr, fatal");
-                            args->fatal = 1;
-                            ret   = SCR_DIFFERENT_CERT_E;
+                        if (ssl->secure_renegotiation->subject_hash_set) {
+                            if (XMEMCMP(args->dCert->subjectHash,
+                                        ssl->secure_renegotiation->subject_hash,
+                                        KEYID_SIZE) != 0) {
+                                WOLFSSL_MSG(
+                                  "Peer sent different cert during scr, fatal");
+                                args->fatal = 1;
+                                ret = SCR_DIFFERENT_CERT_E;
+                            }
                         }
                     }
 
                     /* cache peer's hash */
                     if (args->fatal == 0) {
                         XMEMCPY(ssl->secure_renegotiation->subject_hash,
-                                args->dCert->subjectHash, WC_SHA_DIGEST_SIZE);
+                                args->dCert->subjectHash, KEYID_SIZE);
+                        ssl->secure_renegotiation->subject_hash_set = 1;
                     }
                 }
             #endif /* HAVE_SECURE_RENEGOTIATION */
@@ -9262,7 +11041,7 @@
                             args->fatal = TLSX_CSR_InitRequest(ssl->extensions,
                                                     args->dCert, ssl->heap);
                             doLookup = 0;
-                        #ifdef WOLFSSL_TLS13
+                        #if defined(WOLFSSL_TLS13) && !defined(NO_WOLFSSL_SERVER)
                             if (ssl->options.tls1_3) {
                                 TLSX* ext = TLSX_Find(ssl->extensions,
                                                            TLSX_STATUS_REQUEST);
@@ -9302,7 +11081,7 @@
                         if (ret != 0) {
                             WOLFSSL_MSG("\tOCSP Lookup not ok");
                             args->fatal = 0;
-                        #ifdef OPENSSL_EXTRA
+                        #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
                             ssl->peerVerifyRet = X509_V_ERR_CERT_REJECTED;
                         #endif
                         }
@@ -9321,7 +11100,7 @@
                         if (ret != 0) {
                             WOLFSSL_MSG("\tCRL check not ok");
                             args->fatal = 0;
-                        #ifdef OPENSSL_EXTRA
+                        #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
                             ssl->peerVerifyRet = X509_V_ERR_CERT_REJECTED;
                         #endif
                         }
@@ -9333,9 +11112,18 @@
 
             #ifdef KEEP_PEER_CERT
                 if (args->fatal == 0) {
+                    int copyRet = 0;
+
+                    #ifdef HAVE_SECURE_RENEGOTIATION
+                        if (ssl->secure_renegotiation &&
+                                           ssl->secure_renegotiation->enabled) {
+                            /* free old peer cert */
+                            FreeX509(&ssl->peerCert);
+                        }
+                    #endif
+
                     /* set X509 format for peer cert */
-                    int copyRet = CopyDecodedToX509(&ssl->peerCert,
-                                                                args->dCert);
+                    copyRet = CopyDecodedToX509(&ssl->peerCert, args->dCert);
                     if (copyRet == MEMORY_E) {
                         args->fatal = 1;
                     }
@@ -9398,7 +11186,7 @@
 
                 if (args->fatal) {
                     ssl->error = ret;
-                #ifdef OPENSSL_EXTRA
+                #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
                     SendAlert(ssl, alert_fatal, bad_certificate);
                     ssl->peerVerifyRet = X509_V_ERR_CERT_REJECTED;
                 #endif
@@ -9407,23 +11195,6 @@
 
                 ssl->options.havePeerCert = 1;
 
-                args->domain = (char*)XMALLOC(ASN_NAME_MAX, ssl->heap,
-                                                    DYNAMIC_TYPE_STRING);
-                if (args->domain == NULL) {
-                    ERROR_OUT(MEMORY_E, exit_ppc);
-                }
-
-                /* store for callback use */
-                if (args->dCert->subjectCN &&
-                                    args->dCert->subjectCNLen < ASN_NAME_MAX) {
-                    XMEMCPY(args->domain, args->dCert->subjectCN,
-                        args->dCert->subjectCNLen);
-                    args->domain[args->dCert->subjectCNLen] = '\0';
-                }
-                else {
-                    args->domain[0] = '\0';
-                }
-
                 if (!ssl->options.verifyNone && ssl->buffers.domainName.buffer) {
                 #ifndef WOLFSSL_ALLOW_NO_CN_IN_SAN
                     /* Per RFC 5280 section 4.2.1.6, "Whenever such identities
@@ -9481,14 +11252,41 @@
                         }
 
                         if (keyRet != 0 || wc_RsaPublicKeyDecode(
-                                args->dCert->publicKey, &keyIdx, ssl->peerRsaKey,
+                               args->dCert->publicKey, &keyIdx, ssl->peerRsaKey,
                                                 args->dCert->pubKeySize) != 0) {
                             ret = PEER_KEY_ERROR;
                         }
                         else {
                             ssl->peerRsaKeyPresent = 1;
+                    #ifdef WOLFSSL_RENESAS_TSIP_TLS
+                        /* copy encrypted tsip key index into ssl object */
+                        if (args->dCert->tsip_encRsaKeyIdx) {
+                            if (!ssl->peerTsipEncRsaKeyIndex) {
+                                ssl->peerTsipEncRsaKeyIndex = (byte*)XMALLOC(
+                                    TSIP_TLS_ENCPUBKEY_SZ_BY_CERTVRFY,
+                                    ssl->heap, DYNAMIC_TYPE_RSA);
+                                if (!ssl->peerTsipEncRsaKeyIndex) {
+                                    args->lastErr = MEMORY_E;
+                                    goto exit_ppc;
+                                }
+                            }
+
+                            XMEMCPY(ssl->peerTsipEncRsaKeyIndex,
+                                        args->dCert->tsip_encRsaKeyIdx,
+                                        TSIP_TLS_ENCPUBKEY_SZ_BY_CERTVRFY);
+                         }
+                    #endif
                     #ifdef HAVE_PK_CALLBACKS
                         #ifndef NO_RSA
+                            #ifdef HAVE_SECURE_RENEGOTIATION
+                            if (ssl->buffers.peerRsaKey.buffer) {
+                                XFREE(ssl->buffers.peerRsaKey.buffer,
+                                        ssl->heap, DYNAMIC_TYPE_RSA);
+                                ssl->buffers.peerRsaKey.buffer = NULL;
+                            }
+                            #endif
+
+
                             ssl->buffers.peerRsaKey.buffer =
                                    (byte*)XMALLOC(args->dCert->pubKeySize,
                                                 ssl->heap, DYNAMIC_TYPE_RSA);
@@ -9635,18 +11433,60 @@
                         break;
                     }
                 #endif /* HAVE_ED25519 */
+                #ifdef HAVE_ED448
+                    case ED448k:
+                    {
+                        int keyRet = 0;
+                        if (ssl->peerEd448Key == NULL) {
+                            /* alloc/init on demand */
+                            keyRet = AllocKey(ssl, DYNAMIC_TYPE_ED448,
+                                    (void**)&ssl->peerEd448Key);
+                        } else if (ssl->peerEd448KeyPresent) {
+                            keyRet = ReuseKey(ssl, DYNAMIC_TYPE_ED448,
+                                    ssl->peerEd448Key);
+                            ssl->peerEd448KeyPresent = 0;
+                        }
+
+                        if (keyRet != 0 ||
+                            wc_ed448_import_public(args->dCert->publicKey,
+                                    args->dCert->pubKeySize,
+                                    ssl->peerEd448Key) != 0) {
+                            ret = PEER_KEY_ERROR;
+                        }
+                        else {
+                            ssl->peerEd448KeyPresent = 1;
+                    #ifdef HAVE_PK_CALLBACKS
+                            ssl->buffers.peerEd448Key.buffer =
+                                   (byte*)XMALLOC(args->dCert->pubKeySize,
+                                           ssl->heap, DYNAMIC_TYPE_ED448);
+                            if (ssl->buffers.peerEd448Key.buffer == NULL) {
+                                ERROR_OUT(MEMORY_ERROR, exit_ppc);
+                            }
+                            else {
+                                XMEMCPY(ssl->buffers.peerEd448Key.buffer,
+                                        args->dCert->publicKey,
+                                        args->dCert->pubKeySize);
+                                ssl->buffers.peerEd448Key.length =
+                                        args->dCert->pubKeySize;
+                            }
+                    #endif /*HAVE_PK_CALLBACKS */
+                        }
+
+                        /* check size of peer ECC key */
+                        if (ret == 0 && ssl->peerEd448KeyPresent &&
+                               !ssl->options.verifyNone &&
+                               ED448_KEY_SIZE < ssl->options.minEccKeySz) {
+                            ret = ECC_KEY_SIZE_E;
+                            WOLFSSL_MSG("Peer ECC key is too small");
+                        }
+                        break;
+                    }
+                #endif /* HAVE_ED448 */
                     default:
                         break;
                 }
 
-                FreeDecodedCert(args->dCert);
-                args->dCertInit = 0;
-
-                /* release since we don't need it anymore */
-                if (args->dCert) {
-                    XFREE(args->dCert, ssl->heap, DYNAMIC_TYPE_DCERT);
-                    args->dCert = NULL;
-                }
+                /* args->dCert free'd in function cleanup after callback */
             } /* if (count > 0) */
 
             /* Check for error */
@@ -9661,153 +11501,32 @@
 
         case TLS_ASYNC_FINALIZE:
         {
-        #ifdef WOLFSSL_SMALL_STACK
-            WOLFSSL_X509_STORE_CTX* store = (WOLFSSL_X509_STORE_CTX*)XMALLOC(
-                                    sizeof(WOLFSSL_X509_STORE_CTX), ssl->heap,
-                                                    DYNAMIC_TYPE_X509_STORE);
-            if (store == NULL) {
-                ERROR_OUT(MEMORY_E, exit_ppc);
-            }
-        #else
-            WOLFSSL_X509_STORE_CTX  store[1];
-        #endif
-
-            XMEMSET(store, 0, sizeof(WOLFSSL_X509_STORE_CTX));
-
             /* load last error */
             if (args->lastErr != 0 && ret == 0) {
                 ret = args->lastErr;
             }
 
-        #ifdef OPENSSL_EXTRA
+        #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
             if (args->untrustedDepth > ssl->options.verifyDepth) {
                 ssl->peerVerifyRet = X509_V_ERR_CERT_CHAIN_TOO_LONG;
                 ret = MAX_CHAIN_ERROR;
             }
         #endif
+
+            /* Do verify callback */
+            ret = DoVerifyCallback(ssl->ctx->cm, ssl, ret, args);
+
+            if (ssl->options.verifyNone &&
+                              (ret == CRL_MISSING || ret == CRL_CERT_REVOKED)) {
+                WOLFSSL_MSG("Ignoring CRL problem based on verify setting");
+                ret = ssl->error = 0;
+            }
+
             if (ret != 0) {
                 if (!ssl->options.verifyNone) {
-                    int why = bad_certificate;
-
-                    if (ret == ASN_AFTER_DATE_E || ret == ASN_BEFORE_DATE_E) {
-                        why = certificate_expired;
-                    }
-                    if (ssl->verifyCallback) {
-                        int ok;
-
-                        store->error = ret;
-                        store->error_depth = args->certIdx;
-                        store->discardSessionCerts = 0;
-                        store->domain = args->domain;
-                        store->userCtx = ssl->verifyCbCtx;
-                        store->certs = args->certs;
-                        store->totalCerts = args->totalCerts;
-
-                    #if defined(OPENSSL_EXTRA) || defined(HAVE_WEBSERVER)
-                        if (ssl->ctx->x509_store_pt != NULL) {
-                            store->store = ssl->ctx->x509_store_pt;
-                        }
-                        else {
-                            store->store = &ssl->ctx->x509_store;
-                        }
-                    #endif
-                    #ifdef KEEP_PEER_CERT
-                        if (ssl->peerCert.subject.sz > 0)
-                            store->current_cert = &ssl->peerCert;
-                        else
-                            store->current_cert = NULL;
-                    #else
-                        store->current_cert = NULL;
-                    #endif /* KEEP_PEER_CERT */
-                    #if defined(HAVE_EX_DATA) || defined(HAVE_FORTRESS)
-                        store->ex_data = ssl;
-                    #endif
-                    #ifdef SESSION_CERTS
-                        store->sesChain = &(ssl->session.chain);
-                    #endif
-                        ok = ssl->verifyCallback(0, store);
-                        if (ok) {
-                            WOLFSSL_MSG("Verify callback overriding error!");
-                            ret = 0;
-                        }
-                    #ifdef SESSION_CERTS
-                        if (store->discardSessionCerts) {
-                            WOLFSSL_MSG("Verify callback requested discard sess certs");
-                            ssl->session.chain.count = 0;
-                        #ifdef WOLFSSL_ALT_CERT_CHAINS
-                            ssl->session.altChain.count = 0;
-                        #endif
-                        }
-                    #endif /* SESSION_CERTS */
-                    }
-                    if (ret != 0) {
-                        SendAlert(ssl, alert_fatal, why);   /* try to send */
-                        ssl->options.isClosed = 1;
-                    }
-                }
-
-                ssl->error = ret;
-            }
-        #ifdef WOLFSSL_ALWAYS_VERIFY_CB
-            else {
-                if (ssl->verifyCallback) {
-                    int ok;
-
-                    store->error = ret;
-                #ifdef WOLFSSL_WPAS
-                    store->error_depth = 0;
-                #else
-                    store->error_depth = args->certIdx;
-                #endif
-                    store->discardSessionCerts = 0;
-                    store->domain = args->domain;
-                    store->userCtx = ssl->verifyCbCtx;
-                    store->certs = args->certs;
-                    store->totalCerts = args->totalCerts;
-
-                #if defined(OPENSSL_EXTRA) || defined(HAVE_WEBSERVER)
-                    if (ssl->ctx->x509_store_pt != NULL) {
-                        store->store = ssl->ctx->x509_store_pt;
-                    }
-                    else {
-                        store->store = &ssl->ctx->x509_store;
-                    }
-                #endif
-                #ifdef KEEP_PEER_CERT
-                    if (ssl->peerCert.subject.sz > 0)
-                        store->current_cert = &ssl->peerCert;
-                    else
-                        store->current_cert = NULL;
-                #endif
-                    store->ex_data = ssl;
-                #ifdef SESSION_CERTS
-                    store->sesChain = &(ssl->session.chain);
-                #endif
-
-                    ok = ssl->verifyCallback(1, store);
-                    if (!ok) {
-                        WOLFSSL_MSG("Verify callback overriding valid certificate!");
-                        ret = -1;
-                        SendAlert(ssl, alert_fatal, bad_certificate);
-                        ssl->options.isClosed = 1;
-                    }
-                #ifdef SESSION_CERTS
-                    if (store->discardSessionCerts) {
-                        WOLFSSL_MSG("Verify callback requested discard sess certs");
-                        ssl->session.chain.count = 0;
-                    #ifdef WOLFSSL_ALT_CERT_CHAINS
-                        ssl->session.altChain.count = 0;
-                    #endif
-                    }
-                #endif /* SESSION_CERTS */
-                }
-            }
-        #endif /* WOLFSSL_ALWAYS_VERIFY_CB */
-
-            if (ssl->options.verifyNone &&
-                                      (ret == CRL_MISSING || ret == CRL_CERT_REVOKED)) {
-                WOLFSSL_MSG("Ignoring CRL problem based on verify setting");
-                ret = ssl->error = 0;
+                    DoCertFatalAlert(ssl, ret);
+                }
+                ssl->error = ret; /* Report SSL error */
             }
 
             if (ret == 0 && ssl->options.side == WOLFSSL_CLIENT_END) {
@@ -9816,15 +11535,12 @@
 
             if (IsEncryptionOn(ssl, 0)) {
                 args->idx += ssl->keys.padSz;
-            }
-
-        #if defined(SESSION_CERTS) && defined(OPENSSL_EXTRA)
-            wolfSSL_sk_X509_free(store->chain);
-            store->chain = NULL;
-        #endif
-        #ifdef WOLFSSL_SMALL_STACK
-            XFREE(store, ssl->heap, DYNAMIC_TYPE_X509_STORE);
-        #endif
+            #if defined(HAVE_ENCRYPT_THEN_MAC) && !defined(WOLFSSL_AEAD_ONLY)
+                if (ssl->options.startedETMRead)
+                    args->idx += MacSize(ssl);
+            #endif
+            }
+
             /* Advance state and proceed */
             ssl->options.asyncState = TLS_ASYNC_END;
         } /* case TLS_ASYNC_FINALIZE */
@@ -9849,7 +11565,7 @@
 
 #if defined(WOLFSSL_ASYNC_CRYPT) || defined(WOLFSSL_NONBLOCK_OCSP)
     if (ret == WC_PENDING_E || ret == OCSP_WANT_READ) {
-        /* Mark message as not recevied so it can process again */
+        /* Mark message as not received so it can process again */
         ssl->msgsReceived.got_certificate = 0;
 
         return ret;
@@ -9858,17 +11574,22 @@
 
     FreeProcPeerCertArgs(ssl, args);
 
-#if !defined(WOLFSSL_ASYNC_CRYPT) && defined(WOLFSSL_NONBLOCK_OCSP)
+#if defined(WOLFSSL_ASYNC_CRYPT)
+#elif defined(WOLFSSL_NONBLOCK_OCSP)
     XFREE(args, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
     ssl->nonblockarg = NULL;
+#elif defined(WOLFSSL_SMALL_STACK)
+    XFREE(args, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
 #endif
 
     FreeKeyExchange(ssl);
 
     return ret;
 }
+#endif
 
 #ifndef WOLFSSL_NO_TLS12
+#if !defined(NO_WOLFSSL_CLIENT) || !defined(WOLFSSL_NO_CLIENT_AUTH)
 
 /* handle processing of certificate (11) */
 static int DoCertificate(WOLFSSL* ssl, byte* input, word32* inOutIdx,
@@ -9879,7 +11600,19 @@
     WOLFSSL_START(WC_FUNC_CERTIFICATE_DO);
     WOLFSSL_ENTER("DoCertificate");
 
+#ifdef SESSION_CERTS
+    /* Reset the session cert chain count in case the session resume failed. */
+    ssl->session.chain.count = 0;
+    #ifdef WOLFSSL_ALT_CERT_CHAINS
+        ssl->session.altChain.count = 0;
+    #endif
+#endif /* SESSION_CERTS */
+
     ret = ProcessPeerCerts(ssl, input, inOutIdx, size);
+#ifdef WOLFSSL_EXTRA_ALERTS
+    if (ret == BUFFER_ERROR || ret == ASN_PARSE_E)
+        SendAlert(ssl, alert_fatal, decode_error);
+#endif
 
 #ifdef OPENSSL_EXTRA
     ssl->options.serverState = SERVER_CERT_COMPLETE;
@@ -10028,12 +11761,31 @@
     if (ret != 0)
         SendAlert(ssl, alert_fatal, bad_certificate_status_response);
 
+    if (IsEncryptionOn(ssl, 0)) {
+    #if defined(HAVE_ENCRYPT_THEN_MAC) && !defined(WOLFSSL_AEAD_ONLY)
+        if (ssl->options.startedETMRead) {
+            word32 digestSz = MacSize(ssl);
+            if (*inOutIdx + ssl->keys.padSz + digestSz > size)
+                return BUFFER_E;
+            *inOutIdx += ssl->keys.padSz + digestSz;
+        }
+        else
+    #endif
+        {
+            if (*inOutIdx + ssl->keys.padSz > size)
+                return BUFFER_E;
+            *inOutIdx += ssl->keys.padSz;
+        }
+    }
+
     WOLFSSL_LEAVE("DoCertificateStatus", ret);
     WOLFSSL_END(WC_FUNC_CERTIFICATE_STATUS_DO);
 
     return ret;
 }
 
+#endif
+
 #endif /* !WOLFSSL_NO_TLS12 */
 
 #endif /* !NO_CERTS */
@@ -10045,15 +11797,29 @@
 {
     (void)input;
 
+    WOLFSSL_START(WC_FUNC_HELLO_REQUEST_DO);
+    WOLFSSL_ENTER("DoHelloRequest");
+
     if (size) /* must be 0 */
         return BUFFER_ERROR;
 
     if (IsEncryptionOn(ssl, 0)) {
-        /* access beyond input + size should be checked against totalSz */
-        if (*inOutIdx + ssl->keys.padSz > totalSz)
-            return BUFFER_E;
-
-        *inOutIdx += ssl->keys.padSz;
+    #if defined(HAVE_ENCRYPT_THEN_MAC) && !defined(WOLFSSL_AEAD_ONLY)
+        if (ssl->options.startedETMRead) {
+            word32 digestSz = MacSize(ssl);
+            if (*inOutIdx + ssl->keys.padSz + digestSz > totalSz)
+                return BUFFER_E;
+            *inOutIdx += ssl->keys.padSz + digestSz;
+        }
+        else
+    #endif
+        {
+            /* access beyond input + size should be checked against totalSz */
+            if (*inOutIdx + ssl->keys.padSz > totalSz)
+                return BUFFER_E;
+
+            *inOutIdx += ssl->keys.padSz;
+        }
     }
 
     if (ssl->options.side == WOLFSSL_SERVER_END) {
@@ -10063,6 +11829,8 @@
 #ifdef HAVE_SECURE_RENEGOTIATION
     else if (ssl->secure_renegotiation && ssl->secure_renegotiation->enabled) {
         ssl->secure_renegotiation->startScr = 1;
+        WOLFSSL_LEAVE("DoHelloRequest", 0);
+        WOLFSSL_END(WC_FUNC_HELLO_REQUEST_DO);
         return 0;
     }
 #endif
@@ -10084,8 +11852,17 @@
         return BUFFER_ERROR;
 
     /* check against totalSz */
-    if (*inOutIdx + size + ssl->keys.padSz > totalSz)
-        return BUFFER_E;
+    #if defined(HAVE_ENCRYPT_THEN_MAC) && !defined(WOLFSSL_AEAD_ONLY)
+        if (ssl->options.startedETMRead) {
+            if (*inOutIdx + size + ssl->keys.padSz + MacSize(ssl) > totalSz)
+                return BUFFER_E;
+        }
+        else
+    #endif
+        {
+            if (*inOutIdx + size + ssl->keys.padSz > totalSz)
+                return BUFFER_E;
+        }
 
     #ifdef WOLFSSL_CALLBACKS
         if (ssl->hsInfoOn) AddPacketName(ssl, "Finished");
@@ -10095,6 +11872,9 @@
     if (sniff == NO_SNIFF) {
         if (XMEMCMP(input + *inOutIdx, &ssl->hsHashes->verifyHashes,size) != 0){
             WOLFSSL_MSG("Verify finished error on hashes");
+    #ifdef WOLFSSL_EXTRA_ALERTS
+            SendAlert(ssl, alert_fatal, decrypt_error);
+    #endif
             return VERIFY_FINISHED_ERROR;
         }
     }
@@ -10108,27 +11888,32 @@
         else
             XMEMCPY(ssl->secure_renegotiation->client_verify_data,
                     input + *inOutIdx, TLS_FINISHED_SZ);
+        ssl->secure_renegotiation->verifySet = 1;
     }
 #endif
 
     /* force input exhaustion at ProcessReply consuming padSz */
     *inOutIdx += size + ssl->keys.padSz;
+#if defined(HAVE_ENCRYPT_THEN_MAC) && !defined(WOLFSSL_AEAD_ONLY)
+    if (ssl->options.startedETMRead)
+        *inOutIdx += MacSize(ssl);
+#endif
 
     if (ssl->options.side == WOLFSSL_CLIENT_END) {
         ssl->options.serverState = SERVER_FINISHED_COMPLETE;
 #ifdef OPENSSL_EXTRA
-		ssl->cbmode = SSL_CB_MODE_WRITE;
-		ssl->options.clientState = CLIENT_FINISHED_COMPLETE;
+        ssl->cbmode = SSL_CB_MODE_WRITE;
+        ssl->options.clientState = CLIENT_FINISHED_COMPLETE;
 #endif
         if (!ssl->options.resuming) {
 #ifdef OPENSSL_EXTRA
-			if (ssl->CBIS != NULL) {
-				ssl->CBIS(ssl, SSL_CB_CONNECT_LOOP, SSL_SUCCESS);
-			}
-#endif
-			ssl->options.handShakeState = HANDSHAKE_DONE;
-			ssl->options.handShakeDone  = 1;
-		}
+            if (ssl->CBIS != NULL) {
+                ssl->CBIS(ssl, SSL_CB_CONNECT_LOOP, SSL_SUCCESS);
+            }
+#endif
+            ssl->options.handShakeState = HANDSHAKE_DONE;
+            ssl->options.handShakeDone  = 1;
+        }
     }
     else {
         ssl->options.clientState = CLIENT_FINISHED_COMPLETE;
@@ -10138,14 +11923,19 @@
 #endif
         if (ssl->options.resuming) {
 #ifdef OPENSSL_EXTRA
-			if (ssl->CBIS != NULL) {
-				ssl->CBIS(ssl, SSL_CB_ACCEPT_LOOP, SSL_SUCCESS);
-			}
-#endif
-			ssl->options.handShakeState = HANDSHAKE_DONE;
-			ssl->options.handShakeDone  = 1;
-        }
-    }
+            if (ssl->CBIS != NULL) {
+                ssl->CBIS(ssl, SSL_CB_ACCEPT_LOOP, SSL_SUCCESS);
+            }
+#endif
+            ssl->options.handShakeState = HANDSHAKE_DONE;
+            ssl->options.handShakeDone  = 1;
+        }
+    }
+#ifdef WOLFSSL_DTLS
+    if (ssl->options.dtls) {
+        DtlsMsgPoolReset(ssl);
+    }
+#endif
 
     WOLFSSL_LEAVE("DoFinished", 0);
     WOLFSSL_END(WC_FUNC_FINISHED_DO);
@@ -10175,6 +11965,9 @@
         case client_hello:
             if (ssl->msgsReceived.got_client_hello) {
                 WOLFSSL_MSG("Duplicate ClientHello received");
+    #ifdef WOLFSSL_EXTRA_ALERTS
+                SendAlert(ssl, alert_fatal, unexpected_message);
+    #endif
                 return DUPLICATE_MSG_E;
             }
             ssl->msgsReceived.got_client_hello = 1;
@@ -10331,7 +12124,8 @@
 
                 #ifndef NO_PSK
                     if (ssl->specs.kea == psk_kea &&
-                                               ssl->arrays->server_hint[0] == 0)
+                        ssl->arrays != NULL &&
+                        ssl->arrays->server_hint[0] == 0)
                         pskNoServerHint = 1;
                 #endif
                 if (ssl->specs.static_ecdh == 1 ||
@@ -10366,6 +12160,9 @@
         case client_key_exchange:
             if (ssl->msgsReceived.got_client_key_exchange) {
                 WOLFSSL_MSG("Duplicate ClientKeyExchange received");
+    #ifdef WOLFSSL_EXTRA_ALERTS
+                SendAlert(ssl, alert_fatal, unexpected_message);
+    #endif
                 return DUPLICATE_MSG_E;
             }
             ssl->msgsReceived.got_client_key_exchange = 1;
@@ -10386,6 +12183,9 @@
 
             if (ssl->msgsReceived.got_change_cipher == 0) {
                 WOLFSSL_MSG("Finished received before ChangeCipher");
+    #ifdef WOLFSSL_EXTRA_ALERTS
+                SendAlert(ssl, alert_fatal, unexpected_message);
+    #endif
                 return NO_CHANGE_CIPHER_E;
             }
             break;
@@ -10393,6 +12193,9 @@
         case change_cipher_hs:
             if (ssl->msgsReceived.got_change_cipher) {
                 WOLFSSL_MSG("Duplicate ChangeCipher received");
+    #ifdef WOLFSSL_EXTRA_ALERTS
+                SendAlert(ssl, alert_fatal, unexpected_message);
+    #endif
                 return DUPLICATE_MSG_E;
             }
             /* DTLS is going to ignore the CCS message if the client key
@@ -10402,10 +12205,17 @@
 
 #ifndef NO_WOLFSSL_CLIENT
             if (ssl->options.side == WOLFSSL_CLIENT_END) {
-                if (!ssl->options.resuming &&
-                                 ssl->msgsReceived.got_server_hello_done == 0) {
-                    WOLFSSL_MSG("No ServerHelloDone before ChangeCipher");
-                    return OUT_OF_ORDER_E;
+                if (!ssl->options.resuming) {
+                   if (ssl->msgsReceived.got_server_hello_done == 0) {
+                        WOLFSSL_MSG("No ServerHelloDone before ChangeCipher");
+                        return OUT_OF_ORDER_E;
+                   }
+                }
+                else {
+                    if (ssl->msgsReceived.got_server_hello == 0) {
+                        WOLFSSL_MSG("No ServerHello before ChangeCipher on Resume");
+                        return OUT_OF_ORDER_E;
+                    }
                 }
                 #ifdef HAVE_SESSION_TICKET
                     if (ssl->expect_session_ticket) {
@@ -10424,6 +12234,9 @@
                 if (!ssl->options.resuming &&
                                ssl->msgsReceived.got_client_key_exchange == 0) {
                     WOLFSSL_MSG("No ClientKeyExchange before ChangeCipher");
+    #ifdef WOLFSSL_EXTRA_ALERTS
+                    SendAlert(ssl, alert_fatal, unexpected_message);
+    #endif
                     return OUT_OF_ORDER_E;
                 }
                 #ifndef NO_CERTS
@@ -10471,11 +12284,39 @@
 #endif
 
     /* make sure can read the message */
-    if (*inOutIdx + size > totalSz)
+    if (*inOutIdx + size > totalSz) {
+        WOLFSSL_MSG("Incomplete Data");
         return INCOMPLETE_DATA;
+    }
 
     expectedIdx = *inOutIdx + size +
                   (ssl->keys.encryptionOn ? ssl->keys.padSz : 0);
+#if defined(HAVE_ENCRYPT_THEN_MAC) && !defined(WOLFSSL_AEAD_ONLY)
+    if (ssl->options.startedETMRead && ssl->keys.encryptionOn)
+        expectedIdx += MacSize(ssl);
+#endif
+
+#if !defined(WOLFSSL_NO_SERVER) && \
+    defined(HAVE_SECURE_RENEGOTIATION) && \
+    defined(HAVE_SERVER_RENEGOTIATION_INFO)
+    if (ssl->options.handShakeDone && type == client_hello &&
+            ssl->secure_renegotiation &&
+            ssl->secure_renegotiation->enabled)
+    {
+        WOLFSSL_MSG("Reset handshake state");
+        XMEMSET(&ssl->msgsReceived, 0, sizeof(MsgsReceived));
+        ssl->options.serverState = NULL_STATE;
+        ssl->options.clientState = NULL_STATE;
+        ssl->options.connectState = CONNECT_BEGIN;
+        ssl->options.acceptState = ACCEPT_FIRST_REPLY_DONE;
+        ssl->options.handShakeState = NULL_STATE;
+        ssl->secure_renegotiation->cache_status = SCR_CACHE_NEEDED;
+
+        ret = InitHandshakeHashes(ssl);
+        if (ret != 0)
+            return ret;
+    }
+#endif
 
     /* sanity check msg received */
     if ( (ret = SanityCheckMsgReceived(ssl, type)) != 0) {
@@ -10537,14 +12378,17 @@
     #endif
     ) {
         ret = HashInput(ssl, input + *inOutIdx, size);
-        if (ret != 0) return ret;
+        if (ret != 0) {
+            WOLFSSL_MSG("Incomplete handshake hashes");
+            return ret;
+        }
     }
 
 #ifdef OPENSSL_EXTRA
     if (ssl->CBIS != NULL){
-		ssl->cbmode = SSL_CB_MODE_READ;
-		ssl->cbtype = type;
-		ssl->CBIS(ssl, SSL_CB_ACCEPT_LOOP, SSL_SUCCESS);
+        ssl->cbmode = SSL_CB_MODE_READ;
+        ssl->cbtype = type;
+        ssl->CBIS(ssl, SSL_CB_ACCEPT_LOOP, SSL_SUCCESS);
     }
 #endif
 
@@ -10564,11 +12408,23 @@
     case server_hello:
         WOLFSSL_MSG("processing server hello");
         ret = DoServerHello(ssl, input, inOutIdx, size);
-    #if !defined(WOLFSSL_NO_CLIENT_AUTH) && defined(HAVE_ED25519) && \
-                                                !defined(NO_ED25519_CLIENT_AUTH)
+    #if !defined(WOLFSSL_NO_CLIENT_AUTH) && \
+               ((defined(HAVE_ED25519) && !defined(NO_ED25519_CLIENT_AUTH)) || \
+                (defined(HAVE_ED448) && !defined(NO_ED448_CLIENT_AUTH)))
         if (ssl->options.resuming || !IsAtLeastTLSv1_2(ssl) ||
                                                IsAtLeastTLSv1_3(ssl->version)) {
-            ssl->options.cacheMessages = 0;
+
+        #if defined(WOLFSSL_ASYNC_CRYPT) || defined(WOLFSSL_NONBLOCK_OCSP)
+            if (ret != WC_PENDING_E && ret != OCSP_WANT_READ)
+        #endif
+            {
+                ssl->options.cacheMessages = 0;
+                if (ssl->hsHashes->messages != NULL) {
+                    XFREE(ssl->hsHashes->messages, ssl->heap,
+                        DYNAMIC_TYPE_HASHES);
+                    ssl->hsHashes->messages = NULL;
+                }
+            }
         }
     #endif
         break;
@@ -10593,7 +12449,8 @@
 #endif /* HAVE_SESSION_TICKET */
 #endif
 
-#ifndef NO_CERTS
+#if !defined(NO_CERTS) && (!defined(NO_WOLFSSL_CLIENT) || \
+                                               !defined(WOLFSSL_NO_CLIENT_AUTH))
     case certificate:
         WOLFSSL_MSG("processing certificate");
         ret = DoCertificate(ssl, input, inOutIdx, size);
@@ -10616,6 +12473,10 @@
         ssl->options.serverState = SERVER_HELLODONE_COMPLETE;
         if (IsEncryptionOn(ssl, 0)) {
             *inOutIdx += ssl->keys.padSz;
+        #if defined(HAVE_ENCRYPT_THEN_MAC) && !defined(WOLFSSL_AEAD_ONLY)
+            if (ssl->options.startedETMRead)
+                *inOutIdx += MacSize(ssl);
+        #endif
         }
         if (ssl->options.resuming) {
             WOLFSSL_MSG("Not resuming as thought");
@@ -10632,13 +12493,42 @@
     case client_hello:
         WOLFSSL_MSG("processing client hello");
         ret = DoClientHello(ssl, input, inOutIdx, size);
-    #if !defined(WOLFSSL_NO_CLIENT_AUTH) && defined(HAVE_ED25519) && \
-                                                !defined(NO_ED25519_CLIENT_AUTH)
+    #if !defined(WOLFSSL_NO_CLIENT_AUTH) && \
+               ((defined(HAVE_ED25519) && !defined(NO_ED25519_CLIENT_AUTH)) || \
+                (defined(HAVE_ED448) && !defined(NO_ED448_CLIENT_AUTH)))
         if (ssl->options.resuming || !ssl->options.verifyPeer || \
                      !IsAtLeastTLSv1_2(ssl) || IsAtLeastTLSv1_3(ssl->version)) {
-            ssl->options.cacheMessages = 0;
-        }
-    #endif
+        #if defined(WOLFSSL_ASYNC_CRYPT) || defined(WOLFSSL_NONBLOCK_OCSP)
+            if (ret != WC_PENDING_E && ret != OCSP_WANT_READ)
+        #endif
+            {
+                ssl->options.cacheMessages = 0;
+                if (ssl->hsHashes->messages != NULL) {
+                    XFREE(ssl->hsHashes->messages, ssl->heap, DYNAMIC_TYPE_HASHES);
+                    ssl->hsHashes->messages = NULL;
+                }
+            }
+        }
+    #endif
+        if (IsEncryptionOn(ssl, 0)) {
+        #if defined(HAVE_ENCRYPT_THEN_MAC) && !defined(WOLFSSL_AEAD_ONLY)
+            if (ssl->options.startedETMRead) {
+                word32 digestSz = MacSize(ssl);
+                if (*inOutIdx + ssl->keys.padSz + digestSz > totalSz)
+                    return BUFFER_E;
+                *inOutIdx += ssl->keys.padSz + digestSz;
+            }
+            else
+        #endif
+            {
+                /* access beyond input + size should be checked against totalSz
+                 */
+                if (*inOutIdx + ssl->keys.padSz > totalSz)
+                    return BUFFER_E;
+
+                *inOutIdx += ssl->keys.padSz;
+            }
+        }
         break;
 
     case client_key_exchange:
@@ -10646,13 +12536,13 @@
         ret = DoClientKeyExchange(ssl, input, inOutIdx, size);
         break;
 
-#if (!defined(NO_RSA) || defined(HAVE_ECC) || defined(HAVE_ED25519)) && \
-                                                !defined(WOLFSSL_NO_CLIENT_AUTH)
+#if (!defined(NO_RSA) || defined(HAVE_ECC) || defined(HAVE_ED25519) || \
+                        defined(HAVE_ED448)) && !defined(WOLFSSL_NO_CLIENT_AUTH)
     case certificate_verify:
         WOLFSSL_MSG("processing certificate verify");
         ret = DoCertificateVerify(ssl, input, inOutIdx, size);
         break;
-#endif /* (!NO_RSA || HAVE_ECC || HAVE_ED25519) && !WOLFSSL_NO_CLIENT_AUTH */
+#endif /* (!NO_RSA || ECC || ED25519 || ED448) && !WOLFSSL_NO_CLIENT_AUTH */
 
 #endif /* !NO_WOLFSSL_SERVER */
 
@@ -10668,6 +12558,15 @@
         ret = DECODE_E;
     }
 
+    if (ret == 0 && ssl->buffers.inputBuffer.dynamicFlag
+    #if defined(WOLFSSL_ASYNC_CRYPT) || defined(WOLFSSL_NONBLOCK_OCSP)
+        /* do not shrink input for async or non-block */
+        && ssl->error != WC_PENDING_E && ssl->error != OCSP_WANT_READ
+    #endif
+    ) {
+        ShrinkInputBuffer(ssl, NO_FORCED_FREE);
+    }
+
 #if defined(WOLFSSL_ASYNC_CRYPT) || defined(WOLFSSL_NONBLOCK_OCSP)
     /* if async, offset index so this msg will be processed again */
     if ((ret == WC_PENDING_E || ret == OCSP_WANT_READ) && *inOutIdx > 0) {
@@ -10678,6 +12577,11 @@
         }
     #endif
     }
+
+    /* make sure async error is cleared */
+    if (ret == 0 && (ssl->error == WC_PENDING_E || ssl->error == OCSP_WANT_READ)) {
+        ssl->error = 0;
+    }
 #endif /* WOLFSSL_ASYNC_CRYPT || WOLFSSL_NONBLOCK_OCSP */
 
     WOLFSSL_LEAVE("DoHandShakeMsgType()", ret);
@@ -10744,11 +12648,14 @@
         ret = DoHandShakeMsgType(ssl, input, inOutIdx, type, size, totalSz);
     }
     else {
-        if (inputLength + ssl->arrays->pendingMsgOffset
-                                                  > ssl->arrays->pendingMsgSz) {
-
-            return BUFFER_ERROR;
-        }
+        word32 pendSz =
+            ssl->arrays->pendingMsgSz - ssl->arrays->pendingMsgOffset;
+
+        /* Catch the case where there may be the remainder of a fragmented
+         * handshake message and the next handshake message in the same
+         * record. */
+        if (inputLength > pendSz)
+            inputLength = pendSz;
 
         XMEMCPY(ssl->arrays->pendingMsg + ssl->arrays->pendingMsgOffset,
                 input + *inOutIdx, inputLength);
@@ -10757,13 +12664,11 @@
 
         if (ssl->arrays->pendingMsgOffset == ssl->arrays->pendingMsgSz)
         {
-            word32 idx = 0;
+            word32 idx = HANDSHAKE_HEADER_SZ;
             ret = DoHandShakeMsgType(ssl,
-                                     ssl->arrays->pendingMsg
-                                                          + HANDSHAKE_HEADER_SZ,
+                                     ssl->arrays->pendingMsg,
                                      &idx, ssl->arrays->pendingMsgType,
-                                     ssl->arrays->pendingMsgSz
-                                                          - HANDSHAKE_HEADER_SZ,
+                                     ssl->arrays->pendingMsgSz - idx,
                                      ssl->arrays->pendingMsgSz);
         #ifdef WOLFSSL_ASYNC_CRYPT
             if (ret == WC_PENDING_E) {
@@ -10879,7 +12784,7 @@
             return 0;
         }
 
-        if (window[idx] & (1 << (newDiff - 1))) {
+        if (window[idx] & (1 << newDiff)) {
             WOLFSSL_MSG("Current record sequence number already received.");
             return 0;
         }
@@ -10986,7 +12891,7 @@
         word32 newDiff = diff % DTLS_WORD_BITS;
 
         if (idx < WOLFSSL_DTLS_WINDOW_WORDS)
-            window[idx] |= (1 << (newDiff - 1));
+            window[idx] |= (1 << newDiff);
     }
     else {
         if (diff >= DTLS_SEQ_BITS)
@@ -11008,7 +12913,7 @@
                 else {
                     temp |= (oldWindow[i-idx] << newDiff);
                     window[i] = temp;
-                    temp = oldWindow[i-idx] >> (DTLS_WORD_BITS - newDiff);
+                    temp = oldWindow[i-idx] >> (DTLS_WORD_BITS - newDiff - 1);
                 }
             }
         }
@@ -11027,9 +12932,11 @@
     DtlsMsg* item = ssl->dtls_rx_msg_list;
     int ret = 0;
 
+    WOLFSSL_ENTER("DtlsMsgDrain()");
+
     /* While there is an item in the store list, and it is the expected
      * message, and it is complete, and there hasn't been an error in the
-     * last messge... */
+     * last message... */
     while (item != NULL &&
             ssl->keys.dtls_expected_peer_handshake_number == item->seq &&
             item->fragSz == item->sz &&
@@ -11050,6 +12957,7 @@
         ssl->dtls_rx_msg_list_sz--;
     }
 
+    WOLFSSL_LEAVE("DtlsMsgDrain()", ret);
     return ret;
 }
 
@@ -11077,12 +12985,24 @@
 
     /* parse header */
     if (GetDtlsHandShakeHeader(ssl, input, inOutIdx, &type,
-                               &size, &fragOffset, &fragSz, totalSz) != 0)
+                               &size, &fragOffset, &fragSz, totalSz) != 0) {
+        WOLFSSL_ERROR(PARSE_ERROR);
         return PARSE_ERROR;
+    }
+
+    /* Cap the maximum size of a handshake message to something reasonable.
+     * By default is the maximum size of a certificate message assuming
+     * nine 2048-bit RSA certificates in the chain. */
+    if (size > MAX_HANDSHAKE_SZ) {
+        WOLFSSL_MSG("Handshake message too large");
+        return HANDSHAKE_SIZE_ERROR;
+    }
 
     /* check that we have complete fragment */
-    if (*inOutIdx + fragSz > totalSz)
+    if (*inOutIdx + fragSz > totalSz) {
+        WOLFSSL_ERROR(INCOMPLETE_DATA);
         return INCOMPLETE_DATA;
+    }
 
     /* Check the handshake sequence number first. If out of order,
      * add the current message to the list. If the message is in order,
@@ -11126,10 +13046,22 @@
         /* Already saw this message and processed it. It can be ignored. */
         *inOutIdx += fragSz;
         if(type == finished ) {
-            if (*inOutIdx + ssl->keys.padSz > totalSz) {
-                return BUFFER_E;
-            }
-            *inOutIdx += ssl->keys.padSz;
+        #if defined(HAVE_ENCRYPT_THEN_MAC) && !defined(WOLFSSL_AEAD_ONLY)
+            if (ssl->options.startedETMRead) {
+                word32 digestSz = MacSize(ssl);
+                if (*inOutIdx + ssl->keys.padSz + digestSz > totalSz)
+                    return BUFFER_E;
+                *inOutIdx += ssl->keys.padSz + digestSz;
+            }
+            else
+        #endif
+            {
+                if (*inOutIdx + ssl->keys.padSz > totalSz) {
+                    WOLFSSL_ERROR(BUFFER_E);
+                    return BUFFER_E;
+                }
+                *inOutIdx += ssl->keys.padSz;
+            }
         }
         if (IsDtlsNotSctpMode(ssl) &&
             VerifyForDtlsMsgPoolSend(ssl, type, fragOffset)) {
@@ -11172,6 +13104,11 @@
 #ifndef WOLFSSL_NO_TLS12
 
 #ifdef HAVE_AEAD
+
+#if !defined(NO_PUBLIC_GCM_SET_IV) && \
+    (((defined(HAVE_FIPS) || defined(HAVE_SELFTEST)) && \
+    (!defined(HAVE_FIPS_VERSION) || (HAVE_FIPS_VERSION < 2))) || \
+    (defined(HAVE_POLY1305) && defined(HAVE_CHACHA)))
 static WC_INLINE void AeadIncrementExpIV(WOLFSSL* ssl)
 {
     int i;
@@ -11179,6 +13116,7 @@
         if (++ssl->keys.aead_exp_IV[i]) return;
     }
 }
+#endif
 
 
 #if defined(HAVE_POLY1305) && defined(HAVE_CHACHA)
@@ -11221,8 +13159,8 @@
     /* 32 bit size of cipher to 64 bit endian */
     padding[0] =  msglen        & 0xff;
     padding[1] = (msglen >>  8) & 0xff;
-    padding[2] = (msglen >> 16) & 0xff;
-    padding[3] = (msglen >> 24) & 0xff;
+    padding[2] = ((word32)msglen >> 16) & 0xff;
+    padding[3] = ((word32)msglen >> 24) & 0xff;
     if ((ret = wc_Poly1305Update(ssl->auth.poly1305, padding, sizeof(padding)))
         != 0)
         return ret;
@@ -11236,11 +13174,11 @@
 
 
 /* When the flag oldPoly is not set this follows RFC7905. When oldPoly is set
- * the implmentation follows an older draft for creating the nonce and MAC.
- * The flag oldPoly gets set automaticlly depending on what cipher suite was
+ * the implementation follows an older draft for creating the nonce and MAC.
+ * The flag oldPoly gets set automatically depending on what cipher suite was
  * negotiated in the handshake. This is able to be done because the IDs for the
  * cipher suites was updated in RFC7905 giving unique values for the older
- * draft in comparision to the more recent RFC.
+ * draft in comparison to the more recent RFC.
  *
  * ssl   WOLFSSL structure to get cipher and TLS state from
  * out   output buffer to hold encrypted data
@@ -11281,7 +13219,6 @@
     #ifdef WOLFSSL_DTLS
         if (ssl->options.dtls) {
             additionalSrc -= DTLS_HANDSHAKE_EXTRA;
-            DtlsSEQIncrement(ssl, CUR_ORDER);
         }
     #endif
 
@@ -11326,11 +13263,19 @@
         return ret;
     }
 
-    ForceZero(nonce, CHACHA20_NONCE_SZ); /* done with nonce, clear it */
     /* create Poly1305 key using chacha20 keystream */
     if ((ret = wc_Chacha_Process(ssl->encrypt.chacha, poly,
-                                                      poly, sizeof(poly))) != 0)
-        return ret;
+                                                    poly, sizeof(poly))) != 0) {
+        ForceZero(nonce, CHACHA20_NONCE_SZ);
+        return ret;
+    }
+
+    /* set the counter after getting poly1305 key */
+    if ((ret = wc_Chacha_SetIV(ssl->encrypt.chacha, nonce, 1)) != 0) {
+        ForceZero(nonce, CHACHA20_NONCE_SZ);
+        return ret;
+    }
+    ForceZero(nonce, CHACHA20_NONCE_SZ); /* done with nonce, clear it */
 
     /* encrypt the plain text */
     if ((ret = wc_Chacha_Process(ssl->encrypt.chacha, out,
@@ -11387,11 +13332,11 @@
 
 
 /* When the flag oldPoly is not set this follows RFC7905. When oldPoly is set
- * the implmentation follows an older draft for creating the nonce and MAC.
- * The flag oldPoly gets set automaticlly depending on what cipher suite was
+ * the implementation follows an older draft for creating the nonce and MAC.
+ * The flag oldPoly gets set automatically depending on what cipher suite was
  * negotiated in the handshake. This is able to be done because the IDs for the
  * cipher suites was updated in RFC7905 giving unique values for the older
- * draft in comparision to the more recent RFC.
+ * draft in comparison to the more recent RFC.
  *
  * ssl   WOLFSSL structure to get cipher and TLS state from
  * plain output buffer to hold decrypted data
@@ -11472,11 +13417,19 @@
         return ret;
     }
 
-    ForceZero(nonce, CHACHA20_NONCE_SZ); /* done with nonce, clear it */
     /* use chacha20 keystream to get poly1305 key for tag */
     if ((ret = wc_Chacha_Process(ssl->decrypt.chacha, poly,
-                                                      poly, sizeof(poly))) != 0)
-        return ret;
+                                                    poly, sizeof(poly))) != 0) {
+        ForceZero(nonce, CHACHA20_NONCE_SZ);
+        return ret;
+    }
+
+    /* set counter after getting poly1305 key */
+    if ((ret = wc_Chacha_SetIV(ssl->decrypt.chacha, nonce, 1)) != 0) {
+        ForceZero(nonce, CHACHA20_NONCE_SZ);
+        return ret;
+    }
+    ForceZero(nonce, CHACHA20_NONCE_SZ); /* done with nonce, clear it */
 
     /* get the tag using Poly1305 */
     if (ssl->options.oldPoly != 0) {
@@ -11528,6 +13481,29 @@
 #endif /* HAVE_AEAD */
 
 
+#if defined(BUILD_AESGCM) || defined(HAVE_AESCCM)
+
+#if !defined(NO_GCM_ENCRYPT_EXTRA) && \
+    ((!defined(HAVE_FIPS) && !defined(HAVE_SELFTEST)) || \
+    (defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)))
+/* The following type is used to share code between AES-GCM and AES-CCM. */
+    typedef int (*AesAuthEncryptFunc)(Aes* aes, byte* out,
+                                       const byte* in, word32 sz,
+                                       byte* iv, word32 ivSz,
+                                       byte* authTag, word32 authTagSz,
+                                       const byte* authIn, word32 authInSz);
+    #define AES_AUTH_ENCRYPT_FUNC AesAuthEncryptFunc
+    #define AES_GCM_ENCRYPT wc_AesGcmEncrypt_ex
+    #define AES_CCM_ENCRYPT wc_AesCcmEncrypt_ex
+#else
+    #define AES_AUTH_ENCRYPT_FUNC wc_AesAuthEncryptFunc
+    #define AES_GCM_ENCRYPT wc_AesGcmEncrypt
+    #define AES_CCM_ENCRYPT wc_AesCcmEncrypt
+#endif
+
+#endif
+
+
 static WC_INLINE int EncryptDo(WOLFSSL* ssl, byte* out, const byte* input,
     word16 sz, int asyncOkay)
 {
@@ -11569,7 +13545,7 @@
             break;
     #endif
 
-    #ifdef BUILD_AES
+    #if defined(BUILD_AES) && defined(HAVE_AES_CBC)
         case wolfssl_aes:
         #ifdef WOLFSSL_ASYNC_CRYPT
             /* initialize event */
@@ -11578,7 +13554,12 @@
             if (ret != 0)
                 break;
         #endif
-
+        #if defined(WOLFSSL_RENESAS_TSIP_TLS) && \
+            !defined(NO_WOLFSSL_RENESAS_TSIP_TLS_SESSION)
+            if (tsip_useable(ssl)) {
+                ret = wc_tsip_AesCbcEncrypt(ssl->encrypt.aes, out, input, sz);
+            } else
+        #endif
             ret = wc_AesCbcEncrypt(ssl->encrypt.aes, out, input, sz);
         #ifdef WOLFSSL_ASYNC_CRYPT
             if (ret == WC_PENDING_E && asyncOkay) {
@@ -11592,7 +13573,7 @@
         case wolfssl_aes_gcm:
         case wolfssl_aes_ccm:/* GCM AEAD macros use same size as CCM */
         {
-            wc_AesAuthEncryptFunc aes_auth_fn;
+            AES_AUTH_ENCRYPT_FUNC aes_auth_fn;
             const byte* additionalSrc;
 
         #ifdef WOLFSSL_ASYNC_CRYPT
@@ -11605,11 +13586,11 @@
 
         #if defined(BUILD_AESGCM) && defined(HAVE_AESCCM)
             aes_auth_fn = (ssl->specs.bulk_cipher_algorithm == wolfssl_aes_gcm)
-                            ? wc_AesGcmEncrypt : wc_AesCcmEncrypt;
+                            ? AES_GCM_ENCRYPT : AES_CCM_ENCRYPT;
         #elif defined(BUILD_AESGCM)
-            aes_auth_fn = wc_AesGcmEncrypt;
+            aes_auth_fn = AES_GCM_ENCRYPT;
         #else
-            aes_auth_fn = wc_AesCcmEncrypt;
+            aes_auth_fn = AES_CCM_ENCRYPT;
         #endif
             additionalSrc = input - 5;
 
@@ -11632,10 +13613,14 @@
              * IV length minus the authentication tag size. */
             c16toa(sz - AESGCM_EXP_IV_SZ - ssl->specs.aead_mac_size,
                                 ssl->encrypt.additional + AEAD_LEN_OFFSET);
+#if !defined(NO_PUBLIC_GCM_SET_IV) && \
+    ((defined(HAVE_FIPS) || defined(HAVE_SELFTEST)) && \
+    (!defined(HAVE_FIPS_VERSION) || (HAVE_FIPS_VERSION < 2)))
             XMEMCPY(ssl->encrypt.nonce,
                                 ssl->keys.aead_enc_imp_IV, AESGCM_IMP_IV_SZ);
             XMEMCPY(ssl->encrypt.nonce + AESGCM_IMP_IV_SZ,
                                 ssl->keys.aead_exp_IV, AESGCM_EXP_IV_SZ);
+#endif
             ret = aes_auth_fn(ssl->encrypt.aes,
                     out + AESGCM_EXP_IV_SZ, input + AESGCM_EXP_IV_SZ,
                     sz - AESGCM_EXP_IV_SZ - ssl->specs.aead_mac_size,
@@ -11648,6 +13633,12 @@
                 ret = wolfSSL_AsyncPush(ssl, asyncDev);
             }
         #endif
+#if !defined(NO_PUBLIC_GCM_SET_IV) && \
+    ((!defined(HAVE_FIPS) && !defined(HAVE_SELFTEST)) || \
+    (defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)))
+            XMEMCPY(out,
+                    ssl->encrypt.nonce + AESGCM_IMP_IV_SZ, AESGCM_EXP_IV_SZ);
+#endif
         }
         break;
     #endif /* BUILD_AESGCM || HAVE_AESCCM */
@@ -11776,15 +13767,13 @@
                 ssl->specs.bulk_cipher_algorithm == wolfssl_aes_gcm)
             {
                 /* finalize authentication cipher */
+#if !defined(NO_PUBLIC_GCM_SET_IV) && \
+    ((defined(HAVE_FIPS) || defined(HAVE_SELFTEST)) && \
+    (!defined(HAVE_FIPS_VERSION) || (HAVE_FIPS_VERSION < 2)))
                 AeadIncrementExpIV(ssl);
-
+#endif
                 if (ssl->encrypt.nonce)
                     ForceZero(ssl->encrypt.nonce, AESGCM_NONCE_SZ);
-
-            #ifdef WOLFSSL_DTLS
-                if (ssl->options.dtls)
-                    DtlsSEQIncrement(ssl, CUR_ORDER);
-            #endif
             }
         #endif /* BUILD_AESGCM || HAVE_AESCCM */
             break;
@@ -11797,6 +13786,7 @@
     return ret;
 }
 
+
 static WC_INLINE int DecryptDo(WOLFSSL* ssl, byte* plain, const byte* input,
                            word16 sz)
 {
@@ -11833,7 +13823,7 @@
             break;
     #endif
 
-    #ifdef BUILD_AES
+    #if defined(BUILD_AES) && defined(HAVE_AES_CBC)
         case wolfssl_aes:
         #ifdef WOLFSSL_ASYNC_CRYPT
             /* initialize event */
@@ -11842,7 +13832,12 @@
             if (ret != 0)
                 break;
         #endif
-
+        #if defined(WOLFSSL_RENESAS_TSIP_TLS) && \
+            !defined(NO_WOLFSSL_RENESAS_TSIP_TLS_SESSION)
+            if (tsip_useable(ssl)) {
+                ret = wc_tsip_AesCbcDecrypt(ssl->decrypt.aes, plain, input, sz);
+            } else
+        #endif
             ret = wc_AesCbcDecrypt(ssl->decrypt.aes, plain, input, sz);
         #ifdef WOLFSSL_ASYNC_CRYPT
             if (ret == WC_PENDING_E) {
@@ -12084,7 +14079,17 @@
     word32 minLength = ssl->specs.hash_size; /* covers stream */
 #endif
 
+#ifndef WOLFSSL_AEAD_ONLY
     if (ssl->specs.cipher_type == block) {
+#ifdef HAVE_ENCRYPT_THEN_MAC
+        if (ssl->options.startedETMRead) {
+            if ((encryptSz - MacSize(ssl)) % ssl->specs.block_size) {
+                WOLFSSL_MSG("Block ciphertext not block size");
+                return SANITY_CIPHER_E;
+            }
+        }
+        else
+#endif
         if (encryptSz % ssl->specs.block_size) {
             WOLFSSL_MSG("Block ciphertext not block size");
             return SANITY_CIPHER_E;
@@ -12098,7 +14103,9 @@
         if (ssl->options.tls1_1)
             minLength += ssl->specs.block_size;  /* explicit IV */
     }
-    else if (ssl->specs.cipher_type == aead) {
+    else
+#endif
+    if (ssl->specs.cipher_type == aead) {
         minLength = ssl->specs.aead_mac_size;    /* authTag size */
         if (CipherHasExpIV(ssl))
             minLength += AESGCM_EXP_IV_SZ;       /* explicit IV  */
@@ -12113,6 +14120,7 @@
 }
 
 
+#ifndef WOLFSSL_AEAD_ONLY
 /* check all length bytes for the pad value, return 0 on success */
 static int PadCheck(const byte* a, byte pad, int length)
 {
@@ -12175,8 +14183,8 @@
     unsigned char started, notEnded;
     unsigned char good = 0;
 
-    if (scanStart < 0)
-        scanStart = 0;
+    scanStart &= ctMaskIntGTE(scanStart, 0);
+    macStart &= ctMaskIntGTE(macStart, 0);
 
     /* Div on Intel has different speeds depending on value.
      * Use a bitwise AND or mod a specific value (converted to mul). */
@@ -12244,12 +14252,13 @@
     /* Make ret negative on masking failure. */
     ret -= 1 - good;
 
-    /* Treat any faulure as verify MAC error. */
+    /* Treat any failure as verify MAC error. */
     if (ret != 0)
         ret = VERIFY_MAC_ERROR;
 
     return ret;
 }
+#endif
 
 
 int DoApplicationData(WOLFSSL* ssl, byte* input, word32* inOutIdx)
@@ -12264,9 +14273,19 @@
 #endif
 
 #ifdef WOLFSSL_EARLY_DATA
-    if (ssl->earlyData != no_early_data) {
-    }
-    else
+    if (ssl->options.tls1_3 && ssl->options.handShakeDone == 0) {
+        if (ssl->options.side == WOLFSSL_SERVER_END &&
+                          ssl->earlyData != no_early_data &&
+                          ssl->options.clientState < CLIENT_FINISHED_COMPLETE) {
+            ssl->earlyDataSz += ssl->curSize;
+            if (ssl->earlyDataSz <= ssl->options.maxEarlyDataSz) {
+                WOLFSSL_MSG("Ignoring EarlyData!");
+                *inOutIdx = ssl->buffers.inputBuffer.length;
+                return 0;
+            }
+            WOLFSSL_MSG("Too much EarlyData!");
+        }
+    }
 #endif
     if (ssl->options.handShakeDone == 0) {
         WOLFSSL_MSG("Received App data before a handshake completed");
@@ -12274,22 +14293,30 @@
         return OUT_OF_ORDER_E;
     }
 
+#ifndef WOLFSSL_AEAD_ONLY
     if (ssl->specs.cipher_type == block) {
         if (ssl->options.tls1_1)
             ivExtra = ssl->specs.block_size;
     }
-    else if (ssl->specs.cipher_type == aead) {
+    else
+#endif
+    if (ssl->specs.cipher_type == aead) {
         if (CipherHasExpIV(ssl))
             ivExtra = AESGCM_EXP_IV_SZ;
     }
 
     dataSz = msgSz - ivExtra - ssl->keys.padSz;
+#if defined(HAVE_ENCRYPT_THEN_MAC) && !defined(WOLFSSL_AEAD_ONLY)
+    if (ssl->options.startedETMRead)
+        dataSz -= MacSize(ssl);
+#endif
     if (dataSz < 0) {
         WOLFSSL_MSG("App data buffer error, malicious input?");
+        SendAlert(ssl, alert_fatal, unexpected_message);
         return BUFFER_ERROR;
     }
 #ifdef WOLFSSL_EARLY_DATA
-    if (ssl->earlyData != no_early_data) {
+    if (ssl->earlyData > early_data_ext) {
         if (ssl->earlyDataSz + dataSz > ssl->options.maxEarlyDataSz) {
             SendAlert(ssl, alert_fatal, unexpected_message);
             return WOLFSSL_FATAL_ERROR;
@@ -12315,6 +14342,10 @@
     }
 
     idx += ssl->keys.padSz;
+#if defined(HAVE_ENCRYPT_THEN_MAC) && !defined(WOLFSSL_AEAD_ONLY)
+    if (ssl->options.startedETMRead)
+        idx += MacSize(ssl);
+#endif
 
 #ifdef HAVE_LIBZ
     /* decompress could be bigger, overwrite after verify */
@@ -12333,6 +14364,7 @@
 {
     byte level;
     byte code;
+    word32 dataSz = totalSz - *inOutIdx;
 
     #if defined(WOLFSSL_CALLBACKS) || defined(OPENSSL_EXTRA)
         if (ssl->hsInfoOn)
@@ -12344,14 +14376,21 @@
                           READ_PROTO, ssl->heap);
     #endif
 
-    if (++ssl->options.alertCount >= WOLFSSL_ALERT_COUNT_MAX) {
-        WOLFSSL_MSG("Alert count exceeded");
-        return ALERT_COUNT_E;
+    if (IsEncryptionOn(ssl, 0)) {
+        dataSz -= ssl->keys.padSz;
+    #if defined(HAVE_ENCRYPT_THEN_MAC) && !defined(WOLFSSL_AEAD_ONLY)
+        if (ssl->options.startedETMRead)
+            dataSz -= MacSize(ssl);
+    #endif
     }
 
     /* make sure can read the message */
-    if (*inOutIdx + ALERT_SIZE > totalSz)
-        return BUFFER_E;
+    if (dataSz != ALERT_SIZE) {
+#ifdef WOLFSSL_EXTRA_ALERTS
+        SendAlert(ssl, alert_fatal, unexpected_message);
+#endif
+        return BUFFER_E;
+    }
 
     level = input[(*inOutIdx)++];
     code  = input[(*inOutIdx)++];
@@ -12362,6 +14401,15 @@
         ssl->options.isClosed = 1;  /* Don't send close_notify */
     }
 
+    if (++ssl->options.alertCount >= WOLFSSL_ALERT_COUNT_MAX) {
+        WOLFSSL_MSG("Alert count exceeded");
+#ifdef WOLFSSL_EXTRA_ALERTS
+        if (level != alert_warning || code != close_notify)
+            SendAlert(ssl, alert_fatal, unexpected_message);
+#endif
+        return ALERT_COUNT_E;
+    }
+
     WOLFSSL_MSG("Got alert");
     if (*type == close_notify) {
         WOLFSSL_MSG("\tclose notify");
@@ -12369,17 +14417,19 @@
     }
 #ifdef WOLFSSL_TLS13
     if (*type == decode_error) {
-        WOLFSSL_MSG("    decode error");
+        WOLFSSL_MSG("\tdecode error");
     }
     if (*type == illegal_parameter) {
-        WOLFSSL_MSG("    illegal parameter");
+        WOLFSSL_MSG("\tillegal parameter");
     }
 #endif
     WOLFSSL_ERROR(*type);
     if (IsEncryptionOn(ssl, 0)) {
-        if (*inOutIdx + ssl->keys.padSz > totalSz)
-            return BUFFER_E;
         *inOutIdx += ssl->keys.padSz;
+    #if defined(HAVE_ENCRYPT_THEN_MAC) && !defined(WOLFSSL_AEAD_ONLY)
+        if (ssl->options.startedETMRead)
+            *inOutIdx += MacSize(ssl);
+    #endif
     }
 
     return level;
@@ -12433,12 +14483,12 @@
                      ssl->buffers.inputBuffer.buffer +
                      ssl->buffers.inputBuffer.length,
                      inSz);
-        if (in == -1)
-            return SOCKET_ERROR_E;
-
         if (in == WANT_READ)
             return WANT_READ;
 
+        if (in < 0)
+            return SOCKET_ERROR_E;
+
         if (in > inSz)
             return RECV_OVERFLOW_E;
 
@@ -12458,11 +14508,39 @@
     return 0;
 }
 
+#if defined(HAVE_ENCRYPT_THEN_MAC) && !defined(WOLFSSL_AEAD_ONLY)
+static WC_INLINE int VerifyMacEnc(WOLFSSL* ssl, const byte* input, word32 msgSz,
+                                  int content)
+{
+    int    ret;
+#ifdef HAVE_TRUNCATED_HMAC
+    word32 digestSz = ssl->truncated_hmac ? (byte)TRUNCATED_HMAC_SZ
+                                          : ssl->specs.hash_size;
+#else
+    word32 digestSz = ssl->specs.hash_size;
+#endif
+    byte   verify[WC_MAX_DIGEST_SIZE];
+
+    WOLFSSL_MSG("Verify MAC of Encrypted Data");
+
+    if (msgSz < digestSz) {
+        return VERIFY_MAC_ERROR;
+    }
+
+    ret  = ssl->hmac(ssl, verify, input, msgSz - digestSz, -1, content, 1);
+    ret |= ConstantCompare(verify, input + msgSz - digestSz, digestSz);
+    if (ret != 0) {
+        return VERIFY_MAC_ERROR;
+    }
+
+    return 0;
+}
+#endif
 
 static WC_INLINE int VerifyMac(WOLFSSL* ssl, const byte* input, word32 msgSz,
                             int content, word32* padSz)
 {
-#ifndef WOLFSSL_NO_TLS12
+#if !defined(WOLFSSL_NO_TLS12) && !defined(WOLFSSL_AEAD_ONLY)
     int    ivExtra = 0;
     int    ret;
     word32 pad     = 0;
@@ -12519,17 +14597,16 @@
         if (ret != 0)
             return VERIFY_MAC_ERROR;
     }
-
-#endif /* WOLFSSL_NO_TLS12 */
+#endif /* !WOLFSSL_NO_TLS12 && !WOLFSSL_AEAD_ONLY */
 
     if (ssl->specs.cipher_type == aead) {
         *padSz = ssl->specs.aead_mac_size;
     }
-#ifndef WOLFSSL_NO_TLS12
+#if !defined(WOLFSSL_NO_TLS12) && !defined(WOLFSSL_AEAD_ONLY)
     else {
         *padSz = digestSz + pad + padByte;
     }
-#endif /* WOLFSSL_NO_TLS12 */
+#endif /* !WOLFSSL_NO_TLS12 && !WOLFSSL_AEAD_ONLY */
 
     (void)input;
     (void)msgSz;
@@ -12699,6 +14776,15 @@
             if (ret != 0)
                 return ret;
 
+#ifdef WOLFSSL_TLS13
+            if (IsAtLeastTLSv1_3(ssl->version) && IsEncryptionOn(ssl, 0) &&
+                                        ssl->curRL.type != application_data &&
+                                        ssl->curRL.type != change_cipher_spec) {
+                SendAlert(ssl, alert_fatal, unexpected_message);
+                return PARSE_ERROR;
+            }
+#endif
+
             ssl->options.processReply = getData;
             FALL_THROUGH;
 
@@ -12707,9 +14793,15 @@
 
             /* get sz bytes or return error */
             if (!ssl->options.dtls) {
-                if ((ret = GetInputData(ssl, ssl->curSize)) < 0)
+                if ((ret = GetInputData(ssl, ssl->curSize)) < 0) {
+#ifdef WOLFSSL_EXTRA_ALERTS
+                    if (ret != WANT_READ)
+                        SendAlert(ssl, alert_fatal, bad_record_mac);
+#endif
                     return ret;
-            } else {
+                }
+            }
+            else {
 #ifdef WOLFSSL_DTLS
                 /* read ahead may already have */
                 used = ssl->buffers.inputBuffer.length -
@@ -12720,8 +14812,71 @@
 #endif
             }
 
+            if (IsEncryptionOn(ssl, 0)) {
+                int tooLong = 0;
+
+#ifdef WOLFSSL_TLS13
+                if (IsAtLeastTLSv1_3(ssl->version)) {
+                    tooLong  = ssl->curSize > MAX_TLS13_ENC_SZ;
+                    tooLong |= ssl->curSize - ssl->specs.aead_mac_size >
+                                                             MAX_TLS13_PLAIN_SZ;
+                }
+#endif
+#ifdef WOLFSSL_EXTRA_ALERTS
+                if (!IsAtLeastTLSv1_3(ssl->version))
+                    tooLong = ssl->curSize > MAX_TLS_CIPHER_SZ;
+#endif
+                if (tooLong) {
+                    WOLFSSL_MSG("Encrypted data too long");
+#if defined(WOLFSSL_TLS13) || defined(WOLFSSL_EXTRA_ALERTS)
+                    SendAlert(ssl, alert_fatal, record_overflow);
+#endif
+                    return BUFFER_ERROR;
+                }
+            }
+            ssl->keys.padSz = 0;
+
+            ssl->options.processReply = verifyEncryptedMessage;
+            startIdx = ssl->buffers.inputBuffer.idx;  /* in case > 1 msg per */
+            FALL_THROUGH;
+
+        /* verify digest of encrypted message */
+        case verifyEncryptedMessage:
+#if defined(HAVE_ENCRYPT_THEN_MAC) && !defined(WOLFSSL_AEAD_ONLY)
+            if (IsEncryptionOn(ssl, 0) && ssl->keys.decryptedCur == 0 &&
+                                   !atomicUser && ssl->options.startedETMRead) {
+                ret = VerifyMacEnc(ssl, ssl->buffers.inputBuffer.buffer +
+                                   ssl->buffers.inputBuffer.idx,
+                                   ssl->curSize, ssl->curRL.type);
+            #ifdef WOLFSSL_ASYNC_CRYPT
+                if (ret == WC_PENDING_E)
+                    return ret;
+            #endif
+                if (ret < 0) {
+                    WOLFSSL_MSG("VerifyMacEnc failed");
+                    WOLFSSL_ERROR(ret);
+                #ifdef WOLFSSL_DTLS
+                    /* If in DTLS mode, if the decrypt fails for any
+                     * reason, pretend the datagram never happened. */
+                    if (ssl->options.dtls) {
+                        ssl->options.processReply = doProcessInit;
+                        ssl->buffers.inputBuffer.idx =
+                                        ssl->buffers.inputBuffer.length;
+                        #ifdef WOLFSSL_DTLS_DROP_STATS
+                            ssl->macDropCount++;
+                        #endif /* WOLFSSL_DTLS_DROP_STATS */
+                    }
+                #endif /* WOLFSSL_DTLS */
+                #ifdef WOLFSSL_EXTRA_ALERTS
+                    if (!ssl->options.dtls)
+                        SendAlert(ssl, alert_fatal, bad_record_mac);
+                #endif
+                    return DECRYPT_ERROR;
+                }
+                ssl->keys.encryptSz    = ssl->curSize;
+            }
+#endif
             ssl->options.processReply = decryptMessage;
-            startIdx = ssl->buffers.inputBuffer.idx;  /* in case > 1 msg per */
             FALL_THROUGH;
 
         /* decrypt message */
@@ -12738,28 +14893,60 @@
                 bufferStatic* in = &ssl->buffers.inputBuffer;
 
                 ret = SanityCheckCipherText(ssl, ssl->curSize);
-                if (ret < 0)
+                if (ret < 0) {
+                #ifdef WOLFSSL_EXTRA_ALERTS
+                    SendAlert(ssl, alert_fatal, bad_record_mac);
+                #endif
                     return ret;
+                }
 
                 if (atomicUser) {
-                #ifdef ATOMIC_USER
-                    ret = ssl->ctx->DecryptVerifyCb(ssl,
-                                  in->buffer + in->idx,
-                                  in->buffer + in->idx,
-                                  ssl->curSize, ssl->curRL.type, 1,
-                                  &ssl->keys.padSz, ssl->DecryptVerifyCtx);
-                #endif /* ATOMIC_USER */
+        #ifdef ATOMIC_USER
+            #if defined(HAVE_ENCRYPT_THEN_MAC) && !defined(WOLFSSL_AEAD_ONLY)
+                    if (ssl->options.startedETMRead) {
+                        ret = ssl->ctx->VerifyDecryptCb(ssl,
+                                     in->buffer + in->idx, in->buffer + in->idx,
+                                     ssl->curSize - MacSize(ssl),
+                                     ssl->curRL.type, 1, &ssl->keys.padSz,
+                                     ssl->DecryptVerifyCtx);
+                    }
+                    else
+            #endif
+                    {
+                        ret = ssl->ctx->DecryptVerifyCb(ssl,
+                                      in->buffer + in->idx,
+                                      in->buffer + in->idx,
+                                      ssl->curSize, ssl->curRL.type, 1,
+                                      &ssl->keys.padSz, ssl->DecryptVerifyCtx);
+                    }
+        #endif /* ATOMIC_USER */
                 }
                 else {
                     if (!ssl->options.tls1_3) {
-                #ifndef WOLFSSL_NO_TLS12
+        #ifndef WOLFSSL_NO_TLS12
+            #if defined(HAVE_ENCRYPT_THEN_MAC) && !defined(WOLFSSL_AEAD_ONLY)
+                    if (ssl->options.startedETMRead) {
+                        word32 digestSz = MacSize(ssl);
+                        ret = Decrypt(ssl,
+                                      in->buffer + in->idx,
+                                      in->buffer + in->idx,
+                                      ssl->curSize - digestSz);
+                         ssl->keys.padSz =
+                              in->buffer[in->idx + ssl->curSize - digestSz - 1];
+                         ssl->keys.padSz += 1;
+                         ssl->keys.decryptedCur = 1;
+                    }
+                    else
+            #endif
+                    {
                         ret = Decrypt(ssl,
                                       in->buffer + in->idx,
                                       in->buffer + in->idx,
                                       ssl->curSize);
-                #else
+                    }
+        #else
                         ret = DECRYPT_ERROR;
-                #endif
+        #endif
                     }
                     else
                     {
@@ -12790,29 +14977,40 @@
             #endif
 
                 if (ret >= 0) {
-                #ifndef WOLFSSL_NO_TLS12
+            #ifndef WOLFSSL_NO_TLS12
                     /* handle success */
+                #ifndef WOLFSSL_AEAD_ONLY
                     if (ssl->options.tls1_1 && ssl->specs.cipher_type == block)
                         ssl->buffers.inputBuffer.idx += ssl->specs.block_size;
+                #endif
                         /* go past TLSv1.1 IV */
                     if (CipherHasExpIV(ssl))
                         ssl->buffers.inputBuffer.idx += AESGCM_EXP_IV_SZ;
-                #endif
+            #endif
                 }
                 else {
                     WOLFSSL_MSG("Decrypt failed");
                     WOLFSSL_ERROR(ret);
                 #ifdef WOLFSSL_EARLY_DATA
                     if (ssl->options.tls1_3) {
-                        ssl->earlyDataSz += ssl->curSize;
-                        if (ssl->earlyDataSz <= ssl->options.maxEarlyDataSz) {
-                            if (ssl->keys.peer_sequence_number_lo-- == 0)
-                                ssl->keys.peer_sequence_number_hi--;
-                            ssl->options.processReply = doProcessInit;
-                            ssl->buffers.inputBuffer.idx =
-                                            ssl->buffers.inputBuffer.length;
-                            return 0;
-                        }
+                         if (ssl->options.side == WOLFSSL_SERVER_END &&
+                                 ssl->earlyData != no_early_data &&
+                                 ssl->options.clientState <
+                                                     CLIENT_FINISHED_COMPLETE) {
+                            ssl->earlyDataSz += ssl->curSize;
+                            if (ssl->earlyDataSz <=
+                                                  ssl->options.maxEarlyDataSz) {
+                                WOLFSSL_MSG("Ignoring EarlyData!");
+                                if (ssl->keys.peer_sequence_number_lo-- == 0)
+                                    ssl->keys.peer_sequence_number_hi--;
+                                ssl->options.processReply = doProcessInit;
+                                ssl->buffers.inputBuffer.idx =
+                                                ssl->buffers.inputBuffer.length;
+                                return 0;
+                            }
+                            WOLFSSL_MSG("Too much EarlyData!");
+                        }
+                        SendAlert(ssl, alert_fatal, bad_record_mac);
                     }
                 #endif
                 #ifdef WOLFSSL_DTLS
@@ -12827,7 +15025,6 @@
                         #endif /* WOLFSSL_DTLS_DROP_STATS */
                     }
                 #endif /* WOLFSSL_DTLS */
-
                     return DECRYPT_ERROR;
                 }
             }
@@ -12846,7 +15043,11 @@
                                          ssl->curRL.type != change_cipher_spec))
 #endif
             {
-                if (!atomicUser) {
+                if (!atomicUser
+#if defined(HAVE_ENCRYPT_THEN_MAC) && !defined(WOLFSSL_AEAD_ONLY)
+                                && !ssl->options.startedETMRead
+#endif
+                    ) {
                     ret = VerifyMac(ssl, ssl->buffers.inputBuffer.buffer +
                                     ssl->buffers.inputBuffer.idx,
                                     ssl->curSize, ssl->curRL.type,
@@ -12858,7 +15059,7 @@
                     if (ret < 0) {
                         WOLFSSL_MSG("VerifyMac failed");
                         WOLFSSL_ERROR(ret);
-                        #ifdef WOLFSSL_DTLS
+                    #ifdef WOLFSSL_DTLS
                         /* If in DTLS mode, if the decrypt fails for any
                          * reason, pretend the datagram never happened. */
                         if (ssl->options.dtls) {
@@ -12869,7 +15070,11 @@
                                 ssl->macDropCount++;
                             #endif /* WOLFSSL_DTLS_DROP_STATS */
                         }
-                        #endif /* WOLFSSL_DTLS */
+                    #endif /* WOLFSSL_DTLS */
+                    #ifdef WOLFSSL_EXTRA_ALERTS
+                        if (!ssl->options.dtls)
+                            SendAlert(ssl, alert_fatal, bad_record_mac);
+                    #endif
                         return DECRYPT_ERROR;
                     }
                 }
@@ -12898,6 +15103,29 @@
         /* the record layer is here */
         case runProcessingOneMessage:
 
+       #if defined(HAVE_ENCRYPT_THEN_MAC) && !defined(WOLFSSL_AEAD_ONLY)
+            if (IsEncryptionOn(ssl, 0) && ssl->options.startedETMRead) {
+                if (ssl->buffers.inputBuffer.length - ssl->keys.padSz -
+                                              ssl->buffers.inputBuffer.idx -
+                                              MacSize(ssl) > MAX_PLAINTEXT_SZ) {
+                    WOLFSSL_MSG("Plaintext too long - Encrypt-Then-MAC");
+            #if defined(WOLFSSL_EXTRA_ALERTS)
+                    SendAlert(ssl, alert_fatal, record_overflow);
+            #endif
+                    return BUFFER_ERROR;
+                }
+            }
+            else
+       #endif
+            if (ssl->buffers.inputBuffer.length - ssl->keys.padSz -
+                              ssl->buffers.inputBuffer.idx > MAX_PLAINTEXT_SZ) {
+                WOLFSSL_MSG("Plaintext too long");
+#if defined(WOLFSSL_TLS13) || defined(WOLFSSL_EXTRA_ALERTS)
+                SendAlert(ssl, alert_fatal, record_overflow);
+#endif
+                return BUFFER_ERROR;
+            }
+
         #ifdef WOLFSSL_DTLS
             if (IsDtlsNotSctpMode(ssl)) {
                 DtlsUpdateWindow(ssl);
@@ -12937,7 +15165,7 @@
                         if (ret != 0)
                             return ret;
                         if (ssl->options.side == WOLFSSL_SERVER_END &&
-                                ssl->earlyData &&
+                                ssl->earlyData > early_data_ext &&
                                 ssl->options.handShakeState == HANDSHAKE_DONE) {
                             ssl->earlyData = no_early_data;
                             ssl->options.processReply = doProcessInit;
@@ -12948,8 +15176,10 @@
                         ret = BUFFER_ERROR;
 #endif
                     }
-                    if (ret != 0)
+                    if (ret != 0) {
+                        WOLFSSL_ERROR(ret);
                         return ret;
+                    }
                     break;
 
                 case change_cipher_spec:
@@ -12979,6 +15209,10 @@
     #else
                     if (IsAtLeastTLSv1_3(ssl->version)) {
                         word32 i = ssl->buffers.inputBuffer.idx;
+                        if (ssl->options.handShakeState == HANDSHAKE_DONE) {
+                            SendAlert(ssl, alert_fatal, unexpected_message);
+                            return UNKNOWN_RECORD_TYPE;
+                        }
                         if (ssl->curSize != 1 ||
                                       ssl->buffers.inputBuffer.buffer[i] != 1) {
                             SendAlert(ssl, alert_fatal, illegal_parameter);
@@ -13024,6 +15258,13 @@
                     if (IsEncryptionOn(ssl, 0) && ssl->options.handShakeDone) {
                         ssl->buffers.inputBuffer.idx += ssl->keys.padSz;
                         ssl->curSize -= (word16) ssl->buffers.inputBuffer.idx;
+            #if defined(HAVE_ENCRYPT_THEN_MAC) && !defined(WOLFSSL_AEAD_ONLY)
+                        if (ssl->options.startedETMRead) {
+                            word32 digestSz = MacSize(ssl);
+                            ssl->buffers.inputBuffer.idx += digestSz;
+                            ssl->curSize -= digestSz;
+                        }
+            #endif
                     }
 
                     if (ssl->curSize != 1) {
@@ -13041,6 +15282,10 @@
                     if ((ret = SetKeysSide(ssl, DECRYPT_SIDE_ONLY)) != 0)
                         return ret;
 
+            #if defined(HAVE_ENCRYPT_THEN_MAC) && !defined(WOLFSSL_AEAD_ONLY)
+                    ssl->options.startedETMRead = ssl->options.encThenMac;
+            #endif
+
                     #ifdef WOLFSSL_DTLS
                         if (ssl->options.dtls) {
                             WOLFSSL_DTLS_PEERSEQ* peerSeq = ssl->keys.peerSeq;
@@ -13053,7 +15298,6 @@
                                         ssl->ctx->mcastMaxSeq);
                             }
 #endif
-                            DtlsMsgPoolReset(ssl);
                             peerSeq->nextEpoch++;
                             peerSeq->prevSeq_lo = peerSeq->nextSeq_lo;
                             peerSeq->prevSeq_hi = peerSeq->nextSeq_hi;
@@ -13094,7 +15338,7 @@
                     #endif
                     if ((ret = DoApplicationData(ssl,
                                                 ssl->buffers.inputBuffer.buffer,
-                                               &ssl->buffers.inputBuffer.idx))
+                                                &ssl->buffers.inputBuffer.idx))
                                                                          != 0) {
                         WOLFSSL_ERROR(ret);
                         return ret;
@@ -13138,12 +15382,29 @@
 
                 if (IsEncryptionOn(ssl, 0)) {
                     WOLFSSL_MSG("Bundled encrypted messages, remove middle pad");
-                    if (ssl->buffers.inputBuffer.idx >= ssl->keys.padSz) {
-                        ssl->buffers.inputBuffer.idx -= ssl->keys.padSz;
-                    }
-                    else {
-                        WOLFSSL_MSG("\tmiddle padding error");
-                        return FATAL_ERROR;
+            #if defined(HAVE_ENCRYPT_THEN_MAC) && !defined(WOLFSSL_AEAD_ONLY)
+                    if (ssl->options.startedETMRead) {
+                        word32 digestSz = MacSize(ssl);
+                        if (ssl->buffers.inputBuffer.idx >=
+                                                   ssl->keys.padSz + digestSz) {
+                            ssl->buffers.inputBuffer.idx -=
+                                                     ssl->keys.padSz + digestSz;
+                        }
+                        else {
+                            WOLFSSL_MSG("\tmiddle padding error");
+                            return FATAL_ERROR;
+                        }
+                    }
+                    else
+             #endif
+                    {
+                        if (ssl->buffers.inputBuffer.idx >= ssl->keys.padSz) {
+                            ssl->buffers.inputBuffer.idx -= ssl->keys.padSz;
+                        }
+                        else {
+                            WOLFSSL_MSG("\tmiddle padding error");
+                            return FATAL_ERROR;
+                        }
                     }
                 }
 
@@ -13172,18 +15433,18 @@
     int                ret;
 
     #ifdef OPENSSL_EXTRA
-	ssl->cbmode = SSL_CB_MODE_WRITE;
-	if (ssl->options.side == WOLFSSL_SERVER_END){
-		ssl->options.serverState = SERVER_CHANGECIPHERSPEC_COMPLETE;
-		if (ssl->CBIS != NULL)
-			ssl->CBIS(ssl, SSL_CB_ACCEPT_LOOP, SSL_SUCCESS);
-	}
-	else{
-		ssl->options.clientState =
-			CLIENT_CHANGECIPHERSPEC_COMPLETE;
-		if (ssl->CBIS != NULL)
-			ssl->CBIS(ssl, SSL_CB_CONNECT_LOOP, SSL_SUCCESS);
-	}
+    ssl->cbmode = SSL_CB_MODE_WRITE;
+    if (ssl->options.side == WOLFSSL_SERVER_END){
+        ssl->options.serverState = SERVER_CHANGECIPHERSPEC_COMPLETE;
+        if (ssl->CBIS != NULL)
+            ssl->CBIS(ssl, SSL_CB_ACCEPT_LOOP, SSL_SUCCESS);
+    }
+    else{
+        ssl->options.clientState =
+            CLIENT_CHANGECIPHERSPEC_COMPLETE;
+        if (ssl->CBIS != NULL)
+            ssl->CBIS(ssl, SSL_CB_CONNECT_LOOP, SSL_SUCCESS);
+    }
     #endif
 
     #ifdef WOLFSSL_DTLS
@@ -13198,7 +15459,7 @@
         sendSz += MAX_MSG_EXTRA;
     }
 
-    /* check for avalaible size */
+    /* check for available size */
     if ((ret = CheckAvailableSize(ssl, sendSz)) != 0)
         return ret;
 
@@ -13224,6 +15485,7 @@
 
     #ifdef WOLFSSL_DTLS
         if (IsDtlsNotSctpMode(ssl)) {
+            DtlsSEQIncrement(ssl, CUR_ORDER);
             if ((ret = DtlsMsgPoolSave(ssl, output, sendSz)) != 0)
                 return ret;
         }
@@ -13250,7 +15512,7 @@
 }
 
 
-#ifndef NO_OLD_TLS
+#if !defined(NO_OLD_TLS) && !defined(WOLFSSL_AEAD_ONLY)
 static int SSL_hmac(WOLFSSL* ssl, byte* digest, const byte* in, word32 sz,
                     int padLen, int content, int verify)
 {
@@ -13365,7 +15627,7 @@
     }
     return 0;
 }
-#endif /* NO_OLD_TLS */
+#endif /* !NO_OLD_TLS && !WOLFSSL_AEAD_ONLY */
 
 
 #ifndef NO_CERTS
@@ -13635,6 +15897,7 @@
             }
         #endif
 
+        #ifndef WOLFSSL_AEAD_ONLY
             if (ssl->specs.cipher_type == block) {
                 word32 blockSz = ssl->specs.block_size;
                 if (ssl->options.tls1_1) {
@@ -13645,13 +15908,21 @@
                         ERROR_OUT(BUFFER_E, exit_buildmsg);
                 }
                 args->sz += 1;       /* pad byte */
-                args->pad = (args->sz - args->headerSz) % blockSz;
+            #if defined(HAVE_ENCRYPT_THEN_MAC) && !defined(WOLFSSL_AEAD_ONLY)
+                if (ssl->options.startedETMWrite) {
+                    args->pad = (args->sz - args->headerSz -
+                                                      args->digestSz) % blockSz;
+                }
+                else
+            #endif
+                    args->pad = (args->sz - args->headerSz) % blockSz;
                 #ifdef OPENSSL_EXTRA
                 if(args->pad != 0)
                 #endif
                     args->pad = blockSz - args->pad;
                 args->sz += args->pad;
             }
+        #endif /* WOLFSSL_AEAD_ONLY */
 
         #ifdef HAVE_AEAD
             if (ssl->specs.cipher_type == aead) {
@@ -13681,13 +15952,15 @@
                     goto exit_buildmsg;
 
             }
-
-        #ifdef HAVE_AEAD
+#if !defined(NO_PUBLIC_GCM_SET_IV) && \
+    ((defined(HAVE_FIPS) || defined(HAVE_SELFTEST)) && \
+    (!defined(HAVE_FIPS_VERSION) || (HAVE_FIPS_VERSION < 2)) && \
+    defined(HAVE_AEAD))
             if (ssl->specs.cipher_type == aead) {
                 if (ssl->specs.bulk_cipher_algorithm != wolfssl_chacha)
                     XMEMCPY(args->iv, ssl->keys.aead_exp_IV, AESGCM_EXP_IV_SZ);
             }
-        #endif
+#endif
 
             args->size = (word16)(args->sz - args->headerSz);    /* include mac and digest */
             AddRecordHeader(output, args->size, (byte)type, ssl);
@@ -13706,19 +15979,27 @@
         FALL_THROUGH;
         case BUILD_MSG_HASH:
         {
-            word32 i;
-
             if (type == handshake && hashOutput) {
                 ret = HashOutput(ssl, output, args->headerSz + inSz, args->ivSz);
                 if (ret != 0)
                     goto exit_buildmsg;
             }
+        #ifndef WOLFSSL_AEAD_ONLY
             if (ssl->specs.cipher_type == block) {
-                word32 tmpIdx = args->idx + args->digestSz;
+                word32 tmpIdx;
+                word32 i;
+
+            #if defined(HAVE_ENCRYPT_THEN_MAC) && !defined(WOLFSSL_AEAD_ONLY)
+                if (ssl->options.startedETMWrite)
+                    tmpIdx = args->idx;
+                else
+            #endif
+                    tmpIdx = args->idx + args->digestSz;
 
                 for (i = 0; i <= args->pad; i++)
                     output[tmpIdx++] = (byte)args->pad; /* pad byte gets pad value */
             }
+        #endif
 
             ssl->options.buildMsgState = BUILD_MSG_VERIFY_MAC;
         }
@@ -13726,45 +16007,72 @@
         case BUILD_MSG_VERIFY_MAC:
         {
             /* User Record Layer Callback handling */
-        #ifdef ATOMIC_USER
-            if (ssl->ctx->MacEncryptCb) {
-                ret = ssl->ctx->MacEncryptCb(ssl, output + args->idx,
-                                output + args->headerSz + args->ivSz, inSz, type, 0,
-                                output + args->headerSz, output + args->headerSz, args->size,
-                                ssl->MacEncryptCtx);
-                goto exit_buildmsg;
-            }
-        #endif
-
-            if (ssl->specs.cipher_type != aead) {
-        #ifdef HAVE_TRUNCATED_HMAC
-            if (ssl->truncated_hmac && ssl->specs.hash_size > args->digestSz) {
-            #ifdef WOLFSSL_SMALL_STACK
-                byte* hmac = NULL;
-            #else
-                byte  hmac[WC_MAX_DIGEST_SIZE];
-            #endif
-
-            #ifdef WOLFSSL_SMALL_STACK
-                hmac = (byte*)XMALLOC(WC_MAX_DIGEST_SIZE, ssl->heap,
-                                                       DYNAMIC_TYPE_DIGEST);
-                if (hmac == NULL)
-                    ERROR_OUT(MEMORY_E, exit_buildmsg);
-            #endif
-
-                ret = ssl->hmac(ssl, hmac, output + args->headerSz + args->ivSz,
-                                                             inSz, -1, type, 0);
-                XMEMCPY(output + args->idx, hmac, args->digestSz);
-
-            #ifdef WOLFSSL_SMALL_STACK
-                XFREE(hmac, ssl->heap, DYNAMIC_TYPE_DIGEST);
-            #endif
+    #ifdef ATOMIC_USER
+        #if defined(HAVE_ENCRYPT_THEN_MAC) && !defined(WOLFSSL_AEAD_ONLY)
+            if (ssl->options.startedETMWrite) {
+                if (ssl->ctx->EncryptMacCb) {
+                    ret = ssl->ctx->EncryptMacCb(ssl, output + args->idx +
+                                                 args->pad + 1, type, 0,
+                                                 output + args->headerSz,
+                                                 output + args->headerSz,
+                                                 args->size - args->digestSz,
+                                                 ssl->MacEncryptCtx);
+                    goto exit_buildmsg;
+                }
             }
             else
         #endif
-                ret = ssl->hmac(ssl, output + args->idx, output +
+            {
+                if (ssl->ctx->MacEncryptCb) {
+                    ret = ssl->ctx->MacEncryptCb(ssl, output + args->idx,
+                                    output + args->headerSz + args->ivSz, inSz,
+                                    type, 0, output + args->headerSz,
+                                    output + args->headerSz, args->size,
+                                    ssl->MacEncryptCtx);
+                    goto exit_buildmsg;
+                }
+            }
+    #endif
+
+        #ifndef WOLFSSL_AEAD_ONLY
+            if (ssl->specs.cipher_type != aead
+            #if defined(HAVE_ENCRYPT_THEN_MAC) && !defined(WOLFSSL_AEAD_ONLY)
+                                               && !ssl->options.startedETMWrite
+            #endif
+                ) {
+            #ifdef HAVE_TRUNCATED_HMAC
+                if (ssl->truncated_hmac &&
+                                        ssl->specs.hash_size > args->digestSz) {
+                #ifdef WOLFSSL_SMALL_STACK
+                    byte* hmac;
+                #else
+                    byte  hmac[WC_MAX_DIGEST_SIZE];
+                #endif
+
+                #ifdef WOLFSSL_SMALL_STACK
+                    hmac = (byte*)XMALLOC(WC_MAX_DIGEST_SIZE, ssl->heap,
+                                                           DYNAMIC_TYPE_DIGEST);
+                    if (hmac == NULL)
+                        ERROR_OUT(MEMORY_E, exit_buildmsg);
+                #endif
+
+                    ret = ssl->hmac(ssl, hmac,
+                                     output + args->headerSz + args->ivSz, inSz,
+                                     -1, type, 0);
+                    XMEMCPY(output + args->idx, hmac, args->digestSz);
+
+                #ifdef WOLFSSL_SMALL_STACK
+                    XFREE(hmac, ssl->heap, DYNAMIC_TYPE_DIGEST);
+                #endif
+                }
+                else
+            #endif
+                {
+                    ret = ssl->hmac(ssl, output + args->idx, output +
                                 args->headerSz + args->ivSz, inSz, -1, type, 0);
-            }
+                }
+            }
+        #endif /* WOLFSSL_AEAD_ONLY */
             if (ret != 0)
                 goto exit_buildmsg;
 
@@ -13773,9 +16081,65 @@
         FALL_THROUGH;
         case BUILD_MSG_ENCRYPT:
         {
-            ret = Encrypt(ssl, output + args->headerSz, output + args->headerSz, args->size,
-                asyncOkay);
-            break;
+    #if defined(HAVE_ENCRYPT_THEN_MAC) && !defined(WOLFSSL_AEAD_ONLY)
+            if (ssl->options.startedETMWrite) {
+                ret = Encrypt(ssl, output + args->headerSz,
+                                        output + args->headerSz,
+                                        args->size - args->digestSz, asyncOkay);
+            }
+            else
+    #endif
+            {
+                ret = Encrypt(ssl, output + args->headerSz,
+                                output + args->headerSz, args->size, asyncOkay);
+            }
+            if (ret != 0)
+                goto exit_buildmsg;
+            ssl->options.buildMsgState = BUILD_MSG_ENCRYPTED_VERIFY_MAC;
+        }
+        FALL_THROUGH;
+        case BUILD_MSG_ENCRYPTED_VERIFY_MAC:
+        {
+        #if defined(HAVE_ENCRYPT_THEN_MAC) && !defined(WOLFSSL_AEAD_ONLY)
+            if (ssl->options.startedETMWrite) {
+                WOLFSSL_MSG("Calculate MAC of Encrypted Data");
+
+            #ifdef HAVE_TRUNCATED_HMAC
+                if (ssl->truncated_hmac &&
+                                        ssl->specs.hash_size > args->digestSz) {
+                #ifdef WOLFSSL_SMALL_STACK
+                    byte* hmac = NULL;
+                #else
+                    byte  hmac[WC_MAX_DIGEST_SIZE];
+                #endif
+
+                #ifdef WOLFSSL_SMALL_STACK
+                    hmac = (byte*)XMALLOC(WC_MAX_DIGEST_SIZE, ssl->heap,
+                                                           DYNAMIC_TYPE_DIGEST);
+                    if (hmac == NULL)
+                        ERROR_OUT(MEMORY_E, exit_buildmsg);
+                #endif
+
+                    ret = ssl->hmac(ssl, hmac, output + args->headerSz,
+                                    args->ivSz + inSz + args->pad + 1, -1, type,
+                                    0);
+                    XMEMCPY(output + args->idx + args->pad + 1, hmac,
+                                                                args->digestSz);
+
+                #ifdef WOLFSSL_SMALL_STACK
+                    XFREE(hmac, ssl->heap, DYNAMIC_TYPE_DIGEST);
+                #endif
+                }
+                else
+            #endif
+                {
+                    ret = ssl->hmac(ssl, output + args->idx + args->pad + 1,
+                                    output + args->headerSz,
+                                    args->ivSz + inSz + args->pad + 1, -1, type,
+                                    0);
+                }
+            }
+        #endif /* HAVE_ENCRYPT_THEN_MAC && !WOLFSSL_AEAD_ONLY */
         }
     }
 
@@ -13832,6 +16196,10 @@
     if ((ret = SetKeysSide(ssl, ENCRYPT_SIDE_ONLY)) != 0)
         return ret;
 
+    #if defined(HAVE_ENCRYPT_THEN_MAC) && !defined(WOLFSSL_AEAD_ONLY)
+        ssl->options.startedETMWrite = ssl->options.encThenMac;
+    #endif
+
     /* check for available size */
     outputSz = sizeof(input) + MAX_MSG_EXTRA;
     if ((ret = CheckAvailableSize(ssl, outputSz)) != 0)
@@ -13936,11 +16304,19 @@
         (defined(HAVE_CERTIFICATE_STATUS_REQUEST) || \
          defined(HAVE_CERTIFICATE_STATUS_REQUEST_V2))) || \
     (defined(WOLFSSL_TLS13) && defined(HAVE_CERTIFICATE_STATUS_REQUEST))
+/* Parses and decodes the certificate then initializes "request". In the case
+ * of !ssl->buffers.weOwnCert, ssl->ctx->certOcspRequest gets set to "request".
+ *
+ * Returns 0 on success
+ */
 static int CreateOcspRequest(WOLFSSL* ssl, OcspRequest* request,
                              DecodedCert* cert, byte* certData, word32 length)
 {
     int ret;
 
+    if (request != NULL)
+        XMEMSET(request, 0, sizeof(OcspRequest));
+
     InitDecodedCert(cert, certData, length, ssl->heap);
     /* TODO: Setup async support here */
     ret = ParseCertRelative(cert, CERT_TYPE, VERIFY, ssl->ctx->cm);
@@ -13967,19 +16343,30 @@
 }
 
 
+/* Creates OCSP response and places it in variable "response". Memory
+ * management for "buffer* response" is up to the caller.
+ *
+ * Also creates an OcspRequest in the case that ocspRequest is null or that
+ * ssl->buffers.weOwnCert is set. In those cases managing ocspRequest free'ing
+ * is up to the caller. NOTE: in OcspCreateRequest ssl->ctx->certOcspRequest can
+ * be set to point to "ocspRequest" and it then should not be free'd since
+ * wolfSSL_CTX_free will take care of it.
+ *
+ * Returns 0 on success
+ */
 int CreateOcspResponse(WOLFSSL* ssl, OcspRequest** ocspRequest,
                        buffer* response)
 {
     int          ret = 0;
-    OcspRequest* request;
+    OcspRequest* request = NULL;
+    byte createdRequest  = 0;
 
     if (ssl == NULL || ocspRequest == NULL || response == NULL)
         return BAD_FUNC_ARG;
 
+    XMEMSET(response, 0, sizeof(*response));
     request = *ocspRequest;
 
-    XMEMSET(response, 0, sizeof(*response));
-
     /* unable to fetch status. skip. */
     if (ssl->ctx->cm == NULL || ssl->ctx->cm->ocspStaplingEnabled == 0)
         return 0;
@@ -14007,13 +16394,17 @@
         if (request == NULL)
             ret = MEMORY_E;
 
+        createdRequest = 1;
         if (ret == 0) {
             ret = CreateOcspRequest(ssl, request, cert, der->buffer,
                                                                    der->length);
         }
 
-        if (request != NULL)
+        if (ret != 0) {
             XFREE(request, ssl->heap, DYNAMIC_TYPE_OCSP_REQUEST);
+            request = NULL;
+        }
+
     #ifdef WOLFSSL_SMALL_STACK
         XFREE(cert, ssl->heap, DYNAMIC_TYPE_DCERT);
     #endif
@@ -14031,7 +16422,14 @@
         }
     }
 
-    *ocspRequest = request;
+    /* free request up if error case found otherwise return it */
+    if (ret != 0 && createdRequest) {
+        FreeOcspRequest(request);
+        XFREE(request, ssl->heap, DYNAMIC_TYPE_OCSP_REQUEST);
+    }
+
+    if (ret == 0)
+        *ocspRequest = request;
 
     return ret;
 }
@@ -14347,12 +16745,16 @@
 
     sendSz = RECORD_HEADER_SZ + HANDSHAKE_HEADER_SZ + reqSz;
 
+    if (!ssl->options.dtls) {
+        if (IsEncryptionOn(ssl, 1))
+            sendSz += MAX_MSG_EXTRA;
+    }
+    else {
     #ifdef WOLFSSL_DTLS
-        if (ssl->options.dtls) {
-            sendSz += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
-            i      += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
-        }
-    #endif
+        sendSz += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
+        i      += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
+    #endif
+    }
     /* check for available size */
     if ((ret = CheckAvailableSize(ssl, sendSz)) != 0)
         return ret;
@@ -14405,18 +16807,34 @@
 #endif
     (void)i;
 
-    #ifdef WOLFSSL_DTLS
-        if (IsDtlsNotSctpMode(ssl)) {
-            if ((ret = DtlsMsgPoolSave(ssl, output, sendSz)) != 0)
+        if (IsEncryptionOn(ssl, 1)) {
+            byte* input;
+            int   inputSz = i - RECORD_HEADER_SZ; /* build msg adds rec hdr */
+
+            input = (byte*)XMALLOC(inputSz, ssl->heap, DYNAMIC_TYPE_IN_BUFFER);
+            if (input == NULL)
+                return MEMORY_E;
+
+            XMEMCPY(input, output + RECORD_HEADER_SZ, inputSz);
+            sendSz = BuildMessage(ssl, output, sendSz, input, inputSz,
+                                  handshake, 1, 0, 0);
+            XFREE(input, ssl->heap, DYNAMIC_TYPE_IN_BUFFER);
+
+            if (sendSz < 0)
+                return sendSz;
+        } else {
+            #ifdef WOLFSSL_DTLS
+                if (ssl->options.dtls)
+                    DtlsSEQIncrement(ssl, CUR_ORDER);
+                if (IsDtlsNotSctpMode(ssl)) {
+                    if ((ret = DtlsMsgPoolSave(ssl, output, sendSz)) != 0)
+                        return ret;
+                }
+            #endif
+            ret = HashOutput(ssl, output, sendSz, 0);
+            if (ret != 0)
                 return ret;
         }
-        if (ssl->options.dtls)
-            DtlsSEQIncrement(ssl, CUR_ORDER);
-    #endif
-
-    ret = HashOutput(ssl, output, sendSz, 0);
-    if (ret != 0)
-        return ret;
 
     #if defined(WOLFSSL_CALLBACKS) || defined(OPENSSL_EXTRA)
         if (ssl->hsInfoOn)
@@ -14573,13 +16991,21 @@
             buffer response;
 
             ret = CreateOcspResponse(ssl, &request, &response);
+
+            /* if a request was successfully created and not stored in
+             * ssl->ctx then free it */
+            if (ret == 0 && request != ssl->ctx->certOcspRequest) {
+                FreeOcspRequest(request);
+                XFREE(request, ssl->heap, DYNAMIC_TYPE_OCSP_REQUEST);
+                request = NULL;
+            }
+
             if (ret == 0 && response.buffer) {
                 ret = BuildCertificateStatus(ssl, status_type, &response, 1);
 
                 XFREE(response.buffer, ssl->heap, DYNAMIC_TYPE_OCSP_REQUEST);
                 response.buffer = NULL;
             }
-
             break;
         }
 
@@ -14596,12 +17022,21 @@
             XMEMSET(responses, 0, sizeof(responses));
 
             ret = CreateOcspResponse(ssl, &request, &responses[0]);
+
+            /* if a request was successfully created and not stored in
+             * ssl->ctx then free it */
+            if (ret == 0 && request != ssl->ctx->certOcspRequest) {
+                FreeOcspRequest(request);
+                XFREE(request, ssl->heap, DYNAMIC_TYPE_OCSP_REQUEST);
+                request = NULL;
+            }
+
             if (ret == 0 && (!ssl->ctx->chainOcspRequest[0]
                                               || ssl->buffers.weOwnCertChain)) {
                 buffer der;
                 word32 idx = 0;
             #ifdef WOLFSSL_SMALL_STACK
-                DecodedCert* cert = NULL;
+                DecodedCert* cert;
             #else
                 DecodedCert  cert[1];
             #endif
@@ -14647,6 +17082,7 @@
 
 
                         i++;
+                        FreeOcspRequest(request);
                     }
                 }
 
@@ -14704,12 +17140,30 @@
 
 #endif /* WOLFSSL_NO_TLS12 */
 
+
+/* If secure renegotiation is disabled, this will always return false.
+ * Otherwise it checks to see if we are currently renegotiating. */
+static WC_INLINE int IsSCR(WOLFSSL* ssl)
+{
+#ifndef HAVE_SECURE_RENEGOTIATION
+    (void)ssl;
+#else /* HAVE_SECURE_RENEGOTIATION */
+    if (ssl->secure_renegotiation &&
+            ssl->secure_renegotiation->enabled &&
+            ssl->options.handShakeState != HANDSHAKE_DONE)
+        return 1;
+#endif /* HAVE_SECURE_RENEGOTIATION */
+    return 0;
+}
+
+
 int SendData(WOLFSSL* ssl, const void* data, int sz)
 {
     int sent = 0,  /* plainText size */
         sendSz,
         ret,
         dtlsExtra = 0;
+    int groupMsgs = 0;
 
     if (ssl->error == WANT_WRITE
     #ifdef WOLFSSL_ASYNC_CRYPT
@@ -14719,14 +17173,18 @@
         ssl->error = 0;
     }
 
-#ifdef WOLFSSL_DTLS
-    if (ssl->options.dtls) {
-        /* In DTLS mode, we forgive some errors and allow the session
-         * to continue despite them. */
-        if (ssl->error == VERIFY_MAC_ERROR || ssl->error == DECRYPT_ERROR)
+    /* don't allow write after decrypt or mac error */
+    if (ssl->error == VERIFY_MAC_ERROR || ssl->error == DECRYPT_ERROR) {
+        /* For DTLS allow these possible errors and allow the session
+            to continue despite them */
+        if (ssl->options.dtls) {
             ssl->error = 0;
-    }
-#endif /* WOLFSSL_DTLS */
+        }
+        else {
+            WOLFSSL_MSG("Not allowing write after decrypt or mac error");
+            return WOLFSSL_FATAL_ERROR;
+        }
+    }
 
 #ifdef WOLFSSL_EARLY_DATA
     if (ssl->earlyData != no_early_data) {
@@ -14734,10 +17192,13 @@
             WOLFSSL_MSG("handshake complete, trying to send early data");
             return BUILD_MSG_ERROR;
         }
-    }
-    else
-#endif
-    if (ssl->options.handShakeState != HANDSHAKE_DONE) {
+    #ifdef WOLFSSL_EARLY_DATA_GROUP
+        groupMsgs = 1;
+    #endif
+    }
+    else
+#endif
+    if (ssl->options.handShakeState != HANDSHAKE_DONE && !IsSCR(ssl)) {
         int err;
         WOLFSSL_MSG("handshake not complete, trying to finish");
         if ( (err = wolfSSL_negotiate(ssl)) != WOLFSSL_SUCCESS) {
@@ -14752,7 +17213,7 @@
     }
 
     /* last time system socket output buffer was full, try again to send */
-    if (ssl->buffers.outputBuffer.length > 0) {
+    if (!groupMsgs && ssl->buffers.outputBuffer.length > 0) {
         WOLFSSL_MSG("output buffer was full, trying to send again");
         if ( (ssl->error = SendBuffered(ssl)) < 0) {
             WOLFSSL_ERROR(ssl->error);
@@ -14862,7 +17323,7 @@
 
         /* only one message per attempt */
         if (ssl->options.partialWrite == 1) {
-            WOLFSSL_MSG("Paritial Write on, only sending one record");
+            WOLFSSL_MSG("Partial Write on, only sending one record");
             break;
         }
     }
@@ -14923,10 +17384,10 @@
 startScr:
     if (ssl->secure_renegotiation && ssl->secure_renegotiation->startScr) {
         int err;
-        ssl->secure_renegotiation->startScr = 0;  /* only start once */
         WOLFSSL_MSG("Need to start scr, server requested");
         if ( (err = wolfSSL_Rehandshake(ssl)) != WOLFSSL_SUCCESS)
             return  err;
+        ssl->secure_renegotiation->startScr = 0;  /* only start once */
     }
 #endif
 
@@ -14986,6 +17447,8 @@
     int  outputSz;
     int  dtlsExtra = 0;
 
+    WOLFSSL_ENTER("SendAlert");
+
 #ifdef HAVE_WRITE_DUP
     if (ssl->dupWrite && ssl->dupSide == READ_DUP_SIDE) {
         int notifyErr = 0;
@@ -15047,9 +17510,11 @@
 
     /* only send encrypted alert if handshake actually complete, otherwise
        other side may not be able to handle it */
-    if (IsEncryptionOn(ssl, 1) && ssl->options.handShakeDone)
-        sendSz = BuildMessage(ssl, output, outputSz, input, ALERT_SIZE,
-                                                          alert, 0, 0, 0);
+    if (IsEncryptionOn(ssl, 1) && (IsAtLeastTLSv1_3(ssl->version) ||
+                                                  ssl->options.handShakeDone)) {
+        sendSz = BuildMessage(ssl, output, outputSz, input, ALERT_SIZE, alert,
+                                                                       0, 0, 0);
+    }
     else {
 
         AddRecordHeader(output, ALERT_SIZE, alert, ssl);
@@ -15080,7 +17545,11 @@
     ssl->buffers.outputBuffer.length += sendSz;
     ssl->options.sendAlertState = 1;
 
-    return SendBuffered(ssl);
+    ret = SendBuffered(ssl);
+
+    WOLFSSL_LEAVE("SendAlert", ret);
+
+    return ret;
 }
 
 const char* wolfSSL_ERR_reason_error_string(unsigned long e)
@@ -15152,7 +17621,7 @@
         return "error during decryption";
 
     case FATAL_ERROR :
-        return "revcd alert fatal error";
+        return "received alert fatal error";
 
     case ENCRYPT_ERROR :
         return "error during encryption";
@@ -15187,6 +17656,9 @@
     case DOMAIN_NAME_MISMATCH :
         return "peer subject name mismatch";
 
+    case IPADDR_MISMATCH :
+        return "peer ip address mismatch";
+
     case WANT_READ :
     case WOLFSSL_ERROR_WANT_READ :
         return "non-blocking socket wants data to be read";
@@ -15231,15 +17703,6 @@
     case NTRU_DECRYPT_ERROR:
         return "NTRU decrypt error";
 
-    case ZLIB_INIT_ERROR:
-        return "zlib init error";
-
-    case ZLIB_COMPRESS_ERROR:
-        return "zlib compress error";
-
-    case ZLIB_DECOMPRESS_ERROR:
-        return "zlib decompress error";
-
     case GETTIME_ERROR:
         return "gettimeofday() error";
 
@@ -15376,9 +17839,6 @@
     case SESSION_TICKET_EXPECT_E:
         return "Session Ticket Error";
 
-    case SCR_DIFFERENT_CERT_E:
-        return "Peer sent different cert during SCR";
-
     case SESSION_SECRET_CB_E:
         return "Session Secret Callback Error";
 
@@ -15464,7 +17924,7 @@
         return "Certificate context does not match request or not empty";
 
     case BAD_KEY_SHARE_DATA:
-        return "The Key Share data contains group that was in Client Hello";
+        return "The Key Share data contains group that wasn't in Client Hello";
 
     case MISSING_HANDSHAKE_DATA:
         return "The handshake message is missing required data";
@@ -15481,6 +17941,9 @@
     case UNSUPPORTED_EXTENSION:
         return "TLS Extension not requested by the client";
 
+    case PRF_MISSING:
+        return "Pseudo-random function is not enabled";
+
     case KEY_SHARE_ERROR:
         return "Key share extension did not contain a valid named group";
 
@@ -15499,6 +17962,30 @@
     case EXT_MISSING:
         return "Required TLS extension missing";
 
+    case DTLS_RETX_OVER_TX:
+        return "DTLS interrupting flight transmit with retransmit";
+
+    case DH_PARAMS_NOT_FFDHE_E:
+        return "Server DH parameters were not from the FFDHE set as required";
+
+    case TCA_INVALID_ID_TYPE:
+        return "TLS Extension Trusted CA ID type invalid";
+
+    case TCA_ABSENT_ERROR:
+        return "TLS Extension Trusted CA ID response absent";
+
+    case TSIP_MAC_DIGSZ_E:
+        return "TSIP MAC size invalid, must be sized for SHA-1 or SHA-256";
+
+    case CLIENT_CERT_CB_ERROR:
+        return "Error importing client cert or key from callback";
+
+    case SSL_SHUTDOWN_ALREADY_DONE_E:
+        return "Shutdown has already occurred";
+
+    case TLS13_SECRET_CB_E:
+        return "TLS1.3 Secret Callback Error";
+
     default :
         return "unknown error number";
     }
@@ -15509,487 +17996,497 @@
 void SetErrorString(int error, char* str)
 {
     XSTRNCPY(str, wolfSSL_ERR_reason_error_string(error), WOLFSSL_MAX_ERROR_SZ);
+    str[WOLFSSL_MAX_ERROR_SZ-1] = 0;
 }
 
 #ifndef NO_ERROR_STRINGS
-    #define SUITE_INFO(x,y,z,w) {(x),(y),(z),(w)}
-#else
-    #define SUITE_INFO(x,y,z,w) {(x),(z),(w)}
+    #if defined(OPENSSL_ALL) || defined(WOLFSSL_QT)
+        #define SUITE_INFO(x,y,z,w,v,u) {(x),(y),(z),(w),(v),(u)}
+    #else
+        #define SUITE_INFO(x,y,z,w,v,u) {(x),(y),(z),(w)}
+    #endif
+#else
+    #if defined(OPENSSL_ALL) || defined(WOLFSSL_QT)
+        #define SUITE_INFO(x,y,z,w,v,u) {(x),(z),(w),(v),(u)}
+    #else
+        #define SUITE_INFO(x,y,z,w,v,u) {(x),(z),(w)}
+    #endif
 #endif
 
 static const CipherSuiteInfo cipher_names[] =
 {
+
+#ifdef BUILD_TLS_AES_128_GCM_SHA256
+    SUITE_INFO("TLS13-AES128-GCM-SHA256","TLS_AES_128_GCM_SHA256",TLS13_BYTE,TLS_AES_128_GCM_SHA256, TLSv1_3_MINOR, SSLv3_MAJOR),
+#endif
+
+#ifdef BUILD_TLS_AES_256_GCM_SHA384
+    SUITE_INFO("TLS13-AES256-GCM-SHA384","TLS_AES_256_GCM_SHA384",TLS13_BYTE,TLS_AES_256_GCM_SHA384, TLSv1_3_MINOR, SSLv3_MAJOR),
+#endif
+
+#ifdef BUILD_TLS_CHACHA20_POLY1305_SHA256
+    SUITE_INFO("TLS13-CHACHA20-POLY1305-SHA256","TLS_CHACHA20_POLY1305_SHA256",TLS13_BYTE,TLS_CHACHA20_POLY1305_SHA256, TLSv1_3_MINOR, SSLv3_MAJOR),
+#endif
+
+#ifdef BUILD_TLS_AES_128_CCM_SHA256
+    SUITE_INFO("TLS13-AES128-CCM-SHA256","TLS_AES_128_CCM_SHA256",TLS13_BYTE,TLS_AES_128_CCM_SHA256, TLSv1_3_MINOR, SSLv3_MAJOR),
+#endif
+
+#ifdef BUILD_TLS_AES_128_CCM_8_SHA256
+    SUITE_INFO("TLS13-AES128-CCM-8-SHA256","TLS_AES_128_CCM_8_SHA256",TLS13_BYTE,TLS_AES_128_CCM_8_SHA256,TLSv1_3_MINOR, SSLv3_MAJOR),
+#endif
+
+#ifdef BUILD_TLS_SHA256_SHA256
+    SUITE_INFO("TLS13-SHA256-SHA256","TLS_SHA256_SHA256",ECC_BYTE,TLS_SHA256_SHA256,TLSv1_3_MINOR, SSLv3_MAJOR),
+#endif
+
+#ifdef BUILD_TLS_SHA384_SHA384
+    SUITE_INFO("TLS13-SHA384-SHA384","TLS_SHA384_SHA384",ECC_BYTE,TLS_SHA384_SHA384,TLSv1_3_MINOR, SSLv3_MAJOR),
+#endif
+
 #ifndef WOLFSSL_NO_TLS12
 
 #ifdef BUILD_SSL_RSA_WITH_RC4_128_SHA
-    SUITE_INFO("RC4-SHA","SSL_RSA_WITH_RC4_128_SHA",CIPHER_BYTE,SSL_RSA_WITH_RC4_128_SHA),
+    SUITE_INFO("RC4-SHA","SSL_RSA_WITH_RC4_128_SHA",CIPHER_BYTE,SSL_RSA_WITH_RC4_128_SHA,SSLv3_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_SSL_RSA_WITH_RC4_128_MD5
-    SUITE_INFO("RC4-MD5","SSL_RSA_WITH_RC4_128_MD5",CIPHER_BYTE,SSL_RSA_WITH_RC4_128_MD5),
+    SUITE_INFO("RC4-MD5","SSL_RSA_WITH_RC4_128_MD5",CIPHER_BYTE,SSL_RSA_WITH_RC4_128_MD5,SSLv3_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_SSL_RSA_WITH_3DES_EDE_CBC_SHA
-    SUITE_INFO("DES-CBC3-SHA","SSL_RSA_WITH_3DES_EDE_CBC_SHA",CIPHER_BYTE,SSL_RSA_WITH_3DES_EDE_CBC_SHA),
+    SUITE_INFO("DES-CBC3-SHA","SSL_RSA_WITH_3DES_EDE_CBC_SHA",CIPHER_BYTE,SSL_RSA_WITH_3DES_EDE_CBC_SHA,SSLv3_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_RSA_WITH_AES_128_CBC_SHA
-    SUITE_INFO("AES128-SHA","TLS_RSA_WITH_AES_128_CBC_SHA",CIPHER_BYTE,TLS_RSA_WITH_AES_128_CBC_SHA),
+    SUITE_INFO("AES128-SHA","TLS_RSA_WITH_AES_128_CBC_SHA",CIPHER_BYTE,TLS_RSA_WITH_AES_128_CBC_SHA,SSLv3_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_RSA_WITH_AES_256_CBC_SHA
-    SUITE_INFO("AES256-SHA","TLS_RSA_WITH_AES_256_CBC_SHA",CIPHER_BYTE,TLS_RSA_WITH_AES_256_CBC_SHA),
+    SUITE_INFO("AES256-SHA","TLS_RSA_WITH_AES_256_CBC_SHA",CIPHER_BYTE,TLS_RSA_WITH_AES_256_CBC_SHA,SSLv3_MINOR,SSLv3_MAJOR),
+#endif
+
+#ifdef BUILD_TLS_RSA_WITH_NULL_MD5
+    SUITE_INFO("NULL-MD5","TLS_RSA_WITH_NULL_MD5",CIPHER_BYTE,TLS_RSA_WITH_NULL_MD5,SSLv3_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_RSA_WITH_NULL_SHA
-    SUITE_INFO("NULL-SHA","TLS_RSA_WITH_NULL_SHA",CIPHER_BYTE,TLS_RSA_WITH_NULL_SHA),
+    SUITE_INFO("NULL-SHA","TLS_RSA_WITH_NULL_SHA",CIPHER_BYTE,TLS_RSA_WITH_NULL_SHA,SSLv3_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_RSA_WITH_NULL_SHA256
-    SUITE_INFO("NULL-SHA256","TLS_RSA_WITH_NULL_SHA256",CIPHER_BYTE,TLS_RSA_WITH_NULL_SHA256),
+    SUITE_INFO("NULL-SHA256","TLS_RSA_WITH_NULL_SHA256",CIPHER_BYTE,TLS_RSA_WITH_NULL_SHA256,TLSv1_2_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_DHE_RSA_WITH_AES_128_CBC_SHA
-    SUITE_INFO("DHE-RSA-AES128-SHA","TLS_DHE_RSA_WITH_AES_128_CBC_SHA",CIPHER_BYTE,TLS_DHE_RSA_WITH_AES_128_CBC_SHA),
+    SUITE_INFO("DHE-RSA-AES128-SHA","TLS_DHE_RSA_WITH_AES_128_CBC_SHA",CIPHER_BYTE,TLS_DHE_RSA_WITH_AES_128_CBC_SHA,SSLv3_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_DHE_RSA_WITH_AES_256_CBC_SHA
-    SUITE_INFO("DHE-RSA-AES256-SHA","TLS_DHE_RSA_WITH_AES_256_CBC_SHA",CIPHER_BYTE,TLS_DHE_RSA_WITH_AES_256_CBC_SHA),
+    SUITE_INFO("DHE-RSA-AES256-SHA","TLS_DHE_RSA_WITH_AES_256_CBC_SHA",CIPHER_BYTE,TLS_DHE_RSA_WITH_AES_256_CBC_SHA,SSLv3_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_DHE_PSK_WITH_AES_256_GCM_SHA384
-    SUITE_INFO("DHE-PSK-AES256-GCM-SHA384","TLS_DHE_PSK_WITH_AES_256_GCM_SHA384",CIPHER_BYTE,TLS_DHE_PSK_WITH_AES_256_GCM_SHA384),
+    SUITE_INFO("DHE-PSK-AES256-GCM-SHA384","TLS_DHE_PSK_WITH_AES_256_GCM_SHA384",CIPHER_BYTE,TLS_DHE_PSK_WITH_AES_256_GCM_SHA384,TLSv1_2_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_DHE_PSK_WITH_AES_128_GCM_SHA256
-    SUITE_INFO("DHE-PSK-AES128-GCM-SHA256","TLS_DHE_PSK_WITH_AES_128_GCM_SHA256",CIPHER_BYTE,TLS_DHE_PSK_WITH_AES_128_GCM_SHA256),
+    SUITE_INFO("DHE-PSK-AES128-GCM-SHA256","TLS_DHE_PSK_WITH_AES_128_GCM_SHA256",CIPHER_BYTE,TLS_DHE_PSK_WITH_AES_128_GCM_SHA256,TLSv1_2_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_PSK_WITH_AES_256_GCM_SHA384
-    SUITE_INFO("PSK-AES256-GCM-SHA384","TLS_PSK_WITH_AES_256_GCM_SHA384",CIPHER_BYTE,TLS_PSK_WITH_AES_256_GCM_SHA384),
+    SUITE_INFO("PSK-AES256-GCM-SHA384","TLS_PSK_WITH_AES_256_GCM_SHA384",CIPHER_BYTE,TLS_PSK_WITH_AES_256_GCM_SHA384,TLSv1_2_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_PSK_WITH_AES_128_GCM_SHA256
-    SUITE_INFO("PSK-AES128-GCM-SHA256","TLS_PSK_WITH_AES_128_GCM_SHA256",CIPHER_BYTE,TLS_PSK_WITH_AES_128_GCM_SHA256),
+    SUITE_INFO("PSK-AES128-GCM-SHA256","TLS_PSK_WITH_AES_128_GCM_SHA256",CIPHER_BYTE,TLS_PSK_WITH_AES_128_GCM_SHA256,TLSv1_2_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_DHE_PSK_WITH_AES_256_CBC_SHA384
-    SUITE_INFO("DHE-PSK-AES256-CBC-SHA384","TLS_DHE_PSK_WITH_AES_256_CBC_SHA384",CIPHER_BYTE,TLS_DHE_PSK_WITH_AES_256_CBC_SHA384),
+    SUITE_INFO("DHE-PSK-AES256-CBC-SHA384","TLS_DHE_PSK_WITH_AES_256_CBC_SHA384",CIPHER_BYTE,TLS_DHE_PSK_WITH_AES_256_CBC_SHA384,TLSv1_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_DHE_PSK_WITH_AES_128_CBC_SHA256
-    SUITE_INFO("DHE-PSK-AES128-CBC-SHA256","TLS_DHE_PSK_WITH_AES_128_CBC_SHA256",CIPHER_BYTE,TLS_DHE_PSK_WITH_AES_128_CBC_SHA256),
+    SUITE_INFO("DHE-PSK-AES128-CBC-SHA256","TLS_DHE_PSK_WITH_AES_128_CBC_SHA256",CIPHER_BYTE,TLS_DHE_PSK_WITH_AES_128_CBC_SHA256,TLSv1_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_PSK_WITH_AES_256_CBC_SHA384
-    SUITE_INFO("PSK-AES256-CBC-SHA384","TLS_PSK_WITH_AES_256_CBC_SHA384",CIPHER_BYTE,TLS_PSK_WITH_AES_256_CBC_SHA384),
+    SUITE_INFO("PSK-AES256-CBC-SHA384","TLS_PSK_WITH_AES_256_CBC_SHA384",CIPHER_BYTE,TLS_PSK_WITH_AES_256_CBC_SHA384,TLSv1_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_PSK_WITH_AES_128_CBC_SHA256
-    SUITE_INFO("PSK-AES128-CBC-SHA256","TLS_PSK_WITH_AES_128_CBC_SHA256",CIPHER_BYTE,TLS_PSK_WITH_AES_128_CBC_SHA256),
+    SUITE_INFO("PSK-AES128-CBC-SHA256","TLS_PSK_WITH_AES_128_CBC_SHA256",CIPHER_BYTE,TLS_PSK_WITH_AES_128_CBC_SHA256,TLSv1_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_PSK_WITH_AES_128_CBC_SHA
-    SUITE_INFO("PSK-AES128-CBC-SHA","TLS_PSK_WITH_AES_128_CBC_SHA",CIPHER_BYTE,TLS_PSK_WITH_AES_128_CBC_SHA),
+    SUITE_INFO("PSK-AES128-CBC-SHA","TLS_PSK_WITH_AES_128_CBC_SHA",CIPHER_BYTE,TLS_PSK_WITH_AES_128_CBC_SHA,TLSv1_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_PSK_WITH_AES_256_CBC_SHA
-    SUITE_INFO("PSK-AES256-CBC-SHA","TLS_PSK_WITH_AES_256_CBC_SHA",CIPHER_BYTE,TLS_PSK_WITH_AES_256_CBC_SHA),
+    SUITE_INFO("PSK-AES256-CBC-SHA","TLS_PSK_WITH_AES_256_CBC_SHA",CIPHER_BYTE,TLS_PSK_WITH_AES_256_CBC_SHA, TLSv1_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_DHE_PSK_WITH_AES_128_CCM
-    SUITE_INFO("DHE-PSK-AES128-CCM","TLS_DHE_PSK_WITH_AES_128_CCM",ECC_BYTE,TLS_DHE_PSK_WITH_AES_128_CCM),
+    SUITE_INFO("DHE-PSK-AES128-CCM","TLS_DHE_PSK_WITH_AES_128_CCM",ECC_BYTE,TLS_DHE_PSK_WITH_AES_128_CCM,TLSv1_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_DHE_PSK_WITH_AES_256_CCM
-    SUITE_INFO("DHE-PSK-AES256-CCM","TLS_DHE_PSK_WITH_AES_256_CCM",ECC_BYTE,TLS_DHE_PSK_WITH_AES_256_CCM),
+    SUITE_INFO("DHE-PSK-AES256-CCM","TLS_DHE_PSK_WITH_AES_256_CCM",ECC_BYTE,TLS_DHE_PSK_WITH_AES_256_CCM,TLSv1_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_PSK_WITH_AES_128_CCM
-    SUITE_INFO("PSK-AES128-CCM","TLS_PSK_WITH_AES_128_CCM",ECC_BYTE,TLS_PSK_WITH_AES_128_CCM),
+    SUITE_INFO("PSK-AES128-CCM","TLS_PSK_WITH_AES_128_CCM",ECC_BYTE,TLS_PSK_WITH_AES_128_CCM,TLSv1_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_PSK_WITH_AES_256_CCM
-    SUITE_INFO("PSK-AES256-CCM","TLS_PSK_WITH_AES_256_CCM",ECC_BYTE,TLS_PSK_WITH_AES_256_CCM),
+    SUITE_INFO("PSK-AES256-CCM","TLS_PSK_WITH_AES_256_CCM",ECC_BYTE,TLS_PSK_WITH_AES_256_CCM,TLSv1_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_PSK_WITH_AES_128_CCM_8
-    SUITE_INFO("PSK-AES128-CCM-8","TLS_PSK_WITH_AES_128_CCM_8",ECC_BYTE,TLS_PSK_WITH_AES_128_CCM_8),
+    SUITE_INFO("PSK-AES128-CCM-8","TLS_PSK_WITH_AES_128_CCM_8",ECC_BYTE,TLS_PSK_WITH_AES_128_CCM_8,TLSv1_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_PSK_WITH_AES_256_CCM_8
-    SUITE_INFO("PSK-AES256-CCM-8","TLS_PSK_WITH_AES_256_CCM_8",ECC_BYTE,TLS_PSK_WITH_AES_256_CCM_8),
+    SUITE_INFO("PSK-AES256-CCM-8","TLS_PSK_WITH_AES_256_CCM_8",ECC_BYTE,TLS_PSK_WITH_AES_256_CCM_8,TLSv1_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_DHE_PSK_WITH_NULL_SHA384
-    SUITE_INFO("DHE-PSK-NULL-SHA384","TLS_DHE_PSK_WITH_NULL_SHA384",CIPHER_BYTE,TLS_DHE_PSK_WITH_NULL_SHA384),
+    SUITE_INFO("DHE-PSK-NULL-SHA384","TLS_DHE_PSK_WITH_NULL_SHA384",CIPHER_BYTE,TLS_DHE_PSK_WITH_NULL_SHA384,TLSv1_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_DHE_PSK_WITH_NULL_SHA256
-    SUITE_INFO("DHE-PSK-NULL-SHA256","TLS_DHE_PSK_WITH_NULL_SHA256",CIPHER_BYTE,TLS_DHE_PSK_WITH_NULL_SHA256),
+    SUITE_INFO("DHE-PSK-NULL-SHA256","TLS_DHE_PSK_WITH_NULL_SHA256",CIPHER_BYTE,TLS_DHE_PSK_WITH_NULL_SHA256,TLSv1_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_PSK_WITH_NULL_SHA384
-    SUITE_INFO("PSK-NULL-SHA384","TLS_PSK_WITH_NULL_SHA384",CIPHER_BYTE,TLS_PSK_WITH_NULL_SHA384),
+    SUITE_INFO("PSK-NULL-SHA384","TLS_PSK_WITH_NULL_SHA384",CIPHER_BYTE,TLS_PSK_WITH_NULL_SHA384,TLSv1_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_PSK_WITH_NULL_SHA256
-    SUITE_INFO("PSK-NULL-SHA256","TLS_PSK_WITH_NULL_SHA256",CIPHER_BYTE,TLS_PSK_WITH_NULL_SHA256),
+    SUITE_INFO("PSK-NULL-SHA256","TLS_PSK_WITH_NULL_SHA256",CIPHER_BYTE,TLS_PSK_WITH_NULL_SHA256,TLSv1_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_PSK_WITH_NULL_SHA
-    SUITE_INFO("PSK-NULL-SHA","TLS_PSK_WITH_NULL_SHA",CIPHER_BYTE,TLS_PSK_WITH_NULL_SHA),
+    SUITE_INFO("PSK-NULL-SHA","TLS_PSK_WITH_NULL_SHA",CIPHER_BYTE,TLS_PSK_WITH_NULL_SHA,TLSv1_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_RSA_WITH_HC_128_MD5
-    SUITE_INFO("HC128-MD5","TLS_RSA_WITH_HC_128_MD5",CIPHER_BYTE,TLS_RSA_WITH_HC_128_MD5),
+    SUITE_INFO("HC128-MD5","TLS_RSA_WITH_HC_128_MD5",CIPHER_BYTE,TLS_RSA_WITH_HC_128_MD5,TLSv1_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_RSA_WITH_HC_128_SHA
-    SUITE_INFO("HC128-SHA","TLS_RSA_WITH_HC_128_SHA",CIPHER_BYTE,TLS_RSA_WITH_HC_128_SHA),
-#endif
-
-#ifdef BUILD_TLS_RSA_WITH_HC_128_B2B256
-    SUITE_INFO("HC128-B2B256","TLS_RSA_WITH_HC_128_B2B256",CIPHER_BYTE,TLS_RSA_WITH_HC_128_B2B256),
-#endif
-
-#ifdef BUILD_TLS_RSA_WITH_AES_128_CBC_B2B256
-    SUITE_INFO("AES128-B2B256","TLS_RSA_WITH_AES_128_CBC_B2B256",CIPHER_BYTE,TLS_RSA_WITH_AES_128_CBC_B2B256),
-#endif
-
-#ifdef BUILD_TLS_RSA_WITH_AES_256_CBC_B2B256
-    SUITE_INFO("AES256-B2B256","TLS_RSA_WITH_AES_256_CBC_B2B256",CIPHER_BYTE,TLS_RSA_WITH_AES_256_CBC_B2B256),
+    SUITE_INFO("HC128-SHA","TLS_RSA_WITH_HC_128_SHA",CIPHER_BYTE,TLS_RSA_WITH_HC_128_SHA,TLSv1_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_RSA_WITH_RABBIT_SHA
-    SUITE_INFO("RABBIT-SHA","TLS_RSA_WITH_RABBIT_SHA",CIPHER_BYTE,TLS_RSA_WITH_RABBIT_SHA),
+    SUITE_INFO("RABBIT-SHA","TLS_RSA_WITH_RABBIT_SHA",CIPHER_BYTE,TLS_RSA_WITH_RABBIT_SHA,TLSv1_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_NTRU_RSA_WITH_RC4_128_SHA
-    SUITE_INFO("NTRU-RC4-SHA","TLS_NTRU_RSA_WITH_RC4_128_SHA",CIPHER_BYTE,TLS_NTRU_RSA_WITH_RC4_128_SHA),
+    SUITE_INFO("NTRU-RC4-SHA","TLS_NTRU_RSA_WITH_RC4_128_SHA",CIPHER_BYTE,TLS_NTRU_RSA_WITH_RC4_128_SHA, TLSv1_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_NTRU_RSA_WITH_3DES_EDE_CBC_SHA
-    SUITE_INFO("NTRU-DES-CBC3-SHA","TLS_NTRU_RSA_WITH_3DES_EDE_CBC_SHA",CIPHER_BYTE,TLS_NTRU_RSA_WITH_3DES_EDE_CBC_SHA),
+    SUITE_INFO("NTRU-DES-CBC3-SHA","TLS_NTRU_RSA_WITH_3DES_EDE_CBC_SHA",CIPHER_BYTE,TLS_NTRU_RSA_WITH_3DES_EDE_CBC_SHA, TLSv1_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_NTRU_RSA_WITH_AES_128_CBC_SHA
-    SUITE_INFO("NTRU-AES128-SHA","TLS_NTRU_RSA_WITH_AES_128_CBC_SHA",CIPHER_BYTE,TLS_NTRU_RSA_WITH_AES_128_CBC_SHA),
+    SUITE_INFO("NTRU-AES128-SHA","TLS_NTRU_RSA_WITH_AES_128_CBC_SHA",CIPHER_BYTE,TLS_NTRU_RSA_WITH_AES_128_CBC_SHA, TLSv1_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_NTRU_RSA_WITH_AES_256_CBC_SHA
-    SUITE_INFO("NTRU-AES256-SHA","TLS_NTRU_RSA_WITH_AES_256_CBC_SHA",CIPHER_BYTE,TLS_NTRU_RSA_WITH_AES_256_CBC_SHA),
+    SUITE_INFO("NTRU-AES256-SHA","TLS_NTRU_RSA_WITH_AES_256_CBC_SHA",CIPHER_BYTE,TLS_NTRU_RSA_WITH_AES_256_CBC_SHA, TLSv1_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_RSA_WITH_AES_128_CCM_8
-    SUITE_INFO("AES128-CCM-8","TLS_RSA_WITH_AES_128_CCM_8",ECC_BYTE,TLS_RSA_WITH_AES_128_CCM_8),
+    SUITE_INFO("AES128-CCM-8","TLS_RSA_WITH_AES_128_CCM_8",ECC_BYTE,TLS_RSA_WITH_AES_128_CCM_8, TLSv1_2_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_RSA_WITH_AES_256_CCM_8
-    SUITE_INFO("AES256-CCM-8","TLS_RSA_WITH_AES_256_CCM_8",ECC_BYTE,TLS_RSA_WITH_AES_256_CCM_8),
+    SUITE_INFO("AES256-CCM-8","TLS_RSA_WITH_AES_256_CCM_8",ECC_BYTE,TLS_RSA_WITH_AES_256_CCM_8, TLSv1_2_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_ECDHE_ECDSA_WITH_AES_128_CCM
-	SUITE_INFO("ECDHE-ECDSA-AES128-CCM","TLS_ECDHE_ECDSA_WITH_AES_128_CCM",ECC_BYTE,TLS_ECDHE_ECDSA_WITH_AES_128_CCM),
+	SUITE_INFO("ECDHE-ECDSA-AES128-CCM","TLS_ECDHE_ECDSA_WITH_AES_128_CCM",ECC_BYTE,TLS_ECDHE_ECDSA_WITH_AES_128_CCM, TLSv1_2_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_ECDHE_ECDSA_WITH_AES_128_CCM_8
-    SUITE_INFO("ECDHE-ECDSA-AES128-CCM-8","TLS_ECDHE_ECDSA_WITH_AES_128_CCM_8",ECC_BYTE,TLS_ECDHE_ECDSA_WITH_AES_128_CCM_8),
+    SUITE_INFO("ECDHE-ECDSA-AES128-CCM-8","TLS_ECDHE_ECDSA_WITH_AES_128_CCM_8",ECC_BYTE,TLS_ECDHE_ECDSA_WITH_AES_128_CCM_8, TLSv1_2_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_ECDHE_ECDSA_WITH_AES_256_CCM_8
-    SUITE_INFO("ECDHE-ECDSA-AES256-CCM-8","TLS_ECDHE_ECDSA_WITH_AES_256_CCM_8",ECC_BYTE,TLS_ECDHE_ECDSA_WITH_AES_256_CCM_8),
+    SUITE_INFO("ECDHE-ECDSA-AES256-CCM-8","TLS_ECDHE_ECDSA_WITH_AES_256_CCM_8",ECC_BYTE,TLS_ECDHE_ECDSA_WITH_AES_256_CCM_8, TLSv1_2_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA
-    SUITE_INFO("ECDHE-RSA-AES128-SHA","TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA",ECC_BYTE,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA),
+    SUITE_INFO("ECDHE-RSA-AES128-SHA","TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA",ECC_BYTE,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLSv1_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA
-    SUITE_INFO("ECDHE-RSA-AES256-SHA","TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA",ECC_BYTE,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA),
+    SUITE_INFO("ECDHE-RSA-AES256-SHA","TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA",ECC_BYTE,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA,TLSv1_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA
-    SUITE_INFO("ECDHE-ECDSA-AES128-SHA","TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA",ECC_BYTE,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA),
+    SUITE_INFO("ECDHE-ECDSA-AES128-SHA","TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA",ECC_BYTE,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA, TLSv1_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA
-    SUITE_INFO("ECDHE-ECDSA-AES256-SHA","TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA",ECC_BYTE,TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA),
+    SUITE_INFO("ECDHE-ECDSA-AES256-SHA","TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA",ECC_BYTE,TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA, TLSv1_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_ECDHE_RSA_WITH_RC4_128_SHA
-    SUITE_INFO("ECDHE-RSA-RC4-SHA","TLS_ECDHE_RSA_WITH_RC4_128_SHA",ECC_BYTE,TLS_ECDHE_RSA_WITH_RC4_128_SHA),
+    SUITE_INFO("ECDHE-RSA-RC4-SHA","TLS_ECDHE_RSA_WITH_RC4_128_SHA",ECC_BYTE,TLS_ECDHE_RSA_WITH_RC4_128_SHA, TLSv1_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA
-    SUITE_INFO("ECDHE-RSA-DES-CBC3-SHA","TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA",ECC_BYTE,TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA),
+    SUITE_INFO("ECDHE-RSA-DES-CBC3-SHA","TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA",ECC_BYTE,TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA, TLSv1_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_ECDHE_ECDSA_WITH_RC4_128_SHA
-    SUITE_INFO("ECDHE-ECDSA-RC4-SHA","TLS_ECDHE_ECDSA_WITH_RC4_128_SHA",ECC_BYTE,TLS_ECDHE_ECDSA_WITH_RC4_128_SHA),
+    SUITE_INFO("ECDHE-ECDSA-RC4-SHA","TLS_ECDHE_ECDSA_WITH_RC4_128_SHA",ECC_BYTE,TLS_ECDHE_ECDSA_WITH_RC4_128_SHA, TLSv1_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_ECDHE_ECDSA_WITH_3DES_EDE_CBC_SHA
-    SUITE_INFO("ECDHE-ECDSA-DES-CBC3-SHA","TLS_ECDHE_ECDSA_WITH_3DES_EDE_CBC_SHA",ECC_BYTE,TLS_ECDHE_ECDSA_WITH_3DES_EDE_CBC_SHA),
+    SUITE_INFO("ECDHE-ECDSA-DES-CBC3-SHA","TLS_ECDHE_ECDSA_WITH_3DES_EDE_CBC_SHA",ECC_BYTE,TLS_ECDHE_ECDSA_WITH_3DES_EDE_CBC_SHA, TLSv1_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_RSA_WITH_AES_128_CBC_SHA256
-    SUITE_INFO("AES128-SHA256","TLS_RSA_WITH_AES_128_CBC_SHA256",CIPHER_BYTE,TLS_RSA_WITH_AES_128_CBC_SHA256),
+    SUITE_INFO("AES128-SHA256","TLS_RSA_WITH_AES_128_CBC_SHA256",CIPHER_BYTE,TLS_RSA_WITH_AES_128_CBC_SHA256, TLSv1_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_RSA_WITH_AES_256_CBC_SHA256
-    SUITE_INFO("AES256-SHA256","TLS_RSA_WITH_AES_256_CBC_SHA256",CIPHER_BYTE,TLS_RSA_WITH_AES_256_CBC_SHA256),
+    SUITE_INFO("AES256-SHA256","TLS_RSA_WITH_AES_256_CBC_SHA256",CIPHER_BYTE,TLS_RSA_WITH_AES_256_CBC_SHA256, TLSv1_2_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_DHE_RSA_WITH_AES_128_CBC_SHA256
-    SUITE_INFO("DHE-RSA-AES128-SHA256","TLS_DHE_RSA_WITH_AES_128_CBC_SHA256",CIPHER_BYTE,TLS_DHE_RSA_WITH_AES_128_CBC_SHA256),
+    SUITE_INFO("DHE-RSA-AES128-SHA256","TLS_DHE_RSA_WITH_AES_128_CBC_SHA256",CIPHER_BYTE,TLS_DHE_RSA_WITH_AES_128_CBC_SHA256, TLSv1_2_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_DHE_RSA_WITH_AES_256_CBC_SHA256
-    SUITE_INFO("DHE-RSA-AES256-SHA256","TLS_DHE_RSA_WITH_AES_256_CBC_SHA256",CIPHER_BYTE,TLS_DHE_RSA_WITH_AES_256_CBC_SHA256),
+    SUITE_INFO("DHE-RSA-AES256-SHA256","TLS_DHE_RSA_WITH_AES_256_CBC_SHA256",CIPHER_BYTE,TLS_DHE_RSA_WITH_AES_256_CBC_SHA256, TLSv1_2_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_ECDH_RSA_WITH_AES_128_CBC_SHA
-    SUITE_INFO("ECDH-RSA-AES128-SHA","TLS_ECDH_RSA_WITH_AES_128_CBC_SHA",ECC_BYTE,TLS_ECDH_RSA_WITH_AES_128_CBC_SHA),
+    SUITE_INFO("ECDH-RSA-AES128-SHA","TLS_ECDH_RSA_WITH_AES_128_CBC_SHA",ECC_BYTE,TLS_ECDH_RSA_WITH_AES_128_CBC_SHA, TLSv1_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_ECDH_RSA_WITH_AES_256_CBC_SHA
-    SUITE_INFO("ECDH-RSA-AES256-SHA","TLS_ECDH_RSA_WITH_AES_256_CBC_SHA",ECC_BYTE,TLS_ECDH_RSA_WITH_AES_256_CBC_SHA),
+    SUITE_INFO("ECDH-RSA-AES256-SHA","TLS_ECDH_RSA_WITH_AES_256_CBC_SHA",ECC_BYTE,TLS_ECDH_RSA_WITH_AES_256_CBC_SHA, TLSv1_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_ECDH_ECDSA_WITH_AES_128_CBC_SHA
-    SUITE_INFO("ECDH-ECDSA-AES128-SHA","TLS_ECDH_ECDSA_WITH_AES_128_CBC_SHA",ECC_BYTE,TLS_ECDH_ECDSA_WITH_AES_128_CBC_SHA),
+    SUITE_INFO("ECDH-ECDSA-AES128-SHA","TLS_ECDH_ECDSA_WITH_AES_128_CBC_SHA",ECC_BYTE,TLS_ECDH_ECDSA_WITH_AES_128_CBC_SHA, TLSv1_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_ECDH_ECDSA_WITH_AES_256_CBC_SHA
-    SUITE_INFO("ECDH-ECDSA-AES256-SHA","TLS_ECDH_ECDSA_WITH_AES_256_CBC_SHA",ECC_BYTE,TLS_ECDH_ECDSA_WITH_AES_256_CBC_SHA),
+    SUITE_INFO("ECDH-ECDSA-AES256-SHA","TLS_ECDH_ECDSA_WITH_AES_256_CBC_SHA",ECC_BYTE,TLS_ECDH_ECDSA_WITH_AES_256_CBC_SHA, TLSv1_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_ECDH_RSA_WITH_RC4_128_SHA
-    SUITE_INFO("ECDH-RSA-RC4-SHA","TLS_ECDH_RSA_WITH_RC4_128_SHA",ECC_BYTE,TLS_ECDH_RSA_WITH_RC4_128_SHA),
+    SUITE_INFO("ECDH-RSA-RC4-SHA","TLS_ECDH_RSA_WITH_RC4_128_SHA",ECC_BYTE,TLS_ECDH_RSA_WITH_RC4_128_SHA, TLSv1_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_ECDH_RSA_WITH_3DES_EDE_CBC_SHA
-    SUITE_INFO("ECDH-RSA-DES-CBC3-SHA","TLS_ECDH_RSA_WITH_3DES_EDE_CBC_SHA",ECC_BYTE,TLS_ECDH_RSA_WITH_3DES_EDE_CBC_SHA),
+    SUITE_INFO("ECDH-RSA-DES-CBC3-SHA","TLS_ECDH_RSA_WITH_3DES_EDE_CBC_SHA",ECC_BYTE,TLS_ECDH_RSA_WITH_3DES_EDE_CBC_SHA, TLSv1_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_ECDH_ECDSA_WITH_RC4_128_SHA
-    SUITE_INFO("ECDH-ECDSA-RC4-SHA","TLS_ECDH_ECDSA_WITH_RC4_128_SHA",ECC_BYTE,TLS_ECDH_ECDSA_WITH_RC4_128_SHA),
+    SUITE_INFO("ECDH-ECDSA-RC4-SHA","TLS_ECDH_ECDSA_WITH_RC4_128_SHA",ECC_BYTE,TLS_ECDH_ECDSA_WITH_RC4_128_SHA, TLSv1_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_ECDH_ECDSA_WITH_3DES_EDE_CBC_SHA
-    SUITE_INFO("ECDH-ECDSA-DES-CBC3-SHA","TLS_ECDH_ECDSA_WITH_3DES_EDE_CBC_SHA",ECC_BYTE,TLS_ECDH_ECDSA_WITH_3DES_EDE_CBC_SHA),
+    SUITE_INFO("ECDH-ECDSA-DES-CBC3-SHA","TLS_ECDH_ECDSA_WITH_3DES_EDE_CBC_SHA",ECC_BYTE,TLS_ECDH_ECDSA_WITH_3DES_EDE_CBC_SHA, TLSv1_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_RSA_WITH_AES_128_GCM_SHA256
-    SUITE_INFO("AES128-GCM-SHA256","TLS_RSA_WITH_AES_128_GCM_SHA256",CIPHER_BYTE,TLS_RSA_WITH_AES_128_GCM_SHA256),
+    SUITE_INFO("AES128-GCM-SHA256","TLS_RSA_WITH_AES_128_GCM_SHA256",CIPHER_BYTE,TLS_RSA_WITH_AES_128_GCM_SHA256, TLSv1_2_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_RSA_WITH_AES_256_GCM_SHA384
-    SUITE_INFO("AES256-GCM-SHA384","TLS_RSA_WITH_AES_256_GCM_SHA384",CIPHER_BYTE,TLS_RSA_WITH_AES_256_GCM_SHA384),
+    SUITE_INFO("AES256-GCM-SHA384","TLS_RSA_WITH_AES_256_GCM_SHA384",CIPHER_BYTE,TLS_RSA_WITH_AES_256_GCM_SHA384, TLSv1_2_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_DHE_RSA_WITH_AES_128_GCM_SHA256
-    SUITE_INFO("DHE-RSA-AES128-GCM-SHA256","TLS_DHE_RSA_WITH_AES_128_GCM_SHA256",CIPHER_BYTE,TLS_DHE_RSA_WITH_AES_128_GCM_SHA256),
+    SUITE_INFO("DHE-RSA-AES128-GCM-SHA256","TLS_DHE_RSA_WITH_AES_128_GCM_SHA256",CIPHER_BYTE,TLS_DHE_RSA_WITH_AES_128_GCM_SHA256, TLSv1_2_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_DHE_RSA_WITH_AES_256_GCM_SHA384
-    SUITE_INFO("DHE-RSA-AES256-GCM-SHA384","TLS_DHE_RSA_WITH_AES_256_GCM_SHA384",CIPHER_BYTE,TLS_DHE_RSA_WITH_AES_256_GCM_SHA384),
+    SUITE_INFO("DHE-RSA-AES256-GCM-SHA384","TLS_DHE_RSA_WITH_AES_256_GCM_SHA384",CIPHER_BYTE,TLS_DHE_RSA_WITH_AES_256_GCM_SHA384, TLSv1_2_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256
-    SUITE_INFO("ECDHE-RSA-AES128-GCM-SHA256","TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256",ECC_BYTE,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256),
+    SUITE_INFO("ECDHE-RSA-AES128-GCM-SHA256","TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256",ECC_BYTE,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256, TLSv1_2_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384
-    SUITE_INFO("ECDHE-RSA-AES256-GCM-SHA384","TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384",ECC_BYTE,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384),
+    SUITE_INFO("ECDHE-RSA-AES256-GCM-SHA384","TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384",ECC_BYTE,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384, TLSv1_2_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256
-    SUITE_INFO("ECDHE-ECDSA-AES128-GCM-SHA256","TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256",ECC_BYTE,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256),
+    SUITE_INFO("ECDHE-ECDSA-AES128-GCM-SHA256","TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256",ECC_BYTE,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256, TLSv1_2_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384
-    SUITE_INFO("ECDHE-ECDSA-AES256-GCM-SHA384","TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384",ECC_BYTE,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384),
+    SUITE_INFO("ECDHE-ECDSA-AES256-GCM-SHA384","TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384",ECC_BYTE,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384, TLSv1_2_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_ECDH_RSA_WITH_AES_128_GCM_SHA256
-    SUITE_INFO("ECDH-RSA-AES128-GCM-SHA256","TLS_ECDH_RSA_WITH_AES_128_GCM_SHA256",ECC_BYTE,TLS_ECDH_RSA_WITH_AES_128_GCM_SHA256),
+    SUITE_INFO("ECDH-RSA-AES128-GCM-SHA256","TLS_ECDH_RSA_WITH_AES_128_GCM_SHA256",ECC_BYTE,TLS_ECDH_RSA_WITH_AES_128_GCM_SHA256, TLSv1_2_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_ECDH_RSA_WITH_AES_256_GCM_SHA384
-    SUITE_INFO("ECDH-RSA-AES256-GCM-SHA384","TLS_ECDH_RSA_WITH_AES_256_GCM_SHA384",ECC_BYTE,TLS_ECDH_RSA_WITH_AES_256_GCM_SHA384),
+    SUITE_INFO("ECDH-RSA-AES256-GCM-SHA384","TLS_ECDH_RSA_WITH_AES_256_GCM_SHA384",ECC_BYTE,TLS_ECDH_RSA_WITH_AES_256_GCM_SHA384, TLSv1_2_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_ECDH_ECDSA_WITH_AES_128_GCM_SHA256
-    SUITE_INFO("ECDH-ECDSA-AES128-GCM-SHA256","TLS_ECDH_ECDSA_WITH_AES_128_GCM_SHA256",ECC_BYTE,TLS_ECDH_ECDSA_WITH_AES_128_GCM_SHA256),
+    SUITE_INFO("ECDH-ECDSA-AES128-GCM-SHA256","TLS_ECDH_ECDSA_WITH_AES_128_GCM_SHA256",ECC_BYTE,TLS_ECDH_ECDSA_WITH_AES_128_GCM_SHA256, TLSv1_2_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_ECDH_ECDSA_WITH_AES_256_GCM_SHA384
-    SUITE_INFO("ECDH-ECDSA-AES256-GCM-SHA384","TLS_ECDH_ECDSA_WITH_AES_256_GCM_SHA384",ECC_BYTE,TLS_ECDH_ECDSA_WITH_AES_256_GCM_SHA384),
+    SUITE_INFO("ECDH-ECDSA-AES256-GCM-SHA384","TLS_ECDH_ECDSA_WITH_AES_256_GCM_SHA384",ECC_BYTE,TLS_ECDH_ECDSA_WITH_AES_256_GCM_SHA384, TLSv1_2_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_RSA_WITH_CAMELLIA_128_CBC_SHA
-    SUITE_INFO("CAMELLIA128-SHA","TLS_RSA_WITH_CAMELLIA_128_CBC_SHA",CIPHER_BYTE,TLS_RSA_WITH_CAMELLIA_128_CBC_SHA),
+    SUITE_INFO("CAMELLIA128-SHA","TLS_RSA_WITH_CAMELLIA_128_CBC_SHA",CIPHER_BYTE,TLS_RSA_WITH_CAMELLIA_128_CBC_SHA,TLSv1_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_DHE_RSA_WITH_CAMELLIA_128_CBC_SHA
-    SUITE_INFO("DHE-RSA-CAMELLIA128-SHA","TLS_DHE_RSA_WITH_CAMELLIA_128_CBC_SHA",CIPHER_BYTE,TLS_DHE_RSA_WITH_CAMELLIA_128_CBC_SHA),
+    SUITE_INFO("DHE-RSA-CAMELLIA128-SHA","TLS_DHE_RSA_WITH_CAMELLIA_128_CBC_SHA",CIPHER_BYTE,TLS_DHE_RSA_WITH_CAMELLIA_128_CBC_SHA,TLSv1_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_RSA_WITH_CAMELLIA_256_CBC_SHA
-    SUITE_INFO("CAMELLIA256-SHA","TLS_RSA_WITH_CAMELLIA_256_CBC_SHA",CIPHER_BYTE,TLS_RSA_WITH_CAMELLIA_256_CBC_SHA),
+    SUITE_INFO("CAMELLIA256-SHA","TLS_RSA_WITH_CAMELLIA_256_CBC_SHA",CIPHER_BYTE,TLS_RSA_WITH_CAMELLIA_256_CBC_SHA,TLSv1_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_DHE_RSA_WITH_CAMELLIA_256_CBC_SHA
-    SUITE_INFO("DHE-RSA-CAMELLIA256-SHA","TLS_DHE_RSA_WITH_CAMELLIA_256_CBC_SHA",CIPHER_BYTE,TLS_DHE_RSA_WITH_CAMELLIA_256_CBC_SHA),
+    SUITE_INFO("DHE-RSA-CAMELLIA256-SHA","TLS_DHE_RSA_WITH_CAMELLIA_256_CBC_SHA",CIPHER_BYTE,TLS_DHE_RSA_WITH_CAMELLIA_256_CBC_SHA,TLSv1_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_RSA_WITH_CAMELLIA_128_CBC_SHA256
-    SUITE_INFO("CAMELLIA128-SHA256","TLS_RSA_WITH_CAMELLIA_128_CBC_SHA256",CIPHER_BYTE,TLS_RSA_WITH_CAMELLIA_128_CBC_SHA256),
+    SUITE_INFO("CAMELLIA128-SHA256","TLS_RSA_WITH_CAMELLIA_128_CBC_SHA256",CIPHER_BYTE,TLS_RSA_WITH_CAMELLIA_128_CBC_SHA256,TLSv1_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_DHE_RSA_WITH_CAMELLIA_128_CBC_SHA256
-    SUITE_INFO("DHE-RSA-CAMELLIA128-SHA256","TLS_DHE_RSA_WITH_CAMELLIA_128_CBC_SHA256",CIPHER_BYTE,TLS_DHE_RSA_WITH_CAMELLIA_128_CBC_SHA256),
+    SUITE_INFO("DHE-RSA-CAMELLIA128-SHA256","TLS_DHE_RSA_WITH_CAMELLIA_128_CBC_SHA256",CIPHER_BYTE,TLS_DHE_RSA_WITH_CAMELLIA_128_CBC_SHA256,TLSv1_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_RSA_WITH_CAMELLIA_256_CBC_SHA256
-    SUITE_INFO("CAMELLIA256-SHA256","TLS_RSA_WITH_CAMELLIA_256_CBC_SHA256",CIPHER_BYTE,TLS_RSA_WITH_CAMELLIA_256_CBC_SHA256),
+    SUITE_INFO("CAMELLIA256-SHA256","TLS_RSA_WITH_CAMELLIA_256_CBC_SHA256",CIPHER_BYTE,TLS_RSA_WITH_CAMELLIA_256_CBC_SHA256,TLSv1_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_DHE_RSA_WITH_CAMELLIA_256_CBC_SHA256
-    SUITE_INFO("DHE-RSA-CAMELLIA256-SHA256","TLS_DHE_RSA_WITH_CAMELLIA_256_CBC_SHA256",CIPHER_BYTE,TLS_DHE_RSA_WITH_CAMELLIA_256_CBC_SHA256),
+    SUITE_INFO("DHE-RSA-CAMELLIA256-SHA256","TLS_DHE_RSA_WITH_CAMELLIA_256_CBC_SHA256",CIPHER_BYTE,TLS_DHE_RSA_WITH_CAMELLIA_256_CBC_SHA256,TLSv1_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256
-    SUITE_INFO("ECDHE-RSA-AES128-SHA256","TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256",ECC_BYTE,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256),
+    SUITE_INFO("ECDHE-RSA-AES128-SHA256","TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256",ECC_BYTE,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256, TLSv1_2_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256
-    SUITE_INFO("ECDHE-ECDSA-AES128-SHA256","TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256",ECC_BYTE,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256),
+    SUITE_INFO("ECDHE-ECDSA-AES128-SHA256","TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256",ECC_BYTE,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256, TLSv1_2_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_ECDH_RSA_WITH_AES_128_CBC_SHA256
-    SUITE_INFO("ECDH-RSA-AES128-SHA256","TLS_ECDH_RSA_WITH_AES_128_CBC_SHA256",ECC_BYTE,TLS_ECDH_RSA_WITH_AES_128_CBC_SHA256),
+    SUITE_INFO("ECDH-RSA-AES128-SHA256","TLS_ECDH_RSA_WITH_AES_128_CBC_SHA256",ECC_BYTE,TLS_ECDH_RSA_WITH_AES_128_CBC_SHA256, TLSv1_2_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_ECDH_ECDSA_WITH_AES_128_CBC_SHA256
-    SUITE_INFO("ECDH-ECDSA-AES128-SHA256","TLS_ECDH_ECDSA_WITH_AES_128_CBC_SHA256",ECC_BYTE,TLS_ECDH_ECDSA_WITH_AES_128_CBC_SHA256),
+    SUITE_INFO("ECDH-ECDSA-AES128-SHA256","TLS_ECDH_ECDSA_WITH_AES_128_CBC_SHA256",ECC_BYTE,TLS_ECDH_ECDSA_WITH_AES_128_CBC_SHA256, TLSv1_2_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384
-    SUITE_INFO("ECDHE-RSA-AES256-SHA384","TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384",ECC_BYTE,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384),
+    SUITE_INFO("ECDHE-RSA-AES256-SHA384","TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384",ECC_BYTE,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384, TLSv1_2_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384
-    SUITE_INFO("ECDHE-ECDSA-AES256-SHA384","TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384",ECC_BYTE,TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384),
+    SUITE_INFO("ECDHE-ECDSA-AES256-SHA384","TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384",ECC_BYTE,TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384, TLSv1_2_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_ECDH_RSA_WITH_AES_256_CBC_SHA384
-    SUITE_INFO("ECDH-RSA-AES256-SHA384","TLS_ECDH_RSA_WITH_AES_256_CBC_SHA384",ECC_BYTE,TLS_ECDH_RSA_WITH_AES_256_CBC_SHA384),
+    SUITE_INFO("ECDH-RSA-AES256-SHA384","TLS_ECDH_RSA_WITH_AES_256_CBC_SHA384",ECC_BYTE,TLS_ECDH_RSA_WITH_AES_256_CBC_SHA384, TLSv1_2_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_ECDH_ECDSA_WITH_AES_256_CBC_SHA384
-    SUITE_INFO("ECDH-ECDSA-AES256-SHA384","TLS_ECDH_ECDSA_WITH_AES_256_CBC_SHA384",ECC_BYTE,TLS_ECDH_ECDSA_WITH_AES_256_CBC_SHA384),
+    SUITE_INFO("ECDH-ECDSA-AES256-SHA384","TLS_ECDH_ECDSA_WITH_AES_256_CBC_SHA384",ECC_BYTE,TLS_ECDH_ECDSA_WITH_AES_256_CBC_SHA384, TLSv1_2_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256
-    SUITE_INFO("ECDHE-RSA-CHACHA20-POLY1305","TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256",CHACHA_BYTE,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256),
+    SUITE_INFO("ECDHE-RSA-CHACHA20-POLY1305","TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256",CHACHA_BYTE,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256, TLSv1_2_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256
-    SUITE_INFO("ECDHE-ECDSA-CHACHA20-POLY1305","TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256",CHACHA_BYTE,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256),
+    SUITE_INFO("ECDHE-ECDSA-CHACHA20-POLY1305","TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256",CHACHA_BYTE,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256, TLSv1_2_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_DHE_RSA_WITH_CHACHA20_POLY1305_SHA256
-    SUITE_INFO("DHE-RSA-CHACHA20-POLY1305","TLS_DHE_RSA_WITH_CHACHA20_POLY1305_SHA256",CHACHA_BYTE,TLS_DHE_RSA_WITH_CHACHA20_POLY1305_SHA256),
+    SUITE_INFO("DHE-RSA-CHACHA20-POLY1305","TLS_DHE_RSA_WITH_CHACHA20_POLY1305_SHA256",CHACHA_BYTE,TLS_DHE_RSA_WITH_CHACHA20_POLY1305_SHA256, TLSv1_2_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_ECDHE_RSA_WITH_CHACHA20_OLD_POLY1305_SHA256
-    SUITE_INFO("ECDHE-RSA-CHACHA20-POLY1305-OLD","TLS_ECDHE_RSA_WITH_CHACHA20_OLD_POLY1305_SHA256",CHACHA_BYTE,TLS_ECDHE_RSA_WITH_CHACHA20_OLD_POLY1305_SHA256),
+    SUITE_INFO("ECDHE-RSA-CHACHA20-POLY1305-OLD","TLS_ECDHE_RSA_WITH_CHACHA20_OLD_POLY1305_SHA256",CHACHA_BYTE,TLS_ECDHE_RSA_WITH_CHACHA20_OLD_POLY1305_SHA256, TLSv1_2_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_ECDHE_ECDSA_WITH_CHACHA20_OLD_POLY1305_SHA256
-    SUITE_INFO("ECDHE-ECDSA-CHACHA20-POLY1305-OLD","TLS_ECDHE_ECDSA_WITH_CHACHA20_OLD_POLY1305_SHA256",CHACHA_BYTE,TLS_ECDHE_ECDSA_WITH_CHACHA20_OLD_POLY1305_SHA256),
+    SUITE_INFO("ECDHE-ECDSA-CHACHA20-POLY1305-OLD","TLS_ECDHE_ECDSA_WITH_CHACHA20_OLD_POLY1305_SHA256",CHACHA_BYTE,TLS_ECDHE_ECDSA_WITH_CHACHA20_OLD_POLY1305_SHA256, TLSv1_2_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_DHE_RSA_WITH_CHACHA20_OLD_POLY1305_SHA256
-    SUITE_INFO("DHE-RSA-CHACHA20-POLY1305-OLD","TLS_DHE_RSA_WITH_CHACHA20_OLD_POLY1305_SHA256",CHACHA_BYTE,TLS_DHE_RSA_WITH_CHACHA20_OLD_POLY1305_SHA256),
+    SUITE_INFO("DHE-RSA-CHACHA20-POLY1305-OLD","TLS_DHE_RSA_WITH_CHACHA20_OLD_POLY1305_SHA256",CHACHA_BYTE,TLS_DHE_RSA_WITH_CHACHA20_OLD_POLY1305_SHA256, TLSv1_2_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_DH_anon_WITH_AES_128_CBC_SHA
-    SUITE_INFO("ADH-AES128-SHA","TLS_DH_anon_WITH_AES_128_CBC_SHA",CIPHER_BYTE,TLS_DH_anon_WITH_AES_128_CBC_SHA),
+    SUITE_INFO("ADH-AES128-SHA","TLS_DH_anon_WITH_AES_128_CBC_SHA",CIPHER_BYTE,TLS_DH_anon_WITH_AES_128_CBC_SHA, TLSv1_2_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_DH_anon_WITH_AES_256_GCM_SHA384
-    SUITE_INFO("ADH-AES256-GCM-SHA384","TLS_DH_anon_WITH_AES_256_GCM_SHA384",CIPHER_BYTE,TLS_DH_anon_WITH_AES_256_GCM_SHA384),
+    SUITE_INFO("ADH-AES256-GCM-SHA384","TLS_DH_anon_WITH_AES_256_GCM_SHA384",CIPHER_BYTE,TLS_DH_anon_WITH_AES_256_GCM_SHA384, TLSv1_2_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_QSH
-    SUITE_INFO("QSH","TLS_QSH",QSH_BYTE,TLS_QSH),
+    SUITE_INFO("QSH","TLS_QSH",QSH_BYTE,TLS_QSH, TLSv1_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef HAVE_RENEGOTIATION_INDICATION
-    SUITE_INFO("RENEGOTIATION-INFO","TLS_EMPTY_RENEGOTIATION_INFO_SCSV",CIPHER_BYTE,TLS_EMPTY_RENEGOTIATION_INFO_SCSV),
+    SUITE_INFO("RENEGOTIATION-INFO","TLS_EMPTY_RENEGOTIATION_INFO_SCSV",CIPHER_BYTE,TLS_EMPTY_RENEGOTIATION_INFO_SCSV,SSLv3_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_SSL_RSA_WITH_IDEA_CBC_SHA
-    SUITE_INFO("IDEA-CBC-SHA","SSL_RSA_WITH_IDEA_CBC_SHA",CIPHER_BYTE,SSL_RSA_WITH_IDEA_CBC_SHA),
+    SUITE_INFO("IDEA-CBC-SHA","SSL_RSA_WITH_IDEA_CBC_SHA",CIPHER_BYTE,SSL_RSA_WITH_IDEA_CBC_SHA,SSLv3_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_ECDHE_ECDSA_WITH_NULL_SHA
-    SUITE_INFO("ECDHE-ECDSA-NULL-SHA","TLS_ECDHE_ECDSA_WITH_NULL_SHA",ECC_BYTE,TLS_ECDHE_ECDSA_WITH_NULL_SHA),
+    SUITE_INFO("ECDHE-ECDSA-NULL-SHA","TLS_ECDHE_ECDSA_WITH_NULL_SHA",ECC_BYTE,TLS_ECDHE_ECDSA_WITH_NULL_SHA, TLSv1_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_ECDHE_PSK_WITH_NULL_SHA256
-    SUITE_INFO("ECDHE-PSK-NULL-SHA256","TLS_ECDHE_PSK_WITH_NULL_SHA256",ECC_BYTE,TLS_ECDHE_PSK_WITH_NULL_SHA256),
+    SUITE_INFO("ECDHE-PSK-NULL-SHA256","TLS_ECDHE_PSK_WITH_NULL_SHA256",ECC_BYTE,TLS_ECDHE_PSK_WITH_NULL_SHA256,TLSv1_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_ECDHE_PSK_WITH_AES_128_CBC_SHA256
-    SUITE_INFO("ECDHE-PSK-AES128-CBC-SHA256","TLS_ECDHE_PSK_WITH_AES_128_CBC_SHA256",ECC_BYTE,TLS_ECDHE_PSK_WITH_AES_128_CBC_SHA256),
+    SUITE_INFO("ECDHE-PSK-AES128-CBC-SHA256","TLS_ECDHE_PSK_WITH_AES_128_CBC_SHA256",ECC_BYTE,TLS_ECDHE_PSK_WITH_AES_128_CBC_SHA256,TLSv1_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_PSK_WITH_CHACHA20_POLY1305_SHA256
-    SUITE_INFO("PSK-CHACHA20-POLY1305","TLS_PSK_WITH_CHACHA20_POLY1305_SHA256",CHACHA_BYTE,TLS_PSK_WITH_CHACHA20_POLY1305_SHA256),
+    SUITE_INFO("PSK-CHACHA20-POLY1305","TLS_PSK_WITH_CHACHA20_POLY1305_SHA256",CHACHA_BYTE,TLS_PSK_WITH_CHACHA20_POLY1305_SHA256,TLSv1_2_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_ECDHE_PSK_WITH_CHACHA20_POLY1305_SHA256
-    SUITE_INFO("ECDHE-PSK-CHACHA20-POLY1305","TLS_ECDHE_PSK_WITH_CHACHA20_POLY1305_SHA256",CHACHA_BYTE,TLS_ECDHE_PSK_WITH_CHACHA20_POLY1305_SHA256),
+    SUITE_INFO("ECDHE-PSK-CHACHA20-POLY1305","TLS_ECDHE_PSK_WITH_CHACHA20_POLY1305_SHA256",CHACHA_BYTE,TLS_ECDHE_PSK_WITH_CHACHA20_POLY1305_SHA256,TLSv1_2_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_DHE_PSK_WITH_CHACHA20_POLY1305_SHA256
-    SUITE_INFO("DHE-PSK-CHACHA20-POLY1305","TLS_DHE_PSK_WITH_CHACHA20_POLY1305_SHA256",CHACHA_BYTE,TLS_DHE_PSK_WITH_CHACHA20_POLY1305_SHA256),
+    SUITE_INFO("DHE-PSK-CHACHA20-POLY1305","TLS_DHE_PSK_WITH_CHACHA20_POLY1305_SHA256",CHACHA_BYTE,TLS_DHE_PSK_WITH_CHACHA20_POLY1305_SHA256,TLSv1_2_MINOR,SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_TLS_DHE_RSA_WITH_3DES_EDE_CBC_SHA
-    SUITE_INFO("EDH-RSA-DES-CBC3-SHA","TLS_DHE_RSA_WITH_3DES_EDE_CBC_SHA",CIPHER_BYTE,TLS_DHE_RSA_WITH_3DES_EDE_CBC_SHA),
+    SUITE_INFO("EDH-RSA-DES-CBC3-SHA","TLS_DHE_RSA_WITH_3DES_EDE_CBC_SHA",CIPHER_BYTE,TLS_DHE_RSA_WITH_3DES_EDE_CBC_SHA, TLSv1_MINOR, SSLv3_MAJOR),
 #endif
 
 #ifdef BUILD_WDM_WITH_NULL_SHA256
-    SUITE_INFO("WDM-NULL-SHA256","WDM_WITH_NULL_SHA256",CIPHER_BYTE,WDM_WITH_NULL_SHA256),
+    SUITE_INFO("WDM-NULL-SHA256","WDM_WITH_NULL_SHA256",CIPHER_BYTE,WDM_WITH_NULL_SHA256, TLSv1_3_MINOR, SSLv3_MAJOR)
 #endif
 
 #endif /* WOLFSSL_NO_TLS12 */
-
-#ifdef BUILD_TLS_AES_128_GCM_SHA256
-    SUITE_INFO("TLS13-AES128-GCM-SHA256","TLS_AES_128_GCM_SHA256",TLS13_BYTE,TLS_AES_128_GCM_SHA256),
-#endif
-
-#ifdef BUILD_TLS_AES_256_GCM_SHA384
-    SUITE_INFO("TLS13-AES256-GCM-SHA384","TLS_AES_256_GCM_SHA384",TLS13_BYTE,TLS_AES_256_GCM_SHA384),
-#endif
-
-#ifdef BUILD_TLS_CHACHA20_POLY1305_SHA256
-    SUITE_INFO("TLS13-CHACHA20-POLY1305-SHA256","TLS_CHACHA20_POLY1305_SHA256",TLS13_BYTE,TLS_CHACHA20_POLY1305_SHA256),
-#endif
-
-#ifdef BUILD_TLS_AES_128_CCM_SHA256
-    SUITE_INFO("TLS13-AES128-CCM-SHA256","TLS_AES_128_CCM_SHA256",TLS13_BYTE,TLS_AES_128_CCM_SHA256),
-#endif
-
-#ifdef BUILD_TLS_AES_128_CCM_8_SHA256
-    SUITE_INFO("TLS13-AES128-CCM-8-SHA256","TLS_AES_128_CCM_8_SHA256",TLS13_BYTE,TLS_AES_128_CCM_8_SHA256),
-#endif
 };
 
 
@@ -16010,7 +18507,7 @@
 const char* GetCipherNameInternal(const byte cipherSuite0, const byte cipherSuite)
 {
     int i;
-    const char* nameInternal = NULL;
+    const char* nameInternal = "None";
 
     for (i = 0; i < GetCipherNamesSize(); i++) {
         if ((cipher_names[i].cipherSuite0 == cipherSuite0) &&
@@ -16022,6 +18519,198 @@
     return nameInternal;
 }
 
+#if defined(WOLFSSL_QT) || defined(OPENSSL_ALL)
+const char* GetCipherKeaStr(char n[][MAX_SEGMENT_SZ]) {
+    const char* keaStr = NULL;
+    const char *n0,*n1,*n2,*n3,*n4;
+    n0 = n[0];
+    n1 = n[1];
+    n2 = n[2];
+    n3 = n[3];
+    n4 = n[4];
+
+    if (XSTRNCMP(n0,"ECDHE",5) == 0 && XSTRNCMP(n1,"PSK",3) == 0)
+        keaStr = "ECDHEPSK";
+    else if (XSTRNCMP(n0,"ECDH",4) == 0)
+        keaStr = "ECDH";
+    else if (XSTRNCMP(n0,"DHE",3) == 0 && XSTRNCMP(n1,"PSK",3) == 0)
+        keaStr = "DHEPSK";
+    else if (XSTRNCMP(n0,"DHE",3) == 0)
+        keaStr = "DH";
+   else if (XSTRNCMP(n0,"RSA",3) == 0 && XSTRNCMP(n1,"PSK",3) == 0)
+        keaStr = "RSAPSK";
+    else if (XSTRNCMP(n0,"SRP",3) == 0)
+        keaStr = "SRP";
+    else if (XSTRNCMP(n0,"PSK",3) == 0)
+        keaStr = "PSK";
+    else if (XSTRNCMP(n0,"EDH",3) == 0)
+        keaStr = "EDH";
+    else if ((XSTRNCMP(n1,"SHA",3) == 0) || (XSTRNCMP(n2,"SHA",3) == 0) ||
+             (XSTRNCMP(n3,"SHA",3) == 0) || (XSTRNCMP(n4,"SHA",3) == 0) ||
+             (XSTRNCMP(n2,"RSA",3) == 0) || (XSTRNCMP(n0,"AES128",6) == 0) ||
+             (XSTRNCMP(n0,"AES256",6) == 0) || (XSTRNCMP(n1,"MD5",3) == 0))
+        keaStr = "RSA";
+    else
+        keaStr = "unknown";
+
+    return keaStr;
+}
+
+const char* GetCipherAuthStr(char n[][MAX_SEGMENT_SZ]) {
+
+    const char* authStr = NULL;
+    const char *n0,*n1,*n2;
+    n0 = n[0];
+    n1 = n[1];
+    n2 = n[2];
+
+    if ((XSTRNCMP(n0,"AES128",6) == 0) || (XSTRNCMP(n0,"AES256",6) == 0)  ||
+        ((XSTRNCMP(n0,"TLS13",5) == 0) && ((XSTRNCMP(n1,"AES128",6) == 0) ||
+         (XSTRNCMP(n1,"AES256",6) == 0) || (XSTRNCMP(n1,"CHACHA20",8) == 0))) ||
+        (XSTRNCMP(n0,"RSA",3) == 0) || (XSTRNCMP(n1,"RSA",3) == 0) ||
+        (XSTRNCMP(n1,"SHA",3) == 0) || (XSTRNCMP(n2,"SHA",3) == 0) ||
+        (XSTRNCMP(n1,"MD5",3) == 0))
+        authStr = "RSA";
+    else if (XSTRNCMP(n0,"PSK",3) == 0 || XSTRNCMP(n1,"PSK",3) == 0)
+        authStr = "PSK";
+    else if (XSTRNCMP(n0,"SRP",3) == 0 && XSTRNCMP(n1,"AES",3) == 0)
+        authStr = "SRP";
+    else if (XSTRNCMP(n1,"ECDSA",5) == 0)
+        authStr = "ECDSA";
+    else
+        authStr = "unknown";
+
+    return authStr;
+}
+
+const char* GetCipherEncStr(char n[][MAX_SEGMENT_SZ]) {
+    const char* encStr = NULL;
+    const char *n0,*n1,*n2,*n3;
+    n0 = n[0];
+    n1 = n[1];
+    n2 = n[2];
+    n3 = n[3];
+
+    if ((XSTRNCMP(n0,"AES256",6) == 0 && XSTRNCMP(n1,"GCM",3) == 0) ||
+        (XSTRNCMP(n1,"AES256",6) == 0 && XSTRNCMP(n2,"GCM",3) == 0) ||
+        (XSTRNCMP(n2,"AES256",6) == 0 && XSTRNCMP(n3,"GCM",3) == 0))
+        encStr = "AESGCM(256)";
+
+    else if ((XSTRNCMP(n0,"AES128",6) == 0 && XSTRNCMP(n1,"GCM",3) == 0) ||
+             (XSTRNCMP(n1,"AES128",6) == 0 && XSTRNCMP(n2,"GCM",3) == 0) ||
+             (XSTRNCMP(n2,"AES128",6) == 0 && XSTRNCMP(n3,"GCM",3) == 0))
+        encStr = "AESGCM(128)";
+
+    else if ((XSTRNCMP(n0,"AES128",6) == 0 && XSTRNCMP(n1,"CCM",3) == 0) ||
+             (XSTRNCMP(n1,"AES128",6) == 0 && XSTRNCMP(n2,"CCM",3) == 0) ||
+             (XSTRNCMP(n2,"AES128",6) == 0 && XSTRNCMP(n3,"CCM",3) == 0))
+        encStr = "AESCCM(128)";
+
+    else if ((XSTRNCMP(n0,"AES128",6) == 0) ||
+             (XSTRNCMP(n1,"AES128",6) == 0) ||
+             (XSTRNCMP(n2,"AES128",6) == 0) ||
+             (XSTRNCMP(n1,"AES",3) == 0 && XSTRNCMP(n2,"128",3) == 0) ||
+             (XSTRNCMP(n2,"AES",3) == 0 && XSTRNCMP(n3,"128",3) == 0))
+        encStr = "AES(128)";
+
+    else if ((XSTRNCMP(n0,"AES256",6) == 0) ||
+             (XSTRNCMP(n1,"AES256",6) == 0) ||
+             (XSTRNCMP(n2,"AES256",6) == 0) ||
+             (XSTRNCMP(n1,"AES",3) == 0 && XSTRNCMP(n2,"256",3) == 0) ||
+             (XSTRNCMP(n2,"AES",3) == 0 && XSTRNCMP(n3,"256",3) == 0))
+        encStr = "AES(256)";
+
+    else if ((XSTRNCMP(n0,"CAMELLIA256",11) == 0) ||
+             (XSTRNCMP(n2,"CAMELLIA256",11) == 0))
+        encStr = "CAMELLIA(256)";
+    else if ((XSTRNCMP(n0,"CAMELLIA128",11) == 0) ||
+             (XSTRNCMP(n2,"CAMELLIA128",11) == 0))
+        encStr = "CAMELLIA(128)";
+    else if ((XSTRNCMP(n0,"RC4",3) == 0) || (XSTRNCMP(n2,"RC4",3) == 0))
+        encStr = "RC4";
+    else if (((XSTRNCMP(n0,"DES",3) == 0)  || (XSTRNCMP(n2,"DES",3) == 0)) &&
+             ((XSTRNCMP(n1,"CBC3",4) == 0) || (XSTRNCMP(n3,"CBC3",4) == 0)))
+        encStr = "3DES";
+    else if ((XSTRNCMP(n1,"CHACHA20",8) == 0 && XSTRNCMP(n2,"POLY1305",8) == 0) ||
+             (XSTRNCMP(n2,"CHACHA20",8) == 0 && XSTRNCMP(n3,"POLY1305",8) == 0))
+        encStr = "CHACHA20/POLY1305(256)";
+    else if ((XSTRNCMP(n0,"NULL",4) == 0) || (XSTRNCMP(n1,"NULL",4) == 0) ||
+             (XSTRNCMP(n2,"NULL",4) == 0) ||
+             ((XSTRNCMP(n0,"TLS13",5) == 0) && (XSTRNCMP(n3,"",0) == 0)))
+        encStr = "None";
+    else if ((XSTRNCMP(n0,"IDEA",4) == 0))
+        encStr = "IDEA";
+    else if ((XSTRNCMP(n0,"RABBIT",4) == 0))
+        encStr = "RABBIT";
+    else if ((XSTRNCMP(n0,"HC128",5) == 0))
+        encStr = "HC128";
+    else
+        encStr = "unknown";
+
+    return encStr;
+}
+
+/* Returns the MAC string of a cipher or "unknown" on failure */
+const char* GetCipherMacStr(char n[][MAX_SEGMENT_SZ]) {
+
+    const char* macStr = NULL;
+    const char *n1,*n2,*n3,*n4;
+    n1 = n[1];
+    n2 = n[2];
+    n3 = n[3];
+    n4 = n[4];
+
+    if ((XSTRNCMP(n4,"SHA256",6) == 0) || (XSTRNCMP(n3,"SHA256",6) == 0) ||
+        (XSTRNCMP(n2,"SHA256",6) == 0) || (XSTRNCMP(n1,"SHA256",6) == 0))
+        macStr = "SHA256";
+    else if ((XSTRNCMP(n4,"SHA384",6) == 0) ||
+             (XSTRNCMP(n3,"SHA384",6) == 0) ||
+             (XSTRNCMP(n2,"SHA384",6) == 0) ||
+             (XSTRNCMP(n1,"SHA384",6) == 0))
+        macStr = "SHA384";
+    else if ((XSTRNCMP(n4,"SHA",3) == 0) || (XSTRNCMP(n3,"SHA",3) == 0) ||
+             (XSTRNCMP(n2,"SHA",3) == 0) || (XSTRNCMP(n1,"SHA",3) == 0) ||
+             (XSTRNCMP(n1,"MD5",3) == 0))
+        macStr = "SHA1";
+    else if ((XSTRNCMP(n3,"GCM",3) == 0) ||
+             (XSTRNCMP(n1,"CCM",3) == 0) ||
+             (XSTRNCMP(n2,"CCM",3) == 0) || (XSTRNCMP(n3,"CCM",3) == 0) ||
+             (XSTRNCMP(n1,"CHACHA20",8) == 0 && XSTRNCMP(n2,"POLY1305",8) == 0) ||
+             (XSTRNCMP(n2,"CHACHA20",8) == 0 && XSTRNCMP(n3,"POLY1305",8) == 0))
+        macStr = "AEAD";
+    else
+        macStr = "unknown";
+
+    return macStr;
+}
+
+/* Returns the number of bits based on the cipher enc string, or 0 on failure */
+int SetCipherBits(const char* enc) {
+    int ret = WOLFSSL_FAILURE;
+
+    if ((XSTRNCMP(enc,"AESGCM(256)",11) == 0) ||
+        (XSTRNCMP(enc,"AES(256)",8) == 0) ||
+        (XSTRNCMP(enc,"CAMELLIA(256)",13) == 0) ||
+        (XSTRNCMP(enc,"CHACHA20/POLY1305(256)",22) == 0))
+            ret = 256;
+    else if
+        ((XSTRNCMP(enc,"3DES",4) == 0))
+            ret = 168;
+    else if
+        ((XSTRNCMP(enc,"AESGCM(128)",11) == 0) ||
+         (XSTRNCMP(enc,"AES(128)",8) == 0) ||
+         (XSTRNCMP(enc,"CAMELLIA(128)",13) == 0) ||
+         (XSTRNCMP(enc,"IDEA",4) == 0) ||
+         (XSTRNCMP(enc,"RC4",3) == 0))
+            ret = 128;
+   else if
+        ((XSTRNCMP(enc,"DES",3) == 0))
+            ret = 56;
+
+    return ret;
+}
+#endif /* WOLFSSL_QT || OPENSSL_ALL */
+
 const char* GetCipherNameIana(const byte cipherSuite0, const byte cipherSuite)
 {
 #ifndef NO_ERROR_STRINGS
@@ -16061,6 +18750,24 @@
     return GetCipherNameIana(ssl->options.cipherSuite0, ssl->options.cipherSuite);
 }
 
+int GetCipherSuiteFromName(const char* name, byte* cipherSuite0,
+                           byte* cipherSuite)
+{
+    int           ret = BAD_FUNC_ARG;
+    int           i;
+    unsigned long len = (unsigned long)XSTRLEN(name);
+
+    for (i = 0; i < GetCipherNamesSize(); i++) {
+        if (XSTRNCMP(name, cipher_names[i].name, len) == 0) {
+            *cipherSuite0 = cipher_names[i].cipherSuite0;
+            *cipherSuite  = cipher_names[i].cipherSuite;
+            ret = 0;
+            break;
+        }
+    }
+
+    return ret;
+}
 
 /**
 Set the enabled cipher suites.
@@ -16088,7 +18795,7 @@
 
     if (next[0] == 0 || XSTRNCMP(next, "ALL", 3) == 0 ||
                         XSTRNCMP(next, "DEFAULT", 7) == 0)
-        return 1; /* wolfSSL defualt */
+        return 1; /* wolfSSL default */
 
     do {
         char*  current = next;
@@ -16104,7 +18811,11 @@
         name[(length == sizeof(name)) ? length - 1 : length] = 0;
 
         for (i = 0; i < suiteSz; i++) {
-            if (XSTRNCMP(name, cipher_names[i].name, sizeof(name)) == 0) {
+            if (XSTRNCMP(name, cipher_names[i].name, sizeof(name)) == 0
+            #ifndef NO_ERROR_STRINGS
+                || XSTRNCMP(name, cipher_names[i].name_iana, sizeof(name)) == 0
+            #endif
+             ) {
             #ifdef WOLFSSL_DTLS
                 /* don't allow stream ciphers with DTLS */
                 if (ctx->method->version.major == DTLS_MAJOR) {
@@ -16124,35 +18835,27 @@
                     return 0; /* suites buffer not large enough, error out */
                 }
 
-                suites->suites[idx++] =
-            #ifdef WOLFSSL_TLS13
-                    (XSTRSTR(name, "TLS13"))  ? TLS13_BYTE :
-            #endif
-            #ifdef HAVE_CHACHA
-                    (XSTRSTR(name, "CHACHA")) ? CHACHA_BYTE :
-            #endif
-            #ifdef HAVE_QSH
-                    (XSTRSTR(name, "QSH"))    ? QSH_BYTE :
-            #endif
-            #if defined(HAVE_ECC) || defined(HAVE_CURVE25519)
-                    (XSTRSTR(name, "EC"))     ? ECC_BYTE :
-            #endif
-            #ifdef HAVE_AESCCM
-                    (XSTRSTR(name, "CCM"))    ? ECC_BYTE :
-            #endif
-                    CIPHER_BYTE; /* normal */
-
+                suites->suites[idx++] = cipher_names[i].cipherSuite0;
                 suites->suites[idx++] = cipher_names[i].cipherSuite;
                 /* The suites are either ECDSA, RSA, PSK, or Anon. The RSA
                  * suites don't necessarily have RSA in the name. */
             #ifdef WOLFSSL_TLS13
-                if (XSTRSTR(name, "TLS13")) {
+                if (cipher_names[i].cipherSuite0 == TLS13_BYTE ||
+                         (cipher_names[i].cipherSuite0 == ECC_BYTE &&
+                          (cipher_names[i].cipherSuite == TLS_SHA256_SHA256 ||
+                           cipher_names[i].cipherSuite == TLS_SHA384_SHA384))) {
+                #ifndef NO_RSA
                     haveRSAsig = 1;
+                #endif
+                #if defined(HAVE_ECC) || defined(HAVE_ED25519) || \
+                                                             defined(HAVE_ED448)
                     haveECDSAsig = 1;
+                #endif
                 }
                 else
             #endif
-            #if defined(HAVE_ECC) || defined(HAVE_ED25519)
+            #if defined(HAVE_ECC) || defined(HAVE_ED25519) || \
+                                                             defined(HAVE_ED448)
                 if ((haveECDSAsig == 0) && XSTRSTR(name, "ECDSA"))
                     haveECDSAsig = 1;
                 else
@@ -16195,10 +18898,10 @@
 
 
 #if !defined(NO_WOLFSSL_SERVER) || !defined(NO_CERTS)
-void PickHashSigAlgo(WOLFSSL* ssl, const byte* hashSigAlgo,
-                     word32 hashSigAlgoSz)
+int PickHashSigAlgo(WOLFSSL* ssl, const byte* hashSigAlgo, word32 hashSigAlgoSz)
 {
     word32 i;
+    int ret = MATCH_SUITE_ERROR;
 
     ssl->suites->sigAlgo = ssl->specs.sig_algo;
 
@@ -16222,26 +18925,114 @@
     }
 #endif
 
+    if (hashSigAlgoSz == 0)
+        return 0;
+
     /* i+1 since peek a byte ahead for type */
     for (i = 0; (i+1) < hashSigAlgoSz; i += HELLO_EXT_SIGALGO_SZ) {
         byte hashAlgo = 0, sigAlgo = 0;
 
         DecodeSigAlg(&hashSigAlgo[i], &hashAlgo, &sigAlgo);
     #ifdef HAVE_ED25519
-        if (ssl->pkCurveOID == ECC_ED25519_OID && sigAlgo != ed25519_sa_algo)
-            continue;
-
-        if (sigAlgo == ed25519_sa_algo &&
+        if (ssl->pkCurveOID == ECC_ED25519_OID) {
+            if (sigAlgo != ed25519_sa_algo)
+                continue;
+            if (sigAlgo == ed25519_sa_algo &&
+                                      ssl->suites->sigAlgo == ecc_dsa_sa_algo) {
+                ssl->suites->sigAlgo = sigAlgo;
+                ssl->suites->hashAlgo = sha512_mac;
+                ret = 0;
+                break;
+            }
+        }
+    #endif
+    #ifdef HAVE_ED448
+        if (ssl->pkCurveOID == ECC_ED448_OID) {
+            if (sigAlgo != ed448_sa_algo)
+                continue;
+
+            if (sigAlgo == ed448_sa_algo &&
                                       ssl->suites->sigAlgo == ecc_dsa_sa_algo) {
+                ssl->suites->sigAlgo = sigAlgo;
+                ssl->suites->hashAlgo = sha512_mac;
+                ret = 0;
+                break;
+            }
+        }
+    #endif
+    #if defined(WOLFSSL_TLS13) && defined(HAVE_ECC)
+        if (IsAtLeastTLSv1_3(ssl->version) && sigAlgo == ssl->suites->sigAlgo &&
+                                                   sigAlgo == ecc_dsa_sa_algo) {
+
+            int digestSz = GetMacDigestSize(hashAlgo);
+            if (digestSz <= 0)
+                continue;
+
+            /* TLS 1.3 signature algorithms for ECDSA match hash length with
+             * key size.
+             */
+            if (digestSz != ssl->buffers.keySz)
+                continue;
+
+            ssl->suites->hashAlgo = hashAlgo;
             ssl->suites->sigAlgo = sigAlgo;
-            ssl->suites->hashAlgo = sha512_mac;
-            break;
-        }
-    #endif
-        if (sigAlgo == ssl->suites->sigAlgo || (sigAlgo == rsa_pss_sa_algo &&
-                                         ssl->suites->sigAlgo == rsa_sa_algo)) {
+            ret = 0;
+            break; /* done selected sig/hash algorithms */
+        }
+        else
+    #endif
+    /* For ECDSA the `USE_ECDSA_KEYSZ_HASH_ALGO` build option will choose a hash
+     * algorithm that matches the ephemeral ECDHE key size or the next highest
+     * available. This workaround resolves issue with some peer's that do not
+     * properly support scenarios such as a P-256 key hashed with SHA512.
+     */
+    #if defined(HAVE_ECC) && defined(USE_ECDSA_KEYSZ_HASH_ALGO)
+        if (sigAlgo == ssl->suites->sigAlgo && sigAlgo == ecc_dsa_sa_algo) {
+            int digestSz = GetMacDigestSize(hashAlgo);
+            if (digestSz <= 0)
+                continue;
+
+            /* For ecc_dsa_sa_algo, pick hash algo that is curve size unless
+                algorithm in not compiled in, then choose next highest */
+            if (digestSz == ssl->eccTempKeySz) {
+                ssl->suites->hashAlgo = hashAlgo;
+                ssl->suites->sigAlgo = sigAlgo;
+            #if defined(WOLFSSL_TLS13) || defined(HAVE_FFDHE)
+                ssl->namedGroup = 0;
+            #endif
+                ret = 0;
+                break; /* done selected sig/hash algorithms */
+            }
+            /* not strong enough, so keep checking hashSigAlso list */
+            if (digestSz < ssl->eccTempKeySz)
+                continue;
+
+            /* mark as highest and check remainder of hashSigAlgo list */
+            ssl->suites->hashAlgo = hashAlgo;
+            ssl->suites->sigAlgo = sigAlgo;
+            ret = 0;
+        }
+        else
+    #endif
+    #ifdef WC_RSA_PSS
+        if (IsAtLeastTLSv1_3(ssl->version) &&
+                                          ssl->suites->sigAlgo == rsa_sa_algo &&
+                                          sigAlgo != rsa_pss_sa_algo) {
+            continue;
+        }
+        else if (sigAlgo == ssl->suites->sigAlgo ||
+                                        (sigAlgo == rsa_pss_sa_algo &&
+                                         (ssl->suites->sigAlgo == rsa_sa_algo)))
+    #else
+        if (sigAlgo == ssl->suites->sigAlgo)
+    #endif
+        {
+            /* pick highest available between both server and client */
             switch (hashAlgo) {
                 case sha_mac:
+            #ifdef WOLFSSL_SHA224
+                case sha224_mac:
+            #endif
             #ifndef NO_SHA256
                 case sha256_mac:
             #endif
@@ -16251,27 +19042,39 @@
             #ifdef WOLFSSL_SHA512
                 case sha512_mac:
             #endif
-                    if (hashAlgo < ssl->suites->hashAlgo)
+                    /* not strong enough, so keep checking hashSigAlso list */
+                    if (hashAlgo < ssl->suites->hashAlgo) {
+                        ret = 0;
                         continue;
+                    }
+                    /* mark as highest and check remainder of hashSigAlgo list */
                     ssl->suites->hashAlgo = hashAlgo;
                     ssl->suites->sigAlgo = sigAlgo;
                     break;
                 default:
                     continue;
             }
-            break;
-        }
-        else if (ssl->specs.sig_algo == 0) {
+            ret = 0;
+            break;
+        }
+#if defined(WOLFSSL_TLS13)
+        else if (ssl->specs.sig_algo == 0 && IsAtLeastTLSv1_3(ssl->version)) {
+        }
+#endif
+        else if (ssl->specs.sig_algo == 0)
+        {
             ssl->suites->hashAlgo = ssl->specs.mac_algorithm;
-        }
-    }
-
+            ret = 0;
+        }
+    }
+
+    return ret;
 }
 #endif /* !defined(NO_WOLFSSL_SERVER) || !defined(NO_CERTS) */
 
 #if defined(WOLFSSL_CALLBACKS) || defined(OPENSSL_EXTRA)
 
-    /* Initialisze HandShakeInfo */
+    /* Initialize HandShakeInfo */
     void InitHandShakeInfo(HandShakeInfo* info, WOLFSSL* ssl)
     {
         int i;
@@ -16315,7 +19118,7 @@
             char* packetName = info->packetNames[info->numberPackets];
             XSTRNCPY(packetName, name, MAX_PACKETNAME_SZ);
             packetName[MAX_PACKETNAME_SZ] = '\0';
-            info->numberPackets++
+            info->numberPackets++;
         }
     #endif
         (void)ssl;
@@ -16324,7 +19127,7 @@
 
 
     #ifdef WOLFSSL_CALLBACKS
-    /* Initialisze TimeoutInfo */
+    /* Initialize TimeoutInfo */
     void InitTimeoutInfo(TimeoutInfo* info)
     {
         int i;
@@ -16358,7 +19161,7 @@
 
     }
 
-    /* Add packet name to previsouly added packet info */
+    /* Add packet name to previously added packet info */
     void AddLateName(const char* name, TimeoutInfo* info)
     {
         /* make sure we have a valid previous one */
@@ -16370,7 +19173,7 @@
         }
     }
 
-    /* Add record header to previsouly added packet info */
+    /* Add record header to previously added packet info */
     void AddLateRecordHeader(const RecordLayerHeader* rl, TimeoutInfo* info)
     {
         /* make sure we have a valid previous one */
@@ -16405,7 +19208,7 @@
         TimeoutInfo* info = &ssl->timeoutInfo;
 
         if (info->numberPackets < (MAX_PACKETS_HANDSHAKE - 1)) {
-            Timeval currTime;
+            WOLFSSL_TIMEVAL currTime;
 
             /* may add name after */
             if (name) {
@@ -16460,10 +19263,9 @@
 
 #endif /* WOLFSSL_CALLBACKS */
 
-#if !defined(NO_CERTS) && (defined(WOLFSSL_TLS13) || \
-                                                    !defined(NO_WOLFSSL_CLIENT))
-
-/* Decode the private key - RSA, ECC, or Ed25519 - and creates a key object.
+#if !defined(NO_CERTS)
+
+/* Decode the private key - RSA/ECC/Ed25519/Ed448 - and creates a key object.
  * The signature type is set as well.
  * The maximum length of a signature is returned.
  *
@@ -16477,44 +19279,106 @@
     int      keySz;
     word32   idx;
 
+#ifdef HAVE_PK_CALLBACKS
+    /* allow no private key if using PK callbacks and CB is set */
+    if (wolfSSL_IsPrivatePkSet(ssl)) {
+        *length = GetPrivateKeySigSize(ssl);
+        return 0;
+    }
+    else
+#endif
+
     /* make sure private key exists */
     if (ssl->buffers.key == NULL || ssl->buffers.key->buffer == NULL) {
         WOLFSSL_MSG("Private key missing!");
         ERROR_OUT(NO_PRIVATE_KEY, exit_dpk);
     }
 
-#ifndef NO_RSA
-    ssl->hsType = DYNAMIC_TYPE_RSA;
-    ret = AllocKey(ssl, ssl->hsType, &ssl->hsKey);
-    if (ret != 0) {
+#ifdef HAVE_PKCS11
+    if (ssl->buffers.keyDevId != INVALID_DEVID && ssl->buffers.keyId) {
+        if (ssl->buffers.keyType == rsa_sa_algo)
+            ssl->hsType = DYNAMIC_TYPE_RSA;
+        else if (ssl->buffers.keyType == ecc_dsa_sa_algo)
+            ssl->hsType = DYNAMIC_TYPE_ECC;
+        ret = AllocKey(ssl, ssl->hsType, &ssl->hsKey);
+        if (ret != 0) {
+            goto exit_dpk;
+        }
+
+        if (ssl->buffers.keyType == rsa_sa_algo) {
+    #ifndef NO_RSA
+            ret = wc_InitRsaKey_Id((RsaKey*)ssl->hsKey,
+                             ssl->buffers.key->buffer, ssl->buffers.key->length,
+                             ssl->heap, ssl->buffers.keyDevId);
+            if (ret == 0) {
+                if (ssl->buffers.keySz < ssl->options.minRsaKeySz) {
+                    WOLFSSL_MSG("RSA key size too small");
+                    ERROR_OUT(RSA_KEY_SIZE_E, exit_dpk);
+                }
+
+                /* Return the maximum signature length. */
+                *length = (word16)ssl->buffers.keySz;
+            }
+    #else
+            ret = NOT_COMPILED_IN;
+    #endif
+        }
+        else if (ssl->buffers.keyType == ecc_dsa_sa_algo) {
+    #ifdef HAVE_ECC
+            ret = wc_ecc_init_id((ecc_key*)ssl->hsKey, ssl->buffers.key->buffer,
+                                 ssl->buffers.key->length, ssl->heap,
+                                 ssl->buffers.keyDevId);
+            if (ret == 0) {
+                if (ssl->buffers.keySz < ssl->options.minEccKeySz) {
+                    WOLFSSL_MSG("ECC key size too small");
+                    ERROR_OUT(ECC_KEY_SIZE_E, exit_dpk);
+                }
+
+                /* Return the maximum signature length. */
+                *length = (word16)wc_ecc_sig_size_calc(ssl->buffers.keySz);
+            }
+    #else
+            ret = NOT_COMPILED_IN;
+    #endif
+        }
         goto exit_dpk;
     }
-
-    WOLFSSL_MSG("Trying RSA private key");
-
-    /* Set start of data to beginning of buffer. */
-    idx = 0;
-    /* Decode the key assuming it is an RSA private key. */
-    ret = wc_RsaPrivateKeyDecode(ssl->buffers.key->buffer, &idx,
-                (RsaKey*)ssl->hsKey, ssl->buffers.key->length);
-    if (ret == 0) {
-        WOLFSSL_MSG("Using RSA private key");
-
-        /* It worked so check it meets minimum key size requirements. */
-        keySz = wc_RsaEncryptSize((RsaKey*)ssl->hsKey);
-        if (keySz < 0) { /* check if keySz has error case */
-            ERROR_OUT(keySz, exit_dpk);
-        }
-
-        if (keySz < ssl->options.minRsaKeySz) {
-            WOLFSSL_MSG("RSA key size too small");
-            ERROR_OUT(RSA_KEY_SIZE_E, exit_dpk);
-        }
-
-        /* Return the maximum signature length. */
-        *length = (word16)keySz;
-
-        goto exit_dpk;
+#endif
+
+#ifndef NO_RSA
+    if (ssl->buffers.keyType == rsa_sa_algo || ssl->buffers.keyType == 0) {
+        ssl->hsType = DYNAMIC_TYPE_RSA;
+        ret = AllocKey(ssl, ssl->hsType, &ssl->hsKey);
+        if (ret != 0) {
+            goto exit_dpk;
+        }
+
+        WOLFSSL_MSG("Trying RSA private key");
+
+        /* Set start of data to beginning of buffer. */
+        idx = 0;
+        /* Decode the key assuming it is an RSA private key. */
+        ret = wc_RsaPrivateKeyDecode(ssl->buffers.key->buffer, &idx,
+                    (RsaKey*)ssl->hsKey, ssl->buffers.key->length);
+        if (ret == 0) {
+            WOLFSSL_MSG("Using RSA private key");
+
+            /* It worked so check it meets minimum key size requirements. */
+            keySz = wc_RsaEncryptSize((RsaKey*)ssl->hsKey);
+            if (keySz < 0) { /* check if keySz has error case */
+                ERROR_OUT(keySz, exit_dpk);
+            }
+
+            if (keySz < ssl->options.minRsaKeySz) {
+                WOLFSSL_MSG("RSA key size too small");
+                ERROR_OUT(RSA_KEY_SIZE_E, exit_dpk);
+            }
+
+            /* Return the maximum signature length. */
+            *length = (word16)keySz;
+
+            goto exit_dpk;
+        }
     }
 #endif /* !NO_RSA */
 
@@ -16523,38 +19387,40 @@
     FreeKey(ssl, ssl->hsType, (void**)&ssl->hsKey);
 #endif /* !NO_RSA */
 
-    ssl->hsType = DYNAMIC_TYPE_ECC;
-    ret = AllocKey(ssl, ssl->hsType, &ssl->hsKey);
-    if (ret != 0) {
-        goto exit_dpk;
-    }
-
-#ifndef NO_RSA
-    WOLFSSL_MSG("Trying ECC private key, RSA didn't work");
-#else
-    WOLFSSL_MSG("Trying ECC private key");
-#endif
-
-    /* Set start of data to beginning of buffer. */
-    idx = 0;
-    /* Decode the key assuming it is an ECC private key. */
-    ret = wc_EccPrivateKeyDecode(ssl->buffers.key->buffer, &idx,
-                                 (ecc_key*)ssl->hsKey,
-                                 ssl->buffers.key->length);
-    if (ret == 0) {
-        WOLFSSL_MSG("Using ECC private key");
-
-        /* Check it meets the minimum ECC key size requirements. */
-        keySz = wc_ecc_size((ecc_key*)ssl->hsKey);
-        if (keySz < ssl->options.minEccKeySz) {
-            WOLFSSL_MSG("ECC key size too small");
-            ERROR_OUT(ECC_KEY_SIZE_E, exit_dpk);
-        }
-
-        /* Return the maximum signature length. */
-        *length = (word16)wc_ecc_sig_size((ecc_key*)ssl->hsKey);
-
-        goto exit_dpk;
+    if (ssl->buffers.keyType == ecc_dsa_sa_algo || ssl->buffers.keyType == 0) {
+        ssl->hsType = DYNAMIC_TYPE_ECC;
+        ret = AllocKey(ssl, ssl->hsType, &ssl->hsKey);
+        if (ret != 0) {
+            goto exit_dpk;
+        }
+
+    #ifndef NO_RSA
+        WOLFSSL_MSG("Trying ECC private key, RSA didn't work");
+    #else
+        WOLFSSL_MSG("Trying ECC private key");
+    #endif
+
+        /* Set start of data to beginning of buffer. */
+        idx = 0;
+        /* Decode the key assuming it is an ECC private key. */
+        ret = wc_EccPrivateKeyDecode(ssl->buffers.key->buffer, &idx,
+                                     (ecc_key*)ssl->hsKey,
+                                     ssl->buffers.key->length);
+        if (ret == 0) {
+            WOLFSSL_MSG("Using ECC private key");
+
+            /* Check it meets the minimum ECC key size requirements. */
+            keySz = wc_ecc_size((ecc_key*)ssl->hsKey);
+            if (keySz < ssl->options.minEccKeySz) {
+                WOLFSSL_MSG("ECC key size too small");
+                ERROR_OUT(ECC_KEY_SIZE_E, exit_dpk);
+            }
+
+            /* Return the maximum signature length. */
+            *length = (word16)wc_ecc_sig_size((ecc_key*)ssl->hsKey);
+
+            goto exit_dpk;
+        }
     }
 #endif
 #ifdef HAVE_ED25519
@@ -16562,41 +19428,87 @@
         FreeKey(ssl, ssl->hsType, (void**)&ssl->hsKey);
     #endif
 
-    ssl->hsType = DYNAMIC_TYPE_ED25519;
-    ret = AllocKey(ssl, ssl->hsType, &ssl->hsKey);
-    if (ret != 0) {
-        goto exit_dpk;
-    }
-
-    #ifdef HAVE_ECC
-        WOLFSSL_MSG("Trying ED25519 private key, ECC didn't work");
-    #elif !defined(NO_RSA)
-        WOLFSSL_MSG("Trying ED25519 private key, RSA didn't work");
-    #else
-        WOLFSSL_MSG("Trying ED25519 private key");
-    #endif
-
-    /* Set start of data to beginning of buffer. */
-    idx = 0;
-    /* Decode the key assuming it is an ED25519 private key. */
-    ret = wc_Ed25519PrivateKeyDecode(ssl->buffers.key->buffer, &idx,
-                                     (ed25519_key*)ssl->hsKey,
-                                     ssl->buffers.key->length);
-    if (ret == 0) {
-        WOLFSSL_MSG("Using ED25519 private key");
-
-        /* Check it meets the minimum ECC key size requirements. */
-        if (ED25519_KEY_SIZE < ssl->options.minEccKeySz) {
-            WOLFSSL_MSG("ED25519 key size too small");
-            ERROR_OUT(ECC_KEY_SIZE_E, exit_dpk);
-        }
-
-        /* Return the maximum signature length. */
-        *length = ED25519_SIG_SIZE;
-
-        goto exit_dpk;
+    if (ssl->buffers.keyType == ed25519_sa_algo || ssl->buffers.keyType == 0) {
+        ssl->hsType = DYNAMIC_TYPE_ED25519;
+        ret = AllocKey(ssl, ssl->hsType, &ssl->hsKey);
+        if (ret != 0) {
+            goto exit_dpk;
+        }
+
+        #ifdef HAVE_ECC
+            WOLFSSL_MSG("Trying ED25519 private key, ECC didn't work");
+        #elif !defined(NO_RSA)
+            WOLFSSL_MSG("Trying ED25519 private key, RSA didn't work");
+        #else
+            WOLFSSL_MSG("Trying ED25519 private key");
+        #endif
+
+        /* Set start of data to beginning of buffer. */
+        idx = 0;
+        /* Decode the key assuming it is an ED25519 private key. */
+        ret = wc_Ed25519PrivateKeyDecode(ssl->buffers.key->buffer, &idx,
+                                         (ed25519_key*)ssl->hsKey,
+                                         ssl->buffers.key->length);
+        if (ret == 0) {
+            WOLFSSL_MSG("Using ED25519 private key");
+
+            /* Check it meets the minimum ECC key size requirements. */
+            if (ED25519_KEY_SIZE < ssl->options.minEccKeySz) {
+                WOLFSSL_MSG("ED25519 key size too small");
+                ERROR_OUT(ECC_KEY_SIZE_E, exit_dpk);
+            }
+
+            /* Return the maximum signature length. */
+            *length = ED25519_SIG_SIZE;
+
+            goto exit_dpk;
+        }
     }
 #endif /* HAVE_ED25519 */
+#ifdef HAVE_ED448
+    #if !defined(NO_RSA) || defined(HAVE_ECC)
+        FreeKey(ssl, ssl->hsType, (void**)&ssl->hsKey);
+    #endif
+
+    if (ssl->buffers.keyType == ed448_sa_algo || ssl->buffers.keyType == 0) {
+        ssl->hsType = DYNAMIC_TYPE_ED448;
+        ret = AllocKey(ssl, ssl->hsType, &ssl->hsKey);
+        if (ret != 0) {
+            goto exit_dpk;
+        }
+
+        #ifdef HAVE_ED25519
+            WOLFSSL_MSG("Trying ED448 private key, ED25519 didn't work");
+        #elif defined(HAVE_ECC)
+            WOLFSSL_MSG("Trying ED448 private key, ECC didn't work");
+        #elif !defined(NO_RSA)
+            WOLFSSL_MSG("Trying ED448 private key, RSA didn't work");
+        #else
+            WOLFSSL_MSG("Trying ED447 private key");
+        #endif
+
+        /* Set start of data to beginning of buffer. */
+        idx = 0;
+        /* Decode the key assuming it is an ED448 private key. */
+        ret = wc_Ed448PrivateKeyDecode(ssl->buffers.key->buffer, &idx,
+                                       (ed448_key*)ssl->hsKey,
+                                       ssl->buffers.key->length);
+        if (ret == 0) {
+            WOLFSSL_MSG("Using ED448 private key");
+
+            /* Check it meets the minimum ECC key size requirements. */
+            if (ED448_KEY_SIZE < ssl->options.minEccKeySz) {
+                WOLFSSL_MSG("ED448 key size too small");
+                ERROR_OUT(ECC_KEY_SIZE_E, exit_dpk);
+            }
+
+            /* Return the maximum signature length. */
+            *length = ED448_SIG_SIZE;
+
+            goto exit_dpk;
+        }
+    }
+#endif /* HAVE_ED448 */
 
     (void)idx;
     (void)keySz;
@@ -16646,7 +19558,10 @@
             if (ticket == NULL) return MEMORY_E;
 
             ret = TLSX_UseSessionTicket(&ssl->extensions, ticket, ssl->heap);
-            if (ret != WOLFSSL_SUCCESS) return ret;
+            if (ret != WOLFSSL_SUCCESS) {
+                TLSX_SessionTicket_Free(ticket, ssl->heap);
+                return ret;
+            }
 
             idSz = 0;
         }
@@ -16773,7 +19688,7 @@
 
             if (IsAtLeastTLSv1_2(ssl)) {
                 if (ssl->suites->hashSigAlgoSz) {
-                    int i;
+                    word16 i;
                     /* extension type */
                     c16toa(HELLO_EXT_SIG_ALGO, output + idx);
                     idx += HELLO_EXT_TYPE_SZ;
@@ -16784,7 +19699,7 @@
                     /* sig algos length */
                     c16toa(ssl->suites->hashSigAlgoSz, output + idx);
                     idx += HELLO_EXT_SIGALGO_SZ;
-                    for (i = 0; i < ssl->suites->hashSigAlgoSz; i++, idx++) {
+                    for (i=0; i < ssl->suites->hashSigAlgoSz; i++, idx++) {
                         output[idx] = ssl->suites->hashSigAlgo[i];
                     }
                 }
@@ -16817,6 +19732,10 @@
                 return sendSz;
         } else {
             #ifdef WOLFSSL_DTLS
+                if (IsDtlsNotSctpMode(ssl)) {
+                    if ((ret = DtlsMsgPoolSave(ssl, output, sendSz)) != 0)
+                        return ret;
+                }
                 if (ssl->options.dtls)
                     DtlsSEQIncrement(ssl, CUR_ORDER);
             #endif
@@ -16825,18 +19744,11 @@
                 return ret;
         }
 
-        #ifdef WOLFSSL_DTLS
-            if (IsDtlsNotSctpMode(ssl)) {
-                if ((ret = DtlsMsgPoolSave(ssl, output, sendSz)) != 0)
-                    return ret;
-            }
-        #endif
-
         ssl->options.clientState = CLIENT_HELLO_COMPLETE;
 #ifdef OPENSSL_EXTRA
         ssl->cbmode = SSL_CB_MODE_WRITE;
-		if (ssl->CBIS != NULL)
-			ssl->CBIS(ssl, SSL_CB_CONNECT_LOOP, SSL_SUCCESS);
+        if (ssl->CBIS != NULL)
+            ssl->CBIS(ssl, SSL_CB_CONNECT_LOOP, SSL_SUCCESS);
 #endif
 
 #if defined(WOLFSSL_CALLBACKS) || defined(OPENSSL_EXTRA)
@@ -16876,7 +19788,7 @@
         }
 #endif
 
-        if ((*inOutIdx - begin) + OPAQUE16_LEN + OPAQUE8_LEN > size)
+        if (OPAQUE16_LEN + OPAQUE8_LEN > size)
             return BUFFER_ERROR;
 
         XMEMCPY(&pv, input + *inOutIdx, OPAQUE16_LEN);
@@ -16938,17 +19850,12 @@
      */
     int CheckVersion(WOLFSSL *ssl, ProtocolVersion pv)
     {
-#ifdef WOLFSSL_TLS13
-    #ifndef WOLFSSL_TLS13_FINAL
-        /* TODO: [TLS13] Remove this.
-         * Translate the draft TLS v1.3 version to final version.
-         */
+    #ifdef WOLFSSL_TLS13_DRAFT
         if (pv.major == TLS_DRAFT_MAJOR) {
             pv.major = SSLv3_MAJOR;
             pv.minor = TLSv1_3_MINOR;
         }
     #endif
-#endif
 
         #ifdef OPENSSL_EXTRA
         if (ssl->CBIS != NULL) {
@@ -17129,6 +20036,24 @@
         ssl->options.cipherSuite  = cs1;
         compression = input[i++];
 
+#ifndef WOLFSSL_NO_STRICT_CIPHER_SUITE
+        {
+            word32 idx, found = 0;
+            /* confirm server_hello cipher suite is one sent in client_hello */
+            for (idx = 0; idx < ssl->suites->suiteSz; idx += 2) {
+                if (ssl->suites->suites[idx]   == cs0 &&
+                    ssl->suites->suites[idx+1] == cs1) {
+                    found = 1;
+                    break;
+                }
+            }
+            if (!found) {
+                WOLFSSL_MSG("ServerHello did not use cipher suite from ClientHello");
+                return MATCH_SUITE_ERROR;
+            }
+        }
+#endif /* !WOLFSSL_NO_STRICT_CIPHER_SUITE */
+
         if (compression != NO_COMPRESSION && !ssl->options.usingCompression) {
             WOLFSSL_MSG("Server forcing compression w/o support");
             return COMPRESSION_ERROR;
@@ -17155,8 +20080,8 @@
                 if ((i - begin) + totalExtSz > helloSz)
                     return BUFFER_ERROR;
 
-                if ((ret = TLSX_Parse(ssl, (byte *) input + i,
-                                                          totalExtSz, 0, NULL)))
+                if ((ret = TLSX_Parse(ssl, (byte *) input + i, totalExtSz,
+                                                           server_hello, NULL)))
                     return ret;
 
                 i += totalExtSz;
@@ -17232,6 +20157,12 @@
 
         if (IsEncryptionOn(ssl, 0)) {
             *inOutIdx += ssl->keys.padSz;
+        #if defined(HAVE_ENCRYPT_THEN_MAC) && !defined(WOLFSSL_AEAD_ONLY)
+            if (ssl->options.startedETMWrite &&
+                                              ssl->specs.cipher_type == block) {
+                *inOutIdx += MacSize(ssl);
+            }
+        #endif
         }
 
 #ifdef HAVE_SECRET_CALLBACK
@@ -17252,6 +20183,30 @@
         return ret;
     }
 
+#ifdef WOLFSSL_TLS13
+    /* returns 1 if able to do TLS 1.3 otherwise 0 */
+    static int TLSv1_3_Capable(WOLFSSL* ssl)
+    {
+    #ifndef WOLFSSL_TLS13
+        return 0;
+    #else
+        int ret = 0;
+
+        if (IsAtLeastTLSv1_3(ssl->ctx->method->version)) {
+            ret = 1;
+        }
+
+        #ifdef OPENSSL_EXTRA
+        if ((wolfSSL_get_options(ssl) & SSL_OP_NO_TLSv1_3)) {
+            /* option set at run time to disable TLS 1.3 */
+            ret = 0;
+        }
+        #endif
+        return ret;
+    #endif
+    }
+#endif /* WOLFSSL_TLS13 */
+
     int CompleteServerHello(WOLFSSL* ssl)
     {
         int ret;
@@ -17261,7 +20216,7 @@
                                                          TLS13_DOWNGRADE_SZ - 1;
             byte  vers = ssl->arrays->serverRandom[RAN_LEN - 1];
     #ifdef WOLFSSL_TLS13
-            if (IsAtLeastTLSv1_3(ssl->ctx->method->version)) {
+            if (TLSv1_3_Capable(ssl)) {
                 /* TLS v1.3 capable client not allowed to downgrade when
                  * connecting to TLS v1.3 capable server unless cipher suite
                  * demands it.
@@ -17362,6 +20317,11 @@
     {
         word16 len;
         word32 begin = *inOutIdx;
+    #ifdef OPENSSL_EXTRA
+        int ret;
+        WOLFSSL_X509* x509 = NULL;
+        WOLFSSL_EVP_PKEY* pkey = NULL;
+    #endif
 
         WOLFSSL_START(WC_FUNC_CERTIFICATE_REQUEST_DO);
         WOLFSSL_ENTER("DoCertificateRequest");
@@ -17373,7 +20333,7 @@
                 AddLateName("CertificateRequest", &ssl->timeoutInfo);
         #endif
 
-        if ((*inOutIdx - begin) + OPAQUE8_LEN > size)
+        if (OPAQUE8_LEN > size)
             return BUFFER_ERROR;
 
         len = input[(*inOutIdx)++];
@@ -17395,7 +20355,19 @@
             if ((*inOutIdx - begin) + len > size)
                 return BUFFER_ERROR;
 
-            PickHashSigAlgo(ssl, input + *inOutIdx, len);
+            if (PickHashSigAlgo(ssl, input + *inOutIdx, len) != 0 &&
+                                             ssl->buffers.certificate &&
+                                             ssl->buffers.certificate->buffer) {
+            #ifdef HAVE_PK_CALLBACKS
+                if (wolfSSL_CTX_IsPrivatePkSet(ssl->ctx)) {
+                    WOLFSSL_MSG("Using PK for client private key");
+                    return INVALID_PARAMETER;
+                }
+            #endif
+                if (ssl->buffers.key && ssl->buffers.key->buffer) {
+                    return INVALID_PARAMETER;
+                }
+            }
             *inOutIdx += len;
     #ifdef WC_RSA_PSS
             ssl->pssAlgo = 0;
@@ -17430,6 +20402,26 @@
             len -= OPAQUE16_LEN + dnSz;
         }
 
+    #ifdef OPENSSL_EXTRA
+        /* call client cert callback if no cert has been loaded */
+        if ((ssl->ctx->CBClientCert != NULL) &&
+            (!ssl->buffers.certificate || !ssl->buffers.certificate->buffer)) {
+
+            ret = ssl->ctx->CBClientCert(ssl, &x509, &pkey);
+            if (ret == 1) {
+                if ((wolfSSL_use_certificate(ssl, x509) != WOLFSSL_SUCCESS) ||
+                    (wolfSSL_use_PrivateKey(ssl, pkey) != WOLFSSL_SUCCESS)) {
+                    return CLIENT_CERT_CB_ERROR;
+                }
+                wolfSSL_X509_free(x509);
+                wolfSSL_EVP_PKEY_free(pkey);
+
+            } else if (ret < 0) {
+                return WOLFSSL_ERROR_WANT_X509_LOOKUP;
+            }
+        }
+    #endif
+
         /* don't send client cert or cert verify if user hasn't provided
            cert and private key */
         if (ssl->buffers.certificate && ssl->buffers.certificate->buffer) {
@@ -17443,17 +20435,22 @@
                 ssl->options.sendVerify = SEND_CERT;
             }
         }
-	#ifdef OPENSSL_EXTRA
-		else
-	#else
+    #ifdef OPENSSL_EXTRA
+        else
+    #else
         else if (IsTLS(ssl))
-	#endif
+    #endif
         {
             ssl->options.sendVerify = SEND_BLANK_CERT;
         }
 
-        if (IsEncryptionOn(ssl, 0))
+        if (IsEncryptionOn(ssl, 0)) {
             *inOutIdx += ssl->keys.padSz;
+        #if defined(HAVE_ENCRYPT_THEN_MAC) && !defined(WOLFSSL_AEAD_ONLY)
+            if (ssl->options.startedETMRead)
+                *inOutIdx += MacSize(ssl);
+        #endif
+        }
 
         WOLFSSL_LEAVE("DoCertificateRequest", 0);
         WOLFSSL_END(WC_FUNC_CERTIFICATE_REQUEST_DO);
@@ -17463,7 +20460,7 @@
 #endif /* !NO_CERTS */
 
 
-#if defined(HAVE_ECC) || defined(HAVE_CURVE25519)
+#if defined(HAVE_ECC) || defined(HAVE_CURVE25519) || defined(HAVE_CURVE448)
 
     static int CheckCurveId(int tlsCurveId)
     {
@@ -17497,10 +20494,10 @@
             case WOLFSSL_ECC_SECP224K1: return ECC_SECP224K1_OID;
         #endif /* HAVE_ECC_KOBLITZ */
     #endif
-    #if !defined(NO_ECC256)  || defined(HAVE_ALL_CURVES)
         #ifdef HAVE_CURVE25519
             case WOLFSSL_ECC_X25519: return ECC_X25519_OID;
         #endif
+    #if !defined(NO_ECC256)  || defined(HAVE_ALL_CURVES)
         #ifndef NO_ECC_SECP
             case WOLFSSL_ECC_SECP256R1: return ECC_SECP256R1_OID;
         #endif /* !NO_ECC_SECP */
@@ -17511,6 +20508,9 @@
             case WOLFSSL_ECC_BRAINPOOLP256R1: return ECC_BRAINPOOLP256R1_OID;
         #endif /* HAVE_ECC_BRAINPOOL */
     #endif
+        #ifdef HAVE_CURVE448
+            case WOLFSSL_ECC_X448: return ECC_X448_OID;
+        #endif
     #if defined(HAVE_ECC384) || defined(HAVE_ALL_CURVES)
         #ifndef NO_ECC_SECP
             case WOLFSSL_ECC_SECP384R1: return ECC_SECP384R1_OID;
@@ -17536,21 +20536,25 @@
 
 #endif /* HAVE_ECC */
 
-
 /* Persistable DoServerKeyExchange arguments */
 typedef struct DskeArgs {
     byte*  output; /* not allocated */
-#if !defined(NO_DH) || defined(HAVE_ECC) || defined(HAVE_ED25519)
+#if !defined(NO_DH) || defined(HAVE_ECC) || defined(HAVE_ED25519) || \
+                                                             defined(HAVE_ED448)
     byte*  verifySig;
 #endif
     word32 idx;
     word32 begin;
-#if !defined(NO_DH) || defined(HAVE_ECC) || defined(HAVE_ED25519)
+#if !defined(NO_DH) || defined(HAVE_ECC) || defined(HAVE_ED25519) || \
+                                                             defined(HAVE_ED448)
     word16 verifySigSz;
 #endif
     word16 sigSz;
     byte   sigAlgo;
     byte   hashAlgo;
+#if !defined(NO_RSA) && defined(WC_RSA_PSS)
+    int    bits;
+#endif
 } DskeArgs;
 
 static void FreeDskeArgs(WOLFSSL* ssl, void* pArgs)
@@ -17560,7 +20564,8 @@
     (void)ssl;
     (void)args;
 
-#if !defined(NO_DH) || defined(HAVE_ECC) || defined(HAVE_ED25519)
+#if !defined(NO_DH) || defined(HAVE_ECC) || defined(HAVE_ED25519) || \
+                                                             defined(HAVE_ED448)
     if (args->verifySig) {
         XFREE(args->verifySig, ssl->heap, DYNAMIC_TYPE_SIGNATURE);
         args->verifySig = NULL;
@@ -17568,6 +20573,172 @@
 #endif
 }
 
+#ifndef NO_DH
+static int GetDhPublicKey(WOLFSSL* ssl, const byte* input, word32 size,
+                          DskeArgs* args)
+{
+    int             ret = 0;
+    word16          length;
+#ifdef HAVE_FFDHE
+    const DhParams* params = NULL;
+    int             group = 0;
+#endif
+
+    ssl->buffers.weOwnDH = 1;
+
+    ssl->buffers.serverDH_P.buffer = NULL;
+    ssl->buffers.serverDH_G.buffer = NULL;
+    ssl->buffers.serverDH_Pub.buffer = NULL;
+
+    /* p */
+    if ((args->idx - args->begin) + OPAQUE16_LEN > size) {
+        ERROR_OUT(BUFFER_ERROR, exit_gdpk);
+    }
+
+    ato16(input + args->idx, &length);
+    args->idx += OPAQUE16_LEN;
+
+    if ((args->idx - args->begin) + length > size) {
+        ERROR_OUT(BUFFER_ERROR, exit_gdpk);
+    }
+
+    if (length < ssl->options.minDhKeySz) {
+        WOLFSSL_MSG("Server using a DH key that is too small");
+        SendAlert(ssl, alert_fatal, handshake_failure);
+        ERROR_OUT(DH_KEY_SIZE_E, exit_gdpk);
+    }
+    if (length > ssl->options.maxDhKeySz) {
+        WOLFSSL_MSG("Server using a DH key that is too big");
+        SendAlert(ssl, alert_fatal, handshake_failure);
+        ERROR_OUT(DH_KEY_SIZE_E, exit_gdpk);
+    }
+
+    ssl->buffers.serverDH_P.buffer =
+        (byte*)XMALLOC(length, ssl->heap, DYNAMIC_TYPE_PUBLIC_KEY);
+    if (ssl->buffers.serverDH_P.buffer) {
+        ssl->buffers.serverDH_P.length = length;
+    }
+    else {
+        ERROR_OUT(MEMORY_ERROR, exit_gdpk);
+    }
+
+    XMEMCPY(ssl->buffers.serverDH_P.buffer, input + args->idx,
+                                                        length);
+    args->idx += length;
+
+    ssl->options.dhKeySz = length;
+
+    /* g */
+    if ((args->idx - args->begin) + OPAQUE16_LEN > size) {
+        ERROR_OUT(BUFFER_ERROR, exit_gdpk);
+    }
+
+    ato16(input + args->idx, &length);
+    args->idx += OPAQUE16_LEN;
+
+    if ((args->idx - args->begin) + length > size) {
+        ERROR_OUT(BUFFER_ERROR, exit_gdpk);
+    }
+
+    ssl->buffers.serverDH_G.buffer =
+        (byte*)XMALLOC(length, ssl->heap, DYNAMIC_TYPE_PUBLIC_KEY);
+    if (ssl->buffers.serverDH_G.buffer) {
+        ssl->buffers.serverDH_G.length = length;
+    }
+    else {
+        ERROR_OUT(MEMORY_ERROR, exit_gdpk);
+    }
+
+    XMEMCPY(ssl->buffers.serverDH_G.buffer, input + args->idx,
+                                                        length);
+    args->idx += length;
+
+    /* pub */
+    if ((args->idx - args->begin) + OPAQUE16_LEN > size) {
+        ERROR_OUT(BUFFER_ERROR, exit_gdpk);
+    }
+
+    ato16(input + args->idx, &length);
+    args->idx += OPAQUE16_LEN;
+
+    if ((args->idx - args->begin) + length > size) {
+        ERROR_OUT(BUFFER_ERROR, exit_gdpk);
+    }
+
+    ssl->buffers.serverDH_Pub.buffer =
+        (byte*)XMALLOC(length, ssl->heap, DYNAMIC_TYPE_PUBLIC_KEY);
+    if (ssl->buffers.serverDH_Pub.buffer) {
+        ssl->buffers.serverDH_Pub.length = length;
+    }
+    else {
+        ERROR_OUT(MEMORY_ERROR, exit_gdpk);
+    }
+
+    XMEMCPY(ssl->buffers.serverDH_Pub.buffer, input + args->idx,
+                                                        length);
+    args->idx += length;
+
+#ifdef HAVE_FFDHE
+    switch (ssl->options.dhKeySz) {
+    #ifdef HAVE_FFDHE_2048
+        case 2048/8:
+            params = wc_Dh_ffdhe2048_Get();
+            group = WOLFSSL_FFDHE_2048;
+            break;
+    #endif
+    #ifdef HAVE_FFDHE_3072
+        case 3072/8:
+            params = wc_Dh_ffdhe3072_Get();
+            group = WOLFSSL_FFDHE_3072;
+            break;
+    #endif
+    #ifdef HAVE_FFDHE_4096
+        case 4096/8:
+            params = wc_Dh_ffdhe4096_Get();
+            group = WOLFSSL_FFDHE_4096;
+            break;
+    #endif
+    #ifdef HAVE_FFDHE_6144
+        case 6144/8:
+            params = wc_Dh_ffdhe6144_Get();
+            group = WOLFSSL_FFDHE_6144;
+            break;
+    #endif
+    #ifdef HAVE_FFDHE_8192
+        case 8192/8:
+            params = wc_Dh_ffdhe8192_Get();
+            group = WOLFSSL_FFDHE_8192;
+            break;
+    #endif
+        default:
+            break;
+    }
+
+    if (params == NULL || params->g_len != ssl->buffers.serverDH_G.length ||
+            (XMEMCMP(ssl->buffers.serverDH_G.buffer, params->g,
+                    params->g_len) != 0) ||
+            (XMEMCMP(ssl->buffers.serverDH_P.buffer, params->p,
+                    params->p_len) != 0)) {
+        WOLFSSL_MSG("Server not using FFDHE parameters");
+    #ifdef WOLFSSL_REQUIRE_FFDHE
+        SendAlert(ssl, alert_fatal, handshake_failure);
+        ERROR_OUT(DH_PARAMS_NOT_FFDHE_E, exit_gdpk);
+    #endif
+    }
+    else {
+        ssl->namedGroup = group;
+    #if !defined(WOLFSSL_OLD_PRIME_CHECK) && !defined(HAVE_FIPS) && \
+        !defined(HAVE_SELFTEST)
+        ssl->options.dhDoKeyTest = 0;
+    #endif
+    }
+#endif /* HAVE_FFDHE */
+
+exit_gdpk:
+    return ret;
+}
+#endif
+
 /* handle processing of server_key_exchange (12) */
 static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                                word32* inOutIdx, word32 size)
@@ -17652,101 +20823,14 @@
             #ifndef NO_DH
                 case diffie_hellman_kea:
                 {
-                    word16 length;
-
-                    /* p */
-                    if ((args->idx - args->begin) + OPAQUE16_LEN > size) {
-                        ERROR_OUT(BUFFER_ERROR, exit_dske);
-                    }
-
-                    ato16(input + args->idx, &length);
-                    args->idx += OPAQUE16_LEN;
-
-                    if ((args->idx - args->begin) + length > size) {
-                        ERROR_OUT(BUFFER_ERROR, exit_dske);
-                    }
-
-                    if (length < ssl->options.minDhKeySz) {
-                        WOLFSSL_MSG("Server using a DH key that is too small");
-                        SendAlert(ssl, alert_fatal, handshake_failure);
-                        ERROR_OUT(DH_KEY_SIZE_E, exit_dske);
-                    }
-                    if (length > ssl->options.maxDhKeySz) {
-                        WOLFSSL_MSG("Server using a DH key that is too big");
-                        SendAlert(ssl, alert_fatal, handshake_failure);
-                        ERROR_OUT(DH_KEY_SIZE_E, exit_dske);
-                    }
-
-                    ssl->buffers.serverDH_P.buffer =
-                        (byte*)XMALLOC(length, ssl->heap, DYNAMIC_TYPE_PUBLIC_KEY);
-                    if (ssl->buffers.serverDH_P.buffer) {
-                        ssl->buffers.serverDH_P.length = length;
-                    }
-                    else {
-                        ERROR_OUT(MEMORY_ERROR, exit_dske);
-                    }
-
-                    XMEMCPY(ssl->buffers.serverDH_P.buffer, input + args->idx,
-                                                                        length);
-                    args->idx += length;
-
-                    ssl->options.dhKeySz = length;
-
-                    /* g */
-                    if ((args->idx - args->begin) + OPAQUE16_LEN > size) {
-                        ERROR_OUT(BUFFER_ERROR, exit_dske);
-                    }
-
-                    ato16(input + args->idx, &length);
-                    args->idx += OPAQUE16_LEN;
-
-                    if ((args->idx - args->begin) + length > size) {
-                        ERROR_OUT(BUFFER_ERROR, exit_dske);
-                    }
-
-                    ssl->buffers.serverDH_G.buffer =
-                        (byte*)XMALLOC(length, ssl->heap, DYNAMIC_TYPE_PUBLIC_KEY);
-                    if (ssl->buffers.serverDH_G.buffer) {
-                        ssl->buffers.serverDH_G.length = length;
-                    }
-                    else {
-                        ERROR_OUT(MEMORY_ERROR, exit_dske);
-                    }
-
-                    XMEMCPY(ssl->buffers.serverDH_G.buffer, input + args->idx,
-                                                                        length);
-                    args->idx += length;
-
-                    ssl->buffers.weOwnDH = 1;
-
-                    /* pub */
-                    if ((args->idx - args->begin) + OPAQUE16_LEN > size) {
-                        ERROR_OUT(BUFFER_ERROR, exit_dske);
-                    }
-
-                    ato16(input + args->idx, &length);
-                    args->idx += OPAQUE16_LEN;
-
-                    if ((args->idx - args->begin) + length > size) {
-                        ERROR_OUT(BUFFER_ERROR, exit_dske);
-                    }
-
-                    ssl->buffers.serverDH_Pub.buffer =
-                        (byte*)XMALLOC(length, ssl->heap, DYNAMIC_TYPE_PUBLIC_KEY);
-                    if (ssl->buffers.serverDH_Pub.buffer) {
-                        ssl->buffers.serverDH_Pub.length = length;
-                    }
-                    else {
-                        ERROR_OUT(MEMORY_ERROR, exit_dske);
-                    }
-
-                    XMEMCPY(ssl->buffers.serverDH_Pub.buffer, input + args->idx,
-                                                                        length);
-                    args->idx += length;
+                    ret = GetDhPublicKey(ssl, input, size, args);
+                    if (ret != 0)
+                        goto exit_dske;
                     break;
                 }
             #endif /* !NO_DH */
-            #if defined(HAVE_ECC) || defined(HAVE_CURVE25519)
+            #if defined(HAVE_ECC) || defined(HAVE_CURVE25519) || \
+                                                          defined(HAVE_CURVE448)
                 case ecc_diffie_hellman_kea:
                 {
                     byte b;
@@ -17772,6 +20856,9 @@
                         ERROR_OUT(ECC_CURVE_ERROR, exit_dske);
                     }
                     ssl->ecdhCurveOID = curveOid;
+                #if defined(WOLFSSL_TLS13) || defined(HAVE_FFDHE)
+                    ssl->namedGroup = 0;
+                #endif
 
                     length = input[args->idx++];
                     if ((args->idx - args->begin) + length > size) {
@@ -17795,6 +20882,21 @@
                             }
                         }
 
+                        if ((ret = wc_curve25519_check_public(
+                                input + args->idx, length,
+                                EC25519_LITTLE_ENDIAN)) != 0) {
+                        #ifdef WOLFSSL_EXTRA_ALERTS
+                            if (ret == BUFFER_E)
+                                SendAlert(ssl, alert_fatal, decode_error);
+                            else if (ret == ECC_OUT_OF_RANGE_E)
+                                SendAlert(ssl, alert_fatal, bad_record_mac);
+                            else {
+                                SendAlert(ssl, alert_fatal, illegal_parameter);
+                            }
+                        #endif
+                            ERROR_OUT(ECC_PEERKEY_ERROR, exit_dske);
+                        }
+
                         if (wc_curve25519_import_public_ex(input + args->idx,
                                 length, ssl->peerX25519Key,
                                 EC25519_LITTLE_ENDIAN) != 0) {
@@ -17806,6 +20908,49 @@
                         break;
                     }
                 #endif
+                #ifdef HAVE_CURVE448
+                    if (ssl->ecdhCurveOID == ECC_X448_OID) {
+                        if (ssl->peerX448Key == NULL) {
+                            ret = AllocKey(ssl, DYNAMIC_TYPE_CURVE448,
+                                           (void**)&ssl->peerX448Key);
+                            if (ret != 0) {
+                                goto exit_dske;
+                            }
+                        } else if (ssl->peerX448KeyPresent) {
+                            ret = ReuseKey(ssl, DYNAMIC_TYPE_CURVE448,
+                                           ssl->peerX448Key);
+                            ssl->peerX448KeyPresent = 0;
+                            if (ret != 0) {
+                                goto exit_dske;
+                            }
+                        }
+
+                        if ((ret = wc_curve448_check_public(
+                                input + args->idx, length,
+                                EC448_LITTLE_ENDIAN)) != 0) {
+                        #ifdef WOLFSSL_EXTRA_ALERTS
+                            if (ret == BUFFER_E)
+                                SendAlert(ssl, alert_fatal, decode_error);
+                            else if (ret == ECC_OUT_OF_RANGE_E)
+                                SendAlert(ssl, alert_fatal, bad_record_mac);
+                            else {
+                                SendAlert(ssl, alert_fatal, illegal_parameter);
+                            }
+                        #endif
+                            ERROR_OUT(ECC_PEERKEY_ERROR, exit_dske);
+                        }
+
+                        if (wc_curve448_import_public_ex(input + args->idx,
+                                length, ssl->peerX448Key,
+                                EC448_LITTLE_ENDIAN) != 0) {
+                            ERROR_OUT(ECC_PEERKEY_ERROR, exit_dske);
+                        }
+
+                        args->idx += length;
+                        ssl->peerX448KeyPresent = 1;
+                        break;
+                    }
+                #endif
                 #ifdef HAVE_ECC
                     if (ssl->peerEccKey == NULL) {
                         ret = AllocKey(ssl, DYNAMIC_TYPE_ECC,
@@ -17824,15 +20969,18 @@
                     curveId = wc_ecc_get_oid(curveOid, NULL, NULL);
                     if (wc_ecc_import_x963_ex(input + args->idx, length,
                                         ssl->peerEccKey, curveId) != 0) {
+                    #ifdef WOLFSSL_EXTRA_ALERTS
+                        SendAlert(ssl, alert_fatal, illegal_parameter);
+                    #endif
                         ERROR_OUT(ECC_PEERKEY_ERROR, exit_dske);
                     }
 
                     args->idx += length;
                     ssl->peerEccKeyPresent = 1;
-                    break;
-                #endif
-                }
-            #endif /* HAVE_ECC || HAVE_CURVE25519 */
+                #endif
+                    break;
+                }
+            #endif /* HAVE_ECC || HAVE_CURVE25519 || HAVE_CURVE448 */
             #if !defined(NO_DH) && !defined(NO_PSK)
                 case dhe_psk_kea:
                 {
@@ -17857,100 +21005,14 @@
                     ssl->arrays->server_hint[srvHintLen] = '\0'; /* null term */
                     args->idx += length;
 
-                    /* p */
-                    if ((args->idx - args->begin) + OPAQUE16_LEN > size) {
-                        ERROR_OUT(BUFFER_ERROR, exit_dske);
-                    }
-
-                    ato16(input + args->idx, &length);
-                    args->idx += OPAQUE16_LEN;
-
-                    if ((args->idx - args->begin) + length > size) {
-                        ERROR_OUT(BUFFER_ERROR, exit_dske);
-                    }
-
-                    if (length < ssl->options.minDhKeySz) {
-                        WOLFSSL_MSG("Server using a DH key that is too small");
-                        SendAlert(ssl, alert_fatal, handshake_failure);
-                        ERROR_OUT(DH_KEY_SIZE_E, exit_dske);
-                    }
-                    if (length > ssl->options.maxDhKeySz) {
-                        WOLFSSL_MSG("Server using a DH key that is too big");
-                        SendAlert(ssl, alert_fatal, handshake_failure);
-                        ERROR_OUT(DH_KEY_SIZE_E, exit_dske);
-                    }
-
-                    ssl->buffers.serverDH_P.buffer = (byte*)XMALLOC(length,
-                                                ssl->heap, DYNAMIC_TYPE_PUBLIC_KEY);
-                    if (ssl->buffers.serverDH_P.buffer) {
-                        ssl->buffers.serverDH_P.length = length;
-                    }
-                    else {
-                        ERROR_OUT(MEMORY_ERROR, exit_dske);
-                    }
-
-                    XMEMCPY(ssl->buffers.serverDH_P.buffer, input + args->idx,
-                                                                        length);
-                    args->idx += length;
-
-                    ssl->options.dhKeySz = length;
-
-                    /* g */
-                    if ((args->idx - args->begin) + OPAQUE16_LEN > size) {
-                        ERROR_OUT(BUFFER_ERROR, exit_dske);
-                    }
-
-                    ato16(input + args->idx, &length);
-                    args->idx += OPAQUE16_LEN;
-
-                    if ((args->idx - args->begin) + length > size) {
-                        ERROR_OUT(BUFFER_ERROR, exit_dske);
-                    }
-
-                    ssl->buffers.serverDH_G.buffer = (byte*)XMALLOC(length,
-                                                ssl->heap, DYNAMIC_TYPE_PUBLIC_KEY);
-                    if (ssl->buffers.serverDH_G.buffer) {
-                        ssl->buffers.serverDH_G.length = length;
-                    }
-                    else {
-                        ERROR_OUT(MEMORY_ERROR, exit_dske);
-                    }
-
-                    XMEMCPY(ssl->buffers.serverDH_G.buffer, input + args->idx,
-                                                                        length);
-                    args->idx += length;
-
-                    ssl->buffers.weOwnDH = 1;
-
-                    /* pub */
-                    if ((args->idx - args->begin) + OPAQUE16_LEN > size) {
-                        ERROR_OUT(BUFFER_ERROR, exit_dske);
-                    }
-
-                    ato16(input + args->idx, &length);
-                    args->idx += OPAQUE16_LEN;
-
-                    if ((args->idx - args->begin) + length > size) {
-                        ERROR_OUT(BUFFER_ERROR, exit_dske);
-                    }
-
-                    ssl->buffers.serverDH_Pub.buffer = (byte*)XMALLOC(length,
-                                                ssl->heap, DYNAMIC_TYPE_PUBLIC_KEY);
-                    if (ssl->buffers.serverDH_Pub.buffer) {
-                        ssl->buffers.serverDH_Pub.length = length;
-                    }
-                    else {
-                        ERROR_OUT(MEMORY_ERROR, exit_dske);
-                    }
-
-                    XMEMCPY(ssl->buffers.serverDH_Pub.buffer, input + args->idx,
-                                                                        length);
-                    args->idx += length;
-                    break;
-                }
-            #endif /* !NO_DH || !NO_PSK */
-            #if (defined(HAVE_ECC) || defined(HAVE_CURVE25519)) && \
-                                                                !defined(NO_PSK)
+                    ret = GetDhPublicKey(ssl, input, size, args);
+                    if (ret != 0)
+                        goto exit_dske;
+                    break;
+                }
+            #endif /* !NO_DH && !NO_PSK */
+            #if (defined(HAVE_ECC) || defined(HAVE_CURVE25519) || \
+                                     defined(HAVE_CURVE448)) && !defined(NO_PSK)
                 case ecdhe_psk_kea:
                 {
                     byte b;
@@ -18016,6 +21078,21 @@
                             }
                         }
 
+                        if ((ret = wc_curve25519_check_public(
+                                input + args->idx, length,
+                                EC25519_LITTLE_ENDIAN)) != 0) {
+                        #ifdef WOLFSSL_EXTRA_ALERTS
+                            if (ret == BUFFER_E)
+                                SendAlert(ssl, alert_fatal, decode_error);
+                            else if (ret == ECC_OUT_OF_RANGE_E)
+                                SendAlert(ssl, alert_fatal, bad_record_mac);
+                            else {
+                                SendAlert(ssl, alert_fatal, illegal_parameter);
+                            }
+                        #endif
+                            ERROR_OUT(ECC_PEERKEY_ERROR, exit_dske);
+                        }
+
                         if (wc_curve25519_import_public_ex(input + args->idx,
                                 length, ssl->peerX25519Key,
                                 EC25519_LITTLE_ENDIAN) != 0) {
@@ -18027,6 +21104,49 @@
                         break;
                     }
                 #endif
+                #ifdef HAVE_CURVE448
+                    if (ssl->ecdhCurveOID == ECC_X448_OID) {
+                        if (ssl->peerX448Key == NULL) {
+                            ret = AllocKey(ssl, DYNAMIC_TYPE_CURVE448,
+                                           (void**)&ssl->peerX448Key);
+                            if (ret != 0) {
+                                goto exit_dske;
+                            }
+                        } else if (ssl->peerEccKeyPresent) {
+                            ret = ReuseKey(ssl, DYNAMIC_TYPE_CURVE448,
+                                           ssl->peerX448Key);
+                            ssl->peerX448KeyPresent = 0;
+                            if (ret != 0) {
+                                goto exit_dske;
+                            }
+                        }
+
+                        if ((ret = wc_curve448_check_public(
+                                input + args->idx, length,
+                                EC448_LITTLE_ENDIAN)) != 0) {
+                        #ifdef WOLFSSL_EXTRA_ALERTS
+                            if (ret == BUFFER_E)
+                                SendAlert(ssl, alert_fatal, decode_error);
+                            else if (ret == ECC_OUT_OF_RANGE_E)
+                                SendAlert(ssl, alert_fatal, bad_record_mac);
+                            else {
+                                SendAlert(ssl, alert_fatal, illegal_parameter);
+                            }
+                        #endif
+                            ERROR_OUT(ECC_PEERKEY_ERROR, exit_dske);
+                        }
+
+                        if (wc_curve448_import_public_ex(input + args->idx,
+                                length, ssl->peerX448Key,
+                                EC448_LITTLE_ENDIAN) != 0) {
+                            ERROR_OUT(ECC_PEERKEY_ERROR, exit_dske);
+                        }
+
+                        args->idx += length;
+                        ssl->peerX448KeyPresent = 1;
+                        break;
+                    }
+                #endif
 
                     if (ssl->peerEccKey == NULL) {
                         ret = AllocKey(ssl, DYNAMIC_TYPE_ECC,
@@ -18052,7 +21172,7 @@
                     ssl->peerEccKeyPresent = 1;
                     break;
                 }
-            #endif /* (HAVE_ECC || HAVE_CURVE25519) && !NO_PSK */
+            #endif /* (HAVE_ECC || HAVE_CURVE25519 || HAVE_CURVE448) && !NO_PSK */
                 default:
                     ret = BAD_KEA_TYPE_E;
             } /* switch(ssl->specs.kea) */
@@ -18082,7 +21202,8 @@
                 case diffie_hellman_kea:
                 case ecc_diffie_hellman_kea:
                 {
-            #if defined(NO_DH) && !defined(HAVE_ECC) && !defined(HAVE_ED25519)
+            #if defined(NO_DH) && !defined(HAVE_ECC) && !defined(HAVE_ED25519) \
+                                                     && !defined(HAVE_ED448)
                     ERROR_OUT(NOT_COMPILED_IN, exit_dske);
             #else
                     enum wc_HashType hashType;
@@ -18208,12 +21329,21 @@
                             break;
                         }
                     #endif /* HAVE_ED25519 */
+                    #if defined(HAVE_ED448)
+                        case ed448_sa_algo:
+                        {
+                            if (!ssl->peerEd448KeyPresent) {
+                                ERROR_OUT(NO_PEER_KEY, exit_dske);
+                            }
+                            break;
+                        }
+                    #endif /* HAVE_ED448 */
 
                     default:
                         ret = ALGO_ID_E;
                     } /* switch (args->sigAlgo) */
 
-            #endif /* NO_DH && !HAVE_ECC && !HAVE_ED25519 */
+            #endif /* NO_DH && !HAVE_ECC && !HAVE_ED25519 && !HAVE_ED448 */
                     break;
                 }
                 default:
@@ -18245,7 +21375,8 @@
                 case diffie_hellman_kea:
                 case ecc_diffie_hellman_kea:
                 {
-            #if defined(NO_DH) && !defined(HAVE_ECC) && !defined(HAVE_ED25519)
+            #if defined(NO_DH) && !defined(HAVE_ECC) && !defined(HAVE_ED25519) \
+                                                     && !defined(HAVE_ED448)
                     ERROR_OUT(NOT_COMPILED_IN, exit_dske);
             #else
                     if (ssl->options.usingAnon_cipher) {
@@ -18284,8 +21415,20 @@
 
                             if (ret >= 0) {
                                 args->sigSz = (word16)ret;
+                            #ifdef WC_RSA_PSS
+                                args->bits = mp_count_bits(&ssl->peerRsaKey->n);
+                            #endif
                                 ret = 0;
                             }
+                        #ifdef WOLFSSL_ASYNC_CRYPT
+                            if (ret != WC_PENDING_E)
+                        #endif
+                            {
+                                /* peerRsaKey */
+                                FreeKey(ssl, DYNAMIC_TYPE_RSA,
+                                                      (void**)&ssl->peerRsaKey);
+                                ssl->peerRsaKeyPresent = 0;
+                            }
                             break;
                         }
                     #endif /* !NO_RSA */
@@ -18304,10 +21447,15 @@
                             #endif
                             );
 
-                            /* peerEccDsaKey */
-                            FreeKey(ssl, DYNAMIC_TYPE_ECC,
+                        #ifdef WOLFSSL_ASYNC_CRYPT
+                            if (ret != WC_PENDING_E)
+                        #endif
+                            {
+                                /* peerEccDsaKey */
+                                FreeKey(ssl, DYNAMIC_TYPE_ECC,
                                                    (void**)&ssl->peerEccDsaKey);
-                            ssl->peerEccDsaKeyPresent = 0;
+                                ssl->peerEccDsaKeyPresent = 0;
+                            }
                             break;
                         }
                     #endif /* HAVE_ECC */
@@ -18326,18 +21474,50 @@
                             #endif
                             );
 
-                            /* peerEccDsaKey */
-                            FreeKey(ssl, DYNAMIC_TYPE_ED25519,
+                        #ifdef WOLFSSL_ASYNC_CRYPT
+                            if (ret != WC_PENDING_E)
+                        #endif
+                            {
+                                /* peerEccDsaKey */
+                                FreeKey(ssl, DYNAMIC_TYPE_ED25519,
                                                   (void**)&ssl->peerEd25519Key);
-                            ssl->peerEd25519KeyPresent = 0;
+                                ssl->peerEd25519KeyPresent = 0;
+                            }
                             break;
                         }
                     #endif /* HAVE_ED25519 */
+                    #if defined(HAVE_ED448)
+                        case ed448_sa_algo:
+                        {
+                            ret = Ed448Verify(ssl,
+                                args->verifySig, args->verifySigSz,
+                                ssl->buffers.sig.buffer,
+                                ssl->buffers.sig.length,
+                                ssl->peerEd448Key,
+                            #ifdef HAVE_PK_CALLBACKS
+                                &ssl->buffers.peerEd448Key
+                            #else
+                                NULL
+                            #endif
+                            );
+
+                        #ifdef WOLFSSL_ASYNC_CRYPT
+                            if (ret != WC_PENDING_E)
+                        #endif
+                            {
+                                /* peerEccDsaKey */
+                                FreeKey(ssl, DYNAMIC_TYPE_ED448,
+                                                    (void**)&ssl->peerEd448Key);
+                                ssl->peerEd448KeyPresent = 0;
+                            }
+                            break;
+                        }
+                    #endif /* HAVE_ED448 */
 
                     default:
                         ret = ALGO_ID_E;
                     } /* switch (sigAlgo) */
-            #endif /* NO_DH && !HAVE_ECC && !HAVE_ED25519 */
+            #endif /* NO_DH && !HAVE_ECC && !HAVE_ED25519 && !HAVE_ED448 */
                     break;
                 }
                 default:
@@ -18369,7 +21549,8 @@
                 case diffie_hellman_kea:
                 case ecc_diffie_hellman_kea:
                 {
-            #if defined(NO_DH) && !defined(HAVE_ECC) && !defined(HAVE_ED25519)
+            #if defined(NO_DH) && !defined(HAVE_ECC) && !defined(HAVE_ED25519) \
+                                                     && !defined(HAVE_ED448)
                     ERROR_OUT(NOT_COMPILED_IN, exit_dske);
             #else
                     if (ssl->options.usingAnon_cipher) {
@@ -18384,11 +21565,20 @@
                     #ifndef NO_RSA
                     #ifdef WC_RSA_PSS
                         case rsa_pss_sa_algo:
+                        #ifdef HAVE_SELFTEST
                             ret = wc_RsaPSS_CheckPadding(
                                              ssl->buffers.digest.buffer,
                                              ssl->buffers.digest.length,
                                              args->output, args->sigSz,
                                              HashAlgoToType(args->hashAlgo));
+                        #else
+                            ret = wc_RsaPSS_CheckPadding_ex(
+                                             ssl->buffers.digest.buffer,
+                                             ssl->buffers.digest.length,
+                                             args->output, args->sigSz,
+                                             HashAlgoToType(args->hashAlgo),
+                                             -1, args->bits);
+                        #endif
                             if (ret != 0)
                                 return ret;
                             break;
@@ -18397,7 +21587,7 @@
                         {
                             if (IsAtLeastTLSv1_2(ssl)) {
                             #ifdef WOLFSSL_SMALL_STACK
-                                byte*  encodedSig = NULL;
+                                byte*  encodedSig;
                             #else
                                 byte   encodedSig[MAX_ENCODED_SIG_SZ];
                             #endif
@@ -18447,10 +21637,15 @@
                             /* Nothing to do in this algo */
                             break;
                     #endif /* HAVE_ED25519 */
+                    #if defined(HAVE_ED448)
+                        case ed448_sa_algo:
+                            /* Nothing to do in this algo */
+                            break;
+                    #endif /* HAVE_ED448 */
                         default:
                             ret = ALGO_ID_E;
                     } /* switch (sigAlgo) */
-            #endif /* NO_DH && !HAVE_ECC && !HAVE_ED25519 */
+            #endif /* NO_DH && !HAVE_ECC && !HAVE_ED25519 && !HAVE_ED448 */
                     break;
                 }
                 default:
@@ -18471,6 +21666,10 @@
         {
             if (IsEncryptionOn(ssl, 0)) {
                 args->idx += ssl->keys.padSz;
+            #if defined(HAVE_ENCRYPT_THEN_MAC) && !defined(WOLFSSL_AEAD_ONLY)
+                if (ssl->options.startedETMRead)
+                    args->idx += MacSize(ssl);
+            #endif
             }
 
             /* QSH extensions */
@@ -18524,7 +21723,7 @@
 #ifdef WOLFSSL_ASYNC_CRYPT
     /* Handle async operation */
     if (ret == WC_PENDING_E) {
-        /* Mark message as not recevied so it can process again */
+        /* Mark message as not received so it can process again */
         ssl->msgsReceived.got_server_key_exchange = 0;
 
         return ret;
@@ -18637,7 +21836,7 @@
 
 int QSH_Init(WOLFSSL* ssl)
 {
-    /* check so not initialising twice when running DTLS */
+    /* check so not initializing twice when running DTLS */
     if (ssl->QSH_secret != NULL)
         return 0;
 
@@ -18793,7 +21992,7 @@
     int offset   = 0;
     word32 tmpSz = 0;
     buffer* buf;
-    QSHKey* current = ssl->peerQSHKey;
+    QSHKey* current;
     QSHScheme* schmPre = NULL;
     QSHScheme* schm    = NULL;
 
@@ -18802,6 +22001,7 @@
 
     WOLFSSL_MSG("Generating QSH secret key material");
 
+    current = ssl->peerQSHKey;
     /* get size of buffer needed */
     while (current) {
         if (current->pub.length != 0) {
@@ -18883,11 +22083,12 @@
 static word32 QSH_KeyGetSize(WOLFSSL* ssl)
 {
     word32 sz = 0;
-    QSHKey* current = ssl->peerQSHKey;
+    QSHKey* current;
 
     if (ssl == NULL)
         return -1;
 
+    current = ssl->peerQSHKey;
     sz += OPAQUE16_LEN; /* type of extension ie 0x00 0x18 */
     sz += OPAQUE24_LEN;
     /* get size of buffer needed */
@@ -18963,10 +22164,10 @@
     WOLFSSL_ENTER("SendClientKeyExchange");
 
 #ifdef OPENSSL_EXTRA
-	ssl->options.clientState = CLIENT_KEYEXCHANGE_COMPLETE;
-	ssl->cbmode = SSL_CB_MODE_WRITE;
-	if (ssl->CBIS != NULL)
-		ssl->CBIS(ssl, SSL_CB_CONNECT_LOOP, SSL_SUCCESS);
+    ssl->options.clientState = CLIENT_KEYEXCHANGE_COMPLETE;
+    ssl->cbmode = SSL_CB_MODE_WRITE;
+    if (ssl->CBIS != NULL)
+        ssl->CBIS(ssl, SSL_CB_CONNECT_LOOP, SSL_SUCCESS);
 #endif
 
 #ifdef WOLFSSL_ASYNC_CRYPT
@@ -19034,8 +22235,8 @@
                     }
                     break;
             #endif /* !NO_DH && !NO_PSK */
-            #if (defined(HAVE_ECC) || defined(HAVE_CURVE25519)) && \
-                                                                !defined(NO_PSK)
+            #if (defined(HAVE_ECC) || defined(HAVE_CURVE25519) || \
+                                     defined(HAVE_CURVE448)) && !defined(NO_PSK)
                 case ecdhe_psk_kea:
                     /* sanity check that PSK client callback has been set */
                     if (ssl->options.client_psk_cb == NULL) {
@@ -19069,6 +22270,32 @@
                         break;
                     }
                 #endif
+                #ifdef HAVE_CURVE448
+                    if (ssl->peerX448KeyPresent) {
+                        /* Check client ECC public key */
+                        if (!ssl->peerX448Key) {
+                            ERROR_OUT(NO_PEER_KEY, exit_scke);
+                        }
+
+                    #ifdef HAVE_PK_CALLBACKS
+                        /* if callback then use it for shared secret */
+                        if (ssl->ctx->X448SharedSecretCb != NULL) {
+                            break;
+                        }
+                    #endif
+
+                        /* create private key */
+                        ssl->hsType = DYNAMIC_TYPE_CURVE448;
+                        ret = AllocKey(ssl, ssl->hsType, &ssl->hsKey);
+                        if (ret != 0) {
+                            goto exit_scke;
+                        }
+
+                        ret = X448MakeKey(ssl, (curve448_key*)ssl->hsKey,
+                                          ssl->peerX448Key);
+                        break;
+                    }
+                #endif
                     /* Check client ECC public key */
                     if (!ssl->peerEccKey || !ssl->peerEccKeyPresent ||
                                             !ssl->peerEccKey->dp) {
@@ -19092,7 +22319,7 @@
                     ret = EccMakeKey(ssl, (ecc_key*)ssl->hsKey, ssl->peerEccKey);
 
                     break;
-            #endif /* (HAVE_ECC || HAVE_CURVE25519) && !NO_PSK */
+            #endif /* (HAVE_ECC || HAVE_CURVE25519 || HAVE_CURVE448) && !NO_PSK */
             #ifdef HAVE_NTRU
                 case ntru_kea:
                     if (ssl->peerNtruKeyPresent == 0) {
@@ -19100,7 +22327,8 @@
                     }
                     break;
             #endif /* HAVE_NTRU */
-            #if defined(HAVE_ECC) || defined(HAVE_CURVE25519)
+            #if defined(HAVE_ECC) || defined(HAVE_CURVE25519) || \
+                                                          defined(HAVE_CURVE448)
                 case ecc_diffie_hellman_kea:
                 {
                 #ifdef HAVE_ECC
@@ -19116,6 +22344,13 @@
                     }
                     else
                 #endif
+                #ifdef HAVE_CURVE448
+                    if (ssl->ecdhCurveOID == ECC_X448_OID) {
+                        if (ssl->ctx->X448SharedSecretCb != NULL)
+                            break;
+                    }
+                    else
+                #endif
                     if (ssl->ctx->EccSharedSecretCb != NULL) {
                         break;
                     }
@@ -19139,18 +22374,34 @@
                         break;
                     }
                 #endif
+                #ifdef HAVE_CURVE448
+                    if (ssl->peerX448KeyPresent) {
+                        if (!ssl->peerX448Key) {
+                            ERROR_OUT(NO_PEER_KEY, exit_scke);
+                        }
+
+                        /* create private key */
+                        ssl->hsType = DYNAMIC_TYPE_CURVE448;
+                        ret = AllocKey(ssl, ssl->hsType, &ssl->hsKey);
+                        if (ret != 0) {
+                            goto exit_scke;
+                        }
+
+                        ret = X448MakeKey(ssl, (curve448_key*)ssl->hsKey,
+                                          ssl->peerX448Key);
+                        break;
+                    }
+                #endif
                 #ifdef HAVE_ECC
                     if (ssl->specs.static_ecdh) {
                         /* Note: EccDsa is really fixed Ecc key here */
-                        if (!ssl->peerEccDsaKey || !ssl->peerEccDsaKeyPresent ||
-                                                   !ssl->peerEccDsaKey->dp) {
+                        if (!ssl->peerEccDsaKey || !ssl->peerEccDsaKeyPresent) {
                             ERROR_OUT(NO_PEER_KEY, exit_scke);
                         }
                         peerKey = ssl->peerEccDsaKey;
                     }
                     else {
-                        if (!ssl->peerEccKey || !ssl->peerEccKeyPresent ||
-                                                !ssl->peerEccKey->dp) {
+                        if (!ssl->peerEccKey || !ssl->peerEccKeyPresent) {
                             ERROR_OUT(NO_PEER_KEY, exit_scke);
                         }
                         peerKey = ssl->peerEccKey;
@@ -19171,7 +22422,7 @@
 
                     break;
                 }
-            #endif /* HAVE_ECC || HAVE_CURVE25519 */
+            #endif /* HAVE_ECC || HAVE_CURVE25519 || HAVE_CURVE448 */
 
                 default:
                     ret = BAD_KEA_TYPE_E;
@@ -19195,6 +22446,15 @@
             if (args->encSecret == NULL) {
                 ERROR_OUT(MEMORY_E, exit_scke);
             }
+            if (ssl->arrays->preMasterSecret == NULL) {
+                ssl->arrays->preMasterSz = ENCRYPT_LEN;
+                ssl->arrays->preMasterSecret = (byte*)XMALLOC(ENCRYPT_LEN,
+                                                ssl->heap, DYNAMIC_TYPE_SECRET);
+                if (ssl->arrays->preMasterSecret == NULL) {
+                    ERROR_OUT(MEMORY_E, exit_scke);
+                }
+                XMEMSET(ssl->arrays->preMasterSecret, 0, ENCRYPT_LEN);
+            }
 
             switch(ssl->specs.kea)
             {
@@ -19202,16 +22462,30 @@
                 case rsa_kea:
                 {
                     /* build PreMasterSecret with RNG data */
-                    ret = wc_RNG_GenerateBlock(ssl->rng,
+                    #if defined(WOLFSSL_RENESAS_TSIP_TLS) && \
+                       !defined(NO_WOLFSSL_RENESAS_TSIP_TLS_SESSION)
+                    if (tsip_useable(ssl)) {
+                        ret = tsip_generatePremasterSecret(
                         &ssl->arrays->preMasterSecret[VERSION_SZ],
-                        SECRET_LEN - VERSION_SZ);
-                    if (ret != 0) {
-                        goto exit_scke;
-                    }
-
-                    ssl->arrays->preMasterSecret[0] = ssl->chVersion.major;
-                    ssl->arrays->preMasterSecret[1] = ssl->chVersion.minor;
+                        ENCRYPT_LEN - VERSION_SZ);
+                    } else {
+                    #endif
+                        ret = wc_RNG_GenerateBlock(ssl->rng,
+                            &ssl->arrays->preMasterSecret[VERSION_SZ],
+                            SECRET_LEN - VERSION_SZ);
+                    #if defined(WOLFSSL_RENESAS_TSIP_TLS) && \
+                       !defined(NO_WOLFSSL_RENESAS_TSIP_TLS_SESSION)
+                    }
+                    #endif
+                        if (ret != 0) {
+                            goto exit_scke;
+                        }
+
+                        ssl->arrays->preMasterSecret[0] = ssl->chVersion.major;
+                        ssl->arrays->preMasterSecret[1] = ssl->chVersion.minor;
+
                     ssl->arrays->preMasterSz = SECRET_LEN;
+
                     break;
                 }
             #endif /* !NO_RSA */
@@ -19231,18 +22505,38 @@
                         goto exit_scke;
                     }
 
-                    ret = wc_DhSetKey(ssl->buffers.serverDH_Key,
-                        ssl->buffers.serverDH_P.buffer,
-                        ssl->buffers.serverDH_P.length,
-                        ssl->buffers.serverDH_G.buffer,
-                        ssl->buffers.serverDH_G.length);
-                    if (ret != 0) {
-                        goto exit_scke;
+                    #if !defined(HAVE_FIPS) && !defined(HAVE_SELFTEST) && \
+                        !defined(WOLFSSL_OLD_PRIME_CHECK)
+                    if (ssl->options.dhDoKeyTest &&
+                        !ssl->options.dhKeyTested)
+                    {
+                        ret = wc_DhSetCheckKey(ssl->buffers.serverDH_Key,
+                            ssl->buffers.serverDH_P.buffer,
+                            ssl->buffers.serverDH_P.length,
+                            ssl->buffers.serverDH_G.buffer,
+                            ssl->buffers.serverDH_G.length,
+                            NULL, 0, 0, ssl->rng);
+                        if (ret != 0) {
+                            goto exit_scke;
+                        }
+                        ssl->options.dhKeyTested = 1;
+                    }
+                    else
+                    #endif
+                    {
+                        ret = wc_DhSetKey(ssl->buffers.serverDH_Key,
+                            ssl->buffers.serverDH_P.buffer,
+                            ssl->buffers.serverDH_P.length,
+                            ssl->buffers.serverDH_G.buffer,
+                            ssl->buffers.serverDH_G.length);
+                        if (ret != 0) {
+                            goto exit_scke;
+                        }
                     }
 
                     /* for DH, encSecret is Yc, agree is pre-master */
                     ret = DhGenKeyPair(ssl, ssl->buffers.serverDH_Key,
-                        ssl->buffers.sig.buffer, &ssl->buffers.sig.length,
+                        ssl->buffers.sig.buffer, (word32*)&ssl->buffers.sig.length,
                         args->encSecret, &args->encSz);
 
                     /* set the max agree result size */
@@ -19326,24 +22620,44 @@
                         goto exit_scke;
                     }
 
-                    ret = wc_DhSetKey(ssl->buffers.serverDH_Key,
-                        ssl->buffers.serverDH_P.buffer,
-                        ssl->buffers.serverDH_P.length,
-                        ssl->buffers.serverDH_G.buffer,
-                        ssl->buffers.serverDH_G.length);
-                    if (ret != 0) {
-                        goto exit_scke;
+                    #if !defined(HAVE_FIPS) && !defined(HAVE_SELFTEST) && \
+                        !defined(WOLFSSL_OLD_PRIME_CHECK)
+                    if (ssl->options.dhDoKeyTest &&
+                        !ssl->options.dhKeyTested)
+                    {
+                        ret = wc_DhSetCheckKey(ssl->buffers.serverDH_Key,
+                            ssl->buffers.serverDH_P.buffer,
+                            ssl->buffers.serverDH_P.length,
+                            ssl->buffers.serverDH_G.buffer,
+                            ssl->buffers.serverDH_G.length,
+                            NULL, 0, 0, ssl->rng);
+                        if (ret != 0) {
+                            goto exit_scke;
+                        }
+                        ssl->options.dhKeyTested = 1;
+                    }
+                    else
+                    #endif
+                    {
+                        ret = wc_DhSetKey(ssl->buffers.serverDH_Key,
+                            ssl->buffers.serverDH_P.buffer,
+                            ssl->buffers.serverDH_P.length,
+                            ssl->buffers.serverDH_G.buffer,
+                            ssl->buffers.serverDH_G.length);
+                        if (ret != 0) {
+                            goto exit_scke;
+                        }
                     }
 
                     /* for DH, encSecret is Yc, agree is pre-master */
                     ret = DhGenKeyPair(ssl, ssl->buffers.serverDH_Key,
-                        ssl->buffers.sig.buffer, &ssl->buffers.sig.length,
+                        ssl->buffers.sig.buffer, (word32*)&ssl->buffers.sig.length,
                         args->output + OPAQUE16_LEN, &args->length);
                     break;
                 }
             #endif /* !NO_DH && !NO_PSK */
-            #if (defined(HAVE_ECC) || defined(HAVE_CURVE25519)) && \
-                                                                !defined(NO_PSK)
+            #if (defined(HAVE_ECC) || defined(HAVE_CURVE25519) || \
+                                     defined(HAVE_CURVE448)) && !defined(NO_PSK)
                 case ecdhe_psk_kea:
                 {
                     word32 esSz = 0;
@@ -19373,7 +22687,7 @@
                     /* length is used for public key size */
                     args->length = MAX_ENCRYPT_SZ;
 
-                    /* Create shared ECC key leaving room at the begining
+                    /* Create shared ECC key leaving room at the beginning
                        of buffer for size of shared key. */
                     ssl->arrays->preMasterSz = ENCRYPT_LEN - OPAQUE16_LEN;
 
@@ -19397,6 +22711,26 @@
                         break;
                     }
                 #endif
+                #ifdef HAVE_CURVE448
+                    if (ssl->ecdhCurveOID == ECC_X448_OID) {
+                    #ifdef HAVE_PK_CALLBACKS
+                        /* if callback then use it for shared secret */
+                        if (ssl->ctx->X448SharedSecretCb != NULL) {
+                            break;
+                        }
+                    #endif
+
+                        ret = wc_curve448_export_public_ex(
+                                (curve448_key*)ssl->hsKey,
+                                args->output + OPAQUE8_LEN, &args->length,
+                                EC448_LITTLE_ENDIAN);
+                        if (ret != 0) {
+                            ERROR_OUT(ECC_EXPORT_ERROR, exit_scke);
+                        }
+
+                        break;
+                    }
+                #endif
                 #ifdef HAVE_PK_CALLBACKS
                     /* if callback then use it for shared secret */
                     if (ssl->ctx->EccSharedSecretCb != NULL) {
@@ -19413,7 +22747,7 @@
 
                     break;
                 }
-            #endif /* (HAVE_ECC || HAVE_CURVE25519) && !NO_PSK */
+            #endif /* (HAVE_ECC || HAVE_CURVE25519 || HAVE_CURVE448) && !NO_PSK */
             #ifdef HAVE_NTRU
                 case ntru_kea:
                 {
@@ -19428,7 +22762,8 @@
                     break;
                 }
             #endif /* HAVE_NTRU */
-            #if defined(HAVE_ECC) || defined(HAVE_CURVE25519)
+            #if defined(HAVE_ECC) || defined(HAVE_CURVE25519) || \
+                                                          defined(HAVE_CURVE448)
                 case ecc_diffie_hellman_kea:
                 {
                     ssl->arrays->preMasterSz = ENCRYPT_LEN;
@@ -19453,7 +22788,27 @@
                         break;
                     }
                 #endif
-                #ifdef HAVE_ECC
+                #ifdef HAVE_CURVE448
+                    if (ssl->hsType == DYNAMIC_TYPE_CURVE448) {
+                    #ifdef HAVE_PK_CALLBACKS
+                        /* if callback then use it for shared secret */
+                        if (ssl->ctx->X448SharedSecretCb != NULL) {
+                            break;
+                        }
+                    #endif
+
+                        ret = wc_curve448_export_public_ex(
+                                (curve448_key*)ssl->hsKey,
+                                args->encSecret + OPAQUE8_LEN, &args->encSz,
+                                EC448_LITTLE_ENDIAN);
+                        if (ret != 0) {
+                            ERROR_OUT(ECC_EXPORT_ERROR, exit_scke);
+                        }
+
+                        break;
+                    }
+                #endif
+                #if defined(HAVE_ECC) && defined(HAVE_ECC_KEY_EXPORT)
                 #ifdef HAVE_PK_CALLBACKS
                     /* if callback then use it for shared secret */
                     if (ssl->ctx->EccSharedSecretCb != NULL) {
@@ -19470,7 +22825,7 @@
                 #endif /* HAVE_ECC */
                     break;
                 }
-            #endif /* HAVE_ECC || HAVE_CURVE25519 */
+            #endif /* HAVE_ECC || HAVE_CURVE25519 || HAVE_CURVE448 */
 
                 default:
                     ret = BAD_KEA_TYPE_E;
@@ -19493,16 +22848,26 @@
             #ifndef NO_RSA
                 case rsa_kea:
                 {
-                    ret = RsaEnc(ssl,
-                        ssl->arrays->preMasterSecret, SECRET_LEN,
-                        args->encSecret, &args->encSz,
-                        ssl->peerRsaKey,
-                    #if defined(HAVE_PK_CALLBACKS)
-                        &ssl->buffers.peerRsaKey
-                    #else
-                        NULL
-                    #endif
-                    );
+                    #if defined(WOLFSSL_RENESAS_TSIP_TLS) && \
+                       !defined(NO_WOLFSSL_RENESAS_TSIP_TLS_SESSION)
+                    if (tsip_useable(ssl) &&
+                                     wc_RsaEncryptSize(ssl->peerRsaKey) == 256) {
+                        ret = tsip_generateEncryptPreMasterSecret(ssl,
+                                                            args->encSecret,
+                                                            &args->encSz);
+
+                    } else
+                    #endif
+                        ret = RsaEnc(ssl,
+                            ssl->arrays->preMasterSecret, SECRET_LEN,
+                            args->encSecret, &args->encSz,
+                            ssl->peerRsaKey,
+                        #if defined(HAVE_PK_CALLBACKS)
+                            &ssl->buffers.peerRsaKey
+                        #else
+                            NULL
+                        #endif
+                        );
 
                     break;
                 }
@@ -19537,8 +22902,8 @@
                     break;
                 }
             #endif /* !NO_DH && !NO_PSK */
-            #if (defined(HAVE_ECC) || defined(HAVE_CURVE25519)) && \
-                                                                !defined(NO_PSK)
+            #if (defined(HAVE_ECC) || defined(HAVE_CURVE25519) || \
+                                     defined(HAVE_CURVE448)) && !defined(NO_PSK)
                 case ecdhe_psk_kea:
                 {
                 #ifdef HAVE_CURVE25519
@@ -19550,7 +22915,11 @@
                             &ssl->arrays->preMasterSz,
                             WOLFSSL_CLIENT_END
                         );
-                        if (ret == 0 && !ssl->specs.static_ecdh) {
+                        if (!ssl->specs.static_ecdh
+                        #ifdef WOLFSSL_ASYNC_CRYPT
+                            && ret != WC_PENDING_E
+                        #endif
+                        ) {
                             FreeKey(ssl, DYNAMIC_TYPE_CURVE25519,
                                                    (void**)&ssl->peerX25519Key);
                             ssl->peerX25519KeyPresent = 0;
@@ -19558,6 +22927,27 @@
                         break;
                     }
                 #endif
+                #ifdef HAVE_CURVE448
+                    if (ssl->peerX448KeyPresent) {
+                        ret = X448SharedSecret(ssl,
+                            (curve448_key*)ssl->hsKey, ssl->peerX448Key,
+                            args->output + OPAQUE8_LEN, &args->length,
+                            ssl->arrays->preMasterSecret + OPAQUE16_LEN,
+                            &ssl->arrays->preMasterSz,
+                            WOLFSSL_CLIENT_END
+                        );
+                        if (!ssl->specs.static_ecdh
+                        #ifdef WOLFSSL_ASYNC_CRYPT
+                            && ret != WC_PENDING_E
+                        #endif
+                        ) {
+                            FreeKey(ssl, DYNAMIC_TYPE_CURVE448,
+                                                     (void**)&ssl->peerX448Key);
+                            ssl->peerX448KeyPresent = 0;
+                        }
+                        break;
+                    }
+                #endif
                     ret = EccSharedSecret(ssl,
                         (ecc_key*)ssl->hsKey, ssl->peerEccKey,
                         args->output + OPAQUE8_LEN, &args->length,
@@ -19565,9 +22955,17 @@
                         &ssl->arrays->preMasterSz,
                         WOLFSSL_CLIENT_END
                     );
-                    break;
-                }
-            #endif /* (HAVE_ECC || HAVE_CURVE25519) && !NO_PSK */
+                #ifdef WOLFSSL_ASYNC_CRYPT
+                    if (ret != WC_PENDING_E)
+                #endif
+                    {
+                        FreeKey(ssl, DYNAMIC_TYPE_ECC,
+                                                      (void**)&ssl->peerEccKey);
+                        ssl->peerEccKeyPresent = 0;
+                    }
+                    break;
+                }
+            #endif /* (HAVE_ECC || HAVE_CURVE25519 || HAVE_CURVE448) && !NO_PSK */
             #ifdef HAVE_NTRU
                 case ntru_kea:
                 {
@@ -19594,7 +22992,8 @@
                     break;
                 }
             #endif /* HAVE_NTRU */
-            #if defined(HAVE_ECC) || defined(HAVE_CURVE25519)
+            #if defined(HAVE_ECC) || defined(HAVE_CURVE25519) || \
+                                                          defined(HAVE_CURVE448)
                 case ecc_diffie_hellman_kea:
                 {
                 #ifdef HAVE_ECC
@@ -19610,7 +23009,11 @@
                             &ssl->arrays->preMasterSz,
                             WOLFSSL_CLIENT_END
                         );
-                        if (ret == 0) {
+                        if (!ssl->specs.static_ecdh
+                        #ifdef WOLFSSL_ASYNC_CRYPT
+                            && ret != WC_PENDING_E
+                        #endif
+                        ) {
                             FreeKey(ssl, DYNAMIC_TYPE_CURVE25519,
                                                    (void**)&ssl->peerX25519Key);
                             ssl->peerX25519KeyPresent = 0;
@@ -19618,6 +23021,27 @@
                         break;
                     }
                 #endif
+                #ifdef HAVE_CURVE448
+                    if (ssl->peerX448KeyPresent) {
+                        ret = X448SharedSecret(ssl,
+                            (curve448_key*)ssl->hsKey, ssl->peerX448Key,
+                            args->encSecret + OPAQUE8_LEN, &args->encSz,
+                            ssl->arrays->preMasterSecret,
+                            &ssl->arrays->preMasterSz,
+                            WOLFSSL_CLIENT_END
+                        );
+                        if (!ssl->specs.static_ecdh
+                        #ifdef WOLFSSL_ASYNC_CRYPT
+                            && ret != WC_PENDING_E
+                        #endif
+                        ) {
+                            FreeKey(ssl, DYNAMIC_TYPE_CURVE448,
+                                                     (void**)&ssl->peerX448Key);
+                            ssl->peerX448KeyPresent = 0;
+                        }
+                        break;
+                    }
+                #endif
                 #ifdef HAVE_ECC
                     peerKey = (ssl->specs.static_ecdh) ?
                               ssl->peerEccDsaKey : ssl->peerEccKey;
@@ -19629,11 +23053,20 @@
                         &ssl->arrays->preMasterSz,
                         WOLFSSL_CLIENT_END
                     );
-                #endif
-
-                    break;
-                }
-            #endif /* HAVE_ECC || HAVE_CURVE25519 */
+                    if (!ssl->specs.static_ecdh
+                #ifdef WOLFSSL_ASYNC_CRYPT
+                        && ret != WC_PENDING_E
+                #endif
+                     && !ssl->options.keepResources) {
+                        FreeKey(ssl, DYNAMIC_TYPE_ECC,
+                                                      (void**)&ssl->peerEccKey);
+                        ssl->peerEccKeyPresent = 0;
+                    }
+                #endif
+
+                    break;
+                }
+            #endif /* HAVE_ECC || HAVE_CURVE25519 || HAVE_CURVE448 */
 
                 default:
                     ret = BAD_KEA_TYPE_E;
@@ -19699,8 +23132,8 @@
                     break;
                 }
             #endif /* !NO_DH && !NO_PSK */
-            #if defined(HAVE_ECC) && !defined(HAVE_CURVE25519) && \
-                                                                !defined(NO_PSK)
+            #if (defined(HAVE_ECC) || defined(HAVE_CURVE25519) || \
+                                     defined(HAVE_CURVE448)) && !defined(NO_PSK)
                 case ecdhe_psk_kea:
                 {
                     byte* pms = ssl->arrays->preMasterSecret;
@@ -19714,7 +23147,7 @@
                     *args->output = (byte)args->length;
                     args->encSz += args->length + OPAQUE8_LEN;
 
-                    /* Create pre master secret is the concatination of
+                    /* Create pre master secret is the concatenation of
                        eccSize + eccSharedKey + pskSize + pskKey */
                     c16toa((word16)ssl->arrays->preMasterSz, pms);
                     ssl->arrays->preMasterSz += OPAQUE16_LEN;
@@ -19730,14 +23163,15 @@
                     ssl->arrays->psk_keySz = 0; /* No further need */
                     break;
                 }
-            #endif /* (HAVE_ECC && !HAVE_CURVE25519) && !NO_PSK */
+            #endif /* (HAVE_ECC || HAVE_CURVE25519 || HAVE_CURVE448) && !NO_PSK */
             #ifdef HAVE_NTRU
                 case ntru_kea:
                 {
                     break;
                 }
             #endif /* HAVE_NTRU */
-            #if defined(HAVE_ECC) || defined(HAVE_CURVE25519)
+            #if defined(HAVE_ECC) || defined(HAVE_CURVE25519) || \
+                                                          defined(HAVE_CURVE448)
                 case ecc_diffie_hellman_kea:
                 {
                     /* place size of public key in buffer */
@@ -19745,7 +23179,7 @@
                     args->encSz += OPAQUE8_LEN;
                     break;
                 }
-            #endif /* HAVE_ECC || HAVE_CURVE25519 */
+            #endif /* HAVE_ECC || HAVE_CURVE25519 || HAVE_CURVE448 */
 
                 default:
                     ret = BAD_KEA_TYPE_E;
@@ -19936,7 +23370,9 @@
 #endif
 
     /* No further need for PMS */
-    ForceZero(ssl->arrays->preMasterSecret, ssl->arrays->preMasterSz);
+    if (ssl->arrays->preMasterSecret != NULL) {
+        ForceZero(ssl->arrays->preMasterSecret, ssl->arrays->preMasterSz);
+    }
     ssl->arrays->preMasterSz = 0;
 
     /* Final cleanup */
@@ -19980,6 +23416,12 @@
                 ssl->hsType = DYNAMIC_TYPE_ED25519;
                 break;
         #endif
+        #ifdef HAVE_ED448
+            case ed448_sa_algo:
+                sigSz = ED448_SIG_SIZE; /* fixed known value */
+                ssl->hsType = DYNAMIC_TYPE_ED448;
+                break;
+        #endif
             default:
                 break;
         }
@@ -20066,7 +23508,7 @@
                 return 0;  /* sent blank cert, can't verify */
             }
 
-            args->sendSz = MAX_CERT_VERIFY_SZ;
+            args->sendSz = MAX_CERT_VERIFY_SZ + MAX_MSG_EXTRA;
             if (IsEncryptionOn(ssl, 1)) {
                 args->sendSz += MAX_MSG_EXTRA;
             }
@@ -20108,7 +23550,7 @@
                 }
             }
 
-            if (args->length <= 0) {
+            if (args->length == 0) {
                 ERROR_OUT(NO_PRIVATE_KEY, exit_scv);
             }
 
@@ -20158,6 +23600,8 @@
                 args->sigAlgo = ecc_dsa_sa_algo;
             else if (ssl->hsType == DYNAMIC_TYPE_ED25519)
                 args->sigAlgo = ed25519_sa_algo;
+            else if (ssl->hsType == DYNAMIC_TYPE_ED448)
+                args->sigAlgo = ed448_sa_algo;
 
             if (IsAtLeastTLSv1_2(ssl)) {
                 EncodeSigAlg(ssl->suites->hashAlgo, args->sigAlgo,
@@ -20188,6 +23632,7 @@
                 /* prepend hdr */
                 c16toa(args->length, args->verify + args->extraSz);
             }
+            #ifdef WC_RSA_PSS
             else if (args->sigAlgo == rsa_pss_sa_algo) {
                 XMEMCPY(ssl->buffers.sig.buffer, ssl->buffers.digest.buffer,
                         ssl->buffers.digest.length);
@@ -20197,6 +23642,7 @@
                 /* prepend hdr */
                 c16toa(args->length, args->verify + args->extraSz);
             }
+            #endif
         #endif /* !NO_RSA */
         #if defined(HAVE_ED25519) && !defined(NO_ED25519_CLIENT_AUTH)
             if (args->sigAlgo == ed25519_sa_algo) {
@@ -20205,6 +23651,13 @@
                     goto exit_scv;
             }
         #endif /* HAVE_ED25519 && !NO_ED25519_CLIENT_AUTH */
+        #if defined(HAVE_ED448) && !defined(NO_ED448_CLIENT_AUTH)
+            if (args->sigAlgo == ed448_sa_algo) {
+                ret = Ed448CheckPubKey(ssl);
+                if (ret != 0)
+                    goto exit_scv;
+            }
+        #endif /* HAVE_ED448 && !NO_ED448_CLIENT_AUTH */
 
             /* Advance state and proceed */
             ssl->options.asyncState = TLS_ASYNC_DO;
@@ -20219,7 +23672,7 @@
 
                 ret = EccSign(ssl,
                     ssl->buffers.digest.buffer, ssl->buffers.digest.length,
-                    ssl->buffers.sig.buffer, &ssl->buffers.sig.length,
+                    ssl->buffers.sig.buffer, (word32*)&ssl->buffers.sig.length,
                     key,
             #ifdef HAVE_PK_CALLBACKS
                     ssl->buffers.key
@@ -20235,7 +23688,7 @@
 
                 ret = Ed25519Sign(ssl,
                     ssl->hsHashes->messages, ssl->hsHashes->length,
-                    ssl->buffers.sig.buffer, &ssl->buffers.sig.length,
+                    ssl->buffers.sig.buffer, (word32*)&ssl->buffers.sig.length,
                     key,
             #ifdef HAVE_PK_CALLBACKS
                     ssl->buffers.key
@@ -20245,6 +23698,22 @@
                 );
             }
         #endif /* HAVE_ED25519 && !NO_ED25519_CLIENT_AUTH */
+        #if defined(HAVE_ED448) && !defined(NO_ED448_CLIENT_AUTH)
+           if (ssl->hsType == DYNAMIC_TYPE_ED448) {
+                ed448_key* key = (ed448_key*)ssl->hsKey;
+
+                ret = Ed448Sign(ssl,
+                    ssl->hsHashes->messages, ssl->hsHashes->length,
+                    ssl->buffers.sig.buffer, (word32*)&ssl->buffers.sig.length,
+                    key,
+            #ifdef HAVE_PK_CALLBACKS
+                    ssl->buffers.key
+            #else
+                    NULL
+            #endif
+                );
+            }
+        #endif /* HAVE_ED448 && !NO_ED448_CLIENT_AUTH */
         #ifndef NO_RSA
             if (ssl->hsType == DYNAMIC_TYPE_RSA) {
                 RsaKey* key = (RsaKey*)ssl->hsKey;
@@ -20276,47 +23745,52 @@
             /* restore verify pointer */
             args->verify = &args->output[args->idx];
 
-        #ifdef HAVE_ECC
-            if (ssl->hsType == DYNAMIC_TYPE_ECC) {
-                args->length = (word16)ssl->buffers.sig.length;
-                /* prepend hdr */
-                c16toa(args->length, args->verify + args->extraSz);
-                XMEMCPY(args->verify + args->extraSz + VERIFY_HEADER,
-                        ssl->buffers.sig.buffer, ssl->buffers.sig.length);
-            }
-        #endif /* HAVE_ECC */
-        #ifdef HAVE_ED25519
-            if (ssl->hsType == DYNAMIC_TYPE_ED25519) {
-                args->length = (word16)ssl->buffers.sig.length;
-                /* prepend hdr */
-                c16toa(args->length, args->verify + args->extraSz);
-                XMEMCPY(args->verify + args->extraSz + VERIFY_HEADER,
-                        ssl->buffers.sig.buffer, ssl->buffers.sig.length);
-            }
-        #endif /* HAVE_ED25519 */
-        #ifndef NO_RSA
-            if (ssl->hsType == DYNAMIC_TYPE_RSA) {
-                RsaKey* key = (RsaKey*)ssl->hsKey;
-
-                if (args->verifySig == NULL) {
-                    args->verifySig = (byte*)XMALLOC(args->sigSz, ssl->heap,
-                                      DYNAMIC_TYPE_SIGNATURE);
+            switch (ssl->hsType) {
+        #if defined(HAVE_ECC) || defined(HAVE_ED25519) || defined(HAVE_ED448)
+            #ifdef HAVE_ECC
+                case DYNAMIC_TYPE_ECC:
+            #endif
+            #ifdef HAVE_ED25519
+                case DYNAMIC_TYPE_ED25519:
+            #endif
+            #ifdef HAVE_ED448
+                case DYNAMIC_TYPE_ED448:
+            #endif
+                    args->length = (word16)ssl->buffers.sig.length;
+                    /* prepend hdr */
+                    c16toa(args->length, args->verify + args->extraSz);
+                    XMEMCPY(args->verify + args->extraSz + VERIFY_HEADER,
+                            ssl->buffers.sig.buffer, ssl->buffers.sig.length);
+                    break;
+        #endif
+            #ifndef NO_RSA
+                case DYNAMIC_TYPE_RSA:
+                {
+                    RsaKey* key = (RsaKey*)ssl->hsKey;
+
                     if (args->verifySig == NULL) {
-                        ERROR_OUT(MEMORY_E, exit_scv);
-                    }
-                    XMEMCPY(args->verifySig, args->verify + args->extraSz +
+                        args->verifySig = (byte*)XMALLOC(args->sigSz, ssl->heap,
+                                          DYNAMIC_TYPE_SIGNATURE);
+                        if (args->verifySig == NULL) {
+                            ERROR_OUT(MEMORY_E, exit_scv);
+                        }
+                        XMEMCPY(args->verifySig, args->verify + args->extraSz +
                                                     VERIFY_HEADER, args->sigSz);
-                }
-
-                /* check for signature faults */
-                ret = VerifyRsaSign(ssl,
-                    args->verifySig, args->sigSz,
-                    ssl->buffers.sig.buffer, ssl->buffers.sig.length,
-                    args->sigAlgo, ssl->suites->hashAlgo, key,
-                    ssl->buffers.key
-                );
-            }
-        #endif /* !NO_RSA */
+                    }
+
+                    /* check for signature faults */
+                    ret = VerifyRsaSign(ssl,
+                        args->verifySig, args->sigSz,
+                        ssl->buffers.sig.buffer, ssl->buffers.sig.length,
+                        args->sigAlgo, ssl->suites->hashAlgo, key,
+                        ssl->buffers.key
+                    );
+                    break;
+                }
+            #endif /* !NO_RSA */
+                default:
+                    break;
+            }
 
             /* Check for error */
             if (ret != 0) {
@@ -20478,9 +23952,16 @@
                                    ssl->session_ticket_ctx);
         }
         /* Create a fake sessionID based on the ticket, this will
-         * supercede the existing session cache info. */
+         * supersede the existing session cache info. */
         ssl->options.haveSessionId = 1;
-        XMEMCPY(ssl->arrays->sessionID,
+#ifdef WOLFSSL_TLS13
+        if (ssl->options.tls1_3) {
+            XMEMCPY(ssl->session.sessionID,
+                                 ssl->session.ticket + length - ID_LEN, ID_LEN);
+        }
+        else
+#endif
+            XMEMCPY(ssl->arrays->sessionID,
                                  ssl->session.ticket + length - ID_LEN, ID_LEN);
     }
 
@@ -20503,7 +23984,7 @@
         return SESSION_TICKET_EXPECT_E;
     }
 
-    if ((*inOutIdx - begin) + OPAQUE32_LEN > size)
+    if (OPAQUE32_LEN > size)
         return BUFFER_ERROR;
 
     ato32(input + *inOutIdx, &lifetime);
@@ -20530,6 +24011,10 @@
 
     if (IsEncryptionOn(ssl, 0)) {
         *inOutIdx += ssl->keys.padSz;
+    #if defined(HAVE_ENCRYPT_THEN_MAC) && !defined(WOLFSSL_AEAD_ONLY)
+        if (ssl->options.startedETMRead)
+            *inOutIdx += MacSize(ssl);
+    #endif
     }
 
     ssl->expect_session_ticket = 0;
@@ -20543,227 +24028,10 @@
 
 #endif /* NO_WOLFSSL_CLIENT */
 
-#ifndef NO_WOLFSSL_SERVER
-
-#ifndef WOLFSSL_NO_TLS12
-
-    /* handle generation of server_hello (2) */
-    int SendServerHello(WOLFSSL* ssl)
-    {
-        int    ret;
-        byte   *output;
-        word16 length;
-        word32 idx = RECORD_HEADER_SZ + HANDSHAKE_HEADER_SZ;
-        int    sendSz;
-        byte   sessIdSz = ID_LEN;
-        byte   echoId   = 0;  /* ticket echo id flag */
-        byte   cacheOff = 0;  /* session cache off flag */
-
-        WOLFSSL_START(WC_FUNC_SERVER_HELLO_SEND);
-        WOLFSSL_ENTER("SendServerHello");
-
-        length = VERSION_SZ + RAN_LEN
-               + ID_LEN + ENUM_LEN
-               + SUITE_LEN
-               + ENUM_LEN;
-
-#ifdef HAVE_TLS_EXTENSIONS
-        ret = TLSX_GetResponseSize(ssl, server_hello, &length);
-        if (ret != 0)
-            return ret;
-    #ifdef HAVE_SESSION_TICKET
-        if (ssl->options.useTicket) {
-            /* echo session id sz can be 0,32 or bogus len inbetween */
-            sessIdSz = ssl->arrays->sessionIDSz;
-            if (sessIdSz > ID_LEN) {
-                WOLFSSL_MSG("Bad bogus session id len");
-                return BUFFER_ERROR;
-            }
-            if (!IsAtLeastTLSv1_3(ssl->version))
-                length -= (ID_LEN - sessIdSz);  /* adjust ID_LEN assumption */
-            echoId = 1;
-        }
-    #endif /* HAVE_SESSION_TICKET */
-#else
-        if (ssl->options.haveEMS) {
-            length += HELLO_EXT_SZ_SZ + HELLO_EXT_SZ;
-        }
-#endif
-
-        /* is the session cahce off at build or runtime */
-#ifdef NO_SESSION_CACHE
-        cacheOff = 1;
-#else
-        if (ssl->options.sessionCacheOff == 1) {
-            cacheOff = 1;
-        }
-#endif
-
-        /* if no session cache don't send a session ID unless we're echoing
-         * an ID as part of session tickets */
-        if (echoId == 0 && cacheOff == 1) {
-            length -= ID_LEN;    /* adjust ID_LEN assumption */
-            sessIdSz = 0;
-        }
-
-        sendSz = length + HANDSHAKE_HEADER_SZ + RECORD_HEADER_SZ;
-        #ifdef WOLFSSL_DTLS
-        if (ssl->options.dtls) {
-            /* Server Hello should use the same sequence number as the
-             * Client Hello. */
-            ssl->keys.dtls_sequence_number_hi = ssl->keys.curSeq_hi;
-            ssl->keys.dtls_sequence_number_lo = ssl->keys.curSeq_lo;
-            idx    += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
-            sendSz += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
-        }
-        #endif /* WOLFSSL_DTLS */
-
-        /* check for avalaible size */
-        if ((ret = CheckAvailableSize(ssl, sendSz)) != 0)
-            return ret;
-
-        /* get output buffer */
-        output = ssl->buffers.outputBuffer.buffer +
-                 ssl->buffers.outputBuffer.length;
-
-        AddHeaders(output, length, server_hello, ssl);
-
-        /* now write to output */
-        /* first version */
-        output[idx++] = (byte)ssl->version.major;
-        output[idx++] = (byte)ssl->version.minor;
-
-        /* then random and session id */
-        if (!ssl->options.resuming) {
-            /* generate random part and session id */
-            ret = wc_RNG_GenerateBlock(ssl->rng, output + idx,
-                RAN_LEN + sizeof(sessIdSz) + sessIdSz);
-            if (ret != 0)
-                return ret;
-
-#ifdef WOLFSSL_TLS13
-            if (IsAtLeastTLSv1_3(ssl->ctx->method->version)) {
-                /* TLS v1.3 capable server downgraded. */
-                XMEMCPY(output + idx + RAN_LEN - (TLS13_DOWNGRADE_SZ + 1),
-                        tls13Downgrade, TLS13_DOWNGRADE_SZ);
-                output[idx + RAN_LEN - 1] = (byte)IsAtLeastTLSv1_2(ssl);
-            }
-            else
-#endif
-            if (ssl->ctx->method->version.major == SSLv3_MAJOR &&
-                          ssl->ctx->method->version.minor == TLSv1_2_MINOR &&
-                                                       !IsAtLeastTLSv1_2(ssl)) {
-                /* TLS v1.2 capable server downgraded. */
-                XMEMCPY(output + idx + RAN_LEN - (TLS13_DOWNGRADE_SZ + 1),
-                        tls13Downgrade, TLS13_DOWNGRADE_SZ);
-                output[idx + RAN_LEN - 1] = 0;
-            }
-
-            /* store info in SSL for later */
-            XMEMCPY(ssl->arrays->serverRandom, output + idx, RAN_LEN);
-            idx += RAN_LEN;
-            output[idx++] = sessIdSz;
-            XMEMCPY(ssl->arrays->sessionID, output + idx, sessIdSz);
-            ssl->arrays->sessionIDSz = sessIdSz;
-        }
-        else {
-            /* If resuming, use info from SSL */
-            XMEMCPY(output + idx, ssl->arrays->serverRandom, RAN_LEN);
-            idx += RAN_LEN;
-            output[idx++] = sessIdSz;
-            XMEMCPY(output + idx, ssl->arrays->sessionID, sessIdSz);
-        }
-        idx += sessIdSz;
-
-#ifdef SHOW_SECRETS
-        {
-            int j;
-            printf("server random: ");
-            for (j = 0; j < RAN_LEN; j++)
-                printf("%02x", ssl->arrays->serverRandom[j]);
-            printf("\n");
-        }
-#endif
-
-        /* then cipher suite */
-        output[idx++] = ssl->options.cipherSuite0;
-        output[idx++] = ssl->options.cipherSuite;
-
-        /* then compression */
-        if (ssl->options.usingCompression)
-            output[idx++] = ZLIB_COMPRESSION;
-        else
-            output[idx++] = NO_COMPRESSION;
-
-        /* last, extensions */
-#ifdef HAVE_TLS_EXTENSIONS
-        ret = TLSX_WriteResponse(ssl, output + idx, server_hello, NULL);
-        if (ret != 0)
-            return ret;
-#else
-#ifdef HAVE_EXTENDED_MASTER
-        if (ssl->options.haveEMS) {
-            c16toa(HELLO_EXT_SZ, output + idx);
-            idx += HELLO_EXT_SZ_SZ;
-
-            c16toa(HELLO_EXT_EXTMS, output + idx);
-            idx += HELLO_EXT_TYPE_SZ;
-            c16toa(0, output + idx);
-            /*idx += HELLO_EXT_SZ_SZ;*/
-            /* idx is not used after this point. uncomment the line above
-             * if adding any more extentions in the future. */
-        }
-#endif
-#endif
-
-        ssl->buffers.outputBuffer.length += sendSz;
-        #ifdef WOLFSSL_DTLS
-            if (IsDtlsNotSctpMode(ssl)) {
-                if ((ret = DtlsMsgPoolSave(ssl, output, sendSz)) != 0)
-                    return ret;
-            }
-
-            if (ssl->options.dtls) {
-                DtlsSEQIncrement(ssl, CUR_ORDER);
-            }
-        #endif
-
-        ret = HashOutput(ssl, output, sendSz, 0);
-        if (ret != 0)
-            return ret;
-
-    #if defined(WOLFSSL_CALLBACKS) || defined(OPENSSL_EXTRA)
-        if (ssl->hsInfoOn)
-            AddPacketName(ssl, "ServerHello");
-        if (ssl->toInfoOn)
-            AddPacketInfo(ssl, "ServerHello", handshake, output, sendSz,
-                          WRITE_PROTO, ssl->heap);
-    #endif
-
-        ssl->options.serverState = SERVER_HELLO_COMPLETE;
-
-        if (ssl->options.groupMessages)
-            ret = 0;
-        else
-            ret = SendBuffered(ssl);
-
-        WOLFSSL_LEAVE("SendServerHello", ret);
-        WOLFSSL_END(WC_FUNC_SERVER_HELLO_SEND);
-
-        return ret;
-    }
-
-
-#if defined(HAVE_ECC)
-
-    static byte SetCurveId(ecc_key* key)
-    {
-        if (key == NULL || key->dp == NULL) {
-            WOLFSSL_MSG("SetCurveId: Invalid key!");
-            return 0;
-        }
-
-        switch(key->dp->oidSum) {
+#ifdef HAVE_ECC
+    /* returns the WOLFSSL_* version of the curve from the OID sum */
+    word16 GetCurveByOID(int oidSum) {
+        switch(oidSum) {
     #if defined(HAVE_ECC160) || defined(HAVE_ALL_CURVES)
         #ifndef NO_ECC_SECP
             case ECC_SECP160R1_OID:
@@ -20835,39 +24103,291 @@
         #endif /* !NO_ECC_SECP */
     #endif
             default:
+                WOLFSSL_MSG("Curve OID not compiled in or implemented");
                 return 0;
         }
     }
-
-#endif /* HAVE_ECC || HAVE_CURVE25519 */
+#endif /* HAVE_ECC */
+
+
+#ifndef NO_WOLFSSL_SERVER
+
+#ifndef WOLFSSL_NO_TLS12
+
+    /* handle generation of server_hello (2) */
+    int SendServerHello(WOLFSSL* ssl)
+    {
+        int    ret;
+        byte   *output;
+        word16 length;
+        word32 idx = RECORD_HEADER_SZ + HANDSHAKE_HEADER_SZ;
+        int    sendSz;
+        byte   sessIdSz = ID_LEN;
+        byte   echoId   = 0;  /* ticket echo id flag */
+        byte   cacheOff = 0;  /* session cache off flag */
+
+        WOLFSSL_START(WC_FUNC_SERVER_HELLO_SEND);
+        WOLFSSL_ENTER("SendServerHello");
+
+        length = VERSION_SZ + RAN_LEN
+               + ID_LEN + ENUM_LEN
+               + SUITE_LEN
+               + ENUM_LEN;
+
+#ifdef HAVE_TLS_EXTENSIONS
+        ret = TLSX_GetResponseSize(ssl, server_hello, &length);
+        if (ret != 0)
+            return ret;
+    #ifdef HAVE_SESSION_TICKET
+        if (ssl->options.useTicket) {
+            /* echo session id sz can be 0,32 or bogus len in between */
+            sessIdSz = ssl->arrays->sessionIDSz;
+            if (sessIdSz > ID_LEN) {
+                WOLFSSL_MSG("Bad bogus session id len");
+                return BUFFER_ERROR;
+            }
+            if (!IsAtLeastTLSv1_3(ssl->version))
+                length -= (ID_LEN - sessIdSz);  /* adjust ID_LEN assumption */
+            echoId = 1;
+        }
+    #endif /* HAVE_SESSION_TICKET */
+#else
+        if (ssl->options.haveEMS) {
+            length += HELLO_EXT_SZ_SZ + HELLO_EXT_SZ;
+        }
+#endif
+
+        /* is the session cache off at build or runtime */
+#ifdef NO_SESSION_CACHE
+        cacheOff = 1;
+#else
+        if (ssl->options.sessionCacheOff == 1) {
+            cacheOff = 1;
+        }
+#endif
+
+        /* if no session cache don't send a session ID unless we're echoing
+         * an ID as part of session tickets */
+        if (echoId == 0 && cacheOff == 1) {
+            length -= ID_LEN;    /* adjust ID_LEN assumption */
+            sessIdSz = 0;
+        }
+
+        sendSz = length + HANDSHAKE_HEADER_SZ + RECORD_HEADER_SZ;
+        #ifdef WOLFSSL_DTLS
+        if (ssl->options.dtls) {
+            /* Server Hello should use the same sequence number as the
+             * Client Hello. */
+            ssl->keys.dtls_sequence_number_hi = ssl->keys.curSeq_hi;
+            ssl->keys.dtls_sequence_number_lo = ssl->keys.curSeq_lo;
+            idx    += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
+            sendSz += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
+        }
+        #endif /* WOLFSSL_DTLS */
+
+        if (IsEncryptionOn(ssl, 1))
+            sendSz += MAX_MSG_EXTRA;
+
+        /* check for available size */
+        if ((ret = CheckAvailableSize(ssl, sendSz)) != 0)
+            return ret;
+
+        /* get output buffer */
+        output = ssl->buffers.outputBuffer.buffer +
+                 ssl->buffers.outputBuffer.length;
+
+        AddHeaders(output, length, server_hello, ssl);
+
+        /* now write to output */
+        /* first version */
+        output[idx++] = (byte)ssl->version.major;
+        output[idx++] = (byte)ssl->version.minor;
+
+        /* then random and session id */
+        if (!ssl->options.resuming) {
+            /* generate random part and session id */
+            ret = wc_RNG_GenerateBlock(ssl->rng, output + idx,
+                RAN_LEN + sizeof(sessIdSz) + sessIdSz);
+            if (ret != 0)
+                return ret;
+
+#ifdef WOLFSSL_TLS13
+            if (TLSv1_3_Capable(ssl)) {
+                /* TLS v1.3 capable server downgraded. */
+                XMEMCPY(output + idx + RAN_LEN - (TLS13_DOWNGRADE_SZ + 1),
+                        tls13Downgrade, TLS13_DOWNGRADE_SZ);
+                output[idx + RAN_LEN - 1] = (byte)IsAtLeastTLSv1_2(ssl);
+            }
+            else
+#endif
+            if (ssl->ctx->method->version.major == SSLv3_MAJOR &&
+                          ssl->ctx->method->version.minor == TLSv1_2_MINOR &&
+                                                       !IsAtLeastTLSv1_2(ssl)) {
+                /* TLS v1.2 capable server downgraded. */
+                XMEMCPY(output + idx + RAN_LEN - (TLS13_DOWNGRADE_SZ + 1),
+                        tls13Downgrade, TLS13_DOWNGRADE_SZ);
+                output[idx + RAN_LEN - 1] = 0;
+            }
+
+            /* store info in SSL for later */
+            XMEMCPY(ssl->arrays->serverRandom, output + idx, RAN_LEN);
+            idx += RAN_LEN;
+            output[idx++] = sessIdSz;
+            XMEMCPY(ssl->arrays->sessionID, output + idx, sessIdSz);
+            ssl->arrays->sessionIDSz = sessIdSz;
+        }
+        else {
+            /* If resuming, use info from SSL */
+            XMEMCPY(output + idx, ssl->arrays->serverRandom, RAN_LEN);
+            idx += RAN_LEN;
+            output[idx++] = sessIdSz;
+            XMEMCPY(output + idx, ssl->arrays->sessionID, sessIdSz);
+        }
+        idx += sessIdSz;
+
+#ifdef SHOW_SECRETS
+        {
+            int j;
+            printf("server random: ");
+            for (j = 0; j < RAN_LEN; j++)
+                printf("%02x", ssl->arrays->serverRandom[j]);
+            printf("\n");
+        }
+#endif
+
+        /* then cipher suite */
+        output[idx++] = ssl->options.cipherSuite0;
+        output[idx++] = ssl->options.cipherSuite;
+
+        /* then compression */
+        if (ssl->options.usingCompression)
+            output[idx++] = ZLIB_COMPRESSION;
+        else
+            output[idx++] = NO_COMPRESSION;
+
+        /* last, extensions */
+#ifdef HAVE_TLS_EXTENSIONS
+        {
+            word16 offset = 0;
+            ret = TLSX_WriteResponse(ssl, output + idx, server_hello, &offset);
+            if (ret != 0)
+                return ret;
+            idx += offset;
+        }
+#else
+#ifdef HAVE_EXTENDED_MASTER
+        if (ssl->options.haveEMS) {
+            c16toa(HELLO_EXT_SZ, output + idx);
+            idx += HELLO_EXT_SZ_SZ;
+
+            c16toa(HELLO_EXT_EXTMS, output + idx);
+            idx += HELLO_EXT_TYPE_SZ;
+            c16toa(0, output + idx);
+            /*idx += HELLO_EXT_SZ_SZ;*/
+            /* idx is not used after this point. uncomment the line above
+             * if adding any more extensions in the future. */
+        }
+#endif
+#endif
+
+        if (IsEncryptionOn(ssl, 1)) {
+            byte* input;
+            int   inputSz = idx - RECORD_HEADER_SZ; /* build msg adds rec hdr */
+
+            input = (byte*)XMALLOC(inputSz, ssl->heap, DYNAMIC_TYPE_IN_BUFFER);
+            if (input == NULL)
+                return MEMORY_E;
+
+            XMEMCPY(input, output + RECORD_HEADER_SZ, inputSz);
+            sendSz = BuildMessage(ssl, output, sendSz, input, inputSz,
+                                  handshake, 1, 0, 0);
+            XFREE(input, ssl->heap, DYNAMIC_TYPE_IN_BUFFER);
+
+            if (sendSz < 0)
+                return sendSz;
+        } else {
+            #ifdef WOLFSSL_DTLS
+                if (ssl->options.dtls)
+                    DtlsSEQIncrement(ssl, CUR_ORDER);
+            #endif
+            ret = HashOutput(ssl, output, sendSz, 0);
+            if (ret != 0)
+                return ret;
+        }
+
+    #if defined(WOLFSSL_CALLBACKS) || defined(OPENSSL_EXTRA)
+        if (ssl->hsInfoOn)
+            AddPacketName(ssl, "ServerHello");
+        if (ssl->toInfoOn)
+            AddPacketInfo(ssl, "ServerHello", handshake, output, sendSz,
+                          WRITE_PROTO, ssl->heap);
+    #endif
+
+        ssl->options.serverState = SERVER_HELLO_COMPLETE;
+        ssl->buffers.outputBuffer.length += sendSz;
+
+    #ifdef WOLFSSL_DTLS
+        if (IsDtlsNotSctpMode(ssl)) {
+            if ((ret = DtlsMsgPoolSave(ssl, output, sendSz)) != 0)
+                return ret;
+        }
+    #endif
+
+        if (ssl->options.groupMessages)
+            ret = 0;
+        else
+            ret = SendBuffered(ssl);
+
+        WOLFSSL_LEAVE("SendServerHello", ret);
+        WOLFSSL_END(WC_FUNC_SERVER_HELLO_SEND);
+
+        return ret;
+    }
+
+
+#if defined(HAVE_ECC)
+
+    static byte SetCurveId(ecc_key* key)
+    {
+        if (key == NULL || key->dp == NULL) {
+            WOLFSSL_MSG("SetCurveId: Invalid key!");
+            return 0;
+        }
+
+        return (byte)GetCurveByOID(key->dp->oidSum);
+    }
+
+#endif /* HAVE_ECC */
 
     typedef struct SskeArgs {
         byte*  output; /* not allocated */
-    #if defined(HAVE_ECC) || defined(HAVE_ED25519) || \
+    #if defined(HAVE_ECC) || defined(HAVE_ED25519) || defined(HAVE_ED448) || \
                                            (!defined(NO_DH) && !defined(NO_RSA))
         byte*  sigDataBuf;
     #endif
-    #if defined(HAVE_ECC) || defined(HAVE_CURVE25519)
+    #if defined(HAVE_ECC) || defined(HAVE_CURVE25519) || defined(HAVE_CURVE448)
         byte*  exportBuf;
     #endif
     #ifndef NO_RSA
         byte*  verifySig;
     #endif
+        byte*  input;
         word32 idx;
         word32 tmpSigSz;
         word32 length;
         word32 sigSz;
-    #if defined(HAVE_ECC) || defined(HAVE_ED25519) || \
+    #if defined(HAVE_ECC) || defined(HAVE_ED25519) || defined(HAVE_ED448) || \
                                            (!defined(NO_DH) && !defined(NO_RSA))
         word32 sigDataSz;
     #endif
-    #if defined(HAVE_ECC) || defined(HAVE_CURVE25519)
+    #if defined(HAVE_ECC) || defined(HAVE_CURVE25519) || defined(HAVE_CURVE448)
         word32 exportSz;
     #endif
     #ifdef HAVE_QSH
         word32 qshSz;
     #endif
         int    sendSz;
+        int    inputSz;
     } SskeArgs;
 
     static void FreeSskeArgs(WOLFSSL* ssl, void* pArgs)
@@ -20876,13 +24396,13 @@
 
         (void)ssl;
 
-    #if defined(HAVE_ECC) || defined(HAVE_CURVE25519)
+    #if defined(HAVE_ECC) || defined(HAVE_CURVE25519) || defined(HAVE_CURVE448)
         if (args->exportBuf) {
             XFREE(args->exportBuf, ssl->heap, DYNAMIC_TYPE_DER);
             args->exportBuf = NULL;
         }
     #endif
-    #if defined(HAVE_ECC) || defined(HAVE_ED25519) || \
+    #if defined(HAVE_ECC) || defined(HAVE_ED25519) || defined(HAVE_ED448) || \
                                            (!defined(NO_DH) && !defined(NO_RSA))
         if (args->sigDataBuf) {
             XFREE(args->sigDataBuf, ssl->heap, DYNAMIC_TYPE_SIGNATURE);
@@ -20945,19 +24465,20 @@
                 /* Do some checks / debug msgs */
                 switch(ssl->specs.kea)
                 {
-                #if (defined(HAVE_ECC) || defined(HAVE_CURVE25519)) && \
-                                                                !defined(NO_PSK)
+                #if (defined(HAVE_ECC) || defined(HAVE_CURVE25519) || \
+                                     defined(HAVE_CURVE448)) && !defined(NO_PSK)
                     case ecdhe_psk_kea:
                     {
                         WOLFSSL_MSG("Using ephemeral ECDH PSK");
                         break;
                     }
-                #endif /* (HAVE_ECC || CURVE25519) && !NO_PSK */
+                #endif /* (HAVE_ECC || CURVE25519 || CURVE448) && !NO_PSK */
                 #if defined(HAVE_ECC)
                     case ecc_diffie_hellman_kea:
                     {
                         if (ssl->specs.static_ecdh) {
-                            WOLFSSL_MSG("Using Static ECDH, not sending ServerKeyExchange");
+                            WOLFSSL_MSG("Using Static ECDH, not sending "
+                                        "ServerKeyExchange");
                             ERROR_OUT(0, exit_sske);
                         }
 
@@ -21020,29 +24541,52 @@
                             goto exit_sske;
                         }
 
-                        ret = wc_DhSetKey(ssl->buffers.serverDH_Key,
-                            ssl->buffers.serverDH_P.buffer,
-                            ssl->buffers.serverDH_P.length,
-                            ssl->buffers.serverDH_G.buffer,
-                            ssl->buffers.serverDH_G.length);
-                        if (ret != 0) {
-                            goto exit_sske;
+                        #if !defined(WOLFSSL_OLD_PRIME_CHECK) && \
+                            !defined(HAVE_FIPS) && \
+                            !defined(HAVE_SELFTEST)
+                        if (ssl->options.dhDoKeyTest &&
+                            !ssl->options.dhKeyTested)
+                        {
+                            ret = wc_DhSetCheckKey(
+                                ssl->buffers.serverDH_Key,
+                                ssl->buffers.serverDH_P.buffer,
+                                ssl->buffers.serverDH_P.length,
+                                ssl->buffers.serverDH_G.buffer,
+                                ssl->buffers.serverDH_G.length,
+                                NULL, 0, 0, ssl->rng);
+                            if (ret != 0) {
+                                goto exit_sske;
+                            }
+                            ssl->options.dhKeyTested = 1;
+                        }
+                        else
+                        #endif
+                        {
+                            ret = wc_DhSetKey(ssl->buffers.serverDH_Key,
+                                ssl->buffers.serverDH_P.buffer,
+                                ssl->buffers.serverDH_P.length,
+                                ssl->buffers.serverDH_G.buffer,
+                                ssl->buffers.serverDH_G.length);
+                            if (ret != 0) {
+                                goto exit_sske;
+                            }
                         }
 
                         ret = DhGenKeyPair(ssl, ssl->buffers.serverDH_Key,
                             ssl->buffers.serverDH_Priv.buffer,
-                            &ssl->buffers.serverDH_Priv.length,
+                            (word32*)&ssl->buffers.serverDH_Priv.length,
                             ssl->buffers.serverDH_Pub.buffer,
-                            &ssl->buffers.serverDH_Pub.length);
+                            (word32*)&ssl->buffers.serverDH_Pub.length);
                         break;
                     }
                 #endif /* !NO_DH && (!NO_PSK || !NO_RSA) */
-                #if (defined(HAVE_ECC) || defined(HAVE_CURVE25519)) && \
-                                                                !defined(NO_PSK)
+                #if (defined(HAVE_ECC) || defined(HAVE_CURVE25519) || \
+                                     defined(HAVE_CURVE448)) && !defined(NO_PSK)
                     case ecdhe_psk_kea:
                         /* Fall through to create temp ECC key */
-                #endif /* (HAVE_ECC || CURVE25519) && !NO_PSK */
-                #if defined(HAVE_ECC) || defined(HAVE_CURVE25519)
+                #endif /* (HAVE_ECC || CURVE25519 || CURVE448) && !NO_PSK */
+                #if defined(HAVE_ECC) || defined(HAVE_CURVE25519) || \
+                                                          defined(HAVE_CURVE448)
                     case ecc_diffie_hellman_kea:
                     {
                     #ifdef HAVE_CURVE25519
@@ -21068,6 +24612,29 @@
                             break;
                         }
                     #endif
+                    #ifdef HAVE_CURVE448
+                        if (ssl->ecdhCurveOID == ECC_X448_OID) {
+                            /* need ephemeral key now, create it if missing */
+                            if (ssl->eccTempKey == NULL) {
+                                /* alloc/init on demand */
+                                ret = AllocKey(ssl, DYNAMIC_TYPE_CURVE448,
+                                    (void**)&ssl->eccTempKey);
+                                if (ret != 0) {
+                                    goto exit_sske;
+                                }
+                            }
+
+                            if (ssl->eccTempKeyPresent == 0) {
+                                ret = X448MakeKey(ssl,
+                                          (curve448_key*)ssl->eccTempKey, NULL);
+                                if (ret == 0 || ret == WC_PENDING_E) {
+                                    ssl->eccTempKeyPresent =
+                                        DYNAMIC_TYPE_CURVE448;
+                                }
+                            }
+                            break;
+                        }
+                    #endif
                     #ifdef HAVE_ECC
                         /* need ephemeral key now, create it if missing */
                         if (ssl->eccTempKey == NULL) {
@@ -21088,7 +24655,7 @@
                     #endif
                         break;
                     }
-                #endif /* HAVE_ECC || HAVE_CURVE25519 */
+                #endif /* HAVE_ECC || HAVE_CURVE25519 || HAVE_CURVE448 */
                     default:
                         /* Skip ServerKeyExchange */
                         goto exit_sske;
@@ -21107,7 +24674,7 @@
             case TLS_ASYNC_BUILD:
             {
             #if (!defined(NO_DH) && !defined(NO_RSA)) || (defined(HAVE_ECC) || \
-                                                       defined(HAVE_CURVE25519))
+                             defined(HAVE_CURVE25519) || defined(HAVE_CURVE448))
                 word32 preSigSz, preSigIdx;
             #endif
 
@@ -21143,12 +24710,17 @@
                             args->idx    += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
                         }
                     #endif
+
+                        if (IsEncryptionOn(ssl, 1)) {
+                            args->sendSz += MAX_MSG_EXTRA;
+                        }
+
                         /* check for available size */
                         if ((ret = CheckAvailableSize(ssl, args->sendSz)) != 0) {
                             goto exit_sske;
                         }
 
-                        /* get ouput buffer */
+                        /* get output buffer */
                         args->output = ssl->buffers.outputBuffer.buffer +
                                        ssl->buffers.outputBuffer.length;
 
@@ -21202,12 +24774,16 @@
                         }
                     #endif
 
+                        if (IsEncryptionOn(ssl, 1)) {
+                            args->sendSz += MAX_MSG_EXTRA;
+                        }
+
                         /* check for available size */
                         if ((ret = CheckAvailableSize(ssl, args->sendSz)) != 0) {
                             goto exit_sske;
                         }
 
-                        /* get ouput buffer */
+                        /* get output buffer */
                         args->output = ssl->buffers.outputBuffer.buffer +
                                        ssl->buffers.outputBuffer.length;
 
@@ -21251,8 +24827,8 @@
                         break;
                     }
                 #endif /* !defined(NO_DH) && !defined(NO_PSK) */
-                #if (defined(HAVE_ECC) || defined(HAVE_CURVE25519)) && \
-                                                                !defined(NO_PSK)
+                #if (defined(HAVE_ECC) || defined(HAVE_CURVE25519) || \
+                                     defined(HAVE_CURVE448)) && !defined(NO_PSK)
                     case ecdhe_psk_kea:
                     {
                         word32 hintLen;
@@ -21278,6 +24854,17 @@
                         }
                         else
                     #endif
+                    #ifdef HAVE_CURVE448
+                        if (ssl->ecdhCurveOID == ECC_X448_OID) {
+                            if (wc_curve448_export_public_ex(
+                                    (curve448_key*)ssl->eccTempKey,
+                                    args->exportBuf, &args->exportSz,
+                                    EC448_LITTLE_ENDIAN) != 0) {
+                                ERROR_OUT(ECC_EXPORT_ERROR, exit_sske);
+                            }
+                        }
+                        else
+                    #endif
                         {
                             if (wc_ecc_export_x963(ssl->eccTempKey,
                                        args->exportBuf, &args->exportSz) != 0) {
@@ -21304,6 +24891,11 @@
                             args->idx    += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
                         }
                     #endif
+
+                        if (IsEncryptionOn(ssl, 1)) {
+                            args->sendSz += MAX_MSG_EXTRA;
+                        }
+
                         /* check for available size */
                         if ((ret = CheckAvailableSize(ssl, args->sendSz)) != 0) {
                             goto exit_sske;
@@ -21328,6 +24920,11 @@
                             args->output[args->idx++] = WOLFSSL_ECC_X25519;
                         else
                     #endif
+                    #ifdef HAVE_CURVE448
+                        if (ssl->ecdhCurveOID == ECC_X448_OID)
+                            args->output[args->idx++] = WOLFSSL_ECC_X448;
+                        else
+                    #endif
                         {
                     #ifdef HAVE_ECC
                             args->output[args->idx++] =
@@ -21339,8 +24936,9 @@
                                                                 args->exportSz);
                         break;
                     }
-                #endif /* (HAVE_ECC || HAVE_CURVE25519) && !NO_PSK */
-                #if defined(HAVE_ECC) || defined(HAVE_CURVE25519)
+                #endif /* (HAVE_ECC || CURVE25519 || CURVE448) && !NO_PSK */
+                #if defined(HAVE_ECC) || defined(HAVE_CURVE25519) || \
+                                                          defined(HAVE_CURVE448)
                     case ecc_diffie_hellman_kea:
                     {
                         enum wc_HashType hashType;
@@ -21367,8 +24965,19 @@
                         }
                         else
                     #endif
+                    #ifdef HAVE_CURVE448
+                        if (ssl->ecdhCurveOID == ECC_X448_OID) {
+                            if (wc_curve448_export_public_ex(
+                                        (curve448_key*)ssl->eccTempKey,
+                                        args->exportBuf, &args->exportSz,
+                                        EC448_LITTLE_ENDIAN) != 0) {
+                                ERROR_OUT(ECC_EXPORT_ERROR, exit_sske);
+                            }
+                        }
+                        else
+                    #endif
                         {
-                    #ifdef HAVE_ECC
+                    #if defined(HAVE_ECC) && defined(HAVE_ECC_KEY_EXPORT)
                             if (wc_ecc_export_x963(ssl->eccTempKey,
                                        args->exportBuf, &args->exportSz) != 0) {
                                 ERROR_OUT(ECC_EXPORT_ERROR, exit_sske);
@@ -21384,7 +24993,7 @@
                         #ifdef HAVE_PK_CALLBACKS
                             if (wolfSSL_CTX_IsPrivatePkSet(ssl->ctx)) {
                                 args->tmpSigSz = GetPrivateKeySigSize(ssl);
-                                if (args->tmpSigSz <= 0) {
+                                if (args->tmpSigSz == 0) {
                                     ERROR_OUT(NO_PRIVATE_KEY, exit_sske);
                                 }
                             }
@@ -21400,100 +25009,65 @@
                         #endif
                             case rsa_sa_algo:
                             {
-                                word32 i = 0;
-                                int keySz;
-
-                                ssl->hsType = DYNAMIC_TYPE_RSA;
-                                ret = AllocKey(ssl, ssl->hsType, &ssl->hsKey);
+                                word16 keySz;
+
+                                ssl->buffers.keyType = rsa_sa_algo;
+                                ret = DecodePrivateKey(ssl, &keySz);
                                 if (ret != 0) {
                                     goto exit_sske;
                                 }
 
-                                ret = wc_RsaPrivateKeyDecode(
-                                    ssl->buffers.key->buffer,
-                                    &i,
-                                    (RsaKey*)ssl->hsKey,
-                                    ssl->buffers.key->length);
-                                if (ret != 0) {
-                                    goto exit_sske;
-                                }
-                                keySz = wc_RsaEncryptSize((RsaKey*)ssl->hsKey);
-                                if (keySz < 0) { /* test if keySz has error */
-                                    ERROR_OUT(keySz, exit_sske);
-                                }
-
                                 args->tmpSigSz = (word32)keySz;
-                                if (keySz < ssl->options.minRsaKeySz) {
-                                    WOLFSSL_MSG("RSA signature key size too small");
-                                    ERROR_OUT(RSA_KEY_SIZE_E, exit_sske);
-                                }
                                 break;
                             }
                         #endif /* !NO_RSA */
                         #ifdef HAVE_ECC
                             case ecc_dsa_sa_algo:
                             {
-                                word32 i = 0;
-
-                                ssl->hsType = DYNAMIC_TYPE_ECC;
-                                ret = AllocKey(ssl, ssl->hsType, &ssl->hsKey);
-                                if (ret != 0) {
-                                    goto exit_sske;
-                                }
-
-                                ret = wc_EccPrivateKeyDecode(
-                                    ssl->buffers.key->buffer,
-                                    &i,
-                                    (ecc_key*)ssl->hsKey,
-                                    ssl->buffers.key->length);
+                                word16 keySz;
+
+                                ssl->buffers.keyType = ecc_dsa_sa_algo;
+                                ret = DecodePrivateKey(ssl, &keySz);
                                 if (ret != 0) {
                                     goto exit_sske;
                                 }
                                 /* worst case estimate */
-                                args->tmpSigSz = wc_ecc_sig_size(
-                                    (ecc_key*)ssl->hsKey);
-
-                                /* check the minimum ECC key size */
-                                if (wc_ecc_size((ecc_key*)ssl->hsKey) <
-                                        ssl->options.minEccKeySz) {
-                                    WOLFSSL_MSG("ECC key size too small");
-                                    ERROR_OUT(ECC_KEY_SIZE_E, exit_sske);
-                                }
+                                args->tmpSigSz = keySz;
                                 break;
                             }
                         #endif
                         #ifdef HAVE_ED25519
                             case ed25519_sa_algo:
                             {
-                                word32 i = 0;
-
-                                ssl->hsType = DYNAMIC_TYPE_ED25519;
-                                ret = AllocKey(ssl, ssl->hsType, &ssl->hsKey);
-                                if (ret != 0) {
-                                    goto exit_sske;
-                                }
-
-                                ret = wc_Ed25519PrivateKeyDecode(
-                                    ssl->buffers.key->buffer,
-                                    &i,
-                                    (ed25519_key*)ssl->hsKey,
-                                    ssl->buffers.key->length);
+                                word16 keySz;
+
+                                ssl->buffers.keyType = ed25519_sa_algo;
+                                ret = DecodePrivateKey(ssl, &keySz);
                                 if (ret != 0) {
                                     goto exit_sske;
                                 }
 
                                 /* worst case estimate */
                                 args->tmpSigSz = ED25519_SIG_SIZE;
-
-                                /* check the minimum ECC key size */
-                                if (ED25519_KEY_SIZE <
-                                        ssl->options.minEccKeySz) {
-                                    WOLFSSL_MSG("Ed25519 key size too small");
-                                    ERROR_OUT(ECC_KEY_SIZE_E, exit_sske);
-                                }
                                 break;
                             }
                         #endif /* HAVE_ED25519 */
+                        #ifdef HAVE_ED448
+                            case ed448_sa_algo:
+                            {
+                                word16 keySz;
+
+                                ssl->buffers.keyType = ed448_sa_algo;
+                                ret = DecodePrivateKey(ssl, &keySz);
+                                if (ret != 0) {
+                                    goto exit_sske;
+                                }
+
+                                /* worst case estimate */
+                                args->tmpSigSz = ED448_SIG_SIZE;
+                                break;
+                            }
+                        #endif /* HAVE_ED448 */
                             default:
                                 ERROR_OUT(ALGO_ID_E, exit_sske);  /* unsupported type */
                             } /* switch(ssl->specs.sig_algo) */
@@ -21520,12 +25094,16 @@
                             preSigIdx = args->idx;
                         }
                     #endif
+                        if (IsEncryptionOn(ssl, 1)) {
+                            args->sendSz += MAX_MSG_EXTRA;
+                        }
+
                         /* check for available size */
                         if ((ret = CheckAvailableSize(ssl, args->sendSz)) != 0) {
                             goto exit_sske;
                         }
 
-                        /* get ouput buffer */
+                        /* get output buffer */
                         args->output = ssl->buffers.outputBuffer.buffer +
                                        ssl->buffers.outputBuffer.length;
 
@@ -21540,6 +25118,11 @@
                             args->output[args->idx++] = WOLFSSL_ECC_X25519;
                         else
                     #endif
+                    #ifdef HAVE_CURVE448
+                        if (ssl->ecdhCurveOID == ECC_X448_OID)
+                            args->output[args->idx++] = WOLFSSL_ECC_X448;
+                        else
+                    #endif
                         {
                     #ifdef HAVE_ECC
                             args->output[args->idx++] =
@@ -21574,7 +25157,7 @@
                         #endif
                         }
 
-                        /* Signtaure length will be written later, when we're sure what it is */
+                        /* Signature length will be written later, when we're sure what it is */
 
                     #ifdef HAVE_FUZZER
                         if (ssl->fuzzerCb) {
@@ -21597,9 +25180,13 @@
                         XMEMCPY(args->sigDataBuf+RAN_LEN+RAN_LEN,
                                 args->output + preSigIdx, preSigSz);
 
-                        if (ssl->suites->sigAlgo != ed25519_sa_algo) {
+                        if (ssl->suites->sigAlgo != ed25519_sa_algo &&
+                                        ssl->suites->sigAlgo != ed448_sa_algo) {
                             ssl->buffers.sig.length =
                                                  wc_HashGetDigestSize(hashType);
+                            if ((int)ssl->buffers.sig.length < 0) {
+                                ERROR_OUT(HASH_TYPE_E, exit_sske);
+                            }
                             ssl->buffers.sig.buffer = (byte*)XMALLOC(
                                             ssl->buffers.sig.length,
                                             ssl->heap, DYNAMIC_TYPE_SIGNATURE);
@@ -21672,10 +25259,17 @@
                                     goto exit_sske;
                                 break;
                         #endif /* HAVE_ED25519 */
+                        #ifdef  HAVE_ED448
+                            case ed448_sa_algo:
+                                ret = Ed448CheckPubKey(ssl);
+                                if (ret != 0)
+                                    goto exit_sske;
+                                break;
+                        #endif /* HAVE_ED448 */
                         } /* switch(ssl->specs.sig_algo) */
                         break;
                     }
-                #endif /* HAVE_ECC || HAVE_CURVE25519 */
+                #endif /* HAVE_ECC || HAVE_CURVE25519 || HAVE_CURVE448 */
                 #if !defined(NO_DH) && !defined(NO_RSA)
                     case diffie_hellman_kea:
                     {
@@ -21691,7 +25285,7 @@
                         preSigSz  = args->length;
 
                         if (!ssl->options.usingAnon_cipher) {
-                            int keySz;
+                            word16 keySz;
 
                             /* sig length */
                             args->length += LENGTH_SZ;
@@ -21706,36 +25300,21 @@
                             }
                             else
                             {
-                                word32 i = 0;
-
-                                ssl->hsType = DYNAMIC_TYPE_RSA;
-                                ret = AllocKey(ssl, ssl->hsType, &ssl->hsKey);
+                                if (ssl->buffers.keyType == 0)
+                                    ssl->buffers.keyType = rsa_sa_algo;
+                                ret = DecodePrivateKey(ssl, &keySz);
                                 if (ret != 0) {
                                     goto exit_sske;
                                 }
-
-                                ret = wc_RsaPrivateKeyDecode(
-                                    ssl->buffers.key->buffer, &i,
-                                    (RsaKey*)ssl->hsKey,
-                                    ssl->buffers.key->length);
-                                if (ret != 0) {
-                                    goto exit_sske;
-                                }
-                                keySz = wc_RsaEncryptSize((RsaKey*)ssl->hsKey);
-                            }
-
-                            if (keySz <= 0) { /* test if keySz has error */
+                            }
+
+                            if (keySz == 0) { /* test if keySz has error */
                                 ERROR_OUT(keySz, exit_sske);
                             }
 
                             args->tmpSigSz = (word32)keySz;
                             args->length += args->tmpSigSz;
 
-                            if (keySz < ssl->options.minRsaKeySz) {
-                                WOLFSSL_MSG("RSA key size too small");
-                                ERROR_OUT(RSA_KEY_SIZE_E, exit_sske);
-                            }
-
                             if (IsAtLeastTLSv1_2(ssl)) {
                                 args->length += HASH_SIG_SIZE;
                             }
@@ -21756,12 +25335,16 @@
                         }
                     #endif
 
+                        if (IsEncryptionOn(ssl, 1)) {
+                            args->sendSz += MAX_MSG_EXTRA;
+                        }
+
                         /* check for available size */
                         if ((ret = CheckAvailableSize(ssl, args->sendSz)) != 0) {
                             goto exit_sske;
                         }
 
-                        /* get ouput buffer */
+                        /* get output buffer */
                         args->output = ssl->buffers.outputBuffer.buffer +
                                        ssl->buffers.outputBuffer.length;
 
@@ -21847,7 +25430,8 @@
                         XMEMCPY(args->sigDataBuf+RAN_LEN+RAN_LEN,
                             args->output + preSigIdx, preSigSz);
 
-                        if (ssl->suites->sigAlgo != ed25519_sa_algo) {
+                        if (ssl->suites->sigAlgo != ed25519_sa_algo &&
+                                        ssl->suites->sigAlgo != ed448_sa_algo) {
                             ssl->buffers.sig.length =
                                                  wc_HashGetDigestSize(hashType);
                             ssl->buffers.sig.buffer = (byte*)XMALLOC(
@@ -21930,14 +25514,15 @@
                         break;
                     }
                 #endif /* !defined(NO_DH) && !defined(NO_PSK) */
-                #if (defined(HAVE_ECC) || defined(HAVE_CURVE25519)) && \
-                                                                !defined(NO_PSK)
+                #if (defined(HAVE_ECC) || defined(HAVE_CURVE25519) || \
+                                     defined(HAVE_CURVE448)) && !defined(NO_PSK)
                     case ecdhe_psk_kea:
                     {
                         break;
                     }
-                #endif /* (HAVE_ECC || HAVE_CURVE25519) && !NO_PSK */
-                #if defined(HAVE_ECC) || defined(HAVE_CURVE25519)
+                #endif /* (HAVE_ECC || CURVE25519 || CURVE448) && !NO_PSK */
+                #if defined(HAVE_ECC) || defined(HAVE_ED25519) || \
+                                                             defined(HAVE_ED448)
                     case ecc_diffie_hellman_kea:
                     {
                         /* Sign hash to create signature */
@@ -22002,10 +25587,29 @@
                                 break;
                             }
                         #endif
+                        #ifdef HAVE_ED448
+                            case ed448_sa_algo:
+                            {
+                                ed448_key* key = (ed448_key*)ssl->hsKey;
+
+                                ret = Ed448Sign(ssl,
+                                    args->sigDataBuf, args->sigDataSz,
+                                    args->output + LENGTH_SZ + args->idx,
+                                    &args->sigSz,
+                                    key,
+                            #ifdef HAVE_PK_CALLBACKS
+                                    ssl->buffers.key
+                            #else
+                                    NULL
+                            #endif
+                                );
+                                break;
+                            }
+                        #endif
                         } /* switch(ssl->specs.sig_algo) */
                         break;
                     }
-                #endif /* HAVE_ECC || HAVE_CURVE25519 */
+                #endif /* HAVE_ECC || HAVE_CURVE25519 || HAVE_CURVE448 */
                 #if !defined(NO_DH) && !defined(NO_RSA)
                     case diffie_hellman_kea:
                     {
@@ -22071,15 +25675,16 @@
                         break;
                     }
                 #endif /* !defined(NO_DH) && !defined(NO_PSK) */
-                #if (defined(HAVE_ECC) || defined(HAVE_CURVE25519)) && \
-                                                                !defined(NO_PSK)
+                #if (defined(HAVE_ECC) || defined(HAVE_CURVE25519) ||  \
+                                     defined(HAVE_CURVE448)) && !defined(NO_PSK)
                     case ecdhe_psk_kea:
                     {
                         /* Nothing to do in this sub-state */
                         break;
                     }
-                #endif /* (HAVE_ECC || HAVE_CURVE25519) && !NO_PSK */
-                #if defined(HAVE_ECC) || defined(HAVE_CURVE25519)
+                #endif /* (HAVE_ECC || CURVE25519 || CURVE448) && !NO_PSK */
+                #if defined(HAVE_ECC) || defined(HAVE_CURVE25519) || \
+                                                          defined(HAVE_CURVE448)
                     case ecc_diffie_hellman_kea:
                     {
                         switch(ssl->suites->sigAlgo)
@@ -22121,6 +25726,9 @@
                         #ifdef HAVE_ED25519
                             case ed25519_sa_algo:
                         #endif
+                        #ifdef HAVE_ED448
+                            case ed448_sa_algo:
+                        #endif
                             {
                                 /* Now that we know the real sig size, write it. */
                                 c16toa((word16)args->sigSz,
@@ -22136,7 +25744,7 @@
                         } /* switch(ssl->specs.sig_algo) */
                         break;
                     }
-                #endif /* HAVE_ECC || HAVE_CURVE25519 */
+                #endif /* HAVE_ECC || HAVE_CURVE25519 || HAVE_CURVE448 */
                 #if !defined(NO_DH) && !defined(NO_RSA)
                     case diffie_hellman_kea:
                     {
@@ -22220,7 +25828,8 @@
                 }
             #endif
 
-            #if defined(HAVE_ECC) || defined(HAVE_CURVE25519)
+            #if defined(HAVE_ECC) || defined(HAVE_CURVE25519) || \
+                                                          defined(HAVE_CURVE448)
                 if (ssl->specs.kea == ecdhe_psk_kea ||
                     ssl->specs.kea == ecc_diffie_hellman_kea) {
                     /* Check output to make sure it was set */
@@ -22232,24 +25841,53 @@
                         ERROR_OUT(BUFFER_ERROR, exit_sske);
                     }
                 }
-            #endif /* HAVE_ECC || HAVE_CURVE25519 */
-
-            #ifdef WOLFSSL_DTLS
-                if (IsDtlsNotSctpMode(ssl)) {
-                    if ((ret = DtlsMsgPoolSave(ssl, args->output, args->sendSz)) != 0) {
+            #endif /* HAVE_ECC || HAVE_CURVE25519 || HAVE_CURVE448 */
+
+                if (IsEncryptionOn(ssl, 1)) {
+                    args->inputSz = args->length + HANDSHAKE_HEADER_SZ;
+                                                     /* buildmsg adds rechdr */
+                    args->input = (byte*)XMALLOC(args->inputSz, ssl->heap,
+                                                        DYNAMIC_TYPE_IN_BUFFER);
+                    if (args->input == NULL) {
+                        ERROR_OUT(MEMORY_E, exit_sske);
+                    }
+
+                    if (args->output == NULL) {
+                        ERROR_OUT(BUFFER_ERROR, exit_sske);
+                    }
+
+                    XMEMCPY(args->input, args->output + RECORD_HEADER_SZ,
+                                                                 args->inputSz);
+                    ret = BuildMessage(ssl, args->output, args->sendSz,
+                                args->input, args->inputSz, handshake, 1, 0, 0);
+                    XFREE(args->input, ssl->heap, DYNAMIC_TYPE_IN_BUFFER);
+                    args->input = NULL;
+                        /* make sure its not double free'd on cleanup */
+
+                    if (ret >= 0) {
+                        args->sendSz = ret;
+                        ret = 0;
+                    }
+                }
+                else {
+                #ifdef WOLFSSL_DTLS
+                    if (IsDtlsNotSctpMode(ssl)) {
+                        if ((ret = DtlsMsgPoolSave(ssl,
+                                            args->output, args->sendSz)) != 0) {
+                            goto exit_sske;
+                        }
+                    }
+
+                    if (ssl->options.dtls)
+                        DtlsSEQIncrement(ssl, CUR_ORDER);
+                #endif
+
+                    ret = HashOutput(ssl, args->output, args->sendSz, 0);
+                    if (ret != 0) {
                         goto exit_sske;
                     }
                 }
 
-                if (ssl->options.dtls)
-                    DtlsSEQIncrement(ssl, CUR_ORDER);
-            #endif
-
-                ret = HashOutput(ssl, args->output, args->sendSz, 0);
-                if (ret != 0) {
-                    goto exit_sske;
-                }
-
             #if defined(WOLFSSL_CALLBACKS) || defined(OPENSSL_EXTRA)
                 if (ssl->hsInfoOn) {
                     AddPacketName(ssl, "ServerKeyExchange");
@@ -22297,10 +25935,14 @@
         return ret;
     }
 
-#ifdef HAVE_SERVER_RENEGOTIATION_INFO
+#if defined(HAVE_SERVER_RENEGOTIATION_INFO) || defined(HAVE_FALLBACK_SCSV) || \
+                                                            defined(OPENSSL_ALL)
 
     /* search suites for specific one, idx on success, negative on error */
-    static int FindSuite(Suites* suites, byte first, byte second)
+#ifndef WOLFSSL_TLS13
+    static
+#endif
+    int FindSuite(Suites* suites, byte first, byte second)
     {
         int i;
 
@@ -22404,8 +26046,20 @@
             }
         }
 
-#if (defined(HAVE_ECC) || defined(HAVE_CURVE25519)) && \
-                                                  defined(HAVE_SUPPORTED_CURVES)
+#if !defined(WOLFSSL_OLDTLS_AEAD_CIPHERSUITES)
+        if (CipherRequires(first, second, REQUIRES_AEAD)) {
+            WOLFSSL_MSG("Requires AEAD");
+            if (ssl->version.major == SSLv3_MAJOR &&
+                                           ssl->version.minor < TLSv1_2_MINOR) {
+                WOLFSSL_MSG("Version of SSL does not support AEAD ciphers");
+                return 0;
+            }
+
+        }
+#endif
+
+#if (defined(HAVE_ECC) || defined(HAVE_CURVE25519) || \
+                       defined(HAVE_CURVE448)) && defined(HAVE_SUPPORTED_CURVES)
         if (!TLSX_ValidateSupportedCurves(ssl, first, second)) {
             WOLFSSL_MSG("Don't have matching curves");
             return 0;
@@ -22438,8 +26092,9 @@
             else if (ret != 0)
                 return 0;
         }
-        else if (first == TLS13_BYTE) {
-            /* Can't negotiate TLS 1.3 ciphersuites with lower protocol
+        else if (first == TLS13_BYTE || (first == ECC_BYTE &&
+                (second == TLS_SHA256_SHA256 || second == TLS_SHA384_SHA384))) {
+            /* Can't negotiate TLS 1.3 cipher suites with lower protocol
              * version. */
             return 0;
         }
@@ -22461,9 +26116,10 @@
                 ssl->options.cipherSuite0 = ssl->suites->suites[i];
                 ssl->options.cipherSuite  = ssl->suites->suites[i+1];
                 result = SetCipherSpecs(ssl);
-                if (result == 0)
-                    PickHashSigAlgo(ssl, peerSuites->hashSigAlgo,
-                                    peerSuites->hashSigAlgoSz);
+                if (result == 0) {
+                    result = PickHashSigAlgo(ssl, peerSuites->hashSigAlgo,
+                                                     peerSuites->hashSigAlgoSz);
+                }
                 return result;
             }
             else {
@@ -22483,7 +26139,7 @@
 
         /* & 0x1 equivalent % 2 */
         if (peerSuites->suiteSz == 0 || peerSuites->suiteSz & 0x1)
-            return MATCH_SUITE_ERROR;
+            return BUFFER_ERROR;
 
         if (ssl->suites == NULL)
             return SUITES_ERROR;
@@ -22723,11 +26379,11 @@
     int HandleTlsResumption(WOLFSSL* ssl, int bogusID, Suites* clSuites)
     {
         int ret = 0;
-        WOLFSSL_SESSION* session = GetSession(ssl,
-                                                  ssl->arrays->masterSecret, 1);
+        WOLFSSL_SESSION* session;
 
         (void)bogusID;
 
+        session = GetSession(ssl, ssl->arrays->masterSecret, 1);
         #ifdef HAVE_SESSION_TICKET
             if (ssl->options.useTicket == 1) {
                 session = &ssl->session;
@@ -22754,6 +26410,9 @@
             else if (session->haveEMS && !ssl->options.haveEMS) {
                 WOLFSSL_MSG("Trying to resume a session with EMS without "
                             "using EMS");
+            #ifdef WOLFSSL_EXTRA_ALERTS
+                SendAlert(ssl, alert_fatal, handshake_failure);
+            #endif
                 return EXT_MASTER_SECRET_NEEDED_E;
             }
         #ifdef HAVE_EXT_CACHE
@@ -22761,6 +26420,25 @@
         #endif
         }
         else {
+        #ifndef NO_RESUME_SUITE_CHECK
+            int j;
+
+            /* Check client suites include the one in session */
+            for (j = 0; j < clSuites->suiteSz; j += 2) {
+                if (clSuites->suites[j] == session->cipherSuite0 &&
+                                clSuites->suites[j+1] == session->cipherSuite) {
+                    break;
+                }
+            }
+            if (j == clSuites->suiteSz) {
+                WOLFSSL_MSG("Prev session's cipher suite not in ClientHello");
+            #ifdef WOLFSSL_EXTRA_ALERTS
+                SendAlert(ssl, alert_fatal, illegal_parameter);
+            #endif
+                return UNSUPPORTED_SUITE;
+            }
+        #endif
+
         #ifdef HAVE_EXT_CACHE
             wolfSSL_SESSION_free(session);
         #endif
@@ -22820,7 +26498,7 @@
         if (ssl->toInfoOn) AddLateName("ClientHello", &ssl->timeoutInfo);
 #endif
         /* protocol version, random and session id length check */
-        if ((i - begin) + OPAQUE16_LEN + RAN_LEN + OPAQUE8_LEN > helloSz)
+        if (OPAQUE16_LEN + RAN_LEN + OPAQUE8_LEN > helloSz)
             return BUFFER_ERROR;
 
         /* protocol version */
@@ -23067,11 +26745,32 @@
 #ifdef HAVE_SERVER_RENEGOTIATION_INFO
         /* check for TLS_EMPTY_RENEGOTIATION_INFO_SCSV suite */
         if (FindSuite(&clSuites, 0, TLS_EMPTY_RENEGOTIATION_INFO_SCSV) >= 0) {
+            TLSX* extension;
+
+            /* check for TLS_EMPTY_RENEGOTIATION_INFO_SCSV suite */
             ret = TLSX_AddEmptyRenegotiationInfo(&ssl->extensions, ssl->heap);
             if (ret != WOLFSSL_SUCCESS)
                 return ret;
+
+            extension = TLSX_Find(ssl->extensions, TLSX_RENEGOTIATION_INFO);
+            if (extension) {
+                ssl->secure_renegotiation =
+                                          (SecureRenegotiation*)extension->data;
+                ssl->secure_renegotiation->enabled = 1;
+            }
         }
 #endif /* HAVE_SERVER_RENEGOTIATION_INFO */
+#if defined(HAVE_FALLBACK_SCSV) || defined(OPENSSL_ALL)
+        /* check for TLS_FALLBACK_SCSV suite */
+        if (FindSuite(&clSuites, TLS_FALLBACK_SCSV, 0) >= 0) {
+            WOLFSSL_MSG("Found Fallback SCSV");
+            if (ssl->ctx->method->version.minor > pv.minor) {
+                WOLFSSL_MSG("Client trying to connect with lesser version");
+                SendAlert(ssl, alert_fatal, inappropriate_fallback);
+                return VERSION_ERROR;
+            }
+        }
+#endif
 
 #ifdef WOLFSSL_DTLS
         if (IsDtlsNotSctpMode(ssl)) {
@@ -23092,6 +26791,9 @@
 
         if (b == 0) {
             WOLFSSL_MSG("No compression types in list");
+#ifdef WOLFSSL_EXTRA_ALERTS
+            SendAlert(ssl, alert_fatal, decode_error);
+#endif
             return COMPRESSION_ERROR;
         }
 
@@ -23134,7 +26836,7 @@
 #endif /* WOLFSSL_DTLS */
 
         {
-            /* copmression match types */
+            /* compression match types */
             int matchNo = 0;
             int matchZlib = 0;
 
@@ -23158,6 +26860,9 @@
                 ssl->options.usingCompression = 0;  /* turn off */
             } else {
                 WOLFSSL_MSG("Could not match compression");
+#ifdef WOLFSSL_EXTRA_ALERTS
+                SendAlert(ssl, alert_fatal, illegal_parameter);
+#endif
                 return COMPRESSION_ERROR;
             }
         }
@@ -23273,6 +26978,14 @@
             ret = HandleTlsResumption(ssl, bogusID, &clSuites);
             if (ret != 0)
                 return ret;
+
+            #ifdef HAVE_SECURE_RENEGOTIATION
+            if (ssl->secure_renegotiation &&
+                    ssl->secure_renegotiation->enabled &&
+                    IsEncryptionOn(ssl, 0))
+                ssl->secure_renegotiation->startScr = 1;
+            #endif
+
             if (ssl->options.clientState == CLIENT_KEYEXCHANGE_COMPLETE) {
                 WOLFSSL_LEAVE("DoClientHello", ret);
                 WOLFSSL_END(WC_FUNC_CLIENT_HELLO_DO);
@@ -23280,8 +26993,33 @@
                 return ret;
             }
         }
+
+#if defined(HAVE_TLS_EXTENSIONS) && defined(HAVE_DH_DEFAULT_PARAMS)
+    #if defined(HAVE_FFDHE) && defined(HAVE_SUPPORTED_CURVES)
+        if (TLSX_Find(ssl->extensions, TLSX_SUPPORTED_GROUPS) != NULL) {
+            /* Set FFDHE parameters or clear DHE parameters if FFDH parameters
+             * present and no matches in the server's list. */
+            ret = TLSX_SupportedFFDHE_Set(ssl);
+            if (ret != 0)
+                return ret;
+        }
+    #endif
+#endif
+
         ret = MatchSuite(ssl, &clSuites);
-
+#ifdef WOLFSSL_EXTRA_ALERTS
+        if (ret == BUFFER_ERROR)
+            SendAlert(ssl, alert_fatal, decode_error);
+        else if (ret < 0)
+            SendAlert(ssl, alert_fatal, handshake_failure);
+#endif
+
+#ifdef HAVE_SECURE_RENEGOTIATION
+        if (ssl->secure_renegotiation && ssl->secure_renegotiation->enabled &&
+                IsEncryptionOn(ssl, 0)) {
+            ssl->secure_renegotiation->startScr = 1;
+        }
+#endif
         WOLFSSL_LEAVE("DoClientHello", ret);
         WOLFSSL_END(WC_FUNC_CLIENT_HELLO_DO);
 
@@ -23289,8 +27027,8 @@
     }
 
 
-#if (!defined(NO_RSA) || defined(HAVE_ECC) || defined(HAVE_ED25519)) && \
-                                                !defined(WOLFSSL_NO_CLIENT_AUTH)
+#if (!defined(NO_RSA) || defined(HAVE_ECC) || defined(HAVE_ED25519) || \
+                        defined(HAVE_ED448)) && !defined(WOLFSSL_NO_CLIENT_AUTH)
 
     typedef struct DcvArgs {
         byte*  output; /* not allocated */
@@ -23389,6 +27127,10 @@
                 else if (ssl->peerEd25519KeyPresent)
                     args->sigAlgo = ed25519_sa_algo;
             #endif /* HAVE_ED25519 && !NO_ED25519_CLIENT_AUTH */
+            #if defined(HAVE_ED448) && !defined(NO_ED448_CLIENT_AUTH)
+                else if (ssl->peerEd448KeyPresent)
+                    args->sigAlgo = ed448_sa_algo;
+            #endif /* HAVE_ED448 && !NO_ED448_CLIENT_AUTH */
 
                 if ((args->idx - args->begin) + OPAQUE16_LEN > size) {
                     ERROR_OUT(BUFFER_ERROR, exit_dcv);
@@ -23439,6 +27181,16 @@
                     }
                 }
             #endif /* HAVE_ED25519 && !NO_ED25519_CLIENT_AUTH */
+            #if defined(HAVE_ED448) && !defined(NO_ED448_CLIENT_AUTH)
+                if (ssl->peerEd448KeyPresent) {
+                    WOLFSSL_MSG("Doing ED448 peer cert verify");
+                    if (IsAtLeastTLSv1_2(ssl) &&
+                                               args->sigAlgo != ed448_sa_algo) {
+                        WOLFSSL_MSG(
+                                 "Oops, peer sent ED448 key but not in verify");
+                    }
+                }
+            #endif /* HAVE_ED448 && !NO_ED448_CLIENT_AUTH */
 
                 /* Advance state and proceed */
                 ssl->options.asyncState = TLS_ASYNC_DO;
@@ -23506,9 +27258,32 @@
                     );
                 }
             #endif /* HAVE_ED25519 && !NO_ED25519_CLIENT_AUTH */
+            #if defined(HAVE_ED448) && !defined(NO_ED448_CLIENT_AUTH)
+                if (ssl->peerEd448KeyPresent) {
+                    WOLFSSL_MSG("Doing Ed448 peer cert verify");
+
+                    ret = Ed448Verify(ssl,
+                        input + args->idx, args->sz,
+                        ssl->hsHashes->messages, ssl->hsHashes->prevLen,
+                        ssl->peerEd448Key,
+                    #ifdef HAVE_PK_CALLBACKS
+                        &ssl->buffers.peerEd448Key
+                    #else
+                        NULL
+                    #endif
+                    );
+                }
+            #endif /* HAVE_ED448 && !NO_ED448_CLIENT_AUTH */
+
+            #ifdef WOLFSSL_ASYNC_CRYPT
+                /* handle async pending */
+                if (ret == WC_PENDING_E)
+                    goto exit_dcv;
+            #endif
 
                 /* Check for error */
                 if (ret != 0) {
+                    ret = SIG_VERIFY_E;
                     goto exit_dcv;
                 }
 
@@ -23526,33 +27301,45 @@
                         if (args->sigAlgo == rsa_pss_sa_algo) {
                             SetDigest(ssl, args->hashAlgo);
 
+                        #ifdef HAVE_SELFTEST
                             ret = wc_RsaPSS_CheckPadding(
-                                             ssl->buffers.digest.buffer,
-                                             ssl->buffers.digest.length,
-                                             args->output, args->sigSz,
-                                             HashAlgoToType(args->hashAlgo));
-                            if (ret != 0)
+                                            ssl->buffers.digest.buffer,
+                                            ssl->buffers.digest.length,
+                                            args->output, args->sigSz,
+                                            HashAlgoToType(args->hashAlgo));
+                        #else
+                            ret = wc_RsaPSS_CheckPadding_ex(
+                                            ssl->buffers.digest.buffer,
+                                            ssl->buffers.digest.length,
+                                            args->output, args->sigSz,
+                                            HashAlgoToType(args->hashAlgo), -1,
+                                            mp_count_bits(&ssl->peerRsaKey->n));
+                        #endif
+                            if (ret != 0) {
+                                ret = SIG_VERIFY_E;
                                 goto exit_dcv;
+                            }
                         }
                         else
                     #endif
                         {
                         #ifdef WOLFSSL_SMALL_STACK
-                            byte* encodedSig = NULL;
+                            byte* encodedSig;
                         #else
                             byte  encodedSig[MAX_ENCODED_SIG_SZ];
                         #endif
 
                         #ifdef WOLFSSL_SMALL_STACK
                             encodedSig = (byte*)XMALLOC(MAX_ENCODED_SIG_SZ,
-                                                ssl->heap, DYNAMIC_TYPE_SIGNATURE);
+                                             ssl->heap, DYNAMIC_TYPE_SIGNATURE);
                             if (encodedSig == NULL) {
                                 ERROR_OUT(MEMORY_E, exit_dcv);
                             }
                         #endif
 
                             if (args->sigAlgo != rsa_sa_algo) {
-                                WOLFSSL_MSG("Oops, peer sent RSA key but not in verify");
+                                WOLFSSL_MSG("Oops, peer sent RSA key but not "
+                                            "in verify");
                             }
 
                             SetDigest(ssl, args->hashAlgo);
@@ -23590,6 +27377,14 @@
 
             case TLS_ASYNC_FINALIZE:
             {
+                if (IsEncryptionOn(ssl, 0)) {
+                    args->idx += ssl->keys.padSz;
+            #if defined(HAVE_ENCRYPT_THEN_MAC) && !defined(WOLFSSL_AEAD_ONLY)
+                    if (ssl->options.startedETMRead)
+                        args->idx += MacSize(ssl);
+            #endif
+                }
+
                 ssl->options.havePeerVerify = 1;
 
                 /* Set final index */
@@ -23616,16 +27411,19 @@
     #ifdef WOLFSSL_ASYNC_CRYPT
         /* Handle async operation */
         if (ret == WC_PENDING_E) {
-            /* Mark message as not recevied so it can process again */
+            /* Mark message as not received so it can process again */
             ssl->msgsReceived.got_certificate_verify = 0;
 
             return ret;
         }
     #endif /* WOLFSSL_ASYNC_CRYPT */
-    #ifdef OPENSSL_EXTRA
-        if (ret != 0){
-             SendAlert(ssl, alert_fatal, bad_certificate);
-        }
+    #ifdef WOLFSSL_EXTRA_ALERTS
+        if (ret == BUFFER_ERROR)
+            SendAlert(ssl, alert_fatal, decode_error);
+        else if (ret == SIG_VERIFY_E)
+            SendAlert(ssl, alert_fatal, decrypt_error);
+        else if (ret != 0)
+            SendAlert(ssl, alert_fatal, bad_certificate);
     #endif
         /* Digest is not allocated, so do this to prevent free */
         ssl->buffers.digest.buffer = NULL;
@@ -23638,7 +27436,7 @@
         return ret;
     }
 
-#endif /* (!NO_RSA || HAVE_ECC || HAVE_ED25519) && !WOLFSSL_NO_CLIENT_AUTH */
+#endif /* (!NO_RSA || ECC || ED25519 || ED448) && !WOLFSSL_NO_CLIENT_AUTH */
 
     /* handle generation of server_hello_done (14) */
     int SendServerHelloDone(WOLFSSL* ssl)
@@ -23655,6 +27453,9 @@
             sendSz += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
     #endif
 
+        if (IsEncryptionOn(ssl, 1))
+            sendSz += MAX_MSG_EXTRA;
+
         /* check for available size */
         if ((ret = CheckAvailableSize(ssl, sendSz)) != 0)
             return ret;
@@ -23665,19 +27466,34 @@
 
         AddHeaders(output, 0, server_hello_done, ssl);
 
-    #ifdef WOLFSSL_DTLS
-        if (IsDtlsNotSctpMode(ssl)) {
-            if ((ret = DtlsMsgPoolSave(ssl, output, sendSz)) != 0)
-                return 0;
-        }
-
-        if (ssl->options.dtls)
-            DtlsSEQIncrement(ssl, CUR_ORDER);
-    #endif
-
-        ret = HashOutput(ssl, output, sendSz, 0);
+        if (IsEncryptionOn(ssl, 1)) {
+            byte* input;
+            int   inputSz = HANDSHAKE_HEADER_SZ; /* build msg adds rec hdr */
+
+            input = (byte*)XMALLOC(inputSz, ssl->heap, DYNAMIC_TYPE_IN_BUFFER);
+            if (input == NULL)
+                return MEMORY_E;
+
+            XMEMCPY(input, output + RECORD_HEADER_SZ, inputSz);
+            sendSz = BuildMessage(ssl, output, sendSz, input, inputSz,
+                                  handshake, 1, 0, 0);
+            XFREE(input, ssl->heap, DYNAMIC_TYPE_IN_BUFFER);
+
+            if (sendSz < 0)
+                return sendSz;
+        } else {
+            #ifdef WOLFSSL_DTLS
+                if (IsDtlsNotSctpMode(ssl)) {
+                    if ((ret = DtlsMsgPoolSave(ssl, output, sendSz)) != 0)
+                        return ret;
+                }
+                if (ssl->options.dtls)
+                    DtlsSEQIncrement(ssl, CUR_ORDER);
+            #endif
+            ret = HashOutput(ssl, output, sendSz, 0);
             if (ret != 0)
                 return ret;
+        }
 
     #if defined(WOLFSSL_CALLBACKS) || defined(OPENSSL_EXTRA)
         if (ssl->hsInfoOn)
@@ -23786,9 +27602,14 @@
 
         /* encrypt */
         encLen = WOLFSSL_TICKET_ENC_SZ;  /* max size user can use */
-        ret = ssl->ctx->ticketEncCb(ssl, et->key_name, et->iv, et->mac, 1,
+        if (ssl->ctx->ticketEncCb == NULL) {
+            ret = WOLFSSL_TICKET_RET_FATAL;
+        }
+        else {
+            ret = ssl->ctx->ticketEncCb(ssl, et->key_name, et->iv, et->mac, 1,
                                     et->enc_ticket, sizeof(InternalTicket),
                                     &encLen, ssl->ctx->ticketEncCtx);
+        }
         if (ret == WOLFSSL_TICKET_RET_OK) {
             if (encLen < (int)sizeof(InternalTicket) ||
                 encLen > WOLFSSL_TICKET_ENC_SZ) {
@@ -23863,12 +27684,18 @@
             return BAD_TICKET_MSG_SZ;
         }
         outLen = inLen;   /* may be reduced by user padding */
-        ret = ssl->ctx->ticketEncCb(ssl, et->key_name, et->iv,
+
+        if (ssl->ctx->ticketEncCb == NULL) {
+            ret = WOLFSSL_TICKET_RET_FATAL;
+        }
+        else {
+            ret = ssl->ctx->ticketEncCb(ssl, et->key_name, et->iv,
                                     et->enc_ticket + inLen, 0,
                                     et->enc_ticket, inLen, &outLen,
                                     ssl->ctx->ticketEncCtx);
+        }
         if (ret == WOLFSSL_TICKET_RET_FATAL || ret < 0) return ret;
-        if (outLen > inLen || outLen < (int)sizeof(InternalTicket)) {
+        if (outLen > (int)inLen || outLen < (int)sizeof(InternalTicket)) {
             WOLFSSL_MSG("Bad user ticket decrypt len");
             return BAD_TICKET_KEY_CB_SZ;
         }
@@ -23892,11 +27719,16 @@
                 ssl->version.minor = it->pv.minor;
             }
 
+
             if (!IsAtLeastTLSv1_3(ssl->version)) {
                 XMEMCPY(ssl->arrays->masterSecret, it->msecret, SECRET_LEN);
                 /* Copy the haveExtendedMasterSecret property from the ticket to
                  * the saved session, so the property may be checked later. */
                 ssl->session.haveEMS = it->haveEMS;
+            #ifndef NO_RESUME_SUITE_CHECK
+                ssl->session.cipherSuite0 = it->suite[0];
+                ssl->session.cipherSuite = it->suite[1];
+            #endif
             }
             else {
 #ifdef WOLFSSL_TLS13
@@ -23946,12 +27778,16 @@
         length += ssl->session.ticketLen;
         sendSz = length + HANDSHAKE_HEADER_SZ + RECORD_HEADER_SZ;
 
+        if (!ssl->options.dtls) {
+            if (IsEncryptionOn(ssl, 1) && ssl->options.handShakeDone)
+                sendSz += MAX_MSG_EXTRA;
+        }
+        else {
         #ifdef WOLFSSL_DTLS
-        if (ssl->options.dtls) {
             sendSz += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
             idx    += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
-        }
-        #endif
+        #endif
+        }
         /* check for available size */
         if ((ret = CheckAvailableSize(ssl, sendSz)) != 0)
             return ret;
@@ -23972,19 +27808,38 @@
 
         /* ticket */
         XMEMCPY(output + idx, ssl->session.ticket, ssl->session.ticketLen);
-        /* idx += ssl->session.ticketLen; */
-
-        #ifdef WOLFSSL_DTLS
-        if (ssl->options.dtls) {
-            if ((ret = DtlsMsgPoolSave(ssl, output, sendSz)) != 0)
+        idx += ssl->session.ticketLen;
+
+        if (IsEncryptionOn(ssl, 1) && ssl->options.handShakeDone) {
+            byte* input;
+            int   inputSz = idx - RECORD_HEADER_SZ; /* build msg adds rec hdr */
+
+            input = (byte*)XMALLOC(inputSz, ssl->heap, DYNAMIC_TYPE_IN_BUFFER);
+            if (input == NULL)
+                return MEMORY_E;
+
+            XMEMCPY(input, output + RECORD_HEADER_SZ, inputSz);
+            sendSz = BuildMessage(ssl, output, sendSz, input, inputSz,
+                                  handshake, 1, 0, 0);
+            XFREE(input, ssl->heap, DYNAMIC_TYPE_IN_BUFFER);
+
+            if (sendSz < 0)
+                return sendSz;
+        }
+        else {
+            #ifdef WOLFSSL_DTLS
+            if (ssl->options.dtls) {
+                if ((ret = DtlsMsgPoolSave(ssl, output, sendSz)) != 0)
+                    return ret;
+
+                DtlsSEQIncrement(ssl, CUR_ORDER);
+            }
+            #endif
+            ret = HashOutput(ssl, output, sendSz, 0);
+            if (ret != 0)
                 return ret;
-
-            DtlsSEQIncrement(ssl, CUR_ORDER);
-        }
-        #endif
-
-        ret = HashOutput(ssl, output, sendSz, 0);
-        if (ret != 0) return ret;
+        }
+
         ssl->buffers.outputBuffer.length += sendSz;
 
         ret = SendBuffered(ssl);
@@ -23999,6 +27854,62 @@
 
 #ifndef WOLFSSL_NO_TLS12
 
+#if defined(HAVE_SECURE_RENEGOTIATION) && \
+    defined(HAVE_SERVER_RENEGOTIATION_INFO) && \
+    !defined(WOLFSSL_NO_SERVER)
+
+    /* handle generation of server's hello_request (0) */
+    int SendHelloRequest(WOLFSSL* ssl)
+    {
+        byte* output;
+        int sendSz = RECORD_HEADER_SZ + HANDSHAKE_HEADER_SZ;
+        int ret;
+
+        WOLFSSL_START(WC_FUNC_HELLO_REQUEST_SEND);
+        WOLFSSL_ENTER("SendHelloRequest");
+
+        if (IsEncryptionOn(ssl, 1))
+            sendSz += MAX_MSG_EXTRA;
+
+        /* check for available size */
+        if ((ret = CheckAvailableSize(ssl, sendSz)) != 0)
+            return ret;
+
+        /* get output buffer */
+        output = ssl->buffers.outputBuffer.buffer +
+                 ssl->buffers.outputBuffer.length;
+
+        AddHeaders(output, 0, hello_request, ssl);
+
+        if (IsEncryptionOn(ssl, 1)) {
+            byte* input;
+            int   inputSz = HANDSHAKE_HEADER_SZ; /* build msg adds rec hdr */
+
+            input = (byte*)XMALLOC(inputSz, ssl->heap, DYNAMIC_TYPE_IN_BUFFER);
+            if (input == NULL)
+                return MEMORY_E;
+
+            XMEMCPY(input, output + RECORD_HEADER_SZ, inputSz);
+            sendSz = BuildMessage(ssl, output, sendSz, input, inputSz,
+                                  handshake, 0, 0, 0);
+            XFREE(input, ssl->heap, DYNAMIC_TYPE_IN_BUFFER);
+
+            if (sendSz < 0)
+                return sendSz;
+        }
+
+        ssl->buffers.outputBuffer.length += sendSz;
+
+        ret = SendBuffered(ssl);
+
+        WOLFSSL_LEAVE("SendHelloRequest", ret);
+        WOLFSSL_END(WC_FUNC_HELLO_REQUEST_SEND);
+
+        return ret;
+    }
+
+#endif /* HAVE_SECURE_RENEGOTIATION && HAVE_SERVER_RENEGOTIATION_INFO */
+
 #ifdef WOLFSSL_DTLS
     /* handle generation of DTLS hello_verify_request (3) */
     static int SendHelloVerifyRequest(WOLFSSL* ssl,
@@ -24155,6 +28066,16 @@
                 }
             #endif
 
+                if (ssl->arrays->preMasterSecret == NULL) {
+                    ssl->arrays->preMasterSz = ENCRYPT_LEN;
+                    ssl->arrays->preMasterSecret = (byte*)XMALLOC(ENCRYPT_LEN,
+                                                ssl->heap, DYNAMIC_TYPE_SECRET);
+                    if (ssl->arrays->preMasterSecret == NULL) {
+                        ERROR_OUT(MEMORY_E, exit_dcke);
+                    }
+                    XMEMSET(ssl->arrays->preMasterSecret, 0, ENCRYPT_LEN);
+                }
+
                 switch (ssl->specs.kea) {
                 #ifndef NO_RSA
                     case rsa_kea:
@@ -24184,12 +28105,13 @@
                         break;
                     }
                 #endif /* HAVE_NTRU */
-                #if defined(HAVE_ECC) || defined(HAVE_CURVE25519)
+                #if defined(HAVE_ECC) || defined(HAVE_CURVE25519) || \
+                                                          defined(HAVE_CURVE448)
                     case ecc_diffie_hellman_kea:
                     {
                         break;
                     }
-                #endif /* HAVE_ECC || HAVE_CURVE25519 */
+                #endif /* HAVE_ECC || HAVE_CURVE25519 || HAVE_CURVE448 */
                 #ifndef NO_DH
                     case diffie_hellman_kea:
                     {
@@ -24207,8 +28129,8 @@
                         break;
                     }
                 #endif /* !NO_DH && !NO_PSK */
-                #if (defined(HAVE_ECC) || defined(HAVE_CURVE25519)) && \
-                                                                !defined(NO_PSK)
+                #if (defined(HAVE_ECC) || defined(HAVE_CURVE25519) || \
+                                     defined(HAVE_CURVE448)) && !defined(NO_PSK)
                     case ecdhe_psk_kea:
                     {
                         /* sanity check that PSK server callback has been set */
@@ -24218,7 +28140,7 @@
                         }
                         break;
                     }
-                #endif /* (HAVE_ECC || HAVE_CURVE25519) && !NO_PSK */
+                #endif /* (HAVE_ECC || CURVE25519 || CURVE448) && !NO_PSK */
                     default:
                         WOLFSSL_MSG("Bad kea type");
                         ret = BAD_KEA_TYPE_E;
@@ -24240,30 +28162,14 @@
                 #ifndef NO_RSA
                     case rsa_kea:
                     {
-                        word32 i = 0;
-                        int    keySz;
-
-                        ssl->hsType = DYNAMIC_TYPE_RSA;
-                        ret = AllocKey(ssl, ssl->hsType, &ssl->hsKey);
+                        word16 keySz;
+
+                        ssl->buffers.keyType = rsa_sa_algo;
+                        ret = DecodePrivateKey(ssl, &keySz);
                         if (ret != 0) {
                             goto exit_dcke;
                         }
-
-                        ret = wc_RsaPrivateKeyDecode(ssl->buffers.key->buffer,
-                            &i, (RsaKey*)ssl->hsKey, ssl->buffers.key->length);
-                        if (ret != 0) {
-                            goto exit_dcke;
-                        }
-                        keySz = wc_RsaEncryptSize((RsaKey*)ssl->hsKey);
-                        if (keySz < 0) { /* test if keySz has error */
-                            ERROR_OUT(keySz, exit_dcke);
-                        }
                         args->length = (word32)keySz;
-
-                        if (keySz < ssl->options.minRsaKeySz) {
-                            WOLFSSL_MSG("Peer RSA key is too small");
-                            ERROR_OUT(RSA_KEY_SIZE_E, exit_dcke);
-                        }
                         ssl->arrays->preMasterSz = SECRET_LEN;
 
                         if (ssl->options.tls) {
@@ -24278,6 +28184,9 @@
 
                             if ((word32)check != args->length) {
                                 WOLFSSL_MSG("RSA explicit size doesn't match");
+                        #ifdef WOLFSSL_EXTRA_ALERTS
+                                SendAlert(ssl, alert_fatal, bad_record_mac);
+                        #endif
                                 ERROR_OUT(RSA_PRIVATE_ERROR, exit_dcke);
                             }
                         }
@@ -24389,7 +28298,8 @@
                         break;
                     }
                 #endif /* HAVE_NTRU */
-                #if defined(HAVE_ECC) || defined(HAVE_CURVE25519)
+                #if defined(HAVE_ECC) || defined(HAVE_CURVE25519) || \
+                                                          defined(HAVE_CURVE448)
                     case ecc_diffie_hellman_kea:
                     {
                     #ifdef HAVE_ECC
@@ -24397,44 +28307,36 @@
 
                         /* handle static private key */
                         if (ssl->specs.static_ecdh &&
-                                          ssl->ecdhCurveOID != ECC_X25519_OID) {
-                            word32 i = 0;
-
-                            ssl->hsType = DYNAMIC_TYPE_ECC;
-                            ret = AllocKey(ssl, ssl->hsType, &ssl->hsKey);
+                                          ssl->ecdhCurveOID != ECC_X25519_OID &&
+                                          ssl->ecdhCurveOID != ECC_X448_OID) {
+                            word16 keySz;
+
+                            ssl->buffers.keyType = ecc_dsa_sa_algo;
+                            ret = DecodePrivateKey(ssl, &keySz);
                             if (ret != 0) {
                                 goto exit_dcke;
                             }
-
-                            ret = wc_EccPrivateKeyDecode(
-                                ssl->buffers.key->buffer,
-                                &i,
-                                (ecc_key*)ssl->hsKey,
-                                ssl->buffers.key->length);
-                            if (ret == 0) {
-                                private_key = (ecc_key*)ssl->hsKey;
-                                if (wc_ecc_size(private_key) <
-                                                ssl->options.minEccKeySz) {
-                                    WOLFSSL_MSG("ECC key too small");
-                                    ERROR_OUT(ECC_KEY_SIZE_E, exit_dcke);
-                                }
-                            }
+                            private_key = (ecc_key*)ssl->hsKey;
                         }
                     #endif
 
                         /* import peer ECC key */
                         if ((args->idx - args->begin) + OPAQUE8_LEN > size) {
+                        #ifdef WOLFSSL_EXTRA_ALERTS
+                            SendAlert(ssl, alert_fatal, decode_error);
+                        #endif
                             ERROR_OUT(BUFFER_ERROR, exit_dcke);
                         }
 
                         args->length = input[args->idx++];
 
                         if ((args->idx - args->begin) + args->length > size) {
+                        #ifdef WOLFSSL_EXTRA_ALERTS
+                            SendAlert(ssl, alert_fatal, decode_error);
+                        #endif
                             ERROR_OUT(BUFFER_ERROR, exit_dcke);
                         }
 
-                        ssl->arrays->preMasterSz = ENCRYPT_LEN;
-
                     #ifdef HAVE_CURVE25519
                         if (ssl->ecdhCurveOID == ECC_X25519_OID) {
                         #ifdef HAVE_PK_CALLBACKS
@@ -24459,18 +28361,93 @@
                                 }
                             }
 
+                            if ((ret = wc_curve25519_check_public(
+                                    input + args->idx, args->length,
+                                    EC25519_LITTLE_ENDIAN)) != 0) {
+                        #ifdef WOLFSSL_EXTRA_ALERTS
+                                if (ret == BUFFER_E)
+                                    SendAlert(ssl, alert_fatal, decode_error);
+                                else if (ret == ECC_OUT_OF_RANGE_E)
+                                    SendAlert(ssl, alert_fatal, bad_record_mac);
+                                else {
+                                    SendAlert(ssl, alert_fatal,
+                                                             illegal_parameter);
+                                }
+                        #endif
+                                ERROR_OUT(ECC_PEERKEY_ERROR, exit_dcke);
+                            }
+
                             if (wc_curve25519_import_public_ex(
                                     input + args->idx, args->length,
                                     ssl->peerX25519Key,
                                     EC25519_LITTLE_ENDIAN)) {
+                        #ifdef WOLFSSL_EXTRA_ALERTS
+                                SendAlert(ssl, alert_fatal, illegal_parameter);
+                        #endif
                                 ERROR_OUT(ECC_PEERKEY_ERROR, exit_dcke);
                             }
 
+                            ssl->arrays->preMasterSz = CURVE25519_KEYSIZE;
+
                             ssl->peerX25519KeyPresent = 1;
 
-                            if (ret != 0) {
-                                goto exit_dcke;
-                            }
+                            break;
+                        }
+                    #endif
+                    #ifdef HAVE_CURVE448
+                        if (ssl->ecdhCurveOID == ECC_X448_OID) {
+                        #ifdef HAVE_PK_CALLBACKS
+                            /* if callback then use it for shared secret */
+                            if (ssl->ctx->X448SharedSecretCb != NULL) {
+                                break;
+                            }
+                        #endif
+                            if (ssl->peerX448Key == NULL) {
+                                /* alloc/init on demand */
+                                ret = AllocKey(ssl, DYNAMIC_TYPE_CURVE448,
+                                    (void**)&ssl->peerX448Key);
+                                if (ret != 0) {
+                                    goto exit_dcke;
+                                }
+                            } else if (ssl->peerX448KeyPresent) {
+                                ret = ReuseKey(ssl, DYNAMIC_TYPE_CURVE448,
+                                               ssl->peerX448Key);
+                                ssl->peerX448KeyPresent = 0;
+                                if (ret != 0) {
+                                    goto exit_dcke;
+                                }
+                            }
+
+                            if ((ret = wc_curve448_check_public(
+                                    input + args->idx, args->length,
+                                    EC448_LITTLE_ENDIAN)) != 0) {
+                        #ifdef WOLFSSL_EXTRA_ALERTS
+                                if (ret == BUFFER_E)
+                                    SendAlert(ssl, alert_fatal, decode_error);
+                                else if (ret == ECC_OUT_OF_RANGE_E)
+                                    SendAlert(ssl, alert_fatal, bad_record_mac);
+                                else {
+                                    SendAlert(ssl, alert_fatal,
+                                                             illegal_parameter);
+                                }
+                        #endif
+                                ERROR_OUT(ECC_PEERKEY_ERROR, exit_dcke);
+                            }
+
+                            if (wc_curve448_import_public_ex(
+                                    input + args->idx, args->length,
+                                    ssl->peerX448Key,
+                                    EC448_LITTLE_ENDIAN)) {
+                        #ifdef WOLFSSL_EXTRA_ALERTS
+                                SendAlert(ssl, alert_fatal, illegal_parameter);
+                        #endif
+                                ERROR_OUT(ECC_PEERKEY_ERROR, exit_dcke);
+                            }
+
+                            ssl->arrays->preMasterSz = CURVE448_KEY_SIZE;
+
+                            ssl->peerX448KeyPresent = 1;
+
                             break;
                         }
                     #endif
@@ -24504,20 +28481,23 @@
                             }
                         }
 
-                        if (wc_ecc_import_x963_ex(input + args->idx, args->length,
-                                        ssl->peerEccKey, private_key->dp->id)) {
+                        if (wc_ecc_import_x963_ex(input + args->idx,
+                                                  args->length, ssl->peerEccKey,
+                                                  private_key->dp->id)) {
+                        #ifdef WOLFSSL_EXTRA_ALERTS
+                            SendAlert(ssl, alert_fatal, illegal_parameter);
+                        #endif
                             ERROR_OUT(ECC_PEERKEY_ERROR, exit_dcke);
                         }
 
+                        ssl->arrays->preMasterSz = private_key->dp->size;
+
                         ssl->peerEccKeyPresent = 1;
                 #endif /* HAVE_ECC */
 
-                        if (ret != 0) {
-                            goto exit_dcke;
-                        }
-                        break;
-                    }
-                #endif /* HAVE_ECC || HAVE_CURVE25519 */
+                        break;
+                    }
+                #endif /* HAVE_ECC || HAVE_CURVE25519 || HAVE_CURVE448 */
                 #ifndef NO_DH
                     case diffie_hellman_kea:
                     {
@@ -24531,6 +28511,9 @@
                         args->idx += OPAQUE16_LEN;
 
                         if ((args->idx - args->begin) + clientPubSz > size) {
+                        #ifdef WOLFSSL_EXTRA_ALERTS
+                            SendAlert(ssl, alert_fatal, decode_error);
+                        #endif
                             ERROR_OUT(BUFFER_ERROR, exit_dcke);
                         }
 
@@ -24607,8 +28590,8 @@
                         break;
                     }
                 #endif /* !NO_DH && !NO_PSK */
-                #if (defined(HAVE_ECC) || defined(HAVE_CURVE25519)) && \
-                                                                !defined(NO_PSK)
+                #if (defined(HAVE_ECC) || defined(HAVE_CURVE25519) || \
+                                     defined(HAVE_CURVE448)) && !defined(NO_PSK)
                     case ecdhe_psk_kea:
                     {
                         word16 clientSz;
@@ -24676,6 +28659,22 @@
                                 }
                             }
 
+                            if ((ret = wc_curve25519_check_public(
+                                    input + args->idx, args->length,
+                                    EC25519_LITTLE_ENDIAN)) != 0) {
+                        #ifdef WOLFSSL_EXTRA_ALERTS
+                                if (ret == BUFFER_E)
+                                    SendAlert(ssl, alert_fatal, decode_error);
+                                else if (ret == ECC_OUT_OF_RANGE_E)
+                                    SendAlert(ssl, alert_fatal, bad_record_mac);
+                                else {
+                                    SendAlert(ssl, alert_fatal,
+                                                             illegal_parameter);
+                                }
+                        #endif
+                                ERROR_OUT(ECC_PEERKEY_ERROR, exit_dcke);
+                            }
+
                             if (wc_curve25519_import_public_ex(
                                     input + args->idx, args->length,
                                     ssl->peerX25519Key,
@@ -24688,6 +28687,65 @@
                             break;
                         }
                     #endif
+                    #ifdef HAVE_CURVE448
+                        if (ssl->ecdhCurveOID == ECC_X448_OID) {
+                        #ifdef HAVE_PK_CALLBACKS
+                            /* if callback then use it for shared secret */
+                            if (ssl->ctx->X448SharedSecretCb != NULL) {
+                                break;
+                            }
+                        #endif
+
+                            if (ssl->eccTempKeyPresent == 0) {
+                                WOLFSSL_MSG(
+                                       "X448 ephemeral key not made correctly");
+                                ERROR_OUT(ECC_MAKEKEY_ERROR, exit_dcke);
+                            }
+
+                            if (ssl->peerX448Key == NULL) {
+                                /* alloc/init on demand */
+                                ret = AllocKey(ssl, DYNAMIC_TYPE_CURVE448,
+                                    (void**)&ssl->peerX448Key);
+                                if (ret != 0) {
+                                    goto exit_dcke;
+                                }
+                            } else if (ssl->peerX448KeyPresent) {
+                                ret = ReuseKey(ssl, DYNAMIC_TYPE_CURVE448,
+                                               ssl->peerX448Key);
+                                ssl->peerX448KeyPresent = 0;
+                                if (ret != 0) {
+                                    goto exit_dcke;
+                                }
+                            }
+
+                            if ((ret = wc_curve448_check_public(
+                                    input + args->idx, args->length,
+                                    EC448_LITTLE_ENDIAN)) != 0) {
+                        #ifdef WOLFSSL_EXTRA_ALERTS
+                                if (ret == BUFFER_E)
+                                    SendAlert(ssl, alert_fatal, decode_error);
+                                else if (ret == ECC_OUT_OF_RANGE_E)
+                                    SendAlert(ssl, alert_fatal, bad_record_mac);
+                                else {
+                                    SendAlert(ssl, alert_fatal,
+                                                             illegal_parameter);
+                                }
+                        #endif
+                                ERROR_OUT(ECC_PEERKEY_ERROR, exit_dcke);
+                            }
+
+                            if (wc_curve448_import_public_ex(
+                                    input + args->idx, args->length,
+                                    ssl->peerX448Key,
+                                    EC448_LITTLE_ENDIAN)) {
+                                ERROR_OUT(ECC_PEERKEY_ERROR, exit_dcke);
+                            }
+
+                            ssl->peerX448KeyPresent = 1;
+
+                            break;
+                        }
+                    #endif
                     #ifdef HAVE_PK_CALLBACKS
                         /* if callback then use it for shared secret */
                         if (ssl->ctx->EccSharedSecretCb != NULL) {
@@ -24716,15 +28774,16 @@
                                 goto exit_dcke;
                             }
                         }
-                        if (wc_ecc_import_x963_ex(input + args->idx, args->length,
-                                 ssl->peerEccKey, ssl->eccTempKey->dp->id)) {
+                        if (wc_ecc_import_x963_ex(input + args->idx,
+                                 args->length, ssl->peerEccKey,
+                                 ssl->eccTempKey->dp->id)) {
                             ERROR_OUT(ECC_PEERKEY_ERROR, exit_dcke);
                         }
 
                         ssl->peerEccKeyPresent = 1;
                         break;
                     }
-                #endif /* (HAVE_ECC || HAVE_CURVE25519) && !NO_PSK */
+                #endif /* (HAVE_ECC || CURVE25519 || CURVE448) && !NO_PSK */
                     default:
                         ret = BAD_KEA_TYPE_E;
                 } /* switch (ssl->specs.kea) */
@@ -24764,15 +28823,15 @@
                          *  indistinguishable:
                          *       RSA_BUFFER_E, RSA_PAD_E and RSA_PRIVATE_ERROR
                          */
-                        if (ret < 0 && ret != BAD_FUNC_ARG) {
-                        #ifdef WOLFSSL_ASYNC_CRYPT
-                            if (ret == WC_PENDING_E)
-                                goto exit_dcke;
-                        #endif
-                            /* store error code for handling below */
-                            args->lastErr = ret;
-                            ret = 0;
-                        }
+                    #ifdef WOLFSSL_ASYNC_CRYPT
+                        if (ret == WC_PENDING_E)
+                            goto exit_dcke;
+                    #endif
+                        if (ret == BAD_FUNC_ARG)
+                            goto exit_dcke;
+
+                        args->lastErr = ret - (SECRET_LEN - args->sigSz);
+                        ret = 0;
                         break;
                     } /* rsa_kea */
                 #endif /* !NO_RSA */
@@ -24788,7 +28847,8 @@
                         break;
                     }
                 #endif /* HAVE_NTRU */
-                #if defined(HAVE_ECC) || defined(HAVE_CURVE25519)
+                #if defined(HAVE_ECC) || defined(HAVE_CURVE25519) || \
+                                                          defined(HAVE_CURVE448)
                     case ecc_diffie_hellman_kea:
                     {
                         void* private_key = ssl->eccTempKey;
@@ -24807,6 +28867,19 @@
                             break;
                         }
                     #endif
+                    #ifdef HAVE_CURVE448
+                        if (ssl->ecdhCurveOID == ECC_X448_OID) {
+                            ret = X448SharedSecret(ssl,
+                                (curve448_key*)private_key,
+                                ssl->peerX448Key,
+                                input + args->idx, &args->length,
+                                ssl->arrays->preMasterSecret,
+                                &ssl->arrays->preMasterSz,
+                                WOLFSSL_SERVER_END
+                            );
+                            break;
+                        }
+                    #endif
                     #ifdef HAVE_ECC
                         if (ssl->specs.static_ecdh) {
                             private_key = ssl->hsKey;
@@ -24820,10 +28893,18 @@
                             &ssl->arrays->preMasterSz,
                             WOLFSSL_SERVER_END
                         );
-                    #endif
-                        break;
-                    }
-                #endif /* HAVE_ECC || HAVE_CURVE25519 */
+                    #ifdef WOLFSSL_ASYNC_CRYPT
+                        if (ret != WC_PENDING_E)
+                    #endif
+                        {
+                            FreeKey(ssl, DYNAMIC_TYPE_ECC,
+                                                      (void**)&ssl->peerEccKey);
+                            ssl->peerEccKeyPresent = 0;
+                        }
+                    #endif
+                        break;
+                    }
+                #endif /* HAVE_ECC || HAVE_CURVE25519 || HAVE_CURVE448 */
                 #ifndef NO_DH
                     case diffie_hellman_kea:
                     {
@@ -24850,8 +28931,8 @@
                         break;
                     }
                 #endif /* !NO_DH && !NO_PSK */
-                #if (defined(HAVE_ECC) || defined(HAVE_CURVE25519)) && \
-                                                                !defined(NO_PSK)
+                #if (defined(HAVE_ECC) || defined(HAVE_CURVE25519) || \
+                                     defined(HAVE_CURVE448)) && !defined(NO_PSK)
                     case ecdhe_psk_kea:
                     {
                     #ifdef HAVE_CURVE25519
@@ -24864,7 +28945,10 @@
                                 &args->sigSz,
                                 WOLFSSL_SERVER_END
                             );
-                            if (ret == 0) {
+                        #ifdef WOLFSSL_ASYNC_CRYPT
+                            if (ret != WC_PENDING_E)
+                        #endif
+                            {
                                 FreeKey(ssl, DYNAMIC_TYPE_CURVE25519,
                                                    (void**)&ssl->peerX25519Key);
                                 ssl->peerX25519KeyPresent = 0;
@@ -24872,6 +28956,27 @@
                             break;
                         }
                     #endif
+                    #ifdef HAVE_CURVE448
+                        if (ssl->ecdhCurveOID == ECC_X448_OID) {
+                            ret = X448SharedSecret(ssl,
+                                (curve448_key*)ssl->eccTempKey,
+                                ssl->peerX448Key,
+                                input + args->idx, &args->length,
+                                ssl->arrays->preMasterSecret + OPAQUE16_LEN,
+                                &args->sigSz,
+                                WOLFSSL_SERVER_END
+                            );
+                        #ifdef WOLFSSL_ASYNC_CRYPT
+                            if (ret != WC_PENDING_E)
+                        #endif
+                            {
+                                FreeKey(ssl, DYNAMIC_TYPE_CURVE448,
+                                                     (void**)&ssl->peerX448Key);
+                                ssl->peerX448KeyPresent = 0;
+                            }
+                            break;
+                        }
+                    #endif
                         /* Generate shared secret */
                         ret = EccSharedSecret(ssl,
                             ssl->eccTempKey, ssl->peerEccKey,
@@ -24880,9 +28985,18 @@
                             &args->sigSz,
                             WOLFSSL_SERVER_END
                         );
-                        break;
-                    }
-                #endif /* (HAVE_ECC || HAVE_CURVE25519) && !NO_PSK */
+                        if (!ssl->specs.static_ecdh
+                    #ifdef WOLFSSL_ASYNC_CRYPT
+                            && ret != WC_PENDING_E
+                    #endif
+                        ) {
+                            FreeKey(ssl, DYNAMIC_TYPE_ECC,
+                                                      (void**)&ssl->peerEccKey);
+                            ssl->peerEccKeyPresent = 0;
+                        }
+                        break;
+                    }
+                #endif /* (HAVE_ECC || CURVE25519 || CURVE448) && !NO_PSK */
                     default:
                         ret = BAD_KEA_TYPE_E;
                 } /* switch (ssl->specs.kea) */
@@ -24903,6 +29017,9 @@
                 #ifndef NO_RSA
                     case rsa_kea:
                     {
+                        byte mask;
+                        int i;
+
                         /* Add the signature length to idx */
                         args->idx += args->length;
 
@@ -24924,24 +29041,28 @@
 
                         ret = args->lastErr;
                         args->lastErr = 0; /* reset */
+                        /* On error 'ret' will be negative - top bit set */
+                        mask = ((unsigned int)ret >>
+                                                   ((sizeof(ret) * 8) - 1)) - 1;
 
                         /* build PreMasterSecret */
                         ssl->arrays->preMasterSecret[0] = ssl->chVersion.major;
                         ssl->arrays->preMasterSecret[1] = ssl->chVersion.minor;
-                        if (ret == 0 && args->sigSz == SECRET_LEN &&
-                                                         args->output != NULL) {
-                            XMEMCPY(&ssl->arrays->preMasterSecret[VERSION_SZ],
-                                &args->output[VERSION_SZ],
-                                SECRET_LEN - VERSION_SZ);
-                        }
-                        else {
-                            /* preMasterSecret has RNG and version set */
-                            /* return proper length and ignore error */
-                            /* error will be caught as decryption error */
-                            args->sigSz = SECRET_LEN;
-                            ret = 0;
-                        }
-
+
+                        if (args->output != NULL) {
+                            /* Use random secret on error */
+                            for (i = VERSION_SZ; i < SECRET_LEN; i++) {
+                                ssl->arrays->preMasterSecret[i] =
+                                     ctMaskSel(mask, args->output[i],
+                                               ssl->arrays->preMasterSecret[i]);
+                            }
+                        }
+                        /* preMasterSecret has RNG and version set
+                         * return proper length and ignore error
+                         * error will be caught as decryption error
+                         */
+                        args->sigSz = SECRET_LEN;
+                        ret = 0;
                         break;
                     } /* rsa_kea */
                 #endif /* !NO_RSA */
@@ -24957,14 +29078,15 @@
                         break;
                     }
                 #endif /* HAVE_NTRU */
-                #if defined(HAVE_ECC) || defined(HAVE_CURVE25519)
+                #if defined(HAVE_ECC) || defined(HAVE_CURVE25519) || \
+                                                          defined(HAVE_CURVE448)
                     case ecc_diffie_hellman_kea:
                     {
                         /* skip past the imported peer key */
                         args->idx += args->length;
                         break;
                     }
-                #endif /* HAVE_ECC || HAVE_CURVE25519 */
+                #endif /* HAVE_ECC || HAVE_CURVE25519 || HAVE_CURVE448 */
                 #ifndef NO_DH
                     case diffie_hellman_kea:
                     {
@@ -25004,8 +29126,8 @@
                         break;
                     }
                 #endif /* !NO_DH && !NO_PSK */
-                #if (defined(HAVE_ECC) || defined(HAVE_CURVE25519)) && \
-                                                                !defined(NO_PSK)
+                #if (defined(HAVE_ECC) || defined(HAVE_CURVE25519) || \
+                                     defined(HAVE_CURVE448)) && !defined(NO_PSK)
                     case ecdhe_psk_kea:
                     {
                         byte* pms = ssl->arrays->preMasterSecret;
@@ -25016,7 +29138,7 @@
 
                         /* Add preMasterSecret */
                         c16toa(clientSz, pms);
-                        ssl->arrays->preMasterSz += OPAQUE16_LEN + clientSz;
+                        ssl->arrays->preMasterSz = OPAQUE16_LEN + clientSz;
                         pms += ssl->arrays->preMasterSz;
 
                         /* Use the PSK hint to look up the PSK and add it to the
@@ -25038,7 +29160,7 @@
                                       ssl->arrays->psk_keySz + OPAQUE16_LEN;
                         break;
                     }
-                #endif /* (HAVE_ECC || HAVE_CURVE25519) && !NO_PSK */
+                #endif /* (HAVE_ECC || CURVE25519 || CURVE448) && !NO_PSK */
                     default:
                         ret = BAD_KEA_TYPE_E;
                 } /* switch (ssl->specs.kea) */
@@ -25055,6 +29177,14 @@
 
             case TLS_ASYNC_FINALIZE:
             {
+                if (IsEncryptionOn(ssl, 0)) {
+                    args->idx += ssl->keys.padSz;
+            #if defined(HAVE_ENCRYPT_THEN_MAC) && !defined(WOLFSSL_AEAD_ONLY)
+                    if (ssl->options.startedETMRead)
+                        args->idx += MacSize(ssl);
+            #endif
+                }
+
             #ifdef HAVE_QSH
                 word16 name;
 
@@ -25117,7 +29247,7 @@
     #ifdef WOLFSSL_ASYNC_CRYPT
         /* Handle async operation */
         if (ret == WC_PENDING_E) {
-            /* Mark message as not recevied so it can process again */
+            /* Mark message as not received so it can process again */
             ssl->msgsReceived.got_client_key_exchange = 0;
 
             return ret;
@@ -25125,7 +29255,9 @@
     #endif /* WOLFSSL_ASYNC_CRYPT */
 
         /* Cleanup PMS */
-        ForceZero(ssl->arrays->preMasterSecret, ssl->arrays->preMasterSz);
+        if (ssl->arrays->preMasterSecret != NULL) {
+            ForceZero(ssl->arrays->preMasterSecret, ssl->arrays->preMasterSz);
+        }
         ssl->arrays->preMasterSz = 0;
 
         /* Final cleanup */
@@ -25261,7 +29393,7 @@
     }
 
 #ifdef HAVE_MAX_FRAGMENT
-    if ((ssl->max_fragment != 0) && (maxFragment > ssl->max_fragment)) {
+    if ((ssl->max_fragment != 0) && ((word16)maxFragment > ssl->max_fragment)) {
         maxFragment = ssl->max_fragment;
     }
 #endif /* HAVE_MAX_FRAGMENT */
--- a/src/keys.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/src/keys.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* keys.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -33,25 +33,22 @@
 #include <wolfssl/internal.h>
 #include <wolfssl/error-ssl.h>
 #if defined(SHOW_SECRETS) || defined(CHACHA_AEAD_TEST)
-    #if defined(FREESCALE_MQX) || defined(FREESCALE_KSDK_MQX)
-        #if MQX_USE_IO_OLD
-            #include <fio.h>
-        #else
-            #include <nio.h>
-        #endif
-    #else
+    #ifndef NO_STDIO_FILESYSTEM
         #include <stdio.h>
     #endif
 #endif
 
-
+#if defined(WOLFSSL_RENESAS_TSIP_TLS) && \
+    !defined(NO_WOLFSSL_RENESAS_TSIP_TLS_SESSION)
+ int tsip_useable(const WOLFSSL *ssl);
+#endif
 int SetCipherSpecs(WOLFSSL* ssl)
 {
 #ifndef NO_WOLFSSL_CLIENT
     if (ssl->options.side == WOLFSSL_CLIENT_END) {
         /* server side verified before SetCipherSpecs call */
         if (VerifyClientSuite(ssl) != 1) {
-            WOLFSSL_MSG("SetCipherSpecs() client has an unusuable suite");
+            WOLFSSL_MSG("SetCipherSpecs() client has an unusable suite");
             return UNSUPPORTED_SUITE;
         }
     }
@@ -239,12 +236,12 @@
     }
     }
 
-    /* ECC extensions, or AES-CCM */
+    /* ECC extensions, AES-CCM or TLS 1.3 Integrity-only */
     if (ssl->options.cipherSuite0 == ECC_BYTE) {
 
     switch (ssl->options.cipherSuite) {
 
-#if defined(HAVE_ECC) || defined(HAVE_CURVE25519)
+#if defined(HAVE_ECC) || defined(HAVE_CURVE25519) || defined(HAVE_CURVE448)
 
 #ifdef BUILD_TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256
     case TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 :
@@ -420,9 +417,10 @@
         break;
 #endif
 
-#endif /* HAVE_ECC || HAVE_CURVE25519 */
-
-#if defined(HAVE_ECC) || (defined(HAVE_CURVE25519) && defined(HAVE_ED25519))
+#endif /* HAVE_ECC || HAVE_CURVE25519 || HAVE_CURVE448 */
+
+#if defined(HAVE_ECC) || (defined(HAVE_CURVE25519) && defined(HAVE_ED25519)) \
+                      || (defined(HAVE_CURVE448) && defined(HAVE_ED448))
 
 #ifdef BUILD_TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256
     case TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256 :
@@ -633,7 +631,7 @@
     break;
 #endif
 
-#endif /* HAVE_ECC || (HAVE_CURVE25519 && HAVE_ED25519) */
+#endif /* HAVE_ECC || (CURVE25519 && ED25519) || (CURVE448 && ED448) */
 
 #if defined(HAVE_ECC)
 
@@ -1065,6 +1063,44 @@
         break;
 #endif
 
+#if defined(WOLFSSL_TLS13) && defined(HAVE_NULL_CIPHER)
+    #ifdef BUILD_TLS_SHA256_SHA256
+    case TLS_SHA256_SHA256 :
+        ssl->specs.bulk_cipher_algorithm = wolfssl_cipher_null;
+        ssl->specs.cipher_type           = aead;
+        ssl->specs.mac_algorithm         = sha256_mac;
+        ssl->specs.kea                   = 0;
+        ssl->specs.sig_algo              = 0;
+        ssl->specs.hash_size             = WC_SHA256_DIGEST_SIZE;
+        ssl->specs.pad_size              = PAD_SHA;
+        ssl->specs.static_ecdh           = 0;
+        ssl->specs.key_size              = WC_SHA256_DIGEST_SIZE / 2;
+        ssl->specs.block_size            = 0;
+        ssl->specs.iv_size               = HMAC_NONCE_SZ;
+        ssl->specs.aead_mac_size         = WC_SHA256_DIGEST_SIZE;
+
+        break;
+    #endif
+
+    #ifdef BUILD_TLS_SHA384_SHA384
+    case TLS_SHA384_SHA384 :
+        ssl->specs.bulk_cipher_algorithm = wolfssl_cipher_null;
+        ssl->specs.cipher_type           = aead;
+        ssl->specs.mac_algorithm         = sha384_mac;
+        ssl->specs.kea                   = 0;
+        ssl->specs.sig_algo              = 0;
+        ssl->specs.hash_size             = WC_SHA384_DIGEST_SIZE;
+        ssl->specs.pad_size              = PAD_SHA;
+        ssl->specs.static_ecdh           = 0;
+        ssl->specs.key_size              = WC_SHA384_DIGEST_SIZE / 2;
+        ssl->specs.block_size            = 0;
+        ssl->specs.iv_size               = HMAC_NONCE_SZ;
+        ssl->specs.aead_mac_size         = WC_SHA384_DIGEST_SIZE;
+
+        break;
+    #endif
+#endif
+
     default:
         WOLFSSL_MSG("Unsupported cipher suite, SetCipherSpecs ECC");
         return UNSUPPORTED_SUITE;
@@ -1294,6 +1330,23 @@
         break;
 #endif
 
+#ifdef BUILD_TLS_RSA_WITH_NULL_MD5
+    case TLS_RSA_WITH_NULL_MD5 :
+        ssl->specs.bulk_cipher_algorithm = wolfssl_cipher_null;
+        ssl->specs.cipher_type           = stream;
+        ssl->specs.mac_algorithm         = md5_mac;
+        ssl->specs.kea                   = rsa_kea;
+        ssl->specs.sig_algo              = rsa_sa_algo;
+        ssl->specs.hash_size             = WC_MD5_DIGEST_SIZE;
+        ssl->specs.pad_size              = PAD_MD5;
+        ssl->specs.static_ecdh           = 0;
+        ssl->specs.key_size              = 0;
+        ssl->specs.block_size            = 0;
+        ssl->specs.iv_size               = 0;
+
+        break;
+#endif
+
 #ifdef BUILD_TLS_RSA_WITH_NULL_SHA
     case TLS_RSA_WITH_NULL_SHA :
         ssl->specs.bulk_cipher_algorithm = wolfssl_cipher_null;
@@ -1433,7 +1486,7 @@
         ssl->options.usingPSK_cipher     = 1;
         break;
 #endif
-        
+
 #ifdef BUILD_TLS_DH_anon_WITH_AES_256_GCM_SHA384
     case TLS_DH_anon_WITH_AES_256_GCM_SHA384:
         ssl->specs.bulk_cipher_algorithm = wolfssl_aes_gcm;
@@ -1808,57 +1861,6 @@
             break;
 #endif
 
-#ifdef BUILD_TLS_RSA_WITH_HC_128_B2B256
-        case TLS_RSA_WITH_HC_128_B2B256:
-            ssl->specs.bulk_cipher_algorithm = wolfssl_hc128;
-            ssl->specs.cipher_type           = stream;
-            ssl->specs.mac_algorithm         = blake2b_mac;
-            ssl->specs.kea                   = rsa_kea;
-            ssl->specs.sig_algo              = rsa_sa_algo;
-            ssl->specs.hash_size             = BLAKE2B_256;
-            ssl->specs.pad_size              = PAD_SHA;
-            ssl->specs.static_ecdh           = 0;
-            ssl->specs.key_size              = HC_128_KEY_SIZE;
-            ssl->specs.block_size            = 0;
-            ssl->specs.iv_size               = HC_128_IV_SIZE;
-
-            break;
-#endif
-
-#ifdef BUILD_TLS_RSA_WITH_AES_128_CBC_B2B256
-        case TLS_RSA_WITH_AES_128_CBC_B2B256:
-            ssl->specs.bulk_cipher_algorithm = wolfssl_aes;
-            ssl->specs.cipher_type           = block;
-            ssl->specs.mac_algorithm         = blake2b_mac;
-            ssl->specs.kea                   = rsa_kea;
-            ssl->specs.sig_algo              = rsa_sa_algo;
-            ssl->specs.hash_size             = BLAKE2B_256;
-            ssl->specs.pad_size              = PAD_SHA;
-            ssl->specs.static_ecdh           = 0;
-            ssl->specs.key_size              = AES_128_KEY_SIZE;
-            ssl->specs.iv_size               = AES_IV_SIZE;
-            ssl->specs.block_size            = AES_BLOCK_SIZE;
-
-            break;
-#endif
-
-#ifdef BUILD_TLS_RSA_WITH_AES_256_CBC_B2B256
-        case TLS_RSA_WITH_AES_256_CBC_B2B256:
-            ssl->specs.bulk_cipher_algorithm = wolfssl_aes;
-            ssl->specs.cipher_type           = block;
-            ssl->specs.mac_algorithm         = blake2b_mac;
-            ssl->specs.kea                   = rsa_kea;
-            ssl->specs.sig_algo              = rsa_sa_algo;
-            ssl->specs.hash_size             = BLAKE2B_256;
-            ssl->specs.pad_size              = PAD_SHA;
-            ssl->specs.static_ecdh           = 0;
-            ssl->specs.key_size              = AES_256_KEY_SIZE;
-            ssl->specs.iv_size               = AES_IV_SIZE;
-            ssl->specs.block_size            = AES_BLOCK_SIZE;
-
-            break;
-#endif
-
 #ifdef BUILD_TLS_RSA_WITH_RABBIT_SHA
     case TLS_RSA_WITH_RABBIT_SHA :
         ssl->specs.bulk_cipher_algorithm = wolfssl_rabbit;
@@ -2142,7 +2144,7 @@
     if (ssl->version.major == 3 && ssl->version.minor >= 1) {
 #ifndef NO_TLS
         ssl->options.tls = 1;
-    #ifndef WOLFSSL_NO_TLS12
+    #if !defined(WOLFSSL_NO_TLS12) && !defined(WOLFSSL_AEAD_ONLY)
         ssl->hmac = TLS_hmac;
     #endif
         if (ssl->version.minor >= 2) {
@@ -2153,7 +2155,12 @@
 #endif
     }
 
-#ifdef WOLFSSL_DTLS
+#if defined(HAVE_ENCRYPT_THEN_MAC) && !defined(WOLFSSL_AEAD_ONLY)
+    if (IsAtLeastTLSv1_3(ssl->version) || ssl->specs.cipher_type != block)
+       ssl->options.encThenMac = 0;
+#endif
+
+#if defined(WOLFSSL_DTLS) && !defined(WOLFSSL_AEAD_ONLY)
     if (ssl->options.dtls)
         ssl->hmac = TLS_hmac;
 #endif
@@ -2212,19 +2219,24 @@
 
 
 static int SetKeys(Ciphers* enc, Ciphers* dec, Keys* keys, CipherSpecs* specs,
-                   int side, void* heap, int devId)
+                   int side, void* heap, int devId, WC_RNG* rng, int tls13)
 {
+    (void)rng;
+    (void)tls13;
+
 #ifdef BUILD_ARC4
-    word32 sz = specs->key_size;
     if (specs->bulk_cipher_algorithm == wolfssl_rc4) {
-        if (enc && enc->arc4 == NULL)
+        word32 sz = specs->key_size;
+        if (enc && enc->arc4 == NULL) {
             enc->arc4 = (Arc4*)XMALLOC(sizeof(Arc4), heap, DYNAMIC_TYPE_CIPHER);
-        if (enc && enc->arc4 == NULL)
-            return MEMORY_E;
-        if (dec && dec->arc4 == NULL)
+            if (enc->arc4 == NULL)
+                 return MEMORY_E;
+        }
+        if (dec && dec->arc4 == NULL) {
             dec->arc4 = (Arc4*)XMALLOC(sizeof(Arc4), heap, DYNAMIC_TYPE_CIPHER);
-        if (dec && dec->arc4 == NULL)
-            return MEMORY_E;
+            if (dec->arc4 == NULL)
+                return MEMORY_E;
+        }
 
         if (enc) {
             if (wc_Arc4Init(enc->arc4, heap, devId) != 0) {
@@ -2608,6 +2620,15 @@
                 if (gcmRet != 0) return gcmRet;
                 XMEMCPY(keys->aead_enc_imp_IV, keys->client_write_IV,
                         AEAD_MAX_IMP_SZ);
+#if !defined(NO_PUBLIC_GCM_SET_IV) && \
+    ((!defined(HAVE_FIPS) && !defined(HAVE_SELFTEST)) || \
+    (defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)))
+                if (!tls13) {
+                    gcmRet = wc_AesGcmSetIV(enc->aes, AESGCM_NONCE_SZ,
+                            keys->client_write_IV, AESGCM_IMP_IV_SZ, rng);
+                    if (gcmRet != 0) return gcmRet;
+                }
+#endif
             }
             if (dec) {
                 gcmRet = wc_AesGcmSetKey(dec->aes, keys->server_write_key,
@@ -2624,6 +2645,15 @@
                 if (gcmRet != 0) return gcmRet;
                 XMEMCPY(keys->aead_enc_imp_IV, keys->server_write_IV,
                         AEAD_MAX_IMP_SZ);
+#if !defined(NO_PUBLIC_GCM_SET_IV) && \
+    ((!defined(HAVE_FIPS) && !defined(HAVE_SELFTEST)) || \
+    (defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)))
+                if (!tls13) {
+                    gcmRet = wc_AesGcmSetIV(enc->aes, AESGCM_NONCE_SZ,
+                            keys->server_write_IV, AESGCM_IMP_IV_SZ, rng);
+                    if (gcmRet != 0) return gcmRet;
+                }
+#endif
             }
             if (dec) {
                 gcmRet = wc_AesGcmSetKey(dec->aes, keys->client_write_key,
@@ -2692,6 +2722,15 @@
                 }
                 XMEMCPY(keys->aead_enc_imp_IV, keys->client_write_IV,
                         AEAD_MAX_IMP_SZ);
+#if !defined(NO_PUBLIC_CCM_SET_NONCE) && \
+    ((!defined(HAVE_FIPS) && !defined(HAVE_SELFTEST)) || \
+    (defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)))
+                if (!tls13) {
+                    CcmRet = wc_AesCcmSetNonce(enc->aes, keys->client_write_IV,
+                            AEAD_MAX_IMP_SZ);
+                    if (CcmRet != 0) return CcmRet;
+                }
+#endif
             }
             if (dec) {
                 CcmRet = wc_AesCcmSetKey(dec->aes, keys->server_write_key,
@@ -2712,6 +2751,15 @@
                 }
                 XMEMCPY(keys->aead_enc_imp_IV, keys->server_write_IV,
                         AEAD_MAX_IMP_SZ);
+#if !defined(NO_PUBLIC_CCM_SET_NONCE) && \
+    ((!defined(HAVE_FIPS) && !defined(HAVE_SELFTEST)) || \
+    (defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)))
+                if (!tls13) {
+                    CcmRet = wc_AesCcmSetNonce(enc->aes, keys->server_write_IV,
+                            AEAD_MAX_IMP_SZ);
+                    if (CcmRet != 0) return CcmRet;
+                }
+#endif
             }
             if (dec) {
                 CcmRet = wc_AesCcmSetKey(dec->aes, keys->client_write_key,
@@ -2838,6 +2886,75 @@
 
 #ifdef HAVE_NULL_CIPHER
     if (specs->bulk_cipher_algorithm == wolfssl_cipher_null) {
+    #ifdef WOLFSSL_TLS13
+        if (tls13) {
+            int hmacRet;
+            int hashType = WC_HASH_TYPE_NONE;
+
+            switch (specs->mac_algorithm) {
+                case sha256_mac:
+                    hashType = WC_SHA256;
+                    break;
+                case sha384_mac:
+                    hashType = WC_SHA384;
+                    break;
+                default:
+                    break;
+            }
+
+            if (enc && enc->hmac == NULL) {
+                enc->hmac = (Hmac*)XMALLOC(sizeof(Hmac), heap,
+                                                           DYNAMIC_TYPE_CIPHER);
+                if (enc->hmac == NULL)
+                    return MEMORY_E;
+            }
+
+            if (dec && dec->hmac == NULL) {
+                dec->hmac = (Hmac*)XMALLOC(sizeof(Hmac), heap,
+                                                           DYNAMIC_TYPE_CIPHER);
+                if (dec->hmac == NULL)
+                    return MEMORY_E;
+            }
+
+            if (enc) {
+                if (wc_HmacInit(enc->hmac, heap, devId) != 0) {
+                    WOLFSSL_MSG("HmacInit failed in SetKeys");
+                    return ASYNC_INIT_E;
+                }
+            }
+            if (dec) {
+                if (wc_HmacInit(dec->hmac, heap, devId) != 0) {
+                    WOLFSSL_MSG("HmacInit failed in SetKeys");
+                    return ASYNC_INIT_E;
+                }
+            }
+
+            if (side == WOLFSSL_CLIENT_END) {
+                if (enc) {
+                    hmacRet = wc_HmacSetKey(enc->hmac, hashType,
+                                       keys->client_write_key, specs->key_size);
+                    if (hmacRet != 0) return hmacRet;
+                }
+                if (dec) {
+                    hmacRet = wc_HmacSetKey(dec->hmac, hashType,
+                                       keys->server_write_key, specs->key_size);
+                    if (hmacRet != 0) return hmacRet;
+                }
+            }
+            else {
+                if (enc) {
+                    hmacRet = wc_HmacSetKey(enc->hmac, hashType,
+                                       keys->server_write_key, specs->key_size);
+                    if (hmacRet != 0) return hmacRet;
+                }
+                if (dec) {
+                    hmacRet = wc_HmacSetKey(dec->hmac, hashType,
+                                       keys->client_write_key, specs->key_size);
+                    if (hmacRet != 0) return hmacRet;
+                }
+            }
+        }
+    #endif
         if (enc)
             enc->setup = 1;
         if (dec)
@@ -2931,17 +3048,13 @@
  */
 int SetKeysSide(WOLFSSL* ssl, enum encrypt_side side)
 {
-    int devId = INVALID_DEVID, ret, copy = 0;
+    int ret, copy = 0;
     Ciphers* wc_encrypt = NULL;
     Ciphers* wc_decrypt = NULL;
     Keys*    keys    = &ssl->keys;
 
     (void)copy;
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-    devId = ssl->devId;
-#endif
-
 #ifdef HAVE_SECURE_RENEGOTIATION
     if (ssl->secure_renegotiation && ssl->secure_renegotiation->cache_status) {
         keys = &ssl->secure_renegotiation->tmp_keys;
@@ -2954,10 +3067,10 @@
 #ifdef WOLFSSL_DEBUG_TLS
             WOLFSSL_MSG("Provisioning ENCRYPT key");
             if (ssl->options.side == WOLFSSL_CLIENT_END) {
-                WOLFSSL_BUFFER(ssl->keys.client_write_key, AES_256_KEY_SIZE);
+                WOLFSSL_BUFFER(ssl->keys.client_write_key, ssl->specs.key_size);
             }
             else {
-                WOLFSSL_BUFFER(ssl->keys.server_write_key, AES_256_KEY_SIZE);
+                WOLFSSL_BUFFER(ssl->keys.server_write_key, ssl->specs.key_size);
             }
 #endif
             wc_encrypt = &ssl->encrypt;
@@ -2967,10 +3080,10 @@
 #ifdef WOLFSSL_DEBUG_TLS
             WOLFSSL_MSG("Provisioning DECRYPT key");
             if (ssl->options.side == WOLFSSL_CLIENT_END) {
-                WOLFSSL_BUFFER(ssl->keys.server_write_key, AES_256_KEY_SIZE);
+                WOLFSSL_BUFFER(ssl->keys.server_write_key, ssl->specs.key_size);
             }
             else {
-                WOLFSSL_BUFFER(ssl->keys.client_write_key, AES_256_KEY_SIZE);
+                WOLFSSL_BUFFER(ssl->keys.client_write_key, ssl->specs.key_size);
             }
 #endif
             wc_decrypt = &ssl->decrypt;
@@ -2980,17 +3093,17 @@
 #ifdef WOLFSSL_DEBUG_TLS
             WOLFSSL_MSG("Provisioning ENCRYPT key");
             if (ssl->options.side == WOLFSSL_CLIENT_END) {
-                WOLFSSL_BUFFER(ssl->keys.client_write_key, AES_256_KEY_SIZE);
+                WOLFSSL_BUFFER(ssl->keys.client_write_key, ssl->specs.key_size);
             }
             else {
-                WOLFSSL_BUFFER(ssl->keys.server_write_key, AES_256_KEY_SIZE);
+                WOLFSSL_BUFFER(ssl->keys.server_write_key, ssl->specs.key_size);
             }
             WOLFSSL_MSG("Provisioning DECRYPT key");
             if (ssl->options.side == WOLFSSL_CLIENT_END) {
-                WOLFSSL_BUFFER(ssl->keys.server_write_key, AES_256_KEY_SIZE);
+                WOLFSSL_BUFFER(ssl->keys.server_write_key, ssl->specs.key_size);
             }
             else {
-                WOLFSSL_BUFFER(ssl->keys.client_write_key, AES_256_KEY_SIZE);
+                WOLFSSL_BUFFER(ssl->keys.client_write_key, ssl->specs.key_size);
             }
 #endif
             wc_encrypt = &ssl->encrypt;
@@ -3003,14 +3116,21 @@
 
 #ifdef HAVE_ONE_TIME_AUTH
     if (!ssl->auth.setup && ssl->specs.bulk_cipher_algorithm == wolfssl_chacha){
-        ret = SetAuthKeys(&ssl->auth, keys, &ssl->specs, ssl->heap, devId);
+        ret = SetAuthKeys(&ssl->auth, keys, &ssl->specs, ssl->heap, ssl->devId);
         if (ret != 0)
            return ret;
     }
 #endif
 
+#if defined(WOLFSSL_RENESAS_TSIP_TLS) && \
+    !defined(NO_WOLFSSL_RENESAS_TSIP_TLS_SESSION)
+    /* check if keys for TSIP has been created */
+    if (tsip_useable(ssl) == 1)
+        ret = 0;
+    else
+#endif
     ret = SetKeys(wc_encrypt, wc_decrypt, keys, &ssl->specs, ssl->options.side,
-                  ssl->heap, devId);
+                  ssl->heap, ssl->devId, ssl->rng, ssl->options.tls1_3);
 
 #ifdef HAVE_SECURE_RENEGOTIATION
     if (copy) {
@@ -3022,15 +3142,19 @@
             clientCopy = 1;
 
         if (clientCopy) {
+    #ifndef WOLFSSL_AEAD_ONLY
             XMEMCPY(ssl->keys.client_write_MAC_secret,
                     keys->client_write_MAC_secret, WC_MAX_DIGEST_SIZE);
+    #endif
             XMEMCPY(ssl->keys.client_write_key,
                     keys->client_write_key, AES_256_KEY_SIZE);
             XMEMCPY(ssl->keys.client_write_IV,
                     keys->client_write_IV, MAX_WRITE_IV_SZ);
         } else {
+    #ifndef WOLFSSL_AEAD_ONLY
             XMEMCPY(ssl->keys.server_write_MAC_secret,
                     keys->server_write_MAC_secret, WC_MAX_DIGEST_SIZE);
+    #endif
             XMEMCPY(ssl->keys.server_write_key,
                     keys->server_write_key, AES_256_KEY_SIZE);
             XMEMCPY(ssl->keys.server_write_IV,
@@ -3099,8 +3223,10 @@
         /* Use the same keys for encrypt and decrypt. */
         if (ssl->specs.cipher_type != aead) {
             sz = ssl->specs.hash_size;
+    #ifndef WOLFSSL_AEAD_ONLY
             XMEMCPY(keys->client_write_MAC_secret,&keyData[i], sz);
             XMEMCPY(keys->server_write_MAC_secret,&keyData[i], sz);
+    #endif
             i += sz;
         }
         sz = ssl->specs.key_size;
@@ -3126,11 +3252,15 @@
     if (ssl->specs.cipher_type != aead) {
         sz = ssl->specs.hash_size;
         if (side & PROVISION_CLIENT) {
+    #ifndef WOLFSSL_AEAD_ONLY
             XMEMCPY(keys->client_write_MAC_secret,&keyData[i], sz);
+    #endif
             i += sz;
         }
         if (side & PROVISION_SERVER) {
+    #ifndef WOLFSSL_AEAD_ONLY
             XMEMCPY(keys->server_write_MAC_secret,&keyData[i], sz);
+    #endif
             i += sz;
         }
     }
@@ -3281,6 +3411,10 @@
     for (i = 0; i < sz; i++)
         ssl->arrays->preMasterSecret[i] = 0;
 
+    XFREE(ssl->arrays->preMasterSecret, ssl->heap, DYNAMIC_TYPE_SECRET);
+    ssl->arrays->preMasterSecret = NULL;
+    ssl->arrays->preMasterSz = 0;
+
     return 0;
 }
 
@@ -3471,5 +3605,4 @@
 }
 
 #endif /* WOLFCRYPT_ONLY */
-
 
--- a/src/ocsp.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/src/ocsp.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* ocsp.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -199,6 +199,10 @@
 }
 
 
+/* Mallocs responseBuffer->buffer and is up to caller to free on success
+ *
+ * Returns OCSP status
+ */
 static int GetOcspStatus(WOLFSSL_OCSP* ocsp, OcspRequest* request,
                   OcspEntry* entry, CertStatus** status, buffer* responseBuffer)
 {
@@ -224,9 +228,11 @@
     }
     else if (*status) {
 #ifndef NO_ASN_TIME
-        if (ValidateDate((*status)->thisDate, (*status)->thisDateFormat, BEFORE)
+        if (XVALIDATE_DATE((*status)->thisDate,
+                                             (*status)->thisDateFormat, BEFORE)
         &&  ((*status)->nextDate[0] != 0)
-        &&  ValidateDate((*status)->nextDate, (*status)->nextDateFormat, AFTER))
+        &&  XVALIDATE_DATE((*status)->nextDate,
+                                             (*status)->nextDateFormat, AFTER))
 #endif
         {
             ret = xstat2err((*status)->status);
@@ -260,9 +266,9 @@
  * entry          The OCSP entry for this certificate.
  * returns OCSP_LOOKUP_FAIL when the response is bad and 0 otherwise.
  */
-static int CheckResponse(WOLFSSL_OCSP* ocsp, byte* response, int responseSz,
-                         buffer* responseBuffer, CertStatus* status,
-                         OcspEntry* entry, OcspRequest* ocspRequest)
+WOLFSSL_LOCAL int CheckOcspResponse(WOLFSSL_OCSP *ocsp, byte *response, int responseSz,
+                                    WOLFSSL_BUFFER_INFO *responseBuffer, CertStatus *status,
+                                    OcspEntry *entry, OcspRequest *ocspRequest)
 {
 #ifdef WOLFSSL_SMALL_STACK
     CertStatus*   newStatus;
@@ -293,7 +299,8 @@
     InitOcspResponse(ocspResponse, newStatus, response, responseSz);
     ret = OcspResponseDecode(ocspResponse, ocsp->cm, ocsp->cm->heap, 0);
     if (ret != 0) {
-        WOLFSSL_MSG("OcspResponseDecode failed");
+        ocsp->error = ret;
+        WOLFSSL_LEAVE("OcspResponseDecode failed", ocsp->error);
         goto end;
     }
 
@@ -335,6 +342,7 @@
         }
 
         /* Replace existing certificate entry with updated */
+        newStatus->next = status->next;
         XMEMCPY(status, newStatus, sizeof(CertStatus));
     }
     else {
@@ -421,12 +429,13 @@
         ret = ocsp->statusCb(ssl, ioCtx);
         if (ret == 0) {
             ret = wolfSSL_get_ocsp_response(ssl, &response);
-            ret = CheckResponse(ocsp, response, ret, responseBuffer, status,
+            ret = CheckOcspResponse(ocsp, response, ret, responseBuffer, status,
                                 entry, NULL);
             if (response != NULL)
                 XFREE(response, NULL, DYNAMIC_TYPE_OPENSSL);
             return ret;
         }
+        WOLFSSL_LEAVE("CheckOcspRequest", ocsp->error);
         return OCSP_LOOKUP_FAIL;
     }
 #endif
@@ -450,6 +459,10 @@
     request = (byte*)XMALLOC(requestSz, ocsp->cm->heap, DYNAMIC_TYPE_OCSP);
     if (request == NULL) {
         WOLFSSL_LEAVE("CheckCertOCSP", MEMORY_ERROR);
+        if (responseBuffer) {
+            XFREE(responseBuffer->buffer, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+            responseBuffer->buffer = NULL;
+        }
         return MEMORY_ERROR;
     }
 
@@ -465,18 +478,21 @@
     XFREE(request, ocsp->cm->heap, DYNAMIC_TYPE_OCSP);
 
     if (responseSz >= 0 && response) {
-        ret = CheckResponse(ocsp, response, responseSz, responseBuffer, status,
+        ret = CheckOcspResponse(ocsp, response, responseSz, responseBuffer, status,
                             entry, ocspRequest);
     }
 
     if (response != NULL && ocsp->cm->ocspRespFreeCb)
         ocsp->cm->ocspRespFreeCb(ioCtx, response);
 
+    /* Keep responseBuffer in the case of getting to response check. Caller
+     * should free responseBuffer after checking OCSP return value in "ret" */
     WOLFSSL_LEAVE("CheckOcspRequest", ret);
     return ret;
 }
 
-#if defined(OPENSSL_ALL) || defined(WOLFSSL_NGINX) || defined(WOLFSSL_HAPROXY)
+#if defined(OPENSSL_ALL) || defined(WOLFSSL_NGINX) || defined(WOLFSSL_HAPROXY) || \
+    defined(WOLFSSL_APACHE_HTTPD)
 
 int wolfSSL_OCSP_resp_find_status(WOLFSSL_OCSP_BASICRESP *bs,
     WOLFSSL_OCSP_CERTID* id, int* status, int* reason,
@@ -493,9 +509,9 @@
     if (status != NULL)
         *status = bs->status->status;
     if (thisupd != NULL)
-        *thisupd = (WOLFSSL_ASN1_TIME*)bs->status->thisDateAsn;
+        *thisupd = &bs->status->thisDateParsed;
     if (nextupd != NULL)
-        *nextupd = (WOLFSSL_ASN1_TIME*)bs->status->nextDateAsn;
+        *nextupd = &bs->status->nextDateParsed;
 
     /* TODO: Not needed for Nginx. */
     if (reason != NULL)
@@ -646,29 +662,34 @@
     }
 #ifndef NO_FILESYSTEM
     else if (bio->type == WOLFSSL_BIO_FILE) {
-        long i;
-        long l;
+        long fcur;
+        long flen;
 
-        i = XFTELL(bio->file);
-        if (i < 0)
+        if (bio->ptr == NULL)
             return NULL;
-        XFSEEK(bio->file, 0, SEEK_END);
-        l = XFTELL(bio->file);
-        if (l < 0)
+
+        fcur = XFTELL((XFILE)bio->ptr);
+        if (fcur < 0)
             return NULL;
-        if (XFSEEK(bio->file, i, SEEK_SET) != 0)
+        if(XFSEEK((XFILE)bio->ptr, 0, SEEK_END) != 0)
+            return NULL;
+        flen = XFTELL((XFILE)bio->ptr);
+        if (flen < 0)
+            return NULL;
+        if (XFSEEK((XFILE)bio->ptr, fcur, SEEK_SET) != 0)
             return NULL;
 
         /* check calculated length */
-        if (l - i <= 0)
+        fcur = flen - fcur;
+        if (fcur > MAX_WOLFSSL_FILE_SIZE || fcur <= 0)
             return NULL;
 
-        data = (byte*)XMALLOC(l - i, 0, DYNAMIC_TYPE_TMP_BUFFER);
+        data = (byte*)XMALLOC(fcur, 0, DYNAMIC_TYPE_TMP_BUFFER);
         if (data == NULL)
             return NULL;
         dataAlloced = 1;
 
-        len = wolfSSL_BIO_read(bio, (char *)data, (int)l);
+        len = wolfSSL_BIO_read(bio, (char *)data, (int)flen);
     }
 #endif
     else
@@ -676,7 +697,8 @@
 
     if (len > 0) {
         p = data;
-        ret = wolfSSL_d2i_OCSP_RESPONSE(response, (const unsigned char **)&p, len);
+        ret = wolfSSL_d2i_OCSP_RESPONSE(response, (const unsigned char **)&p,
+            len);
     }
 
     if (dataAlloced)
@@ -813,7 +835,7 @@
 
 int wolfSSL_i2d_OCSP_REQUEST(OcspRequest* request, unsigned char** data)
 {
-    word32 size;
+    int size;
 
     size = EncodeOcspRequest(request, NULL, 0);
     if (size <= 0 || data == NULL)
@@ -849,8 +871,222 @@
     return req;
 }
 
+WOLFSSL_OCSP_CERTID* wolfSSL_OCSP_CERTID_dup(WOLFSSL_OCSP_CERTID* id)
+{
+    WOLFSSL_OCSP_CERTID* certId;
+
+    if (id == NULL) {
+        return NULL;
+    }
+
+    certId = (WOLFSSL_OCSP_CERTID*)XMALLOC(sizeof(WOLFSSL_OCSP_CERTID),
+        id->heap, DYNAMIC_TYPE_OPENSSL);
+    if (certId) {
+        XMEMCPY(certId, id, sizeof(WOLFSSL_OCSP_CERTID));
+    }
+    return certId;
+}
 #endif
 
+#if defined(OPENSSL_ALL) || defined(APACHE_HTTPD)
+int wolfSSL_i2d_OCSP_REQUEST_bio(WOLFSSL_BIO* out,
+        WOLFSSL_OCSP_REQUEST *req)
+{
+    int size = -1;
+    unsigned char* data = NULL;
+
+    WOLFSSL_ENTER("wolfSSL_i2d_OCSP_REQUEST_bio");
+    if (out == NULL || req == NULL)
+        return WOLFSSL_FAILURE;
+
+    size = wolfSSL_i2d_OCSP_REQUEST(req, NULL);
+    if (size > 0) {
+        data = (unsigned char*) XMALLOC(size, out->heap,
+                DYNAMIC_TYPE_TMP_BUFFER);
+    }
+
+    if (data != NULL) {
+        size = wolfSSL_i2d_OCSP_REQUEST(req, &data);
+    }
+
+    if (size <= 0) {
+        XFREE(data, out->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        return WOLFSSL_FAILURE;
+    }
+
+    if (wolfSSL_BIO_write(out, data, size) == (int)size) {
+        XFREE(data, out->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        return WOLFSSL_SUCCESS;
+    }
+
+    XFREE(data, out->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    return WOLFSSL_FAILURE;
+}
+#endif /* OPENSSL_ALL || APACHE_HTTPD */
+
+#ifdef OPENSSL_EXTRA
+#ifndef NO_WOLFSSL_STUB
+int wolfSSL_OCSP_REQUEST_add_ext(OcspRequest* req, WOLFSSL_X509_EXTENSION* ext,
+        int idx)
+{
+    WOLFSSL_STUB("wolfSSL_OCSP_REQUEST_add_ext");
+    (void)req;
+    (void)ext;
+    (void)idx;
+    return WOLFSSL_FATAL_ERROR;
+}
+#endif
+
+#ifndef NO_WOLFSSL_STUB
+OcspResponse* wolfSSL_OCSP_response_create(int status,
+    WOLFSSL_OCSP_BASICRESP* bs)
+{
+    WOLFSSL_STUB("wolfSSL_OCSP_response_create");
+    (void)status;
+    (void)bs;
+    return NULL;
+}
+#endif
+
+#ifndef NO_WOLFSSL_STUB
+const char* wolfSSL_OCSP_crl_reason_str(long s)
+{
+    WOLFSSL_STUB("wolfSSL_OCSP_crl_reason_str");
+    (void)s;
+    return NULL;
+}
+#endif
+
+/* Returns elements of an OCSP_CERTID struct. Currently only supports
+ * returning the serial number, and returns an error if user requests
+ * any of name, pmd, and/or keyHash.
+ * Return 1 on success, 0 on failure */
+int wolfSSL_OCSP_id_get0_info(WOLFSSL_ASN1_STRING **name,
+  WOLFSSL_ASN1_OBJECT **pmd, WOLFSSL_ASN1_STRING **keyHash,
+  WOLFSSL_ASN1_INTEGER **serial, WOLFSSL_OCSP_CERTID *cid)
+{
+    int i = 0;
+    WOLFSSL_ASN1_INTEGER* ser;
+
+    WOLFSSL_ENTER("wolfSSL_OCSP_id_get0_info");
+
+    if (cid == NULL)
+        return 0;
+
+    /* build up ASN1_INTEGER for serial */
+    if (serial != NULL) {
+        ser = wolfSSL_ASN1_INTEGER_new();
+        if (ser == NULL)
+            return 0;
+
+        if (cid->serialSz > (WOLFSSL_ASN1_INTEGER_MAX - 2)) {
+            /* allocate data buffer, +2 for type and length */
+            ser->data = (unsigned char*)XMALLOC(cid->serialSz + 2, NULL,
+                DYNAMIC_TYPE_OPENSSL);
+            if (ser->data == NULL) {
+                wolfSSL_ASN1_INTEGER_free(ser);
+                return 0;
+            }
+            ser->dataMax = cid->serialSz + 2;
+            ser->isDynamic = 1;
+        } else {
+            /* Use array instead of dynamic memory */
+            ser->data    = ser->intData;
+            ser->dataMax = WOLFSSL_ASN1_INTEGER_MAX;
+        }
+
+        #ifdef WOLFSSL_QT
+            /* Serial number starts at 0 index of ser->data */
+            XMEMCPY(&ser->data[i], cid->serial, cid->serialSz);
+            ser->length = cid->serialSz;
+        #else
+            ser->data[i++] = ASN_INTEGER;
+            i += SetLength(cid->serialSz, ser->data + i);
+            XMEMCPY(&ser->data[i], cid->serial, cid->serialSz);
+        #endif
+
+        cid->serialInt = ser;
+        *serial = cid->serialInt;
+    }
+
+    /* Not needed for Apache, return error if user is requesting */
+    if (name != NULL || pmd != NULL || keyHash != NULL) {
+        if (name != NULL)
+            *name = NULL;
+
+        if (pmd != NULL)
+            *pmd = NULL;
+
+        if (keyHash != NULL)
+            *keyHash = NULL;
+        return 0;
+    }
+
+    return 1;
+}
+
+#ifndef NO_WOLFSSL_STUB
+int wolfSSL_OCSP_request_add1_nonce(OcspRequest* req, unsigned char* val,
+        int sz)
+{
+    WOLFSSL_STUB("wolfSSL_OCSP_request_add1_nonce");
+    (void)req;
+    (void)val;
+    (void)sz;
+    return WOLFSSL_FATAL_ERROR;
+}
+#endif
+
+/* Returns result of OCSP nonce comparison. Return values:
+ *  1 - nonces are both present and equal
+ *  2 - both nonces are absent
+ *  3 - nonce only present in response
+ * -1 - nonce only present in request
+ *  0 - both nonces present and equal
+ */
+int wolfSSL_OCSP_check_nonce(OcspRequest* req, WOLFSSL_OCSP_BASICRESP* bs)
+{
+    byte* reqNonce = NULL;
+    byte* rspNonce = NULL;
+    int reqNonceSz = 0;
+    int rspNonceSz = 0;
+
+    WOLFSSL_ENTER("wolfSSL_OCSP_check_nonce");
+
+    if (req != NULL) {
+        reqNonce = req->nonce;
+        reqNonceSz = req->nonceSz;
+    }
+
+    if (bs != NULL) {
+        rspNonce = bs->nonce;
+        rspNonceSz = bs->nonceSz;
+    }
+
+    /* nonce absent in both req and rsp */
+    if (reqNonce == NULL && rspNonce == NULL)
+        return 2;
+
+    /* nonce present in rsp only */
+    if (reqNonce == NULL && rspNonce != NULL)
+        return 3;
+
+    /* nonce present in req only */
+    if (reqNonce != NULL && rspNonce == NULL)
+        return -1;
+
+    /* nonces are present and equal, return 1. Extra NULL check for fixing
+        scan-build warning. */
+    if (reqNonceSz == rspNonceSz && reqNonce && rspNonce) {
+        if (XMEMCMP(reqNonce, rspNonce, reqNonceSz) == 0)
+            return 1;
+    }
+
+    /* nonces are present but not equal */
+    return 0;
+}
+#endif /* OPENSSL_EXTRA */
+
 #else /* HAVE_OCSP */
 
 
@@ -862,5 +1098,4 @@
 
 #endif /* HAVE_OCSP */
 #endif /* WOLFCRYPT_ONLY */
-
 
--- a/src/sniffer.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/src/sniffer.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* sniffer.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -34,6 +34,8 @@
 
 #ifndef _WIN32
   #include <arpa/inet.h>
+#else
+  #include <WS2tcpip.h>
 #endif
 
 #ifdef _WIN32
@@ -54,6 +56,16 @@
     #include <wolfcrypt/src/misc.c>
 #endif
 
+#ifdef WOLF_CRYPTO_CB
+    #include <wolfssl/wolfcrypt/cryptocb.h>
+    #ifdef HAVE_INTEL_QA_SYNC
+        #include <wolfssl/wolfcrypt/port/intel/quickassist_sync.h>
+    #endif
+    #ifdef HAVE_CAVIUM_OCTEON_SYNC
+        #include <wolfssl/wolfcrypt/port/cavium/cavium_octeon_sync.h>
+    #endif
+#endif
+
 
 #ifndef WOLFSSL_SNIFFER_TIMEOUT
     #define WOLFSSL_SNIFFER_TIMEOUT 900
@@ -68,10 +80,13 @@
     ETHER_IF_ADDR_LEN  = 6,   /* ethernet interface address length */
     LOCAL_IF_ADDR_LEN  = 4,   /* localhost interface address length, !windows */
     TCP_PROTO          = 6,   /* TCP_PROTOCOL */
-    IP_HDR_SZ          = 20,  /* IP header length, min */
+    IP_HDR_SZ          = 20,  /* IPv4 header length, min */
+    IP6_HDR_SZ         = 40,  /* IPv6 header length, min */
     TCP_HDR_SZ         = 20,  /* TCP header length, min */
     IPV4               = 4,   /* IP version 4 */
+    IPV6               = 6,   /* IP version 6 */
     TCP_PROTOCOL       = 6,   /* TCP Protocol id */
+    NO_NEXT_HEADER     = 59,  /* IPv6 no headers follow */
     TRACE_MSG_SZ       = 80,  /* Trace Message buffer size */
     HASH_SIZE          = 499, /* Session Hash Table Rows */
     PSEUDO_HDR_SZ      = 12,  /* TCP Pseudo Header size in bytes */
@@ -122,8 +137,8 @@
 #endif /* _WIN32 */
 
 
-static int TraceOn = 0;         /* Trace is off by default */
-static FILE* TraceFile = 0;
+static WOLFSSL_GLOBAL int TraceOn = 0;         /* Trace is off by default */
+static WOLFSSL_GLOBAL FILE* TraceFile = 0;
 
 
 /* windows uses .rc table for this */
@@ -245,14 +260,30 @@
 
     /* 81 */
     "Bad Decrypt Size",
-    "Extended Master Secret Hash Error"
+    "Extended Master Secret Hash Error",
+    "Handshake Message Split Across TLS Records",
+    "ECC Private Decode Error",
+    "ECC Public Decode Error",
+
+    /* 86 */
+    "Watch callback not set",
+    "Watch hash failed",
+    "Watch callback failed",
+    "Bad Certificate Message",
+    "Store data callback not set",
+
+    /* 91 */
+    "No data destination Error",
+    "Store data callback failed",
+    "Loading chain input"
 };
 
 
 /* *nix version uses table above */
 static void GetError(int idx, char* str)
 {
-    XSTRNCPY(str, msgTable[idx - 1], MAX_ERROR_LEN);
+    XSTRNCPY(str, msgTable[idx - 1], MAX_ERROR_LEN-1);
+    str[MAX_ERROR_LEN-1] = '\0';
 }
 
 
@@ -293,11 +324,20 @@
 #endif
 
 
+typedef struct IpAddrInfo {
+    int version;
+    union {
+        word32 ip4;
+        byte   ip6[16];
+    };
+} IpAddrInfo;
+
+
 /* Sniffer Server holds info for each server/port monitored */
 typedef struct SnifferServer {
     SSL_CTX*       ctx;                          /* SSL context */
     char           address[MAX_SERVER_ADDRESS];  /* passed in server address */
-    word32         server;                       /* netowrk order address */
+    IpAddrInfo     server;                       /* network order address */
     int            port;                         /* server port */
 #ifdef HAVE_SNI
     NamedKey*      namedKeys;                    /* mapping of names and keys */
@@ -327,7 +367,7 @@
 } Flags;
 
 
-/* Out of Order FIN caputre */
+/* Out of Order FIN capture */
 typedef struct FinCaputre {
     word32 cliFinSeq;               /* client relative sequence FIN  0 is no */
     word32 srvFinSeq;               /* server relative sequence FIN, 0 is no */
@@ -359,8 +399,8 @@
     SnifferServer* context;         /* server context */
     SSL*           sslServer;       /* SSL server side decode */
     SSL*           sslClient;       /* SSL client side decode */
-    word32         server;          /* server address in network byte order */
-    word32         client;          /* client address in network byte order */
+    IpAddrInfo     server;          /* server address in network byte order */
+    IpAddrInfo     client;          /* client address in network byte order */
     word16         srvPort;         /* server port */
     word16         cliPort;         /* client port */
     word32         cliSeqStart;     /* client start sequence */
@@ -370,12 +410,16 @@
     FinCaputre     finCaputre;      /* retain out of order FIN s */
     Flags          flags;           /* session flags */
     time_t         lastUsed;          /* last used ticks */
+    word32         keySz;           /* size of the private key */
     PacketBuffer*  cliReassemblyList; /* client out of order packets */
     PacketBuffer*  srvReassemblyList; /* server out of order packets */
     word32         cliReassemblyMemory; /* client packet memory used */
     word32         srvReassemblyMemory; /* server packet memory used */
     struct SnifferSession* next;      /* for hash table list */
     byte*          ticketID;          /* mac ID of session ticket */
+#ifdef HAVE_SNI
+    const char*    sni;             /* server name indication */
+#endif
 #ifdef HAVE_EXTENDED_MASTER
     HsHashes*       hash;
 #endif
@@ -383,20 +427,43 @@
 
 
 /* Sniffer Server List and mutex */
-static SnifferServer* ServerList = 0;
-static wolfSSL_Mutex ServerListMutex;
+static WOLFSSL_GLOBAL SnifferServer* ServerList = 0;
+static WOLFSSL_GLOBAL wolfSSL_Mutex ServerListMutex;
 
 
 /* Session Hash Table, mutex, and count */
-static SnifferSession* SessionTable[HASH_SIZE];
-static wolfSSL_Mutex SessionMutex;
-static int SessionCount = 0;
+static WOLFSSL_GLOBAL SnifferSession* SessionTable[HASH_SIZE];
+static WOLFSSL_GLOBAL wolfSSL_Mutex SessionMutex;
+static WOLFSSL_GLOBAL int SessionCount = 0;
 
 /* Recovery of missed data switches and stats */
-static wolfSSL_Mutex RecoveryMutex;      /* for stats */
-static int RecoveryEnabled    = 0;       /* global switch */
-static int MaxRecoveryMemory  = -1;      /* per session max recovery memory */
-static word32 MissedDataSessions = 0;    /* # of sessions with missed data */
+static WOLFSSL_GLOBAL wolfSSL_Mutex RecoveryMutex; /* for stats */
+static WOLFSSL_GLOBAL int RecoveryEnabled    = 0;  /* global switch */
+static WOLFSSL_GLOBAL int MaxRecoveryMemory  = -1;
+                                           /* per session max recovery memory */
+static WOLFSSL_GLOBAL word32 MissedDataSessions = 0;
+                                            /* # of sessions with missed data */
+
+/* Connection Info Callback */
+static WOLFSSL_GLOBAL SSLConnCb ConnectionCb;
+static WOLFSSL_GLOBAL void* ConnectionCbCtx = NULL;
+
+#ifdef WOLFSSL_SNIFFER_STATS
+/* Sessions Statistics */
+static WOLFSSL_GLOBAL SSLStats SnifferStats;
+static WOLFSSL_GLOBAL wolfSSL_Mutex StatsMutex;
+#endif
+
+#ifdef WOLFSSL_SNIFFER_WATCH
+/* Watch Key Callback */
+static WOLFSSL_GLOBAL SSLWatchCb WatchCb;
+static WOLFSSL_GLOBAL void* WatchCbCtx = NULL;
+#endif
+
+#ifdef WOLFSSL_SNIFFER_STORE_DATA_CB
+/* Store Data Callback */
+static WOLFSSL_GLOBAL SSLStoreDataCb StoreDataCb;
+#endif
 
 
 static void UpdateMissedDataSessions(void)
@@ -407,6 +474,23 @@
 }
 
 
+#ifdef WOLFSSL_SNIFFER_STATS
+#define LOCK_STAT() do { wc_LockMutex(&StatsMutex); } while (0)
+#define UNLOCK_STAT() do { wc_UnLockMutex(&StatsMutex); } while (0)
+#define NOLOCK_ADD_TO_STAT(x,y) do { TraceStat(#x, y); x += y; } while (0)
+#define NOLOCK_INC_STAT(x) NOLOCK_ADD_TO_STAT(x,1)
+#define ADD_TO_STAT(x,y) do { LOCK_STAT(); \
+    NOLOCK_ADD_TO_STAT(x,y); UNLOCK_STAT(); } while (0)
+#define INC_STAT(x) do { LOCK_STAT(); \
+    NOLOCK_INC_STAT(x); UNLOCK_STAT(); } while (0)
+#endif
+
+
+#ifdef WOLF_CRYPTO_CB
+    static WOLFSSL_GLOBAL int CryptoDeviceId = INVALID_DEVID;
+#endif
+
+
 /* Initialize overall Sniffer */
 void ssl_InitSniffer(void)
 {
@@ -414,6 +498,24 @@
     wc_InitMutex(&ServerListMutex);
     wc_InitMutex(&SessionMutex);
     wc_InitMutex(&RecoveryMutex);
+#ifdef WOLFSSL_SNIFFER_STATS
+    XMEMSET(&SnifferStats, 0, sizeof(SSLStats));
+    wc_InitMutex(&StatsMutex);
+#endif
+#ifdef WOLF_CRYPTO_CB
+    #ifdef HAVE_INTEL_QA_SYNC
+    CryptoDeviceId = wc_CryptoCb_InitIntelQa();
+    if (INVALID_DEVID == CryptoDeviceId) {
+        printf("Couldn't init the Intel QA\n");
+    }
+    #endif
+    #ifdef HAVE_CAVIUM_OCTEON_SYNC
+    CryptoDeviceId = wc_CryptoCb_InitOcteon();
+    if (INVALID_DEVID == CryptoDeviceId) {
+        printf("Couldn't init the Intel QA\n");
+    }
+    #endif
+#endif
 }
 
 
@@ -425,9 +527,9 @@
     if (in) {
         if (in->key) {
             ForceZero(in->key, in->keySz);
-            free(in->key);
+            XFREE(in->key, NULL, DYNAMIC_TYPE_X509);
         }
-        free(in);
+        XFREE(in, NULL, DYNAMIC_TYPE_SNIFFER_NAMED_KEY);
     }
 }
 
@@ -458,7 +560,7 @@
 #endif
         SSL_CTX_free(srv->ctx);
     }
-    free(srv);
+    XFREE(srv, NULL, DYNAMIC_TYPE_SNIFFER_SERVER);
 }
 
 
@@ -466,8 +568,8 @@
 static void FreePacketBuffer(PacketBuffer* del)
 {
     if (del) {
-        free(del->data);
-        free(del);
+        XFREE(del->data, NULL, DYNAMIC_TYPE_SNIFFER_PB_BUFFER);
+        XFREE(del, NULL, DYNAMIC_TYPE_SNIFFER_PB);
     }
 }
 
@@ -498,12 +600,12 @@
         FreePacketList(session->cliReassemblyList);
         FreePacketList(session->srvReassemblyList);
 
-        free(session->ticketID);
+        XFREE(session->ticketID, NULL, DYNAMIC_TYPE_SNIFFER_TICKET_ID);
 #ifdef HAVE_EXTENDED_MASTER
-        free(session->hash);
+        XFREE(session->hash, NULL, DYNAMIC_TYPE_HASHES);
 #endif
     }
-    free(session);
+    XFREE(session, NULL, DYNAMIC_TYPE_SNIFFER_SESSION);
 }
 
 
@@ -542,6 +644,15 @@
     wc_FreeMutex(&SessionMutex);
     wc_FreeMutex(&ServerListMutex);
 
+#ifdef WOLF_CRYPTO_CB
+#ifdef HAVE_INTEL_QA_SYNC
+    wc_CryptoCb_CleanupIntelQa(&CryptoDeviceId);
+#endif
+#ifdef HAVE_CAVIUM_OCTEON_SYNC
+    wc_CryptoCb_CleanupOcteon(&CryptoDeviceId);
+#endif
+#endif
+
     if (TraceFile) {
         TraceOn = 0;
         fclose(TraceFile);
@@ -642,76 +753,30 @@
 /* Initialize a SnifferServer */
 static void InitSnifferServer(SnifferServer* sniffer)
 {
-    sniffer->ctx = 0;
-    XMEMSET(sniffer->address, 0, MAX_SERVER_ADDRESS);
-    sniffer->server   = 0;
-    sniffer->port     = 0;
-#ifdef HAVE_SNI
-    sniffer->namedKeys = 0;
-    wc_InitMutex(&sniffer->namedKeysMutex);
-#endif
-    sniffer->next     = 0;
+    XMEMSET(sniffer, 0, sizeof(SnifferServer));
 }
 
 
 /* Initialize session flags */
 static void InitFlags(Flags* flags)
 {
-    flags->side           = 0;
-    flags->serverCipherOn = 0;
-    flags->clientCipherOn = 0;
-    flags->resuming       = 0;
-    flags->cached         = 0;
-    flags->clientHello    = 0;
-    flags->finCount       = 0;
-    flags->fatalError     = 0;
-    flags->cliAckFault    = 0;
-    flags->srvAckFault    = 0;
-    flags->cliSkipPartial = 0;
-    flags->srvSkipPartial = 0;
-#ifdef HAVE_EXTENDED_MASTER
-    flags->expectEms      = 0;
-#endif
+    XMEMSET(flags, 0, sizeof(Flags));
 }
 
 
 /* Initialize FIN Capture */
 static void InitFinCapture(FinCaputre* cap)
 {
-    cap->cliFinSeq  = 0;
-    cap->srvFinSeq  = 0;
-    cap->cliCounted = 0;
-    cap->srvCounted = 0;
+    XMEMSET(cap, 0, sizeof(FinCaputre));
 }
 
 
 /* Initialize a Sniffer Session */
 static void InitSession(SnifferSession* session)
 {
-    session->context        = 0;
-    session->sslServer      = 0;
-    session->sslClient      = 0;
-    session->server         = 0;
-    session->client         = 0;
-    session->srvPort        = 0;
-    session->cliPort        = 0;
-    session->cliSeqStart    = 0;
-    session->srvSeqStart    = 0;
-    session->cliExpected    = 0;
-    session->srvExpected    = 0;
-    session->lastUsed       = 0;
-    session->cliReassemblyList = 0;
-    session->srvReassemblyList = 0;
-    session->cliReassemblyMemory = 0;
-    session->srvReassemblyMemory = 0;
-    session->next           = 0;
-    session->ticketID       = 0;
-
+    XMEMSET(session, 0, sizeof(SnifferSession));
     InitFlags(&session->flags);
     InitFinCapture(&session->finCaputre);
-#ifdef HAVE_EXTENDED_MASTER
-    session->hash = 0;
-#endif
 }
 
 
@@ -719,8 +784,8 @@
 typedef struct IpInfo {
     int    length;        /* length of this header */
     int    total;         /* total length of fragment */
-    word32 src;           /* network order source address */
-    word32 dst;           /* network order destination address */
+    IpAddrInfo src;       /* network order source address */
+    IpAddrInfo dst;       /* network order destination address */
 } IpInfo;
 
 
@@ -766,7 +831,7 @@
 } EthernetHdr;
 
 
-/* IP Header */
+/* IPv4 Header */
 typedef struct IpHdr {
     byte    ver_hl;              /* version/header length */
     byte    tos;                 /* type of service */
@@ -781,6 +846,27 @@
 } IpHdr;
 
 
+/* IPv6 Header */
+typedef struct Ip6Hdr {
+    byte    ver_hl;              /* version/traffic class high */
+    byte    tc_fl;               /* traffic class low/flow label high */
+    word16  fl;                  /* flow label low */
+    word16  length;              /* payload length */
+    byte    next_header;         /* next header (6 for TCP, any other skip) */
+    byte    hl;                  /* hop limit */
+    byte    src[16];             /* source address */
+    byte    dst[16];             /* destination address */
+} Ip6Hdr;
+
+
+/* IPv6 extension header */
+typedef struct Ip6ExtHdr {
+    byte next_header;            /* next header (6 for TCP, any other skip) */
+    byte length;                 /* length in 8-octet units - 1 */
+    byte reserved[6];
+} Ip6ExtHdr;
+
+
 #define IP_HL(ip)      ( (((ip)->ver_hl) & 0x0f) * 4)
 #define IP_V(ip)       ( ((ip)->ver_hl) >> 4)
 
@@ -789,7 +875,7 @@
     word16  srcPort;            /* source port */
     word16  dstPort;            /* destination port */
     word32  sequence;           /* sequence number */
-    word32  ack;                /* acknoledgment number */
+    word32  ack;                /* acknowledgment number */
     byte    offset;             /* data offset, reserved */
     byte    flags;              /* option flags */
     word16  window;             /* window */
@@ -807,7 +893,7 @@
 
 
 
-/* Use platform specific GetError to write to tracfile if tracing */
+/* Use platform specific GetError to write to trace file if tracing */
 static void Trace(int idx)
 {
     if (TraceOn) {
@@ -870,13 +956,9 @@
 
 
 /* Convert network byte order address into human readable */
-static char* IpToS(word32 addr, char* str)
+static const char* IpToS(int version, void* src, char* dst)
 {
-    byte* p = (byte*)&addr;
-
-    SNPRINTF(str, TRACE_MSG_SZ, "%d.%d.%d.%d", p[0], p[1], p[2], p[3]);
-
-    return str;
+    return inet_ntop(version, src, dst, TRACE_MSG_SZ);
 }
 
 
@@ -886,8 +968,22 @@
     if (TraceOn) {
         char src[TRACE_MSG_SZ];
         char dst[TRACE_MSG_SZ];
-        fprintf(TraceFile, "\tdst:%s src:%s\n", IpToS(iphdr->dst, dst),
-                IpToS(iphdr->src, src));
+        fprintf(TraceFile, "\tdst:%s src:%s\n",
+                IpToS(AF_INET, &iphdr->dst, dst),
+                IpToS(AF_INET, &iphdr->src, src));
+    }
+}
+
+
+/* Show destination and source address from Ip6Hdr for packet Trace */
+static void TraceIP6(Ip6Hdr* iphdr)
+{
+    if (TraceOn) {
+        char src[TRACE_MSG_SZ];
+        char dst[TRACE_MSG_SZ];
+        fprintf(TraceFile, "\tdst: %s src: %s\n",
+                IpToS(AF_INET6, iphdr->dst, dst),
+                IpToS(AF_INET6, iphdr->src, src));
     }
 }
 
@@ -1015,6 +1111,44 @@
 }
 
 
+/* Show SSLInfo if provided and is valid. */
+static void TraceSessionInfo(SSLInfo* sslInfo)
+{
+    if (TraceOn) {
+        if (sslInfo != NULL && sslInfo->isValid) {
+            fprintf(TraceFile,
+                    "\tver:(%u %u) suiteId:(%02x %02x) suiteName:(%s) "
+                    #ifdef HAVE_SNI
+                    "sni:(%s) "
+                    #endif
+                    "keySize:(%u)\n",
+                    sslInfo->protocolVersionMajor,
+                    sslInfo->protocolVersionMinor,
+                    sslInfo->serverCipherSuite0,
+                    sslInfo->serverCipherSuite,
+                    sslInfo->serverCipherSuiteName,
+                    #ifdef HAVE_SNI
+                    sslInfo->serverNameIndication,
+                    #endif
+                    sslInfo->keySize);
+        }
+    }
+}
+
+
+#ifdef WOLFSSL_SNIFFER_STATS
+
+/* Show value added to a named statistic. */
+static void TraceStat(const char* name, int add)
+{
+    if (TraceOn) {
+        fprintf(TraceFile, "\tAdding %d to %s\n", add, name);
+    }
+}
+
+#endif
+
+
 /* Set user error string */
 static void SetError(int idx, char* error, SnifferSession* session, int fatal)
 {
@@ -1025,6 +1159,21 @@
 }
 
 
+/* Compare IpAddrInfo structs */
+static WC_INLINE int MatchAddr(IpAddrInfo l, IpAddrInfo r)
+{
+    if (l.version == r.version) {
+        if (l.version == IPV4)
+            return (l.ip4 == r.ip4);
+        else if (l.version == IPV6)
+            return (0 == XMEMCMP(l.ip6, r.ip6, sizeof(l.ip6)));
+    }
+    return 0;
+}
+
+
+#ifndef WOLFSSL_SNIFFER_WATCH
+
 /* See if this IPV4 network order address has been registered */
 /* return 1 is true, 0 is false */
 static int IsServerRegistered(word32 addr)
@@ -1036,7 +1185,33 @@
 
     sniffer = ServerList;
     while (sniffer) {
-        if (sniffer->server == addr) {
+        if (sniffer->server.ip4 == addr) {
+            ret = 1;
+            break;
+        }
+        sniffer = sniffer->next;
+    }
+
+    wc_UnLockMutex(&ServerListMutex);
+
+    return ret;
+}
+
+
+/* See if this port has been registered to watch */
+/* See if this IPV4 network order address has been registered */
+/* return 1 is true, 0 is false */
+static int IsServerRegistered6(byte* addr)
+{
+    int ret = 0;     /* false */
+    SnifferServer* sniffer;
+
+    wc_LockMutex(&ServerListMutex);
+
+    sniffer = ServerList;
+    while (sniffer) {
+        if (sniffer->server.version == IPV6 &&
+                0 == XMEMCMP(sniffer->server.ip6, addr, sizeof(sniffer->server.ip6))) {
             ret = 1;
             break;
         }
@@ -1072,6 +1247,8 @@
     return ret;
 }
 
+#endif
+
 
 /* Get SnifferServer from IP and Port */
 static SnifferServer* GetSnifferServer(IpInfo* ipInfo, TcpInfo* tcpInfo)
@@ -1081,13 +1258,22 @@
     wc_LockMutex(&ServerListMutex);
 
     sniffer = ServerList;
+
+#ifndef WOLFSSL_SNIFFER_WATCH
     while (sniffer) {
-        if (sniffer->port == tcpInfo->srcPort && sniffer->server == ipInfo->src)
+        if (sniffer->port == tcpInfo->srcPort &&
+                MatchAddr(sniffer->server, ipInfo->src))
             break;
-        if (sniffer->port == tcpInfo->dstPort && sniffer->server == ipInfo->dst)
+        if (sniffer->port == tcpInfo->dstPort &&
+                MatchAddr(sniffer->server, ipInfo->dst))
             break;
+
         sniffer = sniffer->next;
     }
+#else
+    (void)ipInfo;
+    (void)tcpInfo;
+#endif
 
     wc_UnLockMutex(&ServerListMutex);
 
@@ -1098,14 +1284,28 @@
 /* Hash the Session Info, return hash row */
 static word32 SessionHash(IpInfo* ipInfo, TcpInfo* tcpInfo)
 {
-    word32 hash = ipInfo->src * ipInfo->dst;
+    word32 hash = 1;
+
+    if (ipInfo->src.version == IPV4) {
+        hash *= ipInfo->src.ip4 * ipInfo->dst.ip4;
+    }
+    else if (ipInfo->src.version == IPV6) {
+        word32* x;
+        word32  y;
+        x = (word32*)ipInfo->src.ip6;
+        y = x[0] ^ x[1] ^ x[2] ^ x[3];
+        hash *= y;
+        x = (word32*)ipInfo->dst.ip6;
+        y = x[0] ^ x[1] ^ x[2] ^ x[3];
+        hash *= y;
+    }
     hash *= tcpInfo->srcPort * tcpInfo->dstPort;
 
     return hash % HASH_SIZE;
 }
 
 
-/* Get Exisiting SnifferSession from IP and Port */
+/* Get Existing SnifferSession from IP and Port */
 static SnifferSession* GetSnifferSession(IpInfo* ipInfo, TcpInfo* tcpInfo)
 {
     SnifferSession* session;
@@ -1118,11 +1318,14 @@
 
     session = SessionTable[row];
     while (session) {
-        if (session->server == ipInfo->src && session->client == ipInfo->dst &&
+        if (MatchAddr(session->server, ipInfo->src) &&
+            MatchAddr(session->client, ipInfo->dst) &&
                     session->srvPort == tcpInfo->srcPort &&
                     session->cliPort == tcpInfo->dstPort)
             break;
-        if (session->client == ipInfo->src && session->server == ipInfo->dst &&
+
+        if (MatchAddr(session->client, ipInfo->src) &&
+            MatchAddr(session->server, ipInfo->dst) &&
                     session->cliPort == tcpInfo->srcPort &&
                     session->srvPort == tcpInfo->dstPort)
             break;
@@ -1137,18 +1340,21 @@
 
     /* determine side */
     if (session) {
-        if (ipInfo->dst == session->context->server &&
-            tcpInfo->dstPort == session->context->port)
+        if (MatchAddr(ipInfo->dst, session->server) &&
+            tcpInfo->dstPort == session->srvPort) {
+
             session->flags.side = WOLFSSL_SERVER_END;
-        else
+        }
+        else {
             session->flags.side = WOLFSSL_CLIENT_END;
+        }
     }
 
     return session;
 }
 
 
-#ifdef HAVE_SNI
+#if defined(HAVE_SNI) || defined(WOLFSSL_SNIFFER_WATCH)
 
 static int LoadKeyFile(byte** keyBuf, word32* keyBufSz,
                 const char* keyFile, int typeKey,
@@ -1165,11 +1371,18 @@
 
     file = XFOPEN(keyFile, "rb");
     if (file == XBADFILE) return -1;
-    XFSEEK(file, 0, XSEEK_END);
+    if(XFSEEK(file, 0, XSEEK_END) != 0) {
+        XFCLOSE(file);
+        return -1;
+    }
     fileSz = XFTELL(file);
+    if (fileSz > MAX_WOLFSSL_FILE_SIZE || fileSz < 0) {
+        XFCLOSE(file);
+        return -1;
+    }
     XREWIND(file);
 
-    loadBuf = (byte*)malloc(fileSz);
+    loadBuf = (byte*)XMALLOC(fileSz, NULL, DYNAMIC_TYPE_FILE);
     if (loadBuf == NULL) {
         XFCLOSE(file);
         return -1;
@@ -1179,12 +1392,12 @@
     XFCLOSE(file);
 
     if (ret != fileSz) {
-        free(loadBuf);
+        XFREE(loadBuf, NULL, DYNAMIC_TYPE_FILE);
         return -1;
     }
 
     if (typeKey == WOLFSSL_FILETYPE_PEM) {
-        byte* saveBuf   = (byte*)malloc(fileSz);
+        byte* saveBuf   = (byte*)XMALLOC(fileSz, NULL, DYNAMIC_TYPE_X509);
         int   saveBufSz = 0;
 
         ret = -1;
@@ -1193,7 +1406,7 @@
                                                 saveBuf, (int)fileSz, password);
             if (saveBufSz < 0) {
                 saveBufSz = 0;
-                free(saveBuf);
+                XFREE(saveBuf, NULL, DYNAMIC_TYPE_X509);
                 saveBuf = NULL;
             }
             else
@@ -1201,7 +1414,7 @@
         }
 
         ForceZero(loadBuf, (word32)fileSz);
-        free(loadBuf);
+        XFREE(loadBuf, NULL, DYNAMIC_TYPE_FILE);
 
         if (saveBuf) {
             *keyBuf = saveBuf;
@@ -1223,6 +1436,37 @@
 #endif
 
 
+#ifdef WOLFSSL_SNIFFER_WATCH
+
+static int CreateWatchSnifferServer(char* error)
+{
+    SnifferServer* sniffer;
+
+    sniffer = (SnifferServer*)XMALLOC(sizeof(SnifferServer), NULL,
+            DYNAMIC_TYPE_SNIFFER_SERVER);
+    if (sniffer == NULL) {
+        SetError(MEMORY_STR, error, NULL, 0);
+        return -1;
+    }
+    InitSnifferServer(sniffer);
+    sniffer->ctx = SSL_CTX_new(TLSv1_2_client_method());
+    if (!sniffer->ctx) {
+        SetError(MEMORY_STR, error, NULL, 0);
+        FreeSnifferServer(sniffer);
+        return -1;
+    }
+#ifdef WOLF_CRYPTO_CB
+    if (CryptoDeviceId != INVALID_DEVID)
+	    wolfSSL_CTX_SetDevId(sniffer->ctx, CryptoDeviceId);
+#endif
+    ServerList = sniffer;
+
+    return 0;
+}
+
+#endif
+
+
 static int SetNamedPrivateKey(const char* name, const char* address, int port,
             const char* keyFile, int typeKey, const char* password, char* error)
 {
@@ -1231,7 +1475,7 @@
     int            type = (typeKey == FILETYPE_PEM) ? WOLFSSL_FILETYPE_PEM :
                                                       WOLFSSL_FILETYPE_ASN1;
     int            isNew = 0;
-    word32         serverIp;
+    IpAddrInfo     serverIp;
 
 #ifdef HAVE_SNI
     NamedKey* namedKey = NULL;
@@ -1240,7 +1484,8 @@
     (void)name;
 #ifdef HAVE_SNI
     if (name != NULL) {
-        namedKey = (NamedKey*)malloc(sizeof(NamedKey));
+        namedKey = (NamedKey*)XMALLOC(sizeof(NamedKey),
+                NULL, DYNAMIC_TYPE_SNIFFER_NAMED_KEY);
         if (namedKey == NULL) {
             SetError(MEMORY_STR, error, NULL, 0);
             return -1;
@@ -1263,16 +1508,23 @@
     }
 #endif
 
-    serverIp = inet_addr(address);
+    serverIp.version = IPV4;
+    serverIp.ip4 = inet_addr(address);
+    if (serverIp.ip4 == INADDR_NONE) {
+        if (inet_pton(AF_INET6, address, serverIp.ip6) == 1) {
+            serverIp.version = IPV6;
+        }
+    }
     sniffer = ServerList;
     while (sniffer != NULL &&
-           (sniffer->server != serverIp || sniffer->port != port)) {
+            (!MatchAddr(sniffer->server, serverIp) || sniffer->port != port)) {
         sniffer = sniffer->next;
     }
 
     if (sniffer == NULL) {
         isNew = 1;
-        sniffer = (SnifferServer*)malloc(sizeof(SnifferServer));
+        sniffer = (SnifferServer*)XMALLOC(sizeof(SnifferServer),
+                NULL, DYNAMIC_TYPE_SNIFFER_SERVER);
         if (sniffer == NULL) {
             SetError(MEMORY_STR, error, NULL, 0);
 #ifdef HAVE_SNI
@@ -1313,6 +1565,9 @@
                 FreeSnifferServer(sniffer);
             return -1;
         }
+	#ifdef WOLF_CRYPTO_CB
+		wolfSSL_CTX_SetDevId(sniffer->ctx, CryptoDeviceId);
+	#endif
     }
 #ifdef HAVE_SNI
     else {
@@ -1382,12 +1637,66 @@
 }
 
 
+/* Check IP Header for IPV6, TCP, and a registered server address */
+/* returns 0 on success, -1 on error */
+static int CheckIp6Hdr(Ip6Hdr* iphdr, IpInfo* info, int length, char* error)
+{
+    int        version = IP_V(iphdr);
+    int        exthdrsz = IP6_HDR_SZ;
+
+    TraceIP6(iphdr);
+    Trace(IP_CHECK_STR);
+
+    if (version != IPV6) {
+        SetError(BAD_IPVER_STR, error, NULL, 0);
+        return -1;
+    }
+
+    /* Here, we need to move onto next header if not TCP. */
+    if (iphdr->next_header != TCP_PROTOCOL) {
+        Ip6ExtHdr* exthdr = (Ip6ExtHdr*)((byte*)iphdr + IP6_HDR_SZ);
+        do {
+            int hdrsz = (exthdr->length + 1) * 8;
+            if (hdrsz > length - exthdrsz) {
+                SetError(PACKET_HDR_SHORT_STR, error, NULL, 0);
+                return -1;
+            }
+            exthdrsz += hdrsz;
+            exthdr = (Ip6ExtHdr*)((byte*)exthdr + hdrsz);
+        }
+        while (exthdr->next_header != TCP_PROTOCOL &&
+                exthdr->next_header != NO_NEXT_HEADER);
+    }
+
+#ifndef WOLFSSL_SNIFFER_WATCH
+    if (!IsServerRegistered6(iphdr->src) && !IsServerRegistered6(iphdr->dst)) {
+        SetError(SERVER_NOT_REG_STR, error, NULL, 0);
+        return -1;
+    }
+#endif
+
+    info->length = exthdrsz;
+    info->total = ntohs(iphdr->length) + info->length;
+        /* IPv6 doesn't include its own header size in the length like v4. */
+    info->src.version = IPV6;
+    XMEMCPY(info->src.ip6, iphdr->src, sizeof(info->src.ip6));
+    info->dst.version = IPV6;
+    XMEMCPY(info->dst.ip6, iphdr->dst, sizeof(info->dst.ip6));
+
+    return 0;
+}
+
+
 /* Check IP Header for IPV4, TCP, and a registered server address */
+/* If header IPv6, pass to CheckIp6Hdr(). */
 /* returns 0 on success, -1 on error */
 static int CheckIpHdr(IpHdr* iphdr, IpInfo* info, int length, char* error)
 {
     int    version = IP_V(iphdr);
 
+    if (version == IPV6)
+        return CheckIp6Hdr((Ip6Hdr*)iphdr, info, length, error);
+
     TraceIP(iphdr);
     Trace(IP_CHECK_STR);
 
@@ -1401,15 +1710,19 @@
         return -1;
     }
 
+#ifndef WOLFSSL_SNIFFER_WATCH
     if (!IsServerRegistered(iphdr->src) && !IsServerRegistered(iphdr->dst)) {
         SetError(SERVER_NOT_REG_STR, error, NULL, 0);
         return -1;
     }
+#endif
 
     info->length  = IP_HL(iphdr);
     info->total   = ntohs(iphdr->length);
-    info->src     = iphdr->src;
-    info->dst     = iphdr->dst;
+    info->src.version = IPV4;
+    info->src.ip4 = iphdr->src;
+    info->dst.version = IPV4;
+    info->dst.ip4 = iphdr->dst;
 
     if (info->total == 0)
         info->total = length;  /* reassembled may be off */
@@ -1435,10 +1748,14 @@
     if (info->ack)
         info->ackNumber = ntohl(tcphdr->ack);
 
+#ifndef WOLFSSL_SNIFFER_WATCH
     if (!IsPortRegistered(info->srcPort) && !IsPortRegistered(info->dstPort)) {
         SetError(SERVER_PORT_NOT_REG_STR, error, NULL, 0);
         return -1;
     }
+#else
+    (void)error;
+#endif
 
     return 0;
 }
@@ -1457,13 +1774,65 @@
 }
 
 
-/* Process Client Key Exchange, RSA only */
+/* Copies the session's information to the provided sslInfo. Skip copy if
+ * SSLInfo is not provided. */
+static void CopySessionInfo(SnifferSession* session, SSLInfo* sslInfo)
+{
+    if (NULL != sslInfo) {
+        XMEMSET(sslInfo, 0, sizeof(SSLInfo));
+
+        /* Pass back Session Info after we have processed the Server Hello. */
+        if (0 != session->sslServer->options.cipherSuite) {
+            const char* pCipher;
+
+            sslInfo->isValid = 1;
+            sslInfo->protocolVersionMajor = session->sslServer->version.major;
+            sslInfo->protocolVersionMinor = session->sslServer->version.minor;
+            sslInfo->serverCipherSuite0 =
+                        session->sslServer->options.cipherSuite0;
+            sslInfo->serverCipherSuite =
+                        session->sslServer->options.cipherSuite;
+
+            pCipher = wolfSSL_get_cipher(session->sslServer);
+            if (NULL != pCipher) {
+                XSTRNCPY((char*)sslInfo->serverCipherSuiteName, pCipher,
+                         sizeof(sslInfo->serverCipherSuiteName));
+                sslInfo->serverCipherSuiteName
+                         [sizeof(sslInfo->serverCipherSuiteName) - 1] = '\0';
+            }
+            sslInfo->keySize = session->keySz;
+            #ifdef HAVE_SNI
+            if (NULL != session->sni) {
+                XSTRNCPY((char*)sslInfo->serverNameIndication,
+                         session->sni, sizeof(sslInfo->serverNameIndication));
+                sslInfo->serverNameIndication
+                         [sizeof(sslInfo->serverNameIndication) - 1] = '\0';
+            }
+            #endif
+            TraceSessionInfo(sslInfo);
+        }
+    }
+}
+
+
+/* Call the session connection start callback. */
+static void CallConnectionCb(SnifferSession* session)
+{
+    if (ConnectionCb != NULL) {
+        SSLInfo info;
+        CopySessionInfo(session, &info);
+        ConnectionCb((const void*)session, &info, ConnectionCbCtx);
+    }
+}
+
+
+/* Process Client Key Exchange, RSA or static ECDH */
 static int ProcessClientKeyExchange(const byte* input, int* sslBytes,
                                     SnifferSession* session, char* error)
 {
     word32 idx = 0;
-    RsaKey key;
-    int    ret;
+    int tryEcc = 0;
+    int ret;
 
     if (session->sslServer->buffers.key == NULL ||
         session->sslServer->buffers.key->buffer == NULL ||
@@ -1472,75 +1841,174 @@
         SetError(RSA_KEY_MISSING_STR, error, session, FATAL_ERROR_STATE);
         return -1;
     }
-    ret = wc_InitRsaKey(&key, 0);
-    if (ret == 0)
-        ret = wc_RsaPrivateKeyDecode(session->sslServer->buffers.key->buffer,
-                          &idx, &key, session->sslServer->buffers.key->length);
-    if (ret == 0) {
-        int length = wc_RsaEncryptSize(&key);
-
-        if (IsTLS(session->sslServer))
-            input += 2;     /* tls pre length */
-
-        if (length > *sslBytes) {
-            SetError(PARTIAL_INPUT_STR, error, session, FATAL_ERROR_STATE);
-            wc_FreeRsaKey(&key);
-            return -1;
+
+    {
+        RsaKey key;
+        int length;
+
+        ret = wc_InitRsaKey(&key, 0);
+        if (ret == 0) {
+            ret = wc_RsaPrivateKeyDecode(
+                    session->sslServer->buffers.key->buffer,
+                    &idx, &key, session->sslServer->buffers.key->length);
+            if (ret != 0) {
+                tryEcc = 1;
+                #ifndef HAVE_ECC
+                    SetError(RSA_DECODE_STR, error, session, FATAL_ERROR_STATE);
+                #else
+                    /* If we can do ECC, this isn't fatal. Not loading an ECC
+                     * key will be fatal, though. */
+                    SetError(RSA_DECODE_STR, error, session, 0);
+                #endif
+            }
         }
+
+        if (ret == 0) {
+            length = wc_RsaEncryptSize(&key);
+            if (IsTLS(session->sslServer)) {
+                input += 2;     /* tls pre length */
+            }
+
+            if (length > *sslBytes) {
+                SetError(PARTIAL_INPUT_STR, error, session, FATAL_ERROR_STATE);
+                ret = -1;
+            }
+        }
+
         #ifdef WC_RSA_BLINDING
+        if (ret == 0) {
             ret = wc_RsaSetRNG(&key, session->sslServer->rng);
             if (ret != 0) {
                 SetError(RSA_DECRYPT_STR, error, session, FATAL_ERROR_STATE);
-                return -1;
             }
-        #endif
-        do {
-        #ifdef WOLFSSL_ASYNC_CRYPT
-                ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
-        #endif
-            if (ret >= 0) {
-                ret = wc_RsaPrivateDecrypt(input, length,
-                      session->sslServer->arrays->preMasterSecret, SECRET_LEN,
-                      &key);
-            }
-        } while (ret == WC_PENDING_E);
-        if (ret != SECRET_LEN) {
-            SetError(RSA_DECRYPT_STR, error, session, FATAL_ERROR_STATE);
-            wc_FreeRsaKey(&key);
-            return -1;
-        }
-        session->sslServer->arrays->preMasterSz = SECRET_LEN;
-
-        /* store for client side as well */
-        XMEMCPY(session->sslClient->arrays->preMasterSecret,
-               session->sslServer->arrays->preMasterSecret, SECRET_LEN);
-        session->sslClient->arrays->preMasterSz = SECRET_LEN;
-
-        #ifdef SHOW_SECRETS
-        {
-            int i;
-            printf("pre master secret: ");
-            for (i = 0; i < SECRET_LEN; i++)
-                printf("%02x", session->sslServer->arrays->preMasterSecret[i]);
-            printf("\n");
         }
         #endif
+
+        if (ret == 0) {
+            session->keySz = length * WOLFSSL_BIT_SIZE;
+            /* length is the key size in bytes */
+            session->sslServer->arrays->preMasterSz = SECRET_LEN;
+
+            do {
+            #ifdef WOLFSSL_ASYNC_CRYPT
+                ret = wc_AsyncWait(ret, &key.asyncDev,
+                        WC_ASYNC_FLAG_CALL_AGAIN);
+            #endif
+                if (ret >= 0) {
+                    ret = wc_RsaPrivateDecrypt(input, length,
+                          session->sslServer->arrays->preMasterSecret,
+                          session->sslServer->arrays->preMasterSz, &key);
+                }
+            } while (ret == WC_PENDING_E);
+
+            if (ret != SECRET_LEN) {
+                SetError(RSA_DECRYPT_STR, error, session, FATAL_ERROR_STATE);
+            }
+        }
+
+        wc_FreeRsaKey(&key);
     }
-    else {
-        SetError(RSA_DECODE_STR, error, session, FATAL_ERROR_STATE);
-        wc_FreeRsaKey(&key);
-        return -1;
+
+    if (tryEcc) {
+#ifdef HAVE_ECC
+        ecc_key key;
+        ecc_key pubKey;
+        int length, keyInit = 0, pubKeyInit = 0;
+
+        idx = 0;
+        ret = wc_ecc_init(&key);
+        if (ret == 0) {
+            keyInit = 1;
+            ret = wc_ecc_init(&pubKey);
+        }
+        if (ret == 0) {
+            pubKeyInit = 1;
+            ret = wc_EccPrivateKeyDecode(
+                    session->sslServer->buffers.key->buffer,
+                    &idx, &key, session->sslServer->buffers.key->length);
+            if (ret != 0) {
+                SetError(ECC_DECODE_STR, error, session, FATAL_ERROR_STATE);
+            }
+        }
+
+        if (ret == 0) {
+            length = wc_ecc_size(&key) * 2 + 1;
+            /* The length should be 2 times the key size (x and y), plus 1
+             * for the type byte. */
+            if (IsTLS(session->sslServer)) {
+                input += 1; /* Don't include the TLS length for the key. */
+            }
+
+            if (length + 1 > *sslBytes) {
+                SetError(PARTIAL_INPUT_STR,
+                        error, session, FATAL_ERROR_STATE);
+                ret = -1;
+            }
+        }
+
+        if (ret == 0) {
+            ret = wc_ecc_import_x963_ex(input, length, &pubKey, ECC_CURVE_DEF);
+            if (ret != 0) {
+                SetError(ECC_PUB_DECODE_STR, error, session, FATAL_ERROR_STATE);
+            }
+        }
+
+        if (ret == 0) {
+            session->keySz = ((length - 1) / 2) * WOLFSSL_BIT_SIZE;
+            /* Length is in bytes. Subtract 1 for the ECC key type. Divide
+             * by two as the key is in (x,y) coordinates, where x and y are
+             * the same size, the key size. Convert from bytes to bits. */
+            session->sslServer->arrays->preMasterSz = ENCRYPT_LEN;
+
+            do {
+            #ifdef WOLFSSL_ASYNC_CRYPT
+                ret = wc_AsyncWait(ret, &key.asyncDev,
+                        WC_ASYNC_FLAG_CALL_AGAIN);
+            #endif
+                if (ret >= 0) {
+                    ret = wc_ecc_shared_secret(&key, &pubKey,
+                          session->sslServer->arrays->preMasterSecret,
+                          &session->sslServer->arrays->preMasterSz);
+                }
+            } while (ret == WC_PENDING_E);
+        }
+
+#ifdef WOLFSSL_SNIFFER_STATS
+        if (ret != 0)
+            INC_STAT(SnifferStats.sslKeyFails);
+#endif
+
+        if (keyInit)
+            wc_ecc_free(&key);
+        if (pubKeyInit)
+            wc_ecc_free(&pubKey);
+#endif
     }
 
+    /* store for client side as well */
+    XMEMCPY(session->sslClient->arrays->preMasterSecret,
+           session->sslServer->arrays->preMasterSecret,
+           session->sslServer->arrays->preMasterSz);
+    session->sslClient->arrays->preMasterSz =
+        session->sslServer->arrays->preMasterSz;
+
+    #ifdef SHOW_SECRETS
+    {
+        word32 i;
+        printf("pre master secret: ");
+        for (i = 0; i < session->sslServer->arrays->preMasterSz; i++)
+            printf("%02x", session->sslServer->arrays->preMasterSecret[i]);
+        printf("\n");
+    }
+    #endif
+
     if (SetCipherSpecs(session->sslServer) != 0) {
         SetError(BAD_CIPHER_SPEC_STR, error, session, FATAL_ERROR_STATE);
-        wc_FreeRsaKey(&key);
         return -1;
     }
 
     if (SetCipherSpecs(session->sslClient) != 0) {
         SetError(BAD_CIPHER_SPEC_STR, error, session, FATAL_ERROR_STATE);
-        wc_FreeRsaKey(&key);
         return -1;
     }
 
@@ -1572,7 +2040,8 @@
     }
 #endif
 
-    wc_FreeRsaKey(&key);
+    CallConnectionCb(session);
+
     return ret;
 }
 
@@ -1615,7 +2084,7 @@
                               SnifferSession* session, char* error)
 {
     ProtocolVersion pv;
-    byte            b;
+    byte            b, b0;
     int             toRead = VERSION_SZ + RAN_LEN + ENUM_LEN;
     int             doResume     = 0;
     int             initialBytes = *sslBytes;
@@ -1663,14 +2132,33 @@
     *sslBytes -= b;
 
     /* cipher suite */
-    b = *input++;  /* first byte, ECC or not */
-    session->sslServer->options.cipherSuite0 = b;
-    session->sslClient->options.cipherSuite0 = b;
+    b0 = *input++;  /* first byte, ECC or not */
+    session->sslServer->options.cipherSuite0 = b0;
+    session->sslClient->options.cipherSuite0 = b0;
     b = *input++;
     session->sslServer->options.cipherSuite = b;
     session->sslClient->options.cipherSuite = b;
     *sslBytes -= SUITE_LEN;
 
+#ifdef WOLFSSL_SNIFFER_STATS
+    {
+        const CipherSuiteInfo* suites = GetCipherNames();
+        int suitesSz = GetCipherNamesSize();
+        int match = 0;
+
+        while (suitesSz) {
+            if (b0 == suites->cipherSuite0 && b == suites->cipherSuite) {
+                match = 1;
+                break;
+            }
+            suites++;
+            suitesSz--;
+        }
+        if (!match)
+            INC_STAT(SnifferStats.sslCiphersUnsupported);
+    }
+#endif /* WOLFSSL_SNIFFER_STATS */
+
     /* compression */
     b = *input++;
     *sslBytes -= ENUM_LEN;
@@ -1731,15 +2219,16 @@
     }
 
     if (!session->flags.expectEms) {
-        free(session->hash);
+        XFREE(session->hash, NULL, DYNAMIC_TYPE_HASHES);
         session->hash = NULL;
     }
 #endif
 
-    if (session->sslServer->options.haveSessionId &&
-            XMEMCMP(session->sslServer->arrays->sessionID,
+    if (session->sslServer->options.haveSessionId) {
+        if (XMEMCMP(session->sslServer->arrays->sessionID,
                     session->sslClient->arrays->sessionID, ID_LEN) == 0)
-        doResume = 1;
+            doResume = 1;
+    }
     else if (session->sslClient->options.haveSessionId == 0 &&
              session->sslServer->options.haveSessionId == 0 &&
              session->ticketID)
@@ -1757,6 +2246,9 @@
         SSL_SESSION* resume = GetSession(session->sslServer,
                                   session->sslServer->arrays->masterSecret, 0);
         if (resume == NULL) {
+#ifdef WOLFSSL_SNIFFER_STATS
+            INC_STAT(SnifferStats.sslResumeMisses);
+#endif
             SetError(BAD_SESSION_RESUME_STR, error, session, FATAL_ERROR_STATE);
             return -1;
         }
@@ -1766,6 +2258,10 @@
         session->flags.resuming = 1;
 
         Trace(SERVER_DID_RESUMPTION_STR);
+#ifdef WOLFSSL_SNIFFER_STATS
+        INC_STAT(SnifferStats.sslResumedConns);
+        INC_STAT(SnifferStats.sslResumptionValid);
+#endif
         if (SetCipherSpecs(session->sslServer) != 0) {
             SetError(BAD_CIPHER_SPEC_STR, error, session, FATAL_ERROR_STATE);
             return -1;
@@ -1792,6 +2288,11 @@
             return -1;
         }
     }
+    else {
+#ifdef WOLFSSL_SNIFFER_STATS
+        INC_STAT(SnifferStats.sslStandardConns);
+#endif
+    }
 #ifdef SHOW_SECRETS
     {
         int i;
@@ -1845,6 +2346,7 @@
                                                              FATAL_ERROR_STATE);
                         return -1;
                     }
+                    session->sni = namedKey->name;
                     break;
                 }
                 else
@@ -1976,7 +2478,8 @@
 
             if (extLen) {
                 if (session->ticketID == 0) {
-                    session->ticketID = (byte*)malloc(ID_LEN);
+                    session->ticketID = (byte*)XMALLOC(ID_LEN,
+                            NULL, DYNAMIC_TYPE_SNIFFER_TICKET_ID);
                     if (session->ticketID == 0) {
                         SetError(MEMORY_STR, error, session,
                                  FATAL_ERROR_STATE);
@@ -1996,6 +2499,83 @@
 }
 
 
+#ifdef WOLFSSL_SNIFFER_WATCH
+
+/* Process Certificate */
+static int ProcessCertificate(const byte* input, int* sslBytes,
+        SnifferSession* session, char* error)
+{
+    Sha256 sha;
+    const byte* certChain;
+    word32 certChainSz;
+    word32 certSz;
+    int ret;
+    byte digest[SHA256_DIGEST_SIZE];
+
+    /* If the receiver is the server, this is the client certificate message,
+     * and it should be ignored at this point. */
+    if (session->flags.side == WOLFSSL_SERVER_END)
+        return 0;
+
+    if (WatchCb == NULL) {
+        SetError(WATCH_CB_MISSING_STR, error, session, FATAL_ERROR_STATE);
+        return -1;
+    }
+
+    if (*sslBytes < CERT_HEADER_SZ) {
+        SetError(BAD_CERT_MSG_STR, error, session, FATAL_ERROR_STATE);
+        return -1;
+    }
+    ato24(input, &certChainSz);
+    *sslBytes -= CERT_HEADER_SZ;
+    input += CERT_HEADER_SZ;
+
+    if (*sslBytes < (int)certChainSz) {
+        SetError(BAD_CERT_MSG_STR, error, session, FATAL_ERROR_STATE);
+        return -1;
+    }
+    certChain = input;
+
+    ato24(input, &certSz);
+    input += OPAQUE24_LEN;
+    if (*sslBytes < (int)certSz) {
+        SetError(BAD_CERT_MSG_STR, error, session, FATAL_ERROR_STATE);
+        return -1;
+    }
+
+    *sslBytes -= certChainSz;
+
+    ret = wc_InitSha256(&sha);
+    if (ret == 0)
+        ret = wc_Sha256Update(&sha, input, certSz);
+    if (ret == 0)
+        ret = wc_Sha256Final(&sha, digest);
+    if (ret != 0) {
+        SetError(WATCH_HASH_STR, error, session, FATAL_ERROR_STATE);
+        return -1;
+    }
+
+    ret = WatchCb((void*)session, digest, sizeof(digest),
+            certChain, certChainSz, WatchCbCtx, error);
+    if (ret != 0) {
+#ifdef WOLFSSL_SNIFFER_STATS
+        INC_STAT(SnifferStats.sslKeysUnmatched);
+#endif
+        SetError(WATCH_FAIL_STR, error, session, FATAL_ERROR_STATE);
+        return -1;
+    }
+    else {
+#ifdef WOLFSSL_SNIFFER_STATS
+        INC_STAT(SnifferStats.sslKeyMatches);
+#endif
+    }
+
+    return 0;
+}
+
+#endif
+
+
 /* Process Finished */
 static int ProcessFinished(const byte* input, int size, int* sslBytes,
                            SnifferSession* session, char* error)
@@ -2021,8 +2601,12 @@
     if (ret == 0 && session->flags.cached == 0) {
         if (session->sslServer->options.haveSessionId) {
             WOLFSSL_SESSION* sess = GetSession(session->sslServer, NULL, 0);
-            if (sess == NULL)
+            if (sess == NULL) {
                 AddSession(session->sslServer);  /* don't re add */
+#ifdef WOLFSSL_SNIFFER_STATS
+                INC_STAT(SnifferStats.sslResumptionInserts);
+#endif
+            }
             session->flags.cached = 1;
          }
     }
@@ -2059,8 +2643,9 @@
     startBytes = *sslBytes;
 
     if (*sslBytes < size) {
-        SetError(HANDSHAKE_INPUT_STR, error, session, FATAL_ERROR_STATE);
-        return -1;
+        Trace(SPLIT_HANDSHAKE_MSG_STR);
+        *sslBytes = 0;
+        return ret;
     }
 
     /* A session's arrays are released when the handshake is completed. */
@@ -2100,6 +2685,9 @@
             Trace(GOT_CERT_REQ_STR);
             break;
         case server_key_exchange:
+#ifdef WOLFSSL_SNIFFER_STATS
+            INC_STAT(SnifferStats.sslEphemeralMisses);
+#endif
             Trace(GOT_SERVER_KEY_EX_STR);
             /* can't know temp key passively */
             SetError(BAD_CIPHER_SPEC_STR, error, session, FATAL_ERROR_STATE);
@@ -2107,6 +2695,14 @@
             break;
         case certificate:
             Trace(GOT_CERT_STR);
+            if (session->flags.side == WOLFSSL_SERVER_END) {
+#ifdef WOLFSSL_SNIFFER_STATS
+                INC_STAT(SnifferStats.sslClientAuthConns);
+#endif
+            }
+#ifdef WOLFSSL_SNIFFER_WATCH
+            ret = ProcessCertificate(input, sslBytes, session, error);
+#endif
             break;
         case server_hello_done:
             Trace(GOT_SERVER_HELLO_DONE_STR);
@@ -2137,7 +2733,7 @@
                     ret = -1;
                 }
                 XMEMSET(session->hash, 0, sizeof(HsHashes));
-                free(session->hash);
+                XFREE(session->hash, NULL, DYNAMIC_TYPE_HASHES);
                 session->hash = NULL;
             }
             else {
@@ -2247,6 +2843,12 @@
             break;
          #endif
 
+        #ifdef HAVE_NULL_CIPHER
+        case wolfssl_cipher_null:
+            XMEMCPY(output, input, sz);
+            break;
+        #endif
+
         default:
             Trace(BAD_DECRYPT_TYPE);
             ret = -1;
@@ -2359,7 +2961,8 @@
 
     Trace(NEW_SESSION_STR);
     /* create a new one */
-    session = (SnifferSession*)malloc(sizeof(SnifferSession));
+    session = (SnifferSession*)XMALLOC(sizeof(SnifferSession),
+            NULL, DYNAMIC_TYPE_SNIFFER_SESSION);
     if (session == NULL) {
         SetError(MEMORY_STR, error, NULL, 0);
         return 0;
@@ -2367,15 +2970,16 @@
     InitSession(session);
 #ifdef HAVE_EXTENDED_MASTER
     {
-        HsHashes* newHash = (HsHashes*)malloc(sizeof(HsHashes));
+        HsHashes* newHash = (HsHashes*)XMALLOC(sizeof(HsHashes),
+                NULL, DYNAMIC_TYPE_HASHES);
         if (newHash == NULL) {
             SetError(MEMORY_STR, error, NULL, 0);
-            free(session);
+            XFREE(session, NULL, DYNAMIC_TYPE_SNIFFER_SESSION);
             return 0;
         }
         if (HashInit(newHash) != 0) {
             SetError(EXTENDED_MASTER_HASH_STR, error, NULL, 0);
-            free(session);
+            XFREE(session, NULL, DYNAMIC_TYPE_SNIFFER_SESSION);
             return 0;
         }
         session->hash = newHash;
@@ -2388,18 +2992,22 @@
     session->cliSeqStart = tcpInfo->sequence;
     session->cliExpected = 1;  /* relative */
     session->lastUsed= time(NULL);
+    session->keySz = 0;
+#ifdef HAVE_SNI
+    session->sni = NULL;
+#endif
 
     session->context = GetSnifferServer(ipInfo, tcpInfo);
     if (session->context == NULL) {
         SetError(SERVER_NOT_REG_STR, error, NULL, 0);
-        free(session);
+        XFREE(session, NULL, DYNAMIC_TYPE_SNIFFER_SESSION);
         return 0;
     }
 
     session->sslServer = SSL_new(session->context->ctx);
     if (session->sslServer == NULL) {
         SetError(BAD_NEW_SSL_STR, error, session, FATAL_ERROR_STATE);
-        free(session);
+        XFREE(session, NULL, DYNAMIC_TYPE_SNIFFER_SESSION);
         return 0;
     }
     session->sslClient = SSL_new(session->context->ctx);
@@ -2408,7 +3016,7 @@
         session->sslServer = 0;
 
         SetError(BAD_NEW_SSL_STR, error, session, FATAL_ERROR_STATE);
-        free(session);
+        XFREE(session, NULL, DYNAMIC_TYPE_SNIFFER_SESSION);
         return 0;
     }
     /* put server back into server mode */
@@ -2431,12 +3039,10 @@
 
     wc_UnLockMutex(&SessionMutex);
 
-    /* determine headed side */
-    if (ipInfo->dst == session->context->server &&
-        tcpInfo->dstPort == session->context->port)
-        session->flags.side = WOLFSSL_SERVER_END;
-    else
-        session->flags.side = WOLFSSL_CLIENT_END;
+    /* CreateSession is called in response to a SYN packet, we know this
+     * is headed to the server. Also we know the server is one we care
+     * about as we've passed the GetSnifferServer() successfully. */
+    session->flags.side = WOLFSSL_SERVER_END;
 
     return session;
 }
@@ -2570,7 +3176,9 @@
         SetError(PACKET_HDR_SHORT_STR, error, NULL, 0);
         return -1;
     }
-    *sslBytes = (int)(packet + length - *sslFrame);
+    /* We only care about the data in the TCP/IP record. There may be extra
+     * data after the IP record for the FCS for Ethernet. */
+    *sslBytes = (int)(packet + ipInfo->total - *sslFrame);
 
     return 0;
 }
@@ -2584,6 +3192,9 @@
     /* create a new SnifferSession on client SYN */
     if (tcpInfo->syn && !tcpInfo->ack) {
         TraceClientSyn(tcpInfo->sequence);
+#ifdef WOLFSSL_SNIFFER_STATS
+        INC_STAT(SnifferStats.sslEncryptedConns);
+#endif
         *session = CreateSession(ipInfo, tcpInfo, error);
         if (*session == NULL) {
             *session = GetSnifferSession(ipInfo, tcpInfo);
@@ -2607,6 +3218,13 @@
             if (sslBytes == 0 && tcpInfo->ack)
                 return 1;
 
+#ifdef WOLFSSL_SNIFFER_STATS
+            LOCK_STAT();
+            NOLOCK_INC_STAT(SnifferStats.sslDecryptedPackets);
+            NOLOCK_ADD_TO_STAT(SnifferStats.sslDecryptedBytes, sslBytes);
+            UNLOCK_STAT();
+#endif
+
             SetError(BAD_SESSION_STR, error, NULL, 0);
             return -1;
         }
@@ -2624,16 +3242,17 @@
     int added = end - *begin + 1;
     assert(*begin <= end);
 
-    pb = (PacketBuffer*)malloc(sizeof(PacketBuffer));
+    pb = (PacketBuffer*)XMALLOC(sizeof(PacketBuffer),
+            NULL, DYNAMIC_TYPE_SNIFFER_PB);
     if (pb == NULL) return NULL;
 
     pb->next  = 0;
     pb->begin = *begin;
     pb->end   = end;
-    pb->data = (byte*)malloc(added);
+    pb->data = (byte*)XMALLOC(added, NULL, DYNAMIC_TYPE_SNIFFER_PB_BUFFER);
 
     if (pb->data == NULL) {
-        free(pb);
+        XFREE(pb, NULL, DYNAMIC_TYPE_SNIFFER_PB);
         return NULL;
     }
     XMEMCPY(pb->data, data, added);
@@ -2950,6 +3569,9 @@
         }
 
         Trace(DROPPING_LOST_FRAG_STR);
+#ifdef WOLFSSL_SNIFFER_STATS
+        INC_STAT(SnifferStats.sslDecodeFails);
+#endif
         prev = curr;
         curr = curr->next;
         *reassemblyMemory -= (prev->end - prev->begin + 1);
@@ -3067,7 +3689,8 @@
 /* returns 0 on success (continue), -1 on error, 1 on success (end) */
 static int CheckPreRecord(IpInfo* ipInfo, TcpInfo* tcpInfo,
                           const byte** sslFrame, SnifferSession** session,
-                          int* sslBytes, const byte** end, char* error)
+                          int* sslBytes, const byte** end,
+                          void* vChain, word32 chainSz, char* error)
 {
     word32 length;
     SSL*  ssl = ((*session)->flags.side == WOLFSSL_SERVER_END) ?
@@ -3109,8 +3732,7 @@
 
     /* if current partial data, add to end of partial */
     /* if skipping, the data is already at the end of partial */
-    if ( !skipPartial &&
-         (length = ssl->buffers.inputBuffer.length) ) {
+    if ( !skipPartial && (length = ssl->buffers.inputBuffer.length) ) {
         Trace(PARTIAL_ADD_STR);
 
         if ( (*sslBytes + length) > ssl->buffers.inputBuffer.bufferSize) {
@@ -3119,11 +3741,54 @@
                 return -1;
             }
         }
-        XMEMCPY(&ssl->buffers.inputBuffer.buffer[length], *sslFrame, *sslBytes);
+        if (vChain == NULL) {
+            XMEMCPY(&ssl->buffers.inputBuffer.buffer[length],
+                    *sslFrame, *sslBytes);
+            *sslBytes += length;
+            ssl->buffers.inputBuffer.length = *sslBytes;
+            *sslFrame = ssl->buffers.inputBuffer.buffer;
+            *end = *sslFrame + *sslBytes;
+        }
+    }
+
+    if (vChain != NULL) {
+#ifdef WOLFSSL_SNIFFER_CHAIN_INPUT
+        struct iovec* chain = (struct iovec*)vChain;
+        word32 i, offset, headerSz, qty, remainder;
+
+        Trace(CHAIN_INPUT_STR);
+        headerSz = (word32)*sslFrame - (word32)chain[0].iov_base;
+        remainder = *sslBytes;
+
+        if ( (*sslBytes + length) > ssl->buffers.inputBuffer.bufferSize) {
+            if (GrowInputBuffer(ssl, *sslBytes, length) < 0) {
+                SetError(MEMORY_STR, error, *session, FATAL_ERROR_STATE);
+                return -1;
+            }
+        }
+
+        qty = min(*sslBytes, (word32)chain[0].iov_len - headerSz);
+        XMEMCPY(&ssl->buffers.inputBuffer.buffer[length],
+               (byte*)chain[0].iov_base + headerSz, qty);
+        offset = length;
+        for (i = 1; i < chainSz; i++) {
+            offset += qty;
+            remainder -= qty;
+
+            if (chain[i].iov_len > remainder)
+                qty = remainder;
+            else
+                qty = (word32)chain[i].iov_len;
+            XMEMCPY(ssl->buffers.inputBuffer.buffer + offset,
+                    chain[i].iov_base, qty);
+        }
+
         *sslBytes += length;
         ssl->buffers.inputBuffer.length = *sslBytes;
         *sslFrame = ssl->buffers.inputBuffer.buffer;
         *end = *sslFrame + *sslBytes;
+#endif
+        (void)chainSz;
     }
 
     if ((*session)->flags.clientHello == 0 && **sslFrame != handshake) {
@@ -3142,6 +3807,10 @@
         }
         else {
 #ifdef STARTTLS_ALLOWED
+            if (ssl->buffers.inputBuffer.dynamicFlag) {
+                ssl->buffers.inputBuffer.length = 0;
+                ShrinkInputBuffer(ssl, NO_FORCED_FREE);
+            }
             return 1;
 #endif
         }
@@ -3222,7 +3891,7 @@
 /* return Number of bytes on success, 0 for no data yet, and -1 on error */
 static int ProcessMessage(const byte* sslFrame, SnifferSession* session,
                           int sslBytes, byte** data, const byte* end,
-                          char* error)
+                          void* ctx, char* error)
 {
     const byte*       sslBegin = sslFrame;
     const byte*       recordEnd;   /* end of record indicator */
@@ -3295,6 +3964,18 @@
         recordEnd = sslFrame - ivAdvance + rhSize;  /* sslFrame moved so
                                                        should recordEnd */
         decrypted = 1;
+
+#ifdef WOLFSSL_SNIFFER_STATS
+        if (errCode != 0) {
+            INC_STAT(SnifferStats.sslKeyFails);
+        }
+        else {
+            LOCK_STAT();
+            NOLOCK_INC_STAT(SnifferStats.sslDecryptedPackets);
+            NOLOCK_ADD_TO_STAT(SnifferStats.sslDecryptedBytes, sslBytes);
+            UNLOCK_STAT();
+        }
+#endif
         if (errCode != 0) {
             SetError(BAD_DECRYPT, error, session, FATAL_ERROR_STATE);
             return -1;
@@ -3348,21 +4029,55 @@
                     ret = ssl->buffers.clearOutputBuffer.length;
                     TraceGotData(ret);
                     if (ret) {  /* may be blank message */
-                        byte* tmpData;  /* don't leak on realloc free */
-                        /* add an extra byte at end of allocation in case user
-                         * wants to null terminate plaintext */
-                        tmpData = (byte*)realloc(*data, decoded + ret + 1);
-                        if (tmpData == NULL) {
-                            ForceZero(*data, decoded);
-                            free(*data);
-                            *data = NULL;
-                            SetError(MEMORY_STR, error, session,
-                                     FATAL_ERROR_STATE);
+                        if (data != NULL) {
+                            byte* tmpData;  /* don't leak on realloc free */
+                            /* add an extra byte at end of allocation in case
+                             * user wants to null terminate plaintext */
+                            tmpData = (byte*)XREALLOC(*data, decoded + ret + 1,
+                                    NULL, DYNAMIC_TYPE_TMP_BUFFER);
+                            if (tmpData == NULL) {
+                                ForceZero(*data, decoded);
+                                XFREE(*data, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+                                *data = NULL;
+                                SetError(MEMORY_STR, error, session,
+                                         FATAL_ERROR_STATE);
+                                return -1;
+                            }
+                            *data = tmpData;
+                            XMEMCPY(*data + decoded,
+                                    ssl->buffers.clearOutputBuffer.buffer, ret);
+                        }
+                        else {
+#ifdef WOLFSSL_SNIFFER_STORE_DATA_CB
+                            if (StoreDataCb) {
+                                const byte* buf;
+                                word32 offset = 0;
+                                word32 bufSz;
+                                int stored;
+
+                                buf = ssl->buffers.clearOutputBuffer.buffer;
+                                bufSz = ssl->buffers.clearOutputBuffer.length;
+                                do {
+                                    stored = StoreDataCb(buf, bufSz, offset,
+                                            ctx);
+                                    if (stored <= 0) {
+                                        return -1;
+                                    }
+                                    offset += stored;
+                                } while (offset < bufSz);
+                            }
+                            else {
+                                SetError(STORE_DATA_CB_MISSING_STR, error,
+                                        session, FATAL_ERROR_STATE);
+                                return -1;
+                            }
+#else
+                            (void)ctx;
+                            SetError(NO_DATA_DEST_STR, error, session,
+                                    FATAL_ERROR_STATE);
                             return -1;
+#endif
                         }
-                        *data = tmpData;
-                        XMEMCPY(*data + decoded,
-                                ssl->buffers.clearOutputBuffer.buffer, ret);
                         TraceAddedData(ret, decoded);
                         decoded += ret;
                         ssl->buffers.clearOutputBuffer.length = 0;
@@ -3381,6 +4096,9 @@
             break;
         case alert:
             Trace(GOT_ALERT_STR);
+#ifdef WOLFSSL_SNIFFER_STATS
+            INC_STAT(SnifferStats.sslAlerts);
+#endif
             sslFrame += rhSize;
             sslBytes -= rhSize;
             break;
@@ -3422,9 +4140,11 @@
 
 
 /* See if we need to process any pending FIN captures */
-static void CheckFinCapture(IpInfo* ipInfo, TcpInfo* tcpInfo,
+/* Return 0=normal, else = session removed */
+static int CheckFinCapture(IpInfo* ipInfo, TcpInfo* tcpInfo,
                             SnifferSession* session)
 {
+    int ret = 0;
     if (session->finCaputre.cliFinSeq && session->finCaputre.cliFinSeq <=
                                          session->cliExpected) {
         if (session->finCaputre.cliCounted == 0) {
@@ -3443,8 +4163,11 @@
         }
     }
 
-    if (session->flags.finCount >= 2)
+    if (session->flags.finCount >= 2) {
         RemoveSession(session, ipInfo, tcpInfo, 0);
+        ret = 1;
+    }
+    return ret;
 }
 
 
@@ -3464,43 +4187,153 @@
 
 /* Passes in an IP/TCP packet for decoding (ethernet/localhost frame) removed */
 /* returns Number of bytes on success, 0 for no data yet, and -1 on error */
-int ssl_DecodePacket(const byte* packet, int length, byte** data, char* error)
+static int ssl_DecodePacketInternal(const byte* packet, int length,
+                                    void* vChain, word32 chainSz,
+                                    byte** data, SSLInfo* sslInfo,
+                                    void* ctx, char* error)
 {
     TcpInfo           tcpInfo;
     IpInfo            ipInfo;
     const byte*       sslFrame;
-    const byte*       end = packet + length;
+    const byte*       end;
     int               sslBytes;                /* ssl bytes unconsumed */
     int               ret;
     SnifferSession*   session = 0;
 
+#ifdef WOLFSSL_SNIFFER_CHAIN_INPUT
+    if (packet == NULL && vChain != NULL) {
+        struct iovec* chain = (struct iovec*)vChain;
+        word32 i;
+
+        length = 0;
+        for (i = 0; i < chainSz; i++)
+            length += chain[i].iov_len;
+        packet = (const byte*)chain[0].iov_base;
+    }
+#endif
+
     if (CheckHeaders(&ipInfo, &tcpInfo, packet, length, &sslFrame, &sslBytes,
                      error) != 0)
         return -1;
 
+    end = sslFrame + sslBytes;
+
     ret = CheckSession(&ipInfo, &tcpInfo, sslBytes, &session, error);
     if (RemoveFatalSession(&ipInfo, &tcpInfo, session, error)) return -1;
     else if (ret == -1) return -1;
-    else if (ret ==  1) return  0;   /* done for now */
+    else if (ret ==  1) {
+#ifdef WOLFSSL_SNIFFER_STATS
+        if (sslBytes > 0) {
+            LOCK_STAT();
+            NOLOCK_INC_STAT(SnifferStats.sslEncryptedPackets);
+            NOLOCK_ADD_TO_STAT(SnifferStats.sslEncryptedBytes, sslBytes);
+            UNLOCK_STAT();
+        }
+        else
+            INC_STAT(SnifferStats.sslDecryptedPackets);
+#endif
+         return  0;   /* done for now */
+    }
 
     ret = CheckSequence(&ipInfo, &tcpInfo, session, &sslBytes, &sslFrame,error);
     if (RemoveFatalSession(&ipInfo, &tcpInfo, session, error)) return -1;
     else if (ret == -1) return -1;
-    else if (ret ==  1) return  0;   /* done for now */
+    else if (ret ==  1) {
+#ifdef WOLFSSL_SNIFFER_STATS
+        INC_STAT(SnifferStats.sslDecryptedPackets);
+#endif
+        return  0;   /* done for now */
+    }
 
     ret = CheckPreRecord(&ipInfo, &tcpInfo, &sslFrame, &session, &sslBytes,
-                         &end, error);
+                         &end, vChain, chainSz, error);
     if (RemoveFatalSession(&ipInfo, &tcpInfo, session, error)) return -1;
     else if (ret == -1) return -1;
-    else if (ret ==  1) return  0;   /* done for now */
-
-    ret = ProcessMessage(sslFrame, session, sslBytes, data, end, error);
+    else if (ret ==  1) {
+#ifdef WOLFSSL_SNIFFER_STATS
+        INC_STAT(SnifferStats.sslDecryptedPackets);
+#endif
+        return  0;   /* done for now */
+    }
+
+#ifdef WOLFSSL_SNIFFER_STATS
+    if (sslBytes > 0) {
+        LOCK_STAT();
+        NOLOCK_INC_STAT(SnifferStats.sslEncryptedPackets);
+        NOLOCK_ADD_TO_STAT(SnifferStats.sslEncryptedBytes, sslBytes);
+        UNLOCK_STAT();
+    }
+    else
+        INC_STAT(SnifferStats.sslDecryptedPackets);
+#endif
+
+    ret = ProcessMessage(sslFrame, session, sslBytes, data, end, ctx, error);
     if (RemoveFatalSession(&ipInfo, &tcpInfo, session, error)) return -1;
-    CheckFinCapture(&ipInfo, &tcpInfo, session);
+    if (CheckFinCapture(&ipInfo, &tcpInfo, session) == 0) {
+        CopySessionInfo(session, sslInfo);
+    }
+
     return ret;
 }
 
 
+/* Passes in an IP/TCP packet for decoding (ethernet/localhost frame) removed */
+/* returns Number of bytes on success, 0 for no data yet, and -1 on error */
+/* Also returns Session Info if available */
+int ssl_DecodePacketWithSessionInfo(const unsigned char* packet, int length,
+    unsigned char** data, SSLInfo* sslInfo, char* error)
+{
+    return ssl_DecodePacketInternal(packet, length, NULL, 0, data, sslInfo,
+            NULL, error);
+}
+
+
+/* Passes in an IP/TCP packet for decoding (ethernet/localhost frame) removed */
+/* returns Number of bytes on success, 0 for no data yet, and -1 on error */
+int ssl_DecodePacket(const byte* packet, int length, byte** data, char* error)
+{
+    return ssl_DecodePacketInternal(packet, length, NULL, 0, data, NULL, NULL,
+            error);
+}
+
+
+#ifdef WOLFSSL_SNIFFER_STORE_DATA_CB
+
+int ssl_DecodePacketWithSessionInfoStoreData(const unsigned char* packet,
+        int length, void* ctx, SSLInfo* sslInfo, char* error)
+{
+    return ssl_DecodePacketInternal(packet, length, NULL, 0, NULL, sslInfo,
+            ctx, error);
+}
+
+#endif
+
+
+#ifdef WOLFSSL_SNIFFER_CHAIN_INPUT
+
+int ssl_DecodePacketWithChain(void* vChain, word32 chainSz, byte** data,
+        char* error)
+{
+    return ssl_DecodePacketInternal(NULL, 0, vChain, chainSz, data, NULL, NULL,
+            error);
+}
+
+#endif
+
+
+#if defined(WOLFSSL_SNIFFER_CHAIN_INPUT) && \
+     defined(WOLFSSL_SNIFFER_STORE_DATA_CB)
+
+int ssl_DecodePacketWithChainSessionInfoStoreData(void* vChain, word32 chainSz,
+        void* ctx, SSLInfo* sslInfo, char* error)
+{
+    return ssl_DecodePacketInternal(NULL, 0, vChain, chainSz, NULL, sslInfo,
+            ctx, error);
+}
+
+#endif
+
+
 /* Deallocator for the decoded data buffer. */
 /* returns 0 on success, -1 on error */
 int ssl_FreeDecodeBuffer(byte** data, char* error)
@@ -3521,7 +4354,7 @@
 
     if (data != NULL) {
         ForceZero(*data, (word32)sz);
-        free(*data);
+        XFREE(*data, NULL, DYNAMIC_TYPE_TMP_BUFFER);
         *data = NULL;
     }
 
@@ -3534,12 +4367,15 @@
 int ssl_Trace(const char* traceFile, char* error)
 {
     if (traceFile) {
-        TraceFile = fopen(traceFile, "a");
-        if (!TraceFile) {
-            SetError(BAD_TRACE_FILE_STR, error, NULL, 0);
-            return -1;
+        /* Don't try to reopen the file */
+        if (TraceFile == NULL) {
+            TraceFile = fopen(traceFile, "a");
+            if (!TraceFile) {
+                SetError(BAD_TRACE_FILE_STR, error, NULL, 0);
+                return -1;
+             }
+            TraceOn = 1;
         }
-        TraceOn = 1;
     }
     else
         TraceOn = 0;
@@ -3565,6 +4401,8 @@
 
 
 
+#ifdef WOLFSSL_SESSION_STATS
+
 int ssl_GetSessionStats(unsigned int* active,     unsigned int* total,
                         unsigned int* peak,       unsigned int* maxSessions,
                         unsigned int* missedData, unsigned int* reassemblyMem,
@@ -3605,7 +4443,169 @@
     }
 }
 
-
+#endif
+
+
+
+int ssl_SetConnectionCb(SSLConnCb cb)
+{
+    ConnectionCb = cb;
+    return 0;
+}
+
+
+
+int ssl_SetConnectionCtx(void* ctx)
+{
+    ConnectionCbCtx = ctx;
+    return 0;
+}
+
+
+#ifdef WOLFSSL_SNIFFER_STATS
+
+/* Resets the statistics tracking global structure.
+ * returns 0 on success, -1 on error */
+int ssl_ResetStatistics(void)
+{
+    wc_LockMutex(&StatsMutex);
+    XMEMSET(&SnifferStats, 0, sizeof(SSLStats));
+    wc_UnLockMutex(&StatsMutex);
+    return 0;
+}
+
+
+/* Copies the SSL statistics into the provided stats record.
+ * returns 0 on success, -1 on error */
+int ssl_ReadStatistics(SSLStats* stats)
+{
+    if (stats == NULL)
+        return -1;
+
+    LOCK_STAT();
+    XMEMCPY(stats, &SnifferStats, sizeof(SSLStats));
+    UNLOCK_STAT();
+    return 0;
+}
+
+/* Copies the SSL statistics into the provided stats record then
+ * resets the statistics tracking global structure.
+ * returns 0 on success, -1 on error */
+int ssl_ReadResetStatistics(SSLStats* stats)
+{
+    if (stats == NULL)
+        return -1;
+
+    LOCK_STAT();
+    XMEMCPY(stats, &SnifferStats, sizeof(SSLStats));
+    XMEMSET(&SnifferStats, 0, sizeof(SSLStats));
+    UNLOCK_STAT();
+    return 0;
+}
+
+#endif /* WOLFSSL_SNIFFER_STATS */
+
+
+#ifdef WOLFSSL_SNIFFER_WATCH
+
+int ssl_SetWatchKeyCallback_ex(SSLWatchCb cb, int devId, char* error)
+{
+    (void)devId;
+    WatchCb = cb;
+    return CreateWatchSnifferServer(error);
+}
+
+
+int ssl_SetWatchKeyCallback(SSLWatchCb cb, char* error)
+{
+    WatchCb = cb;
+    return CreateWatchSnifferServer(error);
+}
+
+
+int ssl_SetWatchKeyCtx(void* ctx, char* error)
+{
+    (void)error;
+    WatchCbCtx = ctx;
+    return 0;
+}
+
+
+int ssl_SetWatchKey_buffer(void* vSniffer, const byte* key, word32 keySz,
+        int keyType, char* error)
+{
+    SnifferSession* sniffer;
+    int ret;
+
+    if (vSniffer == NULL) {
+        return -1;
+    }
+    if (key == NULL || keySz == 0) {
+        return -1;
+    }
+
+    sniffer = (SnifferSession*)vSniffer;
+    /* Remap the keyType from what the user can use to
+     * what wolfSSL_use_PrivateKey_buffer expects. */
+    keyType = (keyType == FILETYPE_PEM) ? WOLFSSL_FILETYPE_PEM :
+                                          WOLFSSL_FILETYPE_ASN1;
+
+    ret = wolfSSL_use_PrivateKey_buffer(sniffer->sslServer,
+            key, keySz, keyType);
+    if (ret != WOLFSSL_SUCCESS) {
+        SetError(KEY_FILE_STR, error, sniffer, FATAL_ERROR_STATE);
+        return -1;
+    }
+
+    return 0;
+}
+
+
+int ssl_SetWatchKey_file(void* vSniffer, const char* keyFile, int keyType,
+        const char* password, char* error)
+{
+    byte* keyBuf = NULL;
+    word32 keyBufSz = 0;
+    int ret;
+
+    if (vSniffer == NULL) {
+        return -1;
+    }
+    if (keyFile == NULL) {
+        return -1;
+    }
+
+    /* Remap the keyType from what the user can use to
+     * what LoadKeyFile expects. */
+    keyType = (keyType == FILETYPE_PEM) ? WOLFSSL_FILETYPE_PEM :
+                                          WOLFSSL_FILETYPE_ASN1;
+
+    ret = LoadKeyFile(&keyBuf, &keyBufSz, keyFile, keyType, password);
+    if (ret < 0) {
+        SetError(KEY_FILE_STR, error, NULL, 0);
+        XFREE(keyBuf, NULL, DYNAMIC_TYPE_X509);
+        return -1;
+    }
+
+    ret = ssl_SetWatchKey_buffer(vSniffer, keyBuf, keyBufSz, FILETYPE_DER,
+            error);
+    XFREE(keyBuf, NULL, DYNAMIC_TYPE_X509);
+
+    return ret;
+}
+
+#endif /* WOLFSSL_SNIFFER_WATCH */
+
+
+#ifdef WOLFSSL_SNIFFER_STORE_DATA_CB
+
+int ssl_SetStoreDataCallback(SSLStoreDataCb cb)
+{
+    StoreDataCb = cb;
+    return 0;
+}
+
+#endif /* WOLFSSL_SNIFFER_STORE_DATA_CB */
 
 #endif /* WOLFSSL_SNIFFER */
 #endif /* WOLFCRYPT_ONLY */
--- a/src/ssl.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/src/ssl.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* ssl.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -25,8 +25,14 @@
 #endif
 
 #include <wolfssl/wolfcrypt/settings.h>
-
-#ifndef WOLFCRYPT_ONLY
+#if defined(OPENSSL_EXTRA) && !defined(_WIN32)
+    /* turn on GNU extensions for XVASPRINTF with wolfSSL_BIO_printf */
+    #undef  _GNU_SOURCE
+    #define _GNU_SOURCE
+#endif
+
+#if !defined(WOLFCRYPT_ONLY) || defined(OPENSSL_EXTRA) || \
+    defined(OPENSSL_EXTRA_X509_SMALL)
 
 #ifdef HAVE_ERRNO_H
     #include <errno.h>
@@ -43,12 +49,16 @@
 #endif
 
 
-#ifndef WOLFSSL_ALLOW_NO_SUITES
+#if !defined(WOLFSSL_ALLOW_NO_SUITES) && !defined(WOLFCRYPT_ONLY)
     #if defined(NO_DH) && !defined(HAVE_ECC) && !defined(WOLFSSL_STATIC_RSA) \
                 && !defined(WOLFSSL_STATIC_DH) && !defined(WOLFSSL_STATIC_PSK) \
-                && !defined(HAVE_ED25519)
+                && !defined(HAVE_ED25519) && !defined(HAVE_ED448)
         #error "No cipher suites defined because DH disabled, ECC disabled, and no static suites defined. Please see top of README"
     #endif
+    #ifdef WOLFSSL_CERT_GEN
+        /* need access to Cert struct for creating certificate */
+        #include <wolfssl/wolfcrypt/asn_public.h>
+    #endif
 #endif
 
 #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL) || \
@@ -59,6 +69,10 @@
 
 #include <wolfssl/wolfcrypt/wc_encrypt.h>
 
+#ifndef NO_RSA
+    #include <wolfssl/wolfcrypt/rsa.h>
+#endif
+
 #ifdef OPENSSL_EXTRA
     /* openssl headers begin */
     #include <wolfssl/openssl/aes.h>
@@ -73,9 +87,16 @@
     #include <wolfssl/openssl/ec.h>
     #include <wolfssl/openssl/ec25519.h>
     #include <wolfssl/openssl/ed25519.h>
+    #include <wolfssl/openssl/ec448.h>
+    #include <wolfssl/openssl/ed448.h>
     #include <wolfssl/openssl/ecdsa.h>
     #include <wolfssl/openssl/ecdh.h>
+    #include <wolfssl/openssl/err.h>
+    #include <wolfssl/openssl/opensslv.h>
     #include <wolfssl/openssl/rc4.h>
+    #include <wolfssl/openssl/stack.h>
+    #include <wolfssl/openssl/x509v3.h>
+    #include <wolfssl/openssl/x509_vfy.h>
     /* openssl headers end, wolfssl internal headers next */
     #include <wolfssl/wolfcrypt/hmac.h>
     #include <wolfssl/wolfcrypt/random.h>
@@ -86,6 +107,7 @@
     #include <wolfssl/wolfcrypt/idea.h>
     #include <wolfssl/wolfcrypt/curve25519.h>
     #include <wolfssl/wolfcrypt/ed25519.h>
+    #include <wolfssl/wolfcrypt/curve448.h>
     #if defined(OPENSSL_ALL) || defined(HAVE_STUNNEL)
         #include <wolfssl/openssl/ocsp.h>
     #endif /* WITH_STUNNEL */
@@ -95,14 +117,75 @@
     #if defined(WOLFCRYPT_HAVE_SRP) && !defined(NO_SHA256) \
         && !defined(WC_NO_RNG)
         #include <wolfssl/wolfcrypt/srp.h>
-        #include <wolfssl/wolfcrypt/random.h>
-    #endif
+    #endif
+    #if defined(HAVE_FIPS) || defined(HAVE_SELFTEST)
+        #include <wolfssl/wolfcrypt/pkcs7.h>
+    #endif
+    #if defined(OPENSSL_ALL) && defined(HAVE_PKCS7)
+        #include <wolfssl/openssl/pkcs7.h>
+    #endif /* OPENSSL_ALL && HAVE_PKCS7 */
+#endif
+#if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
+    int SetIndividualInternal(WOLFSSL_BIGNUM* bn, mp_int* mpi);
+    int SetIndividualExternal(WOLFSSL_BIGNUM** bn, mp_int* mpi);
+    int oid2nid(word32 oid, int grp);
+#endif
+
+#if defined(WOLFSSL_QT)
+    #include <wolfssl/wolfcrypt/sha.h>
 #endif
 
 #ifdef NO_ASN
     #include <wolfssl/wolfcrypt/dh.h>
 #endif
-
+#endif /* !WOLFCRYPT_ONLY || OPENSSL_EXTRA */
+
+#define WOLFSSL_EVP_INCLUDED
+#include "wolfcrypt/src/evp.c"
+
+#ifdef OPENSSL_EXTRA
+/* Global pointer to constant BN on */
+static WOLFSSL_BIGNUM* bn_one = NULL;
+#endif
+
+#ifndef WOLFCRYPT_ONLY
+
+#if defined(OPENSSL_EXTRA) && defined(HAVE_ECC)
+const WOLF_EC_NIST_NAME kNistCurves[] = {
+    {XSTR_SIZEOF("P-192"),   "P-192",   NID_X9_62_prime192v1},
+    {XSTR_SIZEOF("P-256"),   "P-256",   NID_X9_62_prime256v1},
+    {XSTR_SIZEOF("P-112"),   "P-112",   NID_secp112r1},
+    {XSTR_SIZEOF("P-112-2"), "P-112-2", NID_secp112r2},
+    {XSTR_SIZEOF("P-128"),   "P-128",   NID_secp128r1},
+    {XSTR_SIZEOF("P-128-2"), "P-128-2", NID_secp128r2},
+    {XSTR_SIZEOF("P-160"),   "P-160",   NID_secp160r1},
+    {XSTR_SIZEOF("P-160-2"), "P-160-2", NID_secp160r2},
+    {XSTR_SIZEOF("P-224"),   "P-224",   NID_secp224r1},
+    {XSTR_SIZEOF("P-384"),   "P-384",   NID_secp384r1},
+    {XSTR_SIZEOF("P-521"),   "P-521",   NID_secp521r1},
+    {XSTR_SIZEOF("K-160"),   "K-160",   NID_secp160k1},
+    {XSTR_SIZEOF("K-192"),   "K-192",   NID_secp192k1},
+    {XSTR_SIZEOF("K-224"),   "K-224",   NID_secp224k1},
+    {XSTR_SIZEOF("K-256"),   "K-256",   NID_secp256k1},
+    {XSTR_SIZEOF("B-160"),   "B-160",   NID_brainpoolP160r1},
+    {XSTR_SIZEOF("B-192"),   "B-192",   NID_brainpoolP192r1},
+    {XSTR_SIZEOF("B-224"),   "B-224",   NID_brainpoolP224r1},
+    {XSTR_SIZEOF("B-256"),   "B-256",   NID_brainpoolP256r1},
+    {XSTR_SIZEOF("B-320"),   "B-320",   NID_brainpoolP320r1},
+    {XSTR_SIZEOF("B-384"),   "B-384",   NID_brainpoolP384r1},
+    {XSTR_SIZEOF("B-512"),   "B-512",   NID_brainpoolP512r1},
+    {0,                     NULL,      0},
+};
+#endif
+
+#if defined(WOLFSSL_RENESAS_TSIP_TLS)
+    /* for root ca verification */
+int tsip_tls_RootCertVerify(const byte *cert, word32 cert_len,
+                            word32 key_n_start, word32 key_n_len,
+                            word32 key_e_start, word32 key_e_len,
+                            word32 cm_row);
+byte tsip_rootCAverified( );
+#endif
 
 #ifdef WOLFSSL_SESSION_EXPORT
 #ifdef WOLFSSL_DTLS
@@ -158,7 +241,7 @@
 /* This function allows for directly serializing a session rather than using
  * callbacks. It has less overhead by removing a temporary buffer and gives
  * control over when the session gets serialized. When using callbacks the
- * session is always serialized immediatly after the handshake is finished.
+ * session is always serialized immediately after the handshake is finished.
  *
  * buf is the argument to contain the serialized session
  * sz  is the size of the buffer passed in
@@ -189,6 +272,41 @@
 }
 
 
+/* This function is similar to wolfSSL_dtls_export but only exports the portion
+ * of the WOLFSSL structure related to the state of the connection, i.e. peer
+ * sequence number, epoch, AEAD state etc.
+ *
+ * buf is the argument to contain the serialized state, if null then set "sz" to
+ *     buffer size required
+ * sz  is the size of the buffer passed in
+ * ssl is the WOLFSSL struct to serialize
+ * returns the size of serialized session on success, 0 on no action, and
+ *         negative value on error */
+int wolfSSL_dtls_export_state_only(WOLFSSL* ssl, unsigned char* buf,
+        unsigned int* sz)
+{
+    WOLFSSL_ENTER("wolfSSL_dtls_export_state_only");
+
+    if (ssl == NULL || sz == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    if (buf == NULL) {
+        *sz = MAX_EXPORT_STATE_BUFFER;
+        return 0;
+    }
+
+    /* if not DTLS do nothing */
+    if (!ssl->options.dtls) {
+        WOLFSSL_MSG("Currently only DTLS export state is supported");
+        return 0;
+    }
+
+    /* copy over keys, options, and dtls state struct */
+    return wolfSSL_dtls_export_state_internal(ssl, buf, *sz);
+}
+
+
 /* returns 0 on success */
 int wolfSSL_send_session(WOLFSSL* ssl)
 {
@@ -234,10 +352,9 @@
 #endif /* WOLFSSL_DTLS */
 #endif /* WOLFSSL_SESSION_EXPORT */
 
-
 /* prevent multiple mutex initializations */
-static volatile int initRefCount = 0;
-static wolfSSL_Mutex count_mutex;   /* init ref count mutex */
+static volatile WOLFSSL_GLOBAL int initRefCount = 0;
+static WOLFSSL_GLOBAL wolfSSL_Mutex count_mutex;   /* init ref count mutex */
 
 /* Create a new WOLFSSL_CTX struct and return the pointer to created struct.
    WOLFSSL_METHOD pointer passed in is given to ctx to manage.
@@ -248,7 +365,7 @@
 {
     WOLFSSL_CTX* ctx = NULL;
 
-    WOLFSSL_ENTER("WOLFSSL_CTX_new_ex");
+    WOLFSSL_ENTER("wolfSSL_CTX_new_ex");
 
     if (initRefCount == 0) {
         /* user no longer forced to call Init themselves */
@@ -268,7 +385,15 @@
 
     ctx = (WOLFSSL_CTX*) XMALLOC(sizeof(WOLFSSL_CTX), heap, DYNAMIC_TYPE_CTX);
     if (ctx) {
-        if (InitSSL_Ctx(ctx, method, heap) < 0) {
+        int ret;
+
+        ret = InitSSL_Ctx(ctx, method, heap);
+    #ifdef WOLFSSL_STATIC_MEMORY
+        if (heap != NULL) {
+            ctx->onHeap = 1; /* free the memory back to heap when done */
+        }
+    #endif
+        if (ret < 0) {
             WOLFSSL_MSG("Init CTX failed");
             wolfSSL_CTX_free(ctx);
             ctx = NULL;
@@ -297,6 +422,7 @@
 }
 
 
+WOLFSSL_ABI
 WOLFSSL_CTX* wolfSSL_CTX_new(WOLFSSL_METHOD* method)
 {
 #ifdef WOLFSSL_HEAP_TEST
@@ -307,7 +433,16 @@
 #endif
 }
 
-
+#ifdef OPENSSL_EXTRA
+/* increases CTX reference count to track proper time to "free" */
+int wolfSSL_CTX_up_ref(WOLFSSL_CTX* ctx)
+{
+    int refCount = SSL_CTX_RefCount(ctx, 1);
+    return ((refCount > 1) ? 1 : 0);
+}
+#endif
+
+WOLFSSL_ABI
 void wolfSSL_CTX_free(WOLFSSL_CTX* ctx)
 {
     WOLFSSL_ENTER("SSL_CTX_free");
@@ -317,9 +452,11 @@
         if (ctx->srp != NULL){
             if (ctx->srp_password != NULL){
                 XFREE(ctx->srp_password, ctx->heap, DYNAMIC_TYPE_SRP);
+                ctx->srp_password = NULL;
             }
             wc_SrpTerm(ctx->srp);
             XFREE(ctx->srp, ctx->heap, DYNAMIC_TYPE_SRP);
+            ctx->srp = NULL;
         }
 #endif
         FreeSSL_Ctx(ctx);
@@ -329,6 +466,36 @@
 }
 
 
+#ifdef HAVE_ENCRYPT_THEN_MAC
+/**
+ * Sets whether Encrypt-Then-MAC extension can be negotiated against context.
+ * The default value: enabled.
+ *
+ * ctx  SSL/TLS context.
+ * set  Whether to allow or not: 1 is allow and 0 is disallow.
+ * returns WOLFSSL_SUCCESS
+ */
+int wolfSSL_CTX_AllowEncryptThenMac(WOLFSSL_CTX *ctx, int set)
+{
+    ctx->disallowEncThenMac = !set;
+    return WOLFSSL_SUCCESS;
+}
+
+/**
+ * Sets whether Encrypt-Then-MAC extension can be negotiated against context.
+ * The default value comes from context.
+ *
+ * ctx  SSL/TLS context.
+ * set  Whether to allow or not: 1 is allow and 0 is disallow.
+ * returns WOLFSSL_SUCCESS
+ */
+int wolfSSL_AllowEncryptThenMac(WOLFSSL *ssl, int set)
+{
+    ssl->options.disallowEncThenMac = !set;
+    return WOLFSSL_SUCCESS;
+}
+#endif
+
 #ifdef SINGLE_THREADED
 /* no locking in single threaded mode, allow a CTX level rng to be shared with
  * WOLFSSL objects, WOLFSSL_SUCCESS on ok */
@@ -341,7 +508,7 @@
         return BAD_FUNC_ARG;
     }
 
-    rng = XMALLOC(sizeof(WC_RNG), ctx->heap, DYNAMIC_TYPE_RNG);
+    rng = (WC_RNG*)XMALLOC(sizeof(WC_RNG), ctx->heap, DYNAMIC_TYPE_RNG);
     if (rng == NULL) {
         return MEMORY_E;
     }
@@ -362,6 +529,7 @@
 #endif
 
 
+WOLFSSL_ABI
 WOLFSSL* wolfSSL_new(WOLFSSL_CTX* ctx)
 {
     WOLFSSL* ssl = NULL;
@@ -385,6 +553,7 @@
 }
 
 
+WOLFSSL_ABI
 void wolfSSL_free(WOLFSSL* ssl)
 {
     WOLFSSL_ENTER("SSL_free");
@@ -440,7 +609,7 @@
  * duplicate existing ssl members into dup needed for writing
  *
  * dup write only WOLFSSL
- * ssl exisiting WOLFSSL
+ * ssl existing WOLFSSL
  *
  * 0 on success
 */
@@ -488,10 +657,10 @@
 
 /*
  * duplicate a WOLFSSL object post handshake for writing only
- * turn exisitng object into read only.  Allows concurrent access from two
+ * turn existing object into read only.  Allows concurrent access from two
  * different threads.
  *
- * ssl exisiting WOLFSSL object
+ * ssl existing WOLFSSL object
  *
  * return dup'd WOLFSSL object on success
 */
@@ -522,7 +691,7 @@
         if ( (ret = InitSSL(dup, ssl->ctx, 1)) < 0) {
             FreeSSL(dup, ssl->ctx->heap);
             dup = NULL;
-        } else if ( (ret = DupSSL(dup, ssl) < 0)) {
+        } else if ( (ret = DupSSL(dup, ssl)) < 0) {
             FreeSSL(dup, ssl->ctx->heap);
             dup = NULL;
         }
@@ -571,7 +740,7 @@
 #ifndef WOLFSSL_NO_TLS12
     WOLFSSL_ENTER("SSL_use_old_poly");
     WOLFSSL_MSG("Warning SSL connection auto detects old/new and this function"
-            "is depriciated");
+            "is depreciated");
     ssl->options.oldPoly = (word16)value;
     WOLFSSL_LEAVE("SSL_use_old_poly", 0);
 #endif
@@ -580,6 +749,7 @@
 #endif
 
 
+WOLFSSL_ABI
 int wolfSSL_set_fd(WOLFSSL* ssl, int fd)
 {
     int ret;
@@ -690,30 +860,25 @@
 int wolfSSL_get_ciphers(char* buf, int len)
 {
     const CipherSuiteInfo* ciphers = GetCipherNames();
-    int  totalInc = 0;
-    int  step     = 0;
-    char delim    = ':';
-    int  size     = GetCipherNamesSize();
-    int  i;
+    int ciphersSz = GetCipherNamesSize();
+    int i;
+    int cipherNameSz;
 
     if (buf == NULL || len <= 0)
         return BAD_FUNC_ARG;
 
     /* Add each member to the buffer delimited by a : */
-    for (i = 0; i < size; i++) {
-        step = (int)(XSTRLEN(ciphers[i].name) + 1);  /* delimiter */
-        totalInc += step;
-
-        /* Check to make sure buf is large enough and will not overflow */
-        if (totalInc < len) {
-            size_t cipherLen = XSTRLEN(ciphers[i].name);
-            XSTRNCPY(buf, ciphers[i].name, cipherLen);
-            buf += cipherLen;
-
-            if (i < size - 1)
-                *buf++ = delim;
-            else
-                *buf++ = '\0';
+    for (i = 0; i < ciphersSz; i++) {
+        cipherNameSz = (int)XSTRLEN(ciphers[i].name);
+        if (cipherNameSz + 1 < len) {
+            XSTRNCPY(buf, ciphers[i].name, len);
+            buf += cipherNameSz;
+
+            if (i < ciphersSz - 1)
+                *buf++ = ':';
+            *buf = 0;
+
+            len -= cipherNameSz + 1;
         }
         else
             return BUFFER_E;
@@ -721,6 +886,41 @@
     return WOLFSSL_SUCCESS;
 }
 
+
+#ifndef NO_ERROR_STRINGS
+/* places a list of all supported cipher suites in TLS_* format into "buf"
+ * return WOLFSSL_SUCCESS on success */
+int wolfSSL_get_ciphers_iana(char* buf, int len)
+{
+    const CipherSuiteInfo* ciphers = GetCipherNames();
+    int ciphersSz = GetCipherNamesSize();
+    int i;
+    int cipherNameSz;
+
+    if (buf == NULL || len <= 0)
+        return BAD_FUNC_ARG;
+
+    /* Add each member to the buffer delimited by a : */
+    for (i = 0; i < ciphersSz; i++) {
+        cipherNameSz = (int)XSTRLEN(ciphers[i].name_iana);
+        if (cipherNameSz + 1 < len) {
+            XSTRNCPY(buf, ciphers[i].name_iana, len);
+            buf += cipherNameSz;
+
+            if (i < ciphersSz - 1)
+                *buf++ = ':';
+            *buf = 0;
+
+            len -= cipherNameSz + 1;
+        }
+        else
+            return BUFFER_E;
+    }
+    return WOLFSSL_SUCCESS;
+}
+#endif /* NO_ERROR_STRINGS */
+
+
 const char* wolfSSL_get_shared_ciphers(WOLFSSL* ssl, char* buf, int len)
 {
     const char* cipher;
@@ -736,26 +936,81 @@
 
 int wolfSSL_get_fd(const WOLFSSL* ssl)
 {
+    int fd = -1;
     WOLFSSL_ENTER("SSL_get_fd");
-    WOLFSSL_LEAVE("SSL_get_fd", ssl->rfd);
-    return ssl->rfd;
+    if (ssl) {
+        fd = ssl->rfd;
+    }
+    WOLFSSL_LEAVE("SSL_get_fd", fd);
+    return fd;
 }
 
 
 int wolfSSL_dtls(WOLFSSL* ssl)
 {
-    return ssl->options.dtls;
-}
-
+    int dtlsOpt = 0;
+    if (ssl)
+        dtlsOpt = ssl->options.dtls;
+    return dtlsOpt;
+}
+
+#if !defined(NO_CERTS)
+/* Set whether mutual authentication is required for connections.
+ * Server side only.
+ *
+ * ctx  The SSL/TLS CTX object.
+ * req  1 to indicate required and 0 when not.
+ * returns BAD_FUNC_ARG when ctx is NULL, SIDE_ERROR when not a server and
+ * 0 on success.
+ */
+int wolfSSL_CTX_mutual_auth(WOLFSSL_CTX* ctx, int req)
+{
+    if (ctx == NULL)
+        return BAD_FUNC_ARG;
+    if (ctx->method->side == WOLFSSL_CLIENT_END)
+        return SIDE_ERROR;
+
+    ctx->mutualAuth = (byte)req;
+
+    return 0;
+}
+
+/* Set whether mutual authentication is required for the connection.
+ * Server side only.
+ *
+ * ssl  The SSL/TLS object.
+ * req  1 to indicate required and 0 when not.
+ * returns BAD_FUNC_ARG when ssl is NULL, or not using TLS v1.3,
+ * SIDE_ERROR when not a client and 0 on success.
+ */
+int wolfSSL_mutual_auth(WOLFSSL* ssl, int req)
+{
+    if (ssl == NULL)
+        return BAD_FUNC_ARG;
+    if (ssl->options.side == WOLFSSL_SERVER_END)
+        return SIDE_ERROR;
+
+    ssl->options.mutualAuth = (word16)req;
+
+    return 0;
+}
+#endif /* NO_CERTS */
 
 #ifndef WOLFSSL_LEANPSK
 int wolfSSL_dtls_set_peer(WOLFSSL* ssl, void* peer, unsigned int peerSz)
 {
 #ifdef WOLFSSL_DTLS
-    void* sa = (void*)XMALLOC(peerSz, ssl->heap, DYNAMIC_TYPE_SOCKADDR);
+    void* sa;
+
+    if (ssl == NULL)
+        return WOLFSSL_FAILURE;
+
+    sa = (void*)XMALLOC(peerSz, ssl->heap, DYNAMIC_TYPE_SOCKADDR);
     if (sa != NULL) {
-        if (ssl->buffers.dtlsCtx.peer.sa != NULL)
+        if (ssl->buffers.dtlsCtx.peer.sa != NULL) {
             XFREE(ssl->buffers.dtlsCtx.peer.sa,ssl->heap,DYNAMIC_TYPE_SOCKADDR);
+            ssl->buffers.dtlsCtx.peer.sa = NULL;
+        }
         XMEMCPY(sa, peer, peerSz);
         ssl->buffers.dtlsCtx.peer.sa = sa;
         ssl->buffers.dtlsCtx.peer.sz = peerSz;
@@ -819,6 +1074,10 @@
     return WOLFSSL_SUCCESS;
 }
 
+#endif /* WOLFSSL_DTLS && WOLFSSL_SCTP */
+
+#if (defined(WOLFSSL_SCTP) || defined(WOLFSSL_DTLS_MTU)) && \
+                                                           defined(WOLFSSL_DTLS)
 
 int wolfSSL_CTX_dtls_set_mtu(WOLFSSL_CTX* ctx, word16 newMtu)
 {
@@ -844,8 +1103,7 @@
     return WOLFSSL_SUCCESS;
 }
 
-
-#endif /* WOLFSSL_DTLS && WOLFSSL_SCTP */
+#endif /* WOLFSSL_DTLS && (WOLFSSL_SCTP || WOLFSSL_DTLS_MTU) */
 
 
 #ifdef WOLFSSL_DTLS_DROP_STATS
@@ -888,14 +1146,13 @@
     if (ret == 0) {
         ctx->haveEMS = 0;
         ctx->haveMcast = 1;
-        ctx->mcastID = id;
+        ctx->mcastID = (byte)id;
 #ifndef WOLFSSL_USER_IO
         ctx->CBIORecv = EmbedReceiveFromMcast;
 #endif /* WOLFSSL_USER_IO */
-    }
-
-    if (ret == 0)
+
         ret = WOLFSSL_SUCCESS;
+    }
     WOLFSSL_LEAVE("wolfSSL_CTX_mcast_set_member_id()", ret);
     return ret;
 }
@@ -1156,6 +1413,7 @@
 }
 
 
+WOLFSSL_ABI
 WC_RNG* wolfSSL_GetRNG(WOLFSSL* ssl)
 {
     if (ssl) {
@@ -1171,55 +1429,55 @@
 int wolfSSL_GetObjectSize(void)
 {
 #ifdef SHOW_SIZES
-    printf("sizeof suites           = %lu\n", sizeof(Suites));
-    printf("sizeof ciphers(2)       = %lu\n", sizeof(Ciphers));
+    printf("sizeof suites           = %lu\n", (unsigned long)sizeof(Suites));
+    printf("sizeof ciphers(2)       = %lu\n", (unsigned long)sizeof(Ciphers));
 #ifndef NO_RC4
-    printf("\tsizeof arc4         = %lu\n", sizeof(Arc4));
-#endif
-    printf("\tsizeof aes          = %lu\n", sizeof(Aes));
+    printf("\tsizeof arc4         = %lu\n", (unsigned long)sizeof(Arc4));
+#endif
+    printf("\tsizeof aes          = %lu\n", (unsigned long)sizeof(Aes));
 #ifndef NO_DES3
-    printf("\tsizeof des3         = %lu\n", sizeof(Des3));
+    printf("\tsizeof des3         = %lu\n", (unsigned long)sizeof(Des3));
 #endif
 #ifndef NO_RABBIT
-    printf("\tsizeof rabbit       = %lu\n", sizeof(Rabbit));
+    printf("\tsizeof rabbit       = %lu\n", (unsigned long)sizeof(Rabbit));
 #endif
 #ifdef HAVE_CHACHA
-    printf("\tsizeof chacha       = %lu\n", sizeof(ChaCha));
-#endif
-    printf("sizeof cipher specs     = %lu\n", sizeof(CipherSpecs));
-    printf("sizeof keys             = %lu\n", sizeof(Keys));
-    printf("sizeof Hashes(2)        = %lu\n", sizeof(Hashes));
+    printf("\tsizeof chacha       = %lu\n", (unsigned long)sizeof(ChaCha));
+#endif
+    printf("sizeof cipher specs     = %lu\n", (unsigned long)sizeof(CipherSpecs));
+    printf("sizeof keys             = %lu\n", (unsigned long)sizeof(Keys));
+    printf("sizeof Hashes(2)        = %lu\n", (unsigned long)sizeof(Hashes));
 #ifndef NO_MD5
-    printf("\tsizeof MD5          = %lu\n", sizeof(wc_Md5));
+    printf("\tsizeof MD5          = %lu\n", (unsigned long)sizeof(wc_Md5));
 #endif
 #ifndef NO_SHA
-    printf("\tsizeof SHA          = %lu\n", sizeof(wc_Sha));
+    printf("\tsizeof SHA          = %lu\n", (unsigned long)sizeof(wc_Sha));
 #endif
 #ifdef WOLFSSL_SHA224
-    printf("\tsizeof SHA224       = %lu\n", sizeof(wc_Sha224));
+    printf("\tsizeof SHA224       = %lu\n", (unsigned long)sizeof(wc_Sha224));
 #endif
 #ifndef NO_SHA256
-    printf("\tsizeof SHA256       = %lu\n", sizeof(wc_Sha256));
+    printf("\tsizeof SHA256       = %lu\n", (unsigned long)sizeof(wc_Sha256));
 #endif
 #ifdef WOLFSSL_SHA384
-    printf("\tsizeof SHA384       = %lu\n", sizeof(wc_Sha384));
+    printf("\tsizeof SHA384       = %lu\n", (unsigned long)sizeof(wc_Sha384));
 #endif
 #ifdef WOLFSSL_SHA384
-    printf("\tsizeof SHA512       = %lu\n", sizeof(wc_Sha512));
-#endif
-    printf("sizeof Buffers          = %lu\n", sizeof(Buffers));
-    printf("sizeof Options          = %lu\n", sizeof(Options));
-    printf("sizeof Arrays           = %lu\n", sizeof(Arrays));
+    printf("\tsizeof SHA512       = %lu\n", (unsigned long)sizeof(wc_Sha512));
+#endif
+    printf("sizeof Buffers          = %lu\n", (unsigned long)sizeof(Buffers));
+    printf("sizeof Options          = %lu\n", (unsigned long)sizeof(Options));
+    printf("sizeof Arrays           = %lu\n", (unsigned long)sizeof(Arrays));
 #ifndef NO_RSA
-    printf("sizeof RsaKey           = %lu\n", sizeof(RsaKey));
+    printf("sizeof RsaKey           = %lu\n", (unsigned long)sizeof(RsaKey));
 #endif
 #ifdef HAVE_ECC
-    printf("sizeof ecc_key          = %lu\n", sizeof(ecc_key));
-#endif
-    printf("sizeof WOLFSSL_CIPHER    = %lu\n", sizeof(WOLFSSL_CIPHER));
-    printf("sizeof WOLFSSL_SESSION   = %lu\n", sizeof(WOLFSSL_SESSION));
-    printf("sizeof WOLFSSL           = %lu\n", sizeof(WOLFSSL));
-    printf("sizeof WOLFSSL_CTX       = %lu\n", sizeof(WOLFSSL_CTX));
+    printf("sizeof ecc_key          = %lu\n", (unsigned long)sizeof(ecc_key));
+#endif
+    printf("sizeof WOLFSSL_CIPHER    = %lu\n", (unsigned long)sizeof(WOLFSSL_CIPHER));
+    printf("sizeof WOLFSSL_SESSION   = %lu\n", (unsigned long)sizeof(WOLFSSL_SESSION));
+    printf("sizeof WOLFSSL           = %lu\n", (unsigned long)sizeof(WOLFSSL));
+    printf("sizeof WOLFSSL_CTX       = %lu\n", (unsigned long)sizeof(WOLFSSL_CTX));
 #endif
 
     return sizeof(WOLFSSL);
@@ -1397,7 +1655,7 @@
 int wolfSSL_CTX_SetMinEccKey_Sz(WOLFSSL_CTX* ctx, short keySz)
 {
     if (ctx == NULL || keySz < 0 || keySz % 8 != 0) {
-        WOLFSSL_MSG("Key size must be divisable by 8 or ctx was null");
+        WOLFSSL_MSG("Key size must be divisible by 8 or ctx was null");
         return BAD_FUNC_ARG;
     }
 
@@ -1412,7 +1670,7 @@
 int wolfSSL_SetMinEccKey_Sz(WOLFSSL* ssl, short keySz)
 {
     if (ssl == NULL || keySz < 0 || keySz % 8 != 0) {
-        WOLFSSL_MSG("Key size must be divisable by 8 or ssl was null");
+        WOLFSSL_MSG("Key size must be divisible by 8 or ssl was null");
         return BAD_FUNC_ARG;
     }
 
@@ -1426,7 +1684,7 @@
 int wolfSSL_CTX_SetMinRsaKey_Sz(WOLFSSL_CTX* ctx, short keySz)
 {
     if (ctx == NULL || keySz < 0 || keySz % 8 != 0) {
-        WOLFSSL_MSG("Key size must be divisable by 8 or ctx was null");
+        WOLFSSL_MSG("Key size must be divisible by 8 or ctx was null");
         return BAD_FUNC_ARG;
     }
 
@@ -1439,7 +1697,7 @@
 int wolfSSL_SetMinRsaKey_Sz(WOLFSSL* ssl, short keySz)
 {
     if (ssl == NULL || keySz < 0 || keySz % 8 != 0) {
-        WOLFSSL_MSG("Key size must be divisable by 8 or ssl was null");
+        WOLFSSL_MSG("Key size must be divisible by 8 or ssl was null");
         return BAD_FUNC_ARG;
     }
 
@@ -1453,21 +1711,26 @@
 int wolfSSL_SetTmpDH(WOLFSSL* ssl, const unsigned char* p, int pSz,
                     const unsigned char* g, int gSz)
 {
-    word16 havePSK = 0;
-    word16 haveRSA = 1;
-    int    keySz   = 0;
-
     WOLFSSL_ENTER("wolfSSL_SetTmpDH");
-    if (ssl == NULL || p == NULL || g == NULL) return BAD_FUNC_ARG;
-
-    if (pSz < ssl->options.minDhKeySz)
+
+    if (ssl == NULL || p == NULL || g == NULL)
+        return BAD_FUNC_ARG;
+
+    if ((word16)pSz < ssl->options.minDhKeySz)
+        return DH_KEY_SIZE_E;
+    if ((word16)pSz > ssl->options.maxDhKeySz)
         return DH_KEY_SIZE_E;
-    if (pSz > ssl->options.maxDhKeySz)
-        return DH_KEY_SIZE_E;
-
-    if (ssl->options.side != WOLFSSL_SERVER_END)
+
+    /* this function is for server only */
+    if (ssl->options.side == WOLFSSL_CLIENT_END)
         return SIDE_ERROR;
 
+    #if !defined(WOLFSSL_OLD_PRIME_CHECK) && !defined(HAVE_FIPS) && \
+        !defined(HAVE_SELFTEST)
+        ssl->options.dhKeyTested = 0;
+        ssl->options.dhDoKeyTest = 1;
+    #endif
+
     if (ssl->buffers.serverDH_P.buffer && ssl->buffers.weOwnDH) {
         XFREE(ssl->buffers.serverDH_P.buffer, ssl->heap, DYNAMIC_TYPE_PUBLIC_KEY);
         ssl->buffers.serverDH_P.buffer = NULL;
@@ -1498,23 +1761,57 @@
     XMEMCPY(ssl->buffers.serverDH_G.buffer, g, gSz);
 
     ssl->options.haveDH = 1;
+
+    if (ssl->options.side != WOLFSSL_NEITHER_END) {
+        word16 havePSK;
+        word16 haveRSA;
+        int    keySz   = 0;
+
     #ifndef NO_PSK
         havePSK = ssl->options.havePSK;
+    #else
+        havePSK = 0;
     #endif
     #ifdef NO_RSA
         haveRSA = 0;
+    #else
+        haveRSA = 1;
     #endif
     #ifndef NO_CERTS
         keySz = ssl->buffers.keySz;
     #endif
-    InitSuites(ssl->suites, ssl->version, keySz, haveRSA, havePSK,
-               ssl->options.haveDH, ssl->options.haveNTRU,
-               ssl->options.haveECDSAsig, ssl->options.haveECC,
-               ssl->options.haveStaticECC, ssl->options.side);
+        InitSuites(ssl->suites, ssl->version, keySz, haveRSA, havePSK,
+                   ssl->options.haveDH, ssl->options.haveNTRU,
+                   ssl->options.haveECDSAsig, ssl->options.haveECC,
+                   ssl->options.haveStaticECC, ssl->options.side);
+    }
 
     WOLFSSL_LEAVE("wolfSSL_SetTmpDH", 0);
-    return WOLFSSL_SUCCESS;
-}
+
+    return WOLFSSL_SUCCESS;
+}
+
+
+#if !defined(WOLFSSL_OLD_PRIME_CHECK) && !defined(HAVE_FIPS) && \
+    !defined(HAVE_SELFTEST)
+/* Enables or disables the session's DH key prime test. */
+int wolfSSL_SetEnableDhKeyTest(WOLFSSL* ssl, int enable)
+{
+    WOLFSSL_ENTER("wolfSSL_SetEnableDhKeyTest");
+
+    if (ssl == NULL)
+        return BAD_FUNC_ARG;
+
+    if (!enable)
+        ssl->options.dhDoKeyTest = 0;
+    else
+        ssl->options.dhDoKeyTest = 1;
+
+    WOLFSSL_LEAVE("wolfSSL_SetEnableDhKeyTest", WOLFSSL_SUCCESS);
+    return WOLFSSL_SUCCESS;
+}
+#endif
+
 
 /* server ctx Diffie-Hellman parameters, WOLFSSL_SUCCESS on ok */
 int wolfSSL_CTX_SetTmpDH(WOLFSSL_CTX* ctx, const unsigned char* p, int pSz,
@@ -1523,13 +1820,40 @@
     WOLFSSL_ENTER("wolfSSL_CTX_SetTmpDH");
     if (ctx == NULL || p == NULL || g == NULL) return BAD_FUNC_ARG;
 
-    if (pSz < ctx->minDhKeySz)
+    if ((word16)pSz < ctx->minDhKeySz)
+        return DH_KEY_SIZE_E;
+    if ((word16)pSz > ctx->maxDhKeySz)
         return DH_KEY_SIZE_E;
-    if (pSz > ctx->maxDhKeySz)
-        return DH_KEY_SIZE_E;
+
+    #if !defined(WOLFSSL_OLD_PRIME_CHECK) && !defined(HAVE_FIPS) && \
+        !defined(HAVE_SELFTEST)
+    {
+        DhKey checkKey;
+        WC_RNG rng;
+        int error, freeKey = 0;
+
+        error = wc_InitRng(&rng);
+        if (!error)
+            error = wc_InitDhKey(&checkKey);
+        if (!error) {
+            freeKey = 1;
+            error = wc_DhSetCheckKey(&checkKey,
+                                 p, pSz, g, gSz, NULL, 0, 0, &rng);
+        }
+        if (freeKey)
+            wc_FreeDhKey(&checkKey);
+        wc_FreeRng(&rng);
+        if (error)
+            return error;
+
+        ctx->dhKeyTested = 1;
+    }
+    #endif
 
     XFREE(ctx->serverDH_P.buffer, ctx->heap, DYNAMIC_TYPE_PUBLIC_KEY);
+    ctx->serverDH_P.buffer = NULL;
     XFREE(ctx->serverDH_G.buffer, ctx->heap, DYNAMIC_TYPE_PUBLIC_KEY);
+    ctx->serverDH_G.buffer = NULL;
 
     ctx->serverDH_P.buffer = (byte*)XMALLOC(pSz, ctx->heap, DYNAMIC_TYPE_PUBLIC_KEY);
     if (ctx->serverDH_P.buffer == NULL)
@@ -1538,6 +1862,7 @@
     ctx->serverDH_G.buffer = (byte*)XMALLOC(gSz, ctx->heap, DYNAMIC_TYPE_PUBLIC_KEY);
     if (ctx->serverDH_G.buffer == NULL) {
         XFREE(ctx->serverDH_P.buffer, ctx->heap, DYNAMIC_TYPE_PUBLIC_KEY);
+        ctx->serverDH_P.buffer = NULL;
         return MEMORY_E;
     }
 
@@ -1605,6 +1930,7 @@
 #endif /* !NO_DH */
 
 
+WOLFSSL_ABI
 int wolfSSL_write(WOLFSSL* ssl, const void* data, int sz)
 {
     int ret;
@@ -1697,6 +2023,8 @@
         ssl->dtls_expected_rx = max(sz + 100, MAX_MTU);
 #ifdef WOLFSSL_SCTP
         if (ssl->options.dtlsSctp)
+#endif
+#if defined(WOLLSSL_SCTP) || defined(WOLFSSL_DTLS_MTU)
             ssl->dtls_expected_rx = max(ssl->dtls_expected_rx, ssl->dtlsMtuSz);
 #endif
     }
@@ -1741,6 +2069,7 @@
 }
 
 
+WOLFSSL_ABI
 int wolfSSL_read(WOLFSSL* ssl, void* data, int sz)
 {
     WOLFSSL_ENTER("wolfSSL_read()");
@@ -1776,6 +2105,7 @@
 
 
 /* helpers to set the device id, WOLFSSL_SUCCESS on ok */
+WOLFSSL_ABI
 int wolfSSL_SetDevId(WOLFSSL* ssl, int devId)
 {
     if (ssl == NULL)
@@ -1785,6 +2115,8 @@
 
     return WOLFSSL_SUCCESS;
 }
+
+WOLFSSL_ABI
 int wolfSSL_CTX_SetDevId(WOLFSSL_CTX* ctx, int devId)
 {
     if (ctx == NULL)
@@ -1796,6 +2128,7 @@
 }
 
 /* helpers to get device id and heap */
+WOLFSSL_ABI
 int wolfSSL_CTX_GetDevId(WOLFSSL_CTX* ctx, WOLFSSL* ssl)
 {
     int devId = INVALID_DEVID;
@@ -1818,6 +2151,7 @@
 
 #ifdef HAVE_SNI
 
+WOLFSSL_ABI
 int wolfSSL_UseSNI(WOLFSSL* ssl, byte type, const void* data, word16 size)
 {
     if (ssl == NULL)
@@ -1827,6 +2161,7 @@
 }
 
 
+WOLFSSL_ABI
 int wolfSSL_CTX_UseSNI(WOLFSSL_CTX* ctx, byte type, const void* data,
                                                                     word16 size)
 {
@@ -1884,6 +2219,39 @@
 #endif /* HAVE_SNI */
 
 
+#ifdef HAVE_TRUSTED_CA
+
+WOLFSSL_API int wolfSSL_UseTrustedCA(WOLFSSL* ssl, byte type,
+            const byte* certId, word32 certIdSz)
+{
+    if (ssl == NULL)
+        return BAD_FUNC_ARG;
+
+    if (type == WOLFSSL_TRUSTED_CA_PRE_AGREED) {
+        if (certId != NULL || certIdSz != 0)
+            return BAD_FUNC_ARG;
+    }
+    else if (type == WOLFSSL_TRUSTED_CA_X509_NAME) {
+        if (certId == NULL || certIdSz == 0)
+            return BAD_FUNC_ARG;
+    }
+    #ifndef NO_SHA
+    else if (type == WOLFSSL_TRUSTED_CA_KEY_SHA1 ||
+            type == WOLFSSL_TRUSTED_CA_CERT_SHA1) {
+        if (certId == NULL || certIdSz != WC_SHA_DIGEST_SIZE)
+            return BAD_FUNC_ARG;
+    }
+    #endif
+    else
+        return BAD_FUNC_ARG;
+
+    return TLSX_UseTrustedCA(&ssl->extensions,
+            type, certId, certIdSz, ssl->heap);
+}
+
+#endif /* HAVE_TRUSTED_CA */
+
+
 #ifdef HAVE_MAX_FRAGMENT
 #ifndef NO_WOLFSSL_CLIENT
 
@@ -1892,6 +2260,26 @@
     if (ssl == NULL)
         return BAD_FUNC_ARG;
 
+#ifdef WOLFSSL_ALLOW_MAX_FRAGMENT_ADJUST
+    /* The following is a non-standard way to reconfigure the max packet size
+        post-handshake for wolfSSL_write/woflSSL_read */
+    if (ssl->options.handShakeState == HANDSHAKE_DONE) {
+        switch (mfl) {
+            case WOLFSSL_MFL_2_8 : ssl->max_fragment =  256; break;
+            case WOLFSSL_MFL_2_9 : ssl->max_fragment =  512; break;
+            case WOLFSSL_MFL_2_10: ssl->max_fragment = 1024; break;
+            case WOLFSSL_MFL_2_11: ssl->max_fragment = 2048; break;
+            case WOLFSSL_MFL_2_12: ssl->max_fragment = 4096; break;
+            case WOLFSSL_MFL_2_13: ssl->max_fragment = 8192; break;
+            default: ssl->max_fragment = MAX_RECORD_SIZE; break;
+        }
+        return WOLFSSL_SUCCESS;
+    }
+#endif /* WOLFSSL_MAX_FRAGMENT_ADJUST */
+
+    /* This call sets the max fragment TLS extension, which gets sent to server.
+        The server_hello response is what sets the `ssl->max_fragment` in
+        TLSX_MFL_Parse */
     return TLSX_UseMaxFragment(&ssl->extensions, mfl, ssl->heap);
 }
 
@@ -1979,8 +2367,7 @@
 #endif /* HAVE_CERTIFICATE_STATUS_REQUEST_V2 */
 
 /* Elliptic Curves */
-#ifdef HAVE_SUPPORTED_CURVES
-#ifndef NO_WOLFSSL_CLIENT
+#if defined(HAVE_SUPPORTED_CURVES) && !defined(NO_WOLFSSL_CLIENT)
 
 int wolfSSL_UseSupportedCurve(WOLFSSL* ssl, word16 name)
 {
@@ -2003,18 +2390,14 @@
         case WOLFSSL_ECC_BRAINPOOLP384R1:
         case WOLFSSL_ECC_BRAINPOOLP512R1:
         case WOLFSSL_ECC_X25519:
-            break;
-
-#ifdef WOLFSSL_TLS13
+        case WOLFSSL_ECC_X448:
+
         case WOLFSSL_FFDHE_2048:
         case WOLFSSL_FFDHE_3072:
         case WOLFSSL_FFDHE_4096:
         case WOLFSSL_FFDHE_6144:
         case WOLFSSL_FFDHE_8192:
-            if (!IsAtLeastTLSv1_3(ssl->version))
-                return WOLFSSL_SUCCESS;
-            break;
-#endif
+            break;
 
         default:
             return BAD_FUNC_ARG;
@@ -2047,16 +2430,13 @@
         case WOLFSSL_ECC_BRAINPOOLP384R1:
         case WOLFSSL_ECC_BRAINPOOLP512R1:
         case WOLFSSL_ECC_X25519:
-            break;
-
-#ifdef WOLFSSL_TLS13
+        case WOLFSSL_ECC_X448:
         case WOLFSSL_FFDHE_2048:
         case WOLFSSL_FFDHE_3072:
         case WOLFSSL_FFDHE_4096:
         case WOLFSSL_FFDHE_6144:
         case WOLFSSL_FFDHE_8192:
             break;
-#endif
 
         default:
             return BAD_FUNC_ARG;
@@ -2067,8 +2447,7 @@
     return TLSX_UseSupportedCurve(&ctx->extensions, name, ctx->heap);
 }
 
-#endif /* NO_WOLFSSL_CLIENT */
-#endif /* HAVE_SUPPORTED_CURVES */
+#endif /* HAVE_SUPPORTED_CURVES && !NO_WOLFSSL_CLIENT */
 
 /* QSH quantum safe handshake */
 #ifdef HAVE_QSH
@@ -2125,10 +2504,11 @@
 /* Application-Layer Protocol Negotiation */
 #ifdef HAVE_ALPN
 
+WOLFSSL_ABI
 int wolfSSL_UseALPN(WOLFSSL* ssl, char *protocol_name_list,
                     word32 protocol_name_listSz, byte options)
 {
-    char    *list, *ptr, *token[10];
+    char    *list, *ptr, *token[WOLFSSL_MAX_ALPN_NUMBER]={NULL};
     word16  len;
     int     idx = 0;
     int     ret = WOLFSSL_FAILURE;
@@ -2164,7 +2544,7 @@
 
     /* read all protocol name from the list */
     token[idx] = XSTRTOK(list, ",", &ptr);
-    while (token[idx] != NULL)
+    while (idx < WOLFSSL_MAX_ALPN_NUMBER && token[idx] != NULL)
         token[++idx] = XSTRTOK(NULL, ",", &ptr);
 
     /* add protocol name list in the TLS extension in reverse order */
@@ -2249,9 +2629,18 @@
     return ret;
 }
 
+int wolfSSL_CTX_UseSecureRenegotiation(WOLFSSL_CTX* ctx)
+{
+    if (ctx == NULL)
+        return BAD_FUNC_ARG;
+
+    ctx->useSecureReneg = 1;
+    return WOLFSSL_SUCCESS;
+}
+
 
 /* do a secure renegotiation handshake, user forced, we discourage */
-int wolfSSL_Rehandshake(WOLFSSL* ssl)
+static int _Rehandshake(WOLFSSL* ssl)
 {
     int ret;
 
@@ -2268,38 +2657,113 @@
         return SECURE_RENEGOTIATION_E;
     }
 
-    if (ssl->options.handShakeState != HANDSHAKE_DONE) {
-        WOLFSSL_MSG("Can't renegotiate until previous handshake complete");
-        return SECURE_RENEGOTIATION_E;
-    }
+    /* If the client started the renegotiation, the server will already
+     * have processed the client's hello. */
+    if (ssl->options.side != WOLFSSL_SERVER_END ||
+        ssl->options.acceptState != ACCEPT_FIRST_REPLY_DONE) {
+
+        if (ssl->options.handShakeState != HANDSHAKE_DONE) {
+            WOLFSSL_MSG("Can't renegotiate until previous handshake complete");
+            return SECURE_RENEGOTIATION_E;
+        }
 
 #ifndef NO_FORCE_SCR_SAME_SUITE
-    /* force same suite */
-    if (ssl->suites) {
-        ssl->suites->suiteSz = SUITE_LEN;
-        ssl->suites->suites[0] = ssl->options.cipherSuite0;
-        ssl->suites->suites[1] = ssl->options.cipherSuite;
-    }
-#endif
-
-    /* reset handshake states */
-    ssl->options.serverState = NULL_STATE;
-    ssl->options.clientState = NULL_STATE;
-    ssl->options.connectState  = CONNECT_BEGIN;
-    ssl->options.acceptState   = ACCEPT_BEGIN;
-    ssl->options.handShakeState = NULL_STATE;
-    ssl->options.processReply  = 0;  /* TODO, move states in internal.h */
-
-    XMEMSET(&ssl->msgsReceived, 0, sizeof(ssl->msgsReceived));
-
-    ssl->secure_renegotiation->cache_status = SCR_CACHE_NEEDED;
-
-    ret = InitHandshakeHashes(ssl);
-    if (ret !=0)
-        return ret;
-
+        /* force same suite */
+        if (ssl->suites) {
+            ssl->suites->suiteSz = SUITE_LEN;
+            ssl->suites->suites[0] = ssl->options.cipherSuite0;
+            ssl->suites->suites[1] = ssl->options.cipherSuite;
+        }
+#endif
+
+        /* reset handshake states */
+        ssl->options.sendVerify = 0;
+        ssl->options.serverState = NULL_STATE;
+        ssl->options.clientState = NULL_STATE;
+        ssl->options.connectState  = CONNECT_BEGIN;
+        ssl->options.acceptState   = ACCEPT_BEGIN_RENEG;
+        ssl->options.handShakeState = NULL_STATE;
+        ssl->options.processReply  = 0;  /* TODO, move states in internal.h */
+
+        XMEMSET(&ssl->msgsReceived, 0, sizeof(ssl->msgsReceived));
+
+        ssl->secure_renegotiation->cache_status = SCR_CACHE_NEEDED;
+
+#if !defined(NO_WOLFSSL_SERVER) && defined(HAVE_SERVER_RENEGOTIATION_INFO)
+        if (ssl->options.side == WOLFSSL_SERVER_END) {
+            ret = SendHelloRequest(ssl);
+            if (ret != 0) {
+                ssl->error = ret;
+                return WOLFSSL_FATAL_ERROR;
+            }
+        }
+#endif /* NO_WOLFSSL_SERVER && HAVE_SERVER_RENEGOTIATION_INFO */
+
+        ret = InitHandshakeHashes(ssl);
+        if (ret != 0) {
+            ssl->error = ret;
+            return WOLFSSL_FATAL_ERROR;
+        }
+    }
     ret = wolfSSL_negotiate(ssl);
-    return ret;
+    ssl->secure_rene_count++;
+    return ret;
+}
+
+
+/* do a secure renegotiation handshake, user forced, we discourage */
+int wolfSSL_Rehandshake(WOLFSSL* ssl)
+{
+    int ret = WOLFSSL_SUCCESS;
+    WOLFSSL_ENTER("wolfSSL_Rehandshake");
+
+    if (ssl->options.side == WOLFSSL_SERVER_END) {
+        /* Reset option to send certificate verify. */
+        ssl->options.sendVerify = 0;
+    }
+    else {
+        /* Reset resuming flag to do full secure handshake. */
+        ssl->options.resuming = 0;
+        #ifdef HAVE_SESSION_TICKET
+            /* Clearing the ticket. */
+            ret = wolfSSL_UseSessionTicket(ssl);
+        #endif
+    }
+
+    if (ret == WOLFSSL_SUCCESS)
+        ret = _Rehandshake(ssl);
+
+    return ret;
+}
+
+
+#ifndef NO_WOLFSSL_CLIENT
+
+/* do a secure resumption handshake, user forced, we discourage */
+int wolfSSL_SecureResume(WOLFSSL* ssl)
+{
+    WOLFSSL_ENTER("wolfSSL_SecureResume");
+
+    if (ssl == NULL)
+        return BAD_FUNC_ARG;
+
+    if (ssl->options.side == WOLFSSL_SERVER_END) {
+        ssl->error = SIDE_ERROR;
+        return SSL_FATAL_ERROR;
+    }
+
+    return _Rehandshake(ssl);
+}
+
+#endif /* NO_WOLFSSL_CLIENT */
+
+long wolfSSL_SSL_get_secure_renegotiation_support(WOLFSSL* ssl)
+{
+    WOLFSSL_ENTER("wolfSSL_SSL_get_secure_renegotiation_support");
+
+    if (!ssl || !ssl->secure_renegotiation)
+        return WOLFSSL_FAILURE;
+    return ssl->secure_renegotiation->enabled;
 }
 
 #endif /* HAVE_SECURE_RENEGOTIATION */
@@ -2502,10 +2966,10 @@
 
 
 /* WOLFSSL_SUCCESS on ok */
+WOLFSSL_ABI
 int wolfSSL_shutdown(WOLFSSL* ssl)
 {
     int  ret = WOLFSSL_FATAL_ERROR;
-    byte tmp;
     WOLFSSL_ENTER("SSL_shutdown()");
 
     if (ssl == NULL)
@@ -2534,15 +2998,26 @@
             }
         }
 
+#ifdef WOLFSSL_SHUTDOWNONCE
+        if (ssl->options.isClosed || ssl->options.connReset) {
+            /* Shutdown has already occurred.
+             * Caller is free to ignore this error. */
+            return SSL_SHUTDOWN_ALREADY_DONE_E;
+        }
+#endif
+
         /* call wolfSSL_shutdown again for bidirectional shutdown */
         if (ssl->options.sentNotify && !ssl->options.closeNotify) {
-            ret = wolfSSL_read(ssl, &tmp, 0);
-            if (ret < 0) {
+            ret = ProcessReply(ssl);
+            if (ret == ZERO_RETURN) {
+                /* simulate OpenSSL behavior */
+                ssl->error = WOLFSSL_ERROR_SYSCALL;
+                ret = WOLFSSL_SUCCESS;
+            } else if (ssl->error == WOLFSSL_ERROR_NONE) {
+                ret = WOLFSSL_SHUTDOWN_NOT_DONE;
+            } else {
                 WOLFSSL_ERROR(ssl->error);
                 ret = WOLFSSL_FATAL_ERROR;
-            } else if (ssl->options.closeNotify) {
-                ssl->error = WOLFSSL_ERROR_SYSCALL;   /* simulate OpenSSL behavior */
-                ret = WOLFSSL_SUCCESS;
             }
         }
     }
@@ -2574,6 +3049,7 @@
 }
 
 
+WOLFSSL_ABI
 int wolfSSL_get_error(WOLFSSL* ssl, int ret)
 {
     WOLFSSL_ENTER("SSL_get_error");
@@ -2596,7 +3072,7 @@
 }
 
 
-/* retrive alert history, WOLFSSL_SUCCESS on ok */
+/* retrieve alert history, WOLFSSL_SUCCESS on ok */
 int wolfSSL_get_alert_history(WOLFSSL* ssl, WOLFSSL_ALERT_HISTORY *h)
 {
     if (ssl && h) {
@@ -2605,6 +3081,20 @@
     return WOLFSSL_SUCCESS;
 }
 
+#ifdef OPENSSL_EXTRA
+/* returns SSL_WRITING, SSL_READING or SSL_NOTHING */
+int wolfSSL_want(WOLFSSL* ssl)
+{
+    int rw_state = SSL_NOTHING;
+    if (ssl) {
+        if (ssl->error == WANT_READ)
+            rw_state = SSL_READING;
+        else if (ssl->error == WANT_WRITE)
+            rw_state = SSL_WRITING;
+    }
+    return rw_state;
+}
+#endif
 
 /* return TRUE if current error is want read */
 int wolfSSL_want_read(WOLFSSL* ssl)
@@ -2630,7 +3120,7 @@
 
 char* wolfSSL_ERR_error_string(unsigned long errNumber, char* data)
 {
-    static const char* const msg = "Please supply a buffer for error string";
+    static wcchar msg = "Please supply a buffer for error string";
 
     WOLFSSL_ENTER("ERR_error_string");
     if (data) {
@@ -2740,6 +3230,7 @@
 
 const byte* wolfSSL_GetMacSecret(WOLFSSL* ssl, int verify)
 {
+#ifndef WOLFSSL_AEAD_ONLY
     if (ssl == NULL)
         return NULL;
 
@@ -2748,6 +3239,12 @@
         return ssl->keys.client_write_MAC_secret;
     else
         return ssl->keys.server_write_MAC_secret;
+#else
+    (void)ssl;
+    (void)verify;
+
+    return NULL;
+#endif
 }
 
 
@@ -2798,6 +3295,86 @@
     return NULL;
 }
 
+#if defined(HAVE_ENCRYPT_THEN_MAC) && !defined(WOLFSSL_AEAD_ONLY)
+/**
+ * Set the callback, against the context, that encrypts then MACs.
+ *
+ * ctx  SSL/TLS context.
+ * cb   Callback function to use with Encrypt-Then-MAC.
+ */
+void  wolfSSL_CTX_SetEncryptMacCb(WOLFSSL_CTX* ctx, CallbackEncryptMac cb)
+{
+    if (ctx)
+        ctx->EncryptMacCb = cb;
+}
+
+/**
+ * Set the context to use with callback that encrypts then MACs.
+ *
+ * ssl  SSL/TLS object.
+ * ctx  Callback function's context.
+ */
+void  wolfSSL_SetEncryptMacCtx(WOLFSSL* ssl, void *ctx)
+{
+    if (ssl)
+        ssl->EncryptMacCtx = ctx;
+}
+
+/**
+ * Get the context being used with callback that encrypts then MACs.
+ *
+ * ssl  SSL/TLS object.
+ * returns callback function's context or NULL if SSL/TLS object is NULL.
+ */
+void* wolfSSL_GetEncryptMacCtx(WOLFSSL* ssl)
+{
+    if (ssl)
+        return ssl->EncryptMacCtx;
+
+    return NULL;
+}
+
+
+/**
+ * Set the callback, against the context, that MAC verifies then decrypts.
+ *
+ * ctx  SSL/TLS context.
+ * cb   Callback function to use with Encrypt-Then-MAC.
+ */
+void  wolfSSL_CTX_SetVerifyDecryptCb(WOLFSSL_CTX* ctx, CallbackVerifyDecrypt cb)
+{
+    if (ctx)
+        ctx->VerifyDecryptCb = cb;
+}
+
+/**
+ * Set the context to use with callback that MAC verifies then decrypts.
+ *
+ * ssl  SSL/TLS object.
+ * ctx  Callback function's context.
+ */
+void  wolfSSL_SetVerifyDecryptCtx(WOLFSSL* ssl, void *ctx)
+{
+    if (ssl)
+        ssl->VerifyDecryptCtx = ctx;
+}
+
+/**
+ * Get the context being used with callback that MAC verifies then decrypts.
+ *
+ * ssl  SSL/TLS object.
+ * returns callback function's context or NULL if SSL/TLS object is NULL.
+ */
+void* wolfSSL_GetVerifyDecryptCtx(WOLFSSL* ssl)
+{
+    if (ssl)
+        return ssl->VerifyDecryptCtx;
+
+    return NULL;
+}
+#endif /* HAVE_ENCRYPT_THEN_MAC !WOLFSSL_AEAD_ONLY */
+
+
 
 const byte* wolfSSL_GetClientWriteKey(WOLFSSL* ssl)
 {
@@ -2866,10 +3443,12 @@
     if (ssl == NULL)
         return BAD_FUNC_ARG;
 
+#ifndef WOLFSSL_AEAD_ONLY
     if (ssl->specs.cipher_type == block)
         return WOLFSSL_BLOCK_TYPE;
     if (ssl->specs.cipher_type == stream)
         return WOLFSSL_STREAM_TYPE;
+#endif
     if (ssl->specs.cipher_type == aead)
         return WOLFSSL_AEAD_TYPE;
 
@@ -2939,7 +3518,7 @@
 
 WOLFSSL_CERT_MANAGER* wolfSSL_CertManagerNew_ex(void* heap)
 {
-    WOLFSSL_CERT_MANAGER* cm = NULL;
+    WOLFSSL_CERT_MANAGER* cm;
 
     WOLFSSL_ENTER("wolfSSL_CertManagerNew");
 
@@ -2995,8 +3574,9 @@
             if (cm->ocsp)
                 FreeOCSP(cm->ocsp, 1);
             XFREE(cm->ocspOverrideURL, cm->heap, DYNAMIC_TYPE_URL);
-        #if defined(HAVE_CERTIFICATE_STATUS_REQUEST) \
-         || defined(HAVE_CERTIFICATE_STATUS_REQUEST_V2)
+        #if !defined(NO_WOLFSSL_SERVER) && \
+            (defined(HAVE_CERTIFICATE_STATUS_REQUEST) || \
+             defined(HAVE_CERTIFICATE_STATUS_REQUEST_V2))
             if (cm->ocsp_stapling)
                 FreeOCSP(cm->ocsp_stapling, 1);
         #endif
@@ -3014,6 +3594,201 @@
 
 }
 
+#if defined(OPENSSL_EXTRA) && !defined(NO_FILESYSTEM)
+#if defined(WOLFSSL_SIGNER_DER_CERT)
+/******************************************************************************
+* wolfSSL_CertManagerGetCerts - retrieve stack of X509 certificates in a
+* certificate manager (CM).
+*
+* RETURNS:
+* returns stack of X509 certs on success, otherwise returns a NULL.
+*/
+WOLFSSL_STACK* wolfSSL_CertManagerGetCerts(WOLFSSL_CERT_MANAGER* cm)
+{
+    WOLFSSL_STACK* sk = NULL;
+    Signer* signers = NULL;
+    word32  row = 0;
+    DecodedCert* dCert = NULL;
+    WOLFSSL_X509* x509 = NULL;
+    int found = 0;
+
+    if (cm == NULL)
+        return NULL;
+
+    sk = wolfSSL_sk_X509_new();
+
+    if (sk == NULL) {
+        return NULL;
+    }
+
+    if (wc_LockMutex(&cm->caLock) != 0) {
+        goto error_init;
+    }
+
+    for (row = 0; row < CA_TABLE_SIZE; row++) {
+        signers = cm->caTable[row];
+        while (signers && signers->derCert && signers->derCert->buffer) {
+
+            dCert = (DecodedCert*)XMALLOC(sizeof(DecodedCert), cm->heap,
+                                          DYNAMIC_TYPE_DCERT);
+            if (dCert == NULL) {
+                goto error;
+            }
+
+            XMEMSET(dCert, 0, sizeof(DecodedCert));
+
+            InitDecodedCert(dCert, signers->derCert->buffer,
+                            signers->derCert->length, cm->heap);
+
+            /* Parse Certificate */
+            if (ParseCert(dCert, CERT_TYPE, NO_VERIFY, cm)) {
+                goto error;
+            }
+
+            x509 = (WOLFSSL_X509*)XMALLOC(sizeof(WOLFSSL_X509), cm->heap,
+                    DYNAMIC_TYPE_X509);
+
+            if (x509 == NULL) {
+                goto error;
+            }
+
+            InitX509(x509, 1, NULL);
+
+            if (CopyDecodedToX509(x509, dCert) == 0) {
+
+                if (wolfSSL_sk_X509_push(sk, x509) != SSL_SUCCESS) {
+                    WOLFSSL_MSG("Unable to load x509 into stack");
+                    FreeX509(x509);
+                    XFREE(x509, cm->heap, DYNAMIC_TYPE_X509);
+                    goto error;
+                }
+            }
+            else {
+                goto error;
+            }
+
+            found = 1;
+
+            signers = signers->next;
+
+            FreeDecodedCert(dCert);
+            XFREE(dCert, cm->heap, DYNAMIC_TYPE_DCERT);
+            dCert = NULL;
+        }
+    }
+    wc_UnLockMutex(&cm->caLock);
+
+    if (!found) {
+       goto error_init;
+    }
+
+    return sk;
+
+error:
+    wc_UnLockMutex(&cm->caLock);
+
+error_init:
+
+    if (dCert) {
+        FreeDecodedCert(dCert);
+        XFREE(dCert, cm->heap, DYNAMIC_TYPE_DCERT);
+    }
+
+    if (sk)
+        wolfSSL_sk_X509_free(sk);
+
+    return NULL;
+}
+#endif /* WOLFSSL_SIGNER_DER_CERT */
+
+/******************************************************************************
+* wolfSSL_X509_STORE_GetCerts - retrieve stack of X509 in a certificate store ctx
+*
+* This API can be used in SSL verify callback function to view cert chain
+* See examples/client/client.c and myVerify() function in test.h
+*
+* RETURNS:
+* returns stack of X509 certs on success, otherwise returns a NULL.
+*/
+WOLFSSL_STACK* wolfSSL_X509_STORE_GetCerts(WOLFSSL_X509_STORE_CTX* s)
+{
+    int  certIdx = 0;
+    WOLFSSL_BUFFER_INFO* cert = NULL;
+    DecodedCert* dCert = NULL;
+    WOLFSSL_X509* x509 = NULL;
+    WOLFSSL_STACK* sk = NULL;
+    int found = 0;
+
+    if (s == NULL) {
+        return NULL;
+    }
+
+    sk = wolfSSL_sk_X509_new();
+
+    if (sk == NULL) {
+        return NULL;
+    }
+
+    for (certIdx = s->totalCerts - 1; certIdx >= 0; certIdx--) {
+        /* get certificate buffer */
+        cert = &s->certs[certIdx];
+
+        dCert = (DecodedCert*)XMALLOC(sizeof(DecodedCert), NULL, DYNAMIC_TYPE_DCERT);
+
+        if (dCert == NULL) {
+            goto error;
+        }
+        XMEMSET(dCert, 0, sizeof(DecodedCert));
+
+        InitDecodedCert(dCert, cert->buffer, cert->length, NULL);
+
+        /* Parse Certificate */
+        if (ParseCert(dCert, CERT_TYPE, NO_VERIFY, NULL)){
+            goto error;
+        }
+        x509 = wolfSSL_X509_new();
+
+        if (x509 == NULL) {
+            goto error;
+        }
+        InitX509(x509, 1, NULL);
+
+        if (CopyDecodedToX509(x509, dCert) == 0) {
+
+            if (wolfSSL_sk_X509_push(sk, x509) != SSL_SUCCESS) {
+                WOLFSSL_MSG("Unable to load x509 into stack");
+                wolfSSL_X509_free(x509);
+                goto error;
+            }
+        }
+        else {
+            goto error;
+        }
+        found = 1;
+
+        FreeDecodedCert(dCert);
+        XFREE(dCert, NULL, DYNAMIC_TYPE_DCERT);
+        dCert = NULL;
+    }
+
+    if (!found) {
+        wolfSSL_sk_X509_free(sk);
+        sk = NULL;
+    }
+    return sk;
+
+error:
+    if (dCert) {
+        FreeDecodedCert(dCert);
+        XFREE(dCert, NULL, DYNAMIC_TYPE_DCERT);
+    }
+
+    if (sk)
+        wolfSSL_sk_X509_free(sk);
+
+    return NULL;
+}
+#endif /* OPENSSL_EXTRA && !NO_FILESYSTEM */
 
 /* Unload the CA signer list */
 int wolfSSL_CertManagerUnloadCAs(WOLFSSL_CERT_MANAGER* cm)
@@ -3026,7 +3801,7 @@
     if (wc_LockMutex(&cm->caLock) != 0)
         return BAD_MUTEX_E;
 
-    FreeSignerTable(cm->caTable, CA_TABLE_SIZE, NULL);
+    FreeSignerTable(cm->caTable, CA_TABLE_SIZE, cm->heap);
 
     wc_UnLockMutex(&cm->caLock);
 
@@ -3046,7 +3821,7 @@
     if (wc_LockMutex(&cm->tpLock) != 0)
         return BAD_MUTEX_E;
 
-    FreeTrustedPeerTable(cm->tpTable, TP_TABLE_SIZE, NULL);
+    FreeTrustedPeerTable(cm->tpTable, TP_TABLE_SIZE, cm->heap);
 
     wc_UnLockMutex(&cm->tpLock);
 
@@ -3057,340 +3832,6 @@
 
 #endif /* NO_CERTS */
 
-#if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL) || \
-    defined(HAVE_WEBSERVER)
-
-static const struct cipher{
-        unsigned char type;
-        const char *name;
-} cipher_tbl[] = {
-
-#ifndef NO_AES
-    #ifdef WOLFSSL_AES_128
-    {AES_128_CBC_TYPE, "AES-128-CBC"},
-    #endif
-    #ifdef WOLFSSL_AES_192
-    {AES_192_CBC_TYPE, "AES-192-CBC"},
-    #endif
-    #ifdef WOLFSSL_AES_256
-    {AES_256_CBC_TYPE, "AES-256-CBC"},
-    #endif
-#if defined(OPENSSL_EXTRA)
-    #ifdef WOLFSSL_AES_128
-        {AES_128_CTR_TYPE, "AES-128-CTR"},
-    #endif
-    #ifdef WOLFSSL_AES_192
-        {AES_192_CTR_TYPE, "AES-192-CTR"},
-    #endif
-    #ifdef WOLFSSL_AES_256
-        {AES_256_CTR_TYPE, "AES-256-CTR"},
-    #endif
-
-    #ifdef WOLFSSL_AES_128
-        {AES_128_ECB_TYPE, "AES-128-ECB"},
-    #endif
-    #ifdef WOLFSSL_AES_192
-        {AES_192_ECB_TYPE, "AES-192-ECB"},
-    #endif
-    #ifdef WOLFSSL_AES_256
-        {AES_256_ECB_TYPE, "AES-256-ECB"},
-    #endif
-#endif
-
-#endif
-
-#ifndef NO_DES3
-    {DES_CBC_TYPE, "DES-CBC"},
-    {DES_ECB_TYPE, "DES-ECB"},
-
-    {DES_EDE3_CBC_TYPE, "DES-EDE3-CBC"},
-    {DES_EDE3_ECB_TYPE, "DES-EDE3-ECB"},
-#endif
-
-#ifndef NO_RC4
-    {ARC4_TYPE, "ARC4"},
-#endif
-
-#ifdef HAVE_IDEA
-    {IDEA_CBC_TYPE, "IDEA-CBC"},
-#endif
-    { 0, NULL}
-};
-
-const WOLFSSL_EVP_CIPHER *wolfSSL_EVP_get_cipherbyname(const char *name)
-{
-
-    static const struct alias {
-        const char *name;
-        const char *alias;
-    } alias_tbl[] =
-    {
-#ifndef NO_DES3
-        {"DES-CBC", "DES"},
-        {"DES-CBC", "des"},
-        {"DES-ECB", "DES-ECB"},
-        {"DES-ECB", "des-ecb"},
-        {"DES-EDE3-CBC", "DES3"},
-        {"DES-EDE3-CBC", "des3"},
-        {"DES-EDE3-ECB", "DES-EDE3"},
-        {"DES-EDE3-ECB", "des-ede3"},
-        {"DES-EDE3-ECB", "des-ede3-ecb"},
-#endif
-#ifdef HAVE_IDEA
-        {"IDEA-CBC", "IDEA"},
-        {"IDEA-CBC", "idea"},
-#endif
-#ifndef NO_AES
-    #ifdef HAVE_AES_CBC
-        #ifdef WOLFSSL_AES_128
-        {"AES-128-CBC", "AES128-CBC"},
-        {"AES-128-CBC", "aes128-cbc"},
-        #endif
-        #ifdef WOLFSSL_AES_192
-        {"AES-192-CBC", "AES192-CBC"},
-        {"AES-192-CBC", "aes192-cbc"},
-        #endif
-        #ifdef WOLFSSL_AES_256
-        {"AES-256-CBC", "AES256-CBC"},
-        {"AES-256-CBC", "aes256-cbc"},
-        #endif
-    #endif
-    #ifdef WOLFSSL_AES_128
-        {"AES-128-ECB", "AES128-ECB"},
-        {"AES-128-ECB", "aes128-ecb"},
-    #endif
-    #ifdef WOLFSSL_AES_192
-        {"AES-192-ECB", "AES192-ECB"},
-        {"AES-192-ECB", "aes192-ecb"},
-    #endif
-    #ifdef WOLFSSL_AES_256
-        {"AES-256-ECB", "AES256-ECB"},
-        {"AES-256-EBC", "aes256-ecb"},
-    #endif
-#endif
-#ifndef NO_RC4
-        {"ARC4", "RC4"},
-#endif
-        { NULL, NULL}
-    };
-
-    const struct cipher *ent;
-    const struct alias  *al;
-
-    WOLFSSL_ENTER("EVP_get_cipherbyname");
-
-    for( al = alias_tbl; al->name != NULL; al++)
-        if(XSTRNCMP(name, al->alias, XSTRLEN(al->alias)+1) == 0) {
-            name = al->name;
-            break;
-        }
-
-    for( ent = cipher_tbl; ent->name != NULL; ent++)
-        if(XSTRNCMP(name, ent->name, XSTRLEN(ent->name)+1) == 0) {
-            return (WOLFSSL_EVP_CIPHER *)ent->name;
-        }
-
-    return NULL;
-}
-
-/*
- * return an EVP_CIPHER structure when cipher NID is passed.
- *
- * id  cipher NID
- *
- * retrun WOLFSSL_EVP_CIPHER
-*/
-const WOLFSSL_EVP_CIPHER *wolfSSL_EVP_get_cipherbynid(int id)
-{
-    WOLFSSL_ENTER("EVP_get_cipherbynid");
-
-    switch(id) {
-
-#if defined(OPENSSL_EXTRA)
-#ifndef NO_AES
-    #ifdef HAVE_AES_CBC
-        #ifdef WOLFSSL_AES_128
-        case NID_aes_128_cbc:
-            return wolfSSL_EVP_aes_128_cbc();
-        #endif
-        #ifdef WOLFSSL_AES_192
-        case NID_aes_192_cbc:
-            return wolfSSL_EVP_aes_192_cbc();
-        #endif
-        #ifdef WOLFSSL_AES_256
-        case NID_aes_256_cbc:
-            return wolfSSL_EVP_aes_256_cbc();
-        #endif
-    #endif
-    #ifdef WOLFSSL_AES_COUNTER
-        #ifdef WOLFSSL_AES_128
-        case NID_aes_128_ctr:
-            return wolfSSL_EVP_aes_128_ctr();
-        #endif
-        #ifdef WOLFSSL_AES_192
-        case NID_aes_192_ctr:
-            return wolfSSL_EVP_aes_192_ctr();
-        #endif
-        #ifdef WOLFSSL_AES_256
-        case NID_aes_256_ctr:
-            return wolfSSL_EVP_aes_256_ctr();
-        #endif
-    #endif /* WOLFSSL_AES_COUNTER */
-    #ifdef HAVE_AES_ECB
-        #ifdef WOLFSSL_AES_128
-        case NID_aes_128_ecb:
-            return wolfSSL_EVP_aes_128_ecb();
-        #endif
-        #ifdef WOLFSSL_AES_192
-        case NID_aes_192_ecb:
-            return wolfSSL_EVP_aes_192_ecb();
-        #endif
-        #ifdef WOLFSSL_AES_256
-        case NID_aes_256_ecb:
-            return wolfSSL_EVP_aes_256_ecb();
-        #endif
-    #endif /* HAVE_AES_ECB */
-#endif
-
-#ifndef NO_DES3
-        case NID_des_cbc:
-            return wolfSSL_EVP_des_cbc();
-#ifdef WOLFSSL_DES_ECB
-        case NID_des_ecb:
-            return wolfSSL_EVP_des_ecb();
-#endif
-        case NID_des_ede3_cbc:
-            return wolfSSL_EVP_des_ede3_cbc();
-#ifdef WOLFSSL_DES_ECB
-        case NID_des_ede3_ecb:
-            return wolfSSL_EVP_des_ede3_ecb();
-#endif
-#endif /*NO_DES3*/
-
-#ifdef HAVE_IDEA
-        case NID_idea_cbc:
-            return wolfSSL_EVP_idea_cbc();
-#endif
-#endif /*OPENSSL_EXTRA*/
-
-        default:
-            WOLFSSL_MSG("Bad cipher id value");
-    }
-
-    return NULL;
-}
-
-#ifndef NO_AES
-    #ifdef HAVE_AES_CBC
-    #ifdef WOLFSSL_AES_128
-        static char *EVP_AES_128_CBC;
-    #endif
-    #ifdef WOLFSSL_AES_192
-        static char *EVP_AES_192_CBC;
-    #endif
-    #ifdef WOLFSSL_AES_256
-        static char *EVP_AES_256_CBC;
-    #endif
-    #endif /* HAVE_AES_CBC */
-#if defined(OPENSSL_EXTRA)
-    #ifdef WOLFSSL_AES_128
-    static char *EVP_AES_128_CTR;
-    #endif
-    #ifdef WOLFSSL_AES_192
-    static char *EVP_AES_192_CTR;
-    #endif
-    #ifdef WOLFSSL_AES_256
-    static char *EVP_AES_256_CTR;
-    #endif
-
-    #ifdef WOLFSSL_AES_128
-    static char *EVP_AES_128_ECB;
-    #endif
-    #ifdef WOLFSSL_AES_192
-    static char *EVP_AES_192_ECB;
-    #endif
-    #ifdef WOLFSSL_AES_256
-    static char *EVP_AES_256_ECB;
-    #endif
-    static const int  EVP_AES_SIZE = 11;
-#endif
-#endif
-
-#ifndef NO_DES3
-static char *EVP_DES_CBC;
-static char *EVP_DES_ECB;
-
-static char *EVP_DES_EDE3_CBC;
-static char *EVP_DES_EDE3_ECB;
-
-#ifdef OPENSSL_EXTRA
-static const int  EVP_DES_SIZE = 7;
-static const int  EVP_DES_EDE3_SIZE = 12;
-#endif
-
-#endif
-
-#ifdef HAVE_IDEA
-static char *EVP_IDEA_CBC;
-#if defined(OPENSSL_EXTRA)
-static const int  EVP_IDEA_SIZE = 8;
-#endif
-#endif
-
-void wolfSSL_EVP_init(void)
-{
-#ifndef NO_AES
-    #ifdef HAVE_AES_CBC
-        #ifdef WOLFSSL_AES_128
-        EVP_AES_128_CBC = (char *)EVP_get_cipherbyname("AES-128-CBC");
-        #endif
-        #ifdef WOLFSSL_AES_192
-        EVP_AES_192_CBC = (char *)EVP_get_cipherbyname("AES-192-CBC");
-        #endif
-        #ifdef WOLFSSL_AES_256
-        EVP_AES_256_CBC = (char *)EVP_get_cipherbyname("AES-256-CBC");
-        #endif
-    #endif /* HAVE_AES_CBC */
-
-#if defined(OPENSSL_EXTRA)
-        #ifdef WOLFSSL_AES_128
-        EVP_AES_128_CTR = (char *)EVP_get_cipherbyname("AES-128-CTR");
-        #endif
-        #ifdef WOLFSSL_AES_192
-        EVP_AES_192_CTR = (char *)EVP_get_cipherbyname("AES-192-CTR");
-        #endif
-        #ifdef WOLFSSL_AES_256
-        EVP_AES_256_CTR = (char *)EVP_get_cipherbyname("AES-256-CTR");
-        #endif
-
-        #ifdef WOLFSSL_AES_128
-        EVP_AES_128_ECB = (char *)EVP_get_cipherbyname("AES-128-ECB");
-        #endif
-        #ifdef WOLFSSL_AES_192
-        EVP_AES_192_ECB = (char *)EVP_get_cipherbyname("AES-192-ECB");
-        #endif
-        #ifdef WOLFSSL_AES_256
-        EVP_AES_256_ECB = (char *)EVP_get_cipherbyname("AES-256-ECB");
-        #endif
-#endif
-#endif
-
-#ifndef NO_DES3
-    EVP_DES_CBC = (char *)EVP_get_cipherbyname("DES-CBC");
-    EVP_DES_ECB = (char *)EVP_get_cipherbyname("DES-ECB");
-
-    EVP_DES_EDE3_CBC = (char *)EVP_get_cipherbyname("DES-EDE3-CBC");
-    EVP_DES_EDE3_ECB = (char *)EVP_get_cipherbyname("DES-EDE3-ECB");
-#endif
-
-#ifdef HAVE_IDEA
-    EVP_IDEA_CBC = (char *)EVP_get_cipherbyname("IDEA-CBC");
-#endif
-}
-
-#endif /* OPENSSL_EXTRA || OPENSSL_EXTRA_X509_SMALL || HAVE_WEBSERVER */
-
-
 #if !defined(NO_FILESYSTEM) && !defined(NO_STDIO_FILESYSTEM)
 
 void wolfSSL_ERR_print_errors_fp(XFILE fp, int err)
@@ -3407,10 +3848,16 @@
 {
     wc_ERR_print_errors_fp(fp);
 }
-#endif
-#endif
-
-
+
+void wolfSSL_ERR_print_errors_cb (int (*cb)(const char *str, size_t len,
+                                            void *u), void *u)
+{
+    wc_ERR_print_errors_cb(cb, u);
+}
+#endif
+#endif
+
+WOLFSSL_ABI
 int wolfSSL_pending(WOLFSSL* ssl)
 {
     WOLFSSL_ENTER("SSL_pending");
@@ -3479,9 +3926,11 @@
 
 #ifndef NO_TLS
     #ifndef NO_OLD_TLS
+        #ifdef WOLFSSL_ALLOW_TLSV10
         case WOLFSSL_TLSV1:
             *minVersion = TLSv1_MINOR;
             break;
+        #endif
 
         case WOLFSSL_TLSV1_1:
             *minVersion = TLSv1_1_MINOR;
@@ -3509,6 +3958,7 @@
 
 
 /* Set minimum downgrade version allowed, WOLFSSL_SUCCESS on ok */
+WOLFSSL_ABI
 int wolfSSL_CTX_SetMinVersion(WOLFSSL_CTX* ctx, int version)
 {
     WOLFSSL_ENTER("wolfSSL_CTX_SetMinVersion");
@@ -3637,7 +4087,7 @@
 /* Make a work from the front of random hash */
 static WC_INLINE word32 MakeWordFromHash(const byte* hashID)
 {
-    return ((word32)hashID[0] << 24) | (hashID[1] << 16) |
+    return ((word32)hashID[0] << 24) | ((word32)hashID[1] << 16) |
         (hashID[2] <<  8) | hashID[3];
 }
 
@@ -3869,7 +4319,7 @@
 {
     int ret, row;
     TrustedPeerCert* peerCert;
-    DecodedCert* cert = NULL;
+    DecodedCert* cert;
     DerBuffer*   der = *pDer;
     byte* subjectHash = NULL;
 
@@ -3882,6 +4332,7 @@
 
     InitDecodedCert(cert, der->buffer, der->length, cm->heap);
     if ((ret = ParseCert(cert, TRUSTED_PEER_TYPE, verify, cm)) != 0) {
+        FreeDecodedCert(cert);
         XFREE(cert, NULL, DYNAMIC_TYPE_DCERT);
         return ret;
     }
@@ -3916,6 +4367,7 @@
 
     if (AlreadyTrustedPeer(cm, subjectHash)) {
         WOLFSSL_MSG("\tAlready have this CA, not adding again");
+        FreeTrustedPeer(peerCert, cm->heap);
         (void)ret;
     }
     else {
@@ -4009,11 +4461,18 @@
 
     WOLFSSL_MSG("Adding a CA");
 
+    if (cm == NULL) {
+        FreeDer(pDer);
+        return BAD_FUNC_ARG;
+    }
+
 #ifdef WOLFSSL_SMALL_STACK
     cert = (DecodedCert*)XMALLOC(sizeof(DecodedCert), NULL,
                                  DYNAMIC_TYPE_DCERT);
-    if (cert == NULL)
+    if (cert == NULL) {
+        FreeDer(pDer);
         return MEMORY_E;
+    }
 #endif
 
     InitDecodedCert(cert, der->buffer, der->length, cm->heap);
@@ -4056,6 +4515,15 @@
                 }
                 break;
             #endif /* HAVE_ED25519 */
+            #ifdef HAVE_ED448
+            case ED448k:
+                if (cm->minEccKeySz < 0 ||
+                                     ED448_KEY_SIZE < (word16)cm->minEccKeySz) {
+                    ret = ECC_KEY_SIZE_E;
+                    WOLFSSL_MSG("\tCA ECC key size error");
+                }
+                break;
+            #endif /* HAVE_ED448 */
 
             default:
                 WOLFSSL_MSG("\tNo key size check done on CA");
@@ -4069,7 +4537,7 @@
     }
 #ifndef ALLOW_INVALID_CERTSIGN
     else if (ret == 0 && cert->isCA == 1 && type != WOLFSSL_USER_CA &&
-             (cert->extKeyUsage & KEYUSE_KEY_CERT_SIGN) == 0) {
+        !cert->selfSigned && (cert->extKeyUsage & KEYUSE_KEY_CERT_SIGN) == 0) {
         /* Intermediate CA certs are required to have the keyCertSign
         * extension set. User loaded root certs are not. */
         WOLFSSL_MSG("\tDoesn't have key usage certificate signing");
@@ -4103,7 +4571,9 @@
             signer->name           = cert->subjectCN;
         }
         signer->pathLength     = cert->pathLength;
+        signer->maxPathLen     = cert->maxPathLen;
         signer->pathLengthSet  = cert->pathLengthSet;
+        signer->selfSigned     = cert->selfSigned;
     #ifndef IGNORE_NAME_CONSTRAINTS
         signer->permittedNames = cert->permittedNames;
         signer->excludedNames  = cert->excludedNames;
@@ -4114,6 +4584,10 @@
     #endif
         XMEMCPY(signer->subjectNameHash, cert->subjectHash,
                 SIGNER_DIGEST_SIZE);
+    #ifdef HAVE_OCSP
+        XMEMCPY(signer->subjectKeyHash, cert->subjectKeyHash,
+                KEYID_SIZE);
+    #endif
         signer->keyUsage = cert->extKeyUsageSet ? cert->extKeyUsage
                                                 : 0xFFFF;
         signer->next    = NULL; /* If Key Usage not set, all uses valid. */
@@ -4143,7 +4617,26 @@
             FreeSigner(signer, cm->heap);
         }
     }
-
+#if defined(WOLFSSL_RENESAS_TSIP_TLS)
+    /* Verify CA by TSIP so that generated tsip key is going to be able to */
+    /* be used for peer's cert verification                                */
+    /* TSIP is only able to handle USER CA, and only one CA.               */
+    /* Therefore, it doesn't need to call TSIP again if there is already   */
+    /* verified CA.                                                        */
+    if ( ret == 0 && signer != NULL ) {
+        signer->cm_idx = row;
+        if (type == WOLFSSL_USER_CA && tsip_rootCAverified() == 0 ) {
+            if ((ret = tsip_tls_RootCertVerify(cert->source, cert->maxIdx,
+                 cert->sigCtx.pubkey_n_start, cert->sigCtx.pubkey_n_len - 1,
+                 cert->sigCtx.pubkey_e_start, cert->sigCtx.pubkey_e_len - 1,
+                 row/* cm index */))
+                != 0)
+                WOLFSSL_MSG("tsip_tls_RootCertVerify() failed");
+            else
+                WOLFSSL_MSG("tsip_tls_RootCertVerify() succeed");
+        }
+    }
+#endif
     WOLFSSL_MSG("\tFreeing Parsed CA");
     FreeDecodedCert(cert);
 #ifdef WOLFSSL_SMALL_STACK
@@ -4166,22 +4659,29 @@
     /* basic config gives a cache with 33 sessions, adequate for clients and
        embedded servers
 
-       MEDIUM_SESSION_CACHE allows 1055 sessions, adequate for servers that
-       aren't under heavy load, basically allows 200 new sessions per minute
-
-       BIG_SESSION_CACHE yields 20,027 sessions
+       TITAN_SESSION_CACHE allows just over 2 million sessions, for servers
+       with titanic amounts of memory with long session ID timeouts and high
+       levels of traffic.
 
        HUGE_SESSION_CACHE yields 65,791 sessions, for servers under heavy load,
        allows over 13,000 new sessions per minute or over 200 new sessions per
        second
 
+       BIG_SESSION_CACHE yields 20,027 sessions
+
+       MEDIUM_SESSION_CACHE allows 1055 sessions, adequate for servers that
+       aren't under heavy load, basically allows 200 new sessions per minute
+
        SMALL_SESSION_CACHE only stores 6 sessions, good for embedded clients
        or systems where the default of nearly 3kB is too much RAM, this define
        uses less than 500 bytes RAM
 
        default SESSION_CACHE stores 33 sessions (no XXX_SESSION_CACHE defined)
     */
-    #ifdef HUGE_SESSION_CACHE
+    #if defined(TITAN_SESSION_CACHE)
+        #define SESSIONS_PER_ROW 31
+        #define SESSION_ROWS 64937
+    #elif defined(HUGE_SESSION_CACHE)
         #define SESSIONS_PER_ROW 11
         #define SESSION_ROWS 5981
     #elif defined(BIG_SESSION_CACHE)
@@ -4204,13 +4704,13 @@
         WOLFSSL_SESSION Sessions[SESSIONS_PER_ROW];
     } SessionRow;
 
-    static SessionRow SessionCache[SESSION_ROWS];
+    static WOLFSSL_GLOBAL SessionRow SessionCache[SESSION_ROWS];
 
     #if defined(WOLFSSL_SESSION_STATS) && defined(WOLFSSL_PEAK_SESSIONS)
-        static word32 PeakSessions;
-    #endif
-
-    static wolfSSL_Mutex session_mutex;   /* SessionCache mutex */
+        static WOLFSSL_GLOBAL word32 PeakSessions;
+    #endif
+
+    static WOLFSSL_GLOBAL wolfSSL_Mutex session_mutex; /* SessionCache mutex */
 
     #ifndef NO_CLIENT_CACHE
 
@@ -4225,12 +4725,14 @@
             ClientSession Clients[SESSIONS_PER_ROW];
         } ClientRow;
 
-        static ClientRow ClientCache[SESSION_ROWS];  /* Client Cache */
+        static WOLFSSL_GLOBAL ClientRow ClientCache[SESSION_ROWS];
+                                                     /* Client Cache */
                                                      /* uses session mutex */
     #endif  /* NO_CLIENT_CACHE */
 
 #endif /* NO_SESSION_CACHE */
 
+WOLFSSL_ABI
 int wolfSSL_Init(void)
 {
     WOLFSSL_ENTER("wolfSSL_Init");
@@ -4241,6 +4743,14 @@
             WOLFSSL_MSG("Bad wolfCrypt Init");
             return WC_INIT_E;
         }
+
+#ifdef OPENSSL_EXTRA
+        if (wolfSSL_RAND_seed(NULL, 0) != WOLFSSL_SUCCESS) {
+            WOLFSSL_MSG("wolfSSL_RAND_Seed failed");
+            return WC_INIT_E;
+        }
+#endif
+
 #ifndef NO_SESSION_CACHE
         if (wc_InitMutex(&session_mutex) != 0) {
             WOLFSSL_MSG("Bad Init Mutex session");
@@ -4271,7 +4781,7 @@
 /* process user cert chain to pass during the handshake */
 static int ProcessUserChain(WOLFSSL_CTX* ctx, const unsigned char* buff,
                          long sz, int format, int type, WOLFSSL* ssl,
-                         long* used, EncryptedInfo* info)
+                         long* used, EncryptedInfo* info, int verify)
 {
     int ret = 0;
     void* heap = wolfSSL_CTX_GetHeap(ctx, ssl);
@@ -4279,8 +4789,13 @@
     int cnt = 0;
 #endif
 
+    if ((type == CA_TYPE) && (ctx == NULL)) {
+        WOLFSSL_MSG("Need context for CA load");
+        return BAD_FUNC_ARG;
+    }
+
     /* we may have a user cert chain, try to consume */
-    if (type == CERT_TYPE && info->consumed < sz) {
+    if ((type == CERT_TYPE || type == CA_TYPE) && (info->consumed < sz)) {
     #ifdef WOLFSSL_SMALL_STACK
         byte   staticBuffer[1];                 /* force heap usage */
     #else
@@ -4324,7 +4839,8 @@
                 if (format == WOLFSSL_FILETYPE_ASN1) {
                     /* get length of der (read sequence) */
                     word32 inOutIdx = 0;
-                    if (GetSequence(buff + consumed, &inOutIdx, &length, remain) < 0) {
+                    if (GetSequence(buff + consumed, &inOutIdx, &length,
+                            remain) < 0) {
                         ret = ASN_NO_PEM_HEADER;
                     }
                     length += inOutIdx; /* include leading sequence */
@@ -4355,7 +4871,15 @@
                     if (used)
                         *used += info->consumed;
                 }
-            }
+
+                /* add CA's to certificate manager */
+                if (type == CA_TYPE) {
+                    /* verify CA unless user set to no verify */
+                    ret = AddCA(ctx->cm, &part, WOLFSSL_USER_CA, verify);
+                    gotOne = 0; /* don't exit loop for CA type */
+                }
+            }
+
             FreeDer(&part);
 
             if (ret == ASN_NO_PEM_HEADER && gotOne) {
@@ -4382,21 +4906,22 @@
                 }
                 ret = AllocDer(&ssl->buffers.certChain, idx, type, heap);
                 if (ret == 0) {
-                    XMEMCPY(ssl->buffers.certChain->buffer, chainBuffer, idx);
+                    XMEMCPY(ssl->buffers.certChain->buffer, chainBuffer,
+                            idx);
                     ssl->buffers.weOwnCertChain = 1;
                 }
-#ifdef WOLFSSL_TLS13
+            #ifdef WOLFSSL_TLS13
                 ssl->buffers.certChainCnt = cnt;
-#endif
+            #endif
             } else if (ctx) {
                 FreeDer(&ctx->certChain);
                 ret = AllocDer(&ctx->certChain, idx, type, heap);
                 if (ret == 0) {
                     XMEMCPY(ctx->certChain->buffer, chainBuffer, idx);
                 }
-#ifdef WOLFSSL_TLS13
+            #ifdef WOLFSSL_TLS13
                 ctx->certChainCnt = cnt;
-#endif
+            #endif
             }
         }
 
@@ -4406,29 +4931,278 @@
 
     return ret;
 }
+
+static int ProcessBufferTryDecode(WOLFSSL_CTX* ctx, WOLFSSL* ssl, DerBuffer* der,
+    int* keySz, word32* idx, int* resetSuites, int* keyFormat, void* heap, int devId)
+{
+    int ret = 0;
+
+    (void)heap;
+    (void)devId;
+
+    if (ctx == NULL && ssl == NULL)
+        ret = BAD_FUNC_ARG;
+    if (!der || !keySz || !idx || !resetSuites || !keyFormat)
+        ret = BAD_FUNC_ARG;
+
+#ifndef NO_RSA
+    if (ret == 0 && (*keyFormat == 0 || *keyFormat == RSAk)) {
+        /* make sure RSA key can be used */
+    #ifdef WOLFSSL_SMALL_STACK
+        RsaKey* key;
+    #else
+        RsaKey  key[1];
+    #endif
+
+    #ifdef WOLFSSL_SMALL_STACK
+        key = (RsaKey*)XMALLOC(sizeof(RsaKey), heap, DYNAMIC_TYPE_RSA);
+        if (key == NULL)
+            return MEMORY_E;
+    #endif
+
+        ret = wc_InitRsaKey_ex(key, heap, devId);
+        if (ret == 0) {
+            *idx = 0;
+            if (wc_RsaPrivateKeyDecode(der->buffer, idx, key, der->length)
+                != 0) {
+            #if !defined(HAVE_ECC) && !defined(HAVE_ED25519) && \
+                                                            !defined(HAVE_ED448)
+                WOLFSSL_MSG("RSA decode failed and ECC/ED25519/ED448 not "
+                            "enabled to try");
+                ret = WOLFSSL_BAD_FILE;
+            #endif
+            }
+            else {
+                /* check that the size of the RSA key is enough */
+                int minRsaSz = ssl ? ssl->options.minRsaKeySz :
+                    ctx->minRsaKeySz;
+                *keySz = wc_RsaEncryptSize((RsaKey*)key);
+                if (*keySz < minRsaSz) {
+                    ret = RSA_KEY_SIZE_E;
+                    WOLFSSL_MSG("Private Key size too small");
+                }
+
+                if (ssl) {
+                    ssl->buffers.keyType = rsa_sa_algo;
+                    ssl->buffers.keySz = *keySz;
+                }
+                else {
+                    ctx->privateKeyType = rsa_sa_algo;
+                    ctx->privateKeySz = *keySz;
+                }
+
+                *keyFormat = RSAk;
+
+                if (ssl && ssl->options.side == WOLFSSL_SERVER_END) {
+                    ssl->options.haveStaticECC = 0;
+                    *resetSuites = 1;
+                }
+            }
+
+            wc_FreeRsaKey(key);
+        }
+
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(key, heap, DYNAMIC_TYPE_RSA);
+    #endif
+    }
+#endif
+#ifdef HAVE_ECC
+    if (ret == 0 && (*keyFormat == 0 || *keyFormat == ECDSAk)) {
+        /* make sure ECC key can be used */
+    #ifdef WOLFSSL_SMALL_STACK
+        ecc_key* key;
+    #else
+        ecc_key  key[1];
+    #endif
+
+    #ifdef WOLFSSL_SMALL_STACK
+        key = (ecc_key*)XMALLOC(sizeof(ecc_key), heap, DYNAMIC_TYPE_ECC);
+        if (key == NULL)
+            return MEMORY_E;
+    #endif
+
+        if (wc_ecc_init_ex(key, heap, devId) == 0) {
+            *idx = 0;
+            if (wc_EccPrivateKeyDecode(der->buffer, idx, key,
+                                                        der->length) == 0) {
+                /* check for minimum ECC key size and then free */
+                int minKeySz = ssl ? ssl->options.minEccKeySz :
+                                                        ctx->minEccKeySz;
+                *keySz = wc_ecc_size(key);
+                if (*keySz < minKeySz) {
+                    WOLFSSL_MSG("ECC private key too small");
+                    ret = ECC_KEY_SIZE_E;
+                }
+
+                *keyFormat = ECDSAk;
+                if (ssl) {
+                    ssl->options.haveStaticECC = 1;
+                    ssl->buffers.keyType = ecc_dsa_sa_algo;
+                    ssl->buffers.keySz = *keySz;
+                }
+                else {
+                    ctx->haveStaticECC = 1;
+                    ctx->privateKeyType = ecc_dsa_sa_algo;
+                    ctx->privateKeySz = *keySz;
+                }
+
+                if (ssl && ssl->options.side == WOLFSSL_SERVER_END) {
+                    *resetSuites = 1;
+                }
+            }
+
+            wc_ecc_free(key);
+        }
+
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(key, heap, DYNAMIC_TYPE_ECC);
+    #endif
+    }
+#endif /* HAVE_ECC */
+#ifdef HAVE_ED25519
+    if (ret == 0 && (*keyFormat == 0 || *keyFormat == ED25519k)) {
+        /* make sure Ed25519 key can be used */
+    #ifdef WOLFSSL_SMALL_STACK
+        ed25519_key* key;
+    #else
+        ed25519_key  key[1];
+    #endif
+
+    #ifdef WOLFSSL_SMALL_STACK
+        key = (ed25519_key*)XMALLOC(sizeof(ed25519_key), heap,
+                                                      DYNAMIC_TYPE_ED25519);
+        if (key == NULL)
+            return MEMORY_E;
+    #endif
+
+        ret = wc_ed25519_init(key);
+        if (ret == 0) {
+            *idx = 0;
+            if (wc_Ed25519PrivateKeyDecode(der->buffer, idx, key,
+                                                        der->length) == 0) {
+                /* check for minimum key size and then free */
+                int minKeySz = ssl ? ssl->options.minEccKeySz :
+                                                           ctx->minEccKeySz;
+                *keySz = ED25519_KEY_SIZE;
+                if (*keySz < minKeySz) {
+                    WOLFSSL_MSG("ED25519 private key too small");
+                    ret = ECC_KEY_SIZE_E;
+                }
+                if (ret == 0) {
+                    if (ssl) {
+                        ssl->buffers.keyType = ed25519_sa_algo;
+                        ssl->buffers.keySz = *keySz;
+                    }
+                    else if (ctx) {
+                        ctx->privateKeyType = ed25519_sa_algo;
+                        ctx->privateKeySz = *keySz;
+                    }
+
+                    *keyFormat = ED25519k;
+                    if (ssl && ssl->options.side == WOLFSSL_SERVER_END) {
+                        *resetSuites = 1;
+                    }
+                }
+            }
+
+            wc_ed25519_free(key);
+        }
+
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(key, heap, DYNAMIC_TYPE_ED25519);
+    #endif
+    }
+#endif /* HAVE_ED25519 */
+#ifdef HAVE_ED448
+    if (ret == 0 && (*keyFormat == 0 || *keyFormat == ED448k)) {
+        /* make sure Ed448 key can be used */
+    #ifdef WOLFSSL_SMALL_STACK
+        ed448_key* key = NULL;
+    #else
+        ed448_key  key[1];
+    #endif
+
+    #ifdef WOLFSSL_SMALL_STACK
+        key = (ed448_key*)XMALLOC(sizeof(ed448_key), heap, DYNAMIC_TYPE_ED448);
+        if (key == NULL)
+            return MEMORY_E;
+    #endif
+
+        ret = wc_ed448_init(key);
+        if (ret == 0) {
+            *idx = 0;
+            if (wc_Ed448PrivateKeyDecode(der->buffer, idx, key,
+                                                            der->length) != 0) {
+                ret = WOLFSSL_BAD_FILE;
+            }
+
+            if (ret == 0) {
+                /* check for minimum key size and then free */
+                int minKeySz = ssl ? ssl->options.minEccKeySz :
+                                                               ctx->minEccKeySz;
+                *keySz = ED448_KEY_SIZE;
+                if (*keySz < minKeySz) {
+                    WOLFSSL_MSG("ED448 private key too small");
+                    ret = ECC_KEY_SIZE_E;
+                }
+            }
+            if (ret == 0) {
+                if (ssl) {
+                    ssl->buffers.keyType = ed448_sa_algo;
+                    ssl->buffers.keySz = *keySz;
+                }
+                else if (ctx) {
+                    ctx->privateKeyType = ed448_sa_algo;
+                    ctx->privateKeySz = *keySz;
+                }
+
+                *keyFormat = ED448k;
+                if (ssl && ssl->options.side == WOLFSSL_SERVER_END) {
+                    *resetSuites = 1;
+                }
+            }
+
+            wc_ed448_free(key);
+        }
+
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(key, heap, DYNAMIC_TYPE_ED448);
+    #endif
+    }
+#endif /* HAVE_ED448 */
+    return ret;
+}
+
 /* process the buffer buff, length sz, into ctx of format and type
    used tracks bytes consumed, userChain specifies a user cert chain
    to pass during the handshake */
 int ProcessBuffer(WOLFSSL_CTX* ctx, const unsigned char* buff,
                          long sz, int format, int type, WOLFSSL* ssl,
-                         long* used, int userChain)
+                         long* used, int userChain, int verify)
 {
     DerBuffer*    der = NULL;        /* holds DER or RAW (for NTRU) */
     int           ret = 0;
-    int           eccKey = 0;
-    int           ed25519Key = 0;
-    int           rsaKey = 0;
+    int           done = 0;
+    int           keyFormat = 0;
     int           resetSuites = 0;
     void*         heap = wolfSSL_CTX_GetHeap(ctx, ssl);
     int           devId = wolfSSL_CTX_GetDevId(ctx, ssl);
+    word32        idx = 0;
+    int           keySz = 0;
+#if (defined(WOLFSSL_ENCRYPTED_KEYS) && !defined(NO_PWDBASED)) || \
+     defined(HAVE_PKCS8)
+    word32        algId = 0;
+#endif
 #ifdef WOLFSSL_SMALL_STACK
     EncryptedInfo* info = NULL;
 #else
     EncryptedInfo  info[1];
 #endif
 
-    (void)rsaKey;
     (void)devId;
+    (void)idx;
+    (void)keySz;
 
     if (used)
         *used = sz;     /* used bytes default to sz, PEM chain may shorten*/
@@ -4449,7 +5223,7 @@
 #endif
 
     XMEMSET(info, 0, sizeof(EncryptedInfo));
-#ifdef WOLFSSL_ENCRYPTED_KEYS
+#if defined(WOLFSSL_ENCRYPTED_KEYS) && !defined(NO_PWDBASED)
     if (ctx) {
         info->passwd_cb       = ctx->passwd_cb;
         info->passwd_userdata = ctx->passwd_userdata;
@@ -4458,7 +5232,7 @@
 
     if (format == WOLFSSL_FILETYPE_PEM) {
     #ifdef WOLFSSL_PEM_TO_DER
-        ret = PemToDer(buff, sz, type, &der, heap, info, &eccKey);
+        ret = PemToDer(buff, sz, type, &der, heap, info, &keyFormat);
     #else
         ret = NOT_COMPILED_IN;
     #endif
@@ -4467,12 +5241,19 @@
         /* ASN1 (DER) or RAW (NTRU) */
         int length = (int)sz;
         if (format == WOLFSSL_FILETYPE_ASN1) {
-            /* get length of der (read sequence) */
+            /* get length of der (read sequence or octet string) */
             word32 inOutIdx = 0;
-            if (GetSequence(buff, &inOutIdx, &length, (word32)sz) < 0) {
+            if (GetSequence(buff, &inOutIdx, &length, (word32)sz) >= 0) {
+                length += inOutIdx; /* include leading sequence */
+            }
+            /* get length using octect string (allowed for private key types) */
+            else if (type == PRIVATEKEY_TYPE &&
+                    GetOctetString(buff, &inOutIdx, &length, (word32)sz) >= 0) {
+                length += inOutIdx; /* include leading oct string */
+            }
+            else {
                 ret = ASN_PARSE_E;
             }
-            length += inOutIdx; /* include leading sequence */
         }
 
         info->consumed = length;
@@ -4482,6 +5263,18 @@
             if (ret == 0) {
                 XMEMCPY(der->buffer, buff, length);
             }
+
+        #ifdef HAVE_PKCS8
+            /* if private key try and remove PKCS8 header */
+            if (type == PRIVATEKEY_TYPE) {
+                if ((ret = ToTraditional_ex(der->buffer, der->length, &algId)) > 0) {
+                    /* Found PKCS8 header */
+                    /* ToTraditional_ex moves buff and returns adjusted length */
+                    der->length = ret;
+                }
+                ret = 0; /* failures should be ignored */
+            }
+        #endif
         }
     }
 
@@ -4491,75 +5284,43 @@
 
     /* process user chain */
     if (ret >= 0) {
-        /* First certificate in chain is loaded into ssl->buffers.certificate.
-         * Remainder are loaded into ssl->buffers.certChain.
-         * Chain should have server cert first, then intermediates, then root.
-         */
+        /* Chain should have server cert first, then intermediates, then root.
+         * First certificate in chain is processed below after ProcessUserChain
+         *   and is loaded into ssl->buffers.certificate.
+         * Remainder are processed using ProcessUserChain and are loaded into
+         *   ssl->buffers.certChain. */
         if (userChain) {
-            ret = ProcessUserChain(ctx, buff, sz, format, type, ssl, used, info);
-        }
-    }
-
-#ifdef WOLFSSL_ENCRYPTED_KEYS
-    /* for WOLFSSL_FILETYPE_PEM, PemToDer manage the decryption if required */
-    if (ret >= 0 && info->set && format != WOLFSSL_FILETYPE_PEM) {
-        /* decrypt */
-        int   passwordSz = NAME_SZ;
-#ifdef WOLFSSL_SMALL_STACK
-        char* password = NULL;
-#else
-        char  password[NAME_SZ];
-#endif
-
-    #ifdef WOLFSSL_SMALL_STACK
-        password = (char*)XMALLOC(passwordSz, heap, DYNAMIC_TYPE_STRING);
-        if (password == NULL)
-            ret = MEMORY_E;
-        else
-    #endif
-        if (info->passwd_cb == NULL) {
-            WOLFSSL_MSG("No password callback set");
-            ret = NO_PASSWORD;
-        }
-        else {
-            ret = info->passwd_cb(password, passwordSz, PEM_PASS_READ,
-                info->passwd_userdata);
-            if (ret >= 0) {
-                passwordSz = ret;
-
-                /* decrypt the key */
-                ret = wc_BufferKeyDecrypt(info, der->buffer, der->length,
-                    (byte*)password, passwordSz, WC_MD5);
-
-                ForceZero(password, passwordSz);
-            }
-        }
-
-    #ifdef WOLFSSL_SMALL_STACK
-        XFREE(password, heap, DYNAMIC_TYPE_STRING);
-    #endif
-    }
-#endif /* WOLFSSL_ENCRYPTED_KEYS */
-
-#ifdef WOLFSSL_SMALL_STACK
-    XFREE(info, heap, DYNAMIC_TYPE_ENCRYPTEDINFO);
-#endif
+            ret = ProcessUserChain(ctx, buff, sz, format, type, ssl, used, info,
+                                   verify);
+        }
+    }
+
+    /* info is only used for private key with DER or PEM, so free now */
+    if (ret < 0 || type != PRIVATEKEY_TYPE || format == WOLFSSL_FILETYPE_RAW) {
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(info, heap, DYNAMIC_TYPE_ENCRYPTEDINFO);
+    #endif
+    }
 
     /* check for error */
     if (ret < 0) {
         FreeDer(&der);
-        return ret;
-    }
-
+        done = 1;
+    }
+
+    if (done == 1) {
+        /* No operation, just skip the next section */
+    }
     /* Handle DER owner */
-    if (type == CA_TYPE) {
+    else if (type == CA_TYPE) {
         if (ctx == NULL) {
             WOLFSSL_MSG("Need context for CA load");
             FreeDer(&der);
             return BAD_FUNC_ARG;
         }
         /* verify CA unless user set to no verify */
-        return AddCA(ctx->cm, &der, WOLFSSL_USER_CA, !ctx->verifyNone);
+        ret = AddCA(ctx->cm, &der, WOLFSSL_USER_CA, verify);
+        done = 1;
     }
 #ifdef WOLFSSL_TRUST_PEER_CERT
     else if (type == TRUSTED_PEER_TYPE) {
@@ -4569,7 +5330,8 @@
             return BAD_FUNC_ARG;
         }
         /* add trusted peer cert */
-        return AddTrustedPeer(ctx->cm, &der, !ctx->verifyNone);
+        ret = AddTrustedPeer(ctx->cm, &der, !ctx->verifyNone);
+        done = 1;
     }
 #endif /* WOLFSSL_TRUST_PEER_CERT */
     else if (type == CERT_TYPE) {
@@ -4624,207 +5386,80 @@
         return WOLFSSL_BAD_CERTTYPE;
     }
 
-    if (type == PRIVATEKEY_TYPE && format != WOLFSSL_FILETYPE_RAW) {
-    #ifndef NO_RSA
-        if (!eccKey && !ed25519Key) {
-            /* make sure RSA key can be used */
-            word32 idx = 0;
-        #ifdef WOLFSSL_SMALL_STACK
-            RsaKey* key = NULL;
+    if (done == 1) {
+        /* No operation, just skip the next section */
+    }
+    else if (type == PRIVATEKEY_TYPE && format != WOLFSSL_FILETYPE_RAW) {
+    #if defined(WOLFSSL_ENCRYPTED_KEYS) || defined(HAVE_PKCS8)
+        keyFormat = algId;
+    #endif
+
+        ret = ProcessBufferTryDecode(ctx, ssl, der, &keySz, &idx, &resetSuites,
+                &keyFormat, heap, devId);
+
+    #if defined(WOLFSSL_ENCRYPTED_KEYS) && !defined(NO_PWDBASED)
+        /* for WOLFSSL_FILETYPE_PEM, PemToDer manages the decryption */
+        /* If private key type PKCS8 header wasn't already removed (algoId == 0) */
+        if ((ret != 0 || keyFormat == 0)
+            && format != WOLFSSL_FILETYPE_PEM && info->passwd_cb && algId == 0)
+        {
+            int   passwordSz = NAME_SZ;
+        #ifndef WOLFSSL_SMALL_STACK
+            char  password[NAME_SZ];
         #else
-            RsaKey  key[1];
-        #endif
-
-        #ifdef WOLFSSL_SMALL_STACK
-            key = (RsaKey*)XMALLOC(sizeof(RsaKey), heap, DYNAMIC_TYPE_RSA);
-            if (key == NULL)
+            char* password = (char*)XMALLOC(passwordSz, heap, DYNAMIC_TYPE_STRING);
+            if (password == NULL) {
+            #ifdef WOLFSSL_SMALL_STACK
+                XFREE(info, heap, DYNAMIC_TYPE_ENCRYPTEDINFO);
+            #endif
+                FreeDer(&der);
                 return MEMORY_E;
-        #endif
-
-            ret = wc_InitRsaKey_ex(key, heap, devId);
-            if (ret == 0) {
-                if (wc_RsaPrivateKeyDecode(der->buffer, &idx, key, der->length)
-                    != 0) {
-                #ifdef HAVE_ECC
-                    /* could have DER ECC (or pkcs8 ecc), no easy way to tell */
-                    eccKey = 1;  /* try it next */
-                #elif defined(HAVE_ED25519)
-                    ed25519Key = 1; /* try it next */
-                #else
-                    WOLFSSL_MSG("RSA decode failed and ECC not enabled to try");
-                    ret = WOLFSSL_BAD_FILE;
-                #endif
-                }
-                else {
-                    /* check that the size of the RSA key is enough */
-                    int rsaSz = wc_RsaEncryptSize((RsaKey*)key);
-                    int minRsaSz;
-
-                    minRsaSz = ssl ? ssl->options.minRsaKeySz : ctx->minRsaKeySz;
-                    if (rsaSz < minRsaSz) {
-                        ret = RSA_KEY_SIZE_E;
-                        WOLFSSL_MSG("Private Key size too small");
-                    }
-
-                    if (ssl) {
-                        ssl->buffers.keyType = rsa_sa_algo;
-                        ssl->buffers.keySz = rsaSz;
-                    }
-                    else if(ctx) {
-                        ctx->privateKeyType = rsa_sa_algo;
-                        ctx->privateKeySz = rsaSz;
-                    }
-
-                    rsaKey = 1;
-                    (void)rsaKey;  /* for no ecc builds */
-
-                    if (ssl && ssl->options.side == WOLFSSL_SERVER_END) {
-                        ssl->options.haveStaticECC = 0;
-                        resetSuites = 1;
-                    }
-                }
-
-                wc_FreeRsaKey(key);
+            }
+        #endif
+            /* get password */
+            ret = info->passwd_cb(password, passwordSz, PEM_PASS_READ,
+                info->passwd_userdata);
+            if (ret >= 0) {
+                passwordSz = ret;
+
+                /* PKCS8 decrypt */
+                ret = ToTraditionalEnc(der->buffer, der->length,
+                                       password, passwordSz, &algId);
+                if (ret >= 0) {
+                    der->length = ret;
+                }
+                /* ignore failures and try parsing as unencrypted */
+
+                ForceZero(password, passwordSz);
             }
 
         #ifdef WOLFSSL_SMALL_STACK
-            XFREE(key, heap, DYNAMIC_TYPE_RSA);
-        #endif
-
-            if (ret != 0)
-                return ret;
-        }
-    #endif
-    #ifdef HAVE_ECC
-        if (!rsaKey && !ed25519Key) {
-            /* make sure ECC key can be used */
-            word32   idx = 0;
-        #ifdef WOLFSSL_SMALL_STACK
-            ecc_key* key = NULL;
-        #else
-            ecc_key  key[1];
-        #endif
-
-        #ifdef WOLFSSL_SMALL_STACK
-            key = (ecc_key*)XMALLOC(sizeof(ecc_key), heap, DYNAMIC_TYPE_ECC);
-            if (key == NULL)
-                return MEMORY_E;
-        #endif
-
-            if (wc_ecc_init_ex(key, heap, devId) == 0) {
-                if (wc_EccPrivateKeyDecode(der->buffer, &idx, key,
-                                                            der->length) == 0) {
-                    int keySz = wc_ecc_size(key);
-                    int minKeySz;
-
-                    /* check for minimum ECC key size and then free */
-                    minKeySz = ssl ? ssl->options.minEccKeySz : ctx->minEccKeySz;
-                    if (keySz < minKeySz) {
-                        wc_ecc_free(key);
-                        WOLFSSL_MSG("ECC private key too small");
-                        return ECC_KEY_SIZE_E;
-                    }
-
-                    eccKey = 1;
-                    if (ssl) {
-                        ssl->options.haveStaticECC = 1;
-                        ssl->buffers.keyType = ecc_dsa_sa_algo;
-                        ssl->buffers.keySz = keySz;
-                    }
-                    else if (ctx) {
-                        ctx->haveStaticECC = 1;
-                        ctx->privateKeyType = ecc_dsa_sa_algo;
-                        ctx->privateKeySz = keySz;
-                    }
-
-                    if (ssl && ssl->options.side == WOLFSSL_SERVER_END) {
-                        resetSuites = 1;
-                    }
-                }
-                else
-                    eccKey = 0;
-
-                wc_ecc_free(key);
-            }
-
-        #ifdef WOLFSSL_SMALL_STACK
-            XFREE(key, heap, DYNAMIC_TYPE_ECC);
-        #endif
-        }
-    #endif /* HAVE_ECC */
-    #ifdef HAVE_ED25519
-        if (!rsaKey && !eccKey) {
-            /* make sure Ed25519 key can be used */
-            word32       idx = 0;
-        #ifdef WOLFSSL_SMALL_STACK
-            ed25519_key* key = NULL;
-        #else
-            ed25519_key  key[1];
-        #endif
-
-        #ifdef WOLFSSL_SMALL_STACK
-            key = (ed25519_key*)XMALLOC(sizeof(ed25519_key), heap,
-                                                          DYNAMIC_TYPE_ED25519);
-            if (key == NULL)
-                return MEMORY_E;
-        #endif
-
-            ret = wc_ed25519_init(key);
-            if (ret == 0) {
-                if (wc_Ed25519PrivateKeyDecode(der->buffer, &idx, key,
-                                                            der->length) != 0) {
-                    ret = WOLFSSL_BAD_FILE;
-                }
-
-                if (ret == 0) {
-                    /* check for minimum key size and then free */
-                    int minKeySz = ssl ? ssl->options.minEccKeySz :
-                                                               ctx->minEccKeySz;
-                    if (ED25519_KEY_SIZE < minKeySz) {
-                        WOLFSSL_MSG("ED25519 private key too small");
-                        ret = ECC_KEY_SIZE_E;
-                    }
-                }
-                if (ret == 0) {
-                    if (ssl) {
-                        ssl->buffers.keyType = ed25519_sa_algo;
-                        ssl->buffers.keySz = ED25519_KEY_SIZE;
-                    }
-                    else if (ctx) {
-                        ctx->privateKeyType = ed25519_sa_algo;
-                        ctx->privateKeySz = ED25519_KEY_SIZE;
-                    }
-
-                    ed25519Key = 1;
-                    if (ssl && ssl->options.side == WOLFSSL_SERVER_END) {
-                        resetSuites = 1;
-                    }
-                }
-
-                wc_ed25519_free(key);
-            }
-
-        #ifdef WOLFSSL_SMALL_STACK
-            XFREE(key, heap, DYNAMIC_TYPE_ED25519);
-        #endif
-            if (ret != 0)
-                return ret;
-        }
-    #else
-        if (!rsaKey && !eccKey && !ed25519Key)
+            XFREE(password, heap, DYNAMIC_TYPE_STRING);
+        #endif
+            ret = ProcessBufferTryDecode(ctx, ssl, der, &keySz, &idx,
+                &resetSuites, &keyFormat, heap, devId);
+        }
+    #endif /* WOLFSSL_ENCRYPTED_KEYS && !NO_PWDBASED */
+
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(info, heap, DYNAMIC_TYPE_ENCRYPTEDINFO);
+    #endif
+
+        if (ret != 0)
+            return ret;
+        if (keyFormat == 0)
             return WOLFSSL_BAD_FILE;
-    #endif
-        (void)ed25519Key;
+
         (void)devId;
     }
     else if (type == CERT_TYPE) {
     #ifdef WOLFSSL_SMALL_STACK
-        DecodedCert* cert = NULL;
+        DecodedCert* cert;
     #else
         DecodedCert  cert[1];
     #endif
     #ifdef HAVE_PK_CALLBACKS
-        int keyType = 0, keySz = 0;
+        int keyType = 0;
     #endif
 
     #ifdef WOLFSSL_SMALL_STACK
@@ -4872,12 +5507,19 @@
                 else if (ctx)
                     ctx->haveECDSAsig = 1;
                 break;
+            case CTC_ED448:
+                WOLFSSL_MSG("ED448 cert signature");
+                if (ssl)
+                    ssl->options.haveECDSAsig = 1;
+                else if (ctx)
+                    ctx->haveECDSAsig = 1;
+                break;
             default:
                 WOLFSSL_MSG("Not ECDSA cert signature");
                 break;
         }
 
-    #if defined(HAVE_ECC) || defined(HAVE_ED25519)
+    #if defined(HAVE_ECC) || defined(HAVE_ED25519) || defined(HAVE_ED448)
         if (ssl) {
             ssl->pkCurveOID = cert->pkCurveOID;
         #ifndef WC_STRICT_SIG
@@ -4889,6 +5531,11 @@
                     ssl->options.haveECC = 1;
                 }
             #endif
+            #ifdef HAVE_ED448
+                else if (cert->keyOID == ED448k) {
+                    ssl->options.haveECC = 1;
+                }
+            #endif
         #else
             ssl->options.haveECC = ssl->options.haveECDSAsig;
         #endif
@@ -4904,6 +5551,11 @@
                     ctx->haveECC = 1;
                 }
             #endif
+            #ifdef HAVE_ED448
+                else if (cert->keyOID == ED448k) {
+                    ctx->haveECC = 1;
+                }
+            #endif
         #else
             ctx->haveECC = ctx->haveECDSAsig;
         #endif
@@ -4914,73 +5566,135 @@
         switch (cert->keyOID) {
         #ifndef NO_RSA
             case RSAk:
+            #ifdef HAVE_PK_CALLBACKS
+                keyType = rsa_sa_algo;
+            #endif
+            #ifdef HAVE_PKCS11
+                if (ctx) {
+                    ctx->privateKeyType = rsa_sa_algo;
+                }
+                else {
+                    ssl->buffers.keyType = rsa_sa_algo;
+                }
+            #endif
+                /* Determine RSA key size by parsing public key */
+                idx = 0;
+                ret = wc_RsaPublicKeyDecode_ex(cert->publicKey, &idx,
+                    cert->pubKeySize, NULL, (word32*)&keySz, NULL, NULL);
+                if (ret < 0)
+                    break;
+
                 if (ssl && !ssl->options.verifyNone) {
                     if (ssl->options.minRsaKeySz < 0 ||
-                          cert->pubKeySize < (word16)ssl->options.minRsaKeySz) {
+                          keySz < (int)ssl->options.minRsaKeySz) {
                         ret = RSA_KEY_SIZE_E;
                         WOLFSSL_MSG("Certificate RSA key size too small");
                     }
                 }
                 else if (ctx && !ctx->verifyNone) {
                     if (ctx->minRsaKeySz < 0 ||
-                                  cert->pubKeySize < (word16)ctx->minRsaKeySz) {
+                                  keySz < (int)ctx->minRsaKeySz) {
                         ret = RSA_KEY_SIZE_E;
                         WOLFSSL_MSG("Certificate RSA key size too small");
                     }
                 }
-            #ifdef HAVE_PK_CALLBACKS
-                keyType = rsa_sa_algo;
-                /* pubKeySize is the encoded public key */
-                /* mask lsb 5-bits to round by 16 to get actual key size */
-                keySz = cert->pubKeySize & ~0x1FL;
-            #endif
                 break;
         #endif /* !NO_RSA */
         #ifdef HAVE_ECC
             case ECDSAk:
+            #ifdef HAVE_PK_CALLBACKS
+                keyType = ecc_dsa_sa_algo;
+            #endif
+            #ifdef HAVE_PKCS11
+                if (ctx) {
+                    ctx->privateKeyType = ecc_dsa_sa_algo;
+                }
+                else {
+                    ssl->buffers.keyType = ecc_dsa_sa_algo;
+                }
+            #endif
+                /* Determine ECC key size based on curve */
+                keySz = wc_ecc_get_curve_size_from_id(
+                    wc_ecc_get_oid(cert->pkCurveOID, NULL, NULL));
+
                 if (ssl && !ssl->options.verifyNone) {
                     if (ssl->options.minEccKeySz < 0 ||
-                          cert->pubKeySize < (word16)ssl->options.minEccKeySz) {
+                          keySz < (int)ssl->options.minEccKeySz) {
                         ret = ECC_KEY_SIZE_E;
                         WOLFSSL_MSG("Certificate ECC key size error");
                     }
                 }
                 else if (ctx && !ctx->verifyNone) {
                     if (ctx->minEccKeySz < 0 ||
-                                  cert->pubKeySize < (word16)ctx->minEccKeySz) {
+                                  keySz < (int)ctx->minEccKeySz) {
                         ret = ECC_KEY_SIZE_E;
                         WOLFSSL_MSG("Certificate ECC key size error");
                     }
                 }
-            #ifdef HAVE_PK_CALLBACKS
-                keyType = ecc_dsa_sa_algo;
-                /* pubKeySize is encByte + x + y */
-                keySz = (cert->pubKeySize - 1) / 2;
-            #endif
                 break;
         #endif /* HAVE_ECC */
         #ifdef HAVE_ED25519
             case ED25519k:
+            #ifdef HAVE_PK_CALLBACKS
+                keyType = ed25519_sa_algo;
+            #endif
+            #ifdef HAVE_PKCS11
+                if (ctx) {
+                    ctx->privateKeyType = ed25519_sa_algo;
+                }
+                else {
+                    ssl->buffers.keyType = ed25519_sa_algo;
+                }
+            #endif
+                /* ED25519 is fixed key size */
+                keySz = ED25519_KEY_SIZE;
                 if (ssl && !ssl->options.verifyNone) {
                     if (ssl->options.minEccKeySz < 0 ||
-                          ED25519_KEY_SIZE < (word16)ssl->options.minEccKeySz) {
+                          keySz < (int)ssl->options.minEccKeySz) {
                         ret = ECC_KEY_SIZE_E;
                         WOLFSSL_MSG("Certificate Ed key size error");
                     }
                 }
                 else if (ctx && !ctx->verifyNone) {
                     if (ctx->minEccKeySz < 0 ||
-                                  ED25519_KEY_SIZE < (word16)ctx->minEccKeySz) {
+                                  keySz < (int)ctx->minEccKeySz) {
                         ret = ECC_KEY_SIZE_E;
                         WOLFSSL_MSG("Certificate ECC key size error");
                     }
                 }
-            #ifdef HAVE_PK_CALLBACKS
-                keyType = ed25519_sa_algo;
-                keySz = ED25519_KEY_SIZE;
-            #endif
                 break;
         #endif /* HAVE_ED25519 */
+        #ifdef HAVE_ED448
+            case ED448k:
+            #ifdef HAVE_PK_CALLBACKS
+                keyType = ed448_sa_algo;
+            #endif
+            #ifdef HAVE_PKCS11
+                if (ctx) {
+                    ctx->privateKeyType = ed448_sa_algo;
+                }
+                else {
+                    ssl->buffers.keyType = ed448_sa_algo;
+                }
+            #endif
+                /* ED448 is fixed key size */
+                keySz = ED448_KEY_SIZE;
+                if (ssl && !ssl->options.verifyNone) {
+                    if (ssl->options.minEccKeySz < 0 ||
+                          keySz < (int)ssl->options.minEccKeySz) {
+                        ret = ECC_KEY_SIZE_E;
+                        WOLFSSL_MSG("Certificate Ed key size error");
+                    }
+                }
+                else if (ctx && !ctx->verifyNone) {
+                    if (ctx->minEccKeySz < 0 ||
+                                  keySz < (int)ctx->minEccKeySz) {
+                        ret = ECC_KEY_SIZE_E;
+                        WOLFSSL_MSG("Certificate ECC key size error");
+                    }
+                }
+                break;
+        #endif /* HAVE_ED448 */
 
             default:
                 WOLFSSL_MSG("No key size check done on certificate");
@@ -5004,14 +5718,29 @@
     #endif
 
         if (ret != 0) {
-            return ret;
-        }
-    }
+            done = 1;
+        }
+    }
+
+    if (done == 1) {
+    #ifndef NO_WOLFSSL_CM_VERIFY
+        if ((type == CA_TYPE) || (type == CERT_TYPE)) {
+            /* Call to over-ride status */
+            if ((ctx != NULL) && (ctx->cm != NULL) &&
+                (ctx->cm->verifyCallback != NULL)) {
+                ret = CM_VerifyBuffer_ex(ctx->cm, buff,
+                        sz, format, (ret == WOLFSSL_SUCCESS ? 0 : ret));
+            }
+        }
+    #endif /* NO_WOLFSSL_CM_VERIFY */
+
+        return ret;
+    }
+
 
     if (ssl && resetSuites) {
         word16 havePSK = 0;
         word16 haveRSA = 0;
-        int    keySz   = 0;
 
         #ifndef NO_PSK
         if (ssl->options.havePSK) {
@@ -5038,7 +5767,7 @@
 
 /* CA PEM file for verification, may have multiple/chain certs to process */
 static int ProcessChainBuffer(WOLFSSL_CTX* ctx, const unsigned char* buff,
-                            long sz, int format, int type, WOLFSSL* ssl)
+                        long sz, int format, int type, WOLFSSL* ssl, int verify)
 {
     long used   = 0;
     int  ret    = 0;
@@ -5049,47 +5778,44 @@
         long consumed = 0;
 
         ret = ProcessBuffer(ctx, buff + used, sz - used, format, type, ssl,
-                            &consumed, 0);
-
-#ifdef WOLFSSL_WPAS
-#ifdef HAVE_CRL
+                            &consumed, 0, verify);
+
         if (ret < 0) {
+#if defined(WOLFSSL_WPAS) && defined(HAVE_CRL)
             DerBuffer*    der = NULL;
             EncryptedInfo info;
 
             WOLFSSL_MSG("Trying a CRL");
             if (PemToDer(buff + used, sz - used, CRL_TYPE, &der, NULL, &info,
                                                                    NULL) == 0) {
-                WOLFSSL_MSG("   Proccessed a CRL");
+                WOLFSSL_MSG("   Processed a CRL");
                 wolfSSL_CertManagerLoadCRLBuffer(ctx->cm, der->buffer,
-                                                der->length, WOLFSSL_FILETYPE_ASN1);
+                                            der->length, WOLFSSL_FILETYPE_ASN1);
                 FreeDer(&der);
                 used += info.consumed;
                 continue;
             }
-        }
-#endif
-#endif
-        if (ret < 0)
-        {
-            if(consumed > 0) { /* Made progress in file */
+#endif
+
+            if (consumed > 0) { /* Made progress in file */
                 WOLFSSL_ERROR(ret);
                 WOLFSSL_MSG("CA Parse failed, with progress in file.");
                 WOLFSSL_MSG("Search for other certs in file");
-            } else {
+            }
+            else {
                 WOLFSSL_MSG("CA Parse failed, no progress in file.");
                 WOLFSSL_MSG("Do not continue search for other certs in file");
                 break;
             }
-        } else {
+        }
+        else {
             WOLFSSL_MSG("   Processed a CA");
             gotOne = 1;
         }
         used += consumed;
     }
 
-    if(gotOne)
-    {
+    if (gotOne) {
         WOLFSSL_MSG("Processed at least one valid CA. Other stuff OK");
         return WOLFSSL_SUCCESS;
     }
@@ -5100,20 +5826,32 @@
 static WC_INLINE WOLFSSL_METHOD* cm_pick_method(void)
 {
     #ifndef NO_WOLFSSL_CLIENT
-        #if defined(WOLFSSL_ALLOW_SSLV3) && !defined(NO_OLD_TLS)
+        #if !defined(NO_OLD_TLS) && defined(WOLFSSL_ALLOW_SSLV3)
             return wolfSSLv3_client_method();
+        #elif !defined(NO_OLD_TLS) && defined(WOLFSSL_ALLOW_TLSV10)
+            return wolfTLSv1_client_method();
+        #elif !defined(NO_OLD_TLS)
+            return wolfTLSv1_1_client_method();
         #elif !defined(WOLFSSL_NO_TLS12)
             return wolfTLSv1_2_client_method();
         #elif defined(WOLFSSL_TLS13)
             return wolfTLSv1_3_client_method();
+        #else
+            return NULL;
         #endif
     #elif !defined(NO_WOLFSSL_SERVER)
-        #if defined(WOLFSSL_ALLOW_SSLV3) && !defined(NO_OLD_TLS)
+        #if !defined(NO_OLD_TLS) && defined(WOLFSSL_ALLOW_SSLV3)
             return wolfSSLv3_server_method();
+        #elif !defined(NO_OLD_TLS) && defined(WOLFSSL_ALLOW_TLSV10)
+            return wolfTLSv1_server_method();
+        #elif !defined(NO_OLD_TLS)
+            return wolfTLSv1_1_server_method();
         #elif !defined(WOLFSSL_NO_TLS12)
             return wolfTLSv1_2_server_method();
         #elif defined(WOLFSSL_TLS13)
             return wolfTLSv1_3_server_method();
+        #else
+            return NULL;
         #endif
     #else
         return NULL;
@@ -5170,9 +5908,20 @@
         }
     }
 
-    return BufferLoadCRL(cm->crl, buff, sz, type, 0);
-}
-
+    return BufferLoadCRL(cm->crl, buff, sz, type, VERIFY);
+}
+
+int wolfSSL_CertManagerFreeCRL(WOLFSSL_CERT_MANAGER* cm)
+{
+    WOLFSSL_ENTER("wolfSSL_CertManagerFreeCRL");
+    if (cm == NULL)
+        return BAD_FUNC_ARG;
+    if (cm->crl != NULL){
+        FreeCRL(cm->crl, 1);
+        cm->crl = NULL;
+    }
+    return WOLFSSL_SUCCESS;
+}
 
 int wolfSSL_CTX_LoadCRLBuffer(WOLFSSL_CTX* ctx, const unsigned char* buff,
                               long sz, int type)
@@ -5251,14 +6000,26 @@
 
     return WOLFSSL_SUCCESS;
 }
+
+#ifndef NO_WOLFSSL_CM_VERIFY
+void wolfSSL_CertManagerSetVerify(WOLFSSL_CERT_MANAGER* cm, VerifyCallback vc)
+{
+    WOLFSSL_ENTER("wolfSSL_CertManagerSetVerify");
+    if (cm == NULL)
+        return;
+
+    cm->verifyCallback = vc;
+}
+#endif /* NO_WOLFSSL_CM_VERIFY */
+
 /* Verify the certificate, WOLFSSL_SUCCESS for ok, < 0 for error */
-int wolfSSL_CertManagerVerifyBuffer(WOLFSSL_CERT_MANAGER* cm, const byte* buff,
-                                    long sz, int format)
+int CM_VerifyBuffer_ex(WOLFSSL_CERT_MANAGER* cm, const byte* buff,
+                                    long sz, int format, int err_val)
 {
     int ret = 0;
     DerBuffer* der = NULL;
 #ifdef WOLFSSL_SMALL_STACK
-    DecodedCert* cert = NULL;
+    DecodedCert* cert;
 #else
     DecodedCert  cert[1];
 #endif
@@ -5294,11 +6055,54 @@
     if (ret == 0)
         ret = ParseCertRelative(cert, CERT_TYPE, 1, cm);
 
+#if defined(OPENSSL_ALL) || defined(WOLFSSL_QT)
+    /* ret needs to be self-singer error for Qt compat */
+    if (ret == ASN_NO_SIGNER_E && cert->selfSigned)
+        ret = ASN_SELF_SIGNED_E;
+#endif
+
 #ifdef HAVE_CRL
     if (ret == 0 && cm->crlEnabled)
         ret = CheckCertCRL(cm->crl, cert);
 #endif
 
+#ifndef NO_WOLFSSL_CM_VERIFY
+    /* if verify callback has been set */
+    if (cm->verifyCallback) {
+        buffer certBuf;
+    #ifdef WOLFSSL_SMALL_STACK
+        ProcPeerCertArgs* args;
+        args = (ProcPeerCertArgs*)XMALLOC(
+            sizeof(ProcPeerCertArgs), cm->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        if (args == NULL) {
+            XFREE(cert, cm->heap, DYNAMIC_TYPE_DCERT);
+            return MEMORY_E;
+        }
+    #else
+        ProcPeerCertArgs  args[1];
+    #endif
+
+        certBuf.buffer = (byte*)buff;
+        certBuf.length = (unsigned int)sz;
+        XMEMSET(args, 0, sizeof(ProcPeerCertArgs));
+
+        args->totalCerts = 1;
+        args->certs = &certBuf;
+        args->dCert = cert;
+        args->dCertInit = 1;
+
+        if (err_val != 0) {
+            ret = err_val;
+        }
+        ret = DoVerifyCallback(cm, NULL, ret, args);
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(args, cm->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    #endif
+    }
+#else
+    (void)err_val;
+#endif
+
     FreeDecodedCert(cert);
     FreeDer(&der);
 #ifdef WOLFSSL_SMALL_STACK
@@ -5308,7 +6112,12 @@
     return ret == 0 ? WOLFSSL_SUCCESS : ret;
 }
 
-
+/* Verify the certificate, WOLFSSL_SUCCESS for ok, < 0 for error */
+int wolfSSL_CertManagerVerifyBuffer(WOLFSSL_CERT_MANAGER* cm, const byte* buff,
+                                    long sz, int format)
+{
+    return CM_VerifyBuffer_ex(cm, buff, sz, format, 0);
+}
 /* turn on OCSP if off and compiled in, set options */
 int wolfSSL_CertManagerEnableOCSP(WOLFSSL_CERT_MANAGER* cm, int options)
 {
@@ -5379,6 +6188,7 @@
 
 #if defined(HAVE_CERTIFICATE_STATUS_REQUEST) \
  || defined(HAVE_CERTIFICATE_STATUS_REQUEST_V2)
+    #ifndef NO_WOLFSSL_SERVER
     if (cm->ocsp_stapling == NULL) {
         cm->ocsp_stapling = (WOLFSSL_OCSP*)XMALLOC(sizeof(WOLFSSL_OCSP),
                                                cm->heap, DYNAMIC_TYPE_OCSP);
@@ -5392,13 +6202,14 @@
             return WOLFSSL_FAILURE;
         }
     }
-    cm->ocspStaplingEnabled = 1;
 
     #ifndef WOLFSSL_USER_IO
         cm->ocspIOCb = EmbedOcspLookup;
         cm->ocspRespFreeCb = EmbedOcspRespFree;
         cm->ocspIOCtx = cm->heap;
     #endif /* WOLFSSL_USER_IO */
+    #endif /* NO_WOLFSSL_SERVER */
+    cm->ocspStaplingEnabled = 1;
 #else
     ret = NOT_COMPILED_IN;
 #endif
@@ -5423,18 +6234,8 @@
 #endif
     return ret;
 }
-#if defined(SESSION_CERTS)
-WOLF_STACK_OF(WOLFSSL_X509)* wolfSSL_get_peer_cert_chain(const WOLFSSL* ssl)
-{
-    WOLFSSL_ENTER("wolfSSL_get_peer_cert_chain");
-    if ((ssl == NULL) || (ssl->session.chain.count == 0))
-        return NULL;
-    else
-        return (WOLF_STACK_OF(WOLFSSL_X509)* )&ssl->session.chain;
-}
-#endif
+
 #ifdef HAVE_OCSP
-
 /* check CRL if enabled, WOLFSSL_SUCCESS  */
 int wolfSSL_CertManagerCheckOCSP(WOLFSSL_CERT_MANAGER* cm, byte* der, int sz)
 {
@@ -5476,6 +6277,23 @@
     return ret == 0 ? WOLFSSL_SUCCESS : ret;
 }
 
+WOLFSSL_API int wolfSSL_CertManagerCheckOCSPResponse(WOLFSSL_CERT_MANAGER *cm,
+                                                    byte *response, int responseSz, buffer *responseBuffer,
+                                                    CertStatus *status, OcspEntry *entry, OcspRequest *ocspRequest)
+{
+    int ret;
+
+    WOLFSSL_ENTER("wolfSSL_CertManagerCheckOCSP_Staple");
+    if (cm == NULL || response == NULL)
+        return BAD_FUNC_ARG;
+    if (cm->ocspEnabled == 0)
+        return WOLFSSL_SUCCESS;
+
+    ret = CheckOcspResponse(cm->ocsp, response, responseSz, responseBuffer, status,
+                        entry, ocspRequest);
+
+    return ret == 0 ? WOLFSSL_SUCCESS : ret;
+}
 
 int wolfSSL_CertManagerSetOCSPOverrideURL(WOLFSSL_CERT_MANAGER* cm,
                                           const char* url)
@@ -5641,13 +6459,18 @@
 
 #endif /* HAVE_OCSP */
 
+/* macro to get verify settings for AddCA */
+#define GET_VERIFY_SETTING_CTX(ctx) \
+    (ctx && ctx->verifyNone ? NO_VERIFY : VERIFY)
+#define GET_VERIFY_SETTING_SSL(ssl) \
+    (ssl && ssl->options.verifyNone ? NO_VERIFY : VERIFY)
 
 #ifndef NO_FILESYSTEM
 
 /* process a file with name fname into ctx of format and type
    userChain specifies a user certificate chain to pass during handshake */
 int ProcessFile(WOLFSSL_CTX* ctx, const char* fname, int format, int type,
-                WOLFSSL* ssl, int userChain, WOLFSSL_CRL* crl)
+                WOLFSSL* ssl, int userChain, WOLFSSL_CRL* crl, int verify)
 {
 #ifdef WOLFSSL_SMALL_STACK
     byte   staticBuffer[1]; /* force heap usage */
@@ -5660,6 +6483,8 @@
     long   sz = 0;
     XFILE  file;
     void*  heapHint = wolfSSL_CTX_GetHeap(ctx, ssl);
+    const char* header = NULL;
+    const char* footer = NULL;
 
     (void)crl;
     (void)heapHint;
@@ -5668,10 +6493,19 @@
 
     file = XFOPEN(fname, "rb");
     if (file == XBADFILE) return WOLFSSL_BAD_FILE;
-    XFSEEK(file, 0, XSEEK_END);
+    if (XFSEEK(file, 0, XSEEK_END) != 0) {
+        XFCLOSE(file);
+        return WOLFSSL_BAD_FILE;
+    }
     sz = XFTELL(file);
     XREWIND(file);
 
+    if (sz > MAX_WOLFSSL_FILE_SIZE || sz <= 0) {
+        WOLFSSL_MSG("ProcessFile file size error");
+        XFCLOSE(file);
+        return WOLFSSL_BAD_FILE;
+    }
+
     if (sz > (long)sizeof(staticBuffer)) {
         WOLFSSL_MSG("Getting dynamic buffer");
         myBuffer = (byte*)XMALLOC(sz, heapHint, DYNAMIC_TYPE_FILE);
@@ -5681,24 +6515,46 @@
         }
         dynamic = 1;
     }
-    else if (sz <= 0) {
-        XFCLOSE(file);
-        return WOLFSSL_BAD_FILE;
-    }
 
     if ( (ret = (int)XFREAD(myBuffer, 1, sz, file)) != sz)
         ret = WOLFSSL_BAD_FILE;
     else {
+        /* Try to detect type by parsing cert header and footer */
+        if (type == DETECT_CERT_TYPE) {
+            if (wc_PemGetHeaderFooter(CA_TYPE, &header, &footer) == 0 &&
+               (XSTRNSTR((char*)myBuffer, header, (int)sz) != NULL)) {
+                type = CA_TYPE;
+            }
+#ifdef HAVE_CRL
+            else if (wc_PemGetHeaderFooter(CRL_TYPE, &header, &footer) == 0 &&
+                    (XSTRNSTR((char*)myBuffer, header, (int)sz) != NULL)) {
+                type = CRL_TYPE;
+            }
+#endif
+            else if (wc_PemGetHeaderFooter(CERT_TYPE, &header, &footer) == 0 &&
+                    (XSTRNSTR((char*)myBuffer, header, (int)sz) != NULL)) {
+                type = CERT_TYPE;
+            }
+            else {
+                WOLFSSL_MSG("Failed to detect certificate type");
+                if (dynamic)
+                    XFREE(myBuffer, heapHint, DYNAMIC_TYPE_FILE);
+                XFCLOSE(file);
+                return WOLFSSL_BAD_CERTTYPE;
+            }
+        }
         if ((type == CA_TYPE || type == TRUSTED_PEER_TYPE)
-                                                  && format == WOLFSSL_FILETYPE_PEM)
-            ret = ProcessChainBuffer(ctx, myBuffer, sz, format, type, ssl);
+                                          && format == WOLFSSL_FILETYPE_PEM) {
+            ret = ProcessChainBuffer(ctx, myBuffer, sz, format, type, ssl,
+                                     verify);
+        }
 #ifdef HAVE_CRL
         else if (type == CRL_TYPE)
-            ret = BufferLoadCRL(crl, myBuffer, sz, format, 0);
+            ret = BufferLoadCRL(crl, myBuffer, sz, format, verify);
 #endif
         else
             ret = ProcessBuffer(ctx, myBuffer, sz, format, type, ssl, NULL,
-                                userChain);
+                                userChain, verify);
     }
 
     XFCLOSE(file);
@@ -5708,29 +6564,42 @@
     return ret;
 }
 
-
 /* loads file then loads each file in path, no c_rehash */
-int wolfSSL_CTX_load_verify_locations(WOLFSSL_CTX* ctx, const char* file,
-                                     const char* path)
+int wolfSSL_CTX_load_verify_locations_ex(WOLFSSL_CTX* ctx, const char* file,
+                                     const char* path, word32 flags)
 {
     int ret = WOLFSSL_SUCCESS;
 #ifndef NO_WOLFSSL_DIR
     int fileRet;
-#endif
-
-    WOLFSSL_ENTER("wolfSSL_CTX_load_verify_locations");
-
-    if (ctx == NULL || (file == NULL && path == NULL) )
-        return WOLFSSL_FAILURE;
-
-    if (file)
-        ret = ProcessFile(ctx, file, WOLFSSL_FILETYPE_PEM, CA_TYPE, NULL, 0, NULL);
+    int successCount = 0;
+    int failCount = 0;
+#endif
+    int verify;
+
+    WOLFSSL_MSG("wolfSSL_CTX_load_verify_locations_ex");
+
+    if (ctx == NULL || (file == NULL && path == NULL)) {
+        return WOLFSSL_FAILURE;
+    }
+
+    verify = GET_VERIFY_SETTING_CTX(ctx);
+    if (flags & WOLFSSL_LOAD_FLAG_DATE_ERR_OKAY)
+        verify = VERIFY_SKIP_DATE;
+
+    if (file) {
+        ret = ProcessFile(ctx, file, WOLFSSL_FILETYPE_PEM, CA_TYPE, NULL, 0,
+                          NULL, verify);
+#ifndef NO_WOLFSSL_DIR
+        if (ret == WOLFSSL_SUCCESS)
+            successCount++;
+#endif
+    }
 
     if (ret == WOLFSSL_SUCCESS && path) {
 #ifndef NO_WOLFSSL_DIR
         char* name = NULL;
     #ifdef WOLFSSL_SMALL_STACK
-        ReadDirCtx* readCtx = NULL;
+        ReadDirCtx* readCtx;
         readCtx = (ReadDirCtx*)XMALLOC(sizeof(ReadDirCtx), ctx->heap,
                                                        DYNAMIC_TYPE_DIRCTX);
         if (readCtx == NULL)
@@ -5742,28 +6611,64 @@
         /* try to load each regular file in path */
         fileRet = wc_ReadDirFirst(readCtx, path, &name);
         while (fileRet == 0 && name) {
+            WOLFSSL_MSG(name); /* log file name */
             ret = ProcessFile(ctx, name, WOLFSSL_FILETYPE_PEM, CA_TYPE,
-                                                          NULL, 0, NULL);
-            if (ret != WOLFSSL_SUCCESS)
-                break;
+                              NULL, 0, NULL, verify);
+            if (ret != WOLFSSL_SUCCESS) {
+                /* handle flags for ignoring errors, skipping expired certs or
+                   by PEM certificate header error */
+                if ( (flags & WOLFSSL_LOAD_FLAG_IGNORE_ERR) ||
+                    ((flags & WOLFSSL_LOAD_FLAG_PEM_CA_ONLY) &&
+                       (ret == ASN_NO_PEM_HEADER))) {
+                    /* Do not fail here if a certificate fails to load,
+                       continue to next file */
+                    ret = WOLFSSL_SUCCESS;
+                }
+                else {
+                    WOLFSSL_ERROR(ret);
+                    WOLFSSL_MSG("Load CA file failed, continuing");
+                    failCount++;
+                }
+            }
+            else {
+                successCount++;
+            }
             fileRet = wc_ReadDirNext(readCtx, path, &name);
         }
         wc_ReadDirClose(readCtx);
 
         /* pass directory read failure to response code */
-        if (ret == WOLFSSL_SUCCESS && fileRet != -1) {
+        if (fileRet != WC_READDIR_NOFILE) {
             ret = fileRet;
         }
+        /* report failure if no files were loaded or there were failures */
+        else if (successCount == 0 || failCount > 0) {
+            /* use existing error code if exists */
+            if (ret == WOLFSSL_SUCCESS)
+                ret = WOLFSSL_FAILURE;
+        }
+        else {
+            ret = WOLFSSL_SUCCESS;
+        }
 
     #ifdef WOLFSSL_SMALL_STACK
         XFREE(readCtx, ctx->heap, DYNAMIC_TYPE_DIRCTX);
     #endif
 #else
         ret = NOT_COMPILED_IN;
-#endif
-    }
-
-    return ret;
+        (void)flags;
+#endif
+    }
+
+    return ret;
+}
+
+WOLFSSL_ABI
+int wolfSSL_CTX_load_verify_locations(WOLFSSL_CTX* ctx, const char* file,
+                                     const char* path)
+{
+    return wolfSSL_CTX_load_verify_locations_ex(ctx, file, path,
+        WOLFSSL_LOAD_VERIFY_DEFAULT_FLAGS);
 }
 
 
@@ -5781,7 +6686,8 @@
         return WOLFSSL_FAILURE;
     }
 
-    return ProcessFile(ctx, file, type, TRUSTED_PEER_TYPE, NULL, 0, NULL);
+    return ProcessFile(ctx, file, type, TRUSTED_PEER_TYPE, NULL, 0, NULL,
+                       GET_VERIFY_SETTING_CTX(ctx));
 }
 #endif /* WOLFSSL_TRUST_PEER_CERT */
 
@@ -5804,12 +6710,15 @@
     WOLFSSL_ENTER("wolfSSL_CertManagerVerify");
 
     if (file == XBADFILE) return WOLFSSL_BAD_FILE;
-    XFSEEK(file, 0, XSEEK_END);
+    if(XFSEEK(file, 0, XSEEK_END) != 0) {
+        XFCLOSE(file);
+        return WOLFSSL_BAD_FILE;
+    }
     sz = XFTELL(file);
     XREWIND(file);
 
     if (sz > MAX_WOLFSSL_FILE_SIZE || sz <= 0) {
-        WOLFSSL_MSG("CertManagerVerify file bad size");
+        WOLFSSL_MSG("CertManagerVerify file size error");
         XFCLOSE(file);
         return WOLFSSL_BAD_FILE;
     }
@@ -5863,14 +6772,14 @@
 
     ret = wolfSSL_CTX_load_verify_locations(tmp, file, path);
 
-    /* don't loose our good one */
+    /* don't lose our good one */
     tmp->cm = NULL;
     wolfSSL_CTX_free(tmp);
 
     return ret;
 }
 
-
+#ifndef NO_CHECK_PRIVATE_KEY
 /* Check private against public in certificate for match
  *
  * ctx  WOLFSSL_CTX structure to check private key in
@@ -5930,10 +6839,11 @@
     return WOLFSSL_FAILURE;
 #endif
 }
+#endif /* !NO_CHECK_PRIVATE_KEY */
+
 
 #ifdef HAVE_CRL
 
-
 /* check CRL if enabled, WOLFSSL_SUCCESS  */
 int wolfSSL_CertManagerCheckCRL(WOLFSSL_CERT_MANAGER* cm, byte* der, int sz)
 {
@@ -6016,7 +6926,6 @@
     return LoadCRL(cm->crl, path, type, monitor);
 }
 
-
 int wolfSSL_EnableCRL(WOLFSSL* ssl, int options)
 {
     WOLFSSL_ENTER("wolfSSL_EnableCRL");
@@ -6036,7 +6945,6 @@
         return BAD_FUNC_ARG;
 }
 
-
 int wolfSSL_LoadCRL(WOLFSSL* ssl, const char* path, int type, int monitor)
 {
     WOLFSSL_ENTER("wolfSSL_LoadCRL");
@@ -6132,8 +7040,10 @@
     if (ctx == NULL || file == NULL)
         return WOLFSSL_FAILURE;
 
-    if (ProcessFile(ctx, file, format, CA_TYPE, NULL, 0, NULL) == WOLFSSL_SUCCESS)
-        return WOLFSSL_SUCCESS;
+    if (ProcessFile(ctx, file, format, CA_TYPE, NULL, 0, NULL,
+                    GET_VERIFY_SETTING_CTX(ctx)) == WOLFSSL_SUCCESS) {
+        return WOLFSSL_SUCCESS;
+    }
 
     return WOLFSSL_FAILURE;
 }
@@ -6142,24 +7052,31 @@
 
 
 
+WOLFSSL_ABI
 int wolfSSL_CTX_use_certificate_file(WOLFSSL_CTX* ctx, const char* file,
                                      int format)
 {
     WOLFSSL_ENTER("wolfSSL_CTX_use_certificate_file");
-    if (ProcessFile(ctx, file, format, CERT_TYPE, NULL, 0, NULL) == WOLFSSL_SUCCESS)
-        return WOLFSSL_SUCCESS;
-
-    return WOLFSSL_FAILURE;
-}
-
-
+
+    if (ProcessFile(ctx, file, format, CERT_TYPE, NULL, 0, NULL,
+                    GET_VERIFY_SETTING_CTX(ctx)) == WOLFSSL_SUCCESS) {
+        return WOLFSSL_SUCCESS;
+    }
+
+    return WOLFSSL_FAILURE;
+}
+
+
+WOLFSSL_ABI
 int wolfSSL_CTX_use_PrivateKey_file(WOLFSSL_CTX* ctx, const char* file,
                                     int format)
 {
     WOLFSSL_ENTER("wolfSSL_CTX_use_PrivateKey_file");
-    if (ProcessFile(ctx, file, format, PRIVATEKEY_TYPE, NULL, 0, NULL)
-                    == WOLFSSL_SUCCESS)
-        return WOLFSSL_SUCCESS;
+
+    if (ProcessFile(ctx, file, format, PRIVATEKEY_TYPE, NULL, 0, NULL,
+                    GET_VERIFY_SETTING_CTX(ctx)) == WOLFSSL_SUCCESS) {
+        return WOLFSSL_SUCCESS;
+    }
 
     return WOLFSSL_FAILURE;
 }
@@ -6200,7 +7117,7 @@
 /* get cert chaining depth using ctx struct */
 long wolfSSL_CTX_get_verify_depth(WOLFSSL_CTX* ctx)
 {
-    if(ctx == NULL) {
+    if (ctx == NULL) {
         return BAD_FUNC_ARG;
     }
 #ifndef OPENSSL_EXTRA
@@ -6211,13 +7128,16 @@
 }
 
 
+WOLFSSL_ABI
 int wolfSSL_CTX_use_certificate_chain_file(WOLFSSL_CTX* ctx, const char* file)
 {
-   /* process up to MAX_CHAIN_DEPTH plus subject cert */
-   WOLFSSL_ENTER("wolfSSL_CTX_use_certificate_chain_file");
-   if (ProcessFile(ctx, file, WOLFSSL_FILETYPE_PEM,CERT_TYPE,NULL,1, NULL)
-                   == WOLFSSL_SUCCESS)
-       return WOLFSSL_SUCCESS;
+    /* process up to MAX_CHAIN_DEPTH plus subject cert */
+    WOLFSSL_ENTER("wolfSSL_CTX_use_certificate_chain_file");
+
+    if (ProcessFile(ctx, file, WOLFSSL_FILETYPE_PEM, CERT_TYPE, NULL, 1, NULL,
+                    GET_VERIFY_SETTING_CTX(ctx)) == WOLFSSL_SUCCESS) {
+        return WOLFSSL_SUCCESS;
+    }
 
    return WOLFSSL_FAILURE;
 }
@@ -6226,11 +7146,13 @@
 int wolfSSL_CTX_use_certificate_chain_file_format(WOLFSSL_CTX* ctx,
                                                   const char* file, int format)
 {
-   /* process up to MAX_CHAIN_DEPTH plus subject cert */
-   WOLFSSL_ENTER("wolfSSL_CTX_use_certificate_chain_file_format");
-   if (ProcessFile(ctx, file, format, CERT_TYPE, NULL, 1, NULL)
-                   == WOLFSSL_SUCCESS)
-       return WOLFSSL_SUCCESS;
+    /* process up to MAX_CHAIN_DEPTH plus subject cert */
+    WOLFSSL_ENTER("wolfSSL_CTX_use_certificate_chain_file_format");
+
+    if (ProcessFile(ctx, file, format, CERT_TYPE, NULL, 1, NULL,
+                    GET_VERIFY_SETTING_CTX(ctx)) == WOLFSSL_SUCCESS) {
+        return WOLFSSL_SUCCESS;
+    }
 
    return WOLFSSL_FAILURE;
 }
@@ -6258,10 +7180,19 @@
 
     file = XFOPEN(fname, "rb");
     if (file == XBADFILE) return WOLFSSL_BAD_FILE;
-    XFSEEK(file, 0, XSEEK_END);
+    if(XFSEEK(file, 0, XSEEK_END) != 0) {
+        XFCLOSE(file);
+        return WOLFSSL_BAD_FILE;
+    }
     sz = XFTELL(file);
     XREWIND(file);
 
+    if (sz > MAX_WOLFSSL_FILE_SIZE || sz <= 0) {
+        WOLFSSL_MSG("SetTmpDH file size error");
+        XFCLOSE(file);
+        return WOLFSSL_BAD_FILE;
+    }
+
     if (sz > (long)sizeof(staticBuffer)) {
         WOLFSSL_MSG("Getting dynamic buffer");
         myBuffer = (byte*) XMALLOC(sz, ctx->heap, DYNAMIC_TYPE_FILE);
@@ -6271,10 +7202,6 @@
         }
         dynamic = 1;
     }
-    else if (sz <= 0) {
-        XFCLOSE(file);
-        return WOLFSSL_BAD_FILE;
-    }
 
     if ( (ret = (int)XFREAD(myBuffer, 1, sz, file)) != sz)
         ret = WOLFSSL_BAD_FILE;
@@ -6312,82 +7239,6 @@
 
 #endif /* NO_FILESYSTEM */
 
-
-#if defined(OPENSSL_EXTRA) || !defined(NO_PWDBASED) && \
-    (defined(OPENSSL_EXTRA_X509_SMALL) || defined(HAVE_WEBSERVER))
-
-static int wolfSSL_EVP_get_hashinfo(const WOLFSSL_EVP_MD* evp,
-    int* pHash, int* pHashSz)
-{
-    enum wc_HashType hash = WC_HASH_TYPE_NONE;
-    int hashSz;
-
-    if (XSTRLEN(evp) < 3) {
-        /* do not try comparing strings if size is too small */
-        return WOLFSSL_FAILURE;
-    }
-
-    if (XSTRNCMP("SHA", evp, 3) == 0) {
-        if (XSTRLEN(evp) > 3) {
-        #ifndef NO_SHA256
-            if (XSTRNCMP("SHA256", evp, 6) == 0) {
-                hash = WC_HASH_TYPE_SHA256;
-            }
-            else
-        #endif
-        #ifdef WOLFSSL_SHA384
-            if (XSTRNCMP("SHA384", evp, 6) == 0) {
-                hash = WC_HASH_TYPE_SHA384;
-            }
-            else
-        #endif
-        #ifdef WOLFSSL_SHA512
-            if (XSTRNCMP("SHA512", evp, 6) == 0) {
-                hash = WC_HASH_TYPE_SHA512;
-            }
-            else
-        #endif
-            {
-                WOLFSSL_MSG("Unknown SHA hash");
-            }
-        }
-        else {
-            hash = WC_HASH_TYPE_SHA;
-        }
-    }
-#ifdef WOLFSSL_MD2
-    else if (XSTRNCMP("MD2", evp, 3) == 0) {
-        hash = WC_HASH_TYPE_MD2;
-    }
-#endif
-#ifndef NO_MD4
-    else if (XSTRNCMP("MD4", evp, 3) == 0) {
-        hash = WC_HASH_TYPE_MD4;
-    }
-#endif
-#ifndef NO_MD5
-    else if (XSTRNCMP("MD5", evp, 3) == 0) {
-        hash = WC_HASH_TYPE_MD5;
-    }
-#endif
-
-    if (pHash)
-        *pHash = hash;
-
-    hashSz = wc_HashGetDigestSize(hash);
-    if (pHashSz)
-        *pHashSz = hashSz;
-
-    if (hashSz < 0) {
-        return WOLFSSL_FAILURE;
-    }
-
-    return WOLFSSL_SUCCESS;
-}
-
-#endif
-
-
 #ifdef OPENSSL_EXTRA
 /* put SSL type in extra for now, not very common */
 
@@ -6405,9 +7256,10 @@
 {
     WOLFSSL_PKCS8_PRIV_KEY_INFO* pkcs8 = NULL;
 #ifdef WOLFSSL_PEM_TO_DER
-    unsigned char* mem;
+    unsigned char* mem = NULL;
     int memSz;
     int keySz;
+    word32 algId;
 
     WOLFSSL_MSG("wolfSSL_d2i_PKCS8_PKEY_bio()");
 
@@ -6422,12 +7274,12 @@
     if ((keySz = wc_KeyPemToDer(mem, memSz, mem, memSz, NULL)) < 0) {
         WOLFSSL_MSG("Not PEM format");
         keySz = memSz;
-        if ((keySz = ToTraditional((byte*)mem, (word32)keySz)) < 0) {
-            return NULL;
-        }
-    }
-
-    pkcs8 = wolfSSL_PKEY_new();
+        if ((keySz = ToTraditional_ex((byte*)mem, (word32)keySz, &algId)) < 0) {
+            return NULL;
+        }
+    }
+
+    pkcs8 = wolfSSL_EVP_PKEY_new();
     if (pkcs8 == NULL) {
         return NULL;
     }
@@ -6485,7 +7337,7 @@
     }
 
     if (wolfSSL_BIO_read(bio, mem, (int)memSz) == memSz) {
-        pkey = wolfSSL_d2i_PUBKEY(NULL, &mem, memSz);
+        pkey = wolfSSL_d2i_PUBKEY(NULL, (const unsigned char**)&mem, memSz);
         if (out != NULL && pkey != NULL) {
             *out = pkey;
         }
@@ -6506,8 +7358,8 @@
  * returns a pointer to a new WOLFSSL_EVP_PKEY structure on success and NULL
  *         on fail
  */
-WOLFSSL_EVP_PKEY* wolfSSL_d2i_PUBKEY(WOLFSSL_EVP_PKEY** out, unsigned char** in,
-        long inSz)
+WOLFSSL_EVP_PKEY* wolfSSL_d2i_PUBKEY(WOLFSSL_EVP_PKEY** out,
+                                     const unsigned char** in, long inSz)
 {
     WOLFSSL_EVP_PKEY* pkey = NULL;
     const unsigned char* mem;
@@ -6530,7 +7382,7 @@
         if (wc_InitRsaKey(&rsa, NULL) == 0 &&
             wc_RsaPublicKeyDecode(mem, &keyIdx, &rsa, (word32)memSz) == 0) {
             wc_FreeRsaKey(&rsa);
-            pkey = wolfSSL_PKEY_new();
+            pkey = wolfSSL_EVP_PKEY_new();
             if (pkey != NULL) {
                 pkey->pkey_sz = keyIdx;
                 pkey->pkey.ptr = (char*)XMALLOC(memSz, NULL,
@@ -6574,7 +7426,7 @@
         if (wc_ecc_init(&ecc) == 0 &&
             wc_EccPublicKeyDecode(mem, &keyIdx, &ecc, (word32)memSz) == 0) {
             wc_ecc_free(&ecc);
-            pkey = wolfSSL_PKEY_new();
+            pkey = wolfSSL_EVP_PKEY_new();
             if (pkey != NULL) {
                 pkey->pkey_sz = keyIdx;
                 pkey->pkey.ptr = (char*)XMALLOC(keyIdx, NULL,
@@ -6588,6 +7440,21 @@
                 if (out != NULL) {
                     *out = pkey;
                 }
+
+                pkey->ownEcc = 1;
+                pkey->ecc = wolfSSL_EC_KEY_new();
+                if (pkey->ecc == NULL) {
+                    wolfSSL_EVP_PKEY_free(pkey);
+                    return NULL;
+                }
+
+                if (wolfSSL_EC_KEY_LoadDer_ex(pkey->ecc,
+                            (const unsigned char*)pkey->pkey.ptr,
+                            pkey->pkey_sz, WOLFSSL_EC_KEY_LOAD_PUBLIC) != 1) {
+                    wolfSSL_EVP_PKEY_free(pkey);
+                    return NULL;
+                }
+
                 return pkey;
             }
         }
@@ -6595,8 +7462,100 @@
     }
     #endif /* HAVE_ECC */
 
+    #if !defined(NO_DSA)
+    {
+        DsaKey dsa;
+        word32 keyIdx = 0;
+
+        /* test if DSA key */
+        if (wc_InitDsaKey(&dsa) == 0 &&
+            wc_DsaPublicKeyDecode(mem, &keyIdx, &dsa, (word32)memSz) == 0) {
+            wc_FreeDsaKey(&dsa);
+            pkey = wolfSSL_EVP_PKEY_new();
+
+            if (pkey != NULL) {
+                pkey->pkey_sz = keyIdx;
+                pkey->pkey.ptr = (char*)XMALLOC(memSz, NULL,
+                        DYNAMIC_TYPE_PUBLIC_KEY);
+                if (pkey->pkey.ptr == NULL) {
+                    wolfSSL_EVP_PKEY_free(pkey);
+                    return NULL;
+                }
+                XMEMCPY(pkey->pkey.ptr, mem, keyIdx);
+                pkey->type = EVP_PKEY_DSA;
+                if (out != NULL) {
+                    *out = pkey;
+                }
+
+                pkey->ownDsa = 1;
+                pkey->dsa = wolfSSL_DSA_new();
+                if (pkey->dsa == NULL) {
+                    wolfSSL_EVP_PKEY_free(pkey);
+                    return NULL;
+                }
+
+
+                if (wolfSSL_DSA_LoadDer_ex(pkey->dsa,
+                            (const unsigned char*)pkey->pkey.ptr,
+                            pkey->pkey_sz, WOLFSSL_DSA_LOAD_PUBLIC) != 1) {
+                    wolfSSL_EVP_PKEY_free(pkey);
+                    return NULL;
+                }
+
+                return pkey;
+            }
+        }
+        wc_FreeDsaKey(&dsa);
+    }
+    #endif /* NO_DSA */
+
+    #if !defined(NO_DH) && (defined(WOLFSSL_QT) || defined(OPENSSL_ALL))
+    {
+        DhKey dh;
+        word32 keyIdx = 0;
+
+        /* test if DH key */
+        if (wc_InitDhKey(&dh) == 0 &&
+            wc_DhKeyDecode(mem, &keyIdx, &dh, (word32)memSz) == 0) {
+            wc_FreeDhKey(&dh);
+            pkey = wolfSSL_EVP_PKEY_new();
+
+            if (pkey != NULL) {
+                pkey->pkey_sz = (int)memSz;
+                pkey->pkey.ptr = (char*)XMALLOC(memSz, NULL,
+                        DYNAMIC_TYPE_PUBLIC_KEY);
+                if (pkey->pkey.ptr == NULL) {
+                    wolfSSL_EVP_PKEY_free(pkey);
+                    return NULL;
+                }
+                XMEMCPY(pkey->pkey.ptr, mem, memSz);
+                pkey->type = EVP_PKEY_DH;
+                if (out != NULL) {
+                    *out = pkey;
+                }
+
+                pkey->ownDh = 1;
+                pkey->dh = wolfSSL_DH_new();
+                if (pkey->dh == NULL) {
+                    wolfSSL_EVP_PKEY_free(pkey);
+                    return NULL;
+                }
+
+                if (wolfSSL_DH_LoadDer(pkey->dh,
+                            (const unsigned char*)pkey->pkey.ptr,
+                            pkey->pkey_sz) != WOLFSSL_SUCCESS) {
+                    wolfSSL_EVP_PKEY_free(pkey);
+                    return NULL;
+                }
+
+                return pkey;
+            }
+        }
+        wc_FreeDhKey(&dh);
+    }
+    #endif /* !NO_DH && (WOLFSSL_QT || OPENSSL_ALL) */
+
     return pkey;
-
 }
 
 
@@ -6616,6 +7575,7 @@
     WOLFSSL_EVP_PKEY* local;
     word32 idx = 0;
     int    ret;
+    word32 algId;
 
     WOLFSSL_ENTER("wolfSSL_d2i_PrivateKey");
 
@@ -6626,8 +7586,8 @@
 
     /* Check if input buffer has PKCS8 header. In the case that it does not
      * have a PKCS8 header then do not error out. */
-    if ((ret = ToTraditionalInline((const byte*)(*in), &idx, (word32)inSz))
-            > 0) {
+    if ((ret = ToTraditionalInline_ex((const byte*)(*in), &idx, (word32)inSz,
+                                                                 &algId)) > 0) {
         WOLFSSL_MSG("Found and removed PKCS8 header");
     }
     else {
@@ -6639,8 +7599,9 @@
 
     if (out != NULL && *out != NULL) {
         wolfSSL_EVP_PKEY_free(*out);
-    }
-    local = wolfSSL_PKEY_new();
+        *out = NULL;
+    }
+    local = wolfSSL_EVP_PKEY_new();
     if (local == NULL) {
         return NULL;
     }
@@ -6698,7 +7659,40 @@
             }
             break;
 #endif /* HAVE_ECC */
-
+#if defined(WOLFSSL_QT) || defined(OPENSSL_ALL)
+#ifndef NO_DSA
+        case EVP_PKEY_DSA:
+            local->ownDsa = 1;
+            local->dsa = wolfSSL_DSA_new();
+            if (local->dsa == NULL) {
+                wolfSSL_EVP_PKEY_free(local);
+                return NULL;
+            }
+            if (wolfSSL_DSA_LoadDer(local->dsa,
+                    (const unsigned char*)local->pkey.ptr, local->pkey_sz)
+                    != SSL_SUCCESS) {
+                wolfSSL_EVP_PKEY_free(local);
+                return NULL;
+            }
+            break;
+#endif /* NO_DSA */
+#ifndef NO_DH
+        case EVP_PKEY_DH:
+            local->ownDh = 1;
+            local->dh = wolfSSL_DH_new();
+            if (local->dh == NULL) {
+                wolfSSL_EVP_PKEY_free(local);
+                return NULL;
+            }
+            if (wolfSSL_DH_LoadDer(local->dh,
+                      (const unsigned char*)local->pkey.ptr, local->pkey_sz)
+                      != SSL_SUCCESS) {
+                wolfSSL_EVP_PKEY_free(local);
+                return NULL;
+            }
+            break;
+#endif /* HAVE_DH */
+#endif /* WOLFSSL_QT || OPENSSL_ALL */
         default:
             WOLFSSL_MSG("Unsupported key type");
             wolfSSL_EVP_PKEY_free(local);
@@ -6719,30 +7713,6 @@
     return local;
 }
 
-#ifndef NO_WOLFSSL_STUB
-long wolfSSL_ctrl(WOLFSSL* ssl, int cmd, long opt, void* pt)
-{
-    WOLFSSL_STUB("SSL_ctrl");
-    (void)ssl;
-    (void)cmd;
-    (void)opt;
-    (void)pt;
-    return WOLFSSL_FAILURE;
-}
-#endif
-
-#ifndef NO_WOLFSSL_STUB
-long wolfSSL_CTX_ctrl(WOLFSSL_CTX* ctx, int cmd, long opt, void* pt)
-{
-    WOLFSSL_STUB("SSL_CTX_ctrl");
-    (void)ctx;
-    (void)cmd;
-    (void)opt;
-    (void)pt;
-    return WOLFSSL_FAILURE;
-}
-#endif
-
 #ifndef NO_CERTS
 
 int wolfSSL_check_private_key(const WOLFSSL* ssl)
@@ -6779,26 +7749,1315 @@
     return ret;
 }
 
+#if defined(OPENSSL_ALL)
+/* Returns the number of X509V3 extensions in X509 object, or 0 on failure */
+int wolfSSL_X509_get_ext_count(const WOLFSSL_X509* passedCert)
+{
+    int extCount = 0;
+    int length = 0;
+    int outSz = 0;
+    const byte* rawCert;
+    int sz = 0;
+    word32 idx = 0;
+    DecodedCert cert;
+    const byte* input;
+
+    WOLFSSL_ENTER("wolfSSL_X509_get_ext_count()");
+    if (passedCert == NULL) {
+        WOLFSSL_MSG("\tNot passed a certificate");
+        return WOLFSSL_FAILURE;
+    }
+
+    rawCert = wolfSSL_X509_get_der((WOLFSSL_X509*)passedCert, &outSz);
+    if (rawCert == NULL) {
+        WOLFSSL_MSG("\tpassedCert has no internal DerBuffer set.");
+        return WOLFSSL_FAILURE;
+    }
+    InitDecodedCert(&cert, rawCert, (word32)outSz, 0);
+
+    if (ParseCert(&cert, CA_TYPE, NO_VERIFY, NULL) < 0) {
+        WOLFSSL_MSG("\tCertificate parsing failed");
+        return WOLFSSL_FAILURE;
+    }
+
+    input = cert.extensions;
+    sz = cert.extensionsSz;
+
+    if (input == NULL || sz == 0) {
+        WOLFSSL_MSG("\tsz or input NULL error");
+        FreeDecodedCert(&cert);
+        return WOLFSSL_FAILURE;
+    }
+
+    if (input[idx++] != ASN_EXTENSIONS) {
+        WOLFSSL_MSG("\tfail: should be an EXTENSIONS");
+        FreeDecodedCert(&cert);
+        return WOLFSSL_FAILURE;
+    }
+
+    if (GetLength(input, &idx, &length, sz) < 0) {
+        WOLFSSL_MSG("\tfail: invalid length");
+        FreeDecodedCert(&cert);
+        return WOLFSSL_FAILURE;
+    }
+
+    if (GetSequence(input, &idx, &length, sz) < 0) {
+        WOLFSSL_MSG("\tfail: should be a SEQUENCE (1)");
+        FreeDecodedCert(&cert);
+        return WOLFSSL_FAILURE;
+    }
+
+    while (idx < (word32)sz) {
+        if (GetSequence(input, &idx, &length, sz) < 0) {
+            WOLFSSL_MSG("\tfail: should be a SEQUENCE");
+            FreeDecodedCert(&cert);
+            return WOLFSSL_FAILURE;
+        }
+        idx += length;
+        extCount++;
+    }
+    FreeDecodedCert(&cert);
+    return extCount;
+}
+
+/* Creates and returns pointer to a new X509_EXTENSION object in memory */
+WOLFSSL_X509_EXTENSION* wolfSSL_X509_EXTENSION_new(void)
+{
+    WOLFSSL_ENTER("wolfSSL_X509_EXTENSION_new");
+
+    WOLFSSL_X509_EXTENSION* newExt;
+    newExt = (WOLFSSL_X509_EXTENSION*)XMALLOC(sizeof(WOLFSSL_X509_EXTENSION),
+              NULL, DYNAMIC_TYPE_X509_EXT);
+    if (newExt == NULL)
+        return NULL;
+    XMEMSET(newExt, 0, sizeof(WOLFSSL_X509_EXTENSION));
+
+    return newExt;
+}
+
+void wolfSSL_X509_EXTENSION_free(WOLFSSL_X509_EXTENSION* x)
+{
+    WOLFSSL_ASN1_STRING asn1;
+    WOLFSSL_ENTER("wolfSSL_X509_EXTENSION_free");
+    if (x == NULL)
+        return;
+
+    if (x->obj != NULL)
+        wolfSSL_ASN1_OBJECT_free(x->obj);
+
+    asn1 = x->value;
+    if (asn1.length > 0 && asn1.data != NULL && asn1.isDynamic)
+        XFREE(asn1.data, NULL, DYNAMIC_TYPE_OPENSSL);
+
+    wolfSSL_sk_free(x->ext_sk);
+
+    XFREE(x, NULL, DYNAMIC_TYPE_X509_EXT);
+}
+
+/* Creates and returns a new WOLFSSL_X509_EXTENSION stack. */
+WOLFSSL_STACK* wolfSSL_sk_new_x509_ext(void)
+{
+    WOLFSSL_STACK* sk;
+    WOLFSSL_ENTER("wolfSSL_sk_new_x509_ext");
+
+    sk = wolfSSL_sk_new_null();
+    if (sk) {
+        sk->type = STACK_TYPE_X509_EXT;
+    }
+    return sk;
+}
+
+/* return 1 on success 0 on fail */
+int wolfSSL_sk_X509_EXTENSION_push(WOLFSSL_STACK* sk,WOLFSSL_X509_EXTENSION* ext)
+{
+    WOLFSSL_STACK* node;
+
+    WOLFSSL_ENTER("wolfSSL_sk_X509_EXTENSION_push");
+
+    if (sk == NULL || ext == NULL) {
+        return WOLFSSL_FAILURE;
+    }
+
+    /* no previous values in stack */
+    if (sk->data.ext == NULL) {
+        sk->data.ext = ext;
+        sk->num += 1;
+        return WOLFSSL_SUCCESS;
+    }
+
+    /* stack already has value(s) create a new node and add more */
+    node = (WOLFSSL_STACK*)XMALLOC(sizeof(WOLFSSL_STACK), NULL,
+                                                             DYNAMIC_TYPE_X509);
+    if (node == NULL) {
+        WOLFSSL_MSG("Memory error");
+        return WOLFSSL_FAILURE;
+    }
+    XMEMSET(node, 0, sizeof(WOLFSSL_STACK));
+
+    /* push new obj onto head of stack */
+    node->data.ext  = sk->data.ext;
+    node->next      = sk->next;
+    node->type      = sk->type;
+    sk->next        = node;
+    sk->data.ext    = ext;
+    sk->num        += 1;
+
+    return WOLFSSL_SUCCESS;
+}
+
+/* Free the structure for X509_EXTENSION stack
+ *
+ * sk  stack to free nodes in
+ */
+void wolfSSL_sk_X509_EXTENSION_free(WOLFSSL_STACK* sk)
+{
+    WOLFSSL_STACK* node;
+
+    WOLFSSL_ENTER("wolfSSL_sk_X509_EXTENSION_free");
+
+    if (sk == NULL) {
+        return;
+    }
+
+    /* parse through stack freeing each node */
+    node = sk->next;
+    while ((node != NULL) && (sk->num > 1)) {
+        WOLFSSL_STACK* tmp = node;
+        node = node->next;
+
+        wolfSSL_X509_EXTENSION_free(tmp->data.ext);
+        XFREE(tmp, NULL, DYNAMIC_TYPE_X509);
+        sk->num -= 1;
+    }
+
+    /* free head of stack */
+    if (sk->num == 1) {
+        wolfSSL_X509_EXTENSION_free(sk->data.ext);
+    }
+    XFREE(sk, NULL, DYNAMIC_TYPE_X509);
+}
+
+int wolfSSL_ASN1_BIT_STRING_set_bit(WOLFSSL_ASN1_BIT_STRING* str, int pos,
+    int val)
+{
+    int bytes_cnt, bit;
+    byte* temp;
+
+    if (!str || (val != 0 && val != 1) || pos < 0) {
+        return WOLFSSL_FAILURE;
+    }
+
+    bytes_cnt = pos/8;
+    bit = 1<<(7-(pos%8));
+
+    if (bytes_cnt+1 > str->length) {
+        if (!(temp = (byte*)XREALLOC(str->data, bytes_cnt+1, NULL,
+                DYNAMIC_TYPE_OPENSSL))) {
+            return WOLFSSL_FAILURE;
+        }
+        XMEMSET(temp+str->length, 0, bytes_cnt+1 - str->length);
+        str->data = temp;
+        str->length = bytes_cnt+1;
+    }
+
+    str->data[bytes_cnt] &= ~bit;
+    str->data[bytes_cnt] |= val ? bit : 0;
+
+    return WOLFSSL_SUCCESS;
+}
+
+/* Gets the X509_EXTENSION* ext based on it's location in WOLFSSL_X509* x509.
+ *
+ * x509   : The X509 structure to look for the extension.
+ * loc    : Location of the extension. If the extension is found at the given
+ * location, a new X509_EXTENSION structure is populated with extension-specific
+ * data based on the extension type.
+
+ * Returns NULL on error or pointer to X509_EXTENSION structure containing the
+ * extension. The returned X509_EXTENSION should not be free'd by caller.
+ * The returned X509_EXTENSION is pushed onto a stack inside the x509 argument.
+ * This is later free'd when x509 is free'd.
+ *
+ * NOTE: for unknown extension NIDs, a X509_EXTENSION is populated with the
+ * extension oid as the ASN1_OBJECT (QT compatibility)
+ */
+WOLFSSL_X509_EXTENSION* wolfSSL_X509_get_ext(const WOLFSSL_X509* x509, int loc)
+{
+    WOLFSSL_X509_EXTENSION* ext = NULL;
+    WOLFSSL_ENTER("wolfSSL_X509_get_ext");
+    if (x509 == NULL)
+        return NULL;
+
+   ext = wolfSSL_X509_set_ext((WOLFSSL_X509*) x509, loc);
+   return ext;
+}
+
+/* Pushes a new X509_EXTENSION* ext onto the stack inside WOLFSSL_X509* x509.
+ * This is currently a helper function for wolfSSL_X509_get_ext
+ * Caller does not free the returned WOLFSSL_X509_EXTENSION*
+ */
+WOLFSSL_X509_EXTENSION* wolfSSL_X509_set_ext(WOLFSSL_X509* x509, int loc)
+{
+    int extCount = 0, length = 0, outSz = 0, sz = 0, ret = 0;
+    int objSz = 0, isSet = 0;
+    const byte* rawCert;
+    const byte* input;
+    byte* oidBuf;
+    word32 oid, idx = 0, tmpIdx = 0;
+    WOLFSSL_X509_EXTENSION* ext = NULL;
+    WOLFSSL_ASN1_INTEGER* a;
+    WOLFSSL_STACK* sk;
+    DecodedCert cert;
+
+    WOLFSSL_ENTER("wolfSSL_X509_set_ext");
+
+    if(x509 == NULL){
+        WOLFSSL_MSG("\tNot passed a certificate");
+        return NULL;
+    }
+
+    if(loc <0 || (loc > wolfSSL_X509_get_ext_count(x509))){
+        WOLFSSL_MSG("\tBad location argument");
+        return NULL;
+    }
+
+    ext = wolfSSL_X509_EXTENSION_new();
+    if (ext == NULL) {
+        WOLFSSL_MSG("\tX509_EXTENSION_new() failed");
+        return NULL;
+    }
+
+    rawCert = wolfSSL_X509_get_der((WOLFSSL_X509*)x509, &outSz);
+    if (rawCert == NULL) {
+        WOLFSSL_MSG("\tX509_get_der() failed");
+        wolfSSL_X509_EXTENSION_free(ext);
+        return NULL;
+    }
+
+    InitDecodedCert( &cert, rawCert, (word32)outSz, 0);
+
+    if (ParseCert(&cert, CA_TYPE, NO_VERIFY, NULL) < 0) {
+        WOLFSSL_MSG("\tCertificate parsing failed");
+        wolfSSL_X509_EXTENSION_free(ext);
+        return NULL;
+    }
+
+    input = cert.extensions;
+    sz = cert.extensionsSz;
+
+    if (input == NULL || sz == 0) {
+        WOLFSSL_MSG("\tfail: should be an EXTENSIONS");
+        wolfSSL_X509_EXTENSION_free(ext);
+        FreeDecodedCert(&cert);
+        return NULL;
+    }
+
+    if (input[idx++] != ASN_EXTENSIONS) {
+        WOLFSSL_MSG("\tfail: should be an EXTENSIONS");
+        wolfSSL_X509_EXTENSION_free(ext);
+        FreeDecodedCert(&cert);
+        return NULL;
+    }
+
+    if (GetLength(input, &idx, &length, sz) < 0) {
+        WOLFSSL_MSG("\tfail: invalid length");
+        wolfSSL_X509_EXTENSION_free(ext);
+        FreeDecodedCert(&cert);
+        return NULL;
+    }
+
+    if (GetSequence(input, &idx, &length, sz) < 0) {
+        WOLFSSL_MSG("\tfail: should be a SEQUENCE (1)");
+        wolfSSL_X509_EXTENSION_free(ext);
+        FreeDecodedCert(&cert);
+        return NULL;
+    }
+
+    while (idx < (word32)sz) {
+        oid = 0;
+
+        if (GetSequence(input, &idx, &length, sz) < 0) {
+            WOLFSSL_MSG("\tfail: should be a SEQUENCE");
+            wolfSSL_X509_EXTENSION_free(ext);
+            FreeDecodedCert(&cert);
+            return NULL;
+        }
+
+        tmpIdx = idx;
+        ret = GetObjectId(input, &idx, &oid, oidCertExtType, sz);
+        if (ret < 0) {
+            WOLFSSL_MSG("\tfail: OBJECT ID");
+            wolfSSL_X509_EXTENSION_free(ext);
+            FreeDecodedCert(&cert);
+            return NULL;
+        }
+        idx = tmpIdx;
+
+        /* Continue while loop until extCount == loc or idx > sz */
+        if (extCount != loc) {
+            idx += length;
+            extCount++;
+            continue;
+        }
+        /* extCount == loc. Now get the extension. */
+        /* Check if extension has been set */
+        isSet = wolfSSL_X509_ext_isSet_by_NID((WOLFSSL_X509*)x509, oid);
+        ext->obj = wolfSSL_OBJ_nid2obj(oid);
+        if (ext->obj == NULL) {
+            WOLFSSL_MSG("\tfail: Invalid OBJECT");
+            wolfSSL_X509_EXTENSION_free(ext);
+            FreeDecodedCert(&cert);
+            return NULL;
+        }
+        ext->obj->nid = oid;
+
+        switch (oid) {
+            case BASIC_CA_OID:
+                if (!isSet)
+                    break;
+                /* Set pathlength */
+                a = wolfSSL_ASN1_INTEGER_new();
+                if (a == NULL) {
+                    wolfSSL_X509_EXTENSION_free(ext);
+                    FreeDecodedCert(&cert);
+                    return NULL;
+                }
+                a->length = x509->pathLength;
+
+                /* Save ASN1_INTEGER in x509 extension */
+                ext->obj->pathlen = a;
+
+                ext->obj->ca = x509->isCa;
+                ext->crit = x509->basicConstCrit;
+                break;
+
+            case AUTH_INFO_OID:
+                if (!isSet)
+                    break;
+
+                /* Create a stack to hold both the caIssuer and ocsp objects
+                    in X509_EXTENSION structure */
+                sk = (WOLF_STACK_OF(WOLFSSL_ASN1_OBJECT)*)XMALLOC(
+                            sizeof(WOLF_STACK_OF(WOLFSSL_ASN1_OBJECT)),
+                            NULL, DYNAMIC_TYPE_ASN1);
+                if (sk == NULL) {
+                    WOLFSSL_MSG("Failed to malloc stack");
+                    wolfSSL_X509_EXTENSION_free(ext);
+                    FreeDecodedCert(&cert);
+                    return NULL;
+                }
+                XMEMSET(sk, 0, sizeof(WOLF_STACK_OF(WOLFSSL_ASN1_OBJECT)));
+                sk->type = STACK_TYPE_OBJ;
+
+                /* Add CaIssuers object to stack */
+                if (x509->authInfoCaIssuer != NULL &&
+                    x509->authInfoCaIssuerSz > 0)
+                {
+                    WOLFSSL_ASN1_OBJECT* obj;
+                    obj = wolfSSL_ASN1_OBJECT_new();
+                    if (obj == NULL) {
+                        WOLFSSL_MSG("Error creating ASN1 object");
+                        wolfSSL_sk_ASN1_OBJECT_free(sk);
+                        wolfSSL_X509_EXTENSION_free(ext);
+                        FreeDecodedCert(&cert);
+                        return NULL;
+                    }
+                    obj->obj = (byte*)x509->authInfoCaIssuer;
+                    obj->objSz = x509->authInfoCaIssuerSz;
+                    obj->grp = oidCertAuthInfoType;
+                    obj->nid = AIA_CA_ISSUER_OID;
+
+                    ret = wolfSSL_sk_ASN1_OBJECT_push(sk, obj);
+                    if (ret != WOLFSSL_SUCCESS) {
+                        WOLFSSL_MSG("Error pushing ASN1 object onto stack");
+                        wolfSSL_ASN1_OBJECT_free(obj);
+                        wolfSSL_sk_ASN1_OBJECT_free(sk);
+                        wolfSSL_X509_EXTENSION_free(ext);
+                        FreeDecodedCert(&cert);
+                        return NULL;
+                    }
+                }
+
+                /* Add OCSP object to stack */
+                if (x509->authInfo != NULL &&
+                    x509->authInfoSz > 0)
+                {
+                    WOLFSSL_ASN1_OBJECT* obj;
+                    obj = wolfSSL_ASN1_OBJECT_new();
+                    if (obj == NULL) {
+                        WOLFSSL_MSG("Error creating ASN1 object");
+                        wolfSSL_sk_ASN1_OBJECT_free(sk);
+                        wolfSSL_X509_EXTENSION_free(ext);
+                        FreeDecodedCert(&cert);
+                        return NULL;
+                    }
+                    obj->obj = x509->authInfo;
+                    obj->objSz = x509->authInfoSz;
+                    obj->grp = oidCertAuthInfoType;
+                    obj->nid = AIA_OCSP_OID;
+
+                    ret = wolfSSL_sk_ASN1_OBJECT_push(sk, obj);
+                    if (ret != WOLFSSL_SUCCESS) {
+                        WOLFSSL_MSG("Error pushing ASN1 object onto stack");
+                        wolfSSL_ASN1_OBJECT_free(obj);
+                        wolfSSL_sk_ASN1_OBJECT_free(sk);
+                        wolfSSL_X509_EXTENSION_free(ext);
+                        FreeDecodedCert(&cert);
+                        return NULL;
+                    }
+                }
+                ext->ext_sk = sk;
+                ext->crit = x509->authInfoCrit;
+                break;
+
+            case AUTH_KEY_OID:
+                if (!isSet)
+                    break;
+
+                ret = wolfSSL_ASN1_STRING_set(&ext->value, x509->authKeyId,
+                                        x509->authKeyIdSz);
+                if (ret != WOLFSSL_SUCCESS) {
+                    WOLFSSL_MSG("ASN1_STRING_set() failed");
+                    wolfSSL_X509_EXTENSION_free(ext);
+                    FreeDecodedCert(&cert);
+                    return NULL;
+                }
+                ext->crit = x509->authKeyIdCrit;
+                break;
+
+            case SUBJ_KEY_OID:
+                if (!isSet)
+                    break;
+
+                ret = wolfSSL_ASN1_STRING_set(&ext->value, x509->subjKeyId,
+                                        x509->subjKeyIdSz);
+                if (ret != WOLFSSL_SUCCESS) {
+                    WOLFSSL_MSG("ASN1_STRING_set() failed");
+                    wolfSSL_X509_EXTENSION_free(ext);
+                    FreeDecodedCert(&cert);
+                    return NULL;
+                }
+                ext->crit = x509->subjKeyIdCrit;
+                break;
+
+            case CERT_POLICY_OID:
+                if (!isSet)
+                    break;
+                ext->crit = x509->certPolicyCrit;
+                break;
+
+            case KEY_USAGE_OID:
+                if (!isSet)
+                    break;
+
+                ret = wolfSSL_ASN1_STRING_set(&ext->value,
+                                  (byte*)&(x509->keyUsage), sizeof(word16));
+                if (ret != WOLFSSL_SUCCESS) {
+                    WOLFSSL_MSG("ASN1_STRING_set() failed");
+                    wolfSSL_X509_EXTENSION_free(ext);
+                    FreeDecodedCert(&cert);
+                    return NULL;
+                }
+                ext->crit = x509->keyUsageCrit;
+                break;
+
+            case EXT_KEY_USAGE_OID:
+                if (!isSet)
+                    break;
+                ext->crit = x509->keyUsageCrit;
+                break;
+
+            case CRL_DIST_OID:
+                if (!isSet)
+                    break;
+                ext->crit = x509->CRLdistCrit;
+                break;
+
+            case ALT_NAMES_OID:
+            {
+                WOLFSSL_GENERAL_NAME* gn = NULL;
+                DNS_entry* dns = NULL;
+                if (!isSet)
+                    break;
+
+                sk = (WOLF_STACK_OF(WOLFSSL_GENERAL_NAME)*)XMALLOC(
+                          sizeof(WOLF_STACK_OF(WOLFSSL_GENERAL_NAME)), NULL,
+                                                         DYNAMIC_TYPE_ASN1);
+                if (sk == NULL) {
+                    return NULL;
+                }
+                XMEMSET(sk, 0, sizeof(WOLF_STACK_OF(WOLFSSL_GENERAL_NAME)));
+                sk->type = STACK_TYPE_GEN_NAME;
+
+                if (x509->subjAltNameSet && x509->altNames != NULL) {
+                    /* alt names are DNS_entry structs */
+                    dns = x509->altNames;
+                    /* Currently only support GEN_DNS type */
+                    while (dns != NULL) {
+                        gn = wolfSSL_GENERAL_NAME_new();
+                        if (gn == NULL) {
+                            WOLFSSL_MSG("Error creating GENERAL_NAME");
+                            wolfSSL_sk_free(sk);
+                            return NULL;
+                        }
+
+                        gn->type = dns->type;
+                        gn->d.ia5->length = dns->len;
+                        if (wolfSSL_ASN1_STRING_set(gn->d.ia5, dns->name,
+                                    gn->d.ia5->length) != WOLFSSL_SUCCESS) {
+                            WOLFSSL_MSG("ASN1_STRING_set failed");
+                            wolfSSL_GENERAL_NAME_free(gn);
+                            wolfSSL_sk_free(sk);
+                            return NULL;
+                        }
+
+                        dns = dns->next;
+                        /* last dns in list add at end of function */
+                        if (dns != NULL) {
+                            if (wolfSSL_sk_GENERAL_NAME_push(sk, gn) !=
+                                                          WOLFSSL_SUCCESS) {
+                            WOLFSSL_MSG("Error pushing onto stack");
+                            wolfSSL_GENERAL_NAME_free(gn);
+                            wolfSSL_sk_free(sk);
+                            sk = NULL;
+                            }
+                        }
+                    }
+                    if (wolfSSL_sk_GENERAL_NAME_push(sk,gn) !=
+                                                          WOLFSSL_SUCCESS) {
+                        WOLFSSL_MSG("Error pushing onto stack");
+                        wolfSSL_GENERAL_NAME_free(gn);
+                        wolfSSL_sk_free(sk);
+                        sk = NULL;
+                    }
+                }
+                ext->ext_sk = sk;
+                ext->crit = x509->subjAltNameCrit;
+                break;
+            }
+
+            default:
+                WOLFSSL_MSG("Unknown extension type found, parsing OID");
+                /* If the extension type is not recognized/supported,
+                    set the ASN1_OBJECT in the extension with the
+                    parsed oid for access in later function calls */
+
+                /* Get OID from input */
+                if (GetASNObjectId(input, &idx, &length, sz) != 0) {
+                    WOLFSSL_MSG("Failed to Get ASN Object Id");
+                    wolfSSL_X509_EXTENSION_free(ext);
+                    FreeDecodedCert(&cert);
+                    return NULL;
+                }
+                oidBuf = (byte*)XMALLOC(length+1+MAX_LENGTH_SZ, NULL,
+                                    DYNAMIC_TYPE_TMP_BUFFER);
+                if (oidBuf == NULL) {
+                    WOLFSSL_MSG("Failed to malloc tmp buffer");
+                    wolfSSL_X509_EXTENSION_free(ext);
+                    FreeDecodedCert(&cert);
+                    return NULL;
+                }
+                oidBuf[0] = ASN_OBJECT_ID;
+                objSz++;
+                objSz += SetLength(length, oidBuf + 1);
+                objSz += length;
+
+                /* Set object size and reallocate space in object buffer */
+                ext->obj->objSz = objSz;
+                if(((ext->obj->dynamic & WOLFSSL_ASN1_DYNAMIC_DATA) != 0) ||
+                   (ext->obj->obj == NULL)) {
+                        ext->obj->obj =(byte*)XREALLOC((byte*)ext->obj->obj,
+                                             ext->obj->objSz,
+                                             NULL,DYNAMIC_TYPE_ASN1);
+                    if (ext->obj->obj == NULL) {
+                        wolfSSL_ASN1_OBJECT_free(ext->obj);
+                        wolfSSL_X509_EXTENSION_free(ext);
+                        FreeDecodedCert(&cert);
+                        return NULL;
+                    }
+                    ext->obj->dynamic |= WOLFSSL_ASN1_DYNAMIC_DATA;
+                } else {
+                    ext->obj->dynamic &= ~WOLFSSL_ASN1_DYNAMIC_DATA;
+                }
+                /* Get OID from input and copy to ASN1_OBJECT buffer */
+                XMEMCPY(oidBuf+2, input+idx, length);
+                XMEMCPY((byte*)ext->obj->obj, oidBuf, ext->obj->objSz);
+                XFREE(oidBuf, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+                oidBuf = NULL;
+                ext->obj->grp = oidCertExtType;
+                ext->crit = 0;
+
+                /* Get extension data and copy as ASN1_STRING */
+                tmpIdx = idx + length;
+                if ((tmpIdx >= (word32)sz) || (input[tmpIdx++] != ASN_OCTET_STRING)) {
+                    WOLFSSL_MSG("Error decoding unknown extension data");
+                    wolfSSL_ASN1_OBJECT_free(ext->obj);
+                    wolfSSL_X509_EXTENSION_free(ext);
+                    FreeDecodedCert(&cert);
+                    return NULL;
+                }
+
+                if (GetLength(input, &tmpIdx, &length, sz) <= 0) {
+                    WOLFSSL_MSG("Error: Invalid Input Length.");
+                    wolfSSL_ASN1_OBJECT_free(ext->obj);
+                    wolfSSL_X509_EXTENSION_free(ext);
+                    FreeDecodedCert(&cert);
+                    return NULL;
+                }
+                ext->value.data = (char*)XMALLOC(length, NULL, DYNAMIC_TYPE_ASN1);
+                ext->value.isDynamic = 1;
+                if (ext->value.data == NULL) {
+                    WOLFSSL_MSG("Failed to malloc ASN1_STRING data");
+                    wolfSSL_X509_EXTENSION_free(ext);
+                    FreeDecodedCert(&cert);
+                    return NULL;
+                }
+                XMEMCPY(ext->value.data,input+tmpIdx,length);
+                ext->value.length = length;
+        } /* switch(oid) */
+
+        break; /* Got the Extension. Now exit while loop. */
+
+    } /* while(idx < sz) */
+
+    /* Store the new extension in a stack inside x509
+     * The extensions on the stack are free'd internally when FreeX509 is called
+     */
+    if (x509->ext_sk == NULL)
+        x509->ext_sk = wolfSSL_sk_new_x509_ext();
+    if (x509->ext_sk != NULL)
+        wolfSSL_sk_X509_EXTENSION_push(x509->ext_sk, ext);
+
+    FreeDecodedCert(&cert);
+    return ext;
+}
+
+/* Return 0 on success and 1 on failure. Copies ext data to bio, using indent
+ *  to pad the output. flag is ignored. */
+int wolfSSL_X509V3_EXT_print(WOLFSSL_BIO *out, WOLFSSL_X509_EXTENSION *ext,
+        unsigned long flag, int indent)
+{
+    ASN1_OBJECT* obj;
+    ASN1_STRING* str;
+    int nid;
+    const int sz = CTC_NAME_SIZE*2;
+    int rc = WOLFSSL_FAILURE;
+    char tmp[CTC_NAME_SIZE*2];
+    WOLFSSL_ENTER("wolfSSL_X509V3_EXT_print");
+
+    if ((out == NULL) || (ext == NULL)) {
+        WOLFSSL_MSG("NULL parameter error");
+        return rc;
+    }
+
+    obj = wolfSSL_X509_EXTENSION_get_object(ext);
+    if (obj == NULL) {
+        WOLFSSL_MSG("Error getting ASN1_OBJECT from X509_EXTENSION");
+        return rc;
+    }
+
+    str = wolfSSL_X509_EXTENSION_get_data(ext);
+    if (obj == NULL) {
+        WOLFSSL_MSG("Error getting ASN1_STRING from X509_EXTENSION");
+        return rc;
+    }
+
+    /* Print extension based on the type */
+    nid = wolfSSL_OBJ_obj2nid(obj);
+    switch (nid) {
+        case BASIC_CA_OID:
+        {
+            char isCa[] = "TRUE";
+            char notCa[] = "FALSE";
+            XSNPRINTF(tmp, sz, "%*sCA:%s", indent, "",
+                                                    obj->ca ? isCa : notCa);
+            break;
+        }
+        case ALT_NAMES_OID:
+        {
+            WOLFSSL_STACK* sk;
+            char* val;
+            int len;
+            tmp[0] = '\0'; /* Make sure tmp is null-terminated */
+
+            sk = ext->ext_sk;
+            while (sk != NULL) {
+                if (sk->type == STACK_TYPE_GEN_NAME && sk->data.gn) {
+                    /* str is GENERAL_NAME for subject alternative name ext */
+                    str = sk->data.gn->d.ia5;
+                    len = str->length + 2; /* + 2 for NULL char and "," */
+                    if (len > sz) {
+                        WOLFSSL_MSG("len greater than buffer size");
+                        return rc;
+                    }
+
+                    val = (char*)XMALLOC(len + indent, NULL,
+                                                       DYNAMIC_TYPE_TMP_BUFFER);
+                    if (val == NULL) {
+                        WOLFSSL_MSG("Memory error");
+                        return rc;
+                    }
+                    if (sk->next)
+                        XSNPRINTF(val, len, "%*s%s, ", indent, "", str->strData);
+                    else
+                        XSNPRINTF(val, len, "%*s%s", indent, "", str->strData);
+
+                    XSTRNCAT(tmp, val, len);
+                    XFREE(val, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+                }
+                sk = sk->next;
+            }
+            break;
+        }
+        case AUTH_KEY_OID:
+        case SUBJ_KEY_OID:
+        {
+            char* asn1str;
+            asn1str = wolfSSL_i2s_ASN1_STRING(NULL, str);
+            XSNPRINTF(tmp, sz, "%*s%s", indent, "", asn1str);
+            XFREE(asn1str, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+            break;
+        }
+        case AUTH_INFO_OID:
+        case CERT_POLICY_OID:
+        case CRL_DIST_OID:
+        case KEY_USAGE_OID:
+            WOLFSSL_MSG("X509V3_EXT_print not yet implemented for ext type");
+            break;
+
+        default:
+            XSNPRINTF(tmp, sz, "%*s%s", indent, "", str->strData);
+    }
+
+    if (wolfSSL_BIO_write(out, tmp, (int)XSTRLEN(tmp)) == (int)XSTRLEN(tmp)) {
+        rc = WOLFSSL_SUCCESS;
+    }
+    (void) flag;
+
+    return rc;
+}
+
+/* Returns crit flag in X509_EXTENSION object */
+int wolfSSL_X509_EXTENSION_get_critical(const WOLFSSL_X509_EXTENSION* ex)
+{
+    WOLFSSL_ENTER("wolfSSL_X509_EXTENSION_get_critical");
+    if (ex == NULL)
+        return BAD_FUNC_ARG;
+    return ex->crit;
+}
+
+/* Creates v3_ext_method for a given X509v3 extension
+ *
+ * ex   : The X509_EXTENSION used to create v3_ext_method. If the extension is
+ * not NULL, get the NID of the extension object and populate the
+ * extension type-specific X509V3_EXT_* function(s) in v3_ext_method.
+ *
+ * Returns NULL on error or pointer to the v3_ext_method populated with extension
+ * type-specific X509V3_EXT_* function(s).
+ *
+ * NOTE: NID_subject_key_identifier is currently the only extension implementing
+ * the X509V3_EXT_* functions, as it is the only type called directly by QT. The
+ * other extension types return a pointer to a v3_ext_method struct that contains
+ * only the NID.
+ */
+const WOLFSSL_v3_ext_method* wolfSSL_X509V3_EXT_get(WOLFSSL_X509_EXTENSION* ex)
+{
+    int nid;
+    WOLFSSL_v3_ext_method method;
+
+    WOLFSSL_ENTER("wolfSSL_X509V3_EXT_get");
+    if ((ex == NULL) || (ex->obj == NULL)) {
+        WOLFSSL_MSG("Passed an invalid X509_EXTENSION*");
+        return NULL;
+    }
+    /* Initialize all methods to NULL */
+    method.d2i = NULL;
+    method.i2v = NULL;
+    method.i2s = NULL;
+    method.i2r = NULL;
+
+    nid = ex->obj->nid;
+    if (nid <= 0) {
+        WOLFSSL_MSG("Failed to get nid from passed extension object");
+        return NULL;
+    }
+
+    switch (nid) {
+        case NID_basic_constraints:
+            break;
+        case NID_subject_key_identifier:
+            method.i2s = (X509V3_EXT_I2S)wolfSSL_i2s_ASN1_STRING;
+            break;
+        case NID_key_usage:
+            WOLFSSL_MSG("i2v function not yet implemented for Key Usage");
+            break;
+        case NID_authority_key_identifier:
+            WOLFSSL_MSG("i2v function not yet implemented for Auth Key Id");
+            break;
+        case NID_info_access:
+            WOLFSSL_MSG("i2v function not yet implemented for Info Access");
+            break;
+        case NID_ext_key_usage:
+            WOLFSSL_MSG("i2v function not yet implemented for Ext Key Usage");
+            break;
+        case NID_certificate_policies:
+            WOLFSSL_MSG("r2i function not yet implemented for Cert Policies");
+            break;
+        case NID_crl_distribution_points:
+            WOLFSSL_MSG("r2i function not yet implemented for CRL Dist Points");
+            break;
+        default:
+            /* If extension type is unknown, return NULL -- QT makes call to
+                X509_EXTENSION_get_data() if there is no v3_ext_method */
+            WOLFSSL_MSG("X509V3_EXT_get(): Unknown extension type found");
+            return NULL;
+    }
+
+    method.ext_nid = nid;
+    ex->ext_method = method;
+
+    return (const WOLFSSL_v3_ext_method*)&ex->ext_method;
+}
+
+/* Parses and returns an x509v3 extension internal structure.
+ *
+ * ext   : The X509_EXTENSION for parsing internal structure. If extension is
+ * not NULL, get the NID of the extension object and create a new
+ * extension-specific internal structure based on the extension type.
+ *
+ * Returns NULL on error or if NID is not found, otherwise returns a pointer to
+ * the extension type-specific X509_EXTENSION internal structure.
+ * Return is expected to be free'd by caller.
+ */
+void* wolfSSL_X509V3_EXT_d2i(WOLFSSL_X509_EXTENSION* ext)
+{
+    const WOLFSSL_v3_ext_method* method;
+    int ret;
+    WOLFSSL_ASN1_OBJECT* object;
+    WOLFSSL_BASIC_CONSTRAINTS* bc;
+    WOLFSSL_AUTHORITY_KEYID* akey;
+    WOLFSSL_ASN1_STRING* asn1String, *newString;
+    WOLFSSL_AUTHORITY_INFO_ACCESS* aia;
+    WOLFSSL_STACK* sk;
+
+    WOLFSSL_ENTER("wolfSSL_X509V3_EXT_d2i");
+
+    if(ext == NULL) {
+        WOLFSSL_MSG("Bad function Argument");
+        return NULL;
+    }
+
+    /* extract extension info */
+    method = wolfSSL_X509V3_EXT_get(ext);
+    if (method == NULL) {
+        WOLFSSL_MSG("wolfSSL_X509V3_EXT_get error");
+        return NULL;
+    }
+    object = wolfSSL_X509_EXTENSION_get_object(ext);
+    if (object == NULL) {
+        WOLFSSL_MSG("X509_EXTENSION_get_object failed");
+        return NULL;
+    }
+
+    /* Return pointer to proper internal structure based on NID */
+    switch (object->type) {
+        /* basicConstraints */
+        case (NID_basic_constraints):
+            WOLFSSL_MSG("basicConstraints");
+            /* Allocate new BASIC_CONSTRAINTS structure */
+            bc = (WOLFSSL_BASIC_CONSTRAINTS*)
+                  XMALLOC(sizeof(WOLFSSL_BASIC_CONSTRAINTS), NULL,
+                  DYNAMIC_TYPE_X509_EXT);
+            if (bc == NULL) {
+                WOLFSSL_MSG("Failed to malloc basic constraints");
+                return NULL;
+            }
+            /* Copy pathlen and CA into BASIC_CONSTRAINTS from object */
+            bc->ca = object->ca;
+            if (object->pathlen->length > 0) {
+                bc->pathlen = wolfSSL_ASN1_INTEGER_dup(object->pathlen);
+                if (bc->pathlen == NULL) {
+                    WOLFSSL_MSG("Failed to duplicate ASN1_INTEGER");
+                    XFREE(bc, NULL, DYNAMIC_TYPE_X509_EXT);
+                    return NULL;
+                }
+            }
+            else
+                bc->pathlen = NULL;
+            return bc;
+
+        /* subjectKeyIdentifier */
+        case (NID_subject_key_identifier):
+            WOLFSSL_MSG("subjectKeyIdentifier");
+            asn1String = wolfSSL_X509_EXTENSION_get_data(ext);
+            if (asn1String == NULL) {
+                WOLFSSL_MSG("X509_EXTENSION_get_data() failed");
+                return NULL;
+            }
+            newString = wolfSSL_ASN1_STRING_new();
+            if (newString == NULL) {
+                WOLFSSL_MSG("Failed to malloc ASN1_STRING");
+                return NULL;
+            }
+            ret = wolfSSL_ASN1_STRING_set(newString, asn1String->data,
+                                                            asn1String->length);
+            if (ret != WOLFSSL_SUCCESS) {
+                WOLFSSL_MSG("ASN1_STRING_set() failed");
+                wolfSSL_ASN1_STRING_free(newString);
+                return NULL;
+            };
+            newString->type = asn1String->type;
+            return newString;
+
+        /* authorityKeyIdentifier */
+        case (NID_authority_key_identifier):
+            WOLFSSL_MSG("AuthorityKeyIdentifier");
+
+            akey = (WOLFSSL_AUTHORITY_KEYID*)
+                    XMALLOC(sizeof(WOLFSSL_AUTHORITY_KEYID), NULL,
+                    DYNAMIC_TYPE_X509_EXT);
+            if (akey == NULL) {
+                WOLFSSL_MSG("Failed to malloc authority key id");
+                return NULL;
+            }
+
+            akey->keyid = wolfSSL_ASN1_STRING_new();
+            if (akey->keyid == NULL) {
+                WOLFSSL_MSG("ASN1_STRING_new() failed");
+                wolfSSL_AUTHORITY_KEYID_free(akey);
+                return NULL;
+            }
+
+            asn1String = wolfSSL_X509_EXTENSION_get_data(ext);
+            if (asn1String == NULL) {
+                WOLFSSL_MSG("X509_EXTENSION_get_data() failed");
+                wolfSSL_AUTHORITY_KEYID_free(akey);
+                return NULL;
+            }
+
+            ret = wolfSSL_ASN1_STRING_set(akey->keyid, asn1String->data,
+                                                            asn1String->length);
+            if (ret != WOLFSSL_SUCCESS) {
+                WOLFSSL_MSG("ASN1_STRING_set() failed");
+                wolfSSL_AUTHORITY_KEYID_free(akey);
+                return NULL;
+            };
+            akey->keyid->type   = asn1String->type;
+
+            /* For now, set issuer and serial to NULL. This may need to be
+                updated for future use */
+            akey->issuer = NULL;
+            akey->serial = NULL;
+            return akey;
+
+        /* keyUsage */
+        case (NID_key_usage):
+            WOLFSSL_MSG("keyUsage");
+            /* This may need to be updated for future use. The i2v method for
+                keyUsage is not currently set. For now, return the ASN1_STRING
+                representation of KeyUsage bit string */
+            asn1String = wolfSSL_X509_EXTENSION_get_data(ext);
+            if (asn1String == NULL) {
+                WOLFSSL_MSG("X509_EXTENSION_get_data() failed");
+                return NULL;
+            }
+            newString = wolfSSL_ASN1_STRING_new();
+            if (newString == NULL) {
+                WOLFSSL_MSG("Failed to malloc ASN1_STRING");
+                return NULL;
+            }
+            ret = wolfSSL_ASN1_STRING_set(newString, asn1String->data,
+                                                            asn1String->length);
+            if (ret != WOLFSSL_SUCCESS) {
+                WOLFSSL_MSG("ASN1_STRING_set() failed");
+                wolfSSL_ASN1_STRING_free(newString);
+                return NULL;
+            };
+            newString->type = asn1String->type;
+            return newString;
+
+        /* extKeyUsage */
+        case (NID_ext_key_usage):
+            WOLFSSL_MSG("extKeyUsage not supported yet");
+            return NULL;
+
+        /* certificatePolicies */
+        case (NID_certificate_policies):
+            WOLFSSL_MSG("certificatePolicies not supported yet");
+            return NULL;
+
+        /* cRLDistributionPoints */
+        case (NID_crl_distribution_points):
+            WOLFSSL_MSG("cRLDistributionPoints not supported yet");
+            return NULL;
+
+        /* authorityInfoAccess */
+        case (NID_info_access):
+            WOLFSSL_MSG("AuthorityInfoAccess");
+
+            sk = ext->ext_sk;
+            if (sk == NULL) {
+                WOLFSSL_MSG("ACCESS_DESCRIPTION stack NULL");
+                return NULL;
+            }
+
+            /* AUTHORITY_INFO_ACCESS is a stack of ACCESS_DESCRIPTION entries */
+            aia = wolfSSL_sk_new_null();
+            if (aia == NULL) {
+                WOLFSSL_MSG("Failed to malloc AUTHORITY_INFO_ACCESS");
+                return NULL;
+            }
+            aia->type = STACK_TYPE_ACCESS_DESCRIPTION;
+
+            while (sk) {
+                WOLFSSL_ACCESS_DESCRIPTION* ad;
+                WOLFSSL_ASN1_OBJECT* aiaEntry;
+
+                if (sk->type != STACK_TYPE_OBJ) {
+                    sk = sk->next;
+                    continue;
+                }
+
+                aiaEntry = sk->data.obj;
+
+                /* ACCESS_DESCRIPTION has two members, method and location.
+                Method: ASN1_OBJECT as either AIA_OCSP_OID or AIA_CA_ISSUER_OID
+                Location: GENERAL_NAME structure containing the URI. */
+
+                ad = (WOLFSSL_ACCESS_DESCRIPTION*)
+                        XMALLOC(sizeof(WOLFSSL_ACCESS_DESCRIPTION), NULL,
+                        DYNAMIC_TYPE_X509_EXT);
+                if (ad == NULL) {
+                    WOLFSSL_MSG("Failed to malloc ACCESS_DESCRIPTION");
+                    XFREE(aia, NULL, DYNAMIC_TYPE_X509_EXT);
+                    return NULL;
+                }
+                XMEMSET(ad, 0, sizeof(WOLFSSL_ACCESS_DESCRIPTION));
+
+                /* Create new ASN1_OBJECT from oid */
+                ad->method = wolfSSL_OBJ_nid2obj(aiaEntry->nid);
+                if (ad->method == NULL) {
+                    WOLFSSL_MSG("OBJ_nid2obj() failed");
+                    XFREE(aia, NULL, DYNAMIC_TYPE_X509_EXT);
+                    XFREE(ad, NULL, DYNAMIC_TYPE_X509_EXT);
+                    return NULL;
+                }
+
+                /* Allocate memory for GENERAL NAME */
+                ad->location = (WOLFSSL_GENERAL_NAME*)
+                                XMALLOC(sizeof(WOLFSSL_GENERAL_NAME), NULL,
+                                DYNAMIC_TYPE_OPENSSL);
+                if (ad->location == NULL) {
+                    WOLFSSL_MSG("Failed to malloc GENERAL_NAME");
+                    wolfSSL_ASN1_OBJECT_free(ad->method);
+                    XFREE(aia, NULL, DYNAMIC_TYPE_X509_EXT);
+                    XFREE(ad, NULL, DYNAMIC_TYPE_X509_EXT);
+                    return NULL;
+                }
+                XMEMSET(ad->location, 0, sizeof(WOLFSSL_GENERAL_NAME));
+                ad->location->type = GEN_URI;
+                ad->location->d.uniformResourceIdentifier =
+                                    wolfSSL_ASN1_STRING_new();
+                /* Set the URI in GENERAL_NAME */
+                ret = wolfSSL_ASN1_STRING_set(
+                                    ad->location->d.uniformResourceIdentifier,
+                                    aiaEntry->obj, aiaEntry->objSz);
+                if (ret != WOLFSSL_SUCCESS) {
+                    WOLFSSL_MSG("ASN1_STRING_set() failed");
+                    wolfSSL_ASN1_OBJECT_free(ad->method);
+                    XFREE(aia, NULL, DYNAMIC_TYPE_X509_EXT);
+                    XFREE(ad, NULL, DYNAMIC_TYPE_X509_EXT);
+                    return NULL;
+                }
+                /* Push to AUTHORITY_INFO_ACCESS stack */
+                ret = wolfSSL_sk_ACCESS_DESCRIPTION_push(aia, ad);
+                if (ret != WOLFSSL_SUCCESS) {
+                    WOLFSSL_MSG("Error pushing ASN1 AD onto stack");
+                    wolfSSL_sk_ACCESS_DESCRIPTION_pop_free(aia, NULL);
+                    wolfSSL_ASN1_OBJECT_free(ad->method);
+                    XFREE(aia, NULL, DYNAMIC_TYPE_X509_EXT);
+                    XFREE(ad, NULL, DYNAMIC_TYPE_X509_EXT);
+                    return NULL;
+                }
+
+                sk = sk->next;
+            }
+            return aia;
+
+        default:
+            WOLFSSL_MSG("Extension NID not in table, returning NULL");
+            break;
+    }
+    return NULL;
+}
 
 /* Looks for the extension matching the passed in nid
  *
- * c   : if not null then is set to status value -2 if multiple occurances
+ * x509 : certificate to get parse through for extension.
+ * nid : Extension OID to be found.
+ * lastPos : Start search from extension after lastPos.
+ *           Set to -1 to search from index 0.
+ * return >= 0 If successful the extension index is returned.
+ * return -1 If extension is not found or error is encountered.
+ */
+int wolfSSL_X509_get_ext_by_NID(const WOLFSSL_X509* x509, int nid, int lastPos)
+{
+    int extCount = 0, length = 0, outSz = 0, sz = 0, ret = 0;
+    int isSet = 0, found = 0, loc;
+    const byte* rawCert;
+    const byte* input;
+    word32 oid, idx = 0, tmpIdx = 0;
+    DecodedCert cert;
+
+    WOLFSSL_ENTER("wolfSSL_X509_get_ext_by_NID");
+
+    if(x509 == NULL){
+        WOLFSSL_MSG("\tNot passed a certificate");
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    if(lastPos < -1 || (lastPos > (wolfSSL_X509_get_ext_count(x509) - 1))){
+        WOLFSSL_MSG("\tBad location argument");
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    loc = lastPos + 1;
+
+    rawCert = wolfSSL_X509_get_der((WOLFSSL_X509*)x509, &outSz);
+    if (rawCert == NULL) {
+        WOLFSSL_MSG("\tX509_get_der() failed");
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    InitDecodedCert( &cert, rawCert, (word32)outSz, 0);
+
+    if (ParseCert(&cert, CA_TYPE, NO_VERIFY, NULL) < 0) {
+        WOLFSSL_MSG("\tCertificate parsing failed");
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    input = cert.extensions;
+    sz = cert.extensionsSz;
+
+    if (input == NULL || sz == 0) {
+        WOLFSSL_MSG("\tfail: should be an EXTENSIONS");
+        FreeDecodedCert(&cert);
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    if (input[idx++] != ASN_EXTENSIONS) {
+        WOLFSSL_MSG("\tfail: should be an EXTENSIONS");
+        FreeDecodedCert(&cert);
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    if (GetLength(input, &idx, &length, sz) < 0) {
+        WOLFSSL_MSG("\tfail: invalid length");
+        FreeDecodedCert(&cert);
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    if (GetSequence(input, &idx, &length, sz) < 0) {
+        WOLFSSL_MSG("\tfail: should be a SEQUENCE (1)");
+        FreeDecodedCert(&cert);
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    while (idx < (word32)sz) {
+        oid = 0;
+
+        if (GetSequence(input, &idx, &length, sz) < 0) {
+            WOLFSSL_MSG("\tfail: should be a SEQUENCE");
+            FreeDecodedCert(&cert);
+            return WOLFSSL_FATAL_ERROR;
+        }
+
+        tmpIdx = idx;
+        ret = GetObjectId(input, &idx, &oid, oidCertExtType, sz);
+        if (ret < 0) {
+            WOLFSSL_MSG("\tfail: OBJECT ID");
+            FreeDecodedCert(&cert);
+            return WOLFSSL_FATAL_ERROR;
+        }
+        idx = tmpIdx;
+
+        if (extCount >= loc) {
+            /* extCount >= loc. Now check if extension has been set */
+            isSet = wolfSSL_X509_ext_isSet_by_NID((WOLFSSL_X509*)x509, oid);
+
+            if (isSet && ((word32)nid == oid)) {
+                found = 1;
+                break;
+            }
+        }
+
+        idx += length;
+        extCount++;
+    } /* while(idx < sz) */
+
+    FreeDecodedCert(&cert);
+
+    return found ? extCount : WOLFSSL_FATAL_ERROR;
+}
+
+
+#endif /* OPENSSL_ALL */
+
+WOLFSSL_ASN1_BIT_STRING* wolfSSL_ASN1_BIT_STRING_new(void)
+{
+    WOLFSSL_ASN1_BIT_STRING* str;
+
+    str = (WOLFSSL_ASN1_BIT_STRING*)XMALLOC(sizeof(WOLFSSL_ASN1_BIT_STRING),
+                                                  NULL, DYNAMIC_TYPE_OPENSSL);
+    if (str) {
+        XMEMSET(str, 0, sizeof(WOLFSSL_ASN1_BIT_STRING));
+    }
+    return str;
+}
+
+void wolfSSL_ASN1_BIT_STRING_free(WOLFSSL_ASN1_BIT_STRING* str)
+{
+    if (str) {
+        if (str->data) {
+            XFREE(str->data, NULL, DYNAMIC_TYPE_OPENSSL);
+            str->data = NULL;
+        }
+        XFREE(str, NULL, DYNAMIC_TYPE_OPENSSL);
+    }
+}
+
+int wolfSSL_ASN1_BIT_STRING_get_bit(const WOLFSSL_ASN1_BIT_STRING* str, int i)
+{
+    if (!str || !str->data || str->length <= (i/8) || i < 0) {
+        return WOLFSSL_FAILURE;
+    }
+
+    return (str->data[i/8] & (1<<(7-(i%8)))) ? 1 : 0;
+}
+
+/* Looks for the extension matching the passed in nid
+ *
+ * c   : if not null then is set to status value -2 if multiple occurrences
  *       of the extension are found, -1 if not found, 0 if found and not
  *       critical, and 1 if found and critical.
  * nid : Extension OID to be found.
  * idx : if NULL return first extension found match, otherwise start search at
  *       idx location and set idx to the location of extension returned.
- * returns NULL or a pointer to an WOLFSSL_STACK holding extension structure
+ * returns NULL or a pointer to an WOLFSSL_ASN1_BIT_STRING (for KEY_USAGE_OID)
+ * or WOLFSSL_STACK (for other)
+ * holding extension structure
  *
  * NOTE code for decoding extensions is in asn.c DecodeCertExtensions --
  * use already decoded extension in this function to avoid decoding twice.
  * Currently we do not make use of idx since getting pre decoded extensions.
  */
-void* wolfSSL_X509_get_ext_d2i(const WOLFSSL_X509* x509,
-                                                     int nid, int* c, int* idx)
-{
+void* wolfSSL_X509_get_ext_d2i(const WOLFSSL_X509* x509, int nid, int* c,
+    int* idx)
+{
+    void* ret = NULL;
     WOLFSSL_STACK* sk = NULL;
     WOLFSSL_ASN1_OBJECT* obj = NULL;
+    WOLFSSL_GENERAL_NAME* gn = NULL;
 
     WOLFSSL_ENTER("wolfSSL_X509_get_ext_d2i");
 
@@ -6810,20 +9069,12 @@
         *c = -1; /* default to not found */
     }
 
-    sk = (WOLF_STACK_OF(WOLFSSL_ASN1_OBJECT)*)XMALLOC(
-                sizeof(WOLF_STACK_OF(WOLFSSL_ASN1_OBJECT)), NULL, DYNAMIC_TYPE_ASN1);
-    if (sk == NULL) {
-        return NULL;
-    }
-    XMEMSET(sk, 0, sizeof(WOLF_STACK_OF(WOLFSSL_ASN1_OBJECT)));
-
     switch (nid) {
         case BASIC_CA_OID:
             if (x509->basicConstSet) {
                 obj = wolfSSL_ASN1_OBJECT_new();
                 if (obj == NULL) {
                     WOLFSSL_MSG("Issue creating WOLFSSL_ASN1_OBJECT struct");
-                    wolfSSL_sk_ASN1_OBJECT_free(sk);
                     return NULL;
                 }
                 if (c != NULL) {
@@ -6831,60 +9082,81 @@
                 }
                 obj->type = BASIC_CA_OID;
                 obj->grp  = oidCertExtType;
+                obj->nid  = nid;
+                obj->dynamic |= WOLFSSL_ASN1_DYNAMIC;
+            #if defined(OPENSSL_ALL) || defined(WOLFSSL_QT) || \
+                    defined(WOLFSSL_APACHE_HTTPD)
+                obj->ca = x509->isCa;
+            #endif
             }
             else {
                 WOLFSSL_MSG("No Basic Constraint set");
             }
-            break;
+            return obj;
 
         case ALT_NAMES_OID:
-            {
-                DNS_entry* dns = NULL;
-
-                if (x509->subjAltNameSet && x509->altNames != NULL) {
-                    /* alt names are DNS_entry structs */
-                    if (c != NULL) {
-                        if (x509->altNames->next != NULL) {
-                            *c = -2; /* more then one found */
-                        }
-                        else {
-                            *c = x509->subjAltNameCrit;
-                        }
-                    }
-
-                    dns = x509->altNames;
-                    while (dns != NULL) {
-                        obj = wolfSSL_ASN1_OBJECT_new();
-                        if (obj == NULL) {
-                            WOLFSSL_MSG("Issue creating WOLFSSL_ASN1_OBJECT struct");
-                            wolfSSL_sk_ASN1_OBJECT_free(sk);
-                            return NULL;
+        {
+            DNS_entry* dns = NULL;
+            /* Malloc GENERAL_NAME stack */
+            sk = (WOLF_STACK_OF(WOLFSSL_GENERAL_NAME)*)XMALLOC(
+                        sizeof(WOLF_STACK_OF(WOLFSSL_GENERAL_NAME)), NULL,
+                                                         DYNAMIC_TYPE_ASN1);
+            if (sk == NULL) {
+                return NULL;
+            }
+            XMEMSET(sk, 0, sizeof(WOLF_STACK_OF(WOLFSSL_GENERAL_NAME)));
+            sk->type = STACK_TYPE_GEN_NAME;
+
+            if (x509->subjAltNameSet && x509->altNames != NULL) {
+                /* alt names are DNS_entry structs */
+                if (c != NULL) {
+                    if (x509->altNames->next != NULL) {
+                        *c = -2; /* more then one found */
+                    }
+                    else {
+                        *c = x509->subjAltNameCrit;
+                    }
+                }
+
+                dns = x509->altNames;
+                /* Currently only support GEN_DNS type */
+                while (dns != NULL) {
+                    gn = wolfSSL_GENERAL_NAME_new();
+                    if (gn == NULL) {
+                        WOLFSSL_MSG("Error creating GENERAL_NAME");
+                        wolfSSL_sk_free(sk);
+                        return NULL;
+                    }
+
+                    gn->type = dns->type;
+                    gn->d.ia5->length = dns->len;
+                    if (wolfSSL_ASN1_STRING_set(gn->d.ia5, dns->name,
+                                gn->d.ia5->length) != WOLFSSL_SUCCESS) {
+                        WOLFSSL_MSG("ASN1_STRING_set failed");
+                        wolfSSL_GENERAL_NAME_free(gn);
+                        wolfSSL_sk_free(sk);
+                        return NULL;
+                    }
+
+                    dns = dns->next;
+                    /* last dns in list add at end of function */
+                    if (dns != NULL) {
+                        if (wolfSSL_sk_GENERAL_NAME_push(sk, gn) !=
+                                                          WOLFSSL_SUCCESS) {
+                            WOLFSSL_MSG("Error pushing ASN1 object onto stack");
+                            wolfSSL_GENERAL_NAME_free(gn);
+                            wolfSSL_sk_free(sk);
+                            sk = NULL;
                         }
-                        obj->type = dns->type;
-                        obj->grp  = oidCertExtType;
-                        obj->obj  = (byte*)dns->name;
-
-                        /* set app derefrenced pointers */
-                        obj->d.ia5_internal.data   = dns->name;
-                        obj->d.ia5_internal.length = (int)XSTRLEN(dns->name);
-                        dns = dns->next;
-                        /* last dns in list add at end of function */
-                        if (dns != NULL) {
-                            if (wolfSSL_sk_ASN1_OBJECT_push(sk, obj) !=
-                                                                  WOLFSSL_SUCCESS) {
-                            WOLFSSL_MSG("Error pushing ASN1 object onto stack");
-                            wolfSSL_ASN1_OBJECT_free(obj);
-                            wolfSSL_sk_ASN1_OBJECT_free(sk);
-                            sk = NULL;
-                            }
-                        }
-                    }
-                }
-                else {
-                    WOLFSSL_MSG("No Alt Names set");
-                }
-            }
-            break;
+                    }
+                }
+            }
+            else {
+                WOLFSSL_MSG("No Alt Names set");
+            }
+
+            break;
+        }
 
         case CRL_DIST_OID:
             if (x509->CRLdistSet && x509->CRLInfo != NULL) {
@@ -6894,13 +9166,14 @@
                 obj = wolfSSL_ASN1_OBJECT_new();
                 if (obj == NULL) {
                     WOLFSSL_MSG("Issue creating WOLFSSL_ASN1_OBJECT struct");
-                    wolfSSL_sk_ASN1_OBJECT_free(sk);
                     return NULL;
                 }
                 obj->type  = CRL_DIST_OID;
                 obj->grp   = oidCertExtType;
                 obj->obj   = x509->CRLInfo;
                 obj->objSz = x509->CRLInfoSz;
+                obj->dynamic |= WOLFSSL_ASN1_DYNAMIC;
+                obj->dynamic &= ~WOLFSSL_ASN1_DYNAMIC_DATA ;
             }
             else {
                 WOLFSSL_MSG("No CRL dist set");
@@ -6915,13 +9188,14 @@
                 obj = wolfSSL_ASN1_OBJECT_new();
                 if (obj == NULL) {
                     WOLFSSL_MSG("Issue creating WOLFSSL_ASN1_OBJECT struct");
-                    wolfSSL_sk_ASN1_OBJECT_free(sk);
                     return NULL;
                 }
                 obj->type  = AUTH_INFO_OID;
                 obj->grp   = oidCertExtType;
                 obj->obj   = x509->authInfo;
                 obj->objSz = x509->authInfoSz;
+                obj->dynamic |= WOLFSSL_ASN1_DYNAMIC;
+                obj->dynamic &= ~WOLFSSL_ASN1_DYNAMIC_DATA;
             }
             else {
                 WOLFSSL_MSG("No Auth Info set");
@@ -6936,13 +9210,14 @@
                 obj = wolfSSL_ASN1_OBJECT_new();
                 if (obj == NULL) {
                     WOLFSSL_MSG("Issue creating WOLFSSL_ASN1_OBJECT struct");
-                    wolfSSL_sk_ASN1_OBJECT_free(sk);
                     return NULL;
                 }
                 obj->type  = AUTH_KEY_OID;
                 obj->grp   = oidCertExtType;
                 obj->obj   = x509->authKeyId;
                 obj->objSz = x509->authKeyIdSz;
+                obj->dynamic |= WOLFSSL_ASN1_DYNAMIC;
+                obj->dynamic &= ~WOLFSSL_ASN1_DYNAMIC_DATA;
             }
             else {
                 WOLFSSL_MSG("No Auth Key set");
@@ -6957,13 +9232,14 @@
                 obj = wolfSSL_ASN1_OBJECT_new();
                 if (obj == NULL) {
                     WOLFSSL_MSG("Issue creating WOLFSSL_ASN1_OBJECT struct");
-                    wolfSSL_sk_ASN1_OBJECT_free(sk);
                     return NULL;
                 }
                 obj->type  = SUBJ_KEY_OID;
                 obj->grp   = oidCertExtType;
                 obj->obj   = x509->subjKeyId;
                 obj->objSz = x509->subjKeyIdSz;
+                obj->dynamic |= WOLFSSL_ASN1_DYNAMIC;
+                obj->dynamic &= ~WOLFSSL_ASN1_DYNAMIC_DATA;
             }
             else {
                 WOLFSSL_MSG("No Subject Key set");
@@ -6971,39 +9247,26 @@
             break;
 
         case CERT_POLICY_OID:
-            #ifdef WOLFSSL_CERT_EXT
-            {
-                int i;
-
-                if (x509->certPoliciesNb > 0) {
-                    if (c != NULL) {
-                        if (x509->certPoliciesNb > 1) {
-                            *c = -2;
-                        }
-                        else {
-                            *c = 0;
-                        }
-                    }
-
-                    for (i = 0; i < x509->certPoliciesNb - 1; i++) {
-                        obj = wolfSSL_ASN1_OBJECT_new();
-                        if (obj == NULL) {
-                            WOLFSSL_MSG("Issue creating WOLFSSL_ASN1_OBJECT struct");
-                            wolfSSL_sk_ASN1_OBJECT_free(sk);
-                            return NULL;
-                        }
-                        obj->type  = CERT_POLICY_OID;
-                        obj->grp   = oidCertExtType;
-                        obj->obj   = (byte*)(x509->certPolicies[i]);
-                        obj->objSz = MAX_CERTPOL_SZ;
-                        if (wolfSSL_sk_ASN1_OBJECT_push(sk, obj)
-                                                               != WOLFSSL_SUCCESS) {
-                            WOLFSSL_MSG("Error pushing ASN1 object onto stack");
-                            wolfSSL_ASN1_OBJECT_free(obj);
-                            wolfSSL_sk_ASN1_OBJECT_free(sk);
-                            sk = NULL;
-                        }
-                    }
+        {
+        #ifdef WOLFSSL_CERT_EXT
+            int i;
+
+            if (x509->certPoliciesNb > 0) {
+                if (c != NULL) {
+                    if (x509->certPoliciesNb > 1) {
+                        *c = -2;
+                    }
+                    else {
+                        *c = 0;
+                    }
+                }
+
+                sk = wolfSSL_sk_new_asn1_obj();
+                if (sk == NULL) {
+                    return NULL;
+                }
+
+                for (i = 0; i < x509->certPoliciesNb - 1; i++) {
                     obj = wolfSSL_ASN1_OBJECT_new();
                     if (obj == NULL) {
                         WOLFSSL_MSG("Issue creating WOLFSSL_ASN1_OBJECT struct");
@@ -7014,39 +9277,15 @@
                     obj->grp   = oidCertExtType;
                     obj->obj   = (byte*)(x509->certPolicies[i]);
                     obj->objSz = MAX_CERTPOL_SZ;
-                }
-                else {
-                    WOLFSSL_MSG("No Cert Policy set");
-                }
-            }
-            #else
-                #ifdef WOLFSSL_SEP
-                if (x509->certPolicySet) {
-                    if (c != NULL) {
-                        *c = x509->certPolicyCrit;
-                    }
-                    obj = wolfSSL_ASN1_OBJECT_new();
-                    if (obj == NULL) {
-                        WOLFSSL_MSG("Issue creating WOLFSSL_ASN1_OBJECT struct");
+                    obj->dynamic |= WOLFSSL_ASN1_DYNAMIC;
+                    obj->dynamic &= ~WOLFSSL_ASN1_DYNAMIC_DATA;
+                    if (wolfSSL_sk_ASN1_OBJECT_push(sk, obj)
+                                                           != WOLFSSL_SUCCESS) {
+                        WOLFSSL_MSG("Error pushing ASN1 object onto stack");
+                        wolfSSL_ASN1_OBJECT_free(obj);
                         wolfSSL_sk_ASN1_OBJECT_free(sk);
-                        return NULL;
-                    }
-                    obj->type  = CERT_POLICY_OID;
-                    obj->grp   = oidCertExtType;
-                }
-                else {
-                    WOLFSSL_MSG("No Cert Policy set");
-                }
-                #else
-                WOLFSSL_MSG("wolfSSL not built with WOLFSSL_SEP or WOLFSSL_CERT_EXT");
-                #endif /* WOLFSSL_SEP */
-            #endif /* WOLFSSL_CERT_EXT */
-            break;
-
-        case KEY_USAGE_OID:
-            if (x509->keyUsageSet) {
-                if (c != NULL) {
-                    *c = x509->keyUsageCrit;
+                        sk = NULL;
+                    }
                 }
                 obj = wolfSSL_ASN1_OBJECT_new();
                 if (obj == NULL) {
@@ -7054,16 +9293,68 @@
                     wolfSSL_sk_ASN1_OBJECT_free(sk);
                     return NULL;
                 }
-                obj->type  = KEY_USAGE_OID;
+                obj->type  = CERT_POLICY_OID;
+                obj->grp   = oidCertExtType;
+                obj->obj   = (byte*)(x509->certPolicies[i]);
+                obj->objSz = MAX_CERTPOL_SZ;
+                obj->dynamic |= WOLFSSL_ASN1_DYNAMIC;
+                obj->dynamic &= ~WOLFSSL_ASN1_DYNAMIC_DATA;
+            }
+            else {
+                WOLFSSL_MSG("No Cert Policy set");
+            }
+        #elif defined(WOLFSSL_SEP)
+            if (x509->certPolicySet) {
+                if (c != NULL) {
+                    *c = x509->certPolicyCrit;
+                }
+                obj = wolfSSL_ASN1_OBJECT_new();
+                if (obj == NULL) {
+                    WOLFSSL_MSG("Issue creating WOLFSSL_ASN1_OBJECT struct");
+                    return NULL;
+                }
+                obj->type  = CERT_POLICY_OID;
                 obj->grp   = oidCertExtType;
-                obj->obj   = (byte*)&(x509->keyUsage);
-                obj->objSz = sizeof(word16);
+                obj->dynamic |= WOLFSSL_ASN1_DYNAMIC;
+            }
+            else {
+                WOLFSSL_MSG("No Cert Policy set");
+            }
+        #else
+            WOLFSSL_MSG("wolfSSL not built with WOLFSSL_SEP or WOLFSSL_CERT_EXT");
+        #endif
+            break;
+        }
+        case KEY_USAGE_OID:
+        {
+            WOLFSSL_ASN1_BIT_STRING* bit_str = NULL;
+            if (x509->keyUsageSet) {
+                if (c != NULL) {
+                    *c = x509->keyUsageCrit;
+                }
+
+                bit_str = wolfSSL_ASN1_BIT_STRING_new();
+                if (bit_str == NULL) {
+                    WOLFSSL_MSG("Issue creating WOLFSSL_ASN1_BIT_STRING struct");
+                    return NULL;
+                }
+
+                bit_str->type = KEY_USAGE_OID;
+                bit_str->flags = 0;
+                bit_str->length = sizeof(word16);
+                bit_str->data = (byte*)XMALLOC(bit_str->length, NULL, DYNAMIC_TYPE_OPENSSL);
+                if (bit_str->data == NULL) {
+                    wolfSSL_ASN1_BIT_STRING_free(bit_str);
+                    return NULL;
+                }
+                XMEMCPY(bit_str->data, &x509->keyUsage, bit_str->length);
             }
             else {
                 WOLFSSL_MSG("No Key Usage set");
             }
-            break;
-
+            /* don't add stack of and return bit string directly */
+            return bit_str;
+        }
         case INHIBIT_ANY_OID:
             WOLFSSL_MSG("INHIBIT ANY extension not supported");
             break;
@@ -7081,13 +9372,14 @@
                 obj = wolfSSL_ASN1_OBJECT_new();
                 if (obj == NULL) {
                     WOLFSSL_MSG("Issue creating WOLFSSL_ASN1_OBJECT struct");
-                    wolfSSL_sk_ASN1_OBJECT_free(sk);
                     return NULL;
                 }
                 obj->type  = EXT_KEY_USAGE_OID;
                 obj->grp   = oidCertExtType;
                 obj->obj   = x509->extKeyUsageSrc;
                 obj->objSz = x509->extKeyUsageSz;
+                obj->dynamic |= WOLFSSL_ASN1_DYNAMIC;
+                obj->dynamic &= ~WOLFSSL_ASN1_DYNAMIC_DATA;
             }
             else {
                 WOLFSSL_MSG("No Extended Key Usage set");
@@ -7126,63 +9418,163 @@
             WOLFSSL_MSG("Unsupported/Unknown extension OID");
     }
 
-    if (obj != NULL) {
-        if (wolfSSL_sk_ASN1_OBJECT_push(sk, obj) != WOLFSSL_SUCCESS) {
-            WOLFSSL_MSG("Error pushing ASN1 object onto stack");
-            wolfSSL_ASN1_OBJECT_free(obj);
-            wolfSSL_sk_ASN1_OBJECT_free(sk);
-            sk = NULL;
-        }
-    }
-    else { /* no ASN1 object found for extension, free stack */
-        wolfSSL_sk_ASN1_OBJECT_free(sk);
-        sk = NULL;
-    }
+    /* make sure stack of is allocated */
+    if ((obj || gn) && sk == NULL) {
+        sk = wolfSSL_sk_new_asn1_obj();
+        if (sk == NULL) {
+            goto err;
+        }
+    }
+    if (obj && wolfSSL_sk_ASN1_OBJECT_push(sk, obj) == WOLFSSL_SUCCESS) {
+        /* obj pushed successfully on stack */
+    }
+    else if (gn && wolfSSL_sk_GENERAL_NAME_push(sk, gn) == WOLFSSL_SUCCESS) {
+        /* gn pushed successfully on stack */
+    }
+    else {
+        /* Nothing to push or push failed */
+        WOLFSSL_MSG("Error pushing ASN1_OBJECT or GENERAL_NAME object onto stack "
+                    "or nothing to push.");
+        goto err;
+    }
+    ret = sk;
 
     (void)idx;
 
-    return sk;
-}
-
-
-/* this function makes the assumption that out buffer is big enough for digest*/
-static int wolfSSL_EVP_Digest(unsigned char* in, int inSz, unsigned char* out,
-                              unsigned int* outSz, const WOLFSSL_EVP_MD* evp,
-                              WOLFSSL_ENGINE* eng)
-{
-    int err;
-    int hashType = WC_HASH_TYPE_NONE;
-    int hashSz;
-
-    (void)eng;
-
-    err = wolfSSL_EVP_get_hashinfo(evp, &hashType, &hashSz);
-    if (err != WOLFSSL_SUCCESS)
-        return err;
-
-    *outSz = hashSz;
-
-    if (wc_Hash((enum wc_HashType)hashType, in, inSz, out, *outSz) != 0) {
-        return WOLFSSL_FAILURE;
-    }
-
-    return WOLFSSL_SUCCESS;
-}
-
-
+    return ret;
+
+err:
+    if (obj) {
+        wolfSSL_ASN1_OBJECT_free(obj);
+    }
+    if (gn) {
+        wolfSSL_GENERAL_NAME_free(gn);
+    }
+    if (sk) {
+        wolfSSL_sk_ASN1_OBJECT_free(sk);
+    }
+    return NULL;
+}
+
+
+int wolfSSL_X509_add_altname(WOLFSSL_X509* x509, const char* name, int type)
+{
+    DNS_entry* newAltName = NULL;
+    char* nameCopy = NULL;
+    word32 nameSz;
+
+    if (x509 == NULL)
+        return WOLFSSL_FAILURE;
+
+    if (name == NULL)
+        return WOLFSSL_SUCCESS;
+
+    nameSz = (word32)XSTRLEN(name);
+    if (nameSz == 0)
+        return WOLFSSL_SUCCESS;
+
+    newAltName = (DNS_entry*)XMALLOC(sizeof(DNS_entry),
+            x509->heap, DYNAMIC_TYPE_ALTNAME);
+    if (newAltName == NULL)
+        return WOLFSSL_FAILURE;
+
+    nameCopy = (char*)XMALLOC(nameSz + 1, x509->heap, DYNAMIC_TYPE_ALTNAME);
+    if (nameCopy == NULL) {
+        XFREE(newAltName, x509->heap, DYNAMIC_TYPE_ALTNAME);
+        return WOLFSSL_FAILURE;
+    }
+
+    XMEMCPY(nameCopy, name, nameSz + 1);
+
+    newAltName->next = x509->altNames;
+    newAltName->type = type;
+    newAltName->len = nameSz;
+    newAltName->name = nameCopy;
+    x509->altNames = newAltName;
+
+    return WOLFSSL_SUCCESS;
+}
+
+
+#ifndef NO_WOLFSSL_STUB
+int wolfSSL_X509_add_ext(WOLFSSL_X509 *x509, WOLFSSL_X509_EXTENSION *ext, int loc)
+{
+    WOLFSSL_STUB("wolfSSL_X509_add_ext");
+    (void)x509;
+    (void)ext;
+    (void)loc;
+    return WOLFSSL_FAILURE;
+}
+
+/* currently LHASH is not implemented (and not needed for Apache port) */
+WOLFSSL_X509_EXTENSION* wolfSSL_X509V3_EXT_conf_nid(
+        WOLF_LHASH_OF(CONF_VALUE)* conf, WOLFSSL_X509V3_CTX* ctx, int nid,
+        char* value)
+{
+    WOLFSSL_STUB("wolfSSL_X509V3_EXT_conf_nid");
+
+    if (conf != NULL) {
+        WOLFSSL_MSG("Handling LHASH not implemented yet");
+        return NULL;
+    }
+
+    (void)conf;
+    (void)ctx;
+    (void)nid;
+    (void)value;
+    return NULL;
+}
+
+void wolfSSL_X509V3_set_ctx_nodb(WOLFSSL_X509V3_CTX* ctx)
+{
+    WOLFSSL_STUB("wolfSSL_X509V3_set_ctx_nodb");
+    (void)ctx;
+}
+#endif /* !NO_WOLFSSL_STUB */
+
+/* Returns pointer to ASN1_OBJECT from an X509_EXTENSION object */
+WOLFSSL_ASN1_OBJECT* wolfSSL_X509_EXTENSION_get_object \
+    (WOLFSSL_X509_EXTENSION* ext)
+{
+    WOLFSSL_ENTER("wolfSSL_X509_EXTENSION_get_object");
+    if(ext == NULL)
+        return NULL;
+    return ext->obj;
+}
+
+/* Returns pointer to ASN1_STRING in X509_EXTENSION object */
+WOLFSSL_ASN1_STRING* wolfSSL_X509_EXTENSION_get_data(WOLFSSL_X509_EXTENSION* ext)
+{
+    WOLFSSL_ENTER("wolfSSL_X509_EXTENSION_get_data");
+    if (ext == NULL)
+        return NULL;
+    return &ext->value;
+}
+
+#if !defined(NO_PWDBASED)
 int wolfSSL_X509_digest(const WOLFSSL_X509* x509, const WOLFSSL_EVP_MD* digest,
         unsigned char* buf, unsigned int* len)
 {
+    int ret;
+
     WOLFSSL_ENTER("wolfSSL_X509_digest");
 
     if (x509 == NULL || digest == NULL) {
-        return WOLFSSL_FAILURE;
-    }
-
-    return wolfSSL_EVP_Digest(x509->derCert->buffer, x509->derCert->length, buf,
+        WOLFSSL_MSG("Null argument found");
+        return WOLFSSL_FAILURE;
+    }
+
+    if (x509->derCert == NULL) {
+        WOLFSSL_MSG("No DER certificate stored in X509");
+        return WOLFSSL_FAILURE;
+    }
+
+    ret = wolfSSL_EVP_Digest(x509->derCert->buffer, x509->derCert->length, buf,
                               len, digest, NULL);
-}
-
+    WOLFSSL_LEAVE("wolfSSL_X509_digest", ret);
+    return ret;
+}
+#endif
 
 int wolfSSL_use_PrivateKey(WOLFSSL* ssl, WOLFSSL_EVP_PKEY* pkey)
 {
@@ -7196,7 +9588,7 @@
 }
 
 
-int wolfSSL_use_PrivateKey_ASN1(int pri, WOLFSSL* ssl, unsigned char* der,
+int wolfSSL_use_PrivateKey_ASN1(int pri, WOLFSSL* ssl, const unsigned char* der,
                                 long derSz)
 {
     WOLFSSL_ENTER("wolfSSL_use_PrivateKey_ASN1");
@@ -7207,6 +9599,24 @@
     (void)pri; /* type of private key */
     return wolfSSL_use_PrivateKey_buffer(ssl, der, derSz, WOLFSSL_FILETYPE_ASN1);
 }
+/******************************************************************************
+* wolfSSL_CTX_use_PrivateKey_ASN1 - loads a private key buffer into the SSL ctx
+*
+* RETURNS:
+* returns WOLFSSL_SUCCESS on success, otherwise returns WOLFSSL_FAILURE
+*/
+
+int wolfSSL_CTX_use_PrivateKey_ASN1(int pri, WOLFSSL_CTX* ctx,
+                                            unsigned char* der, long derSz)
+{
+    WOLFSSL_ENTER("wolfSSL_CTX_use_PrivateKey_ASN1");
+    if (ctx == NULL || der == NULL ) {
+        return WOLFSSL_FAILURE;
+    }
+
+    (void)pri; /* type of private key */
+    return wolfSSL_CTX_use_PrivateKey_buffer(ctx, der, derSz, WOLFSSL_FILETYPE_ASN1);
+}
 
 
 #ifndef NO_RSA
@@ -7221,22 +9631,6 @@
 }
 #endif
 
-int wolfSSL_use_certificate_ASN1(WOLFSSL* ssl, unsigned char* der, int derSz)
-{
-    long idx;
-
-    WOLFSSL_ENTER("wolfSSL_use_certificate_ASN1");
-    if (der != NULL && ssl != NULL) {
-        if (ProcessBuffer(NULL, der, derSz, WOLFSSL_FILETYPE_ASN1, CERT_TYPE, ssl,
-                                                        &idx, 0) == WOLFSSL_SUCCESS)
-            return WOLFSSL_SUCCESS;
-    }
-
-    (void)idx;
-    return WOLFSSL_FAILURE;
-}
-
-
 int wolfSSL_use_certificate(WOLFSSL* ssl, WOLFSSL_X509* x509)
 {
     long idx;
@@ -7244,46 +9638,90 @@
     WOLFSSL_ENTER("wolfSSL_use_certificate");
     if (x509 != NULL && ssl != NULL && x509->derCert != NULL) {
         if (ProcessBuffer(NULL, x509->derCert->buffer, x509->derCert->length,
-                     WOLFSSL_FILETYPE_ASN1, CERT_TYPE, ssl, &idx, 0) == WOLFSSL_SUCCESS)
+                          WOLFSSL_FILETYPE_ASN1, CERT_TYPE, ssl, &idx, 0,
+                          GET_VERIFY_SETTING_SSL(ssl)) == WOLFSSL_SUCCESS) {
             return WOLFSSL_SUCCESS;
+        }
     }
 
     (void)idx;
     return WOLFSSL_FAILURE;
 }
+
 #endif /* NO_CERTS */
 
+#endif /* OPENSSL_EXTRA */
+
+#ifndef NO_CERTS
+int wolfSSL_use_certificate_ASN1(WOLFSSL* ssl, const unsigned char* der,
+                                 int derSz)
+{
+    long idx;
+
+    WOLFSSL_ENTER("wolfSSL_use_certificate_ASN1");
+    if (der != NULL && ssl != NULL) {
+        if (ProcessBuffer(NULL, der, derSz, WOLFSSL_FILETYPE_ASN1, CERT_TYPE,
+                ssl, &idx, 0, GET_VERIFY_SETTING_SSL(ssl)) == WOLFSSL_SUCCESS) {
+            return WOLFSSL_SUCCESS;
+        }
+    }
+
+    (void)idx;
+    return WOLFSSL_FAILURE;
+}
+
 #ifndef NO_FILESYSTEM
 
+WOLFSSL_ABI
 int wolfSSL_use_certificate_file(WOLFSSL* ssl, const char* file, int format)
 {
     WOLFSSL_ENTER("wolfSSL_use_certificate_file");
+
+    if (ssl == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
     if (ProcessFile(ssl->ctx, file, format, CERT_TYPE,
-                    ssl, 0, NULL) == WOLFSSL_SUCCESS)
-        return WOLFSSL_SUCCESS;
-
-    return WOLFSSL_FAILURE;
-}
-
-
+                ssl, 0, NULL, GET_VERIFY_SETTING_SSL(ssl)) == WOLFSSL_SUCCESS) {
+        return WOLFSSL_SUCCESS;
+    }
+
+    return WOLFSSL_FAILURE;
+}
+
+
+WOLFSSL_ABI
 int wolfSSL_use_PrivateKey_file(WOLFSSL* ssl, const char* file, int format)
 {
     WOLFSSL_ENTER("wolfSSL_use_PrivateKey_file");
+
+    if (ssl == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
     if (ProcessFile(ssl->ctx, file, format, PRIVATEKEY_TYPE,
-                    ssl, 0, NULL) == WOLFSSL_SUCCESS)
-        return WOLFSSL_SUCCESS;
-
-    return WOLFSSL_FAILURE;
-}
-
-
+                ssl, 0, NULL, GET_VERIFY_SETTING_SSL(ssl)) == WOLFSSL_SUCCESS) {
+        return WOLFSSL_SUCCESS;
+    }
+
+    return WOLFSSL_FAILURE;
+}
+
+
+WOLFSSL_ABI
 int wolfSSL_use_certificate_chain_file(WOLFSSL* ssl, const char* file)
 {
-   /* process up to MAX_CHAIN_DEPTH plus subject cert */
-   WOLFSSL_ENTER("wolfSSL_use_certificate_chain_file");
-   if (ProcessFile(ssl->ctx, file, WOLFSSL_FILETYPE_PEM, CERT_TYPE,
-                   ssl, 1, NULL) == WOLFSSL_SUCCESS)
-       return WOLFSSL_SUCCESS;
+    /* process up to MAX_CHAIN_DEPTH plus subject cert */
+    WOLFSSL_ENTER("wolfSSL_use_certificate_chain_file");
+
+    if (ssl == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    if (ProcessFile(ssl->ctx, file, WOLFSSL_FILETYPE_PEM, CERT_TYPE,
+               ssl, 1, NULL, GET_VERIFY_SETTING_SSL(ssl)) == WOLFSSL_SUCCESS) {
+        return WOLFSSL_SUCCESS;
+    }
 
    return WOLFSSL_FAILURE;
 }
@@ -7291,22 +9729,46 @@
 int wolfSSL_use_certificate_chain_file_format(WOLFSSL* ssl, const char* file,
                                               int format)
 {
-   /* process up to MAX_CHAIN_DEPTH plus subject cert */
-   WOLFSSL_ENTER("wolfSSL_use_certificate_chain_file_format");
-   if (ProcessFile(ssl->ctx, file, format, CERT_TYPE, ssl, 1,
-                   NULL) == WOLFSSL_SUCCESS)
-       return WOLFSSL_SUCCESS;
-
-   return WOLFSSL_FAILURE;
-}
-
+    /* process up to MAX_CHAIN_DEPTH plus subject cert */
+    WOLFSSL_ENTER("wolfSSL_use_certificate_chain_file_format");
+
+    if (ssl == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    if (ProcessFile(ssl->ctx, file, format, CERT_TYPE, ssl, 1,
+                    NULL, GET_VERIFY_SETTING_SSL(ssl)) == WOLFSSL_SUCCESS) {
+        return WOLFSSL_SUCCESS;
+    }
+    return WOLFSSL_FAILURE;
+}
+
+#endif /* !NO_FILESYSTEM */
+#endif /* !NO_CERTS */
 
 #ifdef HAVE_ECC
 
 /* Set Temp CTX EC-DHE size in octets, should be 20 - 66 for 160 - 521 bit */
 int wolfSSL_CTX_SetTmpEC_DHE_Sz(WOLFSSL_CTX* ctx, word16 sz)
 {
-    if (ctx == NULL || sz < ECC_MINSIZE || sz > ECC_MAXSIZE)
+    if (ctx == NULL)
+        return BAD_FUNC_ARG;
+
+    if (sz == 0) {
+        /* applies only to ECDSA */
+        if (ctx->privateKeyType != ecc_dsa_sa_algo)
+            return WOLFSSL_SUCCESS;
+
+        if (ctx->privateKeySz == 0) {
+            WOLFSSL_MSG("Must set private key/cert first");
+            return BAD_FUNC_ARG;
+        }
+
+        sz = (word16)ctx->privateKeySz;
+    }
+
+    /* check size */
+    if (sz < ECC_MINSIZE || sz > ECC_MAXSIZE)
         return BAD_FUNC_ARG;
 
     ctx->eccTempKeySz = sz;
@@ -7329,8 +9791,9 @@
 #endif /* HAVE_ECC */
 
 
-
-
+#ifdef OPENSSL_EXTRA
+
+#ifndef NO_FILESYSTEM
 int wolfSSL_CTX_use_RSAPrivateKey_file(WOLFSSL_CTX* ctx,const char* file,
                                    int format)
 {
@@ -7346,9 +9809,9 @@
 
     return wolfSSL_use_PrivateKey_file(ssl, file, format);
 }
-
 #endif /* NO_FILESYSTEM */
 
+
 /* Copies the master secret over to out buffer. If outSz is 0 returns the size
  * of master secret.
  *
@@ -7397,11 +9860,12 @@
 int wolfSSL_CTX_use_NTRUPrivateKey_file(WOLFSSL_CTX* ctx, const char* file)
 {
     WOLFSSL_ENTER("wolfSSL_CTX_use_NTRUPrivateKey_file");
+
     if (ctx == NULL)
         return WOLFSSL_FAILURE;
 
-    if (ProcessFile(ctx, file, WOLFSSL_FILETYPE_RAW, PRIVATEKEY_TYPE, NULL, 0, NULL)
-                         == WOLFSSL_SUCCESS) {
+    if (ProcessFile(ctx, file, WOLFSSL_FILETYPE_RAW, PRIVATEKEY_TYPE, NULL, 0,
+                    NULL, GET_VERIFY_SETTING_CTX(ctx)) == WOLFSSL_SUCCESS) {
         ctx->haveNTRU = 1;
         return WOLFSSL_SUCCESS;
     }
@@ -7418,6 +9882,9 @@
 void wolfSSL_CTX_set_verify(WOLFSSL_CTX* ctx, int mode, VerifyCallback vc)
 {
     WOLFSSL_ENTER("wolfSSL_CTX_set_verify");
+    if (ctx == NULL)
+        return;
+
     if (mode & WOLFSSL_VERIFY_PEER) {
         ctx->verifyPeer = 1;
         ctx->verifyNone = 0;  /* in case previously set */
@@ -7428,8 +9895,9 @@
         ctx->verifyPeer = 0;  /* in case previously set */
     }
 
-    if (mode & WOLFSSL_VERIFY_FAIL_IF_NO_PEER_CERT)
+    if (mode & WOLFSSL_VERIFY_FAIL_IF_NO_PEER_CERT) {
         ctx->failNoCert = 1;
+    }
 
     if (mode & WOLFSSL_VERIFY_FAIL_EXCEPT_PSK) {
         ctx->failNoCert    = 0; /* fail on all is set to fail on PSK */
@@ -7439,10 +9907,26 @@
     ctx->verifyCallback = vc;
 }
 
+#ifdef OPENSSL_ALL
+void wolfSSL_CTX_set_cert_verify_callback(WOLFSSL_CTX* ctx,
+    CertVerifyCallback cb, void* arg)
+{
+    WOLFSSL_ENTER("SSL_CTX_set_cert_verify_callback");
+    if (ctx == NULL)
+        return;
+
+    ctx->verifyCertCb = cb;
+    ctx->verifyCertCbArg = arg;
+}
+#endif
+
 
 void wolfSSL_set_verify(WOLFSSL* ssl, int mode, VerifyCallback vc)
 {
     WOLFSSL_ENTER("wolfSSL_set_verify");
+    if (ssl == NULL)
+        return;
+
     if (mode & WOLFSSL_VERIFY_PEER) {
         ssl->options.verifyPeer = 1;
         ssl->options.verifyNone = 0;  /* in case previously set */
@@ -7464,6 +9948,20 @@
     ssl->verifyCallback = vc;
 }
 
+void wolfSSL_set_verify_result(WOLFSSL *ssl, long v)
+{
+    WOLFSSL_ENTER("wolfSSL_set_verify_result");
+
+    if (ssl == NULL)
+        return;
+
+#ifdef OPENSSL_ALL
+    ssl->verifyCallbackResult = v;
+#else
+    (void)v;
+    WOLFSSL_STUB("wolfSSL_set_verify_result");
+#endif
+}
 
 /* store user ctx for verify callback */
 void wolfSSL_SetCertCbCtx(WOLFSSL* ssl, void* ctx)
@@ -7553,16 +10051,18 @@
 
 #ifndef NO_SESSION_CACHE
 
+WOLFSSL_ABI
 WOLFSSL_SESSION* wolfSSL_get_session(WOLFSSL* ssl)
 {
     WOLFSSL_ENTER("SSL_get_session");
     if (ssl)
-        return GetSession(ssl, 0, 0);
-
-    return NULL;
-}
-
-
+        return GetSession(ssl, 0, 1);
+
+    return NULL;
+}
+
+
+WOLFSSL_ABI
 int wolfSSL_set_session(WOLFSSL* ssl, WOLFSSL_SESSION* session)
 {
     WOLFSSL_ENTER("SSL_set_session");
@@ -7931,6 +10431,7 @@
 #ifndef NO_SESSION_CACHE
 
 /* on by default if built in but allow user to turn off */
+WOLFSSL_ABI
 long wolfSSL_CTX_set_session_cache_mode(WOLFSSL_CTX* ctx, long mode)
 {
     WOLFSSL_ENTER("SSL_CTX_set_session_cache_mode");
@@ -8054,6 +10555,7 @@
 
     while (listSz) {
         Signer* signer;
+        byte*   publicKey;
         byte*   start = current + idx;  /* for end checks on this signer */
         int     minSz = sizeof(signer->pubKeySize) + sizeof(signer->keyOID) +
                       sizeof(signer->nameLen) + sizeof(signer->subjectNameHash);
@@ -8083,14 +10585,15 @@
             FreeSigner(signer, cm->heap);
             return BUFFER_E;
         }
-        signer->publicKey = (byte*)XMALLOC(signer->pubKeySize, cm->heap,
-                                           DYNAMIC_TYPE_KEY);
-        if (signer->publicKey == NULL) {
+        publicKey = (byte*)XMALLOC(signer->pubKeySize, cm->heap,
+                                   DYNAMIC_TYPE_KEY);
+        if (publicKey == NULL) {
             FreeSigner(signer, cm->heap);
             return MEMORY_E;
         }
 
-        XMEMCPY(signer->publicKey, current + idx, signer->pubKeySize);
+        XMEMCPY(publicKey, current + idx, signer->pubKeySize);
+        signer->publicKey = publicKey;
         idx += signer->pubKeySize;
 
         /* nameLen */
@@ -8270,12 +10773,15 @@
        return WOLFSSL_BAD_FILE;
     }
 
-    XFSEEK(file, 0, XSEEK_END);
+    if(XFSEEK(file, 0, XSEEK_END) != 0) {
+        XFCLOSE(file);
+        return WOLFSSL_BAD_FILE;
+    }
     memSz = (int)XFTELL(file);
     XREWIND(file);
 
-    if (memSz <= 0) {
-        WOLFSSL_MSG("Bad file size");
+    if (memSz > MAX_WOLFSSL_FILE_SIZE || memSz <= 0) {
+        WOLFSSL_MSG("CM_RestoreCertCache file size error");
         XFCLOSE(file);
         return WOLFSSL_BAD_FILE;
     }
@@ -8398,11 +10904,125 @@
 #endif /* PERSIST_CERT_CACHE */
 #endif /* NO_CERTS */
 
+#ifdef OPENSSL_EXTRA
+
+
+/* removes all cipher suites from the list that contain "toRemove"
+ * returns the new list size on success
+ */
+static int wolfSSL_remove_ciphers(char* list, int sz, const char* toRemove)
+{
+    int idx = 0;
+    char* next = (char*)list;
+    int totalSz = sz;
+
+    if (list == NULL) {
+        return 0;
+    }
+
+    do {
+        char*  current = next;
+        char   name[MAX_SUITE_NAME + 1];
+        word32 length;
+
+        next   = XSTRSTR(next, ":");
+        length = min(sizeof(name), !next ? (word32)XSTRLEN(current) /* last */
+                                         : (word32)(next - current));
+
+        XSTRNCPY(name, current, length);
+        name[(length == sizeof(name)) ? length - 1 : length] = 0;
+
+        if (XSTRSTR(name, toRemove)) {
+            XMEMMOVE(list + idx, list + idx + length, totalSz - (idx + length));
+            totalSz -= length;
+            list[totalSz] = '\0';
+            next = current;
+        }
+        else {
+            idx += length;
+        }
+    } while (next++); /* ++ needed to skip ':' */
+
+    return totalSz;
+}
+
+/* parse some bulk lists like !eNULL / !aNULL
+ *
+ * returns WOLFSSL_SUCCESS on success and sets the cipher suite list
+ */
+static int wolfSSL_parse_cipher_list(WOLFSSL_CTX* ctx, Suites* suites,
+        const char* list)
+{
+    int       ret          = 0;
+    const int suiteSz      = GetCipherNamesSize();
+    char*     next         = (char*)list;
+    const CipherSuiteInfo* names = GetCipherNames();
+    char*     localList    = NULL;
+    int sz = 0;
+
+    if (suites == NULL || list == NULL) {
+        WOLFSSL_MSG("NULL argument");
+        return WOLFSSL_FAILURE;
+    }
+
+    /* does list contain eNULL or aNULL? */
+    if (XSTRSTR(list, "aNULL") || XSTRSTR(list, "eNULL")) {
+        do {
+            char*  current = next;
+            char   name[MAX_SUITE_NAME + 1];
+            int    i;
+            word32 length;
+
+            next   = XSTRSTR(next, ":");
+            length = min(sizeof(name), !next ? (word32)XSTRLEN(current) /*last*/
+                                             : (word32)(next - current));
+
+            XSTRNCPY(name, current, length);
+            name[(length == sizeof(name)) ? length - 1 : length] = 0;
+
+            /* check for "not" case */
+            if (name[0] == '!' && suiteSz > 0) {
+                /* populate list with all suites if not already created */
+                if (localList == NULL) {
+                    for (i = 0; i < suiteSz; i++) {
+                        sz += (int)XSTRLEN(names[i].name) + 2;
+                    }
+                    localList = (char*)XMALLOC(sz, ctx->heap,
+                                                       DYNAMIC_TYPE_TMP_BUFFER);
+                    if (localList == NULL) {
+                        return WOLFSSL_FAILURE;
+                    }
+                    wolfSSL_get_ciphers(localList, sz);
+                    sz = (int)XSTRLEN(localList);
+                }
+
+                if (XSTRSTR(name, "eNULL")) {
+                    wolfSSL_remove_ciphers(localList, sz, "-NULL");
+                }
+            }
+        }
+        while (next++); /* ++ needed to skip ':' */
+
+        ret = SetCipherList(ctx, suites, localList);
+        XFREE(localList, ctx->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        return (ret)? WOLFSSL_SUCCESS : WOLFSSL_FAILURE;
+    }
+    else {
+        return (SetCipherList(ctx, suites, list)) ? WOLFSSL_SUCCESS :
+            WOLFSSL_FAILURE;
+    }
+}
+
+#endif
+
 
 int wolfSSL_CTX_set_cipher_list(WOLFSSL_CTX* ctx, const char* list)
 {
     WOLFSSL_ENTER("wolfSSL_CTX_set_cipher_list");
 
+    if (ctx == NULL)
+        return WOLFSSL_FAILURE;
+
     /* alloc/init on demand only */
     if (ctx->suites == NULL) {
         ctx->suites = (Suites*)XMALLOC(sizeof(Suites), ctx->heap,
@@ -8414,14 +11034,34 @@
         XMEMSET(ctx->suites, 0, sizeof(Suites));
     }
 
+#ifdef OPENSSL_EXTRA
+    return wolfSSL_parse_cipher_list(ctx, ctx->suites, list);
+#else
     return (SetCipherList(ctx, ctx->suites, list)) ? WOLFSSL_SUCCESS : WOLFSSL_FAILURE;
+#endif
 }
 
 
 int wolfSSL_set_cipher_list(WOLFSSL* ssl, const char* list)
 {
     WOLFSSL_ENTER("wolfSSL_set_cipher_list");
+#ifdef SINGLE_THREADED
+    if (ssl->ctx->suites == ssl->suites) {
+        ssl->suites = (Suites*)XMALLOC(sizeof(Suites), ssl->heap,
+                                       DYNAMIC_TYPE_SUITES);
+        if (ssl->suites == NULL) {
+            WOLFSSL_MSG("Suites Memory error");
+            return MEMORY_E;
+        }
+        ssl->options.ownSuites = 1;
+    }
+#endif
+
+#ifdef OPENSSL_EXTRA
+    return wolfSSL_parse_cipher_list(ssl->ctx, ssl->suites, list);
+#else
     return (SetCipherList(ssl->ctx, ssl->suites, list)) ? WOLFSSL_SUCCESS : WOLFSSL_FAILURE;
+#endif
 }
 
 
@@ -8429,6 +11069,9 @@
 {
     int useNb = 0;
 
+    if (ssl == NULL)
+        return WOLFSSL_FAILURE;
+
     WOLFSSL_ENTER("wolfSSL_dtls_get_using_nonblock");
     if (ssl->options.dtls) {
 #ifdef WOLFSSL_DTLS
@@ -8450,6 +11093,10 @@
     (void)nonblock;
 
     WOLFSSL_ENTER("wolfSSL_dtls_set_using_nonblock");
+
+    if (ssl == NULL)
+        return;
+
     if (ssl->options.dtls) {
 #ifdef WOLFSSL_DTLS
         ssl->options.dtlsUseNonblock = (nonblock != 0);
@@ -8466,9 +11113,40 @@
 
 int wolfSSL_dtls_get_current_timeout(WOLFSSL* ssl)
 {
-    return ssl->dtls_timeout;
-}
-
+    int timeout = 0;
+    if (ssl)
+        timeout = ssl->dtls_timeout;
+
+    WOLFSSL_LEAVE("wolfSSL_dtls_get_current_timeout()", timeout);
+    return timeout;
+}
+
+int wolfSSL_DTLSv1_get_timeout(WOLFSSL* ssl, WOLFSSL_TIMEVAL* timeleft)
+{
+    if (ssl && timeleft) {
+        XMEMSET(timeleft, 0, sizeof(WOLFSSL_TIMEVAL));
+        timeleft->tv_sec = ssl->dtls_timeout;
+    }
+    return 0;
+}
+
+#ifndef NO_WOLFSSL_STUB
+int wolfSSL_DTLSv1_handle_timeout(WOLFSSL* ssl)
+{
+    WOLFSSL_STUB("SSL_DTLSv1_handle_timeout");
+    (void)ssl;
+    return 0;
+}
+#endif
+
+#ifndef NO_WOLFSSL_STUB
+void wolfSSL_DTLSv1_set_initial_timeout_duration(WOLFSSL* ssl, word32 duration_ms)
+{
+    WOLFSSL_STUB("SSL_DTLSv1_set_initial_timeout_duration");
+    (void)ssl;
+    (void)duration_ms;
+}
+#endif
 
 /* user may need to alter init dtls recv timeout, WOLFSSL_SUCCESS on ok */
 int wolfSSL_dtls_set_timeout_init(WOLFSSL* ssl, int timeout)
@@ -8508,15 +11186,42 @@
 int wolfSSL_dtls_got_timeout(WOLFSSL* ssl)
 {
     int result = WOLFSSL_SUCCESS;
+    WOLFSSL_ENTER("wolfSSL_dtls_got_timeout()");
+
+    if (ssl == NULL)
+        return WOLFSSL_FATAL_ERROR;
 
     if (!ssl->options.handShakeDone &&
         (DtlsMsgPoolTimeout(ssl) < 0 || DtlsMsgPoolSend(ssl, 0) < 0)) {
 
         result = WOLFSSL_FATAL_ERROR;
     }
+
+    WOLFSSL_LEAVE("wolfSSL_dtls_got_timeout()", result);
     return result;
 }
 
+
+/* retransmit all the saves messages, WOLFSSL_SUCCESS on ok */
+int wolfSSL_dtls_retransmit(WOLFSSL* ssl)
+{
+    WOLFSSL_ENTER("wolfSSL_dtls_retransmit()");
+
+    if (ssl == NULL)
+        return WOLFSSL_FATAL_ERROR;
+
+    if (!ssl->options.handShakeDone) {
+        int result = DtlsMsgPoolSend(ssl, 0);
+        if (result < 0) {
+            ssl->error = result;
+            WOLFSSL_ERROR(result);
+            return WOLFSSL_FATAL_ERROR;
+        }
+    }
+
+    return 0;
+}
+
 #endif /* DTLS */
 #endif /* LEANPSK */
 
@@ -8581,92 +11286,117 @@
 
 #endif /* WOLFSSL_DTLS && !NO_WOLFSSL_SERVER */
 
-#ifdef OPENSSL_EXTRA
-    WOLFSSL_METHOD* wolfSSLv23_method(void) {
+
+/* EITHER SIDE METHODS */
+#if defined(OPENSSL_EXTRA) || defined(WOLFSSL_EITHER_SIDE)
+    WOLFSSL_METHOD* wolfSSLv23_method(void)
+    {
+        return wolfSSLv23_method_ex(NULL);
+    }
+    WOLFSSL_METHOD* wolfSSLv23_method_ex(void* heap)
+    {
         WOLFSSL_METHOD* m = NULL;
-        WOLFSSL_ENTER("wolfSSLv23_method");
-#if !defined(NO_WOLFSSL_CLIENT)
-        m = wolfSSLv23_client_method();
-#elif !defined(NO_WOLFSSL_SERVER)
-        m = wolfSSLv23_server_method();
-#endif
+        WOLFSSL_ENTER("SSLv23_method");
+    #if !defined(NO_WOLFSSL_CLIENT)
+        m = wolfSSLv23_client_method_ex(heap);
+    #elif !defined(NO_WOLFSSL_SERVER)
+        m = wolfSSLv23_server_method_ex(heap);
+    #endif
+        if (m != NULL) {
+            m->side = WOLFSSL_NEITHER_END;
+        }
+
+        return m;
+    }
+
+    #ifdef WOLFSSL_ALLOW_SSLV3
+    WOLFSSL_METHOD* wolfSSLv3_method(void)
+    {
+        return wolfSSLv3_method_ex(NULL);
+    }
+    WOLFSSL_METHOD* wolfSSLv3_method_ex(void* heap)
+    {
+        WOLFSSL_METHOD* m = NULL;
+        WOLFSSL_ENTER("SSLv3_method");
+    #if !defined(NO_WOLFSSL_CLIENT)
+        m = wolfSSLv3_client_method_ex(heap);
+    #elif !defined(NO_WOLFSSL_SERVER)
+        m = wolfSSLv3_server_method_ex(heap);
+    #endif
         if (m != NULL) {
             m->side = WOLFSSL_NEITHER_END;
         }
 
         return m;
     }
-#endif /* OPENSSL_EXTRA */
+    #endif
+#endif /* OPENSSL_EXTRA || WOLFSSL_EITHER_SIDE */
 
 /* client only parts */
 #ifndef NO_WOLFSSL_CLIENT
 
-    #if defined(WOLFSSL_ALLOW_SSLV3) && !defined(NO_OLD_TLS)
+    #ifdef OPENSSL_EXTRA
+    WOLFSSL_METHOD* wolfSSLv2_client_method(void)
+    {
+        WOLFSSL_STUB("wolfSSLv2_client_method");
+        return NULL;
+    }
+    #endif
+
+    #ifdef WOLFSSL_ALLOW_SSLV3
     WOLFSSL_METHOD* wolfSSLv3_client_method(void)
     {
-        WOLFSSL_ENTER("SSLv3_client_method");
         return wolfSSLv3_client_method_ex(NULL);
     }
-    #endif
-
-    #ifdef WOLFSSL_DTLS
-
-        #ifndef NO_OLD_TLS
-        WOLFSSL_METHOD* wolfDTLSv1_client_method(void)
-        {
-            WOLFSSL_ENTER("DTLSv1_client_method");
-            return wolfDTLSv1_client_method_ex(NULL);
-        }
-        #endif  /* NO_OLD_TLS */
-
-        WOLFSSL_METHOD* wolfDTLSv1_2_client_method(void)
-        {
-            WOLFSSL_ENTER("DTLSv1_2_client_method");
-            return wolfDTLSv1_2_client_method_ex(NULL);
-        }
-    #endif
-
-    #if defined(WOLFSSL_ALLOW_SSLV3) && !defined(NO_OLD_TLS)
     WOLFSSL_METHOD* wolfSSLv3_client_method_ex(void* heap)
     {
         WOLFSSL_METHOD* method =
                               (WOLFSSL_METHOD*) XMALLOC(sizeof(WOLFSSL_METHOD),
                                                      heap, DYNAMIC_TYPE_METHOD);
+        (void)heap;
         WOLFSSL_ENTER("SSLv3_client_method_ex");
         if (method)
             InitSSL_Method(method, MakeSSLv3());
         return method;
     }
-    #endif
-
-    #ifdef WOLFSSL_DTLS
-
-        #ifndef NO_OLD_TLS
-        WOLFSSL_METHOD* wolfDTLSv1_client_method_ex(void* heap)
-        {
-            WOLFSSL_METHOD* method =
+    #endif /* WOLFSSL_ALLOW_SSLV3 */
+
+
+    WOLFSSL_METHOD* wolfSSLv23_client_method(void)
+    {
+        return wolfSSLv23_client_method_ex(NULL);
+    }
+    WOLFSSL_METHOD* wolfSSLv23_client_method_ex(void* heap)
+    {
+        WOLFSSL_METHOD* method =
                               (WOLFSSL_METHOD*) XMALLOC(sizeof(WOLFSSL_METHOD),
                                                      heap, DYNAMIC_TYPE_METHOD);
-            WOLFSSL_ENTER("DTLSv1_client_method_ex");
-            if (method)
-                InitSSL_Method(method, MakeDTLSv1());
-            return method;
-        }
-        #endif  /* NO_OLD_TLS */
-
-        WOLFSSL_METHOD* wolfDTLSv1_2_client_method_ex(void* heap)
-        {
-            WOLFSSL_METHOD* method =
-                              (WOLFSSL_METHOD*) XMALLOC(sizeof(WOLFSSL_METHOD),
-                                                     heap, DYNAMIC_TYPE_METHOD);
-            WOLFSSL_ENTER("DTLSv1_2_client_method_ex");
-            if (method)
-                InitSSL_Method(method, MakeDTLSv1_2());
-            (void)heap;
-            return method;
-        }
-    #endif
-
+        (void)heap;
+        WOLFSSL_ENTER("SSLv23_client_method_ex");
+        if (method) {
+    #if !defined(NO_SHA256) || defined(WOLFSSL_SHA384) || defined(WOLFSSL_SHA512)
+        #if defined(WOLFSSL_TLS13)
+            InitSSL_Method(method, MakeTLSv1_3());
+        #elif !defined(WOLFSSL_NO_TLS12)
+            InitSSL_Method(method, MakeTLSv1_2());
+        #elif !defined(NO_OLD_TLS)
+            InitSSL_Method(method, MakeTLSv1_1());
+        #endif
+    #else
+        #ifndef NO_OLD_TLS
+            InitSSL_Method(method, MakeTLSv1_1());
+        #endif
+    #endif
+    #if !defined(NO_OLD_TLS) || defined(WOLFSSL_TLS13)
+            method->downgrade = 1;
+    #endif
+        }
+        return method;
+    }
+
+
+    #if defined(WOLFSSL_DTLS) || !defined(WOLFSSL_NO_TLS12) || !defined(NO_OLD_TLS) || \
+        defined(WOLFSSL_ALLOW_SSLV3)
     /* If SCTP is not enabled returns the state of the dtls option.
      * If SCTP is enabled returns dtls && !sctp. */
     static WC_INLINE int IsDtlsNotSctpMode(WOLFSSL* ssl)
@@ -8681,12 +11411,14 @@
 
         return result;
     }
+    #endif /* WOLFSSL_DTLS || !WOLFSSL_NO_TLS12 || !NO_OLD_TLS */
 
 
     /* please see note at top of README if you get an error from connect */
+    WOLFSSL_ABI
     int wolfSSL_connect(WOLFSSL* ssl)
     {
-    #ifndef WOLFSSL_NO_TLS12
+    #if !(defined(WOLFSSL_NO_TLS12) && defined(NO_OLD_TLS) && defined(WOLFSSL_TLS13))
         int neededState;
     #endif
 
@@ -8699,34 +11431,52 @@
         if (ssl == NULL)
             return BAD_FUNC_ARG;
 
-        #ifdef OPENSSL_EXTRA
-            if (ssl->CBIS != NULL) {
-                ssl->CBIS(ssl, SSL_ST_CONNECT, SSL_SUCCESS);
-                ssl->cbmode = SSL_CB_WRITE;
-            }
-        #endif
-        if (ssl->options.side != WOLFSSL_CLIENT_END) {
-            WOLFSSL_ERROR(ssl->error = SIDE_ERROR);
-            return WOLFSSL_FATAL_ERROR;
-        }
-
-    #ifdef WOLFSSL_NO_TLS12
+    #if defined(OPENSSL_EXTRA) || defined(WOLFSSL_EITHER_SIDE)
+        if (ssl->options.side == WOLFSSL_NEITHER_END) {
+            ssl->error = InitSSL_Side(ssl, WOLFSSL_CLIENT_END);
+            if (ssl->error != WOLFSSL_SUCCESS) {
+                WOLFSSL_ERROR(ssl->error);
+                return WOLFSSL_FATAL_ERROR;
+            }
+            ssl->error = 0; /* expected to be zero here */
+        }
+
+    #ifdef OPENSSL_EXTRA
+        if (ssl->CBIS != NULL) {
+            ssl->CBIS(ssl, SSL_ST_CONNECT, SSL_SUCCESS);
+            ssl->cbmode = SSL_CB_WRITE;
+        }
+    #endif
+    #endif /* OPENSSL_EXTRA || WOLFSSL_EITHER_SIDE */
+
+    #if defined(WOLFSSL_NO_TLS12) && defined(NO_OLD_TLS) && defined(WOLFSSL_TLS13)
         return wolfSSL_connect_TLSv13(ssl);
     #else
         #ifdef WOLFSSL_TLS13
-            if (ssl->options.tls1_3)
-                return wolfSSL_connect_TLSv13(ssl);
-        #endif
+        if (ssl->options.tls1_3)
+            return wolfSSL_connect_TLSv13(ssl);
+        #endif
+
+        if (ssl->options.side != WOLFSSL_CLIENT_END) {
+            WOLFSSL_ERROR(ssl->error = SIDE_ERROR);
+            return WOLFSSL_FATAL_ERROR;
+        }
 
         #ifdef WOLFSSL_DTLS
-            if (ssl->version.major == DTLS_MAJOR) {
-                ssl->options.dtls   = 1;
-                ssl->options.tls    = 1;
-                ssl->options.tls1_1 = 1;
-            }
-        #endif
-
-        if (ssl->buffers.outputBuffer.length > 0) {
+        if (ssl->version.major == DTLS_MAJOR) {
+            ssl->options.dtls   = 1;
+            ssl->options.tls    = 1;
+            ssl->options.tls1_1 = 1;
+        }
+        #endif
+
+        if (ssl->buffers.outputBuffer.length > 0
+        #ifdef WOLFSSL_ASYNC_CRYPT
+            /* do not send buffered or advance state if last error was an 
+                async pending operation */
+            && ssl->error != WC_PENDING_E
+        #endif
+        ) {
             if ( (ssl->error = SendBuffered(ssl)) == 0) {
                 /* fragOffset is non-zero when sending fragments. On the last
                  * fragment, fragOffset is zero again, and the state can be
@@ -8747,11 +11497,6 @@
             }
         }
 
-#ifdef WOLFSSL_TLS13
-        if (ssl->options.tls1_3)
-            return wolfSSL_connect_TLSv13(ssl);
-#endif
-
         switch (ssl->options.connectState) {
 
         case CONNECT_BEGIN :
@@ -8808,6 +11553,8 @@
         #endif
 
             #ifdef WOLFSSL_DTLS
+            if (ssl->options.serverState ==
+                    SERVER_HELLOVERIFYREQUEST_COMPLETE) {
                 if (IsDtlsNotSctpMode(ssl)) {
                     /* re-init hashes, exclude first hello and verify request */
                     if ((ssl->error = InitHandshakeHashes(ssl)) != 0) {
@@ -8819,6 +11566,7 @@
                         return WOLFSSL_FATAL_ERROR;
                     }
                 }
+            }
             #endif
 
             ssl->options.connectState = HELLO_AGAIN_REPLY;
@@ -8971,38 +11719,25 @@
 /* server only parts */
 #ifndef NO_WOLFSSL_SERVER
 
-    #if defined(WOLFSSL_ALLOW_SSLV3) && !defined(NO_OLD_TLS)
+    #ifdef OPENSSL_EXTRA
+    WOLFSSL_METHOD* wolfSSLv2_server_method(void)
+    {
+        WOLFSSL_STUB("wolfSSLv2_server_method");
+        return 0;
+    }
+    #endif
+
+    #ifdef WOLFSSL_ALLOW_SSLV3
     WOLFSSL_METHOD* wolfSSLv3_server_method(void)
     {
-        WOLFSSL_ENTER("SSLv3_server_method");
         return wolfSSLv3_server_method_ex(NULL);
     }
-    #endif
-
-
-    #ifdef WOLFSSL_DTLS
-
-        #ifndef NO_OLD_TLS
-        WOLFSSL_METHOD* wolfDTLSv1_server_method(void)
-        {
-            WOLFSSL_ENTER("DTLSv1_server_method");
-            return wolfDTLSv1_server_method_ex(NULL);
-        }
-        #endif /* NO_OLD_TLS */
-
-        WOLFSSL_METHOD* wolfDTLSv1_2_server_method(void)
-        {
-            WOLFSSL_ENTER("DTLSv1_2_server_method");
-            return wolfDTLSv1_2_server_method_ex(NULL);
-        }
-    #endif
-
-    #if defined(WOLFSSL_ALLOW_SSLV3) && !defined(NO_OLD_TLS)
     WOLFSSL_METHOD* wolfSSLv3_server_method_ex(void* heap)
     {
         WOLFSSL_METHOD* method =
                               (WOLFSSL_METHOD*) XMALLOC(sizeof(WOLFSSL_METHOD),
                                                      heap, DYNAMIC_TYPE_METHOD);
+        (void)heap;
         WOLFSSL_ENTER("SSLv3_server_method_ex");
         if (method) {
             InitSSL_Method(method, MakeSSLv3());
@@ -9010,51 +11745,69 @@
         }
         return method;
     }
-    #endif
-
-
-    #ifdef WOLFSSL_DTLS
-
-        #ifndef NO_OLD_TLS
-        WOLFSSL_METHOD* wolfDTLSv1_server_method_ex(void* heap)
-        {
-            WOLFSSL_METHOD* method =
+    #endif /* WOLFSSL_ALLOW_SSLV3 */
+
+    WOLFSSL_METHOD* wolfSSLv23_server_method(void)
+    {
+        return wolfSSLv23_server_method_ex(NULL);
+    }
+
+    WOLFSSL_METHOD* wolfSSLv23_server_method_ex(void* heap)
+    {
+        WOLFSSL_METHOD* method =
                               (WOLFSSL_METHOD*) XMALLOC(sizeof(WOLFSSL_METHOD),
                                                      heap, DYNAMIC_TYPE_METHOD);
-            WOLFSSL_ENTER("DTLSv1_server_method_ex");
-            if (method) {
-                InitSSL_Method(method, MakeDTLSv1());
-                method->side = WOLFSSL_SERVER_END;
-            }
-            return method;
-        }
-        #endif /* NO_OLD_TLS */
-
-        WOLFSSL_METHOD* wolfDTLSv1_2_server_method_ex(void* heap)
-        {
-            WOLFSSL_METHOD* method =
-                              (WOLFSSL_METHOD*) XMALLOC(sizeof(WOLFSSL_METHOD),
-                                                     heap, DYNAMIC_TYPE_METHOD);
-            WOLFSSL_ENTER("DTLSv1_2_server_method_ex");
-            if (method) {
-                InitSSL_Method(method, MakeDTLSv1_2());
-                method->side = WOLFSSL_SERVER_END;
-            }
-            (void)heap;
-            return method;
-        }
-    #endif
+        (void)heap;
+        WOLFSSL_ENTER("SSLv23_server_method_ex");
+        if (method) {
+    #if !defined(NO_SHA256) || defined(WOLFSSL_SHA384) || defined(WOLFSSL_SHA512)
+        #ifdef WOLFSSL_TLS13
+            InitSSL_Method(method, MakeTLSv1_3());
+        #elif !defined(WOLFSSL_NO_TLS12)
+            InitSSL_Method(method, MakeTLSv1_2());
+        #elif !defined(NO_OLD_TLS)
+            InitSSL_Method(method, MakeTLSv1_1());
+        #endif
+    #else
+        #ifndef NO_OLD_TLS
+            InitSSL_Method(method, MakeTLSv1_1());
+        #else
+            #error Must have SHA256, SHA384 or SHA512 enabled for TLS 1.2
+        #endif
+    #endif
+    #if !defined(NO_OLD_TLS) || defined(WOLFSSL_TLS13)
+            method->downgrade = 1;
+    #endif
+            method->side      = WOLFSSL_SERVER_END;
+        }
+        return method;
+    }
 
 
     int wolfSSL_accept(WOLFSSL* ssl)
     {
-#ifndef WOLFSSL_NO_TLS12
+#if !(defined(WOLFSSL_NO_TLS12) && defined(NO_OLD_TLS) && defined(WOLFSSL_TLS13))
         word16 havePSK = 0;
         word16 haveAnon = 0;
         word16 haveMcast = 0;
 #endif
 
-#ifdef WOLFSSL_NO_TLS12
+        if (ssl == NULL)
+            return WOLFSSL_FATAL_ERROR;
+
+    #if defined(OPENSSL_EXTRA) || defined(WOLFSSL_EITHER_SIDE)
+        if (ssl->options.side == WOLFSSL_NEITHER_END) {
+            WOLFSSL_MSG("Setting WOLFSSL_SSL to be server side");
+            ssl->error = InitSSL_Side(ssl, WOLFSSL_SERVER_END);
+            if (ssl->error != WOLFSSL_SUCCESS) {
+                WOLFSSL_ERROR(ssl->error);
+                return WOLFSSL_FATAL_ERROR;
+            }
+            ssl->error = 0; /* expected to be zero here */
+        }
+    #endif /* OPENSSL_EXTRA || WOLFSSL_EITHER_SIDE */
+
+#if defined(WOLFSSL_NO_TLS12) && defined(NO_OLD_TLS) && defined(WOLFSSL_TLS13)
         return wolfSSL_accept_TLSv13(ssl);
 #else
     #ifdef WOLFSSL_TLS13
@@ -9121,7 +11874,13 @@
         }
     #endif
 
-        if (ssl->buffers.outputBuffer.length > 0) {
+        if (ssl->buffers.outputBuffer.length > 0
+        #ifdef WOLFSSL_ASYNC_CRYPT
+            /* do not send buffered or advance state if last error was an 
+                async pending operation */
+            && ssl->error != WC_PENDING_E
+        #endif
+        ) {
             if ( (ssl->error = SendBuffered(ssl)) == 0) {
                 /* fragOffset is non-zero when sending fragments. On the last
                  * fragment, fragOffset is zero again, and the state can be
@@ -9145,6 +11904,9 @@
         switch (ssl->options.acceptState) {
 
         case ACCEPT_BEGIN :
+#ifdef HAVE_SECURE_RENEGOTIATION
+        case ACCEPT_BEGIN_RENEG:
+#endif
             /* get response */
             while (ssl->options.clientState < CLIENT_HELLO_COMPLETE)
                 if ( (ssl->error = ProcessReply(ssl)) < 0) {
@@ -9363,6 +12125,7 @@
 
 #endif /* NO_HANDSHAKE_DONE_CB */
 
+WOLFSSL_ABI
 int wolfSSL_Cleanup(void)
 {
     int ret = WOLFSSL_SUCCESS;
@@ -9387,6 +12150,13 @@
     if (!release)
         return ret;
 
+#ifdef OPENSSL_EXTRA
+    if (bn_one) {
+        wolfSSL_BN_free(bn_one);
+        bn_one = NULL;
+    }
+#endif
+
 #ifndef NO_SESSION_CACHE
     if (wc_FreeMutex(&session_mutex) != 0)
         ret = BAD_MUTEX_E;
@@ -9394,6 +12164,10 @@
     if (wc_FreeMutex(&count_mutex) != 0)
         ret = BAD_MUTEX_E;
 
+#ifdef OPENSSL_EXTRA
+    wolfSSL_RAND_Cleanup();
+#endif
+
     if (wolfCrypt_Cleanup() != 0) {
         WOLFSSL_MSG("Error with wolfCrypt_Cleanup call");
         ret = WC_CLEANUP_E;
@@ -9425,6 +12199,7 @@
 }
 
 
+WOLFSSL_ABI
 void wolfSSL_flush_sessions(WOLFSSL_CTX* ctx, long tm)
 {
     /* static table now, no flushing needed */
@@ -9434,6 +12209,7 @@
 
 
 /* set ssl session timeout in seconds */
+WOLFSSL_ABI
 int wolfSSL_set_timeout(WOLFSSL* ssl, unsigned int to)
 {
     if (ssl == NULL)
@@ -9448,6 +12224,7 @@
 
 
 /* set ctx session timeout in seconds */
+WOLFSSL_ABI
 int wolfSSL_CTX_set_timeout(WOLFSSL_CTX* ctx, unsigned int to)
 {
     if (ctx == NULL)
@@ -9565,10 +12342,17 @@
     if (restoreSessionCerts) {
         ssl->session.chain        = session->chain;
         ssl->session.version      = session->version;
+    #ifdef NO_RESUME_SUITE_CHECK
         ssl->session.cipherSuite0 = session->cipherSuite0;
         ssl->session.cipherSuite  = session->cipherSuite;
+    #endif
     }
 #endif /* SESSION_CERTS */
+#if !defined(NO_RESUME_SUITE_CHECK) || \
+                        (defined(WOLFSSL_TLS13) && defined(HAVE_SESSION_TICKET))
+    ssl->session.cipherSuite0 = session->cipherSuite0;
+    ssl->session.cipherSuite  = session->cipherSuite;
+#endif
 }
 
 WOLFSSL_SESSION* GetSession(WOLFSSL* ssl, byte* masterSecret,
@@ -9594,7 +12378,7 @@
         return NULL;
 #endif
 
-    if (ssl->arrays)
+    if (!ssl->options.tls1_3 && ssl->arrays != NULL)
         id = ssl->arrays->sessionID;
     else
         id = ssl->session.sessionID;
@@ -9700,6 +12484,11 @@
     copyInto->isDynamic = 0;
 #endif
 
+#ifndef NO_RESUME_SUITE_CHECK
+    copyInto->cipherSuite0   = copyFrom->cipherSuite0;
+    copyInto->cipherSuite    = copyFrom->cipherSuite;
+#endif
+
     if (wc_UnLockMutex(&session_mutex) != 0) {
         return BAD_MUTEX_E;
     }
@@ -9711,8 +12500,10 @@
         return BAD_MUTEX_E;
     }
 
+#ifdef NO_RESUME_SUITE_CHECK
     copyInto->cipherSuite0   = copyFrom->cipherSuite0;
     copyInto->cipherSuite    = copyFrom->cipherSuite;
+#endif
     copyInto->namedGroup     = copyFrom->namedGroup;
     copyInto->ticketSeen     = copyFrom->ticketSeen;
     copyInto->ticketAdd      = copyFrom->ticketAdd;
@@ -9743,7 +12534,7 @@
             return BAD_MUTEX_E;
         }
 
-        if (ticketLen != copyFrom->ticketLen) {
+        if ((word16)ticketLen != copyFrom->ticketLen) {
             /* Another thread modified the ssl-> session ticket during alloc.
              * Treat as error, since ticket different than when copy requested */
             ret = VAR_STATE_CHANGE_E;
@@ -9803,6 +12594,9 @@
 #if defined(SESSION_CERTS) || (defined(WOLFSSL_TLS13) && \
                                defined(HAVE_SESSION_TICKET))
             ssl->version              = session->version;
+#endif
+#if defined(SESSION_CERTS) || !defined(NO_RESUME_SUITE_CHECK) || \
+                        (defined(WOLFSSL_TLS13) && defined(HAVE_SESSION_TICKET))
             ssl->options.cipherSuite0 = session->cipherSuite0;
             ssl->options.cipherSuite  = session->cipherSuite;
 #endif
@@ -9829,6 +12623,8 @@
     int    ticLen  = 0;
 #endif
     WOLFSSL_SESSION* session;
+    int i;
+    int overwrite = 0;
 
     if (ssl->options.sessionCacheOff)
         return 0;
@@ -9871,8 +12667,17 @@
     {
         /* Use the session object in the cache for external cache if required.
          */
-        row = HashSession(ssl->arrays->sessionID, ID_LEN, &error) %
-                SESSION_ROWS;
+#if defined(WOLFSSL_TLS13) && defined(HAVE_SESSION_TICKET)
+        if (ssl->options.tls1_3) {
+            row = HashSession(ssl->session.sessionID, ID_LEN, &error) %
+                    SESSION_ROWS;
+        }
+        else
+#endif
+        {
+            row = HashSession(ssl->arrays->sessionID, ID_LEN, &error) %
+                    SESSION_ROWS;
+        }
         if (error != 0) {
             WOLFSSL_MSG("Hash session failed");
 #ifdef HAVE_SESSION_TICKET
@@ -9888,7 +12693,28 @@
             return BAD_MUTEX_E;
         }
 
-        idx = SessionCache[row].nextIdx++;
+        for (i=0; i<SESSIONS_PER_ROW; i++) {
+            if (ssl->options.tls1_3) {
+                if (XMEMCMP(ssl->session.sessionID, SessionCache[row].Sessions[i].sessionID, ID_LEN) == 0) {
+                    WOLFSSL_MSG("Session already exists. Overwriting.");
+                    overwrite = 1;
+                    idx = i;
+                    break;
+                }
+            }
+            else {
+                if (XMEMCMP(ssl->arrays->sessionID, SessionCache[row].Sessions[i].sessionID, ID_LEN) == 0) {
+                    WOLFSSL_MSG("Session already exists. Overwriting.");
+                    overwrite = 1;
+                    idx = i;
+                    break;
+                }
+            }
+        }
+
+        if (!overwrite) {
+            idx = SessionCache[row].nextIdx++;
+        }
 #ifdef SESSION_INDEX
         ssl->sessionIndex = (row << SESSIDX_ROW_SHIFT) | idx;
 #endif
@@ -9900,11 +12726,20 @@
     else
         XMEMCPY(session->masterSecret, ssl->session.masterSecret, SECRET_LEN);
     session->haveEMS = ssl->options.haveEMS;
-    XMEMCPY(session->sessionID, ssl->arrays->sessionID, ID_LEN);
-    session->sessionIDSz = ssl->arrays->sessionIDSz;
-
-#ifdef OPENSSL_EXTRA
-    /* If using compatibilty layer then check for and copy over session context
+#if defined(WOLFSSL_TLS13) && defined(HAVE_SESSION_TICKET)
+    if (ssl->options.tls1_3) {
+        XMEMCPY(session->sessionID, ssl->session.sessionID, ID_LEN);
+        session->sessionIDSz = ID_LEN;
+    }
+    else
+#endif
+    {
+        XMEMCPY(session->sessionID, ssl->arrays->sessionID, ID_LEN);
+        session->sessionIDSz = ssl->arrays->sessionIDSz;
+    }
+
+#ifdef OPENSSL_EXTRA
+    /* If using compatibility layer then check for and copy over session context
      * id. */
     if (ssl->sessionCtxSz > 0 && ssl->sessionCtxSz < ID_LEN) {
         XMEMCPY(session->sessionCtx, ssl->sessionCtx, ssl->sessionCtxSz);
@@ -9916,7 +12751,7 @@
 
 #ifdef HAVE_SESSION_TICKET
     /* Check if another thread modified ticket since alloc */
-    if (ticLen != ssl->session.ticketLen) {
+    if ((word16)ticLen != ssl->session.ticketLen) {
         error = VAR_STATE_CHANGE_E;
     }
 
@@ -9935,9 +12770,7 @@
             session->ticket = session->staticTicket;
             session->isDynamic = 0;
         }
-    }
-
-    if (error == 0) {
+
         session->ticketLen = (word16)ticLen;
         XMEMCPY(session->ticket, ssl->session.ticket, ticLen);
     } else { /* cleanup, reset state */
@@ -9953,19 +12786,30 @@
 
 #ifdef SESSION_CERTS
     if (error == 0) {
-        session->chain.count = ssl->session.chain.count;
-        XMEMCPY(session->chain.certs, ssl->session.chain.certs,
-                sizeof(x509_buffer) * MAX_CHAIN_DEPTH);
+        if (!overwrite || ssl->session.chain.count > 0) {
+            /*
+             * If we are overwriting and no certs present in ssl->session.chain
+             * then keep the old chain.
+             */
+            session->chain.count = ssl->session.chain.count;
+            XMEMCPY(session->chain.certs, ssl->session.chain.certs,
+                    sizeof(x509_buffer) * session->chain.count);
+        }
     }
 #endif /* SESSION_CERTS */
 #if defined(SESSION_CERTS) || (defined(WOLFSSL_TLS13) && \
                                defined(HAVE_SESSION_TICKET))
     if (error == 0) {
         session->version      = ssl->version;
+    }
+#endif /* SESSION_CERTS || (WOLFSSL_TLS13 & HAVE_SESSION_TICKET) */
+#if defined(SESSION_CERTS) || !defined(NO_RESUME_SUITE_CHECK) || \
+                        (defined(WOLFSSL_TLS13) && defined(HAVE_SESSION_TICKET))
+    if (error == 0) {
         session->cipherSuite0 = ssl->options.cipherSuite0;
         session->cipherSuite  = ssl->options.cipherSuite;
     }
-#endif /* SESSION_CERTS || (WOLFSSL_TLS13 & HAVE_SESSION_TICKET) */
+#endif
 #if defined(WOLFSSL_TLS13)
     if (error == 0) {
         session->namedGroup     = ssl->session.namedGroup;
@@ -10109,7 +12953,7 @@
 
 #endif /* SESSION_INDEX */
 
-#if defined(SESSION_INDEX) && defined(SESSION_CERTS)
+#if defined(SESSION_CERTS)
 
 WOLFSSL_X509_CHAIN* wolfSSL_SESSION_get_peer_chain(WOLFSSL_SESSION* session)
 {
@@ -10123,6 +12967,32 @@
     return chain;
 }
 
+
+#ifdef OPENSSL_EXTRA
+/* gets the peer certificate associated with the session passed in
+ * returns null on failure, the caller should not free the returned pointer */
+WOLFSSL_X509* wolfSSL_SESSION_get0_peer(WOLFSSL_SESSION* session)
+{
+    WOLFSSL_ENTER("wolfSSL_SESSION_get_peer_chain");
+    if (session) {
+        int count;
+
+        count = wolfSSL_get_chain_count(&session->chain);
+        if (count < 1 || count >= MAX_CHAIN_DEPTH) {
+            WOLFSSL_MSG("bad count found");
+            return NULL;
+        }
+
+        if (session->peer == NULL) {
+            session->peer = wolfSSL_get_chain_X509(&session->chain, 0);
+        }
+        return session->peer;
+    }
+    WOLFSSL_MSG("No session passed in");
+
+    return NULL;
+}
+#endif /* OPENSSL_EXTRA */
 #endif /* SESSION_INDEX && SESSION_CERTS */
 
 
@@ -10160,7 +13030,7 @@
                 break;
             }
 
-            /* if not expried then good */
+            /* if not expired then good */
             if (ticks < (SessionCache[i].Sessions[idx].bornOn +
                          SessionCache[i].Sessions[idx].timeout) ) {
                 now++;
@@ -10293,6 +13163,7 @@
 
 /* call before SSL_connect, if verifying will add name check to
    date check and signature check */
+WOLFSSL_ABI
 int wolfSSL_check_domain_name(WOLFSSL* ssl, const char* dn)
 {
     WOLFSSL_ENTER("wolfSSL_check_domain_name");
@@ -10310,8 +13181,8 @@
             ssl->buffers.domainName.length + 1, ssl->heap, DYNAMIC_TYPE_DOMAIN);
 
     if (ssl->buffers.domainName.buffer) {
-        char* domainName = (char*)ssl->buffers.domainName.buffer;
-        XSTRNCPY(domainName, dn, ssl->buffers.domainName.length);
+        unsigned char* domainName = ssl->buffers.domainName.buffer;
+        XMEMCPY(domainName, dn, ssl->buffers.domainName.length);
         domainName[ssl->buffers.domainName.length] = '\0';
         return WOLFSSL_SUCCESS;
     }
@@ -10430,13 +13301,13 @@
 
 
     static int wolfSSL_ex_wrapper(WOLFSSL* ssl, HandShakeCallBack hsCb,
-                                 TimeoutCallBack toCb, Timeval timeout)
+                                 TimeoutCallBack toCb, WOLFSSL_TIMEVAL timeout)
     {
         int       ret        = WOLFSSL_FATAL_ERROR;
         int       oldTimerOn = 0;   /* was timer already on */
-        Timeval   startTime;
-        Timeval   endTime;
-        Timeval   totalTime;
+        WOLFSSL_TIMEVAL startTime;
+        WOLFSSL_TIMEVAL endTime;
+        WOLFSSL_TIMEVAL totalTime;
         Itimerval myTimeout;
         Itimerval oldTimeout; /* if old timer adjust from total time to reset */
         struct sigaction act, oact;
@@ -10544,7 +13415,7 @@
 #ifndef NO_WOLFSSL_CLIENT
 
     int wolfSSL_connect_ex(WOLFSSL* ssl, HandShakeCallBack hsCb,
-                          TimeoutCallBack toCb, Timeval timeout)
+                          TimeoutCallBack toCb, WOLFSSL_TIMEVAL timeout)
     {
         WOLFSSL_ENTER("wolfSSL_connect_ex");
         return wolfSSL_ex_wrapper(ssl, hsCb, toCb, timeout);
@@ -10556,7 +13427,7 @@
 #ifndef NO_WOLFSSL_SERVER
 
     int wolfSSL_accept_ex(WOLFSSL* ssl, HandShakeCallBack hsCb,
-                         TimeoutCallBack toCb,Timeval timeout)
+                         TimeoutCallBack toCb, WOLFSSL_TIMEVAL timeout)
     {
         WOLFSSL_ENTER("wolfSSL_accept_ex");
         return wolfSSL_ex_wrapper(ssl, hsCb, toCb, timeout);
@@ -10573,6 +13444,10 @@
                                          wc_psk_client_callback cb)
     {
         WOLFSSL_ENTER("SSL_CTX_set_psk_client_callback");
+
+        if (ctx == NULL)
+            return;
+
         ctx->havePSK = 1;
         ctx->client_psk_cb = cb;
     }
@@ -10584,6 +13459,10 @@
         int  keySz   = 0;
 
         WOLFSSL_ENTER("SSL_set_psk_client_callback");
+
+        if (ssl == NULL)
+            return;
+
         ssl->options.havePSK = 1;
         ssl->options.client_psk_cb = cb;
 
@@ -10604,6 +13483,8 @@
                                          wc_psk_server_callback cb)
     {
         WOLFSSL_ENTER("SSL_CTX_set_psk_server_callback");
+        if (ctx == NULL)
+            return;
         ctx->havePSK = 1;
         ctx->server_psk_cb = cb;
     }
@@ -10615,6 +13496,9 @@
         int  keySz   = 0;
 
         WOLFSSL_ENTER("SSL_set_psk_server_callback");
+        if (ssl == NULL)
+            return;
+
         ssl->options.havePSK = 1;
         ssl->options.server_psk_cb = cb;
 
@@ -10659,7 +13543,11 @@
         if (hint == 0)
             ctx->server_hint[0] = '\0';
         else {
-            XSTRNCPY(ctx->server_hint, hint, sizeof(ctx->server_hint));
+            /* Qt does not call CTX_set_*_psk_callbacks where havePSK is set */
+            #ifdef WOLFSSL_QT
+            ctx->havePSK=1;
+            #endif
+            XSTRNCPY(ctx->server_hint, hint, MAX_PSK_ID_LEN);
             ctx->server_hint[MAX_PSK_ID_LEN] = '\0'; /* null term */
         }
         return WOLFSSL_SUCCESS;
@@ -10677,8 +13565,8 @@
             ssl->arrays->server_hint[0] = 0;
         else {
             XSTRNCPY(ssl->arrays->server_hint, hint,
-                                            sizeof(ssl->arrays->server_hint));
-            ssl->arrays->server_hint[MAX_PSK_ID_LEN] = '\0'; /* null term */
+                                            sizeof(ssl->arrays->server_hint)-1);
+            ssl->arrays->server_hint[sizeof(ssl->arrays->server_hint)-1] = '\0';
         }
         return WOLFSSL_SUCCESS;
     }
@@ -10706,16 +13594,42 @@
 #ifndef NO_CERTS
 /* used to be defined on NO_FILESYSTEM only, but are generally useful */
 
+    int wolfSSL_CTX_load_verify_buffer_ex(WOLFSSL_CTX* ctx,
+                                         const unsigned char* in,
+                                         long sz, int format, int userChain,
+                                         word32 flags)
+    {
+        int verify;
+
+        WOLFSSL_ENTER("wolfSSL_CTX_load_verify_buffer_ex");
+
+        verify = GET_VERIFY_SETTING_CTX(ctx);
+        if (flags & WOLFSSL_LOAD_FLAG_DATE_ERR_OKAY)
+            verify = VERIFY_SKIP_DATE;
+
+        if (format == WOLFSSL_FILETYPE_PEM)
+            return ProcessChainBuffer(ctx, in, sz, format, CA_TYPE, NULL,
+                                      verify);
+        else
+            return ProcessBuffer(ctx, in, sz, format, CA_TYPE, NULL, NULL,
+                                 userChain, verify);
+    }
+
     /* wolfSSL extension allows DER files to be loaded from buffers as well */
     int wolfSSL_CTX_load_verify_buffer(WOLFSSL_CTX* ctx,
                                        const unsigned char* in,
                                        long sz, int format)
     {
-        WOLFSSL_ENTER("wolfSSL_CTX_load_verify_buffer");
-        if (format == WOLFSSL_FILETYPE_PEM)
-            return ProcessChainBuffer(ctx, in, sz, format, CA_TYPE, NULL);
-        else
-            return ProcessBuffer(ctx, in, sz, format, CA_TYPE, NULL,NULL,0);
+        return wolfSSL_CTX_load_verify_buffer_ex(ctx, in, sz, format, 0,
+            WOLFSSL_LOAD_VERIFY_DEFAULT_FLAGS);
+    }
+
+    int wolfSSL_CTX_load_verify_chain_buffer_format(WOLFSSL_CTX* ctx,
+                                       const unsigned char* in,
+                                       long sz, int format)
+    {
+        return wolfSSL_CTX_load_verify_buffer_ex(ctx, in, sz, format, 1,
+            WOLFSSL_LOAD_VERIFY_DEFAULT_FLAGS);
     }
 
 
@@ -10732,11 +13646,11 @@
         }
 
         if (format == WOLFSSL_FILETYPE_PEM)
-            return ProcessChainBuffer(ctx, in, sz, format,
-                                                       TRUSTED_PEER_TYPE, NULL);
-        else
-            return ProcessBuffer(ctx, in, sz, format, TRUSTED_PEER_TYPE,
-                                                                   NULL,NULL,0);
+            return ProcessChainBuffer(ctx, in, sz, format, TRUSTED_PEER_TYPE,
+                                      NULL, GET_VERIFY_SETTING_CTX(ctx));
+        else
+            return ProcessBuffer(ctx, in, sz, format, TRUSTED_PEER_TYPE, NULL,
+                                 NULL, 0, GET_VERIFY_SETTING_CTX(ctx));
     }
 #endif /* WOLFSSL_TRUST_PEER_CERT */
 
@@ -10745,7 +13659,8 @@
                                  const unsigned char* in, long sz, int format)
     {
         WOLFSSL_ENTER("wolfSSL_CTX_use_certificate_buffer");
-        return ProcessBuffer(ctx, in, sz, format, CERT_TYPE, NULL, NULL, 0);
+        return ProcessBuffer(ctx, in, sz, format, CERT_TYPE, NULL, NULL, 0,
+                             GET_VERIFY_SETTING_CTX(ctx));
     }
 
 
@@ -10753,15 +13668,40 @@
                                  const unsigned char* in, long sz, int format)
     {
         WOLFSSL_ENTER("wolfSSL_CTX_use_PrivateKey_buffer");
-        return ProcessBuffer(ctx, in, sz, format, PRIVATEKEY_TYPE, NULL,NULL,0);
-    }
-
+        return ProcessBuffer(ctx, in, sz, format, PRIVATEKEY_TYPE, NULL, NULL,
+                             0, GET_VERIFY_SETTING_CTX(ctx));
+    }
+
+#ifdef HAVE_PKCS11
+    int wolfSSL_CTX_use_PrivateKey_id(WOLFSSL_CTX* ctx, const unsigned char* id,
+                                      long sz, int devId, long keySz)
+    {
+        int ret = WOLFSSL_FAILURE;
+
+        FreeDer(&ctx->privateKey);
+        if (AllocDer(&ctx->privateKey, (word32)sz, PRIVATEKEY_TYPE,
+                                                              ctx->heap) == 0) {
+            XMEMCPY(ctx->privateKey->buffer, id, sz);
+            ctx->privateKeyId = 1;
+            ctx->privateKeySz = (word32)keySz;
+            if (devId != INVALID_DEVID)
+                ctx->privateKeyDevId = devId;
+            else
+                ctx->privateKeyDevId = ctx->devId;
+
+            ret = WOLFSSL_SUCCESS;
+        }
+
+        return ret;
+    }
+#endif
 
     int wolfSSL_CTX_use_certificate_chain_buffer_format(WOLFSSL_CTX* ctx,
                                  const unsigned char* in, long sz, int format)
     {
         WOLFSSL_ENTER("wolfSSL_CTX_use_certificate_chain_buffer_format");
-        return ProcessBuffer(ctx, in, sz, format, CERT_TYPE, NULL, NULL, 1);
+        return ProcessBuffer(ctx, in, sz, format, CERT_TYPE, NULL, NULL, 1,
+                             GET_VERIFY_SETTING_CTX(ctx));
     }
 
     int wolfSSL_CTX_use_certificate_chain_buffer(WOLFSSL_CTX* ctx,
@@ -10879,7 +13819,11 @@
                                  const unsigned char* in, long sz, int format)
     {
         WOLFSSL_ENTER("wolfSSL_use_certificate_buffer");
-        return ProcessBuffer(ssl->ctx, in, sz, format,CERT_TYPE,ssl,NULL,0);
+        if (ssl == NULL)
+            return BAD_FUNC_ARG;
+
+        return ProcessBuffer(ssl->ctx, in, sz, format, CERT_TYPE, ssl, NULL, 0,
+                             GET_VERIFY_SETTING_SSL(ssl));
     }
 
 
@@ -10887,16 +13831,48 @@
                                  const unsigned char* in, long sz, int format)
     {
         WOLFSSL_ENTER("wolfSSL_use_PrivateKey_buffer");
+        if (ssl == NULL)
+            return BAD_FUNC_ARG;
+
         return ProcessBuffer(ssl->ctx, in, sz, format, PRIVATEKEY_TYPE,
-                             ssl, NULL, 0);
-    }
+                             ssl, NULL, 0, GET_VERIFY_SETTING_SSL(ssl));
+    }
+
+#ifdef HAVE_PKCS11
+    int wolfSSL_use_PrivateKey_id(WOLFSSL* ssl, const unsigned char* id,
+                                  long sz, int devId, long keySz)
+    {
+        int ret = WOLFSSL_FAILURE;
+
+        if (ssl->buffers.weOwnKey)
+            FreeDer(&ssl->buffers.key);
+        if (AllocDer(&ssl->buffers.key, (word32)sz, PRIVATEKEY_TYPE,
+                                                            ssl->heap) == 0) {
+            XMEMCPY(ssl->buffers.key->buffer, id, sz);
+            ssl->buffers.weOwnKey = 1;
+            ssl->buffers.keyId = 1;
+            ssl->buffers.keySz = (word32)keySz;
+            if (devId != INVALID_DEVID)
+                ssl->buffers.keyDevId = devId;
+            else
+                ssl->buffers.keyDevId = ssl->devId;
+
+            ret = WOLFSSL_SUCCESS;
+        }
+
+        return ret;
+    }
+#endif
 
     int wolfSSL_use_certificate_chain_buffer_format(WOLFSSL* ssl,
                                  const unsigned char* in, long sz, int format)
     {
         WOLFSSL_ENTER("wolfSSL_use_certificate_chain_buffer_format");
+        if (ssl == NULL)
+            return BAD_FUNC_ARG;
+
         return ProcessBuffer(ssl->ctx, in, sz, format, CERT_TYPE,
-                             ssl, NULL, 1);
+                             ssl, NULL, 1, GET_VERIFY_SETTING_SSL(ssl));
     }
 
     int wolfSSL_use_certificate_chain_buffer(WOLFSSL* ssl,
@@ -10982,9 +13958,9 @@
             return WOLFSSL_FATAL_ERROR;
     }
 
-    int wolfSSL_OPENSSL_add_all_algorithms_noconf(void)
-    {
-        WOLFSSL_ENTER("wolfSSL_OPENSSL_add_all_algorithms_noconf");
+    int wolfSSL_OpenSSL_add_all_algorithms_noconf(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_OpenSSL_add_all_algorithms_noconf");
 
         if  (wolfSSL_add_all_algorithms() == WOLFSSL_FATAL_ERROR)
             return WOLFSSL_FATAL_ERROR;
@@ -10992,6 +13968,20 @@
         return  WOLFSSL_SUCCESS;
     }
 
+    int wolfSSL_OpenSSL_add_all_algorithms_conf(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_OpenSSL_add_all_algorithms_conf");
+        /* This function is currently the same as
+        wolfSSL_OpenSSL_add_all_algorithms_noconf since we do not employ
+        the use of a wolfssl.cnf type configuration file and is only used for
+        OpenSSL compatability. */
+
+        if (wolfSSL_add_all_algorithms() == WOLFSSL_FATAL_ERROR) {
+            return WOLFSSL_FATAL_ERROR;
+        }
+        return WOLFSSL_SUCCESS;
+    }
+
    /* returns previous set cache size which stays constant */
     long wolfSSL_CTX_sess_set_cache_size(WOLFSSL_CTX* ctx, long sz)
     {
@@ -11000,7 +13990,7 @@
         (void)sz;
         WOLFSSL_MSG("session cache is set at compile time");
         #ifndef NO_SESSION_CACHE
-            return SESSIONS_PER_ROW * SESSION_ROWS;
+            return (long)(SESSIONS_PER_ROW * SESSION_ROWS);
         #else
             return 0;
         #endif
@@ -11037,10 +14027,10 @@
 
         /* if WOLFSSL_BIO is socket type then set WOLFSSL socket to use */
         if (rd != NULL && rd->type == WOLFSSL_BIO_SOCKET) {
-            wolfSSL_set_rfd(ssl, rd->fd);
+            wolfSSL_set_rfd(ssl, rd->num);
         }
         if (wr != NULL && wr->type == WOLFSSL_BIO_SOCKET) {
-            wolfSSL_set_wfd(ssl, wr->fd);
+            wolfSSL_set_wfd(ssl, wr->num);
         }
 
         /* free any existing WOLFSSL_BIOs in use */
@@ -11068,23 +14058,105 @@
             (wr != NULL && wr->type != WOLFSSL_BIO_SOCKET)) {
             ssl->CBIOSend = BioSend;
         }
-    }
-#endif
-
-#if defined(OPENSSL_EXTRA) || defined(WOLFSSL_EXTRA)
+
+        /* User programs should always retry reading from these BIOs */
+        if (rd) {
+            /* User writes to rd */
+            BIO_set_retry_write(rd);
+        }
+        if (wr) {
+            /* User reads from wr */
+            BIO_set_retry_read(wr);
+        }
+    }
+#endif
+
+#if defined(OPENSSL_EXTRA) || defined(WOLFSSL_EXTRA) || defined(HAVE_WEBSERVER)
     void wolfSSL_CTX_set_client_CA_list(WOLFSSL_CTX* ctx,
                                        WOLF_STACK_OF(WOLFSSL_X509_NAME)* names)
     {
-        WOLFSSL_ENTER("wolfSSL_SSL_CTX_set_client_CA_list");
-
+        WOLFSSL_ENTER("wolfSSL_CTX_set_client_CA_list");
+    #if defined(OPENSSL_EXTRA) || defined(WOLFSSL_EXTRA)
         if (ctx != NULL)
             ctx->ca_names = names;
-    }
-
-    WOLF_STACK_OF(WOLFSSL_X509_NAME)* wolfSSL_SSL_CTX_get_client_CA_list(
+    #else
+        (void)ctx;
+        (void)names;
+    #endif
+    }
+
+
+    /* returns the CA's set on server side or the CA's sent from server when
+     * on client side */
+#if defined(SESSION_CERTS) && defined(OPENSSL_ALL)
+    WOLF_STACK_OF(WOLFSSL_X509_NAME)* wolfSSL_get_client_CA_list(
+            const WOLFSSL* ssl)
+    {
+        WOLFSSL_ENTER("wolfSSL_get_client_CA_list");
+
+        if (ssl == NULL) {
+            WOLFSSL_MSG("Bad argument passed to wolfSSL_get_client_CA_list");
+            return NULL;
+        }
+
+        /* return list of CAs sent from the server */
+        if (ssl->options.side == WOLFSSL_CLIENT_END) {
+            WOLF_STACK_OF(WOLFSSL_X509)* sk;
+
+            sk = wolfSSL_get_peer_cert_chain(ssl);
+            if (sk != NULL) {
+                WOLF_STACK_OF(WOLFSSL_X509_NAME)* ret;
+                WOLFSSL_X509* x509;
+
+                ret = wolfSSL_sk_X509_NAME_new(NULL);
+                do {
+                    x509 = wolfSSL_sk_X509_pop(sk);
+                    if (x509 != NULL) {
+                        if (wolfSSL_X509_get_isCA(x509)) {
+                            if (wolfSSL_sk_X509_NAME_push(ret,
+                                    wolfSSL_X509_get_subject_name(x509)) != 0) {
+                                WOLFSSL_MSG("Error pushing X509 name to stack");
+                                /* continue on to try other certificates and
+                                 * do not fail out here */
+                            }
+                        }
+                        wolfSSL_X509_free(x509);
+                    }
+                } while (x509 != NULL);
+                wolfSSL_sk_X509_free(sk);
+                return ret;
+            }
+            return NULL;
+        }
+        else {
+            /* currently only can be set in the CTX */
+            return ssl->ctx->ca_names;
+        }
+    }
+#endif /* SESSION_CERTS */
+
+
+    #if defined(OPENSSL_ALL) || defined(OPENSSL_EXTRA) || \
+        defined(WOLFSSL_NGINX) || defined (WOLFSSL_HAPROXY)
+    /* registers client cert callback, called during handshake if server
+       requests client auth but user has not loaded client cert/key */
+    void wolfSSL_CTX_set_client_cert_cb(WOLFSSL_CTX *ctx, client_cert_cb cb)
+    {
+        WOLFSSL_ENTER("wolfSSL_CTX_set_client_cert_cb");
+
+        if (ctx != NULL) {
+            ctx->CBClientCert = cb;
+        }
+    }
+    #endif /* OPENSSL_ALL || OPENSSL_EXTRA || WOLFSSL_NGINX || WOLFSSL_HAPROXY */
+
+#endif /* OPENSSL_EXTRA || WOLFSSL_EXTRA || HAVE_WEBSERVER */
+
+#if defined(OPENSSL_EXTRA) || defined(WOLFSSL_EXTRA)
+    WOLF_STACK_OF(WOLFSSL_X509_NAME)* wolfSSL_CTX_get_client_CA_list(
             const WOLFSSL_CTX *s)
     {
-        WOLFSSL_ENTER("wolfSSL_SSL_CTX_get_client_CA_list");
+        WOLFSSL_ENTER("wolfSSL_CTX_get_client_CA_list");
 
         if (s == NULL)
             return NULL;
@@ -11093,15 +14165,21 @@
     }
 #endif
 
-#ifdef OPENSSL_EXTRA
+#if defined(OPENSSL_EXTRA) || defined(HAVE_WEBSERVER)
     #if !defined(NO_RSA) && !defined(NO_CERTS)
     WOLF_STACK_OF(WOLFSSL_X509_NAME)* wolfSSL_load_client_CA_file(const char* fname)
     {
+        /* The webserver build is using this to load a CA into the server
+         * for client authentication as an option. Have this return NULL in
+         * that case. If OPENSSL_EXTRA is enabled, go ahead and include
+         * the function. */
+    #ifdef OPENSSL_EXTRA
         WOLFSSL_STACK *list = NULL;
         WOLFSSL_STACK *node;
         WOLFSSL_BIO* bio;
         WOLFSSL_X509 *cert = NULL;
         WOLFSSL_X509_NAME *subjectName = NULL;
+        unsigned long err;
 
         WOLFSSL_ENTER("wolfSSL_load_client_CA_file");
 
@@ -11115,21 +14193,18 @@
             if (subjectName == NULL)
                 break;
 
-            node = (WOLFSSL_STACK*)XMALLOC(sizeof(WOLFSSL_STACK), NULL,
-                                           DYNAMIC_TYPE_OPENSSL);
+            node = wolfSSL_sk_new_node(NULL);
             if (node == NULL)
                 break;
+            node->type = STACK_TYPE_X509_NAME;
 
             /* Need a persistent copy of the subject name. */
-            node->data.name = (WOLFSSL_X509_NAME*)XMALLOC(
-                    sizeof(WOLFSSL_X509_NAME), NULL, DYNAMIC_TYPE_OPENSSL);
-            if (node->data.name == NULL) {
-                XFREE(node, NULL, DYNAMIC_TYPE_OPENSSL);
-                break;
-            }
-            XMEMCPY(node->data.name, subjectName, sizeof(WOLFSSL_X509_NAME));
-            /* Clear pointers so freeing certificate doesn't free memory. */
-            XMEMSET(subjectName, 0, sizeof(WOLFSSL_X509_NAME));
+            node->data.name = wolfSSL_X509_NAME_dup(subjectName);
+            /*
+             * Original cert will be freed so make sure not to try to access
+             * it in the future.
+             */
+            node->data.name->x509 = NULL;
 
             /* Put node on the front of the list. */
             node->num  = (list == NULL) ? 1 : list->num + 1;
@@ -11140,11 +14215,31 @@
             cert = NULL;
         }
 
+        err = wolfSSL_ERR_peek_last_error();
+
+        if (ERR_GET_LIB(err) == ERR_LIB_PEM &&
+                ERR_GET_REASON(err) == PEM_R_NO_START_LINE) {
+            /*
+             * wolfSSL_PEM_read_bio_X509 pushes an ASN_NO_PEM_HEADER error
+             * to the error queue on file end. This should not be left
+             * for the caller to find so we clear the last error.
+             */
+            wc_RemoveErrorNode(-1);
+        }
+
         wolfSSL_X509_free(cert);
         wolfSSL_BIO_free(bio);
         return list;
-    }
-
+    #else
+        (void)fname;
+        return NULL;
+    #endif
+    }
+    #endif
+#endif
+
+#ifdef OPENSSL_EXTRA
+    #if !defined(NO_RSA) && !defined(NO_CERTS)
     int wolfSSL_CTX_add_client_CA(WOLFSSL_CTX* ctx, WOLFSSL_X509* x509)
     {
         WOLFSSL_STACK *node = NULL;
@@ -11386,24 +14481,22 @@
 #if defined(OPENSSL_EXTRA) || defined(WOLFSSL_EXTRA)
     void wolfSSL_set_accept_state(WOLFSSL* ssl)
     {
-        word16 haveRSA = 1;
-        word16 havePSK = 0;
-
-        WOLFSSL_ENTER("SSL_set_accept_state");
+        WOLFSSL_ENTER("wolfSSL_set_accept_state");
         if (ssl->options.side == WOLFSSL_CLIENT_END) {
     #ifdef HAVE_ECC
             ecc_key key;
             word32 idx = 0;
 
             if (ssl->options.haveStaticECC && ssl->buffers.key != NULL) {
-                wc_ecc_init(&key);
-                if (wc_EccPrivateKeyDecode(ssl->buffers.key->buffer, &idx, &key,
-                                               ssl->buffers.key->length) != 0) {
-                    ssl->options.haveECDSAsig = 0;
-                    ssl->options.haveECC = 0;
-                    ssl->options.haveStaticECC = 0;
-                }
-                wc_ecc_free(&key);
+                if (wc_ecc_init(&key) >= 0) {
+                    if (wc_EccPrivateKeyDecode(ssl->buffers.key->buffer, &idx, &key,
+                                                   ssl->buffers.key->length) != 0) {
+                        ssl->options.haveECDSAsig = 0;
+                        ssl->options.haveECC = 0;
+                        ssl->options.haveStaticECC = 0;
+                    }
+                    wc_ecc_free(&key);
+                }
             }
     #endif
 
@@ -11415,19 +14508,10 @@
             }
     #endif
         }
-        ssl->options.side = WOLFSSL_SERVER_END;
-        /* reset suites in case user switched */
-
-        #ifdef NO_RSA
-            haveRSA = 0;
-        #endif
-        #ifndef NO_PSK
-            havePSK = ssl->options.havePSK;
-        #endif
-        InitSuites(ssl->suites, ssl->version, ssl->buffers.keySz, haveRSA,
-                   havePSK, ssl->options.haveDH, ssl->options.haveNTRU,
-                   ssl->options.haveECDSAsig, ssl->options.haveECC,
-                   ssl->options.haveStaticECC, ssl->options.side);
+
+        if (InitSSL_Side(ssl, WOLFSSL_SERVER_END) != WOLFSSL_SUCCESS) {
+            WOLFSSL_MSG("Error initializing server side");
+        }
     }
 
 #endif /* OPENSSL_EXTRA || WOLFSSL_EXTRA */
@@ -11477,6 +14561,10 @@
         return ctx->mask;
     }
 
+#endif
+
+#if defined(OPENSSL_EXTRA) || defined(HAVE_WEBSERVER)
+
     static long wolf_set_options(long old_op, long op);
     long wolfSSL_CTX_set_options(WOLFSSL_CTX* ctx, long opt)
     {
@@ -11490,6 +14578,10 @@
         return ctx->mask;
     }
 
+#endif
+
+#ifdef OPENSSL_EXTRA
+
     long wolfSSL_CTX_clear_options(WOLFSSL_CTX* ctx, long opt)
     {
         WOLFSSL_ENTER("SSL_CTX_clear_options");
@@ -11520,10 +14612,8 @@
         return WOLFSSL_SUCCESS;
     }
 
-
-
-
 #ifndef NO_CERTS
+
     WOLFSSL_X509_STORE* wolfSSL_CTX_get_cert_store(WOLFSSL_CTX* ctx)
     {
         if (ctx == NULL) {
@@ -11533,7 +14623,6 @@
         return &ctx->x509_store;
     }
 
-
     void wolfSSL_CTX_set_cert_store(WOLFSSL_CTX* ctx, WOLFSSL_X509_STORE* str)
     {
         if (ctx == NULL || str == NULL) {
@@ -11545,6 +14634,13 @@
             wolfSSL_CertManagerFree(ctx->cm);
         }
         ctx->cm               = str->cm;
+
+        /* free existing store if it exists */
+        if (ctx->x509_store_pt != NULL) {
+            /* cert manager was free'd a little earlier in this function */
+            ctx->x509_store_pt->cm = NULL;
+        }
+        wolfSSL_X509_STORE_free(ctx->x509_store_pt);
         ctx->x509_store.cache = str->cache;
         ctx->x509_store_pt    = str; /* take ownership of store and free it
                                         with CTX free */
@@ -11586,8 +14682,44 @@
             return;
         ctx->verify_cb = verify_cb;
     }
+
+#if defined(WOLFSSL_QT) || defined(OPENSSL_ALL)
+    void wolfSSL_X509_STORE_set_verify_cb(WOLFSSL_X509_STORE *st,
+                                 WOLFSSL_X509_STORE_CTX_verify_cb verify_cb)
+    {
+        WOLFSSL_ENTER("WOLFSSL_X509_STORE_set_verify_cb");
+        if (st != NULL) {
+            st->verify_cb = verify_cb;
+        }
+    }
+#endif
+
+
 #endif /* !NO_CERTS */
 
+    WOLFSSL_BIO_METHOD* wolfSSL_BIO_f_md(void)
+    {
+        static WOLFSSL_BIO_METHOD meth;
+
+        WOLFSSL_ENTER("wolfSSL_BIO_f_md");
+        meth.type = WOLFSSL_BIO_MD;
+
+        return &meth;
+    }
+
+    /* return the context and initialize the BIO state */
+    int wolfSSL_BIO_get_md_ctx(WOLFSSL_BIO *bio, WOLFSSL_EVP_MD_CTX **mdcp)
+    {
+        int ret = WOLFSSL_FAILURE;
+
+        if ((bio != NULL) && (mdcp != NULL)) {
+            *mdcp = (WOLFSSL_EVP_MD_CTX*)bio->ptr;
+            ret = WOLFSSL_SUCCESS;
+        }
+
+        return ret;
+    }
+
     WOLFSSL_BIO_METHOD* wolfSSL_BIO_f_buffer(void)
     {
         static WOLFSSL_BIO_METHOD meth;
@@ -11613,7 +14745,7 @@
     {
         static WOLFSSL_BIO_METHOD bio_meth;
 
-        WOLFSSL_ENTER("wolfSSL_BIO_f_bio");
+        WOLFSSL_ENTER("wolfSSL_BIO_s_bio");
         bio_meth.type = WOLFSSL_BIO_BIO;
 
         return &bio_meth;
@@ -11625,7 +14757,7 @@
     {
         static WOLFSSL_BIO_METHOD file_meth;
 
-        WOLFSSL_ENTER("wolfSSL_BIO_f_file");
+        WOLFSSL_ENTER("wolfSSL_BIO_s_file");
         file_meth.type = WOLFSSL_BIO_FILE;
 
         return &file_meth;
@@ -11637,7 +14769,7 @@
     {
         static WOLFSSL_BIO_METHOD meth;
 
-        WOLFSSL_ENTER("BIO_f_ssl");
+        WOLFSSL_ENTER("wolfSSL_BIO_f_ssl");
         meth.type = WOLFSSL_BIO_SSL;
 
         return &meth;
@@ -11648,7 +14780,7 @@
     {
         static WOLFSSL_BIO_METHOD meth;
 
-        WOLFSSL_ENTER("BIO_s_socket");
+        WOLFSSL_ENTER("wolfSSL_BIO_s_socket");
         meth.type = WOLFSSL_BIO_SOCKET;
 
         return &meth;
@@ -11662,8 +14794,8 @@
         WOLFSSL_ENTER("BIO_new_socket");
         if (bio) {
             bio->type  = WOLFSSL_BIO_SOCKET;
-            bio->close = (byte)closeF;
-            bio->fd    = sfd;
+            bio->shutdown = (byte)closeF;
+            bio->num   = sfd;
         }
         return bio;
     }
@@ -11672,7 +14804,7 @@
     int wolfSSL_BIO_eof(WOLFSSL_BIO* b)
     {
         WOLFSSL_ENTER("BIO_eof");
-        if (b->eof)
+        if ((b != NULL) && (b->eof))
             return 1;
 
         return 0;
@@ -11684,22 +14816,34 @@
         WOLFSSL_ENTER("wolfSSL_BIO_set_ssl");
 
         if (b != NULL) {
-            b->ssl   = ssl;
-            b->close = (byte)closeF;
+            b->ptr   = ssl;
+            b->shutdown = (byte)closeF;
     /* add to ssl for bio free if SSL_free called before/instead of free_all? */
         }
 
         return 0;
     }
 
-
+#ifndef NO_FILESYSTEM
     long wolfSSL_BIO_set_fd(WOLFSSL_BIO* b, int fd, int closeF)
     {
         WOLFSSL_ENTER("wolfSSL_BIO_set_fd");
 
         if (b != NULL) {
-            b->fd    = fd;
-            b->close = (byte)closeF;
+            b->num = fd;
+            b->shutdown = (byte)closeF;
+        }
+
+        return WOLFSSL_SUCCESS;
+    }
+#endif
+
+    /* Sets the close flag */
+    int wolfSSL_BIO_set_close(WOLFSSL_BIO *b, long flag)
+    {
+        WOLFSSL_ENTER("wolfSSL_BIO_set_close");
+        if (b != NULL) {
+            b->shutdown = (byte)flag;
         }
 
         return WOLFSSL_SUCCESS;
@@ -11708,15 +14852,25 @@
 
     WOLFSSL_BIO* wolfSSL_BIO_new(WOLFSSL_BIO_METHOD* method)
     {
-        WOLFSSL_BIO* bio = (WOLFSSL_BIO*) XMALLOC(sizeof(WOLFSSL_BIO), 0,
-                                                DYNAMIC_TYPE_OPENSSL);
+        WOLFSSL_BIO* bio;
+
         WOLFSSL_ENTER("wolfSSL_BIO_new");
+        if (method == NULL) {
+            WOLFSSL_MSG("Bad method pointer passed in");
+            return NULL;
+        }
+
+        bio = (WOLFSSL_BIO*) XMALLOC(sizeof(WOLFSSL_BIO), 0,
+                DYNAMIC_TYPE_OPENSSL);
         if (bio) {
             XMEMSET(bio, 0, sizeof(WOLFSSL_BIO));
-            bio->type   = method->type;
-            bio->close  = BIO_CLOSE; /* default to close things */
+            bio->type = (byte)method->type;
+            bio->method = method;
+            bio->shutdown = BIO_CLOSE; /* default to close things */
+            bio->init = 1;
             if (method->type != WOLFSSL_BIO_FILE &&
-                    method->type != WOLFSSL_BIO_SOCKET) {
+                    method->type != WOLFSSL_BIO_SOCKET &&
+                    method->type != WOLFSSL_BIO_MD) {
                 bio->mem_buf =(WOLFSSL_BUF_MEM*)XMALLOC(sizeof(WOLFSSL_BUF_MEM),
                                                        0, DYNAMIC_TYPE_OPENSSL);
                 if (bio->mem_buf == NULL) {
@@ -11724,27 +14878,27 @@
                     wolfSSL_BIO_free(bio);
                     return NULL;
                 }
-                bio->mem_buf->data = (char*)bio->mem;
+                bio->mem_buf->data = (char*)bio->ptr;
+            }
+
+            if (method->type == WOLFSSL_BIO_MD) {
+                bio->ptr = wolfSSL_EVP_MD_CTX_new();
+                if (bio->ptr == NULL) {
+                    WOLFSSL_MSG("Memory error");
+                    wolfSSL_BIO_free(bio);
+                    return NULL;
+                }
+            }
+
+            /* check if is custom method */
+            if (method->createCb) {
+                method->createCb(bio);
             }
         }
         return bio;
     }
 
-
-    int wolfSSL_BIO_get_mem_data(WOLFSSL_BIO* bio, void* p)
-    {
-        WOLFSSL_ENTER("wolfSSL_BIO_get_mem_data");
-
-        if (bio == NULL || p == NULL)
-            return WOLFSSL_FATAL_ERROR;
-
-        *(byte **)p = bio->mem;
-
-        return bio->memLen;
-    }
-
-
-    WOLFSSL_BIO* wolfSSL_BIO_new_mem_buf(void* buf, int len)
+    WOLFSSL_BIO* wolfSSL_BIO_new_mem_buf(const void* buf, int len)
     {
         WOLFSSL_BIO* bio = NULL;
 
@@ -11757,18 +14911,18 @@
             return bio;
         }
 
-        bio->memLen = bio->wrSz = len;
-        bio->mem    = (byte*)XMALLOC(len, 0, DYNAMIC_TYPE_OPENSSL);
-        if (bio->mem == NULL) {
+        bio->num = bio->wrSz = len;
+        bio->ptr = (byte*)XMALLOC(len, 0, DYNAMIC_TYPE_OPENSSL);
+        if (bio->ptr == NULL) {
             wolfSSL_BIO_free(bio);
             return NULL;
         }
         if (bio->mem_buf != NULL) {
-            bio->mem_buf->data = (char*)bio->mem;
-            bio->mem_buf->length = bio->memLen;
-        }
-
-        XMEMCPY(bio->mem, buf, len);
+            bio->mem_buf->data = (char*)bio->ptr;
+            bio->mem_buf->length = bio->num;
+        }
+
+        XMEMCPY(bio->ptr, buf, len);
 
         return bio;
     }
@@ -11776,43 +14930,62 @@
     /*
      * Note : If the flag BIO_NOCLOSE is set then freeing memory buffers is up
      *        to the application.
+     * Returns 1 on success, 0 on failure
      */
     int wolfSSL_BIO_free(WOLFSSL_BIO* bio)
     {
+        int ret;
+
         /* unchain?, doesn't matter in goahead since from free all */
         WOLFSSL_ENTER("wolfSSL_BIO_free");
         if (bio) {
+
+            if (bio->infoCb) {
+                /* info callback is called before free */
+                ret = (int)bio->infoCb(bio, WOLFSSL_BIO_CB_FREE, NULL, 0, 0, 1);
+                if (ret <= 0) {
+                    return ret;
+                }
+            }
+
+            /* call custom set free callback */
+            if (bio->method && bio->method->freeCb) {
+                bio->method->freeCb(bio);
+            }
+
             /* remove from pair by setting the paired bios pair to NULL */
             if (bio->pair != NULL) {
                 bio->pair->pair = NULL;
             }
 
-            if (bio->close) {
-                if (bio->ssl)
-                    wolfSSL_free(bio->ssl);
-                if (bio->fd)
-                    CloseSocket(bio->fd);
+            if (bio->shutdown) {
+                if (bio->type == WOLFSSL_BIO_SSL && bio->ptr)
+                    wolfSSL_free((WOLFSSL*)bio->ptr);
+            #ifdef CloseSocket
+                if (bio->type == WOLFSSL_BIO_SOCKET && bio->num)
+                    CloseSocket(bio->num);
+            #endif
             }
 
         #ifndef NO_FILESYSTEM
-            if (bio->type == WOLFSSL_BIO_FILE && bio->close == BIO_CLOSE) {
-                if (bio->file) {
-                    XFCLOSE(bio->file);
-                }
-            }
-        #endif
-
-            if (bio->close != BIO_NOCLOSE) {
-                if (bio->mem != NULL) {
+            if (bio->type == WOLFSSL_BIO_FILE && bio->shutdown == BIO_CLOSE) {
+                if (bio->ptr) {
+                    XFCLOSE((XFILE)bio->ptr);
+                }
+            }
+        #endif
+
+            if (bio->shutdown != BIO_NOCLOSE) {
+                if (bio->type == WOLFSSL_BIO_MEMORY && bio->ptr != NULL) {
                     if (bio->mem_buf != NULL) {
-                        if (bio->mem_buf->data != (char*)bio->mem) {
-                            XFREE(bio->mem, bio->heap, DYNAMIC_TYPE_OPENSSL);
-                            bio->mem = NULL;
+                        if (bio->mem_buf->data != (char*)bio->ptr) {
+                            XFREE(bio->ptr, bio->heap, DYNAMIC_TYPE_OPENSSL);
+                            bio->ptr = NULL;
                         }
                     }
                     else {
-                        XFREE(bio->mem, bio->heap, DYNAMIC_TYPE_OPENSSL);
-                        bio->mem = NULL;
+                        XFREE(bio->ptr, bio->heap, DYNAMIC_TYPE_OPENSSL);
+                        bio->ptr = NULL;
                     }
                 }
                 if (bio->mem_buf != NULL) {
@@ -11821,9 +14994,19 @@
                 }
             }
 
+            if (bio->type == WOLFSSL_BIO_MD) {
+                wolfSSL_EVP_MD_CTX_free((WOLFSSL_EVP_MD_CTX*)bio->ptr);
+            }
+
             XFREE(bio, 0, DYNAMIC_TYPE_OPENSSL);
         }
-        return 0;
+        return 1;
+    }
+
+    /* like BIO_free, but no return value */
+    void wolfSSL_BIO_vfree(WOLFSSL_BIO* bio)
+    {
+        wolfSSL_BIO_free(bio);
     }
 
 
@@ -11847,15 +15030,6 @@
 
         return top;
     }
-
-
-    int wolfSSL_BIO_flush(WOLFSSL_BIO* bio)
-    {
-        /* for wolfSSL no flushing needed */
-        WOLFSSL_ENTER("BIO_flush");
-        (void)bio;
-        return 1;
-    }
 #endif /* OPENSSL_EXTRA */
 
 #ifdef WOLFSSL_ENCRYPTED_KEYS
@@ -11864,16 +15038,16 @@
                                                    void* userdata)
     {
         WOLFSSL_ENTER("SSL_CTX_set_default_passwd_cb_userdata");
-        ctx->passwd_userdata = userdata;
+        if (ctx)
+            ctx->passwd_userdata = userdata;
     }
 
 
     void wolfSSL_CTX_set_default_passwd_cb(WOLFSSL_CTX* ctx,pem_password_cb* cb)
     {
         WOLFSSL_ENTER("SSL_CTX_set_default_passwd_cb");
-        if (ctx != NULL) {
+        if (ctx)
             ctx->passwd_cb = cb;
-        }
     }
 
     pem_password_cb* wolfSSL_CTX_get_default_passwd_cb(WOLFSSL_CTX *ctx)
@@ -11895,51 +15069,6 @@
         return ctx->passwd_userdata;
     }
 
-#if !defined(NO_PWDBASED) && (defined(OPENSSL_EXTRA) || \
-        defined(OPENSSL_EXTRA_X509_SMALL) || defined(HAVE_WEBSERVER))
-
-    int wolfSSL_EVP_BytesToKey(const WOLFSSL_EVP_CIPHER* type,
-                       const WOLFSSL_EVP_MD* md, const byte* salt,
-                       const byte* data, int sz, int count, byte* key, byte* iv)
-    {
-        int ret;
-        int hashType = WC_HASH_TYPE_NONE;
-    #ifdef WOLFSSL_SMALL_STACK
-        EncryptedInfo* info = NULL;
-    #else
-        EncryptedInfo  info[1];
-    #endif
-
-    #ifdef WOLFSSL_SMALL_STACK
-        info = (EncryptedInfo*)XMALLOC(sizeof(EncryptedInfo), NULL,
-                                       DYNAMIC_TYPE_ENCRYPTEDINFO);
-        if (info == NULL) {
-            WOLFSSL_MSG("malloc failed");
-            return WOLFSSL_FAILURE;
-        }
-    #endif
-
-        XMEMSET(info, 0, sizeof(EncryptedInfo));
-        info->ivSz = EVP_SALT_SIZE;
-
-        ret = wolfSSL_EVP_get_hashinfo(md, &hashType, NULL);
-        if (ret == 0)
-            ret = wc_EncryptedInfoGet(info, type);
-        if (ret == 0)
-            ret = wc_PBKDF1_ex(key, info->keySz, iv, info->ivSz, data, sz, salt,
-                               EVP_SALT_SIZE, count, hashType, NULL);
-
-    #ifdef WOLFSSL_SMALL_STACK
-        XFREE(info, NULL, DYNAMIC_TYPE_ENCRYPTEDINFO);
-    #endif
-
-        if (ret <= 0)
-            return 0; /* failure - for compatibility */
-
-        return ret;
-    }
-
-#endif /* !NO_PWDBASED && (OPENSSL_EXTRA || OPENSSL_EXTRA_X509_SMALL || HAVE_WEBSERVER) */
 #endif /* WOLFSSL_ENCRYPTED_KEYS */
 
 
@@ -12010,9 +15139,38 @@
 #endif
     }
 
+#if (defined(OPENSSL_EXTRA) || defined(DEBUG_WOLFSSL_VERBOSE))
+    /* print out and clear all errors */
+    void wolfSSL_ERR_print_errors(WOLFSSL_BIO* bio)
+    {
+        const char* file = NULL;
+        const char* reason = NULL;
+        int ret;
+        int line = 0;
+        char buf[WOLFSSL_MAX_ERROR_SZ * 2];
+
+        WOLFSSL_ENTER("wolfSSL_ERR_print_errors");
+
+        if (bio == NULL) {
+            WOLFSSL_MSG("BIO passed in was null");
+            return;
+        }
+
+        do {
+        ret = wc_PeekErrorNode(0, &file, &reason, &line);
+        if (ret >= 0) {
+            const char* r = wolfSSL_ERR_reason_error_string(0 - ret);
+            XSNPRINTF(buf, sizeof(buf), "error:%d:wolfSSL library:%s:%s:%d\n",
+                    ret, r, file, line);
+            wolfSSL_BIO_write(bio, buf, (int)XSTRLEN(buf));
+            wc_RemoveErrorNode(0);
+        }
+        } while (ret >= 0);
+    }
+#endif /* OPENSSL_EXTRA || DEBUG_WOLFSSL_VERBOSE */
+
 #endif /* OPENSSL_EXTRA || HAVE_WEBSERVER */
 
-
 #ifdef OPENSSL_EXTRA
 
 #if !defined(NO_WOLFSSL_SERVER)
@@ -12045,7 +15203,125 @@
     XMEMCPY(out, ssl->arrays->serverRandom, size);
     return size;
 }
-#endif /* !defined(NO_WOLFSSL_SERVER) */
+
+
+/* Used to get the peer ephemeral public key sent during the connection
+ * NOTE: currently wolfSSL_KeepHandshakeResources(WOLFSSL* ssl) must be called
+ *       before the ephemeral key is stored.
+ * return WOLFSSL_SUCCESS on success */
+int wolfSSL_get_server_tmp_key(const WOLFSSL* ssl, WOLFSSL_EVP_PKEY** pkey)
+{
+    WOLFSSL_EVP_PKEY* ret = NULL;
+
+    WOLFSSL_ENTER("wolfSSL_get_server_tmp_key");
+
+    if (ssl == NULL || pkey == NULL) {
+        WOLFSSL_MSG("Bad argument passed in");
+        return WOLFSSL_FAILURE;
+    }
+
+#ifdef HAVE_ECC
+    if (ssl->peerEccKey != NULL) {
+        unsigned char* der;
+        const unsigned char* pt;
+        unsigned int   derSz = 0;
+        int sz;
+
+        if (wc_ecc_export_x963(ssl->peerEccKey, NULL, &derSz) !=
+                LENGTH_ONLY_E) {
+            WOLFSSL_MSG("get ecc der size failed");
+            return WOLFSSL_FAILURE;
+        }
+
+        derSz += MAX_SEQ_SZ + (2 * MAX_ALGO_SZ) + MAX_SEQ_SZ + TRAILING_ZERO;
+        der = (unsigned char*)XMALLOC(derSz, ssl->heap, DYNAMIC_TYPE_KEY);
+        if (der == NULL) {
+            WOLFSSL_MSG("Memory error");
+            return WOLFSSL_FAILURE;
+        }
+
+        if ((sz = wc_EccPublicKeyToDer(ssl->peerEccKey, der, derSz, 1)) <= 0) {
+            WOLFSSL_MSG("get ecc der failed");
+            XFREE(der, ssl->heap, DYNAMIC_TYPE_KEY);
+            return WOLFSSL_FAILURE;
+        }
+        pt = der; /* in case pointer gets advanced */
+        ret = wolfSSL_d2i_PUBKEY(NULL, &pt, sz);
+        XFREE(der, ssl->heap, DYNAMIC_TYPE_KEY);
+    }
+#endif
+
+    *pkey = ret;
+    if (ret == NULL)
+        return WOLFSSL_FAILURE;
+    else
+        return WOLFSSL_SUCCESS;
+}
+
+#endif /* !NO_WOLFSSL_SERVER */
+
+int wolfSSL_CTX_set_min_proto_version(WOLFSSL_CTX* ctx, int version)
+{
+    WOLFSSL_ENTER("wolfSSL_CTX_set_min_proto_version");
+
+    if (ctx == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    switch (version) {
+#if defined(WOLFSSL_ALLOW_SSLV3) && !defined(NO_OLD_TLS)
+        case SSL3_VERSION:
+            ctx->minDowngrade = SSLv3_MINOR;
+            break;
+#endif
+#ifndef NO_TLS
+    #ifndef NO_OLD_TLS
+        #ifdef WOLFSSL_ALLOW_TLSV10
+        case TLS1_VERSION:
+            ctx->minDowngrade = TLSv1_MINOR;
+            break;
+        #endif
+        case TLS1_1_VERSION:
+            ctx->minDowngrade = TLSv1_1_MINOR;
+            break;
+    #endif
+    #ifndef WOLFSSL_NO_TLS12
+        case TLS1_2_VERSION:
+            ctx->minDowngrade = TLSv1_2_MINOR;
+            break;
+    #endif
+    #ifdef WOLFSSL_TLS13
+        case TLS1_3_VERSION:
+            ctx->minDowngrade = TLSv1_3_MINOR;
+            break;
+    #endif
+#endif
+#ifdef WOLFSSL_DTLS
+    #ifndef NO_OLD_TLS
+        case DTLS1_VERSION:
+            ctx->minDowngrade = DTLS_MINOR;
+            break;
+    #endif
+        case DTLS1_2_VERSION:
+            ctx->minDowngrade = DTLSv1_2_MINOR;
+            break;
+#endif
+        default:
+            return BAD_FUNC_ARG;
+    }
+
+    return WOLFSSL_SUCCESS;
+}
+
+int wolfSSL_CTX_set_max_proto_version(WOLFSSL_CTX* ctx, int ver)
+{
+    WOLFSSL_ENTER("wolfSSL_CTX_set_max_proto_version");
+
+    /* supported only at compile-time only */
+    (void)ctx;
+    (void)ver;
+    return WOLFSSL_SUCCESS;
+}
 
 
 #if !defined(NO_WOLFSSL_CLIENT)
@@ -12093,6 +15369,10 @@
         return SSLEAY_VERSION_NUMBER;
     }
 
+    unsigned long wolfSSL_OpenSSL_version_num(void)
+    {
+        return OPENSSL_VERSION_NUMBER;
+    }
 
     const char* wolfSSLeay_version(int type)
     {
@@ -12431,926 +15711,213 @@
 
 #endif /* WOLFSSL_SHA512 */
 
-    static const struct s_ent {
-        const unsigned char macType;
-        const char *name;
-    } md_tbl[] = {
-    #ifndef NO_MD4
-         {MD4, "MD4"},
-    #endif /* NO_MD4 */
-
-    #ifndef NO_MD5
-        {WC_MD5, "MD5"},
-    #endif /* NO_MD5 */
-
-    #ifndef NO_SHA
-        {WC_SHA, "SHA"},
-    #endif /* NO_SHA */
-
-    #ifdef WOLFSSL_SHA224
-        {WC_SHA224, "SHA224"},
-    #endif /* WOLFSSL_SHA224 */
-    #ifndef NO_SHA256
-        {WC_SHA256, "SHA256"},
-    #endif
-
-    #ifdef WOLFSSL_SHA384
-        {WC_SHA384, "SHA384"},
-    #endif /* WOLFSSL_SHA384 */
-    #ifdef WOLFSSL_SHA512
-        {WC_SHA512, "SHA512"},
-    #endif /* WOLFSSL_SHA512 */
-        {0, NULL}
-    };
-
-const WOLFSSL_EVP_MD *wolfSSL_EVP_get_digestbyname(const char *name)
-{
-    static const struct alias {
-        const char *name;
-        const char *alias;
-    } alias_tbl[] =
-    {
-        {"MD4", "ssl3-md4"},
-        {"MD5", "ssl3-md5"},
-        {"SHA", "ssl3-sha1"},
-        {"SHA", "SHA1"},
-        { NULL, NULL}
-    };
-
-    const struct alias  *al;
-    const struct s_ent *ent;
-
-    for (al = alias_tbl; al->name != NULL; al++)
-        if(XSTRNCMP(name, al->alias, XSTRLEN(al->alias)+1) == 0) {
-            name = al->name;
-            break;
-        }
-
-    for (ent = md_tbl; ent->name != NULL; ent++)
-        if(XSTRNCMP(name, ent->name, XSTRLEN(ent->name)+1) == 0) {
-            return (EVP_MD *)ent->name;
-        }
-    return NULL;
-}
-
-static WOLFSSL_EVP_MD *wolfSSL_EVP_get_md(const unsigned char type)
-{
-    const struct s_ent *ent ;
-    WOLFSSL_ENTER("EVP_get_md");
-    for( ent = md_tbl; ent->name != NULL; ent++){
-        if(type == ent->macType) {
-            return (WOLFSSL_EVP_MD *)ent->name;
-        }
-    }
-    return (WOLFSSL_EVP_MD *)"";
-}
-
-int wolfSSL_EVP_MD_type(const WOLFSSL_EVP_MD *md)
-{
-    const struct s_ent *ent ;
-    WOLFSSL_ENTER("EVP_MD_type");
-    for( ent = md_tbl; ent->name != NULL; ent++){
-        if(XSTRNCMP((const char *)md, ent->name, XSTRLEN(ent->name)+1) == 0) {
-            return ent->macType;
-        }
-    }
-    return 0;
-}
-
-
-#ifndef NO_MD4
-
-    /* return a pointer to MD4 EVP type */
-    const WOLFSSL_EVP_MD* wolfSSL_EVP_md4(void)
-    {
-        WOLFSSL_ENTER("wolfSSL_EVP_md4");
-        return EVP_get_digestbyname("MD4");
-    }
-
-#endif /* !NO_MD4 */
-
-
-#ifndef NO_MD5
-
-    const WOLFSSL_EVP_MD* wolfSSL_EVP_md5(void)
-    {
-        WOLFSSL_ENTER("EVP_md5");
-        return EVP_get_digestbyname("MD5");
-    }
-
-#endif /* !NO_MD5 */
-
-
-#ifndef NO_SHA
-    const WOLFSSL_EVP_MD* wolfSSL_EVP_sha1(void)
-    {
-        WOLFSSL_ENTER("EVP_sha1");
-        return EVP_get_digestbyname("SHA");
-    }
-#endif /* NO_SHA */
-
-#ifdef WOLFSSL_SHA224
-
-    const WOLFSSL_EVP_MD* wolfSSL_EVP_sha224(void)
-    {
-        WOLFSSL_ENTER("EVP_sha224");
-        return EVP_get_digestbyname("SHA224");
-    }
-
-#endif /* WOLFSSL_SHA224 */
-
-
-    const WOLFSSL_EVP_MD* wolfSSL_EVP_sha256(void)
-    {
-        WOLFSSL_ENTER("EVP_sha256");
-        return EVP_get_digestbyname("SHA256");
-    }
-
-#ifdef WOLFSSL_SHA384
-
-    const WOLFSSL_EVP_MD* wolfSSL_EVP_sha384(void)
-    {
-        WOLFSSL_ENTER("EVP_sha384");
-        return EVP_get_digestbyname("SHA384");
-    }
-
-#endif /* WOLFSSL_SHA384 */
-
-#ifdef WOLFSSL_SHA512
-
-    const WOLFSSL_EVP_MD* wolfSSL_EVP_sha512(void)
-    {
-        WOLFSSL_ENTER("EVP_sha512");
-        return EVP_get_digestbyname("SHA512");
-    }
-
-#endif /* WOLFSSL_SHA512 */
-
-
-    WOLFSSL_EVP_MD_CTX *wolfSSL_EVP_MD_CTX_new(void)
-    {
-        WOLFSSL_EVP_MD_CTX* ctx;
-        WOLFSSL_ENTER("EVP_MD_CTX_new");
-        ctx = (WOLFSSL_EVP_MD_CTX*)XMALLOC(sizeof *ctx, NULL,
-                                                       DYNAMIC_TYPE_OPENSSL);
-        if (ctx){
-            wolfSSL_EVP_MD_CTX_init(ctx);
-        }
-        return ctx;
-    }
-
-    WOLFSSL_API void wolfSSL_EVP_MD_CTX_free(WOLFSSL_EVP_MD_CTX *ctx)
-    {
-        if (ctx) {
-            WOLFSSL_ENTER("EVP_MD_CTX_free");
-                wolfSSL_EVP_MD_CTX_cleanup(ctx);
-                XFREE(ctx, NULL, DYNAMIC_TYPE_OPENSSL);
-            }
-    }
-
-
-    /* returns the type of message digest used by the ctx */
-    int wolfSSL_EVP_MD_CTX_type(const WOLFSSL_EVP_MD_CTX *ctx) {
-        WOLFSSL_ENTER("EVP_MD_CTX_type");
-        return ctx->macType;
-    }
-
-
-    /* returns WOLFSSL_SUCCESS on success */
-    int wolfSSL_EVP_MD_CTX_copy(WOLFSSL_EVP_MD_CTX *out, const WOLFSSL_EVP_MD_CTX *in)
-    {
-        return wolfSSL_EVP_MD_CTX_copy_ex(out, in);
-    }
-
-
-    /* copies structure in to the structure out
-     *
-     * returns WOLFSSL_SUCCESS on success */
-    int wolfSSL_EVP_MD_CTX_copy_ex(WOLFSSL_EVP_MD_CTX *out, const WOLFSSL_EVP_MD_CTX *in)
-    {
-        if ((out == NULL) || (in == NULL)) return WOLFSSL_FAILURE;
-        WOLFSSL_ENTER("EVP_CIPHER_MD_CTX_copy_ex");
-        XMEMCPY(out, in, sizeof(WOLFSSL_EVP_MD_CTX));
-        return WOLFSSL_SUCCESS;
-    }
-
-    void wolfSSL_EVP_MD_CTX_init(WOLFSSL_EVP_MD_CTX* ctx)
-    {
-        WOLFSSL_ENTER("EVP_CIPHER_MD_CTX_init");
-        XMEMSET(ctx, 0, sizeof(WOLFSSL_EVP_MD_CTX));
-    }
-
-    const WOLFSSL_EVP_MD *wolfSSL_EVP_MD_CTX_md(const WOLFSSL_EVP_MD_CTX *ctx)
-    {
-        if (ctx == NULL)
-            return NULL;
-        WOLFSSL_ENTER("EVP_MD_CTX_md");
-        return (const WOLFSSL_EVP_MD *)wolfSSL_EVP_get_md(ctx->macType);
-    }
-
-    #ifndef NO_AES
-
-    #ifdef HAVE_AES_CBC
-    #ifdef WOLFSSL_AES_128
-    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_128_cbc(void)
-    {
-        WOLFSSL_ENTER("wolfSSL_EVP_aes_128_cbc");
-        if (EVP_AES_128_CBC == NULL)
-            wolfSSL_EVP_init();
-        return EVP_AES_128_CBC;
-    }
-    #endif /* WOLFSSL_AES_128 */
-
-
-    #ifdef WOLFSSL_AES_192
-    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_192_cbc(void)
-    {
-        WOLFSSL_ENTER("wolfSSL_EVP_aes_192_cbc");
-        if (EVP_AES_192_CBC == NULL)
-            wolfSSL_EVP_init();
-        return EVP_AES_192_CBC;
-    }
-    #endif /* WOLFSSL_AES_192 */
-
-
-    #ifdef WOLFSSL_AES_256
-    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_256_cbc(void)
-    {
-        WOLFSSL_ENTER("wolfSSL_EVP_aes_256_cbc");
-        if (EVP_AES_256_CBC == NULL)
-            wolfSSL_EVP_init();
-        return EVP_AES_256_CBC;
-    }
-    #endif /* WOLFSSL_AES_256 */
-    #endif /* HAVE_AES_CBC */
-
-
-    #ifdef WOLFSSL_AES_128
-    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_128_ctr(void)
-    {
-        WOLFSSL_ENTER("wolfSSL_EVP_aes_128_ctr");
-        if (EVP_AES_128_CTR == NULL)
-            wolfSSL_EVP_init();
-        return EVP_AES_128_CTR;
-    }
-    #endif /* WOLFSSL_AES_2128 */
-
-
-    #ifdef WOLFSSL_AES_192
-    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_192_ctr(void)
-    {
-        WOLFSSL_ENTER("wolfSSL_EVP_aes_192_ctr");
-        if (EVP_AES_192_CTR == NULL)
-            wolfSSL_EVP_init();
-        return EVP_AES_192_CTR;
-    }
-    #endif /* WOLFSSL_AES_192 */
-
-
-    #ifdef WOLFSSL_AES_256
-    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_256_ctr(void)
-    {
-        WOLFSSL_ENTER("wolfSSL_EVP_aes_256_ctr");
-        if (EVP_AES_256_CTR == NULL)
-            wolfSSL_EVP_init();
-        return EVP_AES_256_CTR;
-    }
-    #endif /* WOLFSSL_AES_256 */
-
-    #ifdef WOLFSSL_AES_128
-    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_128_ecb(void)
-    {
-        WOLFSSL_ENTER("wolfSSL_EVP_aes_128_ecb");
-        if (EVP_AES_128_ECB == NULL)
-            wolfSSL_EVP_init();
-        return EVP_AES_128_ECB;
-    }
-    #endif /* WOLFSSL_AES_128 */
-
-
-    #ifdef WOLFSSL_AES_192
-    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_192_ecb(void)
-    {
-        WOLFSSL_ENTER("wolfSSL_EVP_aes_192_ecb");
-        if (EVP_AES_192_ECB == NULL)
-            wolfSSL_EVP_init();
-        return EVP_AES_192_ECB;
-    }
-    #endif /* WOLFSSL_AES_192*/
-
-
-    #ifdef WOLFSSL_AES_256
-    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_256_ecb(void)
-    {
-        WOLFSSL_ENTER("wolfSSL_EVP_aes_256_ecb");
-        if (EVP_AES_256_ECB == NULL)
-            wolfSSL_EVP_init();
-        return EVP_AES_256_ECB;
-    }
-    #endif /* WOLFSSL_AES_256 */
-    #endif /* NO_AES */
-
-#ifndef NO_DES3
-    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_des_cbc(void)
-    {
-        WOLFSSL_ENTER("wolfSSL_EVP_des_cbc");
-        if (EVP_DES_CBC == NULL)
-            wolfSSL_EVP_init();
-        return EVP_DES_CBC;
-    }
-#ifdef WOLFSSL_DES_ECB
-    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_des_ecb(void)
-    {
-        WOLFSSL_ENTER("wolfSSL_EVP_des_ecb");
-        if (EVP_DES_ECB == NULL)
-            wolfSSL_EVP_init();
-        return EVP_DES_ECB;
-    }
-#endif
-    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_des_ede3_cbc(void)
-    {
-        WOLFSSL_ENTER("wolfSSL_EVP_des_ede3_cbc");
-        if (EVP_DES_EDE3_CBC == NULL)
-            wolfSSL_EVP_init();
-        return EVP_DES_EDE3_CBC;
-    }
-#ifdef WOLFSSL_DES_ECB
-    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_des_ede3_ecb(void)
-    {
-        WOLFSSL_ENTER("wolfSSL_EVP_des_ede3_ecb");
-        if (EVP_DES_EDE3_ECB == NULL)
-            wolfSSL_EVP_init();
-        return EVP_DES_EDE3_ECB;
-    }
-#endif
-#endif /* NO_DES3 */
-
-#ifndef NO_RC4
-    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_rc4(void)
-    {
-        static const char* type = "ARC4";
-        WOLFSSL_ENTER("wolfSSL_EVP_rc4");
-        return type;
-    }
-#endif
-
-#ifdef HAVE_IDEA
-    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_idea_cbc(void)
-    {
-        WOLFSSL_ENTER("wolfSSL_EVP_idea_cbc");
-        if (EVP_IDEA_CBC == NULL)
-            wolfSSL_EVP_init();
-        return EVP_IDEA_CBC;
-    }
-#endif
-    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_enc_null(void)
-    {
-        static const char* type = "NULL";
-        WOLFSSL_ENTER("wolfSSL_EVP_enc_null");
-        return type;
-    }
-
-
-    int wolfSSL_EVP_MD_CTX_cleanup(WOLFSSL_EVP_MD_CTX* ctx)
-    {
-        WOLFSSL_ENTER("EVP_MD_CTX_cleanup");
-        ForceZero(ctx, sizeof(*ctx));
-        ctx->macType = 0xFF;
-        return 1;
-    }
-
-
-
-    void wolfSSL_EVP_CIPHER_CTX_init(WOLFSSL_EVP_CIPHER_CTX* ctx)
-    {
-        WOLFSSL_ENTER("EVP_CIPHER_CTX_init");
-        if (ctx) {
-            ctx->cipherType = 0xff;   /* no init */
-            ctx->keyLen     = 0;
-            ctx->enc        = 1;      /* start in encrypt mode */
-        }
-    }
-
-
-    /* WOLFSSL_SUCCESS on ok */
-    int wolfSSL_EVP_CIPHER_CTX_cleanup(WOLFSSL_EVP_CIPHER_CTX* ctx)
-    {
-        WOLFSSL_ENTER("EVP_CIPHER_CTX_cleanup");
-        if (ctx) {
-            ctx->cipherType = 0xff;  /* no more init */
-            ctx->keyLen     = 0;
-        }
-
-        return WOLFSSL_SUCCESS;
-    }
-
-
-    /* return WOLFSSL_SUCCESS on ok, 0 on failure to match API compatibility */
-    int  wolfSSL_EVP_CipherInit(WOLFSSL_EVP_CIPHER_CTX* ctx,
-                               const WOLFSSL_EVP_CIPHER* type, const byte* key,
-                               const byte* iv, int enc)
-    {
-        int ret = 0;
-        (void)key;
-        (void)iv;
-        (void)enc;
-
-        WOLFSSL_ENTER("wolfSSL_EVP_CipherInit");
-        if (ctx == NULL) {
-            WOLFSSL_MSG("no ctx");
-            return 0;   /* failure */
-        }
-
-        if (type == NULL && ctx->cipherType == WOLFSSL_EVP_CIPH_TYPE_INIT) {
-            WOLFSSL_MSG("no type set");
-            return 0;   /* failure */
-        }
-        if (ctx->cipherType == WOLFSSL_EVP_CIPH_TYPE_INIT){
-            ctx->bufUsed = 0;
-            ctx->lastUsed = 0;
-            ctx->flags   = 0;
-        }
-#ifndef NO_AES
-    #ifdef HAVE_AES_CBC
-        #ifdef WOLFSSL_AES_128
-        if (ctx->cipherType == AES_128_CBC_TYPE ||
-            (type && XSTRNCMP(type, EVP_AES_128_CBC, EVP_AES_SIZE) == 0)) {
-            WOLFSSL_MSG("EVP_AES_128_CBC");
-            ctx->cipherType = AES_128_CBC_TYPE;
-            ctx->flags     &= ~WOLFSSL_EVP_CIPH_MODE;
-            ctx->flags     |= WOLFSSL_EVP_CIPH_CBC_MODE;
-            ctx->keyLen     = 16;
-            ctx->block_size = AES_BLOCK_SIZE;
-            if (enc == 0 || enc == 1)
-                ctx->enc = enc ? 1 : 0;
-            if (key) {
-                ret = wc_AesSetKey(&ctx->cipher.aes, key, ctx->keyLen, iv,
-                                ctx->enc ? AES_ENCRYPTION : AES_DECRYPTION);
-                if (ret != 0)
-                    return ret;
-            }
-            if (iv && key == NULL) {
-                ret = wc_AesSetIV(&ctx->cipher.aes, iv);
-                if (ret != 0)
-                    return ret;
-            }
-        }
-        #endif /* WOLFSSL_AES_128 */
-        #ifdef WOLFSSL_AES_192
-        if (ctx->cipherType == AES_192_CBC_TYPE ||
-                 (type && XSTRNCMP(type, EVP_AES_192_CBC, EVP_AES_SIZE) == 0)) {
-            WOLFSSL_MSG("EVP_AES_192_CBC");
-            ctx->cipherType = AES_192_CBC_TYPE;
-            ctx->flags     &= ~WOLFSSL_EVP_CIPH_MODE;
-            ctx->flags     |= WOLFSSL_EVP_CIPH_CBC_MODE;
-            ctx->keyLen     = 24;
-            ctx->block_size = AES_BLOCK_SIZE;
-            if (enc == 0 || enc == 1)
-                ctx->enc = enc ? 1 : 0;
-            if (key) {
-                ret = wc_AesSetKey(&ctx->cipher.aes, key, ctx->keyLen, iv,
-                                ctx->enc ? AES_ENCRYPTION : AES_DECRYPTION);
-                if (ret != 0)
-                    return ret;
-            }
-            if (iv && key == NULL) {
-                ret = wc_AesSetIV(&ctx->cipher.aes, iv);
-                if (ret != 0)
-                    return ret;
-            }
-        }
-        #endif /* WOLFSSL_AES_192 */
-        #ifdef WOLFSSL_AES_256
-        if (ctx->cipherType == AES_256_CBC_TYPE ||
-                 (type && XSTRNCMP(type, EVP_AES_256_CBC, EVP_AES_SIZE) == 0)) {
-            WOLFSSL_MSG("EVP_AES_256_CBC");
-            ctx->cipherType = AES_256_CBC_TYPE;
-            ctx->flags     &= ~WOLFSSL_EVP_CIPH_MODE;
-            ctx->flags     |= WOLFSSL_EVP_CIPH_CBC_MODE;
-            ctx->keyLen     = 32;
-            ctx->block_size = AES_BLOCK_SIZE;
-            if (enc == 0 || enc == 1)
-                ctx->enc = enc ? 1 : 0;
-            if (key) {
-                ret = wc_AesSetKey(&ctx->cipher.aes, key, ctx->keyLen, iv,
-                                ctx->enc ? AES_ENCRYPTION : AES_DECRYPTION);
-                if (ret != 0){
-                    WOLFSSL_MSG("wc_AesSetKey() failed");
-                    return ret;
-                }
-            }
-            if (iv && key == NULL) {
-                ret = wc_AesSetIV(&ctx->cipher.aes, iv);
-                if (ret != 0){
-                    WOLFSSL_MSG("wc_AesSetIV() failed");
-                    return ret;
-                }
-            }
-        }
-        #endif /* WOLFSSL_AES_256 */
-    #endif /* HAVE_AES_CBC */
-#ifdef WOLFSSL_AES_COUNTER
-        #ifdef WOLFSSL_AES_128
-        if (ctx->cipherType == AES_128_CTR_TYPE ||
-                 (type && XSTRNCMP(type, EVP_AES_128_CTR, EVP_AES_SIZE) == 0)) {
-            WOLFSSL_MSG("EVP_AES_128_CTR");
-            ctx->flags     &= ~WOLFSSL_EVP_CIPH_MODE;
-            ctx->cipherType = AES_128_CTR_TYPE;
-            ctx->flags     |= WOLFSSL_EVP_CIPH_CTR_MODE;
-            ctx->keyLen     = 16;
-            ctx->block_size = AES_BLOCK_SIZE;
-            if (enc == 0 || enc == 1)
-                ctx->enc = enc ? 1 : 0;
-            if (key) {
-              ret = wc_AesSetKey(&ctx->cipher.aes, key, ctx->keyLen, iv,
-                    AES_ENCRYPTION);
-                if (ret != 0)
-                    return ret;
-            }
-            if (iv && key == NULL) {
-                ret = wc_AesSetIV(&ctx->cipher.aes, iv);
-                if (ret != 0)
-                    return ret;
-            }
-        }
-        #endif /* WOLFSSL_AES_128 */
-        #ifdef WOLFSSL_AES_192
-        if (ctx->cipherType == AES_192_CTR_TYPE ||
-                 (type && XSTRNCMP(type, EVP_AES_192_CTR, EVP_AES_SIZE) == 0)) {
-            WOLFSSL_MSG("EVP_AES_192_CTR");
-            ctx->cipherType = AES_192_CTR_TYPE;
-            ctx->flags     &= ~WOLFSSL_EVP_CIPH_MODE;
-            ctx->flags     |= WOLFSSL_EVP_CIPH_CTR_MODE;
-            ctx->keyLen     = 24;
-            ctx->block_size = AES_BLOCK_SIZE;
-            if (enc == 0 || enc == 1)
-                ctx->enc = enc ? 1 : 0;
-            if (key) {
-                ret = wc_AesSetKey(&ctx->cipher.aes, key, ctx->keyLen, iv,
-                      AES_ENCRYPTION);
-                if (ret != 0)
-                    return ret;
-            }
-            if (iv && key == NULL) {
-                ret = wc_AesSetIV(&ctx->cipher.aes, iv);
-                if (ret != 0)
-                    return ret;
-            }
-        }
-        #endif /* WOLFSSL_AES_192 */
-        #ifdef WOLFSSL_AES_256
-        if (ctx->cipherType == AES_256_CTR_TYPE ||
-                 (type && XSTRNCMP(type, EVP_AES_256_CTR, EVP_AES_SIZE) == 0)) {
-            WOLFSSL_MSG("EVP_AES_256_CTR");
-            ctx->cipherType = AES_256_CTR_TYPE;
-            ctx->flags     &= ~WOLFSSL_EVP_CIPH_MODE;
-            ctx->flags     |= WOLFSSL_EVP_CIPH_CTR_MODE;
-            ctx->keyLen     = 32;
-            ctx->block_size = AES_BLOCK_SIZE;
-            if (enc == 0 || enc == 1)
-                ctx->enc = enc ? 1 : 0;
-            if (key) {
-                ret = wc_AesSetKey(&ctx->cipher.aes, key, ctx->keyLen, iv,
-                      AES_ENCRYPTION);
-                if (ret != 0)
-                    return ret;
-            }
-            if (iv && key == NULL) {
-                ret = wc_AesSetIV(&ctx->cipher.aes, iv);
-                if (ret != 0)
-                    return ret;
-            }
-        }
-        #endif /* WOLFSSL_AES_256 */
-#endif /* WOLFSSL_AES_COUNTER */
-        #ifdef WOLFSSL_AES_128
-        if (ctx->cipherType == AES_128_ECB_TYPE ||
-            (type && XSTRNCMP(type, EVP_AES_128_ECB, EVP_AES_SIZE) == 0)) {
-            WOLFSSL_MSG("EVP_AES_128_ECB");
-            ctx->cipherType = AES_128_ECB_TYPE;
-            ctx->flags     &= ~WOLFSSL_EVP_CIPH_MODE;
-            ctx->flags     |= WOLFSSL_EVP_CIPH_ECB_MODE;
-            ctx->keyLen     = 16;
-            ctx->block_size = AES_BLOCK_SIZE;
-            if (enc == 0 || enc == 1)
-                ctx->enc = enc ? 1 : 0;
-            if (key) {
-                ret = wc_AesSetKey(&ctx->cipher.aes, key, ctx->keyLen, NULL,
-                      ctx->enc ? AES_ENCRYPTION : AES_DECRYPTION);
-            }
-            if (ret != 0)
-                return ret;
-        }
-        #endif /* WOLFSSL_AES_128 */
-        #ifdef WOLFSSL_AES_192
-        if (ctx->cipherType == AES_192_ECB_TYPE ||
-                 (type && XSTRNCMP(type, EVP_AES_192_ECB, EVP_AES_SIZE) == 0)) {
-            WOLFSSL_MSG("EVP_AES_192_ECB");
-            ctx->cipherType = AES_192_ECB_TYPE;
-            ctx->flags     &= ~WOLFSSL_EVP_CIPH_MODE;
-            ctx->flags     |= WOLFSSL_EVP_CIPH_ECB_MODE;
-            ctx->keyLen     = 24;
-            ctx->block_size = AES_BLOCK_SIZE;
-            if (enc == 0 || enc == 1)
-                ctx->enc = enc ? 1 : 0;
-            if (key) {
-                ret = wc_AesSetKey(&ctx->cipher.aes, key, ctx->keyLen, NULL,
-                      ctx->enc ? AES_ENCRYPTION : AES_DECRYPTION);
-            }
-            if (ret != 0)
-                return ret;
-        }
-        #endif /* WOLFSSL_AES_192 */
-        #ifdef WOLFSSL_AES_256
-        if (ctx->cipherType == AES_256_ECB_TYPE ||
-                 (type && XSTRNCMP(type, EVP_AES_256_ECB, EVP_AES_SIZE) == 0)) {
-            WOLFSSL_MSG("EVP_AES_256_ECB");
-            ctx->cipherType = AES_256_ECB_TYPE;
-            ctx->flags     &= ~WOLFSSL_EVP_CIPH_MODE;
-            ctx->flags     |= WOLFSSL_EVP_CIPH_ECB_MODE;
-            ctx->keyLen     = 32;
-            ctx->block_size = AES_BLOCK_SIZE;
-            if (enc == 0 || enc == 1)
-                ctx->enc = enc ? 1 : 0;
-            if (key) {
-              ret = wc_AesSetKey(&ctx->cipher.aes, key, ctx->keyLen, NULL,
-                    ctx->enc ? AES_ENCRYPTION : AES_DECRYPTION);
-            }
-            if (ret != 0)
-                return ret;
-        }
-        #endif /* WOLFSSL_AES_256 */
-#endif /* NO_AES */
-
-#ifndef NO_DES3
-        if (ctx->cipherType == DES_CBC_TYPE ||
-                 (type && XSTRNCMP(type, EVP_DES_CBC, EVP_DES_SIZE) == 0)) {
-            WOLFSSL_MSG("EVP_DES_CBC");
-            ctx->cipherType = DES_CBC_TYPE;
-            ctx->flags     &= ~WOLFSSL_EVP_CIPH_MODE;
-            ctx->flags     |= WOLFSSL_EVP_CIPH_CBC_MODE;
-            ctx->keyLen     = 8;
-            ctx->block_size = DES_BLOCK_SIZE;
-            if (enc == 0 || enc == 1)
-                ctx->enc = enc ? 1 : 0;
-            if (key) {
-                ret = wc_Des_SetKey(&ctx->cipher.des, key, iv,
-                          ctx->enc ? DES_ENCRYPTION : DES_DECRYPTION);
-                if (ret != 0)
-                    return ret;
-            }
-
-            if (iv && key == NULL)
-                wc_Des_SetIV(&ctx->cipher.des, iv);
-        }
-#ifdef WOLFSSL_DES_ECB
-        else if (ctx->cipherType == DES_ECB_TYPE ||
-                 (type && XSTRNCMP(type, EVP_DES_ECB, EVP_DES_SIZE) == 0)) {
-            WOLFSSL_MSG("EVP_DES_ECB");
-            ctx->cipherType = DES_ECB_TYPE;
-            ctx->flags     &= ~WOLFSSL_EVP_CIPH_MODE;
-            ctx->flags     |= WOLFSSL_EVP_CIPH_ECB_MODE;
-            ctx->keyLen     = 8;
-            ctx->block_size = DES_BLOCK_SIZE;
-            if (enc == 0 || enc == 1)
-                ctx->enc = enc ? 1 : 0;
-            if (key) {
-                WOLFSSL_MSG("Des_SetKey");
-                ret = wc_Des_SetKey(&ctx->cipher.des, key, NULL,
-                          ctx->enc ? DES_ENCRYPTION : DES_DECRYPTION);
-                if (ret != 0)
-                    return ret;
-            }
-        }
-#endif
-        else if (ctx->cipherType == DES_EDE3_CBC_TYPE ||
-                 (type &&
-                  XSTRNCMP(type, EVP_DES_EDE3_CBC, EVP_DES_EDE3_SIZE) == 0)) {
-            WOLFSSL_MSG("EVP_DES_EDE3_CBC");
-            ctx->cipherType = DES_EDE3_CBC_TYPE;
-            ctx->flags     &= ~WOLFSSL_EVP_CIPH_MODE;
-            ctx->flags     |= WOLFSSL_EVP_CIPH_CBC_MODE;
-            ctx->keyLen     = 24;
-            ctx->block_size = DES_BLOCK_SIZE;
-            if (enc == 0 || enc == 1)
-                ctx->enc = enc ? 1 : 0;
-            if (key) {
-                ret = wc_Des3_SetKey(&ctx->cipher.des3, key, iv,
-                          ctx->enc ? DES_ENCRYPTION : DES_DECRYPTION);
-                if (ret != 0)
-                    return ret;
-            }
-
-            if (iv && key == NULL) {
-                ret = wc_Des3_SetIV(&ctx->cipher.des3, iv);
-                if (ret != 0)
-                    return ret;
-            }
-        }
-        else if (ctx->cipherType == DES_EDE3_ECB_TYPE ||
-                 (type &&
-                  XSTRNCMP(type, EVP_DES_EDE3_ECB, EVP_DES_EDE3_SIZE) == 0)) {
-            WOLFSSL_MSG("EVP_DES_EDE3_ECB");
-            ctx->cipherType = DES_EDE3_ECB_TYPE;
-            ctx->flags     &= ~WOLFSSL_EVP_CIPH_MODE;
-            ctx->flags     |= WOLFSSL_EVP_CIPH_ECB_MODE;
-            ctx->keyLen     = 24;
-            ctx->block_size = DES_BLOCK_SIZE;
-            if (enc == 0 || enc == 1)
-                ctx->enc = enc ? 1 : 0;
-            if (key) {
-                ret = wc_Des3_SetKey(&ctx->cipher.des3, key, NULL,
-                          ctx->enc ? DES_ENCRYPTION : DES_DECRYPTION);
-                if (ret != 0)
-                    return ret;
-            }
-        }
-#endif /* NO_DES3 */
-#ifndef NO_RC4
-        if (ctx->cipherType == ARC4_TYPE || (type &&
-                                     XSTRNCMP(type, "ARC4", 4) == 0)) {
-            WOLFSSL_MSG("ARC4");
-            ctx->cipherType = ARC4_TYPE;
-            ctx->flags     &= ~WOLFSSL_EVP_CIPH_MODE;
-            ctx->flags     |= WOLFSSL_EVP_CIPH_STREAM_CIPHER;
-            ctx->block_size = 1;
-            if (ctx->keyLen == 0)  /* user may have already set */
-                ctx->keyLen = 16;  /* default to 128 */
-            if (key)
-                wc_Arc4SetKey(&ctx->cipher.arc4, key, ctx->keyLen);
-        }
-#endif /* NO_RC4 */
-#ifdef HAVE_IDEA
-        if (ctx->cipherType == IDEA_CBC_TYPE ||
-                 (type && XSTRNCMP(type, EVP_IDEA_CBC, EVP_IDEA_SIZE) == 0)) {
-            WOLFSSL_MSG("EVP_IDEA_CBC");
-            ctx->cipherType = IDEA_CBC_TYPE;
-            ctx->flags     &= ~WOLFSSL_EVP_CIPH_MODE;
-            ctx->flags     |= WOLFSSL_EVP_CIPH_CBC_MODE;
-            ctx->keyLen     = IDEA_KEY_SIZE;
-            ctx->block_size = 8;
-            if (enc == 0 || enc == 1)
-                ctx->enc = enc ? 1 : 0;
-            if (key) {
-                ret = wc_IdeaSetKey(&ctx->cipher.idea, key, (word16)ctx->keyLen,
-                                    iv, ctx->enc ? IDEA_ENCRYPTION :
-                                                   IDEA_DECRYPTION);
-                if (ret != 0)
-                    return ret;
-            }
-
-            if (iv && key == NULL)
-                wc_IdeaSetIV(&ctx->cipher.idea, iv);
-        }
-#endif /* HAVE_IDEA */
-        if (ctx->cipherType == NULL_CIPHER_TYPE || (type &&
-                                     XSTRNCMP(type, "NULL", 4) == 0)) {
-            WOLFSSL_MSG("NULL cipher");
-            ctx->cipherType = NULL_CIPHER_TYPE;
-            ctx->keyLen = 0;
-            ctx->block_size = 16;
-        }
-        (void)ret; /* remove warning. If execution reaches this point, ret=0 */
-        return WOLFSSL_SUCCESS;
-    }
-
-
-    /* WOLFSSL_SUCCESS on ok */
-    int wolfSSL_EVP_CIPHER_CTX_key_length(WOLFSSL_EVP_CIPHER_CTX* ctx)
-    {
-        WOLFSSL_ENTER("wolfSSL_EVP_CIPHER_CTX_key_length");
-        if (ctx)
-            return ctx->keyLen;
-
-        return 0;   /* failure */
-    }
-
-
-    /* WOLFSSL_SUCCESS on ok */
-    int wolfSSL_EVP_CIPHER_CTX_set_key_length(WOLFSSL_EVP_CIPHER_CTX* ctx,
-                                             int keylen)
-    {
-        WOLFSSL_ENTER("wolfSSL_EVP_CIPHER_CTX_set_key_length");
-        if (ctx)
-            ctx->keyLen = keylen;
-        else
-            return 0;  /* failure */
-
-        return WOLFSSL_SUCCESS;
-    }
-
-
-    /* WOLFSSL_SUCCESS on ok */
-    int wolfSSL_EVP_Cipher(WOLFSSL_EVP_CIPHER_CTX* ctx, byte* dst, byte* src,
-                          word32 len)
-    {
-        int ret = 0;
-        WOLFSSL_ENTER("wolfSSL_EVP_Cipher");
-
-        if (ctx == NULL || dst == NULL || src == NULL) {
-            WOLFSSL_MSG("Bad function argument");
-            return 0;  /* failure */
-        }
-
-        if (ctx->cipherType == 0xff) {
-            WOLFSSL_MSG("no init");
-            return 0;  /* failure */
-        }
-
-        switch (ctx->cipherType) {
-
-#ifndef NO_AES
-#ifdef HAVE_AES_CBC
-            case AES_128_CBC_TYPE :
-            case AES_192_CBC_TYPE :
-            case AES_256_CBC_TYPE :
-                WOLFSSL_MSG("AES CBC");
-                if (ctx->enc)
-                    ret = wc_AesCbcEncrypt(&ctx->cipher.aes, dst, src, len);
-                else
-                    ret = wc_AesCbcDecrypt(&ctx->cipher.aes, dst, src, len);
-                break;
-#endif /* HAVE_AES_CBC */
-#ifdef HAVE_AES_ECB
-            case AES_128_ECB_TYPE :
-            case AES_192_ECB_TYPE :
-            case AES_256_ECB_TYPE :
-                WOLFSSL_MSG("AES ECB");
-                if (ctx->enc)
-                    ret = wc_AesEcbEncrypt(&ctx->cipher.aes, dst, src, len);
-                else
-                    ret = wc_AesEcbDecrypt(&ctx->cipher.aes, dst, src, len);
-                break;
-#endif
-#ifdef WOLFSSL_AES_COUNTER
-            case AES_128_CTR_TYPE :
-            case AES_192_CTR_TYPE :
-            case AES_256_CTR_TYPE :
-                    WOLFSSL_MSG("AES CTR");
-                    ret = wc_AesCtrEncrypt(&ctx->cipher.aes, dst, src, len);
-                break;
-#endif /* WOLFSSL_AES_COUNTER */
-#endif /* NO_AES */
-
-#ifndef NO_DES3
-            case DES_CBC_TYPE :
-                if (ctx->enc)
-                    wc_Des_CbcEncrypt(&ctx->cipher.des, dst, src, len);
-                else
-                    wc_Des_CbcDecrypt(&ctx->cipher.des, dst, src, len);
-                break;
-            case DES_EDE3_CBC_TYPE :
-                if (ctx->enc)
-                    ret = wc_Des3_CbcEncrypt(&ctx->cipher.des3, dst, src, len);
-                else
-                    ret = wc_Des3_CbcDecrypt(&ctx->cipher.des3, dst, src, len);
-                break;
-#ifdef WOLFSSL_DES_ECB
-            case DES_ECB_TYPE :
-                ret = wc_Des_EcbEncrypt(&ctx->cipher.des, dst, src, len);
-                break;
-            case DES_EDE3_ECB_TYPE :
-                ret = wc_Des3_EcbEncrypt(&ctx->cipher.des3, dst, src, len);
-                break;
-#endif
-#endif /* !NO_DES3 */
-
-#ifndef NO_RC4
-            case ARC4_TYPE :
-                wc_Arc4Process(&ctx->cipher.arc4, dst, src, len);
-                break;
-#endif
-
-#ifdef HAVE_IDEA
-            case IDEA_CBC_TYPE :
-                if (ctx->enc)
-                    wc_IdeaCbcEncrypt(&ctx->cipher.idea, dst, src, len);
-                else
-                    wc_IdeaCbcDecrypt(&ctx->cipher.idea, dst, src, len);
-                break;
-#endif
-            case NULL_CIPHER_TYPE :
-                XMEMCPY(dst, src, len);
-                break;
-
-            default: {
-                WOLFSSL_MSG("bad type");
-                return 0;  /* failure */
-            }
-        }
-
-        if (ret != 0) {
-            WOLFSSL_MSG("wolfSSL_EVP_Cipher failure");
-            return 0;  /* failure */
-        }
-
-        WOLFSSL_MSG("wolfSSL_EVP_Cipher success");
-        return WOLFSSL_SUCCESS;  /* success */
-    }
-
-#define WOLFSSL_EVP_INCLUDED
-#include "wolfcrypt/src/evp.c"
-
+#ifdef WOLFSSL_SHA3
+#ifndef WOLFSSL_NOSHA3_224
+
+    int wolfSSL_SHA3_224_Init(WOLFSSL_SHA3_224_CTX* sha)
+    {
+        int ret;
+
+        typedef char sha_test[sizeof(SHA3_224_CTX) >= sizeof(wc_Sha3) ? 1 : -1];
+        (void)sizeof(sha_test);
+
+        WOLFSSL_ENTER("SHA3_224_Init");
+        ret = wc_InitSha3_224((wc_Sha3*)sha, NULL, 0);
+
+        /* return 1 on success, 0 otherwise */
+        if (ret == 0)
+            return 1;
+
+        return 0;
+    }
+
+
+    int wolfSSL_SHA3_224_Update(WOLFSSL_SHA3_224_CTX* sha, const void* input,
+                           unsigned long sz)
+    {
+        int ret;
+
+        WOLFSSL_ENTER("SHA3_224_Update");
+        ret = wc_Sha3_224_Update((wc_Sha3*)sha, (const byte*)input, (word32)sz);
+
+        /* return 1 on success, 0 otherwise */
+        if (ret == 0)
+            return 1;
+
+        return 0;
+    }
+
+
+    int wolfSSL_SHA3_224_Final(byte* input, WOLFSSL_SHA3_224_CTX* sha)
+    {
+        int ret;
+
+        WOLFSSL_ENTER("SHA3_224_Final");
+        ret = wc_Sha3_224_Final((wc_Sha3*)sha, input);
+
+        /* return 1 on success, 0 otherwise */
+        if (ret == 0)
+            return 1;
+
+        return 0;
+    }
+
+#endif /* WOLFSSL_NOSHA3_224 */
+
+
+#ifndef WOLFSSL_NOSHA3_256
+    int wolfSSL_SHA3_256_Init(WOLFSSL_SHA3_256_CTX* sha3_256)
+    {
+        int ret;
+
+        typedef char sha_test[sizeof(SHA3_256_CTX) >= sizeof(wc_Sha3) ? 1 : -1];
+        (void)sizeof(sha_test);
+
+        WOLFSSL_ENTER("SHA3_256_Init");
+        ret = wc_InitSha3_256((wc_Sha3*)sha3_256, NULL, 0);
+
+        /* return 1 on success, 0 otherwise */
+        if (ret == 0)
+            return 1;
+
+        return 0;
+    }
+
+
+    int wolfSSL_SHA3_256_Update(WOLFSSL_SHA3_256_CTX* sha, const void* input,
+                              unsigned long sz)
+    {
+        int ret;
+
+        WOLFSSL_ENTER("SHA3_256_Update");
+        ret = wc_Sha3_256_Update((wc_Sha3*)sha, (const byte*)input, (word32)sz);
+
+        /* return 1 on success, 0 otherwise */
+        if (ret == 0)
+            return 1;
+
+        return 0;
+    }
+
+
+    int wolfSSL_SHA3_256_Final(byte* input, WOLFSSL_SHA3_256_CTX* sha)
+    {
+        int ret;
+
+        WOLFSSL_ENTER("SHA3_256_Final");
+        ret = wc_Sha3_256_Final((wc_Sha3*)sha, input);
+
+        /* return 1 on success, 0 otherwise */
+        if (ret == 0)
+            return 1;
+
+        return 0;
+    }
+#endif /* WOLFSSL_NOSHA3_256 */
+
+
+    int wolfSSL_SHA3_384_Init(WOLFSSL_SHA3_384_CTX* sha)
+    {
+        int ret;
+
+        typedef char sha_test[sizeof(SHA3_384_CTX) >= sizeof(wc_Sha3) ? 1 : -1];
+        (void)sizeof(sha_test);
+
+        WOLFSSL_ENTER("SHA3_384_Init");
+        ret = wc_InitSha3_384((wc_Sha3*)sha, NULL, 0);
+
+        /* return 1 on success, 0 otherwise */
+        if (ret == 0)
+            return 1;
+
+        return 0;
+    }
+
+
+    int wolfSSL_SHA3_384_Update(WOLFSSL_SHA3_384_CTX* sha, const void* input,
+                           unsigned long sz)
+    {
+        int ret;
+
+        WOLFSSL_ENTER("SHA3_384_Update");
+        ret = wc_Sha3_384_Update((wc_Sha3*)sha, (const byte*)input, (word32)sz);
+
+        /* return 1 on success, 0 otherwise */
+        if (ret == 0)
+            return 1;
+
+        return 0;
+    }
+
+
+    int wolfSSL_SHA3_384_Final(byte* input, WOLFSSL_SHA3_384_CTX* sha)
+    {
+        int ret;
+
+        WOLFSSL_ENTER("SHA3_384_Final");
+        ret = wc_Sha3_384_Final((wc_Sha3*)sha, input);
+
+        /* return 1 on success, 0 otherwise */
+        if (ret == 0)
+            return 1;
+
+        return 0;
+    }
+
+
+
+#ifndef WOLFSSL_NOSHA3_512
+
+    int wolfSSL_SHA3_512_Init(WOLFSSL_SHA3_512_CTX* sha)
+    {
+        int ret;
+
+        typedef char sha_test[sizeof(SHA3_512_CTX) >= sizeof(wc_Sha3) ? 1 : -1];
+        (void)sizeof(sha_test);
+
+        WOLFSSL_ENTER("SHA3_512_Init");
+        ret = wc_InitSha3_512((wc_Sha3*)sha, NULL, 0);
+
+        /* return 1 on success, 0 otherwise */
+        if (ret == 0)
+            return 1;
+
+        return 0;
+    }
+
+
+    int wolfSSL_SHA3_512_Update(WOLFSSL_SHA3_512_CTX* sha, const void* input,
+                           unsigned long sz)
+    {
+        int ret;
+
+        WOLFSSL_ENTER("SHA3_512_Update");
+        ret = wc_Sha3_512_Update((wc_Sha3*)sha, (const byte*)input, (word32)sz);
+
+        /* return 1 on success, 0 otherwise */
+        if (ret == 0)
+            return 1;
+
+        return 0;
+    }
+
+
+    int wolfSSL_SHA3_512_Final(byte* input, WOLFSSL_SHA3_512_CTX* sha)
+    {
+        int ret;
+
+        WOLFSSL_ENTER("SHA3_512_Final");
+        ret = wc_Sha3_512_Final((wc_Sha3*)sha, input);
+
+        /* return 1 on success, 0 otherwise */
+        if (ret == 0)
+            return 1;
+
+        return 0;
+    }
+
+#endif /* WOLFSSL_NOSHA3_512 */
+#endif /* WOLFSSL_SHA3 */
 
     /* store for external read of iv, WOLFSSL_SUCCESS on success */
     int  wolfSSL_StoreExternalIV(WOLFSSL_EVP_CIPHER_CTX* ctx)
@@ -13365,13 +15932,29 @@
         switch (ctx->cipherType) {
 
 #ifndef NO_AES
+#ifdef HAVE_AES_CBC
             case AES_128_CBC_TYPE :
             case AES_192_CBC_TYPE :
             case AES_256_CBC_TYPE :
                 WOLFSSL_MSG("AES CBC");
                 XMEMCPY(ctx->iv, &ctx->cipher.aes.reg, AES_BLOCK_SIZE);
                 break;
-
+#endif
+#ifdef HAVE_AESGCM
+            case AES_128_GCM_TYPE :
+            case AES_192_GCM_TYPE :
+            case AES_256_GCM_TYPE :
+                WOLFSSL_MSG("AES GCM");
+                XMEMCPY(ctx->iv, &ctx->cipher.aes.reg, AES_BLOCK_SIZE);
+                break;
+#endif /* HAVE_AESGCM */
+#ifdef HAVE_AES_ECB
+            case AES_128_ECB_TYPE :
+            case AES_192_ECB_TYPE :
+            case AES_256_ECB_TYPE :
+                WOLFSSL_MSG("AES ECB");
+                break;
+#endif
 #ifdef WOLFSSL_AES_COUNTER
             case AES_128_CTR_TYPE :
             case AES_192_CTR_TYPE :
@@ -13380,7 +15963,38 @@
                 XMEMCPY(ctx->iv, &ctx->cipher.aes.reg, AES_BLOCK_SIZE);
                 break;
 #endif /* WOLFSSL_AES_COUNTER */
-
+#ifdef WOLFSSL_AES_CFB
+#if !defined(HAVE_SELFTEST) && !defined(HAVE_FIPS)
+            case AES_128_CFB1_TYPE:
+            case AES_192_CFB1_TYPE:
+            case AES_256_CFB1_TYPE:
+                WOLFSSL_MSG("AES CFB1");
+                break;
+            case AES_128_CFB8_TYPE:
+            case AES_192_CFB8_TYPE:
+            case AES_256_CFB8_TYPE:
+                WOLFSSL_MSG("AES CFB8");
+                break;
+#endif /* !HAVE_SELFTEST && !HAVE_FIPS */
+            case AES_128_CFB128_TYPE:
+            case AES_192_CFB128_TYPE:
+            case AES_256_CFB128_TYPE:
+                WOLFSSL_MSG("AES CFB128");
+                break;
+#endif /* WOLFSSL_AES_CFB */
+#if defined(WOLFSSL_AES_OFB)
+            case AES_128_OFB_TYPE:
+            case AES_192_OFB_TYPE:
+            case AES_256_OFB_TYPE:
+                WOLFSSL_MSG("AES OFB");
+                break;
+#endif /* WOLFSSL_AES_OFB */
+#ifdef WOLFSSL_AES_XTS
+            case AES_128_XTS_TYPE:
+            case AES_256_XTS_TYPE:
+                WOLFSSL_MSG("AES XTS");
+                break;
+#endif /* WOLFSSL_AES_XTS */
 #endif /* NO_AES */
 
 #ifndef NO_DES3
@@ -13394,6 +16008,14 @@
                 XMEMCPY(ctx->iv, &ctx->cipher.des3.reg, DES_BLOCK_SIZE);
                 break;
 #endif
+#ifdef WOLFSSL_DES_ECB
+            case DES_ECB_TYPE :
+                WOLFSSL_MSG("DES ECB");
+                break;
+            case DES_EDE3_ECB_TYPE :
+                WOLFSSL_MSG("DES3 ECB");
+                break;
+#endif
 
 #ifdef HAVE_IDEA
             case IDEA_CBC_TYPE :
@@ -13417,7 +16039,6 @@
         return WOLFSSL_SUCCESS;
     }
 
-
     /* set internal IV from external, WOLFSSL_SUCCESS on success */
     int  wolfSSL_SetInternalIV(WOLFSSL_EVP_CIPHER_CTX* ctx)
     {
@@ -13432,13 +16053,29 @@
         switch (ctx->cipherType) {
 
 #ifndef NO_AES
+#ifdef HAVE_AES_CBC
             case AES_128_CBC_TYPE :
             case AES_192_CBC_TYPE :
             case AES_256_CBC_TYPE :
                 WOLFSSL_MSG("AES CBC");
                 XMEMCPY(&ctx->cipher.aes.reg, ctx->iv, AES_BLOCK_SIZE);
                 break;
-
+#endif
+#ifdef HAVE_AESGCM
+            case AES_128_GCM_TYPE :
+            case AES_192_GCM_TYPE :
+            case AES_256_GCM_TYPE :
+                WOLFSSL_MSG("AES GCM");
+                XMEMCPY(&ctx->cipher.aes.reg, ctx->iv, AES_BLOCK_SIZE);
+                break;
+#endif
+#ifdef HAVE_AES_ECB
+            case AES_128_ECB_TYPE :
+            case AES_192_ECB_TYPE :
+            case AES_256_ECB_TYPE :
+                WOLFSSL_MSG("AES ECB");
+                break;
+#endif
 #ifdef WOLFSSL_AES_COUNTER
             case AES_128_CTR_TYPE :
             case AES_192_CTR_TYPE :
@@ -13461,6 +16098,14 @@
                 XMEMCPY(&ctx->cipher.des3.reg, ctx->iv, DES_BLOCK_SIZE);
                 break;
 #endif
+#ifdef WOLFSSL_DES_ECB
+            case DES_ECB_TYPE :
+                WOLFSSL_MSG("DES ECB");
+                break;
+            case DES_EDE3_ECB_TYPE :
+                WOLFSSL_MSG("DES3 ECB");
+                break;
+#endif
 
 #ifdef HAVE_IDEA
             case IDEA_CBC_TYPE :
@@ -13484,197 +16129,6 @@
         return WOLFSSL_SUCCESS;
     }
 
-
-    /* WOLFSSL_SUCCESS on ok */
-    int wolfSSL_EVP_DigestInit(WOLFSSL_EVP_MD_CTX* ctx,
-                               const WOLFSSL_EVP_MD* type)
-    {
-        int ret = WOLFSSL_SUCCESS;
-
-        WOLFSSL_ENTER("EVP_DigestInit");
-
-        if (ctx == NULL || type == NULL) {
-            return BAD_FUNC_ARG;
-        }
-
-
-    #ifdef WOLFSSL_ASYNC_CRYPT
-        /* compile-time validation of ASYNC_CTX_SIZE */
-        typedef char async_test[WC_ASYNC_DEV_SIZE >= sizeof(WC_ASYNC_DEV) ?
-                                                                        1 : -1];
-        (void)sizeof(async_test);
-    #endif
-
-        if (XSTRNCMP(type, "SHA256", 6) == 0) {
-             ctx->macType = WC_SHA256;
-             ret = wolfSSL_SHA256_Init(&(ctx->hash.digest.sha256));
-        }
-    #ifdef WOLFSSL_SHA224
-        else if (XSTRNCMP(type, "SHA224", 6) == 0) {
-             ctx->macType = WC_SHA224;
-             ret = wolfSSL_SHA224_Init(&(ctx->hash.digest.sha224));
-        }
-    #endif
-    #ifdef WOLFSSL_SHA384
-        else if (XSTRNCMP(type, "SHA384", 6) == 0) {
-             ctx->macType = WC_SHA384;
-             ret = wolfSSL_SHA384_Init(&(ctx->hash.digest.sha384));
-        }
-    #endif
-    #ifdef WOLFSSL_SHA512
-        else if (XSTRNCMP(type, "SHA512", 6) == 0) {
-             ctx->macType = WC_SHA512;
-             ret = wolfSSL_SHA512_Init(&(ctx->hash.digest.sha512));
-        }
-    #endif
-    #ifndef NO_MD4
-        else if (XSTRNCMP(type, "MD4", 3) == 0) {
-            ctx->macType = MD4;
-            wolfSSL_MD4_Init(&(ctx->hash.digest.md4));
-        }
-    #endif
-    #ifndef NO_MD5
-        else if (XSTRNCMP(type, "MD5", 3) == 0) {
-            ctx->macType = WC_MD5;
-            ret = wolfSSL_MD5_Init(&(ctx->hash.digest.md5));
-        }
-    #endif
-    #ifndef NO_SHA
-        /* has to be last since would pick or 224, 256, 384, or 512 too */
-        else if (XSTRNCMP(type, "SHA", 3) == 0) {
-             ctx->macType = WC_SHA;
-             ret = wolfSSL_SHA_Init(&(ctx->hash.digest.sha));
-        }
-    #endif /* NO_SHA */
-        else
-             return BAD_FUNC_ARG;
-
-        return ret;
-    }
-
-
-    /* WOLFSSL_SUCCESS on ok, WOLFSSL_FAILURE on failure */
-    int wolfSSL_EVP_DigestUpdate(WOLFSSL_EVP_MD_CTX* ctx, const void* data,
-                                size_t sz)
-    {
-        WOLFSSL_ENTER("EVP_DigestUpdate");
-
-        switch (ctx->macType) {
-#ifndef NO_MD4
-            case MD4:
-                wolfSSL_MD4_Update((MD4_CTX*)&ctx->hash, data,
-                                  (unsigned long)sz);
-                break;
-#endif
-#ifndef NO_MD5
-            case WC_MD5:
-                wolfSSL_MD5_Update((MD5_CTX*)&ctx->hash, data,
-                                  (unsigned long)sz);
-                break;
-#endif
-#ifndef NO_SHA
-            case WC_SHA:
-                wolfSSL_SHA_Update((SHA_CTX*)&ctx->hash, data,
-                                  (unsigned long)sz);
-                break;
-#endif
-#ifdef WOLFSSL_SHA224
-            case WC_SHA224:
-                wolfSSL_SHA224_Update((SHA224_CTX*)&ctx->hash, data,
-                                     (unsigned long)sz);
-                break;
-#endif
-#ifndef NO_SHA256
-            case WC_SHA256:
-                wolfSSL_SHA256_Update((SHA256_CTX*)&ctx->hash, data,
-                                     (unsigned long)sz);
-                break;
-#endif /* !NO_SHA256 */
-#ifdef WOLFSSL_SHA384
-            case WC_SHA384:
-                wolfSSL_SHA384_Update((SHA384_CTX*)&ctx->hash, data,
-                                     (unsigned long)sz);
-                break;
-#endif
-#ifdef WOLFSSL_SHA512
-            case WC_SHA512:
-                wolfSSL_SHA512_Update((SHA512_CTX*)&ctx->hash, data,
-                                     (unsigned long)sz);
-                break;
-#endif /* WOLFSSL_SHA512 */
-            default:
-                return WOLFSSL_FAILURE;
-        }
-
-        return WOLFSSL_SUCCESS;
-    }
-
-
-    /* WOLFSSL_SUCCESS on ok */
-    int wolfSSL_EVP_DigestFinal(WOLFSSL_EVP_MD_CTX* ctx, unsigned char* md,
-                               unsigned int* s)
-    {
-        WOLFSSL_ENTER("EVP_DigestFinal");
-        switch (ctx->macType) {
-#ifndef NO_MD4
-            case MD4:
-                wolfSSL_MD4_Final(md, (MD4_CTX*)&ctx->hash);
-                if (s) *s = MD4_DIGEST_SIZE;
-                break;
-#endif
-#ifndef NO_MD5
-            case WC_MD5:
-                wolfSSL_MD5_Final(md, (MD5_CTX*)&ctx->hash);
-                if (s) *s = WC_MD5_DIGEST_SIZE;
-                break;
-#endif
-#ifndef NO_SHA
-            case WC_SHA:
-                wolfSSL_SHA_Final(md, (SHA_CTX*)&ctx->hash);
-                if (s) *s = WC_SHA_DIGEST_SIZE;
-                break;
-#endif
-#ifdef WOLFSSL_SHA224
-            case WC_SHA224:
-                wolfSSL_SHA224_Final(md, (SHA224_CTX*)&ctx->hash);
-                if (s) *s = WC_SHA224_DIGEST_SIZE;
-                break;
-#endif
-#ifndef NO_SHA256
-            case WC_SHA256:
-                wolfSSL_SHA256_Final(md, (SHA256_CTX*)&ctx->hash);
-                if (s) *s = WC_SHA256_DIGEST_SIZE;
-                break;
-#endif /* !NO_SHA256 */
-#ifdef WOLFSSL_SHA384
-            case WC_SHA384:
-                wolfSSL_SHA384_Final(md, (SHA384_CTX*)&ctx->hash);
-                if (s) *s = WC_SHA384_DIGEST_SIZE;
-                break;
-#endif
-#ifdef WOLFSSL_SHA512
-            case WC_SHA512:
-                wolfSSL_SHA512_Final(md, (SHA512_CTX*)&ctx->hash);
-                if (s) *s = WC_SHA512_DIGEST_SIZE;
-                break;
-#endif /* WOLFSSL_SHA512 */
-            default:
-                return WOLFSSL_FAILURE;
-        }
-
-        return WOLFSSL_SUCCESS;
-    }
-
-
-    /* WOLFSSL_SUCCESS on ok */
-    int wolfSSL_EVP_DigestFinal_ex(WOLFSSL_EVP_MD_CTX* ctx, unsigned char* md,
-                                   unsigned int* s)
-    {
-        WOLFSSL_ENTER("EVP_DigestFinal_ex");
-        return EVP_DigestFinal(ctx, md, s);
-    }
-
-
     unsigned char* wolfSSL_HMAC(const WOLFSSL_EVP_MD* evp_md, const void* key,
                                 int key_len, const unsigned char* d, int n,
                                 unsigned char* md, unsigned int* md_len)
@@ -13725,6 +16179,30 @@
             mdlen = WC_SHA512_DIGEST_SIZE;
         } else
 #endif
+#ifdef WOLFSSL_SHA3
+    #ifndef WOLFSSL_NOSHA3_224
+        if (XSTRNCMP(evp_md, "SHA3_224", 8) == 0) {
+            type = WC_SHA3_224;
+            mdlen = WC_SHA3_224_DIGEST_SIZE;
+        } else
+    #endif
+    #ifndef WOLFSSL_NOSHA3_256
+        if (XSTRNCMP(evp_md, "SHA3_256", 8) == 0) {
+            type = WC_SHA3_256;
+            mdlen = WC_SHA3_256_DIGEST_SIZE;
+        } else
+    #endif
+        if (XSTRNCMP(evp_md, "SHA3_384", 8) == 0) {
+            type = WC_SHA3_384;
+            mdlen = WC_SHA3_384_DIGEST_SIZE;
+        } else
+    #ifndef WOLFSSL_NOSHA3_512
+        if (XSTRNCMP(evp_md, "SHA3_512", 8) == 0) {
+            type = WC_SHA3_512;
+            mdlen = WC_SHA3_512_DIGEST_SIZE;
+        } else
+    #endif
+#endif
 #ifndef NO_SHA
         if (XSTRNCMP(evp_md, "SHA", 3) == 0) {
             type = WC_SHA;
@@ -13766,27 +16244,13 @@
     {
         WOLFSSL_ENTER("wolfSSL_ERR_clear_error");
 
-#if defined(DEBUG_WOLFSSL) || defined(WOLFSSL_NGINX)
+#if defined(DEBUG_WOLFSSL) || defined(WOLFSSL_NGINX) || \
+    defined(OPENSSL_EXTRA) || defined(DEBUG_WOLFSSL_VERBOSE)
         wc_ClearErrorNodes();
 #endif
     }
 
 
-    /* frees all nodes in the current threads error queue
-     *
-     * id  thread id. ERR_remove_state is depriciated and id is ignored. The
-     *     current threads queue will be free'd.
-     */
-    void wolfSSL_ERR_remove_state(unsigned long id)
-    {
-        WOLFSSL_ENTER("wolfSSL_ERR_remove_state");
-        (void)id;
-        if (wc_ERR_remove_state() != 0) {
-            WOLFSSL_MSG("Error with removing the state");
-        }
-    }
-
-
     int wolfSSL_RAND_status(void)
     {
         return WOLFSSL_SUCCESS;  /* wolfCrypt provides enough seed internally */
@@ -13928,6 +16392,7 @@
                                       WOLFSSL_DES_key_schedule* ks3,
                                       WOLFSSL_DES_cblock* ivec, int enc)
     {
+        int ret;
         Des3 des;
         byte key[24];/* EDE uses 24 size key */
         byte lastblock[DES_BLOCK_SIZE];
@@ -13942,24 +16407,45 @@
         XMEMCPY(&key[DES_BLOCK_SIZE * 2], *ks3, DES_BLOCK_SIZE);
         lb_sz = sz%DES_BLOCK_SIZE;
         blk   = sz/DES_BLOCK_SIZE;
+
+        /* OpenSSL compat, no ret */
+        (void)wc_Des3Init(&des, NULL, INVALID_DEVID);
+
         if (enc) {
             wc_Des3_SetKey(&des, key, (const byte*)ivec, DES_ENCRYPTION);
-            wc_Des3_CbcEncrypt(&des, output, input, (word32)blk*DES_BLOCK_SIZE);
+            ret = wc_Des3_CbcEncrypt(&des, output, input, (word32)blk*DES_BLOCK_SIZE);
+        #if defined(WOLFSSL_ASYNC_CRYPT)
+            ret = wc_AsyncWait(ret, &des.asyncDev, WC_ASYNC_FLAG_NONE);
+        #endif
+            (void)ret; /* ignore return codes for processing */
             if(lb_sz){
                 XMEMSET(lastblock, 0, DES_BLOCK_SIZE);
                 XMEMCPY(lastblock, input+sz-lb_sz, lb_sz);
-                wc_Des3_CbcEncrypt(&des, output+blk*DES_BLOCK_SIZE,
+                ret = wc_Des3_CbcEncrypt(&des, output+blk*DES_BLOCK_SIZE,
                     lastblock, (word32)DES_BLOCK_SIZE);
+            #if defined(WOLFSSL_ASYNC_CRYPT)
+                ret = wc_AsyncWait(ret, &des.asyncDev, WC_ASYNC_FLAG_NONE);
+            #endif
+                (void)ret; /* ignore return codes for processing */
             }
         }
         else {
             wc_Des3_SetKey(&des, key, (const byte*)ivec, DES_DECRYPTION);
-            wc_Des3_CbcDecrypt(&des, output, input, (word32)blk*DES_BLOCK_SIZE);
+            ret = wc_Des3_CbcDecrypt(&des, output, input, (word32)blk*DES_BLOCK_SIZE);
+        #if defined(WOLFSSL_ASYNC_CRYPT)
+            ret = wc_AsyncWait(ret, &des.asyncDev, WC_ASYNC_FLAG_NONE);
+        #endif
+            (void)ret; /* ignore return codes for processing */
             if(lb_sz){
-                wc_Des3_CbcDecrypt(&des, lastblock, input+sz-lb_sz, (word32)DES_BLOCK_SIZE);
+                ret = wc_Des3_CbcDecrypt(&des, lastblock, input+sz-lb_sz, (word32)DES_BLOCK_SIZE);
+            #if defined(WOLFSSL_ASYNC_CRYPT)
+                ret = wc_AsyncWait(ret, &des.asyncDev, WC_ASYNC_FLAG_NONE);
+            #endif
+                (void)ret; /* ignore return codes for processing */
                 XMEMCPY(output+sz-lb_sz, lastblock, lb_sz);
             }
         }
+        wc_Des3Free(&des);
     }
 
 
@@ -13972,7 +16458,8 @@
         Des myDes;
         byte lastblock[DES_BLOCK_SIZE];
         int  lb_sz;
-        long  blk;
+        long idx = length;
+        long blk;
 
         WOLFSSL_ENTER("DES_ncbc_encrypt");
 
@@ -13980,40 +16467,42 @@
         wc_Des_SetKey(&myDes, (const byte*)schedule, (const byte*)ivec, !enc);
         lb_sz = length%DES_BLOCK_SIZE;
         blk   = length/DES_BLOCK_SIZE;
+        idx  -= sizeof(DES_cblock);
+        if (lb_sz) {
+            idx += DES_BLOCK_SIZE - lb_sz;
+        }
         if (enc){
-            wc_Des_CbcEncrypt(&myDes, output, input, (word32)blk*DES_BLOCK_SIZE);
-            if(lb_sz){
+            wc_Des_CbcEncrypt(&myDes, output, input,
+                    (word32)blk * DES_BLOCK_SIZE);
+            if (lb_sz){
                 XMEMSET(lastblock, 0, DES_BLOCK_SIZE);
                 XMEMCPY(lastblock, input+length-lb_sz, lb_sz);
-                wc_Des_CbcEncrypt(&myDes, output+blk*DES_BLOCK_SIZE,
+                wc_Des_CbcEncrypt(&myDes, output + blk * DES_BLOCK_SIZE,
                     lastblock, (word32)DES_BLOCK_SIZE);
             }
+            XMEMCPY(ivec, output + idx, sizeof(DES_cblock));
         } else {
-            wc_Des_CbcDecrypt(&myDes, output, input, (word32)blk*DES_BLOCK_SIZE);
-            if(lb_sz){
-                wc_Des_CbcDecrypt(&myDes, lastblock, input+length-lb_sz, (word32)DES_BLOCK_SIZE);
+            WOLFSSL_DES_cblock tmp;
+            XMEMCPY(tmp, input + idx, sizeof(DES_cblock));
+            wc_Des_CbcDecrypt(&myDes, output, input,
+                    (word32)blk * DES_BLOCK_SIZE);
+            if (lb_sz){
+                wc_Des_CbcDecrypt(&myDes, lastblock, input + length - lb_sz,
+                        (word32)DES_BLOCK_SIZE);
                 XMEMCPY(output+length-lb_sz, lastblock, lb_sz);
             }
-        }
-
-        XMEMCPY(ivec, output + length - sizeof(DES_cblock), sizeof(DES_cblock));
+            XMEMCPY(ivec, tmp, sizeof(WOLFSSL_DES_cblock));
+        }
+
     }
 
 #endif /* NO_DES3 */
 
-
     void wolfSSL_ERR_free_strings(void)
     {
         /* handled internally */
     }
 
-
-    void wolfSSL_EVP_cleanup(void)
-    {
-        /* nothing to do here */
-    }
-
-
     void wolfSSL_cleanup_all_ex_data(void)
     {
         /* nothing to do here */
@@ -14028,7 +16517,7 @@
         ssl->options.isClosed = 0;
         ssl->options.connReset = 0;
         ssl->options.sentNotify = 0;
-
+        ssl->options.sendVerify = 0;
         ssl->options.serverState = NULL_STATE;
         ssl->options.clientState = NULL_STATE;
         ssl->options.connectState = CONNECT_BEGIN;
@@ -14043,24 +16532,50 @@
         if (ssl->hsHashes != NULL) {
 #ifndef NO_OLD_TLS
 #ifndef NO_MD5
-            wc_InitMd5(&ssl->hsHashes->hashMd5);
+            if (wc_InitMd5_ex(&ssl->hsHashes->hashMd5, ssl->heap,
+                    ssl->devId) != 0) {
+                return WOLFSSL_FAILURE;
+            }
+        #if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+            wc_Md5SetFlags(&ssl->hsHashes->hashMd5, WC_HASH_FLAG_WILLCOPY);
+        #endif
 #endif
 #ifndef NO_SHA
-            if (wc_InitSha(&ssl->hsHashes->hashSha) != 0)
-                return WOLFSSL_FAILURE;
+            if (wc_InitSha_ex(&ssl->hsHashes->hashSha, ssl->heap,
+                    ssl->devId) != 0) {
+                return WOLFSSL_FAILURE;
+            }
+        #if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+            wc_ShaSetFlags(&ssl->hsHashes->hashSha, WC_HASH_FLAG_WILLCOPY);
+        #endif
 #endif
 #endif
 #ifndef NO_SHA256
-            if (wc_InitSha256(&ssl->hsHashes->hashSha256) != 0)
-                return WOLFSSL_FAILURE;
+            if (wc_InitSha256_ex(&ssl->hsHashes->hashSha256, ssl->heap,
+                    ssl->devId) != 0) {
+                return WOLFSSL_FAILURE;
+            }
+        #if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+            wc_Sha256SetFlags(&ssl->hsHashes->hashSha256, WC_HASH_FLAG_WILLCOPY);
+        #endif
 #endif
 #ifdef WOLFSSL_SHA384
-            if (wc_InitSha384(&ssl->hsHashes->hashSha384) != 0)
-                return WOLFSSL_FAILURE;
+            if (wc_InitSha384_ex(&ssl->hsHashes->hashSha384, ssl->heap,
+                    ssl->devId) != 0) {
+                return WOLFSSL_FAILURE;
+            }
+        #if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+            wc_Sha384SetFlags(&ssl->hsHashes->hashSha384, WC_HASH_FLAG_WILLCOPY);
+        #endif
 #endif
 #ifdef WOLFSSL_SHA512
-            if (wc_InitSha512(&ssl->hsHashes->hashSha512) != 0)
-                return WOLFSSL_FAILURE;
+            if (wc_InitSha512_ex(&ssl->hsHashes->hashSha512, ssl->heap,
+                    ssl->devId) != 0) {
+                return WOLFSSL_FAILURE;
+            }
+        #if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+            wc_Sha512SetFlags(&ssl->hsHashes->hashSha512, WC_HASH_FLAG_WILLCOPY);
+        #endif
 #endif
         }
 #ifdef SESSION_CERTS
@@ -14087,17 +16602,35 @@
         return WOLFSSL_SUCCESS;
     }
 
-
+#endif
+
+#if defined(OPENSSL_EXTRA) || defined(HAVE_WEBSERVER)
     long wolfSSL_CTX_set_mode(WOLFSSL_CTX* ctx, long mode)
     {
         /* WOLFSSL_MODE_ACCEPT_MOVING_WRITE_BUFFER is wolfSSL default mode */
 
         WOLFSSL_ENTER("SSL_CTX_set_mode");
-        if (mode == SSL_MODE_ENABLE_PARTIAL_WRITE)
-            ctx->partialWrite = 1;
+        switch(mode) {
+            case SSL_MODE_ENABLE_PARTIAL_WRITE:
+                ctx->partialWrite = 1;
+                break;
+            #if defined(OPENSSL_ALL) || defined(WOLFSSL_QT)
+            case SSL_MODE_RELEASE_BUFFERS:
+                WOLFSSL_MSG("SSL_MODE_RELEASE_BUFFERS not implemented.");
+                break;
+            #endif
+            default:
+                WOLFSSL_MSG("Mode Not Implemented");
+        }
+
+        /* SSL_MODE_AUTO_RETRY
+         * Should not return -1 with renegotiation on read/write */
 
         return mode;
     }
+#endif
+
+#ifdef OPENSSL_EXTRA
 
     #ifndef NO_WOLFSSL_STUB
     long wolfSSL_SSL_get_mode(WOLFSSL* ssl)
@@ -14170,7 +16703,7 @@
     int wolfSSL_set_session_id_context(WOLFSSL* ssl, const unsigned char* id,
                                    unsigned int len)
     {
-        WOLFSSL_STUB("wolfSSL_set_session_id_context");
+        WOLFSSL_ENTER("wolfSSL_set_session_id_context");
 
         if (len > ID_LEN || ssl == NULL || id == NULL) {
             return SSL_FAILURE;
@@ -14186,7 +16719,7 @@
     {
         (void)ctx;
         #ifndef NO_SESSION_CACHE
-            return SESSIONS_PER_ROW * SESSION_ROWS;
+            return (long)(SESSIONS_PER_ROW * SESSION_ROWS);
         #else
             return 0;
         #endif
@@ -14222,7 +16755,8 @@
     }
 
 
-#ifdef DEBUG_WOLFSSL
+#if (defined(DEBUG_WOLFSSL) || defined(OPENSSL_EXTRA)) && \
+    (!defined(_WIN32) && !defined(NO_ERROR_QUEUE))
     static const char WOLFSSL_SYS_ACCEPT_T[]  = "accept";
     static const char WOLFSSL_SYS_BIND_T[]    = "bind";
     static const char WOLFSSL_SYS_CONNECT_T[] = "connect";
@@ -14273,12 +16807,18 @@
     {
         WOLFSSL_ENTER("wolfSSL_ERR_put_error");
 
-        #ifndef DEBUG_WOLFSSL
+        #if !defined(DEBUG_WOLFSSL) && !defined(OPENSSL_EXTRA)
         (void)fun;
         (void)err;
         (void)file;
         (void)line;
         WOLFSSL_MSG("Not compiled in debug mode");
+        #elif defined(OPENSSL_EXTRA) && \
+                (defined(_WIN32) || defined(NO_ERROR_QUEUE))
+        (void)fun;
+        (void)file;
+        (void)line;
+        WOLFSSL_ERROR(err);
         #else
         WOLFSSL_ERROR_LINE(err, wolfSSL_ERR_sys_func(fun), (unsigned int)line,
             file, NULL);
@@ -14288,7 +16828,7 @@
 
 
     /* Similar to wolfSSL_ERR_get_error_line but takes in a flags argument for
-     * more flexability.
+     * more flexibility.
      *
      * file  output pointer to file where error happened
      * line  output to line number of error
@@ -14338,8 +16878,8 @@
 #endif /* OPENSSL_EXTRA */
 
 
-#ifdef KEEP_PEER_CERT
-    #ifdef SESSION_CERTS
+#if (defined(KEEP_PEER_CERT) && defined(SESSION_CERTS)) || \
+                                   (defined(OPENSSL_ALL) && defined(HAVE_PKCS7))
     /* Decode the X509 DER encoded certificate into a WOLFSSL_X509 object.
      *
      * x509  WOLFSSL_X509 object to decode into.
@@ -14351,10 +16891,12 @@
     {
         int          ret;
     #ifdef WOLFSSL_SMALL_STACK
-        DecodedCert* cert = NULL;
+        DecodedCert* cert;
     #else
         DecodedCert  cert[1];
     #endif
+        if (x509 == NULL || in == NULL || len <= 0)
+            return BAD_FUNC_ARG;
 
     #ifdef WOLFSSL_SMALL_STACK
         cert = (DecodedCert*)XMALLOC(sizeof(DecodedCert), NULL,
@@ -14367,7 +16909,9 @@
          */
         InitDecodedCert(cert, (byte*)in, len, NULL);
         if ((ret = ParseCertRelative(cert, CERT_TYPE, 0, NULL)) == 0) {
-            InitX509(x509, 0, NULL);
+        /* Check if x509 was not previously initialized by wolfSSL_X509_new() */
+            if (x509->dynamicMemory != TRUE)
+                InitX509(x509, 0, NULL);
             ret = CopyDecodedToX509(x509, cert);
             FreeDecodedCert(cert);
         }
@@ -14377,12 +16921,17 @@
 
         return ret;
     }
-    #endif /* SESSION_CERTS */
-
-
+#endif /* (KEEP_PEER_CERT && SESSION_CERTS) || (OPENSSL_ALL && HAVE_PKCS7) */
+
+
+#ifdef KEEP_PEER_CERT
+    WOLFSSL_ABI
     WOLFSSL_X509* wolfSSL_get_peer_certificate(WOLFSSL* ssl)
     {
         WOLFSSL_ENTER("SSL_get_peer_certificate");
+        if (ssl == NULL)
+            return NULL;
+
         if (ssl->peerCert.issuer.sz)
             return &ssl->peerCert;
 #ifdef SESSION_CERTS
@@ -14398,6 +16947,83 @@
 
 #endif /* KEEP_PEER_CERT */
 
+#if defined(SESSION_CERTS)
+/*  Return stack of peer certs.
+ *      If Qt or OPENSSL_ALL is defined then return ssl->peerCertChain.
+ *      All other cases return &ssl->session.chain
+ * ssl->peerCertChain is type WOLFSSL_STACK*
+ * ssl->session.chain is type WOLFSSL_X509_CHAIN
+ * Caller does not need to free return. The stack is Free'd when WOLFSSL* ssl is.
+ */
+WOLF_STACK_OF(WOLFSSL_X509)* wolfSSL_get_peer_cert_chain(const WOLFSSL* ssl)
+{
+    WOLFSSL_STACK* sk;
+    WOLFSSL_ENTER("wolfSSL_get_peer_cert_chain");
+
+    if (ssl == NULL)
+        return NULL;
+
+    #if defined(WOLFSSL_QT) || defined(OPENSSL_ALL)
+        if (ssl->peerCertChain == NULL)
+            wolfSSL_set_peer_cert_chain((WOLFSSL*) ssl);
+        sk = ssl->peerCertChain;
+    #else
+        sk = (WOLF_STACK_OF(WOLFSSL_X509)* )&ssl->session.chain;
+    #endif
+
+    if (sk == NULL) {
+        WOLFSSL_MSG("Error: Null Peer Cert Chain");
+    }
+    return sk;
+}
+
+#if defined(WOLFSSL_QT) || defined(OPENSSL_ALL)
+/* Builds up and creates a stack of peer certificates for ssl->peerCertChain
+    based off of the ssl session chain. Returns stack of WOLFSSL_X509 certs or
+    NULL on failure */
+WOLF_STACK_OF(WOLFSSL_X509)* wolfSSL_set_peer_cert_chain(WOLFSSL* ssl)
+{
+    WOLFSSL_STACK* sk;
+    WOLFSSL_X509* x509;
+    int i = 0;
+    int ret;
+
+    WOLFSSL_ENTER("wolfSSL_set_peer_cert_chain");
+    if ((ssl == NULL) || (ssl->session.chain.count == 0))
+        return NULL;
+
+    sk = wolfSSL_sk_X509_new();
+    i = ssl->session.chain.count-1;
+    for (; i >= 0; i--) {
+        /* For servers, the peer certificate chain does not include the peer
+            certificate, so do not add it to the stack */
+        if (ssl->options.side == WOLFSSL_SERVER_END && i == 0)
+            continue;
+        x509 = wolfSSL_X509_new();
+        if (x509 == NULL) {
+            WOLFSSL_MSG("Error Creating X509");
+            return NULL;
+        }
+        ret = DecodeToX509(x509, ssl->session.chain.certs[i].buffer,
+                             ssl->session.chain.certs[i].length);
+
+        if (ret != 0 || wolfSSL_sk_X509_push(sk, x509) != WOLFSSL_SUCCESS) {
+            WOLFSSL_MSG("Error decoding cert");
+            wolfSSL_X509_free(x509);
+            wolfSSL_sk_X509_free(sk);
+            return NULL;
+        }
+    }
+
+    if (sk == NULL) {
+        WOLFSSL_MSG("Null session chain");
+    }
+    /* This is Free'd when ssl is Free'd */
+    ssl->peerCertChain = sk;
+    return sk;
+}
+#endif /* OPENSSL_ALL || WOLFSSL_QT */
+#endif /* SESSION_CERTS */
 
 #ifndef NO_CERTS
 #if defined(KEEP_PEER_CERT) || defined(SESSION_CERTS) || \
@@ -14407,11 +17033,28 @@
  * don't */
 static void ExternalFreeX509(WOLFSSL_X509* x509)
 {
+    int doFree = 0;
+
     WOLFSSL_ENTER("ExternalFreeX509");
     if (x509) {
         if (x509->dynamicMemory) {
-            FreeX509(x509);
-            XFREE(x509, x509->heap, DYNAMIC_TYPE_X509);
+        #if defined(OPENSSL_EXTRA) || defined(OPENSSL_ALL)
+            if (wc_LockMutex(&x509->refMutex) != 0) {
+                WOLFSSL_MSG("Couldn't lock x509 mutex");
+            }
+            /* only free if all references to it are done */
+            x509->refCount--;
+            if (x509->refCount == 0)
+                doFree = 1;
+            wc_UnLockMutex(&x509->refMutex);
+        #else
+            doFree = 1;
+        #endif /* OPENSSL_EXTRA */
+
+            if (doFree) {
+                FreeX509(x509);
+                XFREE(x509, x509->heap, DYNAMIC_TYPE_X509);
+            }
         } else {
             WOLFSSL_MSG("free called on non dynamic object, not freeing");
         }
@@ -14419,6 +17062,7 @@
 }
 
 /* Frees an external WOLFSSL_X509 structure */
+WOLFSSL_ABI
 void wolfSSL_X509_free(WOLFSSL_X509* x509)
 {
     WOLFSSL_ENTER("wolfSSL_FreeX509");
@@ -14428,6 +17072,7 @@
 
 /* copy name into in buffer, at most sz bytes, if buffer is null will
    malloc buffer, call responsible for freeing                     */
+WOLFSSL_ABI
 char* wolfSSL_X509_NAME_oneline(WOLFSSL_X509_NAME* name, char* in, int sz)
 {
     int copySz;
@@ -14462,6 +17107,115 @@
     return in;
 }
 
+#if defined(OPENSSL_EXTRA) && defined(XSNPRINTF)
+/* Copies X509 subject name into a buffer, with comma-separated name entries
+ *   (matching OpenSSL v1.0.0 format)
+ * Example Output for Issuer:
+ *
+ * C=US, ST=Montana, L=Bozeman, O=Sawtooth, OU=Consulting,
+ *  CN=www.wolfssl.com, emailAddress=info@wolfssl.com
+ */
+char* wolfSSL_X509_get_name_oneline(WOLFSSL_X509_NAME* name, char* in, int sz)
+{
+    WOLFSSL_X509_NAME_ENTRY* entry;
+    int nameSz, strSz, count, i, idx = 0;
+    int totalSz = 0;
+    char *str;
+    char tmpBuf[256];
+    char buf[80];
+    const char* sn;
+    WOLFSSL_ENTER("wolfSSL_X509_get_name_oneline");
+
+    if (name == NULL) {
+        WOLFSSL_MSG("wolfSSL_X509_get_subject_name failed");
+        return NULL;
+    }
+    #ifdef WOLFSSL_STATIC_MEMORY
+    if (!in) {
+        WOLFSSL_MSG("Using static memory -- please pass in a buffer");
+        return NULL;
+    }
+    #endif
+
+    tmpBuf[0] = '\0'; /* Make sure tmpBuf is NULL terminated */
+    /* Loop through X509 name entries and copy new format to buffer */
+    count = wolfSSL_X509_NAME_entry_count(name);
+    for (i = 0; i < count; i++) {
+
+        /* Get name entry and size */
+        entry = wolfSSL_X509_NAME_get_entry(name, i);
+        if (entry == NULL) {
+            WOLFSSL_MSG("X509_NAME_get_entry failed");
+            return NULL;
+        }
+        nameSz = wolfSSL_X509_NAME_get_text_by_NID(name, entry->nid, buf,
+                                                                   sizeof(buf));
+        if (nameSz < 0) {
+            WOLFSSL_MSG("X509_NAME_get_text_by_NID failed");
+            return NULL;
+        }
+
+        /* Get short name */
+        sn = wolfSSL_OBJ_nid2sn(entry->nid);
+        if (sn == NULL) {
+            WOLFSSL_MSG("OBJ_nid2sn failed");
+            return NULL;
+        }
+
+        /* Copy sn and name text to buffer
+         * Add extra strSz for '=', ',', ' ' and '\0' characters in XSNPRINTF.
+         */
+        if (i != count - 1) {
+            strSz = (int)XSTRLEN(sn) + nameSz + 4;
+            totalSz+= strSz;
+            str = (char*)XMALLOC(strSz, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+            if (str == NULL) {
+                WOLFSSL_MSG("Memory error");
+                return NULL;
+            }
+            XSNPRINTF(str, strSz, "%s=%s, ", sn, buf);
+        }
+        else {
+            /* Copy last name entry
+            * Add extra strSz for '=' and '\0' characters in XSNPRINTF.
+            */
+            strSz = (int)XSTRLEN(sn) + nameSz + 2;
+            totalSz+= strSz;
+            str = (char*)XMALLOC(strSz, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+            if (str == NULL) {
+                WOLFSSL_MSG("Memory error");
+                return NULL;
+            }
+            XSNPRINTF(str, strSz, "%s=%s", sn, buf);
+        }
+        /* Copy string to tmpBuf */
+        XSTRNCAT(tmpBuf, str, strSz);
+        idx += strSz;
+        XFREE(str, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+
+    /* Allocate space based on total string size if no buffer was provided */
+    if (!in) {
+        in = (char*)XMALLOC(totalSz+1, NULL, DYNAMIC_TYPE_OPENSSL);
+        if (in == NULL) {
+            WOLFSSL_MSG("Memory error");
+            return in;
+        }
+    }
+    else {
+        if (totalSz > sz) {
+            WOLFSSL_MSG("Memory error");
+            return NULL;
+        }
+    }
+
+    XMEMCPY(in, tmpBuf, totalSz);
+    in[totalSz] = '\0';
+
+    return in;
+}
+#endif
+
 
 /* Wraps wolfSSL_X509_d2i
  *
@@ -14470,7 +17224,13 @@
 WOLFSSL_X509* wolfSSL_d2i_X509(WOLFSSL_X509** x509, const unsigned char** in,
         int len)
 {
-    return wolfSSL_X509_d2i(x509, *in, len);
+    WOLFSSL_X509* newX509 = NULL;
+
+    newX509 = wolfSSL_X509_d2i(x509, *in, len);
+    if (newX509 != NULL) {
+        *in += newX509->derCert->length;
+    }
+    return newX509;
 }
 
 
@@ -14482,7 +17242,7 @@
 
     if (in != NULL && len != 0) {
     #ifdef WOLFSSL_SMALL_STACK
-        DecodedCert* cert = NULL;
+        DecodedCert* cert;
     #else
         DecodedCert  cert[1];
     #endif
@@ -14496,12 +17256,10 @@
 
         InitDecodedCert(cert, (byte*)in, len, NULL);
         if (ParseCertRelative(cert, CERT_TYPE, 0, NULL) == 0) {
-            newX509 = (WOLFSSL_X509*)XMALLOC(sizeof(WOLFSSL_X509), NULL,
-                                             DYNAMIC_TYPE_X509);
+            newX509 = wolfSSL_X509_new();
             if (newX509 != NULL) {
-                InitX509(newX509, 1, NULL);
                 if (CopyDecodedToX509(newX509, cert) != 0) {
-                    XFREE(newX509, NULL, DYNAMIC_TYPE_X509);
+                    wolfSSL_X509_free(newX509);
                     newX509 = NULL;
                 }
             }
@@ -14520,8 +17278,12 @@
 #endif /* KEEP_PEER_CERT || SESSION_CERTS || OPENSSL_EXTRA ||
           OPENSSL_EXTRA_X509_SMALL */
 
-#if defined(OPENSSL_ALL) || defined(KEEP_PEER_CERT) || defined(SESSION_CERTS)
+
+
+#if defined(OPENSSL_ALL) || defined(KEEP_OUR_CERT) || defined(KEEP_PEER_CERT) || \
+    defined(SESSION_CERTS)
     /* return the next, if any, altname from the peer cert */
+    WOLFSSL_ABI
     char* wolfSSL_X509_get_next_altname(WOLFSSL_X509* cert)
     {
         char* ret = NULL;
@@ -14541,7 +17303,6 @@
         return ret;
     }
 
-
     int wolfSSL_X509_get_isCA(WOLFSSL_X509* x509)
     {
         int isCA = 0;
@@ -14560,7 +17321,8 @@
                                                  unsigned char* buf, int* bufSz)
     {
         WOLFSSL_ENTER("wolfSSL_X509_get_signature");
-        if (x509 == NULL || bufSz == NULL || *bufSz < (int)x509->sig.length)
+        if (x509 == NULL || bufSz == NULL || (*bufSz < (int)x509->sig.length &&
+                    buf != NULL))
             return WOLFSSL_FATAL_ERROR;
 
         if (buf != NULL)
@@ -14571,6 +17333,90 @@
     }
 
 
+    /* Getter function that copies over the DER public key buffer to "buf" and
+     * sets the size in bufSz. If "buf" is NULL then just bufSz is set to needed
+     * buffer size. "bufSz" passed in should initially be set by the user to be
+     * the size of "buf". This gets checked to make sure the buffer is large
+     * enough to hold the public key.
+     *
+     * Note: this is the X.509 form of key with "header" info.
+     * return WOLFSSL_SUCCESS on success
+     */
+    int wolfSSL_X509_get_pubkey_buffer(WOLFSSL_X509* x509,
+                                                unsigned char* buf, int* bufSz)
+    {
+    #ifdef WOLFSSL_SMALL_STACK
+        DecodedCert* cert;
+    #else
+        DecodedCert cert[1];
+    #endif
+        word32 idx;
+        const byte*  der;
+        int length = 0;
+        int    ret, derSz = 0;
+        int badDate = 0;
+        const byte* pubKeyX509 = NULL;
+        int   pubKeyX509Sz = 0;
+
+        WOLFSSL_ENTER("wolfSSL_X509_get_pubkey_buffer");
+        if (x509 == NULL || bufSz == NULL) {
+            WOLFSSL_LEAVE("wolfSSL_X509_get_pubkey_buffer", BAD_FUNC_ARG);
+            return WOLFSSL_FATAL_ERROR;
+        }
+
+
+    #ifdef WOLFSSL_SMALL_STACK
+        cert = (DecodedCert*)XMALLOC(sizeof(DecodedCert),
+                                       x509->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        if (cert == NULL) {
+            WOLFSSL_LEAVE("wolfSSL_X509_get_pubkey_buffer", MEMORY_E);
+            return WOLFSSL_FATAL_ERROR;
+        }
+    #endif
+
+        der = wolfSSL_X509_get_der(x509, &derSz);
+        InitDecodedCert(cert, der, derSz, NULL);
+        ret = wc_GetPubX509(cert, 0, &badDate);
+        if (ret >= 0) {
+            idx = cert->srcIdx;
+            pubKeyX509 = cert->source + cert->srcIdx;
+            ret = GetSequence(cert->source, &cert->srcIdx, &length,
+                    cert->maxIdx);
+            pubKeyX509Sz = length + (cert->srcIdx - idx);
+        }
+
+        FreeDecodedCert(cert);
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(cert, x509->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    #endif
+
+        if (ret < 0) {
+            WOLFSSL_LEAVE("wolfSSL_X509_get_pubkey_buffer", ret);
+            return WOLFSSL_FATAL_ERROR;
+        }
+
+        if (buf != NULL) {
+            if (pubKeyX509Sz > *bufSz) {
+                WOLFSSL_LEAVE("wolfSSL_X509_get_pubkey_buffer", BUFFER_E);
+                return WOLFSSL_FATAL_ERROR;
+            }
+            XMEMCPY(buf, pubKeyX509, pubKeyX509Sz);
+        }
+        *bufSz = pubKeyX509Sz;
+
+        return WOLFSSL_SUCCESS;
+    }
+
+
+    /* Getter function for the public key OID value
+     * return public key OID stored in WOLFSSL_X509 structure */
+    int wolfSSL_X509_get_pubkey_type(WOLFSSL_X509* x509)
+    {
+        if (x509 == NULL)
+            return WOLFSSL_FAILURE;
+        return x509->pubKeyOID;
+    }
+
     /* write X509 serial number in unsigned binary to buffer
        buffer needs to be at least EXTERNAL_SERIAL_SIZE (32) for all cases
        return WOLFSSL_SUCCESS on success */
@@ -14588,7 +17434,7 @@
         return WOLFSSL_SUCCESS;
     }
 
-
+    /* not an openssl compatibility function - getting for derCert */
     const byte* wolfSSL_X509_get_der(WOLFSSL_X509* x509, int* outSz)
     {
         WOLFSSL_ENTER("wolfSSL_X509_get_der");
@@ -14600,6 +17446,86 @@
         return x509->derCert->buffer;
     }
 
+    /* used by JSSE (not a standard compatibility function) */
+    /* this is not thread safe */
+    WOLFSSL_ABI
+    const byte* wolfSSL_X509_notBefore(WOLFSSL_X509* x509)
+    {
+        static byte notBeforeData[CTC_DATE_SIZE]; /* temp buffer for date */
+        WOLFSSL_ENTER("wolfSSL_X509_notBefore");
+
+        if (x509 == NULL)
+            return NULL;
+
+        XMEMSET(notBeforeData, 0, sizeof(notBeforeData));
+        notBeforeData[0] = (byte)x509->notBefore.type;
+        notBeforeData[1] = (byte)x509->notBefore.length;
+        XMEMCPY(&notBeforeData[2], x509->notBefore.data, x509->notBefore.length);
+
+        return notBeforeData;
+    }
+    /* used by JSSE (not a standard compatibility function) */
+    /* this is not thread safe */
+    WOLFSSL_ABI
+    const byte* wolfSSL_X509_notAfter(WOLFSSL_X509* x509)
+    {
+        static byte notAfterData[CTC_DATE_SIZE]; /* temp buffer for date */
+        WOLFSSL_ENTER("wolfSSL_X509_notAfter");
+
+        if (x509 == NULL)
+            return NULL;
+
+        XMEMSET(notAfterData, 0, sizeof(notAfterData));
+        notAfterData[0] = (byte)x509->notAfter.type;
+        notAfterData[1] = (byte)x509->notAfter.length;
+        XMEMCPY(&notAfterData[2], x509->notAfter.data, x509->notAfter.length);
+
+        return notAfterData;
+    }
+
+    #if defined(WOLFSSL_QT) || defined(OPENSSL_ALL) && !defined(NO_WOLFSSL_STUB)
+    WOLFSSL_ASN1_TIME* wolfSSL_X509_gmtime_adj(WOLFSSL_ASN1_TIME *s, long adj)
+    {
+        (void) s;
+        (void) adj;
+        WOLFSSL_STUB("wolfSSL_X509_gmtime_adj");
+        return NULL;
+    }
+    #endif
+
+    /* get the buffer to be signed (tbs) from the WOLFSSL_X509 certificate
+     *
+     * outSz : gets set to the size of the buffer
+     * returns a pointer to the internal buffer at the location of TBS on
+     *         on success and NULL on failure.
+     */
+    const unsigned char* wolfSSL_X509_get_tbs(WOLFSSL_X509* x509, int* outSz)
+    {
+        int sz = 0, len;
+        unsigned int idx = 0, tmpIdx;
+        const unsigned char* der = NULL;
+        const unsigned char* tbs = NULL;
+
+        if (x509 == NULL || outSz == NULL) {
+            return NULL;
+        }
+
+        der = wolfSSL_X509_get_der(x509, &sz);
+        if (der == NULL) {
+            return NULL;
+        }
+
+        if (GetSequence(der, &idx, &len, sz) < 0) {
+            return NULL;
+        }
+        tbs = der + idx;
+        tmpIdx = idx;
+        if (GetSequence(der, &idx, &len, sz) < 0) {
+            return NULL;
+        }
+        *outSz = len + (idx - tmpIdx);
+        return tbs;
+    }
 
     int wolfSSL_X509_version(WOLFSSL_X509* x509)
     {
@@ -14611,29 +17537,6 @@
         return x509->version;
     }
 
-
-    const byte* wolfSSL_X509_notBefore(WOLFSSL_X509* x509)
-    {
-        WOLFSSL_ENTER("wolfSSL_X509_notBefore");
-
-        if (x509 == NULL)
-            return NULL;
-
-        return x509->notBefore;
-    }
-
-
-    const byte* wolfSSL_X509_notAfter(WOLFSSL_X509* x509)
-    {
-        WOLFSSL_ENTER("wolfSSL_X509_notAfter");
-
-        if (x509 == NULL)
-            return NULL;
-
-        return x509->notAfter;
-    }
-
-
 #ifdef WOLFSSL_SEP
 
 /* copy oid into in buffer, at most *inOutSz bytes, if buffer is null will
@@ -14727,10 +17630,34 @@
 
 /* require OPENSSL_EXTRA since wolfSSL_X509_free is wrapped by OPENSSL_EXTRA */
 #if !defined(NO_CERTS) && defined(OPENSSL_EXTRA)
+
+WOLFSSL_ASN1_TIME* wolfSSL_X509_get_notBefore(const WOLFSSL_X509* x509)
+{
+    WOLFSSL_ENTER("wolfSSL_X509_get_notBefore");
+
+    if (x509 == NULL)
+        return NULL;
+
+    return (WOLFSSL_ASN1_TIME*)&x509->notBefore;
+}
+
+
+WOLFSSL_ASN1_TIME* wolfSSL_X509_get_notAfter(const WOLFSSL_X509* x509)
+{
+    WOLFSSL_ENTER("wolfSSL_X509_get_notAfter");
+
+    if (x509 == NULL)
+        return NULL;
+
+    return (WOLFSSL_ASN1_TIME*)&x509->notAfter;
+}
+
+
 /* return 1 on success 0 on fail */
 int wolfSSL_sk_X509_push(WOLF_STACK_OF(WOLFSSL_X509_NAME)* sk, WOLFSSL_X509* x509)
 {
     WOLFSSL_STACK* node;
+    WOLFSSL_ENTER("wolfSSL_sk_X509_push");
 
     if (sk == NULL || x509 == NULL) {
         return WOLFSSL_FAILURE;
@@ -14755,6 +17682,7 @@
     /* push new x509 onto head of stack */
     node->data.x509 = sk->data.x509;
     node->next      = sk->next;
+    node->type      = sk->type;
     sk->next        = node;
     sk->data.x509   = x509;
     sk->num        += 1;
@@ -14790,28 +17718,6 @@
     return x509;
 }
 
-
-/* Getter function for WOLFSSL_X509_NAME pointer
- *
- * sk is the stack to retrieve pointer from
- * i  is the index value in stack
- *
- * returns a pointer to a WOLFSSL_X509_NAME structure on success and NULL on
- *         fail
- */
-void* wolfSSL_sk_X509_NAME_value(const STACK_OF(WOLFSSL_X509_NAME)* sk, int i)
-{
-    WOLFSSL_ENTER("wolfSSL_sk_X509_NAME_value");
-
-    for (; sk != NULL && i > 0; i--)
-        sk = sk->next;
-
-    if (i != 0 || sk == NULL)
-        return NULL;
-    return sk->data.name;
-}
-
-
 /* Getter function for WOLFSSL_X509 pointer
  *
  * sk is the stack to retrieve pointer from
@@ -14820,7 +17726,7 @@
  * returns a pointer to a WOLFSSL_X509 structure on success and NULL on
  *         fail
  */
-void* wolfSSL_sk_X509_value(STACK_OF(WOLFSSL_X509)* sk, int i)
+WOLFSSL_X509* wolfSSL_sk_X509_value(STACK_OF(WOLFSSL_X509)* sk, int i)
 {
     WOLFSSL_ENTER("wolfSSL_sk_X509_value");
 
@@ -14832,6 +17738,20 @@
     return sk->data.x509;
 }
 
+WOLFSSL_X509* wolfSSL_sk_X509_shift(WOLF_STACK_OF(WOLFSSL_X509)* sk)
+{
+    return wolfSSL_sk_X509_pop(sk);
+}
+
+#ifndef NO_WOLFSSL_STUB
+void* wolfSSL_sk_X509_OBJECT_value(WOLF_STACK_OF(WOLFSSL_X509_OBJECT)* sk, int x)
+{
+    (void) sk;
+    (void) x;
+    return NULL;
+}
+#endif
+
 
 /* Free's all nodes in X509 stack. This is different then wolfSSL_sk_X509_free
  * in that it allows for choosing the function to use when freeing an X509s.
@@ -14839,7 +17759,9 @@
  * sk  stack to free nodes in
  * f   X509 free function
  */
-void wolfSSL_sk_X509_pop_free(STACK_OF(WOLFSSL_X509)* sk, void f (WOLFSSL_X509*)){
+void wolfSSL_sk_X509_pop_free(STACK_OF(WOLFSSL_X509)* sk,
+    void (*f) (WOLFSSL_X509*))
+{
     WOLFSSL_STACK* node;
 
     WOLFSSL_ENTER("wolfSSL_sk_X509_pop_free");
@@ -14850,83 +17772,316 @@
 
     /* parse through stack freeing each node */
     node = sk->next;
-    while (sk->num > 1) {
+    while (node && sk->num > 1) {
         WOLFSSL_STACK* tmp = node;
         node = node->next;
 
-        f(tmp->data.x509);
-        XFREE(tmp, NULL, DYNAMIC_TYPE_X509);
-        sk->num -= 1;
-    }
-
-    /* free head of stack */
-    if (sk->num == 1) {
-	    f(sk->data.x509);
-    }
-    XFREE(sk, NULL, DYNAMIC_TYPE_X509);
-}
-
-
-/* free structure for x509 stack */
-void wolfSSL_sk_X509_free(WOLF_STACK_OF(WOLFSSL_X509_NAME)* sk) {
-    WOLFSSL_STACK* node;
-
-    if (sk == NULL) {
-        return;
-    }
-
-    /* parse through stack freeing each node */
-    node = sk->next;
-    while (sk->num > 1) {
-        WOLFSSL_STACK* tmp = node;
-        node = node->next;
-
-        wolfSSL_X509_free(tmp->data.x509);
+        if (f)
+            f(tmp->data.x509);
+        else
+            wolfSSL_X509_free(tmp->data.x509);
+        tmp->data.x509 = NULL;
         XFREE(tmp, NULL, DYNAMIC_TYPE_X509);
         sk->num -= 1;
     }
 
     /* free head of stack */
     if (sk->num == 1) {
-    wolfSSL_X509_free(sk->data.x509);
+        if (f)
+            f(sk->data.x509);
+        else
+            wolfSSL_X509_free(sk->data.x509);
+        sk->data.x509 = NULL;
     }
     XFREE(sk, NULL, DYNAMIC_TYPE_X509);
 }
 
+
+/* free structure for x509 stack */
+void wolfSSL_sk_X509_free(WOLF_STACK_OF(WOLFSSL_X509)* sk)
+{
+    wolfSSL_sk_X509_pop_free(sk, NULL);
+}
+
 #endif /* NO_CERTS && OPENSSL_EXTRA */
 
-#ifdef OPENSSL_EXTRA
+#if defined(OPENSSL_ALL) || defined (WOLFSSL_QT)
+/* return 1 on success 0 on fail */
+int wolfSSL_sk_ACCESS_DESCRIPTION_push(WOLF_STACK_OF(ACCESS_DESCRIPTION)* sk,
+                                              WOLFSSL_ACCESS_DESCRIPTION* access)
+{
+    WOLFSSL_STACK* node;
+
+    WOLFSSL_ENTER("wolfSSL_sk_ACCESS_DESCRIPTION_push");
+
+    if (sk == NULL || access == NULL) {
+        return WOLFSSL_FAILURE;
+    }
+
+    /* no previous values in stack */
+    if (sk->data.access == NULL) {
+        sk->data.access = access;
+        sk->num += 1;
+        return WOLFSSL_SUCCESS;
+    }
+
+    /* stack already has value(s) create a new node and add more */
+    node = (WOLFSSL_STACK*)XMALLOC(sizeof(WOLFSSL_STACK), NULL,
+                                   DYNAMIC_TYPE_ASN1);
+    if (node == NULL) {
+        WOLFSSL_MSG("Memory error");
+        return WOLFSSL_FAILURE;
+    }
+    XMEMSET(node, 0, sizeof(WOLFSSL_STACK));
+
+    /* push new obj onto head of stack */
+    node->data.access  = sk->data.access;
+    node->next         = sk->next;
+    node->type         = sk->type;
+    sk->next           = node;
+    sk->data.access    = access;
+    sk->num            += 1;
+
+    return WOLFSSL_SUCCESS;
+}
+
+/* Frees all nodes in ACCESS_DESCRIPTION stack
+*
+* sk stack of nodes to free
+* f  free function to use, not called with wolfSSL
+*/
+void wolfSSL_sk_ACCESS_DESCRIPTION_pop_free(WOLFSSL_STACK* sk,
+    void (*f) (WOLFSSL_ACCESS_DESCRIPTION*))
+{
+   WOLFSSL_STACK* node;
+
+   WOLFSSL_ENTER("wolfSSL_sk_ACCESS_DESCRIPTION_pop_free");
+
+   if (sk == NULL) {
+       return;
+   }
+
+   /* parse through stack freeing each node */
+   node = sk->next;
+   while (node && sk->num > 1) {
+       WOLFSSL_STACK* tmp = node;
+       node = node->next;
+
+       if (f)
+           f(tmp->data.access);
+       else
+           wolfSSL_ACCESS_DESCRIPTION_free(tmp->data.access);
+       tmp->data.access = NULL;
+       XFREE(tmp, NULL, DYNAMIC_TYPE_ASN1);
+       sk->num -= 1;
+   }
+
+   /* free head of stack */
+   if (sk->num == 1) {
+        if (f)
+            f(sk->data.access);
+        else {
+            if(sk->data.access->method) {
+
+               wolfSSL_ASN1_OBJECT_free(sk->data.access->method);
+            }
+            if(sk->data.access->location) {
+               wolfSSL_GENERAL_NAME_free(sk->data.access->location);
+            }
+        }
+        sk->data.access = NULL;
+   }
+   XFREE(sk, NULL, DYNAMIC_TYPE_ASN1);
+}
+
+void wolfSSL_sk_ACCESS_DESCRIPTION_free(WOLFSSL_STACK* sk)
+{
+    wolfSSL_sk_ACCESS_DESCRIPTION_pop_free(sk, NULL);
+}
+
+void wolfSSL_ACCESS_DESCRIPTION_free(WOLFSSL_ACCESS_DESCRIPTION* access)
+{
+    WOLFSSL_ENTER("wolfSSL_ACCESS_DESCRIPTION_free");
+    if (access == NULL)
+        return;
+
+    if (access->method)
+        wolfSSL_ASN1_OBJECT_free(access->method);
+    if (access->location)
+        wolfSSL_GENERAL_NAME_free(access->location);
+
+    /* access = NULL, don't try to access or double free it */
+}
+#endif
+
+#ifdef OPENSSL_EXTRA
+
+/* create a generic wolfSSL stack node
+ * returns a new WOLFSSL_STACK structure on success */
+WOLFSSL_STACK* wolfSSL_sk_new_node(void* heap)
+{
+    WOLFSSL_STACK* sk;
+    WOLFSSL_ENTER("wolfSSL_sk_new_node");
+
+    sk = (WOLFSSL_STACK*)XMALLOC(sizeof(WOLFSSL_STACK), heap,
+                                                          DYNAMIC_TYPE_OPENSSL);
+    if (sk != NULL) {
+        XMEMSET(sk, 0, sizeof(*sk));
+        sk->heap = heap;
+    }
+
+    return sk;
+}
+
+/* free's node but does not free internal data such as in->data.x509 */
+void wolfSSL_sk_free_node(WOLFSSL_STACK* in)
+{
+    if (in != NULL) {
+        XFREE(in, in->heap, DYNAMIC_TYPE_OPENSSL);
+    }
+}
+
+/* pushes node "in" onto "stack" and returns pointer to the new stack on success
+ * also handles internal "num" for number of nodes on stack
+ * return WOLFSSL_SUCCESS on success
+ */
+int wolfSSL_sk_push_node(WOLFSSL_STACK** stack, WOLFSSL_STACK* in)
+{
+    if (stack == NULL || in == NULL) {
+        return WOLFSSL_FAILURE;
+    }
+
+    if (*stack == NULL) {
+        in->num = 1;
+        *stack = in;
+        return WOLFSSL_SUCCESS;
+    }
+
+    in->num  = (*stack)->num + 1;
+    in->next = *stack;
+    *stack   = in;
+    return WOLFSSL_SUCCESS;
+}
+
+/* return 1 on success 0 on fail */
+int wolfSSL_sk_push(WOLFSSL_STACK* sk, const void *data)
+{
+    int ret = WOLFSSL_FAILURE;
+    WOLFSSL_ENTER("wolfSSL_sk_push");
+
+    switch (sk->type) {
+    #if defined(OPENSSL_ALL) || defined(WOLFSSL_QT)
+        case STACK_TYPE_X509:
+            ret = wolfSSL_sk_X509_push(sk, (WOLFSSL_X509*) data);
+            break;
+        #ifndef NO_WOLFSSL_STUB
+        case STACK_TYPE_CIPHER:
+            ret = wolfSSL_sk_CIPHER_push(sk, (WOLFSSL_CIPHER*) data);
+            break;
+        #endif
+        case STACK_TYPE_GEN_NAME:
+            ret = wolfSSL_sk_ASN1_OBJECT_push(sk, (WOLFSSL_ASN1_OBJECT*) data);
+            break;
+        case STACK_TYPE_ACCESS_DESCRIPTION:
+            ret = wolfSSL_sk_ACCESS_DESCRIPTION_push(sk,
+                                            (WOLFSSL_ACCESS_DESCRIPTION*) data);
+            break;
+        case STACK_TYPE_NULL:
+            ret = wolfSSL_sk_GENERIC_push(sk, (void*) data);
+            break;
+        case STACK_TYPE_OBJ:
+            ret = wolfSSL_sk_ASN1_OBJECT_push(sk, (WOLFSSL_ASN1_OBJECT*) data);
+            break;
+    #endif
+        default:
+            ret = wolfSSL_sk_ASN1_OBJECT_push(sk, (WOLFSSL_ASN1_OBJECT*) data);
+            break;
+    }
+
+    return ret;
+}
+
+/* Creates and returns new GENERAL_NAME structure */
+WOLFSSL_GENERAL_NAME* wolfSSL_GENERAL_NAME_new(void)
+{
+    WOLFSSL_GENERAL_NAME* gn;
+    WOLFSSL_ENTER("GENERAL_NAME_new");
+
+    gn = (WOLFSSL_GENERAL_NAME*)XMALLOC(sizeof(WOLFSSL_GENERAL_NAME), NULL,
+                                                             DYNAMIC_TYPE_ASN1);
+    if (gn == NULL) {
+        return NULL;
+    }
+    XMEMSET(gn, 0, sizeof(WOLFSSL_GENERAL_NAME));
+
+    gn->d.ia5 = wolfSSL_ASN1_STRING_new();
+    if (gn->d.ia5 == NULL) {
+        WOLFSSL_MSG("Issue creating ASN1_STRING struct");
+        wolfSSL_GENERAL_NAME_free(gn);
+        return NULL;
+    }
+    return gn;
+}
+
+/* return 1 on success 0 on fail */
+int wolfSSL_sk_GENERAL_NAME_push(WOLF_STACK_OF(WOLFSSL_GENERAL_NAME)* sk,
+                                                      WOLFSSL_GENERAL_NAME* gn)
+{
+    WOLFSSL_STACK* node;
+    WOLFSSL_ENTER("wolfSSL_sk_GENERAL_NAME_push");
+
+    if (sk == NULL || gn == NULL) {
+        return WOLFSSL_FAILURE;
+    }
+
+    /* no previous values in stack */
+    if (sk->data.gn == NULL) {
+        sk->data.gn = gn;
+        sk->num += 1;
+
+        return WOLFSSL_SUCCESS;
+    }
+
+    /* stack already has value(s) create a new node and add more */
+    node = (WOLFSSL_STACK*)XMALLOC(sizeof(WOLFSSL_STACK), NULL,
+                                                             DYNAMIC_TYPE_ASN1);
+    if (node == NULL) {
+        WOLFSSL_MSG("Memory error");
+        return WOLFSSL_FAILURE;
+    }
+    XMEMSET(node, 0, sizeof(WOLFSSL_STACK));
+
+    /* push new obj onto head of stack */
+    node->data.gn = sk->data.gn;
+    node->next    = sk->next;
+    sk->next      = node;
+    sk->data.gn   = gn;
+    sk->num      += 1;
+
+    return WOLFSSL_SUCCESS;
+}
 
 /* Returns the general name at index i from the stack
  *
- * sk stack to get general name from
- * i  index to get
+ * sk  stack to get general name from
+ * idx index to get
  *
  * return a pointer to the internal node of the stack
  */
-WOLFSSL_ASN1_OBJECT* wolfSSL_sk_GENERAL_NAME_value(WOLFSSL_STACK* sk, int i)
-{
-    WOLFSSL_STACK* cur;
-    int j;
-
-    WOLFSSL_ENTER("wolfSSL_sk_GENERAL_NAME_value");
-
-    if (i < 0 || sk == NULL) {
-        return NULL;
-    }
-
-    cur = sk;
-    for (j = 0; j < i && cur != NULL; j++) {
-        cur = cur->next;
-    }
-
-    if (cur == NULL) {
-        return NULL;
-    }
-
-    return cur->data.obj;
-}
-
+WOLFSSL_GENERAL_NAME* wolfSSL_sk_GENERAL_NAME_value(WOLFSSL_STACK* sk, int idx)
+{
+    WOLFSSL_STACK* ret;
+
+    if (sk == NULL) {
+        return NULL;
+    }
+
+    ret = wolfSSL_sk_get_node(sk, idx);
+    if (ret != NULL) {
+        return ret->data.gn;
+    }
+    return NULL;
+}
 
 /* Gets the number of nodes in the stack
  *
@@ -14951,36 +18106,502 @@
  * f  free function to use, not called with wolfSSL
  */
 void wolfSSL_sk_GENERAL_NAME_pop_free(WOLFSSL_STACK* sk,
-        void f (WOLFSSL_ASN1_OBJECT*))
+        void (*f) (WOLFSSL_GENERAL_NAME*))
 {
     WOLFSSL_STACK* node;
 
     WOLFSSL_ENTER("wolfSSL_sk_GENERAL_NAME_pop_free");
 
-    (void)f;
     if (sk == NULL) {
         return;
     }
 
     /* parse through stack freeing each node */
     node = sk->next;
-    while (sk->num > 1) {
+    while (node && sk->num > 1) {
         WOLFSSL_STACK* tmp = node;
         node = node->next;
 
-        wolfSSL_ASN1_OBJECT_free(tmp->data.obj);
+        if (f)
+            f(tmp->data.gn);
+        else
+            wolfSSL_GENERAL_NAME_free(tmp->data.gn);
         XFREE(tmp, NULL, DYNAMIC_TYPE_ASN1);
         sk->num -= 1;
     }
 
     /* free head of stack */
     if (sk->num == 1) {
-	    wolfSSL_ASN1_OBJECT_free(sk->data.obj);
+        if (f)
+            f(sk->data.gn);
+        else
+            wolfSSL_GENERAL_NAME_free(sk->data.gn);
     }
     XFREE(sk, NULL, DYNAMIC_TYPE_ASN1);
-
-
-}
+}
+
+void wolfSSL_sk_GENERAL_NAME_free(WOLFSSL_STACK* sk)
+{
+    WOLFSSL_ENTER("sk_GENERAL_NAME_free");
+    wolfSSL_sk_GENERAL_NAME_pop_free(sk, NULL);
+}
+
+
+/* returns the number of nodes in stack on success and WOLFSSL_FATAL_ERROR
+ * on fail */
+int wolfSSL_sk_ACCESS_DESCRIPTION_num(WOLFSSL_STACK* sk)
+{
+    if (sk == NULL) {
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    return (int)sk->num;
+}
+
+#ifndef NO_WOLFSSL_STUB
+/* similar to call to sk_ACCESS_DESCRIPTION_pop_free */
+void wolfSSL_AUTHORITY_INFO_ACCESS_free(
+        WOLF_STACK_OF(WOLFSSL_ACCESS_DESCRIPTION)* sk)
+{
+    WOLFSSL_STUB("wolfSSL_AUTHORITY_INFO_ACCESS_free");
+    (void)sk;
+}
+#endif
+
+/* returns the node at index "idx", NULL if not found */
+WOLFSSL_STACK* wolfSSL_sk_get_node(WOLFSSL_STACK* sk, int idx)
+{
+    int i;
+    WOLFSSL_STACK* ret = NULL;
+    WOLFSSL_STACK* current;
+
+    current = sk;
+    for (i = 0; i <= idx && current != NULL; i++) {
+        if (i == idx) {
+            ret = current;
+            break;
+        }
+        current = current->next;
+    }
+    return ret;
+}
+
+/* returns NULL on fail and pointer to internal data on success */
+WOLFSSL_ACCESS_DESCRIPTION* wolfSSL_sk_ACCESS_DESCRIPTION_value(
+        WOLFSSL_STACK* sk, int idx)
+{
+    WOLFSSL_STACK* ret;
+
+    if (sk == NULL) {
+        return NULL;
+    }
+
+    ret = wolfSSL_sk_get_node(sk, idx);
+    if (ret != NULL) {
+        return ret->data.access;
+    }
+    return NULL;
+}
+
+/* Frees GENERAL_NAME objects.
+*/
+void wolfSSL_GENERAL_NAME_free(WOLFSSL_GENERAL_NAME* name)
+{
+    WOLFSSL_ENTER("wolfSSL_GENERAL_NAME_Free");
+    if(name != NULL) {
+        if (name->d.dNSName != NULL) {
+            wolfSSL_ASN1_STRING_free(name->d.dNSName);
+            name->d.dNSName = NULL;
+        }
+        if (name->d.uniformResourceIdentifier != NULL) {
+            wolfSSL_ASN1_STRING_free(name->d.uniformResourceIdentifier);
+            name->d.uniformResourceIdentifier = NULL;
+        }
+        if (name->d.iPAddress != NULL) {
+            wolfSSL_ASN1_STRING_free(name->d.iPAddress);
+            name->d.iPAddress = NULL;
+        }
+        if (name->d.registeredID != NULL) {
+            wolfSSL_ASN1_OBJECT_free(name->d.registeredID);
+            name->d.registeredID = NULL;
+        }
+        if (name->d.ia5 != NULL) {
+            wolfSSL_ASN1_STRING_free(name->d.ia5);
+            name->d.ia5 = NULL;
+        }
+        XFREE(name, NULL, DYNAMIC_TYPE_OPENSSL);
+    }
+}
+
+void wolfSSL_GENERAL_NAMES_free(WOLFSSL_GENERAL_NAMES *gens)
+{
+    WOLFSSL_STACK* node;
+
+    WOLFSSL_ENTER("wolfSSL_GENERAL_NAMES_free");
+
+    if (gens == NULL) {
+        return;
+    }
+
+    /* parse through stack freeing each node */
+    node = gens->next;
+    while (gens->num > 1) {
+        WOLFSSL_STACK* tmp = node;
+        node = node->next;
+
+        wolfSSL_ASN1_OBJECT_free(tmp->data.obj);
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ASN1);
+        gens->num -= 1;
+    }
+
+    /* free head of stack */
+    if (gens->num == 1) {
+        wolfSSL_ASN1_OBJECT_free(gens->data.obj);
+    }
+    XFREE(gens, NULL, DYNAMIC_TYPE_ASN1);
+}
+
+#if defined(OPENSSL_ALL)
+WOLF_STACK_OF(WOLFSSL_X509_EXTENSION)* wolfSSL_sk_X509_EXTENSION_new_null(void)
+{
+    WOLFSSL_STACK* sk = wolfSSL_sk_new_node(NULL);
+    if (sk) {
+        sk->type = STACK_TYPE_X509_EXT;
+    }
+
+    return (WOLF_STACK_OF(WOLFSSL_X509_EXTENSION)*)sk;;
+}
+
+/* returns the number of nodes on the stack */
+int wolfSSL_sk_X509_EXTENSION_num(WOLF_STACK_OF(WOLFSSL_X509_EXTENSION)* sk)
+{
+    if (sk != NULL) {
+        return (int)sk->num;
+    }
+    return WOLFSSL_FATAL_ERROR;
+}
+
+
+/* returns null on failure and pointer to internal value on success */
+WOLFSSL_X509_EXTENSION* wolfSSL_sk_X509_EXTENSION_value(
+        WOLF_STACK_OF(WOLFSSL_X509_EXTENSION)* sk, int idx)
+{
+    WOLFSSL_STACK* ret;
+
+    if (sk == NULL) {
+        return NULL;
+    }
+
+    ret = wolfSSL_sk_get_node(sk, idx);
+    if (ret != NULL) {
+        return ret->data.ext;
+    }
+    return NULL;
+}
+
+/* frees all of the nodes and the values in stack */
+void wolfSSL_sk_X509_EXTENSION_pop_free(
+        WOLF_STACK_OF(WOLFSSL_X509_EXTENSION)* sk,
+        void (*f) (WOLFSSL_X509_EXTENSION*))
+{
+    WOLFSSL_STACK* current;
+
+    if (sk == NULL) {
+        return;
+    }
+
+    current = sk;
+    while (current != NULL) {
+        WOLFSSL_STACK* toFree = current;
+        current = current->next;
+
+        if (f)
+            f(toFree->data.ext);
+        wolfSSL_sk_free_node(toFree);
+    }
+}
+
+#if defined(HAVE_ECC)
+/* Copies ecc_key into new WOLFSSL_EC_KEY object
+ *
+ * src  : EC_KEY to duplicate. If EC_KEY is not null, create new EC_KEY and copy
+ * internal ecc_key from src to dup.
+ *
+ * Returns pointer to duplicate EC_KEY.
+ */
+WOLFSSL_EC_KEY *wolfSSL_EC_KEY_dup(const WOLFSSL_EC_KEY *src)
+{
+    WOLFSSL_EC_KEY *dup;
+    ecc_key *key, *srcKey;
+    int ret;
+
+    WOLFSSL_ENTER("wolfSSL_EC_KEY_dup");
+
+    if (src == NULL || src->internal == NULL || src->group == NULL || \
+       src->pub_key == NULL || src->priv_key == NULL) {
+
+        WOLFSSL_MSG("src NULL error");
+        return NULL;
+    }
+
+    dup = wolfSSL_EC_KEY_new();
+    if (dup == NULL) {
+        WOLFSSL_MSG("wolfSSL_EC_KEY_new error");
+        return NULL;
+    }
+
+    key = (ecc_key*)dup->internal;
+    if (key == NULL) {
+        WOLFSSL_MSG("ecc_key NULL error");
+        wolfSSL_EC_KEY_free(dup);
+        return NULL;
+    }
+    srcKey = (ecc_key*)src->internal;
+
+    /* ecc_key */
+    /* copy pubkey */
+    ret = wc_ecc_copy_point(&srcKey->pubkey, &key->pubkey);
+    if (ret != MP_OKAY) {
+        WOLFSSL_MSG("wc_ecc_copy_point error");
+        wolfSSL_EC_KEY_free(dup);
+        return NULL;
+    }
+
+    /* copy private key k */
+    ret = mp_copy(&srcKey->k, &key->k);
+    if (ret != MP_OKAY) {
+        WOLFSSL_MSG("mp_copy error");
+        wolfSSL_EC_KEY_free(dup);
+        return NULL;
+    }
+
+    /* copy domain parameters */
+    if (srcKey->dp) {
+        ret = wc_ecc_set_curve(key, 0, srcKey->dp->id);
+        if (ret != 0) {
+            WOLFSSL_MSG("wc_ecc_set_curve error");
+            return NULL;
+        }
+    }
+
+    key->type  = srcKey->type;
+    key->idx   = srcKey->idx;
+    key->state = srcKey->state;
+    key->flags = srcKey->flags;
+
+    /* Copy group */
+    if (dup->group == NULL) {
+        WOLFSSL_MSG("EC_GROUP_new_by_curve_name error");
+        wolfSSL_EC_KEY_free(dup);
+        return NULL;
+    }
+
+    dup->group->curve_idx = src->group->curve_idx;
+    dup->group->curve_nid = src->group->curve_nid;
+    dup->group->curve_oid = src->group->curve_oid;
+
+    /* Copy public key */
+    if (src->pub_key->internal == NULL || dup->pub_key->internal == NULL) {
+        WOLFSSL_MSG("NULL pub_key error");
+        wolfSSL_EC_KEY_free(dup);
+        return NULL;
+    }
+
+    /* Copy public key internal */
+    ret = wc_ecc_copy_point((ecc_point*)src->pub_key->internal, \
+                            (ecc_point*)dup->pub_key->internal);
+    if (ret != MP_OKAY) {
+        WOLFSSL_MSG("ecc_copy_point error");
+        wolfSSL_EC_KEY_free(dup);
+        return NULL;
+    }
+
+    /* Copy X, Y, Z */
+    dup->pub_key->X = wolfSSL_BN_dup(src->pub_key->X);
+    if (!dup->pub_key->X && src->pub_key->X) {
+        WOLFSSL_MSG("Error copying EC_POINT");
+        wolfSSL_EC_KEY_free(dup);
+        return NULL;
+    }
+    dup->pub_key->Y = wolfSSL_BN_dup(src->pub_key->Y);
+    if (!dup->pub_key->Y && src->pub_key->Y) {
+        WOLFSSL_MSG("Error copying EC_POINT");
+        wolfSSL_EC_KEY_free(dup);
+        return NULL;
+    }
+    dup->pub_key->Z = wolfSSL_BN_dup(src->pub_key->Z);
+    if (!dup->pub_key->Z && src->pub_key->Z) {
+        WOLFSSL_MSG("Error copying EC_POINT");
+        wolfSSL_EC_KEY_free(dup);
+        return NULL;
+    }
+
+    dup->pub_key->inSet = src->pub_key->inSet;
+    dup->pub_key->exSet = src->pub_key->exSet;
+
+    /* Copy private key */
+    if (src->priv_key->internal == NULL || dup->priv_key->internal == NULL) {
+        WOLFSSL_MSG("NULL priv_key error");
+        wolfSSL_EC_KEY_free(dup);
+        return NULL;
+    }
+
+    /* Free priv_key before call to dup function */
+    wolfSSL_BN_free(dup->priv_key);
+    dup->priv_key = wolfSSL_BN_dup(src->priv_key);
+    if (dup->priv_key == NULL) {
+        WOLFSSL_MSG("BN_dup error");
+        wolfSSL_EC_KEY_free(dup);
+        return NULL;
+    }
+
+    return dup;
+
+}
+#endif /* HAVE_ECC */
+
+#if !defined(NO_DH)
+int wolfSSL_DH_check(const WOLFSSL_DH *dh, int *codes)
+{
+    int isPrime = MP_NO, codeTmp = 0;
+    WC_RNG rng;
+
+    WOLFSSL_ENTER("wolfSSL_DH_check");
+    if (dh == NULL){
+        return WOLFSSL_FAILURE;
+    }
+
+    if (dh->g == NULL || dh->g->internal == NULL){
+        codeTmp = DH_NOT_SUITABLE_GENERATOR;
+    }
+
+    if (dh->p == NULL || dh->p->internal == NULL){
+        codeTmp = DH_CHECK_P_NOT_PRIME;
+    }
+    else
+    {
+        /* test if dh->p has prime */
+        if (wc_InitRng(&rng) == 0){
+            mp_prime_is_prime_ex((mp_int*)dh->p->internal,8,&isPrime,&rng);
+        }
+        else {
+            WOLFSSL_MSG("Error initializing rng\n");
+            return WOLFSSL_FAILURE;
+        }
+        wc_FreeRng(&rng);
+        if (isPrime != MP_YES){
+            codeTmp = DH_CHECK_P_NOT_PRIME;
+        }
+    }
+    /* User may choose to enter NULL for codes if they don't want to check it*/
+    if (codes != NULL){
+        *codes = codeTmp;
+    }
+
+    /* if codeTmp was set,some check was flagged invalid */
+    if (codeTmp){
+        return WOLFSSL_FAILURE;
+    }
+
+    return WOLFSSL_SUCCESS;
+}
+
+/* Converts DER encoded DH parameters to a WOLFSSL_DH structure.
+ *
+ * dh   : structure to copy DH parameters into.
+ * pp   : DER encoded DH parameters
+ * length   : length to copy
+ *
+ * Returns pointer to WOLFSSL_DH structure on success, or NULL on failure
+ */
+WOLFSSL_DH *wolfSSL_d2i_DHparams(WOLFSSL_DH **dh, const unsigned char **pp,
+                                                                    long length)
+{
+    WOLFSSL_DH *newDH = NULL;
+    int ret;
+    word32 idx = 0;
+
+    WOLFSSL_ENTER("wolfSSL_d2i_DHparams");
+
+    if (pp == NULL || length <= 0) {
+        WOLFSSL_MSG("bad argument");
+        return NULL;
+    }
+
+    if ((newDH = wolfSSL_DH_new()) == NULL) {
+        WOLFSSL_MSG("wolfSSL_DH_new() failed");
+        return NULL;
+    }
+
+    ret = wc_DhKeyDecode(*pp, &idx, (DhKey*)newDH->internal, (word32)length);
+    if (ret != 0) {
+        WOLFSSL_MSG("DhKeyDecode() failed");
+        wolfSSL_DH_free(newDH);
+        return NULL;
+    }
+    newDH->inSet = 1;
+
+    if (SetDhExternal(newDH) != WOLFSSL_SUCCESS) {
+        WOLFSSL_MSG("SetDhExternal failed");
+        wolfSSL_DH_free(newDH);
+        return NULL;
+    }
+
+    *pp += length;
+    if (dh != NULL){
+        *dh = newDH;
+    }
+
+    return newDH;
+}
+
+/* Converts internal WOLFSSL_DH structure to DER encoded DH.
+ *
+ * dh   : structure to copy DH parameters from.
+ * out  : DER buffer for DH parameters
+ *
+ * Returns size of DER on success and WOLFSSL_FAILURE if error
+ */
+int wolfSSL_i2d_DHparams(const WOLFSSL_DH *dh, unsigned char **out)
+{
+    word32 len;
+    int ret = 0;
+
+    WOLFSSL_ENTER("wolfSSL_i2d_DHparams");
+
+    if (dh == NULL) {
+        WOLFSSL_MSG("Bad parameters");
+        return WOLFSSL_FAILURE;
+    }
+
+    /* Get total length */
+    len = 2 + mp_leading_bit((mp_int*)dh->p->internal) +
+              mp_unsigned_bin_size((mp_int*)dh->p->internal) +
+          2 + mp_leading_bit((mp_int*)dh->g->internal) +
+              mp_unsigned_bin_size((mp_int*)dh->g->internal);
+
+    /* Two bytes required for length if ASN.1 SEQ data greater than 127 bytes
+     * and less than 256 bytes.
+     */
+    len = ((len > 127) ? 2 : 1) + len;
+
+    if (out != NULL && *out != NULL) {
+        ret = StoreDHparams(*out, &len, (mp_int*)dh->p->internal,
+                                        (mp_int*)dh->g->internal);
+        if (ret != MP_OKAY) {
+            WOLFSSL_MSG("StoreDHparams error");
+            len = 0;
+        }
+        else{
+            *out += len;
+        }
+    }
+    return (int)len;
+}
+#endif /* !NO_DH */
+
+#endif /* OPENSSL_ALL */
+
 #endif /* OPENSSL_EXTRA */
 
 #ifndef NO_FILESYSTEM
@@ -14997,12 +18618,13 @@
         byte* fileBuffer = NULL;
         long sz = 0;
 
-        XFSEEK(file, 0, XSEEK_END);
+        if (XFSEEK(file, 0, XSEEK_END) != 0)
+            return NULL;
         sz = XFTELL(file);
         XREWIND(file);
 
-        if (sz < 0) {
-            WOLFSSL_MSG("Bad tell on FILE");
+        if (sz > MAX_WOLFSSL_FILE_SIZE || sz < 0) {
+            WOLFSSL_MSG("X509_d2i file size error");
             return NULL;
         }
 
@@ -15024,6 +18646,7 @@
 
 #endif /* NO_STDIO_FILESYSTEM */
 
+WOLFSSL_ABI
 WOLFSSL_X509* wolfSSL_X509_load_certificate_file(const char* fname, int format)
 {
 #ifdef WOLFSSL_SMALL_STACK
@@ -15048,10 +18671,19 @@
     if (file == XBADFILE)
         return NULL;
 
-    XFSEEK(file, 0, XSEEK_END);
+    if (XFSEEK(file, 0, XSEEK_END) != 0){
+        XFCLOSE(file);
+        return NULL;
+    }
     sz = XFTELL(file);
     XREWIND(file);
 
+    if (sz > MAX_WOLFSSL_FILE_SIZE || sz < 0) {
+        WOLFSSL_MSG("X509_load_certificate_file size error");
+        XFCLOSE(file);
+        return NULL;
+    }
+
     if (sz > (long)sizeof(staticBuffer)) {
         fileBuffer = (byte*)XMALLOC(sz, NULL, DYNAMIC_TYPE_FILE);
         if (fileBuffer == NULL) {
@@ -15060,10 +18692,6 @@
         }
         dynamic = 1;
     }
-    else if (sz < 0) {
-        XFCLOSE(file);
-        return NULL;
-    }
 
     ret = (int)XFREAD(fileBuffer, 1, sz, file);
     if (ret != sz) {
@@ -15115,7 +18743,7 @@
     /* ready to be decoded. */
     if (der != NULL && der->buffer != NULL) {
     #ifdef WOLFSSL_SMALL_STACK
-        DecodedCert* cert = NULL;
+        DecodedCert* cert;
     #else
         DecodedCert  cert[1];
     #endif
@@ -15133,7 +18761,7 @@
                 if (x509 != NULL) {
                     InitX509(x509, 1, NULL);
                     if (CopyDecodedToX509(x509, cert) != 0) {
-                        XFREE(x509, NULL, DYNAMIC_TYPE_X509);
+                        wolfSSL_X509_free(x509);
                         x509 = NULL;
                     }
                 }
@@ -15155,7 +18783,8 @@
 
 /* OPENSSL_EXTRA is needed for wolfSSL_X509_d21 function
    KEEP_OUR_CERT is to insure ability for returning ssl certificate */
-#if defined(OPENSSL_EXTRA) && defined(KEEP_OUR_CERT)
+#if (defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)) && \
+    defined(KEEP_OUR_CERT)
 WOLFSSL_X509* wolfSSL_get_certificate(WOLFSSL* ssl)
 {
     if (ssl == NULL) {
@@ -15168,9 +18797,11 @@
                 WOLFSSL_MSG("Certificate buffer not set!");
                 return NULL;
             }
+            #ifndef WOLFSSL_X509_STORE_CERTS
             ssl->ourCert = wolfSSL_X509_d2i(NULL,
                                               ssl->buffers.certificate->buffer,
                                               ssl->buffers.certificate->length);
+            #endif
         }
         return ssl->ourCert;
     }
@@ -15181,9 +18812,11 @@
                     WOLFSSL_MSG("Ctx Certificate buffer not set!");
                     return NULL;
                 }
+                #ifndef WOLFSSL_X509_STORE_CERTS
                 ssl->ctx->ourCert = wolfSSL_X509_d2i(NULL,
                                                ssl->ctx->certificate->buffer,
                                                ssl->ctx->certificate->length);
+                #endif
                 ssl->ctx->ownOurCert = 1;
             }
             return ssl->ctx->ourCert;
@@ -15196,13 +18829,75 @@
 #endif /* NO_CERTS */
 
 
-#ifdef OPENSSL_EXTRA
+#if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
+#ifndef NO_ASN
+void wolfSSL_ASN1_OBJECT_free(WOLFSSL_ASN1_OBJECT* obj)
+{
+    if (obj == NULL) {
+        return;
+    }
+    if ((obj->obj != NULL) && ((obj->dynamic & WOLFSSL_ASN1_DYNAMIC_DATA) != 0)) {
+        WOLFSSL_MSG("Freeing ASN1 data");
+        XFREE((void*)obj->obj, obj->heap, DYNAMIC_TYPE_ASN1);
+        obj->obj = NULL;
+    }
+    #if defined(WOLFSSL_QT) || defined(OPENSSL_ALL)
+    if (obj->pathlen != NULL) {
+        wolfSSL_ASN1_INTEGER_free(obj->pathlen);
+        obj->pathlen = NULL;
+    }
+    #endif
+    if ((obj->dynamic & WOLFSSL_ASN1_DYNAMIC) != 0) {
+        WOLFSSL_MSG("Freeing ASN1 OBJECT");
+        XFREE(obj, NULL, DYNAMIC_TYPE_ASN1);
+    }
+}
+#endif /* NO_ASN */
+#endif /* OPENSSL_EXTRA || OPENSSL_EXTRA_X509_SMALL */
+
+#ifdef OPENSSL_EXTRA
+#ifndef NO_ASN
+WOLFSSL_ASN1_OBJECT* wolfSSL_ASN1_OBJECT_new(void)
+{
+    WOLFSSL_ASN1_OBJECT* obj;
+
+    obj = (WOLFSSL_ASN1_OBJECT*)XMALLOC(sizeof(WOLFSSL_ASN1_OBJECT), NULL,
+                                        DYNAMIC_TYPE_ASN1);
+    if (obj == NULL) {
+        return NULL;
+    }
+
+    XMEMSET(obj, 0, sizeof(WOLFSSL_ASN1_OBJECT));
+    obj->d.ia5 = &(obj->d.ia5_internal);
+#if defined(WOLFSSL_QT) || defined(OPENSSL_ALL)
+    obj->d.iPAddress = &(obj->d.iPAddress_internal);
+#endif
+    obj->dynamic |= WOLFSSL_ASN1_DYNAMIC;
+    return obj;
+}
+
+/* Creates and returns a new WOLFSSL_CIPHER stack. */
+WOLFSSL_STACK* wolfSSL_sk_new_asn1_obj(void)
+{
+    WOLFSSL_STACK* sk;
+    WOLFSSL_ENTER("wolfSSL_sk_new_asn1_obj");
+
+    sk = wolfSSL_sk_new_null();
+    if (sk == NULL)
+        return NULL;
+    sk->type = STACK_TYPE_OBJ;
+
+    return sk;
+}
+
 /* return 1 on success 0 on fail */
-int wolfSSL_sk_ASN1_OBJECT_push(WOLF_STACK_OF(WOLFSSL_ASN1_OBJEXT)* sk,
-                                                      WOLFSSL_ASN1_OBJECT* obj)
+int wolfSSL_sk_ASN1_OBJECT_push(WOLF_STACK_OF(WOLFSSL_ASN1_OBJECT)* sk,
+                                              WOLFSSL_ASN1_OBJECT* obj)
 {
     WOLFSSL_STACK* node;
 
+    WOLFSSL_ENTER("wolfSSL_sk_ASN1_OBJECT_push");
+
     if (sk == NULL || obj == NULL) {
         return WOLFSSL_FAILURE;
     }
@@ -15224,21 +18919,22 @@
     XMEMSET(node, 0, sizeof(WOLFSSL_STACK));
 
     /* push new obj onto head of stack */
-    node->data.obj = sk->data.obj;
+    node->data.obj  = sk->data.obj;
     node->next      = sk->next;
+    node->type      = sk->type;
     sk->next        = node;
-    sk->data.obj   = obj;
+    sk->data.obj    = obj;
     sk->num        += 1;
 
     return WOLFSSL_SUCCESS;
 }
 
 
-WOLFSSL_ASN1_OBJECT* wolfSSL_sk_ASN1_OBJCET_pop(
-                                            WOLF_STACK_OF(WOLFSSL_ASN1_OBJECT)* sk)
+WOLFSSL_ASN1_OBJECT* wolfSSL_sk_ASN1_OBJECT_pop(
+                                        WOLF_STACK_OF(WOLFSSL_ASN1_OBJECT)* sk)
 {
     WOLFSSL_STACK* node;
-    WOLFSSL_ASN1_OBJECT*  obj;
+    WOLFSSL_ASN1_OBJECT* obj;
 
     if (sk == NULL) {
         return NULL;
@@ -15264,63 +18960,56 @@
 }
 
 
-#ifndef NO_ASN
-WOLFSSL_ASN1_OBJECT* wolfSSL_ASN1_OBJECT_new(void)
-{
-    WOLFSSL_ASN1_OBJECT* obj;
-
-    obj = (WOLFSSL_ASN1_OBJECT*)XMALLOC(sizeof(WOLFSSL_ASN1_OBJECT), NULL,
-                                        DYNAMIC_TYPE_ASN1);
-    if (obj == NULL) {
-        return NULL;
-    }
-
-    XMEMSET(obj, 0, sizeof(WOLFSSL_ASN1_OBJECT));
-    obj->d.ia5 = &(obj->d.ia5_internal);
-    return obj;
-}
-
-
-void wolfSSL_ASN1_OBJECT_free(WOLFSSL_ASN1_OBJECT* obj)
-{
-    if (obj == NULL) {
-        return;
-    }
-
-    if (obj->dynamic == 1) {
-        if (obj->obj != NULL) {
-            WOLFSSL_MSG("Freeing ASN1 OBJECT data");
-            XFREE(obj->obj, obj->heap, DYNAMIC_TYPE_ASN1);
-        }
-    }
-
-    XFREE(obj, NULL, DYNAMIC_TYPE_ASN1);
-}
-
-
-/* free structure for x509 stack */
+/* Free the structure for ASN1_OBJECT stack
+ *
+ * sk  stack to free nodes in
+ */
 void wolfSSL_sk_ASN1_OBJECT_free(WOLF_STACK_OF(WOLFSSL_ASN1_OBJECT)* sk)
 {
+    wolfSSL_sk_ASN1_OBJECT_pop_free(sk, NULL);
+}
+
+/* Free's all nodes in ASN1_OBJECT stack.
+ * This is different then wolfSSL_ASN1_OBJECT_free in that it allows for
+ * choosing the function to use when freeing an ASN1_OBJECT stack.
+ *
+ * sk  stack to free nodes in
+ * f   X509 free function
+ */
+void wolfSSL_sk_ASN1_OBJECT_pop_free(WOLF_STACK_OF(WOLFSSL_ASN1_OBJECT)* sk,
+                                     void (*f) (WOLFSSL_ASN1_OBJECT*))
+{
     WOLFSSL_STACK* node;
 
+    WOLFSSL_ENTER("wolfSSL_sk_ASN1_OBJECT_pop_free");
+
     if (sk == NULL) {
+        WOLFSSL_MSG("Parameter error");
         return;
     }
 
     /* parse through stack freeing each node */
     node = sk->next;
-    while (sk->num > 1) {
+    while (node && sk->num > 1) {
         WOLFSSL_STACK* tmp = node;
         node = node->next;
 
-        wolfSSL_ASN1_OBJECT_free(tmp->data.obj);
+        if (f)
+            f(tmp->data.obj);
+        else
+            wolfSSL_ASN1_OBJECT_free(tmp->data.obj);
+        tmp->data.obj = NULL;
         XFREE(tmp, NULL, DYNAMIC_TYPE_ASN1);
         sk->num -= 1;
     }
 
     /* free head of stack */
     if (sk->num == 1) {
-        wolfSSL_ASN1_OBJECT_free(sk->data.obj);
+        if (f)
+            f(sk->data.obj);
+        else
+            wolfSSL_ASN1_OBJECT_free(sk->data.obj);
+        sk->data.obj = NULL;
     }
     XFREE(sk, NULL, DYNAMIC_TYPE_ASN1);
 }
@@ -15333,18 +19022,76 @@
        The length of out is returned or a negative error code.
        The buffer *out should be free using OPENSSL_free().
        */
-    (void)out;
-    (void)in;
-    WOLFSSL_STUB("ASN1_STRING_to_UTF8");
-    return -1;
+    unsigned char* buf;
+    unsigned char* inPtr;
+    int inLen;
+
+    if (!out || !in) {
+        return -1;
+    }
+
+    inPtr = wolfSSL_ASN1_STRING_data(in);
+    inLen = wolfSSL_ASN1_STRING_length(in);
+    if (!inPtr || inLen < 0) {
+        return -1;
+    }
+    buf = (unsigned char*)XMALLOC(inLen + 1, NULL, DYNAMIC_TYPE_OPENSSL);
+    if (!buf) {
+        return -1;
+    }
+    XMEMCPY(buf, inPtr, inLen + 1);
+    *out = buf;
+    return inLen;
+}
+
+/* Returns string representation of ASN1_STRING */
+char* wolfSSL_i2s_ASN1_STRING(WOLFSSL_v3_ext_method *method,
+    const WOLFSSL_ASN1_STRING *s)
+{
+    int i;
+    int tmpSz = 100;
+    int valSz = 5;
+    char* tmp;
+    char val[5];
+    unsigned char* str;
+
+    WOLFSSL_ENTER("wolfSSL_i2s_ASN1_STRING");
+    (void)method;
+
+    if(s == NULL || s->data == NULL) {
+        WOLFSSL_MSG("Bad Function Argument");
+        return NULL;
+    }
+    str = (unsigned char*)XMALLOC(s->length, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    if (str == NULL) {
+        WOLFSSL_MSG("Memory Error");
+        return NULL;
+    }
+    XMEMCPY(str, (unsigned char*)s->data, s->length);
+
+    tmp = (char*)XMALLOC(tmpSz, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    if (tmp == NULL) {
+        WOLFSSL_MSG("Memory Error");
+        XFREE(str, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        return NULL;
+    }
+    XMEMSET(tmp, 0, tmpSz);
+
+    for (i = 0; i < tmpSz && i < (s->length - 1); i++) {
+        XSNPRINTF(val, valSz - 1, "%02X:", str[i]);
+        XSTRNCAT(tmp, val, valSz);
+    }
+    XSNPRINTF(val, valSz - 1, "%02X", str[i]);
+    XSTRNCAT(tmp, val, valSz);
+    XFREE(str, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+
+    return tmp;
 }
 #endif /* NO_ASN */
 
 void wolfSSL_set_connect_state(WOLFSSL* ssl)
 {
-    word16 haveRSA = 1;
-    word16 havePSK = 0;
-
+    WOLFSSL_ENTER("wolfSSL_set_connect_state");
     if (ssl == NULL) {
         WOLFSSL_MSG("WOLFSSL struct pointer passed in was null");
         return;
@@ -15364,44 +19111,92 @@
     ssl->buffers.serverDH_G.buffer = NULL;
     #endif
 
-    if (ssl->options.side == WOLFSSL_SERVER_END) {
-        #ifdef NO_RSA
-            haveRSA = 0;
-        #endif
-        #ifndef NO_PSK
-            havePSK = ssl->options.havePSK;
-        #endif
-        InitSuites(ssl->suites, ssl->version, ssl->buffers.keySz, haveRSA,
-                   havePSK, ssl->options.haveDH, ssl->options.haveNTRU,
-                   ssl->options.haveECDSAsig, ssl->options.haveECC,
-                   ssl->options.haveStaticECC, WOLFSSL_CLIENT_END);
-    }
-    ssl->options.side = WOLFSSL_CLIENT_END;
-}
-#endif /* OPENSSL_EXTRA || WOLFSSL_EXTRA */
+    if (InitSSL_Side(ssl, WOLFSSL_CLIENT_END) != WOLFSSL_SUCCESS) {
+        WOLFSSL_MSG("Error initializing client side");
+    }
+}
+#endif /* OPENSSL_EXTRA */
 
 
 int wolfSSL_get_shutdown(const WOLFSSL* ssl)
 {
+    int isShutdown = 0;
+
     WOLFSSL_ENTER("wolfSSL_get_shutdown");
-    /* in OpenSSL, WOLFSSL_SENT_SHUTDOWN = 1, when closeNotifySent   *
-     * WOLFSSL_RECEIVED_SHUTDOWN = 2, from close notify or fatal err */
-    return ((ssl->options.closeNotify||ssl->options.connReset) << 1)
-            | (ssl->options.sentNotify);
+
+    if (ssl) {
+        /* in OpenSSL, WOLFSSL_SENT_SHUTDOWN = 1, when closeNotifySent   *
+         * WOLFSSL_RECEIVED_SHUTDOWN = 2, from close notify or fatal err */
+        isShutdown = ((ssl->options.closeNotify||ssl->options.connReset) << 1)
+                    | (ssl->options.sentNotify);
+    }
+    return isShutdown;
 }
 
 
 int wolfSSL_session_reused(WOLFSSL* ssl)
 {
-    return ssl->options.resuming;
+    int resuming = 0;
+    if (ssl)
+        resuming = ssl->options.resuming;
+    return resuming;
 }
 
 #if defined(OPENSSL_EXTRA) || defined(HAVE_EXT_CACHE)
+WOLFSSL_SESSION* wolfSSL_SESSION_dup(WOLFSSL_SESSION* session)
+{
+#ifdef HAVE_EXT_CACHE
+    WOLFSSL_SESSION* copy;
+
+    WOLFSSL_ENTER("wolfSSL_SESSION_dup");
+
+    if (session == NULL)
+        return NULL;
+#ifdef HAVE_SESSION_TICKET
+    if (session->isDynamic && !session->ticket) {
+        WOLFSSL_MSG("Session dynamic flag is set but ticket pointer is null");
+        return NULL;
+    }
+#endif
+
+    copy = (WOLFSSL_SESSION*)XMALLOC(sizeof(WOLFSSL_SESSION), NULL,
+            DYNAMIC_TYPE_OPENSSL);
+    if (copy != NULL) {
+        XMEMCPY(copy, session, sizeof(WOLFSSL_SESSION));
+        copy->isAlloced = 1;
+#ifdef HAVE_SESSION_TICKET
+        if (session->isDynamic) {
+            copy->ticket = (byte*)XMALLOC(session->ticketLen, NULL,
+                                                    DYNAMIC_TYPE_SESSION_TICK);
+            XMEMCPY(copy->ticket, session->ticket, session->ticketLen);
+        } else {
+            copy->ticket = copy->staticTicket;
+        }
+#endif
+#if defined(SESSION_CERTS) && defined(OPENSSL_EXTRA)
+        copy->peer = wolfSSL_X509_dup(session->peer);
+#endif
+    }
+    return copy;
+#else
+    WOLFSSL_MSG("wolfSSL_SESSION_dup was called "
+                "but HAVE_EXT_CACHE is not defined");
+    (void)session;
+    return NULL;
+#endif /* HAVE_EXT_CACHE */
+}
+
 void wolfSSL_SESSION_free(WOLFSSL_SESSION* session)
 {
     if (session == NULL)
         return;
 
+#if defined(SESSION_CERTS) && defined(OPENSSL_EXTRA)
+    if (session->peer) {
+        wolfSSL_X509_free(session->peer);
+    }
+#endif
+
 #ifdef HAVE_EXT_CACHE
     if (session->isAlloced) {
     #ifdef HAVE_SESSION_TICKET
@@ -15417,11 +19212,18 @@
 }
 #endif
 
-const char* wolfSSL_get_version(WOLFSSL* ssl)
-{
-    WOLFSSL_ENTER("SSL_get_version");
-    if (ssl->version.major == SSLv3_MAJOR) {
-        switch (ssl->version.minor) {
+
+/* helper function that takes in a protocol version struct and returns string */
+static const char* wolfSSL_internal_get_version(ProtocolVersion* version)
+{
+    WOLFSSL_ENTER("wolfSSL_get_version");
+
+    if (version == NULL) {
+        return "Bad arg";
+    }
+
+    if (version->major == SSLv3_MAJOR) {
+        switch (version->minor) {
         #ifndef NO_OLD_TLS
             #ifdef WOLFSSL_ALLOW_SSLV3
             case SSLv3_MINOR :
@@ -15438,8 +19240,7 @@
                 return "TLSv1.2";
         #ifdef WOLFSSL_TLS13
             case TLSv1_3_MINOR :
-            /* TODO: [TLS13] Remove draft versions. */
-            #ifndef WOLFSSL_TLS13_FINAL
+            #ifdef WOLFSSL_TLS13_DRAFT
                 #ifdef WOLFSSL_TLS13_DRAFT_18
                     return "TLSv1.3 (Draft 18)";
                 #elif defined(WOLFSSL_TLS13_DRAFT_22)
@@ -15460,8 +19261,8 @@
         }
     }
 #ifdef WOLFSSL_DTLS
-    else if (ssl->version.major == DTLS_MAJOR) {
-        switch (ssl->version.minor) {
+    else if (version->major == DTLS_MAJOR) {
+        switch (version->minor) {
             case DTLS_MINOR :
                 return "DTLS";
             case DTLSv1_2_MINOR :
@@ -15475,6 +19276,17 @@
 }
 
 
+const char* wolfSSL_get_version(WOLFSSL* ssl)
+{
+    if (ssl == NULL) {
+        WOLFSSL_MSG("Bad argument");
+        return "unknown";
+    }
+
+    return wolfSSL_internal_get_version(&ssl->version);
+}
+
+
 /* current library version */
 const char* wolfSSL_lib_version(void)
 {
@@ -15500,8 +19312,11 @@
 WOLFSSL_CIPHER* wolfSSL_get_current_cipher(WOLFSSL* ssl)
 {
     WOLFSSL_ENTER("SSL_get_current_cipher");
-    if (ssl)
+    if (ssl) {
+        ssl->cipher.cipherSuite0 = ssl->options.cipherSuite0;
+        ssl->cipher.cipherSuite  = ssl->options.cipherSuite;
         return &ssl->cipher;
+    }
     else
         return NULL;
 }
@@ -15509,13 +19324,30 @@
 
 const char* wolfSSL_CIPHER_get_name(const WOLFSSL_CIPHER* cipher)
 {
-    WOLFSSL_ENTER("SSL_CIPHER_get_name");
+    WOLFSSL_ENTER("wolfSSL_CIPHER_get_name");
+
+    if (cipher == NULL) {
+        return NULL;
+    }
+
+    #if !defined(WOLFSSL_CIPHER_INTERNALNAME) && !defined(NO_ERROR_STRINGS) && \
+        !defined(WOLFSSL_QT)
+        return GetCipherNameIana(cipher->cipherSuite0, cipher->cipherSuite);
+    #else
+        return wolfSSL_get_cipher_name_from_suite(cipher->cipherSuite0,
+                cipher->cipherSuite);
+    #endif
+}
+
+const char*  wolfSSL_CIPHER_get_version(const WOLFSSL_CIPHER* cipher)
+{
+    WOLFSSL_ENTER("SSL_CIPHER_get_version");
 
     if (cipher == NULL || cipher->ssl == NULL) {
         return NULL;
     }
 
-    return wolfSSL_get_cipher_name_iana(cipher->ssl);
+    return wolfSSL_get_version(cipher->ssl);
 }
 
 const char* wolfSSL_SESSION_CIPHER_get_name(WOLFSSL_SESSION* session)
@@ -15524,8 +19356,13 @@
         return NULL;
     }
 
-#ifdef SESSION_CERTS
-    return GetCipherNameIana(session->cipherSuite0, session->cipherSuite);
+#if defined(SESSION_CERTS) || !defined(NO_RESUME_SUITE_CHECK) || \
+                        (defined(WOLFSSL_TLS13) && defined(HAVE_SESSION_TICKET))
+    #if !defined(WOLFSSL_CIPHER_INTERNALNAME) && !defined(NO_ERROR_STRINGS)
+        return GetCipherNameIana(session->cipherSuite0, session->cipherSuite);
+    #else
+        return GetCipherNameInternal(session->cipherSuite0, session->cipherSuite);
+    #endif
 #else
     return NULL;
 #endif
@@ -15550,8 +19387,135 @@
     return GetCipherNameInternal(cipherSuite0, cipherSuite);
 }
 
-
-#ifdef HAVE_ECC
+const char* wolfSSL_get_cipher_name_iana_from_suite(const byte cipherSuite0,
+        const byte cipherSuite)
+{
+    return GetCipherNameIana(cipherSuite0, cipherSuite);
+}
+
+
+#if defined(WOLFSSL_QT) || defined(OPENSSL_ALL)
+/* Creates and returns a new WOLFSSL_CIPHER stack. */
+WOLFSSL_STACK* wolfSSL_sk_new_cipher(void)
+{
+    WOLFSSL_STACK* sk;
+    WOLFSSL_ENTER("wolfSSL_sk_new_cipher");
+
+    sk = wolfSSL_sk_new_null();
+    if (sk == NULL)
+        return NULL;
+    sk->type = STACK_TYPE_CIPHER;
+
+    return sk;
+}
+
+#ifndef NO_WOLFSSL_STUB
+/* Keep as stubs for now */
+/* return 1 on success 0 on fail */
+int wolfSSL_sk_CIPHER_push(WOLF_STACK_OF(WOLFSSL_CIPHER)* sk,
+                                                      WOLFSSL_CIPHER* cipher)
+{
+    WOLFSSL_STUB("wolfSSL_sk_CIPHER_push");
+    (void)sk;
+    (void)cipher;
+    return 0;
+}
+
+
+WOLFSSL_CIPHER* wolfSSL_sk_CIPHER_pop(WOLF_STACK_OF(WOLFSSL_CIPHER)* sk)
+{
+    WOLFSSL_STUB("wolfSSL_sk_CIPHER_pop");
+    (void)sk;
+    return NULL;
+}
+#endif /* NO_WOLFSSL_STUB */
+#endif /* WOLFSSL_QT || OPENSSL_ALL */
+
+word32 wolfSSL_CIPHER_get_id(const WOLFSSL_CIPHER* cipher)
+{
+    word16 cipher_id = 0;
+
+    WOLFSSL_ENTER("SSL_CIPHER_get_id");
+
+    if (cipher && cipher->ssl) {
+        cipher_id = (cipher->ssl->options.cipherSuite0 << 8) |
+                     cipher->ssl->options.cipherSuite;
+    }
+
+    return cipher_id;
+}
+
+const WOLFSSL_CIPHER* wolfSSL_get_cipher_by_value(word16 value)
+{
+    const WOLFSSL_CIPHER* cipher = NULL;
+    byte cipherSuite0, cipherSuite;
+    WOLFSSL_ENTER("SSL_get_cipher_by_value");
+
+    /* extract cipher id information */
+    cipherSuite =   (value       & 0xFF);
+    cipherSuite0 = ((value >> 8) & 0xFF);
+
+    /* TODO: lookup by cipherSuite0 / cipherSuite */
+    (void)cipherSuite0;
+    (void)cipherSuite;
+
+    return cipher;
+}
+
+
+#if defined(OPENSSL_ALL)
+/* Free the structure for WOLFSSL_CIPHER stack
+ *
+ * sk  stack to free nodes in
+ */
+void wolfSSL_sk_CIPHER_free(WOLF_STACK_OF(WOLFSSL_CIPHER)* sk)
+{
+    WOLFSSL_STACK* node;
+    WOLFSSL_STACK* tmp;
+    WOLFSSL_ENTER("wolfSSL_sk_CIPHER_free");
+
+    if (sk == NULL)
+        return;
+
+    /* parse through stack freeing each node */
+    node = sk->next;
+    while (node) {
+        tmp  = node;
+        node = node->next;
+        XFREE(tmp, NULL, DYNAMIC_TYPE_OPENSSL);
+    }
+
+    /* free head of stack */
+    XFREE(sk, NULL, DYNAMIC_TYPE_ASN1);
+}
+#endif
+
+#if defined(HAVE_ECC) || defined(HAVE_CURVE25519) || defined(HAVE_CURVE448) || \
+                                                                 !defined(NO_DH)
+#ifdef HAVE_FFDHE
+static const char* wolfssl_ffdhe_name(word16 group)
+{
+    const char* str = NULL;
+    switch (group) {
+        case WOLFSSL_FFDHE_2048:
+            str = "FFDHE_2048";
+            break;
+        case WOLFSSL_FFDHE_3072:
+            str = "FFDHE_3072";
+            break;
+        case WOLFSSL_FFDHE_4096:
+            str = "FFDHE_4096";
+            break;
+        case WOLFSSL_FFDHE_6144:
+            str = "FFDHE_6144";
+            break;
+        case WOLFSSL_FFDHE_8192:
+            str = "FFDHE_8192";
+            break;
+    }
+    return str;
+}
+#endif
 /* Return the name of the curve used for key exchange as a printable string.
  *
  * ssl  The SSL/TLS object.
@@ -15559,16 +19523,37 @@
  */
 const char* wolfSSL_get_curve_name(WOLFSSL* ssl)
 {
-    if (ssl == NULL)
-        return NULL;
-    if (!IsAtLeastTLSv1_3(ssl->version) && ssl->specs.kea != ecdhe_psk_kea &&
-            ssl->specs.kea != ecc_diffie_hellman_kea)
-        return NULL;
-    if (ssl->ecdhCurveOID == 0)
-        return NULL;
-    if (ssl->ecdhCurveOID == ECC_X25519_OID)
-        return "X25519";
-    return wc_ecc_get_name(wc_ecc_get_oid(ssl->ecdhCurveOID, NULL, NULL));
+    const char* cName = NULL;
+
+    if (ssl == NULL)
+        return NULL;
+
+#ifdef HAVE_FFDHE
+    if (ssl->namedGroup != 0) {
+        cName = wolfssl_ffdhe_name(ssl->namedGroup);
+    }
+#endif
+
+#ifdef HAVE_CURVE25519
+    if (ssl->ecdhCurveOID == ECC_X25519_OID && cName == NULL) {
+        cName = "X25519";
+    }
+#endif
+
+#ifdef HAVE_CURVE448
+    if (ssl->ecdhCurveOID == ECC_X448_OID && cName == NULL) {
+        cName = "X448";
+    }
+#endif
+
+#ifdef HAVE_ECC
+    if (ssl->ecdhCurveOID != 0 && cName == NULL) {
+        cName = wc_ecc_get_name(wc_ecc_get_oid(ssl->ecdhCurveOID, NULL,
+                                NULL));
+    }
+#endif
+
+    return cName;
 }
 #endif
 
@@ -15582,7 +19567,7 @@
 /* returns a pointer to a new WOLFSSL_X509 structure on success and NULL on
  * fail
  */
-WOLFSSL_X509* wolfSSL_X509_new()
+WOLFSSL_X509* wolfSSL_X509_new(void)
 {
     WOLFSSL_X509* x509;
 
@@ -15595,6 +19580,7 @@
     return x509;
 }
 
+WOLFSSL_ABI
 WOLFSSL_X509_NAME* wolfSSL_X509_get_subject_name(WOLFSSL_X509* cert)
 {
     WOLFSSL_ENTER("wolfSSL_X509_get_subject_name");
@@ -15603,8 +19589,50 @@
     return NULL;
 }
 
-
-
+#if defined(OPENSSL_EXTRA) && !defined(NO_SHA)
+/******************************************************************************
+* wolfSSL_X509_subject_name_hash - compute the hash digest of the raw subject name
+*
+* RETURNS:
+* The beginning of the hash digest. Otherwise, returns zero.
+* Note:
+* Returns a different hash value from OpenSSL's X509_subject_name_hash() API
+* depending on the subject name.
+*/
+unsigned long wolfSSL_X509_subject_name_hash(const WOLFSSL_X509* x509)
+{
+    word32 ret = 0;
+    int retHash;
+    WOLFSSL_X509_NAME *subjectName = NULL;
+
+#ifdef WOLFSSL_PIC32MZ_HASH
+        byte digest[PIC32_DIGEST_SIZE];
+#else
+        byte digest[WC_SHA_DIGEST_SIZE];
+#endif
+
+    if (x509 == NULL){
+        return WOLFSSL_FAILURE;
+    }
+
+    subjectName = wolfSSL_X509_get_subject_name((WOLFSSL_X509*)x509);
+
+    if (subjectName != NULL){
+        retHash = wc_ShaHash((const byte*)subjectName->name,
+                             (word32)subjectName->sz, digest);
+
+        if(retHash != 0){
+            WOLFSSL_MSG("Hash of X509 subjectName has failed");
+            return WOLFSSL_FAILURE;
+        }
+        ret = MakeWordFromHash(digest);
+    }
+
+    return (unsigned long)ret;
+}
+#endif
+
+WOLFSSL_ABI
 WOLFSSL_X509_NAME* wolfSSL_X509_get_issuer_name(WOLFSSL_X509* cert)
 {
     WOLFSSL_ENTER("X509_get_issuer_name");
@@ -15627,9 +19655,6 @@
 }
 
 #if defined(OPENSSL_EXTRA_X509_SMALL)
-#ifdef HAVE_ECC
-    static int SetECKeyExternal(WOLFSSL_EC_KEY* eckey);
-#endif
 
 /* Used to get a string from the WOLFSSL_X509_NAME structure that
  * corresponds with the NID value passed in.
@@ -15690,9 +19715,19 @@
             text = name->fullName.fullName + name->fullName.dcIdx[0];
             textSz = name->fullName.dcLen[0];
             break;
+        case NID_emailAddress:
+            text = name->fullName.fullName + name->fullName.emailIdx;
+            textSz = name->fullName.emailLen;
+            break;
+    #ifdef WOLFSSL_CERT_EXT
+        case ASN_BUS_CAT:
+            text = name->fullName.fullName + name->fullName.bcIdx;
+            textSz = name->fullName.bcLen;
+            break;
+    #endif
         default:
             WOLFSSL_MSG("Entry type not found");
-            return SSL_FATAL_ERROR;
+            return WOLFSSL_FATAL_ERROR;
     }
 
     /* if buf is NULL return size of buffer needed (minus null char) */
@@ -15712,7 +19747,6 @@
     return (textSz - 1); /* do not include null character in size */
 }
 
-
 /* Creates a new WOLFSSL_EVP_PKEY structure that has the public key from x509
  *
  * returns a pointer to the created WOLFSSL_EVP_PKEY on success and NULL on fail
@@ -15722,14 +19756,14 @@
     WOLFSSL_EVP_PKEY* key = NULL;
     WOLFSSL_ENTER("X509_get_pubkey");
     if (x509 != NULL) {
-        key = (WOLFSSL_EVP_PKEY*)XMALLOC(
-                    sizeof(WOLFSSL_EVP_PKEY), x509->heap,
-                                                       DYNAMIC_TYPE_PUBLIC_KEY);
+        key = wolfSSL_EVP_PKEY_new_ex(x509->heap);
         if (key != NULL) {
-            XMEMSET(key, 0, sizeof(WOLFSSL_EVP_PKEY));
             if (x509->pubKeyOID == RSAk) {
                 key->type = EVP_PKEY_RSA;
             }
+            else if (x509->pubKeyOID == DSAk) {
+                key->type = EVP_PKEY_DSA;
+            }
             else {
                 key->type = EVP_PKEY_EC;
             }
@@ -15738,7 +19772,7 @@
                         x509->pubKey.length, x509->heap,
                                                        DYNAMIC_TYPE_PUBLIC_KEY);
             if (key->pkey.ptr == NULL) {
-                XFREE(key, x509->heap, DYNAMIC_TYPE_PUBLIC_KEY);
+                wolfSSL_EVP_PKEY_free(key);
                 return NULL;
             }
             XMEMCPY(key->pkey.ptr, x509->pubKey.buffer, x509->pubKey.length);
@@ -15754,29 +19788,28 @@
                 key->ownRsa = 1;
                 key->rsa = wolfSSL_RSA_new();
                 if (key->rsa == NULL) {
-                    XFREE(key, x509->heap, DYNAMIC_TYPE_PUBLIC_KEY);
+                    wolfSSL_EVP_PKEY_free(key);
                     return NULL;
                 }
 
                 if (wolfSSL_RSA_LoadDer_ex(key->rsa,
                             (const unsigned char*)key->pkey.ptr, key->pkey_sz,
                             WOLFSSL_RSA_LOAD_PUBLIC) != SSL_SUCCESS) {
-                    wolfSSL_RSA_free(key->rsa);
-                    XFREE(key, x509->heap, DYNAMIC_TYPE_PUBLIC_KEY);
+                    wolfSSL_EVP_PKEY_free(key);
                     return NULL;
                 }
             }
             #endif /* NO_RSA */
 
             /* decode ECC key */
-            #ifdef HAVE_ECC
+            #if defined(HAVE_ECC) && defined(OPENSSL_EXTRA)
             if (key->type == EVP_PKEY_EC) {
                 word32 idx = 0;
 
                 key->ownEcc = 1;
                 key->ecc = wolfSSL_EC_KEY_new();
                 if (key->ecc == NULL || key->ecc->internal == NULL) {
-                    XFREE(key, x509->heap, DYNAMIC_TYPE_PUBLIC_KEY);
+                    wolfSSL_EVP_PKEY_free(key);
                     return NULL;
                 }
 
@@ -15785,21 +19818,39 @@
                 if (wc_EccPublicKeyDecode((const unsigned char*)key->pkey.ptr,
                         &idx, (ecc_key*)key->ecc->internal, key->pkey_sz) < 0) {
                     WOLFSSL_MSG("wc_EccPublicKeyDecode failed");
-                    XFREE(key, x509->heap, DYNAMIC_TYPE_PUBLIC_KEY);
-                    wolfSSL_EC_KEY_free(key->ecc);
+                    wolfSSL_EVP_PKEY_free(key);
                     return NULL;
                 }
 
                 if (SetECKeyExternal(key->ecc) != SSL_SUCCESS) {
                     WOLFSSL_MSG("SetECKeyExternal failed");
-                    XFREE(key, x509->heap, DYNAMIC_TYPE_PUBLIC_KEY);
-                    wolfSSL_EC_KEY_free(key->ecc);
+                    wolfSSL_EVP_PKEY_free(key);
                     return NULL;
                 }
 
                 key->ecc->inSet = 1;
             }
             #endif /* HAVE_ECC */
+
+            #ifndef NO_DSA
+            if (key->type == EVP_PKEY_DSA) {
+                key->ownDsa = 1;
+                key->dsa = wolfSSL_DSA_new();
+                if (key->dsa == NULL) {
+                    wolfSSL_EVP_PKEY_free(key);
+                    return NULL;
+                }
+
+                if (wolfSSL_DSA_LoadDer_ex(key->dsa,
+                            (const unsigned char*)key->pkey.ptr, key->pkey_sz, \
+                            WOLFSSL_DSA_LOAD_PUBLIC) != SSL_SUCCESS) {
+                    wolfSSL_DSA_free(key->dsa);
+                    key->dsa = NULL;
+                    wolfSSL_EVP_PKEY_free(key);
+                    return NULL;
+                }
+            }
+            #endif /* NO_DSA */
         }
     }
     return key;
@@ -15811,6 +19862,77 @@
  * size of this subset and its memory usage */
 #endif /* OPENSSL_EXTRA_X509_SMALL */
 
+#if defined(OPENSSL_ALL)
+/* Takes two WOLFSSL_X509* certificates and performs a Sha hash of each, if the
+   * has values are the same, then it will do an XMEMCMP to confirm they are
+   * identical. Returns a 0 when certificates match, returns a negative number
+   * when certificates are not a match.
+*/
+int wolfSSL_X509_cmp(const WOLFSSL_X509 *a, const WOLFSSL_X509 *b)
+{
+        const byte* derA;
+        const byte* derB;
+        int retHashA;
+        int retHashB;
+        int outSzA = 0;
+        int outSzB = 0;
+
+        #ifdef WOLFSSL_PIC32MZ_HASH
+            byte digestA[PIC32_DIGEST_SIZE];
+            byte digestB[PIC32_DIGEST_SIZE];
+        #else
+            byte digestA[WC_SHA_DIGEST_SIZE];
+            byte digestB[WC_SHA_DIGEST_SIZE];
+        #endif
+
+        if (a == NULL || b == NULL){
+            return BAD_FUNC_ARG;
+        }
+
+        derA = wolfSSL_X509_get_der((WOLFSSL_X509*)a, &outSzA);
+        if(derA == NULL){
+            WOLFSSL_MSG("wolfSSL_X509_get_der - certificate A has failed");
+            return WOLFSSL_FATAL_ERROR;
+        }
+        derB = wolfSSL_X509_get_der((WOLFSSL_X509*)b, &outSzB);
+        if(derB == NULL){
+            WOLFSSL_MSG("wolfSSL_X509_get_der - certificate B has failed");
+            return WOLFSSL_FATAL_ERROR;
+        }
+
+        retHashA = wc_ShaHash(derA, (word32)outSzA, digestA);
+        if(retHashA != 0){
+            WOLFSSL_MSG("Hash of certificate A has failed");
+            return WOLFSSL_FATAL_ERROR;
+        }
+        retHashB = wc_ShaHash(derB, (word32)outSzB, digestB);
+        if(retHashB != 0){
+            WOLFSSL_MSG("Hash of certificate B has failed");
+            return WOLFSSL_FATAL_ERROR;
+        }
+
+        if (outSzA == outSzB){
+            #ifdef WOLFSSL_PIC32MZ_HASH
+            if(XMEMCMP(digestA, digestB, PIC32_DIGEST_SIZE) != 0){
+                return WOLFSSL_FATAL_ERROR;
+            }
+            #else
+            if(XMEMCMP(digestA, digestB, WC_SHA_DIGEST_SIZE) != 0){
+                return WOLFSSL_FATAL_ERROR;
+            }
+            #endif
+            else{
+                WOLFSSL_LEAVE("wolfSSL_X509_cmp", 0);
+                return 0;
+            }
+        }
+        else{
+            WOLFSSL_LEAVE("wolfSSL_X509_cmp", WOLFSSL_FATAL_ERROR);
+            return WOLFSSL_FATAL_ERROR;
+        }
+    }
+#endif
+
 #if defined(OPENSSL_EXTRA)
 #if !defined(NO_CERTS)
     int wolfSSL_X509_ext_isSet_by_NID(WOLFSSL_X509* x509, int nid)
@@ -15826,9 +19948,15 @@
                 case AUTH_KEY_OID: isSet = x509->authKeyIdSet; break;
                 case SUBJ_KEY_OID: isSet = x509->subjKeyIdSet; break;
                 case KEY_USAGE_OID: isSet = x509->keyUsageSet; break;
-                #ifdef WOLFSSL_SEP
+                case CRL_DIST_OID: isSet = x509->CRLdistSet; break;
+                case EXT_KEY_USAGE_OID: isSet = ((x509->extKeyUsageSrc) ? 1 : 0);
+                    break;
+                case AUTH_INFO_OID: isSet = x509->authInfoSet; break;
+                #if defined(WOLFSSL_SEP) || defined(WOLFSSL_QT)
                     case CERT_POLICY_OID: isSet = x509->certPolicySet; break;
-                #endif /* WOLFSSL_SEP */
+                #endif /* WOLFSSL_SEP || WOLFSSL_QT */
+                default:
+                    WOLFSSL_MSG("NID not in table");
             }
         }
 
@@ -15851,9 +19979,10 @@
                 case AUTH_KEY_OID: crit = x509->authKeyIdCrit; break;
                 case SUBJ_KEY_OID: crit = x509->subjKeyIdCrit; break;
                 case KEY_USAGE_OID: crit = x509->keyUsageCrit; break;
-                #ifdef WOLFSSL_SEP
+                case CRL_DIST_OID: crit= x509->CRLdistCrit; break;
+                #if defined(WOLFSSL_SEP) || defined(WOLFSSL_QT)
                     case CERT_POLICY_OID: crit = x509->certPolicyCrit; break;
-                #endif /* WOLFSSL_SEP */
+                #endif /* WOLFSSL_SEP || WOLFSSL_QT */
             }
         }
 
@@ -15971,7 +20100,7 @@
         WOLFSSL_ENTER("wolfSSL_X509_NAME_entry_count");
 
         if (name != NULL)
-            count = name->fullName.entryCount;
+            count = name->fullName.locSz;
 
         WOLFSSL_LEAVE("wolfSSL_X509_NAME_entry_count", count);
         return count;
@@ -15982,43 +20111,30 @@
     int wolfSSL_X509_NAME_get_index_by_NID(WOLFSSL_X509_NAME* name,
                                           int nid, int pos)
     {
-        int ret    = -1;
+        int value = nid, i;
 
         WOLFSSL_ENTER("wolfSSL_X509_NAME_get_index_by_NID");
 
-        if (name == NULL) {
+        if (name == NULL || pos >= DN_NAMES_MAX + DOMAIN_COMPONENT_MAX) {
             return BAD_FUNC_ARG;
         }
 
-        /* these index values are already stored in DecodedName
-           use those when available */
-        if (name->fullName.fullName && name->fullName.fullNameLen > 0) {
-            name->fullName.dcMode = 0;
-            switch (nid) {
-                case ASN_COMMON_NAME:
-                    if (pos != name->fullName.cnIdx)
-                        ret = name->fullName.cnIdx;
-                    break;
-                case ASN_DOMAIN_COMPONENT:
-                    name->fullName.dcMode = 1;
-                    if (pos < name->fullName.dcNum - 1){
-                        ret = pos + 1;
-                    } else {
-                        ret = -1;
-                    }
-                    break;
-                default:
-                    WOLFSSL_MSG("NID not yet implemented");
-                    break;
-            }
-        }
-
-        WOLFSSL_LEAVE("wolfSSL_X509_NAME_get_index_by_NID", ret);
-
-        (void)pos;
-        (void)nid;
-
-        return ret;
+        if (value == NID_emailAddress) {
+            value = ASN_EMAIL_NAME;
+        }
+
+        i = pos + 1; /* start search after index passed in */
+        if (i < 0) {
+            i = 0;
+        }
+
+        for (;i < name->fullName.locSz &&
+                i < DN_NAMES_MAX + DOMAIN_COMPONENT_MAX; i++) {
+            if (name->fullName.loc[i] == value) {
+                return i;
+            }
+        }
+        return WOLFSSL_FATAL_ERROR;
     }
 
 
@@ -16026,6 +20142,9 @@
                                                     WOLFSSL_X509_NAME_ENTRY* in)
     {
         WOLFSSL_ENTER("wolfSSL_X509_NAME_ENTRY_get_data");
+        if (in == NULL)
+            return NULL;
+
         return in->value;
     }
 
@@ -16034,7 +20153,7 @@
      *
      * returns a pointer to the new structure created on success or NULL if fail
      */
-    WOLFSSL_ASN1_STRING* wolfSSL_ASN1_STRING_new()
+    WOLFSSL_ASN1_STRING* wolfSSL_ASN1_STRING_new(void)
     {
         WOLFSSL_ASN1_STRING* asn1;
 
@@ -16056,7 +20175,7 @@
         WOLFSSL_ENTER("wolfSSL_ASN1_STRING_free");
 
         if (asn1 != NULL) {
-            if (asn1->length > 0 && asn1->data != NULL) {
+            if (asn1->length > 0 && asn1->data != NULL && asn1->isDynamic) {
                 XFREE(asn1->data, NULL, DYNAMIC_TYPE_OPENSSL);
             }
             XFREE(asn1, NULL, DYNAMIC_TYPE_OPENSSL);
@@ -16086,8 +20205,27 @@
     }
 
 
+/******************************************************************************
+* wolfSSL_ASN1_STRING_type - returns the type of <asn1>
+*
+* RETURNS:
+* returns the type set for <asn1>. Otherwise, returns WOLFSSL_FAILURE.
+*/
+    int wolfSSL_ASN1_STRING_type(const WOLFSSL_ASN1_STRING* asn1)
+    {
+
+        WOLFSSL_ENTER("wolfSSL_ASN1_STRING_type");
+
+        if (asn1 == NULL) {
+            return WOLFSSL_FAILURE;
+        }
+
+        return asn1->type;
+    }
+
     /* if dataSz is negative then use XSTRLEN to find length of data
      * return WOLFSSL_SUCCESS on success and WOLFSSL_FAILURE on failure */
+    /* `data` can be NULL and only buffer will be allocated */
     int wolfSSL_ASN1_STRING_set(WOLFSSL_ASN1_STRING* asn1, const void* data,
             int dataSz)
     {
@@ -16095,7 +20233,7 @@
 
         WOLFSSL_ENTER("wolfSSL_ASN1_STRING_set");
 
-        if (data == NULL || asn1 == NULL) {
+        if (asn1 == NULL || (data == NULL && dataSz < 0)) {
             return WOLFSSL_FAILURE;
         }
 
@@ -16111,16 +20249,28 @@
         }
 
         /* free any existing data before copying */
-        if (asn1->data != NULL) {
+        if (asn1->data != NULL && asn1->isDynamic) {
             XFREE(asn1->data, NULL, DYNAMIC_TYPE_OPENSSL);
-        }
-
-        /* create new data buffer and copy over */
-        asn1->data = (char*)XMALLOC(sz, NULL, DYNAMIC_TYPE_OPENSSL);
-        if (asn1->data == NULL) {
-            return WOLFSSL_FAILURE;
-        }
-        XMEMCPY(asn1->data, data, sz);
+            asn1->data = NULL;
+        }
+
+        if (sz > CTC_NAME_SIZE) {
+            /* create new data buffer and copy over */
+            asn1->data = (char*)XMALLOC(sz, NULL, DYNAMIC_TYPE_OPENSSL);
+            if (asn1->data == NULL) {
+                return WOLFSSL_FAILURE;
+            }
+            asn1->isDynamic = 1;
+        }
+        else {
+            XMEMSET(asn1->strData, 0, CTC_NAME_SIZE);
+            asn1->data = asn1->strData;
+            asn1->isDynamic = 0;
+        }
+        if (data != NULL) {
+            XMEMCPY(asn1->data, data, sz);
+            asn1->data[sz] = '\0';
+        }
         asn1->length = sz;
 
         return WOLFSSL_SUCCESS;
@@ -16152,6 +20302,17 @@
         }
     }
 
+#ifndef NO_WOLFSSL_STUB
+    WOLFSSL_ASN1_STRING* wolfSSL_d2i_DISPLAYTEXT(WOLFSSL_ASN1_STRING **asn,
+                                             const unsigned char **in, long len)
+    {
+        WOLFSSL_STUB("d2i_DISPLAYTEXT");
+        (void)asn;
+        (void)in;
+        (void)len;
+        return NULL;
+    }
+#endif
 
 #ifdef XSNPRINTF /* a snprintf function needs to be available */
     /* Writes the human readable form of x509 to bio.
@@ -16161,38 +20322,45 @@
      *
      * returns WOLFSSL_SUCCESS on success and WOLFSSL_FAILURE on failure
      */
-    int wolfSSL_X509_print(WOLFSSL_BIO* bio, WOLFSSL_X509* x509)
-    {
-        WOLFSSL_ENTER("wolfSSL_X509_print");
+    int wolfSSL_X509_print_ex(WOLFSSL_BIO* bio, WOLFSSL_X509* x509,
+        unsigned long nmflags, unsigned long cflag)
+    {
+        WOLFSSL_ENTER("wolfSSL_X509_print_ex");
+
+    #ifndef NO_WOLFSSL_STUB
+        /* flags currently not supported */
+        (void)nmflags;
+        (void)cflag;
+    #endif
 
         if (bio == NULL || x509 == NULL) {
             return WOLFSSL_FAILURE;
         }
 
-        if (wolfSSL_BIO_write(bio, "Certificate:\n", sizeof("Certificate:\n"))
-            <= 0) {
-                return WOLFSSL_FAILURE;
-        }
-
-        if (wolfSSL_BIO_write(bio, "    Data:\n", sizeof("    Data:\n"))
-            <= 0) {
+        if (wolfSSL_BIO_write(bio, "Certificate:\n",
+                      (int)XSTRLEN("Certificate:\n")) <= 0) {
+                return WOLFSSL_FAILURE;
+        }
+
+        if (wolfSSL_BIO_write(bio, "    Data:\n",
+                      (int)XSTRLEN("    Data:\n")) <= 0) {
                 return WOLFSSL_FAILURE;
         }
 
         /* print version of cert */
         {
             int version;
-            char tmp[17];
-
-            if ((version = wolfSSL_X509_version(x509)) <= 0) {
+            char tmp[20];
+
+            if ((version = wolfSSL_X509_version(x509)) < 0) {
                 WOLFSSL_MSG("Error getting X509 version");
                 return WOLFSSL_FAILURE;
             }
-            if (wolfSSL_BIO_write(bio, "        Version: ",
-                                sizeof("        Version: ")) <= 0) {
-                return WOLFSSL_FAILURE;
-            }
-	        XSNPRINTF(tmp, sizeof(tmp), "%d\n", version);
+            if (wolfSSL_BIO_write(bio, "        Version:",
+                          (int)XSTRLEN("        Version:")) <= 0) {
+                return WOLFSSL_FAILURE;
+            }
+            XSNPRINTF(tmp, sizeof(tmp), " %d (0x%x)\n", version, (byte)version-1);
             if (wolfSSL_BIO_write(bio, tmp, (int)XSTRLEN(tmp)) <= 0) {
                 return WOLFSSL_FAILURE;
             }
@@ -16209,15 +20377,15 @@
                 WOLFSSL_MSG("Error getting x509 serial number");
                 return WOLFSSL_FAILURE;
             }
-            if (wolfSSL_BIO_write(bio, "        Serial Number: ",
-                                sizeof("        Serial Number: ")) <= 0) {
+            if (wolfSSL_BIO_write(bio, "        Serial Number:",
+                          (int)XSTRLEN("        Serial Number:")) <= 0) {
                 return WOLFSSL_FAILURE;
             }
 
             /* if serial can fit into byte than print on the same line */
             if (sz <= (int)sizeof(byte)) {
                 char tmp[17];
-                XSNPRINTF(tmp, sizeof(tmp), "%d (0x%x)\n", serial[0],serial[0]);
+                XSNPRINTF(tmp, sizeof(tmp), " %d (0x%x)\n", serial[0],serial[0]);
                 if (wolfSSL_BIO_write(bio, tmp, (int)XSTRLEN(tmp)) <= 0) {
                     return WOLFSSL_FAILURE;
                 }
@@ -16231,7 +20399,7 @@
 
                 /* serial is larger than int size so print off hex values */
                 if (wolfSSL_BIO_write(bio, "\n            ",
-                                sizeof("\n            ")) <= 0) {
+                              (int)XSTRLEN("\n            ")) <= 0) {
                     return WOLFSSL_FAILURE;
                 }
                 tmp[0] = '\0';
@@ -16251,22 +20419,22 @@
 
         /* print signature algo */
         {
-            int   oid;
-            char* sig;
+            int oid;
+            const char* sig;
 
             if ((oid = wolfSSL_X509_get_signature_type(x509)) <= 0) {
                 WOLFSSL_MSG("Error getting x509 signature type");
                 return WOLFSSL_FAILURE;
             }
-            if (wolfSSL_BIO_write(bio, "    Signature Algorithm: ",
-                                sizeof("    Signature Algorithm: ")) <= 0) {
+            if (wolfSSL_BIO_write(bio, "        Signature Algorithm: ",
+                          (int)XSTRLEN("        Signature Algorithm: ")) <= 0) {
                 return WOLFSSL_FAILURE;
             }
             sig = GetSigName(oid);
             if (wolfSSL_BIO_write(bio, sig, (int)XSTRLEN(sig)) <= 0) {
                 return WOLFSSL_FAILURE;
             }
-            if (wolfSSL_BIO_write(bio, "\n", sizeof("\n")) <= 0) {
+            if (wolfSSL_BIO_write(bio, "\n", (int)XSTRLEN("\n")) <= 0) {
                 return WOLFSSL_FAILURE;
             }
         }
@@ -16282,11 +20450,16 @@
             int  issSz = 256;
         #endif
 
-            issuer  = wolfSSL_X509_NAME_oneline(
-                             wolfSSL_X509_get_issuer_name(x509), buff, issSz);
+        #if defined(WOLFSSL_QT)
+            issuer = wolfSSL_X509_get_name_oneline(
+                               wolfSSL_X509_get_issuer_name(x509), buff, issSz);
+        #else
+            issuer = wolfSSL_X509_NAME_oneline(
+                               wolfSSL_X509_get_issuer_name(x509), buff, issSz);
+        #endif
 
             if (wolfSSL_BIO_write(bio, "        Issuer: ",
-                                sizeof("        Issuer: ")) <= 0) {
+                          (int)XSTRLEN("        Issuer: ")) <= 0) {
                 #ifdef WOLFSSL_SMALL_STACK
                 XFREE(issuer, NULL, DYNAMIC_TYPE_OPENSSL);
                 #endif
@@ -16303,52 +20476,66 @@
             #ifdef WOLFSSL_SMALL_STACK
             XFREE(issuer, NULL, DYNAMIC_TYPE_OPENSSL);
             #endif
-            if (wolfSSL_BIO_write(bio, "\n", sizeof("\n")) <= 0) {
-                return WOLFSSL_FAILURE;
-            }
-        }
-
+            if (wolfSSL_BIO_write(bio, "\n", (int)XSTRLEN("\n")) <= 0) {
+                return WOLFSSL_FAILURE;
+            }
+        }
+
+    #ifndef NO_ASN_TIME
         /* print validity */
         {
             char tmp[80];
 
             if (wolfSSL_BIO_write(bio, "        Validity\n",
-                                sizeof("        Validity\n")) <= 0) {
-                return WOLFSSL_FAILURE;
-            }
+                          (int)XSTRLEN("        Validity\n")) <= 0) {
+                return WOLFSSL_FAILURE;
+            }
+
             if (wolfSSL_BIO_write(bio, "            Not Before: ",
-                                sizeof("            Not Before: ")) <= 0) {
-                return WOLFSSL_FAILURE;
-            }
-            if (GetTimeString(x509->notBefore + 2, ASN_UTC_TIME,
-                tmp, sizeof(tmp)) != WOLFSSL_SUCCESS) {
-                if (GetTimeString(x509->notBefore + 2, ASN_GENERALIZED_TIME,
-                tmp, sizeof(tmp)) != WOLFSSL_SUCCESS) {
-                    WOLFSSL_MSG("Error getting not before date");
-                    return WOLFSSL_FAILURE;
-                }
+                          (int)XSTRLEN("            Not Before: ")) <= 0) {
+                return WOLFSSL_FAILURE;
+            }
+            if (x509->notBefore.length > 0) {
+                if (GetTimeString(x509->notBefore.data, ASN_UTC_TIME,
+                    tmp, sizeof(tmp)) != WOLFSSL_SUCCESS) {
+                    if (GetTimeString(x509->notBefore.data, ASN_GENERALIZED_TIME,
+                    tmp, sizeof(tmp)) != WOLFSSL_SUCCESS) {
+                        WOLFSSL_MSG("Error getting not before date");
+                        return WOLFSSL_FAILURE;
+                    }
+                }
+            }
+            else {
+                XSTRNCPY(tmp, "Not Set", sizeof(tmp)-1);
             }
             tmp[sizeof(tmp) - 1] = '\0'; /* make sure null terminated */
             if (wolfSSL_BIO_write(bio, tmp, (int)XSTRLEN(tmp)) <= 0) {
                 return WOLFSSL_FAILURE;
             }
+
             if (wolfSSL_BIO_write(bio, "\n            Not After : ",
-                                sizeof("\n            Not After : ")) <= 0) {
-                return WOLFSSL_FAILURE;
-            }
-            if (GetTimeString(x509->notAfter + 2,ASN_UTC_TIME,
-                tmp, sizeof(tmp)) != WOLFSSL_SUCCESS) {
-                if (GetTimeString(x509->notAfter + 2,ASN_GENERALIZED_TIME,
+                          (int)XSTRLEN("\n            Not After : ")) <= 0) {
+                return WOLFSSL_FAILURE;
+            }
+            if (x509->notAfter.length > 0) {
+                if (GetTimeString(x509->notAfter.data, ASN_UTC_TIME,
                     tmp, sizeof(tmp)) != WOLFSSL_SUCCESS) {
-                    WOLFSSL_MSG("Error getting not before date");
-                    return WOLFSSL_FAILURE;
-                }
+                    if (GetTimeString(x509->notAfter.data, ASN_GENERALIZED_TIME,
+                        tmp, sizeof(tmp)) != WOLFSSL_SUCCESS) {
+                        WOLFSSL_MSG("Error getting not after date");
+                        return WOLFSSL_FAILURE;
+                    }
+                }
+            }
+            else {
+                XSTRNCPY(tmp, "Not Set", sizeof(tmp)-1);
             }
             tmp[sizeof(tmp) - 1] = '\0'; /* make sure null terminated */
             if (wolfSSL_BIO_write(bio, tmp, (int)XSTRLEN(tmp)) <= 0) {
                 return WOLFSSL_FAILURE;
             }
         }
+    #endif
 
         /* print subject */
         {
@@ -16361,11 +20548,16 @@
             int  subSz = 256;
         #endif
 
-            subject  = wolfSSL_X509_NAME_oneline(
-                             wolfSSL_X509_get_subject_name(x509), buff, subSz);
+        #if defined(WOLFSSL_QT)
+            subject = wolfSSL_X509_get_name_oneline(
+                              wolfSSL_X509_get_subject_name(x509), buff, subSz);
+        #else
+            subject = wolfSSL_X509_NAME_oneline(
+                              wolfSSL_X509_get_subject_name(x509), buff, subSz);
+        #endif
 
             if (wolfSSL_BIO_write(bio, "\n        Subject: ",
-                                sizeof("\n        Subject: ")) <= 0) {
+                          (int)XSTRLEN("\n        Subject: ")) <= 0) {
                 #ifdef WOLFSSL_SMALL_STACK
                 XFREE(subject, NULL, DYNAMIC_TYPE_OPENSSL);
                 #endif
@@ -16386,24 +20578,26 @@
 
         /* get and print public key */
         if (wolfSSL_BIO_write(bio, "\n        Subject Public Key Info:\n",
-                          sizeof("\n        Subject Public Key Info:\n")) <= 0) {
+                      (int)XSTRLEN("\n        Subject Public Key Info:\n")) <= 0) {
             return WOLFSSL_FAILURE;
         }
         {
+        #if (!defined(NO_RSA) && !defined(HAVE_USER_RSA)) || defined(HAVE_ECC)
             char tmp[100];
+        #endif
 
             switch (x509->pubKeyOID) {
-                #ifndef NO_RSA
+            #ifndef NO_RSA
                 case RSAk:
                     if (wolfSSL_BIO_write(bio,
-                                "            Public Key Algorithm: RSA\n",
-                      sizeof("            Public Key Algorithm: RSA\n")) <= 0) {
+                                "            Public Key Algorithm: rsaEncryption\n",
+                   (int)XSTRLEN("            Public Key Algorithm: rsaEncryption\n")) <= 0) {
                         return WOLFSSL_FAILURE;
                     }
                 #ifdef HAVE_USER_RSA
                     if (wolfSSL_BIO_write(bio,
                         "                Build without user RSA to print key\n",
-                sizeof("                Build without user RSA to print key\n"))
+           (int)XSTRLEN("                Build without user RSA to print key\n"))
                         <= 0) {
                         return WOLFSSL_FAILURE;
                     }
@@ -16423,26 +20617,30 @@
                         if (wc_RsaPublicKeyDecode(x509->pubKey.buffer,
                                 &idx, &rsa, x509->pubKey.length) != 0) {
                             WOLFSSL_MSG("Error decoding RSA key");
+                            wc_FreeRsaKey(&rsa);
                             return WOLFSSL_FAILURE;
                         }
                         if ((sz = wc_RsaEncryptSize(&rsa)) < 0) {
                             WOLFSSL_MSG("Error getting RSA key size");
+                            wc_FreeRsaKey(&rsa);
                             return WOLFSSL_FAILURE;
                         }
                         XSNPRINTF(tmp, sizeof(tmp) - 1, "%s%s: (%d bit)\n%s\n",
-                                "                 ", "Public-Key", 8 * sz,
-                                "                 Modulus:");
+                                "                ", "Public-Key", 8 * sz,
+                                "                Modulus:");
                         tmp[sizeof(tmp) - 1] = '\0';
-                        if (wolfSSL_BIO_write(bio, tmp, (int)XSTRLEN(tmp)) <= 0) {
+                        if (wolfSSL_BIO_write(bio, tmp,
+                                                      (int)XSTRLEN(tmp)) <= 0) {
+                            wc_FreeRsaKey(&rsa);
                             return WOLFSSL_FAILURE;
                         }
 
                         /* print out modulus */
-                        XSNPRINTF(tmp, sizeof(tmp) - 1,"                     ");
+                        XSNPRINTF(tmp, sizeof(tmp) - 1,"                    ");
                         tmp[sizeof(tmp) - 1] = '\0';
                         if (mp_leading_bit(&rsa.n)) {
                             lbit = 1;
-                            XSTRNCAT(tmp, "00", sizeof("00"));
+                            XSTRNCAT(tmp, "00", 3);
                         }
 
                         rawLen = mp_unsigned_bin_size(&rsa.n);
@@ -16450,6 +20648,7 @@
                                 DYNAMIC_TYPE_TMP_BUFFER);
                         if (rawKey == NULL) {
                             WOLFSSL_MSG("Memory error");
+                            wc_FreeRsaKey(&rsa);
                             return WOLFSSL_FAILURE;
                         }
                         mp_to_unsigned_bin(&rsa.n, rawKey);
@@ -16462,13 +20661,15 @@
                             }
                             else if ((idx != 0) && (((idx + lbit) % 15) == 0)) {
                                 tmp[sizeof(tmp) - 1] = '\0';
-                                if (wolfSSL_BIO_write(bio, tmp, (int)XSTRLEN(tmp))
-                                        <= 0) {
-                                    XFREE(rawKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+                                if (wolfSSL_BIO_write(bio, tmp,
+                                                      (int)XSTRLEN(tmp)) <= 0) {
+                                    XFREE(rawKey, NULL,
+                                        DYNAMIC_TYPE_TMP_BUFFER);
+                                    wc_FreeRsaKey(&rsa);
                                     return WOLFSSL_FAILURE;
                                 }
                                 XSNPRINTF(tmp, sizeof(tmp) - 1,
-                                        ":\n                     ");
+                                        ":\n                    ");
                                 XSNPRINTF(val, valSz - 1, "%02x", rawKey[idx]);
                             }
                             else {
@@ -16478,19 +20679,20 @@
                         }
                         XFREE(rawKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
 
-                        /* print out remaning modulus values */
+                        /* print out remaining modulus values */
                         if ((idx > 0) && (((idx - 1 + lbit) % 15) != 0)) {
-                                tmp[sizeof(tmp) - 1] = '\0';
-                                if (wolfSSL_BIO_write(bio, tmp, (int)XSTRLEN(tmp))
-                                        <= 0) {
-                                    return WOLFSSL_FAILURE;
-                                }
+                            tmp[sizeof(tmp) - 1] = '\0';
+                            if (wolfSSL_BIO_write(bio, tmp,
+                                                      (int)XSTRLEN(tmp)) <= 0) {
+                                return WOLFSSL_FAILURE;
+                            }
                         }
 
                         /* print out exponent values */
                         rawLen = mp_unsigned_bin_size(&rsa.e);
                         if (rawLen < 0) {
                             WOLFSSL_MSG("Error getting exponent size");
+                            wc_FreeRsaKey(&rsa);
                             return WOLFSSL_FAILURE;
                         }
 
@@ -16501,26 +20703,33 @@
                                 DYNAMIC_TYPE_TMP_BUFFER);
                         if (rawKey == NULL) {
                             WOLFSSL_MSG("Memory error");
+                            wc_FreeRsaKey(&rsa);
                             return WOLFSSL_FAILURE;
                         }
                         XMEMSET(rawKey, 0, rawLen);
                         mp_to_unsigned_bin(&rsa.e, rawKey);
                         if ((word32)rawLen <= sizeof(word32)) {
                             idx = *(word32*)rawKey;
+                        #ifdef BIG_ENDIAN_ORDER
+                            idx = ByteReverseWord32(idx);
+                        #endif
                         }
                         XSNPRINTF(tmp, sizeof(tmp) - 1,
-                        "\n                 Exponent: %d\n", idx);
-                        if (wolfSSL_BIO_write(bio, tmp, (int)XSTRLEN(tmp)) <= 0) {
+                            "\n                Exponent: %d (0x%x)\n",idx, idx);
+                        if (wolfSSL_BIO_write(bio, tmp,
+                                                      (int)XSTRLEN(tmp)) <= 0) {
                             XFREE(rawKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+                            wc_FreeRsaKey(&rsa);
                             return WOLFSSL_FAILURE;
                         }
                         XFREE(rawKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+                        wc_FreeRsaKey(&rsa);
                     }
                 #endif /* HAVE_USER_RSA */
                     break;
-                #endif /* NO_RSA */
-
-                #ifdef HAVE_ECC
+            #endif /* NO_RSA */
+
+            #ifdef HAVE_ECC
                 case ECDSAk:
                     {
                         word32 i;
@@ -16528,7 +20737,7 @@
 
                         if (wolfSSL_BIO_write(bio,
                                 "            Public Key Algorithm: EC\n",
-                      sizeof("            Public Key Algorithm: EC\n")) <= 0) {
+                   (int)XSTRLEN("            Public Key Algorithm: EC\n")) <= 0) {
                         return WOLFSSL_FAILURE;
                         }
                         if (wc_ecc_init_ex(&ecc, x509->heap, INVALID_DEVID)
@@ -16547,7 +20756,8 @@
                                 8 * wc_ecc_size(&ecc),
                                 "                 pub:");
                         tmp[sizeof(tmp) - 1] = '\0';
-                        if (wolfSSL_BIO_write(bio, tmp, (int)XSTRLEN(tmp)) <= 0) {
+                        if (wolfSSL_BIO_write(bio, tmp,
+                                                      (int)XSTRLEN(tmp)) <= 0) {
                             wc_ecc_free(&ecc);
                             return WOLFSSL_FAILURE;
                         }
@@ -16595,11 +20805,11 @@
                                 XSTRNCAT(tmp, val, valSz);
                             }
 
-                            /* print out remaning modulus values */
+                            /* print out remaining modulus values */
                             if ((i > 0) && (((i - 1) % 15) != 0)) {
                                 tmp[sizeof(tmp) - 1] = '\0';
-                                if (wolfSSL_BIO_write(bio, tmp, (int)XSTRLEN(tmp))
-                                        <= 0) {
+                                if (wolfSSL_BIO_write(bio, tmp,
+                                                      (int)XSTRLEN(tmp)) <= 0) {
                                     wc_ecc_free(&ecc);
                                     XFREE(der, x509->heap,
                                                 DYNAMIC_TYPE_TMP_BUFFER);
@@ -16612,14 +20822,15 @@
                         XSNPRINTF(tmp, sizeof(tmp) - 1, "\n%s%s: %s\n",
                                 "                ", "ASN1 OID",
                                 ecc.dp->name);
-                        if (wolfSSL_BIO_write(bio, tmp, (int)XSTRLEN(tmp)) <= 0) {
+                        if (wolfSSL_BIO_write(bio, tmp,
+                                                      (int)XSTRLEN(tmp)) <= 0) {
                             wc_ecc_free(&ecc);
                             return WOLFSSL_FAILURE;
                         }
                         wc_ecc_free(&ecc);
                     }
                     break;
-                #endif /* HAVE_ECC */
+            #endif /* HAVE_ECC */
                 default:
                     WOLFSSL_MSG("Unknown key type");
                     return WOLFSSL_FAILURE;
@@ -16628,7 +20839,7 @@
 
         /* print out extensions */
         if (wolfSSL_BIO_write(bio, "        X509v3 extensions:\n",
-                            sizeof("        X509v3 extensions:\n")) <= 0) {
+                      (int)XSTRLEN("        X509v3 extensions:\n")) <= 0) {
             return WOLFSSL_FAILURE;
         }
 
@@ -16642,13 +20853,13 @@
 
 
             if (wolfSSL_BIO_write(bio,
-                        "            X509v3 Subject Key Identifier:\n",
-                 sizeof("            X509v3 Subject Key Identifier:\n"))
+                        "            X509v3 Subject Key Identifier: \n",
+           (int)XSTRLEN("            X509v3 Subject Key Identifier: \n"))
                  <= 0) {
                 return WOLFSSL_FAILURE;
             }
 
-            XSNPRINTF(tmp, sizeof(tmp) - 1, "                 ");
+            XSNPRINTF(tmp, sizeof(tmp) - 1, "                ");
             for (i = 0; i < sizeof(tmp) && i < (x509->subjKeyIdSz - 1); i++) {
                 XSNPRINTF(val, valSz - 1, "%02X:", x509->subjKeyId[i]);
                 XSTRNCAT(tmp, val, valSz);
@@ -16667,15 +20878,16 @@
             word32 i;
             char val[5];
             int valSz = 5;
+            int len = 0;
 
             if (wolfSSL_BIO_write(bio,
-                        "            X509v3 Authority Key Identifier:\n",
-                 sizeof("            X509v3 Authority Key Identifier:\n"))
+                        "            X509v3 Authority Key Identifier: \n",
+           (int)XSTRLEN("            X509v3 Authority Key Identifier: \n"))
                  <= 0) {
                 return WOLFSSL_FAILURE;
             }
 
-            XSNPRINTF(tmp, sizeof(tmp) - 1, "                 keyid");
+            XSNPRINTF(tmp, sizeof(tmp) - 1, "                keyid");
             for (i = 0; i < x509->authKeyIdSz; i++) {
                 /* check if buffer is almost full */
                 if (XSTRLEN(tmp) >= sizeof(tmp) - valSz) {
@@ -16687,46 +20899,11 @@
                 XSNPRINTF(val, valSz - 1, ":%02X", x509->authKeyId[i]);
                 XSTRNCAT(tmp, val, valSz);
             }
+            len = (int)XSTRLEN("\n");
+            XSTRNCAT(tmp, "\n", len + 1);
             if (wolfSSL_BIO_write(bio, tmp, (int)XSTRLEN(tmp)) <= 0) {
                 return WOLFSSL_FAILURE;
             }
-
-            /* print issuer */
-            {
-                char* issuer;
-            #ifdef WOLFSSL_SMALL_STACK
-                char* buff  = NULL;
-                int   issSz = 0;
-            #else
-                char buff[256];
-                int  issSz = 256;
-            #endif
-
-                issuer  = wolfSSL_X509_NAME_oneline(
-                               wolfSSL_X509_get_issuer_name(x509), buff, issSz);
-
-                if (wolfSSL_BIO_write(bio, "\n                 DirName:",
-                                  sizeof("\n                 DirName:")) <= 0) {
-                    #ifdef WOLFSSL_SMALL_STACK
-                    XFREE(issuer, NULL, DYNAMIC_TYPE_OPENSSL);
-                    #endif
-                    return WOLFSSL_FAILURE;
-                }
-                if (issuer != NULL) {
-                    if (wolfSSL_BIO_write(bio, issuer, (int)XSTRLEN(issuer)) <= 0) {
-                        #ifdef WOLFSSL_SMALL_STACK
-                        XFREE(issuer, NULL, DYNAMIC_TYPE_OPENSSL);
-                        #endif
-                        return WOLFSSL_FAILURE;
-                    }
-                }
-                #ifdef WOLFSSL_SMALL_STACK
-                XFREE(issuer, NULL, DYNAMIC_TYPE_OPENSSL);
-                #endif
-                if (wolfSSL_BIO_write(bio, "\n", sizeof("\n")) <= 0) {
-                    return WOLFSSL_FAILURE;
-                }
-            }
         }
 
         /* print basic constraint */
@@ -16734,13 +20911,13 @@
             char tmp[100];
 
             if (wolfSSL_BIO_write(bio,
-                        "\n            X509v3 Basic Constraints:\n",
-                 sizeof("\n            X509v3 Basic Constraints:\n"))
+                        "\n            X509v3 Basic Constraints: \n",
+           (int)XSTRLEN("\n            X509v3 Basic Constraints: \n"))
                  <= 0) {
                 return WOLFSSL_FAILURE;
             }
             XSNPRINTF(tmp, sizeof(tmp),
-                    "                    CA:%s\n",
+                    "                CA:%s\n",
                     (x509->isCa)? "TRUE": "FALSE");
             if (wolfSSL_BIO_write(bio, tmp, (int)XSTRLEN(tmp)) <= 0) {
                 return WOLFSSL_FAILURE;
@@ -16748,7 +20925,7 @@
         }
 
         /* print out signature */
-        {
+        if (x509->sig.length > 0) {
             unsigned char* sig;
             int sigSz;
             int i;
@@ -16757,7 +20934,7 @@
 
             if (wolfSSL_BIO_write(bio,
                                 "    Signature Algorithm: ",
-                      sizeof("    Signature Algorithm: ")) <= 0) {
+                   (int)XSTRLEN("    Signature Algorithm: ")) <= 0) {
                 return WOLFSSL_FAILURE;
             }
             XSNPRINTF(tmp, sizeof(tmp) - 1,"%s\n", GetSigName(sigOid));
@@ -16768,14 +20945,15 @@
 
             sigSz = (int)x509->sig.length;
             sig = (unsigned char*)XMALLOC(sigSz, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-            if (sig == NULL || sigSz <= 0) {
-                return WOLFSSL_FAILURE;
-            }
+            if (sig == NULL) {
+                return WOLFSSL_FAILURE;
+            }
+
             if (wolfSSL_X509_get_signature(x509, sig, &sigSz) <= 0) {
                 XFREE(sig, NULL, DYNAMIC_TYPE_TMP_BUFFER);
                 return WOLFSSL_FAILURE;
             }
-            XSNPRINTF(tmp, sizeof(tmp) - 1,"         ");
+            XSNPRINTF(tmp, sizeof(tmp) - 1,"        ");
             tmp[sizeof(tmp) - 1] = '\0';
             for (i = 0; i < sigSz; i++) {
                 char val[5];
@@ -16792,7 +20970,7 @@
                         return WOLFSSL_FAILURE;
                     }
                     XSNPRINTF(tmp, sizeof(tmp) - 1,
-                            ":\n         ");
+                            ":\n        ");
                     XSNPRINTF(val, valSz - 1, "%02x", sig[i]);
                 }
                 else {
@@ -16802,7 +20980,7 @@
             }
             XFREE(sig, NULL, DYNAMIC_TYPE_TMP_BUFFER);
 
-            /* print out remaning sig values */
+            /* print out remaining sig values */
             if ((i > 0) && (((i - 1) % 18) != 0)) {
                     tmp[sizeof(tmp) - 1] = '\0';
                     if (wolfSSL_BIO_write(bio, tmp, (int)XSTRLEN(tmp))
@@ -16813,26 +20991,165 @@
         }
 
         /* done with print out */
-        if (wolfSSL_BIO_write(bio, "\n", sizeof("\n")) <= 0) {
-            return WOLFSSL_FAILURE;
-        }
-
-        return WOLFSSL_SUCCESS;
-    }
+        if (wolfSSL_BIO_write(bio, "\n\0", (int)XSTRLEN("\n\0")) <= 0) {
+            return WOLFSSL_FAILURE;
+        }
+
+        return WOLFSSL_SUCCESS;
+    }
+    int wolfSSL_X509_print(WOLFSSL_BIO* bio, WOLFSSL_X509* x509)
+    {
+        return wolfSSL_X509_print_ex(bio, x509, 0, 0);
+    }
+
 #endif /* XSNPRINTF */
 
 #endif /* NO_CERTS */
 
+#if defined(OPENSSL_ALL) || defined(WOLFSSL_QT)
+/* Creates cipher->description based on cipher->offset
+ * cipher->offset is set in wolfSSL_get_ciphers_compat when it is added
+ * to a stack of ciphers.
+ * @param [in] cipher: A cipher from a stack of ciphers.
+ * return WOLFSSL_SUCCESS if cipher->description is set, else WOLFSSL_FAILURE
+ */
+int wolfSSL_sk_CIPHER_description(WOLFSSL_CIPHER* cipher)
+{
+    int ret = WOLFSSL_FAILURE;
+    int i,j,k;
+    int strLen;
+    unsigned long offset;
+    char* dp;
+    const char* name;
+    const char *keaStr, *authStr, *encStr, *macStr, *protocol;
+    char n[MAX_SEGMENTS][MAX_SEGMENT_SZ] = {{0}};
+    unsigned char len = MAX_DESCRIPTION_SZ-1;
+    const CipherSuiteInfo* cipher_names;
+    ProtocolVersion pv;
+    WOLFSSL_ENTER("wolfSSL_sk_CIPHER_description");
+
+    if (cipher == NULL)
+        return WOLFSSL_FAILURE;
+
+    dp = cipher->description;
+    if (dp == NULL)
+        return WOLFSSL_FAILURE;
+
+    cipher_names = GetCipherNames();
+
+    offset = cipher->offset;
+    pv.major = cipher_names[offset].major;
+    pv.minor = cipher_names[offset].minor;
+    protocol = wolfSSL_internal_get_version(&pv);
+
+    name = cipher_names[offset].name;
+
+    if (name == NULL)
+        return ret;
+
+    /* Segment cipher name into n[n0,n1,n2,n4]
+     * These are used later for comparisons to create:
+     * keaStr, authStr, encStr, macStr
+     *
+     * If cipher_name = ECDHE-ECDSA-AES256-SHA
+     * then n0 = "ECDHE", n1 = "ECDSA", n2 = "AES256", n3 = "SHA"
+     * and n = [n0,n1,n2,n3,0]
+     */
+    strLen = (int)XSTRLEN(name);
+
+    for (i = 0, j = 0, k = 0; i <= strLen; i++) {
+        if (k > MAX_SEGMENTS || j > MAX_SEGMENT_SZ)
+            break;
+
+        if (name[i] != '-' && name[i] != '\0') {
+            n[k][j] = name[i]; /* Fill kth segment string until '-' */
+            j++;
+        }
+        else {
+            n[k][j] = '\0';
+            j = 0;
+            k++;
+        }
+    }
+    /* keaStr */
+    keaStr = GetCipherKeaStr(n);
+    /* authStr */
+    authStr = GetCipherAuthStr(n);
+    /* encStr */
+    encStr = GetCipherEncStr(n);
+    if ((cipher->bits = SetCipherBits(encStr)) == WOLFSSL_FAILURE) {
+       WOLFSSL_MSG("Cipher Bits Not Set.");
+    }
+    /* macStr */
+    macStr = GetCipherMacStr(n);
+
+
+    /* Build up the string by copying onto the end. */
+    XSTRNCPY(dp, name, len);
+    dp[len-1] = '\0'; strLen = (int)XSTRLEN(dp);
+    len -= (int)strLen; dp += strLen;
+
+    XSTRNCPY(dp, " ", len);
+    dp[len-1] = '\0'; strLen = (int)XSTRLEN(dp);
+    len -= (int)strLen; dp += strLen;
+    XSTRNCPY(dp, protocol, len);
+    dp[len-1] = '\0'; strLen = (int)XSTRLEN(dp);
+    len -= (int)strLen; dp += strLen;
+
+    XSTRNCPY(dp, " Kx=", len);
+    dp[len-1] = '\0'; strLen = (int)XSTRLEN(dp);
+    len -= (int)strLen; dp += strLen;
+    XSTRNCPY(dp, keaStr, len);
+    dp[len-1] = '\0'; strLen = (int)XSTRLEN(dp);
+    len -= (int)strLen; dp += strLen;
+
+    XSTRNCPY(dp, " Au=", len);
+    dp[len-1] = '\0'; strLen = (int)XSTRLEN(dp);
+    len -= (int)strLen; dp += strLen;
+    XSTRNCPY(dp, authStr, len);
+    dp[len-1] = '\0'; strLen = (int)XSTRLEN(dp);
+    len -= (int)strLen; dp += strLen;
+
+    XSTRNCPY(dp, " Enc=", len);
+    dp[len-1] = '\0'; strLen = (int)XSTRLEN(dp);
+    len -= (int)strLen; dp += strLen;
+    XSTRNCPY(dp, encStr, len);
+    dp[len-1] = '\0'; strLen = (int)XSTRLEN(dp);
+    len -= (int)strLen; dp += strLen;
+
+    XSTRNCPY(dp, " Mac=", len);
+    dp[len-1] = '\0'; strLen = (int)XSTRLEN(dp);
+    len -= (int)strLen; dp += strLen;
+    XSTRNCPY(dp, macStr, len);
+    dp[len-1] = '\0';
+
+    return WOLFSSL_SUCCESS;
+}
+#endif
+
 char* wolfSSL_CIPHER_description(const WOLFSSL_CIPHER* cipher, char* in,
                                  int len)
 {
     char *ret = in;
     const char *keaStr, *authStr, *encStr, *macStr;
     size_t strLen;
+    WOLFSSL_ENTER("wolfSSL_CIPHER_description");
 
     if (cipher == NULL || in == NULL)
         return NULL;
 
+#if defined(WOLFSSL_QT) || defined(OPENSSL_ALL)
+    /* if cipher is in the stack from wolfSSL_get_ciphers_compat then
+     * Return the description based on cipher_names[cipher->offset]
+     */
+    if (cipher->in_stack == TRUE) {
+        wolfSSL_sk_CIPHER_description((WOLFSSL_CIPHER*)cipher);
+        XSTRNCPY(in,cipher->description,len);
+        return ret;
+    }
+#endif
+
+    /* Get the cipher description based on the SSL session cipher */
     switch (cipher->ssl->specs.kea) {
         case no_kea:
             keaStr = "None";
@@ -17020,11 +21337,6 @@
             macStr = "SHA512";
             break;
 #endif
-#ifdef HAVE_BLAKE2
-        case blake2b_mac:
-            macStr = "BLAKE2b";
-            break;
-#endif
         default:
             macStr = "unknown";
             break;
@@ -17101,18 +21413,6 @@
 }
 #endif
 
-WOLFSSL_METHOD* wolfSSLv2_client_method(void)
-{
-    return 0;
-}
-
-
-WOLFSSL_METHOD* wolfSSLv2_server_method(void)
-{
-    return 0;
-}
-
-
 #ifndef NO_MD4
 
 void wolfSSL_MD4_Init(WOLFSSL_MD4_CTX* md4)
@@ -17169,18 +21469,12 @@
 }
 
 
-int wolfSSL_BIO_pending(WOLFSSL_BIO* bio)
-{
-    return (int)wolfSSL_BIO_ctrl_pending(bio);
-}
-
-
 
 WOLFSSL_BIO_METHOD* wolfSSL_BIO_s_mem(void)
 {
     static WOLFSSL_BIO_METHOD meth;
 
-    WOLFSSL_ENTER("BIO_s_mem");
+    WOLFSSL_ENTER("wolfSSL_BIO_s_mem");
     meth.type = WOLFSSL_BIO_MEMORY;
 
     return &meth;
@@ -17200,7 +21494,7 @@
 
 /* Set the flag for the bio.
  *
- * bio   the structre to set the flag in
+ * bio   the structure to set the flag in
  * flags the flag to use
  */
 void wolfSSL_BIO_set_flags(WOLFSSL_BIO* bio, int flags)
@@ -17212,6 +21506,57 @@
     }
 }
 
+void wolfSSL_BIO_clear_flags(WOLFSSL_BIO *bio, int flags)
+{
+    WOLFSSL_ENTER("wolfSSL_BIO_clear_flags");
+    if (bio != NULL) {
+        bio->flags &= ~flags;
+    }
+}
+
+/* Set ex_data for WOLFSSL_BIO
+ *
+ * bio  : BIO structure to set ex_data in
+ * idx  : Index of ex_data to set
+ * data : Data to set in ex_data
+ *
+ * Returns WOLFSSL_SUCCESS on success or WOLFSSL_FAILURE on failure
+ */
+int wolfSSL_BIO_set_ex_data(WOLFSSL_BIO *bio, int idx, void *data)
+{
+    WOLFSSL_ENTER("wolfSSL_BIO_set_ex_data");
+    #ifdef HAVE_EX_DATA
+    if (bio != NULL && idx < MAX_EX_DATA) {
+        return wolfSSL_CRYPTO_set_ex_data(&bio->ex_data, idx, data);
+    }
+    #else
+    (void)bio;
+    (void)idx;
+    (void)data;
+    #endif
+    return WOLFSSL_FAILURE;
+}
+
+/* Get ex_data in WOLFSSL_BIO at given index
+ *
+ * bio  : BIO structure to get ex_data from
+ * idx  : Index of ex_data to get data from
+ *
+ * Returns void pointer to ex_data on success or NULL on failure
+ */
+void *wolfSSL_BIO_get_ex_data(WOLFSSL_BIO *bio, int idx)
+{
+    WOLFSSL_ENTER("wolfSSL_BIO_get_ex_data");
+    #ifdef HAVE_EX_DATA
+    if (bio != NULL && idx < MAX_EX_DATA && idx >= 0) {
+        return wolfSSL_CRYPTO_get_ex_data(&bio->ex_data, idx);
+    }
+    #else
+    (void)bio;
+    (void)idx;
+    #endif
+    return NULL;
+}
 
 #ifndef NO_WOLFSSL_STUB
 void wolfSSL_RAND_screen(void)
@@ -17324,15 +21669,20 @@
         return BAD_FUNC_ARG;
 
     fp = XFOPEN(file, "r");
-    if (fp == NULL)
-        return BAD_FUNC_ARG;
-
-    XFSEEK(fp, 0, XSEEK_END);
+    if (fp == XBADFILE)
+        return BAD_FUNC_ARG;
+
+    if(XFSEEK(fp, 0, XSEEK_END) != 0) {
+        XFCLOSE(fp);
+        return WOLFSSL_BAD_FILE;
+    }
     sz = XFTELL(fp);
     XREWIND(fp);
 
-    if (sz <= 0)
+    if (sz > MAX_WOLFSSL_FILE_SIZE || sz <= 0) {
+        WOLFSSL_MSG("X509_LOOKUP_load_file size error");
         goto end;
+    }
 
     pem = (byte*)XMALLOC(sz, 0, DYNAMIC_TYPE_PEM);
     if (pem == NULL) {
@@ -17359,7 +21709,8 @@
                 }
             }
 
-            ret = BufferLoadCRL(cm->crl, curr, sz, WOLFSSL_FILETYPE_PEM, 1);
+            ret = BufferLoadCRL(cm->crl, curr, sz, WOLFSSL_FILETYPE_PEM,
+                NO_VERIFY);
             if (ret != WOLFSSL_SUCCESS)
                 goto end;
 #endif
@@ -17420,6 +21771,10 @@
 WOLFSSL_X509_LOOKUP* wolfSSL_X509_STORE_add_lookup(WOLFSSL_X509_STORE* store,
                                                WOLFSSL_X509_LOOKUP_METHOD* m)
 {
+    WOLFSSL_ENTER("SSL_X509_STORE_add_lookup");
+    if (store == NULL)
+        return NULL;
+
     /* Method is a dummy value and is not needed. */
     (void)m;
     /* Make sure the lookup has a back reference to the store. */
@@ -17470,24 +21825,31 @@
     const unsigned char* der;
     int derSz = 0;
 
-    if (x509 == NULL || out == NULL) {
+    WOLFSSL_ENTER("wolfSSL_i2d_X509");
+
+    if (x509 == NULL) {
+        WOLFSSL_LEAVE("wolfSSL_i2d_X509", BAD_FUNC_ARG);
         return BAD_FUNC_ARG;
     }
 
     der = wolfSSL_X509_get_der(x509, &derSz);
     if (der == NULL) {
+        WOLFSSL_LEAVE("wolfSSL_i2d_X509", MEMORY_E);
         return MEMORY_E;
     }
 
-    if (*out == NULL) {
+    if (out != NULL && *out == NULL) {
         *out = (unsigned char*)XMALLOC(derSz, NULL, DYNAMIC_TYPE_OPENSSL);
         if (*out == NULL) {
+            WOLFSSL_LEAVE("wolfSSL_i2d_X509", MEMORY_E);
             return MEMORY_E;
         }
     }
 
-    XMEMCPY(*out, der, derSz);
-
+    if (out != NULL)
+        XMEMCPY(*out, der, derSz);
+
+    WOLFSSL_LEAVE("wolfSSL_i2d_X509", derSz);
     return derSz;
 }
 
@@ -17582,16 +21944,72 @@
     return localPkcs12;
 }
 
-
-/* helper function to get DER buffer from WOLFSSL_EVP_PKEY */
-static int wolfSSL_i2d_PrivateKey(WOLFSSL_EVP_PKEY* key, unsigned char** der)
-{
-    *der = (unsigned char*)key->pkey.ptr;
-
+/* Converts the PKCS12 to DER format and outputs it into bio.
+ *
+ * bio is the structure to hold output DER
+ * pkcs12 structure to create DER from
+ *
+ * return 1 for success or 0 if an error occurs
+ */
+int wolfSSL_i2d_PKCS12_bio(WOLFSSL_BIO *bio, WC_PKCS12 *pkcs12)
+{
+    int ret = WOLFSSL_FAILURE;
+
+    WOLFSSL_ENTER("wolfSSL_i2d_PKCS12_bio");
+
+    if ((bio != NULL) && (pkcs12 != NULL)) {
+        word32 certSz = 0;
+        byte *certDer = NULL;
+
+        certSz = wc_i2d_PKCS12(pkcs12, &certDer, NULL);
+        if ((certSz > 0) && (certDer != NULL)) {
+            if (wolfSSL_BIO_write(bio, certDer, certSz) == (int)certSz) {
+                ret = SSL_SUCCESS;
+            }
+        }
+
+        if (certDer != NULL) {
+            XFREE(certDer, NULL, DYNAMIC_TYPE_PKCS);
+        }
+    }
+
+    return ret;
+}
+
+/* helper function to get raw pointer to DER buffer from WOLFSSL_EVP_PKEY */
+static int wolfSSL_EVP_PKEY_get_der(WOLFSSL_EVP_PKEY* key, unsigned char** der)
+{
+    if (!key)
+        return WOLFSSL_FAILURE;
+    if (der)
+        *der = (unsigned char*)key->pkey.ptr;
     return key->pkey_sz;
 }
 
-
+/* Copies unencrypted DER key buffer into "der". If "der" is null then the size
+ * of buffer needed is returned
+ * NOTE: This also advances the "der" pointer to be at the end of buffer.
+ *
+ * Returns size of key buffer on success
+ */
+int wolfSSL_i2d_PrivateKey(WOLFSSL_EVP_PKEY* key, unsigned char** der)
+{
+    if (key == NULL) {
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    if (key->pkey_sz <= 0 || !key->pkey.ptr) {
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    if (der != NULL) {
+        /* since this function signature has no size value passed in it is
+         * assumed that the user has allocated a large enough buffer */
+        XMEMCPY(*der, key->pkey.ptr, key->pkey_sz);
+        *der += key->pkey_sz;
+    }
+    return key->pkey_sz;
+}
 
 /* Creates a new WC_PKCS12 structure
  *
@@ -17601,7 +22019,7 @@
  * cert  certificate to go into PKCS12 bundle
  * ca    extra certificates that can be added to bundle. Can be NULL
  * keyNID  type of encryption to use on the key (-1 means no encryption)
- * certNID type of ecnryption to use on the certificate
+ * certNID type of encryption to use on the certificate
  * itt     number of iterations with encryption
  * macItt  number of iterations with mac creation
  * keyType flag for signature and/or encryption key
@@ -17631,7 +22049,7 @@
     }
     passSz = (word32)XSTRLEN(pass);
 
-    if ((ret = wolfSSL_i2d_PrivateKey(pkey, &keyDer)) < 0) {
+    if ((ret = wolfSSL_EVP_PKEY_get_der(pkey, &keyDer)) < 0) {
         WOLFSSL_LEAVE("wolfSSL_PKCS12_create", ret);
         return NULL;
     }
@@ -17707,21 +22125,23 @@
 
     WOLFSSL_ENTER("wolfSSL_PKCS12_parse");
 
+    /* make sure we init return args */
+    if (pkey) *pkey = NULL;
+    if (cert) *cert = NULL;
+    if (ca)   *ca = NULL;
+
     if (pkcs12 == NULL || psw == NULL || pkey == NULL || cert == NULL) {
         WOLFSSL_MSG("Bad argument value");
         return WOLFSSL_FAILURE;
     }
 
     heap  = wc_PKCS12_GetHeap(pkcs12);
-    *pkey = NULL;
-    *cert = NULL;
 
     if (ca == NULL) {
         ret = wc_PKCS12_parse(pkcs12, psw, &pk, &pkSz, &certData, &certDataSz,
             NULL);
     }
     else {
-        *ca = NULL;
         ret = wc_PKCS12_parse(pkcs12, psw, &pk, &pkSz, &certData, &certDataSz,
             &certList);
     }
@@ -17861,7 +22281,7 @@
     /* get key type */
     ret = BAD_STATE_E;
     if (pk != NULL) { /* decode key if present */
-        *pkey = wolfSSL_PKEY_new_ex(heap);
+        *pkey = wolfSSL_EVP_PKEY_new_ex(heap);
         if (*pkey == NULL) {
             wolfSSL_X509_free(*cert); *cert = NULL;
             if (ca != NULL) {
@@ -18043,6 +22463,8 @@
                     }
                     else {
                         WOLFSSL_MSG("Certificate is self signed");
+                        if (issuer != NULL)
+                            wolfSSL_X509_free(issuer);
                     }
                 }
                 else {
@@ -18058,6 +22480,88 @@
     return ctx->chain;
 }
 
+/* make shallow copy of the stack, data pointers are copied by reference */
+WOLFSSL_STACK* wolfSSL_sk_X509_dup(WOLFSSL_STACK* sk)
+{
+    unsigned long i;
+    WOLFSSL_STACK* dup = NULL;
+    WOLFSSL_STACK* node = NULL;
+    WOLFSSL_STACK *dIdx = NULL, *sIdx = sk;
+
+    if (sk == NULL) {
+        return NULL;
+    }
+
+    for (i = 0; i < sk->num; i++) {
+
+        node = (WOLFSSL_STACK*)XMALLOC(sizeof(WOLFSSL_STACK), NULL,
+                                         DYNAMIC_TYPE_X509);
+        if (node == NULL) {
+            if (i != 0) {
+                wolfSSL_sk_free(dup);
+            }
+            WOLFSSL_MSG("Memory error");
+            return NULL;
+        }
+        XMEMSET(node, 0, sizeof(WOLFSSL_STACK));
+
+        /* copy sk node to new node, data by reference */
+        node->data.x509 = sIdx->data.x509;
+        node->num = sIdx->num;
+
+        /* insert node into list, progress idx */
+        if (i == 0) {
+            dup = node;
+        } else {
+            dIdx->next = node;
+        }
+
+        dIdx = node;
+        sIdx = sIdx->next;
+    }
+
+    return dup;
+}
+
+
+/* like X509_STORE_CTX_get_chain(), but return a copy with data reference
+   counts increased */
+WOLFSSL_STACK* wolfSSL_X509_STORE_CTX_get1_chain(WOLFSSL_X509_STORE_CTX* ctx)
+{
+    unsigned long i;
+    WOLFSSL_STACK* ref;
+    WOLFSSL_STACK* dup;
+
+    if (ctx == NULL) {
+        return NULL;
+    }
+
+    /* get chain in ctx */
+    ref = wolfSSL_X509_STORE_CTX_get_chain(ctx);
+    if (ref == NULL) {
+        return ref;
+    }
+
+    /* create duplicate of ctx chain */
+    dup = wolfSSL_sk_X509_dup(ref);
+    if (dup == NULL) {
+        return NULL;
+    }
+
+    /* increase ref counts of inner data X509 */
+    ref = dup;
+    for (i = 0; i < dup->num && ref != NULL; i++) {
+        if (wc_LockMutex(&ref->data.x509->refMutex) != 0) {
+            WOLFSSL_MSG("Failed to lock x509 mutex");
+        }
+        ref->data.x509->refCount++;
+        wc_UnLockMutex(&ref->data.x509->refMutex);
+        ref = ref->next;
+    }
+
+    return dup;
+}
+
 
 int wolfSSL_X509_STORE_add_cert(WOLFSSL_X509_STORE* store, WOLFSSL_X509* x509)
 {
@@ -18074,7 +22578,7 @@
             /* AddCA() frees the buffer. */
             XMEMCPY(derCert->buffer,
                             x509->derCert->buffer, x509->derCert->length);
-            result = AddCA(store->cm, &derCert, WOLFSSL_USER_CA, 1);
+            result = AddCA(store->cm, &derCert, WOLFSSL_USER_CA, VERIFY);
         }
     }
 
@@ -18090,36 +22594,41 @@
 WOLFSSL_X509_STORE* wolfSSL_X509_STORE_new(void)
 {
     WOLFSSL_X509_STORE* store = NULL;
-
-    if((store = (WOLFSSL_X509_STORE*)XMALLOC(sizeof(WOLFSSL_X509_STORE), NULL,
-                            DYNAMIC_TYPE_X509_STORE)) == NULL)
+    WOLFSSL_ENTER("SSL_X509_STORE_new");
+
+    if ((store = (WOLFSSL_X509_STORE*)XMALLOC(sizeof(WOLFSSL_X509_STORE), NULL,
+                                    DYNAMIC_TYPE_X509_STORE)) == NULL)
         goto err_exit;
 
-    if((store->cm = wolfSSL_CertManagerNew()) == NULL)
+    XMEMSET(store, 0, sizeof(WOLFSSL_X509_STORE));
+    store->isDynamic = 1;
+
+    if ((store->cm = wolfSSL_CertManagerNew()) == NULL)
         goto err_exit;
 
-    store->isDynamic = 1;
-
 #ifdef HAVE_CRL
     store->crl = NULL;
-    if((store->crl = (WOLFSSL_X509_CRL *)XMALLOC(sizeof(WOLFSSL_X509_CRL),
-                                NULL, DYNAMIC_TYPE_TMP_BUFFER)) == NULL)
+    if ((store->crl = (WOLFSSL_X509_CRL *)XMALLOC(sizeof(WOLFSSL_X509_CRL),
+                                        NULL, DYNAMIC_TYPE_TMP_BUFFER)) == NULL)
+        goto err_exit;
+    if (InitCRL(store->crl, NULL) < 0)
         goto err_exit;
-    if(InitCRL(store->crl, NULL) < 0)
+#endif
+
+#ifdef OPENSSL_EXTRA
+    if ((store->param = (WOLFSSL_X509_VERIFY_PARAM*)XMALLOC(
+                           sizeof(WOLFSSL_X509_VERIFY_PARAM),
+                           NULL,DYNAMIC_TYPE_OPENSSL)) == NULL)
         goto err_exit;
+
 #endif
 
     return store;
 
 err_exit:
-    if(store == NULL)
-        return NULL;
-    if(store->cm != NULL)
-        wolfSSL_CertManagerFree(store->cm);
-#ifdef HAVE_CRL
-    if(store->crl != NULL)
-        wolfSSL_X509_CRL_free(store->crl);
-#endif
+    if (store == NULL)
+        return NULL;
+
     wolfSSL_X509_STORE_free(store);
 
     return NULL;
@@ -18135,6 +22644,10 @@
         if (store->crl != NULL)
             wolfSSL_X509_CRL_free(store->crl);
 #endif
+#ifdef OPENSSL_EXTRA
+        if (store->param != NULL)
+            XFREE(store->param, NULL, DYNAMIC_TYPE_OPENSSL);
+#endif
         XFREE(store, NULL, DYNAMIC_TYPE_X509_STORE);
     }
 }
@@ -18146,6 +22659,9 @@
 
     WOLFSSL_ENTER("wolfSSL_X509_STORE_set_flags");
 
+    if (store == NULL)
+        return WOLFSSL_FAILURE;
+
     if ((flag & WOLFSSL_CRL_CHECKALL) || (flag & WOLFSSL_CRL_CHECK)) {
         ret = wolfSSL_CertManagerEnableCRL(store->cm, (int)flag);
     }
@@ -18178,8 +22694,10 @@
 
 WOLFSSL_X509_STORE_CTX* wolfSSL_X509_STORE_CTX_new(void)
 {
-    WOLFSSL_X509_STORE_CTX* ctx = (WOLFSSL_X509_STORE_CTX*)XMALLOC(
-                                    sizeof(WOLFSSL_X509_STORE_CTX), NULL,
+    WOLFSSL_X509_STORE_CTX* ctx;
+    WOLFSSL_ENTER("X509_STORE_CTX_new");
+
+    ctx = (WOLFSSL_X509_STORE_CTX*)XMALLOC(sizeof(WOLFSSL_X509_STORE_CTX), NULL,
                                     DYNAMIC_TYPE_X509_CTX);
     if (ctx != NULL) {
         ctx->param = NULL;
@@ -18193,15 +22711,41 @@
 int wolfSSL_X509_STORE_CTX_init(WOLFSSL_X509_STORE_CTX* ctx,
      WOLFSSL_X509_STORE* store, WOLFSSL_X509* x509, WOLF_STACK_OF(WOLFSSL_X509)* sk)
 {
+    WOLFSSL_X509* x509_cert;
+    int ret = 0;
     (void)sk;
     WOLFSSL_ENTER("wolfSSL_X509_STORE_CTX_init");
+
     if (ctx != NULL) {
         ctx->store = store;
+        #ifndef WOLFSSL_X509_STORE_CERTS
         ctx->current_cert = x509;
+        #else
+        if(x509 != NULL){
+            ctx->current_cert = wolfSSL_X509_d2i(NULL, x509->derCert->buffer,x509->derCert->length);
+            if(ctx->current_cert == NULL)
+                return WOLFSSL_FATAL_ERROR;
+        } else
+            ctx->current_cert = NULL;
+        #endif
+
         ctx->chain  = sk;
+        /* Add intermediate certificates from stack to store */
+        while (sk != NULL) {
+            x509_cert = sk->data.x509;
+            if (x509_cert != NULL && x509_cert->isCa) {
+                ret = wolfSSL_X509_STORE_add_cert(store, x509_cert);
+                if (ret < 0) {
+                    return WOLFSSL_FATAL_ERROR;
+                }
+            }
+            sk = sk->next;
+        }
+
+        ctx->sesChain = NULL;
         ctx->domain = NULL;
-#ifdef HAVE_EX_DATA
-        ctx->ex_data = NULL;
+#if defined(HAVE_EX_DATA) || defined(FORTRESS)
+        XMEMSET(&ctx->ex_data, 0, sizeof(ctx->ex_data));
 #endif
         ctx->userCtx = NULL;
         ctx->error = 0;
@@ -18226,13 +22770,16 @@
 
 void wolfSSL_X509_STORE_CTX_free(WOLFSSL_X509_STORE_CTX* ctx)
 {
+    WOLFSSL_ENTER("X509_STORE_CTX_free");
     if (ctx != NULL) {
+    #if !defined(OPENSSL_ALL) && !defined(WOLFSSL_QT)
         if (ctx->store != NULL)
             wolfSSL_X509_STORE_free(ctx->store);
+        #ifndef WOLFSSL_KEEP_STORE_CERTS
         if (ctx->current_cert != NULL)
             wolfSSL_FreeX509(ctx->current_cert);
-        if (ctx->chain != NULL)
-            wolfSSL_sk_X509_free(ctx->chain);
+        #endif
+    #endif /* !OPENSSL_ALL && !WOLFSSL_QT */
 #ifdef OPENSSL_EXTRA
         if (ctx->param != NULL){
             XFREE(ctx->param,NULL,DYNAMIC_TYPE_OPENSSL);
@@ -18249,92 +22796,222 @@
     /* Do nothing */
 }
 
-
+#if defined(OPENSSL_ALL) || defined(WOLFSSL_QT)
+/* Returns corresponding X509 error from internal ASN error <e> */
+static int GetX509Error(int e)
+{
+    switch (e) {
+        case ASN_BEFORE_DATE_E:
+            return X509_V_ERR_ERROR_IN_CERT_NOT_BEFORE_FIELD;
+        case ASN_AFTER_DATE_E:
+            return X509_V_ERR_ERROR_IN_CERT_NOT_AFTER_FIELD;
+        case ASN_NO_SIGNER_E:
+            return X509_V_ERR_INVALID_CA;
+        case ASN_SELF_SIGNED_E:
+            return X509_V_ERR_DEPTH_ZERO_SELF_SIGNED_CERT;
+        case ASN_PATHLEN_INV_E:
+        case ASN_PATHLEN_SIZE_E:
+            return X509_V_ERR_PATH_LENGTH_EXCEEDED;
+        case ASN_SIG_OID_E:
+        case ASN_SIG_CONFIRM_E:
+        case ASN_SIG_HASH_E:
+        case ASN_SIG_KEY_E:
+            return X509_V_ERR_CERT_SIGNATURE_FAILURE;
+        default:
+            WOLFSSL_MSG("Error not configured or implemented yet");
+            return e;
+    }
+}
+#endif
+
+/* Verifies certificate chain using WOLFSSL_X509_STORE_CTX
+ * returns 0 on success or < 0 on failure.
+ */
 int wolfSSL_X509_verify_cert(WOLFSSL_X509_STORE_CTX* ctx)
 {
+    int ret = 0;
+#if defined(OPENSSL_ALL) || defined(WOLFSSL_QT)
+    int depth = 0;
+    int error;
+    byte *afterDate, *beforeDate;
+#endif
+    WOLFSSL_ENTER("wolfSSL_X509_verify_cert");
+
     if (ctx != NULL && ctx->store != NULL && ctx->store->cm != NULL
          && ctx->current_cert != NULL && ctx->current_cert->derCert != NULL) {
-        return wolfSSL_CertManagerVerifyBuffer(ctx->store->cm,
+            ret = wolfSSL_CertManagerVerifyBuffer(ctx->store->cm,
                     ctx->current_cert->derCert->buffer,
                     ctx->current_cert->derCert->length,
                     WOLFSSL_FILETYPE_ASN1);
+
+#if defined(OPENSSL_ALL) || defined(WOLFSSL_QT)
+        /* If there was an error, process it and add it to CTX */
+        if (ret < 0) {
+            /* Get corresponding X509 error */
+            error = GetX509Error(ret);
+            /* Set error depth */
+            if (ctx->chain)
+                depth = (int)ctx->chain->num;
+
+            wolfSSL_X509_STORE_CTX_set_error(ctx, error);
+            wolfSSL_X509_STORE_CTX_set_error_depth(ctx, depth);
+            ctx->store->verify_cb(0, ctx);
+        }
+
+        error = 0;
+        /* wolfSSL_CertManagerVerifyBuffer only returns ASN_AFTER_DATE_E or
+         ASN_BEFORE_DATE_E if there are no additional errors found in the
+         cert. Therefore, check if the cert is expired or not yet valid
+         in order to return the correct expected error. */
+        afterDate = ctx->current_cert->notAfter.data;
+        beforeDate = ctx->current_cert->notBefore.data;
+
+        if (ValidateDate(afterDate, ctx->current_cert->notAfter.type,
+                                                                   AFTER) < 1) {
+            error = X509_V_ERR_CERT_HAS_EXPIRED;
+        }
+        else if (ValidateDate(beforeDate, ctx->current_cert->notBefore.type,
+                                                                  BEFORE) < 1) {
+            error = X509_V_ERR_CERT_NOT_YET_VALID;
+        }
+
+        if (error != 0 ) {
+            wolfSSL_X509_STORE_CTX_set_error(ctx, error);
+            wolfSSL_X509_STORE_CTX_set_error_depth(ctx, depth);
+            if (ctx->store && ctx->store->verify_cb)
+                ctx->store->verify_cb(0, ctx);
+        }
+#endif /* OPENSSL_ALL || WOLFSSL_QT */
+        return ret;
     }
     return WOLFSSL_FATAL_ERROR;
 }
+
+
+/* Use the public key to verify the signature. Note: this only verifies
+ * the certificate signature.
+ * returns WOLFSSL_SUCCESS on successful signature verification */
+int wolfSSL_X509_verify(WOLFSSL_X509* x509, WOLFSSL_EVP_PKEY* pkey)
+{
+    int ret;
+    const byte* der;
+    int derSz = 0;
+    int type;
+
+    if (x509 == NULL || pkey == NULL) {
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    der = wolfSSL_X509_get_der(x509, &derSz);
+    if (der == NULL) {
+        WOLFSSL_MSG("Error getting WOLFSSL_X509 DER");
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    switch (pkey->type) {
+        case EVP_PKEY_RSA:
+            type = RSAk;
+            break;
+
+        case EVP_PKEY_EC:
+            type = ECDSAk;
+            break;
+
+        case EVP_PKEY_DSA:
+            type = DSAk;
+            break;
+
+        default:
+            WOLFSSL_MSG("Unknown pkey key type");
+            return WOLFSSL_FATAL_ERROR;
+    }
+
+    ret = CheckCertSignaturePubKey(der, derSz, x509->heap,
+            (unsigned char*)pkey->pkey.ptr, pkey->pkey_sz, type);
+    if (ret == 0) {
+        return WOLFSSL_SUCCESS;
+    }
+    return WOLFSSL_FAILURE;
+}
 #endif /* NO_CERTS */
 
 #if !defined(NO_FILESYSTEM)
 static void *wolfSSL_d2i_X509_fp_ex(XFILE file, void **x509, int type)
 {
     void *newx509 = NULL;
-    DerBuffer*   der = NULL;
     byte *fileBuffer = NULL;
-
-    if (file != XBADFILE)
-    {
-        long sz = 0;
-
-        XFSEEK(file, 0, XSEEK_END);
-        sz = XFTELL(file);
-        XREWIND(file);
-
-        if (sz < 0)
-        {
-            WOLFSSL_MSG("Bad tell on FILE");
-            return NULL;
-        }
-
-        fileBuffer = (byte *)XMALLOC(sz, NULL, DYNAMIC_TYPE_FILE);
-        if (fileBuffer != NULL)
-        {
-            if((long)XFREAD(fileBuffer, 1, sz, file) != sz)
-            {
-                WOLFSSL_MSG("File read failed");
+    long sz = 0;
+
+    /* init variable */
+    if (x509)
+        *x509 = NULL;
+
+    /* argument check */
+    if (file == XBADFILE) {
+        return NULL;
+    }
+
+    /* determine file size */
+    if (XFSEEK(file, 0, XSEEK_END) != 0) {
+        return NULL;
+    }
+    sz = XFTELL(file);
+    XREWIND(file);
+
+    if (sz > MAX_WOLFSSL_FILE_SIZE || sz <= 0) {
+        WOLFSSL_MSG("d2i_X509_fp_ex file size error");
+        return NULL;
+    }
+
+    fileBuffer = (byte *)XMALLOC(sz, NULL, DYNAMIC_TYPE_FILE);
+    if (fileBuffer != NULL) {
+        if ((long)XFREAD(fileBuffer, 1, sz, file) != sz) {
+            WOLFSSL_MSG("File read failed");
+            goto err_exit;
+        }
+        if (type == CERT_TYPE) {
+            newx509 = (void *)wolfSSL_X509_d2i(NULL, fileBuffer, (int)sz);
+        }
+    #ifdef HAVE_CRL
+        else if (type == CRL_TYPE) {
+            newx509 = (void *)wolfSSL_d2i_X509_CRL(NULL, fileBuffer, (int)sz);
+        }
+    #endif
+    #if !defined(NO_ASN) && !defined(NO_PWDBASED)
+        else if (type == PKCS12_TYPE) {
+            if ((newx509 = wc_PKCS12_new()) == NULL) {
                 goto err_exit;
             }
-            if(type == CERT_TYPE)
-                newx509 = (void *)wolfSSL_X509_d2i(NULL, fileBuffer, (int)sz);
-            #ifdef HAVE_CRL
-            else if(type == CRL_TYPE)
-                newx509 = (void *)wolfSSL_d2i_X509_CRL(NULL, fileBuffer, (int)sz);
-            #endif
-            #if !defined(NO_ASN) && !defined(NO_PWDBASED)
-            else if(type == PKCS12_TYPE){
-                if((newx509 = wc_PKCS12_new()) == NULL)
-                    goto err_exit;
-                if(wc_d2i_PKCS12(fileBuffer, (int)sz, (WC_PKCS12*)newx509) < 0)
-                    goto err_exit;
-            }
-            #endif
-            else goto err_exit;
-            if(newx509 == NULL)
-            {
-                WOLFSSL_MSG("X509 failed");
+            if (wc_d2i_PKCS12(fileBuffer, (int)sz, (WC_PKCS12*)newx509) < 0) {
                 goto err_exit;
             }
         }
-    }
-    if (x509 != NULL)
+    #endif
+        else {
+            goto err_exit;
+        }
+        if (newx509 == NULL) {
+            WOLFSSL_MSG("X509 failed");
+            goto err_exit;
+        }
+    }
+
+    if (x509)
         *x509 = newx509;
 
     goto _exit;
 
 err_exit:
-    if(newx509 != NULL){
-        if(type == CERT_TYPE)
-            wolfSSL_X509_free((WOLFSSL_X509*)newx509);
-        #ifdef HAVE_CRL
-        else {
-           if(type == CRL_TYPE)
-                wolfSSL_X509_CRL_free((WOLFSSL_X509_CRL*)newx509);
-        }
-        #endif
-    }
+#if !defined(NO_ASN) && !defined(NO_PWDBASED)
+    if ((newx509 != NULL) && (type == PKCS12_TYPE)) {
+        wc_PKCS12_free((WC_PKCS12*)newx509);
+        newx509 = NULL;
+    }
+#endif
 _exit:
-    if(der != NULL)
-        FreeDer(&der);
-    if(fileBuffer != NULL)
+    if (fileBuffer != NULL)
         XFREE(fileBuffer, NULL, DYNAMIC_TYPE_FILE);
+
     return newx509;
 }
 
@@ -18362,42 +23039,44 @@
 #endif /* !NO_FILESYSTEM */
 
 
-WOLFSSL_X509_CRL* wolfSSL_d2i_X509_CRL(WOLFSSL_X509_CRL** crl, const unsigned char* in, int len)
+WOLFSSL_X509_CRL* wolfSSL_d2i_X509_CRL(WOLFSSL_X509_CRL** crl,
+        const unsigned char* in, int len)
 {
     WOLFSSL_X509_CRL *newcrl = NULL;
-    int ret ;
+    int ret = WOLFSSL_SUCCESS;
 
     WOLFSSL_ENTER("wolfSSL_d2i_X509_CRL");
 
-    if(in == NULL){
+    if (in == NULL) {
         WOLFSSL_MSG("Bad argument value");
-        return NULL;
-    }
-
-    newcrl = (WOLFSSL_X509_CRL*)XMALLOC(sizeof(WOLFSSL_X509_CRL), NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    if (newcrl == NULL){
-        WOLFSSL_MSG("New CRL allocation failed");
-        return NULL;
-    }
-    if (InitCRL(newcrl, NULL) < 0) {
-        WOLFSSL_MSG("Init tmp CRL failed");
-        goto err_exit;
-    }
-    ret = BufferLoadCRL(newcrl, in, len, WOLFSSL_FILETYPE_ASN1, 1);
-    if (ret != WOLFSSL_SUCCESS){
-        WOLFSSL_MSG("Buffer Load CRL failed");
-        goto err_exit;
-    }
-    if(crl){
-        *crl = newcrl;
-    }
-    goto _exit;
-
-err_exit:
-    if(newcrl != NULL)
+    } else {
+        newcrl = (WOLFSSL_X509_CRL*)XMALLOC(sizeof(WOLFSSL_X509_CRL), NULL,
+                DYNAMIC_TYPE_TMP_BUFFER);
+        if (newcrl == NULL){
+            WOLFSSL_MSG("New CRL allocation failed");
+        } else {
+            ret = InitCRL(newcrl, NULL);
+            if (ret < 0) {
+                WOLFSSL_MSG("Init tmp CRL failed");
+            } else {
+                ret = BufferLoadCRL(newcrl, in, len, WOLFSSL_FILETYPE_ASN1,
+                    NO_VERIFY);
+                if (ret != WOLFSSL_SUCCESS) {
+                    WOLFSSL_MSG("Buffer Load CRL failed");
+                } else {
+                    if (crl) {
+                        *crl = newcrl;
+                    }
+                }
+            }
+        }
+    }
+
+    if((ret != WOLFSSL_SUCCESS) && (newcrl != NULL)) {
         wolfSSL_X509_CRL_free(newcrl);
-    newcrl = NULL;
-_exit:
+        newcrl = NULL;
+    }
+
     return newcrl;
 }
 
@@ -18439,82 +23118,30 @@
 #endif
 #endif /* OPENSSL_EXTRA */
 
-#if defined(OPENSSL_EXTRA_X509_SMALL)
-/* Subset of OPENSSL_EXTRA for PKEY operations PKEY free is needed by the
- * subset of X509 API */
-
-WOLFSSL_EVP_PKEY* wolfSSL_PKEY_new(){
-    return wolfSSL_PKEY_new_ex(NULL);
-}
-
-
-WOLFSSL_EVP_PKEY* wolfSSL_PKEY_new_ex(void* heap)
-{
-    WOLFSSL_EVP_PKEY* pkey;
-    int ret;
-    WOLFSSL_ENTER("wolfSSL_PKEY_new");
-    pkey = (WOLFSSL_EVP_PKEY*)XMALLOC(sizeof(WOLFSSL_EVP_PKEY), heap,
-            DYNAMIC_TYPE_PUBLIC_KEY);
-    if (pkey != NULL) {
-        XMEMSET(pkey, 0, sizeof(WOLFSSL_EVP_PKEY));
-        pkey->heap = heap;
-        pkey->type = WOLFSSL_EVP_PKEY_DEFAULT;
-#ifndef HAVE_FIPS
-        ret = wc_InitRng_ex(&(pkey->rng), heap, INVALID_DEVID);
-#else
-        ret = wc_InitRng(&(pkey->rng));
-#endif
-        if (ret != 0){
-            wolfSSL_EVP_PKEY_free(pkey);
-            WOLFSSL_MSG("memory falure");
-            return NULL;
-        }
-    }
-    else {
-        WOLFSSL_MSG("memory failure");
-    }
-
-    return pkey;
-}
-
-
-void wolfSSL_EVP_PKEY_free(WOLFSSL_EVP_PKEY* key)
-{
-    WOLFSSL_ENTER("wolfSSL_PKEY_free");
-    if (key != NULL) {
-        wc_FreeRng(&(key->rng));
-        if (key->pkey.ptr != NULL)
-        {
-            XFREE(key->pkey.ptr, key->heap, DYNAMIC_TYPE_PUBLIC_KEY);
-        }
-        switch(key->type)
-        {
-            #ifndef NO_RSA
-            case EVP_PKEY_RSA:
-                if (key->rsa != NULL && key->ownRsa == 1) {
-                    wolfSSL_RSA_free(key->rsa);
-                }
-                break;
-            #endif /* NO_RSA */
-
-            #ifdef HAVE_ECC
-            case EVP_PKEY_EC:
-                if (key->ecc != NULL && key->ownEcc == 1) {
-                    wolfSSL_EC_KEY_free(key->ecc);
-                }
-                break;
-            #endif /* HAVE_ECC */
-
-            default:
-            break;
-        }
-        XFREE(key, key->heap, DYNAMIC_TYPE_PUBLIC_KEY);
-    }
-}
-#endif /* OPENSSL_EXTRA_X509_SMALL */
-
-
-#ifdef OPENSSL_EXTRA
+#ifdef OPENSSL_EXTRA
+
+/* Gets pointer to X509_STORE that was used to create context.
+ *
+ * Return valid pointer on success, NULL if ctx was NULL or not initialized
+ */
+WOLFSSL_X509_STORE* wolfSSL_X509_STORE_CTX_get0_store(
+        WOLFSSL_X509_STORE_CTX* ctx)
+{
+    WOLFSSL_ENTER("wolfSSL_X509_STORE_CTX_get0_store");
+
+    if (ctx == NULL)
+        return NULL;
+
+    return ctx->store;
+}
+
+WOLFSSL_X509* wolfSSL_X509_STORE_CTX_get0_cert(WOLFSSL_X509_STORE_CTX* ctx)
+{
+    if (ctx == NULL)
+        return NULL;
+
+    return ctx->current_cert;
+}
 
 void wolfSSL_X509_STORE_CTX_set_time(WOLFSSL_X509_STORE_CTX* ctx,
                                     unsigned long flags,
@@ -18529,6 +23156,120 @@
     ctx->param->flags |= WOLFSSL_USE_CHECK_TIME;
 }
 
+#if defined(WOLFSSL_QT) || defined(OPENSSL_ALL)
+#ifndef NO_WOLFSSL_STUB
+int wolfSSL_X509_STORE_CTX_set_purpose(WOLFSSL_X509_STORE_CTX *ctx,
+                                       int purpose)
+{
+    (void)ctx;
+    (void)purpose;
+    WOLFSSL_STUB("wolfSSL_X509_STORE_CTX_set_purpose");
+    return 0;
+}
+#endif
+#endif /* WOLFSSL_QT || OPENSSL_ALL */
+
+#ifndef NO_WOLFSSL_STUB
+/* Returns default file name and path of config file. However
+   a wolfssl.cnf file is not currently supported */
+char* wolfSSL_CONF_get1_default_config_file(void)
+{
+    WOLFSSL_ENTER("wolfSSL_CONF_get1_default_config_file");
+    WOLFSSL_STUB("CONF_get1_default_config_file");
+    return NULL;
+}
+#endif
+/******************************************************************************
+* wolfSSL_X509_VERIFY_PARAM_set1_host - sets the DNS hostname to name
+* hostnames is cleared if name is NULL or empty.
+*
+* RETURNS:
+*
+*/
+int wolfSSL_X509_VERIFY_PARAM_set1_host(WOLFSSL_X509_VERIFY_PARAM* pParam,
+                                         const char* name,
+                                         unsigned int nameSz)
+{
+    unsigned int sz = 0;
+
+    if (pParam == NULL)
+        return WOLFSSL_FAILURE;
+
+    XMEMSET(pParam->hostName, 0, WOLFSSL_HOST_NAME_MAX);
+
+    if (name == NULL)
+        return WOLFSSL_SUCCESS;
+
+    sz = (unsigned int)XSTRLEN(name);
+
+    /* If name is NUL-terminated, namelen can be set to zero. */
+    if(nameSz == 0 || nameSz > sz)
+        nameSz = sz;
+
+    if (nameSz > 0 && name[nameSz - 1] == '\0')
+        nameSz--;
+
+    if (nameSz > WOLFSSL_HOST_NAME_MAX-1)
+        nameSz = WOLFSSL_HOST_NAME_MAX-1;
+
+    if (nameSz > 0)
+        XMEMCPY(pParam->hostName, name, nameSz);
+
+        pParam->hostName[nameSz] = '\0';
+
+    return WOLFSSL_SUCCESS;
+}
+/******************************************************************************
+* wolfSSL_get0_param - return a pointer to the SSL verification parameters
+*
+* RETURNS:
+* returns pointer to the SSL verification parameters on success,
+* otherwise returns NULL
+*/
+WOLFSSL_X509_VERIFY_PARAM* wolfSSL_get0_param(WOLFSSL* ssl)
+{
+    if (ssl == NULL) {
+        return NULL;
+    }
+    return ssl->param;
+}
+
+/* Set the host flag in the X509_VERIFY_PARAM structure */
+void wolfSSL_X509_VERIFY_PARAM_set_hostflags(WOLFSSL_X509_VERIFY_PARAM* param,
+                                             unsigned int flags)
+{
+    if (param != NULL) {
+        param->hostFlags = flags;
+    }
+}
+
+/* Sets the expected IP address to ipasc.
+ *
+ * param is a pointer to the X509_VERIFY_PARAM structure
+ * ipasc is a NULL-terminated string with N.N.N.N for IPv4 and
+ *       HH:HH ... HH:HH for IPv6. There is no validation performed on the
+ *       parameter, and it must be an exact match with the IP in the cert.
+ *
+ * return 1 for success and 0 for failure*/
+int wolfSSL_X509_VERIFY_PARAM_set1_ip_asc(WOLFSSL_X509_VERIFY_PARAM *param,
+        const char *ipasc)
+{
+    int ret = WOLFSSL_FAILURE;
+
+    if (param != NULL) {
+        if (ipasc == NULL) {
+            param->ipasc[0] = '\0';
+        }
+        else {
+            XSTRNCPY(param->ipasc, ipasc, WOLFSSL_MAX_IPSTR-1);
+            param->ipasc[WOLFSSL_MAX_IPSTR-1] = '\0';
+        }
+        ret = WOLFSSL_SUCCESS;
+    }
+
+    return ret;
+}
+
 #ifndef NO_WOLFSSL_STUB
 void wolfSSL_X509_OBJECT_free_contents(WOLFSSL_X509_OBJECT* obj)
 {
@@ -18537,12 +23278,110 @@
 }
 #endif
 
-#ifndef NO_WOLFSSL_STUB
+#ifndef NO_ASN_TIME
 int wolfSSL_X509_cmp_current_time(const WOLFSSL_ASN1_TIME* asnTime)
 {
-    (void)asnTime;
-    WOLFSSL_STUB("X509_cmp_current_time");
-    return 0;
+    return wolfSSL_X509_cmp_time(asnTime, NULL);
+}
+
+/* return -1 if asnTime is earlier than or equal to cmpTime, and 1 otherwise
+ * return 0 on error
+ */
+int wolfSSL_X509_cmp_time(const WOLFSSL_ASN1_TIME* asnTime, time_t* cmpTime)
+{
+    int ret = WOLFSSL_FAILURE, i = 0;
+    time_t tmpTime, *pTime = &tmpTime;
+    byte data_ptr[MAX_TIME_STRING_SZ], inv = 0;
+    struct tm ts, *tmpTs, *ct;
+#if defined(NEED_TMP_TIME)
+    /* for use with gmtime_r */
+    struct tm tmpTimeStorage;
+
+    tmpTs = &tmpTimeStorage;
+#else
+    tmpTs = NULL;
+#endif
+    (void)tmpTs;
+
+    if (asnTime == NULL) {
+        return WOLFSSL_FAILURE;
+    }
+
+    if (cmpTime == NULL) {
+        /* Use current time */
+        *pTime = XTIME(0);
+    }
+    else {
+        pTime = cmpTime;
+    }
+
+    /* Convert ASN1_time to time_t */
+    XMEMSET(&ts, 0, sizeof(struct tm));
+
+    /* Check type */
+    if (asnTime->type == ASN_UTC_TIME) {
+        /* 2-digit year */
+        XMEMCPY(data_ptr, &asnTime->data[i], ASN_UTC_TIME_SIZE);
+        ts.tm_year = (data_ptr[i] - '0') * 10; i++;
+        ts.tm_year += data_ptr[i] - '0'; i++;
+        if (ts.tm_year < 70) {
+            ts.tm_year += 100;
+        }
+    }
+    else if (asnTime->type == ASN_GENERALIZED_TIME) {
+        /* 4-digit year */
+        XMEMCPY(data_ptr, &asnTime->data[i], ASN_GENERALIZED_TIME_SIZE);
+        ts.tm_year = (data_ptr[i] - '0') * 1000; i++;
+        ts.tm_year += (data_ptr[i] - '0') * 100; i++;
+        ts.tm_year += (data_ptr[i] - '0') * 10; i++;
+        ts.tm_year += data_ptr[i] - '0'; i++;
+        ts.tm_year -= 1900;
+    }
+    else {
+        /* Invalid type */
+        inv = 1;
+    }
+
+    if (inv != 1) {
+        ts.tm_mon = (data_ptr[i] - '0') * 10; i++;
+        ts.tm_mon += (data_ptr[i] - '0') - 1; i++; /* January is 0 not 1 */
+        ts.tm_mday = (data_ptr[i] - '0') * 10; i++;
+        ts.tm_mday += (data_ptr[i] - '0'); i++;
+        ts.tm_hour = (data_ptr[i] - '0') * 10; i++;
+        ts.tm_hour += (data_ptr[i] - '0'); i++;
+        ts.tm_min = (data_ptr[i] - '0') * 10; i++;
+        ts.tm_min += (data_ptr[i] - '0'); i++;
+        ts.tm_sec = (data_ptr[i] - '0') * 10; i++;
+        ts.tm_sec += (data_ptr[i] - '0');
+
+        /* Convert to time struct*/
+        ct = XGMTIME(pTime, tmpTs);
+
+        if (ct == NULL)
+            return GETTIME_ERROR;
+
+        /* DateGreaterThan returns 1 for >; 0 for <= */
+        ret = DateGreaterThan(&ts, ct) ? 1 : -1;
+    }
+
+    return ret;
+}
+#endif /* !NO_ASN_TIME */
+
+#if defined(OPENSSL_EXTRA) && !defined(NO_ASN_TIME) && !defined(USER_TIME) && \
+    !defined(TIME_OVERRIDES)
+WOLFSSL_ASN1_TIME *wolfSSL_X509_time_adj_ex(WOLFSSL_ASN1_TIME *asnTime,
+    int offset_day, long offset_sec, time_t *in_tm)
+{
+    /* get current time if in_tm is null */
+    time_t t = in_tm ? *in_tm : XTIME(0);
+    return wolfSSL_ASN1_TIME_adj(asnTime, t, offset_day, offset_sec);
+}
+
+WOLFSSL_ASN1_TIME *wolfSSL_X509_time_adj(WOLFSSL_ASN1_TIME *asnTime,
+    long offset_sec, time_t *in_tm)
+{
+    return wolfSSL_X509_time_adj_ex(asnTime, 0, offset_sec, in_tm);
 }
 #endif
 
@@ -18591,6 +23430,7 @@
     XMEMSET(a, 0, sizeof(WOLFSSL_ASN1_INTEGER));
     a->data    = a->intData;
     a->dataMax = WOLFSSL_ASN1_INTEGER_MAX;
+    a->length  = 0;
     return a;
 }
 
@@ -18607,6 +23447,104 @@
 }
 
 
+/* Duplicate all WOLFSSL_ASN1_INTEGER members from src to dup
+ *  src : WOLFSSL_ASN1_INTEGER to duplicate
+ *  Returns pointer to duplicate WOLFSSL_ASN1_INTEGER
+ */
+WOLFSSL_ASN1_INTEGER* wolfSSL_ASN1_INTEGER_dup(const WOLFSSL_ASN1_INTEGER* src)
+{
+    WOLFSSL_ASN1_INTEGER* dup;
+    WOLFSSL_ENTER("wolfSSL_ASN1_INTEGER_dup");
+    if (!src)
+        return NULL;
+
+    dup = wolfSSL_ASN1_INTEGER_new();
+
+    if (dup == NULL)
+        return NULL;
+
+    dup->negative  = src->negative;
+    dup->dataMax   = src->dataMax;
+    dup->isDynamic = src->isDynamic;
+#if defined(OPENSSL_ALL) || defined(WOLFSSL_QT)
+    dup->length    = src->length;
+#endif
+    XSTRNCPY((char*)dup->intData,(const char*)src->intData,WOLFSSL_ASN1_INTEGER_MAX);
+
+    if (dup->isDynamic && src->data && dup->dataMax) {
+        dup->data = (unsigned char*)
+            XMALLOC(src->dataMax,NULL,DYNAMIC_TYPE_OPENSSL);
+        if (dup->data == NULL) {
+            wolfSSL_ASN1_INTEGER_free(dup);
+            return NULL;
+        }
+        XMEMCPY(dup->data,src->data,dup->dataMax);
+    }
+    return dup;
+}
+
+
+/* sets the value of WOLFSSL_ASN1_INTEGER a to the long value v. */
+int wolfSSL_ASN1_INTEGER_set(WOLFSSL_ASN1_INTEGER *a, long v)
+{
+    int ret = WOLFSSL_SUCCESS; /* return 1 for success and 0 for failure */
+    int j;
+    unsigned int i = 0;
+    unsigned char tmp[sizeof(long)+1] = {0};
+
+    if (a != NULL) {
+        /* dynamically create data buffer, +2 for type and length */
+        a->data = (unsigned char*)XMALLOC((sizeof(long)+1) + 2, NULL,
+                DYNAMIC_TYPE_OPENSSL);
+        if (a->data == NULL) {
+            wolfSSL_ASN1_INTEGER_free(a);
+            ret = WOLFSSL_FAILURE;
+        }
+        else {
+            a->dataMax   = (int)(sizeof(long)+1) + 2;
+            a->isDynamic = 1;
+        }
+    }
+    else {
+        /* Invalid parameter */
+        ret = WOLFSSL_FAILURE;
+    }
+
+
+    if (ret != WOLFSSL_FAILURE) {
+        /* Set type */
+        a->data[i++] = ASN_INTEGER;
+
+        /* Check for negative */
+        if (v < 0) {
+            a->negative = 1;
+            v *= -1;
+        }
+
+        /* Create char buffer */
+        for (j = 0; j < (int)sizeof(long); j++) {
+            if (v == 0) {
+                break;
+            }
+            tmp[j] = (unsigned char)(v & 0xff);
+            v >>= 8;
+        }
+
+        /* Set length */
+        a->data[i++] = (unsigned char)((j == 0) ? ++j : j);
+        /* +2 for type and length */
+        a->length = j + 2;
+
+        /* Copy to data */
+        for (; j > 0; j--) {
+            a->data[i++] = tmp[j-1];
+        }
+    }
+
+    return ret;
+}
+
+
 WOLFSSL_ASN1_INTEGER* wolfSSL_X509_get_serialNumber(WOLFSSL_X509* x509)
 {
     WOLFSSL_ASN1_INTEGER* a;
@@ -18620,7 +23558,7 @@
 
     /* Make sure there is space for the data, ASN.1 type and length. */
     if (x509->serialSz > (WOLFSSL_ASN1_INTEGER_MAX - 2)) {
-        /* dynamicly create data buffer, +2 for type and length */
+        /* dynamically create data buffer, +2 for type and length */
         a->data = (unsigned char*)XMALLOC(x509->serialSz + 2, NULL,
                 DYNAMIC_TYPE_OPENSSL);
         if (a->data == NULL) {
@@ -18629,11 +23567,23 @@
         }
         a->dataMax   = x509->serialSz + 2;
         a->isDynamic = 1;
-    }
-
-    a->data[i++] = ASN_INTEGER;
-    i += SetLength(x509->serialSz, a->data + i);
-    XMEMCPY(&a->data[i], x509->serial, x509->serialSz);
+    } else {
+        /* Use array instead of dynamic memory */
+        a->data    = a->intData;
+        a->dataMax = WOLFSSL_ASN1_INTEGER_MAX;
+    }
+
+    #ifdef WOLFSSL_QT
+        XMEMCPY(&a->data[i], x509->serial, x509->serialSz);
+        a->length = x509->serialSz;
+    #else
+        a->data[i++] = ASN_INTEGER;
+        i += SetLength(x509->serialSz, a->data + i);
+        XMEMCPY(&a->data[i], x509->serial, x509->serialSz);
+        a->length = x509->serialSz + 2;
+    #endif
+
+    x509->serialNumber = a;
 
     return a;
 }
@@ -18642,6 +23592,7 @@
 
 #if defined(WOLFSSL_MYSQL_COMPATIBLE) || defined(WOLFSSL_NGINX) || \
     defined(WOLFSSL_HAPROXY) || defined(OPENSSL_EXTRA) || defined(OPENSSL_ALL)
+#ifndef NO_ASN_TIME
 int wolfSSL_ASN1_TIME_print(WOLFSSL_BIO* bio, const WOLFSSL_ASN1_TIME* asnTime)
 {
     char buf[MAX_TIME_STRING_SZ];
@@ -18669,13 +23620,8 @@
     return ret;
 }
 
-
 char* wolfSSL_ASN1_TIME_to_string(WOLFSSL_ASN1_TIME* t, char* buf, int len)
 {
-    int format;
-    int dateLen;
-    byte* date = (byte*)t;
-
     WOLFSSL_ENTER("wolfSSL_ASN1_TIME_to_string");
 
     if (t == NULL || buf == NULL || len < 5) {
@@ -18683,113 +23629,24 @@
         return NULL;
     }
 
-    format  = *date; date++;
-    dateLen = *date; date++;
-    if (dateLen > len) {
+    if (t->length > len) {
         WOLFSSL_MSG("Length of date is longer then buffer");
         return NULL;
     }
 
-    if (!GetTimeString(date, format, buf, len)) {
+    if (!GetTimeString(t->data, t->type, buf, len)) {
         return NULL;
     }
 
     return buf;
 }
+#endif /* !NO_ASN_TIME */
 #endif /* WOLFSSL_MYSQL_COMPATIBLE || WOLFSSL_NGINX || WOLFSSL_HAPROXY ||
     OPENSSL_EXTRA*/
 
 
 #ifdef OPENSSL_EXTRA
 
-#if !defined(NO_ASN_TIME) && !defined(USER_TIME) && \
-    !defined(TIME_OVERRIDES) && !defined(NO_FILESYSTEM)
-
-WOLFSSL_ASN1_TIME* wolfSSL_ASN1_TIME_adj(WOLFSSL_ASN1_TIME *s, time_t t,
-                                    int offset_day, long offset_sec)
-{
-    const time_t sec_per_day = 24*60*60;
-    struct tm* ts = NULL;
-    struct tm* tmpTime = NULL;
-    time_t t_adj = 0;
-    time_t offset_day_sec = 0;
-
-#if defined(NEED_TMP_TIME)
-    struct tm tmpTimeStorage;
-    tmpTime = &tmpTimeStorage;
-#else
-    (void)tmpTime;
-#endif
-
-    WOLFSSL_ENTER("wolfSSL_ASN1_TIME_adj");
-
-    if (s == NULL){
-        s = (WOLFSSL_ASN1_TIME*)XMALLOC(sizeof(WOLFSSL_ASN1_TIME), NULL,
-                                        DYNAMIC_TYPE_OPENSSL);
-        if (s == NULL){
-            return NULL;
-        }
-    }
-
-    /* compute GMT time with offset */
-    offset_day_sec = offset_day * sec_per_day;
-    t_adj          = t + offset_day_sec + offset_sec;
-    ts             = (struct tm *)XGMTIME(&t_adj, tmpTime);
-    if (ts == NULL){
-        WOLFSSL_MSG("failed to get time data.");
-        XFREE(s, NULL, DYNAMIC_TYPE_OPENSSL);
-        return NULL;
-    }
-
-    /* create ASN1 time notation */
-    /* UTC Time */
-    if (ts->tm_year >= 50 && ts->tm_year < 150){
-        char utc_str[ASN_UTC_TIME_SIZE];
-        int utc_year = 0,utc_mon,utc_day,utc_hour,utc_min,utc_sec;
-        byte *data_ptr = NULL;
-
-        if (ts->tm_year >= 50 && ts->tm_year < 100){
-            utc_year = ts->tm_year;
-        } else if (ts->tm_year >= 100 && ts->tm_year < 150){
-            utc_year = ts->tm_year - 100;
-        }
-        utc_mon  = ts->tm_mon + 1;
-        utc_day  = ts->tm_mday;
-        utc_hour = ts->tm_hour;
-        utc_min  = ts->tm_min;
-        utc_sec  = ts->tm_sec;
-        XSNPRINTF((char *)utc_str, ASN_UTC_TIME_SIZE,
-                  "%02d%02d%02d%02d%02d%02dZ",
-                  utc_year, utc_mon, utc_day, utc_hour, utc_min, utc_sec);
-        data_ptr  = s->data;
-        *data_ptr = (byte) ASN_UTC_TIME; data_ptr++;
-        *data_ptr = (byte) ASN_UTC_TIME_SIZE; data_ptr++;
-        XMEMCPY(data_ptr,(byte *)utc_str, ASN_UTC_TIME_SIZE);
-    /* GeneralizedTime */
-    } else {
-        char gt_str[ASN_GENERALIZED_TIME_SIZE];
-        int gt_year,gt_mon,gt_day,gt_hour,gt_min,gt_sec;
-        byte *data_ptr = NULL;
-
-        gt_year = ts->tm_year + 1900;
-        gt_mon  = ts->tm_mon + 1;
-        gt_day  = ts->tm_mday;
-        gt_hour = ts->tm_hour;
-        gt_min  = ts->tm_min;
-        gt_sec  = ts->tm_sec;
-        XSNPRINTF((char *)gt_str, ASN_GENERALIZED_TIME_SIZE,
-                  "%4d%02d%02d%02d%02d%02dZ",
-                  gt_year, gt_mon, gt_day, gt_hour, gt_min,gt_sec);
-        data_ptr  = s->data;
-        *data_ptr = (byte) ASN_GENERALIZED_TIME; data_ptr++;
-        *data_ptr = (byte) ASN_GENERALIZED_TIME_SIZE; data_ptr++;
-        XMEMCPY(data_ptr,(byte *)gt_str, ASN_GENERALIZED_TIME_SIZE);
-    }
-
-    return s;
-}
-#endif /* !NO_ASN_TIME && !USER_TIME && !TIME_OVERRIDES && !NO_FILESYSTEM */
-
 #ifndef NO_WOLFSSL_STUB
 int wolfSSL_ASN1_INTEGER_cmp(const WOLFSSL_ASN1_INTEGER* a,
                             const WOLFSSL_ASN1_INTEGER* b)
@@ -18810,18 +23667,69 @@
 }
 #endif
 
-
+/* get X509_STORE_CTX ex_data, max idx is MAX_EX_DATA */
 void* wolfSSL_X509_STORE_CTX_get_ex_data(WOLFSSL_X509_STORE_CTX* ctx, int idx)
 {
     WOLFSSL_ENTER("wolfSSL_X509_STORE_CTX_get_ex_data");
-#if defined(HAVE_EX_DATA) || defined(FORTRESS)
-    if (ctx != NULL && idx == 0)
-        return ctx->ex_data;
-#else
+    #if defined(HAVE_EX_DATA) || defined(FORTRESS)
+    if (ctx != NULL) {
+        return wolfSSL_CRYPTO_get_ex_data(&ctx->ex_data, idx);
+    }
+    #else
+    (void)ctx;
+    (void)idx;
+    #endif
+    return NULL;
+}
+
+
+/* set X509_STORE_CTX ex_data, max idx is MAX_EX_DATA. Return WOLFSSL_SUCCESS
+ * on success, WOLFSSL_FAILURE on error. */
+int wolfSSL_X509_STORE_CTX_set_ex_data(WOLFSSL_X509_STORE_CTX* ctx, int idx,
+                                       void *data)
+{
+    WOLFSSL_ENTER("wolfSSL_X509_STORE_CTX_set_ex_data");
+    #if defined(HAVE_EX_DATA) || defined(FORTRESS)
+    if (ctx != NULL)
+    {
+        return wolfSSL_CRYPTO_set_ex_data(&ctx->ex_data, idx, data);
+    }
+    #else
     (void)ctx;
     (void)idx;
-#endif
-    return 0;
+    (void)data;
+    #endif
+    return WOLFSSL_FAILURE;
+}
+
+#if defined(WOLFSSL_APACHE_HTTPD) || defined(OPENSSL_ALL)
+void wolfSSL_X509_STORE_CTX_set_depth(WOLFSSL_X509_STORE_CTX* ctx, int depth)
+{
+    WOLFSSL_ENTER("wolfSSL_X509_STORE_CTX_set_depth");
+    if (ctx)
+        ctx->depth = depth;
+}
+#endif
+
+
+WOLFSSL_X509* wolfSSL_X509_STORE_CTX_get0_current_issuer(
+        WOLFSSL_X509_STORE_CTX* ctx)
+{
+    int ret;
+    WOLFSSL_X509* issuer;
+
+    WOLFSSL_ENTER("wolfSSL_X509_STORE_CTX_get0_current_issuer");
+
+    if (ctx == NULL) {
+        return NULL;
+    }
+
+    ret = wolfSSL_X509_STORE_CTX_get1_issuer(&issuer, ctx, ctx->current_cert);
+    if (ret == WOLFSSL_SUCCESS) {
+        return issuer;
+    }
+
+    return NULL;
 }
 
 
@@ -18850,12 +23758,22 @@
     }
 }
 
+/* Set the error depth in the X509 STORE CTX */
+void wolfSSL_X509_STORE_CTX_set_error_depth(WOLFSSL_X509_STORE_CTX* ctx,
+                                                                      int depth)
+{
+    WOLFSSL_ENTER("wolfSSL_X509_STORE_CTX_set_error_depth");
+
+    if (ctx != NULL) {
+        ctx->error_depth = depth;
+    }
+}
 
 /* Sets a function callback that will send information about the state of all
  * WOLFSSL objects that have been created by the WOLFSSL_CTX structure passed
  * in.
  *
- * ctx WOLFSSL_CTX structre to set callback function in
+ * ctx WOLFSSL_CTX structure to set callback function in
  * f   callback function to use
  */
 void wolfSSL_CTX_set_info_callback(WOLFSSL_CTX* ctx,
@@ -18878,6 +23796,23 @@
     return wolfSSL_ERR_peek_error_line_data(NULL, NULL, NULL, NULL);
 }
 
+int wolfSSL_ERR_GET_LIB(unsigned long err)
+{
+    switch (err) {
+    case PEM_R_NO_START_LINE:
+    case PEM_R_PROBLEMS_GETTING_PASSWORD:
+    case PEM_R_BAD_PASSWORD_READ:
+    case PEM_R_BAD_DECRYPT:
+        return ERR_LIB_PEM;
+    case EVP_R_BAD_DECRYPT:
+    case EVP_R_BN_DECODE_ERROR:
+    case EVP_R_DECODE_ERROR:
+    case EVP_R_PRIVATE_KEY_DECODE_ERROR:
+        return ERR_LIB_EVP;
+    default:
+        return 0;
+    }
+}
 
 /* This function is to find global error values that are the same through out
  * all library version. With wolfSSL having only one set of error codes the
@@ -18902,7 +23837,7 @@
     ret = 0 - ret; /* setting as negative value */
     /* wolfCrypt range is less than MAX (-100)
        wolfSSL range is MIN (-300) and lower */
-    if (ret < MAX_CODE_E) {
+    if (ret < MAX_CODE_E && ret > MIN_CODE_E) {
         return ret;
     }
     else {
@@ -18920,7 +23855,7 @@
  */
 const char* wolfSSL_alert_type_string_long(int alertID)
 {
-    WOLFSSL_ENTER("wolfSSL_aalert_type_string_long");
+    WOLFSSL_ENTER("wolfSSL_alert_type_string_long");
 
     switch (alertID) {
         case close_notify:
@@ -19014,6 +23949,13 @@
                 return illegal_parameter_str;
             }
 
+        case unknown_ca:
+            {
+                static const char unknown_ca_str[] =
+                    "unknown_ca";
+                return unknown_ca_str;
+            }
+
         case decode_error:
             {
                 static const char decode_error_str[] =
@@ -19489,6 +24431,10 @@
                     state = ss_client_finished;
                 else if (ssl->options.side == WOLFSSL_CLIENT_END)
                     state = ss_server_finished;
+                else {
+                    WOLFSSL_MSG("Unknown State");
+                    state = ss_null_state;
+                }
                 break;
             default:
                 WOLFSSL_MSG("Unknown State");
@@ -19550,35 +24496,38 @@
         return OUTPUT_STR[state][protocol][cbmode];
 }
 
-#ifndef NO_WOLFSSL_STUB
+/*
+ * Sets default PEM callback password if null is passed into
+ * the callback parameter of a PEM_read_bio_* function.
+ *
+ * Returns callback phrase size on success or WOLFSSL_FAILURE otherwise.
+ */
 int wolfSSL_PEM_def_callback(char* name, int num, int w, void* key)
 {
-    (void)name;
-    (void)num;
+    int sz;
     (void)w;
-    (void)key;
-    WOLFSSL_STUB("PEM_def_callback");
-    return 0;
-}
-#endif
-
+    WOLFSSL_ENTER("wolfSSL_PEM_def_callback");
+
+    /* We assume that the user passes a default password as userdata */
+    if (key) {
+        sz = (int)XSTRLEN((const char*)key);
+        sz = (sz > num) ? num : sz;
+        XMEMCPY(name, key, sz);
+        return sz;
+    } else {
+        WOLFSSL_MSG("Error, default password cannot be created.");
+        return WOLFSSL_FAILURE;
+    }
+}
+
+#endif /* OPENSSL_EXTRA */
+
+#if defined(OPENSSL_EXTRA) || defined(HAVE_WEBSERVER)
 static long wolf_set_options(long old_op, long op)
 {
     /* if SSL_OP_ALL then turn all bug workarounds on */
     if ((op & SSL_OP_ALL) == SSL_OP_ALL) {
         WOLFSSL_MSG("\tSSL_OP_ALL");
-
-        op |= SSL_OP_MICROSOFT_SESS_ID_BUG;
-        op |= SSL_OP_NETSCAPE_CHALLENGE_BUG;
-        op |= SSL_OP_NETSCAPE_REUSE_CIPHER_CHANGE_BUG;
-        op |= SSL_OP_SSLREF2_REUSE_CERT_TYPE_BUG;
-        op |= SSL_OP_MICROSOFT_BIG_SSLV3_BUFFER;
-        op |= SSL_OP_MSIE_SSLV2_RSA_PADDING;
-        op |= SSL_OP_SSLEAY_080_CLIENT_DH_BUG;
-        op |= SSL_OP_TLS_D5_BUG;
-        op |= SSL_OP_TLS_BLOCK_PADDING_BUG;
-        op |= SSL_OP_TLS_ROLLBACK_BUG;
-        op |= SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS;
     }
 
     /* by default cookie exchange is on with DTLS */
@@ -19590,9 +24539,11 @@
         WOLFSSL_MSG("\tWOLFSSL_OP_NO_SSLv2 : wolfSSL does not support SSLv2");
     }
 
+#ifdef SSL_OP_NO_TLSv1_3
     if ((op & SSL_OP_NO_TLSv1_3) == SSL_OP_NO_TLSv1_3) {
         WOLFSSL_MSG("\tSSL_OP_NO_TLSv1_3");
     }
+#endif
 
     if ((op & SSL_OP_NO_TLSv1_2) == SSL_OP_NO_TLSv1_2) {
         WOLFSSL_MSG("\tSSL_OP_NO_TLSv1_2");
@@ -19610,6 +24561,10 @@
         WOLFSSL_MSG("\tSSL_OP_NO_SSLv3");
     }
 
+    if ((op & SSL_OP_CIPHER_SERVER_PREFERENCE) == SSL_OP_CIPHER_SERVER_PREFERENCE) {
+        WOLFSSL_MSG("\tSSL_OP_CIPHER_SERVER_PREFERENCE");
+    }
+
     if ((op & SSL_OP_NO_COMPRESSION) == SSL_OP_NO_COMPRESSION) {
     #ifdef HAVE_LIBZ
         WOLFSSL_MSG("SSL_OP_NO_COMPRESSION");
@@ -19620,7 +24575,9 @@
 
     return old_op | op;
 }
-
+#endif
+
+#ifdef OPENSSL_EXTRA
 long wolfSSL_set_options(WOLFSSL* ssl, long op)
 {
     word16 haveRSA = 1;
@@ -19635,10 +24592,12 @@
 
     ssl->options.mask = wolf_set_options(ssl->options.mask, op);
 
+#ifdef SSL_OP_NO_TLSv1_3
     if ((ssl->options.mask & SSL_OP_NO_TLSv1_3) == SSL_OP_NO_TLSv1_3) {
         if (ssl->version.minor == TLSv1_3_MINOR)
             ssl->version.minor = TLSv1_2_MINOR;
     }
+#endif
 
     if ((ssl->options.mask & SSL_OP_NO_TLSv1_2) == SSL_OP_NO_TLSv1_2) {
         if (ssl->version.minor == TLSv1_2_MINOR)
@@ -19698,25 +24657,43 @@
     return ssl->options.mask;
 }
 
-/*** TBD ***/
-#ifndef NO_WOLFSSL_STUB
-WOLFSSL_API long wolfSSL_clear_num_renegotiations(WOLFSSL *s)
-{
-    (void)s;
-    WOLFSSL_STUB("SSL_clear_num_renegotiations");
-    return 0;
-}
-#endif
-
-/*** TBD ***/
-#ifndef NO_WOLFSSL_STUB
-WOLFSSL_API long wolfSSL_total_renegotiations(WOLFSSL *s)
-{
-    (void)s;
-    WOLFSSL_STUB("SSL_total_renegotiations");
-    return 0;
-}
-#endif
+
+#if defined(HAVE_SECURE_RENEGOTIATION) \
+        || defined(HAVE_SERVER_RENEGOTIATION_INFO)
+/* clears the counter for number of renegotiations done
+ * returns the current count before it is cleared */
+long wolfSSL_clear_num_renegotiations(WOLFSSL *s)
+{
+    long total;
+
+    WOLFSSL_ENTER("wolfSSL_clear_num_renegotiations");
+    if (s == NULL)
+        return 0;
+
+    total = s->secure_rene_count;
+    s->secure_rene_count = 0;
+    return total;
+}
+
+
+/* return the number of renegotiations since wolfSSL_new */
+long wolfSSL_total_renegotiations(WOLFSSL *s)
+{
+    WOLFSSL_ENTER("wolfSSL_total_renegotiations");
+    return wolfSSL_num_renegotiations(s);
+}
+
+
+/* return the number of renegotiations since wolfSSL_new */
+long wolfSSL_num_renegotiations(WOLFSSL* s)
+{
+    if (s == NULL) {
+        return 0;
+    }
+
+    return s->secure_rene_count;
+}
+#endif /* HAVE_SECURE_RENEGOTIATION || HAVE_SERVER_RENEGOTIATION_INFO */
 
 #ifndef NO_DH
 long wolfSSL_set_tmp_dh(WOLFSSL *ssl, WOLFSSL_DH *dh)
@@ -19882,51 +24859,326 @@
 }
 #endif
 
-#ifndef NO_WOLFSSL_STUB
-/*** TBD ***/
-WOLFSSL_API void *X509_get0_tbs_sigalg(const WOLFSSL_X509 *x)
-{
-    (void)x;
-    WOLFSSL_STUB("X509_get0_tbs_sigalg");
-    return NULL;
-}
-#endif
-
-#ifndef NO_WOLFSSL_STUB
-/*** TBD ***/
-WOLFSSL_API void X509_ALGOR_get0(WOLFSSL_ASN1_OBJECT **paobj, int *pptype, const void **ppval, const void *algor)
-{
-    (void)paobj;
-    (void)pptype;
-    (void)ppval;
-    (void)algor;
-    WOLFSSL_STUB("X509_ALGOR_get0");
-}
-#endif
-
-#ifndef NO_WOLFSSL_STUB
-/*** TBD ***/
-WOLFSSL_API void *X509_get_X509_PUBKEY(void * x)
-{
-    (void)x;
-    WOLFSSL_STUB("X509_get_X509_PUBKEY");
-    return NULL;
-}
-#endif
-
-#ifndef NO_WOLFSSL_STUB
-/*** TBD ***/
-WOLFSSL_API int X509_PUBKEY_get0_param(WOLFSSL_ASN1_OBJECT **ppkalg, const unsigned char **pk, int *ppklen, void **pa, WOLFSSL_EVP_PKEY *pub)
-{
-    (void)ppkalg;
-    (void)pk;
-    (void)ppklen;
-    (void)pa;
-    (void)pub;
-    WOLFSSL_STUB("X509_PUBKEY_get0_param");
-    return WOLFSSL_FAILURE;
-}
-#endif
+#if defined(OPENSSL_ALL) || defined(WOLFSSL_APACHE_HTTPD) \
+    || defined(WOLFSSL_HAPROXY) || defined(WOLFSSL_WPAS)
+WOLFSSL_X509_ALGOR* wolfSSL_X509_ALGOR_new(void)
+{
+    WOLFSSL_X509_ALGOR* ret;
+    ret = (WOLFSSL_X509_ALGOR*)XMALLOC(sizeof(WOLFSSL_X509_ALGOR), NULL,
+                                       DYNAMIC_TYPE_OPENSSL);
+    if (ret) {
+        XMEMSET(ret, 0, sizeof(WOLFSSL_X509_ALGOR));
+    }
+    return ret;
+}
+
+void wolfSSL_X509_ALGOR_free(WOLFSSL_X509_ALGOR *alg)
+{
+    if (alg) {
+        wolfSSL_ASN1_OBJECT_free(alg->algorithm);
+        wolfSSL_ASN1_TYPE_free(alg->parameter);
+        XFREE(alg, NULL, DYNAMIC_TYPE_OPENSSL);
+    }
+}
+
+/* Returns X509_ALGOR struct with signature algorithm */
+const WOLFSSL_X509_ALGOR* wolfSSL_X509_get0_tbs_sigalg(const WOLFSSL_X509 *x509)
+{
+    WOLFSSL_ENTER("X509_get0_tbs_sigalg");
+
+    if (x509 == NULL) {
+        WOLFSSL_MSG("x509 struct NULL error");
+        return NULL;
+    }
+
+    return &x509->algor;
+}
+
+/* Sets paobj pointer to X509_ALGOR signature algorithm */
+void wolfSSL_X509_ALGOR_get0(const WOLFSSL_ASN1_OBJECT **paobj, int *pptype,
+                            const void **ppval, const WOLFSSL_X509_ALGOR *algor)
+{
+    WOLFSSL_ENTER("X509_ALGOR_get0");
+
+    if (!algor) {
+        WOLFSSL_MSG("algor object is NULL");
+        return;
+    }
+
+    if (paobj)
+        *paobj = algor->algorithm;
+    if (ppval)
+        *ppval = algor->algorithm;
+    if (pptype) {
+        if (algor->parameter) {
+            *pptype = algor->parameter->type;
+        }
+        else {
+            /* Default to V_ASN1_OBJECT */
+            *pptype = V_ASN1_OBJECT;
+        }
+    }
+}
+
+/**
+ * Populate algor members.
+ *
+ * @param algor The object to be set
+ * @param aobj The value to be set in algor->algorithm
+ * @param ptype The type of algor->parameter
+ * @param pval The value of algor->parameter
+ * @return WOLFSSL_SUCCESS on success
+ *         WOLFSSL_FAILURE on missing parameters or bad malloc
+ */
+int wolfSSL_X509_ALGOR_set0(WOLFSSL_X509_ALGOR *algor, WOLFSSL_ASN1_OBJECT *aobj,
+                            int ptype, void *pval)
+{
+    if (!algor) {
+        return WOLFSSL_FAILURE;
+    }
+    if (aobj) {
+        algor->algorithm = aobj;
+    }
+    if (pval) {
+        if (!algor->parameter) {
+            algor->parameter = wolfSSL_ASN1_TYPE_new();
+            if (!algor->parameter) {
+                return WOLFSSL_FAILURE;
+            }
+        }
+        wolfSSL_ASN1_TYPE_set(algor->parameter, ptype, pval);
+    }
+    return WOLFSSL_SUCCESS;
+}
+
+/**
+ * Set `a` in a smart way.
+ *
+ * @param a Object to set
+ * @param type The type of object in value
+ * @param value Object to set
+ */
+void wolfSSL_ASN1_TYPE_set(WOLFSSL_ASN1_TYPE *a, int type, void *value)
+{
+    if (!a || !value) {
+        return;
+    }
+    switch (type) {
+        case V_ASN1_OBJECT:
+            a->value.object = value;
+            break;
+        case V_ASN1_UTCTIME:
+            a->value.utctime = value;
+            break;
+        case V_ASN1_GENERALIZEDTIME:
+            a->value.generalizedtime = value;
+            break;
+        default:
+            WOLFSSL_MSG("Unknown or unsupported ASN1_TYPE");
+            return;
+    }
+    a->type = type;
+}
+
+/**
+ * Allocate a new WOLFSSL_ASN1_TYPE object.
+ *
+ * @return New zero'ed WOLFSSL_ASN1_TYPE object
+ */
+WOLFSSL_ASN1_TYPE* wolfSSL_ASN1_TYPE_new(void)
+{
+    WOLFSSL_ASN1_TYPE* ret = (WOLFSSL_ASN1_TYPE*)XMALLOC(sizeof(WOLFSSL_ASN1_TYPE),
+                                                        NULL, DYNAMIC_TYPE_OPENSSL);
+    if (!ret)
+        return NULL;
+    XMEMSET(ret, 0, sizeof(WOLFSSL_ASN1_TYPE));
+    return ret;
+}
+
+/**
+ * Free WOLFSSL_ASN1_TYPE and all its members.
+ *
+ * @param at Object to free
+ */
+void wolfSSL_ASN1_TYPE_free(WOLFSSL_ASN1_TYPE* at)
+{
+    if (at) {
+        switch (at->type) {
+            case V_ASN1_OBJECT:
+                wolfSSL_ASN1_OBJECT_free(at->value.object);
+                break;
+            case V_ASN1_UTCTIME:
+                wolfSSL_ASN1_TIME_free(at->value.utctime);
+                break;
+            case V_ASN1_GENERALIZEDTIME:
+                wolfSSL_ASN1_TIME_free(at->value.generalizedtime);
+                break;
+            default:
+                WOLFSSL_MSG("Unknown or unsupported ASN1_TYPE");
+                break;
+        }
+        XFREE(at, NULL, DYNAMIC_TYPE_OPENSSL);
+    }
+}
+
+/**
+ * Allocate a new WOLFSSL_X509_PUBKEY object.
+ *
+ * @return New zero'ed WOLFSSL_X509_PUBKEY object
+ */
+WOLFSSL_X509_PUBKEY *wolfSSL_X509_PUBKEY_new(void)
+{
+    WOLFSSL_X509_PUBKEY *ret;
+    ret = (WOLFSSL_X509_PUBKEY*)XMALLOC(sizeof(WOLFSSL_X509_PUBKEY), NULL,
+                                        DYNAMIC_TYPE_OPENSSL);
+    if (!ret) {
+        return NULL;
+    }
+    XMEMSET(ret, 0, sizeof(WOLFSSL_X509_PUBKEY));
+    ret->algor = wolfSSL_X509_ALGOR_new();
+    if (!ret->algor) {
+        wolfSSL_X509_PUBKEY_free(ret);
+        return NULL;
+    }
+    return ret;
+}
+
+/**
+ * Free WOLFSSL_X509_PUBKEY and all its members.
+ *
+ * @param at Object to free
+ */
+void wolfSSL_X509_PUBKEY_free(WOLFSSL_X509_PUBKEY *x)
+{
+    if (x) {
+        if (x->algor) {
+            wolfSSL_X509_ALGOR_free(x->algor);
+        }
+        if (x->pkey) {
+            wolfSSL_EVP_PKEY_free(x->pkey);
+        }
+        XFREE(x, NULL, DYNAMIC_TYPE_OPENSSL);
+    }
+}
+
+/* Returns X509_PUBKEY structure containing X509_ALGOR and EVP_PKEY */
+WOLFSSL_X509_PUBKEY* wolfSSL_X509_get_X509_PUBKEY(const WOLFSSL_X509* x509)
+{
+    WOLFSSL_ENTER("X509_get_X509_PUBKEY");
+
+    if (x509 == NULL) {
+        WOLFSSL_MSG("x509 struct NULL error");
+        return NULL;
+    }
+
+    return (WOLFSSL_X509_PUBKEY*)&x509->key;
+}
+
+/* Sets ppkalg pointer to X509_PUBKEY algorithm. Returns WOLFSSL_SUCCESS on
+    success or WOLFSSL_FAILURE on error. */
+int wolfSSL_X509_PUBKEY_get0_param(WOLFSSL_ASN1_OBJECT **ppkalg,
+     const unsigned char **pk, int *ppklen, WOLFSSL_X509_ALGOR **pa,
+     WOLFSSL_X509_PUBKEY *pub)
+{
+    WOLFSSL_ENTER("X509_PUBKEY_get0_param");
+
+    if (!pub || !pub->pubKeyOID) {
+        WOLFSSL_MSG("X509_PUBKEY struct not populated");
+        return WOLFSSL_FAILURE;
+    }
+
+    if (!pub->algor) {
+        if (!(pub->algor = wolfSSL_X509_ALGOR_new())) {
+            return WOLFSSL_FAILURE;
+        }
+        pub->algor->algorithm = wolfSSL_OBJ_nid2obj(pub->pubKeyOID);
+        if (pub->algor->algorithm == NULL) {
+            WOLFSSL_MSG("Failed to create object from NID");
+            return WOLFSSL_FAILURE;
+        }
+    }
+
+    if (pa)
+        *pa = pub->algor;
+    if (ppkalg)
+        *ppkalg = pub->algor->algorithm;
+    if (pk)
+        wolfSSL_EVP_PKEY_get_der(pub->pkey, (unsigned char **)pk);
+    if (ppklen)
+        *ppklen = wolfSSL_EVP_PKEY_get_der(pub->pkey, NULL);
+
+    return WOLFSSL_SUCCESS;
+}
+
+/* Returns a pointer to the pkey when passed a key */
+WOLFSSL_EVP_PKEY* wolfSSL_X509_PUBKEY_get(WOLFSSL_X509_PUBKEY* key)
+{
+    WOLFSSL_ENTER("wolfSSL_X509_PUBKEY_get");
+    if(key == NULL || key->pkey == NULL){
+        WOLFSSL_LEAVE("wolfSSL_X509_PUBKEY_get", BAD_FUNC_ARG);
+        return NULL;
+    }
+    WOLFSSL_LEAVE("wolfSSL_X509_PUBKEY_get", WOLFSSL_SUCCESS);
+    return key->pkey;
+}
+
+int wolfSSL_X509_PUBKEY_set(WOLFSSL_X509_PUBKEY **x, WOLFSSL_EVP_PKEY *key)
+{
+    WOLFSSL_X509_PUBKEY *pk = NULL;
+
+    WOLFSSL_ENTER("wolfSSL_X509_PUBKEY_set");
+
+    if (!x || !key) {
+        return WOLFSSL_FAILURE;
+    }
+
+    if (!(pk = wolfSSL_X509_PUBKEY_new())) {
+        return WOLFSSL_FAILURE;
+    }
+
+    switch (key->type) {
+#ifndef NO_RSA
+    case EVP_PKEY_RSA:
+        pk->algor->algorithm= wolfSSL_OBJ_nid2obj(RSAk);
+        break;
+#endif
+#ifndef NO_DSA
+    case EVP_PKEY_DSA:
+        pk->algor->algorithm = wolfSSL_OBJ_nid2obj(DSAk);
+        break;
+#endif
+#ifdef HAVE_ECC
+    case EVP_PKEY_EC:
+        pk->algor->algorithm = wolfSSL_OBJ_nid2obj(ECDSAk);
+        break;
+#endif
+    default:
+        WOLFSSL_MSG("Unknown key type");
+        goto error;
+    }
+
+    if (!pk->algor->algorithm) {
+        WOLFSSL_MSG("Failed to create algorithm object");
+        goto error;
+    }
+
+    if (!wolfSSL_EVP_PKEY_up_ref(key)) {
+        WOLFSSL_MSG("Failed to up key reference");
+        goto error;
+    }
+    pk->pkey = key;
+
+    wolfSSL_X509_PUBKEY_free(*x);
+    *x = pk;
+    return WOLFSSL_SUCCESS;
+error:
+    if (pk) {
+        wolfSSL_X509_PUBKEY_free(pk);
+    }
+    return WOLFSSL_FAILURE;
+}
+
+#endif /* OPENSSL_ALL || WOLFSSL_APACHE_HTTPD || WOLFSSL_HAPROXY*/
 
 #ifndef NO_WOLFSSL_STUB
 /*** TBD ***/
@@ -19950,6 +25202,48 @@
 }
 #endif
 
+/* Return number of bytes written to BIO on success. 0 on failure. */
+WOLFSSL_API int wolfSSL_i2a_ASN1_OBJECT(WOLFSSL_BIO *bp,
+                                        WOLFSSL_ASN1_OBJECT *a)
+{
+    int length = 0;
+    word32 idx = 0;
+    const char null_str[] = "NULL";
+
+    WOLFSSL_ENTER("wolfSSL_i2a_ASN1_OBJECT");
+
+    if (bp == NULL)
+        return WOLFSSL_FAILURE;
+
+    if (a == NULL) {
+        /* Write "NULL" */
+        if (wolfSSL_BIO_write(bp, null_str, (int)XSTRLEN(null_str)) ==
+                (int)XSTRLEN(null_str)) {
+            return (int)XSTRLEN(null_str);
+        }
+        else {
+            return WOLFSSL_FAILURE;
+        }
+    }
+
+
+    if ((a->obj == NULL) || (a->obj[idx++] != ASN_OBJECT_ID)) {
+        WOLFSSL_MSG("Bad ASN1 Object");
+        return WOLFSSL_FAILURE;
+    }
+
+    if (GetLength((const byte*)a->obj, &idx, &length,
+                   a->objSz) < 0 || length < 0) {
+        return WOLFSSL_FAILURE;
+    }
+
+    if (wolfSSL_BIO_write(bp, a->obj + idx, length) == (int)length) {
+        return length;
+    }
+
+    return WOLFSSL_FAILURE;
+}
+
 #if defined(OPENSSL_ALL) || defined(WOLFSSL_HAPROXY)
 #ifndef NO_WOLFSSL_STUB
 /*** TBD ***/
@@ -19995,33 +25289,20 @@
 }
 #endif
 
-#ifndef NO_WOLFSSL_STUB
-/*** TBD ***/
-WOLFSSL_API int wolfSSL_sk_SSL_CIPHER_num(const void * p)
-{
-    (void)p;
-    WOLFSSL_STUB("wolfSSL_sk_SSL_CIPHER_num");
-    return -1;
-}
-#endif
+
+int wolfSSL_sk_SSL_CIPHER_num(const WOLF_STACK_OF(WOLFSSL_CIPHER)* p)
+{
+    WOLFSSL_ENTER("wolfSSL_sk_SSL_CIPHER_num");
+    if (p == NULL) {
+        return WOLFSSL_FATAL_ERROR;
+    }
+    return (int)p->num;
+}
 
 #if !defined(NO_FILESYSTEM)
 #ifndef NO_WOLFSSL_STUB
 /*** TBD ***/
-WOLFSSL_API WOLFSSL_X509 *wolfSSL_PEM_read_X509(FILE *fp, WOLFSSL_X509 **x, pem_password_cb *cb, void *u)
-{
-    (void)fp;
-    (void)x;
-    (void)cb;
-    (void)u;
-    WOLFSSL_STUB("PEM_read_X509");
-    return NULL;
-}
-#endif
-
-#ifndef NO_WOLFSSL_STUB
-/*** TBD ***/
-WOLFSSL_API WOLFSSL_EVP_PKEY *wolfSSL_PEM_read_PrivateKey(FILE *fp, WOLFSSL_EVP_PKEY **x, pem_password_cb *cb, void *u)
+WOLFSSL_API WOLFSSL_EVP_PKEY *wolfSSL_PEM_read_PrivateKey(XFILE fp, WOLFSSL_EVP_PKEY **x, pem_password_cb *cb, void *u)
 {
     (void)fp;
     (void)x;
@@ -20033,17 +25314,107 @@
 #endif
 #endif
 
-#ifndef NO_WOLFSSL_STUB
-/*** TBD ***/
-WOLFSSL_API int X509_STORE_load_locations(WOLFSSL_X509_STORE *ctx, const char *file, const char *dir)
-{
-    (void)ctx;
-    (void)file;
-    (void)dir;
-    WOLFSSL_STUB("X509_STORE_load_locations");
-    return WOLFSSL_FAILURE;
-}
-#endif
+#if !defined(NO_FILESYSTEM) && !defined(NO_WOLFSSL_DIR)
+/* Loads certificate(s) files in pem format into X509_STORE struct from either
+ * a file or directory.
+ * Returns WOLFSSL_SUCCESS on success or WOLFSSL_FAILURE if an error occurs.
+ */
+WOLFSSL_API int wolfSSL_X509_STORE_load_locations(WOLFSSL_X509_STORE *str,
+                                              const char *file, const char *dir)
+{
+    WOLFSSL_CTX* ctx;
+    char *name = NULL;
+    int ret = WOLFSSL_SUCCESS;
+    int successes = 0;
+#ifdef WOLFSSL_SMALL_STACK
+    ReadDirCtx* readCtx = NULL;
+#else
+    ReadDirCtx  readCtx[1];
+#endif
+
+    WOLFSSL_ENTER("X509_STORE_load_locations");
+
+    if (str == NULL || str->cm == NULL || (file == NULL  && dir == NULL))
+        return WOLFSSL_FAILURE;
+
+    /* tmp ctx for setting our cert manager */
+    ctx = wolfSSL_CTX_new(cm_pick_method());
+    if (ctx == NULL)
+        return WOLFSSL_FAILURE;
+
+    wolfSSL_CertManagerFree(ctx->cm);
+    ctx->cm = str->cm;
+
+#ifdef HAVE_CRL
+    if (str->cm->crl == NULL) {
+        if (wolfSSL_CertManagerEnableCRL(str->cm, 0) != WOLFSSL_SUCCESS) {
+            WOLFSSL_MSG("Enable CRL failed");
+            wolfSSL_CTX_free(ctx);
+            return WOLFSSL_FAILURE;
+        }
+    }
+#endif
+
+    /* Load individual file */
+    if (file) {
+        /* Try to process file with type DETECT_CERT_TYPE to parse the
+           correct certificate header and footer type */
+        ret = ProcessFile(ctx, file, WOLFSSL_FILETYPE_PEM, DETECT_CERT_TYPE,
+                                                      NULL, 0, str->cm->crl, 0);
+        if (ret != WOLFSSL_SUCCESS) {
+            WOLFSSL_MSG("Failed to load file");
+            ret = WOLFSSL_FAILURE;
+        }
+    }
+
+    /* Load files in dir */
+    if (dir && ret == WOLFSSL_SUCCESS) {
+        #ifdef WOLFSSL_SMALL_STACK
+            readCtx = (ReadDirCtx*)XMALLOC(sizeof(ReadDirCtx), ctx->heap,
+                                                       DYNAMIC_TYPE_TMP_BUFFER);
+            if (readCtx == NULL) {
+                WOLFSSL_MSG("Memory error");
+                return WOLFSSL_FAILURE;
+            }
+        #endif
+
+        /* try to load each regular file in dir */
+        ret = wc_ReadDirFirst(readCtx, dir, &name);
+        while (ret == 0 && name) {
+            WOLFSSL_MSG(name);
+            /* Try to process file with type DETECT_CERT_TYPE to parse the
+               correct certificate header and footer type */
+            ret = ProcessFile(ctx, name, WOLFSSL_FILETYPE_PEM, DETECT_CERT_TYPE,
+                                                      NULL, 0, str->cm->crl, 0);
+            /* Not failing on load errors */
+            if (ret != WOLFSSL_SUCCESS)
+                WOLFSSL_MSG("Failed to load file in path, continuing");
+            else
+                successes++;
+
+            ret = wc_ReadDirNext(readCtx, dir, &name);
+        }
+        wc_ReadDirClose(readCtx);
+
+        /* Success if at least one file in dir was loaded */
+        if (successes > 0)
+            ret = WOLFSSL_SUCCESS;
+        else {
+            WOLFSSL_ERROR(ret);
+            ret = WOLFSSL_FAILURE;
+        }
+
+        #ifdef WOLFSSL_SMALL_STACK
+            XFREE(readCtx, ctx->heap, DYNAMIC_TYPE_DIRCTX);
+        #endif
+    }
+
+    ctx->cm = NULL;
+    wolfSSL_CTX_free(ctx);
+
+    return ret;
+}
+#endif /* !NO_FILESYSTEM && !NO_WOLFSSL_DIR */
 
 #ifndef NO_WOLFSSL_STUB
 /*** TBD ***/
@@ -20083,6 +25454,9 @@
 }
 #endif /* HAVE_OCSP */
 
+#endif /* OPENSSL_EXTRA */
+
+#if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
 long wolfSSL_get_verify_result(const WOLFSSL *ssl)
 {
     if (ssl == NULL) {
@@ -20091,7 +25465,9 @@
 
     return ssl->peerVerifyRet;
 }
-
+#endif
+
+#ifdef OPENSSL_EXTRA
 
 #ifndef NO_WOLFSSL_STUB
 /* shows the number of accepts attempted by CTX in it's lifetime */
@@ -20253,9 +25629,11 @@
     }
 
     if (ctx->certificate == NULL) {
+        WOLFSSL_ENTER("wolfSSL_use_certificate_chain_buffer_format");
+
         /* Process buffer makes first certificate the leaf. */
         ret = ProcessBuffer(ctx, der, derSz, WOLFSSL_FILETYPE_ASN1, CERT_TYPE,
-                            NULL, NULL, 1);
+                            NULL, NULL, 1, GET_VERIFY_SETTING_CTX(ctx));
         if (ret != WOLFSSL_SUCCESS) {
             WOLFSSL_LEAVE("wolfSSL_CTX_add_extra_chain_cert", ret);
             return WOLFSSL_FAILURE;
@@ -20269,7 +25647,8 @@
             return WOLFSSL_FAILURE;
         }
         XMEMCPY(derBuffer->buffer, der, derSz);
-        ret = AddCA(ctx->cm, &derBuffer, WOLFSSL_USER_CA, !ctx->verifyNone);
+        ret = AddCA(ctx->cm, &derBuffer, WOLFSSL_USER_CA,
+            GET_VERIFY_SETTING_CTX(ctx));
         if (ret != WOLFSSL_SUCCESS) {
             WOLFSSL_LEAVE("wolfSSL_CTX_add_extra_chain_cert", ret);
             return WOLFSSL_FAILURE;
@@ -20432,7 +25811,6 @@
         return -2;
     }
     else {
-        word32 i;
         word32 sz = sizeof(WOLFSSL_DES_key_schedule);
 
         /* sanity check before call to DES_check */
@@ -20442,19 +25820,9 @@
         }
 
         /* check odd parity */
-        for (i = 0; i < sz; i++) {
-            unsigned char c = *((unsigned char*)myDes + i);
-            if (((c & 0x01) ^
-                ((c >> 1) & 0x01) ^
-                ((c >> 2) & 0x01) ^
-                ((c >> 3) & 0x01) ^
-                ((c >> 4) & 0x01) ^
-                ((c >> 5) & 0x01) ^
-                ((c >> 6) & 0x01) ^
-                ((c >> 7) & 0x01)) != 1) {
-                WOLFSSL_MSG("Odd parity test fail");
-                return -1;
-            }
+        if (wolfSSL_DES_check_key_parity(myDes) != 1) {
+            WOLFSSL_MSG("Odd parity test fail");
+            return -1;
         }
 
         if (wolfSSL_DES_is_weak_key(myDes) == 1) {
@@ -20568,7 +25936,7 @@
     WOLFSSL_ENTER("wolfSSL_DES_set_odd_parity");
 
     for (i = 0; i < sz; i++) {
-        unsigned char c = *((unsigned char*)myDes + i);
+        unsigned char c = (*myDes)[i];
         if ((
             ((c >> 1) & 0x01) ^
             ((c >> 2) & 0x01) ^
@@ -20576,16 +25944,38 @@
             ((c >> 4) & 0x01) ^
             ((c >> 5) & 0x01) ^
             ((c >> 6) & 0x01) ^
-            ((c >> 7) & 0x01)) != 1) {
-            WOLFSSL_MSG("Setting odd parity bit");
-            *((unsigned char*)myDes + i) = *((unsigned char*)myDes + i) | 0x01;
-        }
-    }
-}
-
+            ((c >> 7) & 0x01)) == (c & 0x01)) {
+            WOLFSSL_MSG("Flipping parity bit");
+            (*myDes)[i] = c ^ 0x01;
+        }
+    }
+}
+
+int wolfSSL_DES_check_key_parity(WOLFSSL_DES_cblock *myDes)
+{
+    word32 i;
+    word32 sz = sizeof(WOLFSSL_DES_cblock);
+
+    WOLFSSL_ENTER("wolfSSL_DES_check_key_parity");
+
+    for (i = 0; i < sz; i++) {
+        unsigned char c = (*myDes)[i];
+        if ((
+            ((c >> 1) & 0x01) ^
+            ((c >> 2) & 0x01) ^
+            ((c >> 3) & 0x01) ^
+            ((c >> 4) & 0x01) ^
+            ((c >> 5) & 0x01) ^
+            ((c >> 6) & 0x01) ^
+            ((c >> 7) & 0x01)) == (c & 0x01)) {
+            return 0;
+        }
+    }
+    return 1;
+}
 
 #ifdef WOLFSSL_DES_ECB
-/* Encrpyt or decrypt input message desa with key and get output in desb.
+/* Encrypt or decrypt input message desa with key and get output in desb.
  * if enc is DES_ENCRYPT,input message is encrypted or
  * if enc is DES_DECRYPT,input message is decrypted.
  * */
@@ -20608,7 +25998,7 @@
         if (enc){
             if (wc_Des_EcbEncrypt(&myDes, (byte*) desb, (const byte*) desa,
                         sizeof(WOLFSSL_DES_cblock)) != 0){
-                WOLFSSL_MSG("wc_Des_EcbEncrpyt return error.");
+                WOLFSSL_MSG("wc_Des_EcbEncrypt return error.");
             }
         } else {
             if (wc_Des_EcbDecrypt(&myDes, (byte*) desb, (const byte*) desa,
@@ -20619,7 +26009,6 @@
     }
 }
 #endif
-
 #endif /* NO_DES3 */
 
 #ifndef NO_RC4
@@ -20769,7 +26158,7 @@
 #ifdef HAVE_AES_ECB
 /* Encrypt/decrypt a 16 byte block of data using the key passed in.
  *
- * in  buffer to encrypt/decyrpt
+ * in  buffer to encrypt/decrypt
  * out buffer to hold result of encryption/decryption
  * key AES structure to use with encryption/decryption
  * enc AES_ENCRPT for encryption and AES_DECRYPT for decryption
@@ -20804,11 +26193,11 @@
 }
 #endif /* HAVE_AES_ECB */
 
-
+#ifdef HAVE_AES_CBC
 /* Encrypt data using key and iv passed in. iv gets updated to most recent iv
- * state after encryptiond/decryption.
- *
- * in  buffer to encrypt/decyrpt
+ * state after encryption/decryption.
+ *
+ * in  buffer to encrypt/decrypt
  * out buffer to hold result of encryption/decryption
  * len length of input buffer
  * key AES structure to use with encryption/decryption
@@ -20822,7 +26211,7 @@
 
     WOLFSSL_ENTER("wolfSSL_AES_cbc_encrypt");
 
-    if (key == NULL || in == NULL || out == NULL || iv == NULL) {
+    if (key == NULL || in == NULL || out == NULL || iv == NULL || len == 0) {
         WOLFSSL_MSG("Error, Null argument passed in");
         return;
     }
@@ -20847,12 +26236,13 @@
     /* to be compatible copy iv to iv buffer after completing operation */
     XMEMCPY(iv, (byte*)(aes->reg), AES_BLOCK_SIZE);
 }
+#endif /* HAVE_AES_CBC */
 
 
 /* Encrypt data using CFB mode with key and iv passed in. iv gets updated to
- * most recent iv state after encryptiond/decryption.
- *
- * in  buffer to encrypt/decyrpt
+ * most recent iv state after encryption/decryption.
+ *
+ * in  buffer to encrypt/decrypt
  * out buffer to hold result of encryption/decryption
  * len length of input buffer
  * key AES structure to use with encryption/decryption
@@ -20910,25 +26300,185 @@
 }
 #endif /* NO_AES */
 
-#ifndef NO_WOLFSSL_STUB
+#ifndef NO_FILESYSTEM
+    #ifdef __clang__
+        #pragma clang diagnostic push
+        #pragma clang diagnostic ignored "-Wformat-nonliteral"
+    #endif
+#endif
+
+#if !defined(NO_FILESYSTEM) && defined (OPENSSL_EXTRA)
+/* returns amount printed on success, negative in fail case */
+int wolfSSL_BIO_vprintf(WOLFSSL_BIO* bio, const char* format, va_list args)
+{
+    int ret = -1;
+
+    if (bio == NULL)
+        return WOLFSSL_FATAL_ERROR;
+
+    switch (bio->type) {
+        case WOLFSSL_BIO_FILE:
+            if (bio->ptr == NULL) {
+                va_end(args);
+                return -1;
+            }
+            ret = vfprintf((XFILE)bio->ptr, format, args);
+            break;
+
+        case WOLFSSL_BIO_MEMORY:
+    #if defined(OPENSSL_EXTRA) && !defined(_WIN32)
+        case WOLFSSL_BIO_SSL:
+            {
+                int count;
+                char* pt = NULL;
+                va_list copy;
+
+                va_copy(copy, args);
+                count = vsnprintf(NULL, 0, format, args);
+                if (count >= 0)
+                {
+                    pt = (char*)XMALLOC(count + 1, bio->heap,
+                                        DYNAMIC_TYPE_TMP_BUFFER);
+                    if (pt != NULL)
+                    {
+                        count = vsnprintf(pt, count + 1, format, copy);
+                        if (count >= 0)
+                        {
+                            ret = wolfSSL_BIO_write(bio, pt, count);
+                        }
+                        XFREE(pt, bio->heap, DYNAMIC_TYPE_TMP_BUFFER);
+                    }
+                }
+                va_end(copy);
+            }
+            break;
+    #endif
+
+        default:
+            WOLFSSL_MSG("Unsupported WOLFSSL_BIO type for wolfSSL_BIO_printf");
+            break;
+    }
+
+    return ret;
+}
+
+/* returns amount printed on success, negative in fail case */
 int wolfSSL_BIO_printf(WOLFSSL_BIO* bio, const char* format, ...)
 {
-    (void)bio;
-    (void)format;
-    WOLFSSL_STUB("BIO_printf");
-    return 0;
-}
-#endif
-
-#ifndef NO_WOLFSSL_STUB
+    int ret;
+    va_list args;
+    va_start(args, format);
+
+    ret = wolfSSL_BIO_vprintf(bio, format, args);
+
+    va_end(args);
+
+    return ret;
+}
+
+#endif /* !defined(NO_FILESYSTEM) && defined (OPENSSL_EXTRA) */
+
+#if !defined(NO_FILESYSTEM) && defined(__clang__)
+#pragma clang diagnostic pop
+#endif
+
+#undef  LINE_LEN
+#define LINE_LEN 16
+int wolfSSL_BIO_dump(WOLFSSL_BIO *bio, const char *buf, int length)
+{
+    int ret = 0;
+
+    if (bio == NULL)
+        return 0;
+
+#ifndef NO_FILESYSTEM
+    if (bio->type == WOLFSSL_BIO_FILE) {
+        int i;
+        char line[80];
+
+        if (!buf) {
+            return fputs("\tNULL", (XFILE)bio->ptr);
+        }
+
+        sprintf(line, "\t");
+        for (i = 0; i < LINE_LEN; i++) {
+            if (i < length)
+                sprintf(line + 1 + i * 3,"%02x ", buf[i]);
+            else
+                sprintf(line + 1 + i * 3, "   ");
+        }
+        sprintf(line + 1 + LINE_LEN * 3, "| ");
+        for (i = 0; i < LINE_LEN; i++) {
+            if (i < length) {
+                sprintf(line + 3 + LINE_LEN * 3 + i,
+                     "%c", 31 < buf[i] && buf[i] < 127 ? buf[i] : '.');
+            }
+        }
+        ret += fputs(line, (XFILE)bio->ptr);
+
+        if (length > LINE_LEN)
+            ret += wolfSSL_BIO_dump(bio, buf + LINE_LEN, length - LINE_LEN);
+    }
+#else
+    (void)buf;
+    (void)length;
+#endif
+
+    return ret;
+}
+
+#ifndef NO_ASN_TIME
 int wolfSSL_ASN1_UTCTIME_print(WOLFSSL_BIO* bio, const WOLFSSL_ASN1_UTCTIME* a)
 {
-    (void)bio;
-    (void)a;
-    WOLFSSL_STUB("ASN1_UTCTIME_print");
-    return 0;
-}
-#endif
+    WOLFSSL_ENTER("ASN1_UTCTIME_print");
+    if (bio == NULL || a == NULL) {
+        return WOLFSSL_FAILURE;
+    }
+    if (a->type != ASN_UTC_TIME) {
+        WOLFSSL_MSG("Error, not UTC_TIME");
+        return WOLFSSL_FAILURE;
+    }
+
+    return wolfSSL_ASN1_TIME_print(bio, a);
+}
+
+/* Checks the ASN1 syntax of "a"
+ * returns WOLFSSL_SUCCESS (1)  if correct otherwise WOLFSSL_FAILURE (0) */
+int wolfSSL_ASN1_TIME_check(const WOLFSSL_ASN1_TIME* a)
+{
+    char buf[MAX_TIME_STRING_SZ];
+
+    WOLFSSL_ENTER("wolfSSL_ASN1_TIME_check");
+
+    /* if can parse the WOLFSSL_ASN1_TIME passed in then consider syntax good */
+    if (wolfSSL_ASN1_TIME_to_string((WOLFSSL_ASN1_TIME*)a, buf,
+                MAX_TIME_STRING_SZ) == NULL) {
+        return WOLFSSL_FAILURE;
+    }
+    return WOLFSSL_SUCCESS;
+}
+#endif /* !NO_ASN_TIME */
+
+#ifndef NO_WOLFSSL_STUB
+int wolfSSL_ASN1_TIME_diff(int *pday, int *psec,
+                   const WOLFSSL_ASN1_TIME *from, const WOLFSSL_ASN1_TIME *to)
+{
+    WOLFSSL_STUB("wolfSSL_ASN1_TIME_diff");
+    (void)pday;
+    (void)psec;
+    (void)from;
+    (void)to;
+    return 0;
+}
+
+WOLFSSL_API WOLFSSL_ASN1_TIME *wolfSSL_ASN1_TIME_set(WOLFSSL_ASN1_TIME *s, time_t t)
+{
+    WOLFSSL_STUB("wolfSSL_ASN1_TIME_set");
+    (void)s;
+    (void)t;
+    return s;
+}
+#endif /* !NO_WOLFSSL_STUB */
 
 /* Return the month as a string.
  *
@@ -20946,12 +26496,17 @@
 int wolfSSL_ASN1_GENERALIZEDTIME_print(WOLFSSL_BIO* bio,
     const WOLFSSL_ASN1_GENERALIZEDTIME* asnTime)
 {
-    const char* p = (const char *)(asnTime->data + 2);
+    const char* p;
     WOLFSSL_ENTER("wolfSSL_ASN1_GENERALIZEDTIME_print");
 
     if (bio == NULL || asnTime == NULL)
         return BAD_FUNC_ARG;
 
+    if (asnTime->type != ASN_GENERALIZED_TIME) {
+        WOLFSSL_MSG("Error, not GENERALIZED_TIME");
+        return WOLFSSL_FAILURE;
+    }
+    p = (const char *)(asnTime->data);
     /* GetTimeString not always available. */
     wolfSSL_BIO_write(bio, MonthStr(p + 4), 3);
     wolfSSL_BIO_write(bio, " ", 1);
@@ -20980,33 +26535,322 @@
     XMEMSET(asn1Time->data, 0, sizeof(asn1Time->data));
 }
 
-int  wolfSSL_sk_num(WOLF_STACK_OF(WOLFSSL_ASN1_OBJECT)* sk)
+int wolfSSL_sk_num(WOLFSSL_STACK* sk)
+{
+    WOLFSSL_ENTER("wolfSSL_sk_num");
+    if (sk == NULL)
+        return 0;
+    return (int)sk->num;
+}
+
+void* wolfSSL_sk_value(WOLFSSL_STACK* sk, int i)
+{
+#if defined(OPENSSL_ALL) || defined(WOLFSSL_QT)
+    int offset = i;
+#endif
+    WOLFSSL_ENTER("wolfSSL_sk_value");
+
+    for (; sk != NULL && i > 0; i--)
+        sk = sk->next;
+    if (sk == NULL)
+        return NULL;
+
+    switch (sk->type) {
+        case STACK_TYPE_X509:
+            return (void*)sk->data.x509;
+        case STACK_TYPE_CIPHER:
+        #if defined(OPENSSL_ALL) || defined(WOLFSSL_QT)
+            sk->data.cipher.offset = offset;
+        #endif
+            return (void*)&sk->data.cipher;
+        case STACK_TYPE_GEN_NAME:
+            return (void*)sk->data.gn;
+        case STACK_TYPE_ACCESS_DESCRIPTION:
+            return (void*)sk->data.access;
+        case STACK_TYPE_OBJ:
+            return (void*)sk->data.obj;
+        case STACK_TYPE_X509_EXT:
+            return (void*)sk->data.ext;
+        case STACK_TYPE_CONF_VALUE:
+            return (void*)sk->data.conf->value;
+        case STACK_TYPE_NULL:
+        default:
+            return (void*)sk->data.generic;
+    }
+}
+
+/* Free the structure for ASN1_OBJECT stack */
+void wolfSSL_sk_free(WOLFSSL_STACK* sk)
+{
+    WOLFSSL_ENTER("wolfSSL_sk_free");
+
+    if (sk == NULL) {
+        WOLFSSL_MSG("Error, BAD_FUNC_ARG");
+        return;
+    }
+
+    switch (sk->type) {
+        case STACK_TYPE_X509:
+            wolfSSL_sk_X509_free(sk);
+            break;
+        #if defined(OPENSSL_ALL)
+        case STACK_TYPE_CIPHER:
+            wolfSSL_sk_CIPHER_free(sk);
+            break;
+        #endif
+        case STACK_TYPE_GEN_NAME:
+            wolfSSL_sk_GENERAL_NAME_free(sk);
+            break;
+        #if defined(OPENSSL_ALL) || defined (WOLFSSL_QT)
+        case STACK_TYPE_ACCESS_DESCRIPTION:
+            wolfSSL_sk_ACCESS_DESCRIPTION_free(sk);
+            break;
+        #endif
+        case STACK_TYPE_OBJ:
+            wolfSSL_sk_ASN1_OBJECT_free(sk);
+            break;
+        #ifdef OPENSSL_ALL
+        case STACK_TYPE_X509_INFO:
+            wolfSSL_sk_X509_INFO_free(sk);
+            break;
+        case STACK_TYPE_X509_NAME:
+            wolfSSL_sk_X509_NAME_free(sk);
+            break;
+        case STACK_TYPE_CONF_VALUE:
+            wolfSSL_sk_CONF_VALUE_free(sk);
+            break;
+        #endif
+        case STACK_TYPE_NULL:
+        default:
+            wolfSSL_sk_GENERIC_free(sk);
+    }
+}
+/* Frees each node in the stack and frees the stack.
+ * Does not free any internal members of the stack nodes.
+ */
+void wolfSSL_sk_GENERIC_pop_free(WOLFSSL_STACK* sk,
+    void (*f) (void*))
+{
+    WOLFSSL_STACK* node;
+    WOLFSSL_STACK* tmp;
+    WOLFSSL_ENTER("wolfSSL_sk_GENERIC_pop_free");
+
+    if (sk == NULL)
+        return;
+
+    /* parse through stack freeing each node */
+    node = sk->next;
+    while (node) {
+        tmp  = node;
+        node = node->next;
+        if (f)
+            f(tmp->data.generic);
+        tmp->data.generic = NULL;
+        XFREE(tmp, NULL, DYNAMIC_TYPE_OPENSSL);
+    }
+
+    /* free head of stack */
+    XFREE(sk, NULL, DYNAMIC_TYPE_ASN1);
+}
+
+/* return 1 on success 0 on fail */
+int wolfSSL_sk_GENERIC_push(WOLFSSL_STACK* sk, void* generic)
+{
+    WOLFSSL_STACK* node;
+
+    WOLFSSL_ENTER("wolfSSL_sk_GENERIC_push");
+
+    if (sk == NULL || generic == NULL) {
+        return WOLFSSL_FAILURE;
+    }
+
+    /* no previous values in stack */
+    if (sk->data.generic == NULL) {
+        sk->data.generic = generic;
+        sk->num += 1;
+        return WOLFSSL_SUCCESS;
+    }
+
+    /* stack already has value(s) create a new node and add more */
+    node = (WOLFSSL_STACK*)XMALLOC(sizeof(WOLFSSL_STACK),NULL,DYNAMIC_TYPE_SSL);
+    if (node == NULL) {
+        WOLFSSL_MSG("Memory error");
+        return WOLFSSL_FAILURE;
+    }
+    XMEMSET(node, 0, sizeof(WOLFSSL_STACK));
+
+    /* push new node onto head of stack */
+    node->type         = sk->type;
+    node->data.generic = sk->data.generic;
+    node->next         = sk->next;
+    sk->next           = node;
+    sk->data.generic   = generic;
+    sk->num           += 1;
+
+    return WOLFSSL_SUCCESS;
+}
+void wolfSSL_sk_GENERIC_free(WOLFSSL_STACK* sk)
+{
+    wolfSSL_sk_GENERIC_pop_free(sk, NULL);
+}
+
+
+/* Free all nodes in a stack */
+void wolfSSL_sk_pop_free(WOLF_STACK_OF(WOLFSSL_ASN1_OBJECT)* sk,
+                                                       wolfSSL_sk_freefunc func)
+{
+    WOLFSSL_ENTER("wolfSSL_sk_pop_free");
+
+    if (sk == NULL) {
+        WOLFSSL_MSG("Error, BAD_FUNC_ARG");
+        return;
+    }
+
+    switch(sk->type) {
+        #if defined(OPENSSL_ALL) || defined (WOLFSSL_QT)
+        case STACK_TYPE_ACCESS_DESCRIPTION:
+            wolfSSL_sk_ACCESS_DESCRIPTION_pop_free(sk,
+                                                wolfSSL_ACCESS_DESCRIPTION_free);
+            break;
+        #endif
+        case STACK_TYPE_X509:
+            wolfSSL_sk_X509_pop_free(sk,(void (*)(WOLFSSL_X509*))func);
+            break;
+        case STACK_TYPE_OBJ:
+            wolfSSL_sk_ASN1_OBJECT_pop_free(sk,
+                                          (void (*)(WOLFSSL_ASN1_OBJECT*))func);
+            break;
+        case STACK_TYPE_GEN_NAME:
+            wolfSSL_sk_GENERAL_NAME_pop_free(sk,
+                                         (void (*)(WOLFSSL_GENERAL_NAME*))func);
+            break;
+        #ifdef OPENSSL_ALL
+        case STACK_TYPE_X509_NAME:
+            wolfSSL_sk_X509_NAME_pop_free(sk,
+                                         (void (*)(WOLFSSL_X509_NAME*))func);
+            break;
+        case STACK_TYPE_X509_EXT:
+            wolfSSL_sk_X509_EXTENSION_pop_free(sk,
+                                       (void (*)(WOLFSSL_X509_EXTENSION*))func);
+            break;
+        #endif
+        #if defined(OPENSSL_ALL)
+        case STACK_TYPE_X509_INFO:
+            wolfSSL_sk_X509_INFO_pop_free(sk,
+                                            (void (*)(WOLFSSL_X509_INFO*))func);
+            break;
+        #endif
+        default:
+            wolfSSL_sk_GENERIC_pop_free(sk,
+                                          (void (*)(void*))func);
+            break;
+    }
+}
+
+#if defined(OPENSSL_ALL)
+/* Free the structure for WOLFSSL_CONF_VALUE stack
+ *
+ * sk  stack to free nodes in
+ */
+void wolfSSL_sk_CONF_VALUE_free(WOLF_STACK_OF(WOLFSSL_CONF_VALUE)* sk)
+{
+    WOLFSSL_STACK* node;
+    WOLFSSL_STACK* tmp;
+    WOLFSSL_ENTER("wolfSSL_sk_CONF_VALUE_free");
+
+    if (sk == NULL)
+        return;
+
+    /* parse through stack freeing each node */
+    node = sk->next;
+    while (node) {
+        tmp  = node;
+        node = node->next;
+        XFREE(tmp, NULL, DYNAMIC_TYPE_OPENSSL);
+    }
+
+    /* free head of stack */
+    XFREE(sk, NULL, DYNAMIC_TYPE_ASN1);
+}
+#endif
+
+/* Creates and returns a new null stack. */
+WOLFSSL_STACK* wolfSSL_sk_new_null(void)
+{
+    WOLFSSL_STACK* sk;
+    WOLFSSL_ENTER("wolfSSL_sk_new_null");
+
+    sk = (WOLFSSL_STACK*)XMALLOC(sizeof(WOLFSSL_STACK), NULL,
+                                 DYNAMIC_TYPE_OPENSSL);
+    if (sk == NULL) {
+        WOLFSSL_MSG("WOLFSSL_STACK memory error");
+        return NULL;
+    }
+
+    XMEMSET(sk, 0, sizeof(WOLFSSL_STACK));
+    sk->type = STACK_TYPE_NULL;
+
+    return sk;
+}
+
+/* frees the wolfSSL_BASIC_CONSTRAINTS object */
+void wolfSSL_BASIC_CONSTRAINTS_free(WOLFSSL_BASIC_CONSTRAINTS *bc)
+{
+    WOLFSSL_ENTER("wolfSSL_BASIC_CONSTRAINTS_free");
+    if (bc == NULL) {
+        WOLFSSL_MSG("Argument is NULL");
+        return;
+    }
+    if (bc->pathlen) {
+        wolfSSL_ASN1_INTEGER_free(bc->pathlen);
+    }
+    XFREE(bc, NULL, DYNAMIC_TYPE_OPENSSL);
+}
+
+/* frees the wolfSSL_AUTHORITY_KEYID object */
+void wolfSSL_AUTHORITY_KEYID_free(WOLFSSL_AUTHORITY_KEYID *id)
+{
+    WOLFSSL_ENTER("wolfSSL_AUTHORITY_KEYID_free");
+    if(id == NULL) {
+        WOLFSSL_MSG("Argument is NULL");
+        return;
+    }
+    if (id->keyid) {
+        wolfSSL_ASN1_STRING_free(id->keyid);
+    }
+    if (id->issuer) {
+        wolfSSL_ASN1_OBJECT_free(id->issuer);
+    }
+    if (id->serial) {
+        wolfSSL_ASN1_INTEGER_free(id->serial);
+    }
+    XFREE(id, NULL, DYNAMIC_TYPE_OPENSSL);
+}
+
+int wolfSSL_sk_SSL_COMP_num(WOLF_STACK_OF(WOLFSSL_COMP)* sk)
 {
     if (sk == NULL)
         return 0;
     return (int)sk->num;
 }
 
-void* wolfSSL_sk_value(WOLF_STACK_OF(WOLFSSL_ASN1_OBJECT)* sk, int i)
-{
-    for (; sk != NULL && i > 0; i--)
-        sk = sk->next;
-    if (sk == NULL)
-        return NULL;
-    return (void*)sk->data.obj;
-}
-
 #endif /* OPENSSL_EXTRA */
 
 #if defined(OPENSSL_EXTRA) || defined(HAVE_EXT_CACHE)
-/* stunnel 4.28 needs */
+/* stunnel 4.28 needs
+ *
+ * Callback that is called if a session tries to resume but could not find
+ * the session to resume it.
+ */
 void wolfSSL_CTX_sess_set_get_cb(WOLFSSL_CTX* ctx,
                     WOLFSSL_SESSION*(*f)(WOLFSSL*, unsigned char*, int, int*))
 {
+    if (ctx == NULL)
+        return;
+
 #ifdef HAVE_EXT_CACHE
     ctx->get_sess_cb = f;
 #else
-    (void)ctx;
     (void)f;
 #endif
 }
@@ -21014,10 +26858,12 @@
 void wolfSSL_CTX_sess_set_new_cb(WOLFSSL_CTX* ctx,
                              int (*f)(WOLFSSL*, WOLFSSL_SESSION*))
 {
+    if (ctx == NULL)
+        return;
+
 #ifdef HAVE_EXT_CACHE
     ctx->new_sess_cb = f;
 #else
-    (void)ctx;
     (void)f;
 #endif
 }
@@ -21025,10 +26871,12 @@
 void wolfSSL_CTX_sess_set_remove_cb(WOLFSSL_CTX* ctx, void (*f)(WOLFSSL_CTX*,
                                                         WOLFSSL_SESSION*))
 {
+    if (ctx == NULL)
+        return;
+
 #ifdef HAVE_EXT_CACHE
     ctx->rem_sess_cb = f;
 #else
-    (void)ctx;
     (void)f;
 #endif
 }
@@ -21065,21 +26913,46 @@
     size += OPAQUE8_LEN;
     for (i = 0; i < sess->chain.count; i++)
         size += OPAQUE16_LEN + sess->chain.certs[i].length;
-    /* Protocol version + cipher suite */
-    size += OPAQUE16_LEN + OPAQUE16_LEN;
+#endif
+#if defined(SESSION_CERTS) || (defined(WOLFSSL_TLS13) && \
+                               defined(HAVE_SESSION_TICKET))
+    /* Protocol version */
+    size += OPAQUE16_LEN;
+#endif
+#if defined(SESSION_CERTS) || !defined(NO_RESUME_SUITE_CHECK) || \
+                        (defined(WOLFSSL_TLS13) && defined(HAVE_SESSION_TICKET))
+    /* cipher suite */
+    size += OPAQUE16_LEN;
 #endif
 #ifndef NO_CLIENT_CACHE
     /* ServerID len | ServerID */
     size += OPAQUE16_LEN + sess->idLen;
 #endif
+#ifdef OPENSSL_EXTRA
+    /* session context ID len | session context ID */
+    size += OPAQUE8_LEN + sess->sessionCtxSz;
+#endif
+#ifdef WOLFSSL_TLS13
+    /* namedGroup */
+    size += OPAQUE16_LEN;
+#endif
+#if defined(HAVE_SESSION_TICKET) || !defined(NO_PSK)
+#ifdef WOLFSSL_TLS13
+    /* ticketSeen | ticketAdd */
+    size += OPAQUE32_LEN + OPAQUE32_LEN;
+#ifndef WOLFSSL_TLS13_DRAFT_18
+    /* ticketNonce */
+    size += OPAQUE8_LEN + sess->ticketNonce.len;
+#endif
+#endif
+#ifdef WOLFSSL_EARLY_DATA
+    size += OPAQUE32_LEN;
+#endif
+#endif
 #ifdef HAVE_SESSION_TICKET
     /* ticket len | ticket */
     size += OPAQUE16_LEN + sess->ticketLen;
 #endif
-#ifdef OPENSSL_EXTRA
-    /* session context ID len | session context ID */
-    size += OPAQUE8_LEN + sess->sessionCtxSz;
-#endif
 
     if (p != NULL) {
         if (*p == NULL)
@@ -21104,8 +26977,14 @@
                     sess->chain.certs[i].length);
             idx += sess->chain.certs[i].length;
         }
+#endif
+#if defined(SESSION_CERTS) || (defined(WOLFSSL_TLS13) && \
+                               defined(HAVE_SESSION_TICKET))
         data[idx++] = sess->version.major;
         data[idx++] = sess->version.minor;
+#endif
+#if defined(SESSION_CERTS) || !defined(NO_RESUME_SUITE_CHECK) || \
+                        (defined(WOLFSSL_TLS13) && defined(HAVE_SESSION_TICKET))
         data[idx++] = sess->cipherSuite0;
         data[idx++] = sess->cipherSuite;
 #endif
@@ -21114,16 +26993,37 @@
         XMEMCPY(data + idx, sess->serverID, sess->idLen);
         idx += sess->idLen;
 #endif
+#ifdef OPENSSL_EXTRA
+        data[idx++] = sess->sessionCtxSz;
+        XMEMCPY(data + idx, sess->sessionCtx, sess->sessionCtxSz);
+        idx += sess->sessionCtxSz;
+#endif
+#ifdef WOLFSSL_TLS13
+        c16toa(sess->namedGroup, data + idx);
+        idx += OPAQUE16_LEN;
+#endif
+#if defined(HAVE_SESSION_TICKET) || !defined(NO_PSK)
+#ifdef WOLFSSL_TLS13
+    c32toa(sess->ticketSeen, data + idx);
+    idx += OPAQUE32_LEN;
+    c32toa(sess->ticketAdd, data + idx);
+    idx += OPAQUE32_LEN;
+#ifndef WOLFSSL_TLS13_DRAFT_18
+    data[idx++] = sess->ticketNonce.len;
+    XMEMCPY(data + idx, sess->ticketNonce.data, sess->ticketNonce.len);
+    idx += sess->ticketNonce.len;
+#endif
+#endif
+#ifdef WOLFSSL_EARLY_DATA
+        c32toa(sess->maxEarlyDataSz, data + idx);
+        idx += OPAQUE32_LEN;
+#endif
+#endif
 #ifdef HAVE_SESSION_TICKET
         c16toa(sess->ticketLen, data + idx); idx += OPAQUE16_LEN;
         XMEMCPY(data + idx, sess->ticket, sess->ticketLen);
         idx += sess->ticketLen;
 #endif
-#ifdef OPENSSL_EXTRA
-        data[idx++] = sess->sessionCtxSz;
-        XMEMCPY(data + idx, sess->sessionCtx, sess->sessionCtxSz);
-        idx += sess->sessionCtxSz;
-#endif
     }
 #endif
 
@@ -21224,14 +27124,24 @@
         XMEMCPY(s->chain.certs[j].buffer, data + idx, length);
         idx += length;
     }
-
-    /* Protocol Version | Cipher suite */
-    if (i - idx < OPAQUE16_LEN + OPAQUE16_LEN) {
+#endif
+#if defined(SESSION_CERTS) || (defined(WOLFSSL_TLS13) && \
+                               defined(HAVE_SESSION_TICKET))
+    /* Protocol Version */
+    if (i - idx < OPAQUE16_LEN) {
         ret = BUFFER_ERROR;
         goto end;
     }
     s->version.major = data[idx++];
     s->version.minor = data[idx++];
+#endif
+#if defined(SESSION_CERTS) || !defined(NO_RESUME_SUITE_CHECK) || \
+                        (defined(WOLFSSL_TLS13) && defined(HAVE_SESSION_TICKET))
+    /* Cipher suite */
+    if (i - idx < OPAQUE16_LEN) {
+        ret = BUFFER_ERROR;
+        goto end;
+    }
     s->cipherSuite0 = data[idx++];
     s->cipherSuite = data[idx++];
 #endif
@@ -21250,6 +27160,63 @@
     }
     XMEMCPY(s->serverID, data + idx, s->idLen); idx += s->idLen;
 #endif
+#ifdef OPENSSL_EXTRA
+    /* byte for length of session context ID */
+    if (i - idx < OPAQUE8_LEN) {
+        ret = BUFFER_ERROR;
+        goto end;
+    }
+    s->sessionCtxSz = data[idx++];
+
+    /* app session context ID */
+    if (i - idx < s->sessionCtxSz) {
+        ret = BUFFER_ERROR;
+        goto end;
+    }
+    XMEMCPY(s->sessionCtx, data + idx, s->sessionCtxSz); idx += s->sessionCtxSz;
+#endif
+#ifdef WOLFSSL_TLS13
+    if (i - idx < OPAQUE16_LEN) {
+        ret = BUFFER_ERROR;
+        goto end;
+    }
+    ato16(data + idx, &s->namedGroup);
+    idx += OPAQUE16_LEN;
+#endif
+#if defined(HAVE_SESSION_TICKET) || !defined(NO_PSK)
+#ifdef WOLFSSL_TLS13
+    if (i - idx < (OPAQUE32_LEN * 2)) {
+        ret = BUFFER_ERROR;
+        goto end;
+    }
+    ato32(data + idx, &s->ticketSeen);
+    idx += OPAQUE32_LEN;
+    ato32(data + idx, &s->ticketAdd);
+    idx += OPAQUE32_LEN;
+#ifndef WOLFSSL_TLS13_DRAFT_18
+    if (i - idx < OPAQUE8_LEN) {
+        ret = BUFFER_ERROR;
+        goto end;
+    }
+    s->ticketNonce.len = data[idx++];
+
+    if (i - idx < s->ticketNonce.len) {
+        ret = BUFFER_ERROR;
+        goto end;
+    }
+    XMEMCPY(s->ticketNonce.data, data + idx, s->ticketNonce.len);
+    idx += s->ticketNonce.len;
+#endif
+#endif
+#ifdef WOLFSSL_EARLY_DATA
+    if (i - idx < OPAQUE32_LEN) {
+        ret = BUFFER_ERROR;
+        goto end;
+    }
+    ato32(data + idx, &s->maxEarlyDataSz);
+    idx += OPAQUE32_LEN;
+#endif
+#endif
 #ifdef HAVE_SESSION_TICKET
     /* ticket len */
     if (i - idx < OPAQUE16_LEN) {
@@ -21280,21 +27247,6 @@
     }
     XMEMCPY(s->ticket, data + idx, s->ticketLen); idx += s->ticketLen;
 #endif
-#ifdef OPENSSL_EXTRA
-    /* byte for length of session context ID */
-    if (i - idx < OPAQUE8_LEN) {
-        ret = BUFFER_ERROR;
-        goto end;
-    }
-    s->sessionCtxSz = data[idx++];
-
-    /* app session context ID */
-    if (i - idx < s->sessionCtxSz) {
-        ret = BUFFER_ERROR;
-        goto end;
-    }
-    XMEMCPY(s->sessionCtx, data + idx, s->sessionCtxSz); idx += s->sessionCtxSz;
-#endif
     (void)idx;
 
     if (sess != NULL)
@@ -21312,15 +27264,21 @@
 
 long wolfSSL_SESSION_get_timeout(const WOLFSSL_SESSION* sess)
 {
+    long timeout = 0;
     WOLFSSL_ENTER("wolfSSL_SESSION_get_timeout");
-    return sess->timeout;
+    if (sess)
+        timeout = sess->timeout;
+    return timeout;
 }
 
 
 long wolfSSL_SESSION_get_time(const WOLFSSL_SESSION* sess)
 {
+    long bornOn = 0;
     WOLFSSL_ENTER("wolfSSL_SESSION_get_time");
-    return sess->bornOn;
+    if (sess)
+        bornOn = sess->bornOn;
+    return bornOn;
 }
 
 
@@ -21354,8 +27312,8 @@
     #endif
         byte*          myBuffer  = staticBuffer;
         int            dynamic   = 0;
-        XFILE          file      = XBADFILE;
-        size_t         sz        = 0;
+        XFILE          file;
+        long           sz        = 0;
         WOLFSSL_CTX*   ctx       = ssl->ctx;
         WOLFSSL_X509*  peer_cert = &ssl->peerCert;
         DerBuffer*     fileDer = NULL;
@@ -21364,20 +27322,28 @@
         if (file == XBADFILE)
             return WOLFSSL_BAD_FILE;
 
-        XFSEEK(file, 0, XSEEK_END);
+        if (XFSEEK(file, 0, XSEEK_END) != 0) {
+            XFCLOSE(file);
+            return WOLFSSL_BAD_FILE;
+        }
         sz = XFTELL(file);
         XREWIND(file);
 
+        if (sz > MAX_WOLFSSL_FILE_SIZE || sz < 0) {
+            WOLFSSL_MSG("cmp_peer_cert_to_file size error");
+            XFCLOSE(file);
+            return WOLFSSL_BAD_FILE;
+        }
+
         if (sz > (long)sizeof(staticBuffer)) {
             WOLFSSL_MSG("Getting dynamic buffer");
             myBuffer = (byte*)XMALLOC(sz, ctx->heap, DYNAMIC_TYPE_FILE);
             dynamic = 1;
         }
 
-
         if ((myBuffer != NULL) &&
             (sz > 0) &&
-            (XFREAD(myBuffer, 1, sz, file) == sz) &&
+            (XFREAD(myBuffer, 1, sz, file) == (size_t)sz) &&
             (PemToDer(myBuffer, (long)sz, CERT_TYPE,
                       &fileDer, ctx->heap, NULL, NULL) == 0) &&
             (fileDer->length != 0) &&
@@ -21400,12 +27366,368 @@
 }
 #endif
 #endif /* OPENSSL_EXTRA */
-
+#endif /* !WOLFCRYPT_ONLY */
+#if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
+const WOLFSSL_ObjectInfo wolfssl_object_info[] = {
+#ifndef NO_CERTS
+    /* oidCertExtType */
+    { NID_basic_constraints, BASIC_CA_OID, oidCertExtType, "basicConstraints",
+                                                "X509v3 Basic Constraints"},
+    { NID_subject_alt_name, ALT_NAMES_OID, oidCertExtType, "subjectAltName",
+                                         "X509v3 Subject Alternative Name"},
+    { CRL_DIST_OID, CRL_DIST_OID, oidCertExtType, "crlDistributionPoints",
+                                          "X509v3 CRL Distribution Points"},
+    { NID_info_access, AUTH_INFO_OID, oidCertExtType, "authorityInfoAccess",
+                                            "Authority Information Access"},
+    { NID_authority_key_identifier, AUTH_KEY_OID, oidCertExtType,
+               "authorityKeyIdentifier", "X509v3 Authority Key Identifier"},
+    { NID_subject_key_identifier, SUBJ_KEY_OID, oidCertExtType,
+                   "subjectKeyIdentifier", "X509v3 Subject Key Identifier"},
+    { NID_key_usage, KEY_USAGE_OID, oidCertExtType, "keyUsage",
+                                                        "X509v3 Key Usage"},
+    { NID_inhibit_any_policy, INHIBIT_ANY_OID, oidCertExtType,
+                           "inhibitAnyPolicy", "X509v3 Inhibit Any Policy"},
+    { NID_ext_key_usage, KEY_USAGE_OID, oidCertExtType,
+                           "extendedKeyUsage", "X509v3 Extended Key Usage"},
+    { NID_name_constraints, NAME_CONS_OID, oidCertExtType,
+                              "nameConstraints", "X509v3 Name Constraints"},
+    { NID_certificate_policies, CERT_POLICY_OID, oidCertExtType,
+                      "certificatePolicies", "X509v3 Certificate Policies"},
+
+    /* oidCertAuthInfoType */
+    { AIA_OCSP_OID, AIA_OCSP_OID, oidCertAuthInfoType, "authorityInfoAccess",
+                                            "Authority Information Access"},
+    { AIA_CA_ISSUER_OID, AIA_CA_ISSUER_OID, oidCertAuthInfoType,
+                                                 "caIssuers", "CA Issuers"},
+
+    /* oidCertPolicyType */
+    { NID_any_policy, CP_ANY_OID, oidCertPolicyType, "anyPolicy",
+                                                       "X509v3 Any Policy"},
+
+    /* oidCertAltNameType */
+    { NID_hw_name_oid, HW_NAME_OID, oidCertAltNameType, "Hardware name",""},
+
+    /* oidCertKeyUseType */
+    { NID_anyExtendedKeyUsage, EKU_ANY_OID, oidCertKeyUseType,
+                           "anyExtendedKeyUsage", "Any Extended Key Usage"},
+    { EKU_SERVER_AUTH_OID, EKU_SERVER_AUTH_OID, oidCertKeyUseType,
+                             "serverAuth", "TLS Web Server Authentication"},
+    { EKU_CLIENT_AUTH_OID, EKU_CLIENT_AUTH_OID, oidCertKeyUseType,
+                             "clientAuth", "TLS Web Client Authentication"},
+    { EKU_OCSP_SIGN_OID, EKU_OCSP_SIGN_OID, oidCertKeyUseType,
+                                             "OCSPSigning", "OCSP Signing"},
+
+    /* oidCertNameType */
+    { NID_commonName, NID_commonName, oidCertNameType, "CN", "commonName"},
+    { NID_surname, NID_surname, oidCertNameType, "SN", "surname"},
+    { NID_serialNumber, NID_serialNumber, oidCertNameType, "serialNumber",
+                                                            "serialNumber"},
+    { NID_countryName, NID_countryName, oidCertNameType, "C", "countryName"},
+    { NID_localityName, NID_localityName, oidCertNameType, "L", "localityName"},
+    { NID_stateOrProvinceName, NID_stateOrProvinceName, oidCertNameType, "ST",
+                                                        "stateOrProvinceName"},
+    { NID_organizationName, NID_organizationName, oidCertNameType, "O",
+                                                        "organizationName"},
+    { NID_organizationalUnitName, NID_organizationalUnitName, oidCertNameType,
+                                                "OU", "organizationalUnitName"},
+    { NID_emailAddress, NID_emailAddress, oidCertNameType, "emailAddress",
+                                                            "emailAddress"},
+    { NID_domainComponent, NID_domainComponent, oidCertNameType, "DC",
+                                                            "domainComponent"},
+    { NID_businessCategory, NID_businessCategory, oidCertNameType, "businessCategory",
+                                                            "businessCategory"},
+    { NID_jurisdictionCountryName, NID_jurisdictionCountryName, oidCertNameType, "jurisdictionC",
+                                                            "jurisdictionCountryName"},
+    { NID_jurisdictionStateOrProvinceName, NID_jurisdictionStateOrProvinceName,
+            oidCertNameType, "jurisdictionST", "jurisdictionStateOrProvinceName"},
+#endif
+#ifdef OPENSSL_EXTRA /* OPENSSL_EXTRA_X509_SMALL only needs the above */
+        /* oidHashType */
+    #ifdef WOLFSSL_MD2
+        { NID_md2, MD2h, oidHashType, "MD2", "md2"},
+    #endif
+    #ifdef WOLFSSL_MD5
+        { NID_md5, MD5h, oidHashType, "MD5", "md5"},
+    #endif
+    #ifndef NO_SHA
+        { NID_sha1, SHAh, oidHashType, "SHA1", "sha1"},
+    #endif
+    #ifdef WOLFSSL_SHA224
+        { NID_sha224, SHA224h, oidHashType, "SHA224", "sha224"},
+    #endif
+    #ifndef NO_SHA256
+        { NID_sha256, SHA256h, oidHashType, "SHA256", "sha256"},
+    #endif
+    #ifdef WOLFSSL_SHA384
+        { NID_sha384, SHA384h, oidHashType, "SHA384", "sha384"},
+    #endif
+    #ifdef WOLFSSL_SHA512
+        { NID_sha512, SHA512h, oidHashType, "SHA512", "sha512"},
+    #endif
+
+        /* oidSigType */
+    #ifndef NO_DSA
+        #ifndef NO_SHA
+        { CTC_SHAwDSA, CTC_SHAwDSA, oidSigType, "DSA-SHA1", "dsaWithSHA1"},
+        #endif
+    #endif /* NO_DSA */
+    #ifndef NO_RSA
+        #ifdef WOLFSSL_MD2
+        { CTC_MD2wRSA, CTC_MD2wRSA, oidSigType, "RSA-MD2",
+                                                        "md2WithRSAEncryption"},
+        #endif
+        #ifndef NO_MD5
+        { CTC_MD5wRSA, CTC_MD5wRSA, oidSigType, "RSA-MD5",
+                                                        "md5WithRSAEncryption"},
+        #endif
+        #ifndef NO_SHA
+        { CTC_SHAwRSA, CTC_SHAwRSA, oidSigType, "RSA-SHA1",
+                                                       "sha1WithRSAEncryption"},
+        #endif
+        #ifdef WOLFSSL_SHA224
+        { CTC_SHA224wRSA, CTC_SHA224wRSA, oidSigType, "RSA-SHA224",
+                                                     "sha224WithRSAEncryption"},
+        #endif
+        #ifndef NO_SHA256
+        { CTC_SHA256wRSA, CTC_SHA256wRSA, oidSigType, "RSA-SHA256",
+                                                     "sha256WithRSAEncryption"},
+        #endif
+        #ifdef WOLFSSL_SHA384
+        { CTC_SHA384wRSA, CTC_SHA384wRSA, oidSigType, "RSA-SHA384",
+                                                     "sha384WithRSAEncryption"},
+        #endif
+        #ifdef WOLFSSL_SHA512
+        { CTC_SHA512wRSA, CTC_SHA512wRSA, oidSigType, "RSA-SHA512",
+                                                     "sha512WithRSAEncryption"},
+        #endif
+    #endif /* NO_RSA */
+    #ifdef HAVE_ECC
+        #ifndef NO_SHA
+        { CTC_SHAwECDSA, CTC_SHAwECDSA, oidSigType, "ecdsa-with-SHA1", "shaWithECDSA"},
+        #endif
+        #ifdef WOLFSSL_SHA224
+        { CTC_SHA224wECDSA, CTC_SHA224wECDSA, oidSigType, "ecdsa-with-SHA224","sha224WithECDSA"},
+        #endif
+        #ifndef NO_SHA256
+        { CTC_SHA256wECDSA, CTC_SHA256wECDSA, oidSigType, "ecdsa-with-SHA256","sha256WithECDSA"},
+        #endif
+        #ifdef WOLFSSL_SHA384
+        { CTC_SHA384wECDSA, CTC_SHA384wECDSA, oidSigType, "ecdsa-with-SHA384","sha384WithECDSA"},
+        #endif
+        #ifdef WOLFSSL_SHA512
+        { CTC_SHA512wECDSA, CTC_SHA512wECDSA, oidSigType, "ecdsa-with-SHA512","sha512WithECDSA"},
+        #endif
+    #endif /* HAVE_ECC */
+
+        /* oidKeyType */
+    #ifndef NO_DSA
+        { DSAk, DSAk, oidKeyType, "DSA", "dsaEncryption"},
+        { NID_dsa, DSAk, oidKeyType, "DSA", "dsaEncryption"},
+    #endif /* NO_DSA */
+    #ifndef NO_RSA
+        { RSAk, RSAk, oidKeyType, "RSA", "rsaEncryption"},
+        { NID_rsaEncryption, RSAk, oidKeyType, "RSA", "rsaEncryption"},
+    #endif /* NO_RSA */
+    #ifdef HAVE_NTRU
+        { NTRUk, NTRUk, oidKeyType, "NTRU", "ntruEncryption"},
+    #endif /* HAVE_NTRU */
+    #ifdef HAVE_ECC
+        { ECDSAk, ECDSAk, oidKeyType, "ECDSA", "ecdsaEncryption"},
+        { NID_X9_62_id_ecPublicKey, ECDSAk, oidKeyType, "id-ecPublicKey",
+                                                        "id-ecPublicKey"},
+    #endif /* HAVE_ECC */
+    #ifndef NO_DH
+        { NID_dhKeyAgreement, DHk, oidKeyType, "dhKeyAgreement", "dhKeyAgreement"},
+    #endif
+
+        /* oidCurveType */
+    #ifdef HAVE_ECC
+        { NID_X9_62_prime192v1, ECC_SECP192R1_OID, oidCurveType, "prime192v1", "prime192v1"},
+        { NID_X9_62_prime192v2, ECC_PRIME192V2_OID, oidCurveType, "prime192v2", "prime192v2"},
+        { NID_X9_62_prime192v3, ECC_PRIME192V3_OID, oidCurveType, "prime192v3", "prime192v3"},
+
+        { NID_X9_62_prime239v1, ECC_PRIME239V1_OID, oidCurveType, "prime239v1", "prime239v1"},
+        { NID_X9_62_prime239v2, ECC_PRIME239V2_OID, oidCurveType, "prime239v2", "prime239v2"},
+        { NID_X9_62_prime239v3, ECC_PRIME239V3_OID, oidCurveType, "prime239v3", "prime239v3"},
+
+        { NID_X9_62_prime256v1, ECC_SECP256R1_OID, oidCurveType, "prime256v1", "prime256v1"},
+
+        { NID_secp112r1, ECC_SECP112R1_OID,  oidCurveType, "secp112r1", "secp112r1"},
+        { NID_secp112r2, ECC_SECP112R2_OID,  oidCurveType, "secp112r2", "secp112r2"},
+
+        { NID_secp128r1, ECC_SECP128R1_OID,  oidCurveType, "secp128r1", "secp128r1"},
+        { NID_secp128r2, ECC_SECP128R2_OID,  oidCurveType, "secp128r2", "secp128r2"},
+
+        { NID_secp160r1, ECC_SECP160R1_OID,  oidCurveType, "secp160r1", "secp160r1"},
+        { NID_secp160r2, ECC_SECP160R2_OID,  oidCurveType, "secp160r2", "secp160r2"},
+
+        { NID_secp224r1, ECC_SECP224R1_OID,  oidCurveType, "secp224r1", "secp224r1"},
+        { NID_secp384r1, ECC_SECP384R1_OID,  oidCurveType, "secp384r1", "secp384r1"},
+        { NID_secp521r1, ECC_SECP521R1_OID,  oidCurveType, "secp521r1", "secp521r1"},
+
+        { NID_secp160k1, ECC_SECP160K1_OID,  oidCurveType, "secp160k1", "secp160k1"},
+        { NID_secp192k1, ECC_SECP192K1_OID,  oidCurveType, "secp192k1", "secp192k1"},
+        { NID_secp224k1, ECC_SECP224K1_OID,  oidCurveType, "secp224k1", "secp224k1"},
+        { NID_secp256k1, ECC_SECP256K1_OID,  oidCurveType, "secp256k1", "secp256k1"},
+
+        { NID_brainpoolP160r1, ECC_BRAINPOOLP160R1_OID,  oidCurveType, "brainpoolP160r1", "brainpoolP160r1"},
+        { NID_brainpoolP192r1, ECC_BRAINPOOLP192R1_OID,  oidCurveType, "brainpoolP192r1", "brainpoolP192r1"},
+        { NID_brainpoolP224r1, ECC_BRAINPOOLP224R1_OID,  oidCurveType, "brainpoolP224r1", "brainpoolP224r1"},
+        { NID_brainpoolP256r1, ECC_BRAINPOOLP256R1_OID,  oidCurveType, "brainpoolP256r1", "brainpoolP256r1"},
+        { NID_brainpoolP320r1, ECC_BRAINPOOLP320R1_OID,  oidCurveType, "brainpoolP320r1", "brainpoolP320r1"},
+        { NID_brainpoolP384r1, ECC_BRAINPOOLP384R1_OID,  oidCurveType, "brainpoolP384r1", "brainpoolP384r1"},
+        { NID_brainpoolP512r1, ECC_BRAINPOOLP512R1_OID,  oidCurveType, "brainpoolP512r1", "brainpoolP512r1"},
+    #endif /* HAVE_ECC */
+
+        /* oidBlkType */
+    #ifdef WOLFSSL_AES_128
+        { AES128CBCb, AES128CBCb, oidBlkType, "AES-128-CBC", "aes-128-cbc"},
+    #endif
+    #ifdef WOLFSSL_AES_192
+        { AES192CBCb, AES192CBCb, oidBlkType, "AES-192-CBC", "aes-192-cbc"},
+    #endif
+    #ifdef WOLFSSL_AES_256
+        { AES256CBCb, AES256CBCb, oidBlkType, "AES-256-CBC", "aes-256-cbc"},
+    #endif
+    #ifndef NO_DES3
+        { NID_des, DESb, oidBlkType, "DES-CBC", "des-cbc"},
+        { NID_des3, DES3b, oidBlkType, "DES-EDE3-CBC", "des-ede3-cbc"},
+    #endif /* !NO_DES3 */
+
+        /* oidOcspType */
+    #ifdef HAVE_OCSP
+        { NID_id_pkix_OCSP_basic, OCSP_BASIC_OID, oidOcspType, "basicOCSPResponse",
+                                                         "Basic OCSP Response"},
+        { OCSP_NONCE_OID, OCSP_NONCE_OID, oidOcspType, "Nonce",
+                                                                  "OCSP Nonce"},
+    #endif /* HAVE_OCSP */
+
+    #ifndef NO_PWDBASED
+        /* oidKdfType */
+        { PBKDF2_OID, PBKDF2_OID, oidKdfType, "PBKDFv2", "PBKDF2"},
+
+        /* oidPBEType */
+        { PBE_SHA1_RC4_128, PBE_SHA1_RC4_128, oidPBEType,
+                                 "PBE-SHA1-RC4-128", "pbeWithSHA1And128BitRC4"},
+        { PBE_SHA1_DES, PBE_SHA1_DES, oidPBEType, "PBE-SHA1-DES",
+                                                       "pbeWithSHA1AndDES-CBC"},
+        { PBE_SHA1_DES3, PBE_SHA1_DES3, oidPBEType, "PBE-SHA1-3DES",
+                                            "pbeWithSHA1And3-KeyTripleDES-CBC"},
+    #endif
+
+        /* oidKeyWrapType */
+    #ifdef WOLFSSL_AES_128
+        { AES128_WRAP, AES128_WRAP, oidKeyWrapType, "AES-128 wrap", "aes128-wrap"},
+    #endif
+    #ifdef WOLFSSL_AES_192
+        { AES192_WRAP, AES192_WRAP, oidKeyWrapType, "AES-192 wrap", "aes192-wrap"},
+    #endif
+    #ifdef WOLFSSL_AES_256
+        { AES256_WRAP, AES256_WRAP, oidKeyWrapType, "AES-256 wrap", "aes256-wrap"},
+    #endif
+
+    #ifndef NO_PKCS7
+        #ifndef NO_DH
+        /* oidCmsKeyAgreeType */
+            #ifndef NO_SHA
+        { dhSinglePass_stdDH_sha1kdf_scheme, dhSinglePass_stdDH_sha1kdf_scheme,
+                oidCmsKeyAgreeType, "dhSinglePass-stdDH-sha1kdf-scheme", "dhSinglePass-stdDH-sha1kdf-scheme"},
+            #endif
+            #ifdef WOLFSSL_SHA224
+        { dhSinglePass_stdDH_sha224kdf_scheme,
+                dhSinglePass_stdDH_sha224kdf_scheme, oidCmsKeyAgreeType,
+                "dhSinglePass-stdDH-sha224kdf-scheme", "dhSinglePass-stdDH-sha224kdf-scheme"},
+            #endif
+            #ifndef NO_SHA256
+        { dhSinglePass_stdDH_sha256kdf_scheme,
+                        dhSinglePass_stdDH_sha256kdf_scheme, oidCmsKeyAgreeType,
+                        "dhSinglePass-stdDH-sha256kdf-scheme", "dhSinglePass-stdDH-sha256kdf-scheme"},
+            #endif
+            #ifdef WOLFSSL_SHA384
+        { dhSinglePass_stdDH_sha384kdf_scheme,
+                        dhSinglePass_stdDH_sha384kdf_scheme, oidCmsKeyAgreeType,
+                        "dhSinglePass-stdDH-sha384kdf-scheme", "dhSinglePass-stdDH-sha384kdf-scheme"},
+            #endif
+            #ifdef WOLFSSL_SHA512
+        { dhSinglePass_stdDH_sha512kdf_scheme,
+                        dhSinglePass_stdDH_sha512kdf_scheme, oidCmsKeyAgreeType,
+                        "dhSinglePass-stdDH-sha512kdf-scheme", "dhSinglePass-stdDH-sha512kdf-scheme"},
+            #endif
+        #endif
+    #endif
+    #if defined(WOLFSSL_APACHE_HTTPD)
+        /* "1.3.6.1.5.5.7.8.7" */
+        { NID_id_on_dnsSRV, NID_id_on_dnsSRV, oidCertNameType,
+            WOLFSSL_SN_DNS_SRV, WOLFSSL_LN_DNS_SRV },
+
+        /* "1.3.6.1.4.1.311.20.2.3" */
+        { NID_ms_upn, WOLFSSL_MS_UPN_SUM, oidCertExtType, WOLFSSL_SN_MS_UPN,
+            WOLFSSL_LN_MS_UPN },
+
+        /* "1.3.6.1.5.5.7.1.24" */
+        { NID_tlsfeature, WOLFSSL_TLS_FEATURE_SUM, oidTlsExtType,
+            WOLFSSL_SN_TLS_FEATURE, WOLFSSL_LN_TLS_FEATURE },
+    #endif
+#endif /* OPENSSL_EXTRA */
+};
+
+#define WOLFSSL_OBJECT_INFO_SZ \
+                (sizeof(wolfssl_object_info) / sizeof(*wolfssl_object_info))
+const size_t wolfssl_object_info_sz = WOLFSSL_OBJECT_INFO_SZ;
+#endif
 #if defined(OPENSSL_EXTRA) || \
     (defined(OPENSSL_EXTRA_X509_SMALL) && !defined(NO_RSA))
 static WC_RNG globalRNG;
 static int initGlobalRNG = 0;
 #endif
+#if defined(OPENSSL_EXTRA) && \
+    !defined(NO_RSA) && !defined(HAVE_USER_RSA) && !defined(HAVE_FAST_RSA)
+WC_RNG* WOLFSSL_RSA_GetRNG(WOLFSSL_RSA *rsa, WC_RNG **tmpRNG, int *initTmpRng)
+{
+    WC_RNG* rng = NULL;
+
+    if (!rsa || !initTmpRng) {
+        return NULL;
+    }
+    *initTmpRng = 0;
+
+#if !defined(HAVE_FIPS) && !defined(HAVE_USER_RSA) && \
+    !defined(HAVE_FAST_RSA) && defined(WC_RSA_BLINDING)
+    rng = ((RsaKey*)rsa->internal)->rng;
+#endif
+    if (rng == NULL && tmpRNG) {
+        if (!*tmpRNG) {
+#ifdef WOLFSSL_SMALL_STACK
+            *tmpRNG = (WC_RNG*)XMALLOC(sizeof(WC_RNG), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+            if (*tmpRNG == NULL)
+                return NULL;
+#else
+            WOLFSSL_MSG("*tmpRNG is null");
+            return NULL;
+#endif
+        }
+
+        if (wc_InitRng(*tmpRNG) == 0) {
+            rng = *tmpRNG;
+            *initTmpRng = 1;
+        }
+        else {
+            WOLFSSL_MSG("Bad RNG Init, trying global");
+            if (initGlobalRNG == 0)
+                WOLFSSL_MSG("Global RNG no Init");
+            else
+                rng = &globalRNG;
+#ifdef WOLFSSL_SMALL_STACK
+            if (*tmpRNG)
+                XFREE(*tmpRNG, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+            *tmpRNG = NULL;
+#endif
+        }
+    }
+    return rng;
+}
+#endif
+#ifndef WOLFCRYPT_ONLY
 
 #ifdef OPENSSL_EXTRA
 
@@ -21511,7 +27833,7 @@
  *
  * fname name of file to write to
  *
- * Returns the number of bytes writen
+ * Returns the number of bytes written
  */
 int wolfSSL_RAND_write_file(const char* fname)
 {
@@ -21553,7 +27875,7 @@
             XFILE f;
 
             f = XFOPEN(fname, "wb");
-            if (f == NULL) {
+            if (f == XBADFILE) {
                 WOLFSSL_MSG("Error opening the file");
                 bytes = 0;
             }
@@ -21580,11 +27902,6 @@
     #include <sys/un.h>
 #endif
 
-/* at compile time check for HASH DRBG and throw warning if not found */
-#ifndef HAVE_HASHDRBG
-    #warning HAVE_HASHDRBG is needed for wolfSSL_RAND_egd to seed
-#endif
-
 /* This collects entropy from the path nm and seeds the global PRNG with it.
  * Makes a call to wolfSSL_RAND_Init which is not thread safe.
  *
@@ -21594,7 +27911,8 @@
  */
 int wolfSSL_RAND_egd(const char* nm)
 {
-#if defined(USE_WOLFSSL_IO) && !defined(USE_WINDOWS_API) && !defined(HAVE_FIPS)
+#if defined(USE_WOLFSSL_IO) && !defined(USE_WINDOWS_API) && !defined(HAVE_FIPS) && \
+    defined(HAVE_HASHDRBG)
     struct sockaddr_un rem;
     int fd;
     int ret = WOLFSSL_SUCCESS;
@@ -21628,7 +27946,7 @@
     }
     if (ret == WOLFSSL_SUCCESS) {
         rem.sun_family = AF_UNIX;
-        XSTRNCPY(rem.sun_path, nm, sizeof(rem.sun_path));
+        XSTRNCPY(rem.sun_path, nm, sizeof(rem.sun_path) - 1);
         rem.sun_path[sizeof(rem.sun_path)-1] = '\0';
     }
 
@@ -21728,13 +28046,13 @@
     else {
         return ret;
     }
-#else /* defined(USE_WOLFSSL_IO) && !defined(USE_WINDOWS_API) && !HAVE_FIPS */
+#else
     WOLFSSL_MSG("Type of socket needed is not available");
-    WOLFSSL_MSG("\tor using FIPS mode where RNG API is not available");
+    WOLFSSL_MSG("\tor using mode where DRBG API is not available");
     (void)nm;
 
     return WOLFSSL_FATAL_ERROR;
-#endif /* defined(USE_WOLFSSL_IO) && !defined(USE_WINDOWS_API) */
+#endif /* USE_WOLFSSL_IO && !USE_WINDOWS_API && !HAVE_FIPS && HAVE_HASHDRBG */
 }
 
 #endif /* !FREERTOS_TCP */
@@ -21763,7 +28081,7 @@
     int     initTmpRng = 0;
     WC_RNG* rng = NULL;
 #ifdef WOLFSSL_SMALL_STACK
-    WC_RNG* tmpRNG = NULL;
+    WC_RNG* tmpRNG;
 #else
     WC_RNG  tmpRNG[1];
 #endif
@@ -21776,13 +28094,12 @@
         return ret;
 #endif
 
-    if (wc_InitRng(tmpRNG) == 0) {
+    if (initGlobalRNG)
+        rng = &globalRNG;
+    else if(wc_InitRng(tmpRNG) == 0) {
         rng = tmpRNG;
         initTmpRng = 1;
     }
-    else if (initGlobalRNG)
-        rng = &globalRNG;
-
     if (rng) {
         if (wc_RNG_GenerateBlock(rng, buf, num) != 0)
             WOLFSSL_MSG("Bad wc_RNG_GenerateBlock");
@@ -21801,7 +28118,7 @@
 }
 
 
-int wolfSSL_RAND_poll()
+int wolfSSL_RAND_poll(void)
 {
     byte  entropy[16];
     int  ret = 0;
@@ -21821,1057 +28138,282 @@
 
     return ret;
 }
-
-WOLFSSL_BN_CTX* wolfSSL_BN_CTX_new(void)
-{
-    static int ctx;  /* wolfcrypt doesn't now need ctx */
-
-    WOLFSSL_MSG("wolfSSL_BN_CTX_new");
-    return (WOLFSSL_BN_CTX*)&ctx;
-
-}
-
-void wolfSSL_BN_CTX_init(WOLFSSL_BN_CTX* ctx)
-{
-    (void)ctx;
-    WOLFSSL_MSG("wolfSSL_BN_CTX_init");
-}
-
-
-void wolfSSL_BN_CTX_free(WOLFSSL_BN_CTX* ctx)
-{
-    (void)ctx;
-    WOLFSSL_MSG("wolfSSL_BN_CTX_free");
-    /* do free since static ctx that does nothing */
-}
 #endif /* OPENSSL_EXTRA */
 
 
-#if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
-static void InitwolfSSL_BigNum(WOLFSSL_BIGNUM* bn)
-{
-    if (bn) {
-        XMEMSET(bn, 0, sizeof(WOLFSSL_BIGNUM));
-        bn->neg      = 0;
-        bn->internal = NULL;
-    }
-}
-
-WOLFSSL_BIGNUM* wolfSSL_BN_new(void)
-{
-    WOLFSSL_BIGNUM* external;
-    mp_int*        mpi;
-
-    WOLFSSL_MSG("wolfSSL_BN_new");
-
-    mpi = (mp_int*) XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_BIGINT);
-    if (mpi == NULL) {
-        WOLFSSL_MSG("wolfSSL_BN_new malloc mpi failure");
-        return NULL;
-    }
-
-    external = (WOLFSSL_BIGNUM*) XMALLOC(sizeof(WOLFSSL_BIGNUM), NULL,
-                                        DYNAMIC_TYPE_BIGINT);
-    if (external == NULL) {
-        WOLFSSL_MSG("wolfSSL_BN_new malloc WOLFSSL_BIGNUM failure");
-        XFREE(mpi, NULL, DYNAMIC_TYPE_BIGINT);
-        return NULL;
-    }
-
-    InitwolfSSL_BigNum(external);
-    external->internal = mpi;
-    if (mp_init(mpi) != MP_OKAY) {
-        wolfSSL_BN_free(external);
-        return NULL;
-    }
-
-    return external;
-}
-
-
-void wolfSSL_BN_free(WOLFSSL_BIGNUM* bn)
-{
-    WOLFSSL_MSG("wolfSSL_BN_free");
-    if (bn) {
-        if (bn->internal) {
-            mp_forcezero((mp_int*)bn->internal);
-            XFREE(bn->internal, NULL, DYNAMIC_TYPE_BIGINT);
-            bn->internal = NULL;
-        }
-        XFREE(bn, NULL, DYNAMIC_TYPE_BIGINT);
-        bn = NULL;
-    }
-}
-#endif /* OPENSSL_EXTRA || OPENSSL_EXTRA_X509_SMALL */
-
-#ifdef OPENSSL_EXTRA
-
-void wolfSSL_BN_clear_free(WOLFSSL_BIGNUM* bn)
-{
-    WOLFSSL_MSG("wolfSSL_BN_clear_free");
-
-    wolfSSL_BN_free(bn);
-}
-
-
-/* WOLFSSL_SUCCESS on ok */
-int wolfSSL_BN_sub(WOLFSSL_BIGNUM* r, const WOLFSSL_BIGNUM* a,
-                  const WOLFSSL_BIGNUM* b)
-{
-    WOLFSSL_MSG("wolfSSL_BN_sub");
-
-    if (r == NULL || a == NULL || b == NULL)
-        return 0;
-
-    if (mp_sub((mp_int*)a->internal,(mp_int*)b->internal,
-               (mp_int*)r->internal) == MP_OKAY)
-        return WOLFSSL_SUCCESS;
-
-    WOLFSSL_MSG("wolfSSL_BN_sub mp_sub failed");
-    return 0;
-}
-
-/* WOLFSSL_SUCCESS on ok */
-int wolfSSL_BN_mod(WOLFSSL_BIGNUM* r, const WOLFSSL_BIGNUM* a,
-                  const WOLFSSL_BIGNUM* b, const WOLFSSL_BN_CTX* c)
-{
-    (void)c;
-    WOLFSSL_MSG("wolfSSL_BN_mod");
-
-    if (r == NULL || a == NULL || b == NULL)
-        return 0;
-
-    if (mp_mod((mp_int*)a->internal,(mp_int*)b->internal,
-               (mp_int*)r->internal) == MP_OKAY)
-        return WOLFSSL_SUCCESS;
-
-    WOLFSSL_MSG("wolfSSL_BN_mod mp_mod failed");
-    return 0;
-}
-
-
-/* r = (a^p) % m */
-int wolfSSL_BN_mod_exp(WOLFSSL_BIGNUM *r, const WOLFSSL_BIGNUM *a,
-      const WOLFSSL_BIGNUM *p, const WOLFSSL_BIGNUM *m, WOLFSSL_BN_CTX *ctx)
-{
-    int ret;
-
-    WOLFSSL_ENTER("wolfSSL_BN_mod_exp");
-
-    (void) ctx;
-    if (r == NULL || a == NULL || p == NULL || m == NULL) {
-        WOLFSSL_MSG("Bad Argument");
-        return WOLFSSL_FAILURE;
-    }
-
-    if ((ret = mp_exptmod((mp_int*)a->internal,(mp_int*)p->internal,
-               (mp_int*)m->internal, (mp_int*)r->internal)) == MP_OKAY) {
-        return WOLFSSL_SUCCESS;
-    }
-
-    WOLFSSL_LEAVE("wolfSSL_BN_mod_exp", ret);
-    (void)ret;
-
-    return WOLFSSL_FAILURE;
-}
-
-/* r = (a * p) % m */
-int wolfSSL_BN_mod_mul(WOLFSSL_BIGNUM *r, const WOLFSSL_BIGNUM *a,
-        const WOLFSSL_BIGNUM *p, const WOLFSSL_BIGNUM *m, WOLFSSL_BN_CTX *ctx)
-{
-    int ret;
-
-    WOLFSSL_ENTER("wolfSSL_BN_mod_mul");
-
-    (void) ctx;
-    if (r == NULL || a == NULL || p == NULL || m == NULL) {
-        WOLFSSL_MSG("Bad Argument");
-        return SSL_FAILURE;
-    }
-
-    if ((ret = mp_mulmod((mp_int*)a->internal,(mp_int*)p->internal,
-               (mp_int*)m->internal, (mp_int*)r->internal)) == MP_OKAY) {
-        return SSL_SUCCESS;
-    }
-
-    WOLFSSL_LEAVE("wolfSSL_BN_mod_mul", ret);
-    (void)ret;
-
-    return SSL_FAILURE;
-}
-
-const WOLFSSL_BIGNUM* wolfSSL_BN_value_one(void)
-{
-    static WOLFSSL_BIGNUM* bn_one = NULL;
-
-    WOLFSSL_MSG("wolfSSL_BN_value_one");
-
-    if (bn_one == NULL) {
-        bn_one = wolfSSL_BN_new();
-        if (bn_one) {
-            if (mp_set_int((mp_int*)bn_one->internal, 1) != MP_OKAY) {
-                /* handle error by freeing BN and returning NULL */
-                wolfSSL_BN_free(bn_one);
-                bn_one = NULL;
-            }
-        }
-    }
-
-    return bn_one;
-}
-
-/* return compliant with OpenSSL
- *   size of BIGNUM in bytes, 0 if error */
-int wolfSSL_BN_num_bytes(const WOLFSSL_BIGNUM* bn)
-{
-    WOLFSSL_ENTER("wolfSSL_BN_num_bytes");
-
-    if (bn == NULL || bn->internal == NULL)
-        return WOLFSSL_FAILURE;
-
-    return mp_unsigned_bin_size((mp_int*)bn->internal);
-}
-
-/* return compliant with OpenSSL
- *   size of BIGNUM in bits, 0 if error */
-int wolfSSL_BN_num_bits(const WOLFSSL_BIGNUM* bn)
-{
-    WOLFSSL_ENTER("wolfSSL_BN_num_bits");
-
-    if (bn == NULL || bn->internal == NULL)
-        return WOLFSSL_FAILURE;
-
-    return mp_count_bits((mp_int*)bn->internal);
-}
-
-/* return compliant with OpenSSL
- *   1 if BIGNUM is zero, 0 else */
-int wolfSSL_BN_is_zero(const WOLFSSL_BIGNUM* bn)
-{
-    WOLFSSL_MSG("wolfSSL_BN_is_zero");
-
-    if (bn == NULL || bn->internal == NULL)
-        return WOLFSSL_FAILURE;
-
-    if (mp_iszero((mp_int*)bn->internal) == MP_YES)
-        return WOLFSSL_SUCCESS;
-
-    return WOLFSSL_FAILURE;
-}
-
-/* return compliant with OpenSSL
- *   1 if BIGNUM is one, 0 else */
-int wolfSSL_BN_is_one(const WOLFSSL_BIGNUM* bn)
-{
-    WOLFSSL_MSG("wolfSSL_BN_is_one");
-
-    if (bn == NULL || bn->internal == NULL)
-        return WOLFSSL_FAILURE;
-
-    if (mp_cmp_d((mp_int*)bn->internal, 1) == MP_EQ)
-        return WOLFSSL_SUCCESS;
-
-    return WOLFSSL_FAILURE;
-}
-
-/* return compliant with OpenSSL
- *   1 if BIGNUM is odd, 0 else */
-int wolfSSL_BN_is_odd(const WOLFSSL_BIGNUM* bn)
-{
-    WOLFSSL_MSG("wolfSSL_BN_is_odd");
-
-    if (bn == NULL || bn->internal == NULL)
-        return WOLFSSL_FAILURE;
-
-    if (mp_isodd((mp_int*)bn->internal) == MP_YES)
-        return WOLFSSL_SUCCESS;
-
-    return WOLFSSL_FAILURE;
-}
-
-/* return compliant with OpenSSL
- *   -1 if a < b, 0 if a == b and 1 if a > b
- */
-int wolfSSL_BN_cmp(const WOLFSSL_BIGNUM* a, const WOLFSSL_BIGNUM* b)
-{
-    int ret;
-
-    WOLFSSL_MSG("wolfSSL_BN_cmp");
-
-    if (a == NULL || a->internal == NULL || b == NULL || b->internal == NULL)
-        return WOLFSSL_FATAL_ERROR;
-
-    ret = mp_cmp((mp_int*)a->internal, (mp_int*)b->internal);
-
-    return (ret == MP_EQ ? 0 : (ret == MP_GT ? 1 : -1));
-}
-
-/* return compliant with OpenSSL
- *   length of BIGNUM in bytes, -1 if error */
-int wolfSSL_BN_bn2bin(const WOLFSSL_BIGNUM* bn, unsigned char* r)
-{
-    WOLFSSL_MSG("wolfSSL_BN_bn2bin");
-
-    if (bn == NULL || bn->internal == NULL) {
-        WOLFSSL_MSG("NULL bn error");
-        return WOLFSSL_FATAL_ERROR;
-    }
-
-    if (r == NULL)
-        return mp_unsigned_bin_size((mp_int*)bn->internal);
-
-    if (mp_to_unsigned_bin((mp_int*)bn->internal, r) != MP_OKAY) {
-        WOLFSSL_MSG("mp_to_unsigned_bin error");
-        return WOLFSSL_FATAL_ERROR;
-    }
-
-    return mp_unsigned_bin_size((mp_int*)bn->internal);
-}
-
-
-WOLFSSL_BIGNUM* wolfSSL_BN_bin2bn(const unsigned char* str, int len,
-                            WOLFSSL_BIGNUM* ret)
-{
-    int weOwn = 0;
-
-    WOLFSSL_MSG("wolfSSL_BN_bin2bn");
-
-    /* if ret is null create a BN */
-    if (ret == NULL) {
-        ret = wolfSSL_BN_new();
-        weOwn = 1;
-        if (ret == NULL)
-            return NULL;
-    }
-
-    /* check ret and ret->internal then read in value */
-    if (ret && ret->internal) {
-        if (mp_read_unsigned_bin((mp_int*)ret->internal, str, len) != 0) {
-            WOLFSSL_MSG("mp_read_unsigned_bin failure");
-            if (weOwn)
-                wolfSSL_BN_free(ret);
-            return NULL;
-        }
-    }
-
-    return ret;
-}
-
-/* return compliant with OpenSSL
- *   1 if success, 0 if error */
-#ifndef NO_WOLFSSL_STUB
-int wolfSSL_mask_bits(WOLFSSL_BIGNUM* bn, int n)
-{
-    (void)bn;
-    (void)n;
-    WOLFSSL_ENTER("wolfSSL_BN_mask_bits");
-    WOLFSSL_STUB("BN_mask_bits");
-    return SSL_FAILURE;
-}
-#endif
-
-
-/* WOLFSSL_SUCCESS on ok */
-int wolfSSL_BN_rand(WOLFSSL_BIGNUM* bn, int bits, int top, int bottom)
-{
-    int           ret    = 0;
-    int           len    = bits / 8;
-    int           initTmpRng = 0;
-    WC_RNG*       rng    = NULL;
-#ifdef WOLFSSL_SMALL_STACK
-    WC_RNG*       tmpRNG = NULL;
-    byte*         buff   = NULL;
-#else
-    WC_RNG        tmpRNG[1];
-    byte          buff[1024];
-#endif
-
-    (void)top;
-    (void)bottom;
-    WOLFSSL_MSG("wolfSSL_BN_rand");
-
-    if (bits % 8)
-        len++;
-
-#ifdef WOLFSSL_SMALL_STACK
-    buff   = (byte*)XMALLOC(1024,        NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    tmpRNG = (WC_RNG*) XMALLOC(sizeof(WC_RNG), NULL, DYNAMIC_TYPE_RNG);
-    if (buff == NULL || tmpRNG == NULL) {
-        XFREE(buff,   NULL, DYNAMIC_TYPE_TMP_BUFFER);
-        XFREE(tmpRNG, NULL, DYNAMIC_TYPE_RNG);
-        return ret;
-    }
-#endif
-
-    if (bn == NULL || bn->internal == NULL)
-        WOLFSSL_MSG("Bad function arguments");
-    else if (wc_InitRng(tmpRNG) == 0) {
-        rng = tmpRNG;
-        initTmpRng = 1;
-    }
-    else if (initGlobalRNG)
-        rng = &globalRNG;
-
-    if (rng) {
-        if (wc_RNG_GenerateBlock(rng, buff, len) != 0)
-            WOLFSSL_MSG("Bad wc_RNG_GenerateBlock");
-        else {
-            buff[0]     |= 0x80 | 0x40;
-            buff[len-1] |= 0x01;
-
-            if (mp_read_unsigned_bin((mp_int*)bn->internal,buff,len) != MP_OKAY)
-                WOLFSSL_MSG("mp read bin failed");
-            else
-                ret = WOLFSSL_SUCCESS;
-        }
-    }
-
-    if (initTmpRng)
-        wc_FreeRng(tmpRNG);
-
-#ifdef WOLFSSL_SMALL_STACK
-    XFREE(buff,   NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    XFREE(tmpRNG, NULL, DYNAMIC_TYPE_RNG);
-#endif
-
-    return ret;
-}
-
-
-/* WOLFSSL_SUCCESS on ok
- * code is same as wolfSSL_BN_rand except for how top and bottom is handled.
- * top -1 then leave most sig bit alone
- * top 0 then most sig is set to 1
- * top is 1 then first two most sig bits are 1
- *
- * bottom is hot then odd number */
-int wolfSSL_BN_pseudo_rand(WOLFSSL_BIGNUM* bn, int bits, int top, int bottom)
-{
-    int           ret    = 0;
-    int           len    = bits / 8;
-    int           initTmpRng = 0;
-    WC_RNG*       rng    = NULL;
-#ifdef WOLFSSL_SMALL_STACK
-    WC_RNG*       tmpRNG = NULL;
-    byte*         buff   = NULL;
-#else
-    WC_RNG        tmpRNG[1];
-    byte          buff[1024];
-#endif
-
-    WOLFSSL_MSG("wolfSSL_BN_rand");
-
-    if (bits % 8)
-        len++;
-
-#ifdef WOLFSSL_SMALL_STACK
-    buff   = (byte*)XMALLOC(1024,        NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    tmpRNG = (WC_RNG*) XMALLOC(sizeof(WC_RNG), NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    if (buff == NULL || tmpRNG == NULL) {
-        XFREE(buff,   NULL, DYNAMIC_TYPE_TMP_BUFFER);
-        XFREE(tmpRNG, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-        return ret;
-    }
-#endif
-
-    if (bn == NULL || bn->internal == NULL)
-        WOLFSSL_MSG("Bad function arguments");
-    else if (wc_InitRng(tmpRNG) == 0) {
-        rng = tmpRNG;
-        initTmpRng = 1;
-    }
-    else if (initGlobalRNG)
-        rng = &globalRNG;
-
-    if (rng) {
-        if (wc_RNG_GenerateBlock(rng, buff, len) != 0)
-            WOLFSSL_MSG("Bad wc_RNG_GenerateBlock");
-        else {
-            switch (top) {
-                case -1:
+
+#ifdef OPENSSL_EXTRA
+
+WOLFSSL_ASN1_INTEGER* wolfSSL_BN_to_ASN1_INTEGER(const WOLFSSL_BIGNUM *bn, WOLFSSL_ASN1_INTEGER *ai)
+{
+    WOLFSSL_ASN1_INTEGER* a;
+    int len;
+    WOLFSSL_ENTER("wolfSSL_BN_to_ASN1_INTEGER");
+
+    if (ai == NULL) {
+        a = wolfSSL_ASN1_INTEGER_new();
+
+        if (a == NULL)
+            return NULL;
+
+        a->type = V_ASN1_INTEGER;
+    }
+    else {
+        a = ai;
+    }
+    if (a) {
+        if (wolfSSL_BN_is_negative(bn) && !wolfSSL_BN_is_zero(bn)) {
+            a->type |= V_ASN1_NEG_INTEGER;
+            a->negative = 1;
+        }
+
+        len = wolfSSL_BN_num_bytes(bn);
+        if (len == 0)
+            len = 1;
+
+        /* allocate buffer */
+        if (len > (int)sizeof(a->intData)) {
+            /* create new data buffer and copy over */
+            a->data = (byte*)XMALLOC(len, NULL, DYNAMIC_TYPE_OPENSSL);
+            if (a->data == NULL) {
+                if (a != ai)
+                    wolfSSL_ASN1_INTEGER_free(a);
+                return NULL;
+            }
+            a->isDynamic = 1;
+        }
+        else {
+            XMEMSET(a->intData, 0, sizeof(a->intData));
+            a->data = a->intData;
+        }
+        a->length = len;
+
+        /* populate data */
+        if (wolfSSL_BN_is_zero(bn)) {
+            a->data[0] = 0;
+        }
+        else {
+            len = wolfSSL_BN_bn2bin(bn, a->data);
+        }
+        a->length = len;
+    }
+
+    return a;
+}
+
+#ifdef OPENSSL_ALL
+void *wolfSSL_ASN1_item_new(const WOLFSSL_ASN1_ITEM *template)
+{
+    void *ret = NULL;
+    const WOLFSSL_ASN1_TEMPLATE *member = NULL;
+    size_t i;
+    WOLFSSL_ENTER("wolfSSL_ASN1_item_new");
+    if (!template) {
+        return NULL;
+    }
+    if (!(ret = XMALLOC(template->size, NULL, DYNAMIC_TYPE_OPENSSL))) {
+        return NULL;
+    }
+    XMEMSET(ret, 0, template->size);
+    for (member = template->members, i = 0; i < template->mcount;
+            member++, i++) {
+        switch (member->type) {
+            case WOLFSSL_X509_ALGOR_ASN1:
+            {
+                WOLFSSL_X509_ALGOR* algor = wolfSSL_X509_ALGOR_new();
+                if (!algor) {
+                    goto error;
+                }
+                *(WOLFSSL_X509_ALGOR**)(((byte*)ret) + member->offset) = algor;
+                break;
+            }
+            case WOLFSSL_ASN1_BIT_STRING_ASN1:
+            {
+                WOLFSSL_ASN1_BIT_STRING* bit_str = wolfSSL_ASN1_BIT_STRING_new();
+                if (!bit_str) {
+                    goto error;
+                }
+                *(WOLFSSL_ASN1_BIT_STRING**)(((byte*)ret) + member->offset) = bit_str;
+                break;
+            }
+            default:
+                WOLFSSL_MSG("Type not supported in wolfSSL_ASN1_item_new");
+                goto error;
+        }
+    }
+    return ret;
+error:
+    wolfSSL_ASN1_item_free(ret, template);
+    return NULL;
+}
+
+void wolfSSL_ASN1_item_free(void *val, const WOLFSSL_ASN1_ITEM *template)
+{
+    const WOLFSSL_ASN1_TEMPLATE *member = NULL;
+    size_t i;
+    WOLFSSL_ENTER("wolfSSL_ASN1_item_free");
+    if (val) {
+        for (member = template->members, i = 0; i < template->mcount;
+                member++, i++) {
+            switch (member->type) {
+                case WOLFSSL_X509_ALGOR_ASN1:
+                {
+                    WOLFSSL_X509_ALGOR* algor = *(WOLFSSL_X509_ALGOR**)
+                                                 (((byte*)val) + member->offset);
+                    if (algor) {
+                        wolfSSL_X509_ALGOR_free(algor);
+                    }
                     break;
-
-                case 0:
-                    buff[0] |= 0x80;
-                    break;
-
-                case 1:
-                    buff[0] |= 0x80 | 0x40;
+                }
+                case WOLFSSL_ASN1_BIT_STRING_ASN1:
+                {
+                    WOLFSSL_ASN1_BIT_STRING* bit_str = *(WOLFSSL_ASN1_BIT_STRING**)
+                                                        (((byte*)val) + member->offset);
+                    if (bit_str) {
+                        wolfSSL_ASN1_BIT_STRING_free(bit_str);
+                    }
                     break;
-            }
-
-            if (bottom == 1) {
-                buff[len-1] |= 0x01;
-            }
-
-            if (mp_read_unsigned_bin((mp_int*)bn->internal,buff,len) != MP_OKAY)
-                WOLFSSL_MSG("mp read bin failed");
-            else
-                ret = WOLFSSL_SUCCESS;
-        }
-    }
-
-    if (initTmpRng)
-        wc_FreeRng(tmpRNG);
-
-#ifdef WOLFSSL_SMALL_STACK
-    XFREE(buff,   NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    XFREE(tmpRNG, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return ret;
-}
-
-/* return code compliant with OpenSSL :
- *   1 if bit set, 0 else
- */
-int wolfSSL_BN_is_bit_set(const WOLFSSL_BIGNUM* bn, int n)
-{
-    if (bn == NULL || bn->internal == NULL) {
-        WOLFSSL_MSG("bn NULL error");
-        return WOLFSSL_FAILURE;
-    }
-
-    if (n > DIGIT_BIT) {
-        WOLFSSL_MSG("input bit count too large");
-        return WOLFSSL_FAILURE;
-    }
-
-    return mp_is_bit_set((mp_int*)bn->internal, (mp_digit)n);
-}
-
-/* return code compliant with OpenSSL :
- *   1 if success, 0 else
- */
-int wolfSSL_BN_set_bit(WOLFSSL_BIGNUM* bn, int n)
-{
-    if (bn == NULL || bn->internal == NULL) {
-        WOLFSSL_MSG("bn NULL error");
-        return WOLFSSL_FAILURE;
-    }
-
-    if (mp_set_bit((mp_int*)bn->internal, n) != MP_OKAY) {
-        WOLFSSL_MSG("mp_set_int error");
-        return WOLFSSL_FAILURE;
-    }
-
-    return WOLFSSL_SUCCESS;
-}
-
-
-/* WOLFSSL_SUCCESS on ok */
-/* Note on use: this function expects str to be an even length. It is
- * converting pairs of bytes into 8-bit values. As an example, the RSA
- * public exponent is commonly 0x010001. To get it to convert, you need
- * to pass in the string "010001", it will fail if you use "10001". This
- * is an affect of how Base16_Decode() works.
- */
-int wolfSSL_BN_hex2bn(WOLFSSL_BIGNUM** bn, const char* str)
-{
-    int     ret     = 0;
-    word32  decSz   = 1024;
-#ifdef WOLFSSL_SMALL_STACK
-    byte*   decoded = NULL;
-#else
-    byte    decoded[1024];
-#endif
-    int     weOwn = 0;
-
-    WOLFSSL_MSG("wolfSSL_BN_hex2bn");
-
-#ifdef WOLFSSL_SMALL_STACK
-    decoded = (byte*)XMALLOC(decSz, NULL, DYNAMIC_TYPE_DER);
-    if (decoded == NULL)
-        return ret;
-#endif
-
-    if (str == NULL || str[0] == '\0')
-        WOLFSSL_MSG("Bad function argument");
-    else if (Base16_Decode((byte*)str, (int)XSTRLEN(str), decoded, &decSz) < 0)
-        WOLFSSL_MSG("Bad Base16_Decode error");
-    else if (bn == NULL)
-        ret = decSz;
-    else {
-        if (*bn == NULL) {
-            *bn = wolfSSL_BN_new();
-            if (*bn != NULL) {
-                weOwn = 1;
-            }
-        }
-
-        if (*bn == NULL)
-            WOLFSSL_MSG("BN new failed");
-        else if (wolfSSL_BN_bin2bn(decoded, decSz, *bn) == NULL) {
-            WOLFSSL_MSG("Bad bin2bn error");
-            if (weOwn == 1) {
-                wolfSSL_BN_free(*bn); /* Free new BN */
-            }
-        }
-        else
-            ret = WOLFSSL_SUCCESS;
-    }
-
-#ifdef WOLFSSL_SMALL_STACK
-    XFREE(decoded, NULL, DYNAMIC_TYPE_DER);
-#endif
-
-    return ret;
-}
-
-
-WOLFSSL_BIGNUM* wolfSSL_BN_dup(const WOLFSSL_BIGNUM* bn)
-{
-    WOLFSSL_BIGNUM* ret;
-
-    WOLFSSL_MSG("wolfSSL_BN_dup");
-
-    if (bn == NULL || bn->internal == NULL) {
-        WOLFSSL_MSG("bn NULL error");
-        return NULL;
-    }
-
-    ret = wolfSSL_BN_new();
-    if (ret == NULL) {
-        WOLFSSL_MSG("bn new error");
-        return NULL;
-    }
-
-    if (mp_copy((mp_int*)bn->internal, (mp_int*)ret->internal) != MP_OKAY) {
-        WOLFSSL_MSG("mp_copy error");
-        wolfSSL_BN_free(ret);
-        return NULL;
-    }
-
-    ret->neg = bn->neg;
-
-    return ret;
-}
-
-
-WOLFSSL_BIGNUM* wolfSSL_BN_copy(WOLFSSL_BIGNUM* r, const WOLFSSL_BIGNUM* bn)
-{
-    WOLFSSL_MSG("wolfSSL_BN_copy");
-
-    if (r == NULL || bn == NULL) {
-        WOLFSSL_MSG("r or bn NULL error");
-        return NULL;
-    }
-
-    if (mp_copy((mp_int*)bn->internal, (mp_int*)r->internal) != MP_OKAY) {
-        WOLFSSL_MSG("mp_copy error");
-        return NULL;
-    }
-
-    r->neg = bn->neg;
-
-    return r;
-}
-
-/* return code compliant with OpenSSL :
- *   1 if success, 0 else
- */
-int wolfSSL_BN_set_word(WOLFSSL_BIGNUM* bn, WOLFSSL_BN_ULONG w)
-{
-    WOLFSSL_MSG("wolfSSL_BN_set_word");
-
-    if (bn == NULL) {
-        WOLFSSL_MSG("bn NULL error");
-        return WOLFSSL_FAILURE;
-    }
-
-    if (mp_set_int((mp_int*)bn->internal, w) != MP_OKAY) {
-        WOLFSSL_MSG("mp_init_set_int error");
-        return WOLFSSL_FAILURE;
-    }
-
-    return WOLFSSL_SUCCESS;
-}
-
-
-/* Returns the big number as an unsigned long if possible.
- *
- * bn  big number structure to get value from
- *
- * Returns value or 0xFFFFFFFFL if bigger than unsigned long.
- */
-unsigned long wolfSSL_BN_get_word(const WOLFSSL_BIGNUM* bn)
-{
-    mp_int* mp;
-
-    WOLFSSL_MSG("wolfSSL_BN_get_word");
-
-    if (bn == NULL) {
-        WOLFSSL_MSG("Invalid argument");
-        return 0;
-    }
-
-    if (wolfSSL_BN_num_bytes(bn) > (int)sizeof(unsigned long)) {
-        WOLFSSL_MSG("bignum is larger than unsigned long");
-        return 0xFFFFFFFFL;
-    }
-    mp = (mp_int*)bn->internal;
-
-    return (unsigned long)(mp->dp[0]);
-}
-
-/* return code compliant with OpenSSL :
- *   number length in decimal if success, 0 if error
- */
-#ifndef NO_WOLFSSL_STUB
-int wolfSSL_BN_dec2bn(WOLFSSL_BIGNUM** bn, const char* str)
-{
-    (void)bn;
-    (void)str;
-
-    WOLFSSL_MSG("wolfSSL_BN_dec2bn");
-    WOLFSSL_STUB("BN_dec2bn");
-    return SSL_FAILURE;
-}
-#endif
-
-#if defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY)
-char *wolfSSL_BN_bn2dec(const WOLFSSL_BIGNUM *bn)
+                }
+                default:
+                    WOLFSSL_MSG("Type not supported in wolfSSL_ASN1_item_free");
+            }
+        }
+        XFREE(val, NULL, DYNAMIC_TYPE_OPENSSL);
+    }
+}
+
+#define bufLenOrNull(buf, len) (buf ? buf + len : NULL)
+
+static int i2dProcessMembers(const void *src, byte *buf,
+                          const WOLFSSL_ASN1_TEMPLATE *members, size_t mcount)
+{
+    const WOLFSSL_ASN1_TEMPLATE *member = NULL;
+    int len = 0, ret;
+    size_t i;
+    WOLFSSL_ENTER("processMembers");
+    for (member = members, i = 0; i < mcount; member++, i++) {
+        switch (member->type) {
+            case WOLFSSL_X509_ALGOR_ASN1:
+            {
+                word32 oid = 0;
+                word32 idx = 0;
+                const WOLFSSL_X509_ALGOR* algor = *(const WOLFSSL_X509_ALGOR**)
+                                                   (((byte*)src) + member->offset);
+                if (!algor->algorithm) {
+                    WOLFSSL_LEAVE("processMembers", WOLFSSL_FAILURE);
+                    return WOLFSSL_FAILURE;
+                }
+
+                if (GetObjectId(algor->algorithm->obj, &idx, &oid,
+                        algor->algorithm->grp, algor->algorithm->objSz) < 0) {
+                    WOLFSSL_MSG("Issue getting OID of object");
+                    return -1;
+                }
+
+                ret = SetAlgoID(oid, bufLenOrNull(buf, len),
+                                algor->algorithm->grp, 0);
+                if (!ret) {
+                    return WOLFSSL_FAILURE;
+                }
+                len += ret;
+                break;
+            }
+            case WOLFSSL_ASN1_BIT_STRING_ASN1:
+            {
+                const WOLFSSL_ASN1_BIT_STRING* bit_str;
+                bit_str = *(const WOLFSSL_ASN1_BIT_STRING**)
+                           (((byte*)src) + member->offset);
+                len += SetBitString(bit_str->length, 0, bufLenOrNull(buf, len));
+                if (buf && bit_str->data) {
+                    XMEMCPY(buf + len, bit_str->data, bit_str->length);
+                }
+                len += bit_str->length;
+                break;
+            }
+            default:
+                WOLFSSL_MSG("Type not support in processMembers");
+                WOLFSSL_LEAVE("processMembers", WOLFSSL_FAILURE);
+                return WOLFSSL_FAILURE;
+        }
+    }
+    WOLFSSL_LEAVE("processMembers", len);
+    return len;
+}
+
+int wolfSSL_ASN1_item_i2d(const void *src, byte **dest,
+                          const WOLFSSL_ASN1_ITEM *template)
 {
     int len = 0;
-    char *buf;
-
-    WOLFSSL_MSG("wolfSSL_BN_bn2dec");
-
-    if (bn == NULL || bn->internal == NULL) {
-        WOLFSSL_MSG("bn NULL error");
-        return NULL;
-    }
-
-    if (mp_radix_size((mp_int*)bn->internal, MP_RADIX_DEC, &len) != MP_OKAY) {
-        WOLFSSL_MSG("mp_radix_size failure");
-        return NULL;
-    }
-
-    buf = (char*) XMALLOC(len, NULL, DYNAMIC_TYPE_OPENSSL);
-    if (buf == NULL) {
-        WOLFSSL_MSG("BN_bn2dec malloc buffer failure");
-        return NULL;
-    }
-
-    if (mp_todecimal((mp_int*)bn->internal, buf) != MP_OKAY) {
-        XFREE(buf, NULL, DYNAMIC_TYPE_ECC);
-        return NULL;
-    }
-
-    return buf;
-}
-#else
-char* wolfSSL_BN_bn2dec(const WOLFSSL_BIGNUM* bn)
-{
-    (void)bn;
-
-    WOLFSSL_MSG("wolfSSL_BN_bn2dec");
-
-    return NULL;
-}
-#endif /* defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) */
-
-/* return code compliant with OpenSSL :
- *   1 if success, 0 else
- */
-int wolfSSL_BN_lshift(WOLFSSL_BIGNUM *r, const WOLFSSL_BIGNUM *bn, int n)
-{
-    WOLFSSL_MSG("wolfSSL_BN_lshift");
-
-    if (r == NULL || r->internal == NULL || bn == NULL || bn->internal == NULL){
-        WOLFSSL_MSG("bn NULL error");
-        return WOLFSSL_FAILURE;
-    }
-
-    if (mp_mul_2d((mp_int*)bn->internal, n, (mp_int*)r->internal) != MP_OKAY) {
-        WOLFSSL_MSG("mp_mul_2d error");
-        return WOLFSSL_FAILURE;
-    }
-
-    return WOLFSSL_SUCCESS;
-}
-
-/* return code compliant with OpenSSL :
- *   1 if success, 0 else
- */
-int wolfSSL_BN_rshift(WOLFSSL_BIGNUM *r, const WOLFSSL_BIGNUM *bn, int n)
-{
-    WOLFSSL_MSG("wolfSSL_BN_rshift");
-
-    if (r == NULL || r->internal == NULL || bn == NULL || bn->internal == NULL){
-        WOLFSSL_MSG("bn NULL error");
-        return WOLFSSL_FAILURE;
-    }
-
-    if (mp_div_2d((mp_int*)bn->internal, n,
-                  (mp_int*)r->internal, NULL) != MP_OKAY) {
-        WOLFSSL_MSG("mp_mul_2d error");
-        return WOLFSSL_FAILURE;
-    }
-
-    return WOLFSSL_SUCCESS;
-}
-
-/* return code compliant with OpenSSL :
- *   1 if success, 0 else
- */
-int wolfSSL_BN_add_word(WOLFSSL_BIGNUM *bn, WOLFSSL_BN_ULONG w)
-{
-    WOLFSSL_MSG("wolfSSL_BN_add_word");
-
-    if (bn == NULL || bn->internal == NULL) {
-        WOLFSSL_MSG("bn NULL error");
-        return WOLFSSL_FAILURE;
-    }
-
-    if (mp_add_d((mp_int*)bn->internal, w, (mp_int*)bn->internal) != MP_OKAY) {
-        WOLFSSL_MSG("mp_add_d error");
-        return WOLFSSL_FAILURE;
-    }
-
-    return WOLFSSL_SUCCESS;
-}
-
-/* return code compliant with OpenSSL :
- *   1 if success, 0 else
- */
-int wolfSSL_BN_add(WOLFSSL_BIGNUM *r, WOLFSSL_BIGNUM *a, WOLFSSL_BIGNUM *b)
-{
-    WOLFSSL_MSG("wolfSSL_BN_add");
-
-    if (r == NULL || r->internal == NULL || a == NULL || a->internal == NULL ||
-        b == NULL || b->internal == NULL) {
-        WOLFSSL_MSG("bn NULL error");
-        return WOLFSSL_FAILURE;
-    }
-
-    if (mp_add((mp_int*)a->internal, (mp_int*)b->internal,
-               (mp_int*)r->internal) != MP_OKAY) {
-        WOLFSSL_MSG("mp_add_d error");
-        return WOLFSSL_FAILURE;
-    }
-
-    return WOLFSSL_SUCCESS;
-}
-
-#ifdef WOLFSSL_KEY_GEN
-
-/* return code compliant with OpenSSL :
- *   1 if prime, 0 if not, -1 if error
- */
-int wolfSSL_BN_is_prime_ex(const WOLFSSL_BIGNUM *bn, int nbchecks,
-                           WOLFSSL_BN_CTX *ctx, WOLFSSL_BN_GENCB *cb)
-{
-    int res;
-
-    (void)ctx;
-    (void)cb;
-
-    WOLFSSL_MSG("wolfSSL_BN_is_prime_ex");
-
-    if (bn == NULL || bn->internal == NULL) {
-        WOLFSSL_MSG("bn NULL error");
-        return WOLFSSL_FATAL_ERROR;
-    }
-
-    if (mp_prime_is_prime((mp_int*)bn->internal, nbchecks, &res) != MP_OKAY) {
-        WOLFSSL_MSG("mp_prime_is_prime error");
-        return WOLFSSL_FATAL_ERROR;
-    }
-
-    if (res != MP_YES) {
-        WOLFSSL_MSG("mp_prime_is_prime not prime");
-        return WOLFSSL_FAILURE;
-    }
-
-    return WOLFSSL_SUCCESS;
-}
-
-/* return code compliant with OpenSSL :
- *   (bn mod w) if success, -1 if error
- */
-WOLFSSL_BN_ULONG wolfSSL_BN_mod_word(const WOLFSSL_BIGNUM *bn,
-                                     WOLFSSL_BN_ULONG w)
-{
-    WOLFSSL_BN_ULONG ret = 0;
-
-    WOLFSSL_MSG("wolfSSL_BN_mod_word");
-
-    if (bn == NULL || bn->internal == NULL) {
-        WOLFSSL_MSG("bn NULL error");
-        return (WOLFSSL_BN_ULONG)WOLFSSL_FATAL_ERROR;
-    }
-
-    if (mp_mod_d((mp_int*)bn->internal, w, &ret) != MP_OKAY) {
-        WOLFSSL_MSG("mp_add_d error");
-        return (WOLFSSL_BN_ULONG)WOLFSSL_FATAL_ERROR;
-    }
-
-    return ret;
-}
-#endif /* #ifdef WOLFSSL_KEY_GEN */
-
-char *wolfSSL_BN_bn2hex(const WOLFSSL_BIGNUM *bn)
-{
-#if defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || defined(DEBUG_WOLFSSL)
-    int len = 0;
-    char *buf;
-
-    WOLFSSL_ENTER("wolfSSL_BN_bn2hex");
-
-    if (bn == NULL || bn->internal == NULL) {
-        WOLFSSL_MSG("bn NULL error");
-        return NULL;
-    }
-
-    if (mp_radix_size((mp_int*)bn->internal, MP_RADIX_HEX, &len) != MP_OKAY) {
-        WOLFSSL_MSG("mp_radix_size failure");
-        return NULL;
-    }
-
-    buf = (char*) XMALLOC(len, NULL, DYNAMIC_TYPE_ECC);
-    if (buf == NULL) {
-        WOLFSSL_MSG("BN_bn2hex malloc buffer failure");
-        return NULL;
-    }
-
-    if (mp_tohex((mp_int*)bn->internal, buf) != MP_OKAY) {
-        XFREE(buf, NULL, DYNAMIC_TYPE_ECC);
-        return NULL;
-    }
-
-    return buf;
-#else
-    (void)bn;
-    WOLFSSL_MSG("wolfSSL_BN_bn2hex not compiled in");
-    return (char*)"";
-#endif
-}
-
-#ifndef NO_FILESYSTEM
-/* return code compliant with OpenSSL :
- *   1 if success, 0 if error
- */
-int wolfSSL_BN_print_fp(XFILE fp, const WOLFSSL_BIGNUM *bn)
-{
-#if defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || defined(DEBUG_WOLFSSL)
-    char *buf;
-
-    WOLFSSL_ENTER("wolfSSL_BN_print_fp");
-
-    if (fp == NULL || bn == NULL || bn->internal == NULL) {
-        WOLFSSL_MSG("bn NULL error");
-        return WOLFSSL_FAILURE;
-    }
-
-    buf = wolfSSL_BN_bn2hex(bn);
-    if (buf == NULL) {
-        WOLFSSL_MSG("wolfSSL_BN_bn2hex failure");
-        return WOLFSSL_FAILURE;
-    }
-
-    fprintf(fp, "%s", buf);
-    XFREE(buf, NULL, DYNAMIC_TYPE_ECC);
-
-    return WOLFSSL_SUCCESS;
-#else
-    (void)fp;
-    (void)bn;
-
-    WOLFSSL_MSG("wolfSSL_BN_print_fp not compiled in");
-
-    return WOLFSSL_SUCCESS;
-#endif
-}
-#endif /* !NO_FILESYSTEM */
-
-
-WOLFSSL_BIGNUM *wolfSSL_BN_CTX_get(WOLFSSL_BN_CTX *ctx)
-{
-    /* ctx is not used, return new Bignum */
-    (void)ctx;
-
-    WOLFSSL_ENTER("wolfSSL_BN_CTX_get");
-
-    return wolfSSL_BN_new();
-}
-
-#ifndef NO_WOLFSSL_STUB
-void wolfSSL_BN_CTX_start(WOLFSSL_BN_CTX *ctx)
-{
-    (void)ctx;
-
-    WOLFSSL_ENTER("wolfSSL_BN_CTX_start");
-    WOLFSSL_STUB("BN_CTX_start");
-    WOLFSSL_MSG("wolfSSL_BN_CTX_start TBD");
-}
-#endif
-
-
-WOLFSSL_BIGNUM *wolfSSL_BN_mod_inverse(WOLFSSL_BIGNUM *r,
-                                       WOLFSSL_BIGNUM *a,
-                                       const WOLFSSL_BIGNUM *n,
-                                       WOLFSSL_BN_CTX *ctx)
-{
-    int dynamic = 0;
-
-    /* ctx is not used */
-    (void)ctx;
-
-    WOLFSSL_ENTER("wolfSSL_BN_mod_inverse");
-
-    /* check parameter */
-    if (r == NULL) {
-        r = wolfSSL_BN_new();
-        if (r == NULL){
-            WOLFSSL_MSG("WolfSSL_BN_new() failed");
-            return NULL;
-        }
-        dynamic = 1;
-    }
-
-    if (a == NULL) {
-        WOLFSSL_MSG("a NULL error");
-        if (dynamic == 1) {
-            wolfSSL_BN_free(r);
-        }
-        return NULL;
-    }
-
-    if (n == NULL) {
-        WOLFSSL_MSG("n NULL error");
-        if (dynamic == 1) {
-            wolfSSL_BN_free(r);
-        }
-        return NULL;
-    }
-
-    /* Compute inverse of a modulo n and return r */
-    if (mp_invmod((mp_int *)a->internal,(mp_int *)n->internal,
-                  (mp_int*)r->internal) == MP_VAL){
-        WOLFSSL_MSG("mp_invmod() error");
-        if (dynamic == 1) {
-            wolfSSL_BN_free(r);
-        }
-        return NULL;
-    }
-
-    return  r;
-}
+    byte *buf = NULL;
+
+    WOLFSSL_ENTER("wolfSSL_ASN1_item_i2d");
+
+    if (!src || !template) {
+        WOLFSSL_LEAVE("wolfSSL_ASN1_item_i2d", WOLFSSL_FAILURE);
+        return WOLFSSL_FAILURE;
+    }
+
+    if (dest && !*dest) {
+        len = wolfSSL_ASN1_item_i2d(src, NULL, template);
+        if (!len) {
+            goto error;
+        }
+        buf = (byte*)XMALLOC(len, NULL, DYNAMIC_TYPE_ASN1);
+        if (!buf) {
+            goto error;
+        }
+        len = 0;
+    }
+
+    switch (template->type) {
+        case ASN_SEQUENCE:
+        {
+            int seq_len = i2dProcessMembers(src, NULL, template->members,
+                                         template->mcount);
+            if (!seq_len) {
+                goto error;
+            }
+            len += SetSequence(seq_len, bufLenOrNull(buf, len));
+            if (buf &&
+                    i2dProcessMembers(src, bufLenOrNull(buf, len), template->members,
+                                   template->mcount) != seq_len) {
+                WOLFSSL_MSG("Inconsistent sequence length");
+                goto error;
+            }
+            len += seq_len;
+            break;
+        }
+        default:
+            WOLFSSL_MSG("Type not supported in wolfSSL_ASN1_item_i2d");
+            goto error;
+    }
+
+    if (dest && !*dest) {
+        *dest = buf;
+    }
+    else if (dest && *dest && buf) {
+        /* *dest length is not checked because the user is responsible
+         * for providing a long enough buffer */
+        XMEMCPY(*dest, buf, len);
+    }
+
+    WOLFSSL_LEAVE("wolfSSL_ASN1_item_i2d", len);
+    return len;
+error:
+    if (buf) {
+        XFREE(buf, NULL, DYNAMIC_TYPE_ASN1);
+    }
+    WOLFSSL_LEAVE("wolfSSL_ASN1_item_i2d", WOLFSSL_FAILURE);
+    return WOLFSSL_FAILURE;
+}
+#endif /* OPENSSL_ALL */
 
 #ifndef NO_DH
 
 static void InitwolfSSL_DH(WOLFSSL_DH* dh)
 {
     if (dh) {
-        dh->p        = NULL;
-        dh->g        = NULL;
-        dh->q        = NULL;
-        dh->pub_key  = NULL;
-        dh->priv_key = NULL;
-        dh->internal = NULL;
-        dh->inSet    = 0;
-        dh->exSet    = 0;
+        XMEMSET(dh, 0, sizeof(WOLFSSL_DH));
     }
 }
 
@@ -22931,18 +28473,29 @@
     }
 }
 
-
-static int SetDhInternal(WOLFSSL_DH* dh)
+int SetDhInternal(WOLFSSL_DH* dh)
 {
     int            ret = WOLFSSL_FATAL_ERROR;
     int            pSz = 1024;
     int            gSz = 1024;
+#if defined(WOLFSSL_QT) || defined(OPENSSL_ALL)
+    int            privSz = 256; /* Up to 2048-bit */
+    int            pubSz  = 256;
+#endif
 #ifdef WOLFSSL_SMALL_STACK
     unsigned char* p   = NULL;
     unsigned char* g   = NULL;
+    #if defined(WOLFSSL_QT) || defined(OPENSSL_ALL)
+        unsigned char* priv_key = NULL;
+        unsigned char* pub_key = NULL;
+    #endif
 #else
     unsigned char  p[1024];
     unsigned char  g[1024];
+    #if defined(WOLFSSL_QT) || defined(OPENSSL_ALL)
+        unsigned char priv_key[256];
+        unsigned char pub_key[256];
+    #endif
 #endif
 
     WOLFSSL_ENTER("SetDhInternal");
@@ -22953,17 +28506,46 @@
         WOLFSSL_MSG("Bad p internal size");
     else if (wolfSSL_BN_bn2bin(dh->g, NULL) > gSz)
         WOLFSSL_MSG("Bad g internal size");
+    #if defined(WOLFSSL_QT) || defined(OPENSSL_ALL)
+    else if (wolfSSL_BN_bn2bin(dh->priv_key, NULL) > privSz)
+        WOLFSSL_MSG("Bad private key internal size");
+    else if (wolfSSL_BN_bn2bin(dh->pub_key, NULL) > privSz)
+        WOLFSSL_MSG("Bad public key internal size");
+    #endif
     else {
     #ifdef WOLFSSL_SMALL_STACK
         p = (unsigned char*)XMALLOC(pSz, NULL, DYNAMIC_TYPE_PUBLIC_KEY);
         g = (unsigned char*)XMALLOC(gSz, NULL, DYNAMIC_TYPE_PUBLIC_KEY);
 
+    #if defined(WOLFSSL_QT) || defined(OPENSSL_ALL)
+        priv_key = (unsigned char*)XMALLOC(privSz,NULL,DYNAMIC_TYPE_PRIVATE_KEY);
+        pub_key  = (unsigned char*)XMALLOC(pubSz,NULL,DYNAMIC_TYPE_PUBLIC_KEY);
+    #endif
+
         if (p == NULL || g == NULL) {
             XFREE(p, NULL, DYNAMIC_TYPE_PUBLIC_KEY);
             XFREE(g, NULL, DYNAMIC_TYPE_PUBLIC_KEY);
             return ret;
         }
-    #endif
+    #endif /* WOLFSSL_SMALL_STACK */
+
+    #if defined(WOLFSSL_QT) || defined(OPENSSL_ALL)
+        privSz = wolfSSL_BN_bn2bin(dh->priv_key, priv_key);
+        pubSz  = wolfSSL_BN_bn2bin(dh->pub_key,  pub_key);
+        if (privSz <= 0) {
+            WOLFSSL_MSG("No private key size.");
+        }
+        if (pubSz <= 0) {
+            WOLFSSL_MSG("No public key size.");
+        }
+        if (privSz > 0 || pubSz > 0) {
+            ret = wc_DhSetFullKeys((DhKey*)dh->internal,priv_key,privSz,
+                                    pub_key,pubSz);
+            if (ret == WOLFSSL_FAILURE) {
+                WOLFSSL_MSG("Failed setting private or public key.");
+            }
+        }
+    #endif /* WOLFSSL_QT || OPENSSL_ALL */
 
         pSz = wolfSSL_BN_bn2bin(dh->p, p);
         gSz = wolfSSL_BN_bn2bin(dh->g, g);
@@ -22980,12 +28562,57 @@
     #ifdef WOLFSSL_SMALL_STACK
         XFREE(p, NULL, DYNAMIC_TYPE_PUBLIC_KEY);
         XFREE(g, NULL, DYNAMIC_TYPE_PUBLIC_KEY);
-    #endif
-    }
-
-
-    return ret;
-}
+        #if defined(WOLFSSL_QT) || defined(OPENSSL_ALL)
+            XFREE(priv_key, NULL, DYNAMIC_TYPE_PRIVATE_KEY);
+            XFREE(pub_key,  NULL, DYNAMIC_TYPE_PUBLIC_KEY);
+        #endif
+    #endif
+    }
+
+
+    return ret;
+}
+
+#if !defined(NO_DH) && (defined(WOLFSSL_QT) || defined(OPENSSL_ALL) || defined(WOLFSSL_OPENSSH))
+/* Set the members of DhKey into WOLFSSL_DH
+ * DhKey was populated from wc_DhKeyDecode
+ */
+int SetDhExternal(WOLFSSL_DH *dh)
+{
+    DhKey *key;
+    WOLFSSL_MSG("Entering SetDhExternal");
+
+    if (dh == NULL || dh->internal == NULL) {
+        WOLFSSL_MSG("dh key NULL error");
+    }
+
+    key = (DhKey*)dh->internal;
+
+    if (SetIndividualExternal(&dh->p, &key->p) != WOLFSSL_SUCCESS) {
+        WOLFSSL_MSG("dh param p error");
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    if (SetIndividualExternal(&dh->g, &key->g) != WOLFSSL_SUCCESS) {
+        WOLFSSL_MSG("dh param g error");
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    if (SetIndividualExternal(&dh->priv_key, &key->priv) != WOLFSSL_SUCCESS) {
+        WOLFSSL_MSG("No DH Private Key");
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    if (SetIndividualExternal(&dh->pub_key, &key->pub) != WOLFSSL_SUCCESS) {
+        WOLFSSL_MSG("No DH Public Key");
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    dh->exSet = 1;
+
+    return WOLFSSL_SUCCESS;
+}
+#endif /* !NO_DH && (WOLFSSL_QT || OPENSSL_ALL) */
 
 /* return code compliant with OpenSSL :
  *   DH prime size in bytes if success, 0 if error
@@ -23000,6 +28627,63 @@
     return wolfSSL_BN_num_bytes(dh->p);
 }
 
+/* This sets a big number with the 768-bit prime from RFC 2409.
+ *
+ * bn  if not NULL then the big number structure is used. If NULL then a new
+ *     big number structure is created.
+ *
+ * Returns a WOLFSSL_BIGNUM structure on success and NULL with failure.
+ */
+WOLFSSL_BIGNUM* wolfSSL_DH_768_prime(WOLFSSL_BIGNUM* bn)
+{
+    const char prm[] = {
+        "FFFFFFFFFFFFFFFFC90FDAA22168C234"
+        "C4C6628B80DC1CD129024E088A67CC74"
+        "020BBEA63B139B22514A08798E3404DD"
+        "EF9519B3CD3A431B302B0A6DF25F1437"
+        "4FE1356D6D51C245E485B576625E7EC6"
+        "F44C42E9A63A3620FFFFFFFFFFFFFFFF"
+    };
+
+    WOLFSSL_ENTER("wolfSSL_DH_768_prime");
+
+    if (wolfSSL_BN_hex2bn(&bn, prm) != SSL_SUCCESS) {
+        WOLFSSL_MSG("Error converting DH 768 prime to big number");
+        return NULL;
+    }
+
+    return bn;
+}
+
+/* This sets a big number with the 1024-bit prime from RFC 2409.
+ *
+ * bn  if not NULL then the big number structure is used. If NULL then a new
+ *     big number structure is created.
+ *
+ * Returns a WOLFSSL_BIGNUM structure on success and NULL with failure.
+ */
+WOLFSSL_BIGNUM* wolfSSL_DH_1024_prime(WOLFSSL_BIGNUM* bn)
+{
+    const char prm[] = {
+        "FFFFFFFFFFFFFFFFC90FDAA22168C234"
+        "C4C6628B80DC1CD129024E088A67CC74"
+        "020BBEA63B139B22514A08798E3404DD"
+        "EF9519B3CD3A431B302B0A6DF25F1437"
+        "4FE1356D6D51C245E485B576625E7EC6"
+        "F44C42E9A637ED6B0BFF5CB6F406B7ED"
+        "EE386BFB5A899FA5AE9F24117C4B1FE6"
+        "49286651ECE65381FFFFFFFFFFFFFFFF"
+    };
+
+    WOLFSSL_ENTER("wolfSSL_DH_1024_prime");
+
+    if (wolfSSL_BN_hex2bn(&bn, prm) != SSL_SUCCESS) {
+        WOLFSSL_MSG("Error converting DH 1024 prime to big number");
+        return NULL;
+    }
+
+    return bn;
+}
 
 /* This sets a big number with the 1536-bit prime from RFC 3526.
  *
@@ -23035,6 +28719,300 @@
     return bn;
 }
 
+/* This sets a big number with the 2048-bit prime from RFC 3526.
+ *
+ * bn  if not NULL then the big number structure is used. If NULL then a new
+ *     big number structure is created.
+ *
+ * Returns a WOLFSSL_BIGNUM structure on success and NULL with failure.
+ */
+WOLFSSL_BIGNUM* wolfSSL_DH_2048_prime(WOLFSSL_BIGNUM* bn)
+{
+    const char prm[] = {
+        "FFFFFFFFFFFFFFFFC90FDAA22168C234"
+        "C4C6628B80DC1CD129024E088A67CC74"
+        "020BBEA63B139B22514A08798E3404DD"
+        "EF9519B3CD3A431B302B0A6DF25F1437"
+        "4FE1356D6D51C245E485B576625E7EC6"
+        "F44C42E9A637ED6B0BFF5CB6F406B7ED"
+        "EE386BFB5A899FA5AE9F24117C4B1FE6"
+        "49286651ECE45B3DC2007CB8A163BF05"
+        "98DA48361C55D39A69163FA8FD24CF5F"
+        "83655D23DCA3AD961C62F356208552BB"
+        "9ED529077096966D670C354E4ABC9804"
+        "F1746C08CA18217C32905E462E36CE3B"
+        "E39E772C180E86039B2783A2EC07A28F"
+        "B5C55DF06F4C52C9DE2BCBF695581718"
+        "3995497CEA956AE515D2261898FA0510"
+        "15728E5A8AACAA68FFFFFFFFFFFFFFFF"
+    };
+
+    WOLFSSL_ENTER("wolfSSL_DH_2048_prime");
+
+    if (wolfSSL_BN_hex2bn(&bn, prm) != SSL_SUCCESS) {
+        WOLFSSL_MSG("Error converting DH 2048 prime to big number");
+        return NULL;
+    }
+
+    return bn;
+}
+
+/* This sets a big number with the 3072-bit prime from RFC 3526.
+ *
+ * bn  if not NULL then the big number structure is used. If NULL then a new
+ *     big number structure is created.
+ *
+ * Returns a WOLFSSL_BIGNUM structure on success and NULL with failure.
+ */
+WOLFSSL_BIGNUM* wolfSSL_DH_3072_prime(WOLFSSL_BIGNUM* bn)
+{
+    const char prm[] = {
+        "FFFFFFFFFFFFFFFFC90FDAA22168C234"
+        "C4C6628B80DC1CD129024E088A67CC74"
+        "020BBEA63B139B22514A08798E3404DD"
+        "EF9519B3CD3A431B302B0A6DF25F1437"
+        "4FE1356D6D51C245E485B576625E7EC6"
+        "F44C42E9A637ED6B0BFF5CB6F406B7ED"
+        "EE386BFB5A899FA5AE9F24117C4B1FE6"
+        "49286651ECE45B3DC2007CB8A163BF05"
+        "98DA48361C55D39A69163FA8FD24CF5F"
+        "83655D23DCA3AD961C62F356208552BB"
+        "9ED529077096966D670C354E4ABC9804"
+        "F1746C08CA18217C32905E462E36CE3B"
+        "E39E772C180E86039B2783A2EC07A28F"
+        "B5C55DF06F4C52C9DE2BCBF695581718"
+        "3995497CEA956AE515D2261898FA0510"
+        "15728E5A8AAAC42DAD33170D04507A33"
+        "A85521ABDF1CBA64ECFB850458DBEF0A"
+        "8AEA71575D060C7DB3970F85A6E1E4C7"
+        "ABF5AE8CDB0933D71E8C94E04A25619D"
+        "CEE3D2261AD2EE6BF12FFA06D98A0864"
+        "D87602733EC86A64521F2B18177B200C"
+        "BBE117577A615D6C770988C0BAD946E2"
+        "08E24FA074E5AB3143DB5BFCE0FD108E"
+        "4B82D120A93AD2CAFFFFFFFFFFFFFFFF"
+    };
+
+    WOLFSSL_ENTER("wolfSSL_DH_3072_prime");
+
+    if (wolfSSL_BN_hex2bn(&bn, prm) != SSL_SUCCESS) {
+        WOLFSSL_MSG("Error converting DH 3072 prime to big number");
+        return NULL;
+    }
+
+    return bn;
+}
+
+/* This sets a big number with the 4096-bit prime from RFC 3526.
+ *
+ * bn  if not NULL then the big number structure is used. If NULL then a new
+ *     big number structure is created.
+ *
+ * Returns a WOLFSSL_BIGNUM structure on success and NULL with failure.
+ */
+WOLFSSL_BIGNUM* wolfSSL_DH_4096_prime(WOLFSSL_BIGNUM* bn)
+{
+    const char prm[] = {
+        "FFFFFFFFFFFFFFFFC90FDAA22168C234"
+        "C4C6628B80DC1CD129024E088A67CC74"
+        "020BBEA63B139B22514A08798E3404DD"
+        "EF9519B3CD3A431B302B0A6DF25F1437"
+        "4FE1356D6D51C245E485B576625E7EC6"
+        "F44C42E9A637ED6B0BFF5CB6F406B7ED"
+        "EE386BFB5A899FA5AE9F24117C4B1FE6"
+        "49286651ECE45B3DC2007CB8A163BF05"
+        "98DA48361C55D39A69163FA8FD24CF5F"
+        "83655D23DCA3AD961C62F356208552BB"
+        "9ED529077096966D670C354E4ABC9804"
+        "F1746C08CA18217C32905E462E36CE3B"
+        "E39E772C180E86039B2783A2EC07A28F"
+        "B5C55DF06F4C52C9DE2BCBF695581718"
+        "3995497CEA956AE515D2261898FA0510"
+        "15728E5A8AAAC42DAD33170D04507A33"
+        "A85521ABDF1CBA64ECFB850458DBEF0A"
+        "8AEA71575D060C7DB3970F85A6E1E4C7"
+        "ABF5AE8CDB0933D71E8C94E04A25619D"
+        "CEE3D2261AD2EE6BF12FFA06D98A0864"
+        "D87602733EC86A64521F2B18177B200C"
+        "BBE117577A615D6C770988C0BAD946E2"
+        "08E24FA074E5AB3143DB5BFCE0FD108E"
+        "4B82D120A92108011A723C12A787E6D7"
+        "88719A10BDBA5B2699C327186AF4E23C"
+        "1A946834B6150BDA2583E9CA2AD44CE8"
+        "DBBBC2DB04DE8EF92E8EFC141FBECAA6"
+        "287C59474E6BC05D99B2964FA090C3A2"
+        "233BA186515BE7ED1F612970CEE2D7AF"
+        "B81BDD762170481CD0069127D5B05AA9"
+        "93B4EA988D8FDDC186FFB7DC90A6C08F"
+        "4DF435C934063199FFFFFFFFFFFFFFFF"
+    };
+
+    WOLFSSL_ENTER("wolfSSL_DH_4096_prime");
+
+    if (wolfSSL_BN_hex2bn(&bn, prm) != SSL_SUCCESS) {
+        WOLFSSL_MSG("Error converting DH 4096 prime to big number");
+        return NULL;
+    }
+
+    return bn;
+}
+
+/* This sets a big number with the 6144-bit prime from RFC 3526.
+ *
+ * bn  if not NULL then the big number structure is used. If NULL then a new
+ *     big number structure is created.
+ *
+ * Returns a WOLFSSL_BIGNUM structure on success and NULL with failure.
+ */
+WOLFSSL_BIGNUM* wolfSSL_DH_6144_prime(WOLFSSL_BIGNUM* bn)
+{
+    const char prm[] = {
+        "FFFFFFFFFFFFFFFFC90FDAA22168C234"
+        "C4C6628B80DC1CD129024E088A67CC74"
+        "020BBEA63B139B22514A08798E3404DD"
+        "EF9519B3CD3A431B302B0A6DF25F1437"
+        "4FE1356D6D51C245E485B576625E7EC6"
+        "F44C42E9A637ED6B0BFF5CB6F406B7ED"
+        "EE386BFB5A899FA5AE9F24117C4B1FE6"
+        "49286651ECE45B3DC2007CB8A163BF05"
+        "98DA48361C55D39A69163FA8FD24CF5F"
+        "83655D23DCA3AD961C62F356208552BB"
+        "9ED529077096966D670C354E4ABC9804"
+        "F1746C08CA18217C32905E462E36CE3B"
+        "E39E772C180E86039B2783A2EC07A28F"
+        "B5C55DF06F4C52C9DE2BCBF695581718"
+        "3995497CEA956AE515D2261898FA0510"
+        "15728E5A8AAAC42DAD33170D04507A33"
+        "A85521ABDF1CBA64ECFB850458DBEF0A"
+        "8AEA71575D060C7DB3970F85A6E1E4C7"
+        "ABF5AE8CDB0933D71E8C94E04A25619D"
+        "CEE3D2261AD2EE6BF12FFA06D98A0864"
+        "D87602733EC86A64521F2B18177B200C"
+        "BBE117577A615D6C770988C0BAD946E2"
+        "08E24FA074E5AB3143DB5BFCE0FD108E"
+        "4B82D120A92108011A723C12A787E6D7"
+        "88719A10BDBA5B2699C327186AF4E23C"
+        "1A946834B6150BDA2583E9CA2AD44CE8"
+        "DBBBC2DB04DE8EF92E8EFC141FBECAA6"
+        "287C59474E6BC05D99B2964FA090C3A2"
+        "233BA186515BE7ED1F612970CEE2D7AF"
+        "B81BDD762170481CD0069127D5B05AA9"
+        "93B4EA988D8FDDC186FFB7DC90A6C08F"
+        "4DF435C93402849236C3FAB4D27C7026"
+        "C1D4DCB2602646DEC9751E763DBA37BD"
+        "F8FF9406AD9E530EE5DB382F413001AE"
+        "B06A53ED9027D831179727B0865A8918"
+        "DA3EDBEBCF9B14ED44CE6CBACED4BB1B"
+        "DB7F1447E6CC254B332051512BD7AF42"
+        "6FB8F401378CD2BF5983CA01C64B92EC"
+        "F032EA15D1721D03F482D7CE6E74FEF6"
+        "D55E702F46980C82B5A84031900B1C9E"
+        "59E7C97FBEC7E8F323A97A7E36CC88BE"
+        "0F1D45B7FF585AC54BD407B22B4154AA"
+        "CC8F6D7EBF48E1D814CC5ED20F8037E0"
+        "A79715EEF29BE32806A1D58BB7C5DA76"
+        "F550AA3D8A1FBFF0EB19CCB1A313D55C"
+        "DA56C9EC2EF29632387FE8D76E3C0468"
+        "043E8F663F4860EE12BF2D5B0B7474D6"
+        "E694F91E6DCC4024FFFFFFFFFFFFFFFF"
+    };
+
+    WOLFSSL_ENTER("wolfSSL_DH_6144_prime");
+
+    if (wolfSSL_BN_hex2bn(&bn, prm) != SSL_SUCCESS) {
+        WOLFSSL_MSG("Error converting DH 6144 prime to big number");
+        return NULL;
+    }
+
+    return bn;
+}
+
+
+/* This sets a big number with the 8192-bit prime from RFC 3526.
+ *
+ * bn  if not NULL then the big number structure is used. If NULL then a new
+ *     big number structure is created.
+ *
+ * Returns a WOLFSSL_BIGNUM structure on success and NULL with failure.
+ */
+WOLFSSL_BIGNUM* wolfSSL_DH_8192_prime(WOLFSSL_BIGNUM* bn)
+{
+    const char prm[] = {
+        "FFFFFFFFFFFFFFFFC90FDAA22168C234"
+        "C4C6628B80DC1CD129024E088A67CC74"
+        "020BBEA63B139B22514A08798E3404DD"
+        "EF9519B3CD3A431B302B0A6DF25F1437"
+        "4FE1356D6D51C245E485B576625E7EC6"
+        "F44C42E9A637ED6B0BFF5CB6F406B7ED"
+        "EE386BFB5A899FA5AE9F24117C4B1FE6"
+        "49286651ECE45B3DC2007CB8A163BF05"
+        "98DA48361C55D39A69163FA8FD24CF5F"
+        "83655D23DCA3AD961C62F356208552BB"
+        "9ED529077096966D670C354E4ABC9804"
+        "F1746C08CA18217C32905E462E36CE3B"
+        "E39E772C180E86039B2783A2EC07A28F"
+        "B5C55DF06F4C52C9DE2BCBF695581718"
+        "3995497CEA956AE515D2261898FA0510"
+        "15728E5A8AAAC42DAD33170D04507A33"
+        "A85521ABDF1CBA64ECFB850458DBEF0A"
+        "8AEA71575D060C7DB3970F85A6E1E4C7"
+        "ABF5AE8CDB0933D71E8C94E04A25619D"
+        "CEE3D2261AD2EE6BF12FFA06D98A0864"
+        "D87602733EC86A64521F2B18177B200C"
+        "BBE117577A615D6C770988C0BAD946E2"
+        "08E24FA074E5AB3143DB5BFCE0FD108E"
+        "4B82D120A92108011A723C12A787E6D7"
+        "88719A10BDBA5B2699C327186AF4E23C"
+        "1A946834B6150BDA2583E9CA2AD44CE8"
+        "DBBBC2DB04DE8EF92E8EFC141FBECAA6"
+        "287C59474E6BC05D99B2964FA090C3A2"
+        "233BA186515BE7ED1F612970CEE2D7AF"
+        "B81BDD762170481CD0069127D5B05AA9"
+        "93B4EA988D8FDDC186FFB7DC90A6C08F"
+        "4DF435C93402849236C3FAB4D27C7026"
+        "C1D4DCB2602646DEC9751E763DBA37BD"
+        "F8FF9406AD9E530EE5DB382F413001AE"
+        "B06A53ED9027D831179727B0865A8918"
+        "DA3EDBEBCF9B14ED44CE6CBACED4BB1B"
+        "DB7F1447E6CC254B332051512BD7AF42"
+        "6FB8F401378CD2BF5983CA01C64B92EC"
+        "F032EA15D1721D03F482D7CE6E74FEF6"
+        "D55E702F46980C82B5A84031900B1C9E"
+        "59E7C97FBEC7E8F323A97A7E36CC88BE"
+        "0F1D45B7FF585AC54BD407B22B4154AA"
+        "CC8F6D7EBF48E1D814CC5ED20F8037E0"
+        "A79715EEF29BE32806A1D58BB7C5DA76"
+        "F550AA3D8A1FBFF0EB19CCB1A313D55C"
+        "DA56C9EC2EF29632387FE8D76E3C0468"
+        "043E8F663F4860EE12BF2D5B0B7474D6"
+        "E694F91E6DBE115974A3926F12FEE5E4"
+        "38777CB6A932DF8CD8BEC4D073B931BA"
+        "3BC832B68D9DD300741FA7BF8AFC47ED"
+        "2576F6936BA424663AAB639C5AE4F568"
+        "3423B4742BF1C978238F16CBE39D652D"
+        "E3FDB8BEFC848AD922222E04A4037C07"
+        "13EB57A81A23F0C73473FC646CEA306B"
+        "4BCBC8862F8385DDFA9D4B7FA2C087E8"
+        "79683303ED5BDD3A062B3CF5B3A278A6"
+        "6D2A13F83F44F82DDF310EE074AB6A36"
+        "4597E899A0255DC164F31CC50846851D"
+        "F9AB48195DED7EA1B1D510BD7EE74D73"
+        "FAF36BC31ECFA268359046F4EB879F92"
+        "4009438B481C6CD7889A002ED5EE382B"
+        "C9190DA6FC026E479558E4475677E9AA"
+        "9E3050E2765694DFC81F56E880B96E71"
+        "60C980DD98EDD3DFFFFFFFFFFFFFFFFF"
+    };
+
+    WOLFSSL_ENTER("wolfSSL_DH_8192_prime");
+
+    if (wolfSSL_BN_hex2bn(&bn, prm) != SSL_SUCCESS) {
+        WOLFSSL_MSG("Error converting DH 8192 prime to big number");
+        return NULL;
+    }
+
+    return bn;
+}
 
 /* return code compliant with OpenSSL :
  *   1 if success, 0 if error
@@ -23042,31 +29020,24 @@
 int wolfSSL_DH_generate_key(WOLFSSL_DH* dh)
 {
     int            ret    = WOLFSSL_FAILURE;
-    word32         pubSz  = 768;
-    word32         privSz = 768;
+    word32         pubSz  = 0;
+    word32         privSz = 0;
     int            initTmpRng = 0;
     WC_RNG*        rng    = NULL;
 #ifdef WOLFSSL_SMALL_STACK
+    WC_RNG*        tmpRNG;
+#else
+    WC_RNG         tmpRNG[1];
+#endif
     unsigned char* pub    = NULL;
     unsigned char* priv   = NULL;
-    WC_RNG*        tmpRNG = NULL;
-#else
-    unsigned char  pub [768];
-    unsigned char  priv[768];
-    WC_RNG         tmpRNG[1];
-#endif
 
     WOLFSSL_MSG("wolfSSL_DH_generate_key");
 
 #ifdef WOLFSSL_SMALL_STACK
     tmpRNG = (WC_RNG*)XMALLOC(sizeof(WC_RNG), NULL, DYNAMIC_TYPE_RNG);
-    pub    = (unsigned char*)XMALLOC(pubSz,   NULL, DYNAMIC_TYPE_PUBLIC_KEY);
-    priv   = (unsigned char*)XMALLOC(privSz,  NULL, DYNAMIC_TYPE_PRIVATE_KEY);
-
-    if (tmpRNG == NULL || pub == NULL || priv == NULL) {
+    if (tmpRNG == NULL) {
         XFREE(tmpRNG, NULL, DYNAMIC_TYPE_RNG);
-        XFREE(pub,    NULL, DYNAMIC_TYPE_PUBLIC_KEY);
-        XFREE(priv,   NULL, DYNAMIC_TYPE_PRIVATE_KEY);
         return ret;
     }
 #endif
@@ -23088,10 +29059,21 @@
     }
 
     if (rng) {
-       if (wc_DhGenerateKeyPair((DhKey*)dh->internal, rng, priv, &privSz,
+        pubSz = wolfSSL_BN_num_bytes(dh->p);
+        if (dh->length) {
+            privSz = dh->length/8; /* to bytes */
+        } else {
+            privSz = pubSz;
+        }
+        pub   = (unsigned char*)XMALLOC(pubSz,  NULL, DYNAMIC_TYPE_PUBLIC_KEY);
+        priv  = (unsigned char*)XMALLOC(privSz, NULL, DYNAMIC_TYPE_PRIVATE_KEY);
+        if (pub == NULL || priv == NULL) {
+            WOLFSSL_MSG("Unable to malloc memory");
+        }
+        else if (wc_DhGenerateKeyPair((DhKey*)dh->internal, rng, priv, &privSz,
                                                                pub, &pubSz) < 0)
             WOLFSSL_MSG("Bad wc_DhGenerateKeyPair");
-       else {
+        else {
             if (dh->pub_key)
                 wolfSSL_BN_free(dh->pub_key);
 
@@ -23124,9 +29106,9 @@
 
 #ifdef WOLFSSL_SMALL_STACK
     XFREE(tmpRNG, NULL, DYNAMIC_TYPE_RNG);
+#endif
     XFREE(pub,    NULL, DYNAMIC_TYPE_PUBLIC_KEY);
     XFREE(priv,   NULL, DYNAMIC_TYPE_PRIVATE_KEY);
-#endif
 
     return ret;
 }
@@ -23140,10 +29122,10 @@
 {
     int            ret    = WOLFSSL_FATAL_ERROR;
     word32         keySz  = 0;
-    word32         pubSz  = 1024;
-    word32         privSz = 1024;
-#ifdef WOLFSSL_SMALL_STACK
-    unsigned char* pub    = NULL;
+    int            pubSz  = 1024;
+    int            privSz = 1024;
+#ifdef WOLFSSL_SMALL_STACK
+    unsigned char* pub;
     unsigned char* priv   = NULL;
 #else
     unsigned char  pub [1024];
@@ -23194,6 +29176,59 @@
 
     return ret;
 }
+
+
+#if defined(OPENSSL_VERSION_NUMBER) && OPENSSL_VERSION_NUMBER >= 0x10100000L
+/* ownership of p,q,and g get taken over by "dh" on success and should be free'd
+ * with a call to wolfSSL_DH_free -- not individually.
+ *
+ * returns WOLFSSL_SUCCESS on success
+ */
+int wolfSSL_DH_set0_pqg(WOLFSSL_DH *dh, WOLFSSL_BIGNUM *p,
+    WOLFSSL_BIGNUM *q, WOLFSSL_BIGNUM *g)
+{
+    int ret;
+    WOLFSSL_ENTER("wolfSSL_DH_set0_pqg");
+
+    /* q can be NULL */
+    if (dh == NULL || p == NULL || g == NULL) {
+        WOLFSSL_MSG("Bad function arguments");
+        return WOLFSSL_FAILURE;
+    }
+
+    /* free existing internal DH structure and recreate with new p / g */
+    if (dh->inSet) {
+        ret = wc_FreeDhKey((DhKey*)dh->internal);
+        if (ret != 0) {
+            WOLFSSL_MSG("Unable to free internal DH key");
+            return WOLFSSL_FAILURE;
+        }
+    }
+
+    wolfSSL_BN_free(dh->p);
+    wolfSSL_BN_free(dh->q);
+    wolfSSL_BN_free(dh->g);
+    wolfSSL_BN_free(dh->pub_key);
+    wolfSSL_BN_free(dh->priv_key);
+
+    dh->p = p;
+    dh->q = q;
+    dh->g = g;
+
+    ret = SetDhInternal(dh);
+    if (ret != WOLFSSL_SUCCESS) {
+        WOLFSSL_MSG("Unable to set internal DH key");
+        dh->p = NULL;
+        dh->q = NULL;
+        dh->g = NULL;
+        dh->inSet = 0;
+        return WOLFSSL_FAILURE;
+    }
+
+    return WOLFSSL_SUCCESS;
+}
+#endif /* v1.1.0 or later */
+
 #endif /* NO_DH */
 
 
@@ -23265,282 +29300,19 @@
         InitwolfSSL_DSA(dsa);  /* set back to NULLs for safety */
 
         XFREE(dsa, NULL, DYNAMIC_TYPE_DSA);
-        dsa = NULL;
+
+        /* dsa = NULL, don't try to access or double free it */
     }
 }
 
 #endif /* NO_DSA */
-
 #endif /* OPENSSL_EXTRA */
-#if !defined(NO_RSA) && defined(OPENSSL_EXTRA_X509_SMALL)
-static void InitwolfSSL_Rsa(WOLFSSL_RSA* rsa)
-{
-    if (rsa) {
-        XMEMSET(rsa, 0, sizeof(WOLFSSL_RSA));
-    }
-}
-
-void wolfSSL_RSA_free(WOLFSSL_RSA* rsa)
-{
-    WOLFSSL_ENTER("wolfSSL_RSA_free");
-
-    if (rsa) {
-        if (rsa->internal) {
-#if !defined(HAVE_FIPS) && !defined(HAVE_USER_RSA) && \
-    !defined(HAVE_FAST_RSA) && defined(WC_RSA_BLINDING)
-            WC_RNG* rng;
-
-            /* check if RNG is owned before freeing it */
-            if (rsa->ownRng) {
-                rng = ((RsaKey*)rsa->internal)->rng;
-                if (rng != NULL && rng != &globalRNG) {
-                    wc_FreeRng(rng);
-                    XFREE(rng, NULL, DYNAMIC_TYPE_RNG);
-                }
-            }
-#endif /* WC_RSA_BLINDING */
-            wc_FreeRsaKey((RsaKey*)rsa->internal);
-            XFREE(rsa->internal, NULL, DYNAMIC_TYPE_RSA);
-            rsa->internal = NULL;
-        }
-        wolfSSL_BN_free(rsa->iqmp);
-        wolfSSL_BN_free(rsa->dmq1);
-        wolfSSL_BN_free(rsa->dmp1);
-        wolfSSL_BN_free(rsa->q);
-        wolfSSL_BN_free(rsa->p);
-        wolfSSL_BN_free(rsa->d);
-        wolfSSL_BN_free(rsa->e);
-        wolfSSL_BN_free(rsa->n);
-
-    #ifdef WC_RSA_BLINDING
-        if (wc_FreeRng(rsa->rng) != 0) {
-            WOLFSSL_MSG("Issue freeing rng");
-        }
-        XFREE(rsa->rng, NULL, DYNAMIC_TYPE_RNG);
-    #endif
-
-        InitwolfSSL_Rsa(rsa);  /* set back to NULLs for safety */
-
-        XFREE(rsa, NULL, DYNAMIC_TYPE_RSA);
-        rsa = NULL;
-    }
-}
-
-WOLFSSL_RSA* wolfSSL_RSA_new(void)
-{
-    WOLFSSL_RSA* external;
-    RsaKey*     key;
-
-    WOLFSSL_ENTER("wolfSSL_RSA_new");
-
-    key = (RsaKey*) XMALLOC(sizeof(RsaKey), NULL, DYNAMIC_TYPE_RSA);
-    if (key == NULL) {
-        WOLFSSL_MSG("wolfSSL_RSA_new malloc RsaKey failure");
-        return NULL;
-    }
-
-    external = (WOLFSSL_RSA*) XMALLOC(sizeof(WOLFSSL_RSA), NULL,
-                                     DYNAMIC_TYPE_RSA);
-    if (external == NULL) {
-        WOLFSSL_MSG("wolfSSL_RSA_new malloc WOLFSSL_RSA failure");
-        XFREE(key, NULL, DYNAMIC_TYPE_RSA);
-        return NULL;
-    }
-
-    InitwolfSSL_Rsa(external);
-    if (wc_InitRsaKey(key, NULL) != 0) {
-        WOLFSSL_MSG("InitRsaKey WOLFSSL_RSA failure");
-        XFREE(external, NULL, DYNAMIC_TYPE_RSA);
-        XFREE(key, NULL, DYNAMIC_TYPE_RSA);
-        return NULL;
-    }
-
-#if !defined(HAVE_FIPS) && !defined(HAVE_USER_RSA) && \
-    !defined(HAVE_FAST_RSA) && defined(WC_RSA_BLINDING)
-    {
-        WC_RNG* rng = NULL;
-
-        rng = (WC_RNG*) XMALLOC(sizeof(WC_RNG), NULL, DYNAMIC_TYPE_RNG);
-        if (rng != NULL && wc_InitRng(rng) != 0) {
-            WOLFSSL_MSG("InitRng failure, attempting to use global RNG");
-            XFREE(rng, NULL, DYNAMIC_TYPE_RNG);
-            rng = NULL;
-        }
-
-        external->ownRng = 1;
-        if (rng == NULL && initGlobalRNG) {
-            external->ownRng = 0;
-            rng = &globalRNG;
-        }
-
-        if (rng == NULL) {
-            WOLFSSL_MSG("wolfSSL_RSA_new no WC_RNG for blinding");
-            XFREE(external, NULL, DYNAMIC_TYPE_RSA);
-            XFREE(key, NULL, DYNAMIC_TYPE_RSA);
-            return NULL;
-        }
-
-        wc_RsaSetRNG(key, rng);
-    }
-#endif /* WC_RSA_BLINDING */
-
-    external->internal = key;
-    external->inSet = 0;
-    return external;
-}
-#endif /* !NO_RSA && OPENSSL_EXTRA_X509_SMALL */
-
-/* these defines are to make sure the functions SetIndividualExternal is not
- * declared and then not used. */
-#if !defined(NO_ASN) || !defined(NO_DSA) || defined(HAVE_ECC) || \
-    (!defined(NO_RSA) && !defined(HAVE_USER_RSA) && !defined(HAVE_FAST_RSA))
-
-#if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
-/* when calling SetIndividualExternal, mpi should be cleared by caller if no
- * longer used. ie mp_clear(mpi). This is to free data when fastmath is
- * disabled since a copy of mpi is made by this function and placed into bn.
- */
-static int SetIndividualExternal(WOLFSSL_BIGNUM** bn, mp_int* mpi)
-{
-    byte dynamic = 0;
-
-    WOLFSSL_MSG("Entering SetIndividualExternal");
-
-    if (mpi == NULL || bn == NULL) {
-        WOLFSSL_MSG("mpi NULL error");
-        return WOLFSSL_FATAL_ERROR;
-    }
-
-    if (*bn == NULL) {
-        *bn = wolfSSL_BN_new();
-        if (*bn == NULL) {
-            WOLFSSL_MSG("SetIndividualExternal alloc failed");
-            return WOLFSSL_FATAL_ERROR;
-        }
-        dynamic = 1;
-    }
-
-    if (mp_copy(mpi, (mp_int*)((*bn)->internal)) != MP_OKAY) {
-        WOLFSSL_MSG("mp_copy error");
-        if (dynamic == 1) {
-            wolfSSL_BN_free(*bn);
-        }
-        return WOLFSSL_FATAL_ERROR;
-    }
-
-    return WOLFSSL_SUCCESS;
-}
-#endif /* OPENSSL_EXTRA || OPENSSL_EXTRA_X509_SMALL */
-
-#ifdef OPENSSL_EXTRA /* only without X509_SMALL */
-static int SetIndividualInternal(WOLFSSL_BIGNUM* bn, mp_int* mpi)
-{
-    WOLFSSL_MSG("Entering SetIndividualInternal");
-
-    if (bn == NULL || bn->internal == NULL) {
-        WOLFSSL_MSG("bn NULL error");
-        return WOLFSSL_FATAL_ERROR;
-    }
-
-    if (mpi == NULL || (mp_init(mpi) != MP_OKAY)) {
-        WOLFSSL_MSG("mpi NULL error");
-        return WOLFSSL_FATAL_ERROR;
-    }
-
-    if (mp_copy((mp_int*)bn->internal, mpi) != MP_OKAY) {
-        WOLFSSL_MSG("mp_copy error");
-        return WOLFSSL_FATAL_ERROR;
-    }
-
-    return WOLFSSL_SUCCESS;
-}
-
-#ifndef NO_ASN
-WOLFSSL_BIGNUM *wolfSSL_ASN1_INTEGER_to_BN(const WOLFSSL_ASN1_INTEGER *ai,
-                                       WOLFSSL_BIGNUM *bn)
-{
-    mp_int mpi;
-    word32 idx = 0;
-    int ret;
-
-    WOLFSSL_ENTER("wolfSSL_ASN1_INTEGER_to_BN");
-
-    if (ai == NULL) {
-        return NULL;
-    }
-
-    if ((ret = GetInt(&mpi, ai->data, &idx, ai->dataMax)) != 0) {
-        /* expecting ASN1 format for INTEGER */
-        WOLFSSL_LEAVE("wolfSSL_ASN1_INTEGER_to_BN", ret);
-        return NULL;
-    }
-
-    /* mp_clear needs called because mpi is copied and causes memory leak with
-     * --disable-fastmath */
-    ret = SetIndividualExternal(&bn, &mpi);
-    mp_clear(&mpi);
-
-    if (ret != WOLFSSL_SUCCESS) {
-        return NULL;
-    }
-    return bn;
-}
-#endif /* !NO_ASN */
-
-#if !defined(NO_DSA) && !defined(NO_DH)
-WOLFSSL_DH *wolfSSL_DSA_dup_DH(const WOLFSSL_DSA *dsa)
-{
-    WOLFSSL_DH* dh;
-    DhKey*      key;
-
-    WOLFSSL_ENTER("wolfSSL_DSA_dup_DH");
-
-    if (dsa == NULL) {
-        return NULL;
-    }
-
-    dh = wolfSSL_DH_new();
-    if (dh == NULL) {
-        return NULL;
-    }
-    key = (DhKey*)dh->internal;
-
-    if (dsa->p != NULL &&
-        SetIndividualInternal(((WOLFSSL_DSA*)dsa)->p, &key->p) != WOLFSSL_SUCCESS) {
-        WOLFSSL_MSG("rsa p key error");
-        wolfSSL_DH_free(dh);
-        return NULL;
-    }
-    if (dsa->g != NULL &&
-        SetIndividualInternal(((WOLFSSL_DSA*)dsa)->g, &key->g) != WOLFSSL_SUCCESS) {
-        WOLFSSL_MSG("rsa g key error");
-        wolfSSL_DH_free(dh);
-        return NULL;
-    }
-
-    if (SetIndividualExternal(&dh->p, &key->p) != WOLFSSL_SUCCESS) {
-        WOLFSSL_MSG("dsa p key error");
-        wolfSSL_DH_free(dh);
-        return NULL;
-    }
-    if (SetIndividualExternal(&dh->g, &key->g) != WOLFSSL_SUCCESS) {
-        WOLFSSL_MSG("dsa g key error");
-        wolfSSL_DH_free(dh);
-        return NULL;
-    }
-
-    return dh;
-}
-#endif /* !defined(NO_DSA) && !defined(NO_DH) */
-
-#endif /* OPENSSL_EXTRA */
-#endif /* !NO_RSA && !NO_DSA */
 
 #ifdef OPENSSL_EXTRA
 
 #ifndef NO_DSA
 /* wolfSSL -> OpenSSL */
-static int SetDsaExternal(WOLFSSL_DSA* dsa)
+int SetDsaExternal(WOLFSSL_DSA* dsa)
 {
     DsaKey* key;
     WOLFSSL_MSG("Entering SetDsaExternal");
@@ -23583,7 +29355,7 @@
 }
 
 /* Openssl -> WolfSSL */
-static int SetDsaInternal(WOLFSSL_DSA* dsa)
+int SetDsaInternal(WOLFSSL_DSA* dsa)
 {
     DsaKey* key;
     WOLFSSL_MSG("Entering SetDsaInternal");
@@ -23640,429 +29412,9 @@
 #endif /* NO_DSA */
 #endif /* OPENSSL_EXTRA */
 
-#if !defined(HAVE_USER_RSA) && !defined(HAVE_FAST_RSA) && \
-    !defined(NO_RSA) && (defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL))
-/* WolfSSL -> OpenSSL */
-static int SetRsaExternal(WOLFSSL_RSA* rsa)
-{
-    RsaKey* key;
-    WOLFSSL_MSG("Entering SetRsaExternal");
-
-    if (rsa == NULL || rsa->internal == NULL) {
-        WOLFSSL_MSG("rsa key NULL error");
-        return WOLFSSL_FATAL_ERROR;
-    }
-
-    key = (RsaKey*)rsa->internal;
-
-    if (SetIndividualExternal(&rsa->n, &key->n) != WOLFSSL_SUCCESS) {
-        WOLFSSL_MSG("rsa n key error");
-        return WOLFSSL_FATAL_ERROR;
-    }
-
-    if (SetIndividualExternal(&rsa->e, &key->e) != WOLFSSL_SUCCESS) {
-        WOLFSSL_MSG("rsa e key error");
-        return WOLFSSL_FATAL_ERROR;
-    }
-
-    if (key->type == RSA_PRIVATE) {
-        if (SetIndividualExternal(&rsa->d, &key->d) != WOLFSSL_SUCCESS) {
-            WOLFSSL_MSG("rsa d key error");
-            return WOLFSSL_FATAL_ERROR;
-        }
-
-        if (SetIndividualExternal(&rsa->p, &key->p) != WOLFSSL_SUCCESS) {
-            WOLFSSL_MSG("rsa p key error");
-            return WOLFSSL_FATAL_ERROR;
-        }
-
-        if (SetIndividualExternal(&rsa->q, &key->q) != WOLFSSL_SUCCESS) {
-            WOLFSSL_MSG("rsa q key error");
-            return WOLFSSL_FATAL_ERROR;
-        }
-
-    #ifndef RSA_LOW_MEM
-        if (SetIndividualExternal(&rsa->dmp1, &key->dP) != WOLFSSL_SUCCESS) {
-            WOLFSSL_MSG("rsa dP key error");
-            return WOLFSSL_FATAL_ERROR;
-        }
-
-        if (SetIndividualExternal(&rsa->dmq1, &key->dQ) != WOLFSSL_SUCCESS) {
-            WOLFSSL_MSG("rsa dQ key error");
-            return WOLFSSL_FATAL_ERROR;
-        }
-
-        if (SetIndividualExternal(&rsa->iqmp, &key->u) != WOLFSSL_SUCCESS) {
-            WOLFSSL_MSG("rsa u key error");
-            return WOLFSSL_FATAL_ERROR;
-        }
-    #endif /* !RSA_LOW_MEM */
-    }
-    rsa->exSet = 1;
-
-    return WOLFSSL_SUCCESS;
-}
-#endif
 
 #ifdef OPENSSL_EXTRA
 #if !defined(NO_RSA)
-#if !defined(HAVE_USER_RSA) && !defined(HAVE_FAST_RSA)
-/* Openssl -> WolfSSL */
-static int SetRsaInternal(WOLFSSL_RSA* rsa)
-{
-    RsaKey* key;
-    WOLFSSL_MSG("Entering SetRsaInternal");
-
-    if (rsa == NULL || rsa->internal == NULL) {
-        WOLFSSL_MSG("rsa key NULL error");
-        return WOLFSSL_FATAL_ERROR;
-    }
-
-    key = (RsaKey*)rsa->internal;
-
-    if (SetIndividualInternal(rsa->n, &key->n) != WOLFSSL_SUCCESS) {
-        WOLFSSL_MSG("rsa n key error");
-        return WOLFSSL_FATAL_ERROR;
-    }
-
-    if (SetIndividualInternal(rsa->e, &key->e) != WOLFSSL_SUCCESS) {
-        WOLFSSL_MSG("rsa e key error");
-        return WOLFSSL_FATAL_ERROR;
-    }
-
-    /* public key */
-    key->type = RSA_PUBLIC;
-
-    if (rsa->d != NULL) {
-        if (SetIndividualInternal(rsa->d, &key->d) != WOLFSSL_SUCCESS) {
-            WOLFSSL_MSG("rsa d key error");
-            return WOLFSSL_FATAL_ERROR;
-        }
-
-        /* private key */
-        key->type = RSA_PRIVATE;
-    }
-
-    if (rsa->p != NULL &&
-        SetIndividualInternal(rsa->p, &key->p) != WOLFSSL_SUCCESS) {
-        WOLFSSL_MSG("rsa p key error");
-        return WOLFSSL_FATAL_ERROR;
-    }
-
-    if (rsa->q != NULL &&
-        SetIndividualInternal(rsa->q, &key->q) != WOLFSSL_SUCCESS) {
-        WOLFSSL_MSG("rsa q key error");
-        return WOLFSSL_FATAL_ERROR;
-    }
-
-#ifndef RSA_LOW_MEM
-    if (rsa->dmp1 != NULL &&
-        SetIndividualInternal(rsa->dmp1, &key->dP) != WOLFSSL_SUCCESS) {
-        WOLFSSL_MSG("rsa dP key error");
-        return WOLFSSL_FATAL_ERROR;
-    }
-
-    if (rsa->dmq1 != NULL &&
-        SetIndividualInternal(rsa->dmq1, &key->dQ) != WOLFSSL_SUCCESS) {
-        WOLFSSL_MSG("rsa dQ key error");
-        return WOLFSSL_FATAL_ERROR;
-    }
-
-    if (rsa->iqmp != NULL &&
-        SetIndividualInternal(rsa->iqmp, &key->u) != WOLFSSL_SUCCESS) {
-        WOLFSSL_MSG("rsa u key error");
-        return WOLFSSL_FATAL_ERROR;
-    }
-#endif /* !RSA_LOW_MEM */
-
-    rsa->inSet = 1;
-
-    return WOLFSSL_SUCCESS;
-}
-
-
-/* SSL_SUCCESS on ok */
-#ifndef NO_WOLFSSL_STUB
-int wolfSSL_RSA_blinding_on(WOLFSSL_RSA* rsa, WOLFSSL_BN_CTX* bn)
-{
-    (void)rsa;
-    (void)bn;
-    WOLFSSL_STUB("RSA_blinding_on");
-    WOLFSSL_MSG("wolfSSL_RSA_blinding_on");
-
-    return WOLFSSL_SUCCESS;  /* on by default */
-}
-#endif
-
-/* return compliant with OpenSSL
- *   size of encrypted data if success , -1 if error
- */
-int wolfSSL_RSA_public_encrypt(int len, const unsigned char* fr,
-                            unsigned char* to, WOLFSSL_RSA* rsa, int padding)
-{
-    int initTmpRng = 0;
-    WC_RNG *rng = NULL;
-    int outLen;
-    int ret = 0;
-#ifdef WOLFSSL_SMALL_STACK
-    WC_RNG* tmpRNG = NULL;
-#else
-    WC_RNG  tmpRNG[1];
-#endif
-#if !defined(HAVE_FIPS) && !defined(HAVE_USER_RSA) && !defined(HAVE_FAST_RSA)
-    int  mgf = WC_MGF1NONE;
-    enum wc_HashType hash = WC_HASH_TYPE_NONE;
-#endif
-
-    WOLFSSL_MSG("wolfSSL_RSA_public_encrypt");
-
-    /* Check and remap the padding to internal values, if needed. */
-#if !defined(HAVE_FIPS) && !defined(HAVE_USER_RSA) && !defined(HAVE_FAST_RSA)
-    if (padding == RSA_PKCS1_PADDING)
-        padding = WC_RSA_PKCSV15_PAD;
-    else if (padding == RSA_PKCS1_OAEP_PADDING) {
-        padding = WC_RSA_OAEP_PAD;
-        hash = WC_HASH_TYPE_SHA;
-        mgf = WC_MGF1SHA1;
-    }
-#else
-    if (padding == RSA_PKCS1_PADDING)
-      ;
-#endif
-    else {
-        WOLFSSL_MSG("wolfSSL_RSA_public_encrypt unsupported padding");
-        return 0;
-    }
-
-    if (rsa->inSet == 0)
-    {
-        if (SetRsaInternal(rsa) != SSL_SUCCESS) {
-            WOLFSSL_MSG("SetRsaInternal failed");
-            return 0;
-        }
-    }
-
-    outLen = wolfSSL_RSA_size(rsa);
-
-#if !defined(HAVE_FIPS) && !defined(HAVE_USER_RSA) && \
-    !defined(HAVE_FAST_RSA) && defined(WC_RSA_BLINDING)
-    rng = ((RsaKey*)rsa->internal)->rng;
-#endif
-    if (rng == NULL) {
-#ifdef WOLFSSL_SMALL_STACK
-        tmpRNG = (WC_RNG*)XMALLOC(sizeof(WC_RNG), NULL, DYNAMIC_TYPE_TMP_BUFFER);
-        if (tmpRNG == NULL)
-            return 0;
-#endif
-
-        if (wc_InitRng(tmpRNG) == 0) {
-            rng = tmpRNG;
-            initTmpRng = 1;
-        }
-        else {
-            WOLFSSL_MSG("Bad RNG Init, trying global");
-            if (initGlobalRNG == 0)
-                WOLFSSL_MSG("Global RNG no Init");
-            else
-                rng = &globalRNG;
-        }
-    }
-
-    if (outLen == 0) {
-        WOLFSSL_MSG("Bad RSA size");
-    }
-
-    if (rng) {
-#if !defined(HAVE_FIPS) && !defined(HAVE_USER_RSA) && !defined(HAVE_FAST_RSA)
-        ret = wc_RsaPublicEncrypt_ex(fr, len, to, outLen,
-                             (RsaKey*)rsa->internal, rng, padding,
-                             hash, mgf, NULL, 0);
-#else
-        ret = wc_RsaPublicEncrypt(fr, len, to, outLen,
-                             (RsaKey*)rsa->internal, rng);
-#endif
-        if (ret <= 0) {
-            WOLFSSL_MSG("Bad Rsa Encrypt");
-        }
-        if (len <= 0) {
-            WOLFSSL_MSG("Bad Rsa Encrypt");
-        }
-    }
-
-    if (initTmpRng)
-        wc_FreeRng(tmpRNG);
-
-#ifdef WOLFSSL_SMALL_STACK
-    XFREE(tmpRNG,     NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    if (ret >= 0)
-        WOLFSSL_MSG("wolfSSL_RSA_public_encrypt success");
-    else {
-        WOLFSSL_MSG("wolfSSL_RSA_public_encrypt failed");
-        ret = WOLFSSL_FATAL_ERROR; /* return -1 on error case */
-    }
-    return ret;
-}
-
-/* return compliant with OpenSSL
- *   size of plain recovered data if success , -1 if error
- */
-int wolfSSL_RSA_private_decrypt(int len, const unsigned char* fr,
-                            unsigned char* to, WOLFSSL_RSA* rsa, int padding)
-{
-    int outLen;
-    int ret = 0;
-  #if !defined(HAVE_FIPS) && !defined(HAVE_USER_RSA) && !defined(HAVE_FAST_RSA)
-    int mgf = WC_MGF1NONE;
-    enum wc_HashType hash = WC_HASH_TYPE_NONE;
-  #endif
-
-    WOLFSSL_MSG("wolfSSL_RSA_private_decrypt");
-
-#if !defined(HAVE_FIPS) && !defined(HAVE_USER_RSA) && !defined(HAVE_FAST_RSA)
-    if (padding == RSA_PKCS1_PADDING)
-        padding = WC_RSA_PKCSV15_PAD;
-    else if (padding == RSA_PKCS1_OAEP_PADDING) {
-        padding = WC_RSA_OAEP_PAD;
-        hash = WC_HASH_TYPE_SHA;
-        mgf = WC_MGF1SHA1;
-    }
-#else
-    if (padding == RSA_PKCS1_PADDING)
-        ;
-#endif
-    else {
-        WOLFSSL_MSG("wolfSSL_RSA_private_decrypt unsupported padding");
-        return 0;
-    }
-
-    if (rsa->inSet == 0)
-    {
-        if (SetRsaInternal(rsa) != SSL_SUCCESS) {
-            WOLFSSL_MSG("SetRsaInternal failed");
-            return 0;
-        }
-    }
-
-    outLen = wolfSSL_RSA_size(rsa);
-    if (outLen == 0) {
-        WOLFSSL_MSG("Bad RSA size");
-    }
-
-    /* size of 'to' buffer must be size of RSA key */
-#if !defined(HAVE_FIPS) && !defined(HAVE_USER_RSA) && !defined(HAVE_FAST_RSA)
-    ret = wc_RsaPrivateDecrypt_ex(fr, len, to, outLen,
-                            (RsaKey*)rsa->internal, padding,
-                            hash, mgf, NULL, 0);
-#else
-    ret = wc_RsaPrivateDecrypt(fr, len, to, outLen,
-                            (RsaKey*)rsa->internal);
-#endif
-
-    if (len <= 0) {
-        WOLFSSL_MSG("Bad Rsa Decrypt");
-    }
-
-    if (ret > 0)
-        WOLFSSL_MSG("wolfSSL_RSA_private_decrypt success");
-    else {
-        WOLFSSL_MSG("wolfSSL_RSA_private_decrypt failed");
-        ret = WOLFSSL_FATAL_ERROR;
-    }
-    return ret;
-}
-
-
-/* RSA private encrypt calls wc_RsaSSL_Sign. Similar function set up as RSA
- * public decrypt.
- *
- * len  Length of input buffer
- * in   Input buffer to sign
- * out  Output buffer (expected to be greater than or equal to RSA key size)
- * rsa     Key to use for encryption
- * padding Type of RSA padding to use.
- */
-int wolfSSL_RSA_private_encrypt(int len, unsigned char* in,
-                            unsigned char* out, WOLFSSL_RSA* rsa, int padding)
-{
-    int sz = 0;
-    WC_RNG* rng = NULL;
-    RsaKey* key;
-
-    WOLFSSL_MSG("wolfSSL_RSA_private_encrypt");
-
-    if (len < 0 || rsa == NULL || rsa->internal == NULL || in == NULL) {
-        WOLFSSL_MSG("Bad function arguments");
-        return 0;
-    }
-
-    if (padding != RSA_PKCS1_PADDING) {
-        WOLFSSL_MSG("wolfSSL_RSA_private_encrypt unsupported padding");
-        return 0;
-    }
-
-    if (rsa->inSet == 0)
-    {
-        WOLFSSL_MSG("Setting internal RSA structure");
-
-        if (SetRsaInternal(rsa) != SSL_SUCCESS) {
-            WOLFSSL_MSG("SetRsaInternal failed");
-            return 0;
-        }
-    }
-
-    key = (RsaKey*)rsa->internal;
-    #if defined(WC_RSA_BLINDING) && !defined(HAVE_USER_RSA)
-    rng = key->rng;
-    #else
-#ifndef HAVE_FIPS
-    if (wc_InitRng_ex(rng, key->heap, INVALID_DEVID) != 0) {
-#else
-    if (wc_InitRng(rng) != 0) {
-#endif
-        WOLFSSL_MSG("Error with random number");
-        return SSL_FATAL_ERROR;
-    }
-    #endif
-
-    /* size of output buffer must be size of RSA key */
-    sz = wc_RsaSSL_Sign(in, (word32)len, out, wolfSSL_RSA_size(rsa), key, rng);
-    #if !defined(WC_RSA_BLINDING) || defined(HAVE_USER_RSA)
-    if (wc_FreeRng(rng) != 0) {
-        WOLFSSL_MSG("Error freeing random number generator");
-        return SSL_FATAL_ERROR;
-    }
-    #endif
-    if (sz <= 0) {
-        WOLFSSL_LEAVE("wolfSSL_RSA_private_encrypt", sz);
-        return 0;
-    }
-
-    return sz;
-}
-#endif /* HAVE_USER_RSA */
-
-/* return compliant with OpenSSL
- *   RSA modulus size in bytes, -1 if error
- */
-int wolfSSL_RSA_size(const WOLFSSL_RSA* rsa)
-{
-    WOLFSSL_ENTER("wolfSSL_RSA_size");
-
-    if (rsa == NULL)
-        return WOLFSSL_FATAL_ERROR;
-    if (rsa->inSet == 0)
-    {
-        if (SetRsaInternal((WOLFSSL_RSA*)rsa) != SSL_SUCCESS) {
-            WOLFSSL_MSG("SetRsaInternal failed");
-            return 0;
-        }
-    }
-    return wc_RsaEncryptSize((RsaKey*)rsa->internal);
-}
-
-
 /* Generates a RSA key of length len
  *
  * len  length of RSA key i.e. 2048
@@ -24142,7 +29494,7 @@
 #ifdef WOLFSSL_KEY_GEN
     {
     #ifdef WOLFSSL_SMALL_STACK
-        WC_RNG* rng = NULL;
+        WC_RNG* rng;
     #else
         WC_RNG  rng[1];
     #endif
@@ -24206,7 +29558,7 @@
         int initTmpRng = 0;
         WC_RNG *rng = NULL;
 #ifdef WOLFSSL_SMALL_STACK
-        WC_RNG *tmpRNG = NULL;
+        WC_RNG *tmpRNG;
 #else
         WC_RNG tmpRNG[1];
 #endif
@@ -24307,7 +29659,7 @@
         int initTmpRng = 0;
         WC_RNG *rng = NULL;
 #ifdef WOLFSSL_SMALL_STACK
-        WC_RNG *tmpRNG = NULL;
+        WC_RNG *tmpRNG;
 #else
         WC_RNG tmpRNG[1];
 #endif
@@ -24353,6 +29705,30 @@
     return ret;
 }
 
+WOLFSSL_DSA_SIG* wolfSSL_DSA_SIG_new(void)
+{
+    WOLFSSL_DSA_SIG* sig;
+    WOLFSSL_ENTER("wolfSSL_DSA_SIG_new");
+    sig = (WOLFSSL_DSA_SIG*)XMALLOC(sizeof(WOLFSSL_DSA_SIG), NULL, DYNAMIC_TYPE_OPENSSL);
+    if (sig)
+        XMEMSET(sig, 0, sizeof(WOLFSSL_DSA_SIG));
+    return sig;
+}
+
+void wolfSSL_DSA_SIG_free(WOLFSSL_DSA_SIG *sig)
+{
+    WOLFSSL_ENTER("wolfSSL_DSA_SIG_free");
+    if (sig) {
+        if (sig->r) {
+            wolfSSL_BN_free(sig->r);
+        }
+        if (sig->s) {
+            wolfSSL_BN_free(sig->s);
+        }
+        XFREE(sig, NULL, DYNAMIC_TYPE_OPENSSL);
+    }
+}
+
 /* return WOLFSSL_SUCCESS on success, < 0 otherwise */
 int wolfSSL_DSA_do_sign(const unsigned char* d, unsigned char* sigRet,
                        WOLFSSL_DSA* dsa)
@@ -24417,6 +29793,44 @@
     return ret;
 }
 
+#if !defined(HAVE_SELFTEST) && !defined(HAVE_FIPS)
+WOLFSSL_DSA_SIG* wolfSSL_DSA_do_sign_ex(const unsigned char* digest,
+                                        int outLen, WOLFSSL_DSA* dsa)
+{
+    WOLFSSL_DSA_SIG* sig = NULL;
+    byte sigBin[DSA_SIG_SIZE];
+
+    WOLFSSL_ENTER("wolfSSL_DSA_do_sign_ex");
+
+    if (!digest || !dsa || outLen != WC_SHA_DIGEST_SIZE) {
+        WOLFSSL_MSG("Bad function arguments");
+        return NULL;
+    }
+
+    if (wolfSSL_DSA_do_sign(digest, sigBin, dsa) != WOLFSSL_SUCCESS) {
+        return NULL;
+    }
+
+    if (!(sig = wolfSSL_DSA_SIG_new())) {
+        goto error;
+    }
+
+    if (!(sig->r = wolfSSL_BN_bin2bn(sigBin, DSA_HALF_SIZE, NULL))) {
+        goto error;
+    }
+
+    if (!(sig->s = wolfSSL_BN_bin2bn(sigBin + DSA_HALF_SIZE, DSA_HALF_SIZE, NULL))) {
+        goto error;
+    }
+
+    return sig;
+error:
+    if (sig) {
+        wolfSSL_DSA_SIG_free(sig);
+    }
+    return NULL;
+}
+#endif /* !HAVE_SELFTEST && !HAVE_FIPS */
 
 int wolfSSL_DSA_do_verify(const unsigned char* d, unsigned char* sig,
                         WOLFSSL_DSA* dsa, int *dsacheck)
@@ -24447,6 +29861,62 @@
 
     return WOLFSSL_SUCCESS;
 }
+
+#if !defined(HAVE_SELFTEST) && !defined(HAVE_FIPS)
+int wolfSSL_DSA_do_verify_ex(const unsigned char* digest, int digest_len,
+                             WOLFSSL_DSA_SIG* sig, WOLFSSL_DSA* dsa)
+{
+    int dsacheck, sz;
+    byte sigBin[DSA_SIG_SIZE];
+    byte* sigBinPtr = sigBin;
+
+    WOLFSSL_ENTER("wolfSSL_DSA_do_verify_ex");
+
+    if (!digest || !sig || !dsa || digest_len != WC_SHA_DIGEST_SIZE) {
+        WOLFSSL_MSG("Bad function arguments");
+        return WOLFSSL_FAILURE;
+    }
+
+    if (!sig->r || !sig->s) {
+        WOLFSSL_MSG("No signature found in DSA_SIG");
+        return WOLFSSL_FAILURE;
+    }
+
+    /* front pad with zeros */
+    if (!(sz = wolfSSL_BN_num_bytes(sig->r))) {
+        return WOLFSSL_FAILURE;
+    }
+    while (sz++ < DSA_HALF_SIZE) {
+        *sigBinPtr++ = 0;
+    }
+
+    if (wolfSSL_BN_bn2bin(sig->r, sigBinPtr) == WOLFSSL_FATAL_ERROR) {
+        return WOLFSSL_FAILURE;
+    }
+
+    /* Move to s */
+    sigBinPtr = sigBin + DSA_HALF_SIZE;
+
+    /* front pad with zeros */
+    if (!(sz = wolfSSL_BN_num_bytes(sig->s))) {
+        return WOLFSSL_FAILURE;
+    }
+    while (sz++ < DSA_HALF_SIZE) {
+        *sigBinPtr++ = 0;
+    }
+
+    if (wolfSSL_BN_bn2bin(sig->s, sigBinPtr) == WOLFSSL_FATAL_ERROR) {
+        return WOLFSSL_FAILURE;
+    }
+
+    if (wolfSSL_DSA_do_verify(digest, sigBin, dsa, &dsacheck) != WOLFSSL_SUCCESS ||
+            dsacheck != 1) {
+        return WOLFSSL_FAILURE;
+    }
+
+    return WOLFSSL_SUCCESS;
+}
+#endif /* !HAVE_SELFTEST && !HAVE_FIPS */
 #endif /* NO_DSA */
 
 
@@ -24466,7 +29936,7 @@
 #define show(a,b,c)
 #endif
 
-/* return SSL_SUCCES on ok, 0 otherwise */
+/* return SSL_SUCCESS on ok, 0 otherwise */
 int wolfSSL_RSA_sign(int type, const unsigned char* m,
                            unsigned int mLen, unsigned char* sigRet,
                            unsigned int* sigLen, WOLFSSL_RSA* rsa)
@@ -24518,6 +29988,18 @@
     #ifdef WOLFSSL_SHA512
         case NID_sha512:    type = SHA512h; break;
     #endif
+    #ifndef WOLFSSL_NOSHA3_224
+        case NID_sha3_224:  type = SHA3_224h; break;
+    #endif
+    #ifndef WOLFSSL_NOSHA3_256
+        case NID_sha3_256:  type = SHA3_256h; break;
+    #endif
+    #ifndef WOLFSSL_NOSHA3_384
+        case NID_sha3_384:  type = SHA3_384h; break;
+    #endif
+    #ifndef WOLFSSL_NOSHA3_512
+        case NID_sha3_512:  type = SHA3_512h; break;
+    #endif
         default:
             WOLFSSL_MSG("This NID (md type) not configured or not implemented");
             return 0;
@@ -24636,7 +30118,7 @@
         XFREE(sigRet, NULL, DYNAMIC_TYPE_TMP_BUFFER);
         return WOLFSSL_FAILURE;
     }
-    /* get non-encrypted signature to be compared with decrypted sugnature*/
+    /* get non-encrypted signature to be compared with decrypted signature */
     ret = wolfSSL_RSA_sign_ex(type, m, mLen, sigRet, &len, rsa, 0);
     if (ret <= 0) {
         WOLFSSL_MSG("Message Digest Error");
@@ -24670,44 +30152,27 @@
     }
 }
 
-int wolfSSL_RSA_public_decrypt(int flen, const unsigned char* from,
-                          unsigned char* to, WOLFSSL_RSA* rsa, int padding)
-{
-    int tlen = 0;
-
-    WOLFSSL_ENTER("wolfSSL_RSA_public_decrypt");
-
-    if (rsa == NULL || rsa->internal == NULL || from == NULL) {
-        WOLFSSL_MSG("Bad function arguments");
-        return 0;
-    }
-
-    if (padding != RSA_PKCS1_PADDING) {
-        WOLFSSL_MSG("wolfSSL_RSA_public_decrypt unsupported padding");
-        return 0;
-    }
-
-    if (rsa->inSet == 0)
-    {
-        WOLFSSL_MSG("No RSA internal set, do it");
-
-        if (SetRsaInternal(rsa) != WOLFSSL_SUCCESS) {
-            WOLFSSL_MSG("SetRsaInternal failed");
-            return 0;
-        }
-    }
-
-    /* size of 'to' buffer must be size of RSA key */
-    tlen = wc_RsaSSL_Verify(from, flen, to, wolfSSL_RSA_size(rsa),
-                            (RsaKey*)rsa->internal);
-    if (tlen <= 0)
-        WOLFSSL_MSG("wolfSSL_RSA_public_decrypt failed");
-    else {
-        WOLFSSL_MSG("wolfSSL_RSA_public_decrypt success");
-    }
-    return tlen;
-}
-
+void wolfSSL_RSA_get0_key(const WOLFSSL_RSA *r, const WOLFSSL_BIGNUM **n,
+    const WOLFSSL_BIGNUM **e, const WOLFSSL_BIGNUM **d)
+{
+    WOLFSSL_ENTER("wolfSSL_RSA_get0_key");
+
+    if (r != NULL) {
+        if (n != NULL)
+            *n = r->n;
+        if (e != NULL)
+            *e = r->e;
+        if (d != NULL)
+            *d = r->d;
+    } else {
+        if (n != NULL)
+            *n = NULL;
+        if (e != NULL)
+            *e = NULL;
+        if (d != NULL)
+            *d = NULL;
+    }
+}
 
 /* generate p-1 and q-1, WOLFSSL_SUCCESS on ok */
 int wolfSSL_RSA_GenAdd(WOLFSSL_RSA* rsa)
@@ -24755,7 +30220,13 @@
     else
         return WOLFSSL_FATAL_ERROR;
 }
-#endif /* NO_RSA */
+#endif /* !NO_RSA && !HAVE_USER_RSA */
+
+WOLFSSL_HMAC_CTX* wolfSSL_HMAC_CTX_new(void)
+{
+    return (WOLFSSL_HMAC_CTX*)XMALLOC(sizeof(WOLFSSL_HMAC_CTX), NULL,
+        DYNAMIC_TYPE_OPENSSL);
+}
 
 int wolfSSL_HMAC_CTX_Init(WOLFSSL_HMAC_CTX* ctx)
 {
@@ -24781,109 +30252,112 @@
 }
 
 
-/* Deep copy of information from src to des structure
- *
- * des destination to copy information to
- * src structure to get infromation from
- *
- * Returns SSL_SUCCESS on success and SSL_FAILURE on error
- */
-int wolfSSL_HMAC_CTX_copy(WOLFSSL_HMAC_CTX* des, WOLFSSL_HMAC_CTX* src)
-{
-    void* heap = NULL;
-
-    WOLFSSL_ENTER("wolfSSL_HMAC_CTX_copy");
-
-    if (des == NULL || src == NULL) {
-        return SSL_FAILURE;
-    }
+/* helper function for Deep copy of internal wolfSSL hmac structure
+ * returns WOLFSSL_SUCCESS on success */
+int wolfSSL_HmacCopy(Hmac* des, Hmac* src)
+{
+    void* heap;
 
 #ifndef HAVE_FIPS
-    heap = src->hmac.heap;
-#endif
-
-    if (wc_HmacInit(&des->hmac, heap, 0) != 0) {
-        WOLFSSL_MSG("Error initializing HMAC");
-        return SSL_FAILURE;
-    }
-
-    des->type = src->type;
+    heap = src->heap;
+#else
+    heap = NULL;
+#endif
+    if (wc_HmacInit(des, heap, 0) != 0) {
+        return WOLFSSL_FAILURE;
+    }
 
     /* requires that hash structures have no dynamic parts to them */
-    switch (src->hmac.macType) {
+    switch (src->macType) {
     #ifndef NO_MD5
         case WC_MD5:
-            XMEMCPY(&des->hmac.hash.md5, &src->hmac.hash.md5, sizeof(wc_Md5));
+            wc_Md5Copy(&src->hash.md5, &des->hash.md5);
             break;
     #endif /* !NO_MD5 */
 
     #ifndef NO_SHA
         case WC_SHA:
-            XMEMCPY(&des->hmac.hash.sha, &src->hmac.hash.sha, sizeof(wc_Sha));
+            wc_ShaCopy(&src->hash.sha, &des->hash.sha);
             break;
     #endif /* !NO_SHA */
 
     #ifdef WOLFSSL_SHA224
         case WC_SHA224:
-            XMEMCPY(&des->hmac.hash.sha224, &src->hmac.hash.sha224,
-                    sizeof(wc_Sha224));
+            wc_Sha224Copy(&src->hash.sha224, &des->hash.sha224);
             break;
     #endif /* WOLFSSL_SHA224 */
 
     #ifndef NO_SHA256
         case WC_SHA256:
-            XMEMCPY(&des->hmac.hash.sha256, &src->hmac.hash.sha256,
-                    sizeof(wc_Sha256));
+            wc_Sha256Copy(&src->hash.sha256, &des->hash.sha256);
             break;
     #endif /* !NO_SHA256 */
 
     #ifdef WOLFSSL_SHA384
         case WC_SHA384:
-            XMEMCPY(&des->hmac.hash.sha384, &src->hmac.hash.sha384,
-                    sizeof(wc_Sha384));
+            wc_Sha384Copy(&src->hash.sha384, &des->hash.sha384);
             break;
     #endif /* WOLFSSL_SHA384 */
     #ifdef WOLFSSL_SHA512
         case WC_SHA512:
-            XMEMCPY(&des->hmac.hash.sha512, &src->hmac.hash.sha512,
-                    sizeof(wc_Sha512));
+            wc_Sha512Copy(&src->hash.sha512, &des->hash.sha512);
             break;
     #endif /* WOLFSSL_SHA512 */
 
         default:
-            WOLFSSL_MSG("Unknown or unsupported hash type");
-            return WOLFSSL_FAILURE;
-    }
-
-    XMEMCPY((byte*)des->hmac.ipad, (byte*)src->hmac.ipad, WC_HMAC_BLOCK_SIZE);
-    XMEMCPY((byte*)des->hmac.opad, (byte*)src->hmac.opad, WC_HMAC_BLOCK_SIZE);
-    XMEMCPY((byte*)des->hmac.innerHash, (byte*)src->hmac.innerHash,
-                                                            WC_MAX_DIGEST_SIZE);
+            return WOLFSSL_FAILURE;
+    }
+
+    XMEMCPY((byte*)des->ipad, (byte*)src->ipad, WC_HMAC_BLOCK_SIZE);
+    XMEMCPY((byte*)des->opad, (byte*)src->opad, WC_HMAC_BLOCK_SIZE);
+    XMEMCPY((byte*)des->innerHash, (byte*)src->innerHash, WC_MAX_DIGEST_SIZE);
 #ifndef HAVE_FIPS
-    des->hmac.heap    = heap;
-#endif
-    des->hmac.macType = src->hmac.macType;
-    des->hmac.innerHashKeyed = src->hmac.innerHashKeyed;
+    des->heap    = heap;
+#endif
+    des->macType = src->macType;
+    des->innerHashKeyed = src->innerHashKeyed;
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+    XMEMCPY(&des->asyncDev, &src->asyncDev, sizeof(WC_ASYNC_DEV));
+    des->keyLen = src->keyLen;
+    #ifdef HAVE_CAVIUM
+        des->data = (byte*)XMALLOC(src->dataLen, des->heap,
+                DYNAMIC_TYPE_HMAC);
+        if (des->data == NULL) {
+            return BUFFER_E;
+        }
+        XMEMCPY(des->data, src->data, src->dataLen);
+        des->dataLen = src->dataLen;
+    #endif /* HAVE_CAVIUM */
+#endif /* WOLFSSL_ASYNC_CRYPT */
+        return WOLFSSL_SUCCESS;
+}
+
+
+/* Deep copy of information from src to des structure
+ *
+ * des destination to copy information to
+ * src structure to get information from
+ *
+ * Returns WOLFSSL_SUCCESS on success and WOLFSSL_FAILURE on error
+ */
+int wolfSSL_HMAC_CTX_copy(WOLFSSL_HMAC_CTX* des, WOLFSSL_HMAC_CTX* src)
+{
+    WOLFSSL_ENTER("wolfSSL_HMAC_CTX_copy");
+
+    if (des == NULL || src == NULL) {
+        return WOLFSSL_FAILURE;
+    }
+
+    des->type = src->type;
     XMEMCPY((byte *)&des->save_ipad, (byte *)&src->hmac.ipad,
                                         WC_HMAC_BLOCK_SIZE);
     XMEMCPY((byte *)&des->save_opad, (byte *)&src->hmac.opad,
                                         WC_HMAC_BLOCK_SIZE);
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-    XMEMCPY(&des->hmac.asyncDev, &src->hmac.asyncDev, sizeof(WC_ASYNC_DEV));
-    des->hmac.keyLen = src->hmac.keyLen;
-    #ifdef HAVE_CAVIUM
-        des->hmac.data = (byte*)XMALLOC(src->hmac.dataLen, des->hmac.heap,
-                DYNAMIC_TYPE_HMAC);
-        if (des->hmac.data == NULL) {
-            return BUFFER_E;
-        }
-        XMEMCPY(des->hmac.data, src->hmac.data, src->hmac.dataLen);
-        des->hmac.dataLen = src->hmac.dataLen;
-    #endif /* HAVE_CAVIUM */
-#endif /* WOLFSSL_ASYNC_CRYPT */
-        return WOLFSSL_SUCCESS;
-}
+    return wolfSSL_HmacCopy(&des->hmac, &src->hmac);
+}
+
 
 #if defined(HAVE_FIPS) && \
     (!defined(HAVE_FIPS_VERSION) || (HAVE_FIPS_VERSION < 2))
@@ -24928,12 +30402,6 @@
             break;
     #endif /* WOLFSSL_SHA512 */
 
-    #ifdef HAVE_BLAKE2
-        case BLAKE2B_ID:
-            ret = wc_InitBlake2b(&hmac->hash.blake2b, BLAKE2B_256);
-            break;
-    #endif /* HAVE_BLAKE2 */
-
     #ifdef WOLFSSL_SHA3
         case WC_SHA3_224:
             ret = wc_InitSha3_224(&hmac->hash.sha3, heap, INVALID_DEVID);
@@ -25052,6 +30520,7 @@
         /* OpenSSL compat, no error */
     } else if(ctx->type >= 0) { /* MD5 == 0 */
         WOLFSSL_MSG("recover hmac");
+        wc_HmacFree(&ctx->hmac);
         if (wc_HmacInit(&ctx->hmac, NULL, INVALID_DEVID) == 0) {
             ctx->hmac.macType = (byte)ctx->type;
             ctx->hmac.innerHashKeyed = 0;
@@ -25104,7 +30573,7 @@
 
     WOLFSSL_MSG("wolfSSL_HMAC_Final");
 
-	/* "len" parameter is optional. */
+    /* "len" parameter is optional. */
     if (ctx == NULL || hash == NULL) {
         WOLFSSL_MSG("invalid parameter");
         return WOLFSSL_FAILURE;
@@ -25177,151 +30646,23 @@
 }
 
 
-const WOLFSSL_EVP_MD* wolfSSL_EVP_get_digestbynid(int id)
-{
-    WOLFSSL_MSG("wolfSSL_get_digestbynid");
-
-    switch(id) {
-#ifndef NO_MD5
-        case NID_md5:
-            return wolfSSL_EVP_md5();
-#endif
-#ifndef NO_SHA
-        case NID_sha1:
-            return wolfSSL_EVP_sha1();
-#endif
-        default:
-            WOLFSSL_MSG("Bad digest id value");
-    }
-
-    return NULL;
-}
-
-
-#ifndef NO_RSA
-WOLFSSL_RSA* wolfSSL_EVP_PKEY_get1_RSA(WOLFSSL_EVP_PKEY* key)
-{
-    WOLFSSL_RSA* local;
-
-    WOLFSSL_MSG("wolfSSL_EVP_PKEY_get1_RSA");
-
-    if (key == NULL) {
-        return NULL;
-    }
-
-    local = wolfSSL_RSA_new();
-    if (local == NULL) {
-        WOLFSSL_MSG("Error creating a new WOLFSSL_RSA structure");
-        return NULL;
-    }
-
-    if (key->type == EVP_PKEY_RSA) {
-        if (wolfSSL_RSA_LoadDer(local, (const unsigned char*)key->pkey.ptr,
-                    key->pkey_sz) != SSL_SUCCESS) {
-            /* now try public key */
-            if (wolfSSL_RSA_LoadDer_ex(local,
-                        (const unsigned char*)key->pkey.ptr, key->pkey_sz,
-                        WOLFSSL_RSA_LOAD_PUBLIC) != SSL_SUCCESS) {
-                wolfSSL_RSA_free(local);
-                local = NULL;
-            }
-        }
-    }
-    else {
-        WOLFSSL_MSG("WOLFSSL_EVP_PKEY does not hold an RSA key");
-        wolfSSL_RSA_free(local);
-        local = NULL;
-    }
-    return local;
-}
-
-
-/* with set1 functions the pkey struct does not own the RSA structure
- *
- * returns WOLFSSL_SUCCESS on success and WOLFSSL_FAILURE on failure
- */
-int wolfSSL_EVP_PKEY_set1_RSA(WOLFSSL_EVP_PKEY *pkey, WOLFSSL_RSA *key)
-{
-    if((pkey == NULL) || (key ==NULL))return WOLFSSL_FAILURE;
-    WOLFSSL_ENTER("wolfSSL_EVP_PKEY_set1_RSA");
-    if (pkey->rsa != NULL && pkey->ownRsa == 1) {
-        wolfSSL_RSA_free(pkey->rsa);
-    }
-    pkey->rsa    = key;
-    pkey->ownRsa = 0; /* pkey does not own RSA */
-    pkey->type = EVP_PKEY_RSA;
-#ifdef WC_RSA_BLINDING
-    if (key->ownRng == 0) {
-        if (wc_RsaSetRNG((RsaKey*)(pkey->rsa->internal), &(pkey->rng)) != 0) {
-            WOLFSSL_MSG("Error setting RSA rng");
-            return SSL_FAILURE;
-        }
-    }
-#endif
-    return WOLFSSL_SUCCESS;
-}
-#endif /* NO_RSA */
-
-#ifndef NO_WOLFSSL_STUB
-WOLFSSL_DSA* wolfSSL_EVP_PKEY_get1_DSA(WOLFSSL_EVP_PKEY* key)
-{
-    (void)key;
-    WOLFSSL_MSG("wolfSSL_EVP_PKEY_get1_DSA not implemented");
-    WOLFSSL_STUB("EVP_PKEY_get1_DSA");
-    return NULL;
-}
-#endif
-
-#ifndef NO_WOLFSSL_STUB
-WOLFSSL_EC_KEY* wolfSSL_EVP_PKEY_get1_EC_KEY(WOLFSSL_EVP_PKEY* key)
-{
-    (void)key;
-    WOLFSSL_STUB("EVP_PKEY_get1_EC_KEY");
-    WOLFSSL_MSG("wolfSSL_EVP_PKEY_get1_EC_KEY not implemented");
-
-    return NULL;
-}
-#endif
-
-void* wolfSSL_EVP_X_STATE(const WOLFSSL_EVP_CIPHER_CTX* ctx)
-{
-    WOLFSSL_MSG("wolfSSL_EVP_X_STATE");
-
-    if (ctx) {
-        switch (ctx->cipherType) {
-            case ARC4_TYPE:
-                WOLFSSL_MSG("returning arc4 state");
-                return (void*)&ctx->cipher.arc4.x;
-
-            default:
-                WOLFSSL_MSG("bad x state type");
-                return 0;
-        }
-    }
-
-    return NULL;
-}
-
-
-int wolfSSL_EVP_X_STATE_LEN(const WOLFSSL_EVP_CIPHER_CTX* ctx)
-{
-    WOLFSSL_MSG("wolfSSL_EVP_X_STATE_LEN");
-
-    if (ctx) {
-        switch (ctx->cipherType) {
-            case ARC4_TYPE:
-                WOLFSSL_MSG("returning arc4 state size");
-                return sizeof(Arc4);
-
-            default:
-                WOLFSSL_MSG("bad x state type");
-                return 0;
-        }
-    }
-
-    return 0;
-}
-
+void wolfSSL_HMAC_CTX_free(WOLFSSL_HMAC_CTX* ctx)
+{
+    if (!ctx) {
+        return;
+    }
+    wolfSSL_HMAC_cleanup(ctx);
+    XFREE(ctx, NULL, DYNAMIC_TYPE_OPENSSL);
+}
+
+size_t wolfSSL_HMAC_size(const WOLFSSL_HMAC_CTX *ctx)
+{
+    if (!ctx) {
+        return 0;
+    }
+
+    return (size_t)wc_HashGetDigestSize((enum wc_HashType)ctx->hmac.macType);
+}
 
 #ifndef NO_DES3
 
@@ -25368,159 +30709,6 @@
 
 #endif /* NO_AES */
 
-#ifndef NO_WOLFSSL_STUB
-const WOLFSSL_EVP_MD* wolfSSL_EVP_ripemd160(void)
-{
-    WOLFSSL_MSG("wolfSSL_ripemd160");
-    WOLFSSL_STUB("EVP_ripemd160");
-    return NULL;
-}
-#endif
-
-int wolfSSL_EVP_MD_size(const WOLFSSL_EVP_MD* type)
-{
-    WOLFSSL_MSG("wolfSSL_EVP_MD_size");
-
-    if (type == NULL) {
-        WOLFSSL_MSG("No md type arg");
-        return BAD_FUNC_ARG;
-    }
-
-    if (XSTRNCMP(type, "SHA256", 6) == 0) {
-        return WC_SHA256_DIGEST_SIZE;
-    }
-#ifndef NO_MD5
-    else if (XSTRNCMP(type, "MD5", 3) == 0) {
-        return WC_MD5_DIGEST_SIZE;
-    }
-#endif
-#ifdef WOLFSSL_SHA224
-    else if (XSTRNCMP(type, "SHA224", 6) == 0) {
-        return WC_SHA224_DIGEST_SIZE;
-    }
-#endif
-#ifdef WOLFSSL_SHA384
-    else if (XSTRNCMP(type, "SHA384", 6) == 0) {
-        return WC_SHA384_DIGEST_SIZE;
-    }
-#endif
-#ifdef WOLFSSL_SHA512
-    else if (XSTRNCMP(type, "SHA512", 6) == 0) {
-        return WC_SHA512_DIGEST_SIZE;
-    }
-#endif
-#ifndef NO_SHA
-    /* has to be last since would pick or 256, 384, or 512 too */
-    else if (XSTRNCMP(type, "SHA", 3) == 0) {
-        return WC_SHA_DIGEST_SIZE;
-    }
-#endif
-
-    return BAD_FUNC_ARG;
-}
-
-
-int wolfSSL_EVP_CIPHER_CTX_iv_length(const WOLFSSL_EVP_CIPHER_CTX* ctx)
-{
-    WOLFSSL_MSG("wolfSSL_EVP_CIPHER_CTX_iv_length");
-
-    switch (ctx->cipherType) {
-
-#ifdef HAVE_AES_CBC
-        case AES_128_CBC_TYPE :
-        case AES_192_CBC_TYPE :
-        case AES_256_CBC_TYPE :
-            WOLFSSL_MSG("AES CBC");
-            return AES_BLOCK_SIZE;
-#endif
-#ifdef WOLFSSL_AES_COUNTER
-        case AES_128_CTR_TYPE :
-        case AES_192_CTR_TYPE :
-        case AES_256_CTR_TYPE :
-            WOLFSSL_MSG("AES CTR");
-            return AES_BLOCK_SIZE;
-#endif
-#ifndef NO_DES3
-        case DES_CBC_TYPE :
-            WOLFSSL_MSG("DES CBC");
-            return DES_BLOCK_SIZE;
-
-        case DES_EDE3_CBC_TYPE :
-            WOLFSSL_MSG("DES EDE3 CBC");
-            return DES_BLOCK_SIZE;
-#endif
-#ifdef HAVE_IDEA
-        case IDEA_CBC_TYPE :
-            WOLFSSL_MSG("IDEA CBC");
-            return IDEA_BLOCK_SIZE;
-#endif
-#ifndef NO_RC4
-        case ARC4_TYPE :
-            WOLFSSL_MSG("ARC4");
-            return 0;
-#endif
-
-        case NULL_CIPHER_TYPE :
-            WOLFSSL_MSG("NULL");
-            return 0;
-
-        default: {
-            WOLFSSL_MSG("bad type");
-        }
-    }
-    return 0;
-}
-
-int wolfSSL_EVP_CIPHER_iv_length(const WOLFSSL_EVP_CIPHER* cipher)
-{
-    const char *name = (const char *)cipher;
-    WOLFSSL_MSG("wolfSSL_EVP_CIPHER_iv_length");
-
-#ifndef NO_AES
-    #ifdef WOLFSSL_AES_128
-    if (XSTRNCMP(name, EVP_AES_128_CBC, XSTRLEN(EVP_AES_128_CBC)) == 0)
-        return AES_BLOCK_SIZE;
-    #endif
-    #ifdef WOLFSSL_AES_192
-    if (XSTRNCMP(name, EVP_AES_192_CBC, XSTRLEN(EVP_AES_192_CBC)) == 0)
-        return AES_BLOCK_SIZE;
-    #endif
-    #ifdef WOLFSSL_AES_256
-    if (XSTRNCMP(name, EVP_AES_256_CBC, XSTRLEN(EVP_AES_256_CBC)) == 0)
-        return AES_BLOCK_SIZE;
-    #endif
-#ifdef WOLFSSL_AES_COUNTER
-    #ifdef WOLFSSL_AES_128
-    if (XSTRNCMP(name, EVP_AES_128_CTR, XSTRLEN(EVP_AES_128_CTR)) == 0)
-        return AES_BLOCK_SIZE;
-    #endif
-    #ifdef WOLFSSL_AES_192
-    if (XSTRNCMP(name, EVP_AES_192_CTR, XSTRLEN(EVP_AES_192_CTR)) == 0)
-        return AES_BLOCK_SIZE;
-    #endif
-    #ifdef WOLFSSL_AES_256
-    if (XSTRNCMP(name, EVP_AES_256_CTR, XSTRLEN(EVP_AES_256_CTR)) == 0)
-        return AES_BLOCK_SIZE;
-    #endif
-#endif
-#endif
-
-#ifndef NO_DES3
-    if ((XSTRNCMP(name, EVP_DES_CBC, XSTRLEN(EVP_DES_CBC)) == 0) ||
-           (XSTRNCMP(name, EVP_DES_EDE3_CBC, XSTRLEN(EVP_DES_EDE3_CBC)) == 0)) {
-        return DES_BLOCK_SIZE;
-    }
-#endif
-
-#ifdef HAVE_IDEA
-    if (XSTRNCMP(name, EVP_IDEA_CBC, XSTRLEN(EVP_IDEA_CBC)) == 0)
-        return IDEA_BLOCK_SIZE;
-#endif
-
-    (void)name;
-
-    return 0;
-}
 
 /* Free the dynamically allocated data.
  *
@@ -25618,7 +30806,7 @@
         return WOLFSSL_FAILURE;
     }
     XSTRNCPY((char*)*cipherInfo, info->name, cipherInfoSz);
-    XSTRNCAT((char*)*cipherInfo, ",", 1);
+    XSTRNCAT((char*)*cipherInfo, ",", 2);
 
     idx = (word32)XSTRLEN((char*)*cipherInfo);
     cipherInfoSz -= idx;
@@ -25637,7 +30825,81 @@
 }
 #endif /* WOLFSSL_KEY_GEN || WOLFSSL_PEM_TO_DER */
 
+#if defined(WOLFSSL_KEY_GEN) && !defined(NO_RSA) && !defined(HAVE_USER_RSA)
+static int wolfSSL_RSA_To_Der(WOLFSSL_RSA* rsa, byte** outBuf, int publicKey)
+{
+    int derSz  = 0;
+    int ret;
+    byte* derBuf;
+
+    WOLFSSL_ENTER("wolfSSL_RSA_To_Der");
+
+    if (!rsa || (publicKey != 0 && publicKey != 1)) {
+        WOLFSSL_LEAVE("wolfSSL_RSA_To_Der", BAD_FUNC_ARG);
+        return BAD_FUNC_ARG;
+    }
+
+    if (rsa->inSet == 0) {
+        if ((ret = SetRsaInternal(rsa)) != WOLFSSL_SUCCESS) {
+            WOLFSSL_MSG("SetRsaInternal() Failed");
+            WOLFSSL_LEAVE("wolfSSL_RSA_To_Der", ret);
+            return ret;
+        }
+    }
+
+    if (publicKey) {
+        if ((derSz = wc_RsaPublicKeyDerSize((RsaKey *)rsa->internal, 1)) < 0) {
+            WOLFSSL_MSG("wc_RsaPublicKeyDerSize failed");
+            WOLFSSL_LEAVE("wolfSSL_RSA_To_Der", derSz);
+            return derSz;
+        }
+    }
+    else {
+        if ((derSz = wc_RsaKeyToDer((RsaKey*)rsa->internal, NULL, 0)) < 0) {
+            WOLFSSL_MSG("wc_RsaKeyToDer failed");
+            WOLFSSL_LEAVE("wolfSSL_RSA_To_Der", derSz);
+            return derSz;
+        }
+    }
+
+    if (outBuf) {
+        if (!(derBuf = (byte*)XMALLOC(derSz, NULL, DYNAMIC_TYPE_TMP_BUFFER))) {
+            WOLFSSL_MSG("malloc failed");
+            WOLFSSL_LEAVE("wolfSSL_RSA_To_Der", MEMORY_ERROR);
+            return MEMORY_ERROR;
+        }
+
+        /* Key to DER */
+        if (publicKey) {
+            derSz = wc_RsaKeyToPublicDer((RsaKey*)rsa->internal, derBuf, derSz);
+        }
+        else {
+            derSz = wc_RsaKeyToDer((RsaKey*)rsa->internal, derBuf, derSz);
+        }
+
+        if (derSz < 0) {
+            WOLFSSL_MSG("wc_RsaKeyToPublicDer failed");
+            XFREE(derBuf, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        }
+        else {
+            if (*outBuf) {
+                XMEMCPY(*outBuf, derBuf, derSz);
+                XFREE(derBuf, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+            }
+            else {
+                *outBuf = derBuf;
+            }
+        }
+    }
+
+    WOLFSSL_LEAVE("wolfSSL_RSA_To_Der", derSz);
+    return derSz;
+}
+#endif
+
 #if defined(WOLFSSL_KEY_GEN) || defined(WOLFSSL_CERT_GEN)
+#if (defined(OPENSSL_EXTRA) || defined(OPENSSL_ALL)) && !defined(NO_RSA)
+
 /* Takes a WOLFSSL_RSA key and writes it out to a WOLFSSL_BIO
  *
  * bio    the WOLFSSL_BIO to write to
@@ -25658,42 +30920,29 @@
 
     WOLFSSL_ENTER("wolfSSL_PEM_write_bio_RSAPrivateKey");
 
-
-    pkey = wolfSSL_PKEY_new_ex(bio->heap);
+    if (bio == NULL || key == NULL) {
+        WOLFSSL_MSG("Bad Function Arguments");
+        return WOLFSSL_FAILURE;
+    }
+
+    pkey = wolfSSL_EVP_PKEY_new_ex(bio->heap);
     if (pkey == NULL) {
-        WOLFSSL_MSG("wolfSSL_PKEY_new_ex failed");
-        return SSL_FAILURE;
+        WOLFSSL_MSG("wolfSSL_EVP_PKEY_new_ex failed");
+        return WOLFSSL_FAILURE;
     }
 
     pkey->type   = EVP_PKEY_RSA;
     pkey->rsa    = key;
     pkey->ownRsa = 0;
-#ifdef WOLFSSL_KEY_GEN
+#if defined(WOLFSSL_KEY_GEN) && !defined(NO_RSA) && !defined(HAVE_USER_RSA)
     /* similar to how wolfSSL_PEM_write_mem_RSAPrivateKey finds DER of key */
     {
-        int derMax;
         int derSz;
-        byte* derBuf;
-
-        /* 5 > size of n, d, p, q, d%(p-1), d(q-1), 1/q%p, e + ASN.1 additional
-         *  informations
-         */
-        derMax = 5 * wolfSSL_RSA_size(key) + AES_BLOCK_SIZE;
-
-        derBuf = (byte*)XMALLOC(derMax, bio->heap, DYNAMIC_TYPE_TMP_BUFFER);
-        if (derBuf == NULL) {
-            WOLFSSL_MSG("malloc failed");
-            wolfSSL_EVP_PKEY_free(pkey);
-            return SSL_FAILURE;
-        }
-
-        /* Key to DER */
-        derSz = wc_RsaKeyToDer((RsaKey*)key->internal, derBuf, derMax);
-        if (derSz < 0) {
-            WOLFSSL_MSG("wc_RsaKeyToDer failed");
-            XFREE(derBuf, bio->heap, DYNAMIC_TYPE_TMP_BUFFER);
-            wolfSSL_EVP_PKEY_free(pkey);
-            return SSL_FAILURE;
+        byte* derBuf = NULL;
+
+        if ((derSz = wolfSSL_RSA_To_Der(key, &derBuf, 0)) < 0) {
+            WOLFSSL_MSG("wolfSSL_RSA_To_Der failed");
+            return WOLFSSL_FAILURE;
         }
 
         pkey->pkey.ptr = (char*)XMALLOC(derSz, bio->heap,
@@ -25702,7 +30951,7 @@
             WOLFSSL_MSG("key malloc failed");
             XFREE(derBuf, bio->heap, DYNAMIC_TYPE_TMP_BUFFER);
             wolfSSL_EVP_PKEY_free(pkey);
-            return SSL_FAILURE;
+            return WOLFSSL_FAILURE;
         }
         pkey->pkey_sz = derSz;
         XMEMCPY(pkey->pkey.ptr, derBuf, derSz);
@@ -25718,7 +30967,138 @@
     return ret;
 }
 
-
+#if defined(WOLFSSL_KEY_GEN) && !defined(NO_RSA) && !defined(HAVE_USER_RSA)
+/* Takes an RSA public key and writes it out to a WOLFSSL_BIO
+ * Returns WOLFSSL_SUCCESS or WOLFSSL_FAILURE
+ */
+int wolfSSL_PEM_write_bio_RSA_PUBKEY(WOLFSSL_BIO* bio, WOLFSSL_RSA* rsa)
+{
+    int ret = 0, derSz = 0;
+    byte *derBuf = NULL;
+    WOLFSSL_EVP_PKEY* pkey = NULL;
+
+    WOLFSSL_ENTER("wolfSSL_PEM_write_bio_RSA_PUBKEY");
+
+    if (bio == NULL || rsa == NULL) {
+        WOLFSSL_MSG("Bad Function Arguments");
+        return WOLFSSL_FAILURE;
+    }
+
+    /* Initialize pkey structure */
+    pkey = wolfSSL_EVP_PKEY_new_ex(bio->heap);
+    if (pkey == NULL) {
+        WOLFSSL_MSG("wolfSSL_EVP_PKEY_new_ex failed");
+        return WOLFSSL_FAILURE;
+    }
+
+    pkey->type   = EVP_PKEY_RSA;
+    pkey->rsa    = rsa;
+    pkey->ownRsa = 0;
+
+    if ((derSz = wolfSSL_RSA_To_Der(rsa, &derBuf, 1)) < 0) {
+        WOLFSSL_MSG("wolfSSL_RSA_To_Der failed");
+        return WOLFSSL_FAILURE;
+    }
+
+    pkey->pkey.ptr = (char*)XMALLOC(derSz, bio->heap,
+            DYNAMIC_TYPE_TMP_BUFFER);
+    if (pkey->pkey.ptr == NULL) {
+        WOLFSSL_MSG("key malloc failed");
+        XFREE(derBuf, bio->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        wolfSSL_EVP_PKEY_free(pkey);
+        return WOLFSSL_FAILURE;
+    }
+    pkey->pkey_sz = derSz;
+    XMEMCPY(pkey->pkey.ptr, derBuf, derSz);
+    XFREE(derBuf, bio->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    ret = wolfSSL_PEM_write_bio_PUBKEY(bio, pkey);
+    wolfSSL_EVP_PKEY_free(pkey);
+
+    return ret;
+}
+#endif
+
+
+/* Reads an RSA public key from a WOLFSSL_BIO into a WOLFSSL_RSA
+ * Returns WOLFSSL_SUCCESS or WOLFSSL_FAILURE
+ */
+WOLFSSL_RSA *wolfSSL_PEM_read_bio_RSA_PUBKEY(WOLFSSL_BIO* bio,WOLFSSL_RSA** rsa,
+                                                pem_password_cb* cb, void *pass)
+{
+    WOLFSSL_EVP_PKEY* pkey;
+    WOLFSSL_RSA* local;
+
+    WOLFSSL_ENTER("wolfSSL_PEM_read_bio_RSA_PUBKEY");
+
+    pkey = wolfSSL_PEM_read_bio_PUBKEY(bio, NULL, cb, pass);
+    if (pkey == NULL) {
+        return NULL;
+    }
+
+    /* Since the WOLFSSL_RSA structure is being taken from WOLFSSL_EVP_PKEY the
+     * flag indicating that the WOLFSSL_RSA structure is owned should be FALSE
+     * to avoid having it free'd */
+    pkey->ownRsa = 0;
+    local = pkey->rsa;
+    if (rsa != NULL){
+        *rsa = local;
+    }
+
+    wolfSSL_EVP_PKEY_free(pkey);
+    return local;
+}
+
+#endif /* defined(OPENSSL_EXTRA) || defined(OPENSSL_ALL) && !defined(NO_RSA) */
+
+/* Takes a public key and writes it out to a WOLFSSL_BIO
+ * Returns WOLFSSL_SUCCESS or WOLFSSL_FAILURE
+ */
+int wolfSSL_PEM_write_bio_PUBKEY(WOLFSSL_BIO* bio, WOLFSSL_EVP_PKEY* key)
+{
+    byte* keyDer;
+    int pemSz;
+    int ret;
+    byte* tmp;
+
+    WOLFSSL_ENTER("wolfSSL_PEM_write_bio_PUBKEY");
+
+    if (bio == NULL || key == NULL) {
+        return WOLFSSL_FAILURE;
+    }
+
+    keyDer = (byte*)key->pkey.ptr;
+
+    pemSz = wc_DerToPem(keyDer, key->pkey_sz, NULL, 0, PUBLICKEY_TYPE);
+    if (pemSz < 0) {
+        WOLFSSL_LEAVE("wolfSSL_PEM_write_bio_PUBKEY", pemSz);
+        return WOLFSSL_FAILURE;
+    }
+    tmp = (byte*)XMALLOC(pemSz, bio->heap, DYNAMIC_TYPE_OPENSSL);
+    if (tmp == NULL) {
+        return MEMORY_E;
+    }
+
+    ret = wc_DerToPemEx(keyDer, key->pkey_sz, tmp, pemSz,
+                        NULL, PUBLICKEY_TYPE);
+    if (ret < 0) {
+        WOLFSSL_LEAVE("wolfSSL_PEM_write_bio_PUBKEY", ret);
+        XFREE(tmp, bio->heap, DYNAMIC_TYPE_OPENSSL);
+        return WOLFSSL_FAILURE;
+    }
+
+    ret = wolfSSL_BIO_write(bio, tmp, pemSz);
+    XFREE(tmp, bio->heap, DYNAMIC_TYPE_OPENSSL);
+    if (ret != pemSz) {
+        WOLFSSL_MSG("Unable to write full PEM to BIO");
+        return WOLFSSL_FAILURE;
+    }
+
+    return WOLFSSL_SUCCESS;
+}
+
+/* Takes a private key and writes it out to a WOLFSSL_BIO
+ * Returns WOLFSSL_SUCCESS or WOLFSSL_FAILURE
+ */
 int wolfSSL_PEM_write_bio_PrivateKey(WOLFSSL_BIO* bio, WOLFSSL_EVP_PKEY* key,
                                         const WOLFSSL_EVP_CIPHER* cipher,
                                         unsigned char* passwd, int len,
@@ -25739,15 +31119,18 @@
     WOLFSSL_ENTER("wolfSSL_PEM_write_bio_PrivateKey");
 
     if (bio == NULL || key == NULL) {
+        WOLFSSL_MSG("Bad Function Arguments");
         return WOLFSSL_FAILURE;
     }
 
     keyDer = (byte*)key->pkey.ptr;
 
     switch (key->type) {
+#ifndef NO_RSA
         case EVP_PKEY_RSA:
             type = PRIVATEKEY_TYPE;
             break;
+#endif
 
 #ifndef NO_DSA
         case EVP_PKEY_DSA:
@@ -25755,9 +31138,17 @@
             break;
 #endif
 
+#ifdef HAVE_ECC
         case EVP_PKEY_EC:
             type = ECC_PRIVATEKEY_TYPE;
             break;
+#endif
+
+#if !defined(NO_DH) && (defined(WOLFSSL_QT) || defined(OPENSSL_ALL))
+        case EVP_PKEY_DH:
+            type = DH_PRIVATEKEY_TYPE;
+            break;
+#endif
 
         default:
             WOLFSSL_MSG("Unknown Key type!");
@@ -25779,21 +31170,21 @@
     if (ret < 0) {
         WOLFSSL_LEAVE("wolfSSL_PEM_write_bio_PrivateKey", ret);
         XFREE(tmp, bio->heap, DYNAMIC_TYPE_OPENSSL);
-        return SSL_FAILURE;
+        return WOLFSSL_FAILURE;
     }
 
     ret = wolfSSL_BIO_write(bio, tmp, pemSz);
     XFREE(tmp, bio->heap, DYNAMIC_TYPE_OPENSSL);
     if (ret != pemSz) {
         WOLFSSL_MSG("Unable to write full PEM to BIO");
-        return SSL_FAILURE;
-    }
-
-    return SSL_SUCCESS;
+        return WOLFSSL_FAILURE;
+    }
+
+    return WOLFSSL_SUCCESS;
 }
 #endif /* defined(WOLFSSL_KEY_GEN) || defined(WOLFSSL_CERT_GEN) */
 
-#if defined(WOLFSSL_KEY_GEN) && !defined(NO_RSA) && !defined(HAVE_USER_RSA) && \
+#if (defined(WOLFSSL_KEY_GEN) && !defined(NO_RSA) && !defined(HAVE_USER_RSA)) && \
     (defined(WOLFSSL_PEM_TO_DER) || defined(WOLFSSL_DER_TO_PEM))
 
 /* return code compliant with OpenSSL :
@@ -25803,8 +31194,8 @@
                                         unsigned char* passwd, int passwdSz,
                                         unsigned char **pem, int *plen)
 {
-    byte *derBuf, *tmp, *cipherInfo = NULL;
-    int  der_max_len = 0, derSz = 0;
+    byte *derBuf = NULL, *tmp, *cipherInfo = NULL;
+    int  derSz = 0;
     const int type = PRIVATEKEY_TYPE;
     const char* header = NULL;
     const char* footer = NULL;
@@ -25828,22 +31219,8 @@
         }
     }
 
-    /* 5 > size of n, d, p, q, d%(p-1), d(q-1), 1/q%p, e + ASN.1 additional
-     *  informations
-     */
-    der_max_len = 5 * wolfSSL_RSA_size(rsa) + AES_BLOCK_SIZE;
-
-    derBuf = (byte*)XMALLOC(der_max_len, NULL, DYNAMIC_TYPE_DER);
-    if (derBuf == NULL) {
-        WOLFSSL_MSG("malloc failed");
-        return WOLFSSL_FAILURE;
-    }
-
-    /* Key to DER */
-    derSz = wc_RsaKeyToDer((RsaKey*)rsa->internal, derBuf, der_max_len);
-    if (derSz < 0) {
-        WOLFSSL_MSG("wc_RsaKeyToDer failed");
-        XFREE(derBuf, NULL, DYNAMIC_TYPE_DER);
+    if ((derSz = wolfSSL_RSA_To_Der(rsa, &derBuf, 0)) < 0) {
+        WOLFSSL_MSG("wolfSSL_RSA_To_Der failed");
         return WOLFSSL_FAILURE;
     }
 
@@ -25916,7 +31293,7 @@
 /* return code compliant with OpenSSL :
  *   1 if success, 0 if error
  */
-int wolfSSL_PEM_write_RSAPrivateKey(FILE *fp, WOLFSSL_RSA *rsa,
+int wolfSSL_PEM_write_RSAPrivateKey(XFILE fp, WOLFSSL_RSA *rsa,
                                     const EVP_CIPHER *enc,
                                     unsigned char *kstr, int klen,
                                     pem_password_cb *cb, void *u)
@@ -25929,7 +31306,8 @@
 
     WOLFSSL_MSG("wolfSSL_PEM_write_RSAPrivateKey");
 
-    if (fp == NULL || rsa == NULL || rsa->internal == NULL) {
+    if (fp == XBADFILE || rsa == NULL || rsa->internal == NULL)
+    {
         WOLFSSL_MSG("Bad function arguments");
         return WOLFSSL_FAILURE;
     }
@@ -25955,6 +31333,30 @@
 
 #ifdef HAVE_ECC
 
+#ifdef ALT_ECC_SIZE
+static int SetIndividualInternalEcc(WOLFSSL_BIGNUM* bn, mp_int* mpi)
+{
+    WOLFSSL_MSG("Entering SetIndividualInternal");
+
+    if (bn == NULL || bn->internal == NULL) {
+        WOLFSSL_MSG("bn NULL error");
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    if (mpi == NULL) {
+        WOLFSSL_MSG("mpi NULL error");
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    if (mp_copy((mp_int*)bn->internal, mpi) != MP_OKAY) {
+        WOLFSSL_MSG("mp_copy error");
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    return WOLFSSL_SUCCESS;
+}
+#endif /* ALT_ECC_SIZE */
+
 /* EC_POINT Openssl -> WolfSSL */
 static int SetECPointInternal(WOLFSSL_EC_POINT *p)
 {
@@ -25968,6 +31370,7 @@
 
     point = (ecc_point*)p->internal;
 
+#ifndef ALT_ECC_SIZE
     if (p->X != NULL && SetIndividualInternal(p->X, point->x) != WOLFSSL_SUCCESS) {
         WOLFSSL_MSG("ecc point X error");
         return WOLFSSL_FATAL_ERROR;
@@ -25982,15 +31385,27 @@
         WOLFSSL_MSG("ecc point Z error");
         return WOLFSSL_FATAL_ERROR;
     }
+#else
+    if (p->X != NULL && SetIndividualInternalEcc(p->X, point->x) != WOLFSSL_SUCCESS) {
+        WOLFSSL_MSG("ecc point X error");
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    if (p->Y != NULL && SetIndividualInternalEcc(p->Y, point->y) != WOLFSSL_SUCCESS) {
+        WOLFSSL_MSG("ecc point Y error");
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    if (p->Z != NULL && SetIndividualInternalEcc(p->Z, point->z) != WOLFSSL_SUCCESS) {
+        WOLFSSL_MSG("ecc point Z error");
+        return WOLFSSL_FATAL_ERROR;
+    }
+#endif
 
     p->inSet = 1;
 
     return WOLFSSL_SUCCESS;
 }
-#endif /* HAVE_ECC */
-#endif /* OPENSSL_EXTRA */
-
-#if defined(HAVE_ECC) && defined(OPENSSL_EXTRA_X509_SMALL)
 
 /* EC_POINT WolfSSL -> OpenSSL */
 static int SetECPointExternal(WOLFSSL_EC_POINT *p)
@@ -26028,7 +31443,7 @@
 
 
 /* EC_KEY wolfSSL -> OpenSSL */
-static int SetECKeyExternal(WOLFSSL_EC_KEY* eckey)
+int SetECKeyExternal(WOLFSSL_EC_KEY* eckey)
 {
     ecc_key* key;
 
@@ -26073,18 +31488,15 @@
 
     return WOLFSSL_SUCCESS;
 }
-#endif /* HAVE_ECC && OPENSSL_EXTRA_X509_SMALL */
-
-#ifdef OPENSSL_EXTRA
-#ifdef HAVE_ECC
+
 /* EC_KEY Openssl -> WolfSSL */
-static int SetECKeyInternal(WOLFSSL_EC_KEY* eckey)
+int SetECKeyInternal(WOLFSSL_EC_KEY* eckey)
 {
     ecc_key* key;
 
     WOLFSSL_ENTER("SetECKeyInternal");
 
-    if (eckey == NULL || eckey->internal == NULL) {
+    if (eckey == NULL || eckey->internal == NULL || eckey->group == NULL) {
         WOLFSSL_MSG("ec key NULL error");
         return WOLFSSL_FATAL_ERROR;
     }
@@ -26109,6 +31521,12 @@
             return WOLFSSL_FATAL_ERROR;
         }
 
+        /* copy over the public point to key */
+        if (wc_ecc_copy_point((ecc_point*)eckey->pub_key->internal, &key->pubkey) != MP_OKAY) {
+            WOLFSSL_MSG("wc_ecc_copy_point error");
+            return WOLFSSL_FATAL_ERROR;
+        }
+
         /* public key */
         key->type = ECC_PUBLICKEY;
     }
@@ -26134,7 +31552,7 @@
     WOLFSSL_ENTER("wolfSSL_EC_KEY_get0_public_key");
 
     if (key == NULL) {
-        WOLFSSL_MSG("wolfSSL_EC_KEY_get0_group Bad arguments");
+        WOLFSSL_MSG("wolfSSL_EC_KEY_get0_public_key Bad arguments");
         return NULL;
     }
 
@@ -26196,6 +31614,11 @@
         return NULL;
     }
 
+    if (wolfSSL_BN_is_zero(key->priv_key)) {
+        /* return NULL if not set */
+        return NULL;
+    }
+
     return key->priv_key;
 }
 
@@ -26203,9 +31626,15 @@
 {
     WOLFSSL_EC_KEY *key;
     int x;
+    int eccEnum;
 
     WOLFSSL_ENTER("wolfSSL_EC_KEY_new_by_curve_name");
 
+    /* If NID passed in is OpenSSL type, convert it to ecc_curve_id enum */
+    eccEnum = NIDToEccEnum(nid);
+    if (eccEnum == -1)
+        eccEnum = nid;
+
     key = wolfSSL_EC_KEY_new();
     if (key == NULL) {
         WOLFSSL_MSG("wolfSSL_EC_KEY_new failure");
@@ -26213,7 +31642,7 @@
     }
 
     /* set the nid of the curve */
-    key->group->curve_nid = nid;
+    key->group->curve_nid = eccEnum;
 
     /* search and set the corresponding internal curve idx */
     for (x = 0; ecc_sets[x].size != 0; x++)
@@ -26226,10 +31655,96 @@
     return key;
 }
 
-#endif /* HAVE_ECC */
-#endif /* OPENSSL_EXTRA */
-
-#if defined(HAVE_ECC) && (defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL))
+const char* wolfSSL_EC_curve_nid2nist(int nid)
+{
+    const WOLF_EC_NIST_NAME* nist_name;
+    for (nist_name = kNistCurves; nist_name->name != NULL; nist_name++) {
+        if (nist_name->nid == nid) {
+            return kNistCurves->name;
+        }
+    }
+    return NULL;
+}
+
+#ifdef WOLFSSL_TLS13
+static int populate_groups(int* groups, int max_count, char *list)
+{
+    char *end;
+    int len;
+    int count = 0;
+    const WOLF_EC_NIST_NAME* nist_name;
+
+    if (!groups || !list) {
+        return -1;
+    }
+
+    for (end = list; ; list = ++end) {
+        if (count > max_count) {
+            WOLFSSL_MSG("Too many curves in list");
+            return -1;
+        }
+        while (*end != ':' && *end != '\0') end++;
+        len = (int)(end - list); /* end points to char after end
+                                  * of curve name so no need for -1 */
+        if ((len < kNistCurves_MIN_NAME_LEN) ||
+                (len > kNistCurves_MAX_NAME_LEN)) {
+            WOLFSSL_MSG("Unrecognized curve name in list");
+            return -1;
+        }
+        for (nist_name = kNistCurves; nist_name->name != NULL; nist_name++) {
+            if (len == nist_name->name_len &&
+                    XSTRNCMP(list, nist_name->name, nist_name->name_len) == 0) {
+                break;
+            }
+        }
+        if (!nist_name->name) {
+            WOLFSSL_MSG("Unrecognized curve name in list");
+            return -1;
+        }
+        groups[count++] = nist_name->nid;
+        if (*end == '\0') break;
+    }
+
+    return count;
+}
+
+int wolfSSL_CTX_set1_groups_list(WOLFSSL_CTX *ctx, char *list)
+{
+    int groups[WOLFSSL_MAX_GROUP_COUNT];
+    int count;
+
+    if (!ctx || !list) {
+        return WOLFSSL_FAILURE;
+    }
+
+    if ((count = populate_groups(groups,
+            WOLFSSL_MAX_GROUP_COUNT, list)) == -1) {
+        return WOLFSSL_FAILURE;
+    }
+
+    return wolfSSL_CTX_set_groups(ctx, groups, count) == WOLFSSL_SUCCESS ?
+            WOLFSSL_SUCCESS : WOLFSSL_FAILURE;
+}
+
+int wolfSSL_set1_groups_list(WOLFSSL *ssl, char *list)
+{
+    int groups[WOLFSSL_MAX_GROUP_COUNT];
+    int count;
+
+    if (!ssl || !list) {
+        return WOLFSSL_FAILURE;
+    }
+
+    if ((count = populate_groups(groups,
+            WOLFSSL_MAX_GROUP_COUNT, list)) == -1) {
+        return WOLFSSL_FAILURE;
+    }
+
+    return wolfSSL_set_groups(ssl, groups, count) == WOLFSSL_SUCCESS ?
+            WOLFSSL_SUCCESS : WOLFSSL_FAILURE;
+}
+#endif /* WOLFSSL_TLS13 */
+
 static void InitwolfSSL_ECKey(WOLFSSL_EC_KEY* key)
 {
     if (key) {
@@ -26245,8 +31760,6 @@
 WOLFSSL_EC_KEY *wolfSSL_EC_KEY_new(void)
 {
     WOLFSSL_EC_KEY *external;
-    ecc_key* key;
-
     WOLFSSL_ENTER("wolfSSL_EC_KEY_new");
 
     external = (WOLFSSL_EC_KEY*)XMALLOC(sizeof(WOLFSSL_EC_KEY), NULL,
@@ -26263,51 +31776,40 @@
                                            DYNAMIC_TYPE_ECC);
     if (external->internal == NULL) {
         WOLFSSL_MSG("wolfSSL_EC_KEY_new malloc ecc key failure");
-        wolfSSL_EC_KEY_free(external);
-        return NULL;
+        goto error;
     }
     XMEMSET(external->internal, 0, sizeof(ecc_key));
 
-    wc_ecc_init((ecc_key*)external->internal);
-
-    /* public key */
-    external->pub_key = (WOLFSSL_EC_POINT*)XMALLOC(sizeof(WOLFSSL_EC_POINT),
-                                                   NULL, DYNAMIC_TYPE_ECC);
-    if (external->pub_key == NULL) {
-        WOLFSSL_MSG("wolfSSL_EC_KEY_new malloc WOLFSSL_EC_POINT failure");
-        wolfSSL_EC_KEY_free(external);
-        return NULL;
-    }
-    XMEMSET(external->pub_key, 0, sizeof(WOLFSSL_EC_POINT));
-
-    key = (ecc_key*)external->internal;
-    external->pub_key->internal = wc_ecc_new_point();
-    if (wc_ecc_copy_point((ecc_point*)&key->pubkey,
-                (ecc_point*)external->pub_key->internal) != MP_OKAY) {
-        WOLFSSL_MSG("wc_ecc_copy_point failure");
-        wolfSSL_EC_KEY_free(external);
-        return NULL;
+    if (wc_ecc_init((ecc_key*)external->internal) != 0) {
+        WOLFSSL_MSG("wolfSSL_EC_KEY_new init ecc key failure");
+        goto error;
     }
 
     /* curve group */
-    external->group = (WOLFSSL_EC_GROUP*)XMALLOC(sizeof(WOLFSSL_EC_GROUP), NULL,
-                                                 DYNAMIC_TYPE_ECC);
+    external->group = wolfSSL_EC_GROUP_new_by_curve_name(ECC_CURVE_DEF);
     if (external->group == NULL) {
         WOLFSSL_MSG("wolfSSL_EC_KEY_new malloc WOLFSSL_EC_GROUP failure");
-        wolfSSL_EC_KEY_free(external);
-        return NULL;
-    }
-    XMEMSET(external->group, 0, sizeof(WOLFSSL_EC_GROUP));
+        goto error;
+    }
+
+    /* public key */
+    external->pub_key = wolfSSL_EC_POINT_new(external->group);
+    if (external->pub_key == NULL) {
+        WOLFSSL_MSG("wolfSSL_EC_POINT_new failure");
+        goto error;
+    }
 
     /* private key */
     external->priv_key = wolfSSL_BN_new();
     if (external->priv_key == NULL) {
         WOLFSSL_MSG("wolfSSL_BN_new failure");
-        wolfSSL_EC_KEY_free(external);
-        return NULL;
+        goto error;
     }
 
     return external;
+error:
+    wolfSSL_EC_KEY_free(external);
+    return NULL;
 }
 
 void wolfSSL_EC_KEY_free(WOLFSSL_EC_KEY *key)
@@ -26325,13 +31827,9 @@
         InitwolfSSL_ECKey(key); /* set back to NULLs for safety */
 
         XFREE(key, NULL, DYNAMIC_TYPE_ECC);
-        key = NULL;
-    }
-}
-#endif /* HAVE_ECC && (OPENSSL_EXTRA || OPENSSL_EXTRA_X509_SMALL) */
-
-#ifdef OPENSSL_EXTRA
-#ifdef HAVE_ECC
+        /* key = NULL, don't try to access or double free it */
+    }
+}
 
 #ifndef NO_WOLFSSL_STUB
 int wolfSSL_EC_KEY_set_group(WOLFSSL_EC_KEY *key, WOLFSSL_EC_GROUP *group)
@@ -26424,6 +31922,21 @@
 }
 #endif
 
+static int setupPoint(const WOLFSSL_EC_POINT *p) {
+    if (!p) {
+        return WOLFSSL_FAILURE;
+    }
+    if (p->inSet == 0) {
+        WOLFSSL_MSG("No ECPoint internal set, do it");
+
+        if (SetECPointInternal((WOLFSSL_EC_POINT *)p) != WOLFSSL_SUCCESS) {
+            WOLFSSL_MSG("SetECPointInternal SetECPointInternal failed");
+            return WOLFSSL_FAILURE;
+        }
+    }
+    return WOLFSSL_SUCCESS;
+}
+
 /* return code compliant with OpenSSL :
  *   1 if success, 0 if error
  */
@@ -26447,11 +31960,8 @@
         }
     }
 
-    if (pub->inSet == 0) {
-        if (SetECPointInternal((WOLFSSL_EC_POINT *)pub) != WOLFSSL_SUCCESS) {
-            WOLFSSL_MSG("SetECPointInternal failed");
-            return WOLFSSL_FAILURE;
-        }
+    if (setupPoint(pub) != WOLFSSL_SUCCESS) {
+        return WOLFSSL_FAILURE;
     }
 
     pub_p = (ecc_point*)pub->internal;
@@ -26471,7 +31981,12 @@
         return WOLFSSL_FAILURE;
     }
 
-    if (SetECKeyExternal(key) != WOLFSSL_SUCCESS) {
+    if (SetECPointExternal(key->pub_key) != WOLFSSL_SUCCESS) {
+        WOLFSSL_MSG("SetECKeyInternal failed");
+        return WOLFSSL_FAILURE;
+    }
+
+    if (SetECKeyInternal(key) != WOLFSSL_SUCCESS) {
         WOLFSSL_MSG("SetECKeyInternal failed");
         return WOLFSSL_FAILURE;
     }
@@ -26483,6 +31998,151 @@
 }
 /* End EC_KEY */
 
+int wolfSSL_ECDSA_size(const WOLFSSL_EC_KEY *key)
+{
+    const EC_GROUP *group;
+    int bits, bytes;
+    word32 headerSz = 4;   /* 2*ASN_TAG + 2*LEN(ENUM) */
+
+    if (!key) {
+        return WOLFSSL_FAILURE;
+    }
+
+    if (!(group = wolfSSL_EC_KEY_get0_group(key))) {
+        return WOLFSSL_FAILURE;
+    }
+    if ((bits = wolfSSL_EC_GROUP_order_bits(group)) == 0) {
+        return WOLFSSL_FAILURE;
+    }
+    bytes = (bits + 7) / 8; /* bytes needed to hold bits */
+    return headerSz +
+            2 + /* possible leading zeroes in r and s */
+            bytes + bytes + /* r and s */
+            2;
+}
+
+int wolfSSL_ECDSA_sign(int type, const unsigned char *digest,
+                       int digestSz, unsigned char *sig,
+                       unsigned int *sigSz, WOLFSSL_EC_KEY *key)
+{
+    int ret = WOLFSSL_SUCCESS;
+    WC_RNG* rng = NULL;
+#ifdef WOLFSSL_SMALL_STACK
+    WC_RNG* tmpRNG = NULL;
+#else
+    WC_RNG  tmpRNG[1];
+#endif
+    int initTmpRng = 0;
+
+    WOLFSSL_ENTER("wolfSSL_ECDSA_sign");
+
+    if (!key) {
+        return WOLFSSL_FAILURE;
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    tmpRNG = (WC_RNG*)XMALLOC(sizeof(WC_RNG), NULL, DYNAMIC_TYPE_RNG);
+    if (tmpRNG == NULL)
+        return WOLFSSL_FAILURE;
+#endif
+
+    if (wc_InitRng(tmpRNG) == 0) {
+        rng = tmpRNG;
+        initTmpRng = 1;
+    }
+    else {
+        WOLFSSL_MSG("Bad RNG Init, trying global");
+        if (initGlobalRNG == 0) {
+            WOLFSSL_MSG("Global RNG no Init");
+        }
+        else {
+            rng = &globalRNG;
+        }
+    }
+    if (rng) {
+        if (wc_ecc_sign_hash(digest, digestSz, sig, sigSz, rng, (ecc_key*)key->internal) != MP_OKAY) {
+            ret = WOLFSSL_FAILURE;
+        }
+        if (initTmpRng) {
+            wc_FreeRng(tmpRNG);
+        }
+    } else {
+        ret = WOLFSSL_FAILURE;
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (tmpRNG)
+        XFREE(tmpRNG, NULL, DYNAMIC_TYPE_RNG);
+#endif
+
+    (void)type;
+    return ret;
+}
+
+#ifndef HAVE_SELFTEST
+/* ECC point compression types were not included in selftest ecc.h */
+
+char* wolfSSL_EC_POINT_point2hex(const WOLFSSL_EC_GROUP* group,
+                                 const WOLFSSL_EC_POINT* point, int form,
+                                 WOLFSSL_BN_CTX* ctx)
+{
+    static const char* hexDigit = "0123456789ABCDEF";
+    char* hex = NULL;
+    int id;
+    int i, sz, len;
+
+    (void)ctx;
+
+    if (group == NULL || point == NULL)
+        return NULL;
+
+    id = wc_ecc_get_curve_id(group->curve_idx);
+
+    if ((sz = wc_ecc_get_curve_size_from_id(id)) < 0)
+        return NULL;
+
+    len = sz + 1;
+    if (form == POINT_CONVERSION_UNCOMPRESSED)
+        len += sz;
+
+    hex = (char*)XMALLOC(2 * len + 1, NULL, DYNAMIC_TYPE_ECC);
+    if (hex == NULL)
+        return NULL;
+    XMEMSET(hex, 0, 2 * len + 1);
+
+    /* Put in x-ordinate after format byte. */
+    i = sz - mp_unsigned_bin_size((mp_int*)point->X->internal) + 1;
+    if (mp_to_unsigned_bin((mp_int*)point->X->internal, (byte*)(hex + i)) < 0) {
+        XFREE(hex,  NULL, DYNAMIC_TYPE_ECC);
+        return NULL;
+    }
+
+    if (form == POINT_CONVERSION_COMPRESSED) {
+        hex[0] = mp_isodd((mp_int*)point->Y->internal) ? ECC_POINT_COMP_ODD :
+                                                         ECC_POINT_COMP_EVEN;
+    }
+    else {
+        hex[0] = ECC_POINT_UNCOMP;
+        /* Put in y-ordinate after x-ordinate */
+        i = 1 + 2 * sz - mp_unsigned_bin_size((mp_int*)point->Y->internal);
+        if (mp_to_unsigned_bin((mp_int*)point->Y->internal,
+                                                        (byte*)(hex + i)) < 0) {
+            XFREE(hex,  NULL, DYNAMIC_TYPE_ECC);
+            return NULL;
+        }
+    }
+
+    for (i = len-1; i >= 0; i--) {
+        byte b = hex[i];
+        hex[i * 2 + 1] = hexDigit[b  & 0xf];
+        hex[i * 2    ] = hexDigit[b >>   4];
+    }
+
+    return hex;
+}
+
+#endif /* HAVE_SELFTEST */
+
 void wolfSSL_EC_POINT_dump(const char *msg, const WOLFSSL_EC_POINT *p)
 {
 #if defined(DEBUG_WOLFSSL)
@@ -26490,6 +32150,10 @@
 
     WOLFSSL_ENTER("wolfSSL_EC_POINT_dump");
 
+    if (!WOLFSSL_IS_DEBUG_ON() || wolfSSL_GetLoggingCb()) {
+        return;
+    }
+
     if (p == NULL) {
         printf("%s = NULL", msg);
         return;
@@ -26540,12 +32204,26 @@
 #endif /* OPENSSL_EXTRA */
 
 #if defined(HAVE_ECC) && (defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL))
+const WOLFSSL_EC_METHOD* wolfSSL_EC_GROUP_method_of(
+                                                const WOLFSSL_EC_GROUP *group)
+{
+    return group;
+}
+
+int wolfSSL_EC_METHOD_get_field_type(const WOLFSSL_EC_METHOD *meth)
+{
+    if (meth) {
+        return NID_X9_62_prime_field;
+    }
+    return WOLFSSL_FAILURE;
+}
+
 void wolfSSL_EC_GROUP_free(WOLFSSL_EC_GROUP *group)
 {
     WOLFSSL_ENTER("wolfSSL_EC_GROUP_free");
 
     XFREE(group, NULL, DYNAMIC_TYPE_ECC);
-    group = NULL;
+    /* group = NULL, don't try to access or double free it */
 }
 #endif
 
@@ -26566,9 +32244,16 @@
 {
     WOLFSSL_EC_GROUP *g;
     int x;
+    int eccEnum;
 
     WOLFSSL_ENTER("wolfSSL_EC_GROUP_new_by_curve_name");
 
+    /* If NID passed in is OpenSSL type, convert it to ecc_curve_id enum */
+    eccEnum = NIDToEccEnum(nid);
+    if (eccEnum == -1)
+        eccEnum = nid;
+
+
     /* curve group */
     g = (WOLFSSL_EC_GROUP*) XMALLOC(sizeof(WOLFSSL_EC_GROUP), NULL,
                                     DYNAMIC_TYPE_ECC);
@@ -26579,15 +32264,17 @@
     XMEMSET(g, 0, sizeof(WOLFSSL_EC_GROUP));
 
     /* set the nid of the curve */
-    g->curve_nid = nid;
-
-    /* search and set the corresponding internal curve idx */
-    for (x = 0; ecc_sets[x].size != 0; x++)
-        if (ecc_sets[x].id == g->curve_nid) {
-            g->curve_idx = x;
-            g->curve_oid = ecc_sets[x].oidSum;
-            break;
-        }
+    g->curve_nid = eccEnum;
+
+    if (eccEnum > ECC_CURVE_DEF) {
+        /* search and set the corresponding internal curve idx */
+        for (x = 0; ecc_sets[x].size != 0; x++)
+            if (ecc_sets[x].id == g->curve_nid) {
+                g->curve_idx = x;
+                g->curve_oid = ecc_sets[x].oidSum;
+                break;
+            }
+    }
 
     return g;
 }
@@ -26597,6 +32284,7 @@
  */
 int wolfSSL_EC_GROUP_get_curve_name(const WOLFSSL_EC_GROUP *group)
 {
+    int nid;
     WOLFSSL_ENTER("wolfSSL_EC_GROUP_get_curve_name");
 
     if (group == NULL) {
@@ -26604,6 +32292,10 @@
         return WOLFSSL_FAILURE;
     }
 
+    /* If curve_nid is ECC Enum type, return corresponding OpenSSL nid */
+    if ((nid = EccEnumToNID(group->curve_nid)) != -1)
+        return nid;
+
     return group->curve_nid;
 }
 
@@ -26612,6 +32304,9 @@
  */
 int wolfSSL_EC_GROUP_get_degree(const WOLFSSL_EC_GROUP *group)
 {
+    int nid;
+    int tmp;
+
     WOLFSSL_ENTER("wolfSSL_EC_GROUP_get_degree");
 
     if (group == NULL || group->curve_idx < 0) {
@@ -26619,7 +32314,17 @@
         return WOLFSSL_FAILURE;
     }
 
-    switch(group->curve_nid) {
+    /* If curve_nid passed in is an ecc_curve_id enum, convert it to the
+        corresponding OpenSSL NID */
+    tmp = EccEnumToNID(group->curve_nid);
+    if (tmp != -1){
+        nid = tmp;
+    }
+    else{
+        nid = group->curve_nid;
+    }
+
+    switch(nid) {
         case NID_secp112r1:
         case NID_secp112r2:
             return 112;
@@ -26649,13 +32354,81 @@
         case NID_brainpoolP384r1:
             return 384;
         case NID_secp521r1:
+            return 521;
         case NID_brainpoolP512r1:
-            return 521;
+            return 512;
         default:
             return WOLFSSL_FAILURE;
     }
 }
 
+/* Converts OpenSSL NID value of ECC curves to the associated enum values in
+   ecc_curve_id, used by ecc_sets[].*/
+int NIDToEccEnum(int n)
+{
+    WOLFSSL_ENTER("NIDToEccEnum()");
+
+    switch(n) {
+        case NID_X9_62_prime192v1:
+            return ECC_SECP192R1;
+        case NID_X9_62_prime192v2:
+            return ECC_PRIME192V2;
+        case NID_X9_62_prime192v3:
+            return ECC_PRIME192V3;
+        case NID_X9_62_prime239v1:
+            return ECC_PRIME239V1;
+        case NID_X9_62_prime239v2:
+            return ECC_PRIME239V2;
+        case NID_X9_62_prime239v3:
+            return ECC_PRIME239V3;
+        case NID_X9_62_prime256v1:
+            return ECC_SECP256R1;
+        case NID_secp112r1:
+            return ECC_SECP112R1;
+        case NID_secp112r2:
+            return ECC_SECP112R2;
+        case NID_secp128r1:
+            return ECC_SECP128R1;
+        case NID_secp128r2:
+            return ECC_SECP128R2;
+        case NID_secp160r1:
+            return ECC_SECP160R1;
+        case NID_secp160r2:
+            return ECC_SECP160R2;
+        case NID_secp224r1:
+            return ECC_SECP224R1;
+        case NID_secp384r1:
+            return ECC_SECP384R1;
+        case NID_secp521r1:
+            return ECC_SECP521R1;
+        case NID_secp160k1:
+            return ECC_SECP160K1;
+        case NID_secp192k1:
+            return ECC_SECP192K1;
+        case NID_secp224k1:
+            return ECC_SECP224K1;
+        case NID_secp256k1:
+            return ECC_SECP256K1;
+        case NID_brainpoolP160r1:
+            return ECC_BRAINPOOLP160R1;
+        case NID_brainpoolP192r1:
+            return ECC_BRAINPOOLP192R1;
+        case NID_brainpoolP224r1:
+            return ECC_BRAINPOOLP224R1;
+        case NID_brainpoolP256r1:
+            return ECC_BRAINPOOLP256R1;
+        case NID_brainpoolP320r1:
+            return ECC_BRAINPOOLP320R1;
+        case NID_brainpoolP384r1:
+            return ECC_BRAINPOOLP384R1;
+        case NID_brainpoolP512r1:
+            return ECC_BRAINPOOLP512R1;
+        default:
+            WOLFSSL_MSG("NID not found");
+            return -1;
+    }
+}
+
 /* return code compliant with OpenSSL :
  *   1 if success, 0 if error
  */
@@ -26683,6 +32456,29 @@
 
     return WOLFSSL_SUCCESS;
 }
+
+int wolfSSL_EC_GROUP_order_bits(const WOLFSSL_EC_GROUP *group)
+{
+    int ret;
+    mp_int order;
+
+    if (group == NULL || group->curve_idx < 0) {
+        WOLFSSL_MSG("wolfSSL_EC_GROUP_order_bits NULL error");
+        return 0;
+    }
+
+    ret = mp_init(&order);
+    if (ret == 0) {
+        ret = mp_read_radix(&order, ecc_sets[group->curve_idx].order,
+            MP_RADIX_HEX);
+        if (ret == 0)
+            ret = mp_count_bits(&order);
+        mp_clear(&order);
+    }
+
+    return ret;
+}
+
 /* End EC_GROUP */
 
 /* Start EC_POINT */
@@ -26703,13 +32499,8 @@
         return WOLFSSL_FAILURE;
     }
 
-    if (p->inSet == 0) {
-        WOLFSSL_MSG("No ECPoint internal set, do it");
-
-        if (SetECPointInternal((WOLFSSL_EC_POINT *)p) != WOLFSSL_SUCCESS) {
-            WOLFSSL_MSG("SetECPointInternal SetECPointInternal failed");
-            return WOLFSSL_FAILURE;
-        }
+    if (setupPoint(p) != WOLFSSL_SUCCESS) {
+        return WOLFSSL_FAILURE;
     }
 
     if (out != NULL) {
@@ -26739,19 +32530,31 @@
         return WOLFSSL_FAILURE;
     }
 
-    if (wc_ecc_import_point_der(in, len, group->curve_idx,
-                                (ecc_point*)p->internal) != MP_OKAY) {
-        WOLFSSL_MSG("wc_ecc_import_point_der failed");
-        return WOLFSSL_FAILURE;
-    }
-
-    if (p->exSet == 0) {
-        WOLFSSL_MSG("No ECPoint external set, do it");
-
-        if (SetECPointExternal(p) != WOLFSSL_SUCCESS) {
-            WOLFSSL_MSG("SetECPointExternal failed");
-            return WOLFSSL_FAILURE;
-        }
+#ifndef HAVE_SELFTEST
+    if (wc_ecc_import_point_der_ex(in, len, group->curve_idx,
+                                   (ecc_point*)p->internal, 0) != MP_OKAY) {
+        WOLFSSL_MSG("wc_ecc_import_point_der_ex failed");
+        return WOLFSSL_FAILURE;
+    }
+#else
+    /* ECC_POINT_UNCOMP is not defined CAVP self test so use magic number */
+    if (in[0] == 0x04) {
+        if (wc_ecc_import_point_der(in, len, group->curve_idx,
+                                    (ecc_point*)p->internal) != MP_OKAY) {
+            WOLFSSL_MSG("wc_ecc_import_point_der failed");
+            return WOLFSSL_FAILURE;
+        }
+    }
+    else {
+        WOLFSSL_MSG("Only uncompressed points supported with HAVE_SELFTEST");
+        return WOLFSSL_FAILURE;
+    }
+#endif
+
+    /* Set new external point */
+    if (SetECPointExternal(p) != WOLFSSL_SUCCESS) {
+        WOLFSSL_MSG("SetECPointExternal failed");
+        return WOLFSSL_FAILURE;
     }
 
     wolfSSL_EC_POINT_dump("d2i p", p);
@@ -26759,6 +32562,172 @@
     return WOLFSSL_SUCCESS;
 }
 
+size_t wolfSSL_EC_POINT_point2oct(const WOLFSSL_EC_GROUP *group,
+                                  const WOLFSSL_EC_POINT *p,
+                                  char form,
+                                  byte *buf, size_t len, WOLFSSL_BN_CTX *ctx)
+{
+    word32 min_len = (word32)len;
+#ifndef HAVE_SELFTEST
+    int compressed = form == POINT_CONVERSION_COMPRESSED ? 1 : 0;
+#endif /* !HAVE_SELFTEST */
+
+    WOLFSSL_ENTER("EC_POINT_point2oct");
+
+    if (!group || !p) {
+        return WOLFSSL_FAILURE;
+    }
+
+    if (setupPoint(p) != WOLFSSL_SUCCESS) {
+        return WOLFSSL_FAILURE;
+    }
+
+    if (wolfSSL_EC_POINT_is_at_infinity(group, p)) {
+        /* encodes to a single 0 octet */
+        if (buf != NULL) {
+            if (len < 1) {
+                ECerr(EC_F_EC_GFP_SIMPLE_POINT2OCT, EC_R_BUFFER_TOO_SMALL);
+                return WOLFSSL_FAILURE;
+            }
+            buf[0] = 0;
+        }
+        return 1;
+    }
+
+    if (form != POINT_CONVERSION_UNCOMPRESSED
+#ifndef HAVE_SELFTEST
+            && form != POINT_CONVERSION_COMPRESSED
+#endif /* !HAVE_SELFTEST */
+            ) {
+        WOLFSSL_MSG("Unsupported curve form");
+        return WOLFSSL_FAILURE;
+    }
+
+#ifndef HAVE_SELFTEST
+    if (wc_ecc_export_point_der_ex(group->curve_idx, (ecc_point*)p->internal,
+               buf, &min_len, compressed) != (buf ? MP_OKAY : LENGTH_ONLY_E)) {
+        return WOLFSSL_FAILURE;
+    }
+#else
+    if (wc_ecc_export_point_der(group->curve_idx, (ecc_point*)p->internal,
+                                buf, &min_len) != (buf ? MP_OKAY : LENGTH_ONLY_E)) {
+        return WOLFSSL_FAILURE;
+    }
+#endif /* !HAVE_SELFTEST */
+
+    (void)ctx;
+
+    return (size_t)min_len;
+}
+
+int wolfSSL_EC_POINT_oct2point(const WOLFSSL_EC_GROUP *group,
+                               WOLFSSL_EC_POINT *p, const unsigned char *buf,
+                               size_t len, WOLFSSL_BN_CTX *ctx)
+{
+    WOLFSSL_ENTER("wolfSSL_EC_POINT_oct2point");
+
+    if (!group || !p) {
+        return WOLFSSL_FAILURE;
+    }
+
+    (void)ctx;
+
+    return wolfSSL_ECPoint_d2i((unsigned char*)buf, (unsigned int)len, group, p);
+}
+
+int wolfSSL_i2o_ECPublicKey(const WOLFSSL_EC_KEY *in, unsigned char **out)
+{
+    size_t len;
+    unsigned char *tmp = NULL;
+    char form;
+    WOLFSSL_ENTER("wolfSSL_i2o_ECPublicKey");
+
+    if (!in) {
+        WOLFSSL_MSG("wolfSSL_i2o_ECPublicKey Bad arguments");
+        return WOLFSSL_FAILURE;
+    }
+
+    /* Default to compressed form if not set */
+    form = in->form == POINT_CONVERSION_UNCOMPRESSED ?
+            POINT_CONVERSION_UNCOMPRESSED:
+            POINT_CONVERSION_COMPRESSED;
+
+    len = wolfSSL_EC_POINT_point2oct(in->group, in->pub_key, form,
+                                     NULL, 0, NULL);
+
+    if (len != WOLFSSL_FAILURE && out) {
+        if (!*out) {
+            if (!(tmp = (unsigned char*)XMALLOC(len, NULL,
+                                                DYNAMIC_TYPE_OPENSSL))) {
+                WOLFSSL_MSG("malloc failed");
+                return WOLFSSL_FAILURE;
+            }
+            *out = tmp;
+        }
+
+        if (wolfSSL_EC_POINT_point2oct(in->group, in->pub_key, form, *out,
+                                       len, NULL) == WOLFSSL_FAILURE) {
+            if (tmp) {
+                XFREE(tmp, NULL, DYNAMIC_TYPE_OPENSSL);
+                *out = NULL;
+            }
+            return WOLFSSL_FAILURE;
+        }
+
+        if (!tmp) {
+            /* Move buffer forward if it was not alloced in this function */
+            *out += len;
+        }
+    }
+
+    return (int)len;
+}
+
+void wolfSSL_EC_KEY_set_conv_form(WOLFSSL_EC_KEY *eckey, char form)
+{
+    if (eckey && (form == POINT_CONVERSION_COMPRESSED ||
+                  form == POINT_CONVERSION_UNCOMPRESSED)) {
+        eckey->form = form;
+    }
+}
+
+
+/* wolfSSL_EC_POINT_point2bn should return "in" if not null */
+WOLFSSL_BIGNUM *wolfSSL_EC_POINT_point2bn(const WOLFSSL_EC_GROUP *group,
+                                          const WOLFSSL_EC_POINT *p,
+                                          char form,
+                                          WOLFSSL_BIGNUM *in, WOLFSSL_BN_CTX *ctx)
+{
+    size_t len;
+    byte *buf;
+    WOLFSSL_BIGNUM *ret = NULL;
+
+    WOLFSSL_ENTER("wolfSSL_EC_POINT_oct2point");
+
+    if (!group || !p) {
+        return NULL;
+    }
+
+    if ((len = wolfSSL_EC_POINT_point2oct(group, p, form,
+                                          NULL, 0, ctx)) == WOLFSSL_FAILURE) {
+        return NULL;
+    }
+
+    if (!(buf = (byte*)XMALLOC(len, NULL, DYNAMIC_TYPE_TMP_BUFFER))) {
+        WOLFSSL_MSG("malloc failed");
+        return NULL;
+    }
+
+    if (wolfSSL_EC_POINT_point2oct(group, p, form,
+                                   buf, len, ctx) == len) {
+        ret = wolfSSL_BN_bin2bn(buf, (int)len, in);
+    }
+
+    XFREE(buf, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+
+    return ret;
+}
+
 WOLFSSL_EC_POINT *wolfSSL_EC_POINT_new(const WOLFSSL_EC_GROUP *group)
 {
     WOLFSSL_EC_POINT *p;
@@ -26797,33 +32766,101 @@
                                                 WOLFSSL_BIGNUM *y,
                                                 WOLFSSL_BN_CTX *ctx)
 {
+    mp_digit mp;
+    mp_int modulus;
     (void)ctx;
 
     WOLFSSL_ENTER("wolfSSL_EC_POINT_get_affine_coordinates_GFp");
 
     if (group == NULL || point == NULL || point->internal == NULL ||
-        x == NULL || y == NULL) {
+        x == NULL || y == NULL || wolfSSL_EC_POINT_is_at_infinity(group, point)) {
         WOLFSSL_MSG("wolfSSL_EC_POINT_get_affine_coordinates_GFp NULL error");
         return WOLFSSL_FAILURE;
     }
 
-    if (point->inSet == 0) {
-        WOLFSSL_MSG("No ECPoint internal set, do it");
-
-        if (SetECPointInternal((WOLFSSL_EC_POINT *)point) != WOLFSSL_SUCCESS) {
-            WOLFSSL_MSG("SetECPointInternal failed");
+    if (setupPoint(point) != WOLFSSL_SUCCESS) {
+        return WOLFSSL_FAILURE;
+    }
+
+    if (!wolfSSL_BN_is_one(point->Z)) {
+        if (mp_init(&modulus) != MP_OKAY) {
+            WOLFSSL_MSG("mp_init failed");
+            return WOLFSSL_FAILURE;
+        }
+        /* Map the Jacobian point back to affine space */
+        if (mp_read_radix(&modulus, ecc_sets[group->curve_idx].prime, MP_RADIX_HEX) != MP_OKAY) {
+            WOLFSSL_MSG("mp_read_radix failed");
+            mp_clear(&modulus);
+            return WOLFSSL_FAILURE;
+        }
+        if (mp_montgomery_setup(&modulus, &mp) != MP_OKAY) {
+            WOLFSSL_MSG("mp_montgomery_setup failed");
+            mp_clear(&modulus);
+            return WOLFSSL_FAILURE;
+        }
+        if (ecc_map((ecc_point*)point->internal, &modulus, mp) != MP_OKAY) {
+            WOLFSSL_MSG("ecc_map failed");
+            mp_clear(&modulus);
+            return WOLFSSL_FAILURE;
+        }
+        if (SetECPointExternal((WOLFSSL_EC_POINT *)point) != WOLFSSL_SUCCESS) {
+            WOLFSSL_MSG("SetECPointExternal failed");
+            mp_clear(&modulus);
             return WOLFSSL_FAILURE;
         }
     }
 
     BN_copy(x, point->X);
     BN_copy(y, point->Y);
-
-    return WOLFSSL_SUCCESS;
-}
-
-#ifndef WOLFSSL_ATECC508A
-/* return code compliant with OpenSSL :
+    mp_clear(&modulus);
+
+    return WOLFSSL_SUCCESS;
+}
+
+int wolfSSL_EC_POINT_set_affine_coordinates_GFp(const WOLFSSL_EC_GROUP *group,
+                                                WOLFSSL_EC_POINT *point,
+                                                const WOLFSSL_BIGNUM *x,
+                                                const WOLFSSL_BIGNUM *y,
+                                                WOLFSSL_BN_CTX *ctx)
+{
+    (void)ctx;
+    WOLFSSL_ENTER("wolfSSL_EC_POINT_set_affine_coordinates_GFp");
+
+    if (group == NULL || point == NULL || point->internal == NULL ||
+        x == NULL || y == NULL) {
+        WOLFSSL_MSG("wolfSSL_EC_POINT_set_affine_coordinates_GFp NULL error");
+        return WOLFSSL_FAILURE;
+    }
+
+    if (!point->X) {
+        point->X = wolfSSL_BN_new();
+    }
+    if (!point->Y) {
+        point->Y = wolfSSL_BN_new();
+    }
+    if (!point->Z) {
+        point->Z = wolfSSL_BN_new();
+    }
+    if (!point->X || !point->Y || !point->Z) {
+        WOLFSSL_MSG("wolfSSL_BN_new failed");
+        return WOLFSSL_FAILURE;
+    }
+
+    BN_copy(point->X, x);
+    BN_copy(point->Y, y);
+    BN_copy(point->Z, wolfSSL_BN_value_one());
+
+    if (SetECPointInternal((WOLFSSL_EC_POINT *)point) != WOLFSSL_SUCCESS) {
+        WOLFSSL_MSG("SetECPointInternal failed");
+        return WOLFSSL_FAILURE;
+    }
+
+    return WOLFSSL_SUCCESS;
+}
+
+#if !defined(WOLFSSL_ATECC508A) && !defined(HAVE_SELFTEST)
+/* Calculate the value: generator * n + q * m
+ * return code compliant with OpenSSL :
  *   1 if success, 0 if error
  */
 int wolfSSL_EC_POINT_mul(const WOLFSSL_EC_GROUP *group, WOLFSSL_EC_POINT *r,
@@ -26831,63 +32868,138 @@
                          const WOLFSSL_BIGNUM *m, WOLFSSL_BN_CTX *ctx)
 {
     mp_int a, prime;
-    int ret;
+    int ret = WOLFSSL_FAILURE;
+    ecc_point* result = NULL;
+    ecc_point* tmp = NULL;
 
     (void)ctx;
-    (void)n;
 
     WOLFSSL_ENTER("wolfSSL_EC_POINT_mul");
 
-    if (group == NULL || r == NULL || r->internal == NULL ||
-        q == NULL || q->internal == NULL || m == NULL) {
+    if (!group || !r) {
         WOLFSSL_MSG("wolfSSL_EC_POINT_mul NULL error");
         return WOLFSSL_FAILURE;
     }
 
-    if (q->inSet == 0) {
-        WOLFSSL_MSG("No ECPoint internal set, do it");
-
-        if (SetECPointInternal((WOLFSSL_EC_POINT *)q) != WOLFSSL_SUCCESS) {
-            WOLFSSL_MSG("SetECPointInternal q failed");
-            return WOLFSSL_FAILURE;
-        }
+    if (!(result = wc_ecc_new_point())) {
+        WOLFSSL_MSG("wolfSSL_EC_POINT_new error");
+        return WOLFSSL_FAILURE;
     }
 
     /* read the curve prime and a */
     if (mp_init_multi(&prime, &a, NULL, NULL, NULL, NULL) != MP_OKAY) {
-        return WOLFSSL_FAILURE;
-    }
-
-    ret = mp_read_radix(&prime, ecc_sets[group->curve_idx].prime, MP_RADIX_HEX);
-    if (ret == MP_OKAY) {
-        ret = mp_read_radix(&a, ecc_sets[group->curve_idx].Af, MP_RADIX_HEX);
-    }
-
-    /* r = q * m % prime */
-    if (ret == MP_OKAY) {
-        ret = wc_ecc_mulmod((mp_int*)m->internal, (ecc_point*)q->internal,
-                      (ecc_point*)r->internal, &a, &prime, 1);
-    }
-
+        WOLFSSL_MSG("mp_init_multi error");
+        goto cleanup;
+    }
+
+    if (q && setupPoint(q) != WOLFSSL_SUCCESS) {
+        WOLFSSL_MSG("setupPoint error");
+        goto cleanup;
+    }
+
+    if (mp_read_radix(&prime, ecc_sets[group->curve_idx].prime, MP_RADIX_HEX)
+            != MP_OKAY) {
+        WOLFSSL_MSG("mp_read_radix prime error");
+        goto cleanup;
+    }
+
+    if (mp_read_radix(&a, ecc_sets[group->curve_idx].Af, MP_RADIX_HEX)
+            != MP_OKAY) {
+        WOLFSSL_MSG("mp_read_radix a error");
+        goto cleanup;
+    }
+
+    if (n) {
+        /* load generator */
+        if (wc_ecc_get_generator(result, group->curve_idx)
+                != MP_OKAY) {
+            WOLFSSL_MSG("wc_ecc_get_generator error");
+            goto cleanup;
+        }
+    }
+
+    if (n && q && m) {
+        /* r = generator * n + q * m */
+#ifdef ECC_SHAMIR
+        if (ecc_mul2add(result, (mp_int*)n->internal,
+                        (ecc_point*)q->internal, (mp_int*)m->internal,
+                        result, &a, &prime, NULL)
+                != MP_OKAY) {
+            WOLFSSL_MSG("ecc_mul2add error");
+            goto cleanup;
+        }
+#else
+        mp_digit mp = 0;
+        if (mp_montgomery_setup(&prime, &mp) != MP_OKAY) {
+            WOLFSSL_MSG("mp_montgomery_setup nqm error");
+            goto cleanup;
+        }
+        if (!(tmp = wc_ecc_new_point())) {
+            WOLFSSL_MSG("wolfSSL_EC_POINT_new nqm error");
+            goto cleanup;
+        }
+        /* r = generator * n */
+        if (wc_ecc_mulmod((mp_int*)n->internal, result, result, &a, &prime, 1)
+                != MP_OKAY) {
+            WOLFSSL_MSG("wc_ecc_mulmod nqm error");
+            goto cleanup;
+        }
+        /* tmp = q * m */
+        if (wc_ecc_mulmod((mp_int*)m->internal, (ecc_point*)q->internal,
+                tmp, &a, &prime, 1) != MP_OKAY) {
+            WOLFSSL_MSG("wc_ecc_mulmod nqm error");
+            goto cleanup;
+        }
+        /* result = result + tmp */
+        if (ecc_projective_add_point(tmp, result, result, &a, &prime, mp)
+                != MP_OKAY) {
+            WOLFSSL_MSG("wc_ecc_mulmod nqm error");
+            goto cleanup;
+        }
+        if (ecc_map(result, &prime, mp) != MP_OKAY) {
+            WOLFSSL_MSG("ecc_map nqm error");
+            goto cleanup;
+        }
+#endif
+    }
+    else if (n) {
+        /* r = generator * n */
+        if (wc_ecc_mulmod((mp_int*)n->internal, result, result, &a, &prime, 1)
+                != MP_OKAY) {
+            WOLFSSL_MSG("wc_ecc_mulmod gn error");
+            goto cleanup;
+        }
+    }
+    else if (q && m) {
+        /* r = q * m */
+        if (wc_ecc_mulmod((mp_int*)m->internal, (ecc_point*)q->internal,
+                           result, &a, &prime, 1) != MP_OKAY) {
+            WOLFSSL_MSG("wc_ecc_mulmod qm error");
+            goto cleanup;
+        }
+    }
+
+    /* copy to destination */
+    if (wc_ecc_copy_point(result, (ecc_point*)r->internal)) {
+        WOLFSSL_MSG("wc_ecc_copy_point error");
+        goto cleanup;
+    }
+    r->inSet = 1;
+    if (SetECPointExternal(r) != WOLFSSL_SUCCESS) {
+        WOLFSSL_MSG("SetECPointExternal error");
+        goto cleanup;
+    }
+
+    ret = WOLFSSL_SUCCESS;
+cleanup:
     mp_clear(&a);
     mp_clear(&prime);
-
-    if (ret == MP_OKAY) {
-        r->inSet = 1; /* mark internal set */
-
-        /* set the external value for the computed point */
-        ret = SetECPointExternal(r);
-        if (ret != WOLFSSL_SUCCESS) {
-            WOLFSSL_MSG("SetECPointInternal r failed");
-        }
-    }
-    else {
-        ret = WOLFSSL_FAILURE;
-    }
-
-    return ret;
-}
-#endif
+    wc_ecc_del_point(result);
+    wc_ecc_del_point(tmp);
+    return ret;
+}
+#endif /* !defined(WOLFSSL_ATECC508A) && defined(ECC_SHAMIR) &&
+        * !defined(HAVE_SELFTEST) */
 
 void wolfSSL_EC_POINT_clear_free(WOLFSSL_EC_POINT *p)
 {
@@ -26923,6 +33035,32 @@
 
     return WOLFSSL_FATAL_ERROR;
 }
+
+int wolfSSL_EC_POINT_copy(WOLFSSL_EC_POINT *dest, const WOLFSSL_EC_POINT *src)
+{
+    WOLFSSL_ENTER("wolfSSL_EC_POINT_copy");
+
+    if (!dest || !src) {
+        return WOLFSSL_FAILURE;
+    }
+
+    if (setupPoint(src) != WOLFSSL_SUCCESS) {
+        return WOLFSSL_FAILURE;
+    }
+
+    if (wc_ecc_copy_point((ecc_point*) dest->internal,
+                          (ecc_point*) src->internal) != MP_OKAY) {
+        return WOLFSSL_FAILURE;
+    }
+
+    dest->inSet = 1;
+
+    if (SetECPointExternal(dest) != WOLFSSL_SUCCESS) {
+        return WOLFSSL_FAILURE;
+    }
+
+    return WOLFSSL_SUCCESS;
+}
 #endif /* HAVE_ECC */
 #endif /* OPENSSL_EXTRA */
 
@@ -26946,7 +33084,7 @@
         p->inSet = p->exSet = 0;
 
         XFREE(p, NULL, DYNAMIC_TYPE_ECC);
-        p = NULL;
+        /* p = NULL, don't try to access or double free it */
     }
 }
 #endif
@@ -26967,26 +33105,44 @@
         WOLFSSL_MSG("wolfSSL_EC_POINT_is_at_infinity NULL error");
         return WOLFSSL_FAILURE;
     }
-    if (point->inSet == 0) {
-        WOLFSSL_MSG("No ECPoint internal set, do it");
-
-        if (SetECPointInternal((WOLFSSL_EC_POINT *)point) != WOLFSSL_SUCCESS) {
-            WOLFSSL_MSG("SetECPointInternal failed");
-            return WOLFSSL_FAILURE;
-        }
+
+    if (setupPoint(point) != WOLFSSL_SUCCESS) {
+        return WOLFSSL_FAILURE;
     }
 
     ret = wc_ecc_point_is_at_infinity((ecc_point*)point->internal);
-    if (ret <= 0) {
+    if (ret < 0) {
         WOLFSSL_MSG("ecc_point_is_at_infinity failure");
         return WOLFSSL_FAILURE;
     }
 
-    return WOLFSSL_SUCCESS;
+    return ret;
 }
 
 /* End EC_POINT */
 
+size_t wolfSSL_EC_get_builtin_curves(WOLFSSL_EC_BUILTIN_CURVE *r, size_t nitems)
+{
+    size_t i, min_nitems;
+#ifdef HAVE_SELFTEST
+    size_t ecc_sets_count;
+    for (i = 0; ecc_sets[i].size != 0 && ecc_sets[i].name != NULL; i++);
+    ecc_sets_count = i;
+#endif
+
+    if (r == NULL || nitems == 0)
+        return ecc_sets_count;
+
+    min_nitems = nitems < ecc_sets_count ? nitems : ecc_sets_count;
+
+    for (i = 0; i < min_nitems; i++) {
+        r[i].nid = EccEnumToNID(ecc_sets[i].id);
+        r[i].comment = wolfSSL_OBJ_nid2sn(r[i].nid);
+    }
+
+    return ecc_sets_count;
+}
+
 /* Start ECDSA_SIG */
 void wolfSSL_ECDSA_SIG_free(WOLFSSL_ECDSA_SIG *sig)
 {
@@ -27158,6 +33314,70 @@
 
     return WOLFSSL_SUCCESS;
 }
+
+WOLFSSL_ECDSA_SIG *wolfSSL_d2i_ECDSA_SIG(WOLFSSL_ECDSA_SIG **sig,
+                                         const unsigned char **pp, long len)
+{
+    WOLFSSL_ECDSA_SIG *s = NULL;
+
+    if (pp == NULL)
+        return NULL;
+
+    if (sig != NULL)
+        s = *sig;
+    if (s == NULL) {
+        s = wolfSSL_ECDSA_SIG_new();
+        if (s == NULL)
+            return NULL;
+    }
+
+    /* DecodeECC_DSA_Sig calls mp_init, so free these */
+    mp_free((mp_int*)s->r->internal);
+    mp_free((mp_int*)s->s->internal);
+
+    if (DecodeECC_DSA_Sig(*pp, (word32)len, (mp_int*)s->r->internal,
+                                          (mp_int*)s->s->internal) != MP_OKAY) {
+        if (sig == NULL || *sig == NULL)
+            wolfSSL_ECDSA_SIG_free(s);
+        return NULL;
+    }
+
+    *pp += len;
+    if (sig != NULL)
+        *sig = s;
+    return s;
+}
+
+int wolfSSL_i2d_ECDSA_SIG(const WOLFSSL_ECDSA_SIG *sig, unsigned char **pp)
+{
+    word32 len;
+
+    if (sig == NULL)
+        return 0;
+
+    /* ASN.1: SEQ + INT + INT
+     *   ASN.1 Integer must be a positive value - prepend zero if number has
+     *   top bit set.
+     */
+    len = 2 + mp_leading_bit((mp_int*)sig->r->internal) +
+              mp_unsigned_bin_size((mp_int*)sig->r->internal) +
+          2 + mp_leading_bit((mp_int*)sig->s->internal) +
+              mp_unsigned_bin_size((mp_int*)sig->s->internal);
+    /* Two bytes required for length if ASN.1 SEQ data greater than 127 bytes
+     * and less than 256 bytes.
+     */
+    len = 1 + ((len > 127) ? 2 : 1) + len;
+    if (pp != NULL && *pp != NULL) {
+        if (StoreECC_DSA_Sig(*pp, &len, (mp_int*)sig->r->internal,
+                                        (mp_int*)sig->s->internal) != MP_OKAY) {
+            len = 0;
+        }
+        else
+            *pp += len;
+    }
+
+    return (int)len;
+}
 /* End ECDSA_SIG */
 
 /* Start ECDH */
@@ -27212,7 +33432,7 @@
  *   1 if success, 0 if error
  */
 #ifndef NO_WOLFSSL_STUB
-int wolfSSL_PEM_write_EC_PUBKEY(FILE *fp, WOLFSSL_EC_KEY *x)
+int wolfSSL_PEM_write_EC_PUBKEY(XFILE fp, WOLFSSL_EC_KEY *x)
 {
     (void)fp;
     (void)x;
@@ -27223,32 +33443,214 @@
 }
 #endif
 
+/* Uses the same format of input as wolfSSL_PEM_read_bio_PrivateKey but expects
+ * the results to be an EC key.
+ *
+ * bio  structure to read EC private key from
+ * ec   if not null is then set to the result
+ * cb   password callback for reading PEM
+ * pass password string
+ *
+ * returns a pointer to a new WOLFSSL_EC_KEY struct on success and NULL on fail
+ */
+
+WOLFSSL_EC_KEY* wolfSSL_PEM_read_bio_EC_PUBKEY(WOLFSSL_BIO* bio,
+                                               WOLFSSL_EC_KEY** ec,
+                                               pem_password_cb* cb, void *pass)
+{
+    WOLFSSL_EVP_PKEY* pkey;
+    WOLFSSL_EC_KEY* local;
+
+    WOLFSSL_ENTER("wolfSSL_PEM_read_bio_EC_PUBKEY");
+
+    pkey = wolfSSL_PEM_read_bio_PUBKEY(bio, NULL, cb, pass);
+    if (pkey == NULL) {
+        return NULL;
+    }
+
+    /* Since the WOLFSSL_EC_KEY structure is being taken from WOLFSSL_EVP_PKEY the
+     * flag indicating that the WOLFSSL_EC_KEY structure is owned should be FALSE
+     * to avoid having it free'd */
+    pkey->ownEcc = 0;
+    local = pkey->ecc;
+    if (ec != NULL) {
+        *ec = local;
+    }
+
+    wolfSSL_EVP_PKEY_free(pkey);
+    return local;
+}
+
+/* Reads a private EC key from a WOLFSSL_BIO into a WOLFSSL_EC_KEY.
+ * Returns WOLFSSL_SUCCESS or WOLFSSL_FAILURE
+ */
+WOLFSSL_EC_KEY* wolfSSL_PEM_read_bio_ECPrivateKey(WOLFSSL_BIO* bio,
+                                                  WOLFSSL_EC_KEY** ec,
+                                                  pem_password_cb* cb,
+                                                  void *pass)
+{
+    WOLFSSL_EVP_PKEY* pkey;
+    WOLFSSL_EC_KEY* local;
+
+    WOLFSSL_ENTER("wolfSSL_PEM_read_bio_ECPrivateKey");
+
+    pkey = wolfSSL_PEM_read_bio_PrivateKey(bio, NULL, cb, pass);
+    if (pkey == NULL) {
+        return NULL;
+    }
+
+    /* Since the WOLFSSL_EC_KEY structure is being taken from WOLFSSL_EVP_PKEY the
+     * flag indicating that the WOLFSSL_EC_KEY structure is owned should be FALSE
+     * to avoid having it free'd */
+    pkey->ownEcc = 0;
+    local = pkey->ecc;
+    if (ec != NULL) {
+        *ec = local;
+    }
+
+    wolfSSL_EVP_PKEY_free(pkey);
+    return local;
+}
 #endif /* NO_FILESYSTEM */
 
 #if defined(WOLFSSL_KEY_GEN)
+/* Takes a public WOLFSSL_EC_KEY and writes it out to WOLFSSL_BIO
+ * Returns WOLFSSL_SUCCESS or WOLFSSL_FAILURE
+ */
+int wolfSSL_PEM_write_bio_EC_PUBKEY(WOLFSSL_BIO* bio, WOLFSSL_EC_KEY* ec)
+{
+    int ret = 0, der_max_len = 0, derSz = 0;
+    byte *derBuf;
+    WOLFSSL_EVP_PKEY* pkey;
+
+    WOLFSSL_ENTER("wolfSSL_PEM_write_bio_EC_PUBKEY");
+
+    if (bio == NULL || ec == NULL) {
+        WOLFSSL_MSG("Bad Function Arguments");
+        return WOLFSSL_FAILURE;
+    }
+
+    /* Initialize pkey structure */
+    pkey = wolfSSL_EVP_PKEY_new_ex(bio->heap);
+    if (pkey == NULL) {
+        WOLFSSL_MSG("wolfSSL_EVP_PKEY_new_ex failed");
+        return WOLFSSL_FAILURE;
+    }
+
+    /* Set pkey info */
+    pkey->ecc    = ec;
+    pkey->ownEcc = 0; /* pkey does not own ECC */
+    pkey->type = EVP_PKEY_EC;
+
+    /* 4 > size of pub, priv + ASN.1 additional information */
+    der_max_len = 4 * wc_ecc_size((ecc_key*)ec->internal) + AES_BLOCK_SIZE;
+
+    derBuf = (byte*)XMALLOC(der_max_len, bio->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (derBuf == NULL) {
+        WOLFSSL_MSG("Malloc failed");
+        wolfSSL_EVP_PKEY_free(pkey);
+        return WOLFSSL_FAILURE;
+    }
+
+    /* convert key to der format */
+    derSz = wc_EccPublicKeyToDer((ecc_key*)ec->internal, derBuf, der_max_len, 1);
+    if (derSz < 0) {
+        WOLFSSL_MSG("wc_EccPublicKeyToDer failed");
+        XFREE(derBuf, NULL, DYNAMIC_TYPE_DER);
+        wolfSSL_EVP_PKEY_free(pkey);
+        return WOLFSSL_FAILURE;
+    }
+
+    pkey->pkey.ptr = (char*)XMALLOC(derSz, bio->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (pkey->pkey.ptr == NULL) {
+        WOLFSSL_MSG("key malloc failed");
+        XFREE(derBuf, bio->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        wolfSSL_EVP_PKEY_free(pkey);
+        return WOLFSSL_FAILURE;
+    }
+
+    /* add der info to the evp key */
+    pkey->pkey_sz = derSz;
+    XMEMCPY(pkey->pkey.ptr, derBuf, derSz);
+    XFREE(derBuf, bio->heap, DYNAMIC_TYPE_TMP_BUFFER);
+
+    if((ret = wolfSSL_PEM_write_bio_PUBKEY(bio, pkey)) != WOLFSSL_SUCCESS){
+        WOLFSSL_MSG("wolfSSL_PEM_write_bio_PUBKEY failed");
+    }
+    wolfSSL_EVP_PKEY_free(pkey);
+
+    return ret;
+}
 
 /* return code compliant with OpenSSL :
  *   1 if success, 0 if error
  */
-#ifndef NO_WOLFSSL_STUB
-int wolfSSL_PEM_write_bio_ECPrivateKey(WOLFSSL_BIO* bio, WOLFSSL_EC_KEY* ecc,
+int wolfSSL_PEM_write_bio_ECPrivateKey(WOLFSSL_BIO* bio, WOLFSSL_EC_KEY* ec,
                                        const EVP_CIPHER* cipher,
                                        unsigned char* passwd, int len,
                                        pem_password_cb* cb, void* arg)
 {
-    (void)bio;
-    (void)ecc;
-    (void)cipher;
-    (void)passwd;
-    (void)len;
-    (void)cb;
-    (void)arg;
-    WOLFSSL_STUB("PEM_write_bio_ECPrivateKey");
-    WOLFSSL_MSG("wolfSSL_PEM_write_bio_ECPrivateKey not implemented");
-
-    return WOLFSSL_FAILURE;
-}
-#endif
+    int ret = 0, der_max_len = 0, derSz = 0;
+    byte *derBuf;
+    WOLFSSL_EVP_PKEY* pkey;
+    WOLFSSL_ENTER("WOLFSSL_PEM_write_bio_ECPrivateKey");
+
+    if (bio == NULL || ec == NULL) {
+        WOLFSSL_MSG("Bad Function Arguments");
+        return WOLFSSL_FAILURE;
+    }
+
+    /* Initialize pkey structure */
+    pkey = wolfSSL_EVP_PKEY_new_ex(bio->heap);
+    if (pkey == NULL) {
+        WOLFSSL_MSG("wolfSSL_EVP_PKEY_new_ex failed");
+        return WOLFSSL_FAILURE;
+    }
+
+    /* Set pkey info */
+    pkey->ecc    = ec;
+    pkey->ownEcc = 0; /* pkey does not own ECC */
+    pkey->type = EVP_PKEY_EC;
+
+    /* 4 > size of pub, priv + ASN.1 additional informations
+     */
+    der_max_len = 4 * wc_ecc_size((ecc_key*)ec->internal) + AES_BLOCK_SIZE;
+
+    derBuf = (byte*)XMALLOC(der_max_len, bio->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (derBuf == NULL) {
+        WOLFSSL_MSG("Malloc failed");
+        wolfSSL_EVP_PKEY_free(pkey);
+        return WOLFSSL_FAILURE;
+    }
+
+    /* convert key to der format */
+    derSz = wc_EccKeyToDer((ecc_key*)ec->internal, derBuf, der_max_len);
+    if (derSz < 0) {
+        WOLFSSL_MSG("wc_EccKeyToDer failed");
+        XFREE(derBuf, NULL, DYNAMIC_TYPE_DER);
+        wolfSSL_EVP_PKEY_free(pkey);
+        return WOLFSSL_FAILURE;
+    }
+
+    pkey->pkey.ptr = (char*)XMALLOC(derSz, bio->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (pkey->pkey.ptr == NULL) {
+        WOLFSSL_MSG("key malloc failed");
+        XFREE(derBuf, bio->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        wolfSSL_EVP_PKEY_free(pkey);
+        return WOLFSSL_FAILURE;
+    }
+
+    /* add der info to the evp key */
+    pkey->pkey_sz = derSz;
+    XMEMCPY(pkey->pkey.ptr, derBuf, derSz);
+    XFREE(derBuf, bio->heap, DYNAMIC_TYPE_TMP_BUFFER);
+
+    ret = wolfSSL_PEM_write_bio_PrivateKey(bio, pkey, cipher, passwd, len,
+                                        cb, arg);
+    wolfSSL_EVP_PKEY_free(pkey);
+
+    return ret;
+}
 
 /* return code compliant with OpenSSL :
  *   1 if success, 0 if error
@@ -27279,13 +33681,12 @@
         WOLFSSL_MSG("No ECC internal set, do it");
 
         if (SetECKeyInternal(ecc) != WOLFSSL_SUCCESS) {
-            WOLFSSL_MSG("SetDsaInternal failed");
-            return WOLFSSL_FAILURE;
-        }
-    }
-
-    /* 4 > size of pub, priv + ASN.1 additional informations
-     */
+            WOLFSSL_MSG("SetECKeyInternal failed");
+            return WOLFSSL_FAILURE;
+        }
+    }
+
+    /* 4 > size of pub, priv + ASN.1 additional information */
     der_max_len = 4 * wc_ecc_size((ecc_key*)ecc->internal) + AES_BLOCK_SIZE;
 
     derBuf = (byte*)XMALLOC(der_max_len, NULL, DYNAMIC_TYPE_DER);
@@ -27297,7 +33698,7 @@
     /* Key to DER */
     derSz = wc_EccKeyToDer((ecc_key*)ecc->internal, derBuf, der_max_len);
     if (derSz < 0) {
-        WOLFSSL_MSG("wc_DsaKeyToDer failed");
+        WOLFSSL_MSG("wc_EccKeyToDer failed");
         XFREE(derBuf, NULL, DYNAMIC_TYPE_DER);
         return WOLFSSL_FAILURE;
     }
@@ -27378,7 +33779,7 @@
 /* return code compliant with OpenSSL :
  *   1 if success, 0 if error
  */
-int wolfSSL_PEM_write_ECPrivateKey(FILE *fp, WOLFSSL_EC_KEY *ecc,
+int wolfSSL_PEM_write_ECPrivateKey(XFILE fp, WOLFSSL_EC_KEY *ecc,
                                    const EVP_CIPHER *enc,
                                    unsigned char *kstr, int klen,
                                    pem_password_cb *cb, void *u)
@@ -27391,7 +33792,7 @@
 
     WOLFSSL_MSG("wolfSSL_PEM_write_ECPrivateKey");
 
-    if (fp == NULL || ecc == NULL || ecc->internal == NULL) {
+    if (fp == XBADFILE || ecc == NULL || ecc->internal == NULL) {
         WOLFSSL_MSG("Bad function arguments");
         return WOLFSSL_FAILURE;
     }
@@ -27422,26 +33823,144 @@
 
 #if defined(WOLFSSL_KEY_GEN)
 
-/* return code compliant with OpenSSL :
- *   1 if success, 0 if error
+/* Takes a DSA Privatekey and writes it out to a WOLFSSL_BIO
+ * Returns WOLFSSL_SUCCESS or WOLFSSL_FAILURE
  */
 int wolfSSL_PEM_write_bio_DSAPrivateKey(WOLFSSL_BIO* bio, WOLFSSL_DSA* dsa,
                                        const EVP_CIPHER* cipher,
                                        unsigned char* passwd, int len,
                                        pem_password_cb* cb, void* arg)
 {
-    (void)bio;
-    (void)dsa;
-    (void)cipher;
-    (void)passwd;
-    (void)len;
-    (void)cb;
-    (void)arg;
-
-    WOLFSSL_MSG("wolfSSL_PEM_write_bio_DSAPrivateKey not implemented");
-
-    return WOLFSSL_FAILURE;
-}
+    int ret = 0, der_max_len = 0, derSz = 0;
+    byte *derBuf;
+    WOLFSSL_EVP_PKEY* pkey;
+
+    WOLFSSL_ENTER("wolfSSL_PEM_write_bio_DSAPrivateKey");
+
+    if (bio == NULL || dsa == NULL) {
+        WOLFSSL_MSG("Bad Function Arguments");
+        return WOLFSSL_FAILURE;
+    }
+
+    pkey = wolfSSL_EVP_PKEY_new_ex(bio->heap);
+    if (pkey == NULL) {
+        WOLFSSL_MSG("wolfSSL_EVP_PKEY_new_ex failed");
+        return WOLFSSL_FAILURE;
+    }
+
+    pkey->type   = EVP_PKEY_DSA;
+    pkey->dsa    = dsa;
+    pkey->ownDsa = 0;
+
+    /* 4 > size of pub, priv, p, q, g + ASN.1 additional information */
+    der_max_len = 4 * wolfSSL_BN_num_bytes(dsa->g) + AES_BLOCK_SIZE;
+
+    derBuf = (byte*)XMALLOC(der_max_len, bio->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (derBuf == NULL) {
+        WOLFSSL_MSG("Malloc failed");
+        wolfSSL_EVP_PKEY_free(pkey);
+        return WOLFSSL_FAILURE;
+    }
+
+    /* convert key to der format */
+    derSz = wc_DsaKeyToDer((DsaKey*)dsa->internal, derBuf, der_max_len);
+    if (derSz < 0) {
+        WOLFSSL_MSG("wc_DsaKeyToDer failed");
+        XFREE(derBuf, NULL, DYNAMIC_TYPE_DER);
+        wolfSSL_EVP_PKEY_free(pkey);
+        return WOLFSSL_FAILURE;
+    }
+
+    pkey->pkey.ptr = (char*)XMALLOC(derSz, bio->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (pkey->pkey.ptr == NULL) {
+        WOLFSSL_MSG("key malloc failed");
+        XFREE(derBuf, bio->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        wolfSSL_EVP_PKEY_free(pkey);
+        return WOLFSSL_FAILURE;
+    }
+
+    /* add der info to the evp key */
+    pkey->pkey_sz = derSz;
+    XMEMCPY(pkey->pkey.ptr, derBuf, derSz);
+    XFREE(derBuf, bio->heap, DYNAMIC_TYPE_TMP_BUFFER);
+
+    ret = wolfSSL_PEM_write_bio_PrivateKey(bio, pkey, cipher, passwd, len,
+                                        cb, arg);
+    wolfSSL_EVP_PKEY_free(pkey);
+
+    return ret;
+}
+
+#ifndef HAVE_SELFTEST
+/* Takes a DSA public key and writes it out to a WOLFSSL_BIO
+ * Returns WOLFSSL_SUCCESS or WOLFSSL_FAILURE
+ */
+int wolfSSL_PEM_write_bio_DSA_PUBKEY(WOLFSSL_BIO* bio, WOLFSSL_DSA* dsa)
+{
+    int ret = 0, derMax = 0, derSz = 0;
+    byte *derBuf;
+    WOLFSSL_EVP_PKEY* pkey;
+
+    WOLFSSL_ENTER("wolfSSL_PEM_write_bio_DSA_PUBKEY");
+
+    if (bio == NULL || dsa == NULL) {
+        WOLFSSL_MSG("Bad function arguements");
+        return WOLFSSL_FAILURE;
+    }
+
+    pkey = wolfSSL_EVP_PKEY_new_ex(bio->heap);
+    if (pkey == NULL) {
+        WOLFSSL_MSG("wolfSSL_EVP_PKEY_new_ex failed");
+        return WOLFSSL_FAILURE;
+    }
+
+    pkey->type   = EVP_PKEY_DSA;
+    pkey->dsa    = dsa;
+    pkey->ownDsa = 0;
+
+    /* 4 > size of pub, priv, p, q, g + ASN.1 additional information */
+    derMax = 4 * wolfSSL_BN_num_bytes(dsa->g) + AES_BLOCK_SIZE;
+
+    derBuf = (byte*)XMALLOC(derMax, bio->heap, DYNAMIC_TYPE_DER);
+    if (derBuf == NULL) {
+        WOLFSSL_MSG("malloc failed");
+        wolfSSL_EVP_PKEY_free(pkey);
+        return WOLFSSL_FAILURE;
+    }
+
+    /* Key to DER */
+    derSz = wc_DsaKeyToPublicDer((DsaKey*)dsa->internal, derBuf, derMax);
+    if (derSz < 0) {
+        WOLFSSL_MSG("wc_DsaKeyToDer failed");
+        XFREE(derBuf, bio->heap, DYNAMIC_TYPE_DER);
+        wolfSSL_EVP_PKEY_free(pkey);
+        return WOLFSSL_FAILURE;
+    }
+
+    pkey->pkey.ptr = (char*)XMALLOC(derSz, bio->heap, DYNAMIC_TYPE_DER);
+
+    if (pkey->pkey.ptr == NULL) {
+        WOLFSSL_MSG("key malloc failed");
+        XFREE(derBuf, bio->heap, DYNAMIC_TYPE_DER);
+        wolfSSL_EVP_PKEY_free(pkey);
+        return WOLFSSL_FAILURE;
+    }
+    pkey->pkey_sz = derSz;
+    XMEMSET(pkey->pkey.ptr, 0, derSz);
+
+    if (XMEMCPY(pkey->pkey.ptr, derBuf, derSz) == NULL) {
+        WOLFSSL_MSG("XMEMCPY failed");
+        XFREE(derBuf, bio->heap, DYNAMIC_TYPE_DER);
+        XFREE(pkey->pkey.ptr, bio->heap, DYNAMIC_TYPE_DER);
+        wolfSSL_EVP_PKEY_free(pkey);
+        return WOLFSSL_FAILURE;
+    }
+    XFREE(derBuf, bio->heap, DYNAMIC_TYPE_DER);
+    ret = wolfSSL_PEM_write_bio_PUBKEY(bio, pkey);
+    wolfSSL_EVP_PKEY_free(pkey);
+    return ret;
+}
+#endif /* HAVE_SELFTEST */
 
 /* return code compliant with OpenSSL :
  *   1 if success, 0 if error
@@ -27477,8 +33996,7 @@
         }
     }
 
-    /* 4 > size of pub, priv, p, q, g + ASN.1 additional informations
-     */
+    /* 4 > size of pub, priv, p, q, g + ASN.1 additional information */
     der_max_len = 4 * wolfSSL_BN_num_bytes(dsa->g) + AES_BLOCK_SIZE;
 
     derBuf = (byte*)XMALLOC(der_max_len, NULL, DYNAMIC_TYPE_DER);
@@ -27571,7 +34089,7 @@
 /* return code compliant with OpenSSL :
  *   1 if success, 0 if error
  */
-int wolfSSL_PEM_write_DSAPrivateKey(FILE *fp, WOLFSSL_DSA *dsa,
+int wolfSSL_PEM_write_DSAPrivateKey(XFILE fp, WOLFSSL_DSA *dsa,
                                     const EVP_CIPHER *enc,
                                     unsigned char *kstr, int klen,
                                     pem_password_cb *cb, void *u)
@@ -27584,7 +34102,7 @@
 
     WOLFSSL_MSG("wolfSSL_PEM_write_DSAPrivateKey");
 
-    if (fp == NULL || dsa == NULL || dsa->internal == NULL) {
+    if (fp == XBADFILE || dsa == NULL || dsa->internal == NULL) {
         WOLFSSL_MSG("Bad function arguments");
         return WOLFSSL_FAILURE;
     }
@@ -27613,7 +34131,7 @@
  *   1 if success, 0 if error
  */
 #ifndef NO_WOLFSSL_STUB
-int wolfSSL_PEM_write_DSA_PUBKEY(FILE *fp, WOLFSSL_DSA *x)
+int wolfSSL_PEM_write_DSA_PUBKEY(XFILE fp, WOLFSSL_DSA *x)
 {
     (void)fp;
     (void)x;
@@ -27627,28 +34145,25 @@
 
 #endif /* #ifndef NO_DSA */
 
-
-WOLFSSL_EVP_PKEY* wolfSSL_PEM_read_bio_PrivateKey(WOLFSSL_BIO* bio,
-                    WOLFSSL_EVP_PKEY** key, pem_password_cb* cb, void* pass)
-{
-    WOLFSSL_EVP_PKEY* pkey = NULL;
-#ifdef WOLFSSL_SMALL_STACK
-    EncryptedInfo* info;
+static int pem_read_bio_key(WOLFSSL_BIO* bio, pem_password_cb* cb, void* pass,
+                            int keyType, int* eccFlag, DerBuffer** der)
+{
+#ifdef WOLFSSL_SMALL_STACK
+    EncryptedInfo* info = NULL;
 #else
     EncryptedInfo info[1];
 #endif /* WOLFSSL_SMALL_STACK */
-    pem_password_cb* localCb = cb;
-    DerBuffer* der = NULL;
-
+    pem_password_cb* localCb = NULL;
     char* mem = NULL;
-    int memSz;
-    int ret;
-    int eccFlag = 0;
-
-    WOLFSSL_ENTER("wolfSSL_PEM_read_bio_PrivateKey");
-
-    if (bio == NULL) {
-        return pkey;
+    int memSz = 0;
+    int ret;
+
+    if(cb) {
+        localCb = cb;
+    } else {
+        if(pass) {
+            localCb = wolfSSL_PEM_def_callback;
+        }
     }
 
     if ((ret = wolfSSL_BIO_pending(bio)) > 0) {
@@ -27656,123 +34171,201 @@
         mem = (char*)XMALLOC(memSz, bio->heap, DYNAMIC_TYPE_OPENSSL);
         if (mem == NULL) {
             WOLFSSL_MSG("Memory error");
-            return NULL;
-        }
-
-        if ((ret = wolfSSL_BIO_read(bio, mem, memSz)) <= 0) {
-            WOLFSSL_LEAVE("wolfSSL_PEM_read_bio_PrivateKey", ret);
-            XFREE(mem, bio->heap, DYNAMIC_TYPE_OPENSSL);
-            return NULL;
+            ret = MEMORY_E;
+        }
+        if (ret >= 0) {
+            if ((ret = wolfSSL_BIO_read(bio, mem, memSz)) <= 0) {
+                XFREE(mem, bio->heap, DYNAMIC_TYPE_OPENSSL);
+                mem = NULL;
+                ret = MEMORY_E;
+            }
         }
     }
     else if (bio->type == WOLFSSL_BIO_FILE) {
-        int sz  = 100; /* read from file by 100 byte chuncks */
+        int sz  = 100; /* read from file by 100 byte chunks */
         int idx = 0;
         char* tmp = (char*)XMALLOC(sz, bio->heap, DYNAMIC_TYPE_OPENSSL);
-
         memSz = 0;
         if (tmp == NULL) {
             WOLFSSL_MSG("Memory error");
-            return NULL;
-        }
-
-        while ((sz = wolfSSL_BIO_read(bio, tmp, sz)) > 0) {
+            ret = MEMORY_E;
+        }
+
+        while (ret >= 0 && (sz = wolfSSL_BIO_read(bio, tmp, sz)) > 0) {
+            char* newMem;
             if (memSz + sz < 0) {
                 /* sanity check */
                 break;
             }
-            mem = (char*)XREALLOC(mem, memSz + sz, bio->heap,
+            newMem = (char*)XREALLOC(mem, memSz + sz, bio->heap,
                     DYNAMIC_TYPE_OPENSSL);
-            if (mem == NULL) {
+            if (newMem == NULL) {
                 WOLFSSL_MSG("Memory error");
+                XFREE(mem, bio->heap, DYNAMIC_TYPE_OPENSSL);
+                mem = NULL;
                 XFREE(tmp, bio->heap, DYNAMIC_TYPE_OPENSSL);
-                return NULL;
-            }
+                tmp = NULL;
+                ret = MEMORY_E;
+                break;
+            }
+            mem = newMem;
             XMEMCPY(mem + idx, tmp, sz);
             memSz += sz;
             idx   += sz;
-            sz = 100; /* read another 100 byte chunck from file */
+            sz = 100; /* read another 100 byte chunk from file */
         }
         XFREE(tmp, bio->heap, DYNAMIC_TYPE_OPENSSL);
+        tmp = NULL;
         if (memSz <= 0) {
             WOLFSSL_MSG("No data to read from bio");
             if (mem != NULL) {
                 XFREE(mem, bio->heap, DYNAMIC_TYPE_OPENSSL);
-            }
-            return NULL;
+                mem = NULL;
+            }
+            ret = BUFFER_E;
         }
     }
     else {
         WOLFSSL_MSG("No data to read from bio");
-        return NULL;
-    }
-
-#ifdef WOLFSSL_SMALL_STACK
-    info = (EncryptedInfo*)XMALLOC(sizeof(EncryptedInfo), NULL,
-                                   DYNAMIC_TYPE_TMP_BUFFER);
-    if (info == NULL) {
-        WOLFSSL_MSG("Error getting memory for EncryptedInfo structure");
-        XFREE(mem, bio->heap, DYNAMIC_TYPE_OPENSSL);
-        return NULL;
-    }
-#endif
-
-    XMEMSET(info, 0, sizeof(EncryptedInfo));
-    info->passwd_cb       = localCb;
-    info->passwd_userdata = pass;
-    ret = PemToDer((const unsigned char*)mem, memSz, PRIVATEKEY_TYPE, &der,
-        NULL, info, &eccFlag);
-
-    if (ret < 0) {
-        WOLFSSL_MSG("Bad Pem To Der");
-    }
-    else {
-        int type;
-        const unsigned char* ptr = der->buffer;
-
-        /* write left over data back to bio */
-        if ((memSz - (int)info->consumed) > 0 &&
-                bio->type != WOLFSSL_BIO_FILE) {
-            if (wolfSSL_BIO_write(bio, mem + (int)info->consumed,
-                                   memSz - (int)info->consumed) <= 0) {
-                WOLFSSL_MSG("Unable to advance bio read pointer");
-            }
-        }
-
-        if (eccFlag) {
-            type = EVP_PKEY_EC;
-        }
-        else {
-            type = EVP_PKEY_RSA;
-        }
-
-        /* handle case where reuse is attempted */
-        if (key != NULL && *key != NULL) {
-            pkey = *key;
-        }
-
-        wolfSSL_d2i_PrivateKey(type, &pkey, &ptr, der->length);
-        if (pkey == NULL) {
-            WOLFSSL_MSG("Error loading DER buffer into WOLFSSL_EVP_PKEY");
+        ret = NOT_COMPILED_IN;
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (ret >= 0) {
+        info = (EncryptedInfo*)XMALLOC(sizeof(EncryptedInfo), NULL,
+                                       DYNAMIC_TYPE_TMP_BUFFER);
+        if (info == NULL) {
+            WOLFSSL_MSG("Error getting memory for EncryptedInfo structure");
+            XFREE(mem, bio->heap, DYNAMIC_TYPE_OPENSSL);
+            mem = NULL;
+            ret = MEMORY_E;
+        }
+    }
+#endif
+
+    if (ret >= 0) {
+        XMEMSET(info, 0, sizeof(EncryptedInfo));
+        info->passwd_cb       = localCb;
+        info->passwd_userdata = pass;
+        ret = PemToDer((const unsigned char*)mem, memSz, keyType, der,
+            NULL, info, eccFlag);
+
+        if (ret < 0) {
+            WOLFSSL_MSG("Bad Pem To Der");
+        }
+        else {
+            /* write left over data back to bio */
+            if ((memSz - (int)info->consumed) > 0 &&
+                    bio->type != WOLFSSL_BIO_FILE) {
+                if (wolfSSL_BIO_write(bio, mem + (int)info->consumed,
+                                       memSz - (int)info->consumed) <= 0) {
+                    WOLFSSL_MSG("Unable to advance bio read pointer");
+                }
+            }
         }
     }
 
 #ifdef WOLFSSL_SMALL_STACK
     XFREE(info, NULL, DYNAMIC_TYPE_TMP_BUFFER);
 #endif
-
     XFREE(mem, bio->heap, DYNAMIC_TYPE_OPENSSL);
+
+    return ret;
+}
+
+WOLFSSL_EVP_PKEY* wolfSSL_PEM_read_bio_PrivateKey(WOLFSSL_BIO* bio,
+                                                  WOLFSSL_EVP_PKEY** key,
+                                                  pem_password_cb* cb,
+                                                  void* pass)
+{
+    WOLFSSL_EVP_PKEY* pkey = NULL;
+    DerBuffer*         der = NULL;
+    int             keyFormat = 0;
+    int                 type = -1;
+
+    WOLFSSL_ENTER("wolfSSL_PEM_read_bio_PrivateKey");
+
+    if (bio == NULL)
+        return pkey;
+
+    if (pem_read_bio_key(bio, cb, pass, PRIVATEKEY_TYPE, &keyFormat,
+                                                                   &der) >= 0) {
+        const unsigned char* ptr = der->buffer;
+
+        if (keyFormat) {
+            /* keyFormat is Key_Sum enum */
+            if (keyFormat == RSAk)
+                type = EVP_PKEY_RSA;
+            else if (keyFormat == ECDSAk)
+                type =  EVP_PKEY_EC;
+            else if (keyFormat == DSAk)
+                type = EVP_PKEY_DSA;
+            else if (keyFormat == DHk)
+                type = EVP_PKEY_DH;
+        }
+        else {
+            /* Default to RSA if format is not set */
+            type = EVP_PKEY_RSA;
+        }
+
+        /* handle case where reuse is attempted */
+        if (key != NULL && *key != NULL)
+            pkey = *key;
+
+        wolfSSL_d2i_PrivateKey(type, &pkey, &ptr, der->length);
+        if (pkey == NULL) {
+            WOLFSSL_MSG("Error loading DER buffer into WOLFSSL_EVP_PKEY");
+        }
+    }
+
     FreeDer(&der);
 
-    if (key != NULL) {
+    if (key != NULL && pkey != NULL)
         *key = pkey;
-    }
+
+    WOLFSSL_LEAVE("wolfSSL_PEM_read_bio_PrivateKey", 0);
 
     return pkey;
 }
 
-
-#ifndef NO_RSA
+WOLFSSL_EVP_PKEY *wolfSSL_PEM_read_bio_PUBKEY(WOLFSSL_BIO* bio,
+                                              WOLFSSL_EVP_PKEY **key,
+                                              pem_password_cb *cb, void *pass)
+{
+    WOLFSSL_EVP_PKEY* pkey = NULL;
+    DerBuffer*        der = NULL;
+    int               keyFormat = 0;
+
+    WOLFSSL_ENTER("wolfSSL_PEM_read_bio_PUBKEY");
+
+    if (bio == NULL)
+        return pkey;
+
+    if (pem_read_bio_key(bio, cb, pass, PUBLICKEY_TYPE, &keyFormat, &der) >= 0) {
+        const unsigned char* ptr = der->buffer;
+
+        /* handle case where reuse is attempted */
+        if (key != NULL && *key != NULL)
+            pkey = *key;
+
+        wolfSSL_d2i_PUBKEY(&pkey, &ptr, der->length);
+        if (pkey == NULL) {
+            WOLFSSL_MSG("Error loading DER buffer into WOLFSSL_EVP_PKEY");
+        }
+    }
+
+    FreeDer(&der);
+
+    if (key != NULL && pkey != NULL)
+        *key = pkey;
+
+    WOLFSSL_LEAVE("wolfSSL_PEM_read_bio_PUBKEY", 0);
+
+    return pkey;
+}
+
+
+#if (defined(OPENSSL_EXTRA) || defined(OPENSSL_ALL)) && !defined(NO_RSA)
 /* Uses the same format of input as wolfSSL_PEM_read_bio_PrivateKey but expects
  * the results to be an RSA key.
  *
@@ -27789,6 +34382,8 @@
     WOLFSSL_EVP_PKEY* pkey;
     WOLFSSL_RSA* local;
 
+    WOLFSSL_ENTER("PEM_read_bio_RSAPrivateKey");
+
     pkey = wolfSSL_PEM_read_bio_PrivateKey(bio, NULL, cb, pass);
     if (pkey == NULL) {
         return NULL;
@@ -27806,42 +34401,106 @@
     wolfSSL_EVP_PKEY_free(pkey);
     return local;
 }
-#endif /* !NO_RSA */
-
-
-/* return of pkey->type which will be EVP_PKEY_RSA for example.
- *
- * type  type of EVP_PKEY
- *
- * returns type or if type is not found then NID_undef
- */
-int wolfSSL_EVP_PKEY_type(int type)
-{
-    WOLFSSL_MSG("wolfSSL_EVP_PKEY_type");
-
-    switch (type) {
-    #ifdef OPENSSL_EXTRA
-        case EVP_PKEY_RSA:
-            return EVP_PKEY_RSA;
-        case EVP_PKEY_DSA:
-            return EVP_PKEY_DSA;
-        case EVP_PKEY_EC:
-            return EVP_PKEY_EC;
-    #endif
-        default:
-            return NID_undef;
-    }
-}
-
-
-int wolfSSL_EVP_PKEY_base_id(const EVP_PKEY *pkey)
-{
-    return EVP_PKEY_type(pkey->type);
-}
-
+#endif /* OPENSSL_EXTRA || OPENSSL_ALL || !NO_RSA */
+
+#if (defined(OPENSSL_EXTRA) || defined(OPENSSL_ALL)) && (!defined(NO_CERTS) && \
+       !defined(NO_FILESYSTEM) && !defined(NO_DSA) && defined(WOLFSSL_KEY_GEN))
+/* Uses the same format of input as wolfSSL_PEM_read_bio_PrivateKey but expects
+ * the results to be an DSA key.
+ *
+ * bio  structure to read DSA private key from
+ * dsa  if not null is then set to the result
+ * cb   password callback for reading PEM
+ * pass password string
+ *
+ * returns a pointer to a new WOLFSSL_DSA structure on success and NULL on fail
+ */
+WOLFSSL_DSA* wolfSSL_PEM_read_bio_DSAPrivateKey(WOLFSSL_BIO* bio,
+                                                WOLFSSL_DSA** dsa,
+                                                pem_password_cb* cb,void *pass)
+{
+    WOLFSSL_EVP_PKEY* pkey = NULL;
+    WOLFSSL_DSA* local;
+    WOLFSSL_ENTER("wolfSSL_PEM_read_bio_DSAPrivateKey");
+
+
+    pkey = wolfSSL_PEM_read_bio_PrivateKey(bio, NULL, cb, pass);
+    if (pkey == NULL) {
+        WOLFSSL_MSG("Error in PEM_read_bio_PrivateKey");
+         return NULL;
+     }
+     /* Since the WOLFSSL_DSA structure is being taken from WOLFSSL_EVP_PKEY the
+     * flag indicating that the WOLFSSL_DSA structure is owned should be FALSE
+     * to avoid having it free'd */
+    pkey->ownDsa = 0;
+    local = pkey->dsa;
+    if (dsa != NULL) {
+        *dsa = local;
+    }
+     wolfSSL_EVP_PKEY_free(pkey);
+    return local;
+}
+
+/* Reads an DSA public key from a WOLFSSL_BIO into a WOLFSSL_DSA.
+ * Returns WOLFSSL_SUCCESS or WOLFSSL_FAILURE
+ */
+WOLFSSL_DSA *wolfSSL_PEM_read_bio_DSA_PUBKEY(WOLFSSL_BIO* bio,WOLFSSL_DSA** dsa,
+                                                pem_password_cb* cb, void *pass)
+{
+    WOLFSSL_EVP_PKEY* pkey;
+    WOLFSSL_DSA* local;
+    WOLFSSL_ENTER("wolfSSL_PEM_read_bio_DSA_PUBKEY");
+
+    pkey = wolfSSL_PEM_read_bio_PUBKEY(bio, NULL, cb, pass);
+    if (pkey == NULL) {
+        WOLFSSL_MSG("wolfSSL_PEM_read_bio_PUBKEY failed");
+        return NULL;
+    }
+
+    /* Since the WOLFSSL_DSA structure is being taken from WOLFSSL_EVP_PKEY the
+     * flag indicating that the WOLFSSL_DSA structure is owned should be FALSE
+     * to avoid having it free'd */
+    pkey->ownDsa = 0;
+    local = pkey->dsa;
+    if (dsa != NULL) {
+        *dsa = local;
+    }
+
+    wolfSSL_EVP_PKEY_free(pkey);
+    return local;
+}
+#endif
+
+#ifdef HAVE_ECC
+/* returns a new WOLFSSL_EC_GROUP structure on success and NULL on fail */
+WOLFSSL_EC_GROUP* wolfSSL_PEM_read_bio_ECPKParameters(WOLFSSL_BIO* bio,
+        WOLFSSL_EC_GROUP** group, pem_password_cb* cb, void* pass)
+{
+    WOLFSSL_EVP_PKEY* pkey;
+    WOLFSSL_EC_GROUP* ret = NULL;
+
+    /* check on if bio is null is done in wolfSSL_PEM_read_bio_PrivateKey */
+    pkey = wolfSSL_PEM_read_bio_PrivateKey(bio, NULL, cb, pass);
+    if (pkey != NULL) {
+        if (pkey->type != EVP_PKEY_EC) {
+            WOLFSSL_MSG("Unexpected key type");
+        }
+        else {
+            ret = (WOLFSSL_EC_GROUP*)wolfSSL_EC_KEY_get0_group(pkey->ecc);
+
+            /* set ecc group to null so it is not free'd when pkey is free'd */
+            pkey->ecc->group = NULL;
+        }
+    }
+
+    (void)group;
+    wolfSSL_EVP_PKEY_free(pkey);
+    return ret;
+}
+#endif /* HAVE_ECC */
 
 #if !defined(NO_FILESYSTEM)
-WOLFSSL_EVP_PKEY *wolfSSL_PEM_read_PUBKEY(FILE *fp, EVP_PKEY **x,
+WOLFSSL_EVP_PKEY *wolfSSL_PEM_read_PUBKEY(XFILE fp, EVP_PKEY **x,
                                           pem_password_cb *cb, void *u)
 {
     (void)fp;
@@ -27857,9 +34516,181 @@
 
 #ifndef NO_RSA
 
+#if defined(XSNPRINTF) && !defined(HAVE_FAST_RSA)
+/* snprintf() must be available */
+
+/******************************************************************************
+* wolfSSL_RSA_print - writes the human readable form of RSA to bio
+*
+* RETURNS:
+* returns WOLFSSL_SUCCESS on success, otherwise returns WOLFSSL_FAILURE
+*/
+int wolfSSL_RSA_print(WOLFSSL_BIO* bio, WOLFSSL_RSA* rsa, int offset)
+{
+    char tmp[100] = {0};
+    word32 idx = 0;
+    int  sz = 0;
+    byte lbit = 0;
+    int  rawLen = 0;
+    byte* rawKey = NULL;
+    RsaKey* iRsa = NULL;
+    int i = 0;
+    mp_int *rsaElem = NULL;
+    char rsaStr[][20] = { "Modulus:",
+                          "PublicExponent:",
+                          "PrivateExponent:",
+                          "Prime1:",
+                          "Prime2:",
+                          "Exponent1:",
+                          "Exponent2:",
+                          "Coefficient:"
+                        };
+
+    WOLFSSL_ENTER("wolfSSL_RSA_print");
+    (void)offset;
+
+    if (bio == NULL || rsa == NULL) {
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    if ((sz = wolfSSL_RSA_size(rsa)) < 0) {
+        WOLFSSL_MSG("Error getting RSA key size");
+        return WOLFSSL_FAILURE;
+    }
+    iRsa = (RsaKey*)rsa->internal;
+
+    XSNPRINTF(tmp, sizeof(tmp) - 1, "\n%s: (%d bit)",
+            "RSA Private-Key", 8 * sz);
+    tmp[sizeof(tmp) - 1] = '\0';
+    if (wolfSSL_BIO_write(bio, tmp, (int)XSTRLEN(tmp)) <= 0) {
+        return WOLFSSL_FAILURE;
+    }
+
+    for (i=0; i<RSA_INTS; i++) {
+        switch(i) {
+            case 0:
+                /* Print out modulus */
+                rsaElem = &iRsa->n;
+                break;
+            case 1:
+                rsaElem = &iRsa->e;
+                break;
+            case 2:
+                rsaElem = &iRsa->d;
+                break;
+            case 3:
+                rsaElem = &iRsa->p;
+                break;
+            case 4:
+                rsaElem = &iRsa->q;
+                break;
+            case 5:
+                rsaElem = &iRsa->dP;
+                break;
+            case 6:
+                rsaElem = &iRsa->dQ;
+                break;
+            case 7:
+                rsaElem = &iRsa->u;
+                break;
+            default:
+                WOLFSSL_MSG("Bad index value");
+        }
+
+        if (i == 1) {
+            /* Print out exponent values */
+            rawLen = mp_unsigned_bin_size(rsaElem);
+            if (rawLen < 0) {
+                WOLFSSL_MSG("Error getting exponent size");
+                return WOLFSSL_FAILURE;
+            }
+
+            if ((word32)rawLen < sizeof(word32)) {
+                rawLen = sizeof(word32);
+            }
+            rawKey = (byte*)XMALLOC(rawLen, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+            if (rawKey == NULL) {
+                WOLFSSL_MSG("Memory error");
+                return WOLFSSL_FAILURE;
+            }
+            XMEMSET(rawKey, 0, rawLen);
+            mp_to_unsigned_bin(rsaElem, rawKey);
+            if ((word32)rawLen <= sizeof(word32)) {
+                idx = *(word32*)rawKey;
+                #ifdef BIG_ENDIAN_ORDER
+                    idx = ByteReverseWord32(idx);
+                #endif
+            }
+            XSNPRINTF(tmp, sizeof(tmp) - 1, "\nExponent: %d (0x%x)", idx, idx);
+            if (wolfSSL_BIO_write(bio, tmp, (int)XSTRLEN(tmp)) <= 0) {
+                XFREE(rawKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+                return WOLFSSL_FAILURE;
+            }
+            XFREE(rawKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        }
+        else {
+            XSNPRINTF(tmp, sizeof(tmp) - 1, "\n%s\n    ", rsaStr[i]);
+            tmp[sizeof(tmp) - 1] = '\0';
+            if (mp_leading_bit(rsaElem)) {
+                lbit = 1;
+                XSTRNCAT(tmp, "00", 3);
+            }
+
+            rawLen = mp_unsigned_bin_size(rsaElem);
+            rawKey = (byte*)XMALLOC(rawLen, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+            if (rawKey == NULL) {
+                WOLFSSL_MSG("Memory error");
+                return WOLFSSL_FAILURE;
+            }
+            mp_to_unsigned_bin(rsaElem, rawKey);
+            for (idx = 0; idx < (word32)rawLen; idx++) {
+                char val[5];
+                int  valSz = 5;
+
+                if ((idx == 0) && !lbit) {
+                    XSNPRINTF(val, valSz - 1, "%02x", rawKey[idx]);
+                }
+                else if ((idx != 0) && (((idx + lbit) % 15) == 0)) {
+                    tmp[sizeof(tmp) - 1] = '\0';
+                    if (wolfSSL_BIO_write(bio, tmp, (int)XSTRLEN(tmp)) <= 0) {
+                        XFREE(rawKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+                        return WOLFSSL_FAILURE;
+                    }
+                    XSNPRINTF(tmp, sizeof(tmp) - 1,
+                            ":\n    ");
+                    XSNPRINTF(val, valSz - 1, "%02x", rawKey[idx]);
+                }
+                else {
+                    XSNPRINTF(val, valSz - 1, ":%02x", rawKey[idx]);
+                }
+                XSTRNCAT(tmp, val, valSz);
+            }
+            XFREE(rawKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+
+            /* print out remaining values */
+            if ((idx > 0) && (((idx - 1 + lbit) % 15) != 0)) {
+                tmp[sizeof(tmp) - 1] = '\0';
+                if (wolfSSL_BIO_write(bio, tmp, (int)XSTRLEN(tmp)) <= 0) {
+                    return WOLFSSL_FAILURE;
+                }
+            }
+            lbit = 0;
+        }
+
+    }
+    /* done with print out */
+    if (wolfSSL_BIO_write(bio, "\n\0", (int)XSTRLEN("\n\0")) <= 0) {
+        return WOLFSSL_FAILURE;
+    }
+
+    return WOLFSSL_SUCCESS;
+}
+
+#endif /* XSNPRINTF */
+
 #if !defined(NO_FILESYSTEM)
 #ifndef NO_WOLFSSL_STUB
-WOLFSSL_RSA *wolfSSL_PEM_read_RSAPublicKey(FILE *fp, WOLFSSL_RSA **x,
+WOLFSSL_RSA *wolfSSL_PEM_read_RSAPublicKey(XFILE fp, WOLFSSL_RSA **x,
                                            pem_password_cb *cb, void *u)
 {
     (void)fp;
@@ -27876,7 +34707,7 @@
  *   1 if success, 0 if error
  */
 #ifndef NO_WOLFSSL_STUB
-int wolfSSL_PEM_write_RSAPublicKey(FILE *fp, WOLFSSL_RSA *x)
+int wolfSSL_PEM_write_RSAPublicKey(XFILE fp, WOLFSSL_RSA *x)
 {
     (void)fp;
     (void)x;
@@ -27891,7 +34722,7 @@
  *   1 if success, 0 if error
  */
 #ifndef NO_WOLFSSL_STUB
-int wolfSSL_PEM_write_RSA_PUBKEY(FILE *fp, WOLFSSL_RSA *x)
+int wolfSSL_PEM_write_RSA_PUBKEY(XFILE fp, WOLFSSL_RSA *x)
 {
     (void)fp;
     (void)x;
@@ -27904,36 +34735,37 @@
 
 #endif /* NO_FILESYSTEM */
 
-WOLFSSL_RSA *wolfSSL_d2i_RSAPublicKey(WOLFSSL_RSA **r, const unsigned char **pp, long len)
+WOLFSSL_RSA *wolfSSL_d2i_RSAPublicKey(WOLFSSL_RSA **r, const unsigned char **pp,
+    long len)
 {
     WOLFSSL_RSA *rsa = NULL;
 
     WOLFSSL_ENTER("d2i_RSAPublicKey");
 
-    if(pp == NULL){
+    if (pp == NULL) {
         WOLFSSL_MSG("Bad argument");
         return NULL;
     }
-    if((rsa = wolfSSL_RSA_new()) == NULL){
+    if ((rsa = wolfSSL_RSA_new()) == NULL) {
         WOLFSSL_MSG("RSA_new failed");
         return NULL;
     }
 
-    if(wolfSSL_RSA_LoadDer_ex(rsa, *pp, (int)len, WOLFSSL_RSA_LOAD_PUBLIC)
-                                                     != WOLFSSL_SUCCESS){
+    if (wolfSSL_RSA_LoadDer_ex(rsa, *pp, (int)len, WOLFSSL_RSA_LOAD_PUBLIC)
+                                                         != WOLFSSL_SUCCESS) {
         WOLFSSL_MSG("RSA_LoadDer failed");
         wolfSSL_RSA_free(rsa);
         rsa = NULL;
-        return NULL;
-    }
-    if(r != NULL)
+    }
+    if (r != NULL)
         *r = rsa;
+
     return rsa;
 }
 
-/* Converts an rsa private key from der format to an rsa structure.
-Returns pointer to the rsa structure on succcess and NULL if error. */
-WOLFSSL_RSA *wolfSSL_d2i_RSAPrivateKey(WOLFSSL_RSA **r, 
+/* Converts an RSA private key from DER format to an RSA structure.
+Returns pointer to the RSA structure on success and NULL if error. */
+WOLFSSL_RSA *wolfSSL_d2i_RSAPrivateKey(WOLFSSL_RSA **r,
                                        const unsigned char **derBuf, long derSz)
 {
     WOLFSSL_RSA *rsa = NULL;
@@ -27950,106 +34782,66 @@
         return NULL;
     }
 
-    if (wolfSSL_RSA_LoadDer_ex(rsa, *derBuf, (int)derSz, 
+    if (wolfSSL_RSA_LoadDer_ex(rsa, *derBuf, (int)derSz,
                                  WOLFSSL_RSA_LOAD_PRIVATE) != WOLFSSL_SUCCESS) {
         WOLFSSL_MSG("RSA_LoadDer failed");
         wolfSSL_RSA_free(rsa);
         rsa = NULL;
-        return NULL;
-    }
-    if(r != NULL)
+    }
+    if (r != NULL)
         *r = rsa;
 
     return rsa;
 }
 
-#if !defined(HAVE_FAST_RSA)
-#if defined(WOLFSSL_KEY_GEN)
-
-/* Converts an internal rsa structure to der format.
-Returns size of der on success and WOLFSSL_FAILURE if error */
+#if !defined(HAVE_FAST_RSA) && defined(WOLFSSL_KEY_GEN) && \
+    !defined(NO_RSA) && !defined(HAVE_USER_RSA)
+/* Converts an internal RSA structure to DER format.
+ * If "pp" is null then buffer size only is returned.
+ * If "*pp" is null then a created buffer is set in *pp and the caller is
+ *  responsible for free'ing it.
+ * Returns size of DER on success and WOLFSSL_FAILURE if error
+ */
 int wolfSSL_i2d_RSAPrivateKey(WOLFSSL_RSA *rsa, unsigned char **pp)
 {
-    byte* der = NULL;
-    int derMax;
-    int ret;
-    int i;
+    int ret;
 
     WOLFSSL_ENTER("wolfSSL_i2d_RSAPrivateKey");
 
     /* check for bad functions arguments */
-    if ((rsa == NULL) || (pp == NULL)) {
+    if (rsa == NULL) {
         WOLFSSL_MSG("Bad Function Arguments");
         return BAD_FUNC_ARG;
     }
 
-    if (rsa->inSet == 0) {
-        if ((ret = SetRsaInternal(rsa)) != WOLFSSL_SUCCESS) {
-            WOLFSSL_MSG("SetRsaInternal() Failed");
-            return ret;
-        }
-    }
-
-    /* 5 > size of n, d, p, q, d%(p-1), d(q-1), 1/q%p, e + ASN.1 additional
-     *  informations
-     */
-    derMax = 5 * wolfSSL_RSA_size(rsa) + AES_BLOCK_SIZE;
-
-    der = (byte*)XMALLOC(derMax, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    if (der == NULL) {
-        WOLFSSL_MSG("Malloc failed");
-        return WOLFSSL_FAILURE;
-    }
-
-    /* RSA key to DER */
-    if ((ret = wc_RsaKeyToDer((RsaKey *)rsa->internal, der, derMax)) < 0) {
-        WOLFSSL_MSG("wc_RsaKeyToDer() failed");
-        XFREE(der, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-        der = NULL;
-        return ret;
-    }
-
-    /* ret is the size of the der buffer */
-    for (i = 0; i < ret; i++) {
-        *(*pp + i) = *(der + i);
-    }
-    *pp += ret;
-
-    XFREE(der, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    return ret; /* returns size of der if successful */
-}
-#endif /* WOLFSSL_KEY_GEN */
+    if ((ret = wolfSSL_RSA_To_Der(rsa, pp, 0)) < 0) {
+        WOLFSSL_MSG("wolfSSL_RSA_To_Der failed");
+        return WOLFSSL_FAILURE;
+    }
+
+    return ret; /* returns size of DER if successful */
+}
 
 
 int wolfSSL_i2d_RSAPublicKey(WOLFSSL_RSA *rsa, const unsigned char **pp)
 {
-    byte *der;
-    int derLen;
-    int ret;
-
-    WOLFSSL_ENTER("i2d_RSAPublicKey");
-    if ((rsa == NULL) || (pp == NULL))
-        return WOLFSSL_FATAL_ERROR;
-    if ((ret = SetRsaInternal(rsa)) != WOLFSSL_SUCCESS) {
-        WOLFSSL_MSG("SetRsaInternal Failed");
-        return ret;
-    }
-    if ((derLen = RsaPublicKeyDerSize((RsaKey *)rsa->internal, 1)) < 0)
-        return WOLFSSL_FATAL_ERROR;
-    der = (byte*)XMALLOC(derLen, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    if (der == NULL) {
-        return WOLFSSL_FATAL_ERROR;
-    }
-    if ((ret = wc_RsaKeyToPublicDer((RsaKey *)rsa->internal, der, derLen)) < 0){
-        WOLFSSL_MSG("RsaKeyToPublicDer failed");
-        XFREE(der, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-        return ret;
-    }
-
-    *pp = der;
-    return ret;
-}
-#endif /* #if !defined(HAVE_FAST_RSA) */
+    int ret;
+
+    /* check for bad functions arguments */
+    if (rsa == NULL) {
+        WOLFSSL_MSG("Bad Function Arguments");
+        return BAD_FUNC_ARG;
+    }
+
+    if ((ret = wolfSSL_RSA_To_Der(rsa, (byte**)pp, 1)) < 0) {
+        WOLFSSL_MSG("wolfSSL_RSA_To_Der failed");
+        return WOLFSSL_FAILURE;
+    }
+
+    return ret;
+}
+#endif /* !defined(HAVE_FAST_RSA) && defined(WOLFSSL_KEY_GEN) && \
+        * !defined(NO_RSA) && !defined(HAVE_USER_RSA) */
 
 #endif /* !NO_RSA */
 #endif /* OPENSSL_EXTRA */
@@ -28102,6 +34894,431 @@
 
     return WOLFSSL_SUCCESS;
 }
+
+#if defined(WC_RSA_PSS) && (defined(OPENSSL_ALL) || defined(WOLFSSL_ASIO) || \
+        defined(WOLFSSL_HAPROXY) || defined(WOLFSSL_NGINX))
+static int hash2mgf(enum wc_HashType hType)
+{
+    switch (hType) {
+#ifndef NO_SHA
+    case WC_HASH_TYPE_SHA:
+        return WC_MGF1SHA1;
+#endif
+#ifndef NO_SHA256
+#ifdef WOLFSSL_SHA224
+    case WC_HASH_TYPE_SHA224:
+        return WC_MGF1SHA224;
+#endif
+    case WC_HASH_TYPE_SHA256:
+        return WC_MGF1SHA256;
+#endif
+#ifdef WOLFSSL_SHA384
+    case WC_HASH_TYPE_SHA384:
+        return WC_MGF1SHA384;
+#endif
+#ifdef WOLFSSL_SHA512
+    case WC_HASH_TYPE_SHA512:
+        return WC_MGF1SHA512;
+#endif
+    case WC_HASH_TYPE_NONE:
+    case WC_HASH_TYPE_MD2:
+    case WC_HASH_TYPE_MD4:
+    case WC_HASH_TYPE_MD5:
+    case WC_HASH_TYPE_MD5_SHA:
+    case WC_HASH_TYPE_SHA3_224:
+    case WC_HASH_TYPE_SHA3_256:
+    case WC_HASH_TYPE_SHA3_384:
+    case WC_HASH_TYPE_SHA3_512:
+    case WC_HASH_TYPE_BLAKE2B:
+    case WC_HASH_TYPE_BLAKE2S:
+    default:
+        WOLFSSL_MSG("Unrecognized or unsupported hash function");
+        return WC_MGF1NONE;
+    }
+}
+
+/*
+ *                                +-----------+
+ *                                |     M     |
+ *                                +-----------+
+ *                                      |
+ *                                      V
+ *                                    Hash
+ *                                      |
+ *                                      V
+ *                        +--------+----------+----------+
+ *                   M' = |Padding1|  mHash   |   salt   |
+ *                        +--------+----------+----------+
+ *                                       |
+ *             +--------+----------+     V
+ *       DB =  |Padding2|maskedseed|   Hash
+ *             +--------+----------+     |
+ *                       |               |
+ *                       V               |    +--+
+ *                      xor <--- MGF <---|    |bc|
+ *                       |               |    +--+
+ *                       |               |      |
+ *                       V               V      V
+ *             +-------------------+----------+--+
+ *       EM =  |    maskedDB       |maskedseed|bc|
+ *             +-------------------+----------+--+
+ * Diagram taken from https://tools.ietf.org/html/rfc3447#section-9.1
+ */
+int wolfSSL_RSA_padding_add_PKCS1_PSS(WOLFSSL_RSA *rsa, unsigned char *EM,
+                                      const unsigned char *mHash,
+                                      const WOLFSSL_EVP_MD *hashAlg, int saltLen)
+{
+    int hashLen, emLen, mgf;
+    int ret = WOLFSSL_FAILURE;
+    int initTmpRng = 0;
+    WC_RNG *rng = NULL;
+#ifdef WOLFSSL_SMALL_STACK
+    WC_RNG* tmpRNG = NULL;
+#else
+    WC_RNG  _tmpRNG[1];
+    WC_RNG* tmpRNG = _tmpRNG;
+#endif
+    enum wc_HashType hashType;
+
+    WOLFSSL_ENTER("wolfSSL_RSA_padding_add_PKCS1_PSS");
+
+    if (!rsa || !EM || !mHash || !hashAlg) {
+        return WOLFSSL_FAILURE;
+    }
+
+    if (!(rng = WOLFSSL_RSA_GetRNG(rsa, (WC_RNG**)&tmpRNG, &initTmpRng))) {
+        WOLFSSL_MSG("WOLFSSL_RSA_GetRNG error");
+        goto cleanup;
+    }
+
+    if (!rsa->exSet && SetRsaExternal(rsa) != WOLFSSL_SUCCESS) {
+        WOLFSSL_MSG("SetRsaExternal error");
+        goto cleanup;
+    }
+
+    hashType = wolfSSL_EVP_md2macType(hashAlg);
+    if (hashType < WC_HASH_TYPE_NONE || hashType > WC_HASH_TYPE_MAX) {
+        WOLFSSL_MSG("wolfSSL_EVP_md2macType error");
+        goto cleanup;
+    }
+
+   if ((mgf = hash2mgf(hashType)) == WC_MGF1NONE) {
+       WOLFSSL_MSG("hash2mgf error");
+       goto cleanup;
+   }
+
+    if ((hashLen = wolfSSL_EVP_MD_size(hashAlg)) < 0) {
+        WOLFSSL_MSG("wolfSSL_EVP_MD_size error");
+        goto cleanup;
+    }
+
+    if ((emLen = wolfSSL_RSA_size(rsa)) <= 0) {
+        WOLFSSL_MSG("wolfSSL_RSA_size error");
+        goto cleanup;
+    }
+
+    switch (saltLen) {
+    /* Negative saltLen values are treated differently */
+        case RSA_PSS_SALTLEN_DIGEST:
+            saltLen = hashLen;
+            break;
+        case RSA_PSS_SALTLEN_MAX_SIGN:
+        case RSA_PSS_SALTLEN_MAX:
+            saltLen = emLen - hashLen - 2;
+            break;
+        default:
+            if (saltLen < 0) {
+                /* Not any currently implemented negative value */
+                WOLFSSL_MSG("invalid saltLen");
+                goto cleanup;
+            }
+    }
+
+    if (wc_RsaPad_ex(mHash, wolfSSL_EVP_MD_size(hashAlg), EM, emLen,
+                     RSA_BLOCK_TYPE_1, rng, WC_RSA_PSS_PAD,
+                     wolfSSL_EVP_md2macType(hashAlg), mgf, NULL, 0, saltLen,
+                     wolfSSL_BN_num_bits(rsa->n), NULL) != MP_OKAY) {
+        WOLFSSL_MSG("wc_RsaPad_ex error");
+        goto cleanup;
+    }
+
+    ret = WOLFSSL_SUCCESS;
+cleanup:
+    if (initTmpRng)
+        wc_FreeRng(tmpRNG);
+#ifdef WOLFSSL_SMALL_STACK
+    if (tmpRNG)
+        XFREE(tmpRNG, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+    return ret;
+}
+
+/*
+ * Refer to wolfSSL_RSA_padding_add_PKCS1_PSS
+ * for an explanation of the parameters.
+ */
+int wolfSSL_RSA_verify_PKCS1_PSS(WOLFSSL_RSA *rsa, const unsigned char *mHash,
+                                 const WOLFSSL_EVP_MD *hashAlg,
+                                 const unsigned char *EM, int saltLen)
+{
+    int hashLen, mgf, emLen, mPrimeLen;
+    enum wc_HashType hashType;
+    byte *mPrime = NULL;
+    byte *buf = NULL;
+
+    WOLFSSL_ENTER("wolfSSL_RSA_verify_PKCS1_PSS");
+
+    if (!rsa || !mHash || !hashAlg || !EM) {
+        return WOLFSSL_FAILURE;
+    }
+
+    if ((hashLen = wolfSSL_EVP_MD_size(hashAlg)) < 0) {
+        return WOLFSSL_FAILURE;
+    }
+
+    if ((emLen = wolfSSL_RSA_size(rsa)) <= 0) {
+        WOLFSSL_MSG("wolfSSL_RSA_size error");
+        return WOLFSSL_FAILURE;
+    }
+
+    switch (saltLen) {
+    /* Negative saltLen values are treated differently */
+        case RSA_PSS_SALTLEN_DIGEST:
+            saltLen = hashLen;
+            break;
+        case RSA_PSS_SALTLEN_MAX_SIGN:
+        case RSA_PSS_SALTLEN_MAX:
+            saltLen = emLen - hashLen - 2;
+            break;
+        default:
+            if (saltLen < 0) {
+                /* Not any currently implemented negative value */
+                WOLFSSL_MSG("invalid saltLen");
+                return WOLFSSL_FAILURE;
+            }
+    }
+
+    if (!rsa->exSet && SetRsaExternal(rsa) != WOLFSSL_SUCCESS) {
+        return WOLFSSL_FAILURE;
+    }
+
+    hashType = wolfSSL_EVP_md2macType(hashAlg);
+    if (hashType < WC_HASH_TYPE_NONE || hashType > WC_HASH_TYPE_MAX) {
+        WOLFSSL_MSG("wolfSSL_EVP_md2macType error");
+        return WOLFSSL_FAILURE;
+    }
+
+    if ((mgf = hash2mgf(hashType)) == WC_MGF1NONE) {
+        WOLFSSL_MSG("hash2mgf error");
+        return WOLFSSL_FAILURE;
+    }
+
+    if ((hashLen = wolfSSL_EVP_MD_size(hashAlg)) < 0) {
+        WOLFSSL_MSG("wolfSSL_EVP_MD_size error");
+        return WOLFSSL_FAILURE;
+    }
+
+    if (!(buf = (byte*)XMALLOC(emLen, NULL, DYNAMIC_TYPE_TMP_BUFFER))) {
+        WOLFSSL_MSG("malloc error");
+        return WOLFSSL_FAILURE;
+    }
+    XMEMCPY(buf, EM, emLen);
+
+    /* Remove and verify the PSS padding */
+    if ((mPrimeLen = wc_RsaUnPad_ex(buf, emLen, &mPrime,
+                                    RSA_BLOCK_TYPE_1, WC_RSA_PSS_PAD, hashType,
+                                    mgf, NULL, 0, saltLen,
+                                    wolfSSL_BN_num_bits(rsa->n), NULL)) < 0) {
+        WOLFSSL_MSG("wc_RsaPad_ex error");
+        XFREE(buf, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        return WOLFSSL_FAILURE;
+    }
+
+    /* Verify the hash is correct */
+    if (wc_RsaPSS_CheckPadding_ex(mHash, hashLen, mPrime, mPrimeLen, hashType,
+                                  saltLen, wolfSSL_BN_num_bits(rsa->n))
+                                  != MP_OKAY) {
+        WOLFSSL_MSG("wc_RsaPSS_CheckPadding_ex error");
+        XFREE(buf, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        return WOLFSSL_FAILURE;
+    }
+    XFREE(buf, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    return WOLFSSL_SUCCESS;
+}
+#endif
+
+#if defined(OPENSSL_EXTRA)
+WOLFSSL_RSA_METHOD *wolfSSL_RSA_meth_new(const char *name, int flags)
+{
+    int name_len;
+    WOLFSSL_RSA_METHOD* meth;
+
+    if (name == NULL) {
+        return NULL;
+    }
+
+    meth = (WOLFSSL_RSA_METHOD*)XMALLOC(sizeof(WOLFSSL_RSA_METHOD), NULL,
+        DYNAMIC_TYPE_OPENSSL);
+    name_len = (int)XSTRLEN(name);
+    if (!meth) {
+        return NULL;
+    }
+    meth->flags = flags;
+    meth->name = (char*)XMALLOC(name_len+1, NULL, DYNAMIC_TYPE_OPENSSL);
+    if (!meth->name) {
+        XFREE(meth, NULL, DYNAMIC_TYPE_OPENSSL);
+        return NULL;
+    }
+    XMEMCPY(meth->name, name, name_len+1);
+
+    return meth;
+}
+
+void wolfSSL_RSA_meth_free(WOLFSSL_RSA_METHOD *meth)
+{
+    if (meth) {
+        XFREE(meth->name, NULL, DYNAMIC_TYPE_OPENSSL);
+        XFREE(meth, NULL, DYNAMIC_TYPE_OPENSSL);
+    }
+}
+
+#ifndef NO_WOLFSSL_STUB
+int wolfSSL_RSA_meth_set(WOLFSSL_RSA_METHOD *rsa, void* p)
+{
+    (void)rsa;
+    (void)p;
+    WOLFSSL_STUB("RSA_METHOD is not implemented.");
+    return 1;
+}
+#endif
+
+int wolfSSL_RSA_set_method(WOLFSSL_RSA *rsa, WOLFSSL_RSA_METHOD *meth)
+{
+    if (rsa)
+        rsa->meth = meth;
+    return 1;
+}
+
+const WOLFSSL_RSA_METHOD* wolfSSL_RSA_get_method(const WOLFSSL_RSA *rsa)
+{
+    if (!rsa) {
+        return NULL;
+    }
+    return rsa->meth;
+}
+
+const WOLFSSL_RSA_METHOD* wolfSSL_RSA_get_default_method(void)
+{
+    return wolfSSL_RSA_meth_new("wolfSSL RSA", 0);
+}
+
+int wolfSSL_RSA_flags(const WOLFSSL_RSA *r)
+{
+    if (r && r->meth) {
+        return r->meth->flags;
+    } else {
+        return 0;
+    }
+}
+
+void wolfSSL_RSA_set_flags(WOLFSSL_RSA *r, int flags)
+{
+    if (r && r->meth) {
+        r->meth->flags = flags;
+    }
+}
+
+#if defined(WOLFSSL_KEY_GEN) && !defined(NO_RSA) && !defined(HAVE_USER_RSA)
+WOLFSSL_RSA* wolfSSL_RSAPublicKey_dup(WOLFSSL_RSA *rsa)
+{
+    int derSz = 0;
+    byte *derBuf = NULL;
+    WOLFSSL_RSA* local;
+
+    WOLFSSL_ENTER("wolfSSL_RSAPublicKey_dup");
+
+    if (!rsa) {
+        return NULL;
+    }
+
+    local = wolfSSL_RSA_new();
+    if (local == NULL) {
+        WOLFSSL_MSG("Error creating a new WOLFSSL_RSA structure");
+        return NULL;
+    }
+
+    if ((derSz = wolfSSL_RSA_To_Der(rsa, &derBuf, 1)) < 0) {
+        WOLFSSL_MSG("wolfSSL_RSA_To_Der failed");
+        return NULL;
+    }
+
+    if (wolfSSL_RSA_LoadDer_ex(local,
+                derBuf, derSz,
+                WOLFSSL_RSA_LOAD_PUBLIC) != SSL_SUCCESS) {
+        wolfSSL_RSA_free(local);
+        local = NULL;
+    }
+    XFREE(derBuf, NULL, DYNAMIC_TYPE_ASN1);
+    return local;
+}
+#endif
+
+void* wolfSSL_RSA_get_ex_data(const WOLFSSL_RSA *rsa, int idx)
+{
+    WOLFSSL_ENTER("wolfSSL_RSA_get_ex_data");
+#ifdef HAVE_EX_DATA
+    if (rsa) {
+        return wolfSSL_CRYPTO_get_ex_data(&rsa->ex_data, idx);
+    }
+#else
+    (void)rsa;
+    (void)idx;
+#endif
+    return NULL;
+}
+
+int wolfSSL_RSA_set_ex_data(WOLFSSL_RSA *rsa, int idx, void *data)
+{
+    WOLFSSL_ENTER("wolfSSL_RSA_set_ex_data");
+    #ifdef HAVE_EX_DATA
+    if (rsa) {
+        return wolfSSL_CRYPTO_set_ex_data(&rsa->ex_data, idx, data);
+    }
+    #else
+    (void)rsa;
+    (void)idx;
+    (void)data;
+    #endif
+    return WOLFSSL_FAILURE;
+}
+
+int wolfSSL_RSA_set0_key(WOLFSSL_RSA *r, WOLFSSL_BIGNUM *n, WOLFSSL_BIGNUM *e,
+                         WOLFSSL_BIGNUM *d)
+{
+    /* If the fields n and e in r are NULL, the corresponding input
+     * parameters MUST be non-NULL for n and e.  d may be
+     * left NULL (in case only the public key is used).
+     */
+    if ((!r->n && !n) || (!r->e && !e))
+        return 0;
+
+    if (n) {
+        wolfSSL_BN_free(r->n);
+        r->n = n;
+    }
+    if (e) {
+        wolfSSL_BN_free(r->e);
+        r->e = e;
+    }
+    if (d) {
+        wolfSSL_BN_clear_free(r->d);
+        r->d = d;
+    }
+
+    return 1;
+}
+#endif /* OPENSSL_EXTRA */
 #endif /* NO_RSA */
 
 #ifdef OPENSSL_EXTRA
@@ -28134,12 +35351,60 @@
 
     return WOLFSSL_SUCCESS;
 }
-#endif /* NO_DSA */
+
+/* Loads DSA key from DER buffer. opt = DSA_LOAD_PRIVATE or DSA_LOAD_PUBLIC.
+    returns 1 on success, or 0 on failure.  */
+int wolfSSL_DSA_LoadDer_ex(WOLFSSL_DSA* dsa, const unsigned char* derBuf,
+                                                            int derSz, int opt)
+{
+    word32 idx = 0;
+    int    ret;
+
+    WOLFSSL_ENTER("wolfSSL_DSA_LoadDer");
+
+    if (dsa == NULL || dsa->internal == NULL || derBuf == NULL || derSz <= 0) {
+        WOLFSSL_MSG("Bad function arguments");
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    if (opt == WOLFSSL_DSA_LOAD_PRIVATE) {
+        ret = DsaPrivateKeyDecode(derBuf, &idx, (DsaKey*)dsa->internal, derSz);
+    }
+    else {
+        ret = DsaPublicKeyDecode(derBuf, &idx, (DsaKey*)dsa->internal, derSz);
+    }
+
+    if (ret < 0 && opt == WOLFSSL_DSA_LOAD_PRIVATE) {
+        WOLFSSL_MSG("DsaPrivateKeyDecode failed");
+        return WOLFSSL_FATAL_ERROR;
+    }
+    else if (ret < 0 && opt == WOLFSSL_DSA_LOAD_PUBLIC) {
+        WOLFSSL_MSG("DsaPublicKeyDecode failed");
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    if (SetDsaExternal(dsa) != WOLFSSL_SUCCESS) {
+        WOLFSSL_MSG("SetDsaExternal failed");
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    dsa->inSet = 1;
+
+    return WOLFSSL_SUCCESS;
+}
+#endif /* !NO_DSA */
 
 #ifdef HAVE_ECC
 /* return WOLFSSL_SUCCESS if success, WOLFSSL_FATAL_ERROR if error */
-int wolfSSL_EC_KEY_LoadDer(WOLFSSL_EC_KEY* key,
-                           const unsigned char* derBuf,  int derSz)
+int wolfSSL_EC_KEY_LoadDer(WOLFSSL_EC_KEY* key, const unsigned char* derBuf,
+                           int derSz)
+{
+    return wolfSSL_EC_KEY_LoadDer_ex(key, derBuf, derSz,
+                                     WOLFSSL_EC_KEY_LOAD_PRIVATE);
+}
+
+int wolfSSL_EC_KEY_LoadDer_ex(WOLFSSL_EC_KEY* key, const unsigned char* derBuf,
+                              int derSz, int opt)
 {
     word32 idx = 0;
     int    ret;
@@ -28151,9 +35416,21 @@
         return WOLFSSL_FATAL_ERROR;
     }
 
-    ret = wc_EccPrivateKeyDecode(derBuf, &idx, (ecc_key*)key->internal, derSz);
+    if (opt == WOLFSSL_EC_KEY_LOAD_PRIVATE) {
+        ret = wc_EccPrivateKeyDecode(derBuf, &idx, (ecc_key*)key->internal,
+                                     derSz);
+    }
+    else {
+        ret = wc_EccPublicKeyDecode(derBuf, &idx, (ecc_key*)key->internal,
+                                    derSz);
+    }
     if (ret < 0) {
-        WOLFSSL_MSG("wc_EccPrivateKeyDecode failed");
+        if (opt == WOLFSSL_EC_KEY_LOAD_PRIVATE) {
+            WOLFSSL_MSG("wc_EccPrivateKeyDecode failed");
+        }
+        else {
+            WOLFSSL_MSG("wc_EccPublicKeyDecode failed");
+        }
         return WOLFSSL_FATAL_ERROR;
     }
 
@@ -28168,10 +35445,73 @@
 }
 #endif /* HAVE_ECC */
 
+#if !defined(NO_DH) && (defined(WOLFSSL_QT) || defined(OPENSSL_ALL) || defined(WOLFSSL_OPENSSH))
+/* return WOLFSSL_SUCCESS if success, WOLFSSL_FATAL_ERROR if error */
+int wolfSSL_DH_LoadDer(WOLFSSL_DH* dh, const unsigned char* derBuf, int derSz)
+{
+    word32 idx = 0;
+    int    ret;
+
+    if (dh == NULL || dh->internal == NULL || derBuf == NULL || derSz <= 0) {
+        WOLFSSL_MSG("Bad function arguments");
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    ret = wc_DhKeyDecode(derBuf, &idx, (DhKey*)dh->internal, (word32)derSz);
+    if (ret < 0) {
+        WOLFSSL_MSG("wc_DhKeyDecode failed");
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    if (SetDhExternal(dh) != WOLFSSL_SUCCESS) {
+        WOLFSSL_MSG("SetDhExternal failed");
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    return WOLFSSL_SUCCESS;
+}
+#endif /* ! NO_DH && WOLFSSL_QT || OPENSSL_ALL */
 
 #endif /* OPENSSL_EXTRA */
 
 
+#if defined(OPENSSL_EXTRA) || defined(OPENSSL_ALL)
+
+/* increments ref count of WOLFSSL_RSA. Return 1 on success, 0 on error */
+int wolfSSL_RSA_up_ref(WOLFSSL_RSA* rsa)
+{
+    if (rsa) {
+        if (wc_LockMutex(&rsa->refMutex) != 0) {
+            WOLFSSL_MSG("Failed to lock x509 mutex");
+        }
+        rsa->refCount++;
+        wc_UnLockMutex(&rsa->refMutex);
+
+        return 1;
+    }
+
+    return 0;
+}
+
+/* increments ref count of WOLFSSL_X509. Return 1 on success, 0 on error */
+int wolfSSL_X509_up_ref(WOLFSSL_X509* x509)
+{
+    if (x509) {
+        if (wc_LockMutex(&x509->refMutex) != 0) {
+            WOLFSSL_MSG("Failed to lock x509 mutex");
+        }
+        x509->refCount++;
+        wc_UnLockMutex(&x509->refMutex);
+
+        return 1;
+    }
+
+    return 0;
+}
+
+#endif /* OPENSSL_EXTRA || OPENSSL_ALL */
+
+
 #ifdef WOLFSSL_ALT_CERT_CHAINS
 int wolfSSL_is_peer_alt_cert_chain(const WOLFSSL* ssl)
 {
@@ -28278,7 +35618,7 @@
 
                     if ((ret = CopyDecodedToX509(x509, cert)) != 0) {
                         WOLFSSL_MSG("Failed to copy decoded");
-                        XFREE(x509, NULL, DYNAMIC_TYPE_X509);
+                        wolfSSL_X509_free(x509);
                         x509 = NULL;
                     }
                 }
@@ -28368,6 +35708,7 @@
 
 
 /* get session ID */
+WOLFSSL_ABI
 const byte* wolfSSL_get_sessionID(const WOLFSSL_SESSION* session)
 {
     WOLFSSL_ENTER("wolfSSL_get_sessionID");
@@ -28412,6 +35753,7 @@
     return NULL;
 }
 
+WOLFSSL_ABI
 void  wolfSSL_CTX_SetEccSignCb(WOLFSSL_CTX* ctx, CallbackEccSign cb)
 {
     if (ctx)
@@ -28545,6 +35887,84 @@
 }
 #endif /* HAVE_CURVE25519 */
 
+#ifdef HAVE_ED448
+void  wolfSSL_CTX_SetEd448SignCb(WOLFSSL_CTX* ctx, CallbackEd448Sign cb)
+{
+    if (ctx)
+        ctx->Ed448SignCb = cb;
+}
+void  wolfSSL_SetEd448SignCtx(WOLFSSL* ssl, void *ctx)
+{
+    if (ssl)
+        ssl->Ed448SignCtx = ctx;
+}
+void* wolfSSL_GetEd448SignCtx(WOLFSSL* ssl)
+{
+    if (ssl)
+        return ssl->Ed448SignCtx;
+
+    return NULL;
+}
+
+void  wolfSSL_CTX_SetEd448VerifyCb(WOLFSSL_CTX* ctx, CallbackEd448Verify cb)
+{
+    if (ctx)
+        ctx->Ed448VerifyCb = cb;
+}
+void  wolfSSL_SetEd448VerifyCtx(WOLFSSL* ssl, void *ctx)
+{
+    if (ssl)
+        ssl->Ed448VerifyCtx = ctx;
+}
+void* wolfSSL_GetEd448VerifyCtx(WOLFSSL* ssl)
+{
+    if (ssl)
+        return ssl->Ed448VerifyCtx;
+
+    return NULL;
+}
+#endif /* HAVE_ED448 */
+
+#ifdef HAVE_CURVE448
+void wolfSSL_CTX_SetX448KeyGenCb(WOLFSSL_CTX* ctx,
+        CallbackX448KeyGen cb)
+{
+    if (ctx)
+        ctx->X448KeyGenCb = cb;
+}
+void  wolfSSL_SetX448KeyGenCtx(WOLFSSL* ssl, void *ctx)
+{
+    if (ssl)
+        ssl->X448KeyGenCtx = ctx;
+}
+void* wolfSSL_GetX448KeyGenCtx(WOLFSSL* ssl)
+{
+    if (ssl)
+        return ssl->X448KeyGenCtx;
+
+    return NULL;
+}
+
+void wolfSSL_CTX_SetX448SharedSecretCb(WOLFSSL_CTX* ctx,
+        CallbackX448SharedSecret cb)
+{
+    if (ctx)
+        ctx->X448SharedSecretCb = cb;
+}
+void  wolfSSL_SetX448SharedSecretCtx(WOLFSSL* ssl, void *ctx)
+{
+    if (ssl)
+        ssl->X448SharedSecretCtx = ctx;
+}
+void* wolfSSL_GetX448SharedSecretCtx(WOLFSSL* ssl)
+{
+    if (ssl)
+        return ssl->X448SharedSecretCtx;
+
+    return NULL;
+}
+#endif /* HAVE_CURVE448 */
+
 #ifndef NO_RSA
 void  wolfSSL_CTX_SetRsaSignCb(WOLFSSL_CTX* ctx, CallbackRsaSign cb)
 {
@@ -28703,11 +36123,10 @@
     void wolfSSL_cert_service(void) {}
 #endif
 
-
-#ifdef OPENSSL_EXTRA /*Lighttp compatibility*/
-
+#ifdef OPENSSL_EXTRA
     #ifndef NO_CERTS
-    void wolfSSL_X509_NAME_free(WOLFSSL_X509_NAME *name){
+    void wolfSSL_X509_NAME_free(WOLFSSL_X509_NAME *name)
+    {
         WOLFSSL_ENTER("wolfSSL_X509_NAME_free");
         FreeX509Name(name, NULL);
         XFREE(name, NULL, DYNAMIC_TYPE_X509);
@@ -28718,7 +36137,7 @@
      *
      * returns NULL on failure, otherwise returns a new structure.
      */
-    WOLFSSL_X509_NAME* wolfSSL_X509_NAME_new()
+    WOLFSSL_X509_NAME* wolfSSL_X509_NAME_new(void)
     {
         WOLFSSL_X509_NAME* name;
 
@@ -28732,9 +36151,68 @@
         return name;
     }
 
-
-#if defined(WOLFSSL_CERT_GEN) && !defined(NO_RSA)
-/* needed SetName function from asn.c is wrapped by NO_RSA */
+    /* Creates a duplicate of a WOLFSSL_X509_NAME structure.
+       Returns a new WOLFSSL_X509_NAME structure or NULL on failure */
+    WOLFSSL_X509_NAME* wolfSSL_X509_NAME_dup(WOLFSSL_X509_NAME *name)
+    {
+        WOLFSSL_X509_NAME* dup = NULL;
+
+        WOLFSSL_ENTER("wolfSSL_X509_NAME_dup");
+
+        if (name == NULL) {
+            WOLFSSL_MSG("NULL parameter");
+            return NULL;
+        }
+
+        if (!(dup = wolfSSL_X509_NAME_new())) {
+            return NULL;
+        }
+
+        /* copy contents */
+        XMEMCPY(dup, name, sizeof(WOLFSSL_X509_NAME));
+        InitX509Name(dup, 1);
+        dup->sz = name->sz;
+
+        /* handle dynamic portions */
+        if (name->dynamicName) {
+            if (!(dup->name = (char*)XMALLOC(name->sz, 0,
+                                             DYNAMIC_TYPE_OPENSSL))) {
+                goto err;
+            }
+        }
+        XMEMCPY(dup->name, name->name, name->sz);
+    #if (defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)) && \
+        !defined(NO_ASN)
+        if (!(dup->fullName.fullName = (char*)XMALLOC(name->fullName.fullNameLen,
+                                                   0, DYNAMIC_TYPE_OPENSSL))) {
+            goto err;
+        }
+        XMEMCPY(dup->fullName.fullName, name->fullName.fullName,
+            name->fullName.fullNameLen);
+    #endif
+
+        return dup;
+
+    err:
+        if (dup) {
+            if (dup->dynamicName && dup->name) {
+                XFREE(dup->name, 0, DYNAMIC_TYPE_OPENSSL);
+                dup->name = NULL;
+            }
+        #if (defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)) && \
+            !defined(NO_ASN)
+            if (dup->fullName.fullName &&
+                dup->fullName.fullName != name->fullName.fullName) {
+                XFREE(dup->fullName.fullName, 0, DYNAMIC_TYPE_OPENSSL);
+                dup->fullName.fullName = NULL;
+            }
+        #endif
+            wolfSSL_X509_NAME_free(dup);
+        }
+        return NULL;
+    }
+
+#if defined(WOLFSSL_CERT_GEN)
     /* helper function for CopyX509NameToCertName()
      *
      * returns WOLFSSL_SUCCESS on success
@@ -28787,6 +36265,16 @@
         cName->unitEnc = CTC_UTF8;
         cName->commonName[0] = '\0';
         cName->commonNameEnc = CTC_UTF8;
+        cName->serialDev[0] = '\0';
+        cName->serialDevEnc = CTC_PRINTABLE;
+    #ifdef WOLFSSL_CERT_EXT
+        cName->busCat[0] = '\0';
+        cName->busCatEnc = CTC_UTF8;
+        cName->joiC[0] = '\0';
+        cName->joiCEnc = CTC_PRINTABLE;
+        cName->joiSt[0] = '\0';
+        cName->joiStEnc = CTC_PRINTABLE;
+    #endif
         cName->email[0] = '\0';
 
 
@@ -28841,6 +36329,40 @@
             return BUFFER_E;
         }
 
+        /* ASN_SERIAL_NUMBER */
+        WOLFSSL_MSG("Copy Serial Number of Device");
+        if (CopyX509NameEntry(cName->serialDev, CTC_NAME_SIZE,
+                    dn->fullName + dn->serialIdx, dn->serialLen)
+                    != SSL_SUCCESS) {
+            return BUFFER_E;
+        }
+
+    #ifdef WOLFSSL_CERT_EXT
+        /* ASN_BUS_CAT */
+        WOLFSSL_MSG("Copy Business Category");
+        if (CopyX509NameEntry(cName->busCat, CTC_NAME_SIZE,
+                    dn->fullName + dn->bcIdx, dn->bcLen)
+                    != SSL_SUCCESS) {
+            return BUFFER_E;
+        }
+
+        /* JoI Country */
+        WOLFSSL_MSG("Copy Jurisdiction of Incorporation Country");
+        if (CopyX509NameEntry(cName->joiC, CTC_NAME_SIZE,
+                    dn->fullName + dn->jcIdx, dn->jcLen)
+                    != SSL_SUCCESS) {
+            return BUFFER_E;
+        }
+
+        /* JoI State */
+        WOLFSSL_MSG("Copy Jurisdiction of Incorporation State");
+        if (CopyX509NameEntry(cName->joiSt, CTC_NAME_SIZE,
+                    dn->fullName + dn->jsIdx, dn->jsLen)
+                    != SSL_SUCCESS) {
+            return BUFFER_E;
+        }
+    #endif
+
         WOLFSSL_MSG("Copy Email");
         if (CopyX509NameEntry(cName->email, CTC_NAME_SIZE,
                     dn->fullName + dn->emailIdx, dn->emailLen)
@@ -28851,6 +36373,433 @@
         return WOLFSSL_SUCCESS;
     }
 
+#ifdef WOLFSSL_CERT_REQ
+    static int ReqCertFromX509(Cert* cert, WOLFSSL_X509* req)
+    {
+        int ret;
+
+        if (wc_InitCert(cert) != 0)
+            return WOLFSSL_FAILURE;
+
+        ret = CopyX509NameToCertName(&req->subject, &cert->subject);
+        if (ret == WOLFSSL_SUCCESS) {
+            cert->version = req->version;
+            cert->isCA = req->isCa;
+#ifdef WOLFSSL_CERT_EXT
+            if (req->subjKeyIdSz != 0) {
+                XMEMCPY(cert->skid, req->subjKeyId, req->subjKeyIdSz);
+                cert->skidSz = req->subjKeyIdSz;
+            }
+            if (req->keyUsageSet)
+                cert->keyUsage = req->keyUsage;
+            /* Extended Key Usage not supported. */
+#endif
+        }
+
+        return ret;
+    }
+#endif
+
+    /* convert a WOLFSSL_X509 to a Cert structure for writing out */
+    static int CertFromX509(Cert* cert, WOLFSSL_X509* x509)
+    {
+        int ret;
+        #ifdef WOLFSSL_CERT_EXT
+        int i;
+        #endif
+
+        WOLFSSL_ENTER("wolfSSL_X509_to_Cert()");
+
+        if (x509 == NULL || cert == NULL) {
+            return BAD_FUNC_ARG;
+        }
+
+        wc_InitCert(cert);
+
+        cert->version = (int)wolfSSL_X509_get_version(x509);
+
+    #ifdef WOLFSSL_ALT_NAMES
+        if (x509->notBefore.length > 0) {
+            if ((x509->notBefore.length + 2) < CTC_DATE_SIZE) {
+                cert->beforeDate[0] = x509->notBefore.type;
+                cert->beforeDate[1] = x509->notBefore.length;
+                XMEMCPY(&cert->beforeDate[2], x509->notBefore.data,
+                        x509->notBefore.length);
+                cert->beforeDateSz = x509->notBefore.length + 2;
+            }
+            else {
+                WOLFSSL_MSG("Not before date too large");
+                return WOLFSSL_FAILURE;
+            }
+        }
+        else {
+            cert->beforeDateSz = 0;
+        }
+        if (x509->notAfter.length > 0) {
+            if ((x509->notAfter.length + 2) < CTC_DATE_SIZE) {
+                cert->afterDate[0] = x509->notAfter.type;
+                cert->afterDate[1] = x509->notAfter.length;
+                XMEMCPY(&cert->afterDate[2], x509->notAfter.data,
+                        x509->notAfter.length);
+                cert->afterDateSz = x509->notAfter.length + 2;
+            }
+            else {
+                WOLFSSL_MSG("Not after date too large");
+                return WOLFSSL_FAILURE;
+            }
+        }
+        else {
+            cert->afterDateSz = 0;
+        }
+
+        cert->altNamesSz = FlattenAltNames(cert->altNames,
+                sizeof(cert->altNames), x509->altNames);
+
+    #endif /* WOLFSSL_ALT_NAMES */
+
+        cert->sigType = wolfSSL_X509_get_signature_type(x509);
+        cert->keyType = x509->pubKeyOID;
+        cert->isCA    = wolfSSL_X509_get_isCA(x509);
+
+    #ifdef WOLFSSL_CERT_EXT
+        if (x509->subjKeyIdSz < CTC_MAX_SKID_SIZE) {
+            XMEMCPY(cert->skid, x509->subjKeyId, x509->subjKeyIdSz);
+            cert->skidSz = (int)x509->subjKeyIdSz;
+        }
+        else {
+            WOLFSSL_MSG("Subject Key ID too large");
+            return WOLFSSL_FAILURE;
+        }
+
+        if (x509->authKeyIdSz < CTC_MAX_AKID_SIZE) {
+            XMEMCPY(cert->akid, x509->authKeyId, x509->authKeyIdSz);
+            cert->akidSz = (int)x509->authKeyIdSz;
+        }
+        else {
+            WOLFSSL_MSG("Auth Key ID too large");
+            return WOLFSSL_FAILURE;
+        }
+
+        for (i = 0; i < x509->certPoliciesNb; i++) {
+            /* copy the smaller of MAX macros, by default they are currently equal*/
+            if ((int)CTC_MAX_CERTPOL_SZ <= (int)MAX_CERTPOL_SZ) {
+                XMEMCPY(cert->certPolicies[i], x509->certPolicies[i],
+                        CTC_MAX_CERTPOL_SZ);
+            }
+            else {
+                XMEMCPY(cert->certPolicies[i], x509->certPolicies[i],
+                        MAX_CERTPOL_SZ);
+            }
+        }
+        cert->certPoliciesNb = (word16)x509->certPoliciesNb;
+
+        cert->keyUsage = x509->keyUsage;
+    #endif /* WOLFSSL_CERT_EXT */
+
+    #ifdef WOLFSSL_CERT_REQ
+        /* copy over challenge password for REQ certs */
+        XMEMCPY(cert->challengePw, x509->challengePw, CTC_NAME_SIZE);
+    #endif
+
+        if (x509->serialSz <= CTC_SERIAL_SIZE) {
+            XMEMCPY(cert->serial, x509->serial, x509->serialSz);
+        }
+        else {
+            WOLFSSL_MSG("Serial size error");
+            return WOLFSSL_FAILURE;
+        }
+
+        /* copy over Name structures */
+        if (x509->issuerSet)
+            cert->selfSigned = 0;
+        if ((ret = CopyX509NameToCertName(&(x509->issuer), &(cert->issuer)))
+            != WOLFSSL_SUCCESS) {
+            WOLFSSL_MSG("Error copying over issuer names");
+            WOLFSSL_LEAVE("wolfSSL_X509_to_Cert()", ret);
+            return WOLFSSL_FAILURE;
+        }
+        if ((ret = CopyX509NameToCertName(&(x509->subject), &(cert->subject)))
+            != WOLFSSL_SUCCESS) {
+            WOLFSSL_MSG("Error copying over subject names");
+            WOLFSSL_LEAVE("wolfSSL_X509_to_Cert()", ret);
+            return WOLFSSL_FAILURE;
+        }
+
+        cert->heap = x509->heap;
+
+        return WOLFSSL_SUCCESS;
+    }
+
+
+    /* returns the sig type to use on success i.e CTC_SHAwRSA and WOLFSSL_FALURE
+     * on fail case */
+    static int wolfSSL_sigTypeFromPKEY(WOLFSSL_EVP_MD* md,
+            WOLFSSL_EVP_PKEY* pkey)
+    {
+        int hashType;
+        int sigType = WOLFSSL_FAILURE;
+
+        /* Convert key type and hash algorithm to a signature algorithm */
+        if (wolfSSL_EVP_get_hashinfo(md, &hashType, NULL) == WOLFSSL_FAILURE)
+            return WOLFSSL_FAILURE;
+
+
+        if (pkey->type == EVP_PKEY_RSA) {
+            switch (hashType) {
+                case WC_HASH_TYPE_SHA:
+                    sigType = CTC_SHAwRSA;
+                    break;
+                case WC_HASH_TYPE_SHA224:
+                    sigType = CTC_SHA224wRSA;
+                    break;
+                case WC_HASH_TYPE_SHA256:
+                    sigType = CTC_SHA256wRSA;
+                    break;
+                case WC_HASH_TYPE_SHA384:
+                    sigType = CTC_SHA384wRSA;
+                    break;
+                case WC_HASH_TYPE_SHA512:
+                    sigType = CTC_SHA512wRSA;
+                    break;
+                default:
+                    return WOLFSSL_FAILURE;
+            }
+        }
+        else if (pkey->type == EVP_PKEY_EC) {
+            switch (hashType) {
+                case WC_HASH_TYPE_SHA:
+                    sigType = CTC_SHAwECDSA;
+                    break;
+                case WC_HASH_TYPE_SHA224:
+                    sigType = CTC_SHA224wECDSA;
+                    break;
+                case WC_HASH_TYPE_SHA256:
+                    sigType = CTC_SHA256wECDSA;
+                    break;
+                case WC_HASH_TYPE_SHA384:
+                    sigType = CTC_SHA384wECDSA;
+                    break;
+                case WC_HASH_TYPE_SHA512:
+                    sigType = CTC_SHA512wECDSA;
+                    break;
+                default:
+                    return WOLFSSL_FAILURE;
+            }
+        }
+        else
+            return WOLFSSL_FAILURE;
+        return sigType;
+    }
+
+
+    /* generates DER buffer from WOLFSSL_X509
+     * If req == 1 then creates a request DER buffer
+     *
+     * updates derSz with certificate body size on success
+     * return WOLFSSL_SUCCESS on success
+     */
+    static int wolfSSL_X509_make_der(WOLFSSL_X509* x509, int req,
+            unsigned char* der, int* derSz)
+    {
+        int ret;
+        Cert cert;
+        void* key = NULL;
+        int type = -1;
+    #ifndef NO_RSA
+        RsaKey rsa;
+    #endif
+    #ifdef HAVE_ECC
+        ecc_key ecc;
+    #endif
+        WC_RNG rng;
+        word32 idx = 0;
+
+        if (x509 == NULL || der == NULL || derSz == NULL)
+            return BAD_FUNC_ARG;
+
+    #ifndef WOLFSSL_CERT_REQ
+        if (req) {
+            WOLFSSL_MSG("WOLFSSL_CERT_REQ needed for certificate request");
+            return WOLFSSL_FAILURE;
+        }
+    #endif
+
+    #ifdef WOLFSSL_CERT_REQ
+        if (req) {
+            if (ReqCertFromX509(&cert, x509) != WOLFSSL_SUCCESS)
+                return WOLFSSL_FAILURE;
+        }
+        else
+    #endif
+        {
+            /* Create a Cert that has the certificate fields. */
+            if (CertFromX509(&cert, x509) != WOLFSSL_SUCCESS)
+                return WOLFSSL_FAILURE;
+        }
+
+        /* Create a public key object from requests public key. */
+    #ifndef NO_RSA
+        if (x509->pubKeyOID == RSAk) {
+            type = RSA_TYPE;
+            ret = wc_InitRsaKey(&rsa, x509->heap);
+            if (ret != 0)
+                return ret;
+            ret = wc_RsaPublicKeyDecode(x509->pubKey.buffer, &idx, &rsa,
+                                                           x509->pubKey.length);
+            if (ret != 0) {
+                wc_FreeRsaKey(&rsa);
+                return ret;
+            }
+            key = (void*)&rsa;
+        }
+    #endif
+    #ifdef HAVE_ECC
+        if (x509->pubKeyOID == ECDSAk) {
+            type = ECC_TYPE;
+            ret = wc_ecc_init(&ecc);
+            if (ret != 0)
+                return ret;
+            ret = wc_EccPublicKeyDecode(x509->pubKey.buffer, &idx, &ecc,
+                                                           x509->pubKey.length);
+            if (ret != 0) {
+                wc_ecc_free(&ecc);
+                return ret;
+            }
+            key = (void*)&ecc;
+        }
+    #endif
+        if (key == NULL)
+            return WOLFSSL_FAILURE;
+
+        /* Make the body of the certificate request. */
+    #ifdef WOLFSSL_CERT_REQ
+        if (req) {
+            ret = wc_MakeCertReq_ex(&cert, der, *derSz, type, key);
+        }
+        else
+    #endif
+        {
+            ret = wc_InitRng(&rng);
+            if (ret != 0)
+                return WOLFSSL_FAILURE;
+
+            ret = wc_MakeCert_ex(&cert, der, *derSz, type, key, &rng);
+            wc_FreeRng(&rng);
+        }
+        if (ret < 0) {
+            return ret;
+        }
+
+        /* Dispose of the public key object. */
+    #ifndef NO_RSA
+        if (x509->pubKeyOID == RSAk)
+            wc_FreeRsaKey(&rsa);
+    #endif
+    #ifdef HAVE_ECC
+        if (x509->pubKeyOID == ECDSAk)
+            wc_ecc_free(&ecc);
+    #endif
+        *derSz = ret;
+
+        return WOLFSSL_SUCCESS;
+    }
+
+
+    /* signs a der buffer for the WOLFSSL_X509 structure using the PKEY and MD
+     * hash passed in
+     *
+     * WARNING: this free's and replaces the existing DER buffer in the
+     *          WOLFSSL_X509 with the newly signed buffer.
+     * returns size of signed buffer on success and negative values on fail
+     */
+    static int wolfSSL_X509_resign_cert(WOLFSSL_X509* x509, int req,
+            unsigned char* der, int derSz, int certBodySz, WOLFSSL_EVP_MD* md,
+            WOLFSSL_EVP_PKEY* pkey)
+    {
+        int ret;
+        void* key = NULL;
+        int type = -1;
+        int sigType;
+        WC_RNG rng;
+
+        sigType = wolfSSL_sigTypeFromPKEY(md, pkey);
+        if (sigType == WOLFSSL_FAILURE)
+            return WOLFSSL_FATAL_ERROR;
+
+
+        /* Get the private key object and type from pkey. */
+    #ifndef NO_RSA
+        if (pkey->type == EVP_PKEY_RSA) {
+            type = RSA_TYPE;
+            key = pkey->rsa->internal;
+        }
+    #endif
+    #ifdef HAVE_ECC
+        if (pkey->type == EVP_PKEY_EC) {
+            type = ECC_TYPE;
+            key = pkey->ecc->internal;
+        }
+    #endif
+
+        /* Sign the certificate request body. */
+        ret = wc_InitRng(&rng);
+        if (ret != 0)
+            return ret;
+        ret = wc_SignCert_ex(certBodySz, sigType, der, derSz, type, key, &rng);
+        wc_FreeRng(&rng);
+        if (ret < 0)
+            return ret;
+
+        /* Put in the new certificate encoding into the x509 object. */
+        FreeDer(&x509->derCert);
+        type = CERT_TYPE;
+    #ifdef WOLFSSL_REQ_CERT
+        if (req) {
+            type = CERTREQ_TYPE;
+        }
+    #endif
+
+        if (AllocDer(&x509->derCert, ret, type, NULL) != 0)
+            return WOLFSSL_FATAL_ERROR;
+        XMEMCPY(x509->derCert->buffer, der, ret);
+        x509->derCert->length = ret;
+
+        (void)req;
+        return ret;
+    }
+
+
+    /* returns the size of signature on success */
+    int wolfSSL_X509_sign(WOLFSSL_X509* x509, WOLFSSL_EVP_PKEY* pkey,
+            const WOLFSSL_EVP_MD* md)
+    {
+        int  ret;
+        byte der[4096]; /* @TODO dynamic set based on expected cert size */
+        int  derSz = sizeof(der);
+
+        WOLFSSL_ENTER("wolfSSL_X509_sign");
+
+        if (x509 == NULL || pkey == NULL || md == NULL)
+            return WOLFSSL_FAILURE;
+
+        x509->sigOID = wolfSSL_sigTypeFromPKEY((WOLFSSL_EVP_MD*)md, pkey);
+        if ((ret = wolfSSL_X509_make_der(x509, 0, der, &derSz)) !=
+                WOLFSSL_SUCCESS) {
+            WOLFSSL_MSG("Unable to make DER for X509");
+            WOLFSSL_LEAVE("wolfSSL_X509_sign", ret);
+            return WOLFSSL_FAILURE;
+        }
+
+        ret = wolfSSL_X509_resign_cert(x509, 0, der, sizeof(der), derSz,
+                (WOLFSSL_EVP_MD*)md, pkey);
+        if (ret <= 0) {
+            WOLFSSL_LEAVE("wolfSSL_X509_sign", ret);
+            return WOLFSSL_FAILURE;
+        }
+
+        return ret;
+    }
+
 
     /* Converts the x509 name structure into DER format.
      *
@@ -28865,10 +36814,12 @@
         CertName cName;
         unsigned char buf[256]; /* ASN_MAX_NAME */
         int sz;
+        WOLFSSL_ENTER("wolfSSL_i2d_X509_NAME");
 
         if (out == NULL || name == NULL) {
             return BAD_FUNC_ARG;
         }
+        XMEMSET(&cName, 0, sizeof(CertName));
 
         if (CopyX509NameToCertName(name, &cName) != SSL_SUCCESS) {
             WOLFSSL_MSG("Error converting x509 name to internal CertName");
@@ -28900,28 +36851,40 @@
 
     /* Compares the two X509 names. If the size of x is larger then y then a
      * positive value is returned if x is smaller a negative value is returned.
-     * In the case that the sizes are equal a the value of memcmp between the
+     * In the case that the sizes are equal a the value of strcmp between the
      * two names is returned.
      *
-     * x First name for comparision
+     * x First name for comparison
      * y Second name to compare with x
      */
     int wolfSSL_X509_NAME_cmp(const WOLFSSL_X509_NAME* x,
             const WOLFSSL_X509_NAME* y)
     {
-        WOLFSSL_STUB("wolfSSL_X509_NAME_cmp");
+        const char* _x;
+        const char* _y;
+        WOLFSSL_ENTER("wolfSSL_X509_NAME_cmp");
 
         if (x == NULL || y == NULL) {
             WOLFSSL_MSG("Bad argument passed in");
             return -2;
         }
 
-        if ((x->sz - y->sz) != 0) {
+        if (x == y) {
+            return 0; /* match */
+        }
+
+        if (x->sz != y->sz) {
             return x->sz - y->sz;
         }
-        else {
-            return XMEMCMP(x->name, y->name, x->sz); /* y sz is the same */
-        }
+
+        /*
+         * If the name member is not set or is immediately null terminated then
+         * compare the staticName member
+         */
+        _x = (x->name && *x->name) ? x->name : x->staticName;
+        _y = (y->name && *y->name) ? y->name : y->staticName;
+
+        return XSTRNCMP(_x, _y, x->sz); /* y sz is the same */
     }
 
 
@@ -28942,39 +36905,13 @@
             return NULL;
         }
 
-        if (bp->type == WOLFSSL_BIO_MEMORY) {
-            l = (long)wolfSSL_BIO_ctrl_pending(bp);
-            if (l <= 0) {
-                WOLFSSL_MSG("No pending data in WOLFSSL_BIO");
-                return NULL;
-            }
-        }
-        else if (bp->type == WOLFSSL_BIO_FILE) {
-#ifndef NO_FILESYSTEM
-            /* Read in next certificate from file but no more. */
-            i = XFTELL(bp->file);
-            if (i < 0)
-                return NULL;
-            if (XFSEEK(bp->file, 0, SEEK_END) != 0)
-                return NULL;
-            l = XFTELL(bp->file);
-            if (l < 0)
-                return NULL;
-            if (XFSEEK(bp->file, i, SEEK_SET) != 0)
-                return NULL;
-
-            /* check calculated length */
-            if (l - i < 0)
-                return NULL;
-
-            l -= i;
-#else
-            WOLFSSL_MSG("Unable to read file with NO_FILESYSTEM defined");
-            return NULL;
-#endif /* !NO_FILESYSTEM */
-        }
-        else
-            return NULL;
+        if ((l = wolfSSL_BIO_get_len(bp)) <= 0) {
+    #if defined(OPENSSL_ALL) || defined(WOLFSSL_NGINX)
+            /* No certificate in buffer */
+            WOLFSSL_ERROR(ASN_NO_PEM_HEADER);
+    #endif
+            return NULL;
+        }
 
         pem = (unsigned char*)XMALLOC(l, 0, DYNAMIC_TYPE_PEM);
         if (pem == NULL)
@@ -29023,32 +36960,84 @@
         return x509;
     }
 
-#if defined(HAVE_CRL) && !defined(NO_FILESYSTEM)
-    WOLFSSL_API WOLFSSL_X509_CRL* wolfSSL_PEM_read_X509_CRL(XFILE fp, WOLFSSL_X509_CRL **crl,
-                                                    pem_password_cb *cb, void *u)
-    {
-#if defined(WOLFSSL_PEM_TO_DER) || defined(WOLFSSL_DER_TO_PEM)
+    WOLFSSL_X509_CRL *wolfSSL_PEM_read_bio_X509_CRL(WOLFSSL_BIO *bp,
+            WOLFSSL_X509_CRL **x, pem_password_cb *cb, void *u)
+    {
+#if defined(WOLFSSL_PEM_TO_DER) && defined(HAVE_CRL)
         unsigned char* pem = NULL;
-        DerBuffer* der = NULL;
         int pemSz;
         int derSz;
-        long  i = 0, l;
-        WOLFSSL_X509_CRL* newcrl;
-
-        WOLFSSL_ENTER("wolfSSL_PEM_read_X509_CRL");
-
-        if (fp == NULL) {
-            WOLFSSL_LEAVE("wolfSSL_PEM_read_X509_CRL", BAD_FUNC_ARG);
-            return NULL;
-        }
-        /* Read in CRL from file */
+        DerBuffer* der = NULL;
+        WOLFSSL_X509_CRL* crl = NULL;
+
+        if ((pemSz = wolfSSL_BIO_get_len(bp)) <= 0) {
+            goto err;
+        }
+
+        pem = (unsigned char*)XMALLOC(pemSz, 0, DYNAMIC_TYPE_PEM);
+        if (pem == NULL) {
+            goto err;
+        }
+
+        if (wolfSSL_BIO_read(bp, pem, pemSz) != pemSz) {
+            goto err;
+        }
+
+        if((PemToDer(pem, pemSz, CRL_TYPE, &der, NULL, NULL, NULL)) < 0) {
+            goto err;
+        }
+        derSz = der->length;
+        if((crl = wolfSSL_d2i_X509_CRL(x, der->buffer, derSz)) == NULL) {
+            goto err;
+        }
+
+err:
+        if(pem != NULL) {
+            XFREE(pem, 0, DYNAMIC_TYPE_PEM);
+        }
+        if(der != NULL) {
+            FreeDer(&der);
+        }
+
+        (void)cb;
+        (void)u;
+
+        return crl;
+#else
+        (void)bp;
+        (void)x;
+        (void)cb;
+        (void)u;
+
+        return NULL;
+#endif
+    }
+
+#if !defined(NO_FILESYSTEM)
+    static void* wolfSSL_PEM_read_X509_ex(XFILE fp, void **x,
+        pem_password_cb *cb, void *u, int type)
+    {
+        unsigned char* pem = NULL;
+        int pemSz;
+        long i = 0, l;
+        void *newx509;
+        int derSz;
+        DerBuffer* der = NULL;
+
+        WOLFSSL_ENTER("wolfSSL_PEM_read_X509");
+
+        if (fp == XBADFILE) {
+            WOLFSSL_LEAVE("wolfSSL_PEM_read_X509", BAD_FUNC_ARG);
+            return NULL;
+        }
+        /* Read cert from file */
         i = XFTELL(fp);
         if (i < 0) {
-            WOLFSSL_LEAVE("wolfSSL_PEM_read_X509_CRL", BAD_FUNC_ARG);
-            return NULL;
-        }
-
-        if (XFSEEK(fp, 0, SEEK_END) != 0)
+            WOLFSSL_LEAVE("wolfSSL_PEM_read_X509", BAD_FUNC_ARG);
+            return NULL;
+        }
+
+        if (XFSEEK(fp, 0, XSEEK_END) != 0)
             return NULL;
         l = XFTELL(fp);
         if (l < 0)
@@ -29056,38 +37045,439 @@
         if (XFSEEK(fp, i, SEEK_SET) != 0)
             return NULL;
         pemSz = (int)(l - i);
+
         /* check calculated length */
-        if (pemSz  < 0)
-            return NULL;
-        if((pem = (unsigned char*)XMALLOC(pemSz, 0, DYNAMIC_TYPE_PEM)) == NULL)
-            return NULL;
-
-        if((int)XFREAD((char *)pem, 1, pemSz, fp) != pemSz)
+        if (pemSz > MAX_WOLFSSL_FILE_SIZE || pemSz < 0) {
+            WOLFSSL_MSG("PEM_read_X509_ex file size error");
+            return NULL;
+        }
+
+        /* allocate pem buffer */
+        pem = (unsigned char*)XMALLOC(pemSz, NULL, DYNAMIC_TYPE_PEM);
+        if (pem == NULL)
+            return NULL;
+
+        if ((int)XFREAD((char *)pem, 1, pemSz, fp) != pemSz)
             goto err_exit;
-        if((PemToDer(pem, pemSz, CRL_TYPE, &der, NULL, NULL, NULL)) < 0)
-            goto err_exit;
-        XFREE(pem, 0, DYNAMIC_TYPE_PEM);
-
-        derSz = der->length;
-        if((newcrl = wolfSSL_d2i_X509_CRL(crl, (const unsigned char *)der->buffer, derSz)) == NULL)
-            goto err_exit;
-        FreeDer(&der);
-
-        return newcrl;
+
+        switch (type) {
+            case CERT_TYPE:
+                newx509 = (void *)wolfSSL_X509_load_certificate_buffer(pem,
+                    pemSz, WOLFSSL_FILETYPE_PEM);
+                break;
+
+        #ifdef HAVE_CRL
+            case CRL_TYPE:
+                if ((PemToDer(pem, pemSz, CRL_TYPE, &der, NULL, NULL, NULL)) < 0)
+                    goto err_exit;
+                derSz = der->length;
+                newx509 = (void*)wolfSSL_d2i_X509_CRL((WOLFSSL_X509_CRL **)x,
+                    (const unsigned char *)der->buffer, derSz);
+                if (newx509 == NULL)
+                    goto err_exit;
+                FreeDer(&der);
+                break;
+        #endif
+
+            default:
+                goto err_exit;
+        }
+        if (x != NULL) {
+            *x = newx509;
+        }
+        XFREE(pem, NULL, DYNAMIC_TYPE_PEM);
+        return newx509;
 
     err_exit:
-        if(pem != NULL)
-            XFREE(pem, 0, DYNAMIC_TYPE_PEM);
-        if(der != NULL)
+        if (pem != NULL)
+            XFREE(pem, NULL, DYNAMIC_TYPE_PEM);
+        if (der != NULL)
             FreeDer(&der);
-        return NULL;
-
+
+        /* unused */
         (void)cb;
         (void)u;
-    #endif
-
-    }
-#endif
+	    (void)derSz;
+
+        return NULL;
+    }
+
+    WOLFSSL_API WOLFSSL_X509* wolfSSL_PEM_read_X509(XFILE fp, WOLFSSL_X509 **x,
+                                                    pem_password_cb *cb, void *u)
+    {
+        return (WOLFSSL_X509* )wolfSSL_PEM_read_X509_ex(fp, (void **)x, cb, u, CERT_TYPE);
+    }
+
+#if defined(HAVE_CRL)
+    WOLFSSL_API WOLFSSL_X509_CRL* wolfSSL_PEM_read_X509_CRL(XFILE fp, WOLFSSL_X509_CRL **crl,
+                                                    pem_password_cb *cb, void *u)
+    {
+        return (WOLFSSL_X509_CRL* )wolfSSL_PEM_read_X509_ex(fp, (void **)crl, cb, u, CRL_TYPE);
+    }
+#endif
+
+    int wolfSSL_PEM_write_X509(XFILE fp, WOLFSSL_X509* x)
+    {
+        int ret;
+        WOLFSSL_BIO* bio;
+
+        if (x == NULL)
+            return 0;
+
+        bio = wolfSSL_BIO_new(wolfSSL_BIO_s_file());
+        if (bio == NULL)
+            return 0;
+
+        if (wolfSSL_BIO_set_fp(bio, fp, BIO_NOCLOSE) != WOLFSSL_SUCCESS) {
+            wolfSSL_BIO_free(bio);
+            bio = NULL;
+        }
+
+        ret = wolfSSL_PEM_write_bio_X509(bio, x);
+
+        if (bio != NULL)
+            wolfSSL_BIO_free(bio);
+
+        return ret;
+    }
+#endif /* !NO_FILESYSTEM */
+
+    #define PEM_BEGIN              "-----BEGIN "
+    #define PEM_BEGIN_SZ           11
+    #define PEM_END                "-----END "
+    #define PEM_END_SZ             9
+    #define PEM_HDR_FIN            "-----"
+    #define PEM_HDR_FIN_SZ         5
+    #define PEM_HDR_FIN_EOL_NEWLINE   "-----\n"
+    #define PEM_HDR_FIN_EOL_NULL_TERM "-----\0"
+    #define PEM_HDR_FIN_EOL_SZ     6
+
+    int wolfSSL_PEM_read_bio(WOLFSSL_BIO* bio, char **name, char **header,
+                             unsigned char **data, long *len)
+    {
+        int ret = WOLFSSL_SUCCESS;
+        char pem[256];
+        int pemLen;
+        char* p;
+        char* nameStr = NULL;
+        int nameLen = 0;
+        char* headerStr = NULL;
+        int headerLen;
+        int headerFound = 0;
+        unsigned char* der = NULL;
+        word32 derLen = 0;
+
+        if (bio == NULL || name == NULL || header == NULL || data == NULL ||
+                                                                  len == NULL) {
+            return WOLFSSL_FAILURE;
+        }
+
+        /* Find header line. */
+        pem[sizeof(pem) - 1] = '\0';
+        while ((pemLen = wolfSSL_BIO_gets(bio, pem, sizeof(pem) - 1)) > 0) {
+            if (XSTRNCMP(pem, PEM_BEGIN, PEM_BEGIN_SZ) == 0)
+                break;
+        }
+        if (pemLen <= 0)
+            ret = WOLFSSL_FAILURE;
+        /* Have a header line. */
+        if (ret == WOLFSSL_SUCCESS) {
+            while (pem[pemLen - 1] == '\r' || pem[pemLen - 1] == '\n')
+                pemLen--;
+            pem[pemLen] = '\0';
+            if (XSTRNCMP(pem + pemLen - PEM_HDR_FIN_SZ, PEM_HDR_FIN,
+                                                         PEM_HDR_FIN_SZ) != 0) {
+                ret = WOLFSSL_FAILURE;
+            }
+        }
+
+        /* Get out name. */
+        if (ret == WOLFSSL_SUCCESS) {
+            nameLen = pemLen - PEM_BEGIN_SZ - PEM_HDR_FIN_SZ;
+            nameStr = (char*)XMALLOC(nameLen + 1, NULL,
+                                                       DYNAMIC_TYPE_TMP_BUFFER);
+            if (nameStr == NULL)
+                ret = WOLFSSL_FAILURE;
+        }
+        if (ret == WOLFSSL_SUCCESS) {
+            XSTRNCPY(nameStr, pem + PEM_BEGIN_SZ, nameLen);
+            nameStr[nameLen] = '\0';
+
+            /* Get header of PEM - encryption header. */
+            headerLen = 0;
+            while ((pemLen = wolfSSL_BIO_gets(bio, pem, sizeof(pem) - 1)) > 0) {
+                while (pemLen > 0 && (pem[pemLen - 1] == '\r' ||
+                                                     pem[pemLen - 1] == '\n')) {
+                    pemLen--;
+                }
+                pem[pemLen++] = '\n';
+                pem[pemLen] = '\0';
+
+                /* Header separator is a blank line. */
+                if (pem[0] == '\n') {
+                    headerFound = 1;
+                    break;
+                }
+
+                /* Didn't find a blank line - no header. */
+                if (XSTRNCMP(pem, PEM_END, PEM_END_SZ) == 0) {
+                    der = (unsigned char*)headerStr;
+                    derLen = headerLen;
+                    /* Empty header - empty string. */
+                    headerStr = (char*)XMALLOC(1, NULL,
+                                                       DYNAMIC_TYPE_TMP_BUFFER);
+                    if (headerStr == NULL)
+                        ret = WOLFSSL_FAILURE;
+                    else
+                        headerStr[0] = '\0';
+                    break;
+                }
+
+                p = (char*)XREALLOC(headerStr, headerLen + pemLen + 1, NULL,
+                                                       DYNAMIC_TYPE_TMP_BUFFER);
+                if (p == NULL) {
+                    ret = WOLFSSL_FAILURE;
+                    break;
+                }
+
+                headerStr = p;
+                XMEMCPY(headerStr + headerLen, pem, pemLen + 1);
+                headerLen += pemLen;
+            }
+            if (pemLen <= 0)
+                ret = WOLFSSL_FAILURE;
+        }
+
+        /* Get body of PEM - if there was a header */
+        if (ret == WOLFSSL_SUCCESS && headerFound) {
+            derLen = 0;
+            while ((pemLen = wolfSSL_BIO_gets(bio, pem, sizeof(pem) - 1)) > 0) {
+                while (pemLen > 0 && (pem[pemLen - 1] == '\r' ||
+                                                     pem[pemLen - 1] == '\n')) {
+                    pemLen--;
+                }
+                pem[pemLen++] = '\n';
+                pem[pemLen] = '\0';
+
+                if (XSTRNCMP(pem, PEM_END, PEM_END_SZ) == 0)
+                    break;
+
+                p = (char*)XREALLOC(der, derLen + pemLen + 1, NULL,
+                                                       DYNAMIC_TYPE_TMP_BUFFER);
+                if (p == NULL) {
+                    ret = WOLFSSL_FAILURE;
+                    break;
+                }
+
+                der = (unsigned char*)p;
+                XMEMCPY(der + derLen, pem, pemLen + 1);
+                derLen += pemLen;
+            }
+            if (pemLen <= 0)
+                ret = WOLFSSL_FAILURE;
+        }
+
+        /* Check trailer. */
+        if (ret == WOLFSSL_SUCCESS) {
+            if (XSTRNCMP(pem + PEM_END_SZ, nameStr, nameLen) != 0)
+                ret = WOLFSSL_FAILURE;
+        }
+        if (ret == WOLFSSL_SUCCESS) {
+            if (XSTRNCMP(pem + PEM_END_SZ + nameLen,
+                    PEM_HDR_FIN_EOL_NEWLINE,
+                    PEM_HDR_FIN_EOL_SZ) != 0 &&
+                XSTRNCMP(pem + PEM_END_SZ + nameLen,
+                        PEM_HDR_FIN_EOL_NULL_TERM,
+                        PEM_HDR_FIN_EOL_SZ) != 0) {
+                ret = WOLFSSL_FAILURE;
+            }
+        }
+
+        /* Base64 decode body. */
+        if (ret == WOLFSSL_SUCCESS) {
+            if (Base64_Decode(der, derLen, der, &derLen) != 0)
+                ret = WOLFSSL_FAILURE;
+        }
+
+        if (ret == WOLFSSL_SUCCESS) {
+            *name = nameStr;
+            *header = headerStr;
+            *data = der;
+            *len = derLen;
+            nameStr = NULL;
+            headerStr = NULL;
+            der = NULL;
+        }
+
+        if (nameStr != NULL)
+            XFREE(nameStr, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        if (headerStr != NULL)
+            XFREE(headerStr, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        if (der != NULL)
+            XFREE(der, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+
+        return ret;
+    }
+
+    int wolfSSL_PEM_write_bio(WOLFSSL_BIO* bio, const char *name,
+                              const char *header, const unsigned char *data,
+                              long len)
+    {
+        int err = 0;
+        int outSz = 0;
+        int nameLen;
+        int headerLen;
+        byte* pem = NULL;
+        word32 pemLen;
+        word32 derLen = (word32)len;
+
+        if (bio == NULL || name == NULL || header == NULL || data == NULL)
+            return 0;
+
+        nameLen = (int)XSTRLEN(name);
+        headerLen = (int)XSTRLEN(header);
+
+        pemLen = (derLen + 2) / 3 * 4;
+        pemLen += (pemLen + 63) / 64;
+
+        pem = (byte*)XMALLOC(pemLen, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        err = pem == NULL;
+        if (!err)
+            err = Base64_Encode(data, derLen, pem, &pemLen) != 0;
+
+        if (!err) {
+            err = wolfSSL_BIO_write(bio, PEM_BEGIN, PEM_BEGIN_SZ) !=
+                                                              (int)PEM_BEGIN_SZ;
+        }
+        if (!err)
+            err = wolfSSL_BIO_write(bio, name, nameLen) != nameLen;
+        if (!err) {
+            err = wolfSSL_BIO_write(bio, PEM_HDR_FIN_EOL_NEWLINE,
+                    PEM_HDR_FIN_EOL_SZ) != (int)PEM_HDR_FIN_EOL_SZ;
+        }
+        if (!err && headerLen > 0) {
+            err = wolfSSL_BIO_write(bio, header, headerLen) != headerLen;
+            /* Blank line after a header and before body. */
+            if (!err)
+                err = wolfSSL_BIO_write(bio, "\n", 1) != 1;
+            headerLen++;
+        }
+        if (!err)
+            err = wolfSSL_BIO_write(bio, pem, pemLen) != (int)pemLen;
+        if (!err)
+            err = wolfSSL_BIO_write(bio, PEM_END, PEM_END_SZ) !=
+                                                                (int)PEM_END_SZ;
+        if (!err)
+            err = wolfSSL_BIO_write(bio, name, nameLen) != nameLen;
+        if (!err) {
+            err = wolfSSL_BIO_write(bio, PEM_HDR_FIN_EOL_NEWLINE,
+                    PEM_HDR_FIN_EOL_SZ) != (int)PEM_HDR_FIN_EOL_SZ;
+        }
+
+        if (!err) {
+            outSz = PEM_BEGIN_SZ + nameLen + PEM_HDR_FIN_EOL_SZ + headerLen +
+                             pemLen + PEM_END_SZ + nameLen + PEM_HDR_FIN_EOL_SZ;
+        }
+
+        if (pem != NULL)
+            XFREE(pem, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+
+        return outSz;
+    }
+
+#if !defined(NO_FILESYSTEM)
+    int wolfSSL_PEM_read(XFILE fp, char **name, char **header,
+                         unsigned char **data, long *len)
+    {
+        int ret;
+        WOLFSSL_BIO* bio;
+
+        if (name == NULL || header == NULL || data == NULL || len == NULL)
+            return WOLFSSL_FAILURE;
+
+        bio = wolfSSL_BIO_new(wolfSSL_BIO_s_file());
+        if (bio == NULL)
+            return 0;
+
+        if (wolfSSL_BIO_set_fp(bio, fp, BIO_NOCLOSE) != WOLFSSL_SUCCESS) {
+            wolfSSL_BIO_free(bio);
+            bio = NULL;
+        }
+
+        ret = wolfSSL_PEM_read_bio(bio, name, header, data, len);
+
+        if (bio != NULL)
+            wolfSSL_BIO_free(bio);
+
+        return ret;
+    }
+
+    int wolfSSL_PEM_write(XFILE fp, const char *name, const char *header,
+                          const unsigned char *data, long len)
+    {
+        int ret;
+        WOLFSSL_BIO* bio;
+
+        if (name == NULL || header == NULL || data == NULL)
+            return 0;
+
+        bio = wolfSSL_BIO_new(wolfSSL_BIO_s_file());
+        if (bio == NULL)
+            return 0;
+
+        if (wolfSSL_BIO_set_fp(bio, fp, BIO_NOCLOSE) != WOLFSSL_SUCCESS) {
+            wolfSSL_BIO_free(bio);
+            bio = NULL;
+        }
+
+        ret = wolfSSL_PEM_write_bio(bio, name, header, data, len);
+
+        if (bio != NULL)
+            wolfSSL_BIO_free(bio);
+
+        return ret;
+    }
+#endif
+
+    int wolfSSL_PEM_get_EVP_CIPHER_INFO(char* header, EncryptedInfo* cipher)
+    {
+        if (header == NULL || cipher == NULL)
+            return WOLFSSL_FAILURE;
+
+        XMEMSET(cipher, 0, sizeof(*cipher));
+
+        if (wc_EncryptedInfoParse(cipher, &header, XSTRLEN(header)) != 0)
+            return WOLFSSL_FAILURE;
+
+        return WOLFSSL_SUCCESS;
+    }
+
+    int wolfSSL_PEM_do_header(EncryptedInfo* cipher, unsigned char* data,
+                              long* len, pem_password_cb* callback, void* ctx)
+    {
+        int ret = WOLFSSL_SUCCESS;
+        char password[NAME_SZ];
+        int passwordSz;
+
+        if (cipher == NULL || data == NULL || len == NULL || callback == NULL)
+            return WOLFSSL_FAILURE;
+
+        passwordSz = callback(password, sizeof(password), PEM_PASS_READ, ctx);
+        if (passwordSz < 0)
+            ret = WOLFSSL_FAILURE;
+
+        if (ret == WOLFSSL_SUCCESS) {
+            if (wc_BufferKeyDecrypt(cipher, data, (word32)*len, (byte*)password,
+                                                     passwordSz, WC_MD5) != 0) {
+                ret = WOLFSSL_FAILURE;
+            }
+        }
+
+        if (passwordSz > 0)
+            XMEMSET(password, 0, passwordSz);
+
+        return ret;
+    }
 
     /*
      * bp : bio to read X509 from
@@ -29103,13 +37493,152 @@
         /* AUX info is; trusted/rejected uses, friendly name, private key id,
          * and potentially a stack of "other" info. wolfSSL does not store
          * friendly name or private key id yet in WOLFSSL_X509 for human
-         * readibility and does not support extra trusted/rejected uses for
+         * readability and does not support extra trusted/rejected uses for
          * root CA. */
         return wolfSSL_PEM_read_bio_X509(bp, x, cb, u);
     }
 
+
+#ifdef OPENSSL_ALL
+    /* create and return a new WOLFSSL_X509_PKEY structure or NULL on failure */
+    static WOLFSSL_X509_PKEY* wolfSSL_X509_PKEY_new(void* heap)
+    {
+        WOLFSSL_X509_PKEY* ret;
+
+        ret = (WOLFSSL_X509_PKEY*)XMALLOC(sizeof(WOLFSSL_X509_PKEY), heap,
+            DYNAMIC_TYPE_KEY);
+        if (ret != NULL) {
+            XMEMSET(ret, 0, sizeof(WOLFSSL_X509_PKEY));
+            ret->heap = heap;
+        }
+        return ret;
+    }
+
+
+    /* sets the values of X509_PKEY based on certificate passed in
+     * return WOLFSSL_SUCCESS on success */
+    static int wolfSSL_X509_PKEY_set(WOLFSSL_X509_PKEY* xPkey,
+            WOLFSSL_X509* x509)
+    {
+        if (xPkey == NULL || x509 == NULL) {
+            return BAD_FUNC_ARG;
+        }
+        wolfSSL_EVP_PKEY_free(xPkey->dec_pkey);
+        xPkey->dec_pkey = wolfSSL_X509_get_pubkey(x509);
+        if (xPkey->dec_pkey == NULL) {
+            return WOLFSSL_FAILURE;
+        }
+        return WOLFSSL_SUCCESS;
+    }
+
+
+    /* free up all memory used by "xPkey" passed in */
+    static void wolfSSL_X509_PKEY_free(WOLFSSL_X509_PKEY* xPkey)
+    {
+        if (xPkey != NULL) {
+            wolfSSL_EVP_PKEY_free(xPkey->dec_pkey);
+        }
+        XFREE(xPkey, xPkey->heap, DYNAMIC_TYPE_KEY);
+    }
+
+
+    /* Takes control of x509 on success
+     * helper function to break out code needed to set WOLFSSL_X509_INFO up
+     * free's "info" passed in if is not defaults
+     *
+     * returns WOLFSSL_SUCCESS on success
+     */
+    static int wolfSSL_X509_INFO_set(WOLFSSL_X509_INFO* info,
+            WOLFSSL_X509* x509)
+    {
+        if (info == NULL || x509 == NULL) {
+            return BAD_FUNC_ARG;
+        }
+
+        /* check is fresh "info" passed in, if not free it */
+        if (info->x509 != NULL || info->x_pkey != NULL) {
+            WOLFSSL_X509_INFO* tmp;
+
+            tmp = wolfSSL_X509_INFO_new();
+            if (tmp == NULL) {
+                WOLFSSL_MSG("Unable to create new structure");
+                return MEMORY_E;
+            }
+            wolfSSL_X509_INFO_free(info);
+            info = tmp;
+        }
+
+        info->x509 = x509;
+
+        //@TODO info->num
+        //@TODO info->enc_cipher
+        //@TODO info->enc_len
+        //@TODO info->enc_data
+        //@TODO info->crl
+
+        info->x_pkey = wolfSSL_X509_PKEY_new(x509->heap);
+        return wolfSSL_X509_PKEY_set(info->x_pkey, x509);
+    }
+
+
+    /*
+     * bio WOLFSSL_BIO to read certificates from
+     * sk  possible stack to push more X509_INFO structs to. Can be NULL
+     * cb  callback password for encrypted PEM certificates
+     * u   user input such as password
+     *
+     * returns stack on success and NULL or default stack passed in on fail
+     */
+    WOLF_STACK_OF(WOLFSSL_X509_INFO)* wolfSSL_PEM_X509_INFO_read_bio(
+        WOLFSSL_BIO* bio, WOLF_STACK_OF(WOLFSSL_X509_INFO)* sk,
+        pem_password_cb* cb, void* u)
+    {
+        WOLF_STACK_OF(WOLFSSL_X509_INFO)* localSk;
+        WOLFSSL_X509* x509 = NULL;
+        int ret = WOLFSSL_SUCCESS;
+
+        WOLFSSL_ENTER("wolfSSL_PEM_X509_INFO_read_bio");
+
+        /* attempt to used passed in stack or create a new one */
+        if (sk != NULL) {
+            localSk = sk;
+        }
+        else {
+            localSk = wolfSSL_sk_X509_INFO_new_null();
+        }
+        if (localSk == NULL) {
+            WOLFSSL_LEAVE("wolfSSL_PEM_X509_INFO_read_bio", MEMORY_E);
+            return NULL;
+        }
+
+        /* parse through BIO and push new info's found onto stack */
+        do {
+            x509 = wolfSSL_PEM_read_bio_X509(bio, NULL, cb, u);
+            if (x509 != NULL) {
+                WOLFSSL_X509_INFO* current;
+
+                current = wolfSSL_X509_INFO_new();
+                if (current == NULL) {
+                    WOLFSSL_LEAVE("wolfSSL_PEM_X509_INFO_read_bio", MEMORY_E);
+                    return NULL;
+                }
+                ret = wolfSSL_X509_INFO_set(current, x509);
+                if (ret  != WOLFSSL_SUCCESS) {
+                    wolfSSL_X509_free(x509);
+                }
+                else {
+                    wolfSSL_sk_X509_INFO_push(localSk, current);
+                }
+            }
+        } while (x509 != NULL && ret == WOLFSSL_SUCCESS);
+        WOLFSSL_LEAVE("wolfSSL_PEM_X509_INFO_read_bio", ret);
+        return localSk;
+    }
+#endif /* OPENSSL_ALL */
+
     void wolfSSL_X509_NAME_ENTRY_free(WOLFSSL_X509_NAME_ENTRY* ne)
     {
+        WOLFSSL_ENTER("wolfSSL_X509_NAME_ENTRY_free");
         if (ne != NULL) {
             if (ne->value != NULL && ne->value != &(ne->data)) {
                 wolfSSL_ASN1_STRING_free(ne->value);
@@ -29121,7 +37650,7 @@
 
     WOLFSSL_X509_NAME_ENTRY* wolfSSL_X509_NAME_ENTRY_new(void)
     {
-        WOLFSSL_X509_NAME_ENTRY* ne = NULL;
+        WOLFSSL_X509_NAME_ENTRY* ne;
 
         ne = (WOLFSSL_X509_NAME_ENTRY*)XMALLOC(sizeof(WOLFSSL_X509_NAME_ENTRY),
                 NULL, DYNAMIC_TYPE_NAME_ENTRY);
@@ -29134,11 +37663,54 @@
     }
 
 
+    /* Create a new WOLFSSL_X509_NAME_ENTRY structure based on the text passed
+     * in. Returns NULL on failure */
+    WOLFSSL_X509_NAME_ENTRY* wolfSSL_X509_NAME_ENTRY_create_by_txt(
+            WOLFSSL_X509_NAME_ENTRY **neIn, const char *txt, int type,
+            const unsigned char *data, int dataSz)
+    {
+        int nid = -1;
+        WOLFSSL_X509_NAME_ENTRY* ne = NULL;
+
+        WOLFSSL_ENTER("wolfSSL_X509_NAME_ENTRY_create_by_txt()");
+
+        if (txt == NULL) {
+            return NULL;
+        }
+
+        if (neIn != NULL) {
+            ne = *neIn;
+        }
+
+        nid = wolfSSL_OBJ_txt2nid(txt);
+        if (nid == NID_undef) {
+            WOLFSSL_MSG("Unable to find text");
+            ne = NULL;
+        }
+        else {
+            if (ne == NULL) {
+                ne = wolfSSL_X509_NAME_ENTRY_new();
+                if (ne == NULL) {
+                    return NULL;
+                }
+            }
+            ne->nid = nid;
+            ne->value = wolfSSL_ASN1_STRING_type_new(type);
+            if (ne->value != NULL) {
+                wolfSSL_ASN1_STRING_set(ne->value, (const void*)data, dataSz);
+                ne->set = 1;
+            }
+        }
+
+        return ne;
+    }
+
+
     WOLFSSL_X509_NAME_ENTRY* wolfSSL_X509_NAME_ENTRY_create_by_NID(
             WOLFSSL_X509_NAME_ENTRY** out, int nid, int type,
-            unsigned char* data, int dataSz)
-    {
-        WOLFSSL_X509_NAME_ENTRY* ne = NULL;
+            const unsigned char* data, int dataSz)
+    {
+        WOLFSSL_X509_NAME_ENTRY* ne;
 
         WOLFSSL_ENTER("wolfSSL_X509_NAME_ENTRY_create_by_NID()");
 
@@ -29159,6 +37731,193 @@
         return ne;
     }
 
+    static int RebuildFullNameAdd(DecodedName* dName, char* data)
+    {
+        int totalLen = 0;
+        int i;
+        char* fullName;
+        int idx;
+
+        if (dName->cnLen != 0)
+            totalLen += dName->cnLen + 4;
+        if (dName->snLen != 0)
+            totalLen += dName->snLen + 4;
+        if (dName->cLen != 0)
+            totalLen += dName->cLen + 3;
+        if (dName->lLen != 0)
+            totalLen += dName->lLen + 3;
+        if (dName->stLen != 0)
+            totalLen += dName->stLen + 4;
+        if (dName->oLen != 0)
+            totalLen += dName->oLen + 3;
+        if (dName->ouLen != 0)
+            totalLen += dName->ouLen + 4;
+        if (dName->emailLen != 0)
+            totalLen += dName->emailLen + 14;
+        if (dName->uidLen != 0)
+            totalLen += dName->uidLen + 5;
+        if (dName->serialLen != 0)
+            totalLen += dName->serialLen + 14;
+        if (dName->dcNum != 0) {
+            for (i = 0; i < dName->dcNum; i++)
+                totalLen += dName->dcLen[i] + 4;
+        }
+
+        fullName = (char*)XMALLOC(totalLen + 1, NULL, DYNAMIC_TYPE_X509);
+        if (fullName == NULL)
+            return MEMORY_E;
+
+        idx = 0;
+        dName->entryCount = 0;
+        if (dName->cnLen != 0) {
+            dName->entryCount++;
+            XMEMCPY(fullName + idx, WOLFSSL_COMMON_NAME, 4);
+            idx += 4;
+            if (dName->cnIdx == -1)
+                XMEMCPY(fullName + idx, data, dName->cnLen);
+            else {
+                XMEMCPY(fullName + idx, dName->fullName + dName->cnIdx,
+                                                                  dName->cnLen);
+            }
+            dName->cnIdx = idx;
+            idx += dName->cnLen;
+        }
+        if (dName->snLen != 0) {
+            dName->entryCount++;
+            XMEMCPY(fullName + idx, WOLFSSL_SUR_NAME, 4);
+            idx += 4;
+            if (dName->snIdx == -1)
+                XMEMCPY(fullName + idx, data, dName->snLen);
+            else {
+                XMEMCPY(fullName + idx, dName->fullName + dName->snIdx,
+                                                                  dName->snLen);
+            }
+            dName->snIdx = idx;
+            idx += dName->snLen;
+        }
+        if (dName->cLen != 0) {
+            dName->entryCount++;
+            XMEMCPY(fullName + idx, WOLFSSL_COUNTRY_NAME, 3);
+            idx += 3;
+            if (dName->cIdx == -1)
+                XMEMCPY(fullName + idx, data, dName->cLen);
+            else {
+                XMEMCPY(fullName + idx, dName->fullName + dName->cIdx,
+                                                                   dName->cLen);
+            }
+            dName->cIdx = idx;
+            idx += dName->cLen;
+        }
+        if (dName->lLen != 0) {
+            dName->entryCount++;
+            XMEMCPY(fullName + idx, WOLFSSL_LOCALITY_NAME, 3);
+            idx += 3;
+            if (dName->lIdx == -1)
+                XMEMCPY(fullName + idx, data, dName->lLen);
+            else {
+                XMEMCPY(fullName + idx, dName->fullName + dName->lIdx,
+                                                                   dName->lLen);
+            }
+            dName->lIdx = idx;
+            idx += dName->lLen;
+        }
+        if (dName->stLen != 0) {
+            dName->entryCount++;
+            XMEMCPY(fullName + idx, WOLFSSL_STATE_NAME, 4);
+            idx += 4;
+            if (dName->stIdx == -1)
+                XMEMCPY(fullName + idx, data, dName->stLen);
+            else {
+                XMEMCPY(fullName + idx, dName->fullName + dName->stIdx,
+                                                                  dName->stLen);
+            }
+            dName->stIdx = idx;
+            idx += dName->stLen;
+        }
+        if (dName->oLen != 0) {
+            dName->entryCount++;
+            XMEMCPY(fullName + idx, WOLFSSL_ORG_NAME, 3);
+            idx += 3;
+            if (dName->oIdx == -1)
+                XMEMCPY(fullName + idx, data, dName->oLen);
+            else {
+                XMEMCPY(fullName + idx, dName->fullName + dName->oIdx,
+                                                                   dName->oLen);
+            }
+            dName->oIdx = idx;
+            idx += dName->oLen;
+        }
+        if (dName->ouLen != 0) {
+            dName->entryCount++;
+            XMEMCPY(fullName + idx, WOLFSSL_ORGUNIT_NAME, 4);
+            idx += 4;
+            if (dName->ouIdx == -1)
+                XMEMCPY(fullName + idx, data, dName->ouLen);
+            else {
+                XMEMCPY(fullName + idx, dName->fullName + dName->ouIdx,
+                                                                  dName->ouLen);
+            }
+            dName->ouIdx = idx;
+            idx += dName->ouLen;
+        }
+        if (dName->emailLen != 0) {
+            dName->entryCount++;
+            XMEMCPY(fullName + idx, "/emailAddress=", 14);
+            idx += 14;
+            if (dName->emailIdx == -1)
+                XMEMCPY(fullName + idx, data, dName->emailLen);
+            else {
+                XMEMCPY(fullName + idx, dName->fullName + dName->emailIdx,
+                                                               dName->emailLen);
+            }
+            dName->emailIdx = idx;
+            idx += dName->emailLen;
+        }
+        if (dName->dcNum != 0) {
+            for (i = 0; i < dName->dcNum; i++) {
+                dName->entryCount++;
+                XMEMCPY(fullName + idx, WOLFSSL_DOMAIN_COMPONENT, 4);
+                idx += 4;
+                XMEMCPY(fullName + idx, dName->fullName + dName->dcIdx[i],
+                                                               dName->dcLen[i]);
+                dName->dcIdx[i] = idx;
+                idx += dName->dcLen[i];
+            }
+        }
+        if (dName->uidLen != 0) {
+            dName->entryCount++;
+            XMEMCPY(fullName + idx, "/UID=", 5);
+            idx += 5;
+            if (dName->uidIdx == -1)
+                XMEMCPY(fullName + idx, data, dName->uidLen);
+            else {
+                XMEMCPY(fullName + idx, dName->fullName + dName->uidIdx,
+                                                                 dName->uidLen);
+            }
+            dName->uidIdx = idx;
+            idx += dName->uidLen;
+        }
+        if (dName->serialLen != 0) {
+            dName->entryCount++;
+            XMEMCPY(fullName + idx, WOLFSSL_SERIAL_NUMBER, 14);
+            idx += 14;
+            if (dName->serialIdx == -1)
+                XMEMCPY(fullName + idx, data, dName->serialLen);
+            else {
+                XMEMCPY(fullName + idx, dName->fullName + dName->serialIdx,
+                                                              dName->serialLen);
+            }
+            dName->serialIdx = idx;
+            idx += dName->serialLen;
+        }
+
+        if (dName->fullName != NULL)
+            XFREE(dName->fullName, NULL, DYNAMIC_TYPE_X509);
+        dName->fullName = fullName;
+        dName->fullNameLen = idx + 1;
+
+        return 0;
+    }
 
     /* Copies entry into name. With it being copied freeing entry becomes the
      * callers responsibility.
@@ -29167,46 +37926,180 @@
             WOLFSSL_X509_NAME_ENTRY* entry, int idx, int set)
     {
         int i;
+        int fullName = 1;
 
         WOLFSSL_ENTER("wolfSSL_X509_NAME_add_entry()");
 
-        for (i = 0; i < MAX_NAME_ENTRIES; i++) {
-            if (name->extra[i].set != 1) { /* not set so overwrited */
-                WOLFSSL_X509_NAME_ENTRY* current = &(name->extra[i]);
-                WOLFSSL_ASN1_STRING*     str;
-
-                WOLFSSL_MSG("Found place for name entry");
-
-                XMEMCPY(current, entry, sizeof(WOLFSSL_X509_NAME_ENTRY));
-                str = entry->value;
-                XMEMCPY(&(current->data), str, sizeof(WOLFSSL_ASN1_STRING));
-                current->value = &(current->data);
-                current->data.data = (char*)XMALLOC(str->length,
-                       name->x509->heap, DYNAMIC_TYPE_OPENSSL);
-
-                if (current->data.data == NULL) {
-                    return SSL_FAILURE;
-                }
-                XMEMCPY(current->data.data, str->data, str->length);
-
-                /* make sure is null terminated */
-                current->data.data[str->length - 1] = '\0';
-
-                current->set = 1; /* make sure now listed as set */
-                break;
-            }
-        }
-
-        if (i == MAX_NAME_ENTRIES) {
-            WOLFSSL_MSG("No spot found for name entry");
-            return SSL_FAILURE;
+        switch (entry->nid) {
+            case ASN_COMMON_NAME:
+                name->fullName.cnIdx = -1;
+                name->fullName.cnLen = entry->value->length;
+                name->fullName.cnNid = entry->nid;
+                break;
+            case ASN_SUR_NAME:
+                name->fullName.snIdx = -1;
+                name->fullName.snLen = entry->value->length;
+                name->fullName.snNid = entry->nid;
+                break;
+            case ASN_SERIAL_NUMBER:
+                name->fullName.serialIdx = -1;
+                name->fullName.serialLen = entry->value->length;
+                name->fullName.serialNid = entry->nid;
+                break;
+            case ASN_COUNTRY_NAME:
+                name->fullName.cIdx = -1;
+                name->fullName.cLen = entry->value->length;
+                name->fullName.cNid = entry->nid;
+                break;
+            case ASN_LOCALITY_NAME:
+                name->fullName.lIdx = -1;
+                name->fullName.lLen = entry->value->length;
+                name->fullName.lNid = entry->nid;
+                break;
+            case ASN_STATE_NAME:
+                name->fullName.stIdx = -1;
+                name->fullName.stLen = entry->value->length;
+                name->fullName.stNid = entry->nid;
+                break;
+            case ASN_ORG_NAME:
+                name->fullName.oIdx = -1;
+                name->fullName.oLen = entry->value->length;
+                name->fullName.oNid = entry->nid;
+                break;
+            case ASN_ORGUNIT_NAME:
+                name->fullName.ouIdx = -1;
+                name->fullName.ouLen = entry->value->length;
+                name->fullName.ouNid = entry->nid;
+                break;
+            case NID_emailAddress:
+                name->fullName.emailIdx = -1;
+                name->fullName.emailLen = entry->value->length;
+                name->fullName.emailNid = entry->nid;
+                break;
+            case ASN_USER_ID:
+                name->fullName.uidIdx = -1;
+                name->fullName.uidLen = entry->value->length;
+                name->fullName.uidNid = entry->nid;
+                break;
+            case ASN_DOMAIN_COMPONENT:
+                name->fullName.dcIdx[0] = -1;
+                name->fullName.dcLen[0] = entry->value->length;
+                break;
+            default:
+                fullName = 0;
+                break;
+        }
+
+        if (fullName) {
+            int nid = entry->nid;
+
+            if (nid == NID_emailAddress) {
+                nid = (int)ASN_EMAIL_NAME;
+            }
+
+            if (idx >= DN_NAMES_MAX + DOMAIN_COMPONENT_MAX) {
+                return WOLFSSL_FAILURE;
+            }
+
+            if (idx >= 0) {
+                name->fullName.loc[idx] = nid;
+                if (idx == name->fullName.locSz) {
+                    name->fullName.locSz += 1;
+                }
+            }
+
+            /* place at end */
+            if (idx < 0 && name->fullName.locSz + 1
+                    < DN_NAMES_MAX + DOMAIN_COMPONENT_MAX) {
+                name->fullName.loc[name->fullName.locSz] = nid;
+                name->fullName.locSz += 1;
+            }
+
+            if (RebuildFullNameAdd(&name->fullName, entry->value->data) != 0)
+                return WOLFSSL_FAILURE;
+        }
+        else {
+            for (i = 0; i < MAX_NAME_ENTRIES; i++) {
+                if (name->extra[i].set != 1) { /* not set so overwritten */
+                    WOLFSSL_X509_NAME_ENTRY* current = &(name->extra[i]);
+                    WOLFSSL_ASN1_STRING*     str;
+
+                    WOLFSSL_MSG("Found place for name entry");
+
+                    XMEMCPY(current, entry, sizeof(WOLFSSL_X509_NAME_ENTRY));
+                    str = entry->value;
+                    XMEMCPY(&(current->data), str, sizeof(WOLFSSL_ASN1_STRING));
+                    current->value = &(current->data);
+                    current->data.data = (char*)XMALLOC(str->length,
+                           name->x509->heap, DYNAMIC_TYPE_OPENSSL);
+
+                    if (current->data.data == NULL) {
+                        return SSL_FAILURE;
+                    }
+                    XMEMCPY(current->data.data, str->data, str->length);
+
+                    /* make sure is null terminated */
+                    current->data.data[str->length - 1] = '\0';
+
+                    current->set = 1; /* make sure now listed as set */
+                    break;
+                }
+            }
+
+            if (i == MAX_NAME_ENTRIES) {
+                WOLFSSL_MSG("No spot found for name entry");
+                return SSL_FAILURE;
+            }
         }
 
         (void)idx;
         (void)set;
         return SSL_SUCCESS;
     }
-    #endif /* ifndef NO_CERTS */
+
+    int wolfSSL_X509_NAME_add_entry_by_txt(WOLFSSL_X509_NAME *name,
+                                           const char *field, int type,
+                                           const unsigned char *bytes, int len,
+                                           int loc, int set)
+    {
+        int ret = WOLFSSL_FAILURE;
+        int nid;
+        WOLFSSL_X509_NAME_ENTRY* entry;
+
+        (void)type;
+        WOLFSSL_ENTER("wolfSSL_X509_NAME_add_entry_by_txt");
+
+        if (name == NULL || field == NULL)
+            return WOLFSSL_FAILURE;
+
+        if ((nid = wolfSSL_OBJ_txt2nid(field)) == NID_undef)
+            return WOLFSSL_FAILURE;
+
+        entry = wolfSSL_X509_NAME_ENTRY_create_by_NID(NULL,
+                  nid, type, (unsigned char*)bytes, len);
+        if (entry == NULL)
+            return WOLFSSL_FAILURE;
+
+        ret = wolfSSL_X509_NAME_add_entry(name, entry, loc, set);
+        wolfSSL_X509_NAME_ENTRY_free(entry);
+
+        return ret;
+    }
+
+    int wolfSSL_X509_NAME_add_entry_by_NID(WOLFSSL_X509_NAME *name, int nid,
+                                           int type, const unsigned char *bytes,
+                                           int len, int loc, int set)
+    {
+        int ret;
+        WOLFSSL_X509_NAME_ENTRY* entry;
+        entry = wolfSSL_X509_NAME_ENTRY_create_by_NID(NULL, nid, type, bytes, len);
+        if (entry == NULL)
+            return WOLFSSL_FAILURE;
+        ret = wolfSSL_X509_NAME_add_entry(name, entry, loc, set);
+        wolfSSL_X509_NAME_ENTRY_free(entry);
+        return ret;
+    }
+    #endif /* !NO_CERTS */
 
 
     /* NID variables are dependent on compatibility header files currently
@@ -29214,410 +38107,43 @@
      * returns a pointer to a new WOLFSSL_ASN1_OBJECT struct on success and NULL
      *         on fail
      */
+
     WOLFSSL_ASN1_OBJECT* wolfSSL_OBJ_nid2obj(int id)
     {
+        return wolfSSL_OBJ_nid2obj_ex(id, NULL);
+    }
+
+
+    WOLFSSL_LOCAL WOLFSSL_ASN1_OBJECT* wolfSSL_OBJ_nid2obj_ex(int id,
+                                                WOLFSSL_ASN1_OBJECT* arg_obj)
+    {
         word32 oidSz = 0;
         const byte* oid;
         word32 type = 0;
-        WOLFSSL_ASN1_OBJECT* obj;
+        WOLFSSL_ASN1_OBJECT* obj = arg_obj;
         byte objBuf[MAX_OID_SZ + MAX_LENGTH_SZ + 1]; /* +1 for object tag */
         word32 objSz = 0;
-        const char* sName;
+        const char* sName = NULL;
+        int i;
 
         WOLFSSL_ENTER("wolfSSL_OBJ_nid2obj()");
 
-        /* get OID type */
-        switch (id) {
-            /* oidHashType */
-        #ifdef WOLFSSL_MD2
-            case NID_md2:
-                id = MD2h;
-                type = oidHashType;
-                sName = "md2";
-                break;
-        #endif
-        #ifndef NO_MD5
-            case NID_md5:
-                id = MD5h;
-                type = oidHashType;
-                sName = "md5";
-                break;
-        #endif
-        #ifndef NO_SHA
-            case NID_sha1:
-                id = SHAh;
-                type = oidHashType;
-                sName = "sha";
-                break;
-        #endif
-            case NID_sha224:
-                id = SHA224h;
-                type = oidHashType;
-                sName = "sha224";
-                break;
-        #ifndef NO_SHA256
-            case NID_sha256:
-                id = SHA256h;
-                type = oidHashType;
-                sName = "sha256";
-                break;
-        #endif
-        #ifdef WOLFSSL_SHA384
-            case NID_sha384:
-                id = SHA384h;
-                type = oidHashType;
-                sName = "sha384";
-                break;
-        #endif
-        #ifdef WOLFSSL_SHA512
-            case NID_sha512:
-                id = SHA512h;
-                type = oidHashType;
-                sName = "sha512";
-                break;
-        #endif
-
-            /*  oidSigType */
-        #ifndef NO_DSA
-            case CTC_SHAwDSA:
-                sName = "shaWithDSA";
-                type = oidSigType;
-                break;
-
-        #endif /* NO_DSA */
-        #ifndef NO_RSA
-            case CTC_MD2wRSA:
-                sName = "md2WithRSA";
-                type = oidSigType;
-                break;
-
-        #ifndef NO_MD5
-            case CTC_MD5wRSA:
-                sName = "md5WithRSA";
-                type = oidSigType;
-                break;
-        #endif
-
-            case CTC_SHAwRSA:
-                sName = "shaWithRSA";
-                type = oidSigType;
-                break;
-
-        #ifdef WOLFSSL_SHA224
-            case CTC_SHA224wRSA:
-                sName = "sha224WithRSA";
-                type = oidSigType;
-                break;
-        #endif
-
-        #ifndef NO_SHA256
-            case CTC_SHA256wRSA:
-                sName = "sha256WithRSA";
-                type = oidSigType;
-                break;
-        #endif
-
-        #ifdef WOLFSSL_SHA384
-            case CTC_SHA384wRSA:
-                sName = "sha384WithRSA";
-                type = oidSigType;
-                break;
-        #endif
-
-        #ifdef WOLFSSL_SHA512
-            case CTC_SHA512wRSA:
-                sName = "sha512WithRSA";
-                type = oidSigType;
-                break;
-        #endif
-        #endif /* NO_RSA */
-        #ifdef HAVE_ECC
-            case CTC_SHAwECDSA:
-                sName = "shaWithECDSA";
-                type = oidSigType;
-                break;
-
-            case CTC_SHA224wECDSA:
-                sName = "sha224WithECDSA";
-                type = oidSigType;
-                break;
-
-            case CTC_SHA256wECDSA:
-                sName = "sha256WithECDSA";
-                type = oidSigType;
-                break;
-
-            case CTC_SHA384wECDSA:
-                sName = "sha384WithECDSA";
-                type = oidSigType;
-                break;
-
-            case CTC_SHA512wECDSA:
-                sName = "sha512WithECDSA";
-                type = oidSigType;
-                break;
-        #endif /* HAVE_ECC */
-
-            /* oidKeyType */
-        #ifndef NO_DSA
-            case DSAk:
-                sName = "DSA key";
-                type = oidKeyType;
-                break;
-        #endif /* NO_DSA */
-        #ifndef NO_RSA
-            case RSAk:
-                sName = "RSA key";
-                type = oidKeyType;
-                break;
-        #endif /* NO_RSA */
-        #ifdef HAVE_NTRU
-            case NTRUk:
-                sName = "NTRU key";
-                type = oidKeyType;
-                break;
-        #endif /* HAVE_NTRU */
-        #ifdef HAVE_ECC
-            case ECDSAk:
-                sName = "ECDSA key";
-                type = oidKeyType;
-                break;
-        #endif /* HAVE_ECC */
-
-            /* oidBlkType */
-        #ifdef WOLFSSL_AES_128
-            case AES128CBCb:
-                sName = "AES-128-CBC";
-                type = oidBlkType;
-                break;
-        #endif
-        #ifdef WOLFSSL_AES_192
-            case AES192CBCb:
-                sName = "AES-192-CBC";
-                type = oidBlkType;
-                break;
-        #endif
-
-        #ifdef WOLFSSL_AES_256
-            case AES256CBCb:
-                sName = "AES-256-CBC";
-                type = oidBlkType;
-                break;
-        #endif
-
-        #ifndef NO_DES3
-            case NID_des:
-                id = DESb;
-                sName = "DES-CBC";
-                type = oidBlkType;
-                break;
-
-            case NID_des3:
-                id = DES3b;
-                sName = "DES3-CBC";
-                type = oidBlkType;
-                break;
-        #endif /* !NO_DES3 */
-
-        #ifdef HAVE_OCSP
-            case NID_id_pkix_OCSP_basic:
-                id = OCSP_BASIC_OID;
-                sName = "OCSP_basic";
-                type = oidOcspType;
-                break;
-
-            case OCSP_NONCE_OID:
-                sName = "OCSP_nonce";
-                type = oidOcspType;
-                break;
-        #endif /* HAVE_OCSP */
-
-            /* oidCertExtType */
-            case BASIC_CA_OID:
-                sName = "X509 basic ca";
-                type = oidCertExtType;
-                break;
-
-            case ALT_NAMES_OID:
-                sName = "X509 alt names";
-                type = oidCertExtType;
-                break;
-
-            case CRL_DIST_OID:
-                sName = "X509 crl";
-                type = oidCertExtType;
-                break;
-
-            case AUTH_INFO_OID:
-                sName = "X509 auth info";
-                type = oidCertExtType;
-                break;
-
-            case AUTH_KEY_OID:
-                sName = "X509 auth key";
-                type = oidCertExtType;
-                break;
-
-            case SUBJ_KEY_OID:
-                sName = "X509 subject key";
-                type = oidCertExtType;
-                break;
-
-            case KEY_USAGE_OID:
-                sName = "X509 key usage";
-                type = oidCertExtType;
-                break;
-
-            case INHIBIT_ANY_OID:
-                id = INHIBIT_ANY_OID;
-                sName = "X509 inhibit any";
-                type = oidCertExtType;
-                break;
-
-            case NID_ext_key_usage:
-                id = KEY_USAGE_OID;
-                sName = "X509 ext key usage";
-                type = oidCertExtType;
-                break;
-
-            case NID_name_constraints:
-                id = NAME_CONS_OID;
-                sName = "X509 name constraints";
-                type = oidCertExtType;
-                break;
-
-            case NID_certificate_policies:
-                id = CERT_POLICY_OID;
-                sName = "X509 certificate policies";
-                type = oidCertExtType;
-                break;
-
-            /* oidCertAuthInfoType */
-            case AIA_OCSP_OID:
-                sName = "Cert Auth OCSP";
-                type = oidCertAuthInfoType;
-                break;
-
-            case AIA_CA_ISSUER_OID:
-                sName = "Cert Auth CA Issuer";
-                type = oidCertAuthInfoType;
-                break;
-
-            /* oidCertPolicyType */
-            case NID_any_policy:
-                id = CP_ANY_OID;
-                sName = "Cert any policy";
-                type = oidCertPolicyType;
-                break;
-
-                /* oidCertAltNameType */
-            case NID_hw_name_oid:
-                id = HW_NAME_OID;
-                sName = "Hardware name";
-                type = oidCertAltNameType;
-                break;
-
-            /* oidCertKeyUseType */
-            case NID_anyExtendedKeyUsage:
-                id = EKU_ANY_OID;
-                sName = "Cert any extended key";
-                type = oidCertKeyUseType;
-                break;
-
-            case EKU_SERVER_AUTH_OID:
-                sName = "Cert server auth key";
-                type = oidCertKeyUseType;
-                break;
-
-            case EKU_CLIENT_AUTH_OID:
-                sName = "Cert client auth key";
-                type = oidCertKeyUseType;
-                break;
-
-            case EKU_OCSP_SIGN_OID:
-                sName = "Cert OCSP sign key";
-                type = oidCertKeyUseType;
-                break;
-
-            /* oidKdfType */
-            case PBKDF2_OID:
-                sName = "PBKDFv2";
-                type = oidKdfType;
-                break;
-
-                /* oidPBEType */
-            case PBE_SHA1_RC4_128:
-                sName = "PBE shaWithRC4-128";
-                type = oidPBEType;
-                break;
-
-            case PBE_SHA1_DES:
-                sName = "PBE shaWithDES";
-                type = oidPBEType;
-                break;
-
-            case PBE_SHA1_DES3:
-                sName = "PBE shaWithDES3";
-                type = oidPBEType;
-                break;
-
-                /* oidKeyWrapType */
-        #ifdef WOLFSSL_AES_128
-            case AES128_WRAP:
-                sName = "AES-128 wrap";
-                type = oidKeyWrapType;
-                break;
-        #endif
-
-        #ifdef WOLFSSL_AES_192
-            case AES192_WRAP:
-                sName = "AES-192 wrap";
-                type = oidKeyWrapType;
-                break;
-        #endif
-
-        #ifdef WOLFSSL_AES_256
-            case AES256_WRAP:
-                sName = "AES-256 wrap";
-                type = oidKeyWrapType;
-                break;
-        #endif
-
-                /* oidCmsKeyAgreeType */
-        #ifndef NO_SHA
-            case dhSinglePass_stdDH_sha1kdf_scheme:
-                sName = "DH-SHA kdf";
-                type = oidCmsKeyAgreeType;
-                break;
-        #endif
-        #ifdef WOLFSSL_SHA224
-            case dhSinglePass_stdDH_sha224kdf_scheme:
-                sName = "DH-SHA224 kdf";
-                type = oidCmsKeyAgreeType;
-                break;
-        #endif
-        #ifndef NO_SHA256
-            case dhSinglePass_stdDH_sha256kdf_scheme:
-                sName = "DH-SHA256 kdf";
-                type = oidCmsKeyAgreeType;
-                break;
-
-        #endif
-        #ifdef WOLFSSL_SHA384
-            case dhSinglePass_stdDH_sha384kdf_scheme:
-                sName = "DH-SHA384 kdf";
-                type = oidCmsKeyAgreeType;
-                break;
-        #endif
-        #ifdef WOLFSSL_SHA512
-            case dhSinglePass_stdDH_sha512kdf_scheme:
-                sName = "DH-SHA512 kdf";
-                type = oidCmsKeyAgreeType;
-                break;
-        #endif
-            default:
-                WOLFSSL_MSG("NID not in table");
-                return NULL;
+        for (i = 0; i < (int)WOLFSSL_OBJECT_INFO_SZ; i++) {
+            if (wolfssl_object_info[i].nid == id) {
+                id = wolfssl_object_info[i].id;
+                sName = wolfssl_object_info[i].sName;
+                type = wolfssl_object_info[i].type;
+                break;
+            }
+        }
+        if (i == (int)WOLFSSL_OBJECT_INFO_SZ) {
+            WOLFSSL_MSG("NID not in table");
+        #ifdef WOLFSSL_QT
+            sName = NULL;
+            type = id;
+        #else
+            return NULL;
+        #endif
         }
 
     #ifdef HAVE_ECC
@@ -29626,42 +38152,82 @@
          }
     #endif /* HAVE_ECC */
 
-        if (XSTRLEN(sName) > WOLFSSL_MAX_SNAME - 1) {
-            WOLFSSL_MSG("Attempted short name is too large");
-            return NULL;
+        if (sName != NULL) {
+            if (XSTRLEN(sName) > WOLFSSL_MAX_SNAME - 1) {
+                WOLFSSL_MSG("Attempted short name is too large");
+                return NULL;
+            }
         }
 
         oid = OidFromId(id, type, &oidSz);
 
         /* set object ID to buffer */
-        obj = wolfSSL_ASN1_OBJECT_new();
-        if (obj == NULL) {
-            WOLFSSL_MSG("Issue creating WOLFSSL_ASN1_OBJECT struct");
-            return NULL;
+        if (obj == NULL){
+            obj = wolfSSL_ASN1_OBJECT_new();
+            if (obj == NULL) {
+                WOLFSSL_MSG("Issue creating WOLFSSL_ASN1_OBJECT struct");
+                return NULL;
+            }
+            obj->dynamic |= WOLFSSL_ASN1_DYNAMIC;
+        } else {
+            obj->dynamic &= ~WOLFSSL_ASN1_DYNAMIC;
         }
         obj->type    = id;
         obj->grp     = type;
-        obj->dynamic = 1;
-        XMEMCPY(obj->sName, (char*)sName, XSTRLEN((char*)sName));
+
+        obj->sName[0] = '\0';
+        if (sName != NULL) {
+            XMEMCPY(obj->sName, (char*)sName, XSTRLEN((char*)sName));
+        }
 
         objBuf[0] = ASN_OBJECT_ID; objSz++;
         objSz += SetLength(oidSz, objBuf + 1);
         XMEMCPY(objBuf + objSz, oid, oidSz);
         objSz     += oidSz;
         obj->objSz = objSz;
-
-        obj->obj = (byte*)XMALLOC(obj->objSz, NULL, DYNAMIC_TYPE_ASN1);
-        if (obj->obj == NULL) {
-            wolfSSL_ASN1_OBJECT_free(obj);
-            return NULL;
-        }
-        XMEMCPY(obj->obj, objBuf, obj->objSz);
+        if(((obj->dynamic & WOLFSSL_ASN1_DYNAMIC_DATA) != 0) ||
+                                                           (obj->obj == NULL)) {
+            obj->obj = (byte*)XREALLOC((byte*)obj->obj, obj->objSz, NULL,
+                                                             DYNAMIC_TYPE_ASN1);
+            if (obj->obj == NULL) {
+                wolfSSL_ASN1_OBJECT_free(obj);
+                return NULL;
+            }
+            obj->dynamic |= WOLFSSL_ASN1_DYNAMIC_DATA ;
+        } else {
+            obj->dynamic &= ~WOLFSSL_ASN1_DYNAMIC_DATA ;
+        }
+        XMEMCPY((byte*)obj->obj, objBuf, obj->objSz);
 
         (void)type;
 
         return obj;
     }
 
+    static const char* oid_translate_num_to_str(const char* oid)
+    {
+        const struct oid_dict {
+            const char* num;
+            const char* desc;
+        } oid_dict[] = {
+            { "2.5.29.37.0",       "Any Extended Key Usage" },
+            { "1.3.6.1.5.5.7.3.1", "TLS Web Server Authentication" },
+            { "1.3.6.1.5.5.7.3.2", "TLS Web Client Authentication" },
+            { "1.3.6.1.5.5.7.3.3", "Code Signing" },
+            { "1.3.6.1.5.5.7.3.4", "E-mail Protection" },
+            { "1.3.6.1.5.5.7.3.8", "Time Stamping" },
+            { "1.3.6.1.5.5.7.3.9", "OCSP Signing" },
+            { NULL, NULL }
+        };
+        const struct oid_dict* idx;
+
+        for (idx = oid_dict; idx->num != NULL; idx++) {
+            if (!XSTRNCMP(oid, idx->num, XSTRLEN(idx->num))) {
+                return idx->desc;
+            }
+        }
+        return NULL;
+    }
 
     /* if no_name is one than use numerical form otherwise can be short name.
      *
@@ -29670,6 +38236,7 @@
     int wolfSSL_OBJ_obj2txt(char *buf, int bufLen, WOLFSSL_ASN1_OBJECT *a, int no_name)
     {
         int bufSz;
+        const char* desc;
 
         WOLFSSL_ENTER("wolfSSL_OBJ_obj2txt()");
 
@@ -29681,8 +38248,13 @@
         if (no_name == 1) {
             int    length;
             word32 idx = 0;
-
-            if (a->obj[idx++] != ASN_OBJECT_ID) {
+            byte   tag;
+
+            if (GetASNTag(a->obj, &idx, &tag, a->objSz) != 0) {
+                return WOLFSSL_FAILURE;
+            }
+
+            if (tag != ASN_OBJECT_ID) {
                 WOLFSSL_MSG("Bad ASN1 Object");
                 return WOLFSSL_FAILURE;
             }
@@ -29713,12 +38285,88 @@
             else {
                 bufSz = bufLen - 1;
             }
-            XMEMCPY(buf, a->sName, bufSz);
+            if (bufSz) {
+                XMEMCPY(buf, a->sName, bufSz);
+            }
+            else if (wolfSSL_OBJ_obj2txt(buf, bufLen, a, 1)) {
+                if ((desc = oid_translate_num_to_str(buf))) {
+                    bufSz = (int)XSTRLEN(desc);
+                    XMEMCPY(buf, desc, min(bufSz, bufLen));
+                }
+            }
+            else if (a->type == GEN_DNS || a->type == GEN_EMAIL || a->type == GEN_URI) {
+                bufSz = (int)XSTRLEN((const char*)a->obj);
+                XMEMCPY(buf, a->obj, min(bufSz, bufLen));
+            }
         }
 
         buf[bufSz] = '\0';
+        #ifdef WOLFSSL_QT
+            /* For unknown extension types, QT expects the short name to be the
+                text representation of the oid */
+            if (XSTRLEN(a->sName) == 0) {
+                XMEMCPY(a->sName, buf, bufSz);
+            }
+        #endif
         return bufSz;
     }
+#endif /* OPENSSL_EXTRA */
+
+#if defined(OPENSSL_EXTRA) && !defined(NO_ASN)
+    /* DN_Tags to strings */
+    static const struct DN_Tag_Strings {
+        enum DN_Tags tag;
+        const char* str;
+    } dn_tag_strings[] = {
+           { ASN_COMMON_NAME,      "CN" },
+           { ASN_SUR_NAME,         "SN" },
+           { ASN_SERIAL_NUMBER,    "serialNumber" },
+           { ASN_COUNTRY_NAME,     "C" },
+           { ASN_LOCALITY_NAME,    "L" },
+           { ASN_STATE_NAME,       "ST" },
+           { ASN_ORG_NAME,         "O"},
+           { ASN_ORGUNIT_NAME,     "OU"},
+           { ASN_BUS_CAT,          "businessCategory"},
+           { ASN_EMAIL_NAME,       "emailAddress"},
+           { ASN_USER_ID,          "UID"},
+           { ASN_DOMAIN_COMPONENT, "DC"},
+           { ASN_DN_NULL,          NULL }
+    };
+
+    int wolfSSL_X509_NAME_get_index_by_OBJ(WOLFSSL_X509_NAME *name,
+                                           const WOLFSSL_ASN1_OBJECT *obj,
+                                           int idx) {
+        const struct DN_Tag_Strings* dn;
+        enum DN_Tags tag = ASN_DN_NULL;
+
+        if (!name || idx >= name->fullName.locSz ||
+                !obj || !obj->obj) {
+            return -1;
+        }
+
+        if (idx < 0) {
+            idx = 0;
+        }
+        for (dn = dn_tag_strings; dn->str != NULL; dn++) {
+            /* Find the DN_Tags number for the name */
+            if (XSTRNCMP((const char*) obj->sName, dn->str, obj->objSz - 1) == 0) {
+                tag = dn->tag;
+                break;
+            }
+        }
+        if (!tag) {
+            /* Unable to identify desired name */
+            return -1;
+        }
+        for (idx++; idx < name->fullName.locSz; idx++) {
+            /* Find index of desired name */
+            if ((enum DN_Tags)name->fullName.loc[idx] == tag) {
+                return idx;
+            }
+        }
+        return -1;
+    }
+#endif
 
 #if defined(OPENSSL_EXTRA) || defined(HAVE_LIGHTY) || \
     defined(WOLFSSL_MYSQL_COMPATIBLE) || defined(HAVE_STUNNEL) || \
@@ -29913,6 +38561,13 @@
          }
      }
 #endif /* defined(WOLFSSL_SHA512)  */
+#endif /* OPENSSL_EXTRA */
+
+#ifndef WOLFCRYPT_ONLY
+#if defined(OPENSSL_EXTRA) || defined(HAVE_LIGHTY) || \
+    defined(WOLFSSL_MYSQL_COMPATIBLE) || defined(HAVE_STUNNEL) || \
+    defined(WOLFSSL_NGINX) || defined(HAVE_POCO_LIB) || \
+    defined(WOLFSSL_HAPROXY)
 
     char wolfSSL_CTX_use_certificate(WOLFSSL_CTX *ctx, WOLFSSL_X509 *x)
     {
@@ -29933,7 +38588,15 @@
             FreeX509(ctx->ourCert);
             XFREE(ctx->ourCert, ctx->heap, DYNAMIC_TYPE_X509);
         }
+        #ifndef WOLFSSL_X509_STORE_CERTS
         ctx->ourCert = x;
+        #else
+        ctx->ourCert = wolfSSL_X509_d2i(NULL, x->derCert->buffer,x->derCert->length);
+        if(ctx->ourCert == NULL){
+            return 0;
+        }
+        #endif
+
         ctx->ownOurCert = 0;
 #endif
 
@@ -29945,31 +38608,46 @@
         #ifdef HAVE_ED25519
             case ED25519k:
         #endif
+        #ifdef HAVE_ED448
+            case ED448k:
+        #endif
             case ECDSAk:
                 ctx->haveECC = 1;
-            #if defined(HAVE_ECC) || defined(HAVE_ED25519)
+        #if defined(HAVE_ECC) || defined(HAVE_ED25519) || defined(HAVE_ED448)
                 ctx->pkCurveOID = x->pkCurveOID;
-            #endif
-                break;
-        }
-
-        return WOLFSSL_SUCCESS;
-    }
-
-    #ifndef NO_WOLFSSL_STUB
+        #endif
+                break;
+        }
+
+        return WOLFSSL_SUCCESS;
+    }
+
+    int wolfSSL_CTX_add1_chain_cert(WOLFSSL_CTX* ctx, WOLFSSL_X509* x509)
+    {
+        int ret;
+        if (ctx == NULL || x509 == NULL || x509->derCert == NULL) {
+            return WOLFSSL_FAILURE;
+        }
+
+        ret = wolfSSL_CTX_load_verify_buffer(ctx, x509->derCert->buffer,
+            x509->derCert->length, WOLFSSL_FILETYPE_ASN1);
+
+        return (ret == 0) ? WOLFSSL_SUCCESS : WOLFSSL_FAILURE;
+    }
+
     int wolfSSL_BIO_read_filename(WOLFSSL_BIO *b, const char *name) {
     #ifndef NO_FILESYSTEM
         XFILE fp;
 
         WOLFSSL_ENTER("wolfSSL_BIO_new_file");
 
-        if ((wolfSSL_BIO_get_fp(b, &fp) == WOLFSSL_SUCCESS) && (fp != NULL))
+        if ((wolfSSL_BIO_get_fp(b, &fp) == WOLFSSL_SUCCESS) && (fp != XBADFILE))
         {
             XFCLOSE(fp);
         }
 
         fp = XFOPEN(name, "r");
-        if (fp == NULL)
+        if (fp == XBADFILE)
             return WOLFSSL_BAD_FILE;
 
         if (wolfSSL_BIO_set_fp(b, fp, BIO_CLOSE) != WOLFSSL_SUCCESS) {
@@ -29985,40 +38663,32 @@
         return WOLFSSL_NOT_IMPLEMENTED;
     #endif
     }
-    #endif
-
-#ifdef HAVE_ECC
+
+    /* Return the corresponding short name for the nid <n>.
+     * or NULL if short name can't be found.
+     */
     const char * wolfSSL_OBJ_nid2sn(int n) {
-        int i;
+        const WOLFSSL_ObjectInfo *obj_info = wolfssl_object_info;
+        size_t i;
         WOLFSSL_ENTER("wolfSSL_OBJ_nid2sn");
-
-        /* find based on NID and return name */
-        for (i = 0; i < ecc_sets[i].size; i++) {
-            if (n == ecc_sets[i].id) {
-                return ecc_sets[i].name;
-            }
-        }
-        return NULL;
-    }
-
+        for (i = 0; i < WOLFSSL_OBJECT_INFO_SZ; i++, obj_info++) {
+            if (obj_info->nid == n) {
+                return obj_info->sName;
+            }
+        }
+        WOLFSSL_MSG("SN not found");
+        return NULL;
+    }
+
+#if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
     int wolfSSL_OBJ_sn2nid(const char *sn) {
-        int i;
-        WOLFSSL_ENTER("wolfSSL_OBJ_osn2nid");
-
-        /* Nginx uses this OpenSSL string. */
-        if (XSTRNCMP(sn, "prime256v1", 10) == 0)
-            sn = "SECP256R1";
-        if (XSTRNCMP(sn, "secp384r1", 10) == 0)
-            sn = "SECP384R1";
-        /* find based on name and return NID */
-        for (i = 0; i < ecc_sets[i].size; i++) {
-            if (XSTRNCMP(sn, ecc_sets[i].name, ECC_MAXNAME) == 0) {
-                return ecc_sets[i].id;
-            }
-        }
-        return -1;
-    }
-#endif /* HAVE_ECC */
+        WOLFSSL_ENTER("wolfSSL_OBJ_sn2nid");
+        if (sn == NULL)
+            return NID_undef;
+        return wc_OBJ_sn2nid(sn);
+    }
+#endif
+
 
     /* Gets the NID value that corresponds with the ASN1 object.
      *
@@ -30026,10 +38696,10 @@
      *
      * Return NID on success and a negative value on failure
      */
-    int wolfSSL_OBJ_obj2nid(const WOLFSSL_ASN1_OBJECT *o) {
+    int wolfSSL_OBJ_obj2nid(const WOLFSSL_ASN1_OBJECT *o)
+    {
         word32 oid = 0;
         word32 idx = 0;
-        int id;
 
         WOLFSSL_ENTER("wolfSSL_OBJ_obj2nid");
 
@@ -30037,392 +38707,398 @@
             return -1;
         }
 
-        if ((id = GetObjectId(o->obj, &idx, &oid, o->grp, o->objSz)) < 0) {
+        #ifdef WOLFSSL_QT
+        if (o->grp == oidCertExtType) {
+            /* If nid is an unknown extension, return NID_undef */
+            if (wolfSSL_OBJ_nid2sn(o->nid) == NULL)
+                return NID_undef;
+        }
+        #endif
+
+        if (o->nid > 0)
+            return o->nid;
+        if (GetObjectId(o->obj, &idx, &oid, o->grp, o->objSz) < 0) {
             WOLFSSL_MSG("Issue getting OID of object");
             return -1;
         }
 
-        /* get OID type */
-        switch (o->grp) {
-            /* oidHashType */
-            case oidHashType:
-                switch (oid) {
-                #ifdef WOLFSSL_MD2
-                    case MD2h:
-                        return NID_md2;
-                #endif
-                #ifndef NO_MD5
-                    case MD5h:
-                        return NID_md5;
-                #endif
-                #ifndef NO_SHA
-                    case SHAh:
-                        return NID_sha1;
-                #endif
-                    case SHA224h:
-                        return NID_sha224;
-                #ifndef NO_SHA256
-                    case SHA256h:
-                        return NID_sha256;
-                #endif
-                #ifdef WOLFSSL_SHA384
-                    case SHA384h:
-                        return NID_sha384;
-                #endif
-                #ifdef WOLFSSL_SHA512
-                    case SHA512h:
-                        return NID_sha512;
-                #endif
-                }
-                break;
-
-            /*  oidSigType */
-            case oidSigType:
-                switch (oid) {
-                #ifndef NO_DSA
-                    case CTC_SHAwDSA:
-                        return CTC_SHAwDSA;
-                #endif /* NO_DSA */
-                #ifndef NO_RSA
-                    case CTC_MD2wRSA:
-                        return CTC_MD2wRSA;
-                    case CTC_MD5wRSA:
-                        return CTC_MD5wRSA;
-                    case CTC_SHAwRSA:
-                        return CTC_SHAwRSA;
-                    case CTC_SHA224wRSA:
-                        return CTC_SHA224wRSA;
-                    case CTC_SHA256wRSA:
-                        return CTC_SHA256wRSA;
-                    case CTC_SHA384wRSA:
-                        return CTC_SHA384wRSA;
-                    case CTC_SHA512wRSA:
-                        return CTC_SHA512wRSA;
-                #endif /* NO_RSA */
-                #ifdef HAVE_ECC
-                    case CTC_SHAwECDSA:
-                        return CTC_SHAwECDSA;
-                    case CTC_SHA224wECDSA:
-                        return CTC_SHA224wECDSA;
-                    case CTC_SHA256wECDSA:
-                        return CTC_SHA256wECDSA;
-                    case CTC_SHA384wECDSA:
-                        return CTC_SHA384wECDSA;
-                    case CTC_SHA512wECDSA:
-                        return CTC_SHA512wECDSA;
-                #endif /* HAVE_ECC */
-                }
-                break;
-
-            /* oidKeyType */
-            case oidKeyType:
-                switch (oid) {
-                #ifndef NO_DSA
-                    case DSAk:
-                        return DSAk;
-                #endif /* NO_DSA */
-                #ifndef NO_RSA
-                    case RSAk:
-                        return RSAk;
-                #endif /* NO_RSA */
-                #ifdef HAVE_NTRU
-                    case NTRUk:
-                        return NTRUk;
-                #endif /* HAVE_NTRU */
-                #ifdef HAVE_ECC
-                    case ECDSAk:
-                        return ECDSAk;
-                #endif /* HAVE_ECC */
-                }
-                break;
-
-            /* oidBlkType */
-            case oidBlkType:
-                switch (oid) {
-                #ifdef WOLFSSL_AES_128
-                    case AES128CBCb:
-                        return AES128CBCb;
-                #endif
-                #ifdef WOLFSSL_AES_192
-                    case AES192CBCb:
-                        return AES192CBCb;
-                #endif
-                #ifdef WOLFSSL_AES_256
-                    case AES256CBCb:
-                        return AES256CBCb;
-                #endif
-                #ifndef NO_DES3
-                    case DESb:
-                        return NID_des;
-                    case DES3b:
-                        return NID_des3;
-                #endif
-                }
-                break;
-
-        #ifdef HAVE_OCSP
-            case oidOcspType:
-                switch (oid) {
-                    case OCSP_BASIC_OID:
-                        return NID_id_pkix_OCSP_basic;
-                    case OCSP_NONCE_OID:
-                        return OCSP_NONCE_OID;
-                }
-                break;
-        #endif /* HAVE_OCSP */
-
-            /* oidCertExtType */
-            case oidCertExtType:
-                switch (oid) {
-                    case BASIC_CA_OID:
-                        return BASIC_CA_OID;
-                    case ALT_NAMES_OID:
-                        return ALT_NAMES_OID;
-                    case CRL_DIST_OID:
-                        return CRL_DIST_OID;
-                    case AUTH_INFO_OID:
-                        return AUTH_INFO_OID;
-                    case AUTH_KEY_OID:
-                        return AUTH_KEY_OID;
-                    case SUBJ_KEY_OID:
-                        return SUBJ_KEY_OID;
-                    case INHIBIT_ANY_OID:
-                        return INHIBIT_ANY_OID;
-                    case KEY_USAGE_OID:
-                        return NID_ext_key_usage;
-                    case NAME_CONS_OID:
-                        return NID_name_constraints;
-                    case CERT_POLICY_OID:
-                        return NID_certificate_policies;
-                }
-                break;
-
-            /* oidCertAuthInfoType */
-            case oidCertAuthInfoType:
-                switch (oid) {
-                    case AIA_OCSP_OID:
-                        return AIA_OCSP_OID;
-                    case AIA_CA_ISSUER_OID:
-                        return AIA_CA_ISSUER_OID;
-                }
-                break;
-
-            /* oidCertPolicyType */
-            case oidCertPolicyType:
-                switch (oid) {
-                    case CP_ANY_OID:
-                        return NID_any_policy;
-                }
-                break;
-
-            /* oidCertAltNameType */
-            case oidCertAltNameType:
-                switch (oid) {
-                    case HW_NAME_OID:
-                        return NID_hw_name_oid;
-                }
-                break;
-
-            /* oidCertKeyUseType */
-            case oidCertKeyUseType:
-                switch (oid) {
-                    case EKU_ANY_OID:
-                        return NID_anyExtendedKeyUsage;
-                    case EKU_SERVER_AUTH_OID:
-                        return EKU_SERVER_AUTH_OID;
-                    case EKU_CLIENT_AUTH_OID:
-                        return EKU_CLIENT_AUTH_OID;
-                    case EKU_OCSP_SIGN_OID:
-                        return EKU_OCSP_SIGN_OID;
-                }
-                break;
-
-            /* oidKdfType */
-            case oidKdfType:
-                switch (oid) {
-                    case PBKDF2_OID:
-                        return PBKDF2_OID;
-                }
-                break;
-
-            /* oidPBEType */
-            case oidPBEType:
-                switch (oid) {
-                    case PBE_SHA1_RC4_128:
-                        return PBE_SHA1_RC4_128;
-                    case PBE_SHA1_DES:
-                        return PBE_SHA1_DES;
-                    case PBE_SHA1_DES3:
-                        return PBE_SHA1_DES3;
-                }
-                break;
-
-            /* oidKeyWrapType */
-            case oidKeyWrapType:
-                switch (oid) {
-                #ifdef WOLFSSL_AES_128
-                    case AES128_WRAP:
-                        return AES128_WRAP;
-                #endif
-                #ifdef WOLFSSL_AES_192
-                    case AES192_WRAP:
-                        return AES192_WRAP;
-                #endif
-                #ifdef WOLFSSL_AES_256
-                    case AES256_WRAP:
-                        return AES256_WRAP;
-                #endif
-                }
-                break;
-
-            /* oidCmsKeyAgreeType */
-            case oidCmsKeyAgreeType:
-                switch (oid) {
-                    #ifndef NO_SHA
-                    case dhSinglePass_stdDH_sha1kdf_scheme:
-                        return dhSinglePass_stdDH_sha1kdf_scheme;
-                    #endif
-                    #ifdef WOLFSSL_SHA224
-                    case dhSinglePass_stdDH_sha224kdf_scheme:
-                        return dhSinglePass_stdDH_sha224kdf_scheme;
-                    #endif
-                    #ifndef NO_SHA256
-                    case dhSinglePass_stdDH_sha256kdf_scheme:
-                        return dhSinglePass_stdDH_sha256kdf_scheme;
-                    #endif
-                    #ifdef WOLFSSL_SHA384
-                    case dhSinglePass_stdDH_sha384kdf_scheme:
-                        return dhSinglePass_stdDH_sha384kdf_scheme;
-                    #endif
-                    #ifdef WOLFSSL_SHA512
-                    case dhSinglePass_stdDH_sha512kdf_scheme:
-                        return dhSinglePass_stdDH_sha512kdf_scheme;
-                    #endif
-                }
-                break;
-
-            default:
-                WOLFSSL_MSG("NID not in table");
-                return -1;
-        }
-
-        return -1;
-    }
-
-
-#ifndef NO_WOLFSSL_STUB
-    char * wolfSSL_OBJ_nid2ln(int n)
-    {
-        (void)n;
+        return oid2nid(oid, o->grp);
+    }
+
+    /* Returns the long name that corresponds with an ASN1_OBJECT nid value.
+     *  n : NID value of ASN1_OBJECT to search */
+    const char* wolfSSL_OBJ_nid2ln(int n)
+    {
+        const WOLFSSL_ObjectInfo *obj_info = wolfssl_object_info;
+        size_t i;
         WOLFSSL_ENTER("wolfSSL_OBJ_nid2ln");
-        WOLFSSL_STUB("OBJ_nid2ln");
-
-        return NULL;
-    }
-#endif
-
-#ifndef NO_WOLFSSL_STUB
+        for (i = 0; i < WOLFSSL_OBJECT_INFO_SZ; i++, obj_info++) {
+            if (obj_info->nid == n) {
+                return obj_info->lName;
+            }
+        }
+        WOLFSSL_MSG("NID not found in table");
+        return NULL;
+    }
+
+    /* Return the corresponding NID for the long name <ln>
+     * or NID_undef if NID can't be found.
+     */
+    int wolfSSL_OBJ_ln2nid(const char *ln)
+    {
+        const WOLFSSL_ObjectInfo *obj_info = wolfssl_object_info;
+        size_t i, lnlen;
+        WOLFSSL_ENTER("wolfSSL_OBJ_ln2nid");
+        if (ln && (lnlen = XSTRLEN(ln)) > 0) {
+            /* Accept input like "/commonName=" */
+            if (ln[0] == '/') {
+                ln++;
+                lnlen--;
+            }
+            if (lnlen) {
+                if (ln[lnlen-1] == '=') {
+                    lnlen--;
+                }
+                for (i = 0; i < WOLFSSL_OBJECT_INFO_SZ; i++, obj_info++) {
+                    if (lnlen == XSTRLEN(obj_info->lName) &&
+                            XSTRNCMP(ln, obj_info->lName, lnlen) == 0) {
+                        return obj_info->nid;
+                    }
+                }
+            }
+        }
+        return NID_undef;
+    }
+
+    /* compares two objects, return 0 if equal */
+    int wolfSSL_OBJ_cmp(const WOLFSSL_ASN1_OBJECT* a,
+                        const WOLFSSL_ASN1_OBJECT* b)
+    {
+        WOLFSSL_ENTER("wolfSSL_OBJ_cmp");
+
+        if (a != NULL && b != NULL &&
+            a->obj != NULL && b->obj != NULL &&
+            a->objSz == b->objSz) {
+            return XMEMCMP(a->obj, b->obj, a->objSz);
+        }
+
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    /* Gets the NID value that is related to the OID string passed in. Example
+     * string would be "2.5.29.14" for subject key ID.
+     *
+     * returns NID value on success and NID_undef on error
+     */
     int wolfSSL_OBJ_txt2nid(const char* s)
     {
-        (void)s;
-        WOLFSSL_STUB("OBJ_txt2nid");
-
-        return 0;
-    }
-#endif
-
-    /* compatibility function. It's intended use is to remove OID's from an
-     * internal table that have been added with OBJ_create. wolfSSL manages it's
-     * own interenal OID values and does not currently support OBJ_create. */
+        unsigned int i;
+    #ifdef WOLFSSL_CERT_EXT
+        int ret;
+        unsigned int sum = 0;
+        unsigned int outSz = MAX_OID_SZ;
+        unsigned char out[MAX_OID_SZ];
+    #endif
+
+        WOLFSSL_ENTER("OBJ_txt2nid");
+
+        if (s == NULL) {
+            return NID_undef;
+        }
+
+    #ifdef WOLFSSL_CERT_EXT
+        ret = EncodePolicyOID(out, &outSz, s, NULL);
+        if (ret == 0) {
+            /* sum OID */
+            for (i = 0; i < outSz; i++) {
+                sum += out[i];
+            }
+        }
+    #endif /* WOLFSSL_CERT_EXT */
+
+        /* get the group that the OID's sum is in
+         * @TODO possible conflict with multiples */
+        for (i = 0; i < WOLFSSL_OBJECT_INFO_SZ; i++) {
+            int len;
+        #ifdef WOLFSSL_CERT_EXT
+            if (ret == 0) {
+                if (wolfssl_object_info[i].id == (int)sum) {
+                    return wolfssl_object_info[i].nid;
+                }
+            }
+        #endif
+
+            /* try as a short name */
+            len = (int)XSTRLEN(s);
+            if (XSTRNCMP(wolfssl_object_info[i].sName, s, len) == 0) {
+                return wolfssl_object_info[i].nid;
+            }
+
+            /* try as a long name */
+            if (XSTRNCMP(wolfssl_object_info[i].lName, s, len) == 0) {
+                return wolfssl_object_info[i].nid;
+            }
+        }
+
+        return NID_undef;
+    }
+
+    /* Creates new ASN1_OBJECT from short name, long name, or text
+     * representation of oid. If no_name is 0, then short name, long name, and
+     * numerical value of oid are interpreted. If no_name is 1, then only the
+     * numerical value of the oid is interpreted.
+     *
+     * Returns pointer to ASN1_OBJECT on success, or NULL on error.
+     */
+#if defined(WOLFSSL_CERT_EXT) && defined(WOLFSSL_CERT_GEN)
+    WOLFSSL_ASN1_OBJECT* wolfSSL_OBJ_txt2obj(const char* s, int no_name)
+    {
+        int len, i, ret;
+        int nid = NID_undef;
+        unsigned int outSz = MAX_OID_SZ;
+        unsigned char out[MAX_OID_SZ];
+        unsigned int sum = 0;
+
+        WOLFSSL_ENTER("wolfSSL_OBJ_txt2obj");
+
+        if (s == NULL)
+            return NULL;
+
+        /* If s is numerical value, try to sum oid */
+        ret = EncodePolicyOID(out, &outSz, s, NULL);
+        if (ret == 0) {
+            for (i = 0; i < (int)outSz; i++) {
+                sum += out[i];
+            }
+        }
+
+        len = (int)XSTRLEN(s);
+
+        /* TODO: update short names in wolfssl_object_info and check OID sums
+           are correct */
+        for (i = 0; i < (int)WOLFSSL_OBJECT_INFO_SZ; i++) {
+            /* Short name, long name, and numerical value are interpreted */
+            if (no_name == 0 && ((XSTRNCMP(s, wolfssl_object_info[i].sName, len) == 0) ||
+                                 (XSTRNCMP(s, wolfssl_object_info[i].lName, len) == 0) ||
+                                 (wolfssl_object_info[i].id == (int)sum)))
+                    nid = wolfssl_object_info[i].nid;
+            /* Only numerical value is interpreted */
+            else if (no_name == 1 && wolfssl_object_info[i].id == (int)sum)
+                    nid = wolfssl_object_info[i].nid;
+        }
+
+        if (nid != NID_undef)
+            return wolfSSL_OBJ_nid2obj(nid);
+
+        return NULL;
+    }
+#endif
+
+    /* compatibility function. Its intended use is to remove OID's from an
+     * internal table that have been added with OBJ_create. wolfSSL manages its
+     * own internal OID values and does not currently support OBJ_create. */
     void wolfSSL_OBJ_cleanup(void)
     {
         WOLFSSL_ENTER("wolfSSL_OBJ_cleanup()");
     }
 
-
     #ifndef NO_WOLFSSL_STUB
-    void wolfSSL_set_verify_depth(WOLFSSL *ssl, int depth) {
-        WOLFSSL_ENTER("wolfSSL_set_verify_depth");
-#ifndef OPENSSL_EXTRA
+    int wolfSSL_OBJ_create(const char *oid, const char *sn, const char *ln)
+    {
+        (void)oid;
+        (void)sn;
+        (void)ln;
+        WOLFSSL_STUB("wolfSSL_OBJ_create");
+        return WOLFSSL_FAILURE;
+    }
+    #endif
+
+    void wolfSSL_set_verify_depth(WOLFSSL *ssl, int depth)
+    {
+    #if !defined(OPENSSL_EXTRA) && !defined(NO_WOLFSSL_STUB)
         (void)ssl;
         (void)depth;
         WOLFSSL_STUB("wolfSSL_set_verify_depth");
-#else
+    #else
+        WOLFSSL_ENTER("wolfSSL_set_verify_depth");
         ssl->options.verifyDepth = (byte)depth;
-#endif
-    }
-    #endif
-
-
-    #ifndef NO_WOLFSSL_STUB
+    #endif
+    }
+
+
     WOLFSSL_ASN1_OBJECT * wolfSSL_X509_NAME_ENTRY_get_object(WOLFSSL_X509_NAME_ENTRY *ne) {
-        (void)ne;
         WOLFSSL_ENTER("wolfSSL_X509_NAME_ENTRY_get_object");
-        WOLFSSL_STUB("X509_NAME_ENTRY_get_object");
-
-        return NULL;
-    }
-    #endif
+        if (ne == NULL) return NULL;
+        if (wolfSSL_OBJ_nid2obj_ex(ne->nid, &ne->object) != NULL) {
+            ne->object.nid = ne->nid;
+            return &ne->object;
+        }
+        return NULL;
+    }
+
+
+    /* looks up the DN given the location "loc". "loc" is the number indicating
+     * the order that the DN was parsed as, 0 is first DN parsed.
+     *
+     * returns the setup WOLFSSL_X509_NAME pointer on success and NULL on fail
+     */
+    static WOLFSSL_X509_NAME* wolfSSL_nameByLoc(WOLFSSL_X509_NAME *name, int loc)
+    {
+        char* pt = NULL;
+        int sz = 0;
+
+        switch (name->fullName.loc[loc])
+        {
+        case ASN_COMMON_NAME:
+            sz = name->fullName.cnLen;
+            pt = &name->fullName.fullName[name->fullName.cnIdx];
+            name->cnEntry.nid           = name->fullName.cnNid;
+            break;
+        case ASN_COUNTRY_NAME:
+            sz = name->fullName.cLen;
+            pt = &name->fullName.fullName[name->fullName.cIdx];
+            name->cnEntry.nid           = name->fullName.cNid;
+            break;
+        case ASN_LOCALITY_NAME:
+            sz = name->fullName.lLen;
+            pt = &name->fullName.fullName[name->fullName.lIdx];
+            name->cnEntry.nid           = name->fullName.lNid;
+            break;
+        case ASN_STATE_NAME:
+            sz = name->fullName.stLen;
+            pt = &name->fullName.fullName[name->fullName.stIdx];
+            name->cnEntry.nid           = name->fullName.stNid;
+            break;
+        case ASN_ORG_NAME:
+            sz = name->fullName.oLen;
+            pt = &name->fullName.fullName[name->fullName.oIdx];
+            name->cnEntry.nid           = name->fullName.oNid;
+            break;
+        case ASN_ORGUNIT_NAME:
+            sz = name->fullName.ouLen;
+            pt = &name->fullName.fullName[name->fullName.ouIdx];
+            name->cnEntry.nid           = name->fullName.ouNid;
+            break;
+        case ASN_EMAIL_NAME:
+            sz = name->fullName.emailLen;
+            pt = &name->fullName.fullName[name->fullName.emailIdx];
+            name->cnEntry.nid           = name->fullName.emailNid;
+            break;
+        case ASN_SUR_NAME:
+            sz = name->fullName.snLen;
+            pt = &name->fullName.fullName[name->fullName.snIdx];
+            name->cnEntry.nid           = name->fullName.snNid;
+            break;
+        case ASN_USER_ID:
+            sz = name->fullName.uidLen;
+            pt = &name->fullName.fullName[name->fullName.uidIdx];
+            name->cnEntry.nid           = name->fullName.uidNid;
+            break;
+        case ASN_SERIAL_NUMBER:
+            sz = name->fullName.serialLen;
+            pt = &name->fullName.fullName[name->fullName.serialIdx];
+            name->cnEntry.nid           = name->fullName.serialNid;
+            break;
+#ifdef WOLFSSL_CERT_EXT
+        case ASN_BUS_CAT:
+            sz = name->fullName.bcLen;
+            pt = &name->fullName.fullName[name->fullName.bcIdx];
+            break;
+#endif
+
+        case ASN_DOMAIN_COMPONENT:
+            /* get index of DC i.e. first or second or ... case */
+            {
+                int idx = 0, i;
+                for (i = 0; i < loc; i++) {
+                    if (name->fullName.loc[i] == ASN_DOMAIN_COMPONENT) {
+                        idx++;
+                    }
+                }
+
+                /* check that index is not larger than max buffer size or larger
+                 * than the number of domain components parsed */
+                if (idx >= DOMAIN_COMPONENT_MAX || idx > name->fullName.dcNum) {
+                    WOLFSSL_MSG("Index was larger then domain buffer");
+                    return NULL;
+                }
+                pt = &name->fullName.fullName[name->fullName.dcIdx[idx]],
+                sz = name->fullName.dcLen[idx];
+                name->cnEntry.nid         = ASN_DOMAIN_COMPONENT;
+                name->cnEntry.data.type   = CTC_UTF8;
+            }
+            break;
+
+        default:
+            return NULL;
+        }
+
+        /* -1 to leave room for trailing terminator 0 */
+        if (sz == 0 || sz >= CTC_NAME_SIZE - 1)
+            return NULL;
+        if (wolfSSL_ASN1_STRING_set(name->cnEntry.value, pt, sz) !=
+                WOLFSSL_SUCCESS) {
+            WOLFSSL_MSG("Error setting local ASN1 string data");
+            return NULL;
+        }
+        name->cnEntry.value->type   = CTC_UTF8;
+        name->cnEntry.set           = 1;
+        return name;
+    }
+
 
     WOLFSSL_X509_NAME_ENTRY *wolfSSL_X509_NAME_get_entry(
-                                             WOLFSSL_X509_NAME *name, int loc) {
-
-        int maxLoc = name->fullName.fullNameLen;
-
+                                             WOLFSSL_X509_NAME *name, int loc)
+    {
         WOLFSSL_ENTER("wolfSSL_X509_NAME_get_entry");
 
-        if (loc < 0 || loc > maxLoc) {
+        if (name == NULL) {
+            return NULL;
+        }
+
+        if (loc < 0) {
             WOLFSSL_MSG("Bad argument");
             return NULL;
         }
 
+        if (loc <= DN_NAMES_MAX + name->fullName.dcNum) {
+            if (wolfSSL_nameByLoc(name, loc) != NULL)
+                return &name->cnEntry;
+        }
         /* DC component */
-        if (name->fullName.dcMode){
+        if (name->fullName.dcMode) {
             if (name->fullName.fullName != NULL){
                 if (loc == name->fullName.dcNum){
-                    name->cnEntry.data.data   = &name->fullName.fullName[name->fullName.cIdx];
+                    name->cnEntry.data.data
+                        = &name->fullName.fullName[name->fullName.cIdx];
                     name->cnEntry.data.length = name->fullName.cLen;
                     name->cnEntry.nid         = ASN_COUNTRY_NAME;
                 } else {
-                    name->cnEntry.data.data   = &name->fullName.fullName[name->fullName.dcIdx[loc]];
+                    name->cnEntry.data.data
+                        = &name->fullName.fullName[name->fullName.dcIdx[loc]];
                     name->cnEntry.data.length = name->fullName.dcLen[loc];
                     name->cnEntry.nid         = ASN_DOMAIN_COMPONENT;
                 }
             }
             name->cnEntry.data.type = CTC_UTF8;
-            name->cnEntry.set       = 1;
-            return &(name->cnEntry);
-
-         /* common name index case */
-        } else if (loc == name->fullName.cnIdx) {
+        /* common name index case */
+        } else if (loc == name->fullName.cnIdx && name->x509 != NULL) {
             /* get CN shortcut from x509 since it has null terminator */
             name->cnEntry.data.data   = name->x509->subjectCN;
             name->cnEntry.data.length = name->fullName.cnLen;
             name->cnEntry.data.type   = CTC_UTF8;
             name->cnEntry.nid         = ASN_COMMON_NAME;
             name->cnEntry.set         = 1;
-            return &(name->cnEntry);
-        }
-
-        /* additionall cases to check for go here */
-
-        WOLFSSL_MSG("Entry not found or implemented");
-        (void)name;
-        (void)loc;
-
-        return NULL;
+        } else {
+            WOLFSSL_MSG("loc passed in is not in range of parsed DN's");
+            return NULL;
+        }
+        return &name->cnEntry;
     }
 
     #ifndef NO_WOLFSSL_STUB
-    void wolfSSL_sk_X509_NAME_pop_free(WOLF_STACK_OF(WOLFSSL_X509_NAME)* sk, void f (WOLFSSL_X509_NAME*)){
-        (void) sk;
-        (void) f;
-        WOLFSSL_ENTER("wolfSSL_sk_X509_NAME_pop_free");
-        WOLFSSL_STUB("sk_X509_NAME_pop_free");
-    }
-    #endif
-    #ifndef NO_WOLFSSL_STUB
-    int wolfSSL_X509_check_private_key(WOLFSSL_X509 *x509, WOLFSSL_EVP_PKEY *key){
+    int wolfSSL_X509_check_private_key(WOLFSSL_X509 *x509, WOLFSSL_EVP_PKEY *key)
+    {
         (void) x509;
         (void) key;
         WOLFSSL_ENTER("wolfSSL_X509_check_private_key");
@@ -30431,18 +39107,24 @@
         return WOLFSSL_SUCCESS;
     }
 
-    WOLF_STACK_OF(WOLFSSL_X509_NAME) *wolfSSL_dup_CA_list( WOLF_STACK_OF(WOLFSSL_X509_NAME) *sk ){
+    WOLF_STACK_OF(WOLFSSL_X509_NAME) *wolfSSL_dup_CA_list(
+        WOLF_STACK_OF(WOLFSSL_X509_NAME) *sk)
+    {
         (void) sk;
         WOLFSSL_ENTER("wolfSSL_dup_CA_list");
         WOLFSSL_STUB("SSL_dup_CA_list");
 
         return NULL;
     }
-    #endif
-
-#endif /* OPENSSL_ALL || HAVE_LIGHTY || WOLFSSL_MYSQL_COMPATIBLE || HAVE_STUNNEL || WOLFSSL_NGINX || HAVE_POCO_LIB || WOLFSSL_HAPROXY */
+
+    #endif
+
+#endif /* OPENSSL_ALL || HAVE_LIGHTY || WOLFSSL_MYSQL_COMPATIBLE ||
+    HAVE_STUNNEL || WOLFSSL_NGINX || HAVE_POCO_LIB || WOLFSSL_HAPROXY */
 #endif /* OPENSSL_EXTRA */
 
+#ifndef WOLFCRYPT_ONLY
+
 #ifdef OPENSSL_EXTRA
 
 /* wolfSSL uses negative values for error states. This function returns an
@@ -30454,7 +39136,8 @@
 
     (void)line;
     (void)file;
-#if defined(OPENSSL_ALL) || defined(WOLFSSL_NGINX) || defined(DEBUG_WOLFSSL) || defined(WOLFSSL_HAPROXY)
+#if defined(OPENSSL_ALL) || defined(WOLFSSL_NGINX) || defined(DEBUG_WOLFSSL) || \
+    defined(WOLFSSL_HAPROXY)
     {
         int ret;
 
@@ -30496,12 +39179,13 @@
 #endif /* !NO_CERTS */
 
 
+#if defined(HAVE_EX_DATA) || defined(FORTRESS)
 void* wolfSSL_CTX_get_ex_data(const WOLFSSL_CTX* ctx, int idx)
 {
     WOLFSSL_ENTER("wolfSSL_CTX_get_ex_data");
     #ifdef HAVE_EX_DATA
-    if(ctx != NULL && idx < MAX_EX_DATA && idx >= 0) {
-        return ctx->ex_data[idx];
+    if(ctx != NULL) {
+        return wolfSSL_CRYPTO_get_ex_data(&ctx->ex_data, idx);
     }
     #else
     (void)ctx;
@@ -30525,7 +39209,6 @@
     return ctx_idx++;
 }
 
-
 /* Return the index that can be used for the WOLFSSL structure to store
  * application data.
  *
@@ -30552,10 +39235,9 @@
 {
     WOLFSSL_ENTER("wolfSSL_CTX_set_ex_data");
     #ifdef HAVE_EX_DATA
-    if (ctx != NULL && idx < MAX_EX_DATA)
-    {
-        ctx->ex_data[idx] = data;
-        return WOLFSSL_SUCCESS;
+    if (ctx != NULL)
+    {
+        return wolfSSL_CRYPTO_set_ex_data(&ctx->ex_data, idx, data);
     }
     #else
     (void)ctx;
@@ -30564,6 +39246,7 @@
     #endif
     return WOLFSSL_FAILURE;
 }
+#endif
 
 
 /* Returns char* to app data stored in ex[0].
@@ -30584,7 +39267,7 @@
  * ssl WOLFSSL struct to set app data in
  * arg data to be stored
  *
- * Returns SSL_SUCCESS on sucess and SSL_FAILURE on failure
+ * Returns SSL_SUCCESS on success and SSL_FAILURE on failure
  */
 int wolfSSL_set_app_data(WOLFSSL *ssl, void* arg) {
     WOLFSSL_ENTER("wolfSSL_set_app_data");
@@ -30597,10 +39280,9 @@
 {
     WOLFSSL_ENTER("wolfSSL_set_ex_data");
 #if defined(HAVE_EX_DATA) || defined(FORTRESS)
-    if (ssl != NULL && idx < MAX_EX_DATA)
-    {
-        ssl->ex_data[idx] = data;
-        return WOLFSSL_SUCCESS;
+    if (ssl != NULL)
+    {
+        return wolfSSL_CRYPTO_set_ex_data(&ssl->ex_data, idx, data);
     }
 #else
     WOLFSSL_MSG("HAVE_EX_DATA macro is not defined");
@@ -30617,8 +39299,9 @@
 {
     WOLFSSL_ENTER("wolfSSL_get_ex_data");
 #if defined(HAVE_EX_DATA) || defined(FORTRESS)
-    if (ssl != NULL && idx < MAX_EX_DATA && idx >= 0)
-        return ssl->ex_data[idx];
+    if (ssl != NULL) {
+        return wolfSSL_CRYPTO_get_ex_data(&ssl->ex_data, idx);
+    }
 #else
     WOLFSSL_MSG("HAVE_EX_DATA macro is not defined");
     (void)ssl;
@@ -30723,9 +39406,10 @@
     return dsa;
 }
 #endif /* NO_DSA */
-
-#define WOLFSSL_BIO_INCLUDED
-#include "src/bio.c"
+#endif /* OPENSSL_EXTRA */
+#endif /* WOLFCRYPT_ONLY */
+
+#if defined(OPENSSL_EXTRA)
 
 /* Begin functions for openssl/buffer.h */
 WOLFSSL_BUF_MEM* wolfSSL_BUF_MEM_new(void)
@@ -30811,7 +39495,7 @@
     WOLFSSL_ENTER("wolfSSL_BIO_new_file");
 
     fp = XFOPEN(filename, mode);
-    if (fp == NULL)
+    if (fp == XBADFILE)
         return NULL;
 
     bio = wolfSSL_BIO_new(wolfSSL_BIO_s_file());
@@ -30835,6 +39519,28 @@
 #endif /* NO_FILESYSTEM */
 }
 
+#ifndef NO_FILESYSTEM
+WOLFSSL_BIO* wolfSSL_BIO_new_fp(XFILE fp, int close_flag)
+{
+    WOLFSSL_BIO* bio;
+
+    WOLFSSL_ENTER("wolfSSL_BIO_new_fp");
+
+    bio = wolfSSL_BIO_new(wolfSSL_BIO_s_file());
+    if (bio == NULL) {
+        return bio;
+    }
+
+    if (wolfSSL_BIO_set_fp(bio, fp, close_flag) != WOLFSSL_SUCCESS) {
+        wolfSSL_BIO_free(bio);
+        bio = NULL;
+    }
+
+    /* file is closed when BIO is free'd or by user depending on flag */
+    return bio;
+}
+#endif
+
 
 #ifndef NO_DH
 WOLFSSL_DH *wolfSSL_PEM_read_bio_DHparams(WOLFSSL_BIO *bio, WOLFSSL_DH **x,
@@ -30873,11 +39579,15 @@
     }
     else if (bio->type == WOLFSSL_BIO_FILE) {
         /* Read whole file into a new buffer. */
-        XFSEEK(bio->file, 0, SEEK_END);
-        sz = XFTELL(bio->file);
-        XFSEEK(bio->file, 0, SEEK_SET);
-        if (sz <= 0L)
+        if (XFSEEK((XFILE)bio->ptr, 0, SEEK_END) != 0)
+            goto end;
+        sz = XFTELL((XFILE)bio->ptr);
+        if (XFSEEK((XFILE)bio->ptr, 0, SEEK_SET) != 0)
             goto end;
+        if (sz > MAX_WOLFSSL_FILE_SIZE || sz <= 0L) {
+            WOLFSSL_MSG("PEM_read_bio_DHparams file size error");
+            goto end;
+        }
         mem = (unsigned char*)XMALLOC(sz, NULL, DYNAMIC_TYPE_PEM);
         if (mem == NULL)
             goto end;
@@ -30900,11 +39610,9 @@
     if (x != NULL)
         localDh = *x;
     if (localDh == NULL) {
-        localDh = (WOLFSSL_DH*)XMALLOC(sizeof(WOLFSSL_DH), NULL,
-                                       DYNAMIC_TYPE_OPENSSL);
+        localDh = wolfSSL_DH_new();
         if (localDh == NULL)
             goto end;
-        XMEMSET(localDh, 0, sizeof(WOLFSSL_DH));
     }
 
     /* Load data in manually */
@@ -30942,6 +39650,14 @@
         localDh = NULL;
     }
 
+    if (localDh != NULL && localDh->inSet == 0) {
+        if (SetDhInternal(localDh) != WOLFSSL_SUCCESS) {
+            WOLFSSL_MSG("Unable to set internal DH structure");
+            wolfSSL_DH_free(localDh);
+            localDh = NULL;
+        }
+    }
+
 end:
     if (memAlloced) XFREE(mem, NULL, DYNAMIC_TYPE_PEM);
     if (der != NULL) FreeDer(&der);
@@ -30956,7 +39672,359 @@
     return NULL;
 #endif
 }
-#endif
+
+#ifndef NO_FILESYSTEM
+#if defined(WOLFSSL_QT) || defined(OPENSSL_ALL) || defined(WOLFSSL_OPENSSH)
+/* Convert DH key parameters to DER format, write to output (outSz)
+ * If output is NULL then max expected size is set to outSz and LENGTH_ONLY_E is
+ * returned.
+ *
+ * Note : static function due to redefinition complications with DhKey and FIPS
+ * version 2 build.
+ *
+ * return bytes written on success */
+int wc_DhParamsToDer(DhKey* key, byte* out, word32* outSz)
+{
+    word32 sz = 0, idx = 0;
+    int pSz = 0, gSz = 0, ret;
+    byte scratch[MAX_LENGTH_SZ];
+
+    if (key == NULL || outSz == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    pSz = mp_unsigned_bin_size(&key->p);
+    if (pSz < 0) {
+        return pSz;
+    }
+    if (mp_leading_bit(&key->p)) {
+        pSz++;
+    }
+
+    gSz = mp_unsigned_bin_size(&key->g);
+    if (gSz < 0) {
+        return gSz;
+    }
+    if (mp_leading_bit(&key->g)) {
+        gSz++;
+    }
+
+    sz  = ASN_TAG_SZ; /* Integer */
+    sz += SetLength(pSz, scratch);
+    sz += ASN_TAG_SZ; /* Integer */
+    sz += SetLength(gSz, scratch);
+    sz += gSz + pSz;
+
+    if (out == NULL) {
+        byte seqScratch[MAX_SEQ_SZ];
+
+        *outSz = sz + SetSequence(sz, seqScratch);
+        return LENGTH_ONLY_E;
+    }
+
+    if (*outSz < MAX_SEQ_SZ || *outSz < sz) {
+        return BUFFER_E;
+    }
+
+    idx += SetSequence(sz, out);
+    if (*outSz < idx + sz) {
+        return BUFFER_E;
+    }
+
+    out[idx++] = ASN_INTEGER;
+    idx += SetLength(pSz, out + idx);
+    if (mp_leading_bit(&key->p)) {
+        out[idx++] = 0x00;
+        pSz -= 1; /* subtract 1 from size to account for leading 0 */
+    }
+    ret = mp_to_unsigned_bin(&key->p, out + idx);
+    if (ret != MP_OKAY) {
+        return BUFFER_E;
+    }
+    idx += pSz;
+
+    out[idx++] = ASN_INTEGER;
+    idx += SetLength(gSz, out + idx);
+    if (mp_leading_bit(&key->g)) {
+        out[idx++] = 0x00;
+        gSz -= 1; /* subtract 1 from size to account for leading 0 */
+    }
+    ret = mp_to_unsigned_bin(&key->g, out + idx);
+    if (ret != MP_OKAY) {
+        return BUFFER_E;
+    }
+    idx += gSz;
+    return idx;
+}
+
+int wc_DhPubKeyToDer(DhKey*  key, byte* out, word32* outSz)
+{
+    word32 sz = 0;
+    word32 paramSz = 0;
+    int ret;
+    int pubSz = 0;
+    int idx = 0;
+    byte scratch[MAX_ALGO_SZ];
+
+    /* Get size of entire key */
+
+    /*  SEQUENCE               <--| SetAlgoId
+     *      OBJECT IDENTIFIER  <--|
+     *      SEQUENCE     <--
+     *          INTEGER    | wc_DhParamsToDer
+     *          INTEGER  <--
+     */
+    ret = wc_DhParamsToDer(key, NULL, &paramSz);
+    if (ret != LENGTH_ONLY_E)
+        return ASN_PARSE_E;
+    sz += paramSz;
+    sz += SetAlgoID(DHk, scratch, oidKeyType, paramSz);
+
+    /*  BIT STRING
+     *      INTEGER
+     */
+    pubSz = mp_unsigned_bin_size(&key->pub);
+    if (pubSz < 0)
+        return pubSz;
+
+    if (mp_leading_bit(&key->pub))
+        pubSz++;
+
+    sz += ASN_TAG_SZ; /* Integer */
+    sz += SetLength(pubSz, scratch);
+    sz += pubSz;
+
+    sz += SetBitString(pubSz, 0, scratch);
+
+    if (out == NULL) {
+        /* Uppermost SEQUENCE */
+        *outSz = sz + SetSequence(sz, scratch);
+        return LENGTH_ONLY_E;
+    }
+    /* end get size of entire key */
+
+    /* Check for indexing errors */
+    if (*outSz < MAX_SEQ_SZ || *outSz < sz) {
+        return BUFFER_E;
+    }
+
+    /* Build Up Entire Key */
+
+    idx += SetSequence(sz, out);
+
+    idx += SetAlgoID(DHk, out+idx, oidKeyType, paramSz);
+    ret = wc_DhParamsToDer(key, out+idx, &paramSz);
+    if (ret < 0)
+        return ret;
+    idx += ret;
+
+    /* BIT STRING
+     *   INTEGER
+     */
+    idx += SetBitString(pubSz, 0, out+idx);
+
+    out[idx++] = ASN_INTEGER;
+    idx += SetLength(pubSz, out + idx);
+    if (mp_leading_bit(&key->pub)) {
+        out[idx++] = 0x00;
+        pubSz -= 1; /* subtract 1 from size to account for leading 0 */
+    }
+    ret = mp_to_unsigned_bin(&key->pub, out + idx);
+    if (ret != MP_OKAY) {
+        return BUFFER_E;
+    }
+    idx += pubSz;
+
+    return idx;
+}
+
+int wc_DhPrivKeyToDer(DhKey* key, byte* out, word32* outSz)
+{
+    word32 sz = 0;
+    word32 paramSz = 0;
+    int ret;
+    int privSz = 0;
+    int idx = 0;
+    byte scratch[MAX_ALGO_SZ];
+
+    /* Get size of entire key */
+
+    /*  INTEGER 0 */
+    sz += ASN_TAG_SZ; /* Integer */
+    sz += SetLength(1, scratch);
+    sz += 1;
+
+    /*  SEQUENCE               <--| SetAlgoId
+     *      OBJECT IDENTIFIER  <--|
+     *      SEQUENCE       <--
+     *          INTEGER       | wc_DhParamsToDer
+     *          INTEGER     <--
+     */
+    ret = wc_DhParamsToDer(key, NULL, &paramSz);
+    if (ret != LENGTH_ONLY_E)
+        return ASN_PARSE_E;
+    sz += paramSz;
+    sz += SetAlgoID(DHk, scratch, oidKeyType, paramSz);
+
+    /*  OCTET STRING
+     *      INTEGER
+     */
+    privSz = mp_unsigned_bin_size(&key->priv);
+    if (privSz < 0)
+        return privSz;
+    else if (privSz > 256) /* Key is larger than 2048 */
+        return ASN_VERSION_E;
+
+    if (mp_leading_bit(&key->priv))
+        privSz++;
+
+    sz += ASN_TAG_SZ; /* Integer */
+    sz += SetLength(privSz, scratch);
+    sz += privSz;
+
+    sz += SetOctetString(privSz + ASN_OCTET_STRING, scratch);
+
+    if (out == NULL) {
+        /* Uppermost SEQUENCE */
+        *outSz = sz + SetSequence(sz, scratch);
+        return LENGTH_ONLY_E;
+    }
+    /* end get size of entire key */
+
+    /* Check for indexing errors */
+    if (*outSz < MAX_SEQ_SZ || *outSz < sz) {
+        return BUFFER_E;
+    }
+
+    /* Build Up Entire Key */
+
+    idx += SetSequence(sz, out);
+
+    /* INTEGER 0 */
+    out[idx++] = ASN_INTEGER;
+    idx += SetLength(1, out+idx);
+    out[idx++] = 0;
+
+    idx += SetAlgoID(DHk, out+idx, oidKeyType, paramSz);
+    ret = wc_DhParamsToDer(key, out+idx, &paramSz);
+    if (ret < 0)
+        return ret;
+    idx += ret;
+
+    /* OCTET STRING
+     *   INTEGER
+     */
+    if (privSz == 256) {
+        idx += SetOctetString(privSz + ASN_OCTET_STRING, out+idx);
+    } else if (privSz == 128) {
+        idx += SetOctetString(privSz + ASN_OCTET_STRING-1, out+idx);
+    } else if (privSz == 64) {
+        idx += SetOctetString(privSz + ASN_OCTET_STRING-2, out+idx);
+    } else {
+        WOLFSSL_MSG("Unsupported key size");
+        return ASN_VERSION_E;
+    }
+
+    out[idx++] = ASN_INTEGER;
+    idx += SetLength(privSz, out + idx);
+    if (mp_leading_bit(&key->priv)) {
+        out[idx++] = 0x00;
+        privSz -= 1; /* subtract 1 from size to account for leading 0 */
+    }
+    ret = mp_to_unsigned_bin(&key->priv, out + idx);
+    if (ret != MP_OKAY) {
+        return BUFFER_E;
+    }
+    idx += privSz;
+
+    return idx;
+}
+
+/* Writes the DH parameters in PEM format from "dh" out to the file pointer
+ * passed in.
+ *
+ * returns WOLFSSL_SUCCESS on success
+ */
+int wolfSSL_PEM_write_DHparams(XFILE fp, WOLFSSL_DH* dh)
+{
+    int ret;
+    word32 derSz = 0, pemSz = 0;
+    byte *der, *pem;
+    DhKey* key;
+
+    WOLFSSL_ENTER("wolfSSL_PEM_write_DHparams");
+
+    if (dh == NULL) {
+        WOLFSSL_LEAVE("wolfSSL_PEM_write_DHparams", BAD_FUNC_ARG);
+        return WOLFSSL_FAILURE;
+    }
+
+    if (dh->inSet == 0) {
+        if (SetDhInternal(dh) != WOLFSSL_SUCCESS) {
+            WOLFSSL_MSG("Unable to set internal DH structure");
+            return WOLFSSL_FAILURE;
+        }
+    }
+    key = (DhKey*)dh->internal;
+    ret = wc_DhParamsToDer(key, NULL, &derSz);
+    if (ret != LENGTH_ONLY_E) {
+        WOLFSSL_MSG("Failed to get size of DH params");
+        WOLFSSL_LEAVE("wolfSSL_PEM_write_DHparams", ret);
+        return WOLFSSL_FAILURE;
+    }
+
+    der = (byte*)XMALLOC(derSz, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (der == NULL) {
+        WOLFSSL_LEAVE("wolfSSL_PEM_write_DHparams", MEMORY_E);
+        return WOLFSSL_FAILURE;
+    }
+    ret = wc_DhParamsToDer(key, der, &derSz);
+    if (ret <= 0) {
+        WOLFSSL_MSG("Failed to export DH params");
+        WOLFSSL_LEAVE("wolfSSL_PEM_write_DHparams", ret);
+        XFREE(der, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        return WOLFSSL_FAILURE;
+    }
+
+    /* convert to PEM */
+    ret = wc_DerToPem(der, derSz, NULL, 0, DH_PARAM_TYPE);
+    if (ret < 0) {
+        WOLFSSL_MSG("Failed to convert DH params to PEM");
+        WOLFSSL_LEAVE("wolfSSL_PEM_write_DHparams", ret);
+        XFREE(der, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        return ret;
+    }
+    pemSz = (word32)ret;
+
+    pem = (byte*)XMALLOC(pemSz, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (pem == NULL) {
+        WOLFSSL_LEAVE("wolfSSL_PEM_write_DHparams", MEMORY_E);
+        XFREE(der, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        return ret;
+    }
+    ret = wc_DerToPem(der, derSz, pem, pemSz, DH_PARAM_TYPE);
+    XFREE(der, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (ret < 0) {
+        WOLFSSL_MSG("Failed to convert DH params to PEM");
+        WOLFSSL_LEAVE("wolfSSL_PEM_write_DHparams", ret);
+        XFREE(pem, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        return ret;
+    }
+
+    ret = (int)XFWRITE(pem, 1, pemSz, fp);
+    XFREE(pem, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (ret <= 0) {
+        WOLFSSL_MSG("Failed to write to file");
+        WOLFSSL_LEAVE("wolfSSL_PEM_write_DHparams", ret);
+        return WOLFSSL_FAILURE;
+    }
+
+    WOLFSSL_LEAVE("wolfSSL_PEM_write_DHparams", WOLFSSL_SUCCESS);
+    return WOLFSSL_SUCCESS;
+}
+#endif /* WOLFSSL_QT || OPENSSL_ALL */
+#endif /* !NO_FILESYSTEM */
+#endif /* !NO_DH */
 
 #ifdef WOLFSSL_CERT_GEN
 
@@ -31105,7 +40173,7 @@
 
 
 #if defined(OPENSSL_EXTRA) && !defined(NO_DH)
-/* Intialize ctx->dh with dh's params. Return WOLFSSL_SUCCESS on ok */
+/* Initialize ctx->dh with dh's params. Return WOLFSSL_SUCCESS on ok */
 long wolfSSL_CTX_set_tmp_dh(WOLFSSL_CTX* ctx, WOLFSSL_DH* dh)
 {
     int pSz, gSz;
@@ -31165,7 +40233,188 @@
 }
 #endif /* HAVE_LIGHTY || HAVE_STUNNEL || WOLFSSL_MYSQL_COMPATIBLE */
 
-#if defined(OPENSSL_ALL) || defined(WOLFSSL_ASIO)
+#if defined(OPENSSL_ALL) || defined(WOLFSSL_ASIO) || defined(WOLFSSL_HAPROXY) \
+    || defined(WOLFSSL_NGINX) || defined(WOLFSSL_QT)
+
+long wolfSSL_ctrl(WOLFSSL* ssl, int cmd, long opt, void* pt)
+{
+    WOLFSSL_ENTER("wolfSSL_ctrl");
+    if (ssl == NULL)
+        return BAD_FUNC_ARG;
+
+    switch (cmd) {
+        #if defined(WOLFSSL_NGINX) || defined(WOLFSSL_QT) || defined(OPENSSL_ALL)
+        case SSL_CTRL_SET_TLSEXT_HOSTNAME:
+            WOLFSSL_MSG("Entering Case: SSL_CTRL_SET_TLSEXT_HOSTNAME.");
+        #ifdef HAVE_SNI
+            if (pt == NULL) {
+                WOLFSSL_MSG("Passed in NULL Host Name.");
+                break;
+            }
+            return wolfSSL_set_tlsext_host_name(ssl, (const char*) pt);
+        #else
+            WOLFSSL_MSG("SNI not enabled.");
+            break;
+        #endif /* HAVE_SNI */
+        #endif /* WOLFSSL_NGINX || WOLFSSL_QT || OPENSSL_ALL */
+        default:
+            WOLFSSL_MSG("Case not implemented.");
+    }
+    (void)opt;
+    (void)pt;
+    return WOLFSSL_FAILURE;
+}
+
+long wolfSSL_CTX_ctrl(WOLFSSL_CTX* ctx, int cmd, long opt, void* pt)
+{
+#if defined(OPENSSL_EXTRA) || defined(HAVE_WEBSERVER)
+    long ctrl_opt;
+#endif
+    long ret = WOLFSSL_SUCCESS;
+
+    WOLFSSL_ENTER("wolfSSL_CTX_ctrl");
+    if (ctx == NULL)
+        return WOLFSSL_FAILURE;
+
+    switch (cmd) {
+    case SSL_CTRL_CHAIN:
+#ifdef SESSION_CERTS
+    {
+        /*
+         * We don't care about opt here because a copy of the certificate is
+         * stored anyway so increasing the reference counter is not necessary.
+         * Just check to make sure that it is set to one of the correct values.
+         */
+        WOLF_STACK_OF(WOLFSSL_X509)* sk = (WOLF_STACK_OF(WOLFSSL_X509)*) pt;
+        WOLFSSL_X509* x509;
+        int i;
+        if (opt != 0 && opt != 1) {
+            ret = WOLFSSL_FAILURE;
+            break;
+        }
+        /* Clear certificate chain */
+        FreeDer(&ctx->certChain);
+        if (sk) {
+            for (i = 0; i < wolfSSL_sk_X509_num(sk); i++) {
+                x509 = wolfSSL_sk_X509_value(sk, i);
+                /* Prevent wolfSSL_CTX_add_extra_chain_cert from freeing cert */
+                if (wolfSSL_X509_up_ref(x509) != 1) {
+                    WOLFSSL_MSG("Error increasing reference count");
+                    continue;
+                }
+                if (wolfSSL_CTX_add_extra_chain_cert(ctx, x509) !=
+                        WOLFSSL_SUCCESS) {
+                    WOLFSSL_MSG("Error adding certificate to context");
+                    /* Decrease reference count on failure */
+                    wolfSSL_X509_free(x509);
+                }
+            }
+        }
+        /* Free previous chain */
+        wolfSSL_sk_X509_free(ctx->x509Chain);
+        ctx->x509Chain = sk;
+        if (sk) {
+            for (i = 0; i < wolfSSL_sk_X509_num(sk); i++) {
+                x509 = wolfSSL_sk_X509_value(sk, i);
+                /* On successful setting of new chain up all refs */
+                if (wolfSSL_X509_up_ref(x509) != 1) {
+                    WOLFSSL_MSG("Error increasing reference count");
+                    continue;
+                }
+            }
+        }
+    }
+#else
+        WOLFSSL_MSG("Session certificates not compiled in");
+        ret = WOLFSSL_FAILURE;
+#endif
+        break;
+
+#if defined(OPENSSL_EXTRA) || defined(HAVE_WEBSERVER)
+    case SSL_CTRL_OPTIONS:
+        WOLFSSL_MSG("Entering Case: SSL_CTRL_OPTIONS.");
+        ctrl_opt = wolfSSL_CTX_set_options(ctx, opt);
+
+        #ifdef WOLFSSL_QT
+        /* Set whether to use client or server cipher preference */
+        if ((ctrl_opt & SSL_OP_CIPHER_SERVER_PREFERENCE)
+                     == SSL_OP_CIPHER_SERVER_PREFERENCE) {
+            WOLFSSL_MSG("Using Server's Cipher Preference.");
+            ctx->useClientOrder = FALSE;
+        } else {
+            WOLFSSL_MSG("Using Client's Cipher Preference.");
+            ctx->useClientOrder = TRUE;
+        }
+        #endif /* WOLFSSL_QT */
+
+        return ctrl_opt;
+#endif /* OPENSSL_EXTRA || HAVE_WEBSERVER */
+    case SSL_CTRL_EXTRA_CHAIN_CERT:
+        WOLFSSL_MSG("Entering Case: SSL_CTRL_EXTRA_CHAIN_CERT.");
+        if (pt == NULL) {
+            WOLFSSL_MSG("Passed in x509 pointer NULL.");
+            ret = WOLFSSL_FAILURE;
+            break;
+        }
+        return wolfSSL_CTX_add_extra_chain_cert(ctx, (WOLFSSL_X509*)pt);
+
+#ifndef NO_DH
+    case SSL_CTRL_SET_TMP_DH:
+        WOLFSSL_MSG("Entering Case: SSL_CTRL_SET_TMP_DH.");
+        if (pt == NULL) {
+            WOLFSSL_MSG("Passed in DH pointer NULL.");
+            ret = WOLFSSL_FAILURE;
+            break;
+        }
+        return wolfSSL_CTX_set_tmp_dh(ctx, (WOLFSSL_DH*)pt);
+#endif
+
+#ifdef HAVE_ECC
+    case SSL_CTRL_SET_TMP_ECDH:
+        WOLFSSL_MSG("Entering Case: SSL_CTRL_SET_TMP_ECDH.");
+        if (pt == NULL) {
+            WOLFSSL_MSG("Passed in ECDH pointer NULL.");
+            ret = WOLFSSL_FAILURE;
+            break;
+        }
+        return wolfSSL_SSL_CTX_set_tmp_ecdh(ctx, (WOLFSSL_EC_KEY*)pt);
+#endif
+    case SSL_CTRL_MODE:
+        wolfSSL_CTX_set_mode(ctx,opt);
+        break;
+
+    default:
+        WOLFSSL_MSG("CTX_ctrl cmd not implemented");
+        ret = WOLFSSL_FAILURE;
+        break;
+    }
+
+    (void)ctx;
+    (void)cmd;
+    (void)opt;
+    (void)pt;
+    WOLFSSL_LEAVE("wolfSSL_CTX_ctrl", (int)ret);
+    return ret;
+}
+
+#ifndef WOLFSSL_NO_STUB
+long wolfSSL_CTX_callback_ctrl(WOLFSSL_CTX* ctx, int cmd, void (*fp)(void))
+{
+    (void) ctx;
+    (void) cmd;
+    (void) fp;
+    WOLFSSL_STUB("wolfSSL_CTX_callback_ctrl");
+    return WOLFSSL_FAILURE;
+
+}
+#endif /* WOLFSSL_NO_STUB */
+
+#ifndef NO_WOLFSSL_STUB
+long wolfSSL_CTX_clear_extra_chain_certs(WOLFSSL_CTX* ctx)
+{
+    return wolfSSL_CTX_ctrl(ctx, SSL_CTRL_CLEAR_EXTRA_CHAIN_CERTS, 0l, NULL);
+}
+#endif
 
 /* Returns the verifyCallback from the ssl structure if successful.
 Returns NULL otherwise. */
@@ -31231,7 +40480,8 @@
 }
 
 
-#if !defined(NO_RSA)
+#if !defined(HAVE_FAST_RSA) && defined(WOLFSSL_KEY_GEN) && \
+    !defined(NO_RSA) && !defined(HAVE_USER_RSA)
 /* Converts an rsa key from a bio buffer into an internal rsa structure.
 Returns a pointer to the new WOLFSSL_RSA structure. */
 WOLFSSL_RSA* wolfSSL_d2i_RSAPrivateKey_bio(WOLFSSL_BIO *bio, WOLFSSL_RSA **out)
@@ -31268,7 +40518,8 @@
 
     bufPtr = maxKeyBuf;
     if (wolfSSL_BIO_read(bio, (unsigned char*)bioMem, (int)bioMemSz) == bioMemSz) {
-        if ((key = wolfSSL_d2i_RSAPrivateKey(NULL, &bioMem, bioMemSz)) == NULL) {
+        const byte* bioMemPt = bioMem; /* leave bioMem pointer unaltered */
+        if ((key = wolfSSL_d2i_RSAPrivateKey(NULL, &bioMemPt, bioMemSz)) == NULL) {
             XFREE((unsigned char*)bioMem, bio->heap, DYNAMIC_TYPE_TMP_BUFFER);
             return NULL;
         }
@@ -31282,10 +40533,10 @@
             extraBioMem = (unsigned char *)XMALLOC(extraBioMemSz, NULL,
                                                        DYNAMIC_TYPE_TMP_BUFFER);
             if (extraBioMem == NULL) {
-                WOLFSSL_MSG("Malloc failure");;
-                XFREE((unsigned char*)extraBioMem, bio->heap, 
+                WOLFSSL_MSG("Malloc failure");
+                XFREE((unsigned char*)extraBioMem, bio->heap,
                                                        DYNAMIC_TYPE_TMP_BUFFER);
-                XFREE((unsigned char*)bioMem, bio->heap, 
+                XFREE((unsigned char*)bioMem, bio->heap,
                                                        DYNAMIC_TYPE_TMP_BUFFER);
                 return NULL;
             }
@@ -31298,13 +40549,13 @@
             wolfSSL_BIO_write(bio, extraBioMem, extraBioMemSz);
             if (wolfSSL_BIO_pending(bio) <= 0) {
                 WOLFSSL_MSG("Failed to write memory to bio");
-                XFREE((unsigned char*)extraBioMem, bio->heap, 
+                XFREE((unsigned char*)extraBioMem, bio->heap,
                                                        DYNAMIC_TYPE_TMP_BUFFER);
-                XFREE((unsigned char*)bioMem, bio->heap, 
+                XFREE((unsigned char*)bioMem, bio->heap,
                                                        DYNAMIC_TYPE_TMP_BUFFER);
                 return NULL;
             }
-            XFREE((unsigned char*)extraBioMem, bio->heap, 
+            XFREE((unsigned char*)extraBioMem, bio->heap,
                                                        DYNAMIC_TYPE_TMP_BUFFER);
         }
 
@@ -31320,12 +40571,12 @@
 
 /* Adds the ASN1 certificate to the user ctx.
 Returns WOLFSSL_SUCCESS if no error, returns WOLFSSL_FAILURE otherwise.*/
-int wolfSSL_CTX_use_certificate_ASN1(WOLFSSL_CTX *ctx, int derSz, 
+int wolfSSL_CTX_use_certificate_ASN1(WOLFSSL_CTX *ctx, int derSz,
                                                        const unsigned char *der)
 {
     WOLFSSL_ENTER("wolfSSL_CTX_use_certificate_ASN1()");
     if (der != NULL && ctx != NULL) {
-        if (wolfSSL_CTX_use_certificate_buffer(ctx, der, derSz, 
+        if (wolfSSL_CTX_use_certificate_buffer(ctx, der, derSz,
                                       WOLFSSL_FILETYPE_ASN1) == WOLFSSL_SUCCESS) {
             return WOLFSSL_SUCCESS;
         }
@@ -31335,7 +40586,8 @@
 }
 
 
-#if !defined(NO_RSA) && !defined(HAVE_FAST_RSA)
+#if !defined(HAVE_FAST_RSA) && defined(WOLFSSL_KEY_GEN) && \
+    !defined(NO_RSA) && !defined(HAVE_USER_RSA)
 /* Adds the rsa private key to the user ctx.
 Returns WOLFSSL_SUCCESS if no error, returns WOLFSSL_FAILURE otherwise.*/
 int wolfSSL_CTX_use_RSAPrivateKey(WOLFSSL_CTX* ctx, WOLFSSL_RSA* rsa)
@@ -31357,7 +40609,7 @@
         WOLFSSL_MSG("wolfSSL_i2d_RSAPrivateKey() failure");
         return WOLFSSL_FAILURE;
     }
-    ret = wolfSSL_CTX_use_PrivateKey_buffer(ctx, (const unsigned char*)maxDerBuf, 
+    ret = wolfSSL_CTX_use_PrivateKey_buffer(ctx, (const unsigned char*)maxDerBuf,
                                                     derSize, SSL_FILETYPE_ASN1);
     if (ret != WOLFSSL_SUCCESS) {
         WOLFSSL_MSG("wolfSSL_CTX_USE_PrivateKey_buffer() failure");
@@ -31440,7 +40692,7 @@
                                                        DYNAMIC_TYPE_TMP_BUFFER);
         }
 
-        if (out != NULL && key != NULL) {
+        if (out != NULL) {
             *out = key;
         }
     }
@@ -31461,7 +40713,7 @@
 
     WOLFSSL_ENTER("wolfSSL_d2i_PrivateKey_EVP()");
 
-    if (in == NULL || inSz < 0) {
+    if (in == NULL || *in == NULL || inSz < 0) {
         WOLFSSL_MSG("Bad argument");
         return NULL;
     }
@@ -31476,7 +40728,7 @@
         if (wc_InitRsaKey(&rsa, NULL) == 0 &&
             wc_RsaPrivateKeyDecode(mem, &keyIdx, &rsa, (word32)memSz) == 0) {
             wc_FreeRsaKey(&rsa);
-            pkey = wolfSSL_PKEY_new();
+            pkey = wolfSSL_EVP_PKEY_new();
             if (pkey != NULL) {
                 pkey->pkey_sz = keyIdx;
                 pkey->pkey.ptr = (char*)XMALLOC(memSz, NULL,
@@ -31521,7 +40773,7 @@
         if (wc_ecc_init(&ecc) == 0 &&
             wc_EccPrivateKeyDecode(mem, &keyIdx, &ecc, (word32)memSz) == 0) {
             wc_ecc_free(&ecc);
-            pkey = wolfSSL_PKEY_new();
+            pkey = wolfSSL_EVP_PKEY_new();
             if (pkey != NULL) {
                 pkey->pkey_sz = keyIdx;
                 pkey->pkey.ptr = (char*)XMALLOC(keyIdx, NULL,
@@ -31543,12 +40795,13 @@
     #endif /* HAVE_ECC */
     return pkey;
 }
-#endif /* OPENSSL_ALL || WOLFSSL_ASIO */
+#endif /* OPENSSL_ALL || WOLFSSL_ASIO || WOLFSSL_HAPROXY || WOLFSSL_QT */
 
 
 /* stunnel compatibility functions*/
-#if defined(OPENSSL_ALL) || (defined(OPENSSL_EXTRA) && (defined(HAVE_STUNNEL) \
-                          || defined(WOLFSSL_NGINX) || defined(HAVE_LIGHTY)))
+#if defined(OPENSSL_ALL) || (defined(OPENSSL_EXTRA) && (defined(HAVE_STUNNEL) || \
+                             defined(WOLFSSL_NGINX) || defined(HAVE_LIGHTY) || \
+                             defined(WOLFSSL_HAPROXY) || defined(WOLFSSL_OPENSSH)))
 void wolfSSL_ERR_remove_thread_state(void* pid)
 {
     (void) pid;
@@ -31557,7 +40810,7 @@
 
 #ifndef NO_FILESYSTEM
 /***TBD ***/
-void wolfSSL_print_all_errors_fp(XFILE *fp)
+void wolfSSL_print_all_errors_fp(XFILE fp)
 {
     (void)fp;
 }
@@ -31567,9 +40820,8 @@
 {
     WOLFSSL_ENTER("wolfSSL_SESSION_set_ex_data");
 #ifdef HAVE_EX_DATA
-    if(session != NULL && idx < MAX_EX_DATA) {
-        session->ex_data[idx] = data;
-        return WOLFSSL_SUCCESS;
+    if(session != NULL) {
+        return wolfSSL_CRYPTO_set_ex_data(&session->ex_data, idx, data);
     }
 #else
     (void)session;
@@ -31602,8 +40854,9 @@
 {
     WOLFSSL_ENTER("wolfSSL_SESSION_get_ex_data");
 #ifdef HAVE_EX_DATA
-    if (session != NULL && idx < MAX_EX_DATA && idx >= 0)
-        return session->ex_data[idx];
+    if (session != NULL) {
+        return wolfSSL_CRYPTO_get_ex_data(&session->ex_data, idx);
+    }
 #else
     (void)session;
     (void)idx;
@@ -31670,29 +40923,6 @@
 }
 
 #ifndef NO_WOLFSSL_STUB
-unsigned long wolfSSL_ERR_peek_last_error(void)
-{
-    WOLFSSL_ENTER("wolfSSL_ERR_peek_last_error");
-
-#if defined(OPENSSL_ALL) || defined(WOLFSSL_NGINX)
-    {
-        int ret;
-
-        if ((ret = wc_PeekErrorNode(-1, NULL, NULL, NULL)) < 0) {
-            WOLFSSL_MSG("Issue peeking at error node in queue");
-            return 0;
-        }
-        if (ret == -ASN_NO_PEM_HEADER)
-            return (ERR_LIB_PEM << 24) | PEM_R_NO_START_LINE;
-        return (unsigned long)ret;
-    }
-#else
-    return (unsigned long)(0 - NOT_COMPILED_IN);
-#endif
-}
-#endif
-
-#ifndef NO_WOLFSSL_STUB
 int wolfSSL_FIPS_mode(void)
 {
     WOLFSSL_ENTER("wolfSSL_FIPS_mode");
@@ -31730,37 +40960,559 @@
 {
     int ret = WOLFSSL_FAILURE;
     WOLFSSL_ENTER("wolfSSL_CIPHER_get_bits");
-    if(c != NULL && c->ssl != NULL) {
+
+    #if defined(WOLFSSL_QT) || defined(OPENSSL_ALL)
+    (void)alg_bits;
+    if (c!= NULL)
+        ret = c->bits;
+    #else
+    if (c != NULL && c->ssl != NULL) {
         ret = 8 * c->ssl->specs.key_size;
-        if(alg_bits != NULL) {
+        if (alg_bits != NULL) {
             *alg_bits = ret;
         }
     }
-    return ret;
-}
-
-int wolfSSL_sk_X509_NAME_num(const WOLF_STACK_OF(WOLFSSL_X509_NAME) *s)
+    #endif
+    return ret;
+}
+
+#if defined(OPENSSL_ALL)
+WOLFSSL_X509_INFO* wolfSSL_X509_INFO_new(void)
+{
+    WOLFSSL_X509_INFO* info;
+    info = (WOLFSSL_X509_INFO*)XMALLOC(sizeof(WOLFSSL_X509_INFO), NULL,
+        DYNAMIC_TYPE_X509);
+    if (info) {
+        XMEMSET(info, 0, sizeof(*info));
+    }
+    return info;
+}
+
+void wolfSSL_X509_INFO_free(WOLFSSL_X509_INFO* info)
+{
+    if (info == NULL)
+        return;
+
+    if (info->x509) {
+        wolfSSL_X509_free(info->x509);
+        info->x509 = NULL;
+    }
+#ifdef HAVE_CRL
+    if (info->crl) {
+        wolfSSL_X509_CRL_free(info->crl);
+        info->crl = NULL;
+    }
+#endif
+    wolfSSL_X509_PKEY_free(info->x_pkey);
+    info->x_pkey = NULL;
+
+    XFREE(info, NULL, DYNAMIC_TYPE_X509);
+}
+#endif
+
+WOLFSSL_STACK* wolfSSL_sk_X509_INFO_new_null(void)
+{
+    WOLFSSL_STACK* sk = wolfSSL_sk_new_node(NULL);
+    if (sk) {
+        sk->type = STACK_TYPE_X509_INFO;
+    }
+    return sk;
+}
+
+
+/* returns value less than 0 on fail to match
+ * On a successful match the priority level found is returned
+ */
+int wolfSSL_sk_SSL_CIPHER_find(
+        WOLF_STACK_OF(WOLFSSL_CIPHER)* sk, const WOLFSSL_CIPHER* toFind)
+{
+    WOLFSSL_STACK* next;
+    int i, sz;
+
+    if (sk == NULL || toFind == NULL) {
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    sz   = wolfSSL_sk_SSL_CIPHER_num(sk);
+    next = sk;
+    for (i = 0; i < sz && next != NULL; i++) {
+        if (next->data.cipher.cipherSuite0 == toFind->cipherSuite0 &&
+                next->data.cipher.cipherSuite == toFind->cipherSuite) {
+            return sz - i; /* reverse because stack pushed highest on first */
+        }
+        next = next->next;
+    }
+    return WOLFSSL_FATAL_ERROR;
+}
+
+
+/* copies over data of "in" to "out" */
+static void wolfSSL_CIPHER_copy(WOLFSSL_CIPHER* in, WOLFSSL_CIPHER* out)
+{
+    if (in == NULL || out == NULL)
+        return;
+
+    out->cipherSuite  = in->cipherSuite;
+    out->cipherSuite0 = in->cipherSuite0;
+}
+
+
+/* create duplicate of stack and return the new stack
+ * returns null on failure */
+WOLF_STACK_OF(WOLFSSL_CIPHER)* wolfSSL_sk_SSL_CIPHER_dup(
+        WOLF_STACK_OF(WOLFSSL_CIPHER)* in)
+{
+    WOLFSSL_STACK* current;
+    WOLF_STACK_OF(WOLFSSL_CIPHER)* ret = NULL;
+    int i, sz;
+
+    sz = wolfSSL_sk_SSL_CIPHER_num(in);
+    current = in;
+    for (i = 0; i < sz && current != NULL; i++) {
+        WOLFSSL_STACK* add = wolfSSL_sk_new_node(in->heap);
+        if (add != NULL) {
+            add->type = STACK_TYPE_CIPHER;
+            wolfSSL_CIPHER_copy(&(current->data.cipher), &(add->data.cipher));
+            add->num = i+1;
+            add->next = ret;
+            ret = add;
+            current = current->next;
+        }
+    }
+    return ret;
+}
+
+/* nothing to do yet */
+static void wolfSSL_CIPHER_free(WOLFSSL_CIPHER* in)
+{
+    (void)in;
+}
+
+
+/* free's all nodes in the stack and there data */
+void wolfSSL_sk_SSL_CIPHER_free(WOLF_STACK_OF(WOLFSSL_CIPHER)* sk)
+{
+    WOLFSSL_STACK* current = sk;
+
+    while (current != NULL) {
+        WOLFSSL_STACK* toFree = current;
+        current = current->next;
+
+        wolfSSL_CIPHER_free(&(toFree->data.cipher));
+        wolfSSL_sk_free_node(toFree);
+    }
+}
+
+
+int wolfSSL_sk_X509_INFO_num(const WOLF_STACK_OF(WOLFSSL_X509_INFO) *sk)
+{
+    WOLFSSL_ENTER("wolfSSL_sk_X509_INFO_num");
+
+    if (sk == NULL)
+        return -1;
+    return (int)sk->num;
+}
+
+WOLFSSL_X509_INFO* wolfSSL_sk_X509_INFO_value(const WOLF_STACK_OF(WOLFSSL_X509_INFO) *sk, int i)
+{
+    WOLFSSL_ENTER("wolfSSL_sk_X509_INFO_value");
+
+    for (; sk != NULL && i > 0; i--)
+        sk = sk->next;
+
+    if (i != 0 || sk == NULL)
+        return NULL;
+    return sk->data.info;
+}
+
+WOLFSSL_X509_INFO* wolfSSL_sk_X509_INFO_pop(WOLF_STACK_OF(WOLFSSL_X509_INFO)* sk)
+{
+    WOLFSSL_STACK* node;
+    WOLFSSL_X509_INFO* info;
+
+    if (sk == NULL) {
+        return NULL;
+    }
+
+    node = sk->next;
+    info = sk->data.info;
+
+    if (node != NULL) { /* update sk and remove node from stack */
+        sk->data.info = node->data.info;
+        sk->next = node->next;
+        XFREE(node, NULL, DYNAMIC_TYPE_OPENSSL);
+    }
+    else { /* last x509 in stack */
+        sk->data.info = NULL;
+    }
+
+    if (sk->num > 0) {
+        sk->num -= 1;
+    }
+
+    return info;
+}
+
+#if defined(OPENSSL_ALL)
+void wolfSSL_sk_X509_INFO_pop_free(WOLF_STACK_OF(WOLFSSL_X509_INFO)* sk,
+    void (*f) (WOLFSSL_X509_INFO*))
+{
+    WOLFSSL_STACK* node;
+
+    WOLFSSL_ENTER("wolfSSL_sk_X509_INFO_pop_free");
+
+    if (sk == NULL) {
+        return;
+    }
+
+    /* parse through stack freeing each node */
+    node = sk->next;
+    while (node && sk->num > 1) {
+        WOLFSSL_STACK* tmp = node;
+        node = node->next;
+
+        if (f)
+            f(tmp->data.info);
+        else
+            wolfSSL_X509_INFO_free(tmp->data.info);
+        tmp->data.info = NULL;
+        XFREE(tmp, NULL, DYNAMIC_TYPE_OPENSSL);
+        sk->num -= 1;
+    }
+
+    /* free head of stack */
+    if (sk->num == 1) {
+        if (f)
+            f(sk->data.info);
+        else
+            wolfSSL_X509_INFO_free(sk->data.info);
+        sk->data.info = NULL;
+    }
+    XFREE(sk, NULL, DYNAMIC_TYPE_OPENSSL);
+}
+
+void wolfSSL_sk_X509_INFO_free(WOLF_STACK_OF(WOLFSSL_X509_INFO) *sk)
+{
+    wolfSSL_sk_X509_INFO_pop_free(sk, NULL);
+}
+
+
+/* Adds the WOLFSSL_X509_INFO to the stack "sk". "sk" takes control of "in" and
+ * tries to free it when the stack is free'd.
+ *
+ * return 1 on success 0 on fail
+ */
+int wolfSSL_sk_X509_INFO_push(WOLF_STACK_OF(WOLFSSL_X509_INFO)* sk,
+                                                      WOLFSSL_X509_INFO* in)
+{
+    WOLFSSL_STACK* node;
+
+    if (sk == NULL || in == NULL) {
+        return WOLFSSL_FAILURE;
+    }
+
+    /* no previous values in stack */
+    if (sk->data.info == NULL) {
+        sk->data.info = in;
+        sk->num += 1;
+        return WOLFSSL_SUCCESS;
+    }
+
+    /* stack already has value(s) create a new node and add more */
+    node = (WOLFSSL_STACK*)XMALLOC(sizeof(WOLFSSL_STACK), NULL,
+            DYNAMIC_TYPE_X509);
+    if (node == NULL) {
+        WOLFSSL_MSG("Memory error");
+        return WOLFSSL_FAILURE;
+    }
+    XMEMSET(node, 0, sizeof(WOLFSSL_STACK));
+
+    /* push new obj onto head of stack */
+    node->data.info = sk->data.info;
+    node->next      = sk->next;
+    node->type      = sk->type;
+    sk->next        = node;
+    sk->data.info   = in;
+    sk->num        += 1;
+
+    return WOLFSSL_SUCCESS;
+}
+
+
+WOLF_STACK_OF(WOLFSSL_X509_NAME)* wolfSSL_sk_X509_NAME_new(wolf_sk_compare_cb cb)
+{
+    WOLFSSL_STACK* sk;
+
+    WOLFSSL_ENTER("wolfSSL_sk_X509_NAME_new");
+
+    sk = wolfSSL_sk_new_node(NULL);
+    if (sk != NULL) {
+        sk->type = STACK_TYPE_X509_NAME;
+        sk->comp = cb;
+    }
+
+    return sk;
+}
+
+int wolfSSL_sk_X509_NAME_push(WOLF_STACK_OF(WOLFSSL_X509_NAME)* sk,
+    WOLFSSL_X509_NAME* name)
+{
+    WOLFSSL_STACK* node;
+
+    WOLFSSL_ENTER("wolfSSL_sk_X509_NAME_push");
+
+    if (sk == NULL || name == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    /* no previous values in stack */
+    if (sk->data.name == NULL) {
+        sk->data.name = name;
+        sk->num += 1;
+        return 0;
+    }
+
+    /* stack already has value(s) create a new node and add more */
+    node = (WOLFSSL_STACK*)XMALLOC(sizeof(WOLFSSL_STACK), NULL,
+        DYNAMIC_TYPE_OPENSSL);
+    if (node == NULL) {
+        WOLFSSL_MSG("Memory error");
+        return MEMORY_E;
+    }
+    XMEMSET(node, 0, sizeof(WOLFSSL_STACK));
+
+    /* push new obj onto head of stack */
+    node->data.name = sk->data.name;
+    node->next      = sk->next;
+    sk->type        = STACK_TYPE_X509_NAME;
+    sk->next        = node;
+    sk->data.name   = name;
+    sk->num        += 1;
+
+    return 0;
+}
+
+/* return index of found, or negative to indicate not found */
+int wolfSSL_sk_X509_NAME_find(const WOLF_STACK_OF(WOLFSSL_X509_NAME) *sk,
+    WOLFSSL_X509_NAME *name)
+{
+    int i;
+
+    WOLFSSL_ENTER("wolfSSL_sk_X509_NAME_find");
+
+    if (sk == NULL)
+        return BAD_FUNC_ARG;
+
+    for (i = 0; sk; i++, sk = sk->next) {
+        if (wolfSSL_X509_NAME_cmp(sk->data.name, name) == 0) {
+            return i;
+        }
+    }
+    return -1;
+}
+
+int wolfSSL_sk_X509_OBJECT_num(const WOLF_STACK_OF(WOLFSSL_X509_OBJECT) *s)
+{
+    WOLFSSL_ENTER("wolfSSL_sk_X509_OBJECT_num");
+    if (s) {
+        return (int)s->num;
+    } else {
+        return 0;
+    }
+}
+
+
+int wolfSSL_sk_X509_NAME_set_cmp_func(WOLF_STACK_OF(WOLFSSL_X509_NAME)* sk,
+    wolf_sk_compare_cb cb)
+{
+    WOLFSSL_ENTER("wolfSSL_sk_X509_NAME_set_cmp_func");
+
+    if (sk == NULL)
+        return BAD_FUNC_ARG;
+
+    sk->comp = cb;
+    return 0;
+}
+#endif /* OPENSSL_ALL */
+
+int wolfSSL_sk_X509_NAME_num(const WOLF_STACK_OF(WOLFSSL_X509_NAME) *sk)
 {
     WOLFSSL_ENTER("wolfSSL_sk_X509_NAME_num");
 
-    if (s == NULL)
-        return -1;
-    return (int)s->num;
-}
-
-
-int wolfSSL_sk_X509_num(const WOLF_STACK_OF(WOLFSSL_X509) *s)
-{
-    WOLFSSL_ENTER("wolfSSL_sk_X509_num");
-
-    if (s == NULL)
-        return -1;
-    return (int)s->num;
-}
-
+    if (sk == NULL)
+        return BAD_FUNC_ARG;
+
+    return (int)sk->num;
+}
+
+/* Getter function for WOLFSSL_X509_NAME pointer
+ *
+ * sk is the stack to retrieve pointer from
+ * i  is the index value in stack
+ *
+ * returns a pointer to a WOLFSSL_X509_NAME structure on success and NULL on
+ *         fail
+ */
+WOLFSSL_X509_NAME* wolfSSL_sk_X509_NAME_value(const STACK_OF(WOLFSSL_X509_NAME)* sk,
+    int i)
+{
+    WOLFSSL_ENTER("wolfSSL_sk_X509_NAME_value");
+
+    for (; sk != NULL && i > 0; i--) {
+        sk = sk->next;
+    }
+
+    if (i != 0 || sk == NULL)
+        return NULL;
+
+    return sk->data.name;
+}
+
+WOLFSSL_X509_NAME* wolfSSL_sk_X509_NAME_pop(WOLF_STACK_OF(WOLFSSL_X509_NAME)* sk)
+{
+    WOLFSSL_STACK* node;
+    WOLFSSL_X509_NAME* name;
+
+    if (sk == NULL) {
+        return NULL;
+    }
+
+    node = sk->next;
+    name = sk->data.name;
+
+    if (node != NULL) { /* update sk and remove node from stack */
+        sk->data.name = node->data.name;
+        sk->next = node->next;
+        XFREE(node, NULL, DYNAMIC_TYPE_OPENSSL);
+    }
+    else { /* last x509 in stack */
+        sk->data.name = NULL;
+    }
+
+    if (sk->num > 0) {
+        sk->num -= 1;
+    }
+
+    return name;
+}
+
+void wolfSSL_sk_X509_NAME_pop_free(WOLF_STACK_OF(WOLFSSL_X509_NAME)* sk,
+    void (*f) (WOLFSSL_X509_NAME*))
+{
+    WOLFSSL_STACK* node;
+    WOLFSSL_ENTER("wolfSSL_sk_X509_NAME_pop_free");
+
+    if (sk == NULL)
+        return;
+
+    node = sk->next;
+    while (node && sk->num > 1) {
+        WOLFSSL_STACK* tmp = node;
+        node = node->next;
+        if (f)
+            f(tmp->data.name);
+        else
+            wolfSSL_X509_NAME_free(tmp->data.name);
+        tmp->data.name = NULL;
+        XFREE(tmp, NULL, DYNAMIC_TYPE_OPENSSL);
+        sk->num -= 1;
+    }
+
+    /* free head of stack */
+    if (sk->num == 1) {
+        if (f)
+            f(sk->data.name);
+        else
+            wolfSSL_X509_NAME_free(sk->data.name);
+        sk->data.name = NULL;
+    }
+
+    XFREE(sk, sk->heap, DYNAMIC_TYPE_OPENSSL);
+}
+
+/* Free only the sk structure, NOT X509_NAME members */
+void wolfSSL_sk_X509_NAME_free(WOLF_STACK_OF(WOLFSSL_X509_NAME)* sk)
+{
+    WOLFSSL_STACK* node;
+    WOLFSSL_ENTER("wolfSSL_sk_X509_NAME_free");
+
+    if (sk == NULL)
+        return;
+
+    node = sk->next;
+    while (sk->num > 1) {
+        WOLFSSL_STACK* tmp = node;
+        node = node->next;
+        XFREE(tmp, NULL, DYNAMIC_TYPE_OPENSSL);
+        sk->num -= 1;
+    }
+
+    XFREE(sk, sk->heap, DYNAMIC_TYPE_OPENSSL);
+}
+
+#if defined(WOLFSSL_APACHE_HTTPD) || defined(OPENSSL_ALL) || defined(WOLFSSL_NGINX)
+/* Helper function for X509_NAME_print_ex. Sets *buf to string for domain
+   name attribute based on NID. Returns size of buf */
+static int get_dn_attr_by_nid(int n, const char** buf)
+{
+    int len = 0;
+    const char *str;
+
+    switch(n)
+    {
+        case NID_commonName :
+            str = "CN";
+            len = 2;
+            break;
+        case NID_countryName:
+            str = "C";
+            len = 1;
+            break;
+        case NID_localityName:
+            str = "L";
+            len = 1;
+            break;
+        case NID_stateOrProvinceName:
+            str = "ST";
+            len = 2;
+            break;
+        case NID_organizationName:
+            str = "O";
+            len = 1;
+            break;
+        case NID_organizationalUnitName:
+            str = "OU";
+            len = 2;
+            break;
+        case NID_emailAddress:
+            str = "emailAddress";
+            len = 12;
+            break;
+        default:
+            WOLFSSL_MSG("Attribute type not found");
+            str = NULL;
+
+    }
+    if (buf != NULL)
+        *buf = str;
+    return len;
+}
+#endif
+
+/*
+ * The BIO output of  wolfSSL_X509_NAME_print_ex does NOT include the null terminator
+ */
 int wolfSSL_X509_NAME_print_ex(WOLFSSL_BIO* bio, WOLFSSL_X509_NAME* name,
                 int indent, unsigned long flags)
 {
+#if defined(WOLFSSL_APACHE_HTTPD) || defined(OPENSSL_ALL) || defined(WOLFSSL_NGINX)
+    int count = 0, len = 0, totalSz = 0, tmpSz = 0;
+    char tmp[ASN_NAME_MAX];
+    char fullName[ASN_NAME_MAX];
+    const char *buf = NULL;
+    WOLFSSL_X509_NAME_ENTRY* ne;
+    WOLFSSL_ASN1_STRING* str;
+#endif
     int i;
     (void)flags;
     WOLFSSL_ENTER("wolfSSL_X509_NAME_print_ex");
@@ -31770,12 +41522,54 @@
             return WOLFSSL_FAILURE;
     }
 
+#if defined(WOLFSSL_APACHE_HTTPD) || defined(OPENSSL_ALL) || defined(WOLFSSL_NGINX)
+    /* If XN_FLAG_DN_REV is present, print X509_NAME in reverse order */
+    if (flags == (XN_FLAG_RFC2253 & ~XN_FLAG_DN_REV)) {
+        fullName[0] = '\0';
+        count = wolfSSL_X509_NAME_entry_count(name);
+        for (i = 0; i < count; i++) {
+            ne = wolfSSL_X509_NAME_get_entry(name, count - i - 1);
+            if (ne == NULL)
+                return WOLFSSL_FAILURE;
+
+            str = wolfSSL_X509_NAME_ENTRY_get_data(ne);
+            if (str == NULL)
+                return WOLFSSL_FAILURE;
+
+            len = get_dn_attr_by_nid(ne->nid, &buf);
+            if (len == 0 || buf == NULL)
+                return WOLFSSL_FAILURE;
+
+            tmpSz = str->length + len + 2; /* + 2 for '=' and comma */
+            if (tmpSz > ASN_NAME_MAX) {
+                WOLFSSL_MSG("Size greater than ASN_NAME_MAX");
+                return WOLFSSL_FAILURE;
+            }
+
+            if (i < count - 1) {
+                /* tmpSz+1 for last null char */
+                XSNPRINTF(tmp, tmpSz+1, "%s=%s,", buf, str->data);
+                XSTRNCAT(fullName, tmp, tmpSz);
+            }
+            else {
+                XSNPRINTF(tmp, tmpSz, "%s=%s", buf, str->data);
+                XSTRNCAT(fullName, tmp, tmpSz-1);
+                tmpSz--; /* Don't include null char in tmpSz */
+            }
+            totalSz += tmpSz;
+        }
+        if (wolfSSL_BIO_write(bio, fullName, totalSz) != totalSz)
+            return WOLFSSL_FAILURE;
+        return WOLFSSL_SUCCESS;
+    }
+#else
     if (flags == XN_FLAG_RFC2253) {
         if (wolfSSL_BIO_write(bio, name->name + 1, name->sz - 2)
                                                                 != name->sz - 2)
             return WOLFSSL_FAILURE;
     }
-    else if (wolfSSL_BIO_write(bio, name->name, name->sz) != name->sz)
+#endif /* WOLFSSL_APACHE_HTTPD || OPENSSL_ALL || WOLFSSL_NGINX */
+    else if (wolfSSL_BIO_write(bio, name->name, name->sz - 1) != name->sz - 1)
         return WOLFSSL_FAILURE;
 
     return WOLFSSL_SUCCESS;
@@ -31813,10 +41607,13 @@
             case SSLv3_MINOR :
                 return SSL3_VERSION;
             case TLSv1_MINOR :
+                return TLS1_VERSION;
             case TLSv1_1_MINOR :
+                return TLS1_1_VERSION;
             case TLSv1_2_MINOR :
+                return TLS1_2_VERSION;
             case TLSv1_3_MINOR :
-                return TLS1_VERSION;
+                return TLS1_3_VERSION;
             default:
                 return WOLFSSL_FAILURE;
         }
@@ -31824,8 +41621,9 @@
     else if (ssl->version.major == DTLS_MAJOR) {
         switch (ssl->version.minor) {
             case DTLS_MINOR :
+                return DTLS1_VERSION;
             case DTLSv1_2_MINOR :
-                return DTLS1_VERSION;
+                return DTLS1_2_VERSION;
             default:
                 return WOLFSSL_FAILURE;
         }
@@ -31834,12 +41632,6 @@
 }
 
 
-WOLFSSL_CTX* wolfSSL_get_SSL_CTX(WOLFSSL* ssl)
-{
-    WOLFSSL_ENTER("wolfSSL_get_SSL_CTX");
-    return ssl->ctx;
-}
-
 int wolfSSL_X509_NAME_get_sz(WOLFSSL_X509_NAME* name)
 {
     WOLFSSL_ENTER("wolfSSL_X509_NAME_get_sz");
@@ -31902,16 +41694,19 @@
     WOLFSSL_ENTER("wolfSSL_CTX_set_tlsext_servername_callback");
     if (ctx) {
         ctx->sniRecvCb = cb;
-        return 1;
-    }
-    return 0;
-}
-
-void wolfSSL_CTX_set_servername_arg(WOLFSSL_CTX* ctx, void* arg)
+        return WOLFSSL_SUCCESS;
+    }
+    return WOLFSSL_FAILURE;
+}
+
+int wolfSSL_CTX_set_servername_arg(WOLFSSL_CTX* ctx, void* arg)
 {
     WOLFSSL_ENTER("wolfSSL_CTX_set_servername_arg");
-    if (ctx)
+    if (ctx) {
         ctx->sniRecvCbArg = arg;
+        return WOLFSSL_SUCCESS;
+    }
+    return WOLFSSL_FAILURE;
 }
 
 void wolfSSL_ERR_load_BIO_strings(void) {
@@ -31942,8 +41737,8 @@
 
 
 #ifndef NO_WOLFSSL_STUB
-WOLF_STACK_OF(WOLFSSL_X509)* wolfSSL_X509_STORE_get1_certs(WOLFSSL_X509_STORE_CTX* ctx,
-                                                WOLFSSL_X509_NAME* name)
+WOLF_STACK_OF(WOLFSSL_X509)* wolfSSL_X509_STORE_get1_certs(
+    WOLFSSL_X509_STORE_CTX* ctx, WOLFSSL_X509_NAME* name)
 {
     WOLFSSL_ENTER("wolfSSL_X509_STORE_get1_certs");
     WOLFSSL_STUB("X509_STORE_get1_certs");
@@ -31951,14 +41746,82 @@
     (void)name;
     return NULL;
 }
-#endif
+
+WOLF_STACK_OF(WOLFSSL_X509_OBJECT)* wolfSSL_X509_STORE_get0_objects(
+    WOLFSSL_X509_STORE* store)
+{
+    WOLFSSL_ENTER("wolfSSL_X509_STORE_get0_objects");
+    WOLFSSL_STUB("wolfSSL_X509_STORE_get0_objects");
+    (void)store;
+    return NULL;
+}
+
+WOLFSSL_X509_OBJECT* wolfSSL_sk_X509_OBJECT_delete(
+    WOLF_STACK_OF(WOLFSSL_X509_OBJECT)* sk, int i)
+{
+    WOLFSSL_ENTER("wolfSSL_sk_X509_OBJECT_delete");
+    WOLFSSL_STUB("wolfSSL_sk_X509_OBJECT_delete");
+    (void)sk;
+    (void)i;
+    return NULL;
+}
+
+void wolfSSL_X509_OBJECT_free(WOLFSSL_X509_OBJECT *a)
+{
+    WOLFSSL_ENTER("wolfSSL_X509_OBJECT_free");
+    WOLFSSL_STUB("wolfSSL_X509_OBJECT_free");
+    (void)a;
+}
+
+#endif
+
 
 #endif /* OPENSSL_ALL || (OPENSSL_EXTRA && (HAVE_STUNNEL || WOLFSSL_NGINX || HAVE_LIGHTY)) */
 
 
+#if defined(OPENSSL_EXTRA)
+
+int wolfSSL_sk_X509_num(const WOLF_STACK_OF(WOLFSSL_X509) *s)
+{
+    WOLFSSL_ENTER("wolfSSL_sk_X509_num");
+
+    if (s == NULL)
+        return -1;
+    return (int)s->num;
+}
+
+unsigned long wolfSSL_ERR_peek_last_error(void)
+{
+    WOLFSSL_ENTER("wolfSSL_ERR_peek_last_error");
+
+#if defined(OPENSSL_EXTRA) || defined(WOLFSSL_NGINX)
+    {
+        int ret;
+
+        if ((ret = wc_PeekErrorNode(-1, NULL, NULL, NULL)) < 0) {
+            WOLFSSL_MSG("Issue peeking at error node in queue");
+            return 0;
+        }
+        if (ret == -ASN_NO_PEM_HEADER)
+            return (ERR_LIB_PEM << 24) | PEM_R_NO_START_LINE;
+        return (unsigned long)ret;
+    }
+#else
+    return (unsigned long)(0 - NOT_COMPILED_IN);
+#endif
+}
+
+#endif /* OPENSSL_EXTRA */
+
+WOLFSSL_CTX* wolfSSL_get_SSL_CTX(WOLFSSL* ssl)
+{
+    WOLFSSL_ENTER("wolfSSL_get_SSL_CTX");
+    return ssl->ctx;
+}
+
 #if defined(OPENSSL_ALL) || \
-    (defined(OPENSSL_EXTRA) && (defined(HAVE_STUNNEL) || \
-     defined(WOLFSSL_NGINX)) || defined(WOLFSSL_HAPROXY))
+    defined(OPENSSL_EXTRA) || defined(HAVE_STUNNEL) || \
+    defined(WOLFSSL_NGINX) || defined(WOLFSSL_HAPROXY)
 
 const byte* wolfSSL_SESSION_get_id(WOLFSSL_SESSION* sess, unsigned int* idLen)
 {
@@ -31970,7 +41833,187 @@
     *idLen = sess->sessionIDSz;
     return sess->sessionID;
 }
-#endif
+
+#if (defined(HAVE_SESSION_TICKET) || defined(SESSION_CERTS)) && \
+    !defined(NO_FILESYSTEM)
+
+#if defined(SESSION_CERTS) || \
+   (defined(WOLFSSL_TLS13) && defined(HAVE_SESSION_TICKET))
+/* returns a pointer to the protocol used by the session */
+static const char* wolfSSL_SESSION_get_protocol(const WOLFSSL_SESSION* in)
+{
+    return wolfSSL_internal_get_version((ProtocolVersion*)&in->version);
+}
+#endif
+
+/* returns true (non 0) if the session has EMS (extended master secret) */
+static int wolfSSL_SESSION_haveEMS(const WOLFSSL_SESSION* in)
+{
+    if (in == NULL)
+        return 0;
+    return in->haveEMS;
+}
+
+#if defined(HAVE_SESSION_TICKET)
+/* prints out the ticket to bio passed in
+ * return WOLFSSL_SUCCESS on success
+ */
+static int wolfSSL_SESSION_print_ticket(WOLFSSL_BIO* bio,
+        const WOLFSSL_SESSION* in, const char* tab)
+{
+    unsigned short i, j, z, sz;
+    short tag = 0;
+    byte* pt;
+
+
+    if (in == NULL || bio == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    sz = in->ticketLen;
+    pt = in->ticket;
+
+    if (wolfSSL_BIO_printf(bio, "%s\n", (sz == 0)? " NONE": "") <= 0)
+        return WOLFSSL_FAILURE;
+
+    for (i = 0; i < sz;) {
+        char asc[16];
+
+        if (sz - i < 16) {
+            if (wolfSSL_BIO_printf(bio, "%s%04X -", tab, tag + (sz - i)) <= 0)
+                return WOLFSSL_FAILURE;
+        }
+        else {
+            if (wolfSSL_BIO_printf(bio, "%s%04X -", tab, tag) <= 0)
+                return WOLFSSL_FAILURE;
+        }
+        for (j = 0; i < sz && j < 8; j++,i++) {
+            asc[j] =  ((pt[i])&0x6f)>='A'?((pt[i])&0x6f):'.';
+            if (wolfSSL_BIO_printf(bio, " %02X", pt[i]) <= 0)
+                return WOLFSSL_FAILURE;
+        }
+
+        if (i < sz) {
+            asc[j] =  ((pt[i])&0x6f)>='A'?((pt[i])&0x6f):'.';
+            if (wolfSSL_BIO_printf(bio, "-%02X", pt[i]) <= 0)
+                return WOLFSSL_FAILURE;
+            j++;
+            i++;
+        }
+
+        for (; i < sz && j < 16; j++,i++) {
+            asc[j] =  ((pt[i])&0x6f)>='A'?((pt[i])&0x6f):'.';
+            if (wolfSSL_BIO_printf(bio, " %02X", pt[i]) <= 0)
+                return WOLFSSL_FAILURE;
+        }
+
+        /* pad out spacing */
+        for (z = j; z < 17; z++) {
+            if (wolfSSL_BIO_printf(bio, "   ") <= 0)
+                return WOLFSSL_FAILURE;
+        }
+
+        for (z = 0; z < j; z++) {
+            if (wolfSSL_BIO_printf(bio, "%c", asc[z]) <= 0)
+                return WOLFSSL_FAILURE;
+        }
+        if (wolfSSL_BIO_printf(bio, "\n") <= 0)
+            return WOLFSSL_FAILURE;
+
+        tag += 16;
+    }
+    return WOLFSSL_SUCCESS;
+}
+#endif /* HAVE_SESSION_TICKET */
+
+
+/* prints out the session information in human readable form
+ * return WOLFSSL_SUCCESS on success
+ */
+int wolfSSL_SESSION_print(WOLFSSL_BIO *bp, const WOLFSSL_SESSION *x)
+{
+    const unsigned char* pt;
+    unsigned char buf[SECRET_LEN];
+    unsigned int sz = 0, i;
+    int ret;
+    WOLFSSL_SESSION* session = (WOLFSSL_SESSION*)x;
+
+    if (session == NULL) {
+        WOLFSSL_MSG("Bad NULL argument");
+        return WOLFSSL_FAILURE;
+    }
+
+    if (wolfSSL_BIO_printf(bp, "%s\n", "SSL-Session:") <= 0)
+        return WOLFSSL_FAILURE;
+
+#if defined(SESSION_CERTS) || (defined(WOLFSSL_TLS13) && \
+                               defined(HAVE_SESSION_TICKET))
+    if (wolfSSL_BIO_printf(bp, "    Protocol  : %s\n",
+            wolfSSL_SESSION_get_protocol(session)) <= 0)
+        return WOLFSSL_FAILURE;
+#endif
+
+    if (wolfSSL_BIO_printf(bp, "    Cipher    : %s\n",
+            wolfSSL_SESSION_CIPHER_get_name(session)) <= 0)
+        return WOLFSSL_FAILURE;
+
+    pt = wolfSSL_SESSION_get_id(session, &sz);
+    if (wolfSSL_BIO_printf(bp, "    Session-ID: ") <= 0)
+        return WOLFSSL_FAILURE;
+
+    for (i = 0; i < sz; i++) {
+        if (wolfSSL_BIO_printf(bp, "%02X", pt[i]) <= 0)
+            return WOLFSSL_FAILURE;
+    }
+    if (wolfSSL_BIO_printf(bp, "\n") <= 0)
+        return WOLFSSL_FAILURE;
+
+    if (wolfSSL_BIO_printf(bp, "    Session-ID-ctx: \n") <= 0)
+        return WOLFSSL_FAILURE;
+
+    ret = wolfSSL_SESSION_get_master_key(x, buf, sizeof(buf));
+    if (wolfSSL_BIO_printf(bp, "    Master-Key: ") <= 0)
+        return WOLFSSL_FAILURE;
+
+    if (ret > 0) {
+        sz = (unsigned int)ret;
+        for (i = 0; i < sz; i++) {
+            if (wolfSSL_BIO_printf(bp, "%02X", buf[i]) <= 0)
+                return WOLFSSL_FAILURE;
+        }
+    }
+    if (wolfSSL_BIO_printf(bp, "\n") <= 0)
+        return WOLFSSL_FAILURE;
+
+    /* @TODO PSK identity hint and SRP */
+
+    if (wolfSSL_BIO_printf(bp, "    TLS session ticket:") <= 0)
+        return WOLFSSL_FAILURE;
+
+#ifdef HAVE_SESSION_TICKET
+    if (wolfSSL_SESSION_print_ticket(bp, x, "    ") != WOLFSSL_SUCCESS)
+        return WOLFSSL_FAILURE;
+#endif
+
+    if (wolfSSL_BIO_printf(bp, "    Start Time: %ld\n",
+                wolfSSL_SESSION_get_time(x)) <= 0)
+        return WOLFSSL_FAILURE;
+
+    if (wolfSSL_BIO_printf(bp, "    Timeout   : %ld (sec)\n",
+            wolfSSL_SESSION_get_timeout(x)) <= 0)
+        return WOLFSSL_FAILURE;
+
+    /* @TODO verify return code print */
+
+    if (wolfSSL_BIO_printf(bp, "    Extended master secret: %s\n",
+            (wolfSSL_SESSION_haveEMS(session) == 0)? "no" : "yes") <= 0)
+        return WOLFSSL_FAILURE;
+
+    return WOLFSSL_SUCCESS;
+}
+#endif /* (HAVE_SESSION_TICKET || SESSION_CERTS) && !NO_FILESYSTEM */
+
+#endif /* OPENSSL_ALL || OPENSSL_EXTRA || HAVE_STUNNEL || WOLFSSL_NGINX || WOLFSSL_HAPROXY */
 
 #if defined(OPENSSL_ALL) || (defined(OPENSSL_EXTRA) && defined(HAVE_STUNNEL)) \
     || defined(WOLFSSL_MYSQL_COMPATIBLE) || defined(WOLFSSL_NGINX)
@@ -31997,8 +42040,8 @@
     WOLFSSL_LEAVE("wolfSSL_CTX_get_verify_mode", mode);
     return mode;
 }
-#endif
-
+
+#endif
 #if defined(OPENSSL_EXTRA) && defined(HAVE_CURVE25519)
 /* return 1 if success, 0 if error
  * output keys are little endian format
@@ -32333,6 +42376,338 @@
 
 #endif /* OPENSSL_EXTRA && HAVE_ED25519 */
 
+#if defined(OPENSSL_EXTRA) && defined(HAVE_CURVE448)
+/* return 1 if success, 0 if error
+ * output keys are little endian format
+ */
+int wolfSSL_EC448_generate_key(unsigned char *priv, unsigned int *privSz,
+                               unsigned char *pub, unsigned int *pubSz)
+{
+#ifndef WOLFSSL_KEY_GEN
+    WOLFSSL_MSG("No Key Gen built in");
+    (void) priv;
+    (void) privSz;
+    (void) pub;
+    (void) pubSz;
+    return WOLFSSL_FAILURE;
+#else /* WOLFSSL_KEY_GEN */
+    int ret = WOLFSSL_FAILURE;
+    int initTmpRng = 0;
+    WC_RNG *rng = NULL;
+#ifdef WOLFSSL_SMALL_STACK
+    WC_RNG *tmpRNG = NULL;
+#else
+    WC_RNG tmpRNG[1];
+#endif
+
+    WOLFSSL_ENTER("wolfSSL_EC448_generate_key");
+
+    if (priv == NULL || privSz == NULL || *privSz < CURVE448_KEY_SIZE ||
+        pub == NULL || pubSz == NULL || *pubSz < CURVE448_KEY_SIZE) {
+        WOLFSSL_MSG("Bad arguments");
+        return WOLFSSL_FAILURE;
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    tmpRNG = (WC_RNG*)XMALLOC(sizeof(WC_RNG), NULL, DYNAMIC_TYPE_RNG);
+    if (tmpRNG == NULL)
+        return WOLFSSL_FAILURE;
+#endif
+    if (wc_InitRng(tmpRNG) == 0) {
+        rng = tmpRNG;
+        initTmpRng = 1;
+    }
+    else {
+        WOLFSSL_MSG("Bad RNG Init, trying global");
+        if (initGlobalRNG == 0)
+            WOLFSSL_MSG("Global RNG no Init");
+        else
+            rng = &globalRNG;
+    }
+
+    if (rng) {
+        curve448_key key;
+
+        if (wc_curve448_init(&key) != MP_OKAY)
+            WOLFSSL_MSG("wc_curve448_init failed");
+        else if (wc_curve448_make_key(rng, CURVE448_KEY_SIZE, &key)!=MP_OKAY)
+            WOLFSSL_MSG("wc_curve448_make_key failed");
+        /* export key pair */
+        else if (wc_curve448_export_key_raw_ex(&key, priv, privSz, pub, pubSz,
+                                               EC448_LITTLE_ENDIAN)
+                 != MP_OKAY)
+            WOLFSSL_MSG("wc_curve448_export_key_raw_ex failed");
+        else
+            ret = WOLFSSL_SUCCESS;
+
+        wc_curve448_free(&key);
+    }
+
+    if (initTmpRng)
+        wc_FreeRng(tmpRNG);
+
+#ifdef WOLFSSL_SMALL_STACK
+    XFREE(tmpRNG, NULL, DYNAMIC_TYPE_RNG);
+#endif
+
+    return ret;
+#endif /* WOLFSSL_KEY_GEN */
+}
+
+/* return 1 if success, 0 if error
+ * input and output keys are little endian format
+ */
+int wolfSSL_EC448_shared_key(unsigned char *shared, unsigned int *sharedSz,
+                             const unsigned char *priv, unsigned int privSz,
+                             const unsigned char *pub, unsigned int pubSz)
+{
+#ifndef WOLFSSL_KEY_GEN
+    WOLFSSL_MSG("No Key Gen built in");
+    (void) shared;
+    (void) sharedSz;
+    (void) priv;
+    (void) privSz;
+    (void) pub;
+    (void) pubSz;
+    return WOLFSSL_FAILURE;
+#else /* WOLFSSL_KEY_GEN */
+    int ret = WOLFSSL_FAILURE;
+    curve448_key privkey, pubkey;
+
+    WOLFSSL_ENTER("wolfSSL_EC448_shared_key");
+
+    if (shared == NULL || sharedSz == NULL || *sharedSz < CURVE448_KEY_SIZE ||
+            priv == NULL || privSz < CURVE448_KEY_SIZE ||
+            pub == NULL || pubSz < CURVE448_KEY_SIZE) {
+        WOLFSSL_MSG("Bad arguments");
+        return WOLFSSL_FAILURE;
+    }
+
+    /* import private key */
+    if (wc_curve448_init(&privkey) != MP_OKAY) {
+        WOLFSSL_MSG("wc_curve448_init privkey failed");
+        return ret;
+    }
+    if (wc_curve448_import_private_ex(priv, privSz, &privkey,
+                                      EC448_LITTLE_ENDIAN) != MP_OKAY) {
+        WOLFSSL_MSG("wc_curve448_import_private_ex failed");
+        wc_curve448_free(&privkey);
+        return ret;
+    }
+
+    /* import public key */
+    if (wc_curve448_init(&pubkey) != MP_OKAY) {
+        WOLFSSL_MSG("wc_curve448_init pubkey failed");
+        wc_curve448_free(&privkey);
+        return ret;
+    }
+    if (wc_curve448_import_public_ex(pub, pubSz, &pubkey,
+                                     EC448_LITTLE_ENDIAN) != MP_OKAY) {
+        WOLFSSL_MSG("wc_curve448_import_public_ex failed");
+        wc_curve448_free(&privkey);
+        wc_curve448_free(&pubkey);
+        return ret;
+    }
+
+    if (wc_curve448_shared_secret_ex(&privkey, &pubkey, shared, sharedSz,
+                                     EC448_LITTLE_ENDIAN) != MP_OKAY)
+        WOLFSSL_MSG("wc_curve448_shared_secret_ex failed");
+    else
+        ret = WOLFSSL_SUCCESS;
+
+    wc_curve448_free(&privkey);
+    wc_curve448_free(&pubkey);
+
+    return ret;
+#endif /* WOLFSSL_KEY_GEN */
+}
+#endif /* OPENSSL_EXTRA && HAVE_CURVE448 */
+
+#if defined(OPENSSL_EXTRA) && defined(HAVE_ED448)
+/* return 1 if success, 0 if error
+ * output keys are little endian format
+ */
+int wolfSSL_ED448_generate_key(unsigned char *priv, unsigned int *privSz,
+                               unsigned char *pub, unsigned int *pubSz)
+{
+#ifndef WOLFSSL_KEY_GEN
+    WOLFSSL_MSG("No Key Gen built in");
+    (void) priv;
+    (void) privSz;
+    (void) pub;
+    (void) pubSz;
+    return WOLFSSL_FAILURE;
+#else /* WOLFSSL_KEY_GEN */
+    int ret = WOLFSSL_FAILURE;
+    int initTmpRng = 0;
+    WC_RNG *rng = NULL;
+#ifdef WOLFSSL_SMALL_STACK
+    WC_RNG *tmpRNG = NULL;
+#else
+    WC_RNG tmpRNG[1];
+#endif
+
+    WOLFSSL_ENTER("wolfSSL_ED448_generate_key");
+
+    if (priv == NULL || privSz == NULL || *privSz < ED448_PRV_KEY_SIZE ||
+            pub == NULL || pubSz == NULL || *pubSz < ED448_PUB_KEY_SIZE) {
+        WOLFSSL_MSG("Bad arguments");
+        return WOLFSSL_FAILURE;
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    tmpRNG = (WC_RNG*)XMALLOC(sizeof(WC_RNG), NULL, DYNAMIC_TYPE_RNG);
+    if (tmpRNG == NULL)
+        return WOLFSSL_FATAL_ERROR;
+#endif
+    if (wc_InitRng(tmpRNG) == 0) {
+        rng = tmpRNG;
+        initTmpRng = 1;
+    }
+    else {
+        WOLFSSL_MSG("Bad RNG Init, trying global");
+        if (initGlobalRNG == 0)
+            WOLFSSL_MSG("Global RNG no Init");
+        else
+            rng = &globalRNG;
+    }
+
+    if (rng) {
+        ed448_key key;
+
+        if (wc_ed448_init(&key) != MP_OKAY)
+            WOLFSSL_MSG("wc_ed448_init failed");
+        else if (wc_ed448_make_key(rng, ED448_KEY_SIZE, &key) != MP_OKAY)
+            WOLFSSL_MSG("wc_ed448_make_key failed");
+        /* export private key */
+        else if (wc_ed448_export_key(&key, priv, privSz, pub, pubSz) != MP_OKAY)
+            WOLFSSL_MSG("wc_ed448_export_key failed");
+        else
+            ret = WOLFSSL_SUCCESS;
+
+        wc_ed448_free(&key);
+    }
+
+    if (initTmpRng)
+        wc_FreeRng(tmpRNG);
+
+#ifdef WOLFSSL_SMALL_STACK
+    XFREE(tmpRNG, NULL, DYNAMIC_TYPE_RNG);
+#endif
+
+    return ret;
+#endif /* WOLFSSL_KEY_GEN */
+}
+
+/* return 1 if success, 0 if error
+ * input and output keys are little endian format
+ * priv is a buffer containing private and public part of key
+ */
+int wolfSSL_ED448_sign(const unsigned char *msg, unsigned int msgSz,
+                       const unsigned char *priv, unsigned int privSz,
+                       unsigned char *sig, unsigned int *sigSz)
+{
+#ifndef WOLFSSL_KEY_GEN
+    WOLFSSL_MSG("No Key Gen built in");
+    (void) msg;
+    (void) msgSz;
+    (void) priv;
+    (void) privSz;
+    (void) sig;
+    (void) sigSz;
+    return WOLFSSL_FAILURE;
+#else /* WOLFSSL_KEY_GEN */
+    ed448_key key;
+    int ret = WOLFSSL_FAILURE;
+
+    WOLFSSL_ENTER("wolfSSL_ED448_sign");
+
+    if (priv == NULL || privSz != ED448_PRV_KEY_SIZE || msg == NULL ||
+            sig == NULL || *sigSz < ED448_SIG_SIZE) {
+        WOLFSSL_MSG("Bad arguments");
+        return WOLFSSL_FAILURE;
+    }
+
+    /* import key */
+    if (wc_ed448_init(&key) != MP_OKAY) {
+        WOLFSSL_MSG("wc_curve448_init failed");
+        return ret;
+    }
+    if (wc_ed448_import_private_key(priv, privSz/2, priv+(privSz/2),
+                                    ED448_PUB_KEY_SIZE, &key) != MP_OKAY){
+        WOLFSSL_MSG("wc_ed448_import_private failed");
+        wc_ed448_free(&key);
+        return ret;
+    }
+
+    if (wc_ed448_sign_msg(msg, msgSz, sig, sigSz, &key) != MP_OKAY)
+        WOLFSSL_MSG("wc_curve448_shared_secret_ex failed");
+    else
+        ret = WOLFSSL_SUCCESS;
+
+    wc_ed448_free(&key);
+
+    return ret;
+#endif /* WOLFSSL_KEY_GEN */
+}
+
+/* return 1 if success, 0 if error
+ * input and output keys are little endian format
+ * pub is a buffer containing public part of key
+ */
+int wolfSSL_ED448_verify(const unsigned char *msg, unsigned int msgSz,
+                         const unsigned char *pub, unsigned int pubSz,
+                         const unsigned char *sig, unsigned int sigSz)
+{
+#ifndef WOLFSSL_KEY_GEN
+    WOLFSSL_MSG("No Key Gen built in");
+    (void) msg;
+    (void) msgSz;
+    (void) pub;
+    (void) pubSz;
+    (void) sig;
+    (void) sigSz;
+    return WOLFSSL_FAILURE;
+#else /* WOLFSSL_KEY_GEN */
+    ed448_key key;
+    int ret = WOLFSSL_FAILURE, check = 0;
+
+    WOLFSSL_ENTER("wolfSSL_ED448_verify");
+
+    if (pub == NULL || pubSz != ED448_PUB_KEY_SIZE || msg == NULL ||
+            sig == NULL || sigSz != ED448_SIG_SIZE) {
+        WOLFSSL_MSG("Bad arguments");
+        return WOLFSSL_FAILURE;
+    }
+
+    /* import key */
+    if (wc_ed448_init(&key) != MP_OKAY) {
+        WOLFSSL_MSG("wc_curve448_init failed");
+        return ret;
+    }
+    if (wc_ed448_import_public(pub, pubSz, &key) != MP_OKAY){
+        WOLFSSL_MSG("wc_ed448_import_public failed");
+        wc_ed448_free(&key);
+        return ret;
+    }
+
+    if ((ret = wc_ed448_verify_msg((byte*)sig, sigSz, msg, msgSz, &check,
+                                   &key)) != MP_OKAY) {
+        WOLFSSL_MSG("wc_ed448_verify_msg failed");
+    }
+    else if (!check)
+        WOLFSSL_MSG("wc_ed448_verify_msg failed (signature invalid)");
+    else
+        ret = WOLFSSL_SUCCESS;
+
+    wc_ed448_free(&key);
+
+    return ret;
+#endif /* WOLFSSL_KEY_GEN */
+}
+
+#endif /* OPENSSL_EXTRA && HAVE_ED448 */
+
 #ifdef WOLFSSL_JNI
 
 int wolfSSL_set_jobject(WOLFSSL* ssl, void* objPtr)
@@ -32379,14 +42754,13 @@
     }
 
     ret = wolfAsync_EventQueuePoll(&ssl->ctx->event_queue, ssl,
-        events, sizeof(events)/sizeof(events), flags, &eventCount);
+        events, sizeof(events)/sizeof(*events), flags, &eventCount);
     if (ret == 0) {
         ret = eventCount;
     }
 
     return ret;
 }
-
 #endif /* WOLFSSL_ASYNC_CRYPT */
 
 #ifdef OPENSSL_EXTRA
@@ -32407,7 +42781,8 @@
     }
 
 #if defined(OPENSSL_ALL) || defined(WOLFSSL_NGINX) || \
-    defined(WOLFSSL_HAPROXY) || defined(WOLFSSL_MYSQL_COMPATIBLE)
+    defined(WOLFSSL_OPENSSH) || defined(WOLFSSL_HAPROXY) || \
+    defined(WOLFSSL_MYSQL_COMPATIBLE)
     {
         int ret = 0;
 
@@ -32416,9 +42791,12 @@
                 WOLFSSL_MSG("Issue peeking at error node in queue");
                 return 0;
             }
-            ret = -ret;
-
-            if (ret == ASN_NO_PEM_HEADER)
+            /* OpenSSL uses positive error codes */
+            if (ret < 0) {
+                ret = -ret;
+            }
+
+            if (ret == -ASN_NO_PEM_HEADER)
                 return (ERR_LIB_PEM << 24) | PEM_R_NO_START_LINE;
             if (ret != WANT_READ && ret != WANT_WRITE &&
                     ret != ZERO_RETURN && ret != WOLFSSL_ERROR_ZERO_RETURN &&
@@ -32438,14 +42816,54 @@
 
 #if defined(OPENSSL_ALL) || defined(WOLFSSL_NGINX) || defined(WOLFSSL_HAPROXY)
 
-#ifndef NO_WOLFSSL_STUB
+/* returns a pointer to internal cipher suite list. Should not be free'd by
+ * caller.
+ */
 WOLF_STACK_OF(WOLFSSL_CIPHER) *wolfSSL_get_ciphers_compat(const WOLFSSL *ssl)
 {
-    (void)ssl;
-    WOLFSSL_STUB("wolfSSL_get_ciphers_compat");
-    return NULL;
-}
-#endif
+    WOLF_STACK_OF(WOLFSSL_CIPHER)* ret = NULL;
+    Suites* suites;
+
+    WOLFSSL_ENTER("wolfSSL_get_ciphers_compat");
+    if (ssl == NULL || (ssl->suites == NULL && ssl->ctx->suites == NULL)) {
+        return NULL;
+    }
+
+    if (ssl->suites != NULL) {
+        suites = ssl->suites;
+    }
+    else {
+        suites = ssl->ctx->suites;
+    }
+
+    /* check if stack needs populated */
+    if (suites->stack == NULL) {
+        int i;
+        for (i = 0; i < suites->suiteSz; i+=2) {
+            WOLFSSL_STACK* add = wolfSSL_sk_new_node(ssl->heap);
+            if (add != NULL) {
+                add->type = STACK_TYPE_CIPHER;
+                add->data.cipher.cipherSuite0 = suites->suites[i];
+                add->data.cipher.cipherSuite  = suites->suites[i+1];
+                #if defined(WOLFSSL_QT) || defined(OPENSSL_ALL)
+                /* in_stack is checked in wolfSSL_CIPHER_description */
+                add->data.cipher.in_stack = 1;
+                #endif
+
+                add->next = ret;
+                if (ret != NULL) {
+                    add->num = ret->num + 1;
+                }
+                else {
+                    add->num = 1;
+                }
+                ret = add;
+            }
+        }
+        suites->stack = ret;
+    }
+    return suites->stack;
+}
 
 #ifndef NO_WOLFSSL_STUB
 void wolfSSL_OPENSSL_config(char *config_name)
@@ -32453,8 +42871,8 @@
     (void)config_name;
     WOLFSSL_STUB("OPENSSL_config");
 }
-#endif
-#endif
+#endif /* !NO_WOLFSSL_STUB */
+#endif /* OPENSSL_ALL || WOLFSSL_NGINX || WOLFSSL_HAPROXY */
 
 #if defined(OPENSSL_ALL) || defined(WOLFSSL_NGINX) || defined(WOLFSSL_HAPROXY) \
     || defined(OPENSSL_EXTRA) || defined(HAVE_LIGHTY)
@@ -32472,12 +42890,44 @@
     return x509_idx++;
 }
 
+#if defined(HAVE_EX_DATA) || defined(FORTRESS)
+void* wolfSSL_CRYPTO_get_ex_data(const WOLFSSL_CRYPTO_EX_DATA* ex_data, int idx)
+{
+    WOLFSSL_ENTER("wolfSSL_CTX_get_ex_data");
+#ifdef MAX_EX_DATA
+    if(ex_data && idx < MAX_EX_DATA && idx >= 0) {
+        return ex_data->ex_data[idx];
+    }
+#else
+    (void)ex_data;
+    (void)idx;
+#endif
+    return NULL;
+}
+
+int wolfSSL_CRYPTO_set_ex_data(WOLFSSL_CRYPTO_EX_DATA* ex_data, int idx, void *data)
+{
+    WOLFSSL_ENTER("wolfSSL_CRYPTO_set_ex_data");
+#ifdef MAX_EX_DATA
+    if (ex_data && idx < MAX_EX_DATA && idx >= 0) {
+        ex_data->ex_data[idx] = data;
+        return WOLFSSL_SUCCESS;
+    }
+#else
+    (void)ex_data;
+    (void)idx;
+    (void)data;
+#endif
+    return WOLFSSL_FAILURE;
+}
+#endif /* defined(HAVE_EX_DATA) || defined(FORTRESS) */
+
 void *wolfSSL_X509_get_ex_data(X509 *x509, int idx)
 {
     WOLFSSL_ENTER("wolfSSL_X509_get_ex_data");
     #ifdef HAVE_EX_DATA
-    if (x509 != NULL && idx < MAX_EX_DATA && idx >= 0) {
-        return x509->ex_data[idx];
+    if (x509 != NULL) {
+        return wolfSSL_CRYPTO_get_ex_data(&x509->ex_data, idx);
     }
     #else
     (void)x509;
@@ -32485,14 +42935,14 @@
     #endif
     return NULL;
 }
+
 int wolfSSL_X509_set_ex_data(X509 *x509, int idx, void *data)
 {
     WOLFSSL_ENTER("wolfSSL_X509_set_ex_data");
     #ifdef HAVE_EX_DATA
-    if (x509 != NULL && idx < MAX_EX_DATA)
-    {
-        x509->ex_data[idx] = data;
-        return WOLFSSL_SUCCESS;
+    if (x509 != NULL)
+    {
+        return wolfSSL_CRYPTO_set_ex_data(&x509->ex_data, idx, data);
     }
     #else
     (void)x509;
@@ -32501,6 +42951,7 @@
     #endif
     return WOLFSSL_FAILURE;
 }
+
 int wolfSSL_X509_NAME_digest(const WOLFSSL_X509_NAME *name,
         const WOLFSSL_EVP_MD *type, unsigned char *md, unsigned int *len)
 {
@@ -32509,7 +42960,7 @@
     if (name == NULL || type == NULL)
         return WOLFSSL_FAILURE;
 
-#ifndef NO_FILESYSTEM
+#if !defined(NO_FILESYSTEM) && !defined(NO_PWDBASED)
     return wolfSSL_EVP_Digest((unsigned char*)name->fullName.fullName,
                               name->fullName.fullNameLen, md, len, type, NULL);
 #else
@@ -32529,6 +42980,17 @@
     return ctx->timeout;
 }
 
+
+/* returns the time in seconds of the current timeout */
+long wolfSSL_get_timeout(WOLFSSL* ssl)
+{
+    WOLFSSL_ENTER("wolfSSL_get_timeout");
+
+    if (ssl == NULL)
+        return 0;
+    return ssl->timeout;
+}
+
 #ifdef HAVE_ECC
 int wolfSSL_SSL_CTX_set_tmp_ecdh(WOLFSSL_CTX *ctx, WOLFSSL_EC_KEY *ecdh)
 {
@@ -32570,11 +43032,13 @@
 BIO *wolfSSL_SSL_get_rbio(const WOLFSSL *s)
 {
     WOLFSSL_ENTER("wolfSSL_SSL_get_rbio");
-    (void)s;
     /* Nginx sets the buffer size if the read BIO is different to write BIO.
      * The setting buffer size doesn't do anything so return NULL for both.
      */
-    return NULL;
+    if (s == NULL)
+        return NULL;
+
+    return s->biord;
 }
 BIO *wolfSSL_SSL_get_wbio(const WOLFSSL *s)
 {
@@ -32583,7 +43047,10 @@
     /* Nginx sets the buffer size if the read BIO is different to write BIO.
      * The setting buffer size doesn't do anything so return NULL for both.
      */
-    return NULL;
+    if (s == NULL)
+        return NULL;
+
+    return s->biowr;
 }
 
 int wolfSSL_SSL_do_handshake(WOLFSSL *s)
@@ -32610,16 +43077,33 @@
 #endif
 }
 
-int wolfSSL_SSL_in_init(WOLFSSL *s)
-{
-    WOLFSSL_ENTER("wolfSSL_SSL_in_init");
-
-    if (s == NULL)
-        return WOLFSSL_FAILURE;
-
-    if (s->options.side == WOLFSSL_CLIENT_END)
-        return s->options.connectState < SECOND_REPLY_DONE;
-    return s->options.acceptState < ACCEPT_THIRD_REPLY_DONE;
+int wolfSSL_SSL_in_init(WOLFSSL *ssl)
+{
+    WOLFSSL_ENTER("SSL_in_init");
+
+    if (ssl == NULL)
+        return WOLFSSL_FAILURE;
+
+    if (ssl->options.side == WOLFSSL_CLIENT_END) {
+        return ssl->options.connectState < SECOND_REPLY_DONE;
+    }
+    return ssl->options.acceptState < ACCEPT_THIRD_REPLY_DONE;
+}
+
+int wolfSSL_SSL_in_connect_init(WOLFSSL* ssl)
+{
+    WOLFSSL_ENTER("SSL_connect_init");
+
+    if (ssl == NULL)
+        return WOLFSSL_FAILURE;
+
+    if (ssl->options.side == WOLFSSL_CLIENT_END) {
+        return ssl->options.connectState > CONNECT_BEGIN &&
+            ssl->options.connectState < SECOND_REPLY_DONE;
+    }
+
+    return ssl->options.acceptState > ACCEPT_BEGIN &&
+        ssl->options.acceptState < ACCEPT_THIRD_REPLY_DONE;
 }
 
 #ifndef NO_SESSION_CACHE
@@ -32664,8 +43148,10 @@
 
     InitDecodedCert(&dCert, x->derCert->buffer, x->derCert->length, NULL);
     ret = ParseCertRelative(&dCert, CERT_TYPE, 0, NULL);
-    if (ret != 0)
-        return WOLFSSL_FAILURE;
+    if (ret != 0) {
+        FreeDecodedCert(&dCert);
+        return WOLFSSL_FAILURE;
+    }
 
     ret = CheckHostName(&dCert, (char *)chk, chklen);
     FreeDecodedCert(&dCert);
@@ -32689,7 +43175,7 @@
 
     /* Skip ASN.1 INTEGER (type) byte. */
     i = 1;
-    /* When indefinte length, can't determine length with data available. */
+    /* When indefinite length, can't determine length with data available. */
     if (a->data[i] == 0x80)
         return 0;
     /* One length byte if less than 0x80. */
@@ -32960,8 +43446,8 @@
     if (ctx == NULL || ctx->cm == NULL)
         return WOLFSSL_FAILURE;
 
-#if defined(HAVE_CERTIFICATE_STATUS_REQUEST) \
- || defined(HAVE_CERTIFICATE_STATUS_REQUEST_V2)
+#if !defined(NO_WOLFSSL_SERVER) && (defined(HAVE_CERTIFICATE_STATUS_REQUEST) \
+                               ||  defined(HAVE_CERTIFICATE_STATUS_REQUEST_V2))
     /* Ensure stapling is on for callback to be used. */
     wolfSSL_CTX_EnableOCSPStapling(ctx);
 
@@ -33026,24 +43512,20 @@
     if (ca == NULL)
         return WOLFSSL_FAILURE;
 
+#ifdef WOLFSSL_SIGNER_DER_CERT
+    /* populate issuer with Signer DER */
+    *issuer = wolfSSL_X509_d2i(issuer, ca->derCert->buffer,
+                               ca->derCert->length);
+    if (*issuer == NULL)
+        return WOLFSSL_FAILURE;
+#else
+    /* Create an empty certificate as CA doesn't have a certificate. */
     *issuer = (WOLFSSL_X509 *)XMALLOC(sizeof(WOLFSSL_X509), 0,
         DYNAMIC_TYPE_OPENSSL);
     if (*issuer == NULL)
         return WOLFSSL_FAILURE;
 
-    /* Create an empty certificate as CA doesn't have a certificate. */
-    XMEMSET(*issuer, 0, sizeof(WOLFSSL_X509));
-    (*issuer)->dynamicMemory = 1;
-#ifdef WOLFSSL_SIGNER_DER_CERT
-    if (AllocDer(&(*issuer)->derCert, ca->derCert->length, ca->derCert->type,
-                                                                   NULL) == 0) {
-        XMEMCPY((*issuer)->derCert->buffer, ca->derCert->buffer,
-                                                           ca->derCert->length);
-    }
-    else {
-        XFREE(*issuer, 0, DYNAMIC_TYPE_OPENSSL);
-        return WOLFSSL_FAILURE;
-    }
+    InitX509((*issuer), 1, NULL);
 #endif
 
     /* Result is ignored when passed to wolfSSL_OCSP_cert_to_id(). */
@@ -33227,14 +43709,14 @@
 #endif /* WOLFSSL_NGINX  / WOLFSSL_HAPROXY */
 
 #if defined(OPENSSL_EXTRA) && defined(HAVE_ECC)
-WOLFSSL_API int wolfSSL_CTX_set1_curves_list(WOLFSSL_CTX* ctx, char* names)
+int wolfSSL_CTX_set1_curves_list(WOLFSSL_CTX* ctx, const char* names)
 {
     int idx, start = 0, len;
-    int curve;
+    word16 curve;
     char name[MAX_CURVE_NAME_SZ];
 
     /* Disable all curves so that only the ones the user wants are enabled. */
-    ctx->disabledCurves = (word32)-1;
+    ctx->disabledCurves = 0xFFFFFFFFUL;
     for (idx = 1; names[idx-1] != '\0'; idx++) {
         if (names[idx] != ':' && names[idx] != '\0')
             continue;
@@ -33259,19 +43741,68 @@
                                           (XSTRNCMP(name, "P-521", len) == 0)) {
             curve = WOLFSSL_ECC_SECP521R1;
         }
-        else if (XSTRNCMP(name, "X25519", len) == 0)
+        else if (XSTRNCMP(name, "X25519", len) == 0) {
             curve = WOLFSSL_ECC_X25519;
-        else if ((curve = wc_ecc_get_curve_id_from_name(name)) < 0)
-            return WOLFSSL_FAILURE;
+        }
+        else if (XSTRNCMP(name, "X448", len) == 0) {
+            curve = WOLFSSL_ECC_X448;
+        }
+        else {
+        #if !defined(HAVE_FIPS) && !defined(HAVE_SELFTEST)
+            int   ret;
+            const ecc_set_type *eccSet;
+
+            ret = wc_ecc_get_curve_idx_from_name(name);
+            if (ret < 0) {
+                WOLFSSL_MSG("Could not find name in set");
+                return WOLFSSL_FAILURE;
+            }
+
+            eccSet = wc_ecc_get_curve_params(ret);
+            if (eccSet == NULL) {
+                WOLFSSL_MSG("NULL set returned");
+                return WOLFSSL_FAILURE;
+            }
+
+            curve = GetCurveByOID(eccSet->oidSum);
+        #else
+            WOLFSSL_MSG("API not present to search farther using name");
+            return WOLFSSL_FAILURE;
+        #endif
+        }
+
+        if (curve > (sizeof(word32) * WOLFSSL_BIT_SIZE)) {
+            /* shift left more than size of ctx->disabledCurves causes static
+             * analysis report */
+            WOLFSSL_MSG("curve value is too large for upcoming shift");
+            return WOLFSSL_FAILURE;
+        }
+
+    #if defined(HAVE_SUPPORTED_CURVES) && !defined(NO_WOLFSSL_CLIENT)
+        /* set the supported curve so client TLS extension contains only the
+         * desired curves */
+        if (wolfSSL_CTX_UseSupportedCurve(ctx, curve) != WOLFSSL_SUCCESS) {
+            WOLFSSL_MSG("Unable to set supported curve");
+            return WOLFSSL_FAILURE;
+        }
+    #endif
 
         /* Switch the bit to off and therefore is enabled. */
-        ctx->disabledCurves &= ~(1 << curve);
+        ctx->disabledCurves &= ~(1U << curve);
         start = idx + 1;
     }
 
     return WOLFSSL_SUCCESS;
 }
-#endif
+
+int wolfSSL_set1_curves_list(WOLFSSL* ssl, const char* names)
+{
+    if (ssl == NULL) {
+        return WOLFSSL_FAILURE;
+    }
+    return wolfSSL_CTX_set1_curves_list(ssl->ctx, names);
+}
+#endif /* OPENSSL_EXTRA && HAVE_ECC */
 
 #ifdef OPENSSL_EXTRA
 #ifndef NO_WOLFSSL_STUB
@@ -33320,6 +43851,9 @@
 int wolfSSL_set_msg_callback_arg(WOLFSSL *ssl, void* arg)
 {
     WOLFSSL_ENTER("wolfSSL_set_msg_callback_arg");
+    if (ssl == NULL)
+        return WOLFSSL_FAILURE;
+
     ssl->protoMsgCtx = arg;
     return WOLFSSL_SUCCESS;
 }
@@ -33358,11 +43892,2056 @@
     return SSL_SUCCESS;
 }
 
+
+#ifdef HAVE_ALPN
+/* Sets the ALPN extension protos
+ *
+ * example format is
+ * unsigned char p[] = {
+ *      8, 'h', 't', 't', 'p', '/', '1', '.', '1'
+ * };
+ *
+ * returns WOLFSSL_SUCCESS on success */
+int wolfSSL_set_alpn_protos(WOLFSSL* ssl,
+        const unsigned char* p, unsigned int p_len)
+{
+    WOLFSSL_BIO* bio;
+    char* pt;
+
+    unsigned int sz;
+    unsigned int idx = 0;
+    int alpn_opt = WOLFSSL_ALPN_CONTINUE_ON_MISMATCH;
+    WOLFSSL_ENTER("wolfSSL_set_alpn_protos");
+
+    if (ssl == NULL || p_len <= 1) {
+        return WOLFSSL_FAILURE;
+    }
+
+    bio = wolfSSL_BIO_new(wolfSSL_BIO_s_mem());
+    if (bio == NULL) {
+        return WOLFSSL_FAILURE;
+    }
+
+    /* convert into comma separated list */
+    while (idx < p_len - 1) {
+        unsigned int i;
+
+        sz = p[idx++];
+        if (idx + sz > p_len) {
+            WOLFSSL_MSG("Bad list format");
+            wolfSSL_BIO_free(bio);
+            return WOLFSSL_FAILURE;
+        }
+        if (sz > 0) {
+            for (i = 0; i < sz; i++) {
+                wolfSSL_BIO_write(bio, &p[idx++], 1);
+            }
+            if (idx < p_len - 1)
+                wolfSSL_BIO_write(bio, ",", 1);
+        }
+    }
+    wolfSSL_BIO_write(bio, "\0", 1);
+
+    /* clears out all current ALPN extensions set */
+    TLSX_Remove(&ssl->extensions, TLSX_APPLICATION_LAYER_PROTOCOL, ssl->heap);
+
+    if ((sz = wolfSSL_BIO_get_mem_data(bio, &pt)) > 0) {
+        wolfSSL_UseALPN(ssl, pt, sz, alpn_opt);
+    }
+    wolfSSL_BIO_free(bio);
+    return WOLFSSL_SUCCESS;
+}
+#endif /* HAVE_ALPN */
 #endif
 
 #endif /* WOLFCRYPT_ONLY */
 
 #if defined(OPENSSL_EXTRA)
+
+#define WOLFSSL_BIO_INCLUDED
+#include "src/bio.c"
+
+int oid2nid(word32 oid, int grp)
+{
+    /* get OID type */
+    switch (grp) {
+        /* oidHashType */
+        case oidHashType:
+            switch (oid) {
+            #ifdef WOLFSSL_MD2
+                case MD2h:
+                    return NID_md2;
+            #endif
+            #ifndef NO_MD5
+                case MD5h:
+                    return NID_md5;
+            #endif
+            #ifndef NO_SHA
+                case SHAh:
+                    return NID_sha1;
+            #endif
+                case SHA224h:
+                    return NID_sha224;
+            #ifndef NO_SHA256
+                case SHA256h:
+                    return NID_sha256;
+            #endif
+            #ifdef WOLFSSL_SHA384
+                case SHA384h:
+                    return NID_sha384;
+            #endif
+            #ifdef WOLFSSL_SHA512
+                case SHA512h:
+                    return NID_sha512;
+            #endif
+            }
+            break;
+
+        /*  oidSigType */
+        case oidSigType:
+            switch (oid) {
+            #ifndef NO_DSA
+                case CTC_SHAwDSA:
+                    return CTC_SHAwDSA;
+            #endif /* NO_DSA */
+            #ifndef NO_RSA
+                case CTC_MD2wRSA:
+                    return CTC_MD2wRSA;
+                case CTC_MD5wRSA:
+                    return CTC_MD5wRSA;
+                case CTC_SHAwRSA:
+                    return CTC_SHAwRSA;
+                case CTC_SHA224wRSA:
+                    return CTC_SHA224wRSA;
+                case CTC_SHA256wRSA:
+                    return CTC_SHA256wRSA;
+                case CTC_SHA384wRSA:
+                    return CTC_SHA384wRSA;
+                case CTC_SHA512wRSA:
+                    return CTC_SHA512wRSA;
+            #endif /* NO_RSA */
+            #ifdef HAVE_ECC
+                case CTC_SHAwECDSA:
+                    return CTC_SHAwECDSA;
+                case CTC_SHA224wECDSA:
+                    return CTC_SHA224wECDSA;
+                case CTC_SHA256wECDSA:
+                    return CTC_SHA256wECDSA;
+                case CTC_SHA384wECDSA:
+                    return CTC_SHA384wECDSA;
+                case CTC_SHA512wECDSA:
+                    return CTC_SHA512wECDSA;
+            #endif /* HAVE_ECC */
+            }
+            break;
+
+        /* oidKeyType */
+        case oidKeyType:
+            switch (oid) {
+            #ifndef NO_DSA
+                case DSAk:
+                    return DSAk;
+            #endif /* NO_DSA */
+            #ifndef NO_RSA
+                case RSAk:
+                    return RSAk;
+            #endif /* NO_RSA */
+            #ifdef HAVE_NTRU
+                case NTRUk:
+                    return NTRUk;
+            #endif /* HAVE_NTRU */
+            #ifdef HAVE_ECC
+                case ECDSAk:
+                    return ECDSAk;
+            #endif /* HAVE_ECC */
+            }
+            break;
+
+
+    #ifdef HAVE_ECC
+        case oidCurveType:
+            switch (oid) {
+            case ECC_SECP192R1_OID:
+                return NID_X9_62_prime192v1;
+            case ECC_PRIME192V2_OID:
+                return NID_X9_62_prime192v2;
+            case ECC_PRIME192V3_OID:
+                return NID_X9_62_prime192v3;
+            case ECC_PRIME239V1_OID:
+                return NID_X9_62_prime239v1;
+            case ECC_PRIME239V2_OID:
+                return NID_X9_62_prime239v2;
+            case ECC_PRIME239V3_OID:
+                return NID_X9_62_prime239v3;
+            case ECC_SECP256R1_OID:
+                return NID_X9_62_prime256v1;
+            case ECC_SECP112R1_OID:
+                return NID_secp112r1;
+            case ECC_SECP112R2_OID:
+                return NID_secp112r2;
+            case ECC_SECP128R1_OID:
+                return NID_secp128r1;
+            case ECC_SECP128R2_OID:
+                return NID_secp128r2;
+            case ECC_SECP160R1_OID:
+                return NID_secp160r1;
+            case ECC_SECP160R2_OID:
+                return NID_secp160r2;
+            case ECC_SECP224R1_OID:
+                return NID_secp224r1;
+            case ECC_SECP384R1_OID:
+                return NID_secp384r1;
+            case ECC_SECP521R1_OID:
+                return NID_secp521r1;
+            case ECC_SECP160K1_OID:
+                return NID_secp160k1;
+            case ECC_SECP192K1_OID:
+                return NID_secp192k1;
+            case ECC_SECP224K1_OID:
+                return NID_secp224k1;
+            case ECC_SECP256K1_OID:
+                return NID_secp256k1;
+            case ECC_BRAINPOOLP160R1_OID:
+                return NID_brainpoolP160r1;
+            case ECC_BRAINPOOLP192R1_OID:
+                return NID_brainpoolP192r1;
+            case ECC_BRAINPOOLP224R1_OID:
+                return NID_brainpoolP224r1;
+            case ECC_BRAINPOOLP256R1_OID:
+                return NID_brainpoolP256r1;
+            case ECC_BRAINPOOLP320R1_OID:
+                return NID_brainpoolP320r1;
+            case ECC_BRAINPOOLP384R1_OID:
+                return NID_brainpoolP384r1;
+            case ECC_BRAINPOOLP512R1_OID:
+                return NID_brainpoolP512r1;
+            }
+            break;
+    #endif /* HAVE_ECC */
+
+        /* oidBlkType */
+        case oidBlkType:
+            switch (oid) {
+            #ifdef WOLFSSL_AES_128
+                case AES128CBCb:
+                    return AES128CBCb;
+            #endif
+            #ifdef WOLFSSL_AES_192
+                case AES192CBCb:
+                    return AES192CBCb;
+            #endif
+            #ifdef WOLFSSL_AES_256
+                case AES256CBCb:
+                    return AES256CBCb;
+            #endif
+            #ifndef NO_DES3
+                case DESb:
+                    return NID_des;
+                case DES3b:
+                    return NID_des3;
+            #endif
+            }
+            break;
+
+    #ifdef HAVE_OCSP
+        case oidOcspType:
+            switch (oid) {
+                case OCSP_BASIC_OID:
+                    return NID_id_pkix_OCSP_basic;
+                case OCSP_NONCE_OID:
+                    return OCSP_NONCE_OID;
+            }
+            break;
+    #endif /* HAVE_OCSP */
+
+        /* oidCertExtType */
+        case oidCertExtType:
+            switch (oid) {
+                case BASIC_CA_OID:
+                    return BASIC_CA_OID;
+                case ALT_NAMES_OID:
+                    return ALT_NAMES_OID;
+                case CRL_DIST_OID:
+                    return CRL_DIST_OID;
+                case AUTH_INFO_OID:
+                    return AUTH_INFO_OID;
+                case AUTH_KEY_OID:
+                    return AUTH_KEY_OID;
+                case SUBJ_KEY_OID:
+                    return SUBJ_KEY_OID;
+                case INHIBIT_ANY_OID:
+                    return INHIBIT_ANY_OID;
+                case KEY_USAGE_OID:
+                    return NID_key_usage;
+                case NAME_CONS_OID:
+                    return NID_name_constraints;
+                case CERT_POLICY_OID:
+                    return NID_certificate_policies;
+            }
+            break;
+
+        /* oidCertAuthInfoType */
+        case oidCertAuthInfoType:
+            switch (oid) {
+                case AIA_OCSP_OID:
+                    return AIA_OCSP_OID;
+                case AIA_CA_ISSUER_OID:
+                    return AIA_CA_ISSUER_OID;
+            }
+            break;
+
+        /* oidCertPolicyType */
+        case oidCertPolicyType:
+            switch (oid) {
+                case CP_ANY_OID:
+                    return NID_any_policy;
+            }
+            break;
+
+        /* oidCertAltNameType */
+        case oidCertAltNameType:
+            switch (oid) {
+                case HW_NAME_OID:
+                    return NID_hw_name_oid;
+            }
+            break;
+
+        /* oidCertKeyUseType */
+        case oidCertKeyUseType:
+            switch (oid) {
+                case EKU_ANY_OID:
+                    return NID_anyExtendedKeyUsage;
+                case EKU_SERVER_AUTH_OID:
+                    return EKU_SERVER_AUTH_OID;
+                case EKU_CLIENT_AUTH_OID:
+                    return EKU_CLIENT_AUTH_OID;
+                case EKU_OCSP_SIGN_OID:
+                    return EKU_OCSP_SIGN_OID;
+            }
+            break;
+
+        /* oidKdfType */
+        case oidKdfType:
+            switch (oid) {
+                case PBKDF2_OID:
+                    return PBKDF2_OID;
+            }
+            break;
+
+        /* oidPBEType */
+        case oidPBEType:
+            switch (oid) {
+                case PBE_SHA1_RC4_128:
+                    return PBE_SHA1_RC4_128;
+                case PBE_SHA1_DES:
+                    return PBE_SHA1_DES;
+                case PBE_SHA1_DES3:
+                    return PBE_SHA1_DES3;
+            }
+            break;
+
+        /* oidKeyWrapType */
+        case oidKeyWrapType:
+            switch (oid) {
+            #ifdef WOLFSSL_AES_128
+                case AES128_WRAP:
+                    return AES128_WRAP;
+            #endif
+            #ifdef WOLFSSL_AES_192
+                case AES192_WRAP:
+                    return AES192_WRAP;
+            #endif
+            #ifdef WOLFSSL_AES_256
+                case AES256_WRAP:
+                    return AES256_WRAP;
+            #endif
+            }
+            break;
+
+        /* oidCmsKeyAgreeType */
+        case oidCmsKeyAgreeType:
+            switch (oid) {
+                #ifndef NO_SHA
+                case dhSinglePass_stdDH_sha1kdf_scheme:
+                    return dhSinglePass_stdDH_sha1kdf_scheme;
+                #endif
+                #ifdef WOLFSSL_SHA224
+                case dhSinglePass_stdDH_sha224kdf_scheme:
+                    return dhSinglePass_stdDH_sha224kdf_scheme;
+                #endif
+                #ifndef NO_SHA256
+                case dhSinglePass_stdDH_sha256kdf_scheme:
+                    return dhSinglePass_stdDH_sha256kdf_scheme;
+                #endif
+                #ifdef WOLFSSL_SHA384
+                case dhSinglePass_stdDH_sha384kdf_scheme:
+                    return dhSinglePass_stdDH_sha384kdf_scheme;
+                #endif
+                #ifdef WOLFSSL_SHA512
+                case dhSinglePass_stdDH_sha512kdf_scheme:
+                    return dhSinglePass_stdDH_sha512kdf_scheme;
+                #endif
+            }
+            break;
+
+        default:
+            WOLFSSL_MSG("NID not in table");
+            return -1;
+    }
+
+    return -1;
+}
+
+
+/* when calling SetIndividualInternal, mpi should be cleared by caller if no
+ * longer used. ie mp_free(mpi). This is to free data when fastmath is
+ * disabled since a copy of mpi is made by this function and placed into bn.
+ */
+int SetIndividualInternal(WOLFSSL_BIGNUM* bn, mp_int* mpi)
+{
+    WOLFSSL_MSG("Entering SetIndividualInternal");
+
+    if (bn == NULL || bn->internal == NULL) {
+        WOLFSSL_MSG("bn NULL error");
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    if (mpi == NULL || (mp_init(mpi) != MP_OKAY)) {
+        WOLFSSL_MSG("mpi NULL error");
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    if (mp_copy((mp_int*)bn->internal, mpi) != MP_OKAY) {
+        WOLFSSL_MSG("mp_copy error");
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    return WOLFSSL_SUCCESS;
+}
+
+
+#ifndef NO_ASN
+WOLFSSL_BIGNUM *wolfSSL_ASN1_INTEGER_to_BN(const WOLFSSL_ASN1_INTEGER *ai,
+                                       WOLFSSL_BIGNUM *bn)
+{
+    mp_int mpi;
+    word32 idx = 0;
+    int ret;
+
+    WOLFSSL_ENTER("wolfSSL_ASN1_INTEGER_to_BN");
+
+    if (ai == NULL) {
+        return NULL;
+    }
+
+    ret = GetInt(&mpi, ai->data, &idx, ai->dataMax);
+    if (ret != 0) {
+    #ifdef WOLFSSL_QT
+        mp_init(&mpi); /* must init mpi */
+        /* Serial number in QT starts at index 0 of data */
+        if (mp_read_unsigned_bin(&mpi, (byte*)ai->data, ai->length) != 0) {
+                mp_clear(&mpi);
+                return NULL;
+            }
+    #else
+        /* expecting ASN1 format for INTEGER */
+        WOLFSSL_LEAVE("wolfSSL_ASN1_INTEGER_to_BN", ret);
+        return NULL;
+    #endif
+    }
+
+    /* mp_clear needs called because mpi is copied and causes memory leak with
+     * --disable-fastmath */
+    ret = SetIndividualExternal(&bn, &mpi);
+    mp_clear(&mpi);
+
+    if (ret != WOLFSSL_SUCCESS) {
+        return NULL;
+    }
+    return bn;
+}
+#endif /* !NO_ASN */
+
+#if !defined(NO_DSA) && !defined(NO_DH)
+WOLFSSL_DH *wolfSSL_DSA_dup_DH(const WOLFSSL_DSA *dsa)
+{
+    WOLFSSL_DH* dh;
+    DhKey*      key;
+
+    WOLFSSL_ENTER("wolfSSL_DSA_dup_DH");
+
+    if (dsa == NULL) {
+        return NULL;
+    }
+
+    dh = wolfSSL_DH_new();
+    if (dh == NULL) {
+        return NULL;
+    }
+    key = (DhKey*)dh->internal;
+
+    if (dsa->p != NULL &&
+        SetIndividualInternal(((WOLFSSL_DSA*)dsa)->p, &key->p) != WOLFSSL_SUCCESS) {
+        WOLFSSL_MSG("rsa p key error");
+        wolfSSL_DH_free(dh);
+        return NULL;
+    }
+    if (dsa->g != NULL &&
+        SetIndividualInternal(((WOLFSSL_DSA*)dsa)->g, &key->g) != WOLFSSL_SUCCESS) {
+        WOLFSSL_MSG("rsa g key error");
+        wolfSSL_DH_free(dh);
+        return NULL;
+    }
+
+    if (SetIndividualExternal(&dh->p, &key->p) != WOLFSSL_SUCCESS) {
+        WOLFSSL_MSG("dsa p key error");
+        wolfSSL_DH_free(dh);
+        return NULL;
+    }
+    if (SetIndividualExternal(&dh->g, &key->g) != WOLFSSL_SUCCESS) {
+        WOLFSSL_MSG("dsa g key error");
+        wolfSSL_DH_free(dh);
+        return NULL;
+    }
+
+    return dh;
+}
+#endif /* !NO_DSA && !NO_DH */
+
+
+#ifndef NO_RSA
+#if !defined(HAVE_USER_RSA) && !defined(HAVE_FAST_RSA)
+/* Openssl -> WolfSSL */
+int SetRsaInternal(WOLFSSL_RSA* rsa)
+{
+    RsaKey* key;
+    WOLFSSL_MSG("Entering SetRsaInternal");
+
+    if (rsa == NULL || rsa->internal == NULL) {
+        WOLFSSL_MSG("rsa key NULL error");
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    key = (RsaKey*)rsa->internal;
+
+    if (SetIndividualInternal(rsa->n, &key->n) != WOLFSSL_SUCCESS) {
+        WOLFSSL_MSG("rsa n key error");
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    if (SetIndividualInternal(rsa->e, &key->e) != WOLFSSL_SUCCESS) {
+        WOLFSSL_MSG("rsa e key error");
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    /* public key */
+    key->type = RSA_PUBLIC;
+
+    if (rsa->d != NULL) {
+        if (SetIndividualInternal(rsa->d, &key->d) != WOLFSSL_SUCCESS) {
+            WOLFSSL_MSG("rsa d key error");
+            return WOLFSSL_FATAL_ERROR;
+        }
+
+        /* private key */
+        key->type = RSA_PRIVATE;
+    }
+
+    if (rsa->p != NULL &&
+        SetIndividualInternal(rsa->p, &key->p) != WOLFSSL_SUCCESS) {
+        WOLFSSL_MSG("rsa p key error");
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    if (rsa->q != NULL &&
+        SetIndividualInternal(rsa->q, &key->q) != WOLFSSL_SUCCESS) {
+        WOLFSSL_MSG("rsa q key error");
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+#ifndef RSA_LOW_MEM
+    if (rsa->dmp1 != NULL &&
+        SetIndividualInternal(rsa->dmp1, &key->dP) != WOLFSSL_SUCCESS) {
+        WOLFSSL_MSG("rsa dP key error");
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    if (rsa->dmq1 != NULL &&
+        SetIndividualInternal(rsa->dmq1, &key->dQ) != WOLFSSL_SUCCESS) {
+        WOLFSSL_MSG("rsa dQ key error");
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    if (rsa->iqmp != NULL &&
+        SetIndividualInternal(rsa->iqmp, &key->u) != WOLFSSL_SUCCESS) {
+        WOLFSSL_MSG("rsa u key error");
+        return WOLFSSL_FATAL_ERROR;
+    }
+#endif /* !RSA_LOW_MEM */
+
+    rsa->inSet = 1;
+
+    return WOLFSSL_SUCCESS;
+}
+
+
+/* SSL_SUCCESS on ok */
+#ifndef NO_WOLFSSL_STUB
+int wolfSSL_RSA_blinding_on(WOLFSSL_RSA* rsa, WOLFSSL_BN_CTX* bn)
+{
+    (void)rsa;
+    (void)bn;
+    WOLFSSL_STUB("RSA_blinding_on");
+    WOLFSSL_MSG("wolfSSL_RSA_blinding_on");
+
+    return WOLFSSL_SUCCESS;  /* on by default */
+}
+#endif
+
+/* return compliant with OpenSSL
+ *   size of encrypted data if success , -1 if error
+ */
+int wolfSSL_RSA_public_encrypt(int len, const unsigned char* fr,
+                            unsigned char* to, WOLFSSL_RSA* rsa, int padding)
+{
+    int initTmpRng = 0;
+    WC_RNG *rng = NULL;
+    int outLen;
+    int ret = 0;
+#ifdef WOLFSSL_SMALL_STACK
+    WC_RNG* tmpRNG = NULL;
+#else
+    WC_RNG  _tmpRNG[1];
+    WC_RNG* tmpRNG = _tmpRNG;
+#endif
+#if !defined(HAVE_FIPS) && !defined(HAVE_USER_RSA) && !defined(HAVE_FAST_RSA)
+    int  mgf = WC_MGF1NONE;
+    enum wc_HashType hash = WC_HASH_TYPE_NONE;
+#endif
+
+    WOLFSSL_MSG("wolfSSL_RSA_public_encrypt");
+
+    /* Check and remap the padding to internal values, if needed. */
+#if !defined(HAVE_FIPS) && !defined(HAVE_USER_RSA) && !defined(HAVE_FAST_RSA)
+    if (padding == RSA_PKCS1_PADDING)
+        padding = WC_RSA_PKCSV15_PAD;
+    else if (padding == RSA_PKCS1_OAEP_PADDING) {
+        padding = WC_RSA_OAEP_PAD;
+        hash = WC_HASH_TYPE_SHA;
+        mgf = WC_MGF1SHA1;
+    }
+    else if (padding == RSA_PKCS1_PSS_PADDING) {
+        padding = WC_RSA_PSS_PAD;
+        hash = WC_HASH_TYPE_SHA256;
+        mgf  = WC_MGF1SHA256;
+    }
+    else if (padding == RSA_NO_PADDING) {
+        padding = WC_RSA_NO_PAD;
+    }
+#else
+    if (padding == RSA_PKCS1_PADDING)
+      ;
+#endif
+    else {
+        WOLFSSL_MSG("wolfSSL_RSA_public_encrypt unsupported padding");
+        return 0;
+    }
+
+    if (rsa->inSet == 0)
+    {
+        if (SetRsaInternal(rsa) != SSL_SUCCESS) {
+            WOLFSSL_MSG("SetRsaInternal failed");
+            return 0;
+        }
+    }
+
+    outLen = wolfSSL_RSA_size(rsa);
+
+    rng = WOLFSSL_RSA_GetRNG(rsa, (WC_RNG**)&tmpRNG, &initTmpRng);
+
+    if (outLen == 0) {
+        WOLFSSL_MSG("Bad RSA size");
+    }
+
+    if (rng) {
+#if !defined(HAVE_FIPS) && !defined(HAVE_USER_RSA) && !defined(HAVE_FAST_RSA)
+        ret = wc_RsaPublicEncrypt_ex(fr, len, to, outLen,
+                             (RsaKey*)rsa->internal, rng, padding,
+                             hash, mgf, NULL, 0);
+#else
+        ret = wc_RsaPublicEncrypt(fr, len, to, outLen,
+                             (RsaKey*)rsa->internal, rng);
+#endif
+        if (ret <= 0) {
+            WOLFSSL_MSG("Bad Rsa Encrypt");
+        }
+        if (len <= 0) {
+            WOLFSSL_MSG("Bad Rsa Encrypt");
+        }
+    }
+
+    if (initTmpRng)
+        wc_FreeRng(tmpRNG);
+#ifdef WOLFSSL_SMALL_STACK
+    if (tmpRNG)
+        XFREE(tmpRNG, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+    if (ret >= 0)
+        WOLFSSL_MSG("wolfSSL_RSA_public_encrypt success");
+    else {
+        WOLFSSL_MSG("wolfSSL_RSA_public_encrypt failed");
+        ret = WOLFSSL_FATAL_ERROR; /* return -1 on error case */
+    }
+    return ret;
+}
+
+
+
+
+/* return compliant with OpenSSL
+ *   size of plain recovered data if success , -1 if error
+ */
+int wolfSSL_RSA_private_decrypt(int len, const unsigned char* fr,
+                            unsigned char* to, WOLFSSL_RSA* rsa, int padding)
+{
+    int outLen;
+    int ret = 0;
+  #if !defined(HAVE_FIPS) && !defined(HAVE_USER_RSA) && !defined(HAVE_FAST_RSA)
+    int mgf = WC_MGF1NONE;
+    enum wc_HashType hash = WC_HASH_TYPE_NONE;
+  #endif
+
+    WOLFSSL_MSG("wolfSSL_RSA_private_decrypt");
+
+#if !defined(HAVE_FIPS) && !defined(HAVE_USER_RSA) && !defined(HAVE_FAST_RSA)
+    if (padding == RSA_PKCS1_PADDING)
+        padding = WC_RSA_PKCSV15_PAD;
+    else if (padding == RSA_PKCS1_OAEP_PADDING) {
+        padding = WC_RSA_OAEP_PAD;
+        hash = WC_HASH_TYPE_SHA;
+        mgf = WC_MGF1SHA1;
+    }
+    else if (padding == RSA_PKCS1_PSS_PADDING) {
+        padding = WC_RSA_PSS_PAD;
+        hash = WC_HASH_TYPE_SHA256;
+        mgf  = WC_MGF1SHA256;
+    }
+    else if (padding == RSA_NO_PADDING) {
+        padding = WC_RSA_NO_PAD;
+    }
+#else
+    if (padding == RSA_PKCS1_PADDING)
+        ;
+#endif
+    else {
+        WOLFSSL_MSG("wolfSSL_RSA_private_decrypt unsupported padding");
+        return 0;
+    }
+
+    if (rsa->inSet == 0)
+    {
+        if (SetRsaInternal(rsa) != SSL_SUCCESS) {
+            WOLFSSL_MSG("SetRsaInternal failed");
+            return 0;
+        }
+    }
+
+    outLen = wolfSSL_RSA_size(rsa);
+    if (outLen == 0) {
+        WOLFSSL_MSG("Bad RSA size");
+    }
+
+    /* size of 'to' buffer must be size of RSA key */
+#if !defined(HAVE_FIPS) && !defined(HAVE_USER_RSA) && !defined(HAVE_FAST_RSA)
+    ret = wc_RsaPrivateDecrypt_ex(fr, len, to, outLen,
+                            (RsaKey*)rsa->internal, padding,
+                            hash, mgf, NULL, 0);
+#else
+    ret = wc_RsaPrivateDecrypt(fr, len, to, outLen,
+                            (RsaKey*)rsa->internal);
+#endif
+
+    if (len <= 0) {
+        WOLFSSL_MSG("Bad Rsa Decrypt");
+    }
+
+    if (ret > 0)
+        WOLFSSL_MSG("wolfSSL_RSA_private_decrypt success");
+    else {
+        WOLFSSL_MSG("wolfSSL_RSA_private_decrypt failed");
+        ret = WOLFSSL_FATAL_ERROR;
+    }
+    return ret;
+}
+
+#if !defined(_WIN32) && !defined(HAVE_FIPS) && !defined(HAVE_SELFTEST)
+int wolfSSL_RSA_public_decrypt(int flen, const unsigned char* from,
+                          unsigned char* to, WOLFSSL_RSA* rsa, int padding)
+{
+    int tlen = 0;
+    int pad_type;
+
+    WOLFSSL_ENTER("wolfSSL_RSA_public_decrypt");
+
+    if (rsa == NULL || rsa->internal == NULL || from == NULL) {
+        WOLFSSL_MSG("Bad function arguments");
+        return WOLFSSL_FAILURE;
+    }
+
+    switch (padding) {
+    case RSA_PKCS1_PADDING:
+        pad_type = WC_RSA_PKCSV15_PAD;
+        break;
+    case RSA_PKCS1_OAEP_PADDING:
+        pad_type = WC_RSA_OAEP_PAD;
+        break;
+    case RSA_PKCS1_PSS_PADDING:
+        pad_type = WC_RSA_PSS_PAD;
+        break;
+    case RSA_NO_PADDING:
+        pad_type = WC_RSA_NO_PAD;
+        break;
+    default:
+        WOLFSSL_MSG("wolfSSL_RSA_public_decrypt unsupported padding");
+        return WOLFSSL_FAILURE;
+    }
+
+    if (rsa->inSet == 0)
+    {
+        WOLFSSL_MSG("No RSA internal set, do it");
+
+        if (SetRsaInternal(rsa) != WOLFSSL_SUCCESS) {
+            WOLFSSL_MSG("SetRsaInternal failed");
+            return WOLFSSL_FAILURE;
+        }
+    }
+
+    /* size of 'to' buffer must be size of RSA key */
+    tlen = wc_RsaSSL_Verify_ex(from, flen, to, wolfSSL_RSA_size(rsa),
+                               (RsaKey*)rsa->internal, pad_type);
+    if (tlen <= 0)
+        WOLFSSL_MSG("wolfSSL_RSA_public_decrypt failed");
+    else {
+        WOLFSSL_MSG("wolfSSL_RSA_public_decrypt success");
+    }
+    return tlen;
+}
+#endif /* !defined(_WIN32) && !defined(HAVE_FIPS) && !defined(HAVE_SELFTEST) */
+
+/* RSA private encrypt calls wc_RsaSSL_Sign. Similar function set up as RSA
+ * public decrypt.
+ *
+ * len  Length of input buffer
+ * in   Input buffer to sign
+ * out  Output buffer (expected to be greater than or equal to RSA key size)
+ * rsa     Key to use for encryption
+ * padding Type of RSA padding to use.
+ */
+int wolfSSL_RSA_private_encrypt(int len, unsigned char* in,
+                            unsigned char* out, WOLFSSL_RSA* rsa, int padding)
+{
+    int sz = 0;
+    WC_RNG* rng = NULL;
+#if !defined(WC_RSA_BLINDING) || defined(HAVE_USER_RSA)
+    WC_RNG rng_lcl;
+#endif
+    RsaKey* key;
+
+    WOLFSSL_MSG("wolfSSL_RSA_private_encrypt");
+
+    if (len < 0 || rsa == NULL || rsa->internal == NULL || in == NULL) {
+        WOLFSSL_MSG("Bad function arguments");
+        return 0;
+    }
+
+    if (padding != RSA_PKCS1_PADDING && padding != RSA_PKCS1_PSS_PADDING) {
+        WOLFSSL_MSG("wolfSSL_RSA_private_encrypt unsupported padding");
+        return 0;
+    }
+
+    if (rsa->inSet == 0)
+    {
+        WOLFSSL_MSG("Setting internal RSA structure");
+
+        if (SetRsaInternal(rsa) != SSL_SUCCESS) {
+            WOLFSSL_MSG("SetRsaInternal failed");
+            return 0;
+        }
+    }
+
+    key = (RsaKey*)rsa->internal;
+#if defined(WC_RSA_BLINDING) && !defined(HAVE_USER_RSA)
+    rng = key->rng;
+#else
+    rng = &rng_lcl;
+    #ifndef HAVE_FIPS
+    if (wc_InitRng_ex(rng, key->heap, INVALID_DEVID) != 0)
+    #else
+    if (wc_InitRng(rng) != 0)
+    #endif
+    {
+        WOLFSSL_MSG("Error with random number");
+        return SSL_FATAL_ERROR;
+    }
+#endif
+
+    /* size of output buffer must be size of RSA key */
+    sz = wc_RsaSSL_Sign(in, (word32)len, out, wolfSSL_RSA_size(rsa), key, rng);
+    #if !defined(WC_RSA_BLINDING) || defined(HAVE_USER_RSA)
+    if (wc_FreeRng(rng) != 0) {
+        WOLFSSL_MSG("Error freeing random number generator");
+        return SSL_FATAL_ERROR;
+    }
+    #endif
+    if (sz <= 0) {
+        WOLFSSL_LEAVE("wolfSSL_RSA_private_encrypt", sz);
+        return 0;
+    }
+
+    return sz;
+}
+#endif /* HAVE_USER_RSA */
+#endif
+
+
+/* frees all nodes in the current threads error queue
+ *
+ * id  thread id. ERR_remove_state is depreciated and id is ignored. The
+ *     current threads queue will be free'd.
+ */
+void wolfSSL_ERR_remove_state(unsigned long id)
+{
+    WOLFSSL_ENTER("wolfSSL_ERR_remove_state");
+    (void)id;
+    if (wc_ERR_remove_state() != 0) {
+        WOLFSSL_MSG("Error with removing the state");
+    }
+}
+
+
+WOLFSSL_BN_CTX* wolfSSL_BN_CTX_new(void)
+{
+    static int ctx;  /* wolfcrypt doesn't now need ctx */
+
+    WOLFSSL_MSG("wolfSSL_BN_CTX_new");
+    return (WOLFSSL_BN_CTX*)&ctx;
+
+}
+
+void wolfSSL_BN_CTX_init(WOLFSSL_BN_CTX* ctx)
+{
+    (void)ctx;
+    WOLFSSL_MSG("wolfSSL_BN_CTX_init");
+}
+
+
+void wolfSSL_BN_CTX_free(WOLFSSL_BN_CTX* ctx)
+{
+    (void)ctx;
+    WOLFSSL_MSG("wolfSSL_BN_CTX_free");
+    /* do free since static ctx that does nothing */
+}
+
+/* WOLFSSL_SUCCESS on ok */
+int wolfSSL_BN_sub(WOLFSSL_BIGNUM* r, const WOLFSSL_BIGNUM* a,
+                  const WOLFSSL_BIGNUM* b)
+{
+    WOLFSSL_MSG("wolfSSL_BN_sub");
+
+    if (r == NULL || a == NULL || b == NULL)
+        return 0;
+
+    if (mp_sub((mp_int*)a->internal,(mp_int*)b->internal,
+               (mp_int*)r->internal) == MP_OKAY)
+        return WOLFSSL_SUCCESS;
+
+    WOLFSSL_MSG("wolfSSL_BN_sub mp_sub failed");
+    return 0;
+}
+
+/* WOLFSSL_SUCCESS on ok */
+int wolfSSL_BN_mod(WOLFSSL_BIGNUM* r, const WOLFSSL_BIGNUM* a,
+                  const WOLFSSL_BIGNUM* b, const WOLFSSL_BN_CTX* c)
+{
+    (void)c;
+    WOLFSSL_MSG("wolfSSL_BN_mod");
+
+    if (r == NULL || a == NULL || b == NULL)
+        return 0;
+
+    if (mp_mod((mp_int*)a->internal,(mp_int*)b->internal,
+               (mp_int*)r->internal) == MP_OKAY)
+        return WOLFSSL_SUCCESS;
+
+    WOLFSSL_MSG("wolfSSL_BN_mod mp_mod failed");
+    return 0;
+}
+
+
+/* r = (a^p) % m */
+int wolfSSL_BN_mod_exp(WOLFSSL_BIGNUM *r, const WOLFSSL_BIGNUM *a,
+      const WOLFSSL_BIGNUM *p, const WOLFSSL_BIGNUM *m, WOLFSSL_BN_CTX *ctx)
+{
+    int ret;
+
+    WOLFSSL_ENTER("wolfSSL_BN_mod_exp");
+
+    (void) ctx;
+    if (r == NULL || a == NULL || p == NULL || m == NULL) {
+        WOLFSSL_MSG("Bad Argument");
+        return WOLFSSL_FAILURE;
+    }
+
+    if ((ret = mp_exptmod((mp_int*)a->internal,(mp_int*)p->internal,
+               (mp_int*)m->internal, (mp_int*)r->internal)) == MP_OKAY) {
+        return WOLFSSL_SUCCESS;
+    }
+
+    WOLFSSL_LEAVE("wolfSSL_BN_mod_exp", ret);
+    (void)ret;
+
+    return WOLFSSL_FAILURE;
+}
+
+/* r = (a * p) % m */
+int wolfSSL_BN_mod_mul(WOLFSSL_BIGNUM *r, const WOLFSSL_BIGNUM *a,
+        const WOLFSSL_BIGNUM *p, const WOLFSSL_BIGNUM *m, WOLFSSL_BN_CTX *ctx)
+{
+    int ret;
+
+    WOLFSSL_ENTER("wolfSSL_BN_mod_mul");
+
+    (void) ctx;
+    if (r == NULL || a == NULL || p == NULL || m == NULL) {
+        WOLFSSL_MSG("Bad Argument");
+        return SSL_FAILURE;
+    }
+
+    if ((ret = mp_mulmod((mp_int*)a->internal,(mp_int*)p->internal,
+               (mp_int*)m->internal, (mp_int*)r->internal)) == MP_OKAY) {
+        return SSL_SUCCESS;
+    }
+
+    WOLFSSL_LEAVE("wolfSSL_BN_mod_mul", ret);
+    (void)ret;
+
+    return SSL_FAILURE;
+}
+
+#ifdef OPENSSL_EXTRA
+const WOLFSSL_BIGNUM* wolfSSL_BN_value_one(void)
+{
+    WOLFSSL_MSG("wolfSSL_BN_value_one");
+
+    if (bn_one == NULL) {
+        bn_one = wolfSSL_BN_new();
+        if (bn_one) {
+            if (mp_set_int((mp_int*)bn_one->internal, 1) != MP_OKAY) {
+                /* handle error by freeing BN and returning NULL */
+                wolfSSL_BN_free(bn_one);
+                bn_one = NULL;
+            }
+        }
+    }
+
+    return bn_one;
+}
+#endif
+
+/* return compliant with OpenSSL
+ *   size of BIGNUM in bytes, 0 if error */
+int wolfSSL_BN_num_bytes(const WOLFSSL_BIGNUM* bn)
+{
+    WOLFSSL_ENTER("wolfSSL_BN_num_bytes");
+
+    if (bn == NULL || bn->internal == NULL)
+        return WOLFSSL_FAILURE;
+
+    return mp_unsigned_bin_size((mp_int*)bn->internal);
+}
+
+/* return compliant with OpenSSL
+ *   size of BIGNUM in bits, 0 if error */
+int wolfSSL_BN_num_bits(const WOLFSSL_BIGNUM* bn)
+{
+    WOLFSSL_ENTER("wolfSSL_BN_num_bits");
+
+    if (bn == NULL || bn->internal == NULL)
+        return WOLFSSL_FAILURE;
+
+    return mp_count_bits((mp_int*)bn->internal);
+}
+
+int wolfSSL_BN_is_negative(const WOLFSSL_BIGNUM* bn)
+{
+    if (bn == NULL)
+        return WOLFSSL_FAILURE;
+
+    return mp_isneg((mp_int*)bn->internal);
+}
+
+/* return compliant with OpenSSL
+ *   1 if BIGNUM is zero, 0 else */
+int wolfSSL_BN_is_zero(const WOLFSSL_BIGNUM* bn)
+{
+    WOLFSSL_MSG("wolfSSL_BN_is_zero");
+
+    if (bn == NULL || bn->internal == NULL)
+        return WOLFSSL_FAILURE;
+
+    if (mp_iszero((mp_int*)bn->internal) == MP_YES)
+        return WOLFSSL_SUCCESS;
+
+    return WOLFSSL_FAILURE;
+}
+
+/* return compliant with OpenSSL
+ *   1 if BIGNUM is one, 0 else */
+int wolfSSL_BN_is_one(const WOLFSSL_BIGNUM* bn)
+{
+    WOLFSSL_MSG("wolfSSL_BN_is_one");
+
+    if (bn == NULL || bn->internal == NULL)
+        return WOLFSSL_FAILURE;
+
+    if (mp_cmp_d((mp_int*)bn->internal, 1) == MP_EQ)
+        return WOLFSSL_SUCCESS;
+
+    return WOLFSSL_FAILURE;
+}
+
+/* return compliant with OpenSSL
+ *   1 if BIGNUM is odd, 0 else */
+int wolfSSL_BN_is_odd(const WOLFSSL_BIGNUM* bn)
+{
+    WOLFSSL_MSG("wolfSSL_BN_is_odd");
+
+    if (bn == NULL || bn->internal == NULL)
+        return WOLFSSL_FAILURE;
+
+    if (mp_isodd((mp_int*)bn->internal) == MP_YES)
+        return WOLFSSL_SUCCESS;
+
+    return WOLFSSL_FAILURE;
+}
+
+/* return compliant with OpenSSL
+ *   1 if BIGNUM is word, 0 else */
+int wolfSSL_BN_is_word(const WOLFSSL_BIGNUM* bn, WOLFSSL_BN_ULONG w)
+{
+    WOLFSSL_ENTER("wolfSSL_BN_is_word");
+
+    if (bn == NULL || bn->internal == NULL) {
+        WOLFSSL_MSG("bn NULL error");
+        return WOLFSSL_FAILURE;
+    }
+
+    if (mp_isword((mp_int*)bn->internal, w) == MP_YES) {
+         return WOLFSSL_SUCCESS;
+    }
+
+    return WOLFSSL_FAILURE;
+}
+
+/* return compliant with OpenSSL
+ *   -1 if a < b, 0 if a == b and 1 if a > b
+ */
+int wolfSSL_BN_cmp(const WOLFSSL_BIGNUM* a, const WOLFSSL_BIGNUM* b)
+{
+    int ret;
+
+    WOLFSSL_MSG("wolfSSL_BN_cmp");
+
+    if (a == NULL || a->internal == NULL || b == NULL || b->internal == NULL)
+        return WOLFSSL_FATAL_ERROR;
+
+    ret = mp_cmp((mp_int*)a->internal, (mp_int*)b->internal);
+
+    return (ret == MP_EQ ? 0 : (ret == MP_GT ? 1 : -1));
+}
+
+/* return compliant with OpenSSL
+ *   length of BIGNUM in bytes, -1 if error */
+int wolfSSL_BN_bn2bin(const WOLFSSL_BIGNUM* bn, unsigned char* r)
+{
+    WOLFSSL_MSG("wolfSSL_BN_bn2bin");
+
+    if (bn == NULL || bn->internal == NULL) {
+        WOLFSSL_MSG("NULL bn error");
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    if (r == NULL)
+        return mp_unsigned_bin_size((mp_int*)bn->internal);
+
+    if (mp_to_unsigned_bin((mp_int*)bn->internal, r) != MP_OKAY) {
+        WOLFSSL_MSG("mp_to_unsigned_bin error");
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    return mp_unsigned_bin_size((mp_int*)bn->internal);
+}
+
+
+WOLFSSL_BIGNUM* wolfSSL_BN_bin2bn(const unsigned char* str, int len,
+                            WOLFSSL_BIGNUM* ret)
+{
+    int weOwn = 0;
+
+    WOLFSSL_MSG("wolfSSL_BN_bin2bn");
+
+    /* if ret is null create a BN */
+    if (ret == NULL) {
+        ret = wolfSSL_BN_new();
+        weOwn = 1;
+        if (ret == NULL)
+            return NULL;
+    }
+
+    /* check ret and ret->internal then read in value */
+    if (ret && ret->internal) {
+        if (mp_read_unsigned_bin((mp_int*)ret->internal, str, len) != 0) {
+            WOLFSSL_MSG("mp_read_unsigned_bin failure");
+            if (weOwn)
+                wolfSSL_BN_free(ret);
+            return NULL;
+        }
+    } else {
+        return NULL;
+    }
+
+    return ret;
+}
+
+/* return compliant with OpenSSL
+ *   1 if success, 0 if error */
+#ifndef NO_WOLFSSL_STUB
+int wolfSSL_mask_bits(WOLFSSL_BIGNUM* bn, int n)
+{
+    (void)bn;
+    (void)n;
+    WOLFSSL_ENTER("wolfSSL_BN_mask_bits");
+    WOLFSSL_STUB("BN_mask_bits");
+    return SSL_FAILURE;
+}
+#endif
+
+
+/* WOLFSSL_SUCCESS on ok */
+int wolfSSL_BN_rand(WOLFSSL_BIGNUM* bn, int bits, int top, int bottom)
+{
+    int           ret    = 0;
+    int           len    = bits / 8;
+    int           initTmpRng = 0;
+    WC_RNG*       rng    = NULL;
+#ifdef WOLFSSL_SMALL_STACK
+    WC_RNG*       tmpRNG = NULL;
+    byte*         buff   = NULL;
+#else
+    WC_RNG        tmpRNG[1];
+    byte          buff[1024];
+#endif
+
+    (void)top;
+    (void)bottom;
+    WOLFSSL_MSG("wolfSSL_BN_rand");
+
+    if (bits % 8)
+        len++;
+
+#ifdef WOLFSSL_SMALL_STACK
+    buff   = (byte*)XMALLOC(1024,        NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    tmpRNG = (WC_RNG*) XMALLOC(sizeof(WC_RNG), NULL, DYNAMIC_TYPE_RNG);
+    if (buff == NULL || tmpRNG == NULL) {
+        XFREE(buff,   NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(tmpRNG, NULL, DYNAMIC_TYPE_RNG);
+        return ret;
+    }
+#endif
+
+    if (bn == NULL || bn->internal == NULL)
+        WOLFSSL_MSG("Bad function arguments");
+    else if (wc_InitRng(tmpRNG) == 0) {
+        rng = tmpRNG;
+        initTmpRng = 1;
+    }
+    else if (initGlobalRNG)
+        rng = &globalRNG;
+
+    if (rng) {
+        if (wc_RNG_GenerateBlock(rng, buff, len) != 0)
+            WOLFSSL_MSG("Bad wc_RNG_GenerateBlock");
+        else {
+            buff[0]     |= 0x80 | 0x40;
+            buff[len-1] |= 0x01;
+
+            if (mp_read_unsigned_bin((mp_int*)bn->internal,buff,len) != MP_OKAY)
+                WOLFSSL_MSG("mp read bin failed");
+            else
+                ret = WOLFSSL_SUCCESS;
+        }
+    }
+
+    if (initTmpRng)
+        wc_FreeRng(tmpRNG);
+
+#ifdef WOLFSSL_SMALL_STACK
+    XFREE(buff,   NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    XFREE(tmpRNG, NULL, DYNAMIC_TYPE_RNG);
+#endif
+
+    return ret;
+}
+
+
+/* WOLFSSL_SUCCESS on ok
+ * code is same as wolfSSL_BN_rand except for how top and bottom is handled.
+ * top -1 then leave most sig bit alone
+ * top 0 then most sig is set to 1
+ * top is 1 then first two most sig bits are 1
+ *
+ * bottom is hot then odd number */
+int wolfSSL_BN_pseudo_rand(WOLFSSL_BIGNUM* bn, int bits, int top, int bottom)
+{
+    int           ret    = 0;
+    int           len    = bits / 8;
+    int           initTmpRng = 0;
+    WC_RNG*       rng    = NULL;
+#ifdef WOLFSSL_SMALL_STACK
+    WC_RNG*       tmpRNG = NULL;
+    byte*         buff   = NULL;
+#else
+    WC_RNG        tmpRNG[1];
+    byte          buff[1024];
+#endif
+
+    WOLFSSL_MSG("wolfSSL_BN_rand");
+
+    if (bits % 8)
+        len++;
+
+#ifdef WOLFSSL_SMALL_STACK
+    buff   = (byte*)XMALLOC(1024,        NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    tmpRNG = (WC_RNG*) XMALLOC(sizeof(WC_RNG), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    if (buff == NULL || tmpRNG == NULL) {
+        XFREE(buff,   NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(tmpRNG, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        return ret;
+    }
+#endif
+
+    if (bn == NULL || bn->internal == NULL)
+        WOLFSSL_MSG("Bad function arguments");
+    else if (wc_InitRng(tmpRNG) == 0) {
+        rng = tmpRNG;
+        initTmpRng = 1;
+    }
+    else if (initGlobalRNG)
+        rng = &globalRNG;
+
+    if (rng) {
+        if (wc_RNG_GenerateBlock(rng, buff, len) != 0)
+            WOLFSSL_MSG("Bad wc_RNG_GenerateBlock");
+        else {
+            switch (top) {
+                case -1:
+                    break;
+
+                case 0:
+                    buff[0] |= 0x80;
+                    break;
+
+                case 1:
+                    buff[0] |= 0x80 | 0x40;
+                    break;
+            }
+
+            if (bottom == 1) {
+                buff[len-1] |= 0x01;
+            }
+
+            if (mp_read_unsigned_bin((mp_int*)bn->internal,buff,len) != MP_OKAY)
+                WOLFSSL_MSG("mp read bin failed");
+            else
+                ret = WOLFSSL_SUCCESS;
+        }
+    }
+
+    if (initTmpRng)
+        wc_FreeRng(tmpRNG);
+
+#ifdef WOLFSSL_SMALL_STACK
+    XFREE(buff,   NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    XFREE(tmpRNG, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+    return ret;
+}
+
+/* return code compliant with OpenSSL :
+ *   1 if bit set, 0 else
+ */
+int wolfSSL_BN_is_bit_set(const WOLFSSL_BIGNUM* bn, int n)
+{
+    if (bn == NULL || bn->internal == NULL) {
+        WOLFSSL_MSG("bn NULL error");
+        return WOLFSSL_FAILURE;
+    }
+
+    return mp_is_bit_set((mp_int*)bn->internal, (mp_digit)n);
+}
+
+/* return code compliant with OpenSSL :
+ *   1 if success, 0 else
+ */
+int wolfSSL_BN_set_bit(WOLFSSL_BIGNUM* bn, int n)
+{
+    if (bn == NULL || bn->internal == NULL) {
+        WOLFSSL_MSG("bn NULL error");
+        return WOLFSSL_FAILURE;
+    }
+
+    if (mp_set_bit((mp_int*)bn->internal, n) != MP_OKAY) {
+        WOLFSSL_MSG("mp_set_bit error");
+        return WOLFSSL_FAILURE;
+    }
+
+    return WOLFSSL_SUCCESS;
+}
+
+
+int wolfSSL_BN_clear_bit(WOLFSSL_BIGNUM* bn, int n)
+{
+    int ret = WOLFSSL_FAILURE;
+#ifndef WOLFSSL_SMALL_STACK
+    mp_int tmp[1];
+#else
+    mp_int* tmp = NULL;
+#endif
+
+    if (bn == NULL || bn->internal == NULL) {
+        WOLFSSL_MSG("bn NULL error");
+        goto cleanup;
+    }
+    if (mp_is_bit_set((mp_int*)bn->internal, n)) {
+#ifdef WOLFSSL_SMALL_STACK
+       tmp = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_BIGINT);
+       if (tmp == NULL) {
+           goto cleanup;
+       }
+#endif
+        if (mp_init(tmp) != MP_OKAY) {
+            goto cleanup;
+        }
+        if (mp_set_bit(tmp, n) != MP_OKAY) {
+            goto cleanup;
+        }
+        if (mp_sub((mp_int*)bn->internal, tmp, (mp_int*)bn->internal) != MP_OKAY) {
+            goto cleanup;
+        }
+    }
+
+    ret = WOLFSSL_SUCCESS;
+cleanup:
+    mp_clear(tmp);
+#ifdef WOLFSSL_SMALL_STACK
+    if (tmp)
+        XFREE(tmp, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+    return ret;
+}
+
+
+/* WOLFSSL_SUCCESS on ok */
+/* Note on use: this function expects str to be an even length. It is
+ * converting pairs of bytes into 8-bit values. As an example, the RSA
+ * public exponent is commonly 0x010001. To get it to convert, you need
+ * to pass in the string "010001", it will fail if you use "10001". This
+ * is an affect of how Base16_Decode() works.
+ */
+int wolfSSL_BN_hex2bn(WOLFSSL_BIGNUM** bn, const char* str)
+{
+    int     ret     = 0;
+    word32  decSz   = 1024;
+#ifdef WOLFSSL_SMALL_STACK
+    byte*   decoded;
+#else
+    byte    decoded[1024];
+#endif
+    int     weOwn = 0;
+    int     strLen;
+
+    WOLFSSL_MSG("wolfSSL_BN_hex2bn");
+
+#ifdef WOLFSSL_SMALL_STACK
+    decoded = (byte*)XMALLOC(decSz, NULL, DYNAMIC_TYPE_DER);
+    if (decoded == NULL)
+        return ret;
+#endif
+
+    if (str == NULL || str[0] == '\0') {
+        WOLFSSL_MSG("Bad function argument");
+        ret = WOLFSSL_FAILURE;
+    } else {
+        strLen = (int)XSTRLEN(str);
+        /* ignore trailing new lines */
+        while (str[strLen-1] == '\n' && strLen > 0) strLen--;
+
+        if (Base16_Decode((byte*)str, strLen, decoded, &decSz) < 0)
+            WOLFSSL_MSG("Bad Base16_Decode error");
+        else if (bn == NULL)
+            ret = decSz;
+        else {
+            if (*bn == NULL) {
+                *bn = wolfSSL_BN_new();
+                if (*bn != NULL) {
+                    weOwn = 1;
+                }
+            }
+
+            if (*bn == NULL)
+                WOLFSSL_MSG("BN new failed");
+            else if (wolfSSL_BN_bin2bn(decoded, decSz, *bn) == NULL) {
+                WOLFSSL_MSG("Bad bin2bn error");
+                if (weOwn == 1) {
+                    wolfSSL_BN_free(*bn); /* Free new BN */
+                }
+            }
+            else
+                ret = WOLFSSL_SUCCESS;
+        }
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    XFREE(decoded, NULL, DYNAMIC_TYPE_DER);
+#endif
+
+    return ret;
+}
+
+
+WOLFSSL_BIGNUM* wolfSSL_BN_dup(const WOLFSSL_BIGNUM* bn)
+{
+    WOLFSSL_BIGNUM* ret;
+
+    WOLFSSL_MSG("wolfSSL_BN_dup");
+
+    if (bn == NULL || bn->internal == NULL) {
+        WOLFSSL_MSG("bn NULL error");
+        return NULL;
+    }
+
+    ret = wolfSSL_BN_new();
+    if (ret == NULL) {
+        WOLFSSL_MSG("bn new error");
+        return NULL;
+    }
+
+    if (mp_copy((mp_int*)bn->internal, (mp_int*)ret->internal) != MP_OKAY) {
+        WOLFSSL_MSG("mp_copy error");
+        wolfSSL_BN_free(ret);
+        return NULL;
+    }
+
+    ret->neg = bn->neg;
+
+    return ret;
+}
+
+
+WOLFSSL_BIGNUM* wolfSSL_BN_copy(WOLFSSL_BIGNUM* r, const WOLFSSL_BIGNUM* bn)
+{
+    WOLFSSL_MSG("wolfSSL_BN_copy");
+
+    if (r == NULL || bn == NULL) {
+        WOLFSSL_MSG("r or bn NULL error");
+        return NULL;
+    }
+
+    if (mp_copy((mp_int*)bn->internal, (mp_int*)r->internal) != MP_OKAY) {
+        WOLFSSL_MSG("mp_copy error");
+        return NULL;
+    }
+
+    r->neg = bn->neg;
+
+    return r;
+}
+
+/* return code compliant with OpenSSL :
+ *   1 if success, 0 else
+ */
+int wolfSSL_BN_set_word(WOLFSSL_BIGNUM* bn, WOLFSSL_BN_ULONG w)
+{
+    WOLFSSL_MSG("wolfSSL_BN_set_word");
+
+    if (bn == NULL) {
+        WOLFSSL_MSG("bn NULL error");
+        return WOLFSSL_FAILURE;
+    }
+
+    if (mp_set_int((mp_int*)bn->internal, w) != MP_OKAY) {
+        WOLFSSL_MSG("mp_init_set_int error");
+        return WOLFSSL_FAILURE;
+    }
+
+    return WOLFSSL_SUCCESS;
+}
+
+
+/* Returns the big number as an unsigned long if possible.
+ *
+ * bn  big number structure to get value from
+ *
+ * Returns value or 0xFFFFFFFFL if bigger than unsigned long.
+ */
+unsigned long wolfSSL_BN_get_word(const WOLFSSL_BIGNUM* bn)
+{
+    mp_int* mp;
+
+    WOLFSSL_MSG("wolfSSL_BN_get_word");
+
+    if (bn == NULL) {
+        WOLFSSL_MSG("Invalid argument");
+        return 0;
+    }
+
+    if (wolfSSL_BN_num_bytes(bn) > (int)sizeof(unsigned long)) {
+        WOLFSSL_MSG("bignum is larger than unsigned long");
+        return 0xFFFFFFFFL;
+    }
+    mp = (mp_int*)bn->internal;
+
+    return (unsigned long)(mp->dp[0]);
+}
+
+/* return code compliant with OpenSSL :
+ *   number length in decimal if success, 0 if error
+ */
+#ifndef NO_WOLFSSL_STUB
+int wolfSSL_BN_dec2bn(WOLFSSL_BIGNUM** bn, const char* str)
+{
+    (void)bn;
+    (void)str;
+
+    WOLFSSL_MSG("wolfSSL_BN_dec2bn");
+    WOLFSSL_STUB("BN_dec2bn");
+    return SSL_FAILURE;
+}
+#endif
+
+#if defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY)
+char *wolfSSL_BN_bn2dec(const WOLFSSL_BIGNUM *bn)
+{
+    int len = 0;
+    char *buf;
+
+    WOLFSSL_MSG("wolfSSL_BN_bn2dec");
+
+    if (bn == NULL || bn->internal == NULL) {
+        WOLFSSL_MSG("bn NULL error");
+        return NULL;
+    }
+
+    if (mp_radix_size((mp_int*)bn->internal, MP_RADIX_DEC, &len) != MP_OKAY) {
+        WOLFSSL_MSG("mp_radix_size failure");
+        return NULL;
+    }
+
+    buf = (char*) XMALLOC(len, NULL, DYNAMIC_TYPE_OPENSSL);
+    if (buf == NULL) {
+        WOLFSSL_MSG("BN_bn2dec malloc buffer failure");
+        return NULL;
+    }
+
+    if (mp_todecimal((mp_int*)bn->internal, buf) != MP_OKAY) {
+        XFREE(buf, NULL, DYNAMIC_TYPE_ECC);
+        return NULL;
+    }
+
+    return buf;
+}
+#else
+char* wolfSSL_BN_bn2dec(const WOLFSSL_BIGNUM* bn)
+{
+    (void)bn;
+
+    WOLFSSL_MSG("wolfSSL_BN_bn2dec");
+
+    return NULL;
+}
+#endif /* defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) */
+
+/* return code compliant with OpenSSL :
+ *   1 if success, 0 else
+ */
+int wolfSSL_BN_lshift(WOLFSSL_BIGNUM *r, const WOLFSSL_BIGNUM *bn, int n)
+{
+    WOLFSSL_MSG("wolfSSL_BN_lshift");
+
+    if (r == NULL || r->internal == NULL || bn == NULL || bn->internal == NULL){
+        WOLFSSL_MSG("bn NULL error");
+        return WOLFSSL_FAILURE;
+    }
+
+    if (mp_mul_2d((mp_int*)bn->internal, n, (mp_int*)r->internal) != MP_OKAY) {
+        WOLFSSL_MSG("mp_mul_2d error");
+        return WOLFSSL_FAILURE;
+    }
+
+    return WOLFSSL_SUCCESS;
+}
+
+/* return code compliant with OpenSSL :
+ *   1 if success, 0 else
+ */
+int wolfSSL_BN_rshift(WOLFSSL_BIGNUM *r, const WOLFSSL_BIGNUM *bn, int n)
+{
+    WOLFSSL_MSG("wolfSSL_BN_rshift");
+
+    if (r == NULL || r->internal == NULL || bn == NULL || bn->internal == NULL){
+        WOLFSSL_MSG("bn NULL error");
+        return WOLFSSL_FAILURE;
+    }
+
+    if (mp_div_2d((mp_int*)bn->internal, n,
+                  (mp_int*)r->internal, NULL) != MP_OKAY) {
+        WOLFSSL_MSG("mp_mul_2d error");
+        return WOLFSSL_FAILURE;
+    }
+
+    return WOLFSSL_SUCCESS;
+}
+
+/* return code compliant with OpenSSL :
+ *   1 if success, 0 else
+ */
+int wolfSSL_BN_add_word(WOLFSSL_BIGNUM *bn, WOLFSSL_BN_ULONG w)
+{
+    WOLFSSL_MSG("wolfSSL_BN_add_word");
+
+    if (bn == NULL || bn->internal == NULL) {
+        WOLFSSL_MSG("bn NULL error");
+        return WOLFSSL_FAILURE;
+    }
+
+    if (mp_add_d((mp_int*)bn->internal, w, (mp_int*)bn->internal) != MP_OKAY) {
+        WOLFSSL_MSG("mp_add_d error");
+        return WOLFSSL_FAILURE;
+    }
+
+    return WOLFSSL_SUCCESS;
+}
+
+/* return code compliant with OpenSSL :
+ *   1 if success, 0 else
+ */
+int wolfSSL_BN_add(WOLFSSL_BIGNUM *r, WOLFSSL_BIGNUM *a, WOLFSSL_BIGNUM *b)
+{
+    WOLFSSL_MSG("wolfSSL_BN_add");
+
+    if (r == NULL || r->internal == NULL || a == NULL || a->internal == NULL ||
+        b == NULL || b->internal == NULL) {
+        WOLFSSL_MSG("bn NULL error");
+        return WOLFSSL_FAILURE;
+    }
+
+    if (mp_add((mp_int*)a->internal, (mp_int*)b->internal,
+               (mp_int*)r->internal) != MP_OKAY) {
+        WOLFSSL_MSG("mp_add_d error");
+        return WOLFSSL_FAILURE;
+    }
+
+    return WOLFSSL_SUCCESS;
+}
+
+/* r = a + b (mod m) */
+int wolfSSL_BN_mod_add(WOLFSSL_BIGNUM *r, const WOLFSSL_BIGNUM *a,
+                       const WOLFSSL_BIGNUM *b, const WOLFSSL_BIGNUM *m,
+                       WOLFSSL_BN_CTX *ctx)
+{
+    (void)ctx;
+    WOLFSSL_MSG("wolfSSL_BN_add");
+
+    if (r == NULL || r->internal == NULL ||
+            a == NULL || a->internal == NULL ||
+            b == NULL || b->internal == NULL ||
+            m == NULL || m->internal == NULL) {
+        WOLFSSL_MSG("bn NULL error");
+        return WOLFSSL_FAILURE;
+    }
+
+    if (mp_addmod((mp_int*)a->internal, (mp_int*)b->internal,
+                  (mp_int*)m->internal, (mp_int*)r->internal) != MP_OKAY) {
+        WOLFSSL_MSG("mp_add_d error");
+        return WOLFSSL_FAILURE;
+    }
+
+    return WOLFSSL_SUCCESS;
+}
+
+#ifdef WOLFSSL_KEY_GEN
+
+/* return code compliant with OpenSSL :
+ *   1 if prime, 0 if not, -1 if error
+ */
+int wolfSSL_BN_is_prime_ex(const WOLFSSL_BIGNUM *bn, int nbchecks,
+                           WOLFSSL_BN_CTX *ctx, WOLFSSL_BN_GENCB *cb)
+{
+    WC_RNG*        rng    = NULL;
+#ifdef WOLFSSL_SMALL_STACK
+    WC_RNG*        tmpRNG = NULL;
+#else
+    WC_RNG         tmpRNG[1];
+#endif
+    int            initTmpRng = 0;
+    int            res = MP_NO;
+
+    (void)ctx;
+    (void)cb;
+
+    WOLFSSL_MSG("wolfSSL_BN_is_prime_ex");
+
+    if (bn == NULL || bn->internal == NULL) {
+        WOLFSSL_MSG("bn NULL error");
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    tmpRNG = (WC_RNG*)XMALLOC(sizeof(WC_RNG), NULL, DYNAMIC_TYPE_RNG);
+    if (tmpRNG == NULL)
+        return WOLFSSL_FAILURE;
+#endif
+    if (wc_InitRng(tmpRNG) == 0) {
+        rng = tmpRNG;
+        initTmpRng = 1;
+    }
+    else {
+        WOLFSSL_MSG("Bad RNG Init, trying global");
+        if (initGlobalRNG == 0) {
+            WOLFSSL_MSG("Global RNG no Init");
+        }
+        else
+            rng = &globalRNG;
+    }
+
+    if (rng) {
+        if (mp_prime_is_prime_ex((mp_int*)bn->internal,
+                                 nbchecks, &res, rng) != MP_OKAY) {
+            WOLFSSL_MSG("mp_prime_is_prime_ex error");
+            res = MP_NO;
+        }
+    }
+
+    if (initTmpRng)
+        wc_FreeRng(tmpRNG);
+#ifdef WOLFSSL_SMALL_STACK
+    XFREE(tmpRNG, NULL, DYNAMIC_TYPE_RNG);
+#endif
+
+    if (res != MP_YES) {
+        WOLFSSL_MSG("mp_prime_is_prime_ex not prime");
+        return WOLFSSL_FAILURE;
+    }
+
+    return WOLFSSL_SUCCESS;
+}
+
+/* return code compliant with OpenSSL :
+ *   (bn mod w) if success, -1 if error
+ */
+WOLFSSL_BN_ULONG wolfSSL_BN_mod_word(const WOLFSSL_BIGNUM *bn,
+                                     WOLFSSL_BN_ULONG w)
+{
+    WOLFSSL_BN_ULONG ret = 0;
+
+    WOLFSSL_MSG("wolfSSL_BN_mod_word");
+
+    if (bn == NULL || bn->internal == NULL) {
+        WOLFSSL_MSG("bn NULL error");
+        return (WOLFSSL_BN_ULONG)WOLFSSL_FATAL_ERROR;
+    }
+
+    if (mp_mod_d((mp_int*)bn->internal, w, &ret) != MP_OKAY) {
+        WOLFSSL_MSG("mp_add_d error");
+        return (WOLFSSL_BN_ULONG)WOLFSSL_FATAL_ERROR;
+    }
+
+    return ret;
+}
+#endif /* #ifdef WOLFSSL_KEY_GEN */
+
+char *wolfSSL_BN_bn2hex(const WOLFSSL_BIGNUM *bn)
+{
+    int len = 0;
+    char *buf;
+
+    WOLFSSL_ENTER("wolfSSL_BN_bn2hex");
+
+    if (bn == NULL || bn->internal == NULL) {
+        WOLFSSL_MSG("bn NULL error");
+        return NULL;
+    }
+
+    if (mp_radix_size((mp_int*)bn->internal, MP_RADIX_HEX, &len) != MP_OKAY) {
+        WOLFSSL_MSG("mp_radix_size failure");
+        return NULL;
+    }
+    len += 1; /* add one for null terminator */
+
+    buf = (char*)XMALLOC(len, NULL, DYNAMIC_TYPE_OPENSSL);
+    if (buf == NULL) {
+        WOLFSSL_MSG("BN_bn2hex malloc buffer failure");
+        return NULL;
+    }
+
+    if (mp_tohex((mp_int*)bn->internal, buf) != MP_OKAY) {
+        XFREE(buf, NULL, DYNAMIC_TYPE_OPENSSL);
+        return NULL;
+    }
+
+    return buf;
+}
+
+#ifndef NO_FILESYSTEM
+/* return code compliant with OpenSSL :
+ *   1 if success, 0 if error
+ */
+int wolfSSL_BN_print_fp(XFILE fp, const WOLFSSL_BIGNUM *bn)
+{
+    char *buf;
+
+    WOLFSSL_ENTER("wolfSSL_BN_print_fp");
+
+    if (fp == XBADFILE || bn == NULL || bn->internal == NULL) {
+        WOLFSSL_MSG("bn NULL error");
+        return WOLFSSL_FAILURE;
+    }
+
+    buf = wolfSSL_BN_bn2hex(bn);
+    if (buf == NULL) {
+        WOLFSSL_MSG("wolfSSL_BN_bn2hex failure");
+        return WOLFSSL_FAILURE;
+    }
+
+    fprintf(fp, "%s", buf);
+    XFREE(buf, NULL, DYNAMIC_TYPE_OPENSSL);
+
+    return WOLFSSL_SUCCESS;
+}
+#endif /* !NO_FILESYSTEM */
+
+
+WOLFSSL_BIGNUM *wolfSSL_BN_CTX_get(WOLFSSL_BN_CTX *ctx)
+{
+    /* ctx is not used, return new Bignum */
+    (void)ctx;
+
+    WOLFSSL_ENTER("wolfSSL_BN_CTX_get");
+
+    return wolfSSL_BN_new();
+}
+
+#ifndef NO_WOLFSSL_STUB
+void wolfSSL_BN_CTX_start(WOLFSSL_BN_CTX *ctx)
+{
+    (void)ctx;
+
+    WOLFSSL_ENTER("wolfSSL_BN_CTX_start");
+    WOLFSSL_STUB("BN_CTX_start");
+    WOLFSSL_MSG("wolfSSL_BN_CTX_start TBD");
+}
+#endif
+
+
+WOLFSSL_BIGNUM *wolfSSL_BN_mod_inverse(WOLFSSL_BIGNUM *r,
+                                       WOLFSSL_BIGNUM *a,
+                                       const WOLFSSL_BIGNUM *n,
+                                       WOLFSSL_BN_CTX *ctx)
+{
+    int dynamic = 0;
+
+    /* ctx is not used */
+    (void)ctx;
+
+    WOLFSSL_ENTER("wolfSSL_BN_mod_inverse");
+
+    /* check parameter */
+    if (r == NULL) {
+        r = wolfSSL_BN_new();
+        if (r == NULL){
+            WOLFSSL_MSG("WolfSSL_BN_new() failed");
+            return NULL;
+        }
+        dynamic = 1;
+    }
+
+    if (a == NULL) {
+        WOLFSSL_MSG("a NULL error");
+        if (dynamic == 1) {
+            wolfSSL_BN_free(r);
+        }
+        return NULL;
+    }
+
+    if (n == NULL) {
+        WOLFSSL_MSG("n NULL error");
+        if (dynamic == 1) {
+            wolfSSL_BN_free(r);
+        }
+        return NULL;
+    }
+
+    /* Compute inverse of a modulo n and return r */
+    if (mp_invmod((mp_int *)a->internal,(mp_int *)n->internal,
+                  (mp_int*)r->internal) == MP_VAL){
+        WOLFSSL_MSG("mp_invmod() error");
+        if (dynamic == 1) {
+            wolfSSL_BN_free(r);
+        }
+        return NULL;
+    }
+
+    return  r;
+}
+#endif  /* OPENSSL_EXTRA */
+#if (defined(WOLFSSL_QT) || defined(OPENSSL_ALL)) && !defined(NO_ASN)
+static int unprintable_char(char c)
+{
+    const unsigned char last_unprintable = 31;
+    const unsigned char LF = 10;
+    const unsigned char CR = 13;
+
+    if (c <= last_unprintable && c != LF && c != CR) {
+        return 1;
+    }
+    return 0;
+}
+
+int wolfSSL_ASN1_STRING_print(WOLFSSL_BIO *out, WOLFSSL_ASN1_STRING *str)
+{
+    int i;
+
+    WOLFSSL_ENTER("wolfSSL_ASN1_STRING_print");
+    if (out == NULL || str == NULL)
+           return WOLFSSL_FAILURE;
+
+    for (i=0; i < str->length; i++) {
+        if (unprintable_char(str->data[i])) {
+            str->data[i] = '.';
+        }
+    }
+
+    if (wolfSSL_BIO_write(out, str->data, str->length) != str->length){
+        return WOLFSSL_FAILURE;
+    }
+
+    return str->length;
+}
+#endif /* (WOLFSSL_QT || OPENSSL_ALL) && !NO_ASN */
+
+#if defined(OPENSSL_EXTRA)
 int wolfSSL_X509_check_ca(WOLFSSL_X509 *x509)
 {
     WOLFSSL_ENTER("X509_check_ca");
@@ -33399,7 +45978,7 @@
 
 static int check_esc_char(char c, char *esc)
 {
-    char *ptr = NULL;
+    char *ptr;
 
     ptr = esc;
     while(*ptr != 0){
@@ -33494,7 +46073,7 @@
 
     if (flags & ASN1_STRFLGS_ESC_2253){
         char esc_ch[] = "+;<>\\";
-        char* esc_ptr = NULL;
+        char* esc_ptr;
 
         esc_ptr = str->data;
         while (*esc_ptr != 0){
@@ -33528,55 +46107,170 @@
     return WOLFSSL_FAILURE;
 }
 
+#if !defined(NO_ASN_TIME) && !defined(USER_TIME) && !defined(TIME_OVERRIDES)
+WOLFSSL_ASN1_TIME* wolfSSL_ASN1_TIME_new(void)
+{
+    WOLFSSL_ASN1_TIME* ret = (WOLFSSL_ASN1_TIME*)
+            XMALLOC(sizeof(WOLFSSL_ASN1_TIME), NULL, DYNAMIC_TYPE_OPENSSL);
+    if (!ret)
+        return NULL;
+    XMEMSET(ret, 0, sizeof(WOLFSSL_ASN1_TIME));
+    return ret;
+}
+
+void wolfSSL_ASN1_TIME_free(WOLFSSL_ASN1_TIME* t)
+{
+    if (t) {
+        XFREE(t, NULL, DYNAMIC_TYPE_OPENSSL);
+    }
+}
+
+WOLFSSL_ASN1_TIME* wolfSSL_ASN1_TIME_adj(WOLFSSL_ASN1_TIME *s, time_t t,
+                                    int offset_day, long offset_sec)
+{
+    const time_t sec_per_day = 24*60*60;
+    struct tm* ts = NULL;
+    struct tm* tmpTime;
+    time_t t_adj = 0;
+    time_t offset_day_sec = 0;
+#if defined(NEED_TMP_TIME)
+    struct tm tmpTimeStorage;
+
+    tmpTime = &tmpTimeStorage;
+#else
+    tmpTime = NULL;
+#endif
+    (void)tmpTime;
+
+    WOLFSSL_ENTER("wolfSSL_ASN1_TIME_adj");
+
+    if (s == NULL){
+        s = wolfSSL_ASN1_TIME_new();
+        if (s == NULL){
+            return NULL;
+        }
+    }
+
+    /* compute GMT time with offset */
+    offset_day_sec = offset_day * sec_per_day;
+    t_adj          = t + offset_day_sec + offset_sec;
+    ts             = (struct tm *)XGMTIME(&t_adj, tmpTime);
+    if (ts == NULL){
+        WOLFSSL_MSG("failed to get time data.");
+        XFREE(s, NULL, DYNAMIC_TYPE_OPENSSL);
+        return NULL;
+    }
+
+    /* create ASN1 time notation */
+    /* UTC Time */
+    if (ts->tm_year >= 50 && ts->tm_year < 150){
+        char utc_str[ASN_UTC_TIME_SIZE];
+        int utc_year = 0,utc_mon,utc_day,utc_hour,utc_min,utc_sec;
+
+        s->type = V_ASN1_UTCTIME;
+        s->length = ASN_UTC_TIME_SIZE;
+
+        if (ts->tm_year >= 50 && ts->tm_year < 100){
+            utc_year = ts->tm_year;
+        } else if (ts->tm_year >= 100 && ts->tm_year < 150){
+            utc_year = ts->tm_year - 100;
+        }
+        utc_mon  = ts->tm_mon + 1;
+        utc_day  = ts->tm_mday;
+        utc_hour = ts->tm_hour;
+        utc_min  = ts->tm_min;
+        utc_sec  = ts->tm_sec;
+        XSNPRINTF((char *)utc_str, sizeof(utc_str),
+                  "%02d%02d%02d%02d%02d%02dZ",
+                  utc_year, utc_mon, utc_day, utc_hour, utc_min, utc_sec);
+        XMEMCPY(s->data, (byte *)utc_str, s->length);
+    /* GeneralizedTime */
+    } else {
+        char gt_str[ASN_GENERALIZED_TIME_MAX];
+        int gt_year,gt_mon,gt_day,gt_hour,gt_min,gt_sec;
+
+        s->type = V_ASN1_GENERALIZEDTIME;
+        s->length = ASN_GENERALIZED_TIME_SIZE;
+
+        gt_year = ts->tm_year + 1900;
+        gt_mon  = ts->tm_mon + 1;
+        gt_day  = ts->tm_mday;
+        gt_hour = ts->tm_hour;
+        gt_min  = ts->tm_min;
+        gt_sec  = ts->tm_sec;
+        XSNPRINTF((char *)gt_str, sizeof(gt_str),
+                  "%4d%02d%02d%02d%02d%02dZ",
+                  gt_year, gt_mon, gt_day, gt_hour, gt_min,gt_sec);
+        XMEMCPY(s->data, (byte *)gt_str, s->length);
+    }
+
+    return s;
+}
+#endif /* !NO_ASN_TIME && !USER_TIME && !TIME_OVERRIDES */
+
 #ifndef NO_ASN_TIME
-WOLFSSL_ASN1_TIME *wolfSSL_ASN1_TIME_to_generalizedtime(WOLFSSL_ASN1_TIME *t,
+/* not a compatibility function - length getter for opaque type */
+int wolfSSL_ASN1_TIME_get_length(WOLFSSL_ASN1_TIME *t)
+{
+    WOLFSSL_ENTER("wolfSSL_ASN1_TIME_get_length");
+    if (t == NULL)
+        return WOLFSSL_FAILURE;
+    return t->length;
+}
+/* not a compatibility function - data getter for opaque type */
+unsigned char* wolfSSL_ASN1_TIME_get_data(WOLFSSL_ASN1_TIME *t)
+{
+    WOLFSSL_ENTER("wolfSSL_ASN1_TIME_get_data");
+    if (t == NULL)
+        return NULL;
+    return t->data;
+}
+
+WOLFSSL_ASN1_TIME* wolfSSL_ASN1_TIME_to_generalizedtime(WOLFSSL_ASN1_TIME *t,
                                                         WOLFSSL_ASN1_TIME **out)
 {
-    unsigned char time_type;
+    int time_type = 0;
     WOLFSSL_ASN1_TIME *ret = NULL;
-    unsigned char *data_ptr = NULL;
 
     WOLFSSL_ENTER("wolfSSL_ASN1_TIME_to_generalizedtime");
-    if (t == NULL)
-        return NULL;
-
-    time_type = t->data[0];
-    if (time_type != ASN_UTC_TIME && time_type != ASN_GENERALIZED_TIME){
-        WOLFSSL_MSG("Invalid ASN_TIME type.");
-        return NULL;
-    }
-    if (out == NULL || *out == NULL){
-        ret = (WOLFSSL_ASN1_TIME*)XMALLOC(sizeof(WOLFSSL_ASN1_TIME), NULL,
-                                        DYNAMIC_TYPE_TMP_BUFFER);
-        if (ret == NULL){
-            WOLFSSL_MSG("memory alloc failed.");
-            return NULL;
-        }
-        XMEMSET(ret, 0, sizeof(WOLFSSL_ASN1_TIME));
-    } else
-        ret = *out;
-
-    if (time_type == ASN_GENERALIZED_TIME){
-        XMEMCPY(ret->data, t->data, ASN_GENERALIZED_TIME_SIZE);
-        return ret;
-    } else if (time_type == ASN_UTC_TIME){
-        ret->data[0] = ASN_GENERALIZED_TIME;
-        ret->data[1] = ASN_GENERALIZED_TIME_SIZE;
-        data_ptr  = ret->data + 2;
-        if (t->data[2] >= '5')
-            XSNPRINTF((char*)data_ptr, ASN_UTC_TIME_SIZE + 2, "19%s", t->data + 2);
-        else
-            XSNPRINTF((char*)data_ptr, ASN_UTC_TIME_SIZE + 2, "20%s", t->data + 2);
-
-        return ret;
-    }
-
-    WOLFSSL_MSG("Invalid ASN_TIME value");
-    return NULL;
+    if (t == NULL) {
+        WOLFSSL_MSG("Invalid ASN_TIME value");
+    } else {
+        time_type = t->type;
+        if (time_type != ASN_UTC_TIME && time_type != ASN_GENERALIZED_TIME){
+            WOLFSSL_MSG("Invalid ASN_TIME type.");
+        } else {
+            if (out == NULL || *out == NULL) {
+                ret = wolfSSL_ASN1_TIME_new();
+                if (ret == NULL){
+                    WOLFSSL_MSG("memory alloc failed.");
+                }
+            } else {
+                ret = *out;
+            }
+        }
+    }
+
+    if (ret != NULL) {
+        if (time_type == ASN_GENERALIZED_TIME){
+            XMEMCPY(ret->data, t->data, ASN_GENERALIZED_TIME_SIZE);
+        } else { /* ASN_UTC_TIME */
+            /* convert UTC to generalized time */
+            ret->type = ASN_GENERALIZED_TIME;
+            ret->length = ASN_GENERALIZED_TIME_SIZE;
+            if (t->data[0] >= '5') {
+                ret->data[0] = '1'; ret->data[1] = '9';
+            } else {
+                ret->data[0] = '2'; ret->data[1] = '0';
+            }
+            XMEMCPY(&ret->data[2], t->data, ASN_UTC_TIME_SIZE);
+        }
+    }
+
+    return ret;
 }
 #endif /* !NO_ASN_TIME */
 
-
 #ifndef NO_ASN
 int wolfSSL_i2c_ASN1_INTEGER(WOLFSSL_ASN1_INTEGER *a, unsigned char **pp)
 {
@@ -33661,5 +46355,1354 @@
 }
 #endif /* !NO_ASN */
 
-#endif  /* OPENSSLEXTRA */
+
+#ifndef NO_CERTS
+int wolfSSL_X509_CA_num(WOLFSSL_X509_STORE* store)
+{
+    int i = 0;
+    int cnt_ret = 0;
+    Signer **table;
+
+    WOLFSSL_ENTER("wolfSSL_X509_CA_num");
+    if (store == NULL || store->cm == NULL){
+        WOLFSSL_MSG("invalid parameter");
+        return WOLFSSL_FAILURE;
+    }
+
+    table = store->cm->caTable;
+    if (table){
+        if (wc_LockMutex(&store->cm->caLock) == 0){
+            for (i = 0; i < CA_TABLE_SIZE; i++) {
+                Signer* signer = table[i];
+                while (signer) {
+                    Signer* next = signer->next;
+                    cnt_ret++;
+                    signer = next;
+                }
+            }
+            wc_UnLockMutex(&store->cm->caLock);
+        }
+    }
+
+    return cnt_ret;
+}
+#endif /* !NO_CERTS */
+
+long wolfSSL_X509_get_version(const WOLFSSL_X509 *x509)
+{
+    int version = 0;
+
+    WOLFSSL_ENTER("wolfSSL_X509_get_version");
+
+    if (x509 == NULL){
+        WOLFSSL_MSG("invalid parameter");
+        return 0L;
+    }
+    version = x509->version;
+    if (version != 0)
+        return (long)version - 1L;
+
+    return 0L;
+}
+
+
+int wolfSSL_X509_get_signature_nid(const WOLFSSL_X509 *x)
+{
+    if (x == NULL)
+        return 0;
+
+    return oid2nid(x->sigOID, oidSigType);
+}
+#endif  /* OPENSSL_EXTRA */
+
+#if defined(OPENSSL_EXTRA) && !defined(NO_RSA)
+/* return compliant with OpenSSL
+ *   RSA modulus size in bytes, -1 if error
+ */
+int wolfSSL_RSA_size(const WOLFSSL_RSA* rsa)
+{
+    WOLFSSL_ENTER("wolfSSL_RSA_size");
+
+    if (rsa == NULL)
+        return WOLFSSL_FATAL_ERROR;
+    if (rsa->inSet == 0)
+    {
+        if (SetRsaInternal((WOLFSSL_RSA*)rsa) != SSL_SUCCESS) {
+            WOLFSSL_MSG("SetRsaInternal failed");
+            return 0;
+        }
+    }
+    return wc_RsaEncryptSize((RsaKey*)rsa->internal);
+}
+#endif
+
+#if !defined(HAVE_USER_RSA) && !defined(HAVE_FAST_RSA) && \
+    !defined(NO_RSA) && (defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL))
+/* WolfSSL -> OpenSSL */
+int SetRsaExternal(WOLFSSL_RSA* rsa)
+{
+    RsaKey* key;
+    WOLFSSL_MSG("Entering SetRsaExternal");
+
+    if (rsa == NULL || rsa->internal == NULL) {
+        WOLFSSL_MSG("rsa key NULL error");
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    key = (RsaKey*)rsa->internal;
+
+    if (SetIndividualExternal(&rsa->n, &key->n) != WOLFSSL_SUCCESS) {
+        WOLFSSL_MSG("rsa n key error");
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    if (SetIndividualExternal(&rsa->e, &key->e) != WOLFSSL_SUCCESS) {
+        WOLFSSL_MSG("rsa e key error");
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    if (key->type == RSA_PRIVATE) {
+        if (SetIndividualExternal(&rsa->d, &key->d) != WOLFSSL_SUCCESS) {
+            WOLFSSL_MSG("rsa d key error");
+            return WOLFSSL_FATAL_ERROR;
+        }
+
+        if (SetIndividualExternal(&rsa->p, &key->p) != WOLFSSL_SUCCESS) {
+            WOLFSSL_MSG("rsa p key error");
+            return WOLFSSL_FATAL_ERROR;
+        }
+
+        if (SetIndividualExternal(&rsa->q, &key->q) != WOLFSSL_SUCCESS) {
+            WOLFSSL_MSG("rsa q key error");
+            return WOLFSSL_FATAL_ERROR;
+        }
+
+    #ifndef RSA_LOW_MEM
+        if (SetIndividualExternal(&rsa->dmp1, &key->dP) != WOLFSSL_SUCCESS) {
+            WOLFSSL_MSG("rsa dP key error");
+            return WOLFSSL_FATAL_ERROR;
+        }
+
+        if (SetIndividualExternal(&rsa->dmq1, &key->dQ) != WOLFSSL_SUCCESS) {
+            WOLFSSL_MSG("rsa dQ key error");
+            return WOLFSSL_FATAL_ERROR;
+        }
+
+        if (SetIndividualExternal(&rsa->iqmp, &key->u) != WOLFSSL_SUCCESS) {
+            WOLFSSL_MSG("rsa u key error");
+            return WOLFSSL_FATAL_ERROR;
+        }
+    #endif /* !RSA_LOW_MEM */
+    }
+    rsa->exSet = 1;
+
+    return WOLFSSL_SUCCESS;
+}
+#endif
+#if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
+/* when calling SetIndividualExternal, mpi should be cleared by caller if no
+ * longer used. ie mp_free(mpi). This is to free data when fastmath is
+ * disabled since a copy of mpi is made by this function and placed into bn.
+ */
+int SetIndividualExternal(WOLFSSL_BIGNUM** bn, mp_int* mpi)
+{
+    byte dynamic = 0;
+
+    WOLFSSL_MSG("Entering SetIndividualExternal");
+
+    if (mpi == NULL || bn == NULL) {
+        WOLFSSL_MSG("mpi NULL error");
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    if (*bn == NULL) {
+        *bn = wolfSSL_BN_new();
+        if (*bn == NULL) {
+            WOLFSSL_MSG("SetIndividualExternal alloc failed");
+            return WOLFSSL_FATAL_ERROR;
+        }
+        dynamic = 1;
+    }
+
+    if (mp_copy(mpi, (mp_int*)((*bn)->internal)) != MP_OKAY) {
+        WOLFSSL_MSG("mp_copy error");
+        if (dynamic == 1) {
+            wolfSSL_BN_free(*bn);
+        }
+        return WOLFSSL_FATAL_ERROR;
+    }
+
+    return WOLFSSL_SUCCESS;
+}
+
+
+static void InitwolfSSL_BigNum(WOLFSSL_BIGNUM* bn)
+{
+    if (bn) {
+        XMEMSET(bn, 0, sizeof(WOLFSSL_BIGNUM));
+        bn->neg      = 0;
+        bn->internal = NULL;
+    }
+}
+
+
+WOLFSSL_BIGNUM* wolfSSL_BN_new(void)
+{
+    WOLFSSL_BIGNUM* external;
+    mp_int*        mpi;
+
+    WOLFSSL_MSG("wolfSSL_BN_new");
+
+#if !defined(USE_FAST_MATH) || defined(HAVE_WOLF_BIGINT)
+    mpi = (mp_int*) XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_BIGINT);
+    if (mpi == NULL) {
+        WOLFSSL_MSG("wolfSSL_BN_new malloc mpi failure");
+        return NULL;
+    }
+#endif
+
+    external = (WOLFSSL_BIGNUM*) XMALLOC(sizeof(WOLFSSL_BIGNUM), NULL,
+                                        DYNAMIC_TYPE_BIGINT);
+    if (external == NULL) {
+        WOLFSSL_MSG("wolfSSL_BN_new malloc WOLFSSL_BIGNUM failure");
+#if !defined(USE_FAST_MATH) || defined(HAVE_WOLF_BIGINT)
+        XFREE(mpi, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+        return NULL;
+    }
+
+#if defined(USE_FAST_MATH) && !defined(HAVE_WOLF_BIGINT)
+    mpi = &external->fp;
+#endif
+
+    InitwolfSSL_BigNum(external);
+    if (mp_init(mpi) != MP_OKAY) {
+        wolfSSL_BN_free(external);
+        return NULL;
+    }
+    external->internal = mpi;
+
+    return external;
+}
+
+
+#if defined(USE_FAST_MATH) && !defined(HAVE_WOLF_BIGINT)
+/* This function works without BN_free only with TFM */
+void wolfSSL_BN_init(WOLFSSL_BIGNUM* bn)
+{
+    if(bn == NULL)return;
+    WOLFSSL_MSG("wolfSSL_BN_init");
+    InitwolfSSL_BigNum(bn);
+    if (mp_init(&bn->fp) != MP_OKAY)
+        return;
+    bn->internal = (void *)&bn->fp;
+}
+#endif
+
+void wolfSSL_BN_free(WOLFSSL_BIGNUM* bn)
+{
+    WOLFSSL_MSG("wolfSSL_BN_free");
+    if (bn) {
+        if (bn->internal) {
+            mp_int* bni = (mp_int*)bn->internal;
+            mp_free(bni);
+#if !defined(USE_FAST_MATH) || defined(HAVE_WOLF_BIGINT)
+            XFREE(bn->internal, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+            bn->internal = NULL;
+        }
+        XFREE(bn, NULL, DYNAMIC_TYPE_BIGINT);
+        /* bn = NULL, don't try to access or double free it */
+    }
+}
+
+void wolfSSL_BN_clear_free(WOLFSSL_BIGNUM* bn)
+{
+    WOLFSSL_MSG("wolfSSL_BN_clear_free");
+    if (bn) {
+        if (bn->internal) {
+            mp_int* bni = (mp_int*)bn->internal;
+            mp_forcezero(bni);
+        }
+        wolfSSL_BN_free(bn);
+    }
+}
+
+void wolfSSL_BN_clear(WOLFSSL_BIGNUM* bn)
+{
+    WOLFSSL_MSG("wolfSSL_BN_clear");
+    if (bn && bn->internal) {
+        mp_forcezero((mp_int*)bn->internal);
+    }
+}
+#endif /* OPENSSL_EXTRA || OPENSSL_EXTRA_X509_SMALL */
+
+#if !defined(NO_RSA) && (defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL))
+static void InitwolfSSL_Rsa(WOLFSSL_RSA* rsa)
+{
+    if (rsa) {
+        XMEMSET(rsa, 0, sizeof(WOLFSSL_RSA));
+    }
+}
+
+
+void wolfSSL_RSA_free(WOLFSSL_RSA* rsa)
+{
+    WOLFSSL_ENTER("wolfSSL_RSA_free");
+
+    if (rsa) {
+#if defined(OPENSSL_EXTRA) || defined(OPENSSL_ALL)
+        int doFree = 0;
+        if (wc_LockMutex(&rsa->refMutex) != 0) {
+            WOLFSSL_MSG("Couldn't lock rsa mutex");
+        }
+
+        /* only free if all references to it are done */
+        rsa->refCount--;
+        if (rsa->refCount == 0) {
+            doFree = 1;
+        }
+        wc_UnLockMutex(&rsa->refMutex);
+
+        if (!doFree) {
+            return;
+        }
+
+        wc_FreeMutex(&rsa->refMutex);
+#endif
+
+        if (rsa->internal) {
+#if !defined(HAVE_FIPS) && !defined(HAVE_USER_RSA) && \
+    !defined(HAVE_FAST_RSA) && defined(WC_RSA_BLINDING)
+            WC_RNG* rng;
+
+            /* check if RNG is owned before freeing it */
+            if (rsa->ownRng) {
+                rng = ((RsaKey*)rsa->internal)->rng;
+                if (rng != NULL && rng != &globalRNG) {
+                    wc_FreeRng(rng);
+                    XFREE(rng, NULL, DYNAMIC_TYPE_RNG);
+                }
+            }
+#endif /* WC_RSA_BLINDING */
+            wc_FreeRsaKey((RsaKey*)rsa->internal);
+            XFREE(rsa->internal, NULL, DYNAMIC_TYPE_RSA);
+            rsa->internal = NULL;
+        }
+        wolfSSL_BN_free(rsa->iqmp);
+        wolfSSL_BN_free(rsa->dmq1);
+        wolfSSL_BN_free(rsa->dmp1);
+        wolfSSL_BN_free(rsa->q);
+        wolfSSL_BN_free(rsa->p);
+        wolfSSL_BN_free(rsa->d);
+        wolfSSL_BN_free(rsa->e);
+        wolfSSL_BN_free(rsa->n);
+
+    #ifdef WC_RSA_BLINDING
+        if (rsa->rng && wc_FreeRng(rsa->rng) != 0) {
+            WOLFSSL_MSG("Issue freeing rng");
+        }
+        XFREE(rsa->rng, NULL, DYNAMIC_TYPE_RNG);
+    #endif
+
+#if defined(OPENSSL_EXTRA) && !defined(WOLFCRYPT_ONLY)
+        if (rsa->meth) {
+            wolfSSL_RSA_meth_free(rsa->meth);
+        }
+#endif
+
+        InitwolfSSL_Rsa(rsa);  /* set back to NULLs for safety */
+
+        XFREE(rsa, NULL, DYNAMIC_TYPE_RSA);
+
+        /* rsa = NULL, don't try to access or double free it */
+    }
+}
+
+WOLFSSL_RSA* wolfSSL_RSA_new(void)
+{
+    WOLFSSL_RSA* external;
+    RsaKey*     key;
+
+    WOLFSSL_ENTER("wolfSSL_RSA_new");
+
+    key = (RsaKey*) XMALLOC(sizeof(RsaKey), NULL, DYNAMIC_TYPE_RSA);
+    if (key == NULL) {
+        WOLFSSL_MSG("wolfSSL_RSA_new malloc RsaKey failure");
+        return NULL;
+    }
+
+    external = (WOLFSSL_RSA*) XMALLOC(sizeof(WOLFSSL_RSA), NULL,
+                                     DYNAMIC_TYPE_RSA);
+    if (external == NULL) {
+        WOLFSSL_MSG("wolfSSL_RSA_new malloc WOLFSSL_RSA failure");
+        XFREE(key, NULL, DYNAMIC_TYPE_RSA);
+        return NULL;
+    }
+
+    InitwolfSSL_Rsa(external);
+    if (wc_InitRsaKey(key, NULL) != 0) {
+        WOLFSSL_MSG("InitRsaKey WOLFSSL_RSA failure");
+        XFREE(external, NULL, DYNAMIC_TYPE_RSA);
+        XFREE(key, NULL, DYNAMIC_TYPE_RSA);
+        return NULL;
+    }
+
+#if !defined(HAVE_FIPS) && !defined(HAVE_USER_RSA) && \
+    !defined(HAVE_FAST_RSA) && defined(WC_RSA_BLINDING)
+    {
+        WC_RNG* rng;
+
+        rng = (WC_RNG*) XMALLOC(sizeof(WC_RNG), NULL, DYNAMIC_TYPE_RNG);
+        if (rng != NULL && wc_InitRng(rng) != 0) {
+            WOLFSSL_MSG("InitRng failure, attempting to use global RNG");
+            XFREE(rng, NULL, DYNAMIC_TYPE_RNG);
+            rng = NULL;
+        }
+
+        external->ownRng = 1;
+        if (rng == NULL && initGlobalRNG) {
+            external->ownRng = 0;
+            rng = &globalRNG;
+        }
+
+        if (rng == NULL) {
+            WOLFSSL_MSG("wolfSSL_RSA_new no WC_RNG for blinding");
+            XFREE(external, NULL, DYNAMIC_TYPE_RSA);
+            XFREE(key, NULL, DYNAMIC_TYPE_RSA);
+            return NULL;
+        }
+
+        wc_RsaSetRNG(key, rng);
+    }
+#else
+    XMEMSET(key, 0, sizeof(RsaKey));
+#endif /* WC_RSA_BLINDING */
+
+    external->internal = key;
+    external->inSet = 0;
+#if defined(OPENSSL_EXTRA) || defined(OPENSSL_ALL)
+    external->refCount = 1;
+    wc_InitMutex(&external->refMutex);
+#endif
+    return external;
+}
+#endif /* !NO_RSA && (OPENSSL_EXTRA || OPENSSL_EXTRA_X509_SMALL) */
+
+#if defined(OPENSSL_ALL) && defined(HAVE_PKCS7)
+PKCS7* wolfSSL_PKCS7_new(void)
+{
+    WOLFSSL_PKCS7* pkcs7;
+    int ret = 0;
+
+    pkcs7 = (WOLFSSL_PKCS7*)XMALLOC(sizeof(*pkcs7), NULL, DYNAMIC_TYPE_PKCS7);
+    if (pkcs7 != NULL) {
+        XMEMSET(pkcs7, 0, sizeof(*pkcs7));
+        ret = wc_PKCS7_Init(&pkcs7->pkcs7, NULL, INVALID_DEVID);
+    }
+
+    if (ret != 0 && pkcs7 != NULL)
+        XFREE(pkcs7, NULL, DYNAMIC_TYPE_PKCS7);
+
+    return (PKCS7*)pkcs7;
+}
+/******************************************************************************
+* wolfSSL_PKCS7_SIGNED_new - allocates PKCS7 and initialize it for a signed data
+*
+* RETURNS:
+* returns pointer to the PKCS7 structure on success, otherwise returns NULL
+*/
+PKCS7_SIGNED* wolfSSL_PKCS7_SIGNED_new(void)
+{
+    byte signedData[]= { 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x07, 0x02};
+    PKCS7* pkcs7 = NULL;
+
+    if ((pkcs7 = wolfSSL_PKCS7_new()) == NULL)
+        return NULL;
+    pkcs7->contentOID = SIGNED_DATA;
+    if ((wc_PKCS7_SetContentType(pkcs7, signedData, sizeof(signedData))) < 0) {
+        if (pkcs7) {
+            wolfSSL_PKCS7_free(pkcs7);
+            return NULL;
+        }
+    }
+    return pkcs7;
+}
+
+void wolfSSL_PKCS7_free(PKCS7* pkcs7)
+{
+    WOLFSSL_PKCS7* p7 = (WOLFSSL_PKCS7*)pkcs7;
+
+    if (p7 != NULL) {
+        if (p7->data != NULL)
+            XFREE(p7->data, NULL, DYNAMIC_TYPE_PKCS7);
+        wc_PKCS7_Free(&p7->pkcs7);
+        XFREE(p7, NULL, DYNAMIC_TYPE_PKCS7);
+    }
+}
+void wolfSSL_PKCS7_SIGNED_free(PKCS7_SIGNED* p7)
+{
+    wolfSSL_PKCS7_free(p7);
+    return;
+}
+PKCS7* wolfSSL_d2i_PKCS7(PKCS7** p7, const unsigned char** in, int len)
+{
+    WOLFSSL_PKCS7* pkcs7 = NULL;
+    word32 idx = 0;
+
+    if (in == NULL)
+        return NULL;
+
+    if ((pkcs7 = (WOLFSSL_PKCS7*)wolfSSL_PKCS7_new()) == NULL)
+        return NULL;
+
+    if (GetSequence(*in, &idx, &pkcs7->len, len) < 0) {
+        wolfSSL_PKCS7_free((PKCS7*)pkcs7);
+        return NULL;
+    }
+    pkcs7->len += idx;
+
+    pkcs7->data = (byte*)XMALLOC(pkcs7->len, NULL, DYNAMIC_TYPE_PKCS7);
+    if (pkcs7->data == NULL) {
+        wolfSSL_PKCS7_free((PKCS7*)pkcs7);
+        return NULL;
+    }
+    XMEMCPY(pkcs7->data, *in, pkcs7->len);
+    *in += pkcs7->len;
+
+    if (p7 != NULL)
+        *p7 = (PKCS7*)pkcs7;
+    return (PKCS7*)pkcs7;
+}
+
+PKCS7* wolfSSL_d2i_PKCS7_bio(WOLFSSL_BIO* bio, PKCS7** p7)
+{
+    WOLFSSL_PKCS7* pkcs7;
+
+    if (bio == NULL)
+        return NULL;
+
+    if ((pkcs7 = (WOLFSSL_PKCS7*)wolfSSL_PKCS7_new()) == NULL)
+        return NULL;
+
+    pkcs7->len = wolfSSL_BIO_pending(bio);
+    pkcs7->data = (byte*)XMALLOC(pkcs7->len, NULL, DYNAMIC_TYPE_PKCS7);
+    if (pkcs7->data == NULL) {
+        wolfSSL_PKCS7_free((PKCS7*)pkcs7);
+        return NULL;
+    }
+
+    if (wolfSSL_BIO_read(bio, pkcs7->data, pkcs7->len) != pkcs7->len) {
+        wolfSSL_PKCS7_free((PKCS7*)pkcs7);
+        return NULL;
+    }
+
+    if (p7 != NULL)
+        *p7 = (PKCS7*)pkcs7;
+    return (PKCS7*)pkcs7;
+}
+
+int wolfSSL_PKCS7_verify(PKCS7* pkcs7, WOLFSSL_STACK* certs,
+                         WOLFSSL_X509_STORE* store, WOLFSSL_BIO* in,
+                         WOLFSSL_BIO* out, int flags)
+{
+    int ret = 0;
+    unsigned char* mem = NULL;
+    int memSz = 0;
+    WOLFSSL_PKCS7* p7 = (WOLFSSL_PKCS7*)pkcs7;
+
+    if (pkcs7 == NULL)
+        return WOLFSSL_FAILURE;
+
+    if (in != NULL) {
+        if ((memSz = wolfSSL_BIO_get_mem_data(in, &mem)) < 0)
+            return WOLFSSL_FAILURE;
+
+        p7->pkcs7.content = mem;
+        p7->pkcs7.contentSz = memSz;
+    }
+
+    /* certs is the list of certificates to find the cert with issuer/serial. */
+    (void)certs;
+    /* store is the certificate store to use to verify signer certificate
+     * associated with the signers.
+     */
+    (void)store;
+
+    ret = wc_PKCS7_VerifySignedData_ex(&p7->pkcs7, NULL, 0, p7->data, p7->len,
+                                                                       NULL, 0);
+    if (ret != 0)
+        return WOLFSSL_FAILURE;
+
+    if ((flags & PKCS7_NOVERIFY) != PKCS7_NOVERIFY) {
+        /* All signer certificates are verified. */
+        return WOLFSSL_FAILURE;
+    }
+
+    if (out != NULL)
+       wolfSSL_BIO_write(out, p7->pkcs7.content, p7->pkcs7.contentSz);
+
+    return WOLFSSL_SUCCESS;
+}
+
+WOLFSSL_STACK* wolfSSL_PKCS7_get0_signers(PKCS7* pkcs7, WOLFSSL_STACK* certs,
+                                          int flags)
+{
+    WOLFSSL_STACK* signers = NULL;
+    WOLFSSL_PKCS7* p7 = (WOLFSSL_PKCS7*)pkcs7;
+
+    if (p7 == NULL)
+        return NULL;
+    /* Only PKCS#7 messages with a single cert that is the verifying certificate
+     * is supported.
+     */
+    if ((flags | PKCS7_NOINTERN) == PKCS7_NOINTERN)
+        return NULL;
+
+    signers = (WOLFSSL_STACK*)XMALLOC(sizeof(WOLFSSL_STACK), NULL,
+                                                             DYNAMIC_TYPE_X509);
+    if (signers == NULL)
+        return NULL;
+
+    signers->num = 1;
+    signers->data.x509 = (WOLFSSL_X509*)XMALLOC(sizeof(WOLFSSL_X509), NULL,
+                                                             DYNAMIC_TYPE_X509);
+    if (signers->data.x509 == NULL) {
+        XFREE(signers, NULL, DYNAMIC_TYPE_X509);
+        return NULL;
+    }
+
+    if (DecodeToX509(signers->data.x509, p7->pkcs7.singleCert,
+                                                 p7->pkcs7.singleCertSz) != 0) {
+        XFREE(signers->data.x509, NULL, DYNAMIC_TYPE_X509);
+        XFREE(signers, NULL, DYNAMIC_TYPE_X509);
+        return NULL;
+    }
+
+    (void)certs;
+
+    return signers;
+}
+
+/******************************************************************************
+* wolfSSL_PEM_write_bio_PKCS7 - writes the PKCS7 data to BIO
+*
+* RETURNS:
+* returns WOLFSSL_SUCCESS on success, otherwise returns WOLFSSL_FAILURE
+*/
+
+int wolfSSL_PEM_write_bio_PKCS7(WOLFSSL_BIO* bio, PKCS7* p7)
+{
+#ifdef WOLFSSL_SMALL_STACK
+    byte* outputHead;
+    byte* outputFoot;
+#else
+    byte outputHead[2048];
+    byte outputFoot[2048];
+#endif
+    word32 outputHeadSz = 2048;
+    word32 outputFootSz = 2048;
+    word32 outputSz = 0;
+    byte*  output = NULL;
+    byte*  pem = NULL;
+    int    pemSz = -1;
+    enum wc_HashType hashType;
+    byte hashBuf[WC_MAX_DIGEST_SIZE];
+    word32 hashSz = -1;
+
+    WOLFSSL_ENTER("wolfSSL_PEM_write_bio_PKCS7()");
+
+    if (bio == NULL || p7 == NULL)
+        return WOLFSSL_FAILURE;
+
+#ifdef WOLFSSL_SMALL_STACK
+    outputHead = (byte*)XMALLOC(outputHeadSz, bio->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (outputHead == NULL)
+        return MEMORY_E;
+
+    outputFoot = (byte*)XMALLOC(outputFootSz, bio->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (outputFoot == NULL)
+        goto error;
+
+#endif
+
+    XMEMSET(hashBuf, 0, WC_MAX_DIGEST_SIZE);
+    XMEMSET(outputHead, 0, outputHeadSz);
+    XMEMSET(outputFoot, 0, outputFootSz);
+
+    hashType = wc_OidGetHash(p7->hashOID);
+    hashSz = wc_HashGetDigestSize(hashType);
+    if (hashSz > WC_MAX_DIGEST_SIZE)
+        return WOLFSSL_FAILURE;
+
+    /* only SIGNED_DATA is supported */
+    switch (p7->contentOID) {
+        case SIGNED_DATA:
+            break;
+        default:
+            WOLFSSL_MSG("Unknown PKCS#7 Type");
+            return WOLFSSL_FAILURE;
+    };
+
+    if ((wc_PKCS7_EncodeSignedData_ex(p7, hashBuf, hashSz,
+        outputHead, &outputHeadSz, outputFoot, &outputFootSz)) != 0)
+        return WOLFSSL_FAILURE;
+
+    outputSz = outputHeadSz + p7->contentSz + outputFootSz;
+    output = (byte*)XMALLOC(outputSz, bio->heap, DYNAMIC_TYPE_TMP_BUFFER);
+
+    if (!output)
+         return WOLFSSL_FAILURE;
+
+    XMEMSET(output, 0, outputSz);
+    outputSz = 0;
+    XMEMCPY(&output[outputSz], outputHead, outputHeadSz);
+    outputSz += outputHeadSz;
+    XMEMCPY(&output[outputSz], p7->content, p7->contentSz);
+    outputSz += p7->contentSz;
+    XMEMCPY(&output[outputSz], outputFoot, outputFootSz);
+    outputSz += outputFootSz;
+
+    /* get PEM size */
+    pemSz = wc_DerToPemEx(output, outputSz, NULL, 0, NULL, CERT_TYPE);
+    if (pemSz < 0)
+        goto error;
+
+    pemSz++; /* for '\0'*/
+
+    /* create PEM buffer and convert from DER to PEM*/
+    if ((pem = (byte*)XMALLOC(pemSz, bio->heap, DYNAMIC_TYPE_TMP_BUFFER)) == NULL)
+        goto error;
+
+    XMEMSET(pem, 0, pemSz);
+
+    if (wc_DerToPemEx(output, outputSz, pem, pemSz, NULL, CERT_TYPE) < 0) {
+        goto error;
+    }
+    if ((wolfSSL_BIO_write(bio, pem, pemSz) == pemSz)) {
+        XFREE(output, bio->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(pem, bio->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#ifdef WOLFSSL_SMALL_STACK
+        XFREE(outputHead, bio->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(outputFoot, bio->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+        return WOLFSSL_SUCCESS;
+    }
+
+error:
+#ifdef WOLFSSL_SMALL_STACK
+    if (outputHead) {
+        XFREE(outputHead, bio->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+    if (outputFoot) {
+        XFREE(outputFoot, bio->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+    if (output) {
+        XFREE(output, bio->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+    if (pem) {
+        XFREE(pem, bio->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+    return WOLFSSL_FAILURE;
+}
+#endif /* OPENSSL_ALL && HAVE_PKCS7 */
+
+#if defined(OPENSSL_EXTRA)
+WOLFSSL_STACK* wolfSSL_sk_X509_new(void)
+{
+    WOLFSSL_STACK* s = (WOLFSSL_STACK*)XMALLOC(sizeof(WOLFSSL_STACK), NULL,
+                                                             DYNAMIC_TYPE_X509);
+    if (s != NULL) {
+        XMEMSET(s, 0, sizeof(*s));
+        s->type = STACK_TYPE_X509;
+    }
+
+    return s;
+}
+#endif
+
+#ifdef OPENSSL_ALL
+int wolfSSL_PEM_write_bio_PKCS8PrivateKey(WOLFSSL_BIO* bio,
+                                          WOLFSSL_EVP_PKEY* pkey,
+                                          const WOLFSSL_EVP_CIPHER* enc,
+                                          char* passwd, int passwdSz,
+                                          pem_password_cb* cb, void* ctx)
+{
+    int ret = 0;
+    char password[NAME_SZ];
+    byte* key = NULL;
+    word32 keySz;
+    byte* pem = NULL;
+    int pemSz;
+    int type = PKCS8_PRIVATEKEY_TYPE;
+    int algId;
+    const byte* curveOid;
+    word32 oidSz;
+    int encAlgId;
+
+    if (bio == NULL || pkey == NULL)
+        return -1;
+
+    keySz = pkey->pkey_sz + 128;
+    key = (byte*)XMALLOC(keySz, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    if (key == NULL)
+        ret = MEMORY_E;
+
+    if (ret == 0 && enc != NULL && passwd == NULL) {
+        passwdSz = cb(password, sizeof(password), 1, ctx);
+        if (passwdSz < 0)
+            ret = WOLFSSL_FAILURE;
+        passwd = password;
+    }
+
+    if (ret == 0 && enc != NULL) {
+        WC_RNG rng;
+        ret = wc_InitRng(&rng);
+        if (ret == 0) {
+        #ifndef NO_DES3
+            if (enc == EVP_DES_CBC)
+                encAlgId = DESb;
+            else if (enc == EVP_DES_EDE3_CBC)
+                encAlgId = DES3b;
+            else
+        #endif
+        #if !defined(NO_AES) && defined(HAVE_AES_CBC)
+            #ifdef WOLFSSL_AES_256
+            if (enc == EVP_AES_256_CBC)
+                encAlgId = AES256CBCb;
+            else
+            #endif
+        #endif
+                ret = -1;
+            if (ret == 0) {
+                ret = TraditionalEnc((byte*)pkey->pkey.ptr, pkey->pkey_sz, key,
+                                       &keySz, passwd, passwdSz, PKCS5, PBES2,
+                                       encAlgId, NULL, 0, WC_PKCS12_ITT_DEFAULT,
+                                       &rng, NULL);
+                if (ret > 0) {
+                    keySz = ret;
+                    ret = 0;
+                }
+            }
+            wc_FreeRng(&rng);
+        }
+        type = PKCS8_ENC_PRIVATEKEY_TYPE;
+    }
+    if (ret == 0 && enc == NULL) {
+        type = PKCS8_PRIVATEKEY_TYPE;
+        if (pkey->type == EVP_PKEY_EC) {
+            algId = ECDSAk;
+            ret = wc_ecc_get_oid(pkey->ecc->group->curve_oid, &curveOid,
+                                                                        &oidSz);
+        }
+        else {
+            algId = RSAk;
+            curveOid = NULL;
+            oidSz = 0;
+        }
+
+        if (ret >= 0) {
+            ret = wc_CreatePKCS8Key(key, &keySz, (byte*)pkey->pkey.ptr,
+                                         pkey->pkey_sz, algId, curveOid, oidSz);
+            keySz = ret;
+        }
+    }
+
+    if (password == passwd)
+        XMEMSET(password, 0, passwdSz);
+
+    if (ret >= 0) {
+        pemSz = 2 * keySz + 2 * 64;
+        pem = (byte*)XMALLOC(pemSz, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        if (pem == NULL)
+            ret = MEMORY_E;
+    }
+
+    if (ret >= 0)
+        ret = wc_DerToPemEx(key, keySz, pem, pemSz, NULL, type);
+
+    if (key != NULL)
+        XFREE(key, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+
+    if (ret >= 0) {
+        if (wolfSSL_BIO_write(bio, pem, ret) != ret)
+            ret = -1;
+    }
+
+    if (pem != NULL)
+        XFREE(pem, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+
+    return ret < 0 ? 0 : ret;
+
+}
+
+static int bio_get_data(WOLFSSL_BIO* bio, byte** data)
+{
+    int ret = 0;
+    byte* mem = NULL;
+#ifndef NO_FILESYSTEM
+    long memSz;
+    XFILE file;
+    long curr;
+#endif
+
+    if ((ret = wolfSSL_BIO_pending(bio)) > 0) {
+    }
+#ifndef NO_FILESYSTEM
+    else if (bio->type == WOLFSSL_BIO_FILE) {
+        if (wolfSSL_BIO_get_fp(bio, &file) != WOLFSSL_SUCCESS)
+            ret = BAD_FUNC_ARG;
+        if (ret == 0) {
+            curr = XFTELL(file);
+            if (curr < 0) {
+                ret = WOLFSSL_BAD_FILE;
+            }
+            if (XFSEEK(file, 0, XSEEK_END) != 0)
+                ret = WOLFSSL_BAD_FILE;
+        }
+        if (ret == 0) {
+            memSz = XFTELL(file);
+            if (memSz > MAX_WOLFSSL_FILE_SIZE || memSz < 0) {
+                ret = WOLFSSL_BAD_FILE;
+            }
+        }
+        if (ret == 0) {
+            memSz -= curr;
+            ret = (int)memSz;
+            if (XFSEEK(file, curr, SEEK_SET) != 0)
+                ret = WOLFSSL_BAD_FILE;
+        }
+    }
+#endif
+
+    if (ret > 0) {
+        mem = (byte*)XMALLOC(ret, bio->heap, DYNAMIC_TYPE_OPENSSL);
+        if (mem == NULL) {
+            WOLFSSL_MSG("Memory error");
+            ret = MEMORY_E;
+        }
+        if (ret >= 0) {
+            if ((ret = wolfSSL_BIO_read(bio, mem, ret)) <= 0) {
+                XFREE(mem, bio->heap, DYNAMIC_TYPE_OPENSSL);
+                ret = MEMORY_E;
+                mem = NULL;
+            }
+        }
+    }
+
+    *data = mem;
+
+    return ret;
+}
+
+#ifndef NO_WOLFSSL_STUB
+void wolfSSL_BIO_set_init(WOLFSSL_BIO* bio, int init)
+{
+    WOLFSSL_STUB("wolfSSL_BIO_set_init");
+    (void)bio;
+    (void)init;
+}
+
+void wolfSSL_BIO_set_shutdown(WOLFSSL_BIO* bio, int shut)
+{
+    WOLFSSL_STUB("wolfSSL_BIO_set_shutdown");
+    (void)bio;
+    (void)shut;
+
+}
+int wolfSSL_BIO_get_shutdown(WOLFSSL_BIO* bio)
+{
+    WOLFSSL_STUB("wolfSSL_BIO_get_shutdown");
+    (void)bio;
+    return 0;
+}
+#endif /* NO_WOLFSSL_STUB */
+
+void wolfSSL_BIO_clear_retry_flags(WOLFSSL_BIO* bio)
+{
+    WOLFSSL_ENTER("wolfSSL_BIO_clear_retry_flags");
+
+    if (bio)
+        bio->flags &= ~(WOLFSSL_BIO_FLAG_READ|WOLFSSL_BIO_FLAG_RETRY);
+}
+
+int wolfSSL_BIO_should_retry(WOLFSSL_BIO *bio)
+{
+    int ret = 0;
+    if (bio != NULL) {
+        ret = (int)(bio->flags & WOLFSSL_BIO_FLAG_RETRY);
+    }
+
+    return ret;
+}
+
+/* DER data is PKCS#8 encrypted. */
+WOLFSSL_EVP_PKEY* wolfSSL_d2i_PKCS8PrivateKey_bio(WOLFSSL_BIO* bio,
+                                                  WOLFSSL_EVP_PKEY** pkey,
+                                                  pem_password_cb* cb,
+                                                  void* ctx)
+{
+    int ret;
+    byte* der;
+    int len;
+    byte* p;
+    char password[NAME_SZ];
+    int passwordSz;
+    word32 algId;
+    WOLFSSL_EVP_PKEY* key;
+
+    if ((len = bio_get_data(bio, &der)) < 0)
+        return NULL;
+
+    if (cb != NULL) {
+        passwordSz = cb(password, sizeof(password), PEM_PASS_READ, ctx);
+        if (passwordSz < 0) {
+            XFREE(der, bio->heap, DYNAMIC_TYPE_OPENSSL);
+            return NULL;
+        }
+
+        ret = ToTraditionalEnc(der, len, password, passwordSz, &algId);
+        if (ret < 0) {
+            XFREE(der, bio->heap, DYNAMIC_TYPE_OPENSSL);
+            return NULL;
+        }
+
+        XMEMSET(password, 0, passwordSz);
+    }
+
+    p = der;
+    key = wolfSSL_d2i_PrivateKey_EVP(pkey, &p, len);
+    XFREE(der, bio->heap, DYNAMIC_TYPE_OPENSSL);
+    return key;
+}
+
+/* Detect which type of key it is before decoding. */
+WOLFSSL_EVP_PKEY* wolfSSL_d2i_AutoPrivateKey(WOLFSSL_EVP_PKEY** pkey,
+                                             const unsigned char** pp,
+                                             long length)
+{
+    int ret;
+    WOLFSSL_EVP_PKEY* key = NULL;
+    const byte* der = *pp;
+    word32 idx = 0;
+    int len = 0;
+    word32 end = 0;
+    int cnt = 0;
+    int type;
+    word32 algId;
+    word32 keyLen = (word32)length;
+
+    /* Take off PKCS#8 wrapper if found. */
+    if ((len = ToTraditionalInline_ex(der, &idx, keyLen, &algId)) >= 0) {
+        der += idx;
+        keyLen = len;
+    }
+    idx = 0;
+    len = 0;
+
+    /* Use the number of elements in the outer sequence to determine key type.
+     */
+    ret = GetSequence(der, &idx, &len, keyLen);
+    if (ret >= 0) {
+        end = idx + len;
+        while (ret >= 0 && idx < end) {
+            /* Skip type */
+            idx++;
+            /* Get length and skip over - keeping count */
+            len = 0;
+            ret = GetLength(der, &idx, &len, keyLen);
+            if (ret >= 0) {
+                if (idx + len > end)
+                    ret = ASN_PARSE_E;
+                else {
+                    idx += len;
+                    cnt++;
+                }
+            }
+        }
+    }
+
+    if (ret >= 0) {
+        /* ECC includes version, private[, curve][, public key] */
+        if (cnt >= 2 && cnt <= 4)
+            type = EVP_PKEY_EC;
+        else
+            type = EVP_PKEY_RSA;
+
+        key = wolfSSL_d2i_PrivateKey(type, pkey, &der, keyLen);
+        *pp = der;
+    }
+
+    return key;
+}
+#endif
+
+#if defined(OPENSSL_EXTRA) && !defined(NO_CERTS) && \
+    defined(WOLFSSL_CERT_GEN) && defined(WOLFSSL_CERT_REQ)
+int wolfSSL_X509_set_subject_name(WOLFSSL_X509 *cert, WOLFSSL_X509_NAME *name)
+{
+    int i;
+    WOLFSSL_X509_NAME_ENTRY* ne;
+
+    WOLFSSL_ENTER("X509_set_subject_name");
+    if (cert == NULL || name == NULL)
+        return WOLFSSL_FAILURE;
+
+    FreeX509Name(&cert->subject, cert->heap);
+    InitX509Name(&cert->subject, 0);
+    if (name->dynamicName) {
+        cert->subject.name = (char*)XMALLOC(name->sz, cert->heap,
+                                                       DYNAMIC_TYPE_SUBJECT_CN);
+        if (cert->subject.name == NULL)
+            return WOLFSSL_FAILURE;
+    }
+    XMEMCPY(cert->subject.name, name->name, name->sz);
+    cert->subject.sz = name->sz;
+
+    for (i = 0; i < 10; i++) {
+        ne = wolfSSL_X509_NAME_get_entry(name, i);
+        if (ne != NULL)
+            wolfSSL_X509_NAME_add_entry(&cert->subject, ne, i, 1);
+    }
+    cert->subject.x509 = cert;
+    cert->subject.name = cert->subject.fullName.fullName;
+    cert->subject.sz = cert->subject.fullName.fullNameLen;
+
+    return WOLFSSL_SUCCESS;
+}
+
+int wolfSSL_X509_set_issuer_name(WOLFSSL_X509 *cert, WOLFSSL_X509_NAME *name)
+{
+    int i;
+    WOLFSSL_X509_NAME_ENTRY* ne;
+
+    WOLFSSL_ENTER("X509_set_issuer_name");
+    if (cert == NULL || name == NULL)
+        return WOLFSSL_FAILURE;
+
+    FreeX509Name(&cert->issuer, cert->heap);
+    InitX509Name(&cert->issuer, 0);
+    if (name->dynamicName) {
+        cert->issuer.name = (char*)XMALLOC(name->sz, cert->heap,
+                                                       DYNAMIC_TYPE_SUBJECT_CN);
+        if (cert->issuer.name == NULL)
+            return WOLFSSL_FAILURE;
+    }
+    XMEMCPY(cert->issuer.name, name->name, name->sz);
+    cert->issuer.sz = name->sz;
+
+    for (i = 0; i < 10; i++) {
+        ne = wolfSSL_X509_NAME_get_entry(name, i);
+        if (ne != NULL)
+            wolfSSL_X509_NAME_add_entry(&cert->issuer, ne, i, 1);
+    }
+    cert->issuer.x509 = cert;
+    cert->issuer.name = cert->issuer.fullName.fullName;
+    cert->issuer.sz = cert->issuer.fullName.fullNameLen;
+    cert->issuerSet = 1;
+
+    return WOLFSSL_SUCCESS;
+}
+
+int wolfSSL_X509_set_notAfter(WOLFSSL_X509* x509, const WOLFSSL_ASN1_TIME* t)
+{
+    if (x509 == NULL || t == NULL) {
+        return WOLFSSL_FAILURE;
+    }
+
+    XMEMCPY(&x509->notAfter, t, sizeof(WOLFSSL_ASN1_TIME));
+
+    return WOLFSSL_SUCCESS;
+}
+
+int wolfSSL_X509_set_notBefore(WOLFSSL_X509* x509, const WOLFSSL_ASN1_TIME* t)
+{
+    if (x509 == NULL || t == NULL) {
+        return WOLFSSL_FAILURE;
+    }
+
+    XMEMCPY(&x509->notBefore, t, sizeof(WOLFSSL_ASN1_TIME));
+
+    return WOLFSSL_SUCCESS;
+}
+
+int wolfSSL_X509_set_serialNumber(WOLFSSL_X509* x509, WOLFSSL_ASN1_INTEGER* s)
+{
+    WOLFSSL_ENTER("wolfSSL_X509_set_serialNumber");
+    if (!x509 || !s || s->dataMax >= EXTERNAL_SERIAL_SIZE)
+        return WOLFSSL_FAILURE;
+
+    if (s->isDynamic)
+        XSTRNCPY((char*)x509->serial,(char*)s->data,s->dataMax);
+    else
+        XSTRNCPY((char*)x509->serial,(char*)s->intData,s->dataMax);
+
+    x509->serial[s->dataMax] = 0;
+    x509->serialSz = s->dataMax;
+
+    return WOLFSSL_SUCCESS;
+}
+
+
+int wolfSSL_X509_set_pubkey(WOLFSSL_X509 *cert, WOLFSSL_EVP_PKEY *pkey)
+{
+    byte* p;
+    WOLFSSL_ENTER("wolfSSL_X509_set_pubkey");
+
+    if (cert == NULL || pkey == NULL)
+        return WOLFSSL_FAILURE;
+
+    if (pkey->type == EVP_PKEY_RSA)
+        cert->pubKeyOID = RSAk;
+    else if (pkey->type == EVP_PKEY_EC)
+        cert->pubKeyOID = ECDSAk;
+    else
+        return WOLFSSL_FAILURE;
+
+    p = (byte*)XMALLOC(pkey->pkey_sz, cert->heap, DYNAMIC_TYPE_PUBLIC_KEY);
+    if (p == NULL)
+        return WOLFSSL_FAILURE;
+
+    if (cert->pubKey.buffer != NULL)
+        XFREE(cert->pubKey.buffer, cert->heap, DYNAMIC_TYPE_PUBLIC_KEY);
+    cert->pubKey.buffer = p;
+    XMEMCPY(cert->pubKey.buffer, pkey->pkey.ptr, pkey->pkey_sz);
+    cert->pubKey.length = pkey->pkey_sz;
+
+    return WOLFSSL_SUCCESS;
+}
+
+int wolfSSL_X509_set_version(WOLFSSL_X509* x509, long v)
+{
+    WOLFSSL_ENTER("wolfSSL_X509_set_version");
+    if ((x509 == NULL) || (v < 0) || (v > INT_MAX)) {
+        return WOLFSSL_FAILURE;
+    }
+    x509->version = (int) v + 1;
+
+    return WOLFSSL_SUCCESS;
+}
+
+#endif /* OPENSSL_EXTRA && !NO_CERTS && WOLFSSL_CERT_GEN && WOLFSSL_CERT_REQ */
+
+#if defined(OPENSSL_EXTRA) && !defined(NO_CERTS) && \
+    defined(WOLFSSL_CERT_GEN) && defined(WOLFSSL_CERT_REQ)
+
+void wolfSSL_X509V3_set_ctx(WOLFSSL_X509V3_CTX* ctx, WOLFSSL_X509* issuer,
+        WOLFSSL_X509* subject, WOLFSSL_X509* req, WOLFSSL_X509_CRL* crl,
+        int flag)
+{
+    int ret = WOLFSSL_SUCCESS;
+    WOLFSSL_ENTER("wolfSSL_X509V3_set_ctx");
+    if (!ctx || !ctx->x509)
+        return;
+
+    /* Set parameters in ctx as long as ret == WOLFSSL_SUCCESS */
+    if (issuer)
+        ret = wolfSSL_X509_set_issuer_name(ctx->x509,&issuer->issuer);
+
+    if (subject && ret == WOLFSSL_SUCCESS)
+        ret = wolfSSL_X509_set_subject_name(ctx->x509,&subject->subject);
+
+    if (req && ret == WOLFSSL_SUCCESS) {
+        WOLFSSL_MSG("req not implemented.");
+    }
+
+    if (crl && ret == WOLFSSL_SUCCESS) {
+        WOLFSSL_MSG("crl not implemented.");
+    }
+
+    if (flag && ret == WOLFSSL_SUCCESS) {
+        WOLFSSL_MSG("flag not implemented.");
+    }
+
+    if (!ret) {
+        WOLFSSL_MSG("Error setting WOLFSSL_X509V3_CTX parameters.");
+    }
+}
+
+int wolfSSL_i2d_X509_REQ(WOLFSSL_X509* req, unsigned char** out)
+{
+    const unsigned char* der;
+    int derSz = 0;
+    WOLFSSL_ENTER("wolfSSL_i2d_X509_REQ");
+
+    if (req == NULL || out == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    der = wolfSSL_X509_get_der(req, &derSz);
+    if (der == NULL) {
+        return MEMORY_E;
+    }
+
+    if (*out == NULL) {
+        *out = (unsigned char*)XMALLOC(derSz, NULL, DYNAMIC_TYPE_OPENSSL);
+        if (*out == NULL) {
+            return MEMORY_E;
+        }
+    }
+
+    XMEMCPY(*out, der, derSz);
+
+    return derSz;
+}
+
+WOLFSSL_X509* wolfSSL_X509_REQ_new(void)
+{
+    return wolfSSL_X509_new();
+}
+
+void wolfSSL_X509_REQ_free(WOLFSSL_X509* req)
+{
+    wolfSSL_X509_free(req);
+}
+
+int wolfSSL_X509_REQ_sign(WOLFSSL_X509 *req, WOLFSSL_EVP_PKEY *pkey,
+                          const WOLFSSL_EVP_MD *md)
+{
+    byte der[2048];
+    int derSz = sizeof(der);
+
+    if (req == NULL || pkey == NULL || md == NULL)
+        return WOLFSSL_FAILURE;
+
+    /* Create a Cert that has the certificate request fields. */
+    req->sigOID = wolfSSL_sigTypeFromPKEY((WOLFSSL_EVP_MD*)md, pkey);
+    if (wolfSSL_X509_make_der(req, 1, der, &derSz) != WOLFSSL_SUCCESS) {
+        return WOLFSSL_FAILURE;
+    }
+
+    if (wolfSSL_X509_resign_cert(req, 1, der, sizeof(der), derSz,
+            (WOLFSSL_EVP_MD*)md, pkey) <= 0) {
+        return WOLFSSL_FAILURE;
+    }
+    return WOLFSSL_SUCCESS;
+}
+
+
+#ifndef NO_WOLFSSL_STUB
+int wolfSSL_X509_REQ_add_extensions(WOLFSSL_X509* req,
+        WOLF_STACK_OF(WOLFSSL_X509_EXTENSION)* ext)
+{
+    (void)req;
+    (void)ext;
+    return WOLFSSL_FATAL_ERROR;
+}
+#endif
+
+int wolfSSL_X509_REQ_set_subject_name(WOLFSSL_X509 *req,
+                                      WOLFSSL_X509_NAME *name)
+{
+    return wolfSSL_X509_set_subject_name(req, name);
+}
+
+int wolfSSL_X509_REQ_set_pubkey(WOLFSSL_X509 *req, WOLFSSL_EVP_PKEY *pkey)
+{
+    return wolfSSL_X509_set_pubkey(req, pkey);
+}
+#endif /* OPENSSL_EXTRA && !NO_CERTS && WOLFSSL_CERT_GEN && WOLFSSL_CERT_REQ */
+
+
 
--- a/src/tls.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/src/tls.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* tls.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -43,7 +43,9 @@
 #ifdef HAVE_CURVE25519
     #include <wolfssl/wolfcrypt/curve25519.h>
 #endif
-
+#ifdef HAVE_CURVE448
+    #include <wolfssl/wolfcrypt/curve448.h>
+#endif
 #ifdef HAVE_NTRU
     #include "libntruencrypt/ntru_crypto.h"
     #include <wolfssl/wolfcrypt/random.h>
@@ -63,12 +65,13 @@
 static int TLSX_KeyShare_IsSupported(int namedGroup);
 #endif
 
-#if (!defined(NO_WOLFSSL_SERVER) && defined(WOLFSSL_TLS13) && \
+#if ((!defined(NO_WOLFSSL_SERVER) && defined(WOLFSSL_TLS13) && \
         !defined(WOLFSSL_NO_SERVER_GROUPS_EXT)) || \
-    (defined(WOLFSSL_TLS13) && !defined(HAVE_ECC) && \
-        !defined(HAVE_CURVE25519) && defined(HAVE_SUPPORTED_CURVES)) || \
-    ((defined(HAVE_ECC) || defined(HAVE_CURVE25519)) && \
-        defined(HAVE_SUPPORTED_CURVES))
+    (defined(WOLFSSL_TLS13) && !defined(HAVE_ECC) && !defined(HAVE_CURVE25519) \
+        && !defined(HAVE_CURVE448) && defined(HAVE_SUPPORTED_CURVES)) || \
+    ((defined(HAVE_ECC) || defined(HAVE_CURVE25519) || \
+        defined(HAVE_CURVE448)) && defined(HAVE_SUPPORTED_CURVES))) && \
+     defined(HAVE_TLS_EXTENSIONS)
 static int TLSX_PopulateSupportedGroups(WOLFSSL* ssl, TLSX** extensions);
 #endif
 
@@ -97,313 +100,75 @@
     #if !defined(NO_RSA) && !defined(WC_RSA_PSS)
         #error The build option WC_RSA_PSS is required for TLS 1.3 with RSA
     #endif
-#endif
-
+    #ifndef HAVE_TLS_EXTENSIONS
+        #ifndef _MSC_VER
+            #error "The build option HAVE_TLS_EXTENSIONS is required for TLS 1.3"
+        #else
+            #pragma message("Error: The build option HAVE_TLS_EXTENSIONS is required for TLS 1.3")
+        #endif
+    #endif
+#endif
+
+/* Warn if secrets logging is enabled */
+#if defined(SHOW_SECRETS) || defined(WOLFSSL_SSLKEYLOGFILE)
+    #ifndef _MSC_VER
+        #warning The SHOW_SECRETS and WOLFSSL_SSLKEYLOGFILE options should only be used for debugging and never in a production environment
+    #else
+        #pragma message("Warning: The SHOW_SECRETS and WOLFSSL_SSLKEYLOGFILE options should only be used for debugging and never in a production environment")
+    #endif
+#endif
+
+/* Optional Pre-Master-Secret logging for Wireshark */
+#if !defined(NO_FILESYSTEM) && defined(WOLFSSL_SSLKEYLOGFILE)
+#ifndef WOLFSSL_SSLKEYLOGFILE_OUTPUT
+    #define WOLFSSL_SSLKEYLOGFILE_OUTPUT "sslkeylog.log"
+#endif
+#endif
 
 #ifndef WOLFSSL_NO_TLS12
 
 #ifdef WOLFSSL_SHA384
-    #define P_HASH_MAX_SIZE WC_SHA384_DIGEST_SIZE
-#else
-    #define P_HASH_MAX_SIZE WC_SHA256_DIGEST_SIZE
-#endif
-
-/* compute p_hash for MD5, SHA-1, SHA-256, or SHA-384 for TLSv1 PRF */
-static int p_hash(byte* result, word32 resLen, const byte* secret,
-                  word32 secLen, const byte* seed, word32 seedLen, int hash,
-                  void* heap, int devId)
-{
-    word32 len = P_HASH_MAX_SIZE;
-    word32 times;
-    word32 lastLen;
-    word32 lastTime;
-    word32 i;
-    word32 idx = 0;
-    int    ret = 0;
-#ifdef WOLFSSL_SMALL_STACK
-    byte*  previous;
-    byte*  current;
-    Hmac*  hmac;
-#else
-    byte   previous[P_HASH_MAX_SIZE];  /* max size */
-    byte   current[P_HASH_MAX_SIZE];   /* max size */
-    Hmac   hmac[1];
-#endif
-
-#ifdef WOLFSSL_SMALL_STACK
-    previous = (byte*)XMALLOC(P_HASH_MAX_SIZE, heap, DYNAMIC_TYPE_DIGEST);
-    current  = (byte*)XMALLOC(P_HASH_MAX_SIZE, heap, DYNAMIC_TYPE_DIGEST);
-    hmac     = (Hmac*)XMALLOC(sizeof(Hmac),    heap, DYNAMIC_TYPE_HMAC);
-
-    if (previous == NULL || current == NULL || hmac == NULL) {
-        if (previous) XFREE(previous, heap, DYNAMIC_TYPE_DIGEST);
-        if (current)  XFREE(current,  heap, DYNAMIC_TYPE_DIGEST);
-        if (hmac)     XFREE(hmac,     heap, DYNAMIC_TYPE_HMAC);
-
-        return MEMORY_E;
-    }
-#endif
-
-    switch (hash) {
-        #ifndef NO_MD5
-            case md5_mac:
-                hash = WC_MD5;
-                len  = WC_MD5_DIGEST_SIZE;
-            break;
-        #endif
-
-        #ifndef NO_SHA256
-            case sha256_mac:
-                hash = WC_SHA256;
-                len  = WC_SHA256_DIGEST_SIZE;
-            break;
-        #endif
-
-        #ifdef WOLFSSL_SHA384
-            case sha384_mac:
-                hash = WC_SHA384;
-                len  = WC_SHA384_DIGEST_SIZE;
-            break;
-        #endif
-
-        #ifndef NO_SHA
-            case sha_mac:
-            default:
-                hash = WC_SHA;
-                len  = WC_SHA_DIGEST_SIZE;
-            break;
-        #endif
-    }
-
-    times   = resLen / len;
-    lastLen = resLen % len;
-
-    if (lastLen)
-        times += 1;
-
-    lastTime = times - 1;
-
-    ret = wc_HmacInit(hmac, heap, devId);
-    if (ret == 0) {
-        ret = wc_HmacSetKey(hmac, hash, secret, secLen);
-        if (ret == 0)
-            ret = wc_HmacUpdate(hmac, seed, seedLen); /* A0 = seed */
-        if (ret == 0)
-            ret = wc_HmacFinal(hmac, previous);       /* A1 */
-        if (ret == 0) {
-            for (i = 0; i < times; i++) {
-                ret = wc_HmacUpdate(hmac, previous, len);
-                if (ret != 0)
-                    break;
-                ret = wc_HmacUpdate(hmac, seed, seedLen);
-                if (ret != 0)
-                    break;
-                ret = wc_HmacFinal(hmac, current);
-                if (ret != 0)
-                    break;
-
-                if ((i == lastTime) && lastLen)
-                    XMEMCPY(&result[idx], current,
-                                             min(lastLen, P_HASH_MAX_SIZE));
-                else {
-                    XMEMCPY(&result[idx], current, len);
-                    idx += len;
-                    ret = wc_HmacUpdate(hmac, previous, len);
-                    if (ret != 0)
-                        break;
-                    ret = wc_HmacFinal(hmac, previous);
-                    if (ret != 0)
-                        break;
-                }
-            }
-        }
-        wc_HmacFree(hmac);
-    }
-
-    ForceZero(previous,  P_HASH_MAX_SIZE);
-    ForceZero(current,   P_HASH_MAX_SIZE);
-    ForceZero(hmac,      sizeof(Hmac));
-
-#ifdef WOLFSSL_SMALL_STACK
-    XFREE(previous, heap, DYNAMIC_TYPE_DIGEST);
-    XFREE(current,  heap, DYNAMIC_TYPE_DIGEST);
-    XFREE(hmac,     heap, DYNAMIC_TYPE_HMAC);
-#endif
-
-    return ret;
-}
-
-#undef P_HASH_MAX_SIZE
-
-#endif /* !WOLFSSL_NO_TLS12 */
-
-
-#ifndef NO_OLD_TLS
-
-/* calculate XOR for TLSv1 PRF */
-static WC_INLINE void get_xor(byte *digest, word32 digLen, byte* md5, byte* sha)
-{
-    word32 i;
-
-    for (i = 0; i < digLen; i++)
-        digest[i] = md5[i] ^ sha[i];
-}
-
-
-/* compute TLSv1 PRF (pseudo random function using HMAC) */
-static int doPRF(byte* digest, word32 digLen, const byte* secret,word32 secLen,
-                 const byte* label, word32 labLen, const byte* seed,
-                 word32 seedLen, void* heap, int devId)
-{
-    int    ret  = 0;
-    word32 half = (secLen + 1) / 2;
-
-#ifdef WOLFSSL_SMALL_STACK
-    byte* md5_half;
-    byte* sha_half;
-    byte* md5_result;
-    byte* sha_result;
-#else
-    byte  md5_half[MAX_PRF_HALF];     /* half is real size */
-    byte  sha_half[MAX_PRF_HALF];     /* half is real size */
-    byte  md5_result[MAX_PRF_DIG];    /* digLen is real size */
-    byte  sha_result[MAX_PRF_DIG];    /* digLen is real size */
-#endif
-    DECLARE_VAR(labelSeed, byte, MAX_PRF_LABSEED, heap);
-
-    if (half > MAX_PRF_HALF)
-        return BUFFER_E;
-    if (labLen + seedLen > MAX_PRF_LABSEED)
-        return BUFFER_E;
-    if (digLen > MAX_PRF_DIG)
-        return BUFFER_E;
-
-#ifdef WOLFSSL_SMALL_STACK
-    md5_half   = (byte*)XMALLOC(MAX_PRF_HALF,    heap, DYNAMIC_TYPE_DIGEST);
-    sha_half   = (byte*)XMALLOC(MAX_PRF_HALF,    heap, DYNAMIC_TYPE_DIGEST);
-    md5_result = (byte*)XMALLOC(MAX_PRF_DIG,     heap, DYNAMIC_TYPE_DIGEST);
-    sha_result = (byte*)XMALLOC(MAX_PRF_DIG,     heap, DYNAMIC_TYPE_DIGEST);
-
-    if (md5_half == NULL || sha_half == NULL || md5_result == NULL ||
-                                                           sha_result == NULL) {
-        if (md5_half)   XFREE(md5_half,   heap, DYNAMIC_TYPE_DIGEST);
-        if (sha_half)   XFREE(sha_half,   heap, DYNAMIC_TYPE_DIGEST);
-        if (md5_result) XFREE(md5_result, heap, DYNAMIC_TYPE_DIGEST);
-        if (sha_result) XFREE(sha_result, heap, DYNAMIC_TYPE_DIGEST);
-        FREE_VAR(labelSeed, heap);
-
-        return MEMORY_E;
-    }
-#endif
-
-    XMEMSET(md5_result, 0, digLen);
-    XMEMSET(sha_result, 0, digLen);
-
-    XMEMCPY(md5_half, secret, half);
-    XMEMCPY(sha_half, secret + half - secLen % 2, half);
-
-    XMEMCPY(labelSeed, label, labLen);
-    XMEMCPY(labelSeed + labLen, seed, seedLen);
-
-    if ((ret = p_hash(md5_result, digLen, md5_half, half, labelSeed,
-                                labLen + seedLen, md5_mac, heap, devId)) == 0) {
-        if ((ret = p_hash(sha_result, digLen, sha_half, half, labelSeed,
-                                labLen + seedLen, sha_mac, heap, devId)) == 0) {
-            get_xor(digest, digLen, md5_result, sha_result);
-        }
-    }
-
-#ifdef WOLFSSL_SMALL_STACK
-    XFREE(md5_half,   heap, DYNAMIC_TYPE_DIGEST);
-    XFREE(sha_half,   heap, DYNAMIC_TYPE_DIGEST);
-    XFREE(md5_result, heap, DYNAMIC_TYPE_DIGEST);
-    XFREE(sha_result, heap, DYNAMIC_TYPE_DIGEST);
-#endif
-
-    FREE_VAR(labelSeed, heap);
-
-    return ret;
-}
-
-#endif
-
-
-#ifndef WOLFSSL_NO_TLS12
-
-/* Wrapper to call straight thru to p_hash in TSL 1.2 cases to remove stack
-   use */
-static int PRF(byte* digest, word32 digLen, const byte* secret, word32 secLen,
-            const byte* label, word32 labLen, const byte* seed, word32 seedLen,
-            int useAtLeastSha256, int hash_type, void* heap, int devId)
-{
-    int ret = 0;
-
-    if (useAtLeastSha256) {
-        DECLARE_VAR(labelSeed, byte, MAX_PRF_LABSEED, heap);
-
-        if (labLen + seedLen > MAX_PRF_LABSEED)
-            return BUFFER_E;
-
-        XMEMCPY(labelSeed, label, labLen);
-        XMEMCPY(labelSeed + labLen, seed, seedLen);
-
-        /* If a cipher suite wants an algorithm better than sha256, it
-         * should use better. */
-        if (hash_type < sha256_mac || hash_type == blake2b_mac)
-            hash_type = sha256_mac;
-        ret = p_hash(digest, digLen, secret, secLen, labelSeed,
-                     labLen + seedLen, hash_type, heap, devId);
-
-        FREE_VAR(labelSeed, heap);
-    }
-#ifndef NO_OLD_TLS
-    else {
-        ret = doPRF(digest, digLen, secret, secLen, label, labLen, seed,
-                    seedLen, heap, devId);
-    }
-#endif
-
-    return ret;
-}
-
-#ifdef WOLFSSL_SHA384
     #define HSHASH_SZ WC_SHA384_DIGEST_SIZE
 #else
     #define HSHASH_SZ FINISHED_SZ
 #endif
 
+#ifdef WOLFSSL_RENESAS_TSIP_TLS
+    int tsip_useable(const WOLFSSL *ssl);
+    int tsip_generateMasterSecret(const byte *pre,
+                                const byte *cr,const byte *sr,
+                                byte *ms/* out */);
+    int tsip_generateSeesionKey(WOLFSSL *ssl);
+    int tsip_generateVerifyData(const byte *ms, const byte *side,
+                                const byte *handshake_hash,
+                                byte *hashes /* out */);
+#endif
 
 int BuildTlsHandshakeHash(WOLFSSL* ssl, byte* hash, word32* hashLen)
 {
+    int ret = 0;
     word32 hashSz = FINISHED_SZ;
 
     if (ssl == NULL || hash == NULL || hashLen == NULL || *hashLen < HSHASH_SZ)
         return BAD_FUNC_ARG;
 
+    /* for constant timing perform these even if error */
 #ifndef NO_OLD_TLS
-    wc_Md5GetHash(&ssl->hsHashes->hashMd5, hash);
-    wc_ShaGetHash(&ssl->hsHashes->hashSha, &hash[WC_MD5_DIGEST_SIZE]);
+    ret |= wc_Md5GetHash(&ssl->hsHashes->hashMd5, hash);
+    ret |= wc_ShaGetHash(&ssl->hsHashes->hashSha, &hash[WC_MD5_DIGEST_SIZE]);
 #endif
 
     if (IsAtLeastTLSv1_2(ssl)) {
 #ifndef NO_SHA256
         if (ssl->specs.mac_algorithm <= sha256_mac ||
             ssl->specs.mac_algorithm == blake2b_mac) {
-            int ret = wc_Sha256GetHash(&ssl->hsHashes->hashSha256, hash);
-
-            if (ret != 0)
-                return ret;
-
+            ret |= wc_Sha256GetHash(&ssl->hsHashes->hashSha256, hash);
             hashSz = WC_SHA256_DIGEST_SIZE;
         }
 #endif
 #ifdef WOLFSSL_SHA384
         if (ssl->specs.mac_algorithm == sha384_mac) {
-            int ret = wc_Sha384GetHash(&ssl->hsHashes->hashSha384, hash);
-
-            if (ret != 0)
-                return ret;
-
+            ret |= wc_Sha384GetHash(&ssl->hsHashes->hashSha384, hash);
             hashSz = WC_SHA384_DIGEST_SIZE;
         }
 #endif
@@ -411,36 +176,58 @@
 
     *hashLen = hashSz;
 
-    return 0;
+    if (ret != 0)
+        ret = BUILD_MSG_ERROR;
+
+    return ret;
 }
 
 
 int BuildTlsFinished(WOLFSSL* ssl, Hashes* hashes, const byte* sender)
 {
-    int         ret;
+    int ret;
     const byte* side;
-    byte*       handshake_hash;
-    word32      hashSz = HSHASH_SZ;
-
-    /* using allocate here to allow async hardware to use buffer directly */
-    handshake_hash = (byte*)XMALLOC(hashSz, ssl->heap, DYNAMIC_TYPE_DIGEST);
+    word32 hashSz = HSHASH_SZ;
+#if defined(WOLFSSL_ASYNC_CRYPT) && !defined(WC_ASYNC_NO_HASH)
+    DECLARE_VAR(handshake_hash, byte, HSHASH_SZ, ssl->heap);
     if (handshake_hash == NULL)
         return MEMORY_E;
+#else
+    byte handshake_hash[HSHASH_SZ];
+#endif
 
     ret = BuildTlsHandshakeHash(ssl, handshake_hash, &hashSz);
     if (ret == 0) {
-        if ( XSTRNCMP((const char*)sender, (const char*)client, SIZEOF_SENDER) == 0)
+        if (XSTRNCMP((const char*)sender, (const char*)client, SIZEOF_SENDER) == 0)
             side = tls_client;
         else
             side = tls_server;
 
-        ret = PRF((byte*)hashes, TLS_FINISHED_SZ, ssl->arrays->masterSecret,
+#ifdef WOLFSSL_HAVE_PRF
+#if defined(WOLFSSL_RENESAS_TSIP_TLS) && \
+    !defined(NO_WOLFSSL_RENESAS_TSIP_TLS_SESSION)
+        if (tsip_useable(ssl)) {
+            ret = tsip_generateVerifyData(ssl->arrays->tsip_masterSecret,
+                            side, handshake_hash, (byte*)hashes /* out */);
+        } else
+#endif
+        ret = wc_PRF_TLS((byte*)hashes, TLS_FINISHED_SZ, ssl->arrays->masterSecret,
                    SECRET_LEN, side, FINISHED_LABEL_SZ, handshake_hash, hashSz,
                    IsAtLeastTLSv1_2(ssl), ssl->specs.mac_algorithm,
                    ssl->heap, ssl->devId);
-    }
-
-    XFREE(handshake_hash, ssl->heap, DYNAMIC_TYPE_DIGEST);
+#else
+        /* Pseudo random function must be enabled in the configuration. */
+        ret = PRF_MISSING;
+        WOLFSSL_MSG("Pseudo-random function is not enabled");
+
+        (void)side;
+        (void)hashes;
+#endif
+    }
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && !defined(WC_ASYNC_NO_HASH)
+    FREE_VAR(handshake_hash, ssl->heap);
+#endif
 
     return ret;
 }
@@ -517,15 +304,43 @@
                          void* heap, int devId)
 {
     int ret;
+#if defined(WOLFSSL_ASYNC_CRYPT) && !defined(WC_ASYNC_NO_HASH)
     DECLARE_VAR(seed, byte, SEED_LEN, heap);
+    if (seed == NULL)
+        return MEMORY_E;
+#else
+    byte seed[SEED_LEN];
+#endif
 
     XMEMCPY(seed,           sr, RAN_LEN);
     XMEMCPY(seed + RAN_LEN, cr, RAN_LEN);
 
-    ret = PRF(key_dig, key_dig_len, ms, msLen, key_label, KEY_LABEL_SZ,
+#ifdef WOLFSSL_HAVE_PRF
+    ret = wc_PRF_TLS(key_dig, key_dig_len, ms, msLen, key_label, KEY_LABEL_SZ,
                seed, SEED_LEN, tls1_2, hash_type, heap, devId);
-
+#else
+    /* Pseudo random function must be enabled in the configuration. */
+    ret = PRF_MISSING;
+    WOLFSSL_MSG("Pseudo-random function is not enabled");
+
+    (void)key_dig;
+    (void)key_dig_len;
+    (void)ms;
+    (void)msLen;
+    (void)tls1_2;
+    (void)hash_type;
+    (void)heap;
+    (void)devId;
+    (void)key_label;
+    (void)master_label;
+#ifdef HAVE_EXTENDED_MASTER
+    (void)ext_master_label;
+#endif
+#endif
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && !defined(WC_ASYNC_NO_HASH)
     FREE_VAR(seed, heap);
+#endif
 
     return ret;
 }
@@ -559,7 +374,12 @@
         return MEMORY_E;
     }
 #endif
-
+#if defined(WOLFSSL_RENESAS_TSIP_TLS) && \
+    !defined(NO_WOLFSSL_RENESAS_TSIP_TLS_SESSION)
+    if (tsip_useable(ssl))
+        ret = tsip_generateSeesionKey(ssl);
+    else {
+#endif
     ret = _DeriveTlsKeys(key_dig, key_dig_len,
                          ssl->arrays->masterSecret, SECRET_LEN,
                          ssl->arrays->serverRandom, ssl->arrays->clientRandom,
@@ -567,6 +387,10 @@
                          ssl->heap, ssl->devId);
     if (ret == 0)
         ret = StoreKeys(ssl, key_dig, PROVISION_CLIENT_SERVER);
+#if defined(WOLFSSL_RENESAS_TSIP_TLS) && \
+    !defined(NO_WOLFSSL_RENESAS_TSIP_TLS_SESSION)
+    }
+#endif
 
 #ifdef WOLFSSL_SMALL_STACK
     XFREE(key_dig, ssl->heap, DYNAMIC_TYPE_DIGEST);
@@ -581,13 +405,41 @@
                                int tls1_2, int hash_type,
                                void* heap, int devId)
 {
-    byte  seed[SEED_LEN];
+    int ret;
+#if defined(WOLFSSL_ASYNC_CRYPT) && !defined(WC_ASYNC_NO_HASH)
+    DECLARE_VAR(seed, byte, SEED_LEN, heap);
+    if (seed == NULL)
+        return MEMORY_E;
+#else
+    byte seed[SEED_LEN];
+#endif
 
     XMEMCPY(seed,           cr, RAN_LEN);
     XMEMCPY(seed + RAN_LEN, sr, RAN_LEN);
 
-    return PRF(ms, msLen, pms, pmsLen, master_label, MASTER_LABEL_SZ,
+#ifdef WOLFSSL_HAVE_PRF
+    ret = wc_PRF_TLS(ms, msLen, pms, pmsLen, master_label, MASTER_LABEL_SZ,
                seed, SEED_LEN, tls1_2, hash_type, heap, devId);
+#else
+    /* Pseudo random function must be enabled in the configuration. */
+    ret = PRF_MISSING;
+    WOLFSSL_MSG("Pseudo-random function is not enabled");
+
+    (void)ms;
+    (void)msLen;
+    (void)pms;
+    (void)pmsLen;
+    (void)tls1_2;
+    (void)hash_type;
+    (void)heap;
+    (void)devId;
+#endif
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && !defined(WC_ASYNC_NO_HASH)
+    FREE_VAR(seed, heap);
+#endif
+
+    return ret;
 }
 
 /* External facing wrapper so user can call as well, 0 on success */
@@ -609,8 +461,28 @@
                                         int tls1_2, int hash_type,
                                         void* heap, int devId)
 {
-    return PRF(ms, msLen, pms, pmsLen, ext_master_label, EXT_MASTER_LABEL_SZ,
+    int ret;
+
+#ifdef WOLFSSL_HAVE_PRF
+    ret = wc_PRF_TLS(ms, msLen, pms, pmsLen, ext_master_label, EXT_MASTER_LABEL_SZ,
                sHash, sHashLen, tls1_2, hash_type, heap, devId);
+#else
+    /* Pseudo random function must be enabled in the configuration. */
+    ret = PRF_MISSING;
+    WOLFSSL_MSG("Pseudo-random function is not enabled");
+
+    (void)ms;
+    (void)msLen;
+    (void)pms;
+    (void)pmsLen;
+    (void)sHash;
+    (void)sHashLen;
+    (void)tls1_2;
+    (void)hash_type;
+    (void)heap;
+    (void)devId;
+#endif
+    return ret;
 }
 
 /* External facing wrapper so user can call as well, 0 on success */
@@ -628,48 +500,93 @@
 
 int MakeTlsMasterSecret(WOLFSSL* ssl)
 {
-    int    ret;
+    int ret;
+
 #ifdef HAVE_EXTENDED_MASTER
     if (ssl->options.haveEMS) {
-        byte*  handshake_hash;
         word32 hashSz = HSHASH_SZ;
-
-        handshake_hash = (byte*)XMALLOC(HSHASH_SZ, ssl->heap,
-                                        DYNAMIC_TYPE_DIGEST);
+    #ifdef WOLFSSL_SMALL_STACK
+        byte* handshake_hash = (byte*)XMALLOC(HSHASH_SZ, ssl->heap,
+                                              DYNAMIC_TYPE_DIGEST);
         if (handshake_hash == NULL)
             return MEMORY_E;
+    #else
+        byte handshake_hash[HSHASH_SZ];
+    #endif
 
         ret = BuildTlsHandshakeHash(ssl, handshake_hash, &hashSz);
-        if (ret < 0) {
-            XFREE(handshake_hash, ssl->heap, DYNAMIC_TYPE_DIGEST);
-            return ret;
-        }
-
-        ret = _MakeTlsExtendedMasterSecret(
+        if (ret == 0) {
+            ret = _MakeTlsExtendedMasterSecret(
                 ssl->arrays->masterSecret, SECRET_LEN,
                 ssl->arrays->preMasterSecret, ssl->arrays->preMasterSz,
                 handshake_hash, hashSz,
                 IsAtLeastTLSv1_2(ssl), ssl->specs.mac_algorithm,
                 ssl->heap, ssl->devId);
-
+        }
+
+    #ifdef WOLFSSL_SMALL_STACK
         XFREE(handshake_hash, ssl->heap, DYNAMIC_TYPE_DIGEST);
-    } else
-#endif
-    ret = _MakeTlsMasterSecret(ssl->arrays->masterSecret, SECRET_LEN,
+    #endif
+    }
+    else
+#endif /* HAVE_EXTENDED_MASTER */
+    {
+#if defined(WOLFSSL_RENESAS_TSIP_TLS) && \
+    !defined(NO_WOLFSSL_RENESAS_TSIP_TLS_SESSION)
+        if (tsip_useable(ssl)) {
+            ret = tsip_generateMasterSecret(
+                            &ssl->arrays->preMasterSecret[VERSION_SZ],
+                            ssl->arrays->clientRandom,
+                            ssl->arrays->serverRandom,
+                            ssl->arrays->tsip_masterSecret);
+        } else
+#endif
+        ret = _MakeTlsMasterSecret(ssl->arrays->masterSecret, SECRET_LEN,
               ssl->arrays->preMasterSecret, ssl->arrays->preMasterSz,
               ssl->arrays->clientRandom, ssl->arrays->serverRandom,
               IsAtLeastTLSv1_2(ssl), ssl->specs.mac_algorithm,
               ssl->heap, ssl->devId);
-
+    }
     if (ret == 0) {
     #ifdef SHOW_SECRETS
-        int i;
-
-        printf("master secret: ");
-        for (i = 0; i < SECRET_LEN; i++)
-            printf("%02x", ssl->arrays->masterSecret[i]);
-        printf("\n");
-    #endif
+        /* Wireshark Pre-Master-Secret Format:
+         *  CLIENT_RANDOM <clientrandom> <mastersecret>
+         */
+        const char* CLIENT_RANDOM_LABEL = "CLIENT_RANDOM";
+        int i, pmsPos = 0;
+        char pmsBuf[13 + 1 + 64 + 1 + 96 + 1 + 1];
+
+        XSNPRINTF(&pmsBuf[pmsPos], sizeof(pmsBuf) - pmsPos, "%s ",
+            CLIENT_RANDOM_LABEL);
+        pmsPos += XSTRLEN(CLIENT_RANDOM_LABEL) + 1;
+        for (i = 0; i < RAN_LEN; i++) {
+            XSNPRINTF(&pmsBuf[pmsPos], sizeof(pmsBuf) - pmsPos, "%02x",
+                ssl->arrays->clientRandom[i]);
+            pmsPos += 2;
+        }
+        XSNPRINTF(&pmsBuf[pmsPos], sizeof(pmsBuf) - pmsPos, " ");
+        pmsPos += 1;
+        for (i = 0; i < SECRET_LEN; i++) {
+            XSNPRINTF(&pmsBuf[pmsPos], sizeof(pmsBuf) - pmsPos, "%02x",
+                ssl->arrays->masterSecret[i]);
+            pmsPos += 2;
+        }
+        XSNPRINTF(&pmsBuf[pmsPos], sizeof(pmsBuf) - pmsPos, "\n");
+        pmsPos += 1;
+
+        /* print master secret */
+        puts(pmsBuf);
+
+        #if !defined(NO_FILESYSTEM) && defined(WOLFSSL_SSLKEYLOGFILE)
+        {
+            FILE* f = XFOPEN(WOLFSSL_SSLKEYLOGFILE_OUTPUT, "a");
+            if (f != XBADFILE) {
+                XFWRITE(pmsBuf, 1, pmsPos, f);
+                XFCLOSE(f);
+            }
+        }
+        #endif
+    #endif /* SHOW_SECRETS */
 
         ret = DeriveTlsKeys(ssl);
     }
@@ -703,10 +620,20 @@
     XMEMCPY(seed,           ssl->arrays->clientRandom, RAN_LEN);
     XMEMCPY(seed + RAN_LEN, ssl->arrays->serverRandom, RAN_LEN);
 
-    ret = PRF((byte*)msk, len, ssl->arrays->masterSecret, SECRET_LEN,
+#ifdef WOLFSSL_HAVE_PRF
+    ret = wc_PRF_TLS((byte*)msk, len, ssl->arrays->masterSecret, SECRET_LEN,
               (const byte *)label, (word32)XSTRLEN(label), seed, SEED_LEN,
               IsAtLeastTLSv1_2(ssl), ssl->specs.mac_algorithm,
               ssl->heap, ssl->devId);
+#else
+    /* Pseudo random function must be enabled in the configuration. */
+    ret = PRF_MISSING;
+    WOLFSSL_MSG("Pseudo-random function is not enabled");
+
+    (void)msk;
+    (void)len;
+    (void)label;
+#endif
 
 #ifdef WOLFSSL_SMALL_STACK
     XFREE(seed, ssl->heap, DYNAMIC_TYPE_SEED);
@@ -742,17 +669,17 @@
 {
     if (order == PREV_ORDER) {
         /* Previous epoch case */
-        seq[0] = ((ssl->keys.dtls_epoch - 1) << 16) |
+        seq[0] = (((word32)ssl->keys.dtls_epoch - 1) << 16) |
                  (ssl->keys.dtls_prev_sequence_number_hi & 0xFFFF);
         seq[1] = ssl->keys.dtls_prev_sequence_number_lo;
     }
     else if (order == PEER_ORDER) {
-        seq[0] = (ssl->keys.curEpoch << 16) |
+        seq[0] = ((word32)ssl->keys.curEpoch << 16) |
                  (ssl->keys.curSeq_hi & 0xFFFF);
         seq[1] = ssl->keys.curSeq_lo; /* explicit from peer */
     }
     else {
-        seq[0] = (ssl->keys.dtls_epoch << 16) |
+        seq[0] = ((word32)ssl->keys.dtls_epoch << 16) |
                  (ssl->keys.dtls_sequence_number_hi & 0xFFFF);
         seq[1] = ssl->keys.dtls_sequence_number_lo;
     }
@@ -845,6 +772,7 @@
 }
 
 
+#ifndef WOLFSSL_AEAD_ONLY
 #if !defined(WOLFSSL_NO_HASH_RAW) && !defined(HAVE_FIPS) && \
     !defined(HAVE_SELFTEST)
 
@@ -936,66 +864,23 @@
 static int Hmac_OuterHash(Hmac* hmac, unsigned char* mac)
 {
     int ret = BAD_FUNC_ARG;
-
-    switch (hmac->macType) {
-    #ifndef NO_SHA
-        case WC_SHA:
-            ret = wc_InitSha(&hmac->hash.sha);
-            if (ret == 0)
-                ret = wc_ShaUpdate(&hmac->hash.sha, (byte*)hmac->opad,
-                                                             WC_SHA_BLOCK_SIZE);
-            if (ret == 0)
-                ret = wc_ShaUpdate(&hmac->hash.sha, (byte*)hmac->innerHash,
-                                                            WC_SHA_DIGEST_SIZE);
-            if (ret == 0)
-                ret = wc_ShaFinal(&hmac->hash.sha, mac);
-            break;
-    #endif /* !NO_SHA */
-
-    #ifndef NO_SHA256
-        case WC_SHA256:
-            ret = wc_InitSha256(&hmac->hash.sha256);
-            if (ret == 0)
-                ret = wc_Sha256Update(&hmac->hash.sha256, (byte*)hmac->opad,
-                                                          WC_SHA256_BLOCK_SIZE);
-            if (ret == 0)
-                ret = wc_Sha256Update(&hmac->hash.sha256,
-                                                         (byte*)hmac->innerHash,
-                                                         WC_SHA256_DIGEST_SIZE);
-            if (ret == 0)
-                ret = wc_Sha256Final(&hmac->hash.sha256, mac);
-            break;
-    #endif /* !NO_SHA256 */
-
-    #ifdef WOLFSSL_SHA384
-        case WC_SHA384:
-            ret = wc_InitSha384(&hmac->hash.sha384);
-            if (ret == 0)
-                ret = wc_Sha384Update(&hmac->hash.sha384, (byte*)hmac->opad,
-                                                          WC_SHA384_BLOCK_SIZE);
-            if (ret == 0)
-                ret = wc_Sha384Update(&hmac->hash.sha384,
-                                                         (byte*)hmac->innerHash,
-                                                         WC_SHA384_DIGEST_SIZE);
-            if (ret == 0)
-                ret = wc_Sha384Final(&hmac->hash.sha384, mac);
-            break;
-    #endif /* WOLFSSL_SHA384 */
-
-    #ifdef WOLFSSL_SHA512
-        case WC_SHA512:
-            ret = wc_InitSha512(&hmac->hash.sha512);
-            if (ret == 0)
-                ret = wc_Sha512Update(&hmac->hash.sha512,(byte*)hmac->opad,
-                                                          WC_SHA512_BLOCK_SIZE);
-            if (ret == 0)
-                ret = wc_Sha512Update(&hmac->hash.sha512,
-                                                         (byte*)hmac->innerHash,
-                                                         WC_SHA512_DIGEST_SIZE);
-            if (ret == 0)
-                ret = wc_Sha512Final(&hmac->hash.sha512, mac);
-            break;
-    #endif /* WOLFSSL_SHA512 */
+    wc_HashAlg hash;
+    enum wc_HashType hashType = (enum wc_HashType)hmac->macType;
+    int digestSz = wc_HashGetDigestSize(hashType);
+    int blockSz = wc_HashGetBlockSize(hashType);
+
+    if ((digestSz >= 0) && (blockSz >= 0)) {
+        ret = wc_HashInit(&hash, hashType);
+    }
+    if (ret == 0) {
+        ret = wc_HashUpdate(&hash, hashType, (byte*)hmac->opad,
+            blockSz);
+        if (ret == 0)
+            ret = wc_HashUpdate(&hash, hashType, (byte*)hmac->innerHash,
+                digestSz);
+        if (ret == 0)
+            ret = wc_HashFinal(&hash, hashType, mac);
+        wc_HashFree(&hash, hashType);
     }
 
     return ret;
@@ -1017,11 +902,12 @@
     byte lenBytes[8];
     int  i, j, k;
     int  blockBits, blockMask;
-    int  realLen, lastBlockLen, macLen, extraLen, eocIndex;
+    int  lastBlockLen, macLen, extraLen, eocIndex;
     int  blocks, safeBlocks, lenBlock, eocBlock;
     int  maxLen;
     int  blockSz, padSz;
     int  ret;
+    word32 realLen;
     byte extraBlock;
 
     switch (hmac->macType) {
@@ -1129,12 +1015,12 @@
             else if (k < maxLen)
                 b = in[k - WOLFSSL_TLS_HMAC_INNER_SZ];
 
-            b = ctMaskSel(atEoc, b, 0x80);
+            b = ctMaskSel(atEoc, 0x80, b);
             b &= (unsigned char)~(word32)pastEoc;
             b &= ((unsigned char)~(word32)isOutBlock) | isEocBlock;
 
             if (j >= blockSz - 8) {
-                b = ctMaskSel(isOutBlock, b, lenBytes[j - (blockSz - 8)]);
+                b = ctMaskSel(isOutBlock, lenBytes[j - (blockSz - 8)], b);
             }
 
             hashBlock[j] = b;
@@ -1177,7 +1063,7 @@
     byte       dummy[WC_MAX_BLOCK_SIZE] = {0};
     int        ret;
     word32     msgSz, blockSz, macSz, padSz, maxSz, realSz;
-    word32     currSz, offset;
+    word32     currSz, offset = 0;
     int        msgBlocks, blocks, blockBits;
     int        i;
 
@@ -1288,6 +1174,12 @@
     Hmac   hmac;
     byte   myInner[WOLFSSL_TLS_HMAC_INNER_SZ];
     int    ret = 0;
+#ifdef HAVE_TRUNCATED_HMAC
+    word32 hashSz = ssl->truncated_hmac ? (byte)TRUNCATED_HMAC_SZ
+                                        : ssl->specs.hash_size;
+#else
+    word32 hashSz = ssl->specs.hash_size;
+#endif
 
     if (ssl == NULL)
         return BAD_FUNC_ARG;
@@ -1296,8 +1188,8 @@
     /* Fuzz "in" buffer with sz to be used in HMAC algorithm */
     if (ssl->fuzzerCb) {
         if (verify && padSz >= 0) {
-            ssl->fuzzerCb(ssl, in, sz + ssl->specs.hash_size + padSz + 1,
-                FUZZ_HMAC, ssl->fuzzerCtx);
+            ssl->fuzzerCb(ssl, in, sz + hashSz + padSz + 1, FUZZ_HMAC,
+                          ssl->fuzzerCtx);
         }
         else {
             ssl->fuzzerCb(ssl, in, sz, FUZZ_HMAC, ssl->fuzzerCtx);
@@ -1306,7 +1198,21 @@
 #endif
 
     wolfSSL_SetTlsHmacInner(ssl, myInner, sz, content, verify);
-
+#if defined(WOLFSSL_RENESAS_TSIP_TLS) && \
+    !defined(NO_WOLFSSL_RENESAS_TSIP_TLS_SESSION)
+    if (tsip_useable(ssl)) {
+        if (ssl->specs.hash_size == WC_SHA_DIGEST_SIZE)
+            ret = tsip_Sha1Hmac(ssl, myInner, WOLFSSL_TLS_HMAC_INNER_SZ,
+                                                        in, sz, digest, verify);
+        else if (ssl->specs.hash_size == WC_SHA256_DIGEST_SIZE)
+            ret = tsip_Sha256Hmac(ssl, myInner, WOLFSSL_TLS_HMAC_INNER_SZ,
+                                                        in, sz, digest, verify);
+        else
+            ret = TSIP_MAC_DIGSZ_E;
+
+        return ret;
+    }
+#endif
     ret = wc_HmacInit(&hmac, ssl->heap, ssl->devId);
     if (ret != 0)
         return ret;
@@ -1321,21 +1227,18 @@
     !defined(HAVE_SELFTEST)
     #ifdef HAVE_BLAKE2
             if (wolfSSL_GetHmacType(ssl) == WC_HASH_TYPE_BLAKE2B) {
-                ret = Hmac_UpdateFinal(&hmac, digest, in, sz +
-                                               ssl->specs.hash_size + padSz + 1,
-                                               myInner);
+                ret = Hmac_UpdateFinal(&hmac, digest, in,
+                                              sz + hashSz + padSz + 1, myInner);
             }
             else
     #endif
             {
-                ret = Hmac_UpdateFinal_CT(&hmac, digest, in, sz +
-                                               ssl->specs.hash_size + padSz + 1,
-                                               myInner);
+                ret = Hmac_UpdateFinal_CT(&hmac, digest, in,
+                                              sz + hashSz + padSz + 1, myInner);
             }
 #else
-            ret = Hmac_UpdateFinal(&hmac, digest, in, sz +
-                                               ssl->specs.hash_size + padSz + 1,
-                                               myInner);
+            ret = Hmac_UpdateFinal(&hmac, digest, in, sz + hashSz + padSz + 1,
+                                                                       myInner);
 #endif
         }
         else {
@@ -1351,6 +1254,7 @@
 
     return ret;
 }
+#endif /* WOLFSSL_AEAD_ONLY */
 
 #endif /* !WOLFSSL_NO_TLS12 */
 
@@ -1367,7 +1271,7 @@
 /**
  * Converts the extension type (id) to an index in the semaphore.
  *
- * Oficial reference for TLS extension types:
+ * Official reference for TLS extension types:
  *   http://www.iana.org/assignments/tls-extensiontype-values/tls-extensiontype-values.xml
  *
  * Motivation:
@@ -1398,7 +1302,7 @@
                    is assigned to be used by another extension.
                    Use this check value for the new extension and decrement
                    the check value by one. */
-                WOLFSSL_MSG("### TLSX semaphore colision or overflow detected!");
+                WOLFSSL_MSG("### TLSX semaphore collision or overflow detected!");
             }
     }
 
@@ -1550,6 +1454,8 @@
     XMEMCPY(alpn->protocol_name, protocol_name, protocol_nameSz);
     alpn->protocol_name[protocol_nameSz] = 0;
 
+    (void)heap;
+
     return alpn;
 }
 
@@ -1681,6 +1587,9 @@
     ato16(input, &size);
     offset += OPAQUE16_LEN;
 
+    if (size == 0)
+        return BUFFER_ERROR;
+
     extension = TLSX_Find(ssl->extensions, TLSX_APPLICATION_LAYER_PROTOCOL);
     if (extension == NULL)
         extension = TLSX_Find(ssl->ctx->extensions,
@@ -1730,7 +1639,7 @@
     for (size = 0; offset < length; offset += size) {
 
         size = input[offset++];
-        if (offset + size > length)
+        if (offset + size > length || size == 0)
             return BUFFER_ERROR;
 
         if (isRequest) {
@@ -1899,6 +1808,8 @@
 {
     SNI* sni = (SNI*)XMALLOC(sizeof(SNI), heap, DYNAMIC_TYPE_TLSX);
 
+    (void)heap;
+
     if (sni) {
         sni->type = type;
         sni->next = NULL;
@@ -2049,6 +1960,10 @@
     word16 size = 0;
     word16 offset = 0;
     int cacheOnly = 0;
+    SNI *sni = NULL;
+    byte type;
+    int matchStat;
+    byte matched;
 #endif
 
     TLSX *extension = TLSX_Find(ssl->extensions, TLSX_SERVER_NAME);
@@ -2102,73 +2017,64 @@
     offset += OPAQUE16_LEN;
 
     /* validating sni list length */
-    if (length != OPAQUE16_LEN + size)
+    if (length != OPAQUE16_LEN + size || size == 0)
+        return BUFFER_ERROR;
+
+    /* SNI was badly specified and only one type is now recognized and allowed.
+     * Only one SNI value per type (RFC6066), so, no loop. */
+    type = input[offset++];
+    if (type != WOLFSSL_SNI_HOST_NAME)
         return BUFFER_ERROR;
 
-    for (size = 0; offset < length; offset += size) {
-        SNI *sni = NULL;
-        byte type = input[offset++];
-
-        if (offset + OPAQUE16_LEN > length)
-            return BUFFER_ERROR;
-
-        ato16(input + offset, &size);
-        offset += OPAQUE16_LEN;
-
-        if (offset + size > length)
-            return BUFFER_ERROR;
-
-        if (!cacheOnly && !(sni = TLSX_SNI_Find((SNI*)extension->data, type)))
-            continue; /* not using this type of SNI. */
-
-        switch(type) {
-            case WOLFSSL_SNI_HOST_NAME: {
-                int matchStat;
-                byte matched;
+    if (offset + OPAQUE16_LEN > length)
+        return BUFFER_ERROR;
+    ato16(input + offset, &size);
+    offset += OPAQUE16_LEN;
+
+    if (offset + size != length || size == 0)
+        return BUFFER_ERROR;
+
+    if (!cacheOnly && !(sni = TLSX_SNI_Find((SNI*)extension->data, type)))
+        return 0; /* not using this type of SNI. */
 
 #ifdef WOLFSSL_TLS13
-                /* Don't process the second ClientHello SNI extension if there
-                 * was problems with the first.
-                 */
-                if (!cacheOnly && sni->status != 0)
-                    break;
-#endif
-                matched = cacheOnly ||
-                    ((XSTRLEN(sni->data.host_name) == size) &&
-                    (XSTRNCMP(sni->data.host_name,
-                                      (const char*)input + offset, size) == 0));
-
-                if (matched || sni->options & WOLFSSL_SNI_ANSWER_ON_MISMATCH) {
-                    int r = TLSX_UseSNI(&ssl->extensions,
-                                         type, input + offset, size, ssl->heap);
-
-                    if (r != WOLFSSL_SUCCESS)
-                        return r; /* throws error. */
-
-                    if(cacheOnly) {
-                        WOLFSSL_MSG("Forcing storage of SNI, Fake match");
-                        matchStat = WOLFSSL_SNI_FORCE_KEEP;
-                    } else if(matched) {
-                        WOLFSSL_MSG("SNI did match!");
-                        matchStat = WOLFSSL_SNI_REAL_MATCH;
-                    } else {
-                        WOLFSSL_MSG("fake SNI match from ANSWER_ON_MISMATCH");
-                        matchStat = WOLFSSL_SNI_FAKE_MATCH;
-                    }
-
-                    TLSX_SNI_SetStatus(ssl->extensions, type, (byte)matchStat);
-
-                    if(!cacheOnly)
-                        TLSX_SetResponse(ssl, TLSX_SERVER_NAME);
-
-                } else if (!(sni->options & WOLFSSL_SNI_CONTINUE_ON_MISMATCH)) {
-                    SendAlert(ssl, alert_fatal, unrecognized_name);
-
-                    return UNKNOWN_SNI_HOST_NAME_E;
-                }
-                break;
-            }
-        }
+    /* Don't process the second ClientHello SNI extension if there
+     * was problems with the first.
+     */
+    if (!cacheOnly && sni->status != 0)
+        return 0;
+#endif
+    matched = cacheOnly || (XSTRLEN(sni->data.host_name) == size &&
+         XSTRNCMP(sni->data.host_name, (const char*)input + offset, size) == 0);
+
+    if (matched || sni->options & WOLFSSL_SNI_ANSWER_ON_MISMATCH) {
+        int r = TLSX_UseSNI(&ssl->extensions, type, input + offset, size,
+                                                                     ssl->heap);
+        if (r != WOLFSSL_SUCCESS)
+            return r; /* throws error. */
+
+        if (cacheOnly) {
+            WOLFSSL_MSG("Forcing storage of SNI, Fake match");
+            matchStat = WOLFSSL_SNI_FORCE_KEEP;
+        }
+        else if (matched) {
+            WOLFSSL_MSG("SNI did match!");
+            matchStat = WOLFSSL_SNI_REAL_MATCH;
+        }
+        else {
+            WOLFSSL_MSG("fake SNI match from ANSWER_ON_MISMATCH");
+            matchStat = WOLFSSL_SNI_FAKE_MATCH;
+        }
+
+        TLSX_SNI_SetStatus(ssl->extensions, type, (byte)matchStat);
+
+        if(!cacheOnly)
+            TLSX_SetResponse(ssl, TLSX_SERVER_NAME);
+    }
+    else if (!(sni->options & WOLFSSL_SNI_CONTINUE_ON_MISMATCH)) {
+        SendAlert(ssl, alert_fatal, unrecognized_name);
+
+        return UNKNOWN_SNI_HOST_NAME_E;
     }
 #else
     (void)input;
@@ -2468,6 +2374,344 @@
 #endif /* HAVE_SNI */
 
 /******************************************************************************/
+/* Trusted CA Key Indication                                                  */
+/******************************************************************************/
+
+#ifdef HAVE_TRUSTED_CA
+
+/** Creates a new TCA object. */
+static TCA* TLSX_TCA_New(byte type, const byte* id, word16 idSz, void* heap)
+{
+    TCA* tca = (TCA*)XMALLOC(sizeof(TCA), heap, DYNAMIC_TYPE_TLSX);
+
+    if (tca) {
+        XMEMSET(tca, 0, sizeof(TCA));
+        tca->type = type;
+
+        switch (type) {
+            case WOLFSSL_TRUSTED_CA_PRE_AGREED:
+                break;
+
+            #ifndef NO_SHA
+            case WOLFSSL_TRUSTED_CA_KEY_SHA1:
+            case WOLFSSL_TRUSTED_CA_CERT_SHA1:
+                if (idSz == WC_SHA_DIGEST_SIZE &&
+                        (tca->id =
+                            (byte*)XMALLOC(idSz, heap, DYNAMIC_TYPE_TLSX))) {
+                    XMEMCPY(tca->id, id, idSz);
+                    tca->idSz = idSz;
+                }
+                else {
+                    XFREE(tca, heap, DYNAMIC_TYPE_TLSX);
+                    tca = NULL;
+                }
+                break;
+            #endif
+
+            case WOLFSSL_TRUSTED_CA_X509_NAME:
+                if (idSz > 0 &&
+                        (tca->id =
+                            (byte*)XMALLOC(idSz, heap, DYNAMIC_TYPE_TLSX))) {
+                    XMEMCPY(tca->id, id, idSz);
+                    tca->idSz = idSz;
+                }
+                else {
+                    XFREE(tca, heap, DYNAMIC_TYPE_TLSX);
+                    tca = NULL;
+                }
+                break;
+
+            default: /* invalid type */
+                XFREE(tca, heap, DYNAMIC_TYPE_TLSX);
+                tca = NULL;
+        }
+    }
+
+    (void)heap;
+
+    return tca;
+}
+
+/** Releases a TCA object. */
+static void TLSX_TCA_Free(TCA* tca, void* heap)
+{
+    (void)heap;
+
+    if (tca) {
+        if (tca->id)
+            XFREE(tca->id, heap, DYNAMIC_TYPE_TLSX);
+        XFREE(tca, heap, DYNAMIC_TYPE_TLSX);
+    }
+}
+
+/** Releases all TCA objects in the provided list. */
+static void TLSX_TCA_FreeAll(TCA* list, void* heap)
+{
+    TCA* tca;
+
+    while ((tca = list)) {
+        list = tca->next;
+        TLSX_TCA_Free(tca, heap);
+    }
+}
+
+/** Tells the buffered size of the TCA objects in a list. */
+static word16 TLSX_TCA_GetSize(TCA* list)
+{
+    TCA* tca;
+    word16 length = OPAQUE16_LEN; /* list length */
+
+    while ((tca = list)) {
+        list = tca->next;
+
+        length += ENUM_LEN; /* tca type */
+
+        switch (tca->type) {
+            case WOLFSSL_TRUSTED_CA_PRE_AGREED:
+                break;
+            case WOLFSSL_TRUSTED_CA_KEY_SHA1:
+            case WOLFSSL_TRUSTED_CA_CERT_SHA1:
+                length += tca->idSz;
+                break;
+            case WOLFSSL_TRUSTED_CA_X509_NAME:
+                length += OPAQUE16_LEN + tca->idSz;
+                break;
+        }
+    }
+
+    return length;
+}
+
+/** Writes the TCA objects of a list in a buffer. */
+static word16 TLSX_TCA_Write(TCA* list, byte* output)
+{
+    TCA* tca;
+    word16 offset = OPAQUE16_LEN; /* list length offset */
+
+    while ((tca = list)) {
+        list = tca->next;
+
+        output[offset++] = tca->type; /* tca type */
+
+        switch (tca->type) {
+            case WOLFSSL_TRUSTED_CA_PRE_AGREED:
+                break;
+            #ifndef NO_SHA
+            case WOLFSSL_TRUSTED_CA_KEY_SHA1:
+            case WOLFSSL_TRUSTED_CA_CERT_SHA1:
+                if (tca->id != NULL) {
+                    XMEMCPY(output + offset, tca->id, tca->idSz);
+                    offset += tca->idSz;
+                }
+                else {
+                    /* ID missing. Set to an empty string. */
+                    c16toa(0, output + offset);
+                    offset += OPAQUE16_LEN;
+                }
+                break;
+            #endif
+            case WOLFSSL_TRUSTED_CA_X509_NAME:
+                if (tca->id != NULL) {
+                    c16toa(tca->idSz, output + offset); /* tca length */
+                    offset += OPAQUE16_LEN;
+                    XMEMCPY(output + offset, tca->id, tca->idSz);
+                    offset += tca->idSz;
+                }
+                else {
+                    /* ID missing. Set to an empty string. */
+                    c16toa(0, output + offset);
+                    offset += OPAQUE16_LEN;
+                }
+                break;
+            default:
+                /* ID unknown. Set to an empty string. */
+                c16toa(0, output + offset);
+                offset += OPAQUE16_LEN;
+        }
+    }
+
+    c16toa(offset - OPAQUE16_LEN, output); /* writing list length */
+
+    return offset;
+}
+
+#ifndef NO_WOLFSSL_SERVER
+static TCA* TLSX_TCA_Find(TCA *list, byte type, const byte* id, word16 idSz)
+{
+    TCA* tca = list;
+
+    while (tca && tca->type != type && type != WOLFSSL_TRUSTED_CA_PRE_AGREED &&
+           idSz != tca->idSz && !XMEMCMP(id, tca->id, idSz))
+        tca = tca->next;
+
+    return tca;
+}
+#endif /* NO_WOLFSSL_SERVER */
+
+/** Parses a buffer of TCA extensions. */
+static int TLSX_TCA_Parse(WOLFSSL* ssl, const byte* input, word16 length,
+                          byte isRequest)
+{
+#ifndef NO_WOLFSSL_SERVER
+    word16 size = 0;
+    word16 offset = 0;
+#endif
+
+    TLSX *extension = TLSX_Find(ssl->extensions, TLSX_TRUSTED_CA_KEYS);
+
+    if (!extension)
+        extension = TLSX_Find(ssl->ctx->extensions, TLSX_TRUSTED_CA_KEYS);
+
+    if (!isRequest) {
+        #ifndef NO_WOLFSSL_CLIENT
+            if (!extension || !extension->data)
+                return TLSX_HandleUnsupportedExtension(ssl);
+
+            if (length > 0)
+                return BUFFER_ERROR; /* TCA response MUST be empty. */
+
+            /* Set the flag that we're good for keys */
+            TLSX_SetResponse(ssl, TLSX_TRUSTED_CA_KEYS);
+
+            return 0;
+        #endif
+    }
+
+#ifndef NO_WOLFSSL_SERVER
+    if (!extension || !extension->data) {
+        /* Skipping, TCA not enabled at server side. */
+        return 0;
+    }
+
+    if (OPAQUE16_LEN > length)
+        return BUFFER_ERROR;
+
+    ato16(input, &size);
+    offset += OPAQUE16_LEN;
+
+    /* validating tca list length */
+    if (length != OPAQUE16_LEN + size)
+        return BUFFER_ERROR;
+
+    for (size = 0; offset < length; offset += size) {
+        TCA *tca = NULL;
+        byte type;
+        const byte* id = NULL;
+        word16 idSz = 0;
+
+        if (offset + ENUM_LEN > length)
+            return BUFFER_ERROR;
+
+        type = input[offset++];
+
+        switch (type) {
+            case WOLFSSL_TRUSTED_CA_PRE_AGREED:
+                break;
+            #ifndef NO_SHA
+            case WOLFSSL_TRUSTED_CA_KEY_SHA1:
+            case WOLFSSL_TRUSTED_CA_CERT_SHA1:
+                if (offset + WC_SHA_DIGEST_SIZE > length)
+                    return BUFFER_ERROR;
+                idSz = WC_SHA_DIGEST_SIZE;
+                id = input + offset;
+                offset += idSz;
+                break;
+            #endif
+            case WOLFSSL_TRUSTED_CA_X509_NAME:
+                if (offset + OPAQUE16_LEN > length)
+                    return BUFFER_ERROR;
+                ato16(input + offset, &idSz);
+                offset += OPAQUE16_LEN;
+                if ((offset > length) || (idSz > length - offset))
+                    return BUFFER_ERROR;
+                id = input + offset;
+                offset += idSz;
+                break;
+            default:
+                return TCA_INVALID_ID_TYPE;
+        }
+
+        /* Find the type/ID in the TCA list. */
+        tca = TLSX_TCA_Find((TCA*)extension->data, type, id, idSz);
+        if (tca != NULL) {
+            /* Found it. Set the response flag and break out of the loop. */
+            TLSX_SetResponse(ssl, TLSX_TRUSTED_CA_KEYS);
+            break;
+        }
+    }
+#else
+    (void)input;
+#endif
+
+    return 0;
+}
+
+/* Checks to see if the server sent a response for the TCA. */
+static int TLSX_TCA_VerifyParse(WOLFSSL* ssl, byte isRequest)
+{
+    (void)ssl;
+
+    if (!isRequest) {
+    #ifndef NO_WOLFSSL_CLIENT
+        TLSX* extension = TLSX_Find(ssl->extensions, TLSX_TRUSTED_CA_KEYS);
+
+        if (extension && !extension->resp) {
+            SendAlert(ssl, alert_fatal, handshake_failure);
+            return TCA_ABSENT_ERROR;
+        }
+    #endif /* NO_WOLFSSL_CLIENT */
+    }
+
+    return 0;
+}
+
+int TLSX_UseTrustedCA(TLSX** extensions, byte type,
+                    const byte* id, word16 idSz, void* heap)
+{
+    TLSX* extension;
+    TCA* tca = NULL;
+
+    if (extensions == NULL)
+        return BAD_FUNC_ARG;
+
+    if ((tca = TLSX_TCA_New(type, id, idSz, heap)) == NULL)
+        return MEMORY_E;
+
+    extension = TLSX_Find(*extensions, TLSX_TRUSTED_CA_KEYS);
+    if (!extension) {
+        int ret = TLSX_Push(extensions, TLSX_TRUSTED_CA_KEYS, (void*)tca, heap);
+
+        if (ret != 0) {
+            TLSX_TCA_Free(tca, heap);
+            return ret;
+        }
+    }
+    else {
+        /* push new TCA object to extension data. */
+        tca->next = (TCA*)extension->data;
+        extension->data = (void*)tca;
+    }
+
+    return WOLFSSL_SUCCESS;
+}
+
+#define TCA_FREE_ALL     TLSX_TCA_FreeAll
+#define TCA_GET_SIZE     TLSX_TCA_GetSize
+#define TCA_WRITE        TLSX_TCA_Write
+#define TCA_PARSE        TLSX_TCA_Parse
+#define TCA_VERIFY_PARSE TLSX_TCA_VerifyParse
+
+#else /* HAVE_TRUSTED_CA */
+
+#define TCA_FREE_ALL(list, heap)
+#define TCA_GET_SIZE(list)     0
+#define TCA_WRITE(a, b)        0
+#define TCA_PARSE(a, b, c, d)  0
+#define TCA_VERIFY_PARSE(a, b) 0
+
+#endif /* HAVE_TRUSTED_CA */
+
+/******************************************************************************/
 /* Max Fragment Length Negotiation                                            */
 /******************************************************************************/
 
@@ -2495,6 +2739,7 @@
 #endif
 
     switch (*input) {
+        case WOLFSSL_MFL_2_8 : ssl->max_fragment =  256; break;
         case WOLFSSL_MFL_2_9 : ssl->max_fragment =  512; break;
         case WOLFSSL_MFL_2_10: ssl->max_fragment = 1024; break;
         case WOLFSSL_MFL_2_11: ssl->max_fragment = 2048; break;
@@ -2526,7 +2771,7 @@
     byte* data = NULL;
     int ret = 0;
 
-    if (extensions == NULL || mfl < WOLFSSL_MFL_2_9 || WOLFSSL_MFL_2_13 < mfl)
+    if (extensions == NULL || mfl < WOLFSSL_MFL_MIN || mfl > WOLFSSL_MFL_MAX)
         return BAD_FUNC_ARG;
 
     data = (byte*)XMALLOC(ENUM_LEN, heap, DYNAMIC_TYPE_TLSX);
@@ -2653,15 +2898,8 @@
     }
 #endif
 #if defined(WOLFSSL_TLS13) && !defined(NO_WOLFSSL_SERVER)
-    if (!isRequest && csr->ssl->options.tls1_3) {
-        if (csr->response.buffer == NULL) {
-            OcspRequest* request = &csr->request.ocsp;
-            int ret = CreateOcspResponse(csr->ssl, &request, &csr->response);
-            if (ret < 0)
-                return ret;
-        }
+    if (!isRequest && csr->ssl->options.tls1_3)
         return OPAQUE8_LEN + OPAQUE24_LEN + csr->response.length;
-    }
 #endif
 
     return size;
@@ -2770,6 +3008,13 @@
         if (ssl->options.tls1_3) {
             word32       resp_length;
             word32       offset = 0;
+
+            /* Get the new extension potentially created above. */
+            extension = TLSX_Find(ssl->extensions, TLSX_STATUS_REQUEST);
+            csr = extension ? (CertificateStatusRequest*)extension->data : NULL;
+            if (csr == NULL)
+                return MEMORY_ERROR;
+
             ret = 0;
             if (OPAQUE8_LEN + OPAQUE24_LEN > length)
                 ret = BUFFER_ERROR;
@@ -2781,10 +3026,12 @@
                 if (offset + resp_length != length)
                     ret = BUFFER_ERROR;
             }
+        #if !defined(NO_WOLFSSL_SERVER)
             if (ret == 0) {
                 csr->response.buffer = input + offset;
                 csr->response.length = resp_length;
             }
+        #endif
 
             return ret;
         }
@@ -2802,6 +3049,8 @@
         word16 offset = 0;
         word16 size = 0;
 
+        if (length == 0)
+            return 0;
         if (length < ENUM_LEN)
             return BUFFER_ERROR;
 
@@ -2850,7 +3099,25 @@
         if (ret != WOLFSSL_SUCCESS)
             return ret; /* throw error */
 
-        TLSX_SetResponse(ssl, TLSX_STATUS_REQUEST);
+    #if defined(WOLFSSL_TLS13) && !defined(NO_WOLFSSL_SERVER)
+        if (ssl->options.tls1_3) {
+            OcspRequest* request;
+            TLSX* extension = TLSX_Find(ssl->extensions, TLSX_STATUS_REQUEST);
+            CertificateStatusRequest* csr = extension ?
+                (CertificateStatusRequest*)extension->data : NULL;
+            if (csr == NULL)
+                return MEMORY_ERROR;
+
+            request = &csr->request.ocsp;
+            ret = CreateOcspResponse(ssl, &request, &csr->response);
+            if (ret != 0)
+                return ret;
+            if (csr->response.buffer)
+                TLSX_SetResponse(ssl, TLSX_STATUS_REQUEST);
+        }
+        else
+    #endif
+            TLSX_SetResponse(ssl, TLSX_STATUS_REQUEST);
         ssl->status_request = status_type;
 #endif
     }
@@ -3205,15 +3472,19 @@
                         return BUFFER_ERROR;
 
                     ato16(input + offset, &size);
+                    if (length - offset < size)
+                        return BUFFER_ERROR;
+
                     offset += OPAQUE16_LEN + size;
-
                     /* skip request_extensions */
                     if (length - offset < OPAQUE16_LEN)
                         return BUFFER_ERROR;
 
                     ato16(input + offset, &size);
+                    if (length - offset < size)
+                        return BUFFER_ERROR;
+
                     offset += OPAQUE16_LEN + size;
-
                     if (offset > length)
                         return BUFFER_ERROR;
 
@@ -3432,9 +3703,11 @@
 
 #ifdef HAVE_SUPPORTED_CURVES
 
-#if !defined(HAVE_ECC) && !defined(HAVE_CURVE25519) && !defined(WOLFSSL_TLS13)
+#if !defined(HAVE_ECC) && !defined(HAVE_CURVE25519) && !defined(HAVE_CURVE448) \
+                       && !defined(HAVE_FFDHE)
 #error Elliptic Curves Extension requires Elliptic Curve Cryptography. \
-       Use --enable-ecc in the configure script or define HAVE_ECC.
+       Use --enable-ecc in the configure script or define HAVE_ECC. \
+       Alternatively use FFDHE for DH ciperhsuites.
 #endif
 
 static int TLSX_SupportedCurve_New(SupportedCurve** curve, word16 name,
@@ -3503,7 +3776,7 @@
 
     while (list) {
         if (list->name == name) {
-            ret = 0; /* curve alreay in use */
+            ret = 0; /* curve already in use */
             break;
         }
 
@@ -3543,13 +3816,24 @@
 
 static void TLSX_SupportedCurve_ValidateRequest(WOLFSSL* ssl, byte* semaphore)
 {
-    int i;
-
-    for (i = 0; i < ssl->suites->suiteSz; i+= 2)
+    word16 i;
+
+    for (i = 0; i < ssl->suites->suiteSz; i+= 2) {
+        if (ssl->suites->suites[i] == TLS13_BYTE)
+            return;
         if (ssl->suites->suites[i] == ECC_BYTE ||
-                ssl->suites->suites[i] == CHACHA_BYTE ||
-                ssl->suites->suites[i] == TLS13_BYTE)
+                ssl->suites->suites[i] == CHACHA_BYTE) {
+        #if defined(HAVE_ECC) || defined(HAVE_CURVE25519) || \
+                                                          defined(HAVE_CURVE448)
             return;
+        #endif
+        }
+        else {
+        #ifdef HAVE_FFDHE
+            return;
+        #endif
+        }
+    }
 
     /* turns semaphore on to avoid sending this extension. */
     TURN_ON(semaphore, TLSX_ToSemaphore(TLSX_SUPPORTED_GROUPS));
@@ -3557,13 +3841,24 @@
 
 static void TLSX_PointFormat_ValidateRequest(WOLFSSL* ssl, byte* semaphore)
 {
-    int i;
-
-    for (i = 0; i < ssl->suites->suiteSz; i+= 2)
+    word16 i;
+
+    for (i = 0; i < ssl->suites->suiteSz; i+= 2) {
+        if (ssl->suites->suites[i] == TLS13_BYTE)
+            return;
         if (ssl->suites->suites[i] == ECC_BYTE ||
-                ssl->suites->suites[i] == CHACHA_BYTE ||
-                ssl->suites->suites[i] == TLS13_BYTE)
+                ssl->suites->suites[i] == CHACHA_BYTE) {
+        #if defined(HAVE_ECC) || defined(HAVE_CURVE25519) || \
+                                                          defined(HAVE_CURVE448)
             return;
+        #endif
+        }
+        else {
+        #ifdef HAVE_FFDHE
+            return;
+        #endif
+        }
+    }
 
     /* turns semaphore on to avoid sending this extension. */
     TURN_ON(semaphore, TLSX_ToSemaphore(TLSX_EC_POINT_FORMATS));
@@ -3575,13 +3870,30 @@
 
 static void TLSX_PointFormat_ValidateResponse(WOLFSSL* ssl, byte* semaphore)
 {
+#if defined(HAVE_FFDHE) || defined(HAVE_ECC) || defined(HAVE_CURVE25519) || \
+                                                          defined(HAVE_CURVE448)
+    (void)semaphore;
+#endif
+
+    if (ssl->options.cipherSuite0 == TLS13_BYTE)
+        return;
     if (ssl->options.cipherSuite0 == ECC_BYTE ||
-        ssl->options.cipherSuite0 == CHACHA_BYTE ||
-        ssl->options.cipherSuite0 == TLS13_BYTE)
+        ssl->options.cipherSuite0 == CHACHA_BYTE) {
+#if defined(HAVE_ECC) || defined(HAVE_CURVE25519) || defined(HAVE_CURVE448)
         return;
-
+#endif
+    }
+    else {
+#ifdef HAVE_FFDHE
+        return;
+#endif
+    }
+
+#if !defined(HAVE_FFDHE) || (!defined(HAVE_ECC) && !defined(HAVE_CURVE25519) \
+                                                     && !defined(HAVE_CURVE448))
     /* turns semaphore on to avoid sending this extension. */
     TURN_ON(semaphore, TLSX_ToSemaphore(TLSX_EC_POINT_FORMATS));
+#endif
 }
 
 #endif
@@ -3658,8 +3970,13 @@
     word16 name;
     int ret;
 
-    if(!isRequest && !IsAtLeastTLSv1_3(ssl->version))
+    if(!isRequest && !IsAtLeastTLSv1_3(ssl->version)) {
+#ifdef WOLFSSL_ALLOW_SERVER_SC_EXT
+        return 0;
+#else
         return BUFFER_ERROR; /* servers doesn't send this extension. */
+#endif
+    }
 
     if (OPAQUE16_LEN > length || length % OPAQUE16_LEN)
         return BUFFER_ERROR;
@@ -3710,8 +4027,9 @@
 
 #endif
 
-#if !defined(NO_WOLFSSL_SERVER) && defined(WOLFSSL_TLS13) && \
-                                          !defined(WOLFSSL_NO_SERVER_GROUPS_EXT)
+#if !defined(NO_WOLFSSL_SERVER)
+
+#if defined(WOLFSSL_TLS13) && !defined(WOLFSSL_NO_SERVER_GROUPS_EXT)
 
 /* Checks the priority of the groups on the server and set the supported groups
  * response if there is a group not advertised by the client that is preferred.
@@ -3764,6 +4082,128 @@
 
 #endif
 
+#if defined(HAVE_FFDHE) && !defined(WOLFSSL_NO_TLS12)
+/* Set the highest priority common FFDHE group on the server as compared to
+ * client extensions.
+ *
+ * ssl    SSL/TLS object.
+ * returns 0 on success, otherwise an error.
+ */
+int TLSX_SupportedFFDHE_Set(WOLFSSL* ssl)
+{
+    int ret = 0;
+    TLSX* extension;
+    TLSX* priority = NULL;
+    TLSX* ext = NULL;
+    SupportedCurve* serverGroup;
+    SupportedCurve* clientGroup;
+    SupportedCurve* group;
+    const DhParams* params = NULL;
+    int found = 0;
+
+    extension = TLSX_Find(ssl->extensions, TLSX_SUPPORTED_GROUPS);
+    /* May be doing PSK with no key exchange. */
+    if (extension == NULL)
+        return 0;
+    clientGroup = (SupportedCurve*)extension->data;
+    for (group = clientGroup; group != NULL; group = group->next) {
+        if (group->name >= MIN_FFHDE_GROUP && group->name <= MAX_FFHDE_GROUP) {
+            found = 1;
+            break;
+        }
+    }
+    if (!found)
+        return 0;
+
+    if (ssl->buffers.serverDH_P.buffer && ssl->buffers.weOwnDH) {
+        XFREE(ssl->buffers.serverDH_P.buffer, ssl->heap,
+                                                       DYNAMIC_TYPE_PUBLIC_KEY);
+    }
+    if (ssl->buffers.serverDH_G.buffer && ssl->buffers.weOwnDH) {
+        XFREE(ssl->buffers.serverDH_G.buffer, ssl->heap,
+                                                       DYNAMIC_TYPE_PUBLIC_KEY);
+    }
+    ssl->buffers.serverDH_P.buffer = NULL;
+    ssl->buffers.serverDH_G.buffer = NULL;
+    ssl->buffers.weOwnDH = 0;
+    ssl->options.haveDH = 0;
+
+
+    if ((ret = TLSX_PopulateSupportedGroups(ssl, &priority)) != WOLFSSL_SUCCESS)
+        return ret;
+    ret = 0;
+
+    ext = TLSX_Find(priority, TLSX_SUPPORTED_GROUPS);
+    serverGroup = (SupportedCurve*)ext->data;
+
+    for (; serverGroup != NULL; serverGroup = serverGroup->next) {
+        if ((serverGroup->name & NAMED_DH_MASK) != NAMED_DH_MASK)
+            continue;
+
+        for (group = clientGroup; group != NULL; group = group->next) {
+            if (serverGroup->name != group->name)
+                continue;
+
+            switch (serverGroup->name) {
+            #ifdef HAVE_FFDHE_2048
+                case WOLFSSL_FFDHE_2048:
+                    params = wc_Dh_ffdhe2048_Get();
+                    break;
+            #endif
+            #ifdef HAVE_FFDHE_3072
+                case WOLFSSL_FFDHE_3072:
+                    params = wc_Dh_ffdhe3072_Get();
+                    break;
+            #endif
+            #ifdef HAVE_FFDHE_4096
+                case WOLFSSL_FFDHE_4096:
+                    params = wc_Dh_ffdhe4096_Get();
+                    break;
+            #endif
+            #ifdef HAVE_FFDHE_6144
+                case WOLFSSL_FFDHE_6144:
+                    params = wc_Dh_ffdhe6144_Get();
+                    break;
+            #endif
+            #ifdef HAVE_FFDHE_8192
+                case WOLFSSL_FFDHE_8192:
+                    params = wc_Dh_ffdhe8192_Get();
+                    break;
+            #endif
+            }
+            if (params == NULL)
+                return BAD_FUNC_ARG;
+            if (params->p_len >= ssl->options.minDhKeySz &&
+                                     params->p_len <= ssl->options.maxDhKeySz) {
+                break;
+            }
+        }
+
+        if (group != NULL && serverGroup->name == group->name)
+            break;
+    }
+
+    if (serverGroup) {
+        ssl->buffers.serverDH_P.buffer = (unsigned char *)params->p;
+        ssl->buffers.serverDH_P.length = params->p_len;
+        ssl->buffers.serverDH_G.buffer = (unsigned char *)params->g;
+        ssl->buffers.serverDH_G.length = params->g_len;
+        ssl->namedGroup = serverGroup->name;
+    #if !defined(WOLFSSL_OLD_PRIME_CHECK) && \
+        !defined(HAVE_FIPS) && !defined(HAVE_SELFTEST)
+        ssl->options.dhDoKeyTest = 0;
+    #endif
+        ssl->options.haveDH = 1;
+    }
+
+    TLSX_FreeAll(priority, ssl->heap);
+
+    return ret;
+}
+#endif /* HAVE_FFDHE && !WOLFSSL_NO_TLS12 */
+
+#endif /* !NO_WOLFSSL_SERVER */
+
 #if defined(WOLFSSL_TLS13) && !defined(WOLFSSL_NO_SERVER_GROUPS_EXT)
 /* Return the preferred group.
  *
@@ -3800,7 +4240,7 @@
     int ret;
 
     /* validating formats list length */
-    if (ENUM_LEN > length || length != ENUM_LEN + input[0])
+    if (ENUM_LEN > length || length != (word16)ENUM_LEN + input[0])
         return BUFFER_ERROR;
 
     if (isRequest) {
@@ -3816,26 +4256,26 @@
     return 0;
 }
 
-#if defined(HAVE_ECC) || defined(HAVE_CURVE25519)
+#if defined(HAVE_ECC) || defined(HAVE_CURVE25519) || defined(HAVE_CURVE448)
 int TLSX_ValidateSupportedCurves(WOLFSSL* ssl, byte first, byte second) {
-    TLSX*          extension = (first == ECC_BYTE || first == CHACHA_BYTE)
-                             ? TLSX_Find(ssl->extensions, TLSX_SUPPORTED_GROUPS)
-                             : NULL;
+    TLSX*           extension = NULL;
     SupportedCurve* curve     = NULL;
-    word32         oid       = 0;
-    word32         pkOid     = 0;
-    word32         defOid    = 0;
-    word32         defSz     = 80; /* Maximum known curve size is 66. */
-    word32         nextOid   = 0;
-    word32         nextSz    = 80; /* Maximum known curve size is 66. */
-    word32         currOid   = ssl->ecdhCurveOID;
-    int            ephmSuite = 0;
-    word16         octets    = 0; /* according to 'ecc_set_type ecc_sets[];' */
-    int            sig       = 0; /* validate signature */
-    int            key       = 0; /* validate key       */
+    word32          oid       = 0;
+    word32          pkOid     = 0;
+    word32          defOid    = 0;
+    word32          defSz     = 80; /* Maximum known curve size is 66. */
+    word32          nextOid   = 0;
+    word32          nextSz    = 80; /* Maximum known curve size is 66. */
+    word32          currOid   = ssl->ecdhCurveOID;
+    int             ephmSuite = 0;
+    word16          octets    = 0; /* according to 'ecc_set_type ecc_sets[];' */
+    int             sig       = 0; /* validate signature */
+    int             key       = 0; /* validate key       */
 
     (void)oid;
 
+    if (first == ECC_BYTE || first == CHACHA_BYTE)
+        extension = TLSX_Find(ssl->extensions, TLSX_SUPPORTED_GROUPS);
     if (!extension)
         return 1; /* no suite restriction */
 
@@ -3844,6 +4284,10 @@
          curve = curve->next) {
 
     #ifdef OPENSSL_EXTRA
+        /* skip if name is not in supported ECC range */
+        if (curve->name > WOLFSSL_ECC_X448)
+            continue;
+        /* skip if curve is disabled by user */
         if (ssl->ctx->disabledCurves & (1 << curve->name))
             continue;
     #endif
@@ -3934,6 +4378,19 @@
                 break;
         #endif /* HAVE_ECC_BRAINPOOL */
     #endif
+#endif
+        #ifdef HAVE_CURVE448
+            case WOLFSSL_ECC_X448:
+                oid = ECC_X448_OID;
+            #ifdef HAVE_ED448
+                pkOid = ECC_ED448_OID;
+            #else
+                pkOid = ECC_X448_OID;
+            #endif
+                octets = 57;
+                break;
+        #endif /* HAVE_CURVE448 */
+#ifdef HAVE_ECC
     #if defined(HAVE_ECC384) || defined(HAVE_ALL_CURVES)
         #ifndef NO_ECC_SECP
             case WOLFSSL_ECC_SECP384R1:
@@ -3975,6 +4432,7 @@
             defSz = octets;
         }
 
+        /* The eccTempKeySz is the preferred ephemeral key size */
         if (currOid == 0 && ssl->eccTempKeySz == octets)
             currOid = oid;
         if ((nextOid == 0 || nextSz > octets) && ssl->eccTempKeySz <= octets) {
@@ -4027,6 +4485,10 @@
                         defOid = 0;
                         defSz = 80;
                     }
+                    if (oid == ECC_X448_OID && defOid == oid) {
+                        defOid = 0;
+                        defSz = 80;
+                    }
                     sig |= ssl->pkCurveOID == pkOid;
                     key |= ssl->pkCurveOID == oid;
                 break;
@@ -4060,6 +4522,10 @@
                         defOid = 0;
                         defSz = 80;
                     }
+                    if (oid == ECC_X448_OID && defOid == oid) {
+                        defOid = 0;
+                        defSz = 80;
+                    }
                     sig = 1;
                     key |= ssl->pkCurveOID == pkOid;
                 break;
@@ -4070,8 +4536,13 @@
                         defOid = 0;
                         defSz = 80;
                     }
-                    if (oid != ECC_X25519_OID)
+                    if (oid == ECC_X448_OID && defOid == oid) {
+                        defOid = 0;
+                        defSz = 80;
+                    }
+                    if (oid != ECC_X25519_OID && oid != ECC_X448_OID) {
                         sig = 1;
+                    }
                     key = 1;
                 break;
             }
@@ -4260,7 +4731,7 @@
     byte length = OPAQUE8_LEN; /* empty info length */
 
     /* data will be NULL for HAVE_SERVER_RENEGOTIATION_INFO only */
-    if (data && data->enabled) {
+    if (data && data->enabled && data->verifySet) {
         /* client sends client_verify_data only */
         length += TLS_FINISHED_SZ;
 
@@ -4276,8 +4747,7 @@
                                                     byte* output, int isRequest)
 {
     word16 offset = OPAQUE8_LEN; /* RenegotiationInfo length */
-
-    if (data && data->enabled) {
+    if (data && data->enabled && data->verifySet) {
         /* client sends client_verify_data only */
         XMEMCPY(output + offset, data->client_verify_data, TLS_FINISHED_SZ);
         offset += TLS_FINISHED_SZ;
@@ -4300,39 +4770,48 @@
     int ret = SECURE_RENEGOTIATION_E;
 
     if (length >= OPAQUE8_LEN) {
-        if (ssl->secure_renegotiation == NULL) {
+        if (isRequest) {
         #ifndef NO_WOLFSSL_SERVER
-            if (isRequest && *input == 0) {
-            #ifdef HAVE_SERVER_RENEGOTIATION_INFO
-                if (length == OPAQUE8_LEN) {
-                    if (TLSX_Find(ssl->extensions,
-                                  TLSX_RENEGOTIATION_INFO) == NULL) {
-                        ret = TLSX_AddEmptyRenegotiationInfo(&ssl->extensions,
-                                                             ssl->heap);
-                        if (ret == WOLFSSL_SUCCESS)
-                            ret = 0;
-
+            if (ssl->secure_renegotiation == NULL) {
+                ret = wolfSSL_UseSecureRenegotiation(ssl);
+                if (ret == WOLFSSL_SUCCESS)
+                    ret = 0;
+            }
+            if (ret != 0 && ret != SECURE_RENEGOTIATION_E) {
+            }
+            else if (!ssl->secure_renegotiation->enabled) {
+                if (*input == 0) {
+                    input++; /* get past size */
+
+                    ssl->secure_renegotiation->enabled = 1;
+                    TLSX_SetResponse(ssl, TLSX_RENEGOTIATION_INFO);
+                    ret = 0;
+                }
+                else {
+                    /* already in error state */
+                    WOLFSSL_MSG("SCR client verify data present");
+                }
+            }
+            else if (*input == TLS_FINISHED_SZ) {
+                if (length < TLS_FINISHED_SZ + 1) {
+                    WOLFSSL_MSG("SCR malformed buffer");
+                    ret = BUFFER_E;
+                }
+                else {
+                    input++; /* get past size */
+
+                    /* validate client verify data */
+                    if (XMEMCMP(input,
+                            ssl->secure_renegotiation->client_verify_data,
+                            TLS_FINISHED_SZ) == 0) {
+                        WOLFSSL_MSG("SCR client verify data match");
+                        TLSX_SetResponse(ssl, TLSX_RENEGOTIATION_INFO);
+                        ret = 0;  /* verified */
                     } else {
-                        ret = 0;
+                        /* already in error state */
+                        WOLFSSL_MSG("SCR client verify data Failure");
                     }
                 }
-            #else
-                ret = 0;  /* don't reply, user didn't enable */
-            #endif /* HAVE_SERVER_RENEGOTIATION_INFO */
-            }
-            #ifdef HAVE_SERVER_RENEGOTIATION_INFO
-            else if (!isRequest) {
-                /* don't do anything on client side */
-                ret = 0;
-            }
-            #endif
-        #endif
-        }
-        else if (isRequest) {
-        #ifndef NO_WOLFSSL_SERVER
-            if (*input == TLS_FINISHED_SZ) {
-                /* TODO compare client_verify_data */
-                ret = 0;
             }
         #endif
         }
@@ -4376,7 +4855,7 @@
 int TLSX_UseSecureRenegotiation(TLSX** extensions, void* heap)
 {
     int ret = 0;
-    SecureRenegotiation* data = NULL;
+    SecureRenegotiation* data;
 
     data = (SecureRenegotiation*)XMALLOC(sizeof(SecureRenegotiation), heap,
                                                              DYNAMIC_TYPE_TLSX);
@@ -4400,12 +4879,15 @@
 {
     int ret;
 
-    ret = TLSX_Push(extensions, TLSX_RENEGOTIATION_INFO, NULL, heap);
-    if (ret != 0)
-        return ret;
-
     /* send empty renegotiation_info extension */
     TLSX* ext = TLSX_Find(*extensions, TLSX_RENEGOTIATION_INFO);
+    if (ext == NULL) {
+        ret = TLSX_UseSecureRenegotiation(extensions, heap);
+        if (ret != WOLFSSL_SUCCESS)
+            return ret;
+
+        ext = TLSX_Find(*extensions, TLSX_RENEGOTIATION_INFO);
+    }
     if (ext)
         ext->resp = 1;
 
@@ -4514,7 +4996,7 @@
             /* got actual ticket from client */
             ret = DoClientTicket(ssl, input, length);
             if (ret == WOLFSSL_TICKET_RET_OK) {    /* use ticket to resume */
-                WOLFSSL_MSG("Using exisitng client ticket");
+                WOLFSSL_MSG("Using existing client ticket");
                 ssl->options.useTicket = 1;
                 ssl->options.resuming  = 1;
             } else if (ret == WOLFSSL_TICKET_RET_CREATE) {
@@ -4559,6 +5041,8 @@
         ticket->lifetime = lifetime;
     }
 
+    (void)heap;
+
     return ticket;
 }
 WOLFSSL_LOCAL void TLSX_SessionTicket_Free(SessionTicket* ticket, void* heap)
@@ -4735,7 +5219,6 @@
 
 
 /* write public key list in extension */
-static word16 TLSX_QSHPK_WriteR(QSHScheme* format, byte* output);
 static word16 TLSX_QSHPK_WriteR(QSHScheme* format, byte* output)
 {
     word32 offset = 0;
@@ -5127,7 +5610,7 @@
 int TLSX_UseQSHScheme(TLSX** extensions, word16 name, byte* pKey, word16 pkeySz,
                                                                      void* heap)
 {
-    TLSX*      extension = TLSX_Find(*extensions, TLSX_QUANTUM_SAFE_HYBRID);
+    TLSX*      extension = NULL;
     QSHScheme* format    = NULL;
     int        ret       = 0;
 
@@ -5135,11 +5618,14 @@
     if (extensions == NULL || (pKey == NULL && pkeySz != 0))
         return BAD_FUNC_ARG;
 
+    extension = TLSX_Find(*extensions, TLSX_QUANTUM_SAFE_HYBRID);
+
     /* if scheme is implemented than add */
     if (TLSX_HaveQSHScheme(name)) {
         if ((ret = TLSX_QSH_Append(&format, name, pKey, pkeySz)) != 0)
             return ret;
 
+        extension = TLSX_Find(*extensions, TLSX_QUANTUM_SAFE_HYBRID);
         if (!extension) {
             if ((ret = TLSX_Push(extensions, TLSX_QUANTUM_SAFE_HYBRID, format,
                                                                   heap)) != 0) {
@@ -5175,7 +5661,7 @@
 #define QSH_GET_SIZE TLSX_QSH_GetSize
 #define QSH_WRITE    TLSX_QSH_Write
 #else
-#define QSH_GET_SIZE(list)         0
+#define QSH_GET_SIZE(list, a)      0
 #define QSH_WRITE(a, b)            0
 #endif
 
@@ -5199,6 +5685,148 @@
 
 #endif /* HAVE_QSH */
 
+#if defined(HAVE_ENCRYPT_THEN_MAC) && !defined(WOLFSSL_AEAD_ONLY)
+/******************************************************************************/
+/* Encrypt-then-MAC                                                           */
+/******************************************************************************/
+
+#ifndef WOLFSSL_NO_TLS12
+static int TLSX_EncryptThenMac_Use(WOLFSSL* ssl);
+
+/**
+ * Get the size of the Encrypt-Then-MAC extension.
+ *
+ * msgType  Type of message to put extension into.
+ * pSz      Size of extension data.
+ * return SANITY_MSG_E when the message is not allowed to have extension and
+ *        0 otherwise.
+ */
+static int TLSX_EncryptThenMac_GetSize(byte msgType, word16* pSz)
+{
+    (void)pSz;
+
+    if (msgType != client_hello && msgType != server_hello) {
+        return SANITY_MSG_E;
+    }
+
+    /* Empty extension */
+
+    return 0;
+}
+
+/**
+ * Write the Encrypt-Then-MAC extension.
+ *
+ * data     Unused
+ * output   Extension data buffer. Unused.
+ * msgType  Type of message to put extension into.
+ * pSz      Size of extension data.
+ * return SANITY_MSG_E when the message is not allowed to have extension and
+ *        0 otherwise.
+ */
+static int TLSX_EncryptThenMac_Write(void* data, byte* output, byte msgType,
+                                     word16* pSz)
+{
+    (void)data;
+    (void)output;
+    (void)pSz;
+
+    if (msgType != client_hello && msgType != server_hello) {
+        return SANITY_MSG_E;
+    }
+
+    /* Empty extension */
+
+    return 0;
+}
+
+/**
+ * Parse the Encrypt-Then-MAC extension.
+ *
+ * ssl      SSL object
+ * input    Extension data buffer.
+ * length   Length of this extension's data.
+ * msgType  Type of message to extension appeared in.
+ * return SANITY_MSG_E when the message is not allowed to have extension,
+ *        BUFFER_ERROR when the extension's data is invalid,
+ *        MEMORY_E when unable to allocate memory and
+ *        0 otherwise.
+ */
+static int TLSX_EncryptThenMac_Parse(WOLFSSL* ssl, byte* input, word16 length,
+                                     byte msgType)
+{
+    int ret;
+
+    (void)input;
+
+    if (msgType != client_hello && msgType != server_hello) {
+        return SANITY_MSG_E;
+    }
+
+    /* Empty extension */
+    if (length != 0)
+        return BUFFER_ERROR;
+
+    if (msgType == client_hello) {
+        /* Check the user hasn't disallowed use of Encrypt-Then-Mac. */
+        if (!ssl->options.disallowEncThenMac) {
+            ssl->options.encThenMac = 1;
+            /* Set the extension reply. */
+            ret = TLSX_EncryptThenMac_Use(ssl);
+            if (ret != 0)
+                return ret;
+            TLSX_SetResponse(ssl, TLSX_ENCRYPT_THEN_MAC);
+        }
+        return 0;
+    }
+
+    /* Server Hello */
+    if (ssl->options.disallowEncThenMac)
+        return SANITY_MSG_E;
+
+    ssl->options.encThenMac = 1;
+    return 0;
+
+}
+
+/**
+ * Add the Encrypt-Then-MAC extension to list.
+ *
+ * ssl      SSL object
+ * return MEMORY_E when unable to allocate memory and 0 otherwise.
+ */
+static int TLSX_EncryptThenMac_Use(WOLFSSL* ssl)
+{
+    int   ret = 0;
+    TLSX* extension;
+
+    /* Find the Encrypt-Then-Mac extension if it exists. */
+    extension = TLSX_Find(ssl->extensions, TLSX_ENCRYPT_THEN_MAC);
+    if (extension == NULL) {
+        /* Push new Encrypt-Then-Mac extension. */
+        ret = TLSX_Push(&ssl->extensions, TLSX_ENCRYPT_THEN_MAC, NULL,
+            ssl->heap);
+        if (ret != 0)
+            return ret;
+    }
+
+    return 0;
+}
+
+#define ETM_GET_SIZE  TLSX_EncryptThenMac_GetSize
+#define ETM_WRITE     TLSX_EncryptThenMac_Write
+#define ETM_PARSE     TLSX_EncryptThenMac_Parse
+
+#else
+
+#define ETM_GET_SIZE(a, b)    0
+#define ETM_WRITE(a, b, c, d) 0
+#define ETM_PARSE(a, b, c, d) 0
+
+#endif /* !WOLFSSL_NO_TLS12 */
+
+#endif /* HAVE_ENCRYPT_THEN_MAC && !WOLFSSL_AEAD_ONLY */
+
 /******************************************************************************/
 /* Supported Versions                                                         */
 /******************************************************************************/
@@ -5216,19 +5844,34 @@
 
     if (msgType == client_hello) {
         /* TLS v1.2 and TLS v1.3  */
-        int cnt = 2;
+        int cnt = 0;
+
+        #if defined(OPENSSL_EXTRA) || defined(HAVE_WEBSERVER)
+            if ((ssl->options.mask & SSL_OP_NO_TLSv1_3) == 0)
+        #endif
+                cnt++;
+
+        if (ssl->options.downgrade) {
+#ifndef WOLFSSL_NO_TLS12
+        #if defined(OPENSSL_EXTRA) || defined(HAVE_WEBSERVER)
+            if ((ssl->options.mask & SSL_OP_NO_TLSv1_2) == 0)
+        #endif
+                cnt++;
+#endif
 
 #ifndef NO_OLD_TLS
-        /* TLS v1.1  */
-        cnt++;
+        #if defined(OPENSSL_EXTRA) || defined(HAVE_WEBSERVER)
+            if ((ssl->options.mask & SSL_OP_NO_TLSv1_1) == 0)
+        #endif
+                cnt++;
     #ifdef WOLFSSL_ALLOW_TLSV10
-        /* TLS v1.0  */
-        cnt++;
-    #endif
-#endif
-
-        if (!ssl->options.downgrade)
-            cnt = 1;
+        #if defined(OPENSSL_EXTRA) || defined(HAVE_WEBSERVER)
+            if ((ssl->options.mask & SSL_OP_NO_TLSv1) == 0)
+        #endif
+                cnt++;
+    #endif
+#endif
+        }
 
         *pSz += (word16)(OPAQUE8_LEN + cnt * OPAQUE16_LEN);
     }
@@ -5250,52 +5893,72 @@
  * returns the length of data that was written.
  */
 static int TLSX_SupportedVersions_Write(void* data, byte* output,
-                                           byte msgType, word16* pSz)
+                                        byte msgType, word16* pSz)
 {
     WOLFSSL* ssl = (WOLFSSL*)data;
-    ProtocolVersion pv;
-    int i;
-    int cnt;
+    byte major;
+    byte* cnt;
 
     if (msgType == client_hello) {
-        pv = ssl->ctx->method->version;
-        /* TLS v1.2 and TLS v1.3  */
-        cnt = 2;
-
-#ifndef NO_OLD_TLS
-        /* TLS v1.1  */
-        cnt++;
-    #ifdef WOLFSSL_ALLOW_TLSV10
-        /* TLS v1.0  */
-        cnt++;
-    #endif
-#endif
-
-        if (!ssl->options.downgrade)
-            cnt = 1;
-
-        *(output++) = (byte)(cnt * OPAQUE16_LEN);
-        for (i = 0; i < cnt; i++) {
-#ifndef WOLFSSL_TLS13_FINAL
-            /* TODO: [TLS13] Remove code when TLS v1.3 becomes an RFC. */
-            if (pv.minor - i == TLSv1_3_MINOR) {
+        major = ssl->ctx->method->version.major;
+
+
+        cnt = output++;
+        *cnt = 0;
+        #if defined(OPENSSL_EXTRA) || defined(HAVE_WEBSERVER)
+            if ((ssl->options.mask & SSL_OP_NO_TLSv1_3) == 0)
+        #endif
+            {
+                *cnt += OPAQUE16_LEN;
+#ifdef WOLFSSL_TLS13_DRAFT
                 /* The TLS draft major number. */
                 *(output++) = TLS_DRAFT_MAJOR;
                 /* Version of draft supported. */
                 *(output++) = TLS_DRAFT_MINOR;
-                continue;
+#else
+                *(output++) = major;
+                *(output++) = (byte)TLSv1_3_MINOR;
+#endif
+            }
+        if (ssl->options.downgrade) {
+#ifndef WOLFSSL_NO_TLS12
+        #if defined(OPENSSL_EXTRA) || defined(HAVE_WEBSERVER)
+            if ((ssl->options.mask & SSL_OP_NO_TLSv1_2) == 0)
+        #endif
+            {
+                *cnt += OPAQUE16_LEN;
+                *(output++) = major;
+                *(output++) = (byte)TLSv1_2_MINOR;
             }
 #endif
 
-            *(output++) = pv.major;
-            *(output++) = (byte)(pv.minor - i);
-        }
-
-        *pSz += (word16)(OPAQUE8_LEN + cnt * OPAQUE16_LEN);
+#ifndef NO_OLD_TLS
+        #if defined(OPENSSL_EXTRA) || defined(HAVE_WEBSERVER)
+            if ((ssl->options.mask & SSL_OP_NO_TLSv1_1) == 0)
+        #endif
+            {
+                *cnt += OPAQUE16_LEN;
+                *(output++) = major;
+                *(output++) = (byte)TLSv1_1_MINOR;
+            }
+    #ifdef WOLFSSL_ALLOW_TLSV10
+        #if defined(OPENSSL_EXTRA) || defined(HAVE_WEBSERVER)
+            if ((ssl->options.mask & SSL_OP_NO_TLSv1) == 0)
+        #endif
+            {
+                *cnt += OPAQUE16_LEN;
+                *(output++) = major;
+                *(output++) = (byte)TLSv1_MINOR;
+            }
+    #endif
+#endif
+        }
+
+        *pSz += (word16)(OPAQUE8_LEN + *cnt);
     }
 #ifndef WOLFSSL_TLS13_DRAFT_18
     else if (msgType == server_hello || msgType == hello_retry_request) {
-    #ifndef WOLFSSL_TLS13_FINAL
+    #ifdef WOLFSSL_TLS13_DRAFT
         if (ssl->version.major == SSLv3_MAJOR &&
                                           ssl->version.minor == TLSv1_3_MINOR) {
             output[0] = TLS_DRAFT_MAJOR;
@@ -5333,6 +5996,7 @@
     int len;
     byte major, minor;
     int newMinor = 0;
+    int set = 0;
 
     if (msgType == client_hello) {
         /* Must contain a length and at least one version. */
@@ -5342,7 +6006,7 @@
         len = *input;
 
         /* Protocol version array must fill rest of data. */
-        if (length != OPAQUE8_LEN + len)
+        if (length != (word16)OPAQUE8_LEN + len)
             return BUFFER_ERROR;
 
         input++;
@@ -5352,12 +6016,14 @@
             major = input[i];
             minor = input[i + OPAQUE8_LEN];
 
-#ifndef WOLFSSL_TLS13_FINAL
-            /* TODO: [TLS13] Remove code when TLS v1.3 becomes an RFC. */
+#ifdef WOLFSSL_TLS13_DRAFT
             if (major == TLS_DRAFT_MAJOR && minor == TLS_DRAFT_MINOR) {
                 major = SSLv3_MAJOR;
                 minor = TLSv1_3_MINOR;
             }
+#else
+            if (major == TLS_DRAFT_MAJOR)
+                continue;
 #endif
 
             if (major != pv.major)
@@ -5396,6 +6062,16 @@
             }
             else if (minor > ssl->options.oldMinor)
                 ssl->options.oldMinor = minor;
+
+            set = 1;
+        }
+        if (!set) {
+ #ifdef WOLFSSL_MYSQL_COMPATIBLE
+            SendAlert(ssl, alert_fatal, wc_protocol_version);
+ #else
+            SendAlert(ssl, alert_fatal, protocol_version);
+ #endif
+            return VERSION_ERROR;
         }
     }
 #ifndef WOLFSSL_TLS13_DRAFT_18
@@ -5407,8 +6083,7 @@
         major = input[0];
         minor = input[OPAQUE8_LEN];
 
-    #ifndef WOLFSSL_TLS13_FINAL
-        /* TODO: [TLS13] Remove code when TLS v1.3 becomes an RFC. */
+    #ifdef WOLFSSL_TLS13_DRAFT
         if (major == TLS_DRAFT_MAJOR && minor == TLS_DRAFT_MINOR) {
             major = SSLv3_MAJOR;
             minor = TLSv1_3_MINOR;
@@ -5523,7 +6198,8 @@
  * msgType  The type of the message this extension is being written into.
  * returns the number of bytes written into the buffer.
  */
-static int TLSX_Cookie_Write(Cookie* cookie, byte* output, byte msgType, word16* pSz)
+static int TLSX_Cookie_Write(Cookie* cookie, byte* output, byte msgType,
+                             word16* pSz)
 {
     if (msgType == client_hello || msgType == hello_retry_request) {
         c16toa(cookie->len, output);
@@ -5644,7 +6320,7 @@
 #define CKE_PARSE(a, b, c, d) 0
 
 #endif
-
+#if !defined(WOLFSSL_NO_SIGALG)
 /******************************************************************************/
 /* Signature Algorithms                                                       */
 /******************************************************************************/
@@ -5654,6 +6330,7 @@
  * data  Unused
  * returns the length of data that will be in the extension.
  */
+
 static word16 TLSX_SignatureAlgorithms_GetSize(void* data)
 {
     WOLFSSL* ssl = (WOLFSSL*)data;
@@ -5683,6 +6360,12 @@
     for (i = 0; i < length; i += 2) {
         if (input[i] == rsa_pss_sa_algo && input[i + 1] <= sha512_mac)
             ssl->pssAlgo |= 1 << input[i + 1];
+    #ifdef WOLFSSL_TLS13
+        if (input[i] == rsa_pss_sa_algo && input[i + 1] >= pss_sha256 &&
+                                                   input[i + 1] <= pss_sha512) {
+            ssl->pssAlgo |= 1 << input[i + 1];
+        }
+    #endif
     }
 
     return 0;
@@ -5764,7 +6447,7 @@
 #define SA_GET_SIZE  TLSX_SignatureAlgorithms_GetSize
 #define SA_WRITE     TLSX_SignatureAlgorithms_Write
 #define SA_PARSE     TLSX_SignatureAlgorithms_Parse
-
+#endif
 /******************************************************************************/
 /* Signature Algorithms Certificate                                           */
 /******************************************************************************/
@@ -6028,8 +6711,8 @@
     curve25519_key* key;
 
     /* Allocate an ECC key to hold private key. */
-    key = (curve25519_key*)XMALLOC(sizeof(curve25519_key),
-                                           ssl->heap, DYNAMIC_TYPE_PRIVATE_KEY);
+    key = (curve25519_key*)XMALLOC(sizeof(curve25519_key), ssl->heap,
+                                                      DYNAMIC_TYPE_PRIVATE_KEY);
     if (key == NULL) {
         WOLFSSL_MSG("EccTempKey Memory error");
         return MEMORY_E;
@@ -6086,6 +6769,80 @@
     return ret;
 }
 
+/* Create a key share entry using X448 parameters group.
+ * Generates a key pair.
+ *
+ * ssl   The SSL/TLS object.
+ * kse   The key share entry object.
+ * returns 0 on success, otherwise failure.
+ */
+static int TLSX_KeyShare_GenX448Key(WOLFSSL *ssl, KeyShareEntry* kse)
+{
+    int             ret;
+#ifdef HAVE_CURVE448
+    byte*           keyData = NULL;
+    word32          dataSize = CURVE448_KEY_SIZE;
+    curve448_key*   key;
+
+    /* Allocate an ECC key to hold private key. */
+    key = (curve448_key*)XMALLOC(sizeof(curve448_key), ssl->heap,
+                                                      DYNAMIC_TYPE_PRIVATE_KEY);
+    if (key == NULL) {
+        WOLFSSL_MSG("EccTempKey Memory error");
+        return MEMORY_E;
+    }
+
+    /* Make an ECC key. */
+    ret = wc_curve448_init(key);
+    if (ret != 0)
+        goto end;
+    ret = wc_curve448_make_key(ssl->rng, CURVE448_KEY_SIZE, key);
+    if (ret != 0)
+        goto end;
+
+    /* Allocate space for the public key. */
+    keyData = (byte*)XMALLOC(CURVE448_KEY_SIZE, ssl->heap,
+                                                       DYNAMIC_TYPE_PUBLIC_KEY);
+    if (keyData == NULL) {
+        WOLFSSL_MSG("Key data Memory error");
+        ret = MEMORY_E;
+        goto end;
+    }
+
+    /* Export public key. */
+    if (wc_curve448_export_public_ex(key, keyData, &dataSize,
+                                                    EC448_LITTLE_ENDIAN) != 0) {
+        ret = ECC_EXPORT_ERROR;
+        goto end;
+    }
+
+    kse->pubKey = keyData;
+    kse->pubKeyLen = CURVE448_KEY_SIZE;
+    kse->key = key;
+
+#ifdef WOLFSSL_DEBUG_TLS
+    WOLFSSL_MSG("Public Curve448 Key");
+    WOLFSSL_BUFFER(keyData, dataSize);
+#endif
+
+end:
+    if (ret != 0) {
+        /* Data owned by key share entry otherwise. */
+        if (keyData != NULL)
+            XFREE(keyData, ssl->heap, DYNAMIC_TYPE_PUBLIC_KEY);
+        wc_curve448_free(key);
+        XFREE(key, ssl->heap, DYNAMIC_TYPE_PRIVATE_KEY);
+    }
+#else
+    (void)ssl;
+    (void)kse;
+
+    ret = NOT_COMPILED_IN;
+#endif /* HAVE_CURVE448 */
+
+    return ret;
+}
+
 /* Create a key share entry using named elliptic curve parameters group.
  * Generates a key pair.
  *
@@ -6220,6 +6977,8 @@
         return TLSX_KeyShare_GenDhKey(ssl, kse);
     if (kse->group == WOLFSSL_ECC_X25519)
         return TLSX_KeyShare_GenX25519Key(ssl, kse);
+    if (kse->group == WOLFSSL_ECC_X448)
+        return TLSX_KeyShare_GenX448Key(ssl, kse);
     return TLSX_KeyShare_GenEccKey(ssl, kse);
 }
 
@@ -6240,13 +6999,19 @@
                 wc_curve25519_free((curve25519_key*)current->key);
 #endif
             }
+            else if (current->group == WOLFSSL_ECC_X448) {
+#ifdef HAVE_CURVE448
+                wc_curve448_free((curve448_key*)current->key);
+#endif
+            }
             else {
 #ifdef HAVE_ECC
                 wc_ecc_free((ecc_key*)(current->key));
 #endif
             }
         }
-        XFREE(current->key, heap, DYNAMIC_TYPE_PRIVATE_KEY);
+        if (current->key != NULL)
+            XFREE(current->key, heap, DYNAMIC_TYPE_PRIVATE_KEY);
         XFREE(current->pubKey, heap, DYNAMIC_TYPE_PUBLIC_KEY);
         XFREE(current->ke, heap, DYNAMIC_TYPE_PUBLIC_KEY);
         XFREE(current, heap, DYNAMIC_TYPE_TLSX);
@@ -6263,7 +7028,7 @@
  */
 static word16 TLSX_KeyShare_GetSize(KeyShareEntry* list, byte msgType)
 {
-    int            len = 0;
+    word16         len = 0;
     byte           isRequest = (msgType == client_hello);
     KeyShareEntry* current;
 
@@ -6280,10 +7045,10 @@
         if (!isRequest && current->key == NULL)
             continue;
 
-        len += (int)(KE_GROUP_LEN + OPAQUE16_LEN + current->pubKeyLen);
-    }
-
-    return (word16)len;
+        len += KE_GROUP_LEN + OPAQUE16_LEN + current->pubKeyLen;
+    }
+
+    return len;
 }
 
 /* Writes the key share extension into the output buffer.
@@ -6425,14 +7190,34 @@
 #ifdef WOLFSSL_ASYNC_CRYPT
     /* TODO: Make this function non-blocking */
     if (ret == WC_PENDING_E) {
-        ret = wc_AsyncWait(ret, dhKey.asyncDev, WC_ASYNC_FLAG_NONE);
-    }
-#endif
+        ret = wc_AsyncWait(ret, &dhKey->asyncDev, WC_ASYNC_FLAG_NONE);
+    }
+#endif
+    /* RFC 8446 Section 7.4.1:
+     *     ... left-padded with zeros up to the size of the prime. ...
+     */
+    if (params->p_len > ssl->arrays->preMasterSz) {
+        word32 diff = params->p_len - ssl->arrays->preMasterSz;
+        XMEMMOVE(ssl->arrays->preMasterSecret + diff,
+                        ssl->arrays->preMasterSecret, ssl->arrays->preMasterSz);
+        XMEMSET(ssl->arrays->preMasterSecret, 0, diff);
+        ssl->arrays->preMasterSz = params->p_len;
+    }
+
+    ssl->options.dhKeySz = params->p_len;
 
     wc_FreeDhKey(dhKey);
 #ifdef WOLFSSL_SMALL_STACK
     XFREE(dhKey, ssl->heap, DYNAMIC_TYPE_DH);
 #endif
+    if (keyShareEntry->key != NULL) {
+        XFREE(keyShareEntry->key, ssl->heap, DYNAMIC_TYPE_PRIVATE_KEY);
+        keyShareEntry->key = NULL;
+    }
+    XFREE(keyShareEntry->pubKey, ssl->heap, DYNAMIC_TYPE_PUBLIC_KEY);
+    keyShareEntry->pubKey = NULL;
+    XFREE(keyShareEntry->ke, ssl->heap, DYNAMIC_TYPE_PUBLIC_KEY);
+    keyShareEntry->ke = NULL;
 
     return ret;
 #else
@@ -6480,15 +7265,20 @@
     WOLFSSL_BUFFER(keyShareEntry->ke, keyShareEntry->keLen);
 #endif
 
-    /* Point is validated by import function. */
-    if (wc_curve25519_import_public_ex(keyShareEntry->ke, keyShareEntry->keLen,
-                                                  peerX25519Key,
+    if (wc_curve25519_check_public(keyShareEntry->ke, keyShareEntry->keLen,
                                                   EC25519_LITTLE_ENDIAN) != 0) {
         ret = ECC_PEERKEY_ERROR;
     }
 
     if (ret == 0) {
-        ssl->arrays->preMasterSz = ENCRYPT_LEN;
+        if (wc_curve25519_import_public_ex(keyShareEntry->ke,
+                                            keyShareEntry->keLen, peerX25519Key,
+                                            EC25519_LITTLE_ENDIAN) != 0) {
+            ret = ECC_PEERKEY_ERROR;
+        }
+    }
+
+    if (ret == 0) {
         ssl->ecdhCurveOID = ECC_X25519_OID;
 
         ret = wc_curve25519_shared_secret_ex(key, peerX25519Key,
@@ -6496,8 +7286,14 @@
                                                    &ssl->arrays->preMasterSz,
                                                    EC25519_LITTLE_ENDIAN);
     }
+
     wc_curve25519_free(peerX25519Key);
     XFREE(peerX25519Key, ssl->heap, DYNAMIC_TYPE_TLSX);
+    wc_curve25519_free((curve25519_key*)keyShareEntry->key);
+    if (keyShareEntry->key != NULL) {
+        XFREE(keyShareEntry->key, ssl->heap, DYNAMIC_TYPE_PRIVATE_KEY);
+        keyShareEntry->key = NULL;
+    }
 #else
     (void)ssl;
     (void)keyShareEntry;
@@ -6508,6 +7304,82 @@
     return ret;
 }
 
+/* Process the X448 key share extension on the client side.
+ *
+ * ssl            The SSL/TLS object.
+ * keyShareEntry  The key share entry object to use to calculate shared secret.
+ * returns 0 on success and other values indicate failure.
+ */
+static int TLSX_KeyShare_ProcessX448(WOLFSSL* ssl, KeyShareEntry* keyShareEntry)
+{
+    int ret;
+
+#ifdef HAVE_CURVE448
+    curve448_key* key = (curve448_key*)keyShareEntry->key;
+    curve448_key* peerX448Key;
+
+#ifdef HAVE_ECC
+    if (ssl->peerEccKey != NULL) {
+        wc_ecc_free(ssl->peerEccKey);
+        ssl->peerEccKey = NULL;
+    }
+#endif
+
+    peerX448Key = (curve448_key*)XMALLOC(sizeof(curve448_key), ssl->heap,
+                                                             DYNAMIC_TYPE_TLSX);
+    if (peerX448Key == NULL) {
+        WOLFSSL_MSG("PeerEccKey Memory error");
+        return MEMORY_ERROR;
+    }
+    ret = wc_curve448_init(peerX448Key);
+    if (ret != 0) {
+        XFREE(peerX448Key, ssl->heap, DYNAMIC_TYPE_TLSX);
+        return ret;
+    }
+#ifdef WOLFSSL_DEBUG_TLS
+    WOLFSSL_MSG("Peer Curve448 Key");
+    WOLFSSL_BUFFER(keyShareEntry->ke, keyShareEntry->keLen);
+#endif
+
+    if (wc_curve448_check_public(keyShareEntry->ke, keyShareEntry->keLen,
+                                                    EC448_LITTLE_ENDIAN) != 0) {
+        ret = ECC_PEERKEY_ERROR;
+    }
+
+    if (ret == 0) {
+        if (wc_curve448_import_public_ex(keyShareEntry->ke,
+                                              keyShareEntry->keLen, peerX448Key,
+                                              EC448_LITTLE_ENDIAN) != 0) {
+            ret = ECC_PEERKEY_ERROR;
+        }
+    }
+
+    if (ret == 0) {
+        ssl->ecdhCurveOID = ECC_X448_OID;
+
+        ret = wc_curve448_shared_secret_ex(key, peerX448Key,
+                                                   ssl->arrays->preMasterSecret,
+                                                   &ssl->arrays->preMasterSz,
+                                                   EC448_LITTLE_ENDIAN);
+    }
+
+    wc_curve448_free(peerX448Key);
+    XFREE(peerX448Key, ssl->heap, DYNAMIC_TYPE_TLSX);
+    wc_curve448_free((curve448_key*)keyShareEntry->key);
+    if (keyShareEntry->key != NULL) {
+        XFREE(keyShareEntry->key, ssl->heap, DYNAMIC_TYPE_PRIVATE_KEY);
+        keyShareEntry->key = NULL;
+    }
+#else
+    (void)ssl;
+    (void)keyShareEntry;
+
+    ret = PEER_KEY_ERROR;
+#endif /* HAVE_CURVE448 */
+
+    return ret;
+}
+
 /* Process the ECC key share extension on the client side.
  *
  * ssl            The SSL/TLS object.
@@ -6580,7 +7452,6 @@
     }
     ssl->ecdhCurveOID = ssl->peerEccKey->dp->oidSum;
 
-    ssl->arrays->preMasterSz = ENCRYPT_LEN;
     do {
     #if defined(WOLFSSL_ASYNC_CRYPT)
         ret = wc_AsyncWait(ret, &keyShareKey->asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
@@ -6599,6 +7470,14 @@
     );
 #endif
 
+    wc_ecc_free(ssl->peerEccKey);
+    XFREE(ssl->peerEccKey, ssl->heap, DYNAMIC_TYPE_ECC);
+    ssl->peerEccKey = NULL;
+    wc_ecc_free((ecc_key*)(keyShareEntry->key));
+    if (keyShareEntry->key != NULL) {
+        XFREE(keyShareEntry->key, ssl->heap, DYNAMIC_TYPE_PRIVATE_KEY);
+        keyShareEntry->key = NULL;
+    }
 
 #else
     (void)ssl;
@@ -6628,6 +7507,8 @@
         ret = TLSX_KeyShare_ProcessDh(ssl, keyShareEntry);
     else if (keyShareEntry->group == WOLFSSL_ECC_X25519)
         ret = TLSX_KeyShare_ProcessX25519(ssl, keyShareEntry);
+    else if (keyShareEntry->group == WOLFSSL_ECC_X448)
+        ret = TLSX_KeyShare_ProcessX448(ssl, keyShareEntry);
     else
         ret = TLSX_KeyShare_ProcessEcc(ssl, keyShareEntry);
 
@@ -6665,7 +7546,9 @@
     /* Key exchange data - public key. */
     ato16(&input[offset], &keLen);
     offset += OPAQUE16_LEN;
-    if (keLen < 1 || keLen > length - offset)
+    if (keLen == 0)
+        return INVALID_PARAMETER;
+    if (keLen > length - offset)
         return BUFFER_ERROR;
 
     /* Store a copy in the key share object. */
@@ -6760,7 +7643,7 @@
                                byte msgType)
 {
     int ret;
-    KeyShareEntry *keyShareEntry;
+    KeyShareEntry *keyShareEntry = NULL;
     word16 group;
 
     if (msgType == client_hello) {
@@ -6786,8 +7669,8 @@
             return BUFFER_ERROR;
         offset += OPAQUE16_LEN;
 
-        while (offset < length) {
-            ret = TLSX_KeyShareEntry_Parse(ssl, &input[offset], length,
+        while (offset < (int)length) {
+            ret = TLSX_KeyShareEntry_Parse(ssl, &input[offset], length - offset,
                                                                 &keyShareEntry);
             if (ret < 0)
                 return ret;
@@ -6816,11 +7699,11 @@
 
         /* ServerHello contains one key share entry. */
         len = TLSX_KeyShareEntry_Parse(ssl, input, length, &keyShareEntry);
-        if (len != length)
+        if (len != (int)length)
             return BUFFER_ERROR;
 
         /* Not in list sent if there isn't a private key. */
-        if (keyShareEntry->key == NULL)
+        if (keyShareEntry == NULL || keyShareEntry->key == NULL)
             return BAD_KEY_SHARE_DATA;
 
         /* Process the entry to calculate the secret. */
@@ -6871,6 +7754,7 @@
                              KeyShareEntry** keyShareEntry)
 {
     KeyShareEntry* kse;
+    KeyShareEntry** next;
 
     kse = (KeyShareEntry*)XMALLOC(sizeof(KeyShareEntry), heap,
                                   DYNAMIC_TYPE_TLSX);
@@ -6881,8 +7765,11 @@
     kse->group = (word16)group;
 
     /* Add it to the back and maintain the links. */
-    while (*list != NULL)
-        list = &((*list)->next);
+    while (*list != NULL) {
+        /* Assign to temporary to work around compiler bug found by customer. */
+        next = &((*list)->next);
+        list = next;
+    }
     *list = kse;
     *keyShareEntry = kse;
 
@@ -6938,6 +7825,9 @@
     }
 
     if (data != NULL) {
+        if (keyShareEntry->ke != NULL) {
+            XFREE(keyShareEntry->ke, ssl->heap, DYNAMIC_TYPE_PUBLIC_KEY);
+        }
         keyShareEntry->ke = data;
         keyShareEntry->keLen = len;
     }
@@ -7016,6 +7906,10 @@
         case WOLFSSL_ECC_X25519:
             break;
     #endif
+    #ifdef HAVE_CURVE448
+        case WOLFSSL_ECC_X448:
+            break;
+    #endif
     #if defined(HAVE_ECC384) || defined(HAVE_ALL_CURVES)
         #ifndef NO_ECC_SECP
         case WOLFSSL_ECC_SECP384R1:
@@ -7028,10 +7922,6 @@
             break;
         #endif /* !NO_ECC_SECP */
     #endif
-    #ifdef HAVE_X448
-        case WOLFSSL_ECC_X448:
-            break;
-    #endif
         default:
             return 0;
     }
@@ -7064,6 +7954,11 @@
             ssl->group[ssl->numGroups++] = WOLFSSL_ECC_X25519;
         #endif
     #endif
+    #ifndef HAVE_FIPS
+        #if defined(HAVE_CURVE448)
+            ssl->group[ssl->numGroups++] = WOLFSSL_ECC_X448;
+        #endif
+    #endif
 #if defined(HAVE_ECC) && defined(HAVE_SUPPORTED_CURVES)
     #if defined(HAVE_ECC384) || defined(HAVE_ALL_CURVES)
         #ifndef NO_ECC_SECP
@@ -7095,7 +7990,7 @@
     }
 
     for (i = 0; i < ssl->numGroups; i++)
-        if (ssl->group[i] == group)
+        if (ssl->group[i] == (word16)group)
             return i;
 
     return -1;
@@ -7150,7 +8045,7 @@
     if (ret != 0)
         return ret;
 
-    /* Set extension to be in reponse. */
+    /* Set extension to be in response. */
     extension = TLSX_Find(ssl->extensions, TLSX_KEY_SHARE);
     extension->resp = 1;
 #else
@@ -7327,7 +8222,8 @@
  * returns the number of bytes of the encoded pre-shared key extension or
  * SANITY_MSG_E to indicate invalid message type.
  */
-static word16 TLSX_PreSharedKey_GetSize(PreSharedKey* list, byte msgType)
+static int TLSX_PreSharedKey_GetSize(PreSharedKey* list, byte msgType,
+                                     word16* pSz)
 {
     if (msgType == client_hello) {
         /* Length of identities + Length of binders. */
@@ -7338,14 +8234,16 @@
                    OPAQUE8_LEN + list->binderLen;
             list = list->next;
         }
-        return len;
+        *pSz += len;
+        return 0;
     }
 
     if (msgType == server_hello) {
-        return OPAQUE16_LEN;
-    }
-
-    return 0;
+        *pSz += OPAQUE16_LEN;
+        return 0;
+    }
+
+    return SANITY_MSG_E;
 }
 
 /* The number of bytes to be written for the binders.
@@ -7355,7 +8253,8 @@
  * returns the number of bytes of the encoded pre-shared key extension or
  * SANITY_MSG_E to indicate invalid message type.
  */
-word16 TLSX_PreSharedKey_GetSizeBinders(PreSharedKey* list, byte msgType)
+int TLSX_PreSharedKey_GetSizeBinders(PreSharedKey* list, byte msgType,
+                                     word16* pSz)
 {
     word16 len;
 
@@ -7369,7 +8268,8 @@
         list = list->next;
     }
 
-    return len;
+    *pSz = len;
+    return 0;
 }
 
 /* Writes the pre-shared key extension into the output buffer - binders only.
@@ -7380,8 +8280,8 @@
  * msgType  The type of the message this extension is being written into.
  * returns the number of bytes written into the buffer.
  */
-word16 TLSX_PreSharedKey_WriteBinders(PreSharedKey* list, byte* output,
-                                      byte msgType)
+int TLSX_PreSharedKey_WriteBinders(PreSharedKey* list, byte* output,
+                                   byte msgType, word16* pSz)
 {
     PreSharedKey* current = list;
     word16 idx = 0;
@@ -7407,7 +8307,8 @@
     len = idx - lenIdx - OPAQUE16_LEN;
     c16toa(len, output + lenIdx);
 
-    return idx;
+    *pSz = idx;
+    return 0;
 }
 
 
@@ -7419,14 +8320,15 @@
  * msgType  The type of the message this extension is being written into.
  * returns the number of bytes written into the buffer.
  */
-static word16 TLSX_PreSharedKey_Write(PreSharedKey* list, byte* output,
-                                      byte msgType)
+static int TLSX_PreSharedKey_Write(PreSharedKey* list, byte* output,
+                                   byte msgType, word16* pSz)
 {
     if (msgType == client_hello) {
         PreSharedKey* current = list;
         word16 idx = 0;
         word16 lenIdx;
         word16 len;
+        int ret;
 
         /* Write identites only. Binders after HMACing over this. */
         lenIdx = idx;
@@ -7453,12 +8355,12 @@
          * The binders are based on the hash of all the ClientHello data up to
          * and include the identities written above.
          */
-        idx += TLSX_PreSharedKey_GetSizeBinders(list, msgType);
-
-        return idx;
-    }
-
-    if (msgType == server_hello) {
+        ret = TLSX_PreSharedKey_GetSizeBinders(list, msgType, &len);
+        if (ret < 0)
+            return ret;
+        *pSz += idx + len;
+    }
+    else if (msgType == server_hello) {
         word16 i;
 
         /* Find the index of the chosen identity. */
@@ -7471,8 +8373,10 @@
          * by the client.
          */
         c16toa(i, output);
-        return OPAQUE16_LEN;
-    }
+        *pSz += OPAQUE16_LEN;
+    }
+    else
+        return SANITY_MSG_E;
 
     return 0;
 }
@@ -7521,7 +8425,8 @@
             /* Length of identity. */
             ato16(input + idx, &identityLen);
             idx += OPAQUE16_LEN;
-            if (len < OPAQUE16_LEN + identityLen + OPAQUE32_LEN)
+            if (len < OPAQUE16_LEN + identityLen + OPAQUE32_LEN ||
+                    identityLen > MAX_PSK_ID_LEN)
                 return BUFFER_E;
             /* Cache identity pointer. */
             identity = input + idx;
@@ -7546,6 +8451,8 @@
         list = (PreSharedKey*)extension->data;
 
         /* Length of binders. */
+        if (idx + OPAQUE16_LEN > length)
+            return BUFFER_E;
         ato16(input + idx, &len);
         idx += OPAQUE16_LEN;
         if (len < MIN_PSK_BINDERS_LEN || length - idx < len)
@@ -7634,6 +8541,7 @@
                                  PreSharedKey** preSharedKey)
 {
     PreSharedKey* psk;
+    PreSharedKey** next;
 
     psk = (PreSharedKey*)XMALLOC(sizeof(PreSharedKey), heap, DYNAMIC_TYPE_TLSX);
     if (psk == NULL)
@@ -7650,11 +8558,16 @@
     psk->identityLen = len;
 
     /* Add it to the end and maintain the links. */
-    while (*list != NULL)
-        list = &((*list)->next);
+    while (*list != NULL) {
+        /* Assign to temporary to work around compiler bug found by customer. */
+        next = &((*list)->next);
+        list = next;
+    }
     *list = psk;
     *preSharedKey = psk;
 
+    (void)heap;
+
     return 0;
 }
 
@@ -7752,8 +8665,8 @@
 #else
 
 #define PSK_FREE_ALL(a, b)
-#define PSK_GET_SIZE(a, b)    0
-#define PSK_WRITE(a, b, c)    0
+#define PSK_GET_SIZE(a, b, c) 0
+#define PSK_WRITE(a, b, c, d) 0
 #define PSK_PARSE(a, b, c, d) 0
 
 #endif
@@ -7770,7 +8683,7 @@
  * msgType  The type of the message this extension is being written into.
  * returns the number of bytes of the encoded PSK KE mode extension.
  */
-static word16 TLSX_PskKeModes_GetSize(byte modes, byte msgType)
+static int TLSX_PskKeModes_GetSize(byte modes, byte msgType, word16* pSz)
 {
     if (msgType == client_hello) {
         /* Format: Len | Modes* */
@@ -7780,7 +8693,8 @@
             len += OPAQUE8_LEN;
         if (modes & (1 << PSK_DHE_KE))
             len += OPAQUE8_LEN;
-        return len;
+        *pSz += len;
+        return 0;
     }
 
     return SANITY_MSG_E;
@@ -7795,7 +8709,8 @@
  * msgType  The type of the message this extension is being written into.
  * returns the number of bytes written into the buffer.
  */
-static word16 TLSX_PskKeModes_Write(byte modes, byte* output, byte msgType)
+static int TLSX_PskKeModes_Write(byte modes, byte* output, byte msgType,
+                                 word16* pSz)
 {
     if (msgType == client_hello) {
         /* Format: Len | Modes* */
@@ -7809,7 +8724,8 @@
         /* Write out length of mode list. */
         output[0] = idx - OPAQUE8_LEN;
 
-        return idx;
+        *pSz += idx;
+        return 0;
     }
 
     return SANITY_MSG_E;
@@ -7832,7 +8748,7 @@
     if (msgType == client_hello) {
         /* Format: Len | Modes* */
         int   idx = 0;
-        int   len;
+        word16 len;
         byte  modes = 0;
 
         /* Ensure length byte exists. */
@@ -7900,8 +8816,8 @@
 
 #else
 
-#define PKM_GET_SIZE(a, b)    0
-#define PKM_WRITE(a, b, c)    0
+#define PKM_GET_SIZE(a, b, c) 0
+#define PKM_WRITE(a, b, c, d) 0
 #define PKM_PARSE(a, b, c, d) 0
 
 #endif
@@ -7911,17 +8827,19 @@
 /******************************************************************************/
 
 #if defined(WOLFSSL_TLS13) && defined(WOLFSSL_POST_HANDSHAKE_AUTH)
-/* Get the size of the encoded Post-Hanshake Authentication extension.
+/* Get the size of the encoded Post-Handshake Authentication extension.
  * Only in ClientHello.
  *
  * msgType  The type of the message this extension is being written into.
- * returns the number of bytes of the encoded Post-Hanshake Authentication
+ * returns the number of bytes of the encoded Post-Handshake Authentication
  * extension.
  */
-static word16 TLSX_PostHandAuth_GetSize(byte msgType)
-{
-    if (msgType == client_hello)
+static int TLSX_PostHandAuth_GetSize(byte msgType, word16* pSz)
+{
+    if (msgType == client_hello) {
+        *pSz += 0;
         return 0;
+    }
 
     return SANITY_MSG_E;
 }
@@ -7934,12 +8852,14 @@
  * msgType  The type of the message this extension is being written into.
  * returns the number of bytes written into the buffer.
  */
-static word16 TLSX_PostHandAuth_Write(byte* output, byte msgType)
+static int TLSX_PostHandAuth_Write(byte* output, byte msgType, word16* pSz)
 {
     (void)output;
 
-    if (msgType == client_hello)
+    if (msgType == client_hello) {
+        *pSz += 0;
         return 0;
+    }
 
     return SANITY_MSG_E;
 }
@@ -7999,8 +8919,8 @@
 
 #else
 
-#define PHA_GET_SIZE(a)       0
-#define PHA_WRITE(a, b)       0
+#define PHA_GET_SIZE(a, b)    0
+#define PHA_WRITE(a, b, c)    0
 #define PHA_PARSE(a, b, c, d) 0
 
 #endif
@@ -8016,14 +8936,18 @@
  * msgType  The type of the message this extension is being written into.
  * returns the number of bytes of the encoded Early Data Indication extension.
  */
-static word16 TLSX_EarlyData_GetSize(byte msgType)
-{
+static int TLSX_EarlyData_GetSize(byte msgType, word16* pSz)
+{
+    int ret = 0;
+
     if (msgType == client_hello || msgType == encrypted_extensions)
-        return 0;
-    if (msgType == session_ticket)
-        return OPAQUE32_LEN;
-
-    return SANITY_MSG_E;
+        *pSz += 0;
+    else if (msgType == session_ticket)
+        *pSz += OPAQUE32_LEN;
+    else
+        ret = SANITY_MSG_E;
+
+    return ret;
 }
 
 /* Writes the Early Data Indicator extension into the output buffer.
@@ -8035,14 +8959,15 @@
  * msgType  The type of the message this extension is being written into.
  * returns the number of bytes written into the buffer.
  */
-static word16 TLSX_EarlyData_Write(word32 max, byte* output, byte msgType)
-{
-    if (msgType == client_hello || msgType == encrypted_extensions) {
+static int TLSX_EarlyData_Write(word32 max, byte* output, byte msgType,
+                                word16* pSz)
+{
+    if (msgType == client_hello || msgType == encrypted_extensions)
         return 0;
-    }
-    if (msgType == session_ticket) {
+    else if (msgType == session_ticket) {
         c32toa(max, output);
-        return OPAQUE32_LEN;
+        *pSz += OPAQUE32_LEN;
+        return 0;
     }
 
     return SANITY_MSG_E;
@@ -8064,7 +8989,10 @@
         if (length != 0)
             return BUFFER_E;
 
-        return TLSX_EarlyData_Use(ssl, 0);
+        if (ssl->earlyData == expecting_early_data)
+            return TLSX_EarlyData_Use(ssl, 0);
+        ssl->earlyData = early_data_ext;
+        return 0;
     }
     if (msgType == encrypted_extensions) {
         if (length != 0)
@@ -8079,13 +9007,13 @@
         return TLSX_EarlyData_Use(ssl, 1);
     }
     if (msgType == session_ticket) {
-        word32 max;
+        word32 maxSz;
 
         if (length != OPAQUE32_LEN)
             return BUFFER_E;
-        ato32(input, &max);
-
-        ssl->session.maxEarlyDataSz = max;
+        ato32(input, &maxSz);
+
+        ssl->session.maxEarlyDataSz = maxSz;
         return 0;
     }
 
@@ -8128,8 +9056,8 @@
 
 #else
 
-#define EDI_GET_SIZE(a)       0
-#define EDI_WRITE(a, b, c)    0
+#define EDI_GET_SIZE(a, b)    0
+#define EDI_WRITE(a, b, c, d) 0
 #define EDI_PARSE(a, b, c, d) 0
 
 #endif
@@ -8181,6 +9109,10 @@
                 SNI_FREE_ALL((SNI*)extension->data, heap);
                 break;
 
+            case TLSX_TRUSTED_CA_KEYS:
+                TCA_FREE_ALL((TCA*)extension->data, heap);
+                break;
+
             case TLSX_MAX_FRAGMENT_LENGTH:
                 MFL_FREE_ALL(extension->data, heap);
                 break;
@@ -8221,10 +9153,14 @@
             case TLSX_APPLICATION_LAYER_PROTOCOL:
                 ALPN_FREE_ALL((ALPN*)extension->data, heap);
                 break;
-
+#if !defined(WOLFSSL_NO_SIGALG)
             case TLSX_SIGNATURE_ALGORITHMS:
                 break;
-
+#endif
+#if defined(HAVE_ENCRYPT_THEN_MAC) && !defined(WOLFSSL_AEAD_ONLY)
+            case TLSX_ENCRYPT_THEN_MAC:
+                break;
+#endif
 #ifdef WOLFSSL_TLS13
             case TLSX_SUPPORTED_VERSIONS:
                 break;
@@ -8275,7 +9211,8 @@
 }
 
 /** Tells the buffered size of the extensions in a list. */
-static int TLSX_GetSize(TLSX* list, byte* semaphore, byte msgType, word16* pLength)
+static int TLSX_GetSize(TLSX* list, byte* semaphore, byte msgType,
+                        word16* pLength)
 {
     int    ret = 0;
     TLSX*  extension;
@@ -8306,6 +9243,12 @@
                     length += SNI_GET_SIZE((SNI*)extension->data);
                 break;
 
+            case TLSX_TRUSTED_CA_KEYS:
+                /* TCA only sends the list on the request. */
+                if (isRequest)
+                    length += TCA_GET_SIZE((TCA*)extension->data);
+                break;
+
             case TLSX_MAX_FRAGMENT_LENGTH:
                 length += MFL_GET_SIZE(extension->data);
                 break;
@@ -8350,11 +9293,16 @@
             case TLSX_APPLICATION_LAYER_PROTOCOL:
                 length += ALPN_GET_SIZE((ALPN*)extension->data);
                 break;
-
+#if !defined(WOLFSSL_NO_SIGALG)
             case TLSX_SIGNATURE_ALGORITHMS:
                 length += SA_GET_SIZE(extension->data);
                 break;
-
+#endif
+#if defined(HAVE_ENCRYPT_THEN_MAC) && !defined(WOLFSSL_AEAD_ONLY)
+            case TLSX_ENCRYPT_THEN_MAC:
+                ret = ETM_GET_SIZE(msgType, &length);
+                break;
+#endif /* HAVE_ENCRYPT_THEN_MAC */
 #ifdef WOLFSSL_TLS13
             case TLSX_SUPPORTED_VERSIONS:
                 ret = SV_GET_SIZE(extension->data, msgType, &length);
@@ -8366,23 +9314,24 @@
 
     #if defined(HAVE_SESSION_TICKET) || !defined(NO_PSK)
             case TLSX_PRE_SHARED_KEY:
-                length += PSK_GET_SIZE((PreSharedKey*)extension->data, msgType);
+                ret = PSK_GET_SIZE((PreSharedKey*)extension->data, msgType,
+                                                                       &length);
                 break;
 
             case TLSX_PSK_KEY_EXCHANGE_MODES:
-                length += PKM_GET_SIZE(extension->val, msgType);
+                ret = PKM_GET_SIZE(extension->val, msgType, &length);
                 break;
     #endif
 
     #ifdef WOLFSSL_EARLY_DATA
             case TLSX_EARLY_DATA:
-                length += EDI_GET_SIZE(msgType);
+                ret = EDI_GET_SIZE(msgType, &length);
                 break;
     #endif
 
     #ifdef WOLFSSL_POST_HANDSHAKE_AUTH
             case TLSX_POST_HANDSHAKE_AUTH:
-                length += PHA_GET_SIZE(msgType);
+                ret = PHA_GET_SIZE(msgType, &length);
                 break;
     #endif
 
@@ -8444,6 +9393,13 @@
                 }
                 break;
 
+            case TLSX_TRUSTED_CA_KEYS:
+                WOLFSSL_MSG("Trusted CA Indication extension to write");
+                if (isRequest) {
+                    offset += TCA_WRITE((TCA*)extension->data, output + offset);
+                }
+                break;
+
             case TLSX_MAX_FRAGMENT_LENGTH:
                 WOLFSSL_MSG("Max Fragment Length extension to write");
                 offset += MFL_WRITE((byte*)extension->data, output + offset);
@@ -8504,12 +9460,18 @@
                 WOLFSSL_MSG("ALPN extension to write");
                 offset += ALPN_WRITE((ALPN*)extension->data, output + offset);
                 break;
-
+#if !defined(WOLFSSL_NO_SIGALG)
             case TLSX_SIGNATURE_ALGORITHMS:
                 WOLFSSL_MSG("Signature Algorithms extension to write");
                 offset += SA_WRITE(extension->data, output + offset);
                 break;
-
+#endif
+#if defined(HAVE_ENCRYPT_THEN_MAC) && !defined(WOLFSSL_AEAD_ONLY)
+            case TLSX_ENCRYPT_THEN_MAC:
+                WOLFSSL_MSG("Encrypt-Then-Mac extension to write");
+                ret = ETM_WRITE(extension->data, output, msgType, &offset);
+                break;
+#endif /* HAVE_ENCRYPT_THEN_MAC */
 #ifdef WOLFSSL_TLS13
             case TLSX_SUPPORTED_VERSIONS:
                 WOLFSSL_MSG("Supported Versions extension to write");
@@ -8525,27 +9487,29 @@
     #if defined(HAVE_SESSION_TICKET) || !defined(NO_PSK)
             case TLSX_PRE_SHARED_KEY:
                 WOLFSSL_MSG("Pre-Shared Key extension to write");
-                offset += PSK_WRITE((PreSharedKey*)extension->data,
-                                    output + offset, msgType);
+                ret = PSK_WRITE((PreSharedKey*)extension->data, output + offset,
+                                                              msgType, &offset);
                 break;
 
             case TLSX_PSK_KEY_EXCHANGE_MODES:
                 WOLFSSL_MSG("PSK Key Exchange Modes extension to write");
-                offset += PKM_WRITE(extension->val, output + offset, msgType);
+                ret = PKM_WRITE(extension->val, output + offset, msgType,
+                                                                       &offset);
                 break;
     #endif
 
     #ifdef WOLFSSL_EARLY_DATA
             case TLSX_EARLY_DATA:
                 WOLFSSL_MSG("Early Data extension to write");
-                offset += EDI_WRITE(extension->val, output + offset, msgType);
+                ret = EDI_WRITE(extension->val, output + offset, msgType,
+                                                                       &offset);
                 break;
     #endif
 
     #ifdef WOLFSSL_POST_HANDSHAKE_AUTH
             case TLSX_POST_HANDSHAKE_AUTH:
                 WOLFSSL_MSG("Post-Handshake Authentication extension to write");
-                offset += PHA_WRITE(output + offset, msgType);
+                ret = PHA_WRITE(output + offset, msgType, &offset);
                 break;
     #endif
 
@@ -8559,7 +9523,7 @@
             case TLSX_KEY_SHARE:
                 WOLFSSL_MSG("Key Share extension to write");
                 offset += KS_WRITE((KeyShareEntry*)extension->data,
-                                   output + offset, msgType);
+                                                      output + offset, msgType);
                 break;
 #endif
         }
@@ -8613,7 +9577,7 @@
 #ifdef HAVE_QSH
 static int TLSX_CreateQSHKey(WOLFSSL* ssl, int type)
 {
-    int ret;
+    int ret = -1;
 
     (void)ssl;
 
@@ -8627,7 +9591,7 @@
 #endif
         default:
             WOLFSSL_MSG("Unknown type for creating NTRU key");
-            return -1;
+            break;
     }
 
     return ret;
@@ -8770,11 +9734,12 @@
 
 #if (!defined(NO_WOLFSSL_SERVER) && defined(WOLFSSL_TLS13) && \
         !defined(WOLFSSL_NO_SERVER_GROUPS_EXT)) || \
-    (defined(WOLFSSL_TLS13) && !defined(HAVE_ECC) && \
-        !defined(HAVE_CURVE25519) && defined(HAVE_SUPPORTED_CURVES)) || \
-    ((defined(HAVE_ECC) || defined(HAVE_CURVE25519)) && \
-        defined(HAVE_SUPPORTED_CURVES))
-
+    (defined(WOLFSSL_TLS13) && !defined(HAVE_ECC) && !defined(HAVE_CURVE25519) \
+        && !defined(HAVE_CURVE448) && defined(HAVE_SUPPORTED_CURVES)) || \
+    ((defined(HAVE_ECC) || defined(HAVE_CURVE25519) || \
+        defined(HAVE_CURVE448)) && defined(HAVE_SUPPORTED_CURVES))
+
+/* Populates the default supported groups / curves */
 static int TLSX_PopulateSupportedGroups(WOLFSSL* ssl, TLSX** extensions)
 {
     int ret = WOLFSSL_SUCCESS;
@@ -8799,7 +9764,98 @@
 #endif /* WOLFSSL_TLS13 */
 
 #if defined(HAVE_ECC) && defined(HAVE_SUPPORTED_CURVES)
+        /* list in order by strength, since not all servers choose by strength */
+        #if defined(HAVE_ECC521) || defined(HAVE_ALL_CURVES)
+            #ifndef NO_ECC_SECP
+                ret = TLSX_UseSupportedCurve(extensions,
+                                              WOLFSSL_ECC_SECP521R1, ssl->heap);
+                if (ret != WOLFSSL_SUCCESS) return ret;
+            #endif
+        #endif
+        #if defined(HAVE_ECC512) || defined(HAVE_ALL_CURVES)
+            #ifdef HAVE_ECC_BRAINPOOL
+                ret = TLSX_UseSupportedCurve(extensions,
+                                        WOLFSSL_ECC_BRAINPOOLP512R1, ssl->heap);
+                if (ret != WOLFSSL_SUCCESS) return ret;
+            #endif
+        #endif
+        #if defined(HAVE_ECC384) || defined(HAVE_ALL_CURVES)
+            #ifndef NO_ECC_SECP
+                ret = TLSX_UseSupportedCurve(extensions,
+                                              WOLFSSL_ECC_SECP384R1, ssl->heap);
+                if (ret != WOLFSSL_SUCCESS) return ret;
+            #endif
+            #ifdef HAVE_ECC_BRAINPOOL
+                ret = TLSX_UseSupportedCurve(extensions,
+                                        WOLFSSL_ECC_BRAINPOOLP384R1, ssl->heap);
+                if (ret != WOLFSSL_SUCCESS) return ret;
+            #endif
+        #endif
+#endif /* HAVE_ECC && HAVE_SUPPORTED_CURVES */
+
+        #ifndef HAVE_FIPS
+            #if defined(HAVE_CURVE448)
+                ret = TLSX_UseSupportedCurve(extensions,
+                                                   WOLFSSL_ECC_X448, ssl->heap);
+                if (ret != WOLFSSL_SUCCESS) return ret;
+            #endif
+        #endif /* HAVE_FIPS */
+
+#if defined(HAVE_ECC) && defined(HAVE_SUPPORTED_CURVES)
+        #if !defined(NO_ECC256)  || defined(HAVE_ALL_CURVES)
+            #ifndef NO_ECC_SECP
+                ret = TLSX_UseSupportedCurve(extensions,
+                                              WOLFSSL_ECC_SECP256R1, ssl->heap);
+                if (ret != WOLFSSL_SUCCESS) return ret;
+            #endif
+            #ifdef HAVE_ECC_KOBLITZ
+                ret = TLSX_UseSupportedCurve(extensions,
+                                              WOLFSSL_ECC_SECP256K1, ssl->heap);
+                if (ret != WOLFSSL_SUCCESS) return ret;
+            #endif
+            #ifdef HAVE_ECC_BRAINPOOL
+                ret = TLSX_UseSupportedCurve(extensions,
+                                        WOLFSSL_ECC_BRAINPOOLP256R1, ssl->heap);
+                if (ret != WOLFSSL_SUCCESS) return ret;
+            #endif
+        #endif
+#endif /* HAVE_ECC && HAVE_SUPPORTED_CURVES */
+
+        #ifndef HAVE_FIPS
+            #if defined(HAVE_CURVE25519)
+                ret = TLSX_UseSupportedCurve(extensions,
+                                                 WOLFSSL_ECC_X25519, ssl->heap);
+                if (ret != WOLFSSL_SUCCESS) return ret;
+            #endif
+        #endif /* HAVE_FIPS */
+
+#if defined(HAVE_ECC) && defined(HAVE_SUPPORTED_CURVES)
+        #if defined(HAVE_ECC224) || defined(HAVE_ALL_CURVES)
+            #ifndef NO_ECC_SECP
+                ret = TLSX_UseSupportedCurve(extensions,
+                                              WOLFSSL_ECC_SECP224R1, ssl->heap);
+                if (ret != WOLFSSL_SUCCESS) return ret;
+            #endif
+            #ifdef HAVE_ECC_KOBLITZ
+                ret = TLSX_UseSupportedCurve(extensions,
+                                              WOLFSSL_ECC_SECP224K1, ssl->heap);
+                if (ret != WOLFSSL_SUCCESS) return ret;
+            #endif
+        #endif
+
     #ifndef HAVE_FIPS
+        #if defined(HAVE_ECC192) || defined(HAVE_ALL_CURVES)
+            #ifndef NO_ECC_SECP
+                ret = TLSX_UseSupportedCurve(extensions,
+                                              WOLFSSL_ECC_SECP192R1, ssl->heap);
+                if (ret != WOLFSSL_SUCCESS) return ret;
+            #endif
+            #ifdef HAVE_ECC_KOBLITZ
+                ret = TLSX_UseSupportedCurve(extensions,
+                                              WOLFSSL_ECC_SECP192K1, ssl->heap);
+                if (ret != WOLFSSL_SUCCESS) return ret;
+            #endif
+        #endif
         #if defined(HAVE_ECC160) || defined(HAVE_ALL_CURVES)
             #ifndef NO_ECC_SECP
                 ret = TLSX_UseSupportedCurve(extensions,
@@ -8817,124 +9873,55 @@
                 if (ret != WOLFSSL_SUCCESS) return ret;
             #endif
         #endif
-        #if defined(HAVE_ECC192) || defined(HAVE_ALL_CURVES)
-            #ifndef NO_ECC_SECP
-                ret = TLSX_UseSupportedCurve(extensions,
-                                              WOLFSSL_ECC_SECP192R1, ssl->heap);
-                if (ret != WOLFSSL_SUCCESS) return ret;
-            #endif
-            #ifdef HAVE_ECC_KOBLITZ
-                ret = TLSX_UseSupportedCurve(extensions,
-                                              WOLFSSL_ECC_SECP192K1, ssl->heap);
-                if (ret != WOLFSSL_SUCCESS) return ret;
-            #endif
-        #endif
-    #endif
-        #if defined(HAVE_ECC224) || defined(HAVE_ALL_CURVES)
-            #ifndef NO_ECC_SECP
+    #endif /* HAVE_FIPS */
+#endif /* HAVE_ECC && HAVE_SUPPORTED_CURVES */
+
+                /* Add FFDHE supported groups. */
+        #ifdef HAVE_FFDHE_8192
+            if (8192/8 >= ssl->options.minDhKeySz &&
+                                            8192/8 <= ssl->options.maxDhKeySz) {
                 ret = TLSX_UseSupportedCurve(extensions,
-                                              WOLFSSL_ECC_SECP224R1, ssl->heap);
-                if (ret != WOLFSSL_SUCCESS) return ret;
-            #endif
-            #ifdef HAVE_ECC_KOBLITZ
-                ret = TLSX_UseSupportedCurve(extensions,
-                                              WOLFSSL_ECC_SECP224K1, ssl->heap);
-                if (ret != WOLFSSL_SUCCESS) return ret;
-            #endif
+                                             WOLFSSL_FFDHE_8192, ssl->heap);
+                if (ret != WOLFSSL_SUCCESS)
+                    return ret;
+            }
         #endif
-        #if !defined(NO_ECC256)  || defined(HAVE_ALL_CURVES)
-            #ifndef NO_ECC_SECP
-                ret = TLSX_UseSupportedCurve(extensions,
-                                              WOLFSSL_ECC_SECP256R1, ssl->heap);
-                if (ret != WOLFSSL_SUCCESS) return ret;
-            #endif
-        #endif
-#endif /* HAVE_ECC && HAVE_SUPPORTED_CURVES */
-
-        #ifndef HAVE_FIPS
-            #if defined(HAVE_CURVE25519)
+        #ifdef HAVE_FFDHE_6144
+            if (6144/8 >= ssl->options.minDhKeySz &&
+                                            6144/8 <= ssl->options.maxDhKeySz) {
                 ret = TLSX_UseSupportedCurve(extensions,
-                                                 WOLFSSL_ECC_X25519, ssl->heap);
-                if (ret != WOLFSSL_SUCCESS) return ret;
-            #endif
-        #endif /* HAVE_FIPS */
-
-#if defined(HAVE_ECC) && defined(HAVE_SUPPORTED_CURVES)
-        #if !defined(NO_ECC256)  || defined(HAVE_ALL_CURVES)
-            #ifdef HAVE_ECC_KOBLITZ
-                ret = TLSX_UseSupportedCurve(extensions,
-                                              WOLFSSL_ECC_SECP256K1, ssl->heap);
-                if (ret != WOLFSSL_SUCCESS) return ret;
-            #endif
-            #ifdef HAVE_ECC_BRAINPOOL
-                ret = TLSX_UseSupportedCurve(extensions,
-                                        WOLFSSL_ECC_BRAINPOOLP256R1, ssl->heap);
-                if (ret != WOLFSSL_SUCCESS) return ret;
-            #endif
+                                             WOLFSSL_FFDHE_6144, ssl->heap);
+                if (ret != WOLFSSL_SUCCESS)
+                    return ret;
+            }
         #endif
-        #if defined(HAVE_ECC384) || defined(HAVE_ALL_CURVES)
-            #ifndef NO_ECC_SECP
+        #ifdef HAVE_FFDHE_4096
+            if (4096/8 >= ssl->options.minDhKeySz &&
+                                            4096/8 <= ssl->options.maxDhKeySz) {
                 ret = TLSX_UseSupportedCurve(extensions,
-                                              WOLFSSL_ECC_SECP384R1, ssl->heap);
-                if (ret != WOLFSSL_SUCCESS) return ret;
-            #endif
-            #ifdef HAVE_ECC_BRAINPOOL
-                ret = TLSX_UseSupportedCurve(extensions,
-                                        WOLFSSL_ECC_BRAINPOOLP384R1, ssl->heap);
-                if (ret != WOLFSSL_SUCCESS) return ret;
-            #endif
+                                             WOLFSSL_FFDHE_4096, ssl->heap);
+                if (ret != WOLFSSL_SUCCESS)
+                    return ret;
+            }
         #endif
-        #if defined(HAVE_ECC512) || defined(HAVE_ALL_CURVES)
-            #ifdef HAVE_ECC_BRAINPOOL
+        #ifdef HAVE_FFDHE_3072
+            if (3072/8 >= ssl->options.minDhKeySz &&
+                                            3072/8 <= ssl->options.maxDhKeySz) {
                 ret = TLSX_UseSupportedCurve(extensions,
-                                        WOLFSSL_ECC_BRAINPOOLP512R1, ssl->heap);
-                if (ret != WOLFSSL_SUCCESS) return ret;
-            #endif
+                                             WOLFSSL_FFDHE_3072, ssl->heap);
+                if (ret != WOLFSSL_SUCCESS)
+                    return ret;
+            }
         #endif
-        #if defined(HAVE_ECC521) || defined(HAVE_ALL_CURVES)
-            #ifndef NO_ECC_SECP
-                ret = TLSX_UseSupportedCurve(extensions,
-                                              WOLFSSL_ECC_SECP521R1, ssl->heap);
-                if (ret != WOLFSSL_SUCCESS) return ret;
-            #endif
-        #endif
-#endif /* HAVE_ECC && HAVE_SUPPORTED_CURVES */
-
-    #ifdef WOLFSSL_TLS13
-        if (IsAtLeastTLSv1_3(ssl->version)) {
-                /* Add FFDHE supported groups. */
         #ifdef HAVE_FFDHE_2048
+            if (2048/8 >= ssl->options.minDhKeySz &&
+                                            2048/8 <= ssl->options.maxDhKeySz) {
                 ret = TLSX_UseSupportedCurve(extensions,
                                              WOLFSSL_FFDHE_2048, ssl->heap);
                 if (ret != WOLFSSL_SUCCESS)
                     return ret;
-        #endif
-        #ifdef HAVE_FFDHE_3072
-                ret = TLSX_UseSupportedCurve(extensions,
-                                             WOLFSSL_FFDHE_3072, ssl->heap);
-                if (ret != WOLFSSL_SUCCESS)
-                    return ret;
-        #endif
-        #ifdef HAVE_FFDHE_4096
-                ret = TLSX_UseSupportedCurve(extensions,
-                                             WOLFSSL_FFDHE_4096, ssl->heap);
-                if (ret != WOLFSSL_SUCCESS)
-                    return ret;
+            }
         #endif
-        #ifdef HAVE_FFDHE_6144
-                ret = TLSX_UseSupportedCurve(extensions,
-                                             WOLFSSL_FFDHE_6144, ssl->heap);
-                if (ret != WOLFSSL_SUCCESS)
-                    return ret;
-        #endif
-        #ifdef HAVE_FFDHE_8192
-                ret = TLSX_UseSupportedCurve(extensions,
-                                             WOLFSSL_FFDHE_8192, ssl->heap);
-                if (ret != WOLFSSL_SUCCESS)
-                    return ret;
-        #endif
-        }
-    #endif /* WOLFSSL_TLS13 */
 
     (void)ssl;
     (void)extensions;
@@ -8961,7 +9948,7 @@
     WOLFSSL_MSG("Adding supported QSH Schemes");
 #endif
 
-    /* server will add extension depending on whats parsed from client */
+    /* server will add extension depending on what is parsed from client */
     if (!isServer) {
 #ifdef HAVE_QSH
         /* test if user has set a specific scheme already */
@@ -9035,45 +10022,44 @@
         }
 #endif
 
-#if (defined(HAVE_ECC) || defined(HAVE_CURVE25519)) && \
-                                                  defined(HAVE_SUPPORTED_CURVES)
+#if defined(HAVE_ENCRYPT_THEN_MAC) && !defined(WOLFSSL_AEAD_ONLY)
+        if (!ssl->options.disallowEncThenMac) {
+            ret = TLSX_EncryptThenMac_Use(ssl);
+            if (ret != 0)
+                return ret;
+        }
+#endif
+
+#if (defined(HAVE_ECC) || defined(HAVE_CURVE25519) || \
+                       defined(HAVE_CURVE448)) && defined(HAVE_SUPPORTED_CURVES)
         if (!ssl->options.userCurves && !ssl->ctx->userCurves) {
             if (TLSX_Find(ssl->ctx->extensions,
                                                TLSX_SUPPORTED_GROUPS) == NULL) {
                 ret = TLSX_PopulateSupportedGroups(ssl, &ssl->extensions);
                 if (ret != WOLFSSL_SUCCESS)
                     return ret;
-                if (!IsAtLeastTLSv1_3(ssl->version) &&
-                        TLSX_Find(ssl->ctx->extensions,
-                                               TLSX_EC_POINT_FORMATS) == NULL &&
-                        TLSX_Find(ssl->extensions,
-                                               TLSX_EC_POINT_FORMATS) == NULL) {
-                    ret = TLSX_UsePointFormat(&ssl->extensions,
-                                                     WOLFSSL_EC_PF_UNCOMPRESSED,
-                                                     ssl->heap);
-                    if (ret != WOLFSSL_SUCCESS)
-                        return ret;
-                }
             }
-            else if (!IsAtLeastTLSv1_3(ssl->version) &&
-                     TLSX_Find(ssl->ctx->extensions,
-                                               TLSX_EC_POINT_FORMATS) == NULL) {
-                ret = TLSX_UsePointFormat(&ssl->ctx->extensions,
-                                                     WOLFSSL_EC_PF_UNCOMPRESSED,
-                                                     ssl->heap);
-                if (ret != WOLFSSL_SUCCESS)
-                    return ret;
-            }
-        }
-#endif /* (HAVE_ECC || HAVE_CURVE25519) && HAVE_SUPPORTED_CURVES */
+        }
+        if ((!IsAtLeastTLSv1_3(ssl->version) || ssl->options.downgrade) &&
+               TLSX_Find(ssl->ctx->extensions, TLSX_EC_POINT_FORMATS) == NULL &&
+               TLSX_Find(ssl->extensions, TLSX_EC_POINT_FORMATS) == NULL) {
+             ret = TLSX_UsePointFormat(&ssl->extensions,
+                                         WOLFSSL_EC_PF_UNCOMPRESSED, ssl->heap);
+             if (ret != WOLFSSL_SUCCESS)
+                 return ret;
+        }
+#endif /* (HAVE_ECC || CURVE25519 || CURVE448) && HAVE_SUPPORTED_CURVES */
     } /* is not server */
 
+#if !defined(WOLFSSL_NO_SIGALG)
     WOLFSSL_MSG("Adding signature algorithms extension");
     if ((ret = TLSX_SetSignatureAlgorithms(&ssl->extensions, ssl, ssl->heap))
                                                                          != 0) {
             return ret;
     }
-
+#else
+    ret = 0;
+#endif
     #ifdef WOLFSSL_TLS13
         if (!isServer && IsAtLeastTLSv1_3(ssl->version)) {
             /* Add mandatory TLS v1.3 extension: supported version */
@@ -9084,7 +10070,7 @@
             }
 
     #if !defined(HAVE_ECC) && !defined(HAVE_CURVE25519) && \
-                                                  defined(HAVE_SUPPORTED_CURVES)
+                       !defined(HAVE_CURVE448) && defined(HAVE_SUPPORTED_CURVES)
         if (TLSX_Find(ssl->ctx->extensions, TLSX_SUPPORTED_GROUPS) == NULL) {
             /* Put in DH groups for TLS 1.3 only. */
             ret = TLSX_PopulateSupportedGroups(ssl, &ssl->extensions);
@@ -9092,7 +10078,7 @@
                 return ret;
             ret = 0;
         }
-    #endif /* !HAVE_ECC && !HAVE_CURVE25519 && HAVE_SUPPORTED_CURVES */
+    #endif /* (HAVE_ECC || CURVE25519 || CURVE448) && HAVE_SUPPORTED_CURVES */
 
         #if !defined(WOLFSSL_TLS13_DRAFT_18) && !defined(WOLFSSL_TLS13_DRAFT_22)
             if (ssl->certHashSigAlgoSz > 0) {
@@ -9118,6 +10104,8 @@
                     namedGroup = WOLFSSL_ECC_SECP256R1;
         #elif defined(HAVE_CURVE25519)
                     namedGroup = WOLFSSL_ECC_X25519;
+        #elif defined(HAVE_CURVE448)
+                    namedGroup = WOLFSSL_ECC_X448;
         #elif defined(HAVE_ECC) && (!defined(NO_ECC384) || \
                               defined(HAVE_ALL_CURVES)) && !defined(NO_ECC_SECP)
                     namedGroup = WOLFSSL_ECC_SECP384R1;
@@ -9151,6 +10139,11 @@
                 WOLFSSL_SESSION* sess = &ssl->session;
                 word32           milli;
 
+                if (sess->ticketLen > MAX_PSK_ID_LEN) {
+                    WOLFSSL_MSG("Session ticket length for PSK ext is too large");
+                    return BUFFER_ERROR;
+                }
+
                 /* Determine the MAC algorithm for the cipher suite used. */
                 ssl->options.cipherSuite0 = sess->cipherSuite0;
                 ssl->options.cipherSuite  = sess->cipherSuite;
@@ -9172,14 +10165,28 @@
             }
         #endif
         #ifndef NO_PSK
-            if (ssl->options.client_psk_cb != NULL) {
+            if (ssl->options.client_psk_cb != NULL ||
+                                     ssl->options.client_psk_tls13_cb != NULL) {
                 /* Default ciphersuite. */
                 byte cipherSuite0 = TLS13_BYTE;
                 byte cipherSuite = WOLFSSL_DEF_PSK_CIPHER;
-
-                ssl->arrays->psk_keySz = ssl->options.client_psk_cb(ssl,
+                const char* cipherName = NULL;
+
+                if (ssl->options.client_psk_tls13_cb != NULL) {
+                    ssl->arrays->psk_keySz = ssl->options.client_psk_tls13_cb(
+                        ssl, ssl->arrays->server_hint,
+                        ssl->arrays->client_identity, MAX_PSK_ID_LEN,
+                        ssl->arrays->psk_key, MAX_PSK_KEY_LEN, &cipherName);
+                    if (GetCipherSuiteFromName(cipherName, &cipherSuite0,
+                                                           &cipherSuite) != 0) {
+                        return PSK_KEY_ERROR;
+                    }
+                }
+                else {
+                    ssl->arrays->psk_keySz = ssl->options.client_psk_cb(ssl,
                         ssl->arrays->server_hint, ssl->arrays->client_identity,
                         MAX_PSK_ID_LEN, ssl->arrays->psk_key, MAX_PSK_KEY_LEN);
+                }
                 if (ssl->arrays->psk_keySz == 0 ||
                                      ssl->arrays->psk_keySz > MAX_PSK_KEY_LEN) {
                     return PSK_KEY_ERROR;
@@ -9210,7 +10217,8 @@
 
                 /* Pre-shared key modes: mandatory extension for resumption. */
                 modes = 1 << PSK_KE;
-            #if !defined(NO_DH) || defined(HAVE_ECC) || defined(HAVE_CURVE25519)
+            #if !defined(NO_DH) || defined(HAVE_ECC) || \
+                              defined(HAVE_CURVE25519) || defined(HAVE_CURVE448)
                 if (!ssl->options.noPskDheKe)
                     modes |= 1 << PSK_DHE_KE;
             #endif
@@ -9255,8 +10263,10 @@
         PF_VALIDATE_REQUEST(ssl, semaphore);
         QSH_VALIDATE_REQUEST(ssl, semaphore);
         WOLF_STK_VALIDATE_REQUEST(ssl);
+#if !defined(WOLFSSL_NO_SIGALG)
         if (ssl->suites->hashSigAlgoSz == 0)
             TURN_ON(semaphore, TLSX_ToSemaphore(TLSX_SIGNATURE_ALGORITHMS));
+#endif
 #if defined(WOLFSSL_TLS13)
         if (!IsAtLeastTLSv1_2(ssl))
             TURN_ON(semaphore, TLSX_ToSemaphore(TLSX_SUPPORTED_VERSIONS));
@@ -9274,18 +10284,6 @@
             TURN_ON(semaphore, TLSX_ToSemaphore(TLSX_POST_HANDSHAKE_AUTH));
     #endif
         }
-    #if defined(HAVE_SESSION_TICKET) || !defined(NO_PSK)
-        if (IsAtLeastTLSv1_3(ssl->version) && ssl->options.noPskDheKe) {
-        #if !defined(NO_PSK)
-            if (ssl->options.havePSK)
-                TURN_ON(semaphore, TLSX_ToSemaphore(TLSX_KEY_SHARE));
-        #endif
-        #if defined(HAVE_SESSION_TICKET)
-            if (ssl->options.resuming)
-                TURN_ON(semaphore, TLSX_ToSemaphore(TLSX_KEY_SHARE));
-        #endif
-        }
-    #endif
 #endif
     #if defined(HAVE_CERTIFICATE_STATUS_REQUEST) \
      || defined(HAVE_CERTIFICATE_STATUS_REQUEST_V2)
@@ -9296,11 +10294,14 @@
         }
     #endif
     }
+
 #ifdef WOLFSSL_TLS13
     #ifndef NO_CERTS
     else if (msgType == certificate_request) {
         XMEMSET(semaphore, 0xff, SEMAPHORE_SIZE);
+#if !defined(WOLFSSL_NO_SIGALG)
         TURN_OFF(semaphore, TLSX_ToSemaphore(TLSX_SIGNATURE_ALGORITHMS));
+#endif
         /* TODO: TLSX_SIGNED_CERTIFICATE_TIMESTAMP,
          *       TLSX_CERTIFICATE_AUTHORITIES, OID_FILTERS
          *       TLSX_STATUS_REQUEST
@@ -9308,15 +10309,20 @@
     }
     #endif
 #endif
-
-    if (ssl->extensions)
+    if (ssl->extensions) {
         ret = TLSX_GetSize(ssl->extensions, semaphore, msgType, &length);
-    if (ssl->ctx && ssl->ctx->extensions)
+        if (ret != 0)
+            return ret;
+    }
+    if (ssl->ctx && ssl->ctx->extensions) {
         ret = TLSX_GetSize(ssl->ctx->extensions, semaphore, msgType, &length);
+        if (ret != 0)
+            return ret;
+    }
 
 #ifdef HAVE_EXTENDED_MASTER
     if (msgType == client_hello && ssl->options.haveEMS &&
-                                              !IsAtLeastTLSv1_3(ssl->version)) {
+                  (!IsAtLeastTLSv1_3(ssl->version) || ssl->options.downgrade)) {
         length += HELLO_EXT_SZ;
     }
 #endif
@@ -9346,8 +10352,10 @@
         PF_VALIDATE_REQUEST(ssl, semaphore);
         WOLF_STK_VALIDATE_REQUEST(ssl);
         QSH_VALIDATE_REQUEST(ssl, semaphore);
+#if !defined(WOLFSSL_NO_SIGALG)
         if (ssl->suites->hashSigAlgoSz == 0)
             TURN_ON(semaphore, TLSX_ToSemaphore(TLSX_SIGNATURE_ALGORITHMS));
+#endif
 #ifdef WOLFSSL_TLS13
         if (!IsAtLeastTLSv1_2(ssl))
             TURN_ON(semaphore, TLSX_ToSemaphore(TLSX_SUPPORTED_VERSIONS));
@@ -9365,18 +10373,6 @@
     #endif
         }
     #if defined(HAVE_SESSION_TICKET) || !defined(NO_PSK)
-        if (IsAtLeastTLSv1_3(ssl->version) && ssl->options.noPskDheKe) {
-        #if !defined(NO_PSK)
-            if (ssl->options.havePSK)
-                TURN_ON(semaphore, TLSX_ToSemaphore(TLSX_KEY_SHARE));
-        #endif
-        #if defined(HAVE_SESSION_TICKET)
-            if (ssl->options.resuming)
-                TURN_ON(semaphore, TLSX_ToSemaphore(TLSX_KEY_SHARE));
-        #endif
-        }
-    #endif
-    #if defined(HAVE_SESSION_TICKET) || !defined(NO_PSK)
         /* Must write Pre-shared Key extension at the end in TLS v1.3.
          * Must not write out Pre-shared Key extension in earlier versions of
          * protocol.
@@ -9394,10 +10390,12 @@
     #endif
     }
 #ifdef WOLFSSL_TLS13
-    #ifndef NO_CERT
+    #ifndef NO_CERTS
     else if (msgType == certificate_request) {
         XMEMSET(semaphore, 0xff, SEMAPHORE_SIZE);
+#if !defined(WOLFSSL_NO_SIGALG)
         TURN_OFF(semaphore, TLSX_ToSemaphore(TLSX_SIGNATURE_ALGORITHMS));
+#endif
         /* TODO: TLSX_SIGNED_CERTIFICATE_TIMESTAMP,
          *       TLSX_CERTIFICATE_AUTHORITIES, TLSX_OID_FILTERS
          *       TLSX_STATUS_REQUEST
@@ -9405,19 +10403,23 @@
     }
     #endif
 #endif
-
     if (ssl->extensions) {
         ret = TLSX_Write(ssl->extensions, output + offset, semaphore,
                          msgType, &offset);
+        if (ret != 0)
+            return ret;
     }
     if (ssl->ctx && ssl->ctx->extensions) {
         ret = TLSX_Write(ssl->ctx->extensions, output + offset, semaphore,
                          msgType, &offset);
+        if (ret != 0)
+            return ret;
     }
 
 #ifdef HAVE_EXTENDED_MASTER
     if (msgType == client_hello && ssl->options.haveEMS &&
-                                              !IsAtLeastTLSv1_3(ssl->version)) {
+                  (!IsAtLeastTLSv1_3(ssl->version) || ssl->options.downgrade)) {
+        WOLFSSL_MSG("EMS extension to write");
         c16toa(HELLO_EXT_EXTMS, output + offset);
         offset += HELLO_EXT_TYPE_SZ;
         c16toa(0, output + offset);
@@ -9432,6 +10434,8 @@
         TURN_OFF(semaphore, TLSX_ToSemaphore(TLSX_PRE_SHARED_KEY));
         ret = TLSX_Write(ssl->extensions, output + offset, semaphore,
                          client_hello, &offset);
+        if (ret != 0)
+            return ret;
     }
     #endif
 #endif
@@ -9460,7 +10464,7 @@
         case server_hello:
             PF_VALIDATE_RESPONSE(ssl, semaphore);
     #ifdef WOLFSSL_TLS13
-                if (ssl->options.tls1_3) {
+                if (IsAtLeastTLSv1_3(ssl->version)) {
                     XMEMSET(semaphore, 0xff, SEMAPHORE_SIZE);
         #ifndef WOLFSSL_TLS13_DRAFT_18
                     TURN_OFF(semaphore,
@@ -9505,6 +10509,9 @@
         #ifdef HAVE_CERTIFICATE_STATUS_REQUEST
             TURN_ON(semaphore, TLSX_ToSemaphore(TLSX_STATUS_REQUEST));
         #endif
+        #if defined(HAVE_SECURE_RENEGOTIATION)
+            TURN_ON(semaphore, TLSX_ToSemaphore(TLSX_RENEGOTIATION_INFO));
+        #endif
             break;
 
         #ifdef WOLFSSL_EARLY_DATA
@@ -9519,7 +10526,7 @@
 #endif
 
 #ifdef WOLFSSL_TLS13
-    #ifndef NO_CERT
+    #ifndef NO_CERTS
         case certificate:
             XMEMSET(semaphore, 0xff, SEMAPHORE_SIZE);
             TURN_OFF(semaphore, TLSX_ToSemaphore(TLSX_STATUS_REQUEST));
@@ -9541,12 +10548,17 @@
     #endif
 
 #ifdef HAVE_EXTENDED_MASTER
-    if (ssl->options.haveEMS && msgType == server_hello)
+    if (ssl->options.haveEMS && msgType == server_hello &&
+                                              !IsAtLeastTLSv1_3(ssl->version)) {
         length += HELLO_EXT_SZ;
-#endif
-
-    if (TLSX_SupportExtensions(ssl))
+    }
+#endif
+
+    if (TLSX_SupportExtensions(ssl)) {
         ret = TLSX_GetSize(ssl->extensions, semaphore, msgType, &length);
+        if (ret != 0)
+            return ret;
+    }
 
     /* All the response data is set at the ssl object only, so no ctx here. */
 
@@ -9572,7 +10584,7 @@
             case server_hello:
                 PF_VALIDATE_RESPONSE(ssl, semaphore);
     #ifdef WOLFSSL_TLS13
-                if (ssl->options.tls1_3) {
+                if (IsAtLeastTLSv1_3(ssl->version)) {
                     XMEMSET(semaphore, 0xff, SEMAPHORE_SIZE);
         #ifndef WOLFSSL_TLS13_DRAFT_18
                     TURN_OFF(semaphore,
@@ -9617,6 +10629,9 @@
         #ifdef HAVE_CERTIFICATE_STATUS_REQUEST
                 TURN_ON(semaphore, TLSX_ToSemaphore(TLSX_STATUS_REQUEST));
         #endif
+        #if defined(HAVE_SECURE_RENEGOTIATION)
+            TURN_ON(semaphore, TLSX_ToSemaphore(TLSX_RENEGOTIATION_INFO));
+        #endif
                 break;
 
         #ifdef WOLFSSL_EARLY_DATA
@@ -9647,6 +10662,8 @@
 
         ret = TLSX_Write(ssl->extensions, output + offset, semaphore,
                          msgType, &offset);
+        if (ret != 0)
+            return ret;
 
 #ifdef WOLFSSL_TLS13
         if (msgType == hello_retry_request) {
@@ -9654,11 +10671,15 @@
             TURN_OFF(semaphore, TLSX_ToSemaphore(TLSX_COOKIE));
             ret = TLSX_Write(ssl->extensions, output + offset, semaphore,
                              msgType, &offset);
+            if (ret != 0)
+                return ret;
         }
 #endif
 
 #ifdef HAVE_EXTENDED_MASTER
-        if (ssl->options.haveEMS && msgType == server_hello) {
+        if (ssl->options.haveEMS && msgType == server_hello &&
+                                              !IsAtLeastTLSv1_3(ssl->version)) {
+            WOLFSSL_MSG("EMS extension to write");
             c16toa(HELLO_EXT_EXTMS, output + offset);
             offset += HELLO_EXT_TYPE_SZ;
             c16toa(0, output + offset);
@@ -9678,6 +10699,50 @@
 
 #endif /* WOLFSSL_TLS13 || !NO_WOLFSSL_SERVER */
 
+#ifdef WOLFSSL_TLS13
+int TLSX_ParseVersion(WOLFSSL* ssl, byte* input, word16 length, byte msgType,
+                      int* found)
+{
+    int ret = 0;
+    int offset = 0;
+
+    *found = 0;
+    while (offset < (int)length) {
+        word16 type;
+        word16 size;
+
+        if (offset + (2 * OPAQUE16_LEN) > length) {
+            ret = BUFFER_ERROR;
+            break;
+        }
+
+        ato16(input + offset, &type);
+        offset += HELLO_EXT_TYPE_SZ;
+
+        ato16(input + offset, &size);
+        offset += OPAQUE16_LEN;
+
+        if (offset + size > length) {
+            ret = BUFFER_ERROR;
+            break;
+        }
+
+        if (type == TLSX_SUPPORTED_VERSIONS) {
+            *found = 1;
+
+            WOLFSSL_MSG("Supported Versions extension received");
+
+            ret = SV_PARSE(ssl, input + offset, size, msgType);
+            break;
+        }
+
+        offset += size;
+    }
+
+    return ret;
+}
+#endif
+
 /** Parses a buffer of TLS extensions. */
 int TLSX_Parse(WOLFSSL* ssl, byte* input, word16 length, byte msgType,
                                                                  Suites *suites)
@@ -9721,6 +10786,30 @@
         switch (type) {
             case TLSX_SERVER_NAME:
                 WOLFSSL_MSG("SNI extension received");
+            #ifdef WOLFSSL_DEBUG_TLS
+                WOLFSSL_BUFFER(input + offset, size);
+            #endif
+
+#ifdef WOLFSSL_TLS13
+                if (IsAtLeastTLSv1_3(ssl->version) &&
+                        msgType != client_hello &&
+                        msgType != server_hello &&
+                        msgType != encrypted_extensions) {
+                    return EXT_NOT_ALLOWED;
+                }
+                else if (!IsAtLeastTLSv1_3(ssl->version) &&
+                         msgType == encrypted_extensions) {
+                    return EXT_NOT_ALLOWED;
+                }
+#endif
+                ret = SNI_PARSE(ssl, input + offset, size, isRequest);
+                break;
+
+            case TLSX_TRUSTED_CA_KEYS:
+                WOLFSSL_MSG("Trusted CA extension received");
+            #ifdef WOLFSSL_DEBUG_TLS
+                WOLFSSL_BUFFER(input + offset, size);
+            #endif
 
 #ifdef WOLFSSL_TLS13
                 if (IsAtLeastTLSv1_3(ssl->version) &&
@@ -9729,11 +10818,14 @@
                     return EXT_NOT_ALLOWED;
                 }
 #endif
-                ret = SNI_PARSE(ssl, input + offset, size, isRequest);
+                ret = TCA_PARSE(ssl, input + offset, size, isRequest);
                 break;
 
             case TLSX_MAX_FRAGMENT_LENGTH:
                 WOLFSSL_MSG("Max Fragment Length extension received");
+            #ifdef WOLFSSL_DEBUG_TLS
+                WOLFSSL_BUFFER(input + offset, size);
+            #endif
 
 #ifdef WOLFSSL_TLS13
                 if (IsAtLeastTLSv1_3(ssl->version) &&
@@ -9741,15 +10833,22 @@
                         msgType != encrypted_extensions) {
                     return EXT_NOT_ALLOWED;
                 }
+                else if (!IsAtLeastTLSv1_3(ssl->version) &&
+                         msgType == encrypted_extensions) {
+                    return EXT_NOT_ALLOWED;
+                }
 #endif
                 ret = MFL_PARSE(ssl, input + offset, size, isRequest);
                 break;
 
             case TLSX_TRUNCATED_HMAC:
                 WOLFSSL_MSG("Truncated HMAC extension received");
+            #ifdef WOLFSSL_DEBUG_TLS
+                WOLFSSL_BUFFER(input + offset, size);
+            #endif
 
 #ifdef WOLFSSL_TLS13
-                if (IsAtLeastTLSv1_3(ssl->version) && !ssl->options.downgrade)
+                if (IsAtLeastTLSv1_3(ssl->version))
                     break;
 #endif
                 ret = THM_PARSE(ssl, input + offset, size, isRequest);
@@ -9757,22 +10856,33 @@
 
             case TLSX_SUPPORTED_GROUPS:
                 WOLFSSL_MSG("Supported Groups extension received");
+            #ifdef WOLFSSL_DEBUG_TLS
+                WOLFSSL_BUFFER(input + offset, size);
+            #endif
 
 #ifdef WOLFSSL_TLS13
                 if (IsAtLeastTLSv1_3(ssl->version) &&
                         msgType != client_hello &&
+                        msgType != server_hello &&
                         msgType != encrypted_extensions) {
                     return EXT_NOT_ALLOWED;
                 }
+                else if (!IsAtLeastTLSv1_3(ssl->version) &&
+                         msgType == encrypted_extensions) {
+                    return EXT_NOT_ALLOWED;
+                }
 #endif
                 ret = EC_PARSE(ssl, input + offset, size, isRequest);
                 break;
 
             case TLSX_EC_POINT_FORMATS:
                 WOLFSSL_MSG("Point Formats extension received");
+            #ifdef WOLFSSL_DEBUG_TLS
+                WOLFSSL_BUFFER(input + offset, size);
+            #endif
 
 #ifdef WOLFSSL_TLS13
-                if (IsAtLeastTLSv1_3(ssl->version) && !ssl->options.downgrade)
+                if (IsAtLeastTLSv1_3(ssl->version))
                     break;
 #endif
                 ret = PF_PARSE(ssl, input + offset, size, isRequest);
@@ -9780,16 +10890,26 @@
 
             case TLSX_STATUS_REQUEST:
                 WOLFSSL_MSG("Certificate Status Request extension received");
-
-#ifdef WOLFSSL_TLS13
-                if (IsAtLeastTLSv1_3(ssl->version) && !ssl->options.downgrade)
-                    break;
-#endif
+            #ifdef WOLFSSL_DEBUG_TLS
+                WOLFSSL_BUFFER(input + offset, size);
+            #endif
+
+ #ifdef WOLFSSL_TLS13
+                if (IsAtLeastTLSv1_3(ssl->version) &&
+                        msgType != client_hello &&
+                        msgType != certificate_request &&
+                        msgType != certificate) {
+                     break;
+                }
+ #endif
                 ret = CSR_PARSE(ssl, input + offset, size, isRequest);
                 break;
 
             case TLSX_STATUS_REQUEST_V2:
                 WOLFSSL_MSG("Certificate Status Request v2 extension received");
+            #ifdef WOLFSSL_DEBUG_TLS
+                WOLFSSL_BUFFER(input + offset, size);
+            #endif
 
 #ifdef WOLFSSL_TLS13
                 if (IsAtLeastTLSv1_3(ssl->version) &&
@@ -9805,11 +10925,17 @@
 #ifdef HAVE_EXTENDED_MASTER
             case HELLO_EXT_EXTMS:
                 WOLFSSL_MSG("Extended Master Secret extension received");
+            #ifdef WOLFSSL_DEBUG_TLS
+                WOLFSSL_BUFFER(input + offset, size);
+            #endif
 
 #ifdef WOLFSSL_TLS13
-                if (IsAtLeastTLSv1_3(ssl->version) && !ssl->options.downgrade)
+                if (IsAtLeastTLSv1_3(ssl->version))
                     break;
 #endif
+                if (size != 0)
+                    return BUFFER_ERROR;
+
 #ifndef NO_WOLFSSL_SERVER
                 if (isRequest)
                     ssl->options.haveEMS = 1;
@@ -9820,9 +10946,12 @@
 
             case TLSX_RENEGOTIATION_INFO:
                 WOLFSSL_MSG("Secure Renegotiation extension received");
+            #ifdef WOLFSSL_DEBUG_TLS
+                WOLFSSL_BUFFER(input + offset, size);
+            #endif
 
 #ifdef WOLFSSL_TLS13
-                if (IsAtLeastTLSv1_3(ssl->version) && !ssl->options.downgrade)
+                if (IsAtLeastTLSv1_3(ssl->version))
                     break;
 #endif
                 ret = SCR_PARSE(ssl, input + offset, size, isRequest);
@@ -9830,6 +10959,9 @@
 
             case TLSX_SESSION_TICKET:
                 WOLFSSL_MSG("Session Ticket extension received");
+            #ifdef WOLFSSL_DEBUG_TLS
+                WOLFSSL_BUFFER(input + offset, size);
+            #endif
 
 #ifdef WOLFSSL_TLS13
                 if (IsAtLeastTLSv1_3(ssl->version) &&
@@ -9842,9 +10974,12 @@
 
             case TLSX_QUANTUM_SAFE_HYBRID:
                 WOLFSSL_MSG("Quantum-Safe-Hybrid extension received");
+            #ifdef WOLFSSL_DEBUG_TLS
+                WOLFSSL_BUFFER(input + offset, size);
+            #endif
 
 #ifdef WOLFSSL_TLS13
-                if (IsAtLeastTLSv1_3(ssl->version) && !ssl->options.downgrade)
+                if (IsAtLeastTLSv1_3(ssl->version))
                     break;
 #endif
                 ret = QSH_PARSE(ssl, input + offset, size, isRequest);
@@ -9853,22 +10988,33 @@
             case TLSX_APPLICATION_LAYER_PROTOCOL:
                 WOLFSSL_MSG("ALPN extension received");
 
+            #ifdef WOLFSSL_DEBUG_TLS
+                WOLFSSL_BUFFER(input + offset, size);
+            #endif
+
 #ifdef WOLFSSL_TLS13
                 if (IsAtLeastTLSv1_3(ssl->version) &&
                         msgType != client_hello &&
+                        msgType != server_hello &&
                         msgType != encrypted_extensions) {
                     return EXT_NOT_ALLOWED;
                 }
+                else if (!IsAtLeastTLSv1_3(ssl->version) &&
+                         msgType == encrypted_extensions) {
+                    return EXT_NOT_ALLOWED;
+                }
 #endif
                 ret = ALPN_PARSE(ssl, input + offset, size, isRequest);
                 break;
-
+#if !defined(WOLFSSL_NO_SIGALG)
             case TLSX_SIGNATURE_ALGORITHMS:
                 WOLFSSL_MSG("Signature Algorithms extension received");
+            #ifdef WOLFSSL_DEBUG_TLS
+                WOLFSSL_BUFFER(input + offset, size);
+            #endif
 
                 if (!IsAtLeastTLSv1_2(ssl))
                     break;
-
 #ifdef WOLFSSL_TLS13
                 if (IsAtLeastTLSv1_3(ssl->version) &&
                         msgType != client_hello &&
@@ -9878,68 +11024,75 @@
 #endif
                 ret = SA_PARSE(ssl, input + offset, size, isRequest, suites);
                 break;
+#endif
+
+#if defined(HAVE_ENCRYPT_THEN_MAC) && !defined(WOLFSSL_AEAD_ONLY)
+            case TLSX_ENCRYPT_THEN_MAC:
+                WOLFSSL_MSG("Encrypt-Then-Mac extension received");
+
+                /* Ignore for TLS 1.3+ */
+                if (IsAtLeastTLSv1_3(ssl->version))
+                    break;
+
+                ret = ETM_PARSE(ssl, input + offset, size, msgType);
+                break;
+#endif /* HAVE_ENCRYPT_THEN_MAC */
 
 #ifdef WOLFSSL_TLS13
             case TLSX_SUPPORTED_VERSIONS:
-                WOLFSSL_MSG("Supported Versions extension received");
-
-                if (!IsAtLeastTLSv1_3(ssl->ctx->method->version))
-                    break;
-
-                if (IsAtLeastTLSv1_3(ssl->version) &&
-    #ifdef WOLFSSL_TLS13_DRAFT_18
-                        msgType != client_hello
-    #else
-                        msgType != client_hello &&
-                        msgType != server_hello &&
-                        msgType != hello_retry_request
-    #endif
-                   ) {
-                    return EXT_NOT_ALLOWED;
-                }
-                ret = SV_PARSE(ssl, input + offset, size, msgType);
+                WOLFSSL_MSG("Skipping Supported Versions - already processed");
+            #ifdef WOLFSSL_DEBUG_TLS
+                WOLFSSL_BUFFER(input + offset, size);
+            #endif
+
                 break;
 
             case TLSX_COOKIE:
                 WOLFSSL_MSG("Cookie extension received");
+            #ifdef WOLFSSL_DEBUG_TLS
+                WOLFSSL_BUFFER(input + offset, size);
+            #endif
 
                 if (!IsAtLeastTLSv1_3(ssl->version))
                     break;
 
-                if (IsAtLeastTLSv1_3(ssl->version) &&
-                        msgType != client_hello &&
+                if (msgType != client_hello &&
                         msgType != hello_retry_request) {
                     return EXT_NOT_ALLOWED;
                 }
+
                 ret = CKE_PARSE(ssl, input + offset, size, msgType);
                 break;
 
     #if defined(HAVE_SESSION_TICKET) || !defined(NO_PSK)
             case TLSX_PRE_SHARED_KEY:
                 WOLFSSL_MSG("Pre-Shared Key extension received");
-
-                if (!IsAtLeastTLSv1_3(ssl->ctx->method->version))
+            #ifdef WOLFSSL_DEBUG_TLS
+                WOLFSSL_BUFFER(input + offset, size);
+            #endif
+
+                if (!IsAtLeastTLSv1_3(ssl->version))
                     break;
 
-                if (IsAtLeastTLSv1_3(ssl->version) &&
-                        msgType != client_hello &&
-                        msgType != server_hello) {
+                if (msgType != client_hello && msgType != server_hello)
                     return EXT_NOT_ALLOWED;
-                }
+
                 ret = PSK_PARSE(ssl, input + offset, size, msgType);
                 pskDone = 1;
                 break;
 
             case TLSX_PSK_KEY_EXCHANGE_MODES:
                 WOLFSSL_MSG("PSK Key Exchange Modes extension received");
+            #ifdef WOLFSSL_DEBUG_TLS
+                WOLFSSL_BUFFER(input + offset, size);
+            #endif
 
                 if (!IsAtLeastTLSv1_3(ssl->version))
                     break;
 
-                if (IsAtLeastTLSv1_3(ssl->version) &&
-                        msgType != client_hello) {
+                if (msgType != client_hello)
                     return EXT_NOT_ALLOWED;
-                }
+
                 ret = PKM_PARSE(ssl, input + offset, size, msgType);
                 break;
     #endif
@@ -9947,14 +11100,20 @@
     #ifdef WOLFSSL_EARLY_DATA
             case TLSX_EARLY_DATA:
                 WOLFSSL_MSG("Early Data extension received");
+            #ifdef WOLFSSL_DEBUG_TLS
+                WOLFSSL_BUFFER(input + offset, size);
+            #endif
 
                 if (!IsAtLeastTLSv1_3(ssl->version))
                     break;
 
-                if (IsAtLeastTLSv1_3(ssl->version) &&
-                         msgType != client_hello &&
-                         msgType != session_ticket &&
-                         msgType != encrypted_extensions) {
+                if (msgType != client_hello && msgType != session_ticket &&
+                        msgType != encrypted_extensions) {
+                    return EXT_NOT_ALLOWED;
+                }
+                if (!IsAtLeastTLSv1_3(ssl->version) &&
+                        (msgType == session_ticket ||
+                         msgType == encrypted_extensions)) {
                     return EXT_NOT_ALLOWED;
                 }
                 ret = EDI_PARSE(ssl, input + offset, size, msgType);
@@ -9964,14 +11123,16 @@
     #ifdef WOLFSSL_POST_HANDSHAKE_AUTH
             case TLSX_POST_HANDSHAKE_AUTH:
                 WOLFSSL_MSG("Post Handshake Authentication extension received");
+            #ifdef WOLFSSL_DEBUG_TLS
+                WOLFSSL_BUFFER(input + offset, size);
+            #endif
 
                 if (!IsAtLeastTLSv1_3(ssl->version))
                     break;
 
-                if (IsAtLeastTLSv1_3(ssl->version) &&
-                        msgType != client_hello) {
+                if (msgType != client_hello)
                     return EXT_NOT_ALLOWED;
-                }
+
                 ret = PHA_PARSE(ssl, input + offset, size, msgType);
                 break;
     #endif
@@ -9979,15 +11140,21 @@
     #if !defined(WOLFSSL_TLS13_DRAFT_18) && !defined(WOLFSSL_TLS13_DRAFT_22)
             case TLSX_SIGNATURE_ALGORITHMS_CERT:
                 WOLFSSL_MSG("Signature Algorithms extension received");
+            #ifdef WOLFSSL_DEBUG_TLS
+                WOLFSSL_BUFFER(input + offset, size);
+            #endif
 
                 if (!IsAtLeastTLSv1_3(ssl->version))
                     break;
 
-                if (IsAtLeastTLSv1_3(ssl->version) &&
-                        msgType != client_hello &&
+                if (msgType != client_hello &&
                         msgType != certificate_request) {
                     return EXT_NOT_ALLOWED;
                 }
+                if (!IsAtLeastTLSv1_3(ssl->version) &&
+                        msgType == certificate_request) {
+                    return EXT_NOT_ALLOWED;
+                }
 
                 ret = SAC_PARSE(ssl, input + offset, size, isRequest);
                 break;
@@ -9995,19 +11162,22 @@
 
             case TLSX_KEY_SHARE:
                 WOLFSSL_MSG("Key Share extension received");
-
-                if (!IsAtLeastTLSv1_3(ssl->ctx->method->version))
+            #ifdef WOLFSSL_DEBUG_TLS
+                WOLFSSL_BUFFER(input + offset, size);
+            #endif
+
+                if (!IsAtLeastTLSv1_3(ssl->version))
                     break;
 
-                if (IsAtLeastTLSv1_3(ssl->ctx->method->version) &&
-                        msgType != client_hello &&
-                        msgType != server_hello &&
+                if (msgType != client_hello && msgType != server_hello &&
                         msgType != hello_retry_request) {
                     return EXT_NOT_ALLOWED;
                 }
                 ret = KS_PARSE(ssl, input + offset, size, msgType);
                 break;
 #endif
+            default:
+                WOLFSSL_MSG("Unknown TLS extension type");
         }
 
         /* offset should be updated here! */
@@ -10021,6 +11191,8 @@
 
     if (ret == 0)
         ret = SNI_VERIFY_PARSE(ssl, isRequest);
+    if (ret == 0)
+        ret = TCA_VERIFY_PARSE(ssl, isRequest);
 
     return ret;
 }
@@ -10034,41 +11206,49 @@
 
 #ifndef NO_WOLFSSL_CLIENT
 
+    WOLFSSL_METHOD* wolfTLS_client_method(void)
+    {
+        return wolfTLS_client_method_ex(NULL);
+    }
+    WOLFSSL_METHOD* wolfTLS_client_method_ex(void* heap)
+    {
+        WOLFSSL_METHOD* method =
+                              (WOLFSSL_METHOD*) XMALLOC(sizeof(WOLFSSL_METHOD),
+                                                     heap, DYNAMIC_TYPE_METHOD);
+        (void)heap;
+        WOLFSSL_ENTER("TLS_client_method_ex");
+        if (method) {
+        #if defined(WOLFSSL_TLS13)
+            InitSSL_Method(method, MakeTLSv1_3());
+        #elif !defined(WOLFSSL_NO_TLS12)
+            InitSSL_Method(method, MakeTLSv1_2());
+        #elif !defined(NO_OLD_TLS)
+            InitSSL_Method(method, MakeTLSv1_1());
+        #elif defined(WOLFSSL_ALLOW_TLSV10)
+            InitSSL_Method(method, MakeTLSv1());
+        #else
+            #error No TLS version enabled!
+        #endif
+
+            method->downgrade = 1;
+            method->side      = WOLFSSL_CLIENT_END;
+        }
+        return method;
+    }
+
 #ifndef NO_OLD_TLS
-
     #ifdef WOLFSSL_ALLOW_TLSV10
-    #if defined(OPENSSL_EXTRA) || defined(OPENSSL_ALL)
-    /* Gets a WOLFSL_METHOD type that is not set as client or server
-     *
-     * Returns a pointer to a WOLFSSL_METHOD struct
-     */
-    WOLFSSL_METHOD* wolfTLSv1_method(void) {
-        WOLFSSL_METHOD* m;
-        WOLFSSL_ENTER("wolfTLSv1_method");
-    #ifndef NO_WOLFSSL_CLIENT
-        m = wolfTLSv1_client_method();
-    #else
-        m = wolfTLSv1_server_method();
-    #endif
-        if (m != NULL) {
-            m->side = WOLFSSL_NEITHER_END;
-        }
-
-        return m;
-    }
-    #endif /* OPENSSL_EXTRA || OPENSSL_ALL*/
-
     WOLFSSL_METHOD* wolfTLSv1_client_method(void)
     {
         return wolfTLSv1_client_method_ex(NULL);
     }
-
     WOLFSSL_METHOD* wolfTLSv1_client_method_ex(void* heap)
     {
         WOLFSSL_METHOD* method =
                              (WOLFSSL_METHOD*) XMALLOC(sizeof(WOLFSSL_METHOD),
                                                      heap, DYNAMIC_TYPE_METHOD);
         (void)heap;
+        WOLFSSL_ENTER("TLSv1_client_method_ex");
         if (method)
             InitSSL_Method(method, MakeTLSv1());
         return method;
@@ -10079,38 +11259,36 @@
     {
         return wolfTLSv1_1_client_method_ex(NULL);
     }
-
     WOLFSSL_METHOD* wolfTLSv1_1_client_method_ex(void* heap)
     {
         WOLFSSL_METHOD* method =
                               (WOLFSSL_METHOD*) XMALLOC(sizeof(WOLFSSL_METHOD),
                                                      heap, DYNAMIC_TYPE_METHOD);
         (void)heap;
+        WOLFSSL_ENTER("TLSv1_1_client_method_ex");
         if (method)
             InitSSL_Method(method, MakeTLSv1_1());
         return method;
     }
-
 #endif /* !NO_OLD_TLS */
 
 #ifndef WOLFSSL_NO_TLS12
-
+    WOLFSSL_ABI
     WOLFSSL_METHOD* wolfTLSv1_2_client_method(void)
     {
         return wolfTLSv1_2_client_method_ex(NULL);
     }
-
     WOLFSSL_METHOD* wolfTLSv1_2_client_method_ex(void* heap)
     {
         WOLFSSL_METHOD* method =
                               (WOLFSSL_METHOD*) XMALLOC(sizeof(WOLFSSL_METHOD),
                                                      heap, DYNAMIC_TYPE_METHOD);
         (void)heap;
+        WOLFSSL_ENTER("TLSv1_2_client_method_ex");
         if (method)
             InitSSL_Method(method, MakeTLSv1_2());
         return method;
     }
-
 #endif /* WOLFSSL_NO_TLS12 */
 
 #ifdef WOLFSSL_TLS13
@@ -10118,6 +11296,7 @@
      *
      * returns the method data for a TLS v1.3 client.
      */
+    WOLFSSL_ABI
     WOLFSSL_METHOD* wolfTLSv1_3_client_method(void)
     {
         return wolfTLSv1_3_client_method_ex(NULL);
@@ -10134,63 +11313,295 @@
                                  XMALLOC(sizeof(WOLFSSL_METHOD), heap,
                                          DYNAMIC_TYPE_METHOD);
         (void)heap;
+        WOLFSSL_ENTER("TLSv1_3_client_method_ex");
         if (method)
             InitSSL_Method(method, MakeTLSv1_3());
         return method;
     }
 #endif /* WOLFSSL_TLS13 */
 
-
-    WOLFSSL_METHOD* wolfSSLv23_client_method(void)
+#ifdef WOLFSSL_DTLS
+
+    WOLFSSL_METHOD* wolfDTLS_client_method(void)
     {
-        return wolfSSLv23_client_method_ex(NULL);
-    }
-
-
-    WOLFSSL_METHOD* wolfSSLv23_client_method_ex(void* heap)
+        return wolfDTLS_client_method_ex(NULL);
+    }
+    WOLFSSL_METHOD* wolfDTLS_client_method_ex(void* heap)
     {
         WOLFSSL_METHOD* method =
                               (WOLFSSL_METHOD*) XMALLOC(sizeof(WOLFSSL_METHOD),
                                                      heap, DYNAMIC_TYPE_METHOD);
         (void)heap;
+        WOLFSSL_ENTER("DTLS_client_method_ex");
         if (method) {
-#if !defined(NO_SHA256) || defined(WOLFSSL_SHA384) || defined(WOLFSSL_SHA512)
-#if defined(WOLFSSL_TLS13)
-            InitSSL_Method(method, MakeTLSv1_3());
-#else
-            InitSSL_Method(method, MakeTLSv1_2());
-#endif
-#else
-    #ifndef NO_OLD_TLS
-            InitSSL_Method(method, MakeTLSv1_1());
-    #endif
-#endif
-#if !defined(NO_OLD_TLS) || defined(WOLFSSL_TLS13)
+        #if !defined(WOLFSSL_NO_TLS12)
+            InitSSL_Method(method, MakeDTLSv1_2());
+        #elif !defined(NO_OLD_TLS)
+            InitSSL_Method(method, MakeDTLSv1());
+        #else
+            #error No DTLS version enabled!
+        #endif
+
             method->downgrade = 1;
-#endif
+            method->side      = WOLFSSL_CLIENT_END;
         }
         return method;
     }
 
+    #ifndef NO_OLD_TLS
+    WOLFSSL_METHOD* wolfDTLSv1_client_method(void)
+    {
+        return wolfDTLSv1_client_method_ex(NULL);
+    }
+    WOLFSSL_METHOD* wolfDTLSv1_client_method_ex(void* heap)
+    {
+        WOLFSSL_METHOD* method =
+                          (WOLFSSL_METHOD*) XMALLOC(sizeof(WOLFSSL_METHOD),
+                                                 heap, DYNAMIC_TYPE_METHOD);
+        (void)heap;
+        WOLFSSL_ENTER("DTLSv1_client_method_ex");
+        if (method)
+            InitSSL_Method(method, MakeDTLSv1());
+        return method;
+    }
+    #endif  /* NO_OLD_TLS */
+
+    #ifndef WOLFSSL_NO_TLS12
+    WOLFSSL_METHOD* wolfDTLSv1_2_client_method(void)
+    {
+        return wolfDTLSv1_2_client_method_ex(NULL);
+    }
+    WOLFSSL_METHOD* wolfDTLSv1_2_client_method_ex(void* heap)
+    {
+        WOLFSSL_METHOD* method =
+                          (WOLFSSL_METHOD*) XMALLOC(sizeof(WOLFSSL_METHOD),
+                                                 heap, DYNAMIC_TYPE_METHOD);
+        (void)heap;
+        WOLFSSL_ENTER("DTLSv1_2_client_method_ex");
+        if (method)
+            InitSSL_Method(method, MakeDTLSv1_2());
+        (void)heap;
+        return method;
+    }
+    #endif /* !WOLFSSL_NO_TLS12 */
+#endif /* WOLFSSL_DTLS */
+
 #endif /* NO_WOLFSSL_CLIENT */
 
 
+/* EITHER SIDE METHODS */
+#if defined(OPENSSL_EXTRA) || defined(WOLFSSL_EITHER_SIDE)
+    #ifndef NO_OLD_TLS
+    #ifdef WOLFSSL_ALLOW_TLSV10
+    /* Gets a WOLFSL_METHOD type that is not set as client or server
+     *
+     * Returns a pointer to a WOLFSSL_METHOD struct
+     */
+    WOLFSSL_METHOD* wolfTLSv1_method(void)
+    {
+        return wolfTLSv1_method_ex(NULL);
+    }
+    WOLFSSL_METHOD* wolfTLSv1_method_ex(void* heap)
+    {
+        WOLFSSL_METHOD* m;
+        WOLFSSL_ENTER("TLSv1_method");
+    #ifndef NO_WOLFSSL_CLIENT
+        m = wolfTLSv1_client_method_ex(heap);
+    #else
+        m = wolfTLSv1_server_method_ex(heap);
+    #endif
+        if (m != NULL) {
+            m->side = WOLFSSL_NEITHER_END;
+        }
+
+        return m;
+    }
+    #endif /* WOLFSSL_ALLOW_TLSV10 */
+
+    /* Gets a WOLFSL_METHOD type that is not set as client or server
+     *
+     * Returns a pointer to a WOLFSSL_METHOD struct
+     */
+    WOLFSSL_METHOD* wolfTLSv1_1_method(void)
+    {
+        return wolfTLSv1_1_method_ex(NULL);
+    }
+    WOLFSSL_METHOD* wolfTLSv1_1_method_ex(void* heap)
+    {
+        WOLFSSL_METHOD* m;
+        WOLFSSL_ENTER("TLSv1_1_method");
+    #ifndef NO_WOLFSSL_CLIENT
+        m = wolfTLSv1_1_client_method_ex(heap);
+    #else
+        m = wolfTLSv1_1_server_method_ex(heap);
+    #endif
+        if (m != NULL) {
+            m->side = WOLFSSL_NEITHER_END;
+        }
+        return m;
+    }
+    #endif /* !NO_OLD_TLS */
+
+    #ifndef WOLFSSL_NO_TLS12
+    /* Gets a WOLFSL_METHOD type that is not set as client or server
+     *
+     * Returns a pointer to a WOLFSSL_METHOD struct
+     */
+    WOLFSSL_METHOD* wolfTLSv1_2_method(void)
+    {
+        return wolfTLSv1_2_method_ex(NULL);
+    }
+    WOLFSSL_METHOD* wolfTLSv1_2_method_ex(void* heap)
+    {
+        WOLFSSL_METHOD* m;
+        WOLFSSL_ENTER("TLSv1_2_method");
+    #ifndef NO_WOLFSSL_CLIENT
+        m = wolfTLSv1_2_client_method_ex(heap);
+    #else
+        m = wolfTLSv1_2_server_method_ex(heap);
+    #endif
+        if (m != NULL) {
+            m->side = WOLFSSL_NEITHER_END;
+        }
+        return m;
+    }
+    #endif /* !WOLFSSL_NO_TLS12 */
+
+    #ifdef WOLFSSL_TLS13
+    /* Gets a WOLFSL_METHOD type that is not set as client or server
+     *
+     * Returns a pointer to a WOLFSSL_METHOD struct
+     */
+    WOLFSSL_METHOD* wolfTLSv1_3_method(void)
+    {
+        return wolfTLSv1_3_method_ex(NULL);
+    }
+    WOLFSSL_METHOD* wolfTLSv1_3_method_ex(void* heap)
+    {
+        WOLFSSL_METHOD* m;
+        WOLFSSL_ENTER("TLSv1_3_method");
+    #ifndef NO_WOLFSSL_CLIENT
+        m = wolfTLSv1_3_client_method_ex(heap);
+    #else
+        m = wolfTLSv1_3_server_method_ex(heap);
+    #endif
+        if (m != NULL) {
+            m->side = WOLFSSL_NEITHER_END;
+        }
+        return m;
+    }
+    #endif /* WOLFSSL_TLS13 */
+
+#ifdef WOLFSSL_DTLS
+    WOLFSSL_METHOD* wolfDTLS_method(void)
+    {
+        return wolfDTLS_method_ex(NULL);
+    }
+    WOLFSSL_METHOD* wolfDTLS_method_ex(void* heap)
+    {
+        WOLFSSL_METHOD* m;
+        WOLFSSL_ENTER("DTLS_method_ex");
+    #ifndef NO_WOLFSSL_CLIENT
+        m = wolfDTLS_client_method_ex(heap);
+    #else
+        m = wolfDTLS_server_method_ex(heap);
+    #endif
+        if (m != NULL) {
+            m->side = WOLFSSL_NEITHER_END;
+        }
+        return m;
+    }
+
+    #ifndef NO_OLD_TLS
+    WOLFSSL_METHOD* wolfDTLSv1_method(void)
+    {
+        return wolfDTLSv1_method_ex(NULL);
+    }
+    WOLFSSL_METHOD* wolfDTLSv1_method_ex(void* heap)
+    {
+        WOLFSSL_METHOD* m;
+        WOLFSSL_ENTER("DTLSv1_method_ex");
+    #ifndef NO_WOLFSSL_CLIENT
+        m = wolfDTLSv1_client_method_ex(heap);
+    #else
+        m = wolfDTLSv1_server_method_ex(heap);
+    #endif
+        if (m != NULL) {
+            m->side = WOLFSSL_NEITHER_END;
+        }
+        return m;
+    }
+    #endif /* !NO_OLD_TLS */
+    #ifndef WOLFSSL_NO_TLS12
+    WOLFSSL_METHOD* wolfDTLSv1_2_method(void)
+    {
+        return wolfDTLSv1_2_method_ex(NULL);
+    }
+    WOLFSSL_METHOD* wolfDTLSv1_2_method_ex(void* heap)
+    {
+        WOLFSSL_METHOD* m;
+        WOLFSSL_ENTER("DTLSv1_2_method");
+    #ifndef NO_WOLFSSL_CLIENT
+        m = wolfDTLSv1_2_client_method_ex(heap);
+    #else
+        m = wolfDTLSv1_2_server_method_ex(heap);
+    #endif
+        if (m != NULL) {
+            m->side = WOLFSSL_NEITHER_END;
+        }
+        return m;
+    }
+    #endif /* !WOLFSSL_NO_TLS12 */
+#endif /* WOLFSSL_DTLS */
+#endif /* OPENSSL_EXTRA || WOLFSSL_EITHER_SIDE */
+
 
 #ifndef NO_WOLFSSL_SERVER
 
+    WOLFSSL_METHOD* wolfTLS_server_method(void)
+    {
+        return wolfTLS_server_method_ex(NULL);
+    }
+
+    WOLFSSL_METHOD* wolfTLS_server_method_ex(void* heap)
+    {
+        WOLFSSL_METHOD* method =
+                              (WOLFSSL_METHOD*) XMALLOC(sizeof(WOLFSSL_METHOD),
+                                                     heap, DYNAMIC_TYPE_METHOD);
+        (void)heap;
+        WOLFSSL_ENTER("TLS_server_method_ex");
+        if (method) {
+        #if defined(WOLFSSL_TLS13)
+            InitSSL_Method(method, MakeTLSv1_3());
+        #elif !defined(WOLFSSL_NO_TLS12)
+            InitSSL_Method(method, MakeTLSv1_2());
+        #elif !defined(NO_OLD_TLS)
+            InitSSL_Method(method, MakeTLSv1_1());
+        #elif defined(WOLFSSL_ALLOW_TLSV10)
+            InitSSL_Method(method, MakeTLSv1());
+        #else
+            #error No TLS version enabled!
+        #endif
+
+            method->downgrade = 1;
+            method->side      = WOLFSSL_SERVER_END;
+        }
+        return method;
+    }
+
 #ifndef NO_OLD_TLS
     #ifdef WOLFSSL_ALLOW_TLSV10
     WOLFSSL_METHOD* wolfTLSv1_server_method(void)
     {
         return wolfTLSv1_server_method_ex(NULL);
     }
-
     WOLFSSL_METHOD* wolfTLSv1_server_method_ex(void* heap)
     {
         WOLFSSL_METHOD* method =
                               (WOLFSSL_METHOD*) XMALLOC(sizeof(WOLFSSL_METHOD),
                                                      heap, DYNAMIC_TYPE_METHOD);
         (void)heap;
+        WOLFSSL_ENTER("TLSv1_server_method_ex");
         if (method) {
             InitSSL_Method(method, MakeTLSv1());
             method->side = WOLFSSL_SERVER_END;
@@ -10198,38 +11609,18 @@
         return method;
     }
     #endif /* WOLFSSL_ALLOW_TLSV10 */
-    
-    #if defined(OPENSSL_EXTRA) || defined(OPENSSL_ALL)
-    /* Gets a WOLFSL_METHOD type that is not set as client or server
-     *
-     * Returns a pointer to a WOLFSSL_METHOD struct
-     */
-    WOLFSSL_METHOD* wolfTLSv1_1_method(void) {
-        WOLFSSL_METHOD* m;
-        WOLFSSL_ENTER("wolfTLSv1_1_method");
-    #ifndef NO_WOLFSSL_CLIENT
-        m = wolfTLSv1_1_client_method();
-    #else
-        m = wolfTLSv1_1_server_method();
-    #endif
-        if (m != NULL) {
-            m->side = WOLFSSL_NEITHER_END;
-        }
-        return m;
-    }
-    #endif /* OPENSSL_EXTRA || OPENSSL_ALL */
 
     WOLFSSL_METHOD* wolfTLSv1_1_server_method(void)
     {
         return wolfTLSv1_1_server_method_ex(NULL);
     }
-
     WOLFSSL_METHOD* wolfTLSv1_1_server_method_ex(void* heap)
     {
         WOLFSSL_METHOD* method =
                               (WOLFSSL_METHOD*) XMALLOC(sizeof(WOLFSSL_METHOD),
                                                      heap, DYNAMIC_TYPE_METHOD);
         (void)heap;
+        WOLFSSL_ENTER("TLSv1_1_server_method_ex");
         if (method) {
             InitSSL_Method(method, MakeTLSv1_1());
             method->side = WOLFSSL_SERVER_END;
@@ -10238,46 +11629,25 @@
     }
 #endif /* !NO_OLD_TLS */
 
+
 #ifndef WOLFSSL_NO_TLS12
-
-    #if defined(OPENSSL_EXTRA) || defined(OPENSSL_ALL)
-    /* Gets a WOLFSL_METHOD type that is not set as client or server
-     *
-     * Returns a pointer to a WOLFSSL_METHOD struct
-     */
-    WOLFSSL_METHOD* wolfTLSv1_2_method(void) {
-        WOLFSSL_METHOD* m;
-        WOLFSSL_ENTER("wolfTLSv1_2_method");
-    #ifndef NO_WOLFSSL_CLIENT
-        m = wolfTLSv1_2_client_method();
-    #else
-        m = wolfTLSv1_2_server_method();
-    #endif
-        if (m != NULL) {
-            m->side = WOLFSSL_NEITHER_END;
-        }
-        return m;
-    }
-    #endif /* OPENSSL_EXTRA || OPENSSL_ALL */
-
     WOLFSSL_METHOD* wolfTLSv1_2_server_method(void)
     {
         return wolfTLSv1_2_server_method_ex(NULL);
     }
-
     WOLFSSL_METHOD* wolfTLSv1_2_server_method_ex(void* heap)
     {
         WOLFSSL_METHOD* method =
                               (WOLFSSL_METHOD*) XMALLOC(sizeof(WOLFSSL_METHOD),
                                                      heap, DYNAMIC_TYPE_METHOD);
         (void)heap;
+        WOLFSSL_ENTER("TLSv1_2_server_method_ex");
         if (method) {
             InitSSL_Method(method, MakeTLSv1_2());
             method->side = WOLFSSL_SERVER_END;
         }
         return method;
     }
-
 #endif /* !WOLFSSL_NO_TLS12 */
 
 #ifdef WOLFSSL_TLS13
@@ -10301,6 +11671,7 @@
                               (WOLFSSL_METHOD*) XMALLOC(sizeof(WOLFSSL_METHOD),
                                                      heap, DYNAMIC_TYPE_METHOD);
         (void)heap;
+        WOLFSSL_ENTER("TLSv1_3_server_method_ex");
         if (method) {
             InitSSL_Method(method, MakeTLSv1_3());
             method->side = WOLFSSL_SERVER_END;
@@ -10309,41 +11680,77 @@
     }
 #endif /* WOLFSSL_TLS13 */
 
-    WOLFSSL_METHOD* wolfSSLv23_server_method(void)
+#ifdef WOLFSSL_DTLS
+    WOLFSSL_METHOD* wolfDTLS_server_method(void)
     {
-        return wolfSSLv23_server_method_ex(NULL);
-    }
-
-    WOLFSSL_METHOD* wolfSSLv23_server_method_ex(void* heap)
+        return wolfDTLS_server_method_ex(NULL);
+    }
+    WOLFSSL_METHOD* wolfDTLS_server_method_ex(void* heap)
     {
         WOLFSSL_METHOD* method =
                               (WOLFSSL_METHOD*) XMALLOC(sizeof(WOLFSSL_METHOD),
                                                      heap, DYNAMIC_TYPE_METHOD);
         (void)heap;
+        WOLFSSL_ENTER("DTLS_server_method_ex");
         if (method) {
-#if !defined(NO_SHA256) || defined(WOLFSSL_SHA384) || defined(WOLFSSL_SHA512)
-#ifdef WOLFSSL_TLS13
-            InitSSL_Method(method, MakeTLSv1_3());
-#else
-            InitSSL_Method(method, MakeTLSv1_2());
-#endif
-#else
-    #ifndef NO_OLD_TLS
-            InitSSL_Method(method, MakeTLSv1_1());
-    #else
-            #error Must have SHA256, SHA384 or SHA512 enabled for TLS 1.2
-    #endif
-#endif
-#if !defined(NO_OLD_TLS) || defined(WOLFSSL_TLS13)
+        #if !defined(WOLFSSL_NO_TLS12)
+            InitSSL_Method(method, MakeDTLSv1_2());
+        #elif !defined(NO_OLD_TLS)
+            InitSSL_Method(method, MakeDTLSv1());
+        #else
+            #error No DTLS version enabled!
+        #endif
+
             method->downgrade = 1;
-#endif
             method->side      = WOLFSSL_SERVER_END;
         }
         return method;
     }
 
+    #ifndef NO_OLD_TLS
+    WOLFSSL_METHOD* wolfDTLSv1_server_method(void)
+    {
+        return wolfDTLSv1_server_method_ex(NULL);
+    }
+    WOLFSSL_METHOD* wolfDTLSv1_server_method_ex(void* heap)
+    {
+        WOLFSSL_METHOD* method =
+                          (WOLFSSL_METHOD*) XMALLOC(sizeof(WOLFSSL_METHOD),
+                                                 heap, DYNAMIC_TYPE_METHOD);
+        (void)heap;
+        WOLFSSL_ENTER("DTLSv1_server_method_ex");
+        if (method) {
+            InitSSL_Method(method, MakeDTLSv1());
+            method->side = WOLFSSL_SERVER_END;
+        }
+        return method;
+    }
+    #endif /* !NO_OLD_TLS */
+
+    #ifndef WOLFSSL_NO_TLS12
+    WOLFSSL_METHOD* wolfDTLSv1_2_server_method(void)
+    {
+        return wolfDTLSv1_2_server_method_ex(NULL);
+    }
+    WOLFSSL_METHOD* wolfDTLSv1_2_server_method_ex(void* heap)
+    {
+        WOLFSSL_METHOD* method =
+                          (WOLFSSL_METHOD*) XMALLOC(sizeof(WOLFSSL_METHOD),
+                                                 heap, DYNAMIC_TYPE_METHOD);
+        WOLFSSL_ENTER("DTLSv1_2_server_method_ex");
+        (void)heap;
+        if (method) {
+            InitSSL_Method(method, MakeDTLSv1_2());
+            method->side = WOLFSSL_SERVER_END;
+        }
+        (void)heap;
+        return method;
+    }
+    #endif /* !WOLFSSL_NO_TLS12 */
+#endif /* WOLFSSL_DTLS */
 
 #endif /* NO_WOLFSSL_SERVER */
+
 #endif /* NO_TLS */
 #endif /* WOLFCRYPT_ONLY */
 
--- a/src/tls13.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/src/tls13.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* tls13.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -30,18 +30,20 @@
  * NO_PSK
  *    Do not enable Pre-Shared Keys.
  * TLS13_SUPPORTS_EXPORTERS
- *    Gaurd to compile out any code for exporter keys.
+ *    Guard to compile out any code for exporter keys.
  *    Feature not supported yet.
  * WOLFSSL_ASYNC_CRYPT
- *    Enables the use of asynchornous cryptographic operations.
+ *    Enables the use of asynchronous cryptographic operations.
  *    This is available for ciphers and certificates.
  * HAVE_CHACHA && HAVE_POLY1305
  *    Enables use of CHACHA20-POLY1305 ciphersuites.
  * WOLFSSL_DEBUG_TLS
- *    Writes out details of TLS 1.3 protocol including hanshake message buffers
+ *    Writes out details of TLS 1.3 protocol including handshake message buffers
  *    and key generation input and output.
  * WOLFSSL_EARLY_DATA
  *    Allow 0-RTT Handshake using Early Data extensions and handshake message
+ * WOLFSSL_EARLY_DATA_GROUP
+ *    Group EarlyData message with ClientHello when sending
  * WOLFSSL_NO_SERVER_GROUPS_EXT
  *    Do not send the server's groups in an extension when the server's top
  *    preference is not in client's list.
@@ -60,7 +62,7 @@
  * WOLFSSL_TLS13_DRAFT_23
  *    Conform with Draft 23 of the TLS v1.3 specification.
  * WOLFSSL_TLS13_MIDDLEBOX_COMPAT
- *    Enable middlebox compatability in the TLS 1.3 handshake.
+ *    Enable middlebox compatibility in the TLS 1.3 handshake.
  *    This includes sending ChangeCipherSpec before encrypted messages and
  *    including a session id.
  * WOLFSSL_TLS13_SHA512
@@ -104,19 +106,6 @@
     #include "libntruencrypt/ntru_crypto.h"
 #endif
 
-#if defined(DEBUG_WOLFSSL) || defined(WOLFSSL_DEBUG) || \
-    defined(CHACHA_AEAD_TEST) || defined(WOLFSSL_SESSION_EXPORT_DEBUG)
-    #if defined(FREESCALE_MQX) || defined(FREESCALE_KSDK_MQX)
-        #if MQX_USE_IO_OLD
-            #include <fio.h>
-        #else
-            #include <nio.h>
-        #endif
-    #else
-        #include <stdio.h>
-    #endif
-#endif
-
 #ifdef __sun
     #include <sys/filio.h>
 #endif
@@ -132,6 +121,14 @@
     #error The build option HAVE_HKDF is required for TLS 1.3
 #endif
 
+#ifndef HAVE_TLS_EXTENSIONS
+    #ifndef _MSC_VER
+        #error "The build option HAVE_TLS_EXTENSIONS is required for TLS 1.3"
+    #else
+        #pragma message("error: The build option HAVE_TLS_EXTENSIONS is required for TLS 1.3")
+    #endif
+#endif
+
 
 /* Set ret to error value and jump to label.
  *
@@ -443,7 +440,6 @@
                              hash, hashOutSz, digestAlg);
 }
 
-
 #ifndef NO_PSK
 #ifdef WOLFSSL_TLS13_DRAFT_18
 /* The length of the binder key label. */
@@ -524,10 +520,21 @@
  */
 static int DeriveEarlyTrafficSecret(WOLFSSL* ssl, byte* key)
 {
+    int ret;
     WOLFSSL_MSG("Derive Early Traffic Secret");
-    return DeriveKey(ssl, key, -1, ssl->arrays->secret,
-                     earlyTrafficLabel, EARLY_TRAFFIC_LABEL_SZ,
-                     ssl->specs.mac_algorithm, 1);
+    ret = DeriveKey(ssl, key, -1, ssl->arrays->secret,
+                    earlyTrafficLabel, EARLY_TRAFFIC_LABEL_SZ,
+                    ssl->specs.mac_algorithm, 1);
+#ifdef HAVE_SECRET_CALLBACK
+    if (ret == 0 && ssl->tls13SecretCb != NULL) {
+        ret = ssl->tls13SecretCb(ssl, CLIENT_EARLY_TRAFFIC_SECRET, key,
+                                 ssl->specs.hash_size, ssl->tls13SecretCtx);
+        if (ret != 0) {
+            return TLS13_SECRET_CB_E;
+        }
+    }
+#endif /* HAVE_SECRET_CALLBACK */
+    return ret;
 }
 
 #ifdef TLS13_SUPPORTS_EXPORTERS
@@ -552,24 +559,35 @@
  */
 static int DeriveEarlyExporterSecret(WOLFSSL* ssl, byte* key)
 {
+    int ret;
     WOLFSSL_MSG("Derive Early Exporter Secret");
-    return DeriveKey(ssl, key, -1, ssl->arrays->secret,
-                     earlyExporterLabel, EARLY_EXPORTER_LABEL_SZ,
-                     ssl->specs.mac_algorithm, 1);
+    ret = DeriveKey(ssl, key, -1, ssl->arrays->secret,
+                    earlyExporterLabel, EARLY_EXPORTER_LABEL_SZ,
+                    ssl->specs.mac_algorithm, 1);
+#ifdef HAVE_SECRET_CALLBACK
+    if (ret == 0 && ssl->tls13SecretCb != NULL) {
+        ret = ssl->tls13SecretCb(ssl, EARLY_EXPORTER_SECRET, key
+                                 ssl->specs.hash_size, ssl->tls13SecretCtx);
+        if (ret != 0) {
+            return TLS13_SECRET_CB_E;
+        }
+    }
+#endif /* HAVE_SECRET_CALLBACK */
+    return ret;
 }
 #endif
 #endif
 
 #ifdef WOLFSSL_TLS13_DRAFT_18
-/* The length of the client hanshake label. */
+/* The length of the client handshake label. */
 #define CLIENT_HANDSHAKE_LABEL_SZ   31
-/* The client hanshake label. */
+/* The client handshake label. */
 static const byte clientHandshakeLabel[CLIENT_HANDSHAKE_LABEL_SZ + 1] =
     "client handshake traffic secret";
 #else
-/* The length of the client hanshake label. */
+/* The length of the client handshake label. */
 #define CLIENT_HANDSHAKE_LABEL_SZ   12
-/* The client hanshake label. */
+/* The client handshake label. */
 static const byte clientHandshakeLabel[CLIENT_HANDSHAKE_LABEL_SZ + 1] =
     "c hs traffic";
 #endif
@@ -581,10 +599,21 @@
  */
 static int DeriveClientHandshakeSecret(WOLFSSL* ssl, byte* key)
 {
+    int ret;
     WOLFSSL_MSG("Derive Client Handshake Secret");
-    return DeriveKey(ssl, key, -1, ssl->arrays->preMasterSecret,
-                     clientHandshakeLabel, CLIENT_HANDSHAKE_LABEL_SZ,
-                     ssl->specs.mac_algorithm, 1);
+    ret = DeriveKey(ssl, key, -1, ssl->arrays->preMasterSecret,
+                    clientHandshakeLabel, CLIENT_HANDSHAKE_LABEL_SZ,
+                    ssl->specs.mac_algorithm, 1);
+#ifdef HAVE_SECRET_CALLBACK
+    if (ret == 0 && ssl->tls13SecretCb != NULL) {
+        ret = ssl->tls13SecretCb(ssl, CLIENT_HANDSHAKE_TRAFFIC_SECRET, key,
+                                 ssl->specs.hash_size, ssl->tls13SecretCtx);
+        if (ret != 0) {
+            return TLS13_SECRET_CB_E;
+        }
+    }
+#endif /* HAVE_SECRET_CALLBACK */
+    return ret;
 }
 
 #ifdef WOLFSSL_TLS13_DRAFT_18
@@ -608,10 +637,21 @@
  */
 static int DeriveServerHandshakeSecret(WOLFSSL* ssl, byte* key)
 {
+    int ret;
     WOLFSSL_MSG("Derive Server Handshake Secret");
-    return DeriveKey(ssl, key, -1, ssl->arrays->preMasterSecret,
-                     serverHandshakeLabel, SERVER_HANDSHAKE_LABEL_SZ,
-                     ssl->specs.mac_algorithm, 1);
+    ret = DeriveKey(ssl, key, -1, ssl->arrays->preMasterSecret,
+                    serverHandshakeLabel, SERVER_HANDSHAKE_LABEL_SZ,
+                    ssl->specs.mac_algorithm, 1);
+#ifdef HAVE_SECRET_CALLBACK
+    if (ret == 0 && ssl->tls13SecretCb != NULL) {
+        ret = ssl->tls13SecretCb(ssl, SERVER_HANDSHAKE_TRAFFIC_SECRET, key,
+                                 ssl->specs.hash_size, ssl->tls13SecretCtx);
+        if (ret != 0) {
+            return TLS13_SECRET_CB_E;
+        }
+    }
+#endif /* HAVE_SECRET_CALLBACK */
+    return ret;
 }
 
 #ifdef WOLFSSL_TLS13_DRAFT_18
@@ -635,10 +675,21 @@
  */
 static int DeriveClientTrafficSecret(WOLFSSL* ssl, byte* key)
 {
+    int ret;
     WOLFSSL_MSG("Derive Client Traffic Secret");
-    return DeriveKey(ssl, key, -1, ssl->arrays->masterSecret,
-                     clientAppLabel, CLIENT_APP_LABEL_SZ,
-                     ssl->specs.mac_algorithm, 1);
+    ret = DeriveKey(ssl, key, -1, ssl->arrays->masterSecret,
+                    clientAppLabel, CLIENT_APP_LABEL_SZ,
+                    ssl->specs.mac_algorithm, 1);
+#ifdef HAVE_SECRET_CALLBACK
+    if (ret == 0 && ssl->tls13SecretCb != NULL) {
+        ret = ssl->tls13SecretCb(ssl, CLIENT_TRAFFIC_SECRET, key,
+                                 ssl->specs.hash_size, ssl->tls13SecretCtx);
+        if (ret != 0) {
+            return TLS13_SECRET_CB_E;
+        }
+    }
+#endif /* HAVE_SECRET_CALLBACK */
+    return ret;
 }
 
 #ifdef WOLFSSL_TLS13_DRAFT_18
@@ -662,10 +713,21 @@
  */
 static int DeriveServerTrafficSecret(WOLFSSL* ssl, byte* key)
 {
+    int ret;
     WOLFSSL_MSG("Derive Server Traffic Secret");
-    return DeriveKey(ssl, key, -1, ssl->arrays->masterSecret,
-                     serverAppLabel, SERVER_APP_LABEL_SZ,
-                     ssl->specs.mac_algorithm, 1);
+    ret = DeriveKey(ssl, key, -1, ssl->arrays->masterSecret,
+                    serverAppLabel, SERVER_APP_LABEL_SZ,
+                    ssl->specs.mac_algorithm, 1);
+#ifdef HAVE_SECRET_CALLBACK
+    if (ret == 0 && ssl->tls13SecretCb != NULL) {
+        ret = ssl->tls13SecretCb(ssl, SERVER_TRAFFIC_SECRET, key,
+                                 ssl->specs.hash_size, ssl->tls13SecretCtx);
+        if (ret != 0) {
+            return TLS13_SECRET_CB_E;
+        }
+    }
+#endif /* HAVE_SECRET_CALLBACK */
+    return ret;
 }
 
 #ifdef TLS13_SUPPORTS_EXPORTERS
@@ -690,10 +752,21 @@
  */
 static int DeriveExporterSecret(WOLFSSL* ssl, byte* key)
 {
+    int ret;
     WOLFSSL_MSG("Derive Exporter Secret");
-    return DeriveKey(ssl, key, -1, ssl->arrays->masterSecret,
-                     exporterMasterLabel, EXPORTER_MASTER_LABEL_SZ,
-                     ssl->specs.mac_algorithm, 1);
+    ret = DeriveKey(ssl, key, -1, ssl->arrays->masterSecret,
+                    exporterMasterLabel, EXPORTER_MASTER_LABEL_SZ,
+                    ssl->specs.mac_algorithm, 1);
+#ifdef HAVE_SECRET_CALLBACK
+    if (ret == 0 && ssl->tls13SecretCb != NULL) {
+        ret = ssl->tls13SecretCb(ssl, EXPORTER_SECRET, key,
+                                 ssl->specs.hash_size, ssl->tls13SecretCtx);
+        if (ret != 0) {
+            return TLS13_SECRET_CB_E;
+        }
+    }
+#endif /* HAVE_SECRET_CALLBACK */
+    return ret;
 }
 #endif
 
@@ -859,9 +932,9 @@
 #if defined(HAVE_SESSION_TICKET)
 /* Length of the resumption label. */
 #define RESUMPTION_LABEL_SZ         10
-/* Resumption label for generating PSK assocated with the ticket. */
+/* Resumption label for generating PSK associated with the ticket. */
 static const byte resumptionLabel[RESUMPTION_LABEL_SZ+1] = "resumption";
-/* Derive the PSK assocated with the ticket.
+/* Derive the PSK associated with the ticket.
  *
  * ssl       The SSL/TLS object.
  * nonce     The nonce to derive with.
@@ -1027,7 +1100,7 @@
     switch (secret) {
 #ifdef WOLFSSL_EARLY_DATA
         case early_data_key:
-            ret = DeriveEarlyTrafficSecret(ssl, ssl->arrays->clientSecret);
+            ret = DeriveEarlyTrafficSecret(ssl, ssl->clientSecret);
             if (ret != 0)
                 goto end;
             break;
@@ -1036,13 +1109,13 @@
         case handshake_key:
             if (provision & PROVISION_CLIENT) {
                 ret = DeriveClientHandshakeSecret(ssl,
-                                                  ssl->arrays->clientSecret);
+                                                  ssl->clientSecret);
                 if (ret != 0)
                     goto end;
             }
             if (provision & PROVISION_SERVER) {
                 ret = DeriveServerHandshakeSecret(ssl,
-                                                  ssl->arrays->serverSecret);
+                                                  ssl->serverSecret);
                 if (ret != 0)
                     goto end;
             }
@@ -1050,12 +1123,12 @@
 
         case traffic_key:
             if (provision & PROVISION_CLIENT) {
-                ret = DeriveClientTrafficSecret(ssl, ssl->arrays->clientSecret);
+                ret = DeriveClientTrafficSecret(ssl, ssl->clientSecret);
                 if (ret != 0)
                     goto end;
             }
             if (provision & PROVISION_SERVER) {
-                ret = DeriveServerTrafficSecret(ssl, ssl->arrays->serverSecret);
+                ret = DeriveServerTrafficSecret(ssl, ssl->serverSecret);
                 if (ret != 0)
                     goto end;
             }
@@ -1063,12 +1136,12 @@
 
         case update_traffic_key:
             if (provision & PROVISION_CLIENT) {
-                ret = DeriveTrafficSecret(ssl, ssl->arrays->clientSecret);
+                ret = DeriveTrafficSecret(ssl, ssl->clientSecret);
                 if (ret != 0)
                     goto end;
             }
             if (provision & PROVISION_SERVER) {
-                ret = DeriveTrafficSecret(ssl, ssl->arrays->serverSecret);
+                ret = DeriveTrafficSecret(ssl, ssl->serverSecret);
                 if (ret != 0)
                     goto end;
             }
@@ -1084,7 +1157,7 @@
         /* Derive the client key.  */
         WOLFSSL_MSG("Derive Client Key");
         ret = DeriveKey(ssl, &key_dig[i], ssl->specs.key_size,
-                        ssl->arrays->clientSecret, writeKeyLabel,
+                        ssl->clientSecret, writeKeyLabel,
                         WRITE_KEY_LABEL_SZ, ssl->specs.mac_algorithm, 0);
         if (ret != 0)
             goto end;
@@ -1095,7 +1168,7 @@
         /* Derive the server key.  */
         WOLFSSL_MSG("Derive Server Key");
         ret = DeriveKey(ssl, &key_dig[i], ssl->specs.key_size,
-                        ssl->arrays->serverSecret, writeKeyLabel,
+                        ssl->serverSecret, writeKeyLabel,
                         WRITE_KEY_LABEL_SZ, ssl->specs.mac_algorithm, 0);
         if (ret != 0)
             goto end;
@@ -1106,7 +1179,7 @@
         /* Derive the client IV.  */
         WOLFSSL_MSG("Derive Client IV");
         ret = DeriveKey(ssl, &key_dig[i], ssl->specs.iv_size,
-                        ssl->arrays->clientSecret, writeIVLabel,
+                        ssl->clientSecret, writeIVLabel,
                         WRITE_IV_LABEL_SZ, ssl->specs.mac_algorithm, 0);
         if (ret != 0)
             goto end;
@@ -1117,7 +1190,7 @@
         /* Derive the server IV.  */
         WOLFSSL_MSG("Derive Server IV");
         ret = DeriveKey(ssl, &key_dig[i], ssl->specs.iv_size,
-                        ssl->arrays->serverSecret, writeIVLabel,
+                        ssl->serverSecret, writeIVLabel,
                         WRITE_IV_LABEL_SZ, ssl->specs.mac_algorithm, 0);
         if (ret != 0)
             goto end;
@@ -1162,6 +1235,13 @@
     {
         return (word32) XTIME(0) * 1000;
     }
+
+#elif defined(XTIME_MS)
+    word32 TimeNowInMilliseconds(void)
+    {
+        return (word32)XTIME_MS(0);
+    }
+
 #elif defined(USE_WINDOWS_API)
     /* The time in milliseconds.
      * Used for tickets to represent difference between when first seen and when
@@ -1198,6 +1278,14 @@
     {
         return (word32)rtp_get_system_sec() * 1000;
     }
+#elif defined(WOLFSSL_DEOS)
+    word32 TimeNowInMilliseconds(void)
+    {
+        const uint32_t systemTickTimeInHz = 1000000 / systemTickInMicroseconds();
+        uint32_t *systemTickPtr = systemTickPointer();
+
+        return (word32) (*systemTickPtr/systemTickTimeInHz) * 1000;
+    }
 #elif defined(MICRIUM)
     /* The time in milliseconds.
      * Used for tickets to represent difference between when first seen and when
@@ -1376,7 +1464,7 @@
  * ssl       The SSL/TLS object.
  * input     The buffer holding the message data.
  * inOutIdx  On entry, the index into the buffer of the handshake data.
- *           On exit, the start of the hanshake data.
+ *           On exit, the start of the handshake data.
  * type      Type of handshake message.
  * size      The length of the handshake message data.
  * totalSz   The total size of data in the buffer.
@@ -1416,6 +1504,7 @@
 #ifdef WOLFSSL_TLS13_DRAFT_18
     rl->pvMinor = TLSv1_MINOR;
 #else
+    /* NOTE: May be TLSv1_MINOR when sending first ClientHello. */
     rl->pvMinor = TLSv1_2_MINOR;
 #endif
     c16toa((word16)length, rl->length);
@@ -1423,7 +1512,7 @@
 
 /* Add handshake header to message.
  *
- * output      The buffer to write the hanshake header into.
+ * output      The buffer to write the handshake header into.
  * length      The length of the handshake data.
  * fragOffset  The offset of the fragment data. (DTLS)
  * fragLength  The length of the fragment data. (DTLS)
@@ -1465,7 +1554,7 @@
 
 
 #ifndef NO_CERTS
-/* Add both record layer and fragement handshake header to message.
+/* Add both record layer and fragment handshake header to message.
  *
  * output      The buffer to write the headers into.
  * fragOffset  The offset of the fragment data. (DTLS)
@@ -1569,6 +1658,9 @@
     ret = wc_Chacha_Process(ssl->encrypt.chacha, poly, poly, sizeof(poly));
     if (ret != 0)
         return ret;
+    ret = wc_Chacha_SetIV(ssl->encrypt.chacha, nonce, 1);
+    if (ret != 0)
+        return ret;
     /* Encrypt the plain text. */
     ret = wc_Chacha_Process(ssl->encrypt.chacha, output, input, sz);
     if (ret != 0) {
@@ -1589,6 +1681,43 @@
 }
 #endif
 
+#ifdef HAVE_NULL_CIPHER
+/* Create authenication tag and copy data over input.
+ *
+ * ssl     The SSL/TLS object.
+ * output  The buffer to copy data into.
+ *         May be the same pointer as input.
+ * input   The data.
+ * sz      The number of bytes of data.
+ * nonce   The nonce to use with authentication.
+ * aad     The additional authentication data.
+ * aadSz   The size of the addition authentication data.
+ * tag     The authentication tag buffer.
+ * returns 0 on success, otherwise failure.
+ */
+static int Tls13IntegrityOnly_Encrypt(WOLFSSL* ssl, byte* output,
+                                      const byte* input, word16 sz,
+                                      const byte* nonce,
+                                      const byte* aad, word16 aadSz, byte* tag)
+{
+    int ret;
+
+    /* HMAC: nonce | aad | input  */
+    ret = wc_HmacUpdate(ssl->encrypt.hmac, nonce, HMAC_NONCE_SZ);
+    if (ret == 0)
+        ret = wc_HmacUpdate(ssl->encrypt.hmac, aad, aadSz);
+    if (ret == 0)
+        ret = wc_HmacUpdate(ssl->encrypt.hmac, input, sz);
+    if (ret == 0)
+        ret = wc_HmacFinal(ssl->encrypt.hmac, tag);
+    /* Copy the input to output if not the same buffer */
+    if (ret == 0 && output != input)
+        XMEMCPY(output, input, sz);
+
+    return ret;
+}
+#endif
+
 /* Encrypt data for TLS v1.3.
  *
  * ssl     The SSL/TLS object.
@@ -1642,6 +1771,7 @@
 #endif
         #endif
 
+        #ifdef CIPHER_NONCE
             if (ssl->encrypt.nonce == NULL)
                 ssl->encrypt.nonce = (byte*)XMALLOC(AEAD_NONCE_SZ,
                                             ssl->heap, DYNAMIC_TYPE_AES_BUFFER);
@@ -1650,6 +1780,7 @@
 
             BuildTls13Nonce(ssl, ssl->encrypt.nonce, ssl->keys.aead_enc_imp_IV,
                             CUR_ORDER);
+        #endif
 
             /* Advance state and proceed */
             ssl->encrypt.state = CIPHER_STATE_DO;
@@ -1670,9 +1801,20 @@
                 #endif
 
                     nonceSz = AESGCM_NONCE_SZ;
+                #if ((defined(HAVE_FIPS) || defined(HAVE_SELFTEST)) && \
+                    (!defined(HAVE_FIPS_VERSION) || (HAVE_FIPS_VERSION < 2)))
                     ret = wc_AesGcmEncrypt(ssl->encrypt.aes, output, input,
                         dataSz, ssl->encrypt.nonce, nonceSz,
                         output + dataSz, macSz, aad, aadSz);
+                #else
+                    ret = wc_AesGcmSetExtIV(ssl->encrypt.aes,
+                            ssl->encrypt.nonce, nonceSz);
+                    if (ret == 0) {
+                        ret = wc_AesGcmEncrypt_ex(ssl->encrypt.aes, output,
+                                input, dataSz, ssl->encrypt.nonce, nonceSz,
+                                output + dataSz, macSz, aad, aadSz);
+                    }
+                #endif
                     break;
             #endif
 
@@ -1687,9 +1829,20 @@
                 #endif
 
                     nonceSz = AESCCM_NONCE_SZ;
+                #if ((defined(HAVE_FIPS) || defined(HAVE_SELFTEST)) && \
+                    (!defined(HAVE_FIPS_VERSION) || (HAVE_FIPS_VERSION < 2)))
                     ret = wc_AesCcmEncrypt(ssl->encrypt.aes, output, input,
                         dataSz, ssl->encrypt.nonce, nonceSz,
                         output + dataSz, macSz, aad, aadSz);
+                #else
+                    ret = wc_AesCcmSetNonce(ssl->encrypt.aes,
+                            ssl->encrypt.nonce, nonceSz);
+                    if (ret == 0) {
+                        ret = wc_AesCcmEncrypt_ex(ssl->encrypt.aes, output,
+                                input, dataSz, ssl->encrypt.nonce, nonceSz,
+                                output + dataSz, macSz, aad, aadSz);
+                    }
+                #endif
                     break;
             #endif
 
@@ -1700,6 +1853,13 @@
                     break;
             #endif
 
+            #ifdef HAVE_NULL_CIPHER
+                case wolfssl_cipher_null:
+                    ret = Tls13IntegrityOnly_Encrypt(ssl, output, input, dataSz,
+                        ssl->encrypt.nonce, aad, aadSz, output + dataSz);
+                    break;
+            #endif
+
                 default:
                     WOLFSSL_MSG("wolfSSL Encrypt programming error");
                     return ENCRYPT_ERROR;
@@ -1725,16 +1885,20 @@
 
         case CIPHER_STATE_END:
         {
-            #ifdef WOLFSSL_DEBUG_TLS
+        #ifdef WOLFSSL_DEBUG_TLS
+            #ifdef CIPHER_NONCE
                 WOLFSSL_MSG("Nonce");
                 WOLFSSL_BUFFER(ssl->encrypt.nonce, ssl->specs.iv_size);
+            #endif
                 WOLFSSL_MSG("Encrypted data");
                 WOLFSSL_BUFFER(output, dataSz);
                 WOLFSSL_MSG("Authentication Tag");
                 WOLFSSL_BUFFER(output + dataSz, macSz);
-            #endif
-
+        #endif
+
+        #ifdef CIPHER_NONCE
             ForceZero(ssl->encrypt.nonce, AEAD_NONCE_SZ);
+        #endif
 
             break;
         }
@@ -1780,6 +1944,9 @@
     ret = wc_Chacha_Process(ssl->decrypt.chacha, poly, poly, sizeof(poly));
     if (ret != 0)
         return ret;
+    ret = wc_Chacha_SetIV(ssl->decrypt.chacha, nonce, 1);
+    if (ret != 0)
+        return ret;
 
     /* Set key for Poly1305. */
     ret = wc_Poly1305SetKey(ssl->auth.poly1305, poly, sizeof(poly));
@@ -1805,6 +1972,48 @@
 }
 #endif
 
+#ifdef HAVE_NULL_CIPHER
+/* Check HMAC tag and copy over input.
+ *
+ * ssl     The SSL/TLS object.
+ * output  The buffer to copy data into.
+ *         May be the same pointer as input.
+ * input   The data.
+ * sz      The number of bytes of data.
+ * nonce   The nonce to use with authentication.
+ * aad     The additional authentication data.
+ * aadSz   The size of the addition authentication data.
+ * tagIn   The authentication tag data from packet.
+ * returns 0 on success, otherwise failure.
+ */
+static int Tls13IntegrityOnly_Decrypt(WOLFSSL* ssl, byte* output,
+                                      const byte* input, word16 sz,
+                                      const byte* nonce,
+                                      const byte* aad, word16 aadSz,
+                                      const byte* tagIn)
+{
+    int ret;
+    byte hmac[WC_MAX_DIGEST_SIZE];
+
+    /* HMAC: nonce | aad | input  */
+    ret = wc_HmacUpdate(ssl->decrypt.hmac, nonce, HMAC_NONCE_SZ);
+    if (ret == 0)
+        ret = wc_HmacUpdate(ssl->decrypt.hmac, aad, aadSz);
+    if (ret == 0)
+        ret = wc_HmacUpdate(ssl->decrypt.hmac, input, sz);
+    if (ret == 0)
+        ret = wc_HmacFinal(ssl->decrypt.hmac, hmac);
+    /* Check authentication tag matches */
+    if (ret == 0 && ConstantCompare(tagIn, hmac, ssl->specs.hash_size) != 0)
+        ret = DECRYPT_ERROR;
+    /* Copy the input to output if not the same buffer */
+    if (ret == 0 && output != input)
+        XMEMCPY(output, input, sz);
+
+    return ret;
+}
+#endif
+
 /* Decrypt data for TLS v1.3.
  *
  * ssl     The SSL/TLS object.
@@ -1867,6 +2076,7 @@
             WOLFSSL_BUFFER(input + dataSz, macSz);
         #endif
 
+        #ifdef CIPHER_NONCE
             if (ssl->decrypt.nonce == NULL)
                 ssl->decrypt.nonce = (byte*)XMALLOC(AEAD_NONCE_SZ,
                                             ssl->heap, DYNAMIC_TYPE_AES_BUFFER);
@@ -1875,6 +2085,7 @@
 
             BuildTls13Nonce(ssl, ssl->decrypt.nonce, ssl->keys.aead_dec_imp_IV,
                             PEER_ORDER);
+        #endif
 
             /* Advance state and proceed */
             ssl->decrypt.state = CIPHER_STATE_DO;
@@ -1937,6 +2148,12 @@
                     break;
             #endif
 
+            #ifdef HAVE_NULL_CIPHER
+                case wolfssl_cipher_null:
+                    ret = Tls13IntegrityOnly_Decrypt(ssl, output, input, dataSz,
+                        ssl->decrypt.nonce, aad, aadSz, input + dataSz);
+                    break;
+            #endif
                 default:
                     WOLFSSL_MSG("wolfSSL Decrypt programming error");
                     return DECRYPT_ERROR;
@@ -1957,13 +2174,17 @@
         case CIPHER_STATE_END:
         {
         #ifdef WOLFSSL_DEBUG_TLS
-            WOLFSSL_MSG("Nonce");
-            WOLFSSL_BUFFER(ssl->decrypt.nonce, ssl->specs.iv_size);
-            WOLFSSL_MSG("Decrypted data");
-            WOLFSSL_BUFFER(output, dataSz);
+            #ifdef CIPHER_NONCE
+                WOLFSSL_MSG("Nonce");
+                WOLFSSL_BUFFER(ssl->decrypt.nonce, ssl->specs.iv_size);
+            #endif
+                WOLFSSL_MSG("Decrypted data");
+                WOLFSSL_BUFFER(output, dataSz);
         #endif
 
+        #ifdef CIPHER_NONCE
             ForceZero(ssl->decrypt.nonce, AEAD_NONCE_SZ);
+        #endif
 
             break;
         }
@@ -2108,15 +2329,15 @@
                     goto exit_buildmsg;
             }
 
+            /* The real record content type goes at the end of the data. */
+            output[args->idx++] = (byte)type;
+
             ssl->options.buildMsgState = BUILD_MSG_ENCRYPT;
         }
         FALL_THROUGH;
 
         case BUILD_MSG_ENCRYPT:
         {
-            /* The real record content type goes at the end of the data. */
-            output[args->idx++] = (byte)type;
-
         #ifdef ATOMIC_USER
             if (ssl->ctx->MacEncryptCb) {
                 /* User Record Layer Callback handling */
@@ -2164,6 +2385,9 @@
 
     /* Final cleanup */
     FreeBuildMsg13Args(ssl, args);
+#ifdef WOLFSSL_ASYNC_CRYPT
+    ssl->async.freeArgs = NULL;
+#endif
 
     return ret;
 }
@@ -2175,9 +2399,9 @@
  * suite  Cipher suite to look for.
  * returns 1 when suite is found in SSL/TLS object's list and 0 otherwise.
  */
-static int FindSuite(WOLFSSL* ssl, byte* suite)
+static int FindSuiteSSL(WOLFSSL* ssl, byte* suite)
 {
-    int i;
+    word16 i;
 
     for (i = 0; i < ssl->suites->suiteSz; i += 2) {
         if (ssl->suites->suites[i+0] == suite[0] &&
@@ -2202,10 +2426,10 @@
 static int CreateCookie(WOLFSSL* ssl, byte* hash, byte hashSz)
 {
     int  ret;
-    byte mac[WC_MAX_DIGEST_SIZE];
+    byte mac[WC_MAX_DIGEST_SIZE] = {0};
     Hmac cookieHmac;
-    byte cookieType;
-    byte macSz;
+    byte cookieType = 0;
+    byte macSz = 0;
 
 #if !defined(NO_SHA) && defined(NO_SHA256)
     cookieType = SHA;
@@ -2215,6 +2439,7 @@
     cookieType = WC_SHA256;
     macSz = WC_SHA256_DIGEST_SIZE;
 #endif /* NO_SHA256 */
+    XMEMSET(&cookieHmac, 0, sizeof(Hmac));
 
     ret = wc_HmacSetKey(&cookieHmac, cookieType,
                         ssl->buffers.tls13CookieSecret.buffer,
@@ -2231,7 +2456,7 @@
 }
 #endif
 
-/* Restart the Hanshake hash with a hash of the previous messages.
+/* Restart the handshake hash with a hash of the previous messages.
  *
  * ssl The SSL/TLS object.
  * returns 0 on success, otherwise failure.
@@ -2240,7 +2465,7 @@
 {
     int    ret;
     Hashes hashes;
-    byte   header[HANDSHAKE_HEADER_SZ];
+    byte   header[HANDSHAKE_HEADER_SZ] = {0};
     byte*  hash = NULL;
     byte   hashSz = 0;
 
@@ -2265,6 +2490,11 @@
     #endif
     }
     hashSz = ssl->specs.hash_size;
+
+    /* check hash */
+    if (hash == NULL && hashSz > 0)
+        return BAD_FUNC_ARG;
+
     AddTls13HandShakeHeader(header, hashSz, 0, 0, message_hash, ssl);
 
     WOLFSSL_MSG("Restart Hash");
@@ -2278,7 +2508,8 @@
 
         /* Cookie Data = Hash Len | Hash | CS | KeyShare Group */
         cookie[idx++] = hashSz;
-        XMEMCPY(cookie + idx, hash, hashSz);
+        if (hash)
+            XMEMCPY(cookie + idx, hash, hashSz);
         idx += hashSz;
         cookie[idx++] = ssl->options.cipherSuite0;
         cookie[idx++] = ssl->options.cipherSuite;
@@ -2325,12 +2556,12 @@
     int ret;
     byte suite[2];
 
-    if (ssl->options.noPskDheKe && ssl->arrays->preMasterSz != 0)
-        return PSK_KEY_ERROR;
+    if (psk == NULL)
+        return BAD_FUNC_ARG;
 
     suite[0] = psk->cipherSuite0;
     suite[1] = psk->cipherSuite;
-    if (!FindSuite(ssl, suite))
+    if (!FindSuiteSSL(ssl, suite))
         return PSK_KEY_ERROR;
 
     ssl->options.cipherSuite0 = psk->cipherSuite0;
@@ -2359,22 +2590,43 @@
 #endif
 #ifndef NO_PSK
     if (!psk->resumption) {
+    #ifndef WOLFSSL_PSK_ONE_ID
+        const char* cipherName = NULL;
+        byte cipherSuite0 = TLS13_BYTE, cipherSuite = WOLFSSL_DEF_PSK_CIPHER;
+
         /* Get the pre-shared key. */
-        ssl->arrays->psk_keySz = ssl->options.client_psk_cb(ssl,
-                (char *)psk->identity, ssl->arrays->client_identity,
-                MAX_PSK_ID_LEN, ssl->arrays->psk_key, MAX_PSK_KEY_LEN);
+        if (ssl->options.client_psk_tls13_cb != NULL) {
+            ssl->arrays->psk_keySz = ssl->options.client_psk_tls13_cb(ssl,
+                    (char *)psk->identity, ssl->arrays->client_identity,
+                    MAX_PSK_ID_LEN, ssl->arrays->psk_key, MAX_PSK_KEY_LEN,
+                    &cipherName);
+            if (GetCipherSuiteFromName(cipherName, &cipherSuite0,
+                                                           &cipherSuite) != 0) {
+                return PSK_KEY_ERROR;
+            }
+        }
+        else {
+            ssl->arrays->psk_keySz = ssl->options.client_psk_cb(ssl,
+                    (char *)psk->identity, ssl->arrays->client_identity,
+                    MAX_PSK_ID_LEN, ssl->arrays->psk_key, MAX_PSK_KEY_LEN);
+        }
         if (ssl->arrays->psk_keySz == 0 ||
-                                 ssl->arrays->psk_keySz > MAX_PSK_KEY_LEN) {
+                                     ssl->arrays->psk_keySz > MAX_PSK_KEY_LEN) {
             return PSK_KEY_ERROR;
         }
-        /* TODO: Callback should be able to specify ciphersuite. */
-
-        if (psk->cipherSuite0 != TLS13_BYTE ||
-            psk->cipherSuite  != WOLFSSL_DEF_PSK_CIPHER) {
+
+        if (psk->cipherSuite0 != cipherSuite0 ||
+                                              psk->cipherSuite != cipherSuite) {
             return PSK_KEY_ERROR;
         }
-    }
-#endif
+    #else
+        /* PSK information loaded during setting of default TLS extensions. */
+    #endif
+    }
+#endif
+
+    if (ssl->options.noPskDheKe)
+        ssl->arrays->preMasterSz = 0;
 
     /* Derive the early secret using the PSK. */
     return DeriveEarlySecret(ssl);
@@ -2403,8 +2655,11 @@
         return SANITY_MSG_E;
 
     /* Get the size of the binders to determine where to write binders. */
-    idx -= TLSX_PreSharedKey_GetSizeBinders((PreSharedKey*)ext->data,
-                                            client_hello);
+    ret = TLSX_PreSharedKey_GetSizeBinders((PreSharedKey*)ext->data,
+                                                            client_hello, &len);
+    if (ret < 0)
+        return ret;
+    idx -= len;
 
     /* Hash truncated ClientHello - up to binders. */
     ret = HashOutput(ssl, output, idx, 0);
@@ -2445,8 +2700,10 @@
     }
 
     /* Data entered into extension, now write to message. */
-    len = TLSX_PreSharedKey_WriteBinders((PreSharedKey*)ext->data, output + idx,
-                                         client_hello);
+    ret = TLSX_PreSharedKey_WriteBinders((PreSharedKey*)ext->data, output + idx,
+                                                            client_hello, &len);
+    if (ret < 0)
+        return ret;
 
     /* Hash binders to complete the hash of the ClientHello. */
     ret = HashOutputRaw(ssl, output + idx, len);
@@ -2534,7 +2791,9 @@
         return ret;
 #ifdef WOLFSSL_EARLY_DATA
     #ifndef NO_PSK
-        if (!ssl->options.resuming && ssl->options.client_psk_cb == NULL)
+        if (!ssl->options.resuming &&
+                                     ssl->options.client_psk_tls13_cb == NULL &&
+                                     ssl->options.client_psk_cb == NULL)
     #else
         if (!ssl->options.resuming)
     #endif
@@ -2548,10 +2807,6 @@
         return ret;
     }
 #endif
-#ifdef HAVE_QSH
-    if (QSH_Init(ssl) != 0)
-        return MEMORY_E;
-#endif
     /* Include length of TLS extensions. */
     ret = TLSX_GetRequestSize(ssl, client_hello, &length);
     if (ret != 0)
@@ -2653,7 +2908,11 @@
 
     ssl->buffers.outputBuffer.length += sendSz;
 
-    ret = SendBuffered(ssl);
+#ifdef WOLFSSL_EARLY_DATA_GROUP
+    if (ssl->earlyData == no_early_data)
+#endif
+        ret = SendBuffered(ssl);
+
 
     WOLFSSL_LEAVE("SendTls13ClientHello", ret);
     WOLFSSL_END(WC_FUNC_CLIENT_HELLO_SEND);
@@ -2752,6 +3011,7 @@
     byte            sessIdSz;
     const byte*     sessId;
     byte            b;
+    int             foundVersion;
 #endif
     word16          totalExtSz;
 #if defined(HAVE_SESSION_TICKET) || !defined(NO_PSK)
@@ -2874,9 +3134,24 @@
             return BUFFER_ERROR;
 
 #ifndef WOLFSSL_TLS13_DRAFT_18
-        if (ssl->options.downgrade)
-            ssl->version.minor = TLSv1_2_MINOR;
-#endif
+        /* Need to negotiate version first. */
+        if ((ret = TLSX_ParseVersion(ssl, (byte*)input + i, totalExtSz,
+                                                 *extMsgType, &foundVersion))) {
+            return ret;
+        }
+        if (!foundVersion) {
+            if (!ssl->options.downgrade) {
+                WOLFSSL_MSG("Server trying to downgrade to version less than "
+                            "TLS v1.3");
+                return VERSION_ERROR;
+            }
+
+            if (pv.minor < ssl->options.minDowngrade)
+                return VERSION_ERROR;
+            ssl->version.minor = pv.minor;
+        }
+#endif
+
         /* Parse and handle extensions. */
         ret = TLSX_Parse(ssl, (byte *) input + i, totalExtSz, *extMsgType,
                                                                           NULL);
@@ -2894,8 +3169,9 @@
         int secretSz = SECRET_LEN;
         ret = ssl->sessionSecretCb(ssl, ssl->session.masterSecret,
                                    &secretSz, ssl->sessionSecretCtx);
-        if (ret != 0 || secretSz != SECRET_LEN)
+        if (ret != 0 || secretSz != SECRET_LEN) {
             return SESSION_SECRET_CB_E;
+        }
     }
 #endif /* HAVE_SECRET_CALLBACK */
 
@@ -2954,6 +3230,19 @@
     ret = SetCipherSpecs(ssl);
     if (ret != 0)
         return ret;
+#ifdef HAVE_NULL_CIPHER
+    if (ssl->options.cipherSuite0 == ECC_BYTE &&
+                              (ssl->options.cipherSuite == TLS_SHA256_SHA256 ||
+                               ssl->options.cipherSuite == TLS_SHA384_SHA384)) {
+        ;
+    }
+    else
+#endif
+    /* Check that the negotiated ciphersuite matches protocol version. */
+    if (ssl->options.cipherSuite0 != TLS13_BYTE) {
+        WOLFSSL_MSG("Server sent non-TLS13 cipher suite in TLS 1.3 packet");
+        return INVALID_PARAMETER;
+    }
 
 #if defined(HAVE_SESSION_TICKET) || !defined(NO_PSK)
 #ifndef WOLFSSL_TLS13_DRAFT_18
@@ -3096,6 +3385,9 @@
     WOLFSSL_START(WC_FUNC_CERTIFICATE_REQUEST_DO);
     WOLFSSL_ENTER("DoTls13CertificateRequest");
 
+#ifndef WOLFSSL_TLS13_DRAFT_18
+    XMEMSET(&peerSuites, 0, sizeof(Suites));
+#endif
 #ifdef WOLFSSL_CALLBACKS
     if (ssl->hsInfoOn) AddPacketName(ssl, "CertificateRequest");
     if (ssl->toInfoOn) AddLateName("CertificateRequest", &ssl->timeoutInfo);
@@ -3135,7 +3427,11 @@
     *inOutIdx += OPAQUE16_LEN;
     if ((*inOutIdx - begin) + len > size)
         return BUFFER_ERROR;
-    PickHashSigAlgo(ssl, input + *inOutIdx, len);
+    if (PickHashSigAlgo(ssl, input + *inOutIdx, len) != 0 &&
+                 ssl->buffers.certificate && ssl->buffers.certificate->buffer &&
+                 ssl->buffers.key && ssl->buffers.key->buffer) {
+        return INVALID_PARAMETER;
+    }
     *inOutIdx += len;
 
     /* Length of certificate authority data. */
@@ -3189,15 +3485,25 @@
         return ret;
     }
     *inOutIdx += len;
-
-    PickHashSigAlgo(ssl, peerSuites.hashSigAlgo, peerSuites.hashSigAlgoSz);
 #endif
 
     if (ssl->buffers.certificate && ssl->buffers.certificate->buffer &&
-        ssl->buffers.key && ssl->buffers.key->buffer)
+        ((ssl->buffers.key && ssl->buffers.key->buffer)
+        #ifdef HAVE_PK_CALLBACKS
+            || wolfSSL_CTX_IsPrivatePkSet(ssl->ctx)
+        #endif
+    )) {
+#ifndef WOLFSSL_TLS13_DRAFT_18
+        if (PickHashSigAlgo(ssl, peerSuites.hashSigAlgo,
+                                               peerSuites.hashSigAlgoSz) != 0) {
+            return INVALID_PARAMETER;
+        }
+#endif
         ssl->options.sendVerify = SEND_CERT;
-    else
+    }
+    else {
         ssl->options.sendVerify = SEND_BLANK_CERT;
+    }
 
     /* This message is always encrypted so add encryption padding. */
     *inOutIdx += ssl->keys.padSz;
@@ -3221,7 +3527,9 @@
 {
     byte suites[WOLFSSL_MAX_SUITE_SZ];
     int suiteSz = 0;
-    int i, j;
+    word16 i, j;
+
+    XMEMSET(suites, 0, WOLFSSL_MAX_SUITE_SZ);
 
     for (i = 0; i < ssl->suites->suiteSz; i += 2) {
         for (j = 0; j < peerSuites->suiteSz; j += 2) {
@@ -3263,15 +3571,19 @@
     int           pskCnt = 0;
     TLSX*         extEarlyData;
 #endif
+#ifndef NO_PSK
+    const char*   cipherName = NULL;
+    byte          cipherSuite0 = TLS13_BYTE;
+    byte          cipherSuite  = WOLFSSL_DEF_PSK_CIPHER;
+#endif
 
     WOLFSSL_ENTER("DoPreSharedKeys");
 
     ext = TLSX_Find(ssl->extensions, TLSX_PRE_SHARED_KEY);
     if (ext == NULL) {
-#ifdef WOLFSSL_EARLY_DATA
-        ssl->earlyData = no_early_data;
-#endif
-        return 0;
+        /* Hash data up to binders for deriving binders in PSK extension. */
+        ret = HashInput(ssl, input,  helloSz);
+        return ret;
     }
 
     /* Extensions pushed on stack/list and PSK must be last. */
@@ -3284,8 +3596,10 @@
     /* Find the pre-shared key extension and calculate hash of truncated
      * ClientHello for binders.
      */
-    bindersLen = TLSX_PreSharedKey_GetSizeBinders((PreSharedKey*)ext->data,
-                                                  client_hello);
+    ret = TLSX_PreSharedKey_GetSizeBinders((PreSharedKey*)ext->data,
+                                                     client_hello, &bindersLen);
+    if (ret < 0)
+        return ret;
 
     /* Hash data up to binders for deriving binders in PSK extension. */
     ret = HashInput(ssl, input,  helloSz - bindersLen);
@@ -3300,6 +3614,9 @@
     #endif
 
     #ifndef NO_PSK
+        if (current->identityLen > MAX_PSK_ID_LEN) {
+            return BUFFER_ERROR;
+        }
         XMEMCPY(ssl->arrays->client_identity, current->identity,
                 current->identityLen);
         ssl->arrays->client_identity[current->identityLen] = '\0';
@@ -3332,7 +3649,7 @@
              */
             suite[0] = ssl->session.cipherSuite0;
             suite[1] = ssl->session.cipherSuite;
-            if (!FindSuite(ssl, suite)) {
+            if (!FindSuiteSSL(ssl, suite)) {
                 current = current->next;
                 continue;
             }
@@ -3371,17 +3688,23 @@
         else
     #endif
     #ifndef NO_PSK
-        if (ssl->options.server_psk_cb != NULL &&
-            (ssl->arrays->psk_keySz = ssl->options.server_psk_cb(ssl,
+        if ((ssl->options.server_psk_tls13_cb != NULL &&
+             (ssl->arrays->psk_keySz = ssl->options.server_psk_tls13_cb(ssl,
                              ssl->arrays->client_identity, ssl->arrays->psk_key,
-                             MAX_PSK_KEY_LEN)) != 0) {
+                             MAX_PSK_KEY_LEN, &cipherName)) != 0 &&
+             GetCipherSuiteFromName(cipherName, &cipherSuite0,
+                                                          &cipherSuite) == 0) ||
+            (ssl->options.server_psk_cb != NULL &&
+             (ssl->arrays->psk_keySz = ssl->options.server_psk_cb(ssl,
+                             ssl->arrays->client_identity, ssl->arrays->psk_key,
+                             MAX_PSK_KEY_LEN)) != 0)) {
             if (ssl->arrays->psk_keySz > MAX_PSK_KEY_LEN)
                 return PSK_KEY_ERROR;
-            /* TODO: Callback should be able to specify ciphersuite. */
-
-            suite[0] = TLS13_BYTE;
-            suite[1] = WOLFSSL_DEF_PSK_CIPHER;
-            if (!FindSuite(ssl, suite)) {
+
+            /* Check whether PSK ciphersuite is in SSL. */
+            suite[0] = cipherSuite0;
+            suite[1] = cipherSuite;
+            if (!FindSuiteSSL(ssl, suite)) {
                 current = current->next;
                 continue;
             }
@@ -3393,9 +3716,9 @@
             if (current->ticketAge != ssl->session.ticketAdd)
                 return PSK_KEY_ERROR;
 
-            /* Check whether PSK ciphersuite is in SSL. */
-            ssl->options.cipherSuite0 = TLS13_BYTE;
-            ssl->options.cipherSuite  = WOLFSSL_DEF_PSK_CIPHER;
+            /* Set PSK ciphersuite into SSL. */
+            ssl->options.cipherSuite0 = cipherSuite0;
+            ssl->options.cipherSuite  = cipherSuite;
             ret = SetCipherSpecs(ssl);
             if (ret != 0)
                 return ret;
@@ -3440,6 +3763,11 @@
         break;
     }
 
+    /* Hash the rest of the ClientHello. */
+    ret = HashInputRaw(ssl, input + helloSz - bindersLen, bindersLen);
+    if (ret != 0)
+        return ret;
+
     if (current == NULL) {
 #ifdef WOLFSSL_PSK_ID_PROTECTION
     #ifndef NO_CERTS
@@ -3452,11 +3780,6 @@
 #endif
     }
 
-    /* Hash the rest of the ClientHello. */
-    ret = HashInputRaw(ssl, input + helloSz - bindersLen, bindersLen);
-    if (ret != 0)
-        return ret;
-
 #ifdef WOLFSSL_EARLY_DATA
     extEarlyData = TLSX_Find(ssl->extensions, TLSX_EARLY_DATA);
     if (extEarlyData != NULL) {
@@ -3506,6 +3829,7 @@
         if ((modes & (1 << PSK_KE)) == 0)
             return PSK_KEY_ERROR;
         ssl->options.noPskDheKe = 1;
+        ssl->arrays->preMasterSz = 0;
     }
 
     *usingPSK = 1;
@@ -3527,10 +3851,10 @@
 static int CheckCookie(WOLFSSL* ssl, byte* cookie, byte cookieSz)
 {
     int  ret;
-    byte mac[WC_MAX_DIGEST_SIZE];
+    byte mac[WC_MAX_DIGEST_SIZE] = {0};
     Hmac cookieHmac;
-    byte cookieType;
-    byte macSz;
+    byte cookieType = 0;
+    byte macSz = 0;
 
 #if !defined(NO_SHA) && defined(NO_SHA256)
     cookieType = SHA;
@@ -3544,6 +3868,7 @@
     if (cookieSz < ssl->specs.hash_size + macSz)
         return HRR_COOKIE_ERROR;
     cookieSz -= macSz;
+    XMEMSET(&cookieHmac, 0, sizeof(Hmac));
 
     ret = wc_HmacSetKey(&cookieHmac, cookieType,
                         ssl->buffers.tls13CookieSecret.buffer,
@@ -3586,7 +3911,7 @@
                           HRR_COOKIE_HDR_SZ)
 #endif
 
-/* Restart the Hanshake hash from the cookie value.
+/* Restart the handshake hash from the cookie value.
  *
  * ssl     SSL/TLS object.
  * cookie  Cookie data from client.
@@ -3594,8 +3919,8 @@
  */
 static int RestartHandshakeHashWithCookie(WOLFSSL* ssl, Cookie* cookie)
 {
-    byte   header[HANDSHAKE_HEADER_SZ];
-    byte   hrr[MAX_HRR_SZ];
+    byte   header[HANDSHAKE_HEADER_SZ] = {0};
+    byte   hrr[MAX_HRR_SZ] = {0};
     int    hrrIdx;
     word32 idx;
     byte   hashSz;
@@ -3638,13 +3963,6 @@
 
     idx += hashSz;
     hrrIdx = HANDSHAKE_HEADER_SZ;
-    /* TODO: [TLS13] Replace existing code with code in comment.
-     * Use the TLS v1.3 draft version for now.
-     *
-     * Change to:
-     * hrr[hrrIdx++] = ssl->version.major;
-     * hrr[hrrIdx++] = ssl->version.minor;
-     */
     /* The negotiated protocol version. */
     hrr[hrrIdx++] = TLS_DRAFT_MAJOR;
     hrr[hrrIdx++] = TLS_DRAFT_MINOR;
@@ -3703,13 +4021,12 @@
     hrrIdx += 2;
     c16toa(OPAQUE16_LEN, hrr + hrrIdx);
     hrrIdx += 2;
-    /* TODO: [TLS13] Change to ssl->version.major and minor once final. */
-    #ifdef WOLFSSL_TLS13_FINAL
+    #ifdef WOLFSSL_TLS13_DRAFT
+        hrr[hrrIdx++] = TLS_DRAFT_MAJOR;
+        hrr[hrrIdx++] = TLS_DRAFT_MINOR;
+    #else
         hrr[hrrIdx++] = ssl->version.major;
         hrr[hrrIdx++] = ssl->version.minor;
-    #else
-        hrr[hrrIdx++] = TLS_DRAFT_MAJOR;
-        hrr[hrrIdx++] = TLS_DRAFT_MINOR;
     #endif
 #endif
     /* Mandatory Cookie Extension */
@@ -3733,6 +4050,65 @@
 }
 #endif
 
+/* Do SupportedVersion extension for TLS v1.3+ otherwise it is not.
+ *
+ * ssl       The SSL/TLS object.
+ * input     The message buffer.
+ * i         The index into the message buffer of ClientHello.
+ * helloSz   The length of the current handshake message.
+ * returns 0 on success and otherwise failure.
+ */
+static int DoTls13SupportedVersions(WOLFSSL* ssl, const byte* input, word32 i,
+                                    word32 helloSz, int* wantDowngrade)
+{
+    int    ret;
+    byte   b;
+    word16 suiteSz;
+    word16 totalExtSz;
+    int    foundVersion = 0;
+
+    /* Client random */
+    i += RAN_LEN;
+    /* Session id - not used in TLS v1.3 */
+    b = input[i++];
+    if (i + b > helloSz) {
+        return BUFFER_ERROR;
+    }
+    i += b;
+    /* Cipher suites */
+    if (i + OPAQUE16_LEN > helloSz)
+        return BUFFER_ERROR;
+    ato16(input + i, &suiteSz);
+    i += OPAQUE16_LEN;
+    if (i + suiteSz + 1 > helloSz)
+        return BUFFER_ERROR;
+    i += suiteSz;
+    /* Compression */
+    b = input[i++];
+    if (i + b > helloSz)
+        return BUFFER_ERROR;
+    i += b;
+
+    /* TLS 1.3 must have extensions */
+    if (i < helloSz) {
+        if (i + OPAQUE16_LEN > helloSz)
+            return BUFFER_ERROR;
+        ato16(&input[i], &totalExtSz);
+        i += OPAQUE16_LEN;
+        if (totalExtSz != helloSz - i)
+            return BUFFER_ERROR;
+
+        /* Need to negotiate version first. */
+        if ((ret = TLSX_ParseVersion(ssl, (byte*)input + i, totalExtSz,
+                                                client_hello, &foundVersion))) {
+            return ret;
+        }
+    }
+    *wantDowngrade = !foundVersion || !IsAtLeastTLSv1_3(ssl->version);
+
+    return 0;
+}
+
 /* Handle a ClientHello handshake message.
  * If the protocol version in the message is not TLS v1.3 or higher, use
  * DoClientHello()
@@ -3750,21 +4126,22 @@
                        word32 helloSz)
 {
     int             ret = VERSION_ERROR;
-    byte            b;
+    byte            b = 0;
     ProtocolVersion pv;
     Suites          clSuites;
     word32          i = *inOutIdx;
     word32          begin = i;
     word16          totalExtSz = 0;
     int             usingPSK = 0;
-    byte            sessIdSz;
-#ifndef WOLFSSL_NO_TLS12
-    int             bogusID = 0;
-#endif
+    byte            sessIdSz = 0;
+    int             wantDowngrade = 0;
 
     WOLFSSL_START(WC_FUNC_CLIENT_HELLO_DO);
     WOLFSSL_ENTER("DoTls13ClientHello");
 
+    XMEMSET(&pv, 0, sizeof(ProtocolVersion));
+    XMEMSET(&clSuites, 0, sizeof(Suites));
+
 #ifdef WOLFSSL_CALLBACKS
     if (ssl->hsInfoOn) AddPacketName(ssl, "ClientHello");
     if (ssl->toInfoOn) AddLateName("ClientHello", &ssl->timeoutInfo);
@@ -3778,21 +4155,54 @@
     XMEMCPY(&pv, input + i, OPAQUE16_LEN);
     ssl->chVersion = pv;   /* store */
     i += OPAQUE16_LEN;
+    if (pv.major < SSLv3_MAJOR) {
+        WOLFSSL_MSG("Legacy version field contains unsupported value");
+ #ifdef WOLFSSL_MYSQL_COMPATIBLE
+        SendAlert(ssl, alert_fatal, wc_protocol_version);
+ #else
+        SendAlert(ssl, alert_fatal, protocol_version);
+ #endif
+        return INVALID_PARAMETER;
+    }
     /* Legacy protocol version cannot negotiate TLS 1.3 or higher. */
-    if (pv.major == SSLv3_MAJOR && pv.minor >= TLSv1_3_MINOR)
+    if (pv.major > SSLv3_MAJOR || (pv.major == SSLv3_MAJOR &&
+                                                   pv.minor >= TLSv1_3_MINOR)) {
+        pv.major = SSLv3_MAJOR;
         pv.minor = TLSv1_2_MINOR;
-
+        wantDowngrade = 1;
+        ssl->version.minor = pv.minor;
+    }
+    /* Legacy version must be [ SSLv3_MAJOR, TLSv1_2_MINOR ] for TLS v1.3 */
+    else if (pv.major == SSLv3_MAJOR && pv.minor < TLSv1_2_MINOR) {
+        wantDowngrade = 1;
+        ssl->version.minor = pv.minor;
+    }
+    else {
+        ret = DoTls13SupportedVersions(ssl, input + begin, i - begin, helloSz,
+                                                                &wantDowngrade);
+        if (ret < 0)
+            return ret;
+    }
+    if (wantDowngrade) {
 #ifndef WOLFSSL_NO_TLS12
-    if (ssl->version.major == SSLv3_MAJOR && ssl->version.minor < TLSv1_3_MINOR)
+        if (!ssl->options.downgrade) {
+            WOLFSSL_MSG("Client trying to connect with lesser version than "
+                        "TLS v1.3");
+            return VERSION_ERROR;
+        }
+
+        if (pv.minor < ssl->options.minDowngrade)
+            return VERSION_ERROR;
+
+        if ((ret = HashInput(ssl, input + begin, helloSz)) != 0)
+            return ret;
         return DoClientHello(ssl, input, inOutIdx, helloSz);
-#endif
-
-#ifdef HAVE_SESSION_TICKET
-    if (ssl->options.downgrade) {
-       if ((ret = HashInput(ssl, input + begin, helloSz)) != 0)
-            return ret;
-    }
-#endif
+#else
+        WOLFSSL_MSG("Client trying to connect with lesser version than "
+                    "TLS v1.3");
+        return VERSION_ERROR;
+#endif
+    }
 
     /* Client random */
     XMEMCPY(ssl->arrays->clientRandom, input + i, RAN_LEN);
@@ -3815,17 +4225,16 @@
     if (sessIdSz != ID_LEN && sessIdSz != 0)
         return INVALID_PARAMETER;
 #endif
+
+    if (sessIdSz + i > helloSz) {
+        return BUFFER_ERROR;
+    }
+
     ssl->session.sessionIDSz = sessIdSz;
     if (sessIdSz == ID_LEN) {
         XMEMCPY(ssl->session.sessionID, input + i, sessIdSz);
         i += ID_LEN;
     }
-#ifndef WOLFSSL_NO_TLS12
-    #ifdef HAVE_SESSION_TICKET
-        if (sessIdSz > 0 && sessIdSz < ID_LEN)
-            bogusID = 1;
-    #endif
-#endif
 
     /* Cipher suites */
     if ((i - begin) + OPAQUE16_LEN > helloSz)
@@ -3841,6 +4250,26 @@
     i += clSuites.suiteSz;
     clSuites.hashSigAlgoSz = 0;
 
+#ifdef HAVE_SERVER_RENEGOTIATION_INFO
+    ret = FindSuite(&clSuites, 0, TLS_EMPTY_RENEGOTIATION_INFO_SCSV);
+    if (ret == SUITES_ERROR)
+        return BUFFER_ERROR;
+    if (ret >= 0) {
+        TLSX* extension;
+
+        /* check for TLS_EMPTY_RENEGOTIATION_INFO_SCSV suite */
+        ret = TLSX_AddEmptyRenegotiationInfo(&ssl->extensions, ssl->heap);
+        if (ret != WOLFSSL_SUCCESS)
+            return ret;
+
+        extension = TLSX_Find(ssl->extensions, TLSX_RENEGOTIATION_INFO);
+        if (extension) {
+            ssl->secure_renegotiation = (SecureRenegotiation*)extension->data;
+            ssl->secure_renegotiation->enabled = 1;
+        }
+    }
+#endif /* HAVE_SERVER_RENEGOTIATION_INFO */
+
     /* Compression */
     b = input[i++];
     if ((i - begin) + b > helloSz)
@@ -3855,27 +4284,26 @@
         return INVALID_PARAMETER;
     }
 
-    if ((i - begin) < helloSz) {
-        if ((i - begin) + OPAQUE16_LEN > helloSz)
-            return BUFFER_ERROR;
-        ato16(&input[i], &totalExtSz);
-        i += OPAQUE16_LEN;
-        if ((i - begin) + totalExtSz > helloSz)
-            return BUFFER_ERROR;
-
-    #ifdef HAVE_QSH
-        QSH_Init(ssl);
-    #endif
-
-        /* Auto populate extensions supported unless user defined. */
-        if ((ret = TLSX_PopulateExtensions(ssl, 1)) != 0)
-            return ret;
-
-        /* Parse extensions */
-        if ((ret = TLSX_Parse(ssl, (byte*)input + i, totalExtSz, client_hello,
+    /* Extensions */
+    if ((i - begin) == helloSz)
+        return BUFFER_ERROR;
+    if ((i - begin) + OPAQUE16_LEN > helloSz)
+        return BUFFER_ERROR;
+
+    ato16(&input[i], &totalExtSz);
+    i += OPAQUE16_LEN;
+    if ((i - begin) + totalExtSz > helloSz)
+        return BUFFER_ERROR;
+
+    /* Auto populate extensions supported unless user defined. */
+    if ((ret = TLSX_PopulateExtensions(ssl, 1)) != 0)
+        return ret;
+
+    /* Parse extensions */
+    if ((ret = TLSX_Parse(ssl, (byte*)input + i, totalExtSz, client_hello,
                                                                   &clSuites))) {
-            return ret;
-        }
+        return ret;
+    }
 
 #if defined(OPENSSL_ALL) || defined(HAVE_STUNNEL) || defined(WOLFSSL_NGINX) || \
                                                         defined(WOLFSSL_HAPROXY)
@@ -3883,53 +4311,36 @@
             return ret;
         ssl->options.side = WOLFSSL_SERVER_END;
 #endif /* OPENSSL_ALL || HAVE_STUNNEL || WOLFSSL_NGINX || WOLFSSL_HAPROXY */
-    }
 
     i += totalExtSz;
     *inOutIdx = i;
 
-    if (TLSX_Find(ssl->extensions, TLSX_SUPPORTED_VERSIONS) == NULL) {
-        if (!ssl->options.downgrade) {
-            WOLFSSL_MSG("Client trying to connect with lesser version than "
-                        "TLS v1.3");
-            return VERSION_ERROR;
-        }
-
-        if (pv.minor < ssl->options.minDowngrade)
-            return VERSION_ERROR;
-        ssl->version.minor = pv.minor;
-    }
-
     ssl->options.sendVerify = SEND_CERT;
 
     ssl->options.clientState = CLIENT_HELLO_COMPLETE;
     ssl->options.haveSessionId = 1;
 
-    if (IsAtLeastTLSv1_3(ssl->version)) {
 #if !defined(WOLFSSL_TLS13_DRAFT_18) && defined(WOLFSSL_SEND_HRR_COOKIE)
-        if (ssl->options.sendCookie &&
+    if (ssl->options.sendCookie &&
               ssl->options.serverState == SERVER_HELLO_RETRY_REQUEST_COMPLETE) {
-            TLSX* ext;
-
-            if ((ext = TLSX_Find(ssl->extensions, TLSX_COOKIE)) == NULL)
-                return HRR_COOKIE_ERROR;
-            /* Ensure the cookie came from client and isn't the one in the
-             * response - HelloRetryRequest.
-             */
-            if (ext->resp == 1)
-                return HRR_COOKIE_ERROR;
-            ret = RestartHandshakeHashWithCookie(ssl, (Cookie*)ext->data);
-            if (ret != 0)
-                return ret;
-        }
-#endif
-
-#if defined(HAVE_SESSION_TICKET) || !defined(NO_PSK)
-        if (ssl->options.downgrade) {
-            if ((ret = InitHandshakeHashes(ssl)) != 0)
-                return ret;
-        }
-
+        TLSX* ext;
+
+        if ((ext = TLSX_Find(ssl->extensions, TLSX_COOKIE)) == NULL)
+            return HRR_COOKIE_ERROR;
+        /* Ensure the cookie came from client and isn't the one in the
+         * response - HelloRetryRequest.
+         */
+        if (ext->resp == 1)
+            return HRR_COOKIE_ERROR;
+        ret = RestartHandshakeHashWithCookie(ssl, (Cookie*)ext->data);
+        if (ret != 0)
+            return ret;
+    }
+#endif
+
+#if (defined(HAVE_SESSION_TICKET) || !defined(NO_PSK)) && \
+     defined(HAVE_TLS_EXTENSIONS)
+    if (TLSX_Find(ssl->extensions, TLSX_PRE_SHARED_KEY) != NULL) {
         /* Refine list for PSK processing. */
         RefineSuites(ssl, &clSuites);
 
@@ -3937,67 +4348,62 @@
         ret = DoPreSharedKeys(ssl, input + begin, helloSz, &usingPSK);
         if (ret != 0)
             return ret;
-#endif
-    }
-#ifndef WOLFSSL_NO_TLS12
-    else if (ssl->options.resuming) {
-        ret = HandleTlsResumption(ssl, bogusID, &clSuites);
-        if (ret != 0)
+    }
+    else
+#endif
+    {
+#ifdef WOLFSSL_EARLY_DATA
+        ssl->earlyData = no_early_data;
+#endif
+        if ((ret = HashInput(ssl, input + begin, helloSz)) != 0)
             return ret;
-        /* Check wheter resuming has been chosen */
-        if (ssl->options.clientState == CLIENT_KEYEXCHANGE_COMPLETE) {
-            WOLFSSL_LEAVE("DoTls13ClientHello", ret);
-            WOLFSSL_END(WC_FUNC_CLIENT_HELLO_DO);
-
+
+    }
+
+    if (!usingPSK) {
+        if (TLSX_Find(ssl->extensions, TLSX_KEY_SHARE) == NULL) {
+            WOLFSSL_MSG("Client did not send a KeyShare extension");
+            SendAlert(ssl, alert_fatal, missing_extension);
+            return INCOMPLETE_DATA;
+        }
+        if (TLSX_Find(ssl->extensions, TLSX_SIGNATURE_ALGORITHMS) == NULL) {
+            WOLFSSL_MSG("Client did not send a SignatureAlgorithms extension");
+            SendAlert(ssl, alert_fatal, missing_extension);
+            return INCOMPLETE_DATA;
+        }
+
+        if ((ret = MatchSuite(ssl, &clSuites)) < 0) {
+            WOLFSSL_MSG("Unsupported cipher suite, ClientHello");
+            SendAlert(ssl, alert_fatal, handshake_failure);
             return ret;
         }
-    }
-#else
-    else {
-        WOLFSSL_MSG("Negotiated lesser version than TLS v1.3");
-        return VERSION_ERROR;
-    }
-#endif
-
-    if (!usingPSK) {
-        if ((ret = MatchSuite(ssl, &clSuites)) < 0) {
-            WOLFSSL_MSG("Unsupported cipher suite, ClientHello");
-            return ret;
+
+#ifdef HAVE_NULL_CIPHER
+        if (ssl->options.cipherSuite0 == ECC_BYTE &&
+                              (ssl->options.cipherSuite == TLS_SHA256_SHA256 ||
+                               ssl->options.cipherSuite == TLS_SHA384_SHA384)) {
+            ;
         }
-
+        else
+#endif
         /* Check that the negotiated ciphersuite matches protocol version. */
-        if (IsAtLeastTLSv1_3(ssl->version)) {
-            if (ssl->options.cipherSuite0 != TLS13_BYTE) {
-                WOLFSSL_MSG("Negotiated ciphersuite from lesser version than "
-                            "TLS v1.3");
-                return VERSION_ERROR;
-            }
+        if (ssl->options.cipherSuite0 != TLS13_BYTE) {
+            WOLFSSL_MSG("Negotiated ciphersuite from lesser version than "
+                        "TLS v1.3");
+            SendAlert(ssl, alert_fatal, handshake_failure);
+            return VERSION_ERROR;
         }
-        /* VerifyServerSuite handles when version is less than 1.3 */
 
 #ifdef HAVE_SESSION_TICKET
         if (ssl->options.resuming) {
             ssl->options.resuming = 0;
             XMEMSET(ssl->arrays->psk_key, 0, ssl->specs.hash_size);
-            /* May or may not have done any hashing. */
-            if ((ret = InitHandshakeHashes(ssl)) != 0)
-                return ret;
         }
 #endif
 
-#ifdef HAVE_SESSION_TICKET
-        if (IsAtLeastTLSv1_3(ssl->version) || !ssl->options.downgrade)
-#endif
-        {
-            if ((ret = HashInput(ssl, input + begin, helloSz)) != 0)
-                return ret;
-        }
-
-        if (IsAtLeastTLSv1_3(ssl->version)) {
-            /* Derive early secret for handshake secret. */
-            if ((ret = DeriveEarlySecret(ssl)) != 0)
-                return ret;
-        }
+        /* Derive early secret for handshake secret. */
+        if ((ret = DeriveEarlySecret(ssl)) != 0)
+            return ret;
     }
 
     WOLFSSL_LEAVE("DoTls13ClientHello", ret);
@@ -4044,16 +4450,9 @@
     /* Get position in output buffer to write new message to. */
     output = ssl->buffers.outputBuffer.buffer +
              ssl->buffers.outputBuffer.length;
-    /* Add record and hanshake headers. */
+    /* Add record and handshake headers. */
     AddTls13Headers(output, length, hello_retry_request, ssl);
 
-    /* TODO: [TLS13] Replace existing code with code in comment.
-     * Use the TLS v1.3 draft version for now.
-     *
-     * Change to:
-     * output[idx++] = ssl->version.major;
-     * output[idx++] = ssl->version.minor;
-     */
     /* The negotiated protocol version. */
     output[idx++] = TLS_DRAFT_MAJOR;
     output[idx++] = TLS_DRAFT_MINOR;
@@ -4109,6 +4508,7 @@
 
 #ifndef WOLFSSL_TLS13_DRAFT_18
     if (extMsgType == hello_retry_request) {
+        WOLFSSL_MSG("wolfSSL Doing HelloRetryRequest");
         if ((ret = RestartHandshakeHash(ssl)) < 0)
             return ret;
     }
@@ -4144,13 +4544,6 @@
     AddTls13Headers(output, length, server_hello, ssl);
 
 #ifdef WOLFSSL_TLS13_DRAFT_18
-    /* TODO: [TLS13] Replace existing code with code in comment.
-     * Use the TLS v1.3 draft version for now.
-     *
-     * Change to:
-     * output[idx++] = ssl->version.major;
-     * output[idx++] = ssl->version.minor;
-     */
     /* The negotiated protocol version. */
     output[idx++] = TLS_DRAFT_MAJOR;
     output[idx++] = TLS_DRAFT_MINOR;
@@ -4223,7 +4616,11 @@
         ssl->options.serverState = SERVER_HELLO_COMPLETE;
 #endif
 
+#ifdef WOLFSSL_TLS13_DRAFT_18
     if (!ssl->options.groupMessages)
+#else
+    if (!ssl->options.groupMessages || extMsgType != server_hello)
+#endif
         ret = SendBuffered(ssl);
 
     WOLFSSL_LEAVE("SendTls13ServerHello", ret);
@@ -4473,7 +4870,8 @@
 #endif /* NO_WOLFSSL_SERVER */
 
 #ifndef NO_CERTS
-#if !defined(NO_RSA) || defined(HAVE_ECC) || defined(HAVE_ED25519)
+#if !defined(NO_RSA) || defined(HAVE_ECC) || defined(HAVE_ED25519) || \
+                                                             defined(HAVE_ED448)
 /* Encode the signature algorithm into buffer.
  *
  * hashalgo  The hash algorithm.
@@ -4497,6 +4895,14 @@
             (void)hashAlgo;
             break;
 #endif
+#ifdef HAVE_ED448
+        /* ED448: 0x0808 */
+        case ed448_sa_algo:
+            output[0] = ED448_SA_MAJOR;
+            output[1] = ED448_SA_MINOR;
+            (void)hashAlgo;
+            break;
+#endif
 #ifndef NO_RSA
         /* PSS signatures: 0x080[4-6] */
         case rsa_pss_sa_algo:
@@ -4504,7 +4910,6 @@
             output[1] = hashAlgo;
             break;
 #endif
-        /* ED448: 0x0808 */
     }
 }
 
@@ -4512,32 +4917,47 @@
  *
  * input     The encoded signature algorithm.
  * hashalgo  The hash algorithm.
- * hsType   The signature type.
+ * hsType    The signature type.
+ * returns INVALID_PARAMETER if not recognized and 0 otherwise.
  */
-static WC_INLINE void DecodeSigAlg(byte* input, byte* hashAlgo, byte* hsType)
+static WC_INLINE int DecodeTls13SigAlg(byte* input, byte* hashAlgo,
+                                       byte* hsType)
 {
+    int ret = 0;
+
     switch (input[0]) {
         case NEW_SA_MAJOR:
             /* PSS signatures: 0x080[4-6] */
-            if (input[1] <= sha512_mac) {
+            if (input[1] >= sha256_mac && input[1] <= sha512_mac) {
                 *hsType   = input[0];
                 *hashAlgo = input[1];
             }
     #ifdef HAVE_ED25519
             /* ED25519: 0x0807 */
-            if (input[1] == ED25519_SA_MINOR) {
+            else if (input[1] == ED25519_SA_MINOR) {
                 *hsType = ed25519_sa_algo;
                 /* Hash performed as part of sign/verify operation. */
                 *hashAlgo = sha512_mac;
             }
     #endif
+    #ifdef HAVE_ED448
             /* ED448: 0x0808 */
+            else if (input[1] == ED448_SA_MINOR) {
+                *hsType = ed448_sa_algo;
+                /* Hash performed as part of sign/verify operation. */
+                *hashAlgo = sha512_mac;
+            }
+    #endif
+            else
+                ret = INVALID_PARAMETER;
             break;
         default:
             *hashAlgo = input[0];
             *hsType   = input[1];
             break;
     }
+
+    return ret;
 }
 
 /* Get the hash of the messages so far.
@@ -4937,6 +5357,7 @@
             if (ret < 0)
                 return ret;
 
+            extSz = 0;
             ret = TLSX_WriteResponse(ssl, ssl->buffers.certExts->buffer,
                                                            certificate, &extSz);
             if (ret < 0)
@@ -5114,9 +5535,6 @@
 
 typedef struct Scv13Args {
     byte*  output; /* not allocated */
-#ifndef NO_RSA
-    byte*  verifySig;
-#endif
     byte*  verify; /* not allocated */
     word32 idx;
     word32 sigLen;
@@ -5134,12 +5552,6 @@
 
     (void)ssl;
 
-#ifndef NO_RSA
-    if (args->verifySig) {
-        XFREE(args->verifySig, ssl->heap, DYNAMIC_TYPE_SIGNATURE);
-        args->verifySig = NULL;
-    }
-#endif
     if (args->sigData) {
         XFREE(args->sigData, ssl->heap, DYNAMIC_TYPE_SIGNATURE);
         args->sigData = NULL;
@@ -5198,7 +5610,7 @@
                 return 0;  /* sent blank cert, can't verify */
             }
 
-            args->sendSz = MAX_CERT_VERIFY_SZ;
+            args->sendSz = MAX_CERT_VERIFY_SZ + MAX_MSG_EXTRA;
             /* Always encrypted.  */
             args->sendSz += MAX_MSG_EXTRA;
 
@@ -5250,15 +5662,28 @@
             else if (ssl->hsType == DYNAMIC_TYPE_ED25519)
                 args->sigAlgo = ed25519_sa_algo;
         #endif
+        #ifdef HAVE_ED448
+            else if (ssl->hsType == DYNAMIC_TYPE_ED448)
+                args->sigAlgo = ed448_sa_algo;
+        #endif
             EncodeSigAlg(ssl->suites->hashAlgo, args->sigAlgo, args->verify);
 
+            if (ssl->hsType == DYNAMIC_TYPE_RSA) {
+                int sigLen = MAX_SIG_DATA_SZ;
+                if (args->length > MAX_SIG_DATA_SZ)
+                    sigLen = args->length;
+                args->sigData = (byte*)XMALLOC(sigLen, ssl->heap,
+                                                        DYNAMIC_TYPE_SIGNATURE);
+            }
+            else {
+                args->sigData = (byte*)XMALLOC(MAX_SIG_DATA_SZ, ssl->heap,
+                                                        DYNAMIC_TYPE_SIGNATURE);
+            }
+            if (args->sigData == NULL) {
+                ERROR_OUT(MEMORY_E, exit_scv);
+            }
+
             /* Create the data to be signed. */
-            args->sigData = (byte*)XMALLOC(MAX_SIG_DATA_SZ, ssl->heap,
-                                                    DYNAMIC_TYPE_SIGNATURE);
-            if (args->sigData == NULL) {
-                ERROR_OUT(MEMORY_E, exit_scv);
-            }
-
             ret = CreateSigData(ssl, args->sigData, &args->sigDataSz, 0);
             if (ret != 0)
                 goto exit_scv;
@@ -5266,9 +5691,9 @@
         #ifndef NO_RSA
             if (ssl->hsType == DYNAMIC_TYPE_RSA) {
                 /* build encoded signature buffer */
-                sig->length = MAX_ENCODED_SIG_SZ;
+                sig->length = WC_MAX_DIGEST_SIZE;
                 sig->buffer = (byte*)XMALLOC(sig->length, ssl->heap,
-                                                    DYNAMIC_TYPE_SIGNATURE);
+                                                        DYNAMIC_TYPE_SIGNATURE);
                 if (sig->buffer == NULL) {
                     ERROR_OUT(MEMORY_E, exit_scv);
                 }
@@ -5304,7 +5729,16 @@
                 }
                 sig->length = ED25519_SIG_SIZE;
             }
-        #endif /* HAVE_ECC */
+        #endif /* HAVE_ED25519 */
+        #ifdef HAVE_ED448
+            if (ssl->hsType == DYNAMIC_TYPE_ED448) {
+                ret = Ed448CheckPubKey(ssl);
+                if (ret < 0) {
+                    ERROR_OUT(ret, exit_scv);
+                }
+                sig->length = ED448_SIG_SIZE;
+            }
+        #endif /* HAVE_ED448 */
 
             /* Advance state and proceed */
             ssl->options.asyncState = TLS_ASYNC_DO;
@@ -5315,9 +5749,10 @@
         {
         #ifdef HAVE_ECC
            if (ssl->hsType == DYNAMIC_TYPE_ECC) {
+
                 ret = EccSign(ssl, args->sigData, args->sigDataSz,
                     args->verify + HASH_SIG_SIZE + VERIFY_HEADER,
-                    &sig->length, (ecc_key*)ssl->hsKey,
+                    (word32*)&sig->length, (ecc_key*)ssl->hsKey,
             #ifdef HAVE_PK_CALLBACKS
                     ssl->buffers.key
             #else
@@ -5331,26 +5766,45 @@
             if (ssl->hsType == DYNAMIC_TYPE_ED25519) {
                 ret = Ed25519Sign(ssl, args->sigData, args->sigDataSz,
                     args->verify + HASH_SIG_SIZE + VERIFY_HEADER,
-                    &sig->length, (ed25519_key*)ssl->hsKey,
+                    (word32*)&sig->length, (ed25519_key*)ssl->hsKey,
             #ifdef HAVE_PK_CALLBACKS
                     ssl->buffers.key
             #else
                     NULL
             #endif
                 );
-                args->length = sig->length;
+                args->length = (word16)sig->length;
+            }
+        #endif
+        #ifdef HAVE_ED448
+            if (ssl->hsType == DYNAMIC_TYPE_ED448) {
+                ret = Ed448Sign(ssl, args->sigData, args->sigDataSz,
+                    args->verify + HASH_SIG_SIZE + VERIFY_HEADER,
+                    (word32*)&sig->length, (ed448_key*)ssl->hsKey,
+            #ifdef HAVE_PK_CALLBACKS
+                    ssl->buffers.key
+            #else
+                    NULL
+            #endif
+                );
+                args->length = (word16)sig->length;
             }
         #endif
         #ifndef NO_RSA
             if (ssl->hsType == DYNAMIC_TYPE_RSA) {
-
-                ret = RsaSign(ssl, sig->buffer, sig->length,
+                ret = RsaSign(ssl, sig->buffer, (word32)sig->length,
                     args->verify + HASH_SIG_SIZE + VERIFY_HEADER, &args->sigLen,
                     args->sigAlgo, ssl->suites->hashAlgo,
                     (RsaKey*)ssl->hsKey,
                     ssl->buffers.key
                 );
-                args->length = (word16)args->sigLen;
+                if (ret == 0) {
+                    args->length = (word16)args->sigLen;
+
+                    XMEMCPY(args->sigData,
+                        args->verify + HASH_SIG_SIZE + VERIFY_HEADER,
+                        args->sigLen);
+                }
             }
         #endif /* !NO_RSA */
 
@@ -5371,20 +5825,9 @@
         {
         #ifndef NO_RSA
             if (ssl->hsType == DYNAMIC_TYPE_RSA) {
-                if (args->verifySig == NULL) {
-                    args->verifySig = (byte*)XMALLOC(args->sigLen, ssl->heap,
-                                                   DYNAMIC_TYPE_SIGNATURE);
-                    if (args->verifySig == NULL) {
-                        ERROR_OUT(MEMORY_E, exit_scv);
-                    }
-                    XMEMCPY(args->verifySig,
-                        args->verify + HASH_SIG_SIZE + VERIFY_HEADER,
-                        args->sigLen);
-                }
-
                 /* check for signature faults */
-                ret = VerifyRsaSign(ssl, args->verifySig, args->sigLen,
-                    sig->buffer, sig->length, args->sigAlgo,
+                ret = VerifyRsaSign(ssl, args->sigData, args->sigLen,
+                    sig->buffer, (word32)sig->length, args->sigAlgo,
                     ssl->suites->hashAlgo, (RsaKey*)ssl->hsKey,
                     ssl->buffers.key
                 );
@@ -5511,7 +5954,8 @@
     return ret;
 }
 
-#if !defined(NO_RSA) || defined(HAVE_ECC) || defined(HAVE_ED25519)
+#if !defined(NO_RSA) || defined(HAVE_ECC) || defined(HAVE_ED25519) || \
+                                                             defined(HAVE_ED448)
 
 typedef struct Dcv13Args {
     byte*  output; /* not allocated */
@@ -5610,7 +6054,10 @@
             if ((args->idx - args->begin) + ENUM_LEN + ENUM_LEN > totalSz) {
                 ERROR_OUT(BUFFER_ERROR, exit_dcv);
             }
-            DecodeSigAlg(input + args->idx, &args->hashAlgo, &args->sigAlgo);
+            ret = DecodeTls13SigAlg(input + args->idx, &args->hashAlgo,
+                                                                &args->sigAlgo);
+            if (ret < 0)
+                goto exit_dcv;
             args->idx += OPAQUE16_LEN;
 
             /* Signature length. */
@@ -5633,6 +6080,11 @@
                 WOLFSSL_MSG("Oops, peer sent ED25519 key but not in verify");
             }
         #endif
+        #ifdef HAVE_ED448
+            if (args->sigAlgo == ed448_sa_algo && !ssl->peerEd448KeyPresent) {
+                WOLFSSL_MSG("Oops, peer sent ED448 key but not in verify");
+            }
+        #endif
         #ifdef HAVE_ECC
             if (args->sigAlgo == ecc_dsa_sa_algo &&
                                                    !ssl->peerEccDsaKeyPresent) {
@@ -5640,8 +6092,11 @@
             }
         #endif
         #ifndef NO_RSA
-            if ((args->sigAlgo == rsa_sa_algo ||
-                 args->sigAlgo == rsa_pss_sa_algo) &&
+            if (args->sigAlgo == rsa_sa_algo) {
+                WOLFSSL_MSG("Oops, peer sent PKCS#1.5 signature");
+                ERROR_OUT(INVALID_PARAMETER, exit_dcv);
+            }
+            if (args->sigAlgo == rsa_pss_sa_algo &&
                          (ssl->peerRsaKey == NULL || !ssl->peerRsaKeyPresent)) {
                 WOLFSSL_MSG("Oops, peer sent RSA key but not in verify");
             }
@@ -5660,7 +6115,7 @@
                 WOLFSSL_MSG("Doing ECC peer cert verify");
 
                 args->sigData = (byte*)XMALLOC(MAX_SIG_DATA_SZ, ssl->heap,
-                                                    DYNAMIC_TYPE_SIGNATURE);
+                                                        DYNAMIC_TYPE_SIGNATURE);
                 if (args->sigData == NULL) {
                     ERROR_OUT(MEMORY_E, exit_dcv);
                 }
@@ -5681,7 +6136,7 @@
                 WOLFSSL_MSG("Doing ED25519 peer cert verify");
 
                 args->sigData = (byte*)XMALLOC(MAX_SIG_DATA_SZ, ssl->heap,
-                                                    DYNAMIC_TYPE_SIGNATURE);
+                                                        DYNAMIC_TYPE_SIGNATURE);
                 if (args->sigData == NULL) {
                     ERROR_OUT(MEMORY_E, exit_dcv);
                 }
@@ -5690,6 +6145,20 @@
                 ret = 0;
             }
         #endif
+        #ifdef HAVE_ED448
+            if (ssl->peerEd448KeyPresent) {
+                WOLFSSL_MSG("Doing ED448 peer cert verify");
+
+                args->sigData = (byte*)XMALLOC(MAX_SIG_DATA_SZ, ssl->heap,
+                                                        DYNAMIC_TYPE_SIGNATURE);
+                if (args->sigData == NULL) {
+                    ERROR_OUT(MEMORY_E, exit_dcv);
+                }
+
+                CreateSigData(ssl, args->sigData, &args->sigDataSz, 1);
+                ret = 0;
+            }
+       #endif
 
             /* Advance state and proceed */
             ssl->options.asyncState = TLS_ASYNC_DO;
@@ -5699,11 +6168,10 @@
         case TLS_ASYNC_DO:
         {
         #ifndef NO_RSA
-            if (args->sigAlgo == rsa_sa_algo ||
-                                             args->sigAlgo == rsa_pss_sa_algo) {
+            if (ssl->peerRsaKey != NULL && ssl->peerRsaKeyPresent != 0) {
                 WOLFSSL_MSG("Doing RSA peer cert verify");
 
-                ret = RsaVerify(ssl, sig->buffer, sig->length, &args->output,
+                ret = RsaVerify(ssl, sig->buffer, (word32)sig->length, &args->output,
                     args->sigAlgo, args->hashAlgo, ssl->peerRsaKey,
                 #ifdef HAVE_PK_CALLBACKS
                     &ssl->buffers.peerRsaKey
@@ -5730,6 +6198,11 @@
                     NULL
                 #endif
                 );
+
+                if (ret >= 0) {
+                    FreeKey(ssl, DYNAMIC_TYPE_ECC, (void**)&ssl->peerEccDsaKey);
+                    ssl->peerEccDsaKeyPresent = 0;
+                }
             }
         #endif /* HAVE_ECC */
         #ifdef HAVE_ED25519
@@ -5745,6 +6218,33 @@
                     NULL
                 #endif
                 );
+
+                if (ret >= 0) {
+                    FreeKey(ssl, DYNAMIC_TYPE_ED25519,
+                                                  (void**)&ssl->peerEd25519Key);
+                    ssl->peerEd25519KeyPresent = 0;
+                }
+            }
+        #endif
+        #ifdef HAVE_ED448
+            if (ssl->peerEd448KeyPresent) {
+                WOLFSSL_MSG("Doing ED448 peer cert verify");
+
+                ret = Ed448Verify(ssl, input + args->idx, args->sz,
+                    args->sigData, args->sigDataSz,
+                    ssl->peerEd448Key,
+                #ifdef HAVE_PK_CALLBACKS
+                    &ssl->buffers.peerEd448Key
+                #else
+                    NULL
+                #endif
+                );
+
+                if (ret >= 0) {
+                    FreeKey(ssl, DYNAMIC_TYPE_ED448,
+                                                    (void**)&ssl->peerEd448Key);
+                    ssl->peerEd448KeyPresent = 0;
+                }
             }
         #endif
 
@@ -5766,6 +6266,9 @@
                                         args->output, args->sendSz);
                 if (ret != 0)
                     goto exit_dcv;
+
+                FreeKey(ssl, DYNAMIC_TYPE_RSA, (void**)&ssl->peerRsaKey);
+                ssl->peerRsaKeyPresent = 0;
             }
         #endif /* !NO_RSA */
 
@@ -5805,14 +6308,14 @@
 #ifdef WOLFSSL_ASYNC_CRYPT
     /* Handle async operation */
     if (ret == WC_PENDING_E) {
-        /* Mark message as not recevied so it can process again */
+        /* Mark message as not received so it can process again */
         ssl->msgsReceived.got_certificate_verify = 0;
 
         return ret;
     }
     else
 #endif /* WOLFSSL_ASYNC_CRYPT */
-    if (ret != 0)
+    if (ret != 0 && ret != INVALID_PARAMETER)
         SendAlert(ssl, alert_fatal, decrypt_error);
 
     /* Final cleanup */
@@ -5850,7 +6353,7 @@
         return BUFFER_E;
 
     if (ssl->options.handShakeDone) {
-        ret = DeriveFinishedSecret(ssl, ssl->arrays->clientSecret,
+        ret = DeriveFinishedSecret(ssl, ssl->clientSecret,
                                    ssl->keys.client_write_MAC_secret);
         if (ret != 0)
             return ret;
@@ -5861,12 +6364,12 @@
         /* All the handshake messages have been received to calculate
          * client and server finished keys.
          */
-        ret = DeriveFinishedSecret(ssl, ssl->arrays->clientSecret,
+        ret = DeriveFinishedSecret(ssl, ssl->clientSecret,
                                    ssl->keys.client_write_MAC_secret);
         if (ret != 0)
             return ret;
 
-        ret = DeriveFinishedSecret(ssl, ssl->arrays->serverSecret,
+        ret = DeriveFinishedSecret(ssl, ssl->serverSecret,
                                    ssl->keys.server_write_MAC_secret);
         if (ret != 0)
             return ret;
@@ -5964,7 +6467,7 @@
 
     /* make finished hashes */
     if (ssl->options.handShakeDone) {
-        ret = DeriveFinishedSecret(ssl, ssl->arrays->clientSecret,
+        ret = DeriveFinishedSecret(ssl, ssl->clientSecret,
                                    ssl->keys.client_write_MAC_secret);
         if (ret != 0)
             return ret;
@@ -5977,12 +6480,12 @@
         /* All the handshake messages have been done to calculate client and
          * server finished keys.
          */
-        ret = DeriveFinishedSecret(ssl, ssl->arrays->clientSecret,
+        ret = DeriveFinishedSecret(ssl, ssl->clientSecret,
                                    ssl->keys.client_write_MAC_secret);
         if (ret != 0)
             return ret;
 
-        ret = DeriveFinishedSecret(ssl, ssl->arrays->serverSecret,
+        ret = DeriveFinishedSecret(ssl, ssl->serverSecret,
                                    ssl->keys.server_write_MAC_secret);
         if (ret != 0)
             return ret;
@@ -5999,11 +6502,12 @@
     if (sendSz < 0)
         return BUILD_MSG_ERROR;
 
-    if (!ssl->options.resuming) {
 #ifndef NO_SESSION_CACHE
+    if (!ssl->options.resuming && (ssl->options.side == WOLFSSL_SERVER_END ||
+            (ssl->options.side == WOLFSSL_SERVER_END && ssl->arrays != NULL))) {
         AddSession(ssl);    /* just try */
-#endif
-    }
+    }
+#endif
 
     #ifdef WOLFSSL_CALLBACKS
         if (ssl->hsInfoOn) AddPacketName(ssl, "Finished");
@@ -6180,7 +6684,7 @@
 
     switch (input[i]) {
         case update_not_requested:
-            /* This message in response to any oustanding request. */
+            /* This message in response to any outstanding request. */
             ssl->keys.keyUpdateRespond = 0;
             ssl->keys.updateResponseReq = 0;
             break;
@@ -6190,7 +6694,6 @@
             break;
         default:
             return INVALID_PARAMETER;
-            break;
     }
 
     /* Move index to byte after message. */
@@ -6294,7 +6797,7 @@
         return BUFFER_ERROR;
 
     if (ssl->earlyData == no_early_data) {
-        WOLFSSL_MSG("EndOfEarlyData recieved unexpectedly");
+        WOLFSSL_MSG("EndOfEarlyData received unexpectedly");
         SendAlert(ssl, alert_fatal, unexpected_message);
         return OUT_OF_ORDER_E;
     }
@@ -6323,7 +6826,7 @@
  * inOutIdx  On entry, the index into the message buffer of Finished.
  *           On exit, the index of byte after the Finished message and padding.
  * size      The length of the current handshake message.
- * retuns 0 on success, otherwise failure.
+ * returns 0 on success, otherwise failure.
  */
 static int DoTls13NewSessionTicket(WOLFSSL* ssl, const byte* input,
                                    word32* inOutIdx, word32 size)
@@ -6454,7 +6957,7 @@
  * message.
  *
  * ssl  The SSL/TLS object.
- * retuns 0 on success, otherwise failure.
+ * returns 0 on success, otherwise failure.
  */
 static int ExpectedResumptionSecret(WOLFSSL* ssl)
 {
@@ -6464,7 +6967,7 @@
     Digest      digest;
     static byte header[] = { 0x14, 0x00, 0x00, 0x00 };
 
-    /* Copy the running hash so we cna restore it after. */
+    /* Copy the running hash so we can restore it after. */
     switch (ssl->specs.mac_algorithm) {
     #ifndef NO_SHA256
         case sha256_mac:
@@ -6544,7 +7047,7 @@
  * Message contains the information required to perform resumption.
  *
  * ssl  The SSL/TLS object.
- * retuns 0 on success, otherwise failure.
+ * returns 0 on success, otherwise failure.
  */
 static int SendTls13NewSessionTicket(WOLFSSL* ssl)
 {
@@ -7004,7 +7507,7 @@
 
     WOLFSSL_ENTER("DoTls13HandShakeMsgType");
 
-    /* make sure can read the message */
+    /* make sure we can read the message */
     if (*inOutIdx + size > totalSz)
         return INCOMPLETE_DATA;
 
@@ -7051,10 +7554,10 @@
     /* above checks handshake state */
     switch (type) {
 #ifndef NO_WOLFSSL_CLIENT
-    /* Messages only recieved by client. */
+    /* Messages only received by client. */
     #ifdef WOLFSSL_TLS13_DRAFT_18
     case hello_retry_request:
-        WOLFSSL_MSG("processing hello rety request");
+        WOLFSSL_MSG("processing hello retry request");
         ret = DoTls13HelloRetryRequest(ssl, input, inOutIdx, size);
         break;
     #endif
@@ -7062,6 +7565,18 @@
     case server_hello:
         WOLFSSL_MSG("processing server hello");
         ret = DoTls13ServerHello(ssl, input, inOutIdx, size, &type);
+    #if !defined(WOLFSSL_NO_CLIENT_AUTH) && \
+               ((defined(HAVE_ED25519) && !defined(NO_ED25519_CLIENT_AUTH)) || \
+                (defined(HAVE_ED448) && !defined(NO_ED448_CLIENT_AUTH)))
+        if (ssl->options.resuming || !IsAtLeastTLSv1_2(ssl) ||
+                                               IsAtLeastTLSv1_3(ssl->version)) {
+            ssl->options.cacheMessages = 0;
+            if (ssl->hsHashes->messages != NULL) {
+                XFREE(ssl->hsHashes->messages, ssl->heap, DYNAMIC_TYPE_HASHES);
+                ssl->hsHashes->messages = NULL;
+            }
+        }
+    #endif
         break;
 
     case encrypted_extensions:
@@ -7083,7 +7598,7 @@
 #endif /* !NO_WOLFSSL_CLIENT */
 
 #ifndef NO_WOLFSSL_SERVER
-    /* Messages only recieved by server. */
+    /* Messages only received by server. */
     case client_hello:
         WOLFSSL_MSG("processing client hello");
         ret = DoTls13ClientHello(ssl, input, inOutIdx, size);
@@ -7097,7 +7612,7 @@
     #endif
 #endif /* !NO_WOLFSSL_SERVER */
 
-    /* Messages recieved by both client and server. */
+    /* Messages received by both client and server. */
 #ifndef NO_CERTS
     case certificate:
         WOLFSSL_MSG("processing certificate");
@@ -7105,7 +7620,8 @@
         break;
 #endif
 
-#if !defined(NO_RSA) || defined(HAVE_ECC) || defined(HAVE_ED25519)
+#if !defined(NO_RSA) || defined(HAVE_ECC) || defined(HAVE_ED25519) || \
+                                                             defined(HAVE_ED448)
     case certificate_verify:
         WOLFSSL_MSG("processing certificate verify");
         ret = DoTls13CertificateVerify(ssl, input, inOutIdx, size);
@@ -7136,6 +7652,9 @@
                                                            type != key_update) {
         ret = HashInput(ssl, input + inIdx, size);
     }
+    if (ret == 0 && ssl->buffers.inputBuffer.dynamicFlag) {
+        ShrinkInputBuffer(ssl, NO_FORCED_FREE);
+    }
 
     if (ret == BUFFER_ERROR || ret == MISSING_HANDSHAKE_DATA)
         SendAlert(ssl, alert_fatal, decode_error);
@@ -7145,7 +7664,7 @@
         SendAlert(ssl, alert_fatal, illegal_parameter);
     }
 
-    if (ssl->options.tls1_3) {
+    if (ret == 0 && ssl->options.tls1_3) {
         /* Need to hash input message before deriving secrets. */
     #ifndef NO_WOLFSSL_CLIENT
         if (ssl->options.side == WOLFSSL_CLIENT_END) {
@@ -7243,8 +7762,11 @@
         byte   type;
         word32 size;
 
-        if (GetHandshakeHeader(ssl,input,inOutIdx,&type, &size, totalSz) != 0)
+        if (GetHandshakeHeader(ssl, input, inOutIdx, &type, &size,
+                                                                totalSz) != 0) {
+            SendAlert(ssl, alert_fatal, unexpected_message);
             return PARSE_ERROR;
+        }
 
         return DoTls13HandShakeMsgType(ssl, input, inOutIdx, type, size,
                                        totalSz);
@@ -7354,7 +7876,13 @@
         return WOLFSSL_FATAL_ERROR;
     }
 
-    if (ssl->buffers.outputBuffer.length > 0) {
+    if (ssl->buffers.outputBuffer.length > 0
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        /* do not send buffered or advance state if last error was an 
+            async pending operation */
+        && ssl->error != WC_PENDING_E
+    #endif
+    ) {
         if ((ssl->error = SendBuffered(ssl)) == 0) {
             /* fragOffset is non-zero when sending fragments. On the last
              * fragment, fragOffset is zero again, and the state can be
@@ -7552,6 +8080,10 @@
             }
         #endif /* NO_HANDSHAKE_DONE_CB */
 
+            if (!ssl->options.keepResources) {
+                FreeHandshakeResources(ssl);
+            }
+
             WOLFSSL_LEAVE("wolfSSL_connect_TLSv13()", WOLFSSL_SUCCESS);
             return WOLFSSL_SUCCESS;
 
@@ -7937,6 +8469,85 @@
     return WOLFSSL_SUCCESS;
 }
 
+#ifndef NO_PSK
+void wolfSSL_CTX_set_psk_client_tls13_callback(WOLFSSL_CTX* ctx,
+                                               wc_psk_client_tls13_callback cb)
+{
+    WOLFSSL_ENTER("SSL_CTX_set_psk_client_tls13_callback");
+
+    if (ctx == NULL)
+        return;
+
+    ctx->havePSK = 1;
+    ctx->client_psk_tls13_cb = cb;
+}
+
+
+void wolfSSL_set_psk_client_tls13_callback(WOLFSSL* ssl,
+                                           wc_psk_client_tls13_callback cb)
+{
+    byte haveRSA = 1;
+    int  keySz   = 0;
+
+    WOLFSSL_ENTER("SSL_set_psk_client_tls13_callback");
+
+    if (ssl == NULL)
+        return;
+
+    ssl->options.havePSK = 1;
+    ssl->options.client_psk_tls13_cb = cb;
+
+    #ifdef NO_RSA
+        haveRSA = 0;
+    #endif
+    #ifndef NO_CERTS
+        keySz = ssl->buffers.keySz;
+    #endif
+    InitSuites(ssl->suites, ssl->version, keySz, haveRSA, TRUE,
+               ssl->options.haveDH, ssl->options.haveNTRU,
+               ssl->options.haveECDSAsig, ssl->options.haveECC,
+               ssl->options.haveStaticECC, ssl->options.side);
+}
+
+
+void wolfSSL_CTX_set_psk_server_tls13_callback(WOLFSSL_CTX* ctx,
+                                               wc_psk_server_tls13_callback cb)
+{
+    WOLFSSL_ENTER("SSL_CTX_set_psk_server_tls13_callback");
+    if (ctx == NULL)
+        return;
+    ctx->havePSK = 1;
+    ctx->server_psk_tls13_cb = cb;
+}
+
+
+void wolfSSL_set_psk_server_tls13_callback(WOLFSSL* ssl,
+                                           wc_psk_server_tls13_callback cb)
+{
+    byte haveRSA = 1;
+    int  keySz   = 0;
+
+    WOLFSSL_ENTER("SSL_set_psk_server_tls13_callback");
+    if (ssl == NULL)
+        return;
+
+    ssl->options.havePSK = 1;
+    ssl->options.server_psk_tls13_cb = cb;
+
+    #ifdef NO_RSA
+        haveRSA = 0;
+    #endif
+    #ifndef NO_CERTS
+        keySz = ssl->buffers.keySz;
+    #endif
+    InitSuites(ssl->suites, ssl->version, keySz, haveRSA, TRUE,
+               ssl->options.haveDH, ssl->options.haveNTRU,
+               ssl->options.haveECDSAsig, ssl->options.haveECC,
+               ssl->options.haveStaticECC, ssl->options.side);
+}
+#endif
+
+
 #ifndef NO_WOLFSSL_SERVER
 /* The server accepting a connection from a client.
  * The protocol version is expecting to be TLS v1.3.
@@ -7993,7 +8604,13 @@
     }
 #endif
 
-    if (ssl->buffers.outputBuffer.length > 0) {
+    if (ssl->buffers.outputBuffer.length > 0
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        /* do not send buffered or advance state if last error was an 
+            async pending operation */
+        && ssl->error != WC_PENDING_E
+    #endif
+    ) {
         if ((ssl->error = SendBuffered(ssl)) == 0) {
             /* fragOffset is non-zero when sending fragments. On the last
              * fragment, fragOffset is zero again, and the state can be
@@ -8016,6 +8633,9 @@
 
     switch (ssl->options.acceptState) {
 
+#ifdef HAVE_SECURE_RENEGOTIATION
+        case TLS13_ACCEPT_BEGIN_RENEG:
+#endif
         case TLS13_ACCEPT_BEGIN :
             /* get client_hello */
             while (ssl->options.clientState < CLIENT_HELLO_COMPLETE) {
@@ -8027,6 +8647,8 @@
 
             ssl->options.acceptState = TLS13_ACCEPT_CLIENT_HELLO_DONE;
             WOLFSSL_MSG("accept state ACCEPT_CLIENT_HELLO_DONE");
+            if (!IsAtLeastTLSv1_3(ssl->version))
+                return wolfSSL_accept(ssl);
             FALL_THROUGH;
 
         case TLS13_ACCEPT_CLIENT_HELLO_DONE :
@@ -8067,6 +8689,7 @@
                     return WOLFSSL_FATAL_ERROR;
                 }
                 ssl->options.sentChangeCipher = 1;
+                ssl->options.serverState = SERVER_HELLO_RETRY_REQUEST_COMPLETE;
             }
     #endif
             ssl->options.acceptState = TLS13_ACCEPT_FIRST_REPLY_DONE;
@@ -8077,7 +8700,7 @@
         case TLS13_ACCEPT_FIRST_REPLY_DONE :
             if (ssl->options.serverState ==
                                           SERVER_HELLO_RETRY_REQUEST_COMPLETE) {
-                ssl->options.clientState = NULL_STATE;
+                ssl->options.clientState = CLIENT_HELLO_RETRY;
                 while (ssl->options.clientState < CLIENT_HELLO_COMPLETE) {
                     if ((ssl->error = ProcessReply(ssl)) < 0) {
                         WOLFSSL_ERROR(ssl->error);
@@ -8191,8 +8814,8 @@
         case TLS13_ACCEPT_FINISHED_SENT :
 #ifdef HAVE_SESSION_TICKET
     #ifdef WOLFSSL_TLS13_TICKET_BEFORE_FINISHED
-            if (!ssl->options.resuming && !ssl->options.verifyPeer &&
-                !ssl->options.noTicketTls13 && ssl->ctx->ticketEncCb != NULL) {
+            if (!ssl->options.verifyPeer && !ssl->options.noTicketTls13 &&
+                                                ssl->ctx->ticketEncCb != NULL) {
                 if ((ssl->error = SendTls13NewSessionTicket(ssl)) != 0) {
                     WOLFSSL_ERROR(ssl->error);
                     return WOLFSSL_FATAL_ERROR;
@@ -8222,8 +8845,7 @@
             }
             else
     #endif
-            if (!ssl->options.resuming &&
-                !ssl->options.noTicketTls13 && ssl->ctx->ticketEncCb != NULL) {
+            if (!ssl->options.noTicketTls13 && ssl->ctx->ticketEncCb != NULL) {
                 if ((ssl->error = SendTls13NewSessionTicket(ssl)) != 0) {
                     WOLFSSL_ERROR(ssl->error);
                     return WOLFSSL_FATAL_ERROR;
@@ -8246,6 +8868,10 @@
             }
 #endif /* NO_HANDSHAKE_DONE_CB */
 
+            if (!ssl->options.keepResources) {
+                FreeHandshakeResources(ssl);
+            }
+
             WOLFSSL_LEAVE("SSL_accept()", WOLFSSL_SUCCESS);
             return WOLFSSL_SUCCESS;
 
@@ -8329,7 +8955,7 @@
     if (ssl->options.handShakeState == NULL_STATE) {
         ssl->earlyData = expecting_early_data;
         ret = wolfSSL_connect_TLSv13(ssl);
-        if (ret <= 0)
+        if (ret != WOLFSSL_SUCCESS)
             return WOLFSSL_FATAL_ERROR;
     }
     if (ssl->options.handShakeState == CLIENT_HELLO_COMPLETE) {
@@ -8401,6 +9027,20 @@
 }
 #endif
 
+#ifdef HAVE_SECRET_CALLBACK
+int wolfSSL_set_tls13_secret_cb(WOLFSSL* ssl, Tls13SecretCb cb, void* ctx)
+{
+    WOLFSSL_ENTER("wolfSSL_set_tls13_secret_cb");
+    if (ssl == NULL)
+        return WOLFSSL_FATAL_ERROR;
+
+    ssl->tls13SecretCb = cb;
+    ssl->tls13SecretCtx = ctx;
+
+    return WOLFSSL_SUCCESS;
+}
+#endif
+
 #undef ERROR_OUT
 
 #endif /* !WOLFCRYPT_ONLY */
--- a/src/wolfio.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/src/wolfio.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* wolfio.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -39,7 +39,7 @@
 #include <wolfssl/wolfio.h>
 
 #if defined(HAVE_HTTP_CLIENT)
-    #include <stdlib.h>   /* atoi(), strtol() */
+    #include <stdlib.h>   /* strtol() */
 #endif
 
 /*
@@ -125,14 +125,24 @@
         return WOLFSSL_CBIO_ERR_GENERAL;
     }
 
+    if (ssl->biord->method && ssl->biord->method->readCb) {
+        WOLFSSL_MSG("Calling custom biord");
+        recvd = ssl->biord->method->readCb(ssl->biord, buf, sz);
+        if (recvd < 0 && recvd != WOLFSSL_CBIO_ERR_WANT_READ)
+            return WOLFSSL_CBIO_ERR_GENERAL;
+        return recvd;
+    }
+
     switch (ssl->biord->type) {
         case WOLFSSL_BIO_MEMORY:
         case WOLFSSL_BIO_BIO:
             if (wolfSSL_BIO_ctrl_pending(ssl->biord) == 0) {
+                WOLFSSL_MSG("BIO want read");
                return WOLFSSL_CBIO_ERR_WANT_READ;
             }
             recvd = wolfSSL_BIO_read(ssl->biord, buf, sz);
             if (recvd <= 0) {
+                WOLFSSL_MSG("BIO general error");
                 return WOLFSSL_CBIO_ERR_GENERAL;
             }
             break;
@@ -161,11 +171,22 @@
 {
     int sent = WOLFSSL_CBIO_ERR_GENERAL;
 
+    WOLFSSL_ENTER("BioSend");
+
     if (ssl->biowr == NULL) {
-        WOLFSSL_MSG("WOLFSSL biowr not set\n");
+        WOLFSSL_MSG("WOLFSSL biowr not set");
         return WOLFSSL_CBIO_ERR_GENERAL;
     }
 
+    if (ssl->biowr->method && ssl->biowr->method->writeCb) {
+        WOLFSSL_MSG("Calling custom biowr");
+        sent = ssl->biowr->method->writeCb(ssl->biowr, buf, sz);
+        if (sent < 0) {
+            return WOLFSSL_CBIO_ERR_GENERAL;
+        }
+        return sent;
+    }
+
     switch (ssl->biowr->type) {
         case WOLFSSL_BIO_MEMORY:
         case WOLFSSL_BIO_BIO:
@@ -238,6 +259,11 @@
     int sd = *(int*)ctx;
     int sent;
 
+#ifdef WOLFSSL_MAX_SEND_SZ
+    if (sz > WOLFSSL_MAX_SEND_SZ)
+        sz = WOLFSSL_MAX_SEND_SZ;
+#endif
+
     sent = wolfIO_Send(sd, buf, sz, ssl->wflags);
     if (sent < 0) {
         int err = wolfSSL_LastError();
@@ -366,12 +392,11 @@
     WOLFSSL_DTLS_CTX* dtlsCtx = (WOLFSSL_DTLS_CTX*)ctx;
     int sd = dtlsCtx->wfd;
     int sent;
-    int len = sz;
     int err;
 
     WOLFSSL_ENTER("EmbedSendTo()");
 
-    sent = (int)SENDTO_FUNCTION(sd, &buf[sz - len], len, ssl->wflags,
+    sent = (int)SENDTO_FUNCTION(sd, buf, sz, ssl->wflags,
                                 (const SOCKADDR*)dtlsCtx->peer.sa,
                                 dtlsCtx->peer.sz);
 
@@ -496,7 +521,7 @@
 
     /* get the peer information in human readable form (ip, port, family)
      * default function assumes BSD sockets
-     * can be overriden with wolfSSL_CTX_SetIOGetPeer
+     * can be overridden with wolfSSL_CTX_SetIOGetPeer
      */
     int EmbedGetPeer(WOLFSSL* ssl, char* ip, int* ipSz,
                                                  unsigned short* port, int* fam)
@@ -552,7 +577,7 @@
 
     /* set the peer information in human readable form (ip, port, family)
      * default function assumes BSD sockets
-     * can be overriden with wolfSSL_CTX_SetIOSetPeer
+     * can be overridden with wolfSSL_CTX_SetIOSetPeer
      */
     int EmbedSetPeer(WOLFSSL* ssl, char* ip, int ipSz,
                                                    unsigned short port, int fam)
@@ -744,7 +769,8 @@
     int ret = 0;
     SOCKADDR_S addr;
     int sockaddr_len = sizeof(SOCKADDR_IN);
-#ifdef HAVE_GETADDRINFO
+    /* use gethostbyname for c99 */
+#if defined(HAVE_GETADDRINFO) && !defined(WOLF_C99)
     ADDRINFO hints;
     ADDRINFO* answer = NULL;
     char strPort[6];
@@ -759,7 +785,8 @@
     printf("TCP Connect: %s:%d\n", ip, port);
 #endif
 
-#ifdef HAVE_GETADDRINFO
+    /* use gethostbyname for c99 */
+#if defined(HAVE_GETADDRINFO) && !defined(WOLF_C99)
     XMEMSET(&hints, 0, sizeof(hints));
     hints.ai_family = AF_UNSPEC;
     hints.ai_socktype = SOCK_STREAM;
@@ -876,7 +903,8 @@
         if (url[cur] == '[') {
             cur++;
             /* copy until ']' */
-            while (url[cur] != 0 && url[cur] != ']' && cur < urlSz) {
+            while (i < MAX_URL_ITEM_SIZE-1 && cur < urlSz && url[cur] != 0 &&
+                    url[cur] != ']') {
                 if (outName)
                     outName[i] = url[cur];
                 i++; cur++;
@@ -884,8 +912,8 @@
             cur++; /* skip ']' */
         }
         else {
-            while (url[cur] != 0 && url[cur] != ':' &&
-                                           url[cur] != '/' && cur < urlSz) {
+            while (i < MAX_URL_ITEM_SIZE-1 && cur < urlSz && url[cur] != 0 &&
+                    url[cur] != ':' && url[cur] != '/') {
                 if (outName)
                     outName[i] = url[cur];
                 i++; cur++;
@@ -901,9 +929,9 @@
             word32 bigPort = 0;
             i = 0;
             cur++;
-            while (cur < urlSz && url[cur] != 0 && url[cur] != '/' &&
-                    i < 6) {
-                port[i++] = url[cur++];
+            while (i < 6 && cur < urlSz && url[cur] != 0 && url[cur] != '/') {
+                port[i] = url[cur];
+                i++; cur++;
             }
 
             for (j = 0; j < i; j++) {
@@ -919,7 +947,7 @@
 
         if (cur < urlSz && url[cur] == '/') {
             i = 0;
-            while (cur < urlSz && url[cur] != 0 && i < MAX_URL_ITEM_SIZE) {
+            while (i < MAX_URL_ITEM_SIZE-1 && cur < urlSz && url[cur] != 0) {
                 if (outPath)
                     outPath[i] = url[cur];
                 i++; cur++;
@@ -938,8 +966,8 @@
     return result;
 }
 
-static int wolfIO_HttpProcessResponseBuf(int sfd, byte **recvBuf, int* recvBufSz,
-    int chunkSz, char* start, int len, int dynType, void* heap)
+static int wolfIO_HttpProcessResponseBuf(int sfd, byte **recvBuf,
+    int* recvBufSz, int chunkSz, char* start, int len, int dynType, void* heap)
 {
     byte* newRecvBuf = NULL;
     int newRecvSz = *recvBufSz + chunkSz;
@@ -950,6 +978,19 @@
     printf("HTTP Chunk %d->%d\n", *recvBufSz, chunkSz);
 #endif
 
+    (void)heap;
+    (void)dynType;
+
+    if (chunkSz < 0 || len < 0) {
+        WOLFSSL_MSG("wolfIO_HttpProcessResponseBuf invalid chunk or length size");
+        return MEMORY_E;
+    }
+
+    if (newRecvSz <= 0) {
+        WOLFSSL_MSG("wolfIO_HttpProcessResponseBuf new receive size overflow");
+        return MEMORY_E;
+    }
+
     newRecvBuf = (byte*)XMALLOC(newRecvSz, heap, dynType);
     if (newRecvBuf == NULL) {
         WOLFSSL_MSG("wolfIO_HttpProcessResponseBuf malloc failed");
@@ -966,8 +1007,15 @@
 
     /* copy the remainder of the httpBuf into the respBuf */
     if (len != 0) {
-        XMEMCPY(&newRecvBuf[pos], start, len);
-        pos += len;
+        if (pos + len <= newRecvSz) {
+            XMEMCPY(&newRecvBuf[pos], start, len);
+            pos += len;
+        }
+        else {
+            WOLFSSL_MSG("wolfIO_HttpProcessResponseBuf bad size");
+            XFREE(newRecvBuf, heap, dynType);
+            return -1;
+        }
     }
 
     /* receive the remainder of chunk */
@@ -1060,6 +1108,12 @@
 
             switch (state) {
                 case phr_init:
+                    if (XSTRLEN(start) < 15) { /* 15 is the length of the two
+                                          constant strings we're about to
+                                          compare against. */
+                        WOLFSSL_MSG("wolfIO_HttpProcessResponse HTTP header too short.");
+                        return -1;
+                    }
                     if (XSTRNCASECMP(start, "HTTP/1", 6) == 0) {
                         start += 9;
                         if (XSTRNCASECMP(start, "200 OK", 6) != 0) {
@@ -1072,11 +1126,17 @@
                 case phr_http_start:
                 case phr_have_length:
                 case phr_have_type:
+                    if (XSTRLEN(start) < 13) { /* 13 is the shortest of the following
+                                          next lines we're checking for. */
+                        WOLFSSL_MSG("wolfIO_HttpProcessResponse content type is too short.");
+                        return -1;
+                    }
+
                     if (XSTRNCASECMP(start, "Content-Type:", 13) == 0) {
                         int i;
 
                         start += 13;
-                        while (*start == ' ' && *start != '\0') start++;
+                        while (*start == ' ') start++;
 
                         /* try and match against appStrList */
                         i = 0;
@@ -1095,13 +1155,13 @@
                     }
                     else if (XSTRNCASECMP(start, "Content-Length:", 15) == 0) {
                         start += 15;
-                        while (*start == ' ' && *start != '\0') start++;
-                        chunkSz = atoi(start);
+                        while (*start == ' ') start++;
+                        chunkSz = XATOI(start);
                         state = (state == phr_http_start) ? phr_have_length : phr_wait_end;
                     }
                     else if (XSTRNCASECMP(start, "Transfer-Encoding:", 18) == 0) {
                         start += 18;
-                        while (*start == ' ' && *start != '\0') start++;
+                        while (*start == ' ') start++;
                         if (XSTRNCASECMP(start, "chunked", 7) == 0) {
                             isChunked = 1;
                             state = (state == phr_http_start) ? phr_have_length : phr_wait_end;
@@ -1139,12 +1199,18 @@
 
     return result;
 }
+int wolfIO_HttpBuildRequest(const char *reqType, const char *domainName,
+                               const char *path, int pathLen, int reqSz, const char *contentType,
+                               byte *buf, int bufSize)
+{
+    return wolfIO_HttpBuildRequest_ex(reqType, domainName, path, pathLen, reqSz, contentType, "", buf, bufSize);
+}
 
-int wolfIO_HttpBuildRequest(const char* reqType, const char* domainName,
-    const char* path, int pathLen, int reqSz, const char* contentType,
-    byte* buf, int bufSize)
-{
-    word32 reqTypeLen, domainNameLen, reqSzStrLen, contentTypeLen, maxLen;
+    int wolfIO_HttpBuildRequest_ex(const char *reqType, const char *domainName,
+                                const char *path, int pathLen, int reqSz, const char *contentType,
+                                const char *exHdrs, byte *buf, int bufSize)
+    {
+    word32 reqTypeLen, domainNameLen, reqSzStrLen, contentTypeLen, exHdrsLen, maxLen;
     char reqSzStr[6];
     char* req = (char*)buf;
     const char* blankStr = " ";
@@ -1152,9 +1218,10 @@
     const char* hostStr = "\r\nHost: ";
     const char* contentLenStr = "\r\nContent-Length: ";
     const char* contentTypeStr = "\r\nContent-Type: ";
+    const char* singleCrLfStr = "\r\n";
     const char* doubleCrLfStr = "\r\n\r\n";
     word32 blankStrLen, http11StrLen, hostStrLen, contentLenStrLen,
-        contentTypeStrLen, doubleCrLfStrLen;
+        contentTypeStrLen, singleCrLfStrLen, doubleCrLfStrLen;
 
     reqTypeLen = (word32)XSTRLEN(reqType);
     domainNameLen = (word32)XSTRLEN(domainName);
@@ -1166,6 +1233,15 @@
     hostStrLen = (word32)XSTRLEN(hostStr);
     contentLenStrLen = (word32)XSTRLEN(contentLenStr);
     contentTypeStrLen = (word32)XSTRLEN(contentTypeStr);
+
+    if(exHdrs){
+        singleCrLfStrLen = (word32)XSTRLEN(singleCrLfStr);
+        exHdrsLen = (word32)XSTRLEN(exHdrs);
+    } else {
+        singleCrLfStrLen = 0;
+        exHdrsLen = 0;
+    }
+
     doubleCrLfStrLen = (word32)XSTRLEN(doubleCrLfStr);
 
     /* determine max length and check it */
@@ -1180,38 +1256,49 @@
         reqSzStrLen +
         contentTypeStrLen +
         contentTypeLen +
+        singleCrLfStrLen +
+        exHdrsLen +
         doubleCrLfStrLen +
         1 /* null term */;
     if (maxLen > (word32)bufSize)
         return 0;
 
-    XSTRNCPY((char*)buf, reqType, reqTypeLen);
-    buf += reqTypeLen;
-    XSTRNCPY((char*)buf, blankStr, blankStrLen+1);
-    buf += blankStrLen;
-    XSTRNCPY((char*)buf, path, pathLen);
-    buf += pathLen;
-    XSTRNCPY((char*)buf, http11Str, http11StrLen+1);
-    buf += http11StrLen;
+    XSTRNCPY((char*)buf, reqType, bufSize);
+    buf += reqTypeLen; bufSize -= reqTypeLen;
+    XSTRNCPY((char*)buf, blankStr, bufSize);
+    buf += blankStrLen; bufSize -= blankStrLen;
+    XSTRNCPY((char*)buf, path, bufSize);
+    buf += pathLen; bufSize -= pathLen;
+    XSTRNCPY((char*)buf, http11Str, bufSize);
+    buf += http11StrLen; bufSize -= http11StrLen;
     if (domainNameLen > 0) {
-        XSTRNCPY((char*)buf, hostStr, hostStrLen+1);
-        buf += hostStrLen;
-        XSTRNCPY((char*)buf, domainName, domainNameLen);
-        buf += domainNameLen;
+        XSTRNCPY((char*)buf, hostStr, bufSize);
+        buf += hostStrLen; bufSize -= hostStrLen;
+        XSTRNCPY((char*)buf, domainName, bufSize);
+        buf += domainNameLen; bufSize -= domainNameLen;
     }
     if (reqSz > 0 && reqSzStrLen > 0) {
-        XSTRNCPY((char*)buf, contentLenStr, contentLenStrLen+1);
-        buf += contentLenStrLen;
-        XSTRNCPY((char*)buf, reqSzStr, reqSzStrLen);
-        buf += reqSzStrLen;
+        XSTRNCPY((char*)buf, contentLenStr, bufSize);
+        buf += contentLenStrLen; bufSize -= contentLenStrLen;
+        XSTRNCPY((char*)buf, reqSzStr, bufSize);
+        buf += reqSzStrLen; bufSize -= reqSzStrLen;
     }
     if (contentTypeLen > 0) {
-        XSTRNCPY((char*)buf, contentTypeStr, contentTypeStrLen+1);
-        buf += contentTypeStrLen;
-        XSTRNCPY((char*)buf, contentType, contentTypeLen);
-        buf += contentTypeLen;
+        XSTRNCPY((char*)buf, contentTypeStr, bufSize);
+        buf += contentTypeStrLen; bufSize -= contentTypeStrLen;
+        XSTRNCPY((char*)buf, contentType, bufSize);
+        buf += contentTypeLen; bufSize -= contentTypeLen;
     }
-    XSTRNCPY((char*)buf, doubleCrLfStr, doubleCrLfStrLen+1);
+    if (exHdrsLen > 0)
+    {
+        XSTRNCPY((char *)buf, singleCrLfStr, bufSize);
+        buf += singleCrLfStrLen;
+        bufSize -= singleCrLfStrLen;
+        XSTRNCPY((char *)buf, exHdrs, bufSize);
+        buf += exHdrsLen;
+        bufSize -= exHdrsLen;
+    }
+    XSTRNCPY((char*)buf, doubleCrLfStr, bufSize);
     buf += doubleCrLfStrLen;
 
 #ifdef WOLFIO_DEBUG
@@ -1228,8 +1315,9 @@
 int wolfIO_HttpBuildRequestOcsp(const char* domainName, const char* path,
                                     int ocspReqSz, byte* buf, int bufSize)
 {
-    return wolfIO_HttpBuildRequest("POST", domainName, path, (int)XSTRLEN(path),
-        ocspReqSz, "application/ocsp-request", buf, bufSize);
+    const char *cacheCtl = "Cache-Control: no-cache";
+    return wolfIO_HttpBuildRequest_ex("POST", domainName, path, (int)XSTRLEN(path),
+        ocspReqSz, "application/ocsp-request", cacheCtl, buf, bufSize);
 }
 
 /* return: >0 OCSP Response Size
@@ -1266,7 +1354,8 @@
     if (path == NULL)
         return MEMORY_E;
 
-    domainName = (char*)XMALLOC(MAX_URL_ITEM_SIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    domainName = (char*)XMALLOC(MAX_URL_ITEM_SIZE, NULL,
+            DYNAMIC_TYPE_TMP_BUFFER);
     if (domainName == NULL) {
         XFREE(path, NULL, DYNAMIC_TYPE_TMP_BUFFER);
         return MEMORY_E;
@@ -1296,7 +1385,7 @@
                                                             httpBuf, httpBufSz);
 
             ret = wolfIO_TcpConnect(&sfd, domainName, port, io_timeout_sec);
-            if ((ret != 0) || (sfd < 0)) {
+            if ((ret != 0) || ((int)sfd < 0)) {
                 WOLFSSL_MSG("OCSP Responder connection failed");
             }
             else if (wolfIO_Send(sfd, (char*)httpBuf, httpBufSz, 0) !=
@@ -1341,8 +1430,9 @@
 int wolfIO_HttpBuildRequestCrl(const char* url, int urlSz,
     const char* domainName, byte* buf, int bufSize)
 {
-    return wolfIO_HttpBuildRequest("GET", domainName, url, urlSz, 0, "",
-        buf, bufSize);
+    const char *cacheCtl = "Cache-Control: no-cache";
+    return wolfIO_HttpBuildRequest_ex("GET", domainName, url, urlSz, 0, "",
+                                   cacheCtl, buf, bufSize);
 }
 
 int wolfIO_HttpProcessResponseCrl(WOLFSSL_CRL* crl, int sfd, byte* httpBuf,
@@ -1430,37 +1520,67 @@
 
 
 
-WOLFSSL_API void wolfSSL_CTX_SetIORecv(WOLFSSL_CTX *ctx, CallbackIORecv CBIORecv)
+void wolfSSL_CTX_SetIORecv(WOLFSSL_CTX *ctx, CallbackIORecv CBIORecv)
 {
-    ctx->CBIORecv = CBIORecv;
+    if (ctx) {
+        ctx->CBIORecv = CBIORecv;
     #ifdef OPENSSL_EXTRA
-    ctx->cbioFlag |= WOLFSSL_CBIO_RECV;
+        ctx->cbioFlag |= WOLFSSL_CBIO_RECV;
     #endif
+    }
+}
+
+
+void wolfSSL_CTX_SetIOSend(WOLFSSL_CTX *ctx, CallbackIOSend CBIOSend)
+{
+    if (ctx) {
+        ctx->CBIOSend = CBIOSend;
+    #ifdef OPENSSL_EXTRA
+        ctx->cbioFlag |= WOLFSSL_CBIO_SEND;
+    #endif
+    }
 }
 
 
-WOLFSSL_API void wolfSSL_CTX_SetIOSend(WOLFSSL_CTX *ctx, CallbackIOSend CBIOSend)
+/* sets the IO callback to use for receives at WOLFSSL level */
+void wolfSSL_SSLSetIORecv(WOLFSSL *ssl, CallbackIORecv CBIORecv)
 {
-    ctx->CBIOSend = CBIOSend;
+    if (ssl) {
+        ssl->CBIORecv = CBIORecv;
     #ifdef OPENSSL_EXTRA
-    ctx->cbioFlag |= WOLFSSL_CBIO_SEND;
+        ssl->cbioFlag |= WOLFSSL_CBIO_RECV;
     #endif
+    }
 }
 
 
-WOLFSSL_API void wolfSSL_SetIOReadCtx(WOLFSSL* ssl, void *rctx)
+/* sets the IO callback to use for sends at WOLFSSL level */
+void wolfSSL_SSLSetIOSend(WOLFSSL *ssl, CallbackIOSend CBIOSend)
 {
-    ssl->IOCB_ReadCtx = rctx;
+    if (ssl) {
+        ssl->CBIOSend = CBIOSend;
+    #ifdef OPENSSL_EXTRA
+        ssl->cbioFlag |= WOLFSSL_CBIO_SEND;
+    #endif
+    }
 }
 
 
-WOLFSSL_API void wolfSSL_SetIOWriteCtx(WOLFSSL* ssl, void *wctx)
+void wolfSSL_SetIOReadCtx(WOLFSSL* ssl, void *rctx)
 {
-    ssl->IOCB_WriteCtx = wctx;
+    if (ssl)
+        ssl->IOCB_ReadCtx = rctx;
 }
 
 
-WOLFSSL_API void* wolfSSL_GetIOReadCtx(WOLFSSL* ssl)
+void wolfSSL_SetIOWriteCtx(WOLFSSL* ssl, void *wctx)
+{
+    if (ssl)
+        ssl->IOCB_WriteCtx = wctx;
+}
+
+
+void* wolfSSL_GetIOReadCtx(WOLFSSL* ssl)
 {
     if (ssl)
         return ssl->IOCB_ReadCtx;
@@ -1469,7 +1589,7 @@
 }
 
 
-WOLFSSL_API void* wolfSSL_GetIOWriteCtx(WOLFSSL* ssl)
+void* wolfSSL_GetIOWriteCtx(WOLFSSL* ssl)
 {
     if (ssl)
         return ssl->IOCB_WriteCtx;
@@ -1478,33 +1598,37 @@
 }
 
 
-WOLFSSL_API void wolfSSL_SetIOReadFlags(WOLFSSL* ssl, int flags)
+void wolfSSL_SetIOReadFlags(WOLFSSL* ssl, int flags)
 {
-    ssl->rflags = flags;
+    if (ssl)
+        ssl->rflags = flags;
 }
 
 
-WOLFSSL_API void wolfSSL_SetIOWriteFlags(WOLFSSL* ssl, int flags)
+void wolfSSL_SetIOWriteFlags(WOLFSSL* ssl, int flags)
 {
-    ssl->wflags = flags;
+    if (ssl)
+        ssl->wflags = flags;
 }
 
 
 #ifdef WOLFSSL_DTLS
 
-WOLFSSL_API void wolfSSL_CTX_SetGenCookie(WOLFSSL_CTX* ctx, CallbackGenCookie cb)
+void wolfSSL_CTX_SetGenCookie(WOLFSSL_CTX* ctx, CallbackGenCookie cb)
 {
-    ctx->CBIOCookie = cb;
+    if (ctx)
+        ctx->CBIOCookie = cb;
 }
 
 
-WOLFSSL_API void wolfSSL_SetCookieCtx(WOLFSSL* ssl, void *ctx)
+void wolfSSL_SetCookieCtx(WOLFSSL* ssl, void *ctx)
 {
-    ssl->IOCB_CookieCtx = ctx;
+    if (ssl)
+        ssl->IOCB_CookieCtx = ctx;
 }
 
 
-WOLFSSL_API void* wolfSSL_GetCookieCtx(WOLFSSL* ssl)
+void* wolfSSL_GetCookieCtx(WOLFSSL* ssl)
 {
     if (ssl)
         return ssl->IOCB_CookieCtx;
@@ -1514,15 +1638,17 @@
 
 #ifdef WOLFSSL_SESSION_EXPORT
 
-WOLFSSL_API void wolfSSL_CTX_SetIOGetPeer(WOLFSSL_CTX* ctx, CallbackGetPeer cb)
+void wolfSSL_CTX_SetIOGetPeer(WOLFSSL_CTX* ctx, CallbackGetPeer cb)
 {
-    ctx->CBGetPeer = cb;
+    if (ctx)
+        ctx->CBGetPeer = cb;
 }
 
 
-WOLFSSL_API void wolfSSL_CTX_SetIOSetPeer(WOLFSSL_CTX* ctx, CallbackSetPeer cb)
+void wolfSSL_CTX_SetIOSetPeer(WOLFSSL_CTX* ctx, CallbackSetPeer cb)
 {
-    ctx->CBSetPeer = cb;
+    if (ctx)
+        ctx->CBSetPeer = cb;
 }
 
 #endif /* WOLFSSL_SESSION_EXPORT */
@@ -1801,12 +1927,11 @@
     WOLFSSL_DTLS_CTX* dtlsCtx = (WOLFSSL_DTLS_CTX*)ctx;
     NET_SOCK_ID sd = dtlsCtx->wfd;
     NET_SOCK_RTN_CODE ret;
-    int len = sz;
     NET_ERR err;
 
     WOLFSSL_ENTER("MicriumSendTo()");
 
-    ret = NetSock_TxDataTo(sd, &buf[sz - len], len, ssl->wflags,
+    ret = NetSock_TxDataTo(sd, buf, sz, ssl->wflags,
                            (NET_SOCK_ADDR*)dtlsCtx->peer.sa,
                            (NET_SOCK_ADDR_LEN)dtlsCtx->peer.sz,
                            &err);
@@ -1858,5 +1983,394 @@
 
 #endif /* MICRIUM */
 
+#if defined(WOLFSSL_APACHE_MYNEWT) && !defined(WOLFSSL_LWIP)
+
+#include <os/os_error.h>
+#include <os/os_mbuf.h>
+#include <os/os_mempool.h>
+
+#define MB_NAME "wolfssl_mb"
+
+typedef struct Mynewt_Ctx {
+        struct mn_socket *mnSocket;          /* send/recv socket handler */
+        struct mn_sockaddr_in mnSockAddrIn;  /* socket address */
+        struct os_mbuf *mnPacket;            /* incoming packet handle
+                                                for short reads */
+        int reading;                         /* reading flag */
+
+        /* private */
+        void *mnMemBuffer;                   /* memory buffer for mempool */
+        struct os_mempool mnMempool;         /* mempool */
+        struct os_mbuf_pool mnMbufpool;      /* mbuf pool */
+} Mynewt_Ctx;
+
+void mynewt_ctx_clear(void *ctx) {
+    Mynewt_Ctx *mynewt_ctx = (Mynewt_Ctx*)ctx;
+    if(!mynewt_ctx) return;
+
+    if(mynewt_ctx->mnPacket) {
+        os_mbuf_free_chain(mynewt_ctx->mnPacket);
+        mynewt_ctx->mnPacket = NULL;
+    }
+    os_mempool_clear(&mynewt_ctx->mnMempool);
+    XFREE(mynewt_ctx->mnMemBuffer, 0, 0);
+    XFREE(mynewt_ctx, 0, 0);
+}
+
+/* return Mynewt_Ctx instance */
+void* mynewt_ctx_new() {
+    int rc = 0;
+    Mynewt_Ctx *mynewt_ctx;
+    int mem_buf_count = MYNEWT_VAL(WOLFSSL_MNSOCK_MEM_BUF_COUNT);
+    int mem_buf_size = MYNEWT_VAL(WOLFSSL_MNSOCK_MEM_BUF_SIZE);
+    int mempool_bytes = OS_MEMPOOL_BYTES(mem_buf_count, mem_buf_size);
+
+    mynewt_ctx = (Mynewt_Ctx *)XMALLOC(sizeof(struct Mynewt_Ctx),
+                                       NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    if(!mynewt_ctx) return NULL;
+
+    XMEMSET(mynewt_ctx, 0, sizeof(Mynewt_Ctx));
+    mynewt_ctx->mnMemBuffer = XMALLOC(mempool_bytes, 0, 0);
+    if(!mynewt_ctx->mnMemBuffer) {
+        mynewt_ctx_clear((void*)mynewt_ctx);
+        return NULL;
+    }
+
+    rc = os_mempool_init(&mynewt_ctx->mnMempool,
+                         mem_buf_count, mem_buf_size,
+                         mynewt_ctx->mnMemBuffer, MB_NAME);
+    if(rc != 0) {
+        mynewt_ctx_clear((void*)mynewt_ctx);
+        return NULL;
+    }
+    rc = os_mbuf_pool_init(&mynewt_ctx->mnMbufpool, &mynewt_ctx->mnMempool,
+                           mem_buf_count, mem_buf_size);
+    if(rc != 0) {
+        mynewt_ctx_clear((void*)mynewt_ctx);
+        return NULL;
+    }
+
+    return mynewt_ctx;
+}
+
+static void mynewt_sock_writable(void *arg, int err);
+static void mynewt_sock_readable(void *arg, int err);
+static const union mn_socket_cb mynewt_sock_cbs = {
+    .socket.writable = mynewt_sock_writable,
+    .socket.readable = mynewt_sock_readable,
+};
+static void mynewt_sock_writable(void *arg, int err)
+{
+    /* do nothing */
+}
+static void mynewt_sock_readable(void *arg, int err)
+{
+    Mynewt_Ctx *mynewt_ctx = (Mynewt_Ctx *)arg;
+    if (err && mynewt_ctx->reading) {
+        mynewt_ctx->reading = 0;
+    }
+}
+
+/* The Mynewt receive callback
+ *  return :  bytes read, or error
+ */
+int Mynewt_Receive(WOLFSSL *ssl, char *buf, int sz, void *ctx)
+{
+    Mynewt_Ctx *mynewt_ctx = (Mynewt_Ctx*)ctx;
+    int rc = 0;
+    struct mn_sockaddr_in from;
+    struct os_mbuf *m;
+    int read_sz = 0;
+    uint16_t total;
+
+    if (mynewt_ctx == NULL || mynewt_ctx->mnSocket == NULL) {
+        WOLFSSL_MSG("Mynewt Recv NULL parameters");
+        return WOLFSSL_CBIO_ERR_GENERAL;
+    }
+
+    if(mynewt_ctx->mnPacket == NULL) {
+        mynewt_ctx->mnPacket = os_mbuf_get_pkthdr(&mynewt_ctx->mnMbufpool, 0);
+        if(mynewt_ctx->mnPacket == NULL) {
+            return MEMORY_E;
+        }
+
+        mynewt_ctx->reading = 1;
+        while(mynewt_ctx->reading && rc == 0) {
+            rc = mn_recvfrom(mynewt_ctx->mnSocket, &m, (struct mn_sockaddr *) &from);
+            if(rc == MN_ECONNABORTED) {
+                rc = 0;
+                mynewt_ctx->reading = 0;
+                break;
+            }
+            if (!(rc == 0 || rc == MN_EAGAIN)) {
+                WOLFSSL_MSG("Mynewt Recv receive error");
+                mynewt_ctx->reading = 0;
+                break;
+            }
+            if(rc == 0) {
+                int len = OS_MBUF_PKTLEN(m);
+                if(len == 0) {
+                    break;
+                }
+                rc = os_mbuf_appendfrom(mynewt_ctx->mnPacket, m, 0, len);
+                if(rc != 0) {
+                    WOLFSSL_MSG("Mynewt Recv os_mbuf_appendfrom error");
+                    break;
+                }
+                os_mbuf_free_chain(m);
+                m = NULL;
+            } else if(rc == MN_EAGAIN) {
+                /* continue to until reading all of packet data. */
+                rc = 0;
+                break;
+            }
+        }
+        if(rc != 0) {
+            mynewt_ctx->reading = 0;
+            os_mbuf_free_chain(mynewt_ctx->mnPacket);
+            mynewt_ctx->mnPacket = NULL;
+            return rc;
+        }
+    }
+
+    if(mynewt_ctx->mnPacket) {
+        total = OS_MBUF_PKTLEN(mynewt_ctx->mnPacket);
+        read_sz = (total >= sz)? sz : total;
+
+        os_mbuf_copydata(mynewt_ctx->mnPacket, 0, read_sz, (void*)buf);
+        os_mbuf_adj(mynewt_ctx->mnPacket, read_sz);
+
+        if (read_sz == total) {
+            WOLFSSL_MSG("Mynewt Recv Drained packet");
+            os_mbuf_free_chain(mynewt_ctx->mnPacket);
+            mynewt_ctx->mnPacket = NULL;
+        }
+    }
+
+    return read_sz;
+}
+
+/* The Mynewt send callback
+ *  return : bytes sent, or error
+ */
+int Mynewt_Send(WOLFSSL* ssl, char *buf, int sz, void *ctx)
+{
+    Mynewt_Ctx *mynewt_ctx = (Mynewt_Ctx*)ctx;
+    int rc = 0;
+    struct os_mbuf *m;
+    int write_sz = 0;
+    m = os_msys_get_pkthdr(sz, 0);
+    if (!m) {
+        WOLFSSL_MSG("Mynewt Send os_msys_get_pkthdr error");
+        return WOLFSSL_CBIO_ERR_GENERAL;
+    }
+    rc = os_mbuf_copyinto(m, 0, buf, sz);
+    if (rc != 0) {
+        WOLFSSL_MSG("Mynewt Send os_mbuf_copyinto error");
+        os_mbuf_free_chain(m);
+        return rc;
+    }
+    rc = mn_sendto(mynewt_ctx->mnSocket, m, (struct mn_sockaddr *)&mynewt_ctx->mnSockAddrIn);
+    if(rc != 0) {
+        WOLFSSL_MSG("Mynewt Send mn_sendto error");
+        os_mbuf_free_chain(m);
+        return rc;
+    }
+    write_sz = sz;
+    return write_sz;
+}
+
+/* like set_fd, but for default NetX context */
+void wolfSSL_SetIO_Mynewt(WOLFSSL* ssl, struct mn_socket* mnSocket, struct mn_sockaddr_in* mnSockAddrIn)
+{
+    if (ssl && ssl->mnCtx) {
+        Mynewt_Ctx *mynewt_ctx = (Mynewt_Ctx *)ssl->mnCtx;
+        mynewt_ctx->mnSocket = mnSocket;
+        memcpy(&mynewt_ctx->mnSockAddrIn, mnSockAddrIn, sizeof(struct mn_sockaddr_in));
+        mn_socket_set_cbs(mynewt_ctx->mnSocket, mnSocket, &mynewt_sock_cbs);
+    }
+}
+
+#endif /* defined(WOLFSSL_APACHE_MYNEWT) && !defined(WOLFSSL_LWIP) */
+
+#ifdef WOLFSSL_UIP
+#include <uip.h>
+#include <stdio.h>
+
+/* uIP TCP/IP port, using the native tcp/udp socket api.
+ * TCP and UDP are currently supported with the callbacks below.
+ *
+ */
+/* The uIP tcp send callback
+ * return : bytes sent, or error
+ */
+int uIPSend(WOLFSSL* ssl, char* buf, int sz, void* _ctx)
+{
+    uip_wolfssl_ctx *ctx = (struct uip_wolfssl_ctx *)_ctx;
+    int ret;
+    unsigned int max_sendlen;
+    int total_written = 0;
+    (void)ssl;
+    do {
+        unsigned int bytes_left = sz - total_written;
+        max_sendlen = tcp_socket_max_sendlen(&ctx->conn.tcp);
+        if (bytes_left > max_sendlen) {
+            printf("Send limited by buffer\r\n");
+            bytes_left = max_sendlen;
+        }
+        if (bytes_left == 0) {
+            printf("Buffer full!\r\n");
+            break;
+        }
+        ret = tcp_socket_send(&ctx->conn.tcp, (unsigned char *)buf + total_written, bytes_left);
+        if (ret <= 0)
+            break;
+        total_written += ret;
+    } while(total_written < sz);
+    if (total_written == 0)
+        return WOLFSSL_CBIO_ERR_WANT_WRITE;
+    return total_written;
+}
+
+int uIPSendTo(WOLFSSL* ssl, char* buf, int sz, void* _ctx)
+{
+    uip_wolfssl_ctx *ctx = (struct uip_wolfssl_ctx *)_ctx;
+    int ret = 0;
+    (void)ssl;
+    ret = udp_socket_sendto(&ctx->conn.udp, (unsigned char *)buf, sz, &ctx->peer_addr, ctx->peer_port );
+    if (ret == 0)
+        return WOLFSSL_CBIO_ERR_WANT_WRITE;
+    return ret;
+}
+
+/* The uIP uTCP/IP receive callback
+ *  return : nb bytes read, or error
+ */
+int uIPReceive(WOLFSSL *ssl, char *buf, int sz, void *_ctx)
+{
+    uip_wolfssl_ctx *ctx = (uip_wolfssl_ctx *)_ctx;
+    if (!ctx || !ctx->ssl_rx_databuf)
+        return -1;
+    (void)ssl;
+    if (ctx->ssl_rb_len > 0) {
+        if (sz > ctx->ssl_rb_len - ctx->ssl_rb_off)
+            sz = ctx->ssl_rb_len - ctx->ssl_rb_off;
+        XMEMCPY(buf, ctx->ssl_rx_databuf + ctx->ssl_rb_off, sz);
+        ctx->ssl_rb_off += sz;
+        if (ctx->ssl_rb_off >= ctx->ssl_rb_len) {
+            ctx->ssl_rb_len = 0;
+            ctx->ssl_rb_off = 0;
+        }
+        return sz;
+    } else {
+        return WOLFSSL_CBIO_ERR_WANT_READ;
+    }
+}
+
+/* uIP DTLS Generate Cookie callback
+ *  return : number of bytes copied into buf, or error
+ */
+int uIPGenerateCookie(WOLFSSL* ssl, byte *buf, int sz, void *_ctx)
+{
+    uip_wolfssl_ctx *ctx = (uip_wolfssl_ctx *)_ctx;
+    byte token[32];
+    byte digest[WC_SHA_DIGEST_SIZE];
+    int  ret = 0;
+    XMEMSET(token, 0, sizeof(token));
+    XMEMCPY(token, &ctx->peer_addr, sizeof(uip_ipaddr_t));
+    XMEMCPY(token + sizeof(uip_ipaddr_t), &ctx->peer_port, sizeof(word16));
+    ret = wc_ShaHash(token, sizeof(uip_ipaddr_t) + sizeof(word16), digest);
+    if (ret != 0)
+        return ret;
+    if (sz > WC_SHA_DIGEST_SIZE)
+        sz = WC_SHA_DIGEST_SIZE;
+    XMEMCPY(buf, digest, sz);
+    return sz;
+}
+
+#endif /* WOLFSSL_UIP */
+
+#ifdef WOLFSSL_GNRC
+
+#include <net/sock.h>
+#include <net/sock/tcp.h>
+#include <stdio.h>
+
+/* GNRC TCP/IP port, using the native tcp/udp socket api.
+ * TCP and UDP are currently supported with the callbacks below.
+ *
+ */
+/* The GNRC tcp send callback
+ * return : bytes sent, or error
+ */
+
+int GNRC_SendTo(WOLFSSL* ssl, char* buf, int sz, void* _ctx)
+{
+    sock_tls_t *ctx = (sock_tls_t *)_ctx;
+    int ret = 0;
+    (void)ssl;
+    if (!ctx)
+        return WOLFSSL_CBIO_ERR_GENERAL;
+    ret = sock_udp_send(&ctx->conn.udp, (unsigned char *)buf, sz, &ctx->peer_addr);
+    if (ret == 0)
+        return WOLFSSL_CBIO_ERR_WANT_WRITE;
+    return ret;
+}
+
+/* The GNRC TCP/IP receive callback
+ *  return : nb bytes read, or error
+ */
+int GNRC_ReceiveFrom(WOLFSSL *ssl, char *buf, int sz, void *_ctx)
+{
+    sock_udp_ep_t ep;
+    int ret;
+    uint32_t timeout = wolfSSL_dtls_get_current_timeout(ssl) * 1000000;
+    sock_tls_t *ctx = (sock_tls_t *)_ctx;
+    if (!ctx)
+        return WOLFSSL_CBIO_ERR_GENERAL;
+    (void)ssl;
+    if (wolfSSL_get_using_nonblock(ctx->ssl)) {
+        timeout = 0;
+    }
+    ret = sock_udp_recv(&ctx->conn.udp, buf, sz, timeout, &ep);
+    if (ret > 0) {
+        if (ctx->peer_addr.port == 0)
+            XMEMCPY(&ctx->peer_addr, &ep, sizeof(sock_udp_ep_t));
+    }
+    if (ret == -ETIMEDOUT) {
+        return WOLFSSL_CBIO_ERR_WANT_READ;
+    }
+    return ret;
+}
+
+/* GNRC DTLS Generate Cookie callback
+ *  return : number of bytes copied into buf, or error
+ */
+#define GNRC_MAX_TOKEN_SIZE (32)
+int GNRC_GenerateCookie(WOLFSSL* ssl, byte *buf, int sz, void *_ctx)
+{
+    sock_tls_t *ctx = (sock_tls_t *)_ctx;
+    if (!ctx)
+        return WOLFSSL_CBIO_ERR_GENERAL;
+    byte token[GNRC_MAX_TOKEN_SIZE];
+    byte digest[WC_SHA_DIGEST_SIZE];
+    int  ret = 0;
+    size_t token_size = sizeof(sock_udp_ep_t);
+    (void)ssl;
+    if (token_size > GNRC_MAX_TOKEN_SIZE)
+        token_size = GNRC_MAX_TOKEN_SIZE;
+    XMEMSET(token, 0, GNRC_MAX_TOKEN_SIZE);
+    XMEMCPY(token, &ctx->peer_addr, token_size);
+    ret = wc_ShaHash(token, token_size, digest);
+    if (ret != 0)
+        return ret;
+    if (sz > WC_SHA_DIGEST_SIZE)
+        sz = WC_SHA_DIGEST_SIZE;
+    XMEMCPY(buf, digest, sz);
+    return sz;
+}
+
+#endif /* WOLFSSL_GNRC */
+
 #endif /* WOLFCRYPT_ONLY */
 
--- a/user_settings.h	Sat Aug 18 22:20:43 2018 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,38 +0,0 @@
-    //#include <stddef.h>
-    //#define MBED
-    #define WOLFSSL_CMSIS_RTOS    
-    #define WOLFSSL_USER_IO
-    #define NO_WRITEV
-    #define NO_DEV_RANDOM
-    #define HAVE_ECC
-    #define HAVE_AESGCM
-    
-    #define WOLFSSL_SHA384
-    #define WOLFSSL_SHA512
-    #define HAVE_CURVE25519
-    #define HAVE_ED25519   /* with HAVE_SHA512 */
-    //#define HAVE_POLY1305
-    //#define HAVE_CHACHA
-    //#define HAVE_ONE_TIME_AUTH
-    
-    #define NO_SESSION_CACHE // For Small RAM
-
-    #define NO_WOLFSSL_DIR  
-    #define DEBUG_WOLFSSL
-
-    #define HAVE_SUPPORTED_CURVES    
-    #define HAVE_TLS_EXTENSIONS
-    #define HAVE_HKDF
-    #define WC_RSA_PSS
-    #define WOLFSSL_TLS13
-    
-    #define SIZEOF_LONG_LONG  8
-    /* Options for Sample program */
-    //#define WOLFSSL_NO_VERIFYSERVER
-    //#define NO_FILESYSTEM
-    //#ifndef WOLFSSL_NO_VERIFYSERVER
-        #define TIME_OVERRIDES
-        #define HAVE_TM_TYPE
-        #define XTIME time
-        #define XGMTIME localtime
-    //#endif
\ No newline at end of file
--- a/wolfcrypt/src/aes.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/aes.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* aes.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -29,6 +29,8 @@
 
 #if !defined(NO_AES)
 
+/* Tip: Locate the software cipher modes by searching for "Software AES" */
+
 #if defined(HAVE_FIPS) && \
     defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)
 
@@ -44,6 +46,10 @@
 #include <wolfssl/wolfcrypt/aes.h>
 #include <wolfssl/wolfcrypt/cpuid.h>
 
+#ifdef WOLF_CRYPTO_CB
+    #include <wolfssl/wolfcrypt/cryptocb.h>
+#endif
+
 
 /* fips wrapper calls, user can call direct */
 #if defined(HAVE_FIPS) && \
@@ -136,9 +142,9 @@
                                       byte* authTag, word32 authTagSz,
                                       const byte* authIn, word32 authInSz)
         {
-            if (aes == NULL || authTagSz > AES_BLOCK_SIZE
-                                    || authTagSz < WOLFSSL_MIN_AUTH_TAG_SZ ||
-                                    ivSz > AES_BLOCK_SIZE) {
+            if (aes == NULL || authTagSz > AES_BLOCK_SIZE ||
+                        authTagSz < WOLFSSL_MIN_AUTH_TAG_SZ ||
+                        ivSz == 0 || ivSz > AES_BLOCK_SIZE) {
                 return BAD_FUNC_ARG;
             }
 
@@ -154,7 +160,7 @@
             {
                 if (aes == NULL || out == NULL || in == NULL || iv == NULL
                         || authTag == NULL || authTagSz > AES_BLOCK_SIZE ||
-                        ivSz > AES_BLOCK_SIZE) {
+                        ivSz == 0 || ivSz > AES_BLOCK_SIZE) {
                     return BAD_FUNC_ARG;
                 }
 
@@ -227,11 +233,14 @@
         #endif /* HAVE_AES_DECRYPT */
     #endif /* HAVE_AESCCM && HAVE_FIPS_VERSION 2 */
 
-    int  wc_AesInit(Aes* aes, void* h, int i)
+    int wc_AesInit(Aes* aes, void* h, int i)
     {
-        (void)aes;
+        if (aes == NULL)
+            return BAD_FUNC_ARG;
+
         (void)h;
         (void)i;
+
         /* FIPS doesn't support:
             return AesInit(aes, h, i); */
         return 0;
@@ -279,106 +288,64 @@
 
 /* Define AES implementation includes and functions */
 #if defined(STM32_CRYPTO)
-     /* STM32F2/F4 hardware AES support for CBC, CTR modes */
-
-    #ifdef WOLFSSL_STM32L4
-        #define CRYP AES
-    #endif
-
-    /* CRYPT_AES_GCM starts the IV with 2 */
-    #define STM32_GCM_IV_START 2
+     /* STM32F2/F4/F7/L4 hardware AES support for ECB, CBC, CTR and GCM modes */
 
 #if defined(WOLFSSL_AES_DIRECT) || defined(HAVE_AESGCM) || defined(HAVE_AESCCM)
+
     static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
     {
         int ret = 0;
     #ifdef WOLFSSL_STM32_CUBEMX
         CRYP_HandleTypeDef hcryp;
-
-        XMEMSET(&hcryp, 0, sizeof(CRYP_HandleTypeDef));
-        switch(aes->rounds) {
-            case 10: /* 128-bit key */
-                hcryp.Init.KeySize = CRYP_KEYSIZE_128B;
-                break;
-	#ifdef CRYP_KEYSIZE_192B
-            case 12: /* 192-bit key */
-                hcryp.Init.KeySize = CRYP_KEYSIZE_192B;
-                break;
-	#endif
-            case 14: /* 256-bit key */
-                hcryp.Init.KeySize = CRYP_KEYSIZE_256B;
-                break;
-            default:
-                break;
-        }
-        hcryp.Instance = CRYP;
-        hcryp.Init.DataType = CRYP_DATATYPE_8B;
-        hcryp.Init.pKey = (uint8_t*)aes->key;
-
+    #else
+        CRYP_InitTypeDef cryptInit;
+        CRYP_KeyInitTypeDef keyInit;
+    #endif
+
+    #ifdef WOLFSSL_STM32_CUBEMX
+        ret = wc_Stm32_Aes_Init(aes, &hcryp);
+        if (ret != 0)
+            return ret;
+
+    #ifdef STM32_CRYPTO_AES_ONLY
+        hcryp.Init.OperatingMode = CRYP_ALGOMODE_ENCRYPT;
+        hcryp.Init.ChainingMode  = CRYP_CHAINMODE_AES_ECB;
+        hcryp.Init.KeyWriteFlag  = CRYP_KEY_WRITE_ENABLE;
+    #elif defined(STM32_HAL_V2)
+        hcryp.Init.Algorithm  = CRYP_AES_ECB;
+    #endif
         HAL_CRYP_Init(&hcryp);
 
-        if (HAL_CRYP_AESECB_Encrypt(&hcryp, (uint8_t*)inBlock, AES_BLOCK_SIZE,
-                                    outBlock, STM32_HAL_TIMEOUT) != HAL_OK) {
-            ret = WC_TIMEOUT_E;
-        }
-
-        HAL_CRYP_DeInit(&hcryp);
+    #ifdef STM32_CRYPTO_AES_ONLY
+        ret = HAL_CRYPEx_AES(&hcryp, (uint8_t*)inBlock, AES_BLOCK_SIZE,
+            outBlock, STM32_HAL_TIMEOUT);
+    #elif defined(STM32_HAL_V2)
+        ret = HAL_CRYP_Encrypt(&hcryp, (uint32_t*)inBlock, AES_BLOCK_SIZE,
+            (uint32_t*)outBlock, STM32_HAL_TIMEOUT);
     #else
-        word32 *enc_key;
-        CRYP_InitTypeDef AES_CRYP_InitStructure;
-        CRYP_KeyInitTypeDef AES_CRYP_KeyInitStructure;
-
-        enc_key = aes->key;
-
-        /* crypto structure initialization */
-        CRYP_KeyStructInit(&AES_CRYP_KeyInitStructure);
-        CRYP_StructInit(&AES_CRYP_InitStructure);
+        ret = HAL_CRYP_AESECB_Encrypt(&hcryp, (uint8_t*)inBlock, AES_BLOCK_SIZE,
+            outBlock, STM32_HAL_TIMEOUT);
+    #endif
+        if (ret != HAL_OK) {
+            ret = WC_TIMEOUT_E;
+        }
+        HAL_CRYP_DeInit(&hcryp);
+
+    #else /* STD_PERI_LIB */
+        ret = wc_Stm32_Aes_Init(aes, &cryptInit, &keyInit);
+        if (ret != 0)
+            return ret;
 
         /* reset registers to their default values */
         CRYP_DeInit();
 
-        /* load key into correct registers */
-        switch (aes->rounds) {
-            case 10: /* 128-bit key */
-                AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_128b;
-                AES_CRYP_KeyInitStructure.CRYP_Key2Left  = enc_key[0];
-                AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[1];
-                AES_CRYP_KeyInitStructure.CRYP_Key3Left  = enc_key[2];
-                AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[3];
-                break;
-
-            case 12: /* 192-bit key */
-                AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_192b;
-                AES_CRYP_KeyInitStructure.CRYP_Key1Left  = enc_key[0];
-                AES_CRYP_KeyInitStructure.CRYP_Key1Right = enc_key[1];
-                AES_CRYP_KeyInitStructure.CRYP_Key2Left  = enc_key[2];
-                AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[3];
-                AES_CRYP_KeyInitStructure.CRYP_Key3Left  = enc_key[4];
-                AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[5];
-                break;
-
-            case 14: /* 256-bit key */
-                AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_256b;
-                AES_CRYP_KeyInitStructure.CRYP_Key0Left  = enc_key[0];
-                AES_CRYP_KeyInitStructure.CRYP_Key0Right = enc_key[1];
-                AES_CRYP_KeyInitStructure.CRYP_Key1Left  = enc_key[2];
-                AES_CRYP_KeyInitStructure.CRYP_Key1Right = enc_key[3];
-                AES_CRYP_KeyInitStructure.CRYP_Key2Left  = enc_key[4];
-                AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[5];
-                AES_CRYP_KeyInitStructure.CRYP_Key3Left  = enc_key[6];
-                AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[7];
-                break;
-
-            default:
-                break;
-        }
-        CRYP_KeyInit(&AES_CRYP_KeyInitStructure);
-
-        /* set direction, mode, and datatype */
-        AES_CRYP_InitStructure.CRYP_AlgoDir  = CRYP_AlgoDir_Encrypt;
-        AES_CRYP_InitStructure.CRYP_AlgoMode = CRYP_AlgoMode_AES_ECB;
-        AES_CRYP_InitStructure.CRYP_DataType = CRYP_DataType_8b;
-        CRYP_Init(&AES_CRYP_InitStructure);
+        /* setup key */
+        CRYP_KeyInit(&keyInit);
+
+        /* set direction and mode */
+        cryptInit.CRYP_AlgoDir  = CRYP_AlgoDir_Encrypt;
+        cryptInit.CRYP_AlgoMode = CRYP_AlgoMode_AES_ECB;
+        CRYP_Init(&cryptInit);
 
         /* enable crypto processor */
         CRYP_Cmd(ENABLE);
@@ -402,6 +369,7 @@
         /* disable crypto processor */
         CRYP_Cmd(DISABLE);
     #endif /* WOLFSSL_STM32_CUBEMX */
+
         return ret;
     }
 #endif /* WOLFSSL_AES_DIRECT || HAVE_AESGCM || HAVE_AESCCM */
@@ -413,103 +381,64 @@
         int ret = 0;
     #ifdef WOLFSSL_STM32_CUBEMX
         CRYP_HandleTypeDef hcryp;
-
-        XMEMSET(&hcryp, 0, sizeof(CRYP_HandleTypeDef));
-        switch(aes->rounds) {
-            case 10: /* 128-bit key */
-                hcryp.Init.KeySize = CRYP_KEYSIZE_128B;
-                break;
-	#ifdef CRYP_KEYSIZE_192B
-            case 12: /* 192-bit key */
-                hcryp.Init.KeySize = CRYP_KEYSIZE_192B;
-                break;
-	#endif
-            case 14: /* 256-bit key */
-                hcryp.Init.KeySize = CRYP_KEYSIZE_256B;
-                break;
-            default:
-                break;
-        }
-        hcryp.Instance = CRYP;
-        hcryp.Init.DataType = CRYP_DATATYPE_8B;
-        hcryp.Init.pKey = (uint8_t*)aes->key;
-
+    #else
+        CRYP_InitTypeDef cryptInit;
+        CRYP_KeyInitTypeDef keyInit;
+    #endif
+
+    #ifdef WOLFSSL_STM32_CUBEMX
+        ret = wc_Stm32_Aes_Init(aes, &hcryp);
+        if (ret != 0)
+            return ret;
+
+    #ifdef STM32_CRYPTO_AES_ONLY
+        hcryp.Init.OperatingMode = CRYP_ALGOMODE_DECRYPT;
+        hcryp.Init.ChainingMode  = CRYP_CHAINMODE_AES_ECB;
+        hcryp.Init.KeyWriteFlag  = CRYP_KEY_WRITE_ENABLE;
+    #elif defined(STM32_HAL_V2)
+        hcryp.Init.Algorithm  = CRYP_AES_ECB;
+    #endif
         HAL_CRYP_Init(&hcryp);
 
-        if (HAL_CRYP_AESECB_Decrypt(&hcryp, (uint8_t*)inBlock, AES_BLOCK_SIZE,
-                                       outBlock, STM32_HAL_TIMEOUT) != HAL_OK) {
-            ret = WC_TIMEOUT_E;
-        }
-
-        HAL_CRYP_DeInit(&hcryp);
+    #ifdef STM32_CRYPTO_AES_ONLY
+        ret = HAL_CRYPEx_AES(&hcryp, (uint8_t*)inBlock, AES_BLOCK_SIZE,
+            outBlock, STM32_HAL_TIMEOUT);
+    #elif defined(STM32_HAL_V2)
+        ret = HAL_CRYP_Decrypt(&hcryp, (uint32_t*)inBlock, AES_BLOCK_SIZE,
+            (uint32_t*)outBlock, STM32_HAL_TIMEOUT);
     #else
-        word32 *enc_key;
-        CRYP_InitTypeDef AES_CRYP_InitStructure;
-        CRYP_KeyInitTypeDef AES_CRYP_KeyInitStructure;
-
-        enc_key = aes->key;
-
-        /* crypto structure initialization */
-        CRYP_KeyStructInit(&AES_CRYP_KeyInitStructure);
-        CRYP_StructInit(&AES_CRYP_InitStructure);
+        ret = HAL_CRYP_AESECB_Decrypt(&hcryp, (uint8_t*)inBlock, AES_BLOCK_SIZE,
+            outBlock, STM32_HAL_TIMEOUT);
+    #endif
+        if (ret != HAL_OK) {
+            ret = WC_TIMEOUT_E;
+        }
+        HAL_CRYP_DeInit(&hcryp);
+
+    #else /* STD_PERI_LIB */
+        ret = wc_Stm32_Aes_Init(aes, &cryptInit, &keyInit);
+        if (ret != 0)
+            return ret;
 
         /* reset registers to their default values */
         CRYP_DeInit();
 
-        /* load key into correct registers */
-        switch (aes->rounds) {
-            case 10: /* 128-bit key */
-                AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_128b;
-                AES_CRYP_KeyInitStructure.CRYP_Key2Left  = enc_key[0];
-                AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[1];
-                AES_CRYP_KeyInitStructure.CRYP_Key3Left  = enc_key[2];
-                AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[3];
-                break;
-
-            case 12: /* 192-bit key */
-                AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_192b;
-                AES_CRYP_KeyInitStructure.CRYP_Key1Left  = enc_key[0];
-                AES_CRYP_KeyInitStructure.CRYP_Key1Right = enc_key[1];
-                AES_CRYP_KeyInitStructure.CRYP_Key2Left  = enc_key[2];
-                AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[3];
-                AES_CRYP_KeyInitStructure.CRYP_Key3Left  = enc_key[4];
-                AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[5];
-                break;
-
-            case 14: /* 256-bit key */
-                AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_256b;
-                AES_CRYP_KeyInitStructure.CRYP_Key0Left  = enc_key[0];
-                AES_CRYP_KeyInitStructure.CRYP_Key0Right = enc_key[1];
-                AES_CRYP_KeyInitStructure.CRYP_Key1Left  = enc_key[2];
-                AES_CRYP_KeyInitStructure.CRYP_Key1Right = enc_key[3];
-                AES_CRYP_KeyInitStructure.CRYP_Key2Left  = enc_key[4];
-                AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[5];
-                AES_CRYP_KeyInitStructure.CRYP_Key3Left  = enc_key[6];
-                AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[7];
-                break;
-
-            default:
-                break;
-        }
-        CRYP_KeyInit(&AES_CRYP_KeyInitStructure);
-
-        /* set direction, key, and datatype */
-        AES_CRYP_InitStructure.CRYP_AlgoDir  = CRYP_AlgoDir_Decrypt;
-        AES_CRYP_InitStructure.CRYP_AlgoMode = CRYP_AlgoMode_AES_Key;
-        AES_CRYP_InitStructure.CRYP_DataType = CRYP_DataType_8b;
-        CRYP_Init(&AES_CRYP_InitStructure);
+        /* set direction and key */
+        CRYP_KeyInit(&keyInit);
+        cryptInit.CRYP_AlgoDir  = CRYP_AlgoDir_Decrypt;
+        cryptInit.CRYP_AlgoMode = CRYP_AlgoMode_AES_Key;
+        CRYP_Init(&cryptInit);
 
         /* enable crypto processor */
         CRYP_Cmd(ENABLE);
 
-        /* wait until decrypt key has been intialized */
+        /* wait until decrypt key has been initialized */
         while (CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {}
 
-        /* set direction, mode, and datatype */
-        AES_CRYP_InitStructure.CRYP_AlgoDir  = CRYP_AlgoDir_Decrypt;
-        AES_CRYP_InitStructure.CRYP_AlgoMode = CRYP_AlgoMode_AES_ECB;
-        AES_CRYP_InitStructure.CRYP_DataType = CRYP_DataType_8b;
-        CRYP_Init(&AES_CRYP_InitStructure);
+        /* set direction and mode */
+        cryptInit.CRYP_AlgoDir  = CRYP_AlgoDir_Decrypt;
+        cryptInit.CRYP_AlgoMode = CRYP_AlgoMode_AES_ECB;
+        CRYP_Init(&cryptInit);
 
         /* enable crypto processor */
         CRYP_Cmd(ENABLE);
@@ -533,6 +462,7 @@
         /* disable crypto processor */
         CRYP_Cmd(DISABLE);
     #endif /* WOLFSSL_STM32_CUBEMX */
+
         return ret;
     }
     #endif /* WOLFSSL_AES_DIRECT || HAVE_AESCCM */
@@ -659,6 +589,24 @@
         #error nRF51 AES Hardware does not support decrypt
     #endif /* HAVE_AES_DECRYPT */
 
+#elif defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+    !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_AES)
+
+    #include "wolfssl/wolfcrypt/port/Espressif/esp32-crypt.h"
+
+    #if defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT)
+    static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
+    {
+        return wc_esp32AesEncrypt(aes, inBlock, outBlock);
+    }
+    #endif
+
+    #if defined(HAVE_AES_DECRYPT) && defined(WOLFSSL_AES_DIRECT)
+    static int wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
+    {
+       return wc_esp32AesDecrypt(aes, inBlock, outBlock);
+    }
+    #endif
 
 #elif defined(WOLFSSL_AESNI)
 
@@ -672,10 +620,12 @@
         #define AESNI_ALIGN 16
     #endif
 
-    #ifndef _MSC_VER
+    #ifdef _MSC_VER
+        #define XASM_LINK(f)
+    #elif defined(__APPLE__)
+        #define XASM_LINK(f) asm("_" f)
+    #else
         #define XASM_LINK(f) asm(f)
-    #else
-        #define XASM_LINK(f)
     #endif /* _MSC_VER */
 
     static int checkAESNI = 0;
@@ -811,15 +761,164 @@
         }
     #endif /* HAVE_AES_DECRYPT */
 
-#elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES)
+#elif (defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES)) || \
+      ((defined(WOLFSSL_AFALG) || defined(WOLFSSL_DEVCRYPTO_AES)) && \
+        defined(HAVE_AESCCM))
         static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
         {
             wc_AesEncryptDirect(aes, outBlock, inBlock);
             return 0;
         }
+
+#elif defined(WOLFSSL_AFALG)
+#elif defined(WOLFSSL_DEVCRYPTO_AES)
+
+#elif defined(WOLFSSL_SCE) && !defined(WOLFSSL_SCE_NO_AES)
+    #include "hal_data.h"
+
+    #ifndef WOLFSSL_SCE_AES256_HANDLE
+        #define WOLFSSL_SCE_AES256_HANDLE g_sce_aes_256
+    #endif
+
+    #ifndef WOLFSSL_SCE_AES192_HANDLE
+        #define WOLFSSL_SCE_AES192_HANDLE g_sce_aes_192
+    #endif
+
+    #ifndef WOLFSSL_SCE_AES128_HANDLE
+        #define WOLFSSL_SCE_AES128_HANDLE g_sce_aes_128
+    #endif
+
+    static int AES_ECB_encrypt(Aes* aes, const byte* inBlock, byte* outBlock,
+            int sz)
+    {
+        uint32_t ret;
+
+        if (WOLFSSL_SCE_GSCE_HANDLE.p_cfg->endian_flag ==
+                CRYPTO_WORD_ENDIAN_BIG) {
+            ByteReverseWords((word32*)inBlock, (word32*)inBlock, sz);
+        }
+
+        switch (aes->keylen) {
+        #ifdef WOLFSSL_AES_128
+            case AES_128_KEY_SIZE:
+                ret = WOLFSSL_SCE_AES128_HANDLE.p_api->encrypt(
+                        WOLFSSL_SCE_AES128_HANDLE.p_ctrl, aes->key,
+                        NULL, (sz / sizeof(word32)), (word32*)inBlock,
+                        (word32*)outBlock);
+                break;
+        #endif
+        #ifdef WOLFSSL_AES_192
+            case AES_192_KEY_SIZE:
+                ret = WOLFSSL_SCE_AES192_HANDLE.p_api->encrypt(
+                        WOLFSSL_SCE_AES192_HANDLE.p_ctrl, aes->key,
+                        NULL, (sz / sizeof(word32)), (word32*)inBlock,
+                        (word32*)outBlock);
+                break;
+        #endif
+        #ifdef WOLFSSL_AES_256
+            case AES_256_KEY_SIZE:
+                ret = WOLFSSL_SCE_AES256_HANDLE.p_api->encrypt(
+                        WOLFSSL_SCE_AES256_HANDLE.p_ctrl, aes->key,
+                        NULL, (sz / sizeof(word32)), (word32*)inBlock,
+                        (word32*)outBlock);
+                break;
+        #endif
+            default:
+                WOLFSSL_MSG("Unknown key size");
+                return BAD_FUNC_ARG;
+        }
+
+        if (ret != SSP_SUCCESS) {
+           /* revert input */
+            ByteReverseWords((word32*)inBlock, (word32*)inBlock, sz);
+            return WC_HW_E;
+        }
+
+        if (WOLFSSL_SCE_GSCE_HANDLE.p_cfg->endian_flag ==
+                CRYPTO_WORD_ENDIAN_BIG) {
+            ByteReverseWords((word32*)outBlock, (word32*)outBlock, sz);
+            if (inBlock != outBlock) {
+                /* revert input */
+                ByteReverseWords((word32*)inBlock, (word32*)inBlock, sz);
+            }
+        }
+        return 0;
+    }
+
+    #if defined(HAVE_AES_DECRYPT)
+    static int AES_ECB_decrypt(Aes* aes, const byte* inBlock, byte* outBlock,
+            int sz)
+    {
+        uint32_t ret;
+
+        if (WOLFSSL_SCE_GSCE_HANDLE.p_cfg->endian_flag ==
+                CRYPTO_WORD_ENDIAN_BIG) {
+            ByteReverseWords((word32*)inBlock, (word32*)inBlock, sz);
+        }
+
+        switch (aes->keylen) {
+        #ifdef WOLFSSL_AES_128
+            case AES_128_KEY_SIZE:
+                ret = WOLFSSL_SCE_AES128_HANDLE.p_api->decrypt(
+                        WOLFSSL_SCE_AES128_HANDLE.p_ctrl, aes->key, aes->reg,
+                        (sz / sizeof(word32)), (word32*)inBlock,
+                        (word32*)outBlock);
+                break;
+        #endif
+        #ifdef WOLFSSL_AES_192
+            case AES_192_KEY_SIZE:
+                ret = WOLFSSL_SCE_AES192_HANDLE.p_api->decrypt(
+                        WOLFSSL_SCE_AES192_HANDLE.p_ctrl, aes->key, aes->reg,
+                        (sz / sizeof(word32)), (word32*)inBlock,
+                        (word32*)outBlock);
+                break;
+        #endif
+        #ifdef WOLFSSL_AES_256
+            case AES_256_KEY_SIZE:
+                ret = WOLFSSL_SCE_AES256_HANDLE.p_api->decrypt(
+                        WOLFSSL_SCE_AES256_HANDLE.p_ctrl, aes->key, aes->reg,
+                        (sz / sizeof(word32)), (word32*)inBlock,
+                        (word32*)outBlock);
+                break;
+        #endif
+            default:
+                WOLFSSL_MSG("Unknown key size");
+                return BAD_FUNC_ARG;
+        }
+        if (ret != SSP_SUCCESS) {
+            return WC_HW_E;
+        }
+
+        if (WOLFSSL_SCE_GSCE_HANDLE.p_cfg->endian_flag ==
+                CRYPTO_WORD_ENDIAN_BIG) {
+            ByteReverseWords((word32*)outBlock, (word32*)outBlock, sz);
+            if (inBlock != outBlock) {
+                /* revert input */
+                ByteReverseWords((word32*)inBlock, (word32*)inBlock, sz);
+            }
+        }
+
+        return 0;
+    }
+
+    #endif
+
+    #if defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT)
+    static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
+    {
+        return AES_ECB_encrypt(aes, inBlock, outBlock, AES_BLOCK_SIZE);
+    }
+    #endif
+
+    #if defined(HAVE_AES_DECRYPT) && defined(WOLFSSL_AES_DIRECT)
+    static int wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
+    {
+        return AES_ECB_decrypt(aes, inBlock, outBlock, AES_BLOCK_SIZE);
+    }
+    #endif
 #else
 
-    /* using wolfCrypt software AES implementation */
+    /* using wolfCrypt software implementation */
     #define NEED_AES_TABLES
 #endif
 
@@ -834,6 +933,7 @@
     /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
 };
 
+#ifndef WOLFSSL_AES_SMALL_TABLES
 static const word32 Te[4][256] = {
 {
     0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
@@ -1369,8 +1469,12 @@
     0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U,
 }
 };
-
-
+#endif /* HAVE_AES_DECRYPT */
+#endif
+
+#ifdef HAVE_AES_DECRYPT
+#if (defined(HAVE_AES_CBC) && !defined(WOLFSSL_DEVCRYPTO_CBC)) \
+			|| defined(WOLFSSL_AES_DIRECT)
 static const byte Td4[256] =
 {
     0x52U, 0x09U, 0x6aU, 0xd5U, 0x30U, 0x36U, 0xa5U, 0x38U,
@@ -1406,11 +1510,67 @@
     0x17U, 0x2bU, 0x04U, 0x7eU, 0xbaU, 0x77U, 0xd6U, 0x26U,
     0xe1U, 0x69U, 0x14U, 0x63U, 0x55U, 0x21U, 0x0cU, 0x7dU,
 };
+#endif /* HAVE_AES_CBC || WOLFSSL_AES_DIRECT */
 #endif /* HAVE_AES_DECRYPT */
 
 #define GETBYTE(x, y) (word32)((byte)((x) >> (8 * (y))))
 
-
+#ifdef WOLFSSL_AES_SMALL_TABLES
+static const byte Tsbox[256] = {
+    0x63U, 0x7cU, 0x77U, 0x7bU, 0xf2U, 0x6bU, 0x6fU, 0xc5U,
+    0x30U, 0x01U, 0x67U, 0x2bU, 0xfeU, 0xd7U, 0xabU, 0x76U,
+    0xcaU, 0x82U, 0xc9U, 0x7dU, 0xfaU, 0x59U, 0x47U, 0xf0U,
+    0xadU, 0xd4U, 0xa2U, 0xafU, 0x9cU, 0xa4U, 0x72U, 0xc0U,
+    0xb7U, 0xfdU, 0x93U, 0x26U, 0x36U, 0x3fU, 0xf7U, 0xccU,
+    0x34U, 0xa5U, 0xe5U, 0xf1U, 0x71U, 0xd8U, 0x31U, 0x15U,
+    0x04U, 0xc7U, 0x23U, 0xc3U, 0x18U, 0x96U, 0x05U, 0x9aU,
+    0x07U, 0x12U, 0x80U, 0xe2U, 0xebU, 0x27U, 0xb2U, 0x75U,
+    0x09U, 0x83U, 0x2cU, 0x1aU, 0x1bU, 0x6eU, 0x5aU, 0xa0U,
+    0x52U, 0x3bU, 0xd6U, 0xb3U, 0x29U, 0xe3U, 0x2fU, 0x84U,
+    0x53U, 0xd1U, 0x00U, 0xedU, 0x20U, 0xfcU, 0xb1U, 0x5bU,
+    0x6aU, 0xcbU, 0xbeU, 0x39U, 0x4aU, 0x4cU, 0x58U, 0xcfU,
+    0xd0U, 0xefU, 0xaaU, 0xfbU, 0x43U, 0x4dU, 0x33U, 0x85U,
+    0x45U, 0xf9U, 0x02U, 0x7fU, 0x50U, 0x3cU, 0x9fU, 0xa8U,
+    0x51U, 0xa3U, 0x40U, 0x8fU, 0x92U, 0x9dU, 0x38U, 0xf5U,
+    0xbcU, 0xb6U, 0xdaU, 0x21U, 0x10U, 0xffU, 0xf3U, 0xd2U,
+    0xcdU, 0x0cU, 0x13U, 0xecU, 0x5fU, 0x97U, 0x44U, 0x17U,
+    0xc4U, 0xa7U, 0x7eU, 0x3dU, 0x64U, 0x5dU, 0x19U, 0x73U,
+    0x60U, 0x81U, 0x4fU, 0xdcU, 0x22U, 0x2aU, 0x90U, 0x88U,
+    0x46U, 0xeeU, 0xb8U, 0x14U, 0xdeU, 0x5eU, 0x0bU, 0xdbU,
+    0xe0U, 0x32U, 0x3aU, 0x0aU, 0x49U, 0x06U, 0x24U, 0x5cU,
+    0xc2U, 0xd3U, 0xacU, 0x62U, 0x91U, 0x95U, 0xe4U, 0x79U,
+    0xe7U, 0xc8U, 0x37U, 0x6dU, 0x8dU, 0xd5U, 0x4eU, 0xa9U,
+    0x6cU, 0x56U, 0xf4U, 0xeaU, 0x65U, 0x7aU, 0xaeU, 0x08U,
+    0xbaU, 0x78U, 0x25U, 0x2eU, 0x1cU, 0xa6U, 0xb4U, 0xc6U,
+    0xe8U, 0xddU, 0x74U, 0x1fU, 0x4bU, 0xbdU, 0x8bU, 0x8aU,
+    0x70U, 0x3eU, 0xb5U, 0x66U, 0x48U, 0x03U, 0xf6U, 0x0eU,
+    0x61U, 0x35U, 0x57U, 0xb9U, 0x86U, 0xc1U, 0x1dU, 0x9eU,
+    0xe1U, 0xf8U, 0x98U, 0x11U, 0x69U, 0xd9U, 0x8eU, 0x94U,
+    0x9bU, 0x1eU, 0x87U, 0xe9U, 0xceU, 0x55U, 0x28U, 0xdfU,
+    0x8cU, 0xa1U, 0x89U, 0x0dU, 0xbfU, 0xe6U, 0x42U, 0x68U,
+    0x41U, 0x99U, 0x2dU, 0x0fU, 0xb0U, 0x54U, 0xbbU, 0x16U
+};
+
+#define AES_XTIME(x)    ((byte)((byte)((x) << 1) ^ ((0 - ((x) >> 7)) & 0x1b)))
+
+static word32 col_mul(word32 t, int i2, int i3, int ia, int ib)
+{
+    byte t3 = GETBYTE(t, i3);
+    byte tm = AES_XTIME(GETBYTE(t, i2) ^ t3);
+
+    return GETBYTE(t, ia) ^ GETBYTE(t, ib) ^ t3 ^ tm;
+}
+
+static word32 inv_col_mul(word32 t, int i9, int ib, int id, int ie)
+{
+    byte t9 = GETBYTE(t, i9);
+    byte tb = GETBYTE(t, ib);
+    byte td = GETBYTE(t, id);
+    byte te = GETBYTE(t, ie);
+    byte t0 = t9 ^ tb ^ td;
+    return t0 ^ AES_XTIME(AES_XTIME(AES_XTIME(t0 ^ te) ^ td ^ te) ^ tb ^ te);
+}
+#endif
 
 #if defined(HAVE_AES_CBC) || defined(WOLFSSL_AES_DIRECT) || defined(HAVE_AESGCM)
 
@@ -1425,6 +1585,7 @@
 #endif
 
 
+#ifndef WOLFSSL_AES_SMALL_TABLES
 /* load 4 Te Tables into cache by cache line stride */
 static WC_INLINE word32 PreFetchTe(void)
 {
@@ -1439,8 +1600,21 @@
     }
     return x;
 }
-
-
+#else
+/* load sbox into cache by cache line stride */
+static WC_INLINE word32 PreFetchSBox(void)
+{
+    word32 x = 0;
+    int i;
+
+    for (i = 0; i < 256; i += WC_CACHE_LINE_SZ/4) {
+        x &= Tsbox[i];
+    }
+    return x;
+}
+#endif
+
+/* Software AES - ECB Encrypt */
 static void wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
 {
     word32 s0, s1, s2, s3;
@@ -1450,7 +1624,7 @@
 
     if (r > 7 || r == 0) {
         WOLFSSL_MSG("AesEncrypt encountered improper key, set it up");
-        return;  /* stop instead of segfaulting, set up your keys! */
+        return;  /* stop instead of seg-faulting, set up your keys! */
     }
 
 #ifdef WOLFSSL_AESNI
@@ -1475,8 +1649,8 @@
             tmp_align = tmp + (AESNI_ALIGN - ((size_t)tmp % AESNI_ALIGN));
 
             XMEMCPY(tmp_align, inBlock, AES_BLOCK_SIZE);
-            AES_ECB_encrypt(tmp_align, tmp_align, AES_BLOCK_SIZE, (byte*)aes->key,
-                            aes->rounds);
+            AES_ECB_encrypt(tmp_align, tmp_align, AES_BLOCK_SIZE,
+                    (byte*)aes->key, aes->rounds);
             XMEMCPY(outBlock, tmp_align, AES_BLOCK_SIZE);
             XFREE(tmp, aes->heap, DYNAMIC_TYPE_TMP_BUFFER);
             return;
@@ -1497,6 +1671,10 @@
         #endif
     }
 #endif
+#if defined(WOLFSSL_SCE) && !defined(WOLFSSL_SCE_NO_AES)
+    AES_ECB_encrypt(aes, inBlock, outBlock, AES_BLOCK_SIZE);
+    return;
+#endif
 
     /*
      * map byte array block to cipher state
@@ -1514,11 +1692,13 @@
     s3 = ByteReverseWord32(s3);
 #endif
 
+    /* AddRoundKey */
     s0 ^= rk[0];
     s1 ^= rk[1];
     s2 ^= rk[2];
     s3 ^= rk[3];
 
+#ifndef WOLFSSL_AES_SMALL_TABLES
     s0 |= PreFetchTe();
 
     /*
@@ -1527,28 +1707,28 @@
 
     for (;;) {
         t0 =
-            Te[0][GETBYTE(s0, 3)]  ^
-            Te[1][GETBYTE(s1, 2)]  ^
-            Te[2][GETBYTE(s2, 1)]  ^
-            Te[3][GETBYTE(s3, 0)]  ^
+            Te[0][GETBYTE(s0, 3)] ^
+            Te[1][GETBYTE(s1, 2)] ^
+            Te[2][GETBYTE(s2, 1)] ^
+            Te[3][GETBYTE(s3, 0)] ^
             rk[4];
         t1 =
-            Te[0][GETBYTE(s1, 3)]  ^
-            Te[1][GETBYTE(s2, 2)]  ^
-            Te[2][GETBYTE(s3, 1)]  ^
-            Te[3][GETBYTE(s0, 0)]  ^
+            Te[0][GETBYTE(s1, 3)] ^
+            Te[1][GETBYTE(s2, 2)] ^
+            Te[2][GETBYTE(s3, 1)] ^
+            Te[3][GETBYTE(s0, 0)] ^
             rk[5];
         t2 =
             Te[0][GETBYTE(s2, 3)] ^
-            Te[1][GETBYTE(s3, 2)]  ^
-            Te[2][GETBYTE(s0, 1)]  ^
-            Te[3][GETBYTE(s1, 0)]  ^
+            Te[1][GETBYTE(s3, 2)] ^
+            Te[2][GETBYTE(s0, 1)] ^
+            Te[3][GETBYTE(s1, 0)] ^
             rk[6];
         t3 =
             Te[0][GETBYTE(s3, 3)] ^
-            Te[1][GETBYTE(s0, 2)]  ^
-            Te[2][GETBYTE(s1, 1)]  ^
-            Te[3][GETBYTE(s2, 0)]  ^
+            Te[1][GETBYTE(s0, 2)] ^
+            Te[2][GETBYTE(s1, 1)] ^
+            Te[3][GETBYTE(s2, 0)] ^
             rk[7];
 
         rk += 8;
@@ -1611,6 +1791,84 @@
         (Te[0][GETBYTE(t1, 1)] & 0x0000ff00) ^
         (Te[1][GETBYTE(t2, 0)] & 0x000000ff) ^
         rk[3];
+#else
+    s0 |= PreFetchSBox();
+
+    r *= 2;
+    /* Two rounds at a time */
+    for (rk += 4; r > 1; r--, rk += 4) {
+        t0 =
+            ((word32)Tsbox[GETBYTE(s0, 3)] << 24) ^
+            ((word32)Tsbox[GETBYTE(s1, 2)] << 16) ^
+            ((word32)Tsbox[GETBYTE(s2, 1)] <<  8) ^
+            ((word32)Tsbox[GETBYTE(s3, 0)]);
+        t1 =
+            ((word32)Tsbox[GETBYTE(s1, 3)] << 24) ^
+            ((word32)Tsbox[GETBYTE(s2, 2)] << 16) ^
+            ((word32)Tsbox[GETBYTE(s3, 1)] <<  8) ^
+            ((word32)Tsbox[GETBYTE(s0, 0)]);
+        t2 =
+            ((word32)Tsbox[GETBYTE(s2, 3)] << 24) ^
+            ((word32)Tsbox[GETBYTE(s3, 2)] << 16) ^
+            ((word32)Tsbox[GETBYTE(s0, 1)] <<  8) ^
+            ((word32)Tsbox[GETBYTE(s1, 0)]);
+        t3 =
+            ((word32)Tsbox[GETBYTE(s3, 3)] << 24) ^
+            ((word32)Tsbox[GETBYTE(s0, 2)] << 16) ^
+            ((word32)Tsbox[GETBYTE(s1, 1)] <<  8) ^
+            ((word32)Tsbox[GETBYTE(s2, 0)]);
+
+        s0 =
+            (col_mul(t0, 3, 2, 0, 1) << 24) ^
+            (col_mul(t0, 2, 1, 0, 3) << 16) ^
+            (col_mul(t0, 1, 0, 2, 3) <<  8) ^
+            (col_mul(t0, 0, 3, 2, 1)      ) ^
+            rk[0];
+        s1 =
+            (col_mul(t1, 3, 2, 0, 1) << 24) ^
+            (col_mul(t1, 2, 1, 0, 3) << 16) ^
+            (col_mul(t1, 1, 0, 2, 3) <<  8) ^
+            (col_mul(t1, 0, 3, 2, 1)      ) ^
+            rk[1];
+        s2 =
+            (col_mul(t2, 3, 2, 0, 1) << 24) ^
+            (col_mul(t2, 2, 1, 0, 3) << 16) ^
+            (col_mul(t2, 1, 0, 2, 3) <<  8) ^
+            (col_mul(t2, 0, 3, 2, 1)      ) ^
+            rk[2];
+        s3 =
+            (col_mul(t3, 3, 2, 0, 1) << 24) ^
+            (col_mul(t3, 2, 1, 0, 3) << 16) ^
+            (col_mul(t3, 1, 0, 2, 3) <<  8) ^
+            (col_mul(t3, 0, 3, 2, 1)      ) ^
+            rk[3];
+    }
+
+    t0 =
+        ((word32)Tsbox[GETBYTE(s0, 3)] << 24) ^
+        ((word32)Tsbox[GETBYTE(s1, 2)] << 16) ^
+        ((word32)Tsbox[GETBYTE(s2, 1)] <<  8) ^
+        ((word32)Tsbox[GETBYTE(s3, 0)]);
+    t1 =
+        ((word32)Tsbox[GETBYTE(s1, 3)] << 24) ^
+        ((word32)Tsbox[GETBYTE(s2, 2)] << 16) ^
+        ((word32)Tsbox[GETBYTE(s3, 1)] <<  8) ^
+        ((word32)Tsbox[GETBYTE(s0, 0)]);
+    t2 =
+        ((word32)Tsbox[GETBYTE(s2, 3)] << 24) ^
+        ((word32)Tsbox[GETBYTE(s3, 2)] << 16) ^
+        ((word32)Tsbox[GETBYTE(s0, 1)] <<  8) ^
+        ((word32)Tsbox[GETBYTE(s1, 0)]);
+    t3 =
+        ((word32)Tsbox[GETBYTE(s3, 3)] << 24) ^
+        ((word32)Tsbox[GETBYTE(s0, 2)] << 16) ^
+        ((word32)Tsbox[GETBYTE(s1, 1)] <<  8) ^
+        ((word32)Tsbox[GETBYTE(s2, 0)]);
+    s0 = t0 ^ rk[0];
+    s1 = t1 ^ rk[1];
+    s2 = t2 ^ rk[2];
+    s3 = t3 ^ rk[3];
+#endif
 
     /* write out */
 #ifdef LITTLE_ENDIAN_ORDER
@@ -1629,8 +1887,10 @@
 #endif /* HAVE_AES_CBC || WOLFSSL_AES_DIRECT || HAVE_AESGCM */
 
 #if defined(HAVE_AES_DECRYPT)
-#if defined(HAVE_AES_CBC) || defined(WOLFSSL_AES_DIRECT)
-
+#if (defined(HAVE_AES_CBC) && !defined(WOLFSSL_DEVCRYPTO_CBC)) || \
+     defined(WOLFSSL_AES_DIRECT)
+
+#ifndef WOLFSSL_AES_SMALL_TABLES
 /* load 4 Td Tables into cache by cache line stride */
 static WC_INLINE word32 PreFetchTd(void)
 {
@@ -1645,6 +1905,7 @@
     }
     return x;
 }
+#endif
 
 /* load Td Table4 into cache by cache line stride */
 static WC_INLINE word32 PreFetchTd4(void)
@@ -1658,6 +1919,7 @@
     return x;
 }
 
+/* Software AES - ECB Decrypt */
 static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
 {
     word32 s0, s1, s2, s3;
@@ -1667,7 +1929,7 @@
     const word32* rk = aes->key;
     if (r > 7 || r == 0) {
         WOLFSSL_MSG("AesDecrypt encountered improper key, set it up");
-        return;  /* stop instead of segfaulting, set up your keys! */
+        return;  /* stop instead of seg-faulting, set up your keys! */
     }
 #ifdef WOLFSSL_AESNI
     if (haveAESNI && aes->use_aesni) {
@@ -1681,7 +1943,8 @@
         #endif
 
         /* if input and output same will overwrite input iv */
-        XMEMCPY(aes->tmp, inBlock, AES_BLOCK_SIZE);
+        if ((const byte*)aes->tmp != inBlock)
+            XMEMCPY(aes->tmp, inBlock, AES_BLOCK_SIZE);
         AES_ECB_decrypt(inBlock, outBlock, AES_BLOCK_SIZE, (byte*)aes->key,
                         aes->rounds);
         return;
@@ -1692,6 +1955,9 @@
         #endif
     }
 #endif /* WOLFSSL_AESNI */
+#if defined(WOLFSSL_SCE) && !defined(WOLFSSL_SCE_NO_AES)
+    return AES_ECB_decrypt(aes, inBlock, outBlock, AES_BLOCK_SIZE);
+#endif
 
     /*
      * map byte array block to cipher state
@@ -1714,6 +1980,7 @@
     s2 ^= rk[2];
     s3 ^= rk[3];
 
+#ifndef WOLFSSL_AES_SMALL_TABLES
     s0 |= PreFetchTd();
 
     /*
@@ -1807,6 +2074,83 @@
         ((word32)Td4[GETBYTE(t1, 1)] <<  8) ^
         ((word32)Td4[GETBYTE(t0, 0)]) ^
         rk[3];
+#else
+    s0 |= PreFetchTd4();
+
+    r *= 2;
+    for (rk += 4; r > 1; r--, rk += 4) {
+        t0 =
+            ((word32)Td4[GETBYTE(s0, 3)] << 24) ^
+            ((word32)Td4[GETBYTE(s3, 2)] << 16) ^
+            ((word32)Td4[GETBYTE(s2, 1)] <<  8) ^
+            ((word32)Td4[GETBYTE(s1, 0)]) ^
+            rk[0];
+        t1 =
+            ((word32)Td4[GETBYTE(s1, 3)] << 24) ^
+            ((word32)Td4[GETBYTE(s0, 2)] << 16) ^
+            ((word32)Td4[GETBYTE(s3, 1)] <<  8) ^
+            ((word32)Td4[GETBYTE(s2, 0)]) ^
+            rk[1];
+        t2 =
+            ((word32)Td4[GETBYTE(s2, 3)] << 24) ^
+            ((word32)Td4[GETBYTE(s1, 2)] << 16) ^
+            ((word32)Td4[GETBYTE(s0, 1)] <<  8) ^
+            ((word32)Td4[GETBYTE(s3, 0)]) ^
+            rk[2];
+        t3 =
+            ((word32)Td4[GETBYTE(s3, 3)] << 24) ^
+            ((word32)Td4[GETBYTE(s2, 2)] << 16) ^
+            ((word32)Td4[GETBYTE(s1, 1)] <<  8) ^
+            ((word32)Td4[GETBYTE(s0, 0)]) ^
+            rk[3];
+
+        s0 =
+            (inv_col_mul(t0, 0, 2, 1, 3) << 24) ^
+            (inv_col_mul(t0, 3, 1, 0, 2) << 16) ^
+            (inv_col_mul(t0, 2, 0, 3, 1) <<  8) ^
+            (inv_col_mul(t0, 1, 3, 2, 0)      );
+        s1 =
+            (inv_col_mul(t1, 0, 2, 1, 3) << 24) ^
+            (inv_col_mul(t1, 3, 1, 0, 2) << 16) ^
+            (inv_col_mul(t1, 2, 0, 3, 1) <<  8) ^
+            (inv_col_mul(t1, 1, 3, 2, 0)      );
+        s2 =
+            (inv_col_mul(t2, 0, 2, 1, 3) << 24) ^
+            (inv_col_mul(t2, 3, 1, 0, 2) << 16) ^
+            (inv_col_mul(t2, 2, 0, 3, 1) <<  8) ^
+            (inv_col_mul(t2, 1, 3, 2, 0)      );
+        s3 =
+            (inv_col_mul(t3, 0, 2, 1, 3) << 24) ^
+            (inv_col_mul(t3, 3, 1, 0, 2) << 16) ^
+            (inv_col_mul(t3, 2, 0, 3, 1) <<  8) ^
+            (inv_col_mul(t3, 1, 3, 2, 0)      );
+    }
+
+    t0 =
+        ((word32)Td4[GETBYTE(s0, 3)] << 24) ^
+        ((word32)Td4[GETBYTE(s3, 2)] << 16) ^
+        ((word32)Td4[GETBYTE(s2, 1)] <<  8) ^
+        ((word32)Td4[GETBYTE(s1, 0)]);
+    t1 =
+        ((word32)Td4[GETBYTE(s1, 3)] << 24) ^
+        ((word32)Td4[GETBYTE(s0, 2)] << 16) ^
+        ((word32)Td4[GETBYTE(s3, 1)] <<  8) ^
+        ((word32)Td4[GETBYTE(s2, 0)]);
+    t2 =
+        ((word32)Td4[GETBYTE(s2, 3)] << 24) ^
+        ((word32)Td4[GETBYTE(s1, 2)] << 16) ^
+        ((word32)Td4[GETBYTE(s0, 1)] <<  8) ^
+        ((word32)Td4[GETBYTE(s3, 0)]);
+    t3 =
+        ((word32)Td4[GETBYTE(s3, 3)] << 24) ^
+        ((word32)Td4[GETBYTE(s2, 2)] << 16) ^
+        ((word32)Td4[GETBYTE(s1, 1)] <<  8) ^
+        ((word32)Td4[GETBYTE(s0, 0)]);
+    s0 = t0 ^ rk[0];
+    s1 = t1 ^ rk[1];
+    s2 = t2 ^ rk[2];
+    s3 = t3 ^ rk[3];
+#endif
 
     /* write out */
 #ifdef LITTLE_ENDIAN_ORDER
@@ -1834,20 +2178,27 @@
     int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
             const byte* iv, int dir)
     {
-        word32 *rk = aes->key;
+        word32 *rk;
 
         (void)dir;
 
-        if (!((keylen == 16) || (keylen == 24) || (keylen == 32)))
+        if (aes == NULL || (keylen != 16 &&
+        #ifdef WOLFSSL_AES_192
+            keylen != 24 &&
+        #endif
+            keylen != 32)) {
             return BAD_FUNC_ARG;
-
+        }
+
+        rk = aes->key;
         aes->keylen = keylen;
         aes->rounds = keylen/4 + 6;
         XMEMCPY(rk, userKey, keylen);
-    #ifndef WOLFSSL_STM32_CUBEMX
+    #if !defined(WOLFSSL_STM32_CUBEMX) || defined(STM32_HAL_V2)
         ByteReverseWords(rk, rk, keylen);
     #endif
-    #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER)
+    #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER) || \
+        defined(WOLFSSL_AES_OFB)
         aes->left = 0;
     #endif
 
@@ -1920,7 +2271,8 @@
         if (iv)
             XMEMCPY(aes->reg, iv, AES_BLOCK_SIZE);
 
-    #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER)
+    #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER) || \
+        defined(WOLFSSL_AES_OFB)
         aes->left = 0;
     #endif
 
@@ -1930,13 +2282,14 @@
     int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, const byte* iv,
                   int dir)
     {
-        if (!((keylen == 16) || (keylen == 24) || (keylen == 32)))
+        if (aes == NULL || !((keylen == 16) || (keylen == 24) || (keylen == 32)))
             return BAD_FUNC_ARG;
 
         aes->rounds = keylen/4 + 6;
         XMEMCPY(aes->key, userKey, keylen);
 
-    #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER)
+    #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER) || \
+        defined(WOLFSSL_AES_OFB)
         aes->left = 0;
     #endif
 
@@ -1953,34 +2306,65 @@
         const byte* iv, int dir)
     {
         int ret;
-        byte *rk = (byte*)aes->key;
+        byte* rk;
+        byte* tmpKey = (byte*)userKey;
+        int tmpKeyDynamic = 0;
+        word32 alignOffset = 0;
 
         (void)dir;
 
         if (!((keylen == 16) || (keylen == 24) || (keylen == 32)))
             return BAD_FUNC_ARG;
-
+        if (aes == NULL)
+            return BAD_FUNC_ARG;
+
+        rk = (byte*)aes->key;
         if (rk == NULL)
             return BAD_FUNC_ARG;
 
-    #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER)
+    #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER) || \
+        defined(WOLFSSL_AES_OFB)
         aes->left = 0;
     #endif
 
         aes->rounds = keylen/4 + 6;
 
+    #ifdef FREESCALE_MMCAU_CLASSIC
+        if ((wolfssl_word)userKey % WOLFSSL_MMCAU_ALIGNMENT) {
+        #ifndef NO_WOLFSSL_ALLOC_ALIGN
+            byte* tmp = (byte*)XMALLOC(keylen + WOLFSSL_MMCAU_ALIGNMENT,
+                                       aes->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            if (tmp == NULL) {
+                return MEMORY_E;
+            }
+            alignOffset = WOLFSSL_MMCAU_ALIGNMENT -
+                          ((wolfssl_word)tmp % WOLFSSL_MMCAU_ALIGNMENT);
+            tmpKey = tmp + alignOffset;
+            XMEMCPY(tmpKey, userKey, keylen);
+            tmpKeyDynamic = 1;
+        #else
+            WOLFSSL_MSG("Bad cau_aes_set_key alignment");
+            return BAD_ALIGN_E;
+        #endif
+        }
+    #endif
+
         ret = wolfSSL_CryptHwMutexLock();
         if(ret == 0) {
         #ifdef FREESCALE_MMCAU_CLASSIC
-            cau_aes_set_key(userKey, keylen*8, rk);
+            cau_aes_set_key(tmpKey, keylen*8, rk);
         #else
-            MMCAU_AES_SetKey(userKey, keylen, rk);
+            MMCAU_AES_SetKey(tmpKey, keylen, rk);
         #endif
             wolfSSL_CryptHwMutexUnLock();
 
             ret = wc_AesSetIV(aes, iv);
         }
 
+        if (tmpKeyDynamic == 1) {
+            XFREE(tmpKey - alignOffset, aes->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        }
+
         return ret;
     }
 
@@ -1999,14 +2383,15 @@
         (void)dir;
         (void)iv;
 
-        if (keylen != 16)
+        if (aes == NULL || keylen != 16)
             return BAD_FUNC_ARG;
 
         aes->keylen = keylen;
         aes->rounds = keylen/4 + 6;
         ret = nrf51_aes_set_key(userKey);
 
-    #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER)
+    #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER) || \
+        defined(WOLFSSL_AES_OFB)
         aes->left = 0;
     #endif
 
@@ -2018,11 +2403,122 @@
     {
         return wc_AesSetKey(aes, userKey, keylen, iv, dir);
     }
+#elif defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+    !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_AES)
+
+    int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
+        const byte* iv, int dir)
+    {
+        (void)dir;
+        (void)iv;
+
+        if (aes == NULL || (keylen != 16 && keylen != 24 && keylen != 32)) {
+            return BAD_FUNC_ARG;
+        }
+
+        aes->keylen = keylen;
+        aes->rounds = keylen/4 + 6;
+
+        XMEMCPY(aes->key, userKey, keylen);
+        #if defined(WOLFSSL_AES_COUNTER)
+            aes->left = 0;
+        #endif
+        return wc_AesSetIV(aes, iv);
+    }
+
+    int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen,
+                        const byte* iv, int dir)
+    {
+        return wc_AesSetKey(aes, userKey, keylen, iv, dir);
+    }
+#elif defined(WOLFSSL_CRYPTOCELL) && defined(WOLFSSL_CRYPTOCELL_AES)
+
+    int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, const byte* iv,
+                    int dir)
+    {
+        SaSiError_t ret = SASI_OK;
+        SaSiAesIv_t iv_aes;
+
+        if (aes == NULL ||
+           (keylen != AES_128_KEY_SIZE &&
+            keylen != AES_192_KEY_SIZE &&
+            keylen != AES_256_KEY_SIZE)) {
+            return BAD_FUNC_ARG;
+        }
+    #if defined(AES_MAX_KEY_SIZE)
+        if (keylen > (AES_MAX_KEY_SIZE/8)) {
+            return BAD_FUNC_ARG;
+        }
+    #endif
+        if (dir != AES_ENCRYPTION &&
+            dir != AES_DECRYPTION) {
+            return BAD_FUNC_ARG;
+        }
+
+        if (dir == AES_ENCRYPTION) {
+            aes->ctx.mode = SASI_AES_ENCRYPT;
+            SaSi_AesInit(&aes->ctx.user_ctx,
+                         SASI_AES_ENCRYPT,
+                         SASI_AES_MODE_CBC,
+                         SASI_AES_PADDING_NONE);
+        }
+        else {
+            aes->ctx.mode = SASI_AES_DECRYPT;
+            SaSi_AesInit(&aes->ctx.user_ctx,
+                         SASI_AES_DECRYPT,
+                         SASI_AES_MODE_CBC,
+                         SASI_AES_PADDING_NONE);
+        }
+
+        aes->keylen = keylen;
+        aes->rounds = keylen/4 + 6;
+        XMEMCPY(aes->key, userKey, keylen);
+
+        aes->ctx.key.pKey = (uint8_t*)aes->key;
+        aes->ctx.key.keySize= keylen;
+
+        ret = SaSi_AesSetKey(&aes->ctx.user_ctx,
+                             SASI_AES_USER_KEY,
+                             &aes->ctx.key,
+                             sizeof(aes->ctx.key));
+        if (ret != SASI_OK) {
+            return BAD_FUNC_ARG;
+        }
+
+        ret = wc_AesSetIV(aes, iv);
+
+        if (iv)
+            XMEMCPY(iv_aes, iv, AES_BLOCK_SIZE);
+        else
+            XMEMSET(iv_aes,  0, AES_BLOCK_SIZE);
+
+
+        ret = SaSi_AesSetIv(&aes->ctx.user_ctx, iv_aes);
+        if (ret != SASI_OK) {
+            return ret;
+        }
+       return ret;
+    }
+    #if defined(WOLFSSL_AES_DIRECT)
+        int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen,
+                            const byte* iv, int dir)
+        {
+            return wc_AesSetKey(aes, userKey, keylen, iv, dir);
+        }
+    #endif
 
 #elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES)
       /* implemented in wolfcrypt/src/port/caam/caam_aes.c */
 
+#elif defined(WOLFSSL_AFALG)
+    /* implemented in wolfcrypt/src/port/af_alg/afalg_aes.c */
+
+#elif defined(WOLFSSL_DEVCRYPTO_AES)
+    /* implemented in wolfcrypt/src/port/devcrypto/devcrypto_aes.c */
+
 #else
+
+    /* Software AES - SetKey */
     static int wc_AesSetKeyLocal(Aes* aes, const byte* userKey, word32 keylen,
                 const byte* iv, int dir)
     {
@@ -2035,7 +2531,8 @@
         #ifdef WOLFSSL_AESNI
             aes->use_aesni = 0;
         #endif /* WOLFSSL_AESNI */
-        #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER)
+        #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER) || \
+            defined(WOLFSSL_AES_OFB)
             aes->left = 0;
         #endif
 
@@ -2043,12 +2540,13 @@
         aes->rounds = (keylen/4) + 6;
 
         XMEMCPY(rk, userKey, keylen);
-    #if defined(LITTLE_ENDIAN_ORDER) && !defined(WOLFSSL_PIC32MZ_CRYPT)
+    #if defined(LITTLE_ENDIAN_ORDER) && !defined(WOLFSSL_PIC32MZ_CRYPT) && \
+        (!defined(WOLFSSL_ESP32WROOM32_CRYPT) || \
+          defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_AES))
         ByteReverseWords(rk, rk, keylen);
     #endif
 
 #ifdef NEED_AES_TABLES
-
         switch (keylen) {
     #if defined(AES_MAX_KEY_SIZE) && AES_MAX_KEY_SIZE >= 128 && \
             defined(WOLFSSL_AES_128)
@@ -2057,10 +2555,17 @@
             {
                 temp  = rk[3];
                 rk[4] = rk[0] ^
+            #ifndef WOLFSSL_AES_SMALL_TABLES
                     (Te[2][GETBYTE(temp, 2)] & 0xff000000) ^
                     (Te[3][GETBYTE(temp, 1)] & 0x00ff0000) ^
                     (Te[0][GETBYTE(temp, 0)] & 0x0000ff00) ^
                     (Te[1][GETBYTE(temp, 3)] & 0x000000ff) ^
+            #else
+                    ((word32)Tsbox[GETBYTE(temp, 2)] << 24) ^
+                    ((word32)Tsbox[GETBYTE(temp, 1)] << 16) ^
+                    ((word32)Tsbox[GETBYTE(temp, 0)] <<  8) ^
+                    ((word32)Tsbox[GETBYTE(temp, 3)]) ^
+            #endif
                     rcon[i];
                 rk[5] = rk[1] ^ rk[4];
                 rk[6] = rk[2] ^ rk[5];
@@ -2080,10 +2585,17 @@
             {
                 temp = rk[ 5];
                 rk[ 6] = rk[ 0] ^
+            #ifndef WOLFSSL_AES_SMALL_TABLES
                     (Te[2][GETBYTE(temp, 2)] & 0xff000000) ^
                     (Te[3][GETBYTE(temp, 1)] & 0x00ff0000) ^
                     (Te[0][GETBYTE(temp, 0)] & 0x0000ff00) ^
                     (Te[1][GETBYTE(temp, 3)] & 0x000000ff) ^
+            #else
+                    ((word32)Tsbox[GETBYTE(temp, 2)] << 24) ^
+                    ((word32)Tsbox[GETBYTE(temp, 1)] << 16) ^
+                    ((word32)Tsbox[GETBYTE(temp, 0)] <<  8) ^
+                    ((word32)Tsbox[GETBYTE(temp, 3)]) ^
+            #endif
                     rcon[i];
                 rk[ 7] = rk[ 1] ^ rk[ 6];
                 rk[ 8] = rk[ 2] ^ rk[ 7];
@@ -2104,10 +2616,17 @@
             {
                 temp = rk[ 7];
                 rk[ 8] = rk[ 0] ^
+            #ifndef WOLFSSL_AES_SMALL_TABLES
                     (Te[2][GETBYTE(temp, 2)] & 0xff000000) ^
                     (Te[3][GETBYTE(temp, 1)] & 0x00ff0000) ^
                     (Te[0][GETBYTE(temp, 0)] & 0x0000ff00) ^
                     (Te[1][GETBYTE(temp, 3)] & 0x000000ff) ^
+            #else
+                    ((word32)Tsbox[GETBYTE(temp, 2)] << 24) ^
+                    ((word32)Tsbox[GETBYTE(temp, 1)] << 16) ^
+                    ((word32)Tsbox[GETBYTE(temp, 0)] <<  8) ^
+                    ((word32)Tsbox[GETBYTE(temp, 3)]) ^
+            #endif
                     rcon[i];
                 rk[ 9] = rk[ 1] ^ rk[ 8];
                 rk[10] = rk[ 2] ^ rk[ 9];
@@ -2116,10 +2635,17 @@
                     break;
                 temp = rk[11];
                 rk[12] = rk[ 4] ^
+            #ifndef WOLFSSL_AES_SMALL_TABLES
                     (Te[2][GETBYTE(temp, 3)] & 0xff000000) ^
                     (Te[3][GETBYTE(temp, 2)] & 0x00ff0000) ^
                     (Te[0][GETBYTE(temp, 1)] & 0x0000ff00) ^
                     (Te[1][GETBYTE(temp, 0)] & 0x000000ff);
+            #else
+                    ((word32)Tsbox[GETBYTE(temp, 3)] << 24) ^
+                    ((word32)Tsbox[GETBYTE(temp, 2)] << 16) ^
+                    ((word32)Tsbox[GETBYTE(temp, 1)] <<  8) ^
+                    ((word32)Tsbox[GETBYTE(temp, 0)]);
+            #endif
                 rk[13] = rk[ 5] ^ rk[12];
                 rk[14] = rk[ 6] ^ rk[13];
                 rk[15] = rk[ 7] ^ rk[14];
@@ -2133,7 +2659,7 @@
             return BAD_FUNC_ARG;
         } /* switch */
 
-    #ifdef HAVE_AES_DECRYPT
+    #if defined(HAVE_AES_DECRYPT)
         if (dir == AES_DECRYPTION) {
             unsigned int j;
             rk = aes->key;
@@ -2145,6 +2671,7 @@
                 temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
                 temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
             }
+        #if !defined(WOLFSSL_AES_SMALL_TABLES)
             /* apply the inverse MixColumn transform to all round keys but the
                first and the last: */
             for (i = 1; i < aes->rounds; i++) {
@@ -2170,12 +2697,21 @@
                     Td[2][Te[1][GETBYTE(rk[3], 1)] & 0xff] ^
                     Td[3][Te[1][GETBYTE(rk[3], 0)] & 0xff];
             }
+        #endif
         }
     #else
         (void)dir;
     #endif /* HAVE_AES_DECRYPT */
+        (void)temp;
 #endif /* NEED_AES_TABLES */
 
+#if defined(WOLFSSL_SCE) && !defined(WOLFSSL_SCE_NO_AES)
+        XMEMCPY((byte*)aes->key, userKey, keylen);
+        if (WOLFSSL_SCE_GSCE_HANDLE.p_cfg->endian_flag == CRYPTO_WORD_ENDIAN_BIG) {
+            ByteReverseWords(aes->key, aes->key, 32);
+        }
+#endif
+
         return wc_AesSetIV(aes, iv);
     }
 
@@ -2217,13 +2753,16 @@
         aes->keylen = keylen;
         aes->rounds = keylen/4 + 6;
 
-    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)
-        if (aes->asyncDev.marker == WOLFSSL_ASYNC_MARKER_AES) {
-            XMEMCPY(aes->asyncKey, userKey, keylen);
-            if (iv)
-                XMEMCPY(aes->asyncIv, iv, AES_BLOCK_SIZE);
-        }
-    #endif /* WOLFSSL_ASYNC_CRYPT */
+    #if defined(WOLF_CRYPTO_CB) || (defined(WOLFSSL_DEVCRYPTO) && \
+        (defined(WOLFSSL_DEVCRYPTO_AES) || defined(WOLFSSL_DEVCRYPTO_CBC))) || \
+        (defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES))
+        #ifdef WOLF_CRYPTO_CB
+        if (aes->devId != INVALID_DEVID)
+        #endif
+        {
+            XMEMCPY(aes->devKey, userKey, keylen);
+        }
+    #endif
 
     #ifdef WOLFSSL_AESNI
         if (checkAESNI == 0) {
@@ -2231,12 +2770,15 @@
             checkAESNI = 1;
         }
         if (haveAESNI) {
-            #if defined(WOLFSSL_AES_COUNTER) || defined(WOLFSSL_AES_CFB)
+            #if defined(WOLFSSL_AES_COUNTER) || defined(WOLFSSL_AES_CFB) || \
+                defined(WOLFSSL_AES_OFB)
                 aes->left = 0;
             #endif /* WOLFSSL_AES_COUNTER */
             aes->use_aesni = 1;
             if (iv)
                 XMEMCPY(aes->reg, iv, AES_BLOCK_SIZE);
+            else
+                XMEMSET(aes->reg, 0, AES_BLOCK_SIZE);
             if (dir == AES_ENCRYPTION)
                 return AES_set_encrypt_key(userKey, keylen * 8, aes);
         #ifdef HAVE_AES_DECRYPT
@@ -2248,6 +2790,10 @@
 
         ret = wc_AesSetKeyLocal(aes, userKey, keylen, iv, dir);
 
+    #if defined(WOLFSSL_DEVCRYPTO) && \
+        (defined(WOLFSSL_DEVCRYPTO_AES) || defined(WOLFSSL_DEVCRYPTO_CBC))
+        aes->ctx.cfd = -1;
+    #endif
     #ifdef WOLFSSL_IMX6_CAAM_BLOB
         ForceZero(local, sizeof(local));
     #endif
@@ -2339,19 +2885,60 @@
     #elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES)
         /* implemented in wolfcrypt/src/port/caam/caam_aes.c */
 
+    #elif defined(WOLFSSL_AFALG)
+        /* implemented in wolfcrypt/src/port/af_alg/afalg_aes.c */
+
+    #elif defined(WOLFSSL_DEVCRYPTO_AES)
+        /* implemented in wolfcrypt/src/port/devcrypt/devcrypto_aes.c */
+
+    #elif defined(STM32_CRYPTO)
+        /* Allow direct access to one block encrypt */
+        void wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in)
+        {
+            if (wolfSSL_CryptHwMutexLock() == 0) {
+                wc_AesEncrypt(aes, in, out);
+                wolfSSL_CryptHwMutexUnLock();
+            }
+        }
+        #ifdef HAVE_AES_DECRYPT
+        /* Allow direct access to one block decrypt */
+        void wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in)
+        {
+            if (wolfSSL_CryptHwMutexLock() == 0) {
+                wc_AesDecrypt(aes, in, out);
+                wolfSSL_CryptHwMutexUnLock();
+            }
+        }
+        #endif /* HAVE_AES_DECRYPT */
+
+    #elif defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+        !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_AES)
+        
+        /* Allow direct access to one block encrypt */
+        void wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in)
+        {
+            wc_AesEncrypt(aes, in, out);
+        }
+        #ifdef HAVE_AES_DECRYPT
+        /* Allow direct access to one block decrypt */
+        void wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in)
+        {
+            wc_AesDecrypt(aes, in, out);
+        }
+        #endif /* HAVE_AES_DECRYPT */
     #else
         /* Allow direct access to one block encrypt */
         void wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in)
         {
             wc_AesEncrypt(aes, in, out);
         }
-    #ifdef HAVE_AES_DECRYPT
+        #ifdef HAVE_AES_DECRYPT
         /* Allow direct access to one block decrypt */
         void wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in)
         {
             wc_AesDecrypt(aes, in, out);
         }
-    #endif /* HAVE_AES_DECRYPT */
+        #endif /* HAVE_AES_DECRYPT */
     #endif /* AES direct block */
 #endif /* WOLFSSL_AES_DIRECT */
 
@@ -2367,32 +2954,38 @@
         word32 blocks = (sz / AES_BLOCK_SIZE);
         CRYP_HandleTypeDef hcryp;
 
-        XMEMSET(&hcryp, 0, sizeof(CRYP_HandleTypeDef));
-        switch (aes->rounds) {
-            case 10: /* 128-bit key */
-                hcryp.Init.KeySize = CRYP_KEYSIZE_128B;
-                break;
-	#ifdef CRYP_KEYSIZE_192B
-            case 12: /* 192-bit key */
-                hcryp.Init.KeySize = CRYP_KEYSIZE_192B;
-                break;
-	#endif
-            case 14: /* 256-bit key */
-                hcryp.Init.KeySize = CRYP_KEYSIZE_256B;
-                break;
-            default:
-                break;
-        }
-        hcryp.Instance = CRYP;
-        hcryp.Init.DataType = CRYP_DATATYPE_8B;
-        hcryp.Init.pKey = (uint8_t*)aes->key;
-        hcryp.Init.pInitVect = (uint8_t*)aes->reg;
-
+        ret = wc_Stm32_Aes_Init(aes, &hcryp);
+        if (ret != 0)
+            return ret;
+
+        ret = wolfSSL_CryptHwMutexLock();
+        if (ret != 0) {
+            return ret;
+        }
+
+    #ifdef STM32_CRYPTO_AES_ONLY
+        hcryp.Init.OperatingMode = CRYP_ALGOMODE_ENCRYPT;
+        hcryp.Init.ChainingMode  = CRYP_CHAINMODE_AES_CBC;
+        hcryp.Init.KeyWriteFlag  = CRYP_KEY_WRITE_ENABLE;
+    #elif defined(STM32_HAL_V2)
+        hcryp.Init.Algorithm  = CRYP_AES_CBC;
+        ByteReverseWords(aes->reg, aes->reg, AES_BLOCK_SIZE);
+    #endif
+        hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)aes->reg;
         HAL_CRYP_Init(&hcryp);
 
         while (blocks--) {
-            if (HAL_CRYP_AESCBC_Encrypt(&hcryp, (uint8_t*)in, AES_BLOCK_SIZE,
-                                           out, STM32_HAL_TIMEOUT) != HAL_OK) {
+        #ifdef STM32_CRYPTO_AES_ONLY
+            ret = HAL_CRYPEx_AES(&hcryp, (uint8_t*)in, AES_BLOCK_SIZE,
+                out, STM32_HAL_TIMEOUT);
+        #elif defined(STM32_HAL_V2)
+            ret = HAL_CRYP_Encrypt(&hcryp, (uint32_t*)in, AES_BLOCK_SIZE,
+                (uint32_t*)out, STM32_HAL_TIMEOUT);
+        #else
+            ret = HAL_CRYP_AESCBC_Encrypt(&hcryp, (uint8_t*)in, AES_BLOCK_SIZE,
+                out, STM32_HAL_TIMEOUT);
+        #endif
+            if (ret != HAL_OK) {
                 ret = WC_TIMEOUT_E;
                 break;
             }
@@ -2407,6 +3000,8 @@
 
         HAL_CRYP_DeInit(&hcryp);
 
+        wolfSSL_CryptHwMutexUnLock();
+
         return ret;
     }
     #ifdef HAVE_AES_DECRYPT
@@ -2416,33 +3011,44 @@
         word32 blocks = (sz / AES_BLOCK_SIZE);
         CRYP_HandleTypeDef hcryp;
 
-        XMEMSET(&hcryp, 0, sizeof(CRYP_HandleTypeDef));
-        switch (aes->rounds) {
-            case 10: /* 128-bit key */
-                hcryp.Init.KeySize = CRYP_KEYSIZE_128B;
-                break;
-	#ifdef CRYP_KEYSIZE_192B
-            case 12: /* 192-bit key */
-                hcryp.Init.KeySize = CRYP_KEYSIZE_192B;
-                break;
-	#endif
-            case 14: /* 256-bit key */
-                hcryp.Init.KeySize = CRYP_KEYSIZE_256B;
-                break;
-            default:
-                break;
-        }
-        hcryp.Instance = CRYP;
-        hcryp.Init.DataType = CRYP_DATATYPE_8B;
-        hcryp.Init.pKey = (uint8_t*)aes->key;
-        hcryp.Init.pInitVect = (uint8_t*)aes->reg;
-
+        ret = wc_Stm32_Aes_Init(aes, &hcryp);
+        if (ret != 0)
+            return ret;
+
+        ret = wolfSSL_CryptHwMutexLock();
+        if (ret != 0) {
+            return ret;
+        }
+
+        /* if input and output same will overwrite input iv */
+        XMEMCPY(aes->tmp, in + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
+
+    #ifdef STM32_CRYPTO_AES_ONLY
+        hcryp.Init.OperatingMode = CRYP_ALGOMODE_KEYDERIVATION_DECRYPT;
+        hcryp.Init.ChainingMode  = CRYP_CHAINMODE_AES_CBC;
+        hcryp.Init.KeyWriteFlag  = CRYP_KEY_WRITE_ENABLE;
+    #elif defined(STM32_HAL_V2)
+        hcryp.Init.Algorithm  = CRYP_AES_CBC;
+        ByteReverseWords(aes->reg, aes->reg, AES_BLOCK_SIZE);
+    #endif
+
+        hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)aes->reg;
         HAL_CRYP_Init(&hcryp);
 
         while (blocks--) {
-            if (HAL_CRYP_AESCBC_Decrypt(&hcryp, (uint8_t*)in, AES_BLOCK_SIZE,
-                                           out, STM32_HAL_TIMEOUT) != HAL_OK) {
+        #ifdef STM32_CRYPTO_AES_ONLY
+            ret = HAL_CRYPEx_AES(&hcryp, (uint8_t*)in, AES_BLOCK_SIZE,
+                out, STM32_HAL_TIMEOUT);
+        #elif defined(STM32_HAL_V2)
+            ret = HAL_CRYP_Decrypt(&hcryp, (uint32_t*)in, AES_BLOCK_SIZE,
+                (uint32_t*)out, STM32_HAL_TIMEOUT);
+        #else
+            ret = HAL_CRYP_AESCBC_Decrypt(&hcryp, (uint8_t*)in, AES_BLOCK_SIZE,
+                out, STM32_HAL_TIMEOUT);
+        #endif
+            if (ret != HAL_OK) {
                 ret = WC_TIMEOUT_E;
+                break;
             }
 
             /* store iv for next call */
@@ -2453,80 +3059,51 @@
         }
 
         HAL_CRYP_DeInit(&hcryp);
+        wolfSSL_CryptHwMutexUnLock();
 
         return ret;
     }
     #endif /* HAVE_AES_DECRYPT */
-#else
+
+#else /* STD_PERI_LIB */
     int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
     {
-        word32 *enc_key, *iv;
+        int ret;
+        word32 *iv;
         word32 blocks = (sz / AES_BLOCK_SIZE);
-        CRYP_InitTypeDef AES_CRYP_InitStructure;
-        CRYP_KeyInitTypeDef AES_CRYP_KeyInitStructure;
-        CRYP_IVInitTypeDef AES_CRYP_IVInitStructure;
-
-        enc_key = aes->key;
-        iv = aes->reg;
-
-        /* crypto structure initialization */
-        CRYP_KeyStructInit(&AES_CRYP_KeyInitStructure);
-        CRYP_StructInit(&AES_CRYP_InitStructure);
-        CRYP_IVStructInit(&AES_CRYP_IVInitStructure);
+        CRYP_InitTypeDef cryptInit;
+        CRYP_KeyInitTypeDef keyInit;
+        CRYP_IVInitTypeDef ivInit;
+
+        ret = wc_Stm32_Aes_Init(aes, &cryptInit, &keyInit);
+        if (ret != 0)
+            return ret;
+
+        ret = wolfSSL_CryptHwMutexLock();
+        if (ret != 0) {
+            return ret;
+        }
 
         /* reset registers to their default values */
         CRYP_DeInit();
 
-        /* load key into correct registers */
-        switch (aes->rounds) {
-            case 10: /* 128-bit key */
-                AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_128b;
-                AES_CRYP_KeyInitStructure.CRYP_Key2Left  = enc_key[0];
-                AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[1];
-                AES_CRYP_KeyInitStructure.CRYP_Key3Left  = enc_key[2];
-                AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[3];
-                break;
-
-            case 12: /* 192-bit key */
-                AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_192b;
-                AES_CRYP_KeyInitStructure.CRYP_Key1Left  = enc_key[0];
-                AES_CRYP_KeyInitStructure.CRYP_Key1Right = enc_key[1];
-                AES_CRYP_KeyInitStructure.CRYP_Key2Left  = enc_key[2];
-                AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[3];
-                AES_CRYP_KeyInitStructure.CRYP_Key3Left  = enc_key[4];
-                AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[5];
-                break;
-
-            case 14: /* 256-bit key */
-                AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_256b;
-                AES_CRYP_KeyInitStructure.CRYP_Key0Left  = enc_key[0];
-                AES_CRYP_KeyInitStructure.CRYP_Key0Right = enc_key[1];
-                AES_CRYP_KeyInitStructure.CRYP_Key1Left  = enc_key[2];
-                AES_CRYP_KeyInitStructure.CRYP_Key1Right = enc_key[3];
-                AES_CRYP_KeyInitStructure.CRYP_Key2Left  = enc_key[4];
-                AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[5];
-                AES_CRYP_KeyInitStructure.CRYP_Key3Left  = enc_key[6];
-                AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[7];
-                break;
-
-            default:
-                break;
-        }
-        CRYP_KeyInit(&AES_CRYP_KeyInitStructure);
+        /* set key */
+        CRYP_KeyInit(&keyInit);
 
         /* set iv */
+        iv = aes->reg;
+        CRYP_IVStructInit(&ivInit);
         ByteReverseWords(iv, iv, AES_BLOCK_SIZE);
-        AES_CRYP_IVInitStructure.CRYP_IV0Left  = iv[0];
-        AES_CRYP_IVInitStructure.CRYP_IV0Right = iv[1];
-        AES_CRYP_IVInitStructure.CRYP_IV1Left  = iv[2];
-        AES_CRYP_IVInitStructure.CRYP_IV1Right = iv[3];
-        CRYP_IVInit(&AES_CRYP_IVInitStructure);
-
-        /* set direction, mode, and datatype */
-        AES_CRYP_InitStructure.CRYP_AlgoDir  = CRYP_AlgoDir_Encrypt;
-        AES_CRYP_InitStructure.CRYP_AlgoMode = CRYP_AlgoMode_AES_CBC;
-        AES_CRYP_InitStructure.CRYP_DataType = CRYP_DataType_8b;
-        CRYP_Init(&AES_CRYP_InitStructure);
+        ivInit.CRYP_IV0Left  = iv[0];
+        ivInit.CRYP_IV0Right = iv[1];
+        ivInit.CRYP_IV1Left  = iv[2];
+        ivInit.CRYP_IV1Right = iv[3];
+        CRYP_IVInit(&ivInit);
+
+        /* set direction and mode */
+        cryptInit.CRYP_AlgoDir  = CRYP_AlgoDir_Encrypt;
+        cryptInit.CRYP_AlgoMode = CRYP_AlgoMode_AES_CBC;
+        CRYP_Init(&cryptInit);
 
         /* enable crypto processor */
         CRYP_Cmd(ENABLE);
@@ -2558,26 +3135,29 @@
 
         /* disable crypto processor */
         CRYP_Cmd(DISABLE);
-
-        return 0;
+        wolfSSL_CryptHwMutexUnLock();
+
+        return ret;
     }
 
     #ifdef HAVE_AES_DECRYPT
     int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
     {
-        word32 *dec_key, *iv;
+        int ret;
+        word32 *iv;
         word32 blocks = (sz / AES_BLOCK_SIZE);
-        CRYP_InitTypeDef AES_CRYP_InitStructure;
-        CRYP_KeyInitTypeDef AES_CRYP_KeyInitStructure;
-        CRYP_IVInitTypeDef AES_CRYP_IVInitStructure;
-
-        dec_key = aes->key;
-        iv = aes->reg;
-
-        /* crypto structure initialization */
-        CRYP_KeyStructInit(&AES_CRYP_KeyInitStructure);
-        CRYP_StructInit(&AES_CRYP_InitStructure);
-        CRYP_IVStructInit(&AES_CRYP_IVInitStructure);
+        CRYP_InitTypeDef cryptInit;
+        CRYP_KeyInitTypeDef keyInit;
+        CRYP_IVInitTypeDef ivInit;
+
+        ret = wc_Stm32_Aes_Init(aes, &cryptInit, &keyInit);
+        if (ret != 0)
+            return ret;
+
+        ret = wolfSSL_CryptHwMutexLock();
+        if (ret != 0) {
+            return ret;
+        }
 
         /* if input and output same will overwrite input iv */
         XMEMCPY(aes->tmp, in + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
@@ -2585,48 +3165,11 @@
         /* reset registers to their default values */
         CRYP_DeInit();
 
-        /* load key into correct registers */
-        switch (aes->rounds) {
-            case 10: /* 128-bit key */
-                AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_128b;
-                AES_CRYP_KeyInitStructure.CRYP_Key2Left  = dec_key[0];
-                AES_CRYP_KeyInitStructure.CRYP_Key2Right = dec_key[1];
-                AES_CRYP_KeyInitStructure.CRYP_Key3Left  = dec_key[2];
-                AES_CRYP_KeyInitStructure.CRYP_Key3Right = dec_key[3];
-                break;
-
-            case 12: /* 192-bit key */
-                AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_192b;
-                AES_CRYP_KeyInitStructure.CRYP_Key1Left  = dec_key[0];
-                AES_CRYP_KeyInitStructure.CRYP_Key1Right = dec_key[1];
-                AES_CRYP_KeyInitStructure.CRYP_Key2Left  = dec_key[2];
-                AES_CRYP_KeyInitStructure.CRYP_Key2Right = dec_key[3];
-                AES_CRYP_KeyInitStructure.CRYP_Key3Left  = dec_key[4];
-                AES_CRYP_KeyInitStructure.CRYP_Key3Right = dec_key[5];
-                break;
-
-            case 14: /* 256-bit key */
-                AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_256b;
-                AES_CRYP_KeyInitStructure.CRYP_Key0Left  = dec_key[0];
-                AES_CRYP_KeyInitStructure.CRYP_Key0Right = dec_key[1];
-                AES_CRYP_KeyInitStructure.CRYP_Key1Left  = dec_key[2];
-                AES_CRYP_KeyInitStructure.CRYP_Key1Right = dec_key[3];
-                AES_CRYP_KeyInitStructure.CRYP_Key2Left  = dec_key[4];
-                AES_CRYP_KeyInitStructure.CRYP_Key2Right = dec_key[5];
-                AES_CRYP_KeyInitStructure.CRYP_Key3Left  = dec_key[6];
-                AES_CRYP_KeyInitStructure.CRYP_Key3Right = dec_key[7];
-                break;
-
-            default:
-                break;
-        }
-
-        /* set direction, mode, and datatype for key preparation */
-        AES_CRYP_InitStructure.CRYP_AlgoDir  = CRYP_AlgoDir_Decrypt;
-        AES_CRYP_InitStructure.CRYP_AlgoMode = CRYP_AlgoMode_AES_Key;
-        AES_CRYP_InitStructure.CRYP_DataType = CRYP_DataType_32b;
-        CRYP_Init(&AES_CRYP_InitStructure);
-        CRYP_KeyInit(&AES_CRYP_KeyInitStructure);
+        /* set direction and key */
+        CRYP_KeyInit(&keyInit);
+        cryptInit.CRYP_AlgoDir  = CRYP_AlgoDir_Decrypt;
+        cryptInit.CRYP_AlgoMode = CRYP_AlgoMode_AES_Key;
+        CRYP_Init(&cryptInit);
 
         /* enable crypto processor */
         CRYP_Cmd(ENABLE);
@@ -2634,20 +3177,20 @@
         /* wait until key has been prepared */
         while (CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {}
 
-        /* set direction, mode, and datatype for decryption */
-        AES_CRYP_InitStructure.CRYP_AlgoDir  = CRYP_AlgoDir_Decrypt;
-        AES_CRYP_InitStructure.CRYP_AlgoMode = CRYP_AlgoMode_AES_CBC;
-        AES_CRYP_InitStructure.CRYP_DataType = CRYP_DataType_8b;
-        CRYP_Init(&AES_CRYP_InitStructure);
+        /* set direction and mode */
+        cryptInit.CRYP_AlgoDir  = CRYP_AlgoDir_Decrypt;
+        cryptInit.CRYP_AlgoMode = CRYP_AlgoMode_AES_CBC;
+        CRYP_Init(&cryptInit);
 
         /* set iv */
+        iv = aes->reg;
+        CRYP_IVStructInit(&ivInit);
         ByteReverseWords(iv, iv, AES_BLOCK_SIZE);
-
-        AES_CRYP_IVInitStructure.CRYP_IV0Left  = iv[0];
-        AES_CRYP_IVInitStructure.CRYP_IV0Right = iv[1];
-        AES_CRYP_IVInitStructure.CRYP_IV1Left  = iv[2];
-        AES_CRYP_IVInitStructure.CRYP_IV1Right = iv[3];
-        CRYP_IVInit(&AES_CRYP_IVInitStructure);
+        ivInit.CRYP_IV0Left  = iv[0];
+        ivInit.CRYP_IV0Right = iv[1];
+        ivInit.CRYP_IV1Left  = iv[2];
+        ivInit.CRYP_IV1Right = iv[3];
+        CRYP_IVInit(&ivInit);
 
         /* enable crypto processor */
         CRYP_Cmd(ENABLE);
@@ -2678,8 +3221,9 @@
 
         /* disable crypto processor */
         CRYP_Cmd(DISABLE);
-
-        return 0;
+        wolfSSL_CryptHwMutexUnLock();
+
+        return ret;
     }
     #endif /* HAVE_AES_DECRYPT */
 #endif /* WOLFSSL_STM32_CUBEMX */
@@ -2808,6 +3352,12 @@
 
         status = LTC_AES_EncryptCbc(LTC_BASE, in, out, blocks * AES_BLOCK_SIZE,
             iv, enc_key, keySize);
+
+        /* store iv for next call */
+        if (status == kStatus_Success) {
+            XMEMCPY(iv, out + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
+        }
+
         return (status == kStatus_Success) ? 0 : -1;
     }
 
@@ -2818,6 +3368,7 @@
         status_t status;
         byte* iv, *dec_key;
         word32 blocks = (sz / AES_BLOCK_SIZE);
+        byte temp_block[AES_BLOCK_SIZE];
 
         iv      = (byte*)aes->reg;
         dec_key = (byte*)aes->key;
@@ -2827,8 +3378,17 @@
             return status;
         }
 
+        /* get IV for next call */
+        XMEMCPY(temp_block, in + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
+
         status = LTC_AES_DecryptCbc(LTC_BASE, in, out, blocks * AES_BLOCK_SIZE,
             iv, dec_key, keySize, kLTC_EncryptKey);
+
+        /* store IV for next call */
+        if (status == kStatus_Success) {
+            XMEMCPY(iv, temp_block, AES_BLOCK_SIZE);
+        }
+
         return (status == kStatus_Success) ? 0 : -1;
     }
     #endif /* HAVE_AES_DECRYPT */
@@ -2939,12 +3499,38 @@
         return ret;
     }
     #endif /* HAVE_AES_DECRYPT */
-
+#elif defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+    !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_AES)
+
+    int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+    {
+        return wc_esp32AesCbcEncrypt(aes, out, in, sz);
+    }
+    int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+    {
+        return wc_esp32AesCbcDecrypt(aes, out, in, sz);
+    }
+#elif defined(WOLFSSL_CRYPTOCELL) && defined(WOLFSSL_CRYPTOCELL_AES)
+    int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+    {
+        return SaSi_AesBlock(&aes->ctx.user_ctx, (uint8_t* )in, sz, out);
+    }
+    int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+    {
+        return SaSi_AesBlock(&aes->ctx.user_ctx, (uint8_t* )in, sz, out);
+    }
 #elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES)
       /* implemented in wolfcrypt/src/port/caam/caam_aes.c */
 
+#elif defined(WOLFSSL_AFALG)
+    /* implemented in wolfcrypt/src/port/af_alg/afalg_aes.c */
+
+#elif defined(WOLFSSL_DEVCRYPTO_CBC)
+    /* implemented in wolfcrypt/src/port/devcrypt/devcrypto_aes.c */
+
 #else
 
+    /* Software AES - CBC Encrypt */
     int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
     {
         word32 blocks = (sz / AES_BLOCK_SIZE);
@@ -2953,6 +3539,14 @@
             return BAD_FUNC_ARG;
         }
 
+    #ifdef WOLF_CRYPTO_CB
+        if (aes->devId != INVALID_DEVID) {
+            int ret = wc_CryptoCb_AesCbcEncrypt(aes, out, in, sz);
+            if (ret != CRYPTOCB_UNAVAILABLE)
+                return ret;
+            /* fall-through when unavailable */
+        }
+    #endif
     #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)
         /* if async and byte count above threshold */
         if (aes->asyncDev.marker == WOLFSSL_ASYNC_MARKER_AES &&
@@ -2961,8 +3555,8 @@
             return NitroxAesCbcEncrypt(aes, out, in, sz);
         #elif defined(HAVE_INTEL_QA)
             return IntelQaSymAesCbcEncrypt(&aes->asyncDev, out, in, sz,
-                (const byte*)aes->asyncKey, aes->keylen,
-                (const byte*)aes->asyncIv, AES_BLOCK_SIZE);
+                (const byte*)aes->devKey, aes->keylen,
+                (byte*)aes->reg, AES_BLOCK_SIZE);
         #else /* WOLFSSL_ASYNC_CRYPT_TEST */
             if (wc_AsyncTestInit(&aes->asyncDev, ASYNC_TEST_AES_CBC_ENCRYPT)) {
                 WC_ASYNC_TEST* testDev = &aes->asyncDev.test;
@@ -3034,6 +3628,7 @@
     }
 
     #ifdef HAVE_AES_DECRYPT
+    /* Software AES - CBC Decrypt */
     int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
     {
         word32 blocks;
@@ -3043,6 +3638,14 @@
             return BAD_FUNC_ARG;
         }
 
+    #ifdef WOLF_CRYPTO_CB
+        if (aes->devId != INVALID_DEVID) {
+            int ret = wc_CryptoCb_AesCbcDecrypt(aes, out, in, sz);
+            if (ret != CRYPTOCB_UNAVAILABLE)
+                return ret;
+            /* fall-through when unavailable */
+        }
+    #endif
     #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)
         /* if async and byte count above threshold */
         if (aes->asyncDev.marker == WOLFSSL_ASYNC_MARKER_AES &&
@@ -3051,8 +3654,8 @@
             return NitroxAesCbcDecrypt(aes, out, in, sz);
         #elif defined(HAVE_INTEL_QA)
             return IntelQaSymAesCbcDecrypt(&aes->asyncDev, out, in, sz,
-                (const byte*)aes->asyncKey, aes->keylen,
-                (const byte*)aes->asyncIv, AES_BLOCK_SIZE);
+                (const byte*)aes->devKey, aes->keylen,
+                (byte*)aes->reg, AES_BLOCK_SIZE);
         #else /* WOLFSSL_ASYNC_CRYPT_TEST */
             if (wc_AsyncTestInit(&aes->asyncDev, ASYNC_TEST_AES_CBC_DECRYPT)) {
                 WC_ASYNC_TEST* testDev = &aes->asyncDev.test;
@@ -3101,6 +3704,7 @@
             XMEMCPY(aes->tmp, in, AES_BLOCK_SIZE);
             wc_AesDecrypt(aes, (byte*)aes->tmp, out);
             xorbuf(out, (byte*)aes->reg, AES_BLOCK_SIZE);
+            /* store iv for next call */
             XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE);
 
             out += AES_BLOCK_SIZE;
@@ -3126,101 +3730,83 @@
             int ret = 0;
         #ifdef WOLFSSL_STM32_CUBEMX
             CRYP_HandleTypeDef hcryp;
-
-            XMEMSET(&hcryp, 0, sizeof(CRYP_HandleTypeDef));
-            switch (aes->rounds) {
-                case 10: /* 128-bit key */
-                    hcryp.Init.KeySize = CRYP_KEYSIZE_128B;
-                    break;
-	#ifdef CRYP_KEYSIZE_192B
-                case 12: /* 192-bit key */
-                    hcryp.Init.KeySize = CRYP_KEYSIZE_192B;
-                    break;
-	#endif
-                case 14: /* 256-bit key */
-                    hcryp.Init.KeySize = CRYP_KEYSIZE_256B;
-                    break;
-                default:
-                    break;
+            #ifdef STM32_HAL_V2
+            word32 iv[AES_BLOCK_SIZE/sizeof(word32)];
+            #endif
+        #else
+            word32 *iv;
+            CRYP_InitTypeDef cryptInit;
+            CRYP_KeyInitTypeDef keyInit;
+            CRYP_IVInitTypeDef ivInit;
+        #endif
+
+            ret = wolfSSL_CryptHwMutexLock();
+            if (ret != 0) {
+                return ret;
+            }
+
+        #ifdef WOLFSSL_STM32_CUBEMX
+            ret = wc_Stm32_Aes_Init(aes, &hcryp);
+            if (ret != 0) {
+                wolfSSL_CryptHwMutexUnLock();
+                return ret;
             }
-            hcryp.Instance = CRYP;
-            hcryp.Init.DataType = CRYP_DATATYPE_8B;
-            hcryp.Init.pKey = (byte*)aes->key;
-            hcryp.Init.pInitVect = (byte*)aes->reg;
-
+
+        #ifdef STM32_CRYPTO_AES_ONLY
+            hcryp.Init.OperatingMode = CRYP_ALGOMODE_ENCRYPT;
+            hcryp.Init.ChainingMode  = CRYP_CHAINMODE_AES_CTR;
+            hcryp.Init.KeyWriteFlag  = CRYP_KEY_WRITE_ENABLE;
+            hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)aes->reg;
+        #elif defined(STM32_HAL_V2)
+            hcryp.Init.Algorithm  = CRYP_AES_CTR;
+            ByteReverseWords(iv, aes->reg, AES_BLOCK_SIZE);
+            hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)iv;
+        #else
+            hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)aes->reg;
+        #endif
             HAL_CRYP_Init(&hcryp);
 
-            if (HAL_CRYP_AESCTR_Encrypt(&hcryp, (byte*)in, AES_BLOCK_SIZE, out,
-                                                STM32_HAL_TIMEOUT) != HAL_OK) {
-                /* failed */
+        #ifdef STM32_CRYPTO_AES_ONLY
+            ret = HAL_CRYPEx_AES(&hcryp, (byte*)in, AES_BLOCK_SIZE,
+                out, STM32_HAL_TIMEOUT);
+        #elif defined(STM32_HAL_V2)
+            ret = HAL_CRYP_Encrypt(&hcryp, (uint32_t*)in, AES_BLOCK_SIZE,
+                (uint32_t*)out, STM32_HAL_TIMEOUT);
+        #else
+            ret = HAL_CRYP_AESCTR_Encrypt(&hcryp, (byte*)in, AES_BLOCK_SIZE,
+                out, STM32_HAL_TIMEOUT);
+        #endif
+            if (ret != HAL_OK) {
                 ret = WC_TIMEOUT_E;
             }
-
             HAL_CRYP_DeInit(&hcryp);
 
         #else /* STD_PERI_LIB */
-            word32 *enc_key, *iv;
-            CRYP_InitTypeDef AES_CRYP_InitStructure;
-            CRYP_KeyInitTypeDef AES_CRYP_KeyInitStructure;
-            CRYP_IVInitTypeDef AES_CRYP_IVInitStructure;
-
-            enc_key = aes->key;
-            iv = aes->reg;
-
-            /* crypto structure initialization */
-            CRYP_KeyStructInit(&AES_CRYP_KeyInitStructure);
-            CRYP_StructInit(&AES_CRYP_InitStructure);
-            CRYP_IVStructInit(&AES_CRYP_IVInitStructure);
+            ret = wc_Stm32_Aes_Init(aes, &cryptInit, &keyInit);
+            if (ret != 0) {
+                wolfSSL_CryptHwMutexUnLock();
+                return ret;
+            }
 
             /* reset registers to their default values */
             CRYP_DeInit();
 
-            /* load key into correct registers */
-            switch (aes->rounds) {
-                case 10: /* 128-bit key */
-                    AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_128b;
-                    AES_CRYP_KeyInitStructure.CRYP_Key2Left  = enc_key[0];
-                    AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[1];
-                    AES_CRYP_KeyInitStructure.CRYP_Key3Left  = enc_key[2];
-                    AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[3];
-                    break;
-                case 12: /* 192-bit key */
-                    AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_192b;
-                    AES_CRYP_KeyInitStructure.CRYP_Key1Left  = enc_key[0];
-                    AES_CRYP_KeyInitStructure.CRYP_Key1Right = enc_key[1];
-                    AES_CRYP_KeyInitStructure.CRYP_Key2Left  = enc_key[2];
-                    AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[3];
-                    AES_CRYP_KeyInitStructure.CRYP_Key3Left  = enc_key[4];
-                    AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[5];
-                    break;
-                case 14: /* 256-bit key */
-                    AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_256b;
-                    AES_CRYP_KeyInitStructure.CRYP_Key0Left  = enc_key[0];
-                    AES_CRYP_KeyInitStructure.CRYP_Key0Right = enc_key[1];
-                    AES_CRYP_KeyInitStructure.CRYP_Key1Left  = enc_key[2];
-                    AES_CRYP_KeyInitStructure.CRYP_Key1Right = enc_key[3];
-                    AES_CRYP_KeyInitStructure.CRYP_Key2Left  = enc_key[4];
-                    AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[5];
-                    AES_CRYP_KeyInitStructure.CRYP_Key3Left  = enc_key[6];
-                    AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[7];
-                    break;
-                default:
-                    break;
-            }
-            CRYP_KeyInit(&AES_CRYP_KeyInitStructure);
+            /* set key */
+            CRYP_KeyInit(&keyInit);
 
             /* set iv */
-            AES_CRYP_IVInitStructure.CRYP_IV0Left  = ByteReverseWord32(iv[0]);
-            AES_CRYP_IVInitStructure.CRYP_IV0Right = ByteReverseWord32(iv[1]);
-            AES_CRYP_IVInitStructure.CRYP_IV1Left  = ByteReverseWord32(iv[2]);
-            AES_CRYP_IVInitStructure.CRYP_IV1Right = ByteReverseWord32(iv[3]);
-            CRYP_IVInit(&AES_CRYP_IVInitStructure);
-
-            /* set direction, mode, and datatype */
-            AES_CRYP_InitStructure.CRYP_AlgoDir  = CRYP_AlgoDir_Encrypt;
-            AES_CRYP_InitStructure.CRYP_AlgoMode = CRYP_AlgoMode_AES_CTR;
-            AES_CRYP_InitStructure.CRYP_DataType = CRYP_DataType_8b;
-            CRYP_Init(&AES_CRYP_InitStructure);
+            iv = aes->reg;
+            CRYP_IVStructInit(&ivInit);
+            ivInit.CRYP_IV0Left  = ByteReverseWord32(iv[0]);
+            ivInit.CRYP_IV0Right = ByteReverseWord32(iv[1]);
+            ivInit.CRYP_IV1Left  = ByteReverseWord32(iv[2]);
+            ivInit.CRYP_IV1Right = ByteReverseWord32(iv[3]);
+            CRYP_IVInit(&ivInit);
+
+            /* set direction and mode */
+            cryptInit.CRYP_AlgoDir  = CRYP_AlgoDir_Encrypt;
+            cryptInit.CRYP_AlgoMode = CRYP_AlgoMode_AES_CTR;
+            CRYP_Init(&cryptInit);
 
             /* enable crypto processor */
             CRYP_Cmd(ENABLE);
@@ -3245,6 +3831,8 @@
             CRYP_Cmd(DISABLE);
 
         #endif /* WOLFSSL_STM32_CUBEMX */
+
+            wolfSSL_CryptHwMutexUnLock();
             return ret;
         }
 
@@ -3303,6 +3891,18 @@
     #elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES)
         /* implemented in wolfcrypt/src/port/caam/caam_aes.c */
 
+    #elif defined(WOLFSSL_AFALG)
+        /* implemented in wolfcrypt/src/port/af_alg/afalg_aes.c */
+
+    #elif defined(WOLFSSL_DEVCRYPTO_AES)
+        /* implemented in wolfcrypt/src/port/devcrypt/devcrypto_aes.c */
+   
+    #elif defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+        !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_AES)
+        /* esp32 doesn't support CRT mode by hw.     */
+        /* use aes ecnryption plus sw implementation */
+        #define NEED_AES_CTR_SOFT
+
     #else
 
         /* Use software based AES counter */
@@ -3321,9 +3921,11 @@
             }
         }
 
+        /* Software AES - CTR Encrypt */
         int wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
         {
             byte* tmp;
+            byte scratch[AES_BLOCK_SIZE];
 
             if (aes == NULL || out == NULL || in == NULL) {
                 return BAD_FUNC_ARG;
@@ -3342,8 +3944,9 @@
             #ifdef XTRANSFORM_AESCTRBLOCK
                 XTRANSFORM_AESCTRBLOCK(aes, out, in);
             #else
-                wc_AesEncrypt(aes, (byte*)aes->reg, out);
-                xorbuf(out, in, AES_BLOCK_SIZE);
+                wc_AesEncrypt(aes, (byte*)aes->reg, scratch);
+                xorbuf(scratch, in, AES_BLOCK_SIZE);
+                XMEMCPY(out, scratch, AES_BLOCK_SIZE);
             #endif
                 IncrementAesCounter((byte*)aes->reg);
 
@@ -3352,6 +3955,7 @@
                 sz  -= AES_BLOCK_SIZE;
                 aes->left = 0;
             }
+            ForceZero(scratch, AES_BLOCK_SIZE);
 
             /* handle non block size remaining and store unused byte count in left */
             if (sz) {
@@ -3412,6 +4016,13 @@
 
 #ifdef WOLFSSL_ARMASM
     /* implementation is located in wolfcrypt/src/port/arm/armv8-aes.c */
+
+#elif defined(WOLFSSL_AFALG)
+    /* implemented in wolfcrypt/src/port/afalg/afalg_aes.c */
+
+#elif defined(WOLFSSL_DEVCRYPTO_AES)
+    /* implemented in wolfcrypt/src/port/devcrypt/devcrypto_aes.c */
+
 #else /* software + AESNI implementation */
 
 #if !defined(FREESCALE_LTC_AES_GCM)
@@ -3425,6 +4036,18 @@
             return;
     }
 }
+#ifdef STM32_CRYPTO_AES_GCM
+static WC_INLINE void DecrementGcmCounter(byte* inOutCtr)
+{
+    int i;
+
+    /* in network byte order so start at end and work back */
+    for (i = AES_BLOCK_SIZE - 1; i >= AES_BLOCK_SIZE - CTR_SZ; i--) {
+        if (--inOutCtr[i] != 0xFF)  /* we're done unless we underflow */
+            return;
+    }
+}
+#endif /* STM32_CRYPTO_AES_GCM */
 #endif /* !FREESCALE_LTC_AES_GCM */
 
 #if defined(GCM_SMALL) || defined(GCM_TABLE)
@@ -3491,7 +4114,7 @@
 
 #endif /* GCM_TABLE */
 
-
+/* Software AES - GCM SetKey */
 int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len)
 {
     int  ret;
@@ -3517,6 +4140,12 @@
     if (!((len == 16) || (len == 24) || (len == 32)))
         return BAD_FUNC_ARG;
 
+#ifdef OPENSSL_EXTRA
+    if (aes != NULL) {
+        XMEMSET(aes->aadH, 0, sizeof(aes->aadH));
+        aes->aadLen = 0;
+    }
+#endif
     XMEMSET(iv, 0, AES_BLOCK_SIZE);
     ret = wc_AesSetKey(aes, key, len, iv, AES_ENCRYPTION);
 
@@ -3537,6 +4166,14 @@
 
 #if defined(WOLFSSL_XILINX_CRYPT)
     wc_AesGcmSetKey_ex(aes, key, len, XSECURE_CSU_AES_KEY_SRC_KUP);
+#elif defined(WOLFSSL_AFALG_XILINX_AES)
+    wc_AesGcmSetKey_ex(aes, key, len, 0);
+#endif
+
+#ifdef WOLF_CRYPTO_CB
+    if (aes->devId != INVALID_DEVID) {
+        XMEMCPY(aes->devKey, key, len);
+    }
 #endif
 
 #ifdef WOLFSSL_IMX6_CAAM_BLOB
@@ -3554,18 +4191,68 @@
     #define HAVE_INTEL_AVX2
 #endif /* USE_INTEL_SPEEDUP */
 
-#ifdef _MSC_VER
-    #define S(w,z) ((char)((unsigned long long)(w) >> (8*(7-(z))) & 0xFF))
-    #define M128_INIT(x,y) { S((x),7), S((x),6), S((x),5), S((x),4), \
-                             S((x),3), S((x),2), S((x),1), S((x),0), \
-                             S((y),7), S((y),6), S((y),5), S((y),4), \
-                             S((y),3), S((y),2), S((y),1), S((y),0) }
-#else
-    #define M128_INIT(x,y) { (x), (y) }
-#endif
-
-static const __m128i MOD2_128 = M128_INIT(0x1,
-                                           (long long int)0xc200000000000000UL);
+#ifndef _MSC_VER
+
+void AES_GCM_encrypt(const unsigned char *in, unsigned char *out,
+                     const unsigned char* addt, const unsigned char* ivec,
+                     unsigned char *tag, unsigned int nbytes,
+                     unsigned int abytes, unsigned int ibytes,
+                     unsigned int tbytes, const unsigned char* key, int nr)
+                     XASM_LINK("AES_GCM_encrypt");
+#ifdef HAVE_INTEL_AVX1
+void AES_GCM_encrypt_avx1(const unsigned char *in, unsigned char *out,
+                          const unsigned char* addt, const unsigned char* ivec,
+                          unsigned char *tag, unsigned int nbytes,
+                          unsigned int abytes, unsigned int ibytes,
+                          unsigned int tbytes, const unsigned char* key,
+                          int nr)
+                          XASM_LINK("AES_GCM_encrypt_avx1");
+#ifdef HAVE_INTEL_AVX2
+void AES_GCM_encrypt_avx2(const unsigned char *in, unsigned char *out,
+                          const unsigned char* addt, const unsigned char* ivec,
+                          unsigned char *tag, unsigned int nbytes,
+                          unsigned int abytes, unsigned int ibytes,
+                          unsigned int tbytes, const unsigned char* key,
+                          int nr)
+                          XASM_LINK("AES_GCM_encrypt_avx2");
+#endif /* HAVE_INTEL_AVX2 */
+#endif /* HAVE_INTEL_AVX1 */
+
+#ifdef HAVE_AES_DECRYPT
+void AES_GCM_decrypt(const unsigned char *in, unsigned char *out,
+                     const unsigned char* addt, const unsigned char* ivec,
+                     const unsigned char *tag, int nbytes, int abytes,
+                     int ibytes, int tbytes, const unsigned char* key, int nr,
+                     int* res)
+                     XASM_LINK("AES_GCM_decrypt");
+#ifdef HAVE_INTEL_AVX1
+void AES_GCM_decrypt_avx1(const unsigned char *in, unsigned char *out,
+                          const unsigned char* addt, const unsigned char* ivec,
+                          const unsigned char *tag, int nbytes, int abytes,
+                          int ibytes, int tbytes, const unsigned char* key,
+                          int nr, int* res)
+                          XASM_LINK("AES_GCM_decrypt_avx1");
+#ifdef HAVE_INTEL_AVX2
+void AES_GCM_decrypt_avx2(const unsigned char *in, unsigned char *out,
+                          const unsigned char* addt, const unsigned char* ivec,
+                          const unsigned char *tag, int nbytes, int abytes,
+                          int ibytes, int tbytes, const unsigned char* key,
+                          int nr, int* res)
+                          XASM_LINK("AES_GCM_decrypt_avx2");
+#endif /* HAVE_INTEL_AVX2 */
+#endif /* HAVE_INTEL_AVX1 */
+#endif /* HAVE_AES_DECRYPT */
+
+#else /* _MSC_VER */
+
+#define S(w,z) ((char)((unsigned long long)(w) >> (8*(7-(z))) & 0xFF))
+#define M128_INIT(x,y) { S((x),7), S((x),6), S((x),5), S((x),4), \
+                         S((x),3), S((x),2), S((x),1), S((x),0), \
+                         S((y),7), S((y),6), S((y),5), S((y),4), \
+                         S((y),3), S((y),2), S((y),1), S((y),0) }
+
+static const __m128i MOD2_128 =
+        M128_INIT(0x1, (long long int)0xc200000000000000UL);
 
 
 /* See Intel® Carry-Less Multiplication Instruction
@@ -3586,3105 +4273,12 @@
 static const __m128i SEVEN = M128_INIT(0x0, 0x7);
 static const __m128i EIGHT = M128_INIT(0x0, 0x8);
 #endif
-static const __m128i BSWAP_EPI64 = M128_INIT(0x0001020304050607, 0x08090a0b0c0d0e0f);
-static const __m128i BSWAP_MASK  = M128_INIT(0x08090a0b0c0d0e0f, 0x0001020304050607);
-
-
-#ifndef _MSC_VER
-
-#define _VAR(a) "" #a ""
-#define VAR(a) _VAR(a)
-
-#define HR     %%xmm14
-#define XR     %%xmm15
-#define KR     %%ebx
-#define KR64   %%rbx
-#if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX1_NO_UNROLL)
-#define CTR1   128(%%rsp)
-#define TR     144(%%rsp)
-#define HTR    %%rsp
-#define STACK_OFFSET    160
-#else
-#define CTR1   (%%rsp)
-#define TR     16(%%rsp)
-#define STACK_OFFSET    32
-#endif
-
-#define AESENC()                      \
-    "aesenc	%%xmm12, %%xmm4\n\t"  \
-    "aesenc	%%xmm12, %%xmm5\n\t"  \
-    "aesenc	%%xmm12, %%xmm6\n\t"  \
-    "aesenc	%%xmm12, %%xmm7\n\t"  \
-    "aesenc	%%xmm12, %%xmm8\n\t"  \
-    "aesenc	%%xmm12, %%xmm9\n\t"  \
-    "aesenc	%%xmm12, %%xmm10\n\t" \
-    "aesenc	%%xmm12, %%xmm11\n\t"
-
-#define AESENC_SET(o)                        \
-    "movdqa	" #o "(%[KEY]), %%xmm12\n\t" \
-    AESENC()
-
-#define AESENC_CTR()                        \
-    "movdqu	" VAR(CTR1) ", %%xmm4\n\t"  \
-    "movdqa	%[BSWAP_EPI64], %%xmm1\n\t" \
-    "movdqu	%%xmm4, %%xmm0\n\t"         \
-    "pshufb	%%xmm1, %%xmm4\n\t"         \
-    "movdqa	%%xmm0, %%xmm5\n\t"         \
-    "paddd	%[ONE], %%xmm5\n\t"         \
-    "pshufb	%%xmm1, %%xmm5\n\t"         \
-    "movdqa	%%xmm0, %%xmm6\n\t"         \
-    "paddd	%[TWO], %%xmm6\n\t"         \
-    "pshufb	%%xmm1, %%xmm6\n\t"         \
-    "movdqa	%%xmm0, %%xmm7\n\t"         \
-    "paddd	%[THREE], %%xmm7\n\t"       \
-    "pshufb	%%xmm1, %%xmm7\n\t"         \
-    "movdqa	%%xmm0, %%xmm8\n\t"         \
-    "paddd	%[FOUR], %%xmm8\n\t"        \
-    "pshufb	%%xmm1, %%xmm8\n\t"         \
-    "movdqa	%%xmm0, %%xmm9\n\t"         \
-    "paddd	%[FIVE], %%xmm9\n\t"        \
-    "pshufb	%%xmm1, %%xmm9\n\t"         \
-    "movdqa	%%xmm0, %%xmm10\n\t"        \
-    "paddd	%[SIX], %%xmm10\n\t"        \
-    "pshufb	%%xmm1, %%xmm10\n\t"        \
-    "movdqa	%%xmm0, %%xmm11\n\t"        \
-    "paddd	%[SEVEN], %%xmm11\n\t"      \
-    "pshufb	%%xmm1, %%xmm11\n\t"        \
-    "paddd	%[EIGHT], %%xmm0\n\t"
-
-#define AESENC_XOR()                       \
-    "movdqa	(%[KEY]), %%xmm12\n\t"     \
-    "movdqu	%%xmm0, " VAR(CTR1) "\n\t" \
-    "pxor	%%xmm12, %%xmm4\n\t"       \
-    "pxor	%%xmm12, %%xmm5\n\t"       \
-    "pxor	%%xmm12, %%xmm6\n\t"       \
-    "pxor	%%xmm12, %%xmm7\n\t"       \
-    "pxor	%%xmm12, %%xmm8\n\t"       \
-    "pxor	%%xmm12, %%xmm9\n\t"       \
-    "pxor	%%xmm12, %%xmm10\n\t"      \
-    "pxor	%%xmm12, %%xmm11\n\t"
-
-/* Encrypt and carry-less multiply for AVX1. */
-#define AESENC_PCLMUL_1(src, o1, o2, o3)            \
-    "movdqu	" #o3 "(" VAR(HTR) "), %%xmm12\n\t" \
-    "movdqu	" #o2 "(" #src "), %%xmm0\n\t"      \
-    "aesenc	" #o1 "(%[KEY]), %%xmm4\n\t"        \
-    "pshufb	%[BSWAP_MASK], %%xmm0\n\t"          \
-    "pxor	%%xmm2, %%xmm0\n\t"                 \
-    "pshufd	$0x4e, %%xmm12, %%xmm1\n\t"         \
-    "pshufd	$0x4e, %%xmm0, %%xmm14\n\t"         \
-    "pxor	%%xmm12, %%xmm1\n\t"                \
-    "pxor	%%xmm0, %%xmm14\n\t"                \
-    "movdqa	%%xmm0, %%xmm3\n\t"                 \
-    "pclmulqdq	$0x11, %%xmm12, %%xmm3\n\t"         \
-    "aesenc	" #o1 "(%[KEY]), %%xmm5\n\t"        \
-    "aesenc	" #o1 "(%[KEY]), %%xmm6\n\t"        \
-    "movdqa	%%xmm0, %%xmm2\n\t"                 \
-    "pclmulqdq	$0x00, %%xmm12, %%xmm2\n\t"         \
-    "aesenc	" #o1 "(%[KEY]), %%xmm7\n\t"        \
-    "aesenc	" #o1 "(%[KEY]), %%xmm8\n\t"        \
-    "pclmulqdq	$0x00, %%xmm14, %%xmm1\n\t"         \
-    "aesenc	" #o1 "(%[KEY]), %%xmm9\n\t"        \
-    "aesenc	" #o1 "(%[KEY]), %%xmm10\n\t"       \
-    "aesenc	" #o1 "(%[KEY]), %%xmm11\n\t"       \
-    "pxor      %%xmm2, %%xmm1\n\t"                  \
-    "pxor      %%xmm3, %%xmm1\n\t"                  \
-
-#define AESENC_PCLMUL_N(src, o1, o2, o3)            \
-    "movdqu	" #o3 "(" VAR(HTR) "), %%xmm12\n\t" \
-    "movdqu	" #o2 "(" #src" ), %%xmm0\n\t"      \
-    "pshufd	$0x4e, %%xmm12, %%xmm13\n\t"        \
-    "pshufb	%[BSWAP_MASK], %%xmm0\n\t"          \
-    "aesenc	" #o1 "(%[KEY]), %%xmm4\n\t"        \
-    "pxor	%%xmm12, %%xmm13\n\t"               \
-    "pshufd	$0x4e, %%xmm0, %%xmm14\n\t"         \
-    "pxor	%%xmm0, %%xmm14\n\t"                \
-    "movdqa	%%xmm0, %%xmm15\n\t"                \
-    "pclmulqdq	$0x11, %%xmm12, %%xmm15\n\t"        \
-    "aesenc	" #o1 "(%[KEY]), %%xmm5\n\t"        \
-    "aesenc	" #o1 "(%[KEY]), %%xmm6\n\t"        \
-    "pclmulqdq	$0x00, %%xmm0, %%xmm12\n\t"         \
-    "aesenc	" #o1 "(%[KEY]), %%xmm7\n\t"        \
-    "aesenc	" #o1 "(%[KEY]), %%xmm8\n\t"        \
-    "pclmulqdq	$0x00, %%xmm14, %%xmm13\n\t"        \
-    "aesenc	" #o1 "(%[KEY]), %%xmm9\n\t"        \
-    "aesenc	" #o1 "(%[KEY]), %%xmm10\n\t"       \
-    "aesenc	" #o1 "(%[KEY]), %%xmm11\n\t"       \
-    "pxor      %%xmm12, %%xmm1\n\t"                 \
-    "pxor      %%xmm12, %%xmm2\n\t"                 \
-    "pxor      %%xmm15, %%xmm1\n\t"                 \
-    "pxor      %%xmm15, %%xmm3\n\t"                 \
-    "pxor      %%xmm13, %%xmm1\n\t"                 \
-
-#define AESENC_PCLMUL_L(o)                   \
-    "movdqa	%%xmm1, %%xmm14\n\t"         \
-    "psrldq	$8, %%xmm1\n\t"              \
-    "pslldq	$8, %%xmm14\n\t"             \
-    "aesenc	" #o "(%[KEY]), %%xmm4\n\t"  \
-    "pxor      %%xmm14, %%xmm2\n\t"          \
-    "pxor      %%xmm1, %%xmm3\n\t"           \
-    "movdqa	%%xmm2, %%xmm12\n\t"         \
-    "movdqa	%%xmm2, %%xmm13\n\t"         \
-    "movdqa	%%xmm2, %%xmm14\n\t"         \
-    "aesenc	" #o "(%[KEY]), %%xmm5\n\t"  \
-    "pslld	$31, %%xmm12\n\t"            \
-    "pslld	$30, %%xmm13\n\t"            \
-    "pslld	$25, %%xmm14\n\t"            \
-    "aesenc	" #o "(%[KEY]), %%xmm6\n\t"  \
-    "pxor	%%xmm13, %%xmm12\n\t"        \
-    "pxor	%%xmm14, %%xmm12\n\t"        \
-    "aesenc	" #o "(%[KEY]), %%xmm7\n\t"  \
-    "movdqa	%%xmm12, %%xmm13\n\t"        \
-    "pslldq	$12, %%xmm12\n\t"            \
-    "psrldq	$4, %%xmm13\n\t"             \
-    "aesenc	" #o "(%[KEY]), %%xmm8\n\t"  \
-    "pxor	%%xmm12, %%xmm2\n\t"         \
-    "movdqa	%%xmm2, %%xmm14\n\t"         \
-    "movdqa	%%xmm2, %%xmm1\n\t"          \
-    "movdqa	%%xmm2, %%xmm0\n\t"          \
-    "aesenc	" #o "(%[KEY]), %%xmm9\n\t"  \
-    "psrld	$1, %%xmm14\n\t"             \
-    "psrld	$2, %%xmm1\n\t"              \
-    "psrld	$7, %%xmm0\n\t"              \
-    "aesenc	" #o "(%[KEY]), %%xmm10\n\t" \
-    "pxor	%%xmm1, %%xmm14\n\t"         \
-    "pxor	%%xmm0, %%xmm14\n\t"         \
-    "aesenc	" #o "(%[KEY]), %%xmm11\n\t" \
-    "pxor	%%xmm13, %%xmm14\n\t"        \
-    "pxor	%%xmm14, %%xmm2\n\t"         \
-    "pxor	%%xmm3, %%xmm2\n\t"          \
-
-/* Encrypt and carry-less multiply with last key. */
-#define AESENC_LAST(in, out)                \
-    "aesenclast	%%xmm12, %%xmm4\n\t"        \
-    "aesenclast	%%xmm12, %%xmm5\n\t"        \
-    "movdqu	   (" #in "),%%xmm0\n\t"    \
-    "movdqu	 16(" #in "),%%xmm1\n\t"    \
-    "pxor	%%xmm0, %%xmm4\n\t"         \
-    "pxor	%%xmm1, %%xmm5\n\t"         \
-    "movdqu	%%xmm4,    (" #out ")\n\t"  \
-    "movdqu	%%xmm5,  16(" #out ")\n\t"  \
-    "aesenclast	%%xmm12, %%xmm6\n\t"        \
-    "aesenclast	%%xmm12, %%xmm7\n\t"        \
-    "movdqu	 32(" #in "),%%xmm0\n\t"    \
-    "movdqu	 48(" #in "),%%xmm1\n\t"    \
-    "pxor	%%xmm0, %%xmm6\n\t"         \
-    "pxor	%%xmm1, %%xmm7\n\t"         \
-    "movdqu	%%xmm6,  32(" #out ")\n\t"  \
-    "movdqu	%%xmm7,  48(" #out ")\n\t"  \
-    "aesenclast	%%xmm12, %%xmm8\n\t"        \
-    "aesenclast	%%xmm12, %%xmm9\n\t"        \
-    "movdqu	 64(" #in "),%%xmm0\n\t"    \
-    "movdqu	 80(" #in "),%%xmm1\n\t"    \
-    "pxor	%%xmm0, %%xmm8\n\t"         \
-    "pxor	%%xmm1, %%xmm9\n\t"         \
-    "movdqu	%%xmm8,  64(" #out ")\n\t"  \
-    "movdqu	%%xmm9,  80(" #out ")\n\t"  \
-    "aesenclast	%%xmm12, %%xmm10\n\t"       \
-    "aesenclast	%%xmm12, %%xmm11\n\t"       \
-    "movdqu	 96(" #in "),%%xmm0\n\t"    \
-    "movdqu	112(" #in "),%%xmm1\n\t"    \
-    "pxor	%%xmm0, %%xmm10\n\t"        \
-    "pxor	%%xmm1, %%xmm11\n\t"        \
-    "movdqu	%%xmm10,  96(" #out ")\n\t" \
-    "movdqu	%%xmm11, 112(" #out ")\n\t"
-
-#define _AESENC_AVX(r)                    \
-    "aesenc	16(%[KEY]), " #r "\n\t"   \
-    "aesenc	32(%[KEY]), " #r "\n\t"   \
-    "aesenc	48(%[KEY]), " #r "\n\t"   \
-    "aesenc	64(%[KEY]), " #r "\n\t"   \
-    "aesenc	80(%[KEY]), " #r "\n\t"   \
-    "aesenc	96(%[KEY]), " #r "\n\t"   \
-    "aesenc	112(%[KEY]), " #r "\n\t"  \
-    "aesenc	128(%[KEY]), " #r "\n\t"  \
-    "aesenc	144(%[KEY]), " #r "\n\t"  \
-    "cmpl	$11, %[nr]\n\t"           \
-    "movdqa	160(%[KEY]), %%xmm5\n\t"  \
-    "jl		%=f\n\t"                  \
-    "aesenc	%%xmm5, " #r "\n\t"       \
-    "aesenc	176(%[KEY]), " #r "\n\t"  \
-    "cmpl	$13, %[nr]\n\t"           \
-    "movdqa	192(%[KEY]), %%xmm5\n\t"  \
-    "jl		%=f\n\t"                  \
-    "aesenc	%%xmm5, " #r "\n\t"       \
-    "aesenc	208(%[KEY]), " #r "\n\t"  \
-    "movdqa	224(%[KEY]), %%xmm5\n\t"  \
-    "%=:\n\t"                             \
-    "aesenclast	%%xmm5, " #r "\n\t"
-#define AESENC_AVX(r)                     \
-        _AESENC_AVX(r)
-
-#define AESENC_BLOCK(in, out)               \
-    "movdqu	" VAR(CTR1) ", %%xmm4\n\t"  \
-    "movdqu	%%xmm4, %%xmm5\n\t"         \
-    "pshufb	%[BSWAP_EPI64], %%xmm4\n\t" \
-    "paddd	%[ONE], %%xmm5\n\t"         \
-    "pxor	(%[KEY]), %%xmm4\n\t"       \
-    "movdqu	%%xmm5, " VAR(CTR1) "\n\t"  \
-    AESENC_AVX(%%xmm4)                      \
-    "movdqu	(" #in "), %%xmm5\n\t"      \
-    "pxor	%%xmm5, %%xmm4\n\t"         \
-    "movdqu	%%xmm4, (" #out ")\n\t"     \
-    "pshufb	%[BSWAP_MASK], %%xmm4\n\t"  \
-    "pxor	%%xmm4, " VAR(XR) "\n\t"
-
-#define _AESENC_GFMUL(in, out, H, X)            \
-    "movdqu	" VAR(CTR1) ", %%xmm4\n\t"      \
-    "movdqu	%%xmm4, %%xmm5\n\t"             \
-    "pshufb	%[BSWAP_EPI64], %%xmm4\n\t"     \
-    "paddd	%[ONE], %%xmm5\n\t"             \
-    "pxor	(%[KEY]), %%xmm4\n\t"           \
-    "movdqu	%%xmm5, " VAR(CTR1) "\n\t"      \
-    "movdqa	" #X ", %%xmm6\n\t"             \
-    "pclmulqdq	$0x10, " #H ", %%xmm6\n\t"      \
-    "aesenc	16(%[KEY]), %%xmm4\n\t"         \
-    "aesenc	32(%[KEY]), %%xmm4\n\t"         \
-    "movdqa	" #X ", %%xmm7\n\t"             \
-    "pclmulqdq	$0x01, " #H ", %%xmm7\n\t"      \
-    "aesenc	48(%[KEY]), %%xmm4\n\t"         \
-    "aesenc	64(%[KEY]), %%xmm4\n\t"         \
-    "movdqa	" #X ", %%xmm8\n\t"             \
-    "pclmulqdq	$0x00, " #H ", %%xmm8\n\t"      \
-    "aesenc	80(%[KEY]), %%xmm4\n\t"         \
-    "movdqa	" #X ", %%xmm1\n\t"             \
-    "pclmulqdq	$0x11, " #H ", %%xmm1\n\t"      \
-    "aesenc	96(%[KEY]), %%xmm4\n\t"         \
-    "pxor	%%xmm7, %%xmm6\n\t"             \
-    "movdqa	%%xmm6, %%xmm2\n\t"             \
-    "psrldq	$8, %%xmm6\n\t"                 \
-    "pslldq	$8, %%xmm2\n\t"                 \
-    "aesenc	112(%[KEY]), %%xmm4\n\t"        \
-    "movdqa	%%xmm1, %%xmm3\n\t"             \
-    "pxor	%%xmm8, %%xmm2\n\t"             \
-    "pxor	%%xmm6, %%xmm3\n\t"             \
-    "movdqa	%[MOD2_128], %%xmm0\n\t"        \
-    "movdqa	%%xmm2, %%xmm7\n\t"             \
-    "pclmulqdq	$0x10, %%xmm0, %%xmm7\n\t"      \
-    "aesenc	128(%[KEY]), %%xmm4\n\t"        \
-    "pshufd	$0x4e, %%xmm2, %%xmm6\n\t"      \
-    "pxor	%%xmm7, %%xmm6\n\t"             \
-    "movdqa	%%xmm6, %%xmm7\n\t"             \
-    "pclmulqdq	$0x10, %%xmm0, %%xmm7\n\t"      \
-    "aesenc	144(%[KEY]), %%xmm4\n\t"        \
-    "pshufd	$0x4e, %%xmm6, " VAR(XR) "\n\t" \
-    "pxor	%%xmm7, " VAR(XR) "\n\t"        \
-    "pxor	%%xmm3, " VAR(XR) "\n\t"        \
-    "cmpl	$11, %[nr]\n\t"                 \
-    "movdqu	160(%[KEY]), %%xmm5\n\t"        \
-    "jl		%=f\n\t"                        \
-    "aesenc	%%xmm5, %%xmm4\n\t"             \
-    "aesenc	176(%[KEY]), %%xmm4\n\t"        \
-    "cmpl	$13, %[nr]\n\t"                 \
-    "movdqu	192(%[KEY]), %%xmm5\n\t"        \
-    "jl		%=f\n\t"                        \
-    "aesenc	%%xmm5, %%xmm4\n\t"             \
-    "aesenc	208(%[KEY]), %%xmm4\n\t"        \
-    "movdqa	224(%[KEY]), %%xmm5\n\t"        \
-    "%=:\n\t"                                   \
-    "aesenclast	%%xmm5, %%xmm4\n\t"             \
-    "movdqu	(" #in "), %%xmm5\n\t"          \
-    "pxor	%%xmm5, %%xmm4\n\t"             \
-    "movdqu	%%xmm4, (" #out ")\n\t"
-#define AESENC_GFMUL(in, out, H, X)             \
-       _AESENC_GFMUL(in, out, H, X)
-
-#define _GHASH_GFMUL_AVX(r, r2, a, b)      \
-    "pshufd	$0x4e, "#a", %%xmm1\n\t"   \
-    "pshufd	$0x4e, "#b", %%xmm2\n\t"   \
-    "movdqa	"#b", %%xmm3\n\t"          \
-    "movdqa	"#b", %%xmm0\n\t"          \
-    "pclmulqdq	$0x11, "#a", %%xmm3\n\t"   \
-    "pclmulqdq	$0x00, "#a", %%xmm0\n\t"   \
-    "pxor	"#a", %%xmm1\n\t"          \
-    "pxor	"#b", %%xmm2\n\t"          \
-    "pclmulqdq	$0x00, %%xmm2, %%xmm1\n\t" \
-    "pxor	%%xmm0, %%xmm1\n\t"        \
-    "pxor	%%xmm3, %%xmm1\n\t"        \
-    "movdqa	%%xmm1, %%xmm2\n\t"        \
-    "movdqa	%%xmm0, "#r2"\n\t"         \
-    "movdqa	%%xmm3, " #r "\n\t"        \
-    "pslldq	$8, %%xmm2\n\t"            \
-    "psrldq	$8, %%xmm1\n\t"            \
-    "pxor	%%xmm2, "#r2"\n\t"         \
-    "pxor	%%xmm1, " #r "\n\t"
-#define GHASH_GFMUL_AVX(r, r2, a, b)       \
-       _GHASH_GFMUL_AVX(r, r2, a, b)
-
-#define _GHASH_GFMUL_XOR_AVX(r, r2, a, b)  \
-    "pshufd	$0x4e, "#a", %%xmm1\n\t"   \
-    "pshufd	$0x4e, "#b", %%xmm2\n\t"   \
-    "movdqa	"#b", %%xmm3\n\t"          \
-    "movdqa	"#b", %%xmm0\n\t"          \
-    "pclmulqdq	$0x11, "#a", %%xmm3\n\t"   \
-    "pclmulqdq	$0x00, "#a", %%xmm0\n\t"   \
-    "pxor	"#a", %%xmm1\n\t"          \
-    "pxor	"#b", %%xmm2\n\t"          \
-    "pclmulqdq	$0x00, %%xmm2, %%xmm1\n\t" \
-    "pxor	%%xmm0, %%xmm1\n\t"        \
-    "pxor	%%xmm3, %%xmm1\n\t"        \
-    "movdqa	%%xmm1, %%xmm2\n\t"        \
-    "pxor	%%xmm0, "#r2"\n\t"         \
-    "pxor	%%xmm3, " #r "\n\t"        \
-    "pslldq	$8, %%xmm2\n\t"            \
-    "psrldq	$8, %%xmm1\n\t"            \
-    "pxor	%%xmm2, "#r2"\n\t"         \
-    "pxor	%%xmm1, " #r "\n\t"
-#define GHASH_GFMUL_XOR_AVX(r, r2, a, b)   \
-       _GHASH_GFMUL_XOR_AVX(r, r2, a, b)
-
-#define GHASH_MID_AVX(r, r2)        \
-    "movdqa	"#r2", %%xmm0\n\t"  \
-    "movdqa	" #r ", %%xmm1\n\t" \
-    "psrld	$31, %%xmm0\n\t"    \
-    "psrld	$31, %%xmm1\n\t"    \
-    "pslld	$1, "#r2"\n\t"      \
-    "pslld	$1, " #r "\n\t"     \
-    "movdqa	%%xmm0, %%xmm2\n\t" \
-    "pslldq	$4, %%xmm0\n\t"     \
-    "psrldq	$12, %%xmm2\n\t"    \
-    "pslldq	$4, %%xmm1\n\t"     \
-    "por	%%xmm2, " #r "\n\t" \
-    "por	%%xmm0, "#r2"\n\t"  \
-    "por	%%xmm1, " #r "\n\t"
-
-#define _GHASH_GFMUL_RED_AVX(r, a, b)      \
-    "pshufd	$0x4e, "#a", %%xmm5\n\t"   \
-    "pshufd	$0x4e, "#b", %%xmm6\n\t"   \
-    "movdqa	"#b", %%xmm7\n\t"          \
-    "movdqa	"#b", %%xmm4\n\t"          \
-    "pclmulqdq	$0x11, "#a", %%xmm7\n\t"   \
-    "pclmulqdq	$0x00, "#a", %%xmm4\n\t"   \
-    "pxor	"#a", %%xmm5\n\t"          \
-    "pxor	"#b", %%xmm6\n\t"          \
-    "pclmulqdq	$0x00, %%xmm6, %%xmm5\n\t" \
-    "pxor	%%xmm4, %%xmm5\n\t"        \
-    "pxor	%%xmm7, %%xmm5\n\t"        \
-    "movdqa	%%xmm5, %%xmm6\n\t"        \
-    "movdqa	%%xmm7, " #r "\n\t"        \
-    "pslldq	$8, %%xmm6\n\t"            \
-    "psrldq	$8, %%xmm5\n\t"            \
-    "pxor	%%xmm6, %%xmm4\n\t"        \
-    "pxor	%%xmm5, " #r "\n\t"        \
-    "movdqa	%%xmm4, %%xmm8\n\t"        \
-    "movdqa	%%xmm4, %%xmm9\n\t"        \
-    "movdqa	%%xmm4, %%xmm10\n\t"       \
-    "pslld	$31, %%xmm8\n\t"           \
-    "pslld	$30, %%xmm9\n\t"           \
-    "pslld	$25, %%xmm10\n\t"          \
-    "pxor	%%xmm9, %%xmm8\n\t"        \
-    "pxor	%%xmm10, %%xmm8\n\t"       \
-    "movdqa	%%xmm8, %%xmm9\n\t"        \
-    "psrldq	$4, %%xmm9\n\t"            \
-    "pslldq	$12, %%xmm8\n\t"           \
-    "pxor	%%xmm8, %%xmm4\n\t"        \
-    "movdqa	%%xmm4, %%xmm10\n\t"       \
-    "movdqa	%%xmm4, %%xmm6\n\t"        \
-    "movdqa	%%xmm4, %%xmm5\n\t"        \
-    "psrld	$1, %%xmm10\n\t"           \
-    "psrld	$2, %%xmm6\n\t"            \
-    "psrld	$7, %%xmm5\n\t"            \
-    "pxor	%%xmm6, %%xmm10\n\t"       \
-    "pxor	%%xmm5, %%xmm10\n\t"       \
-    "pxor	%%xmm9, %%xmm10\n\t"       \
-    "pxor	%%xmm4, %%xmm10\n\t"       \
-    "pxor	%%xmm10, " #r "\n\t"
-#define GHASH_GFMUL_RED_AVX(r, a, b)       \
-       _GHASH_GFMUL_RED_AVX(r, a, b)
-
-#define GHASH_RED_AVX(r, r2)           \
-    "movdqa	"#r2", %%xmm0\n\t"     \
-    "movdqa	"#r2", %%xmm1\n\t"     \
-    "movdqa	"#r2", %%xmm2\n\t"     \
-    "pslld	$31, %%xmm0\n\t"       \
-    "pslld	$30, %%xmm1\n\t"       \
-    "pslld	$25, %%xmm2\n\t"       \
-    "pxor	%%xmm1, %%xmm0\n\t"    \
-    "pxor	%%xmm2, %%xmm0\n\t"    \
-    "movdqa	%%xmm0, %%xmm1\n\t"    \
-    "psrldq	$4, %%xmm1\n\t"        \
-    "pslldq	$12, %%xmm0\n\t"       \
-    "pxor	%%xmm0, "#r2"\n\t"     \
-    "movdqa	"#r2", %%xmm2\n\t"     \
-    "movdqa	"#r2", %%xmm3\n\t"     \
-    "movdqa	"#r2", %%xmm0\n\t"     \
-    "psrld	$1, %%xmm2\n\t"        \
-    "psrld	$2, %%xmm3\n\t"        \
-    "psrld	$7, %%xmm0\n\t"        \
-    "pxor	%%xmm3, %%xmm2\n\t"    \
-    "pxor	%%xmm0, %%xmm2\n\t"    \
-    "pxor	%%xmm1, %%xmm2\n\t"    \
-    "pxor	"#r2", %%xmm2\n\t"     \
-    "pxor	%%xmm2, " #r "\n\t"
-
-#define GHASH_GFMUL_RED_XOR_AVX(r, r2, a, b) \
-    GHASH_GFMUL_XOR_AVX(r, r2, a, b)         \
-    GHASH_RED_AVX(r, r2)
-
-#define GHASH_FULL_AVX(r, r2, a, b) \
-    GHASH_GFMUL_AVX(r, r2, a, b)    \
-    GHASH_MID_AVX(r, r2)            \
-    GHASH_RED_AVX(r, r2)
-
-#define CALC_IV_12() \
-    "# Calculate values when IV is 12 bytes\n\t"      \
-    "# Set counter based on IV\n\t"                   \
-    "movl	$0x01000000, %%ecx\n\t"               \
-    "pinsrq	$0, 0(%%rax), %%xmm13\n\t"            \
-    "pinsrd	$2, 8(%%rax), %%xmm13\n\t"            \
-    "pinsrd	$3, %%ecx, %%xmm13\n\t"               \
-    "# H = Encrypt X(=0) and T = Encrypt counter\n\t" \
-    "movdqu	%%xmm13, %%xmm1\n\t"                  \
-    "movdqa	  0(%[KEY]), " VAR(HR) "\n\t"         \
-    "pxor	" VAR(HR) ", %%xmm1\n\t"              \
-    "movdqa	 16(%[KEY]), %%xmm12\n\t"             \
-    "aesenc	%%xmm12, " VAR(HR) "\n\t"             \
-    "aesenc	%%xmm12, %%xmm1\n\t"                  \
-    "movdqa	 32(%[KEY]), %%xmm12\n\t"             \
-    "aesenc	%%xmm12, " VAR(HR) "\n\t"             \
-    "aesenc	%%xmm12, %%xmm1\n\t"                  \
-    "movdqa	 48(%[KEY]), %%xmm12\n\t"             \
-    "aesenc	%%xmm12, " VAR(HR) "\n\t"             \
-    "aesenc	%%xmm12, %%xmm1\n\t"                  \
-    "movdqa	 64(%[KEY]), %%xmm12\n\t"             \
-    "aesenc	%%xmm12, " VAR(HR) "\n\t"             \
-    "aesenc	%%xmm12, %%xmm1\n\t"                  \
-    "movdqa	 80(%[KEY]), %%xmm12\n\t"             \
-    "aesenc	%%xmm12, " VAR(HR) "\n\t"             \
-    "aesenc	%%xmm12, %%xmm1\n\t"                  \
-    "movdqa	 96(%[KEY]), %%xmm12\n\t"             \
-    "aesenc	%%xmm12, " VAR(HR) "\n\t"             \
-    "aesenc	%%xmm12, %%xmm1\n\t"                  \
-    "movdqa	112(%[KEY]), %%xmm12\n\t"             \
-    "aesenc	%%xmm12, " VAR(HR) "\n\t"             \
-    "aesenc	%%xmm12, %%xmm1\n\t"                  \
-    "movdqa	128(%[KEY]), %%xmm12\n\t"             \
-    "aesenc	%%xmm12, " VAR(HR) "\n\t"             \
-    "aesenc	%%xmm12, %%xmm1\n\t"                  \
-    "movdqa	144(%[KEY]), %%xmm12\n\t"             \
-    "aesenc	%%xmm12, " VAR(HR) "\n\t"             \
-    "aesenc	%%xmm12, %%xmm1\n\t"                  \
-    "cmpl	$11, %[nr]\n\t"                       \
-    "movdqa	160(%[KEY]), %%xmm12\n\t"             \
-    "jl	31f\n\t"                                      \
-    "aesenc	%%xmm12, " VAR(HR) "\n\t"             \
-    "aesenc	%%xmm12, %%xmm1\n\t"                  \
-    "movdqa	176(%[KEY]), %%xmm12\n\t"             \
-    "aesenc	%%xmm12, " VAR(HR) "\n\t"             \
-    "aesenc	%%xmm12, %%xmm1\n\t"                  \
-    "cmpl	$13, %[nr]\n\t"                       \
-    "movdqa	192(%[KEY]), %%xmm12\n\t"             \
-    "jl	31f\n\t"                                      \
-    "aesenc	%%xmm12, " VAR(HR) "\n\t"             \
-    "aesenc	%%xmm12, %%xmm1\n\t"                  \
-    "movdqu	208(%[KEY]), %%xmm12\n\t"             \
-    "aesenc	%%xmm12, " VAR(HR) "\n\t"             \
-    "aesenc	%%xmm12, %%xmm1\n\t"                  \
-    "movdqu	224(%[KEY]), %%xmm12\n\t"             \
-    "31:\n\t"                                         \
-    "aesenclast	%%xmm12, " VAR(HR) "\n\t"             \
-    "aesenclast	%%xmm12, %%xmm1\n\t"                  \
-    "pshufb	%[BSWAP_MASK], " VAR(HR) "\n\t"       \
-    "movdqu	%%xmm1, " VAR(TR) "\n\t"              \
-    "jmp	39f\n\t"
-
-#define CALC_IV()                                    \
-    "# Calculate values when IV is not 12 bytes\n\t" \
-    "# H = Encrypt X(=0)\n\t"                        \
-    "movdqa	0(%[KEY]), " VAR(HR) "\n\t"          \
-    AESENC_AVX(HR)                                   \
-    "pshufb	%[BSWAP_MASK], " VAR(HR) "\n\t"      \
-    "# Calc counter\n\t"                             \
-    "# Initialization vector\n\t"                    \
-    "cmpl	$0, %%edx\n\t"                       \
-    "movq	$0, %%rcx\n\t"                       \
-    "je	45f\n\t"                                     \
-    "cmpl	$16, %%edx\n\t"                      \
-    "jl	44f\n\t"                                     \
-    "andl	$0xfffffff0, %%edx\n\t"              \
-    "\n"                                             \
-    "43:\n\t"                                        \
-    "movdqu	(%%rax,%%rcx,1), %%xmm4\n\t"         \
-    "pshufb	%[BSWAP_MASK], %%xmm4\n\t"           \
-    "pxor	%%xmm4, %%xmm13\n\t"                 \
-    GHASH_FULL_AVX(%%xmm13, %%xmm12, %%xmm13, HR)    \
-    "addl	$16, %%ecx\n\t"                      \
-    "cmpl	%%edx, %%ecx\n\t"                    \
-    "jl	43b\n\t"                                     \
-    "movl	%[ibytes], %%edx\n\t"                \
-    "cmpl	%%edx, %%ecx\n\t"                    \
-    "je	45f\n\t"                                     \
-    "\n"                                             \
-    "44:\n\t"                                        \
-    "subq	$16, %%rsp\n\t"                      \
-    "pxor	%%xmm4, %%xmm4\n\t"                  \
-    "xorl	%%ebx, %%ebx\n\t"                    \
-    "movdqu	%%xmm4, (%%rsp)\n\t"                 \
-    "42:\n\t"                                        \
-    "movzbl	(%%rax,%%rcx,1), %%r13d\n\t"         \
-    "movb	%%r13b, (%%rsp,%%rbx,1)\n\t"         \
-    "incl	%%ecx\n\t"                           \
-    "incl	%%ebx\n\t"                           \
-    "cmpl	%%edx, %%ecx\n\t"                    \
-    "jl	42b\n\t"                                     \
-    "movdqu	(%%rsp), %%xmm4\n\t"                 \
-    "addq	$16, %%rsp\n\t"                      \
-    "pshufb	%[BSWAP_MASK], %%xmm4\n\t"           \
-    "pxor	%%xmm4, %%xmm13\n\t"                 \
-    GHASH_FULL_AVX(%%xmm13, %%xmm12, %%xmm13, HR)    \
-    "\n"                                             \
-    "45:\n\t"                                        \
-    "# T = Encrypt counter\n\t"                      \
-    "pxor	%%xmm0, %%xmm0\n\t"                  \
-    "shll	$3, %%edx\n\t"                       \
-    "pinsrq	$0, %%rdx, %%xmm0\n\t"               \
-    "pxor	%%xmm0, %%xmm13\n\t"                 \
-    GHASH_FULL_AVX(%%xmm13, %%xmm12, %%xmm13, HR)    \
-    "pshufb	%[BSWAP_MASK], %%xmm13\n\t"          \
-    "#   Encrypt counter\n\t"                        \
-    "movdqa	0(%[KEY]), %%xmm4\n\t"               \
-    "pxor	%%xmm13, %%xmm4\n\t"                 \
-    AESENC_AVX(%%xmm4)                               \
-    "movdqu	%%xmm4, " VAR(TR) "\n\t"
-
-#define CALC_AAD()                           \
-    "# Additional authentication data\n\t"   \
-    "movl	%[abytes], %%edx\n\t"        \
-    "cmpl	$0, %%edx\n\t"               \
-    "je		25f\n\t"                     \
-    "movq	%[addt], %%rax\n\t"          \
-    "xorl	%%ecx, %%ecx\n\t"            \
-    "cmpl	$16, %%edx\n\t"              \
-    "jl		24f\n\t"                     \
-    "andl	$0xfffffff0, %%edx\n\t"      \
-    "\n"                                     \
-    "23:\n\t"                                \
-    "movdqu	(%%rax,%%rcx,1), %%xmm4\n\t" \
-    "pshufb	%[BSWAP_MASK], %%xmm4\n\t"   \
-    "pxor	%%xmm4, " VAR(XR) "\n\t"     \
-    GHASH_FULL_AVX(XR, %%xmm12, XR, HR)      \
-    "addl	$16, %%ecx\n\t"              \
-    "cmpl	%%edx, %%ecx\n\t"            \
-    "jl		23b\n\t"                     \
-    "movl	%[abytes], %%edx\n\t"        \
-    "cmpl	%%edx, %%ecx\n\t"            \
-    "je		25f\n\t"                     \
-    "\n"                                     \
-    "24:\n\t"                                \
-    "subq	$16, %%rsp\n\t"              \
-    "pxor	%%xmm4, %%xmm4\n\t"          \
-    "xorl	%%ebx, %%ebx\n\t"            \
-    "movdqu	%%xmm4, (%%rsp)\n\t"         \
-    "22:\n\t"                                \
-    "movzbl	(%%rax,%%rcx,1), %%r13d\n\t" \
-    "movb	%%r13b, (%%rsp,%%rbx,1)\n\t" \
-    "incl	%%ecx\n\t"                   \
-    "incl	%%ebx\n\t"                   \
-    "cmpl	%%edx, %%ecx\n\t"            \
-    "jl		22b\n\t"                     \
-    "movdqu	(%%rsp), %%xmm4\n\t"         \
-    "addq	$16, %%rsp\n\t"              \
-    "pshufb	%[BSWAP_MASK], %%xmm4\n\t"   \
-    "pxor	%%xmm4, " VAR(XR) "\n\t"     \
-    GHASH_FULL_AVX(XR, %%xmm12, XR, HR)      \
-    "\n"                                     \
-    "25:\n\t"
-
-#define CALC_HT_8_AVX()                            \
-    "movdqa	" VAR(XR) ", %%xmm2\n\t"           \
-    "# H ^ 1\n\t"                                  \
-    "movdqu	" VAR(HR) ", 0(" VAR(HTR) ")\n\t"  \
-    "# H ^ 2\n\t"                                  \
-    GHASH_GFMUL_RED_AVX(%%xmm0, HR, HR)            \
-    "movdqu	%%xmm0 ,  16(" VAR(HTR) ")\n\t"    \
-    "# H ^ 3\n\t"                                  \
-    GHASH_GFMUL_RED_AVX(%%xmm1, HR, %%xmm0)        \
-    "movdqu	%%xmm1 ,  32(" VAR(HTR) ")\n\t"    \
-    "# H ^ 4\n\t"                                  \
-    GHASH_GFMUL_RED_AVX(%%xmm3, %%xmm0, %%xmm0)    \
-    "movdqu	%%xmm3 ,  48(" VAR(HTR) ")\n\t"    \
-    "# H ^ 5\n\t"                                  \
-    GHASH_GFMUL_RED_AVX(%%xmm12, %%xmm0, %%xmm1)   \
-    "movdqu	%%xmm12,  64(" VAR(HTR) ")\n\t"    \
-    "# H ^ 6\n\t"                                  \
-    GHASH_GFMUL_RED_AVX(%%xmm12, %%xmm1, %%xmm1)   \
-    "movdqu	%%xmm12,  80(" VAR(HTR) ")\n\t"    \
-    "# H ^ 7\n\t"                                  \
-    GHASH_GFMUL_RED_AVX(%%xmm12, %%xmm1, %%xmm3)   \
-    "movdqu	%%xmm12,  96(" VAR(HTR) ")\n\t"    \
-    "# H ^ 8\n\t"                                  \
-    GHASH_GFMUL_RED_AVX(%%xmm12, %%xmm3, %%xmm3)   \
-    "movdqu	%%xmm12, 112(" VAR(HTR) ")\n\t"
-
-#define AESENC_128_GHASH_AVX(src, o)                 \
-    "leaq	(%[in]," VAR(KR64) ",1), %%rcx\n\t"  \
-    "leaq	(%[out]," VAR(KR64) ",1), %%rdx\n\t" \
-    /* src is either %%rcx or %%rdx */               \
-    AESENC_CTR()                                     \
-    AESENC_XOR()                                     \
-    AESENC_PCLMUL_1(src,  16, o-128, 112)            \
-    AESENC_PCLMUL_N(src,  32, o-112,  96)            \
-    AESENC_PCLMUL_N(src,  48, o -96,  80)            \
-    AESENC_PCLMUL_N(src,  64, o -80,  64)            \
-    AESENC_PCLMUL_N(src,  80, o -64,  48)            \
-    AESENC_PCLMUL_N(src,  96, o -48,  32)            \
-    AESENC_PCLMUL_N(src, 112, o -32,  16)            \
-    AESENC_PCLMUL_N(src, 128, o -16,   0)            \
-    AESENC_PCLMUL_L(144)                             \
-    "cmpl	$11, %[nr]\n\t"                      \
-    "movdqa	160(%[KEY]), %%xmm12\n\t"            \
-    "jl		4f\n\t"                              \
-    AESENC()                                         \
-    AESENC_SET(176)                                  \
-    "cmpl	$13, %[nr]\n\t"                      \
-    "movdqa	192(%[KEY]), %%xmm12\n\t"            \
-    "jl		4f\n\t"                              \
-    AESENC()                                         \
-    AESENC_SET(208)                                  \
-    "movdqa	224(%[KEY]), %%xmm12\n\t"            \
-    "\n"                                             \
-"4:\n\t"                                             \
-    AESENC_LAST(%%rcx, %%rdx)
-
-#define AESENC_LAST15_ENC_AVX()                       \
-    "movl	%[nbytes], %%ecx\n\t"                 \
-    "movl	%%ecx, %%edx\n\t"                     \
-    "andl	$0x0f, %%ecx\n\t"                     \
-    "jz		55f\n\t"                              \
-    "movdqu	" VAR(CTR1) ", %%xmm13\n\t"           \
-    "pshufb	%[BSWAP_EPI64], %%xmm13\n\t"          \
-    "pxor	0(%[KEY]), %%xmm13\n\t"               \
-    AESENC_AVX(%%xmm13)                               \
-    "subq	$16, %%rsp\n\t"                       \
-    "xorl	%%ecx, %%ecx\n\t"                     \
-    "movdqu	%%xmm13, (%%rsp)\n\t"                 \
-    "\n"                                              \
-    "51:\n\t"                                         \
-    "movzbl	(%[in]," VAR(KR64) ",1), %%r13d\n\t"  \
-    "xorb	(%%rsp,%%rcx,1), %%r13b\n\t"          \
-    "movb	%%r13b, (%[out]," VAR(KR64) ",1)\n\t" \
-    "movb	%%r13b, (%%rsp,%%rcx,1)\n\t"          \
-    "incl	" VAR(KR) "\n\t"                      \
-    "incl	%%ecx\n\t"                            \
-    "cmpl	%%edx, " VAR(KR) "\n\t"               \
-    "jl		51b\n\t"                              \
-    "xorq	%%r13, %%r13\n\t"                     \
-    "cmpl	$16, %%ecx\n\t"                       \
-    "je		53f\n\t"                              \
-    "\n"                                              \
-    "52:\n\t"                                         \
-    "movb	%%r13b, (%%rsp,%%rcx,1)\n\t"          \
-    "incl	%%ecx\n\t"                            \
-    "cmpl	$16, %%ecx\n\t"                       \
-    "jl		52b\n\t"                              \
-    "53:\n\t"                                         \
-    "movdqu	(%%rsp), %%xmm13\n\t"                 \
-    "addq	$16, %%rsp\n\t"                       \
-    "pshufb	%[BSWAP_MASK], %%xmm13\n\t"           \
-    "pxor	%%xmm13, " VAR(XR) "\n\t"             \
-    GHASH_GFMUL_RED_AVX(XR, HR, XR)                   \
-
-#define AESENC_LAST15_DEC_AVX()                       \
-    "movl	%[nbytes], %%ecx\n\t"                 \
-    "movl	%%ecx, %%edx\n\t"                     \
-    "andl	$0x0f, %%ecx\n\t"                     \
-    "jz		55f\n\t"                              \
-    "movdqu	" VAR(CTR1) ", %%xmm13\n\t"           \
-    "pshufb	%[BSWAP_EPI64], %%xmm13\n\t"          \
-    "pxor	0(%[KEY]), %%xmm13\n\t"               \
-    AESENC_AVX(%%xmm13)                               \
-    "subq	$32, %%rsp\n\t"                       \
-    "xorl	%%ecx, %%ecx\n\t"                     \
-    "movdqu	%%xmm13, (%%rsp)\n\t"                 \
-    "pxor	%%xmm0, %%xmm0\n\t"                   \
-    "movdqu	%%xmm0, 16(%%rsp)\n\t"                \
-    "\n"                                              \
-    "51:\n\t"                                         \
-    "movzbl	(%[in]," VAR(KR64) ",1), %%r13d\n\t"  \
-    "movb	%%r13b, 16(%%rsp,%%rcx,1)\n\t"        \
-    "xorb	(%%rsp,%%rcx,1), %%r13b\n\t"          \
-    "movb	%%r13b, (%[out]," VAR(KR64) ",1)\n\t" \
-    "incl	" VAR(KR) "\n\t"                      \
-    "incl	%%ecx\n\t"                            \
-    "cmpl	%%edx, " VAR(KR) "\n\t"               \
-    "jl		51b\n\t"                              \
-    "53:\n\t"                                         \
-    "movdqu	16(%%rsp), %%xmm13\n\t"               \
-    "addq	$32, %%rsp\n\t"                       \
-    "pshufb	%[BSWAP_MASK], %%xmm13\n\t"           \
-    "pxor	%%xmm13, " VAR(XR) "\n\t"             \
-    GHASH_GFMUL_RED_AVX(XR, HR, XR)                   \
-
-#define CALC_TAG()                              \
-    "movl	%[nbytes], %%edx\n\t"           \
-    "movl	%[abytes], %%ecx\n\t"           \
-    "shlq	$3, %%rdx\n\t"                  \
-    "shlq	$3, %%rcx\n\t"                  \
-    "pinsrq	$0, %%rdx, %%xmm0\n\t"          \
-    "pinsrq	$1, %%rcx, %%xmm0\n\t"          \
-    "pxor	%%xmm0, " VAR(XR) "\n\t"        \
-    GHASH_GFMUL_RED_AVX(XR, HR, XR)             \
-    "pshufb	%[BSWAP_MASK], " VAR(XR) "\n\t" \
-    "movdqu	" VAR(TR) ", %%xmm0\n\t"        \
-    "pxor	" VAR(XR) ", %%xmm0\n\t"        \
-
-#define STORE_TAG()                           \
-    "cmpl	$16, %[tbytes]\n\t"           \
-    "je		71f\n\t"                      \
-    "xorq	%%rcx, %%rcx\n\t"             \
-    "movdqu	%%xmm0, (%%rsp)\n\t"          \
-    "73:\n\t"                                 \
-    "movzbl	(%%rsp,%%rcx,1), %%r13d\n\t"  \
-    "movb	%%r13b, (%[tag],%%rcx,1)\n\t" \
-    "incl	%%ecx\n\t"                    \
-    "cmpl	%[tbytes], %%ecx\n\t"         \
-    "jne	73b\n\t"                      \
-    "jmp	72f\n\t"                      \
-    "\n"                                      \
-    "71:\n\t"                                 \
-    "movdqu	%%xmm0, (%[tag])\n\t"         \
-    "\n"                                      \
-    "72:\n\t"
-
-#define CMP_TAG()                                          \
-    "cmpl	$16, %[tbytes]\n\t"                        \
-    "je		71f\n\t"                                   \
-    "subq	$16, %%rsp\n\t"                            \
-    "xorq	%%rcx, %%rcx\n\t"                          \
-    "xorq	%%rax, %%rax\n\t"                          \
-    "movdqu	%%xmm0, (%%rsp)\n\t"                       \
-    "\n"                                                   \
-    "73:\n\t"                                              \
-    "movzbl	(%%rsp,%%rcx,1), %%r13d\n\t"               \
-    "xorb	(%[tag],%%rcx,1), %%r13b\n\t"              \
-    "orb	%%r13b, %%al\n\t"                          \
-    "incl	%%ecx\n\t"                                 \
-    "cmpl	%[tbytes], %%ecx\n\t"                      \
-    "jne	73b\n\t"                                   \
-    "cmpb	$0x00, %%al\n\t"                           \
-    "sete	%%al\n\t"                                  \
-    "addq	$16, %%rsp\n\t"                            \
-    "xorq	%%rcx, %%rcx\n\t"                          \
-    "jmp	72f\n\t"                                   \
-    "\n"                                                   \
-    "71:\n\t"                                              \
-    "movdqu	(%[tag]), %%xmm1\n\t"                      \
-    "pcmpeqb	%%xmm1, %%xmm0\n\t"                        \
-    "pmovmskb	%%xmm0, %%edx\n\t"                         \
-    "# %%edx == 0xFFFF then return 1 else => return 0\n\t" \
-    "xorl	%%eax, %%eax\n\t"                          \
-    "cmpl	$0xffff, %%edx\n\t"                        \
-    "sete	%%al\n\t"                                  \
-    "\n"                                                   \
-    "72:\n\t"                                              \
-    "movl	%%eax, (%[res])\n\t"
-
-static void AES_GCM_encrypt(const unsigned char *in, unsigned char *out,
-                            const unsigned char* addt,
-                            const unsigned char* ivec, unsigned char *tag,
-                            unsigned int nbytes, unsigned int abytes,
-                            unsigned int ibytes, unsigned int tbytes,
-                            const unsigned char* key, int nr)
-{
-    register const unsigned char* iv asm("rax") = ivec;
-    register unsigned int ivLen asm("ebx") = ibytes;
-
-    __asm__ __volatile__ (
-        "subq	$" VAR(STACK_OFFSET) ", %%rsp\n\t"
-        /* Counter is xmm13 */
-        "pxor	%%xmm13, %%xmm13\n\t"
-        "pxor	" VAR(XR) ", " VAR(XR) "\n\t"
-        "movl	%[ibytes], %%edx\n\t"
-        "cmpl	$12, %%edx\n\t"
-        "jne	35f\n\t"
-        CALC_IV_12()
-        "\n"
-        "35:\n\t"
-        CALC_IV()
-        "\n"
-        "39:\n\t"
-
-        CALC_AAD()
-
-        "# Calculate counter and H\n\t"
-        "pshufb	%[BSWAP_EPI64], %%xmm13\n\t"
-        "movdqa	" VAR(HR) ", %%xmm5\n\t"
-        "paddd	%[ONE], %%xmm13\n\t"
-        "movdqa	" VAR(HR) ", %%xmm4\n\t"
-        "movdqu	%%xmm13, " VAR(CTR1) "\n\t"
-        "psrlq	$63, %%xmm5\n\t"
-        "psllq	$1, %%xmm4\n\t"
-        "pslldq	$8, %%xmm5\n\t"
-        "por	%%xmm5, %%xmm4\n\t"
-        "pshufd	$0xff, " VAR(HR) ", " VAR(HR) "\n\t"
-        "psrad	$31, " VAR(HR) "\n\t"
-        "pand	%[MOD2_128], " VAR(HR) "\n\t"
-        "pxor	%%xmm4, " VAR(HR) "\n\t"
-
-        "xorl	" VAR(KR) ", " VAR(KR) "\n\t"
-
-#if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX1_NO_UNROLL)
-        "cmpl	$128, %[nbytes]\n\t"
-        "movl	%[nbytes], %%r13d\n\t"
-        "jl	5f\n\t"
-        "andl	$0xffffff80, %%r13d\n\t"
-
-        CALC_HT_8_AVX()
-
-        "# First 128 bytes of input\n\t"
-        AESENC_CTR()
-        AESENC_XOR()
-        AESENC_SET(16)
-        AESENC_SET(32)
-        AESENC_SET(48)
-        AESENC_SET(64)
-        AESENC_SET(80)
-        AESENC_SET(96)
-        AESENC_SET(112)
-        AESENC_SET(128)
-        AESENC_SET(144)
-        "cmpl	$11, %[nr]\n\t"
-        "movdqa	160(%[KEY]), %%xmm12\n\t"
-        "jl	1f\n\t"
-        AESENC()
-        AESENC_SET(176)
-        "cmpl	$13, %[nr]\n\t"
-        "movdqa	192(%[KEY]), %%xmm12\n\t"
-        "jl	1f\n\t"
-        AESENC()
-        AESENC_SET(208)
-        "movdqa	224(%[KEY]), %%xmm12\n\t"
-        "\n"
-    "1:\n\t"
-        AESENC_LAST(%[in], %[out])
-
-        "cmpl	$128, %%r13d\n\t"
-        "movl	$128, " VAR(KR) "\n\t"
-        "jle	2f\n\t"
-
-        "# More 128 bytes of input\n\t"
-        "\n"
-    "3:\n\t"
-        AESENC_128_GHASH_AVX(%%rdx, 0)
-        "addl	$128, " VAR(KR) "\n\t"
-        "cmpl	%%r13d, " VAR(KR) "\n\t"
-        "jl	3b\n\t"
-        "\n"
-    "2:\n\t"
-        "movdqa	%[BSWAP_MASK], %%xmm13\n\t"
-        "pshufb	%%xmm13, %%xmm4\n\t"
-        "pshufb	%%xmm13, %%xmm5\n\t"
-        "pshufb	%%xmm13, %%xmm6\n\t"
-        "pshufb	%%xmm13, %%xmm7\n\t"
-        "pxor	%%xmm2, %%xmm4\n\t"
-        "pshufb	%%xmm13, %%xmm8\n\t"
-        "pshufb	%%xmm13, %%xmm9\n\t"
-        "pshufb	%%xmm13, %%xmm10\n\t"
-        "pshufb	%%xmm13, %%xmm11\n\t"
-
-        "movdqu	112(" VAR(HTR) "), %%xmm12\n\t"
-        GHASH_GFMUL_AVX(XR, %%xmm13, %%xmm4, %%xmm12)
-        "movdqu	 96(" VAR(HTR) "), %%xmm12\n\t"
-        GHASH_GFMUL_XOR_AVX(XR, %%xmm13, %%xmm5, %%xmm12)
-        "movdqu	 80(" VAR(HTR) "), %%xmm12\n\t"
-        GHASH_GFMUL_XOR_AVX(XR, %%xmm13, %%xmm6, %%xmm12)
-        "movdqu	 64(" VAR(HTR) "), %%xmm12\n\t"
-        GHASH_GFMUL_XOR_AVX(XR, %%xmm13, %%xmm7, %%xmm12)
-        "movdqu	 48(" VAR(HTR) "), %%xmm12\n\t"
-        GHASH_GFMUL_XOR_AVX(XR, %%xmm13, %%xmm8, %%xmm12)
-        "movdqu	 32(" VAR(HTR) "), %%xmm12\n\t"
-        GHASH_GFMUL_XOR_AVX(XR, %%xmm13, %%xmm9, %%xmm12)
-        "movdqu	 16(" VAR(HTR) "), %%xmm12\n\t"
-        GHASH_GFMUL_XOR_AVX(XR, %%xmm13, %%xmm10, %%xmm12)
-        "movdqu	   (" VAR(HTR) "), %%xmm12\n\t"
-        GHASH_GFMUL_RED_XOR_AVX(XR, %%xmm13, %%xmm11, %%xmm12)
-
-        "movdqu	0(" VAR(HTR) "), " VAR(HR) "\n\t"
-        "\n"
-    "5:\n\t"
-        "movl	%[nbytes], %%edx\n\t"
-        "cmpl	%%edx, " VAR(KR) "\n\t"
-        "jge	55f\n\t"
-#endif
-
-        "movl	%[nbytes], %%r13d\n\t"
-        "andl	$0xfffffff0, %%r13d\n\t"
-        "cmpl	%%r13d, " VAR(KR) "\n\t"
-        "jge	14f\n\t"
-
-        "leaq	(%[in]," VAR(KR64) ",1), %%rcx\n\t"
-        "leaq	(%[out]," VAR(KR64) ",1), %%rdx\n\t"
-        AESENC_BLOCK(%%rcx, %%rdx)
-        "addl	$16, " VAR(KR) "\n\t"
-        "cmpl	%%r13d, " VAR(KR) "\n\t"
-        "jge	13f\n\t"
-        "\n"
-        "12:\n\t"
-        "leaq	(%[in]," VAR(KR64) ",1), %%rcx\n\t"
-        "leaq	(%[out]," VAR(KR64) ",1), %%rdx\n\t"
-        AESENC_GFMUL(%%rcx, %%rdx, HR, XR)
-        "pshufb	%[BSWAP_MASK], %%xmm4\n\t"
-        "pxor	%%xmm4, " VAR(XR) "\n\t"
-        "addl	$16, " VAR(KR) "\n\t"
-        "cmpl	%%r13d, " VAR(KR) "\n\t"
-        "jl	12b\n\t"
-        "\n"
-        "13:\n\t"
-        GHASH_GFMUL_RED_AVX(XR, HR, XR)
-        "\n"
-        "14:\n\t"
-
-        AESENC_LAST15_ENC_AVX()
-        "\n"
-        "55:\n\t"
-
-        CALC_TAG()
-        STORE_TAG()
-        "addq	$" VAR(STACK_OFFSET) ", %%rsp\n\t"
-
-        :
-        : [KEY] "r" (key),
-          [in] "r" (in), [out] "r" (out), [nr] "r" (nr),
-          [nbytes] "r" (nbytes), [abytes] "r" (abytes), [addt] "r" (addt),
-          [ivec] "r" (iv), [ibytes] "r" (ivLen), [tbytes] "r" (tbytes),
-          [tag] "r" (tag),
-          [BSWAP_MASK] "m" (BSWAP_MASK),
-          [BSWAP_EPI64] "m" (BSWAP_EPI64),
-          [ONE] "m" (ONE),
-#if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX1_NO_UNROLL)
-          [TWO] "m" (TWO), [THREE] "m" (THREE), [FOUR] "m" (FOUR),
-          [FIVE] "m" (FIVE), [SIX] "m" (SIX), [SEVEN] "m" (SEVEN),
-          [EIGHT] "m" (EIGHT),
-#endif
-          [MOD2_128] "m" (MOD2_128)
-        : "xmm15", "xmm14", "xmm13", "xmm12",
-          "xmm0", "xmm1", "xmm2", "xmm3", "memory",
-          "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11",
-          "rcx", "rdx", "r13"
-    );
-}
-
-#ifdef HAVE_INTEL_AVX1
-/* Encrypt with key in xmm12. */
-#define VAESENC()                              \
-    "vaesenc	%%xmm12, %%xmm4, %%xmm4\n\t"   \
-    "vaesenc	%%xmm12, %%xmm5, %%xmm5\n\t"   \
-    "vaesenc	%%xmm12, %%xmm6, %%xmm6\n\t"   \
-    "vaesenc	%%xmm12, %%xmm7, %%xmm7\n\t"   \
-    "vaesenc	%%xmm12, %%xmm8, %%xmm8\n\t"   \
-    "vaesenc	%%xmm12, %%xmm9, %%xmm9\n\t"   \
-    "vaesenc	%%xmm12, %%xmm10, %%xmm10\n\t" \
-    "vaesenc	%%xmm12, %%xmm11, %%xmm11\n\t"
-
-#define VAESENC_SET(o)                         \
-    "vmovdqa	"#o"(%[KEY]), %%xmm12\n\t"     \
-    VAESENC()
-
-#define VAESENC_CTR()                          \
-    "vmovdqu	" VAR(CTR1) ", %%xmm0\n\t"     \
-    "vmovdqa	%[BSWAP_EPI64], %%xmm1\n\t"    \
-    "vpshufb	%%xmm1, %%xmm0, %%xmm4\n\t"    \
-    "vpaddd	%[ONE], %%xmm0, %%xmm5\n\t"    \
-    "vpshufb	%%xmm1, %%xmm5, %%xmm5\n\t"    \
-    "vpaddd	%[TWO], %%xmm0, %%xmm6\n\t"    \
-    "vpshufb	%%xmm1, %%xmm6, %%xmm6\n\t"    \
-    "vpaddd	%[THREE], %%xmm0, %%xmm7\n\t"  \
-    "vpshufb	%%xmm1, %%xmm7, %%xmm7\n\t"    \
-    "vpaddd	%[FOUR], %%xmm0, %%xmm8\n\t"   \
-    "vpshufb	%%xmm1, %%xmm8, %%xmm8\n\t"    \
-    "vpaddd	%[FIVE], %%xmm0, %%xmm9\n\t"   \
-    "vpshufb	%%xmm1, %%xmm9, %%xmm9\n\t"    \
-    "vpaddd	%[SIX], %%xmm0, %%xmm10\n\t"   \
-    "vpshufb	%%xmm1, %%xmm10, %%xmm10\n\t"  \
-    "vpaddd	%[SEVEN], %%xmm0, %%xmm11\n\t" \
-    "vpshufb	%%xmm1, %%xmm11, %%xmm11\n\t"  \
-    "vpaddd	%[EIGHT], %%xmm0, %%xmm0\n\t"
-
-#define VAESENC_XOR()                          \
-    "vmovdqa	(%[KEY]), %%xmm12\n\t"         \
-    "vmovdqu	%%xmm0, " VAR(CTR1) "\n\t"     \
-    "vpxor	%%xmm12, %%xmm4, %%xmm4\n\t"   \
-    "vpxor	%%xmm12, %%xmm5, %%xmm5\n\t"   \
-    "vpxor	%%xmm12, %%xmm6, %%xmm6\n\t"   \
-    "vpxor	%%xmm12, %%xmm7, %%xmm7\n\t"   \
-    "vpxor	%%xmm12, %%xmm8, %%xmm8\n\t"   \
-    "vpxor	%%xmm12, %%xmm9, %%xmm9\n\t"   \
-    "vpxor	%%xmm12, %%xmm10, %%xmm10\n\t" \
-    "vpxor	%%xmm12, %%xmm11, %%xmm11\n\t"
-
-#define VAESENC_128()                     \
-    VAESENC_CTR()                         \
-    VAESENC_XOR()                         \
-    VAESENC_SET(16)                       \
-    VAESENC_SET(32)                       \
-    VAESENC_SET(48)                       \
-    VAESENC_SET(64)                       \
-    VAESENC_SET(80)                       \
-    VAESENC_SET(96)                       \
-    VAESENC_SET(112)                      \
-    VAESENC_SET(128)                      \
-    VAESENC_SET(144)                      \
-    "cmpl	$11, %[nr]\n\t"           \
-    "vmovdqa	160(%[KEY]), %%xmm12\n\t" \
-    "jl	1f\n\t"                           \
-    VAESENC()                             \
-    VAESENC_SET(176)                      \
-    "cmpl	$13, %[nr]\n\t"           \
-    "vmovdqa	192(%[KEY]), %%xmm12\n\t" \
-    "jl	1f\n\t"                           \
-    VAESENC()                             \
-    VAESENC_SET(208)                      \
-    "vmovdqa	224(%[KEY]), %%xmm12\n\t" \
-    "\n"                                  \
-"1:\n\t"                                  \
-    VAESENC_LAST(%[in], %[out])
-
-/* Encrypt and carry-less multiply for AVX1. */
-#define VAESENC_PCLMUL_1(src, o1, o2, o3)              \
-    "vmovdqu	" #o3 "(" VAR(HTR) "), %%xmm12\n\t"    \
-    "vmovdqu	" #o2 "(" #src "), %%xmm0\n\t"         \
-    "vaesenc	" #o1 "(%[KEY]), %%xmm4, %%xmm4\n\t"   \
-    "vpshufb	%[BSWAP_MASK], %%xmm0, %%xmm0\n\t"     \
-    "vpxor	%%xmm2, %%xmm0, %%xmm0\n\t"            \
-    "vpshufd	$0x4e, %%xmm12, %%xmm1\n\t"            \
-    "vpshufd	$0x4e, %%xmm0, %%xmm14\n\t"            \
-    "vpxor	%%xmm12, %%xmm1, %%xmm1\n\t"           \
-    "vpxor	%%xmm0, %%xmm14, %%xmm14\n\t"          \
-    "vpclmulqdq	$0x11, %%xmm12, %%xmm0, %%xmm3\n\t"    \
-    "vaesenc	" #o1 "(%[KEY]), %%xmm5, %%xmm5\n\t"   \
-    "vaesenc	" #o1 "(%[KEY]), %%xmm6, %%xmm6\n\t"   \
-    "vpclmulqdq	$0x00, %%xmm12, %%xmm0, %%xmm2\n\t"    \
-    "vaesenc	" #o1 "(%[KEY]), %%xmm7, %%xmm7\n\t"   \
-    "vaesenc	" #o1 "(%[KEY]), %%xmm8, %%xmm8\n\t"   \
-    "vpclmulqdq	$0x00, %%xmm14, %%xmm1, %%xmm1\n\t"    \
-    "vaesenc	" #o1 "(%[KEY]), %%xmm9, %%xmm9\n\t"   \
-    "vaesenc	" #o1 "(%[KEY]), %%xmm10, %%xmm10\n\t" \
-    "vaesenc	" #o1 "(%[KEY]), %%xmm11, %%xmm11\n\t" \
-    "vpxor      %%xmm2, %%xmm1, %%xmm1\n\t"            \
-    "vpxor      %%xmm3, %%xmm1, %%xmm1\n\t"            \
-
-#define VAESENC_PCLMUL_N(src, o1, o2, o3)               \
-    "vmovdqu	" #o3 "(" VAR(HTR) "), %%xmm12\n\t"     \
-    "vmovdqu	" #o2 "(" #src "), %%xmm0\n\t"          \
-    "vpshufd	$0x4e, %%xmm12, %%xmm13\n\t"            \
-    "vpshufb	%[BSWAP_MASK], %%xmm0, %%xmm0\n\t"      \
-    "vaesenc	" #o1 "(%[KEY]), %%xmm4, %%xmm4\n\t"    \
-    "vpxor	%%xmm12, %%xmm13, %%xmm13\n\t"          \
-    "vpshufd	$0x4e, %%xmm0, %%xmm14\n\t"             \
-    "vpxor	%%xmm0, %%xmm14, %%xmm14\n\t"           \
-    "vpclmulqdq	$0x11, %%xmm12, %%xmm0, %%xmm15\n\t"    \
-    "vaesenc	" #o1 "(%[KEY]), %%xmm5, %%xmm5\n\t"    \
-    "vaesenc	" #o1 "(%[KEY]), %%xmm6, %%xmm6\n\t"    \
-    "vpclmulqdq	$0x00, %%xmm12, %%xmm0, %%xmm12\n\t"    \
-    "vaesenc	" #o1 "(%[KEY]), %%xmm7, %%xmm7\n\t"    \
-    "vaesenc	" #o1 "(%[KEY]), %%xmm8, %%xmm8\n\t"    \
-    "vpclmulqdq	$0x00, %%xmm14, %%xmm13, %%xmm13\n\t"   \
-    "vaesenc	" #o1 "(%[KEY]), %%xmm9, %%xmm9\n\t"    \
-    "vaesenc	" #o1 "(%[KEY]), %%xmm10, %%xmm10\n\t"  \
-    "vaesenc	" #o1 "(%[KEY]), %%xmm11, %%xmm11\n\t"  \
-    "vpxor      %%xmm12, %%xmm1, %%xmm1\n\t"            \
-    "vpxor      %%xmm12, %%xmm2, %%xmm2\n\t"            \
-    "vpxor      %%xmm15, %%xmm1, %%xmm1\n\t"            \
-    "vpxor      %%xmm15, %%xmm3, %%xmm3\n\t"            \
-    "vpxor      %%xmm13, %%xmm1, %%xmm1\n\t"            \
-
-#define VAESENC_PCLMUL_L(o)                         \
-    "vpslldq	$8, %%xmm1, %%xmm14\n\t"            \
-    "vpsrldq	$8, %%xmm1, %%xmm1\n\t"             \
-    "vaesenc	"#o"(%[KEY]), %%xmm4, %%xmm4\n\t"   \
-    "vpxor      %%xmm14, %%xmm2, %%xmm2\n\t"        \
-    "vpxor      %%xmm1, %%xmm3, %%xmm3\n\t"         \
-    "vaesenc	"#o"(%[KEY]), %%xmm5, %%xmm5\n\t"   \
-    "vpslld	$31, %%xmm2, %%xmm12\n\t"           \
-    "vpslld	$30, %%xmm2, %%xmm13\n\t"           \
-    "vpslld	$25, %%xmm2, %%xmm14\n\t"           \
-    "vaesenc	"#o"(%[KEY]), %%xmm6, %%xmm6\n\t"   \
-    "vpxor	%%xmm13, %%xmm12, %%xmm12\n\t"      \
-    "vpxor	%%xmm14, %%xmm12, %%xmm12\n\t"      \
-    "vaesenc	"#o"(%[KEY]), %%xmm7, %%xmm7\n\t"   \
-    "vpsrldq	$4, %%xmm12, %%xmm13\n\t"           \
-    "vpslldq	$12, %%xmm12, %%xmm12\n\t"          \
-    "vaesenc	"#o"(%[KEY]), %%xmm8, %%xmm8\n\t"   \
-    "vpxor	%%xmm12, %%xmm2, %%xmm2\n\t"        \
-    "vpsrld	$1, %%xmm2, %%xmm14\n\t"            \
-    "vaesenc	"#o"(%[KEY]), %%xmm9, %%xmm9\n\t"   \
-    "vpsrld	$2, %%xmm2, %%xmm1\n\t"             \
-    "vpsrld	$7, %%xmm2, %%xmm0\n\t"             \
-    "vaesenc	"#o"(%[KEY]), %%xmm10, %%xmm10\n\t" \
-    "vpxor	%%xmm1, %%xmm14, %%xmm14\n\t"       \
-    "vpxor	%%xmm0, %%xmm14, %%xmm14\n\t"       \
-    "vaesenc	"#o"(%[KEY]), %%xmm11, %%xmm11\n\t" \
-    "vpxor	%%xmm13, %%xmm14, %%xmm14\n\t"      \
-    "vpxor	%%xmm14, %%xmm2, %%xmm2\n\t"        \
-    "vpxor	%%xmm3, %%xmm2, %%xmm2\n\t"         \
-
-
-/* Encrypt and carry-less multiply with last key. */
-#define VAESENC_LAST(in, out)                          \
-    "vaesenclast	%%xmm12, %%xmm4, %%xmm4\n\t"   \
-    "vaesenclast	%%xmm12, %%xmm5, %%xmm5\n\t"   \
-    "vmovdqu		   (" #in "), %%xmm0\n\t"      \
-    "vmovdqu		 16(" #in "), %%xmm1\n\t"      \
-    "vpxor		%%xmm0, %%xmm4, %%xmm4\n\t"    \
-    "vpxor		%%xmm1, %%xmm5, %%xmm5\n\t"    \
-    "vmovdqu		%%xmm4,    (" #out ")\n\t"     \
-    "vmovdqu		%%xmm5,  16(" #out ")\n\t"     \
-    "vaesenclast	%%xmm12, %%xmm6, %%xmm6\n\t"   \
-    "vaesenclast	%%xmm12, %%xmm7, %%xmm7\n\t"   \
-    "vmovdqu		 32(" #in "), %%xmm0\n\t"      \
-    "vmovdqu		 48(" #in "), %%xmm1\n\t"      \
-    "vpxor		%%xmm0, %%xmm6, %%xmm6\n\t"    \
-    "vpxor		%%xmm1, %%xmm7, %%xmm7\n\t"    \
-    "vmovdqu		%%xmm6,  32(" #out ")\n\t"     \
-    "vmovdqu		%%xmm7,  48(" #out ")\n\t"     \
-    "vaesenclast	%%xmm12, %%xmm8, %%xmm8\n\t"   \
-    "vaesenclast	%%xmm12, %%xmm9, %%xmm9\n\t"   \
-    "vmovdqu		 64(" #in "), %%xmm0\n\t"      \
-    "vmovdqu		 80(" #in "), %%xmm1\n\t"      \
-    "vpxor		%%xmm0, %%xmm8, %%xmm8\n\t"    \
-    "vpxor		%%xmm1, %%xmm9, %%xmm9\n\t"    \
-    "vmovdqu		%%xmm8,  64(" #out ")\n\t"     \
-    "vmovdqu		%%xmm9,  80(" #out ")\n\t"     \
-    "vaesenclast	%%xmm12, %%xmm10, %%xmm10\n\t" \
-    "vaesenclast	%%xmm12, %%xmm11, %%xmm11\n\t" \
-    "vmovdqu		 96(" #in "), %%xmm0\n\t"      \
-    "vmovdqu		112(" #in "), %%xmm1\n\t"      \
-    "vpxor		%%xmm0, %%xmm10, %%xmm10\n\t"  \
-    "vpxor		%%xmm1, %%xmm11, %%xmm11\n\t"  \
-    "vmovdqu		%%xmm10,  96(" #out ")\n\t"    \
-    "vmovdqu		%%xmm11, 112(" #out ")\n\t"
-
-#define VAESENC_BLOCK()                                       \
-    "vmovdqu		" VAR(CTR1) ", %%xmm5\n\t"            \
-    "vpshufb		%[BSWAP_EPI64], %%xmm5, %%xmm4\n\t"   \
-    "vpaddd		%[ONE], %%xmm5, %%xmm5\n\t"           \
-    "vmovdqu		%%xmm5, " VAR(CTR1) "\n\t"            \
-    "vpxor		(%[KEY]), %%xmm4, %%xmm4\n\t"         \
-    "vaesenc		16(%[KEY]), %%xmm4, %%xmm4\n\t"       \
-    "vaesenc		32(%[KEY]), %%xmm4, %%xmm4\n\t"       \
-    "vaesenc		48(%[KEY]), %%xmm4, %%xmm4\n\t"       \
-    "vaesenc		64(%[KEY]), %%xmm4, %%xmm4\n\t"       \
-    "vaesenc		80(%[KEY]), %%xmm4, %%xmm4\n\t"       \
-    "vaesenc		96(%[KEY]), %%xmm4, %%xmm4\n\t"       \
-    "vaesenc		112(%[KEY]), %%xmm4, %%xmm4\n\t"      \
-    "vaesenc		128(%[KEY]), %%xmm4, %%xmm4\n\t"      \
-    "vaesenc		144(%[KEY]), %%xmm4, %%xmm4\n\t"      \
-    "cmpl		$11, %[nr]\n\t"                       \
-    "vmovdqa		160(%[KEY]), %%xmm5\n\t"              \
-    "jl			%=f\n\t"                              \
-    "vaesenc		%%xmm5, %%xmm4, %%xmm4\n\t"           \
-    "vaesenc		176(%[KEY]), %%xmm4, %%xmm4\n\t"      \
-    "cmpl		$13, %[nr]\n\t"                       \
-    "vmovdqa		192(%[KEY]), %%xmm5\n\t"              \
-    "jl			%=f\n\t"                              \
-    "vaesenc		%%xmm5, %%xmm4, %%xmm4\n\t"           \
-    "vaesenc		208(%[KEY]), %%xmm4, %%xmm4\n\t"      \
-    "vmovdqa		224(%[KEY]), %%xmm5\n\t"              \
-    "%=:\n\t"                                                 \
-    "vaesenclast	%%xmm5, %%xmm4, %%xmm4\n\t"           \
-    "vmovdqu		(%[in]," VAR(KR64) ",1), %%xmm5\n\t"  \
-    "vpxor		%%xmm5, %%xmm4, %%xmm4\n\t"           \
-    "vmovdqu		%%xmm4, (%[out]," VAR(KR64) ",1)\n\t" \
-    "vpshufb		%[BSWAP_MASK], %%xmm4, %%xmm4\n\t"    \
-    "vpxor		%%xmm4, " VAR(XR) ", " VAR(XR) "\n\t"
-
-#define _VAESENC_GFMUL(in, H, X)                              \
-    "vmovdqu		" VAR(CTR1) ", %%xmm5\n\t"            \
-    "vpshufb		%[BSWAP_EPI64], %%xmm5, %%xmm4\n\t"   \
-    "vpaddd		%[ONE], %%xmm5, %%xmm5\n\t"           \
-    "vmovdqu		%%xmm5, " VAR(CTR1) "\n\t"            \
-    "vpxor		(%[KEY]), %%xmm4, %%xmm4\n\t"         \
-    "vpclmulqdq		$0x10, " #H ", " #X ", %%xmm6\n\t"    \
-    "vaesenc		16(%[KEY]), %%xmm4, %%xmm4\n\t"       \
-    "vaesenc		32(%[KEY]), %%xmm4, %%xmm4\n\t"       \
-    "vpclmulqdq		$0x01, " #H ", " #X ", %%xmm7\n\t"    \
-    "vaesenc		48(%[KEY]), %%xmm4, %%xmm4\n\t"       \
-    "vaesenc		64(%[KEY]), %%xmm4, %%xmm4\n\t"       \
-    "vpclmulqdq		$0x00, " #H ", " #X ", %%xmm8\n\t"    \
-    "vaesenc		80(%[KEY]), %%xmm4, %%xmm4\n\t"       \
-    "vpclmulqdq		$0x11, " #H ", " #X ", %%xmm1\n\t"    \
-    "vaesenc		96(%[KEY]), %%xmm4, %%xmm4\n\t"       \
-    "vpxor		%%xmm7, %%xmm6, %%xmm6\n\t"           \
-    "vpslldq		$8, %%xmm6, %%xmm2\n\t"               \
-    "vpsrldq		$8, %%xmm6, %%xmm6\n\t"               \
-    "vaesenc		112(%[KEY]), %%xmm4, %%xmm4\n\t"      \
-    "vpxor		%%xmm8, %%xmm2, %%xmm2\n\t"           \
-    "vpxor		%%xmm6, %%xmm1, %%xmm3\n\t"           \
-    "vmovdqa		%[MOD2_128], %%xmm0\n\t"              \
-    "vpclmulqdq		$0x10, %%xmm0, %%xmm2, %%xmm7\n\t"    \
-    "vaesenc		128(%[KEY]), %%xmm4, %%xmm4\n\t"      \
-    "vpshufd		$0x4e, %%xmm2, %%xmm6\n\t"            \
-    "vpxor		%%xmm7, %%xmm6, %%xmm6\n\t"           \
-    "vpclmulqdq		$0x10, %%xmm0, %%xmm6, %%xmm7\n\t"    \
-    "vaesenc		144(%[KEY]), %%xmm4, %%xmm4\n\t"      \
-    "vpshufd		$0x4e, %%xmm6, %%xmm6\n\t"            \
-    "vpxor		%%xmm7, %%xmm6, %%xmm6\n\t"           \
-    "vpxor		%%xmm3, %%xmm6, " VAR(XR) "\n\t"      \
-    "cmpl		$11, %[nr]\n\t"                       \
-    "vmovdqa		160(%[KEY]), %%xmm5\n\t"              \
-    "jl			1f\n\t"                               \
-    "vaesenc		%%xmm5, %%xmm4, %%xmm4\n\t"           \
-    "vaesenc		176(%[KEY]), %%xmm4, %%xmm4\n\t"      \
-    "cmpl		$13, %[nr]\n\t"                       \
-    "vmovdqa		192(%[KEY]), %%xmm5\n\t"              \
-    "jl			1f\n\t"                               \
-    "vaesenc		%%xmm5, %%xmm4, %%xmm4\n\t"           \
-    "vaesenc		208(%[KEY]), %%xmm4, %%xmm4\n\t"      \
-    "vmovdqa		224(%[KEY]), %%xmm5\n\t"              \
-    "1:\n\t"                                                  \
-    "vaesenclast	%%xmm5, %%xmm4, %%xmm4\n\t"           \
-    "vmovdqu		" #in ", %%xmm0\n\t"                  \
-    "vpxor		%%xmm0, %%xmm4, %%xmm4\n\t"           \
-    "vmovdqu		%%xmm4, (%[out]," VAR(KR64) ",1)\n\t"
-#define VAESENC_GFMUL(in, H, X)                               \
-       _VAESENC_GFMUL(in, H, X)
-
-
-#define _GHASH_GFMUL_AVX1(r, r2, a, b)             \
-    "vpshufd	$0x4e, "#a", %%xmm1\n\t"           \
-    "vpshufd	$0x4e, "#b", %%xmm2\n\t"           \
-    "vpclmulqdq	$0x11, "#a", "#b", %%xmm3\n\t"     \
-    "vpclmulqdq	$0x00, "#a", "#b", %%xmm0\n\t"     \
-    "vpxor	"#a", %%xmm1, %%xmm1\n\t"          \
-    "vpxor	"#b", %%xmm2, %%xmm2\n\t"          \
-    "vpclmulqdq	$0x00, %%xmm2, %%xmm1, %%xmm1\n\t" \
-    "vpxor	%%xmm0, %%xmm1, %%xmm1\n\t"        \
-    "vpxor	%%xmm3, %%xmm1, %%xmm1\n\t"        \
-    "vmovdqa	%%xmm0, "#r2"\n\t"                 \
-    "vmovdqa	%%xmm3, " #r "\n\t"                \
-    "vpslldq	$8, %%xmm1, %%xmm2\n\t"            \
-    "vpsrldq	$8, %%xmm1, %%xmm1\n\t"            \
-    "vpxor	%%xmm2, "#r2", "#r2"\n\t"          \
-    "vpxor	%%xmm1, " #r ", " #r "\n\t"
-#define GHASH_GFMUL_AVX1(r, r2, a, b)              \
-       _GHASH_GFMUL_AVX1(r, r2, a, b)
-
-#define _GHASH_GFMUL_XOR_AVX1(r, r2, a, b)         \
-    "vpshufd	$0x4e, "#a", %%xmm1\n\t"           \
-    "vpshufd	$0x4e, "#b", %%xmm2\n\t"           \
-    "vpclmulqdq	$0x11, "#a", "#b", %%xmm3\n\t"     \
-    "vpclmulqdq	$0x00, "#a", "#b", %%xmm0\n\t"     \
-    "vpxor	"#a", %%xmm1, %%xmm1\n\t"          \
-    "vpxor	"#b", %%xmm2, %%xmm2\n\t"          \
-    "vpclmulqdq	$0x00, %%xmm2, %%xmm1, %%xmm1\n\t" \
-    "vpxor	%%xmm0, %%xmm1, %%xmm1\n\t"        \
-    "vpxor	%%xmm3, %%xmm1, %%xmm1\n\t"        \
-    "vpxor	%%xmm0, "#r2", "#r2"\n\t"          \
-    "vpxor	%%xmm3, " #r ", " #r "\n\t"        \
-    "vpslldq	$8, %%xmm1, %%xmm2\n\t"            \
-    "vpsrldq	$8, %%xmm1, %%xmm1\n\t"            \
-    "vpxor	%%xmm2, "#r2", "#r2"\n\t"          \
-    "vpxor	%%xmm1, " #r ", " #r "\n\t"
-#define GHASH_GFMUL_XOR_AVX1(r, r2, a, b)          \
-       _GHASH_GFMUL_XOR_AVX1(r, r2, a, b)
-
-#define GHASH_MID_AVX1(r, r2)               \
-    "vpsrld	$31, "#r2", %%xmm0\n\t"     \
-    "vpsrld	$31, " #r ", %%xmm1\n\t"    \
-    "vpslld	$1, "#r2", "#r2"\n\t"       \
-    "vpslld	$1, " #r ", " #r "\n\t"     \
-    "vpsrldq	$12, %%xmm0, %%xmm2\n\t"    \
-    "vpslldq	$4, %%xmm0, %%xmm0\n\t"     \
-    "vpslldq	$4, %%xmm1, %%xmm1\n\t"     \
-    "vpor	%%xmm2, " #r ", " #r "\n\t" \
-    "vpor	%%xmm0, "#r2", "#r2"\n\t"   \
-    "vpor	%%xmm1, " #r ", " #r "\n\t"
-
-#define _GHASH_GFMUL_RED_AVX1(r, a, b)             \
-    "vpshufd	$0x4e, "#a", %%xmm5\n\t"           \
-    "vpshufd	$0x4e, "#b", %%xmm6\n\t"           \
-    "vpclmulqdq	$0x11, "#a", "#b", %%xmm7\n\t"     \
-    "vpclmulqdq	$0x00, "#a", "#b", %%xmm4\n\t"     \
-    "vpxor	"#a", %%xmm5, %%xmm5\n\t"          \
-    "vpxor	"#b", %%xmm6, %%xmm6\n\t"          \
-    "vpclmulqdq	$0x00, %%xmm6, %%xmm5, %%xmm5\n\t" \
-    "vpxor	%%xmm4, %%xmm5, %%xmm5\n\t"        \
-    "vpxor	%%xmm7, %%xmm5, %%xmm5\n\t"        \
-    "vpslldq	$8, %%xmm5, %%xmm6\n\t"            \
-    "vpsrldq	$8, %%xmm5, %%xmm5\n\t"            \
-    "vpxor	%%xmm6, %%xmm4, %%xmm4\n\t"        \
-    "vpxor	%%xmm5, %%xmm7, " #r "\n\t"        \
-    "vpslld	$31, %%xmm4, %%xmm8\n\t"           \
-    "vpslld	$30, %%xmm4, %%xmm9\n\t"           \
-    "vpslld	$25, %%xmm4, %%xmm10\n\t"          \
-    "vpxor	%%xmm9, %%xmm8, %%xmm8\n\t"        \
-    "vpxor	%%xmm10, %%xmm8, %%xmm8\n\t"       \
-    "vpsrldq	$4, %%xmm8, %%xmm9\n\t"            \
-    "vpslldq	$12, %%xmm8, %%xmm8\n\t"           \
-    "vpxor	%%xmm8, %%xmm4, %%xmm4\n\t"        \
-    "vpsrld	$1, %%xmm4, %%xmm10\n\t"           \
-    "vpsrld	$2, %%xmm4, %%xmm6\n\t"            \
-    "vpsrld	$7, %%xmm4, %%xmm5\n\t"            \
-    "vpxor	%%xmm6, %%xmm10, %%xmm10\n\t"      \
-    "vpxor	%%xmm5, %%xmm10, %%xmm10\n\t"      \
-    "vpxor	%%xmm9, %%xmm10, %%xmm10\n\t"      \
-    "vpxor	%%xmm4, %%xmm10, %%xmm10\n\t"      \
-    "vpxor	%%xmm10, " #r ", " #r "\n\t"
-#define GHASH_GFMUL_RED_AVX1(r, a, b)              \
-       _GHASH_GFMUL_RED_AVX1(r, a, b)
-
-#define _GHASH_GFSQR_RED_AVX1(r, a)                \
-    "vpclmulqdq	$0x00, "#a", "#a", %%xmm4\n\t"     \
-    "vpclmulqdq	$0x11, "#a", "#a", " #r "\n\t"     \
-    "vpslld	$31, %%xmm4, %%xmm8\n\t"           \
-    "vpslld	$30, %%xmm4, %%xmm9\n\t"           \
-    "vpslld	$25, %%xmm4, %%xmm10\n\t"          \
-    "vpxor	%%xmm9, %%xmm8, %%xmm8\n\t"        \
-    "vpxor	%%xmm10, %%xmm8, %%xmm8\n\t"       \
-    "vpsrldq	$4, %%xmm8, %%xmm9\n\t"            \
-    "vpslldq	$12, %%xmm8, %%xmm8\n\t"           \
-    "vpxor	%%xmm8, %%xmm4, %%xmm4\n\t"        \
-    "vpsrld	$1, %%xmm4, %%xmm10\n\t"           \
-    "vpsrld	$2, %%xmm4, %%xmm6\n\t"            \
-    "vpsrld	$7, %%xmm4, %%xmm5\n\t"            \
-    "vpxor	%%xmm6, %%xmm10, %%xmm10\n\t"      \
-    "vpxor	%%xmm5, %%xmm10, %%xmm10\n\t"      \
-    "vpxor	%%xmm9, %%xmm10, %%xmm10\n\t"      \
-    "vpxor	%%xmm4, %%xmm10, %%xmm10\n\t"      \
-    "vpxor	%%xmm10, " #r ", " #r "\n\t"
-#define GHASH_GFSQR_RED_AVX1(r, a)                 \
-       _GHASH_GFSQR_RED_AVX1(r, a)
-
-#define GHASH_RED_AVX1(r, r2)                \
-    "vpslld	$31, "#r2", %%xmm0\n\t"      \
-    "vpslld	$30, "#r2", %%xmm1\n\t"      \
-    "vpslld	$25, "#r2", %%xmm2\n\t"      \
-    "vpxor	%%xmm1, %%xmm0, %%xmm0\n\t"  \
-    "vpxor	%%xmm2, %%xmm0, %%xmm0\n\t"  \
-    "vmovdqa	%%xmm0, %%xmm1\n\t"          \
-    "vpsrldq	$4, %%xmm1, %%xmm1\n\t"      \
-    "vpslldq	$12, %%xmm0, %%xmm0\n\t"     \
-    "vpxor	%%xmm0, "#r2", "#r2"\n\t"    \
-    "vpsrld	$1, "#r2", %%xmm2\n\t"       \
-    "vpsrld	$2, "#r2", %%xmm3\n\t"       \
-    "vpsrld	$7, "#r2", %%xmm0\n\t"       \
-    "vpxor	%%xmm3, %%xmm2, %%xmm2\n\t"  \
-    "vpxor	%%xmm0, %%xmm2, %%xmm2\n\t"  \
-    "vpxor	%%xmm1, %%xmm2, %%xmm2\n\t"  \
-    "vpxor	"#r2", %%xmm2, %%xmm2\n\t"   \
-    "vpxor	%%xmm2, " #r ", " #r "\n\t"
-
-#define GHASH_GFMUL_RED_XOR_AVX1(r, r2, a, b) \
-    GHASH_GFMUL_XOR_AVX1(r, r2, a, b)         \
-    GHASH_RED_AVX1(r, r2)
-
-#define GHASH_FULL_AVX1(r, r2, a, b) \
-    GHASH_GFMUL_AVX1(r, r2, a, b)    \
-    GHASH_MID_AVX1(r, r2)            \
-    GHASH_RED_AVX1(r, r2)
-
-#define CALC_IV_12_AVX1()                                            \
-    "# Calculate values when IV is 12 bytes\n\t"                     \
-    "# Set counter based on IV\n\t"                                  \
-    "movl		$0x01000000, %%ecx\n\t"                      \
-    "vpinsrq		$0, 0(%%rax), %%xmm13, %%xmm13\n\t"          \
-    "vpinsrd		$2, 8(%%rax), %%xmm13, %%xmm13\n\t"          \
-    "vpinsrd		$3, %%ecx, %%xmm13, %%xmm13\n\t"             \
-    "# H = Encrypt X(=0) and T = Encrypt counter\n\t"                \
-    "vmovdqa		  0(%[KEY]), " VAR(HR) "\n\t"                \
-    "vpxor		" VAR(HR) ", %%xmm13, %%xmm1\n\t"            \
-    "vmovdqa		 16(%[KEY]), %%xmm12\n\t"                    \
-    "vaesenc		%%xmm12, " VAR(HR) ", " VAR(HR) "\n\t"       \
-    "vaesenc		%%xmm12, %%xmm1, %%xmm1\n\t"                 \
-    "vmovdqa		 32(%[KEY]), %%xmm12\n\t"                    \
-    "vaesenc		%%xmm12, " VAR(HR) ", " VAR(HR) "\n\t"       \
-    "vaesenc		%%xmm12, %%xmm1, %%xmm1\n\t"                 \
-    "vmovdqa		 48(%[KEY]), %%xmm12\n\t"                    \
-    "vaesenc		%%xmm12, " VAR(HR) ", " VAR(HR) "\n\t"       \
-    "vaesenc		%%xmm12, %%xmm1, %%xmm1\n\t"                 \
-    "vmovdqa		 64(%[KEY]), %%xmm12\n\t"                    \
-    "vaesenc		%%xmm12, " VAR(HR) ", " VAR(HR) "\n\t"       \
-    "vaesenc		%%xmm12, %%xmm1, %%xmm1\n\t"                 \
-    "vmovdqa		 80(%[KEY]), %%xmm12\n\t"                    \
-    "vaesenc		%%xmm12, " VAR(HR) ", " VAR(HR) "\n\t"       \
-    "vaesenc		%%xmm12, %%xmm1, %%xmm1\n\t"                 \
-    "vmovdqa		 96(%[KEY]), %%xmm12\n\t"                    \
-    "vaesenc		%%xmm12, " VAR(HR) ", " VAR(HR) "\n\t"       \
-    "vaesenc		%%xmm12, %%xmm1, %%xmm1\n\t"                 \
-    "vmovdqa		112(%[KEY]), %%xmm12\n\t"                    \
-    "vaesenc		%%xmm12, " VAR(HR) ", " VAR(HR) "\n\t"       \
-    "vaesenc		%%xmm12, %%xmm1, %%xmm1\n\t"                 \
-    "vmovdqa		128(%[KEY]), %%xmm12\n\t"                    \
-    "vaesenc		%%xmm12, " VAR(HR) ", " VAR(HR) "\n\t"       \
-    "vaesenc		%%xmm12, %%xmm1, %%xmm1\n\t"                 \
-    "vmovdqa		144(%[KEY]), %%xmm12\n\t"                    \
-    "vaesenc		%%xmm12, " VAR(HR) ", " VAR(HR) "\n\t"       \
-    "vaesenc		%%xmm12, %%xmm1, %%xmm1\n\t"                 \
-    "cmpl		$11, %[nr]\n\t"                              \
-    "vmovdqa		160(%[KEY]), %%xmm12\n\t"                    \
-    "jl	31f\n\t"                                                     \
-    "vaesenc		%%xmm12, " VAR(HR) ", " VAR(HR) "\n\t"       \
-    "vaesenc		%%xmm12, %%xmm1, %%xmm1\n\t"                 \
-    "vmovdqa		176(%[KEY]), %%xmm12\n\t"                    \
-    "vaesenc		%%xmm12, " VAR(HR) ", " VAR(HR) "\n\t"       \
-    "vaesenc		%%xmm12, %%xmm1, %%xmm1\n\t"                 \
-    "cmpl		$13, %[nr]\n\t"                              \
-    "vmovdqa		192(%[KEY]), %%xmm12\n\t"                    \
-    "jl	31f\n\t"                                                     \
-    "vaesenc		%%xmm12, " VAR(HR) ", " VAR(HR) "\n\t"       \
-    "vaesenc		%%xmm12, %%xmm1, %%xmm1\n\t"                 \
-    "vmovdqa		208(%[KEY]), %%xmm12\n\t"                    \
-    "vaesenc		%%xmm12, " VAR(HR) ", " VAR(HR) "\n\t"       \
-    "vaesenc		%%xmm12, %%xmm1, %%xmm1\n\t"                 \
-    "vmovdqu		224(%[KEY]), %%xmm12\n\t"                    \
-    "31:\n\t"                                                        \
-    "vaesenclast	%%xmm12, " VAR(HR) ", " VAR(HR) "\n\t"       \
-    "vaesenclast	%%xmm12, %%xmm1, %%xmm1\n\t"                 \
-    "vpshufb		%[BSWAP_MASK], " VAR(HR) ", " VAR(HR) "\n\t" \
-    "vmovdqu		%%xmm1, " VAR(TR) "\n\t"                     \
-    "jmp		39f\n\t"
-
-#define CALC_IV_AVX1()                                       \
-    "# Calculate values when IV is not 12 bytes\n\t"         \
-    "# H = Encrypt X(=0)\n\t"                                \
-    "vmovdqa	0(%[KEY]), " VAR(HR) "\n\t"                  \
-    VAESENC_AVX(HR)                                          \
-    "vpshufb	%[BSWAP_MASK], " VAR(HR) ", " VAR(HR) "\n\t" \
-    "# Calc counter\n\t"                                     \
-    "# Initialization vector\n\t"                            \
-    "cmpl	$0, %%edx\n\t"                               \
-    "movq	$0, %%rcx\n\t"                               \
-    "je	45f\n\t"                                             \
-    "cmpl	$16, %%edx\n\t"                              \
-    "jl	44f\n\t"                                             \
-    "andl	$0xfffffff0, %%edx\n\t"                      \
-    "\n"                                                     \
-    "43:\n\t"                                                \
-    "vmovdqu	(%%rax,%%rcx,1), %%xmm4\n\t"                 \
-    "vpshufb	%[BSWAP_MASK], %%xmm4, %%xmm4\n\t"           \
-    "vpxor	%%xmm4, %%xmm13, %%xmm13\n\t"                \
-    GHASH_FULL_AVX1(%%xmm13, %%xmm12, %%xmm13, HR)           \
-    "addl	$16, %%ecx\n\t"                              \
-    "cmpl	%%edx, %%ecx\n\t"                            \
-    "jl	43b\n\t"                                             \
-    "movl	%[ibytes], %%edx\n\t"                        \
-    "cmpl	%%edx, %%ecx\n\t"                            \
-    "je	45f\n\t"                                             \
-    "\n"                                                     \
-    "44:\n\t"                                                \
-    "subq	$16, %%rsp\n\t"                              \
-    "vpxor	%%xmm4, %%xmm4, %%xmm4\n\t"                  \
-    "xorl	%%ebx, %%ebx\n\t"                            \
-    "vmovdqu	%%xmm4, (%%rsp)\n\t"                         \
-    "42:\n\t"                                                \
-    "movzbl	(%%rax,%%rcx,1), %%r13d\n\t"                 \
-    "movb	%%r13b, (%%rsp,%%rbx,1)\n\t"                 \
-    "incl	%%ecx\n\t"                                   \
-    "incl	%%ebx\n\t"                                   \
-    "cmpl	%%edx, %%ecx\n\t"                            \
-    "jl	42b\n\t"                                             \
-    "vmovdqu	(%%rsp), %%xmm4\n\t"                         \
-    "addq	$16, %%rsp\n\t"                              \
-    "vpshufb	%[BSWAP_MASK], %%xmm4, %%xmm4\n\t"           \
-    "vpxor	%%xmm4, %%xmm13, %%xmm13\n\t"                \
-    GHASH_FULL_AVX1(%%xmm13, %%xmm12, %%xmm13, HR)           \
-    "\n"                                                     \
-    "45:\n\t"                                                \
-    "# T = Encrypt counter\n\t"                              \
-    "vpxor	%%xmm0, %%xmm0, %%xmm0\n\t"                  \
-    "shll	$3, %%edx\n\t"                               \
-    "vpinsrq	$0, %%rdx, %%xmm0, %%xmm0\n\t"               \
-    "vpxor	%%xmm0, %%xmm13, %%xmm13\n\t"                \
-    GHASH_FULL_AVX1(%%xmm13, %%xmm12, %%xmm13, HR)           \
-    "vpshufb	%[BSWAP_MASK], %%xmm13, %%xmm13\n\t"         \
-    "#   Encrypt counter\n\t"                                \
-    "vmovdqa	0(%[KEY]), %%xmm4\n\t"                       \
-    "vpxor	%%xmm13, %%xmm4, %%xmm4\n\t"                 \
-    VAESENC_AVX(%%xmm4)                                      \
-    "vmovdqu	%%xmm4, " VAR(TR) "\n\t"
-
-#define CALC_AAD_AVX1()                                \
-    "# Additional authentication data\n\t"             \
-    "movl	%[abytes], %%edx\n\t"                  \
-    "cmpl	$0, %%edx\n\t"                         \
-    "je		25f\n\t"                               \
-    "movq	%[addt], %%rax\n\t"                    \
-    "xorl	%%ecx, %%ecx\n\t"                      \
-    "cmpl	$16, %%edx\n\t"                        \
-    "jl		24f\n\t"                               \
-    "andl	$0xfffffff0, %%edx\n\t"                \
-    "\n"                                               \
-    "23:\n\t"                                          \
-    "vmovdqu	(%%rax,%%rcx,1), %%xmm4\n\t"           \
-    "vpshufb	%[BSWAP_MASK], %%xmm4, %%xmm4\n\t"     \
-    "vpxor	%%xmm4, " VAR(XR) ", " VAR(XR) "\n\t"  \
-    GHASH_FULL_AVX1(XR, %%xmm12, XR, HR)               \
-    "addl	$16, %%ecx\n\t"                        \
-    "cmpl	%%edx, %%ecx\n\t"                      \
-    "jl		23b\n\t"                               \
-    "movl	%[abytes], %%edx\n\t"                  \
-    "cmpl	%%edx, %%ecx\n\t"                      \
-    "je		25f\n\t"                               \
-    "\n"                                               \
-    "24:\n\t"                                          \
-    "subq	$16, %%rsp\n\t"                        \
-    "vpxor	%%xmm4, %%xmm4, %%xmm4\n\t"            \
-    "xorl	%%ebx, %%ebx\n\t"                      \
-    "vmovdqu	%%xmm4, (%%rsp)\n\t"                   \
-    "22:\n\t"                                          \
-    "movzbl	(%%rax,%%rcx,1), %%r13d\n\t"           \
-    "movb	%%r13b, (%%rsp,%%rbx,1)\n\t"           \
-    "incl	%%ecx\n\t"                             \
-    "incl	%%ebx\n\t"                             \
-    "cmpl	%%edx, %%ecx\n\t"                      \
-    "jl		22b\n\t"                               \
-    "vmovdqu	(%%rsp), %%xmm4\n\t"                   \
-    "addq	$16, %%rsp\n\t"                        \
-    "vpshufb	%[BSWAP_MASK], %%xmm4, %%xmm4\n\t"     \
-    "vpxor	%%xmm4, " VAR(XR) ", " VAR(XR) "\n\t"  \
-    GHASH_FULL_AVX1(XR, %%xmm12, XR, HR)               \
-    "\n"                                               \
-    "25:\n\t"
-
-#define CALC_HT_8_AVX1()                          \
-    "vmovdqa	" VAR(XR) ", %%xmm2\n\t"          \
-    "# H ^ 1\n\t"                                 \
-    "vmovdqu	" VAR(HR) ", 0(" VAR(HTR) ")\n\t" \
-    "# H ^ 2\n\t"                                 \
-    GHASH_GFSQR_RED_AVX1(%%xmm0, HR)              \
-    "vmovdqu	%%xmm0 ,  16(" VAR(HTR) ")\n\t"   \
-    "# H ^ 3\n\t"                                 \
-    GHASH_GFMUL_RED_AVX1(%%xmm1, HR, %%xmm0)      \
-    "vmovdqu	%%xmm1 ,  32(" VAR(HTR) ")\n\t"   \
-    "# H ^ 4\n\t"                                 \
-    GHASH_GFSQR_RED_AVX1(%%xmm3, %%xmm0)          \
-    "vmovdqu	%%xmm3 ,  48(" VAR(HTR) ")\n\t"   \
-    "# H ^ 5\n\t"                                 \
-    GHASH_GFMUL_RED_AVX1(%%xmm12, %%xmm0, %%xmm1) \
-    "vmovdqu	%%xmm12,  64(" VAR(HTR) ")\n\t"   \
-    "# H ^ 6\n\t"                                 \
-    GHASH_GFSQR_RED_AVX1(%%xmm12, %%xmm1)         \
-    "vmovdqu	%%xmm12,  80(" VAR(HTR) ")\n\t"   \
-    "# H ^ 7\n\t"                                 \
-    GHASH_GFMUL_RED_AVX1(%%xmm12, %%xmm1, %%xmm3) \
-    "vmovdqu	%%xmm12,  96(" VAR(HTR) ")\n\t"   \
-    "# H ^ 8\n\t"                                 \
-    GHASH_GFSQR_RED_AVX1(%%xmm12, %%xmm3)         \
-    "vmovdqu	%%xmm12, 112(" VAR(HTR) ")\n\t"
-
-#define VAESENC_128_GHASH_AVX1(src, o)               \
-    "leaq	(%[in]," VAR(KR64) ",1), %%rcx\n\t"  \
-    "leaq	(%[out]," VAR(KR64) ",1), %%rdx\n\t" \
-    /* src is either %%rcx or %%rdx */             \
-    VAESENC_CTR()                                  \
-    VAESENC_XOR()                                  \
-    VAESENC_PCLMUL_1(src,  16, (o-128), 112)       \
-    VAESENC_PCLMUL_N(src,  32, (o-112),  96)       \
-    VAESENC_PCLMUL_N(src,  48, (o- 96),  80)       \
-    VAESENC_PCLMUL_N(src,  64, (o- 80),  64)       \
-    VAESENC_PCLMUL_N(src,  80, (o- 64),  48)       \
-    VAESENC_PCLMUL_N(src,  96, (o- 48),  32)       \
-    VAESENC_PCLMUL_N(src, 112, (o- 32),  16)       \
-    VAESENC_PCLMUL_N(src, 128, (o- 16),   0)       \
-    VAESENC_PCLMUL_L(144)                          \
-    "cmpl	$11, %[nr]\n\t"                    \
-    "vmovdqa	160(%[KEY]), %%xmm12\n\t"          \
-    "jl		4f\n\t"                            \
-    VAESENC()                                      \
-    VAESENC_SET(176)                               \
-    "cmpl	$13, %[nr]\n\t"                    \
-    "vmovdqa	192(%[KEY]), %%xmm12\n\t"          \
-    "jl		4f\n\t"                            \
-    VAESENC()                                      \
-    VAESENC_SET(208)                               \
-    "vmovdqa	224(%[KEY]), %%xmm12\n\t"          \
-    "\n"                                           \
-"4:\n\t"                                           \
-    VAESENC_LAST(%%rcx, %%rdx)
-
-#define _VAESENC_AVX(r)                                  \
-    "vaesenc		16(%[KEY]), " #r ", " #r "\n\t"  \
-    "vaesenc		32(%[KEY]), " #r ", " #r "\n\t"  \
-    "vaesenc		48(%[KEY]), " #r ", " #r "\n\t"  \
-    "vaesenc		64(%[KEY]), " #r ", " #r "\n\t"  \
-    "vaesenc		80(%[KEY]), " #r ", " #r "\n\t"  \
-    "vaesenc		96(%[KEY]), " #r ", " #r "\n\t"  \
-    "vaesenc		112(%[KEY]), " #r ", " #r "\n\t" \
-    "vaesenc		128(%[KEY]), " #r ", " #r "\n\t" \
-    "vaesenc		144(%[KEY]), " #r ", " #r "\n\t" \
-    "cmpl		$11, %[nr]\n\t"                  \
-    "vmovdqa		160(%[KEY]), %%xmm5\n\t"         \
-    "jl			%=f\n\t"                         \
-    "vaesenc		%%xmm5, " #r ", " #r "\n\t"      \
-    "vaesenc		176(%[KEY]), " #r ", " #r "\n\t" \
-    "cmpl		$13, %[nr]\n\t"                  \
-    "vmovdqa		192(%[KEY]), %%xmm5\n\t"         \
-    "jl			%=f\n\t"                         \
-    "vaesenc		%%xmm5, " #r ", " #r "\n\t"      \
-    "vaesenc		208(%[KEY]), " #r ", " #r "\n\t" \
-    "vmovdqa		224(%[KEY]), %%xmm5\n\t"         \
-    "%=:\n\t"                                            \
-    "vaesenclast	%%xmm5, " #r ", " #r "\n\t"
-#define VAESENC_AVX(r)                                   \
-        _VAESENC_AVX(r)
-
-#define AESENC_LAST15_ENC_AVX1()                        \
-    "movl	%[nbytes], %%ecx\n\t"                   \
-    "movl	%%ecx, %%edx\n\t"                       \
-    "andl	$0x0f, %%ecx\n\t"                       \
-    "jz		55f\n\t"                                \
-    "vmovdqu	" VAR(CTR1) ", %%xmm13\n\t"             \
-    "vpshufb	%[BSWAP_EPI64], %%xmm13, %%xmm13\n\t"   \
-    "vpxor	0(%[KEY]), %%xmm13, %%xmm13\n\t"        \
-    VAESENC_AVX(%%xmm13)                                \
-    "subq	$16, %%rsp\n\t"                         \
-    "xorl	%%ecx, %%ecx\n\t"                       \
-    "vmovdqu	%%xmm13, (%%rsp)\n\t"                   \
-    "\n"                                                \
-    "51:\n\t"                                           \
-    "movzbl	(%[in]," VAR(KR64) ",1), %%r13d\n\t"    \
-    "xorb	(%%rsp,%%rcx,1), %%r13b\n\t"            \
-    "movb	%%r13b, (%[out]," VAR(KR64) ",1)\n\t"   \
-    "movb	%%r13b, (%%rsp,%%rcx,1)\n\t"            \
-    "incl	" VAR(KR) "\n\t"                        \
-    "incl	%%ecx\n\t"                              \
-    "cmpl	%%edx, " VAR(KR) "\n\t"                 \
-    "jl		51b\n\t"                                \
-    "xorq	%%r13, %%r13\n\t"                       \
-    "cmpl	$16, %%ecx\n\t"                         \
-    "je		53f\n\t"                                \
-    "\n"                                                \
-    "52:\n\t"                                           \
-    "movb	%%r13b, (%%rsp,%%rcx,1)\n\t"            \
-    "incl	%%ecx\n\t"                              \
-    "cmpl	$16, %%ecx\n\t"                         \
-    "jl		52b\n\t"                                \
-    "53:\n\t"                                           \
-    "vmovdqu	(%%rsp), %%xmm13\n\t"                   \
-    "addq	$16, %%rsp\n\t"                         \
-    "vpshufb	%[BSWAP_MASK], %%xmm13, %%xmm13\n\t"    \
-    "vpxor	%%xmm13, " VAR(XR) ", " VAR(XR) "\n\t"  \
-    GHASH_GFMUL_RED_AVX1(XR, HR, XR)                    \
-
-#define AESENC_LAST15_DEC_AVX1()                        \
-    "movl	%[nbytes], %%ecx\n\t"                   \
-    "movl	%%ecx, %%edx\n\t"                       \
-    "andl	$0x0f, %%ecx\n\t"                       \
-    "jz		55f\n\t"                                \
-    "vmovdqu	" VAR(CTR1) ", %%xmm13\n\t"             \
-    "vpshufb	%[BSWAP_EPI64], %%xmm13, %%xmm13\n\t"   \
-    "vpxor	0(%[KEY]), %%xmm13, %%xmm13\n\t"        \
-    VAESENC_AVX(%%xmm13)                                \
-    "subq	$32, %%rsp\n\t"                         \
-    "xorl	%%ecx, %%ecx\n\t"                       \
-    "vmovdqu	%%xmm13, (%%rsp)\n\t"                   \
-    "vpxor	%%xmm0, %%xmm0, %%xmm0\n\t"             \
-    "vmovdqu	%%xmm0, 16(%%rsp)\n\t"                  \
-    "\n"                                                \
-    "51:\n\t"                                           \
-    "movzbl	(%[in]," VAR(KR64) ",1), %%r13d\n\t"    \
-    "movb	%%r13b, 16(%%rsp,%%rcx,1)\n\t"          \
-    "xorb	(%%rsp,%%rcx,1), %%r13b\n\t"            \
-    "movb	%%r13b, (%[out]," VAR(KR64) ",1)\n\t"   \
-    "incl	" VAR(KR) "\n\t"                        \
-    "incl	%%ecx\n\t"                              \
-    "cmpl	%%edx, " VAR(KR) "\n\t"                 \
-    "jl		51b\n\t"                                \
-    "53:\n\t"                                           \
-    "vmovdqu	16(%%rsp), %%xmm13\n\t"                 \
-    "addq	$32, %%rsp\n\t"                         \
-    "vpshufb	%[BSWAP_MASK], %%xmm13, %%xmm13\n\t"    \
-    "vpxor	%%xmm13, " VAR(XR) ", " VAR(XR) "\n\t"  \
-    GHASH_GFMUL_RED_AVX1(XR, HR, XR)                    \
-
-#define CALC_TAG_AVX1()                                      \
-    "movl	%[nbytes], %%edx\n\t"                        \
-    "movl	%[abytes], %%ecx\n\t"                        \
-    "shlq	$3, %%rdx\n\t"                               \
-    "shlq	$3, %%rcx\n\t"                               \
-    "vpinsrq	$0, %%rdx, %%xmm0, %%xmm0\n\t"               \
-    "vpinsrq	$1, %%rcx, %%xmm0, %%xmm0\n\t"               \
-    "vpxor	%%xmm0, " VAR(XR) ", " VAR(XR) "\n\t"        \
-    GHASH_GFMUL_RED_AVX1(XR, HR, XR)                         \
-    "vpshufb	%[BSWAP_MASK], " VAR(XR) ", " VAR(XR) "\n\t" \
-    "vpxor	" VAR(TR) ", " VAR(XR) ", %%xmm0\n\t"        \
-
-#define STORE_TAG_AVX()                       \
-    "cmpl	$16, %[tbytes]\n\t"           \
-    "je		71f\n\t"                      \
-    "xorq	%%rcx, %%rcx\n\t"             \
-    "vmovdqu	%%xmm0, (%%rsp)\n\t"          \
-    "73:\n\t"                                 \
-    "movzbl	(%%rsp,%%rcx,1), %%r13d\n\t"  \
-    "movb	%%r13b, (%[tag],%%rcx,1)\n\t" \
-    "incl	%%ecx\n\t"                    \
-    "cmpl	%[tbytes], %%ecx\n\t"         \
-    "jne	73b\n\t"                      \
-    "jmp	72f\n\t"                      \
-    "\n"                                      \
-    "71:\n\t"                                 \
-    "vmovdqu	%%xmm0, (%[tag])\n\t"         \
-    "\n"                                      \
-    "72:\n\t"
-
-#define CMP_TAG_AVX()                                      \
-    "cmpl	$16, %[tbytes]\n\t"                        \
-    "je		71f\n\t"                                   \
-    "subq	$16, %%rsp\n\t"                            \
-    "xorq	%%rcx, %%rcx\n\t"                          \
-    "xorq	%%rax, %%rax\n\t"                          \
-    "vmovdqu	%%xmm0, (%%rsp)\n\t"                       \
-    "\n"                                                   \
-    "73:\n\t"                                              \
-    "movzbl	(%%rsp,%%rcx,1), %%r13d\n\t"               \
-    "xorb	(%[tag],%%rcx,1), %%r13b\n\t"              \
-    "orb	%%r13b, %%al\n\t"                          \
-    "incl	%%ecx\n\t"                                 \
-    "cmpl	%[tbytes], %%ecx\n\t"                      \
-    "jne	73b\n\t"                                   \
-    "cmpb	$0x00, %%al\n\t"                           \
-    "sete	%%al\n\t"                                  \
-    "addq	$16, %%rsp\n\t"                            \
-    "jmp	72f\n\t"                                   \
-    "\n"                                                   \
-    "71:\n\t"                                              \
-    "vmovdqu	(%[tag]), %%xmm1\n\t"                      \
-    "vpcmpeqb	%%xmm1, %%xmm0, %%xmm0\n\t"                \
-    "vpmovmskb	%%xmm0, %%edx\n\t"                         \
-    "# %%edx == 0xFFFF then return 1 else => return 0\n\t" \
-    "xorl	%%eax, %%eax\n\t"                          \
-    "cmpl	$0xffff, %%edx\n\t"                        \
-    "sete	%%al\n\t"                                  \
-    "\n"                                                   \
-    "72:\n\t"                                              \
-    "movl	%%eax, (%[res])\n\t"
-
-static void AES_GCM_encrypt_avx1(const unsigned char *in, unsigned char *out,
-                                 const unsigned char* addt,
-                                 const unsigned char* ivec, unsigned char *tag,
-                                 unsigned int nbytes, unsigned int abytes,
-                                 unsigned int ibytes, unsigned int tbytes,
-                                 const unsigned char* key, int nr)
-{
-    register const unsigned char* iv asm("rax") = ivec;
-    register unsigned int ivLen asm("ebx") = ibytes;
-
-    __asm__ __volatile__ (
-        "subq		$" VAR(STACK_OFFSET) ", %%rsp\n\t"
-        /* Counter is xmm13 */
-        "vpxor		%%xmm13, %%xmm13, %%xmm13\n\t"
-        "vpxor		" VAR(XR) ", " VAR(XR) ", " VAR(XR) "\n\t"
-        "movl		%[ibytes], %%edx\n\t"
-        "cmpl		$12, %%edx\n\t"
-        "jne		35f\n\t"
-        CALC_IV_12_AVX1()
-        "\n"
-        "35:\n\t"
-        CALC_IV_AVX1()
-        "\n"
-        "39:\n\t"
-
-        CALC_AAD_AVX1()
-
-        "# Calculate counter and H\n\t"
-        "vpsrlq		$63, " VAR(HR) ", %%xmm5\n\t"
-        "vpsllq		$1, " VAR(HR) ", %%xmm4\n\t"
-        "vpslldq	$8, %%xmm5, %%xmm5\n\t"
-        "vpor		%%xmm5, %%xmm4, %%xmm4\n\t"
-        "vpshufd	$0xff, " VAR(HR) ", " VAR(HR) "\n\t"
-        "vpsrad		$31, " VAR(HR) ", " VAR(HR) "\n\t"
-        "vpshufb	%[BSWAP_EPI64], %%xmm13, %%xmm13\n\t"
-        "vpand		%[MOD2_128], " VAR(HR) ", " VAR(HR) "\n\t"
-        "vpaddd		%[ONE], %%xmm13, %%xmm13\n\t"
-        "vpxor		%%xmm4, " VAR(HR) ", " VAR(HR) "\n\t"
-        "vmovdqu	%%xmm13, " VAR(CTR1) "\n\t"
-
-        "xorl		" VAR(KR) ", " VAR(KR) "\n\t"
-
-#if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX1_NO_UNROLL)
-        "cmpl	$128, %[nbytes]\n\t"
-        "movl	%[nbytes], %%r13d\n\t"
-        "jl	5f\n\t"
-        "andl	$0xffffff80, %%r13d\n\t"
-
-        CALC_HT_8_AVX1()
-
-        "# First 128 bytes of input\n\t"
-        VAESENC_128()
-
-        "cmpl	$128, %%r13d\n\t"
-        "movl	$128, " VAR(KR) "\n\t"
-        "jle	2f\n\t"
-
-        "# More 128 bytes of input\n\t"
-        "\n"
-    "3:\n\t"
-        VAESENC_128_GHASH_AVX1(%%rdx, 0)
-        "addl	$128, " VAR(KR) "\n\t"
-        "cmpl	%%r13d, " VAR(KR) "\n\t"
-        "jl	3b\n\t"
-        "\n"
-    "2:\n\t"
-        "vmovdqa	%[BSWAP_MASK], %%xmm13\n\t"
-        "vpshufb	%%xmm13, %%xmm4, %%xmm4\n\t"
-        "vpshufb	%%xmm13, %%xmm5, %%xmm5\n\t"
-        "vpshufb	%%xmm13, %%xmm6, %%xmm6\n\t"
-        "vpshufb	%%xmm13, %%xmm7, %%xmm7\n\t"
-        "vpxor		%%xmm2, %%xmm4, %%xmm4\n\t"
-        "vpshufb	%%xmm13, %%xmm8, %%xmm8\n\t"
-        "vpshufb	%%xmm13, %%xmm9, %%xmm9\n\t"
-        "vpshufb	%%xmm13, %%xmm10, %%xmm10\n\t"
-        "vpshufb	%%xmm13, %%xmm11, %%xmm11\n\t"
-
-        "vmovdqu	   (" VAR(HTR) "), %%xmm12\n\t"
-        "vmovdqu	 16(" VAR(HTR) "), %%xmm14\n\t"
-        GHASH_GFMUL_AVX1(XR, %%xmm13, %%xmm11, %%xmm12)
-        GHASH_GFMUL_XOR_AVX1(XR, %%xmm13, %%xmm10, %%xmm14)
-        "vmovdqu	 32(" VAR(HTR) "), %%xmm12\n\t"
-        "vmovdqu	 48(" VAR(HTR) "), %%xmm14\n\t"
-        GHASH_GFMUL_XOR_AVX1(XR, %%xmm13, %%xmm9, %%xmm12)
-        GHASH_GFMUL_XOR_AVX1(XR, %%xmm13, %%xmm8, %%xmm14)
-        "vmovdqu	 64(" VAR(HTR) "), %%xmm12\n\t"
-        "vmovdqu	 80(" VAR(HTR) "), %%xmm14\n\t"
-        GHASH_GFMUL_XOR_AVX1(XR, %%xmm13, %%xmm7, %%xmm12)
-        GHASH_GFMUL_XOR_AVX1(XR, %%xmm13, %%xmm6, %%xmm14)
-        "vmovdqu	 96(" VAR(HTR) "), %%xmm12\n\t"
-        "vmovdqu	112(" VAR(HTR) "), %%xmm14\n\t"
-        GHASH_GFMUL_XOR_AVX1(XR, %%xmm13, %%xmm5, %%xmm12)
-        GHASH_GFMUL_RED_XOR_AVX1(XR, %%xmm13, %%xmm4, %%xmm14)
-
-        "vmovdqu	0(" VAR(HTR) "), " VAR(HR) "\n\t"
-        "\n"
-    "5:\n\t"
-        "movl		%[nbytes], %%edx\n\t"
-        "cmpl		%%edx, " VAR(KR) "\n\t"
-        "jge		55f\n\t"
-#endif
-
-        "movl		%[nbytes], %%r13d\n\t"
-        "andl		$0xfffffff0, %%r13d\n\t"
-        "cmpl		%%r13d, " VAR(KR) "\n\t"
-        "jge		14f\n\t"
-
-        VAESENC_BLOCK()
-        "addl		$16, " VAR(KR) "\n\t"
-        "cmpl		%%r13d, " VAR(KR) "\n\t"
-        "jge		13f\n\t"
-        "\n"
-        "12:\n\t"
-        "vmovdqu	(%[in]," VAR(KR64) ",1), %%xmm9\n\t"
-        VAESENC_GFMUL(%%xmm9, HR, XR)
-        "vpshufb	%[BSWAP_MASK], %%xmm4, %%xmm4\n\t"
-        "addl		$16, " VAR(KR) "\n\t"
-        "vpxor		%%xmm4, " VAR(XR) ", " VAR(XR) "\n\t"
-        "cmpl		%%r13d, " VAR(KR) "\n\t"
-        "jl		12b\n\t"
-        "\n"
-        "13:\n\t"
-        GHASH_GFMUL_RED_AVX1(XR, HR, XR)
-        "\n"
-        "14:\n\t"
-
-        AESENC_LAST15_ENC_AVX1()
-        "\n"
-        "55:\n\t"
-
-        CALC_TAG_AVX1()
-        STORE_TAG_AVX()
-        "addq		$" VAR(STACK_OFFSET) ", %%rsp\n\t"
-        "vzeroupper\n\t"
-
-        :
-        : [KEY] "r" (key),
-          [in] "r" (in), [out] "r" (out), [nr] "r" (nr),
-          [nbytes] "r" (nbytes), [abytes] "r" (abytes), [addt] "r" (addt),
-          [ivec] "r" (iv), [ibytes] "r" (ivLen), [tbytes] "r" (tbytes),
-          [tag] "r" (tag),
-          [BSWAP_MASK] "m" (BSWAP_MASK),
-          [BSWAP_EPI64] "m" (BSWAP_EPI64),
-          [ONE] "m" (ONE),
-#if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX1_NO_UNROLL)
-          [TWO] "m" (TWO), [THREE] "m" (THREE), [FOUR] "m" (FOUR),
-          [FIVE] "m" (FIVE), [SIX] "m" (SIX), [SEVEN] "m" (SEVEN),
-          [EIGHT] "m" (EIGHT),
-#endif
-          [MOD2_128] "m" (MOD2_128)
-        : "xmm15", "xmm14", "xmm13", "xmm12",
-          "xmm0", "xmm1", "xmm2", "xmm3", "memory",
-          "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11",
-          "rcx", "rdx", "r13"
-    );
-}
-
-#ifdef HAVE_INTEL_AVX2
-/* Encrypt and carry-less multiply for AVX2. */
-#define VAESENC_PCLMUL_AVX2_1(src, o1, o2, o3)        \
-    "vmovdqu	" #o2 "(" #src "), %%xmm12\n\t"       \
-    "vmovdqa	" #o1 "(%[KEY]), %%xmm0\n\t"          \
-    "vpshufb	%[BSWAP_MASK], %%xmm12, %%xmm12\n\t"  \
-    "vmovdqu	" #o3 "(" VAR(HTR) "), %%xmm13\n\t"   \
-    "vpxor	%%xmm2, %%xmm12, %%xmm12\n\t"         \
-    "vpclmulqdq	$0x10, %%xmm13, %%xmm12, %%xmm1\n\t"  \
-    "vpclmulqdq	$0x01, %%xmm13, %%xmm12, %%xmm14\n\t" \
-    "vpclmulqdq	$0x00, %%xmm13, %%xmm12, %%xmm2\n\t"  \
-    "vpclmulqdq	$0x11, %%xmm13, %%xmm12, %%xmm3\n\t"  \
-    "vaesenc	%%xmm0, %%xmm4, %%xmm4\n\t"           \
-    "vaesenc	%%xmm0, %%xmm5, %%xmm5\n\t"           \
-    "vaesenc	%%xmm0, %%xmm6, %%xmm6\n\t"           \
-    "vaesenc	%%xmm0, %%xmm7, %%xmm7\n\t"           \
-    "vaesenc	%%xmm0, %%xmm8, %%xmm8\n\t"           \
-    "vaesenc	%%xmm0, %%xmm9, %%xmm9\n\t"           \
-    "vaesenc	%%xmm0, %%xmm10, %%xmm10\n\t"         \
-    "vaesenc	%%xmm0, %%xmm11, %%xmm11\n\t"         \
-
-#define VAESENC_PCLMUL_AVX2_2(src, o1, o2, o3)        \
-    "vmovdqu	" #o2 "(" #src "), %%xmm12\n\t"       \
-    "vmovdqu	" #o3 "(" VAR(HTR) "), %%xmm0\n\t"    \
-    "vpshufb	%[BSWAP_MASK], %%xmm12, %%xmm12\n\t"  \
-    "vpxor	%%xmm14, %%xmm1, %%xmm1\n\t"          \
-    "vpclmulqdq	$0x10, %%xmm0, %%xmm12, %%xmm13\n\t"  \
-    "vpclmulqdq	$0x01, %%xmm0, %%xmm12, %%xmm14\n\t"  \
-    "vpclmulqdq	$0x00, %%xmm0, %%xmm12, %%xmm15\n\t"  \
-    "vpclmulqdq	$0x11, %%xmm0, %%xmm12, %%xmm12\n\t"  \
-    "vmovdqa	" #o1 "(%[KEY]), %%xmm0\n\t"          \
-    "vpxor	%%xmm13, %%xmm1, %%xmm1\n\t"          \
-    "vpxor	%%xmm12, %%xmm3, %%xmm3\n\t"          \
-    "vaesenc	%%xmm0, %%xmm4, %%xmm4\n\t"           \
-    "vaesenc	%%xmm0, %%xmm5, %%xmm5\n\t"           \
-    "vaesenc	%%xmm0, %%xmm6, %%xmm6\n\t"           \
-    "vaesenc	%%xmm0, %%xmm7, %%xmm7\n\t"           \
-    "vaesenc	%%xmm0, %%xmm8, %%xmm8\n\t"           \
-    "vaesenc	%%xmm0, %%xmm9, %%xmm9\n\t"           \
-    "vaesenc	%%xmm0, %%xmm10, %%xmm10\n\t"         \
-    "vaesenc	%%xmm0, %%xmm11, %%xmm11\n\t"         \
-
-#define VAESENC_PCLMUL_AVX2_N(src, o1, o2, o3)        \
-    "vmovdqu	" #o2 "(" #src "), %%xmm12\n\t"       \
-    "vmovdqu	" #o3 "(" VAR(HTR) "), %%xmm0\n\t"    \
-    "vpshufb	%[BSWAP_MASK], %%xmm12, %%xmm12\n\t"  \
-    "vpxor	%%xmm14, %%xmm1, %%xmm1\n\t"          \
-    "vpxor	%%xmm15, %%xmm2, %%xmm2\n\t"          \
-    "vpclmulqdq	$0x10, %%xmm0, %%xmm12, %%xmm13\n\t"  \
-    "vpclmulqdq	$0x01, %%xmm0, %%xmm12, %%xmm14\n\t"  \
-    "vpclmulqdq	$0x00, %%xmm0, %%xmm12, %%xmm15\n\t"  \
-    "vpclmulqdq	$0x11, %%xmm0, %%xmm12, %%xmm12\n\t"  \
-    "vmovdqa	" #o1 "(%[KEY]), %%xmm0\n\t"          \
-    "vpxor	%%xmm13, %%xmm1, %%xmm1\n\t"          \
-    "vpxor	%%xmm12, %%xmm3, %%xmm3\n\t"          \
-    "vaesenc	%%xmm0, %%xmm4, %%xmm4\n\t"           \
-    "vaesenc	%%xmm0, %%xmm5, %%xmm5\n\t"           \
-    "vaesenc	%%xmm0, %%xmm6, %%xmm6\n\t"           \
-    "vaesenc	%%xmm0, %%xmm7, %%xmm7\n\t"           \
-    "vaesenc	%%xmm0, %%xmm8, %%xmm8\n\t"           \
-    "vaesenc	%%xmm0, %%xmm9, %%xmm9\n\t"           \
-    "vaesenc	%%xmm0, %%xmm10, %%xmm10\n\t"         \
-    "vaesenc	%%xmm0, %%xmm11, %%xmm11\n\t"         \
-
-#define VAESENC_PCLMUL_AVX2_L(o)                      \
-    "vpxor	%%xmm14, %%xmm1, %%xmm1\n\t"          \
-    "vpxor	%%xmm15, %%xmm2, %%xmm2\n\t"          \
-    "vpslldq	$8, %%xmm1, %%xmm12\n\t"              \
-    "vpsrldq	$8, %%xmm1, %%xmm1\n\t"               \
-    "vmovdqa	"#o"(%[KEY]), %%xmm15\n\t"            \
-    "vmovdqa	%[MOD2_128], %%xmm0\n\t"              \
-    "vaesenc	%%xmm15, %%xmm4, %%xmm4\n\t"          \
-    "vpxor	%%xmm12, %%xmm2, %%xmm2\n\t"          \
-    "vpxor	%%xmm1, %%xmm3, %%xmm3\n\t"           \
-    "vpclmulqdq	$0x10, %%xmm0, %%xmm2, %%xmm14\n\t"   \
-    "vaesenc	%%xmm15, %%xmm5, %%xmm5\n\t"          \
-    "vaesenc	%%xmm15, %%xmm6, %%xmm6\n\t"          \
-    "vaesenc	%%xmm15, %%xmm7, %%xmm7\n\t"          \
-    "vpshufd	$0x4e, %%xmm2, %%xmm2\n\t"            \
-    "vpxor	%%xmm14, %%xmm2, %%xmm2\n\t"          \
-    "vpclmulqdq	$0x10, %%xmm0, %%xmm2, %%xmm14\n\t"   \
-    "vaesenc	%%xmm15, %%xmm8, %%xmm8\n\t"          \
-    "vaesenc	%%xmm15, %%xmm9, %%xmm9\n\t"          \
-    "vaesenc	%%xmm15, %%xmm10, %%xmm10\n\t"        \
-    "vpshufd	$0x4e, %%xmm2, %%xmm2\n\t"            \
-    "vpxor	%%xmm14, %%xmm2, %%xmm2\n\t"          \
-    "vpxor	%%xmm3, %%xmm2, %%xmm2\n\t"           \
-    "vaesenc	%%xmm15, %%xmm11, %%xmm11\n\t"
-
-#define VAESENC_BLOCK_AVX2()                                  \
-    "vmovdqu		" VAR(CTR1) ", %%xmm5\n\t"            \
-    "vpshufb		%[BSWAP_EPI64], %%xmm5, %%xmm4\n\t"   \
-    "vpaddd		%[ONE], %%xmm5, %%xmm5\n\t"           \
-    "vmovdqu		%%xmm5, " VAR(CTR1) "\n\t"            \
-    "vpxor		   (%[KEY]), %%xmm4, %%xmm4\n\t"      \
-    "vaesenc		 16(%[KEY]), %%xmm4, %%xmm4\n\t"      \
-    "vaesenc		 32(%[KEY]), %%xmm4, %%xmm4\n\t"      \
-    "vaesenc		 48(%[KEY]), %%xmm4, %%xmm4\n\t"      \
-    "vaesenc		 64(%[KEY]), %%xmm4, %%xmm4\n\t"      \
-    "vaesenc		 80(%[KEY]), %%xmm4, %%xmm4\n\t"      \
-    "vaesenc		 96(%[KEY]), %%xmm4, %%xmm4\n\t"      \
-    "vaesenc		112(%[KEY]), %%xmm4, %%xmm4\n\t"      \
-    "vaesenc		128(%[KEY]), %%xmm4, %%xmm4\n\t"      \
-    "vaesenc		144(%[KEY]), %%xmm4, %%xmm4\n\t"      \
-    "cmpl		$11, %[nr]\n\t"                       \
-    "vmovdqa		160(%[KEY]), %%xmm5\n\t"              \
-    "jl			%=f\n\t"                              \
-    "vaesenc		%%xmm5, %%xmm4, %%xmm4\n\t"           \
-    "vaesenc		176(%[KEY]), %%xmm4, %%xmm4\n\t"      \
-    "cmpl		$13, %[nr]\n\t"                       \
-    "vmovdqa		192(%[KEY]), %%xmm5\n\t"              \
-    "jl			%=f\n\t"                              \
-    "vaesenc		%%xmm5, %%xmm4, %%xmm4\n\t"           \
-    "vaesenc		208(%[KEY]), %%xmm4, %%xmm4\n\t"      \
-    "vmovdqa		224(%[KEY]), %%xmm5\n\t"              \
-    "%=:\n\t"                                                 \
-    "vaesenclast	%%xmm5, %%xmm4, %%xmm4\n\t"           \
-    "vmovdqu		(%[in]," VAR(KR64) ",1), %%xmm5\n\t"  \
-    "vpxor		%%xmm5, %%xmm4, %%xmm4\n\t"           \
-    "vmovdqu		%%xmm4, (%[out]," VAR(KR64) ",1)\n\t" \
-    "vpshufb		%[BSWAP_MASK], %%xmm4, %%xmm4\n\t"    \
-    "vpxor		%%xmm4, " VAR(XR) ", " VAR(XR) "\n\t"
-
-/* Karatsuba multiplication - slower
- * H01 = H[1] ^ H[0] (top and bottom 64-bits XORed)
- */
-#define _VAESENC_GFMUL_AVX2(in, H, X, ctr1, H01)            \
-    "vpxor		   (%[KEY]), %%xmm4, %%xmm4\n\t"    \
-    "vaesenc		 16(%[KEY]), %%xmm4, %%xmm4\n\t"    \
-    "vaesenc		 32(%[KEY]), %%xmm4, %%xmm4\n\t"    \
-    "vaesenc		 48(%[KEY]), %%xmm4, %%xmm4\n\t"    \
-    "vaesenc		 64(%[KEY]), %%xmm4, %%xmm4\n\t"    \
-    "vaesenc		 80(%[KEY]), %%xmm4, %%xmm4\n\t"    \
-    "vaesenc		 96(%[KEY]), %%xmm4, %%xmm4\n\t"    \
-    "vaesenc		112(%[KEY]), %%xmm4, %%xmm4\n\t"    \
-    "vaesenc		128(%[KEY]), %%xmm4, %%xmm4\n\t"    \
-    "vaesenc		144(%[KEY]), %%xmm4, %%xmm4\n\t"    \
-    "cmpl		$11, %[nr]\n\t"                     \
-    "vmovdqa		160(%[KEY]), %%xmm5\n\t"            \
-    "jl			%=f\n\t"                            \
-    "vaesenc		%%xmm5, %%xmm4, %%xmm4\n\t"         \
-    "vaesenc		176(%[KEY]), %%xmm4, %%xmm4\n\t"    \
-    "cmpl		$13, %[nr]\n\t"                     \
-    "vmovdqa		192(%[KEY]), %%xmm5\n\t"            \
-    "jl			%=f\n\t"                            \
-    "vaesenc		%%xmm5, %%xmm4, %%xmm4\n\t"         \
-    "vaesenc		208(%[KEY]), %%xmm4, %%xmm4\n\t"    \
-    "vmovdqa		224(%[KEY]), %%xmm5\n\t"            \
-    "%=:\n\t"                                               \
-    "vaesenclast	%%xmm5, %%xmm4, %%xmm4\n\t"         \
-    "vmovdqu		" #in ", %%xmm0\n\t"                \
-    "vpxor		%%xmm0, %%xmm4, %%xmm4\n\t"         \
-                                                            \
-    "vpsrldq	$8, " #X ", %%xmm2\n\t"                     \
-    "vpxor	" #X ", %%xmm2, %%xmm2\n\t"                 \
-    "vpclmulqdq	$0x00, " #H ", " #X ", %%xmm5\n\t"          \
-    "vpclmulqdq	$0x11, " #H ", " #X ", %%xmm8\n\t"          \
-    "vpclmulqdq	$0x00, "#H01", %%xmm2, %%xmm7\n\t"          \
-    "vpxor	%%xmm5, %%xmm7, %%xmm7\n\t"                 \
-    "vpxor	%%xmm8, %%xmm7, %%xmm7\n\t"                 \
-    "vpslldq	$8, %%xmm7, %%xmm6\n\t"                     \
-    "vpsrldq	$8, %%xmm7, %%xmm7\n\t"                     \
-    "vpxor	%%xmm7, %%xmm8, %%xmm8\n\t"                 \
-    "vpxor	%%xmm5, %%xmm6, %%xmm6\n\t"                 \
-                                                            \
-    "vpclmulqdq	$0x10, %[MOD2_128], %%xmm6, %%xmm5\n\t"     \
-    "vpshufd	$0x4e, %%xmm6, %%xmm6\n\t"                  \
-    "vpxor	%%xmm5, %%xmm6, %%xmm6\n\t"                 \
-    "vpclmulqdq	$0x10, %[MOD2_128], %%xmm6, %%xmm5\n\t"     \
-    "vpshufd	$0x4e, %%xmm6, %%xmm6\n\t"                  \
-    "vpxor	%%xmm8, %%xmm6, %%xmm6\n\t"                 \
-    "vpxor	%%xmm5, %%xmm6, " VAR(XR) "\n\t"
-#define VAESENC_GFMUL_AVX2(in, H, X, ctr1)                  \
-       _VAESENC_GFMUL_AVX2(in, H, X, ctr1)
-
-#define _VAESENC_GFMUL_SB_AVX2(in, H, X, ctr1)              \
-    "vpclmulqdq	$0x10, " #H ", " #X ", %%xmm7\n\t"          \
-    "vpclmulqdq	$0x01, " #H ", " #X ", %%xmm6\n\t"          \
-    "vpclmulqdq	$0x00, " #H ", " #X ", %%xmm5\n\t"          \
-    "vpclmulqdq	$0x11, " #H ", " #X ", %%xmm8\n\t"          \
-    "vpxor		   (%[KEY]), %%xmm4, %%xmm4\n\t"    \
-    "vaesenc		 16(%[KEY]), %%xmm4, %%xmm4\n\t"    \
-    "vpxor	%%xmm6, %%xmm7, %%xmm7\n\t"                 \
-    "vpslldq	$8, %%xmm7, %%xmm6\n\t"                     \
-    "vpsrldq	$8, %%xmm7, %%xmm7\n\t"                     \
-    "vaesenc		 32(%[KEY]), %%xmm4, %%xmm4\n\t"    \
-    "vpxor	%%xmm5, %%xmm6, %%xmm6\n\t"                 \
-    "vpclmulqdq	$0x10, %[MOD2_128], %%xmm6, %%xmm5\n\t"     \
-    "vaesenc		 48(%[KEY]), %%xmm4, %%xmm4\n\t"    \
-    "vaesenc		 64(%[KEY]), %%xmm4, %%xmm4\n\t"    \
-    "vaesenc		 80(%[KEY]), %%xmm4, %%xmm4\n\t"    \
-    "vpshufd	$0x4e, %%xmm6, %%xmm6\n\t"                  \
-    "vpxor	%%xmm5, %%xmm6, %%xmm6\n\t"                 \
-    "vpclmulqdq	$0x10, %[MOD2_128], %%xmm6, %%xmm5\n\t"     \
-    "vaesenc		 96(%[KEY]), %%xmm4, %%xmm4\n\t"    \
-    "vaesenc		112(%[KEY]), %%xmm4, %%xmm4\n\t"    \
-    "vaesenc		128(%[KEY]), %%xmm4, %%xmm4\n\t"    \
-    "vpshufd	$0x4e, %%xmm6, %%xmm6\n\t"                  \
-    "vaesenc		144(%[KEY]), %%xmm4, %%xmm4\n\t"    \
-    "vpxor	%%xmm7, %%xmm8, %%xmm8\n\t"                 \
-    "vpxor	%%xmm8, %%xmm6, %%xmm6\n\t"                 \
-    "cmpl		$11, %[nr]\n\t"                     \
-    "vmovdqa		160(%[KEY]), %%xmm3\n\t"            \
-    "jl			%=f\n\t"                            \
-    "vaesenc		%%xmm3, %%xmm4, %%xmm4\n\t"         \
-    "vaesenc		176(%[KEY]), %%xmm4, %%xmm4\n\t"    \
-    "cmpl		$13, %[nr]\n\t"                     \
-    "vmovdqa		192(%[KEY]), %%xmm3\n\t"            \
-    "jl			%=f\n\t"                            \
-    "vaesenc		%%xmm3, %%xmm4, %%xmm4\n\t"         \
-    "vaesenc		208(%[KEY]), %%xmm4, %%xmm4\n\t"    \
-    "vmovdqa		224(%[KEY]), %%xmm3\n\t"            \
-    "%=:\n\t"                                               \
-    "vaesenclast	%%xmm3, %%xmm4, %%xmm4\n\t"         \
-    "vpxor	%%xmm5, %%xmm6, " VAR(XR) "\n\t"            \
-    "vmovdqu		" #in ", %%xmm5\n\t"                \
-    "vpxor		%%xmm5, %%xmm4, %%xmm4\n\t"
-#define VAESENC_GFMUL_SB_AVX2(in, H, X, ctr1)               \
-       _VAESENC_GFMUL_SB_AVX2(in, H, X, ctr1)
-
-
-#define _GHASH_GFMUL_AVX2(r, r2, a, b)         \
-    "vpclmulqdq	$0x10, "#a", "#b", %%xmm2\n\t" \
-    "vpclmulqdq	$0x01, "#a", "#b", %%xmm1\n\t" \
-    "vpclmulqdq	$0x00, "#a", "#b", %%xmm0\n\t" \
-    "vpclmulqdq	$0x11, "#a", "#b", %%xmm3\n\t" \
-    "vpxor	%%xmm1, %%xmm2, %%xmm2\n\t"    \
-    "vpslldq	$8, %%xmm2, %%xmm1\n\t"        \
-    "vpsrldq	$8, %%xmm2, %%xmm2\n\t"        \
-    "vpxor	%%xmm1, %%xmm0, "#r2"\n\t"     \
-    "vpxor	%%xmm2, %%xmm3, " #r "\n\t"
-#define GHASH_GFMUL_AVX2(r, r2, a, b)          \
-       _GHASH_GFMUL_AVX2(r, r2, a, b)
-
-#define GHASH_MID_AVX2(r, r2)               \
-    "vpsrld	$31, "#r2", %%xmm0\n\t"     \
-    "vpsrld	$31, " #r ", %%xmm1\n\t"    \
-    "vpslld	$1, "#r2", "#r2"\n\t"       \
-    "vpslld	$1, " #r ", " #r "\n\t"     \
-    "vpsrldq	$12, %%xmm0, %%xmm2\n\t"    \
-    "vpslldq	$4, %%xmm0, %%xmm0\n\t"     \
-    "vpslldq	$4, %%xmm1, %%xmm1\n\t"     \
-    "vpor	%%xmm2, " #r ", " #r "\n\t" \
-    "vpor	%%xmm0, "#r2", "#r2"\n\t"   \
-    "vpor	%%xmm1, " #r ", " #r "\n\t"
-
-#define _GHASH_GFMUL_RED_AVX2(r, a, b)                  \
-    "vpclmulqdq	$0x10, "#a", "#b", %%xmm7\n\t"          \
-    "vpclmulqdq	$0x01, "#a", "#b", %%xmm6\n\t"          \
-    "vpclmulqdq	$0x00, "#a", "#b", %%xmm5\n\t"          \
-    "vpxor	%%xmm6, %%xmm7, %%xmm7\n\t"             \
-    "vpslldq	$8, %%xmm7, %%xmm6\n\t"                 \
-    "vpsrldq	$8, %%xmm7, %%xmm7\n\t"                 \
-    "vpxor	%%xmm5, %%xmm6, %%xmm6\n\t"             \
-    "vpclmulqdq	$0x11, "#a", "#b", %%xmm8\n\t"          \
-    "vpclmulqdq	$0x10, %[MOD2_128], %%xmm6, %%xmm5\n\t" \
-    "vpshufd	$0x4e, %%xmm6, %%xmm6\n\t"              \
-    "vpxor	%%xmm5, %%xmm6, %%xmm6\n\t"             \
-    "vpclmulqdq	$0x10, %[MOD2_128], %%xmm6, %%xmm5\n\t" \
-    "vpshufd	$0x4e, %%xmm6, %%xmm6\n\t"              \
-    "vpxor	%%xmm7, %%xmm8, %%xmm8\n\t"             \
-    "vpxor	%%xmm8, %%xmm6, %%xmm6\n\t"             \
-    "vpxor	%%xmm5, %%xmm6, " #r "\n\t"
-#define GHASH_GFMUL_RED_AVX2(r, a, b)                   \
-       _GHASH_GFMUL_RED_AVX2(r, a, b)
-
-#define _GHASH_GFSQR_RED2_AVX2(r, a, mod128)            \
-    "vpclmulqdq	$0x00, "#a", "#a", %%xmm6\n\t"          \
-    "vpclmulqdq	$0x11, "#a", "#a", %%xmm8\n\t"          \
-    "vpclmulqdq	$0x10, "#mod128", %%xmm6, %%xmm5\n\t"   \
-    "vpshufd	$0x4e, %%xmm6, %%xmm6\n\t"              \
-    "vpxor	%%xmm5, %%xmm6, %%xmm6\n\t"             \
-    "vpclmulqdq	$0x10, "#mod128", %%xmm6, %%xmm5\n\t"   \
-    "vpshufd	$0x4e, %%xmm6, %%xmm6\n\t"              \
-    "vpxor	%%xmm5, %%xmm6, %%xmm6\n\t"             \
-    "vpxor	%%xmm6, %%xmm8, " #r "\n\t"
-#define GHASH_GFSQR_RED2_AVX2(r, a, mod128)             \
-       _GHASH_GFSQR_RED2_AVX2(r, a, mod128)
-
-#define _GHASH_GFMUL_SQR_RED2_AVX2(rm, rs, a, b, mod128) \
-    "vpclmulqdq	$0x10, "#a", "#b", %%xmm7\n\t"           \
-    "vpclmulqdq	$0x01, "#a", "#b", %%xmm6\n\t"           \
-    "vpclmulqdq	$0x00, "#a", "#b", %%xmm5\n\t"           \
-    "vpclmulqdq	$0x11, "#a", "#b", %%xmm8\n\t"           \
-    "vpclmulqdq	$0x00, "#b", "#b", %%xmm9\n\t"           \
-    "vpclmulqdq	$0x11, "#b", "#b", %%xmm10\n\t"          \
-    "vpxor	%%xmm6, %%xmm7, %%xmm7\n\t"              \
-    "vpslldq	$8, %%xmm7, %%xmm6\n\t"                  \
-    "vpsrldq	$8, %%xmm7, %%xmm7\n\t"                  \
-    "vpxor	%%xmm5, %%xmm6, %%xmm6\n\t"              \
-    "vpclmulqdq	$0x10, "#mod128", %%xmm9, %%xmm4\n\t"    \
-    "vpclmulqdq	$0x10, "#mod128", %%xmm6, %%xmm5\n\t"    \
-    "vpshufd	$0x4e, %%xmm6, %%xmm6\n\t"               \
-    "vpshufd	$0x4e, %%xmm9, %%xmm9\n\t"               \
-    "vpxor	%%xmm5, %%xmm6, %%xmm6\n\t"              \
-    "vpxor	%%xmm4, %%xmm9, %%xmm9\n\t"              \
-    "vpclmulqdq	$0x10, "#mod128", %%xmm6, %%xmm5\n\t"    \
-    "vpclmulqdq	$0x10, "#mod128", %%xmm9, %%xmm4\n\t"    \
-    "vpshufd	$0x4e, %%xmm6, %%xmm6\n\t"               \
-    "vpshufd	$0x4e, %%xmm9, %%xmm9\n\t"               \
-    "vpxor	%%xmm7, %%xmm8, %%xmm8\n\t"              \
-    "vpxor	%%xmm4, %%xmm9, %%xmm9\n\t"              \
-    "vpxor	%%xmm8, %%xmm6, %%xmm6\n\t"              \
-    "vpxor	%%xmm10, %%xmm9, "#rs"\n\t"              \
-    "vpxor	%%xmm5, %%xmm6, "#rm"\n\t"
-#define GHASH_GFMUL_SQR_RED2_AVX2(rm, rs, a, b, mod128)  \
-       _GHASH_GFMUL_SQR_RED2_AVX2(rm, rs, a, b, mod128)
-
-#define CALC_HT_8_AVX2()                                                \
-    "vmovdqa	%[MOD2_128], %%xmm11\n\t"                               \
-    "vmovdqa	" VAR(XR) ", %%xmm2\n\t"                                \
-    "# H ^ 1 and H ^ 2\n\t"                                             \
-    GHASH_GFSQR_RED2_AVX2(%%xmm0, HR, %%xmm11)                          \
-    "vmovdqu	" VAR(HR) ", 0(" VAR(HTR) ")\n\t"                       \
-    "vmovdqu	%%xmm0 ,  16(" VAR(HTR) ")\n\t"                         \
-    "# H ^ 3 and H ^ 4\n\t"                                             \
-    GHASH_GFMUL_SQR_RED2_AVX2(%%xmm1, %%xmm3, HR, %%xmm0, %%xmm11)      \
-    "vmovdqu	%%xmm1 ,  32(" VAR(HTR) ")\n\t"                         \
-    "vmovdqu	%%xmm3 ,  48(" VAR(HTR) ")\n\t"                         \
-    "# H ^ 5 and H ^ 6\n\t"                                             \
-    GHASH_GFMUL_SQR_RED2_AVX2(%%xmm12, %%xmm0, %%xmm0, %%xmm1, %%xmm11) \
-    "vmovdqu	%%xmm12,  64(" VAR(HTR) ")\n\t"                         \
-    "vmovdqu	%%xmm0 ,  80(" VAR(HTR) ")\n\t"                         \
-    "# H ^ 7 and H ^ 8\n\t"                                             \
-    GHASH_GFMUL_SQR_RED2_AVX2(%%xmm12, %%xmm0, %%xmm1, %%xmm3, %%xmm11) \
-    "vmovdqu	%%xmm12,  96(" VAR(HTR) ")\n\t"                         \
-    "vmovdqu	%%xmm0 , 112(" VAR(HTR) ")\n\t"
-
-#define _GHASH_RED_AVX2(r, r2)                     \
-    "vmovdqa	%[MOD2_128], %%xmm2\n\t"           \
-    "vpclmulqdq	$0x10, %%xmm2, "#r2", %%xmm0\n\t"  \
-    "vpshufd	$0x4e, "#r2", %%xmm1\n\t"          \
-    "vpxor	%%xmm0, %%xmm1, %%xmm1\n\t"        \
-    "vpclmulqdq	$0x10, %%xmm2, %%xmm1, %%xmm0\n\t" \
-    "vpshufd	$0x4e, %%xmm1, %%xmm1\n\t"         \
-    "vpxor	%%xmm0, %%xmm1, %%xmm1\n\t"        \
-    "vpxor	%%xmm1, " #r ", " #r "\n\t"
-#define GHASH_RED_AVX2(r, r2)                      \
-       _GHASH_RED_AVX2(r, r2)
-
-#define GHASH_FULL_AVX2(r, r2, a, b) \
-    GHASH_GFMUL_AVX2(r, r2, a, b)    \
-    GHASH_MID_AVX2(r, r2)            \
-    GHASH_RED_AVX2(r, r2)
-
-#define _GFMUL_3V_AVX2(r, r2, r3, a, b)        \
-    "vpclmulqdq	$0x10, "#a", "#b", "#r3"\n\t"  \
-    "vpclmulqdq	$0x01, "#a", "#b", %%xmm1\n\t" \
-    "vpclmulqdq	$0x00, "#a", "#b", "#r2"\n\t"  \
-    "vpclmulqdq	$0x11, "#a", "#b", " #r "\n\t" \
-    "vpxor	%%xmm1, "#r3", "#r3"\n\t"
-#define GFMUL_3V_AVX2(r, r2, r3, a, b)         \
-       _GFMUL_3V_AVX2(r, r2, r3, a, b)
-
-#define _GFMUL_XOR_3V_AVX2(r, r2, r3, a, b)    \
-    "vpclmulqdq	$0x10, "#a", "#b", %%xmm2\n\t" \
-    "vpclmulqdq	$0x01, "#a", "#b", %%xmm1\n\t" \
-    "vpclmulqdq	$0x00, "#a", "#b", %%xmm0\n\t" \
-    "vpclmulqdq	$0x11, "#a", "#b", %%xmm3\n\t" \
-    "vpxor	%%xmm1, %%xmm2, %%xmm2\n\t"    \
-    "vpxor	%%xmm3, " #r ", " #r "\n\t"    \
-    "vpxor	%%xmm2, "#r3", "#r3"\n\t"      \
-    "vpxor	%%xmm0, "#r2", "#r2"\n\t"
-#define GFMUL_XOR_3V_AVX2(r, r2, r3, a, b)     \
-       _GFMUL_XOR_3V_AVX2(r, r2, r3, a, b)
-
-#define GHASH_GFMUL_RED_8_AVX2()                              \
-    "vmovdqu	   (" VAR(HTR) "), %%xmm12\n\t"               \
-    GFMUL_3V_AVX2(XR, %%xmm13, %%xmm14, %%xmm11, %%xmm12)     \
-    "vmovdqu	 16(" VAR(HTR) "), %%xmm12\n\t"               \
-    GFMUL_XOR_3V_AVX2(XR, %%xmm13, %%xmm14, %%xmm10, %%xmm12) \
-    "vmovdqu	 32(" VAR(HTR) "), %%xmm11\n\t"               \
-    "vmovdqu	 48(" VAR(HTR) "), %%xmm12\n\t"               \
-    GFMUL_XOR_3V_AVX2(XR, %%xmm13, %%xmm14, %%xmm9, %%xmm11)  \
-    GFMUL_XOR_3V_AVX2(XR, %%xmm13, %%xmm14, %%xmm8, %%xmm12)  \
-    "vmovdqu	 64(" VAR(HTR) "), %%xmm11\n\t"               \
-    "vmovdqu	 80(" VAR(HTR) "), %%xmm12\n\t"               \
-    GFMUL_XOR_3V_AVX2(XR, %%xmm13, %%xmm14, %%xmm7, %%xmm11)  \
-    GFMUL_XOR_3V_AVX2(XR, %%xmm13, %%xmm14, %%xmm6, %%xmm12)  \
-    "vmovdqu	 96(" VAR(HTR) "), %%xmm11\n\t"               \
-    "vmovdqu	112(" VAR(HTR) "), %%xmm12\n\t"               \
-    GFMUL_XOR_3V_AVX2(XR, %%xmm13, %%xmm14, %%xmm5, %%xmm11)  \
-    GFMUL_XOR_3V_AVX2(XR, %%xmm13, %%xmm14, %%xmm4, %%xmm12)  \
-    "vpslldq	$8, %%xmm14, %%xmm12\n\t"                     \
-    "vpsrldq	$8, %%xmm14, %%xmm14\n\t"                     \
-    "vpxor	%%xmm12, %%xmm13, %%xmm13\n\t"                \
-    "vpxor	%%xmm14, " VAR(XR) ", " VAR(XR) "\n\t"        \
-    GHASH_RED_AVX2(XR, %%xmm13)
-
-#define CALC_IV_12_AVX2()                                            \
-    "# Calculate values when IV is 12 bytes\n\t"                     \
-    "# Set counter based on IV\n\t"                                  \
-    "movl		$0x01000000, %%ecx\n\t"                      \
-    "vpinsrq		$0, 0(%%rax), %%xmm13, %%xmm13\n\t"          \
-    "vpinsrd		$2, 8(%%rax), %%xmm13, %%xmm13\n\t"          \
-    "vpinsrd		$3, %%ecx, %%xmm13, %%xmm13\n\t"             \
-    "# H = Encrypt X(=0) and T = Encrypt counter\n\t"                \
-    "vmovdqa		  0(%[KEY]), " VAR(HR) "\n\t"                \
-    "vmovdqa		 16(%[KEY]), %%xmm12\n\t"                    \
-    "vpxor		" VAR(HR) ", %%xmm13, %%xmm1\n\t"            \
-    "vaesenc		%%xmm12, " VAR(HR) ", " VAR(HR) "\n\t"       \
-    "vaesenc		%%xmm12, %%xmm1, %%xmm1\n\t"                 \
-    "vmovdqa		 32(%[KEY]), %%xmm0\n\t"                     \
-    "vmovdqa		 48(%[KEY]), %%xmm12\n\t"                    \
-    "vaesenc		%%xmm0, " VAR(HR) ", " VAR(HR) "\n\t"        \
-    "vaesenc		%%xmm0, %%xmm1, %%xmm1\n\t"                  \
-    "vaesenc		%%xmm12, " VAR(HR) ", " VAR(HR) "\n\t"       \
-    "vaesenc		%%xmm12, %%xmm1, %%xmm1\n\t"                 \
-    "vmovdqa		 64(%[KEY]), %%xmm0\n\t"                     \
-    "vmovdqa		 80(%[KEY]), %%xmm12\n\t"                    \
-    "vaesenc		%%xmm0, " VAR(HR) ", " VAR(HR) "\n\t"        \
-    "vaesenc		%%xmm0, %%xmm1, %%xmm1\n\t"                  \
-    "vaesenc		%%xmm12, " VAR(HR) ", " VAR(HR) "\n\t"       \
-    "vaesenc		%%xmm12, %%xmm1, %%xmm1\n\t"                 \
-    "vmovdqa		 96(%[KEY]), %%xmm0\n\t"                     \
-    "vmovdqa		112(%[KEY]), %%xmm12\n\t"                    \
-    "vaesenc		%%xmm0, " VAR(HR) ", " VAR(HR) "\n\t"        \
-    "vaesenc		%%xmm0, %%xmm1, %%xmm1\n\t"                  \
-    "vaesenc		%%xmm12, " VAR(HR) ", " VAR(HR) "\n\t"       \
-    "vaesenc		%%xmm12, %%xmm1, %%xmm1\n\t"                 \
-    "vmovdqa		128(%[KEY]), %%xmm0\n\t"                     \
-    "vmovdqa		144(%[KEY]), %%xmm12\n\t"                    \
-    "vaesenc		%%xmm0, " VAR(HR) ", " VAR(HR) "\n\t"        \
-    "vaesenc		%%xmm0, %%xmm1, %%xmm1\n\t"                  \
-    "vaesenc		%%xmm12, " VAR(HR) ", " VAR(HR) "\n\t"       \
-    "vaesenc		%%xmm12, %%xmm1, %%xmm1\n\t"                 \
-    "cmpl		$11, %[nr]\n\t"                              \
-    "vmovdqa		160(%[KEY]), %%xmm0\n\t"                     \
-    "jl	31f\n\t"                                                     \
-    "vmovdqa		176(%[KEY]), %%xmm12\n\t"                    \
-    "vaesenc		%%xmm0, " VAR(HR) ", " VAR(HR) "\n\t"        \
-    "vaesenc		%%xmm0, %%xmm1, %%xmm1\n\t"                  \
-    "vaesenc		%%xmm12, " VAR(HR) ", " VAR(HR) "\n\t"       \
-    "vaesenc		%%xmm12, %%xmm1, %%xmm1\n\t"                 \
-    "cmpl		$13, %[nr]\n\t"                              \
-    "vmovdqa		192(%[KEY]), %%xmm0\n\t"                     \
-    "jl	31f\n\t"                                                     \
-    "vmovdqa		208(%[KEY]), %%xmm12\n\t"                    \
-    "vaesenc		%%xmm0, " VAR(HR) ", " VAR(HR) "\n\t"        \
-    "vaesenc		%%xmm0, %%xmm1, %%xmm1\n\t"                  \
-    "vaesenc		%%xmm12, " VAR(HR) ", " VAR(HR) "\n\t"       \
-    "vaesenc		%%xmm12, %%xmm1, %%xmm1\n\t"                 \
-    "vmovdqu		224(%[KEY]), %%xmm0\n\t"                     \
-    "31:\n\t"                                                        \
-    "vaesenclast	%%xmm0, " VAR(HR) ", " VAR(HR) "\n\t"        \
-    "vaesenclast	%%xmm0, %%xmm1, %%xmm1\n\t"                  \
-    "vpshufb		%[BSWAP_MASK], " VAR(HR) ", " VAR(HR) "\n\t" \
-    "vmovdqu		%%xmm1, " VAR(TR) "\n\t"                     \
-
-#define CALC_IV_AVX2()                                       \
-    "# Calculate values when IV is not 12 bytes\n\t"         \
-    "# H = Encrypt X(=0)\n\t"                                \
-    "vmovdqa	0(%[KEY]), " VAR(HR) "\n\t"                  \
-    VAESENC_AVX(HR)                                          \
-    "vpshufb	%[BSWAP_MASK], " VAR(HR) ", " VAR(HR) "\n\t" \
-    "# Calc counter\n\t"                                     \
-    "# Initialization vector\n\t"                            \
-    "cmpl	$0, %%edx\n\t"                               \
-    "movq	$0, %%rcx\n\t"                               \
-    "je	45f\n\t"                                             \
-    "cmpl	$16, %%edx\n\t"                              \
-    "jl	44f\n\t"                                             \
-    "andl	$0xfffffff0, %%edx\n\t"                      \
-    "\n"                                                     \
-    "43:\n\t"                                                \
-    "vmovdqu	(%%rax,%%rcx,1), %%xmm4\n\t"                 \
-    "vpshufb	%[BSWAP_MASK], %%xmm4, %%xmm4\n\t"           \
-    "vpxor	%%xmm4, %%xmm13, %%xmm13\n\t"                \
-    GHASH_FULL_AVX2(%%xmm13, %%xmm12, %%xmm13, HR)           \
-    "addl	$16, %%ecx\n\t"                              \
-    "cmpl	%%edx, %%ecx\n\t"                            \
-    "jl	43b\n\t"                                             \
-    "movl	%[ibytes], %%edx\n\t"                        \
-    "cmpl	%%edx, %%ecx\n\t"                            \
-    "je	45f\n\t"                                             \
-    "\n"                                                     \
-    "44:\n\t"                                                \
-    "subq	$16, %%rsp\n\t"                              \
-    "vpxor	%%xmm4, %%xmm4, %%xmm4\n\t"                  \
-    "xorl	%%ebx, %%ebx\n\t"                            \
-    "vmovdqu	%%xmm4, (%%rsp)\n\t"                         \
-    "42:\n\t"                                                \
-    "movzbl	(%%rax,%%rcx,1), %%r13d\n\t"                 \
-    "movb	%%r13b, (%%rsp,%%rbx,1)\n\t"                 \
-    "incl	%%ecx\n\t"                                   \
-    "incl	%%ebx\n\t"                                   \
-    "cmpl	%%edx, %%ecx\n\t"                            \
-    "jl	42b\n\t"                                             \
-    "vmovdqu	(%%rsp), %%xmm4\n\t"                         \
-    "addq	$16, %%rsp\n\t"                              \
-    "vpshufb	%[BSWAP_MASK], %%xmm4, %%xmm4\n\t"           \
-    "vpxor	%%xmm4, %%xmm13, %%xmm13\n\t"                \
-    GHASH_FULL_AVX2(%%xmm13, %%xmm12, %%xmm13, HR)           \
-    "\n"                                                     \
-    "45:\n\t"                                                \
-    "# T = Encrypt counter\n\t"                              \
-    "vpxor	%%xmm0, %%xmm0, %%xmm0\n\t"                  \
-    "shll	$3, %%edx\n\t"                               \
-    "vpinsrq	$0, %%rdx, %%xmm0, %%xmm0\n\t"               \
-    "vpxor	%%xmm0, %%xmm13, %%xmm13\n\t"                \
-    GHASH_FULL_AVX2(%%xmm13, %%xmm12, %%xmm13, HR)           \
-    "vpshufb	%[BSWAP_MASK], %%xmm13, %%xmm13\n\t"         \
-    "#   Encrypt counter\n\t"                                \
-    "vmovdqa	0(%[KEY]), %%xmm4\n\t"                       \
-    "vpxor	%%xmm13, %%xmm4, %%xmm4\n\t"                 \
-    VAESENC_AVX(%%xmm4)                                      \
-    "vmovdqu	%%xmm4, " VAR(TR) "\n\t"
-
-#define CALC_AAD_AVX2()                                \
-    "# Additional authentication data\n\t"             \
-    "movl	%[abytes], %%edx\n\t"                  \
-    "cmpl	$0, %%edx\n\t"                         \
-    "je		25f\n\t"                               \
-    "movq	%[addt], %%rax\n\t"                    \
-    "xorl	%%ecx, %%ecx\n\t"                      \
-    "cmpl	$16, %%edx\n\t"                        \
-    "jl		24f\n\t"                               \
-    "andl	$0xfffffff0, %%edx\n\t"                \
-    "\n"                                               \
-    "23:\n\t"                                          \
-    "vmovdqu	(%%rax,%%rcx,1), %%xmm4\n\t"           \
-    "vpshufb	%[BSWAP_MASK], %%xmm4, %%xmm4\n\t"     \
-    "vpxor	%%xmm4, " VAR(XR) ", " VAR(XR) "\n\t"  \
-    GHASH_FULL_AVX2(XR, %%xmm12, XR, HR)               \
-    "addl	$16, %%ecx\n\t"                        \
-    "cmpl	%%edx, %%ecx\n\t"                      \
-    "jl		23b\n\t"                               \
-    "movl	%[abytes], %%edx\n\t"                  \
-    "cmpl	%%edx, %%ecx\n\t"                      \
-    "je		25f\n\t"                               \
-    "\n"                                               \
-    "24:\n\t"                                          \
-    "subq	$16, %%rsp\n\t"                        \
-    "vpxor	%%xmm4, %%xmm4, %%xmm4\n\t"            \
-    "xorl	%%ebx, %%ebx\n\t"                      \
-    "vmovdqu	%%xmm4, (%%rsp)\n\t"                   \
-    "22:\n\t"                                          \
-    "movzbl	(%%rax,%%rcx,1), %%r13d\n\t"           \
-    "movb	%%r13b, (%%rsp,%%rbx,1)\n\t"           \
-    "incl	%%ecx\n\t"                             \
-    "incl	%%ebx\n\t"                             \
-    "cmpl	%%edx, %%ecx\n\t"                      \
-    "jl		22b\n\t"                               \
-    "vmovdqu	(%%rsp), %%xmm4\n\t"                   \
-    "addq	$16, %%rsp\n\t"                        \
-    "vpshufb	%[BSWAP_MASK], %%xmm4, %%xmm4\n\t"     \
-    "vpxor	%%xmm4, " VAR(XR) ", " VAR(XR) "\n\t"  \
-    GHASH_FULL_AVX2(XR, %%xmm12, XR, HR)               \
-    "\n"                                               \
-    "25:\n\t"
-
-#define VAESENC_128_GHASH_AVX2(src, o)               \
-    "leaq	(%[in]," VAR(KR64) ",1), %%rcx\n\t"  \
-    "leaq	(%[out]," VAR(KR64) ",1), %%rdx\n\t" \
-    /* src is either %%rcx or %%rdx */             \
-    VAESENC_CTR()                                  \
-    VAESENC_XOR()                                  \
-    VAESENC_PCLMUL_AVX2_1(src,  16, (o-128), 112)  \
-    VAESENC_PCLMUL_AVX2_2(src,  32, (o-112),  96)  \
-    VAESENC_PCLMUL_AVX2_N(src,  48, (o- 96),  80)  \
-    VAESENC_PCLMUL_AVX2_N(src,  64, (o- 80),  64)  \
-    VAESENC_PCLMUL_AVX2_N(src,  80, (o- 64),  48)  \
-    VAESENC_PCLMUL_AVX2_N(src,  96, (o- 48),  32)  \
-    VAESENC_PCLMUL_AVX2_N(src, 112, (o- 32),  16)  \
-    VAESENC_PCLMUL_AVX2_N(src, 128, (o- 16),   0)  \
-    VAESENC_PCLMUL_AVX2_L(144)                     \
-    "cmpl	$11, %[nr]\n\t"                    \
-    "vmovdqa	160(%[KEY]), %%xmm12\n\t"          \
-    "jl		4f\n\t"                            \
-    VAESENC()                                      \
-    VAESENC_SET(176)                               \
-    "cmpl	$13, %[nr]\n\t"                    \
-    "vmovdqa	192(%[KEY]), %%xmm12\n\t"          \
-    "jl		4f\n\t"                            \
-    VAESENC()                                      \
-    VAESENC_SET(208)                               \
-    "vmovdqa	224(%[KEY]), %%xmm12\n\t"          \
-    "\n"                                           \
-"4:\n\t"                                           \
-    VAESENC_LAST(%%rcx, %%rdx)
-
-#define AESENC_LAST15_ENC_AVX2()                        \
-    "movl	%[nbytes], %%ecx\n\t"                   \
-    "movl	%%ecx, %%edx\n\t"                       \
-    "andl	$0x0f, %%ecx\n\t"                       \
-    "jz		55f\n\t"                                \
-    "vmovdqu	" VAR(CTR1) ", %%xmm13\n\t"             \
-    "vpshufb	%[BSWAP_EPI64], %%xmm13, %%xmm13\n\t"   \
-    "vpxor	0(%[KEY]), %%xmm13, %%xmm13\n\t"        \
-    VAESENC_AVX(%%xmm13)                                \
-    "subq	$16, %%rsp\n\t"                         \
-    "xorl	%%ecx, %%ecx\n\t"                       \
-    "vmovdqu	%%xmm13, (%%rsp)\n\t"                   \
-    "\n"                                                \
-    "51:\n\t"                                           \
-    "movzbl	(%[in]," VAR(KR64) ",1), %%r13d\n\t"    \
-    "xorb	(%%rsp,%%rcx,1), %%r13b\n\t"            \
-    "movb	%%r13b, (%[out]," VAR(KR64) ",1)\n\t"   \
-    "movb	%%r13b, (%%rsp,%%rcx,1)\n\t"            \
-    "incl	" VAR(KR) "\n\t"                        \
-    "incl	%%ecx\n\t"                              \
-    "cmpl	%%edx, " VAR(KR) "\n\t"                 \
-    "jl		51b\n\t"                                \
-    "xorq	%%r13, %%r13\n\t"                       \
-    "cmpl	$16, %%ecx\n\t"                         \
-    "je		53f\n\t"                                \
-    "\n"                                                \
-    "52:\n\t"                                           \
-    "movb	%%r13b, (%%rsp,%%rcx,1)\n\t"            \
-    "incl	%%ecx\n\t"                              \
-    "cmpl	$16, %%ecx\n\t"                         \
-    "jl		52b\n\t"                                \
-    "53:\n\t"                                           \
-    "vmovdqu	(%%rsp), %%xmm13\n\t"                   \
-    "addq	$16, %%rsp\n\t"                         \
-    "vpshufb	%[BSWAP_MASK], %%xmm13, %%xmm13\n\t"    \
-    "vpxor	%%xmm13, " VAR(XR) ", " VAR(XR) "\n\t"  \
-    GHASH_GFMUL_RED_AVX2(XR, HR, XR)                    \
-
-#define AESENC_LAST15_DEC_AVX2()                        \
-    "movl	%[nbytes], %%ecx\n\t"                   \
-    "movl	%%ecx, %%edx\n\t"                       \
-    "andl	$0x0f, %%ecx\n\t"                       \
-    "jz		55f\n\t"                                \
-    "vmovdqu	" VAR(CTR1) ", %%xmm13\n\t"             \
-    "vpshufb	%[BSWAP_EPI64], %%xmm13, %%xmm13\n\t"   \
-    "vpxor	0(%[KEY]), %%xmm13, %%xmm13\n\t"        \
-    VAESENC_AVX(%%xmm13)                                \
-    "subq	$32, %%rsp\n\t"                         \
-    "xorl	%%ecx, %%ecx\n\t"                       \
-    "vmovdqu	%%xmm13, (%%rsp)\n\t"                   \
-    "vpxor	%%xmm0, %%xmm0, %%xmm0\n\t"             \
-    "vmovdqu	%%xmm0, 16(%%rsp)\n\t"                  \
-    "\n"                                                \
-    "51:\n\t"                                           \
-    "movzbl	(%[in]," VAR(KR64) ",1), %%r13d\n\t"    \
-    "movb	%%r13b, 16(%%rsp,%%rcx,1)\n\t"          \
-    "xorb	(%%rsp,%%rcx,1), %%r13b\n\t"            \
-    "movb	%%r13b, (%[out]," VAR(KR64) ",1)\n\t"   \
-    "incl	" VAR(KR) "\n\t"                        \
-    "incl	%%ecx\n\t"                              \
-    "cmpl	%%edx, " VAR(KR) "\n\t"                 \
-    "jl		51b\n\t"                                \
-    "53:\n\t"                                           \
-    "vmovdqu	16(%%rsp), %%xmm13\n\t"                 \
-    "addq	$32, %%rsp\n\t"                         \
-    "vpshufb	%[BSWAP_MASK], %%xmm13, %%xmm13\n\t"    \
-    "vpxor	%%xmm13, " VAR(XR) ", " VAR(XR) "\n\t"  \
-    GHASH_GFMUL_RED_AVX2(XR, HR, XR)                    \
-
-#define CALC_TAG_AVX2()                                      \
-    "movl	%[nbytes], %%edx\n\t"                        \
-    "movl	%[abytes], %%ecx\n\t"                        \
-    "shlq	$3, %%rdx\n\t"                               \
-    "shlq	$3, %%rcx\n\t"                               \
-    "vpinsrq	$0, %%rdx, %%xmm0, %%xmm0\n\t"               \
-    "vpinsrq	$1, %%rcx, %%xmm0, %%xmm0\n\t"               \
-    "vpxor	%%xmm0, " VAR(XR) ", " VAR(XR) "\n\t"        \
-    GHASH_GFMUL_RED_AVX2(XR, HR, XR)                         \
-    "vpshufb	%[BSWAP_MASK], " VAR(XR) ", " VAR(XR) "\n\t" \
-    "vpxor	" VAR(TR) ", " VAR(XR) ", %%xmm0\n\t"        \
-
-
-static void AES_GCM_encrypt_avx2(const unsigned char *in, unsigned char *out,
-                                 const unsigned char* addt,
-                                 const unsigned char* ivec, unsigned char *tag,
-                                 unsigned int nbytes, unsigned int abytes,
-                                 unsigned int ibytes, unsigned int tbytes,
-                                 const unsigned char* key, int nr)
-{
-    register const unsigned char* iv asm("rax") = ivec;
-    register unsigned int ivLen asm("ebx") = ibytes;
-
-    __asm__ __volatile__ (
-        "subq		$" VAR(STACK_OFFSET) ", %%rsp\n\t"
-        /* Counter is xmm13 */
-        "vpxor		%%xmm13, %%xmm13, %%xmm13\n\t"
-        "vpxor		" VAR(XR) ", " VAR(XR) ", " VAR(XR) "\n\t"
-        "movl		%[ibytes], %%edx\n\t"
-        "cmpl		$12, %%edx\n\t"
-        "jne		35f\n\t"
-        CALC_IV_12_AVX2()
-        "jmp		39f\n\t"
-        "\n"
-        "35:\n\t"
-        CALC_IV_AVX2()
-        "\n"
-        "39:\n\t"
-
-        CALC_AAD_AVX2()
-
-        "# Calculate counter and H\n\t"
-        "vpsrlq		$63, " VAR(HR) ", %%xmm5\n\t"
-        "vpsllq		$1, " VAR(HR) ", %%xmm4\n\t"
-        "vpslldq	$8, %%xmm5, %%xmm5\n\t"
-        "vpor		%%xmm5, %%xmm4, %%xmm4\n\t"
-        "vpshufd	$0xff, " VAR(HR) ", " VAR(HR) "\n\t"
-        "vpsrad		$31, " VAR(HR) ", " VAR(HR) "\n\t"
-        "vpshufb	%[BSWAP_EPI64], %%xmm13, %%xmm13\n\t"
-        "vpand		%[MOD2_128], " VAR(HR) ", " VAR(HR) "\n\t"
-        "vpaddd		%[ONE], %%xmm13, %%xmm13\n\t"
-        "vpxor		%%xmm4, " VAR(HR) ", " VAR(HR) "\n\t"
-        "vmovdqu	%%xmm13, " VAR(CTR1) "\n\t"
-
-        "xorl		" VAR(KR) ", " VAR(KR) "\n\t"
-
-#if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX2_NO_UNROLL)
-        "cmpl	$128, %[nbytes]\n\t"
-        "movl	%[nbytes], %%r13d\n\t"
-        "jl	5f\n\t"
-        "andl	$0xffffff80, %%r13d\n\t"
-
-        CALC_HT_8_AVX2()
-
-        "# First 128 bytes of input\n\t"
-        VAESENC_128()
-
-        "cmpl	$128, %%r13d\n\t"
-        "movl	$128, " VAR(KR) "\n\t"
-        "jle	2f\n\t"
-
-        "# More 128 bytes of input\n\t"
-        "\n"
-    "3:\n\t"
-        VAESENC_128_GHASH_AVX2(%%rdx, 0)
-        "addl	$128, " VAR(KR) "\n\t"
-        "cmpl	%%r13d, " VAR(KR) "\n\t"
-        "jl	3b\n\t"
-        "\n"
-    "2:\n\t"
-        "vmovdqa	%[BSWAP_MASK], %%xmm13\n\t"
-        "vpshufb	%%xmm13, %%xmm4, %%xmm4\n\t"
-        "vpshufb	%%xmm13, %%xmm5, %%xmm5\n\t"
-        "vpshufb	%%xmm13, %%xmm6, %%xmm6\n\t"
-        "vpshufb	%%xmm13, %%xmm7, %%xmm7\n\t"
-        "vpshufb	%%xmm13, %%xmm8, %%xmm8\n\t"
-        "vpshufb	%%xmm13, %%xmm9, %%xmm9\n\t"
-        "vpshufb	%%xmm13, %%xmm10, %%xmm10\n\t"
-        "vpshufb	%%xmm13, %%xmm11, %%xmm11\n\t"
-        "vpxor		%%xmm2, %%xmm4, %%xmm4\n\t"
-
-        GHASH_GFMUL_RED_8_AVX2()
-
-        "vmovdqu	0(" VAR(HTR) "), " VAR(HR) "\n\t"
-        "\n"
-    "5:\n\t"
-        "movl		%[nbytes], %%edx\n\t"
-        "cmpl		%%edx, " VAR(KR) "\n\t"
-        "jge		55f\n\t"
-#endif
-
-        "movl		%[nbytes], %%r13d\n\t"
-        "andl		$0xfffffff0, %%r13d\n\t"
-        "cmpl		%%r13d, " VAR(KR) "\n\t"
-        "jge		14f\n\t"
-
-        VAESENC_BLOCK_AVX2()
-        "addl		$16, " VAR(KR) "\n\t"
-        "cmpl		%%r13d, " VAR(KR) "\n\t"
-        "jge		13f\n\t"
-        "vmovdqa	%[MOD2_128], %%xmm0\n\t"
-        "\n"
-        "12:\n\t"
-        "vmovdqu	(%[in]," VAR(KR64) ",1), %%xmm9\n\t"
-        "vmovdqu	" VAR(CTR1) ", %%xmm5\n\t"
-        "vpshufb	%[BSWAP_EPI64], %%xmm5, %%xmm4\n\t"
-        "vpaddd		%[ONE], %%xmm5, %%xmm5\n\t"
-        "vmovdqu	%%xmm5, " VAR(CTR1) "\n\t"
-        VAESENC_GFMUL_SB_AVX2(%%xmm9, HR, XR, CTR1)
-        "vmovdqu	%%xmm4, (%[out]," VAR(KR64) ",1)\n\t"
-        "vpshufb	%[BSWAP_MASK], %%xmm4, %%xmm4\n\t"
-        "addl		$16, " VAR(KR) "\n\t"
-        "vpxor		%%xmm4, " VAR(XR) ", " VAR(XR) "\n\t"
-        "cmpl		%%r13d, " VAR(KR) "\n\t"
-        "jl		12b\n\t"
-        "\n"
-        "13:\n\t"
-        GHASH_GFMUL_RED_AVX2(XR, HR, XR)
-        "\n"
-        "14:\n\t"
-
-        AESENC_LAST15_ENC_AVX2()
-        "\n"
-        "55:\n\t"
-
-        CALC_TAG_AVX2()
-        STORE_TAG_AVX()
-        "addq		$" VAR(STACK_OFFSET) ", %%rsp\n\t"
-        "vzeroupper\n\t"
-
-        :
-        : [KEY] "r" (key),
-          [in] "r" (in), [out] "r" (out), [nr] "r" (nr),
-          [nbytes] "r" (nbytes), [abytes] "r" (abytes), [addt] "r" (addt),
-          [ivec] "r" (iv), [ibytes] "r" (ivLen), [tbytes] "r" (tbytes),
-          [tag] "r" (tag),
-          [BSWAP_MASK] "m" (BSWAP_MASK),
-          [BSWAP_EPI64] "m" (BSWAP_EPI64),
-          [ONE] "m" (ONE),
-#if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX2_NO_UNROLL)
-          [TWO] "m" (TWO), [THREE] "m" (THREE), [FOUR] "m" (FOUR),
-          [FIVE] "m" (FIVE), [SIX] "m" (SIX), [SEVEN] "m" (SEVEN),
-          [EIGHT] "m" (EIGHT),
-#endif
-          [MOD2_128] "m" (MOD2_128)
-        : "xmm15", "xmm14", "xmm13", "xmm12",
-          "xmm0", "xmm1", "xmm2", "xmm3", "memory",
-          "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11",
-          "rcx", "rdx", "r13"
-    );
-}
-#endif /* HAVE_INTEL_AVX2 */
-#endif /* HAVE_INTEL_AVX1 */
-
-#ifdef HAVE_AES_DECRYPT
-/* Figure 10. AES-GCM – Decrypt With Single Block Ghash at a Time */
-
-static void AES_GCM_decrypt(const unsigned char *in, unsigned char *out,
-                            const unsigned char* addt,
-                            const unsigned char* ivec, const unsigned char *tag,
-                            int nbytes, int abytes, int ibytes, int tbytes,
-                            const unsigned char* key, int nr, int* res)
-{
-    register const unsigned char* iv asm("rax") = ivec;
-    register int ivLen asm("ebx") = ibytes;
-    register int tagLen asm("edx") = tbytes;
-
-    __asm__ __volatile__ (
-        "pushq		%%rdx\n\t"
-        "subq		$" VAR(STACK_OFFSET) ", %%rsp\n\t"
-        /* Counter is xmm13 */
-        "pxor		%%xmm13, %%xmm13\n\t"
-        "pxor		%%xmm15, %%xmm15\n\t"
-        "movl		%[ibytes], %%edx\n\t"
-        "cmpl		$12, %%edx\n\t"
-        "jne		35f\n\t"
-        CALC_IV_12()
-        "\n"
-        "35:\n\t"
-        CALC_IV()
-        "\n"
-        "39:\n\t"
-
-        CALC_AAD()
-
-        "# Calculate counter and H\n\t"
-        "pshufb		%[BSWAP_EPI64], %%xmm13\n\t"
-        "movdqa		" VAR(HR) ", %%xmm5\n\t"
-        "paddd		%[ONE], %%xmm13\n\t"
-        "movdqa		" VAR(HR) ", %%xmm4\n\t"
-        "movdqu		%%xmm13, " VAR(CTR1) "\n\t"
-        "psrlq		$63, %%xmm5\n\t"
-        "psllq		$1, %%xmm4\n\t"
-        "pslldq		$8, %%xmm5\n\t"
-        "por		%%xmm5, %%xmm4\n\t"
-        "pshufd		$0xff, " VAR(HR) ", " VAR(HR) "\n\t"
-        "psrad		$31, " VAR(HR) "\n\t"
-        "pand		%[MOD2_128], " VAR(HR) "\n\t"
-        "pxor		%%xmm4, " VAR(HR) "\n\t"
-
-        "xorl		" VAR(KR) ", " VAR(KR) "\n\t"
-
-#if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX1_NO_UNROLL)
-        "cmpl		$128, %[nbytes]\n\t"
-        "jl		5f\n\t"
-
-        CALC_HT_8_AVX()
-
-        "movl		%[nbytes], %%r13d\n\t"
-        "andl		$0xffffff80, %%r13d\n\t"
-        "\n"
-        "2:\n\t"
-        AESENC_128_GHASH_AVX(%%rcx, 128)
-        "addl		$128, " VAR(KR) "\n\t"
-        "cmpl		%%r13d, " VAR(KR) "\n\t"
-        "jl		2b\n\t"
-
-        "movdqa		%%xmm2, " VAR(XR) "\n\t"
-        "movdqu		(%%rsp), " VAR(HR) "\n\t"
-    "5:\n\t"
-        "movl		%[nbytes], %%edx\n\t"
-        "cmpl		%%edx, " VAR(KR) "\n\t"
-        "jge		55f\n\t"
-#endif
-        "movl		%[nbytes], %%r13d\n\t"
-        "andl		$0xfffffff0, %%r13d\n\t"
-        "cmpl		%%r13d, " VAR(KR) "\n\t"
-        "jge		13f\n\t"
-
-        "\n"
-        "12:\n\t"
-        "leaq		(%[in]," VAR(KR64) ",1), %%rcx\n\t"
-        "leaq		(%[out]," VAR(KR64) ",1), %%rdx\n\t"
-        "movdqu		(%%rcx), %%xmm1\n\t"
-        "movdqa		" VAR(HR) ", %%xmm0\n\t"
-        "pshufb		%[BSWAP_MASK], %%xmm1\n\t"
-        "pxor		" VAR(XR) ", %%xmm1\n\t"
-        AESENC_GFMUL(%%rcx, %%rdx, %%xmm0, %%xmm1)
-        "addl		$16, " VAR(KR) "\n\t"
-        "cmpl		%%r13d, " VAR(KR) "\n\t"
-        "jl		12b\n\t"
-        "\n"
-        "13:\n\t"
-
-        AESENC_LAST15_DEC_AVX()
-        "\n"
-        "55:\n\t"
-
-        CALC_TAG()
-        "addq		$" VAR(STACK_OFFSET) ", %%rsp\n\t"
-        "popq		%%rdx\n\t"
-        CMP_TAG()
-
-        :
-        : [KEY] "r" (key),
-          [in] "r" (in), [out] "r" (out), [nr] "r" (nr),
-          [nbytes] "r" (nbytes), [abytes] "r" (abytes), [addt] "r" (addt),
-          [ivec] "r" (iv), [ibytes] "r" (ivLen), [tbytes] "r" (tagLen),
-          [tag] "r" (tag), [res] "r" (res),
-          [BSWAP_MASK] "m" (BSWAP_MASK),
-          [BSWAP_EPI64] "m" (BSWAP_EPI64),
-          [ONE] "m" (ONE),
-#if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX1_NO_UNROLL)
-          [TWO] "m" (TWO), [THREE] "m" (THREE), [FOUR] "m" (FOUR),
-          [FIVE] "m" (FIVE), [SIX] "m" (SIX), [SEVEN] "m" (SEVEN),
-          [EIGHT] "m" (EIGHT),
-#endif
-          [MOD2_128] "m" (MOD2_128)
-        : "xmm15", "xmm14", "xmm13", "xmm12",
-          "xmm0", "xmm1", "xmm2", "xmm3", "memory",
-          "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11",
-          "rcx", "r13"
-    );
-}
-
-#ifdef HAVE_INTEL_AVX1
-static void AES_GCM_decrypt_avx1(const unsigned char *in, unsigned char *out,
-                                 const unsigned char* addt,
-                                 const unsigned char* ivec,
-                                 const unsigned char *tag, int nbytes,
-                                 int abytes, int ibytes, int tbytes,
-                                 const unsigned char* key, int nr, int* res)
-{
-    register const unsigned char* iv asm("rax") = ivec;
-    register int ivLen asm("ebx") = ibytes;
-    register int tagLen asm("edx") = tbytes;
-
-    __asm__ __volatile__ (
-        "pushq		%%rdx\n\t"
-        "subq		$" VAR(STACK_OFFSET) ", %%rsp\n\t"
-        /* Counter is xmm13 */
-        "vpxor		%%xmm13, %%xmm13, %%xmm13\n\t"
-        "vpxor		%%xmm15, %%xmm15, %%xmm15\n\t"
-        "movl		%[ibytes], %%edx\n\t"
-        "cmpl		$12, %%edx\n\t"
-        "jne		35f\n\t"
-        CALC_IV_12_AVX1()
-        "\n"
-        "35:\n\t"
-        CALC_IV_AVX1()
-        "\n"
-        "39:\n\t"
-
-        CALC_AAD_AVX1()
-
-        "# Calculate counter and H\n\t"
-        "vpsrlq		$63, " VAR(HR) ", %%xmm5\n\t"
-        "vpsllq		$1, " VAR(HR) ", %%xmm4\n\t"
-        "vpslldq	$8, %%xmm5, %%xmm5\n\t"
-        "vpor		%%xmm5, %%xmm4, %%xmm4\n\t"
-        "vpshufd	$0xff, " VAR(HR) ", " VAR(HR) "\n\t"
-        "vpsrad		$31, " VAR(HR) ", " VAR(HR) "\n\t"
-        "vpshufb	%[BSWAP_EPI64], %%xmm13, %%xmm13\n\t"
-        "vpand		%[MOD2_128], " VAR(HR) ", " VAR(HR) "\n\t"
-        "vpaddd		%[ONE], %%xmm13, %%xmm13\n\t"
-        "vpxor		%%xmm4, " VAR(HR) ", " VAR(HR) "\n\t"
-        "vmovdqu	%%xmm13, " VAR(CTR1) "\n\t"
-
-        "xorl		" VAR(KR) ", " VAR(KR) "\n\t"
-
-#if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX1_NO_UNROLL)
-        "cmpl		$128, %[nbytes]\n\t"
-        "jl		5f\n\t"
-
-        CALC_HT_8_AVX1()
-
-        "movl		%[nbytes], %%r13d\n\t"
-        "andl		$0xffffff80, %%r13d\n\t"
-        "\n"
-        "2:\n\t"
-        VAESENC_128_GHASH_AVX1(%%rcx, 128)
-        "addl		$128, " VAR(KR) "\n\t"
-        "cmpl		%%r13d, " VAR(KR) "\n\t"
-        "jl		2b\n\t"
-
-        "vmovdqa	%%xmm2, " VAR(XR) "\n\t"
-        "vmovdqu	(%%rsp), " VAR(HR) "\n\t"
-    "5:\n\t"
-        "movl		%[nbytes], %%edx\n\t"
-        "cmpl		%%edx, " VAR(KR) "\n\t"
-        "jge		55f\n\t"
-#endif
-        "movl		%[nbytes], %%r13d\n\t"
-        "andl		$0xfffffff0, %%r13d\n\t"
-        "cmpl		%%r13d, " VAR(KR) "\n\t"
-        "jge		13f\n\t"
-
-        "\n"
-        "12:\n\t"
-        "vmovdqu	(%[in]," VAR(KR64) ",1), %%xmm9\n\t"
-        "vmovdqa	" VAR(HR) ", %%xmm0\n\t"
-        "vpshufb	%[BSWAP_MASK], %%xmm9, %%xmm1\n\t"
-        "vpxor		" VAR(XR) ", %%xmm1, %%xmm1\n\t"
-        VAESENC_GFMUL(%%xmm9, %%xmm0, %%xmm1)
-        "addl		$16, " VAR(KR) "\n\t"
-        "cmpl		%%r13d, " VAR(KR) "\n\t"
-        "jl		12b\n\t"
-        "\n"
-        "13:\n\t"
-
-        AESENC_LAST15_DEC_AVX1()
-        "\n"
-        "55:\n\t"
-
-        CALC_TAG_AVX1()
-        "addq		$" VAR(STACK_OFFSET) ", %%rsp\n\t"
-        "popq		%%rdx\n\t"
-        CMP_TAG_AVX()
-        "vzeroupper\n\t"
-
-        :
-        : [KEY] "r" (key),
-          [in] "r" (in), [out] "r" (out), [nr] "r" (nr),
-          [nbytes] "r" (nbytes), [abytes] "r" (abytes), [addt] "r" (addt),
-          [ivec] "r" (iv), [ibytes] "r" (ivLen), [tbytes] "r" (tagLen),
-          [tag] "r" (tag), [res] "r" (res),
-          [BSWAP_MASK] "m" (BSWAP_MASK),
-          [BSWAP_EPI64] "m" (BSWAP_EPI64),
-          [ONE] "m" (ONE),
-#if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX1_NO_UNROLL)
-          [TWO] "m" (TWO), [THREE] "m" (THREE), [FOUR] "m" (FOUR),
-          [FIVE] "m" (FIVE), [SIX] "m" (SIX), [SEVEN] "m" (SEVEN),
-          [EIGHT] "m" (EIGHT),
-#endif
-          [MOD2_128] "m" (MOD2_128)
-        : "xmm15", "xmm14", "xmm13", "xmm12",
-          "xmm0", "xmm1", "xmm2", "xmm3", "memory",
-          "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11",
-          "rcx", "r13"
-    );
-}
-
-#ifdef HAVE_INTEL_AVX2
-static void AES_GCM_decrypt_avx2(const unsigned char *in, unsigned char *out,
-                                 const unsigned char* addt,
-                                 const unsigned char* ivec,
-                                 const unsigned char *tag, int nbytes,
-                                 int abytes, int ibytes, int tbytes,
-                                 const unsigned char* key, int nr, int* res)
-{
-    register const unsigned char* iv asm("rax") = ivec;
-    register int ivLen asm("ebx") = ibytes;
-    register int tagLen asm("edx") = tbytes;
-
-    __asm__ __volatile__ (
-        "pushq		%%rdx\n\t"
-        "subq		$" VAR(STACK_OFFSET) ", %%rsp\n\t"
-        /* Counter is xmm13 */
-        "vpxor		%%xmm13, %%xmm13, %%xmm13\n\t"
-        "vpxor		%%xmm15, %%xmm15, %%xmm15\n\t"
-        "movl		%[ibytes], %%edx\n\t"
-        "cmpl		$12, %%edx\n\t"
-        "jne		35f\n\t"
-        CALC_IV_12_AVX2()
-        "jmp		39f\n\t"
-        "\n"
-        "35:\n\t"
-        CALC_IV_AVX2()
-        "\n"
-        "39:\n\t"
-
-        CALC_AAD_AVX2()
-
-        "# Calculate counter and H\n\t"
-        "vpsrlq		$63, " VAR(HR) ", %%xmm5\n\t"
-        "vpsllq		$1, " VAR(HR) ", %%xmm4\n\t"
-        "vpslldq	$8, %%xmm5, %%xmm5\n\t"
-        "vpor		%%xmm5, %%xmm4, %%xmm4\n\t"
-        "vpshufd	$0xff, " VAR(HR) ", " VAR(HR) "\n\t"
-        "vpsrad		$31, " VAR(HR) ", " VAR(HR) "\n\t"
-        "vpshufb	%[BSWAP_EPI64], %%xmm13, %%xmm13\n\t"
-        "vpand		%[MOD2_128], " VAR(HR) ", " VAR(HR) "\n\t"
-        "vpaddd		%[ONE], %%xmm13, %%xmm13\n\t"
-        "vpxor		%%xmm4, " VAR(HR) ", " VAR(HR) "\n\t"
-        "vmovdqu	%%xmm13, " VAR(CTR1) "\n\t"
-
-        "xorl		" VAR(KR) ", " VAR(KR) "\n\t"
-
-#if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX2_NO_UNROLL)
-        "cmpl		$128, %[nbytes]\n\t"
-        "jl		5f\n\t"
-
-        CALC_HT_8_AVX2()
-
-        "movl		%[nbytes], %%r13d\n\t"
-        "andl		$0xffffff80, %%r13d\n\t"
-        "\n"
-        "2:\n\t"
-        VAESENC_128_GHASH_AVX2(%%rcx, 128)
-        "addl		$128, " VAR(KR) "\n\t"
-        "cmpl		%%r13d, " VAR(KR) "\n\t"
-        "jl		2b\n\t"
-
-        "vmovdqa	%%xmm2, " VAR(XR) "\n\t"
-        "vmovdqu	(%%rsp), " VAR(HR) "\n\t"
-    "5:\n\t"
-        "movl		%[nbytes], %%edx\n\t"
-        "cmpl		%%edx, " VAR(KR) "\n\t"
-        "jge		55f\n\t"
-#endif
-        "movl		%[nbytes], %%r13d\n\t"
-        "andl		$0xfffffff0, %%r13d\n\t"
-        "cmpl		%%r13d, " VAR(KR) "\n\t"
-        "jge		13f\n\t"
-
-        "vmovdqa	%[MOD2_128], %%xmm0\n\t"
-        "\n"
-        "12:\n\t"
-        "vmovdqu	(%[in]," VAR(KR64) ",1), %%xmm9\n\t"
-        "vmovdqu	" VAR(CTR1) ", %%xmm5\n\t"
-        "vpshufb	%[BSWAP_MASK], %%xmm9, %%xmm1\n\t"
-        "vpshufb	%[BSWAP_EPI64], %%xmm5, %%xmm4\n\t"
-        "vpaddd		%[ONE], %%xmm5, %%xmm5\n\t"
-        "vpxor		" VAR(XR) ", %%xmm1, %%xmm1\n\t"
-        "vmovdqu	%%xmm5, " VAR(CTR1) "\n\t"
-        VAESENC_GFMUL_SB_AVX2(%%xmm9, HR, %%xmm1, CTR1)
-        "vmovdqu	%%xmm4, (%[out]," VAR(KR64) ",1)\n\t"
-        "addl		$16, " VAR(KR) "\n\t"
-        "cmpl		%%r13d, " VAR(KR) "\n\t"
-        "jl		12b\n\t"
-        "\n"
-        "13:\n\t"
-
-        AESENC_LAST15_DEC_AVX2()
-        "\n"
-        "55:\n\t"
-
-        CALC_TAG_AVX2()
-        "addq		$" VAR(STACK_OFFSET) ", %%rsp\n\t"
-        "popq		%%rdx\n\t"
-        CMP_TAG_AVX()
-        "vzeroupper\n\t"
-
-        :
-        : [KEY] "r" (key),
-          [in] "r" (in), [out] "r" (out), [nr] "r" (nr),
-          [nbytes] "r" (nbytes), [abytes] "r" (abytes), [addt] "r" (addt),
-          [ivec] "r" (iv), [ibytes] "r" (ivLen), [tbytes] "r" (tagLen),
-          [tag] "r" (tag), [res] "r" (res),
-          [BSWAP_MASK] "m" (BSWAP_MASK),
-          [BSWAP_EPI64] "m" (BSWAP_EPI64),
-          [ONE] "m" (ONE),
-#if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX2_NO_UNROLL)
-          [TWO] "m" (TWO), [THREE] "m" (THREE), [FOUR] "m" (FOUR),
-          [FIVE] "m" (FIVE), [SIX] "m" (SIX), [SEVEN] "m" (SEVEN),
-          [EIGHT] "m" (EIGHT),
-#endif
-          [MOD2_128] "m" (MOD2_128)
-        : "xmm15", "xmm14", "xmm13", "xmm12",
-          "xmm0", "xmm1", "xmm2", "xmm3", "memory",
-          "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11",
-          "rcx", "r13"
-    );
-}
-#endif /* HAVE_INTEL_AVX2 */
-#endif /* HAVE_INTEL_AVX1 */
-#endif /* HAVE_AES_DECRYPT */
-
-#else /* _MSC_VER */
+static const __m128i BSWAP_EPI64 =
+        M128_INIT(0x0001020304050607, 0x08090a0b0c0d0e0f);
+static const __m128i BSWAP_MASK =
+        M128_INIT(0x08090a0b0c0d0e0f, 0x0001020304050607);
+
+
 /* The following are for MSC based builds which do not allow
  * inline assembly. Intrinsic functions are used instead. */
 
@@ -7013,7 +4607,7 @@
     __m128i tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
 #endif
 
-    if (ibytes == 12)
+    if (ibytes == GCM_NONCE_MID_SZ)
         aes_gcm_calc_iv_12(KEY, ivec, nr, H, Y, T);
     else
         aes_gcm_calc_iv(KEY, ivec, ibytes, nr, H, Y, T);
@@ -7451,7 +5045,7 @@
     __m128i tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
 #endif /* AES_GCM_AESNI_NO_UNROLL */
 
-    if (ibytes == 12)
+    if (ibytes == GCM_NONCE_MID_SZ)
         aes_gcm_calc_iv_12(KEY, ivec, nr, H, Y, T);
     else
         aes_gcm_calc_iv(KEY, ivec, ibytes, nr, H, Y, T);
@@ -8070,6 +5664,13 @@
             x[1] ^= bigA[1];
             GMULT(x, bigH);
         }
+#ifdef OPENSSL_EXTRA
+        /* store AAD partial tag for next call */
+        aes->aadH[0] = (word32)((x[0] & 0xFFFFFFFF00000000) >> 32);
+        aes->aadH[1] = (word32)(x[0] & 0xFFFFFFFF);
+        aes->aadH[2] = (word32)((x[1] & 0xFFFFFFFF00000000) >> 32);
+        aes->aadH[3] = (word32)(x[1] & 0xFFFFFFFF);
+#endif
     }
 
     /* Hash in C, the Ciphertext */
@@ -8077,6 +5678,13 @@
         word64 bigC[2];
         blocks = cSz / AES_BLOCK_SIZE;
         partial = cSz % AES_BLOCK_SIZE;
+#ifdef OPENSSL_EXTRA
+        /* Start from last AAD partial tag */
+        if(aes->aadLen) {
+            x[0] = ((word64)aes->aadH[0]) << 32 | aes->aadH[1];
+            x[1] = ((word64)aes->aadH[2]) << 32 | aes->aadH[3];
+         }
+#endif
         while (blocks--) {
             XMEMCPY(bigC, c, AES_BLOCK_SIZE);
             #ifdef LITTLE_ENDIAN_ORDER
@@ -8103,7 +5711,10 @@
     {
         word64 len[2];
         len[0] = aSz; len[1] = cSz;
-
+#ifdef OPENSSL_EXTRA
+        if (aes->aadLen)
+            len[0] = (word64)aes->aadLen;
+#endif
         /* Lengths are in bytes. Convert to bits. */
         len[0] *= 8;
         len[1] *= 8;
@@ -8269,7 +5880,7 @@
 #endif /* end GCM_WORD32 */
 
 
-#if !defined(WOLFSSL_XILINX_CRYPT)
+#if !defined(WOLFSSL_XILINX_CRYPT) && !defined(WOLFSSL_AFALG_XILINX_AES)
 #ifdef FREESCALE_LTC_AES_GCM
 int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
                    const byte* iv, word32 ivSz,
@@ -8280,7 +5891,7 @@
     word32 keySize;
 
     /* argument checks */
-    if (aes == NULL || authTagSz > AES_BLOCK_SIZE) {
+    if (aes == NULL || authTagSz > AES_BLOCK_SIZE || ivSz == 0) {
         return BAD_FUNC_ARG;
     }
 
@@ -8298,43 +5909,66 @@
 
     return (status == kStatus_Success) ? 0 : AES_GCM_AUTH_E;
 }
+
 #else
-#if defined(STM32_CRYPTO) && (defined(WOLFSSL_STM32F4) || \
-                              defined(WOLFSSL_STM32F7) || \
-                              defined(WOLFSSL_STM32L4))
-
-static WC_INLINE int wc_AesGcmEncrypt_STM32(Aes* aes, byte* out, const byte* in,
-                                         word32 sz, const byte* iv, word32 ivSz,
-                                         byte* authTag, word32 authTagSz,
-                                         const byte* authIn, word32 authInSz)
+
+#ifdef STM32_CRYPTO_AES_GCM
+
+/* this function supports inline encrypt */
+static int wc_AesGcmEncrypt_STM32(Aes* aes, byte* out, const byte* in, word32 sz,
+                                  const byte* iv, word32 ivSz,
+                                  byte* authTag, word32 authTagSz,
+                                  const byte* authIn, word32 authInSz)
 {
     int ret;
+#ifdef WOLFSSL_STM32_CUBEMX
+    CRYP_HandleTypeDef hcryp;
+#else
+    word32 keyCopy[AES_256_KEY_SIZE/sizeof(word32)];
+#endif
     word32 keySize;
-    byte initialCounter[AES_BLOCK_SIZE];
-    #ifdef WOLFSSL_STM32_CUBEMX
-        CRYP_HandleTypeDef hcryp;
-    #else
-        byte keyCopy[AES_BLOCK_SIZE * 2];
-    #endif /* WOLFSSL_STM32_CUBEMX */
-    int status = 0;
+    int status = HAL_OK;
+    word32 blocks = sz / AES_BLOCK_SIZE;
+    word32 partial = sz % AES_BLOCK_SIZE;
+    byte tag[AES_BLOCK_SIZE];
+    byte partialBlock[AES_BLOCK_SIZE];
+    byte ctr[AES_BLOCK_SIZE];
     byte* authInPadded = NULL;
-    byte tag[AES_BLOCK_SIZE];
     int authPadSz;
 
     ret = wc_AesGetKeySize(aes, &keySize);
     if (ret != 0)
         return ret;
 
-    XMEMSET(initialCounter, 0, AES_BLOCK_SIZE);
-    XMEMCPY(initialCounter, iv, ivSz);
-    initialCounter[AES_BLOCK_SIZE - 1] = STM32_GCM_IV_START;
-
-    /* pad authIn if it is not a block multiple */
-    if ((authInSz % AES_BLOCK_SIZE) != 0) {
+#ifdef WOLFSSL_STM32_CUBEMX
+    ret = wc_Stm32_Aes_Init(aes, &hcryp);
+    if (ret != 0)
+        return ret;
+#endif
+
+    ret = wolfSSL_CryptHwMutexLock();
+    if (ret != 0) {
+        return ret;
+    }
+
+    XMEMSET(ctr, 0, AES_BLOCK_SIZE);
+    if (ivSz == GCM_NONCE_MID_SZ) {
+        XMEMCPY(ctr, iv, ivSz);
+        ctr[AES_BLOCK_SIZE - 1] = 1;
+    }
+    else {
+        GHASH(aes, NULL, 0, iv, ivSz, ctr, AES_BLOCK_SIZE);
+    }
+    /* Hardware requires counter + 1 */
+    IncrementGcmCounter(ctr);
+
+    if (authInSz == 0 || (authInSz % AES_BLOCK_SIZE) != 0) {
+        /* Need to pad the AAD to a full block with zeros. */
         authPadSz = ((authInSz / AES_BLOCK_SIZE) + 1) * AES_BLOCK_SIZE;
-        /* Need to pad the AAD to a full block with zeros. */
-        authInPadded = XMALLOC(authPadSz, aes->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        authInPadded = (byte*)XMALLOC(authPadSz, aes->heap,
+            DYNAMIC_TYPE_TMP_BUFFER);
         if (authInPadded == NULL) {
+            wolfSSL_CryptHwMutexUnLock();
             return MEMORY_E;
         }
         XMEMSET(authInPadded, 0, authPadSz);
@@ -8344,32 +5978,12 @@
         authInPadded = (byte*)authIn;
     }
 
-
 #ifdef WOLFSSL_STM32_CUBEMX
-    XMEMSET(&hcryp, 0, sizeof(CRYP_HandleTypeDef));
-    switch (keySize) {
-        case 16: /* 128-bit key */
-            hcryp.Init.KeySize = CRYP_KEYSIZE_128B;
-            break;
-#ifdef CRYP_KEYSIZE_192B
-        case 24: /* 192-bit key */
-            hcryp.Init.KeySize = CRYP_KEYSIZE_192B;
-            break;
-#endif
-    	case 32: /* 256-bit key */
-            hcryp.Init.KeySize = CRYP_KEYSIZE_256B;
-            break;
-        default:
-            break;
-    }
-    hcryp.Instance = CRYP;
-    hcryp.Init.DataType = CRYP_DATATYPE_8B;
-    hcryp.Init.pKey = (byte*)aes->key;
-    hcryp.Init.pInitVect = initialCounter;
-    hcryp.Init.Header = authInPadded;
+    hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)ctr;
+    hcryp.Init.Header = (STM_CRYPT_TYPE*)authInPadded;
     hcryp.Init.HeaderSize = authInSz;
 
-#ifdef WOLFSSL_STM32L4
+#ifdef STM32_CRYPTO_AES_ONLY
     /* Set the CRYP parameters */
     hcryp.Init.ChainingMode  = CRYP_CHAINMODE_AES_GCM_GMAC;
     hcryp.Init.OperatingMode = CRYP_ALGOMODE_ENCRYPT;
@@ -8382,24 +5996,59 @@
         /* GCM header phase */
         hcryp.Init.GCMCMACPhase  = CRYP_HEADER_PHASE;
         status = HAL_CRYPEx_AES_Auth(&hcryp, NULL, 0, NULL, STM32_HAL_TIMEOUT);
-        if (status == HAL_OK) {
-            /* GCM payload phase */
-            hcryp.Init.GCMCMACPhase  = CRYP_PAYLOAD_PHASE;
-            status = HAL_CRYPEx_AES_Auth(&hcryp, (byte*)in, sz, out, STM32_HAL_TIMEOUT);
-            if (status == HAL_OK) {
-                /* GCM final phase */
-                hcryp.Init.GCMCMACPhase  = CRYP_FINAL_PHASE;
-                status = HAL_CRYPEx_AES_Auth(&hcryp, NULL, sz, tag, STM32_HAL_TIMEOUT);
-            }
-        }
+    }
+    if (status == HAL_OK) {
+        /* GCM payload phase - blocks */
+        hcryp.Init.GCMCMACPhase  = CRYP_PAYLOAD_PHASE;
+        if (blocks) {
+            status = HAL_CRYPEx_AES_Auth(&hcryp, (byte*)in,
+                (blocks * AES_BLOCK_SIZE), out, STM32_HAL_TIMEOUT);
+        }
+    }
+    if (status == HAL_OK && (partial != 0 || blocks == 0)) {
+        /* GCM payload phase - partial remainder */
+        XMEMSET(partialBlock, 0, sizeof(partialBlock));
+        XMEMCPY(partialBlock, in + (blocks * AES_BLOCK_SIZE), partial);
+        status = HAL_CRYPEx_AES_Auth(&hcryp, partialBlock, partial,
+            partialBlock, STM32_HAL_TIMEOUT);
+        XMEMCPY(out + (blocks * AES_BLOCK_SIZE), partialBlock, partial);
+    }
+    if (status == HAL_OK) {
+        /* GCM final phase */
+        hcryp.Init.GCMCMACPhase  = CRYP_FINAL_PHASE;
+        status = HAL_CRYPEx_AES_Auth(&hcryp, NULL, sz, tag, STM32_HAL_TIMEOUT);
+    }
+#elif defined(STM32_HAL_V2)
+    hcryp.Init.Algorithm  = CRYP_AES_GCM;
+    ByteReverseWords((word32*)partialBlock, (word32*)ctr, AES_BLOCK_SIZE);
+    hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)partialBlock;
+    HAL_CRYP_Init(&hcryp);
+
+    /* GCM payload phase - can handle partial blocks */
+    status = HAL_CRYP_Encrypt(&hcryp, (uint32_t*)in,
+        (blocks * AES_BLOCK_SIZE) + partial, (uint32_t*)out, STM32_HAL_TIMEOUT);
+    if (status == HAL_OK) {
+        /* Compute the authTag */
+        status = HAL_CRYPEx_AESGCM_GenerateAuthTAG(&hcryp, (uint32_t*)tag,
+            STM32_HAL_TIMEOUT);
     }
 #else
     HAL_CRYP_Init(&hcryp);
-
-    status = HAL_CRYPEx_AESGCM_Encrypt(&hcryp, (byte*)in, sz,
-                                       out, STM32_HAL_TIMEOUT);
-    /* Compute the authTag */
+    if (blocks) {
+        /* GCM payload phase - blocks */
+        status = HAL_CRYPEx_AESGCM_Encrypt(&hcryp, (byte*)in,
+            (blocks * AES_BLOCK_SIZE), out, STM32_HAL_TIMEOUT);
+    }
+    if (status == HAL_OK && (partial != 0 || blocks == 0)) {
+        /* GCM payload phase - partial remainder */
+        XMEMSET(partialBlock, 0, sizeof(partialBlock));
+        XMEMCPY(partialBlock, in + (blocks * AES_BLOCK_SIZE), partial);
+        status = HAL_CRYPEx_AESGCM_Encrypt(&hcryp, partialBlock, partial,
+            partialBlock, STM32_HAL_TIMEOUT);
+        XMEMCPY(out + (blocks * AES_BLOCK_SIZE), partialBlock, partial);
+    }
     if (status == HAL_OK) {
+        /* Compute the authTag */
         status = HAL_CRYPEx_AESGCM_Finish(&hcryp, sz, tag, STM32_HAL_TIMEOUT);
     }
 #endif
@@ -8407,29 +6056,46 @@
     if (status != HAL_OK)
         ret = AES_GCM_AUTH_E;
     HAL_CRYP_DeInit(&hcryp);
-#else
-    ByteReverseWords((word32*)keyCopy, (word32*)aes->key, keySize);
-    status = CRYP_AES_GCM(MODE_ENCRYPT, (uint8_t*)initialCounter,
-                         (uint8_t*)keyCopy,     keySize * 8,
-                         (uint8_t*)in,          sz,
-                         (uint8_t*)authInPadded,authInSz,
-                         (uint8_t*)out,         tag);
+
+#else /* STD_PERI_LIB */
+    ByteReverseWords(keyCopy, (word32*)aes->key, keySize);
+    status = CRYP_AES_GCM(MODE_ENCRYPT, (uint8_t*)ctr,
+                         (uint8_t*)keyCopy,      keySize * 8,
+                         (uint8_t*)in,           sz,
+                         (uint8_t*)authInPadded, authInSz,
+                         (uint8_t*)out,          tag);
     if (status != SUCCESS)
         ret = AES_GCM_AUTH_E;
 #endif /* WOLFSSL_STM32_CUBEMX */
 
-    /* authTag may be shorter than AES_BLOCK_SZ, store separately */
-    if (ret == 0)
-    	XMEMCPY(authTag, tag, authTagSz);
-
-    /* We only allocate extra memory if authInPadded is not a multiple of AES_BLOCK_SZ */
-    if (authInPadded != NULL && authInSz != authPadSz) {
+    if (ret == 0) {
+        /* return authTag */
+        if (authTag) {
+            /* STM32 GCM won't compute Auth correctly for partial or
+                when IV != 12, so use software here */
+            if (sz == 0 || partial != 0 || ivSz != GCM_NONCE_MID_SZ) {
+                DecrementGcmCounter(ctr); /* hardware requires +1, so subtract it */
+                GHASH(aes, authIn, authInSz, out, sz, authTag, authTagSz);
+                wc_AesEncrypt(aes, ctr, tag);
+                xorbuf(authTag, tag, authTagSz);
+            }
+            else {
+                XMEMCPY(authTag, tag, authTagSz);
+            }
+        }
+    }
+
+    /* Free memory if not a multiple of AES_BLOCK_SZ */
+    if (authInPadded != authIn) {
         XFREE(authInPadded, aes->heap, DYNAMIC_TYPE_TMP_BUFFER);
     }
 
+    wolfSSL_CryptHwMutexUnLock();
+
     return ret;
 }
-#endif /* STM32_CRYPTO */
+
+#endif /* STM32_CRYPTO_AES_GCM */
 
 #ifdef WOLFSSL_AESNI
 int AES_GCM_encrypt_C(Aes* aes, byte* out, const byte* in, word32 sz,
@@ -8453,21 +6119,31 @@
     byte initialCounter[AES_BLOCK_SIZE];
     byte *ctr;
     byte scratch[AES_BLOCK_SIZE];
-
+#ifdef OPENSSL_EXTRA
+    word32 aadTemp;
+#endif
     ctr = counter;
     XMEMSET(initialCounter, 0, AES_BLOCK_SIZE);
+    XMEMSET(scratch, 0, AES_BLOCK_SIZE);
     if (ivSz == GCM_NONCE_MID_SZ) {
         XMEMCPY(initialCounter, iv, ivSz);
         initialCounter[AES_BLOCK_SIZE - 1] = 1;
     }
     else {
+#ifdef OPENSSL_EXTRA
+        aadTemp = aes->aadLen;
+        aes->aadLen = 0;
+#endif
         GHASH(aes, NULL, 0, iv, ivSz, initialCounter, AES_BLOCK_SIZE);
+#ifdef OPENSSL_EXTRA
+        aes->aadLen = aadTemp;
+#endif
     }
     XMEMCPY(ctr, initialCounter, AES_BLOCK_SIZE);
 
 #ifdef WOLFSSL_PIC32MZ_CRYPT
     if (blocks) {
-        /* use intitial IV for PIC32 HW, but don't use it below */
+        /* use initial IV for HW, but don't use it below */
         XMEMCPY(aes->reg, ctr, AES_BLOCK_SIZE);
 
         ret = wc_Pic32AesCrypt(
@@ -8483,7 +6159,7 @@
 #if defined(HAVE_AES_ECB) && !defined(WOLFSSL_PIC32MZ_CRYPT)
     /* some hardware acceleration can gain performance from doing AES encryption
      * of the whole buffer at once */
-    if (c != p) { /* can not handle inline encryption */
+    if (c != p && blocks > 0) { /* can not handle inline encryption */
         while (blocks--) {
             IncrementGcmCounter(ctr);
             XMEMCPY(c, ctr, AES_BLOCK_SIZE);
@@ -8497,11 +6173,11 @@
         p += AES_BLOCK_SIZE * blocks;
     }
     else
-#endif /* HAVE_AES_ECB */
+#endif /* HAVE_AES_ECB && !WOLFSSL_PIC32MZ_CRYPT */
 
     while (blocks--) {
         IncrementGcmCounter(ctr);
-    #ifndef WOLFSSL_PIC32MZ_CRYPT
+    #if !defined(WOLFSSL_PIC32MZ_CRYPT)
         wc_AesEncrypt(aes, ctr, scratch);
         xorbuf(scratch, p, AES_BLOCK_SIZE);
         XMEMCPY(c, scratch, AES_BLOCK_SIZE);
@@ -8516,21 +6192,28 @@
         xorbuf(scratch, p, partial);
         XMEMCPY(c, scratch, partial);
     }
-
-    GHASH(aes, authIn, authInSz, out, sz, authTag, authTagSz);
-    wc_AesEncrypt(aes, initialCounter, scratch);
-    xorbuf(authTag, scratch, authTagSz);
+    if (authTag) {
+        GHASH(aes, authIn, authInSz, out, sz, authTag, authTagSz);
+        wc_AesEncrypt(aes, initialCounter, scratch);
+        xorbuf(authTag, scratch, authTagSz);
+#ifdef OPENSSL_EXTRA
+        if (!in && !sz)
+            /* store AAD size for next call */
+            aes->aadLen = authInSz;
+#endif
+    }
 
     return ret;
 }
 
+/* Software AES - GCM Encrypt */
 int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
                    const byte* iv, word32 ivSz,
                    byte* authTag, word32 authTagSz,
                    const byte* authIn, word32 authInSz)
 {
     /* argument checks */
-    if (aes == NULL || authTagSz > AES_BLOCK_SIZE) {
+    if (aes == NULL || authTagSz > AES_BLOCK_SIZE || ivSz == 0) {
         return BAD_FUNC_ARG;
     }
 
@@ -8539,27 +6222,13 @@
         return BAD_FUNC_ARG;
     }
 
-#if defined(STM32_CRYPTO) && (defined(WOLFSSL_STM32F4) || \
-                              defined(WOLFSSL_STM32F7) || \
-                              defined(WOLFSSL_STM32L4))
-
-    /* additional argument checks - STM32 HW only supports 12 byte IV */
-    if (ivSz != GCM_NONCE_MID_SZ) {
-        return BAD_FUNC_ARG;
-    }
-
-    /* STM32 HW AES-GCM requires / assumes inputs are a multiple of block size.
-     * We can avoid this by zero padding (authIn) AAD, but zero-padded plaintext
-     * will be encrypted and output incorrectly, causing a bad authTag.
-     * We will use HW accelerated AES-GCM if plain%AES_BLOCK_SZ==0.
-     * Otherwise, we will use accelerated AES_CTR for encrypt, and then
-     * perform GHASH in software.
-     * See NIST SP 800-38D */
-
-    /* Plain text is a multiple of block size, so use HW-Accelerated AES_GCM */
-    if (sz % AES_BLOCK_SIZE == 0) {
-        return wc_AesGcmEncrypt_STM32(aes, out, in, sz, iv, ivSz,
-                                      authTag, authTagSz, authIn, authInSz);
+#ifdef WOLF_CRYPTO_CB
+    if (aes->devId != INVALID_DEVID) {
+        int ret = wc_CryptoCb_AesGcmEncrypt(aes, out, in, sz, iv, ivSz,
+            authTag, authTagSz, authIn, authInSz);
+        if (ret != CRYPTOCB_UNAVAILABLE)
+            return ret;
+        /* fall-through when unavailable */
     }
 #endif
 
@@ -8572,13 +6241,13 @@
         #ifdef HAVE_CAVIUM_V
         if (authInSz == 20) { /* Nitrox V GCM is only working with 20 byte AAD */
             return NitroxAesGcmEncrypt(aes, out, in, sz,
-                (const byte*)aes->asyncKey, aes->keylen, iv, ivSz,
+                (const byte*)aes->devKey, aes->keylen, iv, ivSz,
                 authTag, authTagSz, authIn, authInSz);
         }
         #endif
     #elif defined(HAVE_INTEL_QA)
         return IntelQaSymAesGcmEncrypt(&aes->asyncDev, out, in, sz,
-            (const byte*)aes->asyncKey, aes->keylen, iv, ivSz,
+            (const byte*)aes->devKey, aes->keylen, iv, ivSz,
             authTag, authTagSz, authIn, authInSz);
     #else /* WOLFSSL_ASYNC_CRYPT_TEST */
         if (wc_AsyncTestInit(&aes->asyncDev, ASYNC_TEST_AES_GCM_ENCRYPT)) {
@@ -8599,7 +6268,17 @@
     }
 #endif /* WOLFSSL_ASYNC_CRYPT */
 
-    /* Software AES-GCM */
+#ifdef STM32_CRYPTO_AES_GCM
+    /* The STM standard peripheral library API's doesn't support partial blocks */
+    #ifdef STD_PERI_LIB
+    if (partial == 0)
+    #endif
+    {
+        return wc_AesGcmEncrypt_STM32(
+            aes, out, in, sz, iv, ivSz,
+            authTag, authTagSz, authIn, authInSz);
+    }
+#endif /* STM32_CRYPTO_AES_GCM */
 
 #ifdef WOLFSSL_AESNI
     #ifdef HAVE_INTEL_AVX2
@@ -8633,6 +6312,8 @@
 #endif
 
 
+
+/* AES GCM Decrypt */
 #if defined(HAVE_AES_DECRYPT) || defined(HAVE_AESGCM_DECRYPT)
 #ifdef FREESCALE_LTC_AES_GCM
 int  wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
@@ -8645,8 +6326,12 @@
     status_t status;
 
     /* argument checks */
-    if (aes == NULL || out == NULL || in == NULL || iv == NULL ||
-        authTag == NULL || authTagSz > AES_BLOCK_SIZE) {
+    /* If the sz is non-zero, both in and out must be set. If sz is 0,
+     * in and out are don't cares, as this is is the GMAC case. */
+    if (aes == NULL || iv == NULL || (sz != 0 && (in == NULL || out == NULL)) ||
+        authTag == NULL || authTagSz > AES_BLOCK_SIZE || authTagSz == 0 ||
+        ivSz == 0) {
+
         return BAD_FUNC_ARG;
     }
 
@@ -8660,72 +6345,66 @@
 
     return (status == kStatus_Success) ? 0 : AES_GCM_AUTH_E;
 }
-#elif defined(STM32_CRYPTO) && (defined(WOLFSSL_STM32F4) || \
-                                defined(WOLFSSL_STM32F7) || \
-                                defined(WOLFSSL_STM32L4))
-int  wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
-                   const byte* iv, word32 ivSz,
-                   const byte* authTag, word32 authTagSz,
-                   const byte* authIn, word32 authInSz)
+
+#else
+
+#ifdef STM32_CRYPTO_AES_GCM
+/* this function supports inline decrypt */
+static int wc_AesGcmDecrypt_STM32(Aes* aes, byte* out,
+                                  const byte* in, word32 sz,
+                                  const byte* iv, word32 ivSz,
+                                  const byte* authTag, word32 authTagSz,
+                                  const byte* authIn, word32 authInSz)
 {
     int ret;
+#ifdef WOLFSSL_STM32_CUBEMX
+    CRYP_HandleTypeDef hcryp;
+#else
+    word32 keyCopy[AES_256_KEY_SIZE/sizeof(word32)];
+#endif
     word32 keySize;
-    #ifdef WOLFSSL_STM32_CUBEMX
-        CRYP_HandleTypeDef hcryp;
-    #else
-        byte keyCopy[AES_BLOCK_SIZE * 2];
-    #endif /* WOLFSSL_STM32_CUBEMX */
-    int  status;
-    int  inPadSz, authPadSz;
+    int status = HAL_OK;
+    word32 blocks = sz / AES_BLOCK_SIZE;
+    word32 partial = sz % AES_BLOCK_SIZE;
     byte tag[AES_BLOCK_SIZE];
-    byte *inPadded = NULL;
-    byte *authInPadded = NULL;
-    byte initialCounter[AES_BLOCK_SIZE];
-
-    /* argument checks */
-    if (aes == NULL || out == NULL || in == NULL || iv == NULL ||
-        authTag == NULL || authTagSz > AES_BLOCK_SIZE) {
-        return BAD_FUNC_ARG;
-    }
+    byte partialBlock[AES_BLOCK_SIZE];
+    byte ctr[AES_BLOCK_SIZE];
+    byte* authInPadded = NULL;
+    int authPadSz;
 
     ret = wc_AesGetKeySize(aes, &keySize);
+    if (ret != 0)
+        return ret;
+
+#ifdef WOLFSSL_STM32_CUBEMX
+    ret = wc_Stm32_Aes_Init(aes, &hcryp);
+    if (ret != 0)
+        return ret;
+#endif
+
+    ret = wolfSSL_CryptHwMutexLock();
     if (ret != 0) {
         return ret;
     }
 
-    /* additional argument checks - STM32 HW only supports 12 byte IV */
-    if (ivSz != GCM_NONCE_MID_SZ) {
-        return BAD_FUNC_ARG;
-    }
-
-    XMEMSET(initialCounter, 0, AES_BLOCK_SIZE);
-    XMEMCPY(initialCounter, iv, ivSz);
-    initialCounter[AES_BLOCK_SIZE - 1] = STM32_GCM_IV_START;
-
-    /* Need to pad the AAD and input cipher text to a full block size since
-     * CRYP_AES_GCM will assume these are a multiple of AES_BLOCK_SIZE.
-     * It is okay to pad with zeros because GCM does this before GHASH already.
-     * See NIST SP 800-38D */
-
-    if ((sz % AES_BLOCK_SIZE) > 0) {
-        inPadSz = ((sz / AES_BLOCK_SIZE) + 1) * AES_BLOCK_SIZE;
-        inPadded = XMALLOC(inPadSz, aes->heap, DYNAMIC_TYPE_TMP_BUFFER);
-        if (inPadded == NULL) {
-            return MEMORY_E;
-        }
-        XMEMSET(inPadded, 0, inPadSz);
-        XMEMCPY(inPadded, in, sz);
-    } else {
-        inPadSz = sz;
-        inPadded = (byte*)in;
-    }
-
-    if ((authInSz % AES_BLOCK_SIZE) > 0) {
+    XMEMSET(ctr, 0, AES_BLOCK_SIZE);
+    if (ivSz == GCM_NONCE_MID_SZ) {
+        XMEMCPY(ctr, iv, ivSz);
+        ctr[AES_BLOCK_SIZE - 1] = 1;
+    }
+    else {
+        GHASH(aes, NULL, 0, iv, ivSz, ctr, AES_BLOCK_SIZE);
+    }
+    /* Hardware requires counter + 1 */
+    IncrementGcmCounter(ctr);
+
+    if (authInSz == 0 || (authInSz % AES_BLOCK_SIZE) != 0) {
+        /* Need to pad the AAD to a full block with zeros. */
         authPadSz = ((authInSz / AES_BLOCK_SIZE) + 1) * AES_BLOCK_SIZE;
-        authInPadded = XMALLOC(authPadSz, aes->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        authInPadded = (byte*)XMALLOC(authPadSz, aes->heap,
+            DYNAMIC_TYPE_TMP_BUFFER);
         if (authInPadded == NULL) {
-            if (inPadded != NULL && inPadSz != sz)
-                XFREE(inPadded , aes->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            wolfSSL_CryptHwMutexUnLock();
             return MEMORY_E;
         }
         XMEMSET(authInPadded, 0, authPadSz);
@@ -8736,30 +6415,11 @@
     }
 
 #ifdef WOLFSSL_STM32_CUBEMX
-    XMEMSET(&hcryp, 0, sizeof(CRYP_HandleTypeDef));
-    switch(keySize) {
-        case 16: /* 128-bit key */
-            hcryp.Init.KeySize = CRYP_KEYSIZE_128B;
-            break;
-#ifdef CRYP_KEYSIZE_192B
-        case 24: /* 192-bit key */
-            hcryp.Init.KeySize = CRYP_KEYSIZE_192B;
-            break;
-#endif
-        case 32: /* 256-bit key */
-            hcryp.Init.KeySize = CRYP_KEYSIZE_256B;
-            break;
-        default:
-            break;
-    }
-    hcryp.Instance = CRYP;
-    hcryp.Init.DataType = CRYP_DATATYPE_8B;
-    hcryp.Init.pKey = (byte*)aes->key;
-    hcryp.Init.pInitVect = initialCounter;
-    hcryp.Init.Header = authInPadded;
+    hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)ctr;
+    hcryp.Init.Header = (STM_CRYPT_TYPE*)authInPadded;
     hcryp.Init.HeaderSize = authInSz;
 
-#ifdef WOLFSSL_STM32L4
+#ifdef STM32_CRYPTO_AES_ONLY
     /* Set the CRYP parameters */
     hcryp.Init.ChainingMode  = CRYP_CHAINMODE_AES_GCM_GMAC;
     hcryp.Init.OperatingMode = CRYP_ALGOMODE_DECRYPT;
@@ -8770,29 +6430,61 @@
     status = HAL_CRYPEx_AES_Auth(&hcryp, NULL, 0, NULL, STM32_HAL_TIMEOUT);
     if (status == HAL_OK) {
         /* GCM header phase */
-        hcryp.Init.GCMCMACPhase  = CRYP_HEADER_PHASE;
+        hcryp.Init.GCMCMACPhase = CRYP_HEADER_PHASE;
         status = HAL_CRYPEx_AES_Auth(&hcryp, NULL, 0, NULL, STM32_HAL_TIMEOUT);
-        if (status == HAL_OK) {
-            /* GCM payload phase */
-            hcryp.Init.GCMCMACPhase  = CRYP_PAYLOAD_PHASE;
-            status = HAL_CRYPEx_AES_Auth(&hcryp, (byte*)inPadded, sz, inPadded,
-                STM32_HAL_TIMEOUT);
-            if (status == HAL_OK) {
-                /* GCM final phase */
-                hcryp.Init.GCMCMACPhase  = CRYP_FINAL_PHASE;
-                status = HAL_CRYPEx_AES_Auth(&hcryp, NULL, sz, tag,
-                    STM32_HAL_TIMEOUT);
-            }
-        }
+    }
+    if (status == HAL_OK) {
+        /* GCM payload phase - blocks */
+        hcryp.Init.GCMCMACPhase  = CRYP_PAYLOAD_PHASE;
+        if (blocks) {
+            status = HAL_CRYPEx_AES_Auth(&hcryp, (byte*)in,
+                (blocks * AES_BLOCK_SIZE), out, STM32_HAL_TIMEOUT);
+        }
+    }
+    if (status == HAL_OK && (partial != 0 || blocks == 0)) {
+        /* GCM payload phase - partial remainder */
+        XMEMSET(partialBlock, 0, sizeof(partialBlock));
+        XMEMCPY(partialBlock, in + (blocks * AES_BLOCK_SIZE), partial);
+        status = HAL_CRYPEx_AES_Auth(&hcryp, partialBlock, partial,
+            partialBlock, STM32_HAL_TIMEOUT);
+        XMEMCPY(out + (blocks * AES_BLOCK_SIZE), partialBlock, partial);
+    }
+    if (status == HAL_OK) {
+        /* GCM final phase */
+        hcryp.Init.GCMCMACPhase = CRYP_FINAL_PHASE;
+        status = HAL_CRYPEx_AES_Auth(&hcryp, NULL, sz, tag, STM32_HAL_TIMEOUT);
+    }
+#elif defined(STM32_HAL_V2)
+    hcryp.Init.Algorithm = CRYP_AES_GCM;
+    ByteReverseWords((word32*)partialBlock, (word32*)ctr, AES_BLOCK_SIZE);
+    hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)partialBlock;
+    HAL_CRYP_Init(&hcryp);
+
+    /* GCM payload phase - can handle partial blocks */
+    status = HAL_CRYP_Decrypt(&hcryp, (uint32_t*)in,
+        (blocks * AES_BLOCK_SIZE) + partial, (uint32_t*)out, STM32_HAL_TIMEOUT);
+    if (status == HAL_OK) {
+        /* Compute the authTag */
+        status = HAL_CRYPEx_AESGCM_GenerateAuthTAG(&hcryp, (uint32_t*)tag,
+            STM32_HAL_TIMEOUT);
     }
 #else
     HAL_CRYP_Init(&hcryp);
-    /* Use inPadded for output buffer instead of
-    * out so that we don't overflow our size. */
-    status = HAL_CRYPEx_AESGCM_Decrypt(&hcryp, (byte*)inPadded,
-                                    sz, inPadded, STM32_HAL_TIMEOUT);
-    /* Compute the authTag */
+    if (blocks) {
+        /* GCM payload phase - blocks */
+        status = HAL_CRYPEx_AESGCM_Decrypt(&hcryp, (byte*)in,
+            (blocks * AES_BLOCK_SIZE), out, STM32_HAL_TIMEOUT);
+    }
+    if (status == HAL_OK && (partial != 0 || blocks == 0)) {
+        /* GCM payload phase - partial remainder */
+        XMEMSET(partialBlock, 0, sizeof(partialBlock));
+        XMEMCPY(partialBlock, in + (blocks * AES_BLOCK_SIZE), partial);
+        status = HAL_CRYPEx_AESGCM_Decrypt(&hcryp, partialBlock, partial,
+            partialBlock, STM32_HAL_TIMEOUT);
+        XMEMCPY(out + (blocks * AES_BLOCK_SIZE), partialBlock, partial);
+    }
     if (status == HAL_OK) {
+        /* Compute the authTag */
         status = HAL_CRYPEx_AESGCM_Finish(&hcryp, sz, tag, STM32_HAL_TIMEOUT);
     }
 #endif
@@ -8801,37 +6493,46 @@
         ret = AES_GCM_AUTH_E;
 
     HAL_CRYP_DeInit(&hcryp);
-#else
-    ByteReverseWords((word32*)keyCopy, (word32*)aes->key, keySize);
+
+#else /* STD_PERI_LIB */
+    ByteReverseWords(keyCopy, (word32*)aes->key, aes->keylen);
 
     /* Input size and auth size need to be the actual sizes, even though
      * they are not block aligned, because this length (in bits) is used
-     * in the final GHASH. Use inPadded for output buffer instead of
-     * out so that we don't overflow our size.                         */
-    status = CRYP_AES_GCM(MODE_DECRYPT, (uint8_t*)initialCounter,
-                         (uint8_t*)keyCopy,     keySize * 8,
-                         (uint8_t*)inPadded,    sz,
-                         (uint8_t*)authInPadded,authInSz,
-                         (uint8_t*)inPadded,    tag);
+     * in the final GHASH. */
+    status = CRYP_AES_GCM(MODE_DECRYPT, (uint8_t*)ctr,
+                         (uint8_t*)keyCopy,      keySize * 8,
+                         (uint8_t*)in,           sz,
+                         (uint8_t*)authInPadded, authInSz,
+                         (uint8_t*)out,          tag);
     if (status != SUCCESS)
         ret = AES_GCM_AUTH_E;
 #endif /* WOLFSSL_STM32_CUBEMX */
 
-    if (ret == 0 && ConstantCompare(authTag, tag, authTagSz) == 0) {
-        /* Only keep the decrypted data if authTag success. */
-        XMEMCPY(out, inPadded, sz);
-        ret = 0; /* success */
-    }
-
-    /* only allocate padding buffers if the inputs are not a multiple of block sz */
-    if (inPadded != NULL && inPadSz != sz)
-        XFREE(inPadded , aes->heap, DYNAMIC_TYPE_TMP_BUFFER);
-    if (authInPadded != NULL && authPadSz != authInSz)
+    /* STM32 GCM hardware only supports IV of 12 bytes, so use software for auth */
+    if (sz == 0 || ivSz != GCM_NONCE_MID_SZ) {
+        DecrementGcmCounter(ctr); /* hardware requires +1, so subtract it */
+        GHASH(aes, authIn, authInSz, in, sz, tag, sizeof(tag));
+        wc_AesEncrypt(aes, ctr, partialBlock);
+        xorbuf(tag, partialBlock, sizeof(tag));
+    }
+
+    if (ConstantCompare(authTag, tag, authTagSz) != 0) {
+        ret = AES_GCM_AUTH_E;
+    }
+
+    /* Free memory if not a multiple of AES_BLOCK_SZ */
+    if (authInPadded != authIn) {
         XFREE(authInPadded, aes->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+
+    wolfSSL_CryptHwMutexUnLock();
 
     return ret;
 }
-#else
+
+#endif /* STM32_CRYPTO_AES_GCM */
+
 #ifdef WOLFSSL_AESNI
 int AES_GCM_decrypt_C(Aes* aes, byte* out, const byte* in, word32 sz,
                       const byte* iv, word32 ivSz,
@@ -8856,15 +6557,24 @@
     byte scratch[AES_BLOCK_SIZE];
     byte Tprime[AES_BLOCK_SIZE];
     byte EKY0[AES_BLOCK_SIZE];
+#ifdef OPENSSL_EXTRA
+    word32 aadTemp;
+#endif
     ctr = counter;
-
     XMEMSET(initialCounter, 0, AES_BLOCK_SIZE);
     if (ivSz == GCM_NONCE_MID_SZ) {
         XMEMCPY(initialCounter, iv, ivSz);
         initialCounter[AES_BLOCK_SIZE - 1] = 1;
     }
     else {
+#ifdef OPENSSL_EXTRA
+        aadTemp = aes->aadLen;
+        aes->aadLen = 0;
+#endif
         GHASH(aes, NULL, 0, iv, ivSz, initialCounter, AES_BLOCK_SIZE);
+#ifdef OPENSSL_EXTRA
+        aes->aadLen = aadTemp;
+#endif
     }
     XMEMCPY(ctr, initialCounter, AES_BLOCK_SIZE);
 
@@ -8873,13 +6583,20 @@
     wc_AesEncrypt(aes, ctr, EKY0);
     xorbuf(Tprime, EKY0, sizeof(Tprime));
 
+#ifdef OPENSSL_EXTRA
+    if (!out) {
+        /* authenticated, non-confidential data */
+        /* store AAD size for next call */
+        aes->aadLen = authInSz;
+    }
+#endif
     if (ConstantCompare(authTag, Tprime, authTagSz) != 0) {
         return AES_GCM_AUTH_E;
     }
 
-#ifdef WOLFSSL_PIC32MZ_CRYPT
+#if defined(WOLFSSL_PIC32MZ_CRYPT)
     if (blocks) {
-        /* use intitial IV for PIC32 HW, but don't use it below */
+        /* use initial IV for HW, but don't use it below */
         XMEMCPY(aes->reg, ctr, AES_BLOCK_SIZE);
 
         ret = wc_Pic32AesCrypt(
@@ -8895,7 +6612,7 @@
 #if defined(HAVE_AES_ECB) && !defined(WOLFSSL_PIC32MZ_CRYPT)
     /* some hardware acceleration can gain performance from doing AES encryption
      * of the whole buffer at once */
-    if (c != p) { /* can not handle inline decryption */
+    if (c != p && blocks > 0) { /* can not handle inline decryption */
         while (blocks--) {
             IncrementGcmCounter(ctr);
             XMEMCPY(p, ctr, AES_BLOCK_SIZE);
@@ -8904,15 +6621,16 @@
 
         /* reset number of blocks and then do encryption */
         blocks = sz / AES_BLOCK_SIZE;
+
         wc_AesEcbEncrypt(aes, out, out, AES_BLOCK_SIZE * blocks);
         xorbuf(out, c, AES_BLOCK_SIZE * blocks);
         c += AES_BLOCK_SIZE * blocks;
     }
     else
-#endif /* HAVE_AES_ECB */
+#endif /* HAVE_AES_ECB && !PIC32MZ */
     while (blocks--) {
         IncrementGcmCounter(ctr);
-    #ifndef WOLFSSL_PIC32MZ_CRYPT
+    #if !defined(WOLFSSL_PIC32MZ_CRYPT)
         wc_AesEncrypt(aes, ctr, scratch);
         xorbuf(scratch, c, AES_BLOCK_SIZE);
         XMEMCPY(p, scratch, AES_BLOCK_SIZE);
@@ -8931,24 +6649,36 @@
     return ret;
 }
 
+/* Software AES - GCM Decrypt */
 int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
                      const byte* iv, word32 ivSz,
                      const byte* authTag, word32 authTagSz,
                      const byte* authIn, word32 authInSz)
 {
 #ifdef WOLFSSL_AESNI
-    int res;
+    int res = AES_GCM_AUTH_E;
 #endif
 
     /* argument checks */
     /* If the sz is non-zero, both in and out must be set. If sz is 0,
      * in and out are don't cares, as this is is the GMAC case. */
     if (aes == NULL || iv == NULL || (sz != 0 && (in == NULL || out == NULL)) ||
-        authTag == NULL || authTagSz > AES_BLOCK_SIZE || authTagSz == 0) {
+        authTag == NULL || authTagSz > AES_BLOCK_SIZE || authTagSz == 0 ||
+        ivSz == 0) {
 
         return BAD_FUNC_ARG;
     }
 
+#ifdef WOLF_CRYPTO_CB
+    if (aes->devId != INVALID_DEVID) {
+        int ret = wc_CryptoCb_AesGcmDecrypt(aes, out, in, sz, iv, ivSz,
+            authTag, authTagSz, authIn, authInSz);
+        if (ret != CRYPTOCB_UNAVAILABLE)
+            return ret;
+        /* fall-through when unavailable */
+    }
+#endif
+
 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)
     /* if async and byte count above threshold */
     /* only 12-byte IV is supported in HW */
@@ -8958,13 +6688,13 @@
         #ifdef HAVE_CAVIUM_V
         if (authInSz == 20) { /* Nitrox V GCM is only working with 20 byte AAD */
             return NitroxAesGcmDecrypt(aes, out, in, sz,
-                (const byte*)aes->asyncKey, aes->keylen, iv, ivSz,
+                (const byte*)aes->devKey, aes->keylen, iv, ivSz,
                 authTag, authTagSz, authIn, authInSz);
         }
         #endif
     #elif defined(HAVE_INTEL_QA)
         return IntelQaSymAesGcmDecrypt(&aes->asyncDev, out, in, sz,
-            (const byte*)aes->asyncKey, aes->keylen, iv, ivSz,
+            (const byte*)aes->devKey, aes->keylen, iv, ivSz,
             authTag, authTagSz, authIn, authInSz);
     #else /* WOLFSSL_ASYNC_CRYPT_TEST */
         if (wc_AsyncTestInit(&aes->asyncDev, ASYNC_TEST_AES_GCM_DECRYPT)) {
@@ -8985,7 +6715,17 @@
     }
 #endif /* WOLFSSL_ASYNC_CRYPT */
 
-    /* software AES GCM */
+#ifdef STM32_CRYPTO_AES_GCM
+    /* The STM standard peripheral library API's doesn't support partial blocks */
+    #ifdef STD_PERI_LIB
+    if (partial == 0)
+    #endif
+    {
+        return wc_AesGcmDecrypt_STM32(
+            aes, out, in, sz, iv, ivSz,
+            authTag, authTagSz, authIn, authInSz);
+    }
+#endif /* STM32_CRYPTO_AES_GCM */
 
 #ifdef WOLFSSL_AESNI
     #ifdef HAVE_INTEL_AVX2
@@ -9024,7 +6764,7 @@
 }
 #endif
 #endif /* HAVE_AES_DECRYPT || HAVE_AESGCM_DECRYPT */
-#endif /* (WOLFSSL_XILINX_CRYPT) */
+#endif /* WOLFSSL_XILINX_CRYPT */
 #endif /* end of block for AESGCM implementation selection */
 
 
@@ -9124,7 +6864,8 @@
                                (byte*)aes->reg, ivOutSz,
                                authTag, authTagSz,
                                authIn, authInSz);
-        IncCtr((byte*)aes->reg, ivOutSz);
+        if (ret == 0)
+            IncCtr((byte*)aes->reg, ivOutSz);
     }
 
     return ret;
@@ -9135,21 +6876,24 @@
             byte* authTag, word32 authTagSz, WC_RNG* rng)
 {
     Aes aes;
-    int ret = 0;
+    int ret;
 
     if (key == NULL || iv == NULL || (authIn == NULL && authInSz != 0) ||
         authTag == NULL || authTagSz == 0 || rng == NULL) {
 
-        ret = BAD_FUNC_ARG;
-    }
-
-    if (ret == 0)
+        return BAD_FUNC_ARG;
+    }
+
+    ret = wc_AesInit(&aes, NULL, INVALID_DEVID);
+    if (ret == 0) {
         ret = wc_AesGcmSetKey(&aes, key, keySz);
-    if (ret == 0)
-        ret = wc_AesGcmSetIV(&aes, ivSz, NULL, 0, rng);
-    if (ret == 0)
-        ret = wc_AesGcmEncrypt_ex(&aes, NULL, NULL, 0, iv, ivSz,
+        if (ret == 0)
+            ret = wc_AesGcmSetIV(&aes, ivSz, NULL, 0, rng);
+        if (ret == 0)
+            ret = wc_AesGcmEncrypt_ex(&aes, NULL, NULL, 0, iv, ivSz,
                                   authTag, authTagSz, authIn, authInSz);
+        wc_AesFree(&aes);
+    }
     ForceZero(&aes, sizeof(aes));
 
     return ret;
@@ -9160,22 +6904,36 @@
                   const byte* authIn, word32 authInSz,
                   const byte* authTag, word32 authTagSz)
 {
+    int ret;
+#ifndef NO_AES_DECRYPT
     Aes aes;
-    int ret = 0;
 
     if (key == NULL || iv == NULL || (authIn == NULL && authInSz != 0) ||
         authTag == NULL || authTagSz == 0 || authTagSz > AES_BLOCK_SIZE) {
 
-        ret = BAD_FUNC_ARG;
-    }
-
-    if (ret == 0)
+        return BAD_FUNC_ARG;
+    }
+
+    ret = wc_AesInit(&aes, NULL, INVALID_DEVID);
+    if (ret == 0) {
         ret = wc_AesGcmSetKey(&aes, key, keySz);
-    if (ret == 0)
-        ret = wc_AesGcmDecrypt(&aes, NULL, NULL, 0, iv, ivSz,
+        if (ret == 0)
+            ret = wc_AesGcmDecrypt(&aes, NULL, NULL, 0, iv, ivSz,
                                   authTag, authTagSz, authIn, authInSz);
+        wc_AesFree(&aes);
+    }
     ForceZero(&aes, sizeof(aes));
-
+#else
+    (void)key;
+    (void)keySz;
+    (void)iv;
+    (void)ivSz;
+    (void)authIn;
+    (void)authInSz;
+    (void)authTag;
+    (void)authTagSz;
+    ret = NOT_COMPILED_IN;
+#endif
     return ret;
 }
 
@@ -9286,10 +7044,9 @@
 }
 #endif /* HAVE_AES_DECRYPT */
 
-
-/* software AES CCM */
 #else
 
+/* Software CCM */
 static void roll_x(Aes* aes, const byte* in, word32 inSz, byte* out)
 {
     /* process the bulk of the data */
@@ -9362,14 +7119,60 @@
     }
 }
 
+#ifdef WOLFSSL_AESNI
+static WC_INLINE void AesCcmCtrIncSet4(byte* B, word32 lenSz)
+{
+    word32 i;
+
+    /* B+1 = B */
+    XMEMCPY(B + AES_BLOCK_SIZE * 1, B, AES_BLOCK_SIZE);
+    /* B+2,B+3 = B,B+1 */
+    XMEMCPY(B + AES_BLOCK_SIZE * 2, B, AES_BLOCK_SIZE * 2);
+
+    for (i = 0; i < lenSz; i++) {
+        if (++B[AES_BLOCK_SIZE * 1 - 1 - i] != 0) break;
+    }
+    B[AES_BLOCK_SIZE * 2 - 1] += 2;
+    if (B[AES_BLOCK_SIZE * 2 - 1] < 2) {
+        for (i = 1; i < lenSz; i++) {
+            if (++B[AES_BLOCK_SIZE * 2 - 1 - i] != 0) break;
+        }
+    }
+    B[AES_BLOCK_SIZE * 3 - 1] += 3;
+    if (B[AES_BLOCK_SIZE * 3 - 1] < 3) {
+        for (i = 1; i < lenSz; i++) {
+            if (++B[AES_BLOCK_SIZE * 3 - 1 - i] != 0) break;
+        }
+    }
+}
+
+static WC_INLINE void AesCcmCtrInc4(byte* B, word32 lenSz)
+{
+    word32 i;
+
+    B[AES_BLOCK_SIZE - 1] += 4;
+    if (B[AES_BLOCK_SIZE - 1] < 4) {
+        for (i = 1; i < lenSz; i++) {
+            if (++B[AES_BLOCK_SIZE - 1 - i] != 0) break;
+        }
+    }
+}
+#endif
+
+/* Software AES - CCM Encrypt */
 /* return 0 on success */
 int wc_AesCcmEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
                    const byte* nonce, word32 nonceSz,
                    byte* authTag, word32 authTagSz,
                    const byte* authIn, word32 authInSz)
 {
+#ifndef WOLFSSL_AESNI
     byte A[AES_BLOCK_SIZE];
     byte B[AES_BLOCK_SIZE];
+#else
+    ALIGN128 byte A[AES_BLOCK_SIZE * 4];
+    ALIGN128 byte B[AES_BLOCK_SIZE * 4];
+#endif
     byte lenSz;
     word32 i;
     byte mask = 0xFF;
@@ -9381,6 +7184,7 @@
             authTagSz > AES_BLOCK_SIZE)
         return BAD_FUNC_ARG;
 
+    XMEMSET(A, 0, sizeof(A));
     XMEMCPY(B+1, nonce, nonceSz);
     lenSz = AES_BLOCK_SIZE - 1 - (byte)nonceSz;
     B[0] = (authInSz > 0 ? 64 : 0)
@@ -9407,6 +7211,26 @@
     xorbuf(authTag, A, authTagSz);
 
     B[15] = 1;
+#ifdef WOLFSSL_AESNI
+    if (haveAESNI && aes->use_aesni) {
+        while (inSz >= AES_BLOCK_SIZE * 4) {
+            AesCcmCtrIncSet4(B, lenSz);
+
+            AES_ECB_encrypt(B, A, AES_BLOCK_SIZE * 4, (byte*)aes->key,
+                            aes->rounds);
+            xorbuf(A, in, AES_BLOCK_SIZE * 4);
+            XMEMCPY(out, A, AES_BLOCK_SIZE * 4);
+
+            inSz -= AES_BLOCK_SIZE * 4;
+            in += AES_BLOCK_SIZE * 4;
+            out += AES_BLOCK_SIZE * 4;
+
+            if (inSz < AES_BLOCK_SIZE * 4) {
+                AesCcmCtrInc4(B, lenSz);
+            }
+        }
+    }
+#endif
     while (inSz >= AES_BLOCK_SIZE) {
         wc_AesEncrypt(aes, B, A);
         xorbuf(A, in, AES_BLOCK_SIZE);
@@ -9430,13 +7254,19 @@
 }
 
 #ifdef HAVE_AES_DECRYPT
+/* Software AES - CCM Decrypt */
 int  wc_AesCcmDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
                    const byte* nonce, word32 nonceSz,
                    const byte* authTag, word32 authTagSz,
                    const byte* authIn, word32 authInSz)
 {
+#ifndef WOLFSSL_AESNI
     byte A[AES_BLOCK_SIZE];
     byte B[AES_BLOCK_SIZE];
+#else
+    ALIGN128 byte B[AES_BLOCK_SIZE * 4];
+    ALIGN128 byte A[AES_BLOCK_SIZE * 4];
+#endif
     byte* o;
     byte lenSz;
     word32 i, oSz;
@@ -9460,6 +7290,26 @@
         B[AES_BLOCK_SIZE - 1 - i] = 0;
     B[15] = 1;
 
+#ifdef WOLFSSL_AESNI
+    if (haveAESNI && aes->use_aesni) {
+        while (oSz >= AES_BLOCK_SIZE * 4) {
+            AesCcmCtrIncSet4(B, lenSz);
+
+            AES_ECB_encrypt(B, A, AES_BLOCK_SIZE * 4, (byte*)aes->key,
+                            aes->rounds);
+            xorbuf(A, in, AES_BLOCK_SIZE * 4);
+            XMEMCPY(o, A, AES_BLOCK_SIZE * 4);
+
+            oSz -= AES_BLOCK_SIZE * 4;
+            in += AES_BLOCK_SIZE * 4;
+            o += AES_BLOCK_SIZE * 4;
+
+            if (oSz < AES_BLOCK_SIZE * 4) {
+                AesCcmCtrInc4(B, lenSz);
+            }
+        }
+    }
+#endif
     while (oSz >= AES_BLOCK_SIZE) {
         wc_AesEncrypt(aes, B, A);
         xorbuf(A, in, AES_BLOCK_SIZE);
@@ -9521,7 +7371,7 @@
 }
 
 #endif /* HAVE_AES_DECRYPT */
-#endif /* software AES CCM */
+#endif /* software CCM */
 
 /* abstract functions that call lower level AESCCM functions */
 #ifndef WC_NO_RNG
@@ -9579,8 +7429,10 @@
                                (byte*)aes->reg, aes->nonceSz,
                                authTag, authTagSz,
                                authIn, authInSz);
-        XMEMCPY(ivOut, aes->reg, aes->nonceSz);
-        IncCtr((byte*)aes->reg, aes->nonceSz);
+        if (ret == 0) {
+            XMEMCPY(ivOut, aes->reg, aes->nonceSz);
+            IncCtr((byte*)aes->reg, aes->nonceSz);
+        }
     }
 
     return ret;
@@ -9601,15 +7453,57 @@
 
     aes->heap = heap;
 
+#ifdef WOLF_CRYPTO_CB
+    aes->devId = devId;
+    aes->devCtx = NULL;
+#else
+    (void)devId;
+#endif
 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)
     ret = wolfAsync_DevCtxInit(&aes->asyncDev, WOLFSSL_ASYNC_MARKER_AES,
                                                         aes->heap, devId);
-#else
-    (void)devId;
 #endif /* WOLFSSL_ASYNC_CRYPT */
 
+#ifdef WOLFSSL_AFALG
+    aes->alFd = -1;
+    aes->rdFd = -1;
+#endif
+#if defined(WOLFSSL_DEVCRYPTO) && \
+   (defined(WOLFSSL_DEVCRYPTO_AES) || defined(WOLFSSL_DEVCRYPTO_CBC))
+    aes->ctx.cfd = -1;
+#endif
+#if defined(WOLFSSL_CRYPTOCELL) && defined(WOLFSSL_CRYPTOCELL_AES)
+    XMEMSET(&aes->ctx, 0, sizeof(aes->ctx));
+#endif
+#ifdef HAVE_AESGCM
+#ifdef OPENSSL_EXTRA
+    XMEMSET(aes->aadH, 0, sizeof(aes->aadH));
+    aes->aadLen = 0;
+#endif
+#endif
+    return ret;
+}
+
+#ifdef HAVE_PKCS11
+int  wc_AesInit_Id(Aes* aes, unsigned char* id, int len, void* heap, int devId)
+{
+    int ret = 0;
+
+    if (aes == NULL)
+        ret = BAD_FUNC_ARG;
+    if (ret == 0 && (len < 0 || len > AES_MAX_ID_LEN))
+        ret = BUFFER_E;
+
+    if (ret == 0)
+        ret  = wc_AesInit(aes, heap, devId);
+    if (ret == 0) {
+        XMEMCPY(aes->id, id, len);
+        aes->idLen = len;
+    }
+
     return ret;
 }
+#endif
 
 /* Free Aes from use with async hardware */
 void wc_AesFree(Aes* aes)
@@ -9620,6 +7514,23 @@
 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)
     wolfAsync_DevCtxFree(&aes->asyncDev, WOLFSSL_ASYNC_MARKER_AES);
 #endif /* WOLFSSL_ASYNC_CRYPT */
+#if defined(WOLFSSL_AFALG) || defined(WOLFSSL_AFALG_XILINX_AES)
+    if (aes->rdFd > 0) { /* negative is error case */
+        close(aes->rdFd);
+    }
+    if (aes->alFd > 0) {
+        close(aes->alFd);
+    }
+#endif /* WOLFSSL_AFALG */
+#if defined(WOLFSSL_DEVCRYPTO) && \
+    (defined(WOLFSSL_DEVCRYPTO_AES) || defined(WOLFSSL_DEVCRYPTO_CBC))
+    wc_DevCryptoFree(&aes->ctx);
+#endif
+#if defined(WOLF_CRYPTO_CB) || (defined(WOLFSSL_DEVCRYPTO) && \
+    (defined(WOLFSSL_DEVCRYPTO_AES) || defined(WOLFSSL_DEVCRYPTO_CBC))) || \
+    (defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES))
+    ForceZero((byte*)aes->devKey, AES_MAX_KEY_SIZE/WOLFSSL_BIT_SIZE);
+#endif
 }
 
 
@@ -9630,23 +7541,26 @@
     if (aes == NULL || keySize == NULL) {
         return BAD_FUNC_ARG;
     }
-
+#if defined(WOLFSSL_CRYPTOCELL) && defined(WOLFSSL_CRYPTOCELL_AES)
+    *keySize = aes->ctx.key.keySize;
+    return ret;
+#endif
     switch (aes->rounds) {
-    #ifdef WOLFSSL_AES_128
+#ifdef WOLFSSL_AES_128
     case 10:
         *keySize = 16;
         break;
-    #endif
-    #ifdef WOLFSSL_AES_192
+#endif
+#ifdef WOLFSSL_AES_192
     case 12:
         *keySize = 24;
         break;
-    #endif
-    #ifdef WOLFSSL_AES_256
+#endif
+#ifdef WOLFSSL_AES_256
     case 14:
         *keySize = 32;
         break;
-    #endif
+#endif
     default:
         *keySize = 0;
         ret = BAD_FUNC_ARG;
@@ -9660,9 +7574,36 @@
 #ifdef HAVE_AES_ECB
 #if defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES)
     /* implemented in wolfcrypt/src/port/caam/caam_aes.c */
+
+#elif defined(WOLFSSL_AFALG)
+    /* implemented in wolfcrypt/src/port/af_alg/afalg_aes.c */
+
+#elif defined(WOLFSSL_DEVCRYPTO_AES)
+    /* implemented in wolfcrypt/src/port/devcrypt/devcrypto_aes.c */
+
+#elif defined(WOLFSSL_SCE) && !defined(WOLFSSL_SCE_NO_AES)
+
+/* Software AES - ECB */
+int wc_AesEcbEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+{
+    if ((in == NULL) || (out == NULL) || (aes == NULL))
+        return BAD_FUNC_ARG;
+
+        return AES_ECB_encrypt(aes, in, out, sz);
+}
+
+
+int wc_AesEcbDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+{
+    if ((in == NULL) || (out == NULL) || (aes == NULL))
+        return BAD_FUNC_ARG;
+
+        return AES_ECB_decrypt(aes, in, out, sz);
+}
+
 #else
 
-/* software implementation */
+/* Software AES - ECB */
 int wc_AesEcbEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
 {
     word32 blocks = sz / AES_BLOCK_SIZE;
@@ -9698,44 +7639,66 @@
 #endif
 #endif /* HAVE_AES_ECB */
 
-#ifdef WOLFSSL_AES_CFB
-/* CFB 128
+#if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_OFB)
+/* Feedback AES mode
  *
  * aes structure holding key to use for encryption
  * out buffer to hold result of encryption (must be at least as large as input
  *     buffer)
  * in  buffer to encrypt
  * sz  size of input buffer
+ * mode flag to specify AES mode
  *
  * returns 0 on success and negative error values on failure
  */
-int wc_AesCfbEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+/* Software AES - CFB Encrypt */
+static int wc_AesFeedbackEncrypt(Aes* aes, byte* out, const byte* in,
+        word32 sz, byte mode)
 {
     byte*  tmp = NULL;
+#ifdef WOLFSSL_AES_CFB
     byte*  reg = NULL;
-
-    WOLFSSL_ENTER("wc_AesCfbEncrypt");
+#endif
 
     if (aes == NULL || out == NULL || in == NULL) {
         return BAD_FUNC_ARG;
     }
 
+#ifdef WOLFSSL_AES_CFB
     if (aes->left && sz) {
         reg = (byte*)aes->reg + AES_BLOCK_SIZE - aes->left;
     }
+#endif
 
     /* consume any unused bytes left in aes->tmp */
     tmp = (byte*)aes->tmp + AES_BLOCK_SIZE - aes->left;
     while (aes->left && sz) {
-        *(out++) = *(reg++) = *(in++) ^ *(tmp++);
+        *(out) = *(in++) ^ *(tmp++);
+    #ifdef WOLFSSL_AES_CFB
+        if (mode == AES_CFB_MODE) {
+            *(reg++) = *out;
+        }
+    #endif
+        out++;
         aes->left--;
         sz--;
     }
 
     while (sz >= AES_BLOCK_SIZE) {
-        wc_AesEncryptDirect(aes, out, (byte*)aes->reg);
-        xorbuf(out, in, AES_BLOCK_SIZE);
-        XMEMCPY(aes->reg, out, AES_BLOCK_SIZE);
+        /* Using aes->tmp here for inline case i.e. in=out */
+        wc_AesEncryptDirect(aes, (byte*)aes->tmp, (byte*)aes->reg);
+    #ifdef WOLFSSL_AES_OFB
+        if (mode == AES_OFB_MODE) {
+            XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE);
+        }
+    #endif
+        xorbuf((byte*)aes->tmp, in, AES_BLOCK_SIZE);
+    #ifdef WOLFSSL_AES_CFB
+        if (mode == AES_CFB_MODE) {
+            XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE);
+        }
+    #endif
+        XMEMCPY(out, aes->tmp, AES_BLOCK_SIZE);
         out += AES_BLOCK_SIZE;
         in  += AES_BLOCK_SIZE;
         sz  -= AES_BLOCK_SIZE;
@@ -9747,10 +7710,23 @@
         wc_AesEncryptDirect(aes, (byte*)aes->tmp, (byte*)aes->reg);
         aes->left = AES_BLOCK_SIZE;
         tmp = (byte*)aes->tmp;
+    #ifdef WOLFSSL_AES_OFB
+        if (mode == AES_OFB_MODE) {
+            XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE);
+        }
+    #endif
+    #ifdef WOLFSSL_AES_CFB
         reg = (byte*)aes->reg;
+    #endif
 
         while (sz--) {
-            *(out++) = *(reg++) = *(in++) ^ *(tmp++);
+            *(out) = *(in++) ^ *(tmp++);
+        #ifdef WOLFSSL_AES_CFB
+            if (mode == AES_CFB_MODE) {
+                *(reg++) = *out;
+            }
+        #endif
+            out++;
             aes->left--;
         }
     }
@@ -9770,21 +7746,23 @@
  *
  * returns 0 on success and negative error values on failure
  */
-int wc_AesCfbDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+/* Software AES - CFB Decrypt */
+static int wc_AesFeedbackDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
+        byte mode)
 {
     byte*  tmp;
 
-    WOLFSSL_ENTER("wc_AesCfbDecrypt");
-
     if (aes == NULL || out == NULL || in == NULL) {
         return BAD_FUNC_ARG;
     }
 
+    #ifdef WOLFSSL_AES_CFB
     /* check if more input needs copied over to aes->reg */
-    if (aes->left && sz) {
+    if (aes->left && sz && mode == AES_CFB_MODE) {
         int size = min(aes->left, sz);
         XMEMCPY((byte*)aes->reg + AES_BLOCK_SIZE - aes->left, in, size);
     }
+    #endif
 
     /* consume any unused bytes left in aes->tmp */
     tmp = (byte*)aes->tmp + AES_BLOCK_SIZE - aes->left;
@@ -9795,9 +7773,20 @@
     }
 
     while (sz > AES_BLOCK_SIZE) {
-        wc_AesEncryptDirect(aes, out, (byte*)aes->reg);
-        xorbuf(out, in, AES_BLOCK_SIZE);
-        XMEMCPY(aes->reg, in, AES_BLOCK_SIZE);
+        /* Using aes->tmp here for inline case i.e. in=out */
+        wc_AesEncryptDirect(aes, (byte*)aes->tmp, (byte*)aes->reg);
+    #ifdef WOLFSSL_AES_OFB
+        if (mode == AES_OFB_MODE) {
+            XMEMCPY((byte*)aes->reg, (byte*)aes->tmp, AES_BLOCK_SIZE);
+        }
+    #endif
+        xorbuf((byte*)aes->tmp, in, AES_BLOCK_SIZE);
+    #ifdef WOLFSSL_AES_CFB
+        if (mode == AES_CFB_MODE) {
+            XMEMCPY(aes->reg, in, AES_BLOCK_SIZE);
+        }
+    #endif
+        XMEMCPY(out, (byte*)aes->tmp, AES_BLOCK_SIZE);
         out += AES_BLOCK_SIZE;
         in  += AES_BLOCK_SIZE;
         sz  -= AES_BLOCK_SIZE;
@@ -9807,7 +7796,17 @@
     /* decrypt left over data */
     if (sz) {
         wc_AesEncryptDirect(aes, (byte*)aes->tmp, (byte*)aes->reg);
-        XMEMCPY(aes->reg, in, sz);
+    #ifdef WOLFSSL_AES_CFB
+        if (mode == AES_CFB_MODE) {
+            XMEMCPY(aes->reg, in, sz);
+        }
+    #endif
+    #ifdef WOLFSSL_AES_OFB
+        if (mode == AES_OFB_MODE) {
+            XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE);
+        }
+    #endif
+
         aes->left = AES_BLOCK_SIZE;
         tmp = (byte*)aes->tmp;
 
@@ -9822,6 +7821,282 @@
 #endif /* HAVE_AES_DECRYPT */
 #endif /* WOLFSSL_AES_CFB */
 
+#ifdef WOLFSSL_AES_CFB
+/* CFB 128
+ *
+ * aes structure holding key to use for encryption
+ * out buffer to hold result of encryption (must be at least as large as input
+ *     buffer)
+ * in  buffer to encrypt
+ * sz  size of input buffer
+ *
+ * returns 0 on success and negative error values on failure
+ */
+/* Software AES - CFB Encrypt */
+int wc_AesCfbEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+{
+    return wc_AesFeedbackEncrypt(aes, out, in, sz, AES_CFB_MODE);
+}
+
+
+#ifdef HAVE_AES_DECRYPT
+/* CFB 128
+ *
+ * aes structure holding key to use for decryption
+ * out buffer to hold result of decryption (must be at least as large as input
+ *     buffer)
+ * in  buffer to decrypt
+ * sz  size of input buffer
+ *
+ * returns 0 on success and negative error values on failure
+ */
+/* Software AES - CFB Decrypt */
+int wc_AesCfbDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+{
+    return wc_AesFeedbackDecrypt(aes, out, in, sz, AES_CFB_MODE);
+}
+#endif /* HAVE_AES_DECRYPT */
+
+
+/* shift the whole AES_BLOCK_SIZE array left by 8 or 1 bits */
+static void shiftLeftArray(byte* ary, byte shift)
+{
+    int i;
+
+    if (shift == WOLFSSL_BIT_SIZE) {
+        /* shifting over by 8 bits */
+        for (i = 0; i < AES_BLOCK_SIZE - 1; i++) {
+            ary[i] = ary[i+1];
+        }
+        ary[i] = 0;
+    }
+    else {
+        byte carry = 0;
+
+        /* shifting over by 7 or less bits */
+        for (i = 0; i < AES_BLOCK_SIZE - 1; i++) {
+            carry = ary[i+1] & (0XFF << (WOLFSSL_BIT_SIZE - shift));
+            carry >>= (WOLFSSL_BIT_SIZE - shift);
+            ary[i] = (ary[i] << shift) + carry;
+        }
+        ary[i] = ary[i] << shift;
+    }
+}
+
+
+/* returns 0 on success and negative values on failure */
+static int wc_AesFeedbackCFB8(Aes* aes, byte* out, const byte* in,
+        word32 sz, byte dir)
+{
+    byte *pt;
+
+    if (aes == NULL || out == NULL || in == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    if (sz == 0) {
+        return 0;
+    }
+
+    while (sz > 0) {
+        wc_AesEncryptDirect(aes, (byte*)aes->tmp, (byte*)aes->reg);
+        if (dir == AES_DECRYPTION) {
+            pt = (byte*)aes->reg;
+
+            /* LSB + CAT */
+            shiftLeftArray(pt, WOLFSSL_BIT_SIZE);
+            pt[AES_BLOCK_SIZE - 1] = in[0];
+        }
+
+        /* MSB + XOR */
+        out[0] = aes->tmp[0] ^ in[0];
+        if (dir == AES_ENCRYPTION) {
+            pt = (byte*)aes->reg;
+
+            /* LSB + CAT */
+            shiftLeftArray(pt, WOLFSSL_BIT_SIZE);
+            pt[AES_BLOCK_SIZE - 1] = out[0];
+        }
+
+        out += 1;
+        in  += 1;
+        sz  -= 1;
+    }
+
+    return 0;
+}
+
+
+/* returns 0 on success and negative values on failure */
+static int wc_AesFeedbackCFB1(Aes* aes, byte* out, const byte* in,
+        word32 sz, byte dir)
+{
+    byte tmp;
+    byte cur = 0; /* hold current work in order to handle inline in=out */
+    byte* pt;
+    int bit = 7;
+
+    if (aes == NULL || out == NULL || in == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    if (sz == 0) {
+        return 0;
+    }
+
+    while (sz > 0) {
+        wc_AesEncryptDirect(aes, (byte*)aes->tmp, (byte*)aes->reg);
+        if (dir == AES_DECRYPTION) {
+            pt = (byte*)aes->reg;
+
+            /* LSB + CAT */
+            tmp = (0X01 << bit) & in[0];
+            tmp = tmp >> bit;
+            tmp &= 0x01;
+            shiftLeftArray((byte*)aes->reg, 1);
+            pt[AES_BLOCK_SIZE - 1] |= tmp;
+        }
+
+        /* MSB  + XOR */
+        tmp = (0X01 << bit) & in[0];
+        pt = (byte*)aes->tmp;
+        tmp = (pt[0] >> 7) ^ (tmp >> bit);
+        tmp &= 0x01;
+        cur |= (tmp << bit);
+
+
+        if (dir == AES_ENCRYPTION) {
+            pt = (byte*)aes->reg;
+
+            /* LSB + CAT */
+            shiftLeftArray((byte*)aes->reg, 1);
+            pt[AES_BLOCK_SIZE - 1] |= tmp;
+        }
+
+        bit--;
+        if (bit < 0) {
+            out[0] = cur;
+            out += 1;
+            in  += 1;
+            sz  -= 1;
+            bit = 7;
+            cur = 0;
+        }
+        else {
+            sz -= 1;
+        }
+    }
+
+    if (bit > 0 && bit < 7) {
+        out[0] = cur;
+    }
+
+    return 0;
+}
+
+
+/* CFB 1
+ *
+ * aes structure holding key to use for encryption
+ * out buffer to hold result of encryption (must be at least as large as input
+ *     buffer)
+ * in  buffer to encrypt (packed to left, i.e. 101 is 0x90)
+ * sz  size of input buffer in bits (0x1 would be size of 1 and 0xFF size of 8)
+ *
+ * returns 0 on success and negative values on failure
+ */
+int wc_AesCfb1Encrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+{
+    return wc_AesFeedbackCFB1(aes, out, in, sz, AES_ENCRYPTION);
+}
+
+
+/* CFB 8
+ *
+ * aes structure holding key to use for encryption
+ * out buffer to hold result of encryption (must be at least as large as input
+ *     buffer)
+ * in  buffer to encrypt
+ * sz  size of input buffer
+ *
+ * returns 0 on success and negative values on failure
+ */
+int wc_AesCfb8Encrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+{
+    return wc_AesFeedbackCFB8(aes, out, in, sz, AES_ENCRYPTION);
+}
+#ifdef HAVE_AES_DECRYPT
+
+/* CFB 1
+ *
+ * aes structure holding key to use for encryption
+ * out buffer to hold result of encryption (must be at least as large as input
+ *     buffer)
+ * in  buffer to encrypt
+ * sz  size of input buffer in bits (0x1 would be size of 1 and 0xFF size of 8)
+ *
+ * returns 0 on success and negative values on failure
+ */
+int wc_AesCfb1Decrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+{
+    return wc_AesFeedbackCFB1(aes, out, in, sz, AES_DECRYPTION);
+}
+
+
+/* CFB 8
+ *
+ * aes structure holding key to use for encryption
+ * out buffer to hold result of encryption (must be at least as large as input
+ *     buffer)
+ * in  buffer to encrypt
+ * sz  size of input buffer
+ *
+ * returns 0 on success and negative values on failure
+ */
+int wc_AesCfb8Decrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+{
+    return wc_AesFeedbackCFB8(aes, out, in, sz, AES_DECRYPTION);
+}
+#endif /* HAVE_AES_DECRYPT */
+#endif /* WOLFSSL_AES_CFB */
+
+#ifdef WOLFSSL_AES_OFB
+/* OFB
+ *
+ * aes structure holding key to use for encryption
+ * out buffer to hold result of encryption (must be at least as large as input
+ *     buffer)
+ * in  buffer to encrypt
+ * sz  size of input buffer
+ *
+ * returns 0 on success and negative error values on failure
+ */
+/* Software AES - CFB Encrypt */
+int wc_AesOfbEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+{
+    return wc_AesFeedbackEncrypt(aes, out, in, sz, AES_OFB_MODE);
+}
+
+
+#ifdef HAVE_AES_DECRYPT
+/* OFB
+ *
+ * aes structure holding key to use for decryption
+ * out buffer to hold result of decryption (must be at least as large as input
+ *     buffer)
+ * in  buffer to decrypt
+ * sz  size of input buffer
+ *
+ * returns 0 on success and negative error values on failure
+ */
+/* Software AES - OFB Decrypt */
+int wc_AesOfbDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+{
+    return wc_AesFeedbackDecrypt(aes, out, in, sz, AES_OFB_MODE);
+}
+#endif /* HAVE_AES_DECRYPT */
+#endif /* WOLFSSL_AES_OFB */
+
 
 #ifdef HAVE_AES_KEYWRAP
 
@@ -10148,7 +8423,7 @@
         word32 j;
         byte carry = 0;
 
-        /* multiply by shift left and propogate carry */
+        /* multiply by shift left and propagate carry */
         for (j = 0; j < AES_BLOCK_SIZE && outSz > 0; j++, outSz--) {
             byte tmpC;
 
@@ -10186,6 +8461,7 @@
  *
  * returns 0 on success
  */
+/* Software AES - XTS Encrypt  */
 int wc_AesXtsEncrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz,
         const byte* i, word32 iSz)
 {
@@ -10238,7 +8514,7 @@
     #endif
             xorbuf(out, tmp, AES_BLOCK_SIZE);
 
-            /* multiply by shift left and propogate carry */
+            /* multiply by shift left and propagate carry */
             for (j = 0; j < AES_BLOCK_SIZE; j++) {
                 byte tmpC;
 
@@ -10293,6 +8569,7 @@
  *
  * returns 0 on success
  */
+/* Software AES - XTS Decrypt */
 int wc_AesXtsDecrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz,
         const byte* i, word32 iSz)
 {
@@ -10352,7 +8629,7 @@
     #endif
             xorbuf(out, tmp, AES_BLOCK_SIZE);
 
-            /* multiply by shift left and propogate carry */
+            /* multiply by shift left and propagate carry */
             for (j = 0; j < AES_BLOCK_SIZE; j++) {
                 byte tmpC;
 
@@ -10376,7 +8653,7 @@
             byte buf[AES_BLOCK_SIZE];
             byte tmp2[AES_BLOCK_SIZE];
 
-            /* multiply by shift left and propogate carry */
+            /* multiply by shift left and propagate carry */
             for (j = 0; j < AES_BLOCK_SIZE; j++) {
                 byte tmpC;
 
--- a/wolfcrypt/src/arc4.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/arc4.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* arc4.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -38,7 +38,7 @@
     word32 i;
     word32 keyIndex = 0, stateIndex = 0;
 
-    if (arc4 == NULL || key == NULL) {
+    if (arc4 == NULL || key == NULL || length == 0) {
         return BAD_FUNC_ARG;
     }
 
--- a/wolfcrypt/src/asm.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/asm.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* asm.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
--- a/wolfcrypt/src/asn.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/asn.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* asn.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -44,8 +44,16 @@
     Only enabled for OCSP.
  * WOLFSSL_NO_OCSP_ISSUER_CHECK: Can be defined for backwards compatibility to
     disable checking of OCSP subject hash with issuer hash.
- * WOLFSSL_ALT_CERT_CHAINS: Allows matching multiple CA's to validate
-    chain based on issuer and public key (includes signature confirmation)
+ * WOLFSSL_SMALL_CERT_VERIFY: Verify the certificate signature without using
+    DecodedCert. Doubles up on some code but allows smaller dynamic memory
+    usage.
+ * WOLFSSL_NO_OCSP_DATE_CHECK: Disable date checks for OCSP responses. This
+    may be required when the system's real-time clock is not very accurate.
+    It is recommended to enforce the nonce check instead if possible.
+ * WOLFSSL_FORCE_OCSP_NONCE_CHECK: Require nonces to be available in OCSP
+    responses. The nonces are optional and may not be supported by all
+    responders. If it can be ensured that the used responder sends nonces this
+    option may improve security.
 */
 
 #ifndef NO_ASN
@@ -70,9 +78,6 @@
     #include <wolfcrypt/src/misc.c>
 #endif
 
-#ifndef NO_PWDBASED
-    #include <wolfssl/wolfcrypt/aes.h>
-#endif
 #ifndef NO_RC4
     #include <wolfssl/wolfcrypt/arc4.h>
 #endif
@@ -97,22 +102,24 @@
     #include <wolfssl/wolfcrypt/ed25519.h>
 #endif
 
+#ifdef HAVE_ED448
+    #include <wolfssl/wolfcrypt/ed448.h>
+#endif
+
 #ifndef NO_RSA
     #include <wolfssl/wolfcrypt/rsa.h>
-#endif
-
-#ifdef WOLFSSL_DEBUG_ENCODING
-    #if defined(FREESCALE_MQX) || defined(FREESCALE_KSDK_MQX)
-        #if MQX_USE_IO_OLD
-            #include <fio.h>
-        #else
-            #include <nio.h>
-        #endif
-    #else
-        #include <stdio.h>
-    #endif
-#endif
-
+#if defined(WOLFSSL_XILINX_CRYPT) || defined(WOLFSSL_CRYPTOCELL)
+extern int wc_InitRsaHw(RsaKey* key);
+#endif
+#endif
+
+#ifdef WOLF_CRYPTO_CB
+    #include <wolfssl/wolfcrypt/cryptocb.h>
+#endif
+
+#if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
+    #include <wolfssl/openssl/objects.h>
+#endif
 
 #ifdef _MSC_VER
     /* 4996 warning to use MS extensions e.g., strcpy_s instead of XSTRNCPY */
@@ -121,9 +128,40 @@
 
 #define ERROR_OUT(err, eLabel) { ret = (err); goto eLabel; }
 
-WOLFSSL_LOCAL int GetLength(const byte* input, word32* inOutIdx, int* len,
+#if defined(HAVE_SELFTEST) || ( !defined(NO_SKID) && \
+                                ( !defined(HAVE_FIPS) || \
+                                  !defined(HAVE_FIPS_VERSION) ))
+    #ifndef WOLFSSL_AES_KEY_SIZE_ENUM
+    #define WOLFSSL_AES_KEY_SIZE_ENUM
+    enum Asn_Misc {
+        AES_IV_SIZE         = 16,
+        AES_128_KEY_SIZE    = 16,
+        AES_192_KEY_SIZE    = 24,
+        AES_256_KEY_SIZE    = 32
+    };
+    #endif
+#endif
+#ifdef WOLFSSL_RENESAS_TSIP_TLS
+void tsip_inform_key_position(const word32 key_n_start,
+                const word32 key_n_len, const word32 key_e_start,
+                const word32 key_e_len);
+int tsip_tls_CertVerify(const byte *cert, word32 certSz,
+                        const byte *signature, word32 sigSz,
+                        word32 key_n_start, word32 key_n_len,
+                        word32 key_e_start, word32 key_e_len,
+                        byte *tsip_encRsaKeyIdx);
+#endif
+int GetLength(const byte* input, word32* inOutIdx, int* len,
                            word32 maxIdx)
 {
+    return GetLength_ex(input, inOutIdx, len, maxIdx, 1);
+}
+
+
+/* give option to check length value found against index. 1 to check 0 to not */
+int GetLength_ex(const byte* input, word32* inOutIdx, int* len,
+                           word32 maxIdx, int check)
+{
     int     length = 0;
     word32  idx = *inOutIdx;
     byte    b;
@@ -144,15 +182,21 @@
             return BUFFER_E;
         }
 
+        if (bytes > sizeof(length)) {
+            return ASN_PARSE_E;
+        }
         while (bytes--) {
             b = input[idx++];
             length = (length << 8) | b;
         }
+        if (length < 0) {
+            return ASN_PARSE_E;
+        }
     }
     else
         length = b;
 
-    if ((idx + length) > maxIdx) {   /* for user of length */
+    if (check && (idx + length) > maxIdx) {   /* for user of length */
         WOLFSSL_MSG("GetLength value exceeds buffer length");
         return BUFFER_E;
     }
@@ -165,6 +209,55 @@
 }
 
 
+/* input : buffer to read from
+ * inOutIdx : index to start reading from, gets advanced by 1 if successful
+ * maxIdx : maximum index value
+ * tag : ASN tag value found
+ *
+ * returns 0 on success
+ */
+int GetASNTag(const byte* input, word32* inOutIdx, byte* tag, word32 maxIdx)
+{
+    word32 idx;
+
+    if (tag == NULL || inOutIdx == NULL || input == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    idx = *inOutIdx;
+    if (idx + ASN_TAG_SZ > maxIdx) {
+        WOLFSSL_MSG("Buffer too small for ASN tag");
+        return BUFFER_E;
+    }
+
+    *tag = input[idx];
+    *inOutIdx = idx + ASN_TAG_SZ;
+    return 0;
+}
+
+
+static int GetASNHeader_ex(const byte* input, byte tag, word32* inOutIdx, int* len,
+                        word32 maxIdx, int check)
+{
+    word32 idx = *inOutIdx;
+    byte   tagFound;
+    int    length;
+
+    if (GetASNTag(input, &idx, &tagFound, maxIdx) != 0)
+        return ASN_PARSE_E;
+
+    if (tagFound != tag)
+        return ASN_PARSE_E;
+
+    if (GetLength_ex(input, &idx, &length, maxIdx, check) < 0)
+        return ASN_PARSE_E;
+
+    *len      = length;
+    *inOutIdx = idx;
+    return length;
+}
+
+
 /* Get the DER/BER encoding of an ASN.1 header.
  *
  * input     Buffer holding DER/BER encoded data.
@@ -179,18 +272,21 @@
 static int GetASNHeader(const byte* input, byte tag, word32* inOutIdx, int* len,
                         word32 maxIdx)
 {
+    return GetASNHeader_ex(input, tag, inOutIdx, len, maxIdx, 1);
+}
+
+static int GetHeader(const byte* input, byte* tag, word32* inOutIdx, int* len,
+                     word32 maxIdx, int check)
+{
     word32 idx = *inOutIdx;
-    byte   b;
     int    length;
 
     if ((idx + 1) > maxIdx)
         return BUFFER_E;
 
-    b = input[idx++];
-    if (b != tag)
-        return ASN_PARSE_E;
-
-    if (GetLength(input, &idx, &length, maxIdx) < 0)
+    *tag = input[idx++];
+
+    if (GetLength_ex(input, &idx, &length, maxIdx, check) < 0)
         return ASN_PARSE_E;
 
     *len      = length;
@@ -198,7 +294,7 @@
     return length;
 }
 
-WOLFSSL_LOCAL int GetSequence(const byte* input, word32* inOutIdx, int* len,
+int GetSequence(const byte* input, word32* inOutIdx, int* len,
                            word32 maxIdx)
 {
     return GetASNHeader(input, ASN_SEQUENCE | ASN_CONSTRUCTED, inOutIdx, len,
@@ -206,13 +302,29 @@
 }
 
 
-WOLFSSL_LOCAL int GetSet(const byte* input, word32* inOutIdx, int* len,
+int GetSequence_ex(const byte* input, word32* inOutIdx, int* len,
+                           word32 maxIdx, int check)
+{
+    return GetASNHeader_ex(input, ASN_SEQUENCE | ASN_CONSTRUCTED, inOutIdx, len,
+                        maxIdx, check);
+}
+
+
+int GetSet(const byte* input, word32* inOutIdx, int* len,
                         word32 maxIdx)
 {
     return GetASNHeader(input, ASN_SET | ASN_CONSTRUCTED, inOutIdx, len,
                         maxIdx);
 }
 
+
+int GetSet_ex(const byte* input, word32* inOutIdx, int* len,
+                        word32 maxIdx, int check)
+{
+    return GetASNHeader_ex(input, ASN_SET | ASN_CONSTRUCTED, inOutIdx, len,
+                        maxIdx, check);
+}
+
 /* Get the DER/BER encoded ASN.1 NULL element.
  * Ensure that the all fields are as expected and move index past the element.
  *
@@ -315,7 +427,7 @@
  *         invalid.
  *         Otherwise, the number of bytes in the ASN.1 data.
  */
-static int GetOctetString(const byte* input, word32* inOutIdx, int* len,
+int GetOctetString(const byte* input, word32* inOutIdx, int* len,
                           word32 maxIdx)
 {
     return GetASNHeader(input, ASN_OCTET_STRING, inOutIdx, len, maxIdx);
@@ -374,7 +486,9 @@
     if ((idx + 3) > maxIdx)
         return BUFFER_E;
 
-    if (input[idx++] != ASN_INTEGER)
+    if (GetASNTag(input, &idx, &b, maxIdx) != 0)
+        return ASN_PARSE_E;
+    if (b != ASN_INTEGER)
         return ASN_PARSE_E;
     if (input[idx++] != 1)
         return ASN_PARSE_E;
@@ -386,56 +500,56 @@
 
 
 #if !defined(NO_DSA) && !defined(NO_SHA)
-static char sigSha1wDsaName[] = "SHAwDSA";
+static const char sigSha1wDsaName[] = "SHAwDSA";
 #endif /* NO_DSA */
 #ifndef NO_RSA
 #ifdef WOLFSSL_MD2
-    static char sigMd2wRsaName[] = "MD2wRSA";
+    static const char  sigMd2wRsaName[] = "md2WithRSAEncryption";
 #endif
 #ifndef NO_MD5
-    static char sigMd5wRsaName[] = "MD5wRSA";
+    static const char  sigMd5wRsaName[] = "md5WithRSAEncryption";
 #endif
 #ifndef NO_SHA
-    static char sigSha1wRsaName[] = "SHAwRSA";
+    static const char  sigSha1wRsaName[] = "sha1WithRSAEncryption";
 #endif
 #ifdef WOLFSSL_SHA224
-    static char sigSha224wRsaName[] = "SHA224wRSA";
+    static const char sigSha224wRsaName[] = "sha224WithRSAEncryption";
 #endif
 #ifndef NO_SHA256
-    static char sigSha256wRsaName[] = "SHA256wRSA";
+    static const char sigSha256wRsaName[] = "sha256WithRSAEncryption";
 #endif
 #ifdef WOLFSSL_SHA384
-    static char sigSha384wRsaName[] = "SHA384wRSA";
+    static const char sigSha384wRsaName[] = "sha384WithRSAEncryption";
 #endif
 #ifdef WOLFSSL_SHA512
-    static char sigSha512wRsaName[] = "SHA512wRSA";
+    static const char sigSha512wRsaName[] = "sha512WithRSAEncryption";
 #endif
 #endif /* NO_RSA */
 #ifdef HAVE_ECC
 #ifndef NO_SHA
-    static char sigSha1wEcdsaName[] = "SHAwECDSA";
+    static const char sigSha1wEcdsaName[] = "SHAwECDSA";
 #endif
 #ifdef WOLFSSL_SHA224
-    static char sigSha224wEcdsaName[] = "SHA224wECDSA";
+    static const char sigSha224wEcdsaName[] = "SHA224wECDSA";
 #endif
 #ifndef NO_SHA256
-    static char sigSha256wEcdsaName[] = "SHA256wECDSA";
+    static const char sigSha256wEcdsaName[] = "SHA256wECDSA";
 #endif
 #ifdef WOLFSSL_SHA384
-    static char sigSha384wEcdsaName[] = "SHA384wECDSA";
+    static const char sigSha384wEcdsaName[] = "SHA384wECDSA";
 #endif
 #ifdef WOLFSSL_SHA512
-    static char sigSha512wEcdsaName[] = "SHA512wECDSA";
+    static const char sigSha512wEcdsaName[] = "SHA512wECDSA";
 #endif
 #endif /* HAVE_ECC */
-static char sigUnknownName[] = "Unknown";
+static const char sigUnknownName[] = "Unknown";
 
 
 /* Get the human readable string for a signature type
  *
  * oid  Oid value for signature
  */
-char* GetSigName(int oid) {
+const char* GetSigName(int oid) {
     switch (oid) {
     #if !defined(NO_DSA) && !defined(NO_SHA)
         case CTC_SHAwDSA:
@@ -499,7 +613,7 @@
 }
 
 
-#if !defined(NO_DSA) || defined(HAVE_ECC) || \
+#if !defined(NO_DSA) || defined(HAVE_ECC) || !defined(NO_CERTS) || \
    (!defined(NO_RSA) && \
         (defined(WOLFSSL_CERT_GEN) || \
         ((defined(WOLFSSL_KEY_GEN) || defined(OPENSSL_EXTRA)) && !defined(HAVE_USER_RSA))))
@@ -514,19 +628,25 @@
 {
     word32 idx = 0;
 
-    output[idx++] = ASN_INTEGER;
+    if (output)
+        output[idx] = ASN_INTEGER;
+    idx++;
     if (firstByte & 0x80)
         len++;
-    idx += SetLength(len, output + idx);
-    if (firstByte & 0x80)
-        output[idx++] = 0x00;
+    idx += SetLength(len, output ? output + idx : NULL);
+    if (firstByte & 0x80) {
+        if (output)
+            output[idx] = 0x00;
+        idx++;
+    }
 
     return idx;
 }
 #endif
 
-#if !defined(NO_DSA) || defined(HAVE_ECC) || defined(WOLFSSL_CERT_GEN) || \
-    ((defined(WOLFSSL_KEY_GEN) || defined(OPENSSL_EXTRA)) && !defined(NO_RSA) && !defined(HAVE_USER_RSA))
+#if !defined(NO_DSA) || defined(HAVE_ECC) || (defined(WOLFSSL_CERT_GEN) && \
+    !defined(NO_RSA)) || ((defined(WOLFSSL_KEY_GEN) || \
+    defined(OPENSSL_EXTRA)) && !defined(NO_RSA) && !defined(HAVE_USER_RSA))
 /* Set the DER/BER encoding of the ASN.1 INTEGER element with an mp_int.
  * The number is assumed to be positive.
  *
@@ -551,16 +671,19 @@
     if (maxSz >= 0 && (idx + length) > maxSz)
         return BUFFER_E;
 
-    err = mp_to_unsigned_bin(n, output + idx);
-    if (err != MP_OKAY)
-        return MP_TO_E;
+    if (output) {
+        err = mp_to_unsigned_bin(n, output + idx);
+        if (err != MP_OKAY)
+            return MP_TO_E;
+    }
     idx += length;
 
     return idx;
 }
 #endif
 
-#if !defined(NO_RSA) && defined(HAVE_USER_RSA) && defined(WOLFSSL_CERT_GEN)
+#if !defined(NO_RSA) && defined(HAVE_USER_RSA) && \
+    (defined(WOLFSSL_CERT_GEN) || defined(OPENSSL_EXTRA))
 /* Set the DER/BER encoding of the ASN.1 INTEGER element with an mp_int from
  * an RSA key.
  * The number is assumed to be positive.
@@ -571,7 +694,7 @@
  *         MP_TO_E when encoding the integer fails.
  *         Otherwise, the number of bytes added to the buffer.
  */
-static int SetASNIntRSA(mp_int* n, byte* output)
+static int SetASNIntRSA(void* n, byte* output)
 {
     int idx = 0;
     int leadingBit;
@@ -584,9 +707,11 @@
     if ((idx + length) > MAX_RSA_INT_SZ)
         return BUFFER_E;
 
-    err = wc_Rsa_to_unsigned_bin(n, output + idx, length);
-    if (err != MP_OKAY)
-        return MP_TO_E;
+    if (output) {
+        err = wc_Rsa_to_unsigned_bin(n, output + idx, length);
+        if (err != MP_OKAY)
+            return MP_TO_E;
+    }
     idx += length;
 
     return idx;
@@ -594,15 +719,19 @@
 #endif /* !NO_RSA && HAVE_USER_RSA && WOLFSSL_CERT_GEN */
 
 /* Windows header clash for WinCE using GetVersion */
-WOLFSSL_LOCAL int GetMyVersion(const byte* input, word32* inOutIdx,
+int GetMyVersion(const byte* input, word32* inOutIdx,
                                int* version, word32 maxIdx)
 {
     word32 idx = *inOutIdx;
+    byte   tag;
 
     if ((idx + MIN_VERSION_SZ) > maxIdx)
         return ASN_PARSE_E;
 
-    if (input[idx++] != ASN_INTEGER)
+    if (GetASNTag(input, &idx, &tag, maxIdx) != 0)
+        return ASN_PARSE_E;
+
+    if (tag != ASN_INTEGER)
         return ASN_PARSE_E;
 
     if (input[idx++] != 0x01)
@@ -621,6 +750,7 @@
 {
     word32 idx = *inOutIdx;
     word32 len;
+    byte   tag;
 
     *number = 0;
 
@@ -628,7 +758,10 @@
     if ((idx + 2) > maxIdx)
         return BUFFER_E;
 
-    if (input[idx++] != ASN_INTEGER)
+    if (GetASNTag(input, &idx, &tag, maxIdx) != 0)
+        return ASN_PARSE_E;
+
+    if (tag != ASN_INTEGER)
         return ASN_PARSE_E;
 
     len = input[idx++];
@@ -650,8 +783,7 @@
 
 /* Set small integer, 32 bits or less. DER encoding with no leading 0s
  * returns total amount written including ASN tag and length byte on success */
-static int SetShortInt(byte* input, word32* inOutIdx, word32 number,
-        word32 maxIdx)
+int SetShortInt(byte* input, word32* inOutIdx, word32 number, word32 maxIdx)
 {
     word32 idx = *inOutIdx;
     word32 len = 0;
@@ -699,13 +831,14 @@
                               word32 maxIdx)
 {
     word32 idx = *inOutIdx;
+    byte tag;
 
     WOLFSSL_ENTER("GetExplicitVersion");
 
-    if ((idx + 1) > maxIdx)
-        return BUFFER_E;
-
-    if (input[idx++] == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED)) {
+    if (GetASNTag(input, &idx, &tag, maxIdx) != 0)
+        return ASN_PARSE_E;
+
+    if (tag == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED)) {
         *inOutIdx = ++idx;  /* skip header */
         return GetMyVersion(input, inOutIdx, version, maxIdx);
     }
@@ -746,7 +879,8 @@
     return 0;
 }
 
-#if !defined(WOLFSSL_KEY_GEN) && !defined(OPENSSL_EXTRA) && defined(RSA_LOW_MEM)
+#if (!defined(WOLFSSL_KEY_GEN) && !defined(OPENSSL_EXTRA) && defined(RSA_LOW_MEM)) \
+    || defined(WOLFSSL_RSA_PUBLIC_ONLY) || (!defined(NO_DSA) && defined(WOLFSSL_QT))
 #if !defined(NO_RSA) && !defined(HAVE_USER_RSA)
 static int SkipInt(const byte* input, word32* inOutIdx, word32 maxIdx)
 {
@@ -772,11 +906,13 @@
     int    length;
     byte   b;
 
-    if ((idx + 1) > maxIdx)
-        return BUFFER_E;
-
-    if (input[idx++] != ASN_BIT_STRING)
+    if (GetASNTag(input, &idx, &b, maxIdx) != 0) {
         return ASN_BITSTR_E;
+    }
+
+    if (b != ASN_BIT_STRING) {
+        return ASN_BITSTR_E;
+    }
 
     if (GetLength(input, &idx, &length, maxIdx) < 0)
         return ASN_PARSE_E;
@@ -813,11 +949,12 @@
     return 0;
 }
 
-/* RSA (with CertGen or KeyGen) OR ECC OR ED25519 (with CertGen or KeyGen) */
+/* RSA (with CertGen or KeyGen) OR ECC OR ED25519 OR ED448 (with CertGen or
+ * KeyGen) */
 #if (!defined(NO_RSA) && !defined(HAVE_USER_RSA) && \
         (defined(WOLFSSL_CERT_GEN) || defined(WOLFSSL_KEY_GEN) || defined(OPENSSL_EXTRA))) || \
-     defined(HAVE_ECC) || \
-    (defined(HAVE_ED25519) && \
+    (defined(HAVE_ECC) && defined(HAVE_ECC_KEY_EXPORT)) || \
+    ((defined(HAVE_ED25519) || defined(HAVE_ED448)) && \
         (defined(WOLFSSL_CERT_GEN) || defined(WOLFSSL_KEY_GEN) || defined(OPENSSL_EXTRA)))
 
 /* Set the DER/BER encoding of the ASN.1 BIT_STRING header.
@@ -829,19 +966,168 @@
  * output      Buffer to write into.
  * returns the number of bytes added to the buffer.
  */
-static word32 SetBitString(word32 len, byte unusedBits, byte* output)
+word32 SetBitString(word32 len, byte unusedBits, byte* output)
 {
     word32 idx = 0;
 
-    output[idx++] = ASN_BIT_STRING;
-    idx += SetLength(len + 1, output + idx);
-    output[idx++] = unusedBits;
+    if (output)
+        output[idx] = ASN_BIT_STRING;
+    idx++;
+
+    idx += SetLength(len + 1, output ? output + idx : NULL);
+    if (output)
+        output[idx] = unusedBits;
+    idx++;
 
     return idx;
 }
-#endif /* !NO_RSA || HAVE_ECC || HAVE_ED25519 */
+#endif /* !NO_RSA || HAVE_ECC || HAVE_ED25519 || HAVE_ED448 */
 
 #ifdef ASN_BER_TO_DER
+/* Pull informtation from the ASN.1 BER encoded item header */
+static int GetBerHeader(const byte* data, word32* idx, word32 maxIdx,
+                        byte* pTag, word32* pLen, int* indef)
+{
+    int len = 0;
+    byte tag;
+    word32 i = *idx;
+
+    *indef = 0;
+
+    /* Check there is enough data for a minimal header */
+    if (i + 2 > maxIdx) {
+        return ASN_PARSE_E;
+    }
+
+    /* Retrieve tag */
+    tag = data[i++];
+
+    /* Indefinite length handled specially */
+    if (data[i] == 0x80) {
+        /* Check valid tag for indefinite */
+        if (((tag & 0xc0) == 0) && ((tag & ASN_CONSTRUCTED) == 0x00)) {
+            return ASN_PARSE_E;
+        }
+        i++;
+        *indef = 1;
+    }
+    else if (GetLength(data, &i, &len, maxIdx) < 0) {
+        return ASN_PARSE_E;
+    }
+
+    /* Return tag, length and index after BER item header */
+    *pTag = tag;
+    *pLen = len;
+    *idx = i;
+    return 0;
+}
+
+#ifndef INDEF_ITEMS_MAX
+#define INDEF_ITEMS_MAX       20
+#endif
+
+/* Indef length item data */
+typedef struct Indef {
+    word32 start;
+    int depth;
+    int headerLen;
+    word32 len;
+} Indef;
+
+/* Indef length items */
+typedef struct IndefItems
+{
+    Indef len[INDEF_ITEMS_MAX];
+    int cnt;
+    int idx;
+    int depth;
+} IndefItems;
+
+
+/* Get header length of current item */
+static int IndefItems_HeaderLen(IndefItems* items)
+{
+    return items->len[items->idx].headerLen;
+}
+
+/* Get data length of current item */
+static word32 IndefItems_Len(IndefItems* items)
+{
+    return items->len[items->idx].len;
+}
+
+/* Add a indefinite length item */
+static int IndefItems_AddItem(IndefItems* items, word32 start)
+{
+    int ret = 0;
+    int i;
+
+    if (items->cnt == INDEF_ITEMS_MAX) {
+        ret = MEMORY_E;
+    }
+    else {
+        i = items->cnt++;
+        items->len[i].start = start;
+        items->len[i].depth = items->depth++;
+        items->len[i].headerLen = 1;
+        items->len[i].len = 0;
+        items->idx = i;
+    }
+
+    return ret;
+}
+
+/* Increase data length of current item */
+static void IndefItems_AddData(IndefItems* items, word32 length)
+{
+    items->len[items->idx].len += length;
+}
+
+/* Update header length of current item to reflect data length */
+static void IndefItems_UpdateHeaderLen(IndefItems* items)
+{
+    items->len[items->idx].headerLen +=
+                                    SetLength(items->len[items->idx].len, NULL);
+}
+
+/* Go to indefinite parent of current item */
+static void IndefItems_Up(IndefItems* items)
+{
+    int i;
+    int depth = items->len[items->idx].depth - 1;
+
+    for (i = items->cnt - 1; i >= 0; i--) {
+        if (items->len[i].depth == depth) {
+            break;
+        }
+    }
+    items->idx = i;
+    items->depth = depth + 1;
+}
+
+/* Calculate final length by adding length of indefinite child items */
+static void IndefItems_CalcLength(IndefItems* items)
+{
+    int i;
+    int idx = items->idx;
+
+    for (i = idx + 1; i < items->cnt; i++) {
+        if (items->len[i].depth == items->depth) {
+            items->len[idx].len += items->len[i].headerLen;
+            items->len[idx].len += items->len[i].len;
+        }
+    }
+    items->len[idx].headerLen += SetLength(items->len[idx].len, NULL);
+}
+
+/* Add more data to indefinite length item */
+static void IndefItems_MoreData(IndefItems* items, word32 length)
+{
+    if (items->cnt > 0 && items->idx >= 0) {
+        items->len[items->idx].len += length;
+    }
+}
+
 /* Convert a BER encoding with indefinite length items to DER.
  *
  * ber    BER encoded data.
@@ -856,200 +1142,224 @@
  */
 int wc_BerToDer(const byte* ber, word32 berSz, byte* der, word32* derSz)
 {
-    int ret;
-    word32 i, j, k;
-    int len, l;
+    int ret = 0;
+    word32 i, j;
+#ifdef WOLFSSL_SMALL_STACK
+    IndefItems* indefItems = NULL;
+#else
+    IndefItems indefItems[1];
+#endif
+    byte tag, basic;
+    word32 length;
     int indef;
-    int depth = 0;
-    byte type;
-    word32 cnt, sz;
-    word32 outSz;
-    byte lenBytes[4];
 
     if (ber == NULL || derSz == NULL)
         return BAD_FUNC_ARG;
 
-    outSz = *derSz;
-
-    for (i = 0, j = 0; i < berSz; ) {
-        /* Check that there is data for an ASN item to parse. */
-        if (i + 2 > berSz)
-            return ASN_PARSE_E;
-
-        /* End Of Content (EOC) mark end of indefinite length items.
-         * EOCs are not encoded in DER.
-         * Keep track of no. indefinite length items that have not been
-         * terminated in depth.
-         */
-        if (ber[i] == 0 && ber[i+1] == 0) {
-            if (depth == 0)
-                break;
-            if (--depth == 0)
-                break;
-
-            i += 2;
-            continue;
-        }
-
-        /* Indefinite length is encoded as: 0x80 */
-        type = ber[i];
-        indef = ber[i+1] == ASN_INDEF_LENGTH;
-        if (indef && (type & 0xC0) == 0 &&
-                                   ber[i] != (ASN_SEQUENCE | ASN_CONSTRUCTED) &&
-                                   ber[i] != (ASN_SET      | ASN_CONSTRUCTED)) {
-            /* Indefinite length OCTET STRING or other simple type.
-             * Put all the data into one entry.
-             */
-
-            /* Type no longer constructed. */
-            type &= ~ASN_CONSTRUCTED;
-            if (der != NULL) {
-                /* Ensure space for type. */
-                if (j + 1 >= outSz)
-                    return BUFFER_E;
-                der[j] = type;
-            }
-            i++; j++;
-            /* Skip indefinite length. */
-            i++;
-
-            /* There must be further ASN1 items to combine. */
-            if (i + 2 > berSz)
-                return ASN_PARSE_E;
-
-            /* Calculate length of combined data. */
-            len = 0;
-            k = i;
-            while (ber[k] != 0x00) {
-                /* Each ASN item must be the same type as the constructed. */
-                if (ber[k] != type)
-                    return ASN_PARSE_E;
-                k++;
-
-                ret = GetLength(ber, &k, &l, berSz);
-                if (ret < 0)
-                    return ASN_PARSE_E;
-                k += l;
-                len += l;
-
-                /* Must at least have terminating EOC. */
-                if (k + 2 > berSz)
-                    return ASN_PARSE_E;
-            }
-            /* Ensure a valid EOC ASN item. */
-            if (ber[k+1] != 0x00)
-                return ASN_PARSE_E;
-
-            if (der == NULL) {
-                /* Add length of ASN item length encoding and data. */
-                j += SetLength(len, lenBytes);
-                j += len;
-            }
-            else {
-                /* Check space for encoded length. */
-                if (SetLength(len, lenBytes) > outSz - j)
-                    return BUFFER_E;
-                /* Encode new length. */
-                j += SetLength(len, der + j);
-
-                /* Encode data in single item. */
-                k = i;
-                while (ber[k] != 0x00) {
-                    /* Skip ASN type. */
-                    k++;
-
-                    /* Find length of data in ASN item. */
-                    ret = GetLength(ber, &k, &l, berSz);
-                    if (ret < 0)
-                        return ASN_PARSE_E;
-
-                    /* Ensure space for data and copy in. */
-                    if (j + l > outSz)
-                        return BUFFER_E;
-                    XMEMCPY(der + j, ber + k, l);
-                    k += l; j += l;
-                }
-            }
-            /* Continue conversion after EOC. */
-            i = k + 2;
-
-            continue;
-        }
-
-        if (der != NULL) {
-            /* Ensure space for type and at least one byte of length. */
-            if (j + 1 >= outSz)
-                return BUFFER_E;
-            /* Put in type. */
-            der[j] = ber[i];
-        }
-        i++; j++;
+#ifdef WOLFSSL_SMALL_STACK
+    indefItems = XMALLOC(sizeof(IndefItems), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    if (indefItems == NULL) {
+        ret = MEMORY_E;
+        goto end;
+    }
+#endif
+
+    XMEMSET(indefItems, 0, sizeof(*indefItems));
+
+    /* Calculate indefinite item lengths */
+    for (i = 0; i < berSz; ) {
+        word32 start = i;
+
+        /* Get next BER item */
+        ret = GetBerHeader(ber, &i, berSz, &tag, &length, &indef);
+        if (ret != 0) {
+            goto end;
+        }
 
         if (indef) {
-            /* Skip indefinite length. */
-            i++;
-            /* Calculate the size of the data inside constructed. */
-            ret = wc_BerToDer(ber + i, berSz - i, NULL, &sz);
-            if (ret != LENGTH_ONLY_E)
-                return ret;
-
-            if (der != NULL) {
-                /* Ensure space for encoded length. */
-                if (SetLength(sz, lenBytes) > outSz - j)
-                    return BUFFER_E;
-                /* Encode real length. */
-                j += SetLength(sz, der + j);
-            }
-            else {
-                /* Add size of encoded length. */
-                j += SetLength(sz, lenBytes);
-            }
-
-            /* Another EOC to find. */
-            depth++;
+            /* Indefinite item - add to list */
+            ret = IndefItems_AddItem(indefItems, i);
+            if (ret != 0) {
+                goto end;
+            }
+
+            if ((tag & 0xC0) == 0 &&
+                tag != (ASN_SEQUENCE | ASN_CONSTRUCTED) &&
+                tag != (ASN_SET      | ASN_CONSTRUCTED)) {
+                /* Constructed basic type - get repeating tag */
+                basic = tag & (~ASN_CONSTRUCTED);
+
+                /* Add up lengths of each item below */
+                for (; i < berSz; ) {
+                    /* Get next BER_item */
+                    ret = GetBerHeader(ber, &i, berSz, &tag, &length, &indef);
+                    if (ret != 0) {
+                        goto end;
+                    }
+
+                    /* End of content closes item */
+                    if (tag == ASN_EOC) {
+                        /* Must be zero length */
+                        if (length != 0) {
+                            ret = ASN_PARSE_E;
+                            goto end;
+                        }
+                        break;
+                    }
+
+                    /* Must not be indefinite and tag must match parent */
+                    if (indef || tag != basic) {
+                        ret = ASN_PARSE_E;
+                        goto end;
+                    }
+
+                    /* Add to length */
+                    IndefItems_AddData(indefItems, length);
+                    /* Skip data */
+                    i += length;
+                }
+
+                /* Ensure we got an EOC and not end of data */
+                if (tag != ASN_EOC) {
+                    ret = ASN_PARSE_E;
+                    goto end;
+                }
+
+                /* Set the header length to include the length field */
+                IndefItems_UpdateHeaderLen(indefItems);
+                /* Go to indefinte parent item */
+                IndefItems_Up(indefItems);
+            }
+        }
+        else if (tag == ASN_EOC) {
+            /* End-of-content must be 0 length */
+            if (length != 0) {
+                ret = ASN_PARSE_E;
+                goto end;
+            }
+            /* Check there is an item to close - missing EOC */
+            if (indefItems->depth == 0) {
+                ret = ASN_PARSE_E;
+                goto end;
+            }
+
+            /* Finish calculation of data length for indefinite item */
+            IndefItems_CalcLength(indefItems);
+            /* Go to indefinte parent item */
+            IndefItems_Up(indefItems);
         }
         else {
-            /* Get the size of the encode length and length value. */
-            cnt = i;
-            ret = GetLength(ber, &cnt, &len, berSz);
-            if (ret < 0)
-                return ASN_PARSE_E;
-            cnt -= i;
-
-            /* Check there is enough data to copy out. */
-            if (i + cnt + len > berSz)
-                return ASN_PARSE_E;
-
+            /* Known length item to add in - make sure enough data for it */
+            if (i + length > berSz) {
+                ret = ASN_PARSE_E;
+                goto end;
+            }
+
+            /* Include all data - can't have indefinite inside definite */
+            i += length;
+            /* Add entire item to current indefinite item */
+            IndefItems_MoreData(indefItems, i - start);
+        }
+    }
+    /* Check we had a EOC for each indefinite item */
+    if (indefItems->depth != 0) {
+        ret = ASN_PARSE_E;
+        goto end;
+    }
+
+    /* Write out DER */
+
+    j = 0;
+    /* Reset index */
+    indefItems->idx = 0;
+    for (i = 0; i < berSz; ) {
+        word32 start = i;
+
+        /* Get item - checked above */
+        (void)GetBerHeader(ber, &i, berSz, &tag, &length, &indef);
+        if (indef) {
             if (der != NULL) {
-                /* Ensure space in DER buffer. */
-                if (j + cnt + len > outSz)
-                    return BUFFER_E;
-                /* Copy length and data into DER buffer. */
-                XMEMCPY(der + j, ber + i, cnt + len);
-            }
-            /* Continue conversion after this ASN item. */
-            i += cnt + len;
-            j += cnt + len;
-        }
-    }
-
-    if (depth >= 1)
-        return ASN_PARSE_E;
-
-    /* Return length if no buffer to write to. */
+                /* Check enough space for header */
+                if (j + IndefItems_HeaderLen(indefItems) > *derSz) {
+                    ret = BUFFER_E;
+                    goto end;
+                }
+
+                if ((tag & 0xC0) == 0 &&
+                    tag != (ASN_SEQUENCE | ASN_CONSTRUCTED) &&
+                    tag != (ASN_SET      | ASN_CONSTRUCTED)) {
+                    /* Remove constructed tag for basic types */
+                    tag &= ~ASN_CONSTRUCTED;
+                }
+                /* Add tag and length */
+                der[j] = tag;
+                (void)SetLength(IndefItems_Len(indefItems), der + j + 1);
+            }
+            /* Add header length of indefinite item */
+            j += IndefItems_HeaderLen(indefItems);
+
+            if ((tag & 0xC0) == 0 &&
+                tag != (ASN_SEQUENCE | ASN_CONSTRUCTED) &&
+                tag != (ASN_SET      | ASN_CONSTRUCTED)) {
+                /* For basic type - get each child item and add data */
+                for (; i < berSz; ) {
+                    (void)GetBerHeader(ber, &i, berSz, &tag, &length, &indef);
+                    if (tag == ASN_EOC) {
+                        break;
+                    }
+                    if (der != NULL) {
+                        if (j + length > *derSz) {
+                            ret = BUFFER_E;
+                            goto end;
+                        }
+                        XMEMCPY(der + j, ber + i, length);
+                    }
+                    j += length;
+                    i += length;
+                }
+            }
+
+            /* Move to next indef item in list */
+            indefItems->idx++;
+        }
+        else if (tag == ASN_EOC) {
+            /* End-Of-Content is not written out in DER */
+        }
+        else {
+            /* Write out definite length item as is. */
+            i += length;
+            if (der != NULL) {
+                /* Ensure space for item */
+                if (j + i - start > *derSz) {
+                    ret = BUFFER_E;
+                    goto end;
+                }
+                /* Copy item as is */
+                XMEMCPY(der + j, ber + start, i - start);
+            }
+            j += i - start;
+        }
+    }
+
+    /* Return the length of the DER encoded ASN.1 */
+    *derSz = j;
     if (der == NULL) {
-        *derSz = j;
-        return LENGTH_ONLY_E;
-    }
-
-    return 0;
+        ret = LENGTH_ONLY_E;
+    }
+end:
+#ifdef WOLFSSL_SMALL_STACK
+    if (indefItems != NULL) {
+        XFREE(indefItems, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+    return ret;
 }
 #endif
 
 #if defined(WOLFSSL_CERT_GEN) || defined(WOLFSSL_KEY_GEN)
 
 #if (!defined(NO_RSA) && !defined(HAVE_USER_RSA)) || \
-    defined(HAVE_ECC) || defined(HAVE_ED25519)
+    defined(HAVE_ECC) || defined(HAVE_ED25519) || defined(HAVE_ED448)
 
 #ifdef WOLFSSL_CERT_EXT
 /* Set the DER/BER encoding of the ASN.1 BIT_STRING with a 16-bit value.
@@ -1085,7 +1395,7 @@
     return idx;
 }
 #endif /* WOLFSSL_CERT_EXT */
-#endif /* !NO_RSA || HAVE_ECC || HAVE_ED25519 */
+#endif /* !NO_RSA || HAVE_ECC || HAVE_ED25519 || defined(HAVE_ED448) */
 #endif /* WOLFSSL_CERT_GEN || WOLFSSL_KEY_GEN */
 
 
@@ -1176,6 +1486,9 @@
 #ifdef HAVE_ED25519
     static const byte sigEd25519Oid[] = {43, 101, 112};
 #endif /* HAVE_ED25519 */
+#ifdef HAVE_ED448
+    static const byte sigEd448Oid[] = {43, 101, 113};
+#endif /* HAVE_ED448 */
 
 /* keyType */
 #ifndef NO_DSA
@@ -1193,6 +1506,12 @@
 #ifdef HAVE_ED25519
     static const byte keyEd25519Oid[] = {43, 101, 112};
 #endif /* HAVE_ED25519 */
+#ifdef HAVE_ED448
+    static const byte keyEd448Oid[] = {43, 101, 113};
+#endif /* HAVE_ED448 */
+#if !defined(NO_DH) && (defined(WOLFSSL_QT) || defined(OPENSSL_ALL))
+    static const byte keyDhOid[] = {42, 134, 72, 134, 247, 13, 1, 3, 1};
+#endif /* ! NO_DH ... */
 
 /* curveType */
 #ifdef HAVE_ECC
@@ -1211,6 +1530,28 @@
     static const byte blkAes256CbcOid[] = {96, 134, 72, 1, 101, 3, 4, 1, 42};
     #endif
 #endif /* HAVE_AES_CBC */
+#ifdef HAVE_AESGCM
+    #ifdef WOLFSSL_AES_128
+    static const byte blkAes128GcmOid[] = {96, 134, 72, 1, 101, 3, 4, 1, 6};
+    #endif
+    #ifdef WOLFSSL_AES_192
+    static const byte blkAes192GcmOid[] = {96, 134, 72, 1, 101, 3, 4, 1, 26};
+    #endif
+    #ifdef WOLFSSL_AES_256
+    static const byte blkAes256GcmOid[] = {96, 134, 72, 1, 101, 3, 4, 1, 46};
+    #endif
+#endif /* HAVE_AESGCM */
+#ifdef HAVE_AESCCM
+    #ifdef WOLFSSL_AES_128
+    static const byte blkAes128CcmOid[] = {96, 134, 72, 1, 101, 3, 4, 1, 7};
+    #endif
+    #ifdef WOLFSSL_AES_192
+    static const byte blkAes192CcmOid[] = {96, 134, 72, 1, 101, 3, 4, 1, 27};
+    #endif
+    #ifdef WOLFSSL_AES_256
+    static const byte blkAes256CcmOid[] = {96, 134, 72, 1, 101, 3, 4, 1, 47};
+    #endif
+#endif /* HAVE_AESCCM */
 
 #ifndef NO_DES3
     static const byte blkDesCbcOid[]  = {43, 14, 3, 2, 7};
@@ -1227,6 +1568,10 @@
 #ifdef WOLFSSL_AES_256
     static const byte wrapAes256Oid[] = {96, 134, 72, 1, 101, 3, 4, 1, 45};
 #endif
+#ifdef HAVE_PKCS7
+/* From RFC 3211 */
+static const byte wrapPwriKekOid[] = {42, 134, 72, 134, 247, 13, 1, 9, 16, 3,9};
+#endif
 
 /* cmsKeyAgreeType */
 #ifndef NO_SHA
@@ -1295,6 +1640,7 @@
 #if !defined(NO_DES3) && !defined(NO_SHA)
 static const byte pbeSha1Des[] = {42, 134, 72, 134, 247, 13, 1, 5, 10};
 #endif
+static const byte pbes2[] = {42, 134, 72, 134, 247, 13, 1, 5, 13};
 
 /* PKCS12 */
 #if !defined(NO_RC4) && !defined(NO_SHA)
@@ -1304,6 +1650,17 @@
 static const byte pbeSha1Des3[] = {42, 134, 72, 134, 247, 13, 1, 12, 1, 3};
 #endif
 
+#ifdef HAVE_LIBZ
+/* zlib compression */
+static const byte zlibCompress[] = {42, 134, 72, 134, 247, 13, 1, 9, 16, 3, 8};
+#endif
+#ifdef WOLFSSL_APACHE_HTTPD
+/* tlsExtType */
+static const byte tlsFeatureOid[] = {43, 6, 1, 5, 5, 7, 1, 24};
+/* certNameType */
+static const byte dnsSRVOid[] = {43, 6, 1, 5, 5, 7, 8, 7};
+#endif
+
 
 /* returns a pointer to the OID string on success and NULL on fail */
 const byte* OidFromId(word32 id, word32 type, word32* oidSz)
@@ -1451,6 +1808,12 @@
                     *oidSz = sizeof(sigEd25519Oid);
                     break;
                 #endif
+                #ifdef HAVE_ED448
+                case CTC_ED448:
+                    oid = sigEd448Oid;
+                    *oidSz = sizeof(sigEd448Oid);
+                    break;
+                #endif
                 default:
                     break;
             }
@@ -1488,6 +1851,18 @@
                     *oidSz = sizeof(keyEd25519Oid);
                     break;
                 #endif /* HAVE_ED25519 */
+                #ifdef HAVE_ED448
+                case ED448k:
+                    oid = keyEd448Oid;
+                    *oidSz = sizeof(keyEd448Oid);
+                    break;
+                #endif /* HAVE_ED448 */
+                #if !defined(NO_DH) && (defined(WOLFSSL_QT) || defined(OPENSSL_ALL))
+                case DHk:
+                    oid = keyDhOid;
+                    *oidSz = sizeof(keyDhOid);
+                    break;
+                #endif /* ! NO_DH && (WOLFSSL_QT || OPENSSL_ALL */
                 default:
                     break;
             }
@@ -1523,6 +1898,46 @@
                     break;
         #endif
     #endif /* HAVE_AES_CBC */
+    #ifdef HAVE_AESGCM
+        #ifdef WOLFSSL_AES_128
+                case AES128GCMb:
+                    oid = blkAes128GcmOid;
+                    *oidSz = sizeof(blkAes128GcmOid);
+                    break;
+        #endif
+        #ifdef WOLFSSL_AES_192
+                case AES192GCMb:
+                    oid = blkAes192GcmOid;
+                    *oidSz = sizeof(blkAes192GcmOid);
+                    break;
+        #endif
+        #ifdef WOLFSSL_AES_256
+                case AES256GCMb:
+                    oid = blkAes256GcmOid;
+                    *oidSz = sizeof(blkAes256GcmOid);
+                    break;
+        #endif
+    #endif /* HAVE_AESGCM */
+    #ifdef HAVE_AESCCM
+        #ifdef WOLFSSL_AES_128
+                case AES128CCMb:
+                    oid = blkAes128CcmOid;
+                    *oidSz = sizeof(blkAes128CcmOid);
+                    break;
+        #endif
+        #ifdef WOLFSSL_AES_192
+                case AES192CCMb:
+                    oid = blkAes192CcmOid;
+                    *oidSz = sizeof(blkAes192CcmOid);
+                    break;
+        #endif
+        #ifdef WOLFSSL_AES_256
+                case AES256CCMb:
+                    oid = blkAes256CcmOid;
+                    *oidSz = sizeof(blkAes256CcmOid);
+                    break;
+        #endif
+    #endif /* HAVE_AESCCM */
     #ifndef NO_DES3
                 case DESb:
                     oid = blkDesCbcOid;
@@ -1602,6 +2017,17 @@
             }
             break;
 
+        case oidCrlExtType:
+            #ifdef HAVE_CRL
+            switch (id) {
+                case AUTH_KEY_OID:
+                    oid = extAuthKeyOid;
+                    *oidSz = sizeof(extAuthKeyOid);
+                    break;
+            }
+            #endif
+            break;
+
         case oidCertAuthInfoType:
             switch (id) {
             #ifdef HAVE_OCSP
@@ -1698,6 +2124,10 @@
                     *oidSz = sizeof(pbeSha1Des3);
                     break;
         #endif
+                case PBES2:
+                    oid = pbes2;
+                    *oidSz = sizeof(pbes2);
+                    break;
             }
             break;
 
@@ -1721,6 +2151,12 @@
                     *oidSz = sizeof(wrapAes256Oid);
                     break;
             #endif
+            #ifdef HAVE_PKCS7
+                case PWRI_KEK_WRAP:
+                    oid = wrapPwriKekOid;
+                    *oidSz = sizeof(wrapPwriKekOid);
+                    break;
+            #endif
             }
             break;
 
@@ -1790,6 +2226,34 @@
             break;
 #endif /* !NO_HMAC */
 
+#ifdef HAVE_LIBZ
+        case oidCompressType:
+            switch (id) {
+                case ZLIBc:
+                    oid = zlibCompress;
+                    *oidSz = sizeof(zlibCompress);
+                    break;
+            }
+            break;
+#endif /* HAVE_LIBZ */
+#ifdef WOLFSSL_APACHE_HTTPD
+        case oidCertNameType:
+            switch (id) {
+                 case NID_id_on_dnsSRV:
+                    oid = dnsSRVOid;
+                    *oidSz = sizeof(dnsSRVOid);
+                    break;
+            }
+            break;
+        case oidTlsExtType:
+            switch (id) {
+                case TLS_FEATURE_OID:
+                    oid = tlsFeatureOid;
+                    *oidSz = sizeof(tlsFeatureOid);
+                    break;
+            }
+            break;
+#endif /* WOLFSSL_APACHE_HTTPD */
         case oidIgnoreType:
         default:
             break;
@@ -1923,18 +2387,20 @@
  *         ASN_PARSE_E when length is invalid.
  *         Otherwise, 0 to indicate success.
  */
-static int GetASNObjectId(const byte* input, word32* inOutIdx, int* len,
+int GetASNObjectId(const byte* input, word32* inOutIdx, int* len,
                           word32 maxIdx)
 {
     word32 idx = *inOutIdx;
-    byte   b;
     int    length;
+    byte   tag;
 
     if ((idx + 1) > maxIdx)
         return BUFFER_E;
 
-    b = input[idx++];
-    if (b != ASN_OBJECT_ID)
+    if (GetASNTag(input, &idx, &tag, maxIdx) != 0)
+        return ASN_PARSE_E;
+
+    if (tag != ASN_OBJECT_ID)
         return ASN_OBJECT_ID_E;
 
     if (GetLength(input, &idx, &length, maxIdx) < 0)
@@ -1951,7 +2417,7 @@
  * output      Buffer to write into.
  * returns the number of bytes added to the buffer.
  */
-static int SetObjectId(int len, byte* output)
+int SetObjectId(int len, byte* output)
 {
     int idx = 0;
 
@@ -2060,7 +2526,7 @@
     return 0;
 }
 
-WOLFSSL_LOCAL int GetAlgoId(const byte* input, word32* inOutIdx, word32* oid,
+int GetAlgoId(const byte* input, word32* inOutIdx, word32* oid,
                      word32 oidType, word32 maxIdx)
 {
     int    length;
@@ -2077,10 +2543,17 @@
         return ASN_OBJECT_ID_E;
 
     /* could have NULL tag and 0 terminator, but may not */
-    if (idx < maxIdx && input[idx] == ASN_TAG_NULL) {
-        ret = GetASNNull(input, &idx, maxIdx);
-        if (ret != 0)
-            return ret;
+    if (idx < maxIdx) {
+        word32 localIdx = idx; /*use localIdx to not advance when checking tag*/
+        byte   tag;
+
+        if (GetASNTag(input, &localIdx, &tag, maxIdx) == 0) {
+            if (tag == ASN_TAG_NULL) {
+                ret = GetASNNull(input, &idx, maxIdx);
+                if (ret != 0)
+                    return ret;
+            }
+        }
     }
 
     *inOutIdx = idx;
@@ -2109,10 +2582,19 @@
 
     if (GetInt(&key->n,  input, inOutIdx, inSz) < 0 ||
         GetInt(&key->e,  input, inOutIdx, inSz) < 0 ||
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
         GetInt(&key->d,  input, inOutIdx, inSz) < 0 ||
         GetInt(&key->p,  input, inOutIdx, inSz) < 0 ||
-        GetInt(&key->q,  input, inOutIdx, inSz) < 0)   return ASN_RSA_KEY_E;
-#if defined(WOLFSSL_KEY_GEN) || defined(OPENSSL_EXTRA) || !defined(RSA_LOW_MEM)
+        GetInt(&key->q,  input, inOutIdx, inSz) < 0)
+#else
+        SkipInt(input, inOutIdx, inSz) < 0 ||
+        SkipInt(input, inOutIdx, inSz) < 0 ||
+        SkipInt(input, inOutIdx, inSz) < 0 )
+
+#endif
+            return ASN_RSA_KEY_E;
+#if (defined(WOLFSSL_KEY_GEN) || defined(OPENSSL_EXTRA) || !defined(RSA_LOW_MEM)) \
+    && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
     if (GetInt(&key->dP, input, inOutIdx, inSz) < 0 ||
         GetInt(&key->dQ, input, inOutIdx, inSz) < 0 ||
         GetInt(&key->u,  input, inOutIdx, inSz) < 0 )  return ASN_RSA_KEY_E;
@@ -2122,7 +2604,7 @@
         SkipInt(input, inOutIdx, inSz) < 0 )  return ASN_RSA_KEY_E;
 #endif
 
-#ifdef WOLFSSL_XILINX_CRYPT
+#if defined(WOLFSSL_XILINX_CRYPT) || defined(WOLFSSL_CRYPTOCELL)
     if (wc_InitRsaHw(key) != 0) {
         return BAD_STATE_E;
     }
@@ -2133,13 +2615,17 @@
 #endif /* HAVE_USER_RSA */
 #endif /* NO_RSA */
 
+#if defined(HAVE_PKCS8) || defined(HAVE_PKCS12)
+
 /* Remove PKCS8 header, place inOutIdx at beginning of traditional,
  * return traditional length on success, negative on error */
-int ToTraditionalInline(const byte* input, word32* inOutIdx, word32 sz)
-{
-    word32 idx, oid;
+int ToTraditionalInline_ex(const byte* input, word32* inOutIdx, word32 sz,
+                           word32* algId)
+{
+    word32 idx;
     int    version, length;
     int    ret;
+    byte   tag;
 
     if (input == NULL || inOutIdx == NULL)
         return BAD_FUNC_ARG;
@@ -2152,25 +2638,40 @@
     if (GetMyVersion(input, &idx, &version, sz) < 0)
         return ASN_PARSE_E;
 
-    if (GetAlgoId(input, &idx, &oid, oidKeyType, sz) < 0)
-        return ASN_PARSE_E;
-
-    if (input[idx] == ASN_OBJECT_ID) {
+    if (GetAlgoId(input, &idx, algId, oidKeyType, sz) < 0)
+        return ASN_PARSE_E;
+
+    if (GetASNTag(input, &idx, &tag, sz) < 0)
+        return ASN_PARSE_E;
+    idx = idx - 1; /* reset idx after finding tag */
+
+    if (tag == ASN_OBJECT_ID) {
         if (SkipObjectId(input, &idx, sz) < 0)
             return ASN_PARSE_E;
     }
 
     ret = GetOctetString(input, &idx, &length, sz);
-    if (ret < 0)
-        return ret;
+    if (ret < 0) {
+        if (ret == BUFFER_E)
+            return ASN_PARSE_E;
+        /* Some private keys don't expect an octet string */
+        WOLFSSL_MSG("Couldn't find Octet string");
+    }
 
     *inOutIdx = idx;
 
     return length;
 }
 
+int ToTraditionalInline(const byte* input, word32* inOutIdx, word32 sz)
+{
+    word32 oid;
+
+    return ToTraditionalInline_ex(input, inOutIdx, sz, &oid);
+}
+
 /* Remove PKCS8 header, move beginning of traditional to beginning of input */
-int ToTraditional(byte* input, word32 sz)
+int ToTraditional_ex(byte* input, word32 sz, word32* algId)
 {
     word32 inOutIdx = 0;
     int    length;
@@ -2178,7 +2679,7 @@
     if (input == NULL)
         return BAD_FUNC_ARG;
 
-    length = ToTraditionalInline(input, &inOutIdx, sz);
+    length = ToTraditionalInline_ex(input, &inOutIdx, sz, algId);
     if (length < 0)
         return length;
 
@@ -2187,6 +2688,16 @@
     return length;
 }
 
+int ToTraditional(byte* input, word32 sz)
+{
+    word32 oid;
+
+    return ToTraditional_ex(input, sz, &oid);
+}
+
+#endif /* HAVE_PKCS8 || HAVE_PKCS12 */
+
+#ifdef HAVE_PKCS8
 
 /* find beginning of traditional key inside PKCS#8 unencrypted buffer
  * return traditional length on success, with inOutIdx at beginning of
@@ -2195,11 +2706,12 @@
 int wc_GetPkcs8TraditionalOffset(byte* input, word32* inOutIdx, word32 sz)
 {
     int length;
+    word32 algId;
 
     if (input == NULL || inOutIdx == NULL || (*inOutIdx > sz))
         return BAD_FUNC_ARG;
 
-    length = ToTraditionalInline(input, inOutIdx, sz);
+    length = ToTraditionalInline_ex(input, inOutIdx, sz, &algId);
 
     return length;
 }
@@ -2315,12 +2827,14 @@
         return tmpSz + sz;
 }
 
-
+#endif /* HAVE_PKCS8 */
+
+#if defined(HAVE_PKCS12) || !defined(NO_CHECK_PRIVATE_KEY)
 /* check that the private key is a pair for the public key in certificate
  * return 1 (true) on match
  * return 0 or negative value on failure/error
  *
- * key   : buffer holding DER fromat key
+ * key   : buffer holding DER format key
  * keySz : size of key buffer
  * der   : a initialized and parsed DecodedCert holding a certificate */
 int wc_CheckPrivateKey(byte* key, word32 keySz, DecodedCert* der)
@@ -2332,11 +2846,11 @@
         return BAD_FUNC_ARG;
     }
 
-    #if !defined(NO_RSA)
+    #if !defined(NO_RSA) && !defined(NO_ASN_CRYPT)
     /* test if RSA key */
     if (der->keyOID == RSAk) {
     #ifdef WOLFSSL_SMALL_STACK
-        RsaKey* a = NULL;
+        RsaKey* a;
         RsaKey* b = NULL;
     #else
         RsaKey a[1], b[1];
@@ -2400,12 +2914,12 @@
     #endif
     }
     else
-    #endif /* NO_RSA */
-
-    #ifdef HAVE_ECC
+    #endif /* !NO_RSA && !NO_ASN_CRYPT */
+
+    #if defined(HAVE_ECC) && defined(HAVE_ECC_KEY_EXPORT) && !defined(NO_ASN_CRYPT)
     if (der->keyOID == ECDSAk) {
     #ifdef WOLFSSL_SMALL_STACK
-        ecc_key* key_pair = NULL;
+        ecc_key* key_pair;
         byte*    privDer;
     #else
         ecc_key  key_pair[1];
@@ -2447,7 +2961,7 @@
                                             der->pubKeySize, key_pair);
                 }
 
-                /* public and private extracted successfuly now check if is
+                /* public and private extracted successfully now check if is
                  * a pair and also do sanity checks on key. wc_ecc_check_key
                  * checks that private * base generator equals pubkey */
                 if (ret == 0) {
@@ -2465,12 +2979,12 @@
     #endif
     }
     else
-    #endif /* HAVE_ECC */
-
-    #ifdef HAVE_ED25519
+    #endif /* HAVE_ECC && HAVE_ECC_KEY_EXPORT && !NO_ASN_CRYPT */
+
+    #if defined(HAVE_ED25519) && !defined(NO_ASN_CRYPT)
     if (der->keyOID == ED25519k) {
     #ifdef WOLFSSL_SMALL_STACK
-        ed25519_key* key_pair = NULL;
+        ed25519_key* key_pair;
     #else
         ed25519_key  key_pair[1];
     #endif
@@ -2495,7 +3009,7 @@
             keyIdx = 0;
             if ((ret = wc_ed25519_import_public(der->publicKey, der->pubKeySize,
                                                               key_pair)) == 0) {
-                /* public and private extracted successfuly no check if is
+                /* public and private extracted successfully no check if is
                  * a pair and also do sanity checks on key. wc_ecc_check_key
                  * checks that private * base generator equals pubkey */
                 if ((ret = wc_ed25519_check_key(key_pair)) == 0)
@@ -2508,7 +3022,50 @@
     #endif
     }
     else
-    #endif
+    #endif /* HAVE_ED25519 && !NO_ASN_CRYPT */
+
+    #if defined(HAVE_ED448) && !defined(NO_ASN_CRYPT)
+    if (der->keyOID == ED448k) {
+    #ifdef WOLFSSL_SMALL_STACK
+        ed448_key* key_pair = NULL;
+    #else
+        ed448_key  key_pair[1];
+    #endif
+        word32     keyIdx = 0;
+
+    #ifdef WOLFSSL_SMALL_STACK
+        key_pair = (ed448_key*)XMALLOC(sizeof(ed448_key), NULL,
+                                                            DYNAMIC_TYPE_ED448);
+        if (key_pair == NULL)
+            return MEMORY_E;
+    #endif
+
+        if ((ret = wc_ed448_init(key_pair)) < 0) {
+    #ifdef WOLFSSL_SMALL_STACK
+            XFREE(key_pair, NULL, DYNAMIC_TYPE_ED448);
+    #endif
+            return ret;
+        }
+        if ((ret = wc_Ed448PrivateKeyDecode(key, &keyIdx, key_pair,
+                                                                 keySz)) == 0) {
+            WOLFSSL_MSG("Checking ED448 key pair");
+            keyIdx = 0;
+            if ((ret = wc_ed448_import_public(der->publicKey, der->pubKeySize,
+                                                              key_pair)) == 0) {
+                /* public and private extracted successfully no check if is
+                 * a pair and also do sanity checks on key. wc_ecc_check_key
+                 * checks that private * base generator equals pubkey */
+                if ((ret = wc_ed448_check_key(key_pair)) == 0)
+                    ret = 1;
+            }
+        }
+        wc_ed448_free(key_pair);
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(key_pair, NULL, DYNAMIC_TYPE_ED448);
+    #endif
+    }
+    else
+    #endif /* HAVE_ED448 && !NO_ASN_CRYPT */
     {
         ret = 0;
     }
@@ -2518,14 +3075,18 @@
     return ret;
 }
 
+#endif /* HAVE_PKCS12 || !NO_CHECK_PRIVATE_KEY */
+
 #ifndef NO_PWDBASED
 
+#if defined(HAVE_PKCS8) || defined(HAVE_PKCS12)
 /* Check To see if PKCS version algo is supported, set id if it is return 0
    < 0 on error */
-static int CheckAlgo(int first, int second, int* id, int* version)
+static int CheckAlgo(int first, int second, int* id, int* version, int* blockSz)
 {
     *id      = ALGO_ID_E;
     *version = PKCS5;   /* default */
+    if (blockSz) *blockSz = 8; /* default */
 
     if (first == 1) {
         switch (second) {
@@ -2540,6 +3101,12 @@
         case PBE_SHA1_DES3:
             *id = PBE_SHA1_DES3;
             *version = PKCS12v1;
+            if (blockSz) *blockSz = DES_BLOCK_SIZE;
+            return 0;
+        case PBE_SHA1_DES:
+            *id = PBE_SHA1_DES;
+            *version = PKCS12v1;
+            if (blockSz) *blockSz = DES_BLOCK_SIZE;
             return 0;
     #endif
 #endif /* !NO_SHA */
@@ -2561,11 +3128,13 @@
     #ifndef NO_MD5
     case 3:                   /* see RFC 2898 for ids */
         *id = PBE_MD5_DES;
+        if (blockSz) *blockSz = DES_BLOCK_SIZE;
         return 0;
     #endif
     #ifndef NO_SHA
     case 10:
         *id = PBE_SHA1_DES;
+        if (blockSz) *blockSz = DES_BLOCK_SIZE;
         return 0;
     #endif
 #endif /* !NO_DES3 */
@@ -2575,32 +3144,45 @@
     }
 }
 
-
 /* Check To see if PKCS v2 algo is supported, set id if it is return 0
    < 0 on error */
-static int CheckAlgoV2(int oid, int* id)
-{
+static int CheckAlgoV2(int oid, int* id, int* blockSz)
+{
+    if (blockSz) *blockSz = 8; /* default */
     (void)id; /* not used if AES and DES3 disabled */
     switch (oid) {
 #if !defined(NO_DES3) && !defined(NO_SHA)
     case DESb:
         *id = PBE_SHA1_DES;
+        if (blockSz) *blockSz = DES_BLOCK_SIZE;
         return 0;
     case DES3b:
         *id = PBE_SHA1_DES3;
+        if (blockSz) *blockSz = DES_BLOCK_SIZE;
         return 0;
 #endif
 #ifdef WOLFSSL_AES_256
     case AES256CBCb:
         *id = PBE_AES256_CBC;
+        if (blockSz) *blockSz = AES_BLOCK_SIZE;
+        return 0;
+#endif
+#ifdef WOLFSSL_AES_128
+    case AES128CBCb:
+        *id = PBE_AES128_CBC;
+        if (blockSz) *blockSz = AES_BLOCK_SIZE;
         return 0;
 #endif
     default:
+        WOLFSSL_MSG("No PKCS v2 algo found");
         return ALGO_ID_E;
 
     }
 }
 
+#endif /* HAVE_PKCS8 || HAVE_PKCS12 */
+
+#ifdef HAVE_PKCS8
 
 int wc_GetKeyOID(byte* key, word32 keySz, const byte** curveOID, word32* oidSz,
         int* algoID, void* heap)
@@ -2612,7 +3194,7 @@
 
     *algoID = 0;
 
-    #ifndef NO_RSA
+    #if !defined(NO_RSA) && !defined(NO_ASN_CRYPT)
     {
         RsaKey rsa;
 
@@ -2625,8 +3207,8 @@
         }
         wc_FreeRsaKey(&rsa);
     }
-    #endif /* NO_RSA */
-    #ifdef HAVE_ECC
+    #endif /* !NO_RSA && !NO_ASN_CRYPT */
+    #if defined(HAVE_ECC) && !defined(NO_ASN_CRYPT)
     if (*algoID == 0) {
         ecc_key ecc;
 
@@ -2647,8 +3229,8 @@
         }
         wc_ecc_free(&ecc);
     }
-#endif /* HAVE_ECC */
-#ifdef HAVE_ED25519
+#endif /* HAVE_ECC && !NO_ASN_CRYPT */
+#if defined(HAVE_ED25519) && !defined(NO_ASN_CRYPT)
     if (*algoID != RSAk && *algoID != ECDSAk) {
         ed25519_key ed25519;
 
@@ -2667,7 +3249,26 @@
             WOLFSSL_MSG("GetKeyOID wc_ed25519_init failed");
         }
     }
-#endif
+#endif /* HAVE_ED25519 && !NO_ASN_CRYPT */
+#if defined(HAVE_ED448) && !defined(NO_ASN_CRYPT)
+    if (*algoID != RSAk && *algoID != ECDSAk && *algoID != ED25519k) {
+        ed448_key ed448;
+
+        tmpIdx = 0;
+        if (wc_ed448_init(&ed448) == 0) {
+            if (wc_Ed448PrivateKeyDecode(key, &tmpIdx, &ed448, keySz) == 0) {
+                *algoID = ED448k;
+            }
+            else {
+                WOLFSSL_MSG("Not ED448 DER key");
+            }
+            wc_ed448_free(&ed448);
+        }
+        else {
+            WOLFSSL_MSG("GetKeyOID wc_ed448_init failed");
+        }
+    }
+#endif /* HAVE_ED448 && !NO_ASN_CRYPT */
 
     /* if flag is not set then is neither RSA or ECC key that could be
      * found */
@@ -2676,12 +3277,41 @@
         return BAD_FUNC_ARG;
     }
 
+    (void)tmpIdx;
     (void)curveOID;
     (void)oidSz;
+    (void)keySz;
+    (void)heap;
 
     return 1;
 }
 
+#endif /* HAVE_PKCS8 */
+
+#if defined(HAVE_PKCS8) || defined(HAVE_PKCS12)
+
+#define PKCS8_MIN_BLOCK_SIZE 8
+static int Pkcs8Pad(byte* buf, int sz, int blockSz)
+{
+    int i, padSz;
+
+    /* calculate pad size */
+    padSz = blockSz - (sz & (blockSz - 1));
+
+    /* pad with padSz value */
+    if (buf) {
+        for (i = 0; i < padSz; i++) {
+            buf[sz+i] = (byte)(padSz & 0xFF);
+        }
+    }
+
+    /* return adjusted length */
+    return sz + padSz;
+}
+
+#endif /* HAVE_PKCS8 || HAVE_PKCS12 */
+
+#ifdef HAVE_PKCS8
 
 /*
  * Used when creating PKCS12 shrouded key bags
@@ -2693,7 +3323,7 @@
  * returns the size of encrypted data on success
  */
 int UnTraditionalEnc(byte* key, word32 keySz, byte* out, word32* outSz,
-        const char* password,int passwordSz, int vPKCS, int vAlgo,
+        const char* password, int passwordSz, int vPKCS, int vAlgo,
         byte* salt, word32 saltSz, int itt, WC_RNG* rng, void* heap)
 {
     int algoID = 0;
@@ -2705,6 +3335,7 @@
     word32 totalSz = 0;
     int    version, id;
     int    ret;
+    int    blockSz = 0;
 
     const byte* curveOID = NULL;
     word32 oidSz   = 0;
@@ -2724,7 +3355,7 @@
 
 
     inOutIdx += MAX_SEQ_SZ; /* leave room for size of finished shroud */
-    if (CheckAlgo(vPKCS, vAlgo, &id, &version) < 0) {
+    if (CheckAlgo(vPKCS, vAlgo, &id, &version, &blockSz) < 0) {
         WOLFSSL_MSG("Bad/Unsupported algorithm ID");
         return ASN_INPUT_E;  /* Algo ID error */
     }
@@ -2739,7 +3370,7 @@
             return ASN_VERSION_E;
         }
 
-        if (salt == NULL || saltSz <= 0) {
+        if (salt == NULL || saltSz == 0) {
             saltSz = 8;
         #ifdef WOLFSSL_SMALL_STACK
             saltTmp = (byte*)XMALLOC(saltSz, heap, DYNAMIC_TYPE_TMP_BUFFER);
@@ -2795,7 +3426,8 @@
 
     /* check key type and get OID if ECC */
     if ((ret = wc_GetKeyOID(key, keySz, &curveOID, &oidSz, &algoID, heap))< 0) {
-            return ret;
+        WOLFSSL_MSG("Error getting key OID");
+        return ret;
     }
 
     /* PKCS#8 wrapping around key */
@@ -2811,7 +3443,7 @@
     /* check if should return max size */
     if (out == NULL) {
         /* account for salt size */
-        if (salt == NULL || saltSz <= 0) {
+        if (salt == NULL || saltSz == 0) {
             tmpSz += MAX_SALT_SIZE;
         }
         else {
@@ -2824,7 +3456,8 @@
         return LENGTH_ONLY_E;
     }
 
-    tmp = (byte*)XMALLOC(tmpSz, heap, DYNAMIC_TYPE_TMP_BUFFER);
+    /* reserve buffer for crypto and make sure it supports full blocks */
+    tmp = (byte*)XMALLOC(tmpSz + (blockSz-1), heap, DYNAMIC_TYPE_TMP_BUFFER);
     if (tmp == NULL) {
     #ifdef WOLFSSL_SMALL_STACK
         if (saltTmp != NULL)
@@ -2845,6 +3478,9 @@
     }
     tmpSz = ret;
 
+    /* adjust size to pad */
+    tmpSz = Pkcs8Pad(tmp, tmpSz, blockSz);
+
 #ifdef WOLFSSL_SMALL_STACK
     cbcIv = (byte*)XMALLOC(MAX_IV_SIZE, heap, DYNAMIC_TYPE_TMP_BUFFER);
     if (cbcIv == NULL) {
@@ -2857,7 +3493,7 @@
 
     /* encrypt PKCS#8 wrapped key */
     if ((ret = wc_CryptKey(password, passwordSz, salt, saltSz, itt, id,
-               tmp, tmpSz, version, cbcIv, 1)) < 0) {
+               tmp, tmpSz, version, cbcIv, 1, 0)) < 0) {
         XFREE(tmp, heap, DYNAMIC_TYPE_TMP_BUFFER);
         WOLFSSL_MSG("Error encrypting key");
     #ifdef WOLFSSL_SMALL_STACK
@@ -2888,265 +3524,86 @@
     XMEMCPY(out + inOutIdx, tmp, tmpSz);
     XFREE(tmp, heap, DYNAMIC_TYPE_TMP_BUFFER);
 
-    /* set total size at begining */
+    /* set total size at beginning */
     sz = SetSequence(totalSz, out);
     XMEMMOVE(out + sz, out + MAX_SEQ_SZ, totalSz);
 
+    (void)rng;
+
     return totalSz + sz;
 }
 
-
-/* Remove Encrypted PKCS8 header, move beginning of traditional to beginning
-   of input */
-int ToTraditionalEnc(byte* input, word32 sz,const char* password,int passwordSz)
-{
-    word32 inOutIdx = 0, seqEnd, oid;
-    int    ret = 0, first, second, length = 0, version, saltSz, id;
-    int    iterations = 0, keySz = 0;
-#ifdef WOLFSSL_SMALL_STACK
-    byte*  salt = NULL;
-    byte*  cbcIv = NULL;
-#else
-    byte   salt[MAX_SALT_SIZE];
-    byte   cbcIv[MAX_IV_SIZE];
-#endif
-
-    if (passwordSz < 0) {
-        WOLFSSL_MSG("Bad password size");
-        return BAD_FUNC_ARG;
-    }
-
-    if (GetSequence(input, &inOutIdx, &length, sz) < 0) {
-        ERROR_OUT(ASN_PARSE_E, exit_tte);
-    }
-
-    if (GetAlgoId(input, &inOutIdx, &oid, oidIgnoreType, sz) < 0) {
-        ERROR_OUT(ASN_PARSE_E, exit_tte);
-    }
-
-    first  = input[inOutIdx - 2];   /* PKCS version always 2nd to last byte */
-    second = input[inOutIdx - 1];   /* version.algo, algo id last byte */
-
-    if (CheckAlgo(first, second, &id, &version) < 0) {
-        ERROR_OUT(ASN_INPUT_E, exit_tte); /* Algo ID error */
-    }
-
-    if (version == PKCS5v2) {
-        if (GetSequence(input, &inOutIdx, &length, sz) < 0) {
-            ERROR_OUT(ASN_PARSE_E, exit_tte);
-        }
-
-        if (GetAlgoId(input, &inOutIdx, &oid, oidKdfType, sz) < 0) {
-            ERROR_OUT(ASN_PARSE_E, exit_tte);
-        }
-
-        if (oid != PBKDF2_OID) {
-            ERROR_OUT(ASN_PARSE_E, exit_tte);
-        }
-    }
-
-    if (GetSequence(input, &inOutIdx, &length, sz) <= 0) {
-        ERROR_OUT(ASN_PARSE_E, exit_tte);
-    }
-    /* Find the end of this SEQUENCE so we can check for the OPTIONAL and
-     * DEFAULT items. */
-    seqEnd = inOutIdx + length;
-
-    ret = GetOctetString(input, &inOutIdx, &saltSz, sz);
-    if (ret < 0)
-        goto exit_tte;
-
-    if (saltSz > MAX_SALT_SIZE) {
-        ERROR_OUT(ASN_PARSE_E, exit_tte);
-    }
-
-#ifdef WOLFSSL_SMALL_STACK
-    salt = (byte*)XMALLOC(MAX_SALT_SIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    if (salt == NULL) {
-        ERROR_OUT(MEMORY_E, exit_tte);
-    }
-#endif
-
-    XMEMCPY(salt, &input[inOutIdx], saltSz);
-    inOutIdx += saltSz;
-
-    if (GetShortInt(input, &inOutIdx, &iterations, sz) < 0) {
-        ERROR_OUT(ASN_PARSE_E, exit_tte);
-    }
-
-    /* OPTIONAL key length */
-    if (seqEnd > inOutIdx && input[inOutIdx] == ASN_INTEGER) {
-        if (GetShortInt(input, &inOutIdx, &keySz, sz) < 0) {
-            ERROR_OUT(ASN_PARSE_E, exit_tte);
-        }
-    }
-
-    /* DEFAULT HMAC is SHA-1 */
-    if (seqEnd > inOutIdx) {
-        if (GetAlgoId(input, &inOutIdx, &oid, oidHmacType, sz) < 0) {
-            ERROR_OUT(ASN_PARSE_E, exit_tte);
-        }
-    }
-
-#ifdef WOLFSSL_SMALL_STACK
-    cbcIv = (byte*)XMALLOC(MAX_IV_SIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    if (cbcIv == NULL) {
-        ERROR_OUT(MEMORY_E, exit_tte);
-    }
-#endif
-
-    if (version == PKCS5v2) {
-        /* get encryption algo */
-        if (GetAlgoId(input, &inOutIdx, &oid, oidBlkType, sz) < 0) {
-            ERROR_OUT(ASN_PARSE_E, exit_tte);
-        }
-
-        if (CheckAlgoV2(oid, &id) < 0) {
-            ERROR_OUT(ASN_PARSE_E, exit_tte); /* PKCS v2 algo id error */
-        }
-
-        ret = GetOctetString(input, &inOutIdx, &length, sz);
-        if (ret < 0)
-            goto exit_tte;
-
-        if (length > MAX_IV_SIZE) {
-            ERROR_OUT(ASN_PARSE_E, exit_tte);
-        }
-
-        XMEMCPY(cbcIv, &input[inOutIdx], length);
-        inOutIdx += length;
-    }
-
-    ret = GetOctetString(input, &inOutIdx, &length, sz);
-    if (ret < 0)
-        goto exit_tte;
-
-    ret = wc_CryptKey(password, passwordSz, salt, saltSz, iterations, id,
-                   input + inOutIdx, length, version, cbcIv, 0);
-
-exit_tte:
-#ifdef WOLFSSL_SMALL_STACK
-    XFREE(salt,  NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    XFREE(cbcIv, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    if (ret == 0) {
-        XMEMMOVE(input, input + inOutIdx, length);
-        ret = ToTraditional(input, length);
-    }
-
-    return ret;
-}
-
-
-/* encrypt PKCS 12 content
- *
- * NOTE: if out is NULL then outSz is set with the total buffer size needed and
- *       the error value LENGTH_ONLY_E is returned.
- *
- * input      data to encrypt
- * inputSz    size of input buffer
- * out        buffer to hold the result
- * outSz      size of out buffer
- * password   password if used. Can be NULL for no password
- * passwordSz size of password buffer
- * vPKCS      version of PKCS i.e. PKCS5v2
- * vAlgo      algorithm version
- * salt       buffer holding salt if used. If NULL then a random salt is created
- * saltSz     size of salt buffer if it is not NULL
- * itt        number of iterations used
- * rng        random number generator to use
- * heap       possible heap hint for mallocs/frees
- *
- * returns the total size of encrypted content on success.
+static int GetAlgoV2(int encAlgId, const byte** oid, int *len, int* id,
+                     int *blkSz)
+{
+    int ret = 0;
+
+    switch (encAlgId) {
+#if !defined(NO_DES3) && !defined(NO_SHA)
+    case DESb:
+        *len = sizeof(blkDesCbcOid);
+        *oid = blkDesCbcOid;
+        *id = PBE_SHA1_DES;
+        *blkSz = 8;
+        break;
+    case DES3b:
+        *len = sizeof(blkDes3CbcOid);
+        *oid = blkDes3CbcOid;
+        *id = PBE_SHA1_DES3;
+        *blkSz = 8;
+        break;
+#endif
+#if defined(WOLFSSL_AES_256) && defined(HAVE_AES_CBC)
+    case AES256CBCb:
+        *len = sizeof(blkAes256CbcOid);
+        *oid = blkAes256CbcOid;
+        *id = PBE_AES256_CBC;
+        *blkSz = 16;
+        break;
+#endif
+    default:
+        (void)len;
+        (void)oid;
+        (void)id;
+        (void)blkSz;
+        ret = ALGO_ID_E;
+    }
+
+    return ret;
+}
+
+/* Converts Encrypted PKCS#8 to 'traditional' (i.e. PKCS#8 removed from
+ * decrypted key.)
  */
-int EncryptContent(byte* input, word32 inputSz, byte* out, word32* outSz,
+int TraditionalEnc(byte* key, word32 keySz, byte* out, word32* outSz,
         const char* password, int passwordSz, int vPKCS, int vAlgo,
-        byte* salt, word32 saltSz, int itt, WC_RNG* rng, void* heap)
-{
-    word32 sz;
-    word32 inOutIdx = 0;
-    word32 tmpIdx   = 0;
-    word32 totalSz  = 0;
-    word32 seqSz;
-    int    ret;
-    int    version, id;
-#ifdef WOLFSSL_SMALL_STACK
-    byte*  saltTmp = NULL;
-    byte*  cbcIv   = NULL;
-#else
-    byte   saltTmp[MAX_SALT_SIZE];
-    byte   cbcIv[MAX_IV_SIZE];
-#endif
-
-    (void)heap;
-
-    WOLFSSL_ENTER("EncryptContent()");
-
-    if (CheckAlgo(vPKCS, vAlgo, &id, &version) < 0)
-        return ASN_INPUT_E;  /* Algo ID error */
-
-    if (version == PKCS5v2) {
-        WOLFSSL_MSG("PKCS#5 version 2 not supported yet");
-        return BAD_FUNC_ARG;
-    }
-
-    if (saltSz > MAX_SALT_SIZE)
-        return ASN_PARSE_E;
-
-    if (outSz == NULL) {
-        return BAD_FUNC_ARG;
-    }
-
-    if (out == NULL) {
-        sz = inputSz;
-        switch (id) {
-        #if !defined(NO_DES3) && (!defined(NO_MD5) || !defined(NO_SHA))
-            case PBE_MD5_DES:
-            case PBE_SHA1_DES:
-            case PBE_SHA1_DES3:
-                /* set to block size of 8 for DES operations. This rounds up
-                 * to the nearset multiple of 8 */
-                sz &= 0xfffffff8;
-                sz += 8;
-                break;
-        #endif /* !NO_DES3 && (!NO_MD5 || !NO_SHA) */
-        #if !defined(NO_RC4) && !defined(NO_SHA)
-            case PBE_SHA1_RC4_128:
-                break;
-        #endif
-            case -1:
-                break;
-
-            default:
-                return ALGO_ID_E;
-        }
-
-        if (saltSz <= 0) {
-            sz += MAX_SALT_SIZE;
-        }
-        else {
-            sz += saltSz;
-        }
-
-        /* add 2 for tags */
-        *outSz = sz + MAX_ALGO_SZ + MAX_SEQ_SZ + MAX_LENGTH_SZ +
-            MAX_LENGTH_SZ + MAX_LENGTH_SZ + MAX_SHORT_SZ + 2;
-
-        return LENGTH_ONLY_E;
-    }
-
-    if (inOutIdx + MAX_ALGO_SZ + MAX_SEQ_SZ + 1 > *outSz)
-        return BUFFER_E;
-
-    sz = SetAlgoID(id, out + inOutIdx, oidPBEType, 0);
-    inOutIdx += sz; totalSz += sz;
-    tmpIdx = inOutIdx;
-    tmpIdx += MAX_SEQ_SZ; /* save room for salt and itter sequence */
-    out[tmpIdx++] = ASN_OCTET_STRING;
-
+        int encAlgId, byte* salt, word32 saltSz, int itt, WC_RNG* rng,
+        void* heap)
+{
+    int ret = 0;
+    int version, blockSz, id;
+    word32 idx = 0, encIdx;
+#ifdef WOLFSSL_SMALL_STACK
+    byte* saltTmp = NULL;
+#else
+    byte saltTmp[MAX_SALT_SIZE];
+#endif
+    byte cbcIv[MAX_IV_SIZE];
+    byte *pkcs8Key = NULL;
+    word32 pkcs8KeySz = 0, padSz = 0;
+    int algId = 0;
+    const byte* curveOid = NULL;
+    word32 curveOidSz = 0;
+    const byte* pbeOid = NULL;
+    word32 pbeOidSz = 0;
+    const byte* encOid = NULL;
+    int encOidSz = 0;
+    word32 pbeLen = 0, kdfLen = 0, encLen = 0;
+    word32 innerLen = 0, outerLen;
+
+    ret = CheckAlgo(vPKCS, vAlgo, &id, &version, &blockSz);
     /* create random salt if one not provided */
-    if (salt == NULL || saltSz <= 0) {
+    if (ret == 0 && (salt == NULL || saltSz == 0)) {
         saltSz = 8;
     #ifdef WOLFSSL_SMALL_STACK
         saltTmp = (byte*)XMALLOC(saltSz, heap, DYNAMIC_TYPE_TMP_BUFFER);
@@ -3164,71 +3621,142 @@
         }
     }
 
-    if (tmpIdx + MAX_LENGTH_SZ + saltSz + MAX_SHORT_SZ > *outSz) {
-    #ifdef WOLFSSL_SMALL_STACK
-        XFREE(saltTmp, heap, DYNAMIC_TYPE_TMP_BUFFER);
-    #endif
-        return BUFFER_E;
-    }
-
-    sz = SetLength(saltSz, out + tmpIdx);
-    tmpIdx += sz;
-
-    XMEMCPY(out + tmpIdx, salt, saltSz);
-    tmpIdx += saltSz;
-
-    /* place itteration setting in buffer */
-    ret = SetShortInt(out, &tmpIdx, itt, *outSz);
-    if (ret < 0) {
-    #ifdef WOLFSSL_SMALL_STACK
-        XFREE(saltTmp, heap, DYNAMIC_TYPE_TMP_BUFFER);
-    #endif
-        return ret;
-    }
-
-    /* rewind and place sequence */
-    sz = tmpIdx - inOutIdx - MAX_SEQ_SZ;
-    seqSz = SetSequence(sz, out + inOutIdx);
-    XMEMMOVE(out + inOutIdx + seqSz, out + inOutIdx + MAX_SEQ_SZ, sz);
-    inOutIdx += seqSz; totalSz += seqSz;
-    inOutIdx += sz; totalSz += sz;
-
-#ifdef WOLFSSL_SMALL_STACK
-    cbcIv = (byte*)XMALLOC(MAX_IV_SIZE, heap, DYNAMIC_TYPE_TMP_BUFFER);
-    if (cbcIv == NULL) {
+    if (ret == 0) {
+        /* check key type and get OID if ECC */
+        ret = wc_GetKeyOID(key, keySz, &curveOid, &curveOidSz, &algId, heap);
+        if (ret == 1)
+            ret = 0;
+    }
+    if (ret == 0) {
+        ret = wc_CreatePKCS8Key(NULL, &pkcs8KeySz, key, keySz, algId, curveOid,
+                                                                    curveOidSz);
+        if (ret == LENGTH_ONLY_E)
+            ret = 0;
+    }
+    if (ret == 0) {
+        pkcs8Key = (byte*)XMALLOC(pkcs8KeySz, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        if (pkcs8Key == NULL)
+            ret = MEMORY_E;
+    }
+    if (ret == 0) {
+        ret = wc_CreatePKCS8Key(pkcs8Key, &pkcs8KeySz, key, keySz, algId,
+                                                          curveOid, curveOidSz);
+        if (ret >= 0) {
+            pkcs8KeySz = ret;
+            ret = 0;
+        }
+    }
+
+    if (ret == 0 && version == PKCS5v2)
+        ret = GetAlgoV2(encAlgId, &encOid, &encOidSz, &id, &blockSz);
+
+    if (ret == 0) {
+        padSz = (blockSz - (pkcs8KeySz & (blockSz - 1))) & (blockSz - 1);
+        /* inner = OCT salt INT itt */
+        innerLen = 2 + saltSz + 2 + (itt < 256 ? 1 : 2);
+
+        if (version != PKCS5v2) {
+            pbeOid = OidFromId(id, oidPBEType, &pbeOidSz);
+            /* pbe = OBJ pbse1 SEQ [ inner ] */
+            pbeLen = 2 + pbeOidSz + 2 + innerLen;
+        }
+        else {
+            pbeOid = pbes2;
+            pbeOidSz = sizeof(pbes2);
+            /* kdf = OBJ pbkdf2 [ SEQ innerLen ] */
+            kdfLen = 2 + sizeof(pbkdf2Oid) + 2 + innerLen;
+            /* enc = OBJ enc_alg OCT iv */
+            encLen = 2 + encOidSz + 2 + blockSz;
+            /* pbe = OBJ pbse2 SEQ [ SEQ [ kdf ] SEQ [ enc ] ] */
+            pbeLen = 2 + sizeof(pbes2) + 2 + 2 + kdfLen + 2 + encLen;
+
+            ret = wc_RNG_GenerateBlock(rng, cbcIv, blockSz);
+        }
+    }
+    if (ret == 0) {
+        /* outer = SEQ [ pbe ] OCT encrypted_PKCS#8_key */
+        outerLen = 2 + pbeLen;
+        outerLen += SetOctetString(pkcs8KeySz + padSz, out);
+        outerLen += pkcs8KeySz + padSz;
+
+        idx += SetSequence(outerLen, out + idx);
+
+        encIdx = idx + outerLen - pkcs8KeySz - padSz;
+        /* Put Encrypted content in place. */
+        XMEMCPY(out + encIdx, pkcs8Key, pkcs8KeySz);
+        if (padSz > 0) {
+            XMEMSET(out + encIdx + pkcs8KeySz, padSz, padSz);
+            pkcs8KeySz += padSz;
+        }
+        ret = wc_CryptKey(password, passwordSz, salt, saltSz, itt, id,
+                                out + encIdx, pkcs8KeySz, version, cbcIv, 1, 0);
+    }
+    if (ret == 0) {
+        if (version != PKCS5v2) {
+            /* PBE algorithm */
+            idx += SetSequence(pbeLen, out + idx);
+            idx += SetObjectId(pbeOidSz, out + idx);
+            XMEMCPY(out + idx, pbeOid, pbeOidSz);
+            idx += pbeOidSz;
+        }
+        else {
+            /* PBES2 algorithm identifier */
+            idx += SetSequence(pbeLen, out + idx);
+            idx += SetObjectId(pbeOidSz, out + idx);
+            XMEMCPY(out + idx, pbeOid, pbeOidSz);
+            idx += pbeOidSz;
+            /* PBES2 Parameters: SEQ [ kdf ] SEQ [ enc ] */
+            idx += SetSequence(2 + kdfLen + 2 + encLen, out + idx);
+            /* KDF Algorithm Identifier */
+            idx += SetSequence(kdfLen, out + idx);
+            idx += SetObjectId(sizeof(pbkdf2Oid), out + idx);
+            XMEMCPY(out + idx, pbkdf2Oid, sizeof(pbkdf2Oid));
+            idx += sizeof(pbkdf2Oid);
+        }
+        idx += SetSequence(innerLen, out + idx);
+        idx += SetOctetString(saltSz, out + idx);
+        XMEMCPY(out + idx, salt, saltSz); idx += saltSz;
+        ret = SetShortInt(out, &idx, itt, *outSz);
+        if (ret > 0)
+            ret = 0;
+    }
+    if (ret == 0) {
+        if (version == PKCS5v2) {
+            /* Encryption Algorithm Identifier */
+            idx += SetSequence(encLen, out + idx);
+            idx += SetObjectId(encOidSz, out + idx);
+            XMEMCPY(out + idx, encOid, encOidSz);
+            idx += encOidSz;
+            /* Encryption Algorithm Parameter: CBC IV */
+            idx += SetOctetString(blockSz, out + idx);
+            XMEMCPY(out + idx, cbcIv, blockSz);
+            idx += blockSz;
+        }
+        idx += SetOctetString(pkcs8KeySz, out + idx);
+        /* Default PRF - no need to write out OID */
+        idx += pkcs8KeySz;
+
+        ret = idx;
+    }
+
+    if (pkcs8Key != NULL) {
+        ForceZero(pkcs8Key, pkcs8KeySz);
+        XFREE(pkcs8Key, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#ifdef WOLFSSL_SMALL_STACK
+    if (saltTmp != NULL) {
         XFREE(saltTmp, heap, DYNAMIC_TYPE_TMP_BUFFER);
-        return MEMORY_E;
-    }
-#endif
-
-    if ((ret = wc_CryptKey(password, passwordSz, salt, saltSz, itt, id,
-                   input, inputSz, version, cbcIv, 1)) < 0) {
-
-    #ifdef WOLFSSL_SMALL_STACK
-        XFREE(cbcIv,   heap, DYNAMIC_TYPE_TMP_BUFFER);
-        XFREE(saltTmp, heap, DYNAMIC_TYPE_TMP_BUFFER);
-    #endif
-        return ret;  /* encrypt failure */
-    }
-
-#ifdef WOLFSSL_SMALL_STACK
-    XFREE(cbcIv,   heap, DYNAMIC_TYPE_TMP_BUFFER);
-    XFREE(saltTmp, heap, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    if (inOutIdx + 1 + MAX_LENGTH_SZ + inputSz > *outSz)
-        return BUFFER_E;
-
-    out[inOutIdx++] = ASN_LONG_LENGTH; totalSz++;
-    sz = SetLength(inputSz, out + inOutIdx);
-    inOutIdx += sz; totalSz += sz;
-    XMEMCPY(out + inOutIdx, input, inputSz);
-    totalSz += inputSz;
-
-    return totalSz;
-}
-
-
+    }
+#endif
+
+    (void)rng;
+
+    return ret;
+}
+
+#endif /* HAVE_PKCS8 */
+
+#if defined(HAVE_PKCS8) || defined(HAVE_PKCS12)
 /* decrypt PKCS
  *
  * NOTE: input buffer is overwritten with decrypted data!
@@ -3240,11 +3768,10 @@
  *
  * returns the total size of decrypted content on success.
  */
-int DecryptContent(byte* input, word32 sz,const char* password,int passwordSz)
-{
-    word32 inOutIdx = 0, seqEnd, oid;
-    int    ret = 0;
-    int    first, second, length = 0, version, saltSz, id;
+int DecryptContent(byte* input, word32 sz, const char* password, int passwordSz)
+{
+    word32 inOutIdx = 0, seqEnd, oid, shaOid = 0;
+    int    ret = 0, first, second, length = 0, version, saltSz, id;
     int    iterations = 0, keySz = 0;
 #ifdef WOLFSSL_SMALL_STACK
     byte*  salt = NULL;
@@ -3253,6 +3780,12 @@
     byte   salt[MAX_SALT_SIZE];
     byte   cbcIv[MAX_IV_SIZE];
 #endif
+    byte   tag;
+
+    if (passwordSz < 0) {
+        WOLFSSL_MSG("Bad password size");
+        return BAD_FUNC_ARG;
+    }
 
     if (GetAlgoId(input, &inOutIdx, &oid, oidIgnoreType, sz) < 0) {
         ERROR_OUT(ASN_PARSE_E, exit_dc);
@@ -3261,7 +3794,7 @@
     first  = input[inOutIdx - 2];   /* PKCS version always 2nd to last byte */
     second = input[inOutIdx - 1];   /* version.algo, algo id last byte */
 
-    if (CheckAlgo(first, second, &id, &version) < 0) {
+    if (CheckAlgo(first, second, &id, &version, NULL) < 0) {
         ERROR_OUT(ASN_INPUT_E, exit_dc); /* Algo ID error */
     }
 
@@ -3309,8 +3842,15 @@
     }
 
     /* OPTIONAL key length */
-    if (seqEnd > inOutIdx && input[inOutIdx] == ASN_INTEGER) {
-        if (GetShortInt(input, &inOutIdx, &keySz, sz) < 0) {
+    if (seqEnd > inOutIdx) {
+        word32 localIdx = inOutIdx;
+
+        if (GetASNTag(input, &localIdx, &tag, sz) < 0) {
+            ERROR_OUT(ASN_PARSE_E, exit_dc);
+        }
+
+        if (tag == ASN_INTEGER &&
+                GetShortInt(input, &inOutIdx, &keySz, sz) < 0) {
             ERROR_OUT(ASN_PARSE_E, exit_dc);
         }
     }
@@ -3320,6 +3860,8 @@
         if (GetAlgoId(input, &inOutIdx, &oid, oidHmacType, sz) < 0) {
             ERROR_OUT(ASN_PARSE_E, exit_dc);
         }
+
+        shaOid = oid;
     }
 
 #ifdef WOLFSSL_SMALL_STACK
@@ -3335,10 +3877,13 @@
             ERROR_OUT(ASN_PARSE_E, exit_dc);
         }
 
-        if (CheckAlgoV2(oid, &id) < 0) {
+        if (CheckAlgoV2(oid, &id, NULL) < 0) {
             ERROR_OUT(ASN_PARSE_E, exit_dc); /* PKCS v2 algo id error */
         }
 
+        if (shaOid == 0)
+            shaOid = oid;
+
         ret = GetOctetString(input, &inOutIdx, &length, sz);
         if (ret < 0)
             goto exit_dc;
@@ -3351,7 +3896,11 @@
         inOutIdx += length;
     }
 
-    if (input[inOutIdx++] != (ASN_CONTEXT_SPECIFIC | 0)) {
+    if (GetASNTag(input, &inOutIdx, &tag, sz) < 0) {
+        ERROR_OUT(ASN_PARSE_E, exit_dc);
+    }
+
+    if (tag != (ASN_CONTEXT_SPECIFIC | 0) && tag != ASN_OCTET_STRING) {
         ERROR_OUT(ASN_PARSE_E, exit_dc);
     }
 
@@ -3360,10 +3909,9 @@
     }
 
     ret = wc_CryptKey(password, passwordSz, salt, saltSz, iterations, id,
-                   input + inOutIdx, length, version, cbcIv, 0);
+                   input + inOutIdx, length, version, cbcIv, 0, shaOid);
 
 exit_dc:
-
 #ifdef WOLFSSL_SMALL_STACK
     XFREE(salt,  NULL, DYNAMIC_TYPE_TMP_BUFFER);
     XFREE(cbcIv, NULL, DYNAMIC_TYPE_TMP_BUFFER);
@@ -3376,28 +3924,265 @@
 
     return ret;
 }
+
+
+/* Remove Encrypted PKCS8 header, move beginning of traditional to beginning
+   of input */
+int ToTraditionalEnc(byte* input, word32 sz,const char* password,
+                     int passwordSz, word32* algId)
+{
+    int ret, length;
+    word32 inOutIdx = 0;
+
+    if (GetSequence(input, &inOutIdx, &length, sz) < 0) {
+        ret = ASN_PARSE_E;
+    }
+    else {
+        ret = DecryptContent(input + inOutIdx, sz - inOutIdx, password,
+                passwordSz);
+        if (ret > 0) {
+            XMEMMOVE(input, input + inOutIdx, ret);
+            ret = ToTraditional_ex(input, ret, algId);
+        }
+    }
+
+    return ret;
+}
+
+#endif /* HAVE_PKCS8 || HAVE_PKCS12 */
+
+#ifdef HAVE_PKCS12
+
+/* encrypt PKCS 12 content
+ *
+ * NOTE: if out is NULL then outSz is set with the total buffer size needed and
+ *       the error value LENGTH_ONLY_E is returned.
+ *
+ * input      data to encrypt
+ * inputSz    size of input buffer
+ * out        buffer to hold the result
+ * outSz      size of out buffer
+ * password   password if used. Can be NULL for no password
+ * passwordSz size of password buffer
+ * vPKCS      version of PKCS i.e. PKCS5v2
+ * vAlgo      algorithm version
+ * salt       buffer holding salt if used. If NULL then a random salt is created
+ * saltSz     size of salt buffer if it is not NULL
+ * itt        number of iterations used
+ * rng        random number generator to use
+ * heap       possible heap hint for mallocs/frees
+ *
+ * returns the total size of encrypted content on success.
+ *
+ * data returned is :
+ * [ seq - obj [ seq -salt,itt]] , construct with encrypted data
+ */
+int EncryptContent(byte* input, word32 inputSz, byte* out, word32* outSz,
+        const char* password, int passwordSz, int vPKCS, int vAlgo,
+        byte* salt, word32 saltSz, int itt, WC_RNG* rng, void* heap)
+{
+    word32 sz;
+    word32 inOutIdx = 0;
+    word32 tmpIdx   = 0;
+    word32 totalSz  = 0;
+    word32 seqSz;
+    word32 innerSz;
+    int    ret;
+    int    version, id, blockSz = 0;
+#ifdef WOLFSSL_SMALL_STACK
+    byte*  saltTmp = NULL;
+    byte*  cbcIv   = NULL;
+#else
+    byte   saltTmp[MAX_SALT_SIZE];
+    byte   cbcIv[MAX_IV_SIZE];
+#endif
+    byte   seq[MAX_SEQ_SZ];
+    byte   shr[MAX_SHORT_SZ];
+    word32 maxShr = MAX_SHORT_SZ;
+    word32 algoSz;
+    const  byte* algoName;
+
+    (void)heap;
+
+    WOLFSSL_ENTER("EncryptContent()");
+
+    if (CheckAlgo(vPKCS, vAlgo, &id, &version, &blockSz) < 0)
+        return ASN_INPUT_E;  /* Algo ID error */
+
+    if (version == PKCS5v2) {
+        WOLFSSL_MSG("PKCS#5 version 2 not supported yet");
+        return BAD_FUNC_ARG;
+    }
+
+    if (saltSz > MAX_SALT_SIZE)
+        return ASN_PARSE_E;
+
+    if (outSz == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    /* calculate size */
+    /* size of constructed string at end */
+    sz = Pkcs8Pad(NULL, inputSz, blockSz);
+    totalSz  = ASN_TAG_SZ;
+    totalSz += SetLength(sz, seq);
+    totalSz += sz;
+
+    /* size of sequence holding object id and sub sequence of salt and itt */
+    algoName = OidFromId(id, oidPBEType, &algoSz);
+    if (algoName == NULL) {
+        WOLFSSL_MSG("Unknown Algorithm");
+        return 0;
+    }
+    innerSz = SetObjectId(algoSz, seq);
+    innerSz += algoSz;
+
+    /* get subsequence of salt and itt */
+    if (salt == NULL || saltSz == 0) {
+        sz = 8;
+    }
+    else {
+        sz = saltSz;
+    }
+    seqSz  = SetOctetString(sz, seq);
+    seqSz += sz;
+
+    tmpIdx = 0;
+    seqSz += SetShortInt(shr, &tmpIdx, itt, maxShr);
+    innerSz += seqSz + SetSequence(seqSz, seq);
+    totalSz += innerSz + SetSequence(innerSz, seq);
+
+    if (out == NULL) {
+        *outSz = totalSz;
+        return LENGTH_ONLY_E;
+    }
+
+    inOutIdx = 0;
+    if (totalSz > *outSz)
+        return BUFFER_E;
+
+    inOutIdx += SetSequence(innerSz, out + inOutIdx);
+    inOutIdx += SetObjectId(algoSz, out + inOutIdx);
+    XMEMCPY(out + inOutIdx, algoName, algoSz);
+    inOutIdx += algoSz;
+    inOutIdx += SetSequence(seqSz, out + inOutIdx);
+
+    /* create random salt if one not provided */
+    if (salt == NULL || saltSz == 0) {
+        saltSz = 8;
+    #ifdef WOLFSSL_SMALL_STACK
+        saltTmp = (byte*)XMALLOC(saltSz, heap, DYNAMIC_TYPE_TMP_BUFFER);
+        if (saltTmp == NULL)
+            return MEMORY_E;
+    #endif
+        salt = saltTmp;
+
+        if ((ret = wc_RNG_GenerateBlock(rng, saltTmp, saltSz)) != 0) {
+            WOLFSSL_MSG("Error generating random salt");
+        #ifdef WOLFSSL_SMALL_STACK
+            XFREE(saltTmp, heap, DYNAMIC_TYPE_TMP_BUFFER);
+        #endif
+            return ret;
+        }
+    }
+    inOutIdx += SetOctetString(saltSz, out + inOutIdx);
+    if (saltSz + inOutIdx > *outSz) {
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(saltTmp, heap, DYNAMIC_TYPE_TMP_BUFFER);
+    #endif
+        return BUFFER_E;
+    }
+    XMEMCPY(out + inOutIdx, salt, saltSz);
+    inOutIdx += saltSz;
+
+    /* place iteration setting in buffer */
+    ret = SetShortInt(out, &inOutIdx, itt, *outSz);
+    if (ret < 0) {
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(saltTmp, heap, DYNAMIC_TYPE_TMP_BUFFER);
+    #endif
+        return ret;
+    }
+
+    if (inOutIdx + 1 > *outSz) {
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(saltTmp, heap, DYNAMIC_TYPE_TMP_BUFFER);
+    #endif
+        return BUFFER_E;
+    }
+    out[inOutIdx++] = ASN_CONTEXT_SPECIFIC | 0;
+
+    /* get pad size and verify buffer room */
+    sz = Pkcs8Pad(NULL, inputSz, blockSz);
+    if (sz + inOutIdx > *outSz) {
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(saltTmp, heap, DYNAMIC_TYPE_TMP_BUFFER);
+    #endif
+        return BUFFER_E;
+    }
+    inOutIdx += SetLength(sz, out + inOutIdx);
+
+    /* copy input to output buffer and pad end */
+    XMEMCPY(out + inOutIdx, input, inputSz);
+    sz = Pkcs8Pad(out + inOutIdx, inputSz, blockSz);
+#ifdef WOLFSSL_SMALL_STACK
+    cbcIv = (byte*)XMALLOC(MAX_IV_SIZE, heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (cbcIv == NULL) {
+        XFREE(saltTmp, heap, DYNAMIC_TYPE_TMP_BUFFER);
+        return MEMORY_E;
+    }
+#endif
+
+    /* encrypt */
+    if ((ret = wc_CryptKey(password, passwordSz, salt, saltSz, itt, id,
+                   out + inOutIdx, sz, version, cbcIv, 1, 0)) < 0) {
+
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(cbcIv,   heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(saltTmp, heap, DYNAMIC_TYPE_TMP_BUFFER);
+    #endif
+        return ret;  /* encrypt failure */
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    XFREE(cbcIv,   heap, DYNAMIC_TYPE_TMP_BUFFER);
+    XFREE(saltTmp, heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+    (void)rng;
+
+    return inOutIdx + sz;
+}
+
+
+#endif /* HAVE_PKCS12 */
 #endif /* NO_PWDBASED */
 
 #ifndef NO_RSA
 
 #ifndef HAVE_USER_RSA
-int wc_RsaPublicKeyDecode(const byte* input, word32* inOutIdx, RsaKey* key,
-                       word32 inSz)
-{
-    int  length;
+#ifdef WOLFSSL_RENESAS_TSIP
+/* This function is to retrieve key position information in a cert.*
+ * The information will be used to call TSIP TLS-linked API for    *
+ * certificate verification.                                       */
+static int RsaPublicKeyDecodeRawIndex(const byte* input, word32* inOutIdx,
+                                      word32 inSz, word32* key_n,
+                                      word32* key_n_len, word32* key_e,
+                                      word32* key_e_len)
+{
+
+    int ret = 0;
+    int length = 0;
 #if defined(OPENSSL_EXTRA) || defined(RSA_DECODE_EXTRA)
     byte b;
 #endif
-    int ret;
-
-    if (input == NULL || inOutIdx == NULL || key == NULL)
+
+    if (input == NULL || inOutIdx == NULL)
         return BAD_FUNC_ARG;
 
     if (GetSequence(input, inOutIdx, &length, inSz) < 0)
         return ASN_PARSE_E;
 
-    key->type = RSA_PUBLIC;
-
 #if defined(OPENSSL_EXTRA) || defined(RSA_DECODE_EXTRA)
     if ((*inOutIdx + 1) > inSz)
         return BUFFER_E;
@@ -3431,20 +4216,124 @@
     }
 #endif /* OPENSSL_EXTRA */
 
-    if (GetInt(&key->n,  input, inOutIdx, inSz) < 0)
+    /* Get modulus */
+    ret = GetASNInt(input, inOutIdx, &length, inSz);
+    *key_n += *inOutIdx;
+    if (ret < 0) {
         return ASN_RSA_KEY_E;
-    if (GetInt(&key->e,  input, inOutIdx, inSz) < 0) {
-        mp_clear(&key->n);
+    }
+    if (key_n_len)
+        *key_n_len = length;
+    *inOutIdx += length;
+
+    /* Get exponent */
+    ret = GetASNInt(input, inOutIdx, &length, inSz);
+    *key_e += *inOutIdx;
+    if (ret < 0) {
         return ASN_RSA_KEY_E;
     }
-
-#ifdef WOLFSSL_XILINX_CRYPT
-    if (wc_InitRsaHw(key) != 0) {
-        return BAD_STATE_E;
-    }
-#endif
-
-    return 0;
+    if (key_e_len)
+        *key_e_len = length;
+
+    return ret;
+}
+#endif /* WOLFSSL_RENESAS_TSIP */
+
+int wc_RsaPublicKeyDecode_ex(const byte* input, word32* inOutIdx, word32 inSz,
+    const byte** n, word32* nSz, const byte** e, word32* eSz)
+{
+    int ret = 0;
+    int length = 0;
+#if defined(OPENSSL_EXTRA) || defined(RSA_DECODE_EXTRA)
+    word32 localIdx;
+    byte   tag;
+#endif
+
+    if (input == NULL || inOutIdx == NULL)
+        return BAD_FUNC_ARG;
+
+    if (GetSequence(input, inOutIdx, &length, inSz) < 0)
+        return ASN_PARSE_E;
+
+#if defined(OPENSSL_EXTRA) || defined(RSA_DECODE_EXTRA)
+    localIdx = *inOutIdx;
+    if (GetASNTag(input, &localIdx, &tag, inSz) < 0)
+        return BUFFER_E;
+
+    if (tag != ASN_INTEGER) {
+        /* not from decoded cert, will have algo id, skip past */
+        if (GetSequence(input, inOutIdx, &length, inSz) < 0)
+            return ASN_PARSE_E;
+
+        if (SkipObjectId(input, inOutIdx, inSz) < 0)
+            return ASN_PARSE_E;
+
+        /* Option NULL ASN.1 tag */
+        if (*inOutIdx  >= inSz) {
+            return BUFFER_E;
+        }
+
+        localIdx = *inOutIdx;
+        if (GetASNTag(input, &localIdx, &tag, inSz) < 0)
+            return ASN_PARSE_E;
+
+        if (tag == ASN_TAG_NULL) {
+            ret = GetASNNull(input, inOutIdx, inSz);
+            if (ret != 0)
+                return ret;
+        }
+
+        /* should have bit tag length and seq next */
+        ret = CheckBitString(input, inOutIdx, NULL, inSz, 1, NULL);
+        if (ret != 0)
+            return ret;
+
+        if (GetSequence(input, inOutIdx, &length, inSz) < 0)
+            return ASN_PARSE_E;
+    }
+#endif /* OPENSSL_EXTRA */
+
+    /* Get modulus */
+    ret = GetASNInt(input, inOutIdx, &length, inSz);
+    if (ret < 0) {
+        return ASN_RSA_KEY_E;
+    }
+    if (nSz)
+        *nSz = length;
+    if (n)
+        *n = &input[*inOutIdx];
+    *inOutIdx += length;
+
+    /* Get exponent */
+    ret = GetASNInt(input, inOutIdx, &length, inSz);
+    if (ret < 0) {
+        return ASN_RSA_KEY_E;
+    }
+    if (eSz)
+        *eSz = length;
+    if (e)
+        *e = &input[*inOutIdx];
+    *inOutIdx += length;
+
+    return ret;
+}
+
+int wc_RsaPublicKeyDecode(const byte* input, word32* inOutIdx, RsaKey* key,
+                       word32 inSz)
+{
+    int ret;
+    const byte *n = NULL, *e = NULL;
+    word32 nSz = 0, eSz = 0;
+
+    if (key == NULL)
+        return BAD_FUNC_ARG;
+
+    ret = wc_RsaPublicKeyDecode_ex(input, inOutIdx, inSz, &n, &nSz, &e, &eSz);
+    if (ret == 0) {
+        ret = wc_RsaPublicKeyDecodeRaw(n, nSz, e, eSz, key);
+    }
+
+    return ret;
 }
 
 /* import RSA public key elements (n, e) into RsaKey structure (key) */
@@ -3463,6 +4352,12 @@
         mp_clear(&key->n);
         return ASN_GETINT_E;
     }
+#ifdef HAVE_WOLF_BIGINT
+    if ((int)nSz > 0 && wc_bigint_from_unsigned_bin(&key->n.raw, n, nSz) != 0) {
+        mp_clear(&key->n);
+        return ASN_GETINT_E;
+    }
+#endif /* HAVE_WOLF_BIGINT */
 
     if (mp_init(&key->e) != MP_OKAY) {
         mp_clear(&key->n);
@@ -3474,6 +4369,13 @@
         mp_clear(&key->e);
         return ASN_GETINT_E;
     }
+#ifdef HAVE_WOLF_BIGINT
+    if ((int)eSz > 0 && wc_bigint_from_unsigned_bin(&key->e.raw, e, eSz) != 0) {
+        mp_clear(&key->n);
+        mp_clear(&key->e);
+        return ASN_GETINT_E;
+    }
+#endif /* HAVE_WOLF_BIGINT */
 
 #ifdef WOLFSSL_XILINX_CRYPT
     if (wc_InitRsaHw(key) != 0) {
@@ -3484,23 +4386,86 @@
     return 0;
 }
 #endif /* HAVE_USER_RSA */
-#endif
+#endif /* !NO_RSA */
 
 #ifndef NO_DH
 
 int wc_DhKeyDecode(const byte* input, word32* inOutIdx, DhKey* key, word32 inSz)
 {
-    int    length;
+    int ret = 0;
+    int length;
+    #if defined(WOLFSSL_QT) || defined(OPENSSL_ALL)
+    word32 oid = 0, temp = 0;
+    #endif
+
+    WOLFSSL_ENTER("wc_DhKeyDecode");
+
+    if (inOutIdx == NULL)
+        return BAD_FUNC_ARG;
 
     if (GetSequence(input, inOutIdx, &length, inSz) < 0)
         return ASN_PARSE_E;
 
+    #if defined(WOLFSSL_QT) || defined(OPENSSL_ALL)
+    temp = *inOutIdx;
+    #endif
+
+    /* Assume input started after 1.2.840.113549.1.3.1 dhKeyAgreement */
     if (GetInt(&key->p,  input, inOutIdx, inSz) < 0 ||
         GetInt(&key->g,  input, inOutIdx, inSz) < 0) {
-        return ASN_DH_KEY_E;
-    }
-
-    return 0;
+        ret = ASN_DH_KEY_E;
+    }
+
+    #if defined(WOLFSSL_QT) || defined(OPENSSL_ALL)
+    /* If ASN_DH_KEY_E: Check if input started at beginning of key */
+    if (ret == ASN_DH_KEY_E) {
+        /* rewind back to after the first sequence */
+        *inOutIdx = temp;
+        if (GetSequence(input, inOutIdx, &length, inSz) < 0)
+            return ASN_PARSE_E;
+
+        /* Check for dhKeyAgreement */
+        ret = GetObjectId(input, inOutIdx, &oid, oidKeyType, inSz);
+        if (oid != DHk || ret < 0)
+            return ASN_DH_KEY_E;
+
+        if (GetSequence(input, inOutIdx, &length, inSz) < 0)
+            return ASN_PARSE_E;
+
+        if (GetInt(&key->p,  input, inOutIdx, inSz) < 0 ||
+            GetInt(&key->g,  input, inOutIdx, inSz) < 0) {
+            return ASN_DH_KEY_E;
+        }
+    }
+
+    temp = *inOutIdx;
+    ret = (CheckBitString(input, inOutIdx, &length, inSz, 0, NULL) == 0);
+    if (ret > 0) {
+        /* Found Bit String */
+        if (GetInt(&key->pub, input, inOutIdx, inSz) == 0) {
+            WOLFSSL_MSG("Found Public Key");
+            ret = 0;
+        }
+    } else {
+        *inOutIdx = temp;
+        ret = (GetOctetString(input, inOutIdx, &length, inSz) >= 0);
+        if (ret > 0) {
+            /* Found Octet String */
+            if (GetInt(&key->priv, input, inOutIdx, inSz) == 0) {
+                WOLFSSL_MSG("Found Private Key");
+                ret = 0;
+            }
+        } else {
+            /* Don't use length from failed CheckBitString/GetOctetString */
+            *inOutIdx = temp;
+            ret = 0;
+        }
+    }
+    #endif /* WOLFSSL_QT || OPENSSL_ALL */
+
+    WOLFSSL_MSG("wc_DhKeyDecode Success");
+
+    return ret;
 }
 
 
@@ -3541,7 +4506,6 @@
 
     return 0;
 }
-
 #endif /* NO_DH */
 
 
@@ -3551,10 +4515,11 @@
                         word32 inSz)
 {
     int    length;
-
-    if (input == NULL || inOutIdx == NULL || key == NULL) {
-        return BAD_FUNC_ARG;
-    }
+    int    ret = 0;
+    word32 oid;
+
+    if (input == NULL || inOutIdx == NULL || key == NULL)
+        return BAD_FUNC_ARG;
 
     if (GetSequence(input, inOutIdx, &length, inSz) < 0)
         return ASN_PARSE_E;
@@ -3563,17 +4528,42 @@
         GetInt(&key->q,  input, inOutIdx, inSz) < 0 ||
         GetInt(&key->g,  input, inOutIdx, inSz) < 0 ||
         GetInt(&key->y,  input, inOutIdx, inSz) < 0 )
-        return ASN_DH_KEY_E;
+        ret = ASN_DH_KEY_E;
+
+    if (ret != 0) {
+        if (GetSequence(input, inOutIdx, &length, inSz) < 0)
+            return ASN_PARSE_E;
+
+        ret = GetObjectId(input, inOutIdx, &oid, oidIgnoreType, inSz);
+        if (ret != 0)
+            return ret;
+
+        if (GetSequence(input, inOutIdx, &length, inSz) < 0)
+            return ASN_PARSE_E;
+
+        if (GetInt(&key->p,  input, inOutIdx, inSz) < 0 ||
+            GetInt(&key->q,  input, inOutIdx, inSz) < 0 ||
+            GetInt(&key->g,  input, inOutIdx, inSz) < 0)
+            return ASN_DH_KEY_E;
+
+        if (CheckBitString(input, inOutIdx, &length, inSz, 0, NULL) < 0)
+            return ASN_PARSE_E;
+
+        if (GetInt(&key->y,  input, inOutIdx, inSz) < 0 )
+            return ASN_DH_KEY_E;
+
+        ret = 0;
+    }
 
     key->type = DSA_PUBLIC;
-    return 0;
+    return ret;
 }
 
 
 int DsaPrivateKeyDecode(const byte* input, word32* inOutIdx, DsaKey* key,
                         word32 inSz)
 {
-    int    length, version;
+    int    length, version, ret = 0, temp = 0;
 
     /* Sanity checks on input */
     if (input == NULL || inOutIdx == NULL || key == NULL) {
@@ -3583,15 +4573,63 @@
     if (GetSequence(input, inOutIdx, &length, inSz) < 0)
         return ASN_PARSE_E;
 
-    if (GetMyVersion(input, inOutIdx, &version, inSz) < 0)
-        return ASN_PARSE_E;
-
-    if (GetInt(&key->p,  input, inOutIdx, inSz) < 0 ||
-        GetInt(&key->q,  input, inOutIdx, inSz) < 0 ||
-        GetInt(&key->g,  input, inOutIdx, inSz) < 0 ||
-        GetInt(&key->y,  input, inOutIdx, inSz) < 0 ||
-        GetInt(&key->x,  input, inOutIdx, inSz) < 0 )
-        return ASN_DH_KEY_E;
+    temp = (int)*inOutIdx;
+
+    /* Default case expects a certificate with OctetString but no version ID */
+    ret = GetInt(&key->p, input, inOutIdx, inSz);
+    if (ret < 0) {
+        mp_clear(&key->p);
+        ret = ASN_PARSE_E;
+    }
+    else {
+        ret = GetInt(&key->q, input, inOutIdx, inSz);
+        if (ret < 0) {
+            mp_clear(&key->p);
+            mp_clear(&key->q);
+            ret = ASN_PARSE_E;
+        }
+        else {
+            ret = GetInt(&key->g, input, inOutIdx, inSz);
+            if (ret < 0) {
+                mp_clear(&key->p);
+                mp_clear(&key->q);
+                mp_clear(&key->g);
+                ret = ASN_PARSE_E;
+            }
+            else {
+                ret = GetOctetString(input, inOutIdx, &length, inSz);
+                if (ret < 0) {
+                    mp_clear(&key->p);
+                    mp_clear(&key->q);
+                    mp_clear(&key->g);
+                    ret = ASN_PARSE_E;
+                }
+                else {
+                    ret = GetInt(&key->y, input, inOutIdx, inSz);
+                    if (ret < 0) {
+                        mp_clear(&key->p);
+                        mp_clear(&key->q);
+                        mp_clear(&key->g);
+                        mp_clear(&key->y);
+                        ret = ASN_PARSE_E;
+                    }
+                }
+            }
+        }
+    }
+    /* An alternate pass if default certificate fails parsing */
+    if (ret == ASN_PARSE_E) {
+        *inOutIdx = temp;
+        if (GetMyVersion(input, inOutIdx, &version, inSz) < 0)
+            return ASN_PARSE_E;
+
+        if (GetInt(&key->p,  input, inOutIdx, inSz) < 0 ||
+            GetInt(&key->q,  input, inOutIdx, inSz) < 0 ||
+            GetInt(&key->g,  input, inOutIdx, inSz) < 0 ||
+            GetInt(&key->y,  input, inOutIdx, inSz) < 0 ||
+            GetInt(&key->x,  input, inOutIdx, inSz) < 0 )
+            return ASN_DH_KEY_E;
+    }
 
     key->type = DSA_PRIVATE;
     return 0;
@@ -3624,8 +4662,199 @@
     (void)heap;
 }
 
-/* Convert DsaKey key to DER format, write to output (inLen), return bytes
- written */
+#if !defined(HAVE_SELFTEST) && defined(WOLFSSL_KEY_GEN)
+/* Write a public DSA key to output */
+int wc_SetDsaPublicKey(byte* output, DsaKey* key,
+                           int outLen, int with_header)
+{
+    /* p, g, q = DSA params, y = public exponent */
+#ifdef WOLFSSL_SMALL_STACK
+    byte* p = NULL;
+    byte* g = NULL;
+    byte* q = NULL;
+    byte* y = NULL;
+#else
+    byte p[MAX_DSA_INT_SZ];
+    byte g[MAX_DSA_INT_SZ];
+    byte q[MAX_DSA_INT_SZ];
+    byte y[MAX_DSA_INT_SZ];
+#endif
+    byte innerSeq[MAX_SEQ_SZ];
+    byte outerSeq[MAX_SEQ_SZ];
+    byte bitString[1 + MAX_LENGTH_SZ + 1];
+    int  idx, pSz, gSz, qSz, ySz, innerSeqSz, outerSeqSz, bitStringSz = 0;
+
+    WOLFSSL_ENTER("wc_SetDsaPublicKey");
+
+    if (output == NULL || key == NULL || outLen < MAX_SEQ_SZ) {
+        return BAD_FUNC_ARG;
+    }
+
+    /* p */
+#ifdef WOLFSSL_SMALL_STACK
+    p = (byte*)XMALLOC(MAX_DSA_INT_SZ, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (p == NULL)
+        return MEMORY_E;
+#endif
+    if ((pSz = SetASNIntMP(&key->p, MAX_DSA_INT_SZ, p)) < 0) {
+        WOLFSSL_MSG("SetASNIntMP Error with p");
+#ifdef WOLFSSL_SMALL_STACK
+        XFREE(p, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+        return pSz;
+    }
+
+    /* q */
+#ifdef WOLFSSL_SMALL_STACK
+    q = (byte*)XMALLOC(MAX_DSA_INT_SZ, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (q == NULL)
+        return MEMORY_E;
+#endif
+    if ((qSz = SetASNIntMP(&key->q, MAX_DSA_INT_SZ, q)) < 0) {
+        WOLFSSL_MSG("SetASNIntMP Error with q");
+#ifdef WOLFSSL_SMALL_STACK
+        XFREE(p, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(q, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+        return qSz;
+    }
+
+    /* g */
+#ifdef WOLFSSL_SMALL_STACK
+    g = (byte*)XMALLOC(MAX_DSA_INT_SZ, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (g == NULL)
+        return MEMORY_E;
+#endif
+    if ((gSz = SetASNIntMP(&key->g, MAX_DSA_INT_SZ, g)) < 0) {
+        WOLFSSL_MSG("SetASNIntMP Error with g");
+#ifdef WOLFSSL_SMALL_STACK
+        XFREE(p, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(q, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(g, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+        return gSz;
+    }
+
+    /* y */
+#ifdef WOLFSSL_SMALL_STACK
+    y = (byte*)XMALLOC(MAX_DSA_INT_SZ, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (y == NULL)
+        return MEMORY_E;
+#endif
+    if ((ySz = SetASNIntMP(&key->y, MAX_DSA_INT_SZ, y)) < 0) {
+        WOLFSSL_MSG("SetASNIntMP Error with y");
+#ifdef WOLFSSL_SMALL_STACK
+        XFREE(p, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(q, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(g, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(y, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+        return ySz;
+    }
+
+    innerSeqSz  = SetSequence(pSz + qSz + gSz, innerSeq);
+
+    /* check output size */
+    if ((innerSeqSz + pSz + qSz + gSz) > outLen) {
+#ifdef WOLFSSL_SMALL_STACK
+        XFREE(p,    key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(q,    key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(g,    key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(y,    key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+        WOLFSSL_MSG("Error, output size smaller than outlen");
+        return BUFFER_E;
+    }
+
+    if (with_header) {
+        int algoSz;
+#ifdef WOLFSSL_SMALL_STACK
+        byte* algo = NULL;
+
+        algo = (byte*)XMALLOC(MAX_ALGO_SZ, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        if (algo == NULL) {
+            XFREE(p,    key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            XFREE(q,    key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            XFREE(g,    key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            XFREE(y,    key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            return MEMORY_E;
+        }
+#else
+        byte algo[MAX_ALGO_SZ];
+#endif
+        algoSz = SetAlgoID(DSAk, algo, oidKeyType, 0);
+        bitStringSz  = SetBitString(ySz, 0, bitString);
+        outerSeqSz = SetSequence(algoSz + innerSeqSz + pSz + qSz + gSz,
+                                                                      outerSeq);
+
+        idx = SetSequence(algoSz + innerSeqSz + pSz + qSz + gSz + bitStringSz +
+                                                      ySz + outerSeqSz, output);
+
+        /* check output size */
+        if ((idx + algoSz + bitStringSz + innerSeqSz + pSz + qSz + gSz + ySz) >
+                                                                       outLen) {
+            #ifdef WOLFSSL_SMALL_STACK
+                XFREE(p,    key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+                XFREE(q,    key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+                XFREE(g,    key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+                XFREE(y,    key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+                XFREE(algo, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            #endif
+            WOLFSSL_MSG("Error, output size smaller than outlen");
+            return BUFFER_E;
+        }
+
+        /* outerSeq */
+        XMEMCPY(output + idx, outerSeq, outerSeqSz);
+        idx += outerSeqSz;
+        /* algo */
+        XMEMCPY(output + idx, algo, algoSz);
+        idx += algoSz;
+#ifdef WOLFSSL_SMALL_STACK
+        XFREE(algo, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+    } else {
+        idx = 0;
+    }
+
+    /* innerSeq */
+    XMEMCPY(output + idx, innerSeq, innerSeqSz);
+    idx += innerSeqSz;
+    /* p */
+    XMEMCPY(output + idx, p, pSz);
+    idx += pSz;
+    /* q */
+    XMEMCPY(output + idx, q, qSz);
+    idx += qSz;
+    /* g */
+    XMEMCPY(output + idx, g, gSz);
+    idx += gSz;
+    /* bit string */
+    XMEMCPY(output + idx, bitString, bitStringSz);
+    idx += bitStringSz;
+    /* y */
+    XMEMCPY(output + idx, y, ySz);
+    idx += ySz;
+
+#ifdef WOLFSSL_SMALL_STACK
+    XFREE(p,    key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    XFREE(q,    key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    XFREE(g,    key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    XFREE(y,    key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+    return idx;
+}
+
+/* Convert DSA Public key to DER format, write to output (inLen), return bytes
+   written */
+int wc_DsaKeyToPublicDer(DsaKey* key, byte* output, word32 inLen)
+{
+    return wc_SetDsaPublicKey(output, key, inLen, 1);
+}
+#endif /* !HAVE_SELFTEST && WOLFSSL_KEY_GEN */
+
+/* Convert private DsaKey key to DER format, write to output (inLen),
+   return bytes written */
 int wc_DsaKeyToDer(DsaKey* key, byte* output, word32 inLen)
 {
     word32 seqSz, verSz, rawLen, intTotalLen = 0;
@@ -3675,8 +4904,10 @@
     seqSz = SetSequence(verSz + intTotalLen, seq);
 
     outLen = seqSz + verSz + intTotalLen;
-    if (outLen > (int)inLen)
-        return BAD_FUNC_ARG;
+    if (outLen > (int)inLen) {
+        FreeTmpDsas(tmps, key->heap);
+        return BAD_FUNC_ARG;
+    }
 
     /* write to output */
     XMEMCPY(output, seq, seqSz);
@@ -3695,8 +4926,8 @@
 
 #endif /* NO_DSA */
 
-
-void InitDecodedCert(DecodedCert* cert, byte* source, word32 inSz, void* heap)
+void InitDecodedCert(DecodedCert* cert,
+                     const byte* source, word32 inSz, void* heap)
 {
     if (cert != NULL) {
         XMEMSET(cert, 0, sizeof(DecodedCert));
@@ -3707,6 +4938,7 @@
         cert->source          = source;  /* don't own */
         cert->maxIdx          = inSz;    /* can't go over this index */
         cert->heap            = heap;
+        cert->maxPathLen      = WOLFSSL_MAX_PATH_LEN;
     #ifdef WOLFSSL_CERT_GEN
         cert->subjectSNEnc    = CTC_UTF8;
         cert->subjectCEnc     = CTC_PRINTABLE;
@@ -3716,7 +4948,9 @@
         cert->subjectOUEnc    = CTC_UTF8;
     #endif /* WOLFSSL_CERT_GEN */
 
+    #ifndef NO_CERTS
         InitSignatureCtx(&cert->sigCtx, heap, INVALID_DEVID);
+    #endif
     }
 }
 
@@ -3753,10 +4987,12 @@
 
 void FreeDecodedCert(DecodedCert* cert)
 {
+    if (cert == NULL)
+        return;
     if (cert->subjectCNStored == 1)
         XFREE(cert->subjectCN, cert->heap, DYNAMIC_TYPE_SUBJECT_CN);
     if (cert->pubKeyStored == 1)
-        XFREE(cert->publicKey, cert->heap, DYNAMIC_TYPE_PUBLIC_KEY);
+        XFREE((void*)cert->publicKey, cert->heap, DYNAMIC_TYPE_PUBLIC_KEY);
     if (cert->weOwnAltNames && cert->altNames)
         FreeAltNames(cert->altNames, cert->heap);
 #ifndef IGNORE_NAME_CONSTRAINTS
@@ -3778,7 +5014,13 @@
     if (cert->subjectName.fullName != NULL)
         XFREE(cert->subjectName.fullName, cert->heap, DYNAMIC_TYPE_X509);
 #endif /* OPENSSL_EXTRA */
+#ifdef WOLFSSL_RENESAS_TSIP_TLS
+    if (cert->tsip_encRsaKeyIdx != NULL)
+        XFREE(cert->tsip_encRsaKeyIdx, cert->heap, DYNAMIC_TYPE_RSA);
+#endif
+#ifndef NO_CERTS
     FreeSignatureCtx(&cert->sigCtx);
+#endif
 }
 
 static int GetCertHeader(DecodedCert* cert)
@@ -3788,18 +5030,23 @@
     if (GetSequence(cert->source, &cert->srcIdx, &len, cert->maxIdx) < 0)
         return ASN_PARSE_E;
 
+    /* Reset the max index for the size indicated in the outer wrapper. */
+    cert->maxIdx = len + cert->srcIdx;
     cert->certBegin = cert->srcIdx;
 
     if (GetSequence(cert->source, &cert->srcIdx, &len, cert->maxIdx) < 0)
         return ASN_PARSE_E;
+
     cert->sigIndex = len + cert->srcIdx;
+    if (cert->sigIndex > cert->maxIdx)
+        return ASN_PARSE_E;
 
     if (GetExplicitVersion(cert->source, &cert->srcIdx, &cert->version,
-                                                              cert->maxIdx) < 0)
+                                                            cert->sigIndex) < 0)
         return ASN_PARSE_E;
 
     if (GetSerialNumber(cert->source, &cert->srcIdx, cert->serial,
-                                        &cert->serialSz, cert->maxIdx) < 0)
+                                           &cert->serialSz, cert->sigIndex) < 0)
         return ASN_PARSE_E;
 
     return ret;
@@ -3807,12 +5054,12 @@
 
 #if !defined(NO_RSA)
 /* Store Rsa Key, may save later, Dsa could use in future */
-static int StoreRsaKey(DecodedCert* cert)
+static int StoreRsaKey(DecodedCert* cert, word32 bitStringEnd)
 {
     int    length;
     word32 recvd = cert->srcIdx;
 
-    if (GetSequence(cert->source, &cert->srcIdx, &length, cert->maxIdx) < 0)
+    if (GetSequence(cert->source, &cert->srcIdx, &length, bitStringEnd) < 0)
         return ASN_PARSE_E;
 
     recvd = cert->srcIdx - recvd;
@@ -3820,7 +5067,9 @@
 
     while (recvd--)
        cert->srcIdx--;
-
+#if defined(WOLFSSL_RENESAS_TSIP)
+    cert->sigCtx.pubkey_n_start = cert->sigCtx.pubkey_e_start = cert->srcIdx;
+#endif
     cert->pubKeySize = length;
     cert->publicKey = cert->source + cert->srcIdx;
     cert->srcIdx += length;
@@ -3838,7 +5087,7 @@
         word32 oidSz = 0;
 
         ret = wc_ecc_get_oid(oid, NULL, &oidSz);
-        if (ret < 0 || oidSz <= 0) {
+        if (ret < 0 || oidSz == 0) {
             WOLFSSL_MSG("CheckCurve not found");
             ret = ALGO_ID_E;
         }
@@ -3851,6 +5100,9 @@
 static int GetKey(DecodedCert* cert)
 {
     int length;
+#if !defined(NO_DSA) && defined(WOLFSSL_QT)
+    int tmpLen;
+#endif
 #if defined(HAVE_ECC) || defined(HAVE_NTRU)
     int tmpIdx = cert->srcIdx;
 #endif
@@ -3858,6 +5110,10 @@
     if (GetSequence(cert->source, &cert->srcIdx, &length, cert->maxIdx) < 0)
         return ASN_PARSE_E;
 
+#if !defined(NO_DSA) && defined(WOLFSSL_QT)
+    tmpLen = length + 4;
+#endif
+
     if (GetAlgoId(cert->source, &cert->srcIdx,
                   &cert->keyOID, oidKeyType, cert->maxIdx) < 0)
         return ASN_PARSE_E;
@@ -3867,12 +5123,20 @@
         case RSAk:
         {
             int ret;
-            ret = CheckBitString(cert->source, &cert->srcIdx, NULL,
+
+            ret = CheckBitString(cert->source, &cert->srcIdx, &length,
                                  cert->maxIdx, 1, NULL);
             if (ret != 0)
                 return ret;
 
-            return StoreRsaKey(cert);
+            #ifdef HAVE_OCSP
+                ret = CalcHashId(cert->source + cert->srcIdx, length,
+                        cert->subjectKeyHash);
+                if (ret != 0)
+                    return ret;
+            #endif
+
+            return StoreRsaKey(cert, cert->srcIdx + length);
         }
 
     #endif /* NO_RSA */
@@ -3884,6 +5148,7 @@
             word16      keyLen;
             word32      rc;
             word32      remaining = cert->maxIdx - cert->srcIdx;
+            byte*       publicKey;
 #ifdef WOLFSSL_SMALL_STACK
             byte*       keyBlob = NULL;
 #else
@@ -3921,15 +5186,16 @@
 
             cert->srcIdx = tmpIdx + (int)(next - key);
 
-            cert->publicKey = (byte*)XMALLOC(keyLen, cert->heap,
-                                             DYNAMIC_TYPE_PUBLIC_KEY);
-            if (cert->publicKey == NULL) {
+            publicKey = (byte*)XMALLOC(keyLen, cert->heap,
+                                       DYNAMIC_TYPE_PUBLIC_KEY);
+            if (publicKey == NULL) {
 #ifdef WOLFSSL_SMALL_STACK
                 XFREE(keyBlob, cert->heap, DYNAMIC_TYPE_TMP_BUFFER);
 #endif
                 return MEMORY_E;
             }
-            XMEMCPY(cert->publicKey, keyBlob, keyLen);
+            XMEMCPY(publicKey, keyBlob, keyLen);
+            cert->publicKey = publicKey;
             cert->pubKeyStored = 1;
             cert->pubKeySize   = keyLen;
 
@@ -3946,9 +5212,15 @@
             int ret;
             byte seq[5];
             int pubLen = length + 1 + SetLength(length, seq);
-
-            if (cert->source[cert->srcIdx] !=
-                                             (ASN_SEQUENCE | ASN_CONSTRUCTED)) {
+            word32 localIdx;
+            byte* publicKey;
+            byte  tag;
+
+            localIdx = cert->srcIdx;
+            if (GetASNTag(cert->source, &localIdx, &tag, cert->maxIdx) < 0)
+                return ASN_PARSE_E;
+
+            if (tag != (ASN_SEQUENCE | ASN_CONSTRUCTED)) {
                 if (GetObjectId(cert->source, &cert->srcIdx,
                             &cert->pkCurveOID, oidCurveType, cert->maxIdx) < 0)
                     return ASN_PARSE_E;
@@ -3961,13 +5233,20 @@
                                                          cert->maxIdx, 1, NULL);
                 if (ret != 0)
                     return ret;
-            }
-
-            cert->publicKey = (byte*)XMALLOC(pubLen, cert->heap,
-                                             DYNAMIC_TYPE_PUBLIC_KEY);
-            if (cert->publicKey == NULL)
+            #ifdef HAVE_OCSP
+                ret = CalcHashId(cert->source + cert->srcIdx, length,
+                        cert->subjectKeyHash);
+                if (ret != 0)
+                    return ret;
+            #endif
+            }
+
+            publicKey = (byte*)XMALLOC(pubLen, cert->heap,
+                                       DYNAMIC_TYPE_PUBLIC_KEY);
+            if (publicKey == NULL)
                 return MEMORY_E;
-            XMEMCPY(cert->publicKey, &cert->source[tmpIdx], pubLen);
+            XMEMCPY(publicKey, &cert->source[tmpIdx], pubLen);
+            cert->publicKey = publicKey;
             cert->pubKeyStored = 1;
             cert->pubKeySize   = pubLen;
 
@@ -3979,6 +5258,7 @@
     #ifdef HAVE_ED25519
         case ED25519k:
         {
+            byte* publicKey;
             int ret;
 
             cert->pkCurveOID = ED25519k;
@@ -3988,11 +5268,19 @@
             if (ret != 0)
                 return ret;
 
-            cert->publicKey = (byte*) XMALLOC(length, cert->heap,
-                                              DYNAMIC_TYPE_PUBLIC_KEY);
-            if (cert->publicKey == NULL)
+            #ifdef HAVE_OCSP
+                ret = CalcHashId(cert->source + cert->srcIdx, length,
+                        cert->subjectKeyHash);
+                if (ret != 0)
+                    return ret;
+            #endif
+
+            publicKey = (byte*) XMALLOC(length, cert->heap,
+                                        DYNAMIC_TYPE_PUBLIC_KEY);
+            if (publicKey == NULL)
                 return MEMORY_E;
-            XMEMCPY(cert->publicKey, &cert->source[cert->srcIdx], length);
+            XMEMCPY(publicKey, &cert->source[cert->srcIdx], length);
+            cert->publicKey = publicKey;
             cert->pubKeyStored = 1;
             cert->pubKeySize   = length;
 
@@ -4001,24 +5289,237 @@
             return 0;
         }
     #endif /* HAVE_ED25519 */
+    #ifdef HAVE_ED448
+        case ED448k:
+        {
+            byte* publicKey;
+            int ret;
+
+            cert->pkCurveOID = ED448k;
+
+            ret = CheckBitString(cert->source, &cert->srcIdx, &length,
+                                 cert->maxIdx, 1, NULL);
+            if (ret != 0)
+                return ret;
+
+            #ifdef HAVE_OCSP
+                ret = CalcHashId(cert->source + cert->srcIdx, length,
+                                 cert->subjectKeyHash);
+                if (ret != 0)
+                    return ret;
+            #endif
+
+            publicKey = (byte*) XMALLOC(length, cert->heap,
+                                        DYNAMIC_TYPE_PUBLIC_KEY);
+            if (publicKey == NULL)
+                return MEMORY_E;
+            XMEMCPY(publicKey, &cert->source[cert->srcIdx], length);
+            cert->publicKey = publicKey;
+            cert->pubKeyStored = 1;
+            cert->pubKeySize   = length;
+
+            cert->srcIdx += length;
+
+            return 0;
+        }
+    #endif /* HAVE_ED448 */
+    #if !defined(NO_DSA) && defined(WOLFSSL_QT)
+        case DSAk:
+        {
+            int ret;
+            ret = GetSequence(cert->source, &cert->srcIdx, &length,
+                           cert->maxIdx);
+            if (ret < 0)
+                return ret;
+
+            ret = SkipInt(cert->source, &cert->srcIdx, cert->maxIdx);
+            if (ret != 0)
+                return ret;
+            ret = SkipInt(cert->source, &cert->srcIdx, cert->maxIdx);
+            if (ret != 0)
+                return ret;
+            ret = SkipInt(cert->source, &cert->srcIdx, cert->maxIdx);
+            if (ret != 0)
+                return ret;
+
+            ret = CheckBitString(cert->source, &cert->srcIdx, &length,
+                                                         cert->maxIdx, 1, NULL);
+            if (ret != 0)
+                return ret;
+
+            ret = GetASNInt(cert->source, &cert->srcIdx, &length, cert->maxIdx);
+            if (ret !=0)
+                return ASN_PARSE_E;
+
+            cert->publicKey = cert->source + tmpIdx;
+            cert->pubKeySize = tmpLen;
+            cert->srcIdx += length;
+            return 0;
+        }
+    #endif /* NO_DSA && QT */
         default:
             return ASN_UNKNOWN_OID_E;
     }
 }
 
+#if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
+#if defined(HAVE_ECC)
+/* Converts ECC curve enum values in ecc_curve_id to the associated OpenSSL NID
+    value */
+WOLFSSL_API int EccEnumToNID(int n)
+{
+    WOLFSSL_ENTER("EccEnumToNID()");
+
+    switch(n) {
+        case ECC_SECP192R1:
+            return NID_X9_62_prime192v1;
+        case ECC_PRIME192V2:
+            return NID_X9_62_prime192v2;
+        case ECC_PRIME192V3:
+            return NID_X9_62_prime192v3;
+        case ECC_PRIME239V1:
+            return NID_X9_62_prime239v1;
+        case ECC_PRIME239V2:
+            return NID_X9_62_prime239v2;
+        case ECC_PRIME239V3:
+            return NID_X9_62_prime239v3;
+        case ECC_SECP256R1:
+            return NID_X9_62_prime256v1;
+        case ECC_SECP112R1:
+            return NID_secp112r1;
+        case ECC_SECP112R2:
+            return NID_secp112r2;
+        case ECC_SECP128R1:
+            return NID_secp128r1;
+        case ECC_SECP128R2:
+            return NID_secp128r2;
+        case ECC_SECP160R1:
+            return NID_secp160r1;
+        case ECC_SECP160R2:
+            return NID_secp160r2;
+        case ECC_SECP224R1:
+            return NID_secp224r1;
+        case ECC_SECP384R1:
+            return NID_secp384r1;
+        case ECC_SECP521R1:
+            return NID_secp521r1;
+        case ECC_SECP160K1:
+            return NID_secp160k1;
+        case ECC_SECP192K1:
+            return NID_secp192k1;
+        case ECC_SECP224K1:
+            return NID_secp224k1;
+        case ECC_SECP256K1:
+            return NID_secp256k1;
+        case ECC_BRAINPOOLP160R1:
+            return NID_brainpoolP160r1;
+        case ECC_BRAINPOOLP192R1:
+            return NID_brainpoolP192r1;
+        case ECC_BRAINPOOLP224R1:
+            return NID_brainpoolP224r1;
+        case ECC_BRAINPOOLP256R1:
+            return NID_brainpoolP256r1;
+        case ECC_BRAINPOOLP320R1:
+            return NID_brainpoolP320r1;
+        case ECC_BRAINPOOLP384R1:
+            return NID_brainpoolP384r1;
+        case ECC_BRAINPOOLP512R1:
+            return NID_brainpoolP512r1;
+        default:
+            WOLFSSL_MSG("NID not found");
+            return -1;
+    }
+}
+#endif /* HAVE_ECC */
+#endif /* OPENSSL_EXTRA || OPENSSL_EXTRA_X509_SMALL */
+
+#if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
+int wc_OBJ_sn2nid(const char *sn)
+{
+    const struct {
+        const char *sn;
+        int  nid;
+    } sn2nid[] = {
+        {WOLFSSL_COMMON_NAME, NID_commonName},
+        {WOLFSSL_COUNTRY_NAME, NID_countryName},
+        {WOLFSSL_LOCALITY_NAME, NID_localityName},
+        {WOLFSSL_STATE_NAME, NID_stateOrProvinceName},
+        {WOLFSSL_ORG_NAME, NID_organizationName},
+        {WOLFSSL_ORGUNIT_NAME, NID_organizationalUnitName},
+        {WOLFSSL_EMAIL_ADDR, NID_emailAddress},
+        {NULL, -1}};
+
+    int i;
+    #ifdef HAVE_ECC
+    int eccEnum;
+    #endif
+    WOLFSSL_ENTER("OBJ_sn2nid");
+    for(i=0; sn2nid[i].sn != NULL; i++) {
+        if(XSTRNCMP(sn, sn2nid[i].sn, XSTRLEN(sn2nid[i].sn)) == 0) {
+            return sn2nid[i].nid;
+        }
+    }
+    #ifdef HAVE_ECC
+    /* Nginx uses this OpenSSL string. */
+    if (XSTRNCMP(sn, "prime256v1", 10) == 0)
+        sn = "SECP256R1";
+    if (XSTRNCMP(sn, "secp384r1", 10) == 0)
+        sn = "SECP384R1";
+    /* find based on name and return NID */
+    for (i = 0; ecc_sets[i].size != 0 && ecc_sets[i].name != NULL; i++) {
+        if (XSTRNCMP(sn, ecc_sets[i].name, ECC_MAXNAME) == 0) {
+            eccEnum = ecc_sets[i].id;
+            /* Convert enum value in ecc_curve_id to OpenSSL NID */
+            return EccEnumToNID(eccEnum);
+        }
+    }
+    #endif
+
+    return NID_undef;
+}
+#endif
+
+/* Routine for calculating hashId */
+int CalcHashId(const byte* data, word32 len, byte* hash)
+{
+    int ret;
+
+#ifdef WOLF_CRYPTO_CB
+    /* try to use a registered crypto callback */
+    ret = wc_CryptoCb_Sha256Hash(NULL, data, len, hash);
+    if (ret != CRYPTOCB_UNAVAILABLE)
+        return ret;
+    /* fall-through when unavailable */
+#endif
+
+#if defined(NO_SHA) && !defined(NO_SHA256)
+    ret = wc_Sha256Hash(data, len, hash);
+#elif !defined(NO_SHA)
+    ret = wc_ShaHash(data, len, hash);
+#else
+    ret = NOT_COMPILED_IN;
+#endif
+
+    return ret;
+}
+
 /* process NAME, either issuer or subject */
-static int GetName(DecodedCert* cert, int nameType)
+static int GetName(DecodedCert* cert, int nameType, int maxIdx)
 {
     int    length;  /* length of all distinguished names */
     int    dummy;
     int    ret;
     char*  full;
     byte*  hash;
-    word32 idx;
+    word32 idx, localIdx = 0;
+    byte   tag;
     #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
         DecodedName* dName =
                   (nameType == ISSUER) ? &cert->issuerName : &cert->subjectName;
         int dcnum = 0;
+        #ifdef OPENSSL_EXTRA
+        int count = 0;
+        #endif
     #endif /* OPENSSL_EXTRA */
 
     WOLFSSL_MSG("Getting Cert Name");
@@ -4032,14 +5533,19 @@
         hash = cert->subjectHash;
     }
 
-    if (cert->srcIdx >= cert->maxIdx) {
-        return BUFFER_E;
-    }
-
-    if (cert->source[cert->srcIdx] == ASN_OBJECT_ID) {
+    if (cert->srcIdx >= (word32)maxIdx) {
+        return BUFFER_E;
+    }
+
+    localIdx = cert->srcIdx;
+    if (GetASNTag(cert->source, &localIdx, &tag, maxIdx) < 0) {
+        return ASN_PARSE_E;
+    }
+
+    if (tag == ASN_OBJECT_ID) {
         WOLFSSL_MSG("Trying optional prefix...");
 
-        if (SkipObjectId(cert->source, &cert->srcIdx, cert->maxIdx) < 0)
+        if (SkipObjectId(cert->source, &cert->srcIdx, maxIdx) < 0)
             return ASN_PARSE_E;
         WOLFSSL_MSG("Got optional prefix");
     }
@@ -4048,21 +5554,17 @@
      * calculated over the entire DER encoding of the Name field, including
      * the tag and length. */
     idx = cert->srcIdx;
-    if (GetSequence(cert->source, &cert->srcIdx, &length, cert->maxIdx) < 0)
-        return ASN_PARSE_E;
-
-#ifdef NO_SHA
-    ret = wc_Sha256Hash(&cert->source[idx], length + cert->srcIdx - idx, hash);
-#else
-    ret = wc_ShaHash(&cert->source[idx], length + cert->srcIdx - idx, hash);
-#endif
+    if (GetSequence(cert->source, &cert->srcIdx, &length, maxIdx) < 0)
+        return ASN_PARSE_E;
+
+    ret = CalcHashId(&cert->source[idx], length + cert->srcIdx - idx, hash);
     if (ret != 0)
         return ret;
 
     length += cert->srcIdx;
     idx = 0;
 
-#ifdef HAVE_PKCS7
+#if defined(HAVE_PKCS7) || defined(WOLFSSL_CERT_EXT)
     /* store pointer to raw issuer */
     if (nameType == ISSUER) {
         cert->issuerRaw = &cert->source[cert->srcIdx];
@@ -4077,46 +5579,39 @@
 #endif
 
     while (cert->srcIdx < (word32)length) {
-        byte   b;
-        byte   joint[2];
-        byte   tooBig = FALSE;
-        int    oidSz;
-
-        if (GetSet(cert->source, &cert->srcIdx, &dummy, cert->maxIdx) < 0) {
+        byte        b       = 0;
+        byte        joint[3];
+        byte        tooBig  = FALSE;
+        int         oidSz;
+        const char* copy    = NULL;
+        int         copyLen = 0;
+        int         strLen  = 0;
+        byte        id      = 0;
+
+        if (GetSet(cert->source, &cert->srcIdx, &dummy, maxIdx) < 0) {
             WOLFSSL_MSG("Cert name lacks set header, trying sequence");
         }
 
-        if (GetSequence(cert->source, &cert->srcIdx, &dummy, cert->maxIdx) <= 0)
-            return ASN_PARSE_E;
-
-        ret = GetASNObjectId(cert->source, &cert->srcIdx, &oidSz, cert->maxIdx);
+        if (GetSequence(cert->source, &cert->srcIdx, &dummy, maxIdx) <= 0)
+            return ASN_PARSE_E;
+
+        ret = GetASNObjectId(cert->source, &cert->srcIdx, &oidSz, maxIdx);
         if (ret != 0)
             return ret;
 
         /* make sure there is room for joint */
-        if ((cert->srcIdx + sizeof(joint)) > cert->maxIdx)
+        if ((cert->srcIdx + sizeof(joint)) > (word32)maxIdx)
             return ASN_PARSE_E;
 
         XMEMCPY(joint, &cert->source[cert->srcIdx], sizeof(joint));
 
         /* v1 name types */
         if (joint[0] == 0x55 && joint[1] == 0x04) {
-            const char*  copy = NULL;
-            int    strLen;
-            byte   id;
-
-            cert->srcIdx += 2;
-            id = cert->source[cert->srcIdx++];
-            b  = cert->source[cert->srcIdx++]; /* encoding */
-
-            if (GetLength(cert->source, &cert->srcIdx, &strLen,
-                          cert->maxIdx) < 0)
+            cert->srcIdx += 3;
+            id = joint[2];
+            if (GetHeader(cert->source, &b, &cert->srcIdx, &strLen,
+                          maxIdx, 1) < 0) {
                 return ASN_PARSE_E;
-
-            if ( (strLen + 14) > (int)(ASN_NAME_MAX - idx)) {
-                /* include biggest pre fix header too 4 = "/serialNumber=" */
-                WOLFSSL_MSG("ASN Name too big, skipping");
-                tooBig = TRUE;
             }
 
             if (id == ASN_COMMON_NAME) {
@@ -4127,6 +5622,7 @@
                 }
 
                 copy = WOLFSSL_COMMON_NAME;
+                copyLen = sizeof(WOLFSSL_COMMON_NAME) - 1;
                 #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
                     dName->cnIdx = cert->srcIdx;
                     dName->cnLen = strLen;
@@ -4134,6 +5630,7 @@
             }
             else if (id == ASN_SUR_NAME) {
                 copy = WOLFSSL_SUR_NAME;
+                copyLen = sizeof(WOLFSSL_SUR_NAME) - 1;
                 #ifdef WOLFSSL_CERT_GEN
                     if (nameType == SUBJECT) {
                         cert->subjectSN = (char*)&cert->source[cert->srcIdx];
@@ -4148,6 +5645,7 @@
             }
             else if (id == ASN_COUNTRY_NAME) {
                 copy = WOLFSSL_COUNTRY_NAME;
+                copyLen = sizeof(WOLFSSL_COUNTRY_NAME) - 1;
                 #ifdef WOLFSSL_CERT_GEN
                     if (nameType == SUBJECT) {
                         cert->subjectC = (char*)&cert->source[cert->srcIdx];
@@ -4162,6 +5660,7 @@
             }
             else if (id == ASN_LOCALITY_NAME) {
                 copy = WOLFSSL_LOCALITY_NAME;
+                copyLen = sizeof(WOLFSSL_LOCALITY_NAME) - 1;
                 #ifdef WOLFSSL_CERT_GEN
                     if (nameType == SUBJECT) {
                         cert->subjectL = (char*)&cert->source[cert->srcIdx];
@@ -4176,6 +5675,7 @@
             }
             else if (id == ASN_STATE_NAME) {
                 copy = WOLFSSL_STATE_NAME;
+                copyLen = sizeof(WOLFSSL_STATE_NAME) - 1;
                 #ifdef WOLFSSL_CERT_GEN
                     if (nameType == SUBJECT) {
                         cert->subjectST = (char*)&cert->source[cert->srcIdx];
@@ -4190,6 +5690,7 @@
             }
             else if (id == ASN_ORG_NAME) {
                 copy = WOLFSSL_ORG_NAME;
+                copyLen = sizeof(WOLFSSL_ORG_NAME) - 1;
                 #ifdef WOLFSSL_CERT_GEN
                     if (nameType == SUBJECT) {
                         cert->subjectO = (char*)&cert->source[cert->srcIdx];
@@ -4204,6 +5705,7 @@
             }
             else if (id == ASN_ORGUNIT_NAME) {
                 copy = WOLFSSL_ORGUNIT_NAME;
+                copyLen = sizeof(WOLFSSL_ORGUNIT_NAME) - 1;
                 #ifdef WOLFSSL_CERT_GEN
                     if (nameType == SUBJECT) {
                         cert->subjectOU = (char*)&cert->source[cert->srcIdx];
@@ -4218,33 +5720,101 @@
             }
             else if (id == ASN_SERIAL_NUMBER) {
                 copy = WOLFSSL_SERIAL_NUMBER;
+                copyLen = sizeof(WOLFSSL_SERIAL_NUMBER) - 1;
+                #ifdef WOLFSSL_CERT_GEN
+                    if (nameType == SUBJECT) {
+                        cert->subjectSND = (char*)&cert->source[cert->srcIdx];
+                        cert->subjectSNDLen = strLen;
+                        cert->subjectSNDEnc = b;
+                    }
+                #endif /* WOLFSSL_CERT_GEN */
                 #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
                     dName->snIdx = cert->srcIdx;
                     dName->snLen = strLen;
                 #endif /* OPENSSL_EXTRA */
             }
-            if (copy && !tooBig) {
-                XMEMCPY(&full[idx], copy, XSTRLEN(copy));
-                idx += (word32)XSTRLEN(copy);
-            #ifdef WOLFSSL_WPAS
-                full[idx] = '=';
-                idx++;
-            #endif
-                XMEMCPY(&full[idx], &cert->source[cert->srcIdx], strLen);
-                idx += strLen;
-            }
-
-            cert->srcIdx += strLen;
-        }
+        #ifdef WOLFSSL_CERT_EXT
+            else if (id == ASN_BUS_CAT) {
+                copy = WOLFSSL_BUS_CAT;
+                copyLen = sizeof(WOLFSSL_BUS_CAT) - 1;
+            #ifdef WOLFSSL_CERT_GEN
+                if (nameType == SUBJECT) {
+                    cert->subjectBC = (char*)&cert->source[cert->srcIdx];
+                    cert->subjectBCLen = strLen;
+                    cert->subjectBCEnc = b;
+                }
+            #endif /* WOLFSSL_CERT_GEN */
+            #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
+                dName->bcIdx = cert->srcIdx;
+                dName->bcLen = strLen;
+            #endif /* OPENSSL_EXTRA */
+            }
+        #endif /* WOLFSSL_CERT_EXT */
+        }
+    #ifdef WOLFSSL_CERT_EXT
+        else if ((cert->srcIdx + ASN_JOI_PREFIX_SZ + 2 <= (word32)maxIdx) &&
+                 (0 == XMEMCMP(&cert->source[cert->srcIdx], ASN_JOI_PREFIX,
+                               ASN_JOI_PREFIX_SZ)) &&
+                 ((cert->source[cert->srcIdx+ASN_JOI_PREFIX_SZ] == ASN_JOI_C) ||
+                  (cert->source[cert->srcIdx+ASN_JOI_PREFIX_SZ] == ASN_JOI_ST)))
+        {
+            cert->srcIdx += ASN_JOI_PREFIX_SZ;
+            id = cert->source[cert->srcIdx++];
+            b = cert->source[cert->srcIdx++]; /* encoding */
+
+            if (GetLength(cert->source, &cert->srcIdx, &strLen,
+                          maxIdx) < 0)
+                return ASN_PARSE_E;
+
+            /* Check for jurisdiction of incorporation country name */
+            if (id == ASN_JOI_C) {
+                copy = WOLFSSL_JOI_C;
+                copyLen = sizeof(WOLFSSL_JOI_C) - 1;
+                #ifdef WOLFSSL_CERT_GEN
+                    if (nameType == SUBJECT) {
+                        cert->subjectJC = (char*)&cert->source[cert->srcIdx];
+                        cert->subjectJCLen = strLen;
+                        cert->subjectJCEnc = b;
+                    }
+                #endif /* WOLFSSL_CERT_GEN */
+                #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
+                    dName->jcIdx = cert->srcIdx;
+                    dName->jcLen = strLen;
+                #endif /* OPENSSL_EXTRA */
+            }
+
+            /* Check for jurisdiction of incorporation state name */
+            else if (id == ASN_JOI_ST) {
+                copy = WOLFSSL_JOI_ST;
+                copyLen = sizeof(WOLFSSL_JOI_ST) - 1;
+                #ifdef WOLFSSL_CERT_GEN
+                    if (nameType == SUBJECT) {
+                        cert->subjectJS = (char*)&cert->source[cert->srcIdx];
+                        cert->subjectJSLen = strLen;
+                        cert->subjectJSEnc = b;
+                    }
+                #endif /* WOLFSSL_CERT_GEN */
+                #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
+                    dName->jsIdx = cert->srcIdx;
+                    dName->jsLen = strLen;
+                #endif /* OPENSSL_EXTRA */
+            }
+
+            if ((strLen + copyLen) > (int)(ASN_NAME_MAX - idx)) {
+                WOLFSSL_MSG("ASN Name too big, skipping");
+                tooBig = TRUE;
+            }
+        }
+    #endif /* WOLFSSL_CERT_EXT */
         else {
             /* skip */
             byte email = FALSE;
             byte pilot = FALSE;
-            byte id    = 0;
-            int  adv;
-
-            if (joint[0] == 0x2a && joint[1] == 0x86)  /* email id hdr */
+
+            if (joint[0] == 0x2a && joint[1] == 0x86) {  /* email id hdr */
+                id = ASN_EMAIL_NAME;
                 email = TRUE;
+            }
 
             if (joint[0] == 0x9  && joint[1] == 0x92) { /* uid id hdr */
                 /* last value of OID is the type of pilot attribute */
@@ -4254,37 +5824,37 @@
 
             cert->srcIdx += oidSz + 1;
 
-            if (GetLength(cert->source, &cert->srcIdx, &adv, cert->maxIdx) < 0)
+            if (GetLength(cert->source, &cert->srcIdx, &strLen, maxIdx) < 0)
                 return ASN_PARSE_E;
 
-            if (adv > (int)(ASN_NAME_MAX - idx)) {
+            if (strLen > (int)(ASN_NAME_MAX - idx)) {
                 WOLFSSL_MSG("ASN name too big, skipping");
                 tooBig = TRUE;
             }
 
             if (email) {
-                if ( (14 + adv) > (int)(ASN_NAME_MAX - idx)) {
+                copyLen = sizeof(WOLFSSL_EMAIL_ADDR) - 1;
+                if ((copyLen + strLen) > (int)(ASN_NAME_MAX - idx)) {
                     WOLFSSL_MSG("ASN name too big, skipping");
                     tooBig = TRUE;
                 }
-                if (!tooBig) {
-                    XMEMCPY(&full[idx], "/emailAddress=", 14);
-                    idx += 14;
+                else {
+                    copy = WOLFSSL_EMAIL_ADDR;
                 }
 
                 #ifdef WOLFSSL_CERT_GEN
                     if (nameType == SUBJECT) {
                         cert->subjectEmail = (char*)&cert->source[cert->srcIdx];
-                        cert->subjectEmailLen = adv;
+                        cert->subjectEmailLen = strLen;
                     }
                 #endif /* WOLFSSL_CERT_GEN */
                 #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
                     dName->emailIdx = cert->srcIdx;
-                    dName->emailLen = adv;
+                    dName->emailLen = strLen;
                 #endif /* OPENSSL_EXTRA */
                 #ifndef IGNORE_NAME_CONSTRAINTS
                     {
-                        DNS_entry* emailName = NULL;
+                        DNS_entry* emailName;
 
                         emailName = (DNS_entry*)XMALLOC(sizeof(DNS_entry),
                                               cert->heap, DYNAMIC_TYPE_ALTNAME);
@@ -4293,70 +5863,79 @@
                             return MEMORY_E;
                         }
                         emailName->type = 0;
-                        emailName->name = (char*)XMALLOC(adv + 1,
+                        emailName->name = (char*)XMALLOC(strLen + 1,
                                               cert->heap, DYNAMIC_TYPE_ALTNAME);
                         if (emailName->name == NULL) {
                             WOLFSSL_MSG("\tOut of Memory");
                             XFREE(emailName, cert->heap, DYNAMIC_TYPE_ALTNAME);
                             return MEMORY_E;
                         }
-                        emailName->len = adv;
-                        XMEMCPY(emailName->name,
-                                              &cert->source[cert->srcIdx], adv);
-                        emailName->name[adv] = '\0';
+                        emailName->len = strLen;
+                        XMEMCPY(emailName->name, &cert->source[cert->srcIdx],
+                                                                        strLen);
+                        emailName->name[strLen] = '\0';
 
                         emailName->next = cert->altEmailNames;
                         cert->altEmailNames = emailName;
                     }
                 #endif /* IGNORE_NAME_CONSTRAINTS */
-                if (!tooBig) {
-                    XMEMCPY(&full[idx], &cert->source[cert->srcIdx], adv);
-                    idx += adv;
-                }
             }
 
             if (pilot) {
-                if ( (5 + adv) > (int)(ASN_NAME_MAX - idx)) {
-                    WOLFSSL_MSG("ASN name too big, skipping");
-                    tooBig = TRUE;
-                }
-                if (!tooBig) {
-                    switch (id) {
-                        case ASN_USER_ID:
-                            XMEMCPY(&full[idx], "/UID=", 5);
-                            idx += 5;
-                        #if defined(OPENSSL_EXTRA) || \
-                            defined(OPENSSL_EXTRA_X509_SMALL)
-                            dName->uidIdx = cert->srcIdx;
-                            dName->uidLen = adv;
-                        #endif /* OPENSSL_EXTRA */
-                            break;
-
-                        case ASN_DOMAIN_COMPONENT:
-                            XMEMCPY(&full[idx], "/DC=", 4);
-                            idx += 4;
-                        #if defined(OPENSSL_EXTRA) || \
-                            defined(OPENSSL_EXTRA_X509_SMALL)
-                            dName->dcIdx[dcnum] = cert->srcIdx;
-                            dName->dcLen[dcnum] = adv;
-                            dName->dcNum = dcnum + 1;
-                            dcnum++;
-                        #endif /* OPENSSL_EXTRA */
-                            break;
-
-                        default:
-                            WOLFSSL_MSG("Unknown pilot attribute type");
-                            return ASN_PARSE_E;
-                    }
-                    XMEMCPY(&full[idx], &cert->source[cert->srcIdx], adv);
-                    idx += adv;
-                }
-            }
-
-            cert->srcIdx += adv;
-        }
+                switch (id) {
+                    case ASN_USER_ID:
+                        copy = WOLFSSL_USER_ID;
+                        copyLen = sizeof(WOLFSSL_USER_ID) - 1;
+                    #if defined(OPENSSL_EXTRA) || \
+                        defined(OPENSSL_EXTRA_X509_SMALL)
+                        dName->uidIdx = cert->srcIdx;
+                        dName->uidLen = strLen;
+                    #endif /* OPENSSL_EXTRA */
+                        break;
+
+                    case ASN_DOMAIN_COMPONENT:
+                        copy = WOLFSSL_DOMAIN_COMPONENT;
+                        copyLen = sizeof(WOLFSSL_DOMAIN_COMPONENT) - 1;
+                    #if defined(OPENSSL_EXTRA) || \
+                        defined(OPENSSL_EXTRA_X509_SMALL)
+                        dName->dcIdx[dcnum] = cert->srcIdx;
+                        dName->dcLen[dcnum] = strLen;
+                        dName->dcNum = dcnum + 1;
+                        dcnum++;
+                    #endif /* OPENSSL_EXTRA */
+                        break;
+
+                    default:
+                        WOLFSSL_MSG("Unknown pilot attribute type");
+                        return ASN_PARSE_E;
+                }
+            }
+        }
+        if ((copyLen + strLen) > (int)(ASN_NAME_MAX - idx))
+        {
+            WOLFSSL_MSG("ASN Name too big, skipping");
+            tooBig = TRUE;
+        }
+        if ((copy != NULL) && !tooBig) {
+            XMEMCPY(&full[idx], copy, copyLen);
+            idx += copyLen;
+            XMEMCPY(&full[idx], &cert->source[cert->srcIdx], strLen);
+            idx += strLen;
+
+        #ifdef OPENSSL_EXTRA
+            if (count < DOMAIN_COMPONENT_MAX) {
+                /* store order that DN was parsed */
+                dName->loc[count++] = id;
+            }
+        #endif
+        }
+        cert->srcIdx += strLen;
     }
     full[idx++] = 0;
+#if defined(OPENSSL_EXTRA)
+    /* store order that DN was parsed */
+    dName->locSz = count;
+#endif
 
     #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
     {
@@ -4396,6 +5975,7 @@
             if (dName->cnLen != 0) {
                 dName->entryCount++;
                 XMEMCPY(&dName->fullName[idx], WOLFSSL_COMMON_NAME, 4);
+                dName->cnNid = wc_OBJ_sn2nid((const char *)WOLFSSL_COMMON_NAME);
                 idx += 4;
                 XMEMCPY(&dName->fullName[idx],
                                      &cert->source[dName->cnIdx], dName->cnLen);
@@ -4405,6 +5985,7 @@
             if (dName->snLen != 0) {
                 dName->entryCount++;
                 XMEMCPY(&dName->fullName[idx], WOLFSSL_SUR_NAME, 4);
+                dName->snNid = wc_OBJ_sn2nid((const char *)WOLFSSL_SUR_NAME);
                 idx += 4;
                 XMEMCPY(&dName->fullName[idx],
                                      &cert->source[dName->snIdx], dName->snLen);
@@ -4414,6 +5995,7 @@
             if (dName->cLen != 0) {
                 dName->entryCount++;
                 XMEMCPY(&dName->fullName[idx], WOLFSSL_COUNTRY_NAME, 3);
+                dName->cNid = wc_OBJ_sn2nid((const char *)WOLFSSL_COUNTRY_NAME);
                 idx += 3;
                 XMEMCPY(&dName->fullName[idx],
                                        &cert->source[dName->cIdx], dName->cLen);
@@ -4423,6 +6005,7 @@
             if (dName->lLen != 0) {
                 dName->entryCount++;
                 XMEMCPY(&dName->fullName[idx], WOLFSSL_LOCALITY_NAME, 3);
+                dName->lNid = wc_OBJ_sn2nid((const char *)WOLFSSL_LOCALITY_NAME);
                 idx += 3;
                 XMEMCPY(&dName->fullName[idx],
                                        &cert->source[dName->lIdx], dName->lLen);
@@ -4432,6 +6015,7 @@
             if (dName->stLen != 0) {
                 dName->entryCount++;
                 XMEMCPY(&dName->fullName[idx], WOLFSSL_STATE_NAME, 4);
+                dName->stNid = wc_OBJ_sn2nid((const char *)WOLFSSL_STATE_NAME);
                 idx += 4;
                 XMEMCPY(&dName->fullName[idx],
                                      &cert->source[dName->stIdx], dName->stLen);
@@ -4441,6 +6025,7 @@
             if (dName->oLen != 0) {
                 dName->entryCount++;
                 XMEMCPY(&dName->fullName[idx], WOLFSSL_ORG_NAME, 3);
+                dName->oNid = wc_OBJ_sn2nid((const char *)WOLFSSL_ORG_NAME);
                 idx += 3;
                 XMEMCPY(&dName->fullName[idx],
                                        &cert->source[dName->oIdx], dName->oLen);
@@ -4450,6 +6035,7 @@
             if (dName->ouLen != 0) {
                 dName->entryCount++;
                 XMEMCPY(&dName->fullName[idx], WOLFSSL_ORGUNIT_NAME, 4);
+                dName->ouNid = wc_OBJ_sn2nid((const char *)WOLFSSL_ORGUNIT_NAME);
                 idx += 4;
                 XMEMCPY(&dName->fullName[idx],
                                      &cert->source[dName->ouIdx], dName->ouLen);
@@ -4459,6 +6045,7 @@
             if (dName->emailLen != 0) {
                 dName->entryCount++;
                 XMEMCPY(&dName->fullName[idx], "/emailAddress=", 14);
+                dName->emailNid = wc_OBJ_sn2nid((const char *)"/emailAddress=");
                 idx += 14;
                 XMEMCPY(&dName->fullName[idx],
                                &cert->source[dName->emailIdx], dName->emailLen);
@@ -4479,6 +6066,7 @@
             if (dName->uidLen != 0) {
                 dName->entryCount++;
                 XMEMCPY(&dName->fullName[idx], "/UID=", 5);
+                dName->uidNid = wc_OBJ_sn2nid((const char *)"/UID=");
                 idx += 5;
                 XMEMCPY(&dName->fullName[idx],
                                    &cert->source[dName->uidIdx], dName->uidLen);
@@ -4488,6 +6076,7 @@
             if (dName->serialLen != 0) {
                 dName->entryCount++;
                 XMEMCPY(&dName->fullName[idx], WOLFSSL_SERIAL_NUMBER, 14);
+                dName->serialNid = wc_OBJ_sn2nid((const char *)WOLFSSL_SERIAL_NUMBER);
                 idx += 14;
                 XMEMCPY(&dName->fullName[idx],
                              &cert->source[dName->serialIdx], dName->serialLen);
@@ -4507,14 +6096,21 @@
 #ifndef NO_ASN_TIME
 
 /* two byte date/time, add to value */
-static WC_INLINE void GetTime(int* value, const byte* date, int* idx)
+static WC_INLINE int GetTime(int* value, const byte* date, int* idx)
 {
     int i = *idx;
 
+    if (date[i] < 0x30 || date[i] > 0x39 || date[i+1] < 0x30 ||
+                                                             date[i+1] > 0x39) {
+        return ASN_PARSE_E;
+    }
+
     *value += btoi(date[i++]) * 10;
     *value += btoi(date[i++]);
 
     *idx = i;
+
+    return 0;
 }
 
 int ExtractDate(const unsigned char* date, unsigned char format,
@@ -4523,23 +6119,25 @@
     XMEMSET(certTime, 0, sizeof(struct tm));
 
     if (format == ASN_UTC_TIME) {
-        if (btoi(date[0]) >= 5)
+        if (btoi(date[*idx]) >= 5)
             certTime->tm_year = 1900;
         else
             certTime->tm_year = 2000;
     }
     else  { /* format == GENERALIZED_TIME */
-        certTime->tm_year += btoi(date[*idx]) * 1000; *idx = *idx + 1;
-        certTime->tm_year += btoi(date[*idx]) * 100;  *idx = *idx + 1;
+        if (GetTime(&certTime->tm_year, date, idx) != 0) return 0;
+        certTime->tm_year *= 100;
     }
 
     /* adjust tm_year, tm_mon */
-    GetTime((int*)&certTime->tm_year, date, idx); certTime->tm_year -= 1900;
-    GetTime((int*)&certTime->tm_mon,  date, idx); certTime->tm_mon  -= 1;
-    GetTime((int*)&certTime->tm_mday, date, idx);
-    GetTime((int*)&certTime->tm_hour, date, idx);
-    GetTime((int*)&certTime->tm_min,  date, idx);
-    GetTime((int*)&certTime->tm_sec,  date, idx);
+    if (GetTime(&certTime->tm_year, date, idx) != 0) return 0;
+    certTime->tm_year -= 1900;
+    if (GetTime(&certTime->tm_mon , date, idx) != 0) return 0;
+    certTime->tm_mon  -= 1;
+    if (GetTime(&certTime->tm_mday, date, idx) != 0) return 0;
+    if (GetTime(&certTime->tm_hour, date, idx) != 0) return 0;
+    if (GetTime(&certTime->tm_min , date, idx) != 0) return 0;
+    if (GetTime(&certTime->tm_sec , date, idx) != 0) return 0;
 
     return 1;
 }
@@ -4564,24 +6162,23 @@
     /* place month in buffer */
     buf[0] = '\0';
     switch(t.tm_mon) {
-        case 0:  XSTRNCAT(buf, "Jan ", 4); break;
-        case 1:  XSTRNCAT(buf, "Feb ", 4); break;
-        case 2:  XSTRNCAT(buf, "Mar ", 4); break;
-        case 3:  XSTRNCAT(buf, "Apr ", 4); break;
-        case 4:  XSTRNCAT(buf, "May ", 4); break;
-        case 5:  XSTRNCAT(buf, "Jun ", 4); break;
-        case 6:  XSTRNCAT(buf, "Jul ", 4); break;
-        case 7:  XSTRNCAT(buf, "Aug ", 4); break;
-        case 8:  XSTRNCAT(buf, "Sep ", 4); break;
-        case 9:  XSTRNCAT(buf, "Oct ", 4); break;
-        case 10: XSTRNCAT(buf, "Nov ", 4); break;
-        case 11: XSTRNCAT(buf, "Dec ", 4); break;
+        case 0:  XSTRNCAT(buf, "Jan ", 5); break;
+        case 1:  XSTRNCAT(buf, "Feb ", 5); break;
+        case 2:  XSTRNCAT(buf, "Mar ", 5); break;
+        case 3:  XSTRNCAT(buf, "Apr ", 5); break;
+        case 4:  XSTRNCAT(buf, "May ", 5); break;
+        case 5:  XSTRNCAT(buf, "Jun ", 5); break;
+        case 6:  XSTRNCAT(buf, "Jul ", 5); break;
+        case 7:  XSTRNCAT(buf, "Aug ", 5); break;
+        case 8:  XSTRNCAT(buf, "Sep ", 5); break;
+        case 9:  XSTRNCAT(buf, "Oct ", 5); break;
+        case 10: XSTRNCAT(buf, "Nov ", 5); break;
+        case 11: XSTRNCAT(buf, "Dec ", 5); break;
         default:
             return 0;
 
     }
     idx = 4; /* use idx now for char buffer */
-    buf[idx] = ' ';
 
     XSNPRINTF(buf + idx, len - idx, "%2d %02d:%02d:%02d %d GMT",
               t.tm_mday, t.tm_hour, t.tm_min, t.tm_sec, t.tm_year + 1900);
@@ -4591,10 +6188,101 @@
 #endif /* OPENSSL_ALL || WOLFSSL_MYSQL_COMPATIBLE || WOLFSSL_NGINX || WOLFSSL_HAPROXY */
 
 
+#if !defined(NO_ASN_TIME) && defined(HAVE_PKCS7)
+
+/* Set current time string, either UTC or GeneralizedTime.
+ * (void*) tm should be a pointer to time_t, output is placed in buf.
+ *
+ * Return time string length placed in buf on success, negative on error */
+int GetAsnTimeString(void* currTime, byte* buf, word32 len)
+{
+    struct tm* ts      = NULL;
+    struct tm* tmpTime = NULL;
+#if defined(NEED_TMP_TIME)
+    struct tm tmpTimeStorage;
+    tmpTime = &tmpTimeStorage;
+#else
+    (void)tmpTime;
+#endif
+    byte* data_ptr  = buf;
+    word32 data_len = 0;
+    int year, mon, day, hour, mini, sec;
+
+    WOLFSSL_ENTER("SetAsnTimeString");
+
+    if (buf == NULL || len == 0)
+        return BAD_FUNC_ARG;
+
+    ts = (struct tm *)XGMTIME((time_t*)currTime, tmpTime);
+    if (ts == NULL){
+        WOLFSSL_MSG("failed to get time data.");
+        return ASN_TIME_E;
+    }
+
+    /* Note ASN_UTC_TIME_SIZE and ASN_GENERALIZED_TIME_SIZE include space for
+     * the null terminator. ASN encoded values leave off the terminator. */
+
+    if (ts->tm_year >= 50 && ts->tm_year < 150) {
+        /* UTC Time */
+        char utc_str[ASN_UTC_TIME_SIZE];
+        data_len = ASN_UTC_TIME_SIZE - 1 + 2;
+
+        if (len < data_len)
+            return BUFFER_E;
+
+        if (ts->tm_year >= 50 && ts->tm_year < 100) {
+            year = ts->tm_year;
+        } else if (ts->tm_year >= 100 && ts->tm_year < 150) {
+            year = ts->tm_year - 100;
+        }
+        else {
+            WOLFSSL_MSG("unsupported year range");
+            return BAD_FUNC_ARG;
+        }
+        mon  = ts->tm_mon + 1;
+        day  = ts->tm_mday;
+        hour = ts->tm_hour;
+        mini = ts->tm_min;
+        sec  = ts->tm_sec;
+        XSNPRINTF((char *)utc_str, ASN_UTC_TIME_SIZE,
+                  "%02d%02d%02d%02d%02d%02dZ", year, mon, day, hour, mini, sec);
+        *data_ptr = (byte) ASN_UTC_TIME; data_ptr++;
+        /* -1 below excludes null terminator */
+        *data_ptr = (byte) ASN_UTC_TIME_SIZE - 1; data_ptr++;
+        XMEMCPY(data_ptr,(byte *)utc_str, ASN_UTC_TIME_SIZE - 1);
+
+    } else {
+        /* GeneralizedTime */
+        char gt_str[ASN_GENERALIZED_TIME_SIZE];
+        data_len = ASN_GENERALIZED_TIME_SIZE - 1 + 2;
+
+        if (len < data_len)
+            return BUFFER_E;
+
+        year = ts->tm_year + 1900;
+        mon  = ts->tm_mon + 1;
+        day  = ts->tm_mday;
+        hour = ts->tm_hour;
+        mini = ts->tm_min;
+        sec  = ts->tm_sec;
+        XSNPRINTF((char *)gt_str, ASN_GENERALIZED_TIME_SIZE,
+                  "%4d%02d%02d%02d%02d%02dZ", year, mon, day, hour, mini, sec);
+        *data_ptr = (byte) ASN_GENERALIZED_TIME; data_ptr++;
+        /* -1 below excludes null terminator */
+        *data_ptr = (byte) ASN_GENERALIZED_TIME_SIZE - 1; data_ptr++;
+        XMEMCPY(data_ptr,(byte *)gt_str, ASN_GENERALIZED_TIME_SIZE - 1);
+    }
+
+    return data_len;
+}
+
+#endif /* !NO_ASN_TIME && HAVE_PKCS7 */
+
+
 #if defined(USE_WOLF_VALIDDATE)
 
 /* to the second */
-static int DateGreaterThan(const struct tm* a, const struct tm* b)
+int DateGreaterThan(const struct tm* a, const struct tm* b)
 {
     if (a->tm_year > b->tm_year)
         return 1;
@@ -4636,7 +6324,7 @@
     time_t ltime;
     struct tm  certTime;
     struct tm* localTime;
-    struct tm* tmpTime = NULL;
+    struct tm* tmpTime;
     int    i = 0;
     int    timeDiff = 0 ;
     int    diffHH = 0 ; int diffMM = 0 ;
@@ -4646,8 +6334,9 @@
     struct tm tmpTimeStorage;
     tmpTime = &tmpTimeStorage;
 #else
+    tmpTime = NULL;
+#endif
     (void)tmpTime;
-#endif
 
     ltime = XTIME(0);
 
@@ -4673,11 +6362,13 @@
     if ((date[i] == '+') || (date[i] == '-')) {
         WOLFSSL_MSG("Using time differential, not Zulu") ;
         diffSign = date[i++] == '+' ? 1 : -1 ;
-        GetTime(&diffHH, date, &i);
-        GetTime(&diffMM, date, &i);
+        if (GetTime(&diffHH, date, &i) != 0)
+            return 0;
+        if (GetTime(&diffMM, date, &i) != 0)
+            return 0;
         timeDiff = diffSign * (diffHH*60 + diffMM) * 60 ;
     } else if (date[i] != 'Z') {
-        WOLFSSL_MSG("UTCtime, niether Zulu or time differential") ;
+        WOLFSSL_MSG("UTCtime, neither Zulu or time differential") ;
         return 0;
     }
 
@@ -4763,7 +6454,7 @@
     return 0;
 }
 
-static int GetDate(DecodedCert* cert, int dateType, int verify)
+static int GetDate(DecodedCert* cert, int dateType, int verify, int maxIdx)
 {
     int    ret, length;
     const byte *datePtr = NULL;
@@ -4778,7 +6469,7 @@
     startIdx = cert->srcIdx;
 
     ret = GetDateInfo(cert->source, &cert->srcIdx, &datePtr, &format,
-                      &length, cert->maxIdx);
+                      &length, maxIdx);
     if (ret < 0)
         return ret;
 
@@ -4791,7 +6482,8 @@
         cert->afterDateLen  = cert->srcIdx - startIdx;
 
 #ifndef NO_ASN_TIME
-    if (verify != NO_VERIFY && !XVALIDATE_DATE(date, format, dateType)) {
+    if (verify != NO_VERIFY && verify != VERIFY_SKIP_DATE &&
+            !XVALIDATE_DATE(date, format, dateType)) {
         if (dateType == BEFORE)
             return ASN_BEFORE_DATE_E;
         else
@@ -4804,18 +6496,20 @@
     return 0;
 }
 
-static int GetValidity(DecodedCert* cert, int verify)
+static int GetValidity(DecodedCert* cert, int verify, int maxIdx)
 {
     int length;
     int badDate = 0;
 
-    if (GetSequence(cert->source, &cert->srcIdx, &length, cert->maxIdx) < 0)
-        return ASN_PARSE_E;
-
-    if (GetDate(cert, BEFORE, verify) < 0)
+    if (GetSequence(cert->source, &cert->srcIdx, &length, maxIdx) < 0)
+        return ASN_PARSE_E;
+
+    maxIdx = cert->srcIdx + length;
+
+    if (GetDate(cert, BEFORE, verify, maxIdx) < 0)
         badDate = ASN_BEFORE_DATE_E; /* continue parsing */
 
-    if (GetDate(cert, AFTER, verify) < 0)
+    if (GetDate(cert, AFTER, verify, maxIdx) < 0)
         return ASN_AFTER_DATE_E;
 
     if (badDate != 0)
@@ -4878,33 +6572,58 @@
 #endif /* WOLFSSL_CERT_GEN && WOLFSSL_ALT_NAMES */
 #endif /* !NO_ASN_TIME */
 
+/* parses certificate up to point of X.509 public key
+ *
+ * if cert date is invalid then badDate gets set to error value, otherwise is 0
+ *
+ * returns a negative value on fail case
+ */
+int wc_GetPubX509(DecodedCert* cert, int verify, int* badDate)
+{
+    int ret;
+
+    if (cert == NULL || badDate == NULL)
+        return BAD_FUNC_ARG;
+
+    *badDate = 0;
+    if ( (ret = GetCertHeader(cert)) < 0)
+        return ret;
+
+    WOLFSSL_MSG("Got Cert Header");
+
+    /* Using the sigIndex as the upper bound because that's where the
+     * actual certificate data ends. */
+    if ( (ret = GetAlgoId(cert->source, &cert->srcIdx, &cert->signatureOID,
+                          oidSigType, cert->sigIndex)) < 0)
+        return ret;
+
+    WOLFSSL_MSG("Got Algo ID");
+
+    if ( (ret = GetName(cert, ISSUER, cert->sigIndex)) < 0)
+        return ret;
+
+    if ( (ret = GetValidity(cert, verify, cert->sigIndex)) < 0)
+        *badDate = ret;
+
+    if ( (ret = GetName(cert, SUBJECT, cert->sigIndex)) < 0)
+        return ret;
+
+    WOLFSSL_MSG("Got Subject Name");
+    return ret;
+}
 
 int DecodeToKey(DecodedCert* cert, int verify)
 {
     int badDate = 0;
     int ret;
 
-    if ( (ret = GetCertHeader(cert)) < 0)
-        return ret;
-
-    WOLFSSL_MSG("Got Cert Header");
-
-    if ( (ret = GetAlgoId(cert->source, &cert->srcIdx, &cert->signatureOID,
-                          oidSigType, cert->maxIdx)) < 0)
-        return ret;
-
-    WOLFSSL_MSG("Got Algo ID");
-
-    if ( (ret = GetName(cert, ISSUER)) < 0)
-        return ret;
-
-    if ( (ret = GetValidity(cert, verify)) < 0)
-        badDate = ret;
-
-    if ( (ret = GetName(cert, SUBJECT)) < 0)
-        return ret;
-
-    WOLFSSL_MSG("Got Subject Name");
+    if ( (ret = wc_GetPubX509(cert, verify, &badDate)) < 0)
+        return ret;
+
+    /* Determine if self signed */
+    cert->selfSigned = XMEMCMP(cert->issuerHash,
+                               cert->subjectHash,
+                               KEYID_SIZE) == 0 ? 1 : 0;
 
     if ( (ret = GetKey(cert)) < 0)
         return ret;
@@ -4960,17 +6679,23 @@
 }
 
 
-WOLFSSL_LOCAL word32 SetLength(word32 length, byte* output)
+word32 SetLength(word32 length, byte* output)
 {
     word32 i = 0, j;
 
-    if (length < ASN_LONG_LENGTH)
-        output[i++] = (byte)length;
+    if (length < ASN_LONG_LENGTH) {
+        if (output)
+            output[i] = (byte)length;
+        i++;
+    }
     else {
-        output[i++] = (byte)(BytePrecision(length) | ASN_LONG_LENGTH);
+        if (output)
+            output[i] = (byte)(BytePrecision(length) | ASN_LONG_LENGTH);
+        i++;
 
         for (j = BytePrecision(length); j; --j) {
-            output[i] = (byte)(length >> ((j - 1) * WOLFSSL_BIT_SIZE));
+            if (output)
+                output[i] = (byte)(length >> ((j - 1) * WOLFSSL_BIT_SIZE));
             i++;
         }
     }
@@ -4978,27 +6703,27 @@
     return i;
 }
 
-
-WOLFSSL_LOCAL word32 SetSequence(word32 len, byte* output)
-{
-    output[0] = ASN_SEQUENCE | ASN_CONSTRUCTED;
-    return SetLength(len, output + 1) + 1;
-}
-
-WOLFSSL_LOCAL word32 SetOctetString(word32 len, byte* output)
+word32 SetSequence(word32 len, byte* output)
+{
+    if (output)
+        output[0] = ASN_SEQUENCE | ASN_CONSTRUCTED;
+    return SetLength(len, output ? output + 1 : NULL) + 1;
+}
+
+word32 SetOctetString(word32 len, byte* output)
 {
     output[0] = ASN_OCTET_STRING;
     return SetLength(len, output + 1) + 1;
 }
 
 /* Write a set header to output */
-WOLFSSL_LOCAL word32 SetSet(word32 len, byte* output)
+word32 SetSet(word32 len, byte* output)
 {
     output[0] = ASN_SET | ASN_CONSTRUCTED;
     return SetLength(len, output + 1) + 1;
 }
 
-WOLFSSL_LOCAL word32 SetImplicit(byte tag, byte number, word32 len, byte* output)
+word32 SetImplicit(byte tag, byte number, word32 len, byte* output)
 {
 
     output[0] = ((tag == ASN_SEQUENCE || tag == ASN_SET) ? ASN_CONSTRUCTED : 0)
@@ -5006,14 +6731,14 @@
     return SetLength(len, output + 1) + 1;
 }
 
-WOLFSSL_LOCAL word32 SetExplicit(byte number, word32 len, byte* output)
+word32 SetExplicit(byte number, word32 len, byte* output)
 {
     output[0] = ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | number;
     return SetLength(len, output + 1) + 1;
 }
 
 
-#if defined(HAVE_ECC)
+#if defined(HAVE_ECC) && defined(HAVE_ECC_KEY_EXPORT)
 
 static int SetCurve(ecc_key* key, byte* output)
 {
@@ -5052,7 +6777,7 @@
     return idx;
 }
 
-#endif /* HAVE_ECC */
+#endif /* HAVE_ECC && HAVE_ECC_KEY_EXPORT */
 
 
 #ifdef HAVE_ECC
@@ -5068,12 +6793,13 @@
 }
 #endif
 
-WOLFSSL_LOCAL word32 SetAlgoID(int algoOID, byte* output, int type, int curveSz)
+word32 SetAlgoID(int algoOID, byte* output, int type, int curveSz)
 {
     word32 tagSz, idSz, seqSz, algoSz = 0;
     const  byte* algoName = 0;
     byte   ID_Length[1 + MAX_LENGTH_SZ];
     byte   seqArray[MAX_SEQ_SZ + 1];  /* add object_id to end */
+    int    length = 0;
 
     tagSz = (type == oidHashType ||
              (type == oidSigType
@@ -5083,6 +6809,9 @@
         #ifdef HAVE_ED25519
               && algoOID != ED25519k
         #endif
+        #ifdef HAVE_ED448
+              && algoOID != ED448k
+        #endif
               ) ||
              (type == oidKeyType && algoOID == RSAk)) ? 2 : 0;
 
@@ -5096,14 +6825,27 @@
     idSz  = SetObjectId(algoSz, ID_Length);
     seqSz = SetSequence(idSz + algoSz + tagSz + curveSz, seqArray);
 
-    XMEMCPY(output, seqArray, seqSz);
-    XMEMCPY(output + seqSz, ID_Length, idSz);
-    XMEMCPY(output + seqSz + idSz, algoName, algoSz);
-    if (tagSz == 2)
-        SetASNNull(&output[seqSz + idSz + algoSz]);
-
-    return seqSz + idSz + algoSz + tagSz;
-
+    /* Copy only algo to output for DSA keys */
+    if (algoOID == DSAk && output) {
+        XMEMCPY(output, ID_Length, idSz);
+        XMEMCPY(output + idSz, algoName, algoSz);
+        if (tagSz == 2)
+            SetASNNull(&output[seqSz + idSz + algoSz]);
+    }
+    else if (output) {
+        XMEMCPY(output, seqArray, seqSz);
+        XMEMCPY(output + seqSz, ID_Length, idSz);
+        XMEMCPY(output + seqSz + idSz, algoName, algoSz);
+        if (tagSz == 2)
+            SetASNNull(&output[seqSz + idSz + algoSz]);
+    }
+
+    if (algoOID == DSAk)
+        length = idSz + algoSz + tagSz;
+    else
+        length = seqSz + idSz + algoSz + tagSz;
+
+    return length;
 }
 
 
@@ -5127,6 +6869,8 @@
 }
 
 
+#ifndef NO_CERTS
+
 int wc_GetCTC_HashOID(int type)
 {
     int ret;
@@ -5164,6 +6908,7 @@
         sigCtx->plain = NULL;
     }
 #endif
+#ifndef NO_ASN_CRYPT
     if (sigCtx->key.ptr) {
         switch (sigCtx->keyOID) {
         #ifndef NO_RSA
@@ -5184,16 +6929,24 @@
                 XFREE(sigCtx->key.ed25519, sigCtx->heap, DYNAMIC_TYPE_ED25519);
                 break;
         #endif /* HAVE_ED25519 */
+        #ifdef HAVE_ED448
+            case ED448k:
+                wc_ed448_free(sigCtx->key.ed448);
+                XFREE(sigCtx->key.ed448, sigCtx->heap, DYNAMIC_TYPE_ED448);
+                break;
+        #endif /* HAVE_ED448 */
             default:
                 break;
         } /* switch (keyOID) */
         sigCtx->key.ptr = NULL;
     }
+#endif
 
     /* reset state, we are done */
     sigCtx->state = SIG_STATE_BEGIN;
 }
 
+#ifndef NO_ASN_CRYPT
 static int HashForSignature(const byte* buf, word32 bufSz, word32 sigOID,
                             byte* digest, int* typeH, int* digestSz, int verify)
 {
@@ -5268,11 +7021,20 @@
             }
             break;
     #endif
+    #ifdef HAVE_ED25519
         case CTC_ED25519:
             /* Hashes done in signing operation.
              * Two dependent hashes with prefixes performed.
              */
             break;
+    #endif
+    #ifdef HAVE_ED448
+        case CTC_ED448:
+            /* Hashes done in signing operation.
+             * Two dependent hashes with prefixes performed.
+             */
+            break;
+    #endif
         default:
             ret = HASH_TYPE_E;
             WOLFSSL_MSG("Hash for Signature has unsupported type");
@@ -5280,15 +7042,18 @@
 
     return ret;
 }
+#endif /* !NO_ASN_CRYPT */
 
 /* Return codes: 0=Success, Negative (see error-crypt.h), ASN_SIG_CONFIRM_E */
 static int ConfirmSignature(SignatureCtx* sigCtx,
     const byte* buf, word32 bufSz,
     const byte* key, word32 keySz, word32 keyOID,
-    const byte* sig, word32 sigSz, word32 sigOID)
+    const byte* sig, word32 sigSz, word32 sigOID, byte* rsaKeyIdx)
 {
     int ret = 0;
-
+#ifndef WOLFSSL_RENESAS_TSIP_TLS
+    (void)rsaKeyIdx;
+#endif
     if (sigCtx == NULL || buf == NULL || bufSz == 0 || key == NULL ||
         keySz == 0 || sig == NULL || sigSz == 0) {
         return BAD_FUNC_ARG;
@@ -5301,9 +7066,12 @@
 
     WOLFSSL_ENTER("ConfirmSignature");
 
+#ifndef NO_ASN_CRYPT
     switch (sigCtx->state) {
         case SIG_STATE_BEGIN:
         {
+            sigCtx->keyOID = keyOID; /* must set early for cleanup */
+
             sigCtx->digest = (byte*)XMALLOC(WC_MAX_DIGEST_SIZE, sigCtx->heap,
                                                     DYNAMIC_TYPE_DIGEST);
             if (sigCtx->digest == NULL) {
@@ -5328,8 +7096,6 @@
 
         case SIG_STATE_KEY:
         {
-            sigCtx->keyOID = keyOID;
-
             switch (keyOID) {
             #ifndef NO_RSA
                 case RSAk:
@@ -5416,6 +7182,30 @@
                     break;
                 }
             #endif
+            #ifdef HAVE_ED448
+                case ED448k:
+                {
+                    sigCtx->verify = 0;
+                    sigCtx->key.ed448 = (ed448_key*)XMALLOC(
+                                                sizeof(ed448_key), sigCtx->heap,
+                                                DYNAMIC_TYPE_ED448);
+                    if (sigCtx->key.ed448 == NULL) {
+                        ERROR_OUT(MEMORY_E, exit_cs);
+                    }
+                    if ((ret = wc_ed448_init(sigCtx->key.ed448)) < 0) {
+                        goto exit_cs;
+                    }
+                    if ((ret = wc_ed448_import_public(key, keySz,
+                                                      sigCtx->key.ed448)) < 0) {
+                        WOLFSSL_MSG("ASN Key import error ED448");
+                        goto exit_cs;
+                    }
+                #ifdef WOLFSSL_ASYNC_CRYPT
+                    sigCtx->asyncDev = &sigCtx->key.ed448->asyncDev;
+                #endif
+                    break;
+                }
+            #endif
                 default:
                     WOLFSSL_MSG("Verify Key type unknown");
                     ret = ASN_UNKNOWN_OID_E;
@@ -5430,7 +7220,7 @@
 
         #ifdef WOLFSSL_ASYNC_CRYPT
             if (sigCtx->devId != INVALID_DEVID && sigCtx->asyncDev && sigCtx->asyncCtx) {
-                /* make sure event is intialized */
+                /* make sure event is initialized */
                 WOLF_EVENT* event = &sigCtx->asyncDev->event;
                 ret = wolfAsync_EventInit(event, WOLF_EVENT_TYPE_ASYNC_WOLFSSL,
                     sigCtx->asyncCtx, WC_ASYNC_FLAG_CALL_AGAIN);
@@ -5455,13 +7245,33 @@
                     else
                 #endif /* HAVE_PK_CALLBACKS */
                     {
+                     #ifdef WOLFSSL_RENESAS_TSIP_TLS
+                        if (rsaKeyIdx != NULL)
+                        {
+                            ret = tsip_tls_CertVerify(buf, bufSz, sigCtx->plain,
+                                sigSz,
+                                sigCtx->pubkey_n_start - sigCtx->certBegin,
+                                sigCtx->pubkey_n_len - 1,
+                                sigCtx->pubkey_e_start - sigCtx->certBegin,
+                                sigCtx->pubkey_e_len - 1,
+                                rsaKeyIdx);
+
+                            if (ret == 0){
+                                sigCtx->verifyByTSIP = 1;
+                                ret = 0;
+                            } else {
+                                WOLFSSL_MSG("RSA Verify by tsip didn't match");
+                                ret = ASN_SIG_CONFIRM_E;
+                            }
+                        } else
+                    #endif
                         ret = wc_RsaSSL_VerifyInline(sigCtx->plain, sigSz,
                                                  &sigCtx->out, sigCtx->key.rsa);
                     }
                     break;
                 }
             #endif /* !NO_RSA */
-            #ifdef HAVE_ECC
+            #if defined(HAVE_ECC)
                 case ECDSAk:
                 {
                 #ifdef HAVE_PK_CALLBACKS
@@ -5490,14 +7300,28 @@
                     break;
                 }
             #endif
+            #ifdef HAVE_ED448
+                case ED448k:
+                {
+                    ret = wc_ed448_verify_msg(sig, sigSz, buf, bufSz,
+                                             &sigCtx->verify, sigCtx->key.ed448,
+                                             NULL, 0);
+                    break;
+                }
+            #endif
                 default:
                     break;
             }  /* switch (keyOID) */
 
+        #ifdef WOLFSSL_ASYNC_CRYPT
+            if (ret == WC_PENDING_E) {
+                goto exit_cs;
+            }
+        #endif
+
             if (ret < 0) {
-                /* treat all non async RSA errors as ASN_SIG_CONFIRM_E */
-                if (ret != WC_PENDING_E)
-                    ret = ASN_SIG_CONFIRM_E;
+                /* treat all RSA errors as ASN_SIG_CONFIRM_E */
+                ret = ASN_SIG_CONFIRM_E;
                 goto exit_cs;
             }
 
@@ -5512,6 +7336,9 @@
                 case RSAk:
                 {
                     int encodedSigSz, verifySz;
+                #ifdef WOLFSSL_RENESAS_TSIP
+                    if (sigCtx->verifyByTSIP == 1) break;
+                #endif
                 #ifdef WOLFSSL_SMALL_STACK
                     byte* encodedSig = (byte*)XMALLOC(MAX_ENCODED_SIG_SZ,
                                         sigCtx->heap, DYNAMIC_TYPE_TMP_BUFFER);
@@ -5568,6 +7395,19 @@
                     break;
                 }
             #endif /* HAVE_ED25519 */
+            #ifdef HAVE_ED448
+                case ED448k:
+                {
+                    if (sigCtx->verify == 1) {
+                        ret = 0;
+                    }
+                    else {
+                        WOLFSSL_MSG("ED448 Verify didn't match");
+                        ret = ASN_SIG_CONFIRM_E;
+                    }
+                    break;
+                }
+            #endif /* HAVE_ED448 */
                 default:
                     break;
             }  /* switch (keyOID) */
@@ -5578,11 +7418,19 @@
 
 exit_cs:
 
+#endif /* !NO_ASN_CRYPT */
+
+    (void)keyOID;
+    (void)sigOID;
+
     WOLFSSL_LEAVE("ConfirmSignature", ret);
 
-    if (ret != WC_PENDING_E) {
-        FreeSignatureCtx(sigCtx);
-    }
+#ifdef WOLFSSL_ASYNC_CRYPT
+    if (ret == WC_PENDING_E)
+        return ret;
+#endif
+
+    FreeSignatureCtx(sigCtx);
 
     return ret;
 }
@@ -5775,7 +7623,7 @@
 
 #endif /* IGNORE_NAME_CONSTRAINTS */
 
-static int DecodeAltNames(byte* input, int sz, DecodedCert* cert)
+static int DecodeAltNames(const byte* input, int sz, DecodedCert* cert)
 {
     word32 idx = 0;
     int length = 0;
@@ -5787,6 +7635,13 @@
         return ASN_PARSE_E;
     }
 
+    if (length == 0) {
+        /* RFC 5280 4.2.1.6.  Subject Alternative Name
+           If the subjectAltName extension is present, the sequence MUST
+           contain at least one entry. */
+        return ASN_PARSE_E;
+    }
+
     cert->weOwnAltNames = 1;
 
     while (length > 0) {
@@ -5940,6 +7795,49 @@
             length -= strLen;
             idx    += strLen;
         }
+#if defined(WOLFSSL_QT) || defined(OPENSSL_ALL)
+        else if (b == (ASN_CONTEXT_SPECIFIC | ASN_IP_TYPE)) {
+            DNS_entry* ipAddr;
+            int strLen;
+            word32 lenStartIdx = idx;
+            WOLFSSL_MSG("Decoding Subject Alt. Name: IP Address");
+
+            if (GetLength(input, &idx, &strLen, sz) < 0) {
+                WOLFSSL_MSG("\tfail: str length");
+                return ASN_PARSE_E;
+            }
+            length -= (idx - lenStartIdx);
+            /* check that strLen at index is not past input buffer */
+            if (strLen + (int)idx > sz) {
+                return BUFFER_E;
+            }
+
+            ipAddr = (DNS_entry*)XMALLOC(sizeof(DNS_entry), cert->heap,
+                                        DYNAMIC_TYPE_ALTNAME);
+            if (ipAddr == NULL) {
+                WOLFSSL_MSG("\tOut of Memory");
+                return MEMORY_E;
+            }
+
+            ipAddr->type = ASN_IP_TYPE;
+            ipAddr->name = (char*)XMALLOC(strLen + 1, cert->heap,
+                                         DYNAMIC_TYPE_ALTNAME);
+            if (ipAddr->name == NULL) {
+                WOLFSSL_MSG("\tOut of Memory");
+                XFREE(ipAddr, cert->heap, DYNAMIC_TYPE_ALTNAME);
+                return MEMORY_E;
+            }
+            ipAddr->len = strLen;
+            XMEMCPY(ipAddr->name, &input[idx], strLen);
+            ipAddr->name[strLen] = '\0';
+
+            ipAddr->next   = cert->altNames;
+            cert->altNames = ipAddr;
+
+            length -= strLen;
+            idx    += strLen;
+        }
+#endif /* WOLFSSL_QT || OPENSSL_ALL */
 #endif /* IGNORE_NAME_CONSTRAINTS */
 #ifdef WOLFSSL_SEP
         else if (b == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | ASN_OTHER_TYPE))
@@ -5948,6 +7846,7 @@
             word32 lenStartIdx = idx;
             word32 oid = 0;
             int    ret;
+            byte   tag;
 
             if (GetLength(input, &idx, &strLen, sz) < 0) {
                 WOLFSSL_MSG("\tfail: other name length");
@@ -5966,7 +7865,11 @@
                 return ASN_PARSE_E;
             }
 
-            if (input[idx++] != (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED)) {
+            if (GetASNTag(input, &idx, &tag, sz) < 0) {
+                return ASN_PARSE_E;
+            }
+
+            if (tag != (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED)) {
                 WOLFSSL_MSG("\twrong type");
                 return ASN_PARSE_E;
             }
@@ -6032,7 +7935,7 @@
     return 0;
 }
 
-static int DecodeBasicCaConstraint(byte* input, int sz, DecodedCert* cert)
+static int DecodeBasicCaConstraint(const byte* input, int sz, DecodedCert* cert)
 {
     word32 idx = 0;
     int length = 0;
@@ -6052,21 +7955,32 @@
      * left empty. So, if the length is 0, just return. */
 
     ret = GetBoolean(input, &idx, sz);
+
+#ifndef WOLFSSL_X509_BASICCONS_INT
     if (ret < 0) {
         WOLFSSL_MSG("\tfail: constraint not valid BOOLEAN");
         return ret;
     }
 
     cert->isCA = (byte)ret;
+#else
+    if (ret < 0) {
+        if(input[idx] == ASN_INTEGER) {
+            /* For OpenSSL compatibility, if ASN_INTEGER it is valid format */
+            cert->isCA = FALSE;
+        } else return ret;
+    } else
+        cert->isCA = (byte)ret;
+#endif
 
     /* If there isn't any more data, return. */
-    if (idx >= (word32)sz)
+    if (idx >= (word32)sz) {
         return 0;
+    }
 
     ret = GetInteger7Bit(input, &idx, sz);
     if (ret < 0)
         return ret;
-
     cert->pathLength = (byte)ret;
     cert->pathLengthSet = 1;
 
@@ -6079,10 +7993,11 @@
 #define GENERALNAME_URI 6
     /* From RFC3280 SS4.2.1.7, GeneralName */
 
-static int DecodeCrlDist(byte* input, int sz, DecodedCert* cert)
-{
-    word32 idx = 0;
+static int DecodeCrlDist(const byte* input, int sz, DecodedCert* cert)
+{
+    word32 idx = 0, localIdx;
     int length = 0;
+    byte tag   = 0;
 
     WOLFSSL_ENTER("DecodeCrlDist");
 
@@ -6097,20 +8012,26 @@
     /* The Distribution Point has three explicit optional members
      *  First check for a DistributionPointName
      */
-    if (input[idx] == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0))
+    localIdx = idx;
+    if (GetASNTag(input, &localIdx, &tag, sz) == 0 &&
+            tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0))
     {
         idx++;
         if (GetLength(input, &idx, &length, sz) < 0)
             return ASN_PARSE_E;
 
-        if (input[idx] ==
-                    (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | CRLDP_FULL_NAME))
+        localIdx = idx;
+        if (GetASNTag(input, &localIdx, &tag, sz) == 0 &&
+                tag == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED |
+                    CRLDP_FULL_NAME))
         {
             idx++;
             if (GetLength(input, &idx, &length, sz) < 0)
                 return ASN_PARSE_E;
 
-            if (input[idx] == (ASN_CONTEXT_SPECIFIC | GENERALNAME_URI))
+            localIdx = idx;
+            if (GetASNTag(input, &localIdx, &tag, sz) == 0 &&
+                    tag == (ASN_CONTEXT_SPECIFIC | GENERALNAME_URI))
             {
                 idx++;
                 if (GetLength(input, &idx, &length, sz) < 0)
@@ -6131,8 +8052,10 @@
     }
 
     /* Check for reasonFlags */
+    localIdx = idx;
     if (idx < (word32)sz &&
-        input[idx] == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 1))
+        GetASNTag(input, &localIdx, &tag, sz) == 0 &&
+        tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 1))
     {
         idx++;
         if (GetLength(input, &idx, &length, sz) < 0)
@@ -6141,8 +8064,10 @@
     }
 
     /* Check for cRLIssuer */
+    localIdx = idx;
     if (idx < (word32)sz &&
-        input[idx] == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 2))
+        GetASNTag(input, &localIdx, &tag, sz) == 0 &&
+        tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 2))
     {
         idx++;
         if (GetLength(input, &idx, &length, sz) < 0)
@@ -6160,15 +8085,16 @@
 }
 
 
-static int DecodeAuthInfo(byte* input, int sz, DecodedCert* cert)
+static int DecodeAuthInfo(const byte* input, int sz, DecodedCert* cert)
 /*
- *  Read the first of the Authority Information Access records. If there are
+ *  Read Authority Information Access records. If there are
  *  any issues, return without saving the record.
  */
 {
     word32 idx = 0;
     int length = 0;
-    byte b;
+    int count  = 0;
+    byte b = 0;
     word32 oid;
 
     WOLFSSL_ENTER("DecodeAuthInfo");
@@ -6177,7 +8103,7 @@
     if (GetSequence(input, &idx, &length, sz) < 0)
         return ASN_PARSE_E;
 
-    while (idx < (word32)sz) {
+    while ((idx < (word32)sz) && (count < MAX_AIA_SZ)) {
         /* Unwrap a single AIA */
         if (GetSequence(input, &idx, &length, sz) < 0)
             return ASN_PARSE_E;
@@ -6186,19 +8112,34 @@
         if (GetObjectId(input, &idx, &oid, oidCertAuthInfoType, sz) < 0)
             return ASN_PARSE_E;
 
-
         /* Only supporting URIs right now. */
-        b = input[idx++];
+        if (GetASNTag(input, &idx, &b, sz) < 0)
+            return ASN_PARSE_E;
+
         if (GetLength(input, &idx, &length, sz) < 0)
             return ASN_PARSE_E;
 
+        /* Set ocsp entry */
         if (b == (ASN_CONTEXT_SPECIFIC | GENERALNAME_URI) &&
             oid == AIA_OCSP_OID)
         {
             cert->extAuthInfoSz = length;
             cert->extAuthInfo = input + idx;
-            break;
-        }
+            count++;
+        #if !defined(OPENSSL_ALL) || !defined(WOLFSSL_QT)
+            break;
+        #endif
+        }
+        #if defined(OPENSSL_ALL) || defined(WOLFSSL_QT)
+        /* Set CaIssuers entry */
+        else if ((b == (ASN_CONTEXT_SPECIFIC | GENERALNAME_URI)) &&
+            oid == AIA_CA_ISSUER_OID)
+        {
+            cert->extAuthInfoCaIssuerSz = length;
+            cert->extAuthInfoCaIssuer = input + idx;
+            count++;
+        }
+        #endif
         idx += length;
     }
 
@@ -6206,10 +8147,11 @@
 }
 
 
-static int DecodeAuthKeyId(byte* input, int sz, DecodedCert* cert)
+static int DecodeAuthKeyId(const byte* input, int sz, DecodedCert* cert)
 {
     word32 idx = 0;
     int length = 0, ret = 0;
+    byte tag;
 
     WOLFSSL_ENTER("DecodeAuthKeyId");
 
@@ -6218,8 +8160,13 @@
         return ASN_PARSE_E;
     }
 
-    if (input[idx++] != (ASN_CONTEXT_SPECIFIC | 0)) {
+    if (GetASNTag(input, &idx, &tag, sz) < 0) {
+        return ASN_PARSE_E;
+    }
+
+    if (tag != (ASN_CONTEXT_SPECIFIC | 0)) {
         WOLFSSL_MSG("\tinfo: OPTIONAL item 0, not available\n");
+        cert->extAuthKeyIdSet = 0;
         return 0;
     }
 
@@ -6236,19 +8183,14 @@
     if (length == KEYID_SIZE) {
         XMEMCPY(cert->extAuthKeyId, input + idx, length);
     }
-    else {
-    #ifdef NO_SHA
-        ret = wc_Sha256Hash(input + idx, length, cert->extAuthKeyId);
-    #else
-        ret = wc_ShaHash(input + idx, length, cert->extAuthKeyId);
-    #endif
-    }
-
-    return ret;
-}
-
-
-static int DecodeSubjKeyId(byte* input, int sz, DecodedCert* cert)
+    else
+        ret = CalcHashId(input + idx, length, cert->extAuthKeyId);
+
+    return ret;
+}
+
+
+static int DecodeSubjKeyId(const byte* input, int sz, DecodedCert* cert)
 {
     word32 idx = 0;
     int length = 0, ret = 0;
@@ -6267,22 +8209,17 @@
         cert->extSubjKeyIdSz = length;
     #endif /* OPENSSL_EXTRA */
 
-    if (length == SIGNER_DIGEST_SIZE) {
+    if (length == KEYID_SIZE) {
         XMEMCPY(cert->extSubjKeyId, input + idx, length);
     }
-    else {
-    #ifdef NO_SHA
-        ret = wc_Sha256Hash(input + idx, length, cert->extSubjKeyId);
-    #else
-        ret = wc_ShaHash(input + idx, length, cert->extSubjKeyId);
-    #endif
-    }
-
-    return ret;
-}
-
-
-static int DecodeKeyUsage(byte* input, int sz, DecodedCert* cert)
+    else
+        ret = CalcHashId(input + idx, length, cert->extSubjKeyId);
+
+    return ret;
+}
+
+
+static int DecodeKeyUsage(const byte* input, int sz, DecodedCert* cert)
 {
     word32 idx = 0;
     int length;
@@ -6301,12 +8238,12 @@
 }
 
 
-static int DecodeExtKeyUsage(byte* input, int sz, DecodedCert* cert)
+static int DecodeExtKeyUsage(const byte* input, int sz, DecodedCert* cert)
 {
     word32 idx = 0, oid;
-    int length;
-
-    WOLFSSL_ENTER("DecodeExtKeyUsage");
+    int length, ret;
+
+    WOLFSSL_MSG("DecodeExtKeyUsage");
 
     if (GetSequence(input, &idx, &length, sz) < 0) {
         WOLFSSL_MSG("\tfail: should be a SEQUENCE");
@@ -6319,8 +8256,11 @@
 #endif
 
     while (idx < (word32)sz) {
-        if (GetObjectId(input, &idx, &oid, oidCertKeyUseType, sz) < 0)
-            return ASN_PARSE_E;
+        ret = GetObjectId(input, &idx, &oid, oidCertKeyUseType, sz);
+        if (ret == ASN_UNKNOWN_OID_E)
+            continue;
+        else if (ret < 0)
+            return ret;
 
         switch (oid) {
             case EKU_ANY_OID:
@@ -6357,7 +8297,8 @@
 
 #ifndef IGNORE_NAME_CONSTRAINTS
 #define ASN_TYPE_MASK 0xF
-static int DecodeSubtree(byte* input, int sz, Base_entry** head, void* heap)
+static int DecodeSubtree(const byte* input, int sz,
+                         Base_entry** head, void* heap)
 {
     word32 idx = 0;
 
@@ -6424,7 +8365,7 @@
 }
 
 
-static int DecodeNameConstraints(byte* input, int sz, DecodedCert* cert)
+static int DecodeNameConstraints(const byte* input, int sz, DecodedCert* cert)
 {
     word32 idx = 0;
     int length = 0;
@@ -6465,94 +8406,57 @@
 
 #if (defined(WOLFSSL_CERT_EXT) && !defined(WOLFSSL_SEP)) || defined(OPENSSL_EXTRA)
 
-static int Word32ToString(char* d, word32 number)
-{
-    int i = 0;
-
-    if (d != NULL) {
-        word32 order = 1000000000;
-        word32 digit;
-
-        if (number == 0) {
-            d[i++] = '0';
-        }
-        else {
-            while (order) {
-                digit = number / order;
-                if (i > 0 || digit != 0) {
-                    d[i++] = (char)digit + '0';
-                }
-                if (digit != 0)
-                    number %= digit * order;
-                if (order > 1)
-                    order /= 10;
-                else
-                    order = 0;
-            }
-        }
-        d[i] = 0;
-    }
-
-    return i;
-}
-
-
 /* Decode ITU-T X.690 OID format to a string representation
  * return string length */
-int DecodePolicyOID(char *out, word32 outSz, byte *in, word32 inSz)
-{
-    word32 val, idx = 0, nb_bytes;
-    size_t w_bytes = 0;
+int DecodePolicyOID(char *out, word32 outSz, const byte *in, word32 inSz)
+{
+    word32 val, inIdx = 0, outIdx = 0;
+    int w = 0;
 
     if (out == NULL || in == NULL || outSz < 4 || inSz < 2)
         return BAD_FUNC_ARG;
 
-    /* first two byte must be interpreted as : 40 * int1 + int2 */
-    val = (word16)in[idx++];
-
-    w_bytes = Word32ToString(out, val / 40);
-    out[w_bytes++] = '.';
-    w_bytes += Word32ToString(out+w_bytes, val % 40);
-
-    while (idx < inSz) {
-        /* init value */
-        val = 0;
-        nb_bytes = 0;
-
-        /* check that output size is ok */
-        if (w_bytes > (outSz - 3))
-            return BUFFER_E;
-
+    /* The first byte expands into b/40 dot b%40. */
+    val = in[inIdx++];
+
+    w = XSNPRINTF(out, outSz, "%u.%u", val / 40, val % 40);
+    if (w < 0)
+        goto exit;
+    outIdx += w;
+    val = 0;
+
+    while (inIdx < inSz && outIdx < outSz) {
+        /* extract the next OID digit from in to val */
         /* first bit is used to set if value is coded on 1 or multiple bytes */
-        while ((in[idx+nb_bytes] & 0x80))
-            nb_bytes++;
-
-        if (!nb_bytes)
-            val = (word32)(in[idx++] & 0x7f);
+        if (in[inIdx] & 0x80) {
+            val += in[inIdx] & 0x7F;
+            val *= 128;
+        }
         else {
-            word32 base = 1, tmp = nb_bytes;
-
-            while (tmp != 0) {
-                val += (word32)(in[idx+tmp] & 0x7f) * base;
-                base *= 128;
-                tmp--;
-            }
-            val += (word32)(in[idx++] & 0x7f) * base;
-
-            idx += nb_bytes;
-        }
-
-        out[w_bytes++] = '.';
-        w_bytes += Word32ToString(out+w_bytes, val);
-    }
-
-    return (int)w_bytes;
+            /* write val as text into out */
+            val += in[inIdx];
+            w = XSNPRINTF(out + outIdx, outSz - outIdx, ".%u", val);
+            if (w < 0)
+                goto exit;
+            outIdx += w;
+            val = 0;
+        }
+        inIdx++;
+    }
+    if (outIdx == outSz)
+        outIdx--;
+    out[outIdx] = 0;
+
+    w = (int)outIdx;
+
+exit:
+    return w;
 }
 #endif /* WOLFSSL_CERT_EXT && !WOLFSSL_SEP */
 
-#if defined(WOLFSSL_SEP) || defined(WOLFSSL_CERT_EXT)
+#if defined(WOLFSSL_SEP) || defined(WOLFSSL_CERT_EXT) || defined(WOLFSSL_QT)
     /* Reference: https://tools.ietf.org/html/rfc5280#section-4.2.1.4 */
-    static int DecodeCertPolicy(byte* input, int sz, DecodedCert* cert)
+    static int DecodeCertPolicy(const byte* input, int sz, DecodedCert* cert)
     {
         word32 idx = 0;
         word32 oldIdx;
@@ -6564,6 +8468,15 @@
     #endif
 
         WOLFSSL_ENTER("DecodeCertPolicy");
+        #if defined(WOLFSSL_SEP) || defined(WOLFSSL_CERT_EXT)
+        /* Check if cert is null before dereferencing below */
+        if (cert == NULL)
+            return BAD_FUNC_ARG;
+        #endif
+
+    #if defined(WOLFSSL_CERT_EXT)
+         cert->extCertPoliciesNb = 0;
+    #endif
 
         if (GetSequence(input, &idx, &total_length, sz) < 0) {
             WOLFSSL_MSG("\tGet CertPolicy total seq failed");
@@ -6598,7 +8511,7 @@
 
         #if defined(WOLFSSL_SEP)
                 cert->deviceType = (byte*)XMALLOC(length, cert->heap,
-                                                  DYNAMIC_TYPE_X509_EXT);
+                                                         DYNAMIC_TYPE_X509_EXT);
                 if (cert->deviceType == NULL) {
                     WOLFSSL_MSG("\tCouldn't alloc memory for deviceType");
                     return MEMORY_E;
@@ -6608,8 +8521,9 @@
                 break;
         #elif defined(WOLFSSL_CERT_EXT)
                 /* decode cert policy */
-                if (DecodePolicyOID(cert->extCertPolicies[cert->extCertPoliciesNb], MAX_CERTPOL_SZ,
-                                    input + idx, length) <= 0) {
+                if (DecodePolicyOID(cert->extCertPolicies[
+                                       cert->extCertPoliciesNb], MAX_CERTPOL_SZ,
+                                       input + idx, length) <= 0) {
                     WOLFSSL_MSG("\tCouldn't decode CertPolicy");
                     return ASN_PARSE_E;
                 }
@@ -6618,7 +8532,7 @@
                  * NOT appear more than once in a certificate policies
                  * extension". This is a sanity check for duplicates.
                  * extCertPolicies should only have OID values, additional
-                 * qualifiers need to be stored in a seperate array. */
+                 * qualifiers need to be stored in a separate array. */
                 for (i = 0; i < cert->extCertPoliciesNb; i++) {
                     if (XMEMCMP(cert->extCertPolicies[i],
                             cert->extCertPolicies[cert->extCertPoliciesNb],
@@ -6670,18 +8584,23 @@
     int ret = 0;
     word32 idx = 0;
     int sz = cert->extensionsSz;
-    byte* input = cert->extensions;
+    const byte* input = cert->extensions;
     int length;
     word32 oid;
     byte critical = 0;
     byte criticalFail = 0;
+    byte tag = 0;
 
     WOLFSSL_ENTER("DecodeCertExtensions");
 
     if (input == NULL || sz == 0)
         return BAD_FUNC_ARG;
 
-    if (input[idx++] != ASN_EXTENSIONS) {
+    if (GetASNTag(input, &idx, &tag, sz) < 0) {
+        return ASN_PARSE_E;
+    }
+
+    if (tag != ASN_EXTENSIONS) {
         WOLFSSL_MSG("\tfail: should be an EXTENSIONS");
         return ASN_PARSE_E;
     }
@@ -6697,6 +8616,8 @@
     }
 
     while (idx < (word32)sz) {
+        word32 localIdx;
+
         if (GetSequence(input, &idx, &length, sz) < 0) {
             WOLFSSL_MSG("\tfail: should be a SEQUENCE");
             return ASN_PARSE_E;
@@ -6710,14 +8631,22 @@
 
         /* check for critical flag */
         critical = 0;
-        if (input[idx] == ASN_BOOLEAN) {
-            ret = GetBoolean(input, &idx, sz);
-            if (ret < 0) {
-                WOLFSSL_MSG("\tfail: critical boolean");
-                return ret;
-            }
-
-            critical = (byte)ret;
+        if ((idx + 1) > (word32)sz) {
+            WOLFSSL_MSG("\tfail: malformed buffer");
+            return BUFFER_E;
+        }
+
+        localIdx = idx;
+        if (GetASNTag(input, &localIdx, &tag, sz) == 0) {
+            if (tag == ASN_BOOLEAN) {
+                ret = GetBoolean(input, &idx, sz);
+                if (ret < 0) {
+                    WOLFSSL_MSG("\tfail: critical boolean");
+                    return ret;
+                }
+
+                critical = (byte)ret;
+            }
         }
 
         /* process the extension based on the OID */
@@ -6807,14 +8736,15 @@
                 break;
 
             case CERT_POLICY_OID:
-                #ifdef WOLFSSL_SEP
+                #if defined(WOLFSSL_SEP) || defined(WOLFSSL_QT)
                     VERIFY_AND_SET_OID(cert->extCertPolicySet);
                     #if defined(OPENSSL_EXTRA) || \
                         defined(OPENSSL_EXTRA_X509_SMALL)
                         cert->extCertPolicyCrit = critical;
                     #endif
                 #endif
-                #if defined(WOLFSSL_SEP) || defined(WOLFSSL_CERT_EXT)
+                #if defined(WOLFSSL_SEP) || defined(WOLFSSL_CERT_EXT) || \
+                    defined(WOLFSSL_QT)
                     if (DecodeCertPolicy(&input[idx], length, cert) < 0) {
                         return ASN_PARSE_E;
                     }
@@ -6866,14 +8796,27 @@
                 WOLFSSL_MSG("Inhibit anyPolicy extension not supported yet.");
                 break;
 
+       #ifndef IGNORE_NETSCAPE_CERT_TYPE
+            case NETSCAPE_CT_OID:
+                WOLFSSL_MSG("Netscape certificate type extension not supported "
+                            "yet.");
+                if (CheckBitString(input, &idx, &length, idx + length, 0,
+                                                                    NULL) < 0) {
+                    return ASN_PARSE_E;
+                }
+                break;
+        #endif
+
             default:
+            #ifndef WOLFSSL_NO_ASN_STRICT
                 /* While it is a failure to not support critical extensions,
                  * still parse the certificate ignoring the unsupported
                  * extension to allow caller to accept it with the verify
                  * callback. */
                 if (critical)
                     criticalFail = 1;
-                break;
+            #endif
+            break;
         }
         idx += length;
     }
@@ -6919,15 +8862,14 @@
 #ifdef __cplusplus
     extern "C" {
 #endif
-    WOLFSSL_LOCAL Signer* GetCA(void* signers, byte* hash);
+    Signer* GetCA(void* signers, byte* hash);
     #ifndef NO_SKID
-        WOLFSSL_LOCAL Signer* GetCAByName(void* signers, byte* hash);
+        Signer* GetCAByName(void* signers, byte* hash);
     #endif
 #ifdef __cplusplus
     }
 #endif
 
-
 #if defined(WOLFCRYPT_ONLY) || defined(NO_CERTS)
 
 /* dummy functions, not using wolfSSL so don't need actual ones */
@@ -6949,8 +8891,7 @@
 
 #endif /* WOLFCRYPT_ONLY || NO_CERTS */
 
-#if (defined(WOLFSSL_ALT_CERT_CHAINS) || \
-    defined(WOLFSSL_NO_TRUSTED_CERTS_VERIFY)) && !defined(NO_SKID)
+#if defined(WOLFSSL_NO_TRUSTED_CERTS_VERIFY) && !defined(NO_SKID)
 static Signer* GetCABySubjectAndPubKey(DecodedCert* cert, void* cm)
 {
     Signer* ca = NULL;
@@ -6968,22 +8909,349 @@
 }
 #endif
 
+#if defined(WOLFSSL_SMALL_CERT_VERIFY) || defined(OPENSSL_EXTRA)
+/* Only quick step through the certificate to find fields that are then used
+ * in certificate signature verification.
+ * Must use the signature OID from the signed part of the certificate.
+ *
+ * This is only for minimizing dynamic memory usage during TLS certificate
+ * chain processing.
+ * Doesn't support:
+ *   OCSP Only: alt lookup using subject and pub key w/o sig check
+ */
+static int CheckCertSignature_ex(const byte* cert, word32 certSz, void* heap,
+        void* cm, const byte* pubKey, word32 pubKeySz, int pubKeyOID)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    SignatureCtx  sigCtx[1];
+#else
+    SignatureCtx* sigCtx;
+#endif
+    byte          hash[KEYID_SIZE];
+    Signer*       ca = NULL;
+    word32        idx = 0;
+    int           len;
+    word32        tbsCertIdx = 0;
+    word32        sigIndex   = 0;
+    word32        signatureOID = 0;
+    word32        oid = 0;
+    word32        issuerIdx = 0;
+    word32        issuerSz  = 0;
+#ifndef NO_SKID
+    int           extLen = 0;
+    word32        extIdx = 0;
+    word32        extEndIdx = 0;
+    int           extAuthKeyIdSet = 0;
+#endif
+    int           ret = 0;
+    word32        localIdx;
+    byte          tag;
+
+
+    if (cert == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    sigCtx = (SignatureCtx*)XMALLOC(sizeof(*sigCtx), heap, DYNAMIC_TYPE_SIGNATURE);
+    if (sigCtx == NULL)
+        return MEMORY_E;
+#endif
+    InitSignatureCtx(sigCtx, heap, INVALID_DEVID);
+
+    /* Certificate SEQUENCE */
+    if (GetSequence(cert, &idx, &len, certSz) < 0)
+        ret = ASN_PARSE_E;
+    if (ret == 0) {
+        tbsCertIdx = idx;
+
+        /* TBSCertificate SEQUENCE */
+        if (GetSequence(cert, &idx, &len, certSz) < 0)
+            ret = ASN_PARSE_E;
+    }
+    if (ret == 0) {
+        sigIndex = len + idx;
+
+        if ((idx + 1) > certSz)
+            ret = BUFFER_E;
+    }
+    if (ret == 0) {
+        /* version - optional */
+        localIdx = idx;
+        if (GetASNTag(cert, &localIdx, &tag, certSz) == 0) {
+            if (tag == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED)) {
+                idx++;
+                if (GetLength(cert, &idx, &len, certSz) < 0)
+                    ret = ASN_PARSE_E;
+                idx += len;
+            }
+        }
+    }
+
+    if (ret == 0) {
+        /* serialNumber */
+        if (GetASNHeader(cert, ASN_INTEGER, &idx, &len, certSz) < 0)
+            ret = ASN_PARSE_E;
+    }
+    if (ret == 0) {
+        idx += len;
+
+        /* signature */
+        if (GetAlgoId(cert, &idx, &signatureOID, oidSigType, certSz) < 0)
+            ret = ASN_PARSE_E;
+    }
+
+    if (ret == 0) {
+        issuerIdx = idx;
+        /* issuer */
+        if (GetSequence(cert, &idx, &len, certSz) < 0)
+            ret = ASN_PARSE_E;
+    }
+    if (ret == 0) {
+        issuerSz = len + idx - issuerIdx;
+    }
+#ifndef NO_SKID
+    if (ret == 0) {
+        idx += len;
+
+        /* validity */
+        if (GetSequence(cert, &idx, &len, certSz) < 0)
+            ret = ASN_PARSE_E;
+    }
+    if (ret == 0) {
+        idx += len;
+
+        /* subject */
+        if (GetSequence(cert, &idx, &len, certSz) < 0)
+            ret = ASN_PARSE_E;
+    }
+    if (ret == 0) {
+        idx += len;
+
+        /* subjectPublicKeyInfo */
+        if (GetSequence(cert, &idx, &len, certSz) < 0)
+            ret = ASN_PARSE_E;
+    }
+    if (ret == 0) {
+        idx += len;
+
+        if ((idx + 1) > certSz)
+            ret = BUFFER_E;
+    }
+    if (ret == 0) {
+        /* issuerUniqueID - optional */
+        localIdx = idx;
+        if (GetASNTag(cert, &localIdx, &tag, certSz) == 0) {
+            if (tag == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | 1)) {
+                idx++;
+                if (GetLength(cert, &idx, &len, certSz) < 0)
+                    ret = ASN_PARSE_E;
+                idx += len;
+            }
+        }
+    }
+    if (ret == 0) {
+        if ((idx + 1) > certSz)
+            ret = BUFFER_E;
+    }
+    if (ret == 0) {
+        /* subjectUniqueID - optional */
+        localIdx = idx;
+        if (GetASNTag(cert, &localIdx, &tag, certSz) == 0) {
+            if (tag == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | 2)) {
+                idx++;
+                if (GetLength(cert, &idx, &len, certSz) < 0)
+                    ret = ASN_PARSE_E;
+                idx += len;
+            }
+        }
+    }
+
+    if (ret == 0) {
+        if ((idx + 1) > certSz)
+            ret = BUFFER_E;
+    }
+    /* extensions - optional */
+    localIdx = idx;
+    if (ret == 0 && GetASNTag(cert, &localIdx, &tag, certSz) == 0 &&
+            tag == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | 3)) {
+        idx++;
+        if (GetLength(cert, &idx, &extLen, certSz) < 0)
+            ret = ASN_PARSE_E;
+        if (ret == 0) {
+            if (GetSequence(cert, &idx, &extLen, certSz) < 0)
+                ret = ASN_PARSE_E;
+        }
+        if (ret == 0) {
+            extEndIdx = idx + extLen;
+
+            /* Check each extension for the ones we want. */
+            while (ret == 0 && idx < extEndIdx) {
+                if (GetSequence(cert, &idx, &len, certSz) < 0)
+                    ret = ASN_PARSE_E;
+                if (ret == 0) {
+                    extIdx = idx;
+                    if (GetObjectId(cert, &extIdx, &oid, oidCertExtType,
+                                                                  certSz) < 0) {
+                        ret = ASN_PARSE_E;
+                    }
+
+                    if (ret == 0) {
+                        if ((extIdx + 1) > certSz)
+                            ret = BUFFER_E;
+                    }
+                }
+
+                if (ret == 0) {
+                    localIdx = extIdx;
+                    if (GetASNTag(cert, &localIdx, &tag, certSz) == 0 &&
+                            tag == ASN_BOOLEAN) {
+                        if (GetBoolean(cert, &extIdx, certSz) < 0)
+                            ret = ASN_PARSE_E;
+                    }
+                }
+                if (ret == 0) {
+                    if (GetOctetString(cert, &extIdx, &extLen, certSz) < 0)
+                        ret = ASN_PARSE_E;
+                }
+
+                if (ret == 0) {
+                    switch (oid) {
+                    case AUTH_KEY_OID:
+                        if (GetSequence(cert, &extIdx, &extLen, certSz) < 0)
+                            ret = ASN_PARSE_E;
+
+                        if (ret == 0 && (extIdx + 1) >= certSz)
+                            ret = BUFFER_E;
+
+                        if (ret == 0 &&
+                                GetASNTag(cert, &extIdx, &tag, certSz) == 0 &&
+                                tag == (ASN_CONTEXT_SPECIFIC | 0)) {
+                            if (GetLength(cert, &extIdx, &extLen, certSz) <= 0)
+                                ret = ASN_PARSE_E;
+                            if (ret == 0) {
+                                extAuthKeyIdSet = 1;
+                                if (extLen == KEYID_SIZE)
+                                    XMEMCPY(hash, cert + extIdx, extLen);
+                                else {
+                                    ret = CalcHashId(cert + extIdx, extLen,
+                                                                          hash);
+                                }
+                            }
+                        }
+                        break;
+
+                    default:
+                        break;
+                    }
+                }
+                idx += len;
+            }
+        }
+    }
+
+    if (ret == 0 && pubKey == NULL) {
+        if (extAuthKeyIdSet)
+            ca = GetCA(cm, hash);
+        if (ca == NULL) {
+            ret = CalcHashId(cert + issuerIdx, issuerSz, hash);
+            if (ret == 0)
+                ca = GetCAByName(cm, hash);
+        }
+    }
+#else
+    if (ret == 0 && pubKey == NULL) {
+        ret = CalcHashId(cert + issuerIdx, issuerSz, hash);
+        if (ret == 0)
+            ca = GetCA(cm, hash);
+    }
+#endif /* !NO_SKID */
+    if (ca == NULL && pubKey == NULL)
+        ret = ASN_NO_SIGNER_E;
+
+    if (ret == 0) {
+        idx = sigIndex;
+        /* signatureAlgorithm */
+        if (GetAlgoId(cert, &idx, &oid, oidSigType, certSz) < 0)
+            ret = ASN_PARSE_E;
+    }
+    if (ret == 0) {
+        if (oid != signatureOID)
+            ret = ASN_SIG_OID_E;
+    }
+    if (ret == 0) {
+        /* signatureValue */
+        if (CheckBitString(cert, &idx, &len, certSz, 1, NULL) < 0)
+            ret = ASN_PARSE_E;
+    }
+
+    if (ret == 0) {
+        if (pubKey != NULL) {
+            ret = ConfirmSignature(sigCtx, cert + tbsCertIdx,
+                               sigIndex - tbsCertIdx,
+                               pubKey, pubKeySz, pubKeyOID,
+                               cert + idx, len, signatureOID, NULL);
+        }
+        else {
+            ret = ConfirmSignature(sigCtx, cert + tbsCertIdx,
+                               sigIndex - tbsCertIdx,
+                               ca->publicKey, ca->pubKeySize, ca->keyOID,
+                               cert + idx, len, signatureOID, NULL);
+        }
+        if (ret != 0) {
+            WOLFSSL_MSG("Confirm signature failed");
+        }
+    }
+
+    FreeSignatureCtx(sigCtx);
+#ifdef WOLFSSL_SMALL_STACK
+    if (sigCtx != NULL)
+        XFREE(sigCtx, heap, DYNAMIC_TYPE_SIGNATURE);
+#endif
+    return ret;
+}
+
+#ifdef OPENSSL_EXTRA
+/* Call CheckCertSignature_ex using a public key buffer for verification
+ */
+int CheckCertSignaturePubKey(const byte* cert, word32 certSz, void* heap,
+        const byte* pubKey, word32 pubKeySz, int pubKeyOID)
+{
+    return CheckCertSignature_ex(cert, certSz, heap, NULL,
+            pubKey, pubKeySz, pubKeyOID);
+}
+#endif /* OPENSSL_EXTRA */
+#ifdef WOLFSSL_SMALL_CERT_VERIFY
+/* Call CheckCertSignature_ex using a certificate manager (cm)
+ */
+int CheckCertSignature(const byte* cert, word32 certSz, void* heap, void* cm)
+{
+    return CheckCertSignature_ex(cert, certSz, heap, cm, NULL, 0, 0);
+}
+#endif /* WOLFSSL_SMALL_CERT_VERIFY */
+#endif /* WOLFSSL_SMALL_CERT_VERIFY || OPENSSL_EXTRA */
+
 int ParseCertRelative(DecodedCert* cert, int type, int verify, void* cm)
 {
     int    ret = 0;
-    int    badDate = 0;
-    int    criticalExt = 0;
+    int    checkPathLen = 0;
+    int    decrementMaxPathLen = 0;
     word32 confirmOID;
-    int    selfSigned = 0;
+#if defined(WOLFSSL_RENESAS_TSIP)
+    int    idx = 0;
+#endif
+    byte*  tsip_encRsaKeyIdx;
 
     if (cert == NULL) {
         return BAD_FUNC_ARG;
     }
 
     if (cert->sigCtx.state == SIG_STATE_BEGIN) {
+        cert->badDate = 0;
+        cert->criticalExt = 0;
         if ((ret = DecodeToKey(cert, verify)) < 0) {
             if (ret == ASN_BEFORE_DATE_E || ret == ASN_AFTER_DATE_E)
-                badDate = ret;
+                cert->badDate = ret;
             else
                 return ret;
         }
@@ -7005,7 +9273,7 @@
 
             if ((ret = DecodeCertExtensions(cert)) < 0) {
                 if (ret == ASN_CRIT_EXT_E)
-                    criticalExt = ret;
+                    cert->criticalExt = ret;
                 else
                     return ret;
             }
@@ -7026,26 +9294,38 @@
 
     #ifndef NO_SKID
         if (cert->extSubjKeyIdSet == 0 && cert->publicKey != NULL &&
-                                                        cert->pubKeySize > 0) {
-        #ifdef NO_SHA
-            ret = wc_Sha256Hash(cert->publicKey, cert->pubKeySize,
+                                                         cert->pubKeySize > 0) {
+            ret = CalcHashId(cert->publicKey, cert->pubKeySize,
                                                             cert->extSubjKeyId);
-        #else
-            ret = wc_ShaHash(cert->publicKey, cert->pubKeySize,
-                                                            cert->extSubjKeyId);
-        #endif /* NO_SHA */
             if (ret != 0)
                 return ret;
         }
     #endif /* !NO_SKID */
 
-        if (verify != NO_VERIFY && type != CA_TYPE && type != TRUSTED_PEER_TYPE) {
+        if (!cert->selfSigned || (verify != NO_VERIFY && type != CA_TYPE &&
+                                                   type != TRUSTED_PEER_TYPE)) {
             cert->ca = NULL;
     #ifndef NO_SKID
-            if (cert->extAuthKeyIdSet)
+            if (cert->extAuthKeyIdSet) {
                 cert->ca = GetCA(cm, cert->extAuthKeyId);
-            if (cert->ca == NULL)
+            }
+            if (cert->ca == NULL && cert->extSubjKeyIdSet
+                                 && verify != VERIFY_OCSP) {
+                cert->ca = GetCA(cm, cert->extSubjKeyId);
+            }
+            if (cert->ca != NULL && XMEMCMP(cert->issuerHash,
+                                  cert->ca->subjectNameHash, KEYID_SIZE) != 0) {
+                cert->ca = NULL;
+            }
+            if (cert->ca == NULL) {
                 cert->ca = GetCAByName(cm, cert->issuerHash);
+                /* If AKID is available then this CA doesn't have the public
+                 * key required */
+                if (cert->ca && cert->extAuthKeyIdSet) {
+                    WOLFSSL_MSG("CA SKID doesn't match AKID");
+                    cert->ca = NULL;
+                }
+            }
 
             /* OCSP Only: alt lookup using subject and pub key w/o sig check */
         #ifdef WOLFSSL_NO_TRUSTED_CERTS_VERIFY
@@ -7057,80 +9337,183 @@
                 }
             }
         #endif /* WOLFSSL_NO_TRUSTED_CERTS_VERIFY */
-
-            /* alt lookup using subject and public key */
-        #ifdef WOLFSSL_ALT_CERT_CHAINS
-            if (cert->ca == NULL)
-                cert->ca = GetCABySubjectAndPubKey(cert, cm);
-        #endif
     #else
             cert->ca = GetCA(cm, cert->issuerHash);
-            if (XMEMCMP(cert->issuerHash, cert->subjectHash, KEYID_SIZE) == 0)
-                selfSigned = 1;
     #endif /* !NO_SKID */
-
-            WOLFSSL_MSG("About to verify certificate signature");
-            if (cert->ca) {
-                if (cert->isCA && cert->ca->pathLengthSet) {
-                    if (selfSigned) {
-                        if (cert->ca->pathLength != 0) {
-                           WOLFSSL_MSG("Root CA with path length > 0");
-                           return ASN_PATHLEN_INV_E;
-                        }
-                    }
-                    else {
-                        if (cert->ca->pathLength == 0) {
-                            WOLFSSL_MSG("CA with path length 0 signing a CA");
-                            return ASN_PATHLEN_INV_E;
-                        }
-                        else if (cert->pathLength >= cert->ca->pathLength) {
-
-                            WOLFSSL_MSG("CA signing CA with longer path length");
-                            return ASN_PATHLEN_INV_E;
-                        }
-                    }
-                }
-
-        #ifdef HAVE_OCSP
-                /* Need the CA's public key hash for OCSP */
-            #ifdef NO_SHA
-                ret = wc_Sha256Hash(cert->ca->publicKey, cert->ca->pubKeySize,
-                                                           cert->issuerKeyHash);
-            #else
-                ret = wc_ShaHash(cert->ca->publicKey, cert->ca->pubKeySize,
-                                                           cert->issuerKeyHash);
-            #endif /* NO_SHA */
-                if (ret != 0)
-                    return ret;
-        #endif /* HAVE_OCSP */
-            }
-        }
-    }
+        }
+
+        if (cert->selfSigned) {
+            cert->maxPathLen = WOLFSSL_MAX_PATH_LEN;
+        } else {
+            /* RFC 5280 Section 4.2.1.9:
+             *
+             * load/receive check
+             *
+             * 1) Is CA boolean set?
+             *      No  - SKIP CHECK
+             *      Yes - Check key usage
+             * 2) Is Key usage extension present?
+             *      No  - goto 3
+             *      Yes - check keyCertSign assertion
+             *     2.a) Is keyCertSign asserted?
+             *          No  - goto 4
+             *          Yes - goto 3
+             * 3) Is pathLen set?
+             *      No  - goto 4
+             *      Yes - check pathLen against maxPathLen.
+             *      3.a) Is pathLen less than maxPathLen?
+             *           No - goto 4
+             *           Yes - set maxPathLen to pathLen and EXIT
+             * 4) Is maxPathLen > 0?
+             *      Yes - Reduce by 1
+             *      No  - ERROR
+             */
+
+            if (cert->ca && cert->pathLengthSet) {
+                cert->maxPathLen = cert->pathLength;
+                if (cert->isCA) {
+                    WOLFSSL_MSG("\tCA boolean set");
+                    if (cert->extKeyUsageSet) {
+                         WOLFSSL_MSG("\tExtension Key Usage Set");
+                         if ((cert->extKeyUsage & KEYUSE_KEY_CERT_SIGN) != 0) {
+                            checkPathLen = 1;
+                         } else {
+                            decrementMaxPathLen = 1;
+                         }
+                    } else {
+                        checkPathLen = 1;
+                    } /* !cert->ca check */
+                } /* cert is not a CA (assuming entity cert) */
+
+                if (checkPathLen && cert->pathLengthSet) {
+                    if (cert->pathLength < cert->ca->maxPathLen) {
+                        WOLFSSL_MSG("\tmaxPathLen status: set to pathLength");
+                        cert->maxPathLen = cert->pathLength;
+                    } else {
+                        decrementMaxPathLen = 1;
+                    }
+                }
+
+                if (decrementMaxPathLen && cert->ca->maxPathLen > 0) {
+                    WOLFSSL_MSG("\tmaxPathLen status: reduce by 1");
+                    cert->maxPathLen = cert->ca->maxPathLen - 1;
+                    if (verify != NO_VERIFY && type != CA_TYPE &&
+                                                    type != TRUSTED_PEER_TYPE) {
+                        WOLFSSL_MSG("\tmaxPathLen status: OK");
+                    }
+                } else if (decrementMaxPathLen && cert->ca->maxPathLen == 0) {
+                    cert->maxPathLen = 0;
+                    if (verify != NO_VERIFY && type != CA_TYPE &&
+                                                    type != TRUSTED_PEER_TYPE) {
+                        WOLFSSL_MSG("\tNon-entity cert, maxPathLen is 0");
+                        WOLFSSL_MSG("\tmaxPathLen status: ERROR");
+                        return ASN_PATHLEN_INV_E;
+                    }
+                }
+            } else if (cert->ca && cert->isCA) {
+                /* case where cert->pathLength extension is not set */
+                if (cert->ca->maxPathLen > 0) {
+                    cert->maxPathLen = cert->ca->maxPathLen - 1;
+                } else {
+                    cert->maxPathLen = 0;
+                    if (verify != NO_VERIFY && type != CA_TYPE &&
+                                                    type != TRUSTED_PEER_TYPE) {
+                        WOLFSSL_MSG("\tNon-entity cert, maxPathLen is 0");
+                        WOLFSSL_MSG("\tmaxPathLen status: ERROR");
+                        return ASN_PATHLEN_INV_E;
+                    }
+                }
+            }
+            #ifdef HAVE_OCSP
+            if (verify != NO_VERIFY && type != CA_TYPE &&
+                                                    type != TRUSTED_PEER_TYPE) {
+                if (cert->ca) {
+                    /* Need the CA's public key hash for OCSP */
+                    XMEMCPY(cert->issuerKeyHash, cert->ca->subjectKeyHash,
+                                                                    KEYID_SIZE);
+                }
+
+            }
+            #endif /* HAVE_OCSP */
+        }
+    }
+#if defined(WOLFSSL_RENESAS_TSIP)
+    /* prepare for TSIP TLS cert verification API use */
+    if (cert->keyOID == RSAk) {
+        /* to call TSIP API, it needs keys position info in bytes */
+        if ((ret = RsaPublicKeyDecodeRawIndex(cert->publicKey, (word32*)&idx,
+                                   cert->pubKeySize,
+                                   &cert->sigCtx.pubkey_n_start,
+                                   &cert->sigCtx.pubkey_n_len,
+                                   &cert->sigCtx.pubkey_e_start,
+                                   &cert->sigCtx.pubkey_e_len)) != 0) {
+            WOLFSSL_MSG("Decoding index from cert failed.");
+            return ret;
+        }
+        cert->sigCtx.certBegin = cert->certBegin;
+    }
+    /* check if we can use TSIP for cert verification */
+    /* if the ca is verified as tsip root ca.         */
+    /* TSIP can only handle 2048 bits(256 byte) key.  */
+    if (cert->ca && tsip_checkCA(cert->ca->cm_idx) != 0 &&
+        cert->sigCtx.pubkey_n_len == 256) {
+
+        /* assign memory to encrypted tsip Rsa key index */
+        if (!cert->tsip_encRsaKeyIdx)
+            cert->tsip_encRsaKeyIdx =
+                            (byte*)XMALLOC(TSIP_TLS_ENCPUBKEY_SZ_BY_CERTVRFY,
+                             cert->heap, DYNAMIC_TYPE_RSA);
+        if (cert->tsip_encRsaKeyIdx == NULL)
+                return MEMORY_E;
+    } else {
+        if (cert->ca) {
+            /* TSIP isn't usable */
+            if (tsip_checkCA(cert->ca->cm_idx) == 0)
+                WOLFSSL_MSG("TSIP isn't usable because the ca isn't verified "
+                            "by TSIP.");
+            else if (cert->sigCtx.pubkey_n_len != 256)
+                WOLFSSL_MSG("TSIP isn't usable because the ca isn't signed by "
+                            "RSA 2048.");
+            else
+                WOLFSSL_MSG("TSIP isn't usable");
+        }
+        cert->tsip_encRsaKeyIdx = NULL;
+    }
+
+    tsip_encRsaKeyIdx = cert->tsip_encRsaKeyIdx;
+#else
+    tsip_encRsaKeyIdx = NULL;
+#endif
 
     if (verify != NO_VERIFY && type != CA_TYPE && type != TRUSTED_PEER_TYPE) {
         if (cert->ca) {
-            if (verify == VERIFY || verify == VERIFY_OCSP) {
+            if (verify == VERIFY || verify == VERIFY_OCSP ||
+                                                 verify == VERIFY_SKIP_DATE) {
                 /* try to confirm/verify signature */
                 if ((ret = ConfirmSignature(&cert->sigCtx,
                         cert->source + cert->certBegin,
                         cert->sigIndex - cert->certBegin,
                         cert->ca->publicKey, cert->ca->pubKeySize,
                         cert->ca->keyOID, cert->signature,
-                        cert->sigLength, cert->signatureOID)) != 0) {
-                    if (ret != WC_PENDING_E) {
+                        cert->sigLength, cert->signatureOID,
+                        tsip_encRsaKeyIdx)) != 0) {
+                    if (ret != 0 && ret != WC_PENDING_E) {
                         WOLFSSL_MSG("Confirm signature failed");
                     }
                     return ret;
                 }
-            #ifndef IGNORE_NAME_CONSTRAINTS
+            }
+        #ifndef IGNORE_NAME_CONSTRAINTS
+            if (verify == VERIFY || verify == VERIFY_OCSP ||
+                        verify == VERIFY_NAME || verify == VERIFY_SKIP_DATE) {
                 /* check that this cert's name is permitted by the signer's
                  * name constraints */
                 if (!ConfirmNameConstraints(cert->ca, cert)) {
                     WOLFSSL_MSG("Confirm name constraint failed");
                     return ASN_NAME_INVALID_E;
                 }
-            #endif /* IGNORE_NAME_CONSTRAINTS */
-            }
+            }
+        #endif /* IGNORE_NAME_CONSTRAINTS */
         }
         else {
             /* no signer */
@@ -7143,11 +9526,15 @@
 exit_pcr:
 #endif
 
-    if (badDate != 0)
-        return badDate;
-
-    if (criticalExt != 0)
-        return criticalExt;
+    if (cert->badDate != 0) {
+        if (verify != VERIFY_SKIP_DATE) {
+            return cert->badDate;
+        }
+        WOLFSSL_MSG("Date error: Verify option is skipping");
+    }
+
+    if (cert->criticalExt != 0)
+        return cert->criticalExt;
 
     return ret;
 }
@@ -7158,21 +9545,7 @@
     Signer* signer = (Signer*) XMALLOC(sizeof(Signer), heap,
                                        DYNAMIC_TYPE_SIGNER);
     if (signer) {
-        signer->pubKeySize = 0;
-        signer->keyOID     = 0;
-        signer->publicKey  = NULL;
-        signer->nameLen    = 0;
-        signer->name       = NULL;
-    #ifndef IGNORE_NAME_CONSTRAINTS
-        signer->permittedNames = NULL;
-        signer->excludedNames = NULL;
-    #endif /* IGNORE_NAME_CONSTRAINTS */
-        signer->pathLengthSet = 0;
-        signer->pathLength = 0;
-    #ifdef WOLFSSL_SIGNER_DER_CERT
-        signer->derCert    = NULL;
-    #endif
-        signer->next       = NULL;
+        XMEMSET(signer, 0, sizeof(Signer));
     }
     (void)heap;
 
@@ -7184,7 +9557,7 @@
 void FreeSigner(Signer* signer, void* heap)
 {
     XFREE(signer->name, heap, DYNAMIC_TYPE_SUBJECT_CN);
-    XFREE(signer->publicKey, heap, DYNAMIC_TYPE_PUBLIC_KEY);
+    XFREE((void*)signer->publicKey, heap, DYNAMIC_TYPE_PUBLIC_KEY);
 #ifndef IGNORE_NAME_CONSTRAINTS
     if (signer->permittedNames)
         FreeNameSubtrees(signer->permittedNames, heap);
@@ -7259,7 +9632,7 @@
 }
 #endif /* WOLFSSL_TRUST_PEER_CERT */
 
-WOLFSSL_LOCAL int SetMyVersion(word32 version, byte* output, int header)
+int SetMyVersion(word32 version, byte* output, int header)
 {
     int i = 0;
 
@@ -7277,11 +9650,10 @@
     return i;
 }
 
-
-WOLFSSL_LOCAL int SetSerialNumber(const byte* sn, word32 snSz, byte* output,
-    int maxSnSz)
-{
-    int i = 0;
+int SetSerialNumber(const byte* sn, word32 snSz, byte* output,
+    word32 outputSz, int maxSnSz)
+{
+    int i;
     int snSzInt = (int)snSz;
 
     if (sn == NULL || output == NULL || snSzInt < 0)
@@ -7292,29 +9664,29 @@
         snSzInt--;
         sn++;
     }
-
+    /* RFC 5280 - 4.1.2.2:
+     *   Serial numbers must be a positive value (and not zero) */
+    if (snSzInt == 0)
+        return BAD_FUNC_ARG;
+
+    if (sn[0] & 0x80)
+        maxSnSz--;
     /* truncate if input is too long */
     if (snSzInt > maxSnSz)
         snSzInt = maxSnSz;
 
-    /* encode ASN Integer, with length and value */
-    output[i++] = ASN_INTEGER;
-
-    /* handle MSB, to make sure value is positive */
-    if (sn[0] & 0x80) {
-        /* make room for zero pad */
-        if (snSzInt > maxSnSz-1)
-            snSzInt = maxSnSz-1;
-
-        /* add zero pad */
-        i += SetLength(snSzInt+1, &output[i]);
-        output[i++] = 0x00;
-        XMEMCPY(&output[i], sn, snSzInt);
-    }
-    else {
-        i += SetLength(snSzInt, &output[i]);
-        XMEMCPY(&output[i], sn, snSzInt);
-    }
+    i = SetASNInt(snSzInt, sn[0], NULL);
+    /* truncate if input is too long */
+    if (snSzInt > (int)outputSz - i)
+        snSzInt = (int)outputSz - i;
+    /* sanity check number of bytes to copy */
+    if (snSzInt <= 0) {
+        return BUFFER_E;
+    }
+
+    /* write out ASN.1 Integer */
+    (void)SetASNInt(snSzInt, sn[0], output);
+    XMEMCPY(output + i, sn, snSzInt);
 
     /* compute final length */
     i += snSzInt;
@@ -7322,7 +9694,9 @@
     return i;
 }
 
-WOLFSSL_LOCAL int GetSerialNumber(const byte* input, word32* inOutIdx,
+#endif /* !NO_CERTS */
+
+int GetSerialNumber(const byte* input, word32* inOutIdx,
     byte* serial, int* serialSz, word32 maxIdx)
 {
     int result = 0;
@@ -7356,6 +9730,7 @@
     return result;
 }
 
+#ifndef NO_CERTS
 
 int AllocDer(DerBuffer** pDer, word32 length, int type, void* heap)
 {
@@ -7411,54 +9786,78 @@
     }
 }
 
+int wc_AllocDer(DerBuffer** pDer, word32 length, int type, void* heap)
+{
+    return AllocDer(pDer, length, type, heap);
+}
+void wc_FreeDer(DerBuffer** pDer)
+{
+    FreeDer(pDer);
+}
+
 
 #if defined(WOLFSSL_PEM_TO_DER) || defined(WOLFSSL_DER_TO_PEM)
 
 /* Max X509 header length indicates the max length + 2 ('\n', '\0') */
 #define MAX_X509_HEADER_SZ  (37 + 2)
 
-const char* const BEGIN_CERT           = "-----BEGIN CERTIFICATE-----";
-const char* const END_CERT             = "-----END CERTIFICATE-----";
+wcchar BEGIN_CERT           = "-----BEGIN CERTIFICATE-----";
+wcchar END_CERT             = "-----END CERTIFICATE-----";
 #ifdef WOLFSSL_CERT_REQ
-    const char* const BEGIN_CERT_REQ   = "-----BEGIN CERTIFICATE REQUEST-----";
-    const char* const END_CERT_REQ     = "-----END CERTIFICATE REQUEST-----";
+    wcchar BEGIN_CERT_REQ   = "-----BEGIN CERTIFICATE REQUEST-----";
+    wcchar END_CERT_REQ     = "-----END CERTIFICATE REQUEST-----";
 #endif
 #ifndef NO_DH
-    const char* const BEGIN_DH_PARAM   = "-----BEGIN DH PARAMETERS-----";
-    const char* const END_DH_PARAM     = "-----END DH PARAMETERS-----";
+    wcchar BEGIN_DH_PARAM   = "-----BEGIN DH PARAMETERS-----";
+    wcchar END_DH_PARAM     = "-----END DH PARAMETERS-----";
 #endif
 #ifndef NO_DSA
-    const char* const BEGIN_DSA_PARAM  = "-----BEGIN DSA PARAMETERS-----";
-    const char* const END_DSA_PARAM    = "-----END DSA PARAMETERS-----";
-#endif
-const char* const BEGIN_X509_CRL       = "-----BEGIN X509 CRL-----";
-const char* const END_X509_CRL         = "-----END X509 CRL-----";
-const char* const BEGIN_RSA_PRIV       = "-----BEGIN RSA PRIVATE KEY-----";
-const char* const END_RSA_PRIV         = "-----END RSA PRIVATE KEY-----";
-const char* const BEGIN_PRIV_KEY       = "-----BEGIN PRIVATE KEY-----";
-const char* const END_PRIV_KEY         = "-----END PRIVATE KEY-----";
-const char* const BEGIN_ENC_PRIV_KEY   = "-----BEGIN ENCRYPTED PRIVATE KEY-----";
-const char* const END_ENC_PRIV_KEY     = "-----END ENCRYPTED PRIVATE KEY-----";
+    wcchar BEGIN_DSA_PARAM  = "-----BEGIN DSA PARAMETERS-----";
+    wcchar END_DSA_PARAM    = "-----END DSA PARAMETERS-----";
+#endif
+wcchar BEGIN_X509_CRL       = "-----BEGIN X509 CRL-----";
+wcchar END_X509_CRL         = "-----END X509 CRL-----";
+wcchar BEGIN_RSA_PRIV       = "-----BEGIN RSA PRIVATE KEY-----";
+wcchar END_RSA_PRIV         = "-----END RSA PRIVATE KEY-----";
+wcchar BEGIN_PRIV_KEY       = "-----BEGIN PRIVATE KEY-----";
+wcchar END_PRIV_KEY         = "-----END PRIVATE KEY-----";
+wcchar BEGIN_ENC_PRIV_KEY   = "-----BEGIN ENCRYPTED PRIVATE KEY-----";
+wcchar END_ENC_PRIV_KEY     = "-----END ENCRYPTED PRIVATE KEY-----";
 #ifdef HAVE_ECC
-    const char* const BEGIN_EC_PRIV    = "-----BEGIN EC PRIVATE KEY-----";
-    const char* const END_EC_PRIV      = "-----END EC PRIVATE KEY-----";
-#endif
-#if defined(HAVE_ECC) || defined(HAVE_ED25519) || !defined(NO_DSA)
-    const char* const BEGIN_DSA_PRIV   = "-----BEGIN DSA PRIVATE KEY-----";
-    const char* const END_DSA_PRIV     = "-----END DSA PRIVATE KEY-----";
-#endif
-const char* const BEGIN_PUB_KEY        = "-----BEGIN PUBLIC KEY-----";
-const char* const END_PUB_KEY          = "-----END PUBLIC KEY-----";
-#ifdef HAVE_ED25519
-    const char* const BEGIN_EDDSA_PRIV = "-----BEGIN EDDSA PRIVATE KEY-----";
-    const char* const END_EDDSA_PRIV   = "-----END EDDSA PRIVATE KEY-----";
+    wcchar BEGIN_EC_PRIV    = "-----BEGIN EC PRIVATE KEY-----";
+    wcchar END_EC_PRIV      = "-----END EC PRIVATE KEY-----";
+#endif
+#if defined(HAVE_ECC) || defined(HAVE_ED25519) || defined(HAVE_ED448) || \
+                                                                !defined(NO_DSA)
+    wcchar BEGIN_DSA_PRIV   = "-----BEGIN DSA PRIVATE KEY-----";
+    wcchar END_DSA_PRIV     = "-----END DSA PRIVATE KEY-----";
+#endif
+#ifdef OPENSSL_EXTRA
+    const char BEGIN_PRIV_KEY_PREFIX[] = "-----BEGIN";
+    const char PRIV_KEY_SUFFIX[] = "PRIVATE KEY-----";
+    const char END_PRIV_KEY_PREFIX[]   = "-----END";
+#endif
+wcchar BEGIN_PUB_KEY        = "-----BEGIN PUBLIC KEY-----";
+wcchar END_PUB_KEY          = "-----END PUBLIC KEY-----";
+#if defined(HAVE_ED25519) || defined(HAVE_ED448)
+    wcchar BEGIN_EDDSA_PRIV = "-----BEGIN EDDSA PRIVATE KEY-----";
+    wcchar END_EDDSA_PRIV   = "-----END EDDSA PRIVATE KEY-----";
 #endif
 #ifdef HAVE_CRL
     const char *const BEGIN_CRL = "-----BEGIN X509 CRL-----";
-    const char* const END_CRL   = "-----END X509 CRL-----";
-#endif
-
-
+    wcchar END_CRL   = "-----END X509 CRL-----";
+#endif
+
+
+static WC_INLINE char* SkipEndOfLineChars(char* line, const char* endOfLine)
+{
+    /* eat end of line characters */
+    while (line < endOfLine &&
+              (line[0] == '\r' || line[0] == '\n')) {
+        line++;
+    }
+    return line;
+}
 
 int wc_PemGetHeaderFooter(int type, const char** header, const char** footer)
 {
@@ -7523,6 +9922,11 @@
             break;
     #ifdef HAVE_ED25519
         case ED25519_TYPE:
+    #endif
+    #ifdef HAVE_ED448
+        case ED448_TYPE:
+    #endif
+    #if defined(HAVE_ED25519) || defined(HAVE_ED448)
         case EDDSA_PRIVATEKEY_TYPE:
             if (header) *header = BEGIN_EDDSA_PRIV;
             if (footer) *footer = END_EDDSA_PRIV;
@@ -7530,10 +9934,24 @@
             break;
     #endif
         case PUBLICKEY_TYPE:
+        case ECC_PUBLICKEY_TYPE:
             if (header) *header = BEGIN_PUB_KEY;
             if (footer) *footer = END_PUB_KEY;
             ret = 0;
             break;
+    #if !defined(NO_DH) && (defined(WOLFSSL_QT) || defined(OPENSSL_ALL))
+        case DH_PRIVATEKEY_TYPE:
+    #endif
+        case PKCS8_PRIVATEKEY_TYPE:
+            if (header) *header = BEGIN_PRIV_KEY;
+            if (footer) *footer = END_PRIV_KEY;
+            ret = 0;
+            break;
+        case PKCS8_ENC_PRIVATEKEY_TYPE:
+            if (header) *header = BEGIN_ENC_PRIV_KEY;
+            if (footer) *footer = END_ENC_PRIV_KEY;
+            ret = 0;
+            break;
         default:
             break;
     }
@@ -7542,22 +9960,22 @@
 
 #ifdef WOLFSSL_ENCRYPTED_KEYS
 
-static const char* const kProcTypeHeader = "Proc-Type";
-static const char* const kDecInfoHeader = "DEK-Info";
+static wcchar kProcTypeHeader = "Proc-Type";
+static wcchar kDecInfoHeader = "DEK-Info";
 
 #ifdef WOLFSSL_PEM_TO_DER
 #ifndef NO_DES3
-    static const char* const kEncTypeDes = "DES-CBC";
-    static const char* const kEncTypeDes3 = "DES-EDE3-CBC";
+    static wcchar kEncTypeDes = "DES-CBC";
+    static wcchar kEncTypeDes3 = "DES-EDE3-CBC";
 #endif
 #if !defined(NO_AES) && defined(HAVE_AES_CBC) && defined(WOLFSSL_AES_128)
-    static const char* const kEncTypeAesCbc128 = "AES-128-CBC";
+    static wcchar kEncTypeAesCbc128 = "AES-128-CBC";
 #endif
 #if !defined(NO_AES) && defined(HAVE_AES_CBC) && defined(WOLFSSL_AES_192)
-    static const char* const kEncTypeAesCbc192 = "AES-192-CBC";
+    static wcchar kEncTypeAesCbc192 = "AES-192-CBC";
 #endif
 #if !defined(NO_AES) && defined(HAVE_AES_CBC) && defined(WOLFSSL_AES_256)
-    static const char* const kEncTypeAesCbc256 = "AES-256-CBC";
+    static wcchar kEncTypeAesCbc256 = "AES-256-CBC";
 #endif
 
 int wc_EncryptedInfoGet(EncryptedInfo* info, const char* cipherInfo)
@@ -7611,8 +10029,7 @@
     return ret;
 }
 
-static int wc_EncryptedInfoParse(EncryptedInfo* info,
-    char** pBuffer, size_t bufSz)
+int wc_EncryptedInfoParse(EncryptedInfo* info, char** pBuffer, size_t bufSz)
 {
     int err = 0;
     char*  bufferStart;
@@ -7678,17 +10095,26 @@
                 return BUFFER_E;
             info->name[finish - start] = '\0'; /* null term */
 
+            /* populate info */
+            err = wc_EncryptedInfoGet(info, info->name);
+            if (err != 0)
+                return err;
+
             /* get IV */
-            if (finishSz < sizeof(info->iv) + 1)
+            if (finishSz < info->ivSz + 1)
                 return BUFFER_E;
-            if (XMEMCPY(info->iv, finish + 1, sizeof(info->iv)) == NULL)
-                return BUFFER_E;
-
-            if (newline == NULL)
+
+            if (newline == NULL) {
                 newline = XSTRNSTR(finish, "\n", min(finishSz,
                                                      PEM_LINE_LEN));
+            }
             if ((newline != NULL) && (newline > finish)) {
-                info->ivSz = (word32)(newline - (finish + 1));
+                finish++;
+                info->ivSz = (word32)(newline - finish);
+                if (info->ivSz > IV_SZ)
+                    return BUFFER_E;
+                if (XMEMCPY(info->iv, finish, info->ivSz) == NULL)
+                    return BUFFER_E;
                 info->set = 1;
             }
             else
@@ -7697,18 +10123,12 @@
         else
             return BUFFER_E;
 
-        /* eat blank line */
-        while (newline < bufferEnd &&
-                (*newline == '\r' || *newline == '\n')) {
-            newline++;
-        }
+        /* eat end of line characters */
+        newline = SkipEndOfLineChars(newline, bufferEnd);
 
         /* return new headerEnd */
-        if (pBuffer)
-            *pBuffer = newline;
-
-        /* populate info */
-        err = wc_EncryptedInfoGet(info, info->name);
+
+        *pBuffer = newline;
     }
 
     return err;
@@ -7716,19 +10136,23 @@
 #endif /* WOLFSSL_PEM_TO_DER */
 
 #ifdef WOLFSSL_DER_TO_PEM
-static int wc_EncryptedInfoAppend(char* dest, char* cipherInfo)
+static int wc_EncryptedInfoAppend(char* dest, int destSz, char* cipherInfo)
 {
     if (cipherInfo != NULL) {
-        size_t cipherInfoStrLen = XSTRLEN(cipherInfo);
+        int cipherInfoStrLen = (int)XSTRLEN((char*)cipherInfo);
+
         if (cipherInfoStrLen > HEADER_ENCRYPTED_KEY_SIZE - (9+14+10+3))
             cipherInfoStrLen = HEADER_ENCRYPTED_KEY_SIZE - (9+14+10+3);
 
-        XSTRNCAT(dest, kProcTypeHeader, 9);
-        XSTRNCAT(dest, ": 4,ENCRYPTED\n", 14);
-        XSTRNCAT(dest, kDecInfoHeader, 8);
-        XSTRNCAT(dest, ": ", 2);
-        XSTRNCAT(dest, cipherInfo, cipherInfoStrLen);
-        XSTRNCAT(dest, "\n\n", 3);
+        if (destSz - (int)XSTRLEN(dest) >= cipherInfoStrLen + (9+14+8+2+2+1)) {
+            /* strncat's src length needs to include the NULL */
+            XSTRNCAT(dest, kProcTypeHeader, 10);
+            XSTRNCAT(dest, ": 4,ENCRYPTED\n", 15);
+            XSTRNCAT(dest, kDecInfoHeader, 9);
+            XSTRNCAT(dest, ": ", 3);
+            XSTRNCAT(dest, cipherInfo, destSz - (int)XSTRLEN(dest) - 1);
+            XSTRNCAT(dest, "\n\n", 4);
+        }
     }
     return 0;
 }
@@ -7785,20 +10209,18 @@
     }
 #endif
 
-    /* null term and leave room for newline */
-    header[--headerLen] = '\0'; header[--headerLen] = '\0';
-    footer[--footerLen] = '\0'; footer[--footerLen] = '\0';
-
     /* build header and footer based on type */
-    XSTRNCPY(header, headerStr, headerLen);
-    XSTRNCPY(footer, footerStr, footerLen);
+    XSTRNCPY(header, headerStr, headerLen - 1);
+    header[headerLen - 2] = 0;
+    XSTRNCPY(footer, footerStr, footerLen - 1);
+    footer[footerLen - 2] = 0;
 
     /* add new line to end */
     XSTRNCAT(header, "\n", 2);
     XSTRNCAT(footer, "\n", 2);
 
 #ifdef WOLFSSL_ENCRYPTED_KEYS
-    err = wc_EncryptedInfoAppend(header, (char*)cipher_info);
+    err = wc_EncryptedInfoAppend(header, headerLen, (char*)cipher_info);
     if (err != 0) {
     #ifdef WOLFSSL_SMALL_STACK
         XFREE(header, NULL, DYNAMIC_TYPE_TMP_BUFFER);
@@ -7883,7 +10305,7 @@
 /* Remove PEM header/footer, convert to ASN1, store any encrypted data
    info->consumed tracks of PEM bytes consumed in case multiple parts */
 int PemToDer(const unsigned char* buff, long longSz, int type,
-              DerBuffer** pDer, void* heap, EncryptedInfo* info, int* eccKey)
+              DerBuffer** pDer, void* heap, EncryptedInfo* info, int* keyFormat)
 {
     const char* header      = NULL;
     const char* footer      = NULL;
@@ -7896,6 +10318,16 @@
     int         sz          = (int)longSz;
     int         encrypted_key = 0;
     DerBuffer*  der;
+#if defined(HAVE_PKCS8) || defined(WOLFSSL_ENCRYPTED_KEYS)
+    word32      algId = 0;
+    #if defined(WOLFSSL_ENCRYPTED_KEYS) && !defined(NO_DES3) && !defined(NO_WOLFSSL_SKIP_TRAILING_PAD)
+        int     padVal = 0;
+    #endif
+#endif
+#ifdef OPENSSL_EXTRA
+    char        beginBuf[PEM_LINE_LEN + 1]; /* add 1 for null terminator */
+    char        endBuf[PEM_LINE_LEN + 1];   /* add 1 for null terminator */
+#endif
 
     WOLFSSL_ENTER("PemToDer");
 
@@ -7908,36 +10340,41 @@
     for (;;) {
         headerEnd = XSTRNSTR((char*)buff, header, sz);
 
-        if (headerEnd || type != PRIVATEKEY_TYPE) {
+        if (headerEnd) {
             break;
         } else
-        if (header == BEGIN_RSA_PRIV) {
-            header =  BEGIN_PRIV_KEY;       footer = END_PRIV_KEY;
-        } else
-        if (header == BEGIN_PRIV_KEY) {
-            header =  BEGIN_ENC_PRIV_KEY;   footer = END_ENC_PRIV_KEY;
-        } else
-#ifdef HAVE_ECC
-        if (header == BEGIN_ENC_PRIV_KEY) {
-            header =  BEGIN_EC_PRIV;        footer = END_EC_PRIV;
-        } else
-        if (header == BEGIN_EC_PRIV) {
-            header =  BEGIN_DSA_PRIV;       footer = END_DSA_PRIV;
+        if (type == PRIVATEKEY_TYPE) {
+            if (header == BEGIN_RSA_PRIV) {
+                header =  BEGIN_PRIV_KEY;       footer = END_PRIV_KEY;
+            } else
+            if (header == BEGIN_PRIV_KEY) {
+                header =  BEGIN_ENC_PRIV_KEY;   footer = END_ENC_PRIV_KEY;
+            } else
+    #ifdef HAVE_ECC
+            if (header == BEGIN_ENC_PRIV_KEY) {
+                header =  BEGIN_EC_PRIV;        footer = END_EC_PRIV;
+            } else
+            if (header == BEGIN_EC_PRIV) {
+                header =  BEGIN_DSA_PRIV;       footer = END_DSA_PRIV;
+            } else
+    #endif
+    #if defined(HAVE_ED25519) || defined(HAVE_ED448)
+        #ifdef HAVE_ECC
+            if (header == BEGIN_DSA_PRIV)
+        #else
+            if (header == BEGIN_ENC_PRIV_KEY)
+        #endif
+            {
+                header =  BEGIN_EDDSA_PRIV;     footer = END_EDDSA_PRIV;
+            } else
+    #endif
+            {
+                break;
+            }
         } else
-#endif
-#ifdef HAVE_ED25519
-    #ifdef HAVE_ECC
-        if (header == BEGIN_DSA_PRIV)
-    #else
-        if (header == BEGIN_ENC_PRIV_KEY)
-    #endif
-        {
-            header =  BEGIN_EDDSA_PRIV;     footer = END_EDDSA_PRIV;
-        } else
-#endif
 #ifdef HAVE_CRL
-        if (type == CRL_TYPE) {
-            header =  BEGIN_CRL;        footer = END_CRL;
+        if ((type == CRL_TYPE) && (header != BEGIN_CRL)) {
+            header =  BEGIN_CRL;                footer = END_CRL;
         } else
 #endif
         {
@@ -7946,32 +10383,73 @@
     }
 
     if (!headerEnd) {
+#ifdef OPENSSL_EXTRA
+        char* beginEnd;
+        int endLen;
+        /* see if there is a -----BEGIN * PRIVATE KEY----- header */
+        headerEnd = XSTRNSTR((char*)buff, PRIV_KEY_SUFFIX, sz);
+        if (headerEnd) {
+            beginEnd = headerEnd + XSTR_SIZEOF(PRIV_KEY_SUFFIX);
+            /* back up to BEGIN_PRIV_KEY_PREFIX */
+            headerEnd -= XSTR_SIZEOF(BEGIN_PRIV_KEY_PREFIX);
+            while (headerEnd > (char*)buff &&
+                    XSTRNCMP(headerEnd, BEGIN_PRIV_KEY_PREFIX,
+                            XSTR_SIZEOF(BEGIN_PRIV_KEY_PREFIX)) != 0) {
+                headerEnd--;
+            }
+            if (headerEnd <= (char*)buff ||
+                    XSTRNCMP(headerEnd, BEGIN_PRIV_KEY_PREFIX,
+                    XSTR_SIZEOF(BEGIN_PRIV_KEY_PREFIX)) != 0 ||
+                    beginEnd - headerEnd > PEM_LINE_LEN) {
+                WOLFSSL_MSG("Couldn't find PEM header");
+                return ASN_NO_PEM_HEADER;
+            }
+            /* headerEnd now points to beginning of header */
+            XMEMCPY(beginBuf, headerEnd, beginEnd - headerEnd);
+            beginBuf[beginEnd - headerEnd] = '\0';
+            /* look for matching footer */
+            footer = XSTRNSTR(beginEnd,
+                            beginBuf + XSTR_SIZEOF(BEGIN_PRIV_KEY_PREFIX),
+                            (unsigned int)((char*)buff + sz - beginEnd));
+            if (!footer) {
+                WOLFSSL_MSG("Couldn't find PEM footer");
+                return ASN_NO_PEM_HEADER;
+            }
+            footer -= XSTR_SIZEOF(END_PRIV_KEY_PREFIX);
+            endLen = (unsigned int)(beginEnd - headerEnd -
+                        (XSTR_SIZEOF(BEGIN_PRIV_KEY_PREFIX) -
+                                XSTR_SIZEOF(END_PRIV_KEY_PREFIX)));
+            XMEMCPY(endBuf, footer, endLen);
+            endBuf[endLen] = '\0';
+
+            header = beginBuf;
+            footer = endBuf;
+            headerEnd = beginEnd;
+        } else {
+            WOLFSSL_MSG("Couldn't find PEM header");
+            return ASN_NO_PEM_HEADER;
+        }
+#else
         WOLFSSL_MSG("Couldn't find PEM header");
         return ASN_NO_PEM_HEADER;
-    }
-
-    headerEnd += XSTRLEN(header);
-
-    if ((headerEnd + 1) >= bufferEnd)
-        return BUFFER_E;
-
-    /* eat end of line */
-    if (headerEnd[0] == '\n')
-        headerEnd++;
-    else if (headerEnd[1] == '\n')
-        headerEnd += 2;
-    else {
-        if (info)
-            info->consumed = (long)(headerEnd+2 - (char*)buff);
-        return BUFFER_E;
-    }
+#endif
+    } else {
+        headerEnd += XSTRLEN(header);
+    }
+
+    /* eat end of line characters */
+    headerEnd = SkipEndOfLineChars(headerEnd, bufferEnd);
 
     if (type == PRIVATEKEY_TYPE) {
-        if (eccKey) {
+        /* keyFormat is Key_Sum enum */
+        if (keyFormat) {
         #ifdef HAVE_ECC
-            *eccKey = (header == BEGIN_EC_PRIV) ? 1 : 0;
-        #else
-            *eccKey = 0;
+            if (header == BEGIN_EC_PRIV)
+                *keyFormat = ECDSAk;
+        #endif
+        #if !defined(NO_DSA)
+            if (header == BEGIN_DSA_PRIV)
+                *keyFormat = DSAk;
         #endif
         }
     }
@@ -7987,7 +10465,7 @@
 #endif /* WOLFSSL_ENCRYPTED_KEYS */
 
     /* find footer */
-    footerEnd = XSTRNSTR((char*)buff, footer, sz);
+    footerEnd = XSTRNSTR(headerEnd, footer, (unsigned int)((char*)buff + sz - headerEnd));
     if (!footerEnd) {
         if (info)
             info->consumed = longSz; /* No more certs if no footer */
@@ -7996,17 +10474,12 @@
 
     consumedEnd = footerEnd + XSTRLEN(footer);
 
-    if (consumedEnd < bufferEnd) {  /* handle no end of line on last line */
-        /* eat end of line */
-        if (consumedEnd[0] == '\n')
+    if (consumedEnd < bufferEnd) { /* handle no end of line on last line */
+        /* eat end of line characters */
+        consumedEnd = SkipEndOfLineChars(consumedEnd, bufferEnd);
+        /* skip possible null term */
+        if (consumedEnd < bufferEnd && consumedEnd[0] == '\0')
             consumedEnd++;
-        else if ((consumedEnd + 1 < bufferEnd) && consumedEnd[1] == '\n')
-            consumedEnd += 2;
-        else {
-            if (info)
-                info->consumed = (long)(consumedEnd+2 - (char*)buff);
-            return BUFFER_E;
-        }
     }
 
     if (info)
@@ -8027,12 +10500,28 @@
                       der->buffer, &der->length) < 0)
         return BUFFER_E;
 
-    if (header == BEGIN_PRIV_KEY && !encrypted_key) {
+    if ((header == BEGIN_PRIV_KEY
+#ifdef OPENSSL_EXTRA
+         || header == beginBuf
+#endif
+#ifdef HAVE_ECC
+         || header == BEGIN_EC_PRIV
+#endif
+        ) && !encrypted_key)
+    {
+    #ifdef HAVE_PKCS8
         /* pkcs8 key, convert and adjust length */
-        if ((ret = ToTraditional(der->buffer, der->length)) < 0)
-            return ret;
-
-        der->length = ret;
+        if ((ret = ToTraditional_ex(der->buffer, der->length, &algId)) > 0) {
+            der->length = ret;
+            if (keyFormat) {
+                *keyFormat = algId;
+            }
+        }
+        else {
+            /* ignore failure here and assume key is not pkcs8 wrapped */
+        }
+    #endif
+
         return 0;
     }
 
@@ -8066,10 +10555,14 @@
             if (header == BEGIN_ENC_PRIV_KEY) {
             #ifndef NO_PWDBASED
                 ret = ToTraditionalEnc(der->buffer, der->length,
-                                       password, passwordSz);
+                                       password, passwordSz, &algId);
 
                 if (ret >= 0) {
                     der->length = ret;
+                    if (keyFormat) {
+                        *keyFormat = algId;
+                    }
+                    ret = 0;
                 }
             #else
                 ret = NOT_COMPILED_IN;
@@ -8077,11 +10570,39 @@
             }
             /* decrypt the key */
             else {
-                ret = wc_BufferKeyDecrypt(info, der->buffer, der->length,
-                    (byte*)password, passwordSz, WC_MD5);
-            }
+                if (passwordSz == 0) {
+                    /* The key is encrypted but does not have a password */
+                    WOLFSSL_MSG("No password for encrypted key");
+                    ret = NO_PASSWORD;
+                }
+                else {
+                    ret = wc_BufferKeyDecrypt(info, der->buffer, der->length,
+                        (byte*)password, passwordSz, WC_MD5);
+
+#ifndef NO_WOLFSSL_SKIP_TRAILING_PAD
+                #ifndef NO_DES3
+                    if (info->cipherType == WC_CIPHER_DES3) {
+                        padVal = der->buffer[der->length-1];
+                        if (padVal <= DES_BLOCK_SIZE) {
+                            der->length -= padVal;
+                        }
+                    }
+                #endif /* !NO_DES3 */
+#endif /* !NO_WOLFSSL_SKIP_TRAILING_PAD */
+                }
+            }
+#ifdef OPENSSL_EXTRA
+            if (ret) {
+                PEMerr(0, PEM_R_BAD_DECRYPT);
+            }
+#endif
             ForceZero(password, passwordSz);
         }
+#ifdef OPENSSL_EXTRA
+        else {
+            PEMerr(0, PEM_R_BAD_PASSWORD_READ);
+        }
+#endif
 
     #ifdef WOLFSSL_SMALL_STACK
         XFREE(password, heap, DYNAMIC_TYPE_STRING);
@@ -8148,7 +10669,7 @@
     XFREE(info, NULL, DYNAMIC_TYPE_ENCRYPTEDINFO);
 #endif
 
-    if (ret < 0) {
+    if (ret < 0 || der == NULL) {
         WOLFSSL_MSG("Bad Pem To Der");
     }
     else {
@@ -8189,7 +10710,7 @@
 
 
     ret = PemToDer(pem, pemSz, type, &der, NULL, NULL, &eccKey);
-    if (ret < 0) {
+    if (ret < 0 || der == NULL) {
         WOLFSSL_MSG("Bad Pem To Der");
     }
     else {
@@ -8228,7 +10749,7 @@
     }
 
     ret = PemToDer(pem, pemSz, PUBLICKEY_TYPE, &der, NULL, NULL, NULL);
-    if (ret < 0) {
+    if (ret < 0 || der == NULL) {
         WOLFSSL_MSG("Bad Pem To Der");
     }
     else {
@@ -8263,16 +10784,24 @@
     int    dynamic = 0;
     int    ret     = 0;
     long   sz      = 0;
-    XFILE  file    = XFOPEN(fileName, "rb");
+    XFILE  file;
     DerBuffer* converted = NULL;
 
     WOLFSSL_ENTER("wc_PemCertToDer");
 
-    if (file == XBADFILE) {
-        ret = BUFFER_E;
+    if (fileName == NULL) {
+        ret = BAD_FUNC_ARG;
     }
     else {
-        XFSEEK(file, 0, XSEEK_END);
+        file = XFOPEN(fileName, "rb");
+        if (file == XBADFILE) {
+            ret = BUFFER_E;
+        }
+    }
+
+    if (ret == 0) {
+        if(XFSEEK(file, 0, XSEEK_END) != 0)
+            ret = BUFFER_E;
         sz = XFTELL(file);
         XREWIND(file);
 
@@ -8336,16 +10865,24 @@
     int    dynamic = 0;
     int    ret     = 0;
     long   sz      = 0;
-    XFILE  file    = XFOPEN(fileName, "rb");
+    XFILE  file;
     DerBuffer* converted = NULL;
 
     WOLFSSL_ENTER("wc_PemPubKeyToDer");
 
-    if (file == XBADFILE) {
-        ret = BUFFER_E;
+    if (fileName == NULL) {
+        ret = BAD_FUNC_ARG;
     }
     else {
-        XFSEEK(file, 0, XSEEK_END);
+        file = XFOPEN(fileName, "rb");
+        if (file == XBADFILE) {
+            ret = BUFFER_E;
+        }
+    }
+
+    if (ret == 0) {
+        if(XFSEEK(file, 0, XSEEK_END) != 0)
+            ret = BUFFER_E;
         sz = XFTELL(file);
         XREWIND(file);
 
@@ -8482,7 +11019,7 @@
     if (with_header) {
         int  algoSz;
 #ifdef WOLFSSL_SMALL_STACK
-        byte* algo = NULL;
+        byte* algo;
 
         algo = (byte*)XMALLOC(MAX_ALGO_SZ, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
         if (algo == NULL) {
@@ -8540,74 +11077,52 @@
     return idx;
 }
 
-int RsaPublicKeyDerSize(RsaKey* key, int with_header)
-{
-    byte* dummy = NULL;
-    byte seq[MAX_SEQ_SZ];
-    byte bitString[1 + MAX_LENGTH_SZ + 1];
-    int  nSz;
-    int  eSz;
-    int  seqSz;
-    int  bitStringSz;
-    int  idx;
+#endif /* !NO_RSA && (WOLFSSL_CERT_GEN || (WOLFSSL_KEY_GEN &&
+                                           !HAVE_USER_RSA))) */
+
+#if !defined(NO_RSA) && (defined(WOLFSSL_CERT_GEN) || defined(OPENSSL_EXTRA))
+int wc_RsaPublicKeyDerSize(RsaKey* key, int with_header)
+{
+    int  idx = 0;
+    int  nSz, eSz, seqSz, bitStringSz, algoSz;
 
     if (key == NULL)
         return BAD_FUNC_ARG;
 
     /* n */
-    dummy = (byte*)XMALLOC(MAX_RSA_INT_SZ, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
-    if (dummy == NULL)
-        return MEMORY_E;
-
 #ifdef HAVE_USER_RSA
-    nSz = SetASNIntRSA(key->n, dummy);
-#else
-    nSz = SetASNIntMP(&key->n, MAX_RSA_INT_SZ, dummy);
-#endif
-    XFREE(dummy, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    nSz = SetASNIntRSA(key->n, NULL);
+#else
+    nSz = SetASNIntMP(&key->n, MAX_RSA_INT_SZ, NULL);
+#endif
     if (nSz < 0) {
         return nSz;
     }
 
     /* e */
-    dummy = (byte*)XMALLOC(MAX_RSA_E_SZ, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
-    if (dummy == NULL) {
-        XFREE(dummy, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
-        return MEMORY_E;
-    }
-
 #ifdef HAVE_USER_RSA
-    eSz = SetASNIntRSA(key->e, dummy);
-#else
-    eSz = SetASNIntMP(&key->e, MAX_RSA_INT_SZ, dummy);
-#endif
-    XFREE(dummy, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    eSz = SetASNIntRSA(key->e, NULL);
+#else
+    eSz = SetASNIntMP(&key->e, MAX_RSA_INT_SZ, NULL);
+#endif
     if (eSz < 0) {
         return eSz;
     }
 
-    seqSz  = SetSequence(nSz + eSz, seq);
+    seqSz  = SetSequence(nSz + eSz, NULL);
 
     /* headers */
     if (with_header) {
-        int  algoSz;
-        dummy = (byte*)XMALLOC(MAX_RSA_INT_SZ, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
-        if (dummy == NULL)
-            return MEMORY_E;
-
-        algoSz = SetAlgoID(RSAk, dummy, oidKeyType, 0);
-        bitStringSz  = SetBitString(seqSz + nSz + eSz, 0, bitString);
-
-        idx = SetSequence(nSz + eSz + seqSz + bitStringSz + algoSz, dummy);
-        XFREE(dummy, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        algoSz = SetAlgoID(RSAk, NULL, oidKeyType, 0);
+        bitStringSz = SetBitString(seqSz + nSz + eSz, 0, NULL);
+
+        idx += SetSequence(nSz + eSz + seqSz + bitStringSz + algoSz, NULL);
 
         /* algo */
         idx += algoSz;
         /* bit string */
         idx += bitStringSz;
     }
-    else
-        idx = 0;
 
     /* seq */
     idx += seqSz;
@@ -8618,13 +11133,12 @@
 
     return idx;
 }
-#endif /* !NO_RSA && (WOLFSSL_CERT_GEN || (WOLFSSL_KEY_GEN &&
-                                           !HAVE_USER_RSA))) */
+
+#endif /* !NO_RSA && WOLFSSL_CERT_GEN */
 
 
 #if defined(WOLFSSL_KEY_GEN) && !defined(NO_RSA) && !defined(HAVE_USER_RSA)
 
-
 static mp_int* GetRsaInt(RsaKey* key, int idx)
 {
     if (idx == 0)
@@ -8672,7 +11186,7 @@
     byte  ver[MAX_VERSION_SZ];
     byte* tmps[RSA_INTS];
 
-    if (!key || !output)
+    if (!key)
         return BAD_FUNC_ARG;
 
     if (key->type != RSA_PRIVATE)
@@ -8711,20 +11225,22 @@
     seqSz = SetSequence(verSz + intTotalLen, seq);
 
     outLen = seqSz + verSz + intTotalLen;
-    if (outLen > (int)inLen) {
-        FreeTmpRsas(tmps, key->heap);
-        return BAD_FUNC_ARG;
-    }
-
-    /* write to output */
-    XMEMCPY(output, seq, seqSz);
-    j = seqSz;
-    XMEMCPY(output + j, ver, verSz);
-    j += verSz;
-
-    for (i = 0; i < RSA_INTS; i++) {
-        XMEMCPY(output + j, tmps[i], sizes[i]);
-        j += sizes[i];
+    if (output) {
+        if (outLen > (int)inLen) {
+            FreeTmpRsas(tmps, key->heap);
+            return BAD_FUNC_ARG;
+        }
+
+        /* write to output */
+        XMEMCPY(output, seq, seqSz);
+        j = seqSz;
+        XMEMCPY(output + j, ver, verSz);
+        j += verSz;
+
+        for (i = 0; i < RSA_INTS; i++) {
+            XMEMCPY(output + j, tmps[i], sizes[i]);
+            j += sizes[i];
+        }
     }
     FreeTmpRsas(tmps, key->heap);
 
@@ -8740,7 +11256,7 @@
     return SetRsaPublicKey(output, key, inLen, 1);
 }
 
-#endif /* WOLFSSL_KEY_GEN && !NO_RSA && !HAVE_USER_RSA */
+#endif /* (WOLFSSL_KEY_GEN || OPENSSL_EXTRA) && !NO_RSA && !HAVE_USER_RSA */
 
 
 #ifdef WOLFSSL_CERT_GEN
@@ -8836,7 +11352,7 @@
     int  sizeSz;                       /* encoded size length */
     int  versionSz;                    /* encoded version length */
     int  serialSz;                     /* encoded serial length */
-    int  sigAlgoSz;                    /* encoded sig alog length */
+    int  sigAlgoSz;                    /* encoded sig algo length */
     int  issuerSz;                     /* encoded issuer length */
     int  subjectSz;                    /* encoded subject length */
     int  validitySz;                   /* encoded validity length */
@@ -8863,6 +11379,12 @@
 #ifdef WOLFSSL_CERT_REQ
 
 /* Write a set header to output */
+static word32 SetPrintableString(word32 len, byte* output)
+{
+    output[0] = ASN_PRINTABLE_STRING;
+    return SetLength(len, output + 1) + 1;
+}
+
 static word32 SetUTF8String(word32 len, byte* output)
 {
     output[0] = ASN_UTF8STRING;
@@ -8871,9 +11393,62 @@
 
 #endif /* WOLFSSL_CERT_REQ */
 
-#endif /*WOLFSSL_CERT_GEN */
-
-#if defined(HAVE_ECC)
+
+#ifndef WOLFSSL_CERT_GEN_CACHE
+/* wc_SetCert_Free is only public when WOLFSSL_CERT_GEN_CACHE is not defined */
+static
+#endif
+void wc_SetCert_Free(Cert* cert)
+{
+    if (cert != NULL) {
+        cert->der = NULL;
+        if (cert->decodedCert) {
+            FreeDecodedCert((DecodedCert*)cert->decodedCert);
+
+            XFREE(cert->decodedCert, cert->heap, DYNAMIC_TYPE_DCERT);
+            cert->decodedCert = NULL;
+        }
+    }
+}
+
+static int wc_SetCert_LoadDer(Cert* cert, const byte* der, word32 derSz)
+{
+    int ret;
+
+    if (cert == NULL) {
+        ret = BAD_FUNC_ARG;
+    }
+    else {
+        /* Allocate DecodedCert struct and Zero */
+        cert->decodedCert = (void*)XMALLOC(sizeof(DecodedCert), cert->heap,
+            DYNAMIC_TYPE_DCERT);
+
+        if (cert->decodedCert == NULL) {
+            ret = MEMORY_E;
+        }
+        else {
+            XMEMSET(cert->decodedCert, 0, sizeof(DecodedCert));
+
+            InitDecodedCert((DecodedCert*)cert->decodedCert, der, derSz,
+                    cert->heap);
+            ret = ParseCertRelative((DecodedCert*)cert->decodedCert,
+                    CERT_TYPE, 0, NULL);
+            if (ret >= 0) {
+                cert->der = (byte*)der;
+            }
+            else {
+                wc_SetCert_Free(cert);
+            }
+        }
+    }
+
+    return ret;
+}
+
+#endif /* WOLFSSL_CERT_GEN */
+
+
+#if defined(HAVE_ECC) && defined(HAVE_ECC_KEY_EXPORT)
 
 /* Write a public ECC key to output */
 static int SetEccPublicKey(byte* output, ecc_key* key, int with_header)
@@ -8887,7 +11462,7 @@
 #ifdef WOLFSSL_SMALL_STACK
     byte* algo = NULL;
     byte* curve = NULL;
-    byte* pub = NULL;
+    byte* pub;
 #else
     byte algo[MAX_ALGO_SZ];
     byte curve[MAX_ALGO_SZ];
@@ -8901,7 +11476,17 @@
         return MEMORY_E;
 #endif
 
+#ifdef HAVE_SELFTEST
+    /* older version of ecc.c can not handle dp being NULL */
+    if (key != NULL && key->dp == NULL) {
+        ret = BAD_FUNC_ARG;
+    }
+    else {
+        ret = wc_ecc_export_x963(key, pub, &pubSz);
+    }
+#else
     ret = wc_ecc_export_x963(key, pub, &pubSz);
+#endif
     if (ret != 0) {
 #ifdef WOLFSSL_SMALL_STACK
         XFREE(pub, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
@@ -8941,20 +11526,24 @@
 
         idx = SetSequence(pubSz + curveSz + bitStringSz + algoSz, output);
         /* algo */
-        XMEMCPY(output + idx, algo, algoSz);
+        if (output)
+            XMEMCPY(output + idx, algo, algoSz);
         idx += algoSz;
-       /* curve */
-        XMEMCPY(output + idx, curve, curveSz);
+        /* curve */
+        if (output)
+            XMEMCPY(output + idx, curve, curveSz);
         idx += curveSz;
         /* bit string */
-        XMEMCPY(output + idx, bitString, bitStringSz);
+        if (output)
+            XMEMCPY(output + idx, bitString, bitStringSz);
         idx += bitStringSz;
     }
     else
         idx = 0;
 
     /* pub */
-    XMEMCPY(output + idx, pub, pubSz);
+    if (output)
+        XMEMCPY(output + idx, pub, pubSz);
     idx += pubSz;
 
 #ifdef WOLFSSL_SMALL_STACK
@@ -8972,7 +11561,7 @@
 /* returns the size of buffer used, the public ECC key in DER format is stored
    in output buffer
    with_AlgCurve is a flag for when to include a header that has the Algorithm
-   and Curve infromation */
+   and Curve information */
 int wc_EccPublicKeyToDer(ecc_key* key, byte* output, word32 inLen,
                                                               int with_AlgCurve)
 {
@@ -8980,7 +11569,7 @@
     word32 keySz  = 0;
     int ret;
 
-    if (output == NULL || key == NULL) {
+    if (key == NULL) {
         return BAD_FUNC_ARG;
     }
 
@@ -8994,18 +11583,41 @@
         infoSz += TRAILING_ZERO;
     }
 
-    if ((ret = wc_ecc_export_x963(key, NULL, &keySz)) != LENGTH_ONLY_E) {
+#ifdef HAVE_SELFTEST
+    /* older version of ecc.c can not handle dp being NULL */
+    if (key != NULL && key->dp == NULL) {
+        keySz = 1 + 2 * MAX_ECC_BYTES;
+        ret = LENGTH_ONLY_E;
+    }
+    else {
+        ret = wc_ecc_export_x963(key, NULL, &keySz);
+    }
+#else
+    ret = wc_ecc_export_x963(key, NULL, &keySz);
+#endif
+    if (ret != LENGTH_ONLY_E) {
         WOLFSSL_MSG("Error in getting ECC public key size");
         return ret;
     }
 
+    /* if output null then just return size */
+    if (output == NULL) {
+        return keySz + infoSz;
+    }
+
     if (inLen < keySz + infoSz) {
         return BUFFER_E;
     }
 
     return SetEccPublicKey(output, key, with_AlgCurve);
 }
-#endif /* HAVE_ECC */
+
+int wc_EccPublicKeyDerSize(ecc_key* key, int with_AlgCurve)
+{
+    return wc_EccPublicKeyToDer(key, NULL, 0, with_AlgCurve);
+}
+
+#endif /* HAVE_ECC && HAVE_ECC_KEY_EXPORT */
 
 #if defined(HAVE_ED25519) && (defined(WOLFSSL_CERT_GEN) || \
                               defined(WOLFSSL_KEY_GEN))
@@ -9020,7 +11632,7 @@
     word32 pubSz = ED25519_PUB_KEY_SIZE;
 #ifdef WOLFSSL_SMALL_STACK
     byte* algo = NULL;
-    byte* pub = NULL;
+    byte* pub;
 #else
     byte algo[MAX_ALGO_SZ];
     byte pub[ED25519_PUB_KEY_SIZE];
@@ -9032,12 +11644,12 @@
         return MEMORY_E;
 #endif
 
-    int ret = wc_ed25519_export_public(key, pub, &pubSz);
-    if (ret != 0) {
+    idx = wc_ed25519_export_public(key, pub, &pubSz);
+    if (idx != 0) {
 #ifdef WOLFSSL_SMALL_STACK
         XFREE(pub, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
 #endif
-        return ret;
+        return idx;
     }
 
     /* headers */
@@ -9045,7 +11657,7 @@
 #ifdef WOLFSSL_SMALL_STACK
         algo = (byte*)XMALLOC(MAX_ALGO_SZ, NULL, DYNAMIC_TYPE_TMP_BUFFER);
         if (algo == NULL) {
-            XFREE(pub,   key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            XFREE(pub, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
             return MEMORY_E;
         }
 #endif
@@ -9111,6 +11723,110 @@
     return SetEd25519PublicKey(output, key, withAlg);
 }
 #endif /* HAVE_ED25519 && (WOLFSSL_CERT_GEN || WOLFSSL_KEY_GEN) */
+#if defined(HAVE_ED448) && (defined(WOLFSSL_CERT_GEN) || \
+                                                       defined(WOLFSSL_KEY_GEN))
+
+/* Write a public ECC key to output */
+static int SetEd448PublicKey(byte* output, ed448_key* key, int with_header)
+{
+    byte bitString[1 + MAX_LENGTH_SZ + 1];
+    int  algoSz;
+    int  bitStringSz;
+    int  idx;
+    word32 pubSz = ED448_PUB_KEY_SIZE;
+#ifdef WOLFSSL_SMALL_STACK
+    byte* algo = NULL;
+    byte* pub = NULL;
+#else
+    byte algo[MAX_ALGO_SZ];
+    byte pub[ED448_PUB_KEY_SIZE];
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+    pub = (byte*)XMALLOC(ECC_BUFSIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    if (pub == NULL)
+        return MEMORY_E;
+#endif
+
+    idx = wc_ed448_export_public(key, pub, &pubSz);
+    if (idx != 0) {
+#ifdef WOLFSSL_SMALL_STACK
+        XFREE(pub, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+        return idx;
+    }
+
+    /* headers */
+    if (with_header) {
+#ifdef WOLFSSL_SMALL_STACK
+        algo = (byte*)XMALLOC(MAX_ALGO_SZ, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        if (algo == NULL) {
+            XFREE(pub, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            return MEMORY_E;
+        }
+#endif
+        algoSz  = SetAlgoID(ED448k, algo, oidKeyType, 0);
+
+        bitStringSz = SetBitString(pubSz, 0, bitString);
+
+        idx = SetSequence(pubSz + bitStringSz + algoSz, output);
+        /* algo */
+        XMEMCPY(output + idx, algo, algoSz);
+        idx += algoSz;
+        /* bit string */
+        XMEMCPY(output + idx, bitString, bitStringSz);
+        idx += bitStringSz;
+    }
+    else
+        idx = 0;
+
+    /* pub */
+    XMEMCPY(output + idx, pub, pubSz);
+    idx += pubSz;
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (with_header) {
+        XFREE(algo, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+    XFREE(pub, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+    return idx;
+}
+
+int wc_Ed448PublicKeyToDer(ed448_key* key, byte* output, word32 inLen,
+                                                                    int withAlg)
+{
+    word32 infoSz = 0;
+    word32 keySz  = 0;
+    int ret;
+
+    if (output == NULL || key == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    if (withAlg) {
+        /* buffer space for algorithm */
+        infoSz += MAX_SEQ_SZ;
+        infoSz += MAX_ALGO_SZ;
+
+        /* buffer space for public key sequence */
+        infoSz += MAX_SEQ_SZ;
+        infoSz += TRAILING_ZERO;
+    }
+
+    if ((ret = wc_ed448_export_public(key, output, &keySz)) != BUFFER_E) {
+        WOLFSSL_MSG("Error in getting ECC public key size");
+        return ret;
+    }
+
+    if (inLen < keySz + infoSz) {
+        return BUFFER_E;
+    }
+
+    return SetEd448PublicKey(output, key, withAlg);
+}
+#endif /* HAVE_ED448 && (WOLFSSL_CERT_GEN || WOLFSSL_KEY_GEN) */
 
 
 #ifdef WOLFSSL_CERT_GEN
@@ -9161,9 +11877,11 @@
 
     /* headers and output */
     seqSz = SetSequence(cert->beforeDateSz + cert->afterDateSz, output);
-    XMEMCPY(output + seqSz, cert->beforeDate, cert->beforeDateSz);
-    XMEMCPY(output + seqSz + cert->beforeDateSz, cert->afterDate,
-                                                 cert->afterDateSz);
+    if (output) {
+        XMEMCPY(output + seqSz, cert->beforeDate, cert->beforeDateSz);
+        XMEMCPY(output + seqSz + cert->beforeDateSz, cert->afterDate,
+                                                     cert->afterDateSz);
+    }
     return seqSz + cert->beforeDateSz + cert->afterDateSz;
 }
 
@@ -9183,7 +11901,7 @@
 
     time_t now;
     time_t then;
-    struct tm* tmpTime = NULL;
+    struct tm* tmpTime;
     struct tm* expandedTime;
     struct tm localTime;
 
@@ -9192,8 +11910,9 @@
     struct tm tmpTimeStorage;
     tmpTime = &tmpTimeStorage;
 #else
+    tmpTime = NULL;
+#endif
     (void)tmpTime;
-#endif
 
     now = XTIME(0);
 
@@ -9221,7 +11940,7 @@
     afterSz  = SetLength(ASN_GEN_TIME_SZ, after + 1) + 1;  /* gen tag */
 
     /* add daysValid of seconds */
-    then = now + (daysValid * 3600);
+    then = now + (daysValid * (time_t)86400);
     expandedTime = XGMTIME(&then, tmpTime);
     if (expandedTime == NULL) {
         WOLFSSL_MSG("XGMTIME failed");
@@ -9281,6 +12000,16 @@
        return name->commonName;
 
     case 7:
+       return name->serialDev;
+
+#ifdef WOLFSSL_CERT_EXT
+    case 8:
+       return name->busCat;
+
+    case 9:
+#else
+    case 8:
+#endif
        return name->email;
 
     default:
@@ -9314,6 +12043,20 @@
     case 6:
        return name->commonNameEnc;
 
+    case 7:
+       return name->serialDevEnc;
+
+#ifdef WOLFSSL_CERT_EXT
+    case 8:
+       return name->busCatEnc;
+
+    case 9:
+#else
+    case 8:
+#endif
+        /* FALL THROUGH */
+        /* The last index, email name, does not have encoding type.
+           The empty case here is to keep track of it for future reference. */
     default:
        return 0;
     }
@@ -9346,7 +12089,17 @@
        return ASN_COMMON_NAME;
 
     case 7:
-       return ASN_EMAIL_NAME;
+       return ASN_SERIAL_NUMBER;
+
+#ifdef WOLFSSL_CERT_EXT
+    case 8:
+        return ASN_BUS_CAT;
+
+    case 9:
+#else
+    case 8:
+#endif
+        return ASN_EMAIL_NAME;
 
     default:
        return 0;
@@ -9413,7 +12166,7 @@
 /* encode CA basic constraint true, return total bytes written */
 static int SetCa(byte* out, word32 outSz)
 {
-    static const byte ca[] = { 0x30, 0x0c, 0x06, 0x03, 0x55, 0x1d, 0x13, 0x04,
+    const byte ca[] = { 0x30, 0x0c, 0x06, 0x03, 0x55, 0x1d, 0x13, 0x04,
                                0x05, 0x30, 0x03, 0x01, 0x01, 0xff };
 
     if (out == NULL)
@@ -9462,7 +12215,7 @@
     byte skid_len[1 + MAX_LENGTH_SZ];
     byte skid_enc_len[MAX_LENGTH_SZ];
     int idx = 0, skid_lenSz, skid_enc_lenSz;
-    static const byte skid_oid[] = { 0x06, 0x03, 0x55, 0x1d, 0x0e, 0x04 };
+    const byte skid_oid[] = { 0x06, 0x03, 0x55, 0x1d, 0x0e, 0x04 };
 
     if (output == NULL || input == NULL)
         return BAD_FUNC_ARG;
@@ -9508,8 +12261,10 @@
 {
     byte    *enc_val;
     int     ret, enc_valSz;
-    static const byte akid_oid[] = { 0x06, 0x03, 0x55, 0x1d, 0x23, 0x04 };
-    static const byte akid_cs[] = { 0x80 };
+    const byte akid_oid[] = { 0x06, 0x03, 0x55, 0x1d, 0x23, 0x04 };
+    const byte akid_cs[] = { 0x80 };
+
+    (void)heap;
 
     if (output == NULL || input == NULL)
         return BAD_FUNC_ARG;
@@ -9539,7 +12294,7 @@
 {
     byte ku[5];
     int  idx;
-    static const byte keyusage_oid[] = { 0x06, 0x03, 0x55, 0x1d, 0x0f,
+    const byte keyusage_oid[] = { 0x06, 0x03, 0x55, 0x1d, 0x0f,
                                          0x01, 0x01, 0xff, 0x04};
     if (output == NULL)
         return BAD_FUNC_ARG;
@@ -9567,7 +12322,7 @@
 static int SetExtKeyUsage(Cert* cert, byte* output, word32 outSz, byte input)
 {
     int idx = 0, oidListSz = 0, totalSz, ret = 0;
-    static const byte extkeyusage_oid[] = { 0x06, 0x03, 0x55, 0x1d, 0x25 };
+    const byte extkeyusage_oid[] = { 0x06, 0x03, 0x55, 0x1d, 0x25 };
 
     if (output == NULL)
         return BAD_FUNC_ARG;
@@ -9629,7 +12384,7 @@
     XMEMCPY(&output[idx], extkeyusage_oid, sizeof(extkeyusage_oid));
     idx += sizeof(extkeyusage_oid);
 
-    /* 3. Octect String (2) */
+    /* 3. Octet String (2) */
     idx += SetOctetString(totalSz - idx, &output[idx]);
 
     /* 4. Seq + OidListLen (2) */
@@ -9642,87 +12397,6 @@
     return idx;
 }
 
-/* Encode OID string representation to ITU-T X.690 format */
-static int EncodePolicyOID(byte *out, word32 *outSz, const char *in, void* heap)
-{
-    word32 val, idx = 0, nb_val;
-    char *token, *str, *ptr;
-    word32 len;
-
-    if (out == NULL || outSz == NULL || *outSz < 2 || in == NULL)
-        return BAD_FUNC_ARG;
-
-    len = (word32)XSTRLEN(in);
-
-    str = (char *)XMALLOC(len+1, heap, DYNAMIC_TYPE_TMP_BUFFER);
-    if (str == NULL)
-        return MEMORY_E;
-
-    XSTRNCPY(str, in, len);
-    str[len] = '\0';
-
-    nb_val = 0;
-
-    /* parse value, and set corresponding Policy OID value */
-    token = XSTRTOK(str, ".", &ptr);
-    while (token != NULL)
-    {
-        val = (word32)atoi(token);
-
-        if (nb_val == 0) {
-            if (val > 2) {
-                XFREE(str, heap, DYNAMIC_TYPE_TMP_BUFFER);
-                return ASN_OBJECT_ID_E;
-            }
-
-            out[idx] = (byte)(40 * val);
-        }
-        else if (nb_val == 1) {
-            if (val > 127) {
-                XFREE(str, heap, DYNAMIC_TYPE_TMP_BUFFER);
-                return ASN_OBJECT_ID_E;
-            }
-
-            if (idx > *outSz) {
-                XFREE(str, heap, DYNAMIC_TYPE_TMP_BUFFER);
-                return BUFFER_E;
-            }
-
-            out[idx++] += (byte)val;
-        }
-        else {
-            word32  tb = 0, x;
-            int     i = 0;
-            byte    oid[MAX_OID_SZ];
-
-            while (val >= 128) {
-                x = val % 128;
-                val /= 128;
-                oid[i++] = (byte) (((tb++) ? 0x80 : 0) | x);
-            }
-
-            if ((idx+(word32)i) > *outSz) {
-                XFREE(str, heap, DYNAMIC_TYPE_TMP_BUFFER);
-                return BUFFER_E;
-            }
-
-            oid[i] = (byte) (((tb++) ? 0x80 : 0) | val);
-
-            /* push value in the right order */
-            while (i >= 0)
-                out[idx++] = oid[i--];
-        }
-
-        token = XSTRTOK(NULL, ".", &ptr);
-        nb_val++;
-    }
-
-    *outSz = idx;
-
-    XFREE(str, heap, DYNAMIC_TYPE_TMP_BUFFER);
-    return 0;
-}
-
 /* encode Certificate Policies, return total bytes written
  * each input value must be ITU-T X.690 formatted : a.b.c...
  * input must be an array of values with a NULL terminated for the latest
@@ -9740,8 +12414,8 @@
     word32  outSz, i = 0, der_oidSz[MAX_CERTPOL_NB];
     int     ret;
 
-    static const byte certpol_oid[] = { 0x06, 0x03, 0x55, 0x1d, 0x20, 0x04 };
-    static const byte oid_oid[] = { 0x06 };
+    const byte certpol_oid[] = { 0x06, 0x03, 0x55, 0x1d, 0x20, 0x04 };
+    const byte oid_oid[] = { 0x06 };
 
     if (output == NULL || input == NULL || nb_certpol > MAX_CERTPOL_NB)
         return BAD_FUNC_ARG;
@@ -9780,22 +12454,92 @@
 }
 #endif /* WOLFSSL_CERT_EXT */
 
+
 #ifdef WOLFSSL_ALT_NAMES
+
 /* encode Alternative Names, return total bytes written */
-static int SetAltNames(byte *out, word32 outSz, byte *input, word32 length)
-{
-    if (out == NULL || input == NULL)
+static int SetAltNames(byte *output, word32 outSz,
+        const byte *input, word32 length)
+{
+    byte san_len[1 + MAX_LENGTH_SZ];
+    int idx = 0, san_lenSz;
+    const byte san_oid[] = { 0x06, 0x03, 0x55, 0x1d, 0x11 };
+
+    if (output == NULL || input == NULL)
         return BAD_FUNC_ARG;
 
     if (outSz < length)
         return BUFFER_E;
 
-    /* Alternative Names come from certificate or computed by
-     * external function, so already encoded. Just copy value */
-    XMEMCPY(out, input, length);
-    return length;
-}
-#endif /* WOLFSL_ALT_NAMES */
+    /* Octet String header */
+    san_lenSz = SetOctetString(length, san_len);
+
+    if (outSz < MAX_SEQ_SZ)
+        return BUFFER_E;
+
+    idx = SetSequence(length + sizeof(san_oid) + san_lenSz, output);
+
+    if ((length + sizeof(san_oid) + san_lenSz) > outSz)
+        return BUFFER_E;
+
+    /* put oid */
+    XMEMCPY(output+idx, san_oid, sizeof(san_oid));
+    idx += sizeof(san_oid);
+
+    /* put octet header */
+    XMEMCPY(output+idx, san_len, san_lenSz);
+    idx += san_lenSz;
+
+    /* put value */
+    XMEMCPY(output+idx, input, length);
+    idx += length;
+
+    return idx;
+}
+
+
+#ifdef WOLFSSL_CERT_GEN
+
+int FlattenAltNames(byte* output, word32 outputSz, const DNS_entry* names)
+{
+    word32 idx;
+    const DNS_entry* curName;
+    word32 namesSz = 0;
+
+    if (output == NULL)
+        return BAD_FUNC_ARG;
+
+    if (names == NULL)
+        return 0;
+
+    curName = names;
+    do {
+        namesSz += curName->len + 2 +
+            ((curName->len < ASN_LONG_LENGTH) ? 0
+             : BytePrecision(curName->len));
+        curName = curName->next;
+    } while (curName != NULL);
+
+    if (outputSz < MAX_SEQ_SZ + namesSz)
+        return BUFFER_E;
+
+    idx = SetSequence(namesSz, output);
+
+    curName = names;
+    do {
+        output[idx++] = ASN_CONTEXT_SPECIFIC | curName->type;
+        idx += SetLength(curName->len, output + idx);
+        XMEMCPY(output + idx, curName->name, curName->len);
+        idx += curName->len;
+        curName = curName->next;
+    } while (curName != NULL);
+
+    return idx;
+}
+
+#endif /* WOLFSSL_CERT_GEN */
+
+#endif /* WOLFSSL_ALT_NAMES */
 
 /* Encodes one attribute of the name (issuer/subject)
  *
@@ -10061,7 +12805,7 @@
 /* encode info from cert into DER encoded format */
 static int EncodeCert(Cert* cert, DerCert* der, RsaKey* rsaKey, ecc_key* eccKey,
                       WC_RNG* rng, const byte* ntruKey, word16 ntruSz,
-                      ed25519_key* ed25519Key)
+                      ed25519_key* ed25519Key, ed448_key* ed448Key)
 {
     int ret;
 
@@ -10069,8 +12813,10 @@
         return BAD_FUNC_ARG;
 
     /* make sure at least one key type is provided */
-    if (rsaKey == NULL && eccKey == NULL && ed25519Key == NULL && ntruKey == NULL)
+    if (rsaKey == NULL && eccKey == NULL && ed25519Key == NULL &&
+                                          ed448Key == NULL && ntruKey == NULL) {
         return PUBLIC_KEY_E;
+    }
 
     /* init */
     XMEMSET(der, 0, sizeof(DerCert));
@@ -10081,13 +12827,15 @@
     /* serial number (must be positive) */
     if (cert->serialSz == 0) {
         /* generate random serial */
-        cert->serialSz = CTC_SERIAL_SIZE;
+        cert->serialSz = CTC_GEN_SERIAL_SZ;
         ret = wc_RNG_GenerateBlock(rng, cert->serial, cert->serialSz);
         if (ret != 0)
             return ret;
+        /* Clear the top bit to avoid a negative value */
+        cert->serial[0] &= 0x7f;
     }
     der->serialSz = SetSerialNumber(cert->serial, cert->serialSz, der->serial,
-        CTC_SERIAL_SIZE);
+        sizeof(der->serial), CTC_SERIAL_SIZE);
     if (der->serialSz < 0)
         return der->serialSz;
 
@@ -10122,6 +12870,14 @@
     }
 #endif
 
+#ifdef HAVE_ED448
+    if (cert->keyType == ED448_KEY) {
+        if (ed448Key == NULL)
+            return PUBLIC_KEY_E;
+        der->publicKeySz = SetEd448PublicKey(der->publicKey, ed448Key, 1);
+    }
+#endif
+
 #ifdef HAVE_NTRU
     if (cert->keyType == NTRU_KEY) {
         word32 rc;
@@ -10169,13 +12925,58 @@
     }
 
     /* subject name */
-    der->subjectSz = SetName(der->subject, sizeof(der->subject), &cert->subject);
+#ifdef WOLFSSL_CERT_EXT
+    if (XSTRLEN((const char*)cert->sbjRaw) > 0) {
+        /* Use the raw subject */
+        int idx;
+
+        der->subjectSz = min(sizeof(der->subject),
+                (word32)XSTRLEN((const char*)cert->sbjRaw));
+        /* header */
+        idx = SetSequence(der->subjectSz, der->subject);
+        if (der->subjectSz + idx > (int)sizeof(der->subject)) {
+            return SUBJECT_E;
+        }
+
+        XMEMCPY((char*)der->subject + idx, (const char*)cert->sbjRaw,
+                der->subjectSz);
+        der->subjectSz += idx;
+    }
+    else
+#endif
+    {
+        /* Use the name structure */
+        der->subjectSz = SetName(der->subject, sizeof(der->subject),
+                &cert->subject);
+    }
     if (der->subjectSz <= 0)
         return SUBJECT_E;
 
     /* issuer name */
-    der->issuerSz = SetName(der->issuer, sizeof(der->issuer), cert->selfSigned ?
-             &cert->subject : &cert->issuer);
+#ifdef WOLFSSL_CERT_EXT
+    if (XSTRLEN((const char*)cert->issRaw) > 0) {
+        /* Use the raw issuer */
+        int idx;
+
+        der->issuerSz = min(sizeof(der->issuer),
+                (word32)XSTRLEN((const char*)cert->issRaw));
+        /* header */
+        idx = SetSequence(der->issuerSz, der->issuer);
+        if (der->issuerSz + idx > (int)sizeof(der->issuer)) {
+            return ISSUER_E;
+        }
+
+        XMEMCPY((char*)der->issuer + idx, (const char*)cert->issRaw,
+                der->issuerSz);
+        der->issuerSz += idx;
+    }
+    else
+#endif
+    {
+        /* Use the name structure */
+        der->issuerSz = SetName(der->issuer, sizeof(der->issuer),
+                cert->selfSigned ? &cert->subject : &cert->issuer);
+    }
     if (der->issuerSz <= 0)
         return ISSUER_E;
 
@@ -10369,36 +13170,36 @@
 
 
 /* write DER encoded cert to buffer, size already checked */
-static int WriteCertBody(DerCert* der, byte* buffer)
+static int WriteCertBody(DerCert* der, byte* buf)
 {
     int idx;
 
     /* signed part header */
-    idx = SetSequence(der->total, buffer);
+    idx = SetSequence(der->total, buf);
     /* version */
-    XMEMCPY(buffer + idx, der->version, der->versionSz);
+    XMEMCPY(buf + idx, der->version, der->versionSz);
     idx += der->versionSz;
     /* serial */
-    XMEMCPY(buffer + idx, der->serial, der->serialSz);
+    XMEMCPY(buf + idx, der->serial, der->serialSz);
     idx += der->serialSz;
     /* sig algo */
-    XMEMCPY(buffer + idx, der->sigAlgo, der->sigAlgoSz);
+    XMEMCPY(buf + idx, der->sigAlgo, der->sigAlgoSz);
     idx += der->sigAlgoSz;
     /* issuer */
-    XMEMCPY(buffer + idx, der->issuer, der->issuerSz);
+    XMEMCPY(buf + idx, der->issuer, der->issuerSz);
     idx += der->issuerSz;
     /* validity */
-    XMEMCPY(buffer + idx, der->validity, der->validitySz);
+    XMEMCPY(buf + idx, der->validity, der->validitySz);
     idx += der->validitySz;
     /* subject */
-    XMEMCPY(buffer + idx, der->subject, der->subjectSz);
+    XMEMCPY(buf + idx, der->subject, der->subjectSz);
     idx += der->subjectSz;
     /* public key */
-    XMEMCPY(buffer + idx, der->publicKey, der->publicKeySz);
+    XMEMCPY(buf + idx, der->publicKey, der->publicKeySz);
     idx += der->publicKeySz;
     if (der->extensionsSz) {
         /* extensions */
-        XMEMCPY(buffer + idx, der->extensions, min(der->extensionsSz,
+        XMEMCPY(buf + idx, der->extensions, min(der->extensionsSz,
                                                    (int)sizeof(der->extensions)));
         idx += der->extensionsSz;
     }
@@ -10408,22 +13209,25 @@
 
 
 /* Make RSA signature from buffer (sz), write to sig (sigSz) */
-static int MakeSignature(CertSignCtx* certSignCtx, const byte* buffer, int sz,
+static int MakeSignature(CertSignCtx* certSignCtx, const byte* buf, int sz,
     byte* sig, int sigSz, RsaKey* rsaKey, ecc_key* eccKey,
-    ed25519_key* ed25519Key, WC_RNG* rng, int sigAlgoType, void* heap)
+    ed25519_key* ed25519Key, ed448_key* ed448Key, WC_RNG* rng, int sigAlgoType,
+    void* heap)
 {
     int digestSz = 0, typeH = 0, ret = 0;
 
     (void)digestSz;
     (void)typeH;
-    (void)buffer;
+    (void)buf;
     (void)sz;
     (void)sig;
     (void)sigSz;
     (void)rsaKey;
     (void)eccKey;
     (void)ed25519Key;
+    (void)ed448Key;
     (void)rng;
+    (void)heap;
 
     switch (certSignCtx->state) {
     case CERTSIGN_STATE_BEGIN:
@@ -10436,9 +13240,9 @@
             ret = MEMORY_E; goto exit_ms;
         }
 
-        ret = HashForSignature(buffer, sz, sigAlgoType, certSignCtx->digest,
+        ret = HashForSignature(buf, sz, sigAlgoType, certSignCtx->digest,
                                &typeH, &digestSz, 0);
-        /* set next state, since WC_PENDING rentry for these are not "call again" */
+        /* set next state, since WC_PENDING_E rentry for these are not "call again" */
         certSignCtx->state = CERTSIGN_STATE_ENCODE;
         if (ret != 0) {
             goto exit_ms;
@@ -10488,7 +13292,17 @@
         if (!rsaKey && !eccKey && ed25519Key) {
             word32 outSz = sigSz;
 
-            ret = wc_ed25519_sign_msg(buffer, sz, sig, &outSz, ed25519Key);
+            ret = wc_ed25519_sign_msg(buf, sz, sig, &outSz, ed25519Key);
+            if (ret == 0)
+                ret = outSz;
+        }
+    #endif /* HAVE_ECC */
+
+    #ifdef HAVE_ED448
+        if (!rsaKey && !eccKey && !ed25519Key && ed448Key) {
+            word32 outSz = sigSz;
+
+            ret = wc_ed448_sign_msg(buf, sz, sig, &outSz, ed448Key, NULL, 0);
             if (ret == 0)
                 ret = outSz;
         }
@@ -10498,9 +13312,11 @@
 
 exit_ms:
 
+#ifdef WOLFSSL_ASYNC_CRYPT
     if (ret == WC_PENDING_E) {
         return ret;
     }
+#endif
 
 #ifndef NO_RSA
     if (rsaKey) {
@@ -10520,24 +13336,27 @@
 
 /* add signature to end of buffer, size of buffer assumed checked, return
    new length */
-static int AddSignature(byte* buffer, int bodySz, const byte* sig, int sigSz,
+static int AddSignature(byte* buf, int bodySz, const byte* sig, int sigSz,
                         int sigAlgoType)
 {
     byte seq[MAX_SEQ_SZ];
     int  idx = bodySz, seqSz;
 
     /* algo */
-    idx += SetAlgoID(sigAlgoType, buffer + idx, oidSigType, 0);
+    idx += SetAlgoID(sigAlgoType, buf ? buf + idx : NULL, oidSigType, 0);
     /* bit string */
-    idx += SetBitString(sigSz, 0, buffer + idx);
+    idx += SetBitString(sigSz, 0, buf ? buf + idx : NULL);
     /* signature */
-    XMEMCPY(buffer + idx, sig, sigSz);
+    if (buf)
+        XMEMCPY(buf + idx, sig, sigSz);
     idx += sigSz;
 
     /* make room for overall header */
     seqSz = SetSequence(idx, seq);
-    XMEMMOVE(buffer + seqSz, buffer, idx);
-    XMEMCPY(buffer, seq, seqSz);
+    if (buf) {
+        XMEMMOVE(buf + seqSz, buf, idx);
+        XMEMCPY(buf, seq, seqSz);
+    }
 
     return idx + seqSz;
 }
@@ -10547,7 +13366,7 @@
 static int MakeAnyCert(Cert* cert, byte* derBuffer, word32 derSz,
                        RsaKey* rsaKey, ecc_key* eccKey, WC_RNG* rng,
                        const byte* ntruKey, word16 ntruSz,
-                       ed25519_key* ed25519Key)
+                       ed25519_key* ed25519Key, ed448_key* ed448Key)
 {
     int ret;
 #ifdef WOLFSSL_SMALL_STACK
@@ -10556,8 +13375,12 @@
     DerCert der[1];
 #endif
 
+    if (derBuffer == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
     cert->keyType = eccKey ? ECC_KEY : (rsaKey ? RSA_KEY :
-                                         (ed25519Key ? ED25519_KEY : NTRU_KEY));
+            (ed25519Key ? ED25519_KEY : (ed448Key ? ED448_KEY : NTRU_KEY)));
 
 #ifdef WOLFSSL_SMALL_STACK
     der = (DerCert*)XMALLOC(sizeof(DerCert), cert->heap, DYNAMIC_TYPE_TMP_BUFFER);
@@ -10566,7 +13389,7 @@
 #endif
 
     ret = EncodeCert(cert, der, rsaKey, eccKey, rng, ntruKey, ntruSz,
-                     ed25519Key);
+                     ed25519Key, ed448Key);
     if (ret == 0) {
         if (der->total + MAX_SEQ_SZ * 2 > (int)derSz)
             ret = BUFFER_E;
@@ -10586,9 +13409,10 @@
 int wc_MakeCert_ex(Cert* cert, byte* derBuffer, word32 derSz, int keyType,
                    void* key, WC_RNG* rng)
 {
-    RsaKey* rsaKey = NULL;
-    ecc_key* eccKey = NULL;
+    RsaKey*      rsaKey = NULL;
+    ecc_key*     eccKey = NULL;
     ed25519_key* ed25519Key = NULL;
+    ed448_key*   ed448Key = NULL;
 
     if (keyType == RSA_TYPE)
         rsaKey = (RsaKey*)key;
@@ -10596,16 +13420,18 @@
         eccKey = (ecc_key*)key;
     else if (keyType == ED25519_TYPE)
         ed25519Key = (ed25519_key*)key;
+    else if (keyType == ED448_TYPE)
+        ed448Key = (ed448_key*)key;
 
     return MakeAnyCert(cert, derBuffer, derSz, rsaKey, eccKey, rng, NULL, 0,
-                       ed25519Key);
+                       ed25519Key, ed448Key);
 }
 /* Make an x509 Certificate v3 RSA or ECC from cert input, write to buffer */
 int wc_MakeCert(Cert* cert, byte* derBuffer, word32 derSz, RsaKey* rsaKey,
              ecc_key* eccKey, WC_RNG* rng)
 {
     return MakeAnyCert(cert, derBuffer, derSz, rsaKey, eccKey, rng, NULL, 0,
-                       NULL);
+                       NULL, NULL);
 }
 
 
@@ -10622,12 +13448,13 @@
 
 #ifdef WOLFSSL_CERT_REQ
 
-static int SetReqAttrib(byte* output, char* pw, int extSz)
-{
-    static const byte cpOid[] =
+static int SetReqAttrib(byte* output, char* pw, int pwPrintableString,
+                        int extSz)
+{
+    const byte cpOid[] =
         { ASN_OBJECT_ID, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01,
                          0x09, 0x07 };
-    static const byte erOid[] =
+    const byte erOid[] =
         { ASN_OBJECT_ID, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01,
                          0x09, 0x0e };
 
@@ -10651,7 +13478,11 @@
 
     if (pw && pw[0]) {
         pwSz = (int)XSTRLEN(pw);
-        cpStrSz = SetUTF8String(pwSz, cpStr);
+        if (pwPrintableString) {
+            cpStrSz = SetPrintableString(pwSz, cpStr);
+        } else {
+            cpStrSz = SetUTF8String(pwSz, cpStr);
+        }
         cpSetSz = SetSet(cpStrSz + pwSz, cpSet);
         cpSeqSz = SetSequence(sizeof(cpOid) + cpSetSz + cpStrSz + pwSz, cpSeq);
         cpSz = cpSeqSz + sizeof(cpOid) + cpSetSz + cpStrSz + pwSz;
@@ -10695,16 +13526,20 @@
 
 /* encode info from cert into DER encoded format */
 static int EncodeCertReq(Cert* cert, DerCert* der, RsaKey* rsaKey,
-                         ecc_key* eccKey, ed25519_key* ed25519Key)
+                         ecc_key* eccKey, ed25519_key* ed25519Key,
+                         ed448_key* ed448Key)
 {
     (void)eccKey;
     (void)ed25519Key;
+    (void)ed448Key;
 
     if (cert == NULL || der == NULL)
         return BAD_FUNC_ARG;
 
-    if (rsaKey == NULL && eccKey == NULL && ed25519Key == NULL)
+    if (rsaKey == NULL && eccKey == NULL && ed25519Key == NULL &&
+                                                             ed448Key == NULL) {
             return PUBLIC_KEY_E;
+    }
 
     /* init */
     XMEMSET(der, 0, sizeof(DerCert));
@@ -10741,6 +13576,13 @@
     }
 #endif
 
+#ifdef HAVE_ED448
+    if (cert->keyType == ED448_KEY) {
+        if (ed448Key == NULL)
+            return PUBLIC_KEY_E;
+        der->publicKeySz = SetEd448PublicKey(der->publicKey, ed448Key, 1);
+    }
+#endif
     if (der->publicKeySz <= 0)
         return PUBLIC_KEY_E;
 
@@ -10859,8 +13701,9 @@
 #endif /* WOLFSSL_CERT_EXT */
     }
 
-    der->attribSz = SetReqAttrib(der->attrib,
-                                 cert->challengePw, der->extensionsSz);
+    der->attribSz = SetReqAttrib(der->attrib, cert->challengePw,
+                                 cert->challengePwPrintableString,
+                                 der->extensionsSz);
     if (der->attribSz <= 0)
         return REQ_ATTRIBUTE_E;
 
@@ -10872,27 +13715,32 @@
 
 
 /* write DER encoded cert req to buffer, size already checked */
-static int WriteCertReqBody(DerCert* der, byte* buffer)
+static int WriteCertReqBody(DerCert* der, byte* buf)
 {
     int idx;
 
     /* signed part header */
-    idx = SetSequence(der->total, buffer);
+    idx = SetSequence(der->total, buf);
     /* version */
-    XMEMCPY(buffer + idx, der->version, der->versionSz);
+    if (buf)
+        XMEMCPY(buf + idx, der->version, der->versionSz);
     idx += der->versionSz;
     /* subject */
-    XMEMCPY(buffer + idx, der->subject, der->subjectSz);
+    if (buf)
+        XMEMCPY(buf + idx, der->subject, der->subjectSz);
     idx += der->subjectSz;
     /* public key */
-    XMEMCPY(buffer + idx, der->publicKey, der->publicKeySz);
+    if (buf)
+        XMEMCPY(buf + idx, der->publicKey, der->publicKeySz);
     idx += der->publicKeySz;
     /* attributes */
-    XMEMCPY(buffer + idx, der->attrib, der->attribSz);
+    if (buf)
+        XMEMCPY(buf + idx, der->attrib, der->attribSz);
     idx += der->attribSz;
     /* extensions */
     if (der->extensionsSz) {
-        XMEMCPY(buffer + idx, der->extensions, min(der->extensionsSz,
+        if (buf)
+            XMEMCPY(buf + idx, der->extensions, min(der->extensionsSz,
                                                (int)sizeof(der->extensions)));
         idx += der->extensionsSz;
     }
@@ -10902,7 +13750,8 @@
 
 
 static int MakeCertReq(Cert* cert, byte* derBuffer, word32 derSz,
-                   RsaKey* rsaKey, ecc_key* eccKey, ed25519_key* ed25519Key)
+                   RsaKey* rsaKey, ecc_key* eccKey, ed25519_key* ed25519Key,
+                   ed448_key* ed448Key)
 {
     int ret;
 #ifdef WOLFSSL_SMALL_STACK
@@ -10911,7 +13760,8 @@
     DerCert der[1];
 #endif
 
-    cert->keyType = eccKey ? ECC_KEY : (ed25519Key ? ED25519_KEY : RSA_KEY);
+    cert->keyType = eccKey ? ECC_KEY : (ed25519Key ? ED25519_KEY :
+                                       (ed448Key ? ED448_KEY: RSA_KEY));
 
 #ifdef WOLFSSL_SMALL_STACK
     der = (DerCert*)XMALLOC(sizeof(DerCert), cert->heap,
@@ -10920,7 +13770,7 @@
         return MEMORY_E;
 #endif
 
-    ret = EncodeCertReq(cert, der, rsaKey, eccKey, ed25519Key);
+    ret = EncodeCertReq(cert, der, rsaKey, eccKey, ed25519Key, ed448Key);
 
     if (ret == 0) {
         if (der->total + MAX_SEQ_SZ * 2 > (int)derSz)
@@ -10939,9 +13789,10 @@
 int wc_MakeCertReq_ex(Cert* cert, byte* derBuffer, word32 derSz, int keyType,
                       void* key)
 {
-    RsaKey* rsaKey = NULL;
-    ecc_key* eccKey = NULL;
+    RsaKey*      rsaKey = NULL;
+    ecc_key*     eccKey = NULL;
     ed25519_key* ed25519Key = NULL;
+    ed448_key*   ed448Key = NULL;
 
     if (keyType == RSA_TYPE)
         rsaKey = (RsaKey*)key;
@@ -10949,29 +13800,35 @@
         eccKey = (ecc_key*)key;
     else if (keyType == ED25519_TYPE)
         ed25519Key = (ed25519_key*)key;
-
-    return MakeCertReq(cert, derBuffer, derSz, rsaKey, eccKey, ed25519Key);
+    else if (keyType == ED448_TYPE)
+        ed448Key = (ed448_key*)key;
+
+    return MakeCertReq(cert, derBuffer, derSz, rsaKey, eccKey, ed25519Key,
+                       ed448Key);
 }
 
 int wc_MakeCertReq(Cert* cert, byte* derBuffer, word32 derSz,
                    RsaKey* rsaKey, ecc_key* eccKey)
 {
-    return MakeCertReq(cert, derBuffer, derSz, rsaKey, eccKey, NULL);
+    return MakeCertReq(cert, derBuffer, derSz, rsaKey, eccKey, NULL, NULL);
 }
 #endif /* WOLFSSL_CERT_REQ */
 
 
-static int SignCert(int requestSz, int sType, byte* buffer, word32 buffSz,
+static int SignCert(int requestSz, int sType, byte* buf, word32 buffSz,
                     RsaKey* rsaKey, ecc_key* eccKey, ed25519_key* ed25519Key,
-                    WC_RNG* rng)
+                    ed448_key* ed448Key, WC_RNG* rng)
 {
     int sigSz = 0;
     void* heap = NULL;
-    CertSignCtx* certSignCtx = NULL;
+    CertSignCtx* certSignCtx;
 #ifndef WOLFSSL_ASYNC_CRYPT
     CertSignCtx  certSignCtx_lcl;
+
     certSignCtx = &certSignCtx_lcl;
     XMEMSET(certSignCtx, 0, sizeof(CertSignCtx));
+#else
+    certSignCtx = NULL;
 #endif
 
     if (requestSz < 0)
@@ -11012,19 +13869,22 @@
             return MEMORY_E;
     }
 
-    sigSz = MakeSignature(certSignCtx, buffer, requestSz, certSignCtx->sig,
-        MAX_ENCODED_SIG_SZ, rsaKey, eccKey, ed25519Key, rng, sType, heap);
+    sigSz = MakeSignature(certSignCtx, buf, requestSz, certSignCtx->sig,
+        MAX_ENCODED_SIG_SZ, rsaKey, eccKey, ed25519Key, ed448Key, rng, sType,
+        heap);
+#ifdef WOLFSSL_ASYNC_CRYPT
     if (sigSz == WC_PENDING_E) {
         /* Not free'ing certSignCtx->sig here because it could still be in use
          * with async operations. */
         return sigSz;
     }
+#endif
 
     if (sigSz >= 0) {
         if (requestSz + MAX_SEQ_SZ * 2 + sigSz > (int)buffSz)
             sigSz = BUFFER_E;
         else
-            sigSz = AddSignature(buffer, requestSz, certSignCtx->sig, sigSz,
+            sigSz = AddSignature(buf, requestSz, certSignCtx->sig, sigSz,
                                  sType);
     }
 
@@ -11034,12 +13894,13 @@
     return sigSz;
 }
 
-int wc_SignCert_ex(int requestSz, int sType, byte* buffer, word32 buffSz,
+int wc_SignCert_ex(int requestSz, int sType, byte* buf, word32 buffSz,
                    int keyType, void* key, WC_RNG* rng)
 {
-    RsaKey* rsaKey = NULL;
-    ecc_key* eccKey = NULL;
+    RsaKey*      rsaKey = NULL;
+    ecc_key*     eccKey = NULL;
     ed25519_key* ed25519Key = NULL;
+    ed448_key*   ed448Key = NULL;
 
     if (keyType == RSA_TYPE)
         rsaKey = (RsaKey*)key;
@@ -11047,51 +13908,66 @@
         eccKey = (ecc_key*)key;
     else if (keyType == ED25519_TYPE)
         ed25519Key = (ed25519_key*)key;
-
-    return SignCert(requestSz, sType, buffer, buffSz, rsaKey, eccKey,
-                    ed25519Key, rng);
-}
-
-int wc_SignCert(int requestSz, int sType, byte* buffer, word32 buffSz,
+    else if (keyType == ED448_TYPE)
+        ed448Key = (ed448_key*)key;
+
+    return SignCert(requestSz, sType, buf, buffSz, rsaKey, eccKey, ed25519Key,
+                    ed448Key, rng);
+}
+
+int wc_SignCert(int requestSz, int sType, byte* buf, word32 buffSz,
                 RsaKey* rsaKey, ecc_key* eccKey, WC_RNG* rng)
 {
-    return SignCert(requestSz, sType, buffer, buffSz, rsaKey, eccKey, NULL,
+    return SignCert(requestSz, sType, buf, buffSz, rsaKey, eccKey, NULL, NULL,
                     rng);
 }
 
-int wc_MakeSelfCert(Cert* cert, byte* buffer, word32 buffSz,
+int wc_MakeSelfCert(Cert* cert, byte* buf, word32 buffSz,
                     RsaKey* key, WC_RNG* rng)
 {
     int ret;
 
-    ret = wc_MakeCert(cert, buffer, buffSz, key, NULL, rng);
+    ret = wc_MakeCert(cert, buf, buffSz, key, NULL, rng);
     if (ret < 0)
         return ret;
 
     return wc_SignCert(cert->bodySz, cert->sigType,
-                       buffer, buffSz, key, NULL, rng);
+                       buf, buffSz, key, NULL, rng);
 }
 
 
 #ifdef WOLFSSL_CERT_EXT
 
+/* Get raw subject from cert, which may contain OIDs not parsed by Decode.
+   The raw subject pointer will only be valid while "cert" is valid. */
+int wc_GetSubjectRaw(byte **subjectRaw, Cert *cert)
+{
+    int rc = BAD_FUNC_ARG;
+    if ((subjectRaw != NULL) && (cert != NULL)) {
+        *subjectRaw = cert->sbjRaw;
+        rc = 0;
+    }
+    return rc;
+}
+
 /* Set KID from public key */
 static int SetKeyIdFromPublicKey(Cert *cert, RsaKey *rsakey, ecc_key *eckey,
                                  byte *ntruKey, word16 ntruKeySz,
-                                 ed25519_key* ed25519Key, int kid_type)
-{
-    byte *buffer;
+                                 ed25519_key* ed25519Key, ed448_key* ed448Key,
+                                 int kid_type)
+{
+    byte *buf;
     int   bufferSz, ret;
 
     if (cert == NULL ||
         (rsakey == NULL && eckey == NULL && ntruKey == NULL &&
-                                            ed25519Key == NULL) ||
+                                      ed25519Key == NULL && ed448Key == NULL) ||
         (kid_type != SKID_TYPE && kid_type != AKID_TYPE))
         return BAD_FUNC_ARG;
 
-    buffer = (byte *)XMALLOC(MAX_PUBLIC_KEY_SZ, cert->heap,
+    buf = (byte *)XMALLOC(MAX_PUBLIC_KEY_SZ, cert->heap,
                                                        DYNAMIC_TYPE_TMP_BUFFER);
-    if (buffer == NULL)
+    if (buf == NULL)
         return MEMORY_E;
 
     /* Public Key */
@@ -11099,19 +13975,19 @@
 #ifndef NO_RSA
     /* RSA public key */
     if (rsakey != NULL)
-        bufferSz = SetRsaPublicKey(buffer, rsakey, MAX_PUBLIC_KEY_SZ, 0);
+        bufferSz = SetRsaPublicKey(buf, rsakey, MAX_PUBLIC_KEY_SZ, 0);
 #endif
 #ifdef HAVE_ECC
     /* ECC public key */
     if (eckey != NULL)
-        bufferSz = SetEccPublicKey(buffer, eckey, 0);
+        bufferSz = SetEccPublicKey(buf, eckey, 0);
 #endif
 #ifdef HAVE_NTRU
     /* NTRU public key */
     if (ntruKey != NULL) {
         bufferSz = MAX_PUBLIC_KEY_SZ;
         ret = ntru_crypto_ntru_encrypt_publicKey2SubjectPublicKeyInfo(
-                        ntruKeySz, ntruKey, (word16 *)(&bufferSz), buffer);
+                        ntruKeySz, ntruKey, (word16 *)(&bufferSz), buf);
         if (ret != NTRU_OK)
             bufferSz = -1;
     }
@@ -11121,48 +13997,41 @@
 #ifdef HAVE_ED25519
     /* ED25519 public key */
     if (ed25519Key != NULL)
-        bufferSz = SetEd25519PublicKey(buffer, ed25519Key, 0);
+        bufferSz = SetEd25519PublicKey(buf, ed25519Key, 0);
+#endif
+#ifdef HAVE_ED448
+    /* ED448 public key */
+    if (ed448Key != NULL)
+        bufferSz = SetEd448PublicKey(buffer, ed448Key, 0);
 #endif
 
     if (bufferSz <= 0) {
-        XFREE(buffer, cert->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(buf, cert->heap, DYNAMIC_TYPE_TMP_BUFFER);
         return PUBLIC_KEY_E;
     }
 
     /* Compute SKID by hashing public key */
-#ifdef NO_SHA
     if (kid_type == SKID_TYPE) {
-        ret = wc_Sha256Hash(buffer, bufferSz, cert->skid);
-        cert->skidSz = WC_SHA256_DIGEST_SIZE;
+        ret = CalcHashId(buf, bufferSz, cert->skid);
+        cert->skidSz = KEYID_SIZE;
     }
     else if (kid_type == AKID_TYPE) {
-        ret = wc_Sha256Hash(buffer, bufferSz, cert->akid);
-        cert->akidSz = WC_SHA256_DIGEST_SIZE;
+        ret = CalcHashId(buf, bufferSz, cert->akid);
+        cert->akidSz = KEYID_SIZE;
     }
     else
         ret = BAD_FUNC_ARG;
-#else /* NO_SHA */
-    if (kid_type == SKID_TYPE) {
-        ret = wc_ShaHash(buffer, bufferSz, cert->skid);
-        cert->skidSz = WC_SHA_DIGEST_SIZE;
-    }
-    else if (kid_type == AKID_TYPE) {
-        ret = wc_ShaHash(buffer, bufferSz, cert->akid);
-        cert->akidSz = WC_SHA_DIGEST_SIZE;
-    }
-    else
-        ret = BAD_FUNC_ARG;
-#endif /* NO_SHA */
-
-    XFREE(buffer, cert->heap, DYNAMIC_TYPE_TMP_BUFFER);
+
+    XFREE(buf, cert->heap, DYNAMIC_TYPE_TMP_BUFFER);
     return ret;
 }
 
 int wc_SetSubjectKeyIdFromPublicKey_ex(Cert *cert, int keyType, void* key)
 {
-    RsaKey* rsaKey = NULL;
-    ecc_key* eccKey = NULL;
+    RsaKey*      rsaKey = NULL;
+    ecc_key*     eccKey = NULL;
     ed25519_key* ed25519Key = NULL;
+    ed448_key*   ed448Key = NULL;
 
     if (keyType == RSA_TYPE)
         rsaKey = (RsaKey*)key;
@@ -11170,15 +14039,18 @@
         eccKey = (ecc_key*)key;
     else if (keyType == ED25519_TYPE)
         ed25519Key = (ed25519_key*)key;
+    else if (keyType == ED448_TYPE)
+        ed448Key = (ed448_key*)key;
 
     return SetKeyIdFromPublicKey(cert, rsaKey, eccKey, NULL, 0, ed25519Key,
-                                 SKID_TYPE);
+                                 ed448Key, SKID_TYPE);
 }
 
 /* Set SKID from RSA or ECC public key */
 int wc_SetSubjectKeyIdFromPublicKey(Cert *cert, RsaKey *rsakey, ecc_key *eckey)
 {
-    return SetKeyIdFromPublicKey(cert, rsakey, eckey, NULL, 0, NULL, SKID_TYPE);
+    return SetKeyIdFromPublicKey(cert, rsakey, eckey, NULL, 0, NULL, NULL,
+                                 SKID_TYPE);
 }
 
 #ifdef HAVE_NTRU
@@ -11186,16 +14058,17 @@
 int wc_SetSubjectKeyIdFromNtruPublicKey(Cert *cert,
                                         byte *ntruKey, word16 ntruKeySz)
 {
-    return SetKeyIdFromPublicKey(cert, NULL,NULL,ntruKey, ntruKeySz, NULL,
+    return SetKeyIdFromPublicKey(cert, NULL,NULL,ntruKey, ntruKeySz, NULL, NULL,
                                  SKID_TYPE);
 }
 #endif
 
 int wc_SetAuthKeyIdFromPublicKey_ex(Cert *cert, int keyType, void* key)
 {
-    RsaKey* rsaKey = NULL;
-    ecc_key* eccKey = NULL;
+    RsaKey*      rsaKey = NULL;
+    ecc_key*     eccKey = NULL;
     ed25519_key* ed25519Key = NULL;
+    ed448_key*   ed448Key = NULL;
 
     if (keyType == RSA_TYPE)
         rsaKey = (RsaKey*)key;
@@ -11203,19 +14076,22 @@
         eccKey = (ecc_key*)key;
     else if (keyType == ED25519_TYPE)
         ed25519Key = (ed25519_key*)key;
+    else if (keyType == ED448_TYPE)
+        ed448Key = (ed448_key*)key;
 
     return SetKeyIdFromPublicKey(cert, rsaKey, eccKey, NULL, 0, ed25519Key,
-                                 AKID_TYPE);
+                                 ed448Key, AKID_TYPE);
 }
 
 /* Set SKID from RSA or ECC public key */
 int wc_SetAuthKeyIdFromPublicKey(Cert *cert, RsaKey *rsakey, ecc_key *eckey)
 {
-    return SetKeyIdFromPublicKey(cert, rsakey, eckey, NULL, 0, NULL, AKID_TYPE);
-}
-
-
-#ifndef NO_FILESYSTEM
+    return SetKeyIdFromPublicKey(cert, rsakey, eckey, NULL, 0, NULL, NULL,
+                                 AKID_TYPE);
+}
+
+
+#if !defined(NO_FILESYSTEM) && !defined(NO_ASN_CRYPT)
 
 /* Set SKID from public key file in PEM */
 int wc_SetSubjectKeyId(Cert *cert, const char* file)
@@ -11234,10 +14110,11 @@
         WOLFSSL_MSG("wc_SetSubjectKeyId memory Problem");
         return MEMORY_E;
     }
-
-    derSz = wc_PemPubKeyToDer(file, der, MAX_PUBLIC_KEY_SZ);
-    if (derSz <= 0)
-    {
+    derSz = MAX_PUBLIC_KEY_SZ;
+
+    XMEMSET(der, 0, derSz);
+    derSz = wc_PemPubKeyToDer(file, der, derSz);
+    if (derSz <= 0) {
         XFREE(der, cert->heap, DYNAMIC_TYPE_CERT);
         return derSz;
     }
@@ -11315,68 +14192,55 @@
     return ret;
 }
 
-#endif /* NO_FILESYSTEM */
+#endif /* !NO_FILESYSTEM && !NO_ASN_CRYPT */
+
+static int SetAuthKeyIdFromDcert(Cert* cert, DecodedCert* decoded)
+{
+    int ret = 0;
+
+    /* Subject Key Id not found !! */
+    if (decoded->extSubjKeyIdSet == 0) {
+        ret = ASN_NO_SKID;
+    }
+
+    /* SKID invalid size */
+    else if (sizeof(cert->akid) < sizeof(decoded->extSubjKeyId)) {
+        ret = MEMORY_E;
+    }
+
+    else {
+        /* Put the SKID of CA to AKID of certificate */
+        XMEMCPY(cert->akid, decoded->extSubjKeyId, KEYID_SIZE);
+        cert->akidSz = KEYID_SIZE;
+    }
+
+    return ret;
+}
 
 /* Set AKID from certificate contains in buffer (DER encoded) */
 int wc_SetAuthKeyIdFromCert(Cert *cert, const byte *der, int derSz)
 {
-    int ret;
-
-#ifdef WOLFSSL_SMALL_STACK
-    DecodedCert* decoded;
-#else
-    DecodedCert decoded[1];
-#endif
-
-    if (cert == NULL || der == NULL || derSz <= 0)
-        return BAD_FUNC_ARG;
-
-#ifdef WOLFSSL_SMALL_STACK
-    decoded = (DecodedCert*)XMALLOC(sizeof(DecodedCert),
-                                    cert->heap, DYNAMIC_TYPE_TMP_BUFFER);
-    if (decoded == NULL)
-        return MEMORY_E;
-#endif
-
-    /* decode certificate and get SKID that will be AKID of current cert */
-    InitDecodedCert(decoded, (byte*)der, derSz, NULL);
-    ret = ParseCert(decoded, CERT_TYPE, NO_VERIFY, 0);
-    if (ret != 0) {
-        FreeDecodedCert(decoded);
-        #ifdef WOLFSSL_SMALL_STACK
-            XFREE(decoded, cert->heap, DYNAMIC_TYPE_TMP_BUFFER);
-        #endif
-        return ret;
-    }
-
-    /* Subject Key Id not found !! */
-    if (decoded->extSubjKeyIdSet == 0) {
-        FreeDecodedCert(decoded);
-        #ifdef WOLFSSL_SMALL_STACK
-            XFREE(decoded, cert->heap, DYNAMIC_TYPE_TMP_BUFFER);
-        #endif
-        return ASN_NO_SKID;
-    }
-
-    /* SKID invalid size */
-    if (sizeof(cert->akid) < sizeof(decoded->extSubjKeyId)) {
-        FreeDecodedCert(decoded);
-        #ifdef WOLFSSL_SMALL_STACK
-            XFREE(decoded, cert->heap, DYNAMIC_TYPE_TMP_BUFFER);
-        #endif
-        return MEMORY_E;
-    }
-
-    /* Put the SKID of CA to AKID of certificate */
-    XMEMCPY(cert->akid, decoded->extSubjKeyId, KEYID_SIZE);
-    cert->akidSz = KEYID_SIZE;
-
-    FreeDecodedCert(decoded);
-    #ifdef WOLFSSL_SMALL_STACK
-        XFREE(decoded, cert->heap, DYNAMIC_TYPE_TMP_BUFFER);
-    #endif
-
-    return 0;
+    int ret = 0;
+
+    if (cert == NULL) {
+        ret = BAD_FUNC_ARG;
+    }
+    else {
+        /* Check if decodedCert is cached */
+        if (cert->der != der) {
+            /* Allocate cache for the decoded cert */
+            ret = wc_SetCert_LoadDer(cert, der, derSz);
+        }
+
+        if (ret >= 0) {
+            ret = SetAuthKeyIdFromDcert(cert, (DecodedCert*)cert->decodedCert);
+#ifndef WOLFSSL_CERT_GEN_CACHE
+            wc_SetCert_Free(cert);
+#endif
+        }
+    }
+
+    return ret;
 }
 
 
@@ -11411,7 +14275,7 @@
     return ret;
 }
 
-#endif /* NO_FILESYSTEM */
+#endif /* !NO_FILESYSTEM */
 
 /* Set KeyUsage from human readable string */
 int wc_SetKeyUsage(Cert *cert, const char *value)
@@ -11425,13 +14289,12 @@
 
     cert->keyUsage = 0;
 
+    /* duplicate string (including terminator) */
     len = (word32)XSTRLEN(value);
     str = (char*)XMALLOC(len+1, cert->heap, DYNAMIC_TYPE_TMP_BUFFER);
     if (str == NULL)
         return MEMORY_E;
-
-    XSTRNCPY(str, value, len);
-    str[len] = '\0';
+    XMEMCPY(str, value, len+1);
 
     /* parse value, and set corresponding Key Usage value */
     if ((token = XSTRTOK(str, ",", &ptr)) == NULL) {
@@ -11485,13 +14348,12 @@
 
     cert->extKeyUsage = 0;
 
+    /* duplicate string (including terminator) */
     len = (word32)XSTRLEN(value);
     str = (char*)XMALLOC(len+1, cert->heap, DYNAMIC_TYPE_TMP_BUFFER);
     if (str == NULL)
         return MEMORY_E;
-
-    XSTRNCPY(str, value, len);
-    str[len] = '\0';
+    XMEMCPY(str, value, len+1);
 
     /* parse value, and set corresponding Key Usage value */
     if ((token = XSTRTOK(str, ",", &ptr)) == NULL) {
@@ -11565,41 +14427,22 @@
 
 #ifdef WOLFSSL_ALT_NAMES
 
-/* Set Alt Names from der cert, return 0 on success */
-static int SetAltNamesFromCert(Cert* cert, const byte* der, int derSz)
-{
-    int ret;
-#ifdef WOLFSSL_SMALL_STACK
-    DecodedCert* decoded;
-#else
-    DecodedCert decoded[1];
-#endif
-
-    if (derSz < 0)
-        return derSz;
-
-#ifdef WOLFSSL_SMALL_STACK
-    decoded = (DecodedCert*)XMALLOC(sizeof(DecodedCert), cert->heap,
-                                                       DYNAMIC_TYPE_TMP_BUFFER);
-    if (decoded == NULL)
-        return MEMORY_E;
-#endif
-
-    InitDecodedCert(decoded, (byte*)der, derSz, NULL);
-    ret = ParseCertRelative(decoded, CA_TYPE, NO_VERIFY, 0);
-
-    if (ret < 0) {
-        WOLFSSL_MSG("ParseCertRelative error");
-    }
-    else if (decoded->extensions) {
-        byte   b;
+static int SetAltNamesFromDcert(Cert* cert, DecodedCert* decoded)
+{
+    int ret = 0;
+    byte tag;
+
+    if (decoded->extensions) {
         int    length;
         word32 maxExtensionsIdx;
 
         decoded->srcIdx = decoded->extensionsIdx;
-        b = decoded->source[decoded->srcIdx++];
-
-        if (b != ASN_EXTENSIONS) {
+        if (GetASNTag(decoded->source, &decoded->srcIdx, &tag, decoded->maxIdx)
+                != 0) {
+            return ASN_PARSE_E;
+        }
+
+        if (tag != ASN_EXTENSIONS) {
             ret = ASN_PARSE_E;
         }
         else if (GetLength(decoded->source, &decoded->srcIdx, &length,
@@ -11651,17 +14494,13 @@
         }
     }
 
-    FreeDecodedCert(decoded);
-#ifdef WOLFSSL_SMALL_STACK
-    XFREE(decoded, cert->heap, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return ret < 0 ? ret : 0;
-}
-
-
-/* Set Dates from der cert, return 0 on success */
-static int SetDatesFromCert(Cert* cert, const byte* der, int derSz)
+    return ret;
+}
+
+#ifndef NO_FILESYSTEM
+
+/* Set Alt Names from der cert, return 0 on success */
+static int SetAltNamesFromCert(Cert* cert, const byte* der, int derSz)
 {
     int ret;
 #ifdef WOLFSSL_SMALL_STACK
@@ -11670,7 +14509,6 @@
     DecodedCert decoded[1];
 #endif
 
-    WOLFSSL_ENTER("SetDatesFromCert");
     if (derSz < 0)
         return derSz;
 
@@ -11681,13 +14519,31 @@
         return MEMORY_E;
 #endif
 
-    InitDecodedCert(decoded, (byte*)der, derSz, NULL);
+    InitDecodedCert(decoded, der, derSz, NULL);
     ret = ParseCertRelative(decoded, CA_TYPE, NO_VERIFY, 0);
 
     if (ret < 0) {
         WOLFSSL_MSG("ParseCertRelative error");
     }
-    else if (decoded->beforeDate == NULL || decoded->afterDate == NULL) {
+    else {
+        ret = SetAltNamesFromDcert(cert, decoded);
+    }
+
+    FreeDecodedCert(decoded);
+#ifdef WOLFSSL_SMALL_STACK
+    XFREE(decoded, cert->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+    return ret < 0 ? ret : 0;
+}
+
+#endif
+
+static int SetDatesFromDcert(Cert* cert, DecodedCert* decoded)
+{
+    int ret = 0;
+
+    if (decoded->beforeDate == NULL || decoded->afterDate == NULL) {
         WOLFSSL_MSG("Couldn't extract dates");
         ret = -1;
     }
@@ -11704,21 +14560,108 @@
         cert->afterDateSz  = decoded->afterDateLen;
     }
 
-    FreeDecodedCert(decoded);
-
-#ifdef WOLFSSL_SMALL_STACK
-    XFREE(decoded, cert->heap, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return ret < 0 ? ret : 0;
+    return ret;
 }
 
 #endif /* WOLFSSL_ALT_NAMES */
 
+static void SetNameFromDcert(CertName* cn, DecodedCert* decoded)
+{
+    int sz;
+
+    if (decoded->subjectCN) {
+        sz = (decoded->subjectCNLen < CTC_NAME_SIZE) ? decoded->subjectCNLen
+                                                     : CTC_NAME_SIZE - 1;
+        XSTRNCPY(cn->commonName, decoded->subjectCN, sz);
+        cn->commonName[sz] = '\0';
+        cn->commonNameEnc = decoded->subjectCNEnc;
+    }
+    if (decoded->subjectC) {
+        sz = (decoded->subjectCLen < CTC_NAME_SIZE) ? decoded->subjectCLen
+                                                    : CTC_NAME_SIZE - 1;
+        XSTRNCPY(cn->country, decoded->subjectC, sz);
+        cn->country[sz] = '\0';
+        cn->countryEnc = decoded->subjectCEnc;
+    }
+    if (decoded->subjectST) {
+        sz = (decoded->subjectSTLen < CTC_NAME_SIZE) ? decoded->subjectSTLen
+                                                     : CTC_NAME_SIZE - 1;
+        XSTRNCPY(cn->state, decoded->subjectST, sz);
+        cn->state[sz] = '\0';
+        cn->stateEnc = decoded->subjectSTEnc;
+    }
+    if (decoded->subjectL) {
+        sz = (decoded->subjectLLen < CTC_NAME_SIZE) ? decoded->subjectLLen
+                                                    : CTC_NAME_SIZE - 1;
+        XSTRNCPY(cn->locality, decoded->subjectL, sz);
+        cn->locality[sz] = '\0';
+        cn->localityEnc = decoded->subjectLEnc;
+    }
+    if (decoded->subjectO) {
+        sz = (decoded->subjectOLen < CTC_NAME_SIZE) ? decoded->subjectOLen
+                                                    : CTC_NAME_SIZE - 1;
+        XSTRNCPY(cn->org, decoded->subjectO, sz);
+        cn->org[sz] = '\0';
+        cn->orgEnc = decoded->subjectOEnc;
+    }
+    if (decoded->subjectOU) {
+        sz = (decoded->subjectOULen < CTC_NAME_SIZE) ? decoded->subjectOULen
+                                                     : CTC_NAME_SIZE - 1;
+        XSTRNCPY(cn->unit, decoded->subjectOU, sz);
+        cn->unit[sz] = '\0';
+        cn->unitEnc = decoded->subjectOUEnc;
+    }
+    if (decoded->subjectSN) {
+        sz = (decoded->subjectSNLen < CTC_NAME_SIZE) ? decoded->subjectSNLen
+                                                     : CTC_NAME_SIZE - 1;
+        XSTRNCPY(cn->sur, decoded->subjectSN, sz);
+        cn->sur[sz] = '\0';
+        cn->surEnc = decoded->subjectSNEnc;
+    }
+    if (decoded->subjectSND) {
+        sz = (decoded->subjectSNDLen < CTC_NAME_SIZE) ? decoded->subjectSNDLen
+                                                     : CTC_NAME_SIZE - 1;
+        XSTRNCPY(cn->serialDev, decoded->subjectSND, sz);
+        cn->serialDev[sz] = '\0';
+        cn->serialDevEnc = decoded->subjectSNDEnc;
+    }
+#ifdef WOLFSSL_CERT_EXT
+    if (decoded->subjectBC) {
+        sz = (decoded->subjectBCLen < CTC_NAME_SIZE) ? decoded->subjectBCLen
+                                                     : CTC_NAME_SIZE - 1;
+        XSTRNCPY(cn->busCat, decoded->subjectBC, sz);
+        cn->busCat[sz] = '\0';
+        cn->busCatEnc = decoded->subjectBCEnc;
+    }
+    if (decoded->subjectJC) {
+        sz = (decoded->subjectJCLen < CTC_NAME_SIZE) ? decoded->subjectJCLen
+                                                     : CTC_NAME_SIZE - 1;
+        XSTRNCPY(cn->joiC, decoded->subjectJC, sz);
+        cn->joiC[sz] = '\0';
+        cn->joiCEnc = decoded->subjectJCEnc;
+    }
+    if (decoded->subjectJS) {
+        sz = (decoded->subjectJSLen < CTC_NAME_SIZE) ? decoded->subjectJSLen
+                                                     : CTC_NAME_SIZE - 1;
+        XSTRNCPY(cn->joiSt, decoded->subjectJS, sz);
+        cn->joiSt[sz] = '\0';
+        cn->joiStEnc = decoded->subjectJSEnc;
+    }
+#endif
+    if (decoded->subjectEmail) {
+        sz = (decoded->subjectEmailLen < CTC_NAME_SIZE)
+           ?  decoded->subjectEmailLen : CTC_NAME_SIZE - 1;
+        XSTRNCPY(cn->email, decoded->subjectEmail, sz);
+        cn->email[sz] = '\0';
+    }
+}
+
+#ifndef NO_FILESYSTEM
+
 /* Set cn name from der buffer, return 0 on success */
 static int SetNameFromCert(CertName* cn, const byte* der, int derSz)
 {
-    int ret, sz;
+    int ret;
 #ifdef WOLFSSL_SMALL_STACK
     DecodedCert* decoded;
 #else
@@ -11735,68 +14678,14 @@
         return MEMORY_E;
 #endif
 
-    InitDecodedCert(decoded, (byte*)der, derSz, NULL);
+    InitDecodedCert(decoded, der, derSz, NULL);
     ret = ParseCertRelative(decoded, CA_TYPE, NO_VERIFY, 0);
 
     if (ret < 0) {
         WOLFSSL_MSG("ParseCertRelative error");
     }
     else {
-        if (decoded->subjectCN) {
-            sz = (decoded->subjectCNLen < CTC_NAME_SIZE) ? decoded->subjectCNLen
-                                                         : CTC_NAME_SIZE - 1;
-            XSTRNCPY(cn->commonName, decoded->subjectCN, CTC_NAME_SIZE);
-            cn->commonName[sz] = '\0';
-            cn->commonNameEnc = decoded->subjectCNEnc;
-        }
-        if (decoded->subjectC) {
-            sz = (decoded->subjectCLen < CTC_NAME_SIZE) ? decoded->subjectCLen
-                                                        : CTC_NAME_SIZE - 1;
-            XSTRNCPY(cn->country, decoded->subjectC, CTC_NAME_SIZE);
-            cn->country[sz] = '\0';
-            cn->countryEnc = decoded->subjectCEnc;
-        }
-        if (decoded->subjectST) {
-            sz = (decoded->subjectSTLen < CTC_NAME_SIZE) ? decoded->subjectSTLen
-                                                         : CTC_NAME_SIZE - 1;
-            XSTRNCPY(cn->state, decoded->subjectST, CTC_NAME_SIZE);
-            cn->state[sz] = '\0';
-            cn->stateEnc = decoded->subjectSTEnc;
-        }
-        if (decoded->subjectL) {
-            sz = (decoded->subjectLLen < CTC_NAME_SIZE) ? decoded->subjectLLen
-                                                        : CTC_NAME_SIZE - 1;
-            XSTRNCPY(cn->locality, decoded->subjectL, CTC_NAME_SIZE);
-            cn->locality[sz] = '\0';
-            cn->localityEnc = decoded->subjectLEnc;
-        }
-        if (decoded->subjectO) {
-            sz = (decoded->subjectOLen < CTC_NAME_SIZE) ? decoded->subjectOLen
-                                                        : CTC_NAME_SIZE - 1;
-            XSTRNCPY(cn->org, decoded->subjectO, CTC_NAME_SIZE);
-            cn->org[sz] = '\0';
-            cn->orgEnc = decoded->subjectOEnc;
-        }
-        if (decoded->subjectOU) {
-            sz = (decoded->subjectOULen < CTC_NAME_SIZE) ? decoded->subjectOULen
-                                                         : CTC_NAME_SIZE - 1;
-            XSTRNCPY(cn->unit, decoded->subjectOU, CTC_NAME_SIZE);
-            cn->unit[sz] = '\0';
-            cn->unitEnc = decoded->subjectOUEnc;
-        }
-        if (decoded->subjectSN) {
-            sz = (decoded->subjectSNLen < CTC_NAME_SIZE) ? decoded->subjectSNLen
-                                                         : CTC_NAME_SIZE - 1;
-            XSTRNCPY(cn->sur, decoded->subjectSN, CTC_NAME_SIZE);
-            cn->sur[sz] = '\0';
-            cn->surEnc = decoded->subjectSNEnc;
-        }
-        if (decoded->subjectEmail) {
-            sz = (decoded->subjectEmailLen < CTC_NAME_SIZE)
-               ?  decoded->subjectEmailLen : CTC_NAME_SIZE - 1;
-            XSTRNCPY(cn->email, decoded->subjectEmail, CTC_NAME_SIZE);
-            cn->email[sz] = '\0';
-        }
+        SetNameFromDcert(cn, decoded);
     }
 
     FreeDecodedCert(decoded);
@@ -11808,16 +14697,18 @@
     return ret < 0 ? ret : 0;
 }
 
-
-#ifndef NO_FILESYSTEM
-
 /* Set cert issuer from issuerFile in PEM */
 int wc_SetIssuer(Cert* cert, const char* issuerFile)
 {
     int         ret;
     int         derSz;
-    byte*       der = (byte*)XMALLOC(EIGHTK_BUF, cert->heap, DYNAMIC_TYPE_CERT);
-
+    byte*       der;
+
+    if (cert == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    der = (byte*)XMALLOC(EIGHTK_BUF, cert->heap, DYNAMIC_TYPE_CERT);
     if (der == NULL) {
         WOLFSSL_MSG("wc_SetIssuer OOF Problem");
         return MEMORY_E;
@@ -11836,12 +14727,18 @@
 {
     int         ret;
     int         derSz;
-    byte*       der = (byte*)XMALLOC(EIGHTK_BUF, cert->heap, DYNAMIC_TYPE_CERT);
-
+    byte*       der;
+
+    if (cert == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    der = (byte*)XMALLOC(EIGHTK_BUF, cert->heap, DYNAMIC_TYPE_CERT);
     if (der == NULL) {
         WOLFSSL_MSG("wc_SetSubject OOF Problem");
         return MEMORY_E;
     }
+
     derSz = wc_PemCertToDer(subjectFile, der, EIGHTK_BUF);
     ret = SetNameFromCert(&cert->subject, der, derSz);
     XFREE(der, cert->heap, DYNAMIC_TYPE_CERT);
@@ -11849,7 +14746,6 @@
     return ret;
 }
 
-
 #ifdef WOLFSSL_ALT_NAMES
 
 /* Set alt names from file in PEM */
@@ -11857,8 +14753,13 @@
 {
     int         ret;
     int         derSz;
-    byte*       der = (byte*)XMALLOC(EIGHTK_BUF, cert->heap, DYNAMIC_TYPE_CERT);
-
+    byte*       der;
+
+    if (cert == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    der = (byte*)XMALLOC(EIGHTK_BUF, cert->heap, DYNAMIC_TYPE_CERT);
     if (der == NULL) {
         WOLFSSL_MSG("wc_SetAltNames OOF Problem");
         return MEMORY_E;
@@ -11872,41 +14773,321 @@
 
 #endif /* WOLFSSL_ALT_NAMES */
 
-#endif /* NO_FILESYSTEM */
+#endif /* !NO_FILESYSTEM */
 
 /* Set cert issuer from DER buffer */
 int wc_SetIssuerBuffer(Cert* cert, const byte* der, int derSz)
 {
-    cert->selfSigned = 0;
-    return SetNameFromCert(&cert->issuer, der, derSz);
-}
-
+    int ret = 0;
+
+    if (cert == NULL) {
+        ret = BAD_FUNC_ARG;
+    }
+    else {
+        cert->selfSigned = 0;
+
+        /* Check if decodedCert is cached */
+        if (cert->der != der) {
+            /* Allocate cache for the decoded cert */
+            ret = wc_SetCert_LoadDer(cert, der, derSz);
+        }
+
+        if (ret >= 0) {
+            SetNameFromDcert(&cert->issuer, (DecodedCert*)cert->decodedCert);
+#ifndef WOLFSSL_CERT_GEN_CACHE
+            wc_SetCert_Free(cert);
+#endif
+        }
+    }
+
+    return ret;
+}
 
 /* Set cert subject from DER buffer */
 int wc_SetSubjectBuffer(Cert* cert, const byte* der, int derSz)
 {
-    return SetNameFromCert(&cert->subject, der, derSz);
-}
-
+    int ret = 0;
+
+    if (cert == NULL) {
+        ret = BAD_FUNC_ARG;
+    }
+    else {
+        /* Check if decodedCert is cached */
+        if (cert->der != der) {
+            /* Allocate cache for the decoded cert */
+            ret = wc_SetCert_LoadDer(cert, der, derSz);
+        }
+
+        if (ret >= 0) {
+            SetNameFromDcert(&cert->subject, (DecodedCert*)cert->decodedCert);
+#ifndef WOLFSSL_CERT_GEN_CACHE
+            wc_SetCert_Free(cert);
+#endif
+        }
+    }
+
+    return ret;
+}
+#ifdef WOLFSSL_CERT_EXT
+/* Set cert raw subject from DER buffer */
+int wc_SetSubjectRaw(Cert* cert, const byte* der, int derSz)
+{
+    int ret = 0;
+
+    if (cert == NULL) {
+        ret = BAD_FUNC_ARG;
+    }
+    else {
+        /* Check if decodedCert is cached */
+        if (cert->der != der) {
+            /* Allocate cache for the decoded cert */
+            ret = wc_SetCert_LoadDer(cert, der, derSz);
+        }
+
+        if (ret >= 0) {
+            if ((((DecodedCert*)cert->decodedCert)->subjectRaw) &&
+                (((DecodedCert*)cert->decodedCert)->subjectRawLen <=
+                        (int)sizeof(CertName))) {
+                XMEMCPY(cert->sbjRaw,
+                        ((DecodedCert*)cert->decodedCert)->subjectRaw,
+                        ((DecodedCert*)cert->decodedCert)->subjectRawLen);
+            }
+#ifndef WOLFSSL_CERT_GEN_CACHE
+            wc_SetCert_Free(cert);
+#endif
+        }
+    }
+
+    return ret;
+}
+
+/* Set cert raw issuer from DER buffer */
+int wc_SetIssuerRaw(Cert* cert, const byte* der, int derSz)
+{
+    int ret = 0;
+
+    if (cert == NULL) {
+        ret = BAD_FUNC_ARG;
+    }
+    else {
+        /* Check if decodedCert is cached */
+        if (cert->der != der) {
+            /* Allocate cache for the decoded cert */
+            ret = wc_SetCert_LoadDer(cert, der, derSz);
+        }
+
+        if (ret >= 0) {
+            if ((((DecodedCert*)cert->decodedCert)->issuerRaw) &&
+                (((DecodedCert*)cert->decodedCert)->issuerRawLen <=
+                        (int)sizeof(CertName))) {
+                XMEMCPY(cert->issRaw,
+                        ((DecodedCert*)cert->decodedCert)->issuerRaw,
+                        ((DecodedCert*)cert->decodedCert)->issuerRawLen);
+            }
+#ifndef WOLFSSL_CERT_GEN_CACHE
+            wc_SetCert_Free(cert);
+#endif
+        }
+    }
+    return ret;
+}
+#endif
 
 #ifdef WOLFSSL_ALT_NAMES
 
 /* Set cert alt names from DER buffer */
 int wc_SetAltNamesBuffer(Cert* cert, const byte* der, int derSz)
 {
-    return SetAltNamesFromCert(cert, der, derSz);
+    int ret = 0;
+
+    if (cert == NULL) {
+     ret = BAD_FUNC_ARG;
+    }
+    else {
+        /* Check if decodedCert is cached */
+        if (cert->der != der) {
+            /* Allocate cache for the decoded cert */
+            ret = wc_SetCert_LoadDer(cert, der, derSz);
+        }
+
+        if (ret >= 0) {
+            ret = SetAltNamesFromDcert(cert, (DecodedCert*)cert->decodedCert);
+#ifndef WOLFSSL_CERT_GEN_CACHE
+            wc_SetCert_Free(cert);
+#endif
+       }
+    }
+
+    return(ret);
 }
 
 /* Set cert dates from DER buffer */
 int wc_SetDatesBuffer(Cert* cert, const byte* der, int derSz)
 {
-    return SetDatesFromCert(cert, der, derSz);
+    int ret = 0;
+
+    if (cert == NULL) {
+     ret = BAD_FUNC_ARG;
+    }
+    else {
+        /* Check if decodedCert is cached */
+        if (cert->der != der) {
+            /* Allocate cache for the decoded cert */
+            ret = wc_SetCert_LoadDer(cert, der, derSz);
+        }
+
+        if (ret >= 0) {
+            ret = SetDatesFromDcert(cert, (DecodedCert*)cert->decodedCert);
+#ifndef WOLFSSL_CERT_GEN_CACHE
+            wc_SetCert_Free(cert);
+#endif
+        }
+    }
+
+    return(ret);
 }
 
 #endif /* WOLFSSL_ALT_NAMES */
 
 #endif /* WOLFSSL_CERT_GEN */
 
+#if (defined(WOLFSSL_CERT_GEN) && defined(WOLFSSL_CERT_EXT)) \
+        || defined(OPENSSL_EXTRA)
+/* Encode OID string representation to ITU-T X.690 format */
+int EncodePolicyOID(byte *out, word32 *outSz, const char *in, void* heap)
+{
+    word32 val, idx = 0, nb_val;
+    char *token, *str, *ptr;
+    word32 len;
+
+    (void)heap;
+
+    if (out == NULL || outSz == NULL || *outSz < 2 || in == NULL)
+        return BAD_FUNC_ARG;
+
+    /* duplicate string (including terminator) */
+    len = (word32)XSTRLEN(in);
+    str = (char *)XMALLOC(len+1, heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (str == NULL)
+        return MEMORY_E;
+    XMEMCPY(str, in, len+1);
+
+    nb_val = 0;
+
+    /* parse value, and set corresponding Policy OID value */
+    token = XSTRTOK(str, ".", &ptr);
+    while (token != NULL)
+    {
+        val = (word32)XATOI(token);
+
+        if (nb_val == 0) {
+            if (val > 2) {
+                XFREE(str, heap, DYNAMIC_TYPE_TMP_BUFFER);
+                return ASN_OBJECT_ID_E;
+            }
+
+            out[idx] = (byte)(40 * val);
+        }
+        else if (nb_val == 1) {
+            if (val > 127) {
+                XFREE(str, heap, DYNAMIC_TYPE_TMP_BUFFER);
+                return ASN_OBJECT_ID_E;
+            }
+
+            if (idx > *outSz) {
+                XFREE(str, heap, DYNAMIC_TYPE_TMP_BUFFER);
+                return BUFFER_E;
+            }
+
+            out[idx++] += (byte)val;
+        }
+        else {
+            word32  tb = 0, x;
+            int     i = 0;
+            byte    oid[MAX_OID_SZ];
+
+            while (val >= 128) {
+                x = val % 128;
+                val /= 128;
+                oid[i++] = (byte) (((tb++) ? 0x80 : 0) | x);
+            }
+
+            if ((idx+(word32)i) > *outSz) {
+                XFREE(str, heap, DYNAMIC_TYPE_TMP_BUFFER);
+                return BUFFER_E;
+            }
+
+            oid[i] = (byte) (((tb++) ? 0x80 : 0) | val);
+
+            /* push value in the right order */
+            while (i >= 0)
+                out[idx++] = oid[i--];
+        }
+
+        token = XSTRTOK(NULL, ".", &ptr);
+        nb_val++;
+    }
+
+    *outSz = idx;
+
+    XFREE(str, heap, DYNAMIC_TYPE_TMP_BUFFER);
+    return 0;
+}
+#endif /* WOLFSSL_CERT_EXT || OPENSSL_EXTRA */
+
+#endif /* !NO_CERTS */
+
+#if !defined(NO_DH) && (defined(WOLFSSL_QT) || defined(OPENSSL_ALL))
+/* Helper function for wolfSSL_i2d_DHparams */
+int StoreDHparams(byte* out, word32* outLen, mp_int* p, mp_int* g)
+{
+    word32 idx = 0;
+    int pSz;
+    int gSz;
+    unsigned int tmp;
+    word32 headerSz = 4; /* 2*ASN_TAG + 2*LEN(ENUM) */
+
+    /* If the leading bit on the INTEGER is a 1, add a leading zero */
+    int pLeadingZero = mp_leading_bit(p);
+    int gLeadingZero = mp_leading_bit(g);
+    int pLen = mp_unsigned_bin_size(p);
+    int gLen = mp_unsigned_bin_size(g);
+
+    WOLFSSL_ENTER("StoreDHparams");
+    if (out == NULL) {
+        WOLFSSL_MSG("Null buffer error");
+        return BUFFER_E;
+    }
+
+    tmp = pLeadingZero + gLeadingZero + pLen + gLen;
+    if (*outLen < (tmp + headerSz)) {
+        return BUFFER_E;
+    }
+
+    /* Set sequence */
+    idx = SetSequence(tmp + headerSz + 2, out);
+
+    /* Encode p */
+    pSz = SetASNIntMP(p, -1, &out[idx]);
+    if (pSz < 0) {
+        WOLFSSL_MSG("SetASNIntMP failed");
+        return pSz;
+    }
+    idx += pSz;
+
+    /* Encode g */
+    gSz = SetASNIntMP(g, -1, &out[idx]);
+    if (gSz < 0) {
+        WOLFSSL_MSG("SetASNIntMP failed");
+        return gSz;
+    }
+    idx += gSz;
+
+    *outLen = idx;
+
+    return 0;
+}
+#endif /* !NO_DH && WOLFSSL_QT || OPENSSL_ALL */
 
 #ifdef HAVE_ECC
 
@@ -11958,9 +15139,17 @@
         return ASN_ECC_KEY_E;
     }
 
+#ifndef NO_STRICT_ECDSA_LEN
+    /* enable strict length checking for signature */
+    if (sigLen != idx + (word32)len) {
+        return ASN_ECC_KEY_E;
+    }
+#else
+    /* allow extra signature bytes at end */
     if ((word32)len > (sigLen - idx)) {
         return ASN_ECC_KEY_E;
     }
+#endif
 
     if (GetInt(r, sig, &idx, sigLen) < 0) {
         return ASN_ECC_KEY_E;
@@ -12034,7 +15223,7 @@
     XMEMCPY(priv, &input[*inOutIdx], privSz);
     *inOutIdx += length;
 
-    if (ret == 0 && (*inOutIdx + 1) < inSz) {
+    if ((*inOutIdx + 1) < inSz) {
         /* prefix 0, may have */
         b = input[*inOutIdx];
         if (b == ECC_PREFIX_0) {
@@ -12102,7 +15291,7 @@
 #ifdef WOLFSSL_CUSTOM_CURVES
 static void ByteToHex(byte n, char* str)
 {
-    static const char hexChar[] = { '0', '1', '2', '3', '4', '5', '6', '7',
+    const char hexChar[] = { '0', '1', '2', '3', '4', '5', '6', '7',
                                     '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
 
     str[0] = hexChar[n >> 4];
@@ -12116,12 +15305,15 @@
     int len;
     int i;
     char* str;
+    word32 localIdx;
+    byte   tag;
 
     if (*inOutIdx >= inSz) {
         return BUFFER_E;
     }
 
-    if (input[*inOutIdx] == ASN_INTEGER) {
+    localIdx = *inOutIdx;
+    if (GetASNTag(input, &localIdx, &tag, inSz) == 0 && tag == ASN_INTEGER) {
         if (GetASNInt(input, inOutIdx, &len, inSz) < 0)
             return ASN_PARSE_E;
     }
@@ -12138,9 +15330,41 @@
     *inOutIdx += len;
     *out = str;
 
-    return 0;
-}
-#endif
+    (void)heap;
+    (void)heapType;
+
+    return 0;
+}
+#endif /* WOLFSSL_CUSTOM_CURVES */
+
+#ifdef WOLFSSL_CUSTOM_CURVES
+static int EccKeyParamCopy(char** dst, char* src)
+{
+    int ret = 0;
+#ifdef WOLFSSL_ECC_CURVE_STATIC
+    word32 length;
+#endif
+
+    if (dst == NULL || src == NULL)
+        return BAD_FUNC_ARG;
+
+#ifndef WOLFSSL_ECC_CURVE_STATIC
+    *dst = src;
+#else
+    length = (int)XSTRLEN(src) + 1;
+    if (length > MAX_ECC_STRING) {
+        WOLFSSL_MSG("ECC Param too large for buffer");
+        ret = BUFFER_E;
+    }
+    else {
+        XSTRNCPY(*dst, src, length);
+    }
+    XFREE(src, key->heap, DYNAMIC_TYPE_ECC_BUFFER);
+#endif
+
+    return ret;
+}
+#endif /* WOLFSSL_CUSTOM_CURVES */
 
 int wc_EccPublicKeyDecode(const byte* input, word32* inOutIdx,
                           ecc_key* key, word32 inSz)
@@ -12148,7 +15372,8 @@
     int    length;
     int    ret;
     int    curve_id = ECC_CURVE_DEF;
-    word32 oidSum;
+    word32 oidSum, localIdx;
+    byte   tag;
 
     if (input == NULL || inOutIdx == NULL || key == NULL || inSz == 0)
         return BAD_FUNC_ARG;
@@ -12167,11 +15392,13 @@
         return BUFFER_E;
     }
 
-    if (input[*inOutIdx] == (ASN_SEQUENCE | ASN_CONSTRUCTED)) {
+    localIdx = *inOutIdx;
+    if (GetASNTag(input, &localIdx, &tag, inSz) == 0 &&
+            tag == (ASN_SEQUENCE | ASN_CONSTRUCTED)) {
 #ifdef WOLFSSL_CUSTOM_CURVES
         ecc_set_type* curve;
         int len;
-        char* point;
+        char* point = NULL;
 
         ret = 0;
 
@@ -12181,8 +15408,13 @@
             ret = MEMORY_E;
 
         if (ret == 0) {
+            static const char customName[] = "Custom";
             XMEMSET(curve, 0, sizeof(*curve));
-            curve->name = "Custom";
+        #ifndef WOLFSSL_ECC_CURVE_STATIC
+            curve->name = customName;
+        #else
+            XMEMCPY((void*)curve->name, customName, sizeof(customName));
+        #endif
             curve->id = ECC_CURVE_CUSTOM;
 
             if (GetSequence(input, inOutIdx, &length, inSz) < 0)
@@ -12195,9 +15427,12 @@
                 ret = ASN_PARSE_E;
         }
         if (ret == 0) {
+            char* p = NULL;
             SkipObjectId(input, inOutIdx, inSz);
-            ret = ASNToHexString(input, inOutIdx, (char**)&curve->prime, inSz,
+            ret = ASNToHexString(input, inOutIdx, &p, inSz,
                                             key->heap, DYNAMIC_TYPE_ECC_BUFFER);
+            if (ret == 0)
+                ret = EccKeyParamCopy((char**)&curve->prime, p);
         }
         if (ret == 0) {
             curve->size = (int)XSTRLEN(curve->prime) / 2;
@@ -12206,15 +15441,23 @@
                 ret = ASN_PARSE_E;
         }
         if (ret == 0) {
-            ret = ASNToHexString(input, inOutIdx, (char**)&curve->Af, inSz,
+            char* af = NULL;
+            ret = ASNToHexString(input, inOutIdx, &af, inSz,
                                             key->heap, DYNAMIC_TYPE_ECC_BUFFER);
+            if (ret == 0)
+                ret = EccKeyParamCopy((char**)&curve->Af, af);
         }
         if (ret == 0) {
-            ret = ASNToHexString(input, inOutIdx, (char**)&curve->Bf, inSz,
+            char* bf = NULL;
+            ret = ASNToHexString(input, inOutIdx, &bf, inSz,
                                             key->heap, DYNAMIC_TYPE_ECC_BUFFER);
+            if (ret == 0)
+                ret = EccKeyParamCopy((char**)&curve->Bf, bf);
         }
         if (ret == 0) {
-            if (*inOutIdx < inSz && input[*inOutIdx] == ASN_BIT_STRING) {
+            localIdx = *inOutIdx;
+            if (*inOutIdx < inSz && GetASNTag(input, &localIdx, &tag, inSz)
+                    == 0 && tag == ASN_BIT_STRING) {
                 len = 0;
                 ret = GetASNHeader(input, ASN_BIT_STRING, inOutIdx, &len, inSz);
                 *inOutIdx += len;
@@ -12233,6 +15476,7 @@
             }
         }
         if (ret == 0) {
+        #ifndef WOLFSSL_ECC_CURVE_STATIC
             curve->Gx = (const char*)XMALLOC(curve->size * 2 + 2, key->heap,
                                                        DYNAMIC_TYPE_ECC_BUFFER);
             curve->Gy = (const char*)XMALLOC(curve->size * 2 + 2, key->heap,
@@ -12241,28 +15485,44 @@
                 XFREE(point, key->heap, DYNAMIC_TYPE_ECC_BUFFER);
                 ret = MEMORY_E;
             }
+        #else
+            if (curve->size * 2 + 2 > MAX_ECC_STRING) {
+                WOLFSSL_MSG("curve size is too large to fit in buffer");
+                ret = BUFFER_E;
+            }
+        #endif
         }
         if (ret == 0) {
+            char* o = NULL;
+
             XMEMCPY((char*)curve->Gx, point + 2, curve->size * 2);
             XMEMCPY((char*)curve->Gy, point + curve->size * 2 + 2,
                                                                curve->size * 2);
             ((char*)curve->Gx)[curve->size * 2] = '\0';
             ((char*)curve->Gy)[curve->size * 2] = '\0';
             XFREE(point, key->heap, DYNAMIC_TYPE_ECC_BUFFER);
-            ret = ASNToHexString(input, inOutIdx, (char**)&curve->order, inSz,
+            ret = ASNToHexString(input, inOutIdx, &o, inSz,
                                             key->heap, DYNAMIC_TYPE_ECC_BUFFER);
+            if (ret == 0)
+                ret = EccKeyParamCopy((char**)&curve->order, o);
         }
         if (ret == 0) {
             curve->cofactor = GetInteger7Bit(input, inOutIdx, inSz);
 
+        #ifndef WOLFSSL_ECC_CURVE_STATIC
             curve->oid = NULL;
+        #else
+            XMEMSET((void*)curve->oid, 0, sizeof(curve->oid));
+        #endif
             curve->oidSz = 0;
             curve->oidSum = 0;
 
             if (wc_ecc_set_custom_curve(key, curve) < 0) {
                 ret = ASN_PARSE_E;
             }
+        #ifdef WOLFSSL_CUSTOM_CURVES
             key->deallocSet = 1;
+        #endif
             curve = NULL;
         }
         if (curve != NULL)
@@ -12272,7 +15532,7 @@
             return ret;
 #else
         return ASN_PARSE_E;
-#endif
+#endif /* WOLFSSL_CUSTOM_CURVES */
     }
     else {
         /* ecc params information */
@@ -12287,20 +15547,22 @@
     }
 
     /* key header */
-    ret = CheckBitString(input, inOutIdx, NULL, inSz, 1, NULL);
+    ret = CheckBitString(input, inOutIdx, &length, inSz, 1, NULL);
     if (ret != 0)
         return ret;
 
     /* This is the raw point data compressed or uncompressed. */
-    if (wc_ecc_import_x963_ex(input + *inOutIdx, inSz - *inOutIdx, key,
+    if (wc_ecc_import_x963_ex(input + *inOutIdx, length, key,
                                                             curve_id) != 0) {
         return ASN_ECC_KEY_E;
     }
 
-    return 0;
-}
-
-
+    *inOutIdx += length;
+
+    return 0;
+}
+
+#if defined(HAVE_ECC_KEY_EXPORT) && !defined(NO_ASN_CRYPT)
 /* build DER formatted ECC key, include optional public key if requested,
  * return length on success, negative on error */
 static int wc_BuildEccKeyDer(ecc_key* key, byte* output, word32 inLen,
@@ -12418,7 +15680,6 @@
     return totalSz;
 }
 
-
 /* Write a Private ecc key, including public to DER format,
  * length on success else < 0 */
 int wc_EccKeyToDer(ecc_key* key, byte* output, word32 inLen)
@@ -12434,6 +15695,7 @@
     return wc_BuildEccKeyDer(key, output, inLen, 0);
 }
 
+#ifdef HAVE_PKCS8
 /* Write only private ecc key to unencrypted PKCS#8 format.
  *
  * If output is NULL, places required PKCS#8 buffer size in outLen and
@@ -12502,8 +15764,9 @@
     *outLen = ret;
     return ret;
 }
-
-#endif  /* HAVE_ECC */
+#endif /* HAVE_PKCS8 */
+#endif /* HAVE_ECC_KEY_EXPORT && !NO_ASN_CRYPT */
+#endif /* HAVE_ECC */
 
 
 #ifdef HAVE_ED25519
@@ -12547,6 +15810,9 @@
         if (GetOctetString(input, inOutIdx, &privSz, inSz) < 0)
             return ASN_PARSE_E;
 
+        if (privSz != 32)
+            return ASN_PARSE_E;
+
         priv = input + *inOutIdx;
         *inOutIdx += privSz;
         endKeyIdx = *inOutIdx;
@@ -12680,8 +15946,187 @@
 
 #endif /* WOLFSSL_KEY_GEN */
 
-#endif  /* HAVE_ED25519 */
-
+#endif /* HAVE_ED25519 */
+
+#ifdef HAVE_ED448
+
+int wc_Ed448PrivateKeyDecode(const byte* input, word32* inOutIdx,
+                             ed448_key* key, word32 inSz)
+{
+    word32      oid;
+    int         ret, version, length, endKeyIdx, privSz, pubSz;
+    const byte* priv;
+    const byte* pub;
+
+    if (input == NULL || inOutIdx == NULL || key == NULL || inSz == 0)
+        return BAD_FUNC_ARG;
+
+    if (GetSequence(input, inOutIdx, &length, inSz) >= 0) {
+        endKeyIdx = *inOutIdx + length;
+
+        if (GetMyVersion(input, inOutIdx, &version, inSz) < 0)
+            return ASN_PARSE_E;
+        if (version != 0) {
+            WOLFSSL_MSG("Unrecognized version of ED448 private key");
+            return ASN_PARSE_E;
+        }
+
+        if (GetAlgoId(input, inOutIdx, &oid, oidKeyType, inSz) < 0)
+            return ASN_PARSE_E;
+        if (oid != ED448k)
+            return ASN_PARSE_E;
+
+        if (GetOctetString(input, inOutIdx, &length, inSz) < 0)
+            return ASN_PARSE_E;
+
+        if (GetOctetString(input, inOutIdx, &privSz, inSz) < 0)
+            return ASN_PARSE_E;
+
+        priv = input + *inOutIdx;
+        *inOutIdx += privSz;
+    }
+    else {
+        if (GetOctetString(input, inOutIdx, &privSz, inSz) < 0)
+            return ASN_PARSE_E;
+
+        if (privSz != 57)
+            return ASN_PARSE_E;
+
+        priv = input + *inOutIdx;
+        *inOutIdx += privSz;
+        endKeyIdx = *inOutIdx;
+    }
+
+    if (endKeyIdx == (int)*inOutIdx) {
+        ret = wc_ed448_import_private_only(priv, privSz, key);
+    }
+    else {
+        if (GetASNHeader(input, ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | 1,
+                         inOutIdx, &length, inSz) < 0) {
+            return ASN_PARSE_E;
+        }
+        if (GetOctetString(input, inOutIdx, &pubSz, inSz) < 0)
+            return ASN_PARSE_E;
+        pub = input + *inOutIdx;
+        *inOutIdx += pubSz;
+
+        ret = wc_ed448_import_private_key(priv, privSz, pub, pubSz, key);
+    }
+    if (ret == 0 && endKeyIdx != (int)*inOutIdx)
+        return ASN_PARSE_E;
+
+    return ret;
+}
+
+
+int wc_Ed448PublicKeyDecode(const byte* input, word32* inOutIdx,
+                            ed448_key* key, word32 inSz)
+{
+    int    length;
+    int    ret;
+
+    if (input == NULL || inOutIdx == NULL || key == NULL || inSz == 0)
+        return BAD_FUNC_ARG;
+
+    if (GetSequence(input, inOutIdx, &length, inSz) < 0)
+        return ASN_PARSE_E;
+
+    if (GetSequence(input, inOutIdx, &length, inSz) < 0)
+        return ASN_PARSE_E;
+
+    ret = SkipObjectId(input, inOutIdx, inSz);
+    if (ret != 0)
+        return ret;
+
+    /* key header */
+    ret = CheckBitString(input, inOutIdx, NULL, inSz, 1, NULL);
+    if (ret != 0)
+        return ret;
+
+    /* This is the raw point data compressed or uncompressed. */
+    if (wc_ed448_import_public(input + *inOutIdx, inSz - *inOutIdx, key) != 0)
+        return ASN_ECC_KEY_E;
+
+    return 0;
+}
+
+
+#ifdef WOLFSSL_KEY_GEN
+
+/* build DER formatted ED448 key,
+ * return length on success, negative on error */
+static int wc_BuildEd448KeyDer(ed448_key* key, byte* output, word32 inLen,
+                               int pubOut)
+{
+    byte   algoArray[MAX_ALGO_SZ];
+    byte   ver[MAX_VERSION_SZ];
+    byte   seq[MAX_SEQ_SZ];
+    int    ret;
+    word32 idx = 0, seqSz, verSz, algoSz, privSz, pubSz = 0;
+
+    if (key == NULL || output == NULL || inLen == 0)
+        return BAD_FUNC_ARG;
+
+    if (pubOut) {
+        pubSz = 2 + 2 + ED448_PUB_KEY_SIZE;
+    }
+    privSz = 2 + 2 + ED448_KEY_SIZE;
+    algoSz = SetAlgoID(ED448k, algoArray, oidKeyType, 0);
+    verSz  = SetMyVersion(0, ver, FALSE);
+    seqSz  = SetSequence(verSz + algoSz + privSz + pubSz, seq);
+
+    if (seqSz + verSz + algoSz + privSz + pubSz > inLen)
+        return BAD_FUNC_ARG;
+
+    /* write out */
+    /* seq */
+    XMEMCPY(output + idx, seq, seqSz);
+    idx = seqSz;
+    /* ver */
+    XMEMCPY(output + idx, ver, verSz);
+    idx += verSz;
+    /* algo */
+    XMEMCPY(output + idx, algoArray, algoSz);
+    idx += algoSz;
+    /* privKey */
+    idx += SetOctetString(2 + ED448_KEY_SIZE, output + idx);
+    idx += SetOctetString(ED448_KEY_SIZE, output + idx);
+    ret = wc_ed448_export_private_only(key, output + idx, &privSz);
+    if (ret != 0)
+        return ret;
+    idx += privSz;
+    /* pubKey */
+    if (pubOut) {
+        idx += SetExplicit(1, 2 + ED448_PUB_KEY_SIZE, output + idx);
+        idx += SetOctetString(ED448_KEY_SIZE, output + idx);
+        ret = wc_ed448_export_public(key, output + idx, &pubSz);
+        if (ret != 0)
+            return ret;
+        idx += pubSz;
+    }
+
+    return idx;
+}
+
+/* Write a Private ecc key, including public to DER format,
+ * length on success else < 0 */
+int wc_Ed448KeyToDer(ed448_key* key, byte* output, word32 inLen)
+{
+    return wc_BuildEd448KeyDer(key, output, inLen, 1);
+}
+
+
+
+/* Write only private ecc key to DER format,
+ * length on success else < 0 */
+int wc_Ed448PrivateKeyToDer(ed448_key* key, byte* output, word32 inLen)
+{
+    return wc_BuildEd448KeyDer(key, output, inLen, 0);
+}
+
+#endif /* WOLFSSL_KEY_GEN */
+
+#endif /* HAVE_ED448 */
 
 #if defined(HAVE_OCSP) || defined(HAVE_CRL)
 
@@ -12703,25 +16148,33 @@
     return 0;
 }
 
-#endif
+#endif /* HAVE_OCSP || HAVE_CRL */
 
 
 #ifdef HAVE_OCSP
 
-static int GetEnumerated(const byte* input, word32* inOutIdx, int *value)
+static int GetEnumerated(const byte* input, word32* inOutIdx, int *value,
+        int sz)
 {
     word32 idx = *inOutIdx;
     word32 len;
+    byte   tag;
 
     WOLFSSL_ENTER("GetEnumerated");
 
     *value = 0;
 
-    if (input[idx++] != ASN_ENUMERATED)
-        return ASN_PARSE_E;
+    if (GetASNTag(input, &idx, &tag, sz) < 0)
+        return ASN_PARSE_E;
+
+    if (tag != ASN_ENUMERATED)
+        return ASN_PARSE_E;
+
+    if ((int)idx >= sz)
+        return BUFFER_E;
 
     len = input[idx++];
-    if (len > 4)
+    if (len > 4 || (int)(len + idx) > sz)
         return ASN_PARSE_E;
 
     while (len--) {
@@ -12737,10 +16190,11 @@
 static int DecodeSingleResponse(byte* source,
                             word32* ioIndex, OcspResponse* resp, word32 size)
 {
-    word32 idx = *ioIndex, prevIndex, oid;
+    word32 idx = *ioIndex, prevIndex, oid, localIdx;
     int length, wrapperSz;
     CertStatus* cs = resp->status;
     int ret;
+    byte tag;
 
     WOLFSSL_ENTER("DecodeSingleResponse");
 
@@ -12780,6 +16234,9 @@
     if (GetSerialNumber(source, &idx, cs->serial, &cs->serialSz, size) < 0)
         return ASN_PARSE_E;
 
+    if ( idx >= size )
+        return BUFFER_E;
+
     /* CertStatus */
     switch (source[idx++])
     {
@@ -12803,39 +16260,64 @@
 
 #if defined(OPENSSL_ALL) || defined(WOLFSSL_NGINX) || defined(WOLFSSL_HAPROXY)
     cs->thisDateAsn = source + idx;
+    localIdx = 0;
+    if (GetDateInfo(cs->thisDateAsn, &localIdx, NULL,
+                    (byte*)&cs->thisDateParsed.type,
+                    &cs->thisDateParsed.length, size) < 0)
+        return ASN_PARSE_E;
+    XMEMCPY(cs->thisDateParsed.data,
+            cs->thisDateAsn + localIdx - cs->thisDateParsed.length,
+            cs->thisDateParsed.length);
 #endif
     if (GetBasicDate(source, &idx, cs->thisDate,
                                                 &cs->thisDateFormat, size) < 0)
         return ASN_PARSE_E;
 
 #ifndef NO_ASN_TIME
+#ifndef WOLFSSL_NO_OCSP_DATE_CHECK
     if (!XVALIDATE_DATE(cs->thisDate, cs->thisDateFormat, BEFORE))
         return ASN_BEFORE_DATE_E;
 #endif
+#endif
 
     /* The following items are optional. Only check for them if there is more
      * unprocessed data in the singleResponse wrapper. */
 
+    localIdx = idx;
     if (((int)(idx - prevIndex) < wrapperSz) &&
-        (source[idx] == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0)))
+        GetASNTag(source, &localIdx, &tag, size) == 0 &&
+        tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0))
     {
         idx++;
         if (GetLength(source, &idx, &length, size) < 0)
             return ASN_PARSE_E;
 #if defined(OPENSSL_ALL) || defined(WOLFSSL_NGINX) || defined(WOLFSSL_HAPROXY)
         cs->nextDateAsn = source + idx;
+        localIdx = 0;
+        if (GetDateInfo(cs->nextDateAsn, &localIdx, NULL,
+                        (byte*)&cs->nextDateParsed.type,
+                        &cs->nextDateParsed.length, size) < 0)
+            return ASN_PARSE_E;
+        XMEMCPY(cs->nextDateParsed.data,
+                cs->nextDateAsn + localIdx - cs->nextDateParsed.length,
+                cs->nextDateParsed.length);
 #endif
         if (GetBasicDate(source, &idx, cs->nextDate,
                                                 &cs->nextDateFormat, size) < 0)
             return ASN_PARSE_E;
 
 #ifndef NO_ASN_TIME
+#ifndef WOLFSSL_NO_OCSP_DATE_CHECK
         if (!XVALIDATE_DATE(cs->nextDate, cs->nextDateFormat, AFTER))
             return ASN_AFTER_DATE_E;
 #endif
-    }
+#endif
+    }
+
+    localIdx = idx;
     if (((int)(idx - prevIndex) < wrapperSz) &&
-        (source[idx] == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 1)))
+        GetASNTag(source, &localIdx, &tag, size) == 0 &&
+        tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 1))
     {
         idx++;
         if (GetLength(source, &idx, &length, size) < 0)
@@ -12856,13 +16338,17 @@
     int ext_bound; /* boundary index for the sequence of extensions */
     word32 oid;
     int ret;
+    byte tag;
 
     WOLFSSL_ENTER("DecodeOcspRespExtensions");
 
     if ((idx + 1) > sz)
         return BUFFER_E;
 
-    if (source[idx++] != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 1))
+    if (GetASNTag(source, &idx, &tag, sz) < 0)
+        return ASN_PARSE_E;
+
+    if (tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 1))
         return ASN_PARSE_E;
 
     if (GetLength(source, &idx, &length, sz) < 0)
@@ -12874,6 +16360,8 @@
     ext_bound = idx + length;
 
     while (idx < (word32)ext_bound) {
+        word32 localIdx;
+
         if (GetSequence(source, &idx, &length, sz) < 0) {
             WOLFSSL_MSG("\tfail: should be a SEQUENCE");
             return ASN_PARSE_E;
@@ -12886,7 +16374,13 @@
         }
 
         /* check for critical flag */
-        if (source[idx] == ASN_BOOLEAN) {
+        if ((idx + 1) > (word32)sz) {
+            WOLFSSL_MSG("\tfail: malformed buffer");
+            return BUFFER_E;
+        }
+
+        localIdx = idx;
+        if (GetASNTag(source, &localIdx, &tag, sz) == 0 && tag == ASN_BOOLEAN) {
             WOLFSSL_MSG("\tfound optional critical flag, moving past");
             ret = GetBoolean(source, &idx, sz);
             if (ret < 0)
@@ -12918,10 +16412,11 @@
 static int DecodeResponseData(byte* source,
                             word32* ioIndex, OcspResponse* resp, word32 size)
 {
-    word32 idx = *ioIndex, prev_idx;
+    word32 idx = *ioIndex, prev_idx, localIdx;
     int length;
     int version;
-    word32 responderId = 0;
+    int ret;
+    byte tag;
 
     WOLFSSL_ENTER("DecodeResponseData");
 
@@ -12935,7 +16430,9 @@
      * item isn't an EXPLICIT[0], then set version to zero and move
      * onto the next item.
      */
-    if (source[idx] == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED))
+    localIdx = idx;
+    if (GetASNTag(source, &localIdx, &tag, size) == 0 &&
+            tag == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED))
     {
         idx += 2; /* Eat the value and length */
         if (GetMyVersion(source, &idx, &version, size) < 0)
@@ -12943,10 +16440,12 @@
     } else
         version = 0;
 
-    responderId = source[idx++];
-    if ((responderId == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | 1)) ||
-        (responderId == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | 2)))
+    localIdx = idx;
+    if (GetASNTag(source, &localIdx, &tag, size) == 0 &&
+        ( tag == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | 1) ||
+          tag == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | 2) ))
     {
+        idx++; /* advance past ASN tag */
         if (GetLength(source, &idx, &length, size) < 0)
             return ASN_PARSE_E;
         idx += length;
@@ -12959,8 +16458,8 @@
                                         &resp->producedDateFormat, size) < 0)
         return ASN_PARSE_E;
 
-    if (DecodeSingleResponse(source, &idx, resp, size) < 0)
-        return ASN_PARSE_E;
+    if ((ret = DecodeSingleResponse(source, &idx, resp, size)) < 0)
+        return ret; /* ASN_PARSE_E, ASN_BEFORE_DATE_E, ASN_AFTER_DATE_E */
 
     /*
      * Check the length of the ResponseData against the current index to
@@ -12981,10 +16480,14 @@
                             word32* ioIndex, OcspResponse* resp, word32 size)
 {
     word32 idx = *ioIndex;
+    byte tag;
 
     WOLFSSL_ENTER("DecodeCerts");
 
-    if (source[idx++] == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC))
+    if (GetASNTag(source, &idx, &tag, size) < 0)
+        return ASN_PARSE_E;
+
+    if (tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC))
     {
         int length;
 
@@ -13025,8 +16528,8 @@
         return ASN_INPUT_E;
     end_index = idx + length;
 
-    if (DecodeResponseData(source, &idx, resp, size) < 0)
-        return ASN_PARSE_E;
+    if ((ret = DecodeResponseData(source, &idx, resp, size)) < 0)
+        return ret; /* ASN_PARSE_E, ASN_BEFORE_DATE_E, ASN_AFTER_DATE_E */
 
     /* Get the signature algorithm */
     if (GetAlgoId(source, &idx, &resp->sigOID, oidSigType, size) < 0)
@@ -13085,7 +16588,8 @@
         ret = ConfirmSignature(&cert.sigCtx,
             resp->response, resp->responseSz,
             cert.publicKey, cert.pubKeySize, cert.keyOID,
-            resp->sig, resp->sigSz, resp->sigOID);
+            resp->sig, resp->sigSz, resp->sigOID, NULL);
+
         FreeDecodedCert(&cert);
 
         if (ret != 0) {
@@ -13096,7 +16600,7 @@
     else
 #endif /* WOLFSSL_NO_OCSP_OPTIONAL_CERTS */
     {
-        Signer* ca = NULL;
+        Signer* ca;
         int sigValid = -1;
 
         #ifndef NO_SKID
@@ -13112,7 +16616,7 @@
             /* ConfirmSignature is blocking here */
             sigValid = ConfirmSignature(&sigCtx, resp->response,
                 resp->responseSz, ca->publicKey, ca->pubKeySize, ca->keyOID,
-                                resp->sig, resp->sigSz, resp->sigOID);
+                                resp->sig, resp->sigSz, resp->sigOID, NULL);
         }
         if (ca == NULL || sigValid != 0) {
             WOLFSSL_MSG("\tOCSP Confirm signature failed");
@@ -13150,6 +16654,7 @@
     byte* source = resp->source;
     word32 size = resp->maxIdx;
     word32 oid;
+    byte   tag;
 
     WOLFSSL_ENTER("OcspResponseDecode");
 
@@ -13158,7 +16663,7 @@
         return ASN_PARSE_E;
 
     /* First get the responseStatus, an ENUMERATED */
-    if (GetEnumerated(source, &idx, &resp->responseStatus) < 0)
+    if (GetEnumerated(source, &idx, &resp->responseStatus, size) < 0)
         return ASN_PARSE_E;
 
     if (resp->responseStatus != OCSP_SUCCESSFUL)
@@ -13167,7 +16672,9 @@
     /* Next is an EXPLICIT record called ResponseBytes, OPTIONAL */
     if (idx >= size)
         return ASN_INPUT_E;
-    if (source[idx++] != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC))
+    if (GetASNTag(source, &idx, &tag, size) < 0)
+        return ASN_PARSE_E;
+    if (tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC))
         return ASN_PARSE_E;
     if (GetLength(source, &idx, &length, size) < 0)
         return ASN_PARSE_E;
@@ -13195,7 +16702,7 @@
 
 word32 EncodeOcspRequestExtensions(OcspRequest* req, byte* output, word32 size)
 {
-    static const byte NonceObjId[] = { 0x2b, 0x06, 0x01, 0x05, 0x05, 0x07,
+    const byte NonceObjId[] = { 0x2b, 0x06, 0x01, 0x05, 0x05, 0x07,
                                        0x30, 0x01, 0x02 };
     byte seqArray[5][MAX_SEQ_SZ];
     word32 seqSz[5], totalSz = (word32)sizeof(NonceObjId);
@@ -13264,7 +16771,8 @@
 
     issuerSz    = SetDigest(req->issuerHash,    KEYID_SIZE,    issuerArray);
     issuerKeySz = SetDigest(req->issuerKeyHash, KEYID_SIZE,    issuerKeyArray);
-    snSz        = SetSerialNumber(req->serial,  req->serialSz, snArray, MAX_SN_SZ);
+    snSz        = SetSerialNumber(req->serial,  req->serialSz, snArray,
+                                                          MAX_SN_SZ, MAX_SN_SZ);
     extSz       = 0;
 
     if (snSz < 0)
@@ -13344,7 +16852,7 @@
         req->serialSz = cert->serialSz;
 
         if (cert->extAuthInfoSz != 0 && cert->extAuthInfo != NULL) {
-            req->url = (byte*)XMALLOC(cert->extAuthInfoSz, req->heap,
+            req->url = (byte*)XMALLOC(cert->extAuthInfoSz + 1, req->heap,
                                                      DYNAMIC_TYPE_OCSP_REQUEST);
             if (req->url == NULL) {
                 XFREE(req->serial, req->heap, DYNAMIC_TYPE_OCSP);
@@ -13353,6 +16861,7 @@
 
             XMEMCPY(req->url, cert->extAuthInfo, cert->extAuthInfoSz);
             req->urlSz = cert->extAuthInfoSz;
+            req->url[req->urlSz] = 0;
         }
     }
 
@@ -13386,9 +16895,21 @@
     if (req) {
         if (req->serial)
             XFREE(req->serial, req->heap, DYNAMIC_TYPE_OCSP_REQUEST);
+        req->serial = NULL;
+
+#ifdef OPENSSL_EXTRA
+        if (req->serialInt) {
+            if (req->serialInt->isDynamic) {
+                XFREE(req->serialInt->data, NULL, DYNAMIC_TYPE_OPENSSL);
+            }
+            XFREE(req->serialInt, NULL, DYNAMIC_TYPE_OPENSSL);
+        }
+        req->serialInt = NULL;
+#endif
 
         if (req->url)
             XFREE(req->url, req->heap, DYNAMIC_TYPE_OCSP_REQUEST);
+        req->url = NULL;
     }
 }
 
@@ -13413,7 +16934,11 @@
 
     /* Nonces are not critical. The responder may not necessarily add
      * the nonce to the response. */
-    if (resp->nonceSz != 0) {
+    if (req->nonceSz
+#ifndef WOLFSSL_FORCE_OCSP_NONCE_CHECK
+            && resp->nonceSz != 0
+#endif
+    ) {
         cmp = req->nonceSz - resp->nonceSz;
         if (cmp != 0)
         {
@@ -13460,20 +16985,22 @@
     return 0;
 }
 
-#endif
+#endif /* HAVE_OCSP */
 
 
 /* store WC_SHA hash of NAME */
-WOLFSSL_LOCAL int GetNameHash(const byte* source, word32* idx, byte* hash,
+int GetNameHash(const byte* source, word32* idx, byte* hash,
                              int maxIdx)
 {
     int    length;  /* length of all distinguished names */
     int    ret;
     word32 dummy;
+    byte   tag;
 
     WOLFSSL_ENTER("GetNameHash");
 
-    if (source[*idx] == ASN_OBJECT_ID) {
+    dummy = *idx;
+    if (GetASNTag(source, &dummy, &tag, maxIdx) == 0 && tag == ASN_OBJECT_ID) {
         WOLFSSL_MSG("Trying optional prefix...");
 
         if (GetLength(source, idx, &length, maxIdx) < 0)
@@ -13490,11 +17017,7 @@
     if (GetSequence(source, idx, &length, maxIdx) < 0)
         return ASN_PARSE_E;
 
-#ifdef NO_SHA
-    ret = wc_Sha256Hash(source + dummy, length + *idx - dummy, hash);
-#else
-    ret = wc_ShaHash(source + dummy, length + *idx - dummy, hash);
-#endif
+    ret = CalcHashId(source + dummy, length + *idx - dummy, hash);
 
     *idx += length;
 
@@ -13509,13 +17032,8 @@
 {
     WOLFSSL_MSG("InitDecodedCRL");
 
-    dcrl->certBegin    = 0;
-    dcrl->sigIndex     = 0;
-    dcrl->sigLength    = 0;
-    dcrl->signatureOID = 0;
-    dcrl->certs        = NULL;
-    dcrl->totalCerts   = 0;
-    dcrl->heap         = heap;
+    XMEMSET(dcrl, 0, sizeof(DecodedCRL));
+    dcrl->heap = heap;
     #ifdef WOLFSSL_HEAP_TEST
         dcrl->heap = (void*)WOLFSSL_HEAP_TEST;
     #endif
@@ -13578,8 +17096,8 @@
         return ret;
     }
 
-    if (*idx != end)  /* skip extensions */
-        *idx = end;
+    /* skip extensions */
+    *idx = end;
 
     return 0;
 }
@@ -13620,7 +17138,7 @@
     InitSignatureCtx(sigCtx, heap, INVALID_DEVID);
     if (ConfirmSignature(sigCtx, toBeSigned, tbsSz, ca->publicKey,
                          ca->pubKeySize, ca->keyOID, signature, sigSz,
-                         signatureOID) != 0) {
+                         signatureOID, NULL) != 0) {
         WOLFSSL_MSG("CRL Confirm signature failed");
         return ASN_CRL_CONFIRM_E;
     }
@@ -13628,11 +17146,216 @@
     return 0;
 }
 
+
+static int ParseCRL_CertList(DecodedCRL* dcrl, const byte* buf,
+        word32* inOutIdx, int sz)
+{
+    word32 oid, dateIdx, idx, checkIdx;
+    int version, doNextDate = 1;
+    byte tag;
+
+    if (dcrl == NULL || inOutIdx == NULL || buf == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    /* may have version */
+    idx = *inOutIdx;
+
+    checkIdx = idx;
+    if (GetASNTag(buf, &checkIdx, &tag, sz) == 0 && tag == ASN_INTEGER) {
+        if (GetMyVersion(buf, &idx, &version, sz) < 0)
+            return ASN_PARSE_E;
+    }
+
+    if (GetAlgoId(buf, &idx, &oid, oidIgnoreType, sz) < 0)
+        return ASN_PARSE_E;
+
+    if (GetNameHash(buf, &idx, dcrl->issuerHash, sz) < 0)
+        return ASN_PARSE_E;
+
+    if (GetBasicDate(buf, &idx, dcrl->lastDate, &dcrl->lastDateFormat, sz) < 0)
+        return ASN_PARSE_E;
+
+    dateIdx = idx;
+
+    if (GetBasicDate(buf, &idx, dcrl->nextDate, &dcrl->nextDateFormat, sz) < 0)
+    {
+#ifndef WOLFSSL_NO_CRL_NEXT_DATE
+        (void)dateIdx;
+        return ASN_PARSE_E;
+#else
+        dcrl->nextDateFormat = ASN_OTHER_TYPE;  /* skip flag */
+        doNextDate = 0;
+        idx = dateIdx;
+#endif
+    }
+
+    if (doNextDate) {
+#ifndef NO_ASN_TIME
+        if (!XVALIDATE_DATE(dcrl->nextDate, dcrl->nextDateFormat, AFTER)) {
+            WOLFSSL_MSG("CRL after date is no longer valid");
+            return ASN_AFTER_DATE_E;
+        }
+#endif
+    }
+
+    checkIdx = idx;
+    if (idx != dcrl->sigIndex &&
+           GetASNTag(buf, &checkIdx, &tag, sz) == 0 && tag != CRL_EXTENSIONS) {
+
+        int len;
+
+        if (GetSequence(buf, &idx, &len, sz) < 0)
+            return ASN_PARSE_E;
+        len += idx;
+
+        while (idx < (word32)len) {
+            if (GetRevoked(buf, &idx, dcrl, len) < 0)
+                return ASN_PARSE_E;
+        }
+    }
+
+    *inOutIdx = idx;
+
+    return 0;
+}
+
+
+#ifndef NO_SKID
+static int ParseCRL_AuthKeyIdExt(const byte* input, int sz, DecodedCRL* dcrl)
+{
+    word32 idx = 0;
+    int length = 0, ret = 0;
+    byte tag;
+
+    WOLFSSL_ENTER("ParseCRL_AuthKeyIdExt");
+
+    if (GetSequence(input, &idx, &length, sz) < 0) {
+        WOLFSSL_MSG("\tfail: should be a SEQUENCE\n");
+        return ASN_PARSE_E;
+    }
+
+    if (GetASNTag(input, &idx, &tag, sz) < 0) {
+        return ASN_PARSE_E;
+    }
+
+    if (tag != (ASN_CONTEXT_SPECIFIC | 0)) {
+        WOLFSSL_MSG("\tinfo: OPTIONAL item 0, not available\n");
+        return 0;
+    }
+
+    if (GetLength(input, &idx, &length, sz) <= 0) {
+        WOLFSSL_MSG("\tfail: extension data length");
+        return ASN_PARSE_E;
+    }
+
+    dcrl->extAuthKeyIdSet = 1;
+    if (length == KEYID_SIZE) {
+        XMEMCPY(dcrl->extAuthKeyId, input + idx, length);
+    }
+    else {
+        ret = CalcHashId(input + idx, length, dcrl->extAuthKeyId);
+    }
+
+    return ret;
+}
+#endif
+
+
+static int ParseCRL_Extensions(DecodedCRL* dcrl, const byte* buf,
+        word32* inOutIdx, word32 sz)
+{
+    int length;
+    word32 idx;
+    word32 ext_bound; /* boundary index for the sequence of extensions */
+    word32 oid;
+    byte tag;
+
+    WOLFSSL_ENTER("ParseCRL_Extensions");
+    (void)dcrl;
+
+    if (inOutIdx == NULL)
+        return BAD_FUNC_ARG;
+
+    idx = *inOutIdx;
+
+    /* CRL Extensions are optional */
+    if ((idx + 1) > sz)
+        return 0;
+
+    /* CRL Extensions are optional */
+    if (GetASNTag(buf, &idx, &tag, sz) < 0)
+        return 0;
+
+    /* CRL Extensions are optional */
+    if (tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0))
+        return 0;
+
+    if (GetLength(buf, &idx, &length, sz) < 0)
+        return ASN_PARSE_E;
+
+    if (GetSequence(buf, &idx, &length, sz) < 0)
+        return ASN_PARSE_E;
+
+    ext_bound = idx + length;
+
+    while (idx < (word32)ext_bound) {
+        word32 localIdx;
+        int ret;
+
+        if (GetSequence(buf, &idx, &length, sz) < 0) {
+            WOLFSSL_MSG("\tfail: should be a SEQUENCE");
+            return ASN_PARSE_E;
+        }
+
+        oid = 0;
+        if (GetObjectId(buf, &idx, &oid, oidCrlExtType, sz) < 0) {
+            WOLFSSL_MSG("\tfail: OBJECT ID");
+            return ASN_PARSE_E;
+        }
+
+        /* check for critical flag */
+        if ((idx + 1) > (word32)sz) {
+            WOLFSSL_MSG("\tfail: malformed buffer");
+            return BUFFER_E;
+        }
+
+        localIdx = idx;
+        if (GetASNTag(buf, &localIdx, &tag, sz) == 0 && tag == ASN_BOOLEAN) {
+            WOLFSSL_MSG("\tfound optional critical flag, moving past");
+            ret = GetBoolean(buf, &idx, sz);
+            if (ret < 0)
+                return ret;
+        }
+
+        ret = GetOctetString(buf, &idx, &length, sz);
+        if (ret < 0)
+            return ret;
+
+        if (oid == AUTH_KEY_OID) {
+        #ifndef NO_SKID
+            ret = ParseCRL_AuthKeyIdExt(buf + idx, length, dcrl);
+            if (ret < 0) {
+                WOLFSSL_MSG("\tcouldn't parse AuthKeyId extension");
+                return ret;
+            }
+        #endif
+        }
+
+        idx += length;
+    }
+
+    *inOutIdx = idx;
+
+    return 0;
+}
+
+
 /* prase crl buffer into decoded state, 0 on success */
 int ParseCRL(DecodedCRL* dcrl, const byte* buff, word32 sz, void* cm)
 {
-    int          version, len, doNextDate = 1;
-    word32       oid, idx = 0, dateIdx;
+    int          len;
+    word32       idx = 0;
     Signer*      ca = NULL;
     SignatureCtx sigCtx;
 
@@ -13649,63 +17372,20 @@
         return ASN_PARSE_E;
 
     dcrl->certBegin = idx;
+    /* Normalize sz for the length inside the outer sequence. */
+    sz = len + idx;
 
     if (GetSequence(buff, &idx, &len, sz) < 0)
         return ASN_PARSE_E;
     dcrl->sigIndex = len + idx;
 
-    /* may have version */
-    if (buff[idx] == ASN_INTEGER) {
-        if (GetMyVersion(buff, &idx, &version, sz) < 0)
-            return ASN_PARSE_E;
-    }
-
-    if (GetAlgoId(buff, &idx, &oid, oidIgnoreType, sz) < 0)
-        return ASN_PARSE_E;
-
-    if (GetNameHash(buff, &idx, dcrl->issuerHash, sz) < 0)
-        return ASN_PARSE_E;
-
-    if (GetBasicDate(buff, &idx, dcrl->lastDate, &dcrl->lastDateFormat, sz) < 0)
-        return ASN_PARSE_E;
-
-    dateIdx = idx;
-
-    if (GetBasicDate(buff, &idx, dcrl->nextDate, &dcrl->nextDateFormat, sz) < 0)
-    {
-#ifndef WOLFSSL_NO_CRL_NEXT_DATE
-        (void)dateIdx;
-        return ASN_PARSE_E;
-#else
-        dcrl->nextDateFormat = ASN_OTHER_TYPE;  /* skip flag */
-        doNextDate = 0;
-        idx = dateIdx;
-#endif
-    }
-
-    if (doNextDate) {
-#ifndef NO_ASN_TIME
-        if (!XVALIDATE_DATE(dcrl->nextDate, dcrl->nextDateFormat, AFTER)) {
-            WOLFSSL_MSG("CRL after date is no longer valid");
-            return ASN_AFTER_DATE_E;
-        }
-#endif
-    }
-
-    if (idx != dcrl->sigIndex && buff[idx] != CRL_EXTENSIONS) {
-        if (GetSequence(buff, &idx, &len, sz) < 0)
-            return ASN_PARSE_E;
-
-        len += idx;
-
-        while (idx < (word32)len) {
-            if (GetRevoked(buff, &idx, dcrl, sz) < 0)
-                return ASN_PARSE_E;
-        }
-    }
-
-    if (idx != dcrl->sigIndex)
-        idx = dcrl->sigIndex;   /* skip extensions */
+    if (ParseCRL_CertList(dcrl, buff, &idx, idx + len) < 0)
+        return ASN_PARSE_E;
+
+    if (ParseCRL_Extensions(dcrl, buff, &idx, idx + len) < 0)
+        return ASN_PARSE_E;
+
+    idx = dcrl->sigIndex;
 
     if (GetAlgoId(buff, &idx, &dcrl->signatureOID, oidSigType, sz) < 0)
         return ASN_PARSE_E;
@@ -13714,15 +17394,28 @@
         return ASN_PARSE_E;
 
     /* openssl doesn't add skid by default for CRLs cause firefox chokes
-       we're not assuming it's available yet */
-#if !defined(NO_SKID) && defined(CRL_SKID_READY)
-    if (dcrl->extAuthKeyIdSet)
-        ca = GetCA(cm, dcrl->extAuthKeyId);
-    if (ca == NULL)
-        ca = GetCAByName(cm, dcrl->issuerHash);
+       if experiencing issues uncomment NO_SKID define in CRL section of
+       wolfssl/wolfcrypt/settings.h */
+#ifndef NO_SKID
+    if (dcrl->extAuthKeyIdSet) {
+        ca = GetCA(cm, dcrl->extAuthKeyId); /* more unique than issuerHash */
+    }
+    if (ca != NULL && XMEMCMP(dcrl->issuerHash, ca->subjectNameHash,
+                KEYID_SIZE) != 0) {
+        ca = NULL;
+    }
+    if (ca == NULL) {
+        ca = GetCAByName(cm, dcrl->issuerHash); /* last resort */
+        /* If AKID is available then this CA doesn't have the public
+         * key required */
+        if (ca && dcrl->extAuthKeyIdSet) {
+            WOLFSSL_MSG("CA SKID doesn't match AKID");
+            ca = NULL;
+        }
+    }
 #else
     ca = GetCA(cm, dcrl->issuerHash);
-#endif /* !NO_SKID && CRL_SKID_READY */
+#endif /* !NO_SKID */
     WOLFSSL_MSG("About to verify CRL signature");
 
     if (ca == NULL) {
@@ -13738,6 +17431,90 @@
 
 #endif /* HAVE_CRL */
 
+
+
+#ifdef WOLFSSL_CERT_PIV
+
+int wc_ParseCertPIV(wc_CertPIV* piv, const byte* buf, word32 totalSz)
+{
+    int length = 0;
+    word32 idx = 0;
+
+    WOLFSSL_ENTER("wc_ParseCertPIV");
+
+    if (piv == NULL || buf == NULL || totalSz == 0)
+        return BAD_FUNC_ARG;
+
+    XMEMSET(piv, 0, sizeof(wc_CertPIV));
+
+    /* Detect Identiv PIV (with 0x0A, 0x0B and 0x0C sections) */
+    /* Certificate (0A 82 05FA) */
+    if (GetASNHeader(buf, ASN_PIV_CERT, &idx, &length, totalSz) >= 0) {
+        /* Identiv Type PIV card */
+        piv->isIdentiv = 1;
+
+        piv->cert =   &buf[idx];
+        piv->certSz = length;
+        idx += length;
+
+        /* Nonce (0B 14) */
+        if (GetASNHeader(buf, ASN_PIV_NONCE, &idx, &length, totalSz) >= 0) {
+            piv->nonce =   &buf[idx];
+            piv->nonceSz = length;
+            idx += length;
+        }
+
+        /* Signed Nonce (0C 82 0100) */
+        if (GetASNHeader(buf, ASN_PIV_SIGNED_NONCE, &idx, &length, totalSz) >= 0) {
+            piv->signedNonce =   &buf[idx];
+            piv->signedNonceSz = length;
+        }
+
+        idx = 0;
+        buf = piv->cert;
+        totalSz = piv->certSz;
+    }
+
+    /* Certificate Buffer Total Size (53 82 05F6) */
+    if (GetASNHeader(buf, ASN_APPLICATION | ASN_PRINTABLE_STRING, &idx,
+                                                   &length, totalSz) < 0) {
+        return ASN_PARSE_E;
+    }
+    /* PIV Certificate (70 82 05ED) */
+    if (GetASNHeader(buf, ASN_PIV_TAG_CERT, &idx, &length,
+                                                         totalSz) < 0) {
+        return ASN_PARSE_E;
+    }
+
+    /* Capture certificate buffer pointer and length */
+    piv->cert =   &buf[idx];
+    piv->certSz = length;
+    idx += length;
+
+    /* PIV Certificate Info (71 01 00) */
+    if (GetASNHeader(buf, ASN_PIV_TAG_CERT_INFO, &idx, &length,
+                                                        totalSz) >= 0) {
+        if (length >= 1) {
+            piv->compression = (buf[idx] & ASN_PIV_CERT_INFO_COMPRESSED);
+            piv->isX509 =      (buf[idx] & ASN_PIV_CERT_INFO_ISX509);
+        }
+        idx += length;
+    }
+
+    /* PIV Error Detection (FE 00) */
+    if (GetASNHeader(buf, ASN_PIV_TAG_ERR_DET, &idx, &length,
+                                                        totalSz) >= 0) {
+        piv->certErrDet =   &buf[idx];
+        piv->certErrDetSz = length;
+        idx += length;
+    }
+
+    return 0;
+}
+
+#endif /* WOLFSSL_CERT_PIV */
+
+
 #undef ERROR_OUT
 
 #endif /* !NO_ASN */
--- a/wolfcrypt/src/blake2b.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/blake2b.c	Thu Jun 04 23:57:22 2020 +0000
@@ -12,7 +12,7 @@
 */
 /* blake2b.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -317,8 +317,7 @@
     {
       XMEMCPY( S->buf + left, in, (wolfssl_word)inlen );
       S->buflen += inlen; /* Be lazy, do not compress */
-      in += inlen;
-      inlen -= inlen;
+      inlen = 0;
     }
   }
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wolfcrypt/src/blake2s.c	Thu Jun 04 23:57:22 2020 +0000
@@ -0,0 +1,447 @@
+/*
+   BLAKE2 reference source code package - reference C implementations
+
+   Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
+
+   To the extent possible under law, the author(s) have dedicated all copyright
+   and related and neighboring rights to this software to the public domain
+   worldwide. This software is distributed without any warranty.
+
+   You should have received a copy of the CC0 Public Domain Dedication along with
+   this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
+*/
+/* blake2s.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#ifdef HAVE_BLAKE2S
+
+#include <wolfssl/wolfcrypt/blake2.h>
+#include <wolfssl/wolfcrypt/blake2-impl.h>
+
+
+static const word32 blake2s_IV[8] =
+{
+  0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
+  0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
+};
+
+static const byte blake2s_sigma[10][16] =
+{
+  {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 } ,
+  { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 } ,
+  { 11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4 } ,
+  {  7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8 } ,
+  {  9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13 } ,
+  {  2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9 } ,
+  { 12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11 } ,
+  { 13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10 } ,
+  {  6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5 } ,
+  { 10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13 , 0 }
+};
+
+
+static WC_INLINE int blake2s_set_lastnode( blake2s_state *S )
+{
+  S->f[1] = ~0;
+  return 0;
+}
+
+/* Some helper functions, not necessarily useful */
+static WC_INLINE int blake2s_set_lastblock( blake2s_state *S )
+{
+  if( S->last_node ) blake2s_set_lastnode( S );
+
+  S->f[0] = ~0;
+  return 0;
+}
+
+static WC_INLINE int blake2s_increment_counter( blake2s_state *S, const word32
+                                             inc )
+{
+  S->t[0] += inc;
+  S->t[1] += ( S->t[0] < inc );
+  return 0;
+}
+
+static WC_INLINE int blake2s_init0( blake2s_state *S )
+{
+  int i;
+  XMEMSET( S, 0, sizeof( blake2s_state ) );
+
+  for( i = 0; i < 8; ++i ) S->h[i] = blake2s_IV[i];
+
+  return 0;
+}
+
+/* init xors IV with input parameter block */
+int blake2s_init_param( blake2s_state *S, const blake2s_param *P )
+{
+  word32 i;
+  byte *p ;
+  blake2s_init0( S );
+  p =  ( byte * )( P );
+
+  /* IV XOR ParamBlock */
+  for( i = 0; i < 8; ++i )
+    S->h[i] ^= load32( p + sizeof( S->h[i] ) * i );
+
+  return 0;
+}
+
+
+
+int blake2s_init( blake2s_state *S, const byte outlen )
+{
+  blake2s_param P[1];
+
+  if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1;
+
+#ifdef WOLFSSL_BLAKE2S_INIT_EACH_FIELD
+  P->digest_length = outlen;
+  P->key_length    = 0;
+  P->fanout        = 1;
+  P->depth         = 1;
+  store32( &P->leaf_length, 0 );
+  store32( &P->node_offset, 0 );
+  P->node_depth    = 0;
+  P->inner_length  = 0;
+  XMEMSET( P->reserved, 0, sizeof( P->reserved ) );
+  XMEMSET( P->salt,     0, sizeof( P->salt ) );
+  XMEMSET( P->personal, 0, sizeof( P->personal ) );
+#else
+  XMEMSET( P, 0, sizeof( *P ) );
+  P->digest_length = outlen;
+  P->fanout        = 1;
+  P->depth         = 1;
+#endif
+  return blake2s_init_param( S, P );
+}
+
+
+int blake2s_init_key( blake2s_state *S, const byte outlen, const void *key,
+                      const byte keylen )
+{
+  blake2s_param P[1];
+
+  if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1;
+
+  if ( !key || !keylen || keylen > BLAKE2S_KEYBYTES ) return -1;
+
+#ifdef WOLFSSL_BLAKE2S_INIT_EACH_FIELD
+  P->digest_length = outlen;
+  P->key_length    = keylen;
+  P->fanout        = 1;
+  P->depth         = 1;
+  store32( &P->leaf_length, 0 );
+  store64( &P->node_offset, 0 );
+  P->node_depth    = 0;
+  P->inner_length  = 0;
+  XMEMSET( P->reserved, 0, sizeof( P->reserved ) );
+  XMEMSET( P->salt,     0, sizeof( P->salt ) );
+  XMEMSET( P->personal, 0, sizeof( P->personal ) );
+#else
+  XMEMSET( P, 0, sizeof( *P ) );
+  P->digest_length = outlen;
+  P->key_length    = keylen;
+  P->fanout        = 1;
+  P->depth         = 1;
+#endif
+
+  if( blake2s_init_param( S, P ) < 0 ) return -1;
+
+  {
+#ifdef WOLFSSL_SMALL_STACK
+    byte* block;
+
+    block = (byte*)XMALLOC(BLAKE2S_BLOCKBYTES, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+
+    if ( block == NULL ) return -1;
+#else
+    byte block[BLAKE2S_BLOCKBYTES];
+#endif
+
+    XMEMSET( block, 0, BLAKE2S_BLOCKBYTES );
+    XMEMCPY( block, key, keylen );
+    blake2s_update( S, block, BLAKE2S_BLOCKBYTES );
+    secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from */
+                                                     /* memory */
+
+#ifdef WOLFSSL_SMALL_STACK
+    XFREE(block, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+  }
+  return 0;
+}
+
+static int blake2s_compress( blake2s_state *S,
+                             const byte block[BLAKE2S_BLOCKBYTES] )
+{
+  int i;
+
+#ifdef WOLFSSL_SMALL_STACK
+  word32* m;
+  word32* v;
+
+  m = (word32*)XMALLOC(sizeof(word32) * 16, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+
+  if ( m == NULL ) return -1;
+
+  v = (word32*)XMALLOC(sizeof(word32) * 16, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+
+  if ( v == NULL )
+  {
+    XFREE(m, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    return -1;
+  }
+#else
+  word32 m[16];
+  word32 v[16];
+#endif
+
+  for( i = 0; i < 16; ++i )
+    m[i] = load32( block + i * sizeof( m[i] ) );
+
+  for( i = 0; i < 8; ++i )
+    v[i] = S->h[i];
+
+  v[ 8] = blake2s_IV[0];
+  v[ 9] = blake2s_IV[1];
+  v[10] = blake2s_IV[2];
+  v[11] = blake2s_IV[3];
+  v[12] = S->t[0] ^ blake2s_IV[4];
+  v[13] = S->t[1] ^ blake2s_IV[5];
+  v[14] = S->f[0] ^ blake2s_IV[6];
+  v[15] = S->f[1] ^ blake2s_IV[7];
+#define G(r,i,a,b,c,d) \
+  do { \
+    a = a + b + m[blake2s_sigma[r][2*i+0]]; \
+    d = rotr32(d ^ a, 16); \
+    c = c + d; \
+    b = rotr32(b ^ c, 12); \
+    a = a + b + m[blake2s_sigma[r][2*i+1]]; \
+    d = rotr32(d ^ a, 8); \
+    c = c + d; \
+    b = rotr32(b ^ c, 7); \
+  } while(0)
+#define ROUND(r)  \
+  do { \
+    G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
+    G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
+    G(r,2,v[ 2],v[ 6],v[10],v[14]); \
+    G(r,3,v[ 3],v[ 7],v[11],v[15]); \
+    G(r,4,v[ 0],v[ 5],v[10],v[15]); \
+    G(r,5,v[ 1],v[ 6],v[11],v[12]); \
+    G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
+    G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
+  } while(0)
+  ROUND( 0 );
+  ROUND( 1 );
+  ROUND( 2 );
+  ROUND( 3 );
+  ROUND( 4 );
+  ROUND( 5 );
+  ROUND( 6 );
+  ROUND( 7 );
+  ROUND( 8 );
+  ROUND( 9 );
+
+  for( i = 0; i < 8; ++i )
+    S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
+
+#undef G
+#undef ROUND
+
+#ifdef WOLFSSL_SMALL_STACK
+  XFREE(m, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+  XFREE(v, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+  return 0;
+}
+
+/* inlen now in bytes */
+int blake2s_update( blake2s_state *S, const byte *in, word32 inlen )
+{
+  while( inlen > 0 )
+  {
+    word32 left = S->buflen;
+    word32 fill = 2 * BLAKE2S_BLOCKBYTES - left;
+
+    if( inlen > fill )
+    {
+      XMEMCPY( S->buf + left, in, (wolfssl_word)fill ); /* Fill buffer */
+      S->buflen += fill;
+      blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES );
+
+      if ( blake2s_compress( S, S->buf ) < 0 ) return -1; /* Compress */
+
+      XMEMCPY( S->buf, S->buf + BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES );
+              /* Shift buffer left */
+      S->buflen -= BLAKE2S_BLOCKBYTES;
+      in += fill;
+      inlen -= fill;
+    }
+    else /* inlen <= fill */
+    {
+      XMEMCPY( S->buf + left, in, (wolfssl_word)inlen );
+      S->buflen += inlen; /* Be lazy, do not compress */
+      inlen = 0;
+    }
+  }
+
+  return 0;
+}
+
+/* Is this correct? */
+int blake2s_final( blake2s_state *S, byte *out, byte outlen )
+{
+  int     i;
+  byte buffer[BLAKE2S_BLOCKBYTES];
+
+  if( S->buflen > BLAKE2S_BLOCKBYTES )
+  {
+    blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES );
+
+    if ( blake2s_compress( S, S->buf ) < 0 ) return -1;
+
+    S->buflen -= BLAKE2S_BLOCKBYTES;
+    XMEMCPY( S->buf, S->buf + BLAKE2S_BLOCKBYTES, (wolfssl_word)S->buflen );
+  }
+
+  blake2s_increment_counter( S, S->buflen );
+  blake2s_set_lastblock( S );
+  XMEMSET( S->buf + S->buflen, 0, (wolfssl_word)(2 * BLAKE2S_BLOCKBYTES - S->buflen) );
+         /* Padding */
+  if ( blake2s_compress( S, S->buf ) < 0 ) return -1;
+
+  for( i = 0; i < 8; ++i ) /* Output full hash to temp buffer */
+    store64( buffer + sizeof( S->h[i] ) * i, S->h[i] );
+
+  XMEMCPY( out, buffer, outlen );
+  return 0;
+}
+
+/* inlen, at least, should be word32. Others can be size_t. */
+int blake2s( byte *out, const void *in, const void *key, const byte outlen,
+             const word32 inlen, byte keylen )
+{
+  blake2s_state S[1];
+
+  /* Verify parameters */
+  if ( NULL == in ) return -1;
+
+  if ( NULL == out ) return -1;
+
+  if( NULL == key ) keylen = 0;
+
+  if( keylen > 0 )
+  {
+    if( blake2s_init_key( S, outlen, key, keylen ) < 0 ) return -1;
+  }
+  else
+  {
+    if( blake2s_init( S, outlen ) < 0 ) return -1;
+  }
+
+  if ( blake2s_update( S, ( byte * )in, inlen ) < 0) return -1;
+
+  return blake2s_final( S, out, outlen );
+}
+
+#if defined(BLAKE2S_SELFTEST)
+#include <string.h>
+#include "blake2-kat.h"
+int main( int argc, char **argv )
+{
+  byte key[BLAKE2S_KEYBYTES];
+  byte buf[KAT_LENGTH];
+
+  for( word32 i = 0; i < BLAKE2S_KEYBYTES; ++i )
+    key[i] = ( byte )i;
+
+  for( word32 i = 0; i < KAT_LENGTH; ++i )
+    buf[i] = ( byte )i;
+
+  for( word32 i = 0; i < KAT_LENGTH; ++i )
+  {
+    byte hash[BLAKE2S_OUTBYTES];
+    if ( blake2s( hash, buf, key, BLAKE2S_OUTBYTES, i, BLAKE2S_KEYBYTES ) < 0 )
+    {
+      puts( "error" );
+      return -1;
+    }
+
+    if( 0 != XMEMCMP( hash, blake2s_keyed_kat[i], BLAKE2S_OUTBYTES ) )
+    {
+      puts( "error" );
+      return -1;
+    }
+  }
+
+  puts( "ok" );
+  return 0;
+}
+#endif
+
+
+/* wolfCrypt API */
+
+/* Init Blake2s digest, track size in case final doesn't want to "remember" */
+int wc_InitBlake2s(Blake2s* b2s, word32 digestSz)
+{
+    if (b2s == NULL){
+        return -1;
+    }
+    b2s->digestSz = digestSz;
+
+    return blake2s_init(b2s->S, (byte)digestSz);
+}
+
+
+/* Blake2s Update */
+int wc_Blake2sUpdate(Blake2s* b2s, const byte* data, word32 sz)
+{
+    return blake2s_update(b2s->S, data, sz);
+}
+
+
+/* Blake2s Final, if pass in zero size we use init digestSz */
+int wc_Blake2sFinal(Blake2s* b2s, byte* final, word32 requestSz)
+{
+    word32 sz = requestSz ? requestSz : b2s->digestSz;
+
+    return blake2s_final(b2s->S, final, (byte)sz);
+}
+
+
+/* end CTaoCrypt API */
+
+#endif  /* HAVE_BLAKE2S */
+
+
--- a/wolfcrypt/src/camellia.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/camellia.c	Thu Jun 04 23:57:22 2020 +0000
@@ -27,7 +27,7 @@
 
 /* camellia.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
--- a/wolfcrypt/src/chacha.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/chacha.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* chacha.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -17,7 +17,9 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
- *
+ */
+
+/*
  *  based from
  *  chacha-ref.c version 20080118
  *  D. J. Bernstein
@@ -25,14 +27,17 @@
  */
 
 
+#ifdef WOLFSSL_ARMASM
+    /* implementation is located in wolfcrypt/src/port/arm/armv8-chacha.c */
 
+#else
 #ifdef HAVE_CONFIG_H
     #include <config.h>
 #endif
 
 #include <wolfssl/wolfcrypt/settings.h>
 
-#ifdef HAVE_CHACHA
+#if defined(HAVE_CHACHA) && !defined(WOLFSSL_ARMASM)
 
 #include <wolfssl/wolfcrypt/chacha.h>
 #include <wolfssl/wolfcrypt/error-crypt.h>
@@ -55,10 +60,12 @@
 
     #if defined(__GNUC__) && ((__GNUC__ < 4) || \
                               (__GNUC__ == 4 && __GNUC_MINOR__ <= 8))
+        #undef  NO_AVX2_SUPPORT
         #define NO_AVX2_SUPPORT
     #endif
     #if defined(__clang__) && ((__clang_major__ < 3) || \
                                (__clang_major__ == 3 && __clang_minor__ <= 5))
+        #undef  NO_AVX2_SUPPORT
         #define NO_AVX2_SUPPORT
     #elif defined(__clang__) && defined(NO_AVX2_SUPPORT)
         #undef NO_AVX2_SUPPORT
@@ -68,14 +75,6 @@
         #define HAVE_INTEL_AVX2
     #endif
 
-    #if defined(_MSC_VER)
-        #define CHACHA20_NOINLINE __declspec(noinline)
-    #elif defined(__GNUC__)
-        #define CHACHA20_NOINLINE __attribute__((noinline))
-    #else
-        #define CHACHA20_NOINLINE
-    #endif
-
     static int cpuidFlagsSet = 0;
     static int cpuidFlags = 0;
 #endif
@@ -105,182 +104,6 @@
   x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]), 7);
 
 
-
-#define QUARTERROUND_INTEL_ASM(a0,b0,c0,d0,   \
-                               a1,b1,c1,d1,   \
-                               a2,b2,c2,d2,   \
-                               a3,b3,c3,d3,   \
-                               t1,o1)         \
-       "vpaddd	"#b0", "#a0", "#a0"\n\t"      \
-       "vpxor	"#a0", "#d0", "#d0"\n\t"      \
-       "vmovdqa	"#o1"(%[x]), "#c3"\n\t"       \
-       "vpshufb	%[rotl16], "#d0", "#d0"\n\t"  \
-       "vpaddd	"#d0", "#c0", "#c0"\n\t"      \
-       "vpxor	"#c0", "#b0", "#b0"\n\t"      \
-       "vpaddd	"#b1", "#a1", "#a1"\n\t"      \
-       "vpxor	"#a1", "#d1", "#d1"\n\t"      \
-       "vpshufb	%[rotl16], "#d1", "#d1"\n\t"  \
-       "vpaddd	"#d1", "#c1", "#c1"\n\t"      \
-       "vpxor	"#c1", "#b1", "#b1"\n\t"      \
-       "vpaddd	"#b2", "#a2", "#a2"\n\t"      \
-       "vpxor	"#a2", "#d2", "#d2"\n\t"      \
-       "vpshufb	%[rotl16], "#d2", "#d2"\n\t"  \
-       "vpaddd	"#d2", "#c2", "#c2"\n\t"      \
-       "vpxor	"#c2", "#b2", "#b2"\n\t"      \
-       "vpaddd	"#b3", "#a3", "#a3"\n\t"      \
-       "vpxor	"#a3", "#d3", "#d3"\n\t"      \
-       "vpshufb	%[rotl16], "#d3", "#d3"\n\t"  \
-       "vpaddd  "#d3", "#c3", "#c3"\n\t"      \
-       "vpxor	"#c3", "#b3", "#b3"\n\t"      \
-       "vmovdqa	"#c3", "#o1"(%[x])\n\t"       \
-       "vpsrld	$20, "#b0", "#t1"\n\t"        \
-       "vpslld	$12, "#b0", "#b0"\n\t"        \
-       "vpxor	"#t1", "#b0", "#b0"\n\t"      \
-       "vpsrld	$20, "#b1", "#t1"\n\t"        \
-       "vpslld	$12, "#b1", "#b1"\n\t"        \
-       "vpxor	"#t1", "#b1", "#b1"\n\t"      \
-       "vpsrld	$20, "#b2", "#t1"\n\t"        \
-       "vpslld	$12, "#b2", "#b2"\n\t"        \
-       "vpxor	"#t1", "#b2", "#b2"\n\t"      \
-       "vpsrld	$20, "#b3", "#t1"\n\t"        \
-       "vpslld	$12, "#b3", "#b3"\n\t"        \
-       "vpxor	"#t1", "#b3", "#b3"\n\t"      \
-       "vpaddd	"#b0", "#a0", "#a0"\n\t"      \
-       "vpxor	"#a0", "#d0", "#d0"\n\t"      \
-       "vmovdqa	"#o1"(%[x]), "#c3"\n\t"       \
-       "vpshufb	%[rotl8], "#d0", "#d0"\n\t"   \
-       "vpaddd	"#d0", "#c0", "#c0"\n\t"      \
-       "vpxor	"#c0", "#b0", "#b0"\n\t"      \
-       "vpaddd	"#b1", "#a1", "#a1"\n\t"      \
-       "vpxor	"#a1", "#d1", "#d1"\n\t"      \
-       "vpshufb	%[rotl8], "#d1", "#d1"\n\t"   \
-       "vpaddd	"#d1", "#c1", "#c1"\n\t"      \
-       "vpxor	"#c1", "#b1", "#b1"\n\t"      \
-       "vpaddd	"#b2", "#a2", "#a2"\n\t"      \
-       "vpxor	"#a2", "#d2", "#d2"\n\t"      \
-       "vpshufb	%[rotl8], "#d2", "#d2"\n\t"   \
-       "vpaddd	"#d2", "#c2", "#c2"\n\t"      \
-       "vpxor	"#c2", "#b2", "#b2"\n\t"      \
-       "vpaddd	"#b3", "#a3", "#a3"\n\t"      \
-       "vpxor	"#a3", "#d3", "#d3"\n\t"      \
-       "vpshufb	%[rotl8], "#d3", "#d3"\n\t"   \
-       "vpaddd	"#d3", "#c3", "#c3"\n\t"      \
-       "vpxor	"#c3", "#b3", "#b3"\n\t"      \
-       "vmovdqa	"#c3", "#o1"(%[x])\n\t"       \
-       "vpsrld	$25, "#b0", "#t1"\n\t"        \
-       "vpslld	 $7, "#b0", "#b0"\n\t"        \
-       "vpxor	"#t1", "#b0", "#b0"\n\t"      \
-       "vpsrld	$25, "#b1", "#t1"\n\t"        \
-       "vpslld	 $7, "#b1", "#b1"\n\t"        \
-       "vpxor	"#t1", "#b1", "#b1"\n\t"      \
-       "vpsrld	$25, "#b2", "#t1"\n\t"        \
-       "vpslld	 $7, "#b2", "#b2"\n\t"        \
-       "vpxor	"#t1", "#b2", "#b2"\n\t"      \
-       "vpsrld	$25, "#b3", "#t1"\n\t"        \
-       "vpslld	 $7, "#b3", "#b3"\n\t"        \
-       "vpxor	"#t1", "#b3", "#b3"\n\t"
-
-#define QUARTERROUND_INTEL_ASM_2(a0,b0,c0,d0, \
-                                 a1,b1,c1,d1, \
-                                 a2,b2,c2,d2, \
-                                 a3,b3,c3,d3, \
-                                 t1,o1)       \
-       "vpaddd	"#b0", "#a0", "#a0"\n\t"      \
-       "vpxor	"#a0", "#d0", "#d0"\n\t"      \
-       "vmovdqa	"#o1"(%[x]), "#c1"\n\t"       \
-       "vpshufb	%[rotl16], "#d0", "#d0"\n\t"  \
-       "vpaddd	"#d0", "#c0", "#c0"\n\t"      \
-       "vpxor	"#c0", "#b0", "#b0"\n\t"      \
-       "vpaddd	"#b1", "#a1", "#a1"\n\t"      \
-       "vpxor	"#a1", "#d1", "#d1"\n\t"      \
-       "vpshufb	%[rotl16], "#d1", "#d1"\n\t"  \
-       "vpaddd	"#d1", "#c1", "#c1"\n\t"      \
-       "vpxor	"#c1", "#b1", "#b1"\n\t"      \
-       "vpaddd	"#b2", "#a2", "#a2"\n\t"      \
-       "vpxor	"#a2", "#d2", "#d2"\n\t"      \
-       "vpshufb	%[rotl16], "#d2", "#d2"\n\t"  \
-       "vpaddd	"#d2", "#c2", "#c2"\n\t"      \
-       "vpxor	"#c2", "#b2", "#b2"\n\t"      \
-       "vpaddd	"#b3", "#a3", "#a3"\n\t"      \
-       "vpxor	"#a3", "#d3", "#d3"\n\t"      \
-       "vpshufb	%[rotl16], "#d3", "#d3"\n\t"  \
-       "vpaddd	"#d3", "#c3", "#c3"\n\t"      \
-       "vpxor	"#c3", "#b3", "#b3"\n\t"      \
-       "vmovdqa	"#c1", "#o1"(%[x])\n\t"       \
-       "vpsrld	$20, "#b0", "#t1"\n\t"        \
-       "vpslld	$12, "#b0", "#b0"\n\t"        \
-       "vpxor	"#t1", "#b0", "#b0"\n\t"      \
-       "vpsrld	$20, "#b1", "#t1"\n\t"        \
-       "vpslld	$12, "#b1", "#b1"\n\t"        \
-       "vpxor	"#t1", "#b1", "#b1"\n\t"      \
-       "vpsrld	$20, "#b2", "#t1"\n\t"        \
-       "vpslld	$12, "#b2", "#b2"\n\t"        \
-       "vpxor	"#t1", "#b2", "#b2"\n\t"      \
-       "vpsrld	$20, "#b3", "#t1"\n\t"        \
-       "vpslld	$12, "#b3", "#b3"\n\t"        \
-       "vpxor	"#t1", "#b3", "#b3"\n\t"      \
-       "vpaddd	"#b0", "#a0", "#a0"\n\t"      \
-       "vpxor	"#a0", "#d0", "#d0"\n\t"      \
-       "vmovdqa	"#o1"(%[x]), "#c1"\n\t"       \
-       "vpshufb	%[rotl8], "#d0", "#d0"\n\t"   \
-       "vpaddd	"#d0", "#c0", "#c0"\n\t"      \
-       "vpxor	"#c0", "#b0", "#b0"\n\t"      \
-       "vpaddd	"#b1", "#a1", "#a1"\n\t"      \
-       "vpxor	"#a1", "#d1", "#d1"\n\t"      \
-       "vpshufb	%[rotl8], "#d1", "#d1"\n\t"   \
-       "vpaddd	"#d1", "#c1", "#c1"\n\t"      \
-       "vpxor	"#c1", "#b1", "#b1"\n\t"      \
-       "vpaddd	"#b2", "#a2", "#a2"\n\t"      \
-       "vpxor	"#a2", "#d2", "#d2"\n\t"      \
-       "vpshufb	%[rotl8], "#d2", "#d2"\n\t"   \
-       "vpaddd	"#d2", "#c2", "#c2"\n\t"      \
-       "vpxor	"#c2", "#b2", "#b2"\n\t"      \
-       "vpaddd	"#b3", "#a3", "#a3"\n\t"      \
-       "vpxor	"#a3", "#d3", "#d3"\n\t"      \
-       "vpshufb	%[rotl8], "#d3", "#d3"\n\t"   \
-       "vpaddd	"#d3", "#c3", "#c3"\n\t"      \
-       "vpxor	"#c3", "#b3", "#b3"\n\t"      \
-       "vmovdqa	"#c1", "#o1"(%[x])\n\t"       \
-       "vpsrld	$25, "#b0", "#t1"\n\t"        \
-       "vpslld	 $7, "#b0", "#b0"\n\t"        \
-       "vpxor	"#t1", "#b0", "#b0"\n\t"      \
-       "vpsrld	$25, "#b1", "#t1"\n\t"        \
-       "vpslld	 $7, "#b1", "#b1"\n\t"        \
-       "vpxor	"#t1", "#b1", "#b1"\n\t"      \
-       "vpsrld	$25, "#b2", "#t1"\n\t"        \
-       "vpslld	 $7, "#b2", "#b2"\n\t"        \
-       "vpxor	"#t1", "#b2", "#b2"\n\t"      \
-       "vpsrld	$25, "#b3", "#t1"\n\t"        \
-       "vpslld	 $7, "#b3", "#b3"\n\t"        \
-       "vpxor	"#t1", "#b3", "#b3"\n\t"
-
-
-#define QUARTERROUND_XMM()                                      \
-        QUARTERROUND_INTEL_ASM(%%xmm0,%%xmm4,%%xmm8,%%xmm12,    \
-                               %%xmm1,%%xmm5,%%xmm9,%%xmm13,    \
-                               %%xmm2,%%xmm6,%%xmm10,%%xmm14,   \
-                               %%xmm3,%%xmm7,%%xmm11,%%xmm15,   \
-                               %%xmm11,48)
-#define QUARTERROUND_XMM_2()                                    \
-        QUARTERROUND_INTEL_ASM_2(%%xmm0,%%xmm5,%%xmm10,%%xmm15, \
-                                 %%xmm1,%%xmm6,%%xmm11,%%xmm12, \
-                                 %%xmm2,%%xmm7,%%xmm8,%%xmm13,  \
-                                 %%xmm3,%%xmm4,%%xmm9,%%xmm14,  \
-                                 %%xmm11,48)
-
-#define QUARTERROUND_YMM()                                      \
-        QUARTERROUND_INTEL_ASM(%%ymm0,%%ymm4,%%ymm8,%%ymm12,    \
-                               %%ymm1,%%ymm5,%%ymm9,%%ymm13,    \
-                               %%ymm2,%%ymm6,%%ymm10,%%ymm14,   \
-                               %%ymm3,%%ymm7,%%ymm11,%%ymm15,   \
-                               %%ymm11,96)
-#define QUARTERROUND_YMM_2()                                    \
-        QUARTERROUND_INTEL_ASM_2(%%ymm0,%%ymm5,%%ymm10,%%ymm15, \
-                                 %%ymm1,%%ymm6,%%ymm11,%%ymm12, \
-                                 %%ymm2,%%ymm7,%%ymm8,%%ymm13,  \
-                                 %%ymm3,%%ymm4,%%ymm9,%%ymm14,  \
-                                 %%ymm11,96)
-
 /**
   * Set up iv(nonce). Earlier versions used 64 bits instead of 96, this version
   * uses the typical AEAD 96 bit nonce and can do record sizes of 256 GB.
@@ -303,6 +126,7 @@
 
     XMEMCPY(temp, inIv, CHACHA_IV_BYTES);
 
+    ctx->left = 0; /* resets state */
     ctx->X[CHACHA_IV_BYTES+0] = counter;           /* block counter */
     ctx->X[CHACHA_IV_BYTES+1] = LITTLE32(temp[0]); /* fixed variable from nonce */
     ctx->X[CHACHA_IV_BYTES+2] = LITTLE32(temp[1]); /* counter from nonce */
@@ -377,6 +201,7 @@
     ctx->X[ 1] = constants[1];
     ctx->X[ 2] = constants[2];
     ctx->X[ 3] = constants[3];
+    ctx->left = 0; /* resets state */
 
     return 0;
 }
@@ -414,1037 +239,21 @@
     }
 }
 
-
-#ifdef USE_INTEL_CHACHA_SPEEDUP
-
-#define QUARTERROUND_2_X64(r11, r12, r13, r14, r21, r22, r23, r24) \
-        "addl	"#r12", "#r11"\n\t"                                \
-        "addl	"#r22", "#r21"\n\t"                                \
-        "xorl	"#r11", "#r14"\n\t"                                \
-        "xorl	"#r21", "#r24"\n\t"                                \
-        "roll	$16, "#r14"\n\t"                                   \
-        "roll	$16, "#r24"\n\t"                                   \
-        "addl	"#r14", "#r13"\n\t"                                \
-        "addl	"#r24", "#r23"\n\t"                                \
-        "xorl	"#r13", "#r12"\n\t"                                \
-        "xorl	"#r23", "#r22"\n\t"                                \
-        "roll	$12, "#r12"\n\t"                                   \
-        "roll	$12, "#r22"\n\t"                                   \
-        "addl	"#r12", "#r11"\n\t"                                \
-        "addl	"#r22", "#r21"\n\t"                                \
-        "xorl	"#r11", "#r14"\n\t"                                \
-        "xorl	"#r21", "#r24"\n\t"                                \
-        "roll	$8, "#r14"\n\t"                                    \
-        "roll	$8, "#r24"\n\t"                                    \
-        "addl	"#r14", "#r13"\n\t"                                \
-        "addl	"#r24", "#r23"\n\t"                                \
-        "xorl	"#r13", "#r12"\n\t"                                \
-        "xorl	"#r23", "#r22"\n\t"                                \
-        "roll	$7, "#r12"\n\t"                                    \
-        "roll	$7, "#r22"\n\t"                                    \
-
-#define CHACHA_CRYPT_X64()                                                     \
-        "subq	$40, %%rsp\n\t"                                                \
-        "movq	32(%[input]), %%rax\n\t"                                       \
-        "movq	40(%[input]), %%rdx\n\t"                                       \
-        "movq	%%rax,  8(%%rsp)\n\t"                                          \
-        "movq	%%rdx, 16(%%rsp)\n\t"                                          \
-        "movl	 0(%[input]), %%eax\n\t"                                       \
-        "movl	 4(%[input]), %%ebx\n\t"                                       \
-        "movl	 8(%[input]), %%ecx\n\t"                                       \
-        "movl	12(%[input]), %%edx\n\t"                                       \
-        "movl	16(%[input]), %%r8d\n\t"                                       \
-        "movl	20(%[input]), %%r9d\n\t"                                       \
-        "movl	24(%[input]), %%r10d\n\t"                                      \
-        "movl	28(%[input]), %%r11d\n\t"                                      \
-        "movl	48(%[input]), %%r12d\n\t"                                      \
-        "movl	52(%[input]), %%r13d\n\t"                                      \
-        "movl	56(%[input]), %%r14d\n\t"                                      \
-        "movl	60(%[input]), %%r15d\n\t"                                      \
-        "movb	$10, (%%rsp)\n\t"                                              \
-        "movq	%%rsi, 32(%%rsp)\n\t"                                          \
-        "movq	%%rdi, 24(%%rsp)\n\t"                                          \
-        "movl	 8(%%rsp), %%esi\n\t"                                          \
-        "movl	12(%%rsp), %%edi\n\t"                                          \
-        "\n"                                                                   \
-        "1:\n\t"                                                               \
-        QUARTERROUND_2_X64(%%eax,  %%r8d, %%esi, %%r12d,                       \
-                           %%ebx,  %%r9d, %%edi, %%r13d)                       \
-        "movl	%%esi,  8(%%rsp)\n\t"                                          \
-        "movl	%%edi, 12(%%rsp)\n\t"                                          \
-        "movl	16(%%rsp), %%esi\n\t"                                          \
-        "movl	20(%%rsp), %%edi\n\t"                                          \
-        QUARTERROUND_2_X64(%%ecx, %%r10d, %%esi, %%r14d,                       \
-                           %%edx, %%r11d, %%edi, %%r15d)                       \
-        QUARTERROUND_2_X64(%%eax,  %%r9d, %%esi, %%r15d,                       \
-                           %%ebx, %%r10d, %%edi, %%r12d)                       \
-        "movl	%%esi, 16(%%rsp)\n\t"                                          \
-        "movl	%%edi, 20(%%rsp)\n\t"                                          \
-        "movl	 8(%%rsp), %%esi\n\t"                                          \
-        "movl	12(%%rsp), %%edi\n\t"                                          \
-        QUARTERROUND_2_X64(%%ecx, %%r11d, %%esi, %%r13d,                       \
-                           %%edx,  %%r8d, %%edi, %%r14d)                       \
-        "decb	(%%rsp)\n\t"                                                   \
-        "jnz	1b\n\t"                                                        \
-        "movl	%%esi,  8(%%rsp)\n\t"                                          \
-        "movl	%%edi, 12(%%rsp)\n\t"                                          \
-        "movq	32(%%rsp), %%rsi\n\t"                                          \
-        "movq	24(%%rsp), %%rdi\n\t"                                          \
-        "addl	 0(%[input]), %%eax\n\t"                                       \
-        "addl	 4(%[input]), %%ebx\n\t"                                       \
-        "addl	 8(%[input]), %%ecx\n\t"                                       \
-        "addl	12(%[input]), %%edx\n\t"                                       \
-        "addl	16(%[input]), %%r8d\n\t"                                       \
-        "addl	20(%[input]), %%r9d\n\t"                                       \
-        "addl	24(%[input]), %%r10d\n\t"                                      \
-        "addl	28(%[input]), %%r11d\n\t"                                      \
-        "addl	48(%[input]), %%r12d\n\t"                                      \
-        "addl	52(%[input]), %%r13d\n\t"                                      \
-        "addl	56(%[input]), %%r14d\n\t"                                      \
-        "addl	60(%[input]), %%r15d\n\t"                                      \
-
-#define CHACHA_PARTIAL_CHUNK_X64()                                             \
-    __asm__ __volatile__ (                                                     \
-        CHACHA_CRYPT_X64()                                                     \
-        "movl	%%eax ,  0(%[c])\n\t"                                          \
-        "movl	%%ebx ,  4(%[c])\n\t"                                          \
-        "movl	%%ecx ,  8(%[c])\n\t"                                          \
-        "movl	%%edx , 12(%[c])\n\t"                                          \
-        "movl	%%r8d , 16(%[c])\n\t"                                          \
-        "movl	%%r9d , 20(%[c])\n\t"                                          \
-        "movl	%%r10d, 24(%[c])\n\t"                                          \
-        "movl	%%r11d, 28(%[c])\n\t"                                          \
-        "movl	%%r12d, 48(%[c])\n\t"                                          \
-        "movl	%%r13d, 52(%[c])\n\t"                                          \
-        "movl	%%r14d, 56(%[c])\n\t"                                          \
-        "movl	%%r15d, 60(%[c])\n\t"                                          \
-        "movl	 8(%%rsp), %%eax\n\t"                                          \
-        "movl	12(%%rsp), %%ebx\n\t"                                          \
-        "movl	16(%%rsp), %%ecx\n\t"                                          \
-        "movl	20(%%rsp), %%edx\n\t"                                          \
-        "addl	32(%[input]), %%eax\n\t"                                       \
-        "addl	36(%[input]), %%ebx\n\t"                                       \
-        "addl	40(%[input]), %%ecx\n\t"                                       \
-        "addl	44(%[input]), %%edx\n\t"                                       \
-        "movl	%%eax , 32(%[c])\n\t"                                          \
-        "movl	%%ebx , 36(%[c])\n\t"                                          \
-        "movl	%%ecx , 40(%[c])\n\t"                                          \
-        "movl	%%edx , 44(%[c])\n\t"                                          \
-        "addl	$1, 48(%[input])\n\t"                                          \
-        "addq	$40, %%rsp\n\t"                                                \
-        "movq	%[output], %%rax\n\t"                                          \
-        "movq	%[m], %%rbx\n\t"                                               \
-        "movl	%[bytes], %%r8d\n\t"                                           \
-        "xorq	%%rdx, %%rdx\n\t"                                              \
-        "movl	%%r8d, %%r9d\n\t"                                              \
-        "andl	$7, %%r9d\n\t"                                                 \
-        "jz	4f\n\t"                                                        \
-        "\n"                                                                   \
-        "2:\n\t"                                                               \
-        "movzbl	(%[c],%%rdx,1), %%ecx\n\t"                                     \
-        "xorb	(%%rbx,%%rdx,1), %%cl\n\t"                                     \
-        "movb	%%cl, (%%rax,%%rdx,1)\n\t"                                     \
-        "incl	%%edx\n\t"                                                     \
-        "cmpl	%%r9d, %%edx\n\t"                                              \
-        "jne	2b\n\t"                                                        \
-        "je	3f\n\t"                                                        \
-        "\n"                                                                   \
-        "4:\n\t"                                                               \
-        "movq	(%[c],%%rdx,1), %%rcx\n\t"                                     \
-        "xorq	(%%rbx,%%rdx,1), %%rcx\n\t"                                    \
-        "movq	%%rcx, (%%rax,%%rdx,1)\n\t"                                    \
-        "addl	$8, %%edx\n\t"                                                 \
-        "\n"                                                                   \
-        "3:\n\t"                                                               \
-        "cmpl	%%r8d, %%edx\n\t"                                              \
-        "jne	4b\n\t"                                                        \
-        :                                                                      \
-        : [input] "r" (ctx->X), [c] "r" (x),                                   \
-          [output] "m" (c), [bytes] "m" (bytes), [m] "m" (m)                   \
-        : "eax", "ebx", "ecx", "edx", "r8", "r9", "r10", "r11", "r12", "r13",  \
-          "r14", "r15", "memory"                                               \
-    )
-
-
-#define CHACHA_CHUNK_X64()                                                     \
-    __asm__ __volatile__ (                                                     \
-        CHACHA_CRYPT_X64()                                                     \
-        "movq	%%rsi, 32(%%rsp)\n\t"                                          \
-        "addq	$40, %%rsp\n\t"                                                \
-        "movq	%[m], %%rsi\n\t"                                               \
-        "subq	$40, %%rsp\n\t"                                                \
-        "xorl	 0(%%rsi), %%eax\n\t"                                          \
-        "xorl	 4(%%rsi), %%ebx\n\t"                                          \
-        "xorl	 8(%%rsi), %%ecx\n\t"                                          \
-        "xorl	12(%%rsi), %%edx\n\t"                                          \
-        "xorl	16(%%rsi), %%r8d\n\t"                                          \
-        "xorl	20(%%rsi), %%r9d\n\t"                                          \
-        "xorl	24(%%rsi), %%r10d\n\t"                                         \
-        "xorl	28(%%rsi), %%r11d\n\t"                                         \
-        "xorl	48(%%rsi), %%r12d\n\t"                                         \
-        "xorl	52(%%rsi), %%r13d\n\t"                                         \
-        "xorl	56(%%rsi), %%r14d\n\t"                                         \
-        "xorl	60(%%rsi), %%r15d\n\t"                                         \
-        "movq	32(%%rsp), %%rsi\n\t"                                          \
-        "movl	%%eax ,  0(%[c])\n\t"                                          \
-        "movl	%%ebx ,  4(%[c])\n\t"                                          \
-        "movl	%%ecx ,  8(%[c])\n\t"                                          \
-        "movl	%%edx , 12(%[c])\n\t"                                          \
-        "movl	%%r8d , 16(%[c])\n\t"                                          \
-        "movl	%%r9d , 20(%[c])\n\t"                                          \
-        "movl	%%r10d, 24(%[c])\n\t"                                          \
-        "movl	%%r11d, 28(%[c])\n\t"                                          \
-        "movl	%%r12d, 48(%[c])\n\t"                                          \
-        "movl	%%r13d, 52(%[c])\n\t"                                          \
-        "movl	%%r14d, 56(%[c])\n\t"                                          \
-        "movl	%%r15d, 60(%[c])\n\t"                                          \
-        "addq	$40, %%rsp\n\t"                                                \
-        "movq	%[m], %%r8\n\t"                                                \
-        "subq	$40, %%rsp\n\t"                                                \
-        "movl	 8(%%rsp), %%eax\n\t"                                          \
-        "movl	12(%%rsp), %%ebx\n\t"                                          \
-        "movl	16(%%rsp), %%ecx\n\t"                                          \
-        "movl	20(%%rsp), %%edx\n\t"                                          \
-        "addl	32(%[input]), %%eax\n\t"                                       \
-        "addl	36(%[input]), %%ebx\n\t"                                       \
-        "addl	40(%[input]), %%ecx\n\t"                                       \
-        "addl	44(%[input]), %%edx\n\t"                                       \
-        "xorl	32(%%r8), %%eax\n\t"                                           \
-        "xorl	36(%%r8), %%ebx\n\t"                                           \
-        "xorl	40(%%r8), %%ecx\n\t"                                           \
-        "xorl	44(%%r8), %%edx\n\t"                                           \
-        "movl	%%eax , 32(%[c])\n\t"                                          \
-        "movl	%%ebx , 36(%[c])\n\t"                                          \
-        "movl	%%ecx , 40(%[c])\n\t"                                          \
-        "movl	%%edx , 44(%[c])\n\t"                                          \
-        "addl	$1, 48(%[input])\n\t"                                          \
-        "addq	$40, %%rsp\n\t"                                                \
-        :                                                                      \
-        : [input] "r" (ctx->X), [c] "r" (c), [m] "m" (m)                       \
-        : "eax", "ebx", "ecx", "edx", "r8", "r9", "r10", "r11", "r12", "r13",  \
-          "r14", "r15", "memory"                                               \
-    )
-
-
-static void chacha_encrypt_x64(ChaCha* ctx, const byte* m, byte* c,
-                               word32 bytes)
-{
-    word32 x[CHACHA_CHUNK_WORDS];
-
-    if (bytes == 0)
-        return;
-
-    for (; bytes >= CHACHA_CHUNK_BYTES;) {
-        CHACHA_CHUNK_X64();
-        bytes -= CHACHA_CHUNK_BYTES;
-        c += CHACHA_CHUNK_BYTES;
-        m += CHACHA_CHUNK_BYTES;
-    }
-    if (bytes > 0) {
-        CHACHA_PARTIAL_CHUNK_X64();
-    }
-}
-
-#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
-static const __m128i rotl8 =  { 0x0605040702010003UL,0x0e0d0c0f0a09080bUL };
-static const __m128i rotl16 = { 0x0504070601000302UL,0x0d0c0f0e09080b0aUL };
-#endif /* HAVE_INTEL_AVX1 || HAVE_INTEL_AVX2 */
-
-#ifdef HAVE_INTEL_AVX1
-#define QUARTERROUND_2_AVX()               \
-        "paddd	%%xmm1, %%xmm0\n\t"        \
-        "pxor	%%xmm0, %%xmm3\n\t"        \
-        "pshufb	%[rotl16], %%xmm3\n\t"     \
-        "paddd	%%xmm3, %%xmm2\n\t"        \
-        "pxor	%%xmm2, %%xmm1\n\t"        \
-        "movdqa	%%xmm1, %%xmm4\n\t"        \
-        "pslld	$12, %%xmm1\n\t"           \
-        "psrld	$20, %%xmm4\n\t"           \
-        "pxor	%%xmm4, %%xmm1\n\t"        \
-        "paddd	%%xmm1, %%xmm0\n\t"        \
-        "pxor	%%xmm0, %%xmm3\n\t"        \
-        "pshufb	%[rotl8], %%xmm3\n\t"      \
-        "paddd	%%xmm3, %%xmm2\n\t"        \
-        "pxor	%%xmm2, %%xmm1\n\t"        \
-        "movdqa	%%xmm1, %%xmm4\n\t"        \
-        "pslld	$7, %%xmm1\n\t"            \
-        "psrld	$25, %%xmm4\n\t"           \
-        "pxor	%%xmm4, %%xmm1\n\t"        \
-        "# Swap words for next round\n\t"  \
-        "pshufd	$0x39, %%xmm1, %%xmm1\n\t" \
-        "pshufd	$0x4e, %%xmm2, %%xmm2\n\t" \
-        "pshufd	$0x93, %%xmm3, %%xmm3\n\t" \
-        "paddd	%%xmm1, %%xmm0\n\t"        \
-        "pxor	%%xmm0, %%xmm3\n\t"        \
-        "pshufb	%[rotl16], %%xmm3\n\t"     \
-        "paddd	%%xmm3, %%xmm2\n\t"        \
-        "pxor	%%xmm2, %%xmm1\n\t"        \
-        "movdqa	%%xmm1, %%xmm4\n\t"        \
-        "pslld	$12, %%xmm1\n\t"           \
-        "psrld	$20, %%xmm4\n\t"           \
-        "pxor	%%xmm4, %%xmm1\n\t"        \
-        "paddd	%%xmm1, %%xmm0\n\t"        \
-        "pxor	%%xmm0, %%xmm3\n\t"        \
-        "pshufb	%[rotl8], %%xmm3\n\t"      \
-        "paddd	%%xmm3, %%xmm2\n\t"        \
-        "pxor	%%xmm2, %%xmm1\n\t"        \
-        "movdqa	%%xmm1, %%xmm4\n\t"        \
-        "pslld	$7, %%xmm1\n\t"            \
-        "psrld	$25, %%xmm4\n\t"           \
-        "pxor	%%xmm4, %%xmm1\n\t"        \
-        "# Swap words back\n\t"            \
-        "pshufd	$0x93, %%xmm1, %%xmm1\n\t" \
-        "pshufd	$0x4e, %%xmm2, %%xmm2\n\t" \
-        "pshufd	$0x39, %%xmm3, %%xmm3\n\t" \
-
-#define CHACHA_CRYPT_AVX()                                                     \
-        "movdqu	 0(%[input]), %%xmm0\n\t"                                      \
-        "movdqu	16(%[input]), %%xmm1\n\t"                                      \
-        "movdqu	32(%[input]), %%xmm2\n\t"                                      \
-        "movdqu	48(%[input]), %%xmm3\n\t"                                      \
-        "movb	$10, %%al\n\t"                                                 \
-        "\n"                                                                   \
-        "1:\n\t"                                                               \
-        QUARTERROUND_2_AVX()                                                   \
-        "decb	%%al\n\t"                                                      \
-        "jnz	1b\n\t"                                                        \
-        "movdqu	 0(%[input]), %%xmm4\n\t"                                      \
-        "movdqu	16(%[input]), %%xmm5\n\t"                                      \
-        "movdqu	32(%[input]), %%xmm6\n\t"                                      \
-        "movdqu	48(%[input]), %%xmm7\n\t"                                      \
-        "paddd	%%xmm4, %%xmm0\n\t"                                            \
-        "paddd	%%xmm5, %%xmm1\n\t"                                            \
-        "paddd	%%xmm6, %%xmm2\n\t"                                            \
-        "paddd	%%xmm7, %%xmm3\n\t"                                            \
-
-#define CHACHA_PARTIAL_CHUNK_AVX()                                             \
-    __asm__ __volatile__ (                                                     \
-        CHACHA_CRYPT_AVX()                                                     \
-        "movdqu	%%xmm0,  0(%[c])\n\t"                                          \
-        "movdqu	%%xmm1, 16(%[c])\n\t"                                          \
-        "movdqu	%%xmm2, 32(%[c])\n\t"                                          \
-        "movdqu	%%xmm3, 48(%[c])\n\t"                                          \
-        "addl	$1, 48(%[input])\n\t"                                          \
-        "movl	%[bytes], %%r8d\n\t"                                           \
-        "xorq	%%rdx, %%rdx\n\t"                                              \
-        "movl	%%r8d, %%r9d\n\t"                                              \
-        "andl	$7, %%r9d\n\t"                                                 \
-        "jz	4f\n\t"                                                        \
-        "\n"                                                                   \
-        "2:\n\t"                                                               \
-        "movzbl	(%[c],%%rdx,1), %%ecx\n\t"                                     \
-        "xorb	(%[m],%%rdx,1), %%cl\n\t"                                      \
-        "movb	%%cl, (%[output],%%rdx,1)\n\t"                                 \
-        "incl	%%edx\n\t"                                                     \
-        "cmpl	%%r9d, %%edx\n\t"                                              \
-        "jne	2b\n\t"                                                        \
-        "je	3f\n\t"                                                        \
-        "\n"                                                                   \
-        "4:\n\t"                                                               \
-        "movq	(%[c],%%rdx,1), %%rcx\n\t"                                     \
-        "xorq	(%[m],%%rdx,1), %%rcx\n\t"                                     \
-        "movq	%%rcx, (%[output],%%rdx,1)\n\t"                                \
-        "addl	$8, %%edx\n\t"                                                 \
-        "\n"                                                                   \
-        "3:\n\t"                                                               \
-        "cmpl	%%r8d, %%edx\n\t"                                              \
-        "jne	4b\n\t"                                                        \
-        :                                                                      \
-        : [input] "r" (ctx->X), [c] "r" (x),                                   \
-          [output] "r" (c), [bytes] "r" (bytes), [m] "r" (m),                  \
-          [rotl8] "xrm" (rotl8), [rotl16] "xrm" (rotl16)                       \
-        : "eax", "ecx", "edx", "r8", "r9", "memory",                           \
-          "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"       \
-    )
-
-
-#define CHACHA_CHUNK_AVX()                                                     \
-    __asm__ __volatile__ (                                                     \
-        CHACHA_CRYPT_AVX()                                                     \
-        "movdqu	 0(%[m]), %%xmm4\n\t"                                          \
-        "movdqu	16(%[m]), %%xmm5\n\t"                                          \
-        "movdqu	32(%[m]), %%xmm6\n\t"                                          \
-        "movdqu	48(%[m]), %%xmm7\n\t"                                          \
-        "pxor	%%xmm4, %%xmm0\n\t"                                            \
-        "pxor	%%xmm5, %%xmm1\n\t"                                            \
-        "pxor	%%xmm6, %%xmm2\n\t"                                            \
-        "pxor	%%xmm7, %%xmm3\n\t"                                            \
-        "movdqu	%%xmm0,  0(%[c])\n\t"                                          \
-        "movdqu	%%xmm1, 16(%[c])\n\t"                                          \
-        "movdqu	%%xmm2, 32(%[c])\n\t"                                          \
-        "movdqu	%%xmm3, 48(%[c])\n\t"                                          \
-        "addl	$1, 48(%[input])\n\t"                                          \
-        :                                                                      \
-        : [input] "r" (ctx->X), [c] "r" (c), [m] "r" (m),                      \
-          [rotl8] "xrm" (rotl8), [rotl16] "xrm" (rotl16)                       \
-        : "rax", "memory",                                                     \
-          "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"       \
-    )
-
-CHACHA20_NOINLINE static void chacha_encrypt_avx(ChaCha* ctx, const byte* m,
-                                                 byte* c, word32 bytes)
-{
-    ALIGN128 word32 X[4*CHACHA_CHUNK_WORDS]; /* used to make sure aligned */
-    ALIGN128 word32 x[2*CHACHA_CHUNK_WORDS]; /* used to make sure aligned */
-    word32 cnt = 0;
-    static const __m128i add =    { 0x0000000100000000UL,0x0000000300000002UL };
-    static const __m128i four =   { 0x0000000400000004UL,0x0000000400000004UL };
-
-    if (bytes == 0)
-        return;
-
-    __asm__ __volatile__ (
-       "movl	%[bytes], %[cnt]\n\t"
-       "shrl	$8, %[cnt]\n\t"
-       "jz      L_end128\n\t"
-
-       "vpshufd	$0,   (%[key]), %%xmm0\n\t"
-       "vpshufd	$0,  4(%[key]), %%xmm1\n\t"
-       "vpshufd	$0,  8(%[key]), %%xmm2\n\t"
-       "vpshufd	$0, 12(%[key]), %%xmm3\n\t"
-       "vpshufd	$0, 16(%[key]), %%xmm4\n\t"
-       "vpshufd	$0, 20(%[key]), %%xmm5\n\t"
-       "vpshufd	$0, 24(%[key]), %%xmm6\n\t"
-       "vpshufd	$0, 28(%[key]), %%xmm7\n\t"
-       "vpshufd	$0, 32(%[key]), %%xmm8\n\t"
-       "vpshufd	$0, 36(%[key]), %%xmm9\n\t"
-       "vpshufd	$0, 40(%[key]), %%xmm10\n\t"
-       "vpshufd	$0, 44(%[key]), %%xmm11\n\t"
-       "vpshufd	$0, 48(%[key]), %%xmm12\n\t"
-       "vpshufd	$0, 52(%[key]), %%xmm13\n\t"
-       "vpshufd	$0, 56(%[key]), %%xmm14\n\t"
-       "vpshufd	$0, 60(%[key]), %%xmm15\n\t"
-
-       "vpaddd	%[add], %%xmm12, %%xmm12\n\t"
-
-       "vmovdqa	%%xmm0,     (%[X])\n\t"
-       "vmovdqa	%%xmm1,   16(%[X])\n\t"
-       "vmovdqa	%%xmm2,   32(%[X])\n\t"
-       "vmovdqa	%%xmm3,   48(%[X])\n\t"
-       "vmovdqa	%%xmm4,   64(%[X])\n\t"
-       "vmovdqa	%%xmm5,   80(%[X])\n\t"
-       "vmovdqa	%%xmm6,   96(%[X])\n\t"
-       "vmovdqa	%%xmm7,  112(%[X])\n\t"
-       "vmovdqa	%%xmm8,  128(%[X])\n\t"
-       "vmovdqa	%%xmm9,  144(%[X])\n\t"
-       "vmovdqa	%%xmm10, 160(%[X])\n\t"
-       "vmovdqa	%%xmm11, 176(%[X])\n\t"
-       "vmovdqa	%%xmm12, 192(%[X])\n\t"
-       "vmovdqa	%%xmm13, 208(%[X])\n\t"
-       "vmovdqa	%%xmm14, 224(%[X])\n\t"
-       "vmovdqa	%%xmm15, 240(%[X])\n\t"
-       "\n"
-   "L_enc128_loop:\n\t"
-       "vmovdqa	%%xmm11, 48(%[x])\n\t"
-       QUARTERROUND_XMM()
-       QUARTERROUND_XMM_2()
-       QUARTERROUND_XMM()
-       QUARTERROUND_XMM_2()
-       QUARTERROUND_XMM()
-       QUARTERROUND_XMM_2()
-       QUARTERROUND_XMM()
-       QUARTERROUND_XMM_2()
-       QUARTERROUND_XMM()
-       QUARTERROUND_XMM_2()
-       QUARTERROUND_XMM()
-       QUARTERROUND_XMM_2()
-       QUARTERROUND_XMM()
-       QUARTERROUND_XMM_2()
-       QUARTERROUND_XMM()
-       QUARTERROUND_XMM_2()
-       QUARTERROUND_XMM()
-       QUARTERROUND_XMM_2()
-       QUARTERROUND_XMM()
-       QUARTERROUND_XMM_2()
-       "vmovdqa	48(%[x]), %%xmm11\n\t"
-
-       "vpaddd	   (%[X]), %%xmm0,  %%xmm0\n\t"
-       "vpaddd	 16(%[X]), %%xmm1,  %%xmm1\n\t"
-       "vpaddd	 32(%[X]), %%xmm2,  %%xmm2\n\t"
-       "vpaddd	 48(%[X]), %%xmm3,  %%xmm3\n\t"
-       "vpaddd	 64(%[X]), %%xmm4,  %%xmm4\n\t"
-       "vpaddd	 80(%[X]), %%xmm5,  %%xmm5\n\t"
-       "vpaddd	 96(%[X]), %%xmm6,  %%xmm6\n\t"
-       "vpaddd	112(%[X]), %%xmm7,  %%xmm7\n\t"
-       "vpaddd	128(%[X]), %%xmm8,  %%xmm8\n\t"
-       "vpaddd	144(%[X]), %%xmm9,  %%xmm9\n\t"
-       "vpaddd	160(%[X]), %%xmm10, %%xmm10\n\t"
-       "vpaddd	176(%[X]), %%xmm11, %%xmm11\n\t"
-       "vpaddd	192(%[X]), %%xmm12, %%xmm12\n\t"
-       "vpaddd	208(%[X]), %%xmm13, %%xmm13\n\t"
-       "vpaddd	224(%[X]), %%xmm14, %%xmm14\n\t"
-       "vpaddd	240(%[X]), %%xmm15, %%xmm15\n\t"
-
-       "vmovdqa	%%xmm8,     (%[x])\n\t"
-       "vmovdqa	%%xmm9,   16(%[x])\n\t"
-       "vmovdqa	%%xmm10,  32(%[x])\n\t"
-       "vmovdqa	%%xmm11,  48(%[x])\n\t"
-       "vmovdqa	%%xmm12,  64(%[x])\n\t"
-       "vmovdqa	%%xmm13,  80(%[x])\n\t"
-       "vmovdqa	%%xmm14,  96(%[x])\n\t"
-       "vmovdqa	%%xmm15, 112(%[x])\n\t"
-
-       "vpunpckldq %%xmm1, %%xmm0, %%xmm8\n\t"
-       "vpunpckldq %%xmm3, %%xmm2, %%xmm9\n\t"
-       "vpunpckhdq %%xmm1, %%xmm0, %%xmm12\n\t"
-       "vpunpckhdq %%xmm3, %%xmm2, %%xmm13\n\t"
-       "vpunpckldq %%xmm5, %%xmm4, %%xmm10\n\t"
-       "vpunpckldq %%xmm7, %%xmm6, %%xmm11\n\t"
-       "vpunpckhdq %%xmm5, %%xmm4, %%xmm14\n\t"
-       "vpunpckhdq %%xmm7, %%xmm6, %%xmm15\n\t"
-       "vpunpcklqdq %%xmm9,  %%xmm8,  %%xmm0\n\t"
-       "vpunpcklqdq %%xmm11, %%xmm10, %%xmm1\n\t"
-       "vpunpckhqdq %%xmm9,  %%xmm8,  %%xmm2\n\t"
-       "vpunpckhqdq %%xmm11, %%xmm10, %%xmm3\n\t"
-       "vpunpcklqdq %%xmm13, %%xmm12, %%xmm4\n\t"
-       "vpunpcklqdq %%xmm15, %%xmm14, %%xmm5\n\t"
-       "vpunpckhqdq %%xmm13, %%xmm12, %%xmm6\n\t"
-       "vpunpckhqdq %%xmm15, %%xmm14, %%xmm7\n\t"
-       "vmovdqu	   (%[in]), %%xmm8\n\t"
-       "vmovdqu	 16(%[in]), %%xmm9\n\t"
-       "vmovdqu	 64(%[in]), %%xmm10\n\t"
-       "vmovdqu	 80(%[in]), %%xmm11\n\t"
-       "vmovdqu	128(%[in]), %%xmm12\n\t"
-       "vmovdqu	144(%[in]), %%xmm13\n\t"
-       "vmovdqu	192(%[in]), %%xmm14\n\t"
-       "vmovdqu	208(%[in]), %%xmm15\n\t"
-       "vpxor	%%xmm8,  %%xmm0, %%xmm0\n\t"
-       "vpxor	%%xmm9,  %%xmm1, %%xmm1\n\t"
-       "vpxor	%%xmm10, %%xmm2, %%xmm2\n\t"
-       "vpxor	%%xmm11, %%xmm3, %%xmm3\n\t"
-       "vpxor	%%xmm12, %%xmm4, %%xmm4\n\t"
-       "vpxor	%%xmm13, %%xmm5, %%xmm5\n\t"
-       "vpxor	%%xmm14, %%xmm6, %%xmm6\n\t"
-       "vpxor	%%xmm15, %%xmm7, %%xmm7\n\t"
-       "vmovdqu	%%xmm0,    (%[out])\n\t"
-       "vmovdqu	%%xmm1,  16(%[out])\n\t"
-       "vmovdqu	%%xmm2,  64(%[out])\n\t"
-       "vmovdqu	%%xmm3,  80(%[out])\n\t"
-       "vmovdqu	%%xmm4, 128(%[out])\n\t"
-       "vmovdqu	%%xmm5, 144(%[out])\n\t"
-       "vmovdqu	%%xmm6, 192(%[out])\n\t"
-       "vmovdqu	%%xmm7, 208(%[out])\n\t"
+#ifdef __cplusplus
+    extern "C" {
+#endif
 
-       "vmovdqa	   (%[x]), %%xmm0\n\t"
-       "vmovdqa	 16(%[x]), %%xmm1\n\t"
-       "vmovdqa	 32(%[x]), %%xmm2\n\t"
-       "vmovdqa	 48(%[x]), %%xmm3\n\t"
-       "vmovdqa	 64(%[x]), %%xmm4\n\t"
-       "vmovdqa	 80(%[x]), %%xmm5\n\t"
-       "vmovdqa	 96(%[x]), %%xmm6\n\t"
-       "vmovdqa	112(%[x]), %%xmm7\n\t"
-
-       "vpunpckldq %%xmm1, %%xmm0, %%xmm8\n\t"
-       "vpunpckldq %%xmm3, %%xmm2, %%xmm9\n\t"
-       "vpunpckhdq %%xmm1, %%xmm0, %%xmm12\n\t"
-       "vpunpckhdq %%xmm3, %%xmm2, %%xmm13\n\t"
-       "vpunpckldq %%xmm5, %%xmm4, %%xmm10\n\t"
-       "vpunpckldq %%xmm7, %%xmm6, %%xmm11\n\t"
-       "vpunpckhdq %%xmm5, %%xmm4, %%xmm14\n\t"
-       "vpunpckhdq %%xmm7, %%xmm6, %%xmm15\n\t"
-       "vpunpcklqdq %%xmm9,  %%xmm8,  %%xmm0\n\t"
-       "vpunpcklqdq %%xmm11, %%xmm10, %%xmm1\n\t"
-       "vpunpckhqdq %%xmm9,  %%xmm8,  %%xmm2\n\t"
-       "vpunpckhqdq %%xmm11, %%xmm10, %%xmm3\n\t"
-       "vpunpcklqdq %%xmm13, %%xmm12, %%xmm4\n\t"
-       "vpunpcklqdq %%xmm15, %%xmm14, %%xmm5\n\t"
-       "vpunpckhqdq %%xmm13, %%xmm12, %%xmm6\n\t"
-       "vpunpckhqdq %%xmm15, %%xmm14, %%xmm7\n\t"
-       "vmovdqu	 32(%[in]), %%xmm8\n\t"
-       "vmovdqu	 48(%[in]), %%xmm9\n\t"
-       "vmovdqu	 96(%[in]), %%xmm10\n\t"
-       "vmovdqu	112(%[in]), %%xmm11\n\t"
-       "vmovdqu	160(%[in]), %%xmm12\n\t"
-       "vmovdqu	176(%[in]), %%xmm13\n\t"
-       "vmovdqu	224(%[in]), %%xmm14\n\t"
-       "vmovdqu	240(%[in]), %%xmm15\n\t"
-       "vpxor	%%xmm8,  %%xmm0, %%xmm0\n\t"
-       "vpxor	%%xmm9,  %%xmm1, %%xmm1\n\t"
-       "vpxor	%%xmm10, %%xmm2, %%xmm2\n\t"
-       "vpxor	%%xmm11, %%xmm3, %%xmm3\n\t"
-       "vpxor	%%xmm12, %%xmm4, %%xmm4\n\t"
-       "vpxor	%%xmm13, %%xmm5, %%xmm5\n\t"
-       "vpxor	%%xmm14, %%xmm6, %%xmm6\n\t"
-       "vpxor	%%xmm15, %%xmm7, %%xmm7\n\t"
-       "vmovdqu	%%xmm0,  32(%[out])\n\t"
-       "vmovdqu	%%xmm1,  48(%[out])\n\t"
-       "vmovdqu	%%xmm2,  96(%[out])\n\t"
-       "vmovdqu	%%xmm3, 112(%[out])\n\t"
-       "vmovdqu	%%xmm4, 160(%[out])\n\t"
-       "vmovdqu	%%xmm5, 176(%[out])\n\t"
-       "vmovdqu	%%xmm6, 224(%[out])\n\t"
-       "vmovdqu	%%xmm7, 240(%[out])\n\t"
-
-       "vmovdqa	192(%[X]), %%xmm12\n\t"
-       "add	$256, %[in]\n\t"
-       "add	$256, %[out]\n\t"
-       "vpaddd	%[four], %%xmm12, %%xmm12\n\t"
-       "sub	$256, %[bytes]\n\t"
-       "vmovdqa	%%xmm12, 192(%[X])\n\t"
-       "cmp	$256, %[bytes]\n\t"
-       "jl	L_done\n\t"
-
-       "vmovdqa	   (%[X]), %%xmm0\n\t"
-       "vmovdqa	 16(%[X]), %%xmm1\n\t"
-       "vmovdqa	 32(%[X]), %%xmm2\n\t"
-       "vmovdqa	 48(%[X]), %%xmm3\n\t"
-       "vmovdqa	 64(%[X]), %%xmm4\n\t"
-       "vmovdqa	 80(%[X]), %%xmm5\n\t"
-       "vmovdqa	 96(%[X]), %%xmm6\n\t"
-       "vmovdqa	112(%[X]), %%xmm7\n\t"
-       "vmovdqa	128(%[X]), %%xmm8\n\t"
-       "vmovdqa	144(%[X]), %%xmm9\n\t"
-       "vmovdqa	160(%[X]), %%xmm10\n\t"
-       "vmovdqa	176(%[X]), %%xmm11\n\t"
-       "vmovdqa	192(%[X]), %%xmm12\n\t"
-       "vmovdqa	208(%[X]), %%xmm13\n\t"
-       "vmovdqa	224(%[X]), %%xmm14\n\t"
-       "vmovdqa	240(%[X]), %%xmm15\n\t"
-       "jmp	L_enc128_loop\n\t"
-
-       "\n"
-   "L_done:\n\t"
-
-       "shl	$2, %[cnt]\n\t"
-       "add	48(%[key]), %[cnt]\n\t"
-       "movl	%[cnt], 48(%[key])\n\t"
-       "\n"
-   "L_end128:\n\t"
-       : [bytes] "+r" (bytes), [cnt] "+r" (cnt),
-         [in] "+r" (m), [out] "+r" (c)
-       : [X] "r" (X), [x] "r" (x), [key] "r" (ctx->X),
-         [add] "xrm" (add), [four] "xrm" (four),
-         [rotl8] "xrm" (rotl8), [rotl16] "xrm" (rotl16)
-       : "xmm0", "xmm1", "xmm2", "xmm3",
-         "xmm4", "xmm5", "xmm6", "xmm7",
-         "xmm8", "xmm9", "xmm10", "xmm11",
-         "xmm12", "xmm13", "xmm14", "xmm15", "memory"
-    );
-
-    for (; bytes >= CHACHA_CHUNK_BYTES;) {
-        CHACHA_CHUNK_AVX();
-        bytes -= CHACHA_CHUNK_BYTES;
-        c += CHACHA_CHUNK_BYTES;
-        m += CHACHA_CHUNK_BYTES;
-    }
-    if (bytes > 0) {
-        CHACHA_PARTIAL_CHUNK_AVX();
-    }
-}
-#endif /* HAVE_INTEL_AVX1 */
-
-#ifdef HAVE_INTEL_AVX2
-#define QUARTERROUND_2_AVX2()                          \
-        "vpaddd		%%xmm1, %%xmm0, %%xmm0\n\t"    \
-        "vpxor		%%xmm0, %%xmm3, %%xmm3\n\t"    \
-        "vpshufb	%[rotl16], %%xmm3, %%xmm3\n\t" \
-        "vpaddd		%%xmm3, %%xmm2, %%xmm2\n\t"    \
-        "vpxor		%%xmm2, %%xmm1, %%xmm1\n\t"    \
-        "vpsrld		$20, %%xmm1, %%xmm4\n\t"       \
-        "vpslld		$12, %%xmm1, %%xmm1\n\t"       \
-        "vpxor		%%xmm4, %%xmm1, %%xmm1\n\t"    \
-        "vpaddd		%%xmm1, %%xmm0, %%xmm0\n\t"    \
-        "vpxor		%%xmm0, %%xmm3, %%xmm3\n\t"    \
-        "vpshufb	%[rotl8], %%xmm3, %%xmm3\n\t"  \
-        "vpaddd		%%xmm3, %%xmm2, %%xmm2\n\t"    \
-        "vpxor		%%xmm2, %%xmm1, %%xmm1\n\t"    \
-        "vpsrld		$25, %%xmm1, %%xmm4\n\t"       \
-        "vpslld		$7, %%xmm1, %%xmm1\n\t"        \
-        "vpxor		%%xmm4, %%xmm1, %%xmm1\n\t"    \
-        "# Swap words for next round\n\t"              \
-        "vpshufd	$0x39, %%xmm1, %%xmm1\n\t"     \
-        "vpshufd	$0x4e, %%xmm2, %%xmm2\n\t"     \
-        "vpshufd	$0x93, %%xmm3, %%xmm3\n\t"     \
-        "vpaddd		%%xmm1, %%xmm0, %%xmm0\n\t"    \
-        "vpxor		%%xmm0, %%xmm3, %%xmm3\n\t"    \
-        "vpshufb	%[rotl16], %%xmm3, %%xmm3\n\t" \
-        "vpaddd		%%xmm3, %%xmm2, %%xmm2\n\t"    \
-        "vpxor		%%xmm2, %%xmm1, %%xmm1\n\t"    \
-        "vpsrld		$20, %%xmm1, %%xmm4\n\t"       \
-        "vpslld		$12, %%xmm1, %%xmm1\n\t"       \
-        "vpxor		%%xmm4, %%xmm1, %%xmm1\n\t"    \
-        "vpaddd		%%xmm1, %%xmm0, %%xmm0\n\t"    \
-        "vpxor		%%xmm0, %%xmm3, %%xmm3\n\t"    \
-        "vpshufb	%[rotl8], %%xmm3, %%xmm3\n\t"  \
-        "vpaddd		%%xmm3, %%xmm2, %%xmm2\n\t"    \
-        "vpxor		%%xmm2, %%xmm1, %%xmm1\n\t"    \
-        "vpsrld		$25, %%Xmm1, %%xmm4\n\t"       \
-        "vpslld		$7, %%xmm1, %%xmm1\n\t"        \
-        "vpxor		%%xmm4, %%xmm1, %%xmm1\n\t"    \
-        "# Swap words back\n\t"                        \
-        "vpshufd	$0x93, %%xmm1, %%xmm1\n\t"     \
-        "vpshufd	$0x4e, %%xmm2, %%xmm2\n\t"     \
-        "vpshufd	$0x39, %%xmm3, %%xmm3\n\t"     \
-
-#define CHACHA_CRYPT_AVX2()                                                    \
-        "vmovdqu	 0(%[input]), %%xmm8\n\t"                              \
-        "vmovdqu	16(%[input]), %%xmm9\n\t"                              \
-        "vmovdqu	32(%[input]), %%xmm10\n\t"                             \
-        "vmovdqu	48(%[input]), %%xmm11\n\t"                             \
-        "vmovdqu	%%xmm8, %%xmm0\n\t"                                    \
-        "vmovdqu	%%xmm9, %%xmm1\n\t"                                    \
-        "vmovdqu	%%xmm10, %%xmm2\n\t"                                   \
-        "vmovdqu	%%xmm11, %%xmm3\n\t"                                   \
-        "movb		$10, %%al\n\t"                                         \
-        "\n"                                                                   \
-        "1:\n\t"                                                               \
-        QUARTERROUND_2_AVX2()                                                  \
-        "decb		%%al\n\t"                                              \
-        "jnz		1b\n\t"                                                \
-        "vpaddd		%%xmm8, %%xmm0, %%xmm0\n\t"                            \
-        "vpaddd		%%xmm9, %%xmm1, %%xmm1\n\t"                            \
-        "vpaddd		%%xmm10, %%xmm2, %%xmm2\n\t"                           \
-        "vpaddd		%%xmm11, %%xmm3, %%xmm3\n\t"                           \
-
-#define CHACHA_PARTIAL_CHUNK_AVX2()                                            \
-    __asm__ __volatile__ (                                                     \
-        CHACHA_CRYPT_AVX2()                                                    \
-        "vmovdqu	%%xmm0,  0(%[c])\n\t"                                  \
-        "vmovdqu	%%xmm1, 16(%[c])\n\t"                                  \
-        "vmovdqu	%%xmm2, 32(%[c])\n\t"                                  \
-        "vmovdqu	%%xmm3, 48(%[c])\n\t"                                  \
-        "addl		$1, 48(%[input])\n\t"                                  \
-        "movl		%[bytes], %%r8d\n\t"                                   \
-        "xorq		%%rdx, %%rdx\n\t"                                      \
-        "movl		%%r8d, %%r9d\n\t"                                      \
-        "andl		$7, %%r9d\n\t"                                         \
-        "jz		4f\n\t"                                                \
-        "\n"                                                                   \
-        "2:\n\t"                                                               \
-        "movzbl		(%[c],%%rdx,1), %%ecx\n\t"                             \
-        "xorb		(%[m],%%rdx,1), %%cl\n\t"                              \
-        "movb		%%cl, (%[output],%%rdx,1)\n\t"                         \
-        "incl		%%edx\n\t"                                             \
-        "cmpl		%%r9d, %%edx\n\t"                                      \
-        "jne		2b\n\t"                                                \
-        "je		3f\n\t"                                                \
-        "\n"                                                                   \
-        "4:\n\t"                                                               \
-        "movq		(%[c],%%rdx,1), %%rcx\n\t"                             \
-        "xorq		(%[m],%%rdx,1), %%rcx\n\t"                             \
-        "movq		%%rcx, (%[output],%%rdx,1)\n\t"                        \
-        "addl		$8, %%edx\n\t"                                         \
-        "\n"                                                                   \
-        "3:\n\t"                                                               \
-        "cmpl		%%r8d, %%edx\n\t"                                      \
-        "jne		4b\n\t"                                                \
-        :                                                                      \
-        : [input] "r" (ctx->X), [c] "r" (x),                                   \
-          [output] "r" (c), [bytes] "r" (bytes), [m] "r" (m),                  \
-          [rotl8] "xrm" (rotl8), [rotl16] "xrm" (rotl16)                       \
-        : "eax", "ecx", "edx", "r8", "r9", "memory",                           \
-          "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",      \
-          "xmm8", "xmm9", "xmm10", "xmm11"                                     \
-    )
-
-
-#define CHACHA_CHUNK_AVX2()                                                    \
-    __asm__ __volatile__ (                                                     \
-        CHACHA_CRYPT_AVX2()                                                    \
-        "vmovdqu	 0(%[m]), %%xmm4\n\t"                                  \
-        "vmovdqu	16(%[m]), %%xmm5\n\t"                                  \
-        "vmovdqu	32(%[m]), %%xmm6\n\t"                                  \
-        "vmovdqu	48(%[m]), %%xmm7\n\t"                                  \
-        "vpxor		%%xmm4, %%xmm0, %%xmm0\n\t"                            \
-        "vpxor		%%xmm5, %%xmm1, %%xmm1\n\t"                            \
-        "vpxor		%%xmm6, %%xmm2, %%xmm2\n\t"                            \
-        "vpxor		%%xmm7, %%xmm3, %%xmm3\n\t"                            \
-        "vmovdqu	%%xmm0,  0(%[c])\n\t"                                  \
-        "vmovdqu	%%xmm1, 16(%[c])\n\t"                                  \
-        "vmovdqu	%%xmm2, 32(%[c])\n\t"                                  \
-        "vmovdqu	%%xmm3, 48(%[c])\n\t"                                  \
-        "addl		$1, 48(%[input])\n\t"                                  \
-        :                                                                      \
-        : [input] "r" (ctx->X), [c] "r" (c), [m] "r" (m),                      \
-          [rotl8] "xrm" (rotl8), [rotl16] "xrm" (rotl16)                       \
-        : "rax", "memory",                                                     \
-          "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",      \
-          "xmm8", "xmm9", "xmm10", "xmm11"                                     \
-    )
-
-
-static void chacha_encrypt_avx2(ChaCha* ctx, const byte* m, byte* c,
-                                 word32 bytes)
-{
-    ALIGN256 word32 X[8*CHACHA_CHUNK_WORDS]; /* used to make sure aligned */
-    ALIGN256 word32 x[4*CHACHA_CHUNK_WORDS]; /* used to make sure aligned */
-    word32 cnt = 0;
-    static const __m256i add    = { 0x0000000100000000UL,0x0000000300000002UL,
-                                    0x0000000500000004UL,0x0000000700000006UL };
-    static const __m256i eight  = { 0x0000000800000008UL,0x0000000800000008UL,
-                                    0x0000000800000008UL,0x0000000800000008UL };
-    static const __m256i rotl8_256  =
-                                  { 0x0605040702010003UL,0x0e0d0c0f0a09080bUL,
-                                    0x0605040702010003UL,0x0e0d0c0f0a09080bUL };
-    static const __m256i rotl16_256 =
-                                  { 0x0504070601000302UL,0x0d0c0f0e09080b0aUL,
-                                    0x0504070601000302UL,0x0d0c0f0e09080b0aUL };
-
-    if (bytes == 0)
-        return;
+extern void chacha_encrypt_x64(ChaCha* ctx, const byte* m, byte* c,
+                               word32 bytes);
+extern void chacha_encrypt_avx1(ChaCha* ctx, const byte* m, byte* c,
+                                word32 bytes);
+extern void chacha_encrypt_avx2(ChaCha* ctx, const byte* m, byte* c,
+                                word32 bytes);
 
-    __asm__ __volatile__ (
-       "movl	%[bytes], %[cnt]\n\t"
-       "shrl	$9, %[cnt]\n\t"
-       "jz      L_end256\n\t"
-
-       "vpbroadcastd	  (%[key]), %%ymm0\n\t"
-       "vpbroadcastd	 4(%[key]), %%ymm1\n\t"
-       "vpbroadcastd	 8(%[key]), %%ymm2\n\t"
-       "vpbroadcastd	12(%[key]), %%ymm3\n\t"
-       "vpbroadcastd	16(%[key]), %%ymm4\n\t"
-       "vpbroadcastd	20(%[key]), %%ymm5\n\t"
-       "vpbroadcastd	24(%[key]), %%ymm6\n\t"
-       "vpbroadcastd	28(%[key]), %%ymm7\n\t"
-       "vpbroadcastd	32(%[key]), %%ymm8\n\t"
-       "vpbroadcastd	36(%[key]), %%ymm9\n\t"
-       "vpbroadcastd	40(%[key]), %%ymm10\n\t"
-       "vpbroadcastd	44(%[key]), %%ymm11\n\t"
-       "vpbroadcastd	48(%[key]), %%ymm12\n\t"
-       "vpbroadcastd	52(%[key]), %%ymm13\n\t"
-       "vpbroadcastd	56(%[key]), %%ymm14\n\t"
-       "vpbroadcastd	60(%[key]), %%ymm15\n\t"
-
-       "vpaddd	%[add], %%ymm12, %%ymm12\n\t"
-
-       "vmovdqa	%%ymm0,     (%[X])\n\t"
-       "vmovdqa	%%ymm1,   32(%[X])\n\t"
-       "vmovdqa	%%ymm2,   64(%[X])\n\t"
-       "vmovdqa	%%ymm3,   96(%[X])\n\t"
-       "vmovdqa	%%ymm4,  128(%[X])\n\t"
-       "vmovdqa	%%ymm5,  160(%[X])\n\t"
-       "vmovdqa	%%ymm6,  192(%[X])\n\t"
-       "vmovdqa	%%ymm7,  224(%[X])\n\t"
-       "vmovdqa	%%ymm8,  256(%[X])\n\t"
-       "vmovdqa	%%ymm9,  288(%[X])\n\t"
-       "vmovdqa	%%ymm10, 320(%[X])\n\t"
-       "vmovdqa	%%ymm11, 352(%[X])\n\t"
-       "vmovdqa	%%ymm12, 384(%[X])\n\t"
-       "vmovdqa	%%ymm13, 416(%[X])\n\t"
-       "vmovdqa	%%ymm14, 448(%[X])\n\t"
-       "vmovdqa	%%ymm15, 480(%[X])\n\t"
-       "\n"
-   "L_enc256_loop:\n\t"
-       "vmovdqa	%%ymm11, 96(%[x])\n\t"
-       QUARTERROUND_YMM()
-       QUARTERROUND_YMM_2()
-       QUARTERROUND_YMM()
-       QUARTERROUND_YMM_2()
-       QUARTERROUND_YMM()
-       QUARTERROUND_YMM_2()
-       QUARTERROUND_YMM()
-       QUARTERROUND_YMM_2()
-       QUARTERROUND_YMM()
-       QUARTERROUND_YMM_2()
-       QUARTERROUND_YMM()
-       QUARTERROUND_YMM_2()
-       QUARTERROUND_YMM()
-       QUARTERROUND_YMM_2()
-       QUARTERROUND_YMM()
-       QUARTERROUND_YMM_2()
-       QUARTERROUND_YMM()
-       QUARTERROUND_YMM_2()
-       QUARTERROUND_YMM()
-       QUARTERROUND_YMM_2()
-       "vmovdqa	96(%[x]), %%ymm11\n\t"
-
-       "vpaddd	   (%[X]), %%ymm0,  %%ymm0\n\t"
-       "vpaddd	 32(%[X]), %%ymm1,  %%ymm1\n\t"
-       "vpaddd	 64(%[X]), %%ymm2,  %%ymm2\n\t"
-       "vpaddd	 96(%[X]), %%ymm3,  %%ymm3\n\t"
-       "vpaddd	128(%[X]), %%ymm4,  %%ymm4\n\t"
-       "vpaddd	160(%[X]), %%ymm5,  %%ymm5\n\t"
-       "vpaddd	192(%[X]), %%ymm6,  %%ymm6\n\t"
-       "vpaddd	224(%[X]), %%ymm7,  %%ymm7\n\t"
-       "vpaddd	256(%[X]), %%ymm8,  %%ymm8\n\t"
-       "vpaddd	288(%[X]), %%ymm9,  %%ymm9\n\t"
-       "vpaddd	320(%[X]), %%ymm10, %%ymm10\n\t"
-       "vpaddd	352(%[X]), %%ymm11, %%ymm11\n\t"
-       "vpaddd	384(%[X]), %%ymm12, %%ymm12\n\t"
-       "vpaddd	416(%[X]), %%ymm13, %%ymm13\n\t"
-       "vpaddd	448(%[X]), %%ymm14, %%ymm14\n\t"
-       "vpaddd	480(%[X]), %%ymm15, %%ymm15\n\t"
-
-       "vmovdqa	%%ymm8,     (%[x])\n\t"
-       "vmovdqa	%%ymm9,   32(%[x])\n\t"
-       "vmovdqa	%%ymm10,  64(%[x])\n\t"
-       "vmovdqa	%%ymm11,  96(%[x])\n\t"
-       "vmovdqa	%%ymm12, 128(%[x])\n\t"
-       "vmovdqa	%%ymm13, 160(%[x])\n\t"
-       "vmovdqa	%%ymm14, 192(%[x])\n\t"
-       "vmovdqa	%%ymm15, 224(%[x])\n\t"
-
-       "vpunpckldq	%%ymm1, %%ymm0, %%ymm8\n\t"
-       "vpunpckldq	%%ymm3, %%ymm2, %%ymm9\n\t"
-       "vpunpckhdq	%%ymm1, %%ymm0, %%ymm12\n\t"
-       "vpunpckhdq	%%ymm3, %%ymm2, %%ymm13\n\t"
-       "vpunpckldq	%%ymm5, %%ymm4, %%ymm10\n\t"
-       "vpunpckldq	%%ymm7, %%ymm6, %%ymm11\n\t"
-       "vpunpckhdq	%%ymm5, %%ymm4, %%ymm14\n\t"
-       "vpunpckhdq	%%ymm7, %%ymm6, %%ymm15\n\t"
-       "vpunpcklqdq	%%ymm9,  %%ymm8,  %%ymm0\n\t"
-       "vpunpcklqdq	%%ymm11, %%ymm10, %%ymm1\n\t"
-       "vpunpckhqdq	%%ymm9,  %%ymm8,  %%ymm2\n\t"
-       "vpunpckhqdq	%%ymm11, %%ymm10, %%ymm3\n\t"
-       "vpunpcklqdq	%%ymm13, %%ymm12, %%ymm4\n\t"
-       "vpunpcklqdq	%%ymm15, %%ymm14, %%ymm5\n\t"
-       "vpunpckhqdq	%%ymm13, %%ymm12, %%ymm6\n\t"
-       "vpunpckhqdq	%%ymm15, %%ymm14, %%ymm7\n\t"
-       "vperm2i128	$0x20, %%ymm1, %%ymm0, %%ymm8\n\t"
-       "vperm2i128	$0x20, %%ymm3, %%ymm2, %%ymm9\n\t"
-       "vperm2i128	$0x31, %%ymm1, %%ymm0, %%ymm12\n\t"
-       "vperm2i128	$0x31, %%ymm3, %%ymm2, %%ymm13\n\t"
-       "vperm2i128	$0x20, %%ymm5, %%ymm4, %%ymm10\n\t"
-       "vperm2i128	$0x20, %%ymm7, %%ymm6, %%ymm11\n\t"
-       "vperm2i128	$0x31, %%ymm5, %%ymm4, %%ymm14\n\t"
-       "vperm2i128	$0x31, %%ymm7, %%ymm6, %%ymm15\n\t"
+#ifdef __cplusplus
+    }  /* extern "C" */
+#endif
 
-       "vmovdqu	   (%[in]), %%ymm0\n\t"
-       "vmovdqu	 64(%[in]), %%ymm1\n\t"
-       "vmovdqu	128(%[in]), %%ymm2\n\t"
-       "vmovdqu	192(%[in]), %%ymm3\n\t"
-       "vmovdqu	256(%[in]), %%ymm4\n\t"
-       "vmovdqu	320(%[in]), %%ymm5\n\t"
-       "vmovdqu	384(%[in]), %%ymm6\n\t"
-       "vmovdqu	448(%[in]), %%ymm7\n\t"
-       "vpxor	%%ymm0, %%ymm8,  %%ymm8\n\t"
-       "vpxor	%%ymm1, %%ymm9,  %%ymm9\n\t"
-       "vpxor	%%ymm2, %%ymm10, %%ymm10\n\t"
-       "vpxor	%%ymm3, %%ymm11, %%ymm11\n\t"
-       "vpxor	%%ymm4, %%ymm12, %%ymm12\n\t"
-       "vpxor	%%ymm5, %%ymm13, %%ymm13\n\t"
-       "vpxor	%%ymm6, %%ymm14, %%ymm14\n\t"
-       "vpxor	%%ymm7, %%ymm15, %%ymm15\n\t"
-       "vmovdqu	%%ymm8,     (%[out])\n\t"
-       "vmovdqu	%%ymm9,   64(%[out])\n\t"
-       "vmovdqu	%%ymm10, 128(%[out])\n\t"
-       "vmovdqu	%%ymm11, 192(%[out])\n\t"
-       "vmovdqu	%%ymm12, 256(%[out])\n\t"
-       "vmovdqu	%%ymm13, 320(%[out])\n\t"
-       "vmovdqu	%%ymm14, 384(%[out])\n\t"
-       "vmovdqu	%%ymm15, 448(%[out])\n\t"
-
-       "vmovdqa	   (%[x]), %%ymm0\n\t"
-       "vmovdqa	 32(%[x]), %%ymm1\n\t"
-       "vmovdqa	 64(%[x]), %%ymm2\n\t"
-       "vmovdqa	 96(%[x]), %%ymm3\n\t"
-       "vmovdqa	128(%[x]), %%ymm4\n\t"
-       "vmovdqa	160(%[x]), %%ymm5\n\t"
-       "vmovdqa	192(%[x]), %%ymm6\n\t"
-       "vmovdqa	224(%[x]), %%ymm7\n\t"
-
-       "vpunpckldq	%%ymm1, %%ymm0, %%ymm8\n\t"
-       "vpunpckldq	%%ymm3, %%ymm2, %%ymm9\n\t"
-       "vpunpckhdq	%%ymm1, %%ymm0, %%ymm12\n\t"
-       "vpunpckhdq	%%ymm3, %%ymm2, %%ymm13\n\t"
-       "vpunpckldq	%%ymm5, %%ymm4, %%ymm10\n\t"
-       "vpunpckldq	%%ymm7, %%ymm6, %%ymm11\n\t"
-       "vpunpckhdq	%%ymm5, %%ymm4, %%ymm14\n\t"
-       "vpunpckhdq	%%ymm7, %%ymm6, %%ymm15\n\t"
-       "vpunpcklqdq	%%ymm9,  %%ymm8,  %%ymm0\n\t"
-       "vpunpcklqdq	%%ymm11, %%ymm10, %%ymm1\n\t"
-       "vpunpckhqdq	%%ymm9 , %%ymm8,  %%ymm2\n\t"
-       "vpunpckhqdq	%%ymm11, %%ymm10, %%ymm3\n\t"
-       "vpunpcklqdq	%%ymm13, %%ymm12, %%ymm4\n\t"
-       "vpunpcklqdq	%%ymm15, %%ymm14, %%ymm5\n\t"
-       "vpunpckhqdq	%%ymm13, %%ymm12, %%ymm6\n\t"
-       "vpunpckhqdq	%%ymm15, %%ymm14, %%ymm7\n\t"
-       "vperm2i128	$0x20, %%ymm1, %%ymm0, %%ymm8\n\t"
-       "vperm2i128	$0x20, %%ymm3, %%ymm2, %%ymm9\n\t"
-       "vperm2i128	$0x31, %%ymm1, %%ymm0, %%ymm12\n\t"
-       "vperm2i128	$0x31, %%ymm3, %%ymm2, %%ymm13\n\t"
-       "vperm2i128	$0x20, %%ymm5, %%ymm4, %%ymm10\n\t"
-       "vperm2i128	$0x20, %%ymm7, %%ymm6, %%ymm11\n\t"
-       "vperm2i128	$0x31, %%ymm5, %%ymm4, %%ymm14\n\t"
-       "vperm2i128	$0x31, %%ymm7, %%ymm6, %%ymm15\n\t"
-
-       "vmovdqu	 32(%[in]), %%ymm0\n\t"
-       "vmovdqu	 96(%[in]), %%ymm1\n\t"
-       "vmovdqu	160(%[in]), %%ymm2\n\t"
-       "vmovdqu	224(%[in]), %%ymm3\n\t"
-       "vmovdqu	288(%[in]), %%ymm4\n\t"
-       "vmovdqu	352(%[in]), %%ymm5\n\t"
-       "vmovdqu	416(%[in]), %%ymm6\n\t"
-       "vmovdqu	480(%[in]), %%ymm7\n\t"
-       "vpxor	%%ymm0, %%ymm8,  %%ymm8\n\t"
-       "vpxor	%%ymm1, %%ymm9,  %%ymm9\n\t"
-       "vpxor	%%ymm2, %%ymm10, %%ymm10\n\t"
-       "vpxor	%%ymm3, %%ymm11, %%ymm11\n\t"
-       "vpxor	%%ymm4, %%ymm12, %%ymm12\n\t"
-       "vpxor	%%ymm5, %%ymm13, %%ymm13\n\t"
-       "vpxor	%%ymm6, %%ymm14, %%ymm14\n\t"
-       "vpxor	%%ymm7, %%ymm15, %%ymm15\n\t"
-       "vmovdqu	%%ymm8,   32(%[out])\n\t"
-       "vmovdqu	%%ymm9,   96(%[out])\n\t"
-       "vmovdqu	%%ymm10, 160(%[out])\n\t"
-       "vmovdqu	%%ymm11, 224(%[out])\n\t"
-       "vmovdqu	%%ymm12, 288(%[out])\n\t"
-       "vmovdqu	%%ymm13, 352(%[out])\n\t"
-       "vmovdqu	%%ymm14, 416(%[out])\n\t"
-       "vmovdqu	%%ymm15, 480(%[out])\n\t"
-
-       "vmovdqa	384(%[X]), %%ymm12\n\t"
-       "add	$512, %[in]\n\t"
-       "add	$512, %[out]\n\t"
-       "vpaddd	%[eight], %%ymm12, %%ymm12\n\t"
-       "sub	$512, %[bytes]\n\t"
-       "vmovdqa	%%ymm12, 384(%[X])\n\t"
-       "cmp	$512, %[bytes]\n\t"
-       "jl	L_done256\n\t"
-
-       "vmovdqa	   (%[X]), %%ymm0\n\t"
-       "vmovdqa	 32(%[X]), %%ymm1\n\t"
-       "vmovdqa	 64(%[X]), %%ymm2\n\t"
-       "vmovdqa	 96(%[X]), %%ymm3\n\t"
-       "vmovdqa	128(%[X]), %%ymm4\n\t"
-       "vmovdqa	160(%[X]), %%ymm5\n\t"
-       "vmovdqa	192(%[X]), %%ymm6\n\t"
-       "vmovdqa	224(%[X]), %%ymm7\n\t"
-       "vmovdqa	256(%[X]), %%ymm8\n\t"
-       "vmovdqa	288(%[X]), %%ymm9\n\t"
-       "vmovdqa	320(%[X]), %%ymm10\n\t"
-       "vmovdqa	352(%[X]), %%ymm11\n\t"
-       "vmovdqa	384(%[X]), %%ymm12\n\t"
-       "vmovdqa	416(%[X]), %%ymm13\n\t"
-       "vmovdqa	448(%[X]), %%ymm14\n\t"
-       "vmovdqa	480(%[X]), %%ymm15\n\t"
-       "jmp	L_enc256_loop\n\t"
-       "\n"
-   "L_done256:\n\t"
-       "shl	$3, %[cnt]\n\t"
-       "add	48(%[key]), %[cnt]\n\t"
-       "movl	%[cnt], 48(%[key])\n\t"
-       "\n"
-   "L_end256:\n\t"
-       : [bytes] "+r" (bytes), [cnt] "+r" (cnt),
-         [in] "+r" (m), [out] "+r" (c)
-       : [X] "r" (X), [x] "r" (x), [key] "r" (ctx->X),
-         [add] "m" (add), [eight] "m" (eight),
-         [rotl8] "m" (rotl8_256), [rotl16] "m" (rotl16_256)
-       : "ymm0", "ymm1", "ymm2", "ymm3",
-         "ymm4", "ymm5", "ymm6", "ymm7",
-         "ymm8", "ymm9", "ymm10", "ymm11",
-         "ymm12", "ymm13", "ymm14", "ymm15", "memory"
-    );
-
-    /* AVX code optimised for multiples of 256 bytes. */
-    if (bytes == 256) {
-        chacha_encrypt_avx(ctx, m, c, bytes);
-        bytes -= 256;
-    }
-
-    for (; bytes >= CHACHA_CHUNK_BYTES;) {
-        CHACHA_CHUNK_AVX2();
-        bytes -= CHACHA_CHUNK_BYTES;
-        c += CHACHA_CHUNK_BYTES;
-        m += CHACHA_CHUNK_BYTES;
-    }
-    if (bytes > 0) {
-        CHACHA_PARTIAL_CHUNK_AVX2();
-    }
-}
-#endif /* HAVE_INTEL_AVX2 */
-#endif /* USE_INTEL_CHACHA_SPEEDUP */
 
 /**
   * Encrypt a stream of bytes
@@ -1456,17 +265,28 @@
     word32 temp[CHACHA_CHUNK_WORDS]; /* used to make sure aligned */
     word32 i;
 
-    output = (byte*)temp;
+    /* handle left overs */
+    if (bytes > 0 && ctx->left > 0) {
+        wc_Chacha_wordtobyte(temp, ctx->X); /* recreate the stream */
+        output = (byte*)temp + CHACHA_CHUNK_BYTES - ctx->left;
+        for (i = 0; i < bytes && i < ctx->left; i++) {
+            c[i] = m[i] ^ output[i];
+        }
+        ctx->left = ctx->left - i;
 
-    for (; bytes > 0;) {
+        /* Used up all of the stream that was left, increment the counter */
+        if (ctx->left == 0) {
+            ctx->X[CHACHA_IV_BYTES] = PLUSONE(ctx->X[CHACHA_IV_BYTES]);
+        }
+        bytes = bytes - i;
+        c += i;
+        m += i;
+    }
+
+    output = (byte*)temp;
+    while (bytes >= CHACHA_CHUNK_BYTES) {
         wc_Chacha_wordtobyte(temp, ctx->X);
         ctx->X[CHACHA_IV_BYTES] = PLUSONE(ctx->X[CHACHA_IV_BYTES]);
-        if (bytes <= CHACHA_CHUNK_BYTES) {
-            for (i = 0; i < bytes; ++i) {
-                c[i] = m[i] ^ output[i];
-            }
-            return;
-        }
         for (i = 0; i < CHACHA_CHUNK_BYTES; ++i) {
             c[i] = m[i] ^ output[i];
         }
@@ -1474,8 +294,20 @@
         c += CHACHA_CHUNK_BYTES;
         m += CHACHA_CHUNK_BYTES;
     }
+
+    if (bytes) {
+        /* in this case there will always be some left over since bytes is less
+         * than CHACHA_CHUNK_BYTES, so do not increment counter after getting
+         * stream in order for the stream to be recreated on next call */
+        wc_Chacha_wordtobyte(temp, ctx->X);
+        for (i = 0; i < bytes; ++i) {
+            c[i] = m[i] ^ output[i];
+        }
+        ctx->left = CHACHA_CHUNK_BYTES - i;
+    }
 }
 
+
 /**
   * API to encrypt/decrypt a message of any size.
   */
@@ -1498,7 +330,7 @@
     }
     #endif
     if (IS_INTEL_AVX1(cpuidFlags)) {
-        chacha_encrypt_avx(ctx, input, output, msglen);
+        chacha_encrypt_avx1(ctx, input, output, msglen);
         return 0;
     }
     else {
@@ -1513,4 +345,5 @@
 
 #endif /* HAVE_CHACHA*/
 
+#endif /* WOLFSSL_ARMASM */
 
--- a/wolfcrypt/src/chacha20_poly1305.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/chacha20_poly1305.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* chacha.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -32,8 +32,6 @@
 #include <wolfssl/wolfcrypt/chacha20_poly1305.h>
 #include <wolfssl/wolfcrypt/error-crypt.h>
 #include <wolfssl/wolfcrypt/logging.h>
-#include <wolfssl/wolfcrypt/chacha.h>
-#include <wolfssl/wolfcrypt/poly1305.h>
 
 #ifdef NO_INLINE
 #include <wolfssl/wolfcrypt/misc.h>
@@ -42,20 +40,7 @@
 #include <wolfcrypt/src/misc.c>
 #endif
 
-#ifdef CHACHA_AEAD_TEST
-#include <stdio.h>
-#endif
-
 #define CHACHA20_POLY1305_AEAD_INITIAL_COUNTER  0
-#define CHACHA20_POLY1305_MAC_PADDING_ALIGNMENT 16
-
-static void word32ToLittle64(const word32 inLittle32, byte outLittle64[8]);
-static int calculateAuthTag(
-                  const byte inAuthKey[CHACHA20_POLY1305_AEAD_KEYSIZE],
-                  const byte* inAAD, const word32 inAADLen,
-                  const byte *inCiphertext, const word32 inCiphertextLen,
-                  byte outAuthTag[CHACHA20_POLY1305_AEAD_AUTHTAG_SIZE]);
-
 int wc_ChaCha20Poly1305_Encrypt(
                 const byte inKey[CHACHA20_POLY1305_AEAD_KEYSIZE],
                 const byte inIV[CHACHA20_POLY1305_AEAD_IV_SIZE],
@@ -64,12 +49,10 @@
                 byte* outCiphertext,
                 byte outAuthTag[CHACHA20_POLY1305_AEAD_AUTHTAG_SIZE])
 {
-    int err;
-    byte poly1305Key[CHACHA20_POLY1305_AEAD_KEYSIZE];
-    ChaCha chaChaCtx;
+    int ret;
+    ChaChaPoly_Aead aead;
 
     /* Validate function arguments */
-
     if (!inKey || !inIV ||
         !inPlaintext || !inPlaintextLen ||
         !outCiphertext ||
@@ -78,35 +61,18 @@
         return BAD_FUNC_ARG;
     }
 
-    XMEMSET(poly1305Key, 0, sizeof(poly1305Key));
-
-    /* Create the Poly1305 key */
-    err = wc_Chacha_SetKey(&chaChaCtx, inKey, CHACHA20_POLY1305_AEAD_KEYSIZE);
-    if (err != 0) return err;
-
-    err = wc_Chacha_SetIV(&chaChaCtx, inIV,
-                           CHACHA20_POLY1305_AEAD_INITIAL_COUNTER);
-    if (err != 0) return err;
-
-    err = wc_Chacha_Process(&chaChaCtx, poly1305Key, poly1305Key,
-                             CHACHA20_POLY1305_AEAD_KEYSIZE);
-    if (err != 0) return err;
-
-    /* Encrypt the plaintext using ChaCha20 */
-    err = wc_Chacha_Process(&chaChaCtx, outCiphertext, inPlaintext,
-                            inPlaintextLen);
-    /* Calculate the Poly1305 auth tag */
-    if (err == 0)
-        err = calculateAuthTag(poly1305Key,
-                               inAAD, inAADLen,
-                               outCiphertext, inPlaintextLen,
-                               outAuthTag);
-    ForceZero(poly1305Key, sizeof(poly1305Key));
-
-    return err;
+    ret = wc_ChaCha20Poly1305_Init(&aead, inKey, inIV,
+        CHACHA20_POLY1305_AEAD_ENCRYPT);
+    if (ret == 0)
+        ret = wc_ChaCha20Poly1305_UpdateAad(&aead, inAAD, inAADLen);
+    if (ret == 0)
+        ret = wc_ChaCha20Poly1305_UpdateData(&aead, inPlaintext, outCiphertext,
+            inPlaintextLen);
+    if (ret == 0)
+        ret = wc_ChaCha20Poly1305_Final(&aead, outAuthTag);
+    return ret;
 }
 
-
 int wc_ChaCha20Poly1305_Decrypt(
                 const byte inKey[CHACHA20_POLY1305_AEAD_KEYSIZE],
                 const byte inIV[CHACHA20_POLY1305_AEAD_IV_SIZE],
@@ -115,13 +81,11 @@
                 const byte inAuthTag[CHACHA20_POLY1305_AEAD_AUTHTAG_SIZE],
                 byte* outPlaintext)
 {
-    int err;
-    byte poly1305Key[CHACHA20_POLY1305_AEAD_KEYSIZE];
-    ChaCha chaChaCtx;
+    int ret;
+    ChaChaPoly_Aead aead;
     byte calculatedAuthTag[CHACHA20_POLY1305_AEAD_AUTHTAG_SIZE];
 
     /* Validate function arguments */
-
     if (!inKey || !inIV ||
         !inCiphertext || !inCiphertextLen ||
         !inAuthTag ||
@@ -131,122 +95,193 @@
     }
 
     XMEMSET(calculatedAuthTag, 0, sizeof(calculatedAuthTag));
-    XMEMSET(poly1305Key, 0, sizeof(poly1305Key));
+
+    ret = wc_ChaCha20Poly1305_Init(&aead, inKey, inIV,
+        CHACHA20_POLY1305_AEAD_DECRYPT);
+    if (ret == 0)
+        ret = wc_ChaCha20Poly1305_UpdateAad(&aead, inAAD, inAADLen);
+    if (ret == 0)
+        ret = wc_ChaCha20Poly1305_UpdateData(&aead, inCiphertext, outPlaintext,
+            inCiphertextLen);
+    if (ret == 0)
+        ret = wc_ChaCha20Poly1305_Final(&aead, calculatedAuthTag);
+    if (ret == 0)
+        ret = wc_ChaCha20Poly1305_CheckTag(inAuthTag, calculatedAuthTag);
+    return ret;
+}
+
+int wc_ChaCha20Poly1305_CheckTag(
+    const byte authTag[CHACHA20_POLY1305_AEAD_AUTHTAG_SIZE],
+    const byte authTagChk[CHACHA20_POLY1305_AEAD_AUTHTAG_SIZE])
+{
+    int ret = 0;
+    if (authTag == NULL || authTagChk == NULL) {
+        return BAD_FUNC_ARG;
+    }
+    if (ConstantCompare(authTag, authTagChk,
+            CHACHA20_POLY1305_AEAD_AUTHTAG_SIZE) != 0) {
+        ret = MAC_CMP_FAILED_E;
+    }
+    return ret;
+}
+
+int wc_ChaCha20Poly1305_Init(ChaChaPoly_Aead* aead,
+    const byte inKey[CHACHA20_POLY1305_AEAD_KEYSIZE],
+    const byte inIV[CHACHA20_POLY1305_AEAD_IV_SIZE],
+    int isEncrypt)
+{
+    int ret;
+    byte authKey[CHACHA20_POLY1305_AEAD_KEYSIZE];
+
+    /* check arguments */
+    if (aead == NULL || inKey == NULL || inIV == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    /* setup aead context */
+    XMEMSET(aead, 0, sizeof(ChaChaPoly_Aead));
+    XMEMSET(authKey, 0, sizeof(authKey));
+    aead->isEncrypt = isEncrypt;
+
+    /* Initialize the ChaCha20 context (key and iv) */
+    ret = wc_Chacha_SetKey(&aead->chacha, inKey,
+        CHACHA20_POLY1305_AEAD_KEYSIZE);
+    if (ret == 0) {
+        ret = wc_Chacha_SetIV(&aead->chacha, inIV,
+            CHACHA20_POLY1305_AEAD_INITIAL_COUNTER);
+    }
 
     /* Create the Poly1305 key */
-    err = wc_Chacha_SetKey(&chaChaCtx, inKey, CHACHA20_POLY1305_AEAD_KEYSIZE);
-    if (err != 0) return err;
-
-    err = wc_Chacha_SetIV(&chaChaCtx, inIV,
-                           CHACHA20_POLY1305_AEAD_INITIAL_COUNTER);
-    if (err != 0) return err;
-
-    err = wc_Chacha_Process(&chaChaCtx, poly1305Key, poly1305Key,
-                             CHACHA20_POLY1305_AEAD_KEYSIZE);
-    if (err != 0) return err;
+    if (ret == 0) {
+        ret = wc_Chacha_Process(&aead->chacha, authKey, authKey,
+            CHACHA20_POLY1305_AEAD_KEYSIZE);
+    }
 
-    /* Calculate the Poly1305 auth tag */
-    err = calculateAuthTag(poly1305Key,
-                           inAAD, inAADLen,
-                           inCiphertext, inCiphertextLen,
-                           calculatedAuthTag);
-
-    /* Compare the calculated auth tag with the received one */
-    if (err == 0 && ConstantCompare(inAuthTag, calculatedAuthTag,
-                                    CHACHA20_POLY1305_AEAD_AUTHTAG_SIZE) != 0)
-    {
-        err = MAC_CMP_FAILED_E;
+    /* Initialize Poly1305 context */
+    if (ret == 0) {
+        ret = wc_Poly1305SetKey(&aead->poly, authKey,
+            CHACHA20_POLY1305_AEAD_KEYSIZE);
     }
 
-    /* Decrypt the received ciphertext */
-    if (err == 0)
-        err = wc_Chacha_Process(&chaChaCtx, outPlaintext, inCiphertext,
-                                inCiphertextLen);
-    ForceZero(poly1305Key, sizeof(poly1305Key));
+    /* advance counter by 1 after creating Poly1305 key */
+    if (ret == 0) {
+        ret = wc_Chacha_SetIV(&aead->chacha, inIV,
+            CHACHA20_POLY1305_AEAD_INITIAL_COUNTER + 1);
+    }
 
-    return err;
+    if (ret == 0) {
+        aead->state = CHACHA20_POLY1305_STATE_READY;
+    }
+
+    return ret;
 }
 
-
-static int calculateAuthTag(
-                const byte inAuthKey[CHACHA20_POLY1305_AEAD_KEYSIZE],
-                const byte *inAAD, const word32 inAADLen,
-                const byte *inCiphertext, const word32 inCiphertextLen,
-                 byte outAuthTag[CHACHA20_POLY1305_AEAD_AUTHTAG_SIZE])
+/* optional additional authentication data */
+int wc_ChaCha20Poly1305_UpdateAad(ChaChaPoly_Aead* aead,
+    const byte* inAAD, word32 inAADLen)
 {
-    int err;
-    Poly1305 poly1305Ctx;
-    byte padding[CHACHA20_POLY1305_MAC_PADDING_ALIGNMENT - 1];
-    word32 paddingLen;
-    byte little64[16];
+    int ret = 0;
 
-    XMEMSET(padding, 0, sizeof(padding));
+    if (aead == NULL || (inAAD == NULL && inAADLen > 0)) {
+        return BAD_FUNC_ARG;
+    }
+    if (aead->state != CHACHA20_POLY1305_STATE_READY &&
+        aead->state != CHACHA20_POLY1305_STATE_AAD) {
+        return BAD_STATE_E;
+    }
+
+    if (inAAD && inAADLen > 0) {
+        ret = wc_Poly1305Update(&aead->poly, inAAD, inAADLen);
+        if (ret == 0) {
+            aead->aadLen += inAADLen;
+            aead->state = CHACHA20_POLY1305_STATE_AAD;
+        }
+    }
 
-    /* Initialize Poly1305 */
-    err = wc_Poly1305SetKey(&poly1305Ctx, inAuthKey,
-                            CHACHA20_POLY1305_AEAD_KEYSIZE);
-    if (err)
-        return err;
+    return ret;
+}
+
+/* inData and outData can be same pointer (inline) */
+int wc_ChaCha20Poly1305_UpdateData(ChaChaPoly_Aead* aead,
+    const byte* inData, byte* outData, word32 dataLen)
+{
+    int ret = 0;
 
-    /* Create the authTag by MAC'ing the following items: */
-    /* -- AAD */
-    if (inAAD && inAADLen)
-    {
-        err = wc_Poly1305Update(&poly1305Ctx, inAAD, inAADLen);
-        /* -- padding1: pad the AAD to 16 bytes */
-        paddingLen = -(int)inAADLen &
-                                  (CHACHA20_POLY1305_MAC_PADDING_ALIGNMENT - 1);
-        if (paddingLen)
-            err += wc_Poly1305Update(&poly1305Ctx, padding, paddingLen);
+    if (aead == NULL || inData == NULL || outData == NULL) {
+        return BAD_FUNC_ARG;
+    }
+    if (aead->state != CHACHA20_POLY1305_STATE_READY &&
+        aead->state != CHACHA20_POLY1305_STATE_AAD &&
+        aead->state != CHACHA20_POLY1305_STATE_DATA) {
+        return BAD_STATE_E;
+    }
 
-        if (err)
-            return err;
+    /* Pad the AAD */
+    if (aead->state == CHACHA20_POLY1305_STATE_AAD) {
+        ret = wc_Poly1305_Pad(&aead->poly, aead->aadLen);
     }
 
-    /* -- Ciphertext */
-    err = wc_Poly1305Update(&poly1305Ctx, inCiphertext, inCiphertextLen);
-    if (err)
-        return err;
+    /* advance state */
+    aead->state = CHACHA20_POLY1305_STATE_DATA;
 
-    /* -- padding2: pad the ciphertext to 16 bytes */
-    paddingLen = -(int)inCiphertextLen &
-                                  (CHACHA20_POLY1305_MAC_PADDING_ALIGNMENT - 1);
-    if (paddingLen)
-    {
-        err = wc_Poly1305Update(&poly1305Ctx, padding, paddingLen);
-        if (err)
-            return err;
+    /* Perform ChaCha20 encrypt/decrypt and Poly1305 auth calc */
+    if (ret == 0) {
+        if (aead->isEncrypt) {
+            ret = wc_Chacha_Process(&aead->chacha, outData, inData, dataLen);
+            if (ret == 0)
+                ret = wc_Poly1305Update(&aead->poly, outData, dataLen);
+        }
+        else {
+            ret = wc_Poly1305Update(&aead->poly, inData, dataLen);
+            if (ret == 0)
+                ret = wc_Chacha_Process(&aead->chacha, outData, inData, dataLen);
+        }
+    }
+    if (ret == 0) {
+        aead->dataLen += dataLen;
+    }
+    return ret;
+}
+
+int wc_ChaCha20Poly1305_Final(ChaChaPoly_Aead* aead,
+    byte outAuthTag[CHACHA20_POLY1305_AEAD_AUTHTAG_SIZE])
+{
+    int ret = 0;
+
+    if (aead == NULL || outAuthTag == NULL) {
+        return BAD_FUNC_ARG;
+    }
+    if (aead->state != CHACHA20_POLY1305_STATE_AAD &&
+        aead->state != CHACHA20_POLY1305_STATE_DATA) {
+        return BAD_STATE_E;
     }
 
-    /* -- AAD length as a 64-bit little endian integer */
-    word32ToLittle64(inAADLen, little64);
-    /* -- Ciphertext length as a 64-bit little endian integer */
-    word32ToLittle64(inCiphertextLen, little64 + 8);
-    err = wc_Poly1305Update(&poly1305Ctx, little64, sizeof(little64));
-    if (err)
-        return err;
+    /* Pad the AAD - Make sure it is done */
+    if (aead->state == CHACHA20_POLY1305_STATE_AAD) {
+        ret = wc_Poly1305_Pad(&aead->poly, aead->aadLen);
+    }
+
+    /* Pad the ciphertext to 16 bytes */
+    if (ret == 0) {
+        ret = wc_Poly1305_Pad(&aead->poly, aead->dataLen);
+    }
+
+    /* Add the aad length and plaintext/ciphertext length */
+    if (ret == 0) {
+        ret = wc_Poly1305_EncodeSizes(&aead->poly, aead->aadLen,
+            aead->dataLen);
+    }
 
     /* Finalize the auth tag */
-    err = wc_Poly1305Final(&poly1305Ctx, outAuthTag);
-
-    return err;
-}
-
+    if (ret == 0) {
+        ret = wc_Poly1305Final(&aead->poly, outAuthTag);
+    }
 
-static void word32ToLittle64(const word32 inLittle32, byte outLittle64[8])
-{
-#ifndef WOLFSSL_X86_64_BUILD
-    XMEMSET(outLittle64 + 4, 0, 4);
+    /* reset and cleanup sensitive context */
+    ForceZero(aead, sizeof(ChaChaPoly_Aead));
 
-    outLittle64[0] = (byte)(inLittle32 & 0x000000FF);
-    outLittle64[1] = (byte)((inLittle32 & 0x0000FF00) >> 8);
-    outLittle64[2] = (byte)((inLittle32 & 0x00FF0000) >> 16);
-    outLittle64[3] = (byte)((inLittle32 & 0xFF000000) >> 24);
-#else
-    *(word64*)outLittle64 = inLittle32;
-#endif
+    return ret;
 }
 
-
 #endif /* HAVE_CHACHA && HAVE_POLY1305 */
 
--- a/wolfcrypt/src/cmac.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/cmac.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* cmac.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
--- a/wolfcrypt/src/coding.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/coding.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* coding.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -36,10 +36,14 @@
 enum {
     BAD         = 0xFF,  /* invalid encoding */
     PAD         = '=',
-    PEM_LINE_SZ = 64
+    PEM_LINE_SZ = 64,
+    BASE64_MIN  = 0x2B,
+    BASE16_MIN  = 0x30,
 };
 
 
+#ifdef WOLFSSL_BASE64_DECODE
+
 static
 const byte base64Decode[] = { 62, BAD, BAD, BAD, 63,   /* + starts at 0x2B */
                               52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
@@ -53,27 +57,81 @@
                               46, 47, 48, 49, 50, 51
                             };
 
+static WC_INLINE int Base64_SkipNewline(const byte* in, word32 *inLen, word32 *outJ)
+{
+    word32 len = *inLen;
+    word32 j = *outJ;
+    if (len && (in[j] == ' ' || in[j] == '\r' || in[j] == '\n')) {
+        byte endLine = in[j++];
+        len--;
+        while (len && endLine == ' ') {   /* allow trailing whitespace */
+            endLine = in[j++];
+            len--;
+        }
+        if (endLine == '\r') {
+            if (len) {
+                endLine = in[j++];
+                len--;
+            }
+        }
+        if (endLine != '\n') {
+            WOLFSSL_MSG("Bad end of line in Base64 Decode");
+            return ASN_INPUT_E;
+        }
+    }
+    if (!len) {
+        return BUFFER_E;
+    }
+    *inLen = len;
+    *outJ = j;
+    return 0;
+}
 
 int Base64_Decode(const byte* in, word32 inLen, byte* out, word32* outLen)
 {
     word32 i = 0;
     word32 j = 0;
     word32 plainSz = inLen - ((inLen + (PEM_LINE_SZ - 1)) / PEM_LINE_SZ );
-    const byte maxIdx = (byte)sizeof(base64Decode) + 0x2B - 1;
+    int ret;
+    const byte maxIdx = (byte)sizeof(base64Decode) + BASE64_MIN - 1;
 
     plainSz = (plainSz * 3 + 3) / 4;
     if (plainSz > *outLen) return BAD_FUNC_ARG;
 
     while (inLen > 3) {
-        byte b1, b2, b3;
-        byte e1 = in[j++];
-        byte e2 = in[j++];
-        byte e3 = in[j++];
-        byte e4 = in[j++];
-
         int pad3 = 0;
         int pad4 = 0;
 
+        byte b1, b2, b3;
+        byte e1, e2, e3, e4;
+        if ((ret = Base64_SkipNewline(in, &inLen, &j)) != 0) {
+            if (ret == BUFFER_E) {
+                /* Running out of buffer here is not an error */
+                break;
+            }
+            return ret;
+        }
+        e1 = in[j++];
+        if (e1 == '\0') {
+            break;
+        }
+        inLen--;
+        if ((ret = Base64_SkipNewline(in, &inLen, &j)) != 0) {
+            return ret;
+        }
+        e2 = in[j++];
+        inLen--;
+        if ((ret = Base64_SkipNewline(in, &inLen, &j)) != 0) {
+            return ret;
+        }
+        e3 = in[j++];
+        inLen--;
+        if ((ret = Base64_SkipNewline(in, &inLen, &j)) != 0) {
+            return ret;
+        }
+        e4 = in[j++];
+        inLen--;
+
         if (e1 == 0)            /* end file 0's */
             break;
         if (e3 == PAD)
@@ -81,7 +139,7 @@
         if (e4 == PAD)
             pad4 = 1;
 
-        if (e1 < 0x2B || e2 < 0x2B || e3 < 0x2B || e4 < 0x2B) {
+        if (e1 < BASE64_MIN || e2 < BASE64_MIN || e3 < BASE64_MIN || e4 < BASE64_MIN) {
             WOLFSSL_MSG("Bad Base64 Decode data, too small");
             return ASN_INPUT_E;
         }
@@ -91,10 +149,15 @@
             return ASN_INPUT_E;
         }
 
-        e1 = base64Decode[e1 - 0x2B];
-        e2 = base64Decode[e2 - 0x2B];
-        e3 = (e3 == PAD) ? 0 : base64Decode[e3 - 0x2B];
-        e4 = (e4 == PAD) ? 0 : base64Decode[e4 - 0x2B];
+        if (i + 1 + !pad3 + !pad4 > *outLen) {
+            WOLFSSL_MSG("Bad Base64 Decode out buffer, too small");
+            return BAD_FUNC_ARG;
+        }
+
+        e1 = base64Decode[e1 - BASE64_MIN];
+        e2 = base64Decode[e2 - BASE64_MIN];
+        e3 = (e3 == PAD) ? 0 : base64Decode[e3 - BASE64_MIN];
+        e4 = (e4 == PAD) ? 0 : base64Decode[e4 - BASE64_MIN];
 
         b1 = (byte)((e1 << 2) | (e2 >> 4));
         b2 = (byte)(((e2 & 0xF) << 4) | (e3 >> 2));
@@ -107,32 +170,17 @@
             out[i++] = b3;
         else
             break;
+    }
+/* If the output buffer has a room for an extra byte, add a null terminator */
+    if (out && *outLen > i)
+        out[i]= '\0';
 
-        inLen -= 4;
-        if (inLen && (in[j] == ' ' || in[j] == '\r' || in[j] == '\n')) {
-            byte endLine = in[j++];
-            inLen--;
-            while (inLen && endLine == ' ') {   /* allow trailing whitespace */
-                endLine = in[j++];
-                inLen--;
-            }
-            if (endLine == '\r') {
-                if (inLen) {
-                    endLine = in[j++];
-                    inLen--;
-                }
-            }
-            if (endLine != '\n') {
-                WOLFSSL_MSG("Bad end of line in Base64 Decode");
-                return ASN_INPUT_E;
-            }
-        }
-    }
     *outLen = i;
 
     return 0;
 }
 
+#endif /* WOLFSSL_BASE64_DECODE */
 
 #if defined(WOLFSSL_BASE64_ENCODE)
 
@@ -281,7 +329,7 @@
         inLen -= 3;
 
         /* Insert newline after PEM_LINE_SZ, unless no \n requested */
-        if (escaped != WC_NO_NL_ENC && (++n % (PEM_LINE_SZ/4)) == 0 && inLen){
+        if (escaped != WC_NO_NL_ENC && (++n % (PEM_LINE_SZ/4)) == 0 && inLen) {
             ret = CEscape(escaped, '\n', out, &i, *outLen, 1, getSzOnly);
             if (ret != 0) break;
         }
@@ -318,10 +366,15 @@
 
     if (i != outSz && escaped != 1 && ret == 0)
         return ASN_INPUT_E;
+/* If the output buffer has a room for an extra byte, add a null terminator */
+    if (out && *outLen > i)
+        out[i]= '\0';
 
     *outLen = i;
-    if(ret == 0)
+
+    if (ret == 0)
         return getSzOnly ? LENGTH_ONLY_E : 0;
+
     return ret;
 }
 
@@ -369,7 +422,7 @@
         return BAD_FUNC_ARG;
 
     if (inLen == 1 && *outLen && in) {
-        byte b = in[inIdx++] - 0x30;  /* 0 starts at 0x30 */
+        byte b = in[inIdx++] - BASE16_MIN;  /* 0 starts at 0x30 */
 
         /* sanity check */
         if (b >=  sizeof(hexDecode)/sizeof(hexDecode[0]))
@@ -393,8 +446,8 @@
         return BAD_FUNC_ARG;
 
     while (inLen) {
-        byte b  = in[inIdx++] - 0x30;  /* 0 starts at 0x30 */
-        byte b2 = in[inIdx++] - 0x30;
+        byte b  = in[inIdx++] - BASE16_MIN;  /* 0 starts at 0x30 */
+        byte b2 = in[inIdx++] - BASE16_MIN;
 
         /* sanity checks */
         if (b >=  sizeof(hexDecode)/sizeof(hexDecode[0]))
--- a/wolfcrypt/src/compress.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/compress.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* compress.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -67,7 +67,6 @@
 #endif
 
 
-int wc_Compress(byte* out, word32 outSz, const byte* in, word32 inSz, word32 flags)
 /*
  * out - pointer to destination buffer
  * outSz - size of destination buffer
@@ -84,6 +83,8 @@
  * add to the size of the output. The libz code says the compressed
  * buffer should be srcSz + 0.1% + 12.
  */
+int wc_Compress_ex(byte* out, word32 outSz, const byte* in, word32 inSz,
+    word32 flags, word32 windowBits)
 {
     z_stream stream;
     int result = 0;
@@ -103,7 +104,8 @@
     stream.opaque = (voidpf)0;
 
     if (deflateInit2(&stream, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
-                     DEFLATE_DEFAULT_WINDOWBITS, DEFLATE_DEFAULT_MEMLEVEL,
+                     DEFLATE_DEFAULT_WINDOWBITS | windowBits,
+                     DEFLATE_DEFAULT_MEMLEVEL,
                      flags ? Z_FIXED : Z_DEFAULT_STRATEGY) != Z_OK)
         return COMPRESS_INIT_E;
 
@@ -120,14 +122,32 @@
     return result;
 }
 
+int wc_Compress(byte* out, word32 outSz, const byte* in, word32 inSz, word32 flags)
+{
+    return wc_Compress_ex(out, outSz, in, inSz, flags, 0);
+}
 
-int wc_DeCompress(byte* out, word32 outSz, const byte* in, word32 inSz)
+
+/* windowBits:
+* deflateInit() and inflateInit(), as well as deflateInit2() and inflateInit2()
+    with windowBits in 0..15 all process zlib-wrapped deflate data.
+    (See RFC 1950 and RFC 1951.)
+* deflateInit2() and inflateInit2() with negative windowBits in -1..-15 process
+    raw deflate data with no header or trailer.
+* deflateInit2() and inflateInit2() with windowBits in 16..31, i.e. 16
+    added to 0..15, process gzip-wrapped deflate data (RFC 1952).
+* inflateInit2() with windowBits in 32..47 (32 added to 0..15) will
+    automatically detect either a gzip or zlib header (but not raw deflate
+    data), and decompress accordingly.
+*/
+int wc_DeCompress_ex(byte* out, word32 outSz, const byte* in, word32 inSz,
+    int windowBits)
 /*
  * out - pointer to destination buffer
  * outSz - size of destination buffer
  * in - pointer to source buffer to compress
  * inSz - size of source to compress
- * flags - flags to control how compress operates
+ * windowBits - flags to control how decompress operates
  *
  * return:
  *    negative - error code
@@ -150,10 +170,11 @@
     stream.zfree = (free_func)myFree;
     stream.opaque = (voidpf)0;
 
-    if (inflateInit2(&stream, DEFLATE_DEFAULT_WINDOWBITS) != Z_OK)
+    if (inflateInit2(&stream, DEFLATE_DEFAULT_WINDOWBITS | windowBits) != Z_OK)
         return DECOMPRESS_INIT_E;
 
-    if (inflate(&stream, Z_FINISH) != Z_STREAM_END) {
+    result = inflate(&stream, Z_FINISH);
+    if (result != Z_STREAM_END) {
         inflateEnd(&stream);
         return DECOMPRESS_E;
     }
@@ -167,6 +188,12 @@
 }
 
 
+int wc_DeCompress(byte* out, word32 outSz, const byte* in, word32 inSz)
+{
+    return wc_DeCompress_ex(out, outSz, in, inSz, 0);
+}
+
+
 #endif /* HAVE_LIBZ */
 
 
--- a/wolfcrypt/src/cpuid.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/cpuid.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* cpuid.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -95,7 +95,7 @@
             if (cpuid_flag(7, 0, EBX,  8)) { cpuid_flags |= CPUID_BMI2  ; }
             if (cpuid_flag(1, 0, ECX, 30)) { cpuid_flags |= CPUID_RDRAND; }
             if (cpuid_flag(7, 0, EBX, 18)) { cpuid_flags |= CPUID_RDSEED; }
-            if (cpuid_flag(1, 0, ECX, 26)) { cpuid_flags |= CPUID_AESNI ; }
+            if (cpuid_flag(1, 0, ECX, 25)) { cpuid_flags |= CPUID_AESNI ; }
             if (cpuid_flag(7, 0, EBX, 19)) { cpuid_flags |= CPUID_ADX   ; }
             cpuid_check = 1;
         }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wolfcrypt/src/cryptocb.c	Thu Jun 04 23:57:22 2020 +0000
@@ -0,0 +1,649 @@
+/* cryptocb.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* This framework provides a central place for crypto hardware integration
+   using the devId scheme. If not supported return `CRYPTOCB_UNAVAILABLE`. */
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#ifdef WOLF_CRYPTO_CB
+
+#include <wolfssl/wolfcrypt/cryptocb.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/logging.h>
+
+
+/* TODO: Consider linked list with mutex */
+#ifndef MAX_CRYPTO_DEVID_CALLBACKS
+#define MAX_CRYPTO_DEVID_CALLBACKS 8
+#endif
+
+typedef struct CryptoCb {
+    int devId;
+    CryptoDevCallbackFunc cb;
+    void* ctx;
+} CryptoCb;
+static WOLFSSL_GLOBAL CryptoCb gCryptoDev[MAX_CRYPTO_DEVID_CALLBACKS];
+
+static CryptoCb* wc_CryptoCb_FindDevice(int devId)
+{
+    int i;
+    for (i=0; i<MAX_CRYPTO_DEVID_CALLBACKS; i++) {
+        if (gCryptoDev[i].devId == devId)
+            return &gCryptoDev[i];
+    }
+    return NULL;
+}
+static CryptoCb* wc_CryptoCb_FindDeviceByIndex(int startIdx)
+{
+    int i;
+    for (i=startIdx; i<MAX_CRYPTO_DEVID_CALLBACKS; i++) {
+        if (gCryptoDev[i].devId != INVALID_DEVID)
+            return &gCryptoDev[i];
+    }
+    return NULL;
+}
+
+static WC_INLINE int wc_CryptoCb_TranslateErrorCode(int ret)
+{
+    if (ret == NOT_COMPILED_IN) {
+        /* backwards compatibility for older NOT_COMPILED_IN syntax */
+        ret = CRYPTOCB_UNAVAILABLE;
+    }
+    return ret;
+}
+
+void wc_CryptoCb_Init(void)
+{
+    int i;
+    for (i=0; i<MAX_CRYPTO_DEVID_CALLBACKS; i++) {
+        gCryptoDev[i].devId = INVALID_DEVID;
+    }
+}
+
+int wc_CryptoCb_RegisterDevice(int devId, CryptoDevCallbackFunc cb, void* ctx)
+{
+    /* find existing or new */
+    CryptoCb* dev = wc_CryptoCb_FindDevice(devId);
+    if (dev == NULL)
+        dev = wc_CryptoCb_FindDevice(INVALID_DEVID);
+
+    if (dev == NULL)
+        return BUFFER_E; /* out of devices */
+
+    dev->devId = devId;
+    dev->cb = cb;
+    dev->ctx = ctx;
+
+    return 0;
+}
+
+void wc_CryptoCb_UnRegisterDevice(int devId)
+{
+    CryptoCb* dev = wc_CryptoCb_FindDevice(devId);
+    if (dev) {
+        XMEMSET(dev, 0, sizeof(*dev));
+        dev->devId = INVALID_DEVID;
+    }
+}
+
+#ifndef NO_RSA
+int wc_CryptoCb_Rsa(const byte* in, word32 inLen, byte* out,
+    word32* outLen, int type, RsaKey* key, WC_RNG* rng)
+{
+    int ret = CRYPTOCB_UNAVAILABLE;
+    CryptoCb* dev;
+
+    if (key == NULL)
+        return ret;
+
+    /* locate registered callback */
+    dev = wc_CryptoCb_FindDevice(key->devId);
+    if (dev && dev->cb) {
+        wc_CryptoInfo cryptoInfo;
+        XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo));
+        cryptoInfo.algo_type = WC_ALGO_TYPE_PK;
+        cryptoInfo.pk.type = WC_PK_TYPE_RSA;
+        cryptoInfo.pk.rsa.in = in;
+        cryptoInfo.pk.rsa.inLen = inLen;
+        cryptoInfo.pk.rsa.out = out;
+        cryptoInfo.pk.rsa.outLen = outLen;
+        cryptoInfo.pk.rsa.type = type;
+        cryptoInfo.pk.rsa.key = key;
+        cryptoInfo.pk.rsa.rng = rng;
+
+        ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx);
+    }
+
+    return wc_CryptoCb_TranslateErrorCode(ret);
+}
+
+#ifdef WOLFSSL_KEY_GEN
+int wc_CryptoCb_MakeRsaKey(RsaKey* key, int size, long e, WC_RNG* rng)
+{
+    int ret = CRYPTOCB_UNAVAILABLE;
+    CryptoCb* dev;
+
+    if (key == NULL)
+        return ret;
+
+    /* locate registered callback */
+    dev = wc_CryptoCb_FindDevice(key->devId);
+    if (dev && dev->cb) {
+        wc_CryptoInfo cryptoInfo;
+        XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo));
+        cryptoInfo.algo_type = WC_ALGO_TYPE_PK;
+        cryptoInfo.pk.type = WC_PK_TYPE_RSA_KEYGEN;
+        cryptoInfo.pk.rsakg.key = key;
+        cryptoInfo.pk.rsakg.size = size;
+        cryptoInfo.pk.rsakg.e = e;
+        cryptoInfo.pk.rsakg.rng = rng;
+
+        ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx);
+    }
+
+    return wc_CryptoCb_TranslateErrorCode(ret);
+}
+#endif
+#endif /* !NO_RSA */
+
+#ifdef HAVE_ECC
+int wc_CryptoCb_MakeEccKey(WC_RNG* rng, int keySize, ecc_key* key, int curveId)
+{
+    int ret = CRYPTOCB_UNAVAILABLE;
+    CryptoCb* dev;
+
+    if (key == NULL)
+        return ret;
+
+    /* locate registered callback */
+    dev = wc_CryptoCb_FindDevice(key->devId);
+    if (dev && dev->cb) {
+        wc_CryptoInfo cryptoInfo;
+        XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo));
+        cryptoInfo.algo_type = WC_ALGO_TYPE_PK;
+        cryptoInfo.pk.type = WC_PK_TYPE_EC_KEYGEN;
+        cryptoInfo.pk.eckg.rng = rng;
+        cryptoInfo.pk.eckg.size = keySize;
+        cryptoInfo.pk.eckg.key = key;
+        cryptoInfo.pk.eckg.curveId = curveId;
+
+        ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx);
+    }
+
+    return wc_CryptoCb_TranslateErrorCode(ret);
+}
+
+int wc_CryptoCb_Ecdh(ecc_key* private_key, ecc_key* public_key,
+    byte* out, word32* outlen)
+{
+    int ret = CRYPTOCB_UNAVAILABLE;
+    CryptoCb* dev;
+
+    if (private_key == NULL)
+        return ret;
+
+    /* locate registered callback */
+    dev = wc_CryptoCb_FindDevice(private_key->devId);
+    if (dev && dev->cb) {
+        wc_CryptoInfo cryptoInfo;
+        XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo));
+        cryptoInfo.algo_type = WC_ALGO_TYPE_PK;
+        cryptoInfo.pk.type = WC_PK_TYPE_ECDH;
+        cryptoInfo.pk.ecdh.private_key = private_key;
+        cryptoInfo.pk.ecdh.public_key = public_key;
+        cryptoInfo.pk.ecdh.out = out;
+        cryptoInfo.pk.ecdh.outlen = outlen;
+
+        ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx);
+    }
+
+    return wc_CryptoCb_TranslateErrorCode(ret);
+}
+
+int wc_CryptoCb_EccSign(const byte* in, word32 inlen, byte* out,
+    word32 *outlen, WC_RNG* rng, ecc_key* key)
+{
+    int ret = CRYPTOCB_UNAVAILABLE;
+    CryptoCb* dev;
+
+    if (key == NULL)
+        return ret;
+
+    /* locate registered callback */
+    dev = wc_CryptoCb_FindDevice(key->devId);
+    if (dev && dev->cb) {
+        wc_CryptoInfo cryptoInfo;
+        XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo));
+        cryptoInfo.algo_type = WC_ALGO_TYPE_PK;
+        cryptoInfo.pk.type = WC_PK_TYPE_ECDSA_SIGN;
+        cryptoInfo.pk.eccsign.in = in;
+        cryptoInfo.pk.eccsign.inlen = inlen;
+        cryptoInfo.pk.eccsign.out = out;
+        cryptoInfo.pk.eccsign.outlen = outlen;
+        cryptoInfo.pk.eccsign.rng = rng;
+        cryptoInfo.pk.eccsign.key = key;
+
+        ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx);
+    }
+
+    return wc_CryptoCb_TranslateErrorCode(ret);
+}
+
+int wc_CryptoCb_EccVerify(const byte* sig, word32 siglen,
+    const byte* hash, word32 hashlen, int* res, ecc_key* key)
+{
+    int ret = CRYPTOCB_UNAVAILABLE;
+    CryptoCb* dev;
+
+    if (key == NULL)
+        return ret;
+
+    /* locate registered callback */
+    dev = wc_CryptoCb_FindDevice(key->devId);
+    if (dev && dev->cb) {
+        wc_CryptoInfo cryptoInfo;
+        XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo));
+        cryptoInfo.algo_type = WC_ALGO_TYPE_PK;
+        cryptoInfo.pk.type = WC_PK_TYPE_ECDSA_VERIFY;
+        cryptoInfo.pk.eccverify.sig = sig;
+        cryptoInfo.pk.eccverify.siglen = siglen;
+        cryptoInfo.pk.eccverify.hash = hash;
+        cryptoInfo.pk.eccverify.hashlen = hashlen;
+        cryptoInfo.pk.eccverify.res = res;
+        cryptoInfo.pk.eccverify.key = key;
+
+        ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx);
+    }
+
+    return wc_CryptoCb_TranslateErrorCode(ret);
+}
+#endif /* HAVE_ECC */
+
+#ifndef NO_AES
+#ifdef HAVE_AESGCM
+int wc_CryptoCb_AesGcmEncrypt(Aes* aes, byte* out,
+                               const byte* in, word32 sz,
+                               const byte* iv, word32 ivSz,
+                               byte* authTag, word32 authTagSz,
+                               const byte* authIn, word32 authInSz)
+{
+    int ret = CRYPTOCB_UNAVAILABLE;
+    CryptoCb* dev;
+
+    /* locate registered callback */
+    if (aes) {
+        dev = wc_CryptoCb_FindDevice(aes->devId);
+    }
+    else {
+        /* locate first callback and try using it */
+        dev = wc_CryptoCb_FindDeviceByIndex(0);
+    }
+
+    if (dev && dev->cb) {
+        wc_CryptoInfo cryptoInfo;
+        XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo));
+        cryptoInfo.algo_type = WC_ALGO_TYPE_CIPHER;
+        cryptoInfo.cipher.type = WC_CIPHER_AES_GCM;
+        cryptoInfo.cipher.enc = 1;
+        cryptoInfo.cipher.aesgcm_enc.aes       = aes;
+        cryptoInfo.cipher.aesgcm_enc.out       = out;
+        cryptoInfo.cipher.aesgcm_enc.in        = in;
+        cryptoInfo.cipher.aesgcm_enc.sz        = sz;
+        cryptoInfo.cipher.aesgcm_enc.iv        = iv;
+        cryptoInfo.cipher.aesgcm_enc.ivSz      = ivSz;
+        cryptoInfo.cipher.aesgcm_enc.authTag   = authTag;
+        cryptoInfo.cipher.aesgcm_enc.authTagSz = authTagSz;
+        cryptoInfo.cipher.aesgcm_enc.authIn    = authIn;
+        cryptoInfo.cipher.aesgcm_enc.authInSz  = authInSz;
+
+        ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx);
+    }
+
+    return wc_CryptoCb_TranslateErrorCode(ret);
+}
+
+int wc_CryptoCb_AesGcmDecrypt(Aes* aes, byte* out,
+                               const byte* in, word32 sz,
+                               const byte* iv, word32 ivSz,
+                               const byte* authTag, word32 authTagSz,
+                               const byte* authIn, word32 authInSz)
+{
+    int ret = CRYPTOCB_UNAVAILABLE;
+    CryptoCb* dev;
+
+    /* locate registered callback */
+    if (aes) {
+        dev = wc_CryptoCb_FindDevice(aes->devId);
+    }
+    else {
+        /* locate first callback and try using it */
+        dev = wc_CryptoCb_FindDeviceByIndex(0);
+    }
+
+    if (dev && dev->cb) {
+        wc_CryptoInfo cryptoInfo;
+        XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo));
+        cryptoInfo.algo_type = WC_ALGO_TYPE_CIPHER;
+        cryptoInfo.cipher.type = WC_CIPHER_AES_GCM;
+        cryptoInfo.cipher.enc = 0;
+        cryptoInfo.cipher.aesgcm_dec.aes       = aes;
+        cryptoInfo.cipher.aesgcm_dec.out       = out;
+        cryptoInfo.cipher.aesgcm_dec.in        = in;
+        cryptoInfo.cipher.aesgcm_dec.sz        = sz;
+        cryptoInfo.cipher.aesgcm_dec.iv        = iv;
+        cryptoInfo.cipher.aesgcm_dec.ivSz      = ivSz;
+        cryptoInfo.cipher.aesgcm_dec.authTag   = authTag;
+        cryptoInfo.cipher.aesgcm_dec.authTagSz = authTagSz;
+        cryptoInfo.cipher.aesgcm_dec.authIn    = authIn;
+        cryptoInfo.cipher.aesgcm_dec.authInSz  = authInSz;
+
+        ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx);
+    }
+
+    return wc_CryptoCb_TranslateErrorCode(ret);
+}
+#endif /* HAVE_AESGCM */
+
+#ifdef HAVE_AES_CBC
+int wc_CryptoCb_AesCbcEncrypt(Aes* aes, byte* out,
+                               const byte* in, word32 sz)
+{
+    int ret = CRYPTOCB_UNAVAILABLE;
+    CryptoCb* dev;
+
+    /* locate registered callback */
+    if (aes) {
+        dev = wc_CryptoCb_FindDevice(aes->devId);
+    }
+    else {
+        /* locate first callback and try using it */
+        dev = wc_CryptoCb_FindDeviceByIndex(0);
+    }
+
+    if (dev && dev->cb) {
+        wc_CryptoInfo cryptoInfo;
+        XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo));
+        cryptoInfo.algo_type = WC_ALGO_TYPE_CIPHER;
+        cryptoInfo.cipher.type = WC_CIPHER_AES_CBC;
+        cryptoInfo.cipher.enc = 1;
+        cryptoInfo.cipher.aescbc.aes = aes;
+        cryptoInfo.cipher.aescbc.out = out;
+        cryptoInfo.cipher.aescbc.in = in;
+        cryptoInfo.cipher.aescbc.sz = sz;
+
+        ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx);
+    }
+
+    return wc_CryptoCb_TranslateErrorCode(ret);
+}
+
+int wc_CryptoCb_AesCbcDecrypt(Aes* aes, byte* out,
+                               const byte* in, word32 sz)
+{
+    int ret = CRYPTOCB_UNAVAILABLE;
+    CryptoCb* dev;
+
+    /* locate registered callback */
+    if (aes) {
+        dev = wc_CryptoCb_FindDevice(aes->devId);
+    }
+    else {
+        /* locate first callback and try using it */
+        dev = wc_CryptoCb_FindDeviceByIndex(0);
+    }
+
+    if (dev && dev->cb) {
+        wc_CryptoInfo cryptoInfo;
+        XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo));
+        cryptoInfo.algo_type = WC_ALGO_TYPE_CIPHER;
+        cryptoInfo.cipher.type = WC_CIPHER_AES_CBC;
+        cryptoInfo.cipher.enc = 0;
+        cryptoInfo.cipher.aescbc.aes = aes;
+        cryptoInfo.cipher.aescbc.out = out;
+        cryptoInfo.cipher.aescbc.in = in;
+        cryptoInfo.cipher.aescbc.sz = sz;
+
+        ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx);
+    }
+
+    return wc_CryptoCb_TranslateErrorCode(ret);
+}
+#endif /* HAVE_AES_CBC */
+#endif /* !NO_AES */
+
+#ifndef NO_DES3
+int wc_CryptoCb_Des3Encrypt(Des3* des3, byte* out,
+                               const byte* in, word32 sz)
+{
+    int ret = CRYPTOCB_UNAVAILABLE;
+    CryptoCb* dev;
+
+    /* locate registered callback */
+    if (des3) {
+        dev = wc_CryptoCb_FindDevice(des3->devId);
+    }
+    else {
+        /* locate first callback and try using it */
+        dev = wc_CryptoCb_FindDeviceByIndex(0);
+    }
+
+    if (dev && dev->cb) {
+        wc_CryptoInfo cryptoInfo;
+        XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo));
+        cryptoInfo.algo_type = WC_ALGO_TYPE_CIPHER;
+        cryptoInfo.cipher.type = WC_CIPHER_DES3;
+        cryptoInfo.cipher.enc = 1;
+        cryptoInfo.cipher.des3.des = des3;
+        cryptoInfo.cipher.des3.out = out;
+        cryptoInfo.cipher.des3.in = in;
+        cryptoInfo.cipher.des3.sz = sz;
+
+        ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx);
+    }
+
+    return wc_CryptoCb_TranslateErrorCode(ret);
+}
+
+int wc_CryptoCb_Des3Decrypt(Des3* des3, byte* out,
+                               const byte* in, word32 sz)
+{
+    int ret = CRYPTOCB_UNAVAILABLE;
+    CryptoCb* dev;
+
+    /* locate registered callback */
+    if (des3) {
+        dev = wc_CryptoCb_FindDevice(des3->devId);
+    }
+    else {
+        /* locate first callback and try using it */
+        dev = wc_CryptoCb_FindDeviceByIndex(0);
+    }
+
+    if (dev && dev->cb) {
+        wc_CryptoInfo cryptoInfo;
+        XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo));
+        cryptoInfo.algo_type = WC_ALGO_TYPE_CIPHER;
+        cryptoInfo.cipher.type = WC_CIPHER_DES3;
+        cryptoInfo.cipher.enc = 0;
+        cryptoInfo.cipher.des3.des = des3;
+        cryptoInfo.cipher.des3.out = out;
+        cryptoInfo.cipher.des3.in = in;
+        cryptoInfo.cipher.des3.sz = sz;
+
+        ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx);
+    }
+
+    return wc_CryptoCb_TranslateErrorCode(ret);
+}
+#endif /* !NO_DES3 */
+
+#ifndef NO_SHA
+int wc_CryptoCb_ShaHash(wc_Sha* sha, const byte* in,
+    word32 inSz, byte* digest)
+{
+    int ret = CRYPTOCB_UNAVAILABLE;
+    CryptoCb* dev;
+
+    /* locate registered callback */
+    if (sha) {
+        dev = wc_CryptoCb_FindDevice(sha->devId);
+    }
+    else {
+        /* locate first callback and try using it */
+        dev = wc_CryptoCb_FindDeviceByIndex(0);
+    }
+
+    if (dev && dev->cb) {
+        wc_CryptoInfo cryptoInfo;
+        XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo));
+        cryptoInfo.algo_type = WC_ALGO_TYPE_HASH;
+        cryptoInfo.hash.type = WC_HASH_TYPE_SHA;
+        cryptoInfo.hash.sha1 = sha;
+        cryptoInfo.hash.in = in;
+        cryptoInfo.hash.inSz = inSz;
+        cryptoInfo.hash.digest = digest;
+
+        ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx);
+    }
+
+    return wc_CryptoCb_TranslateErrorCode(ret);
+}
+#endif /* !NO_SHA */
+
+#ifndef NO_SHA256
+int wc_CryptoCb_Sha256Hash(wc_Sha256* sha256, const byte* in,
+    word32 inSz, byte* digest)
+{
+    int ret = CRYPTOCB_UNAVAILABLE;
+    CryptoCb* dev;
+
+    /* locate registered callback */
+    if (sha256) {
+        dev = wc_CryptoCb_FindDevice(sha256->devId);
+    }
+    else {
+        /* locate first callback and try using it */
+        dev = wc_CryptoCb_FindDeviceByIndex(0);
+    }
+
+    if (dev && dev->cb) {
+        wc_CryptoInfo cryptoInfo;
+        XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo));
+        cryptoInfo.algo_type = WC_ALGO_TYPE_HASH;
+        cryptoInfo.hash.type = WC_HASH_TYPE_SHA256;
+        cryptoInfo.hash.sha256 = sha256;
+        cryptoInfo.hash.in = in;
+        cryptoInfo.hash.inSz = inSz;
+        cryptoInfo.hash.digest = digest;
+
+        ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx);
+    }
+
+    return wc_CryptoCb_TranslateErrorCode(ret);
+}
+#endif /* !NO_SHA256 */
+
+#ifndef NO_HMAC
+int wc_CryptoCb_Hmac(Hmac* hmac, int macType, const byte* in, word32 inSz,
+    byte* digest)
+{
+    int ret = CRYPTOCB_UNAVAILABLE;
+    CryptoCb* dev;
+
+    if (hmac == NULL)
+        return ret;
+
+    /* locate registered callback */
+    dev = wc_CryptoCb_FindDevice(hmac->devId);
+    if (dev && dev->cb) {
+        wc_CryptoInfo cryptoInfo;
+        XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo));
+        cryptoInfo.algo_type = WC_ALGO_TYPE_HMAC;
+        cryptoInfo.hmac.macType = macType;
+        cryptoInfo.hmac.in = in;
+        cryptoInfo.hmac.inSz = inSz;
+        cryptoInfo.hmac.digest = digest;
+        cryptoInfo.hmac.hmac = hmac;
+
+        ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx);
+    }
+
+    return wc_CryptoCb_TranslateErrorCode(ret);
+}
+#endif /* !NO_HMAC */
+
+#ifndef WC_NO_RNG
+int wc_CryptoCb_RandomBlock(WC_RNG* rng, byte* out, word32 sz)
+{
+    int ret = CRYPTOCB_UNAVAILABLE;
+    CryptoCb* dev;
+
+    /* locate registered callback */
+    if (rng) {
+        dev = wc_CryptoCb_FindDevice(rng->devId);
+    }
+    else {
+        /* locate first callback and try using it */
+        dev = wc_CryptoCb_FindDeviceByIndex(0);
+    }
+
+    if (dev && dev->cb) {
+        wc_CryptoInfo cryptoInfo;
+        XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo));
+        cryptoInfo.algo_type = WC_ALGO_TYPE_RNG;
+        cryptoInfo.rng.rng = rng;
+        cryptoInfo.rng.out = out;
+        cryptoInfo.rng.sz = sz;
+
+        ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx);
+    }
+
+    return wc_CryptoCb_TranslateErrorCode(ret);
+}
+
+int wc_CryptoCb_RandomSeed(OS_Seed* os, byte* seed, word32 sz)
+{
+    int ret = CRYPTOCB_UNAVAILABLE;
+    CryptoCb* dev;
+
+    /* locate registered callback */
+    dev = wc_CryptoCb_FindDevice(os->devId);
+    if (dev && dev->cb) {
+        wc_CryptoInfo cryptoInfo;
+        XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo));
+        cryptoInfo.algo_type = WC_ALGO_TYPE_SEED;
+        cryptoInfo.seed.os = os;
+        cryptoInfo.seed.seed = seed;
+        cryptoInfo.seed.sz = sz;
+
+        ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx);
+    }
+
+    return wc_CryptoCb_TranslateErrorCode(ret);
+}
+#endif /* !WC_NO_RNG */
+
+#endif /* WOLF_CRYPTO_CB */
+
--- a/wolfcrypt/src/cryptodev.c	Sat Aug 18 22:20:43 2018 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,208 +0,0 @@
-/* cryptodev.c
- *
- * Copyright (C) 2006-2018 wolfSSL Inc.
- *
- * This file is part of wolfSSL.
- *
- * wolfSSL is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3 of the License, or
- * (at your option) any later version.
- *
- * wolfSSL is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-/* This framework provides a central place for crypto hardware integration
-   using the devId scheme. If not supported return `NOT_COMPILED_IN`. */
-
-#ifdef HAVE_CONFIG_H
-    #include <config.h>
-#endif
-
-#include <wolfssl/wolfcrypt/settings.h>
-
-#ifdef WOLF_CRYPTO_DEV
-
-#include <wolfssl/wolfcrypt/cryptodev.h>
-#include <wolfssl/wolfcrypt/error-crypt.h>
-#include <wolfssl/wolfcrypt/logging.h>
-
-
-/* TODO: Consider linked list with mutex */
-#ifndef MAX_CRYPTO_DEVICES
-#define MAX_CRYPTO_DEVICES 8
-#endif
-
-typedef struct CryptoDev {
-    int devId;
-    CryptoDevCallbackFunc cb;
-    void* ctx;
-} CryptoDev;
-static CryptoDev gCryptoDev[MAX_CRYPTO_DEVICES];
-
-static CryptoDev* wc_CryptoDev_FindDevice(int devId)
-{
-    int i;
-    for (i=0; i<MAX_CRYPTO_DEVICES; i++) {
-        if (gCryptoDev[i].devId == devId)
-            return &gCryptoDev[i];
-    }
-    return NULL;
-}
-
-void wc_CryptoDev_Init(void)
-{
-    int i;
-    for (i=0; i<MAX_CRYPTO_DEVICES; i++)
-        gCryptoDev[i].devId = INVALID_DEVID;
-}
-
-int wc_CryptoDev_RegisterDevice(int devId, CryptoDevCallbackFunc cb, void* ctx)
-{
-    /* find existing or new */
-    CryptoDev* dev = wc_CryptoDev_FindDevice(devId);
-    if (dev == NULL)
-        dev = wc_CryptoDev_FindDevice(INVALID_DEVID);
-
-    if (dev == NULL)
-        return BUFFER_E; /* out of devices */
-
-    dev->devId = devId;
-    dev->cb = cb;
-    dev->ctx = ctx;
-
-    return 0;
-}
-
-void wc_CryptoDev_UnRegisterDevice(int devId)
-{
-    CryptoDev* dev = wc_CryptoDev_FindDevice(devId);
-    if (dev) {
-        XMEMSET(dev, 0, sizeof(*dev));
-        dev->devId = INVALID_DEVID;
-    }
-}
-
-#ifndef NO_RSA
-int wc_CryptoDev_Rsa(const byte* in, word32 inLen, byte* out,
-    word32* outLen, int type, RsaKey* key, WC_RNG* rng)
-{
-    int ret = NOT_COMPILED_IN;
-    CryptoDev* dev;
-
-    /* locate registered callback */
-    dev = wc_CryptoDev_FindDevice(key->devId);
-    if (dev) {
-        if (dev->cb) {
-            wc_CryptoInfo cryptoInfo;
-            XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo));
-            cryptoInfo.algo_type = WC_ALGO_TYPE_PK;
-            cryptoInfo.pk.type = WC_PK_TYPE_RSA;
-            cryptoInfo.pk.rsa.in = in;
-            cryptoInfo.pk.rsa.inLen = inLen;
-            cryptoInfo.pk.rsa.out = out;
-            cryptoInfo.pk.rsa.outLen = outLen;
-            cryptoInfo.pk.rsa.type = type;
-            cryptoInfo.pk.rsa.key = key;
-            cryptoInfo.pk.rsa.rng = rng;
-
-            ret = dev->cb(key->devId, &cryptoInfo, dev->ctx);
-        }
-    }
-
-    return ret;
-}
-#endif /* !NO_RSA */
-
-#ifdef HAVE_ECC
-int wc_CryptoDev_Ecdh(ecc_key* private_key, ecc_key* public_key,
-    byte* out, word32* outlen)
-{
-    int ret = NOT_COMPILED_IN;
-    CryptoDev* dev;
-
-    /* locate registered callback */
-    dev = wc_CryptoDev_FindDevice(private_key->devId);
-    if (dev) {
-        if (dev->cb) {
-            wc_CryptoInfo cryptoInfo;
-            XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo));
-            cryptoInfo.algo_type = WC_ALGO_TYPE_PK;
-            cryptoInfo.pk.type = WC_PK_TYPE_ECDH;
-            cryptoInfo.pk.ecdh.private_key = private_key;
-            cryptoInfo.pk.ecdh.public_key = public_key;
-            cryptoInfo.pk.ecdh.out = out;
-            cryptoInfo.pk.ecdh.outlen = outlen;
-
-            ret = dev->cb(private_key->devId, &cryptoInfo, dev->ctx);
-        }
-    }
-
-    return ret;
-}
-
-int wc_CryptoDev_EccSign(const byte* in, word32 inlen, byte* out,
-    word32 *outlen, WC_RNG* rng, ecc_key* key)
-{
-    int ret = NOT_COMPILED_IN;
-    CryptoDev* dev;
-
-    /* locate registered callback */
-    dev = wc_CryptoDev_FindDevice(key->devId);
-    if (dev) {
-        if (dev->cb) {
-            wc_CryptoInfo cryptoInfo;
-            XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo));
-            cryptoInfo.algo_type = WC_ALGO_TYPE_PK;
-            cryptoInfo.pk.type = WC_PK_TYPE_ECDSA_SIGN;
-            cryptoInfo.pk.eccsign.in = in;
-            cryptoInfo.pk.eccsign.inlen = inlen;
-            cryptoInfo.pk.eccsign.out = out;
-            cryptoInfo.pk.eccsign.outlen = outlen;
-            cryptoInfo.pk.eccsign.rng = rng;
-            cryptoInfo.pk.eccsign.key = key;
-
-            ret = dev->cb(key->devId, &cryptoInfo, dev->ctx);
-        }
-    }
-
-    return ret;
-}
-
-int wc_CryptoDev_EccVerify(const byte* sig, word32 siglen,
-    const byte* hash, word32 hashlen, int* res, ecc_key* key)
-{
-    int ret = NOT_COMPILED_IN;
-    CryptoDev* dev;
-
-    /* locate registered callback */
-    dev = wc_CryptoDev_FindDevice(key->devId);
-    if (dev) {
-        if (dev->cb) {
-            wc_CryptoInfo cryptoInfo;
-            XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo));
-            cryptoInfo.algo_type = WC_ALGO_TYPE_PK;
-            cryptoInfo.pk.type = WC_PK_TYPE_ECDSA_VERIFY;
-            cryptoInfo.pk.eccverify.sig = sig;
-            cryptoInfo.pk.eccverify.siglen = siglen;
-            cryptoInfo.pk.eccverify.hash = hash;
-            cryptoInfo.pk.eccverify.hashlen = hashlen;
-            cryptoInfo.pk.eccverify.res = res;
-            cryptoInfo.pk.eccverify.key = key;
-
-            ret = dev->cb(key->devId, &cryptoInfo, dev->ctx);
-        }
-    }
-
-    return ret;
-}
-#endif /* HAVE_ECC */
-
-#endif /* WOLF_CRYPTO_DEV */
-
--- a/wolfcrypt/src/curve25519.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/curve25519.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* curve25519.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -257,6 +257,64 @@
     return 0;
 }
 
+/* Check the public key value (big or little endian)
+ *
+ * pub     Public key bytes.
+ * pubSz   Size of public key in bytes.
+ * endian  Public key bytes passed in as big-endian or little-endian.
+ * returns BAD_FUNC_ARGS when pub is NULL,
+ *         BUFFER_E when size of public key is zero;
+ *         ECC_OUT_OF_RANGE_E if the high bit is set;
+ *         ECC_BAD_ARG_E if key length is not 32 bytes, public key value is
+ *         zero or one; and
+ *         0 otherwise.
+ */
+int wc_curve25519_check_public(const byte* pub, word32 pubSz, int endian)
+{
+    word32 i;
+
+    if (pub == NULL)
+        return BAD_FUNC_ARG;
+
+    /* Check for empty key data */
+    if (pubSz == 0)
+        return BUFFER_E;
+
+    /* Check key length */
+    if (pubSz != CURVE25519_KEYSIZE)
+        return ECC_BAD_ARG_E;
+
+
+    if (endian == EC25519_LITTLE_ENDIAN) {
+        /* Check for value of zero or one */
+        for (i = pubSz - 1; i > 0; i--) {
+            if (pub[i] != 0)
+                break;
+        }
+        if (i == 0 && (pub[0] == 0 || pub[0] == 1))
+            return ECC_BAD_ARG_E;
+
+        /* Check high bit set */
+        if (pub[CURVE25519_KEYSIZE-1] & 0x80)
+            return ECC_OUT_OF_RANGE_E;
+    }
+    else {
+        /* Check for value of zero or one */
+        for (i = 0; i < pubSz-1; i++) {
+            if (pub[i] != 0)
+                break;
+        }
+        if (i == pubSz - 1 && (pub[i] == 0 || pub[i] == 1))
+            return ECC_BAD_ARG_E;
+
+        /* Check high bit set */
+        if (pub[0] & 0x80)
+            return ECC_OUT_OF_RANGE_E;
+    }
+
+    return 0;
+}
+
 #endif /* HAVE_CURVE25519_KEY_IMPORT */
 
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wolfcrypt/src/curve448.c	Thu Jun 04 23:57:22 2020 +0000
@@ -0,0 +1,636 @@
+/* curve448.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/* Implemented to: RFC 7748 */
+
+/* Based On Daniel J Bernstein's curve25519 Public Domain ref10 work.
+ * Reworked for curve448 by Sean Parkinson.
+ */
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#ifdef HAVE_CURVE448
+
+#include <wolfssl/wolfcrypt/curve448.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
+
+/* Make a new curve448 private/public key.
+ *
+ * rng      [in]  Random number generator.
+ * keysize  [in]  Size of the key to generate.
+ * key      [in]  Curve448 key object.
+ * returns BAD_FUNC_ARG when rng or key are NULL,
+ *         ECC_BAD_ARG_E when keysize is not CURVE448_KEY_SIZE,
+ *         0 otherwise.
+ */
+int wc_curve448_make_key(WC_RNG* rng, int keysize, curve448_key* key)
+{
+    unsigned char basepoint[CURVE448_KEY_SIZE] = {5};
+    int  ret = 0;
+
+    if ((key == NULL) || (rng == NULL)) {
+        ret = BAD_FUNC_ARG;
+    }
+
+    /* currently only a key size of 56 bytes is used */
+    if ((ret == 0) && (keysize != CURVE448_KEY_SIZE)) {
+        ret = ECC_BAD_ARG_E;
+    }
+
+    if (ret == 0) {
+        fe448_init();
+
+        /* random number for private key */
+        ret = wc_RNG_GenerateBlock(rng, key->k, keysize);
+    }
+    if (ret == 0) {
+        /* Clamp the private key */
+        key->k[0] &= 0xfc;
+        key->k[CURVE448_KEY_SIZE-1] |= 0x80;
+
+        /* compute public key */
+        ret = curve448(key->p, key->k, basepoint);
+        if (ret != 0) {
+            ForceZero(key->k, keysize);
+            ForceZero(key->p, keysize);
+        }
+    }
+
+    return ret;
+}
+
+#ifdef HAVE_CURVE448_SHARED_SECRET
+
+/* Calculate the shared secret from the private key and peer's public key.
+ * Calculation over curve448.
+ * Secret encoded big-endian.
+ *
+ * private_key  [in]      Curve448 private key.
+ * public_key   [in]      Curve448 public key.
+ * out          [in]      Array to hold shared secret.
+ * outLen       [in/out]  On in, the number of bytes in array.
+ *                        On out, the number bytes put into array.
+ * returns BAD_FUNC_ARG when a parameter is NULL or outLen is less than
+ *         CURVE448_KEY_SIZE,
+ *         0 otherwise.
+ */
+int wc_curve448_shared_secret(curve448_key* private_key,
+                              curve448_key* public_key,
+                              byte* out, word32* outLen)
+{
+    return wc_curve448_shared_secret_ex(private_key, public_key, out, outLen,
+                                        EC448_BIG_ENDIAN);
+}
+
+/* Calculate the shared secret from the private key and peer's public key.
+ * Calculation over curve448.
+ *
+ * private_key  [in]      Curve448 private key.
+ * public_key   [in]      Curve448 public key.
+ * out          [in]      Array to hold shared secret.
+ * outLen       [in/out]  On in, the number of bytes in array.
+ *                        On out, the number bytes put into array.
+ * endian       [in]      Endianness to use when encoding number in array.
+ * returns BAD_FUNC_ARG when a parameter is NULL or outLen is less than
+ *         CURVE448_PUB_KEY_SIZE,
+ *         0 otherwise.
+ */
+int wc_curve448_shared_secret_ex(curve448_key* private_key,
+                                 curve448_key* public_key,
+                                 byte* out, word32* outLen, int endian)
+{
+    unsigned char o[CURVE448_PUB_KEY_SIZE];
+    int ret = 0;
+    int i;
+
+    /* sanity check */
+    if ((private_key == NULL) || (public_key == NULL) || (out == NULL) ||
+                        (outLen == NULL) || (*outLen < CURVE448_PUB_KEY_SIZE)) {
+        ret = BAD_FUNC_ARG;
+    }
+
+    if (ret == 0) {
+        ret = curve448(o, private_key->k, public_key->p);
+    }
+    if (ret == 0) {
+        if (endian == EC448_BIG_ENDIAN) {
+            /* put shared secret key in Big Endian format */
+            for (i = 0; i < CURVE448_PUB_KEY_SIZE; i++) {
+                 out[i] = o[CURVE448_PUB_KEY_SIZE - i -1];
+            }
+        }
+        else {
+            /* put shared secret key in Little Endian format */
+            XMEMCPY(out, o, CURVE448_PUB_KEY_SIZE);
+        }
+
+        *outLen = CURVE448_PUB_KEY_SIZE;
+    }
+
+    ForceZero(o, CURVE448_PUB_KEY_SIZE);
+
+    return ret;
+}
+
+#endif /* HAVE_CURVE448_SHARED_SECRET */
+
+#ifdef HAVE_CURVE448_KEY_EXPORT
+
+/* Export the curve448 public key.
+ * Public key encoded big-endian.
+ *
+ * key     [in]      Curve448 public key.
+ * out     [in]      Array to hold public key.
+ * outLen  [in/out]  On in, the number of bytes in array.
+ *                   On out, the number bytes put into array.
+ * returns BAD_FUNC_ARG when a parameter is NULL,
+ *         ECC_BAD_ARG_E when outLen is less than CURVE448_PUB_KEY_SIZE,
+ *         0 otherwise.
+ */
+int wc_curve448_export_public(curve448_key* key, byte* out, word32* outLen)
+{
+    return wc_curve448_export_public_ex(key, out, outLen, EC448_BIG_ENDIAN);
+}
+
+/* Export the curve448 public key.
+ *
+ * key     [in]      Curve448 public key.
+ * out     [in]      Array to hold public key.
+ * outLen  [in/out]  On in, the number of bytes in array.
+ *                   On out, the number bytes put into array.
+ * endian  [in]      Endianness to use when encoding number in array.
+ * returns BAD_FUNC_ARG when a parameter is NULL,
+ *         ECC_BAD_ARG_E when outLen is less than CURVE448_PUB_KEY_SIZE,
+ *         0 otherwise.
+ */
+int wc_curve448_export_public_ex(curve448_key* key, byte* out, word32* outLen,
+                                 int endian)
+{
+    int ret = 0;
+    int i;
+
+    if ((key == NULL) || (out == NULL) || (outLen == NULL)) {
+        ret = BAD_FUNC_ARG;
+    }
+
+    /* check and set outgoing key size */
+    if ((ret == 0) && (*outLen < CURVE448_PUB_KEY_SIZE)) {
+        *outLen = CURVE448_PUB_KEY_SIZE;
+        ret = ECC_BAD_ARG_E;
+    }
+    if (ret == 0) {
+        *outLen = CURVE448_PUB_KEY_SIZE;
+
+        if (endian == EC448_BIG_ENDIAN) {
+            /* read keys in Big Endian format */
+            for (i = 0; i < CURVE448_PUB_KEY_SIZE; i++) {
+                out[i] = key->p[CURVE448_PUB_KEY_SIZE - i - 1];
+            }
+        }
+        else {
+            XMEMCPY(out, key->p, CURVE448_PUB_KEY_SIZE);
+        }
+    }
+
+    return ret;
+}
+
+#endif /* HAVE_CURVE448_KEY_EXPORT */
+
+#ifdef HAVE_CURVE448_KEY_IMPORT
+
+/* Import a curve448 public key from a byte array.
+ * Public key encoded in big-endian.
+ *
+ * in      [in]  Array holding public key.
+ * inLen   [in]  Number of bytes of data in array.
+ * key     [in]  Curve448 public key.
+ * returns BAD_FUNC_ARG when a parameter is NULL,
+ *         ECC_BAD_ARG_E when inLen is less than CURVE448_PUB_KEY_SIZE,
+ *         0 otherwise.
+ */
+int wc_curve448_import_public(const byte* in, word32 inLen, curve448_key* key)
+{
+    return wc_curve448_import_public_ex(in, inLen, key, EC448_BIG_ENDIAN);
+}
+
+/* Import a curve448 public key from a byte array.
+ *
+ * in      [in]  Array holding public key.
+ * inLen   [in]  Number of bytes of data in array.
+ * key     [in]  Curve448 public key.
+ * endian  [in]  Endianness of encoded number in byte array.
+ * returns BAD_FUNC_ARG when a parameter is NULL,
+ *         ECC_BAD_ARG_E when inLen is less than CURVE448_PUB_KEY_SIZE,
+ *         0 otherwise.
+ */
+int wc_curve448_import_public_ex(const byte* in, word32 inLen,
+                                 curve448_key* key, int endian)
+{
+    int ret = 0;
+    int i;
+
+    /* sanity check */
+    if ((key == NULL) || (in == NULL)) {
+        ret = BAD_FUNC_ARG;
+    }
+
+    /* check size of incoming keys */
+    if ((ret == 0) && (inLen != CURVE448_PUB_KEY_SIZE)) {
+       ret = ECC_BAD_ARG_E;
+    }
+
+    if (ret == 0) {
+        if (endian == EC448_BIG_ENDIAN) {
+            /* read keys in Big Endian format */
+            for (i = 0; i < CURVE448_PUB_KEY_SIZE; i++) {
+                key->p[i] = in[CURVE448_PUB_KEY_SIZE - i - 1];
+            }
+        }
+        else
+            XMEMCPY(key->p, in, inLen);
+    }
+
+    return ret;
+}
+
+/* Check the public key value (big or little endian)
+ *
+ * pub     [in]  Public key bytes.
+ * pubSz   [in]  Size of public key in bytes.
+ * endian  [in]  Public key bytes passed in as big-endian or little-endian.
+ * returns BAD_FUNC_ARGS when pub is NULL,
+ *         ECC_BAD_ARG_E when key length is not 56 bytes, public key value is
+ *         zero or one;
+ *         BUFFER_E when size of public key is zero;
+ *         0 otherwise.
+ */
+int wc_curve448_check_public(const byte* pub, word32 pubSz, int endian)
+{
+    int ret = 0;
+    word32 i;
+
+    if (pub == NULL) {
+        ret = BAD_FUNC_ARG;
+    }
+
+    /* Check for empty key data */
+    if ((ret == 0) && (pubSz == 0)) {
+        ret = BUFFER_E;
+    }
+
+    /* Check key length */
+    if ((ret == 0) && (pubSz != CURVE448_PUB_KEY_SIZE)) {
+        ret = ECC_BAD_ARG_E;
+    }
+
+    if (ret == 0) {
+        if (endian == EC448_LITTLE_ENDIAN) {
+            /* Check for value of zero or one */
+            for (i = pubSz - 1; i > 0; i--) {
+                if (pub[i] != 0) {
+                    break;
+                }
+            }
+            if ((i == 0) && (pub[0] == 0 || pub[0] == 1)) {
+                return ECC_BAD_ARG_E;
+            }
+        }
+        else {
+            /* Check for value of zero or one */
+            for (i = 0; i < pubSz-1; i++) {
+                if (pub[i] != 0) {
+                    break;
+                }
+            }
+            if ((i == pubSz - 1) && (pub[i] == 0 || pub[i] == 1)) {
+                ret = ECC_BAD_ARG_E;
+            }
+        }
+    }
+
+    return ret;
+}
+
+#endif /* HAVE_CURVE448_KEY_IMPORT */
+
+
+#ifdef HAVE_CURVE448_KEY_EXPORT
+
+/* Export the curve448 private key raw form.
+ * Private key encoded big-endian.
+ *
+ * key     [in]      Curve448 private key.
+ * out     [in]      Array to hold private key.
+ * outLen  [in/out]  On in, the number of bytes in array.
+ *                   On out, the number bytes put into array.
+ * returns BAD_FUNC_ARG when a parameter is NULL,
+ *         ECC_BAD_ARG_E when outLen is less than CURVE448_KEY_SIZE,
+ *         0 otherwise.
+ */
+int wc_curve448_export_private_raw(curve448_key* key, byte* out, word32* outLen)
+{
+    return wc_curve448_export_private_raw_ex(key, out, outLen,
+                                             EC448_BIG_ENDIAN);
+}
+
+/* Export the curve448 private key raw form.
+ *
+ * key     [in]      Curve448 private key.
+ * out     [in]      Array to hold private key.
+ * outLen  [in/out]  On in, the number of bytes in array.
+ *                   On out, the number bytes put into array.
+ * endian  [in]      Endianness to use when encoding number in array.
+ * returns BAD_FUNC_ARG when a parameter is NULL,
+ *         ECC_BAD_ARG_E when outLen is less than CURVE448_KEY_SIZE,
+ *         0 otherwise.
+ */
+int wc_curve448_export_private_raw_ex(curve448_key* key, byte* out,
+                                      word32* outLen, int endian)
+{
+    int ret = 0;
+    int i;
+
+    /* sanity check */
+    if ((key == NULL) || (out == NULL) || (outLen == NULL)) {
+        ret = BAD_FUNC_ARG;
+    }
+
+    /* check size of outgoing buffer */
+    if ((ret == 0) && (*outLen < CURVE448_KEY_SIZE)) {
+        *outLen = CURVE448_KEY_SIZE;
+        ret = ECC_BAD_ARG_E;
+    }
+    if (ret == 0) {
+        *outLen = CURVE448_KEY_SIZE;
+
+        if (endian == EC448_BIG_ENDIAN) {
+            /* put the key in Big Endian format */
+            for (i = 0; i < CURVE448_KEY_SIZE; i++) {
+                out[i] = key->k[CURVE448_KEY_SIZE - i - 1];
+            }
+        }
+        else {
+            XMEMCPY(out, key->k, CURVE448_KEY_SIZE);
+        }
+    }
+
+    return ret;
+}
+
+/* Export the curve448 private and public keys in raw form.
+ * Private and public key encoded big-endian.
+ *
+ * key     [in]      Curve448 private key.
+ * priv    [in]      Array to hold private key.
+ * privSz  [in/out]  On in, the number of bytes in private key array.
+ *                   On out, the number bytes put into private key array.
+ * pub     [in]      Array to hold public key.
+ * pubSz   [in/out]  On in, the number of bytes in public key array.
+ *                   On out, the number bytes put into public key array.
+ * returns BAD_FUNC_ARG when a parameter is NULL,
+ *         ECC_BAD_ARG_E when privSz is less than CURVE448_KEY_SIZE or pubSz is
+ *         less than CURVE448_PUB_KEY_SIZE,
+ *         0 otherwise.
+ */
+int wc_curve448_export_key_raw(curve448_key* key, byte* priv, word32 *privSz,
+                               byte* pub, word32 *pubSz)
+{
+    return wc_curve448_export_key_raw_ex(key, priv, privSz, pub, pubSz,
+                                         EC448_BIG_ENDIAN);
+}
+
+/* Export the curve448 private and public keys in raw form.
+ *
+ * key     [in]      Curve448 private key.
+ * priv    [in]      Array to hold private key.
+ * privSz  [in/out]  On in, the number of bytes in private key array.
+ *                   On out, the number bytes put into private key array.
+ * pub     [in]      Array to hold public key.
+ * pubSz   [in/out]  On in, the number of bytes in public key array.
+ *                   On out, the number bytes put into public key array.
+ * endian  [in]      Endianness to use when encoding number in array.
+ * returns BAD_FUNC_ARG when a parameter is NULL,
+ *         ECC_BAD_ARG_E when privSz is less than CURVE448_KEY_SIZE or pubSz is
+ *         less than CURVE448_PUB_KEY_SIZE,
+ *         0 otherwise.
+ */
+int wc_curve448_export_key_raw_ex(curve448_key* key, byte* priv, word32 *privSz,
+                                  byte* pub, word32 *pubSz, int endian)
+{
+    int ret;
+
+    /* export private part */
+    ret = wc_curve448_export_private_raw_ex(key, priv, privSz, endian);
+    if (ret == 0) {
+        /* export public part */
+        ret = wc_curve448_export_public_ex(key, pub, pubSz, endian);
+    }
+
+    return ret;
+}
+
+#endif /* HAVE_CURVE448_KEY_EXPORT */
+
+#ifdef HAVE_CURVE448_KEY_IMPORT
+
+/* Import curve448 private and public keys from a byte arrays.
+ * Private and public keys encoded in big-endian.
+ *
+ * piv     [in]  Array holding private key.
+ * privSz  [in]  Number of bytes of data in private key array.
+ * pub     [in]  Array holding public key.
+ * pubSz   [in]  Number of bytes of data in public key array.
+ * key     [in]  Curve448 private/public key.
+ * returns BAD_FUNC_ARG when a parameter is NULL,
+ *         ECC_BAD_ARG_E when privSz is less than CURVE448_KEY_SIZE or pubSz is
+ *         less than CURVE448_PUB_KEY_SIZE,
+ *         0 otherwise.
+ */
+int wc_curve448_import_private_raw(const byte* priv, word32 privSz,
+                                   const byte* pub, word32 pubSz,
+                                   curve448_key* key)
+{
+    return wc_curve448_import_private_raw_ex(priv, privSz, pub, pubSz, key,
+                                             EC448_BIG_ENDIAN);
+}
+
+/* Import curve448 private and public keys from a byte arrays.
+ *
+ * piv     [in]  Array holding private key.
+ * privSz  [in]  Number of bytes of data in private key array.
+ * pub     [in]  Array holding public key.
+ * pubSz   [in]  Number of bytes of data in public key array.
+ * key     [in]  Curve448 private/public key.
+ * endian  [in]  Endianness of encoded numbers in byte arrays.
+ * returns BAD_FUNC_ARG when a parameter is NULL,
+ *         ECC_BAD_ARG_E when privSz is less than CURVE448_KEY_SIZE or pubSz is
+ *         less than CURVE448_PUB_KEY_SIZE,
+ *         0 otherwise.
+ */
+int wc_curve448_import_private_raw_ex(const byte* priv, word32 privSz,
+                                      const byte* pub, word32 pubSz,
+                                      curve448_key* key, int endian)
+{
+    int ret;
+
+    /* import private part */
+    ret = wc_curve448_import_private_ex(priv, privSz, key, endian);
+    if (ret == 0) {
+        /* import public part */
+        return wc_curve448_import_public_ex(pub, pubSz, key, endian);
+    }
+
+    return ret;
+}
+
+/* Import curve448 private key from a byte array.
+ * Private key encoded in big-endian.
+ *
+ * piv     [in]  Array holding private key.
+ * privSz  [in]  Number of bytes of data in private key array.
+ * key     [in]  Curve448 private/public key.
+ * returns BAD_FUNC_ARG when a parameter is NULL,
+ *         ECC_BAD_ARG_E when privSz is less than CURVE448_KEY_SIZE,
+ *         0 otherwise.
+ */
+int wc_curve448_import_private(const byte* priv, word32 privSz,
+                               curve448_key* key)
+{
+    return wc_curve448_import_private_ex(priv, privSz, key, EC448_BIG_ENDIAN);
+}
+
+/* Import curve448 private key from a byte array.
+ *
+ * piv     [in]  Array holding private key.
+ * privSz  [in]  Number of bytes of data in private key array.
+ * key     [in]  Curve448 private/public key.
+ * endian  [in]  Endianness of encoded number in byte array.
+ * returns BAD_FUNC_ARG when a parameter is NULL,
+ *         ECC_BAD_ARG_E when privSz is less than CURVE448_KEY_SIZE,
+ *         0 otherwise.
+ */
+int wc_curve448_import_private_ex(const byte* priv, word32 privSz,
+                                  curve448_key* key, int endian)
+{
+    int ret = 0;
+    int i;
+
+    /* sanity check */
+    if ((key == NULL) || (priv == NULL)) {
+        ret = BAD_FUNC_ARG;
+    }
+
+    /* check size of incoming keys */
+    if ((ret == 0) && ((int)privSz != CURVE448_KEY_SIZE)) {
+        ret = ECC_BAD_ARG_E;
+    }
+
+    if (ret == 0) {
+        if (endian == EC448_BIG_ENDIAN) {
+            /* read the key in Big Endian format */
+            for (i = 0; i < CURVE448_KEY_SIZE; i++) {
+                key->k[i] = priv[CURVE448_KEY_SIZE - i - 1];
+            }
+        }
+        else {
+            XMEMCPY(key->k, priv, CURVE448_KEY_SIZE);
+        }
+
+        /* Clamp the key */
+        key->k[0] &= 0xfc;
+        key->k[CURVE448_KEY_SIZE-1] |= 0x80;
+    }
+
+    return ret;
+}
+
+#endif /* HAVE_CURVE448_KEY_IMPORT */
+
+
+/* Initialize the curve448 key.
+ *
+ * key  [in]  Curve448 key object.
+ * returns BAD_FUNC_ARG when key is NULL,
+ *         0 otherwise.
+ */
+int wc_curve448_init(curve448_key* key)
+{
+    int ret = 0;
+
+    if (key == NULL) {
+       ret = BAD_FUNC_ARG;
+    }
+
+    if (ret == 0) {
+        XMEMSET(key, 0, sizeof(*key));
+
+        fe448_init();
+    }
+
+    return ret;
+}
+
+
+/* Clears the curve448 key data.
+ *
+ * key  [in]  Curve448 key object.
+ */
+void wc_curve448_free(curve448_key* key)
+{
+   if (key != NULL) {
+       ForceZero(key->p, sizeof(key->p));
+       ForceZero(key->k, sizeof(key->k));
+   }
+}
+
+
+/* Get the curve448 key's size.
+ *
+ * key  [in]  Curve448 key object.
+ * returns 0 if key is NULL,
+ *         CURVE448_KEY_SIZE otherwise.
+ */
+int wc_curve448_size(curve448_key* key)
+{
+    int ret = 0;
+
+    if (key != NULL) {
+        ret = CURVE448_KEY_SIZE;
+    }
+
+    return ret;
+}
+
+#endif /* HAVE_CURVE448 */
+
+
--- a/wolfcrypt/src/des3.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/des3.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* des3.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -45,6 +45,10 @@
 
 #include <wolfssl/wolfcrypt/des3.h>
 
+#ifdef WOLF_CRYPTO_CB
+    #include <wolfssl/wolfcrypt/cryptocb.h>
+#endif
+
 /* fips wrapper calls, user can call direct */
 #if defined(HAVE_FIPS) && \
     (!defined(HAVE_FIPS_VERSION) || (HAVE_FIPS_VERSION < 2))
@@ -163,22 +167,26 @@
 
     int wc_Des3_SetKey(Des3* des, const byte* key, const byte* iv, int dir)
     {
-    #ifndef WOLFSSL_STM32_CUBEMX
-        word32 *dkey1 = des->key[0];
-        word32 *dkey2 = des->key[1];
-        word32 *dkey3 = des->key[2];
+        if (des == NULL || key == NULL)
+            return BAD_FUNC_ARG;
 
         (void)dir;
 
-        XMEMCPY(dkey1, key, 8);         /* set key 1 */
-        XMEMCPY(dkey2, key + 8, 8);     /* set key 2 */
-        XMEMCPY(dkey3, key + 16, 8);    /* set key 3 */
+    #ifndef WOLFSSL_STM32_CUBEMX
+        {
+            word32 *dkey1 = des->key[0];
+            word32 *dkey2 = des->key[1];
+            word32 *dkey3 = des->key[2];
 
-        ByteReverseWords(dkey1, dkey1, 8);
-        ByteReverseWords(dkey2, dkey2, 8);
-        ByteReverseWords(dkey3, dkey3, 8);
+            XMEMCPY(dkey1, key, 8);         /* set key 1 */
+            XMEMCPY(dkey2, key + 8, 8);     /* set key 2 */
+            XMEMCPY(dkey3, key + 16, 8);    /* set key 3 */
+
+            ByteReverseWords(dkey1, dkey1, 8);
+            ByteReverseWords(dkey2, dkey2, 8);
+            ByteReverseWords(dkey3, dkey3, 8);
+        }
     #else
-        (void)dir;
         XMEMCPY(des->key[0], key, DES3_KEYLEN); /* CUBEMX wants keys in sequential memory */
     #endif
 
@@ -188,9 +196,22 @@
     static void DesCrypt(Des* des, byte* out, const byte* in, word32 sz,
                   int dir, int mode)
     {
+        int ret;
     #ifdef WOLFSSL_STM32_CUBEMX
         CRYP_HandleTypeDef hcryp;
+    #else
+        word32 *dkey, *iv;
+        CRYP_InitTypeDef DES_CRYP_InitStructure;
+        CRYP_KeyInitTypeDef DES_CRYP_KeyInitStructure;
+        CRYP_IVInitTypeDef DES_CRYP_IVInitStructure;
+    #endif
 
+        ret = wolfSSL_CryptHwMutexLock();
+        if (ret != 0) {
+            return;
+        }
+
+    #ifdef WOLFSSL_STM32_CUBEMX
         XMEMSET(&hcryp, 0, sizeof(CRYP_HandleTypeDef));
         hcryp.Instance = CRYP;
         hcryp.Init.KeySize  = CRYP_KEYSIZE_128B;
@@ -200,8 +221,7 @@
 
         HAL_CRYP_Init(&hcryp);
 
-        while (sz > 0)
-        {
+        while (sz > 0) {
             /* if input and output same will overwrite input iv */
             XMEMCPY(des->tmp, in + sz - DES_BLOCK_SIZE, DES_BLOCK_SIZE);
 
@@ -236,11 +256,6 @@
 
         HAL_CRYP_DeInit(&hcryp);
     #else
-        word32 *dkey, *iv;
-        CRYP_InitTypeDef DES_CRYP_InitStructure;
-        CRYP_KeyInitTypeDef DES_CRYP_KeyInitStructure;
-        CRYP_IVInitTypeDef DES_CRYP_IVInitStructure;
-
         dkey = des->key;
         iv = des->reg;
 
@@ -282,8 +297,7 @@
         /* enable crypto processor */
         CRYP_Cmd(ENABLE);
 
-        while (sz > 0)
-        {
+        while (sz > 0) {
             /* flush IN/OUT FIFOs */
             CRYP_FIFOFlush();
 
@@ -310,6 +324,7 @@
         /* disable crypto processor */
         CRYP_Cmd(DISABLE);
     #endif /* WOLFSSL_STM32_CUBEMX */
+        wolfSSL_CryptHwMutexUnLock();
     }
 
     int wc_Des_CbcEncrypt(Des* des, byte* out, const byte* in, word32 sz)
@@ -333,110 +348,117 @@
     static void Des3Crypt(Des3* des, byte* out, const byte* in, word32 sz,
                    int dir)
     {
+        if (des == NULL || out == NULL || in == NULL)
+            return BAD_FUNC_ARG;
+
     #ifdef WOLFSSL_STM32_CUBEMX
-        CRYP_HandleTypeDef hcryp;
+        {
+            CRYP_HandleTypeDef hcryp;
 
-        XMEMSET(&hcryp, 0, sizeof(CRYP_HandleTypeDef));
-        hcryp.Instance = CRYP;
-        hcryp.Init.KeySize  = CRYP_KEYSIZE_128B;
-        hcryp.Init.DataType = CRYP_DATATYPE_8B;
-        hcryp.Init.pKey = (uint8_t*)des->key;
-        hcryp.Init.pInitVect = (uint8_t*)des->reg;
+            XMEMSET(&hcryp, 0, sizeof(CRYP_HandleTypeDef));
+            hcryp.Instance = CRYP;
+            hcryp.Init.KeySize  = CRYP_KEYSIZE_128B;
+            hcryp.Init.DataType = CRYP_DATATYPE_8B;
+            hcryp.Init.pKey = (uint8_t*)des->key;
+            hcryp.Init.pInitVect = (uint8_t*)des->reg;
+
+            HAL_CRYP_Init(&hcryp);
 
-        HAL_CRYP_Init(&hcryp);
+            while (sz > 0)
+            {
+                if (dir == DES_ENCRYPTION) {
+                    HAL_CRYP_TDESCBC_Encrypt(&hcryp, (byte*)in,
+                                       DES_BLOCK_SIZE, out, STM32_HAL_TIMEOUT);
+                }
+                else {
+                    HAL_CRYP_TDESCBC_Decrypt(&hcryp, (byte*)in,
+                                       DES_BLOCK_SIZE, out, STM32_HAL_TIMEOUT);
+                }
 
-        while (sz > 0)
-        {
-            if (dir == DES_ENCRYPTION) {
-                HAL_CRYP_TDESCBC_Encrypt(&hcryp, (byte*)in,
-                                    DES_BLOCK_SIZE, out, STM32_HAL_TIMEOUT);
-            }
-            else {
-                HAL_CRYP_TDESCBC_Decrypt(&hcryp, (byte*)in,
-                                    DES_BLOCK_SIZE, out, STM32_HAL_TIMEOUT);
+                /* store iv for next call */
+                XMEMCPY(des->reg, out + sz - DES_BLOCK_SIZE, DES_BLOCK_SIZE);
+
+                sz  -= DES_BLOCK_SIZE;
+                in  += DES_BLOCK_SIZE;
+                out += DES_BLOCK_SIZE;
             }
 
-            /* store iv for next call */
-            XMEMCPY(des->reg, out + sz - DES_BLOCK_SIZE, DES_BLOCK_SIZE);
-
-            sz  -= DES_BLOCK_SIZE;
-            in  += DES_BLOCK_SIZE;
-            out += DES_BLOCK_SIZE;
+            HAL_CRYP_DeInit(&hcryp);
         }
+    #else
+        {
+            word32 *dkey1, *dkey2, *dkey3, *iv;
+            CRYP_InitTypeDef DES3_CRYP_InitStructure;
+            CRYP_KeyInitTypeDef DES3_CRYP_KeyInitStructure;
+            CRYP_IVInitTypeDef DES3_CRYP_IVInitStructure;
 
-        HAL_CRYP_DeInit(&hcryp);
-    #else
-        word32 *dkey1, *dkey2, *dkey3, *iv;
-        CRYP_InitTypeDef DES3_CRYP_InitStructure;
-        CRYP_KeyInitTypeDef DES3_CRYP_KeyInitStructure;
-        CRYP_IVInitTypeDef DES3_CRYP_IVInitStructure;
+            dkey1 = des->key[0];
+            dkey2 = des->key[1];
+            dkey3 = des->key[2];
+            iv = des->reg;
 
-        dkey1 = des->key[0];
-        dkey2 = des->key[1];
-        dkey3 = des->key[2];
-        iv = des->reg;
+            /* crypto structure initialization */
+            CRYP_KeyStructInit(&DES3_CRYP_KeyInitStructure);
+            CRYP_StructInit(&DES3_CRYP_InitStructure);
+            CRYP_IVStructInit(&DES3_CRYP_IVInitStructure);
 
-        /* crypto structure initialization */
-        CRYP_KeyStructInit(&DES3_CRYP_KeyInitStructure);
-        CRYP_StructInit(&DES3_CRYP_InitStructure);
-        CRYP_IVStructInit(&DES3_CRYP_IVInitStructure);
+            /* reset registers to their default values */
+            CRYP_DeInit();
 
-        /* reset registers to their default values */
-        CRYP_DeInit();
+            /* set direction, mode, and datatype */
+            if (dir == DES_ENCRYPTION) {
+                DES3_CRYP_InitStructure.CRYP_AlgoDir  = CRYP_AlgoDir_Encrypt;
+            } else {
+                DES3_CRYP_InitStructure.CRYP_AlgoDir  = CRYP_AlgoDir_Decrypt;
+            }
 
-        /* set direction, mode, and datatype */
-        if (dir == DES_ENCRYPTION) {
-            DES3_CRYP_InitStructure.CRYP_AlgoDir  = CRYP_AlgoDir_Encrypt;
-        } else {
-            DES3_CRYP_InitStructure.CRYP_AlgoDir  = CRYP_AlgoDir_Decrypt;
-        }
-
-        DES3_CRYP_InitStructure.CRYP_AlgoMode = CRYP_AlgoMode_TDES_CBC;
-        DES3_CRYP_InitStructure.CRYP_DataType = CRYP_DataType_8b;
-        CRYP_Init(&DES3_CRYP_InitStructure);
+            DES3_CRYP_InitStructure.CRYP_AlgoMode = CRYP_AlgoMode_TDES_CBC;
+            DES3_CRYP_InitStructure.CRYP_DataType = CRYP_DataType_8b;
+            CRYP_Init(&DES3_CRYP_InitStructure);
 
-        /* load key into correct registers */
-        DES3_CRYP_KeyInitStructure.CRYP_Key1Left  = dkey1[0];
-        DES3_CRYP_KeyInitStructure.CRYP_Key1Right = dkey1[1];
-        DES3_CRYP_KeyInitStructure.CRYP_Key2Left  = dkey2[0];
-        DES3_CRYP_KeyInitStructure.CRYP_Key2Right = dkey2[1];
-        DES3_CRYP_KeyInitStructure.CRYP_Key3Left  = dkey3[0];
-        DES3_CRYP_KeyInitStructure.CRYP_Key3Right = dkey3[1];
-        CRYP_KeyInit(&DES3_CRYP_KeyInitStructure);
+            /* load key into correct registers */
+            DES3_CRYP_KeyInitStructure.CRYP_Key1Left  = dkey1[0];
+            DES3_CRYP_KeyInitStructure.CRYP_Key1Right = dkey1[1];
+            DES3_CRYP_KeyInitStructure.CRYP_Key2Left  = dkey2[0];
+            DES3_CRYP_KeyInitStructure.CRYP_Key2Right = dkey2[1];
+            DES3_CRYP_KeyInitStructure.CRYP_Key3Left  = dkey3[0];
+            DES3_CRYP_KeyInitStructure.CRYP_Key3Right = dkey3[1];
+            CRYP_KeyInit(&DES3_CRYP_KeyInitStructure);
 
-        /* set iv */
-        ByteReverseWords(iv, iv, DES_BLOCK_SIZE);
-        DES3_CRYP_IVInitStructure.CRYP_IV0Left  = iv[0];
-        DES3_CRYP_IVInitStructure.CRYP_IV0Right = iv[1];
-        CRYP_IVInit(&DES3_CRYP_IVInitStructure);
+            /* set iv */
+            ByteReverseWords(iv, iv, DES_BLOCK_SIZE);
+            DES3_CRYP_IVInitStructure.CRYP_IV0Left  = iv[0];
+            DES3_CRYP_IVInitStructure.CRYP_IV0Right = iv[1];
+            CRYP_IVInit(&DES3_CRYP_IVInitStructure);
 
-        /* enable crypto processor */
-        CRYP_Cmd(ENABLE);
+            /* enable crypto processor */
+            CRYP_Cmd(ENABLE);
 
-        while (sz > 0)
-        {
-            /* flush IN/OUT FIFOs */
-            CRYP_FIFOFlush();
+            while (sz > 0)
+            {
+                /* flush IN/OUT FIFOs */
+                CRYP_FIFOFlush();
 
-            CRYP_DataIn(*(uint32_t*)&in[0]);
-            CRYP_DataIn(*(uint32_t*)&in[4]);
+                CRYP_DataIn(*(uint32_t*)&in[0]);
+                CRYP_DataIn(*(uint32_t*)&in[4]);
 
-            /* wait until the complete message has been processed */
-            while(CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {}
+                /* wait until the complete message has been processed */
+                while(CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {}
 
-            *(uint32_t*)&out[0]  = CRYP_DataOut();
-            *(uint32_t*)&out[4]  = CRYP_DataOut();
+                *(uint32_t*)&out[0]  = CRYP_DataOut();
+                *(uint32_t*)&out[4]  = CRYP_DataOut();
 
-            /* store iv for next call */
-            XMEMCPY(des->reg, out + sz - DES_BLOCK_SIZE, DES_BLOCK_SIZE);
+                /* store iv for next call */
+                XMEMCPY(des->reg, out + sz - DES_BLOCK_SIZE, DES_BLOCK_SIZE);
 
-            sz  -= DES_BLOCK_SIZE;
-            in  += DES_BLOCK_SIZE;
-            out += DES_BLOCK_SIZE;
+                sz  -= DES_BLOCK_SIZE;
+                in  += DES_BLOCK_SIZE;
+                out += DES_BLOCK_SIZE;
+            }
+
+            /* disable crypto processor */
+            CRYP_Cmd(DISABLE);
         }
-
-        /* disable crypto processor */
-        CRYP_Cmd(DISABLE);
     #endif /* WOLFSSL_STM32_CUBEMX */
     }
 
@@ -644,10 +666,13 @@
 
     int wc_Des3_SetKey(Des3* des3, const byte* key, const byte* iv, int dir)
     {
+        if (des3 == NULL || key == NULL) {
+            return BAD_FUNC_ARG;
+        }
 
-        if(desBuffIn == NULL) {
+        if (desBuffIn == NULL) {
         #if defined (HAVE_THREADX)
-    			  int s1, s2, s3, s4, s5;
+            int s1, s2, s3, s4, s5;
             s5 = tx_byte_allocate(&mp_ncached,(void *)&secDesc,
                                                          sizeof(SECdescriptorType), TX_NO_WAIT);
             s1 = tx_byte_allocate(&mp_ncached,(void *)&desBuffIn,  DES_BUFFER_SIZE, TX_NO_WAIT);
@@ -677,7 +702,13 @@
     #include "fsl_ltc.h"
     int wc_Des_SetKey(Des* des, const byte* key, const byte* iv, int dir)
     {
-        byte* dkey = (byte*)des->key;
+        byte* dkey;
+
+        if (des == NULL || key == NULL) {
+            return BAD_FUNC_ARG;
+        }
+
+        dkey = (byte*)des->key;
 
         XMEMCPY(dkey, key, 8);
 
@@ -781,7 +812,14 @@
     int wc_Des_SetKey(Des* des, const byte* key, const byte* iv, int dir)
     {
         int i = 0;
-        byte* dkey = (byte*)des->key;
+        byte* dkey;
+
+
+        if (des == NULL || key == NULL) {
+            return BAD_FUNC_ARG;
+        }
+
+        dkey = (byte*)des->key;
 
         XMEMCPY(dkey, key, 8);
 
@@ -1443,15 +1481,15 @@
             return BAD_FUNC_ARG;
         }
 
-    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_3DES)
-        if (des->asyncDev.marker == WOLFSSL_ASYNC_MARKER_3DES) {
-            /* key_raw holds orignal key copy */
-            des->key_raw = key;
-            des->iv_raw = iv;
-
-            /* continue on to set normal key for smaller DES operations */
+    #if defined(WOLF_CRYPTO_CB) || \
+        (defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_3DES))
+        #ifdef WOLF_CRYPTO_CB
+        if (des->devId != INVALID_DEVID)
+        #endif
+        {
+            XMEMCPY(des->devKey, key, DES3_KEYLEN);
         }
-    #endif /* WOLFSSL_ASYNC_CRYPT */
+    #endif
 
         ret = DesSetKey(key + (dir == DES_ENCRYPTION ? 0:16), dir, des->key[0]);
         if (ret != 0)
@@ -1587,6 +1625,15 @@
             return BAD_FUNC_ARG;
         }
 
+    #ifdef WOLF_CRYPTO_CB
+        if (des->devId != INVALID_DEVID) {
+            int ret = wc_CryptoCb_Des3Encrypt(des, out, in, sz);
+            if (ret != CRYPTOCB_UNAVAILABLE)
+                return ret;
+            /* fall-through when unavailable */
+        }
+    #endif
+
     #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_3DES)
         if (des->asyncDev.marker == WOLFSSL_ASYNC_MARKER_3DES &&
                                             sz >= WC_ASYNC_THRESH_DES3_CBC) {
@@ -1594,7 +1641,7 @@
             return NitroxDes3CbcEncrypt(des, out, in, sz);
         #elif defined(HAVE_INTEL_QA)
             return IntelQaSymDes3CbcEncrypt(&des->asyncDev, out, in, sz,
-                des->key_raw, DES3_KEYLEN, (byte*)des->iv_raw, DES3_IVLEN);
+                (const byte*)des->devKey, DES3_KEYLEN, (byte*)des->reg, DES3_IVLEN);
         #else /* WOLFSSL_ASYNC_CRYPT_TEST */
             if (wc_AsyncTestInit(&des->asyncDev, ASYNC_TEST_DES3_CBC_ENCRYPT)) {
                 WC_ASYNC_TEST* testDev = &des->asyncDev.test;
@@ -1629,14 +1676,23 @@
             return BAD_FUNC_ARG;
         }
 
-    #if defined(WOLFSSL_ASYNC_CRYPT)
+    #ifdef WOLF_CRYPTO_CB
+        if (des->devId != INVALID_DEVID) {
+            int ret = wc_CryptoCb_Des3Decrypt(des, out, in, sz);
+            if (ret != CRYPTOCB_UNAVAILABLE)
+                return ret;
+            /* fall-through when unavailable */
+        }
+    #endif
+
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_3DES)
         if (des->asyncDev.marker == WOLFSSL_ASYNC_MARKER_3DES &&
                                             sz >= WC_ASYNC_THRESH_DES3_CBC) {
         #if defined(HAVE_CAVIUM)
             return NitroxDes3CbcDecrypt(des, out, in, sz);
         #elif defined(HAVE_INTEL_QA)
             return IntelQaSymDes3CbcDecrypt(&des->asyncDev, out, in, sz,
-                des->key_raw, DES3_KEYLEN, (byte*)des->iv_raw, DES3_IVLEN);
+                (const byte*)des->devKey, DES3_KEYLEN, (byte*)des->reg, DES3_IVLEN);
         #else /* WOLFSSL_ASYNC_CRYPT_TEST */
             if (wc_AsyncTestInit(&des->asyncDev, ASYNC_TEST_DES3_CBC_DECRYPT)) {
                 WC_ASYNC_TEST* testDev = &des->asyncDev.test;
@@ -1734,11 +1790,16 @@
 
     des3->heap = heap;
 
+#ifdef WOLF_CRYPTO_CB
+    des3->devId = devId;
+    des3->devCtx = NULL;
+#else
+    (void)devId;
+#endif
+
 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_3DES)
     ret = wolfAsync_DevCtxInit(&des3->asyncDev, WOLFSSL_ASYNC_MARKER_3DES,
                                                         des3->heap, devId);
-#else
-    (void)devId;
 #endif
 
     return ret;
@@ -1753,6 +1814,10 @@
 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_3DES)
     wolfAsync_DevCtxFree(&des3->asyncDev, WOLFSSL_ASYNC_MARKER_3DES);
 #endif /* WOLFSSL_ASYNC_CRYPT */
+#if defined(WOLF_CRYPTO_CB) || \
+        (defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_3DES))
+    ForceZero(des3->devKey, sizeof(des3->devKey));
+#endif
 }
 
 #endif /* WOLFSSL_TI_CRYPT */
--- a/wolfcrypt/src/dh.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/dh.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* dh.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -29,7 +29,7 @@
 #ifndef NO_DH
 
 #if defined(HAVE_FIPS) && \
-	defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)
+    defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)
 
     /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */
     #define FIPS_NO_WRAPPERS
@@ -56,6 +56,17 @@
 #endif
 
 
+/*
+Possible DH enable options:
+ * NO_RSA:              Overall control of DH                 default: on (not defined)
+ * WOLFSSL_OLD_PRIME_CHECK: Disables the new prime number check. It does not
+                        directly effect this file, but it does speed up DH
+                        removing the testing. It is not recommended to
+                        disable the prime checking.           default: off
+
+*/
+
+
 #if !defined(USER_MATH_LIB) && !defined(WOLFSSL_DH_CONST)
     #include <math.h>
     #define XPOW(x,y) pow((x),(y))
@@ -765,7 +776,7 @@
 };
 static const byte dh_ffdhe8192_g[] = { 0x02 };
 #ifdef HAVE_FFDHE_Q
-static const byte dh_ffdhe8192_g[] = {
+static const byte dh_ffdhe8192_q[] = {
     0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
     0xD6, 0xFC, 0x2A, 0x2C, 0x51, 0x5D, 0xA5, 0x4D,
     0x57, 0xEE, 0x2B, 0x10, 0x13, 0x9E, 0x9E, 0x78,
@@ -919,7 +930,11 @@
 
     key->heap = heap; /* for XMALLOC/XFREE in future */
 
+#if !defined(WOLFSSL_QT) && !defined(OPENSSL_ALL)
     if (mp_init_multi(&key->p, &key->g, &key->q, NULL, NULL, NULL) != MP_OKAY)
+#else
+    if (mp_init_multi(&key->p,&key->g,&key->q,&key->pub,&key->priv,NULL) != MP_OKAY)
+#endif
         return MEMORY_E;
 
 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_DH)
@@ -954,6 +969,7 @@
 }
 
 
+#ifndef WC_NO_RNG
 /* if defined to not use floating point values do not compile in */
 #ifndef WOLFSSL_DH_CONST
     static word32 DiscreteLogWorkFactor(word32 n)
@@ -968,7 +984,7 @@
 #endif /* WOLFSSL_DH_CONST*/
 
 
-/* if not using fixed points use DiscreteLogWorkFactor function for unsual size
+/* if not using fixed points use DiscreteLogWorkFactor function for unusual size
    otherwise round up on size needed */
 #ifndef WOLFSSL_DH_CONST
     #define WOLFSSL_DH_ROUND(x)
@@ -1142,11 +1158,12 @@
     return err;
 }
 #endif /* WOLFSSL_NO_DH186 */
-
+#endif /* !WC_NO_RNG */
 
 static int GeneratePrivateDh(DhKey* key, WC_RNG* rng, byte* priv,
                              word32* privSz)
 {
+#ifndef WC_NO_RNG
     int ret = 0;
     word32 sz = 0;
 
@@ -1198,6 +1215,13 @@
     }
 
     return ret;
+#else
+    (void)key;
+    (void)rng;
+    (void)priv;
+    (void)privSz;
+    return NOT_COMPILED_IN;
+#endif /* WC_NO_RNG */
 }
 
 
@@ -1207,8 +1231,8 @@
     int ret = 0;
 #ifndef WOLFSSL_SP_MATH
 #ifdef WOLFSSL_SMALL_STACK
-    mp_int* x = NULL;
-    mp_int* y = NULL;
+    mp_int* x;
+    mp_int* y;
 #else
     mp_int x[1];
     mp_int y[1];
@@ -1224,6 +1248,10 @@
     if (mp_count_bits(&key->p) == 3072)
         return sp_DhExp_3072(&key->g, priv, privSz, &key->p, pub, pubSz);
 #endif
+#ifdef WOLFSSL_SP_4096
+    if (mp_count_bits(&key->p) == 4096)
+        return sp_DhExp_4096(&key->g, priv, privSz, &key->p, pub, pubSz);
+#endif
 #endif
 
 #ifndef WOLFSSL_SP_MATH
@@ -1237,8 +1265,13 @@
         return MEMORY_E;
     }
 #endif
-    if (mp_init_multi(x, y, 0, 0, 0, 0) != MP_OKAY)
+    if (mp_init_multi(x, y, 0, 0, 0, 0) != MP_OKAY) {
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(y, key->heap, DYNAMIC_TYPE_DH);
+        XFREE(x, key->heap, DYNAMIC_TYPE_DH);
+    #endif
         return MP_INIT_E;
+    }
 
     if (mp_read_unsigned_bin(x, priv, privSz) != MP_OKAY)
         ret = MP_READ_E;
@@ -1287,12 +1320,12 @@
     int ret;
 
 #if defined(HAVE_INTEL_QA)
-    word32 sz;
+    word32 pBits;
 
-    /* verify prime is at least 768-bits */
-    /* QAT HW must have prime at least 768-bits */
-    sz = mp_unsigned_bin_size(&key->p);
-    if (sz >= (768/8)) {
+    /* QAT DH sizes: 768, 1024, 1536, 2048, 3072 and 4096 bits */
+    pBits = mp_unsigned_bin_size(&key->p) * 8;
+    if (pBits == 768 ||  pBits == 1024 || pBits == 1536 ||
+        pBits == 2048 || pBits == 3072 || pBits == 4096) {
         mp_int x;
 
         ret = mp_init(&x);
@@ -1388,6 +1421,11 @@
 #endif
 
     if (mp_init_multi(y, p, q, NULL, NULL, NULL) != MP_OKAY) {
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(q, key->heap, DYNAMIC_TYPE_DH);
+        XFREE(p, key->heap, DYNAMIC_TYPE_DH);
+        XFREE(y, key->heap, DYNAMIC_TYPE_DH);
+    #endif
         return MP_INIT_E;
     }
 
@@ -1447,13 +1485,21 @@
         }
         else
 #endif
+#ifdef WOLFSSL_SP_NO_4096
+        if (mp_count_bits(&key->p) == 4096) {
+            ret = sp_ModExp_4096(y, q, p, y);
+            if (ret != 0)
+                ret = MP_EXPTMOD_E;
+        }
+        else
+#endif
 #endif
 
         {
     /* SP 800-56Ar3, section 5.6.2.3.1, process step 2 */
 #ifndef WOLFSSL_SP_MATH
             /* calculate (y^q) mod(p), store back into y */
-            if (ret == 0 && mp_exptmod(y, q, p, y) != MP_OKAY)
+            if (mp_exptmod(y, q, p, y) != MP_OKAY)
                 ret = MP_EXPTMOD_E;
 #else
             ret = WC_KEY_SIZE_E;
@@ -1492,6 +1538,46 @@
 }
 
 
+/**
+ * Quick validity check of public key value against prime.
+ * Checks are:
+ *   - Public key not 0 or 1
+ *   - Public key not equal to prime or prime - 1
+ *   - Public key not bigger than prime.
+ *
+ * prime    Big-endian encoding of prime in bytes.
+ * primeSz  Size of prime in bytes.
+ * pub      Big-endian encoding of public key in bytes.
+ * pubSz    Size of public key in bytes.
+ */
+int wc_DhCheckPubValue(const byte* prime, word32 primeSz, const byte* pub,
+                       word32 pubSz)
+{
+    int ret = 0;
+    word32 i;
+
+    for (i = 0; i < pubSz && pub[i] == 0; i++) {
+    }
+    pubSz -= i;
+    pub += i;
+
+    if (pubSz == 0 || (pubSz == 1 && pub[0] == 1))
+        ret = MP_VAL;
+    else if (pubSz == primeSz) {
+        for (i = 0; i < pubSz-1 && pub[i] == prime[i]; i++) {
+        }
+        if (i == pubSz-1 && (pub[i] == prime[i] || pub[i] == prime[i] - 1))
+            ret = MP_VAL;
+        else if (pub[i] > prime[i])
+            ret = MP_VAL;
+    }
+    else if (pubSz > primeSz)
+        ret = MP_VAL;
+
+    return ret;
+}
+
+
 /* Check DH Private Key for invalid numbers, optionally allowing
  * the private key to be checked against the large prime (q).
  * Check per process in SP 800-56Ar3, section 5.6.2.1.2.
@@ -1532,6 +1618,10 @@
 #endif
 
     if (mp_init_multi(x, q, NULL, NULL, NULL, NULL) != MP_OKAY) {
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(q, key->heap, DYNAMIC_TYPE_DH);
+        XFREE(x, key->heap, DYNAMIC_TYPE_DH);
+    #endif
         return MP_INIT_E;
     }
 
@@ -1648,6 +1738,11 @@
     if (mp_init_multi(publicKey, privateKey, checkKey,
                       NULL, NULL, NULL) != MP_OKAY) {
 
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(privateKey, key->heap, DYNAMIC_TYPE_DH);
+        XFREE(publicKey, key->heap, DYNAMIC_TYPE_DH);
+        XFREE(checkKey, key->heap, DYNAMIC_TYPE_DH);
+    #endif
         return MP_INIT_E;
     }
 
@@ -1677,6 +1772,14 @@
         }
         else
 #endif
+#ifdef WOLFSSL_SP_4096
+        if (mp_count_bits(&key->p) == 4096) {
+            ret = sp_ModExp_4096(&key->g, privateKey, &key->p, checkKey);
+            if (ret != 0)
+                ret = MP_EXPTMOD_E;
+        }
+        else
+#endif
 #endif
         {
 #ifndef WOLFSSL_SP_MATH
@@ -1737,10 +1840,10 @@
 {
     int ret = 0;
 #ifdef WOLFSSL_SMALL_STACK
-    mp_int* y = NULL;
+    mp_int* y;
 #ifndef WOLFSSL_SP_MATH
-    mp_int* x = NULL;
-    mp_int* z = NULL;
+    mp_int* x;
+    mp_int* z;
 #endif
 #else
     mp_int y[1];
@@ -1826,11 +1929,39 @@
         return ret;
     }
 #endif
+#ifdef WOLFSSL_SP_4096
+    if (mp_count_bits(&key->p) == 4096) {
+        if (mp_init(y) != MP_OKAY)
+            return MP_INIT_E;
+
+        if (ret == 0 && mp_read_unsigned_bin(y, otherPub, pubSz) != MP_OKAY)
+            ret = MP_READ_E;
+
+        if (ret == 0)
+            ret = sp_DhExp_4096(y, priv, privSz, &key->p, agree, agreeSz);
+
+        mp_clear(y);
+    #ifdef WOLFSSL_SMALL_STACK
+    #ifndef WOLFSSL_SP_MATH
+        XFREE(z, key->heap, DYNAMIC_TYPE_DH);
+        XFREE(x, key->heap, DYNAMIC_TYPE_DH);
+    #endif
+        XFREE(y, key->heap, DYNAMIC_TYPE_DH);
+    #endif
+        return ret;
+    }
+#endif
 #endif
 
 #ifndef WOLFSSL_SP_MATH
-    if (mp_init_multi(x, y, z, 0, 0, 0) != MP_OKAY)
+    if (mp_init_multi(x, y, z, 0, 0, 0) != MP_OKAY) {
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(z, key->heap, DYNAMIC_TYPE_DH);
+        XFREE(x, key->heap, DYNAMIC_TYPE_DH);
+        XFREE(y, key->heap, DYNAMIC_TYPE_DH);
+    #endif
         return MP_INIT_E;
+    }
 
     if (mp_read_unsigned_bin(x, priv, privSz) != MP_OKAY)
         ret = MP_READ_E;
@@ -1873,15 +2004,23 @@
 {
     int ret;
 
-#ifdef HAVE_CAVIUM
-    /* TODO: Not implemented - use software for now */
-    ret = wc_DhAgree_Sync(key, agree, agreeSz, priv, privSz, otherPub, pubSz);
+#if defined(HAVE_INTEL_QA)
+    word32 pBits;
 
-#elif defined(HAVE_INTEL_QA)
-    ret = wc_mp_to_bigint(&key->p, &key->p.raw);
-    if (ret == MP_OKAY)
-        ret = IntelQaDhAgree(&key->asyncDev, &key->p.raw,
-            agree, agreeSz, priv, privSz, otherPub, pubSz);
+    /* QAT DH sizes: 768, 1024, 1536, 2048, 3072 and 4096 bits */
+    pBits = mp_unsigned_bin_size(&key->p) * 8;
+    if (pBits == 768 ||  pBits == 1024 || pBits == 1536 ||
+        pBits == 2048 || pBits == 3072 || pBits == 4096) {
+        ret = wc_mp_to_bigint(&key->p, &key->p.raw);
+        if (ret == MP_OKAY)
+            ret = IntelQaDhAgree(&key->asyncDev, &key->p.raw,
+                agree, agreeSz, priv, privSz, otherPub, pubSz);
+        return ret;
+    }
+
+#elif defined(HAVE_CAVIUM)
+    /* TODO: Not implemented - use software for now */
+
 #else /* WOLFSSL_ASYNC_CRYPT_TEST */
     if (wc_AsyncTestInit(&key->asyncDev, ASYNC_TEST_DH_AGREE)) {
         WC_ASYNC_TEST* testDev = &key->asyncDev.test;
@@ -1894,8 +2033,10 @@
         testDev->dhAgree.pubSz = pubSz;
         return WC_PENDING_E;
     }
+#endif
+
+    /* otherwise use software DH */
     ret = wc_DhAgree_Sync(key, agree, agreeSz, priv, privSz, otherPub, pubSz);
-#endif
 
     return ret;
 }
@@ -1924,14 +2065,84 @@
     return ret;
 }
 
+#if defined(WOLFSSL_QT) || defined(OPENSSL_ALL)
+/* Sets private and public key in DhKey if both are available, otherwise sets
+    either private or public key, depending on which is available.
+    Returns WOLFSSL_SUCCESS if at least one of the keys was set. */
+WOLFSSL_LOCAL int wc_DhSetFullKeys(DhKey* key,const byte* priv_key,word32 privSz,
+                                   const byte* pub_key, word32 pubSz)
+{
+    byte havePriv = 0;
+    byte havePub = 0;
+    mp_int* keyPriv = NULL;
+    mp_int* keyPub  = NULL;
 
-int wc_DhSetKey_ex(DhKey* key, const byte* p, word32 pSz, const byte* g,
-                   word32 gSz, const byte* q, word32 qSz)
+    if (key == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    havePriv = ( (priv_key != NULL) && (privSz > 0) );
+    havePub  = ( (pub_key  != NULL) && (pubSz  > 0) );
+
+    if (!havePub && !havePriv) {
+        WOLFSSL_MSG("No Public or Private Key to Set");
+        return BAD_FUNC_ARG;
+    }
+    /* Set Private Key */
+    if (havePriv == TRUE) {
+        /* may have leading 0 */
+        if (priv_key[0] == 0) {
+            privSz--; priv_key++;
+        }
+        if (mp_init(&key->priv) != MP_OKAY)
+            havePriv = FALSE;
+    }
+
+    if (havePriv == TRUE) {
+        if (mp_read_unsigned_bin(&key->priv, priv_key, privSz) != MP_OKAY) {
+            havePriv = FALSE;
+        } else {
+            keyPriv = &key->priv;
+            WOLFSSL_MSG("DH Private Key Set.");
+        }
+    }
+
+    /* Set Public Key */
+    if (havePub == TRUE) {
+        /* may have leading 0 */
+        if (pub_key[0] == 0) {
+            pubSz--; pub_key++;
+        }
+        if (mp_init(&key->pub) != MP_OKAY)
+            havePub = FALSE;
+    }
+
+    if (havePub == TRUE) {
+        if (mp_read_unsigned_bin(&key->pub, pub_key, pubSz) != MP_OKAY) {
+            havePub = FALSE;
+        } else {
+            keyPub = &key->pub;
+            WOLFSSL_MSG("DH Public Key Set.");
+        }
+    }
+    /* Free Memory if error occured */
+    if (havePriv == FALSE && keyPriv != NULL)
+        mp_clear(keyPriv);
+    if (havePub == FALSE && keyPub != NULL)
+        mp_clear(keyPub);
+
+    /* WOLFSSL_SUCCESS if private or public was set else WOLFSSL_FAILURE */
+    return havePriv || havePub;
+}
+#endif
+
+static int _DhSetKey(DhKey* key, const byte* p, word32 pSz, const byte* g,
+                   word32 gSz, const byte* q, word32 qSz, int trusted,
+                   WC_RNG* rng)
 {
     int ret = 0;
     mp_int* keyP = NULL;
     mp_int* keyG = NULL;
-    mp_int* keyQ = NULL;
 
     if (key == NULL || p == NULL || g == NULL || pSz == 0 || gSz == 0) {
         ret = BAD_FUNC_ARG;
@@ -1963,6 +2174,18 @@
         else
             keyP = &key->p;
     }
+
+    if (ret == 0 && !trusted) {
+        int isPrime = 0;
+        if (rng != NULL)
+            ret = mp_prime_is_prime_ex(keyP, 8, &isPrime, rng);
+        else
+            ret = mp_prime_is_prime(keyP, 8, &isPrime);
+
+        if (ret == 0 && isPrime == 0)
+            ret = DH_CHECK_PUB_E;
+    }
+
     if (ret == 0 && mp_init(&key->g) != MP_OKAY)
         ret = MP_INIT_E;
     if (ret == 0) {
@@ -1979,13 +2202,9 @@
     if (ret == 0 && q != NULL) {
         if (mp_read_unsigned_bin(&key->q, q, qSz) != MP_OKAY)
             ret = MP_INIT_E;
-        else
-            keyQ = &key->q;
     }
 
     if (ret != 0 && key != NULL) {
-        if (keyQ)
-            mp_clear(keyQ);
         if (keyG)
             mp_clear(keyG);
         if (keyP)
@@ -1996,11 +2215,26 @@
 }
 
 
+int wc_DhSetCheckKey(DhKey* key, const byte* p, word32 pSz, const byte* g,
+                   word32 gSz, const byte* q, word32 qSz, int trusted,
+                   WC_RNG* rng)
+{
+    return _DhSetKey(key, p, pSz, g, gSz, q, qSz, trusted, rng);
+}
+
+
+int wc_DhSetKey_ex(DhKey* key, const byte* p, word32 pSz, const byte* g,
+                   word32 gSz, const byte* q, word32 qSz)
+{
+    return _DhSetKey(key, p, pSz, g, gSz, q, qSz, 1, NULL);
+}
+
+
 /* not in asn anymore since no actual asn types used */
 int wc_DhSetKey(DhKey* key, const byte* p, word32 pSz, const byte* g,
                 word32 gSz)
 {
-    return wc_DhSetKey_ex(key, p, pSz, g, gSz, NULL, 0);
+    return _DhSetKey(key, p, pSz, g, gSz, NULL, 0, 1, NULL);
 }
 
 
@@ -2049,7 +2283,7 @@
             ret = MEMORY_E;
     }
 
-    /* make a random string that will be multplied against q */
+    /* make a random string that will be multiplied against q */
     if (ret == 0)
         ret = wc_RNG_GenerateBlock(rng, buf, bufSz);
 
@@ -2097,7 +2331,7 @@
     /* loop until p is prime */
     if (ret == 0) {
         do {
-            if (mp_prime_is_prime(&dh->p, 8, &primeCheck) != MP_OKAY)
+            if (mp_prime_is_prime_ex(&dh->p, 8, &primeCheck, rng) != MP_OKAY)
                 ret = PRIME_GEN_E;
 
             if (primeCheck != MP_YES) {
@@ -2113,13 +2347,13 @@
     /* tmp2 += (2*loop_check_prime)
      * to have p = (q * tmp2) + 1 prime
      */
-    if (primeCheckCount) {
+    if ((ret == 0) && (primeCheckCount)) {
         if (mp_add_d(&tmp2, 2 * primeCheckCount, &tmp2) != MP_OKAY)
             ret = MP_ADD_E;
     }
 
     /* find a value g for which g^tmp2 != 1 */
-    if (mp_set(&dh->g, 1) != MP_OKAY)
+    if ((ret == 0) && (mp_set(&dh->g, 1) != MP_OKAY))
         ret = MP_ZERO_E;
 
     if (ret == 0) {
@@ -2131,18 +2365,24 @@
         } while (ret == 0 && mp_cmp_d(&tmp, 1) == MP_EQ);
     }
 
-    /* at this point tmp generates a group of order q mod p */
-    mp_exch(&tmp, &dh->g);
+    if (ret == 0) {
+        /* at this point tmp generates a group of order q mod p */
+        mp_exch(&tmp, &dh->g);
+    }
 
     /* clear the parameters if there was an error */
-    if (ret != 0) {
+    if ((ret != 0) && (dh != NULL)) {
         mp_clear(&dh->q);
         mp_clear(&dh->p);
         mp_clear(&dh->g);
     }
 
-    ForceZero(buf, bufSz);
-    XFREE(buf, dh->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (buf != NULL) {
+        ForceZero(buf, bufSz);
+        if (dh != NULL) {
+            XFREE(buf, dh->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        }
+    }
     mp_clear(&tmp);
     mp_clear(&tmp2);
 
--- a/wolfcrypt/src/dsa.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/dsa.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* dsa.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -42,14 +42,6 @@
     #include <wolfcrypt/src/misc.c>
 #endif
 
-
-enum {
-    DSA_HALF_SIZE = 20,   /* r and s size  */
-    DSA_SIG_SIZE  = 40    /* signature size */
-};
-
-
-
 int wc_InitDsaKey(DsaKey* key)
 {
     if (key == NULL)
@@ -209,7 +201,7 @@
 
     /* public key : y = g^x mod p */
     if (err == MP_OKAY)
-        err = mp_exptmod(&dsa->g, &dsa->x, &dsa->p, &dsa->y);
+        err = mp_exptmod_ex(&dsa->g, &dsa->x, dsa->q.used, &dsa->p, &dsa->y);
 
     if (err == MP_OKAY)
         dsa->type = DSA_PRIVATE;
@@ -249,7 +241,6 @@
             break;
         default:
             return BAD_FUNC_ARG;
-            break;
     }
 
     /* modulus size in bytes */
@@ -262,7 +253,7 @@
         return MEMORY_E;
     }
 
-    /* make a random string that will be multplied against q */
+    /* make a random string that will be multiplied against q */
     err = wc_RNG_GenerateBlock(rng, buf, msize - qsize);
     if (err != MP_OKAY) {
         XFREE(buf, dsa->heap, DYNAMIC_TYPE_TMP_BUFFER);
@@ -337,7 +328,7 @@
 
     /* loop until p is prime */
     while (check_prime == MP_NO) {
-        err = mp_prime_is_prime(&dsa->p, 8, &check_prime);
+        err = mp_prime_is_prime_ex(&dsa->p, 8, &check_prime, rng);
         if (err != MP_OKAY) {
             mp_clear(&dsa->q);
             mp_clear(&dsa->p);
@@ -426,21 +417,8 @@
 #endif /* WOLFSSL_KEY_GEN */
 
 
-/* Import raw DSA parameters into DsaKey structure for use with wc_MakeDsaKey(),
- * input parameters (p,q,g) should be represented as ASCII hex values.
- *
- * dsa  - pointer to initialized DsaKey structure
- * p    - DSA (p) parameter, ASCII hex string
- * pSz  - length of p
- * q    - DSA (q) parameter, ASCII hex string
- * qSz  - length of q
- * g    - DSA (g) parameter, ASCII hex string
- * gSz  - length of g
- *
- * returns 0 on success, negative upon failure
- */
-int wc_DsaImportParamsRaw(DsaKey* dsa, const char* p, const char* q,
-                          const char* g)
+static int _DsaImportParamsRaw(DsaKey* dsa, const char* p, const char* q,
+                          const char* g, int trusted, WC_RNG* rng)
 {
     int err;
     word32 pSz, qSz;
@@ -450,6 +428,18 @@
 
     /* read p */
     err = mp_read_radix(&dsa->p, p, MP_RADIX_HEX);
+    if (err == MP_OKAY && !trusted) {
+        int isPrime = 1;
+        if (rng == NULL)
+            err = mp_prime_is_prime(&dsa->p, 8, &isPrime);
+        else
+            err = mp_prime_is_prime_ex(&dsa->p, 8, &isPrime, rng);
+
+        if (err == MP_OKAY) {
+            if (!isPrime)
+                err = DH_CHECK_PUB_E;
+        }
+    }
 
     /* read q */
     if (err == MP_OKAY)
@@ -478,6 +468,49 @@
 }
 
 
+/* Import raw DSA parameters into DsaKey structure for use with wc_MakeDsaKey(),
+ * input parameters (p,q,g) should be represented as ASCII hex values.
+ *
+ * dsa  - pointer to initialized DsaKey structure
+ * p    - DSA (p) parameter, ASCII hex string
+ * pSz  - length of p
+ * q    - DSA (q) parameter, ASCII hex string
+ * qSz  - length of q
+ * g    - DSA (g) parameter, ASCII hex string
+ * gSz  - length of g
+ *
+ * returns 0 on success, negative upon failure
+ */
+int wc_DsaImportParamsRaw(DsaKey* dsa, const char* p, const char* q,
+                          const char* g)
+{
+    return _DsaImportParamsRaw(dsa, p, q, g, 1, NULL);
+}
+
+
+/* Import raw DSA parameters into DsaKey structure for use with wc_MakeDsaKey(),
+ * input parameters (p,q,g) should be represented as ASCII hex values. Check
+ * that the p value is probably prime.
+ *
+ * dsa  - pointer to initialized DsaKey structure
+ * p    - DSA (p) parameter, ASCII hex string
+ * pSz  - length of p
+ * q    - DSA (q) parameter, ASCII hex string
+ * qSz  - length of q
+ * g    - DSA (g) parameter, ASCII hex string
+ * gSz  - length of g
+ * trusted - trust that p is OK
+ * rng  - random number generator for the prime test
+ *
+ * returns 0 on success, negative upon failure
+ */
+int wc_DsaImportParamsRawCheck(DsaKey* dsa, const char* p, const char* q,
+                          const char* g, int trusted, WC_RNG* rng)
+{
+    return _DsaImportParamsRaw(dsa, p, q, g, trusted, rng);
+}
+
+
 /* Export raw DSA parameters from DsaKey structure
  *
  * dsa  - pointer to initialized DsaKey structure
@@ -621,10 +654,14 @@
 
 int wc_DsaSign(const byte* digest, byte* out, DsaKey* key, WC_RNG* rng)
 {
-    mp_int k, kInv, r, s, H;
-    int    ret, sz;
-    byte   buffer[DSA_HALF_SIZE];
-    byte*  tmp;  /* initial output pointer */
+    mp_int  k, kInv, r, s, H;
+#ifndef WOLFSSL_MP_INVMOD_CONSTANT_TIME
+    mp_int  b;
+#endif
+    mp_int* qMinus1;
+    int     ret = 0, sz;
+    byte    buffer[DSA_HALF_SIZE];
+    byte*   tmp;  /* initial output pointer */
 
     if (digest == NULL || out == NULL || key == NULL || rng == NULL) {
         return BAD_FUNC_ARG;
@@ -634,35 +671,54 @@
 
     sz = min((int)sizeof(buffer), mp_unsigned_bin_size(&key->q));
 
+#ifdef WOLFSSL_MP_INVMOD_CONSTANT_TIME
     if (mp_init_multi(&k, &kInv, &r, &s, &H, 0) != MP_OKAY)
+#else
+    if (mp_init_multi(&k, &kInv, &r, &s, &H, &b) != MP_OKAY)
+#endif
+    {
         return MP_INIT_E;
+    }
+    qMinus1 = &kInv;
 
-    do {
-        /* generate k */
-        ret = wc_RNG_GenerateBlock(rng, buffer, sz);
-        if (ret != 0)
-            return ret;
-
-        buffer[0] |= 0x0C;
+    /* NIST FIPS 186-4: B.2.2
+     * Per-Message Secret Number Generation by Testing Candidates
+     * Generate k in range [1, q-1].
+     *   Check that k is less than q-1: range [0, q-2].
+     *   Add 1 to k: range [1, q-1].
+     */
+    if (mp_sub_d(&key->q, 1, qMinus1))
+        ret = MP_SUB_E;
 
-        if (mp_read_unsigned_bin(&k, buffer, sz) != MP_OKAY)
-            ret = MP_READ_E;
+    if (ret == 0) {
+        do {
+            /* Step 4: generate k */
+            ret = wc_RNG_GenerateBlock(rng, buffer, sz);
+
+            /* Step 5 */
+            if (ret == 0 && mp_read_unsigned_bin(&k, buffer, sz) != MP_OKAY)
+                ret = MP_READ_E;
 
-        /* k is a random numnber and it should be less than q
-         * if k greater than repeat
-         */
-    } while (mp_cmp(&k, &key->q) != MP_LT);
+            /* k is a random numnber and it should be less than q-1
+             * if k greater than repeat
+             */
+        /* Step 6 */
+        } while (ret == 0 && mp_cmp(&k, qMinus1) != MP_LT);
+    }
+    /* Step 7 */
+    if (ret == 0 && mp_add_d(&k, 1, &k) != MP_OKAY)
+        ret = MP_MOD_E;
 
-    if (ret == 0 && mp_cmp_d(&k, 1) != MP_GT)
-        ret = MP_CMP_E;
-
+#ifdef WOLFSSL_MP_INVMOD_CONSTANT_TIME
     /* inverse k mod q */
     if (ret == 0 && mp_invmod(&k, &key->q, &kInv) != MP_OKAY)
         ret = MP_INVMOD_E;
 
     /* generate r, r = (g exp k mod p) mod q */
-    if (ret == 0 && mp_exptmod(&key->g, &k, &key->p, &r) != MP_OKAY)
+    if (ret == 0 && mp_exptmod_ex(&key->g, &k, key->q.used, &key->p,
+                                                               &r) != MP_OKAY) {
         ret = MP_EXPTMOD_E;
+    }
 
     if (ret == 0 && mp_mod(&r, &key->q, &r) != MP_OKAY)
         ret = MP_MOD_E;
@@ -680,6 +736,72 @@
 
     if (ret == 0 && mp_mulmod(&s, &kInv, &key->q, &s) != MP_OKAY)
         ret = MP_MULMOD_E;
+#else
+    /* Blinding value
+     * Generate b in range [1, q-1].
+     */
+    if (ret == 0) {
+        do {
+            ret = wc_RNG_GenerateBlock(rng, buffer, sz);
+            if (ret == 0 && mp_read_unsigned_bin(&b, buffer, sz) != MP_OKAY)
+                ret = MP_READ_E;
+        } while (ret == 0 && mp_cmp(&b, qMinus1) != MP_LT);
+    }
+    if (ret == 0 && mp_add_d(&b, 1, &b) != MP_OKAY)
+        ret = MP_MOD_E;
+
+    /* set H from sha digest */
+    if (ret == 0 && mp_read_unsigned_bin(&H, digest,
+                                               WC_SHA_DIGEST_SIZE) != MP_OKAY) {
+        ret = MP_READ_E;
+    }
+
+    /* generate r, r = (g exp k mod p) mod q */
+    if (ret == 0 && mp_exptmod_ex(&key->g, &k, key->q.used, &key->p,
+                                                               &r) != MP_OKAY) {
+        ret = MP_EXPTMOD_E;
+    }
+
+    /* calculate s = (H + xr)/k
+                   = b.(H/k.b + x.r/k.b) */
+
+    /* k = k.b */
+    if (ret == 0 && mp_mulmod(&k, &b, &key->q, &k) != MP_OKAY)
+        ret = MP_MULMOD_E;
+
+    /* kInv = 1/k.b mod q */
+    if (ret == 0 && mp_invmod(&k, &key->q, &kInv) != MP_OKAY)
+        ret = MP_INVMOD_E;
+
+    if (ret == 0 && mp_mod(&r, &key->q, &r) != MP_OKAY)
+        ret = MP_MOD_E;
+
+    /* s = x.r */
+    if (ret == 0 && mp_mul(&key->x, &r, &s) != MP_OKAY)
+        ret = MP_MUL_E;
+
+    /* s = x.r/k.b */
+    if (ret == 0 && mp_mulmod(&s, &kInv, &key->q, &s) != MP_OKAY)
+        ret = MP_MULMOD_E;
+
+    /* H = H/k.b */
+    if (ret == 0 && mp_mulmod(&H, &kInv, &key->q, &H) != MP_OKAY)
+        ret = MP_MULMOD_E;
+
+    /* s = H/k.b + x.r/k.b
+         = (H + x.r)/k.b */
+    if (ret == 0 && mp_add(&s, &H, &s) != MP_OKAY)
+        ret = MP_ADD_E;
+
+    /* s = b.(e + x.r)/k.b
+         = (e + x.r)/k */
+    if (ret == 0 && mp_mulmod(&s, &b, &key->q, &s) != MP_OKAY)
+        ret = MP_MULMOD_E;
+
+    /* s = (e + x.r)/k */
+    if (ret == 0 && mp_mod(&s, &key->q, &s) != MP_OKAY)
+        ret = MP_MOD_E;
+#endif
 
     /* detect zero r or s */
     if (ret == 0 && (mp_iszero(&r) == MP_YES || mp_iszero(&s) == MP_YES))
@@ -705,6 +827,14 @@
         }
     }
 
+    ForceZero(buffer, sz);
+    mp_forcezero(&kInv);
+    mp_forcezero(&k);
+#ifndef WOLFSSL_MP_INVMOD_CONSTANT_TIME
+    mp_forcezero(&b);
+
+    mp_clear(&b);
+#endif
     mp_clear(&H);
     mp_clear(&s);
     mp_clear(&r);
--- a/wolfcrypt/src/ecc.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/ecc.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* ecc.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -28,6 +28,9 @@
 /* in case user set HAVE_ECC there */
 #include <wolfssl/wolfcrypt/settings.h>
 
+/* public ASN interface */
+#include <wolfssl/wolfcrypt/asn_public.h>
+
 /*
 Possible ECC enable options:
  * HAVE_ECC:            Overall control of ECC                  default: on
@@ -50,6 +53,9 @@
  * FP_ECC:              ECC Fixed Point Cache                   default: off
  * USE_ECC_B_PARAM:     Enable ECC curve B param                default: off
                          (on for HAVE_COMP_KEY)
+ * WOLFSSL_ECC_CURVE_STATIC:                                    default off (on for windows)
+                        For the ECC curve paramaters `ecc_set_type` use fixed
+                        array for hex string
  */
 
 /*
@@ -87,12 +93,6 @@
     #error Brainpool and Koblitz curves requires WOLFSSL_CUSTOM_CURVES
 #endif
 
-/* Make sure ASN is enabled for ECC sign/verify */
-#if (defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)) && defined(NO_ASN)
-    #error ASN must be enabled for ECC sign/verify
-#endif
-
-
 #if defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)
     /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */
     #define FIPS_NO_WRAPPERS
@@ -122,8 +122,8 @@
     #include <wolfssl/wolfcrypt/hash.h>
 #endif
 
-#ifdef WOLF_CRYPTO_DEV
-    #include <wolfssl/wolfcrypt/cryptodev.h>
+#ifdef WOLF_CRYPTO_CB
+    #include <wolfssl/wolfcrypt/cryptocb.h>
 #endif
 
 #ifdef NO_INLINE
@@ -137,6 +137,10 @@
     #include <wolfssl/wolfcrypt/port/nxp/ksdk_port.h>
 #endif
 
+#if defined(WOLFSSL_STM32_PKA)
+    #include <wolfssl/wolfcrypt/port/st/stm32.h>
+#endif
+
 #ifdef WOLFSSL_SP_MATH
     #define GEN_MEM_ERR MP_MEM
 #elif defined(USE_FAST_MATH)
@@ -211,7 +215,7 @@
             #define CODED_SECP112R1    {0x2B,0x81,0x04,0x00,0x06}
             #define CODED_SECP112R1_SZ 5
         #endif
-        #ifndef USE_WINDOWS_API
+        #ifndef WOLFSSL_ECC_CURVE_STATIC
             static const ecc_oid_t ecc_oid_secp112r1[] = CODED_SECP112R1;
         #else
             #define ecc_oid_secp112r1 CODED_SECP112R1
@@ -226,7 +230,7 @@
             #define CODED_SECP112R2    {0x2B,0x81,0x04,0x00,0x07}
             #define CODED_SECP112R2_SZ 5
         #endif
-        #ifndef USE_WINDOWS_API
+        #ifndef WOLFSSL_ECC_CURVE_STATIC
             static const ecc_oid_t ecc_oid_secp112r2[] = CODED_SECP112R2;
         #else
             #define ecc_oid_secp112r2 CODED_SECP112R2
@@ -243,7 +247,7 @@
             #define CODED_SECP128R1    {0x2B,0x81,0x04,0x00,0x1C}
             #define CODED_SECP128R1_SZ 5
         #endif
-        #ifndef USE_WINDOWS_API
+        #ifndef WOLFSSL_ECC_CURVE_STATIC
             static const ecc_oid_t ecc_oid_secp128r1[] = CODED_SECP128R1;
         #else
             #define ecc_oid_secp128r1 CODED_SECP128R1
@@ -258,7 +262,7 @@
             #define CODED_SECP128R2    {0x2B,0x81,0x04,0x00,0x1D}
             #define CODED_SECP128R2_SZ 5
         #endif
-        #ifndef USE_WINDOWS_API
+        #ifndef WOLFSSL_ECC_CURVE_STATIC
             static const ecc_oid_t ecc_oid_secp128r2[] = CODED_SECP128R2;
         #else
             #define ecc_oid_secp128r2 CODED_SECP128R2
@@ -275,7 +279,7 @@
             #define CODED_SECP160R1    {0x2B,0x81,0x04,0x00,0x08}
             #define CODED_SECP160R1_SZ 5
         #endif
-        #ifndef USE_WINDOWS_API
+        #ifndef WOLFSSL_ECC_CURVE_STATIC
             static const ecc_oid_t ecc_oid_secp160r1[] = CODED_SECP160R1;
         #else
             #define ecc_oid_secp160r1 CODED_SECP160R1
@@ -285,12 +289,12 @@
     #ifdef HAVE_ECC_SECPR2
         #ifdef HAVE_OID_ENCODING
             #define CODED_SECP160R2    {1,3,132,0,30}
-            #define CODED_SECP160R1_SZ 5
+            #define CODED_SECP160R2_SZ 5
         #else
             #define CODED_SECP160R2    {0x2B,0x81,0x04,0x00,0x1E}
             #define CODED_SECP160R2_SZ 5
         #endif
-        #ifndef USE_WINDOWS_API
+        #ifndef WOLFSSL_ECC_CURVE_STATIC
             static const ecc_oid_t ecc_oid_secp160r2[] = CODED_SECP160R2;
         #else
             #define ecc_oid_secp160r2 CODED_SECP160R2
@@ -305,7 +309,7 @@
             #define CODED_SECP160K1    {0x2B,0x81,0x04,0x00,0x09}
             #define CODED_SECP160K1_SZ 5
         #endif
-        #ifndef USE_WINDOWS_API
+        #ifndef WOLFSSL_ECC_CURVE_STATIC
             static const ecc_oid_t ecc_oid_secp160k1[] = CODED_SECP160K1;
         #else
             #define ecc_oid_secp160k1 CODED_SECP160K1
@@ -320,7 +324,7 @@
             #define CODED_BRAINPOOLP160R1    {0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x01}
             #define CODED_BRAINPOOLP160R1_SZ 9
         #endif
-        #ifndef USE_WINDOWS_API
+        #ifndef WOLFSSL_ECC_CURVE_STATIC
             static const ecc_oid_t ecc_oid_brainpoolp160r1[] = CODED_BRAINPOOLP160R1;
         #else
             #define ecc_oid_brainpoolp160r1 CODED_BRAINPOOLP160R1
@@ -337,7 +341,7 @@
             #define CODED_SECP192R1    {0x2A,0x86,0x48,0xCE,0x3D,0x03,0x01,0x01}
             #define CODED_SECP192R1_SZ 8
         #endif
-        #ifndef USE_WINDOWS_API
+        #ifndef WOLFSSL_ECC_CURVE_STATIC
             static const ecc_oid_t ecc_oid_secp192r1[] = CODED_SECP192R1;
         #else
             #define ecc_oid_secp192r1 CODED_SECP192R1
@@ -352,7 +356,7 @@
             #define CODED_PRIME192V2    {0x2A,0x86,0x48,0xCE,0x3D,0x03,0x01,0x02}
             #define CODED_PRIME192V2_SZ 8
         #endif
-        #ifndef USE_WINDOWS_API
+        #ifndef WOLFSSL_ECC_CURVE_STATIC
             static const ecc_oid_t ecc_oid_prime192v2[] = CODED_PRIME192V2;
         #else
             #define ecc_oid_prime192v2 CODED_PRIME192V2
@@ -367,7 +371,7 @@
             #define CODED_PRIME192V3    {0x2A,0x86,0x48,0xCE,0x3D,0x03,0x01,0x03}
             #define CODED_PRIME192V3_SZ 8
         #endif
-        #ifndef USE_WINDOWS_API
+        #ifndef WOLFSSL_ECC_CURVE_STATIC
             static const ecc_oid_t ecc_oid_prime192v3[] = CODED_PRIME192V3;
         #else
             #define ecc_oid_prime192v3 CODED_PRIME192V3
@@ -382,7 +386,7 @@
             #define CODED_SECP192K1    {0x2B,0x81,0x04,0x00,0x1F}
             #define CODED_SECP192K1_SZ 5
         #endif
-        #ifndef USE_WINDOWS_API
+        #ifndef WOLFSSL_ECC_CURVE_STATIC
             static const ecc_oid_t ecc_oid_secp192k1[] = CODED_SECP192K1;
         #else
             #define ecc_oid_secp192k1 CODED_SECP192K1
@@ -397,7 +401,7 @@
             #define CODED_BRAINPOOLP192R1    {0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x03}
             #define CODED_BRAINPOOLP192R1_SZ 9
         #endif
-        #ifndef USE_WINDOWS_API
+        #ifndef WOLFSSL_ECC_CURVE_STATIC
             static const ecc_oid_t ecc_oid_brainpoolp192r1[] = CODED_BRAINPOOLP192R1;
         #else
             #define ecc_oid_brainpoolp192r1 CODED_BRAINPOOLP192R1
@@ -414,7 +418,7 @@
             #define CODED_SECP224R1    {0x2B,0x81,0x04,0x00,0x21}
             #define CODED_SECP224R1_SZ 5
         #endif
-        #ifndef USE_WINDOWS_API
+        #ifndef WOLFSSL_ECC_CURVE_STATIC
             static const ecc_oid_t ecc_oid_secp224r1[] = CODED_SECP224R1;
         #else
             #define ecc_oid_secp224r1 CODED_SECP224R1
@@ -429,7 +433,7 @@
             #define CODED_SECP224K1    {0x2B,0x81,0x04,0x00,0x20}
             #define CODED_SECP224K1_SZ 5
         #endif
-        #ifndef USE_WINDOWS_API
+        #ifndef WOLFSSL_ECC_CURVE_STATIC
             static const ecc_oid_t ecc_oid_secp224k1[] = CODED_SECP224K1;
         #else
             #define ecc_oid_secp224k1 CODED_SECP224K1
@@ -444,7 +448,7 @@
             #define CODED_BRAINPOOLP224R1    {0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x05}
             #define CODED_BRAINPOOLP224R1_SZ 9
         #endif
-        #ifndef USE_WINDOWS_API
+        #ifndef WOLFSSL_ECC_CURVE_STATIC
             static const ecc_oid_t ecc_oid_brainpoolp224r1[] = CODED_BRAINPOOLP224R1;
         #else
             #define ecc_oid_brainpoolp224r1 CODED_BRAINPOOLP224R1
@@ -461,7 +465,7 @@
             #define CODED_PRIME239V1    {0x2A,0x86,0x48,0xCE,0x3D,0x03,0x01,0x04}
             #define CODED_PRIME239V1_SZ 8
         #endif
-        #ifndef USE_WINDOWS_API
+        #ifndef WOLFSSL_ECC_CURVE_STATIC
             static const ecc_oid_t ecc_oid_prime239v1[] = CODED_PRIME239V1;
         #else
             #define ecc_oid_prime239v1 CODED_PRIME239V1
@@ -476,7 +480,7 @@
             #define CODED_PRIME239V2    {0x2A,0x86,0x48,0xCE,0x3D,0x03,0x01,0x05}
             #define CODED_PRIME239V2_SZ 8
         #endif
-        #ifndef USE_WINDOWS_API
+        #ifndef WOLFSSL_ECC_CURVE_STATIC
             static const ecc_oid_t ecc_oid_prime239v2[] = CODED_PRIME239V2;
         #else
             #define ecc_oid_prime239v2 CODED_PRIME239V2
@@ -491,7 +495,7 @@
             #define CODED_PRIME239V3    {0x2A,0x86,0x48,0xCE,0x3D,0x03,0x01,0x06}
             #define CODED_PRIME239V3_SZ 8
         #endif
-        #ifndef USE_WINDOWS_API
+        #ifndef WOLFSSL_ECC_CURVE_STATIC
             static const ecc_oid_t ecc_oid_prime239v3[] = CODED_PRIME239V3;
         #else
             #define ecc_oid_prime239v3 CODED_PRIME239V3
@@ -508,7 +512,7 @@
             #define CODED_SECP256R1    {0x2A,0x86,0x48,0xCE,0x3D,0x03,0x01,0x07}
             #define CODED_SECP256R1_SZ 8
         #endif
-        #ifndef USE_WINDOWS_API
+        #ifndef WOLFSSL_ECC_CURVE_STATIC
             static const ecc_oid_t ecc_oid_secp256r1[] = CODED_SECP256R1;
         #else
             #define ecc_oid_secp256r1 CODED_SECP256R1
@@ -523,7 +527,7 @@
             #define CODED_SECP256K1    {0x2B,0x81,0x04,0x00,0x0A}
             #define CODED_SECP256K1_SZ 5
         #endif
-        #ifndef USE_WINDOWS_API
+        #ifndef WOLFSSL_ECC_CURVE_STATIC
             static const ecc_oid_t ecc_oid_secp256k1[] = CODED_SECP256K1;
         #else
             #define ecc_oid_secp256k1 CODED_SECP256K1
@@ -538,7 +542,7 @@
             #define CODED_BRAINPOOLP256R1    {0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x07}
             #define CODED_BRAINPOOLP256R1_SZ 9
         #endif
-        #ifndef USE_WINDOWS_API
+        #ifndef WOLFSSL_ECC_CURVE_STATIC
             static const ecc_oid_t ecc_oid_brainpoolp256r1[] = CODED_BRAINPOOLP256R1;
         #else
             #define ecc_oid_brainpoolp256r1 CODED_BRAINPOOLP256R1
@@ -555,7 +559,7 @@
             #define CODED_BRAINPOOLP320R1    {0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x09}
             #define CODED_BRAINPOOLP320R1_SZ 9
         #endif
-        #ifndef USE_WINDOWS_API
+        #ifndef WOLFSSL_ECC_CURVE_STATIC
             static const ecc_oid_t ecc_oid_brainpoolp320r1[] = CODED_BRAINPOOLP320R1;
         #else
             #define ecc_oid_brainpoolp320r1 CODED_BRAINPOOLP320R1
@@ -572,7 +576,7 @@
             #define CODED_SECP384R1    {0x2B,0x81,0x04,0x00,0x22}
             #define CODED_SECP384R1_SZ 5
         #endif
-        #ifndef USE_WINDOWS_API
+        #ifndef WOLFSSL_ECC_CURVE_STATIC
             static const ecc_oid_t ecc_oid_secp384r1[] = CODED_SECP384R1;
             #define CODED_SECP384R1_OID ecc_oid_secp384r1
         #else
@@ -588,7 +592,7 @@
             #define CODED_BRAINPOOLP384R1    {0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x0B}
             #define CODED_BRAINPOOLP384R1_SZ 9
         #endif
-        #ifndef USE_WINDOWS_API
+        #ifndef WOLFSSL_ECC_CURVE_STATIC
             static const ecc_oid_t ecc_oid_brainpoolp384r1[] = CODED_BRAINPOOLP384R1;
         #else
             #define ecc_oid_brainpoolp384r1 CODED_BRAINPOOLP384R1
@@ -605,7 +609,7 @@
             #define CODED_BRAINPOOLP512R1    {0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x0D}
             #define CODED_BRAINPOOLP512R1_SZ 9
         #endif
-        #ifndef USE_WINDOWS_API
+        #ifndef WOLFSSL_ECC_CURVE_STATIC
             static const ecc_oid_t ecc_oid_brainpoolp512r1[] = CODED_BRAINPOOLP512R1;
         #else
             #define ecc_oid_brainpoolp512r1 CODED_BRAINPOOLP512R1
@@ -622,7 +626,7 @@
             #define CODED_SECP521R1     {0x2B,0x81,0x04,0x00,0x23}
             #define CODED_SECP521R1_SZ 5
         #endif
-        #ifndef USE_WINDOWS_API
+        #ifndef WOLFSSL_ECC_CURVE_STATIC
             static const ecc_oid_t ecc_oid_secp521r1[] = CODED_SECP521R1;
         #else
             #define ecc_oid_secp521r1 CODED_SECP521R1
@@ -1117,25 +1121,27 @@
     {
         1, /* non-zero */
         ECC_CURVE_CUSTOM,
-        #ifndef USE_WINDOWS_API
+        #ifndef WOLFSSL_ECC_CURVE_STATIC
             NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
         #else
-            0, 0, 0, 0, 0, 0, 0, 0,
+            {0},{0},{0},{0},{0},{0},{0},{0},
         #endif
         0, 0, 0
     },
 #endif
     {
-        0, -1,
-        #ifndef USE_WINDOWS_API
+        0,
+        ECC_CURVE_INVALID,
+        #ifndef WOLFSSL_ECC_CURVE_STATIC
             NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
         #else
-            0, 0, 0, 0, 0, 0, 0, 0,
+            {0},{0},{0},{0},{0},{0},{0},{0},
         #endif
         0, 0, 0
     }
 };
 #define ECC_SET_COUNT   (sizeof(ecc_sets)/sizeof(ecc_set_type))
+const size_t ecc_sets_count = ECC_SET_COUNT - 1;
 
 
 #ifdef HAVE_OID_ENCODING
@@ -1152,11 +1158,9 @@
 static int wc_ecc_export_x963_compressed(ecc_key*, byte* out, word32* outLen);
 #endif
 
-#ifdef WOLFSSL_ATECC508A
-    typedef void* ecc_curve_spec;
-#else
-
-#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || !defined(WOLFSSL_SP_MATH)
+
+#if (defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || !defined(WOLFSSL_SP_MATH)) && \
+    !defined(WOLFSSL_ATECC508A)
 static int ecc_check_pubkey_order(ecc_key* key, ecc_point* pubkey, mp_int* a,
         mp_int* prime, mp_int* order);
 #endif
@@ -1441,8 +1445,6 @@
 }
 #endif /* ECC_CACHE_CURVE */
 
-#endif /* WOLFSSL_ATECC508A */
-
 
 /* Retrieve the curve name for the ECC curve id.
  *
@@ -1499,10 +1501,10 @@
 
 
 #ifdef ALT_ECC_SIZE
-static void alt_fp_init(fp_int* a)
+static void alt_fp_init(mp_int* a)
 {
     a->size = FP_SIZE_ECC;
-    fp_zero(a);
+    mp_zero(a);
 }
 #endif /* ALT_ECC_SIZE */
 
@@ -1884,8 +1886,19 @@
     (void)a;
     (void)mp;
 
-    return sp_ecc_proj_add_point_256(P->x, P->y, P->z, Q->x, Q->y, Q->z,
-                                     R->x, R->y, R->z);
+#ifndef WOLFSSL_SP_NO_256
+    if (mp_count_bits(modulus) == 256) {
+        return sp_ecc_proj_add_point_256(P->x, P->y, P->z, Q->x, Q->y, Q->z,
+                                         R->x, R->y, R->z);
+    }
+#endif
+#ifdef WOLFSSL_SP_384
+    if (mp_count_bits(modulus) == 384) {
+        return sp_ecc_proj_add_point_384(P->x, P->y, P->z, Q->x, Q->y, Q->z,
+                                         R->x, R->y, R->z);
+    }
+#endif
+    return ECC_BAD_ARG_E;
 #endif
 }
 
@@ -2253,19 +2266,30 @@
     (void)a;
     (void)mp;
 
-    return sp_ecc_proj_dbl_point_256(P->x, P->y, P->z, R->x, R->y, R->z);
+#ifndef WOLFSSL_SP_NO_256
+    if (mp_count_bits(modulus) == 256) {
+        return sp_ecc_proj_dbl_point_256(P->x, P->y, P->z, R->x, R->y, R->z);
+    }
+#endif
+#ifdef WOLFSSL_SP_384
+    if (mp_count_bits(modulus) == 384) {
+        return sp_ecc_proj_dbl_point_384(P->x, P->y, P->z, R->x, R->y, R->z);
+    }
+#endif
+    return ECC_BAD_ARG_E;
 #endif
 }
 
 
 /**
-  Map a projective jacbobian point back to affine space
+  Map a projective Jacobian point back to affine space
   P        [in/out] The point to map
   modulus  The modulus of the field the ECC curve is in
   mp       The "b" value from montgomery_setup()
+  ct       Operation should be constant time.
   return   MP_OKAY on success
 */
-int ecc_map(ecc_point* P, mp_int* modulus, mp_digit mp)
+int ecc_map_ex(ecc_point* P, mp_int* modulus, mp_digit mp, int ct)
 {
 #ifndef WOLFSSL_SP_MATH
 #ifdef WOLFSSL_SMALL_STACK
@@ -2285,6 +2309,8 @@
    mp_int *x, *y, *z;
    int    err;
 
+   (void)ct;
+
    if (P == NULL || modulus == NULL)
        return ECC_BAD_ARG_E;
 
@@ -2380,12 +2406,23 @@
    z = P->z;
 #endif
 
-   /* first map z back to normal */
-   err = mp_montgomery_reduce(z, modulus, mp);
-
    /* get 1/z */
-   if (err == MP_OKAY)
-       err = mp_invmod(z, modulus, t1);
+   if (err == MP_OKAY) {
+#if defined(ECC_TIMING_RESISTANT) && defined(USE_FAST_MATH)
+       if (ct) {
+           err = mp_invmod_mont_ct(z, modulus, t1, mp);
+           if (err == MP_OKAY)
+               err = mp_montgomery_reduce(t1, modulus, mp);
+       }
+       else
+#endif
+       {
+           /* first map z back to normal */
+           err = mp_montgomery_reduce(z, modulus, mp);
+           if (err == MP_OKAY)
+               err = mp_invmod(z, modulus, t1);
+       }
+   }
 
    /* get 1/z^2 and 1/z^3 */
    if (err == MP_OKAY)
@@ -2448,13 +2485,27 @@
 
     (void)mp;
 
-    return sp_ecc_map_256(P->x, P->y, P->z);
-#endif
-}
-
+#ifndef WOLFSSL_SP_NO_256
+    if (mp_count_bits(modulus) == 256) {
+        return sp_ecc_map_256(P->x, P->y, P->z);
+    }
+#endif
+#ifdef WOLFSSL_SP_384
+    if (mp_count_bits(modulus) == 384) {
+        return sp_ecc_map_384(P->x, P->y, P->z);
+    }
+#endif
+    return ECC_BAD_ARG_E;
+#endif
+}
+
+int ecc_map(ecc_point* P, mp_int* modulus, mp_digit mp)
+{
+    return ecc_map_ex(P, modulus, mp, 0);
+}
 #endif /* !WOLFSSL_SP_MATH || WOLFSSL_PUBLIC_ECC_ADD_DBL */
 
-#if !defined(FREESCALE_LTC_ECC)
+#if !defined(FREESCALE_LTC_ECC) && !defined(WOLFSSL_STM32_PKA)
 
 #if !defined(FP_ECC) || !defined(WOLFSSL_SP_MATH)
 /**
@@ -2484,17 +2535,19 @@
    #define WINSIZE  4
    #define M_POINTS 8
    int           first = 1, bitbuf = 0, bitcpy = 0, j;
+#elif defined(WC_NO_CACHE_RESISTANT)
+   #define M_POINTS 4
 #else
-   #define M_POINTS 3
+   #define M_POINTS 5
 #endif
 
    ecc_point     *tG, *M[M_POINTS];
    int           i, err;
+#ifdef WOLFSSL_SMALL_STACK_CACHE
+   ecc_key       key;
+#endif
 #ifdef WOLFSSL_SMALL_STACK
    mp_int*       mu = NULL;
-#ifdef WOLFSSL_SMALL_STACK_CACHE
-   ecc_key       key;
-#endif
 #else
    mp_int        mu[1];
 #endif
@@ -2642,8 +2695,8 @@
    /* now find (8+k)G for k=1..7 */
    if (err == MP_OKAY)
        for (j = 9; j < 16; j++) {
-           err = ecc_projective_add_point(M[j-9], tG, M[j-M_POINTS], a,
-                                                                modulus, mp);
+           err = ecc_projective_add_point(M[j-9], tG, M[j-M_POINTS], a, modulus,
+                                                                            mp);
            if (err != MP_OKAY) break;
        }
 
@@ -2711,7 +2764,7 @@
 
                    /* then add, bitbuf will be 8..15 [8..2^WINSIZE] guaranteed */
                    err = ecc_projective_add_point(R, M[bitbuf-M_POINTS], R, a,
-                                                               modulus, mp);
+                                                                   modulus, mp);
                }
                if (err != MP_OKAY) break;
                /* empty window and reset */
@@ -2747,8 +2800,7 @@
                        first = 0;
                    } else {
                        /* then add */
-                       err = ecc_projective_add_point(R, tG, R, a, modulus,
-                                                                       mp);
+                       err = ecc_projective_add_point(R, tG, R, a, modulus, mp);
                        if (err != MP_OKAY) break;
                    }
                }
@@ -2772,12 +2824,23 @@
    /* M[1] == 2G */
    if (err == MP_OKAY)
        err = ecc_projective_dbl_point(tG, M[1], a, modulus, mp);
+#ifdef WC_NO_CACHE_RESISTANT
+   if (err == MP_OKAY)
+       err = wc_ecc_copy_point(M[0], M[2]);
+#else
+   if (err == MP_OKAY)
+       err = wc_ecc_copy_point(M[0], M[3]);
+   if (err == MP_OKAY)
+       err = wc_ecc_copy_point(M[1], M[4]);
+#endif
 
    /* setup sliding window */
    mode   = 0;
    bitcnt = 1;
    buf    = 0;
-   digidx = get_digit_count(k) - 1;
+   digidx = get_digit_count(modulus) - 1;
+   /* The order MAY be 1 bit longer than the modulus. */
+   digidx += (modulus->dp[digidx] >> (DIGIT_BIT-1));
 
    /* perform ops */
    if (err == MP_OKAY) {
@@ -2796,75 +2859,84 @@
            i = (buf >> (DIGIT_BIT - 1)) & 1;
            buf <<= 1;
 
-           if (mode == 0 && i == 0) {
+#ifdef WC_NO_CACHE_RESISTANT
+           if (mode == 0) {
                /* timing resistant - dummy operations */
                if (err == MP_OKAY)
-                   err = ecc_projective_add_point(M[0], M[1], M[2], a, modulus,
+                   err = ecc_projective_add_point(M[1], M[2], M[2], a, modulus,
                                                   mp);
                if (err == MP_OKAY)
-                   err = ecc_projective_dbl_point(M[1], M[2], a, modulus, mp);
+                   err = ecc_projective_dbl_point(M[2], M[3], a, modulus, mp);
+           }
+           else {
                if (err == MP_OKAY)
-                   continue;
+                   err = ecc_projective_add_point(M[0], M[1], M[i^1], a,
+                                                  modulus, mp);
+               if (err == MP_OKAY)
+                   err = ecc_projective_dbl_point(M[i], M[i], a, modulus, mp);
            }
-
-           if (mode == 0 && i == 1) {
-               mode = 1;
-               /* timing resistant - dummy operations */
-               if (err == MP_OKAY)
-                   err = ecc_projective_add_point(M[0], M[1], M[2], a, modulus,
-                                                  mp);
-               if (err == MP_OKAY)
-                   err = ecc_projective_dbl_point(M[1], M[2], a, modulus, mp);
-               if (err == MP_OKAY)
-                   continue;
-           }
+#else
+           if (err == MP_OKAY)
+               err = ecc_projective_add_point(M[0], M[1], M[2], a, modulus, mp);
+           if (err == MP_OKAY)
+               err = mp_cond_copy(M[2]->x, i, M[0]->x);
+           if (err == MP_OKAY)
+               err = mp_cond_copy(M[2]->y, i, M[0]->y);
+           if (err == MP_OKAY)
+               err = mp_cond_copy(M[2]->z, i, M[0]->z);
+           if (err == MP_OKAY)
+               err = mp_cond_copy(M[2]->x, i ^ 1, M[1]->x);
+           if (err == MP_OKAY)
+               err = mp_cond_copy(M[2]->y, i ^ 1, M[1]->y);
+           if (err == MP_OKAY)
+               err = mp_cond_copy(M[2]->z, i ^ 1, M[1]->z);
 
            if (err == MP_OKAY)
-               err = ecc_projective_add_point(M[0], M[1], M[i^1], a, modulus,
-                                                                       mp);
-#ifdef WC_NO_CACHE_RESISTANT
+               err = mp_cond_copy(M[0]->x, i ^ 1, M[2]->x);
            if (err == MP_OKAY)
-               err = ecc_projective_dbl_point(M[i], M[i], a, modulus, mp);
-#else
-            /* instead of using M[i] for double, which leaks key bit to cache
-             * monitor, use M[2] as temp, make sure address calc is constant,
-             * keep M[0] and M[1] in cache */
+               err = mp_cond_copy(M[0]->y, i ^ 1, M[2]->y);
+           if (err == MP_OKAY)
+               err = mp_cond_copy(M[0]->z, i ^ 1, M[2]->z);
            if (err == MP_OKAY)
-               err = mp_copy((mp_int*)
-                             ( ((wolfssl_word)M[0]->x & wc_off_on_addr[i^1]) +
-                               ((wolfssl_word)M[1]->x & wc_off_on_addr[i])),
-                             M[2]->x);
+               err = mp_cond_copy(M[1]->x, i, M[2]->x);
            if (err == MP_OKAY)
-               err = mp_copy((mp_int*)
-                             ( ((wolfssl_word)M[0]->y & wc_off_on_addr[i^1]) +
-                               ((wolfssl_word)M[1]->y & wc_off_on_addr[i])),
-                             M[2]->y);
+               err = mp_cond_copy(M[1]->y, i, M[2]->y);
            if (err == MP_OKAY)
-               err = mp_copy((mp_int*)
-                             ( ((wolfssl_word)M[0]->z & wc_off_on_addr[i^1]) +
-                               ((wolfssl_word)M[1]->z & wc_off_on_addr[i])),
-                             M[2]->z);
+               err = mp_cond_copy(M[1]->z, i, M[2]->z);
+
            if (err == MP_OKAY)
                err = ecc_projective_dbl_point(M[2], M[2], a, modulus, mp);
-           /* copy M[2] back to M[i] */
+           if (err == MP_OKAY)
+               err = mp_cond_copy(M[2]->x, i ^ 1, M[0]->x);
+           if (err == MP_OKAY)
+               err = mp_cond_copy(M[2]->y, i ^ 1, M[0]->y);
+           if (err == MP_OKAY)
+               err = mp_cond_copy(M[2]->z, i ^ 1, M[0]->z);
            if (err == MP_OKAY)
-               err = mp_copy(M[2]->x,
-                             (mp_int*)
-                             ( ((wolfssl_word)M[0]->x & wc_off_on_addr[i^1]) +
-                               ((wolfssl_word)M[1]->x & wc_off_on_addr[i])) );
+               err = mp_cond_copy(M[2]->x, i, M[1]->x);
+           if (err == MP_OKAY)
+               err = mp_cond_copy(M[2]->y, i, M[1]->y);
+           if (err == MP_OKAY)
+               err = mp_cond_copy(M[2]->z, i, M[1]->z);
+
            if (err == MP_OKAY)
-               err = mp_copy(M[2]->y,
-                             (mp_int*)
-                             ( ((wolfssl_word)M[0]->y & wc_off_on_addr[i^1]) +
-                               ((wolfssl_word)M[1]->y & wc_off_on_addr[i])) );
+               err = mp_cond_copy(M[3]->x, (mode ^ 1) & i, M[0]->x);
+           if (err == MP_OKAY)
+               err = mp_cond_copy(M[3]->y, (mode ^ 1) & i, M[0]->y);
+           if (err == MP_OKAY)
+               err = mp_cond_copy(M[3]->z, (mode ^ 1) & i, M[0]->z);
            if (err == MP_OKAY)
-               err = mp_copy(M[2]->z,
-                             (mp_int*)
-                             ( ((wolfssl_word)M[0]->z & wc_off_on_addr[i^1]) +
-                               ((wolfssl_word)M[1]->z & wc_off_on_addr[i])) );
+               err = mp_cond_copy(M[4]->x, (mode ^ 1) & i, M[1]->x);
+           if (err == MP_OKAY)
+               err = mp_cond_copy(M[4]->y, (mode ^ 1) & i, M[1]->y);
+           if (err == MP_OKAY)
+               err = mp_cond_copy(M[4]->z, (mode ^ 1) & i, M[1]->z);
+#endif /* WC_NO_CACHE_RESISTANT */
+
            if (err != MP_OKAY)
                break;
-#endif /* WC_NO_CACHE_RESISTANT */
+
+           mode |= i;
        } /* end for */
    }
 
@@ -2911,13 +2983,23 @@
 
    (void)a;
 
-   return sp_ecc_mulmod_256(k, G, R, map, heap);
+#ifndef WOLFSSL_SP_NO_256
+   if (mp_count_bits(modulus) == 256) {
+       return sp_ecc_mulmod_256(k, G, R, map, heap);
+   }
+#endif
+#ifdef WOLFSSL_SP_384
+   if (mp_count_bits(modulus) == 384) {
+       return sp_ecc_mulmod_384(k, G, R, map, heap);
+   }
+#endif
+   return ECC_BAD_ARG_E;
 #endif
 }
 
 #endif /* !FP_ECC || !WOLFSSL_SP_MATH */
 
-#endif /* !FREESCALE_LTC_ECC */
+#endif /* !FREESCALE_LTC_ECC && !WOLFSSL_STM32_PKA */
 
 /** ECC Fixed Point mulmod global
     k        The multiplicand
@@ -2925,7 +3007,7 @@
     R        [out] Destination of product
     a        ECC curve parameter a
     modulus  The modulus for the curve
-    map      [boolean] If non-zero maps the point back to affine co-ordinates,
+    map      [boolean] If non-zero maps the point back to affine coordinates,
              otherwise it's left in jacobian-montgomery form
     return MP_OKAY if successful
 */
@@ -3126,7 +3208,10 @@
     len = (word32)XSTRLEN(curveName);
 
     for (curve_idx = 0; ecc_sets[curve_idx].size != 0; curve_idx++) {
-        if (ecc_sets[curve_idx].name &&
+        if (
+        #ifndef WOLFSSL_ECC_CURVE_STATIC
+            ecc_sets[curve_idx].name &&
+        #endif
                 XSTRNCASECMP(ecc_sets[curve_idx].name, curveName, len) == 0) {
             break;
         }
@@ -3179,11 +3264,11 @@
 }
 
 /* Compares a curve parameter (hex, from ecc_sets[]) to given input
- * parameter (byte array) for equality.
- *
+ * parameter for equality.
+ * encType is WC_TYPE_UNSIGNED_BIN or WC_TYPE_HEX_STR
  * Returns MP_EQ on success, negative on error */
 static int wc_ecc_cmp_param(const char* curveParam,
-                            const byte* param, word32 paramSz)
+                            const byte* param, word32 paramSz, int encType)
 {
     int err = MP_OKAY;
 #ifdef WOLFSSL_SMALL_STACK
@@ -3196,6 +3281,9 @@
     if (param == NULL || curveParam == NULL)
         return BAD_FUNC_ARG;
 
+    if (encType == WC_TYPE_HEX_STR)
+        return XSTRNCMP(curveParam, (char*) param, paramSz);
+
 #ifdef WOLFSSL_SMALL_STACK
     a = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC);
     if (a == NULL)
@@ -3207,12 +3295,17 @@
     }
 #endif
 
-    if ((err = mp_init_multi(a, b, NULL, NULL, NULL, NULL)) != MP_OKAY)
+    if ((err = mp_init_multi(a, b, NULL, NULL, NULL, NULL)) != MP_OKAY) {
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(a, NULL, DYNAMIC_TYPE_ECC);
+        XFREE(b, NULL, DYNAMIC_TYPE_ECC);
+    #endif
         return err;
-
-    if (err == MP_OKAY)
+    }
+
+    if (err == MP_OKAY) {
         err = mp_read_unsigned_bin(a, param, paramSz);
-
+    }
     if (err == MP_OKAY)
         err = mp_read_radix(b, curveParam, MP_RADIX_HEX);
 
@@ -3271,13 +3364,17 @@
     for (idx = 0; ecc_sets[idx].size != 0; idx++) {
         if (curveSz == ecc_sets[idx].size) {
             if ((wc_ecc_cmp_param(ecc_sets[idx].prime, prime,
-                            primeSz) == MP_EQ) &&
-                (wc_ecc_cmp_param(ecc_sets[idx].Af, Af, AfSz) == MP_EQ) &&
-                (wc_ecc_cmp_param(ecc_sets[idx].Bf, Bf, BfSz) == MP_EQ) &&
+                            primeSz, WC_TYPE_UNSIGNED_BIN) == MP_EQ) &&
+                (wc_ecc_cmp_param(ecc_sets[idx].Af, Af, AfSz,
+                                  WC_TYPE_UNSIGNED_BIN) == MP_EQ) &&
+                (wc_ecc_cmp_param(ecc_sets[idx].Bf, Bf, BfSz,
+                                  WC_TYPE_UNSIGNED_BIN) == MP_EQ) &&
                 (wc_ecc_cmp_param(ecc_sets[idx].order, order,
-                                  orderSz) == MP_EQ) &&
-                (wc_ecc_cmp_param(ecc_sets[idx].Gx, Gx, GxSz) == MP_EQ) &&
-                (wc_ecc_cmp_param(ecc_sets[idx].Gy, Gy, GySz) == MP_EQ) &&
+                                  orderSz, WC_TYPE_UNSIGNED_BIN) == MP_EQ) &&
+                (wc_ecc_cmp_param(ecc_sets[idx].Gx, Gx, GxSz,
+                                  WC_TYPE_UNSIGNED_BIN) == MP_EQ) &&
+                (wc_ecc_cmp_param(ecc_sets[idx].Gy, Gy, GySz,
+                                  WC_TYPE_UNSIGNED_BIN) == MP_EQ) &&
                 (cofactor == ecc_sets[idx].cofactor)) {
                     break;
             }
@@ -3290,8 +3387,97 @@
     return ecc_sets[idx].id;
 }
 
-
-#ifdef WOLFSSL_ASYNC_CRYPT
+/* Returns the curve id in ecc_sets[] that corresponds
+ * to a given domain parameters pointer.
+ *
+ * dp   domain parameters pointer
+ *
+ * return curve id, from ecc_sets[] on success, negative on error
+ */
+int wc_ecc_get_curve_id_from_dp_params(const ecc_set_type* dp)
+{
+    int idx;
+
+    if (dp == NULL
+    #ifndef WOLFSSL_ECC_CURVE_STATIC
+         || dp->prime == NULL ||  dp->Af == NULL ||
+        dp->Bf == NULL || dp->order == NULL || dp->Gx == NULL || dp->Gy == NULL
+    #endif
+    ) {
+        return BAD_FUNC_ARG;
+    }
+
+    for (idx = 0; ecc_sets[idx].size != 0; idx++) {
+        if (dp->size == ecc_sets[idx].size) {
+            if ((wc_ecc_cmp_param(ecc_sets[idx].prime, (const byte*)dp->prime,
+                    (word32)XSTRLEN(dp->prime), WC_TYPE_HEX_STR) == MP_EQ) &&
+                (wc_ecc_cmp_param(ecc_sets[idx].Af, (const byte*)dp->Af,
+                    (word32)XSTRLEN(dp->Af),WC_TYPE_HEX_STR) == MP_EQ) &&
+                (wc_ecc_cmp_param(ecc_sets[idx].Bf, (const byte*)dp->Bf,
+                    (word32)XSTRLEN(dp->Bf),WC_TYPE_HEX_STR) == MP_EQ) &&
+                (wc_ecc_cmp_param(ecc_sets[idx].order, (const byte*)dp->order,
+                    (word32)XSTRLEN(dp->order),WC_TYPE_HEX_STR) == MP_EQ) &&
+                (wc_ecc_cmp_param(ecc_sets[idx].Gx, (const byte*)dp->Gx,
+                    (word32)XSTRLEN(dp->Gx),WC_TYPE_HEX_STR) == MP_EQ) &&
+                (wc_ecc_cmp_param(ecc_sets[idx].Gy, (const byte*)dp->Gy,
+                    (word32)XSTRLEN(dp->Gy),WC_TYPE_HEX_STR) == MP_EQ) &&
+                (dp->cofactor == ecc_sets[idx].cofactor)) {
+                    break;
+            }
+        }
+    }
+
+    if (ecc_sets[idx].size == 0)
+        return ECC_CURVE_INVALID;
+
+    return ecc_sets[idx].id;
+}
+
+/* Returns the curve id that corresponds to a given OID,
+ * as listed in ecc_sets[] of ecc.c.
+ *
+ * oid   OID, from ecc_sets[].name in ecc.c
+ * len   OID len, from ecc_sets[].name in ecc.c
+ * return curve id, from ecc_sets[] on success, negative on error
+ */
+int wc_ecc_get_curve_id_from_oid(const byte* oid, word32 len)
+{
+    int curve_idx;
+
+    if (oid == NULL)
+        return BAD_FUNC_ARG;
+
+    for (curve_idx = 0; ecc_sets[curve_idx].size != 0; curve_idx++) {
+        if (
+        #ifndef WOLFSSL_ECC_CURVE_STATIC
+            ecc_sets[curve_idx].oid &&
+        #endif
+            ecc_sets[curve_idx].oidSz == len &&
+                              XMEMCMP(ecc_sets[curve_idx].oid, oid, len) == 0) {
+            break;
+        }
+    }
+    if (ecc_sets[curve_idx].size == 0) {
+        WOLFSSL_MSG("ecc_set curve name not found");
+        return ECC_CURVE_INVALID;
+    }
+
+    return ecc_sets[curve_idx].id;
+}
+
+/* Get curve parameters using curve index */
+const ecc_set_type* wc_ecc_get_curve_params(int curve_idx)
+{
+    const ecc_set_type* ecc_set = NULL;
+
+    if (curve_idx >= 0 && curve_idx < (int)ECC_SET_COUNT) {
+        ecc_set = &ecc_sets[curve_idx];
+    }
+    return ecc_set;
+}
+
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
 static WC_INLINE int wc_ecc_alloc_mpint(ecc_key* key, mp_int** mp)
 {
    if (key == NULL || mp == NULL)
@@ -3331,7 +3517,7 @@
     wc_ecc_free_mpint(key, &key->signK);
 #endif /* HAVE_CAVIUM_V */
 }
-#endif /* WOLFSSL_ASYNC_CRYPT */
+#endif /* WOLFSSL_ASYNC_CRYPT && WC_ASYNC_ENABLE_ECC */
 
 
 #ifdef HAVE_ECC_DHE
@@ -3348,17 +3534,20 @@
                       word32* outlen)
 {
    int err;
-
+#if defined(WOLFSSL_CRYPTOCELL) && !defined(WOLFSSL_ATECC508A)
+   CRYS_ECDH_TempData_t tempBuff;
+#endif
    if (private_key == NULL || public_key == NULL || out == NULL ||
                                                             outlen == NULL) {
        return BAD_FUNC_ARG;
    }
 
-#ifdef WOLF_CRYPTO_DEV
+#ifdef WOLF_CRYPTO_CB
     if (private_key->devId != INVALID_DEVID) {
-        err = wc_CryptoDev_Ecdh(private_key, public_key, out, outlen);
-        if (err != NOT_COMPILED_IN)
+        err = wc_CryptoCb_Ecdh(private_key, public_key, out, outlen);
+        if (err != CRYPTOCB_UNAVAILABLE)
             return err;
+        /* fall-through when unavailable */
     }
 #endif
 
@@ -3380,11 +3569,28 @@
    }
 
 #ifdef WOLFSSL_ATECC508A
-   err = atcatls_ecdh(private_key->slot, public_key->pubkey_raw, out);
-   if (err != ATCA_SUCCESS) {
-      err = BAD_COND_E;
-   }
-   *outlen = private_key->dp->size;
+   /* For SECP256R1 use hardware */
+   if (private_key->dp->id == ECC_SECP256R1) {
+       err = atmel_ecc_create_pms(private_key->slot, public_key->pubkey_raw, out);
+       *outlen = private_key->dp->size;
+   }
+   else {
+      err = NOT_COMPILED_IN;
+   }
+#elif defined(WOLFSSL_CRYPTOCELL)
+
+    /* generate a secret*/
+    err = CRYS_ECDH_SVDP_DH(&public_key->ctx.pubKey,
+                            &private_key->ctx.privKey,
+                            out,
+                            outlen,
+                            &tempBuff);
+
+    if (err != SA_SILIB_RET_OK){
+        WOLFSSL_MSG("CRYS_ECDH_SVDP_DH for secret failed");
+        return err;
+    }
+
 #else
    err = wc_ecc_shared_secret_ex(private_key, &public_key->pubkey, out, outlen);
 #endif /* WOLFSSL_ATECC508A */
@@ -3393,7 +3599,7 @@
 }
 
 
-#ifndef WOLFSSL_ATECC508A
+#if !defined(WOLFSSL_ATECC508A) && !defined(WOLFSSL_CRYPTOCELL)
 
 static int wc_ecc_shared_secret_gen_sync(ecc_key* private_key, ecc_point* point,
                                byte* out, word32* outlen, ecc_curve_spec* curve)
@@ -3433,6 +3639,13 @@
     }
     else
 #endif
+#ifdef WOLFSSL_SP_384
+    if (private_key->idx != ECC_CUSTOM_IDX &&
+                               ecc_sets[private_key->idx].id == ECC_SECP384R1) {
+        err = sp_ecc_secret_gen_384(k, point, out, outlen, private_key->heap);
+    }
+    else
+#endif
 #endif
 #ifdef WOLFSSL_SP_MATH
     {
@@ -3442,6 +3655,8 @@
     }
 #else
     {
+        mp_digit mp = 0;
+
         /* make new point */
         result = wc_ecc_new_point_h(private_key->heap);
         if (result == NULL) {
@@ -3452,11 +3667,19 @@
             return MEMORY_E;
         }
 
-        err = wc_ecc_mulmod_ex(k, point, result, curve->Af, curve->prime, 1,
+        /* Map in a separate call as this should be constant time */
+        err = wc_ecc_mulmod_ex(k, point, result, curve->Af, curve->prime, 0,
                                                              private_key->heap);
         if (err == MP_OKAY) {
+            err = mp_montgomery_setup(curve->prime, &mp);
+        }
+        if (err == MP_OKAY) {
+            /* Use constant time map if compiled in */
+            err = ecc_map_ex(result, curve->prime, mp, 1);
+        }
+        if (err == MP_OKAY) {
             x = mp_unsigned_bin_size(curve->prime);
-            if (*outlen < x) {
+            if (*outlen < x || (int)x < mp_unsigned_bin_size(result->x)) {
                 err = BUFFER_E;
             }
         }
@@ -3540,7 +3763,7 @@
 
     return err;
 }
-#endif /* WOLFSSL_ASYNC_CRYPT */
+#endif /* WOLFSSL_ASYNC_CRYPT && WC_ASYNC_ENABLE_ECC */
 
 int wc_ecc_shared_secret_gen(ecc_key* private_key, ecc_point* point,
                                                     byte* out, word32 *outlen)
@@ -3553,9 +3776,8 @@
         return BAD_FUNC_ARG;
     }
 
+    /* load curve info */
     ALLOC_CURVE_SPECS(2);
-
-    /* load curve info */
     err = wc_ecc_curve_load(private_key->dp, &curve,
         (ECC_CURVE_FIELD_PRIME | ECC_CURVE_FIELD_AF));
     if (err != MP_OKAY) {
@@ -3649,18 +3871,18 @@
     }
 
     /* cleanup */
-#ifdef WOLFSSL_ASYNC_CRYPT
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
     wc_ecc_free_async(private_key);
 #endif
     private_key->state = ECC_STATE_NONE;
 
     return err;
 }
-#endif /* !WOLFSSL_ATECC508A */
+#endif /* !WOLFSSL_ATECC508A && !WOLFSSL_CRYPTOCELL */
 #endif /* HAVE_ECC_DHE */
 
 
-#ifndef WOLFSSL_ATECC508A
+#if !defined(WOLFSSL_ATECC508A) && !defined(WOLFSSL_CRYPTOCELL)
 /* return 1 if point is at infinity, 0 if not, < 0 on error */
 int wc_ecc_point_is_at_infinity(ecc_point* p)
 {
@@ -3673,12 +3895,12 @@
     return 0;
 }
 
-#ifndef WOLFSSL_SP_MATH
 /* generate random and ensure its greater than 0 and less than order */
-static int wc_ecc_gen_k(WC_RNG* rng, int size, mp_int* k, mp_int* order)
-{
+int wc_ecc_gen_k(WC_RNG* rng, int size, mp_int* k, mp_int* order)
+{
+#ifndef WC_NO_RNG
     int err;
-    DECLARE_VAR(buf, byte, ECC_MAXSIZE_GEN, rng->heap);
+    byte buf[ECC_MAXSIZE_GEN];
 
     /*generate 8 extra bytes to mitigate bias from the modulo operation below*/
     /*see section A.1.2 in 'Suite B Implementor's Guide to FIPS 186-3 (ECDSA)'*/
@@ -3705,12 +3927,17 @@
     }
 
     ForceZero(buf, ECC_MAXSIZE);
-    FREE_VAR(buf, rng->heap);
 
     return err;
-}
-#endif
-#endif /* !WOLFSSL_ATECC508A */
+#else
+    (void)rng;
+    (void)size;
+    (void)k;
+    (void)order;
+    return NOT_COMPILED_IN;
+#endif /* !WC_NO_RNG */
+}
+#endif /* !WOLFSSL_ATECC508A && !WOLFSSL_CRYPTOCELL */
 
 static WC_INLINE void wc_ecc_reset(ecc_key* key)
 {
@@ -3742,7 +3969,7 @@
 #endif
     ecc_point* pub;
     DECLARE_CURVE_SPECS(curve, ECC_CURVE_FIELD_COUNT);
-#endif
+#endif /* !WOLFSSL_ATECC508A */
 
     if (key == NULL) {
         return BAD_FUNC_ARG;
@@ -3766,11 +3993,11 @@
         curve = curveIn;
     }
     else {
-        ALLOC_CURVE_SPECS(ECC_CURVE_FIELD_COUNT);
-
         /* load curve info */
-        if (err == MP_OKAY)
+        if (err == MP_OKAY) {
+            ALLOC_CURVE_SPECS(ECC_CURVE_FIELD_COUNT);
             err = wc_ecc_curve_load(key->dp, &curve, ECC_CURVE_FIELD_ALL);
+        }
     }
 
     if (err == MP_OKAY) {
@@ -3787,11 +4014,19 @@
     }
 
 
+    if (err != MP_OKAY) {
+    }
+    else
 #ifdef WOLFSSL_HAVE_SP_ECC
 #ifndef WOLFSSL_SP_NO_256
     if (key->idx != ECC_CUSTOM_IDX && ecc_sets[key->idx].id == ECC_SECP256R1) {
-        if (err == MP_OKAY)
-            err = sp_ecc_mulmod_base_256(&key->k, pub, 1, key->heap);
+        err = sp_ecc_mulmod_base_256(&key->k, pub, 1, key->heap);
+    }
+    else
+#endif
+#ifdef WOLFSSL_SP_384
+    if (key->idx != ECC_CUSTOM_IDX && ecc_sets[key->idx].id == ECC_SECP384R1) {
+        err = sp_ecc_mulmod_base_384(&key->k, pub, 1, key->heap);
     }
     else
 #endif
@@ -3800,11 +4035,11 @@
         err = WC_KEY_SIZE_E;
 #else
     {
-        if (err == MP_OKAY) {
-            base = wc_ecc_new_point_h(key->heap);
-            if (base == NULL)
-                err = MEMORY_E;
-        }
+        mp_digit mp;
+
+        base = wc_ecc_new_point_h(key->heap);
+        if (base == NULL)
+            err = MEMORY_E;
         /* read in the x/y for this key */
         if (err == MP_OKAY)
             err = mp_copy(curve->Gx, base->x);
@@ -3815,8 +4050,19 @@
 
         /* make the public key */
         if (err == MP_OKAY) {
+            /* Map in a separate call as this should be constant time */
             err = wc_ecc_mulmod_ex(&key->k, base, pub, curve->Af, curve->prime,
-                                                                  1, key->heap);
+                                                                  0, key->heap);
+            if (err == MP_MEM) {
+               err = MEMORY_E;
+            }
+        }
+        if (err == MP_OKAY) {
+            err = mp_montgomery_setup(curve->prime, &mp);
+        }
+        if (err == MP_OKAY) {
+            /* Use constant time map if compiled in */
+            err = ecc_map_ex(pub, curve->prime, mp, 1);
         }
 
         wc_ecc_del_point_h(base, key->heap);
@@ -3842,13 +4088,12 @@
     /* free up local curve */
     if (curveIn == NULL) {
         wc_ecc_curve_free(curve);
-    #ifndef WOLFSSL_ATECC508A
         FREE_CURVE_SPECS();
-    #endif
     }
 
 #else
     (void)curveIn;
+    err = NOT_COMPILED_IN;
 #endif /* WOLFSSL_ATECC508A */
 
     /* change key state if public part is cached */
@@ -3877,15 +4122,22 @@
 }
 
 
+WOLFSSL_ABI
 int wc_ecc_make_key_ex(WC_RNG* rng, int keysize, ecc_key* key, int curve_id)
 {
-    int            err;
-#ifndef WOLFSSL_ATECC508A
+    int err;
+#if !defined(WOLFSSL_ATECC508A) && !defined(WOLFSSL_CRYPTOCELL)
 #ifndef WOLFSSL_SP_MATH
     DECLARE_CURVE_SPECS(curve, ECC_CURVE_FIELD_COUNT);
 #endif
-#endif
-
+#endif /* !WOLFSSL_ATECC508A */
+#if defined(WOLFSSL_CRYPTOCELL) && !defined(WOLFSSL_ATECC508A)
+    const CRYS_ECPKI_Domain_t*  pDomain;
+    CRYS_ECPKI_KG_TempData_t    tempBuff;
+    CRYS_ECPKI_KG_FipsContext_t fipsCtx;
+    byte ucompressed_key[ECC_MAX_CRYPTO_HW_SIZE*2 + 1];
+    word32 raw_size = 0;
+#endif
     if (key == NULL || rng == NULL) {
         return BAD_FUNC_ARG;
     }
@@ -3898,6 +4150,15 @@
         return err;
     }
 
+#ifdef WOLF_CRYPTO_CB
+    if (key->devId != INVALID_DEVID) {
+        err = wc_CryptoCb_MakeEccKey(rng, keysize, key, curve_id);
+        if (err != CRYPTOCB_UNAVAILABLE)
+            return err;
+        /* fall-through when unavailable */
+    }
+#endif
+
 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
     if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ECC) {
     #ifdef HAVE_CAVIUM
@@ -3915,46 +4176,116 @@
         }
     #endif
     }
-#endif /* WOLFSSL_ASYNC_CRYPT */
+#endif /* WOLFSSL_ASYNC_CRYPT && WC_ASYNC_ENABLE_ECC */
 
 #ifdef WOLFSSL_ATECC508A
-   key->type = ECC_PRIVATEKEY;
-   err = atcatls_create_key(key->slot, key->pubkey_raw);
-   if (err != ATCA_SUCCESS) {
-      err = BAD_COND_E;
-   }
-
-   /* populate key->pubkey */
-   err = mp_read_unsigned_bin(key->pubkey.x, key->pubkey_raw,
-                              ECC_MAX_CRYPTO_HW_SIZE);
-   if (err == MP_OKAY)
-       err = mp_read_unsigned_bin(key->pubkey.y,
-                                  key->pubkey_raw + ECC_MAX_CRYPTO_HW_SIZE,
-                                  ECC_MAX_CRYPTO_HW_SIZE);
+   if (key->dp->id == ECC_SECP256R1) {
+       key->type = ECC_PRIVATEKEY;
+       key->slot = atmel_ecc_alloc(ATMEL_SLOT_ECDHE);
+       err = atmel_ecc_create_key(key->slot, key->pubkey_raw);
+
+       /* populate key->pubkey */
+       if (err == 0
+       #ifdef ALT_ECC_SIZE
+          && key->pubkey.x
+       #endif
+       ) {
+           err = mp_read_unsigned_bin(key->pubkey.x, key->pubkey_raw,
+                                      ECC_MAX_CRYPTO_HW_SIZE);
+       }
+       if (err == 0
+       #ifdef ALT_ECC_SIZE
+          && key->pubkey.y
+       #endif
+       ) {
+           err = mp_read_unsigned_bin(key->pubkey.y,
+                                      key->pubkey_raw + ECC_MAX_CRYPTO_HW_SIZE,
+                                      ECC_MAX_CRYPTO_HW_SIZE);
+       }
+   }
+   else {
+      err = NOT_COMPILED_IN;
+   }
+#elif defined(WOLFSSL_CRYPTOCELL)
+
+    pDomain = CRYS_ECPKI_GetEcDomain(cc310_mapCurve(curve_id));
+    raw_size = (word32)(key->dp->size)*2 + 1;
+
+    /* generate first key pair */
+    err = CRYS_ECPKI_GenKeyPair(&wc_rndState,
+                                wc_rndGenVectFunc,
+                                pDomain,
+                                &key->ctx.privKey,
+                                &key->ctx.pubKey,
+                                &tempBuff,
+                                &fipsCtx);
+
+    if (err != SA_SILIB_RET_OK){
+        WOLFSSL_MSG("CRYS_ECPKI_GenKeyPair for key pair failed");
+        return err;
+    }
+    key->type = ECC_PRIVATEKEY;
+
+    err = CRYS_ECPKI_ExportPublKey(&key->ctx.pubKey,
+                                   CRYS_EC_PointUncompressed,
+                                   &ucompressed_key[0],
+                                   &raw_size);
+
+    if (err == SA_SILIB_RET_OK && key->pubkey.x && key->pubkey.y) {
+        err = mp_read_unsigned_bin(key->pubkey.x,
+                                   &ucompressed_key[1], key->dp->size);
+        if (err == MP_OKAY) {
+            err = mp_read_unsigned_bin(key->pubkey.y,
+                            &ucompressed_key[1+key->dp->size],key->dp->size);
+        }
+    }
+    raw_size = key->dp->size;
+    if (err == MP_OKAY) {
+        err = CRYS_ECPKI_ExportPrivKey(&key->ctx.privKey,
+                                       ucompressed_key,
+                                       &raw_size);
+    }
+
+    if (err == SA_SILIB_RET_OK) {
+        err = mp_read_unsigned_bin(&key->k, ucompressed_key, raw_size);
+    }
+
 #else
 
 #ifdef WOLFSSL_HAVE_SP_ECC
 #ifndef WOLFSSL_SP_NO_256
     if (key->idx != ECC_CUSTOM_IDX && ecc_sets[key->idx].id == ECC_SECP256R1) {
         err = sp_ecc_make_key_256(rng, &key->k, &key->pubkey, key->heap);
-        if (err == MP_OKAY)
+        if (err == MP_OKAY) {
             key->type = ECC_PRIVATEKEY;
+        }
     }
     else
 #endif
-#endif
+#ifdef WOLFSSL_SP_384
+    if (key->idx != ECC_CUSTOM_IDX && ecc_sets[key->idx].id == ECC_SECP384R1) {
+        err = sp_ecc_make_key_384(rng, &key->k, &key->pubkey, key->heap);
+        if (err == MP_OKAY) {
+            key->type = ECC_PRIVATEKEY;
+        }
+    }
+    else
+#endif
+#endif /* WOLFSSL_HAVE_SP_ECC */
+
+   { /* software key gen */
 #ifdef WOLFSSL_SP_MATH
         err = WC_KEY_SIZE_E;
 #else
-    {
-        ALLOC_CURVE_SPECS(ECC_CURVE_FIELD_COUNT);
 
         /* setup the key variables */
         err = mp_init(&key->k);
 
         /* load curve info */
-        if (err == MP_OKAY)
+        if (err == MP_OKAY) {
+            ALLOC_CURVE_SPECS(ECC_CURVE_FIELD_COUNT);
             err = wc_ecc_curve_load(key->dp, &curve, ECC_CURVE_FIELD_ALL);
+        }
 
         /* generate k */
         if (err == MP_OKAY)
@@ -3975,10 +4306,19 @@
 
         /* cleanup allocations */
         wc_ecc_curve_free(curve);
-    #ifndef WOLFSSL_ATECC508A
         FREE_CURVE_SPECS();
-    #endif
-    }
+#endif /* WOLFSSL_SP_MATH */
+    }
+
+#ifdef HAVE_WOLF_BIGINT
+    if (err == MP_OKAY)
+         err = wc_mp_to_bigint(&key->k, &key->k.raw);
+    if (err == MP_OKAY)
+         err = wc_mp_to_bigint(key->pubkey.x, &key->pubkey.x->raw);
+    if (err == MP_OKAY)
+         err = wc_mp_to_bigint(key->pubkey.y, &key->pubkey.y->raw);
+    if (err == MP_OKAY)
+         err = wc_mp_to_bigint(key->pubkey.z, &key->pubkey.z->raw);
 #endif
 
 #endif /* WOLFSSL_ATECC508A */
@@ -4043,6 +4383,38 @@
 }
 #endif /* ECC_DUMP_OID */
 
+
+WOLFSSL_ABI
+ecc_key* wc_ecc_key_new(void* heap)
+{
+    ecc_key* key;
+
+    key = (ecc_key*)XMALLOC(sizeof(ecc_key), heap, DYNAMIC_TYPE_ECC);
+    if (key) {
+        if (wc_ecc_init_ex(key, heap, INVALID_DEVID) != 0) {
+            XFREE(key, heap, DYNAMIC_TYPE_ECC);
+            key = NULL;
+        }
+    }
+
+    return key;
+}
+
+
+WOLFSSL_ABI
+void wc_ecc_key_free(ecc_key* key)
+{
+    if (key) {
+        void* heap = key->heap;
+
+        wc_ecc_free(key);
+        ForceZero(key, sizeof(ecc_key));
+        XFREE(key, heap, DYNAMIC_TYPE_ECC);
+        (void)heap;
+    }
+}
+
+
 /**
  Make a new ECC key
  rng          An active RNG state
@@ -4057,6 +4429,7 @@
 }
 
 /* Setup dynamic pointers if using normal math for proper freeing */
+WOLFSSL_ABI
 int wc_ecc_init_ex(ecc_key* key, void* heap, int devId)
 {
     int ret = 0;
@@ -4072,17 +4445,14 @@
     XMEMSET(key, 0, sizeof(ecc_key));
     key->state = ECC_STATE_NONE;
 
-#if defined(PLUTON_CRYPTO_ECC) || defined(WOLF_CRYPTO_DEV)
+#if defined(PLUTON_CRYPTO_ECC) || defined(WOLF_CRYPTO_CB)
     key->devId = devId;
 #else
     (void)devId;
 #endif
 
 #ifdef WOLFSSL_ATECC508A
-    key->slot = atmel_ecc_alloc();
-    if (key->slot == ATECC_INVALID_SLOT) {
-        return ECC_BAD_ARG_E;
-    }
+    key->slot = ATECC_INVALID_SLOT;
 #else
 #ifdef ALT_ECC_SIZE
     key->pubkey.x = (mp_int*)&key->pubkey.xyz[0];
@@ -4092,13 +4462,16 @@
     alt_fp_init(key->pubkey.y);
     alt_fp_init(key->pubkey.z);
     ret = mp_init(&key->k);
+    if (ret != MP_OKAY) {
+        return MEMORY_E;
+    }
 #else
     ret = mp_init_multi(&key->k, key->pubkey.x, key->pubkey.y, key->pubkey.z,
                                                                     NULL, NULL);
-#endif /* ALT_ECC_SIZE */
     if (ret != MP_OKAY) {
         return MEMORY_E;
     }
+#endif /* ALT_ECC_SIZE */
 #endif /* WOLFSSL_ATECC508A */
 
 #ifdef WOLFSSL_HEAP_TEST
@@ -4113,6 +4486,9 @@
                                                             key->heap, devId);
 #endif
 
+#if defined(WOLFSSL_DSP)
+    key->handle = -1;
+#endif
     return ret;
 }
 
@@ -4121,6 +4497,29 @@
     return wc_ecc_init_ex(key, NULL, INVALID_DEVID);
 }
 
+#ifdef HAVE_PKCS11
+int wc_ecc_init_id(ecc_key* key, unsigned char* id, int len, void* heap,
+                   int devId)
+{
+    int ret = 0;
+
+    if (key == NULL)
+        ret = BAD_FUNC_ARG;
+    if (ret == 0 && (len < 0 || len > ECC_MAX_ID_LEN))
+        ret = BUFFER_E;
+
+    if (ret == 0)
+        ret = wc_ecc_init_ex(key, heap, devId);
+
+    if (ret == 0 && id != NULL && len != 0) {
+        XMEMCPY(key->id, id, len);
+        key->idLen = len;
+    }
+
+    return ret;
+}
+#endif
+
 int wc_ecc_set_flags(ecc_key* key, word32 flags)
 {
     if (key == NULL) {
@@ -4130,37 +4529,73 @@
     return 0;
 }
 
+
+static int wc_ecc_get_curve_order_bit_count(const ecc_set_type* dp)
+{
+    int err;
+    word32 orderBits;
+    DECLARE_CURVE_SPECS(curve, 1);
+
+    ALLOC_CURVE_SPECS(1);
+    err = wc_ecc_curve_load(dp, &curve, ECC_CURVE_FIELD_ORDER);
+    if (err != 0) {
+       FREE_CURVE_SPECS();
+       return err;
+    }
+    orderBits = mp_count_bits(curve->order);
+
+    wc_ecc_curve_free(curve);
+    FREE_CURVE_SPECS();
+    return (int)orderBits;
+}
+
 #ifdef HAVE_ECC_SIGN
 
 #ifndef NO_ASN
 
-#if defined(WOLFSSL_ATECC508A) || defined(PLUTON_CRYPTO_ECC)
+#if defined(WOLFSSL_ATECC508A) || defined(PLUTON_CRYPTO_ECC) || \
+    defined(WOLFSSL_CRYPTOCELL)
 static int wc_ecc_sign_hash_hw(const byte* in, word32 inlen,
     mp_int* r, mp_int* s, byte* out, word32 *outlen, WC_RNG* rng,
     ecc_key* key)
 {
     int err;
-
 #ifdef PLUTON_CRYPTO_ECC
     if (key->devId != INVALID_DEVID) /* use hardware */
 #endif
     {
+    #if defined(WOLFSSL_CRYPTOCELL) && !defined(WOLFSSL_ATECC508A)
+        CRYS_ECDSA_SignUserContext_t sigCtxTemp;
+        word32 raw_sig_size = *outlen;
+        word32 msgLenInBytes = inlen;
+        CRYS_ECPKI_HASH_OpMode_t hash_mode;
+    #endif
         word32 keysize = (word32)key->dp->size;
+        word32 orderBits = wc_ecc_get_curve_order_bit_count(key->dp);
 
         /* Check args */
-        if (keysize > ECC_MAX_CRYPTO_HW_SIZE || inlen != keysize ||
-                                                *outlen < keysize*2) {
+        if (keysize > ECC_MAX_CRYPTO_HW_SIZE || *outlen < keysize*2) {
             return ECC_BAD_ARG_E;
         }
 
     #if defined(WOLFSSL_ATECC508A)
+        key->slot = atmel_ecc_alloc(ATMEL_SLOT_DEVICE);
+        if (key->slot == ATECC_INVALID_SLOT) {
+            return ECC_BAD_ARG_E;
+        }
+
         /* Sign: Result is 32-bytes of R then 32-bytes of S */
-        err = atcatls_sign(key->slot, in, out);
-        if (err != ATCA_SUCCESS) {
-           return BAD_COND_E;
+        err = atmel_ecc_sign(key->slot, in, out);
+        if (err != 0) {
+           return err;
         }
     #elif defined(PLUTON_CRYPTO_ECC)
         {
+            /* if the input is larger than curve order, we must truncate */
+            if ((inlen * WOLFSSL_BIT_SIZE) > orderBits) {
+               inlen = (orderBits + WOLFSSL_BIT_SIZE - 1) / WOLFSSL_BIT_SIZE;
+            }
+
             /* perform ECC sign */
             word32 raw_sig_size = *outlen;
             err = Crypto_EccSign(in, inlen, out, &raw_sig_size);
@@ -4168,6 +4603,34 @@
                return BAD_COND_E;
             }
         }
+    #elif defined(WOLFSSL_CRYPTOCELL)
+
+        hash_mode = cc310_hashModeECC(msgLenInBytes);
+        if (hash_mode == CRYS_ECPKI_HASH_OpModeLast) {
+            hash_mode = cc310_hashModeECC(keysize);
+            hash_mode = CRYS_ECPKI_HASH_SHA256_mode;
+        }
+
+        /* truncate if hash is longer than key size */
+        if (msgLenInBytes > keysize) {
+            msgLenInBytes = keysize;
+        }
+
+        /* create signature from an input buffer using a private key*/
+        err = CRYS_ECDSA_Sign(&wc_rndState,
+                               wc_rndGenVectFunc,
+                               &sigCtxTemp,
+                               &key->ctx.privKey,
+                               hash_mode,
+                               (byte*)in,
+                               msgLenInBytes,
+                               out,
+                               &raw_sig_size);
+
+        if (err != SA_SILIB_RET_OK){
+            WOLFSSL_MSG("CRYS_ECDSA_Sign failed");
+            return err;
+        }
     #endif
 
         /* Load R and S */
@@ -4194,58 +4657,26 @@
 
     return err;
 }
-#endif /* WOLFSSL_ATECC508A || PLUTON_CRYPTO_ECC */
-
-/**
- Sign a message digest
- in        The message digest to sign
- inlen     The length of the digest
- out       [out] The destination for the signature
- outlen    [in/out] The max size and resulting size of the signature
- key       A private ECC key
- return    MP_OKAY if successful
- */
-int wc_ecc_sign_hash(const byte* in, word32 inlen, byte* out, word32 *outlen,
-                     WC_RNG* rng, ecc_key* key)
+#endif /* WOLFSSL_ATECC508A || PLUTON_CRYPTO_ECC || WOLFSSL_CRYPTOCELL */
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
+static int wc_ecc_sign_hash_async(const byte* in, word32 inlen, byte* out,
+    word32 *outlen, WC_RNG* rng, ecc_key* key)
 {
     int err;
     mp_int *r = NULL, *s = NULL;
-#if !defined(WOLFSSL_ASYNC_CRYPT) && !defined(WOLFSSL_SMALL_STACK)
-    mp_int r_lcl, s_lcl;
-#endif
 
     if (in == NULL || out == NULL || outlen == NULL || key == NULL ||
                                                                 rng == NULL) {
         return ECC_BAD_ARG_E;
     }
 
-#ifdef WOLF_CRYPTO_DEV
-    if (key->devId != INVALID_DEVID) {
-        err = wc_CryptoDev_EccSign(in, inlen, out, outlen, rng, key);
-        if (err != NOT_COMPILED_IN)
-            return err;
-    }
-#endif
-
-#ifdef WOLFSSL_ASYNC_CRYPT
     err = wc_ecc_alloc_async(key);
-    if (err != 0)
+    if (err != 0) {
         return err;
+    }
     r = key->r;
     s = key->s;
-#elif !defined(WOLFSSL_SMALL_STACK)
-    r = &r_lcl;
-    s = &s_lcl;
-#else
-    r = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_ECC);
-    if (r == NULL)
-        return MEMORY_E;
-    s = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_ECC);
-    if (s == NULL) {
-        XFREE(r, key->heap, DYNAMIC_TYPE_ECC);
-        return MEMORY_E;
-    }
-#endif
 
     switch(key->state) {
         case ECC_STATE_NONE:
@@ -4256,12 +4687,7 @@
                 break;
             }
 
-        /* hardware crypto */
-        #if defined(WOLFSSL_ATECC508A) || defined(PLUTON_CRYPTO_ECC)
-            err = wc_ecc_sign_hash_hw(in, inlen, r, s, out, outlen, rng, key);
-        #else
             err = wc_ecc_sign_hash_ex(in, inlen, rng, key, r, s);
-        #endif
             if (err < 0) {
                 break;
             }
@@ -4271,7 +4697,6 @@
         case ECC_STATE_SIGN_ENCODE:
             key->state = ECC_STATE_SIGN_ENCODE;
 
-        #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
             if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ECC) {
                 #ifdef HAVE_CAVIUM_V
                     /* Nitrox requires r and s in sep buffer, so split it */
@@ -4283,7 +4708,6 @@
                     wc_bigint_to_mp(&s->raw, s);
                 #endif
             }
-        #endif /* WOLFSSL_ASYNC_CRYPT */
 
             /* encoded with DSA header */
             err = StoreECC_DSA_Sig(out, outlen, r, s);
@@ -4291,10 +4715,6 @@
             /* done with R/S */
             mp_clear(r);
             mp_clear(s);
-        #if !defined(WOLFSSL_ASYNC_CRYPT) && defined(WOLFSSL_SMALL_STACK)
-            XFREE(s, key->heap, DYNAMIC_TYPE_ECC);
-            XFREE(r, key->heap, DYNAMIC_TYPE_ECC);
-        #endif
             break;
 
         default:
@@ -4309,16 +4729,113 @@
     }
 
     /* cleanup */
-#ifdef WOLFSSL_ASYNC_CRYPT
     wc_ecc_free_async(key);
-#endif
     key->state = ECC_STATE_NONE;
 
     return err;
 }
+#endif /* WOLFSSL_ASYNC_CRYPT && WC_ASYNC_ENABLE_ECC */
+
+/**
+ Sign a message digest
+ in        The message digest to sign
+ inlen     The length of the digest
+ out       [out] The destination for the signature
+ outlen    [in/out] The max size and resulting size of the signature
+ key       A private ECC key
+ return    MP_OKAY if successful
+ */
+WOLFSSL_ABI
+int wc_ecc_sign_hash(const byte* in, word32 inlen, byte* out, word32 *outlen,
+                     WC_RNG* rng, ecc_key* key)
+{
+    int err;
+#if !defined(WOLFSSL_ASYNC_CRYPT) || !defined(WC_ASYNC_ENABLE_ECC)
+#ifdef WOLFSSL_SMALL_STACK
+    mp_int *r = NULL, *s = NULL;
+#else
+    mp_int r[1], s[1];
+#endif
+#endif
+
+    if (in == NULL || out == NULL || outlen == NULL || key == NULL ||
+                                                                rng == NULL) {
+        return ECC_BAD_ARG_E;
+    }
+
+#ifdef WOLF_CRYPTO_CB
+    if (key->devId != INVALID_DEVID) {
+        err = wc_CryptoCb_EccSign(in, inlen, out, outlen, rng, key);
+        if (err != CRYPTOCB_UNAVAILABLE)
+            return err;
+        /* fall-through when unavailable */
+    }
+#endif
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
+    /* handle async cases */
+    err = wc_ecc_sign_hash_async(in, inlen, out, outlen, rng, key);
+#else
+
+#ifdef WOLFSSL_SMALL_STACK
+    r = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_ECC);
+    if (r == NULL)
+        return MEMORY_E;
+    s = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_ECC);
+    if (s == NULL) {
+        XFREE(r, key->heap, DYNAMIC_TYPE_ECC);
+        return MEMORY_E;
+    }
+#endif
+    XMEMSET(r, 0, sizeof(mp_int));
+    XMEMSET(s, 0, sizeof(mp_int));
+
+    if ((err = mp_init_multi(r, s, NULL, NULL, NULL, NULL)) != MP_OKAY){
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(s, key->heap, DYNAMIC_TYPE_ECC);
+        XFREE(r, key->heap, DYNAMIC_TYPE_ECC);
+    #endif
+        return err;
+    }
+
+/* hardware crypto */
+#if defined(WOLFSSL_ATECC508A) || defined(PLUTON_CRYPTO_ECC) || defined(WOLFSSL_CRYPTOCELL)
+    err = wc_ecc_sign_hash_hw(in, inlen, r, s, out, outlen, rng, key);
+#else
+    err = wc_ecc_sign_hash_ex(in, inlen, rng, key, r, s);
+#endif
+    if (err < 0) {
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(s, key->heap, DYNAMIC_TYPE_ECC);
+        XFREE(r, key->heap, DYNAMIC_TYPE_ECC);
+    #endif
+        return err;
+    }
+
+    /* encoded with DSA header */
+    err = StoreECC_DSA_Sig(out, outlen, r, s);
+
+    /* cleanup */
+    mp_clear(r);
+    mp_clear(s);
+
+#ifdef WOLFSSL_SMALL_STACK
+    XFREE(s, key->heap, DYNAMIC_TYPE_ECC);
+    XFREE(r, key->heap, DYNAMIC_TYPE_ECC);
+#endif
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+    return err;
+}
 #endif /* !NO_ASN */
 
-#ifndef WOLFSSL_ATECC508A
+#if defined(WOLFSSL_STM32_PKA)
+int wc_ecc_sign_hash_ex(const byte* in, word32 inlen, WC_RNG* rng,
+                     ecc_key* key, mp_int *r, mp_int *s)
+{
+    return stm32_ecc_sign_hash_ex(in, inlen, rng, key, r, s);
+}
+#elif !defined(WOLFSSL_ATECC508A) && !defined(WOLFSSL_CRYPTOCELL)
 /**
   Sign a message digest
   in        The message digest to sign
@@ -4331,18 +4848,26 @@
 int wc_ecc_sign_hash_ex(const byte* in, word32 inlen, WC_RNG* rng,
                      ecc_key* key, mp_int *r, mp_int *s)
 {
-   int    err;
+   int    err = 0;
 #ifndef WOLFSSL_SP_MATH
    mp_int* e;
 #if (!defined(WOLFSSL_ASYNC_CRYPT) || !defined(HAVE_CAVIUM_V)) && \
                                                    !defined(WOLFSSL_SMALL_STACK)
    mp_int  e_lcl;
 #endif
+
+#if defined(WOLFSSL_ECDSA_SET_K) || \
+    (defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC) && \
+    (defined(HAVE_CAVIUM_V) || defined(HAVE_INTEL_QA)))
+   DECLARE_CURVE_SPECS(curve, ECC_CURVE_FIELD_COUNT);
+#else
    DECLARE_CURVE_SPECS(curve, 1);
+#endif
 #endif /* !WOLFSSL_SP_MATH */
 
-   if (in == NULL || r == NULL || s == NULL || key == NULL || rng == NULL)
+   if (in == NULL || r == NULL || s == NULL || key == NULL || rng == NULL) {
        return ECC_BAD_ARG_E;
+   }
 
    /* is this a private key? */
    if (key->type != ECC_PRIVATEKEY && key->type != ECC_PRIVATEKEY_ONLY) {
@@ -4355,20 +4880,56 @@
    }
 
 #ifdef WOLFSSL_SP_MATH
-    if (key->idx != ECC_CUSTOM_IDX && ecc_sets[key->idx].id == ECC_SECP256R1)
-        return sp_ecc_sign_256(in, inlen, rng, &key->k, r, s, key->heap);
-    else
-        return WC_KEY_SIZE_E;
+#ifndef WOLFSSL_SP_NO_256
+    if (key->idx != ECC_CUSTOM_IDX && ecc_sets[key->idx].id == ECC_SECP256R1) {
+    #ifndef WOLFSSL_ECDSA_SET_K
+        return sp_ecc_sign_256(in, inlen, rng, &key->k, r, s, NULL, key->heap);
+    #else
+        return sp_ecc_sign_256(in, inlen, rng, &key->k, r, s, key->sign_k,
+                                                                     key->heap);
+    #endif
+    }
+#endif
+#ifdef WOLFSSL_SP_384
+    if (key->idx != ECC_CUSTOM_IDX && ecc_sets[key->idx].id == ECC_SECP384R1) {
+    #ifndef WOLFSSL_ECDSA_SET_K
+        return sp_ecc_sign_384(in, inlen, rng, &key->k, r, s, NULL, key->heap);
+    #else
+        return sp_ecc_sign_384(in, inlen, rng, &key->k, r, s, key->sign_k,
+                                                                     key->heap);
+    #endif
+    }
+#endif
+    return WC_KEY_SIZE_E;
 #else
 #ifdef WOLFSSL_HAVE_SP_ECC
-    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC) && \
-           defined(WOLFSSL_ASYNC_CRYPT_TEST)
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
     if (key->asyncDev.marker != WOLFSSL_ASYNC_MARKER_ECC)
     #endif
     {
 #ifndef WOLFSSL_SP_NO_256
-        if (key->idx != ECC_CUSTOM_IDX && ecc_sets[key->idx].id == ECC_SECP256R1)
-            return sp_ecc_sign_256(in, inlen, rng, &key->k, r, s, key->heap);
+        if (key->idx != ECC_CUSTOM_IDX &&
+                                       ecc_sets[key->idx].id == ECC_SECP256R1) {
+        #ifndef WOLFSSL_ECDSA_SET_K
+            return sp_ecc_sign_256(in, inlen, rng, &key->k, r, s, NULL,
+                                                                     key->heap);
+        #else
+            return sp_ecc_sign_256(in, inlen, rng, &key->k, r, s, key->sign_k,
+                                                                     key->heap);
+        #endif
+        }
+#endif
+#ifdef WOLFSSL_SP_384
+        if (key->idx != ECC_CUSTOM_IDX &&
+                                       ecc_sets[key->idx].id == ECC_SECP384R1) {
+        #ifndef WOLFSSL_ECDSA_SET_K
+            return sp_ecc_sign_384(in, inlen, rng, &key->k, r, s, NULL,
+                                                                     key->heap);
+        #else
+            return sp_ecc_sign_384(in, inlen, rng, &key->k, r, s, key->sign_k,
+                                                                     key->heap);
+        #endif
+        }
 #endif
     }
 #endif /* WOLFSSL_HAVE_SP_ECC */
@@ -4390,12 +4951,9 @@
     }
 #endif
 
-   ALLOC_CURVE_SPECS(1);
-
 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(HAVE_CAVIUM_V)
    err = wc_ecc_alloc_mpint(key, &key->e);
    if (err != 0) {
-      FREE_CURVE_SPECS();
       return err;
    }
    e = key->e;
@@ -4404,7 +4962,6 @@
 #else
    e = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_ECC);
    if (e == NULL) {
-      FREE_CURVE_SPECS();
       return MEMORY_E;
    }
 #endif
@@ -4415,12 +4972,27 @@
    #ifdef WOLFSSL_SMALL_STACK
       XFREE(e, key->heap, DYNAMIC_TYPE_ECC);
    #endif
-      FREE_CURVE_SPECS();
       return err;
    }
 
    /* load curve info */
-   err = wc_ecc_curve_load(key->dp, &curve, ECC_CURVE_FIELD_ORDER);
+#if defined(WOLFSSL_ECDSA_SET_K)
+   ALLOC_CURVE_SPECS(ECC_CURVE_FIELD_COUNT);
+   err = wc_ecc_curve_load(key->dp, &curve, ECC_CURVE_FIELD_ALL);
+#else
+   #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC) && \
+      (defined(HAVE_CAVIUM_V) || defined(HAVE_INTEL_QA))
+   if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ECC) {
+      ALLOC_CURVE_SPECS(ECC_CURVE_FIELD_COUNT);
+      err = wc_ecc_curve_load(key->dp, &curve, ECC_CURVE_FIELD_ALL);
+   }
+   else
+   #endif
+   {
+      ALLOC_CURVE_SPECS(1);
+      err = wc_ecc_curve_load(key->dp, &curve, ECC_CURVE_FIELD_ORDER);
+   }
+#endif
 
    /* load digest into e */
    if (err == MP_OKAY) {
@@ -4441,7 +5013,7 @@
    if (err == MP_OKAY) {
        int      loop_check = 0;
    #ifdef WOLFSSL_SMALL_STACK
-       ecc_key* pubkey = NULL;
+       ecc_key* pubkey;
    #else
        ecc_key  pubkey[1];
    #endif
@@ -4518,7 +5090,7 @@
            }
        #endif /* HAVE_CAVIUM_V || HAVE_INTEL_QA */
        }
-   #endif /* WOLFSSL_ASYNC_CRYPT */
+   #endif /* WOLFSSL_ASYNC_CRYPT && WC_ASYNC_ENABLE_ECC */
 
    #ifdef WOLFSSL_SMALL_STACK
        pubkey = (ecc_key*)XMALLOC(sizeof(ecc_key), key->heap, DYNAMIC_TYPE_ECC);
@@ -4574,8 +5146,28 @@
                     err = RNG_FAILURE_E;
                     break;
                }
-               err = wc_ecc_make_key_ex(rng, key->dp->size, pubkey,
+       #ifdef WOLFSSL_ECDSA_SET_K
+               if (key->sign_k != NULL) {
+                   if (loop_check > 1) {
+                      err = RNG_FAILURE_E;
+                      break;
+                   }
+
+                   err = mp_copy(key->sign_k, &pubkey->k);
+                   if (err != MP_OKAY) break;
+
+                   mp_forcezero(key->sign_k);
+                   mp_free(key->sign_k);
+                   XFREE(key->sign_k, key->heap, DYNAMIC_TYPE_ECC);
+                   key->sign_k = NULL;
+                   err = wc_ecc_make_pub_ex(pubkey, curve, NULL);
+               }
+               else
+       #endif
+               {
+                   err = wc_ecc_make_key_ex(rng, key->dp->size, pubkey,
                                                                    key->dp->id);
+               }
                if (err != MP_OKAY) break;
 
                /* find r = x1 mod n */
@@ -4654,12 +5246,44 @@
 
    return err;
 }
-#endif /* WOLFSSL_ATECC508A */
+
+#ifdef WOLFSSL_ECDSA_SET_K
+int wc_ecc_sign_set_k(const byte* k, word32 klen, ecc_key* key)
+{
+    int ret = 0;
+
+    if (k == NULL || klen == 0 || key == NULL) {
+        ret = BAD_FUNC_ARG;
+    }
+
+    if (ret == 0) {
+        if (key->sign_k == NULL) {
+            key->sign_k = (mp_int*)XMALLOC(sizeof(mp_int), key->heap,
+                                                              DYNAMIC_TYPE_ECC);
+            if (key->sign_k == NULL) {
+                ret = MEMORY_E;
+            }
+        }
+    }
+
+    if (ret == 0) {
+        ret = mp_init(key->sign_k);
+    }
+    if (ret == 0) {
+        ret = mp_read_unsigned_bin(key->sign_k, k, klen);
+    }
+
+    return ret;
+}
+#endif /* WOLFSSL_ECDSA_SET_K */
+#endif /* WOLFSSL_ATECC508A && WOLFSSL_CRYPTOCELL*/
+
 #endif /* HAVE_ECC_SIGN */
 
 #ifdef WOLFSSL_CUSTOM_CURVES
 void wc_ecc_free_curve(const ecc_set_type* curve, void* heap)
 {
+#ifndef WOLFSSL_ECC_CURVE_STATIC
     if (curve->prime != NULL)
         XFREE((void*)curve->prime, heap, DYNAMIC_TYPE_ECC_BUFFER);
     if (curve->Af != NULL)
@@ -4672,6 +5296,7 @@
         XFREE((void*)curve->Gx, heap, DYNAMIC_TYPE_ECC_BUFFER);
     if (curve->Gy != NULL)
         XFREE((void*)curve->Gy, heap, DYNAMIC_TYPE_ECC_BUFFER);
+#endif
 
     XFREE((void*)curve, heap, DYNAMIC_TYPE_ECC_BUFFER);
 
@@ -4683,13 +5308,22 @@
   Free an ECC key from memory
   key   The key you wish to free
 */
+WOLFSSL_ABI
 int wc_ecc_free(ecc_key* key)
 {
     if (key == NULL) {
         return 0;
     }
 
-#ifdef WOLFSSL_ASYNC_CRYPT
+#ifdef WOLFSSL_ECDSA_SET_K
+    if (key->sign_k != NULL) {
+        mp_forcezero(key->sign_k);
+        mp_free(key->sign_k);
+        XFREE(key->sign_k, key->heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
     #ifdef WC_ASYNC_ENABLE_ECC
     wolfAsync_DevCtxFree(&key->asyncDev, WOLFSSL_ASYNC_MARKER_ECC);
     #endif
@@ -4698,15 +5332,14 @@
 
 #ifdef WOLFSSL_ATECC508A
     atmel_ecc_free(key->slot);
-    key->slot = -1;
-#else
+    key->slot = ATECC_INVALID_SLOT;
+#endif /* WOLFSSL_ATECC508A */
 
     mp_clear(key->pubkey.x);
     mp_clear(key->pubkey.y);
     mp_clear(key->pubkey.z);
 
     mp_forcezero(&key->k);
-#endif /* WOLFSSL_ATECC508A */
 
 #ifdef WOLFSSL_CUSTOM_CURVES
     if (key->deallocSet && key->dp != NULL)
@@ -4716,7 +5349,7 @@
     return 0;
 }
 
-#if !defined(WOLFSSL_SP_MATH) && !defined(WOLFSSL_ATECC508A)
+#if !defined(WOLFSSL_SP_MATH) && !defined(WOLFSSL_ATECC508A) && !defined(WOLFSSL_CRYPTOCELL)
 #ifdef ECC_SHAMIR
 
 /** Computes kA*A + kB*B = C using Shamir's Trick
@@ -4741,11 +5374,11 @@
                     void* heap)
 #endif
 {
+#ifdef WOLFSSL_SMALL_STACK_CACHE
+  ecc_key        key;
+#endif
 #ifdef WOLFSSL_SMALL_STACK
   ecc_point**    precomp = NULL;
-#ifdef WOLFSSL_SMALL_STACK_CACHE
-  ecc_key        key;
-#endif
 #else
   ecc_point*     precomp[SHAMIR_PRECOMP_SZ];
 #endif
@@ -4856,7 +5489,7 @@
 
   if (err == MP_OKAY) {
   #ifdef WOLFSSL_SMALL_STACK
-    mp_int* mu = NULL;
+    mp_int* mu;
   #else
     mp_int  mu[1];
   #endif
@@ -4889,10 +5522,12 @@
 
       /* done with mu */
       mp_clear(mu);
+    }
   #ifdef WOLFSSL_SMALL_STACK
+    if (mu != NULL) {
       XFREE(mu, heap, DYNAMIC_TYPE_ECC);
+    }
   #endif
-    }
   }
 
   if (err == MP_OKAY)
@@ -4981,8 +5616,30 @@
                 if (err == MP_OKAY)
                     err = ecc_projective_add_point(C, precomp[nA + (nB<<2)], C,
                                                    a, modulus, mp);
-                else
+                if (err != MP_OKAY)
                     break;
+                if (mp_iszero(C->z)) {
+                    /* When all zero then should have done an add */
+                    if (mp_iszero(C->x) && mp_iszero(C->y)) {
+                        err = ecc_projective_dbl_point(precomp[nA + (nB<<2)], C,
+                                                       a, modulus, mp);
+                        if (err != MP_OKAY)
+                            break;
+                    }
+                    /* When only Z zero then result is infinity */
+                    else {
+                        err = mp_set(C->x, 0);
+                        if (err != MP_OKAY)
+                            break;
+                        err = mp_set(C->y, 0);
+                        if (err != MP_OKAY)
+                            break;
+                        err = mp_set(C->z, 1);
+                        if (err != MP_OKAY)
+                            break;
+                        first = 1;
+                    }
+                }
             }
         }
     }
@@ -5019,7 +5676,7 @@
 }
 
 #endif /* ECC_SHAMIR */
-#endif /* !WOLFSSL_SP_MATH && !WOLFSSL_ATECC508A */
+#endif /* !WOLFSSL_SP_MATH && !WOLFSSL_ATECC508A && !WOLFSSL_CRYPTOCEL*/
 
 
 #ifdef HAVE_ECC_VERIFY
@@ -5049,35 +5706,35 @@
 {
     int err;
     mp_int *r = NULL, *s = NULL;
-#ifndef WOLFSSL_ASYNC_CRYPT
-#ifndef WOLFSSL_SMALL_STACK
-    mp_int r_lcl[1], s_lcl[1];
-#endif
+#if (!defined(WOLFSSL_ASYNC_CRYPT) || !defined(WC_ASYNC_ENABLE_ECC)) && \
+    !defined(WOLFSSL_SMALL_STACK)
+    mp_int r_lcl, s_lcl;
 #endif
 
     if (sig == NULL || hash == NULL || res == NULL || key == NULL) {
         return ECC_BAD_ARG_E;
     }
 
-#ifdef WOLF_CRYPTO_DEV
+#ifdef WOLF_CRYPTO_CB
     if (key->devId != INVALID_DEVID) {
-        err = wc_CryptoDev_EccVerify(sig, siglen, hash, hashlen, res, key);
-        if (err != NOT_COMPILED_IN)
+        err = wc_CryptoCb_EccVerify(sig, siglen, hash, hashlen, res, key);
+        if (err != CRYPTOCB_UNAVAILABLE)
             return err;
-    }
-#endif
-
-#ifdef WOLFSSL_ASYNC_CRYPT
+        /* fall-through when unavailable */
+    }
+#endif
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
     err = wc_ecc_alloc_async(key);
     if (err != 0)
         return err;
     r = key->r;
     s = key->s;
 #else
-#ifndef WOLFSSL_SMALL_STACK
-    r = r_lcl;
-    s = s_lcl;
-#else
+    #ifndef WOLFSSL_SMALL_STACK
+    r = &r_lcl;
+    s = &s_lcl;
+    #else
     r = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_ECC);
     if (r == NULL)
         return MEMORY_E;
@@ -5086,10 +5743,12 @@
         XFREE(r, key->heap, DYNAMIC_TYPE_ECC);
         return MEMORY_E;
     }
-#endif
-#endif
-
-    switch(key->state) {
+    #endif
+    XMEMSET(r, 0, sizeof(mp_int));
+    XMEMSET(s, 0, sizeof(mp_int));
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+    switch (key->state) {
         case ECC_STATE_NONE:
         case ECC_STATE_VERIFY_DECODE:
             key->state = ECC_STATE_VERIFY_DECODE;
@@ -5120,6 +5779,8 @@
         #ifdef WOLFSSL_SMALL_STACK
             XFREE(s, key->heap, DYNAMIC_TYPE_ECC);
             XFREE(r, key->heap, DYNAMIC_TYPE_ECC);
+            r = NULL;
+            s = NULL;
         #endif
         #endif
 
@@ -5144,9 +5805,15 @@
     }
 
     /* cleanup */
-#ifdef WOLFSSL_ASYNC_CRYPT
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
     wc_ecc_free_async(key);
-#endif
+#elif defined(WOLFSSL_SMALL_STACK)
+    XFREE(s, key->heap, DYNAMIC_TYPE_ECC);
+    XFREE(r, key->heap, DYNAMIC_TYPE_ECC);
+    r = NULL;
+    s = NULL;
+#endif
+
     key->state = ECC_STATE_NONE;
 
     return err;
@@ -5164,32 +5831,44 @@
    key         The corresponding public ECC key
    return      MP_OKAY if successful (even if the signature is not valid)
 */
+
 int wc_ecc_verify_hash_ex(mp_int *r, mp_int *s, const byte* hash,
                     word32 hashlen, int* res, ecc_key* key)
+#if defined(WOLFSSL_STM32_PKA)
+{
+    return stm32_ecc_verify_hash_ex(r, s, hash, hashlen, res, key);
+}
+#else
 {
    int           err;
+   word32        keySz;
 #ifdef WOLFSSL_ATECC508A
    byte sigRS[ATECC_KEY_SIZE*2];
-#elif !defined(WOLFSSL_SP_MATH)
+#elif defined(WOLFSSL_CRYPTOCELL)
+   byte sigRS[ECC_MAX_CRYPTO_HW_SIZE*2];
+   CRYS_ECDSA_VerifyUserContext_t sigCtxTemp;
+   word32 msgLenInBytes = hashlen;
+   CRYS_ECPKI_HASH_OpMode_t hash_mode;
+#elif !defined(WOLFSSL_SP_MATH) || defined(FREESCALE_LTC_ECC)
    int          did_init = 0;
    ecc_point    *mG = NULL, *mQ = NULL;
-#ifdef WOLFSSL_SMALL_STACK
+   #ifdef WOLFSSL_SMALL_STACK
    mp_int*       v = NULL;
    mp_int*       w = NULL;
    mp_int*       u1 = NULL;
    mp_int*       u2 = NULL;
-#if !defined(WOLFSSL_ASYNC_CRYPT) || !defined(HAVE_CAVIUM_V)
+      #if !defined(WOLFSSL_ASYNC_CRYPT) || !defined(HAVE_CAVIUM_V)
    mp_int*       e_lcl = NULL;
-#endif
-#else /* WOLFSSL_SMALL_STACK */
+      #endif
+   #else /* WOLFSSL_SMALL_STACK */
    mp_int        v[1];
    mp_int        w[1];
    mp_int        u1[1];
    mp_int        u2[1];
-#if !defined(WOLFSSL_ASYNC_CRYPT) || !defined(HAVE_CAVIUM_V)
+      #if !defined(WOLFSSL_ASYNC_CRYPT) || !defined(HAVE_CAVIUM_V)
    mp_int        e_lcl[1];
-#endif
-#endif /* WOLFSSL_SMALL_STACK */
+      #endif
+   #endif /* WOLFSSL_SMALL_STACK */
    mp_int*       e;
    DECLARE_CURVE_SPECS(curve, ECC_CURVE_FIELD_COUNT);
 #endif
@@ -5205,6 +5884,8 @@
       return ECC_BAD_ARG_E;
    }
 
+   keySz = key->dp->size;
+
 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC) && \
        defined(WOLFSSL_ASYNC_CRYPT_TEST)
     if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ECC) {
@@ -5227,17 +5908,54 @@
     if (err != MP_OKAY) {
         return err;
     }
-    err = mp_to_unsigned_bin(s, &sigRS[ATECC_KEY_SIZE]);
+    err = mp_to_unsigned_bin(s, &sigRS[keySz]);
     if (err != MP_OKAY) {
         return err;
     }
 
-    err = atcatls_verify(hash, sigRS, key->pubkey_raw, (bool*)res);
-    if (err != ATCA_SUCCESS) {
-       return BAD_COND_E;
+    err = atmel_ecc_verify(hash, sigRS, key->pubkey_raw, res);
+    if (err != 0) {
+       return err;
     }
     (void)hashlen;
-
+#elif defined(WOLFSSL_CRYPTOCELL)
+
+   /* Extract R and S */
+
+   err = mp_to_unsigned_bin(r, &sigRS[0]);
+   if (err != MP_OKAY) {
+       return err;
+   }
+   err = mp_to_unsigned_bin(s, &sigRS[keySz]);
+   if (err != MP_OKAY) {
+       return err;
+   }
+
+   hash_mode = cc310_hashModeECC(msgLenInBytes);
+   if (hash_mode == CRYS_ECPKI_HASH_OpModeLast) {
+       /* hash_mode = */ cc310_hashModeECC(keySz);
+       hash_mode = CRYS_ECPKI_HASH_SHA256_mode;
+   }
+   /* truncate if hash is longer than key size */
+   if (msgLenInBytes > keySz) {
+       msgLenInBytes = keySz;
+   }
+
+   /* verify the signature using the public key */
+   err = CRYS_ECDSA_Verify(&sigCtxTemp,
+                           &key->ctx.pubKey,
+                           hash_mode,
+                           &sigRS[0],
+                           keySz*2,
+                           (byte*)hash,
+                           msgLenInBytes);
+
+   if (err != SA_SILIB_RET_OK) {
+       WOLFSSL_MSG("CRYS_ECDSA_Verify failed");
+       return err;
+   }
+   /* valid signature if we get to this point */
+   *res = 1;
 #else
   /* checking if private key with no public part */
   if (key->type == ECC_PRIVATEKEY_ONLY) {
@@ -5249,26 +5967,53 @@
       }
   }
 
-#ifdef WOLFSSL_SP_MATH
+#if defined(WOLFSSL_DSP) && !defined(FREESCALE_LTC_ECC)
+  if (key->handle != -1) {
+      return sp_dsp_ecc_verify_256(key->handle, hash, hashlen, key->pubkey.x, key->pubkey.y,
+                                           key->pubkey.z, r, s, res, key->heap);
+  }
+  if (wolfSSL_GetHandleCbSet() == 1) {
+      return sp_dsp_ecc_verify_256(0, hash, hashlen, key->pubkey.x, key->pubkey.y,
+                                           key->pubkey.z, r, s, res, key->heap);
+  }
+#endif
+#if defined(WOLFSSL_SP_MATH) && !defined(FREESCALE_LTC_ECC)
+#ifndef WOLFSSL_SP_NO_256
   if (key->idx != ECC_CUSTOM_IDX && ecc_sets[key->idx].id == ECC_SECP256R1) {
       return sp_ecc_verify_256(hash, hashlen, key->pubkey.x, key->pubkey.y,
                                            key->pubkey.z, r, s, res, key->heap);
   }
-  else
-      return WC_KEY_SIZE_E;
+#endif
+#ifdef WOLFSSL_SP_384
+  if (key->idx != ECC_CUSTOM_IDX && ecc_sets[key->idx].id == ECC_SECP384R1) {
+      return sp_ecc_verify_384(hash, hashlen, key->pubkey.x, key->pubkey.y,
+                                           key->pubkey.z, r, s, res, key->heap);
+  }
+#endif
+  return WC_KEY_SIZE_E;
 #else
-#ifdef WOLFSSL_HAVE_SP_ECC
-#ifndef WOLFSSL_SP_NO_256
-    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC) && \
-           defined(WOLFSSL_ASYNC_CRYPT_TEST)
+#if defined WOLFSSL_HAVE_SP_ECC && !defined(FREESCALE_LTC_ECC)
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
     if (key->asyncDev.marker != WOLFSSL_ASYNC_MARKER_ECC)
     #endif
     {
-        if (key->idx != ECC_CUSTOM_IDX && ecc_sets[key->idx].id == ECC_SECP256R1)
-            return sp_ecc_verify_256(hash, hashlen, key->pubkey.x, key->pubkey.y,
-                                     key->pubkey.z,r, s, res, key->heap);
-    }
+#ifndef WOLFSSL_SP_NO_256
+        if (key->idx != ECC_CUSTOM_IDX &&
+                                       ecc_sets[key->idx].id == ECC_SECP256R1) {
+            return sp_ecc_verify_256(hash, hashlen, key->pubkey.x,
+                                         key->pubkey.y, key->pubkey.z,r, s, res,
+                                         key->heap);
+        }
 #endif /* WOLFSSL_SP_NO_256 */
+#ifdef WOLFSSL_SP_384
+        if (key->idx != ECC_CUSTOM_IDX &&
+                                       ecc_sets[key->idx].id == ECC_SECP384R1) {
+            return sp_ecc_verify_384(hash, hashlen, key->pubkey.x,
+                                         key->pubkey.y, key->pubkey.z,r, s, res,
+                                         key->heap);
+        }
+#endif /* WOLFSSL_SP_384 */
+    }
 #endif /* WOLFSSL_HAVE_SP_ECC */
 
    ALLOC_CURVE_SPECS(ECC_CURVE_FIELD_COUNT);
@@ -5289,7 +6034,7 @@
    }
 #endif
    e = e_lcl;
-#endif
+#endif /* WOLFSSL_ASYNC_CRYPT && HAVE_CAVIUM_V */
 
    err = mp_init(e);
    if (err != MP_OKAY)
@@ -5330,8 +6075,6 @@
       if (NitroxEccIsCurveSupported(key))
    #endif
       {
-          word32 keySz = key->dp->size;
-
           err = wc_mp_to_bigint_sz(e, &e->raw, keySz);
           if (err == MP_OKAY)
               err = wc_mp_to_bigint_sz(key->pubkey.x, &key->pubkey.x->raw, keySz);
@@ -5359,7 +6102,7 @@
       }
    #endif /* HAVE_CAVIUM_V || HAVE_INTEL_QA */
    }
-#endif /* WOLFSSL_ASYNC_CRYPT */
+#endif /* WOLFSSL_ASYNC_CRYPT && WC_ASYNC_ENABLE_ECC */
 
 #ifdef WOLFSSL_SMALL_STACK
    if (err == MP_OKAY) {
@@ -5427,7 +6170,7 @@
    if (err == MP_OKAY)
        err = mp_copy(key->pubkey.z, mQ->z);
 
-#ifdef FREESCALE_LTC_ECC
+#if defined(FREESCALE_LTC_ECC)
    /* use PKHA to compute u1*mG + u2*mQ */
    if (err == MP_OKAY)
        err = wc_ecc_mulmod_ex(u1, mG, mG, curve->Af, curve->prime, 0, key->heap);
@@ -5437,38 +6180,62 @@
        err = wc_ecc_point_add(mG, mQ, mG, curve->prime);
 #else
 #ifndef ECC_SHAMIR
+    if (err == MP_OKAY)
     {
-        mp_digit      mp = 0;
-
-        /* compute u1*mG + u2*mQ = mG */
-        if (err == MP_OKAY) {
+        mp_digit mp = 0;
+
+        if (!mp_iszero(u1)) {
+            /* compute u1*mG + u2*mQ = mG */
             err = wc_ecc_mulmod_ex(u1, mG, mG, curve->Af, curve->prime, 0,
                                                                      key->heap);
-        }
-        if (err == MP_OKAY) {
-            err = wc_ecc_mulmod_ex(u2, mQ, mQ, curve->Af, curve->prime, 0,
+            if (err == MP_OKAY) {
+                err = wc_ecc_mulmod_ex(u2, mQ, mQ, curve->Af, curve->prime, 0,
                                                                      key->heap);
-        }
-
-        /* find the montgomery mp */
-        if (err == MP_OKAY)
-            err = mp_montgomery_setup(curve->prime, &mp);
-
-        /* add them */
-        if (err == MP_OKAY)
-            err = ecc_projective_add_point(mQ, mG, mG, curve->Af,
-                                                             curve->prime, mp);
+            }
+
+            /* find the montgomery mp */
+            if (err == MP_OKAY)
+                err = mp_montgomery_setup(curve->prime, &mp);
+
+            /* add them */
+            if (err == MP_OKAY)
+                err = ecc_projective_add_point(mQ, mG, mG, curve->Af,
+                                                              curve->prime, mp);
+            if (err == MP_OKAY && mp_iszero(mG->z)) {
+                /* When all zero then should have done an add */
+                if (mp_iszero(mG->x) && mp_iszero(mG->y)) {
+                    err = ecc_projective_dbl_point(mQ, mG, curve->Af,
+                                                              curve->prime, mp);
+                }
+                /* When only Z zero then result is infinity */
+                else {
+                    err = mp_set(mG->x, 0);
+                    if (err == MP_OKAY)
+                        err = mp_set(mG->y, 0);
+                    if (err == MP_OKAY)
+                        err = mp_set(mG->z, 1);
+                }
+            }
+        }
+        else {
+            /* compute 0*mG + u2*mQ = mG */
+            err = wc_ecc_mulmod_ex(u2, mQ, mG, curve->Af, curve->prime, 0,
+                                                                     key->heap);
+            /* find the montgomery mp */
+            if (err == MP_OKAY)
+                err = mp_montgomery_setup(curve->prime, &mp);
+        }
 
         /* reduce */
         if (err == MP_OKAY)
             err = ecc_map(mG, curve->prime, mp);
     }
 #else
-       /* use Shamir's trick to compute u1*mG + u2*mQ using half the doubles */
-        if (err == MP_OKAY) {
-            err = ecc_mul2add(mG, u1, mQ, u2, mG, curve->Af, curve->prime,
-                                                                    key->heap);
-        }
+    /* use Shamir's trick to compute u1*mG + u2*mQ using half the doubles */
+    if (err == MP_OKAY) {
+        err = ecc_mul2add(mG, u1, mQ, u2, mG, curve->Af, curve->prime,
+                                                                     key->heap);
+    }
 #endif /* ECC_SHAMIR */
 #endif /* FREESCALE_LTC_ECC */
    /* v = X_x1 mod n */
@@ -5508,21 +6275,31 @@
 #endif /* WOLFSSL_SP_MATH */
 #endif /* WOLFSSL_ATECC508A */
 
+   (void)keySz;
+   (void)hashlen;
+
    return err;
 }
+#endif /* WOLFSSL_STM32_PKA */
 #endif /* HAVE_ECC_VERIFY */
 
 #ifdef HAVE_ECC_KEY_IMPORT
-/* import point from der */
-int wc_ecc_import_point_der(byte* in, word32 inLen, const int curve_idx,
-                            ecc_point* point)
+/* import point from der
+ * if shortKeySize != 0 then keysize is always (inLen-1)>>1 */
+int wc_ecc_import_point_der_ex(byte* in, word32 inLen, const int curve_idx,
+                               ecc_point* point, int shortKeySize)
 {
     int err = 0;
-#ifndef WOLFSSL_ATECC508A
+#ifdef HAVE_COMP_KEY
     int compressed = 0;
+#endif
     int keysize;
     byte pointType;
 
+#ifndef HAVE_COMP_KEY
+    (void)shortKeySize;
+#endif
+
     if (in == NULL || point == NULL || (curve_idx < 0) ||
         (wc_ecc_is_valid_idx(curve_idx) == 0))
         return ECC_BAD_ARG_E;
@@ -5565,12 +6342,12 @@
     inLen -= 1;
     in += 1;
 
-    /* calculate key size based on inLen / 2 */
+    /* calculate key size based on inLen / 2 if uncompressed or shortKeySize
+     * is true */
+#ifdef HAVE_COMP_KEY
+    keysize = compressed && !shortKeySize ? inLen : inLen>>1;
+#else
     keysize = inLen>>1;
-
-#ifdef WOLFSSL_ATECC508A
-    /* populate key->pubkey_raw */
-    XMEMCPY(key->pubkey_raw, (byte*)in, sizeof(key->pubkey_raw));
 #endif
 
     /* read data */
@@ -5636,13 +6413,33 @@
         wc_ecc_curve_free(curve);
         FREE_CURVE_SPECS();
 #else
-        sp_ecc_uncompress_256(point->x, pointType, point->y);
-#endif
-    }
-#endif
-
-    if (err == MP_OKAY && compressed == 0)
-        err = mp_read_unsigned_bin(point->y, (byte*)in + keysize, keysize);
+    #ifndef WOLFSSL_SP_NO_256
+        if (curve_idx != ECC_CUSTOM_IDX &&
+                                      ecc_sets[curve_idx].id == ECC_SECP256R1) {
+            sp_ecc_uncompress_256(point->x, pointType, point->y);
+        }
+        else
+    #endif
+    #ifdef WOLFSSL_SP_384
+        if (curve_idx != ECC_CUSTOM_IDX &&
+                                      ecc_sets[curve_idx].id == ECC_SECP384R1) {
+            sp_ecc_uncompress_384(point->x, pointType, point->y);
+        }
+        else
+    #endif
+        {
+            err = WC_KEY_SIZE_E;
+        }
+#endif
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef HAVE_COMP_KEY
+        if (compressed == 0)
+#endif
+            err = mp_read_unsigned_bin(point->y, (byte*)in + keysize, keysize);
+     }
     if (err == MP_OKAY)
         err = mp_set(point->z, 1);
 
@@ -5652,39 +6449,51 @@
         mp_clear(point->z);
     }
 
-#else
-    err = NOT_COMPILED_IN;
-    (void)in;
-    (void)inLen;
-    (void)curve_idx;
-    (void)point;
-#endif /* !WOLFSSL_ATECC508A */
-
     return err;
 }
+
+/* function for backwards compatiblity with previous implementations */
+int wc_ecc_import_point_der(byte* in, word32 inLen, const int curve_idx,
+                            ecc_point* point)
+{
+    return wc_ecc_import_point_der_ex(in, inLen, curve_idx, point, 1);
+}
 #endif /* HAVE_ECC_KEY_IMPORT */
 
 #ifdef HAVE_ECC_KEY_EXPORT
 /* export point to der */
+
+int wc_ecc_export_point_der_ex(const int curve_idx, ecc_point* point, byte* out,
+                               word32* outLen, int compressed)
+{
+    if (compressed == 0)
+        return wc_ecc_export_point_der(curve_idx, point, out, outLen);
+#ifdef HAVE_COMP_KEY
+    else
+        return wc_ecc_export_point_der_compressed(curve_idx, point, out, outLen);
+#else
+    return NOT_COMPILED_IN;
+#endif
+}
+
 int wc_ecc_export_point_der(const int curve_idx, ecc_point* point, byte* out,
                             word32* outLen)
 {
     int    ret = MP_OKAY;
     word32 numlen;
-#ifndef WOLFSSL_ATECC508A
 #ifdef WOLFSSL_SMALL_STACK
     byte*  buf;
 #else
     byte   buf[ECC_BUFSIZE];
 #endif
-#endif /* !WOLFSSL_ATECC508A */
 
     if ((curve_idx < 0) || (wc_ecc_is_valid_idx(curve_idx) == 0))
         return ECC_BAD_ARG_E;
 
+    numlen = ecc_sets[curve_idx].size;
+
     /* return length needed only */
     if (point != NULL && out == NULL && outLen != NULL) {
-        numlen = ecc_sets[curve_idx].size;
         *outLen = 1 + 2*numlen;
         return LENGTH_ONLY_E;
     }
@@ -5692,19 +6501,11 @@
     if (point == NULL || out == NULL || outLen == NULL)
         return ECC_BAD_ARG_E;
 
-    numlen = ecc_sets[curve_idx].size;
-
     if (*outLen < (1 + 2*numlen)) {
         *outLen = 1 + 2*numlen;
         return BUFFER_E;
     }
 
-#ifdef WOLFSSL_ATECC508A
-   /* TODO: Implement equiv call to ATECC508A */
-   ret = BAD_COND_E;
-
-#else
-
     /* store byte point type */
     out[0] = ECC_POINT_UNCOMP;
 
@@ -5736,12 +6537,75 @@
 #ifdef WOLFSSL_SMALL_STACK
     XFREE(buf, NULL, DYNAMIC_TYPE_ECC_BUFFER);
 #endif
-#endif /* WOLFSSL_ATECC508A */
 
     return ret;
 }
 
 
+/* export point to der */
+#ifdef HAVE_COMP_KEY
+int wc_ecc_export_point_der_compressed(const int curve_idx, ecc_point* point,
+                                       byte* out, word32* outLen)
+{
+    int    ret = MP_OKAY;
+    word32 numlen;
+    word32 output_len;
+#ifdef WOLFSSL_SMALL_STACK
+    byte*  buf;
+#else
+    byte   buf[ECC_BUFSIZE];
+#endif
+
+    if ((curve_idx < 0) || (wc_ecc_is_valid_idx(curve_idx) == 0))
+        return ECC_BAD_ARG_E;
+
+    numlen = ecc_sets[curve_idx].size;
+    output_len = 1 + numlen; /* y point type + x */
+
+    /* return length needed only */
+    if (point != NULL && out == NULL && outLen != NULL) {
+        *outLen = output_len;
+        return LENGTH_ONLY_E;
+    }
+
+    if (point == NULL || out == NULL || outLen == NULL)
+        return ECC_BAD_ARG_E;
+
+
+    if (*outLen < output_len) {
+        *outLen = output_len;
+        return BUFFER_E;
+    }
+
+    /* store byte point type */
+    out[0] = mp_isodd(point->y) == MP_YES ? ECC_POINT_COMP_ODD :
+                                            ECC_POINT_COMP_EVEN;
+
+#ifdef WOLFSSL_SMALL_STACK
+    buf = (byte*)XMALLOC(ECC_BUFSIZE, NULL, DYNAMIC_TYPE_ECC_BUFFER);
+    if (buf == NULL)
+        return MEMORY_E;
+#endif
+
+    /* pad and store x */
+    XMEMSET(buf, 0, ECC_BUFSIZE);
+    ret = mp_to_unsigned_bin(point->x, buf +
+                                 (numlen - mp_unsigned_bin_size(point->x)));
+    if (ret != MP_OKAY)
+        goto done;
+    XMEMCPY(out+1, buf, numlen);
+
+    *outLen = output_len;
+
+done:
+#ifdef WOLFSSL_SMALL_STACK
+    XFREE(buf, NULL, DYNAMIC_TYPE_ECC_BUFFER);
+#endif
+
+    return ret;
+}
+#endif /* HAVE_COMP_KEY */
+
 /* export public ECC key in ANSI X9.63 format */
 int wc_ecc_export_x963(ecc_key* key, byte* out, word32* outLen)
 {
@@ -5756,7 +6620,8 @@
 
    /* return length needed only */
    if (key != NULL && out == NULL && outLen != NULL) {
-      numlen = key->dp->size;
+      /* if key hasn't been setup assume max bytes for size estimation */
+      numlen = key->dp ? key->dp->size : MAX_ECC_BYTES;
       *outLen = 1 + 2*numlen;
       return LENGTH_ONLY_E;
    }
@@ -5767,7 +6632,7 @@
    if (key->type == ECC_PRIVATEKEY_ONLY)
        return ECC_PRIVATEONLY_E;
 
-   if (wc_ecc_is_valid_idx(key->idx) == 0) {
+   if (wc_ecc_is_valid_idx(key->idx) == 0 || key->dp == NULL) {
       return ECC_BAD_ARG_E;
    }
    numlen = key->dp->size;
@@ -5837,7 +6702,7 @@
 #endif /* HAVE_ECC_KEY_EXPORT */
 
 
-#ifndef WOLFSSL_ATECC508A
+#if !defined(WOLFSSL_ATECC508A) && !defined(WOLFSSL_CRYPTOCELL)
 
 /* is ecc point on curve described by dp ? */
 int wc_ecc_is_point(ecc_point* ecp, mp_int* a, mp_int* b, mp_int* prime)
@@ -5845,8 +6710,8 @@
 #ifndef WOLFSSL_SP_MATH
    int err;
 #ifdef WOLFSSL_SMALL_STACK
-   mp_int* t1 = NULL;
-   mp_int* t2 = NULL;
+   mp_int* t1;
+   mp_int* t2;
 #else
    mp_int  t1[1], t2[1];
 #endif
@@ -5946,9 +6811,18 @@
 #else
    (void)a;
    (void)b;
-   (void)prime;
-
-   return sp_ecc_is_point_256(ecp->x, ecp->y);
+
+#ifndef WOLFSSL_SP_NO_256
+   if (mp_count_bits(prime) == 256) {
+       return sp_ecc_is_point_256(ecp->x, ecp->y);
+   }
+#endif
+#ifdef WOLFSSL_SP_384
+   if (mp_count_bits(prime) == 384) {
+       return sp_ecc_is_point_384(ecp->x, ecp->y);
+   }
+#endif
+   return WC_KEY_SIZE_E;
 #endif
 }
 
@@ -5973,8 +6847,17 @@
 #ifdef WOLFSSL_HAVE_SP_ECC
 #ifndef WOLFSSL_SP_NO_256
     if (key->idx != ECC_CUSTOM_IDX && ecc_sets[key->idx].id == ECC_SECP256R1) {
-        if (err == MP_OKAY)
+        if (err == MP_OKAY) {
             err = sp_ecc_mulmod_base_256(&key->k, res, 1, key->heap);
+        }
+    }
+    else
+#endif
+#ifdef WOLFSSL_SP_384
+    if (key->idx != ECC_CUSTOM_IDX && ecc_sets[key->idx].id == ECC_SECP384R1) {
+        if (err == MP_OKAY) {
+            err = sp_ecc_mulmod_base_384(&key->k, res, 1, key->heap);
+        }
     }
     else
 #endif
@@ -6035,8 +6918,8 @@
         return BAD_FUNC_ARG;
 
 #ifdef WOLFSSL_ATECC508A
-    /* TODO: Implement equiv call to ATECC508A */
-    err = BAD_COND_E;
+    /* Hardware based private key, so this operation is not supported */
+    err = MP_OKAY; /* just report success */
 
 #else
     ALLOC_CURVE_SPECS(2);
@@ -6058,7 +6941,6 @@
 
 #endif /* WOLFSSL_VALIDATE_ECC_IMPORT */
 
-
 #if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || !defined(WOLFSSL_SP_MATH)
 /* validate order * pubkey = point at infinity, 0 on success */
 static int ecc_check_pubkey_order(ecc_key* key, ecc_point* pubkey, mp_int* a,
@@ -6082,6 +6964,13 @@
         }
         else
 #endif
+#ifdef WOLFSSL_SP_384
+        if (key->idx != ECC_CUSTOM_IDX &&
+                                       ecc_sets[key->idx].id == ECC_SECP384R1) {
+            err = sp_ecc_mulmod_384(order, pubkey, inf, 1, key->heap);
+        }
+        else
+#endif
 #endif
 #ifndef WOLFSSL_SP_MATH
             err = wc_ecc_mulmod_ex(order, pubkey, inf, a, prime, 1, key->heap);
@@ -6100,15 +6989,41 @@
     return err;
 }
 #endif
-#endif /* !WOLFSSL_ATECC508A */
-
+#endif /* !WOLFSSL_ATECC508A && !WOLFSSL_CRYPTOCELL*/
+
+#ifdef OPENSSL_EXTRA
+int wc_ecc_get_generator(ecc_point* ecp, int curve_idx)
+{
+    int err = MP_OKAY;
+    DECLARE_CURVE_SPECS(curve, 2);
+
+    if (!ecp || curve_idx < 0 || curve_idx > (int)(ECC_SET_COUNT-1))
+        return BAD_FUNC_ARG;
+
+    ALLOC_CURVE_SPECS(2);
+
+    err = wc_ecc_curve_load(&ecc_sets[curve_idx], &curve,
+                            (ECC_CURVE_FIELD_GX | ECC_CURVE_FIELD_GY));
+    if (err == MP_OKAY)
+        err = mp_copy(curve->Gx, ecp->x);
+    if (err == MP_OKAY)
+        err = mp_copy(curve->Gy, ecp->y);
+    if (err == MP_OKAY)
+        err = mp_set(ecp->z, 1);
+
+    wc_ecc_curve_free(curve);
+    FREE_CURVE_SPECS();
+
+    return err;
+}
+#endif /* OPENSSLALL */
 
 /* perform sanity checks on ecc key validity, 0 on success */
 int wc_ecc_check_key(ecc_key* key)
 {
     int    err;
 #ifndef WOLFSSL_SP_MATH
-#ifndef WOLFSSL_ATECC508A
+#if !defined(WOLFSSL_ATECC508A) && !defined(WOLFSSL_CRYPTOCELL)
     mp_int* b = NULL;
 #ifdef USE_ECC_B_PARAM
     DECLARE_CURVE_SPECS(curve, 4);
@@ -6123,12 +7038,9 @@
     if (key == NULL)
         return BAD_FUNC_ARG;
 
-#ifdef WOLFSSL_ATECC508A
-
-    if (key->slot == ATECC_INVALID_SLOT)
-        return ECC_BAD_ARG_E;
-
-    err = 0; /* consider key check success on ECC508A */
+#if defined(WOLFSSL_ATECC508A) || defined(WOLFSSL_CRYPTOCELL)
+
+    err = 0; /* consider key check success on ATECC508A */
 
 #else
     #ifdef USE_ECC_B_PARAM
@@ -6172,17 +7084,22 @@
     if (err == MP_OKAY)
         err = mp_read_radix(b, key->dp->Bf, MP_RADIX_HEX);
 #else
-    b = curve->Bf;
+    if (err == MP_OKAY)
+        b = curve->Bf;
 #endif
 
     /* SP 800-56Ar3, section 5.6.2.3.3, process step 2 */
     /* Qx must be in the range [0, p-1] */
-    if (mp_cmp(key->pubkey.x, curve->prime) != MP_LT)
-        err = ECC_OUT_OF_RANGE_E;
+    if (err == MP_OKAY) {
+        if (mp_cmp(key->pubkey.x, curve->prime) != MP_LT)
+            err = ECC_OUT_OF_RANGE_E;
+    }
 
     /* Qy must be in the range [0, p-1] */
-    if (mp_cmp(key->pubkey.y, curve->prime) != MP_LT)
-        err = ECC_OUT_OF_RANGE_E;
+    if (err == MP_OKAY) {
+        if (mp_cmp(key->pubkey.y, curve->prime) != MP_LT)
+            err = ECC_OUT_OF_RANGE_E;
+    }
 
     /* SP 800-56Ar3, section 5.6.2.3.3, process steps 3 */
     /* make sure point is actually on curve */
@@ -6217,12 +7134,23 @@
         return BAD_FUNC_ARG;
 
     /* pubkey point cannot be at infinity */
+#ifndef WOLFSSL_SP_NO_256
     if (key->idx != ECC_CUSTOM_IDX && ecc_sets[key->idx].id == ECC_SECP256R1) {
         err = sp_ecc_check_key_256(key->pubkey.x, key->pubkey.y, &key->k,
                                                                      key->heap);
     }
     else
+#endif
+#ifdef WOLFSSL_SP_384
+    if (key->idx != ECC_CUSTOM_IDX && ecc_sets[key->idx].id == ECC_SECP384R1) {
+        err = sp_ecc_check_key_384(key->pubkey.x, key->pubkey.y, &key->k,
+                                                                     key->heap);
+    }
+    else
+#endif
+    {
         err = WC_KEY_SIZE_E;
+    }
 #endif
 
     return err;
@@ -6234,7 +7162,9 @@
                           int curve_id)
 {
     int err = MP_OKAY;
+#ifdef HAVE_COMP_KEY
     int compressed = 0;
+#endif
     int keysize = 0;
     byte pointType;
 
@@ -6284,6 +7214,16 @@
     inLen -= 1;
     in += 1;
 
+#ifdef WOLFSSL_ATECC508A
+    /* For SECP256R1 only save raw public key for hardware */
+    if (curve_id == ECC_SECP256R1 && inLen <= sizeof(key->pubkey_raw)) {
+    #ifdef HAVE_COMP_KEY
+        if (!compressed)
+    #endif
+            XMEMCPY(key->pubkey_raw, (byte*)in, inLen);
+    }
+#endif
+
     if (err == MP_OKAY) {
     #ifdef HAVE_COMP_KEY
         /* adjust inLen if compressed */
@@ -6362,13 +7302,34 @@
         wc_ecc_curve_free(curve);
         FREE_CURVE_SPECS();
 #else
-        sp_ecc_uncompress_256(key->pubkey.x, pointType, key->pubkey.y);
+    #ifndef WOLFSSL_SP_NO_256
+        if (key->dp->id == ECC_SECP256R1) {
+            sp_ecc_uncompress_256(key->pubkey.x, pointType, key->pubkey.y);
+        }
+        else
+    #endif
+    #ifdef WOLFSSL_SP_384
+        if (key->dp->id == ECC_SECP384R1) {
+            sp_ecc_uncompress_384(key->pubkey.x, pointType, key->pubkey.y);
+        }
+        else
+    #endif
+        {
+            err = WC_KEY_SIZE_E;
+        }
 #endif
     }
 #endif /* HAVE_COMP_KEY */
 
-    if (err == MP_OKAY && compressed == 0)
-        err = mp_read_unsigned_bin(key->pubkey.y, (byte*)in + keysize, keysize);
+    if (err == MP_OKAY) {
+    #ifdef HAVE_COMP_KEY
+        if (compressed == 0)
+    #endif
+        {
+            err = mp_read_unsigned_bin(key->pubkey.y, (byte*)in + keysize,
+                                                                      keysize);
+        }
+    }
     if (err == MP_OKAY)
         err = mp_set(key->pubkey.z, 1);
 
@@ -6387,6 +7348,7 @@
     return err;
 }
 
+WOLFSSL_ABI
 int wc_ecc_import_x963(const byte* in, word32 inLen, ecc_key* key)
 {
     return wc_ecc_import_x963_ex(in, inLen, key, ECC_CURVE_DEF);
@@ -6394,143 +7356,98 @@
 #endif /* HAVE_ECC_KEY_IMPORT */
 
 #ifdef HAVE_ECC_KEY_EXPORT
-/* export ecc private key only raw, outLen is in/out size
-   return MP_OKAY on success */
-int wc_ecc_export_private_only(ecc_key* key, byte* out, word32* outLen)
-{
-    word32 numlen;
-
-    if (key == NULL || out == NULL || outLen == NULL) {
+
+/* export ecc key to component form, d is optional if only exporting public
+ * encType is WC_TYPE_UNSIGNED_BIN or WC_TYPE_HEX_STR
+ * return MP_OKAY on success */
+int wc_ecc_export_ex(ecc_key* key, byte* qx, word32* qxLen,
+                 byte* qy, word32* qyLen, byte* d, word32* dLen, int encType)
+{
+    int err = 0;
+    word32 keySz;
+
+    if (key == NULL) {
         return BAD_FUNC_ARG;
     }
 
     if (wc_ecc_is_valid_idx(key->idx) == 0) {
         return ECC_BAD_ARG_E;
     }
-    numlen = key->dp->size;
-
-    if (*outLen < numlen) {
-        *outLen = numlen;
-        return BUFFER_E;
-    }
-    *outLen = numlen;
-    XMEMSET(out, 0, *outLen);
-
-#ifdef WOLFSSL_ATECC508A
-   /* TODO: Implement equiv call to ATECC508A */
-   return BAD_COND_E;
-
-#else
-
-    return mp_to_unsigned_bin(&key->k, out + (numlen -
-                                           mp_unsigned_bin_size(&key->k)));
-#endif /* WOLFSSL_ATECC508A */
-}
-
-
-/* export ecc key to component form, d is optional if only exporting public
- * return MP_OKAY on success */
-static int wc_ecc_export_raw(ecc_key* key, byte* qx, word32* qxLen,
-                             byte* qy, word32* qyLen, byte* d, word32* dLen)
-{
-    int  err;
-    byte exportPriv = 0;
-    word32 numLen;
-
-    if (key == NULL || qx == NULL || qxLen == NULL || qy == NULL ||
-        qyLen == NULL) {
-        return BAD_FUNC_ARG;
-    }
-
-    if (key->type == ECC_PRIVATEKEY_ONLY) {
-        return ECC_PRIVATEONLY_E;
-    }
-
-    if (wc_ecc_is_valid_idx(key->idx) == 0) {
-        return ECC_BAD_ARG_E;
-    }
-    numLen = key->dp->size;
-
+    keySz = key->dp->size;
+
+    /* private key, d */
     if (d != NULL) {
-        if (dLen == NULL || key->type != ECC_PRIVATEKEY)
+        if (dLen == NULL ||
+            (key->type != ECC_PRIVATEKEY && key->type != ECC_PRIVATEKEY_ONLY))
             return BAD_FUNC_ARG;
-        exportPriv = 1;
-    }
-
-    /* check public buffer sizes */
-    if ((*qxLen < numLen) || (*qyLen < numLen)) {
-        *qxLen = numLen;
-        *qyLen = numLen;
-        return BUFFER_E;
-    }
-
-    *qxLen = numLen;
-    *qyLen = numLen;
-
-    XMEMSET(qx, 0, *qxLen);
-    XMEMSET(qy, 0, *qyLen);
-
-    /* private d component */
-    if (exportPriv == 1) {
-
-        /* check private buffer size */
-        if (*dLen < numLen) {
-            *dLen = numLen;
-            return BUFFER_E;
-        }
-
-        *dLen = numLen;
-        XMEMSET(d, 0, *dLen);
 
     #ifdef WOLFSSL_ATECC508A
-       /* TODO: Implement equiv call to ATECC508A */
-       return BAD_COND_E;
-
+        /* Hardware cannot export private portion */
+        return NOT_COMPILED_IN;
     #else
-
-        /* private key, d */
-        err = mp_to_unsigned_bin(&key->k, d +
-                            (numLen - mp_unsigned_bin_size(&key->k)));
+        err = wc_export_int(&key->k, d, dLen, keySz, encType);
         if (err != MP_OKAY)
             return err;
-    #endif /* WOLFSSL_ATECC508A */
+    #endif
     }
 
     /* public x component */
-    err = mp_to_unsigned_bin(key->pubkey.x, qx +
-                            (numLen - mp_unsigned_bin_size(key->pubkey.x)));
-    if (err != MP_OKAY)
-        return err;
+    if (qx != NULL) {
+        if (qxLen == NULL || key->type == ECC_PRIVATEKEY_ONLY)
+            return BAD_FUNC_ARG;
+
+        err = wc_export_int(key->pubkey.x, qx, qxLen, keySz, encType);
+        if (err != MP_OKAY)
+            return err;
+    }
 
     /* public y component */
-    err = mp_to_unsigned_bin(key->pubkey.y, qy +
-                            (numLen - mp_unsigned_bin_size(key->pubkey.y)));
-    if (err != MP_OKAY)
-        return err;
-
-    return 0;
-}
-
-
-/* export public key to raw elements including public (Qx,Qy)
+    if (qy != NULL) {
+        if (qyLen == NULL || key->type == ECC_PRIVATEKEY_ONLY)
+            return BAD_FUNC_ARG;
+
+        err = wc_export_int(key->pubkey.y, qy, qyLen, keySz, encType);
+        if (err != MP_OKAY)
+            return err;
+    }
+
+    return err;
+}
+
+
+/* export ecc private key only raw, outLen is in/out size as unsigned bin
+   return MP_OKAY on success */
+int wc_ecc_export_private_only(ecc_key* key, byte* out, word32* outLen)
+{
+    if (out == NULL || outLen == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    return wc_ecc_export_ex(key, NULL, NULL, NULL, NULL, out, outLen,
+        WC_TYPE_UNSIGNED_BIN);
+}
+
+/* export public key to raw elements including public (Qx,Qy) as unsigned bin
  * return MP_OKAY on success, negative on error */
 int wc_ecc_export_public_raw(ecc_key* key, byte* qx, word32* qxLen,
                              byte* qy, word32* qyLen)
 {
-    return wc_ecc_export_raw(key, qx, qxLen, qy, qyLen, NULL, NULL);
-}
-
-
-/* export ecc key to raw elements including public (Qx,Qy) and private (d)
+    if (qx == NULL || qxLen == NULL || qy == NULL || qyLen == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    return wc_ecc_export_ex(key, qx, qxLen, qy, qyLen, NULL, NULL,
+        WC_TYPE_UNSIGNED_BIN);
+}
+
+/* export ecc key to raw elements including public (Qx,Qy) and
+ *   private (d) as unsigned bin
  * return MP_OKAY on success, negative on error */
 int wc_ecc_export_private_raw(ecc_key* key, byte* qx, word32* qxLen,
                               byte* qy, word32* qyLen, byte* d, word32* dLen)
 {
-    /* sanitize d and dLen, other args are checked later */
-    if (d == NULL || dLen == NULL)
-        return BAD_FUNC_ARG;
-
-    return wc_ecc_export_raw(key, qx, qxLen, qy, qyLen, d, dLen);
+    return wc_ecc_export_ex(key, qx, qxLen, qy, qyLen, d, dLen,
+        WC_TYPE_UNSIGNED_BIN);
 }
 
 #endif /* HAVE_ECC_KEY_EXPORT */
@@ -6542,17 +7459,24 @@
                                  int curve_id)
 {
     int ret;
-    word32 idx = 0;
-
+#if defined(WOLFSSL_CRYPTOCELL) && !defined(WOLFSSL_ATECC508A)
+    const CRYS_ECPKI_Domain_t* pDomain;
+    CRYS_ECPKI_BUILD_TempData_t tempBuff;
+#endif
     if (key == NULL || priv == NULL)
         return BAD_FUNC_ARG;
 
     /* public optional, NULL if only importing private */
     if (pub != NULL) {
+    #ifndef NO_ASN
+        word32 idx = 0;
         ret = wc_ecc_import_x963_ex(pub, pubSz, key, curve_id);
         if (ret < 0)
             ret = wc_EccPublicKeyDecode(pub, &idx, key, pubSz);
         key->type = ECC_PRIVATEKEY;
+    #else
+        ret = NOT_COMPILED_IN;
+    #endif
     }
     else {
         /* make sure required variables are reset */
@@ -6567,12 +7491,52 @@
         return ret;
 
 #ifdef WOLFSSL_ATECC508A
-    /* TODO: Implement equiv call to ATECC508A */
-    return BAD_COND_E;
+    /* Hardware does not support loading private keys */
+    return NOT_COMPILED_IN;
+#elif defined(WOLFSSL_CRYPTOCELL)
+    pDomain = CRYS_ECPKI_GetEcDomain(cc310_mapCurve(curve_id));
+
+    if (pub != NULL && pub[0] != '\0') {
+        /* create public key from external key buffer */
+        ret = CRYS_ECPKI_BuildPublKeyFullCheck(pDomain,
+                                               (byte*)pub,
+                                               pubSz,
+                                               &key->ctx.pubKey,
+                                               &tempBuff);
+
+        if (ret != SA_SILIB_RET_OK){
+            WOLFSSL_MSG("CRYS_ECPKI_BuildPublKeyFullCheck failed");
+            return ret;
+        }
+    }
+    /* import private key */
+    if (priv != NULL && priv[0] != '\0') {
+
+        /* Create private key from external key buffer*/
+        ret = CRYS_ECPKI_BuildPrivKey(pDomain,
+                                      priv,
+                                      privSz,
+                                      &key->ctx.privKey);
+
+        if (ret != SA_SILIB_RET_OK) {
+            WOLFSSL_MSG("CRYS_ECPKI_BuildPrivKey failed");
+            return ret;
+        }
+
+        ret = mp_read_unsigned_bin(&key->k, priv, privSz);
+    }
 
 #else
 
     ret = mp_read_unsigned_bin(&key->k, priv, privSz);
+#ifdef HAVE_WOLF_BIGINT
+    if (ret == 0 &&
+                  wc_bigint_from_unsigned_bin(&key->k.raw, priv, privSz) != 0) {
+        mp_clear(&key->k);
+        ret = ASN_GETINT_E;
+    }
+#endif /* HAVE_WOLF_BIGINT */
+
 
 #endif /* WOLFSSL_ATECC508A */
 
@@ -6742,6 +7706,7 @@
                      byte* s, word32* sLen)
 {
     int err;
+    int tmp_valid = 0;
     word32 x = 0;
 #ifdef WOLFSSL_SMALL_STACK
     mp_int* rtmp = NULL;
@@ -6767,16 +7732,18 @@
 
     err = DecodeECC_DSA_Sig(sig, sigLen, rtmp, stmp);
 
-    /* extract r */
+    /* rtmp and stmp are initialized */
     if (err == MP_OKAY) {
+        tmp_valid = 1;
+
+        /* extract r */
         x = mp_unsigned_bin_size(rtmp);
         if (*rLen < x)
             err = BUFFER_E;
-
-        if (err == MP_OKAY) {
-            *rLen = x;
-            err = mp_to_unsigned_bin(rtmp, r);
-        }
+    }
+    if (err == MP_OKAY) {
+        *rLen = x;
+        err = mp_to_unsigned_bin(rtmp, r);
     }
 
     /* extract s */
@@ -6791,8 +7758,10 @@
         }
     }
 
-    mp_clear(rtmp);
-    mp_clear(stmp);
+    if (tmp_valid) {
+        mp_clear(rtmp);
+        mp_clear(stmp);
+    }
 #ifdef WOLFSSL_SMALL_STACK
     XFREE(stmp, NULL, DYNAMIC_TYPE_ECC);
     XFREE(rtmp, NULL, DYNAMIC_TYPE_ECC);
@@ -6807,7 +7776,12 @@
           const char* qy, const char* d, int curve_id, int encType)
 {
     int err = MP_OKAY;
-
+#if defined(WOLFSSL_CRYPTOCELL) && !defined(WOLFSSL_ATECC508A)
+    const CRYS_ECPKI_Domain_t* pDomain;
+    CRYS_ECPKI_BUILD_TempData_t tempBuff;
+    byte key_raw[ECC_MAX_CRYPTO_HW_SIZE*2 + 1];
+    word32 keySz = 0;
+#endif
     /* if d is NULL, only import as public key using Qx,Qy */
     if (key == NULL || qx == NULL || qy == NULL) {
         return BAD_FUNC_ARG;
@@ -6822,14 +7796,6 @@
         return err;
     }
 
-#ifdef WOLFSSL_ATECC508A
-    /* TODO: Implement equiv call to ATECC508A */
-    err = BAD_COND_E;
-    (void)d;
-    (void)encType;
-
-#else
-
     /* init key */
 #ifdef ALT_ECC_SIZE
     key->pubkey.x = (mp_int*)&key->pubkey.xyz[0];
@@ -6848,7 +7814,7 @@
 
     /* read Qx */
     if (err == MP_OKAY) {
-        if (encType == ECC_TYPE_HEX_STR)
+        if (encType == WC_TYPE_HEX_STR)
             err = mp_read_radix(key->pubkey.x, qx, MP_RADIX_HEX);
         else
             err = mp_read_unsigned_bin(key->pubkey.x, (const byte*)qx,
@@ -6857,7 +7823,7 @@
 
     /* read Qy */
     if (err == MP_OKAY) {
-        if (encType == ECC_TYPE_HEX_STR)
+        if (encType == WC_TYPE_HEX_STR)
             err = mp_read_radix(key->pubkey.y, qy, MP_RADIX_HEX);
         else
             err = mp_read_unsigned_bin(key->pubkey.y, (const byte*)qy,
@@ -6868,17 +7834,89 @@
     if (err == MP_OKAY)
         err = mp_set(key->pubkey.z, 1);
 
+#ifdef WOLFSSL_ATECC508A
+    /* For SECP256R1 only save raw public key for hardware */
+    if (err == MP_OKAY && curve_id == ECC_SECP256R1) {
+        word32 keySz = key->dp->size;
+        err = wc_export_int(key->pubkey.x, key->pubkey_raw,
+            &keySz, keySz, WC_TYPE_UNSIGNED_BIN);
+        if (err == MP_OKAY)
+            err = wc_export_int(key->pubkey.y, &key->pubkey_raw[keySz],
+                &keySz, keySz, WC_TYPE_UNSIGNED_BIN);
+    }
+#elif defined(WOLFSSL_CRYPTOCELL)
+    if (err == MP_OKAY) {
+        key_raw[0] = ECC_POINT_UNCOMP;
+        keySz = (word32)key->dp->size;
+        err = wc_export_int(key->pubkey.x, &key_raw[1], &keySz, keySz,
+            WC_TYPE_UNSIGNED_BIN);
+        if (err == MP_OKAY) {
+            err = wc_export_int(key->pubkey.y, &key_raw[1+keySz],
+                &keySz, keySz, WC_TYPE_UNSIGNED_BIN);
+        }
+
+        if (err == MP_OKAY) {
+            pDomain = CRYS_ECPKI_GetEcDomain(cc310_mapCurve(curve_id));
+
+            /* create public key from external key buffer */
+            err = CRYS_ECPKI_BuildPublKeyFullCheck(pDomain,
+                                                   key_raw,
+                                                   keySz*2 + 1,
+                                                   &key->ctx.pubKey,
+                                                   &tempBuff);
+        }
+
+        if (err != SA_SILIB_RET_OK){
+            WOLFSSL_MSG("CRYS_ECPKI_BuildPublKeyFullCheck failed");
+            return err;
+        }
+    }
+
+#endif
+
     /* import private key */
     if (err == MP_OKAY) {
-        if (d != NULL) {
+        if (d != NULL && d[0] != '\0') {
+        #ifdef WOLFSSL_ATECC508A
+            /* Hardware doesn't support loading private key */
+            err = NOT_COMPILED_IN;
+
+        #elif defined(WOLFSSL_CRYPTOCELL)
+
             key->type = ECC_PRIVATEKEY;
 
-            if (encType == ECC_TYPE_HEX_STR)
+            if (encType == WC_TYPE_HEX_STR)
                 err = mp_read_radix(&key->k, d, MP_RADIX_HEX);
             else
                 err = mp_read_unsigned_bin(&key->k, (const byte*)d,
                     key->dp->size);
-
+            if (err == MP_OKAY) {
+                err = wc_export_int(&key->k, &key_raw[0], &keySz, keySz,
+                    WC_TYPE_UNSIGNED_BIN);
+            }
+
+            if (err == MP_OKAY) {
+                /* Create private key from external key buffer*/
+                err = CRYS_ECPKI_BuildPrivKey(pDomain,
+                                              key_raw,
+                                              keySz,
+                                              &key->ctx.privKey);
+
+                if (err != SA_SILIB_RET_OK){
+                    WOLFSSL_MSG("CRYS_ECPKI_BuildPrivKey failed");
+                    return err;
+                }
+            }
+
+        #else
+            key->type = ECC_PRIVATEKEY;
+
+            if (encType == WC_TYPE_HEX_STR)
+                err = mp_read_radix(&key->k, d, MP_RADIX_HEX);
+            else
+                err = mp_read_unsigned_bin(&key->k, (const byte*)d,
+                    key->dp->size);
+        #endif /* WOLFSSL_ATECC508A */
         } else {
             key->type = ECC_PUBLICKEY;
         }
@@ -6895,7 +7933,6 @@
         mp_clear(key->pubkey.z);
         mp_clear(&key->k);
     }
-#endif /* WOLFSSL_ATECC508A */
 
     return err;
 }
@@ -6914,7 +7951,7 @@
                    const char* d, int curve_id)
 {
     return wc_ecc_import_raw_private(key, qx, qy, d, curve_id,
-        ECC_TYPE_HEX_STR);
+        WC_TYPE_HEX_STR);
 
 }
 
@@ -6923,7 +7960,7 @@
                    byte* d, int curve_id)
 {
     return wc_ecc_import_raw_private(key, (const char*)qx, (const char*)qy,
-        (const char*)d, curve_id, ECC_TYPE_UNSIGNED_BIN);
+        (const char*)d, curve_id, WC_TYPE_UNSIGNED_BIN);
 }
 
 /**
@@ -6959,7 +7996,7 @@
         err = ASN_PARSE_E;
     } else {
         return wc_ecc_import_raw_private(key, qx, qy, d, ecc_sets[x].id,
-            ECC_TYPE_HEX_STR);
+            WC_TYPE_HEX_STR);
     }
 
     return err;
@@ -6969,25 +8006,58 @@
 /* key size in octets */
 int wc_ecc_size(ecc_key* key)
 {
-    if (key == NULL) return 0;
+    if (key == NULL)
+        return 0;
 
     return key->dp->size;
 }
 
+/* maximum signature size based on key size */
 int wc_ecc_sig_size_calc(int sz)
 {
-    return (sz * 2) + SIG_HEADER_SZ + ECC_MAX_PAD_SZ;
-}
-
-/* worst case estimate, check actual return from wc_ecc_sign_hash for actual
-   value of signature size in octets */
+    int maxSigSz = 0;
+
+    /* calculate based on key bits */
+    /* maximum possible signature header size is 7 bytes plus 2 bytes padding */
+    maxSigSz = (sz * 2) + SIG_HEADER_SZ + ECC_MAX_PAD_SZ;
+
+    /* if total length is less than 128 + SEQ(1)+LEN(1) then subtract 1 */
+    if (maxSigSz < (128 + 2)) {
+        maxSigSz -= 1;
+    }
+
+    return maxSigSz;
+}
+
+/* maximum signature size based on actual key curve */
 int wc_ecc_sig_size(ecc_key* key)
 {
-    int sz = wc_ecc_size(key);
-    if (sz <= 0)
-        return sz;
-
-    return wc_ecc_sig_size_calc(sz);
+    int maxSigSz;
+    int orderBits, keySz;
+
+    if (key == NULL || key->dp == NULL)
+        return 0;
+
+    /* the signature r and s will always be less than order */
+    /* if the order MSB (top bit of byte) is set then ASN encoding needs
+        extra byte for r and s, so add 2 */
+    keySz = key->dp->size;
+    orderBits = wc_ecc_get_curve_order_bit_count(key->dp);
+    if (orderBits > keySz * 8) {
+        keySz = (orderBits + 7) / 8;
+    }
+    /* maximum possible signature header size is 7 bytes */
+    maxSigSz = (keySz * 2) + SIG_HEADER_SZ;
+    if ((orderBits % 8) == 0) {
+        /* MSB can be set, so add 2 */
+        maxSigSz += ECC_MAX_PAD_SZ;
+    }
+    /* if total length is less than 128 + SEQ(1)+LEN(1) then subtract 1 */
+    if (maxSigSz < (128 + 2)) {
+        maxSigSz -= 1;
+    }
+
+    return maxSigSz;
 }
 
 
@@ -7899,17 +8969,42 @@
           /* double if not first */
           if (!first) {
              if ((err = ecc_projective_dbl_point(R, R, a, modulus,
-                                                             mp)) != MP_OKAY) {
+                                                              mp)) != MP_OKAY) {
                 break;
              }
           }
 
           /* add if not first, otherwise copy */
           if (!first && z) {
-             if ((err = ecc_projective_add_point(R, fp_cache[idx].LUT[z], R,
-                                                 a, modulus, mp)) != MP_OKAY) {
+             if ((err = ecc_projective_add_point(R, fp_cache[idx].LUT[z], R, a,
+                                                     modulus, mp)) != MP_OKAY) {
                 break;
              }
+             if (mp_iszero(R->z)) {
+                 /* When all zero then should have done an add */
+                 if (mp_iszero(R->x) && mp_iszero(R->y)) {
+                     if ((err = ecc_projective_dbl_point(fp_cache[idx].LUT[z],
+                                               R, a, modulus, mp)) != MP_OKAY) {
+                         break;
+                     }
+                 }
+                 /* When only Z zero then result is infinity */
+                 else {
+                    err = mp_set(R->x, 0);
+                    if (err != MP_OKAY) {
+                       break;
+                    }
+                    err = mp_set(R->y, 0);
+                    if (err != MP_OKAY) {
+                       break;
+                    }
+                    err = mp_copy(&fp_cache[idx].mu, R->z);
+                    if (err != MP_OKAY) {
+                       break;
+                    }
+                    first = 1;
+                 }
+             }
           } else if (z) {
              if ((mp_copy(fp_cache[idx].LUT[z]->x, R->x) != MP_OKAY) ||
                  (mp_copy(fp_cache[idx].LUT[z]->y, R->y) != MP_OKAY) ||
@@ -7917,7 +9012,7 @@
                  err = GEN_MEM_ERR;
                  break;
              }
-                 first = 0;
+             first = 0;
           }
       }
    }
@@ -8114,27 +9209,78 @@
                                                               mp)) != MP_OKAY) {
                 break;
              }
-          }
-
-          /* add if not first, otherwise copy */
-          if (!first) {
+
+             /* add if not first, otherwise copy */
              if (zA) {
                 if ((err = ecc_projective_add_point(R, fp_cache[idx1].LUT[zA],
-                                                  R, a, modulus, mp)) != MP_OKAY) {
+                                               R, a, modulus, mp)) != MP_OKAY) {
                    break;
                 }
+                if (mp_iszero(R->z)) {
+                    /* When all zero then should have done an add */
+                    if (mp_iszero(R->x) && mp_iszero(R->y)) {
+                        if ((err = ecc_projective_dbl_point(
+                                                  fp_cache[idx1].LUT[zA], R,
+                                                  a, modulus, mp)) != MP_OKAY) {
+                            break;
+                        }
+                    }
+                    /* When only Z zero then result is infinity */
+                    else {
+                       err = mp_set(R->x, 0);
+                       if (err != MP_OKAY) {
+                          break;
+                       }
+                       err = mp_set(R->y, 0);
+                       if (err != MP_OKAY) {
+                          break;
+                       }
+                       err = mp_copy(&fp_cache[idx1].mu, R->z);
+                       if (err != MP_OKAY) {
+                          break;
+                       }
+                       first = 1;
+                    }
+                }
              }
+
              if (zB) {
                 if ((err = ecc_projective_add_point(R, fp_cache[idx2].LUT[zB],
-                                                  R, a, modulus, mp)) != MP_OKAY) {
+                                               R, a, modulus, mp)) != MP_OKAY) {
                    break;
                 }
+                if (mp_iszero(R->z)) {
+                    /* When all zero then should have done an add */
+                    if (mp_iszero(R->x) && mp_iszero(R->y)) {
+                        if ((err = ecc_projective_dbl_point(
+                                                  fp_cache[idx2].LUT[zB], R,
+                                                  a, modulus, mp)) != MP_OKAY) {
+                            break;
+                        }
+                    }
+                    /* When only Z zero then result is infinity */
+                    else {
+                       err = mp_set(R->x, 0);
+                       if (err != MP_OKAY) {
+                          break;
+                       }
+                       err = mp_set(R->y, 0);
+                       if (err != MP_OKAY) {
+                          break;
+                       }
+                       err = mp_copy(&fp_cache[idx2].mu, R->z);
+                       if (err != MP_OKAY) {
+                          break;
+                       }
+                       first = 1;
+                    }
+                }
              }
           } else {
              if (zA) {
                  if ((mp_copy(fp_cache[idx1].LUT[zA]->x, R->x) != MP_OKAY) ||
-                    (mp_copy(fp_cache[idx1].LUT[zA]->y,  R->y) != MP_OKAY) ||
-                    (mp_copy(&fp_cache[idx1].mu,         R->z) != MP_OKAY)) {
+                     (mp_copy(fp_cache[idx1].LUT[zA]->y, R->y) != MP_OKAY) ||
+                     (mp_copy(&fp_cache[idx1].mu,        R->z) != MP_OKAY)) {
                      err = GEN_MEM_ERR;
                      break;
                  }
@@ -8143,14 +9289,40 @@
              if (zB && first == 0) {
                 if (zB) {
                    if ((err = ecc_projective_add_point(R,
-                           fp_cache[idx2].LUT[zB], R, a, modulus, mp)) != MP_OKAY){
+                        fp_cache[idx2].LUT[zB], R, a, modulus, mp)) != MP_OKAY){
                       break;
                    }
+                   if (mp_iszero(R->z)) {
+                       /* When all zero then should have done an add */
+                       if (mp_iszero(R->x) && mp_iszero(R->y)) {
+                           if ((err = ecc_projective_dbl_point(
+                                                  fp_cache[idx2].LUT[zB], R,
+                                                  a, modulus, mp)) != MP_OKAY) {
+                               break;
+                           }
+                       }
+                       /* When only Z zero then result is infinity */
+                       else {
+                          err = mp_set(R->x, 0);
+                          if (err != MP_OKAY) {
+                             break;
+                          }
+                          err = mp_set(R->y, 0);
+                          if (err != MP_OKAY) {
+                             break;
+                          }
+                          err = mp_copy(&fp_cache[idx2].mu, R->z);
+                          if (err != MP_OKAY) {
+                             break;
+                          }
+                          first = 1;
+                       }
+                   }
                 }
              } else if (zB && first == 1) {
                  if ((mp_copy(fp_cache[idx2].LUT[zB]->x, R->x) != MP_OKAY) ||
-                    (mp_copy(fp_cache[idx2].LUT[zB]->y, R->y) != MP_OKAY) ||
-                    (mp_copy(&fp_cache[idx2].mu,        R->z) != MP_OKAY)) {
+                     (mp_copy(fp_cache[idx2].LUT[zB]->y, R->y) != MP_OKAY) ||
+                     (mp_copy(&fp_cache[idx2].mu,        R->z) != MP_OKAY)) {
                      err = GEN_MEM_ERR;
                      break;
                  }
@@ -8204,7 +9376,7 @@
                 ecc_point* B, mp_int* kB,
                 ecc_point* C, mp_int* a, mp_int* modulus, void* heap)
 {
-   int  idx1 = -1, idx2 = -1, err = MP_OKAY, mpInit = 0;
+   int  idx1 = -1, idx2 = -1, err, mpInit = 0;
    mp_digit mp;
    mp_int   mu;
 
@@ -8236,11 +9408,10 @@
          ++(fp_cache[idx1].lru_count);
       }
 
-      if (err == MP_OKAY)
+      if (err == MP_OKAY) {
         /* find point */
         idx2 = find_base(B);
 
-      if (err == MP_OKAY) {
         /* no entry? */
         if (idx2 == -1) {
            /* find hole and add it */
@@ -8320,7 +9491,7 @@
     R        [out] Destination of product
     a        ECC curve parameter a
     modulus  The modulus for the curve
-    map      [boolean] If non-zero maps the point back to affine co-ordinates,
+    map      [boolean] If non-zero maps the point back to affine coordinates,
              otherwise it's left in jacobian-montgomery form
     return MP_OKAY if successful
 */
@@ -8409,7 +9580,17 @@
         return ECC_BAD_ARG_E;
     }
 
-    return sp_ecc_mulmod_256(k, G, R, map, heap);
+#ifndef WOLFSSL_SP_NO_256
+    if (mp_count_bits(modulus) == 256) {
+        return sp_ecc_mulmod_256(k, G, R, map, heap);
+    }
+#endif
+#ifdef WOLFSSL_SP_384
+    if (mp_count_bits(modulus) == 384) {
+        return sp_ecc_mulmod_384(k, G, R, map, heap);
+    }
+#endif
+    return WC_KEY_SIZE_E;
 #endif
 }
 
@@ -8786,7 +9967,7 @@
 #endif
 
     do {
-    #if defined(WOLFSSL_ASYNC_CRYPT)
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
         ret = wc_AsyncWait(ret, &privKey->asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
         if (ret != 0)
             break;
@@ -8816,14 +9997,21 @@
            case ecAES_128_CBC:
                {
                    Aes aes;
-                   ret = wc_AesSetKey(&aes, encKey, KEY_SIZE_128, encIv,
+                   ret = wc_AesInit(&aes, NULL, INVALID_DEVID);
+                   if (ret == 0) {
+                       ret = wc_AesSetKey(&aes, encKey, KEY_SIZE_128, encIv,
                                                                 AES_ENCRYPTION);
+                       if (ret == 0) {
+                           ret = wc_AesCbcEncrypt(&aes, out, msg, msgSz);
+                       #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)
+                           ret = wc_AsyncWait(ret, &aes.asyncDev,
+                                              WC_ASYNC_FLAG_NONE);
+                       #endif
+                       }
+                       wc_AesFree(&aes);
+                   }
                    if (ret != 0)
-                       break;
-                   ret = wc_AesCbcEncrypt(&aes, out, msg, msgSz);
-                #if defined(WOLFSSL_ASYNC_CRYPT)
-                   ret = wc_AsyncWait(ret, &aes.asyncDev, WC_ASYNC_FLAG_NONE);
-                #endif
+                      break;
                }
                break;
 
@@ -8948,7 +10136,7 @@
 #endif
 
     do {
-    #if defined(WOLFSSL_ASYNC_CRYPT)
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
         ret = wc_AsyncWait(ret, &privKey->asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
         if (ret != 0)
             break;
@@ -9016,7 +10204,7 @@
                    if (ret != 0)
                        break;
                    ret = wc_AesCbcDecrypt(&aes, out, msg, msgSz-digestSz);
-                #if defined(WOLFSSL_ASYNC_CRYPT)
+                #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)
                    ret = wc_AsyncWait(ret, &aes.asyncDev, WC_ASYNC_FLAG_NONE);
                 #endif
                }
@@ -9044,7 +10232,7 @@
 
 
 #ifdef HAVE_COMP_KEY
-#ifndef WOLFSSL_ATECC508A
+#if !defined(WOLFSSL_ATECC508A) && !defined(WOLFSSL_CRYPTOCELL)
 
 #ifndef WOLFSSL_SP_MATH
 int do_mp_jacobi(mp_int* a, mp_int* n, int* c);
@@ -9368,7 +10556,7 @@
 #endif
 }
 #endif
-#endif /* !WOLFSSL_ATECC508A */
+#endif /* !WOLFSSL_ATECC508A && !WOLFSSL_CRYPTOCELL */
 
 
 /* export public ECC key in ANSI X9.63 format compressed */
@@ -9390,12 +10578,6 @@
       return BUFFER_E;
    }
 
-#ifdef WOLFSSL_ATECC508A
-   /* TODO: Implement equiv call to ATECC508A */
-   ret = BAD_COND_E;
-
-#else
-
    /* store first byte */
    out[0] = mp_isodd(key->pubkey.y) == MP_YES ? ECC_POINT_COMP_ODD : ECC_POINT_COMP_EVEN;
 
@@ -9405,8 +10587,6 @@
                        out+1 + (numlen - mp_unsigned_bin_size(key->pubkey.x)));
    *outLen = 1 + numlen;
 
-#endif /* WOLFSSL_ATECC508A */
-
    return ret;
 }
 
@@ -9424,8 +10604,9 @@
     /* find matching OID sum (based on encoded value) */
     for (x = 0; ecc_sets[x].size != 0; x++) {
         if (ecc_sets[x].oidSum == oidSum) {
-            int ret = 0;
+            int ret;
         #ifdef HAVE_OID_ENCODING
+            ret = 0;
             /* check cache */
             oid_cache_t* o = &ecc_oid_cache[x];
             if (o->oidSz == 0) {
@@ -9439,6 +10620,10 @@
             if (oid) {
                 *oid = o->oid;
             }
+            /* on success return curve id */
+            if (ret == 0) {
+                ret = ecc_sets[x].id;
+            }
         #else
             if (oidSz) {
                 *oidSz = ecc_sets[x].oidSz;
@@ -9446,11 +10631,8 @@
             if (oid) {
                 *oid = ecc_sets[x].oid;
             }
+            ret = ecc_sets[x].id;
         #endif
-            /* on success return curve id */
-            if (ret == 0) {
-                ret = ecc_sets[x].id;
-            }
             return ret;
         }
     }
@@ -9539,36 +10721,24 @@
 
         ret = wc_HashUpdate(hash, type, secret, secretSz);
         if (ret != 0) {
-#ifdef WOLFSSL_SMALL_STACK
-            XFREE(hash, NULL, DYNAMIC_TYPE_HASHES);
-#endif
-            return ret;
+            break;
         }
 
         ret = wc_HashUpdate(hash, type, counter, sizeof(counter));
         if (ret != 0) {
-#ifdef WOLFSSL_SMALL_STACK
-            XFREE(hash, NULL, DYNAMIC_TYPE_HASHES);
-#endif
-            return ret;
+            break;
         }
 
         if (sinfo) {
             ret = wc_HashUpdate(hash, type, sinfo, sinfoSz);
             if (ret != 0) {
-#ifdef WOLFSSL_SMALL_STACK
-                XFREE(hash, NULL, DYNAMIC_TYPE_HASHES);
-#endif
-                return ret;
+                break;
             }
         }
 
         ret = wc_HashFinal(hash, type, tmp);
         if (ret != 0) {
-#ifdef WOLFSSL_SMALL_STACK
-            XFREE(hash, NULL, DYNAMIC_TYPE_HASHES);
-#endif
-            return ret;
+            break;
         }
 
         copySz = min(remaining, digestSz);
@@ -9578,11 +10748,13 @@
         outIdx += copySz;
     }
 
+    wc_HashFree(hash, type);
+
 #ifdef WOLFSSL_SMALL_STACK
      XFREE(hash, NULL, DYNAMIC_TYPE_HASHES);
 #endif
 
-    return 0;
+    return ret;
 }
 #endif /* HAVE_X963_KDF */
 
--- a/wolfcrypt/src/ed25519.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/ed25519.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* ed25519.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -45,16 +45,55 @@
     #include <wolfssl/wolfcrypt/port/nxp/ksdk_port.h>
 #endif
 
+#if defined(HAVE_ED25519_SIGN) || defined(HAVE_ED25519_VERIFY)
+#define ED25519CTX_SIZE    32
+
+static const byte ed25519Ctx[ED25519CTX_SIZE+1] =
+                                             "SigEd25519 no Ed25519 collisions";
+#endif
+
+int wc_ed25519_make_public(ed25519_key* key, unsigned char* pubKey,
+                           word32 pubKeySz)
+{
+    int   ret = 0;
+    byte  az[ED25519_PRV_KEY_SIZE];
+#if !defined(FREESCALE_LTC_ECC)
+    ge_p3 A;
+#endif
+
+    if (key == NULL || pubKeySz != ED25519_PUB_KEY_SIZE)
+        ret = BAD_FUNC_ARG;
+
+    if (ret == 0)
+        ret = wc_Sha512Hash(key->k, ED25519_KEY_SIZE, az);
+    if (ret == 0) {
+        /* apply clamp */
+        az[0]  &= 248;
+        az[31] &= 63; /* same than az[31] &= 127 because of az[31] |= 64 */
+        az[31] |= 64;
+
+    #ifdef FREESCALE_LTC_ECC
+        ltc_pkha_ecc_point_t publicKey = {0};
+        publicKey.X = key->pointX;
+        publicKey.Y = key->pointY;
+        LTC_PKHA_Ed25519_PointMul(LTC_PKHA_Ed25519_BasePoint(), az,
+            ED25519_KEY_SIZE, &publicKey, kLTC_Ed25519 /* result on Ed25519 */);
+        LTC_PKHA_Ed25519_Compress(&publicKey, pubKey);
+    #else
+        ge_scalarmult_base(&A, az);
+        ge_p3_tobytes(pubKey, &A);
+    #endif
+    }
+
+    return ret;
+}
+
 /* generate an ed25519 key pair.
  * returns 0 on success
  */
 int wc_ed25519_make_key(WC_RNG* rng, int keySz, ed25519_key* key)
 {
-    byte  az[ED25519_PRV_KEY_SIZE];
-    int   ret;
-#if !defined(FREESCALE_LTC_ECC)
-    ge_p3 A;
-#endif
+    int ret;
 
     if (rng == NULL || key == NULL)
         return BAD_FUNC_ARG;
@@ -66,27 +105,13 @@
     ret  = wc_RNG_GenerateBlock(rng, key->k, ED25519_KEY_SIZE);
     if (ret != 0)
         return ret;
-    ret = wc_Sha512Hash(key->k, ED25519_KEY_SIZE, az);
+
+    ret = wc_ed25519_make_public(key, key->p, ED25519_PUB_KEY_SIZE);
     if (ret != 0) {
         ForceZero(key->k, ED25519_KEY_SIZE);
         return ret;
     }
 
-    /* apply clamp */
-    az[0]  &= 248;
-    az[31] &= 63; /* same than az[31] &= 127 because of az[31] |= 64 */
-    az[31] |= 64;
-
-#ifdef FREESCALE_LTC_ECC
-    ltc_pkha_ecc_point_t publicKey = {0};
-    publicKey.X = key->pointX;
-    publicKey.Y = key->pointY;
-    LTC_PKHA_Ed25519_PointMul(LTC_PKHA_Ed25519_BasePoint(), az, ED25519_KEY_SIZE, &publicKey, kLTC_Ed25519 /* result on Ed25519 */);
-    LTC_PKHA_Ed25519_Compress(&publicKey, key->p);
-#else
-    ge_scalarmult_base(&A, az);
-    ge_p3_tobytes(key->p, &A);
-#endif
     /* put public key after private key, on the same buffer */
     XMEMMOVE(key->k + ED25519_KEY_SIZE, key->p, ED25519_PUB_KEY_SIZE);
 
@@ -98,16 +123,20 @@
 
 #ifdef HAVE_ED25519_SIGN
 /*
-    in     contains the message to sign
-    inlen  is the length of the message to sign
-    out    is the buffer to write the signature
-    outLen [in/out] input size of out buf
-                     output gets set as the final length of out
-    key    is the ed25519 key to use when signing
+    in          contains the message to sign
+    inLen       is the length of the message to sign
+    out         is the buffer to write the signature
+    outLen      [in/out] input size of out buf
+                          output gets set as the final length of out
+    key         is the ed25519 key to use when signing
+    type        one of Ed25519, Ed25519ctx or Ed25519ph
+    context     extra signing data
+    contextLen  length of extra signing data
     return 0 on success
  */
-int wc_ed25519_sign_msg(const byte* in, word32 inlen, byte* out,
-                        word32 *outLen, ed25519_key* key)
+static int ed25519_sign_msg(const byte* in, word32 inLen, byte* out,
+                            word32 *outLen, ed25519_key* key, byte type,
+                            const byte* context, byte contextLen)
 {
 #ifdef FREESCALE_LTC_ECC
     byte   tempBuf[ED25519_PRV_KEY_SIZE];
@@ -121,8 +150,10 @@
     int    ret;
 
     /* sanity check on arguments */
-    if (in == NULL || out == NULL || outLen == NULL || key == NULL)
+    if (in == NULL || out == NULL || outLen == NULL || key == NULL ||
+                                         (context == NULL && contextLen != 0)) {
         return BAD_FUNC_ARG;
+    }
     if (!key->pubKeySet)
         return BAD_FUNC_ARG;
 
@@ -147,9 +178,19 @@
     ret = wc_InitSha512(&sha);
     if (ret != 0)
         return ret;
-    ret = wc_Sha512Update(&sha, az + ED25519_KEY_SIZE, ED25519_KEY_SIZE);
+    if (type == Ed25519ctx || type == Ed25519ph) {
+        ret = wc_Sha512Update(&sha, ed25519Ctx, ED25519CTX_SIZE);
+        if (ret == 0)
+            ret = wc_Sha512Update(&sha, &type, sizeof(type));
+        if (ret == 0)
+            ret = wc_Sha512Update(&sha, &contextLen, sizeof(contextLen));
+        if (ret == 0 && context != NULL)
+            ret = wc_Sha512Update(&sha, context, contextLen);
+    }
     if (ret == 0)
-        ret = wc_Sha512Update(&sha, in, inlen);
+        ret = wc_Sha512Update(&sha, az + ED25519_KEY_SIZE, ED25519_KEY_SIZE);
+    if (ret == 0)
+        ret = wc_Sha512Update(&sha, in, inLen);
     if (ret == 0)
         ret = wc_Sha512Final(&sha, nonce);
     wc_Sha512Free(&sha);
@@ -161,7 +202,8 @@
     ltcPoint.X = &tempBuf[0];
     ltcPoint.Y = &tempBuf[32];
     LTC_PKHA_sc_reduce(nonce);
-    LTC_PKHA_Ed25519_PointMul(LTC_PKHA_Ed25519_BasePoint(), nonce, ED25519_KEY_SIZE, &ltcPoint, kLTC_Ed25519 /* result on Ed25519 */);
+    LTC_PKHA_Ed25519_PointMul(LTC_PKHA_Ed25519_BasePoint(), nonce,
+           ED25519_KEY_SIZE, &ltcPoint, kLTC_Ed25519 /* result on Ed25519 */);
     LTC_PKHA_Ed25519_Compress(&ltcPoint, out);
 #else
     sc_reduce(nonce);
@@ -177,11 +219,21 @@
     ret = wc_InitSha512(&sha);
     if (ret != 0)
         return ret;
-    ret = wc_Sha512Update(&sha, out, ED25519_SIG_SIZE/2);
+    if (type == Ed25519ctx || type == Ed25519ph) {
+        ret = wc_Sha512Update(&sha, ed25519Ctx, ED25519CTX_SIZE);
+        if (ret == 0)
+            ret = wc_Sha512Update(&sha, &type, sizeof(type));
+        if (ret == 0)
+            ret = wc_Sha512Update(&sha, &contextLen, sizeof(contextLen));
+        if (ret == 0 && context != NULL)
+            ret = wc_Sha512Update(&sha, context, contextLen);
+    }
+    if (ret == 0)
+        ret = wc_Sha512Update(&sha, out, ED25519_SIG_SIZE/2);
     if (ret == 0)
         ret = wc_Sha512Update(&sha, key->p, ED25519_PUB_KEY_SIZE);
     if (ret == 0)
-        ret = wc_Sha512Update(&sha, in, inlen);
+        ret = wc_Sha512Update(&sha, in, inLen);
     if (ret == 0)
         ret = wc_Sha512Final(&sha, hram);
     wc_Sha512Free(&sha);
@@ -199,20 +251,100 @@
     return ret;
 }
 
+/*
+    in     contains the message to sign
+    inLen  is the length of the message to sign
+    out    is the buffer to write the signature
+    outLen [in/out] input size of out buf
+                     output gets set as the final length of out
+    key    is the ed25519 key to use when signing
+    return 0 on success
+ */
+int wc_ed25519_sign_msg(const byte* in, word32 inLen, byte* out,
+                        word32 *outLen, ed25519_key* key)
+{
+    return ed25519_sign_msg(in, inLen, out, outLen, key, (byte)Ed25519, NULL, 0);
+}
+
+/*
+    in          contains the message to sign
+    inLen       is the length of the message to sign
+    out         is the buffer to write the signature
+    outLen      [in/out] input size of out buf
+                          output gets set as the final length of out
+    key         is the ed25519 key to use when signing
+    context     extra signing data
+    contextLen  length of extra signing data
+    return 0 on success
+ */
+int wc_ed25519ctx_sign_msg(const byte* in, word32 inLen, byte* out,
+                           word32 *outLen, ed25519_key* key,
+                           const byte* context, byte contextLen)
+{
+    return ed25519_sign_msg(in, inLen, out, outLen, key, Ed25519ctx, context,
+                                                                    contextLen);
+}
+
+/*
+    hash        contains the SHA-512 hash of the message to sign
+    hashLen     is the length of the SHA-512 hash of the message to sign
+    out         is the buffer to write the signature
+    outLen      [in/out] input size of out buf
+                          output gets set as the final length of out
+    key         is the ed25519 key to use when signing
+    context     extra signing data
+    contextLen  length of extra signing data
+    return 0 on success
+ */
+int wc_ed25519ph_sign_hash(const byte* hash, word32 hashLen, byte* out,
+                           word32 *outLen, ed25519_key* key,
+                           const byte* context, byte contextLen)
+{
+    return ed25519_sign_msg(hash, hashLen, out, outLen, key, Ed25519ph, context,
+                                                                    contextLen);
+}
+
+/*
+    in          contains the message to sign
+    inLen       is the length of the message to sign
+    out         is the buffer to write the signature
+    outLen      [in/out] input size of out buf
+                          output gets set as the final length of out
+    key         is the ed25519 key to use when signing
+    context     extra signing data
+    contextLen  length of extra signing data
+    return 0 on success
+ */
+int wc_ed25519ph_sign_msg(const byte* in, word32 inLen, byte* out,
+                          word32 *outLen, ed25519_key* key,
+                          const byte* context, byte contextLen)
+{
+    int  ret;
+    byte hash[WC_SHA512_DIGEST_SIZE];
+
+    ret = wc_Sha512Hash(in, inLen, hash);
+    if (ret != 0)
+        return ret;
+
+    return wc_ed25519ph_sign_hash(hash, sizeof(hash), out, outLen, key, context,
+                                                                    contextLen);
+}
 #endif /* HAVE_ED25519_SIGN */
 
 #ifdef HAVE_ED25519_VERIFY
 
 /*
    sig     is array of bytes containing the signature
-   siglen  is the length of sig byte array
+   sigLen  is the length of sig byte array
    msg     the array of bytes containing the message
-   msglen  length of msg array
+   msgLen  length of msg array
    res     will be 1 on successful verify and 0 on unsuccessful
+   key     Ed25519 public key
    return  0 and res of 1 on success
 */
-int wc_ed25519_verify_msg(const byte* sig, word32 siglen, const byte* msg,
-                          word32 msglen, int* res, ed25519_key* key)
+static int ed25519_verify_msg(const byte* sig, word32 sigLen, const byte* msg,
+                              word32 msgLen, int* res, ed25519_key* key,
+                              byte type, const byte* context, byte contextLen)
 {
     byte   rcheck[ED25519_KEY_SIZE];
     byte   h[WC_SHA512_DIGEST_SIZE];
@@ -224,14 +356,16 @@
     wc_Sha512 sha;
 
     /* sanity check on arguments */
-    if (sig == NULL || msg == NULL || res == NULL || key == NULL)
+    if (sig == NULL || msg == NULL || res == NULL || key == NULL ||
+                                         (context == NULL && contextLen != 0)) {
         return BAD_FUNC_ARG;
+    }
 
     /* set verification failed by default */
     *res = 0;
 
     /* check on basics needed to verify signature */
-    if (siglen < ED25519_SIG_SIZE || (sig[ED25519_SIG_SIZE-1] & 224))
+    if (sigLen < ED25519_SIG_SIZE || (sig[ED25519_SIG_SIZE-1] & 224))
         return BAD_FUNC_ARG;
 
     /* uncompress A (public key), test if valid, and negate it */
@@ -244,11 +378,21 @@
     ret  = wc_InitSha512(&sha);
     if (ret != 0)
         return ret;
-    ret = wc_Sha512Update(&sha, sig,    ED25519_SIG_SIZE/2);
+    if (type == Ed25519ctx || type == Ed25519ph) {
+        ret = wc_Sha512Update(&sha, ed25519Ctx, ED25519CTX_SIZE);
+        if (ret == 0)
+            ret = wc_Sha512Update(&sha, &type, sizeof(type));
+        if (ret == 0)
+            ret = wc_Sha512Update(&sha, &contextLen, sizeof(contextLen));
+        if (ret == 0 && context != NULL)
+            ret = wc_Sha512Update(&sha, context, contextLen);
+    }
+    if (ret == 0)
+        ret = wc_Sha512Update(&sha, sig, ED25519_SIG_SIZE/2);
     if (ret == 0)
         ret = wc_Sha512Update(&sha, key->p, ED25519_PUB_KEY_SIZE);
     if (ret == 0)
-        ret = wc_Sha512Update(&sha, msg,    msglen);
+        ret = wc_Sha512Update(&sha, msg, msgLen);
     if (ret == 0)
         ret = wc_Sha512Final(&sha,  h);
     wc_Sha512Free(&sha);
@@ -283,6 +427,85 @@
     return ret;
 }
 
+/*
+   sig     is array of bytes containing the signature
+   sigLen  is the length of sig byte array
+   msg     the array of bytes containing the message
+   msgLen  length of msg array
+   res     will be 1 on successful verify and 0 on unsuccessful
+   key     Ed25519 public key
+   return  0 and res of 1 on success
+*/
+int wc_ed25519_verify_msg(const byte* sig, word32 sigLen, const byte* msg,
+                          word32 msgLen, int* res, ed25519_key* key)
+{
+    return ed25519_verify_msg(sig, sigLen, msg, msgLen, res, key, (byte)Ed25519,
+                                                                       NULL, 0);
+}
+
+/*
+   sig         is array of bytes containing the signature
+   sigLen      is the length of sig byte array
+   msg         the array of bytes containing the message
+   msgLen      length of msg array
+   res         will be 1 on successful verify and 0 on unsuccessful
+   key         Ed25519 public key
+   context     extra sigining data
+   contextLen  length of extra sigining data
+   return  0 and res of 1 on success
+*/
+int wc_ed25519ctx_verify_msg(const byte* sig, word32 sigLen, const byte* msg,
+                             word32 msgLen, int* res, ed25519_key* key,
+                             const byte* context, byte contextLen)
+{
+    return ed25519_verify_msg(sig, sigLen, msg, msgLen, res, key, Ed25519ctx,
+                                                           context, contextLen);
+}
+
+/*
+   sig         is array of bytes containing the signature
+   sigLen      is the length of sig byte array
+   hash        the array of bytes containing the SHA-512 hash of the message
+   hashLen     length of hash array
+   res         will be 1 on successful verify and 0 on unsuccessful
+   key         Ed25519 public key
+   context     extra sigining data
+   contextLen  length of extra sigining data
+   return  0 and res of 1 on success
+*/
+int wc_ed25519ph_verify_hash(const byte* sig, word32 sigLen, const byte* hash,
+                             word32 hashLen, int* res, ed25519_key* key,
+                             const byte* context, byte contextLen)
+{
+    return ed25519_verify_msg(sig, sigLen, hash, hashLen, res, key, Ed25519ph,
+                                                           context, contextLen);
+}
+
+/*
+   sig         is array of bytes containing the signature
+   sigLen      is the length of sig byte array
+   msg         the array of bytes containing the message
+   msgLen      length of msg array
+   res         will be 1 on successful verify and 0 on unsuccessful
+   key         Ed25519 public key
+   context     extra sigining data
+   contextLen  length of extra sigining data
+   return  0 and res of 1 on success
+*/
+int wc_ed25519ph_verify_msg(const byte* sig, word32 sigLen, const byte* msg,
+                            word32 msgLen, int* res, ed25519_key* key,
+                            const byte* context, byte contextLen)
+{
+    int  ret;
+    byte hash[WC_SHA512_DIGEST_SIZE];
+
+    ret = wc_Sha512Hash(msg, msgLen, hash);
+    if (ret != 0)
+        return ret;
+
+    return wc_ed25519ph_verify_hash(sig, sigLen, hash, sizeof(hash), res, key,
+                                                           context, contextLen);
+}
 #endif /* HAVE_ED25519_VERIFY */
 
 
@@ -383,14 +606,15 @@
             key->pointY[i] = *(in + 2*ED25519_KEY_SIZE - i);
         }
         XMEMCPY(key->p, key->pointY, ED25519_KEY_SIZE);
+        key->pubKeySet = 1;
         ret = 0;
 #else
         /* pass in (x,y) and store compressed key */
         ret = ge_compress_key(key->p, in+1,
                               in+1+ED25519_PUB_KEY_SIZE, ED25519_PUB_KEY_SIZE);
-#endif /* FREESCALE_LTC_ECC */
         if (ret == 0)
             key->pubKeySet = 1;
+#endif /* FREESCALE_LTC_ECC */
         return ret;
     }
 
@@ -537,10 +761,17 @@
 /* check the private and public keys match */
 int wc_ed25519_check_key(ed25519_key* key)
 {
-    /* TODO: Perform check of private and public key */
-    (void)key;
+    int ret = 0;
+    unsigned char pubKey[ED25519_PUB_KEY_SIZE];
 
-    return 0;
+    if (!key->pubKeySet)
+        ret = PUBLIC_KEY_E;
+    if (ret == 0)
+        ret = wc_ed25519_make_public(key, pubKey, sizeof(pubKey));
+    if (ret == 0 && XMEMCMP(pubKey, key->p, ED25519_PUB_KEY_SIZE) != 0)
+        ret = PUBLIC_KEY_E;
+
+    return ret;
 }
 
 /* returns the private key size (secret only) in bytes */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wolfcrypt/src/ed448.c	Thu Jun 04 23:57:22 2020 +0000
@@ -0,0 +1,918 @@
+/* ed448.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/* Implemented to: RFC 8032 */
+
+/* Based On Daniel J Bernstein's ed25519 Public Domain ref10 work.
+ * Reworked for curve448 by Sean Parkinson.
+ */
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+/* in case user set HAVE_ED448 there */
+#include <wolfssl/wolfcrypt/settings.h>
+
+#ifdef HAVE_ED448
+
+#include <wolfssl/wolfcrypt/ed448.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/hash.h>
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
+#if defined(HAVE_ED448_SIGN) || defined(HAVE_ED448_VERIFY)
+/* Size of context bytes to use with hash when signing and verifying. */
+#define ED448CTX_SIZE    8
+/* Context to pass to hash when signing and verifying. */
+static const byte ed448Ctx[ED448CTX_SIZE+1] = "SigEd448";
+#endif
+
+/* Derive the public key for the private key.
+ *
+ * key       [in]  Ed448 key object.
+ * pubKey    [in]  Byte array to hold te public key.
+ * pubKeySz  [in]  Size of the array in bytes.
+ * returns BAD_FUNC_ARG when key is NULL or pubKeySz is not equal to
+ *         ED448_PUB_KEY_SIZE,
+ *         other -ve value on hash failure,
+ *         0 otherwise.
+ */
+int wc_ed448_make_public(ed448_key* key, unsigned char* pubKey, word32 pubKeySz)
+{
+    int   ret = 0;
+    byte  az[ED448_PRV_KEY_SIZE];
+    ge448_p2 A;
+
+    if ((key == NULL) || (pubKeySz != ED448_PUB_KEY_SIZE)) {
+        ret = BAD_FUNC_ARG;
+    }
+
+    if (ret == 0) {
+        ret = wc_Shake256Hash(key->k, ED448_KEY_SIZE, az, sizeof(az));
+    }
+    if (ret == 0) {
+        /* apply clamp */
+        az[0]  &= 0xfc;
+        az[55] |= 0x80;
+        az[56]  = 0x00;
+
+        ge448_scalarmult_base(&A, az);
+        ge448_to_bytes(pubKey, &A);
+    }
+
+    return ret;
+}
+
+/* Make a new ed448 private/public key.
+ *
+ * rng      [in]  Random number generator.
+ * keysize  [in]  Size of the key to generate.
+ * key      [in]  Ed448 key object.
+ * returns BAD_FUNC_ARG when rng or key is NULL or keySz is not equal to
+ *         ED448_KEY_SIZE,
+ *         other -ve value on random number or hash failure,
+ *         0 otherwise.
+ */
+int wc_ed448_make_key(WC_RNG* rng, int keySz, ed448_key* key)
+{
+    int ret = 0;
+
+    if ((rng == NULL) || (key == NULL)) {
+        ret = BAD_FUNC_ARG;
+    }
+
+    /* ed448 has 57 byte key sizes */
+    if ((ret == 0) && (keySz != ED448_KEY_SIZE)) {
+        ret = BAD_FUNC_ARG;
+    }
+
+    if (ret == 0) {
+        ret = wc_RNG_GenerateBlock(rng, key->k, ED448_KEY_SIZE);
+    }
+    if (ret == 0) {
+        ret = wc_ed448_make_public(key, key->p, ED448_PUB_KEY_SIZE);
+        if (ret != 0) {
+            ForceZero(key->k, ED448_KEY_SIZE);
+        }
+    }
+    if (ret == 0) {
+        /* put public key after private key, on the same buffer */
+        XMEMMOVE(key->k + ED448_KEY_SIZE, key->p, ED448_PUB_KEY_SIZE);
+
+        key->pubKeySet = 1;
+    }
+
+    return ret;
+}
+
+
+#ifdef HAVE_ED448_SIGN
+/* Sign the message using the ed448 private key.
+ *
+ *  in          [in]      Message to sign.
+ *  inLen       [in]      Length of the message in bytes.
+ *  out         [in]      Buffer to write signature into.
+ *  outLen      [in/out]  On in, size of buffer.
+ *                        On out, the length of the signature in bytes.
+ *  key         [in]      Ed448 key to use when signing
+ *  type        [in]      Type of signature to perform: Ed448 or Ed448ph
+ *  context     [in]      Context of signing.
+ *  contextLen  [in]      Length of context in bytes.
+ *  returns BAD_FUNC_ARG when a parameter is NULL or contextLen is zero when and
+ *          context is not NULL or public key not set,
+ *          BUFFER_E when outLen is less than ED448_SIG_SIZE,
+ *          other -ve values when hash fails,
+ *          0 otherwise.
+ */
+static int ed448_sign_msg(const byte* in, word32 inLen, byte* out,
+                          word32 *outLen, ed448_key* key, byte type,
+                          const byte* context, byte contextLen)
+{
+    ge448_p2 R;
+    byte     nonce[ED448_SIG_SIZE];
+    byte     hram[ED448_SIG_SIZE];
+    byte     az[ED448_PRV_KEY_SIZE];
+    wc_Shake sha;
+    int      ret = 0;
+
+    /* sanity check on arguments */
+    if ((in == NULL) || (out == NULL) || (outLen == NULL) || (key == NULL) ||
+                                     ((context == NULL) && (contextLen != 0))) {
+        ret = BAD_FUNC_ARG;
+    }
+    if ((ret == 0) && (!key->pubKeySet)) {
+        ret = BAD_FUNC_ARG;
+    }
+
+    /* check and set up out length */
+    if ((ret == 0) && (*outLen < ED448_SIG_SIZE)) {
+        *outLen = ED448_SIG_SIZE;
+        ret = BUFFER_E;
+    }
+
+    if (ret == 0) {
+        *outLen = ED448_SIG_SIZE;
+
+        /* step 1: create nonce to use where nonce is r in
+           r = H(h_b, ... ,h_2b-1,M) */
+        ret = wc_Shake256Hash(key->k, ED448_KEY_SIZE, az, sizeof(az));
+    }
+    if (ret == 0) {
+        /* apply clamp */
+        az[0]  &= 0xfc;
+        az[55] |= 0x80;
+        az[56]  = 0x00;
+
+        ret = wc_InitShake256(&sha, NULL, INVALID_DEVID);
+        if (ret == 0) {
+            ret = wc_Shake256_Update(&sha, ed448Ctx, ED448CTX_SIZE);
+        }
+        if (ret == 0) {
+            ret = wc_Shake256_Update(&sha, &type, sizeof(type));
+        }
+        if (ret == 0) {
+            ret = wc_Shake256_Update(&sha, &contextLen, sizeof(contextLen));
+        }
+        if (ret == 0 && context != NULL) {
+            ret = wc_Shake256_Update(&sha, context, contextLen);
+        }
+        if (ret == 0) {
+            ret = wc_Shake256_Update(&sha, az + ED448_KEY_SIZE, ED448_KEY_SIZE);
+        }
+        if (ret == 0) {
+            ret = wc_Shake256_Update(&sha, in, inLen);
+        }
+        if (ret == 0) {
+            ret = wc_Shake256_Final(&sha, nonce, sizeof(nonce));
+        }
+        wc_Shake256_Free(&sha);
+    }
+    if (ret == 0) {
+        sc448_reduce(nonce);
+
+        /* step 2: computing R = rB where rB is the scalar multiplication of
+           r and B */
+        ge448_scalarmult_base(&R,nonce);
+        ge448_to_bytes(out,&R);
+
+        /* step 3: hash R + public key + message getting H(R,A,M) then
+           creating S = (r + H(R,A,M)a) mod l */
+        ret = wc_InitShake256(&sha, NULL, INVALID_DEVID);
+        if (ret == 0) {
+            ret = wc_Shake256_Update(&sha, ed448Ctx, ED448CTX_SIZE);
+            if (ret == 0) {
+                ret = wc_Shake256_Update(&sha, &type, sizeof(type));
+            }
+            if (ret == 0) {
+                ret = wc_Shake256_Update(&sha, &contextLen, sizeof(contextLen));
+            }
+            if (ret == 0 && context != NULL) {
+                ret = wc_Shake256_Update(&sha, context, contextLen);
+            }
+            if (ret == 0) {
+                ret = wc_Shake256_Update(&sha, out, ED448_SIG_SIZE/2);
+            }
+            if (ret == 0) {
+                ret = wc_Shake256_Update(&sha, key->p, ED448_PUB_KEY_SIZE);
+            }
+            if (ret == 0) {
+                ret = wc_Shake256_Update(&sha, in, inLen);
+            }
+            if (ret == 0) {
+                ret = wc_Shake256_Final(&sha, hram, sizeof(hram));
+            }
+            wc_Shake256_Free(&sha);
+        }
+    }
+
+    if (ret == 0) {
+        sc448_reduce(hram);
+        sc448_muladd(out + (ED448_SIG_SIZE/2), hram, az, nonce);
+    }
+
+    return ret;
+}
+
+/* Sign the message using the ed448 private key.
+ * Signature type is Ed448.
+ *
+ *  in          [in]      Message to sign.
+ *  inLen       [in]      Length of the message in bytes.
+ *  out         [in]      Buffer to write signature into.
+ *  outLen      [in/out]  On in, size of buffer.
+ *                        On out, the length of the signature in bytes.
+ *  key         [in]      Ed448 key to use when signing
+ *  context     [in]      Context of signing.
+ *  contextLen  [in]      Length of context in bytes.
+ *  returns BAD_FUNC_ARG when a parameter is NULL or contextLen is zero when and
+ *          context is not NULL or public key not set,
+ *          BUFFER_E when outLen is less than ED448_SIG_SIZE,
+ *          other -ve values when hash fails,
+ *          0 otherwise.
+ */
+int wc_ed448_sign_msg(const byte* in, word32 inLen, byte* out, word32 *outLen,
+                      ed448_key* key, const byte* context, byte contextLen)
+{
+    return ed448_sign_msg(in, inLen, out, outLen, key, Ed448, context,
+                                                                    contextLen);
+}
+
+/* Sign the hash using the ed448 private key.
+ * Signature type is Ed448ph.
+ *
+ *  hash        [in]      Hash of message to sign.
+ *  hashLen     [in]      Length of hash of message in bytes.
+ *  out         [in]      Buffer to write signature into.
+ *  outLen      [in/out]  On in, size of buffer.
+ *                        On out, the length of the signature in bytes.
+ *  key         [in]      Ed448 key to use when signing
+ *  context     [in]      Context of signing.
+ *  contextLen  [in]      Length of context in bytes.
+ *  returns BAD_FUNC_ARG when a parameter is NULL or contextLen is zero when and
+ *          context is not NULL or public key not set,
+ *          BUFFER_E when outLen is less than ED448_SIG_SIZE,
+ *          other -ve values when hash fails,
+ *          0 otherwise.
+ */
+int wc_ed448ph_sign_hash(const byte* hash, word32 hashLen, byte* out,
+                         word32 *outLen, ed448_key* key,
+                         const byte* context, byte contextLen)
+{
+    return ed448_sign_msg(hash, hashLen, out, outLen, key, Ed448ph, context,
+                                                                    contextLen);
+}
+
+/* Sign the message using the ed448 private key.
+ * Signature type is Ed448ph.
+ *
+ *  in          [in]      Message to sign.
+ *  inLen       [in]      Length of the message to sign in bytes.
+ *  out         [in]      Buffer to write signature into.
+ *  outLen      [in/out]  On in, size of buffer.
+ *                        On out, the length of the signature in bytes.
+ *  key         [in]      Ed448 key to use when signing
+ *  context     [in]      Context of signing.
+ *  contextLen  [in]      Length of context in bytes.
+ *  returns BAD_FUNC_ARG when a parameter is NULL or contextLen is zero when and
+ *          context is not NULL or public key not set,
+ *          BUFFER_E when outLen is less than ED448_SIG_SIZE,
+ *          other -ve values when hash fails,
+ *          0 otherwise.
+ */
+int wc_ed448ph_sign_msg(const byte* in, word32 inLen, byte* out, word32 *outLen,
+                        ed448_key* key, const byte* context, byte contextLen)
+{
+    int  ret = 0;
+    byte hash[64];
+
+    ret = wc_Shake256Hash(in, inLen, hash, sizeof(hash));
+    if (ret == 0) {
+        ret = wc_ed448ph_sign_hash(hash, sizeof(hash), out, outLen, key,
+                                                           context, contextLen);
+    }
+
+    return ret;
+}
+#endif /* HAVE_ED448_SIGN */
+
+#ifdef HAVE_ED448_VERIFY
+
+/* Verify the message using the ed448 public key.
+ *
+ *  sig         [in]  Signature to verify.
+ *  sigLen      [in]  Size of signature in bytes.
+ *  msg         [in]  Message to verify.
+ *  msgLen      [in]  Length of the message in bytes.
+ *  key         [in]  Ed448 key to use to verify.
+ *  type        [in]  Type of signature to verify: Ed448 or Ed448ph
+ *  context     [in]  Context of verification.
+ *  contextLen  [in]  Length of context in bytes.
+ *  returns BAD_FUNC_ARG when a parameter is NULL or contextLen is zero when and
+ *          context is not NULL or public key not set,
+ *          BUFFER_E when sigLen is less than ED448_SIG_SIZE,
+ *          other -ve values when hash fails,
+ *          0 otherwise.
+ */
+static int ed448_verify_msg(const byte* sig, word32 sigLen, const byte* msg,
+                            word32 msgLen, int* res, ed448_key* key,
+                            byte type, const byte* context, byte contextLen)
+{
+    byte     rcheck[ED448_KEY_SIZE];
+    byte     h[ED448_SIG_SIZE];
+    ge448_p2 A;
+    ge448_p2 R;
+    int      ret = 0;
+    wc_Shake sha;
+
+    /* sanity check on arguments */
+    if ((sig == NULL) || (msg == NULL) || (res == NULL) || (key == NULL) ||
+                                     ((context == NULL) && (contextLen != 0))) {
+        ret = BAD_FUNC_ARG;
+    }
+
+    if (ret == 0) {
+        /* set verification failed by default */
+        *res = 0;
+
+        /* check on basics needed to verify signature */
+        if (sigLen < ED448_SIG_SIZE) {
+            ret = BAD_FUNC_ARG;
+        }
+    }
+
+    /* uncompress A (public key), test if valid, and negate it */
+    if ((ret == 0) && (ge448_from_bytes_negate_vartime(&A, key->p) != 0)) {
+        ret = BAD_FUNC_ARG;
+    }
+
+    if (ret == 0) {
+        /* find H(R,A,M) and store it as h */
+        ret  = wc_InitShake256(&sha, NULL, INVALID_DEVID);
+        if (ret == 0) {
+            ret = wc_Shake256_Update(&sha, ed448Ctx, ED448CTX_SIZE);
+            if (ret == 0) {
+                ret = wc_Shake256_Update(&sha, &type, sizeof(type));
+            }
+            if (ret == 0) {
+                ret = wc_Shake256_Update(&sha, &contextLen, sizeof(contextLen));
+            }
+            if (ret == 0 && context != NULL) {
+                ret = wc_Shake256_Update(&sha, context, contextLen);
+            }
+            if (ret == 0) {
+                ret = wc_Shake256_Update(&sha, sig, ED448_SIG_SIZE/2);
+            }
+            if (ret == 0) {
+                ret = wc_Shake256_Update(&sha, key->p, ED448_PUB_KEY_SIZE);
+            }
+            if (ret == 0) {
+                ret = wc_Shake256_Update(&sha, msg, msgLen);
+            }
+            if (ret == 0) {
+                ret = wc_Shake256_Final(&sha,  h, sizeof(h));
+            }
+            wc_Shake256_Free(&sha);
+        }
+    }
+    if (ret == 0) {
+        sc448_reduce(h);
+
+        /* Uses a fast single-signature verification SB = R + H(R,A,M)A becomes
+         * SB - H(R,A,M)A saving decompression of R
+         */
+        ret = ge448_double_scalarmult_vartime(&R, h, &A,
+                                                      sig + (ED448_SIG_SIZE/2));
+    }
+
+    if (ret == 0) {
+        ge448_to_bytes(rcheck, &R);
+
+        /* comparison of R created to R in sig */
+        if (ConstantCompare(rcheck, sig, ED448_SIG_SIZE/2) != 0) {
+            ret = SIG_VERIFY_E;
+        }
+        else {
+            /* set the verification status */
+            *res = 1;
+        }
+    }
+
+    return ret;
+}
+
+/* Verify the message using the ed448 public key.
+ * Signature type is Ed448.
+ *
+ *  sig         [in]  Signature to verify.
+ *  sigLen      [in]  Size of signature in bytes.
+ *  msg         [in]  Message to verify.
+ *  msgLen      [in]  Length of the message in bytes.
+ *  key         [in]  Ed448 key to use to verify.
+ *  context     [in]  Context of verification.
+ *  contextLen  [in]  Length of context in bytes.
+ *  returns BAD_FUNC_ARG when a parameter is NULL or contextLen is zero when and
+ *          context is not NULL or public key not set,
+ *          BUFFER_E when sigLen is less than ED448_SIG_SIZE,
+ *          other -ve values when hash fails,
+ *          0 otherwise.
+ */
+int wc_ed448_verify_msg(const byte* sig, word32 sigLen, const byte* msg,
+                        word32 msgLen, int* res, ed448_key* key,
+                        const byte* context, byte contextLen)
+{
+    return ed448_verify_msg(sig, sigLen, msg, msgLen, res, key, Ed448,
+                                                           context, contextLen);
+}
+
+/* Verify the hash using the ed448 public key.
+ * Signature type is Ed448ph.
+ *
+ *  sig         [in]  Signature to verify.
+ *  sigLen      [in]  Size of signature in bytes.
+ *  hash        [in]  Hash of message to verify.
+ *  hashLen     [in]  Length of the hash in bytes.
+ *  key         [in]  Ed448 key to use to verify.
+ *  context     [in]  Context of verification.
+ *  contextLen  [in]  Length of context in bytes.
+ *  returns BAD_FUNC_ARG when a parameter is NULL or contextLen is zero when and
+ *          context is not NULL or public key not set,
+ *          BUFFER_E when sigLen is less than ED448_SIG_SIZE,
+ *          other -ve values when hash fails,
+ *          0 otherwise.
+ */
+int wc_ed448ph_verify_hash(const byte* sig, word32 sigLen, const byte* hash,
+                           word32 hashLen, int* res, ed448_key* key,
+                           const byte* context, byte contextLen)
+{
+    return ed448_verify_msg(sig, sigLen, hash, hashLen, res, key, Ed448ph,
+                                                           context, contextLen);
+}
+
+/* Verify the message using the ed448 public key.
+ * Signature type is Ed448ph.
+ *
+ *  sig         [in]  Signature to verify.
+ *  sigLen      [in]  Size of signature in bytes.
+ *  msg         [in]  Message to verify.
+ *  msgLen      [in]  Length of the message in bytes.
+ *  key         [in]  Ed448 key to use to verify.
+ *  context     [in]  Context of verification.
+ *  contextLen  [in]  Length of context in bytes.
+ *  returns BAD_FUNC_ARG when a parameter is NULL or contextLen is zero when and
+ *          context is not NULL or public key not set,
+ *          BUFFER_E when sigLen is less than ED448_SIG_SIZE,
+ *          other -ve values when hash fails,
+ *          0 otherwise.
+ */
+int wc_ed448ph_verify_msg(const byte* sig, word32 sigLen, const byte* msg,
+                          word32 msgLen, int* res, ed448_key* key,
+                          const byte* context, byte contextLen)
+{
+    int  ret = 0;
+    byte hash[64];
+
+    ret = wc_Shake256Hash(msg, msgLen, hash, sizeof(hash));
+    if (ret == 0) {
+        ret = wc_ed448ph_verify_hash(sig, sigLen, hash, sizeof(hash), res, key,
+                                                           context, contextLen);
+    }
+
+    return ret;
+}
+#endif /* HAVE_ED448_VERIFY */
+
+/* Initialize the ed448 private/public key.
+ *
+ * key  [in]  Ed448 key.
+ * returns BAD_FUNC_ARG when key is NULL
+ */
+int wc_ed448_init(ed448_key* key)
+{
+    int ret = 0;
+
+    if (key == NULL) {
+        ret = BAD_FUNC_ARG;
+    }
+    else {
+        XMEMSET(key, 0, sizeof(ed448_key));
+
+        fe448_init();
+    }
+
+    return ret;
+}
+
+
+/* Clears the ed448 key data
+ *
+ * key  [in]  Ed448 key.
+ */
+void wc_ed448_free(ed448_key* key)
+{
+    if (key != NULL) {
+        ForceZero(key, sizeof(ed448_key));
+    }
+}
+
+
+#ifdef HAVE_ED448_KEY_EXPORT
+
+/* Export the ed448 public key.
+ *
+ * key     [in]      Ed448 public key.
+ * out     [in]      Array to hold public key.
+ * outLen  [in/out]  On in, the number of bytes in array.
+ *                   On out, the number bytes put into array.
+ * returns BAD_FUNC_ARG when a parameter is NULL,
+ *         ECC_BAD_ARG_E when outLen is less than ED448_PUB_KEY_SIZE,
+ *         0 otherwise.
+ */
+int wc_ed448_export_public(ed448_key* key, byte* out, word32* outLen)
+{
+    int ret = 0;
+
+    /* sanity check on arguments */
+    if ((key == NULL) || (out == NULL) || (outLen == NULL)) {
+        ret = BAD_FUNC_ARG;
+    }
+
+    if ((ret == 0) && (*outLen < ED448_PUB_KEY_SIZE)) {
+        *outLen = ED448_PUB_KEY_SIZE;
+        ret = BUFFER_E;
+    }
+
+    if (ret == 0) {
+        *outLen = ED448_PUB_KEY_SIZE;
+        XMEMCPY(out, key->p, ED448_PUB_KEY_SIZE);
+    }
+
+    return ret;
+}
+
+#endif /* HAVE_ED448_KEY_EXPORT */
+
+
+#ifdef HAVE_ED448_KEY_IMPORT
+/* Import a compressed or uncompressed ed448 public key from a byte array.
+ * Public key encoded in big-endian.
+ *
+ * in      [in]  Array holding public key.
+ * inLen   [in]  Number of bytes of data in array.
+ * key     [in]  Ed448 public key.
+ * returns BAD_FUNC_ARG when a parameter is NULL or key format is not supported,
+ *         0 otherwise.
+ */
+int wc_ed448_import_public(const byte* in, word32 inLen, ed448_key* key)
+{
+    int ret = 0;
+
+    /* sanity check on arguments */
+    if ((in == NULL) || (key == NULL)) {
+        ret = BAD_FUNC_ARG;
+    }
+
+    if (ret == 0) {
+        /* compressed prefix according to draft
+         * https://tools.ietf.org/html/draft-ietf-openpgp-rfc4880bis-06 */
+        if (in[0] == 0x40 && inLen > ED448_PUB_KEY_SIZE) {
+            /* key is stored in compressed format so just copy in */
+            XMEMCPY(key->p, (in + 1), ED448_PUB_KEY_SIZE);
+            key->pubKeySet = 1;
+        }
+        /* importing uncompressed public key */
+        else if (in[0] == 0x04 && inLen > 2*ED448_PUB_KEY_SIZE) {
+            /* pass in (x,y) and store compressed key */
+            ret = ge448_compress_key(key->p, in+1, in+1+ED448_PUB_KEY_SIZE);
+            if (ret == 0)
+                key->pubKeySet = 1;
+        }
+        else if (inLen == ED448_PUB_KEY_SIZE) {
+            /* if not specified compressed or uncompressed check key size
+             * if key size is equal to compressed key size copy in key */
+            XMEMCPY(key->p, in, ED448_PUB_KEY_SIZE);
+            key->pubKeySet = 1;
+        }
+        else {
+            /* bad public key format */
+            ret = BAD_FUNC_ARG;
+        }
+    }
+
+    return ret;
+}
+
+
+/* Import an ed448 private key from a byte array.
+ *
+ * priv    [in]  Array holding private key.
+ * privSz  [in]  Number of bytes of data in array.
+ * key     [in]  Ed448 private key.
+ * returns BAD_FUNC_ARG when a parameter is NULL or privSz is less than
+ *         ED448_KEY_SIZE,
+ *         0 otherwise.
+ */
+int wc_ed448_import_private_only(const byte* priv, word32 privSz,
+                                 ed448_key* key)
+{
+    int ret = 0;
+
+    /* sanity check on arguments */
+    if ((priv == NULL) || (key == NULL)) {
+        ret = BAD_FUNC_ARG;
+    }
+
+    /* key size check */
+    if ((ret == 0) && (privSz < ED448_KEY_SIZE)) {
+        ret = BAD_FUNC_ARG;
+    }
+
+    if (ret == 0) {
+        XMEMCPY(key->k, priv, ED448_KEY_SIZE);
+    }
+
+    return ret;
+}
+
+/* Import an ed448 private and public keys from a byte arrays.
+ *
+ * priv    [in]  Array holding private key.
+ * privSz  [in]  Number of bytes of data in private key array.
+ * pub     [in]  Array holding private key.
+ * pubSz   [in]  Number of bytes of data in public key array.
+ * key     [in]  Ed448 private/public key.
+ * returns BAD_FUNC_ARG when a parameter is NULL or privSz is less than
+ *         ED448_KEY_SIZE or pubSz is less than ED448_PUB_KEY_SIZE,
+ *         0 otherwise.
+ */
+int wc_ed448_import_private_key(const byte* priv, word32 privSz,
+                                const byte* pub, word32 pubSz, ed448_key* key)
+{
+    int ret = 0;
+
+    /* sanity check on arguments */
+    if ((priv == NULL) || (pub == NULL) || (key == NULL)) {
+        ret = BAD_FUNC_ARG;
+    }
+
+    /* key size check */
+    if ((ret == 0) && (privSz < ED448_KEY_SIZE || pubSz < ED448_PUB_KEY_SIZE)) {
+        ret = BAD_FUNC_ARG;
+    }
+
+    if (ret == 0) {
+        /* import public key */
+        ret = wc_ed448_import_public(pub, pubSz, key);
+    }
+    if (ret == 0) {
+        /* make the private key (priv + pub) */
+        XMEMCPY(key->k, priv, ED448_KEY_SIZE);
+        XMEMCPY(key->k + ED448_KEY_SIZE, key->p, ED448_PUB_KEY_SIZE);
+    }
+
+    return ret;
+}
+
+#endif /* HAVE_ED448_KEY_IMPORT */
+
+
+#ifdef HAVE_ED448_KEY_EXPORT
+
+/* Export the ed448 private key.
+ *
+ * key     [in]      Ed448 private key.
+ * out     [in]      Array to hold private key.
+ * outLen  [in/out]  On in, the number of bytes in array.
+ *                   On out, the number bytes put into array.
+ * returns BAD_FUNC_ARG when a parameter is NULL,
+ *         ECC_BAD_ARG_E when outLen is less than ED448_KEY_SIZE,
+ *         0 otherwise.
+ */
+int wc_ed448_export_private_only(ed448_key* key, byte* out, word32* outLen)
+{
+    int ret = 0;
+
+    /* sanity checks on arguments */
+    if ((key == NULL) || (out == NULL) || (outLen == NULL)) {
+        ret = BAD_FUNC_ARG;
+    }
+
+    if ((ret == 0) && (*outLen < ED448_KEY_SIZE)) {
+        *outLen = ED448_KEY_SIZE;
+        ret = BUFFER_E;
+    }
+
+    if (ret == 0) {
+        *outLen = ED448_KEY_SIZE;
+        XMEMCPY(out, key->k, ED448_KEY_SIZE);
+    }
+
+    return ret;
+}
+
+/* Export the ed448 private and public key.
+ *
+ * key     [in]      Ed448 private/public key.
+ * out     [in]      Array to hold private and public key.
+ * outLen  [in/out]  On in, the number of bytes in array.
+ *                   On out, the number bytes put into array.
+ * returns BAD_FUNC_ARG when a parameter is NULL,
+ *         BUFFER_E when outLen is less than ED448_PRV_KEY_SIZE,
+ *         0 otherwise.
+ */
+int wc_ed448_export_private(ed448_key* key, byte* out, word32* outLen)
+{
+    int ret = 0;
+
+    /* sanity checks on arguments */
+    if ((key == NULL) || (out == NULL) || (outLen == NULL)) {
+        ret = BAD_FUNC_ARG;
+    }
+
+    if ((ret == 0) && (*outLen < ED448_PRV_KEY_SIZE)) {
+        *outLen = ED448_PRV_KEY_SIZE;
+        ret = BUFFER_E;
+    }
+
+    if (ret == 0) {
+        *outLen = ED448_PRV_KEY_SIZE;
+        XMEMCPY(out, key->k, ED448_PRV_KEY_SIZE);
+     }
+
+    return ret;
+}
+
+/* Export the ed448 private and public key.
+ *
+ * key     [in]      Ed448 private/public key.
+ * priv    [in]      Array to hold private key.
+ * privSz  [in/out]  On in, the number of bytes in private key array.
+ * pub     [in]      Array to hold  public key.
+ * pubSz   [in/out]  On in, the number of bytes in public key array.
+ *                   On out, the number bytes put into array.
+ * returns BAD_FUNC_ARG when a parameter is NULL,
+ *         BUFFER_E when privSz is less than ED448_PRV_KEY_SIZE or pubSz is less
+ *         than ED448_PUB_KEY_SIZE,
+ *         0 otherwise.
+ */
+int wc_ed448_export_key(ed448_key* key, byte* priv, word32 *privSz,
+                        byte* pub, word32 *pubSz)
+{
+    int ret = 0;
+
+    /* export 'full' private part */
+    ret = wc_ed448_export_private(key, priv, privSz);
+    if (ret == 0) {
+        /* export public part */
+        ret = wc_ed448_export_public(key, pub, pubSz);
+    }
+
+    return ret;
+}
+
+#endif /* HAVE_ED448_KEY_EXPORT */
+
+/* Check the public key of the ed448 key matches the private key.
+ *
+ * key     [in]      Ed448 private/public key.
+ * returns BAD_FUNC_ARG when key is NULL,
+ *         PUBLIC_KEY_E when the public key is not set or doesn't match,
+ *         other -ve value on hash failure,
+ *         0 otherwise.
+ */
+int wc_ed448_check_key(ed448_key* key)
+{
+    int ret = 0;
+    unsigned char pubKey[ED448_PUB_KEY_SIZE];
+
+    if (key == NULL) {
+        ret = BAD_FUNC_ARG;
+    }
+
+    if (!key->pubKeySet) {
+        ret = PUBLIC_KEY_E;
+    }
+    if (ret == 0) {
+        ret = wc_ed448_make_public(key, pubKey, sizeof(pubKey));
+    }
+    if ((ret == 0) && (XMEMCMP(pubKey, key->p, ED448_PUB_KEY_SIZE) != 0)) {
+        ret = PUBLIC_KEY_E;
+    }
+
+    return ret;
+}
+
+/* Returns the size of an ed448 private key.
+ *
+ * key     [in]      Ed448 private/public key.
+ * returns BAD_FUNC_ARG when key is NULL,
+ *         ED448_KEY_SIZE otherwise.
+ */
+int wc_ed448_size(ed448_key* key)
+{
+    int ret = ED448_KEY_SIZE;
+
+    if (key == NULL) {
+        ret = BAD_FUNC_ARG;
+    }
+
+    return ret;
+}
+
+/* Returns the size of an ed448 private plus public key.
+ *
+ * key     [in]      Ed448 private/public key.
+ * returns BAD_FUNC_ARG when key is NULL,
+ *         ED448_PRV_KEY_SIZE otherwise.
+ */
+int wc_ed448_priv_size(ed448_key* key)
+{
+    int ret = ED448_PRV_KEY_SIZE;
+
+    if (key == NULL) {
+        ret = BAD_FUNC_ARG;
+    }
+
+    return ret;
+}
+
+/* Returns the size of an ed448 public key.
+ *
+ * key     [in]      Ed448 private/public key.
+ * returns BAD_FUNC_ARG when key is NULL,
+ *         ED448_PUB_KEY_SIZE otherwise.
+ */
+int wc_ed448_pub_size(ed448_key* key)
+{
+    int ret = ED448_PUB_KEY_SIZE;
+
+    if (key == NULL) {
+        ret = BAD_FUNC_ARG;
+    }
+
+    return ret;
+}
+
+/* Returns the size of an ed448 signature.
+ *
+ * key     [in]      Ed448 private/public key.
+ * returns BAD_FUNC_ARG when key is NULL,
+ *         ED448_SIG_SIZE otherwise.
+ */
+int wc_ed448_sig_size(ed448_key* key)
+{
+    int ret = ED448_SIG_SIZE;
+
+    if (key == NULL) {
+        ret = BAD_FUNC_ARG;
+    }
+
+    return ret;
+}
+
+#endif /* HAVE_ED448 */
+
+
--- a/wolfcrypt/src/error.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/error.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* error.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -281,6 +281,9 @@
     case PKCS7_RECIP_E:
         return "PKCS#7 error: no matching recipient found";
 
+    case WC_PKCS7_WANT_READ_E:
+        return "PKCS#7 operations wants more input, call again";
+
     case FIPS_NOT_ALLOWED_E:
         return "FIPS mode not allowed error";
 
@@ -482,6 +485,36 @@
     case DH_CHECK_PRIV_E:
         return "DH Check Private Key failure";
 
+    case WC_AFALG_SOCK_E:
+        return "AF_ALG socket error";
+
+    case WC_DEVCRYPTO_E:
+        return "Error with /dev/crypto";
+
+    case ZLIB_INIT_ERROR:
+        return "zlib init error";
+
+    case ZLIB_COMPRESS_ERROR:
+        return "zlib compress error";
+
+    case ZLIB_DECOMPRESS_ERROR:
+        return "zlib decompress error";
+
+    case PKCS7_NO_SIGNER_E:
+        return "No signer in PKCS#7 signed data";
+
+    case CRYPTOCB_UNAVAILABLE:
+        return "Crypto callback unavailable";
+
+    case PKCS7_SIGNEEDS_CHECK:
+        return "Signature found but no certificate to verify";
+
+    case PSS_SALTLEN_RECOVER_E:
+        return "PSS - Salt length unable to be recovered";
+
+    case ASN_SELF_SIGNED_E:
+        return "ASN self-signed certificate error";
+
     default:
         return "unknown error number";
 
@@ -491,6 +524,7 @@
 void wc_ErrorString(int error, char* buffer)
 {
     XSTRNCPY(buffer, wc_GetErrorString(error), WOLFSSL_MAX_ERROR_SZ);
+    buffer[WOLFSSL_MAX_ERROR_SZ-1] = 0;
 }
 #endif /* !NO_ERROR_STRINGS */
 
--- a/wolfcrypt/src/evp.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/evp.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* evp.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -21,9 +21,139 @@
 
 #if !defined(WOLFSSL_EVP_INCLUDED)
     #ifndef WOLFSSL_IGNORE_FILE_WARN
-        #warning evp.c does not need to be compiled seperatly from ssl.c
+        #warning evp.c does not need to be compiled separately from ssl.c
+    #endif
+#elif defined(WOLFCRYPT_ONLY)
+#else
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#include <wolfssl/openssl/ecdsa.h>
+#include <wolfssl/openssl/evp.h>
+
+#if defined(OPENSSL_EXTRA)
+
+#ifndef NO_AES
+    #ifdef HAVE_AES_CBC
+    #ifdef WOLFSSL_AES_128
+        static char *EVP_AES_128_CBC = NULL;
+    #endif
+    #ifdef WOLFSSL_AES_192
+        static char *EVP_AES_192_CBC = NULL;
+    #endif
+    #ifdef WOLFSSL_AES_256
+        static char *EVP_AES_256_CBC = NULL;
+    #endif
+    #endif /* HAVE_AES_CBC */
+
+    #ifdef WOLFSSL_AES_OFB
+    #ifdef WOLFSSL_AES_128
+        static char *EVP_AES_128_OFB = NULL;
+    #endif
+    #ifdef WOLFSSL_AES_192
+        static char *EVP_AES_192_OFB = NULL;
+    #endif
+    #ifdef WOLFSSL_AES_256
+        static char *EVP_AES_256_OFB = NULL;
+    #endif
+    #endif /* WOLFSSL_AES_OFB */
+
+    #ifdef WOLFSSL_AES_XTS
+    #ifdef WOLFSSL_AES_128
+        static char *EVP_AES_128_XTS = NULL;
+    #endif
+    #ifdef WOLFSSL_AES_256
+        static char *EVP_AES_256_XTS = NULL;
+    #endif
+    #endif /* WOLFSSL_AES_XTS */
+
+    #ifdef WOLFSSL_AES_CFB
+    #ifdef WOLFSSL_AES_128
+        static char *EVP_AES_128_CFB1 = NULL;
+    #endif
+    #ifdef WOLFSSL_AES_192
+        static char *EVP_AES_192_CFB1 = NULL;
+    #endif
+    #ifdef WOLFSSL_AES_256
+        static char *EVP_AES_256_CFB1 = NULL;
+    #endif
+
+    #ifdef WOLFSSL_AES_128
+        static char *EVP_AES_128_CFB8 = NULL;
+    #endif
+    #ifdef WOLFSSL_AES_192
+        static char *EVP_AES_192_CFB8 = NULL;
     #endif
-#else
+    #ifdef WOLFSSL_AES_256
+        static char *EVP_AES_256_CFB8 = NULL;
+    #endif
+
+    #ifdef WOLFSSL_AES_128
+        static char *EVP_AES_128_CFB128 = NULL;
+    #endif
+    #ifdef WOLFSSL_AES_192
+        static char *EVP_AES_192_CFB128 = NULL;
+    #endif
+    #ifdef WOLFSSL_AES_256
+        static char *EVP_AES_256_CFB128 = NULL;
+    #endif
+    #endif /* WOLFSSL_AES_CFB */
+
+    #ifdef HAVE_AESGCM
+        #ifdef WOLFSSL_AES_128
+            static char *EVP_AES_128_GCM = NULL;
+        #endif
+        #ifdef WOLFSSL_AES_192
+            static char *EVP_AES_192_GCM = NULL;
+        #endif
+        #ifdef WOLFSSL_AES_256
+            static char *EVP_AES_256_GCM = NULL;
+        #endif
+    #endif /* HAVE_AESGCM */
+    #ifdef WOLFSSL_AES_128
+        static char *EVP_AES_128_CTR = NULL;
+    #endif
+    #ifdef WOLFSSL_AES_192
+        static char *EVP_AES_192_CTR = NULL;
+    #endif
+    #ifdef WOLFSSL_AES_256
+        static char *EVP_AES_256_CTR = NULL;
+    #endif
+
+    #ifdef WOLFSSL_AES_128
+        static char *EVP_AES_128_ECB = NULL;
+    #endif
+    #ifdef WOLFSSL_AES_192
+        static char *EVP_AES_192_ECB = NULL;
+    #endif
+    #ifdef WOLFSSL_AES_256
+        static char *EVP_AES_256_ECB = NULL;
+    #endif
+        #define      EVP_AES_SIZE 11
+    #ifdef WOLFSSL_AES_CFB
+        #define      EVP_AESCFB_SIZE 14
+    #endif
+#endif
+
+#ifndef NO_DES3
+    static char *EVP_DES_CBC = NULL;
+    static char *EVP_DES_ECB = NULL;
+
+    static char *EVP_DES_EDE3_CBC = NULL;
+    static char *EVP_DES_EDE3_ECB = NULL;
+
+    #define EVP_DES_SIZE 7
+    #define EVP_DES_EDE3_SIZE 12
+#endif
+
+#ifdef HAVE_IDEA
+    static char *EVP_IDEA_CBC;
+    #define EVP_IDEA_SIZE 8
+#endif
 
 static unsigned int cipherType(const WOLFSSL_EVP_CIPHER *cipher);
 
@@ -46,21 +176,48 @@
     }
 
     switch (cipherType(c)) {
-    #if !defined(NO_AES) && defined(HAVE_AES_CBC)
+#if !defined(NO_AES)
+  #if defined(HAVE_AES_CBC)
       case AES_128_CBC_TYPE: return 16;
       case AES_192_CBC_TYPE: return 24;
       case AES_256_CBC_TYPE: return 32;
   #endif
-  #if !defined(NO_AES) && defined(WOLFSSL_AES_COUNTER)
+  #if defined(WOLFSSL_AES_CFB)
+      case AES_128_CFB1_TYPE: return 16;
+      case AES_192_CFB1_TYPE: return 24;
+      case AES_256_CFB1_TYPE: return 32;
+      case AES_128_CFB8_TYPE: return 16;
+      case AES_192_CFB8_TYPE: return 24;
+      case AES_256_CFB8_TYPE: return 32;
+      case AES_128_CFB128_TYPE: return 16;
+      case AES_192_CFB128_TYPE: return 24;
+      case AES_256_CFB128_TYPE: return 32;
+  #endif
+  #if defined(WOLFSSL_AES_OFB)
+      case AES_128_OFB_TYPE: return 16;
+      case AES_192_OFB_TYPE: return 24;
+      case AES_256_OFB_TYPE: return 32;
+  #endif
+  #if defined(WOLFSSL_AES_XTS)
+      case AES_128_XTS_TYPE: return 16;
+      case AES_256_XTS_TYPE: return 32;
+  #endif
+  #if defined(HAVE_AESGCM)
+      case AES_128_GCM_TYPE: return 16;
+      case AES_192_GCM_TYPE: return 24;
+      case AES_256_GCM_TYPE: return 32;
+  #endif
+  #if defined(WOLFSSL_AES_COUNTER)
       case AES_128_CTR_TYPE: return 16;
       case AES_192_CTR_TYPE: return 24;
       case AES_256_CTR_TYPE: return 32;
   #endif
-  #if !defined(NO_AES) && defined(HAVE_AES_ECB)
+  #if defined(HAVE_AES_ECB)
       case AES_128_ECB_TYPE: return 16;
       case AES_192_ECB_TYPE: return 24;
       case AES_256_ECB_TYPE: return 32;
   #endif
+#endif /* !NO_AES */
   #ifndef NO_DES3
       case DES_CBC_TYPE:      return 8;
       case DES_EDE3_CBC_TYPE: return 24;
@@ -73,7 +230,7 @@
 }
 
 
-WOLFSSL_API int  wolfSSL_EVP_EncryptInit(WOLFSSL_EVP_CIPHER_CTX* ctx,
+int  wolfSSL_EVP_EncryptInit(WOLFSSL_EVP_CIPHER_CTX* ctx,
                                         const WOLFSSL_EVP_CIPHER* type,
                                         const unsigned char* key,
                                         const unsigned char* iv)
@@ -81,7 +238,7 @@
     return wolfSSL_EVP_CipherInit(ctx, type, (byte*)key, (byte*)iv, 1);
 }
 
-WOLFSSL_API int  wolfSSL_EVP_EncryptInit_ex(WOLFSSL_EVP_CIPHER_CTX* ctx,
+int  wolfSSL_EVP_EncryptInit_ex(WOLFSSL_EVP_CIPHER_CTX* ctx,
                                         const WOLFSSL_EVP_CIPHER* type,
                                         WOLFSSL_ENGINE *impl,
                                         const unsigned char* key,
@@ -91,7 +248,7 @@
     return wolfSSL_EVP_CipherInit(ctx, type, (byte*)key, (byte*)iv, 1);
 }
 
-WOLFSSL_API int  wolfSSL_EVP_DecryptInit(WOLFSSL_EVP_CIPHER_CTX* ctx,
+int  wolfSSL_EVP_DecryptInit(WOLFSSL_EVP_CIPHER_CTX* ctx,
                                         const WOLFSSL_EVP_CIPHER* type,
                                         const unsigned char* key,
                                         const unsigned char* iv)
@@ -100,7 +257,7 @@
     return wolfSSL_EVP_CipherInit(ctx, type, (byte*)key, (byte*)iv, 0);
 }
 
-WOLFSSL_API int  wolfSSL_EVP_DecryptInit_ex(WOLFSSL_EVP_CIPHER_CTX* ctx,
+int  wolfSSL_EVP_DecryptInit_ex(WOLFSSL_EVP_CIPHER_CTX* ctx,
                                         const WOLFSSL_EVP_CIPHER* type,
                                         WOLFSSL_ENGINE *impl,
                                         const unsigned char* key,
@@ -112,33 +269,46 @@
 }
 
 
-WOLFSSL_API WOLFSSL_EVP_CIPHER_CTX *wolfSSL_EVP_CIPHER_CTX_new(void)
+WOLFSSL_EVP_CIPHER_CTX *wolfSSL_EVP_CIPHER_CTX_new(void)
 {
-	WOLFSSL_EVP_CIPHER_CTX *ctx = (WOLFSSL_EVP_CIPHER_CTX*)XMALLOC(sizeof *ctx,
+    WOLFSSL_EVP_CIPHER_CTX *ctx = (WOLFSSL_EVP_CIPHER_CTX*)XMALLOC(sizeof *ctx,
                                                  NULL, DYNAMIC_TYPE_TMP_BUFFER);
-	if (ctx) {
-      WOLFSSL_ENTER("wolfSSL_EVP_CIPHER_CTX_new");
-		  wolfSSL_EVP_CIPHER_CTX_init(ctx);
-  }
-	return ctx;
+    if (ctx) {
+        WOLFSSL_ENTER("wolfSSL_EVP_CIPHER_CTX_new");
+        wolfSSL_EVP_CIPHER_CTX_init(ctx);
+    }
+    return ctx;
 }
 
-WOLFSSL_API void wolfSSL_EVP_CIPHER_CTX_free(WOLFSSL_EVP_CIPHER_CTX *ctx)
+void wolfSSL_EVP_CIPHER_CTX_free(WOLFSSL_EVP_CIPHER_CTX *ctx)
 {
     if (ctx) {
         WOLFSSL_ENTER("wolfSSL_EVP_CIPHER_CTX_free");
-		    wolfSSL_EVP_CIPHER_CTX_cleanup(ctx);
-		    XFREE(ctx, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-		}
+        wolfSSL_EVP_CIPHER_CTX_cleanup(ctx);
+        XFREE(ctx, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
 }
 
-WOLFSSL_API unsigned long wolfSSL_EVP_CIPHER_CTX_mode(const WOLFSSL_EVP_CIPHER_CTX *ctx)
+int wolfSSL_EVP_CIPHER_CTX_reset(WOLFSSL_EVP_CIPHER_CTX *ctx)
+{
+    int ret = WOLFSSL_FAILURE;
+
+    if (ctx != NULL) {
+        WOLFSSL_ENTER("wolfSSL_EVP_CIPHER_CTX_reset");
+        wolfSSL_EVP_CIPHER_CTX_cleanup(ctx);
+        ret = WOLFSSL_SUCCESS;
+    }
+
+    return ret;
+}
+
+unsigned long wolfSSL_EVP_CIPHER_CTX_mode(const WOLFSSL_EVP_CIPHER_CTX *ctx)
 {
   if (ctx == NULL) return 0;
   return ctx->flags & WOLFSSL_EVP_CIPH_MODE;
 }
 
-WOLFSSL_API int  wolfSSL_EVP_EncryptFinal(WOLFSSL_EVP_CIPHER_CTX *ctx,
+int  wolfSSL_EVP_EncryptFinal(WOLFSSL_EVP_CIPHER_CTX *ctx,
                                    unsigned char *out, int *outl)
 {
     if (ctx && ctx->enc) {
@@ -150,7 +320,7 @@
 }
 
 
-WOLFSSL_API int  wolfSSL_EVP_CipherInit_ex(WOLFSSL_EVP_CIPHER_CTX* ctx,
+int  wolfSSL_EVP_CipherInit_ex(WOLFSSL_EVP_CIPHER_CTX* ctx,
                                     const WOLFSSL_EVP_CIPHER* type,
                                     WOLFSSL_ENGINE *impl,
                                     const unsigned char* key,
@@ -161,7 +331,7 @@
     return wolfSSL_EVP_CipherInit(ctx, type, key, iv, enc);
 }
 
-WOLFSSL_API int  wolfSSL_EVP_EncryptFinal_ex(WOLFSSL_EVP_CIPHER_CTX *ctx,
+int  wolfSSL_EVP_EncryptFinal_ex(WOLFSSL_EVP_CIPHER_CTX *ctx,
                                    unsigned char *out, int *outl)
 {
     if (ctx && ctx->enc) {
@@ -172,30 +342,32 @@
         return WOLFSSL_FAILURE;
 }
 
-WOLFSSL_API int  wolfSSL_EVP_DecryptFinal(WOLFSSL_EVP_CIPHER_CTX *ctx,
+int  wolfSSL_EVP_DecryptFinal(WOLFSSL_EVP_CIPHER_CTX *ctx,
                                    unsigned char *out, int *outl)
 {
-  if (ctx && ctx->enc)
-      return WOLFSSL_FAILURE;
-  else {
-      WOLFSSL_ENTER("wolfSSL_EVP_DecryptFinal");
-      return wolfSSL_EVP_CipherFinal(ctx, out, outl);
-  }
+    if (ctx && !ctx->enc) {
+        WOLFSSL_ENTER("wolfSSL_EVP_DecryptFinal");
+        return wolfSSL_EVP_CipherFinal(ctx, out, outl);
+    }
+    else {
+        return WOLFSSL_FAILURE;
+    }
 }
 
-WOLFSSL_API int  wolfSSL_EVP_DecryptFinal_ex(WOLFSSL_EVP_CIPHER_CTX *ctx,
+int  wolfSSL_EVP_DecryptFinal_ex(WOLFSSL_EVP_CIPHER_CTX *ctx,
                                    unsigned char *out, int *outl)
 {
-    if (ctx && ctx->enc)
-        return WOLFSSL_FAILURE;
+    if (ctx && !ctx->enc) {
+        WOLFSSL_ENTER("wolfSSL_EVP_DecryptFinal_ex");
+        return wolfSSL_EVP_CipherFinal(ctx, out, outl);
+    }
     else {
-        WOLFSSL_ENTER("wolfSSL_EVP_CipherFinal_ex");
-        return wolfSSL_EVP_CipherFinal(ctx, out, outl);
+        return WOLFSSL_FAILURE;
     }
 }
 
 
-WOLFSSL_API int wolfSSL_EVP_DigestInit_ex(WOLFSSL_EVP_MD_CTX* ctx,
+int wolfSSL_EVP_DigestInit_ex(WOLFSSL_EVP_MD_CTX* ctx,
                                      const WOLFSSL_EVP_MD* type,
                                      WOLFSSL_ENGINE *impl)
 {
@@ -234,7 +406,8 @@
     int ret = 0;
 
     switch (ctx->cipherType) {
-    #if !defined(NO_AES) && defined(HAVE_AES_CBC)
+#if !defined(NO_AES)
+    #if defined(HAVE_AES_CBC)
         case AES_128_CBC_TYPE:
         case AES_192_CBC_TYPE:
         case AES_256_CBC_TYPE:
@@ -244,14 +417,55 @@
                 ret = wc_AesCbcDecrypt(&ctx->cipher.aes, out, in, inl);
             break;
     #endif
-    #if !defined(NO_AES) && defined(WOLFSSL_AES_COUNTER)
+    #if defined(HAVE_AESGCM)
+        case AES_128_GCM_TYPE:
+        case AES_192_GCM_TYPE:
+        case AES_256_GCM_TYPE:
+            if (ctx->enc) {
+                if (out){
+                    /* encrypt confidential data*/
+                    ret = wc_AesGcmEncrypt(&ctx->cipher.aes, out, in, inl,
+                              ctx->iv, ctx->ivSz, ctx->authTag, ctx->authTagSz,
+                              NULL, 0);
+                }
+                else {
+                    /* authenticated, non-confidential data */
+                    ret = wc_AesGcmEncrypt(&ctx->cipher.aes, NULL, NULL, 0,
+                              ctx->iv, ctx->ivSz, ctx->authTag, ctx->authTagSz,
+                              in, inl);
+                    /* Reset partial authTag error for AAD*/
+                    if (ret == AES_GCM_AUTH_E)
+                        ret = 0;
+                }
+            }
+            else {
+                if (out){
+                    /* decrypt confidential data*/
+                    ret = wc_AesGcmDecrypt(&ctx->cipher.aes, out, in, inl,
+                              ctx->iv, ctx->ivSz, ctx->authTag, ctx->authTagSz,
+                              NULL, 0);
+                }
+                else {
+                    /* authenticated, non-confidential data*/
+                    ret = wc_AesGcmDecrypt(&ctx->cipher.aes, NULL, NULL, 0,
+                              ctx->iv, ctx->ivSz,
+                              ctx->authTag, ctx->authTagSz,
+                              in, inl);
+                    /* Reset partial authTag error for AAD*/
+                    if (ret == AES_GCM_AUTH_E)
+                        ret = 0;
+                }
+            }
+            break;
+    #endif
+    #if defined(WOLFSSL_AES_COUNTER)
         case AES_128_CTR_TYPE:
         case AES_192_CTR_TYPE:
         case AES_256_CTR_TYPE:
             ret = wc_AesCtrEncrypt(&ctx->cipher.aes, out, in, inl);
             break;
     #endif
-    #if !defined(NO_AES) && defined(HAVE_AES_ECB)
+    #if defined(HAVE_AES_ECB)
         case AES_128_ECB_TYPE:
         case AES_192_ECB_TYPE:
         case AES_256_ECB_TYPE:
@@ -261,6 +475,60 @@
                 ret = wc_AesEcbDecrypt(&ctx->cipher.aes, out, in, inl);
             break;
     #endif
+    #if defined(WOLFSSL_AES_OFB)
+        case AES_128_OFB_TYPE:
+        case AES_192_OFB_TYPE:
+        case AES_256_OFB_TYPE:
+            if (ctx->enc)
+                ret = wc_AesOfbEncrypt(&ctx->cipher.aes, out, in, inl);
+            else
+                ret = wc_AesOfbDecrypt(&ctx->cipher.aes, out, in, inl);
+            break;
+    #endif
+    #if defined(WOLFSSL_AES_CFB)
+    #if !defined(HAVE_SELFTEST) && !defined(HAVE_FIPS)
+        case AES_128_CFB1_TYPE:
+        case AES_192_CFB1_TYPE:
+        case AES_256_CFB1_TYPE:
+            if (ctx->enc)
+                ret = wc_AesCfb1Encrypt(&ctx->cipher.aes, out, in,
+                        inl * WOLFSSL_BIT_SIZE);
+            else
+                ret = wc_AesCfb1Decrypt(&ctx->cipher.aes, out, in,
+                        inl * WOLFSSL_BIT_SIZE);
+            break;
+
+        case AES_128_CFB8_TYPE:
+        case AES_192_CFB8_TYPE:
+        case AES_256_CFB8_TYPE:
+            if (ctx->enc)
+                ret = wc_AesCfb8Encrypt(&ctx->cipher.aes, out, in, inl);
+            else
+                ret = wc_AesCfb8Decrypt(&ctx->cipher.aes, out, in, inl);
+            break;
+    #endif /* !HAVE_SELFTEST && !HAVE_FIPS */
+
+        case AES_128_CFB128_TYPE:
+        case AES_192_CFB128_TYPE:
+        case AES_256_CFB128_TYPE:
+            if (ctx->enc)
+                ret = wc_AesCfbEncrypt(&ctx->cipher.aes, out, in, inl);
+            else
+                ret = wc_AesCfbDecrypt(&ctx->cipher.aes, out, in, inl);
+            break;
+    #endif
+#if defined(WOLFSSL_AES_XTS)
+    case AES_128_XTS_TYPE:
+    case AES_256_XTS_TYPE:
+        if (ctx->enc)
+            ret = wc_AesXtsEncrypt(&ctx->cipher.xts, out, in, inl,
+                    ctx->iv, ctx->ivSz);
+        else
+            ret = wc_AesXtsDecrypt(&ctx->cipher.xts, out, in, inl,
+                    ctx->iv, ctx->ivSz);
+        break;
+#endif
+#endif /* !NO_AES */
     #ifndef NO_DES3
         case DES_CBC_TYPE:
             if (ctx->enc)
@@ -302,6 +570,20 @@
     return WOLFSSL_SUCCESS; /* success */
 }
 
+#if defined(HAVE_AESGCM)
+static int wolfSSL_EVP_CipherUpdate_GCM(WOLFSSL_EVP_CIPHER_CTX *ctx,
+                                   unsigned char *out, int *outl,
+                                   const unsigned char *in, int inl)
+{
+    /* process blocks */
+    if (evpCipherBlock(ctx, out, in, inl) == 0)
+        return WOLFSSL_FAILURE;
+    *outl = inl;
+    return WOLFSSL_SUCCESS;
+}
+#endif
+
+/* returns WOLFSSL_SUCCESS on success and WOLFSSL_FAILURE on failure */
 WOLFSSL_API int wolfSSL_EVP_CipherUpdate(WOLFSSL_EVP_CIPHER_CTX *ctx,
                                    unsigned char *out, int *outl,
                                    const unsigned char *in, int inl)
@@ -309,45 +591,88 @@
     int blocks;
     int fill;
 
-    if ((ctx == NULL) || (inl < 0) ||
-        (outl == NULL)|| (out == NULL) || (in == NULL)) return BAD_FUNC_ARG;
     WOLFSSL_ENTER("wolfSSL_EVP_CipherUpdate");
+    if ((ctx == NULL) || (inl < 0) || (outl == NULL)|| (in == NULL)) {
+        WOLFSSL_MSG("Bad argument");
+        return WOLFSSL_FAILURE;
+    }
 
     *outl = 0;
-    if (inl == 0) return WOLFSSL_SUCCESS;
-
-    if (ctx->bufUsed > 0) { /* concatinate them if there is anything */
+    if (inl == 0) {
+        return WOLFSSL_SUCCESS;
+    }
+
+#if !defined(NO_AES) && defined(HAVE_AESGCM)
+        switch (ctx->cipherType) {
+            case AES_128_GCM_TYPE:
+            case AES_192_GCM_TYPE:
+            case AES_256_GCM_TYPE:
+/* if out == NULL, in/inl contains the additional authenticated data for GCM */
+                return wolfSSL_EVP_CipherUpdate_GCM(ctx, out, outl, in, inl);
+            default:
+                /* fall-through */
+                break;
+        }
+#endif /* !defined(NO_AES) && defined(HAVE_AESGCM) */
+
+    if (out == NULL) {
+        return WOLFSSL_FAILURE;
+    }
+
+
+    if (ctx->bufUsed > 0) { /* concatenate them if there is anything */
         fill = fillBuff(ctx, in, inl);
         inl -= fill;
         in  += fill;
     }
-    if ((ctx->enc == 0)&& (ctx->lastUsed == 1)) {
-        PRINT_BUF(ctx->lastBlock, ctx->block_size);
-        XMEMCPY(out, ctx->lastBlock, ctx->block_size);
-        *outl+= ctx->block_size;
-        out  += ctx->block_size;
-    }
+
+    /* check if the buff is full, and if so flash it out */
     if (ctx->bufUsed == ctx->block_size) {
-        /* the buff is full, flash out */
+        byte* output = out;
+
+        /* During decryption we save the last block to check padding on Final.
+         * Update the last block stored if one has already been stored */
+        if (ctx->enc == 0) {
+            if (ctx->lastUsed == 1) {
+                XMEMCPY(out, ctx->lastBlock, ctx->block_size);
+                *outl+= ctx->block_size;
+                out  += ctx->block_size;
+            }
+            output = ctx->lastBlock; /* redirect output to last block buffer */
+            ctx->lastUsed = 1;
+        }
+
         PRINT_BUF(ctx->buf, ctx->block_size);
-        if (evpCipherBlock(ctx, out, ctx->buf, ctx->block_size) == 0)
+        if (evpCipherBlock(ctx, output, ctx->buf, ctx->block_size) == 0) {
             return WOLFSSL_FAILURE;
+        }
         PRINT_BUF(out, ctx->block_size);
-        if (ctx->enc == 0) {
-            ctx->lastUsed = 1;
-            XMEMCPY(ctx->lastBlock, out, ctx->block_size);
-        } else {
+        ctx->bufUsed = 0;
+
+        /* if doing encryption update the new output block, decryption will
+         * always have the last block saved for when Final is called */
+        if ((ctx->enc != 0)) {
             *outl+= ctx->block_size;
             out  += ctx->block_size;
         }
-        ctx->bufUsed = 0;
     }
 
     blocks = inl / ctx->block_size;
     if (blocks > 0) {
+        /* During decryption we save the last block to check padding on Final.
+         * Update the last block stored if one has already been stored */
+        if ((ctx->enc == 0) && (ctx->lastUsed == 1)) {
+            PRINT_BUF(ctx->lastBlock, ctx->block_size);
+            XMEMCPY(out, ctx->lastBlock, ctx->block_size);
+            *outl += ctx->block_size;
+            out += ctx->block_size;
+            ctx->lastUsed = 0;
+        }
+
         /* process blocks */
-        if (evpCipherBlock(ctx, out, in, blocks * ctx->block_size) == 0)
+        if (evpCipherBlock(ctx, out, in, blocks * ctx->block_size) == 0) {
             return WOLFSSL_FAILURE;
+        }
         PRINT_BUF(in, ctx->block_size*blocks);
         PRINT_BUF(out,ctx->block_size*blocks);
         inl  -= ctx->block_size * blocks;
@@ -356,23 +681,30 @@
             if ((ctx->flags & WOLFSSL_EVP_CIPH_NO_PADDING) ||
                     (ctx->block_size == 1)) {
                 ctx->lastUsed = 0;
-                XMEMCPY(ctx->lastBlock, &out[ctx->block_size * blocks], ctx->block_size);
-                *outl+= ctx->block_size * blocks;
+                *outl += ctx->block_size * blocks;
             } else {
-                ctx->lastUsed = 1;
-                XMEMCPY(ctx->lastBlock, &out[ctx->block_size * (blocks-1)], ctx->block_size);
-                *outl+= ctx->block_size * (blocks-1);
+                /* in the case of decryption and padding, store the last block
+                 * here in order to verify the padding when Final is called */
+                if (inl == 0) { /* if not 0 then we know leftovers are checked*/
+                    ctx->lastUsed = 1;
+                    blocks = blocks - 1; /* save last block to check padding in
+                                          * EVP_CipherFinal call */
+                    XMEMCPY(ctx->lastBlock, &out[ctx->block_size * blocks],
+                            ctx->block_size);
+                }
+                *outl += ctx->block_size * blocks;
             }
         } else {
-            *outl+= ctx->block_size * blocks;
-        }
-    }
+            *outl += ctx->block_size * blocks;
+        }
+    }
+
+
     if (inl > 0) {
         /* put fraction into buff */
         fillBuff(ctx, in, inl);
         /* no increase of outl */
     }
-
     (void)out; /* silence warning in case not read */
 
     return WOLFSSL_SUCCESS;
@@ -398,67 +730,189 @@
     return ctx->block_size - n;
 }
 
-WOLFSSL_API int  wolfSSL_EVP_CipherFinal(WOLFSSL_EVP_CIPHER_CTX *ctx,
+int  wolfSSL_EVP_CipherFinal(WOLFSSL_EVP_CIPHER_CTX *ctx,
                                    unsigned char *out, int *outl)
 {
     int fl;
-    if (ctx == NULL || out == NULL) return BAD_FUNC_ARG;
+    int ret = WOLFSSL_SUCCESS;
+    if (!ctx || !outl)
+        return WOLFSSL_FAILURE;
+
     WOLFSSL_ENTER("wolfSSL_EVP_CipherFinal");
+
+#if !defined(NO_AES) && defined(HAVE_AESGCM)
+        switch (ctx->cipherType) {
+            case AES_128_GCM_TYPE:
+            case AES_192_GCM_TYPE:
+            case AES_256_GCM_TYPE:
+                *outl = 0;
+                /* Clear IV, since IV reuse is not recommended for AES GCM. */
+                XMEMSET(ctx->iv, 0, AES_BLOCK_SIZE);
+                return WOLFSSL_SUCCESS;
+            default:
+                /* fall-through */
+                break;
+        }
+#endif /* !NO_AES && HAVE_AESGCM */
+
+    if (!out)
+        return WOLFSSL_FAILURE;
+
     if (ctx->flags & WOLFSSL_EVP_CIPH_NO_PADDING) {
         if (ctx->bufUsed != 0) return WOLFSSL_FAILURE;
         *outl = 0;
-        return WOLFSSL_SUCCESS;
-    }
-    if (ctx->enc) {
+    }
+    else if (ctx->enc) {
         if (ctx->block_size == 1) {
             *outl = 0;
-            return WOLFSSL_SUCCESS;
-        }
-        if ((ctx->bufUsed >= 0) && (ctx->block_size != 1)) {
+        }
+        else if ((ctx->bufUsed >= 0) && (ctx->block_size != 1)) {
             padBlock(ctx);
             PRINT_BUF(ctx->buf, ctx->block_size);
-            if (evpCipherBlock(ctx, out, ctx->buf, ctx->block_size) == 0)
-                return WOLFSSL_FAILURE;
-
-            PRINT_BUF(out, ctx->block_size);
-            *outl = ctx->block_size;
-        }
-    } else {
+            if (evpCipherBlock(ctx, out, ctx->buf, ctx->block_size) == 0) {
+                WOLFSSL_MSG("Final Cipher Block failed");
+                ret = WOLFSSL_FAILURE;
+            }
+            else {
+                PRINT_BUF(out, ctx->block_size);
+                *outl = ctx->block_size;
+            }
+        }
+    }
+    else {
         if (ctx->block_size == 1) {
             *outl = 0;
-            return WOLFSSL_SUCCESS;
-        }
-        if (ctx->lastUsed) {
+        }
+        else if ((ctx->bufUsed % ctx->block_size) != 0) {
+            *outl = 0;
+            /* not enough padding for decrypt */
+            WOLFSSL_MSG("Final Cipher Block not enough padding");
+            ret = WOLFSSL_FAILURE;
+        }
+        else if (ctx->lastUsed) {
             PRINT_BUF(ctx->lastBlock, ctx->block_size);
             if ((fl = checkPad(ctx, ctx->lastBlock)) >= 0) {
                 XMEMCPY(out, ctx->lastBlock, fl);
                 *outl = fl;
-            } else return 0;
-        }
-    }
+                if (ctx->lastUsed == 0 && ctx->bufUsed == 0) {
+                    /* return error in cases where the block length is incorrect */
+                    WOLFSSL_MSG("Final Cipher Block bad length");
+                    ret = WOLFSSL_FAILURE;
+                }
+            }
+            else {
+                ret = WOLFSSL_FAILURE;
+            }
+        }
+        else if (ctx->lastUsed == 0 && ctx->bufUsed == 0) {
+            /* return error in cases where the block length is incorrect */
+            ret = WOLFSSL_FAILURE;
+        }
+    }
+    if (ret == WOLFSSL_SUCCESS) {
+        /* reset cipher state after final */
+        wolfSSL_EVP_CipherInit(ctx, NULL, NULL, NULL, -1);
+    }
+    return ret;
+}
+
+
+#ifdef WOLFSSL_EVP_DECRYPT_LEGACY
+/* This is a version of DecryptFinal to work with data encrypted with
+ * wolfSSL_EVP_EncryptFinal() with the broken padding. (pre-v3.12.0)
+ * Only call this after wolfSSL_EVP_CipherFinal() fails on a decrypt.
+ * Note, you don't know if the padding is good or bad with the old
+ * encrypt, but it is likely to be or bad. It will update the output
+ * length with the block_size so the last block is still captured. */
+WOLFSSL_API int  wolfSSL_EVP_DecryptFinal_legacy(WOLFSSL_EVP_CIPHER_CTX *ctx,
+        unsigned char *out, int *outl)
+{
+    int fl;
+    if (ctx == NULL || out == NULL || outl == NULL)
+        return BAD_FUNC_ARG;
+
+    WOLFSSL_ENTER("wolfSSL_EVP_DecryptFinal_legacy");
+    if (ctx->block_size == 1) {
+        *outl = 0;
+        return WOLFSSL_SUCCESS;
+    }
+    if ((ctx->bufUsed % ctx->block_size) != 0) {
+        *outl = 0;
+        /* not enough padding for decrypt */
+        return WOLFSSL_FAILURE;
+    }
+    /* The original behavior of CipherFinal() was like it is now,
+     * but checkPad would return 0 in case of a bad pad. It would
+     * treat the pad as 0, and leave the data in the output buffer,
+     * and not try to copy anything. This converts checkPad's -1 error
+     * code to block_size.
+     */
+    if (ctx->lastUsed) {
+        PRINT_BUF(ctx->lastBlock, ctx->block_size);
+        if ((fl = checkPad(ctx, ctx->lastBlock)) < 0) {
+            fl = ctx->block_size;
+        }
+        else {
+            XMEMCPY(out, ctx->lastBlock, fl);
+        }
+        *outl = fl;
+    }
+    /* return error in cases where the block length is incorrect */
+    if (ctx->lastUsed == 0 && ctx->bufUsed == 0) {
+        return WOLFSSL_FAILURE;
+    }
+
     return WOLFSSL_SUCCESS;
 }
-
-WOLFSSL_API int wolfSSL_EVP_CIPHER_CTX_block_size(const WOLFSSL_EVP_CIPHER_CTX *ctx)
+#endif
+
+
+int wolfSSL_EVP_CIPHER_CTX_block_size(const WOLFSSL_EVP_CIPHER_CTX *ctx)
 {
     if (ctx == NULL) return BAD_FUNC_ARG;
     switch (ctx->cipherType) {
 #if !defined(NO_AES) || !defined(NO_DES3)
-#if !defined(NO_AES) && defined(HAVE_AES_CBC)
+#if !defined(NO_AES)
+#if defined(HAVE_AES_CBC)
     case AES_128_CBC_TYPE:
     case AES_192_CBC_TYPE:
     case AES_256_CBC_TYPE:
 #endif
-#if !defined(NO_AES) && defined(WOLFSSL_AES_COUNTER)
+#if defined(HAVE_AESGCM)
+    case AES_128_GCM_TYPE:
+    case AES_192_GCM_TYPE:
+    case AES_256_GCM_TYPE:
+#endif
+#if defined(WOLFSSL_AES_COUNTER)
     case AES_128_CTR_TYPE:
     case AES_192_CTR_TYPE:
     case AES_256_CTR_TYPE:
 #endif
-#if !defined(NO_AES)
+#if defined(WOLFSSL_AES_CFB)
+    case AES_128_CFB1_TYPE:
+    case AES_192_CFB1_TYPE:
+    case AES_256_CFB1_TYPE:
+    case AES_128_CFB8_TYPE:
+    case AES_192_CFB8_TYPE:
+    case AES_256_CFB8_TYPE:
+    case AES_128_CFB128_TYPE:
+    case AES_192_CFB128_TYPE:
+    case AES_256_CFB128_TYPE:
+#endif
+#if defined(WOLFSSL_AES_OFB)
+    case AES_128_OFB_TYPE:
+    case AES_192_OFB_TYPE:
+    case AES_256_OFB_TYPE:
+#endif
+#if defined(WOLFSSL_AES_XTS)
+    case AES_128_XTS_TYPE:
+    case AES_256_XTS_TYPE:
+#endif
+
     case AES_128_ECB_TYPE:
     case AES_192_ECB_TYPE:
     case AES_256_ECB_TYPE:
-#endif
+#endif /* !NO_AES */
 #ifndef NO_DES3
     case DES_CBC_TYPE:
     case DES_ECB_TYPE:
@@ -475,86 +929,157 @@
 static unsigned int cipherType(const WOLFSSL_EVP_CIPHER *cipher)
 {
     if (cipher == NULL) return 0; /* dummy for #ifdef */
-  #ifndef NO_DES3
-      else if (XSTRNCMP(cipher, EVP_DES_CBC, EVP_DES_SIZE) == 0)
-          return DES_CBC_TYPE;
-      else if (XSTRNCMP(cipher, EVP_DES_EDE3_CBC, EVP_DES_EDE3_SIZE) == 0)
-          return DES_EDE3_CBC_TYPE;
-  #if !defined(NO_DES3)
-      else if (XSTRNCMP(cipher, EVP_DES_ECB, EVP_DES_SIZE) == 0)
-          return DES_ECB_TYPE;
-      else if (XSTRNCMP(cipher, EVP_DES_EDE3_ECB, EVP_DES_EDE3_SIZE) == 0)
-          return DES_EDE3_ECB_TYPE;
-  #endif /* NO_DES3 && HAVE_AES_ECB */
-  #endif
-
-  #if !defined(NO_AES) && defined(HAVE_AES_CBC)
-      #ifdef WOLFSSL_AES_128
-      else if (XSTRNCMP(cipher, EVP_AES_128_CBC, EVP_AES_SIZE) == 0)
-          return AES_128_CBC_TYPE;
-      #endif
-      #ifdef WOLFSSL_AES_192
-      else if (XSTRNCMP(cipher, EVP_AES_192_CBC, EVP_AES_SIZE) == 0)
-          return AES_192_CBC_TYPE;
-      #endif
-      #ifdef WOLFSSL_AES_256
-      else if (XSTRNCMP(cipher, EVP_AES_256_CBC, EVP_AES_SIZE) == 0)
-          return AES_256_CBC_TYPE;
-      #endif
-  #endif /* !NO_AES && HAVE_AES_CBC */
-  #if !defined(NO_AES) && defined(WOLFSSL_AES_COUNTER)
-      #ifdef WOLFSSL_AES_128
-      else if (XSTRNCMP(cipher, EVP_AES_128_CTR, EVP_AES_SIZE) == 0)
-          return AES_128_CTR_TYPE;
-      #endif
-      #ifdef WOLFSSL_AES_192
-      else if (XSTRNCMP(cipher, EVP_AES_192_CTR, EVP_AES_SIZE) == 0)
-          return AES_192_CTR_TYPE;
-      #endif
-      #ifdef WOLFSSL_AES_256
-      else if (XSTRNCMP(cipher, EVP_AES_256_CTR, EVP_AES_SIZE) == 0)
-          return AES_256_CTR_TYPE;
-      #endif
-  #endif /* !NO_AES && HAVE_AES_CBC */
-  #if !defined(NO_AES) && defined(HAVE_AES_ECB)
-      #ifdef WOLFSSL_AES_128
-      else if (XSTRNCMP(cipher, EVP_AES_128_ECB, EVP_AES_SIZE) == 0)
-          return AES_128_ECB_TYPE;
-      #endif
-      #ifdef WOLFSSL_AES_192
-      else if (XSTRNCMP(cipher, EVP_AES_192_ECB, EVP_AES_SIZE) == 0)
-          return AES_192_ECB_TYPE;
-      #endif
-      #ifdef WOLFSSL_AES_256
-      else if (XSTRNCMP(cipher, EVP_AES_256_ECB, EVP_AES_SIZE) == 0)
-          return AES_256_ECB_TYPE;
-      #endif
-  #endif /* !NO_AES && HAVE_AES_CBC */
+#ifndef NO_DES3
+    else if (EVP_DES_CBC && XSTRNCMP(cipher, EVP_DES_CBC, EVP_DES_SIZE) == 0)
+        return DES_CBC_TYPE;
+    else if (EVP_DES_EDE3_CBC && XSTRNCMP(cipher, EVP_DES_EDE3_CBC, EVP_DES_EDE3_SIZE) == 0)
+        return DES_EDE3_CBC_TYPE;
+#if !defined(NO_DES3)
+    else if (EVP_DES_ECB && XSTRNCMP(cipher, EVP_DES_ECB, EVP_DES_SIZE) == 0)
+        return DES_ECB_TYPE;
+    else if (EVP_DES_EDE3_ECB && XSTRNCMP(cipher, EVP_DES_EDE3_ECB, EVP_DES_EDE3_SIZE) == 0)
+        return DES_EDE3_ECB_TYPE;
+#endif /* NO_DES3 && HAVE_AES_ECB */
+#endif
+#if !defined(NO_AES)
+#if defined(HAVE_AES_CBC)
+    #ifdef WOLFSSL_AES_128
+    else if (EVP_AES_128_CBC && XSTRNCMP(cipher, EVP_AES_128_CBC, EVP_AES_SIZE) == 0)
+        return AES_128_CBC_TYPE;
+    #endif
+    #ifdef WOLFSSL_AES_192
+    else if (EVP_AES_192_CBC && XSTRNCMP(cipher, EVP_AES_192_CBC, EVP_AES_SIZE) == 0)
+        return AES_192_CBC_TYPE;
+    #endif
+    #ifdef WOLFSSL_AES_256
+    else if (EVP_AES_256_CBC && XSTRNCMP(cipher, EVP_AES_256_CBC, EVP_AES_SIZE) == 0)
+        return AES_256_CBC_TYPE;
+    #endif
+#endif /* HAVE_AES_CBC */
+#if defined(HAVE_AESGCM)
+    #ifdef WOLFSSL_AES_128
+    else if (EVP_AES_128_GCM && XSTRNCMP(cipher, EVP_AES_128_GCM, EVP_AES_SIZE) == 0)
+        return AES_128_GCM_TYPE;
+    #endif
+    #ifdef WOLFSSL_AES_192
+    else if (EVP_AES_192_GCM && XSTRNCMP(cipher, EVP_AES_192_GCM, EVP_AES_SIZE) == 0)
+        return AES_192_GCM_TYPE;
+    #endif
+    #ifdef WOLFSSL_AES_256
+    else if (EVP_AES_256_GCM && XSTRNCMP(cipher, EVP_AES_256_GCM, EVP_AES_SIZE) == 0)
+        return AES_256_GCM_TYPE;
+    #endif
+#endif /* HAVE_AESGCM */
+#if defined(WOLFSSL_AES_COUNTER)
+    #ifdef WOLFSSL_AES_128
+    else if (EVP_AES_128_CTR && XSTRNCMP(cipher, EVP_AES_128_CTR, EVP_AES_SIZE) == 0)
+        return AES_128_CTR_TYPE;
+    #endif
+    #ifdef WOLFSSL_AES_192
+    else if (EVP_AES_192_CTR && XSTRNCMP(cipher, EVP_AES_192_CTR, EVP_AES_SIZE) == 0)
+        return AES_192_CTR_TYPE;
+    #endif
+    #ifdef WOLFSSL_AES_256
+    else if (EVP_AES_256_CTR && XSTRNCMP(cipher, EVP_AES_256_CTR, EVP_AES_SIZE) == 0)
+        return AES_256_CTR_TYPE;
+    #endif
+#endif /* HAVE_AES_CBC */
+#if defined(HAVE_AES_ECB)
+    #ifdef WOLFSSL_AES_128
+    else if (EVP_AES_128_ECB && XSTRNCMP(cipher, EVP_AES_128_ECB, EVP_AES_SIZE) == 0)
+        return AES_128_ECB_TYPE;
+    #endif
+    #ifdef WOLFSSL_AES_192
+    else if (EVP_AES_192_ECB && XSTRNCMP(cipher, EVP_AES_192_ECB, EVP_AES_SIZE) == 0)
+        return AES_192_ECB_TYPE;
+    #endif
+    #ifdef WOLFSSL_AES_256
+    else if (EVP_AES_256_ECB && XSTRNCMP(cipher, EVP_AES_256_ECB, EVP_AES_SIZE) == 0)
+        return AES_256_ECB_TYPE;
+    #endif
+#endif /*HAVE_AES_CBC */
+#if defined(WOLFSSL_AES_XTS)
+    #ifdef WOLFSSL_AES_128
+    else if (EVP_AES_128_XTS && XSTRNCMP(cipher, EVP_AES_128_XTS, EVP_AES_SIZE) == 0)
+        return AES_128_XTS_TYPE;
+    #endif
+    #ifdef WOLFSSL_AES_256
+    else if (EVP_AES_256_XTS && XSTRNCMP(cipher, EVP_AES_256_XTS, EVP_AES_SIZE) == 0)
+        return AES_256_XTS_TYPE;
+    #endif
+#endif /* WOLFSSL_AES_XTS */
+#if defined(WOLFSSL_AES_CFB)
+    #ifdef WOLFSSL_AES_128
+    else if (EVP_AES_128_CFB1 && XSTRNCMP(cipher, EVP_AES_128_CFB1, EVP_AESCFB_SIZE) == 0)
+        return AES_128_CFB1_TYPE;
+    #endif
+    #ifdef WOLFSSL_AES_192
+    else if (EVP_AES_192_CFB1 && XSTRNCMP(cipher, EVP_AES_192_CFB1, EVP_AESCFB_SIZE) == 0)
+        return AES_192_CFB1_TYPE;
+    #endif
+    #ifdef WOLFSSL_AES_256
+    else if (EVP_AES_256_CFB1 && XSTRNCMP(cipher, EVP_AES_256_CFB1, EVP_AESCFB_SIZE) == 0)
+        return AES_256_CFB1_TYPE;
+    #endif
+    #ifdef WOLFSSL_AES_128
+    else if (EVP_AES_128_CFB8 && XSTRNCMP(cipher, EVP_AES_128_CFB8, EVP_AESCFB_SIZE) == 0)
+        return AES_128_CFB8_TYPE;
+    #endif
+    #ifdef WOLFSSL_AES_192
+    else if (EVP_AES_192_CFB8 && XSTRNCMP(cipher, EVP_AES_192_CFB8, EVP_AESCFB_SIZE) == 0)
+        return AES_192_CFB8_TYPE;
+    #endif
+    #ifdef WOLFSSL_AES_256
+    else if (EVP_AES_256_CFB8 && XSTRNCMP(cipher, EVP_AES_256_CFB8, EVP_AESCFB_SIZE) == 0)
+        return AES_256_CFB8_TYPE;
+    #endif
+    #ifdef WOLFSSL_AES_128
+    else if (EVP_AES_128_CFB128 && XSTRNCMP(cipher, EVP_AES_128_CFB128, EVP_AESCFB_SIZE) == 0)
+        return AES_128_CFB128_TYPE;
+    #endif
+    #ifdef WOLFSSL_AES_192
+    else if (EVP_AES_192_CFB128 && XSTRNCMP(cipher, EVP_AES_192_CFB128, EVP_AESCFB_SIZE) == 0)
+        return AES_192_CFB128_TYPE;
+    #endif
+    #ifdef WOLFSSL_AES_256
+    else if (EVP_AES_256_CFB128 && XSTRNCMP(cipher, EVP_AES_256_CFB128, EVP_AESCFB_SIZE) == 0)
+        return AES_256_CFB128_TYPE;
+    #endif
+#endif /*HAVE_AES_CBC */
+#endif /* !NO_AES */
       else return 0;
 }
 
-WOLFSSL_API int wolfSSL_EVP_CIPHER_block_size(const WOLFSSL_EVP_CIPHER *cipher)
+int wolfSSL_EVP_CIPHER_block_size(const WOLFSSL_EVP_CIPHER *cipher)
 {
   if (cipher == NULL) return BAD_FUNC_ARG;
   switch (cipherType(cipher)) {
-  #if !defined(NO_AES) && defined(HAVE_AES_CBC)
+#if !defined(NO_AES)
+  #if defined(HAVE_AES_CBC)
       case AES_128_CBC_TYPE:
       case AES_192_CBC_TYPE:
       case AES_256_CBC_TYPE:
-                             return AES_BLOCK_SIZE;
+          return AES_BLOCK_SIZE;
   #endif
-  #if !defined(NO_AES) && defined(WOLFSSL_AES_COUNTER)
+  #if defined(HAVE_AESGCM)
+      case AES_128_GCM_TYPE:
+      case AES_192_GCM_TYPE:
+      case AES_256_GCM_TYPE:
+          return AES_BLOCK_SIZE;
+  #endif
+  #if defined(WOLFSSL_AES_COUNTER)
       case AES_128_CTR_TYPE:
       case AES_192_CTR_TYPE:
       case AES_256_CTR_TYPE:
-                             return AES_BLOCK_SIZE;
+          return AES_BLOCK_SIZE;
   #endif
-  #if !defined(NO_AES) && defined(HAVE_AES_ECB)
+  #if defined(HAVE_AES_ECB)
       case AES_128_ECB_TYPE:
       case AES_192_ECB_TYPE:
       case AES_256_ECB_TYPE:
-                             return AES_BLOCK_SIZE;
+          return AES_BLOCK_SIZE;
   #endif
+#endif /* NO_AES */
   #ifndef NO_DES3
       case DES_CBC_TYPE: return 8;
       case DES_EDE3_CBC_TYPE: return 8;
@@ -569,24 +1094,30 @@
 unsigned long WOLFSSL_CIPHER_mode(const WOLFSSL_EVP_CIPHER *cipher)
 {
     switch (cipherType(cipher)) {
-    #if !defined(NO_AES) && defined(HAVE_AES_CBC)
+#if !defined(NO_AES)
+    #if defined(HAVE_AES_CBC)
         case AES_128_CBC_TYPE:
         case AES_192_CBC_TYPE:
         case AES_256_CBC_TYPE:
             return WOLFSSL_EVP_CIPH_CBC_MODE;
     #endif
-    #if !defined(NO_AES) && defined(WOLFSSL_AES_COUNTER)
+    #if defined(HAVE_AESGCM)
+        case AES_128_GCM_TYPE:
+        case AES_192_GCM_TYPE:
+        case AES_256_GCM_TYPE:
+            return WOLFSSL_EVP_CIPH_GCM_MODE;
+    #endif
+    #if defined(WOLFSSL_AES_COUNTER)
         case AES_128_CTR_TYPE:
         case AES_192_CTR_TYPE:
         case AES_256_CTR_TYPE:
             return WOLFSSL_EVP_CIPH_CTR_MODE;
     #endif
-    #if !defined(NO_AES)
         case AES_128_ECB_TYPE:
         case AES_192_ECB_TYPE:
         case AES_256_ECB_TYPE:
             return WOLFSSL_EVP_CIPH_ECB_MODE;
-    #endif
+#endif /* NO_ASE */
     #ifndef NO_DES3
         case DES_CBC_TYPE:
         case DES_EDE3_CBC_TYPE:
@@ -604,26 +1135,33 @@
         }
 }
 
-WOLFSSL_API unsigned long WOLFSSL_EVP_CIPHER_mode(const WOLFSSL_EVP_CIPHER *cipher)
+unsigned long WOLFSSL_EVP_CIPHER_mode(const WOLFSSL_EVP_CIPHER *cipher)
 {
   if (cipher == NULL) return 0;
   return WOLFSSL_CIPHER_mode(cipher);
 }
 
-WOLFSSL_API void wolfSSL_EVP_CIPHER_CTX_set_flags(WOLFSSL_EVP_CIPHER_CTX *ctx, int flags)
+void wolfSSL_EVP_CIPHER_CTX_set_flags(WOLFSSL_EVP_CIPHER_CTX *ctx, int flags)
 {
     if (ctx != NULL) {
-        ctx->flags = flags;
+        ctx->flags |= flags;
     }
 }
 
-WOLFSSL_API unsigned long wolfSSL_EVP_CIPHER_flags(const WOLFSSL_EVP_CIPHER *cipher)
+void wolfSSL_EVP_CIPHER_CTX_clear_flags(WOLFSSL_EVP_CIPHER_CTX *ctx, int flags)
+{
+    if (ctx != NULL) {
+        ctx->flags &= ~flags;
+    }
+}
+
+unsigned long wolfSSL_EVP_CIPHER_flags(const WOLFSSL_EVP_CIPHER *cipher)
 {
   if (cipher == NULL) return 0;
   return WOLFSSL_CIPHER_mode(cipher);
 }
 
-WOLFSSL_API int  wolfSSL_EVP_CIPHER_CTX_set_padding(WOLFSSL_EVP_CIPHER_CTX *ctx, int padding)
+int  wolfSSL_EVP_CIPHER_CTX_set_padding(WOLFSSL_EVP_CIPHER_CTX *ctx, int padding)
 {
   if (ctx == NULL) return BAD_FUNC_ARG;
   if (padding) {
@@ -635,7 +1173,7 @@
   return 1;
 }
 
-WOLFSSL_API int wolfSSL_EVP_add_digest(const WOLFSSL_EVP_MD *digest)
+int wolfSSL_EVP_add_digest(const WOLFSSL_EVP_MD *digest)
 {
     (void)digest;
     /* nothing to do */
@@ -647,10 +1185,14 @@
  *
  * return WOLFSSL_SUCCESS on success
  */
-WOLFSSL_API int wolfSSL_EVP_PKEY_CTX_free(WOLFSSL_EVP_PKEY_CTX *ctx)
+int wolfSSL_EVP_PKEY_CTX_free(WOLFSSL_EVP_PKEY_CTX *ctx)
 {
     if (ctx == NULL) return 0;
     WOLFSSL_ENTER("EVP_PKEY_CTX_free");
+    if (ctx->pkey != NULL)
+        wolfSSL_EVP_PKEY_free(ctx->pkey);
+    if (ctx->peerKey != NULL)
+        wolfSSL_EVP_PKEY_free(ctx->peerKey);
     XFREE(ctx, NULL, DYNAMIC_TYPE_PUBLIC_KEY);
     return WOLFSSL_SUCCESS;
 }
@@ -663,9 +1205,10 @@
  *
  * return the new structure on success and NULL if failed.
  */
-WOLFSSL_API WOLFSSL_EVP_PKEY_CTX *wolfSSL_EVP_PKEY_CTX_new(WOLFSSL_EVP_PKEY *pkey, WOLFSSL_ENGINE *e)
+WOLFSSL_EVP_PKEY_CTX *wolfSSL_EVP_PKEY_CTX_new(WOLFSSL_EVP_PKEY *pkey, WOLFSSL_ENGINE *e)
 {
     WOLFSSL_EVP_PKEY_CTX* ctx;
+    int type = NID_undef;
 
     if (pkey == NULL) return 0;
     if (e != NULL) return 0;
@@ -679,7 +1222,16 @@
 #if !defined(NO_RSA) && !defined(HAVE_USER_RSA)
     ctx->padding = RSA_PKCS1_PADDING;
 #endif
-
+    type = wolfSSL_EVP_PKEY_type(pkey->type);
+
+    if (type != NID_undef) {
+        if (wc_LockMutex(&pkey->refMutex) != 0) {
+            WOLFSSL_MSG("Couldn't lock pkey mutex");
+        }
+        pkey->references++;
+
+        wc_UnLockMutex(&pkey->refMutex);
+    }
     return ctx;
 }
 
@@ -691,7 +1243,7 @@
  *
  * returns WOLFSSL_SUCCESS on success.
  */
-WOLFSSL_API int wolfSSL_EVP_PKEY_CTX_set_rsa_padding(WOLFSSL_EVP_PKEY_CTX *ctx, int padding)
+int wolfSSL_EVP_PKEY_CTX_set_rsa_padding(WOLFSSL_EVP_PKEY_CTX *ctx, int padding)
 {
     if (ctx == NULL) return 0;
     WOLFSSL_ENTER("EVP_PKEY_CTX_set_rsa_padding");
@@ -699,6 +1251,144 @@
     return WOLFSSL_SUCCESS;
 }
 
+/* create a PKEY contxt and return it */
+WOLFSSL_EVP_PKEY_CTX *wolfSSL_EVP_PKEY_CTX_new_id(int id, WOLFSSL_ENGINE *e)
+{
+    WOLFSSL_EVP_PKEY* pkey;
+    WOLFSSL_EVP_PKEY_CTX* ctx = NULL;
+
+    WOLFSSL_ENTER("wolfSSL_EVP_PKEY_CTX_new_id");
+
+    pkey = wolfSSL_EVP_PKEY_new_ex(NULL);
+    if (pkey) {
+        pkey->type = id;
+        ctx = wolfSSL_EVP_PKEY_CTX_new(pkey, e);
+        if (ctx == NULL) {
+            wolfSSL_EVP_PKEY_free(pkey);
+        }
+    }
+    return ctx;
+}
+
+/* Returns WOLFSSL_SUCCESS or error */
+int wolfSSL_EVP_PKEY_CTX_set_rsa_keygen_bits(WOLFSSL_EVP_PKEY_CTX *ctx, int bits)
+{
+    if (ctx) {
+        ctx->nbits = bits;
+    }
+    return WOLFSSL_SUCCESS;
+}
+
+
+int wolfSSL_EVP_PKEY_derive_init(WOLFSSL_EVP_PKEY_CTX *ctx)
+{
+    WOLFSSL_ENTER("wolfSSL_EVP_PKEY_derive_init");
+
+    if (!ctx) {
+        return WOLFSSL_FAILURE;
+    }
+    wolfSSL_EVP_PKEY_free(ctx->peerKey);
+    ctx->op = EVP_PKEY_OP_DERIVE;
+    ctx->padding = 0;
+    ctx->nbits = 0;
+    return WOLFSSL_SUCCESS;
+}
+
+int wolfSSL_EVP_PKEY_derive_set_peer(WOLFSSL_EVP_PKEY_CTX *ctx, WOLFSSL_EVP_PKEY *peer)
+{
+    WOLFSSL_ENTER("wolfSSL_EVP_PKEY_derive_set_peer");
+
+    if (!ctx || ctx->op != EVP_PKEY_OP_DERIVE) {
+        return WOLFSSL_FAILURE;
+    }
+    wolfSSL_EVP_PKEY_free(ctx->peerKey);
+    ctx->peerKey = peer;
+    if (!wolfSSL_EVP_PKEY_up_ref(peer)) {
+        ctx->peerKey = NULL;
+        return WOLFSSL_FAILURE;
+    }
+    return WOLFSSL_SUCCESS;
+}
+
+#if !defined(NO_DH) && defined(HAVE_ECC)
+int wolfSSL_EVP_PKEY_derive(WOLFSSL_EVP_PKEY_CTX *ctx, unsigned char *key, size_t *keylen)
+{
+    int len;
+
+    WOLFSSL_ENTER("wolfSSL_EVP_PKEY_derive");
+
+    if (!ctx || ctx->op != EVP_PKEY_OP_DERIVE || !ctx->pkey || !ctx->peerKey || !keylen
+            || ctx->pkey->type != ctx->peerKey->type) {
+        return WOLFSSL_FAILURE;
+    }
+    switch (ctx->pkey->type) {
+#ifndef NO_DH
+    case EVP_PKEY_DH:
+        /* Use DH */
+        if (!ctx->pkey->dh || !ctx->peerKey->dh || !ctx->peerKey->dh->pub_key) {
+            return WOLFSSL_FAILURE;
+        }
+        if ((len = wolfSSL_DH_size(ctx->pkey->dh)) <= 0) {
+            return WOLFSSL_FAILURE;
+        }
+        if (key) {
+            if (*keylen < (size_t)len) {
+                return WOLFSSL_FAILURE;
+            }
+            if (wolfSSL_DH_compute_key(key, ctx->peerKey->dh->pub_key,
+                                       ctx->pkey->dh) != len) {
+                return WOLFSSL_FAILURE;
+            }
+        }
+        *keylen = (size_t)len;
+        break;
+#endif
+#ifdef HAVE_ECC
+    case EVP_PKEY_EC:
+        /* Use ECDH */
+        if (!ctx->pkey->ecc || !ctx->peerKey->ecc) {
+            return WOLFSSL_FAILURE;
+        }
+        /* set internal key if not done */
+        if (!ctx->pkey->ecc->inSet) {
+            if (SetECKeyInternal(ctx->pkey->ecc) != WOLFSSL_SUCCESS) {
+                WOLFSSL_MSG("SetECKeyInternal failed");
+                return WOLFSSL_FAILURE;
+            }
+        }
+        if (!ctx->peerKey->ecc->exSet || !ctx->peerKey->ecc->pub_key->internal) {
+            if (SetECKeyExternal(ctx->peerKey->ecc) != WOLFSSL_SUCCESS) {
+                WOLFSSL_MSG("SetECKeyExternal failed");
+                return WOLFSSL_FAILURE;
+            }
+        }
+        if (!(len = wc_ecc_size((ecc_key*)ctx->pkey->ecc->internal))) {
+            return WOLFSSL_FAILURE;
+        }
+        if (key) {
+            word32 len32 = (word32)len;
+            if (*keylen < len32) {
+                WOLFSSL_MSG("buffer too short");
+                return WOLFSSL_FAILURE;
+            }
+            if (wc_ecc_shared_secret_ssh((ecc_key*)ctx->pkey->ecc->internal,
+                                         (ecc_point*)ctx->peerKey->ecc->pub_key->internal,
+                                         key, &len32) != MP_OKAY) {
+                WOLFSSL_MSG("wc_ecc_shared_secret failed");
+                return WOLFSSL_FAILURE;
+            }
+            len = (int)len32;
+        }
+        *keylen = (size_t)len;
+        break;
+#endif
+    default:
+        WOLFSSL_MSG("Unknown key type");
+        return WOLFSSL_FAILURE;
+    }
+    return WOLFSSL_SUCCESS;
+}
+#endif
 
 /* Uses the WOLFSSL_EVP_PKEY_CTX to decrypt a buffer.
  *
@@ -710,11 +1400,11 @@
  *
  * returns WOLFSSL_SUCCESS on success.
  */
-WOLFSSL_API int wolfSSL_EVP_PKEY_decrypt(WOLFSSL_EVP_PKEY_CTX *ctx,
+int wolfSSL_EVP_PKEY_decrypt(WOLFSSL_EVP_PKEY_CTX *ctx,
                      unsigned char *out, size_t *outlen,
                      const unsigned char *in, size_t inlen)
 {
-    int len;
+    int len = 0;
 
     if (ctx == NULL) return 0;
     WOLFSSL_ENTER("EVP_PKEY_decrypt");
@@ -753,7 +1443,7 @@
  *
  * Returns WOLFSSL_FAILURE on failure and WOLFSSL_SUCCESS on success
  */
-WOLFSSL_API int wolfSSL_EVP_PKEY_decrypt_init(WOLFSSL_EVP_PKEY_CTX *ctx)
+int wolfSSL_EVP_PKEY_decrypt_init(WOLFSSL_EVP_PKEY_CTX *ctx)
 {
     if (ctx == NULL) return WOLFSSL_FAILURE;
     WOLFSSL_ENTER("EVP_PKEY_decrypt_init");
@@ -781,11 +1471,11 @@
  *
  * Returns WOLFSSL_FAILURE on failure and WOLFSSL_SUCCESS on success
  */
-WOLFSSL_API int wolfSSL_EVP_PKEY_encrypt(WOLFSSL_EVP_PKEY_CTX *ctx,
+int wolfSSL_EVP_PKEY_encrypt(WOLFSSL_EVP_PKEY_CTX *ctx,
                      unsigned char *out, size_t *outlen,
                      const unsigned char *in, size_t inlen)
 {
-    int len;
+    int len = 0;
     if (ctx == NULL) return WOLFSSL_FAILURE;
     WOLFSSL_ENTER("EVP_PKEY_encrypt");
     if (ctx->op != EVP_PKEY_OP_ENCRYPT) return WOLFSSL_FAILURE;
@@ -824,7 +1514,7 @@
  *
  * Returns WOLFSSL_FAILURE on failure and WOLFSSL_SUCCESS on success
  */
-WOLFSSL_API int wolfSSL_EVP_PKEY_encrypt_init(WOLFSSL_EVP_PKEY_CTX *ctx)
+int wolfSSL_EVP_PKEY_encrypt_init(WOLFSSL_EVP_PKEY_CTX *ctx)
 {
     if (ctx == NULL) return WOLFSSL_FAILURE;
     WOLFSSL_ENTER("EVP_PKEY_encrypt_init");
@@ -841,7 +1531,79 @@
     }
     return WOLFSSL_FAILURE;
 }
-
+/******************************************************************************
+* wolfSSL_EVP_PKEY_sign_init -  initializes a public key algorithm context for
+* a signing operation.
+*
+* RETURNS:
+* returns WOLFSSL_SUCCESS on success, otherwise returns -2
+*/
+WOLFSSL_API int wolfSSL_EVP_PKEY_sign_init(WOLFSSL_EVP_PKEY_CTX *ctx)
+{
+    int ret = -2;
+
+    WOLFSSL_MSG("wolfSSL_EVP_PKEY_sign_init");
+    if (!ctx  || !ctx->pkey)
+        return ret;
+
+    switch (ctx->pkey->type) {
+        case EVP_PKEY_RSA:
+            ctx->op = EVP_PKEY_OP_SIGN;
+            ret = WOLFSSL_SUCCESS;
+            break;
+        case EVP_PKEY_EC:
+            WOLFSSL_MSG("not implemented");
+            FALL_THROUGH;
+        default:
+            ret = -2;
+    }
+    return ret;
+}
+/******************************************************************************
+* wolfSSL_EVP_PKEY_sign - performs a public key signing operation using ctx
+* The data to be signed should be hashed since the function does not hash the data.
+*
+* RETURNS:
+* returns WOLFSSL_SUCCESS on success, otherwise returns WOLFSSL_FAILURE
+*/
+
+WOLFSSL_API int wolfSSL_EVP_PKEY_sign(WOLFSSL_EVP_PKEY_CTX *ctx, unsigned char *sig,
+                        size_t *siglen, const unsigned char *tbs, size_t tbslen)
+{
+    int len = 0;
+
+    WOLFSSL_MSG("wolfSSL_EVP_PKEY_sign");
+
+    if (!ctx || ctx->op != EVP_PKEY_OP_SIGN || !ctx->pkey)
+        return WOLFSSL_FAILURE;
+
+    (void)sig;
+    (void)siglen;
+    (void)tbs;
+    (void)tbslen;
+    (void)len;
+
+    switch (ctx->pkey->type) {
+#if !defined(NO_RSA) && !defined(HAVE_USER_RSA)
+    case EVP_PKEY_RSA:
+        len = wolfSSL_RSA_private_encrypt((int)tbslen, (unsigned char*)tbs, sig,
+              ctx->pkey->rsa, ctx->padding);
+        if (len < 0)
+            break;
+        else {
+            *siglen = len;
+            return WOLFSSL_SUCCESS;
+        }
+#endif /* NO_RSA */
+
+    case EVP_PKEY_EC:
+        WOLFSSL_MSG("not implemented");
+        FALL_THROUGH;
+    default:
+        break;
+    }
+    return WOLFSSL_FAILURE;
+}
 
 /* Get the size in bits for WOLFSSL_EVP_PKEY key
  *
@@ -849,7 +1611,7 @@
  *
  * returns the size in bits of key on success
  */
-WOLFSSL_API int wolfSSL_EVP_PKEY_bits(const WOLFSSL_EVP_PKEY *pkey)
+int wolfSSL_EVP_PKEY_bits(const WOLFSSL_EVP_PKEY *pkey)
 {
     int bytes;
 
@@ -860,6 +1622,70 @@
 }
 
 
+int wolfSSL_EVP_PKEY_keygen_init(WOLFSSL_EVP_PKEY_CTX *ctx)
+{
+    (void)ctx;
+    return WOLFSSL_SUCCESS;
+}
+
+int wolfSSL_EVP_PKEY_keygen(WOLFSSL_EVP_PKEY_CTX *ctx,
+  WOLFSSL_EVP_PKEY **ppkey)
+{
+    int ret = WOLFSSL_FAILURE;
+    int ownPkey = 0;
+    WOLFSSL_EVP_PKEY* pkey;
+
+    if (ctx == NULL || ppkey == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    pkey = *ppkey;
+    if (pkey == NULL) {
+        ownPkey = 1;
+        pkey = wolfSSL_EVP_PKEY_new();
+
+        if (pkey == NULL)
+            return ret;
+    }
+
+    switch (pkey->type) {
+#if !defined(HAVE_FAST_RSA) && defined(WOLFSSL_KEY_GEN) && \
+    !defined(NO_RSA) && !defined(HAVE_USER_RSA)
+        case EVP_PKEY_RSA:
+            pkey->rsa = wolfSSL_RSA_generate_key(ctx->nbits, WC_RSA_EXPONENT,
+                NULL, NULL);
+            if (pkey->rsa) {
+                pkey->ownRsa = 1;
+                pkey->pkey_sz = wolfSSL_i2d_RSAPrivateKey(pkey->rsa,
+                        (unsigned char**)&pkey->pkey.ptr);
+                ret = WOLFSSL_SUCCESS;
+            }
+            break;
+#endif
+#ifdef HAVE_ECC
+        case EVP_PKEY_EC:
+            pkey->ecc = wolfSSL_EC_KEY_new();
+            if (pkey->ecc) {
+                ret = wolfSSL_EC_KEY_generate_key(pkey->ecc);
+                if (ret == WOLFSSL_SUCCESS) {
+                    pkey->ownEcc = 1;
+                }
+            }
+#endif
+        default:
+            break;
+    }
+
+    if (ret != WOLFSSL_SUCCESS && ownPkey) {
+        wolfSSL_EVP_PKEY_free(pkey);
+        pkey = NULL;
+    }
+
+    *ppkey = pkey;
+
+    return ret;
+}
+
 /* Get the size in bytes for WOLFSSL_EVP_PKEY key
  *
  * pkey WOLFSSL_EVP_PKEY structure to get key size of
@@ -867,16 +1693,16 @@
  * returns the size of a key on success which is the maximum size of a
  *         signature
  */
-WOLFSSL_API int wolfSSL_EVP_PKEY_size(WOLFSSL_EVP_PKEY *pkey)
+int wolfSSL_EVP_PKEY_size(WOLFSSL_EVP_PKEY *pkey)
 {
     if (pkey == NULL) return 0;
     WOLFSSL_ENTER("EVP_PKEY_size");
 
     switch (pkey->type) {
-#if !defined(NO_RSA) && !defined(HAVE_USER_RSA)
+#ifndef NO_RSA
     case EVP_PKEY_RSA:
         return (int)wolfSSL_RSA_size((const WOLFSSL_RSA*)(pkey->rsa));
-#endif /* NO_RSA */
+#endif /* !NO_RSA */
 
 #ifdef HAVE_ECC
     case EVP_PKEY_EC:
@@ -893,6 +1719,69 @@
     return 0;
 }
 
+#ifndef NO_WOLFSSL_STUB
+WOLFSSL_API int wolfSSL_EVP_PKEY_missing_parameters(WOLFSSL_EVP_PKEY *pkey)
+{
+    (void)pkey;
+    /* not using missing params callback and returning zero to indicate success */
+    return 0;
+}
+#endif
+
+WOLFSSL_API int wolfSSL_EVP_PKEY_cmp(const WOLFSSL_EVP_PKEY *a, const WOLFSSL_EVP_PKEY *b)
+{
+    int ret = -1; /* failure */
+    int a_sz = 0, b_sz = 0;
+
+    if (a == NULL || b == NULL)
+        return ret;
+
+    /* check its the same type of key */
+    if (a->type != b->type)
+        return ret;
+
+    /* get size based on key type */
+    switch (a->type) {
+#ifndef NO_RSA
+    case EVP_PKEY_RSA:
+        a_sz = (int)wolfSSL_RSA_size((const WOLFSSL_RSA*)(a->rsa));
+        b_sz = (int)wolfSSL_RSA_size((const WOLFSSL_RSA*)(b->rsa));
+        break;
+#endif /* !NO_RSA */
+#ifdef HAVE_ECC
+    case EVP_PKEY_EC:
+        if (a->ecc == NULL || a->ecc->internal == NULL ||
+            b->ecc == NULL || b->ecc->internal == NULL) {
+            return ret;
+        }
+        a_sz = wc_ecc_size((ecc_key*)(a->ecc->internal));
+        b_sz = wc_ecc_size((ecc_key*)(b->ecc->internal));
+        break;
+#endif /* HAVE_ECC */
+    default:
+        break;
+    } /* switch (a->type) */
+
+    /* check size */
+    if (a_sz <= 0 || b_sz <= 0 || a_sz != b_sz) {
+        return ret;
+    }
+
+    /* check public key size */
+    if (a->pkey_sz > 0 && b->pkey_sz > 0 && a->pkey_sz != b->pkey_sz) {
+        return ret;
+    }
+
+    /* check public key */
+    if (a->pkey.ptr && b->pkey.ptr) {
+        if (XMEMCMP(a->pkey.ptr, b->pkey.ptr, a->pkey_sz) != 0) {
+            return ret;
+        }
+    }
+    ret = 0; /* success */
+
+    return ret;
+}
 
 /* Initialize structure for signing
  *
@@ -901,13 +1790,22 @@
  *
  * returns WOLFSSL_SUCCESS on success
  */
-WOLFSSL_API int wolfSSL_EVP_SignInit(WOLFSSL_EVP_MD_CTX *ctx, const WOLFSSL_EVP_MD *type)
+int wolfSSL_EVP_SignInit(WOLFSSL_EVP_MD_CTX *ctx, const WOLFSSL_EVP_MD *type)
 {
     if (ctx == NULL) return WOLFSSL_FAILURE;
     WOLFSSL_ENTER("EVP_SignInit");
     return wolfSSL_EVP_DigestInit(ctx,type);
 }
 
+WOLFSSL_API int wolfSSL_EVP_SignInit_ex(WOLFSSL_EVP_MD_CTX* ctx,
+                                     const WOLFSSL_EVP_MD* type,
+                                     WOLFSSL_ENGINE *impl)
+{
+    if (ctx == NULL) return WOLFSSL_FAILURE;
+    WOLFSSL_ENTER("EVP_SignInit");
+    return wolfSSL_EVP_DigestInit_ex(ctx,type,impl);
+}
+
 
 /* Update structure with data for signing
  *
@@ -917,45 +1815,69 @@
  *
  * returns WOLFSSL_SUCCESS on success
  */
-WOLFSSL_API int wolfSSL_EVP_SignUpdate(WOLFSSL_EVP_MD_CTX *ctx, const void *data, size_t len)
+int wolfSSL_EVP_SignUpdate(WOLFSSL_EVP_MD_CTX *ctx, const void *data, size_t len)
 {
     if (ctx == NULL) return 0;
     WOLFSSL_ENTER("EVP_SignUpdate(");
     return wolfSSL_EVP_DigestUpdate(ctx, data, len);
 }
 
-/* macro gaurd because currently only used with RSA */
-#if !defined(NO_RSA) && !defined(HAVE_USER_RSA)
-/* Helper function for getting the NID value from md
- *
- * returns the NID value associated with md on success */
-static int md2nid(int md)
+static const struct s_ent {
+    const int macType;
+    const int nid;
+    const char *name;
+} md_tbl[] = {
+#ifndef NO_MD4
+    {WC_HASH_TYPE_MD4, NID_md4, "MD4"},
+#endif /* NO_MD4 */
+
+#ifndef NO_MD5
+    {WC_HASH_TYPE_MD5, NID_md5, "MD5"},
+#endif /* NO_MD5 */
+
+#ifndef NO_SHA
+    {WC_HASH_TYPE_SHA, NID_sha1, "SHA"},
+#endif /* NO_SHA */
+
+#ifdef WOLFSSL_SHA224
+    {WC_HASH_TYPE_SHA224, NID_sha224, "SHA224"},
+#endif /* WOLFSSL_SHA224 */
+#ifndef NO_SHA256
+    {WC_HASH_TYPE_SHA256, NID_sha256, "SHA256"},
+#endif
+
+#ifdef WOLFSSL_SHA384
+    {WC_HASH_TYPE_SHA384, NID_sha384, "SHA384"},
+#endif /* WOLFSSL_SHA384 */
+#ifdef WOLFSSL_SHA512
+    {WC_HASH_TYPE_SHA512, NID_sha512, "SHA512"},
+#endif /* WOLFSSL_SHA512 */
+#ifndef WOLFSSL_NOSHA3_224
+    {WC_HASH_TYPE_SHA3_224, NID_sha3_224, "SHA3_224"},
+#endif
+#ifndef WOLFSSL_NOSHA3_256
+    {WC_HASH_TYPE_SHA3_256, NID_sha3_256, "SHA3_256"},
+#endif
+    {WC_HASH_TYPE_SHA3_384, NID_sha3_384, "SHA3_384"},
+#ifndef WOLFSSL_NOSHA3_512
+    {WC_HASH_TYPE_SHA3_512, NID_sha3_512, "SHA3_512"},
+#endif
+    {0, 0, NULL}
+};
+
+static int wolfSSL_EVP_md2macType(const WOLFSSL_EVP_MD *md)
 {
-    const char * d;
-    d = (const char *)wolfSSL_EVP_get_md((const unsigned char)md);
-    if (XSTRNCMP(d, "SHA", 3) == 0) {
-        if (XSTRLEN(d) > 3) {
-            if (XSTRNCMP(d, "SHA256", 6) == 0) {
-                return NID_sha256;
+    const struct s_ent *ent ;
+
+    if (md != NULL) {
+        for( ent = md_tbl; ent->name != NULL; ent++) {
+            if(XSTRNCMP((const char *)md, ent->name, XSTRLEN(ent->name)+1) == 0) {
+                return ent->macType;
             }
-            if (XSTRNCMP(d, "SHA384", 6) == 0) {
-                return NID_sha384;
-            }
-            if (XSTRNCMP(d, "SHA512", 6) == 0) {
-                return NID_sha512;
-            }
-            WOLFSSL_MSG("Unknown SHA type");
-            return 0;
-        }
-        else {
-            return NID_sha1;
-        }
-    }
-    if (XSTRNCMP(d, "MD5", 3) == 0)
-        return NID_md5;
-    return 0;
+        }
+    }
+    return WC_HASH_TYPE_NONE;
 }
-#endif /* NO_RSA */
 
 /* Finalize structure for signing
  *
@@ -966,7 +1888,7 @@
  *
  * returns WOLFSSL_SUCCESS on success and WOLFSSL_FAILURE on failure
  */
-WOLFSSL_API int wolfSSL_EVP_SignFinal(WOLFSSL_EVP_MD_CTX *ctx, unsigned char *sigret,
+int wolfSSL_EVP_SignFinal(WOLFSSL_EVP_MD_CTX *ctx, unsigned char *sigret,
                   unsigned int *siglen, WOLFSSL_EVP_PKEY *pkey)
 {
     unsigned int mdsize;
@@ -984,7 +1906,7 @@
     switch (pkey->type) {
 #if !defined(NO_RSA) && !defined(HAVE_USER_RSA)
     case EVP_PKEY_RSA: {
-        int nid = md2nid(ctx->macType);
+        int nid = wolfSSL_EVP_MD_type(wolfSSL_EVP_MD_CTX_md(ctx));
         if (nid < 0) break;
         return wolfSSL_RSA_sign(nid, md, mdsize, sigret,
                                 siglen, pkey->rsa);
@@ -1009,7 +1931,7 @@
  *
  * returns WOLFSSL_SUCCESS on success
  */
-WOLFSSL_API int wolfSSL_EVP_VerifyInit(WOLFSSL_EVP_MD_CTX *ctx, const WOLFSSL_EVP_MD *type)
+int wolfSSL_EVP_VerifyInit(WOLFSSL_EVP_MD_CTX *ctx, const WOLFSSL_EVP_MD *type)
 {
     if (ctx == NULL) return WOLFSSL_FAILURE;
     WOLFSSL_ENTER("EVP_VerifyInit");
@@ -1025,7 +1947,7 @@
  *
  * returns WOLFSSL_SUCCESS on success and WOLFSSL_FAILURE on failure
  */
-WOLFSSL_API int wolfSSL_EVP_VerifyUpdate(WOLFSSL_EVP_MD_CTX *ctx, const void *data, size_t len)
+int wolfSSL_EVP_VerifyUpdate(WOLFSSL_EVP_MD_CTX *ctx, const void *data, size_t len)
 {
     if (ctx == NULL) return WOLFSSL_FAILURE;
     WOLFSSL_ENTER("EVP_VerifyUpdate");
@@ -1042,7 +1964,7 @@
  *
  * returns WOLFSSL_SUCCESS on success and WOLFSSL_FAILURE on failure
  */
-WOLFSSL_API int wolfSSL_EVP_VerifyFinal(WOLFSSL_EVP_MD_CTX *ctx,
+int wolfSSL_EVP_VerifyFinal(WOLFSSL_EVP_MD_CTX *ctx,
         unsigned char*sig, unsigned int siglen, WOLFSSL_EVP_PKEY *pkey)
 {
     int ret;
@@ -1060,7 +1982,7 @@
     switch (pkey->type) {
 #if !defined(NO_RSA) && !defined(HAVE_USER_RSA)
     case EVP_PKEY_RSA: {
-        int nid = md2nid(ctx->macType);
+        int nid = wolfSSL_EVP_MD_type(wolfSSL_EVP_MD_CTX_md(ctx));
         if (nid < 0) break;
         return wolfSSL_RSA_verify(nid, md, mdsize, sig,
                 (unsigned int)siglen, pkey->rsa);
@@ -1077,7 +1999,7 @@
     return WOLFSSL_FAILURE;
 }
 
-WOLFSSL_API int wolfSSL_EVP_add_cipher(const WOLFSSL_EVP_CIPHER *cipher)
+int wolfSSL_EVP_add_cipher(const WOLFSSL_EVP_CIPHER *cipher)
 {
     (void)cipher;
     /* nothing to do */
@@ -1095,7 +2017,7 @@
     if (type != EVP_PKEY_HMAC || (key == NULL && keylen != 0))
         return NULL;
 
-    pkey = wolfSSL_PKEY_new();
+    pkey = wolfSSL_EVP_PKEY_new();
     if (pkey != NULL) {
         pkey->pkey.ptr = (char*)XMALLOC(keylen, NULL, DYNAMIC_TYPE_PUBLIC_KEY);
         if (pkey->pkey.ptr == NULL && keylen > 0) {
@@ -1125,109 +2047,138 @@
 }
 
 
-int wolfSSL_EVP_DigestSignInit(WOLFSSL_EVP_MD_CTX *ctx,
-                               WOLFSSL_EVP_PKEY_CTX **pctx,
-                               const WOLFSSL_EVP_MD *type,
-                               WOLFSSL_ENGINE *e,
-                               WOLFSSL_EVP_PKEY *pkey)
+/* Initialize an EVP_DigestSign/Verify operation.
+ * Initialize a digest for RSA and ECC keys, or HMAC for HMAC key.
+ */
+static int wolfSSL_evp_digest_pk_init(WOLFSSL_EVP_MD_CTX *ctx,
+                                      WOLFSSL_EVP_PKEY_CTX **pctx,
+                                      const WOLFSSL_EVP_MD *type,
+                                      WOLFSSL_ENGINE *e,
+                                      WOLFSSL_EVP_PKEY *pkey)
 {
-    int hashType;
-    const unsigned char* key;
-    size_t keySz;
-
-    /* Unused parameters */
-    (void)pctx;
-    (void)e;
-
-    WOLFSSL_ENTER("EVP_DigestSignInit");
-
-    if (ctx == NULL || type == NULL || pkey == NULL)
-        return BAD_FUNC_ARG;
-
-#ifdef WOLFSSL_ASYNC_CRYPT
-    /* compile-time validation of ASYNC_CTX_SIZE */
-    typedef char async_test[WC_ASYNC_DEV_SIZE >= sizeof(WC_ASYNC_DEV) ?
-                                                                    1 : -1];
-    (void)sizeof(async_test);
-#endif
-
-    if (XSTRNCMP(type, "SHA256", 6) == 0) {
-         hashType = WC_SHA256;
-    }
-#ifdef WOLFSSL_SHA224
-    else if (XSTRNCMP(type, "SHA224", 6) == 0) {
-         hashType = WC_SHA224;
-    }
-#endif
-#ifdef WOLFSSL_SHA384
-    else if (XSTRNCMP(type, "SHA384", 6) == 0) {
-         hashType = WC_SHA384;
-    }
-#endif
-#ifdef WOLFSSL_SHA512
-    else if (XSTRNCMP(type, "SHA512", 6) == 0) {
-         hashType = WC_SHA512;
-    }
-#endif
-#ifndef NO_MD5
-    else if (XSTRNCMP(type, "MD5", 3) == 0) {
-        hashType = WC_MD5;
-    }
-#endif
-#ifndef NO_SHA
-    /* has to be last since would pick or 224, 256, 384, or 512 too */
-    else if (XSTRNCMP(type, "SHA", 3) == 0) {
-         hashType = WC_SHA;
-    }
-#endif /* NO_SHA */
-    else
-         return BAD_FUNC_ARG;
-
-    key = wolfSSL_EVP_PKEY_get0_hmac(pkey, &keySz);
-
-    if (wc_HmacInit(&ctx->hash.hmac, NULL, INVALID_DEVID) != 0)
-        return WOLFSSL_FAILURE;
-
-    if (wc_HmacSetKey(&ctx->hash.hmac, hashType, key, (word32)keySz) != 0)
-        return WOLFSSL_FAILURE;
-
-    ctx->macType = NID_hmac & 0xFF;
+    if (pkey->type == EVP_PKEY_HMAC) {
+        int                  hashType;
+        const unsigned char* key;
+        size_t               keySz;
+
+        if (XSTRNCMP(type, "SHA256", 6) == 0) {
+            hashType = WC_SHA256;
+        }
+    #ifdef WOLFSSL_SHA224
+        else if (XSTRNCMP(type, "SHA224", 6) == 0) {
+            hashType = WC_SHA224;
+        }
+    #endif
+    #ifdef WOLFSSL_SHA384
+        else if (XSTRNCMP(type, "SHA384", 6) == 0) {
+            hashType = WC_SHA384;
+        }
+    #endif
+    #ifdef WOLFSSL_SHA512
+        else if (XSTRNCMP(type, "SHA512", 6) == 0) {
+            hashType = WC_SHA512;
+        }
+    #endif
+    #ifndef NO_MD5
+        else if (XSTRNCMP(type, "MD5", 3) == 0) {
+            hashType = WC_MD5;
+        }
+    #endif
+    #ifndef NO_SHA
+        /* has to be last since would pick or 224, 256, 384, or 512 too */
+        else if (XSTRNCMP(type, "SHA", 3) == 0) {
+             hashType = WC_SHA;
+        }
+    #endif /* NO_SHA */
+        else
+             return BAD_FUNC_ARG;
+
+        key = wolfSSL_EVP_PKEY_get0_hmac(pkey, &keySz);
+
+        if (wc_HmacInit(&ctx->hash.hmac, NULL, INVALID_DEVID) != 0)
+            return WOLFSSL_FAILURE;
+
+        if (wc_HmacSetKey(&ctx->hash.hmac, hashType, key, (word32)keySz) != 0)
+            return WOLFSSL_FAILURE;
+
+        ctx->macType = NID_hmac;
+    }
+    else {
+        int ret;
+
+        if (ctx->pctx == NULL) {
+            ctx->pctx = wolfSSL_EVP_PKEY_CTX_new(pkey, e);
+            if (ctx->pctx == NULL)
+                return WOLFSSL_FAILURE;
+        }
+
+        ret = wolfSSL_EVP_DigestInit(ctx, type);
+        if (ret == WOLFSSL_SUCCESS && pctx != NULL)
+            *pctx = ctx->pctx;
+        return ret;
+    }
 
     return WOLFSSL_SUCCESS;
 }
 
-
-int wolfSSL_EVP_DigestSignUpdate(WOLFSSL_EVP_MD_CTX *ctx,
-                                 const void *d, unsigned int cnt)
+/* Update an EVP_DigestSign/Verify operation.
+ * Update a digest for RSA and ECC keys, or HMAC for HMAC key.
+ */
+static int wolfssl_evp_digest_pk_update(WOLFSSL_EVP_MD_CTX *ctx,
+                                        const void *d, unsigned int cnt)
 {
-    WOLFSSL_ENTER("EVP_DigestSignFinal");
-
-    if (ctx->macType != (NID_hmac & 0xFF))
-        return WOLFSSL_FAILURE;
-
-    if (wc_HmacUpdate(&ctx->hash.hmac, (const byte *)d, cnt) != 0)
-        return WOLFSSL_FAILURE;
-
-    return WOLFSSL_SUCCESS;
+    if (ctx->pctx == NULL) {
+        if (ctx->macType != NID_hmac)
+            return WOLFSSL_FAILURE;
+
+        if (wc_HmacUpdate(&ctx->hash.hmac, (const byte *)d, cnt) != 0)
+            return WOLFSSL_FAILURE;
+
+        return WOLFSSL_SUCCESS;
+    }
+    else
+        return wolfSSL_EVP_DigestUpdate(ctx, d, cnt);
 }
 
-
-int wolfSSL_EVP_DigestSignFinal(WOLFSSL_EVP_MD_CTX *ctx,
-                                unsigned char *sig, size_t *siglen)
+/* Finalize an EVP_DigestSign/Verify operation - common part only.
+ * Finalize a digest for RSA and ECC keys, or HMAC for HMAC key.
+ * Copies the digest so that you can keep updating.
+ */
+static int wolfssl_evp_digest_pk_final(WOLFSSL_EVP_MD_CTX *ctx,
+                                       unsigned char *md, unsigned int* mdlen)
 {
-    unsigned char digest[WC_MAX_DIGEST_SIZE];
-    Hmac hmacCopy;
-    int hashLen, ret;
-
-    WOLFSSL_ENTER("EVP_DigestSignFinal");
-
-    if (ctx == NULL || siglen == NULL)
-        return WOLFSSL_FAILURE;
-
-    if (ctx->macType != (NID_hmac & 0xFF))
-        return WOLFSSL_FAILURE;
-
-    switch (ctx->hash.hmac.macType) {
+    int  ret;
+
+    if (ctx->pctx == NULL) {
+        Hmac hmacCopy;
+
+        if (ctx->macType != NID_hmac)
+            return WOLFSSL_FAILURE;
+
+        if (wolfSSL_HmacCopy(&hmacCopy, &ctx->hash.hmac) != WOLFSSL_SUCCESS)
+            return WOLFSSL_FAILURE;
+        ret = wc_HmacFinal(&hmacCopy, md) == 0;
+        wc_HmacFree(&hmacCopy);
+        return ret;
+    }
+    else {
+        WOLFSSL_EVP_MD_CTX ctxCopy;
+
+        if (wolfSSL_EVP_MD_CTX_copy_ex(&ctxCopy, ctx) != WOLFSSL_SUCCESS)
+            return WOLFSSL_FAILURE;
+
+        ret = wolfSSL_EVP_DigestFinal(&ctxCopy, md, mdlen);
+        wolfSSL_EVP_MD_CTX_cleanup(&ctxCopy);
+        return ret;
+    }
+}
+
+/* Get the length of the mac based on the digest algorithm. */
+static int wolfssl_mac_len(unsigned char macType)
+{
+    int hashLen;
+
+    switch (macType) {
     #ifndef NO_MD5
         case WC_MD5:
             hashLen = WC_MD5_DIGEST_SIZE;
@@ -1252,12 +2203,12 @@
             break;
     #endif /* !NO_SHA256 */
 
-    #ifdef WOLFSSL_SHA512
     #ifdef WOLFSSL_SHA384
         case WC_SHA384:
             hashLen = WC_SHA384_DIGEST_SIZE;
             break;
     #endif /* WOLFSSL_SHA384 */
+    #ifdef WOLFSSL_SHA512
         case WC_SHA512:
             hashLen = WC_SHA512_DIGEST_SIZE;
             break;
@@ -1270,30 +2221,301 @@
     #endif /* HAVE_BLAKE2 */
 
         default:
-            return 0;
-    }
-
-    if (sig == NULL) {
-        *siglen = hashLen;
-        return WOLFSSL_SUCCESS;
-    }
-
-    if ((int)(*siglen) > hashLen)
-        *siglen = hashLen;
-
-    XMEMCPY(&hmacCopy, &ctx->hash.hmac, sizeof(hmacCopy));
-    ret = wc_HmacFinal(&hmacCopy, digest) == 0;
-    if (ret == 1)
+            hashLen = 0;
+    }
+
+    return hashLen;
+}
+
+int wolfSSL_EVP_DigestSignInit(WOLFSSL_EVP_MD_CTX *ctx,
+                               WOLFSSL_EVP_PKEY_CTX **pctx,
+                               const WOLFSSL_EVP_MD *type,
+                               WOLFSSL_ENGINE *e,
+                               WOLFSSL_EVP_PKEY *pkey)
+{
+    WOLFSSL_ENTER("EVP_DigestSignInit");
+
+    if (ctx == NULL || type == NULL || pkey == NULL)
+        return BAD_FUNC_ARG;
+
+    return wolfSSL_evp_digest_pk_init(ctx, pctx, type, e, pkey);
+}
+
+
+int wolfSSL_EVP_DigestSignUpdate(WOLFSSL_EVP_MD_CTX *ctx, const void *d,
+                                 unsigned int cnt)
+{
+    WOLFSSL_ENTER("EVP_DigestSignUpdate");
+
+    if (ctx == NULL || d == NULL)
+        return BAD_FUNC_ARG;
+
+    return wolfssl_evp_digest_pk_update(ctx, d, cnt);
+}
+
+int wolfSSL_EVP_DigestSignFinal(WOLFSSL_EVP_MD_CTX *ctx, unsigned char *sig,
+                                size_t *siglen)
+{
+    unsigned char digest[WC_MAX_DIGEST_SIZE];
+    unsigned int  hashLen;
+    int           ret = WOLFSSL_FAILURE;
+
+    WOLFSSL_ENTER("EVP_DigestSignFinal");
+
+    if (ctx == NULL || siglen == NULL)
+        return WOLFSSL_FAILURE;
+
+    /* Return the maximum size of the signaure when sig is NULL. */
+    if (ctx->pctx == NULL) {
+        if (ctx->macType != NID_hmac)
+            return WOLFSSL_FAILURE;
+
+        hashLen = wolfssl_mac_len(ctx->hash.hmac.macType);
+
+        if (sig == NULL) {
+            *siglen = hashLen;
+            return WOLFSSL_SUCCESS;
+        }
+    }
+#ifndef NO_RSA
+    else if (ctx->pctx->pkey->type == EVP_PKEY_RSA) {
+        if (sig == NULL) {
+            *siglen = wolfSSL_RSA_size(ctx->pctx->pkey->rsa);
+            return WOLFSSL_SUCCESS;
+        }
+    }
+#endif /* !NO_RSA */
+#ifdef HAVE_ECC
+    else if (ctx->pctx->pkey->type == EVP_PKEY_EC) {
+        if (sig == NULL) {
+            /* SEQ + INT + INT */
+            *siglen = ecc_sets[ctx->pctx->pkey->ecc->group->curve_idx].size * 2
+                    + 8;
+            return WOLFSSL_SUCCESS;
+        }
+    }
+#endif
+
+    if (wolfssl_evp_digest_pk_final(ctx, digest, &hashLen) <= 0)
+        return WOLFSSL_FAILURE;
+
+    if (ctx->pctx == NULL) {
+        /* Copy the HMAC result as signature. */
+        if ((unsigned int)(*siglen) > hashLen)
+            *siglen = hashLen;
+        /* May be a truncated signature. */
+
         XMEMCPY(sig, digest, *siglen);
-
-    ForceZero(&hmacCopy, sizeof(hmacCopy));
+        ret = WOLFSSL_SUCCESS;
+    }
+    else {
+        /* Sign the digest. */
+        switch (ctx->pctx->pkey->type) {
+    #if !defined(NO_RSA) && !defined(HAVE_USER_RSA)
+        case EVP_PKEY_RSA: {
+            unsigned int sigSz;
+            int nid = wolfSSL_EVP_MD_type(wolfSSL_EVP_MD_CTX_md(ctx));
+            if (nid < 0)
+                break;
+            ret = wolfSSL_RSA_sign(nid, digest, hashLen, sig, &sigSz,
+                                   ctx->pctx->pkey->rsa);
+            if (ret >= 0)
+                *siglen = sigSz;
+            break;
+        }
+    #endif /* NO_RSA */
+
+    #ifdef HAVE_ECC
+        case EVP_PKEY_EC: {
+            WOLFSSL_ECDSA_SIG *ecdsaSig;
+            ecdsaSig = wolfSSL_ECDSA_do_sign(digest, hashLen,
+                                             ctx->pctx->pkey->ecc);
+            if (ecdsaSig == NULL)
+                break;
+            *siglen = wolfSSL_i2d_ECDSA_SIG(ecdsaSig, &sig);
+            wolfSSL_ECDSA_SIG_free(ecdsaSig);
+            ret = WOLFSSL_SUCCESS;
+            break;
+        }
+    #endif
+        default:
+            break;
+        }
+    }
+
     ForceZero(digest, sizeof(digest));
     return ret;
 }
-#endif /* WOLFSSL_EVP_INCLUDED */
-
-#if defined(OPENSSL_EXTRA) && !defined(NO_PWDBASED) && !defined(NO_SHA)
-WOLFSSL_API int wolfSSL_PKCS5_PBKDF2_HMAC_SHA1(const char *pass, int passlen,
+int wolfSSL_EVP_DigestVerifyInit(WOLFSSL_EVP_MD_CTX *ctx,
+                                 WOLFSSL_EVP_PKEY_CTX **pctx,
+                                 const WOLFSSL_EVP_MD *type,
+                                 WOLFSSL_ENGINE *e,
+                                 WOLFSSL_EVP_PKEY *pkey)
+{
+    WOLFSSL_ENTER("EVP_DigestVerifyInit");
+
+    if (ctx == NULL || type == NULL || pkey == NULL)
+        return BAD_FUNC_ARG;
+
+    return wolfSSL_evp_digest_pk_init(ctx, pctx, type, e, pkey);
+}
+
+
+int wolfSSL_EVP_DigestVerifyUpdate(WOLFSSL_EVP_MD_CTX *ctx, const void *d,
+                                   size_t cnt)
+{
+    WOLFSSL_ENTER("EVP_DigestVerifyUpdate");
+
+    if (ctx == NULL || d == NULL)
+        return BAD_FUNC_ARG;
+
+    return wolfssl_evp_digest_pk_update(ctx, d, (unsigned int)cnt);
+}
+
+
+int wolfSSL_EVP_DigestVerifyFinal(WOLFSSL_EVP_MD_CTX *ctx,
+                                  const unsigned char *sig, size_t siglen)
+{
+    unsigned char digest[WC_MAX_DIGEST_SIZE];
+    unsigned int  hashLen;
+
+    WOLFSSL_ENTER("EVP_DigestVerifyFinal");
+
+    if (ctx == NULL || sig == NULL)
+        return WOLFSSL_FAILURE;
+
+    if (ctx->pctx == NULL) {
+        if (ctx->macType != NID_hmac)
+            return WOLFSSL_FAILURE;
+
+        hashLen = wolfssl_mac_len(ctx->hash.hmac.macType);
+
+        if (siglen > hashLen)
+            return WOLFSSL_FAILURE;
+        /* May be a truncated signature. */
+    }
+
+    if (wolfssl_evp_digest_pk_final(ctx, digest, &hashLen) <= 0)
+        return WOLFSSL_FAILURE;
+
+    if (ctx->pctx == NULL) {
+        /* Check HMAC result matches the signature. */
+        if (XMEMCMP(sig, digest, siglen) == 0)
+            return WOLFSSL_SUCCESS;
+        return WOLFSSL_FAILURE;
+    }
+    else {
+        /* Verify the signature with the digest. */
+        switch (ctx->pctx->pkey->type) {
+    #if !defined(NO_RSA) && !defined(HAVE_USER_RSA)
+        case EVP_PKEY_RSA: {
+            int nid = wolfSSL_EVP_MD_type(wolfSSL_EVP_MD_CTX_md(ctx));
+            if (nid < 0)
+                return WOLFSSL_FAILURE;
+            return wolfSSL_RSA_verify(nid, digest, hashLen, sig,
+                                      (unsigned int)siglen,
+                                      ctx->pctx->pkey->rsa);
+        }
+    #endif /* NO_RSA */
+
+    #ifdef HAVE_ECC
+        case EVP_PKEY_EC: {
+            int ret;
+            WOLFSSL_ECDSA_SIG *ecdsaSig;
+            ecdsaSig = wolfSSL_d2i_ECDSA_SIG(NULL, &sig, (long)siglen);
+            if (ecdsaSig == NULL)
+                return WOLFSSL_FAILURE;
+            ret = wolfSSL_ECDSA_do_verify(digest, hashLen, ecdsaSig,
+                                          ctx->pctx->pkey->ecc);
+            wolfSSL_ECDSA_SIG_free(ecdsaSig);
+            return ret;
+        }
+    #endif
+        default:
+            break;
+        }
+    }
+
+    return WOLFSSL_FAILURE;
+}
+
+
+#ifdef WOLFSSL_APACHE_HTTPD
+#if !defined(USE_WINDOWS_API) && !defined(MICROCHIP_PIC32)
+    #include <termios.h>
+#endif
+
+#ifndef XGETPASSWD
+    static int XGETPASSWD(char* buf, int bufSz) {
+        int ret = WOLFSSL_SUCCESS;
+
+        /* turn off echo for passwords */
+    #ifdef USE_WINDOWS_API
+        DWORD originalTerm;
+        DWORD newTerm;
+        CONSOLE_SCREEN_BUFFER_INFO screenOrig;
+        HANDLE stdinHandle = GetStdHandle(STD_INPUT_HANDLE);
+        if (GetConsoleMode(stdinHandle, &originalTerm) == 0) {
+            WOLFSSL_MSG("Couldn't get the original terminal settings");
+            return WOLFSSL_FAILURE;
+        }
+        newTerm = originalTerm;
+        newTerm &= ~ENABLE_ECHO_INPUT;
+        if (SetConsoleMode(stdinHandle, newTerm) == 0) {
+            WOLFSSL_MSG("Couldn't turn off echo");
+            return WOLFSSL_FAILURE;
+        }
+    #else
+        struct termios originalTerm;
+        struct termios newTerm;
+        if (tcgetattr(STDIN_FILENO, &originalTerm) != 0) {
+            WOLFSSL_MSG("Couldn't get the original terminal settings");
+            return WOLFSSL_FAILURE;
+        }
+        XMEMCPY(&newTerm, &originalTerm, sizeof(struct termios));
+
+        newTerm.c_lflag &= ~ECHO;
+        newTerm.c_lflag |= (ICANON | ECHONL);
+        if (tcsetattr(STDIN_FILENO, TCSANOW, &newTerm) != 0) {
+            WOLFSSL_MSG("Couldn't turn off echo");
+            return WOLFSSL_FAILURE;
+        }
+    #endif
+
+        if (XFGETS(buf, bufSz, stdin) == NULL) {
+            ret = WOLFSSL_FAILURE;
+        }
+
+        /* restore default echo */
+    #ifdef USE_WINDOWS_API
+        if (SetConsoleMode(stdinHandle, originalTerm) == 0) {
+            WOLFSSL_MSG("Couldn't restore the terminal settings");
+            return WOLFSSL_FAILURE;
+        }
+    #else
+        if (tcsetattr(STDIN_FILENO, TCSANOW, &originalTerm) != 0) {
+            WOLFSSL_MSG("Couldn't restore the terminal settings");
+            return WOLFSSL_FAILURE;
+        }
+    #endif
+        return ret;
+    }
+#endif
+
+/* returns 0 on success and -2 or -1 on failure */
+int wolfSSL_EVP_read_pw_string(char* buf, int bufSz, const char* banner, int v)
+{
+    printf("%s", banner);
+    if (XGETPASSWD(buf, bufSz) == WOLFSSL_FAILURE) {
+        return -1;
+    }
+    (void)v; /* fgets always sanity checks size of input vs buffer */
+    return 0;
+}
+#endif /* WOLFSSL_APACHE_HTTPD */
+
+#if !defined(NO_PWDBASED) && !defined(NO_SHA)
+int wolfSSL_PKCS5_PBKDF2_HMAC_SHA1(const char *pass, int passlen,
                                                const unsigned char *salt,
                                                int saltlen, int iter,
                                                int keylen, unsigned char *out)
@@ -1304,16 +2526,4071 @@
     if (pass == NULL) {
         passlen = 0;
         pass = nostring;
+    }
+    else if (passlen == -1) {
+        passlen = (int)XSTRLEN(pass);
+    }
+
+    ret = wc_PBKDF2((byte*)out, (byte*)pass, passlen, (byte*)salt, saltlen,
+                    iter, keylen, WC_SHA);
+    if (ret == 0)
+        return WOLFSSL_SUCCESS;
+    else
+        return WOLFSSL_FAILURE;
+}
+#endif /* !NO_PWDBASED !NO_SHA*/
+
+#if !defined(NO_PWDBASED)
+WOLFSSL_API int wolfSSL_PKCS5_PBKDF2_HMAC(const char *pass, int passlen,
+                                           const unsigned char *salt,
+                                           int saltlen, int iter,
+                                           const WOLFSSL_EVP_MD *digest,
+                                           int keylen, unsigned char *out)
+{
+    const char *nostring = "";
+    int ret = 0;
+
+    if (pass == NULL) {
+        passlen = 0;
+        pass = nostring;
     } else if (passlen == -1) {
         passlen = (int)XSTRLEN(pass);
     }
 
     ret = wc_PBKDF2((byte*)out, (byte*)pass, passlen, (byte*)salt, saltlen,
-                    iter, keylen, WC_SHA);
+                    iter, keylen, wolfSSL_EVP_md2macType(digest));
     if (ret == 0)
         return WOLFSSL_SUCCESS;
     else
         return WOLFSSL_FAILURE;
 }
-#endif /* OPENSSL_EXTRA && !NO_PWDBASED !NO_SHA*/
+#endif /* !NO_PWDBASED */
+
+static const struct cipher{
+        unsigned char type;
+        const char *name;
+        int nid;
+} cipher_tbl[] = {
+
+#ifndef NO_AES
+    #ifdef WOLFSSL_AES_128
+    {AES_128_CBC_TYPE, "AES-128-CBC", NID_aes_128_cbc},
+    #endif
+    #ifdef WOLFSSL_AES_192
+    {AES_192_CBC_TYPE, "AES-192-CBC", NID_aes_192_cbc},
+    #endif
+    #ifdef WOLFSSL_AES_256
+    {AES_256_CBC_TYPE, "AES-256-CBC", NID_aes_256_cbc},
+    #endif
+
+    #ifdef WOLFSSL_AES_128
+    {AES_128_CFB1_TYPE, "AES-128-CFB1", NID_aes_128_cfb1},
+    #endif
+    #ifdef WOLFSSL_AES_192
+    {AES_192_CFB1_TYPE, "AES-192-CFB1", NID_aes_192_cfb1},
+    #endif
+    #ifdef WOLFSSL_AES_256
+    {AES_256_CFB1_TYPE, "AES-256-CFB1", NID_aes_256_cfb1},
+    #endif
+
+    #ifdef WOLFSSL_AES_128
+    {AES_128_CFB8_TYPE, "AES-128-CFB8", NID_aes_128_cfb8},
+    #endif
+    #ifdef WOLFSSL_AES_192
+    {AES_192_CFB8_TYPE, "AES-192-CFB8", NID_aes_192_cfb8},
+    #endif
+    #ifdef WOLFSSL_AES_256
+    {AES_256_CFB8_TYPE, "AES-256-CFB8", NID_aes_256_cfb8},
+    #endif
+
+    #ifdef WOLFSSL_AES_128
+    {AES_128_CFB128_TYPE, "AES-128-CFB128", NID_aes_128_cfb128},
+    #endif
+    #ifdef WOLFSSL_AES_192
+    {AES_192_CFB128_TYPE, "AES-192-CFB128", NID_aes_192_cfb128},
+    #endif
+    #ifdef WOLFSSL_AES_256
+    {AES_256_CFB128_TYPE, "AES-256-CFB128", NID_aes_256_cfb128},
+    #endif
+
+    #ifdef WOLFSSL_AES_128
+    {AES_128_OFB_TYPE, "AES-128-OFB", NID_aes_128_ofb},
+    #endif
+    #ifdef WOLFSSL_AES_192
+    {AES_192_OFB_TYPE, "AES-192-OFB", NID_aes_192_ofb},
+    #endif
+    #ifdef WOLFSSL_AES_256
+    {AES_256_OFB_TYPE, "AES-256-OFB", NID_aes_256_ofb},
+    #endif
+
+    #ifdef WOLFSSL_AES_128
+    {AES_128_XTS_TYPE, "AES-128-XTS", NID_aes_128_xts},
+    #endif
+    #ifdef WOLFSSL_AES_256
+    {AES_256_XTS_TYPE, "AES-256-XTS", NID_aes_256_xts},
+    #endif
+
+    #ifdef WOLFSSL_AES_128
+    {AES_128_GCM_TYPE, "AES-128-GCM", NID_aes_128_gcm},
+    #endif
+    #ifdef WOLFSSL_AES_192
+    {AES_192_GCM_TYPE, "AES-192-GCM", NID_aes_192_gcm},
+    #endif
+    #ifdef WOLFSSL_AES_256
+    {AES_256_GCM_TYPE, "AES-256-GCM", NID_aes_256_gcm},
+    #endif
+    #ifdef WOLFSSL_AES_128
+        {AES_128_CTR_TYPE, "AES-128-CTR", NID_aes_128_ctr},
+    #endif
+    #ifdef WOLFSSL_AES_192
+        {AES_192_CTR_TYPE, "AES-192-CTR", NID_aes_192_ctr},
+    #endif
+    #ifdef WOLFSSL_AES_256
+        {AES_256_CTR_TYPE, "AES-256-CTR", NID_aes_256_ctr},
+    #endif
+
+    #ifdef WOLFSSL_AES_128
+        {AES_128_ECB_TYPE, "AES-128-ECB", NID_aes_128_ecb},
+    #endif
+    #ifdef WOLFSSL_AES_192
+        {AES_192_ECB_TYPE, "AES-192-ECB", NID_aes_192_ecb},
+    #endif
+    #ifdef WOLFSSL_AES_256
+        {AES_256_ECB_TYPE, "AES-256-ECB", NID_aes_256_ecb},
+    #endif
+
+#endif
+
+#ifndef NO_DES3
+    {DES_CBC_TYPE, "DES-CBC", NID_des_cbc},
+    {DES_ECB_TYPE, "DES-ECB", NID_des_ecb},
+
+    {DES_EDE3_CBC_TYPE, "DES-EDE3-CBC", NID_des_ede3_cbc},
+    {DES_EDE3_ECB_TYPE, "DES-EDE3-ECB", NID_des_ede3_ecb},
+#endif
+
+#ifndef NO_RC4
+    {ARC4_TYPE, "ARC4", NID_undef},
+#endif
+
+#ifdef HAVE_IDEA
+    {IDEA_CBC_TYPE, "IDEA-CBC", NID_idea_cbc},
+#endif
+    { 0, NULL, 0}
+};
+
+/* returns cipher using provided ctx type */
+const WOLFSSL_EVP_CIPHER *wolfSSL_EVP_CIPHER_CTX_cipher(
+    const WOLFSSL_EVP_CIPHER_CTX *ctx)
+{
+    const struct cipher* c;
+
+    if (!ctx || !ctx->cipherType) {
+        return NULL;
+    }
+
+    for (c = cipher_tbl; c->type != 0; c++) {
+        if (ctx->cipherType == c->type) {
+            return wolfSSL_EVP_get_cipherbyname(c->name);
+        }
+    }
+
+    return NULL;
+}
+
+int wolfSSL_EVP_CIPHER_nid(const WOLFSSL_EVP_CIPHER *cipher)
+{
+    const struct cipher* c;
+
+    if (!cipher) {
+        return 0;
+    }
+
+    for (c = cipher_tbl; c->type != 0; c++) {
+        if (XSTRNCMP(cipher, c->name, XSTRLEN(c->name)+1) == 0) {
+            return c->nid;
+        }
+    }
+
+    return 0;
+}
+
+const WOLFSSL_EVP_CIPHER *wolfSSL_EVP_get_cipherbyname(const char *name)
+{
+
+    static const struct alias {
+        const char *name;
+        const char *alias;
+    } alias_tbl[] =
+    {
+#ifndef NO_DES3
+        {"DES-CBC", "DES"},
+        {"DES-CBC", "des"},
+        {"DES-ECB", "DES-ECB"},
+        {"DES-ECB", "des-ecb"},
+        {"DES-EDE3-CBC", "DES3"},
+        {"DES-EDE3-CBC", "des3"},
+        {"DES-EDE3-ECB", "DES-EDE3"},
+        {"DES-EDE3-ECB", "des-ede3"},
+        {"DES-EDE3-ECB", "des-ede3-ecb"},
+#endif
+#ifdef HAVE_IDEA
+        {"IDEA-CBC", "IDEA"},
+        {"IDEA-CBC", "idea"},
+#endif
+#ifndef NO_AES
+    #ifdef HAVE_AES_CBC
+        #ifdef WOLFSSL_AES_128
+        {"AES-128-CBC", "AES128-CBC"},
+        {"AES-128-CBC", "aes128-cbc"},
+        #endif
+        #ifdef WOLFSSL_AES_192
+        {"AES-192-CBC", "AES192-CBC"},
+        {"AES-192-CBC", "aes192-cbc"},
+        #endif
+        #ifdef WOLFSSL_AES_256
+        {"AES-256-CBC", "AES256-CBC"},
+        {"AES-256-CBC", "aes256-cbc"},
+        #endif
+    #endif
+    #ifdef WOLFSSL_AES_128
+        {"AES-128-ECB", "AES128-ECB"},
+        {"AES-128-ECB", "aes128-ecb"},
+    #endif
+    #ifdef WOLFSSL_AES_192
+        {"AES-192-ECB", "AES192-ECB"},
+        {"AES-192-ECB", "aes192-ecb"},
+    #endif
+    #ifdef WOLFSSL_AES_256
+        {"AES-256-ECB", "AES256-ECB"},
+    #endif
+    #ifdef HAVE_AESGCM
+        #ifdef WOLFSSL_AES_128
+        {"AES-128-GCM", "aes-128-gcm"},
+        {"AES-128-GCM", "id-aes128-GCM"},
+        #endif
+        #ifdef WOLFSSL_AES_192
+        {"AES-192-GCM", "aes-192-gcm"},
+        {"AES-192-GCM", "id-aes192-GCM"},
+        #endif
+        #ifdef WOLFSSL_AES_256
+        {"AES-256-GCM", "aes-256-gcm"},
+        {"AES-256-GCM", "id-aes256-GCM"},
+        #endif
+    #endif
+#endif
+#ifndef NO_RC4
+        {"ARC4", "RC4"},
+#endif
+        { NULL, NULL}
+    };
+
+    const struct cipher *ent;
+    const struct alias  *al;
+
+    WOLFSSL_ENTER("EVP_get_cipherbyname");
+
+    for( al = alias_tbl; al->name != NULL; al++)
+        if(XSTRNCMP(name, al->alias, XSTRLEN(al->alias)+1) == 0) {
+            name = al->name;
+            break;
+        }
+
+    for( ent = cipher_tbl; ent->name != NULL; ent++)
+        if(XSTRNCMP(name, ent->name, XSTRLEN(ent->name)+1) == 0) {
+            return (WOLFSSL_EVP_CIPHER *)ent->name;
+        }
+
+    return NULL;
+}
+
+/*
+ * return an EVP_CIPHER structure when cipher NID is passed.
+ *
+ * id  cipher NID
+ *
+ * return WOLFSSL_EVP_CIPHER
+*/
+const WOLFSSL_EVP_CIPHER *wolfSSL_EVP_get_cipherbynid(int id)
+{
+    WOLFSSL_ENTER("EVP_get_cipherbynid");
+
+    switch(id) {
+
+#ifndef NO_AES
+    #ifdef HAVE_AES_CBC
+        #ifdef WOLFSSL_AES_128
+        case NID_aes_128_cbc:
+            return wolfSSL_EVP_aes_128_cbc();
+        #endif
+        #ifdef WOLFSSL_AES_192
+        case NID_aes_192_cbc:
+            return wolfSSL_EVP_aes_192_cbc();
+        #endif
+        #ifdef WOLFSSL_AES_256
+        case NID_aes_256_cbc:
+            return wolfSSL_EVP_aes_256_cbc();
+        #endif
+    #endif
+    #ifdef WOLFSSL_AES_COUNTER
+        #ifdef WOLFSSL_AES_128
+        case NID_aes_128_ctr:
+            return wolfSSL_EVP_aes_128_ctr();
+        #endif
+        #ifdef WOLFSSL_AES_192
+        case NID_aes_192_ctr:
+            return wolfSSL_EVP_aes_192_ctr();
+        #endif
+        #ifdef WOLFSSL_AES_256
+        case NID_aes_256_ctr:
+            return wolfSSL_EVP_aes_256_ctr();
+        #endif
+    #endif /* WOLFSSL_AES_COUNTER */
+    #ifdef HAVE_AES_ECB
+        #ifdef WOLFSSL_AES_128
+        case NID_aes_128_ecb:
+            return wolfSSL_EVP_aes_128_ecb();
+        #endif
+        #ifdef WOLFSSL_AES_192
+        case NID_aes_192_ecb:
+            return wolfSSL_EVP_aes_192_ecb();
+        #endif
+        #ifdef WOLFSSL_AES_256
+        case NID_aes_256_ecb:
+            return wolfSSL_EVP_aes_256_ecb();
+        #endif
+    #endif /* HAVE_AES_ECB */
+    #ifdef HAVE_AESGCM
+        #ifdef WOLFSSL_AES_128
+        case NID_aes_128_gcm:
+            return wolfSSL_EVP_aes_128_gcm();
+        #endif
+        #ifdef WOLFSSL_AES_192
+        case NID_aes_192_gcm:
+            return wolfSSL_EVP_aes_192_gcm();
+        #endif
+        #ifdef WOLFSSL_AES_256
+        case NID_aes_256_gcm:
+            return wolfSSL_EVP_aes_256_gcm();
+        #endif
+    #endif
+#endif
+
+#ifndef NO_DES3
+        case NID_des_cbc:
+            return wolfSSL_EVP_des_cbc();
+#ifdef WOLFSSL_DES_ECB
+        case NID_des_ecb:
+            return wolfSSL_EVP_des_ecb();
+#endif
+        case NID_des_ede3_cbc:
+            return wolfSSL_EVP_des_ede3_cbc();
+#ifdef WOLFSSL_DES_ECB
+        case NID_des_ede3_ecb:
+            return wolfSSL_EVP_des_ede3_ecb();
+#endif
+#endif /*NO_DES3*/
+
+#ifdef HAVE_IDEA
+        case NID_idea_cbc:
+            return wolfSSL_EVP_idea_cbc();
+#endif
+
+        default:
+            WOLFSSL_MSG("Bad cipher id value");
+    }
+
+    return NULL;
+}
+
+void wolfSSL_EVP_init(void)
+{
+#ifndef NO_AES
+    #ifdef HAVE_AES_CBC
+        #ifdef WOLFSSL_AES_128
+        EVP_AES_128_CBC = (char *)EVP_get_cipherbyname("AES-128-CBC");
+        #endif
+        #ifdef WOLFSSL_AES_192
+        EVP_AES_192_CBC = (char *)EVP_get_cipherbyname("AES-192-CBC");
+        #endif
+        #ifdef WOLFSSL_AES_256
+        EVP_AES_256_CBC = (char *)EVP_get_cipherbyname("AES-256-CBC");
+        #endif
+    #endif /* HAVE_AES_CBC */
+
+    #ifdef WOLFSSL_AES_CFB
+        #ifdef WOLFSSL_AES_128
+        EVP_AES_128_CFB1 = (char *)EVP_get_cipherbyname("AES-128-CFB1");
+        #endif
+
+        #ifdef WOLFSSL_AES_192
+        EVP_AES_192_CFB1 = (char *)EVP_get_cipherbyname("AES-192-CFB1");
+        #endif
+
+        #ifdef WOLFSSL_AES_256
+        EVP_AES_256_CFB1 = (char *)EVP_get_cipherbyname("AES-256-CFB1");
+        #endif
+
+        #ifdef WOLFSSL_AES_128
+        EVP_AES_128_CFB8 = (char *)EVP_get_cipherbyname("AES-128-CFB8");
+        #endif
+
+        #ifdef WOLFSSL_AES_192
+        EVP_AES_192_CFB8 = (char *)EVP_get_cipherbyname("AES-192-CFB8");
+        #endif
+
+        #ifdef WOLFSSL_AES_256
+        EVP_AES_256_CFB8 = (char *)EVP_get_cipherbyname("AES-256-CFB8");
+        #endif
+
+        #ifdef WOLFSSL_AES_128
+        EVP_AES_128_CFB128 = (char *)EVP_get_cipherbyname("AES-128-CFB128");
+        #endif
+
+        #ifdef WOLFSSL_AES_192
+        EVP_AES_192_CFB128 = (char *)EVP_get_cipherbyname("AES-192-CFB128");
+        #endif
+
+        #ifdef WOLFSSL_AES_256
+        EVP_AES_256_CFB128 = (char *)EVP_get_cipherbyname("AES-256-CFB128");
+        #endif
+    #endif /* WOLFSSL_AES_CFB */
+
+    #ifdef WOLFSSL_AES_OFB
+        #ifdef WOLFSSL_AES_128
+        EVP_AES_128_OFB = (char *)EVP_get_cipherbyname("AES-128-OFB");
+        #endif
+
+        #ifdef WOLFSSL_AES_192
+        EVP_AES_192_OFB = (char *)EVP_get_cipherbyname("AES-192-OFB");
+        #endif
+
+        #ifdef WOLFSSL_AES_256
+        EVP_AES_256_OFB = (char *)EVP_get_cipherbyname("AES-256-OFB");
+        #endif
+    #endif /* WOLFSSL_AES_OFB */
+
+    #ifdef WOLFSSL_AES_XTS
+        #ifdef WOLFSSL_AES_128
+        EVP_AES_128_XTS = (char *)EVP_get_cipherbyname("AES-128-XTS");
+        #endif
+
+        #ifdef WOLFSSL_AES_256
+        EVP_AES_256_XTS = (char *)EVP_get_cipherbyname("AES-256-XTS");
+        #endif
+    #endif /* WOLFSSL_AES_XTS */
+
+    #ifdef HAVE_AESGCM
+        #ifdef WOLFSSL_AES_128
+        EVP_AES_128_GCM = (char *)EVP_get_cipherbyname("AES-128-GCM");
+        #endif
+        #ifdef WOLFSSL_AES_192
+        EVP_AES_192_GCM = (char *)EVP_get_cipherbyname("AES-192-GCM");
+        #endif
+        #ifdef WOLFSSL_AES_256
+        EVP_AES_256_GCM = (char *)EVP_get_cipherbyname("AES-256-GCM");
+        #endif
+    #endif /* HAVE_AESGCM*/
+        #ifdef WOLFSSL_AES_128
+        EVP_AES_128_CTR = (char *)EVP_get_cipherbyname("AES-128-CTR");
+        #endif
+        #ifdef WOLFSSL_AES_192
+        EVP_AES_192_CTR = (char *)EVP_get_cipherbyname("AES-192-CTR");
+        #endif
+        #ifdef WOLFSSL_AES_256
+        EVP_AES_256_CTR = (char *)EVP_get_cipherbyname("AES-256-CTR");
+        #endif
+
+        #ifdef WOLFSSL_AES_128
+        EVP_AES_128_ECB = (char *)EVP_get_cipherbyname("AES-128-ECB");
+        #endif
+        #ifdef WOLFSSL_AES_192
+        EVP_AES_192_ECB = (char *)EVP_get_cipherbyname("AES-192-ECB");
+        #endif
+        #ifdef WOLFSSL_AES_256
+        EVP_AES_256_ECB = (char *)EVP_get_cipherbyname("AES-256-ECB");
+        #endif
+#endif /* ifndef NO_AES*/
+
+#ifndef NO_DES3
+    EVP_DES_CBC = (char *)EVP_get_cipherbyname("DES-CBC");
+    EVP_DES_ECB = (char *)EVP_get_cipherbyname("DES-ECB");
+
+    EVP_DES_EDE3_CBC = (char *)EVP_get_cipherbyname("DES-EDE3-CBC");
+    EVP_DES_EDE3_ECB = (char *)EVP_get_cipherbyname("DES-EDE3-ECB");
+#endif
+
+#ifdef HAVE_IDEA
+    EVP_IDEA_CBC = (char *)EVP_get_cipherbyname("IDEA-CBC");
+#endif
+}
+
+#if !defined(NO_PWDBASED)
+int wolfSSL_EVP_get_hashinfo(const WOLFSSL_EVP_MD* evp,
+    int* pHash, int* pHashSz)
+{
+    enum wc_HashType hash = WC_HASH_TYPE_NONE;
+    int hashSz;
+
+    if (XSTRLEN(evp) < 3) {
+        /* do not try comparing strings if size is too small */
+        return WOLFSSL_FAILURE;
+    }
+
+    if (XSTRNCMP("SHA", evp, 3) == 0) {
+        if (XSTRLEN(evp) > 3) {
+        #ifndef NO_SHA256
+            if (XSTRNCMP("SHA256", evp, 6) == 0) {
+                hash = WC_HASH_TYPE_SHA256;
+            }
+            else
+        #endif
+        #ifdef WOLFSSL_SHA384
+            if (XSTRNCMP("SHA384", evp, 6) == 0) {
+                hash = WC_HASH_TYPE_SHA384;
+            }
+            else
+        #endif
+        #ifdef WOLFSSL_SHA512
+            if (XSTRNCMP("SHA512", evp, 6) == 0) {
+                hash = WC_HASH_TYPE_SHA512;
+            }
+            else
+        #endif
+            {
+                WOLFSSL_MSG("Unknown SHA hash");
+            }
+        }
+        else {
+            hash = WC_HASH_TYPE_SHA;
+        }
+    }
+#ifdef WOLFSSL_MD2
+    else if (XSTRNCMP("MD2", evp, 3) == 0) {
+        hash = WC_HASH_TYPE_MD2;
+    }
+#endif
+#ifndef NO_MD4
+    else if (XSTRNCMP("MD4", evp, 3) == 0) {
+        hash = WC_HASH_TYPE_MD4;
+    }
+#endif
+#ifndef NO_MD5
+    else if (XSTRNCMP("MD5", evp, 3) == 0) {
+        hash = WC_HASH_TYPE_MD5;
+    }
+#endif
+
+    if (pHash)
+        *pHash = hash;
+
+    hashSz = wc_HashGetDigestSize(hash);
+    if (pHashSz)
+        *pHashSz = hashSz;
+
+    if (hashSz < 0) {
+        return WOLFSSL_FAILURE;
+    }
+
+    return WOLFSSL_SUCCESS;
+}
+
+/* this function makes the assumption that out buffer is big enough for digest*/
+int wolfSSL_EVP_Digest(const unsigned char* in, int inSz, unsigned char* out,
+                              unsigned int* outSz, const WOLFSSL_EVP_MD* evp,
+                              WOLFSSL_ENGINE* eng)
+{
+    int err;
+    int hashType = WC_HASH_TYPE_NONE;
+    int hashSz;
+
+    WOLFSSL_ENTER("wolfSSL_EVP_Digest");
+    if (in == NULL || out == NULL || evp == NULL) {
+        WOLFSSL_MSG("Null argument passed in");
+        return WOLFSSL_FAILURE;
+    }
+
+    err = wolfSSL_EVP_get_hashinfo(evp, &hashType, &hashSz);
+    if (err != WOLFSSL_SUCCESS)
+        return err;
+
+    if (wc_Hash((enum wc_HashType)hashType, in, inSz, out, hashSz) != 0) {
+        return WOLFSSL_FAILURE;
+    }
+
+    if (outSz != NULL)
+        *outSz = hashSz;
+
+    (void)eng;
+    return WOLFSSL_SUCCESS;
+}
+#endif
+
+const WOLFSSL_EVP_MD *wolfSSL_EVP_get_digestbyname(const char *name)
+{
+    static const struct alias {
+        const char *name;
+        const char *alias;
+    } alias_tbl[] =
+    {
+        {"MD4", "ssl3-md4"},
+        {"MD5", "ssl3-md5"},
+        {"SHA", "ssl3-sha1"},
+        {"SHA", "SHA1"},
+        { NULL, NULL}
+    };
+
+    const struct alias  *al;
+    const struct s_ent *ent;
+
+
+    for (al = alias_tbl; al->name != NULL; al++)
+        if(XSTRNCMP(name, al->alias, XSTRLEN(al->alias)+1) == 0) {
+            name = al->name;
+            break;
+        }
+
+    for (ent = md_tbl; ent->name != NULL; ent++)
+        if(XSTRNCMP(name, ent->name, XSTRLEN(ent->name)+1) == 0) {
+            return (EVP_MD *)ent->name;
+        }
+    return NULL;
+}
+
+int wolfSSL_EVP_MD_type(const WOLFSSL_EVP_MD *md)
+{
+    const struct s_ent *ent ;
+    WOLFSSL_ENTER("EVP_MD_type");
+    for( ent = md_tbl; ent->name != NULL; ent++){
+        if(XSTRNCMP((const char *)md, ent->name, XSTRLEN(ent->name)+1) == 0) {
+            return ent->nid;
+        }
+    }
+    return 0;
+}
+
+#ifndef NO_MD4
+
+    /* return a pointer to MD4 EVP type */
+    const WOLFSSL_EVP_MD* wolfSSL_EVP_md4(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_md4");
+        return EVP_get_digestbyname("MD4");
+    }
+
+#endif /* !NO_MD4 */
+
+
+#ifndef NO_MD5
+
+    const WOLFSSL_EVP_MD* wolfSSL_EVP_md5(void)
+    {
+        WOLFSSL_ENTER("EVP_md5");
+        return EVP_get_digestbyname("MD5");
+    }
+
+#endif /* !NO_MD5 */
+
+
+#ifndef NO_WOLFSSL_STUB
+    const WOLFSSL_EVP_MD* wolfSSL_EVP_mdc2(void)
+    {
+        WOLFSSL_STUB("EVP_mdc2");
+        return NULL;
+    }
+#endif
+
+#ifndef NO_SHA
+    const WOLFSSL_EVP_MD* wolfSSL_EVP_sha1(void)
+    {
+        WOLFSSL_ENTER("EVP_sha1");
+        return EVP_get_digestbyname("SHA");
+    }
+#endif /* NO_SHA */
+
+#ifdef WOLFSSL_SHA224
+
+    const WOLFSSL_EVP_MD* wolfSSL_EVP_sha224(void)
+    {
+        WOLFSSL_ENTER("EVP_sha224");
+        return EVP_get_digestbyname("SHA224");
+    }
+
+#endif /* WOLFSSL_SHA224 */
+
+
+    const WOLFSSL_EVP_MD* wolfSSL_EVP_sha256(void)
+    {
+        WOLFSSL_ENTER("EVP_sha256");
+        return EVP_get_digestbyname("SHA256");
+    }
+
+#ifdef WOLFSSL_SHA384
+
+    const WOLFSSL_EVP_MD* wolfSSL_EVP_sha384(void)
+    {
+        WOLFSSL_ENTER("EVP_sha384");
+        return EVP_get_digestbyname("SHA384");
+    }
+
+#endif /* WOLFSSL_SHA384 */
+
+#ifdef WOLFSSL_SHA512
+
+    const WOLFSSL_EVP_MD* wolfSSL_EVP_sha512(void)
+    {
+        WOLFSSL_ENTER("EVP_sha512");
+        return EVP_get_digestbyname("SHA512");
+    }
+
+#endif /* WOLFSSL_SHA512 */
+
+#ifdef WOLFSSL_SHA3
+#ifndef WOLFSSL_NOSHA3_224
+    const WOLFSSL_EVP_MD* wolfSSL_EVP_sha3_224(void)
+    {
+        WOLFSSL_ENTER("EVP_sha3_224");
+        return EVP_get_digestbyname("SHA3_224");
+    }
+#endif /* WOLFSSL_NOSHA3_224 */
+
+
+#ifndef WOLFSSL_NOSHA3_256
+    const WOLFSSL_EVP_MD* wolfSSL_EVP_sha3_256(void)
+    {
+        WOLFSSL_ENTER("EVP_sha3_256");
+        return EVP_get_digestbyname("SHA3_256");
+    }
+#endif /* WOLFSSL_NOSHA3_256 */
+
+    const WOLFSSL_EVP_MD* wolfSSL_EVP_sha3_384(void)
+    {
+        WOLFSSL_ENTER("EVP_sha3_384");
+        return EVP_get_digestbyname("SHA3_384");
+    }
+
+#ifndef WOLFSSL_NOSHA3_512
+    const WOLFSSL_EVP_MD* wolfSSL_EVP_sha3_512(void)
+    {
+        WOLFSSL_ENTER("EVP_sha3_512");
+        return EVP_get_digestbyname("SHA3_512");
+    }
+#endif /* WOLFSSL_NOSHA3_512 */
+#endif /* WOLFSSL_SHA3 */
+
+    WOLFSSL_EVP_MD_CTX *wolfSSL_EVP_MD_CTX_new(void)
+    {
+        WOLFSSL_EVP_MD_CTX* ctx;
+        WOLFSSL_ENTER("EVP_MD_CTX_new");
+        ctx = (WOLFSSL_EVP_MD_CTX*)XMALLOC(sizeof *ctx, NULL,
+                                                       DYNAMIC_TYPE_OPENSSL);
+        if (ctx){
+            wolfSSL_EVP_MD_CTX_init(ctx);
+        }
+        return ctx;
+    }
+
+    WOLFSSL_API void wolfSSL_EVP_MD_CTX_free(WOLFSSL_EVP_MD_CTX *ctx)
+    {
+        if (ctx) {
+            WOLFSSL_ENTER("EVP_MD_CTX_free");
+                wolfSSL_EVP_MD_CTX_cleanup(ctx);
+                XFREE(ctx, NULL, DYNAMIC_TYPE_OPENSSL);
+            }
+    }
+
+    /* returns the NID of message digest used by the ctx */
+    int wolfSSL_EVP_MD_CTX_type(const WOLFSSL_EVP_MD_CTX *ctx) {
+        const struct s_ent *ent;
+
+        WOLFSSL_ENTER("EVP_MD_CTX_type");
+
+        if (ctx) {
+            for(ent = md_tbl; ent->name != NULL; ent++) {
+                if (ctx->macType == ent->macType) {
+                    return ent->nid;
+                }
+            }
+            /* Return whatever we got */
+            return ctx->macType;
+        }
+        return 0;
+    }
+
+
+    /* returns WOLFSSL_SUCCESS on success */
+    int wolfSSL_EVP_MD_CTX_copy(WOLFSSL_EVP_MD_CTX *out, const WOLFSSL_EVP_MD_CTX *in)
+    {
+        return wolfSSL_EVP_MD_CTX_copy_ex(out, in);
+    }
+
+    /* returns digest size */
+    int wolfSSL_EVP_MD_CTX_size(const WOLFSSL_EVP_MD_CTX *ctx) {
+        return(wolfSSL_EVP_MD_size(wolfSSL_EVP_MD_CTX_md(ctx)));
+    }
+    /* returns block size */
+    int wolfSSL_EVP_MD_CTX_block_size(const WOLFSSL_EVP_MD_CTX *ctx) {
+        return(wolfSSL_EVP_MD_block_size(wolfSSL_EVP_MD_CTX_md(ctx)));
+    }
+
+    /* Deep copy of EVP_MD hasher
+     * return WOLFSSL_SUCCESS on success */
+    static int wolfSSL_EVP_MD_Copy_Hasher(WOLFSSL_EVP_MD_CTX* des,
+            const WOLFSSL_EVP_MD_CTX* src)
+    {
+        if (src->macType == NID_hmac) {
+            wolfSSL_HmacCopy(&des->hash.hmac, (Hmac*)&src->hash.hmac);
+        }
+        else {
+            switch (src->macType) {
+            #ifndef NO_MD5
+                case WC_HASH_TYPE_MD5:
+                    wc_Md5Copy((wc_Md5*)&src->hash.digest,
+                            (wc_Md5*)&des->hash.digest);
+                    break;
+            #endif /* !NO_MD5 */
+
+            #ifndef NO_SHA
+                case WC_HASH_TYPE_SHA:
+                    wc_ShaCopy((wc_Sha*)&src->hash.digest,
+                            (wc_Sha*)&des->hash.digest);
+                    break;
+            #endif /* !NO_SHA */
+
+            #ifdef WOLFSSL_SHA224
+                case WC_HASH_TYPE_SHA224:
+                    wc_Sha224Copy((wc_Sha224*)&src->hash.digest,
+                            (wc_Sha224*)&des->hash.digest);
+                    break;
+            #endif /* WOLFSSL_SHA224 */
+
+            #ifndef NO_SHA256
+                case WC_HASH_TYPE_SHA256:
+                    wc_Sha256Copy((wc_Sha256*)&src->hash.digest,
+                            (wc_Sha256*)&des->hash.digest);
+                    break;
+            #endif /* !NO_SHA256 */
+
+            #ifdef WOLFSSL_SHA384
+                case WC_HASH_TYPE_SHA384:
+                    wc_Sha384Copy((wc_Sha384*)&src->hash.digest,
+                            (wc_Sha384*)&des->hash.digest);
+                    break;
+            #endif /* WOLFSSL_SHA384 */
+            #ifdef WOLFSSL_SHA512
+                case WC_HASH_TYPE_SHA512:
+                    wc_Sha512Copy((wc_Sha512*)&src->hash.digest,
+                        (wc_Sha512*)&des->hash.digest);
+                    break;
+            #endif /* WOLFSSL_SHA512 */
+        #ifdef WOLFSSL_SHA3
+            #ifndef WOLFSSL_NOSHA3_224
+                case WC_HASH_TYPE_SHA3_224:
+                    wc_Sha3_224_Copy((wc_Sha3*)&src->hash.digest,
+                            (wc_Sha3*)&des->hash.digest);
+                    break;
+            #endif
+
+            #ifndef WOLFSSL_NOSHA3_256
+                case WC_HASH_TYPE_SHA3_256:
+                    wc_Sha3_256_Copy((wc_Sha3*)&src->hash.digest,
+                            (wc_Sha3*)&des->hash.digest);
+                    break;
+            #endif
+
+                case WC_HASH_TYPE_SHA3_384:
+                    wc_Sha3_384_Copy((wc_Sha3*)&src->hash.digest,
+                            (wc_Sha3*)&des->hash.digest);
+                    break;
+
+            #ifndef WOLFSSL_NOSHA3_512
+                case WC_HASH_TYPE_SHA3_512:
+                    wc_Sha3_512_Copy((wc_Sha3*)&src->hash.digest,
+                        (wc_Sha3*)&des->hash.digest);
+                    break;
+            #endif
+        #endif
+                default:
+                    return WOLFSSL_FAILURE;
+            }
+        }
+        return WOLFSSL_SUCCESS;
+    }
+
+    /* copies structure in to the structure out
+     *
+     * returns WOLFSSL_SUCCESS on success */
+    int wolfSSL_EVP_MD_CTX_copy_ex(WOLFSSL_EVP_MD_CTX *out, const WOLFSSL_EVP_MD_CTX *in)
+    {
+        if ((out == NULL) || (in == NULL)) return WOLFSSL_FAILURE;
+        WOLFSSL_ENTER("EVP_CIPHER_MD_CTX_copy_ex");
+        XMEMCPY(out, in, sizeof(WOLFSSL_EVP_MD_CTX));
+        if (in->pctx != NULL) {
+            out->pctx = wolfSSL_EVP_PKEY_CTX_new(in->pctx->pkey, NULL);
+            if (out->pctx == NULL)
+                return WOLFSSL_FAILURE;
+        }
+        return wolfSSL_EVP_MD_Copy_Hasher(out, (WOLFSSL_EVP_MD_CTX*)in);
+    }
+
+    void wolfSSL_EVP_MD_CTX_init(WOLFSSL_EVP_MD_CTX* ctx)
+    {
+        WOLFSSL_ENTER("EVP_CIPHER_MD_CTX_init");
+        XMEMSET(ctx, 0, sizeof(WOLFSSL_EVP_MD_CTX));
+    }
+
+    const WOLFSSL_EVP_MD *wolfSSL_EVP_MD_CTX_md(const WOLFSSL_EVP_MD_CTX *ctx)
+    {
+        const struct s_ent *ent;
+        if (ctx == NULL)
+            return NULL;
+        WOLFSSL_ENTER("EVP_MD_CTX_md");
+        for(ent = md_tbl; ent->name != NULL; ent++) {
+            if(ctx->macType == ent->macType) {
+                return (const WOLFSSL_EVP_MD *)ent->name;
+            }
+        }
+        return (WOLFSSL_EVP_MD *)NULL;
+    }
+
+    #ifndef NO_AES
+
+    #ifdef HAVE_AES_CBC
+    #ifdef WOLFSSL_AES_128
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_128_cbc(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_128_cbc");
+        if (EVP_AES_128_CBC == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_128_CBC;
+    }
+    #endif /* WOLFSSL_AES_128 */
+
+
+    #ifdef WOLFSSL_AES_192
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_192_cbc(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_192_cbc");
+        if (EVP_AES_192_CBC == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_192_CBC;
+    }
+    #endif /* WOLFSSL_AES_192 */
+
+
+    #ifdef WOLFSSL_AES_256
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_256_cbc(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_256_cbc");
+        if (EVP_AES_256_CBC == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_256_CBC;
+    }
+    #endif /* WOLFSSL_AES_256 */
+    #endif /* HAVE_AES_CBC */
+
+    #ifdef WOLFSSL_AES_CFB
+#if !defined(HAVE_SELFTEST) && !defined(HAVE_FIPS)
+    #ifdef WOLFSSL_AES_128
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_128_cfb1(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_128_cfb1");
+        if (EVP_AES_128_CFB1 == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_128_CFB1;
+    }
+    #endif /* WOLFSSL_AES_128 */
+
+    #ifdef WOLFSSL_AES_192
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_192_cfb1(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_192_cfb1");
+        if (EVP_AES_192_CFB1 == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_192_CFB1;
+    }
+    #endif /* WOLFSSL_AES_192 */
+
+    #ifdef WOLFSSL_AES_256
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_256_cfb1(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_256_cfb1");
+        if (EVP_AES_256_CFB1 == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_256_CFB1;
+    }
+    #endif /* WOLFSSL_AES_256 */
+
+    #ifdef WOLFSSL_AES_128
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_128_cfb8(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_128_cfb8");
+        if (EVP_AES_128_CFB8 == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_128_CFB8;
+    }
+    #endif /* WOLFSSL_AES_128 */
+
+    #ifdef WOLFSSL_AES_192
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_192_cfb8(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_192_cfb8");
+        if (EVP_AES_192_CFB8 == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_192_CFB8;
+    }
+    #endif /* WOLFSSL_AES_192 */
+
+    #ifdef WOLFSSL_AES_256
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_256_cfb8(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_256_cfb8");
+        if (EVP_AES_256_CFB8 == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_256_CFB8;
+    }
+    #endif /* WOLFSSL_AES_256 */
+#endif /* !HAVE_SELFTEST && !HAVE_FIPS */
+
+    #ifdef WOLFSSL_AES_128
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_128_cfb128(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_128_cfb128");
+        if (EVP_AES_128_CFB128 == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_128_CFB128;
+    }
+    #endif /* WOLFSSL_AES_128 */
+
+    #ifdef WOLFSSL_AES_192
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_192_cfb128(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_192_cfb128");
+        if (EVP_AES_192_CFB128 == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_192_CFB128;
+    }
+    #endif /* WOLFSSL_AES_192 */
+
+    #ifdef WOLFSSL_AES_256
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_256_cfb128(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_256_cfb128");
+        if (EVP_AES_256_CFB128 == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_256_CFB128;
+    }
+    #endif /* WOLFSSL_AES_256 */
+    #endif /* WOLFSSL_AES_CFB */
+
+    #ifdef WOLFSSL_AES_OFB
+    #ifdef WOLFSSL_AES_128
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_128_ofb(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_128_ofb");
+        if (EVP_AES_128_OFB == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_128_OFB;
+    }
+    #endif /* WOLFSSL_AES_128 */
+
+    #ifdef WOLFSSL_AES_192
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_192_ofb(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_192_ofb");
+        if (EVP_AES_192_OFB == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_192_OFB;
+    }
+    #endif /* WOLFSSL_AES_192 */
+
+    #ifdef WOLFSSL_AES_256
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_256_ofb(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_256_ofb");
+        if (EVP_AES_256_OFB == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_256_OFB;
+    }
+    #endif /* WOLFSSL_AES_256 */
+    #endif /* WOLFSSL_AES_OFB */
+
+    #ifdef WOLFSSL_AES_XTS
+    #ifdef WOLFSSL_AES_128
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_128_xts(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_128_xts");
+        if (EVP_AES_128_XTS == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_128_XTS;
+    }
+    #endif /* WOLFSSL_AES_128 */
+
+    #ifdef WOLFSSL_AES_256
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_256_xts(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_256_xts");
+        if (EVP_AES_256_XTS == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_256_XTS;
+    }
+    #endif /* WOLFSSL_AES_256 */
+    #endif /* WOLFSSL_AES_XTS */
+
+    #ifdef HAVE_AESGCM
+    #ifdef WOLFSSL_AES_128
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_128_gcm(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_128_gcm");
+        if (EVP_AES_128_GCM == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_128_GCM;
+    }
+    #endif /* WOLFSSL_GCM_128 */
+
+    #ifdef WOLFSSL_AES_192
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_192_gcm(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_192_gcm");
+        if (EVP_AES_192_GCM == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_192_GCM;
+    }
+    #endif /* WOLFSSL_AES_192 */
+
+    #ifdef WOLFSSL_AES_256
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_256_gcm(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_256_gcm");
+        if (EVP_AES_256_GCM == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_256_GCM;
+    }
+    #endif /* WOLFSSL_AES_256 */
+    #endif /* HAVE_AESGCM */
+
+    #ifdef WOLFSSL_AES_128
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_128_ctr(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_128_ctr");
+        if (EVP_AES_128_CTR == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_128_CTR;
+    }
+    #endif /* WOLFSSL_AES_2128 */
+
+
+    #ifdef WOLFSSL_AES_192
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_192_ctr(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_192_ctr");
+        if (EVP_AES_192_CTR == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_192_CTR;
+    }
+    #endif /* WOLFSSL_AES_192 */
+
+
+    #ifdef WOLFSSL_AES_256
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_256_ctr(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_256_ctr");
+        if (EVP_AES_256_CTR == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_256_CTR;
+    }
+    #endif /* WOLFSSL_AES_256 */
+
+    #ifdef WOLFSSL_AES_128
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_128_ecb(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_128_ecb");
+        if (EVP_AES_128_ECB == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_128_ECB;
+    }
+    #endif /* WOLFSSL_AES_128 */
+
+
+    #ifdef WOLFSSL_AES_192
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_192_ecb(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_192_ecb");
+        if (EVP_AES_192_ECB == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_192_ECB;
+    }
+    #endif /* WOLFSSL_AES_192*/
+
+
+    #ifdef WOLFSSL_AES_256
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_256_ecb(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_256_ecb");
+        if (EVP_AES_256_ECB == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_256_ECB;
+    }
+    #endif /* WOLFSSL_AES_256 */
+    #endif /* NO_AES */
+
+#ifndef NO_DES3
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_des_cbc(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_des_cbc");
+        if (EVP_DES_CBC == NULL)
+            wolfSSL_EVP_init();
+        return EVP_DES_CBC;
+    }
+#ifdef WOLFSSL_DES_ECB
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_des_ecb(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_des_ecb");
+        if (EVP_DES_ECB == NULL)
+            wolfSSL_EVP_init();
+        return EVP_DES_ECB;
+    }
+#endif
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_des_ede3_cbc(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_des_ede3_cbc");
+        if (EVP_DES_EDE3_CBC == NULL)
+            wolfSSL_EVP_init();
+        return EVP_DES_EDE3_CBC;
+    }
+#ifdef WOLFSSL_DES_ECB
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_des_ede3_ecb(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_des_ede3_ecb");
+        if (EVP_DES_EDE3_ECB == NULL)
+            wolfSSL_EVP_init();
+        return EVP_DES_EDE3_ECB;
+    }
+#endif
+#endif /* NO_DES3 */
+
+#ifndef NO_RC4
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_rc4(void)
+    {
+        static const char* type = "ARC4";
+        WOLFSSL_ENTER("wolfSSL_EVP_rc4");
+        return type;
+    }
+#endif
+
+#ifdef HAVE_IDEA
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_idea_cbc(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_idea_cbc");
+        if (EVP_IDEA_CBC == NULL)
+            wolfSSL_EVP_init();
+        return EVP_IDEA_CBC;
+    }
+#endif
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_enc_null(void)
+    {
+        static const char* type = "NULL";
+        WOLFSSL_ENTER("wolfSSL_EVP_enc_null");
+        return type;
+    }
+
+    int wolfSSL_EVP_MD_CTX_cleanup(WOLFSSL_EVP_MD_CTX* ctx)
+    {
+        WOLFSSL_ENTER("EVP_MD_CTX_cleanup");
+        if (ctx->pctx != NULL)
+            wolfSSL_EVP_PKEY_CTX_free(ctx->pctx);
+
+        if (ctx->macType == NID_hmac) {
+            wc_HmacFree(&ctx->hash.hmac);
+        }
+        else {
+            switch (ctx->macType) {
+            #ifndef NO_MD5
+                case WC_HASH_TYPE_MD5:
+                    wc_Md5Free((wc_Md5*)&ctx->hash.digest);
+                    break;
+            #endif /* !NO_MD5 */
+
+            #ifndef NO_SHA
+                case WC_HASH_TYPE_SHA:
+                    wc_ShaFree((wc_Sha*)&ctx->hash.digest);
+                    break;
+            #endif /* !NO_SHA */
+
+            #ifdef WOLFSSL_SHA224
+                case WC_HASH_TYPE_SHA224:
+                    wc_Sha224Free((wc_Sha224*)&ctx->hash.digest);
+                    break;
+            #endif /* WOLFSSL_SHA224 */
+
+            #ifndef NO_SHA256
+                case WC_HASH_TYPE_SHA256:
+                    wc_Sha256Free((wc_Sha256*)&ctx->hash.digest);
+                    break;
+            #endif /* !NO_SHA256 */
+
+            #ifdef WOLFSSL_SHA384
+                case WC_HASH_TYPE_SHA384:
+                    wc_Sha384Free((wc_Sha384*)&ctx->hash.digest);
+                    break;
+            #endif /* WOLFSSL_SHA384 */
+            #ifdef WOLFSSL_SHA512
+                case WC_HASH_TYPE_SHA512:
+                    wc_Sha512Free((wc_Sha512*)&ctx->hash.digest);
+                    break;
+            #endif /* WOLFSSL_SHA512 */
+        #ifdef WOLFSSL_SHA3
+            #ifndef WOLFSSL_NOSHA3_224
+                case WC_HASH_TYPE_SHA3_224:
+                    wc_Sha3_224_Free((wc_Sha3*)&ctx->hash.digest);
+                    break;
+            #endif
+
+            #ifndef WOLFSSL_NOSHA3_256
+                case WC_HASH_TYPE_SHA3_256:
+                    wc_Sha3_256_Free((wc_Sha3*)&ctx->hash.digest);
+                    break;
+            #endif
+
+                case WC_HASH_TYPE_SHA3_384:
+                    wc_Sha3_384_Free((wc_Sha3*)&ctx->hash.digest);
+                    break;
+
+            #ifndef WOLFSSL_NOSHA3_512
+                case WC_HASH_TYPE_SHA3_512:
+                    wc_Sha3_512_Free((wc_Sha3*)&ctx->hash.digest);
+                    break;
+            #endif
+        #endif
+                default:
+                    return WOLFSSL_FAILURE;
+            }
+        }
+        ForceZero(ctx, sizeof(*ctx));
+        ctx->macType = WC_HASH_TYPE_NONE;
+        return 1;
+    }
+
+    void wolfSSL_EVP_CIPHER_CTX_init(WOLFSSL_EVP_CIPHER_CTX* ctx)
+    {
+        WOLFSSL_ENTER("EVP_CIPHER_CTX_init");
+        if (ctx) {
+            XMEMSET(ctx, 0, sizeof(WOLFSSL_EVP_CIPHER_CTX));
+            ctx->cipherType = WOLFSSL_EVP_CIPH_TYPE_INIT;   /* not yet initialized */
+            ctx->keyLen     = 0;
+            ctx->enc        = 1;      /* start in encrypt mode */
+        }
+    }
+
+#if defined(HAVE_AESGCM) && !defined(HAVE_SELFTEST)
+    static WC_INLINE void IncCtr(byte* ctr, word32 ctrSz)
+    {
+        int i;
+        for (i = ctrSz-1; i >= 0; i--) {
+            if (++ctr[i])
+                break;
+        }
+    }
+#endif
+
+    /* This function allows cipher specific parameters to be
+    determined and set. */
+    int wolfSSL_EVP_CIPHER_CTX_ctrl(WOLFSSL_EVP_CIPHER_CTX *ctx, int type, \
+                                    int arg, void *ptr)
+    {
+        int ret = WOLFSSL_FAILURE;
+#if defined(HAVE_AESGCM) && !defined(HAVE_SELFTEST) && !defined(WC_NO_RNG)
+        WC_RNG rng;
+#endif
+        if (ctx == NULL)
+            return WOLFSSL_FAILURE;
+
+        (void)arg;
+        (void)ptr;
+
+        WOLFSSL_ENTER("EVP_CIPHER_CTX_ctrl");
+
+        switch(type) {
+            case EVP_CTRL_INIT:
+                wolfSSL_EVP_CIPHER_CTX_init(ctx);
+                if(ctx)
+                    ret = WOLFSSL_SUCCESS;
+                break;
+            case EVP_CTRL_SET_KEY_LENGTH:
+                ret = wolfSSL_EVP_CIPHER_CTX_set_key_length(ctx, arg);
+                break;
+#if defined(HAVE_AESGCM) && !defined(HAVE_SELFTEST) && !defined(WC_NO_RNG)
+            case EVP_CTRL_GCM_SET_IVLEN:
+                if(arg <= 0 || arg > 16)
+                    return WOLFSSL_FAILURE;
+                ret = wolfSSL_EVP_CIPHER_CTX_set_iv_length(ctx, arg);
+                break;
+            case EVP_CTRL_AEAD_SET_IV_FIXED:
+                if (arg == -1) {
+                    /* arg == -1 copies ctx->ivSz from ptr */
+                    ret = wolfSSL_EVP_CIPHER_CTX_set_iv(ctx, (byte*)ptr, ctx->ivSz);
+                }
+                else {
+                    /*
+                     * Fixed field must be at least 4 bytes and invocation
+                     * field at least 8.
+                     */
+                    if ((arg < 4) || (ctx->ivSz - arg) < 8) {
+                        WOLFSSL_MSG("Fixed field or invocation field too short");
+                        ret = WOLFSSL_FAILURE;
+                        break;
+                    }
+                    if (wc_InitRng(&rng) != 0) {
+                        WOLFSSL_MSG("wc_InitRng failed");
+                        ret = WOLFSSL_FAILURE;
+                        break;
+                    }
+                    if (arg) {
+                        XMEMCPY(ctx->iv, ptr, arg);
+                    }
+                    if (wc_RNG_GenerateBlock(&rng, ctx->iv   + arg,
+                                                   ctx->ivSz - arg) != 0) {
+                        /* rng is freed immediately after if block so no need
+                         * to do it here
+                         */
+                        WOLFSSL_MSG("wc_RNG_GenerateBlock failed");
+                        ret = WOLFSSL_FAILURE;
+                    }
+
+                    if (wc_FreeRng(&rng) != 0) {
+                        WOLFSSL_MSG("wc_FreeRng failed");
+                        ret = WOLFSSL_FAILURE;
+                        break;
+                    }
+                }
+                break;
+#if !defined(_WIN32) && !defined(HAVE_FIPS)
+            case EVP_CTRL_GCM_IV_GEN:
+                if (ctx->cipher.aes.keylen == 0 || ctx->ivSz == 0) {
+                    ret = WOLFSSL_FAILURE;
+                    WOLFSSL_MSG("Key or IV not set");
+                    break;
+                }
+                if ((ret = wc_AesGcmSetExtIV(&ctx->cipher.aes, ctx->iv, ctx->ivSz)) != 0) {
+                    WOLFSSL_MSG("wc_AesGcmSetIV failed");
+                    ret = WOLFSSL_FAILURE;
+                }
+                /* OpenSSL increments the IV. Not sure why */
+                IncCtr(ctx->iv, ctx->ivSz);
+                break;
+#endif
+            case EVP_CTRL_AEAD_SET_TAG:
+                if(arg <= 0 || arg > 16 || (ptr == NULL))
+                    return WOLFSSL_FAILURE;
+
+                XMEMCPY(ctx->authTag, ptr, arg);
+                ctx->authTagSz = arg;
+                ret = WOLFSSL_SUCCESS;
+
+                break;
+            case EVP_CTRL_AEAD_GET_TAG:
+                if(arg <= 0 || arg > 16)
+                    return WOLFSSL_FAILURE;
+
+                XMEMCPY(ptr, ctx->authTag, arg);
+                ret = WOLFSSL_SUCCESS;
+                break;
+#endif /* HAVE_AESGCM && !HAVE_SELFTEST && !WC_NO_RNG */
+            default:
+                WOLFSSL_MSG("EVP_CIPHER_CTX_ctrl operation not yet handled");
+                ret = WOLFSSL_FAILURE;
+        }
+        return ret;
+    }
+
+    /* WOLFSSL_SUCCESS on ok */
+    int wolfSSL_EVP_CIPHER_CTX_cleanup(WOLFSSL_EVP_CIPHER_CTX* ctx)
+    {
+        WOLFSSL_ENTER("EVP_CIPHER_CTX_cleanup");
+        if (ctx) {
+            ctx->cipherType = WOLFSSL_EVP_CIPH_TYPE_INIT;  /* not yet initialized  */
+            ctx->keyLen     = 0;
+        }
+
+        return WOLFSSL_SUCCESS;
+    }
+
+    /* Permanent stub for Qt compilation. */
+    #if defined(WOLFSSL_QT) && !defined(NO_WOLFSSL_STUB)
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_rc2_cbc(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_rc2_cbc");
+        WOLFSSL_STUB("EVP_rc2_cbc");
+        return NULL;
+    }
+    #endif
+
+#if defined(WOLFSSL_ENCRYPTED_KEYS) && !defined(NO_PWDBASED)
+
+    int wolfSSL_EVP_BytesToKey(const WOLFSSL_EVP_CIPHER* type,
+                       const WOLFSSL_EVP_MD* md, const byte* salt,
+                       const byte* data, int sz, int count, byte* key, byte* iv)
+    {
+        int ret;
+        int hashType = WC_HASH_TYPE_NONE;
+    #ifdef WOLFSSL_SMALL_STACK
+        EncryptedInfo* info;
+    #else
+        EncryptedInfo  info[1];
+    #endif
+
+    #ifdef WOLFSSL_SMALL_STACK
+        info = (EncryptedInfo*)XMALLOC(sizeof(EncryptedInfo), NULL,
+                                       DYNAMIC_TYPE_ENCRYPTEDINFO);
+        if (info == NULL) {
+            WOLFSSL_MSG("malloc failed");
+            return WOLFSSL_FAILURE;
+        }
+    #endif
+
+        XMEMSET(info, 0, sizeof(EncryptedInfo));
+
+        ret = wc_EncryptedInfoGet(info, type);
+        if (ret < 0)
+            goto end;
+
+        if (data == NULL) {
+            ret = info->keySz;
+            goto end;
+        }
+
+        ret = wolfSSL_EVP_get_hashinfo(md, &hashType, NULL);
+        if (ret == WOLFSSL_FAILURE)
+            goto end;
+
+        ret = wc_PBKDF1_ex(key, info->keySz, iv, info->ivSz, data, sz, salt,
+                           EVP_SALT_SIZE, count, hashType, NULL);
+        if (ret == 0)
+            ret = info->keySz;
+
+    end:
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(info, NULL, DYNAMIC_TYPE_ENCRYPTEDINFO);
+    #endif
+        if (ret < 0)
+            return 0; /* failure - for compatibility */
+
+        return ret;
+    }
+
+#endif /* WOLFSSL_ENCRYPTED_KEYS && !NO_PWDBASED */
+
+#ifndef NO_AES
+    static int   AesSetKey_ex(Aes* aes, const byte* key, word32 len,
+                              const byte* iv, int dir, int direct)
+    {
+        int ret;
+        /* wc_AesSetKey clear aes.reg if iv == NULL.
+           Keep IV for openSSL compatibility */
+        if (iv == NULL)
+            XMEMCPY((byte *)aes->tmp, (byte *)aes->reg, AES_BLOCK_SIZE);
+        if (direct) {
+        #if defined(WOLFSSL_AES_DIRECT)
+            ret = wc_AesSetKeyDirect(aes, key, len, iv, dir);
+        #else
+            ret = NOT_COMPILED_IN;
+        #endif
+        }
+        else {
+            ret = wc_AesSetKey(aes, key, len, iv, dir);
+        }
+        if (iv == NULL)
+            XMEMCPY((byte *)aes->reg, (byte *)aes->tmp, AES_BLOCK_SIZE);
+        return ret;
+    }
+#endif
+
+    /* return WOLFSSL_SUCCESS on ok, 0 on failure to match API compatibility */
+    int  wolfSSL_EVP_CipherInit(WOLFSSL_EVP_CIPHER_CTX* ctx,
+                               const WOLFSSL_EVP_CIPHER* type, const byte* key,
+                               const byte* iv, int enc)
+    {
+        int ret = 0;
+        (void)key;
+        (void)iv;
+        (void)enc;
+
+        WOLFSSL_ENTER("wolfSSL_EVP_CipherInit");
+        if (ctx == NULL) {
+            WOLFSSL_MSG("no ctx");
+            return WOLFSSL_FAILURE;
+        }
+
+        if (type == NULL && ctx->cipherType == WOLFSSL_EVP_CIPH_TYPE_INIT) {
+            WOLFSSL_MSG("no type set");
+            return WOLFSSL_FAILURE;
+        }
+        if (ctx->cipherType == WOLFSSL_EVP_CIPH_TYPE_INIT){
+            /* only first EVP_CipherInit invoke. ctx->cipherType is set below */
+            XMEMSET(&ctx->cipher, 0, sizeof(ctx->cipher));
+            ctx->flags   = 0;
+        }
+        /* always clear buffer state */
+        ctx->bufUsed = 0;
+        ctx->lastUsed = 0;
+
+#ifdef HAVE_WOLFSSL_EVP_CIPHER_CTX_IV
+        if (!iv && ctx->ivSz) {
+            iv = ctx->iv;
+        }
+#endif
+
+#ifndef NO_AES
+    #ifdef HAVE_AES_CBC
+        #ifdef WOLFSSL_AES_128
+        if (ctx->cipherType == AES_128_CBC_TYPE ||
+            (type && XSTRNCMP(type, EVP_AES_128_CBC, EVP_AES_SIZE) == 0)) {
+            WOLFSSL_MSG("EVP_AES_128_CBC");
+            ctx->cipherType = AES_128_CBC_TYPE;
+            ctx->flags     &= ~WOLFSSL_EVP_CIPH_MODE;
+            ctx->flags     |= WOLFSSL_EVP_CIPH_CBC_MODE;
+            ctx->keyLen     = 16;
+            ctx->block_size = AES_BLOCK_SIZE;
+            ctx->ivSz       = AES_BLOCK_SIZE;
+            if (enc == 0 || enc == 1)
+                ctx->enc = enc ? 1 : 0;
+            if (key) {
+                ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv,
+                                ctx->enc ? AES_ENCRYPTION : AES_DECRYPTION, 0);
+                if (ret != 0)
+                    return WOLFSSL_FAILURE;
+            }
+            if (iv && key == NULL) {
+                ret = wc_AesSetIV(&ctx->cipher.aes, iv);
+                if (ret != 0)
+                    return WOLFSSL_FAILURE;
+            }
+        }
+        #endif /* WOLFSSL_AES_128 */
+        #ifdef WOLFSSL_AES_192
+        if (ctx->cipherType == AES_192_CBC_TYPE ||
+                 (type && XSTRNCMP(type, EVP_AES_192_CBC, EVP_AES_SIZE) == 0)) {
+            WOLFSSL_MSG("EVP_AES_192_CBC");
+            ctx->cipherType = AES_192_CBC_TYPE;
+            ctx->flags     &= ~WOLFSSL_EVP_CIPH_MODE;
+            ctx->flags     |= WOLFSSL_EVP_CIPH_CBC_MODE;
+            ctx->keyLen     = 24;
+            ctx->block_size = AES_BLOCK_SIZE;
+            ctx->ivSz       = AES_BLOCK_SIZE;
+            if (enc == 0 || enc == 1)
+                ctx->enc = enc ? 1 : 0;
+            if (key) {
+                ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv,
+                                ctx->enc ? AES_ENCRYPTION : AES_DECRYPTION, 0);
+                if (ret != 0)
+                    return WOLFSSL_FAILURE;
+            }
+            if (iv && key == NULL) {
+                ret = wc_AesSetIV(&ctx->cipher.aes, iv);
+                if (ret != 0)
+                    return WOLFSSL_FAILURE;
+            }
+        }
+        #endif /* WOLFSSL_AES_192 */
+        #ifdef WOLFSSL_AES_256
+        if (ctx->cipherType == AES_256_CBC_TYPE ||
+                 (type && XSTRNCMP(type, EVP_AES_256_CBC, EVP_AES_SIZE) == 0)) {
+            WOLFSSL_MSG("EVP_AES_256_CBC");
+            ctx->cipherType = AES_256_CBC_TYPE;
+            ctx->flags     &= ~WOLFSSL_EVP_CIPH_MODE;
+            ctx->flags     |= WOLFSSL_EVP_CIPH_CBC_MODE;
+            ctx->keyLen     = 32;
+            ctx->block_size = AES_BLOCK_SIZE;
+            ctx->ivSz       = AES_BLOCK_SIZE;
+            if (enc == 0 || enc == 1)
+                ctx->enc = enc ? 1 : 0;
+            if (key) {
+                ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv,
+                                ctx->enc ? AES_ENCRYPTION : AES_DECRYPTION, 0);
+                if (ret != 0){
+                    WOLFSSL_MSG("AesSetKey() failed");
+                    return WOLFSSL_FAILURE;
+                }
+            }
+            if (iv && key == NULL) {
+                ret = wc_AesSetIV(&ctx->cipher.aes, iv);
+                if (ret != 0){
+                    WOLFSSL_MSG("wc_AesSetIV() failed");
+                    return WOLFSSL_FAILURE;
+                }
+            }
+        }
+        #endif /* WOLFSSL_AES_256 */
+    #endif /* HAVE_AES_CBC */
+#if !defined(_WIN32) && !defined(HAVE_FIPS) && !defined(HAVE_SELFTEST)
+    #ifdef HAVE_AESGCM
+        #ifdef WOLFSSL_AES_128
+        if (ctx->cipherType == AES_128_GCM_TYPE ||
+            (type && XSTRNCMP(type, EVP_AES_128_GCM, EVP_AES_SIZE) == 0)) {
+            WOLFSSL_MSG("EVP_AES_128_GCM");
+            ctx->cipherType = AES_128_GCM_TYPE;
+            ctx->flags     &= ~WOLFSSL_EVP_CIPH_MODE;
+            ctx->flags     |= WOLFSSL_EVP_CIPH_GCM_MODE;
+            ctx->keyLen     = 16;
+            ctx->block_size = AES_BLOCK_SIZE;
+            ctx->authTagSz  = AES_BLOCK_SIZE;
+            ctx->ivSz       = GCM_NONCE_MID_SZ;
+
+            XMEMSET(ctx->authTag, 0, ctx->authTagSz);
+            if (key && wc_AesGcmSetKey(&ctx->cipher.aes, key, ctx->keyLen)) {
+                WOLFSSL_MSG("wc_AesGcmSetKey() failed");
+                return WOLFSSL_FAILURE;
+            }
+            if (iv && wc_AesGcmSetExtIV(&ctx->cipher.aes, iv, GCM_NONCE_MID_SZ)) {
+                WOLFSSL_MSG("wc_AesGcmSetExtIV() failed");
+                return WOLFSSL_FAILURE;
+            }
+            if (enc == 0 || enc == 1)
+                ctx->enc = enc ? 1 : 0;
+        }
+        #endif /* WOLFSSL_AES_128 */
+        #ifdef WOLFSSL_AES_192
+        if (ctx->cipherType == AES_192_GCM_TYPE ||
+                 (type && XSTRNCMP(type, EVP_AES_192_GCM, EVP_AES_SIZE) == 0)) {
+            WOLFSSL_MSG("EVP_AES_192_GCM");
+            ctx->cipherType = AES_192_GCM_TYPE;
+            ctx->flags     &= ~WOLFSSL_EVP_CIPH_MODE;
+            ctx->flags     |= WOLFSSL_EVP_CIPH_GCM_MODE;
+            ctx->keyLen     = 24;
+            ctx->block_size = AES_BLOCK_SIZE;
+            ctx->authTagSz  = AES_BLOCK_SIZE;
+            ctx->ivSz       = GCM_NONCE_MID_SZ;
+
+            XMEMSET(ctx->authTag, 0, ctx->authTagSz);
+            if (key && wc_AesGcmSetKey(&ctx->cipher.aes, key, ctx->keyLen)) {
+                WOLFSSL_MSG("wc_AesGcmSetKey() failed");
+                return WOLFSSL_FAILURE;
+            }
+            if (iv && wc_AesGcmSetExtIV(&ctx->cipher.aes, iv, GCM_NONCE_MID_SZ)) {
+                WOLFSSL_MSG("wc_AesGcmSetExtIV() failed");
+                return WOLFSSL_FAILURE;
+            }
+            if (enc == 0 || enc == 1)
+                ctx->enc = enc ? 1 : 0;
+        }
+        #endif /* WOLFSSL_AES_192 */
+        #ifdef WOLFSSL_AES_256
+        if (ctx->cipherType == AES_256_GCM_TYPE ||
+                 (type && XSTRNCMP(type, EVP_AES_256_GCM, EVP_AES_SIZE) == 0)) {
+            WOLFSSL_MSG("EVP_AES_256_GCM");
+            ctx->cipherType = AES_256_GCM_TYPE;
+            ctx->flags     &= ~WOLFSSL_EVP_CIPH_MODE;
+            ctx->flags     |= WOLFSSL_EVP_CIPH_GCM_MODE;
+            ctx->keyLen     = 32;
+            ctx->block_size = AES_BLOCK_SIZE;
+            ctx->authTagSz  = AES_BLOCK_SIZE;
+            ctx->ivSz       = GCM_NONCE_MID_SZ;
+
+            XMEMSET(ctx->authTag, 0, ctx->authTagSz);
+            if (key && wc_AesGcmSetKey(&ctx->cipher.aes, key, ctx->keyLen)) {
+                WOLFSSL_MSG("wc_AesGcmSetKey() failed");
+                return WOLFSSL_FAILURE;
+            }
+            if (iv && wc_AesGcmSetExtIV(&ctx->cipher.aes, iv, GCM_NONCE_MID_SZ)) {
+                WOLFSSL_MSG("wc_AesGcmSetExtIV() failed");
+                return WOLFSSL_FAILURE;
+            }
+            if (enc == 0 || enc == 1)
+                ctx->enc = enc ? 1 : 0;
+        }
+        #endif /* WOLFSSL_AES_256 */
+    #endif /* HAVE_AESGCM */
+#endif /* !defined(_WIN32) && !defined(HAVE_FIPS) && !defined(HAVE_SELFTEST) */
+#ifdef WOLFSSL_AES_COUNTER
+        #ifdef WOLFSSL_AES_128
+        if (ctx->cipherType == AES_128_CTR_TYPE ||
+                 (type && XSTRNCMP(type, EVP_AES_128_CTR, EVP_AES_SIZE) == 0)) {
+            WOLFSSL_MSG("EVP_AES_128_CTR");
+            ctx->flags     &= ~WOLFSSL_EVP_CIPH_MODE;
+            ctx->cipherType = AES_128_CTR_TYPE;
+            ctx->flags     |= WOLFSSL_EVP_CIPH_CTR_MODE;
+            ctx->keyLen     = 16;
+            ctx->block_size = NO_PADDING_BLOCK_SIZE;
+            ctx->ivSz       = AES_BLOCK_SIZE;
+#if defined(WOLFSSL_AES_COUNTER) || defined(WOLFSSL_AES_CFB)
+            ctx->cipher.aes.left = 0;
+#endif
+            if (enc == 0 || enc == 1)
+                ctx->enc = enc ? 1 : 0;
+            if (key) {
+                ret =  AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv,
+                    AES_ENCRYPTION, 1);
+                if (ret != 0)
+                    return WOLFSSL_FAILURE;
+            }
+            if (iv && key == NULL) {
+                ret = wc_AesSetIV(&ctx->cipher.aes, iv);
+                if (ret != 0)
+                    return WOLFSSL_FAILURE;
+            }
+        }
+        #endif /* WOLFSSL_AES_128 */
+        #ifdef WOLFSSL_AES_192
+        if (ctx->cipherType == AES_192_CTR_TYPE ||
+                 (type && XSTRNCMP(type, EVP_AES_192_CTR, EVP_AES_SIZE) == 0)) {
+            WOLFSSL_MSG("EVP_AES_192_CTR");
+            ctx->cipherType = AES_192_CTR_TYPE;
+            ctx->flags     &= ~WOLFSSL_EVP_CIPH_MODE;
+            ctx->flags     |= WOLFSSL_EVP_CIPH_CTR_MODE;
+            ctx->keyLen     = 24;
+            ctx->block_size = NO_PADDING_BLOCK_SIZE;
+            ctx->ivSz       = AES_BLOCK_SIZE;
+#if defined(WOLFSSL_AES_COUNTER) || defined(WOLFSSL_AES_CFB)
+            ctx->cipher.aes.left = 0;
+#endif
+            if (enc == 0 || enc == 1)
+                ctx->enc = enc ? 1 : 0;
+            if (key) {
+                ret =  AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv,
+                      AES_ENCRYPTION, 1);
+                if (ret != 0)
+                    return WOLFSSL_FAILURE;
+            }
+            if (iv && key == NULL) {
+                ret = wc_AesSetIV(&ctx->cipher.aes, iv);
+                if (ret != 0)
+                    return WOLFSSL_FAILURE;
+            }
+        }
+        #endif /* WOLFSSL_AES_192 */
+        #ifdef WOLFSSL_AES_256
+        if (ctx->cipherType == AES_256_CTR_TYPE ||
+                 (type && XSTRNCMP(type, EVP_AES_256_CTR, EVP_AES_SIZE) == 0)) {
+            WOLFSSL_MSG("EVP_AES_256_CTR");
+            ctx->cipherType = AES_256_CTR_TYPE;
+            ctx->flags     &= ~WOLFSSL_EVP_CIPH_MODE;
+            ctx->flags     |= WOLFSSL_EVP_CIPH_CTR_MODE;
+            ctx->keyLen     = 32;
+            ctx->block_size = NO_PADDING_BLOCK_SIZE;
+            ctx->ivSz       = AES_BLOCK_SIZE;
+#if defined(WOLFSSL_AES_COUNTER) || defined(WOLFSSL_AES_CFB)
+            ctx->cipher.aes.left = 0;
+#endif
+            if (enc == 0 || enc == 1)
+                ctx->enc = enc ? 1 : 0;
+            if (key) {
+                ret =  AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv,
+                      AES_ENCRYPTION, 1);
+                if (ret != 0)
+                    return WOLFSSL_FAILURE;
+            }
+            if (iv && key == NULL) {
+                ret = wc_AesSetIV(&ctx->cipher.aes, iv);
+                if (ret != 0)
+                    return WOLFSSL_FAILURE;
+            }
+        }
+        #endif /* WOLFSSL_AES_256 */
+#endif /* WOLFSSL_AES_COUNTER */
+        #ifdef WOLFSSL_AES_128
+        if (ctx->cipherType == AES_128_ECB_TYPE ||
+            (type && XSTRNCMP(type, EVP_AES_128_ECB, EVP_AES_SIZE) == 0)) {
+            WOLFSSL_MSG("EVP_AES_128_ECB");
+            ctx->cipherType = AES_128_ECB_TYPE;
+            ctx->flags     &= ~WOLFSSL_EVP_CIPH_MODE;
+            ctx->flags     |= WOLFSSL_EVP_CIPH_ECB_MODE;
+            ctx->keyLen     = 16;
+            ctx->block_size = AES_BLOCK_SIZE;
+            if (enc == 0 || enc == 1)
+                ctx->enc = enc ? 1 : 0;
+            if (key) {
+                ret =  AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, NULL,
+                      ctx->enc ? AES_ENCRYPTION : AES_DECRYPTION, 1);
+            }
+            if (ret != 0)
+                return WOLFSSL_FAILURE;
+        }
+        #endif /* WOLFSSL_AES_128 */
+        #ifdef WOLFSSL_AES_192
+        if (ctx->cipherType == AES_192_ECB_TYPE ||
+                 (type && XSTRNCMP(type, EVP_AES_192_ECB, EVP_AES_SIZE) == 0)) {
+            WOLFSSL_MSG("EVP_AES_192_ECB");
+            ctx->cipherType = AES_192_ECB_TYPE;
+            ctx->flags     &= ~WOLFSSL_EVP_CIPH_MODE;
+            ctx->flags     |= WOLFSSL_EVP_CIPH_ECB_MODE;
+            ctx->keyLen     = 24;
+            ctx->block_size = AES_BLOCK_SIZE;
+            if (enc == 0 || enc == 1)
+                ctx->enc = enc ? 1 : 0;
+            if (key) {
+                ret =  AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, NULL,
+                      ctx->enc ? AES_ENCRYPTION : AES_DECRYPTION, 1);
+            }
+            if (ret != 0)
+                return WOLFSSL_FAILURE;
+        }
+        #endif /* WOLFSSL_AES_192 */
+        #ifdef WOLFSSL_AES_256
+        if (ctx->cipherType == AES_256_ECB_TYPE ||
+                 (type && XSTRNCMP(type, EVP_AES_256_ECB, EVP_AES_SIZE) == 0)) {
+            WOLFSSL_MSG("EVP_AES_256_ECB");
+            ctx->cipherType = AES_256_ECB_TYPE;
+            ctx->flags     &= ~WOLFSSL_EVP_CIPH_MODE;
+            ctx->flags     |= WOLFSSL_EVP_CIPH_ECB_MODE;
+            ctx->keyLen     = 32;
+            ctx->block_size = AES_BLOCK_SIZE;
+            if (enc == 0 || enc == 1)
+                ctx->enc = enc ? 1 : 0;
+            if (key) {
+                ret =  AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, NULL,
+                    ctx->enc ? AES_ENCRYPTION : AES_DECRYPTION, 1);
+            }
+            if (ret != 0)
+                return WOLFSSL_FAILURE;
+        }
+        #endif /* WOLFSSL_AES_256 */
+    #ifdef WOLFSSL_AES_CFB
+        #ifdef WOLFSSL_AES_128
+        if (ctx->cipherType == AES_128_CFB1_TYPE ||
+            (type && XSTRNCMP(type, EVP_AES_128_CFB1, EVP_AESCFB_SIZE) == 0)) {
+            WOLFSSL_MSG("EVP_AES_128_CFB1");
+            ctx->cipherType = AES_128_CFB1_TYPE;
+            ctx->flags     &= ~WOLFSSL_EVP_CIPH_MODE;
+            ctx->flags     |= WOLFSSL_EVP_CIPH_CFB_MODE;
+            ctx->keyLen     = 16;
+            ctx->block_size = 1;
+            if (enc == 0 || enc == 1)
+                ctx->enc = enc ? 1 : 0;
+            if (key) {
+                ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv,
+                        AES_ENCRYPTION, 0);
+                if (ret != 0)
+                    return WOLFSSL_FAILURE;
+            }
+            if (iv && key == NULL) {
+                ret = wc_AesSetIV(&ctx->cipher.aes, iv);
+                if (ret != 0)
+                    return WOLFSSL_FAILURE;
+            }
+        }
+        #endif /* WOLFSSL_AES_128 */
+        #ifdef WOLFSSL_AES_192
+        if (ctx->cipherType == AES_192_CFB1_TYPE ||
+                 (type && XSTRNCMP(type, EVP_AES_192_CFB1, EVP_AESCFB_SIZE) == 0)) {
+            WOLFSSL_MSG("EVP_AES_192_CFB1");
+            ctx->cipherType = AES_192_CFB1_TYPE;
+            ctx->flags     &= ~WOLFSSL_EVP_CIPH_MODE;
+            ctx->flags     |= WOLFSSL_EVP_CIPH_CFB_MODE;
+            ctx->keyLen     = 24;
+            ctx->block_size = 1;
+            if (enc == 0 || enc == 1)
+                ctx->enc = enc ? 1 : 0;
+            if (key) {
+                ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv,
+                            AES_ENCRYPTION, 0);
+                if (ret != 0)
+                    return WOLFSSL_FAILURE;
+            }
+            if (iv && key == NULL) {
+                ret = wc_AesSetIV(&ctx->cipher.aes, iv);
+                if (ret != 0)
+                    return WOLFSSL_FAILURE;
+            }
+        }
+        #endif /* WOLFSSL_AES_192 */
+        #ifdef WOLFSSL_AES_256
+        if (ctx->cipherType == AES_256_CFB1_TYPE ||
+                 (type && XSTRNCMP(type, EVP_AES_256_CFB1, EVP_AESCFB_SIZE) == 0)) {
+            WOLFSSL_MSG("EVP_AES_256_CFB1");
+            ctx->cipherType = AES_256_CFB1_TYPE;
+            ctx->flags     &= ~WOLFSSL_EVP_CIPH_MODE;
+            ctx->flags     |= WOLFSSL_EVP_CIPH_CFB_MODE;
+            ctx->keyLen     = 32;
+            ctx->block_size = 1;
+            if (enc == 0 || enc == 1)
+                ctx->enc = enc ? 1 : 0;
+            if (key) {
+                ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv,
+                            AES_ENCRYPTION, 0);
+                if (ret != 0){
+                    WOLFSSL_MSG("AesSetKey() failed");
+                    return WOLFSSL_FAILURE;
+                }
+            }
+            if (iv && key == NULL) {
+                ret = wc_AesSetIV(&ctx->cipher.aes, iv);
+                if (ret != 0){
+                    WOLFSSL_MSG("wc_AesSetIV() failed");
+                    return WOLFSSL_FAILURE;
+                }
+            }
+        }
+        #endif /* WOLFSSL_AES_256 */
+        #ifdef WOLFSSL_AES_128
+        if (ctx->cipherType == AES_128_CFB8_TYPE ||
+            (type && XSTRNCMP(type, EVP_AES_128_CFB8, EVP_AESCFB_SIZE) == 0)) {
+            WOLFSSL_MSG("EVP_AES_128_CFB8");
+            ctx->cipherType = AES_128_CFB8_TYPE;
+            ctx->flags     &= ~WOLFSSL_EVP_CIPH_MODE;
+            ctx->flags     |= WOLFSSL_EVP_CIPH_CFB_MODE;
+            ctx->keyLen     = 16;
+            ctx->block_size = 1;
+            if (enc == 0 || enc == 1)
+                ctx->enc = enc ? 1 : 0;
+            if (key) {
+                ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv,
+                        AES_ENCRYPTION, 0);
+                if (ret != 0)
+                    return WOLFSSL_FAILURE;
+            }
+            if (iv && key == NULL) {
+                ret = wc_AesSetIV(&ctx->cipher.aes, iv);
+                if (ret != 0)
+                    return WOLFSSL_FAILURE;
+            }
+        }
+        #endif /* WOLFSSL_AES_128 */
+        #ifdef WOLFSSL_AES_192
+        if (ctx->cipherType == AES_192_CFB8_TYPE ||
+                 (type && XSTRNCMP(type, EVP_AES_192_CFB8, EVP_AESCFB_SIZE) == 0)) {
+            WOLFSSL_MSG("EVP_AES_192_CFB8");
+            ctx->cipherType = AES_192_CFB8_TYPE;
+            ctx->flags     &= ~WOLFSSL_EVP_CIPH_MODE;
+            ctx->flags     |= WOLFSSL_EVP_CIPH_CFB_MODE;
+            ctx->keyLen     = 24;
+            ctx->block_size = 1;
+            if (enc == 0 || enc == 1)
+                ctx->enc = enc ? 1 : 0;
+            if (key) {
+                ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv,
+                            AES_ENCRYPTION, 0);
+                if (ret != 0)
+                    return WOLFSSL_FAILURE;
+            }
+            if (iv && key == NULL) {
+                ret = wc_AesSetIV(&ctx->cipher.aes, iv);
+                if (ret != 0)
+                    return WOLFSSL_FAILURE;
+            }
+        }
+        #endif /* WOLFSSL_AES_192 */
+        #ifdef WOLFSSL_AES_256
+        if (ctx->cipherType == AES_256_CFB8_TYPE ||
+                 (type && XSTRNCMP(type, EVP_AES_256_CFB8, EVP_AESCFB_SIZE) == 0)) {
+            WOLFSSL_MSG("EVP_AES_256_CFB8");
+            ctx->cipherType = AES_256_CFB8_TYPE;
+            ctx->flags     &= ~WOLFSSL_EVP_CIPH_MODE;
+            ctx->flags     |= WOLFSSL_EVP_CIPH_CFB_MODE;
+            ctx->keyLen     = 32;
+            ctx->block_size = 1;
+            if (enc == 0 || enc == 1)
+                ctx->enc = enc ? 1 : 0;
+            if (key) {
+                ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv,
+                            AES_ENCRYPTION, 0);
+                if (ret != 0){
+                    WOLFSSL_MSG("AesSetKey() failed");
+                    return WOLFSSL_FAILURE;
+                }
+            }
+            if (iv && key == NULL) {
+                ret = wc_AesSetIV(&ctx->cipher.aes, iv);
+                if (ret != 0){
+                    WOLFSSL_MSG("wc_AesSetIV() failed");
+                    return WOLFSSL_FAILURE;
+                }
+            }
+        }
+        #endif /* WOLFSSL_AES_256 */
+        #ifdef WOLFSSL_AES_128
+        if (ctx->cipherType == AES_128_CFB128_TYPE ||
+            (type && XSTRNCMP(type, EVP_AES_128_CFB128, EVP_AESCFB_SIZE) == 0)) {
+            WOLFSSL_MSG("EVP_AES_128_CFB128");
+            ctx->cipherType = AES_128_CFB128_TYPE;
+            ctx->flags     &= ~WOLFSSL_EVP_CIPH_MODE;
+            ctx->flags     |= WOLFSSL_EVP_CIPH_CFB_MODE;
+            ctx->keyLen     = 16;
+            ctx->block_size = 1;
+            if (enc == 0 || enc == 1)
+                ctx->enc = enc ? 1 : 0;
+            if (key) {
+                ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv,
+                        AES_ENCRYPTION, 0);
+                if (ret != 0)
+                    return WOLFSSL_FAILURE;
+            }
+            if (iv && key == NULL) {
+                ret = wc_AesSetIV(&ctx->cipher.aes, iv);
+                if (ret != 0)
+                    return WOLFSSL_FAILURE;
+            }
+        }
+        #endif /* WOLFSSL_AES_128 */
+        #ifdef WOLFSSL_AES_192
+        if (ctx->cipherType == AES_192_CFB128_TYPE ||
+                 (type && XSTRNCMP(type, EVP_AES_192_CFB128, EVP_AESCFB_SIZE) == 0)) {
+            WOLFSSL_MSG("EVP_AES_192_CFB128");
+            ctx->cipherType = AES_192_CFB128_TYPE;
+            ctx->flags     &= ~WOLFSSL_EVP_CIPH_MODE;
+            ctx->flags     |= WOLFSSL_EVP_CIPH_CFB_MODE;
+            ctx->keyLen     = 24;
+            ctx->block_size = 1;
+            if (enc == 0 || enc == 1)
+                ctx->enc = enc ? 1 : 0;
+            if (key) {
+                ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv,
+                            AES_ENCRYPTION, 0);
+                if (ret != 0)
+                    return WOLFSSL_FAILURE;
+            }
+            if (iv && key == NULL) {
+                ret = wc_AesSetIV(&ctx->cipher.aes, iv);
+                if (ret != 0)
+                    return WOLFSSL_FAILURE;
+            }
+        }
+        #endif /* WOLFSSL_AES_192 */
+        #ifdef WOLFSSL_AES_256
+        if (ctx->cipherType == AES_256_CFB128_TYPE ||
+                 (type && XSTRNCMP(type, EVP_AES_256_CFB128, EVP_AESCFB_SIZE) == 0)) {
+            WOLFSSL_MSG("EVP_AES_256_CFB128");
+            ctx->cipherType = AES_256_CFB128_TYPE;
+            ctx->flags     &= ~WOLFSSL_EVP_CIPH_MODE;
+            ctx->flags     |= WOLFSSL_EVP_CIPH_CFB_MODE;
+            ctx->keyLen     = 32;
+            ctx->block_size = 1;
+            if (enc == 0 || enc == 1)
+                ctx->enc = enc ? 1 : 0;
+            if (key) {
+                ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv,
+                            AES_ENCRYPTION, 0);
+                if (ret != 0){
+                    WOLFSSL_MSG("AesSetKey() failed");
+                    return WOLFSSL_FAILURE;
+                }
+            }
+            if (iv && key == NULL) {
+                ret = wc_AesSetIV(&ctx->cipher.aes, iv);
+                if (ret != 0){
+                    WOLFSSL_MSG("wc_AesSetIV() failed");
+                    return WOLFSSL_FAILURE;
+                }
+            }
+        }
+        #endif /* WOLFSSL_AES_256 */
+    #endif /* HAVE_AES_CFB */
+    #ifdef WOLFSSL_AES_OFB
+        #ifdef WOLFSSL_AES_128
+        if (ctx->cipherType == AES_128_OFB_TYPE ||
+            (type && XSTRNCMP(type, EVP_AES_128_OFB, EVP_AES_SIZE) == 0)) {
+            WOLFSSL_MSG("EVP_AES_128_OFB");
+            ctx->cipherType = AES_128_OFB_TYPE;
+            ctx->flags     &= ~WOLFSSL_EVP_CIPH_MODE;
+            ctx->flags     |= WOLFSSL_EVP_CIPH_OFB_MODE;
+            ctx->keyLen     = 16;
+            ctx->block_size = 1;
+            if (enc == 0 || enc == 1)
+                ctx->enc = enc ? 1 : 0;
+            if (key) {
+                ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv,
+                        AES_ENCRYPTION, 0);
+                if (ret != 0)
+                    return WOLFSSL_FAILURE;
+            }
+            if (iv && key == NULL) {
+                ret = wc_AesSetIV(&ctx->cipher.aes, iv);
+                if (ret != 0)
+                    return WOLFSSL_FAILURE;
+            }
+        }
+        #endif /* WOLFSSL_AES_128 */
+        #ifdef WOLFSSL_AES_192
+        if (ctx->cipherType == AES_192_OFB_TYPE ||
+                 (type && XSTRNCMP(type, EVP_AES_192_OFB, EVP_AES_SIZE) == 0)) {
+            WOLFSSL_MSG("EVP_AES_192_OFB");
+            ctx->cipherType = AES_192_OFB_TYPE;
+            ctx->flags     &= ~WOLFSSL_EVP_CIPH_MODE;
+            ctx->flags     |= WOLFSSL_EVP_CIPH_OFB_MODE;
+            ctx->keyLen     = 24;
+            ctx->block_size = 1;
+            if (enc == 0 || enc == 1)
+                ctx->enc = enc ? 1 : 0;
+            if (key) {
+                ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv,
+                            AES_ENCRYPTION, 0);
+                if (ret != 0)
+                    return WOLFSSL_FAILURE;
+            }
+            if (iv && key == NULL) {
+                ret = wc_AesSetIV(&ctx->cipher.aes, iv);
+                if (ret != 0)
+                    return WOLFSSL_FAILURE;
+            }
+        }
+        #endif /* WOLFSSL_AES_192 */
+        #ifdef WOLFSSL_AES_256
+        if (ctx->cipherType == AES_256_OFB_TYPE ||
+                 (type && XSTRNCMP(type, EVP_AES_256_OFB, EVP_AES_SIZE) == 0)) {
+            WOLFSSL_MSG("EVP_AES_256_OFB");
+            ctx->cipherType = AES_256_OFB_TYPE;
+            ctx->flags     &= ~WOLFSSL_EVP_CIPH_MODE;
+            ctx->flags     |= WOLFSSL_EVP_CIPH_OFB_MODE;
+            ctx->keyLen     = 32;
+            ctx->block_size = 1;
+            if (enc == 0 || enc == 1)
+                ctx->enc = enc ? 1 : 0;
+            if (key) {
+                ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv,
+                            AES_ENCRYPTION, 0);
+                if (ret != 0){
+                    WOLFSSL_MSG("AesSetKey() failed");
+                    return WOLFSSL_FAILURE;
+                }
+            }
+            if (iv && key == NULL) {
+                ret = wc_AesSetIV(&ctx->cipher.aes, iv);
+                if (ret != 0){
+                    WOLFSSL_MSG("wc_AesSetIV() failed");
+                    return WOLFSSL_FAILURE;
+                }
+            }
+        }
+        #endif /* WOLFSSL_AES_256 */
+    #endif /* HAVE_AES_OFB */
+    #ifdef WOLFSSL_AES_XTS
+        #ifdef WOLFSSL_AES_128
+        if (ctx->cipherType == AES_128_XTS_TYPE ||
+            (type && XSTRNCMP(type, EVP_AES_128_XTS, EVP_AES_SIZE) == 0)) {
+            WOLFSSL_MSG("EVP_AES_128_XTS");
+            ctx->cipherType = AES_128_XTS_TYPE;
+            ctx->flags     &= ~WOLFSSL_EVP_CIPH_MODE;
+            ctx->flags     |= WOLFSSL_EVP_CIPH_XTS_MODE;
+            ctx->keyLen     = 32;
+            ctx->block_size = 1;
+            ctx->ivSz       = AES_BLOCK_SIZE;
+
+            if (iv != NULL) {
+                if (iv != ctx->iv) /* Valgrind error when src == dst */
+                    XMEMCPY(ctx->iv, iv, ctx->ivSz);
+            }
+            else
+                XMEMSET(ctx->iv, 0, AES_BLOCK_SIZE);
+
+            if (enc == 0 || enc == 1)
+                ctx->enc = enc ? 1 : 0;
+            if (key) {
+                ret = wc_AesXtsSetKey(&ctx->cipher.xts, key, ctx->keyLen,
+                    ctx->enc ? AES_ENCRYPTION : AES_DECRYPTION, NULL, 0);
+                if (ret != 0) {
+                    WOLFSSL_MSG("wc_AesXtsSetKey() failed");
+                    return WOLFSSL_FAILURE;
+                }
+            }
+        }
+        #endif /* WOLFSSL_AES_128 */
+        #ifdef WOLFSSL_AES_256
+        if (ctx->cipherType == AES_256_XTS_TYPE ||
+                 (type && XSTRNCMP(type, EVP_AES_256_XTS, EVP_AES_SIZE) == 0)) {
+            WOLFSSL_MSG("EVP_AES_256_XTS");
+            ctx->cipherType = AES_256_XTS_TYPE;
+            ctx->flags     &= ~WOLFSSL_EVP_CIPH_MODE;
+            ctx->flags     |= WOLFSSL_EVP_CIPH_XTS_MODE;
+            ctx->keyLen     = 64;
+            ctx->block_size = 1;
+            ctx->ivSz       = AES_BLOCK_SIZE;
+
+            if (iv != NULL) {
+                if (iv != ctx->iv) /* Valgrind error when src == dst */
+                    XMEMCPY(ctx->iv, iv, ctx->ivSz);
+            }
+            else
+                XMEMSET(ctx->iv, 0, AES_BLOCK_SIZE);
+
+            if (enc == 0 || enc == 1)
+                ctx->enc = enc ? 1 : 0;
+            if (key) {
+                ret = wc_AesXtsSetKey(&ctx->cipher.xts, key, ctx->keyLen,
+                        ctx->enc ? AES_ENCRYPTION : AES_DECRYPTION, NULL, 0);
+                if (ret != 0) {
+                    WOLFSSL_MSG("wc_AesXtsSetKey() failed");
+                    return WOLFSSL_FAILURE;
+                }
+            }
+        }
+        #endif /* WOLFSSL_AES_256 */
+    #endif /* HAVE_AES_XTS */
+#endif /* NO_AES */
+
+#ifndef NO_DES3
+        if (ctx->cipherType == DES_CBC_TYPE ||
+                 (type && XSTRNCMP(type, EVP_DES_CBC, EVP_DES_SIZE) == 0)) {
+            WOLFSSL_MSG("EVP_DES_CBC");
+            ctx->cipherType = DES_CBC_TYPE;
+            ctx->flags     &= ~WOLFSSL_EVP_CIPH_MODE;
+            ctx->flags     |= WOLFSSL_EVP_CIPH_CBC_MODE;
+            ctx->keyLen     = 8;
+            ctx->block_size = DES_BLOCK_SIZE;
+            ctx->ivSz       = DES_BLOCK_SIZE;
+            if (enc == 0 || enc == 1)
+                ctx->enc = enc ? 1 : 0;
+            if (key) {
+                ret = wc_Des_SetKey(&ctx->cipher.des, key, iv,
+                          ctx->enc ? DES_ENCRYPTION : DES_DECRYPTION);
+                if (ret != 0)
+                    return WOLFSSL_FAILURE;
+            }
+
+            if (iv && key == NULL)
+                wc_Des_SetIV(&ctx->cipher.des, iv);
+        }
+#ifdef WOLFSSL_DES_ECB
+        else if (ctx->cipherType == DES_ECB_TYPE ||
+                 (type && XSTRNCMP(type, EVP_DES_ECB, EVP_DES_SIZE) == 0)) {
+            WOLFSSL_MSG("EVP_DES_ECB");
+            ctx->cipherType = DES_ECB_TYPE;
+            ctx->flags     &= ~WOLFSSL_EVP_CIPH_MODE;
+            ctx->flags     |= WOLFSSL_EVP_CIPH_ECB_MODE;
+            ctx->keyLen     = 8;
+            ctx->block_size = DES_BLOCK_SIZE;
+            if (enc == 0 || enc == 1)
+                ctx->enc = enc ? 1 : 0;
+            if (key) {
+                WOLFSSL_MSG("Des_SetKey");
+                ret = wc_Des_SetKey(&ctx->cipher.des, key, NULL,
+                          ctx->enc ? DES_ENCRYPTION : DES_DECRYPTION);
+                if (ret != 0)
+                    return WOLFSSL_FAILURE;
+            }
+        }
+#endif
+        else if (ctx->cipherType == DES_EDE3_CBC_TYPE ||
+                 (type &&
+                  XSTRNCMP(type, EVP_DES_EDE3_CBC, EVP_DES_EDE3_SIZE) == 0)) {
+            WOLFSSL_MSG("EVP_DES_EDE3_CBC");
+            ctx->cipherType = DES_EDE3_CBC_TYPE;
+            ctx->flags     &= ~WOLFSSL_EVP_CIPH_MODE;
+            ctx->flags     |= WOLFSSL_EVP_CIPH_CBC_MODE;
+            ctx->keyLen     = 24;
+            ctx->block_size = DES_BLOCK_SIZE;
+            ctx->ivSz       = DES_BLOCK_SIZE;
+            if (enc == 0 || enc == 1)
+                ctx->enc = enc ? 1 : 0;
+            if (key) {
+                ret = wc_Des3_SetKey(&ctx->cipher.des3, key, iv,
+                          ctx->enc ? DES_ENCRYPTION : DES_DECRYPTION);
+                if (ret != 0)
+                    return WOLFSSL_FAILURE;
+            }
+
+            if (iv && key == NULL) {
+                ret = wc_Des3_SetIV(&ctx->cipher.des3, iv);
+                if (ret != 0)
+                    return WOLFSSL_FAILURE;
+            }
+        }
+        else if (ctx->cipherType == DES_EDE3_ECB_TYPE ||
+                 (type &&
+                  XSTRNCMP(type, EVP_DES_EDE3_ECB, EVP_DES_EDE3_SIZE) == 0)) {
+            WOLFSSL_MSG("EVP_DES_EDE3_ECB");
+            ctx->cipherType = DES_EDE3_ECB_TYPE;
+            ctx->flags     &= ~WOLFSSL_EVP_CIPH_MODE;
+            ctx->flags     |= WOLFSSL_EVP_CIPH_ECB_MODE;
+            ctx->keyLen     = 24;
+            ctx->block_size = DES_BLOCK_SIZE;
+            if (enc == 0 || enc == 1)
+                ctx->enc = enc ? 1 : 0;
+            if (key) {
+                ret = wc_Des3_SetKey(&ctx->cipher.des3, key, NULL,
+                          ctx->enc ? DES_ENCRYPTION : DES_DECRYPTION);
+                if (ret != 0)
+                    return WOLFSSL_FAILURE;
+            }
+        }
+#endif /* NO_DES3 */
+#ifndef NO_RC4
+        if (ctx->cipherType == ARC4_TYPE || (type &&
+                                     XSTRNCMP(type, "ARC4", 4) == 0)) {
+            WOLFSSL_MSG("ARC4");
+            ctx->cipherType = ARC4_TYPE;
+            ctx->flags     &= ~WOLFSSL_EVP_CIPH_MODE;
+            ctx->flags     |= WOLFSSL_EVP_CIPH_STREAM_CIPHER;
+            ctx->block_size = 1;
+            if (ctx->keyLen == 0)  /* user may have already set */
+                ctx->keyLen = 16;  /* default to 128 */
+            if (key)
+                wc_Arc4SetKey(&ctx->cipher.arc4, key, ctx->keyLen);
+        }
+#endif /* NO_RC4 */
+#ifdef HAVE_IDEA
+        if (ctx->cipherType == IDEA_CBC_TYPE ||
+                 (type && XSTRNCMP(type, EVP_IDEA_CBC, EVP_IDEA_SIZE) == 0)) {
+            WOLFSSL_MSG("EVP_IDEA_CBC");
+            ctx->cipherType = IDEA_CBC_TYPE;
+            ctx->flags     &= ~WOLFSSL_EVP_CIPH_MODE;
+            ctx->flags     |= WOLFSSL_EVP_CIPH_CBC_MODE;
+            ctx->keyLen     = IDEA_KEY_SIZE;
+            ctx->block_size = 8;
+            ctx->ivSz       = IDEA_BLOCK_SIZE;
+            if (enc == 0 || enc == 1)
+                ctx->enc = enc ? 1 : 0;
+            if (key) {
+                ret = wc_IdeaSetKey(&ctx->cipher.idea, key, (word16)ctx->keyLen,
+                                    iv, ctx->enc ? IDEA_ENCRYPTION :
+                                                   IDEA_DECRYPTION);
+                if (ret != 0)
+                    return WOLFSSL_FAILURE;
+            }
+
+            if (iv && key == NULL)
+                wc_IdeaSetIV(&ctx->cipher.idea, iv);
+        }
+#endif /* HAVE_IDEA */
+        if (ctx->cipherType == NULL_CIPHER_TYPE || (type &&
+                                     XSTRNCMP(type, "NULL", 4) == 0)) {
+            WOLFSSL_MSG("NULL cipher");
+            ctx->cipherType = NULL_CIPHER_TYPE;
+            ctx->keyLen = 0;
+            ctx->block_size = 16;
+        }
+#ifdef HAVE_WOLFSSL_EVP_CIPHER_CTX_IV
+        if (iv && iv != ctx->iv) {
+            if (wolfSSL_StoreExternalIV(ctx) != WOLFSSL_SUCCESS) {
+                return WOLFSSL_FAILURE;
+            }
+        }
+#endif
+        (void)ret; /* remove warning. If execution reaches this point, ret=0 */
+        return WOLFSSL_SUCCESS;
+    }
+
+    /* WOLFSSL_SUCCESS on ok */
+    int wolfSSL_EVP_CIPHER_CTX_key_length(WOLFSSL_EVP_CIPHER_CTX* ctx)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_CIPHER_CTX_key_length");
+        if (ctx)
+            return ctx->keyLen;
+
+        return 0;   /* failure */
+    }
+
+    /* WOLFSSL_SUCCESS on ok */
+    int wolfSSL_EVP_CIPHER_CTX_set_key_length(WOLFSSL_EVP_CIPHER_CTX* ctx,
+                                             int keylen)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_CIPHER_CTX_set_key_length");
+        if (ctx)
+            ctx->keyLen = keylen;
+        else
+            return 0;  /* failure */
+
+        return WOLFSSL_SUCCESS;
+    }
+#if defined(HAVE_AESGCM)
+    /* returns WOLFSSL_SUCCESS on success, otherwise returns WOLFSSL_FAILURE */
+    int wolfSSL_EVP_CIPHER_CTX_set_iv_length(WOLFSSL_EVP_CIPHER_CTX* ctx,
+                                             int ivLen)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_CIPHER_CTX_set_iv_length");
+        if (ctx)
+            ctx->ivSz= ivLen;
+        else
+            return WOLFSSL_FAILURE;
+
+        return WOLFSSL_SUCCESS;
+    }
+
+    /* returns WOLFSSL_SUCCESS on success, otherwise returns WOLFSSL_FAILURE */
+    int wolfSSL_EVP_CIPHER_CTX_set_iv(WOLFSSL_EVP_CIPHER_CTX* ctx, byte* iv,
+                                             int ivLen)
+    {
+        int expectedIvLen;
+
+        WOLFSSL_ENTER("wolfSSL_EVP_CIPHER_CTX_set_iv_length");
+        if (!ctx || !iv || !ivLen) {
+            return WOLFSSL_FAILURE;
+        }
+
+        expectedIvLen = wolfSSL_EVP_CIPHER_CTX_iv_length(ctx);
+
+        if (expectedIvLen == 0 || expectedIvLen != ivLen) {
+            WOLFSSL_MSG("Wrong ivLen value");
+            return WOLFSSL_FAILURE;
+        }
+
+        return wolfSSL_EVP_CipherInit(ctx, NULL, NULL, iv, -1);
+    }
+#endif
+
+    /* WOLFSSL_SUCCESS on ok */
+    int wolfSSL_EVP_Cipher(WOLFSSL_EVP_CIPHER_CTX* ctx, byte* dst, byte* src,
+                          word32 len)
+    {
+        int ret = 0;
+        WOLFSSL_ENTER("wolfSSL_EVP_Cipher");
+
+        if (ctx == NULL || src == NULL ||
+            (dst == NULL &&
+             ctx->cipherType != AES_128_GCM_TYPE &&
+             ctx->cipherType != AES_192_GCM_TYPE &&
+             ctx->cipherType != AES_256_GCM_TYPE)) {
+            WOLFSSL_MSG("Bad function argument");
+            return 0;  /* failure */
+        }
+
+        if (ctx->cipherType == 0xff) {
+            WOLFSSL_MSG("no init");
+            return 0;  /* failure */
+        }
+
+        switch (ctx->cipherType) {
+
+#ifndef NO_AES
+#ifdef HAVE_AES_CBC
+            case AES_128_CBC_TYPE :
+            case AES_192_CBC_TYPE :
+            case AES_256_CBC_TYPE :
+                WOLFSSL_MSG("AES CBC");
+                if (ctx->enc)
+                    ret = wc_AesCbcEncrypt(&ctx->cipher.aes, dst, src, len);
+                else
+                    ret = wc_AesCbcDecrypt(&ctx->cipher.aes, dst, src, len);
+                break;
+#endif /* HAVE_AES_CBC */
+
+#ifdef WOLFSSL_AES_CFB
+#if !defined(HAVE_SELFTEST) && !defined(HAVE_FIPS)
+            case AES_128_CFB1_TYPE:
+            case AES_192_CFB1_TYPE:
+            case AES_256_CFB1_TYPE:
+                WOLFSSL_MSG("AES CFB1");
+                if (ctx->enc)
+                    ret = wc_AesCfb1Encrypt(&ctx->cipher.aes, dst, src, len);
+                else
+                    ret = wc_AesCfb1Decrypt(&ctx->cipher.aes, dst, src, len);
+                break;
+            case AES_128_CFB8_TYPE:
+            case AES_192_CFB8_TYPE:
+            case AES_256_CFB8_TYPE:
+                WOLFSSL_MSG("AES CFB8");
+                if (ctx->enc)
+                    ret = wc_AesCfb8Encrypt(&ctx->cipher.aes, dst, src, len);
+                else
+                    ret = wc_AesCfb8Decrypt(&ctx->cipher.aes, dst, src, len);
+                break;
+#endif /* !HAVE_SELFTEST && !HAVE_FIPS */
+            case AES_128_CFB128_TYPE:
+            case AES_192_CFB128_TYPE:
+            case AES_256_CFB128_TYPE:
+                WOLFSSL_MSG("AES CFB128");
+                if (ctx->enc)
+                    ret = wc_AesCfbEncrypt(&ctx->cipher.aes, dst, src, len);
+                else
+                    ret = wc_AesCfbDecrypt(&ctx->cipher.aes, dst, src, len);
+                break;
+#endif /* WOLFSSL_AES_CFB */
+#if defined(WOLFSSL_AES_OFB)
+            case AES_128_OFB_TYPE:
+            case AES_192_OFB_TYPE:
+            case AES_256_OFB_TYPE:
+                WOLFSSL_MSG("AES OFB");
+                if (ctx->enc)
+                    ret = wc_AesOfbEncrypt(&ctx->cipher.aes, dst, src, len);
+                else
+                    ret = wc_AesOfbDecrypt(&ctx->cipher.aes, dst, src, len);
+                break;
+#endif /* WOLFSSL_AES_OFB */
+#if defined(WOLFSSL_AES_XTS)
+            case AES_128_XTS_TYPE:
+            case AES_256_XTS_TYPE:
+                WOLFSSL_MSG("AES XTS");
+                if (ctx->enc)
+                    ret = wc_AesXtsEncrypt(&ctx->cipher.xts, dst, src, len,
+                            ctx->iv, ctx->ivSz);
+                else
+                    ret = wc_AesXtsDecrypt(&ctx->cipher.xts, dst, src, len,
+                            ctx->iv, ctx->ivSz);
+                break;
+#endif /* WOLFSSL_AES_XTS */
+
+#ifdef HAVE_AESGCM
+            case AES_128_GCM_TYPE :
+            case AES_192_GCM_TYPE :
+            case AES_256_GCM_TYPE :
+                WOLFSSL_MSG("AES GCM");
+                if (ctx->enc) {
+                    if (dst){
+                        /* encrypt confidential data*/
+                        ret = wc_AesGcmEncrypt(&ctx->cipher.aes, dst, src, len,
+                                  ctx->iv, ctx->ivSz, ctx->authTag, ctx->authTagSz,
+                                  NULL, 0);
+                    }
+                    else {
+                        /* authenticated, non-confidential data */
+                        ret = wc_AesGcmEncrypt(&ctx->cipher.aes, NULL, NULL, 0,
+                                  ctx->iv, ctx->ivSz, ctx->authTag, ctx->authTagSz,
+                                  src, len);
+                        /* Reset partial authTag error for AAD*/
+                        if (ret == AES_GCM_AUTH_E)
+                            ret = 0;
+                    }
+                }
+                else {
+                    if (dst){
+                        /* decrypt confidential data*/
+                        ret = wc_AesGcmDecrypt(&ctx->cipher.aes, dst, src, len,
+                                  ctx->iv, ctx->ivSz, ctx->authTag, ctx->authTagSz,
+                                  NULL, 0);
+                    }
+                    else {
+                        /* authenticated, non-confidential data*/
+                        ret = wc_AesGcmDecrypt(&ctx->cipher.aes, NULL, NULL, 0,
+                                  ctx->iv, ctx->ivSz,
+                                  ctx->authTag, ctx->authTagSz,
+                                  src, len);
+                        /* Reset partial authTag error for AAD*/
+                        if (ret == AES_GCM_AUTH_E)
+                            ret = 0;
+                    }
+                }
+                break;
+#endif /* HAVE_AESGCM */
+#ifdef HAVE_AES_ECB
+            case AES_128_ECB_TYPE :
+            case AES_192_ECB_TYPE :
+            case AES_256_ECB_TYPE :
+                WOLFSSL_MSG("AES ECB");
+                if (ctx->enc)
+                    ret = wc_AesEcbEncrypt(&ctx->cipher.aes, dst, src, len);
+                else
+                    ret = wc_AesEcbDecrypt(&ctx->cipher.aes, dst, src, len);
+                break;
+#endif
+#ifdef WOLFSSL_AES_COUNTER
+            case AES_128_CTR_TYPE :
+            case AES_192_CTR_TYPE :
+            case AES_256_CTR_TYPE :
+                    WOLFSSL_MSG("AES CTR");
+                    ret = wc_AesCtrEncrypt(&ctx->cipher.aes, dst, src, len);
+                break;
+#endif /* WOLFSSL_AES_COUNTER */
+#endif /* NO_AES */
+
+#ifndef NO_DES3
+            case DES_CBC_TYPE :
+                WOLFSSL_MSG("DES CBC");
+                if (ctx->enc)
+                    wc_Des_CbcEncrypt(&ctx->cipher.des, dst, src, len);
+                else
+                    wc_Des_CbcDecrypt(&ctx->cipher.des, dst, src, len);
+                break;
+            case DES_EDE3_CBC_TYPE :
+                WOLFSSL_MSG("DES3 CBC");
+                if (ctx->enc)
+                    ret = wc_Des3_CbcEncrypt(&ctx->cipher.des3, dst, src, len);
+                else
+                    ret = wc_Des3_CbcDecrypt(&ctx->cipher.des3, dst, src, len);
+                break;
+#ifdef WOLFSSL_DES_ECB
+            case DES_ECB_TYPE :
+                WOLFSSL_MSG("DES ECB");
+                ret = wc_Des_EcbEncrypt(&ctx->cipher.des, dst, src, len);
+                break;
+            case DES_EDE3_ECB_TYPE :
+                WOLFSSL_MSG("DES3 ECB");
+                ret = wc_Des3_EcbEncrypt(&ctx->cipher.des3, dst, src, len);
+                break;
+#endif
+#endif /* !NO_DES3 */
+
+#ifndef NO_RC4
+            case ARC4_TYPE :
+                WOLFSSL_MSG("ARC4");
+                wc_Arc4Process(&ctx->cipher.arc4, dst, src, len);
+                break;
+#endif
+
+#ifdef HAVE_IDEA
+            case IDEA_CBC_TYPE :
+                WOLFSSL_MSG("IDEA CBC");
+                if (ctx->enc)
+                    wc_IdeaCbcEncrypt(&ctx->cipher.idea, dst, src, len);
+                else
+                    wc_IdeaCbcDecrypt(&ctx->cipher.idea, dst, src, len);
+                break;
+#endif
+            case NULL_CIPHER_TYPE :
+                WOLFSSL_MSG("NULL CIPHER");
+                XMEMCPY(dst, src, len);
+                break;
+
+            default: {
+                WOLFSSL_MSG("bad type");
+                return 0;  /* failure */
+            }
+        }
+
+        if (ret != 0) {
+            WOLFSSL_MSG("wolfSSL_EVP_Cipher failure");
+            return 0;  /* failure */
+        }
+
+        if (wolfSSL_StoreExternalIV(ctx) != WOLFSSL_SUCCESS) {
+            return WOLFSSL_FAILURE;
+        }
+
+        WOLFSSL_MSG("wolfSSL_EVP_Cipher success");
+        return WOLFSSL_SUCCESS;  /* success */
+    }
+
+    /* WOLFSSL_SUCCESS on ok */
+    int wolfSSL_EVP_DigestInit(WOLFSSL_EVP_MD_CTX* ctx,
+                               const WOLFSSL_EVP_MD* md)
+    {
+        int ret = WOLFSSL_SUCCESS;
+
+        WOLFSSL_ENTER("EVP_DigestInit");
+
+        if (ctx == NULL || md == NULL) {
+            return BAD_FUNC_ARG;
+        }
+
+
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        /* compile-time validation of ASYNC_CTX_SIZE */
+        typedef char async_test[WC_ASYNC_DEV_SIZE >= sizeof(WC_ASYNC_DEV) ?
+                                                                        1 : -1];
+        (void)sizeof(async_test);
+    #endif
+
+        /* Set to 0 if no match */
+        ctx->macType = wolfSSL_EVP_md2macType(md);
+        if (XSTRNCMP(md, "SHA256", 6) == 0) {
+             ret = wolfSSL_SHA256_Init(&(ctx->hash.digest.sha256));
+        }
+    #ifdef WOLFSSL_SHA224
+        else if (XSTRNCMP(md, "SHA224", 6) == 0) {
+             ret = wolfSSL_SHA224_Init(&(ctx->hash.digest.sha224));
+        }
+    #endif
+    #ifdef WOLFSSL_SHA384
+        else if (XSTRNCMP(md, "SHA384", 6) == 0) {
+             ret = wolfSSL_SHA384_Init(&(ctx->hash.digest.sha384));
+        }
+    #endif
+    #ifdef WOLFSSL_SHA512
+        else if (XSTRNCMP(md, "SHA512", 6) == 0) {
+             ret = wolfSSL_SHA512_Init(&(ctx->hash.digest.sha512));
+        }
+    #endif
+    #ifndef NO_MD4
+        else if (XSTRNCMP(md, "MD4", 3) == 0) {
+            wolfSSL_MD4_Init(&(ctx->hash.digest.md4));
+        }
+    #endif
+    #ifndef NO_MD5
+        else if (XSTRNCMP(md, "MD5", 3) == 0) {
+            ret = wolfSSL_MD5_Init(&(ctx->hash.digest.md5));
+        }
+    #endif
+#ifdef WOLFSSL_SHA3
+    #ifndef WOLFSSL_NOSHA3_224
+        else if (XSTRNCMP(md, "SHA3_224", 8) == 0) {
+             ret = wolfSSL_SHA3_224_Init(&(ctx->hash.digest.sha3_224));
+        }
+    #endif
+    #ifndef WOLFSSL_NOSHA3_256
+        else if (XSTRNCMP(md, "SHA3_256", 8) == 0) {
+             ret = wolfSSL_SHA3_256_Init(&(ctx->hash.digest.sha3_256));
+        }
+    #endif
+        else if (XSTRNCMP(md, "SHA3_384", 8) == 0) {
+             ret = wolfSSL_SHA3_384_Init(&(ctx->hash.digest.sha3_384));
+        }
+    #ifndef WOLFSSL_NOSHA3_512
+        else if (XSTRNCMP(md, "SHA3_512", 8) == 0) {
+             ret = wolfSSL_SHA3_512_Init(&(ctx->hash.digest.sha3_512));
+        }
+    #endif
+#endif
+    #ifndef NO_SHA
+        /* has to be last since would pick or 224, 256, 384, or 512 too */
+        else if (XSTRNCMP(md, "SHA", 3) == 0) {
+             ret = wolfSSL_SHA_Init(&(ctx->hash.digest.sha));
+        }
+    #endif /* NO_SHA */
+        else {
+             ctx->macType = WC_HASH_TYPE_NONE;
+             return BAD_FUNC_ARG;
+        }
+
+        return ret;
+    }
+
+    /* WOLFSSL_SUCCESS on ok, WOLFSSL_FAILURE on failure */
+    int wolfSSL_EVP_DigestUpdate(WOLFSSL_EVP_MD_CTX* ctx, const void* data,
+                                size_t sz)
+    {
+        int macType;
+
+        WOLFSSL_ENTER("EVP_DigestUpdate");
+
+        macType = wolfSSL_EVP_md2macType(EVP_MD_CTX_md(ctx));
+        switch (macType) {
+#ifndef NO_MD4
+            case WC_HASH_TYPE_MD4:
+                wolfSSL_MD4_Update((MD4_CTX*)&ctx->hash, data,
+                                  (unsigned long)sz);
+                break;
+#endif
+#ifndef NO_MD5
+            case WC_HASH_TYPE_MD5:
+                wolfSSL_MD5_Update((MD5_CTX*)&ctx->hash, data,
+                                  (unsigned long)sz);
+                break;
+#endif
+#ifndef NO_SHA
+            case WC_HASH_TYPE_SHA:
+                wolfSSL_SHA_Update((SHA_CTX*)&ctx->hash, data,
+                                  (unsigned long)sz);
+                break;
+#endif
+#ifdef WOLFSSL_SHA224
+            case WC_HASH_TYPE_SHA224:
+                wolfSSL_SHA224_Update((SHA224_CTX*)&ctx->hash, data,
+                                     (unsigned long)sz);
+                break;
+#endif
+#ifndef NO_SHA256
+            case WC_HASH_TYPE_SHA256:
+                wolfSSL_SHA256_Update((SHA256_CTX*)&ctx->hash, data,
+                                     (unsigned long)sz);
+                break;
+#endif /* !NO_SHA256 */
+#ifdef WOLFSSL_SHA384
+            case WC_HASH_TYPE_SHA384:
+                wolfSSL_SHA384_Update((SHA384_CTX*)&ctx->hash, data,
+                                     (unsigned long)sz);
+                break;
+#endif
+#ifdef WOLFSSL_SHA512
+            case WC_HASH_TYPE_SHA512:
+                wolfSSL_SHA512_Update((SHA512_CTX*)&ctx->hash, data,
+                                     (unsigned long)sz);
+                break;
+#endif /* WOLFSSL_SHA512 */
+    #ifdef WOLFSSL_SHA3
+        #ifndef WOLFSSL_NOSHA3_224
+            case WC_HASH_TYPE_SHA3_224:
+                wolfSSL_SHA3_224_Update((SHA3_224_CTX*)&ctx->hash, data,
+                                     (unsigned long)sz);
+                break;
+        #endif
+        #ifndef WOLFSSL_NOSHA3_256
+            case WC_HASH_TYPE_SHA3_256:
+                wolfSSL_SHA3_256_Update((SHA3_256_CTX*)&ctx->hash, data,
+                                     (unsigned long)sz);
+                break;
+        #endif
+            case WC_HASH_TYPE_SHA3_384:
+                wolfSSL_SHA3_384_Update((SHA3_384_CTX*)&ctx->hash, data,
+                                     (unsigned long)sz);
+                break;
+        #ifndef WOLFSSL_NOSHA3_512
+            case WC_HASH_TYPE_SHA3_512:
+                wolfSSL_SHA3_512_Update((SHA3_512_CTX*)&ctx->hash, data,
+                                     (unsigned long)sz);
+                break;
+        #endif
+    #endif
+            default:
+                return WOLFSSL_FAILURE;
+        }
+
+        return WOLFSSL_SUCCESS;
+    }
+
+    /* WOLFSSL_SUCCESS on ok */
+    int wolfSSL_EVP_DigestFinal(WOLFSSL_EVP_MD_CTX* ctx, unsigned char* md,
+                               unsigned int* s)
+    {
+        int macType;
+
+        WOLFSSL_ENTER("EVP_DigestFinal");
+        macType = wolfSSL_EVP_md2macType(EVP_MD_CTX_md(ctx));
+        switch (macType) {
+#ifndef NO_MD4
+            case WC_HASH_TYPE_MD4:
+                wolfSSL_MD4_Final(md, (MD4_CTX*)&ctx->hash);
+                if (s) *s = MD4_DIGEST_SIZE;
+                break;
+#endif
+#ifndef NO_MD5
+            case WC_HASH_TYPE_MD5:
+                wolfSSL_MD5_Final(md, (MD5_CTX*)&ctx->hash);
+                if (s) *s = WC_MD5_DIGEST_SIZE;
+                break;
+#endif
+#ifndef NO_SHA
+            case WC_HASH_TYPE_SHA:
+                wolfSSL_SHA_Final(md, (SHA_CTX*)&ctx->hash);
+                if (s) *s = WC_SHA_DIGEST_SIZE;
+                break;
+#endif
+#ifdef WOLFSSL_SHA224
+            case WC_HASH_TYPE_SHA224:
+                wolfSSL_SHA224_Final(md, (SHA224_CTX*)&ctx->hash);
+                if (s) *s = WC_SHA224_DIGEST_SIZE;
+                break;
+#endif
+#ifndef NO_SHA256
+            case WC_HASH_TYPE_SHA256:
+                wolfSSL_SHA256_Final(md, (SHA256_CTX*)&ctx->hash);
+                if (s) *s = WC_SHA256_DIGEST_SIZE;
+                break;
+#endif /* !NO_SHA256 */
+#ifdef WOLFSSL_SHA384
+            case WC_HASH_TYPE_SHA384:
+                wolfSSL_SHA384_Final(md, (SHA384_CTX*)&ctx->hash);
+                if (s) *s = WC_SHA384_DIGEST_SIZE;
+                break;
+#endif
+#ifdef WOLFSSL_SHA512
+            case WC_HASH_TYPE_SHA512:
+                wolfSSL_SHA512_Final(md, (SHA512_CTX*)&ctx->hash);
+                if (s) *s = WC_SHA512_DIGEST_SIZE;
+                break;
+#endif /* WOLFSSL_SHA512 */
+    #ifdef WOLFSSL_SHA3
+        #ifndef WOLFSSL_NOSHA3_224
+            case WC_HASH_TYPE_SHA3_224:
+                wolfSSL_SHA3_224_Final(md, (SHA3_224_CTX*)&ctx->hash);
+                if (s) *s = WC_SHA3_224_DIGEST_SIZE;
+                break;
+        #endif
+        #ifndef WOLFSSL_NOSHA3_256
+            case WC_HASH_TYPE_SHA3_256:
+                wolfSSL_SHA3_256_Final(md, (SHA3_256_CTX*)&ctx->hash);
+                if (s) *s = WC_SHA3_256_DIGEST_SIZE;
+                break;
+        #endif
+            case WC_HASH_TYPE_SHA3_384:
+                wolfSSL_SHA3_384_Final(md, (SHA3_384_CTX*)&ctx->hash);
+                if (s) *s = WC_SHA3_384_DIGEST_SIZE;
+                break;
+        #ifndef WOLFSSL_NOSHA3_512
+            case WC_HASH_TYPE_SHA3_512:
+                wolfSSL_SHA3_512_Final(md, (SHA3_512_CTX*)&ctx->hash);
+                if (s) *s = WC_SHA3_512_DIGEST_SIZE;
+                break;
+        #endif
+    #endif
+            default:
+                return WOLFSSL_FAILURE;
+        }
+
+        return WOLFSSL_SUCCESS;
+    }
+
+    /* WOLFSSL_SUCCESS on ok */
+    int wolfSSL_EVP_DigestFinal_ex(WOLFSSL_EVP_MD_CTX* ctx, unsigned char* md,
+                                   unsigned int* s)
+    {
+        WOLFSSL_ENTER("EVP_DigestFinal_ex");
+        return EVP_DigestFinal(ctx, md, s);
+    }
+
+    void wolfSSL_EVP_cleanup(void)
+    {
+        /* nothing to do here */
+    }
+
+const WOLFSSL_EVP_MD* wolfSSL_EVP_get_digestbynid(int id)
+{
+    WOLFSSL_MSG("wolfSSL_get_digestbynid");
+
+    switch(id) {
+#ifndef NO_MD5
+        case NID_md5:
+            return wolfSSL_EVP_md5();
+#endif
+#ifndef NO_SHA
+        case NID_sha1:
+            return wolfSSL_EVP_sha1();
+#endif
+        default:
+            WOLFSSL_MSG("Bad digest id value");
+    }
+
+    return NULL;
+}
+
+#ifndef NO_RSA
+WOLFSSL_RSA* wolfSSL_EVP_PKEY_get0_RSA(WOLFSSL_EVP_PKEY *pkey)
+{
+    if (!pkey) {
+        return NULL;
+    }
+    return pkey->rsa;
+}
+
+WOLFSSL_RSA* wolfSSL_EVP_PKEY_get1_RSA(WOLFSSL_EVP_PKEY* key)
+{
+    WOLFSSL_RSA* local;
+
+    WOLFSSL_MSG("wolfSSL_EVP_PKEY_get1_RSA");
+
+    if (key == NULL) {
+        return NULL;
+    }
+
+    local = wolfSSL_RSA_new();
+    if (local == NULL) {
+        WOLFSSL_MSG("Error creating a new WOLFSSL_RSA structure");
+        return NULL;
+    }
+
+    if (key->type == EVP_PKEY_RSA) {
+        if (wolfSSL_RSA_LoadDer(local, (const unsigned char*)key->pkey.ptr,
+                    key->pkey_sz) != SSL_SUCCESS) {
+            /* now try public key */
+            if (wolfSSL_RSA_LoadDer_ex(local,
+                        (const unsigned char*)key->pkey.ptr, key->pkey_sz,
+                        WOLFSSL_RSA_LOAD_PUBLIC) != SSL_SUCCESS) {
+                wolfSSL_RSA_free(local);
+                local = NULL;
+            }
+        }
+    }
+    else {
+        WOLFSSL_MSG("WOLFSSL_EVP_PKEY does not hold an RSA key");
+        wolfSSL_RSA_free(local);
+        local = NULL;
+    }
+    return local;
+}
+
+/* with set1 functions the pkey struct does not own the RSA structure
+ *
+ * returns WOLFSSL_SUCCESS on success and WOLFSSL_FAILURE on failure
+ */
+int wolfSSL_EVP_PKEY_set1_RSA(WOLFSSL_EVP_PKEY *pkey, WOLFSSL_RSA *key)
+{
+#if defined(WOLFSSL_KEY_GEN) && !defined(HAVE_USER_RSA)
+    int derMax = 0;
+    int derSz  = 0;
+    byte* derBuf = NULL;
+    RsaKey* rsa  = NULL;
+#endif
+    WOLFSSL_ENTER("wolfSSL_EVP_PKEY_set1_RSA");
+    if ((pkey == NULL) || (key == NULL))
+        return WOLFSSL_FAILURE;
+
+    if (pkey->rsa != NULL && pkey->ownRsa == 1) {
+        wolfSSL_RSA_free(pkey->rsa);
+    }
+    pkey->rsa    = key;
+    pkey->ownRsa = 0; /* pkey does not own RSA */
+    pkey->type   = EVP_PKEY_RSA;
+    if (key->inSet == 0) {
+        if (SetRsaInternal(key) != WOLFSSL_SUCCESS) {
+            WOLFSSL_MSG("SetRsaInternal failed");
+            return WOLFSSL_FAILURE;
+        }
+    }
+
+#if defined(WOLFSSL_KEY_GEN) && !defined(HAVE_USER_RSA)
+    rsa = (RsaKey*)key->internal;
+    /* 5 > size of n, d, p, q, d%(p-1), d(q-1), 1/q%p, e + ASN.1 additional
+     * information */
+    derMax = 5 * wolfSSL_RSA_size(key) + (2 * AES_BLOCK_SIZE);
+
+    derBuf = (byte*)XMALLOC(derMax, pkey->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (derBuf == NULL) {
+        WOLFSSL_MSG("malloc failed");
+        return WOLFSSL_FAILURE;
+    }
+
+    if (rsa->type == RSA_PRIVATE) {
+        /* Private key to DER */
+        derSz = wc_RsaKeyToDer(rsa, derBuf, derMax);
+    }
+    else {
+        /* Public key to DER */
+        derSz = wc_RsaKeyToPublicDer(rsa, derBuf, derMax);
+    }
+
+    if (derSz < 0) {
+        if (rsa->type == RSA_PRIVATE) {
+            WOLFSSL_MSG("wc_RsaKeyToDer failed");
+        }
+        else {
+            WOLFSSL_MSG("wc_RsaKeyToPublicDer failed");
+        }
+        XFREE(derBuf, pkey->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        return WOLFSSL_FAILURE;
+    }
+
+    pkey->pkey.ptr = (char*)XMALLOC(derSz, pkey->heap, DYNAMIC_TYPE_DER);
+    if (pkey->pkey.ptr == NULL) {
+        WOLFSSL_MSG("key malloc failed");
+        XFREE(derBuf, pkey->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        return WOLFSSL_FAILURE;
+    }
+    pkey->pkey_sz = derSz;
+    XMEMCPY(pkey->pkey.ptr, derBuf, derSz);
+    XFREE(derBuf, pkey->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif /* WOLFSSL_KEY_GEN && !HAVE_USER_RSA */
+
+#ifdef WC_RSA_BLINDING
+    if (key->ownRng == 0) {
+        if (wc_RsaSetRNG((RsaKey*)(pkey->rsa->internal), &(pkey->rng)) != 0) {
+            WOLFSSL_MSG("Error setting RSA rng");
+            return WOLFSSL_FAILURE;
+        }
+    }
+#endif
+    return WOLFSSL_SUCCESS;
+}
+#endif /* !NO_RSA */
+
+#if !defined (NO_DSA) && !defined(HAVE_SELFTEST) && defined(WOLFSSL_KEY_GEN)
+/* with set1 functions the pkey struct does not own the DSA structure
+ *
+ * returns WOLFSSL_SUCCESS on success and WOLFSSL_FAILURE on failure
+ */
+int wolfSSL_EVP_PKEY_set1_DSA(WOLFSSL_EVP_PKEY *pkey, WOLFSSL_DSA *key)
+{
+    int derMax = 0;
+    int derSz  = 0;
+    DsaKey* dsa  = NULL;
+    byte* derBuf = NULL;
+
+    WOLFSSL_ENTER("wolfSSL_EVP_PKEY_set1_DSA");
+
+    if((pkey == NULL) || (key == NULL))return WOLFSSL_FAILURE;
+    if (pkey->dsa != NULL && pkey->ownDsa == 1) {
+        wolfSSL_DSA_free(pkey->dsa);
+    }
+    pkey->dsa    = key;
+    pkey->ownDsa = 0; /* pkey does not own DSA */
+    pkey->type   = EVP_PKEY_DSA;
+    if (key->inSet == 0) {
+        if (SetDsaInternal(key) != WOLFSSL_SUCCESS) {
+            WOLFSSL_MSG("SetDsaInternal failed");
+            return WOLFSSL_FAILURE;
+        }
+    }
+    dsa = (DsaKey*)key->internal;
+
+    /* 4 > size of pub, priv, p, q, g + ASN.1 additional information */
+    derMax = 4 * wolfSSL_BN_num_bytes(key->g) + AES_BLOCK_SIZE;
+
+    derBuf = (byte*)XMALLOC(derMax, pkey->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (derBuf == NULL) {
+        WOLFSSL_MSG("malloc failed");
+        return WOLFSSL_FAILURE;
+    }
+
+    if (dsa->type == DSA_PRIVATE) {
+        /* Private key to DER */
+        derSz = wc_DsaKeyToDer(dsa, derBuf, derMax);
+    }
+    else {
+        /* Public key to DER */
+        derSz = wc_DsaKeyToPublicDer(dsa, derBuf, derMax);
+    }
+
+    if (derSz < 0) {
+        if (dsa->type == DSA_PRIVATE) {
+            WOLFSSL_MSG("wc_DsaKeyToDer failed");
+        }
+        else {
+            WOLFSSL_MSG("wc_DsaKeyToPublicDer failed");
+        }
+        XFREE(derBuf, pkey->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        return WOLFSSL_FAILURE;
+    }
+
+    pkey->pkey.ptr = (char*)XMALLOC(derSz, pkey->heap, DYNAMIC_TYPE_DER);
+    if (pkey->pkey.ptr == NULL) {
+        WOLFSSL_MSG("key malloc failed");
+        XFREE(derBuf, pkey->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        return WOLFSSL_FAILURE;
+    }
+    pkey->pkey_sz = derSz;
+    XMEMCPY(pkey->pkey.ptr, derBuf, derSz);
+    XFREE(derBuf, pkey->heap, DYNAMIC_TYPE_TMP_BUFFER);
+
+    return WOLFSSL_SUCCESS;
+}
+
+WOLFSSL_DSA* wolfSSL_EVP_PKEY_get1_DSA(WOLFSSL_EVP_PKEY* key)
+{
+    WOLFSSL_DSA* local;
+
+    WOLFSSL_ENTER("wolfSSL_EVP_PKEY_get1_DSA");
+
+    if (key == NULL) {
+        WOLFSSL_MSG("Bad function argument");
+        return NULL;
+    }
+
+    local = wolfSSL_DSA_new();
+    if (local == NULL) {
+        WOLFSSL_MSG("Error creating a new WOLFSSL_DSA structure");
+        return NULL;
+    }
+
+    if (key->type == EVP_PKEY_DSA) {
+        if (wolfSSL_DSA_LoadDer(local, (const unsigned char*)key->pkey.ptr,
+                    key->pkey_sz) != SSL_SUCCESS) {
+            /* now try public key */
+            if (wolfSSL_DSA_LoadDer_ex(local,
+                        (const unsigned char*)key->pkey.ptr, key->pkey_sz,
+                        WOLFSSL_DSA_LOAD_PUBLIC) != SSL_SUCCESS) {
+                wolfSSL_DSA_free(local);
+                local = NULL;
+            }
+        }
+    }
+    else {
+        WOLFSSL_MSG("WOLFSSL_EVP_PKEY does not hold a DSA key");
+        wolfSSL_DSA_free(local);
+        local = NULL;
+    }
+    return local;
+}
+#endif /* !NO_DSA && !HAVE_SELFTEST && WOLFSSL_KEY_GEN */
+
+#ifdef HAVE_ECC
+WOLFSSL_EC_KEY *wolfSSL_EVP_PKEY_get0_EC_KEY(WOLFSSL_EVP_PKEY *pkey)
+{
+    WOLFSSL_EC_KEY *eckey = NULL;
+    if (pkey) {
+#ifdef HAVE_ECC
+        eckey = pkey->ecc;
+#endif
+    }
+    return eckey;
+}
+
+WOLFSSL_EC_KEY* wolfSSL_EVP_PKEY_get1_EC_KEY(WOLFSSL_EVP_PKEY* key)
+{
+    WOLFSSL_EC_KEY* local;
+    WOLFSSL_ENTER("wolfSSL_EVP_PKEY_get1_EC_KEY");
+
+    if (key == NULL) {
+        return NULL;
+    }
+
+    local = wolfSSL_EC_KEY_new();
+    if (local == NULL) {
+        WOLFSSL_MSG("Error creating a new WOLFSSL_EC_KEY structure");
+        return NULL;
+    }
+
+    if (key->type == EVP_PKEY_EC) {
+        if (wolfSSL_EC_KEY_LoadDer(local, (const unsigned char*)key->pkey.ptr,
+                    key->pkey_sz) != SSL_SUCCESS) {
+            /* now try public key */
+            if (wolfSSL_EC_KEY_LoadDer_ex(local,
+                    (const unsigned char*)key->pkey.ptr,
+                    key->pkey_sz, WOLFSSL_EC_KEY_LOAD_PUBLIC) != SSL_SUCCESS) {
+
+                wolfSSL_EC_KEY_free(local);
+                local = NULL;
+            }
+        }
+    }
+    else {
+        WOLFSSL_MSG("WOLFSSL_EVP_PKEY does not hold an EC key");
+        wolfSSL_EC_KEY_free(local);
+        local = NULL;
+    }
+#ifdef OPENSSL_ALL
+    if (!local && key->ecc) {
+        local = wolfSSL_EC_KEY_dup(key->ecc);
+    }
+#endif
+    return local;
+}
+#endif /* HAVE_ECC */
+
+#if defined(OPENSSL_ALL) || defined(WOLFSSL_QT)
+#if !defined(NO_DH) && !defined(NO_FILESYSTEM)
+/* with set1 functions the pkey struct does not own the DH structure
+ * Build the following DH Key format from the passed in WOLFSSL_DH
+ * then store in WOLFSSL_EVP_PKEY in DER format.
+ *
+ * returns WOLFSSL_SUCCESS on success and WOLFSSL_FAILURE on failure
+ */
+int wolfSSL_EVP_PKEY_set1_DH(WOLFSSL_EVP_PKEY *pkey, WOLFSSL_DH *key)
+{
+    byte havePublic = 0, havePrivate = 0;
+    int ret;
+    word32 derSz = 0;
+    byte* derBuf = NULL;
+    DhKey* dhkey = NULL;
+
+    WOLFSSL_ENTER("wolfSSL_EVP_PKEY_set1_DH");
+
+    if (pkey == NULL || key == NULL)
+        return WOLFSSL_FAILURE;
+
+    if (pkey->dh != NULL && pkey->ownDh == 1)
+        wolfSSL_DH_free(pkey->dh);
+
+    pkey->dh    = key;
+    pkey->ownDh = 0; /* pkey does not own DH */
+    pkey->type  = EVP_PKEY_DH;
+    if (key->inSet == 0) {
+        if (SetDhInternal(key) != WOLFSSL_SUCCESS) {
+            WOLFSSL_MSG("SetDhInternal failed");
+            return WOLFSSL_FAILURE;
+        }
+    }
+
+    dhkey = (DhKey*)key->internal;
+
+    havePublic  = mp_unsigned_bin_size(&dhkey->pub)  > 0;
+    havePrivate = mp_unsigned_bin_size(&dhkey->priv) > 0;
+
+    /* Get size of DER buffer only */
+    if (havePublic && !havePrivate) {
+        ret = wc_DhPubKeyToDer(dhkey, NULL, &derSz);
+    } else if (havePrivate && !havePublic) {
+        ret = wc_DhPrivKeyToDer(dhkey, NULL, &derSz);
+    } else {
+        ret = wc_DhParamsToDer(dhkey,NULL,&derSz);
+    }
+
+    if (derSz <= 0 || ret != LENGTH_ONLY_E) {
+       WOLFSSL_MSG("Failed to get size of DH Key");
+       return WOLFSSL_FAILURE;
+    }
+
+    derBuf = (byte*)XMALLOC(derSz, pkey->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (derBuf == NULL) {
+        WOLFSSL_MSG("malloc failed");
+        return WOLFSSL_FAILURE;
+    }
+
+    /* Fill DER buffer */
+    if (havePublic && !havePrivate) {
+        ret = wc_DhPubKeyToDer(dhkey, derBuf, &derSz);
+    } else if (havePrivate && !havePublic) {
+        ret = wc_DhPrivKeyToDer(dhkey, derBuf, &derSz);
+    } else {
+        ret = wc_DhParamsToDer(dhkey,derBuf,&derSz);
+    }
+
+    if (ret <= 0) {
+        WOLFSSL_MSG("Failed to export DH Key");
+        XFREE(derBuf, pkey->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        return WOLFSSL_FAILURE;
+    }
+
+    /* Store DH key into pkey (DER format) */
+    pkey->pkey.ptr = (char*)derBuf;
+    pkey->pkey_sz = derSz;
+
+    return WOLFSSL_SUCCESS;
+}
+
+WOLFSSL_DH* wolfSSL_EVP_PKEY_get0_DH(WOLFSSL_EVP_PKEY* key)
+{
+    if (!key) {
+        return NULL;
+    }
+    return key->dh;
+}
+
+WOLFSSL_DH* wolfSSL_EVP_PKEY_get1_DH(WOLFSSL_EVP_PKEY* key)
+{
+    WOLFSSL_DH* local = NULL;
+
+    WOLFSSL_ENTER("wolfSSL_EVP_PKEY_get1_DH");
+
+    if (key == NULL || key->dh == NULL) {
+        WOLFSSL_MSG("Bad function argument");
+        return NULL;
+    }
+
+    if (key->type == EVP_PKEY_DH) {
+        local = wolfSSL_DH_new();
+        if (local == NULL) {
+            WOLFSSL_MSG("Error creating a new WOLFSSL_DH structure");
+            return NULL;
+        }
+
+        if (wolfSSL_DH_LoadDer(local, (const unsigned char*)key->pkey.ptr,
+                    key->pkey_sz) != SSL_SUCCESS) {
+            wolfSSL_DH_free(local);
+            WOLFSSL_MSG("Error wolfSSL_DH_LoadDer");
+            local = NULL;
+        }
+    }
+    else {
+        WOLFSSL_MSG("WOLFSSL_EVP_PKEY does not hold a DH key");
+        wolfSSL_DH_free(local);
+        return NULL;
+    }
+
+    return local;
+}
+#endif /* NO_DH && NO_FILESYSTEM */
+
+int wolfSSL_EVP_PKEY_assign(WOLFSSL_EVP_PKEY *pkey, int type, void *key)
+{
+    int ret;
+
+    WOLFSSL_ENTER("wolfSSL_EVP_PKEY_assign");
+
+    /* pkey and key checked if NULL in subsequent assign functions */
+    switch(type) {
+    #ifndef NO_RSA
+        case EVP_PKEY_RSA:
+            ret = wolfSSL_EVP_PKEY_assign_RSA(pkey, (WOLFSSL_RSA*)key);
+            break;
+    #endif
+    #ifndef NO_DSA
+        case EVP_PKEY_DSA:
+            ret = wolfSSL_EVP_PKEY_assign_DSA(pkey, (WOLFSSL_DSA*)key);
+            break;
+    #endif
+    #ifdef HAVE_ECC
+        case EVP_PKEY_EC:
+            ret = wolfSSL_EVP_PKEY_assign_EC_KEY(pkey, (WOLFSSL_EC_KEY*)key);
+            break;
+    #endif
+    #ifdef NO_DH
+         case EVP_PKEY_DH:
+            ret = wolfSSL_EVP_PKEY_assign_DH(pkey, (WOLFSSL_DH*)key);
+            break;
+    #endif
+        default:
+            WOLFSSL_MSG("Unknown EVP_PKEY type in wolfSSL_EVP_PKEY_assign.");
+            ret = WOLFSSL_FAILURE;
+    }
+
+    return ret;
+}
+#endif /* WOLFSSL_QT || OPENSSL_ALL */
+
+#if defined(HAVE_ECC)
+/* try and populate public pkey_sz and pkey.ptr */
+static void ECC_populate_EVP_PKEY(EVP_PKEY* pkey, ecc_key* ecc)
+{
+    int ret;
+    if (!pkey || !ecc)
+        return;
+    if ((ret = wc_EccPublicKeyDerSize(ecc, 1)) > 0) {
+        int derSz = ret;
+        char* derBuf = (char*)XMALLOC(derSz, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        if (derBuf) {
+            ret = wc_EccPublicKeyToDer(ecc, (byte*)derBuf, derSz, 1);
+            if (ret >= 0) {
+                if (pkey->pkey.ptr) {
+                    XFREE(pkey->pkey.ptr, NULL, DYNAMIC_TYPE_OPENSSL);
+                }
+                pkey->pkey_sz = ret;
+                pkey->pkey.ptr = derBuf;
+            }
+            else { /* failure - okay to ignore */
+                XFREE(derBuf, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+                derBuf = NULL;
+            }
+        }
+    }
+}
+
+WOLFSSL_API int wolfSSL_EVP_PKEY_set1_EC_KEY(WOLFSSL_EVP_PKEY *pkey, WOLFSSL_EC_KEY *key)
+{
+#ifdef HAVE_ECC
+    if((pkey == NULL) || (key ==NULL))return WOLFSSL_FAILURE;
+    WOLFSSL_ENTER("wolfSSL_EVP_PKEY_set1_EC_KEY");
+#ifndef NO_RSA
+    if (pkey->rsa != NULL && pkey->ownRsa == 1) {
+        wolfSSL_RSA_free(pkey->rsa);
+    }
+    pkey->ownRsa = 0;
+#endif
+#ifndef NO_DSA
+    if (pkey->dsa != NULL && pkey->ownDsa == 1) {
+        wolfSSL_DSA_free(pkey->dsa);
+    }
+    pkey->ownDsa = 0;
+#endif
+#ifndef NO_DH
+    if (pkey->dh != NULL && pkey->ownDh == 1) {
+        wolfSSL_DH_free(pkey->dh);
+    }
+    pkey->ownDh = 0;
+#endif
+    if (pkey->ecc != NULL && pkey->ownEcc == 1) {
+        wolfSSL_EC_KEY_free(pkey->ecc);
+    }
+    pkey->ecc    = key;
+    pkey->ownEcc = 0; /* pkey does not own EC key */
+    pkey->type   = EVP_PKEY_EC;
+    ECC_populate_EVP_PKEY(pkey, (ecc_key*)key->internal);
+    return WOLFSSL_SUCCESS;
+#else
+    (void)pkey;
+    (void)key;
+    return WOLFSSL_FAILURE;
+#endif
+}
+
+void* wolfSSL_EVP_X_STATE(const WOLFSSL_EVP_CIPHER_CTX* ctx)
+{
+    WOLFSSL_MSG("wolfSSL_EVP_X_STATE");
+
+    if (ctx) {
+        switch (ctx->cipherType) {
+            case ARC4_TYPE:
+                WOLFSSL_MSG("returning arc4 state");
+                return (void*)&ctx->cipher.arc4.x;
+
+            default:
+                WOLFSSL_MSG("bad x state type");
+                return 0;
+        }
+    }
+
+    return NULL;
+}
+int wolfSSL_EVP_PKEY_assign_EC_KEY(EVP_PKEY* pkey, WOLFSSL_EC_KEY* key)
+{
+    if (pkey == NULL || key == NULL)
+        return WOLFSSL_FAILURE;
+
+    pkey->type = EVP_PKEY_EC;
+    pkey->ecc = key;
+    pkey->ownEcc = 1;
+
+    /* try and populate public pkey_sz and pkey.ptr */
+    ECC_populate_EVP_PKEY(pkey, (ecc_key*)key->internal);
+
+    return WOLFSSL_SUCCESS;
+}
+#endif /* HAVE_ECC */
+
+#ifndef NO_WOLFSSL_STUB
+const WOLFSSL_EVP_MD* wolfSSL_EVP_ripemd160(void)
+{
+    WOLFSSL_MSG("wolfSSL_ripemd160");
+    WOLFSSL_STUB("EVP_ripemd160");
+    return NULL;
+}
+#endif
+
+
+int wolfSSL_EVP_MD_block_size(const WOLFSSL_EVP_MD* type)
+{
+    WOLFSSL_MSG("wolfSSL_EVP_MD_block_size");
+
+    if (type == NULL) {
+        WOLFSSL_MSG("No md type arg");
+        return BAD_FUNC_ARG;
+    }
+
+    if (XSTRNCMP(type, "SHA256", 6) == 0) {
+        return WC_SHA256_BLOCK_SIZE;
+    }
+#ifndef NO_MD5
+    else if (XSTRNCMP(type, "MD5", 3) == 0) {
+        return WC_MD5_BLOCK_SIZE;
+    }
+#endif
+#ifdef WOLFSSL_SHA224
+    else if (XSTRNCMP(type, "SHA224", 6) == 0) {
+        return WC_SHA224_BLOCK_SIZE;
+    }
+#endif
+#ifdef WOLFSSL_SHA384
+    else if (XSTRNCMP(type, "SHA384", 6) == 0) {
+        return WC_SHA384_BLOCK_SIZE;
+    }
+#endif
+#ifdef WOLFSSL_SHA512
+    else if (XSTRNCMP(type, "SHA512", 6) == 0) {
+        return WC_SHA512_BLOCK_SIZE;
+    }
+#endif
+#ifndef NO_SHA
+    /* has to be last since would pick or 256, 384, or 512 too */
+    else if (XSTRNCMP(type, "SHA", 3) == 0) {
+        return WC_SHA_BLOCK_SIZE;
+    }
+#endif
+
+    return BAD_FUNC_ARG;
+}
+
+int wolfSSL_EVP_MD_size(const WOLFSSL_EVP_MD* type)
+{
+    WOLFSSL_MSG("wolfSSL_EVP_MD_size");
+
+    if (type == NULL) {
+        WOLFSSL_MSG("No md type arg");
+        return BAD_FUNC_ARG;
+    }
+
+    if (XSTRNCMP(type, "SHA256", 6) == 0) {
+        return WC_SHA256_DIGEST_SIZE;
+    }
+#ifndef NO_MD5
+    else if (XSTRNCMP(type, "MD5", 3) == 0) {
+        return WC_MD5_DIGEST_SIZE;
+    }
+#endif
+#ifdef WOLFSSL_SHA224
+    else if (XSTRNCMP(type, "SHA224", 6) == 0) {
+        return WC_SHA224_DIGEST_SIZE;
+    }
+#endif
+#ifdef WOLFSSL_SHA384
+    else if (XSTRNCMP(type, "SHA384", 6) == 0) {
+        return WC_SHA384_DIGEST_SIZE;
+    }
+#endif
+#ifdef WOLFSSL_SHA512
+    else if (XSTRNCMP(type, "SHA512", 6) == 0) {
+        return WC_SHA512_DIGEST_SIZE;
+    }
+#endif
+#ifndef NO_SHA
+    /* has to be last since would pick or 256, 384, or 512 too */
+    else if (XSTRNCMP(type, "SHA", 3) == 0) {
+        return WC_SHA_DIGEST_SIZE;
+    }
+#endif
+
+    return BAD_FUNC_ARG;
+}
+
+
+int wolfSSL_EVP_CIPHER_CTX_iv_length(const WOLFSSL_EVP_CIPHER_CTX* ctx)
+{
+    WOLFSSL_MSG("wolfSSL_EVP_CIPHER_CTX_iv_length");
+
+    switch (ctx->cipherType) {
+
+#ifdef HAVE_AES_CBC
+        case AES_128_CBC_TYPE :
+        case AES_192_CBC_TYPE :
+        case AES_256_CBC_TYPE :
+            WOLFSSL_MSG("AES CBC");
+            return AES_BLOCK_SIZE;
+#endif
+#ifdef HAVE_AESGCM
+        case AES_128_GCM_TYPE :
+        case AES_192_GCM_TYPE :
+        case AES_256_GCM_TYPE :
+            WOLFSSL_MSG("AES GCM");
+            return GCM_NONCE_MID_SZ;
+#endif
+#ifdef WOLFSSL_AES_COUNTER
+        case AES_128_CTR_TYPE :
+        case AES_192_CTR_TYPE :
+        case AES_256_CTR_TYPE :
+            WOLFSSL_MSG("AES CTR");
+            return AES_BLOCK_SIZE;
+#endif
+#ifndef NO_DES3
+        case DES_CBC_TYPE :
+            WOLFSSL_MSG("DES CBC");
+            return DES_BLOCK_SIZE;
+
+        case DES_EDE3_CBC_TYPE :
+            WOLFSSL_MSG("DES EDE3 CBC");
+            return DES_BLOCK_SIZE;
+#endif
+#ifdef HAVE_IDEA
+        case IDEA_CBC_TYPE :
+            WOLFSSL_MSG("IDEA CBC");
+            return IDEA_BLOCK_SIZE;
+#endif
+#ifndef NO_RC4
+        case ARC4_TYPE :
+            WOLFSSL_MSG("ARC4");
+            return 0;
+#endif
+#ifdef WOLFSSL_AES_CFB
+#if !defined(HAVE_SELFTEST) && !defined(HAVE_FIPS)
+        case AES_128_CFB1_TYPE:
+        case AES_192_CFB1_TYPE:
+        case AES_256_CFB1_TYPE:
+            WOLFSSL_MSG("AES CFB1");
+            return AES_BLOCK_SIZE;
+        case AES_128_CFB8_TYPE:
+        case AES_192_CFB8_TYPE:
+        case AES_256_CFB8_TYPE:
+            WOLFSSL_MSG("AES CFB8");
+            return AES_BLOCK_SIZE;
+#endif /* !HAVE_SELFTEST && !HAVE_FIPS */
+        case AES_128_CFB128_TYPE:
+        case AES_192_CFB128_TYPE:
+        case AES_256_CFB128_TYPE:
+            WOLFSSL_MSG("AES CFB128");
+            return AES_BLOCK_SIZE;
+#endif /* WOLFSSL_AES_CFB */
+#if defined(WOLFSSL_AES_OFB)
+        case AES_128_OFB_TYPE:
+        case AES_192_OFB_TYPE:
+        case AES_256_OFB_TYPE:
+            WOLFSSL_MSG("AES OFB");
+            return AES_BLOCK_SIZE;
+#endif /* WOLFSSL_AES_OFB */
+#ifdef WOLFSSL_AES_XTS
+        case AES_128_XTS_TYPE:
+        case AES_256_XTS_TYPE:
+            WOLFSSL_MSG("AES XTS");
+            return AES_BLOCK_SIZE;
+#endif /* WOLFSSL_AES_XTS */
+
+        case NULL_CIPHER_TYPE :
+            WOLFSSL_MSG("NULL");
+            return 0;
+
+        default: {
+            WOLFSSL_MSG("bad type");
+        }
+    }
+    return 0;
+}
+
+int wolfSSL_EVP_CIPHER_iv_length(const WOLFSSL_EVP_CIPHER* cipher)
+{
+    const char *name = (const char *)cipher;
+    WOLFSSL_MSG("wolfSSL_EVP_CIPHER_iv_length");
+
+#ifndef NO_AES
+#ifdef HAVE_AES_CBC
+    #ifdef WOLFSSL_AES_128
+    if (EVP_AES_128_CBC && XSTRNCMP(name, EVP_AES_128_CBC, XSTRLEN(EVP_AES_128_CBC)) == 0)
+        return AES_BLOCK_SIZE;
+    #endif
+    #ifdef WOLFSSL_AES_192
+    if (EVP_AES_192_CBC && XSTRNCMP(name, EVP_AES_192_CBC, XSTRLEN(EVP_AES_192_CBC)) == 0)
+        return AES_BLOCK_SIZE;
+    #endif
+    #ifdef WOLFSSL_AES_256
+    if (EVP_AES_256_CBC && XSTRNCMP(name, EVP_AES_256_CBC, XSTRLEN(EVP_AES_256_CBC)) == 0)
+        return AES_BLOCK_SIZE;
+    #endif
+#endif /* HAVE_AES_CBC */
+#ifdef HAVE_AESGCM
+    #ifdef WOLFSSL_AES_128
+    if (EVP_AES_128_GCM && XSTRNCMP(name, EVP_AES_128_GCM, XSTRLEN(EVP_AES_128_GCM)) == 0)
+        return GCM_NONCE_MID_SZ;
+    #endif
+    #ifdef WOLFSSL_AES_192
+    if (EVP_AES_192_GCM && XSTRNCMP(name, EVP_AES_192_GCM, XSTRLEN(EVP_AES_192_GCM)) == 0)
+        return GCM_NONCE_MID_SZ;
+    #endif
+    #ifdef WOLFSSL_AES_256
+    if (EVP_AES_256_GCM && XSTRNCMP(name, EVP_AES_256_GCM, XSTRLEN(EVP_AES_256_GCM)) == 0)
+        return GCM_NONCE_MID_SZ;
+    #endif
+#endif /* HAVE_AESGCM */
+#ifdef WOLFSSL_AES_COUNTER
+    #ifdef WOLFSSL_AES_128
+    if (EVP_AES_128_CTR && XSTRNCMP(name, EVP_AES_128_CTR, XSTRLEN(EVP_AES_128_CTR)) == 0)
+        return AES_BLOCK_SIZE;
+    #endif
+    #ifdef WOLFSSL_AES_192
+    if (EVP_AES_192_CTR && XSTRNCMP(name, EVP_AES_192_CTR, XSTRLEN(EVP_AES_192_CTR)) == 0)
+        return AES_BLOCK_SIZE;
+    #endif
+    #ifdef WOLFSSL_AES_256
+    if (EVP_AES_256_CTR && XSTRNCMP(name, EVP_AES_256_CTR, XSTRLEN(EVP_AES_256_CTR)) == 0)
+        return AES_BLOCK_SIZE;
+    #endif
+#endif
+#ifdef WOLFSSL_AES_XTS
+    #ifdef WOLFSSL_AES_128
+    if (EVP_AES_128_XTS && XSTRNCMP(name, EVP_AES_128_XTS, XSTRLEN(EVP_AES_128_XTS)) == 0)
+        return AES_BLOCK_SIZE;
+    #endif /* WOLFSSL_AES_128 */
+
+    #ifdef WOLFSSL_AES_256
+    if (EVP_AES_256_XTS && XSTRNCMP(name, EVP_AES_256_XTS, XSTRLEN(EVP_AES_256_XTS)) == 0)
+        return AES_BLOCK_SIZE;
+    #endif /* WOLFSSL_AES_256 */
+#endif /* WOLFSSL_AES_XTS */
+
+#endif
+
+#ifndef NO_DES3
+    if ((EVP_DES_CBC && XSTRNCMP(name, EVP_DES_CBC, XSTRLEN(EVP_DES_CBC)) == 0) ||
+           (EVP_DES_EDE3_CBC && XSTRNCMP(name, EVP_DES_EDE3_CBC, XSTRLEN(EVP_DES_EDE3_CBC)) == 0)) {
+        return DES_BLOCK_SIZE;
+    }
+#endif
+
+#ifdef HAVE_IDEA
+    if (EVP_IDEA_CBC && XSTRNCMP(name, EVP_IDEA_CBC, XSTRLEN(EVP_IDEA_CBC)) == 0)
+        return IDEA_BLOCK_SIZE;
+#endif
+
+    (void)name;
+
+    return 0;
+}
+
+
+int wolfSSL_EVP_X_STATE_LEN(const WOLFSSL_EVP_CIPHER_CTX* ctx)
+{
+    WOLFSSL_MSG("wolfSSL_EVP_X_STATE_LEN");
+
+    if (ctx) {
+        switch (ctx->cipherType) {
+            case ARC4_TYPE:
+                WOLFSSL_MSG("returning arc4 state size");
+                return sizeof(Arc4);
+
+            default:
+                WOLFSSL_MSG("bad x state type");
+                return 0;
+        }
+    }
+
+    return 0;
+}
+
+
+/* return of pkey->type which will be EVP_PKEY_RSA for example.
+ *
+ * type  type of EVP_PKEY
+ *
+ * returns type or if type is not found then NID_undef
+ */
+int wolfSSL_EVP_PKEY_type(int type)
+{
+    WOLFSSL_MSG("wolfSSL_EVP_PKEY_type");
+
+    switch (type) {
+        case EVP_PKEY_RSA:
+            return EVP_PKEY_RSA;
+        case EVP_PKEY_DSA:
+            return EVP_PKEY_DSA;
+        case EVP_PKEY_EC:
+            return EVP_PKEY_EC;
+        case EVP_PKEY_DH:
+            return EVP_PKEY_DH;
+        default:
+            return NID_undef;
+    }
+}
+
+
+int wolfSSL_EVP_PKEY_id(const EVP_PKEY *pkey)
+{
+    if (pkey != NULL)
+        return pkey->type;
+    return 0;
+}
+
+
+int wolfSSL_EVP_PKEY_base_id(const EVP_PKEY *pkey)
+{
+    if (pkey == NULL)
+        return NID_undef;
+    return wolfSSL_EVP_PKEY_type(pkey->type);
+}
+
+
+/* increments ref count of WOLFSSL_EVP_PKEY. Return 1 on success, 0 on error */
+int wolfSSL_EVP_PKEY_up_ref(WOLFSSL_EVP_PKEY* pkey)
+{
+    if (pkey) {
+        if (wc_LockMutex(&pkey->refMutex) != 0) {
+            WOLFSSL_MSG("Failed to lock pkey mutex");
+        }
+        pkey->references++;
+        wc_UnLockMutex(&pkey->refMutex);
+
+        return 1;
+    }
+
+    return 0;
+}
+
+#ifndef NO_RSA
+int wolfSSL_EVP_PKEY_assign_RSA(EVP_PKEY* pkey, WOLFSSL_RSA* key)
+{
+    if (pkey == NULL || key == NULL)
+        return WOLFSSL_FAILURE;
+
+    pkey->type = EVP_PKEY_RSA;
+    pkey->rsa = key;
+    pkey->ownRsa = 1;
+
+    /* try and populate public pkey_sz and pkey.ptr */
+    if (key->internal) {
+        RsaKey* rsa = (RsaKey*)key->internal;
+        int ret = wc_RsaPublicKeyDerSize(rsa, 1);
+        if (ret > 0) {
+            int derSz = ret;
+            char* derBuf = (char*)XMALLOC(derSz, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+            if (derBuf) {
+                ret = wc_RsaKeyToPublicDer(rsa, (byte*)derBuf, derSz);
+                if (ret >= 0) {
+                    pkey->pkey_sz = ret;
+                    pkey->pkey.ptr = derBuf;
+                }
+                else { /* failure - okay to ignore */
+                    XFREE(derBuf, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+                    derBuf = NULL;
+                }
+            }
+        }
+    }
+
+    return WOLFSSL_SUCCESS;
+}
+#endif /* !NO_RSA */
+
+#ifndef NO_DSA
+int wolfSSL_EVP_PKEY_assign_DSA(EVP_PKEY* pkey, WOLFSSL_DSA* key)
+{
+    if (pkey == NULL || key == NULL)
+        return WOLFSSL_FAILURE;
+
+    pkey->type = EVP_PKEY_DSA;
+    pkey->dsa = key;
+    pkey->ownDsa = 1;
+
+    return WOLFSSL_SUCCESS;
+}
+#endif /* !NO_DSA */
+
+#ifndef NO_DH
+int wolfSSL_EVP_PKEY_assign_DH(EVP_PKEY* pkey, WOLFSSL_DH* key)
+{
+    if (pkey == NULL || key == NULL)
+        return WOLFSSL_FAILURE;
+
+    pkey->type = EVP_PKEY_DH;
+    pkey->dh = key;
+    pkey->ownDh = 1;
+
+    return WOLFSSL_SUCCESS;
+}
+#endif /* !NO_DH */
+
+#endif /* OPENSSL_EXTRA */
+
+#if defined(OPENSSL_EXTRA_X509_SMALL)
+/* Subset of OPENSSL_EXTRA for PKEY operations PKEY free is needed by the
+ * subset of X509 API */
+
+WOLFSSL_EVP_PKEY* wolfSSL_EVP_PKEY_new(void){
+    return wolfSSL_EVP_PKEY_new_ex(NULL);
+}
+
+WOLFSSL_EVP_PKEY* wolfSSL_EVP_PKEY_new_ex(void* heap)
+{
+    WOLFSSL_EVP_PKEY* pkey;
+    int ret;
+    WOLFSSL_ENTER("wolfSSL_EVP_PKEY_new_ex");
+    pkey = (WOLFSSL_EVP_PKEY*)XMALLOC(sizeof(WOLFSSL_EVP_PKEY), heap,
+            DYNAMIC_TYPE_PUBLIC_KEY);
+    if (pkey != NULL) {
+        XMEMSET(pkey, 0, sizeof(WOLFSSL_EVP_PKEY));
+        pkey->heap = heap;
+        pkey->type = WOLFSSL_EVP_PKEY_DEFAULT;
+#ifndef HAVE_FIPS
+        ret = wc_InitRng_ex(&pkey->rng, heap, INVALID_DEVID);
+#else
+        ret = wc_InitRng(&pkey->rng);
+#endif
+        if (ret != 0){
+            wolfSSL_EVP_PKEY_free(pkey);
+            WOLFSSL_MSG("memory failure");
+            return NULL;
+        }
+        pkey->references = 1;
+        wc_InitMutex(&pkey->refMutex);
+    }
+    else {
+        WOLFSSL_MSG("memory failure");
+    }
+
+    return pkey;
+}
+
+void wolfSSL_EVP_PKEY_free(WOLFSSL_EVP_PKEY* key)
+{
+    int doFree = 0;
+    WOLFSSL_ENTER("wolfSSL_EVP_PKEY_free");
+    if (key != NULL) {
+        if (wc_LockMutex(&key->refMutex) != 0) {
+            WOLFSSL_MSG("Couldn't lock pkey mutex");
+        }
+
+        /* only free if all references to it are done */
+        key->references--;
+        if (key->references == 0) {
+            doFree = 1;
+        }
+        wc_UnLockMutex(&key->refMutex);
+
+        if (doFree) {
+            wc_FreeRng(&key->rng);
+
+            if (key->pkey.ptr != NULL) {
+                XFREE(key->pkey.ptr, key->heap, DYNAMIC_TYPE_PUBLIC_KEY);
+                key->pkey.ptr = NULL;
+            }
+            switch(key->type)
+            {
+                #ifndef NO_RSA
+                case EVP_PKEY_RSA:
+                    if (key->rsa != NULL && key->ownRsa == 1) {
+                        wolfSSL_RSA_free(key->rsa);
+                        key->rsa = NULL;
+                    }
+                    break;
+                #endif /* NO_RSA */
+
+                #if defined(HAVE_ECC) && defined(OPENSSL_EXTRA)
+                case EVP_PKEY_EC:
+                    if (key->ecc != NULL && key->ownEcc == 1) {
+                        wolfSSL_EC_KEY_free(key->ecc);
+                        key->ecc = NULL;
+                    }
+                    break;
+                #endif /* HAVE_ECC && OPENSSL_EXTRA */
+
+                #ifndef NO_DSA
+                case EVP_PKEY_DSA:
+                    if (key->dsa != NULL && key->ownDsa == 1) {
+                        wolfSSL_DSA_free(key->dsa);
+                        key->dsa = NULL;
+                    }
+                    break;
+                #endif /* NO_DSA */
+
+                #if !defined(NO_DH) && (defined(WOLFSSL_QT) || defined(OPENSSL_ALL))
+                case EVP_PKEY_DH:
+                    if (key->dh != NULL && key->ownDh == 1) {
+                        wolfSSL_DH_free(key->dh);
+                        key->dh = NULL;
+                    }
+                    break;
+                #endif /* ! NO_DH ... */
+
+                default:
+                break;
+            }
+
+            if (wc_FreeMutex(&key->refMutex) != 0) {
+                WOLFSSL_MSG("Couldn't free pkey mutex");
+            }
+            XFREE(key, key->heap, DYNAMIC_TYPE_PUBLIC_KEY);
+        }
+    }
+}
+
+#endif /* OPENSSL_EXTRA_X509_SMALL */
+
+#endif /* WOLFSSL_EVP_INCLUDED */
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wolfcrypt/src/fe_448.c	Thu Jun 04 23:57:22 2020 +0000
@@ -0,0 +1,2459 @@
+/* fe_448.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/* Based On Daniel J Bernstein's curve25519 Public Domain ref10 work.
+ * Small implementation based on Daniel Beer's curve25519 public domain work.
+ * Reworked for curve448 by Sean Parkinson.
+ */
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#if defined(HAVE_CURVE448) || defined(HAVE_ED448)
+
+#include <wolfssl/wolfcrypt/fe_448.h>
+#include <stdint.h>
+
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
+#if defined(CURVE448_SMALL) || defined(ED448_SMALL)
+
+/* Initialize the field element operations.
+ */
+void fe448_init(void)
+{
+}
+
+/* Normalize the field element.
+ * Ensure result is in range: 0..2^448-2^224-2
+ *
+ * a  [in]  Field element in range 0..2^448-1.
+ */
+void fe448_norm(uint8_t* a)
+{
+    int i;
+    int16_t c = 0;
+    int16_t o = 0;
+
+    for (i = 0; i < 56; i++) {
+        c += a[i];
+        if ((i == 0) || (i == 28))
+            c += 1;
+        c >>= 8;
+    }
+
+    for (i = 0; i < 56; i++) {
+        if ((i == 0) || (i == 28)) o += c;
+        o += a[i];
+        a[i] = (uint8_t)o;
+        o >>= 8;
+    }
+}
+
+/* Copy one field element into another: d = a.
+ *
+ * d  [in]  Destination field element.
+ * a  [in]  Source field element.
+ */
+void fe448_copy(uint8_t* d, const uint8_t* a)
+{
+    int i;
+    for (i = 0; i < 56; i++) {
+         d[i] = a[i];
+    }
+}
+
+/* Conditionally swap the elements.
+ * Constant time implementation.
+ *
+ * a  [in]  First field element.
+ * b  [in]  Second field element.
+ * c  [in]  Swap when 1. Valid values: 0, 1.
+ */
+static void fe448_cswap(uint8_t* a, uint8_t* b, int c)
+{
+    int i;
+    uint8_t mask = -(uint8_t)c;
+    uint8_t t[56];
+
+    for (i = 0; i < 56; i++)
+        t[i] = (a[i] ^ b[i]) & mask;
+    for (i = 0; i < 56; i++)
+        a[i] ^= t[i];
+    for (i = 0; i < 56; i++)
+        b[i] ^= t[i];
+}
+
+/* Add two field elements. r = (a + b) mod (2^448 - 2^224 - 1)
+ *
+ * r  [in]  Field element to hold sum.
+ * a  [in]  Field element to add.
+ * b  [in]  Field element to add.
+ */
+void fe448_add(uint8_t* r, const uint8_t* a, const uint8_t* b)
+{
+    int i;
+    int16_t c = 0;
+    int16_t o = 0;
+
+    for (i = 0; i < 56; i++) {
+        c += a[i];
+        c += b[i];
+        r[i] = (uint8_t)c;
+        c >>= 8;
+    }
+
+    for (i = 0; i < 56; i++) {
+        if ((i == 0) || (i == 28)) o += c;
+        o += r[i];
+        r[i] = (uint8_t)o;
+        o >>= 8;
+    }
+}
+
+/* Subtract a field element from another. r = (a - b) mod (2^448 - 2^224 - 1)
+ *
+ * r  [in]  Field element to hold difference.
+ * a  [in]  Field element to subtract from.
+ * b  [in]  Field element to subtract.
+ */
+void fe448_sub(uint8_t* r, const uint8_t* a, const uint8_t* b)
+{
+    int i;
+    int16_t c = 0;
+    int16_t o = 0;
+
+    for (i = 0; i < 56; i++) {
+        if (i == 28)
+            c += 0x1fc;
+        else
+            c += 0x1fe;
+        c += a[i];
+        c -= b[i];
+        r[i] = (uint8_t)c;
+        c >>= 8;
+    }
+
+    for (i = 0; i < 56; i++) {
+        if ((i == 0) || (i == 28)) o += c;
+        o += r[i];
+        r[i] = (uint8_t)o;
+        o >>= 8;
+    }
+}
+
+/* Mulitply a field element by 39081. r = (39081 * a) mod (2^448 - 2^224 - 1)
+ *
+ * r  [in]  Field element to hold result.
+ * a  [in]  Field element to multiply.
+ */
+void fe448_mul39081(uint8_t* r, const uint8_t* a)
+{
+    int i;
+    int32_t c = 0;
+    int32_t o = 0;
+
+    for (i = 0; i < 56; i++) {
+        c += a[i] * (int32_t)39081;
+        r[i] = (uint8_t)c;
+        c >>= 8;
+    }
+
+    for (i = 0; i < 56; i++) {
+        if ((i == 0) || (i == 28)) o += c;
+        o += r[i];
+        r[i] = (uint8_t)o;
+        o >>= 8;
+    }
+}
+
+/* Mulitply two field elements. r = (a * b) mod (2^448 - 2^224 - 1)
+ *
+ * r  [in]  Field element to hold result.
+ * a  [in]  Field element to multiply.
+ * b  [in]  Field element to multiply.
+ */
+void fe448_mul(uint8_t* r, const uint8_t* a, const uint8_t* b)
+{
+    int i, k;
+    int32_t c = 0;
+    int16_t o = 0, cc = 0;
+    uint8_t t[112];
+
+    for (k = 0; k < 56; k++) {
+        i = 0;
+        for (; i <= k; i++) {
+            c += (int32_t)a[i] * b[k - i];
+        }
+        t[k] = (uint8_t)c;
+        c >>= 8;
+    }
+    for (; k < 111; k++) {
+        i = k - 55;
+        for (; i < 56; i++) {
+            c += (int32_t)a[i] * b[k - i];
+        }
+        t[k] = (uint8_t)c;
+        c >>= 8;
+    }
+    t[k] = (uint8_t)c;
+
+    for (i = 0; i < 28; i++) {
+        o += t[i];
+        o += t[i + 56];
+        o += t[i + 84];
+        r[i] = (uint8_t)o;
+        o >>= 8;
+    }
+    for (i = 28; i < 56; i++) {
+        o += t[i];
+        o += t[i + 56];
+        o += t[i + 28];
+        o += t[i + 56];
+        r[i] = (uint8_t)o;
+        o >>= 8;
+    }
+    for (i = 0; i < 56; i++) {
+        if ((i == 0) || (i == 28)) cc += o;
+        cc += r[i];
+        r[i] = (uint8_t)cc;
+        cc >>= 8;
+    }
+}
+
+/* Square a field element. r = (a * a) mod (2^448 - 2^224 - 1)
+ *
+ * r  [in]  Field element to hold result.
+ * a  [in]  Field element to square.
+ */
+void fe448_sqr(uint8_t* r, const uint8_t* a)
+{
+    int i, k;
+    int32_t c = 0;
+    int32_t p;
+    int16_t o = 0, cc = 0;
+    uint8_t t[112];
+
+    for (k = 0; k < 56; k++) {
+        i = 0;
+        for (; i <= k; i++) {
+            if (k - i < i)
+                break;
+            p = (int32_t)a[i] * a[k - i];
+            if (k - i != i)
+                p *= 2;
+            c += p;
+        }
+        t[k] = (uint8_t)c;
+        c >>= 8;
+    }
+    for (; k < 111; k++) {
+         i = k - 55;
+        for (; i < 56; i++) {
+            if (k - i < i)
+                break;
+            p = (int32_t)a[i] * a[k - i];
+            if (k - i != i)
+                p *= 2;
+            c += p;
+        }
+        t[k] = (uint8_t)c;
+        c >>= 8;
+    }
+    t[k] = (uint8_t)c;
+
+    for (i = 0; i < 28; i++) {
+        o += t[i];
+        o += t[i + 56];
+        o += t[i + 84];
+        r[i] = (uint8_t)o;
+        o >>= 8;
+    }
+    for (i = 28; i < 56; i++) {
+        o += t[i];
+        o += t[i + 56];
+        o += t[i + 28];
+        o += t[i + 56];
+        r[i] = (uint8_t)o;
+        o >>= 8;
+    }
+    for (i = 0; i < 56; i++) {
+        if ((i == 0) || (i == 28)) cc += o;
+        cc += r[i];
+        r[i] = (uint8_t)cc;
+        cc >>= 8;
+    }
+    fe448_norm(r);
+}
+
+/* Invert the field element. (r * a) mod (2^448 - 2^224 - 1) = 1
+ * Constant time implementation - using Fermat's little theorem:
+ *   a^(p-1) mod p = 1 => a^(p-2) mod p = 1/a
+ * For curve448: p - 2 = 2^448 - 2^224 - 3
+ *
+ * r  [in]  Field element to hold result.
+ * a  [in]  Field element to invert.
+ */
+void fe448_invert(uint8_t* r, const uint8_t* a)
+{
+    int i;
+    uint8_t t[56];
+
+    fe448_sqr(t, a);
+    fe448_mul(t, t, a);
+    for (i = 0; i < 221; i++) {
+        fe448_sqr(t, t);
+        fe448_mul(t, t, a);
+    }
+    fe448_sqr(t, t);
+    for (i = 0; i < 222; i++) {
+        fe448_sqr(t, t);
+        fe448_mul(t, t, a);
+    }
+    fe448_sqr(t, t);
+    fe448_sqr(t, t);
+    fe448_mul(r, t, a);
+}
+
+/* Scalar multiply the point by a number. r = n.a
+ * Uses Montogmery ladder and only requires the x-ordinate.
+ *
+ * r  [in]  Field element to hold result.
+ * n  [in]  Scalar as an array of bytes.
+ * a  [in]  Point to multiply - x-ordinate only.
+ */
+int curve448(byte* r, const byte* n, const byte* a)
+{
+    uint8_t x1[56];
+    uint8_t x2[56] = {1};
+    uint8_t z2[56] = {0};
+    uint8_t x3[56];
+    uint8_t z3[56] = {1};
+    uint8_t t0[56];
+    uint8_t t1[56];
+    int i;
+    unsigned int swap;
+    unsigned int b;
+
+    fe448_copy(x1, a);
+    fe448_copy(x3, a);
+
+    swap = 0;
+    for (i = 447; i >= 0; --i) {
+        b = (n[i >> 3] >> (i & 7)) & 1;
+        swap ^= b;
+        fe448_cswap(x2, x3, swap);
+        fe448_cswap(z2, z3, swap);
+        swap = b;
+
+        /* Montgomery Ladder - double and add */
+        fe448_add(t0, x2, z2);
+        fe448_add(t1, x3, z3);
+        fe448_sub(x2, x2, z2);
+        fe448_sub(x3, x3, z3);
+        fe448_mul(t1, t1, x2);
+        fe448_mul(z3, x3, t0);
+        fe448_sqr(t0, t0);
+        fe448_sqr(x2, x2);
+        fe448_add(x3, z3, t1);
+        fe448_sqr(x3, x3);
+        fe448_sub(z3, z3, t1);
+        fe448_sqr(z3, z3);
+        fe448_mul(z3, z3, x1);
+        fe448_sub(t1, t0, x2);
+        fe448_mul(x2, t0, x2);
+        fe448_mul39081(z2, t1);
+        fe448_add(z2, t0, z2);
+        fe448_mul(z2, z2, t1);
+    }
+    fe448_cswap(x2, x3, swap);
+    fe448_cswap(z2, z3, swap);
+
+    fe448_invert(z2, z2);
+    fe448_mul(r, x2, z2);
+    fe448_norm(r);
+
+    return 0;
+}
+
+#ifdef HAVE_ED448
+/* Check whether field element is not 0.
+ * Field element must have been normalized before call.
+ *
+ * a  [in]  Field element.
+ * returns 0 when zero, and any other value otherwise.
+ */
+int fe448_isnonzero(const uint8_t* a)
+{
+    int i;
+    uint8_t c = 0;
+    for (i = 0; i < 56; i++)
+        c |= a[i];
+    return c;
+}
+
+/* Negates the field element. r = -a mod (2^448 - 2^224 - 1)
+ * Add 0x200 to each element and subtract 2 from next.
+ * Top element overflow handled by subtracting 2 from index 0 and 28.
+ *
+ * r  [in]  Field element to hold result.
+ * a  [in]  Field element.
+ */
+void fe448_neg(uint8_t* r, const uint8_t* a)
+{
+    int i;
+    int16_t c = 0;
+    int16_t o = 0;
+
+    for (i = 0; i < 56; i++) {
+        if (i == 28)
+            c += 0x1fc;
+        else
+            c += 0x1fe;
+        c -= a[i];
+        r[i] = (uint8_t)c;
+        c >>= 8;
+    }
+
+    for (i = 0; i < 56; i++) {
+        if ((i == 0) || (i == 28)) o += c;
+        o += r[i];
+        r[i] = (uint8_t)o;
+        o >>= 8;
+    }
+}
+
+/* Raise field element to (p-3) / 4: 2^446 - 2^222 - 1
+ * Used for calcualting y-ordinate from x-ordinate for Ed448.
+ *
+ * r  [in]  Field element to hold result.
+ * a  [in]  Field element to exponentiate.
+ */
+void fe448_pow_2_446_222_1(uint8_t* r, const uint8_t* a)
+{
+    int i;
+    uint8_t t[56];
+
+    fe448_sqr(t, a);
+    fe448_mul(t, t, a);
+    for (i = 0; i < 221; i++) {
+        fe448_sqr(t, t);
+        fe448_mul(t, t, a);
+    }
+    fe448_sqr(t, t);
+    for (i = 0; i < 221; i++) {
+        fe448_sqr(t, t);
+        fe448_mul(t, t, a);
+    }
+    fe448_sqr(t, t);
+    fe448_mul(r, t, a);
+}
+
+/* Constant time, conditional move of b into a.
+ * a is not changed if the condition is 0.
+ *
+ * a  A field element.
+ * b  A field element.
+ * c  If 1 then copy and if 0 then don't copy.
+ */
+void fe448_cmov(uint8_t* a, const uint8_t* b, int c)
+{
+    int i;
+    uint8_t m = -(uint8_t)c;
+    uint8_t t[56];
+
+    for (i = 0; i < 56; i++)
+        t[i] = m & (a[i] ^ b[i]);
+    for (i = 0; i < 56; i++)
+        a[i] ^= t[i];
+}
+
+#endif /* HAVE_ED448 */
+#elif defined(CURVED448_128BIT)
+
+/* Initialize the field element operations.
+ */
+void fe448_init(void)
+{
+}
+
+/* Convert the field element from a byte array to an array of 56-bits.
+ *
+ * r  [in]  Array to encode into.
+ * b  [in]  Byte array.
+ */
+void fe448_from_bytes(int64_t* r, const unsigned char* b)
+{
+    r[ 0] =  ((int64_t) (b[ 0]) <<  0)
+          |  ((int64_t) (b[ 1]) <<  8)
+          |  ((int64_t) (b[ 2]) << 16)
+          |  ((int64_t) (b[ 3]) << 24)
+          |  ((int64_t) (b[ 4]) << 32)
+          |  ((int64_t) (b[ 5]) << 40)
+          |  ((int64_t) (b[ 6]) << 48);
+    r[ 1] =  ((int64_t) (b[ 7]) <<  0)
+          |  ((int64_t) (b[ 8]) <<  8)
+          |  ((int64_t) (b[ 9]) << 16)
+          |  ((int64_t) (b[10]) << 24)
+          |  ((int64_t) (b[11]) << 32)
+          |  ((int64_t) (b[12]) << 40)
+          |  ((int64_t) (b[13]) << 48);
+    r[ 2] =  ((int64_t) (b[14]) <<  0)
+          |  ((int64_t) (b[15]) <<  8)
+          |  ((int64_t) (b[16]) << 16)
+          |  ((int64_t) (b[17]) << 24)
+          |  ((int64_t) (b[18]) << 32)
+          |  ((int64_t) (b[19]) << 40)
+          |  ((int64_t) (b[20]) << 48);
+    r[ 3] =  ((int64_t) (b[21]) <<  0)
+          |  ((int64_t) (b[22]) <<  8)
+          |  ((int64_t) (b[23]) << 16)
+          |  ((int64_t) (b[24]) << 24)
+          |  ((int64_t) (b[25]) << 32)
+          |  ((int64_t) (b[26]) << 40)
+          |  ((int64_t) (b[27]) << 48);
+    r[ 4] =  ((int64_t) (b[28]) <<  0)
+          |  ((int64_t) (b[29]) <<  8)
+          |  ((int64_t) (b[30]) << 16)
+          |  ((int64_t) (b[31]) << 24)
+          |  ((int64_t) (b[32]) << 32)
+          |  ((int64_t) (b[33]) << 40)
+          |  ((int64_t) (b[34]) << 48);
+    r[ 5] =  ((int64_t) (b[35]) <<  0)
+          |  ((int64_t) (b[36]) <<  8)
+          |  ((int64_t) (b[37]) << 16)
+          |  ((int64_t) (b[38]) << 24)
+          |  ((int64_t) (b[39]) << 32)
+          |  ((int64_t) (b[40]) << 40)
+          |  ((int64_t) (b[41]) << 48);
+    r[ 6] =  ((int64_t) (b[42]) <<  0)
+          |  ((int64_t) (b[43]) <<  8)
+          |  ((int64_t) (b[44]) << 16)
+          |  ((int64_t) (b[45]) << 24)
+          |  ((int64_t) (b[46]) << 32)
+          |  ((int64_t) (b[47]) << 40)
+          |  ((int64_t) (b[48]) << 48);
+    r[ 7] =  ((int64_t) (b[49]) <<  0)
+          |  ((int64_t) (b[50]) <<  8)
+          |  ((int64_t) (b[51]) << 16)
+          |  ((int64_t) (b[52]) << 24)
+          |  ((int64_t) (b[53]) << 32)
+          |  ((int64_t) (b[54]) << 40)
+          |  ((int64_t) (b[55]) << 48);
+}
+
+/* Convert the field element to a byte array from an array of 56-bits.
+ *
+ * b  [in]  Byte array.
+ * a  [in]  Array to encode into.
+ */
+void fe448_to_bytes(unsigned char* b, const int64_t* a)
+{
+    int128_t t;
+    /* Mod */
+    int64_t in0 = a[0];
+    int64_t in1 = a[1];
+    int64_t in2 = a[2];
+    int64_t in3 = a[3];
+    int64_t in4 = a[4];
+    int64_t in5 = a[5];
+    int64_t in6 = a[6];
+    int64_t in7 = a[7];
+    int64_t o = in7 >> 56;
+    in7 -= o << 56;
+    in0 += o;
+    in4 += o;
+    o = (in0 + 1) >> 56;
+    o = (o + in1) >> 56;
+    o = (o + in2) >> 56;
+    o = (o + in3) >> 56;
+    o = (o + in4 + 1) >> 56;
+    o = (o + in5) >> 56;
+    o = (o + in6) >> 56;
+    o = (o + in7) >> 56;
+    in0 += o;
+    in4 += o;
+    in7 -= o << 56;
+    o = in0  >> 56; in1  += o; t = o << 56; in0  -= t;
+    o = in1  >> 56; in2  += o; t = o << 56; in1  -= t;
+    o = in2  >> 56; in3  += o; t = o << 56; in2  -= t;
+    o = in3  >> 56; in4  += o; t = o << 56; in3  -= t;
+    o = in4  >> 56; in5  += o; t = o << 56; in4  -= t;
+    o = in5  >> 56; in6  += o; t = o << 56; in5  -= t;
+    o = in6  >> 56; in7  += o; t = o << 56; in6  -= t;
+    o = in7  >> 56; in0  += o;
+                    in4  += o; t = o << 56; in7  -= t;
+
+    /* Output as bytes */
+    b[ 0] = (in0  >>  0);
+    b[ 1] = (in0  >>  8);
+    b[ 2] = (in0  >> 16);
+    b[ 3] = (in0  >> 24);
+    b[ 4] = (in0  >> 32);
+    b[ 5] = (in0  >> 40);
+    b[ 6] = (in0  >> 48);
+    b[ 7] = (in1  >>  0);
+    b[ 8] = (in1  >>  8);
+    b[ 9] = (in1  >> 16);
+    b[10] = (in1  >> 24);
+    b[11] = (in1  >> 32);
+    b[12] = (in1  >> 40);
+    b[13] = (in1  >> 48);
+    b[14] = (in2  >>  0);
+    b[15] = (in2  >>  8);
+    b[16] = (in2  >> 16);
+    b[17] = (in2  >> 24);
+    b[18] = (in2  >> 32);
+    b[19] = (in2  >> 40);
+    b[20] = (in2  >> 48);
+    b[21] = (in3  >>  0);
+    b[22] = (in3  >>  8);
+    b[23] = (in3  >> 16);
+    b[24] = (in3  >> 24);
+    b[25] = (in3  >> 32);
+    b[26] = (in3  >> 40);
+    b[27] = (in3  >> 48);
+    b[28] = (in4  >>  0);
+    b[29] = (in4  >>  8);
+    b[30] = (in4  >> 16);
+    b[31] = (in4  >> 24);
+    b[32] = (in4  >> 32);
+    b[33] = (in4  >> 40);
+    b[34] = (in4  >> 48);
+    b[35] = (in5  >>  0);
+    b[36] = (in5  >>  8);
+    b[37] = (in5  >> 16);
+    b[38] = (in5  >> 24);
+    b[39] = (in5  >> 32);
+    b[40] = (in5  >> 40);
+    b[41] = (in5  >> 48);
+    b[42] = (in6  >>  0);
+    b[43] = (in6  >>  8);
+    b[44] = (in6  >> 16);
+    b[45] = (in6  >> 24);
+    b[46] = (in6  >> 32);
+    b[47] = (in6  >> 40);
+    b[48] = (in6  >> 48);
+    b[49] = (in7  >>  0);
+    b[50] = (in7  >>  8);
+    b[51] = (in7  >> 16);
+    b[52] = (in7  >> 24);
+    b[53] = (in7  >> 32);
+    b[54] = (in7  >> 40);
+    b[55] = (in7  >> 48);
+}
+
+/* Set the field element to 0.
+ *
+ * a  [in]  Field element.
+ */
+void fe448_1(int64_t* a)
+{
+    a[0] = 1;
+    a[1] = 0;
+    a[2] = 0;
+    a[3] = 0;
+    a[4] = 0;
+    a[5] = 0;
+    a[6] = 0;
+    a[7] = 0;
+}
+
+/* Set the field element to 0.
+ *
+ * a  [in]  Field element.
+ */
+void fe448_0(int64_t* a)
+{
+    a[0] = 0;
+    a[1] = 0;
+    a[2] = 0;
+    a[3] = 0;
+    a[4] = 0;
+    a[5] = 0;
+    a[6] = 0;
+    a[7] = 0;
+}
+
+/* Copy one field element into another: d = a.
+ *
+ * d  [in]  Destination field element.
+ * a  [in]  Source field element.
+ */
+void fe448_copy(int64_t* d, const int64_t* a)
+{
+    d[0] = a[0];
+    d[1] = a[1];
+    d[2] = a[2];
+    d[3] = a[3];
+    d[4] = a[4];
+    d[5] = a[5];
+    d[6] = a[6];
+    d[7] = a[7];
+}
+
+/* Conditionally swap the elements.
+ * Constant time implementation.
+ *
+ * a  [in]  First field element.
+ * b  [in]  Second field element.
+ * c  [in]  Swap when 1. Valid values: 0, 1.
+ */
+static void fe448_cswap(int64_t* a, int64_t* b, int c)
+{
+    int64_t mask = -(int64_t)c;
+    int64_t t0 = (a[0] ^ b[0]) & mask;
+    int64_t t1 = (a[1] ^ b[1]) & mask;
+    int64_t t2 = (a[2] ^ b[2]) & mask;
+    int64_t t3 = (a[3] ^ b[3]) & mask;
+    int64_t t4 = (a[4] ^ b[4]) & mask;
+    int64_t t5 = (a[5] ^ b[5]) & mask;
+    int64_t t6 = (a[6] ^ b[6]) & mask;
+    int64_t t7 = (a[7] ^ b[7]) & mask;
+    a[0] ^= t0;
+    a[1] ^= t1;
+    a[2] ^= t2;
+    a[3] ^= t3;
+    a[4] ^= t4;
+    a[5] ^= t5;
+    a[6] ^= t6;
+    a[7] ^= t7;
+    b[0] ^= t0;
+    b[1] ^= t1;
+    b[2] ^= t2;
+    b[3] ^= t3;
+    b[4] ^= t4;
+    b[5] ^= t5;
+    b[6] ^= t6;
+    b[7] ^= t7;
+}
+
+/* Add two field elements. r = (a + b) mod (2^448 - 2^224 - 1)
+ *
+ * r  [in]  Field element to hold sum.
+ * a  [in]  Field element to add.
+ * b  [in]  Field element to add.
+ */
+void fe448_add(int64_t* r, const int64_t* a, const int64_t* b)
+{
+    r[0] = a[0] + b[0];
+    r[1] = a[1] + b[1];
+    r[2] = a[2] + b[2];
+    r[3] = a[3] + b[3];
+    r[4] = a[4] + b[4];
+    r[5] = a[5] + b[5];
+    r[6] = a[6] + b[6];
+    r[7] = a[7] + b[7];
+}
+
+/* Subtract a field element from another. r = (a - b) mod (2^448 - 2^224 - 1)
+ *
+ * r  [in]  Field element to hold difference.
+ * a  [in]  Field element to subtract from.
+ * b  [in]  Field element to subtract.
+ */
+void fe448_sub(int64_t* r, const int64_t* a, const int64_t* b)
+{
+    r[0] = a[0] - b[0];
+    r[1] = a[1] - b[1];
+    r[2] = a[2] - b[2];
+    r[3] = a[3] - b[3];
+    r[4] = a[4] - b[4];
+    r[5] = a[5] - b[5];
+    r[6] = a[6] - b[6];
+    r[7] = a[7] - b[7];
+}
+
+/* Mulitply a field element by 39081. r = (39081 * a) mod (2^448 - 2^224 - 1)
+ *
+ * r  [in]  Field element to hold result.
+ * a  [in]  Field element to multiply.
+ */
+void fe448_mul39081(int64_t* r, const int64_t* a)
+{
+    int128_t t;
+    int64_t o;
+    int128_t t0 = a[0] * (int128_t)39081;
+    int128_t t1 = a[1] * (int128_t)39081;
+    int128_t t2 = a[2] * (int128_t)39081;
+    int128_t t3 = a[3] * (int128_t)39081;
+    int128_t t4 = a[4] * (int128_t)39081;
+    int128_t t5 = a[5] * (int128_t)39081;
+    int128_t t6 = a[6] * (int128_t)39081;
+    int128_t t7 = a[7] * (int128_t)39081;
+    o = t0  >> 56; t1  += o; t = (int128_t)o << 56; t0  -= t;
+    o = t1  >> 56; t2  += o; t = (int128_t)o << 56; t1  -= t;
+    o = t2  >> 56; t3  += o; t = (int128_t)o << 56; t2  -= t;
+    o = t3  >> 56; t4  += o; t = (int128_t)o << 56; t3  -= t;
+    o = t4  >> 56; t5  += o; t = (int128_t)o << 56; t4  -= t;
+    o = t5  >> 56; t6  += o; t = (int128_t)o << 56; t5  -= t;
+    o = t6  >> 56; t7  += o; t = (int128_t)o << 56; t6  -= t;
+    o = t7  >> 56; t0  += o;
+                   t4  += o; t = (int128_t)o << 56; t7  -= t;
+
+    /* Store */
+    r[0] = t0;
+    r[1] = t1;
+    r[2] = t2;
+    r[3] = t3;
+    r[4] = t4;
+    r[5] = t5;
+    r[6] = t6;
+    r[7] = t7;
+}
+
+/* Mulitply two field elements. r = (a * b) mod (2^448 - 2^224 - 1)
+ *
+ * r  [in]  Field element to hold result.
+ * a  [in]  Field element to multiply.
+ * b  [in]  Field element to multiply.
+ */
+void fe448_mul(int64_t* r, const int64_t* a, const int64_t* b)
+{
+    int128_t t;
+    int64_t o;
+    int128_t t0   = (int128_t)a[ 0] * b[ 0];
+    int128_t t1   = (int128_t)a[ 0] * b[ 1];
+    int128_t t101 = (int128_t)a[ 1] * b[ 0];
+    int128_t t2   = (int128_t)a[ 0] * b[ 2];
+    int128_t t102 = (int128_t)a[ 1] * b[ 1];
+    int128_t t202 = (int128_t)a[ 2] * b[ 0];
+    int128_t t3   = (int128_t)a[ 0] * b[ 3];
+    int128_t t103 = (int128_t)a[ 1] * b[ 2];
+    int128_t t203 = (int128_t)a[ 2] * b[ 1];
+    int128_t t303 = (int128_t)a[ 3] * b[ 0];
+    int128_t t4   = (int128_t)a[ 0] * b[ 4];
+    int128_t t104 = (int128_t)a[ 1] * b[ 3];
+    int128_t t204 = (int128_t)a[ 2] * b[ 2];
+    int128_t t304 = (int128_t)a[ 3] * b[ 1];
+    int128_t t404 = (int128_t)a[ 4] * b[ 0];
+    int128_t t5   = (int128_t)a[ 0] * b[ 5];
+    int128_t t105 = (int128_t)a[ 1] * b[ 4];
+    int128_t t205 = (int128_t)a[ 2] * b[ 3];
+    int128_t t305 = (int128_t)a[ 3] * b[ 2];
+    int128_t t405 = (int128_t)a[ 4] * b[ 1];
+    int128_t t505 = (int128_t)a[ 5] * b[ 0];
+    int128_t t6   = (int128_t)a[ 0] * b[ 6];
+    int128_t t106 = (int128_t)a[ 1] * b[ 5];
+    int128_t t206 = (int128_t)a[ 2] * b[ 4];
+    int128_t t306 = (int128_t)a[ 3] * b[ 3];
+    int128_t t406 = (int128_t)a[ 4] * b[ 2];
+    int128_t t506 = (int128_t)a[ 5] * b[ 1];
+    int128_t t606 = (int128_t)a[ 6] * b[ 0];
+    int128_t t7   = (int128_t)a[ 0] * b[ 7];
+    int128_t t107 = (int128_t)a[ 1] * b[ 6];
+    int128_t t207 = (int128_t)a[ 2] * b[ 5];
+    int128_t t307 = (int128_t)a[ 3] * b[ 4];
+    int128_t t407 = (int128_t)a[ 4] * b[ 3];
+    int128_t t507 = (int128_t)a[ 5] * b[ 2];
+    int128_t t607 = (int128_t)a[ 6] * b[ 1];
+    int128_t t707 = (int128_t)a[ 7] * b[ 0];
+    int128_t t8   = (int128_t)a[ 1] * b[ 7];
+    int128_t t108 = (int128_t)a[ 2] * b[ 6];
+    int128_t t208 = (int128_t)a[ 3] * b[ 5];
+    int128_t t308 = (int128_t)a[ 4] * b[ 4];
+    int128_t t408 = (int128_t)a[ 5] * b[ 3];
+    int128_t t508 = (int128_t)a[ 6] * b[ 2];
+    int128_t t608 = (int128_t)a[ 7] * b[ 1];
+    int128_t t9   = (int128_t)a[ 2] * b[ 7];
+    int128_t t109 = (int128_t)a[ 3] * b[ 6];
+    int128_t t209 = (int128_t)a[ 4] * b[ 5];
+    int128_t t309 = (int128_t)a[ 5] * b[ 4];
+    int128_t t409 = (int128_t)a[ 6] * b[ 3];
+    int128_t t509 = (int128_t)a[ 7] * b[ 2];
+    int128_t t10  = (int128_t)a[ 3] * b[ 7];
+    int128_t t110 = (int128_t)a[ 4] * b[ 6];
+    int128_t t210 = (int128_t)a[ 5] * b[ 5];
+    int128_t t310 = (int128_t)a[ 6] * b[ 4];
+    int128_t t410 = (int128_t)a[ 7] * b[ 3];
+    int128_t t11  = (int128_t)a[ 4] * b[ 7];
+    int128_t t111 = (int128_t)a[ 5] * b[ 6];
+    int128_t t211 = (int128_t)a[ 6] * b[ 5];
+    int128_t t311 = (int128_t)a[ 7] * b[ 4];
+    int128_t t12  = (int128_t)a[ 5] * b[ 7];
+    int128_t t112 = (int128_t)a[ 6] * b[ 6];
+    int128_t t212 = (int128_t)a[ 7] * b[ 5];
+    int128_t t13  = (int128_t)a[ 6] * b[ 7];
+    int128_t t113 = (int128_t)a[ 7] * b[ 6];
+    int128_t t14  = (int128_t)a[ 7] * b[ 7];
+    t1  += t101;
+    t2  += t102; t2  += t202;
+    t3  += t103; t3  += t203; t3  += t303;
+    t4  += t104; t4  += t204; t4  += t304; t4  += t404;
+    t5  += t105; t5  += t205; t5  += t305; t5  += t405; t5  += t505;
+    t6  += t106; t6  += t206; t6  += t306; t6  += t406; t6  += t506;
+    t6  += t606;
+    t7  += t107; t7  += t207; t7  += t307; t7  += t407; t7  += t507;
+    t7  += t607;
+    t7  += t707;
+    t8  += t108; t8  += t208; t8  += t308; t8  += t408; t8  += t508;
+    t8  += t608;
+    t9  += t109; t9  += t209; t9  += t309; t9  += t409; t9  += t509;
+    t10 += t110; t10 += t210; t10 += t310; t10 += t410;
+    t11 += t111; t11 += t211; t11 += t311;
+    t12 += t112; t12 += t212;
+    t13 += t113;
+
+    /* Reduce */
+    t0  += t8  + t12;
+    t1  += t9  + t13;
+    t2  += t10 + t14;
+    t3  += t11;
+    t4  += t12 + t8  + t12;
+    t5  += t13 + t9  + t13;
+    t6  += t14 + t10 + t14;
+    t7  +=       t11;
+    o = t7  >> 56; t0  += o;
+                   t4  += o; t = (int128_t)o << 56; t7  -= t;
+    o = t0  >> 56; t1  += o; t = (int128_t)o << 56; t0  -= t;
+    o = t1  >> 56; t2  += o; t = (int128_t)o << 56; t1  -= t;
+    o = t2  >> 56; t3  += o; t = (int128_t)o << 56; t2  -= t;
+    o = t3  >> 56; t4  += o; t = (int128_t)o << 56; t3  -= t;
+    o = t4  >> 56; t5  += o; t = (int128_t)o << 56; t4  -= t;
+    o = t5  >> 56; t6  += o; t = (int128_t)o << 56; t5  -= t;
+    o = t6  >> 56; t7  += o; t = (int128_t)o << 56; t6  -= t;
+    o = t7  >> 56; t0  += o;
+                   t4  += o; t = (int128_t)o << 56; t7  -= t;
+
+    /* Store */
+    r[0] = t0;
+    r[1] = t1;
+    r[2] = t2;
+    r[3] = t3;
+    r[4] = t4;
+    r[5] = t5;
+    r[6] = t6;
+    r[7] = t7;
+}
+
+/* Square a field element. r = (a * a) mod (2^448 - 2^224 - 1)
+ *
+ * r  [in]  Field element to hold result.
+ * a  [in]  Field element to square.
+ */
+void fe448_sqr(int64_t* r, const int64_t* a)
+{
+    int128_t t;
+    int64_t o;
+    int128_t t0   =     (int128_t)a[ 0] * a[ 0];
+    int128_t t1   = 2 * (int128_t)a[ 0] * a[ 1];
+    int128_t t2   = 2 * (int128_t)a[ 0] * a[ 2];
+    int128_t t102 =     (int128_t)a[ 1] * a[ 1];
+    int128_t t3   = 2 * (int128_t)a[ 0] * a[ 3];
+    int128_t t103 = 2 * (int128_t)a[ 1] * a[ 2];
+    int128_t t4   = 2 * (int128_t)a[ 0] * a[ 4];
+    int128_t t104 = 2 * (int128_t)a[ 1] * a[ 3];
+    int128_t t204 =     (int128_t)a[ 2] * a[ 2];
+    int128_t t5   = 2 * (int128_t)a[ 0] * a[ 5];
+    int128_t t105 = 2 * (int128_t)a[ 1] * a[ 4];
+    int128_t t205 = 2 * (int128_t)a[ 2] * a[ 3];
+    int128_t t6   = 2 * (int128_t)a[ 0] * a[ 6];
+    int128_t t106 = 2 * (int128_t)a[ 1] * a[ 5];
+    int128_t t206 = 2 * (int128_t)a[ 2] * a[ 4];
+    int128_t t306 =     (int128_t)a[ 3] * a[ 3];
+    int128_t t7   = 2 * (int128_t)a[ 0] * a[ 7];
+    int128_t t107 = 2 * (int128_t)a[ 1] * a[ 6];
+    int128_t t207 = 2 * (int128_t)a[ 2] * a[ 5];
+    int128_t t307 = 2 * (int128_t)a[ 3] * a[ 4];
+    int128_t t8   = 2 * (int128_t)a[ 1] * a[ 7];
+    int128_t t108 = 2 * (int128_t)a[ 2] * a[ 6];
+    int128_t t208 = 2 * (int128_t)a[ 3] * a[ 5];
+    int128_t t308 =     (int128_t)a[ 4] * a[ 4];
+    int128_t t9   = 2 * (int128_t)a[ 2] * a[ 7];
+    int128_t t109 = 2 * (int128_t)a[ 3] * a[ 6];
+    int128_t t209 = 2 * (int128_t)a[ 4] * a[ 5];
+    int128_t t10  = 2 * (int128_t)a[ 3] * a[ 7];
+    int128_t t110 = 2 * (int128_t)a[ 4] * a[ 6];
+    int128_t t210 =     (int128_t)a[ 5] * a[ 5];
+    int128_t t11  = 2 * (int128_t)a[ 4] * a[ 7];
+    int128_t t111 = 2 * (int128_t)a[ 5] * a[ 6];
+    int128_t t12  = 2 * (int128_t)a[ 5] * a[ 7];
+    int128_t t112 =     (int128_t)a[ 6] * a[ 6];
+    int128_t t13  = 2 * (int128_t)a[ 6] * a[ 7];
+    int128_t t14  =     (int128_t)a[ 7] * a[ 7];
+    t2  += t102;
+    t3  += t103;
+    t4  += t104; t4  += t204;
+    t5  += t105; t5  += t205;
+    t6  += t106; t6  += t206; t6  += t306;
+    t7  += t107; t7  += t207; t7  += t307;
+    t8  += t108; t8  += t208; t8  += t308;
+    t9  += t109; t9  += t209;
+    t10 += t110; t10 += t210;
+    t11 += t111;
+    t12 += t112;
+
+    /* Reduce */
+    t0  += t8  + t12;
+    t1  += t9  + t13;
+    t2  += t10 + t14;
+    t3  += t11;
+    t4  += t12 + t8  + t12;
+    t5  += t13 + t9  + t13;
+    t6  += t14 + t10 + t14;
+    t7  +=       t11;
+    o = t7  >> 56; t0  += o;
+                   t4  += o; t = (int128_t)o << 56; t7  -= t;
+    o = t0  >> 56; t1  += o; t = (int128_t)o << 56; t0  -= t;
+    o = t1  >> 56; t2  += o; t = (int128_t)o << 56; t1  -= t;
+    o = t2  >> 56; t3  += o; t = (int128_t)o << 56; t2  -= t;
+    o = t3  >> 56; t4  += o; t = (int128_t)o << 56; t3  -= t;
+    o = t4  >> 56; t5  += o; t = (int128_t)o << 56; t4  -= t;
+    o = t5  >> 56; t6  += o; t = (int128_t)o << 56; t5  -= t;
+    o = t6  >> 56; t7  += o; t = (int128_t)o << 56; t6  -= t;
+    o = t7  >> 56; t0  += o;
+                   t4  += o; t = (int128_t)o << 56; t7  -= t;
+
+    /* Store */
+    r[0] = t0;
+    r[1] = t1;
+    r[2] = t2;
+    r[3] = t3;
+    r[4] = t4;
+    r[5] = t5;
+    r[6] = t6;
+    r[7] = t7;
+}
+
+/* Invert the field element. (r * a) mod (2^448 - 2^224 - 1) = 1
+ * Constant time implementation - using Fermat's little theorem:
+ *   a^(p-1) mod p = 1 => a^(p-2) mod p = 1/a
+ * For curve448: p - 2 = 2^448 - 2^224 - 3
+ *
+ * r  [in]  Field element to hold result.
+ * a  [in]  Field element to invert.
+ */
+void fe448_invert(int64_t* r, const int64_t* a)
+{
+    int64_t t1[8];
+    int64_t t2[8];
+    int64_t t3[8];
+    int64_t t4[8];
+    int i;
+
+    fe448_sqr(t1, a);
+    /* t1 = 2 */
+    fe448_mul(t1, t1, a);
+    /* t1 = 3 */
+    fe448_sqr(t2, t1); for (i = 1; i < 2; ++i) fe448_sqr(t2, t2);
+    /* t2 = c */
+    fe448_mul(t3, t2, a);
+    /* t3 = d */
+    fe448_mul(t1, t2, t1);
+    /* t1 = f */
+    fe448_sqr(t2, t1);
+    /* t2 = 1e */
+    fe448_mul(t4, t2, a);
+    /* t4 = 1f */
+    fe448_sqr(t2, t4); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2);
+    /* t2 = 3e0 */
+    fe448_mul(t1, t2, t4);
+    /* t1 = 3ff */
+    fe448_sqr(t2, t1); for (i = 1; i < 10; ++i) fe448_sqr(t2, t2);
+    /* t2 = ffc00 */
+    fe448_mul(t1, t2, t1);
+    /* t1 = fffff */
+    fe448_sqr(t2, t1); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2);
+    /* t2 = 1ffffe0 */
+    fe448_mul(t1, t2, t4);
+    /* t1 = 1ffffff */
+    fe448_sqr(t2, t1); for (i = 1; i < 25; ++i) fe448_sqr(t2, t2);
+    /* t2 = 3fffffe000000 */
+    fe448_mul(t1, t2, t1);
+    /* t1 = 3ffffffffffff */
+    fe448_sqr(t2, t1); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2);
+    /* t2 = 7fffffffffffe0 */
+    fe448_mul(t1, t2, t4);
+    /* t1 = 7fffffffffffff */
+    fe448_sqr(t2, t1); for (i = 1; i < 55; ++i) fe448_sqr(t2, t2);
+    /* t2 = 3fffffffffffff80000000000000 */
+    fe448_mul(t1, t2, t1);
+    /* t1 = 3fffffffffffffffffffffffffff */
+    fe448_sqr(t2, t1); for (i = 1; i < 110; ++i) fe448_sqr(t2, t2);
+    /* t2 = fffffffffffffffffffffffffffc000000000000000000000000000 */
+    fe448_mul(t1, t2, t1);
+    /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+    fe448_sqr(t2, t1); for (i = 1; i < 4; ++i) fe448_sqr(t2, t2);
+    /* t2 = fffffffffffffffffffffffffffffffffffffffffffffffffffffff0 */
+    fe448_mul(t3, t3, t2);
+    /* t3 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffd */
+    fe448_mul(t1, t3, a);
+    /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe */
+    fe448_sqr(t1, t1); for (i = 1; i < 224; ++i) fe448_sqr(t1, t1);
+    /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe00000000000000000000000000000000000000000000000000000000 */
+    fe448_mul(r, t3, t1);
+    /* r = fffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffffffffffffffffffffffffffffffffffffffffffffffffffffd */
+}
+
+/* Scalar multiply the point by a number. r = n.a
+ * Uses Montogmery ladder and only requires the x-ordinate.
+ *
+ * r  [in]  Field element to hold result.
+ * n  [in]  Scalar as an array of bytes.
+ * a  [in]  Point to multiply - x-ordinate only.
+ */
+int curve448(byte* r, const byte* n, const byte* a)
+{
+    int64_t x1[8];
+    int64_t x2[8];
+    int64_t z2[8];
+    int64_t x3[8];
+    int64_t z3[8];
+    int64_t t0[8];
+    int64_t t1[8];
+    int i;
+    unsigned int swap;
+    unsigned int b;
+
+    fe448_from_bytes(x1, a);
+    fe448_1(x2);
+    fe448_0(z2);
+    fe448_copy(x3, x1);
+    fe448_1(z3);
+
+    swap = 0;
+    for (i = 447; i >= 0; --i) {
+        b = (n[i >> 3] >> (i & 7)) & 1;
+        swap ^= b;
+        fe448_cswap(x2, x3, swap);
+        fe448_cswap(z2, z3, swap);
+        swap = b;
+
+        /* Montgomery Ladder - double and add */
+        fe448_add(t0, x2, z2);
+        fe448_reduce(t0);
+        fe448_add(t1, x3, z3);
+        fe448_reduce(t1);
+        fe448_sub(x2, x2, z2);
+        fe448_sub(x3, x3, z3);
+        fe448_mul(t1, t1, x2);
+        fe448_mul(z3, x3, t0);
+        fe448_sqr(t0, t0);
+        fe448_sqr(x2, x2);
+        fe448_add(x3, z3, t1);
+        fe448_reduce(x3);
+        fe448_sqr(x3, x3);
+        fe448_sub(z3, z3, t1);
+        fe448_sqr(z3, z3);
+        fe448_mul(z3, z3, x1);
+        fe448_sub(t1, t0, x2);
+        fe448_mul(x2, t0, x2);
+        fe448_mul39081(z2, t1);
+        fe448_add(z2, t0, z2);
+        fe448_mul(z2, z2, t1);
+    }
+    /* Last two bits are 0 - no final swap check required. */
+
+    fe448_invert(z2, z2);
+    fe448_mul(x2, x2, z2);
+    fe448_to_bytes(r, x2);
+
+    return 0;
+}
+
+#ifdef HAVE_ED448
+/* Check whether field element is not 0.
+ * Must convert to a normalized form before checking.
+ *
+ * a  [in]  Field element.
+ * returns 0 when zero, and any other value otherwise.
+ */
+int fe448_isnonzero(const int64_t* a)
+{
+    uint8_t b[56];
+    int i;
+    uint8_t c = 0;
+    fe448_to_bytes(b, a);
+    for (i = 0; i < 56; i++)
+        c |= b[i];
+    return c;
+}
+
+/* Check whether field element is negative.
+ * Must convert to a normalized form before checking.
+ *
+ * a  [in]  Field element.
+ * returns 1 when negative, and 0 otherwise.
+ */
+int fe448_isnegative(const int64_t* a)
+{
+    uint8_t b[56];
+    fe448_to_bytes(b, a);
+    return b[0] & 1;
+}
+
+/* Negates the field element. r = -a
+ *
+ * r  [in]  Field element to hold result.
+ * a  [in]  Field element.
+ */
+void fe448_neg(int64_t* r, const int64_t* a)
+{
+    r[0] = -a[0];
+    r[1] = -a[1];
+    r[2] = -a[2];
+    r[3] = -a[3];
+    r[4] = -a[4];
+    r[5] = -a[5];
+    r[6] = -a[6];
+    r[7] = -a[7];
+}
+
+/* Raise field element to (p-3) / 4: 2^446 - 2^222 - 1
+ * Used for calcualting y-ordinate from x-ordinate for Ed448.
+ *
+ * r  [in]  Field element to hold result.
+ * a  [in]  Field element to exponentiate.
+ */
+void fe448_pow_2_446_222_1(int64_t* r, const int64_t* a)
+{
+    int64_t t1[8];
+    int64_t t2[8];
+    int64_t t3[8];
+    int64_t t4[8];
+    int64_t t5[8];
+    int i;
+
+    fe448_sqr(t3, a);
+    /* t3 = 2 */
+    fe448_mul(t1, t3, a);
+    /* t1 = 3 */
+    fe448_sqr(t5, t1);
+    /* t5 = 6 */
+    fe448_mul(t5, t5, a);
+    /* t5 = 7 */
+    fe448_sqr(t2, t1); for (i = 1; i < 2; ++i) fe448_sqr(t2, t2);
+    /* t2 = c */
+    fe448_mul(t3, t2, t3);
+    /* t3 = e */
+    fe448_mul(t1, t2, t1);
+    /* t1 = f */
+    fe448_sqr(t2, t1); for (i = 1; i < 3; ++i) fe448_sqr(t2, t2);
+    /* t2 = 78 */
+    fe448_mul(t5, t2, t5);
+    /* t5 = 7f */
+    fe448_sqr(t2, t1); for (i = 1; i < 4; ++i) fe448_sqr(t2, t2);
+    /* t2 = f0 */
+    fe448_mul(t1, t2, t1);
+    /* t1 = ff */
+    fe448_mul(t3, t3, t2);
+    /* t3 = fe */
+    fe448_sqr(t2, t1); for (i = 1; i < 7; ++i) fe448_sqr(t2, t2);
+    /* t2 = 7f80 */
+    fe448_mul(t5, t2, t5);
+    /* t5 = 7fff */
+    fe448_sqr(t2, t1); for (i = 1; i < 8; ++i) fe448_sqr(t2, t2);
+    /* t2 = ff00 */
+    fe448_mul(t1, t2, t1);
+    /* t1 = ffff */
+    fe448_mul(t3, t3, t2);
+    /* t3 = fffe */
+    fe448_sqr(t2, t5); for (i = 1; i < 15; ++i) fe448_sqr(t2, t2);
+    /* t2 = 3fff8000 */
+    fe448_mul(t5, t2, t5);
+    /* t5 = 3fffffff */
+    fe448_sqr(t2, t1); for (i = 1; i < 16; ++i) fe448_sqr(t2, t2);
+    /* t2 = ffff0000 */
+    fe448_mul(t1, t2, t1);
+    /* t1 = ffffffff */
+    fe448_mul(t3, t3, t2);
+    /* t3 = fffffffe */
+    fe448_sqr(t2, t1); for (i = 1; i < 32; ++i) fe448_sqr(t2, t2);
+    /* t2 = ffffffff00000000 */
+    fe448_mul(t2, t2, t1);
+    /* t2 = ffffffffffffffff */
+    fe448_sqr(t1, t2); for (i = 1; i < 64; ++i) fe448_sqr(t1, t1);
+    /* t1 = ffffffffffffffff0000000000000000 */
+    fe448_mul(t1, t1, t2);
+    /* t1 = ffffffffffffffffffffffffffffffff */
+    fe448_sqr(t1, t1); for (i = 1; i < 64; ++i) fe448_sqr(t1, t1);
+    /* t1 = ffffffffffffffffffffffffffffffff0000000000000000 */
+    fe448_mul(t4, t1, t2);
+    /* t4 = ffffffffffffffffffffffffffffffffffffffffffffffff */
+    fe448_sqr(t2, t4); for (i = 1; i < 32; ++i) fe448_sqr(t2, t2);
+    /* t2 = ffffffffffffffffffffffffffffffffffffffffffffffff00000000 */
+    fe448_mul(t3, t3, t2);
+    /* t3 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe */
+    fe448_sqr(t1, t3); for (i = 1; i < 192; ++i) fe448_sqr(t1, t1);
+    /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe000000000000000000000000000000000000000000000000 */
+    fe448_mul(t1, t1, t4);
+    /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffffffffffffffffffffffffffffffffffffffffffff */
+    fe448_sqr(t1, t1); for (i = 1; i < 30; ++i) fe448_sqr(t1, t1);
+    /* t1 = 3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffffffffffffffffffffffffffffffffffffffffffc0000000 */
+    fe448_mul(r, t5, t1);
+    /* r = 3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+}
+
+/* Constant time, conditional move of b into a.
+ * a is not changed if the condition is 0.
+ *
+ * a  A field element.
+ * b  A field element.
+ * c  If 1 then copy and if 0 then don't copy.
+ */
+void fe448_cmov(int64_t* a, const int64_t* b, int c)
+{
+    int64_t m = -(int64_t)c;
+    int64_t t0 = m & (a[0] ^ b[0]);
+    int64_t t1 = m & (a[1] ^ b[1]);
+    int64_t t2 = m & (a[2] ^ b[2]);
+    int64_t t3 = m & (a[3] ^ b[3]);
+    int64_t t4 = m & (a[4] ^ b[4]);
+    int64_t t5 = m & (a[5] ^ b[5]);
+    int64_t t6 = m & (a[6] ^ b[6]);
+    int64_t t7 = m & (a[7] ^ b[7]);
+
+    a[0] ^= t0;
+    a[1] ^= t1;
+    a[2] ^= t2;
+    a[3] ^= t3;
+    a[4] ^= t4;
+    a[5] ^= t5;
+    a[6] ^= t6;
+    a[7] ^= t7;
+}
+
+#endif /* HAVE_ED448 */
+#else
+
+/* Initialize the field element operations.
+ */
+void fe448_init(void)
+{
+}
+
+/* Convert the field element from a byte array to an array of 28-bits.
+ *
+ * r  [in]  Array to encode into.
+ * b  [in]  Byte array.
+ */
+void fe448_from_bytes(int32_t* r, const unsigned char* b)
+{
+    r[ 0] =  (((int32_t)((b[ 0]        ) >>  0)) <<  0)
+          |  (((int32_t)((b[ 1]        ) >>  0)) <<  8)
+          |  (((int32_t)((b[ 2]        ) >>  0)) << 16)
+          | ((((int32_t)((b[ 3] & 0xf )) >>  0)) << 24);
+    r[ 1] =  (((int32_t)((b[ 3]        ) >>  4)) <<  0)
+          |  (((int32_t)((b[ 4]        ) >>  0)) <<  4)
+          |  (((int32_t)((b[ 5]        ) >>  0)) << 12)
+          |  (((int32_t)((b[ 6]        ) >>  0)) << 20);
+    r[ 2] =  (((int32_t)((b[ 7]        ) >>  0)) <<  0)
+          |  (((int32_t)((b[ 8]        ) >>  0)) <<  8)
+          |  (((int32_t)((b[ 9]        ) >>  0)) << 16)
+          | ((((int32_t)((b[10] & 0xf )) >>  0)) << 24);
+    r[ 3] =  (((int32_t)((b[10]        ) >>  4)) <<  0)
+          |  (((int32_t)((b[11]        ) >>  0)) <<  4)
+          |  (((int32_t)((b[12]        ) >>  0)) << 12)
+          |  (((int32_t)((b[13]        ) >>  0)) << 20);
+    r[ 4] =  (((int32_t)((b[14]        ) >>  0)) <<  0)
+          |  (((int32_t)((b[15]        ) >>  0)) <<  8)
+          |  (((int32_t)((b[16]        ) >>  0)) << 16)
+          | ((((int32_t)((b[17] & 0xf )) >>  0)) << 24);
+    r[ 5] =  (((int32_t)((b[17]        ) >>  4)) <<  0)
+          |  (((int32_t)((b[18]        ) >>  0)) <<  4)
+          |  (((int32_t)((b[19]        ) >>  0)) << 12)
+          |  (((int32_t)((b[20]        ) >>  0)) << 20);
+    r[ 6] =  (((int32_t)((b[21]        ) >>  0)) <<  0)
+          |  (((int32_t)((b[22]        ) >>  0)) <<  8)
+          |  (((int32_t)((b[23]        ) >>  0)) << 16)
+          | ((((int32_t)((b[24] & 0xf )) >>  0)) << 24);
+    r[ 7] =  (((int32_t)((b[24]        ) >>  4)) <<  0)
+          |  (((int32_t)((b[25]        ) >>  0)) <<  4)
+          |  (((int32_t)((b[26]        ) >>  0)) << 12)
+          |  (((int32_t)((b[27]        ) >>  0)) << 20);
+    r[ 8] =  (((int32_t)((b[28]        ) >>  0)) <<  0)
+          |  (((int32_t)((b[29]        ) >>  0)) <<  8)
+          |  (((int32_t)((b[30]        ) >>  0)) << 16)
+          | ((((int32_t)((b[31] & 0xf )) >>  0)) << 24);
+    r[ 9] =  (((int32_t)((b[31]        ) >>  4)) <<  0)
+          |  (((int32_t)((b[32]        ) >>  0)) <<  4)
+          |  (((int32_t)((b[33]        ) >>  0)) << 12)
+          |  (((int32_t)((b[34]        ) >>  0)) << 20);
+    r[10] =  (((int32_t)((b[35]        ) >>  0)) <<  0)
+          |  (((int32_t)((b[36]        ) >>  0)) <<  8)
+          |  (((int32_t)((b[37]        ) >>  0)) << 16)
+          | ((((int32_t)((b[38] & 0xf )) >>  0)) << 24);
+    r[11] =  (((int32_t)((b[38]        ) >>  4)) <<  0)
+          |  (((int32_t)((b[39]        ) >>  0)) <<  4)
+          |  (((int32_t)((b[40]        ) >>  0)) << 12)
+          |  (((int32_t)((b[41]        ) >>  0)) << 20);
+    r[12] =  (((int32_t)((b[42]        ) >>  0)) <<  0)
+          |  (((int32_t)((b[43]        ) >>  0)) <<  8)
+          |  (((int32_t)((b[44]        ) >>  0)) << 16)
+          | ((((int32_t)((b[45] & 0xf )) >>  0)) << 24);
+    r[13] =  (((int32_t)((b[45]        ) >>  4)) <<  0)
+          |  (((int32_t)((b[46]        ) >>  0)) <<  4)
+          |  (((int32_t)((b[47]        ) >>  0)) << 12)
+          |  (((int32_t)((b[48]        ) >>  0)) << 20);
+    r[14] =  (((int32_t)((b[49]        ) >>  0)) <<  0)
+          |  (((int32_t)((b[50]        ) >>  0)) <<  8)
+          |  (((int32_t)((b[51]        ) >>  0)) << 16)
+          | ((((int32_t)((b[52] & 0xf )) >>  0)) << 24);
+    r[15] =  (((int32_t)((b[52]        ) >>  4)) <<  0)
+          |  (((int32_t)((b[53]        ) >>  0)) <<  4)
+          |  (((int32_t)((b[54]        ) >>  0)) << 12)
+          |  (((int32_t)((b[55]        ) >>  0)) << 20);
+}
+
+/* Convert the field element to a byte array from an array of 28-bits.
+ *
+ * b  [in]  Byte array.
+ * a  [in]  Array to encode into.
+ */
+void fe448_to_bytes(unsigned char* b, const int32_t* a)
+{
+    int64_t t;
+    /* Mod */
+    int32_t in0 = a[0];
+    int32_t in1 = a[1];
+    int32_t in2 = a[2];
+    int32_t in3 = a[3];
+    int32_t in4 = a[4];
+    int32_t in5 = a[5];
+    int32_t in6 = a[6];
+    int32_t in7 = a[7];
+    int32_t in8 = a[8];
+    int32_t in9 = a[9];
+    int32_t in10 = a[10];
+    int32_t in11 = a[11];
+    int32_t in12 = a[12];
+    int32_t in13 = a[13];
+    int32_t in14 = a[14];
+    int32_t in15 = a[15];
+    int32_t o = in15 >> 28;
+    in15 -= o << 28;
+    in0 += o;
+    in8 += o;
+    o = (in0 + 1) >> 28;
+    o = (o + in1) >> 28;
+    o = (o + in2) >> 28;
+    o = (o + in3) >> 28;
+    o = (o + in4) >> 28;
+    o = (o + in5) >> 28;
+    o = (o + in6) >> 28;
+    o = (o + in7) >> 28;
+    o = (o + in8 + 1) >> 28;
+    o = (o + in9) >> 28;
+    o = (o + in10) >> 28;
+    o = (o + in11) >> 28;
+    o = (o + in12) >> 28;
+    o = (o + in13) >> 28;
+    o = (o + in14) >> 28;
+    o = (o + in15) >> 28;
+    in0 += o;
+    in8 += o;
+    in15 -= o << 28;
+    o = in0  >> 28; in1  += o; t = o << 28; in0  -= t;
+    o = in1  >> 28; in2  += o; t = o << 28; in1  -= t;
+    o = in2  >> 28; in3  += o; t = o << 28; in2  -= t;
+    o = in3  >> 28; in4  += o; t = o << 28; in3  -= t;
+    o = in4  >> 28; in5  += o; t = o << 28; in4  -= t;
+    o = in5  >> 28; in6  += o; t = o << 28; in5  -= t;
+    o = in6  >> 28; in7  += o; t = o << 28; in6  -= t;
+    o = in7  >> 28; in8  += o; t = o << 28; in7  -= t;
+    o = in8  >> 28; in9  += o; t = o << 28; in8  -= t;
+    o = in9  >> 28; in10 += o; t = o << 28; in9  -= t;
+    o = in10 >> 28; in11 += o; t = o << 28; in10 -= t;
+    o = in11 >> 28; in12 += o; t = o << 28; in11 -= t;
+    o = in12 >> 28; in13 += o; t = o << 28; in12 -= t;
+    o = in13 >> 28; in14 += o; t = o << 28; in13 -= t;
+    o = in14 >> 28; in15 += o; t = o << 28; in14 -= t;
+    o = in15 >> 28; in0  += o;
+                    in8  += o; t = o << 28; in15 -= t;
+
+    /* Output as bytes */
+    b[ 0] = (in0  >>  0);
+    b[ 1] = (in0  >>  8);
+    b[ 2] = (in0  >> 16);
+    b[ 3] = (in0  >> 24) + ((in1  >>  0) <<  4);
+    b[ 4] = (in1  >>  4);
+    b[ 5] = (in1  >> 12);
+    b[ 6] = (in1  >> 20);
+    b[ 7] = (in2  >>  0);
+    b[ 8] = (in2  >>  8);
+    b[ 9] = (in2  >> 16);
+    b[10] = (in2  >> 24) + ((in3  >>  0) <<  4);
+    b[11] = (in3  >>  4);
+    b[12] = (in3  >> 12);
+    b[13] = (in3  >> 20);
+    b[14] = (in4  >>  0);
+    b[15] = (in4  >>  8);
+    b[16] = (in4  >> 16);
+    b[17] = (in4  >> 24) + ((in5  >>  0) <<  4);
+    b[18] = (in5  >>  4);
+    b[19] = (in5  >> 12);
+    b[20] = (in5  >> 20);
+    b[21] = (in6  >>  0);
+    b[22] = (in6  >>  8);
+    b[23] = (in6  >> 16);
+    b[24] = (in6  >> 24) + ((in7  >>  0) <<  4);
+    b[25] = (in7  >>  4);
+    b[26] = (in7  >> 12);
+    b[27] = (in7  >> 20);
+    b[28] = (in8  >>  0);
+    b[29] = (in8  >>  8);
+    b[30] = (in8  >> 16);
+    b[31] = (in8  >> 24) + ((in9  >>  0) <<  4);
+    b[32] = (in9  >>  4);
+    b[33] = (in9  >> 12);
+    b[34] = (in9  >> 20);
+    b[35] = (in10 >>  0);
+    b[36] = (in10 >>  8);
+    b[37] = (in10 >> 16);
+    b[38] = (in10 >> 24) + ((in11 >>  0) <<  4);
+    b[39] = (in11 >>  4);
+    b[40] = (in11 >> 12);
+    b[41] = (in11 >> 20);
+    b[42] = (in12 >>  0);
+    b[43] = (in12 >>  8);
+    b[44] = (in12 >> 16);
+    b[45] = (in12 >> 24) + ((in13 >>  0) <<  4);
+    b[46] = (in13 >>  4);
+    b[47] = (in13 >> 12);
+    b[48] = (in13 >> 20);
+    b[49] = (in14 >>  0);
+    b[50] = (in14 >>  8);
+    b[51] = (in14 >> 16);
+    b[52] = (in14 >> 24) + ((in15 >>  0) <<  4);
+    b[53] = (in15 >>  4);
+    b[54] = (in15 >> 12);
+    b[55] = (in15 >> 20);
+}
+
+/* Set the field element to 0.
+ *
+ * a  [in]  Field element.
+ */
+void fe448_1(int32_t* a)
+{
+    a[0] = 1;
+    a[1] = 0;
+    a[2] = 0;
+    a[3] = 0;
+    a[4] = 0;
+    a[5] = 0;
+    a[6] = 0;
+    a[7] = 0;
+    a[8] = 0;
+    a[9] = 0;
+    a[10] = 0;
+    a[11] = 0;
+    a[12] = 0;
+    a[13] = 0;
+    a[14] = 0;
+    a[15] = 0;
+}
+
+/* Set the field element to 0.
+ *
+ * a  [in]  Field element.
+ */
+void fe448_0(int32_t* a)
+{
+    a[0] = 0;
+    a[1] = 0;
+    a[2] = 0;
+    a[3] = 0;
+    a[4] = 0;
+    a[5] = 0;
+    a[6] = 0;
+    a[7] = 0;
+    a[8] = 0;
+    a[9] = 0;
+    a[10] = 0;
+    a[11] = 0;
+    a[12] = 0;
+    a[13] = 0;
+    a[14] = 0;
+    a[15] = 0;
+}
+
+/* Copy one field element into another: d = a.
+ *
+ * d  [in]  Destination field element.
+ * a  [in]  Source field element.
+ */
+void fe448_copy(int32_t* d, const int32_t* a)
+{
+    d[0] = a[0];
+    d[1] = a[1];
+    d[2] = a[2];
+    d[3] = a[3];
+    d[4] = a[4];
+    d[5] = a[5];
+    d[6] = a[6];
+    d[7] = a[7];
+    d[8] = a[8];
+    d[9] = a[9];
+    d[10] = a[10];
+    d[11] = a[11];
+    d[12] = a[12];
+    d[13] = a[13];
+    d[14] = a[14];
+    d[15] = a[15];
+}
+
+/* Conditionally swap the elements.
+ * Constant time implementation.
+ *
+ * a  [in]  First field element.
+ * b  [in]  Second field element.
+ * c  [in]  Swap when 1. Valid values: 0, 1.
+ */
+static void fe448_cswap(int32_t* a, int32_t* b, int c)
+{
+    int32_t mask = -(int32_t)c;
+    int32_t t0 = (a[0] ^ b[0]) & mask;
+    int32_t t1 = (a[1] ^ b[1]) & mask;
+    int32_t t2 = (a[2] ^ b[2]) & mask;
+    int32_t t3 = (a[3] ^ b[3]) & mask;
+    int32_t t4 = (a[4] ^ b[4]) & mask;
+    int32_t t5 = (a[5] ^ b[5]) & mask;
+    int32_t t6 = (a[6] ^ b[6]) & mask;
+    int32_t t7 = (a[7] ^ b[7]) & mask;
+    int32_t t8 = (a[8] ^ b[8]) & mask;
+    int32_t t9 = (a[9] ^ b[9]) & mask;
+    int32_t t10 = (a[10] ^ b[10]) & mask;
+    int32_t t11 = (a[11] ^ b[11]) & mask;
+    int32_t t12 = (a[12] ^ b[12]) & mask;
+    int32_t t13 = (a[13] ^ b[13]) & mask;
+    int32_t t14 = (a[14] ^ b[14]) & mask;
+    int32_t t15 = (a[15] ^ b[15]) & mask;
+    a[0] ^= t0;
+    a[1] ^= t1;
+    a[2] ^= t2;
+    a[3] ^= t3;
+    a[4] ^= t4;
+    a[5] ^= t5;
+    a[6] ^= t6;
+    a[7] ^= t7;
+    a[8] ^= t8;
+    a[9] ^= t9;
+    a[10] ^= t10;
+    a[11] ^= t11;
+    a[12] ^= t12;
+    a[13] ^= t13;
+    a[14] ^= t14;
+    a[15] ^= t15;
+    b[0] ^= t0;
+    b[1] ^= t1;
+    b[2] ^= t2;
+    b[3] ^= t3;
+    b[4] ^= t4;
+    b[5] ^= t5;
+    b[6] ^= t6;
+    b[7] ^= t7;
+    b[8] ^= t8;
+    b[9] ^= t9;
+    b[10] ^= t10;
+    b[11] ^= t11;
+    b[12] ^= t12;
+    b[13] ^= t13;
+    b[14] ^= t14;
+    b[15] ^= t15;
+}
+
+/* Add two field elements. r = (a + b) mod (2^448 - 2^224 - 1)
+ *
+ * r  [in]  Field element to hold sum.
+ * a  [in]  Field element to add.
+ * b  [in]  Field element to add.
+ */
+void fe448_add(int32_t* r, const int32_t* a, const int32_t* b)
+{
+    r[0] = a[0] + b[0];
+    r[1] = a[1] + b[1];
+    r[2] = a[2] + b[2];
+    r[3] = a[3] + b[3];
+    r[4] = a[4] + b[4];
+    r[5] = a[5] + b[5];
+    r[6] = a[6] + b[6];
+    r[7] = a[7] + b[7];
+    r[8] = a[8] + b[8];
+    r[9] = a[9] + b[9];
+    r[10] = a[10] + b[10];
+    r[11] = a[11] + b[11];
+    r[12] = a[12] + b[12];
+    r[13] = a[13] + b[13];
+    r[14] = a[14] + b[14];
+    r[15] = a[15] + b[15];
+}
+
+/* Subtract a field element from another. r = (a - b) mod (2^448 - 2^224 - 1)
+ *
+ * r  [in]  Field element to hold difference.
+ * a  [in]  Field element to subtract from.
+ * b  [in]  Field element to subtract.
+ */
+void fe448_sub(int32_t* r, const int32_t* a, const int32_t* b)
+{
+    r[0] = a[0] - b[0];
+    r[1] = a[1] - b[1];
+    r[2] = a[2] - b[2];
+    r[3] = a[3] - b[3];
+    r[4] = a[4] - b[4];
+    r[5] = a[5] - b[5];
+    r[6] = a[6] - b[6];
+    r[7] = a[7] - b[7];
+    r[8] = a[8] - b[8];
+    r[9] = a[9] - b[9];
+    r[10] = a[10] - b[10];
+    r[11] = a[11] - b[11];
+    r[12] = a[12] - b[12];
+    r[13] = a[13] - b[13];
+    r[14] = a[14] - b[14];
+    r[15] = a[15] - b[15];
+}
+
+void fe448_reduce(int32_t* a)
+{
+    int64_t o;
+
+    o = a[0 ] >> 28; a[1 ] += o; a[0 ] -= o << 28;
+    o = a[1 ] >> 28; a[2 ] += o; a[1 ] -= o << 28;
+    o = a[2 ] >> 28; a[3 ] += o; a[2 ] -= o << 28;
+    o = a[3 ] >> 28; a[4 ] += o; a[3 ] -= o << 28;
+    o = a[4 ] >> 28; a[5 ] += o; a[4 ] -= o << 28;
+    o = a[5 ] >> 28; a[6 ] += o; a[5 ] -= o << 28;
+    o = a[6 ] >> 28; a[7 ] += o; a[6 ] -= o << 28;
+    o = a[7 ] >> 28; a[8 ] += o; a[7 ] -= o << 28;
+    o = a[8 ] >> 28; a[9 ] += o; a[8 ] -= o << 28;
+    o = a[9 ] >> 28; a[10] += o; a[9 ] -= o << 28;
+    o = a[10] >> 28; a[11] += o; a[10] -= o << 28;
+    o = a[11] >> 28; a[12] += o; a[11] -= o << 28;
+    o = a[12] >> 28; a[13] += o; a[12] -= o << 28;
+    o = a[13] >> 28; a[14] += o; a[13] -= o << 28;
+    o = a[14] >> 28; a[15] += o; a[14] -= o << 28;
+    o = a[15] >> 28; a[0]  += o;
+                     a[8]  += o; a[15] -= o << 28;
+}
+/* Mulitply a field element by 39081. r = (39081 * a) mod (2^448 - 2^224 - 1)
+ *
+ * r  [in]  Field element to hold result.
+ * a  [in]  Field element to multiply.
+ */
+void fe448_mul39081(int32_t* r, const int32_t* a)
+{
+    int64_t t;
+    int32_t o;
+    int64_t t0 = a[0] * (int64_t)39081;
+    int64_t t1 = a[1] * (int64_t)39081;
+    int64_t t2 = a[2] * (int64_t)39081;
+    int64_t t3 = a[3] * (int64_t)39081;
+    int64_t t4 = a[4] * (int64_t)39081;
+    int64_t t5 = a[5] * (int64_t)39081;
+    int64_t t6 = a[6] * (int64_t)39081;
+    int64_t t7 = a[7] * (int64_t)39081;
+    int64_t t8 = a[8] * (int64_t)39081;
+    int64_t t9 = a[9] * (int64_t)39081;
+    int64_t t10 = a[10] * (int64_t)39081;
+    int64_t t11 = a[11] * (int64_t)39081;
+    int64_t t12 = a[12] * (int64_t)39081;
+    int64_t t13 = a[13] * (int64_t)39081;
+    int64_t t14 = a[14] * (int64_t)39081;
+    int64_t t15 = a[15] * (int64_t)39081;
+    o = t0  >> 28; t1  += o; t = (int64_t)o << 28; t0  -= t;
+    o = t1  >> 28; t2  += o; t = (int64_t)o << 28; t1  -= t;
+    o = t2  >> 28; t3  += o; t = (int64_t)o << 28; t2  -= t;
+    o = t3  >> 28; t4  += o; t = (int64_t)o << 28; t3  -= t;
+    o = t4  >> 28; t5  += o; t = (int64_t)o << 28; t4  -= t;
+    o = t5  >> 28; t6  += o; t = (int64_t)o << 28; t5  -= t;
+    o = t6  >> 28; t7  += o; t = (int64_t)o << 28; t6  -= t;
+    o = t7  >> 28; t8  += o; t = (int64_t)o << 28; t7  -= t;
+    o = t8  >> 28; t9  += o; t = (int64_t)o << 28; t8  -= t;
+    o = t9  >> 28; t10 += o; t = (int64_t)o << 28; t9  -= t;
+    o = t10 >> 28; t11 += o; t = (int64_t)o << 28; t10 -= t;
+    o = t11 >> 28; t12 += o; t = (int64_t)o << 28; t11 -= t;
+    o = t12 >> 28; t13 += o; t = (int64_t)o << 28; t12 -= t;
+    o = t13 >> 28; t14 += o; t = (int64_t)o << 28; t13 -= t;
+    o = t14 >> 28; t15 += o; t = (int64_t)o << 28; t14 -= t;
+    o = t15 >> 28; t0  += o;
+                   t8  += o; t = (int64_t)o << 28; t15 -= t;
+
+    /* Store */
+    r[0] = t0;
+    r[1] = t1;
+    r[2] = t2;
+    r[3] = t3;
+    r[4] = t4;
+    r[5] = t5;
+    r[6] = t6;
+    r[7] = t7;
+    r[8] = t8;
+    r[9] = t9;
+    r[10] = t10;
+    r[11] = t11;
+    r[12] = t12;
+    r[13] = t13;
+    r[14] = t14;
+    r[15] = t15;
+}
+
+/* Mulitply two field elements. r = a * b
+ *
+ * r  [in]  Field element to hold result.
+ * a  [in]  Field element to multiply.
+ * b  [in]  Field element to multiply.
+ */
+static WC_INLINE void fe448_mul_8(int32_t* r, const int32_t* a, const int32_t* b)
+{
+    int64_t t;
+    int64_t t0   = (int64_t)a[ 0] * b[ 0];
+    int64_t t1   = (int64_t)a[ 0] * b[ 1];
+    int64_t t101 = (int64_t)a[ 1] * b[ 0];
+    int64_t t2   = (int64_t)a[ 0] * b[ 2];
+    int64_t t102 = (int64_t)a[ 1] * b[ 1];
+    int64_t t202 = (int64_t)a[ 2] * b[ 0];
+    int64_t t3   = (int64_t)a[ 0] * b[ 3];
+    int64_t t103 = (int64_t)a[ 1] * b[ 2];
+    int64_t t203 = (int64_t)a[ 2] * b[ 1];
+    int64_t t303 = (int64_t)a[ 3] * b[ 0];
+    int64_t t4   = (int64_t)a[ 0] * b[ 4];
+    int64_t t104 = (int64_t)a[ 1] * b[ 3];
+    int64_t t204 = (int64_t)a[ 2] * b[ 2];
+    int64_t t304 = (int64_t)a[ 3] * b[ 1];
+    int64_t t404 = (int64_t)a[ 4] * b[ 0];
+    int64_t t5   = (int64_t)a[ 0] * b[ 5];
+    int64_t t105 = (int64_t)a[ 1] * b[ 4];
+    int64_t t205 = (int64_t)a[ 2] * b[ 3];
+    int64_t t305 = (int64_t)a[ 3] * b[ 2];
+    int64_t t405 = (int64_t)a[ 4] * b[ 1];
+    int64_t t505 = (int64_t)a[ 5] * b[ 0];
+    int64_t t6   = (int64_t)a[ 0] * b[ 6];
+    int64_t t106 = (int64_t)a[ 1] * b[ 5];
+    int64_t t206 = (int64_t)a[ 2] * b[ 4];
+    int64_t t306 = (int64_t)a[ 3] * b[ 3];
+    int64_t t406 = (int64_t)a[ 4] * b[ 2];
+    int64_t t506 = (int64_t)a[ 5] * b[ 1];
+    int64_t t606 = (int64_t)a[ 6] * b[ 0];
+    int64_t t7   = (int64_t)a[ 0] * b[ 7];
+    int64_t t107 = (int64_t)a[ 1] * b[ 6];
+    int64_t t207 = (int64_t)a[ 2] * b[ 5];
+    int64_t t307 = (int64_t)a[ 3] * b[ 4];
+    int64_t t407 = (int64_t)a[ 4] * b[ 3];
+    int64_t t507 = (int64_t)a[ 5] * b[ 2];
+    int64_t t607 = (int64_t)a[ 6] * b[ 1];
+    int64_t t707 = (int64_t)a[ 7] * b[ 0];
+    int64_t t8   = (int64_t)a[ 1] * b[ 7];
+    int64_t t108 = (int64_t)a[ 2] * b[ 6];
+    int64_t t208 = (int64_t)a[ 3] * b[ 5];
+    int64_t t308 = (int64_t)a[ 4] * b[ 4];
+    int64_t t408 = (int64_t)a[ 5] * b[ 3];
+    int64_t t508 = (int64_t)a[ 6] * b[ 2];
+    int64_t t608 = (int64_t)a[ 7] * b[ 1];
+    int64_t t9   = (int64_t)a[ 2] * b[ 7];
+    int64_t t109 = (int64_t)a[ 3] * b[ 6];
+    int64_t t209 = (int64_t)a[ 4] * b[ 5];
+    int64_t t309 = (int64_t)a[ 5] * b[ 4];
+    int64_t t409 = (int64_t)a[ 6] * b[ 3];
+    int64_t t509 = (int64_t)a[ 7] * b[ 2];
+    int64_t t10  = (int64_t)a[ 3] * b[ 7];
+    int64_t t110 = (int64_t)a[ 4] * b[ 6];
+    int64_t t210 = (int64_t)a[ 5] * b[ 5];
+    int64_t t310 = (int64_t)a[ 6] * b[ 4];
+    int64_t t410 = (int64_t)a[ 7] * b[ 3];
+    int64_t t11  = (int64_t)a[ 4] * b[ 7];
+    int64_t t111 = (int64_t)a[ 5] * b[ 6];
+    int64_t t211 = (int64_t)a[ 6] * b[ 5];
+    int64_t t311 = (int64_t)a[ 7] * b[ 4];
+    int64_t t12  = (int64_t)a[ 5] * b[ 7];
+    int64_t t112 = (int64_t)a[ 6] * b[ 6];
+    int64_t t212 = (int64_t)a[ 7] * b[ 5];
+    int64_t t13  = (int64_t)a[ 6] * b[ 7];
+    int64_t t113 = (int64_t)a[ 7] * b[ 6];
+    int64_t t14  = (int64_t)a[ 7] * b[ 7];
+    t1  += t101;
+    t2  += t102; t2  += t202;
+    t3  += t103; t3  += t203; t3  += t303;
+    t4  += t104; t4  += t204; t4  += t304; t4  += t404;
+    t5  += t105; t5  += t205; t5  += t305; t5  += t405; t5  += t505;
+    t6  += t106; t6  += t206; t6  += t306; t6  += t406; t6  += t506;
+    t6  += t606;
+    t7  += t107; t7  += t207; t7  += t307; t7  += t407; t7  += t507;
+    t7  += t607;
+    t7  += t707;
+    t8  += t108; t8  += t208; t8  += t308; t8  += t408; t8  += t508;
+    t8  += t608;
+    t9  += t109; t9  += t209; t9  += t309; t9  += t409; t9  += t509;
+    t10 += t110; t10 += t210; t10 += t310; t10 += t410;
+    t11 += t111; t11 += t211; t11 += t311;
+    t12 += t112; t12 += t212;
+    t13 += t113;
+    int64_t o = t14 >> 28;
+    int64_t t15 = o;
+    t14 -= o << 28;
+    o = t0  >> 28; t1  += o; t = (int64_t)o << 28; t0  -= t;
+    o = t1  >> 28; t2  += o; t = (int64_t)o << 28; t1  -= t;
+    o = t2  >> 28; t3  += o; t = (int64_t)o << 28; t2  -= t;
+    o = t3  >> 28; t4  += o; t = (int64_t)o << 28; t3  -= t;
+    o = t4  >> 28; t5  += o; t = (int64_t)o << 28; t4  -= t;
+    o = t5  >> 28; t6  += o; t = (int64_t)o << 28; t5  -= t;
+    o = t6  >> 28; t7  += o; t = (int64_t)o << 28; t6  -= t;
+    o = t7  >> 28; t8  += o; t = (int64_t)o << 28; t7  -= t;
+    o = t8  >> 28; t9  += o; t = (int64_t)o << 28; t8  -= t;
+    o = t9  >> 28; t10 += o; t = (int64_t)o << 28; t9  -= t;
+    o = t10 >> 28; t11 += o; t = (int64_t)o << 28; t10 -= t;
+    o = t11 >> 28; t12 += o; t = (int64_t)o << 28; t11 -= t;
+    o = t12 >> 28; t13 += o; t = (int64_t)o << 28; t12 -= t;
+    o = t13 >> 28; t14 += o; t = (int64_t)o << 28; t13 -= t;
+    o = t14 >> 28; t15 += o; t = (int64_t)o << 28; t14 -= t;
+    o = t15 >> 28; t0  += o;
+                   t8  += o; t = (int64_t)o << 28; t15 -= t;
+
+    /* Store */
+    r[0] = t0;
+    r[1] = t1;
+    r[2] = t2;
+    r[3] = t3;
+    r[4] = t4;
+    r[5] = t5;
+    r[6] = t6;
+    r[7] = t7;
+    r[8] = t8;
+    r[9] = t9;
+    r[10] = t10;
+    r[11] = t11;
+    r[12] = t12;
+    r[13] = t13;
+    r[14] = t14;
+    r[15] = t15;
+}
+
+/* Mulitply two field elements. r = (a * b) mod (2^448 - 2^224 - 1)
+ *
+ * r  [in]  Field element to hold result.
+ * a  [in]  Field element to multiply.
+ * b  [in]  Field element to multiply.
+ */
+void fe448_mul(int32_t* r, const int32_t* a, const int32_t* b)
+{
+    int32_t r0[16];
+    int32_t r1[16];
+    int32_t* a1 = r1;
+    int32_t b1[8];
+    int32_t r2[16];
+    a1[0] = a[0] + a[8];
+    a1[1] = a[1] + a[9];
+    a1[2] = a[2] + a[10];
+    a1[3] = a[3] + a[11];
+    a1[4] = a[4] + a[12];
+    a1[5] = a[5] + a[13];
+    a1[6] = a[6] + a[14];
+    a1[7] = a[7] + a[15];
+    b1[0] = b[0] + b[8];
+    b1[1] = b[1] + b[9];
+    b1[2] = b[2] + b[10];
+    b1[3] = b[3] + b[11];
+    b1[4] = b[4] + b[12];
+    b1[5] = b[5] + b[13];
+    b1[6] = b[6] + b[14];
+    b1[7] = b[7] + b[15];
+    fe448_mul_8(r2, a + 8, b + 8);
+    fe448_mul_8(r0, a, b);
+    fe448_mul_8(r1, a1, b1);
+    r[ 0] = r0[ 0] + r2[ 0] + r1[ 8] - r0[ 8];
+    r[ 1] = r0[ 1] + r2[ 1] + r1[ 9] - r0[ 9];
+    r[ 2] = r0[ 2] + r2[ 2] + r1[10] - r0[10];
+    r[ 3] = r0[ 3] + r2[ 3] + r1[11] - r0[11];
+    r[ 4] = r0[ 4] + r2[ 4] + r1[12] - r0[12];
+    r[ 5] = r0[ 5] + r2[ 5] + r1[13] - r0[13];
+    r[ 6] = r0[ 6] + r2[ 6] + r1[14] - r0[14];
+    r[ 7] = r0[ 7] + r2[ 7] + r1[15] - r0[15];
+    r[ 8] = r2[ 8]          + r1[ 0] - r0[ 0] + r1[ 8];
+    r[ 9] = r2[ 9]          + r1[ 1] - r0[ 1] + r1[ 9];
+    r[10] = r2[10]          + r1[ 2] - r0[ 2] + r1[10];
+    r[11] = r2[11]          + r1[ 3] - r0[ 3] + r1[11];
+    r[12] = r2[12]          + r1[ 4] - r0[ 4] + r1[12];
+    r[13] = r2[13]          + r1[ 5] - r0[ 5] + r1[13];
+    r[14] = r2[14]          + r1[ 6] - r0[ 6] + r1[14];
+    r[15] = r2[15]          + r1[ 7] - r0[ 7] + r1[15];
+}
+
+/* Square a field element. r = a * a
+ *
+ * r  [in]  Field element to hold result.
+ * a  [in]  Field element to square.
+ */
+static WC_INLINE void fe448_sqr_8(int32_t* r, const int32_t* a)
+{
+    int64_t t;
+    int64_t t0   =     (int64_t)a[ 0] * a[ 0];
+    int64_t t1   = 2 * (int64_t)a[ 0] * a[ 1];
+    int64_t t2   = 2 * (int64_t)a[ 0] * a[ 2];
+    int64_t t102 =     (int64_t)a[ 1] * a[ 1];
+    int64_t t3   = 2 * (int64_t)a[ 0] * a[ 3];
+    int64_t t103 = 2 * (int64_t)a[ 1] * a[ 2];
+    int64_t t4   = 2 * (int64_t)a[ 0] * a[ 4];
+    int64_t t104 = 2 * (int64_t)a[ 1] * a[ 3];
+    int64_t t204 =     (int64_t)a[ 2] * a[ 2];
+    int64_t t5   = 2 * (int64_t)a[ 0] * a[ 5];
+    int64_t t105 = 2 * (int64_t)a[ 1] * a[ 4];
+    int64_t t205 = 2 * (int64_t)a[ 2] * a[ 3];
+    int64_t t6   = 2 * (int64_t)a[ 0] * a[ 6];
+    int64_t t106 = 2 * (int64_t)a[ 1] * a[ 5];
+    int64_t t206 = 2 * (int64_t)a[ 2] * a[ 4];
+    int64_t t306 =     (int64_t)a[ 3] * a[ 3];
+    int64_t t7   = 2 * (int64_t)a[ 0] * a[ 7];
+    int64_t t107 = 2 * (int64_t)a[ 1] * a[ 6];
+    int64_t t207 = 2 * (int64_t)a[ 2] * a[ 5];
+    int64_t t307 = 2 * (int64_t)a[ 3] * a[ 4];
+    int64_t t8   = 2 * (int64_t)a[ 1] * a[ 7];
+    int64_t t108 = 2 * (int64_t)a[ 2] * a[ 6];
+    int64_t t208 = 2 * (int64_t)a[ 3] * a[ 5];
+    int64_t t308 =     (int64_t)a[ 4] * a[ 4];
+    int64_t t9   = 2 * (int64_t)a[ 2] * a[ 7];
+    int64_t t109 = 2 * (int64_t)a[ 3] * a[ 6];
+    int64_t t209 = 2 * (int64_t)a[ 4] * a[ 5];
+    int64_t t10  = 2 * (int64_t)a[ 3] * a[ 7];
+    int64_t t110 = 2 * (int64_t)a[ 4] * a[ 6];
+    int64_t t210 =     (int64_t)a[ 5] * a[ 5];
+    int64_t t11  = 2 * (int64_t)a[ 4] * a[ 7];
+    int64_t t111 = 2 * (int64_t)a[ 5] * a[ 6];
+    int64_t t12  = 2 * (int64_t)a[ 5] * a[ 7];
+    int64_t t112 =     (int64_t)a[ 6] * a[ 6];
+    int64_t t13  = 2 * (int64_t)a[ 6] * a[ 7];
+    int64_t t14  =     (int64_t)a[ 7] * a[ 7];
+    t2  += t102;
+    t3  += t103;
+    t4  += t104; t4  += t204;
+    t5  += t105; t5  += t205;
+    t6  += t106; t6  += t206; t6  += t306;
+    t7  += t107; t7  += t207; t7  += t307;
+    t8  += t108; t8  += t208; t8  += t308;
+    t9  += t109; t9  += t209;
+    t10 += t110; t10 += t210;
+    t11 += t111;
+    t12 += t112;
+    int64_t o = t14 >> 28;
+    int64_t t15 = o;
+    t14 -= o << 28;
+    o = t0  >> 28; t1  += o; t = (int64_t)o << 28; t0  -= t;
+    o = t1  >> 28; t2  += o; t = (int64_t)o << 28; t1  -= t;
+    o = t2  >> 28; t3  += o; t = (int64_t)o << 28; t2  -= t;
+    o = t3  >> 28; t4  += o; t = (int64_t)o << 28; t3  -= t;
+    o = t4  >> 28; t5  += o; t = (int64_t)o << 28; t4  -= t;
+    o = t5  >> 28; t6  += o; t = (int64_t)o << 28; t5  -= t;
+    o = t6  >> 28; t7  += o; t = (int64_t)o << 28; t6  -= t;
+    o = t7  >> 28; t8  += o; t = (int64_t)o << 28; t7  -= t;
+    o = t8  >> 28; t9  += o; t = (int64_t)o << 28; t8  -= t;
+    o = t9  >> 28; t10 += o; t = (int64_t)o << 28; t9  -= t;
+    o = t10 >> 28; t11 += o; t = (int64_t)o << 28; t10 -= t;
+    o = t11 >> 28; t12 += o; t = (int64_t)o << 28; t11 -= t;
+    o = t12 >> 28; t13 += o; t = (int64_t)o << 28; t12 -= t;
+    o = t13 >> 28; t14 += o; t = (int64_t)o << 28; t13 -= t;
+    o = t14 >> 28; t15 += o; t = (int64_t)o << 28; t14 -= t;
+    o = t15 >> 28; t0  += o;
+                   t8  += o; t = (int64_t)o << 28; t15 -= t;
+
+    /* Store */
+    r[0] = t0;
+    r[1] = t1;
+    r[2] = t2;
+    r[3] = t3;
+    r[4] = t4;
+    r[5] = t5;
+    r[6] = t6;
+    r[7] = t7;
+    r[8] = t8;
+    r[9] = t9;
+    r[10] = t10;
+    r[11] = t11;
+    r[12] = t12;
+    r[13] = t13;
+    r[14] = t14;
+    r[15] = t15;
+}
+
+/* Square a field element. r = (a * a) mod (2^448 - 2^224 - 1)
+ *
+ * r  [in]  Field element to hold result.
+ * a  [in]  Field element to square.
+ */
+void fe448_sqr(int32_t* r, const int32_t* a)
+{
+    int32_t r0[16];
+    int32_t r1[16];
+    int32_t* a1 = r1;
+    int32_t r2[16];
+    a1[0] = a[0] + a[8];
+    a1[1] = a[1] + a[9];
+    a1[2] = a[2] + a[10];
+    a1[3] = a[3] + a[11];
+    a1[4] = a[4] + a[12];
+    a1[5] = a[5] + a[13];
+    a1[6] = a[6] + a[14];
+    a1[7] = a[7] + a[15];
+    fe448_sqr_8(r2, a + 8);
+    fe448_sqr_8(r0, a);
+    fe448_sqr_8(r1, a1);
+    r[ 0] = r0[ 0] + r2[ 0] + r1[ 8] - r0[ 8];
+    r[ 1] = r0[ 1] + r2[ 1] + r1[ 9] - r0[ 9];
+    r[ 2] = r0[ 2] + r2[ 2] + r1[10] - r0[10];
+    r[ 3] = r0[ 3] + r2[ 3] + r1[11] - r0[11];
+    r[ 4] = r0[ 4] + r2[ 4] + r1[12] - r0[12];
+    r[ 5] = r0[ 5] + r2[ 5] + r1[13] - r0[13];
+    r[ 6] = r0[ 6] + r2[ 6] + r1[14] - r0[14];
+    r[ 7] = r0[ 7] + r2[ 7] + r1[15] - r0[15];
+    r[ 8] = r2[ 8]          + r1[ 0] - r0[ 0] + r1[ 8];
+    r[ 9] = r2[ 9]          + r1[ 1] - r0[ 1] + r1[ 9];
+    r[10] = r2[10]          + r1[ 2] - r0[ 2] + r1[10];
+    r[11] = r2[11]          + r1[ 3] - r0[ 3] + r1[11];
+    r[12] = r2[12]          + r1[ 4] - r0[ 4] + r1[12];
+    r[13] = r2[13]          + r1[ 5] - r0[ 5] + r1[13];
+    r[14] = r2[14]          + r1[ 6] - r0[ 6] + r1[14];
+    r[15] = r2[15]          + r1[ 7] - r0[ 7] + r1[15];
+}
+
+/* Invert the field element. (r * a) mod (2^448 - 2^224 - 1) = 1
+ * Constant time implementation - using Fermat's little theorem:
+ *   a^(p-1) mod p = 1 => a^(p-2) mod p = 1/a
+ * For curve448: p - 2 = 2^448 - 2^224 - 3
+ *
+ * r  [in]  Field element to hold result.
+ * a  [in]  Field element to invert.
+ */
+void fe448_invert(int32_t* r, const int32_t* a)
+{
+    int32_t t1[16];
+    int32_t t2[16];
+    int32_t t3[16];
+    int32_t t4[16];
+    int i;
+
+    fe448_sqr(t1, a);
+    /* t1 = 2 */
+    fe448_mul(t1, t1, a);
+    /* t1 = 3 */
+    fe448_sqr(t2, t1); for (i = 1; i < 2; ++i) fe448_sqr(t2, t2);
+    /* t2 = c */
+    fe448_mul(t3, t2, a);
+    /* t3 = d */
+    fe448_mul(t1, t2, t1);
+    /* t1 = f */
+    fe448_sqr(t2, t1);
+    /* t2 = 1e */
+    fe448_mul(t4, t2, a);
+    /* t4 = 1f */
+    fe448_sqr(t2, t4); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2);
+    /* t2 = 3e0 */
+    fe448_mul(t1, t2, t4);
+    /* t1 = 3ff */
+    fe448_sqr(t2, t1); for (i = 1; i < 10; ++i) fe448_sqr(t2, t2);
+    /* t2 = ffc00 */
+    fe448_mul(t1, t2, t1);
+    /* t1 = fffff */
+    fe448_sqr(t2, t1); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2);
+    /* t2 = 1ffffe0 */
+    fe448_mul(t1, t2, t4);
+    /* t1 = 1ffffff */
+    fe448_sqr(t2, t1); for (i = 1; i < 25; ++i) fe448_sqr(t2, t2);
+    /* t2 = 3fffffe000000 */
+    fe448_mul(t1, t2, t1);
+    /* t1 = 3ffffffffffff */
+    fe448_sqr(t2, t1); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2);
+    /* t2 = 7fffffffffffe0 */
+    fe448_mul(t1, t2, t4);
+    /* t1 = 7fffffffffffff */
+    fe448_sqr(t2, t1); for (i = 1; i < 55; ++i) fe448_sqr(t2, t2);
+    /* t2 = 3fffffffffffff80000000000000 */
+    fe448_mul(t1, t2, t1);
+    /* t1 = 3fffffffffffffffffffffffffff */
+    fe448_sqr(t2, t1); for (i = 1; i < 110; ++i) fe448_sqr(t2, t2);
+    /* t2 = fffffffffffffffffffffffffffc000000000000000000000000000 */
+    fe448_mul(t1, t2, t1);
+    /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+    fe448_sqr(t2, t1); for (i = 1; i < 4; ++i) fe448_sqr(t2, t2);
+    /* t2 = fffffffffffffffffffffffffffffffffffffffffffffffffffffff0 */
+    fe448_mul(t3, t3, t2);
+    /* t3 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffd */
+    fe448_mul(t1, t3, a);
+    /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe */
+    fe448_sqr(t1, t1); for (i = 1; i < 224; ++i) fe448_sqr(t1, t1);
+    /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe00000000000000000000000000000000000000000000000000000000 */
+    fe448_mul(r, t3, t1);
+    /* r = fffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffffffffffffffffffffffffffffffffffffffffffffffffffffd */
+}
+
+/* Scalar multiply the point by a number. r = n.a
+ * Uses Montogmery ladder and only requires the x-ordinate.
+ *
+ * r  [in]  Field element to hold result.
+ * n  [in]  Scalar as an array of bytes.
+ * a  [in]  Point to multiply - x-ordinate only.
+ */
+int curve448(byte* r, const byte* n, const byte* a)
+{
+    int32_t x1[16];
+    int32_t x2[16];
+    int32_t z2[16];
+    int32_t x3[16];
+    int32_t z3[16];
+    int32_t t0[16];
+    int32_t t1[16];
+    int i;
+    unsigned int swap;
+    unsigned int b;
+
+    fe448_from_bytes(x1, a);
+    fe448_1(x2);
+    fe448_0(z2);
+    fe448_copy(x3, x1);
+    fe448_1(z3);
+
+    swap = 0;
+    for (i = 447; i >= 0; --i) {
+        b = (n[i >> 3] >> (i & 7)) & 1;
+        swap ^= b;
+        fe448_cswap(x2, x3, swap);
+        fe448_cswap(z2, z3, swap);
+        swap = b;
+
+        /* Montgomery Ladder - double and add */
+        fe448_add(t0, x2, z2);
+        fe448_reduce(t0);
+        fe448_add(t1, x3, z3);
+        fe448_reduce(t1);
+        fe448_sub(x2, x2, z2);
+        fe448_sub(x3, x3, z3);
+        fe448_mul(t1, t1, x2);
+        fe448_mul(z3, x3, t0);
+        fe448_sqr(t0, t0);
+        fe448_sqr(x2, x2);
+        fe448_add(x3, z3, t1);
+        fe448_reduce(x3);
+        fe448_sqr(x3, x3);
+        fe448_sub(z3, z3, t1);
+        fe448_sqr(z3, z3);
+        fe448_mul(z3, z3, x1);
+        fe448_sub(t1, t0, x2);
+        fe448_mul(x2, t0, x2);
+        fe448_mul39081(z2, t1);
+        fe448_add(z2, t0, z2);
+        fe448_mul(z2, z2, t1);
+    }
+    /* Last two bits are 0 - no final swap check required. */
+
+    fe448_invert(z2, z2);
+    fe448_mul(x2, x2, z2);
+    fe448_to_bytes(r, x2);
+
+    return 0;
+}
+
+#ifdef HAVE_ED448
+/* Check whether field element is not 0.
+ * Must convert to a normalized form before checking.
+ *
+ * a  [in]  Field element.
+ * returns 0 when zero, and any other value otherwise.
+ */
+int fe448_isnonzero(const int32_t* a)
+{
+    uint8_t b[56];
+    int i;
+    uint8_t c = 0;
+    fe448_to_bytes(b, a);
+    for (i = 0; i < 56; i++)
+        c |= b[i];
+    return c;
+}
+
+/* Check whether field element is negative.
+ * Must convert to a normalized form before checking.
+ *
+ * a  [in]  Field element.
+ * returns 1 when negative, and 0 otherwise.
+ */
+int fe448_isnegative(const int32_t* a)
+{
+    uint8_t b[56];
+    fe448_to_bytes(b, a);
+    return b[0] & 1;
+}
+
+/* Negates the field element. r = -a
+ *
+ * r  [in]  Field element to hold result.
+ * a  [in]  Field element.
+ */
+void fe448_neg(int32_t* r, const int32_t* a)
+{
+    r[0] = -a[0];
+    r[1] = -a[1];
+    r[2] = -a[2];
+    r[3] = -a[3];
+    r[4] = -a[4];
+    r[5] = -a[5];
+    r[6] = -a[6];
+    r[7] = -a[7];
+    r[8] = -a[8];
+    r[9] = -a[9];
+    r[10] = -a[10];
+    r[11] = -a[11];
+    r[12] = -a[12];
+    r[13] = -a[13];
+    r[14] = -a[14];
+    r[15] = -a[15];
+}
+
+/* Raise field element to (p-3) / 4: 2^446 - 2^222 - 1
+ * Used for calcualting y-ordinate from x-ordinate for Ed448.
+ *
+ * r  [in]  Field element to hold result.
+ * a  [in]  Field element to exponentiate.
+ */
+void fe448_pow_2_446_222_1(int32_t* r, const int32_t* a)
+{
+    int32_t t1[16];
+    int32_t t2[16];
+    int32_t t3[16];
+    int32_t t4[16];
+    int32_t t5[16];
+    int i;
+
+    fe448_sqr(t3, a);
+    /* t3 = 2 */
+    fe448_mul(t1, t3, a);
+    /* t1 = 3 */
+    fe448_sqr(t5, t1);
+    /* t5 = 6 */
+    fe448_mul(t5, t5, a);
+    /* t5 = 7 */
+    fe448_sqr(t2, t1); for (i = 1; i < 2; ++i) fe448_sqr(t2, t2);
+    /* t2 = c */
+    fe448_mul(t3, t2, t3);
+    /* t3 = e */
+    fe448_mul(t1, t2, t1);
+    /* t1 = f */
+    fe448_sqr(t2, t1); for (i = 1; i < 3; ++i) fe448_sqr(t2, t2);
+    /* t2 = 78 */
+    fe448_mul(t5, t2, t5);
+    /* t5 = 7f */
+    fe448_sqr(t2, t1); for (i = 1; i < 4; ++i) fe448_sqr(t2, t2);
+    /* t2 = f0 */
+    fe448_mul(t1, t2, t1);
+    /* t1 = ff */
+    fe448_mul(t3, t3, t2);
+    /* t3 = fe */
+    fe448_sqr(t2, t1); for (i = 1; i < 7; ++i) fe448_sqr(t2, t2);
+    /* t2 = 7f80 */
+    fe448_mul(t5, t2, t5);
+    /* t5 = 7fff */
+    fe448_sqr(t2, t1); for (i = 1; i < 8; ++i) fe448_sqr(t2, t2);
+    /* t2 = ff00 */
+    fe448_mul(t1, t2, t1);
+    /* t1 = ffff */
+    fe448_mul(t3, t3, t2);
+    /* t3 = fffe */
+    fe448_sqr(t2, t5); for (i = 1; i < 15; ++i) fe448_sqr(t2, t2);
+    /* t2 = 3fff8000 */
+    fe448_mul(t5, t2, t5);
+    /* t5 = 3fffffff */
+    fe448_sqr(t2, t1); for (i = 1; i < 16; ++i) fe448_sqr(t2, t2);
+    /* t2 = ffff0000 */
+    fe448_mul(t1, t2, t1);
+    /* t1 = ffffffff */
+    fe448_mul(t3, t3, t2);
+    /* t3 = fffffffe */
+    fe448_sqr(t2, t1); for (i = 1; i < 32; ++i) fe448_sqr(t2, t2);
+    /* t2 = ffffffff00000000 */
+    fe448_mul(t2, t2, t1);
+    /* t2 = ffffffffffffffff */
+    fe448_sqr(t1, t2); for (i = 1; i < 64; ++i) fe448_sqr(t1, t1);
+    /* t1 = ffffffffffffffff0000000000000000 */
+    fe448_mul(t1, t1, t2);
+    /* t1 = ffffffffffffffffffffffffffffffff */
+    fe448_sqr(t1, t1); for (i = 1; i < 64; ++i) fe448_sqr(t1, t1);
+    /* t1 = ffffffffffffffffffffffffffffffff0000000000000000 */
+    fe448_mul(t4, t1, t2);
+    /* t4 = ffffffffffffffffffffffffffffffffffffffffffffffff */
+    fe448_sqr(t2, t4); for (i = 1; i < 32; ++i) fe448_sqr(t2, t2);
+    /* t2 = ffffffffffffffffffffffffffffffffffffffffffffffff00000000 */
+    fe448_mul(t3, t3, t2);
+    /* t3 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe */
+    fe448_sqr(t1, t3); for (i = 1; i < 192; ++i) fe448_sqr(t1, t1);
+    /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe000000000000000000000000000000000000000000000000 */
+    fe448_mul(t1, t1, t4);
+    /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffffffffffffffffffffffffffffffffffffffffffff */
+    fe448_sqr(t1, t1); for (i = 1; i < 30; ++i) fe448_sqr(t1, t1);
+    /* t1 = 3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffffffffffffffffffffffffffffffffffffffffffc0000000 */
+    fe448_mul(r, t5, t1);
+    /* r = 3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+}
+
+/* Constant time, conditional move of b into a.
+ * a is not changed if the condition is 0.
+ *
+ * a  A field element.
+ * b  A field element.
+ * c  If 1 then copy and if 0 then don't copy.
+ */
+void fe448_cmov(int32_t* a, const int32_t* b, int c)
+{
+    int32_t m = -(int32_t)c;
+    int32_t t0 = m & (a[0] ^ b[0]);
+    int32_t t1 = m & (a[1] ^ b[1]);
+    int32_t t2 = m & (a[2] ^ b[2]);
+    int32_t t3 = m & (a[3] ^ b[3]);
+    int32_t t4 = m & (a[4] ^ b[4]);
+    int32_t t5 = m & (a[5] ^ b[5]);
+    int32_t t6 = m & (a[6] ^ b[6]);
+    int32_t t7 = m & (a[7] ^ b[7]);
+    int32_t t8 = m & (a[8] ^ b[8]);
+    int32_t t9 = m & (a[9] ^ b[9]);
+    int32_t t10 = m & (a[10] ^ b[10]);
+    int32_t t11 = m & (a[11] ^ b[11]);
+    int32_t t12 = m & (a[12] ^ b[12]);
+    int32_t t13 = m & (a[13] ^ b[13]);
+    int32_t t14 = m & (a[14] ^ b[14]);
+    int32_t t15 = m & (a[15] ^ b[15]);
+
+    a[0] ^= t0;
+    a[1] ^= t1;
+    a[2] ^= t2;
+    a[3] ^= t3;
+    a[4] ^= t4;
+    a[5] ^= t5;
+    a[6] ^= t6;
+    a[7] ^= t7;
+    a[8] ^= t8;
+    a[9] ^= t9;
+    a[10] ^= t10;
+    a[11] ^= t11;
+    a[12] ^= t12;
+    a[13] ^= t13;
+    a[14] ^= t14;
+    a[15] ^= t15;
+}
+
+#endif /* HAVE_ED448 */
+#endif
+
+#endif /* HAVE_CURVE448 || HAVE_ED448 */
+
--- a/wolfcrypt/src/fe_low_mem.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/fe_low_mem.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* fe_low_mem.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -60,7 +60,7 @@
     !defined(FREESCALE_LTC_ECC)
     /* to be Complementary to fe_low_mem.c */
 #else
-void fe_init()
+void fe_init(void)
 {
 }
 #endif
--- a/wolfcrypt/src/fe_operations.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/fe_operations.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* fe_operations.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -42,7 +42,9 @@
 #endif
 
 #ifdef CURVED25519_X64
-#include "fe_x25519_x64.i"
+/* Assembly code in fe_x25519_asm.* */
+#elif defined(WOLFSSL_ARMASM)
+/* Assembly code in fe_armv[78]_x25519.* */
 #elif defined(CURVED25519_128BIT)
 #include "fe_x25519_128.i"
 #else
@@ -120,7 +122,7 @@
      (defined(HAVE_ED25519) && !defined(ED25519_SMALL))) && \
     !defined(FREESCALE_LTC_ECC)
 /* to be Complementary to fe_low_mem.c */
-void fe_init()
+void fe_init(void)
 {
 }
 #endif
@@ -132,16 +134,16 @@
 #if 0
   unsigned char e[32];
 #endif
-  fe x1;
-  fe x2;
-  fe z2;
-  fe x3;
-  fe z3;
-  fe tmp0;
-  fe tmp1;
-  int pos;
-  unsigned int swap;
-  unsigned int b;
+  fe x1 = {0};
+  fe x2 = {0};
+  fe z2 = {0};
+  fe x3 = {0};
+  fe z3 = {0};
+  fe tmp0 = {0};
+  fe tmp1 = {0};
+  int pos = 0;
+  unsigned int swap = 0;
+  unsigned int b = 0;
 
   /* Clamp already done during key generation and import */
 #if 0
@@ -321,24 +323,24 @@
   int64_t carry8;
   int64_t carry9;
 
-  carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
-  carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
+  carry0 = (h0 + (int64_t) (1UL<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
+  carry4 = (h4 + (int64_t) (1UL<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
 
-  carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
-  carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
+  carry1 = (h1 + (int64_t) (1UL<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
+  carry5 = (h5 + (int64_t) (1UL<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
 
-  carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
-  carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
+  carry2 = (h2 + (int64_t) (1UL<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
+  carry6 = (h6 + (int64_t) (1UL<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
 
-  carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
-  carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
+  carry3 = (h3 + (int64_t) (1UL<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
+  carry7 = (h7 + (int64_t) (1UL<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
 
-  carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
-  carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
+  carry4 = (h4 + (int64_t) (1UL<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
+  carry8 = (h8 + (int64_t) (1UL<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
 
-  carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
+  carry9 = (h9 + (int64_t) (1UL<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
 
-  carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
+  carry0 = (h0 + (int64_t) (1UL<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
 
   h[0] = (int32_t)h0;
   h[1] = (int32_t)h1;
@@ -615,17 +617,17 @@
   int64_t carry8;
   int64_t carry9;
 
-  carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
-  carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
-  carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
-  carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
-  carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
+  carry9 = (h9 + (int64_t) (1UL<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
+  carry1 = (h1 + (int64_t) (1UL<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
+  carry3 = (h3 + (int64_t) (1UL<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
+  carry5 = (h5 + (int64_t) (1UL<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
+  carry7 = (h7 + (int64_t) (1UL<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
 
-  carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
-  carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
-  carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
-  carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
-  carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
+  carry0 = (h0 + (int64_t) (1UL<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
+  carry2 = (h2 + (int64_t) (1UL<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
+  carry4 = (h4 + (int64_t) (1UL<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
+  carry6 = (h6 + (int64_t) (1UL<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
+  carry8 = (h8 + (int64_t) (1UL<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
 
   h[0] = (int32_t)h0;
   h[1] = (int32_t)h1;
@@ -643,11 +645,11 @@
 
 void fe_invert(fe out,const fe z)
 {
-  fe t0;
-  fe t1;
-  fe t2;
-  fe t3;
-  int i;
+  fe t0 = {0};
+  fe t1 = {0};
+  fe t2 = {0};
+  fe t3 = {0};
+  int i = 0;
 
   /* pow225521 */
   fe_sq(t0,z); for (i = 1;i < 1;++i) fe_sq(t0,t0);
@@ -902,46 +904,46 @@
     i.e. |h1| <= 1.7*2^59; narrower ranges for h3, h5, h7, h9
   */
 
-  carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
-  carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
+  carry0 = (h0 + (int64_t) (1UL<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
+  carry4 = (h4 + (int64_t) (1UL<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
   /* |h0| <= 2^25 */
   /* |h4| <= 2^25 */
   /* |h1| <= 1.71*2^59 */
   /* |h5| <= 1.71*2^59 */
 
-  carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
-  carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
+  carry1 = (h1 + (int64_t) (1UL<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
+  carry5 = (h5 + (int64_t) (1UL<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
   /* |h1| <= 2^24; from now on fits into int32 */
   /* |h5| <= 2^24; from now on fits into int32 */
   /* |h2| <= 1.41*2^60 */
   /* |h6| <= 1.41*2^60 */
 
-  carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
-  carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
+  carry2 = (h2 + (int64_t) (1UL<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
+  carry6 = (h6 + (int64_t) (1UL<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
   /* |h2| <= 2^25; from now on fits into int32 unchanged */
   /* |h6| <= 2^25; from now on fits into int32 unchanged */
   /* |h3| <= 1.71*2^59 */
   /* |h7| <= 1.71*2^59 */
 
-  carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
-  carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
+  carry3 = (h3 + (int64_t) (1UL<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
+  carry7 = (h7 + (int64_t) (1UL<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
   /* |h3| <= 2^24; from now on fits into int32 unchanged */
   /* |h7| <= 2^24; from now on fits into int32 unchanged */
   /* |h4| <= 1.72*2^34 */
   /* |h8| <= 1.41*2^60 */
 
-  carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
-  carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
+  carry4 = (h4 + (int64_t) (1UL<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
+  carry8 = (h8 + (int64_t) (1UL<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
   /* |h4| <= 2^25; from now on fits into int32 unchanged */
   /* |h8| <= 2^25; from now on fits into int32 unchanged */
   /* |h5| <= 1.01*2^24 */
   /* |h9| <= 1.71*2^59 */
 
-  carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
+  carry9 = (h9 + (int64_t) (1UL<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
   /* |h9| <= 2^24; from now on fits into int32 unchanged */
   /* |h0| <= 1.1*2^39 */
 
-  carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
+  carry0 = (h0 + (int64_t) (1UL<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
   /* |h0| <= 2^25; from now on fits into int32 unchanged */
   /* |h1| <= 1.01*2^24 */
 
@@ -1075,17 +1077,17 @@
   int64_t carry8;
   int64_t carry9;
 
-  carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
-  carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
-  carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
-  carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
-  carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
+  carry9 = (h9 + (int64_t) (1UL<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
+  carry1 = (h1 + (int64_t) (1UL<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
+  carry3 = (h3 + (int64_t) (1UL<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
+  carry5 = (h5 + (int64_t) (1UL<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
+  carry7 = (h7 + (int64_t) (1UL<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
 
-  carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
-  carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
-  carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
-  carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
-  carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
+  carry0 = (h0 + (int64_t) (1UL<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
+  carry2 = (h2 + (int64_t) (1UL<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
+  carry4 = (h4 + (int64_t) (1UL<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
+  carry6 = (h6 + (int64_t) (1UL<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
+  carry8 = (h8 + (int64_t) (1UL<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
 
   h[0] = (int32_t)h0;
   h[1] = (int32_t)h1;
@@ -1227,24 +1229,24 @@
   h8 += h8;
   h9 += h9;
 
-  carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
-  carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
+  carry0 = (h0 + (int64_t) (1UL<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
+  carry4 = (h4 + (int64_t) (1UL<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
 
-  carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
-  carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
+  carry1 = (h1 + (int64_t) (1UL<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
+  carry5 = (h5 + (int64_t) (1UL<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
 
-  carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
-  carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
+  carry2 = (h2 + (int64_t) (1UL<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
+  carry6 = (h6 + (int64_t) (1UL<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
 
-  carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
-  carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
+  carry3 = (h3 + (int64_t) (1UL<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
+  carry7 = (h7 + (int64_t) (1UL<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
 
-  carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
-  carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
+  carry4 = (h4 + (int64_t) (1UL<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
+  carry8 = (h8 + (int64_t) (1UL<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
 
-  carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
+  carry9 = (h9 + (int64_t) (1UL<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
 
-  carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
+  carry0 = (h0 + (int64_t) (1UL<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
 
   h[0] = (int32_t)h0;
   h[1] = (int32_t)h1;
@@ -1261,10 +1263,10 @@
 
 void fe_pow22523(fe out,const fe z)
 {
-  fe t0;
-  fe t1;
-  fe t2;
-  int i;
+  fe t0 = {0};
+  fe t1 = {0};
+  fe t2 = {0};
+  int i = 0;
 
   fe_sq(t0,z); for (i = 1;i < 1;++i) fe_sq(t0,t0);
   fe_sq(t1,t0); for (i = 1;i < 2;++i) fe_sq(t1,t1);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wolfcrypt/src/fips.c	Thu Jun 04 23:57:22 2020 +0000
@@ -0,0 +1,1 @@
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wolfcrypt/src/fips_test.c	Thu Jun 04 23:57:22 2020 +0000
@@ -0,0 +1,1 @@
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wolfcrypt/src/ge_448.c	Thu Jun 04 23:57:22 2020 +0000
@@ -0,0 +1,10781 @@
+/* ge_448.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/* Based On Daniel J Bernstein's ed25519 Public Domain ref10 work.
+ * Small implementation based on Daniel Beer's ed25519 public domain work.
+ * Reworked for ed448 by Sean Parkinson.
+ */
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#ifdef HAVE_ED448
+
+#include <wolfssl/wolfcrypt/ge_448.h>
+#include <wolfssl/wolfcrypt/ed448.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
+/*
+sc means scalar.
+ge means group element.
+
+Here the group is the set of pairs (x,y) of field elements (see ge_448.h)
+satisfying -x^2 + y^2 = 1 + d x^2y^2
+where d = -39081
+
+Representations:
+  ge448_p2 (projective) :  (X:Y:Z) satisfying x=X/Z, y=Y/Z
+  ge448_precomp (affine):  (X:Y)
+*/
+
+
+#ifdef ED448_SMALL
+
+/* Base point of ed448 */
+static const ge448_p2 ed448_base = {
+    { 0x5e, 0xc0, 0x0c, 0xc7, 0x2b, 0xa8, 0x26, 0x26, 0x8e, 0x93, 0x00, 0x8b,
+      0xe1, 0x80, 0x3b, 0x43, 0x11, 0x65, 0xb6, 0x2a, 0xf7, 0x1a, 0xae, 0x12,
+      0x64, 0xa4, 0xd3, 0xa3, 0x24, 0xe3, 0x6d, 0xea, 0x67, 0x17, 0x0f, 0x47,
+      0x70, 0x65, 0x14, 0x9e, 0xda, 0x36, 0xbf, 0x22, 0xa6, 0x15, 0x1d, 0x22,
+      0xed, 0x0d, 0xed, 0x6b, 0xc6, 0x70, 0x19, 0x4f },
+    { 0x14, 0xfa, 0x30, 0xf2, 0x5b, 0x79, 0x08, 0x98, 0xad, 0xc8, 0xd7, 0x4e,
+      0x2c, 0x13, 0xbd, 0xfd, 0xc4, 0x39, 0x7c, 0xe6, 0x1c, 0xff, 0xd3, 0x3a,
+      0xd7, 0xc2, 0xa0, 0x05, 0x1e, 0x9c, 0x78, 0x87, 0x40, 0x98, 0xa3, 0x6c,
+      0x73, 0x73, 0xea, 0x4b, 0x62, 0xc7, 0xc9, 0x56, 0x37, 0x20, 0x76, 0x88,
+      0x24, 0xbc, 0xb6, 0x6e, 0x71, 0x46, 0x3f, 0x69 },
+    { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }
+};
+
+/* Part of order of ed448 that needs tp be multiplied when reducing */
+static const uint8_t ed448_order_mul[56] = {
+    0x0d, 0xbb, 0xa7, 0x54, 0x6d, 0x3d, 0x87, 0xdc, 0xaa, 0x70, 0x3a, 0x72,
+    0x8d, 0x3d, 0x93, 0xde, 0x6f, 0xc9, 0x29, 0x51, 0xb6, 0x24, 0xb1, 0x3b,
+    0x16, 0xdc, 0x35, 0x83,
+};
+
+/* Reduce scalar mod the order of the curve.
+ * Scalar Will be 114 bytes.
+ *
+ * b  [in]  Scalar to reduce.
+ */
+void sc448_reduce(uint8_t* b)
+{
+    int i, j;
+    uint32_t t[114];
+    uint8_t o;
+
+    for (i = 0; i < 86; i++) {
+        t[i] = b[i];
+    }
+    for (i = 0; i < 58; i++) {
+        for (j = 0; j < 28; j++)
+            t[i+j] += b[i+56] * ((uint32_t)ed448_order_mul[j] << 2);
+        t[i+56] = 0;
+    }
+    for (i = 54; i < 87; i++) {
+        t[i+1] += t[i] >> 8;
+        t[i] &= 0xff;
+    }
+    for (i = 0; i < 31; i++) {
+        for (j = 0; j < 28; j++)
+            t[i+j] += t[i+56] * ((uint32_t)ed448_order_mul[j] << 2);
+        t[i+56] = 0;
+    }
+    for (i = 54; i < 60; i++) {
+        t[i+1] += t[i] >> 8;
+        t[i] &= 0xff;
+    }
+    for (i = 0; i < 4; i++) {
+        for (j = 0; j < 28; j++)
+            t[i+j] += t[i+56] * ((uint32_t)ed448_order_mul[j] << 2);
+        t[i+56] = 0;
+    }
+    for (i = 0; i < 55; i++) {
+        t[i+1] += t[i] >> 8;
+        t[i] &= 0xff;
+    }
+    o = t[55] >> 6;
+    t[55] &= 0x3f;
+    for (j = 0; j < 28; j++)
+        t[j] += o * (uint32_t)ed448_order_mul[j];
+    for (i = 0; i < 55; i++) {
+        t[i+1] += t[i] >> 8;
+        b[i] = t[i] & 0xff;
+    }
+    b[i] = t[i] & 0xff;
+    b[i+1] = 0;
+}
+
+/* Multiply a by b and add d. r = (a * b + d) mod order
+ *
+ * r  [in]  Scalar to hold result.
+ * a  [in]  Scalar to multiply.
+ * b  [in]  Scalar to multiply.
+ * d  [in]  Scalar to add to multiplicative result.
+ */
+void sc448_muladd(uint8_t* r, const uint8_t* a, const uint8_t* b,
+                  const uint8_t* d)
+{
+    int i, j;
+    uint32_t t[112];
+    uint8_t o;
+
+    /* a * b + d */
+    for (i = 0; i < 56; i++)
+        t[i] = d[i];
+    for (i = 0; i < 56; i++) {
+        for (j = 0; j < 56; j++)
+            t[i+j] += (int16_t)a[i] * b[j];
+        t[i+56] = 0;
+    }
+
+    for (i = 0; i < 111; i++) {
+        t[i+1] += t[i] >> 8;
+        t[i] &= 0xff;
+    }
+    for (i = 0; i < 56; i++) {
+        for (j = 0; j < 28; j++)
+            t[i+j] += t[i+56] * ((uint32_t)ed448_order_mul[j] << 2);
+        t[i+56] = 0;
+    }
+    for (i = 54; i < 85; i++) {
+        t[i+1] += t[i] >> 8;
+        t[i] &= 0xff;
+    }
+    for (i = 0; i < 29; i++) {
+        for (j = 0; j < 28; j++)
+            t[i+j] += t[i+56] * ((uint32_t)ed448_order_mul[j] << 2);
+        t[i+56] = 0;
+    }
+    for (i = 54; i < 58; i++) {
+        t[i+1] += t[i] >> 8;
+        t[i] &= 0xff;
+    }
+    for (i = 0; i < 2; i++) {
+        for (j = 0; j < 28; j++)
+            t[i+j] += t[i+56] * ((uint32_t)ed448_order_mul[j] << 2);
+        t[i+56] = 0;
+    }
+    for (i = 0; i < 55; i++) {
+        t[i+1] += t[i] >> 8;
+        t[i] &= 0xff;
+    }
+    o = t[55] >> 6;
+    t[55] &= 0x3f;
+    for (j = 0; j < 28; j++)
+        t[j] += o * (uint32_t)ed448_order_mul[j];
+    for (i = 0; i < 55; i++) {
+        t[i+1] += t[i] >> 8;
+        r[i] = t[i] & 0xff;
+    }
+    r[i] = t[i] & 0xff;
+    r[i+1] = 0;
+}
+
+/* Double the point on the Twisted Edwards curve. r = 2.p
+ *
+ * r  [in]  Point to hold result.
+ * p  [in]  Point to double.
+ */
+static WC_INLINE void ge448_dbl(ge448_p2 *r,const ge448_p2 *p)
+{
+    ge448 t0[GE448_WORDS];
+    ge448 t1[GE448_WORDS];
+
+    fe448_add(t0, p->X, p->Y);    /* t0 = B1 = X1+Y1     */
+    fe448_reduce(t0);
+    fe448_sqr(t0, t0);            /* t0 = B = (X1+Y1)^2  */
+    fe448_sqr(r->X, p->X);        /* r->X = C = X1^2     */
+    fe448_sqr(r->Y, p->Y);        /* r->Y = D = Y1^2     */
+    fe448_add(t1, r->X, r->Y);    /* t1 = E = C+D        */
+    fe448_reduce(t1);
+    fe448_sub(r->Y, r->X, r->Y);  /* r->Y = Y31 = C-D    */
+    fe448_sqr(r->Z, p->Z);        /* r->Z = H = Z1^2     */
+    fe448_add(r->Z, r->Z, r->Z);  /* r->Z = J1 = 2*H     */
+    fe448_sub(r->Z, t1, r->Z);    /* r->Z = J = E-2*H    */
+    fe448_reduce(r->Z);
+    fe448_sub(r->X, t0, t1);      /* r->X = X31 = B-E    */
+    fe448_mul(r->X, r->X, r->Z);  /* r->X = X3 = (B-E)*J */
+    fe448_mul(r->Y, r->Y, t1);    /* r->Y = Y3 = E*(C-D) */
+    fe448_mul(r->Z, t1, r->Z);    /* r->Z = Z3 = E*J     */
+}
+
+/* Add two point on the Twisted Edwards curve. r = p + q
+ *
+ * r  [in]  Point to hold result.
+ * p  [in]  Point to add.
+ * q  [in]  Point to add.
+ */
+static WC_INLINE void ge448_add(ge448_p2* r, const ge448_p2* p,
+                                const ge448_p2* q)
+{
+    ge448 t0[GE448_WORDS];
+    ge448 t1[GE448_WORDS];
+    ge448 t2[GE448_WORDS];
+    ge448 t3[GE448_WORDS];
+    ge448 t4[GE448_WORDS];
+
+    fe448_mul(t1, p->X, q->X);      /* t1 = C = X1*X2             */
+    fe448_mul(t2, p->Y, q->Y);      /* t2 = D = Y1*Y2             */
+    fe448_mul(t3, t1, t2);          /* t3 = E1 = C*D              */
+    fe448_mul39081(t3, t3);         /* t3 = E = d*C*D             */
+    fe448_mul(r->Z, p->Z, q->Z);    /* r->Z = A = Z1*Z2           */
+    fe448_sqr(t0, r->Z);            /* t0 = B = A^2               */
+    fe448_add(t4, t0, t3);          /* t4 = F = B-(-E)            */
+    fe448_sub(t0, t0, t3);          /* t0 = G = B+(-E)            */
+    fe448_reduce(t0);
+    fe448_add(r->X, p->X, p->Y);    /* r->X = H1 = X1+Y1          */
+    fe448_reduce(r->X);
+    fe448_add(r->Y, q->X, q->Y);    /* r->Y = H2 = X2+Y2          */
+    fe448_reduce(r->Y);
+    fe448_mul(r->X, r->X, r->Y);    /* r->X = H = (X1+Y1)*(X2+Y2) */
+    fe448_sub(r->X, r->X, t1);      /* r->X = X31 = H-C           */
+    fe448_sub(r->X, r->X, t2);      /* r->X = X32 = H-C-D         */
+    fe448_reduce(r->X);
+    fe448_mul(r->X, r->X, t4);      /* r->X = X33 = F*(H-C-D)     */
+    fe448_mul(r->X, r->X, r->Z);    /* r->X = X3 = A*F*(H-C-D)    */
+    fe448_sub(r->Y, t2, t1);        /* r->Y = Y31 = D-C           */
+    fe448_reduce(r->Y);
+    fe448_mul(r->Y, r->Y, t0);      /* r->Y = Y32 = G*(D-C)       */
+    fe448_mul(r->Y, r->Y, r->Z);    /* r->Y = Y3 = A*F*(D-C)      */
+    fe448_mul(r->Z, t4, t0);        /* r->Z = Z3 = F*G            */
+}
+
+/* Convert point to byte array assuming projective ordinates.
+ *
+ * b  [in]  Array of bytes to hold compressed point.
+ * p  [in]  Point to convert.
+ */
+void ge448_to_bytes(uint8_t *s, const ge448_p2 *h)
+{
+    ge448 recip[56];
+    ge448 x[56];
+
+    fe448_invert(recip, h->Z);
+    fe448_mul(x, h->X, recip);
+    fe448_mul(s, h->Y, recip);
+    fe448_norm(x);
+    fe448_norm(s);
+    s[56] = (x[0] & 1) << 7;
+}
+
+/* Compress the point to y-ordinate and negative bit.
+ *
+ * out    [in]  Array of bytes to hold compressed key.
+ * xIn    [in]  The x-ordinate.
+ * yIn    [in]  The y-ordinate.
+ */
+int ge448_compress_key(uint8_t* out, const uint8_t* xIn, const uint8_t* yIn)
+{
+    ge448 x[56];
+
+    fe448_copy(x, xIn);
+    fe448_copy(out, yIn);
+    fe448_norm(x);
+    fe448_norm(out);
+    out[56] = (x[0] & 1) << 7;
+
+    return 0;
+}
+
+/* Perform a scalar multiplication of the a point. r = p * base
+ *
+ * r  [in]  Point to hold result.
+ * a  [in]  Scalar to multiply by.
+ */
+static void ge448_scalarmult(ge448_p2* h, const ge448_p2* p, const uint8_t* a)
+{
+    ge448_p2 r;
+    ge448_p2 s;
+    int i;
+
+    XMEMSET(&r, 0, sizeof(r));
+    r.Y[0] = 1;
+    r.Z[0] = 1;
+
+    for (i = 447; i >= 0; i--) {
+        const byte bit = (a[i >> 3] >> (i & 7)) & 1;
+
+        ge448_dbl(&r, &r);
+        ge448_add(&s, &r, p);
+
+        fe448_cmov(r.X, s.X, bit);
+        fe448_cmov(r.Y, s.Y, bit);
+        fe448_cmov(r.Z, s.Z, bit);
+    }
+
+    XMEMCPY(h, &r, sizeof(r));
+}
+
+/* Perform a scalar multiplication of the base point. r = a * base
+ *
+ * r  [in]  Point to hold result.
+ * a  [in]  Scalar to multiply by.
+ */
+void ge448_scalarmult_base(ge448_p2* h, const uint8_t* a)
+{
+    ge448_scalarmult(h, &ed448_base, a);
+}
+
+/* Perform a scalar multplication of the base point and public point.
+ *   r = a * p + b * base
+ * Uses a sliding window of 5 bits.
+ * Not constant time.
+ *
+ * r  [in]  Point to hold result.
+ * a  [in]  Scalar to multiply by.
+ */
+int ge448_double_scalarmult_vartime(ge448_p2 *r, const uint8_t *a,
+                                    const ge448_p2 *A, const uint8_t *b)
+{
+    ge448_p2 t;
+
+    ge448_scalarmult(&t, &ed448_base, b);
+    ge448_scalarmult(r, A, a);
+    ge448_add(r, r, &t);
+
+    return 0;
+}
+
+/* Convert compressed point to negative of affine point.
+ * Calculates x from the y and the negative bit.
+ * Not constant time.
+ *
+ * r  [in]  Uncompressed point.
+ * b  [in]  Array of bytes representing point.
+ * returns 0 on success and -1 on failure.
+ */
+int ge448_from_bytes_negate_vartime(ge448_p2 *r, const uint8_t *b)
+{
+    int   ret = 0;
+    ge448 u[GE448_WORDS];
+    ge448 v[GE448_WORDS];
+    ge448 u3[GE448_WORDS];
+    ge448 vxx[GE448_WORDS];
+    ge448 check[GE448_WORDS];
+
+    fe448_copy(r->Y, b);
+    XMEMSET(r->Z, 0, sizeof(r->Z));
+    r->Z[0] = 1;
+    fe448_sqr(u, r->Y);                /* u = y^2                      */
+    fe448_mul39081(v, u);              /* v = 39081.y^2                */
+    fe448_sub(u, u, r->Z);             /* u = y^2-1                    */
+    fe448_add(v, v, r->Z);             /* v = 39081.y^2-1              */
+    fe448_neg(v, v);                   /* v = -39081.y^2-1 = d.y^2-1   */
+
+    fe448_sqr(r->X, v);                /* x = v^2                      */
+    fe448_mul(r->X, r->X, v);          /* x = v^3                      */
+    fe448_sqr(u3, u);                  /* x = u^2.v^3                  */
+    fe448_mul(r->X, r->X, u3);         /* x = u^2.v^3                  */
+    fe448_mul(u3, u3, u);              /* u3 = u^3                     */
+    fe448_mul(r->X, r->X, u3);         /* x = u^5.v^3                  */
+
+    fe448_pow_2_446_222_1(r->X, r->X); /* x = (u^5.v^3)^((q-3)/4)      */
+    fe448_mul(r->X, r->X, u3);         /* x = u^3(u^5.v^3)^((q-3)/4)   */
+    fe448_mul(r->X, r->X, v);          /* x = u^3.v(u^5.v^3)^((q-3)/4) */
+
+    fe448_sqr(vxx, r->X);
+    fe448_mul(vxx, vxx, v);
+    fe448_sub(check, vxx, u);          /* check = v.x^2-u              */
+    fe448_norm(check);
+    fe448_norm(r->X);
+    fe448_norm(r->Y);
+    /* Note; vx^2+u is NOT correct. */
+    if (fe448_isnonzero(check)) {
+        ret = -1;
+    }
+
+    /* Calculating negative of point in bytes - negate only if X is correct. */
+    if ((r->X[0] & 1) == (b[56] >> 7)) {
+        fe448_neg(r->X, r->X);
+    }
+
+    return ret;
+}
+
+#else /* !ED448_SMALL */
+
+#if defined(CURVED448_128BIT)
+
+/* Reduce scalar mod the order of the curve.
+ * Scalar Will be 114 bytes.
+ *
+ * b  [in]  Scalar to reduce.
+ */
+void sc448_reduce(uint8_t* b)
+{
+    uint64_t d[8];
+    uint128_t t[17];
+    uint128_t c;
+    uint64_t o;
+
+    /* Load from bytes */
+    t[ 0] =  ((int64_t) (b[ 0]) <<  0)
+          |  ((int64_t) (b[ 1]) <<  8)
+          |  ((int64_t) (b[ 2]) << 16)
+          |  ((int64_t) (b[ 3]) << 24)
+          |  ((int64_t) (b[ 4]) << 32)
+          |  ((int64_t) (b[ 5]) << 40)
+          |  ((int64_t) (b[ 6]) << 48);
+    t[ 1] =  ((int64_t) (b[ 7]) <<  0)
+          |  ((int64_t) (b[ 8]) <<  8)
+          |  ((int64_t) (b[ 9]) << 16)
+          |  ((int64_t) (b[10]) << 24)
+          |  ((int64_t) (b[11]) << 32)
+          |  ((int64_t) (b[12]) << 40)
+          |  ((int64_t) (b[13]) << 48);
+    t[ 2] =  ((int64_t) (b[14]) <<  0)
+          |  ((int64_t) (b[15]) <<  8)
+          |  ((int64_t) (b[16]) << 16)
+          |  ((int64_t) (b[17]) << 24)
+          |  ((int64_t) (b[18]) << 32)
+          |  ((int64_t) (b[19]) << 40)
+          |  ((int64_t) (b[20]) << 48);
+    t[ 3] =  ((int64_t) (b[21]) <<  0)
+          |  ((int64_t) (b[22]) <<  8)
+          |  ((int64_t) (b[23]) << 16)
+          |  ((int64_t) (b[24]) << 24)
+          |  ((int64_t) (b[25]) << 32)
+          |  ((int64_t) (b[26]) << 40)
+          |  ((int64_t) (b[27]) << 48);
+    t[ 4] =  ((int64_t) (b[28]) <<  0)
+          |  ((int64_t) (b[29]) <<  8)
+          |  ((int64_t) (b[30]) << 16)
+          |  ((int64_t) (b[31]) << 24)
+          |  ((int64_t) (b[32]) << 32)
+          |  ((int64_t) (b[33]) << 40)
+          |  ((int64_t) (b[34]) << 48);
+    t[ 5] =  ((int64_t) (b[35]) <<  0)
+          |  ((int64_t) (b[36]) <<  8)
+          |  ((int64_t) (b[37]) << 16)
+          |  ((int64_t) (b[38]) << 24)
+          |  ((int64_t) (b[39]) << 32)
+          |  ((int64_t) (b[40]) << 40)
+          |  ((int64_t) (b[41]) << 48);
+    t[ 6] =  ((int64_t) (b[42]) <<  0)
+          |  ((int64_t) (b[43]) <<  8)
+          |  ((int64_t) (b[44]) << 16)
+          |  ((int64_t) (b[45]) << 24)
+          |  ((int64_t) (b[46]) << 32)
+          |  ((int64_t) (b[47]) << 40)
+          |  ((int64_t) (b[48]) << 48);
+    t[ 7] =  ((int64_t) (b[49]) <<  0)
+          |  ((int64_t) (b[50]) <<  8)
+          |  ((int64_t) (b[51]) << 16)
+          |  ((int64_t) (b[52]) << 24)
+          |  ((int64_t) (b[53]) << 32)
+          |  ((int64_t) (b[54]) << 40)
+          |  ((int64_t) (b[55]) << 48);
+    t[ 8] =  ((int64_t) (b[56]) <<  0)
+          |  ((int64_t) (b[57]) <<  8)
+          |  ((int64_t) (b[58]) << 16)
+          |  ((int64_t) (b[59]) << 24)
+          |  ((int64_t) (b[60]) << 32)
+          |  ((int64_t) (b[61]) << 40)
+          |  ((int64_t) (b[62]) << 48);
+    t[ 9] =  ((int64_t) (b[63]) <<  0)
+          |  ((int64_t) (b[64]) <<  8)
+          |  ((int64_t) (b[65]) << 16)
+          |  ((int64_t) (b[66]) << 24)
+          |  ((int64_t) (b[67]) << 32)
+          |  ((int64_t) (b[68]) << 40)
+          |  ((int64_t) (b[69]) << 48);
+    t[10] =  ((int64_t) (b[70]) <<  0)
+          |  ((int64_t) (b[71]) <<  8)
+          |  ((int64_t) (b[72]) << 16)
+          |  ((int64_t) (b[73]) << 24)
+          |  ((int64_t) (b[74]) << 32)
+          |  ((int64_t) (b[75]) << 40)
+          |  ((int64_t) (b[76]) << 48);
+    t[11] =  ((int64_t) (b[77]) <<  0)
+          |  ((int64_t) (b[78]) <<  8)
+          |  ((int64_t) (b[79]) << 16)
+          |  ((int64_t) (b[80]) << 24)
+          |  ((int64_t) (b[81]) << 32)
+          |  ((int64_t) (b[82]) << 40)
+          |  ((int64_t) (b[83]) << 48);
+    t[12] =  ((int64_t) (b[84]) <<  0)
+          |  ((int64_t) (b[85]) <<  8)
+          |  ((int64_t) (b[86]) << 16)
+          |  ((int64_t) (b[87]) << 24)
+          |  ((int64_t) (b[88]) << 32)
+          |  ((int64_t) (b[89]) << 40)
+          |  ((int64_t) (b[90]) << 48);
+    t[13] =  ((int64_t) (b[91]) <<  0)
+          |  ((int64_t) (b[92]) <<  8)
+          |  ((int64_t) (b[93]) << 16)
+          |  ((int64_t) (b[94]) << 24)
+          |  ((int64_t) (b[95]) << 32)
+          |  ((int64_t) (b[96]) << 40)
+          |  ((int64_t) (b[97]) << 48);
+    t[14] =  ((int64_t) (b[98]) <<  0)
+          |  ((int64_t) (b[99]) <<  8)
+          |  ((int64_t) (b[100]) << 16)
+          |  ((int64_t) (b[101]) << 24)
+          |  ((int64_t) (b[102]) << 32)
+          |  ((int64_t) (b[103]) << 40)
+          |  ((int64_t) (b[104]) << 48);
+    t[15] =  ((int64_t) (b[105]) <<  0)
+          |  ((int64_t) (b[106]) <<  8)
+          |  ((int64_t) (b[107]) << 16)
+          |  ((int64_t) (b[108]) << 24)
+          |  ((int64_t) (b[109]) << 32)
+          |  ((int64_t) (b[110]) << 40)
+          |  ((int64_t) (b[111]) << 48);
+    t[16] =  ((int64_t) (b[112]) <<  0)
+          |  ((int64_t) (b[113]) <<  8);
+
+    /* Mod curve order */
+    /* 2^446 - 0x8335dc163bb124b65129c96fde933d8d723a70aadc873d6d54a7bb0d */
+    /* Mod top half of extra words */
+    t[ 4] += (int128_t)0x21cf5b5529eec34L * t[12];
+    t[ 5] += (int128_t)0x0f635c8e9c2ab70L * t[12];
+    t[ 6] += (int128_t)0x2d944a725bf7a4cL * t[12];
+    t[ 7] += (int128_t)0x20cd77058eec490L * t[12];
+    t[ 5] += (int128_t)0x21cf5b5529eec34L * t[13];
+    t[ 6] += (int128_t)0x0f635c8e9c2ab70L * t[13];
+    t[ 7] += (int128_t)0x2d944a725bf7a4cL * t[13];
+    t[ 8] += (int128_t)0x20cd77058eec490L * t[13];
+    t[ 6] += (int128_t)0x21cf5b5529eec34L * t[14];
+    t[ 7] += (int128_t)0x0f635c8e9c2ab70L * t[14];
+    t[ 8] += (int128_t)0x2d944a725bf7a4cL * t[14];
+    t[ 9] += (int128_t)0x20cd77058eec490L * t[14];
+    t[ 7] += (int128_t)0x21cf5b5529eec34L * t[15];
+    t[ 8] += (int128_t)0x0f635c8e9c2ab70L * t[15];
+    t[ 9] += (int128_t)0x2d944a725bf7a4cL * t[15];
+    t[10] += (int128_t)0x20cd77058eec490L * t[15];
+    t[ 8] += (int128_t)0x21cf5b5529eec34L * t[16];
+    t[ 9] += (int128_t)0x0f635c8e9c2ab70L * t[16];
+    t[10] += (int128_t)0x2d944a725bf7a4cL * t[16];
+    t[11] += (int128_t)0x20cd77058eec490L * t[16];
+    t[12]  = 0;
+    /* Propagate carries */
+    c = t[ 4] >> 56; t[ 5] += c; t[ 4] = t[ 4] & 0xffffffffffffff;
+    c = t[ 5] >> 56; t[ 6] += c; t[ 5] = t[ 5] & 0xffffffffffffff;
+    c = t[ 6] >> 56; t[ 7] += c; t[ 6] = t[ 6] & 0xffffffffffffff;
+    c = t[ 7] >> 56; t[ 8] += c; t[ 7] = t[ 7] & 0xffffffffffffff;
+    c = t[ 8] >> 56; t[ 9] += c; t[ 8] = t[ 8] & 0xffffffffffffff;
+    c = t[ 9] >> 56; t[10] += c; t[ 9] = t[ 9] & 0xffffffffffffff;
+    c = t[10] >> 56; t[11] += c; t[10] = t[10] & 0xffffffffffffff;
+    c = t[11] >> 56; t[12] += c; t[11] = t[11] & 0xffffffffffffff;
+    /* Mod bottom half of extra words */
+    t[ 0] += (int128_t)0x21cf5b5529eec34L * t[ 8];
+    t[ 1] += (int128_t)0x0f635c8e9c2ab70L * t[ 8];
+    t[ 2] += (int128_t)0x2d944a725bf7a4cL * t[ 8];
+    t[ 3] += (int128_t)0x20cd77058eec490L * t[ 8];
+    t[ 1] += (int128_t)0x21cf5b5529eec34L * t[ 9];
+    t[ 2] += (int128_t)0x0f635c8e9c2ab70L * t[ 9];
+    t[ 3] += (int128_t)0x2d944a725bf7a4cL * t[ 9];
+    t[ 4] += (int128_t)0x20cd77058eec490L * t[ 9];
+    t[ 2] += (int128_t)0x21cf5b5529eec34L * t[10];
+    t[ 3] += (int128_t)0x0f635c8e9c2ab70L * t[10];
+    t[ 4] += (int128_t)0x2d944a725bf7a4cL * t[10];
+    t[ 5] += (int128_t)0x20cd77058eec490L * t[10];
+    t[ 3] += (int128_t)0x21cf5b5529eec34L * t[11];
+    t[ 4] += (int128_t)0x0f635c8e9c2ab70L * t[11];
+    t[ 5] += (int128_t)0x2d944a725bf7a4cL * t[11];
+    t[ 6] += (int128_t)0x20cd77058eec490L * t[11];
+    t[ 4] += (int128_t)0x21cf5b5529eec34L * t[12];
+    t[ 5] += (int128_t)0x0f635c8e9c2ab70L * t[12];
+    t[ 6] += (int128_t)0x2d944a725bf7a4cL * t[12];
+    t[ 7] += (int128_t)0x20cd77058eec490L * t[12];
+    t[ 8]  = 0;
+    /* Propagate carries */
+    c = t[ 0] >> 56; t[ 1] += c; t[ 0] = t[ 0] & 0xffffffffffffff;
+    c = t[ 1] >> 56; t[ 2] += c; t[ 1] = t[ 1] & 0xffffffffffffff;
+    c = t[ 2] >> 56; t[ 3] += c; t[ 2] = t[ 2] & 0xffffffffffffff;
+    c = t[ 3] >> 56; t[ 4] += c; t[ 3] = t[ 3] & 0xffffffffffffff;
+    c = t[ 4] >> 56; t[ 5] += c; t[ 4] = t[ 4] & 0xffffffffffffff;
+    c = t[ 5] >> 56; t[ 6] += c; t[ 5] = t[ 5] & 0xffffffffffffff;
+    c = t[ 6] >> 56; t[ 7] += c; t[ 6] = t[ 6] & 0xffffffffffffff;
+    c = t[ 7] >> 56; t[ 8] += c; t[ 7] = t[ 7] & 0xffffffffffffff;
+    t[ 0] += (int128_t)0x21cf5b5529eec34L * t[ 8];
+    t[ 1] += (int128_t)0x0f635c8e9c2ab70L * t[ 8];
+    t[ 2] += (int128_t)0x2d944a725bf7a4cL * t[ 8];
+    t[ 3] += (int128_t)0x20cd77058eec490L * t[ 8];
+    /* Propagate carries */
+    c = t[ 0] >> 56; t[ 1] += c; d[ 0] = (int64_t)(t[ 0] & 0xffffffffffffff);
+    c = t[ 1] >> 56; t[ 2] += c; d[ 1] = (int64_t)(t[ 1] & 0xffffffffffffff);
+    c = t[ 2] >> 56; t[ 3] += c; d[ 2] = (int64_t)(t[ 2] & 0xffffffffffffff);
+    c = t[ 3] >> 56; t[ 4] += c; d[ 3] = (int64_t)(t[ 3] & 0xffffffffffffff);
+    c = t[ 4] >> 56; t[ 5] += c; d[ 4] = (int64_t)(t[ 4] & 0xffffffffffffff);
+    c = t[ 5] >> 56; t[ 6] += c; d[ 5] = (int64_t)(t[ 5] & 0xffffffffffffff);
+    c = t[ 6] >> 56; t[ 7] += c; d[ 6] = (int64_t)(t[ 6] & 0xffffffffffffff);
+    d[ 7] = t[7];
+    /* Mod bits over 56 in last word */
+    o = d[7] >> 54; d[ 7] &= 0x3fffffffffffff;
+    d[ 0] += 0x873d6d54a7bb0dL * o;
+    d[ 1] += 0x3d8d723a70aadcL * o;
+    d[ 2] += 0xb65129c96fde93L * o;
+    d[ 3] += 0x8335dc163bb124L * o;
+    /* Propagate carries */
+    o = d[ 0] >> 56; d[ 1] += o; d[ 0] = d[ 0] & 0xffffffffffffff;
+    o = d[ 1] >> 56; d[ 2] += o; d[ 1] = d[ 1] & 0xffffffffffffff;
+    o = d[ 2] >> 56; d[ 3] += o; d[ 2] = d[ 2] & 0xffffffffffffff;
+    o = d[ 3] >> 56; d[ 4] += o; d[ 3] = d[ 3] & 0xffffffffffffff;
+    o = d[ 4] >> 56; d[ 5] += o; d[ 4] = d[ 4] & 0xffffffffffffff;
+    o = d[ 5] >> 56; d[ 6] += o; d[ 5] = d[ 5] & 0xffffffffffffff;
+    o = d[ 6] >> 56; d[ 7] += o; d[ 6] = d[ 6] & 0xffffffffffffff;
+
+    /* Convert to bytes */
+    b[ 0] = (d[0 ] >>  0);
+    b[ 1] = (d[0 ] >>  8);
+    b[ 2] = (d[0 ] >> 16);
+    b[ 3] = (d[0 ] >> 24);
+    b[ 4] = (d[0 ] >> 32);
+    b[ 5] = (d[0 ] >> 40);
+    b[ 6] = (d[0 ] >> 48);
+    b[ 7] = (d[1 ] >>  0);
+    b[ 8] = (d[1 ] >>  8);
+    b[ 9] = (d[1 ] >> 16);
+    b[10] = (d[1 ] >> 24);
+    b[11] = (d[1 ] >> 32);
+    b[12] = (d[1 ] >> 40);
+    b[13] = (d[1 ] >> 48);
+    b[14] = (d[2 ] >>  0);
+    b[15] = (d[2 ] >>  8);
+    b[16] = (d[2 ] >> 16);
+    b[17] = (d[2 ] >> 24);
+    b[18] = (d[2 ] >> 32);
+    b[19] = (d[2 ] >> 40);
+    b[20] = (d[2 ] >> 48);
+    b[21] = (d[3 ] >>  0);
+    b[22] = (d[3 ] >>  8);
+    b[23] = (d[3 ] >> 16);
+    b[24] = (d[3 ] >> 24);
+    b[25] = (d[3 ] >> 32);
+    b[26] = (d[3 ] >> 40);
+    b[27] = (d[3 ] >> 48);
+    b[28] = (d[4 ] >>  0);
+    b[29] = (d[4 ] >>  8);
+    b[30] = (d[4 ] >> 16);
+    b[31] = (d[4 ] >> 24);
+    b[32] = (d[4 ] >> 32);
+    b[33] = (d[4 ] >> 40);
+    b[34] = (d[4 ] >> 48);
+    b[35] = (d[5 ] >>  0);
+    b[36] = (d[5 ] >>  8);
+    b[37] = (d[5 ] >> 16);
+    b[38] = (d[5 ] >> 24);
+    b[39] = (d[5 ] >> 32);
+    b[40] = (d[5 ] >> 40);
+    b[41] = (d[5 ] >> 48);
+    b[42] = (d[6 ] >>  0);
+    b[43] = (d[6 ] >>  8);
+    b[44] = (d[6 ] >> 16);
+    b[45] = (d[6 ] >> 24);
+    b[46] = (d[6 ] >> 32);
+    b[47] = (d[6 ] >> 40);
+    b[48] = (d[6 ] >> 48);
+    b[49] = (d[7 ] >>  0);
+    b[50] = (d[7 ] >>  8);
+    b[51] = (d[7 ] >> 16);
+    b[52] = (d[7 ] >> 24);
+    b[53] = (d[7 ] >> 32);
+    b[54] = (d[7 ] >> 40);
+    b[55] = (d[7 ] >> 48);
+    b[56] = 0;
+}
+
+/* Multiply a by b and add d. r = (a * b + d) mod order
+ *
+ * r  [in]  Scalar to hold result.
+ * a  [in]  Scalar to multiply.
+ * b  [in]  Scalar to multiply.
+ * d  [in]  Scalar to add to multiplicative result.
+ */
+void sc448_muladd(uint8_t* r, const uint8_t* a, const uint8_t* b,
+                  const uint8_t* d)
+{
+    uint64_t ad[8], bd[8], dd[8], rd[8];
+    uint128_t t[16];
+    uint128_t c;
+    uint64_t o;
+
+    /* Load from bytes */
+    ad[ 0] =  ((int64_t) (a[ 0]) <<  0)
+           |  ((int64_t) (a[ 1]) <<  8)
+           |  ((int64_t) (a[ 2]) << 16)
+           |  ((int64_t) (a[ 3]) << 24)
+           |  ((int64_t) (a[ 4]) << 32)
+           |  ((int64_t) (a[ 5]) << 40)
+           |  ((int64_t) (a[ 6]) << 48);
+    ad[ 1] =  ((int64_t) (a[ 7]) <<  0)
+           |  ((int64_t) (a[ 8]) <<  8)
+           |  ((int64_t) (a[ 9]) << 16)
+           |  ((int64_t) (a[10]) << 24)
+           |  ((int64_t) (a[11]) << 32)
+           |  ((int64_t) (a[12]) << 40)
+           |  ((int64_t) (a[13]) << 48);
+    ad[ 2] =  ((int64_t) (a[14]) <<  0)
+           |  ((int64_t) (a[15]) <<  8)
+           |  ((int64_t) (a[16]) << 16)
+           |  ((int64_t) (a[17]) << 24)
+           |  ((int64_t) (a[18]) << 32)
+           |  ((int64_t) (a[19]) << 40)
+           |  ((int64_t) (a[20]) << 48);
+    ad[ 3] =  ((int64_t) (a[21]) <<  0)
+           |  ((int64_t) (a[22]) <<  8)
+           |  ((int64_t) (a[23]) << 16)
+           |  ((int64_t) (a[24]) << 24)
+           |  ((int64_t) (a[25]) << 32)
+           |  ((int64_t) (a[26]) << 40)
+           |  ((int64_t) (a[27]) << 48);
+    ad[ 4] =  ((int64_t) (a[28]) <<  0)
+           |  ((int64_t) (a[29]) <<  8)
+           |  ((int64_t) (a[30]) << 16)
+           |  ((int64_t) (a[31]) << 24)
+           |  ((int64_t) (a[32]) << 32)
+           |  ((int64_t) (a[33]) << 40)
+           |  ((int64_t) (a[34]) << 48);
+    ad[ 5] =  ((int64_t) (a[35]) <<  0)
+           |  ((int64_t) (a[36]) <<  8)
+           |  ((int64_t) (a[37]) << 16)
+           |  ((int64_t) (a[38]) << 24)
+           |  ((int64_t) (a[39]) << 32)
+           |  ((int64_t) (a[40]) << 40)
+           |  ((int64_t) (a[41]) << 48);
+    ad[ 6] =  ((int64_t) (a[42]) <<  0)
+           |  ((int64_t) (a[43]) <<  8)
+           |  ((int64_t) (a[44]) << 16)
+           |  ((int64_t) (a[45]) << 24)
+           |  ((int64_t) (a[46]) << 32)
+           |  ((int64_t) (a[47]) << 40)
+           |  ((int64_t) (a[48]) << 48);
+    ad[ 7] =  ((int64_t) (a[49]) <<  0)
+           |  ((int64_t) (a[50]) <<  8)
+           |  ((int64_t) (a[51]) << 16)
+           |  ((int64_t) (a[52]) << 24)
+           |  ((int64_t) (a[53]) << 32)
+           |  ((int64_t) (a[54]) << 40)
+           |  ((int64_t) (a[55]) << 48);
+    /* Load from bytes */
+    bd[ 0] =  ((int64_t) (b[ 0]) <<  0)
+           |  ((int64_t) (b[ 1]) <<  8)
+           |  ((int64_t) (b[ 2]) << 16)
+           |  ((int64_t) (b[ 3]) << 24)
+           |  ((int64_t) (b[ 4]) << 32)
+           |  ((int64_t) (b[ 5]) << 40)
+           |  ((int64_t) (b[ 6]) << 48);
+    bd[ 1] =  ((int64_t) (b[ 7]) <<  0)
+           |  ((int64_t) (b[ 8]) <<  8)
+           |  ((int64_t) (b[ 9]) << 16)
+           |  ((int64_t) (b[10]) << 24)
+           |  ((int64_t) (b[11]) << 32)
+           |  ((int64_t) (b[12]) << 40)
+           |  ((int64_t) (b[13]) << 48);
+    bd[ 2] =  ((int64_t) (b[14]) <<  0)
+           |  ((int64_t) (b[15]) <<  8)
+           |  ((int64_t) (b[16]) << 16)
+           |  ((int64_t) (b[17]) << 24)
+           |  ((int64_t) (b[18]) << 32)
+           |  ((int64_t) (b[19]) << 40)
+           |  ((int64_t) (b[20]) << 48);
+    bd[ 3] =  ((int64_t) (b[21]) <<  0)
+           |  ((int64_t) (b[22]) <<  8)
+           |  ((int64_t) (b[23]) << 16)
+           |  ((int64_t) (b[24]) << 24)
+           |  ((int64_t) (b[25]) << 32)
+           |  ((int64_t) (b[26]) << 40)
+           |  ((int64_t) (b[27]) << 48);
+    bd[ 4] =  ((int64_t) (b[28]) <<  0)
+           |  ((int64_t) (b[29]) <<  8)
+           |  ((int64_t) (b[30]) << 16)
+           |  ((int64_t) (b[31]) << 24)
+           |  ((int64_t) (b[32]) << 32)
+           |  ((int64_t) (b[33]) << 40)
+           |  ((int64_t) (b[34]) << 48);
+    bd[ 5] =  ((int64_t) (b[35]) <<  0)
+           |  ((int64_t) (b[36]) <<  8)
+           |  ((int64_t) (b[37]) << 16)
+           |  ((int64_t) (b[38]) << 24)
+           |  ((int64_t) (b[39]) << 32)
+           |  ((int64_t) (b[40]) << 40)
+           |  ((int64_t) (b[41]) << 48);
+    bd[ 6] =  ((int64_t) (b[42]) <<  0)
+           |  ((int64_t) (b[43]) <<  8)
+           |  ((int64_t) (b[44]) << 16)
+           |  ((int64_t) (b[45]) << 24)
+           |  ((int64_t) (b[46]) << 32)
+           |  ((int64_t) (b[47]) << 40)
+           |  ((int64_t) (b[48]) << 48);
+    bd[ 7] =  ((int64_t) (b[49]) <<  0)
+           |  ((int64_t) (b[50]) <<  8)
+           |  ((int64_t) (b[51]) << 16)
+           |  ((int64_t) (b[52]) << 24)
+           |  ((int64_t) (b[53]) << 32)
+           |  ((int64_t) (b[54]) << 40)
+           |  ((int64_t) (b[55]) << 48);
+    /* Load from bytes */
+    dd[ 0] =  ((int64_t) (d[ 0]) <<  0)
+           |  ((int64_t) (d[ 1]) <<  8)
+           |  ((int64_t) (d[ 2]) << 16)
+           |  ((int64_t) (d[ 3]) << 24)
+           |  ((int64_t) (d[ 4]) << 32)
+           |  ((int64_t) (d[ 5]) << 40)
+           |  ((int64_t) (d[ 6]) << 48);
+    dd[ 1] =  ((int64_t) (d[ 7]) <<  0)
+           |  ((int64_t) (d[ 8]) <<  8)
+           |  ((int64_t) (d[ 9]) << 16)
+           |  ((int64_t) (d[10]) << 24)
+           |  ((int64_t) (d[11]) << 32)
+           |  ((int64_t) (d[12]) << 40)
+           |  ((int64_t) (d[13]) << 48);
+    dd[ 2] =  ((int64_t) (d[14]) <<  0)
+           |  ((int64_t) (d[15]) <<  8)
+           |  ((int64_t) (d[16]) << 16)
+           |  ((int64_t) (d[17]) << 24)
+           |  ((int64_t) (d[18]) << 32)
+           |  ((int64_t) (d[19]) << 40)
+           |  ((int64_t) (d[20]) << 48);
+    dd[ 3] =  ((int64_t) (d[21]) <<  0)
+           |  ((int64_t) (d[22]) <<  8)
+           |  ((int64_t) (d[23]) << 16)
+           |  ((int64_t) (d[24]) << 24)
+           |  ((int64_t) (d[25]) << 32)
+           |  ((int64_t) (d[26]) << 40)
+           |  ((int64_t) (d[27]) << 48);
+    dd[ 4] =  ((int64_t) (d[28]) <<  0)
+           |  ((int64_t) (d[29]) <<  8)
+           |  ((int64_t) (d[30]) << 16)
+           |  ((int64_t) (d[31]) << 24)
+           |  ((int64_t) (d[32]) << 32)
+           |  ((int64_t) (d[33]) << 40)
+           |  ((int64_t) (d[34]) << 48);
+    dd[ 5] =  ((int64_t) (d[35]) <<  0)
+           |  ((int64_t) (d[36]) <<  8)
+           |  ((int64_t) (d[37]) << 16)
+           |  ((int64_t) (d[38]) << 24)
+           |  ((int64_t) (d[39]) << 32)
+           |  ((int64_t) (d[40]) << 40)
+           |  ((int64_t) (d[41]) << 48);
+    dd[ 6] =  ((int64_t) (d[42]) <<  0)
+           |  ((int64_t) (d[43]) <<  8)
+           |  ((int64_t) (d[44]) << 16)
+           |  ((int64_t) (d[45]) << 24)
+           |  ((int64_t) (d[46]) << 32)
+           |  ((int64_t) (d[47]) << 40)
+           |  ((int64_t) (d[48]) << 48);
+    dd[ 7] =  ((int64_t) (d[49]) <<  0)
+           |  ((int64_t) (d[50]) <<  8)
+           |  ((int64_t) (d[51]) << 16)
+           |  ((int64_t) (d[52]) << 24)
+           |  ((int64_t) (d[53]) << 32)
+           |  ((int64_t) (d[54]) << 40)
+           |  ((int64_t) (d[55]) << 48);
+
+    /* a * b + d */
+    t[ 0] =  dd[ 0] + (int128_t)ad[ 0] * bd[ 0];
+    t[ 1] =  dd[ 1] + (int128_t)ad[ 0] * bd[ 1]
+                    + (int128_t)ad[ 1] * bd[ 0];
+    t[ 2] =  dd[ 2] + (int128_t)ad[ 0] * bd[ 2]
+                    + (int128_t)ad[ 1] * bd[ 1]
+                    + (int128_t)ad[ 2] * bd[ 0];
+    t[ 3] =  dd[ 3] + (int128_t)ad[ 0] * bd[ 3]
+                    + (int128_t)ad[ 1] * bd[ 2]
+                    + (int128_t)ad[ 2] * bd[ 1]
+                    + (int128_t)ad[ 3] * bd[ 0];
+    t[ 4] =  dd[ 4] + (int128_t)ad[ 0] * bd[ 4]
+                    + (int128_t)ad[ 1] * bd[ 3]
+                    + (int128_t)ad[ 2] * bd[ 2]
+                    + (int128_t)ad[ 3] * bd[ 1]
+                    + (int128_t)ad[ 4] * bd[ 0];
+    t[ 5] =  dd[ 5] + (int128_t)ad[ 0] * bd[ 5]
+                    + (int128_t)ad[ 1] * bd[ 4]
+                    + (int128_t)ad[ 2] * bd[ 3]
+                    + (int128_t)ad[ 3] * bd[ 2]
+                    + (int128_t)ad[ 4] * bd[ 1]
+                    + (int128_t)ad[ 5] * bd[ 0];
+    t[ 6] =  dd[ 6] + (int128_t)ad[ 0] * bd[ 6]
+                    + (int128_t)ad[ 1] * bd[ 5]
+                    + (int128_t)ad[ 2] * bd[ 4]
+                    + (int128_t)ad[ 3] * bd[ 3]
+                    + (int128_t)ad[ 4] * bd[ 2]
+                    + (int128_t)ad[ 5] * bd[ 1]
+                    + (int128_t)ad[ 6] * bd[ 0];
+    t[ 7] =  dd[ 7] + (int128_t)ad[ 0] * bd[ 7]
+                    + (int128_t)ad[ 1] * bd[ 6]
+                    + (int128_t)ad[ 2] * bd[ 5]
+                    + (int128_t)ad[ 3] * bd[ 4]
+                    + (int128_t)ad[ 4] * bd[ 3]
+                    + (int128_t)ad[ 5] * bd[ 2]
+                    + (int128_t)ad[ 6] * bd[ 1]
+                    + (int128_t)ad[ 7] * bd[ 0];
+    t[ 8] =           (int128_t)ad[ 1] * bd[ 7]
+                    + (int128_t)ad[ 2] * bd[ 6]
+                    + (int128_t)ad[ 3] * bd[ 5]
+                    + (int128_t)ad[ 4] * bd[ 4]
+                    + (int128_t)ad[ 5] * bd[ 3]
+                    + (int128_t)ad[ 6] * bd[ 2]
+                    + (int128_t)ad[ 7] * bd[ 1];
+    t[ 9] =           (int128_t)ad[ 2] * bd[ 7]
+                    + (int128_t)ad[ 3] * bd[ 6]
+                    + (int128_t)ad[ 4] * bd[ 5]
+                    + (int128_t)ad[ 5] * bd[ 4]
+                    + (int128_t)ad[ 6] * bd[ 3]
+                    + (int128_t)ad[ 7] * bd[ 2];
+    t[10] =           (int128_t)ad[ 3] * bd[ 7]
+                    + (int128_t)ad[ 4] * bd[ 6]
+                    + (int128_t)ad[ 5] * bd[ 5]
+                    + (int128_t)ad[ 6] * bd[ 4]
+                    + (int128_t)ad[ 7] * bd[ 3];
+    t[11] =           (int128_t)ad[ 4] * bd[ 7]
+                    + (int128_t)ad[ 5] * bd[ 6]
+                    + (int128_t)ad[ 6] * bd[ 5]
+                    + (int128_t)ad[ 7] * bd[ 4];
+    t[12] =           (int128_t)ad[ 5] * bd[ 7]
+                    + (int128_t)ad[ 6] * bd[ 6]
+                    + (int128_t)ad[ 7] * bd[ 5];
+    t[13] =           (int128_t)ad[ 6] * bd[ 7]
+                    + (int128_t)ad[ 7] * bd[ 6];
+    t[14] =           (int128_t)ad[ 7] * bd[ 7];
+    t[15] = 0;
+
+    /* Mod curve order */
+    /* 2^446 - 0x8335dc163bb124b65129c96fde933d8d723a70aadc873d6d54a7bb0d */
+    /* Propagate carries */
+    c = t[ 0] >> 56; t[ 1] += c; t[ 0] = t[ 0] & 0xffffffffffffff;
+    c = t[ 1] >> 56; t[ 2] += c; t[ 1] = t[ 1] & 0xffffffffffffff;
+    c = t[ 2] >> 56; t[ 3] += c; t[ 2] = t[ 2] & 0xffffffffffffff;
+    c = t[ 3] >> 56; t[ 4] += c; t[ 3] = t[ 3] & 0xffffffffffffff;
+    c = t[ 4] >> 56; t[ 5] += c; t[ 4] = t[ 4] & 0xffffffffffffff;
+    c = t[ 5] >> 56; t[ 6] += c; t[ 5] = t[ 5] & 0xffffffffffffff;
+    c = t[ 6] >> 56; t[ 7] += c; t[ 6] = t[ 6] & 0xffffffffffffff;
+    c = t[ 7] >> 56; t[ 8] += c; t[ 7] = t[ 7] & 0xffffffffffffff;
+    c = t[ 8] >> 56; t[ 9] += c; t[ 8] = t[ 8] & 0xffffffffffffff;
+    c = t[ 9] >> 56; t[10] += c; t[ 9] = t[ 9] & 0xffffffffffffff;
+    c = t[10] >> 56; t[11] += c; t[10] = t[10] & 0xffffffffffffff;
+    c = t[11] >> 56; t[12] += c; t[11] = t[11] & 0xffffffffffffff;
+    c = t[12] >> 56; t[13] += c; t[12] = t[12] & 0xffffffffffffff;
+    c = t[13] >> 56; t[14] += c; t[13] = t[13] & 0xffffffffffffff;
+    c = t[14] >> 56; t[15] += c; t[14] = t[14] & 0xffffffffffffff;
+    /* Mod top half of extra words */
+    t[ 4] += (int128_t)0x21cf5b5529eec34L * t[12];
+    t[ 5] += (int128_t)0x0f635c8e9c2ab70L * t[12];
+    t[ 6] += (int128_t)0x2d944a725bf7a4cL * t[12];
+    t[ 7] += (int128_t)0x20cd77058eec490L * t[12];
+    t[ 5] += (int128_t)0x21cf5b5529eec34L * t[13];
+    t[ 6] += (int128_t)0x0f635c8e9c2ab70L * t[13];
+    t[ 7] += (int128_t)0x2d944a725bf7a4cL * t[13];
+    t[ 8] += (int128_t)0x20cd77058eec490L * t[13];
+    t[ 6] += (int128_t)0x21cf5b5529eec34L * t[14];
+    t[ 7] += (int128_t)0x0f635c8e9c2ab70L * t[14];
+    t[ 8] += (int128_t)0x2d944a725bf7a4cL * t[14];
+    t[ 9] += (int128_t)0x20cd77058eec490L * t[14];
+    t[ 7] += (int128_t)0x21cf5b5529eec34L * t[15];
+    t[ 8] += (int128_t)0x0f635c8e9c2ab70L * t[15];
+    t[ 9] += (int128_t)0x2d944a725bf7a4cL * t[15];
+    t[10] += (int128_t)0x20cd77058eec490L * t[15];
+    /* Propagate carries */
+    c = t[ 4] >> 56; t[ 5] += c; t[ 4] = t[ 4] & 0xffffffffffffff;
+    c = t[ 5] >> 56; t[ 6] += c; t[ 5] = t[ 5] & 0xffffffffffffff;
+    c = t[ 6] >> 56; t[ 7] += c; t[ 6] = t[ 6] & 0xffffffffffffff;
+    c = t[ 7] >> 56; t[ 8] += c; t[ 7] = t[ 7] & 0xffffffffffffff;
+    c = t[ 8] >> 56; t[ 9] += c; t[ 8] = t[ 8] & 0xffffffffffffff;
+    c = t[ 9] >> 56; t[10] += c; t[ 9] = t[ 9] & 0xffffffffffffff;
+    c = t[10] >> 56; t[11] += c; t[10] = t[10] & 0xffffffffffffff;
+    /* Mod bottom half of extra words */
+    t[ 0] += (int128_t)0x21cf5b5529eec34L * t[ 8];
+    t[ 1] += (int128_t)0x0f635c8e9c2ab70L * t[ 8];
+    t[ 2] += (int128_t)0x2d944a725bf7a4cL * t[ 8];
+    t[ 3] += (int128_t)0x20cd77058eec490L * t[ 8];
+    t[ 1] += (int128_t)0x21cf5b5529eec34L * t[ 9];
+    t[ 2] += (int128_t)0x0f635c8e9c2ab70L * t[ 9];
+    t[ 3] += (int128_t)0x2d944a725bf7a4cL * t[ 9];
+    t[ 4] += (int128_t)0x20cd77058eec490L * t[ 9];
+    t[ 2] += (int128_t)0x21cf5b5529eec34L * t[10];
+    t[ 3] += (int128_t)0x0f635c8e9c2ab70L * t[10];
+    t[ 4] += (int128_t)0x2d944a725bf7a4cL * t[10];
+    t[ 5] += (int128_t)0x20cd77058eec490L * t[10];
+    t[ 3] += (int128_t)0x21cf5b5529eec34L * t[11];
+    t[ 4] += (int128_t)0x0f635c8e9c2ab70L * t[11];
+    t[ 5] += (int128_t)0x2d944a725bf7a4cL * t[11];
+    t[ 6] += (int128_t)0x20cd77058eec490L * t[11];
+    /* Propagate carries */
+    c = t[ 0] >> 56; t[ 1] += c; rd[ 0] = (int64_t)(t[ 0] & 0xffffffffffffff);
+    c = t[ 1] >> 56; t[ 2] += c; rd[ 1] = (int64_t)(t[ 1] & 0xffffffffffffff);
+    c = t[ 2] >> 56; t[ 3] += c; rd[ 2] = (int64_t)(t[ 2] & 0xffffffffffffff);
+    c = t[ 3] >> 56; t[ 4] += c; rd[ 3] = (int64_t)(t[ 3] & 0xffffffffffffff);
+    c = t[ 4] >> 56; t[ 5] += c; rd[ 4] = (int64_t)(t[ 4] & 0xffffffffffffff);
+    c = t[ 5] >> 56; t[ 6] += c; rd[ 5] = (int64_t)(t[ 5] & 0xffffffffffffff);
+    c = t[ 6] >> 56; t[ 7] += c; rd[ 6] = (int64_t)(t[ 6] & 0xffffffffffffff);
+    rd[ 7] = t[7];
+    /* Mod bits over 56 in last word */
+    o = rd[7] >> 54; rd[ 7] &= 0x3fffffffffffff;
+    rd[ 0] += 0x873d6d54a7bb0dL * o;
+    rd[ 1] += 0x3d8d723a70aadcL * o;
+    rd[ 2] += 0xb65129c96fde93L * o;
+    rd[ 3] += 0x8335dc163bb124L * o;
+    /* Propagate carries */
+    o = rd[ 0] >> 56; rd[ 1] += o; rd[ 0] = rd[ 0] & 0xffffffffffffff;
+    o = rd[ 1] >> 56; rd[ 2] += o; rd[ 1] = rd[ 1] & 0xffffffffffffff;
+    o = rd[ 2] >> 56; rd[ 3] += o; rd[ 2] = rd[ 2] & 0xffffffffffffff;
+    o = rd[ 3] >> 56; rd[ 4] += o; rd[ 3] = rd[ 3] & 0xffffffffffffff;
+    o = rd[ 4] >> 56; rd[ 5] += o; rd[ 4] = rd[ 4] & 0xffffffffffffff;
+    o = rd[ 5] >> 56; rd[ 6] += o; rd[ 5] = rd[ 5] & 0xffffffffffffff;
+    o = rd[ 6] >> 56; rd[ 7] += o; rd[ 6] = rd[ 6] & 0xffffffffffffff;
+
+    /* Convert to bytes */
+    r[ 0] = (rd[0 ] >>  0);
+    r[ 1] = (rd[0 ] >>  8);
+    r[ 2] = (rd[0 ] >> 16);
+    r[ 3] = (rd[0 ] >> 24);
+    r[ 4] = (rd[0 ] >> 32);
+    r[ 5] = (rd[0 ] >> 40);
+    r[ 6] = (rd[0 ] >> 48);
+    r[ 7] = (rd[1 ] >>  0);
+    r[ 8] = (rd[1 ] >>  8);
+    r[ 9] = (rd[1 ] >> 16);
+    r[10] = (rd[1 ] >> 24);
+    r[11] = (rd[1 ] >> 32);
+    r[12] = (rd[1 ] >> 40);
+    r[13] = (rd[1 ] >> 48);
+    r[14] = (rd[2 ] >>  0);
+    r[15] = (rd[2 ] >>  8);
+    r[16] = (rd[2 ] >> 16);
+    r[17] = (rd[2 ] >> 24);
+    r[18] = (rd[2 ] >> 32);
+    r[19] = (rd[2 ] >> 40);
+    r[20] = (rd[2 ] >> 48);
+    r[21] = (rd[3 ] >>  0);
+    r[22] = (rd[3 ] >>  8);
+    r[23] = (rd[3 ] >> 16);
+    r[24] = (rd[3 ] >> 24);
+    r[25] = (rd[3 ] >> 32);
+    r[26] = (rd[3 ] >> 40);
+    r[27] = (rd[3 ] >> 48);
+    r[28] = (rd[4 ] >>  0);
+    r[29] = (rd[4 ] >>  8);
+    r[30] = (rd[4 ] >> 16);
+    r[31] = (rd[4 ] >> 24);
+    r[32] = (rd[4 ] >> 32);
+    r[33] = (rd[4 ] >> 40);
+    r[34] = (rd[4 ] >> 48);
+    r[35] = (rd[5 ] >>  0);
+    r[36] = (rd[5 ] >>  8);
+    r[37] = (rd[5 ] >> 16);
+    r[38] = (rd[5 ] >> 24);
+    r[39] = (rd[5 ] >> 32);
+    r[40] = (rd[5 ] >> 40);
+    r[41] = (rd[5 ] >> 48);
+    r[42] = (rd[6 ] >>  0);
+    r[43] = (rd[6 ] >>  8);
+    r[44] = (rd[6 ] >> 16);
+    r[45] = (rd[6 ] >> 24);
+    r[46] = (rd[6 ] >> 32);
+    r[47] = (rd[6 ] >> 40);
+    r[48] = (rd[6 ] >> 48);
+    r[49] = (rd[7 ] >>  0);
+    r[50] = (rd[7 ] >>  8);
+    r[51] = (rd[7 ] >> 16);
+    r[52] = (rd[7 ] >> 24);
+    r[53] = (rd[7 ] >> 32);
+    r[54] = (rd[7 ] >> 40);
+    r[55] = (rd[7 ] >> 48);
+    r[56] = 0;
+}
+
+/* Precomputed multiples of the base point. */
+static const ge448_precomp base[58][8] = {
+{
+    {
+        { 0x26a82bc70cc05eL, 0x80e18b00938e26L, 0xf72ab66511433bL,
+          0xa3d3a46412ae1aL, 0x0f1767ea6de324L, 0x36da9e14657047L,
+          0xed221d15a622bfL, 0x4f1970c66bed0dL },
+        { 0x08795bf230fa14L, 0x132c4ed7c8ad98L, 0x1ce67c39c4fdbdL,
+          0x05a0c2d73ad3ffL, 0xa3984087789c1eL, 0xc7624bea73736cL,
+          0x248876203756c9L, 0x693f46716eb6bcL }
+    },
+    {
+        { 0x55555555555555L, 0x55555555555555L, 0x55555555555555L,
+          0x55555555555555L, 0xaaaaaaaaaaaaa9L, 0xaaaaaaaaaaaaaaL,
+          0xaaaaaaaaaaaaaaL, 0xaaaaaaaaaaaaaaL },
+        { 0xeafbcdea9386edL, 0xb2bed1cda06bdaL, 0x833a2a3098bbbcL,
+          0x8ad8c4b80d6565L, 0x884dd7b7e36d72L, 0xc2b0036ed7a035L,
+          0x8db359d6205086L, 0xae05e9634ad704L }
+    },
+    {
+        { 0x28173286ff2f8fL, 0xb769465da85757L, 0xf7f6271fd6e862L,
+          0x4a3fcfe8daa9cbL, 0xda82c7e2ba077aL, 0x943332241b8b8cL,
+          0x6455bd64316cb6L, 0x0865886b9108afL },
+        { 0x22ac13588ed6fcL, 0x9a68fed02dafb8L, 0x1bdb6767f0bffaL,
+          0xec4e1d58bb3a33L, 0x56c3b9fce43c82L, 0xa6449a4a8d9523L,
+          0xf706cbda7ad43aL, 0xe005a8dbd5125cL }
+    },
+    {
+        { 0xce42ac48ba7f30L, 0xe1798949e120e2L, 0xf1515dd8ba21aeL,
+          0x70c74cc301b7bdL, 0x0891c693fda4beL, 0x29ea255a09cf4eL,
+          0x2c1419a17226f9L, 0x49dcbc5c6c0cceL },
+        { 0xe236f86de51839L, 0x44285d0d4f5b32L, 0x7ea1ca9472b5d4L,
+          0x7b8a5bc1c0d8f9L, 0x57d845c90dc322L, 0x1b979cb7c02f04L,
+          0x27164b33a5de02L, 0xd49077e4accde5L }
+    },
+    {
+        { 0xa99d1092030034L, 0x2d8cefc6f950d0L, 0x7a920c3c96f07bL,
+          0x958812808bc0d5L, 0x62ada756d761e8L, 0x0def80cbcf7285L,
+          0x0e2ba7601eedb5L, 0x7a9f9335a48dcbL },
+        { 0xb4731472f435ebL, 0x5512881f225443L, 0xee59d2b33c5840L,
+          0xb698017127d7a4L, 0xb18fced86551f7L, 0x0ade260ca1823aL,
+          0xd3b9109ce4fd58L, 0xadfd751a2517edL }
+    },
+    {
+        { 0x7fd7652abef79cL, 0x6c20a07443a878L, 0x5c1840d12a7109L,
+          0x4a06e4a876451cL, 0x3bed0b4ad95f65L, 0x25d2e673fb0260L,
+          0x2e00349aebd971L, 0x54523e04498b72L },
+        { 0xea5d1da07c7bccL, 0xcce776938ea98cL, 0x80284e861d2b3eL,
+          0x48de76b6e1ff1bL, 0x7b121869c58522L, 0xbfd053a2765a1aL,
+          0x2d743ec056c667L, 0x3f99b9cd8ab61cL }
+    },
+    {
+        { 0xdf9567ceb5eaf7L, 0x110a6b478ac7d7L, 0x2d335014706e0bL,
+          0x0df9c7b0b5a209L, 0xba4223d568e684L, 0xd78af2d8c3719bL,
+          0x77467b9a5291b6L, 0x079748e5c89befL },
+        { 0xe20d3fadac377fL, 0x34e866972b5c09L, 0xd8687a3c40bbb7L,
+          0x7b3946fd2f84c9L, 0xd00e40ca78f50eL, 0xb87594417e7179L,
+          0x9c7373bcb23583L, 0x7ddeda3c90fd69L }
+    },
+    {
+        { 0x2538a67153bde0L, 0x223aca9406b696L, 0xf9080dc1ad713eL,
+          0x6c4cb47d816a64L, 0xbc285685dc8b97L, 0xd97b037c08e2d7L,
+          0x5b63fb45d0e66bL, 0xd1f1bc5520e8a3L },
+        { 0x4eb873ce69e09bL, 0x1663164bc8ee45L, 0x08f7003ba8d89fL,
+          0x4b98ead386ad82L, 0xa4b93b7bd94c7bL, 0x46ba408c6b38b3L,
+          0xdae87d1f3574ffL, 0xc7564f4e9bea9bL }
+    },
+},
+{
+    {
+        { 0x2e4fdb25bfac1cL, 0xf0d79aaf5f3bcaL, 0xe756b0d20fb7ccL,
+          0xe3696beb39609aL, 0xa019fc35a5ab58L, 0xa2b24853b281ddL,
+          0xe3e2be761ac0a2L, 0xf19c34feb56730L },
+        { 0x2d25ce8a30241eL, 0xf5661eab73d7a1L, 0x4611ed0daac9f4L,
+          0xd5442344ced72cL, 0xce78f52e92e985L, 0x6fe5dd44da4aadL,
+          0xfcaddc61d363ceL, 0x3beb69cc9111bfL }
+    },
+    {
+        { 0xd2e7660940ebc9L, 0xe032018b17bbe0L, 0xad4939175c0575L,
+          0xdd0b14721c7f34L, 0x52c2ba43e147e0L, 0x7dd03c60ee8973L,
+          0x5472e8decf2754L, 0x17a1cd1d6482bbL },
+        { 0xdd43b848128b3fL, 0xf0cae34ea7dd25L, 0x81ca99fff07df2L,
+          0x1c8959792ebbdcL, 0x45c7a6872155e6L, 0x907a50e39ddd08L,
+          0xbe398c2bb2d89bL, 0x38063f91b3b536L }
+    },
+    {
+        { 0x149fafbf843b23L, 0x00ab582ac7f22aL, 0xa3b981bf2f4d4cL,
+          0x2ce1a654341a22L, 0x68a40747c03b63L, 0x63206a212f2cf8L,
+          0xc9961d35149741L, 0xfb85430bc7099eL },
+        { 0x9c9107290a9e59L, 0x734e94a06de367L, 0x5cf3cbedb99214L,
+          0xc6bce3245b1fb9L, 0x1a82abedd7be0dL, 0xf74976aede7d1cL,
+          0x7025b7c21503bdL, 0xf7894910d096abL }
+    },
+    {
+        { 0x6bd48bb555a41bL, 0xfbdd0d067de206L, 0x98bc477dd6dfd1L,
+          0x1d0693b3e40b8aL, 0x6e15563da32ae4L, 0x0194a20fcebaa2L,
+          0xda116150980a93L, 0x8e119200109cecL },
+        { 0x8ea0552ffb9726L, 0xeba50a4047e44bL, 0xc050d2460ddf76L,
+          0xe009204ac690e0L, 0x47b86399b18edcL, 0x2f5b76ac77f23fL,
+          0x4296c240792905L, 0x73f6b4a06f6dc7L }
+    },
+    {
+        { 0xb6ef9ea3b10cadL, 0x312843df7c8fceL, 0x5bdcd528bedf86L,
+          0x2889059f6dd823L, 0x04578e908bfde0L, 0x3245df3123e2e5L,
+          0xbf461d57ee9e3aL, 0xddec2d46f94cebL },
+        { 0x21b43b9145768fL, 0xe79a8f9dae962aL, 0xff1972bcbb043fL,
+          0xe3dcf6d239649bL, 0xed592bdc533b85L, 0x14ff94fdbe22d0L,
+          0x6c4eb87f1d8e22L, 0xd8d4c71d18cf6dL }
+    },
+    {
+        { 0xcda666c8d96345L, 0x9ecaa25836cd21L, 0x6e885bd984606eL,
+          0x1dd5fef804f054L, 0x9dfff6b6959ae4L, 0x99b9cf8c9b55ccL,
+          0xb4716b062b9b80L, 0x13ec87c554b128L },
+        { 0xe696d1f75aacc2L, 0xf78c99387fc5ffL, 0x76c09473809d42L,
+          0x99ce62db618fa8L, 0x35e3e022f53341L, 0x62fc1ac0db6c5eL,
+          0xa1fb8e600d8b47L, 0x0bc107058f0d1eL }
+    },
+    {
+        { 0x1f4526916da513L, 0x1f2fc04f5cf341L, 0xae9208664d23e0L,
+          0x4e33082da8a113L, 0x2688ec61cfc085L, 0x6f2e8de6e5327fL,
+          0x2070db3b4e48a8L, 0xd6626973240adeL },
+        { 0xa6b317ffbd997bL, 0x9fa1b5649e26bdL, 0xcbf0d258cba0f3L,
+          0x4a7791b17b4745L, 0x25f555b5c9e190L, 0x7cd3940923ec4cL,
+          0x16f4c6ae98f1b6L, 0x7962116bcd4e0fL }
+    },
+    {
+        { 0x8d58fa302491e3L, 0x7cf76c67ab3898L, 0xbc2f657647ebc7L,
+          0x5f4bfe0d25f5a3L, 0x503f478d69505dL, 0x4a889fc3fb6645L,
+          0x33e1bc1fa86b18L, 0xabb234f5508dd8L },
+        { 0x5348e1b9a05b48L, 0x57ac5f164dc858L, 0x21f4d38ec8a2d3L,
+          0x5ec6d3ca3a3e9dL, 0xcd4062e560a0b8L, 0x49b74f73433f59L,
+          0xefd9d87cab14e3L, 0x858ce7feb964f5L }
+    },
+},
+{
+    {
+        { 0x7577254eb731b4L, 0x9fff1fb4e2397eL, 0x749b145c821715L,
+          0x40619fe2e65e67L, 0x57b82812e618d8L, 0x063186c707b83eL,
+          0xcfc80cb31b24a2L, 0xcca6185ac75169L },
+        { 0x6539f44b255818L, 0x5895da00368bceL, 0x841a30917c7482L,
+          0x85469e1b1a9c9eL, 0x05664c0e4f7d9dL, 0x8a063187b35cc0L,
+          0x214763aa0e9b0aL, 0x1bd872c4b26ac2L }
+    },
+    {
+        { 0x3578f97a93762bL, 0x434f69a72d52bcL, 0xddcca4022cb565L,
+          0xa7d1e41ff20544L, 0x823475d8a66588L, 0x9fc97c799d7bafL,
+          0x15542f1660e421L, 0xa7d1f60843faf6L },
+        { 0xbbfaab54063cccL, 0x3ad9bada49855aL, 0xffd5f1c5bddbfeL,
+          0x0e419c2ae87e59L, 0xdce6ed6f89956bL, 0xf047c21ccd8951L,
+          0x6ed4a1ba83c991L, 0x85af86e2d28e0aL }
+    },
+    {
+        { 0x04433c49ed48a8L, 0xeffa8580bc375dL, 0xfb0e1b2fa6e3b5L,
+          0x51483a2a1aaddaL, 0x733448df8b2ea8L, 0xaa0513cf639f0cL,
+          0x6bc61a3a23bf84L, 0x3e64f68dc2430dL },
+        { 0x51bf502c5876b1L, 0x6b833751c0dd2aL, 0xe597be1342914fL,
+          0x43d5ab0f8e632cL, 0x2696715d62587bL, 0xe87d20aed34f24L,
+          0x25b7e14e18baf7L, 0xf5eb753e22e084L }
+    },
+    {
+        { 0x51da71724d8295L, 0xd478e4318d1340L, 0xacf94f42cf7f66L,
+          0x230d7d13760711L, 0x078a66a5abc626L, 0xd78b0bd6b5f6daL,
+          0x23a971396d1d0bL, 0x87623d64bd960fL },
+        { 0x0841a9977db53fL, 0x23c1a53f4d03eeL, 0x2f62c2e1f95df1L,
+          0xd1e2ec1116f4e7L, 0x896d2fe34811a9L, 0xad65e2bec8096eL,
+          0x09d36f9b1744a6L, 0x564bac7ff5ddf7L }
+    },
+    {
+        { 0x48b41e2c3f77cbL, 0x52276730968938L, 0xff1b899fd9b452L,
+          0x67cf3bf2e03908L, 0x3731d90248a6fbL, 0xd800a05256598fL,
+          0x347d2f2bdc8530L, 0xc72a3007ad08a1L },
+        { 0x5e5be741d65f73L, 0x183d4ae4206eadL, 0xcb50c1cade4013L,
+          0x39db43d3102483L, 0x0eb49fa70d6325L, 0xa18f6a2c1f02b9L,
+          0x3e6fe30dbf5e66L, 0xac4eeb93a82aa5L }
+    },
+    {
+        { 0x295affd3613d47L, 0x7b7e68ab56f343L, 0x980629692b173bL,
+          0x937061ebad35fbL, 0x25019785c21eeaL, 0xe92721b787a746L,
+          0x463c46c3651631L, 0x6da4b5dc6f2d5aL },
+        { 0xcb67cc16e6d18cL, 0x1b30d520010588L, 0x1bb6ea6db1d1e8L,
+          0x9c6308aad11474L, 0xc3167413d19b1cL, 0xf2e84d7be4fb79L,
+          0xeccb873e050f77L, 0xf7c8d80cc2bf86L }
+    },
+    {
+        { 0x16fe2e17ab20e5L, 0x274deadecf3a92L, 0x9f434870972f67L,
+          0x9a65a454605751L, 0x9351f07b8980b2L, 0x412962b0eb08a5L,
+          0xb8c9bfd733f440L, 0xac2cd641ca250fL },
+        { 0x68cdd0f2ba7d26L, 0xd3d2a4a4e0beeaL, 0x50135c19f4a258L,
+          0xb475e53f0d02e4L, 0x432d8c6589283aL, 0x29141bfa0a2b6cL,
+          0xd7379ec13704bcL, 0x831562c52459bfL }
+    },
+    {
+        { 0x676b366eeec506L, 0xdd6cad545da557L, 0x9de39cb77057d2L,
+          0x388c5fedf05bf1L, 0x6e55650dfb1f03L, 0xdbceffa52126c9L,
+          0xe4d187b3a4a220L, 0xac914f9eb27020L },
+        { 0x3f4ab98d2e5f30L, 0x6ae97dadd94451L, 0x64af6950d80981L,
+          0x36b4b90f2aa2ceL, 0x6adcd7a18fcf59L, 0x3ddfe6dc116c81L,
+          0x661072b549b9e3L, 0xd9e3134ec4584dL }
+    },
+},
+{
+    {
+        { 0x6e46707a1e400cL, 0xcdc990b551e806L, 0xfa512513a07724L,
+          0x500553f1b3e4f5L, 0x67e8b58ef4dac3L, 0x958349f2cb4cc7L,
+          0x948b4ed7f9143cL, 0xe646d092b7822bL },
+        { 0xd185dd52bc3c26L, 0x34ba16ec837fc9L, 0x516d4ba5a788b7L,
+          0x72f2de756142b0L, 0x5846f61f445b3dL, 0xdaec5c9f4631a1L,
+          0xa10b18d169ea9bL, 0x85d2998af6751bL }
+    },
+    {
+        { 0xda0cac443ddf31L, 0x0966e171860911L, 0x9c3a7173cba600L,
+          0x5781880571f895L, 0x5e2a927737ac21L, 0x8a461486c253fbL,
+          0xe801cf595ee626L, 0x271166a5f84fc0L },
+        { 0x306937fba856bdL, 0x80cb179be80a43L, 0x70393b2ffb5980L,
+          0xa8e4a1c660fc64L, 0x5078abfc0d5c98L, 0x62ba530fbd31ffL,
+          0xda608449e51b88L, 0xdb6ecb0355ae15L }
+    },
+    {
+        { 0xbcbb6ea23c5d49L, 0x08906ba87959bcL, 0x61cc0880991665L,
+          0x21d6b41d90d13cL, 0x0c27ac1d03afe9L, 0x159995f5cfea52L,
+          0x4057e20bdfe220L, 0xdd1b349cbdf058L },
+        { 0x0cd66262e37159L, 0x8cea8e43eb0d17L, 0x553af085bce7f0L,
+          0xb94cb5f5b6511dL, 0x7b8d3a550e0330L, 0x415911057ab7e7L,
+          0x320820e6aa886fL, 0x130d4d6c5b6b81L }
+    },
+    {
+        { 0x2f98059c7bb2edL, 0x33ebf4ca49bdfbL, 0x04c72a1b0a675bL,
+          0x94f9ea4adb6c14L, 0x03376d8cf728c0L, 0x5c059d34c6eb6aL,
+          0x0178408eb8da48L, 0x8bf607b2956817L },
+        { 0x7ad2822ceb3d28L, 0xd07a40337ae653L, 0xbc68739c1e46b2L,
+          0x15d7cca9154ba9L, 0x6b97103a26617dL, 0xa610314b2e0d28L,
+          0x52a08bafd4d363L, 0x80c2638c7dc2afL }
+    },
+    {
+        { 0x0cde7ef3187140L, 0x93b92ca4b70acdL, 0x5696e507a79cdcL,
+          0x73cc9728eaab66L, 0x6b8c5b68f1b0c7L, 0xb39a3184f7e0b1L,
+          0x72cfb0d376108aL, 0x0c53efc98536a7L },
+        { 0x03b52a824c2f1eL, 0x717132e6399b78L, 0x31ebd25349a85dL,
+          0x265ee811a200d4L, 0x0b1aad2407d7adL, 0x9a9ebc894d2962L,
+          0x994e6cd41171d9L, 0x09178d86c8fa83L }
+    },
+    {
+        { 0x7d1d238a2593a1L, 0x863e93ab38fb19L, 0xd23a4cce7712a9L,
+          0x7477b1327efcd5L, 0x3ba69ff1392f6cL, 0x63e0c32f7bb5a5L,
+          0x20412c0026effdL, 0xd3ee8e4ef424abL },
+        { 0x14c0b2d64e5174L, 0x2a611f2e58c47bL, 0xaa58a06c1e8635L,
+          0x1870c3ecf17034L, 0xb0d5e3483f1bf3L, 0xb19905c16c7eb3L,
+          0xbf85d626efa4caL, 0xfd16b2f180f92bL }
+    },
+    {
+        { 0xc0431af3adcb48L, 0xc9a7a8dba90496L, 0xd765a163895294L,
+          0xb02a41a551de70L, 0xb71b261749b8a1L, 0x0dfa89ec6f3e47L,
+          0x392c0d80f5d9ceL, 0x43c59d831aee3cL },
+        { 0x94bfb6d4d76f49L, 0xe8f5b8227d68a5L, 0x78ae1d9630fd08L,
+          0x1379029ce1bdaeL, 0x9689da066715dcL, 0x5d4cb24d3278c7L,
+          0x77c98339e84fbcL, 0xc8478dcea1048cL }
+    },
+    {
+        { 0xe4b8f31770d2baL, 0x744f65242ea095L, 0xd06e090036f138L,
+          0xd3a3d5b3b078caL, 0xc7ae54178b8417L, 0xad6c5d4c738fd7L,
+          0x61789844676454L, 0xfbf34235d9a392L },
+        { 0x8e451a7fff772fL, 0x8605bb75ffbeadL, 0x6f75cc1930d59fL,
+          0xd4f47558f3f460L, 0xefd2d796700c8aL, 0xceb462a2406421L,
+          0x8ed0f979dfe8f1L, 0x0280bf1d1d7600L }
+    },
+},
+{
+    {
+        { 0x761c219dd9a54dL, 0x1127fcb86a39c0L, 0x7d0e4f04c9beddL,
+          0x27c017a4d976b6L, 0x800c973da042cfL, 0xe7419af2593f11L,
+          0xbd49448ae67960L, 0xd3b60b7744fd85L },
+        { 0x5e74ed961676feL, 0x7383ef339af627L, 0x34407e05e62df7L,
+          0xb0534618bf3196L, 0xd6b7184583b407L, 0xe3d068555011beL,
+          0x94083d02124b52L, 0xa908324f780aafL }
+    },
+    {
+        { 0xb27af1a73ec9c3L, 0xb66ad9f70fa725L, 0x07724f58cf73e4L,
+          0xc3fcd579949358L, 0x06efb79da0cc01L, 0x1e977d210597c9L,
+          0xcd732be703e8d6L, 0x6fd29bf6d0b69eL },
+        { 0xca658ac667128eL, 0xca0036ac7872b3L, 0xc9698585355837L,
+          0x59f3be8075cf1cL, 0x9f1b9b03809a11L, 0x6881ced9733871L,
+          0x8cda0fbe902a5fL, 0x4d8c69b4e3871eL }
+    },
+    {
+        { 0x5c3bd07ddee82fL, 0xe52dd312f9723bL, 0xcf8761174f1be8L,
+          0xd9ecbd835f8657L, 0x4f77393fbfea17L, 0xec9579fd78fe2cL,
+          0x320de920fb0450L, 0xbfc9b8d95d9c47L },
+        { 0x818bd425e1b4c3L, 0x0e0c41c40e2c78L, 0x0f7ce9abccb0d0L,
+          0xc7e9fa45ef81fbL, 0x2561d6f73574adL, 0xa2d8d99d2efb0bL,
+          0xcf8f316e96cd0aL, 0x088f0f14964807L }
+    },
+    {
+        { 0x0a8498945d5a19L, 0x47ab39c6c2131fL, 0x5c02824f3fc35dL,
+          0x3be77c89ee8127L, 0xa8491b7c90b80aL, 0x5397631a28aa93L,
+          0x54d6e816c0b344L, 0x22878be876d0e4L },
+        { 0xeecb8a46db3bf6L, 0x340f29554577a3L, 0xa7798689a00f85L,
+          0x98465d74bb9147L, 0x9532d7dda3c736L, 0x6d574f17504b20L,
+          0x6e356f4d86e435L, 0x70c2e8d4533887L }
+    },
+    {
+        { 0xdce5a0ad293980L, 0x32d7210069010eL, 0x64af59f06deaaaL,
+          0xd6b43c459239e4L, 0x74bf2559199c29L, 0x3efff4111e1e2bL,
+          0x1aa7b5ecb0f8d8L, 0x9baa22b989e395L },
+        { 0xf78db807b33ac1L, 0x05a3b4354ce80aL, 0x371defc7bc8e12L,
+          0x63305a01224610L, 0x028b1ae6d697efL, 0x7aba39c1cd8051L,
+          0x76ed7a928ee4b4L, 0x31bd02a7f99901L }
+    },
+    {
+        { 0xf9dab7af075566L, 0x84e29a5f56f18bL, 0x3a4c45af64e56dL,
+          0xcf3644a6a7302dL, 0xfb40808156b658L, 0xf33ef9cf96be52L,
+          0xfe92038caa2f08L, 0xcfaf2e3b261894L },
+        { 0xf2a0dbc224ce3fL, 0xed05009592eb27L, 0x501743f95889d0L,
+          0xa88a47877c95c2L, 0x86755fbdd63da9L, 0x9024acfc7ee828L,
+          0x634b020f38113bL, 0x3c5aacc6056e64L }
+    },
+    {
+        { 0xe03ff3aa2ef760L, 0x3b95767b1c3bacL, 0x51ce6aa940d754L,
+          0x7cbac3f47a9a3dL, 0xa864ac434f8d1aL, 0x1eff3f280dbd47L,
+          0xd8ab6607ebd5caL, 0xc4df5c405b07edL },
+        { 0x3dc92dfa4f095bL, 0x5ae36a57cdbd9aL, 0x7ff29737891e04L,
+          0x37c03130a5fe7bL, 0x210d7b0aa6e35eL, 0x6edfb53bf200d8L,
+          0x787b68d84afb85L, 0x9b5c49b72c6de3L }
+    },
+    {
+        { 0x51857164010f4eL, 0xe0b144b0536ebeL, 0xacabb14887d663L,
+          0xac1caededf584fL, 0xb43fb8faf175a3L, 0x310b6d5f992a3cL,
+          0xf2c4aa285178a4L, 0x69c99698bd56bfL },
+        { 0x73d6372a4d972eL, 0x3d5bb2e9583803L, 0x7bf7d18d891581L,
+          0xa5ce5d7568a34aL, 0x670b4331f45c81L, 0x97265a71f96910L,
+          0xdb14eb3b07c1eaL, 0xdf008eafed447cL }
+    },
+},
+{
+    {
+        { 0x0379f5a00c2f10L, 0xb320b4fd350285L, 0x74e560e8efdd7dL,
+          0xf2f017ef46a140L, 0x2ced1a60f34624L, 0x7c4b4e3ca08ec9L,
+          0xdffc2a15d8bc6bL, 0xcc8f3f3527b007L },
+        { 0x59f8ac4861fe83L, 0x8d48d2cd03144cL, 0xa8457d2bfa6dceL,
+          0xd7ed333677c136L, 0xcb8e219c228e18L, 0x5f70bc916ab1e4L,
+          0x2ae3a3d3780370L, 0x9f3365488f17adL }
+    },
+    {
+        { 0xeab0710960e4bbL, 0xc668a78ab9cfd3L, 0x2e85553b0ef946L,
+          0xa43c4b98df5df3L, 0x0ecd5593cb3646L, 0x6f543c418dbe71L,
+          0xee7edaaf59818bL, 0xc44e8d290911c1L },
+        { 0xafb38b1269b509L, 0x9e2737c52afe2cL, 0x5b2ef02ccfa664L,
+          0x1e0aeace1cc58bL, 0x37a57e95ea134eL, 0xc9c465a83b9fc2L,
+          0x4b9e8c76e3eccaL, 0xca07dbe9bdbab5L }
+    },
+    {
+        { 0xd297f3cb0d7807L, 0xee441a5f59ce61L, 0x728553bb2db844L,
+          0x90f87e5640e9e0L, 0xaa72cbfcb76dffL, 0x065c6864012d57L,
+          0xd5ee88f9678b44L, 0x3d74b852177603L },
+        { 0x3f9c947748b68eL, 0x03856d98f44d44L, 0xde34b84462426cL,
+          0xc16d1bb845ab29L, 0x9df6217d2e18deL, 0xec6d219b154643L,
+          0x22a8ec32ee0f8fL, 0x632ad3891c5175L }
+    },
+    {
+        { 0x19d9d236869267L, 0x628df94fe5532aL, 0x458d76c6dc9a01L,
+          0x405fe6c2cc39c8L, 0x7dddc67f3a04baL, 0xfee630312500c7L,
+          0x580b6f0a50e9deL, 0xfb5918a6090604L },
+        { 0xd7159253af6b2dL, 0x83d62d61c7d1ecL, 0x94398c185858c4L,
+          0x94643dc14bfb64L, 0x758fa38af7db80L, 0xe2d7d93a8a1557L,
+          0xa569e853562af1L, 0xd226bdd84346aaL }
+    },
+    {
+        { 0xc2d0a5ed0ccd20L, 0xeb9adb85dbc0cfL, 0xe0a29ee26d7e88L,
+          0x8bb39f884a8e98L, 0x511f1c137396eaL, 0xbc9ec5ac8b2fb3L,
+          0x299d81c090e5bcL, 0xe1dfe344cdd587L },
+        { 0x80f61f45e465b7L, 0x5699c531bad59eL, 0x85e92e4b79ff92L,
+          0x1e64fce9db244cL, 0x3748574a22097dL, 0xe2aa6b9efff24eL,
+          0xb951be70a10bc6L, 0x66853269067a1cL }
+    },
+    {
+        { 0xf716ddfa6114d3L, 0x9e515f5037ec1fL, 0x773454144944a6L,
+          0x1540c4caba97ccL, 0xe41e5488b54bb7L, 0x4363156cae37bcL,
+          0xc384eaff3d2ce8L, 0x72a4f454c58ba4L },
+        { 0x0ceb530dcaf3fcL, 0x72d536578dcdbbL, 0x9b44084c6320faL,
+          0x6262d34eb74c70L, 0x8abac85608e6dcL, 0x82a526410dd38dL,
+          0xbc39911a819b8dL, 0xbda15fe03ad0d9L }
+    },
+    {
+        { 0xadbf587f9dc60bL, 0xf9d814f7d846d2L, 0xccdd241b77bde0L,
+          0x89cb6d72242f50L, 0x95c0e3ee6360a8L, 0x7c7dd5adf49713L,
+          0x68e0e4957d5814L, 0x3aa097d0c16571L },
+        { 0xb56b672267d03aL, 0x4f557088c44af4L, 0x67c49e7f3252a5L,
+          0x871d6cfc94a469L, 0x57ae99801fbfaaL, 0x5c0e48f48a5d8eL,
+          0xe9bf9c85e240b9L, 0xa41018999d41caL }
+    },
+    {
+        { 0x6beb0c7b2889b4L, 0x78b7f899455370L, 0xd43421447ca364L,
+          0xdd9d2da9f21e5bL, 0xa0c7c180a7e4aaL, 0x022c0d4da1660cL,
+          0xe1f5c165a57002L, 0x51c7c9e518f68fL },
+        { 0x6d521b62586502L, 0xa0f2cb3183ec1bL, 0x578b4e0caa5e16L,
+          0x7bd4fbd764997fL, 0x7ec56c364b1804L, 0xb75a2540ee08e4L,
+          0x6bf74a6dc19080L, 0x6ec793d97d6e59L }
+    },
+},
+{
+    {
+        { 0x16789d60a4beb9L, 0x512b2cd9b9c801L, 0xf8b6d108c7bb9cL,
+          0xd85651e9ebdc8cL, 0xc9450829ba971aL, 0x852d9ea7e1cf78L,
+          0x6a45e350af01e2L, 0xe6cdadf6151dcfL },
+        { 0xc454bb42b8c01bL, 0x59e0c493d54cd2L, 0x8e1e686454d608L,
+          0x0dbae4bd8c6103L, 0xa5603a16c18b18L, 0x227a6b23369093L,
+          0xf1e89295f3de1cL, 0x42f0b588ab63c5L }
+    },
+    {
+        { 0xf1974cc5b596d8L, 0xee8093f44719f0L, 0x40ba933f6f5b54L,
+          0xd6e53652f3d654L, 0x9aeb83526d73b8L, 0x50ed5350776382L,
+          0x3be47d6ad43875L, 0x21d56dfc786e48L },
+        { 0x8a75e18b73bb39L, 0x9eba84cf265a78L, 0x7c02a4d2e772e7L,
+          0xf7df6d44c1ecd2L, 0xa8d9ea06cef71bL, 0x86e8f91cae3b68L,
+          0x2fd141199efefaL, 0x0b36ab2214e6f6L }
+    },
+    {
+        { 0xd79065cbdce61cL, 0xcb562ffdecb229L, 0xef5d3d14600849L,
+          0x348b31b1d23ac8L, 0xb2ea69915c36b8L, 0x268683d4822836L,
+          0x083edbec6f0b7dL, 0xaf4f39d1a7821cL },
+        { 0x23be6e84e64841L, 0xe9e246365bf791L, 0xa3208ac02bfd7cL,
+          0x231989cd01357dL, 0x79b8aad6422ab4L, 0x57d2b7e91b8564L,
+          0x28ebbcc8c04421L, 0xdc787d87d09c05L }
+    },
+    {
+        { 0xeb99f626c7bed5L, 0x326b15f39cd0e8L, 0xd9d53dcd860615L,
+          0xdf636e71bf4205L, 0x1eaa0bf0752209L, 0x17ce69a4744abbL,
+          0x474572df3ea2fbL, 0xc4f6f73224a7f3L },
+        { 0x7ed86ad63081b4L, 0xcd4cdc74a20afbL, 0x7563831b301b2eL,
+          0x5b4d2b1e038699L, 0xa15d1fa802a15fL, 0x6687aaf13e9172L,
+          0x3eccd36ba6da90L, 0x34e829d7474e83L }
+    },
+    {
+        { 0x4cea19b19c9b27L, 0xa14c37a5f52523L, 0x248b16d726625cL,
+          0x8c40f9f6cabc21L, 0x918470c32a5c65L, 0x314056b2a98d5bL,
+          0x6c974cf34a0714L, 0x0c8f8a94f6314aL },
+        { 0x484455770bccfdL, 0xf5835db740c9fdL, 0x12e59b5a21407cL,
+          0xbe338e0db1689dL, 0x5a50ce9dd5e915L, 0xb1780e9ef99f39L,
+          0x1262b55ee4d833L, 0x4be3f2289c5340L }
+    },
+    {
+        { 0xbb99b906c4b858L, 0xa7724d1550ca53L, 0x7d31f5a826962eL,
+          0xf239322a5804daL, 0x3e113200275048L, 0xcbb1bb83ee4cb6L,
+          0xdb865251331191L, 0xb7caf9e7d1d903L },
+        { 0x06e3b0577d7a9dL, 0x7a132b0b3bbbf5L, 0xd61fbc57c50575L,
+          0x393f712af4b646L, 0xef77972cb7efe9L, 0x20e6d5d5ea4995L,
+          0x0ac23d4fbbe4c6L, 0x8456617c807f2aL }
+    },
+    {
+        { 0x4995fb35396143L, 0xa8b4bd1b99dc46L, 0x2293e8e4150064L,
+          0x2f77d4922a3545L, 0xe866b03b2192c4L, 0x58b01f05e0aa38L,
+          0xe406b232ed246bL, 0x447edb3ed60974L },
+        { 0xf541b338869703L, 0x6959fe0383420aL, 0xd6b39db4be4e48L,
+          0x048f3b4b5714efL, 0x68b49685d9e4b8L, 0xbda8e6c2177963L,
+          0x5094e35c4211feL, 0xea591c32d46d1aL }
+    },
+    {
+        { 0x3a768ff2fef780L, 0x4218d2832970c6L, 0xce598e4ec6da17L,
+          0xf675645fbb126aL, 0xb04c23f0427617L, 0xc9f93fbe4fce74L,
+          0x44a414b3c91b00L, 0x4d982f31d3b3ccL },
+        { 0xb1d40e8b24cce0L, 0x5a21c07133e73dL, 0x6e9358e0bb589dL,
+          0x39cfb172399844L, 0x83f7647166080eL, 0xcfe7bf8450b468L,
+          0x2a288f71e8434fL, 0xd39f1e521a81e3L }
+    },
+},
+{
+    {
+        { 0x78c6f13528af6fL, 0x0001fe294b74d9L, 0xae7742501aab44L,
+          0x7cbe937ef0039cL, 0xaf3e4f00fa2a67L, 0xe28175fda1378eL,
+          0x72adeed8ccd90eL, 0x16a8ce100af22fL },
+        { 0x69fae17cbf63ddL, 0x67861729e39e26L, 0xe92b3d5f827a18L,
+          0x4d75e418403682L, 0x01a4fd99056a79L, 0x89efb2d20008f5L,
+          0xa2f6918b78ff15L, 0xf41c870a3437f5L }
+    },
+    {
+        { 0xc840ae57be353cL, 0x465a5eb3fb2691L, 0x34a89f07eba833L,
+          0xf620896013346eL, 0x563b5f0e875df2L, 0x5f7fc8bfbc44ceL,
+          0x22fcb5acfedf9dL, 0x7cf68d47dc691bL },
+        { 0x37f7c2d76a103fL, 0x728a128fd87b7dL, 0x7db2ad8ccf2132L,
+          0xa4c13feb100e63L, 0xcd28a517b511d5L, 0xb910280721ca5cL,
+          0xec1305fd84bd52L, 0xb9646422729791L }
+    },
+    {
+        { 0x83fccdf5bc7462L, 0x01f3ddad6f012fL, 0x57f11713a6a87cL,
+          0xedb47ceff403acL, 0x6c184e5baab073L, 0x5b17c7d6f0d6a1L,
+          0x45a4c4f3ef2c91L, 0x26c3f7e86a8f41L },
+        { 0x81a6db0b646514L, 0xf84059fca8b9aeL, 0xd73dab69f02305L,
+          0x0de3faec4b7c6cL, 0x18abb88696df2fL, 0x45dd1b975d7740L,
+          0x3aeccc69ee35bcL, 0x478252eb029f88L }
+    },
+    {
+        { 0x66bf85b8b2ce15L, 0x1175425335709dL, 0x00169ef8123874L,
+          0xfd3c18c9b89868L, 0xb3612f9775204eL, 0x4b8d09dc2cd510L,
+          0xafa12e614559adL, 0x1ddaa889657493L },
+        { 0x87d700b1e77a08L, 0xaf4cf2f14d2e71L, 0xe00835dbf90c94L,
+          0xb16a6ec6dc8429L, 0x02a7210f8a4d92L, 0x5a5ab403d0c48dL,
+          0x0052b3ab5b9beaL, 0x6242739e138f89L }
+    },
+    {
+        { 0x7c215d316b2819L, 0xdacb65efeb9d7aL, 0xc3c569ed833423L,
+          0xbc08435886a058L, 0x132c4db7e5cb61L, 0x6373a279422affL,
+          0x43b9d7efca9fc4L, 0xe3319a5dbe465fL },
+        { 0x51d36870b39da7L, 0xcb6d7984b75492L, 0x77eb272eadd87aL,
+          0xf2fb47de0d3f6cL, 0x807fd86f9f791cL, 0xf01086b975e885L,
+          0xf9314b5b6a3604L, 0x8cd453867be852L }
+    },
+    {
+        { 0x7c1e6b3858f79bL, 0xf0477c4938caf9L, 0xb311bbf3e88c44L,
+          0x9234c091e3a3c1L, 0x531af2b95a1d4dL, 0xf3cc969b8d1c64L,
+          0x6f3c328b51e78dL, 0x5a1bd6c34e8881L },
+        { 0x2e312393a9336fL, 0x020f0cc5ced897L, 0x4b45d7b5fab121L,
+          0x8068b1c1841210L, 0x1bd85fc8349170L, 0xfe816d80f97fe5L,
+          0x108981814b84fcL, 0x1d4fabbb93cd48L }
+    },
+    {
+        { 0x1f11d45aef599eL, 0x8d91243b09c58aL, 0xd2eec7bd08c3c3L,
+          0x5a6039b3b02793L, 0xb27fed58fb2c00L, 0xb5de44de8acf5eL,
+          0x2c3e0cd6e6c698L, 0x2f96ed4777180dL },
+        { 0x67de8bf96d0e36L, 0xd36a2b6c9b6d65L, 0x8df5d37637d59cL,
+          0x951899fc8d9878L, 0x0fa090db13fcf8L, 0xa5270811f5c7b4L,
+          0x56a6560513a37aL, 0xc6f553014dc1feL }
+    },
+    {
+        { 0x7f6def794945d6L, 0x2f52fe38cc8832L, 0x0228ad9a812ff5L,
+          0xcd282e5bb8478aL, 0xa0bc9afbe91b07L, 0x0360cdc11165e2L,
+          0xb5240fd7b857e4L, 0x67f1665fa36b08L },
+        { 0x84ce588ad2c93fL, 0x94db722e8ff4c0L, 0xad2edbb489c8a3L,
+          0x6b2d5b87e5f278L, 0x0265e58d1d0798L, 0xd2c9f264c5589eL,
+          0xde81f094e4074dL, 0xc539595303089fL }
+    },
+},
+{
+    {
+        { 0x183492f83e882cL, 0x4d58203b5e6c12L, 0x1ac96c3efec20bL,
+          0xabd5a5be1cd15eL, 0x7e1e242cbbb14bL, 0x9f03f45d0543b3L,
+          0xc94bc47d678158L, 0x7917be0a446cadL },
+        { 0x53f2be29b37394L, 0x0cb0a6c064cc76L, 0x3a857bcfba3da3L,
+          0xac86bc580fcb49L, 0x9d5336e30ab146L, 0xafb093d5bc1270L,
+          0x996689de5c3b6eL, 0x55189faea076baL }
+    },
+    {
+        { 0x99ef986646ce03L, 0xa155f8130e6100L, 0x75bef1729b6b07L,
+          0xc46f08e1de077bL, 0xf52fdc57ed0526L, 0xe09d98961a299aL,
+          0x95273297b8e93aL, 0x11255b50acd185L },
+        { 0x57919db4a6acddL, 0x708a5784451d74L, 0x5b0bd01283f7b3L,
+          0xe82f40cc3d9260L, 0x2ab96ec82bbdc2L, 0x921f680c164d87L,
+          0xf0f7883c17a6a9L, 0xc366478382a001L }
+    },
+    {
+        { 0x5c9aa072e40791L, 0xf0b72d6a0776bfL, 0x445f9b2eaa50dcL,
+          0xa929fa96bda47fL, 0x539dc713bbfc49L, 0x4f16dd0006a78bL,
+          0x331ba3deef39c7L, 0xbfa0a24c34157cL },
+        { 0x0220beb6a3b482L, 0x3164d4d6c43885L, 0xa03bb5dacdea23L,
+          0xd6b8b5a9d8f450L, 0xd218e65bd208feL, 0x43948ed35c476fL,
+          0x29a0dd80a2ed2bL, 0xa6ccf3325295b7L }
+    },
+    {
+        { 0xf68f15fac38939L, 0xb3dd5a2f8010c1L, 0xf7ac290a35f141L,
+          0xdc8f3b27388574L, 0x7ec3de1e95fed2L, 0xc625451257ac7dL,
+          0x66fc33e664e55aL, 0xd3968d34832ba5L },
+        { 0x980291bc026448L, 0xfcb212524da4a5L, 0xbca7df4827a360L,
+          0xfcc395c85ca63bL, 0xcf566ec8e9f733L, 0x835ee9bd465f70L,
+          0xe66d111372f916L, 0xc066cf904d9211L }
+    },
+    {
+        { 0xb9763a38b48818L, 0xa6d23cc4288f96L, 0xe27fcf5ed3a229L,
+          0x6aebf9cabaff00L, 0xf3375038131cd1L, 0x13ad41dffabd58L,
+          0x1bee6af861c83bL, 0x274fe969c142e7L },
+        { 0x70ebcc99b84b5bL, 0xe1a57d78191cfcL, 0x46ccd06cbf00b8L,
+          0xc233e8eefe402dL, 0xb4ab215beebeb3L, 0xb7424eabd14e7bL,
+          0x351259aa679578L, 0x6d6d01e471d684L }
+    },
+    {
+        { 0x755c465815ae38L, 0xadc3e85611db56L, 0x633999b188dd50L,
+          0xfdf7509c12d907L, 0x25bcfde238b6afL, 0x50d705d397f5e7L,
+          0xb65f60b944c974L, 0x8867fc327ac325L },
+        { 0x2edc4413763effL, 0x892c0b3341fb63L, 0xb34b83ab3a7f28L,
+          0x9aa106d15c2f18L, 0x720bbc61bb2277L, 0x637f72a5cfaefdL,
+          0xf57db6ef43e565L, 0xceb7c67b58e772L }
+    },
+    {
+        { 0x2793da56ecc1deL, 0x4e1097438f31b2L, 0x4229b4f8781267L,
+          0xe5d2272dec04a1L, 0x6abb463ec17cffL, 0x28aaa7e0cbb048L,
+          0x41dc081d22ef85L, 0xcbc361e5e63d0fL },
+        { 0xb78aafcad5dbaaL, 0x0111505fc1edc3L, 0x63ed66d92c7bfaL,
+          0x2982284e468919L, 0x30f1f21b8c0d8cL, 0xf0567472685093L,
+          0x0e085b6f03dd0fL, 0xa8c8db85581e66L }
+    },
+    {
+        { 0x42009a6264ad0cL, 0x13bf2b8593bef4L, 0x1d111905d4e8b1L,
+          0xfe3e940ef7bddcL, 0xa012275624e62cL, 0xcb659241d6d3ccL,
+          0xc7bcc70edb7ab6L, 0xff9fafbb750b1cL },
+        { 0xf65df297fea84bL, 0x17c84a890b0e02L, 0xa92a859301e821L,
+          0xbee8cb2fb480d1L, 0x7010b8c59c604eL, 0x47bf3f4e803c43L,
+          0xd64514247b3fffL, 0xc4c5dcb9f0da13L }
+    },
+},
+{
+    {
+        { 0x8af700cb5253b3L, 0x31ca605206957aL, 0x25744393eafdcdL,
+          0x2ba5ae1d3ae15eL, 0x710b7385b82579L, 0x145ab57112b95aL,
+          0x4b133a038c55c5L, 0xf7559c92a16fefL },
+        { 0x70c3e68d9ba896L, 0x475dd32c33d07aL, 0xe084e473a41e40L,
+          0xddc9382fd2e706L, 0x34b727579510bdL, 0x5e78a69a5f901eL,
+          0x429dfd7dcfb823L, 0x1d9dc18014f0a3L }
+    },
+    {
+        { 0x364fcdfaf403d7L, 0xd9ea4ffb7d7b34L, 0x21a3426cbb1dacL,
+          0xfa51052143b4f5L, 0x2bca0736df2409L, 0x7e6985a8ad7285L,
+          0x3a1a9d04aaa27fL, 0x1a815e19fc0c6cL },
+        { 0xfab6147bb65bb3L, 0xa36dc0d33ced0bL, 0x26a88592062d78L,
+          0x343861728a5fb7L, 0xe82da254ebb1adL, 0x70f5071d05aa11L,
+          0x0b7f847adaac48L, 0xeb812bc93cb269L }
+    },
+    {
+        { 0xcb317ccf7cacccL, 0xd3410d9cf85098L, 0xca68c8d7f078d7L,
+          0xfe9e812b782efcL, 0x32e7c0f5f544b5L, 0x44fe95a3a7b7f2L,
+          0xf4f1543e91327bL, 0x27d118d76645edL },
+        { 0x690547cd7abc2cL, 0xf64680fb53c8afL, 0xbe0cbe079ea989L,
+          0x6cf0ccea91af28L, 0xa3b85a29daa2f9L, 0xd4b663c91faed0L,
+          0x782c7b7a8b20baL, 0xf494fafb8d98ceL }
+    },
+    {
+        { 0x080c0d7002f55aL, 0xf4f8f142d6d9ddL, 0xb326229382f025L,
+          0x58fd0b5ad28c20L, 0x704b9928d06a15L, 0xf4545d97fbd8e4L,
+          0xc32fa63ed55581L, 0x3ab793601ac0fdL },
+        { 0x13ece526099fd1L, 0x776dba89c79178L, 0x8d28212ce26c45L,
+          0x09fddaf60d739cL, 0xf9931eda84826eL, 0x6e73d90b29439eL,
+          0x94cfefc9095e61L, 0x3050d16802f474L }
+    },
+    {
+        { 0x0898f8f9f6394bL, 0x48b8cea88b0e91L, 0x4bc99254c1b362L,
+          0xe3fccb4827d9ecL, 0x5d4cf9ad950d6aL, 0xa16f1ef39b5b38L,
+          0x3c76d1d620f288L, 0x9fdd059e119390L },
+        { 0x7b5de9efb5edf8L, 0x3e290b9769d14eL, 0x4df3a916bd10b5L,
+          0xae99bca82f8f7bL, 0x5481d5dc9524afL, 0xf112e4f69504f1L,
+          0xb048f0951931ecL, 0xbff876a18f51b1L }
+    },
+    {
+        { 0x932e2a746c1c37L, 0x903ad529aea4c1L, 0x717ac918f161f2L,
+          0xa57d197f425e2aL, 0xae89dac7f39e0eL, 0x91655c0baa2a58L,
+          0xe3dc28654836ddL, 0xb5f0baaa9ec9e6L },
+        { 0xf7c4662bdbda04L, 0xbe5393b51059c0L, 0xb16d552dd95b0fL,
+          0xde495b31b3bd96L, 0xb2a6e02c0206c5L, 0x045cc09014d3a9L,
+          0xf66a3152a2f490L, 0x208c108c5dea05L }
+    },
+    {
+        { 0x6e38b6865237eaL, 0x93a13039f27fc6L, 0x9a6d510a95068aL,
+          0x6fbf216e7c9e54L, 0x7824290571ac1dL, 0x8cb23ba91c2a0cL,
+          0x611202ec7e434dL, 0x8f901bf76058b4L },
+        { 0xef0ac050849588L, 0xe0d2ddedd31804L, 0xaf5417ceb2ca81L,
+          0x420ac065d1a509L, 0x46e345e9683bb6L, 0x6daf635f613f7fL,
+          0xc9e829148a9576L, 0x5f9f1d1176d147L }
+    },
+    {
+        { 0xd24ae1d77e9709L, 0x77751dc0047b8aL, 0xe325334c6a1593L,
+          0x9baf962671f86aL, 0x425af6ac29a15eL, 0x31086002796e33L,
+          0xb6ea78cfc253a5L, 0x4c733e0afae0eaL },
+        { 0x4b7443a97c99b9L, 0xc14e9e450203a6L, 0xd1bb51552680baL,
+          0xa56a3efd55533aL, 0xa66e38c169e1a0L, 0xb3e4df9eed7da0L,
+          0x022c937ddce3d9L, 0x8552089f6e36b4L }
+    },
+},
+{
+    {
+        { 0x8e4bf95f5cc82eL, 0x2ad80c3c3ed6c9L, 0xf2e5b2cc9045e1L,
+          0x42c906559b06d4L, 0xc1f73797b43b84L, 0x1710dbf72d7992L,
+          0xe98cf47767b41cL, 0xe713fce7bfb9e9L },
+        { 0x9f54ae99fa5134L, 0x3002fd8de40d0eL, 0xdc282b79311334L,
+          0x5519810bfeb360L, 0x31539c70f96ffeL, 0x04eacc0d27777bL,
+          0x59824108ff5053L, 0x598236632b67adL }
+    },
+    {
+        { 0x6eb45546bea5c2L, 0x82cfae0d509a33L, 0x6a69bd8394bb59L,
+          0x1880d8d5770ee1L, 0x63518447dacf9eL, 0x5b1ecc5f02b891L,
+          0xeb7d900b6c9a5aL, 0xdab8a768897da8L },
+        { 0x28c7be598851a6L, 0x0101d4f4d73c3bL, 0x3c2569c5084996L,
+          0xb9bc911280bde0L, 0x513a22acd0d4f9L, 0xdf2986d2a15f3bL,
+          0x231c28f2aa4943L, 0x29623ad0333870L }
+    },
+    {
+        { 0x2ceb1784084416L, 0x924cf1c49516cdL, 0x76536c04be856fL,
+          0x11b59cd47a265bL, 0x720dc844999494L, 0x910f794007b795L,
+          0x8434e142d3df83L, 0x8f53878bd478d3L },
+        { 0xd9b072eaeb9c2fL, 0x16f87eafd8a29fL, 0x8c42f9b2fd0de1L,
+          0x916721e0e816efL, 0x2ecb47018bde37L, 0xcde3b7a2375da2L,
+          0x30d0657ef94281L, 0x51054565cd7af8L }
+    },
+    {
+        { 0x7230b334bdced3L, 0x0c6a3e10838569L, 0xf19c9ece3493b8L,
+          0xf2759270d97c57L, 0xf14181e0c862ebL, 0xfd3bac132c72bcL,
+          0x620563ff3be362L, 0x672ccaf47283b7L },
+        { 0x191e3fa2b7bf16L, 0xf838633520dad7L, 0xd3dde553629d87L,
+          0x14d8836af86ebeL, 0x3db7dfb221b2ceL, 0x3872abb0aed72aL,
+          0xb60de528c665b7L, 0x89c259644982cbL }
+    },
+    {
+        { 0x799a2de4dbba25L, 0xd818aaea42715eL, 0xbc88f4df55c362L,
+          0x142a163713c9aeL, 0x411e8eefbfb33fL, 0x34b46296bb684aL,
+          0x4344becdc81817L, 0xcc9573d17f9d46L },
+        { 0xf85f8bcff38a7dL, 0xa14bf730caf117L, 0x126874f4ba6429L,
+          0xcc9bf22aa5db97L, 0x62b56df6aba827L, 0xfee1cb89c9772aL,
+          0xe36838f177e541L, 0x698815dadd438fL }
+    },
+    {
+        { 0xc9fd89438ed1adL, 0x73cd79d7b6a601L, 0x2210e6205e8d20L,
+          0x72384ac3592af5L, 0x5ccc079763d07eL, 0x2f31a4aa5f79ebL,
+          0x693f4ed2945a95L, 0xc7120178056fdcL },
+        { 0x361ecd2df4b09aL, 0xa5644eab7d929aL, 0x34abc0b3fabe9aL,
+          0x1a2473ce942a8cL, 0xe00c9246454bc3L, 0xab324bcdff7366L,
+          0xe1412f121b8f99L, 0x970b572e33551eL }
+    },
+    {
+        { 0x6ca4cacbd0a6b5L, 0x5584787921d654L, 0x18e5253c809bdaL,
+          0x01b32c3f0cbe5eL, 0xb9aa7540f987ddL, 0x628f4bb6dfa4dbL,
+          0x0255f0b891890bL, 0x25b7df4874e590L },
+        { 0xbded3188ed5f95L, 0x9dc428dca93023L, 0xc68f25abccf520L,
+          0xc4f3764e616e6cL, 0xd9a57f1a1d9993L, 0xd1964a5533431bL,
+          0x06cd77f02ab6d0L, 0xa66079103e52e0L }
+    },
+    {
+        { 0xab088645f72700L, 0xf77b2ff0a1a44eL, 0x43ebdd8c2a24b5L,
+          0xa6d67114f564d7L, 0x495df63f414160L, 0xf5bacd776f6de6L,
+          0x3011aff7c2b43dL, 0xbb1e64c3241928L },
+        { 0xf70c5725034073L, 0x891c62a68f1e97L, 0xed8eb2eb22e374L,
+          0xd3a53e97dbcc2fL, 0x1d06281dc8f220L, 0x9eef48face4393L,
+          0x96014f5d2abecdL, 0x1da7e092653cebL }
+    },
+},
+{
+    {
+        { 0x7593318d00bc94L, 0x586f3c6c7262a2L, 0xea68f52958ad31L,
+          0x6707fccd4e8bedL, 0xb7e35d6cb3f9ceL, 0x2cbb6f7f4b1be8L,
+          0xa5352687b41aeeL, 0x1d77845f7b39b8L },
+        { 0xb1f3995eaf9554L, 0x3250f70fe9e7d4L, 0x62e5d1ba00c23cL,
+          0x5e422f5c10e3bfL, 0x7a18039c25cec4L, 0xb4e66a17cc4d5bL,
+          0xad7c5f636d0e0cL, 0x9f40b12a4cf347L }
+    },
+    {
+        { 0x697f88251e3696L, 0xc89bc40ab0a648L, 0x8f261a59785804L,
+          0x4c7f900b51a2bdL, 0xd00e7af8a2dfcfL, 0xf9c534db642aebL,
+          0xea2a79fb63df0eL, 0x392a69af2f64a4L },
+        { 0x0c0f01cc331b6cL, 0x414bf2e6a5edb5L, 0xfe5ed815068391L,
+          0x0a8078d62fbc34L, 0x78a438254bca98L, 0xf7a49ae3d727c7L,
+          0x96c1de1ab4dffeL, 0x45901f73b9440aL }
+    },
+    {
+        { 0x3f1189facfe46eL, 0xdca6f464467443L, 0xac385422eb5bcfL,
+          0xb02dce9906bf72L, 0xdd8cdacfe1d454L, 0xc26f04c65f7218L,
+          0xb4748596ea145dL, 0xc53dc6b5bdb315L },
+        { 0xbe5be749ad7197L, 0x627e91918b5eccL, 0x57c889c9ea405dL,
+          0x2e5650c1a5360bL, 0x42290df1b30b27L, 0x4a071575242687L,
+          0x553ed1fd379133L, 0xb9d7a0701db019L }
+    },
+    {
+        { 0xcfe551c56597dcL, 0x81af92a925ebd6L, 0x83efe16f4e8d57L,
+          0x61bb4311f640d3L, 0xf80440f78b414aL, 0x72f3c636c9e3b4L,
+          0xb55f43a6a03c66L, 0x47a9dede417037L },
+        { 0x1a7e287dbb612bL, 0x895c3c7dbb9220L, 0xd50c86e6c04764L,
+          0xed5269853cf7caL, 0xc78d799f74af55L, 0xb2ba0f2b969ff2L,
+          0x06d48151c6530bL, 0x764a1fe165a575L }
+    },
+    {
+        { 0x4383a3bc1b5eceL, 0x0563c8854ff148L, 0x9a452795af796eL,
+          0xffba7c088e9953L, 0xfe9fb5eb6a3001L, 0x795098825b6b19L,
+          0x67c899ad81be5eL, 0xc89ac8d2f9d29bL },
+        { 0x7c76ba329ab8f7L, 0xb2a18c96e40f74L, 0x1b5056e3864d9bL,
+          0xdfa503d9b582b8L, 0xfb035197c9c68eL, 0xdc501316b3c22bL,
+          0x38ab231a6c96ffL, 0x4ea527c8cb1c10L }
+    },
+    {
+        { 0xd632f20c05b4edL, 0xe0199fab2a032dL, 0x373295626812d7L,
+          0x2aed855013df13L, 0x92ca24b39f96acL, 0x620273dbb9751aL,
+          0x5d0d21ef7437a1L, 0x9de2a43077de56L },
+        { 0x0569b1211a4674L, 0xfc3923e89c3989L, 0x3d127042c5c770L,
+          0x0072b9084e8c37L, 0x7178d4dac39f9aL, 0x5f8292f778d345L,
+          0x9e5bf0f77c7307L, 0x7691610c3a20f5L }
+    },
+    {
+        { 0x7c4ead5705fe96L, 0x377ec35c8e464cL, 0x3e5b9907689954L,
+          0xc0f6949a2d31eaL, 0x839d395c580671L, 0x2f347a6b215b09L,
+          0xfdcfa33683df83L, 0x6e12cc26af39a8L },
+        { 0xae46ec813a3bd2L, 0x03a7d3b59366f8L, 0xe2029d5b87aed4L,
+          0xbdc4e43fe1b83dL, 0x768437cdb8a1a8L, 0xe47acc3ea0dd7fL,
+          0x550e0cc62a0af4L, 0xcaf2cbc1a20962L }
+    },
+    {
+        { 0x5a784f7f28a78fL, 0x952a9b507e9724L, 0x8ac5e411bab7a3L,
+          0x1251e3fb7bc1e1L, 0xe360f82dc15e22L, 0x3ac72da95213f5L,
+          0x65ee9ba4dcd47bL, 0xdfeab7b3af5952L },
+        { 0x34c5c8026fd3c6L, 0xd977b08f3ac7eeL, 0x003bd017dba2f6L,
+          0xcfc5cf8ac98c8dL, 0x05eb6040e46922L, 0xc248b17faa9352L,
+          0xfa41c0f395c7a7L, 0x29931d4b71ee44L }
+    },
+},
+{
+    {
+        { 0xac087bb07861c5L, 0x3bd37db5ae8240L, 0x94c68ecf94518fL,
+          0xd32a378ff88a5bL, 0x42c8aaf9b441d1L, 0x089db70fc07f12L,
+          0x211c386d3d4455L, 0x1db9af7546b158L },
+        { 0xdfd1b6551bc927L, 0x69c04930733df4L, 0xdc72cd42aeb586L,
+          0xeebdace823aa13L, 0x51b3b3c56ad643L, 0xb983a99d4e0426L,
+          0xa1e5b6c69c4eccL, 0x37cd38245e6668L }
+    },
+    {
+        { 0x158ce6d9f73aeaL, 0x36a774914ff475L, 0x0d4e424dc0b018L,
+          0xc2c44483946f09L, 0x7a7de3ffacda62L, 0x49a19e6b486709L,
+          0x65094d8db61da7L, 0x09edfd98f5ee87L },
+        { 0xe460fcfb37226dL, 0x3b9d03969bf470L, 0x3d4d511247ca22L,
+          0xc7248d6c782cb1L, 0x91189a000ad293L, 0x1244942e8abe75L,
+          0x9f88d12bf52cdbL, 0x368463ebbbcadfL }
+    },
+    {
+        { 0x419e4b38074f45L, 0xd3f8e2e0771c83L, 0xd2743b42e68d34L,
+          0xc68b7dbb116a00L, 0xfad2cf7d84cc37L, 0xcfd27c0b7a0f4dL,
+          0x3b9e23f190e587L, 0x7bab499751ca9eL },
+        { 0x3270861a8f12eeL, 0xee1f38d31b36d5L, 0x748bb31e4c0eedL,
+          0x9be5c9b110ebadL, 0x728660bc8b6cb6L, 0x7bc9df793d914aL,
+          0x73a4f2cc88c859L, 0xbe4a2fdb4e7f0eL }
+    },
+    {
+        { 0xe566ff8a450e77L, 0xb0b40066a13abaL, 0x483a510cd7dc90L,
+          0xb1a20135fa9cccL, 0xeb0b631a80e67cL, 0x7c34e1f020801aL,
+          0x0257dc8f4e447cL, 0x7abe7d174c6f0fL },
+        { 0xf115a3ab19a576L, 0x8f0474a064ca0eL, 0x999bb6b351f99bL,
+          0x855254b773edc3L, 0x49f6c2f427d717L, 0x9f682532e0cef2L,
+          0x1fe126c2ee34f5L, 0x1ec2cae80150f7L }
+    },
+    {
+        { 0x862c5afc005b7aL, 0x61adea7ec4ef17L, 0xf885fd3007b446L,
+          0x25c129d9b0e30eL, 0xbc10f25feec7e0L, 0x3901ac4df79ee1L,
+          0xad49db7fe9e19fL, 0xc8624d9360d050L },
+        { 0xc74a576bf3260bL, 0xbde80248c010c2L, 0xf15532909b6977L,
+          0x6a5a82ed52dcf8L, 0x4fbf59d29b9dfcL, 0x337d049c7b730cL,
+          0xb3deac63a89cd4L, 0x1e07595ad2f2ebL }
+    },
+    {
+        { 0xa0b0a4d3b7c84eL, 0xf132c378cf2b00L, 0x192814beaaa8ecL,
+          0xe7929f97b4b5dfL, 0xf08a68e42d0ab7L, 0x814afb17b60cddL,
+          0x78c348c7d9c160L, 0xf8a948844db217L },
+        { 0xcdefd88eaa2578L, 0xf717f56bd0e260L, 0x7754e131694d02L,
+          0x1254c14181dbd8L, 0x0dacdd26e5f312L, 0xb8abdfbcef87bfL,
+          0xb985972e74e2eaL, 0x1717621002b424L }
+    },
+    {
+        { 0x92cc75e162df70L, 0x1e20c0618ee849L, 0xc036b4626aa590L,
+          0x31be67e4da5155L, 0x04911b5f7213b0L, 0x39261d7bb2e72eL,
+          0x9e844665c015a3L, 0x2f59fc0298ae67L },
+        { 0xa3ea7ba1701fccL, 0x87a5fa90ebd651L, 0xa607ed4301d7b1L,
+          0xbd4ec5f3b2e271L, 0x732a1a2dc4180fL, 0xbe15d82feaa8c1L,
+          0x103670266f2f3fL, 0xccfd3979e79ce8L }
+    },
+    {
+        { 0x82ab83570a54adL, 0x5c1dee8e3bec75L, 0xf583ff454b556bL,
+          0x9220199f461e60L, 0xdf61ca887fc4e7L, 0x6641fd20776dadL,
+          0x00c6edd8edd061L, 0xaf9b14255f7e87L },
+        { 0x73f15e49bbe3ecL, 0xdd3b788f8bc1faL, 0xb24cc071b8ff86L,
+          0x6c260d241be58bL, 0xec1c4e36b10adaL, 0xf6b42097fdb985L,
+          0x0d0ac85d47c212L, 0x967191c07d78d1L }
+    },
+},
+{
+    {
+        { 0x3b11638843d0f3L, 0x4b89297f27f10eL, 0x477236e863ba2aL,
+          0x1949622add280cL, 0x7cd523504da757L, 0xe0e99d279e4ff7L,
+          0xb4ef894537da41L, 0xc55dde45a24ff1L },
+        { 0x18d8e21b587521L, 0x8010b5d3777833L, 0x4af522dd3a54c8L,
+          0x7cd476b4c0ac13L, 0x4587e614099f67L, 0x494d0ed605ee64L,
+          0x3218ba2cc80903L, 0x5ff56aa0b2e169L }
+    },
+    {
+        { 0x51ec94e3a06c69L, 0xa26d7be5e65c52L, 0x156f113d44ee96L,
+          0x70f0968bf5b9b4L, 0x9b7e4695f5332dL, 0x36c295f6703829L,
+          0x1522690d04f492L, 0xcf35ca4728043bL },
+        { 0xf9ca3e1190a7c3L, 0x53d2413f971b07L, 0xae596529c48b49L,
+          0x74672b8fefff5cL, 0x0a3018ba7643b0L, 0x51919e83e9b0a8L,
+          0x89ad33dc932fb5L, 0x52a4419643e687L }
+    },
+    {
+        { 0x7778990d2d0acdL, 0x3bdbcce487fdf1L, 0xdc413ca2b03dd2L,
+          0x278755b9a2b7d0L, 0x4ebb8b535ddd7fL, 0x0465152bcbdb92L,
+          0x34f22d6671d051L, 0x1ba04c787192b9L },
+        { 0xb1693f483560c1L, 0xe08a5937d174e9L, 0x47ffdc464dc9afL,
+          0x1123596ce8126cL, 0x632d95f1124628L, 0x66287abfee7c76L,
+          0xb40fe60c552332L, 0x3f11729e304e1eL }
+    },
+    {
+        { 0x97a6ea05030a8cL, 0x692419809c27b2L, 0x3308501ac9dd5dL,
+          0x9fed7fabe73fdcL, 0xea555440535286L, 0xc7c07ab6c9b832L,
+          0x178c882c51b967L, 0x6fa0c6986ee075L },
+        { 0xbaa4a15b8b5c4aL, 0xf83c0ea3130c0aL, 0xcf8624b2800331L,
+          0xade85cd7ccbcb8L, 0x971d7f6f08445dL, 0xfd480b76a546dcL,
+          0xdc15a38c93761cL, 0xc4c495c9d04631L }
+    },
+    {
+        { 0x5f4cee89470efeL, 0x9fe896188d93adL, 0x24783b3f4e49ceL,
+          0x1bc7ed752ffb3eL, 0xa3abe6a6d81e17L, 0xd6bb8b47a333c3L,
+          0x3485c0b10a3527L, 0x7cddc9c31a9d10L },
+        { 0x0c78112c38ca37L, 0x10e249ddd2f8d8L, 0x72c88ccc511911L,
+          0x4d75b5a29a6c84L, 0xc74b267a227b1eL, 0x698390cf8e35adL,
+          0x8f27edfe98d230L, 0xec922f26bdc7f4L }
+    },
+    {
+        { 0xac34023fc32e11L, 0xe0ae2f547200d1L, 0xa7c7492bd98c82L,
+          0x3910b687b02154L, 0x6fdd06ce28ab6dL, 0xd3a7e49d98b012L,
+          0x4c1c82b9f54207L, 0xef5bbe645c176fL },
+        { 0x3d17960d3e71ebL, 0x90d7e84080e70cL, 0x83e6438bff5d9eL,
+          0x1877e1f535d85cL, 0x931ed6efbb69ccL, 0xcf962651247848L,
+          0x76d618b750da4eL, 0xc076708717fbf6L }
+    },
+    {
+        { 0x80a5ac5eec5126L, 0x6d05dd13379c80L, 0x514b0892336d32L,
+          0x586c0066725137L, 0xab2365a574f954L, 0x3c89ea0ac7d356L,
+          0xf1f2edd27460baL, 0xf200ddbab9870fL },
+        { 0xc8f1b2ca35e885L, 0x5d22f86e6e7550L, 0x24b9a409554615L,
+          0xcb41107616314fL, 0xca752f0c976a11L, 0x3e2f839a08291aL,
+          0x0cff22ff2c420eL, 0xafd603e82b9747L }
+    },
+    {
+        { 0xaddeddc810a3daL, 0x78b6c2dd3a87bfL, 0xbc7020bde3a04cL,
+          0x47ab9739b6d045L, 0x3b046d60959358L, 0x0f953e7509ee3eL,
+          0x803dc8669fc61bL, 0xcceaec0893c8d4L },
+        { 0x21f8c40b048a45L, 0xb535073fcaea8aL, 0xe712c3590e360bL,
+          0x5d0f3f48403338L, 0xe0ea26c7207f2dL, 0x20f6b57ffd9e05L,
+          0xb97d68e4788b00L, 0xb1215541889cceL }
+    },
+},
+{
+    {
+        { 0x0079817464238eL, 0x21103020d381caL, 0x1cc4c6ed9f01b5L,
+          0x5e35dc55a131b1L, 0xb61848d06944ebL, 0x83792a029631a3L,
+          0xbe1017fafca0ddL, 0x70aaa01782fcbbL },
+        { 0xc63b7a099945e7L, 0xe9164ecc4486c1L, 0xb133e35885f2c1L,
+          0x186f0d3c99ae02L, 0x2fca4922bf53e6L, 0xf922aa248a02bcL,
+          0x4fe64900dd3dcaL, 0xe8c313ff6a8207L }
+    },
+    {
+        { 0xc5b358397caf1eL, 0xa001922922a4b6L, 0x67e36bedf07c95L,
+          0xabaa0aeb2f4f34L, 0x66dc926dedc333L, 0x82021c438ec5b3L,
+          0x82b4f2600ab176L, 0x1b7c22e69c45afL },
+        { 0x07b0dbe0924ad9L, 0xe030936a407ddeL, 0x66e1ce926ccd06L,
+          0xb50c108e3505a9L, 0x8b921e1da98f51L, 0x449ca1a20cf7c7L,
+          0xadb80c7e67d079L, 0x205aa54834372dL }
+    },
+    {
+        { 0x1482b4819bf847L, 0xd6c16ab5906f0fL, 0x323fb1723ad060L,
+          0x0346389c832be7L, 0xe71b2d82ee45bfL, 0x761c37dfb22276L,
+          0xa9b33345d70be2L, 0x81a06565a0627aL },
+        { 0x337750399a6282L, 0xafc8d2ed0436f0L, 0x22f71d3c53342fL,
+          0x66ca56d8939ad3L, 0x15a919230e09baL, 0x261091ea6de890L,
+          0x609d700e78f2d5L, 0x8aa52ee8eaaf78L }
+    },
+    {
+        { 0xa398788ce76258L, 0x3031d07494b975L, 0x4a6d652043dfe2L,
+          0xdb1a849b4401ecL, 0xf81ebbbce8bbccL, 0x937dd4716efe9eL,
+          0x9c19350ef85eccL, 0x260d932214273bL },
+        { 0x1d7e21e77bf1a3L, 0x199d689a544eb7L, 0x9da594194ced50L,
+          0x71a60be8a0aeaaL, 0x183a0ae26d3b51L, 0x49f176a8df9728L,
+          0x744376e3230674L, 0xb2cb21ae25541cL }
+    },
+    {
+        { 0x7a721589a0071fL, 0xe19dd29e7d2a6bL, 0x3deb34e55113f0L,
+          0xef1f8ebede573bL, 0xa8f7ff95665e37L, 0xa2c21eaf2d7777L,
+          0x1387afa91e2e39L, 0x04057b97db68f6L },
+        { 0x8b9d5ae1c241f7L, 0x689588a8e75993L, 0x79585b45c0e2d4L,
+          0xba1ef167b64974L, 0x72685bc1c08a75L, 0xf0a5814d572eddL,
+          0x71464a35ab0e70L, 0xc93c92b339aea7L }
+    },
+    {
+        { 0x1917e2a5b8a87dL, 0xea5db763a82756L, 0x5bba2fb6420e2bL,
+          0x5cc0501019372aL, 0xb1ef8beccc5efdL, 0xaf06393f49c57dL,
+          0x3ab1adf87a0bc4L, 0x2ee4cca34fe6b6L },
+        { 0xd1606686b8ba9bL, 0xef137d97efec13L, 0x7b6046550abb76L,
+          0xb40ec2bf753a00L, 0x696ed22eaf8f1dL, 0x398c91fd8ba3d8L,
+          0x11f203437db313L, 0xe1ec33bfe5079eL }
+    },
+    {
+        { 0x8a10c00bdc81f0L, 0x5f392566fe8e05L, 0xa595dab14a368eL,
+          0x32b318138cec6bL, 0xd77afde1b00d00L, 0x3c979284d9923dL,
+          0x78f0e7a76e13ddL, 0x5ee8e59bf75675L },
+        { 0x49ec89391b130cL, 0x9416182a47a441L, 0x54555b576e2ce8L,
+          0xcbdd2fd349c40bL, 0x10ae7379392bbeL, 0x270b1112e2dab0L,
+          0x5cb7712af293f4L, 0xfc22a33d6095c6L }
+    },
+    {
+        { 0xdcb5bbd0f15878L, 0xbcf27adb6bba48L, 0x979913e7b70ebaL,
+          0x4c0f34b158578aL, 0x53f59a76ed6088L, 0x19b3b2c75b0fc2L,
+          0xad628dc0153f3cL, 0x5195a2bcec1607L },
+        { 0x95f8b84dfe0f7aL, 0x935c6b0152920bL, 0x25f9e314da1056L,
+          0x4910a94b28c229L, 0x54b03b48ee4d6eL, 0xc991fc3694e3edL,
+          0x68c4c26dbe5709L, 0xc9cfce463d7657L }
+    },
+},
+{
+    {
+        { 0x21c9227f52a44eL, 0x7f105a2e85bfbdL, 0x887781f6268fc2L,
+          0x56ee808a2d7e35L, 0x14f9de52d3930fL, 0x4a4e356dcb561aL,
+          0x87362267f95598L, 0x211c3425f34151L },
+        { 0x8fcb75b0eaf9cbL, 0xcc9edf93d60ce2L, 0x54412c9a5fe627L,
+          0x6036a72842dd09L, 0x71ce668a6c6099L, 0x02b30d75386764L,
+          0xb69bed36f18e23L, 0x124c9b1d1de9f4L }
+    },
+    {
+        { 0xe8f8d95e69b531L, 0xe1e115eaff1049L, 0x9087cd1eddea0cL,
+          0x8ed55a57449916L, 0x8009f547808404L, 0x990f21617fea55L,
+          0x68ba624fe8ecf9L, 0x8ac295056d1f47L },
+        { 0x3257887529dfb0L, 0xc4a613f244c080L, 0xabb1ac028672faL,
+          0xb2915c531eb291L, 0x6e368ca8fababaL, 0x6b8c2591fde498L,
+          0x67724a1f2a548cL, 0x6b3b7e8f90409bL }
+    },
+    {
+        { 0x5415003fae20aaL, 0x95858a985df5ceL, 0x42bc9870ac6beeL,
+          0x8d843c539ea1a9L, 0x5de200cb571043L, 0x084fcd51741a33L,
+          0xe1ca20c0009d1cL, 0x0271d28e957e6dL },
+        { 0x84cbf809e3be55L, 0xc804dda1c578c6L, 0xea85489409a93aL,
+          0x64a450a972021dL, 0xc6a2161e681312L, 0x280bff965bc111L,
+          0xd358a4b0f8526fL, 0xd967be8953a3abL }
+    },
+    {
+        { 0x4c5e6157dd066cL, 0x37afd33634c8d4L, 0xa3ac88a42d8b87L,
+          0x9681e9b938b607L, 0x7a286ab37fe4c8L, 0xdeee5742494245L,
+          0x184b9d36af75a8L, 0x20f696a3670c04L },
+        { 0x1340adfa39e8b9L, 0x03c19290850b2eL, 0x435ebd42c0e1efL,
+          0x49de18b142ee9bL, 0xb440b273f116f2L, 0xd94e9fa2214463L,
+          0x1b0ddd36311543L, 0x1ae042a991ba3cL }
+    },
+    {
+        { 0xbc322f85bb47aaL, 0x9e2562554a5845L, 0x96b65ae21115f3L,
+          0x46fbed4bb5757bL, 0x18aec4f4c42dceL, 0xc59caf68d801f0L,
+          0x91894631205521L, 0x66bd8e089feb7aL },
+        { 0x39ebe95c529ee7L, 0x28d89928eadb99L, 0x6058c786927544L,
+          0x877e7a5d3808ecL, 0x8f651111c52eafL, 0xfb59812ae221cdL,
+          0x22289c6f890391L, 0xa97695b4966e92L }
+    },
+    {
+        { 0xf0a91226ff10f0L, 0x49a931ba2a65c8L, 0x3fcebbcb1d3cb0L,
+          0x70eb79bca9685fL, 0x82520b5ab38cb6L, 0xccf991b76304c3L,
+          0x575aab1af8b07cL, 0xec8166a5ed5efbL },
+        { 0xddc5698c8689b1L, 0x227c949b2e78d7L, 0x61323218e07d91L,
+          0x658a11d22cfd62L, 0x908fb44004dd5fL, 0xe3d14f090d21b1L,
+          0x6f3db9da6a1639L, 0x09d86c0333a525L }
+    },
+    {
+        { 0xd83eaf06f043f7L, 0x88ab648b52d5f6L, 0x67c664d57144d7L,
+          0x55d7644eafc8b5L, 0x1c89f20cceb291L, 0x51aec7b831ac47L,
+          0x51172fa6148854L, 0x8fabf7ef6d7bfeL },
+        { 0x5910316477ee27L, 0x5f299dd20fe61eL, 0x48079a842826abL,
+          0xf4a83ba22591faL, 0x8fac66055482ecL, 0x48fd5f16b65b3bL,
+          0x4288a7c9fd9e19L, 0x27db8199377894L }
+    },
+    {
+        { 0x2936ee47fd9dd6L, 0xcce5f0e9ec87c6L, 0x15a50e3db6e3b4L,
+          0x61df105ad701c8L, 0x3601add1dff1f7L, 0xb761e06e8a16e1L,
+          0x4341e021af3f91L, 0x9156a4a933fa3fL },
+        { 0x9dc46ae54bc01dL, 0x605577a64eb910L, 0x22b99f85a59a99L,
+          0xab2dbaf0a229d8L, 0xa8bfb656599364L, 0x39ed4a5e94ebf0L,
+          0x7b46a1e0dbb23eL, 0x117b1958751422L }
+    },
+},
+{
+    {
+        { 0xd19e8fd423bddfL, 0x9d77042387ef59L, 0x315cbdd849590aL,
+          0xfdc637c7866c1eL, 0x72be83d03515a6L, 0xd44a4a00376780L,
+          0x3b9613119e0c2bL, 0x023aca37b1a689L },
+        { 0xf5f368782282eaL, 0x44710898a8b5c7L, 0xcd2f00a17a3066L,
+          0x754e11281ed681L, 0x9c6c70c0bfcefdL, 0xd6aced03b6f29bL,
+          0xe443d562817a2aL, 0xe590ef4e7c0012L }
+    },
+    {
+        { 0xc2f96763e62e2aL, 0x661816eb2daa26L, 0x3515fd2dd5f512L,
+          0xdc36e2756b6e75L, 0x0bdde4674cc658L, 0x102908600e7644L,
+          0xfdf00451694a09L, 0x454bcb6ceac169L },
+        { 0xf4c92ab6481eb6L, 0x8b77afa09750e7L, 0xe6f42316362d6dL,
+          0x0d45deef53a3aeL, 0xdac7aacd7dcf98L, 0x628cb7f125ec4aL,
+          0x41e8a20aec0320L, 0x7418c7eea2e35bL }
+    },
+    {
+        { 0x4d649abdf40519L, 0x8cb22d43525833L, 0x15f6d137a5333fL,
+          0x8c3991b72c23eeL, 0x248b9a50cd44a3L, 0x6b4c4e0ccc1a75L,
+          0x3221efb15c99a9L, 0x236d5040a9c504L },
+        { 0x401c7fbd559100L, 0xcf0e07507c524dL, 0x39647c034a9275L,
+          0x2355422f7e8683L, 0x3e0a16eb3ae670L, 0x1c83bcbad61b7fL,
+          0x491bcb19ca6cbeL, 0xe668dc45e29458L }
+    },
+    {
+        { 0xe44c65b219379eL, 0x211381bbb607eeL, 0xd4c7428b7bc6dbL,
+          0xba62a03b76a2e8L, 0xe1729c98bb0b31L, 0x3caeb50c6bbc10L,
+          0x6c66727b0187aaL, 0xbf9d2f0fb90dcfL },
+        { 0xec693501184dc6L, 0xd58d2a32698eb5L, 0xb366d8da316b07L,
+          0xe1e39bb251c017L, 0xbe44ba9adb157fL, 0xbaa9a9a8a8b06cL,
+          0xd0f46356e473e1L, 0xd25a8f61d681c6L }
+    },
+    {
+        { 0xba39d5fcb102c7L, 0x66eba21d8aa1ebL, 0xcc2591a697fbf4L,
+          0x5adb5792317f54L, 0xa01ae71f76c6f9L, 0x2c525de5042705L,
+          0xc8f42724f4479fL, 0x26ab54ae6d7a5bL },
+        { 0xda217b5dc28106L, 0xc7cadeaeb2ae6aL, 0x0b1609453ea3b2L,
+          0xcddcc1ccc6111bL, 0x5c47affa7a7bebL, 0xf9931bd0e52dabL,
+          0x5231835c6dcf96L, 0x7095bdef27ea4eL }
+    },
+    {
+        { 0xee8adaec33b4e2L, 0x300665163ceb44L, 0xf1476fb880b086L,
+          0x07033289569ce8L, 0x2cabf9a238b595L, 0x85017bc26c8158L,
+          0x420b5b568d5144L, 0xa9f5f1ef9c696fL },
+        { 0x1409c3ac8fec5aL, 0x541516f28e9579L, 0x06573f70e1f446L,
+          0x3e3c7062311b96L, 0x0033f1a3c2ffd8L, 0x8e808fcca6711cL,
+          0x716752d07aef98L, 0x5e53e9a92525b3L }
+    },
+    {
+        { 0xce98a425a1c29fL, 0xaa703483ca6dc9L, 0xe77d822edfa48bL,
+          0xd2e3455068abcaL, 0xb456e81482cfcaL, 0xc5aa9817fbfb08L,
+          0x8979f258243194L, 0x727f2172cd043dL },
+        { 0x7cca616aa53923L, 0x387c5aee9bcb72L, 0x0173fd437580bbL,
+          0xdd7795b75fc0d9L, 0x47d1c37345deaeL, 0x2eb5d7fb0d1c03L,
+          0xf7a1b92958f002L, 0x7365cf48f61b67L }
+    },
+    {
+        { 0x4b22c3b562a5edL, 0x711216f5c7cd07L, 0x51f72c49ba0648L,
+          0xc10d0930de9e6fL, 0xaca479bfda63baL, 0x4722a55af532b0L,
+          0x8d59eb77236f39L, 0x5cad8744465c34L },
+        { 0xa2119e5722b0c1L, 0xb670264f343ea4L, 0x6910f02c19f387L,
+          0xcfec5bc0381fbaL, 0x5f5de0d52c0a1dL, 0x4e474d56378cb6L,
+          0x2fc802727e2ba3L, 0xa215da3159b541L }
+    },
+},
+{
+    {
+        { 0xed535858499895L, 0xa0aefd565c998dL, 0x210d8502d5a561L,
+          0xc2cc23ca2cd9d6L, 0x2371d46c4d297eL, 0x88b2143d18d441L,
+          0xbebdad9043993dL, 0x6ba91e7ad5f28dL },
+        { 0xc2bb3f13a731f4L, 0xd35cfac5d0d5c3L, 0x995099835ac427L,
+          0x8938bb55458adbL, 0x0bd738cab26f3bL, 0x56db3d5a28cd8dL,
+          0x87eb95fa1d8b4bL, 0xd6700efe7f3b4bL }
+    },
+    {
+        { 0x962c920ea1e57bL, 0xd3be37e6dded6dL, 0xf499b622c96a73L,
+          0x3eaf7b46c99752L, 0xa310c89025590bL, 0x535aa4a721db23L,
+          0x56ab57819714a0L, 0xeecb4fad4048c1L },
+        { 0x7b79ec4470c466L, 0xc4e8f2e1383ceeL, 0x0f5d7765750c45L,
+          0xa3b3bc3725527dL, 0x2f5deb66d00cceL, 0x5d5a0f495a8d81L,
+          0x50a442ee02b824L, 0xafb04462a11628L }
+    },
+    {
+        { 0x72b67bc0c613deL, 0x0150d4be6f0b24L, 0x847854e8ed289dL,
+          0xe08292fa320f88L, 0xd5b6da329c6160L, 0x2a48e2d4fb9d06L,
+          0x55d9e412de087cL, 0x65683b54f02100L },
+        { 0x4dc8c2ea8886c6L, 0xe966dd220d6114L, 0x99745eba57af97L,
+          0x23a9a71b854725L, 0x8effe05621a047L, 0xf16d284049a4beL,
+          0x95828c25b0660fL, 0xd5b69ba56e96b0L }
+    },
+    {
+        { 0x0b5b4244ffa0b8L, 0x0585b45096cc5eL, 0x413e1aef505d37L,
+          0xe5652a30c7ab8dL, 0xab32fb72990120L, 0x6b8b16e3f09368L,
+          0xbf9fadbefe128eL, 0x85f366b14b7671L },
+        { 0xcb2f294090608dL, 0x25e2769ac3045fL, 0x069c4f06131904L,
+          0x1c57cf1329a779L, 0x72fe0d5b7cace7L, 0x04d9f430897a45L,
+          0xbaf32f6359a645L, 0x0fa854ffa7485aL }
+    },
+    {
+        { 0xae3533c5f56f60L, 0x9773bbb0ad9360L, 0x769b34a38fbe6bL,
+          0xb5ba8e9ffb0c00L, 0xa93931875472e4L, 0x12cac92ce5f30fL,
+          0x514fc06a9e7dbcL, 0xd7ca86558b4734L },
+        { 0xd101ff365a730bL, 0x92da451abe70e9L, 0xfb5f94aef7bf4bL,
+          0x8c3ef4c1d56c7bL, 0xb0857668435c10L, 0x7fbbbdae7ed4ccL,
+          0x1da6eaf24f372fL, 0x0ab2c1f59b8ae3L }
+    },
+    {
+        { 0x63a1a78f10a4b9L, 0xbb5278d0c7e510L, 0x97b224ef874142L,
+          0x0a9ff52b2517b1L, 0x1b5a485c5cd920L, 0x1a8e2eba1823b9L,
+          0x2b088c00e914a8L, 0xe5ec3adcf13432L },
+        { 0x0d6ab3e6e7e253L, 0x9f0f5cd6f18458L, 0x839a744f459a6dL,
+          0xb4b4f941eb15f7L, 0xe0313acc72cb14L, 0x58ee933b20472dL,
+          0x5f73d7a872543eL, 0xb1700c5501f067L }
+    },
+    {
+        { 0xb70428e085f67fL, 0x5441d5143cabe5L, 0x4d0e8c2e0a6055L,
+          0x8d39a080882e4fL, 0x615bb32c1cb39dL, 0x113f18df7a1642L,
+          0xbab8cf5250681fL, 0x3017ba2677b72aL },
+        { 0xcd2b6e95a3a876L, 0x04765012035a69L, 0x31d6440efa2ea0L,
+          0xde8f8d156874d5L, 0xcbc71cd0199d4aL, 0xc546b61e7f2170L,
+          0x4e57e4e112c4c3L, 0x58955a8d1622baL }
+    },
+    {
+        { 0x0064cd704e2f6fL, 0xe9d458de0edd38L, 0xeb1a5977e0a5c8L,
+          0xe322ece01fc0a8L, 0x8b9d1661032a19L, 0x3e7b539a89de94L,
+          0xfa30262001c754L, 0xe33de4ddb588f6L },
+        { 0x4dafbdb954eb94L, 0xbb436480584c1bL, 0x622c93e5dbe29bL,
+          0x968f9e3f57b931L, 0x98f03be0f6453bL, 0xb0ecc7f08f696cL,
+          0x5af55f4a505335L, 0x028533efb3fa9bL }
+    },
+},
+{
+    {
+        { 0x3bc8e6827e8d86L, 0x4e43b3063f105aL, 0x5301b7d4981250L,
+          0x8b0a75e9f72fa8L, 0x88f59db357348cL, 0x5f0ebb1ec4208eL,
+          0x4712561c043d3bL, 0x9e5ded0c806b97L },
+        { 0xf9bd0a62121d09L, 0x1759ecbe337cd1L, 0xd1acc0ee945542L,
+          0x3683febbd2f63aL, 0x44f1bccda5dfe9L, 0xa3606c9707f22fL,
+          0x45ef0642d96ca5L, 0xfc3107d9022df9L }
+    },
+    {
+        { 0xe81320b44be755L, 0xdf213d55c7c761L, 0xf43d2d5b4e5db9L,
+          0x3bcfd828dedcd2L, 0xdf368a6d37a9ecL, 0xfef20aef475a77L,
+          0x22f5894162c064L, 0x956bc660142a7dL },
+        { 0xaaa10e27daec78L, 0x3cb9b72b6e9a78L, 0xa740bade383f72L,
+          0xc31b4017759007L, 0xdada964a7afc50L, 0x6bf062cfd3d11fL,
+          0x9470d535db3679L, 0x339447303abf13L }
+    },
+    {
+        { 0x533f44046e5d7fL, 0xd1793e349048c8L, 0x59e11501929b94L,
+          0xcddbbcb8364134L, 0x795c794582774fL, 0x114dfc4e03081aL,
+          0x541ef68ef54042L, 0x159295b23f18cdL },
+        { 0xfb7e2ba48a2c8cL, 0xe2d4572bb6d116L, 0x7bb0b22d750b53L,
+          0xc58888cd142ee8L, 0xd11537a90c9e2dL, 0x77d5858d02eb9eL,
+          0x1fa4c75d444a79L, 0xf19b2d3d58a68dL }
+    },
+    {
+        { 0x37e5b73eb8b90fL, 0x3737f7a3f2a963L, 0x87913fa9de35e0L,
+          0xec7f9928731eddL, 0x6e6259e219491eL, 0xb2148a04de236cL,
+          0x89700e8fdd309bL, 0x9ce51e49f0bf80L },
+        { 0xe7ec421301f17bL, 0xa4b570a3bc5f4fL, 0xc2b1b2a1285ee2L,
+          0x5e86bc8c53db73L, 0xb65fceaf24fa90L, 0x9e74c5608ab024L,
+          0x5c8003df9ed877L, 0xa632e9e4a2cbbcL }
+    },
+    {
+        { 0x32a4546c91c8b5L, 0xc122b5ac969363L, 0xbbbec5e3648b3aL,
+          0xd5a365e25143b0L, 0xcf3e46454157ceL, 0x9712f04f9bab64L,
+          0xc12d43a04b4008L, 0x51932d72edf1c7L },
+        { 0xaef1655b2f8470L, 0xaa8e3f36c24aceL, 0x7da75da6b4e761L,
+          0xd371827b90bca2L, 0x84db4500afb45cL, 0xae12045ef46b5dL,
+          0x91639a5d962f98L, 0x669cbe672f2ac0L }
+    },
+    {
+        { 0x851bb3183a4356L, 0x7d436bf9a1bf15L, 0x46a3f0e120b378L,
+          0x9302abc3f5b357L, 0x1e0672693fef53L, 0xb12f4a95fd2ee9L,
+          0x94a884c7de9433L, 0x2645234a6f2874L },
+        { 0x6fb56f5cdb8dfaL, 0x4a17dfc9e0ee4eL, 0xe269d8383ab01eL,
+          0xda932dab77c10fL, 0x463af0c0321243L, 0xbe1d68216fc8a3L,
+          0x2eae3ea48b39e3L, 0x94230213b03e7bL }
+    },
+    {
+        { 0xaeb507cb22f28aL, 0xa77458b49a6b44L, 0x232ed5ac03dc17L,
+          0x79dfc169c61ac6L, 0x7c48be9cd71b93L, 0x983d68ac429cd9L,
+          0x7709c4798ae2c8L, 0xe4765c0a5df075L },
+        { 0x23c4deb3367f33L, 0xbdf2b7e37d72a7L, 0xbaab5c70af2d26L,
+          0xd609f7ffd026abL, 0x23b72b2541b039L, 0x8d06bac83be852L,
+          0x911d4a9cb23d1cL, 0xeae815cfb0dbd7L }
+    },
+    {
+        { 0x487c35c2c33481L, 0xffab636b6136dbL, 0xccd4daea3d3aa4L,
+          0x87149bbc3704e0L, 0x9de8119c0e8396L, 0xd49357a58e7ca6L,
+          0x68789181562d75L, 0xc7453815ab1fadL },
+        { 0x0f1579802c9b91L, 0x7ffc3f0b1ddde5L, 0xa01d5e06aae50dL,
+          0x6a97e65e279873L, 0x4bcf42fb5b1b41L, 0x1c6410f32f5982L,
+          0xd4f760050701c8L, 0xff02663873b90dL }
+    },
+},
+{
+    {
+        { 0xdc53ea2e5b2de2L, 0x94b352d38acecbL, 0x37d960b0d9d5e5L,
+          0xabd868f90bd997L, 0x781668f35a7376L, 0x043d59710118bfL,
+          0xd4da719f57928aL, 0x01942f6983e46cL },
+        { 0xab97fc8728bd76L, 0x825956b4b5c1c5L, 0x202809fc82a104L,
+          0xdb63e9cc8e3132L, 0xa41c701c2181afL, 0xd28018043e066aL,
+          0xc734e4124044ceL, 0x4d9ab23505193cL }
+    },
+    {
+        { 0x0bcd42af9f0c3fL, 0xda21a46b94a218L, 0xe55243c0ffc788L,
+          0x318aae647a5551L, 0x8c2938b79af9cbL, 0x5d15232ec1dce5L,
+          0x3d310ba8ad2e5cL, 0xd3d972494f792aL },
+        { 0xdeb4ca112a9553L, 0x2f1ed04eb54d9dL, 0xaa9c9cf69fb7a1L,
+          0xeb73c3a54dcd3aL, 0xee3eddcf5f201fL, 0x35f9e1cba7d234L,
+          0x1d1d04cd2e242fL, 0x48df9d80df7515L }
+    },
+    {
+        { 0x4ecc77da81dd9aL, 0xa6ac4bb03aa015L, 0x7645842bbc4fedL,
+          0x9ae34cd9d6cf52L, 0xf8ff0335917e0bL, 0x7c9da37c2cc175L,
+          0x1e74dccaaacfbeL, 0xa8f2df07999af8L },
+        { 0xd06c4ea102a466L, 0x2156e87ae190ddL, 0xc95db8aec4a863L,
+          0x49edffd244a6feL, 0x110fae6904f81eL, 0xbaa3e50a1cd104L,
+          0x5bd38a20478b65L, 0x2b57d05daefbccL }
+    },
+    {
+        { 0x1ce92ba86f4534L, 0xb2a8592414f5e3L, 0xdd7a4c69979436L,
+          0x7599aff3f0add7L, 0xe0ce4d3e2d4f64L, 0x74475cc401a29fL,
+          0xaef6541a2377d9L, 0x54048f53f917b6L },
+        { 0x1b86b2205312ecL, 0x779ba2231493cbL, 0xc718369aac9320L,
+          0xeab01a8617fce4L, 0x17b1f10f7187faL, 0xe68eda0a1aca46L,
+          0x61033fe2586342L, 0xfc14e790b6ca43L }
+    },
+    {
+        { 0x9f2231913d2491L, 0x66bdb537997202L, 0x0bafb0c4617f34L,
+          0x5917831f3bb7b3L, 0x6feb2a6b45bddbL, 0x08662b30202c19L,
+          0x0bc2b5705852f6L, 0x2c00fd491818c2L },
+        { 0xca7672cda37dacL, 0xfe4c04c5a30865L, 0x5f1399f322e92aL,
+          0xe7d67ea25b1bebL, 0xe08b014dce7f68L, 0x24df52af2f2b3cL,
+          0x2028b23750ecd1L, 0x9b25d4bc810a45L }
+    },
+    {
+        { 0xa35b7157a9d799L, 0x6da1eb301f9c99L, 0x33ef91ce363ba8L,
+          0x21c0e2ece140daL, 0xb0b11bf158cd84L, 0x6a8744293da438L,
+          0x924f10d3db585bL, 0xf5ddd7310c6159L },
+        { 0xb72dcb86a74c21L, 0x6d14198cc8f79fL, 0x99f4b6c9c5a8d6L,
+          0x063968890e135cL, 0x330edb883f6385L, 0xe1a5a6b9079675L,
+          0x6e37fa8b8f5fe0L, 0x60e2fd961dca1eL }
+    },
+    {
+        { 0xc6cb40366c395eL, 0x03b21a7b51d0f1L, 0xbc478a5e693181L,
+          0x0017c2fc6cff33L, 0x740a5b839d8d1eL, 0x3968d664d9ec6dL,
+          0xfd53738b0ef1b0L, 0x73ca8fd1ed0a04L },
+        { 0x4ace93875ab371L, 0xd602936ddad7e9L, 0x1f5424a750bcc2L,
+          0xfe09b3668c7a17L, 0x165f7de58341ecL, 0x95b825a6ce61e5L,
+          0x9d31e1966c83c4L, 0x65b3e08cc5887bL }
+    },
+    {
+        { 0xd37e93221482d1L, 0x9af659708b6380L, 0x279426a7d61e4bL,
+          0x80dd0ec80997adL, 0x7239b0dd5b76d4L, 0x92e6c73e76c098L,
+          0xeeb2321eab3e1dL, 0xa69c4a7eb1a910L },
+        { 0x46d6aa7833d9aeL, 0x3ee6957572b0feL, 0x44ccbedcdb3d97L,
+          0x342f29dcbea01bL, 0x0d518c58926876L, 0xaaabae75585d2cL,
+          0xc548c77e008f58L, 0x819e2fa21fab2cL }
+    },
+},
+{
+    {
+        { 0x468e149c16e981L, 0x286c7909ddbb7cL, 0x2a92d47db7a38aL,
+          0xde614e68a27cb2L, 0x8dc8822e5b0ab6L, 0x38441aecf48565L,
+          0x11ed5c9089435bL, 0x238928682d0d31L },
+        { 0xc6698d472f2f31L, 0x295242c56d76afL, 0x4099205eba563bL,
+          0xae7de5a3ab7384L, 0xccdf127d0ed86cL, 0xb9b6d5b965c3c3L,
+          0xe351a8f2c31ad7L, 0xa761dd8ac12f13L }
+    },
+    {
+        { 0xda115ddf171ab7L, 0x2de17b1401f93dL, 0x95019ca40964b4L,
+          0x169d1f465ba3c3L, 0x534a0070090d08L, 0x805c5e282bf410L,
+          0x15dfe1165f8d90L, 0x827a416ca72456L },
+        { 0x5af888433a36c4L, 0x8bfa54cd8ee604L, 0x08fd1419ce290fL,
+          0x2db5e8c287b3a6L, 0xe5be98103cdad2L, 0x155b874bf810b9L,
+          0x2ae42de670f473L, 0x22185847f74657L }
+    },
+    {
+        { 0x54b2a5023ffa43L, 0xcf87b16a24d919L, 0x1ff540263524e8L,
+          0x73c94e056d1e54L, 0x76515523899fb5L, 0x13a721418723bfL,
+          0x39afbdd3561517L, 0x49b790a9f2862eL },
+        { 0xc8c1f4f527d2ceL, 0x1997aec7609bb7L, 0x583ad8002a3400L,
+          0xac2374e4f79706L, 0xbf1f9a821b7183L, 0x06158ab6600fe0L,
+          0xfcc9b2ebd56751L, 0xe1de5acddaaec7L }
+    },
+    {
+        { 0x230baa1788fdabL, 0xf30860a7d04597L, 0xa2c7ece99f4caaL,
+          0xbd39f106ad065eL, 0xfd92f5d3bef7bdL, 0x6069fad96d2203L,
+          0xbff38cac4d9e0dL, 0x419a0171fda313L },
+        { 0x5d77fd8572f035L, 0x5af99f2b282b40L, 0x7257d3b23facffL,
+          0xf2ee22358c90afL, 0xcc2687d9b6a52aL, 0x140892c302430eL,
+          0xa934d5e3ec4f38L, 0xc087d7c3bd18beL }
+    },
+    {
+        { 0x7e94138a2c5ed7L, 0xbc8ceef53610bfL, 0xe89356bd86f803L,
+          0x9a3a3805a55330L, 0xe894aba11ad648L, 0x2e68fbaba95918L,
+          0x643e2bafcad344L, 0x0dd025661640aaL },
+        { 0xc02e479e25cbddL, 0xd78c4d813a1b3fL, 0xa6dae8fcca9692L,
+          0x3dd91e9e5de8a0L, 0x78ae0ce764ea36L, 0xb4ad99985dbc5eL,
+          0x967ff23e82a169L, 0xaeb26ecbaee1fcL }
+    },
+    {
+        { 0x8c502559a6f90cL, 0x56e7abe0ea374aL, 0x675c72256413b2L,
+          0xd3fc17e946753fL, 0x28c4e1fe235f7cL, 0xe209bcdb028eb0L,
+          0x7d0f93a489fe88L, 0xb966a2e063706aL },
+        { 0xb6c228c4a30319L, 0x6868efeca6d674L, 0x0610a70057311aL,
+          0x0808112bad7f89L, 0x2a2462c1dd6181L, 0x52ed9feb58e88aL,
+          0xbbff16f33821a2L, 0xda53e9617f882aL }
+    },
+    {
+        { 0xb6ffca38c30e5dL, 0xa90f9915c905f5L, 0x72fb200d753e88L,
+          0xe509d4c7256c6aL, 0x369e552d866500L, 0xee4b7e033cf8aeL,
+          0x280d954efcf6ebL, 0x5b275d3d557f0eL },
+        { 0xeb17211b5cecf8L, 0xd6ad50fbdb2f8dL, 0x2478c7b35e04b7L,
+          0x97e7143ac73bd3L, 0x09d6ede4817e24L, 0x68fea712c405e1L,
+          0x34adbc905f67a1L, 0xd20ab7073edf99L }
+    },
+    {
+        { 0xe116a96569f191L, 0xb3f0bce4d6e29aL, 0x30b9e1af51dbabL,
+          0x1dd36f3346d276L, 0x83151030749a27L, 0x242f148ab47f70L,
+          0xe8a5bcf5585681L, 0x8b801845ed79baL },
+        { 0xa4042fd3894ad1L, 0x82f781d2b88bc6L, 0x2d34cacbe4c397L,
+          0x8731aeadd99c9fL, 0x0f95498ef1d382L, 0xcaba2e1dd0bbc9L,
+          0x78889e954064e8L, 0x8cd9c9761a8ab9L }
+    },
+},
+{
+    {
+        { 0xf31f53ffa0459eL, 0xf8742a1315cd6bL, 0xabe2f50ae64e97L,
+          0xbd787419b9da48L, 0x4521a3351e526eL, 0xfa05935e10ba45L,
+          0x5c947e1e8f903cL, 0x0aa47d15a754eeL },
+        { 0xb2849efd814825L, 0x9c2a5d25c9968dL, 0x24dbb2604e634cL,
+          0x33f3a4cdb38194L, 0xe04f609c8a2b6bL, 0xcaefd8eabbbfdbL,
+          0x683119a404498bL, 0x24ab7a98b21cbdL }
+    },
+    {
+        { 0x6f1326921fa2ddL, 0xd79e61cc10a4bcL, 0xac4b3ce4bd6d46L,
+          0x52459b6bd3f37bL, 0xce0f0a3a396966L, 0x050d1d5a1ed488L,
+          0x1b9c403e0b17faL, 0xee1abd004a2e66L },
+        { 0x97065c35cf3e3bL, 0x6513d5fbe33441L, 0xcd3463479047aeL,
+          0x45cbb1cfd22df1L, 0x7a173ae967b17cL, 0x75f5ba72223cdaL,
+          0xe3d12dbefe0a73L, 0x3b7f94dfd7adcfL }
+    },
+    {
+        { 0xd596a13f1e9b7dL, 0x04f5bdd6734e0cL, 0x18b694f8be163aL,
+          0x15620c7d959fa3L, 0x65fc2c553d2a3bL, 0xd44a364c4d36f2L,
+          0xc8b421f268ceabL, 0x564139abfe2bd4L },
+        { 0xb52461019d4633L, 0x5ab3f886346934L, 0x96691fe9819422L,
+          0xdfdec898b39b82L, 0x84b1c7997cfb27L, 0xe59a98d4d6d004L,
+          0x5e5d0c612c350fL, 0xb431220d415774L }
+    },
+    {
+        { 0x3d0ca736aae0a2L, 0x7b1991f48c2d8cL, 0x00ae8565cdae72L,
+          0xdbb6ca0bd55128L, 0x3c2ab2a45c82bfL, 0xea5a55979545caL,
+          0xeba9a26d5927d0L, 0xb52e40183257fcL },
+        { 0x55ed517ca9650aL, 0xbdaa081e3ebff2L, 0x8cf7ce49f8831bL,
+          0x1d0b5bd6e3b8d3L, 0xa314a9fd8fc869L, 0x07f2079b892babL,
+          0xb700dbfa0cc9d9L, 0x7105a086dc0a39L }
+    },
+    {
+        { 0x0c7e05d8c7d901L, 0xa7ff681af3182bL, 0xb88e3caf9a0d06L,
+          0xfe20a12c343b7fL, 0x9f0257703251f9L, 0xf225dedc40c5ebL,
+          0x50e0cecb208ea7L, 0x5b250f0e6eeb65L },
+        { 0x807a1534806b6eL, 0xded120afa94139L, 0x237ddc749366fbL,
+          0xdd3674e5a34bcbL, 0xef6cdff9c4a61dL, 0x036194bb2fb896L,
+          0x38659539528cd9L, 0x0723c596936a52L }
+    },
+    {
+        { 0x1f84cd5e17719dL, 0x545939bc73b394L, 0xefbf3c583e84e7L,
+          0x6cc46f1f77fd66L, 0xa629f591383ab8L, 0x9177ffacd35cd2L,
+          0x039187f9dd411bL, 0xa9cf1cf7b7eea8L },
+        { 0xa3b105aac47e5dL, 0xa755bead0a9da4L, 0x50cfbae73da15eL,
+          0x9456cbc60b628cL, 0x7ffc3629b7a910L, 0x30b5924cd6d6a4L,
+          0x198629f0b04ab6L, 0xc74609c624dea9L }
+    },
+    {
+        { 0x27d4d77af12fa6L, 0xdd8a216690aeb2L, 0xe48fc02fe24417L,
+          0x1970403720e17eL, 0x95013fdce37b42L, 0x06817d2de4bd9bL,
+          0xc5863e763d0ba2L, 0xa1bafc0a556f5dL },
+        { 0xf28ec7b410a78aL, 0x0dcac420a01a63L, 0xfcd3fa4b5bce11L,
+          0x054d7e5d278b89L, 0x5195db85ce49e3L, 0x4c0b1672c73d96L,
+          0xd94307720a1bdbL, 0x66fa8b359c77a7L }
+    },
+    {
+        { 0xb9e93aed7462feL, 0xbfe54b218dde4fL, 0xaabb5283dbb08eL,
+          0x8c367020e5fc45L, 0x35028888e69be3L, 0x6d2efc1c12a11dL,
+          0xfce5cebf265e30L, 0x58c8bb35742c7eL },
+        { 0x32e89dcccf7fa0L, 0xa811f33dd020a4L, 0xa10d6205129fe5L,
+          0x3841c88e4ed29bL, 0xf3303a9d8b1ea6L, 0xa9a0cad1781f58L,
+          0x4502b388f3ef0bL, 0x2b7587e74c6d35L }
+    },
+},
+{
+    {
+        { 0xc6eaea123ae7cdL, 0xa1884d473c0caaL, 0x901e76fef1ea88L,
+          0xdb9935ca14269dL, 0xe8b2486947f1deL, 0x4ad56f4a657588L,
+          0xe7680542913fb1L, 0x2abff5d37600daL },
+        { 0xa814813a81a797L, 0x63e76a446acb69L, 0xb1038394ab8277L,
+          0x587de349d8e759L, 0xdfaeb8dddf62dfL, 0x24fe1cf9239d49L,
+          0x7de7409e130d1cL, 0x3ecfef9581d070L }
+    },
+    {
+        { 0x8d177a0f87c72dL, 0xae7e5818c6d1deL, 0x0077b5f8cece85L,
+          0x382483832d2187L, 0x49d8b156db2bd2L, 0xe9e5513c8d85b9L,
+          0x63c410ce05c53fL, 0xceaf2fbd86f752L },
+        { 0x0b432fe93806c5L, 0x18eb15d3d06c75L, 0xcaad82612cfc02L,
+          0x581e0401e2d045L, 0xd573cb595edcfdL, 0xce71948dbc66e3L,
+          0xcf68721acc14eaL, 0xf68bea26cac4dcL }
+    },
+    {
+        { 0xd8576afcb74da2L, 0x8771c29c433f46L, 0x7315af6e2f5b8eL,
+          0xc195481ba33928L, 0xb77dcc22fb1f94L, 0xcb3e57ca610f75L,
+          0xeb2a92753907dfL, 0x916f14923eff95L },
+        { 0xbb378e4b6cd291L, 0xa2a5e2b2f13ce1L, 0xa8a0e60bcd00b0L,
+          0x5902741682b75aL, 0xa0882c93f65a77L, 0x2069f75c93cfffL,
+          0x1ede40570c0cb9L, 0x13840c90d526c4L }
+    },
+    {
+        { 0xdc2caaa03ced48L, 0x2079219a0315beL, 0xca493563b1f642L,
+          0x0202dc7b0665f2L, 0xe5d6bbdb7a5238L, 0x36fbd5e26eab32L,
+          0xb3988f1f5819b4L, 0x5b15dc84aa4d69L },
+        { 0xa52feed54e5c24L, 0x927471be91a797L, 0xd119bfdd57f677L,
+          0xde38f7b78e4c4fL, 0xa7af516b150bc3L, 0x403b21e26b76c2L,
+          0x589067d92300dcL, 0x04e406a066802aL }
+    },
+    {
+        { 0x28e7d09a9ca9bbL, 0xaa84fd5fccf4a0L, 0xdbe9fb8635b7edL,
+          0x9ede3f5d56fc7cL, 0xa4b5031b01cb29L, 0x584299d7f93703L,
+          0xbd28868b6fe825L, 0x1d385d48b9c2d9L },
+        { 0x6606f4a822be80L, 0xb5a0165626d0fdL, 0x9920a2014568adL,
+          0x7d430f41c6d174L, 0xc243e16e02e9e9L, 0x367f1d2a6bd649L,
+          0x693910071b8c36L, 0x2ede1314de2984L }
+    },
+    {
+        { 0xdc781875beec32L, 0x1fff0cca525ff4L, 0x6e86425676df34L,
+          0x2b4e8a63f638e1L, 0xc4991d29b1e59fL, 0x399d0011589717L,
+          0x406464ebe041cdL, 0x901cb3d9e65bb0L },
+        { 0xf5f4572fb42307L, 0xf81b3b0f1b7307L, 0x8fb695cf2094d1L,
+          0x7db4792db56f7bL, 0x36836d55a794e0L, 0x2da477b09bc879L,
+          0x1cdfadb1887c40L, 0x65dc6c2f2699b6L }
+    },
+    {
+        { 0x36f9f214737972L, 0x48f0c8b7a387b0L, 0xa156ed339a1d24L,
+          0x375293a0fed268L, 0xf679f487ff75cbL, 0xd15a00f1cc9e62L,
+          0x92a7dc722c3877L, 0xe9870636fb0ed4L },
+        { 0xfd8e59c16f5f3cL, 0x375732eaeeb48eL, 0x2dd9213ca1ab42L,
+          0xcb062099ffcceaL, 0xfc611f6b23edfdL, 0x271634999b060eL,
+          0xb938b5d820de8aL, 0x138f6e7eb49a32L }
+    },
+    {
+        { 0x7feda63e485f70L, 0x646380aeb27b2cL, 0xcf8fe32c4511c7L,
+          0x2c68e1eff9406aL, 0xa9f2fd920b6020L, 0x1c98fc63b3e465L,
+          0xb8dac3593e53aaL, 0x2fb47b6a750e96L },
+        { 0xea373ef1950bb3L, 0x81566944ac7aecL, 0x8d6b3c2b55b931L,
+          0x5d13f2db62ef7dL, 0x4647f2aab9182bL, 0x8f56c5a33bf07cL,
+          0xc5ab284b35a221L, 0x0747ab75a46a6bL }
+    },
+},
+{
+    {
+        { 0x5b9236c86b85c5L, 0x5967a0dc482448L, 0x397c9557df6ae0L,
+          0xf83ee1c5378f2bL, 0xf82df656e05dd1L, 0x4c424f619d7c8bL,
+          0xa612550a6d5f2aL, 0xfe8482a63c3ebfL },
+        { 0xcb8d4030142c82L, 0x08b06623679e6cL, 0x3ea51463eca5eeL,
+          0x089eb3b1370500L, 0xcbfb19c5a0d306L, 0x2f6858842a65bbL,
+          0xe3e1db5e51e119L, 0x2c150e7110895eL }
+    },
+    {
+        { 0xf323488f6d4c4cL, 0x5fc931f63b87e2L, 0x8867da035c759fL,
+          0xb6f1eff9746d4cL, 0x8a8172d990be0aL, 0x1113eee5c407b4L,
+          0xd80dacf378ed8aL, 0x99b57cf3fa7fd1L },
+        { 0xf5bb6d95176405L, 0x6b8963a92e83b5L, 0xac55b6b8a7ef8dL,
+          0xe73fa126c1fbf0L, 0xdb3756060148dfL, 0x72f1a98f3f1fbaL,
+          0x1f71d0aea550f2L, 0xc3ea4f09544a87L }
+    },
+    {
+        { 0x5b09da24322bf3L, 0x2a573d561264e1L, 0x93cb2e1803acc4L,
+          0x397b4fbe502fc6L, 0xddfb21239e0ebcL, 0xeccd8f5bbcbc57L,
+          0x49d3bed4663788L, 0x37192aa1218df9L },
+        { 0x8a05bc92ffa3c6L, 0xc38c28123ebf4dL, 0xc80d547fe343a8L,
+          0xa8d5a5b6c63516L, 0xc5d8ce18d8fa6bL, 0xeb5e87224a87c0L,
+          0x9806e9e75bfa23L, 0x11f0889689469aL }
+    },
+    {
+        { 0x81005f68e75666L, 0xb84d861d349505L, 0xe0832829f321eaL,
+          0xb751d7acfa33a1L, 0x793cf6f067c550L, 0x073a6b21027e56L,
+          0x53f40ee66a6012L, 0x70bfaa8c210fa9L },
+        { 0x1518e39e4b5998L, 0x8f0b53024b8d9cL, 0xd91c281afdf923L,
+          0xc5cfb2824e3f69L, 0x63a529a870871fL, 0x3d3e8872128dadL,
+          0xed658dccb30cceL, 0xf9373b9afb7baeL }
+    },
+    {
+        { 0x22d4dbede58ed2L, 0x4fefc1d03f8789L, 0x6b0a1fe344817fL,
+          0x96bef40a56b0b2L, 0x32684eeda249faL, 0x8298864524a91bL,
+          0xa958baf0c736a1L, 0xd033a7def2f3e5L },
+        { 0x5be3edc43f4d6aL, 0x326a39d9c89abbL, 0x90c44f755d997aL,
+          0x20581066e966c2L, 0xdbae4906548038L, 0xac7bc97d473fc1L,
+          0xb34488b4b2603aL, 0x27aea275e9bb98L }
+    },
+    {
+        { 0xa59e7281b88773L, 0xe2f05d40c241f6L, 0xa56229e4e75749L,
+          0x8f00c0b1b10705L, 0x855994619394d3L, 0x0d7e352aaf5e32L,
+          0x526c462787b8eaL, 0x89297d9a179d48L },
+        { 0xeff17e6ef43892L, 0x17091eb221f841L, 0x82f5eb34a4b848L,
+          0x6bea4778eb7b76L, 0x21f227176c536cL, 0xd9ef2c896c81bbL,
+          0x7c2754654bf4d3L, 0x9dd4662d7c28c8L }
+    },
+    {
+        { 0xe7fff0020e1a6bL, 0x26a35c6a08d467L, 0xb3c773d3248c91L,
+          0xa646615ba7d935L, 0xa91f453b0d26faL, 0xdcf9c3460c6d32L,
+          0x63668619e3e3dcL, 0x3012813f30f3e2L },
+        { 0xac6623dc2fc61aL, 0x108dc252bfd2ffL, 0xd7f5c0d231d6eaL,
+          0xa904f9aad1107eL, 0x46941c20d1e9c8L, 0xe5b6451c810cf2L,
+          0xaba8e674f511d1L, 0x5b4b94f08373feL }
+    },
+    {
+        { 0x002d4e2849c230L, 0x9bed0efd8ba391L, 0x745e0c0828e319L,
+          0xcd40907ca58de2L, 0x2c87ab11abaa4aL, 0x3c17a97db64391L,
+          0x36b184e86c72d2L, 0xb03d202485f7aaL },
+        { 0x2b6b79bde24abaL, 0xdcb78542325fb2L, 0xf5d1db966ebae2L,
+          0x35a4d5b903840aL, 0x7afeb09190e9daL, 0x1818f6a35c1792L,
+          0x90091fa3faa269L, 0xc4ccff62570235L }
+    },
+},
+{
+    {
+        { 0xa177619ec85940L, 0xfca24db7ef7eeeL, 0xb2450f37a90c11L,
+          0x29d256ddbf4f85L, 0x920c8d051316c3L, 0x2f7f7ba04474daL,
+          0x308117f2ec9a0bL, 0xd0a231ad0d2085L },
+        { 0xf3288fc7ab641dL, 0xc68bade9f4fa32L, 0x768f014bbf8253L,
+          0x5eff260c0a33f0L, 0xc71b4536bb93ceL, 0xa71d045680697fL,
+          0xb62444cce72bc3L, 0x11f03e8d1379f3L }
+    },
+    {
+        { 0x1f54789c16df92L, 0x874c642e3ed142L, 0x6699f60fa2a9f1L,
+          0xbd1b8d33fecfc1L, 0x59682d58a3d953L, 0xf17c0214a36b81L,
+          0xeb9621d181a666L, 0x7c2c3ab3cf1ad8L },
+        { 0xe6888c3e529f7cL, 0x197b66ab355315L, 0x63b558a83e31acL,
+          0x4aa7bc5891c68eL, 0xc17d989592e360L, 0xc750a291363666L,
+          0x0d534704909ac0L, 0xd6d02724594a10L }
+    },
+    {
+        { 0x35c541b3fbb635L, 0x50016d05982afaL, 0x58ebce496b0ca0L,
+          0xb940027577ea56L, 0xf29d305e38480fL, 0x43705b0ebd6a2cL,
+          0x0e4acdae90c639L, 0xbe94a29f56e05eL },
+        { 0xc61f4a030659adL, 0x39074adc402211L, 0xfe0d8d551b621dL,
+          0x2d02e8dd1d5222L, 0x05ece3c46c2683L, 0xf70705ac689d41L,
+          0xe3caf444d837bfL, 0xfda058475ba6d0L }
+    },
+    {
+        { 0x1098163cb7d458L, 0x12b645ff5ba834L, 0x70a318128af72cL,
+          0x5f4727ef32e5ddL, 0x7cbae1510a21b4L, 0xa80bf806785389L,
+          0x9827402b8f93b7L, 0xe385f8208349daL },
+        { 0x2d054619589f6eL, 0x6aa5b26e7c0191L, 0xe79ae12bd5574dL,
+          0x5d13f914148e61L, 0x7b2be0f13716ffL, 0x82b0fe680bb81fL,
+          0x697633c3e2569cL, 0x6c1f083873f8b3L }
+    },
+    {
+        { 0x6e26d850be1674L, 0xe4e47f6ab8044fL, 0xfdf46e882fc434L,
+          0x639ae2cc89cadcL, 0x2244a524b85bdcL, 0xb1e4790b7cf4eaL,
+          0x51dce037e0bb8fL, 0xdd143352716ceeL },
+        { 0x1c049b48e8841dL, 0x6bf26dcb97c621L, 0x21d6255ba01178L,
+          0x477258a8e4f0e4L, 0xf5e437e68f8ef1L, 0xd118fbc8b03e1eL,
+          0x3d6bc51e1c91b3L, 0xa259486d5b6907L }
+    },
+    {
+        { 0x4159cfc7b6f5dcL, 0x05a52b3493694aL, 0xeeb511c83b8883L,
+          0x19d79e42b06400L, 0x8e503a2738f37eL, 0xa30e5795a94ad9L,
+          0x3981c75262618dL, 0x06b6c692dcba19L },
+        { 0xd7242ee4d1b051L, 0x6274ccb3b350c4L, 0x66df0bbf540019L,
+          0x4d66be65ae12d5L, 0xcea29601049cbaL, 0x40473398df84b3L,
+          0x7d6c96b75a31c8L, 0xbb80159874174cL }
+    },
+    {
+        { 0xf0f7be059f1aa4L, 0x798f39adcff451L, 0x96763ff8014e1eL,
+          0x03987a809cc5ecL, 0x4919656893650aL, 0x92e8eef75e24dfL,
+          0x54e97cde89d639L, 0x8081d067682cc0L },
+        { 0xb9ef41aa8ceb71L, 0xb8173a4a4d7aaaL, 0x93d81b1c54ee10L,
+          0xabe180570a445aL, 0xac0ff9764d569dL, 0x86946b23e570beL,
+          0x8e11dd24180641L, 0x3d0b33c99f67dcL }
+    },
+    {
+        { 0x2c9637e48bf5a4L, 0x9fdec19ccaf112L, 0xe5cde9d5c42023L,
+          0x9869620878f0ccL, 0xcf970a21fe6ebaL, 0x1df5ec854e678bL,
+          0x4667f0128d00ddL, 0xfa7260db0b3fa8L },
+        { 0x6bd2895b34239bL, 0x04c8bc52d2a50dL, 0x14e55ef6cb23e2L,
+          0x6440c273a278d5L, 0xf4b12e32193046L, 0x46adf645dd4c08L,
+          0x70e29984656e8cL, 0xe7b36eae4acd44L }
+    },
+},
+{
+    {
+        { 0xea64a5716cf664L, 0x8497ee426fd357L, 0x44d94b4814e851L,
+          0xf4aac225a6a2cfL, 0x947b30980c301fL, 0xf390ba17865383L,
+          0x16c4fc6d1773d3L, 0x61b98146227220L },
+        { 0x07dd03a1dd0270L, 0x290ca820f160dfL, 0x8f2205444ba955L,
+          0x4e85e450b6f1b3L, 0xfd73ce9ad78089L, 0x67c12702f2cb0eL,
+          0xa7de0d7ee33a61L, 0x6a811cc6553261L }
+    },
+    {
+        { 0x5ef05742d0a427L, 0xe8d2e95220a341L, 0xdd28cbf8044886L,
+          0xdad7b4ba1aa58bL, 0xb28f3738ec901bL, 0x1841a935bbe3dbL,
+          0x8fd7cd1a075feeL, 0x93b603fc0d3cddL },
+        { 0xca54fd55edd859L, 0xa4cb05f64ed687L, 0x3138668ed1a3d7L,
+          0x1224fdaee32be5L, 0xf1f532bc80aeb3L, 0xa4f65d0e8d4d69L,
+          0xc697a015905fe5L, 0x514da7a6690ce4L }
+    },
+    {
+        { 0xc7b9af83de4a55L, 0xc79bad7b318d93L, 0x1808071f5b1c83L,
+          0x92112efb965b16L, 0x655ab387bb740aL, 0x53dbc8b384ff87L,
+          0xd153c2872dc6f2L, 0x2ec20e199c7819L },
+        { 0x65e46ea3b854b5L, 0x272d5aec711db5L, 0xfd1bb5326e19e8L,
+          0x33280b83dc0665L, 0x95b986eb8f1c4aL, 0xa671fc4a685c4aL,
+          0xa03cbd583bdbbfL, 0xd329402ab77544L }
+    },
+    {
+        { 0x40fa6518e62b35L, 0x3913b11f9e55a6L, 0x4e8089b5270a41L,
+          0x565f52a80d1886L, 0x93b5f05512749bL, 0x35c869c141c547L,
+          0x9a44a1af86717fL, 0x2b9984b9c2b2cbL },
+        { 0x61fb6074952322L, 0x2d4072f7af1464L, 0x9b2fa8c600eb30L,
+          0x6071fb7f10668eL, 0x27cc24d90634caL, 0x3875bc2471d32bL,
+          0x678590ba11210cL, 0x352b447fcc5a9aL }
+    },
+    {
+        { 0x795d5415fa3200L, 0xadaa557a92949fL, 0x42fff063cc88c4L,
+          0x26d683171b68a5L, 0x3286549e67ad8cL, 0x5bf636386396b2L,
+          0x41229b6e12c8eaL, 0x05320c9748952eL },
+        { 0xae36b63900b460L, 0x9354ff2f2b6affL, 0x10b810b065ee0cL,
+          0x4d6925fcc8bb38L, 0x31c03fd7a22f14L, 0x76b7f4457544e8L,
+          0x3a9123cc0eed26L, 0x77acd67e0cd1ccL }
+    },
+    {
+        { 0x2e9053007ec527L, 0x32388ef62937cfL, 0xa445389e229188L,
+          0xa44b68e33bcebeL, 0x5a8722e4c4e701L, 0xfd066e8cf07e41L,
+          0xa3c1a4f95fab62L, 0xb4d6a1be542f24L },
+        { 0xe6a92e4af6c9b5L, 0x9452484c83d61dL, 0x422b55b0062276L,
+          0x261973a5279688L, 0xde8be263999fb2L, 0x64e96287b029caL,
+          0xd8edfaa06897d4L, 0x408319c6955511L }
+    },
+    {
+        { 0xff6baed50a5632L, 0x922b7d05c5885aL, 0xdf0f3b31b45864L,
+          0x27e49c0c04340eL, 0x618c566122c447L, 0x7863a38eafee7eL,
+          0x7143affb828cb0L, 0x51fcf4cf9d054eL },
+        { 0xc4a4b3127f5e09L, 0x021f47a90be2bdL, 0x1a060197ab956dL,
+          0xe77fa1586ea86bL, 0x9ccde87d550ef3L, 0x7dee53a6532654L,
+          0x8b4f060e826387L, 0xda38637ad077b5L }
+    },
+    {
+        { 0xbc901b30e9fac8L, 0xfa082046fb2a2aL, 0x92f68ab5e04efcL,
+          0x184a30a9ac12d0L, 0x1aa11aab25d479L, 0x8bc5f4c0f03161L,
+          0x7e3a083cfc8817L, 0x84d9355597f93fL },
+        { 0xc014478239abc6L, 0xb226b098d37b04L, 0xb056942f575789L,
+          0x816b95aba745ebL, 0x2a49d39b98ddb6L, 0xc41ca26291af81L,
+          0xb3afe99ab26347L, 0x59c31bc604b638L }
+    },
+},
+{
+    {
+        { 0xa16a8b9c42befdL, 0x731c9c92052f00L, 0x1ad49b41f5dfa0L,
+          0x7a289e3bffce36L, 0x868fac00c79cf1L, 0x6d6d28486721abL,
+          0x590f928e726c94L, 0x0e802cb51f3841L },
+        { 0x6a6a57a0b694bcL, 0xb9bb0cd8120fb8L, 0xad96ac79c05826L,
+          0x294da8c7768df0L, 0xfe32311b56c6c6L, 0x291c2c6ae8d050L,
+          0x1c765e7e7db4c9L, 0xe058298d65f9f7L }
+    },
+    {
+        { 0x4bfa85b7e8d345L, 0xa04ef95de1dfc8L, 0xb5f7f21324ace3L,
+          0x4b350a1574b14aL, 0x11436bff8e5c8dL, 0x1c789f97642369L,
+          0xeb5e335fb623ceL, 0x9deacd2442d562L },
+        { 0x4ff989f531ee71L, 0x43e2c49aacb52aL, 0xa76319885bfadcL,
+          0x08b6d5cd0161a0L, 0x010e3fa541f197L, 0x83a589e3279a16L,
+          0xf0991376309f9bL, 0x07c093bf1cea10L }
+    },
+    {
+        { 0x1ce3f0f33d2192L, 0x07b559ac37ce73L, 0xaa2ad38207be27L,
+          0x84f053b7ed93deL, 0xbc5c7973b98a4bL, 0xc92346163aa9b9L,
+          0x807cc16231a10cL, 0x8ffdf57a061209L },
+        { 0xa9ca741497070fL, 0xf608ec9d113b3aL, 0x51327268d0384dL,
+          0x96686acf5ec307L, 0x437bbbd71c4665L, 0xdef09d57c379caL,
+          0xf8be033621747cL, 0x2775b378ae8047L }
+    },
+    {
+        { 0x4009798b2c4fc2L, 0x148d7d1203772eL, 0x9d9392df8423fbL,
+          0xa5bd72eaf8cef4L, 0x579d58d4380b53L, 0x2ff88f18c39d24L,
+          0x9ca2fbc5706466L, 0xb42987d1e56af2L },
+        { 0xcc2556e5d94ea8L, 0x4e5c2b35369d76L, 0x5de35742a94f9cL,
+          0x8d068c95cb4145L, 0x4d553ff51bfcbfL, 0x3ab71648a23fceL,
+          0xc9cb3a9d0fa7f3L, 0xf81209bed9ced1L }
+    },
+    {
+        { 0xde7356ee5b66f5L, 0x7b2bf1ae8a25e0L, 0x09a444a2c9b725L,
+          0xfd8a2f44906c55L, 0x409cc8082514f3L, 0x47e009928999a9L,
+          0x0a582a66a312f4L, 0xf7946f8f6723deL },
+        { 0xa55f6ba92d8affL, 0xb62c3c8a544b1cL, 0xa1d14115c16a94L,
+          0xc3783192ad5e71L, 0x13d784706b1dd6L, 0x99005f8ee7ff55L,
+          0xfb5ea3f8a1e7d8L, 0xdc7f53cb4cac39L }
+    },
+    {
+        { 0x482abaf36e3794L, 0xc23e9e5c74684fL, 0x4544cf6f1629beL,
+          0xd8a8ee52f40374L, 0x2eea87ff433bdbL, 0x489a99cae9990eL,
+          0xefc131e54b23b6L, 0x25fe6998600270L },
+        { 0x03d2d9ec059a7eL, 0xa6445b56979c3cL, 0x491a10c9bfbceaL,
+          0x15b5974e937af1L, 0x4be8002797c7fcL, 0xbed8a49fedcfeeL,
+          0x35751cea9e0691L, 0xe9a9fa39ef5982L }
+    },
+    {
+        { 0xeffeaca3065de7L, 0x841d544ac4d4e2L, 0x8144679caf199fL,
+          0x98cf4f9443967aL, 0x8cd57f4f33183cL, 0x390832ac1b15ebL,
+          0xc4b1feaa53b500L, 0xd762a10dff24b5L },
+        { 0xccd3eedb0ee2a9L, 0xa6dd4a9362d485L, 0xeb4ff26f1d047aL,
+          0xc0771fd23860fcL, 0xdbb4e394b64114L, 0x2ff3f244d29b29L,
+          0x9cac005387b365L, 0x05b7aa6de5994aL }
+    },
+    {
+        { 0x5e71752c03dd63L, 0xad10fe9bc74687L, 0x51a5b0c54c76abL,
+          0x763fd501f586d4L, 0xc7bd5ce816048bL, 0x8fc83d23f744dcL,
+          0x0561802109df9aL, 0x18fb01fccf0e43L },
+        { 0xe4606fc038ab23L, 0x5878f1fa664c98L, 0x3aedbbd5da7356L,
+          0x3c578f5516746aL, 0x259477f1a17210L, 0xc7a869d028248fL,
+          0x6517a6148cbf95L, 0xbc5f91d3d04d47L }
+    },
+},
+{
+    {
+        { 0x15fd9a9083ca53L, 0x1161da02697ca6L, 0xf516af356b676cL,
+          0x8a420d575eec13L, 0x72d67421a9526bL, 0x8d8c29e76b463fL,
+          0x38a4f588815627L, 0xf7e528be0650f9L },
+        { 0x2cfa78e382edcaL, 0x638d183c4ad83cL, 0x96d3b9de4a0119L,
+          0x5769ccba7c1101L, 0xc3b3b792b8d04aL, 0x96212f64951bdeL,
+          0xad7905a481161eL, 0x8fd676241c5edfL }
+    },
+    {
+        { 0xf7b063539d6cdeL, 0x69d0549115a84aL, 0x4a976c6cbd9fe4L,
+          0xc92953f950ff96L, 0x1d7f0fe654d127L, 0x7293870da0f75dL,
+          0x7bb3652cf2277fL, 0x64798c9834484fL },
+        { 0xb94d8bfac3a76cL, 0xf5721a97ff776bL, 0x23a6e9f2722e31L,
+          0xe9da9969a5c034L, 0xb9bbf83456ebc3L, 0x239f58a96956a4L,
+          0x8b75beb18b7f00L, 0x6c2b5b8a51cb97L }
+    },
+    {
+        { 0x78b1c627eb41f3L, 0x0638fcf17c4352L, 0x939edd80c5709cL,
+          0x0a8dfc3edc906cL, 0x3942f47efb01edL, 0x4c8275749986feL,
+          0x792545c4dffa57L, 0xeee68836c3ff26L },
+        { 0x824d08e12b1218L, 0x515a478902457fL, 0xc70cc9cbae55b3L,
+          0x1240737bcef9d4L, 0xf22e6162f9db7fL, 0x98c4f0291f8da2L,
+          0xa89219cafaaa67L, 0xf35fd87e7d27e2L }
+    },
+    {
+        { 0x19b0cd701b80d0L, 0x3d7e29df9aebd1L, 0xd39c9ca0477cbcL,
+          0xac0f6155ff0d3dL, 0x8a51993520fd01L, 0x508ff54b22d6fbL,
+          0x8786c47318d3abL, 0x4312c464a683f8L },
+        { 0x73b1d3995359f6L, 0x0d94fa5963011eL, 0x5723af29bfe83eL,
+          0xafa90016841df3L, 0x791e92ab7c498aL, 0xbc931ad7ea4253L,
+          0x438e016b783c06L, 0x1347db22ca662bL }
+    },
+    {
+        { 0x41df37dfbaa861L, 0x98ecb23329e4deL, 0xdaf1560507e018L,
+          0xa902269b088e32L, 0xad898a5e4cab2fL, 0xd84e9ed02c1e1bL,
+          0xc20a5d58488af3L, 0xc7165af6cc77c6L },
+        { 0x8526f3adeb7461L, 0x03577b14a2d332L, 0x28e469de4760b5L,
+          0x442c7f9b276266L, 0x90d5c77f9c90faL, 0x7aa87163e211bdL,
+          0x56d8ff05decfd6L, 0xa204b56ee23e6eL }
+    },
+    {
+        { 0x2e4374e4aceafcL, 0x978743b6fcd5e5L, 0xa0f6345c4855caL,
+          0x9bc7e4fe98074bL, 0x3835d57c33d08aL, 0xeec7c8b6f00566L,
+          0x71628a21acf55cL, 0x5da375097fb19eL },
+        { 0x6904a8e01a7125L, 0xad33c85e6e3780L, 0x1702928c19f94aL,
+          0xb424ff27c04b3dL, 0xb212e3919e2ba3L, 0x4cca8e8c9af4c9L,
+          0x98ab7aefd9bf0eL, 0x21d245d9799db5L }
+    },
+    {
+        { 0x6b034dcec08806L, 0xfd763f2b40f2d9L, 0x5e16de029cb906L,
+          0x02b70148a0e16aL, 0x463c8eee071e12L, 0x644728125ad509L,
+          0x9ee6f2ddc0e07aL, 0x188895c68d4d97L },
+        { 0x092fff3b27f971L, 0xb3c159fc9b7722L, 0xe27d8ff3cae42dL,
+          0xf8a5ed6e87071dL, 0x318388f607ebd2L, 0x924967b53486f1L,
+          0x77304947c46e1fL, 0xf279c60f21d196L }
+    },
+    {
+        { 0xef2bc0384f3201L, 0xf8750c71f94c51L, 0xbaa4f5a986ec65L,
+          0x6f8a5de2732a33L, 0x0f13d80299e365L, 0x2709530e85261fL,
+          0x097d922f527d56L, 0x4969687be1f3f8L },
+        { 0x9f3f5043e1708dL, 0xac67b874aa4be4L, 0x75fb042320a87eL,
+          0xa361ad36e2cad6L, 0xcb01470203e9f6L, 0xe3807b7c9b76c6L,
+          0xf086833b907c09L, 0xe9bed3c7e85a01L }
+    },
+},
+{
+    {
+        { 0xa7ea98991780c7L, 0x04e4eccd2476b6L, 0x0af9f58c494b68L,
+          0xe0f269fdee64fdL, 0x85a61f6021bd26L, 0xc265c35b5d284bL,
+          0x58755ea3775afdL, 0x617f1742ecf2c6L },
+        { 0x50109e25ec556aL, 0x235366bfd57e39L, 0x7b3c97644b6b2eL,
+          0xf7f9e82b2b7b9cL, 0xb6196ab0ec6409L, 0x88f1d160a20d9eL,
+          0xe3be3b4586f761L, 0x9983c26e26395dL }
+    },
+    {
+        { 0x1d7605c6909ee2L, 0xfc4d970995ec8aL, 0x2d82e9dcf2b361L,
+          0x07f0ef61225f55L, 0xa240c13aee9c55L, 0xd449d1e5627b54L,
+          0x07164a73a44575L, 0x61a15fdbd4bd71L },
+        { 0x30696b9d3a9fe4L, 0x68308c77e7e326L, 0x3ac222bce0b8c8L,
+          0x83ee319304db8eL, 0xeca503b5e5db0bL, 0x78a8dceb1c6539L,
+          0x4a8b05e2d256bcL, 0xa1c3cb8bd9fd57L }
+    },
+    {
+        { 0x5685531d95aa96L, 0xc6f11746bd51ffL, 0xb38308ac9c2343L,
+          0x52ee64a2921841L, 0x60809c478f3b01L, 0xe297a99ae403acL,
+          0x7edc18fcb09a5bL, 0x4808bcb81ac92aL },
+        { 0x3ec1bb234dc89aL, 0x1e8b42e4e39da5L, 0xde67d5ee526486L,
+          0x237654876f0684L, 0x0a583bd285a3ddL, 0x3d8b87dfe9b009L,
+          0x45bd7360413979L, 0xb5d5f9038a727fL }
+    },
+    {
+        { 0x7b8820f4bde3eeL, 0xea712ef24d5170L, 0x517f88cdf6ec7bL,
+          0xb15cecf983ea9aL, 0x9eeee4431a4592L, 0x786c784ebb013eL,
+          0x2f06cb31f4e15dL, 0x5603fd84f4fda1L },
+        { 0xf6790e99e1321fL, 0x274c66a74a4c09L, 0xa4b70b49a41a4eL,
+          0x7700bddada5157L, 0xe54a60d51be8dcL, 0xfaf92761a477e0L,
+          0x6661c72b027eacL, 0x50e2340280b917L }
+    },
+    {
+        { 0x635f40f96ec123L, 0x4a331337a766a4L, 0x9ce4416b935587L,
+          0xbb6e1f595d97e4L, 0x26147239d4197dL, 0xabd4478490e896L,
+          0xf6a1b2a8bba895L, 0x401fa405e27a45L },
+        { 0x7354ba50620900L, 0xc443a29385678bL, 0x48aba1053cf5faL,
+          0xd67e723bbe152dL, 0x4b858e02a63d68L, 0x174e1ee72be4eeL,
+          0xad0fbb39ab8d46L, 0xa0fdffbce17dd7L }
+    },
+    {
+        { 0xa1ea3259c46fd8L, 0xeca122e9fb96efL, 0xf9074a26767acdL,
+          0x9b004a22787082L, 0x389f8077f3ba8eL, 0x6463de90d5aabeL,
+          0xf30ceaab090585L, 0x71b31e85634ab8L },
+        { 0x0dee65caf02aedL, 0x506886e20ac252L, 0x0665f7886b8a59L,
+          0xb9b784df2bb328L, 0x46e443adc6b089L, 0x3d5de1966c27fdL,
+          0x0419265f0fde70L, 0xed946122b5c034L }
+    },
+    {
+        { 0x5a52ad213b0056L, 0x9fbeb92b909ee3L, 0xb42ba18bdaab08L,
+          0xec127c4ffc8a77L, 0xc6d2985fda906aL, 0x5355547994bbe7L,
+          0xa7470c09cdfd62L, 0x31a3971d2e675aL },
+        { 0x8d8311ccc8b356L, 0xabb0bf801b4372L, 0x33c1cad0294566L,
+          0xe2e649ce07b672L, 0x9084d882ae3284L, 0x7a90d4c1835ce2L,
+          0xb4d1cd5809d44cL, 0x78227149f0528fL }
+    },
+    {
+        { 0xca884cfbf5844bL, 0x9dd05c48524cf9L, 0xdbffa1936ba889L,
+          0xef94fdd29e7666L, 0x358f81b3eaf48fL, 0x96734d51530d56L,
+          0x378b2d14adf9e5L, 0x2f850464731f61L },
+        { 0xd6ae90599dcb83L, 0xa4f89e06199239L, 0x64052498f0f958L,
+          0x2866d99cc27707L, 0x64681a2f551c0fL, 0x2c7b0d04c37080L,
+          0x218925b00ac301L, 0x8d57fb354df895L }
+    },
+},
+{
+    {
+        { 0xdaebde0809c8d7L, 0x58c761c0e95ea1L, 0xbd9965000ae5e2L,
+          0x6117a85cd51acdL, 0xc4424d87c55d56L, 0xe9b1ddedfbeeafL,
+          0xda98bb50db4791L, 0xff3a5a63fca108L },
+        { 0x172fb8e5ccbea1L, 0x9fe12a7a9f6cc9L, 0x1de4b0b8967ce2L,
+          0xc1ab60f671dbc6L, 0x338385a5dedcdaL, 0x647a4203a043feL,
+          0xe9abc6428ebc89L, 0xc357ff003ba3c8L }
+    },
+    {
+        { 0x37061e7de39ebdL, 0xebb91352be567aL, 0xa9a6f6bd6bb80aL,
+          0x039345d99f0ba2L, 0x215494e98bbf47L, 0xf2cb7a4a2a1ccbL,
+          0xf51aa1037f67c9L, 0xd29c85c17fff71L },
+        { 0x8d4e4f24d30b87L, 0x20fdf5593a8309L, 0x9b9f9cf757075cL,
+          0x09142adcd70101L, 0x901d0ee766ca55L, 0x6a5d86a32e418bL,
+          0x550ad92d7fcaecL, 0x64e8818d91b26eL }
+    },
+    {
+        { 0x5cea0f747e5ee5L, 0x8ca1d31be99699L, 0x52db8465c136c7L,
+          0x8cecb3890e0d74L, 0xb8efe9dede2ad8L, 0x18d6ff8f17ade8L,
+          0xd2227352d66c20L, 0xc46593ef2005fdL },
+        { 0xe5ebe6ff7141e1L, 0xc968315e0126f2L, 0x95adc731cb91b6L,
+          0x753b54c38a6003L, 0xa6141254230a61L, 0x23ac6eb559feceL,
+          0x9816b603865c23L, 0x567014e543a570L }
+    },
+    {
+        { 0xd46091ddd2b71fL, 0x3999a5d97d24ffL, 0xce2a4f11ecff3cL,
+          0xab2687c581c6f0L, 0xa9fb2ebcba70b4L, 0x6fde35642093e1L,
+          0x00253ecaee724aL, 0xa08ce3c2b81bddL },
+        { 0xa251238935a2b3L, 0x8cae1d4584f750L, 0x011469e988a219L,
+          0x61f7ed35a6a50eL, 0xe13ebaa01fcebdL, 0x794b97631d8867L,
+          0xf25755ccda32e7L, 0x368a97b4564cd1L }
+    },
+    {
+        { 0x0d22224aa3397bL, 0x1dbb3e638066dbL, 0xfe0b5ee0ce8e32L,
+          0x09c17c87bab4dcL, 0x5cc65ddf188b64L, 0x74c4abf211b5faL,
+          0xdcc17b7ab0ba86L, 0xfbdf46fa535501L },
+        { 0x4775087aca569eL, 0x6575f9006a1718L, 0xb5c45a9b94de93L,
+          0x0fc80068497171L, 0x775d965489f7abL, 0x8775b58f5c0c89L,
+          0x05d4e201a06254L, 0x8cab349b6d73a5L }
+    },
+    {
+        { 0xca7816339465b0L, 0x3ef914814498fdL, 0x9ca1f346255c11L,
+          0x389fd15b7f38f1L, 0xdac2089354b8f3L, 0x82d07fca840a70L,
+          0xf53fd731dd483aL, 0xa6e4eae1590578L },
+        { 0x7bf65af3c01b77L, 0x27542f3a75c982L, 0xc5bd947716cfceL,
+          0xba5fe76884b9e7L, 0x39bae14d55725dL, 0x982f64efae0eabL,
+          0xcfae6627a5293aL, 0x22a25a1d60f464L }
+    },
+    {
+        { 0x74caecc7dd5e16L, 0x23678a2ce7bca3L, 0x467393257f1ba1L,
+          0x4eb9948a4c1697L, 0x5d400e8eaba18dL, 0x128d1c89807871L,
+          0x78f9627bff38a6L, 0xf80b813a39d4ccL },
+        { 0x8aeefa031d3aadL, 0x504219927db664L, 0x244fc694cb6383L,
+          0x319047772192a3L, 0xcc86075bbfb57bL, 0xbae3a134451511L,
+          0x16cf416f6174f0L, 0xb343cc0d376813L }
+    },
+    {
+        { 0x31ac9b9d1824b7L, 0x6282260ec8f61aL, 0xbbeb9f8c781765L,
+          0x06ab5c02d110daL, 0xd583e2247146b8L, 0x79a16084100d05L,
+          0x16dbbb4f0a5c95L, 0xfe2af1de331667L },
+        { 0x26f0364af8710eL, 0x1cb8c91eec08feL, 0x436bce61d95e9fL,
+          0xfe9050c57944a0L, 0x5f45acf07b626bL, 0x48dc93f9cf1276L,
+          0x4491371a05bfb7L, 0x51063044bcf785L }
+    },
+},
+{
+    {
+        { 0xac2e294ed0b3b6L, 0x5c5ade6671637bL, 0x2f289ce1140677L,
+          0xaf446e2754eb53L, 0x70911b720421adL, 0x4b73836e0b7556L,
+          0xcadf1042a97827L, 0x4824e498005bc6L },
+        { 0xb0eeccd937c28aL, 0x1ce061d0c3ee97L, 0xcb076319f33faaL,
+          0x9980bf4aea66dcL, 0x2bd0755d111d98L, 0x43feaf67fe4de0L,
+          0xe76fb80b077b2fL, 0x227dc9f5793b04L }
+    },
+    {
+        { 0xea24ae514f49baL, 0xbc39ea611436e7L, 0x9d7fed278485d8L,
+          0xb6ef00cdf8b131L, 0x0237b4bfdbc7afL, 0x08745b564ccd27L,
+          0xaf8595dafc5a76L, 0x43657af29f5500L },
+        { 0x300718348470f8L, 0x51f91fd640fd53L, 0x859c807be15512L,
+          0x7d1a474ab3e9c5L, 0x5d714d981553e5L, 0x07573436f62310L,
+          0xedc5be06b02a62L, 0x5a4b9b7ea47832L }
+    },
+    {
+        { 0x03e0a24e93dbb3L, 0x25841dccadc884L, 0xabc1a818d10ad5L,
+          0x207e38a2042dddL, 0x7fffbdbfeba8d8L, 0x74efebba3ec9b5L,
+          0x0bc39ca0b40a9fL, 0x69ee9c90267febL },
+        { 0xd402facbc62919L, 0xe9f8fc11cf53c6L, 0xe76fa5a7cc7d81L,
+          0x4f2d87696bb19dL, 0xd4fb7f9adc67c7L, 0x40621d596702dcL,
+          0x5b6a98e438f6c5L, 0xa7c64def1a1036L }
+    },
+    {
+        { 0x84c5e809a092c7L, 0x9e40e0a11c22b7L, 0x820a091d06c99bL,
+          0x45fdc77eecca8fL, 0xfe1b8a35794f16L, 0x31f7e5b4ce3d6dL,
+          0xfd5e01082c74c8L, 0xfdabf30c1f6f7dL },
+        { 0xbfa6017b9248a0L, 0xe898d30546b941L, 0x878c492207ff65L,
+          0xbf22e8db874e64L, 0x43fdb1b53a547eL, 0xb66deda5fbd464L,
+          0x59127a6c7ae1b5L, 0xa4636466a7515aL }
+    },
+    {
+        { 0x22c4e66de9ab2eL, 0xfaf60c20203c58L, 0xed2d7bf0d5c5edL,
+          0xdbc16fe4ca0f19L, 0x54e8ef6465b979L, 0xe2d64b1a310ef9L,
+          0xa0f2c953778636L, 0xf3b4aa4281883bL },
+        { 0x4ac9af09be6629L, 0xba455e11ca90c5L, 0x0147538856f492L,
+          0xc80db7eabd7840L, 0xb3526d96beb9cdL, 0x37657fb9d81503L,
+          0x8729a16193cec3L, 0xd9a93fbd69952aL }
+    },
+    {
+        { 0xfce017594f47c6L, 0x228da21e366d05L, 0x27ce0b2dc8baf3L,
+          0x8cc660b6b4a951L, 0xf678947384bb01L, 0xc629d7d44d980cL,
+          0x47980e4e85e81fL, 0xa2e636a1cd723eL },
+        { 0x6b6ebae77fb207L, 0x70179614c92891L, 0x5569541b4d279cL,
+          0xbb6b36a41758cbL, 0xecaa22227a8e30L, 0x8b6746ab470ad9L,
+          0x4c4601763e2d3dL, 0xe19c4edd3edaecL }
+    },
+    {
+        { 0x0b43fec34718c8L, 0x553c407f33499fL, 0x8272efb970d1dbL,
+          0x008c62ca8e8d1cL, 0xe4b79d763eec45L, 0x1fd4230f2d71a3L,
+          0x090fdafa368c36L, 0xf62c101fca7baaL },
+        { 0x1c9e6c8d2395b3L, 0x671ed6304c5513L, 0x577d933299a465L,
+          0x286890e63f9986L, 0xd92a95dbfc979cL, 0xcebd79d2b51019L,
+          0xe74d88b3d07251L, 0x8b6db73906f9adL }
+    },
+    {
+        { 0xc0c43db7b3d90cL, 0x85d154e4304a06L, 0xe8aceefaf2f38eL,
+          0x5e0429383d9459L, 0x65e5e32431afd1L, 0x9e5f050a900a65L,
+          0xcbaa1718a26671L, 0x33d0b249c93de7L },
+        { 0x3dcbf92d5b6680L, 0xc47e5ec20006f9L, 0xc9711299a51924L,
+          0x665d9b8cd0ed46L, 0xed2d63fa5fcab6L, 0xa817eb6cfbfc5aL,
+          0xb38169fb76eb76L, 0x8b93544f11160bL }
+    },
+},
+{
+    {
+        { 0x02eca52693bdcdL, 0xbbf09232ae01d6L, 0x0b0a2de8b44b3eL,
+          0xdb82449b250dffL, 0x0c42b866e1c530L, 0xcd226dca64c2c4L,
+          0xcfb2bb1f046b5fL, 0x97e2fae3fccb0dL },
+        { 0xdf9290745ed156L, 0x224dcb9f641229L, 0x2126abc5f1f67eL,
+          0xa7eed5ae9c8a6bL, 0x40abedc9857d9bL, 0x3f9c7f6de941c6L,
+          0x2158d42d725ddfL, 0xbdd10158c69543L }
+    },
+    {
+        { 0xa7dd24e8df2fbcL, 0x3adbcfd13d1aeeL, 0xf6a32d113b2177L,
+          0x89a72327a9a14cL, 0xe3aef43dc65df9L, 0xeaec3e3a64d74cL,
+          0x4d387d84fec33bL, 0xaba2a0521a2128L },
+        { 0x2382c226b85e30L, 0x4352d85cd2aad3L, 0xb0c6001d9772c4L,
+          0x7ed82635f3653fL, 0x3626a6f0300f47L, 0x23909de6ca7e4eL,
+          0xb43dd81c154141L, 0x9a49fad7e4bc68L }
+    },
+    {
+        { 0xa3661df2428f88L, 0xbe48b0256e0db2L, 0x3cd1871ce79aa9L,
+          0x90ab87123dddacL, 0x9c58fb971871a6L, 0xf031f7fa34910eL,
+          0xb501eea81060e4L, 0xdb668ba791224eL },
+        { 0x240bbcb6a705bcL, 0x7e76fbd2d1865eL, 0x6e2cd022513641L,
+          0xe6c522546365c9L, 0xe46a8b8a5a01fbL, 0x696fa7bb67618bL,
+          0x418b3b90db6792L, 0x7204acd7108b9cL }
+    },
+    {
+        { 0xb5a143b8456b45L, 0x8a3ab25f53b4d9L, 0xb112a58e13a570L,
+          0x613ca3281487d2L, 0x837d8233b1e7c9L, 0x592baded41e9d5L,
+          0xdc1893a5cd02f2L, 0x08795028972e23L },
+        { 0x7003c08cb76261L, 0x14bde9e332a5e0L, 0x14b2872cbbd78eL,
+          0x5594061de238e8L, 0xad12645067466cL, 0xa8d0e64f5e4952L,
+          0x5b44b82c7f8d06L, 0xb51bea8fb1b828L }
+    },
+    {
+        { 0xebad6853f0daccL, 0x5c31b8b1cbebbcL, 0x6746975fa5a2dcL,
+          0x2d9596531d9faaL, 0x343797d00fc0e4L, 0x38d821c55fe01bL,
+          0x0bfdb247323aa0L, 0x42613c4f962a8eL },
+        { 0x599a211e134bc0L, 0x75fa4a147a7084L, 0x6e719487f734b5L,
+          0xd5ced2d6dfca2bL, 0x9fa0fdc8aeabd2L, 0x5e6b03f12361daL,
+          0xad23d315859fcfL, 0x3120ef125a5fc8L }
+    },
+    {
+        { 0x990ef628e9f638L, 0xfdaa240626a60cL, 0x4a3de202abddabL,
+          0xd5d10b7d8872b2L, 0xa01b7301ea5880L, 0x481697fa81b9d8L,
+          0x29841533471ed8L, 0xefd73f8292d37cL },
+        { 0xdda76269994bebL, 0xa0377036a4f865L, 0xda992ece5b47d5L,
+          0x912a427e53edbaL, 0x64675989264e45L, 0xd3b68c3af71222L,
+          0x9d3436c6dedc5fL, 0x1e027af076b2adL }
+    },
+    {
+        { 0xd56fca14382f4aL, 0x83712a48966b7bL, 0xd6b2cf5a4c9ddbL,
+          0xa66be29f602875L, 0x70e4266894f3d0L, 0x007d220b3195caL,
+          0xba38d8f82c74d4L, 0xdccc5fcd975cbdL },
+        { 0x03e1610c88b38bL, 0xeb9f9a152e0d8dL, 0x6a57ecab646eb7L,
+          0x161641fc76b6c1L, 0xf9025adbd2e12bL, 0x87c74db5c0e26dL,
+          0xed5cb51bfeca74L, 0x603dfb6e34a08cL }
+    },
+    {
+        { 0xc4be728cb03307L, 0xde34c0ec2741ccL, 0xe01db05a74eb17L,
+          0x1bfce0c8905e4bL, 0xb18830ad1b1826L, 0xcacbb41e87bbfbL,
+          0x8696842d2f1a79L, 0xa80e5fb08c83eaL },
+        { 0xe48f1633f1439cL, 0xc1d4108cd6987bL, 0x05705c4b751814L,
+          0xa9bffd0c1c622dL, 0x23de4af46cd053L, 0xf782f5e39457c3L,
+          0x815276b5e5d243L, 0x31320416161ae3L }
+    },
+},
+{
+    {
+        { 0x245966177f2542L, 0x203be7e8372b25L, 0xc7c9426ee2007bL,
+          0xc5641380621799L, 0xda56589c28c3ceL, 0x13e8a7c7afc1e3L,
+          0xdba81e9e352082L, 0xf43054904435c7L },
+        { 0x4d26533691de4aL, 0x364408cfb777abL, 0xccdfb43eae7f88L,
+          0xbc40f44a525b11L, 0x8e112a53c60627L, 0x7f7c581e17e696L,
+          0x0fd78781ea774aL, 0xd09e6320b1f582L }
+    },
+    {
+        { 0x44390bd70aab15L, 0x41112bc889c3f2L, 0x6b02894d685349L,
+          0x71030015584dfeL, 0x373cb1b1ba7887L, 0x53d286c2a017c7L,
+          0x2ed03883c81fdcL, 0x3bfc5e3fbcc6fcL },
+        { 0xd38ac6ffd6418dL, 0xc667e96bfad89eL, 0x46f4f77eab4d66L,
+          0x194c04f0911293L, 0x0fd09cf68c48d5L, 0x6f5b05563cf7f4L,
+          0x0c0a8c4acd562fL, 0x94c1d8336d965dL }
+    },
+    {
+        { 0x94fc8f0caa127aL, 0xc762d5dd803690L, 0x8bfdfd11ebf0d3L,
+          0xa98cdf248eac50L, 0x3d7365d8b5ff10L, 0x20dc29bc65b4deL,
+          0x62ac28e8ec7c68L, 0x7f5a13290372d2L },
+        { 0xf3d8a253246658L, 0xa4bebd39ac202aL, 0x078ede75cc1697L,
+          0x5525800c8fc022L, 0x302a8025fae77bL, 0x018013957917b6L,
+          0x7c8806d864bf55L, 0x4e2d87812f06f1L }
+    },
+    {
+        { 0x8d351183d66e88L, 0xfb861a1a91d02aL, 0x8c27c2a7850e5fL,
+          0x9fd6399a5496f6L, 0x52152ae8080049L, 0x600e2fffd1c2dcL,
+          0xc75902affe8b2eL, 0x5c4d2cce03b175L },
+        { 0x8ad7c424f57e78L, 0x77cf6061736f87L, 0x2876012f85038aL,
+          0xff328451b97b95L, 0x3cc6dd5392dfc8L, 0x72f1363a6f5075L,
+          0x028ec4471de894L, 0x7030f2f6f45a86L }
+    },
+    {
+        { 0x66400f59695817L, 0xeda0a7df20ea36L, 0x855be51d394992L,
+          0x2d082c18336f62L, 0x30944ddf28c868L, 0xfb5f8530dc86d0L,
+          0x9562ae5564a0bdL, 0x1f7ea12b6b9b51L },
+        { 0x5bd74e0d0a7148L, 0x6c8247fb91e572L, 0x699aba547da498L,
+          0xed825811f7c814L, 0x434674b62057b9L, 0x8b4df5e15c15b4L,
+          0x2a97da1b110081L, 0x2a96b0c4c417feL }
+    },
+    {
+        { 0x4f75dfc237639dL, 0xe5ad6bc1db7029L, 0xd43e06eb3d28f7L,
+          0x89f3bb5e447989L, 0xc426a2c01a1a6eL, 0x33ea71c315878fL,
+          0x8a7784ab1b5705L, 0xa59e86e77ca811L },
+        { 0xddb133c36ae155L, 0x49f1d4c0d51b42L, 0x55080829d05519L,
+          0x20e23be5291816L, 0x35047ec67181ecL, 0x6237dc47aad091L,
+          0xa1d3ce1e2e25a2L, 0x1de05220d3db4cL }
+    },
+    {
+        { 0xe9a5e19d9fd423L, 0x0c2c3d09801e43L, 0x043c2dd28df2daL,
+          0x4eecab4e1ad12aL, 0x97e17979615aa5L, 0xe57b879ca7bb5eL,
+          0xa2a903ccc92619L, 0x5cef370aa56e93L },
+        { 0xbef29fa7f3232cL, 0x1cf35ed2b7ad5cL, 0x35c48933b6077aL,
+          0xe0651487a1d47dL, 0xedb4673ce14572L, 0xdc9e98c0b17629L,
+          0xef98ebe9a02a5cL, 0x1f772e311d03c0L }
+    },
+    {
+        { 0xcbdbdcd4608f72L, 0xb4352235a13c6fL, 0xa6497f64bb3c21L,
+          0x3af238312c15c9L, 0xfbbf4b36322d11L, 0x520a5c6c641775L,
+          0x18cd967e81e0e1L, 0x980b2c63de3871L },
+        { 0xfa9db619ae44a2L, 0x0281dd2176bc56L, 0xfd037118a7f817L,
+          0x9c485454129b30L, 0xb439648039626dL, 0x355050ee4ada6bL,
+          0xc9c16d67f5d98cL, 0xf53ccc318c4d5eL }
+    },
+},
+{
+    {
+        { 0x50ae9423ffb20bL, 0xa6c0b426865eb4L, 0x4677f7d09930f1L,
+          0x742e0b64a16427L, 0x521d18ef976f9aL, 0x43ac9cfa454749L,
+          0xda3a91dc51f50dL, 0xf657029ad6f954L },
+        { 0xfe5f0646b4f99aL, 0xd92a5d963ad4ceL, 0xfcb55092e0e081L,
+          0xadc85ab8d8a858L, 0x8e9b9660632f0fL, 0xe7a4f168d7216dL,
+          0x00a4cc559c3b99L, 0xed6d0bdba09dc1L }
+    },
+    {
+        { 0x7236d141621bebL, 0x1751fd4bc7ca95L, 0xaa619d12f5319cL,
+          0xfc2b15b4e9316fL, 0x2d1a9069fd4d33L, 0x28c3bac8ced829L,
+          0xf2efab51dd998fL, 0x2c133303b149edL },
+        { 0x65237c9f601ac6L, 0xb54dd6507d6a45L, 0xa1ce391fb1a4cfL,
+          0x2957533115f67eL, 0x6456da8465279bL, 0x02890aaa993e02L,
+          0x6891853b7175e4L, 0x3fda2030f3e59bL }
+    },
+    {
+        { 0xe99fe12d8c6e0bL, 0x7cb07ff5341c56L, 0xc292c7bdf77b24L,
+          0xf52dfd0ca29906L, 0x4a6aa26772f02cL, 0x26f7684e1bbd09L,
+          0xec56b2bee7c2a8L, 0x67709e6ad4a312L },
+        { 0x99c57b2c570263L, 0xeb0100b2faafaeL, 0x980d5d1ff25ecaL,
+          0xace35e682cf936L, 0x5a82ce544679edL, 0x5c76a41074b81eL,
+          0xf36fa43a00abb1L, 0x064281904ffb2dL }
+    },
+    {
+        { 0x68f6bc804bdd28L, 0xc311d96b5dc7adL, 0xff0d646ed32e45L,
+          0xaf3cdc6e0f712dL, 0xd4508e9d483861L, 0xb624be50e1c277L,
+          0xc510275c5dd841L, 0x451c5c3298dc02L },
+        { 0xf87d479dd34d6bL, 0xda7f293dd06a38L, 0x575e129b699e9fL,
+          0x79e5fb2215b2ccL, 0xd280028657e690L, 0x7fecd09e702a71L,
+          0x85160abfa13677L, 0x5de3427ce65f64L }
+    },
+    {
+        { 0x84e4bf6e8fff38L, 0x16f3725b358b1cL, 0x360371c3b472a5L,
+          0xe64c06152f217aL, 0x8e673790501241L, 0x88e81d6ab2dd96L,
+          0xf3e218a1385604L, 0x9736cafe84184dL },
+        { 0xb55a043dbb93a3L, 0x335088f9301088L, 0xcea7a2db2a4959L,
+          0x48e5d4ab882c33L, 0x114f09bad46179L, 0x4416467b446576L,
+          0x01cb23e34c6c2fL, 0xddebf04a02db8aL }
+    },
+    {
+        { 0x36d60cc9bde8a1L, 0x20fd2f2676e4adL, 0xebdcfb78936581L,
+          0x245d0d5dbfc2c3L, 0x104c62ca9f82e5L, 0x7387457d654d9bL,
+          0xe966777ae7f10eL, 0xefeb16f1d8e582L },
+        { 0x4faf4f170364b5L, 0x0e1ab58d612472L, 0x11bbfe7fed6085L,
+          0xb360a14a59a09aL, 0x61d96e9722fdb6L, 0x16a12f194068bdL,
+          0x225bf07f73c2beL, 0x1e64665c8bd24eL }
+    },
+    {
+        { 0x27a478a3698c75L, 0x778ccd36202aa2L, 0x0149c638d87f1fL,
+          0xa660e5f784edaeL, 0xe0d4d2f82adfa8L, 0xf512dd61ba1f9dL,
+          0x90cfed96245c58L, 0x6c3a54818b53ddL },
+        { 0x833f70cbdc094fL, 0xa5f26f5b1514e7L, 0x93e7cf51c8cf13L,
+          0x1436601186ec43L, 0x81924ace78170aL, 0xcc880a08694368L,
+          0x2dfa9550b62cbbL, 0x0bc6aa496b4a2cL }
+    },
+    {
+        { 0x5157a7e3561aa2L, 0x525c5008645c1eL, 0x22feb4ece7cbb3L,
+          0x36d0d25c89a58bL, 0x43131f7c9bde9cL, 0x74afdda881f731L,
+          0x99ab87c7c8e36aL, 0xf07a476c1d4fb2L },
+        { 0x1b82056bebc606L, 0x95a1e5afcf089fL, 0xc5bccfa2b55d5cL,
+          0x8fbc18e00eb0b1L, 0x93a06fe9efb483L, 0xcafd7252d74c57L,
+          0xc7518f03de4350L, 0x9a719bfc6fd762L }
+    },
+},
+{
+    {
+        { 0x5ee0d832362087L, 0x7f2c0d70b167e8L, 0xb7327895e0e865L,
+          0xef5b2e898c4e65L, 0x222797d8fe9cc1L, 0xfe6d73e82d1e15L,
+          0xc7c0e9cf62dc4bL, 0x962acfe937cedaL },
+        { 0xd763711c1e85c7L, 0x8f2dbbc2836978L, 0xbadc0558c44e98L,
+          0xed63eaba3e93f8L, 0x807e85741b55c7L, 0xd51ae5e6d1207bL,
+          0xa0ef9a639d541bL, 0x58855f9a0c56a5L }
+    },
+    {
+        { 0x7d88eaa213091dL, 0xcbdfee745b6a0dL, 0x826a0124f5e077L,
+          0xb04fc1390f1e4cL, 0x1961ac3aea69aaL, 0x3afb719d5bb63eL,
+          0x2a378374ac7e5cL, 0x78efcc1c50ca45L },
+        { 0x346e8f0b8abdefL, 0x27e3dbd88095d0L, 0x56d3379ffc6c22L,
+          0x67d416cfa4b291L, 0xc3baaf63b1b373L, 0x0184e1fdf73baeL,
+          0x38ae8f79167528L, 0x7329d4c35d6297L }
+    },
+    {
+        { 0x45d2ac9f568c52L, 0x51348149808593L, 0x0c92d8331b7ed8L,
+          0x921327a0876ecdL, 0xf752d75052736aL, 0x7b56487bc6b837L,
+          0x6b1a320a23b4ccL, 0x1983937ec0d665L },
+        { 0x2c3017c08554abL, 0x40ad955366e87fL, 0x88c4edf8ed7f02L,
+          0x64a7db13cc5e6dL, 0x5ac91fa2dc978bL, 0x016a20d925d2a2L,
+          0x3604dfeabb57b4L, 0xc3683ecd7e2e85L }
+    },
+    {
+        { 0xc47150a4c0c6d0L, 0x30af45ee22adcfL, 0x39b5acb022ea4bL,
+          0xfbe318577203b5L, 0xe5aaa346fd9b59L, 0x0062c90dd1c8dcL,
+          0xcf113f354049acL, 0xd8fba4d63a31b5L },
+        { 0x73b54881056a69L, 0x3be6cbcd780bdaL, 0x5776ec230ba2b9L,
+          0xbe883cf8e8d6f7L, 0x64efe945c2be6fL, 0x064f704f1ade8dL,
+          0x41cfd17743110eL, 0xaac94114c20abeL }
+    },
+    {
+        { 0x91f9192f1c1468L, 0x8176e744563e13L, 0xa48b5f90bda15dL,
+          0x2a085aeda42af6L, 0xfd38ab2425c018L, 0x2884ba408abafbL,
+          0x356f318cbd091dL, 0x454e450817871bL },
+        { 0xe080e818ada531L, 0xa40f1eb3152ba8L, 0x051049f0c38eb1L,
+          0x37e4bb3bd45003L, 0x6d0980454a01e5L, 0x6de932feeb824aL,
+          0xccdef37dc93481L, 0x8633e0793a05e8L }
+    },
+    {
+        { 0xbe94256034675cL, 0x376c01d08db789L, 0x8707ee79af1b6bL,
+          0x633b3ef11bfbacL, 0x694f33fd06db60L, 0x2a68bfcbb13407L,
+          0x1c860c9da27c3aL, 0xbca16ded701ac3L },
+        { 0x2b76cfac59ffd0L, 0xf9a116554d718dL, 0xf86a1db67f0878L,
+          0xe313e05af34e85L, 0xa1888113343159L, 0xdbe4c3f0bb7ed1L,
+          0x73b67e80c732bcL, 0xa4e1c87e74110eL }
+    },
+    {
+        { 0xce1106b5c6770cL, 0x422c70b5c0bcb7L, 0x32a39908195e7fL,
+          0xa24968d1ccd4aaL, 0x8f08ecf720e557L, 0x5da10a454bcc81L,
+          0x9d3c73b6cd846eL, 0xaeb12c7368d065L },
+        { 0x2110859cf9fd1bL, 0xd2a4801ee2bd6dL, 0x376e556e9466acL,
+          0x767803b3b5aa35L, 0x343f842b8a89baL, 0x3263cc16726bbfL,
+          0x26caf1725871b0L, 0xef66ad641b8578L }
+    },
+    {
+        { 0xc9f2249638068cL, 0x96d282c1ccf9afL, 0x71df30c69b435aL,
+          0x88c943acb9d5c9L, 0xbf98ef12a8f378L, 0xffc1824114c6ffL,
+          0xda3ad2cd52e8c7L, 0xf1222bc1afcb59L },
+        { 0x459e94b0ee334aL, 0xd4477b8421933aL, 0x60fb7b0a1e401eL,
+          0xfde6e820d1e330L, 0xcecfe9b3233fdeL, 0x09ec4662e93523L,
+          0xa5ba64930775b9L, 0xcc397e5adf80f2L }
+    },
+},
+{
+    {
+        { 0x2fe182d4ddc8a8L, 0x88d6e79ac056bfL, 0xc3ff2d10e41e4eL,
+          0x32ec7f92c3679fL, 0x3561f094e61051L, 0x4553f5a6c6250aL,
+          0x2b765efdd25c5bL, 0xe3a40a26a1cd7fL },
+        { 0xb27309b5d821ddL, 0x950fb8dc2c17caL, 0xfeed0158fb0d4cL,
+          0x762c479f550179L, 0x306cf44e095840L, 0x84b413ad379e66L,
+          0xd6e5d5abb2e4f1L, 0x8bc12b794b085dL }
+    },
+    {
+        { 0xc0d4cb804b5532L, 0x7a31525b9940a6L, 0x010e7dd68c69d1L,
+          0xd81f29d2a18c35L, 0x08ae7703f11e73L, 0x5358f876e55106L,
+          0x299e8cac960ef5L, 0x89a6fb4acfc8dcL },
+        { 0x5996a406dc7d4aL, 0x21e5112e51b96eL, 0x95b8c3d09a202bL,
+          0x306ab0fd441f1fL, 0x2834fed98d4245L, 0xc29c387d0abbdeL,
+          0xf6a9bf1b805c15L, 0x602f4f8c4e458dL }
+    },
+    {
+        { 0xf041486e5a893aL, 0x53b891d8934327L, 0x11e000d4000758L,
+          0xa4ccde8662bad9L, 0xe34d3edb9a1b64L, 0x72d967584e7a6dL,
+          0x773da2f6627be4L, 0xa11c946e835ae3L },
+        { 0x02e8203650bc15L, 0x2d35936e58b78dL, 0xe9cfbe8f21a3ccL,
+          0x55ad8311049222L, 0xbf99de438fff47L, 0xebbfd803831db5L,
+          0xe990636af2af42L, 0xc26ae52b7f5a0eL }
+    },
+    {
+        { 0xb5d85b1fa8f846L, 0x4166489b3b1455L, 0x768260dd36a305L,
+          0xc6a82354ff5645L, 0xd241cd8d6e93e5L, 0xeed9aa1a406e74L,
+          0x9e96ab05f600d9L, 0xa26b8b56eca2a1L },
+        { 0x78321cfd705aefL, 0xc4fb6b3c0161ecL, 0xdc324415199cf1L,
+          0x33627d0d0a5067L, 0x13490cb15143eeL, 0x77e0ede85b4f44L,
+          0x904f12e394b165L, 0x90f50f5efab32dL }
+    },
+    {
+        { 0x4aa0a16bc2de96L, 0x172596aaa9c12bL, 0xd512e1e60e8a29L,
+          0x77d35c1f637e83L, 0xbb0d141d2aae0bL, 0x8a878a58c03738L,
+          0x6d24c01ab0e525L, 0xb7d3136f760887L },
+        { 0xdbc3f8f3f91b7cL, 0xe7b4bcaa8722c0L, 0x3286a91da0ae65L,
+          0x8372274225b084L, 0x5884cd5ae1886cL, 0xb4e63ef3a23cf7L,
+          0xfe5f202f2dd0daL, 0x951fac9653916cL }
+    },
+    {
+        { 0x05e2e8f854fa4eL, 0xf411f941edaf10L, 0x26cc562a0a928dL,
+          0x78fd34e4abce65L, 0x1d8760998a32e2L, 0x85dc76f4c37518L,
+          0xdcaeef500e8021L, 0x7fcb2f84e9b2a5L },
+        { 0x9eba91ef382c06L, 0x2052e8524cae53L, 0x617336ef5c1519L,
+          0xf1546d5b4e632bL, 0xa9edc81d7b8ffdL, 0xdb2914f29ab68cL,
+          0xe805070debbabaL, 0x775e53bc3b719eL }
+    },
+    {
+        { 0xa40e294065256aL, 0x9f113868fb031aL, 0xac03af8059667cL,
+          0x432eb3a0475f58L, 0x22332bf01faad0L, 0xc8132e9bc57a11L,
+          0x27d5a173bc3f8bL, 0x5471fc6930bf3eL },
+        { 0xba28bc0e6bff40L, 0x198d57e555e564L, 0x13ce8319c65b8fL,
+          0xb0a5c9d5681b51L, 0x467588bdeb9e11L, 0xf1891a7bb4250bL,
+          0x10b938bd12b433L, 0x0b8c80224dcda4L }
+    },
+    {
+        { 0xc428703cf332d3L, 0x9d0053cf2a5b98L, 0x4e4c6207838a15L,
+          0x2e92919fbf8a43L, 0x39ad52421cd9a5L, 0x584ed6c1561588L,
+          0x20af30517a95c8L, 0xa223077b70e1c8L },
+        { 0x679cfea2fa4871L, 0x54f2a46ac633c7L, 0x60306514cdc5f1L,
+          0xc4facda75a1dc7L, 0x710a2882d07d19L, 0xd55864e6b44992L,
+          0x44d4b6c454c5b2L, 0x2855d2872f9981L }
+    },
+},
+{
+    {
+        { 0x4071b3ec7b0674L, 0x800eb14f8794d5L, 0x70573afbe6783eL,
+          0xafaa4407785901L, 0x112d2a1405f32cL, 0x3761a52169b3e2L,
+          0xe168b31842a366L, 0x5bc322f9bf4734L },
+        { 0x36ef240976c4a0L, 0x066f3d6fea4e64L, 0x0e954bda989e57L,
+          0xe36ef5ef9466e4L, 0x6bb615abeb9226L, 0x5571e5f3d5a2caL,
+          0xa86efe24897a86L, 0xed7e9cf28a9f77L }
+    },
+    {
+        { 0xdf10c971f82c68L, 0x796ba1e3b597e6L, 0x1ac77ece718cbfL,
+          0xc8175bb410eac8L, 0x0cdf9a1bc555efL, 0x6b889f17524e05L,
+          0x6bf1e61ae26d82L, 0xb3f6ad5d2e97d9L },
+        { 0x94dcff9f226487L, 0x60e6356be03ddeL, 0xda1f93b6a3dd7dL,
+          0xf1be72179ca90cL, 0x05ed3131e6bce5L, 0xcf50908d48af3eL,
+          0x3b0e85c61e554fL, 0xfe7e35ba2778d3L }
+    },
+    {
+        { 0x42c503275ac5a9L, 0xa66a66dda062c2L, 0xa4f4f82caa7023L,
+          0x489d47664b4f86L, 0x10b108897311adL, 0x55dd637177b2ecL,
+          0xa5ccff09a267b1L, 0xf07690bff327b0L },
+        { 0x39162ed2250cd2L, 0x1426de08b255f1L, 0xf227afd1bdd731L,
+          0x78f8a36fa4c844L, 0x267a211157379cL, 0x3f05f92cc04acbL,
+          0x374496cfc69caeL, 0xbf2c5d016ebfecL }
+    },
+    {
+        { 0x605418bd0518d1L, 0x3237f809e1cbc6L, 0x37a7005286c019L,
+          0xf1fb0e0b15af0bL, 0xfc3b97caa853c0L, 0x1f48bd0e6beba2L,
+          0x8e5d7c5e6a72f1L, 0x575e66d26ebf0cL },
+        { 0x099477662eae3dL, 0x53f074f96c9c65L, 0x6cfbfdbb81badeL,
+          0x98b4efe3fed7d1L, 0xdaa112338c3382L, 0xdf88b7347b8ec6L,
+          0x9b0fe4b9504a4fL, 0x2e7df4cf30c1c3L }
+    },
+    {
+        { 0x25380cb2fc1833L, 0xb8e248c18d62deL, 0x91c8f59d82f9dbL,
+          0x5ec2b202444750L, 0x3f3a1f766b6f74L, 0x0180aa9dd7d14dL,
+          0xd0a342d2956b9cL, 0x26e910e7139873L },
+        { 0x2261dc4139e23dL, 0x7edb181b8343ddL, 0xfcf1073b4038ddL,
+          0x88870efa3bfea3L, 0x4e98ba964a263eL, 0x3c6e5dc70811f5L,
+          0x17d28f5f86055dL, 0xca9c27666e4199L }
+    },
+    {
+        { 0x0b2d8bd964ef8cL, 0x5a99b8588e2ba6L, 0x9e927b204498ceL,
+          0x9ff20c5756eb25L, 0x97cc27b3f27736L, 0xf32dd6d4729583L,
+          0xbdc26580381a94L, 0x70fef15ef2c06fL },
+        { 0x50a619149252ccL, 0x9eb4a14236b4b9L, 0x9b1b2158e00f78L,
+          0x27add366ea9c23L, 0xef61763c3a8e79L, 0xed4542fd82ce56L,
+          0xa8737e70caed75L, 0xeca0ac2d452d76L }
+    },
+    {
+        { 0x20c07793d082d0L, 0x6e3ce64c9e9f3bL, 0xb3a4dce75a195fL,
+          0x3a3c305bdd9f24L, 0xe2545c88688942L, 0xa463c82080f32bL,
+          0x442974842686b8L, 0xf50e20d7213866L },
+        { 0x265ac523826e74L, 0x26fba57228e8ecL, 0x8a1e1dbe6b3ed8L,
+          0x7c7b278f0fe65aL, 0x9a6df23c395234L, 0x99562060b0f114L,
+          0x440c8c4ef90837L, 0x21ad22a3645f65L }
+    },
+    {
+        { 0x1e023a6edd31b2L, 0xf76d1459ff8668L, 0x970705617b45c8L,
+          0x06120781e88e37L, 0x85c51c8922faacL, 0x4df392e22756d9L,
+          0x8907fd0a03c98eL, 0x626f46a52ea51cL },
+        { 0xf8f766a486c8a2L, 0x8c499a288ed18cL, 0x44d2dc63c4f0deL,
+          0x47dde686f2a0b6L, 0x9a655f84a973fdL, 0x3e7124e786ac80L,
+          0x699e61ce8a0574L, 0xdf0ba9a31cdd0dL }
+    },
+},
+{
+    {
+        { 0x76270add73e69bL, 0x991120fc67d38aL, 0x7be58309469f0cL,
+          0x93aba597db40acL, 0x2b707bc822fc08L, 0x4199fc069551cdL,
+          0x38deed4f367324L, 0xca518e12228787L },
+        { 0x72f1befd9a9277L, 0x57d4aabe49ae90L, 0x13810d5db23478L,
+          0x2a8b7809b4b77fL, 0xb542f4e1b4e004L, 0x4080fd03ec77f0L,
+          0xb49e9fecec6596L, 0x20338d33f16037L }
+    },
+    {
+        { 0x4adcdae53554b0L, 0xfea4906e04c4dbL, 0x0808bec7748233L,
+          0xde7477c47148d7L, 0xdd9124c03da38cL, 0x6b2503125ee8e9L,
+          0xae67399b0d6161L, 0x70c4acd82203b6L },
+        { 0x9683916d31dae8L, 0x34775031ac7f69L, 0x9553153988e4adL,
+          0xb58f41153a15e1L, 0xb65a2d492ba2ddL, 0x7c3efb1a90169cL,
+          0x210f45e6b1747dL, 0x16e8d1bcff488dL }
+    },
+    {
+        { 0x252adf89d703dbL, 0x259ac1dfdfeb39L, 0x7faf6af115e806L,
+          0x7aaefd6c1aff21L, 0x80542107c0113dL, 0x481f1a5e19b4b1L,
+          0x7c17d43fcc8c61L, 0x8b04452bb0bbbeL },
+        { 0xe51e5f54cebae1L, 0x05341ba56a414cL, 0x0083a2c7fb8a30L,
+          0xb4663f277f4952L, 0xce72eec4bb0074L, 0x74fdd66a3584d1L,
+          0x6b9e58eb02e076L, 0x5be45d53b961f4L }
+    },
+    {
+        { 0xc7474f31ab2e0bL, 0x2838ccbf4bf454L, 0x634392ef3c3eacL,
+          0x440e40a137602bL, 0xeea67e9d1ae8e3L, 0xafdf93a77e221eL,
+          0x3c9f3da2719a10L, 0x466ecef32c8256L },
+        { 0x1061c19f9c432fL, 0xa1332d9b1c7d98L, 0xbc735f2a425c2cL,
+          0x1429cdf4b1bccbL, 0x77b42a16bbb5f9L, 0x30078e35955ae4L,
+          0x8acd77721cc315L, 0xaa90d5fe86fa99L }
+    },
+    {
+        { 0xfcfd460721115aL, 0x6a7de3e08269b8L, 0xe5964a696dd47eL,
+          0x6717cd58dca975L, 0x7ea4ebe98b149eL, 0x6f894d5b7b8057L,
+          0xbd6f9607f30e31L, 0x61ca45323df092L },
+        { 0x32241f99d782f3L, 0x55173b02abfae2L, 0x0abe0edd15bbbdL,
+          0xb6d3c0ab438abbL, 0x62fb4679ffa20bL, 0x30926b5d31560aL,
+          0x44bf27c2a0aa6dL, 0xf7473131a4cb97L }
+    },
+    {
+        { 0xa2f6c0db0535deL, 0xcb02ae1c855166L, 0xc699e6bb3422f0L,
+          0x774febe281ba8aL, 0x1d9d24fffabcc7L, 0x0b31ba1fe12ba5L,
+          0x4c8680313d0af7L, 0x90640d32f47160L },
+        { 0xa0c4bf45876603L, 0x717f6fa950ab08L, 0xf12bb53a710de8L,
+          0xc500c616a88f50L, 0x0070f992645351L, 0x57aab5d2446893L,
+          0xd553fa8b68f657L, 0xe8537c1693c55dL }
+    },
+    {
+        { 0x58e86eb7fc7684L, 0xdf330f7bfc73a9L, 0x41e337dcc11936L,
+          0x36d92006e35759L, 0x01327033500d8bL, 0xfa684059483354L,
+          0xc8f2980667851bL, 0x538ec8918296b0L },
+        { 0xa2a2c4fcff55f9L, 0xb260d4d60d20bdL, 0x3ed576fd9cc59fL,
+          0x4ed8c64d514fccL, 0x37ebfb2c22b315L, 0xca67a3694c212cL,
+          0x4f8e08c3a1795eL, 0x498f9264e7261fL }
+    },
+    {
+        { 0xfea7382c59b3d4L, 0xb9942ed3f2925fL, 0xe4b00dc8ea77e8L,
+          0x74a18ec3cab02eL, 0xbbbb752ef16d0bL, 0x639da4fffab032L,
+          0xc371a4a3aa30f0L, 0x8e26b22caa175bL },
+        { 0x94e41567e2b62eL, 0x7cceea625a794cL, 0x931d2f4479f015L,
+          0x946183d90b25b2L, 0x1504e9768a2807L, 0xa7577d3fa49dddL,
+          0x24fc87edd48699L, 0x9edefd63d7d99cL }
+    },
+},
+{
+    {
+        { 0x0508b340f0b450L, 0xe0069a5c36f7f4L, 0x26556642a5a761L,
+          0x0193fd8848e04dL, 0xc108cf573fe2e7L, 0x05eb0ecfd787d4L,
+          0x1555ccbff28985L, 0xb5af09f651b995L },
+        { 0x167d72ce1134beL, 0xd6d98bf57c669aL, 0x40fb7166dd76faL,
+          0xeabbf202a41b31L, 0x300ff0e09b75b0L, 0x32b6fadd9a0c1eL,
+          0x805188365a80e0L, 0x8bef69332110feL }
+    },
+    {
+        { 0x637802fbef47d4L, 0xfac114b2d16eaaL, 0x7b3f3ab0415644L,
+          0x17ab8d12dd895bL, 0x271b7fe87195f3L, 0xa3f867ea71f65fL,
+          0x39ba40cc80583aL, 0x6db067256e1fccL },
+        { 0x4feab4e06662a8L, 0xc857415c74bd46L, 0x18032ed732b126L,
+          0x87c8aea7a099eaL, 0xb4a753536fe0a8L, 0x33a98da27673f6L,
+          0x3e40c022b8e549L, 0x2def1af9a4c587L }
+    },
+    {
+        { 0x9618b68a8c9ad9L, 0xd70b4aa49defdaL, 0xae8b1385f788efL,
+          0x87c3542dd523f4L, 0xe42c7055c5b004L, 0x6303360fa7df57L,
+          0x33e27a75f6d068L, 0x9b3268e8ff331aL },
+        { 0x845cc9623ee0c3L, 0x003af70ac80084L, 0x6a9f931530c41dL,
+          0xa1d7051bb127f0L, 0x642ce05ca36245L, 0xc34205b0323ee9L,
+          0x7cc8912b7b3513L, 0x6252cc8076cbdbL }
+    },
+    {
+        { 0x10e68a07089522L, 0x36c136158fc658L, 0x490397d74723a4L,
+          0x42692c0519d56cL, 0x69d251bf1ff235L, 0xe689d03c2cbf37L,
+          0xf04ceba825b7f4L, 0xd6b9bee2281c2eL },
+        { 0xc52ef3fe0043abL, 0x351bf28d1d1be8L, 0x277615f0f18a5aL,
+          0x31f717f5d6800fL, 0xf5fb82dab922e2L, 0x99aee2f2d6ae43L,
+          0x42477fec63b982L, 0x904aeb1a594a01L }
+    },
+    {
+        { 0xaa82174eb39974L, 0xbc38e6195e6aa0L, 0x6a3df8a25c0675L,
+          0xf324203ffbe739L, 0xfa5a0b4a3f0649L, 0x79c87327a7a6b8L,
+          0xeb65ecd40ad3f5L, 0x718d416e4e45c5L },
+        { 0x029dbf4e2326fdL, 0x0c63416e7942f0L, 0x6d0c7286f4e678L,
+          0x59f0b10a138601L, 0x8a1d9788d92ea9L, 0x9f8d712c22eca5L,
+          0x73970447b6b96bL, 0xa2d49eee6fb955L }
+    },
+    {
+        { 0x249f900bf14a19L, 0xd3522da63a8cd2L, 0x28a32f386964d2L,
+          0xacf712bc1fa743L, 0x98a9bfc0bb94d3L, 0x318ece1bc06824L,
+          0xfc476754fce7f0L, 0x19caec9e4135b7L },
+        { 0x6de68a8c6817bbL, 0x7121960f3b6d89L, 0xa7d4261f5a818eL,
+          0x0c0ba519157455L, 0x78b6acf450d5ffL, 0x198b4934e8649aL,
+          0x0941a3cfd05da3L, 0x264ea4adb55951L }
+    },
+    {
+        { 0xcfee91c46e5a31L, 0x47b6806fff7366L, 0xdb14be45df849dL,
+          0x3c5e22bac66cc7L, 0x7f3f284a5f4769L, 0x4e00815383be36L,
+          0x39a9f0b8072b0bL, 0x9887cd5c7eadd6L },
+        { 0x7dd8f05b659511L, 0x15c796dd2e1cb9L, 0xe5edb0c0d31345L,
+          0x2025df06939c60L, 0x6314c08bf15de1L, 0x03c154804c7fb5L,
+          0x413337fbb5d3edL, 0xfc20b40477e983L }
+    },
+    {
+        { 0x7f968805db0ef9L, 0x05562dee9c2a70L, 0x071e5bc7dae133L,
+          0xa8cdd12237fc4aL, 0x6d565e74ea492bL, 0xa17cf94381ee52L,
+          0x6ab8a4e9f5c546L, 0xbb642f340288efL },
+        { 0x64e59215df5c2dL, 0x43696e3bb906f4L, 0x73a841a74ae46cL,
+          0xe264883c506b8aL, 0x9542e1aa1be548L, 0x89385395e81b4aL,
+          0x5642cfaeaca6ceL, 0xed8077b806e0f9L }
+    },
+},
+{
+    {
+        { 0x1c776c47e13597L, 0x0ec8b289e584fdL, 0x0bb6043b8b61e8L,
+          0xdcc17489cd835bL, 0x493e6ac39fef9aL, 0xb44eb34d133e17L,
+          0xfebcd0071cb6f9L, 0xe6cf543d20eff2L },
+        { 0xf265cad0a004c7L, 0x9b06c9dd35cc12L, 0x769f985cb4ea53L,
+          0x29160a20993434L, 0xdf8dd108d939c4L, 0xefa177c6711e2fL,
+          0x1695790cd7a2cdL, 0x38da3d777f6642L }
+    },
+    {
+        { 0x9bfcfd96307b74L, 0xc26a36dbfdabc3L, 0x9341be04abe28eL,
+          0xdb20b5273d1387L, 0xf8d229c3d1949cL, 0xf1e0afeb8b3a41L,
+          0x29c60dfed565d0L, 0x6930bb58b43b2cL },
+        { 0x1d76527fc0718fL, 0xdb981431f67189L, 0x0c62f6451f32ccL,
+          0x70a66268bd35e5L, 0x1725641c1cece7L, 0x7f130a8f96f4a4L,
+          0x72319e9f06ee98L, 0x215b73867bf9b2L }
+    },
+    {
+        { 0x8d1bec20aaddd7L, 0xfb8b95bb8be4f9L, 0xeac193efde1026L,
+          0xa5edea79d5860cL, 0x4adbaea44280d3L, 0xce8b67038f4798L,
+          0x914c107ec30deaL, 0xbdc5cf7000776bL },
+        { 0xb6fd7d1a206a13L, 0x9941ebadae986eL, 0x76c27a81f1caaaL,
+          0x6967c123f108b4L, 0x6f115284aea2d0L, 0x9bb4319144ddacL,
+          0x1a4d3eac8ec6fcL, 0xfe4b0b8bf37420L }
+    },
+    {
+        { 0x5d9a4a1ec0ac6fL, 0x84b79f2fc7c80dL, 0x64222f7c14fac3L,
+          0xdd9e039c23b3f2L, 0x4a84abdea956bbL, 0x370dcbaebe09dcL,
+          0x79a9ea8e0eaf82L, 0x4cfb60aaee375fL },
+        { 0x6a10dbf9106827L, 0xa3ba5cf43f305bL, 0x481b885c1bb083L,
+          0x2f52380b3117b1L, 0x0066122ddd6791L, 0x4f8923e63bace3L,
+          0x5c5f499ecb88d4L, 0xfdc780a3bac146L }
+    },
+    {
+        { 0x34b70ae7ba1f71L, 0x909182945bd184L, 0x3b39778e707313L,
+          0xdeefc5e6164e91L, 0xbb55bed4971f39L, 0x7d523398dafc8bL,
+          0x82391bfa6adf0fL, 0xfd6f90ae319522L },
+        { 0x60fdf77f29bbc9L, 0xeff9ed8aaa4030L, 0x978e045f8c0d3fL,
+          0xe0502c3eed65cdL, 0x3104d8f3cfd4c8L, 0xab1be44a639005L,
+          0xe83f4319eeab3fL, 0x01970e8451d797L }
+    },
+    {
+        { 0xbc972f83180f4bL, 0xac053c0617779dL, 0x89392c57fa149fL,
+          0xdc4699bbcb6263L, 0x0ae8b28ce12882L, 0xdca19a7af1a4dcL,
+          0xd3d719f64e1a74L, 0xbb50201affdd5dL },
+        { 0x56f73107ac30e9L, 0x65cc9c71878900L, 0x83f586627338a3L,
+          0x122adefac5bb13L, 0x97de2001bcd4d5L, 0x6ed3985b8aa3a0L,
+          0x8680f1d6821f9bL, 0xcb42028dda9f98L }
+    },
+    {
+        { 0xcdb07080ec2db3L, 0xe28c8333dad1a1L, 0x2093e32de2da07L,
+          0x731707383b8987L, 0xad17871f552b8dL, 0x846da9851cf70aL,
+          0xf94a16e5c4f5e1L, 0x84299960f8348aL },
+        { 0x4bf3f6898db78aL, 0xad77fa83d19b52L, 0x69767728b972dcL,
+          0x7dfa35a5321be0L, 0x9881846dd344a6L, 0xe550292ad4e2a8L,
+          0x8075217bc68bf1L, 0xdd837c4893be15L }
+    },
+    {
+        { 0x09c931ed4fab5bL, 0xb2dcf08b77a0f1L, 0x7dac5c0e0d38a6L,
+          0xa5570b00ae73afL, 0xc7c19d3f5aed28L, 0x575fa6f5251e92L,
+          0xb843cd6cdf7275L, 0xd9d3d8e9a01287L },
+        { 0xf94e356b3c370bL, 0xc62b99ffe464b0L, 0x7792650a986057L,
+          0xeaa67d5c4b1874L, 0xba1ba4d0b07078L, 0xdbf636d7a03699L,
+          0x1a16c34edd32a3L, 0x6ce2495a45cb5dL }
+    },
+},
+{
+    {
+        { 0xd7c4d9aa684441L, 0xce62af630cd42aL, 0xcd2669b43014c4L,
+          0xce7e7116f65b24L, 0x1847ce9576fa19L, 0x82585ac9dd8ca6L,
+          0x3009096b42e1dbL, 0x2b2c83e384ab8bL },
+        { 0xe171ffcb4e9a6eL, 0x9de42187374b40L, 0x5701f9fdb1d616L,
+          0x211e122a3e8cbcL, 0x04e8c1a1e400bfL, 0x02974700f37159L,
+          0x41775d13df8c28L, 0xcfaad4a61ac2dbL }
+    },
+    {
+        { 0x6341b4d7dc0f49L, 0xaff6c2df471a53L, 0x20ec795fb8e91eL,
+          0x4c7a4dfc3b7b62L, 0x9f33ff2d374938L, 0x38f8c653a60f2eL,
+          0xc1168ac2efef73L, 0x046146fce408eeL },
+        { 0x9b39ac0308b0c3L, 0xe032d6136b8570L, 0xee07d8dfc4aacfL,
+          0x0a82acbd5a41ddL, 0xbe0ded27c3d726L, 0xce51d60b926ce9L,
+          0xfa2f7f45806c1eL, 0xe367c6d1dec59cL }
+    },
+    {
+        { 0x64511b6da2547bL, 0x76a349c0761405L, 0x37d662601223abL,
+          0x0e243c1f4d7c48L, 0xdc9c8b4da756a0L, 0xc7430dfd72e7e9L,
+          0x0eb130827b4210L, 0x7a9c044cf11cbdL },
+        { 0x2c08ff6e8dd150L, 0x18b738c2932fc6L, 0x07d565104513e8L,
+          0x0ca5cffaa40a17L, 0xd48634101baa8fL, 0xfb20fafb72b79eL,
+          0x1a051e5654020fL, 0xe3b33174e17f23L }
+    },
+    {
+        { 0x05910484de9428L, 0x620542a5abdf97L, 0xaa0ededa16a4d1L,
+          0xa93f71c6d65bb9L, 0x88be135b8dfaf9L, 0x1d9f4e557ca8eeL,
+          0x4c896aa26781adL, 0xd3fbe316c6c49fL },
+        { 0x088d8522c34c3dL, 0xbb6d645badff1eL, 0xe3080b8385450dL,
+          0x5ccc54c50ab1f3L, 0x4e07e6eac0657dL, 0xa7ba596b7ef2c0L,
+          0xcceca8a73a81e9L, 0xa0b804c8284c35L }
+    },
+    {
+        { 0x7c55956f17a6a2L, 0xb451d81789cfa8L, 0xdf414e82506eaaL,
+          0x6ef40fbae96562L, 0x63ea2830e0297eL, 0xf5df26e73c46faL,
+          0xe00641caac8bceL, 0xc89ed8f64371f3L },
+        { 0xd22b08e793202eL, 0x39a9033875cb50L, 0xe64eec0f85ddb4L,
+          0xdce45a77acf7b5L, 0x39d1e71b9b802dL, 0xafdfe7cbd559acL,
+          0x17ec1f8809eeb5L, 0x8c0e38a4889b8cL }
+    },
+    {
+        { 0x47eabfe17089daL, 0x2d18466ec90c50L, 0xa511aa45861531L,
+          0xebb3d348c39b39L, 0xa0ac4daf1b5282L, 0xea26be7a9dadbaL,
+          0x8992ba8554d86eL, 0x7fcbdb6d5f2ef5L },
+        { 0x320e79b56863e7L, 0xeb9d0c0a7dce2dL, 0xb9f4031784cbc6L,
+          0x68823ee7ac1f81L, 0xa6b6f4f9d87497L, 0x83c67b657f9b6eL,
+          0x37357470fef2a7L, 0xf38028f59596e2L }
+    },
+    {
+        { 0x9ea57ab7e82886L, 0x18221c548c44d5L, 0xbf8e6cf314a24fL,
+          0x70ff18efd025e5L, 0x08d03de5334468L, 0x2b206d57404fb7L,
+          0xb92327155e36b0L, 0xcc7604ab88ddd9L },
+        { 0x3df51524a746f0L, 0x8fdebd8168e3fcL, 0xffc550c7f8c32cL,
+          0x1dbbc17148743eL, 0xd48af29b88e18bL, 0x8dca11c750027cL,
+          0x717f9db1832be3L, 0x22923e02b06019L }
+    },
+    {
+        { 0xd4e06f5c1cc4d3L, 0x0fa32e32b4f03aL, 0x956b9afc4628d0L,
+          0x95c39ce939dad1L, 0x39d41e08a00416L, 0xfd7ff266fb01aaL,
+          0xc6033d545af340L, 0x2f655428e36584L },
+        { 0x14cfb1f8dff960L, 0x7236ffcda81474L, 0xc6a6788d452d0fL,
+          0x2ad4a5277f6094L, 0x369d65a07eea74L, 0x27c6c38d6229aaL,
+          0xe590e098863976L, 0x361ca6eb38b142L }
+    },
+},
+{
+    {
+        { 0x6803413dfeb7efL, 0xb669d71d3f4fadL, 0x5df402ac941606L,
+          0xe5d17768e6c5b7L, 0x131bcb392ab236L, 0x7f1fb31ce2e0e0L,
+          0xa2c020d9e98c35L, 0x33b23c0f28657bL },
+        { 0xed14e739cf7879L, 0x10d4867b4357b3L, 0x127cea331e4e04L,
+          0xc60d25faa5f8a7L, 0xfef840a025b987L, 0x78081d666f2a0aL,
+          0x0fa0b97ac36198L, 0xe0bb919134dc9fL }
+    },
+    {
+        { 0xc1d2461cc32eaeL, 0x0fdbfdf0f79a37L, 0x70f2bc21c95f02L,
+          0x7d68bec372cddfL, 0x44f78178439342L, 0xa3d56784843a6cL,
+          0xbadf77a07f8959L, 0xf45819873db4caL },
+        { 0xe8eaaf3d54f805L, 0x2f529d1b84c1e7L, 0x404e32e21e535cL,
+          0xabac85c159b5f5L, 0x4e8e594b00466fL, 0x40fcaabc941873L,
+          0x3b4e370be407c6L, 0xccd57885b2e58dL }
+    },
+    {
+        { 0x3ee615e88b74a8L, 0xd7d6608eab4e69L, 0x27cf9f1e4ace36L,
+          0x282359e7aebabbL, 0x96e509bf6d162fL, 0xad906f3f1a290aL,
+          0xe7d6c4f1314a58L, 0xeecffe4218431dL },
+        { 0xa66e0e9e2cfed9L, 0xb0887ec71f0544L, 0xd34e36ba04c5d7L,
+          0x094daa5ed4392dL, 0xcda83adc8aa925L, 0x1adef91b979786L,
+          0x3124dcbfddc5d6L, 0x5cc27ed0b70c14L }
+    },
+    {
+        { 0x386dbc00eac2d8L, 0xa716ecbc50ca30L, 0x9e3fc0580d9f04L,
+          0x37dde44cfeacebL, 0xd88d74da3522d5L, 0x6bb9e9f2cf239aL,
+          0x9e7fb49a7cbfecL, 0xe1a75f00a5c0efL },
+        { 0x6e434e7fb9229dL, 0x0ec6df5c8a79b3L, 0x7046380d3fb311L,
+          0xe957ef052e20faL, 0x0f4fe9a9ef4614L, 0x1b37d9c54d8f2bL,
+          0x23b2dc139d84a2L, 0xf62c4f6724e713L }
+    },
+    {
+        { 0xbd6922c747e219L, 0x34d14383869b7bL, 0x8c875a596f2272L,
+          0xd9602c03fe361eL, 0x081348f744839fL, 0x61bd16c61ac1f1L,
+          0x993b727d8da4e1L, 0xbb40ba87741271L },
+        { 0xe6dcc9881dcfffL, 0x9f513f593ce616L, 0xdc09683618cd8fL,
+          0xc3b1d1026639beL, 0xe8f149fc762ee2L, 0x59f26efb244aaeL,
+          0x3f2de27693dd96L, 0xd8b68f79c3a7deL }
+    },
+    {
+        { 0x6fa20b9970bd5bL, 0x87242d775f6179L, 0xa95a6c672d9308L,
+          0x6eb251837a8a58L, 0xfdea12ac59562cL, 0x4419c1e20f1fc3L,
+          0x0c1bd999d66788L, 0x4b7428832c0547L },
+        { 0x4f38accdf479abL, 0x01f6271c52a942L, 0xe3298f402ca9a7L,
+          0x533dacab718fc8L, 0x133602ab093ca8L, 0xc04da808f98104L,
+          0xd0f2e23af08620L, 0x882c817178b164L }
+    },
+    {
+        { 0x28e6678ec30a71L, 0xe646879f78aca1L, 0x868a64b88fa078L,
+          0x671030afee3433L, 0xb2a06bb87c0211L, 0x202eca946c406aL,
+          0x64d6284e4f0f59L, 0x56ae4a23c9f907L },
+        { 0x5abbb561dcc100L, 0x6fef6cf07c7784L, 0xb6e25cddb7302dL,
+          0xa26785b42980e8L, 0xe7d4043fb96801L, 0x46df55d8e4282bL,
+          0x9c0a5f5c602d6eL, 0xf06560475dfe29L }
+    },
+    {
+        { 0x0e82a1a3dcbc90L, 0xb1ee285656feacL, 0xfa4353b0d3d3b2L,
+          0xc2e7a6edd5c5dfL, 0x13707e1416ce53L, 0xc84ce0787ebc07L,
+          0xdd273ce8a9a834L, 0x432a6175e8e1e7L },
+        { 0xa359670bd0064aL, 0xc899dd56534516L, 0x666560edb27169L,
+          0x1537b22a19a068L, 0x3420507eac7527L, 0x479f25e6fc13a7L,
+          0xc847acc1bc19b3L, 0xecdecf00b20d45L }
+    },
+},
+{
+    {
+        { 0x6f241004acea57L, 0xdace1c6da68597L, 0xea7dd4150ce77fL,
+          0x1aecb841585884L, 0x92ff208ea4a85cL, 0xde9433c88eebd2L,
+          0x53cd3183f4d289L, 0x397085826539afL },
+        { 0x4b57599b827d87L, 0xdc82ac03d77638L, 0x694336652f6e61L,
+          0xb8fc4b0ad5e8a6L, 0x1b6f7dcf388642L, 0x6f24533a74dd57L,
+          0xc66937841750cfL, 0x06757eb28a37afL }
+    },
+    {
+        { 0x0e70d53c133995L, 0x88a5e0c7c8c97dL, 0x4e59dbf85f3be3L,
+          0x0f364ac0e92698L, 0x3a1e79bef6940fL, 0xc8a3941d85d23aL,
+          0x143bb999a00e58L, 0x61cf7d6c6f2f10L },
+        { 0x979c99485150feL, 0xcfd0df259d773fL, 0xce97b9daab7bcdL,
+          0xc9fff8e6afd8fcL, 0x246befd89a4628L, 0xf6302821567090L,
+          0x15393426749c58L, 0xff47d0ea0f3fd3L }
+    },
+    {
+        { 0x09b0bfd35f6706L, 0x74645812c82e69L, 0xb60729f50d5fe9L,
+          0xf13324595c74f1L, 0x33647e3bb76c89L, 0x01264045a9afccL,
+          0x46d57ee0f154abL, 0x2efa55525680a4L },
+        { 0x12ebfc65329d90L, 0xcb37ae579800afL, 0x5bb53496f8e310L,
+          0x9b59c63f1bb936L, 0x5b49baaf4610e9L, 0x2bbeeef4f2d6acL,
+          0x87ee21e0badc67L, 0x12e2aadf1ddfa0L }
+    },
+    {
+        { 0x5b4668fa9109eeL, 0xfa951338a6cea2L, 0xe45e6fc4068e16L,
+          0x8ae9a0c0205ed8L, 0x2993b96679b79bL, 0xc6b878fed604d3L,
+          0x01d020832c77f3L, 0xd45d890495a1abL },
+        { 0x99348fa29d2030L, 0x961f9a661f8f7aL, 0xfd53212674f74bL,
+          0x45cee23b3e72bcL, 0x3fccb86b77e2d5L, 0xdff03104219cb7L,
+          0x233771dc056871L, 0x1214e327d2c521L }
+    },
+    {
+        { 0x9f51e15ff2a8e1L, 0x86571c5138bc70L, 0xbfc4caf0c09d46L,
+          0x65e33fec2a0c18L, 0x8214392426867dL, 0x51ce6c080ae4edL,
+          0x6cbe8d7b110de6L, 0x7f6e947fd22ea4L },
+        { 0x7373a75cadefc4L, 0x6fca1d2b0c682fL, 0xcd2140df3c7c1eL,
+          0x8653a37558b7a5L, 0x653e74e55eb321L, 0xbe0c6b3c31af73L,
+          0x3376379f4fc365L, 0x3570b3771add4dL }
+    },
+    {
+        { 0x9061ec183c3494L, 0xaf2f28d677bc95L, 0x6fe72793bf8768L,
+          0xc5f50e30fa86d8L, 0x6c03060a3293ceL, 0x4d53357e2355a6L,
+          0x43a59eae4df931L, 0x6f48f5d13b79c6L },
+        { 0xa4d073dddc5192L, 0x6d0e318a65773fL, 0x1008792765de9eL,
+          0xa724ed239a0375L, 0x510ff1497d7c9eL, 0x251f6225baa863L,
+          0x86464fe648a351L, 0xf85e98fd50fd91L }
+    },
+    {
+        { 0x29c963486ee987L, 0x93e8e5210dcc9fL, 0xa1fc4d1c910b1fL,
+          0x015acacfeb603eL, 0xc9f25f80844a5fL, 0x50de93c73f4dacL,
+          0x1758783310a4aaL, 0x544d570358f106L },
+        { 0x4eeec7b1dc68caL, 0x6238e6fe00fbcbL, 0x34d394cb4e83c9L,
+          0x764ffa22292656L, 0x5614cd1f641f2eL, 0x4252eb69e07234L,
+          0xcbaef4568d2ba4L, 0x8c9c5508a98b17L }
+    },
+    {
+        { 0xf235d9d4106140L, 0x1bf2fc39eb601eL, 0x6fb6ca9375e0c3L,
+          0x4bf5492c0024d2L, 0x3d97093eb54cc6L, 0xc60931f5c90cb5L,
+          0xfa88808fbe0f1aL, 0xc22b83dd33e7d4L },
+        { 0x9cfec53c0abbf5L, 0x52c3f0a93723dfL, 0x0622b7e39b96b6L,
+          0x300de281667270L, 0x50b66c79ef426aL, 0x8849189c6eb295L,
+          0xeaec3a98914a7eL, 0x7ed56b0c4c99e0L }
+    },
+},
+{
+    {
+        { 0x7926403687e557L, 0xa3498165310017L, 0x1b06e91d43a8fdL,
+          0xf201db46ac23cbL, 0x6f172ad4f48750L, 0x5ed8c8ce74bd3eL,
+          0x492a654daba648L, 0x123010ba9b64ffL },
+        { 0xa83125b6e89f93L, 0x3a3b0b0398378aL, 0x9622e0b0aebe7cL,
+          0xb9cbfdc49512a4L, 0x13edffd6aaf12aL, 0x555dff59f5eafdL,
+          0x3cba6fe1212efaL, 0xd07b744d9bb0f8L }
+    },
+    {
+        { 0x45732b09a48920L, 0xf3080fc13ff36dL, 0x9347395de8f950L,
+          0x14d025a382b897L, 0x60c5a7404d72adL, 0x30be7e511a9c71L,
+          0x43ffabd31ac33aL, 0x97b06f335cbb14L },
+        { 0xe4ff5c57740de9L, 0x5fed090aacf81eL, 0x97196eee8b7c9dL,
+          0x316dcd1045910bL, 0x7a2b2f55ad8c63L, 0x674fffdc5b03bbL,
+          0xc1cd133e65953cL, 0x3c060520a83556L }
+    },
+    {
+        { 0x797c3f6091c23dL, 0x2ea2de339c9c05L, 0x5d958b4a31f67cL,
+          0xf97afe5d5f088cL, 0xbcfbd2a0b37243L, 0xc43ad3eeca630cL,
+          0xb92a33742845e0L, 0x970bff7a9a0f16L },
+        { 0x86355115970a79L, 0xcee332ef205928L, 0x2c58d70c04c208L,
+          0xdbfe19a3f5e5bfL, 0x8f8f2c88e51c56L, 0xb61f58e8e2da75L,
+          0x4046a19624d93fL, 0x7de64dbe1f9538L }
+    },
+    {
+        { 0xd018e1cc2d850eL, 0x8cdb64363a723cL, 0x9a65abe90a42afL,
+          0xfeece9616f20ccL, 0xc906800d5cff56L, 0x0acf23a3f0deedL,
+          0x2143061728dd3aL, 0x66276e2b8ce34cL },
+        { 0x23700dc73cc9c7L, 0xdb448515b1778bL, 0x330f41e4aab669L,
+          0x2f5aabcf5282a4L, 0xff837a930f9e01L, 0x1a1eb2f901cc98L,
+          0xd3f4ed9e69bd7fL, 0xa6b11418a72a7dL }
+    },
+    {
+        { 0x34bde809ea3b43L, 0x5ddcb705ced6aeL, 0x8257f5b95a6cb8L,
+          0xaac205dc77dcb8L, 0x77d740d035b397L, 0xca7847fcf7e0a6L,
+          0x9404dd6085601bL, 0x0a5046c457e4f9L },
+        { 0xcaee868bc11470L, 0xb118796005c5f6L, 0xcc04976ec79173L,
+          0x7f51ba721f6827L, 0xa8e3f0c486ff7eL, 0x327163af87838cL,
+          0xcf2883e6d039fdL, 0x6fb7ab6db8b0e2L }
+    },
+    {
+        { 0x8ca5bac620d669L, 0xff707c8ed7caa9L, 0xdaefa2b927909bL,
+          0x1d2f9557029da3L, 0x52a3ba46d131a0L, 0xe5a94fd3ab1041L,
+          0x508917799bc0aeL, 0xf750354fa1bd16L },
+        { 0xdd4e83a6cd31fdL, 0xd33505392fac84L, 0xf914cbc1691382L,
+          0x669683fda6ade6L, 0x69446438878513L, 0x429d3cc4b1a72dL,
+          0x655c46a61eec36L, 0x881eded4bc4970L }
+    },
+    {
+        { 0x5b39d377ca647fL, 0x41533c1e917b34L, 0xea2aeb57daf734L,
+          0xf1ef1eb1286560L, 0x582f2e008e0473L, 0x5913d7d5edc74aL,
+          0x588c7ec3c1e754L, 0xbd6db057146fe1L },
+        { 0x3b0d49e7634907L, 0x4c65ce4e43b9ccL, 0xb87e9582d92d5bL,
+          0x05135727ab1519L, 0x03ec0848c3aed0L, 0x4d7aa21561a641L,
+          0xe5f821199e92adL, 0x379b55f48a457cL }
+    },
+    {
+        { 0x8317c34d6a8442L, 0xb0ab4a5ae499daL, 0xebcb16e720e8ebL,
+          0xfd5c5639a96908L, 0xcab4d67ad23acfL, 0xa600a79bcdf748L,
+          0x18a6340a2a6a51L, 0xf2f415c3aabd69L },
+        { 0xdb38a4f747258aL, 0xb6ea5602e24415L, 0xfad1ea9f1f7655L,
+          0x4e27eb5c957684L, 0xf8283e1b2e1cfcL, 0x8f83bd6aa6291cL,
+          0x28d23b55619e84L, 0xb9f34e893770a4L }
+    },
+},
+{
+    {
+        { 0x1bb84377515fb1L, 0xac73f2a7b860a6L, 0x78afdfa22b390fL,
+          0x815502b66048aaL, 0xf513b9785bf620L, 0x2524e653fc5d7cL,
+          0xa10adc0178c969L, 0xa1d53965391c8dL },
+        { 0x09fccc5a8bcc45L, 0xa1f97d67710e1eL, 0xd694442897d0a1L,
+          0x7030beb5f42400L, 0xdebe08c7127908L, 0x96b715c2187637L,
+          0xc598250b528129L, 0x0f62f45a1ccb07L }
+    },
+    {
+        { 0x8404941b765479L, 0xfdecff45837dc4L, 0x1796372adbd465L,
+          0x5f84c793159806L, 0x6d2e46b6aaad34L, 0xd303b4a384b375L,
+          0x440acd5b392002L, 0x4f2a4a7c475e87L },
+        { 0x038e1da5606fc2L, 0x2d821c29c2f050L, 0xc074cb3f139db4L,
+          0xde2fee74ec59beL, 0x5a819eea84ed59L, 0xd65c62c3e98711L,
+          0x72eb440b9723c1L, 0xb92775401be611L }
+    },
+    {
+        { 0x929fe64ab9e9fcL, 0x04379fd0bf1e85L, 0xb322093bc28ee3L,
+          0x78ac4e2e4555e1L, 0xdb42b58abc5588L, 0x1c1b5e177c8b12L,
+          0xf6d78dd40366c4L, 0xc21ff75bdae22eL },
+        { 0x1e3d28ea211df2L, 0xc5a65a13617c0aL, 0x3fa02c058140d5L,
+          0x155c346b62d10cL, 0xc9cf142e48268fL, 0xdc140831993bc3L,
+          0x07c44d40ee69dcL, 0x61699505e2ac46L }
+    },
+    {
+        { 0x44e4a51d0fb585L, 0x00846bef1f3ce8L, 0xedef39a8e2de1eL,
+          0x430afe333b3934L, 0xac78b054337188L, 0x0f39de4c9a3f24L,
+          0x039edddc9ae6a4L, 0xf4701578eacd51L },
+        { 0x1e396949a2f31aL, 0xc8a40f4b19a8b1L, 0xdddd10c9d239d8L,
+          0xf974245887e066L, 0xfdb51113ea28c6L, 0xb5af0fbe1122a9L,
+          0xd30c89f36e0267L, 0x7b1c0f774f024cL }
+    },
+    {
+        { 0x1ec995607a39bfL, 0x1c3ecf23a68d15L, 0xd8a5c4e4f59fe9L,
+          0xacb2032271abc3L, 0xbc6bdf071ef239L, 0x660d7abb39b391L,
+          0x2e73bb2b627a0eL, 0x3464d7e248fc7eL },
+        { 0xaa492491666760L, 0xa257b6a8582659L, 0xf572cef5593089L,
+          0x2f51bde73ca6bfL, 0x234b63f764cff5L, 0x29f48ead411a35L,
+          0xd837840afe1db1L, 0x58ec0b1d9f4c4bL }
+    },
+    {
+        { 0x8e1deba5e6f3dcL, 0xc636cf406a5ff7L, 0xe172b06c80ca0fL,
+          0x56dc0985ffb90aL, 0x895c2189a05e83L, 0x6ddfaec7561ac2L,
+          0xaa3574996283a0L, 0x6dfb2627e7cd43L },
+        { 0x6576de52c8ca27L, 0x6a4a87249018ebL, 0x00c275c5c34342L,
+          0xe34805ad2d90c4L, 0x651b161d8743c4L, 0xb3b9d9b7312bf3L,
+          0x5d4b8e20bf7e00L, 0x8899bdf78d3d7eL }
+    },
+    {
+        { 0x9644ad8faa9cd1L, 0x34c98bf6e0e58eL, 0x6022aad404c637L,
+          0x2a11a737ac013bL, 0x5bdd1035540899L, 0x2e675721e022a4L,
+          0xe32045db834c33L, 0x74a260c2f2d01cL },
+        { 0x20d59e9c48841cL, 0x05045dde560359L, 0xeba779cac998acL,
+          0x5bed10c00a6218L, 0x25d4f8e5327ef4L, 0xa2784744597794L,
+          0xefd68ca831d11eL, 0x9ad370d934446aL }
+    },
+    {
+        { 0x3089b3e73c92acL, 0x0ff3f27957a75cL, 0x843d3d9d676f50L,
+          0xe547a19d496d43L, 0x68911c98e924a4L, 0xfab38f885b5522L,
+          0x104881183e0ac5L, 0xcaccea9dc788c4L },
+        { 0xfbe2e95e3c6aadL, 0xa7b3992b3a6cf1L, 0x5302ec587d78b1L,
+          0xf589a0e1826100L, 0x2acdb978610632L, 0x1e4ea8f9232b26L,
+          0xb21194e9c09a15L, 0xab13645849b909L }
+    },
+},
+{
+    {
+        { 0x92e5d6df3a71c1L, 0x349ed29297d661L, 0xe58bd521713fc9L,
+          0xad999a7b9ddfb5L, 0x271c30f3c28ce0L, 0xf6cd7dc2a9d460L,
+          0xaf728e9207dec7L, 0x9c2a532fcb8bf0L },
+        { 0xd70218468bf486L, 0x73b45be7ab8ea8L, 0xddfc6581795c93L,
+          0x79416606bb8da2L, 0x658f19788e07a2L, 0xa9d5b0826d3d12L,
+          0x4d7c95f9535b52L, 0xad55e25268ef8aL }
+    },
+    {
+        { 0x94a9b0ba2bc326L, 0x485ecc5167e5f3L, 0x8340bc7c97fc74L,
+          0x06f882b07aaa5cL, 0x4b57455849698aL, 0xd9281ebb36a0baL,
+          0x8918c6c8b8108fL, 0xedd1eea5b50d1dL },
+        { 0x94d737d2a25f50L, 0x0e5a8232446ad0L, 0x02a54357ced3e2L,
+          0xb09a92a4af8cedL, 0x85fc498eeecef2L, 0x06a02b9e71e3d4L,
+          0x00ad30784bb49aL, 0xf61585e64a5b4aL }
+    },
+    {
+        { 0x915f6d8b86a4c9L, 0x944bc6ba861e1fL, 0x3091ca754465efL,
+          0x11df859eb53a38L, 0xd44dde50144679L, 0x6c8da9a0994eddL,
+          0xeebcebf91241efL, 0xc419354c2f6859L },
+        { 0x1f4969349581b6L, 0x5712b10bb26cb4L, 0x8fcaa41b09fd59L,
+          0xbd39aad72e22e3L, 0xf70e794b1199b0L, 0xdf63c0cc6f863dL,
+          0xd58166fee9df4fL, 0xb9224eac45e70bL }
+    },
+    {
+        { 0x80072face525f4L, 0x8597bd666a5502L, 0xf65e203dbc9725L,
+          0xeccfbe3f2222a4L, 0x490aa422339834L, 0x134889162489e8L,
+          0xaff3f80a735084L, 0x69d53d2f3f1bd6L },
+        { 0xb123ffc813341aL, 0x359084c1173848L, 0x751425ed29b08dL,
+          0x1edda523890ad4L, 0xb64974c607cf20L, 0xa8c8cb8b42ac7cL,
+          0xd5cb305edd42e5L, 0xf3034dc44c090aL }
+    },
+    {
+        { 0x428921dbb18e19L, 0x4cfd680fed2127L, 0x671144d92ac8c3L,
+          0x2121901132c894L, 0x25d0e567604cd9L, 0xa372223afbc2a0L,
+          0xcf98a5256c16f7L, 0x71f129ab5459e1L },
+        { 0xf4afdc5b668b2eL, 0xc5d937a0c2d410L, 0xe2cc4af285d54aL,
+          0x1c827778c53e18L, 0x270f2c369a92f6L, 0x799f9ac616327aL,
+          0xce658d9d4246f2L, 0x0fb681ffb12e36L }
+    },
+    {
+        { 0xc5ab11ee0690feL, 0x80261e33f74249L, 0x8eb4b4758c1cf2L,
+          0x4895a80184ae9bL, 0x4a4bdb6d3e27ebL, 0xa7a1638bfd251cL,
+          0x29ec144417a7e3L, 0xd0736093f1b960L },
+        { 0xcb1ed8349c73d1L, 0x33fc84a8d1945aL, 0x9f668dbe965118L,
+          0x3331743a82811fL, 0xf394dec28ba540L, 0x44ce601654a454L,
+          0x240dbb63623645L, 0xf07e7f22e61048L }
+    },
+    {
+        { 0x7c9f1763d45213L, 0x3eefa709c1f77fL, 0xde3c3c51b48350L,
+          0x4a2bc649d481a7L, 0xfd4a58a7874f3dL, 0x96655d4037b302L,
+          0x945252868bf5abL, 0x1b6d46a75177f6L },
+        { 0x7de6763efb8d00L, 0xb2c1ba7a741b7bL, 0xcca6af47bae6edL,
+          0xe4378ca5b68b3fL, 0xfb757deaf71948L, 0x7f07b5ebc6ac99L,
+          0x752a56827d636dL, 0xc8b7d1d4b8a34fL }
+    },
+    {
+        { 0x76cb78e325331bL, 0x41f41c9add2eedL, 0x03db2385c5f623L,
+          0xbbc1d177102fa2L, 0x80f137a60182ecL, 0xfdd856955adf15L,
+          0x4f53f5ee3373dcL, 0xec6faf021b669bL },
+        { 0x7d4e9830b86081L, 0x10d3cd9f2d979cL, 0x0f48f5824a22c8L,
+          0x86c540c02f99eeL, 0xf4c66545e6c5fcL, 0xaf0c588bc404c8L,
+          0x2e6edbd423118aL, 0x86e32e90690eabL }
+    },
+},
+{
+    {
+        { 0x1d12656dfbfa6fL, 0xa4980957646018L, 0x2f1071bc3597d0L,
+          0x3df83f91dda80aL, 0x5853e28f3ae449L, 0xb853d319e19aadL,
+          0x863f01ba0d8a46L, 0xa84fca62fef108L },
+        { 0xbe4c0b7fb84de9L, 0x40a03dcc0727bfL, 0x781f841b18575cL,
+          0x6a63045466cddbL, 0x6be758205dc7a2L, 0x420f87f07ae811L,
+          0x28082423bf96c8L, 0x723998c51c6821L }
+    },
+    {
+        { 0x38ab64181f5863L, 0xd82ecbd05ff9e1L, 0x339c94ea065856L,
+          0x143054aa45156dL, 0xe6d64bf065628cL, 0xe530086a938589L,
+          0x22d3a49385d79bL, 0x0b107900ab8245L },
+        { 0xb0d80fbca387b5L, 0x698206e35551d7L, 0x199685da10bb73L,
+          0xa8e5fa89107378L, 0x36e5724d99dbbfL, 0xd67f476d581b03L,
+          0x7a15be788dd1e6L, 0x8dac8e4e5baa31L }
+    },
+    {
+        { 0x4d5d88fe170ef8L, 0xb6ba5de1e9e600L, 0x4a89d41edeabc5L,
+          0x737c66b8fac936L, 0x8d05b2365c3125L, 0x85a5cbcb61b68eL,
+          0x8fea62620a6af9L, 0x85115ded8b50ecL },
+        { 0x5430c8d6a6f30bL, 0x8bef9cf8474295L, 0x0648f5bbe77f38L,
+          0xfe2b72f9e47bd7L, 0xad6c5da93106e2L, 0x4fa6f3dfa7a6c3L,
+          0xdcd2ed8b396650L, 0x7de1cce1157ef9L }
+    },
+    {
+        { 0x70a5f6c1f241d1L, 0x6c354d8798cd5cL, 0x23c78381a729fbL,
+          0xcff8f15523cbdaL, 0x5683ff43493697L, 0xef7dbab7534f53L,
+          0xd7bd08e2243d53L, 0x6f644cbf8072a9L },
+        { 0xac960f9b22db63L, 0xa97f41723af04dL, 0x692b652d9798afL,
+          0x0e35967fedb156L, 0x14b5e50dfe6ee8L, 0x7597edeb411070L,
+          0x116f3ce442b3f9L, 0xe9b5ae81b2b6dbL }
+    },
+    {
+        { 0xf4385ee2315930L, 0xc8d029827a8740L, 0x7907a8dd934a43L,
+          0x20bc946c582191L, 0xa4acb3e6a405e7L, 0x8c1d6c843df2f5L,
+          0x9df1593991f0b5L, 0xbb9df984d9be9dL },
+        { 0x63620088e4b190L, 0xee1421eada3a88L, 0xb84f0ccf93b027L,
+          0x7a5d6678e95091L, 0x3974462f3e3704L, 0xfa6fb5ec593e98L,
+          0x44b6cf7a6477d2L, 0xe885b57b09a562L }
+    },
+    {
+        { 0x6e339e909a0c02L, 0x57afff00e75f29L, 0x797d8d6fb7db03L,
+          0xc6e11a3d25a236L, 0x643ce1c0107260L, 0xe644ec462eae1cL,
+          0x821d5b83f5a3f5L, 0xa8ad453c0579d6L },
+        { 0x6518ed417d43a4L, 0x46e76a53f87ccdL, 0xd6cbaabf9bef95L,
+          0x25688324f7cbcfL, 0x367159a08476b4L, 0x1d1b401be6d324L,
+          0x348cb98a605026L, 0x144f3fe43b6b1eL }
+    },
+    {
+        { 0xbabbd787b1822cL, 0xd34ba7e2aa51f8L, 0x086f1cc41fbea4L,
+          0x96f7eac746f3d9L, 0xad97f26281ecafL, 0x751a905a14ee2cL,
+          0xb4e7fe90d7335fL, 0x0d97b8f4892ff0L },
+        { 0xdb8a3155a5c40eL, 0x64e5de77ba567bL, 0x4f155f71eefe88L,
+          0xe2297e9fb6fbf4L, 0xfe24bf96c16be5L, 0x2251847cdd83e2L,
+          0x13ac2c85eda444L, 0x49d1b85283275fL }
+    },
+    {
+        { 0xca08731423e08fL, 0x7046bb087d2f14L, 0x876f10c3bc846cL,
+          0x2202b76358fbe3L, 0x0d4fc1c0e26ac6L, 0x1fc748bb986881L,
+          0x609e61c8384a18L, 0x28a72d60d88e00L },
+        { 0x1332a3178c6e2fL, 0x0367919b3526a4L, 0x53989e4698fe3eL,
+          0x14b1145b16a99bL, 0xef9ec80ddbb75fL, 0x76256240e53955L,
+          0x54e087a8744ae1L, 0xce50e8a672b875L }
+    },
+},
+{
+    {
+        { 0x4c88b2ba29629cL, 0x946559c7b2642fL, 0x933d432f7ebe4cL,
+          0x97109b663632c9L, 0x799b3fbe53184dL, 0xd4628710f069a6L,
+          0x0c182a13a68351L, 0x974a8399a2437aL },
+        { 0x29f19972a70278L, 0x01b98b6d9c424bL, 0xd85a60b08f4c37L,
+          0xcc3523f2b1da15L, 0xf922115ddffb0fL, 0xee0fe4dde84ae2L,
+          0x810440c55365beL, 0xd2f66391a457e8L }
+    },
+    {
+        { 0x5e6879fe2ddd05L, 0x92a7545abdfc61L, 0x7dedd63a5cede8L,
+          0x8a03b3f70df4bdL, 0xa5d1f6591f6cbbL, 0x372fde610f3fb2L,
+          0x4537f9ea9dee05L, 0x7eb85bbdf7aa50L },
+        { 0x963edf8e8c504dL, 0x53c8dcae7bdb6bL, 0xa246e4c6fedf2dL,
+          0x75533400c55bdeL, 0x2aa748d0270a54L, 0xadb6cf005860ddL,
+          0x8d314509b84763L, 0x626720deb405efL }
+    },
+    {
+        { 0xa3709ae6601328L, 0x68e94fd2ac2478L, 0x38793439d5d247L,
+          0xfa467af392c198L, 0x49e7b0d15df607L, 0x8c5812261792a8L,
+          0x79f76581d3762fL, 0xaa38895244a39dL },
+        { 0xef60af9c5cd0bcL, 0x2b0db53a33b3bbL, 0xe3e0b1f251015dL,
+          0xc608afce64489eL, 0xe52b05703651aaL, 0x1dda8b91c6f7b9L,
+          0x833f022ff41893L, 0x58eb0a0192818cL }
+    },
+    {
+        { 0x6c1300cfc7b5a7L, 0x6d2ffe1a83ab33L, 0x7b3cd019c02eefL,
+          0x6c64559ba60d55L, 0x2e9c16c19e2f73L, 0x11b24aedbe47b1L,
+          0xc10a2ee1b8153bL, 0x35c0e081e02e1aL },
+        { 0xa9f470c1dd6f16L, 0x4ea93b6f41a290L, 0xac240f825ee03fL,
+          0x6cd88adb85aabdL, 0x378a64a1be2f8fL, 0xbf254da417bac1L,
+          0x7e4e5a59231142L, 0x057aadc3b8c057L }
+    },
+    {
+        { 0x607c77a80af479L, 0xd3e01ff5ccdf74L, 0x9680aaf101b4c7L,
+          0xd2a7be12fc50a6L, 0x92a788db72d782L, 0x35daf2e4640b52L,
+          0xc170d6939e601cL, 0x16e05f57b25c2fL },
+        { 0x47a42a66fe37f8L, 0xeb74271beca298L, 0x401e11e179da16L,
+          0xfb8da82aa53873L, 0xd657d635bb4783L, 0x6847758fcea0b1L,
+          0x2f261fb0993154L, 0x868abe3592853aL }
+    },
+    {
+        { 0x1a4c54335766abL, 0xa1c84d66f4e4eaL, 0x5d737a660ba199L,
+          0x4a7b1e298b15a2L, 0x207877ffd967d3L, 0xcaec82dc262b4dL,
+          0x0b278494f2a37dL, 0x34781416ac1711L },
+        { 0x28e3df18fc6856L, 0xbec03f816d003fL, 0x2bd705bff39ebdL,
+          0x1dcb53b2d776d3L, 0xabafa7d5c0e7ceL, 0x5b9c8c24a53332L,
+          0xe9f90d99d90214L, 0x789747ec129690L }
+    },
+    {
+        { 0x94d3c3954e2dfaL, 0x919f406afb2a8fL, 0x159ef0534e3927L,
+          0xcdb4d14a165c37L, 0xa23e5e8288f337L, 0x95867c00f90242L,
+          0x2528150e34e781L, 0x104e5016657b95L },
+        { 0x695a6c9bcdda24L, 0x609b99523eb5faL, 0xcbce4f516a60f8L,
+          0xec63f7df084a29L, 0x3075ada20c811fL, 0x129a1928c716a1L,
+          0xd65f4d4cd4cd4aL, 0xe18fa9c62188beL }
+    },
+    {
+        { 0x1672757bac60e3L, 0x525b3b9577144bL, 0x38fc997887055bL,
+          0x7a7712631e4408L, 0x884f173cba2fcfL, 0x783cbdc5962ac0L,
+          0x4f3ed0a22287dcL, 0x8a73e3450e20e6L },
+        { 0xe7a1cd0d764583L, 0x8997d8d0d58ee6L, 0x0ea08e9aa13ed6L,
+          0xed478d0cf363cbL, 0x068523d5b37bf4L, 0x8b5a9e8783f13cL,
+          0xde47bbd87528a9L, 0xd6499cccaec313L }
+    },
+},
+{
+    {
+        { 0x54781bbe09859dL, 0x89b6e067f5e648L, 0xb006dfe7075824L,
+          0x17316600717f68L, 0x9c865540b4efe2L, 0xdbdb2575e30d8eL,
+          0xa6a5db13b4d50fL, 0x3b5662cfa47bebL },
+        { 0x9d4091f89d4a59L, 0x790517b550a7dcL, 0x19eae96c52965eL,
+          0x1a7b3c5b5ed7a4L, 0x19e9ac6eb16541L, 0x5f6262fef66852L,
+          0x1b83091c4cda27L, 0xa4adf6f3bf742bL }
+    },
+    {
+        { 0x8cc2365a5100e7L, 0x3026f508592422L, 0xa4de79a3d714d0L,
+          0xefa0d3f90fcb30L, 0x126d559474ada0L, 0xd68fa77c94350aL,
+          0xfa80e570c7cb45L, 0xe042bb83985fbfL },
+        { 0x51c80f1fe13dbaL, 0xeace234cf055d7L, 0x6b8197b73f95f7L,
+          0x9ca5a89dcdbe89L, 0x2124d5fdfd9896L, 0x7c695569e7ca37L,
+          0x58e806a8babb37L, 0x91b4cc7baf99ceL }
+    },
+    {
+        { 0x874e253197e968L, 0x36277f53160668L, 0x0b65dda8b95dbeL,
+          0x477a792f0872a1L, 0x03a7e3a314268dL, 0xa96c8420c805c7L,
+          0xb941968b7bc4a8L, 0x79dce3075db390L },
+        { 0x577d4ef6f4cc14L, 0x5b0d205b5d1107L, 0x64ff20f9f93624L,
+          0x0b15e315034a2fL, 0x3a0f6bb8b6f35cL, 0x0399a84e0d0ec5L,
+          0xd0e58230d5d521L, 0xdeb3da1cb1dd54L }
+    },
+    {
+        { 0x24684ae182401aL, 0x0b79c1c21a706fL, 0xe1d81f8d8998afL,
+          0xadf870f4bb069fL, 0xd57f85cf3dd7aaL, 0x62d8e06e4a40f8L,
+          0x0c5228c8b55aa1L, 0xc34244aa9c0a1aL },
+        { 0xb5c6cf968f544eL, 0xa560533de23ab7L, 0xaa5512047c690cL,
+          0x20eda5b12aaaa6L, 0xea0a49a751a6a0L, 0x6d6cfff2baa272L,
+          0x95b756ebf4c28aL, 0xd747074e6178a4L }
+    },
+    {
+        { 0xa27b453221a94bL, 0xd56ad13e635f20L, 0x03574b08c95117L,
+          0xf0ee953ed30b70L, 0xb48d733957796fL, 0xf5d958358c336bL,
+          0x6170cd882db529L, 0xcd3ef00ec9d1eaL },
+        { 0xd1bea0de4d105fL, 0xd2d670fad6a559L, 0x652d01252f9690L,
+          0x5f51fb2c2529b0L, 0x5e88bf0e89df2aL, 0x9a90684cd686e4L,
+          0xf519ccd882c7a1L, 0x933a0dfc2f4d37L }
+    },
+    {
+        { 0x0720a9f3f66938L, 0x99356b6d8149dfL, 0xb89c419a3d7f61L,
+          0xe6581344ba6e31L, 0xd130561ab936c8L, 0x0625f6c40dbef1L,
+          0x7b2d6a2b6bb847L, 0x3ca8b2984d506bL },
+        { 0x6bf729afb011b0L, 0x01c307833448c9L, 0x6ae95080837420L,
+          0xf781a8da207fb8L, 0xcc54d5857562a9L, 0xc9b7364858c5abL,
+          0xdfb5035359908fL, 0x8bf77fd9631138L }
+    },
+    {
+        { 0xf523365c13fbb1L, 0x88532ea9993ed5L, 0x5318b025a73492L,
+          0x94bff5ce5a8f3cL, 0x73f9e61306c2a0L, 0x00abbacf2668a3L,
+          0x23ce332076237dL, 0xc867f1734c0f9bL },
+        { 0x1e50995cfd2136L, 0x0026a6eb2b70f8L, 0x66cb1845077a7dL,
+          0xc31b2b8a3b498eL, 0xc12035b260ec86L, 0x1cbee81e1b3df0L,
+          0xfd7b8048d55a42L, 0x912a41cf47a8c8L }
+    },
+    {
+        { 0xab9ffe79e157e3L, 0x9cfe46d44dc158L, 0x435551c8a4a3efL,
+          0x638acc03b7e3a8L, 0x08a4ebd49954a7L, 0x295390c13194f7L,
+          0x3a2b68b253892aL, 0xc1662c225d5b11L },
+        { 0xcfba0723a5d2bbL, 0xffaf6d3cc327c9L, 0x6c6314bc67e254L,
+          0x66616312f32208L, 0xf780f97bea72e1L, 0x495af40002122fL,
+          0x3562f247578a99L, 0x5f479a377ce51eL }
+    },
+},
+{
+    {
+        { 0x91a58841a82a12L, 0xa75417580f3a62L, 0x399009ff73417aL,
+          0x2db1fb90a8c5cdL, 0x82c8912c046d51L, 0x0a3f5778f18274L,
+          0x2ad0ede26ccae2L, 0x7d6bd8b8a4e9c2L },
+        { 0xaa0d7974b3de44L, 0xf8658b996ac9bbL, 0x31e7be25f6c334L,
+          0x23836ce4df12c9L, 0x029027b59eb5c9L, 0x2f225315b8649dL,
+          0xa0fdf03d907162L, 0x101d9df9e80226L }
+    },
+    {
+        { 0xf12037a9a90835L, 0xd2d0882f0222a7L, 0xeaf8d40c3814e2L,
+          0xa986dc68b8146bL, 0x147a3318504653L, 0x734e0032feaf67L,
+          0x6f27bbf602bec5L, 0xa1e21f16a688f3L },
+        { 0x5a8eeab73c4ae5L, 0x4dbaddbe70b412L, 0x871cebacfd2af1L,
+          0x18603827d7a286L, 0x024059db5bb401L, 0x2557c093c39b73L,
+          0xfc5a7116681697L, 0xf881c0f891b57cL }
+    },
+    {
+        { 0x3c443f18ea191aL, 0x76faa58d700ad0L, 0x6fe6cfabe7fcbfL,
+          0xaefc5288990ef7L, 0x44e30fa80004ccL, 0xc744adc6d8ef85L,
+          0xafcd931912df70L, 0xf62a9d1572a6d8L },
+        { 0x47158a03219f27L, 0x76fb27ead73136L, 0x41bb2adcc2d614L,
+          0x8858cb9de1ec21L, 0xab402c45f15866L, 0x6675d5bbc82bbfL,
+          0x4ee9dd6f1b28d3L, 0x875884fe373c17L }
+    },
+    {
+        { 0x17806dd2a67d36L, 0xaa23a8632c9ec1L, 0xd914126fc1ee55L,
+          0xbf8f7bd653701bL, 0x9b0111aea71367L, 0x61fd4aba98e417L,
+          0xeb45298561c5a5L, 0x2187b0ae7af394L },
+        { 0x71f12db1616ddeL, 0x061760907da7b4L, 0x414d37602ddb04L,
+          0x1100be7286fb58L, 0xd7cf88d6f0d95bL, 0x8539d23746d703L,
+          0xdccc9d64e23d73L, 0xaeef1d2ec89680L }
+    },
+    {
+        { 0x82ccf1a336508dL, 0xa128c1f5bad150L, 0x551d8c029a188dL,
+          0xef13dd4771404fL, 0xdd67696c37b993L, 0x428c0e20dddad2L,
+          0x222278d038c94cL, 0x1a24a51078e3f2L },
+        { 0xd297fe6edb0db9L, 0x00988d28251a87L, 0xbb946f8bfaa0d7L,
+          0x380f7b9df45ea0L, 0x8526415afccf5eL, 0x909bfbfe9ec7bcL,
+          0x2ed7093124755cL, 0x436802889404e2L }
+    },
+    {
+        { 0x21b9fa036d9ef1L, 0xfd64b7ce433526L, 0xd9d7eb76544849L,
+          0x201620cd5b54b3L, 0x25fab3dbb61159L, 0x90d4eb0c53e0d3L,
+          0xba098319e74772L, 0x8749658ec1681cL },
+        { 0xa354349fec316bL, 0x639a9b1a743ea2L, 0x2e514ca37c50e6L,
+          0x9f4a4fddbaf6c5L, 0x0df87ef6f511c9L, 0xadd4cef0c00d95L,
+          0x401c0ebaa1433fL, 0x3c3a59ebb38af9L }
+    },
+    {
+        { 0x8706245f0e7dcaL, 0xad238cd3fb29caL, 0x03304439b7d8f0L,
+          0xfdcd6e6154f495L, 0xc67e24a7d4ad09L, 0x1b209e85438390L,
+          0xf893b81b0c211eL, 0x1aa86f07e11e36L },
+        { 0x2cca3ffedea8b1L, 0x7eedd073b306cdL, 0x78e37bc12ee222L,
+          0x257870bbc42a1dL, 0x5fb2bb91fbd397L, 0x470247009d6c60L,
+          0x11748a320bdc36L, 0x3ff24dc04280e8L }
+    },
+    {
+        { 0x0eb1c679839b52L, 0x5bcca27acfbd32L, 0xb506c1674898e3L,
+          0x37d662e2489e5eL, 0x8dc0731f694887L, 0x571149ef43f1dcL,
+          0x6430a3766d63dcL, 0x0d2640eb50dd70L },
+        { 0x2b561493b2675bL, 0x1b4806588c604fL, 0x55c86a8aafbabcL,
+          0xa7b9447608aabaL, 0xa42f63504cad8cL, 0x0f72b1dcee7788L,
+          0x1d68374755d99aL, 0xd7cdd8f5be2531L }
+    },
+},
+{
+    {
+        { 0x67873bdbcdfee1L, 0xa5a0c0afcd0a3fL, 0x59389f93cfa3d4L,
+          0x14e945ce1c865cL, 0x62d2f8e1d588ccL, 0xfd02f8a8e228b4L,
+          0x208f791b42b649L, 0x0e0dff1ab397adL },
+        { 0x30ac3d90bc6eb1L, 0xf14f16a5f313bbL, 0x70fa447e2a0ad2L,
+          0x6e406855a0db84L, 0xd52282be32e1e7L, 0x315a02a15ca330L,
+          0x9a57a70867c2feL, 0x55f07650054923L }
+    },
+    {
+        { 0x2d729f6c0cf08fL, 0x6b80138ebaf57fL, 0x6285bcc0200c25L,
+          0xee845192cd2ac7L, 0x28fce4d922778aL, 0x761325ccd1011cL,
+          0xd01f2475100e47L, 0xc7a1665c60d8e1L },
+        { 0x950966d7ceb064L, 0x0a88e8578420dbL, 0x44f2cfce096f29L,
+          0x9d9325f640f1d2L, 0x6a4a81fd2426f1L, 0x3ed6b189c905acL,
+          0xba3c0e2008854dL, 0x1df0bd6a0d321bL }
+    },
+    {
+        { 0x0117ad63feb1e7L, 0xa058ba2f1ae02fL, 0x5eee5aa31b3f06L,
+          0x540d9d4afacd4dL, 0x38992f41571d91L, 0xef2738ebf2c7deL,
+          0x28bfcab92a798dL, 0x37c7c5d2286733L },
+        { 0xb99936e6470df0L, 0x3d762d58af6a42L, 0xa8c357ac74eec5L,
+          0x9917bebf13afbcL, 0x28f0941f2dc073L, 0x306abf36ce7df7L,
+          0xa3c5f6fd6973c8L, 0x640209b3677632L }
+    },
+    {
+        { 0xee872a2e23aef7L, 0xb497b6feb9b08eL, 0xfb94d973f33c63L,
+          0x9ea1ff42b32315L, 0x537b49249a4166L, 0x89c7fe6ab4f8beL,
+          0xf68007fdad8f0fL, 0xe56ef0b71b8474L },
+        { 0x478b2e83f333f9L, 0x144e718b2607f5L, 0x13aa605a4c7ab5L,
+          0xfc1fc991d0730dL, 0xe7a04375ab3ea1L, 0xc59986a306d8d3L,
+          0x24f6111702a8b1L, 0x7741394e040ad2L }
+    },
+    {
+        { 0x34c6a2560723a7L, 0x8aabd0df4ea691L, 0x9d676a55d7497fL,
+          0x12c09577d91fa4L, 0x581c7a86479284L, 0xa54f3daf4fd449L,
+          0x2f89f3c4ef44cfL, 0xfc266b5c9ec97cL },
+        { 0xfcd3fbe88b142aL, 0x9f3109f4bd69c1L, 0x08839c0b5f5a6aL,
+          0x63ca8502e68303L, 0x2f0628dbba0a74L, 0x743cccf5d56b54L,
+          0xbd4b06613e09fdL, 0x7a8415bde2ba3eL }
+    },
+    {
+        { 0x2234a3bc076ab2L, 0xd6953e54977a98L, 0xc12215831ebe2eL,
+          0x632145fbad78e2L, 0xd7ba78aa5c4b08L, 0x6f4ea71998e32aL,
+          0x25900d23485a63L, 0x97ac6286a5176fL },
+        { 0x5df91181093f7bL, 0x2bf9829c844563L, 0x525d99d6272449L,
+          0x4281cb5b5c8a18L, 0x35df2780544a08L, 0xf4c3d2dbaeb8f4L,
+          0xc7ff3175230447L, 0x6b4d7645d2fbffL }
+    },
+    {
+        { 0x4837f802b0c9cbL, 0xb65f8168ce8418L, 0xdf66ea99fc1428L,
+          0x9788ee804ea7e8L, 0x9eae9008334e3cL, 0xbc91058d6ba1b6L,
+          0x634aba1d7064b6L, 0x12d9bb3397b368L },
+        { 0x0645c85c413aa8L, 0xb09dea6ac6b5e3L, 0x29a620d289a50bL,
+          0x104db3bbbcceb1L, 0x42e479287b3309L, 0xdfc373eec97f01L,
+          0xe953f94b93f84eL, 0x3274b7f052dfbfL }
+    },
+    {
+        { 0x9d5670a1bd6fa9L, 0xec42fc9db6c4d4L, 0xaecd4ed1b42845L,
+          0x4eed90e1b03549L, 0xeb3225cbbab1faL, 0x5345e1d28a2816L,
+          0x3741cfa0b77d2aL, 0x712b19f7ea8caaL },
+        { 0x42e6844661853eL, 0x4cf4126e4a6e5dL, 0x196a9cfc3649f6L,
+          0x06621bcf21b6b1L, 0x887021c32e29eaL, 0x5703aeb8c5680fL,
+          0x974be24660f6d7L, 0xaf09badc71864eL }
+    },
+},
+{
+    {
+        { 0x3483535a81b6d3L, 0x19e7301ca037dcL, 0x748cab763ddfebL,
+          0xe5d87f66f01a38L, 0xbba4a5c2795cd6L, 0x411c5d4615c36cL,
+          0xff48efc706f412L, 0x205bafc4b519dfL },
+        { 0xfcaa5be5227110L, 0x7832f463ad0af0L, 0x34ef2c42642b1bL,
+          0x7bbef7b072f822L, 0x93cb0a8923a616L, 0x5df02366d91ba7L,
+          0x5da94f142f7d21L, 0x3478298a14e891L }
+    },
+    {
+        { 0xad79a0fc831d39L, 0x24d19484803c44L, 0x4f8a86486aeeb2L,
+          0x0ca284b926f6b9L, 0x501829c1acd7cdL, 0x9f6038b3d12c52L,
+          0x77223abf371ef5L, 0x2e0351613bf4deL },
+        { 0x7a5a4f2b4468ccL, 0xdcea921470ae46L, 0xf23b7e811be696L,
+          0xe59ad0d720d6fbL, 0x9eacac22983469L, 0x4dd4110c4397eeL,
+          0x4ef85bdcbe2675L, 0xe4999f7aa7c74bL }
+    },
+    {
+        { 0x031838c8ea1e98L, 0x539b38304d96a2L, 0x5fbdef0163956eL,
+          0x6bd4d35ce3f52aL, 0xe538c2355e897fL, 0x6078d3a472dd3fL,
+          0x590241eca9f452L, 0x2bc8495fd7fc07L },
+        { 0x23d0c89ead4c8cL, 0x1ea55a9601c66eL, 0x41493c94f5b833L,
+          0xc49a300aa5a978L, 0xc98bdc90c69594L, 0x4e44cedccbdc8cL,
+          0xb0d4e916adccbfL, 0xd56e36b32c37aeL }
+    },
+    {
+        { 0x052bd405b93152L, 0x688b1d44f1dbfaL, 0xe77ba1abe5cc5fL,
+          0x11f8a38a6ac543L, 0x3355fd6e4bb988L, 0xdf29c5af8dffb4L,
+          0x751f58981f20eeL, 0x22a0f74da9b7fbL },
+        { 0xec8f2bc6397b49L, 0xff59fc93639201L, 0xb7f130aa048264L,
+          0xe156a63afdc4ccL, 0x0fd7c34b13acafL, 0x87698d40cb4999L,
+          0x6d6ecae7f26f24L, 0xae51fad0f296e2L }
+    },
+    {
+        { 0xd0ad5ebdd0f58dL, 0x6ec6a2c5c67880L, 0xe1ce0349af1e0fL,
+          0x08014853996d32L, 0x59af51e5e69d20L, 0x0ef743aaa48ecfL,
+          0x8d3d2ea7dafcb0L, 0x4ac4fad89189b6L },
+        { 0x92d91c2eae97f1L, 0xef5eca262b4662L, 0x440b213b38b10aL,
+          0xec90187fc661daL, 0x85f3f25f64cf8dL, 0xcee53ca457ad1bL,
+          0x8deed4bf517672L, 0x7706fb34761828L }
+    },
+    {
+        { 0x1577d9117494feL, 0x52d29be2fd7239L, 0x9a0eef00186d37L,
+          0x241d0f527fe108L, 0x42824bae6fb59fL, 0xb8d33df0d48c25L,
+          0xfffdb0a47af4b0L, 0x534c601073b0b6L },
+        { 0xe6df35951c033bL, 0x3e1002b86c0f94L, 0xa7cb55548fb9b6L,
+          0x999818ba7bbff8L, 0xe4ba3d684d8bf2L, 0x53dbb326358f0aL,
+          0xeebc1e2f2568e8L, 0xc6917ebb3e0f68L }
+    },
+    {
+        { 0xbe1bbfc19f8d13L, 0xc3951b62d4795cL, 0x9371c49ed535a9L,
+          0x77c389f68cebeaL, 0xfc1a947a141d0eL, 0x4b48d7ade44f8bL,
+          0x3db1f058580a26L, 0xeed1466258b5fcL },
+        { 0x5daa4a19854b21L, 0x5bfa46f1ab1eadL, 0xc152e3559957ebL,
+          0xdc84277ea48adaL, 0x68709cffc169b5L, 0xde50ce3720e617L,
+          0xe42f262dd9a832L, 0xddffd4d2d6ce29L }
+    },
+    {
+        { 0xd5ba5578fa0a56L, 0x0d7d0f1fafaf4cL, 0x7666e4138b63edL,
+          0x04e65135d87f02L, 0xdca8866c958f32L, 0xaa8486d3ce2686L,
+          0xe3785caf1cbcd3L, 0x8a9b11403c8335L },
+        { 0x5c1dca22e0ef60L, 0x775af5b7d3fb20L, 0xe690ffc2b373a8L,
+          0x30fe15d28330e6L, 0x8a1022bdd0f393L, 0x6bd7364966a828L,
+          0x8d4b154949208aL, 0xfb38c6bb9d9828L }
+    },
+},
+{
+    {
+        { 0x6d197640340ac2L, 0x969f473ecab5ffL, 0xead46f7c458e42L,
+          0x168646a1d00eedL, 0xf70c878e0ce0cfL, 0xa7291d38d8d15aL,
+          0x92cf916fdd10ccL, 0x6d3613424f86d5L },
+        { 0xba50d172d5c4b4L, 0xe0af5024626f15L, 0x76f3809d76098aL,
+          0x433dc27d6caaa8L, 0x72dc67a70d97a7L, 0x935b360f5c7355L,
+          0xdbaac93179bb31L, 0x76738487ed1a33L }
+    },
+    {
+        { 0x8d1ca668f9fa0dL, 0x4ed95d8a02f2bfL, 0xd19fc79f630d7bL,
+          0x0448ec4f46fa51L, 0xb371dd8623bf3fL, 0xe94fabcd650e94L,
+          0x3af3fcacd90a70L, 0x0f720c403ce3b7L },
+        { 0x590814cd636c3bL, 0xcf6928d4469945L, 0x5843aaf484a4c6L,
+          0xb5a4c1af9b4722L, 0x25116b36cfb2f9L, 0xf248cf032c2640L,
+          0x8cd059e27412a1L, 0x866d536862fc5dL }
+    },
+    {
+        { 0x156e62f6de4a2eL, 0x0365af7aafcc78L, 0x65c861819e925eL,
+          0x4db5c01f8b2191L, 0x1fd26d1ad564faL, 0x16bbc5319c8610L,
+          0x0718eef815f262L, 0x8684f4727f83d1L },
+        { 0xa30fd28b0f48dbL, 0x6fef5066ab8278L, 0xd164e771a652dfL,
+          0x5a486f3c6ebc8cL, 0xb68b498dc3132bL, 0x264b6efd73323fL,
+          0xc261eb669b2262L, 0xd17015f2a35748L }
+    },
+    {
+        { 0x4241f657c4bb1dL, 0x5671702f5187c4L, 0x8a9449f3973753L,
+          0x272f772cc0c0cdL, 0x1b7efee58e280cL, 0x7b323494b5ee9cL,
+          0xf23af4731142a5L, 0x80c0e1dd62cc9eL },
+        { 0xcbc05bf675ffe3L, 0x66215cf258ce3cL, 0xc5d223928c9110L,
+          0x30e12a32a69bc2L, 0x5ef5e8076a9f48L, 0x77964ed2329d5fL,
+          0xdf81ba58a72cf2L, 0x38ea70d6e1b365L }
+    },
+    {
+        { 0x1b186802f75c80L, 0x0c153a0698665aL, 0x6f5a7fe522e8ddL,
+          0x96738668ddfc27L, 0x7e421d50d3bdceL, 0x2d737cf25001b2L,
+          0x568840f0e8490cL, 0xea2610be30c8daL },
+        { 0xe7b1bc09561fd4L, 0xeda786c26decb0L, 0x22369906a76160L,
+          0x371c71478a3da3L, 0x1db8fce2a2d9bfL, 0x59d7b843292f92L,
+          0x8097af95a665f9L, 0x7cb4662542b7a9L }
+    },
+    {
+        { 0xa5c53aec6b0c2fL, 0xc4b87327312d84L, 0xfc374cbc732736L,
+          0xa8d78fe9310cc0L, 0xd980e8665d1752L, 0xa62692d6004727L,
+          0x5d079280146220L, 0xbd1fedb860fea5L },
+        { 0xcbc4f8ab35d111L, 0x5ba8cdf3e32f77L, 0xd5b71adb614b93L,
+          0x7b3a2df2f8808dL, 0x09b89c26ef2721L, 0x55a505447c3030L,
+          0x21044312986ae6L, 0x427a0112367d4cL }
+    },
+    {
+        { 0xe9fe256c1942d8L, 0x9e7377d96e3546L, 0x43e734cb0c1744L,
+          0x5f46821211fbcaL, 0x44f83dc32b6203L, 0x84513086ad1d96L,
+          0x54dd5192fbb455L, 0xc2a18222f10089L },
+        { 0x01055a21855bfaL, 0x9e6d7b477078b4L, 0x3f8df6d30cea0eL,
+          0x81c215032973f7L, 0x17dd761c0b3d40L, 0x040424c50d0abeL,
+          0x5599413783deabL, 0xde9271e8f3146fL }
+    },
+    {
+        { 0x5edfd25af4a11dL, 0x3a3c5307846783L, 0xb20086873edd31L,
+          0x74e00ecfe0eef8L, 0xba65d2f3dd78c7L, 0xab1364371999f1L,
+          0xfa9be5dde9a7e8L, 0xeb146ce87a8609L },
+        { 0x76afd6565353e9L, 0xfa7023dd51ba1cL, 0x7a09f2237ede4fL,
+          0xca085760ba7a1bL, 0xd973882b99950aL, 0xe894266ea5057aL,
+          0xd01c4217f55e49L, 0x69cfb9c5555679L }
+    },
+},
+{
+    {
+        { 0x67867e7c5d631aL, 0x1de88c55bcf47bL, 0x8366d06afd1352L,
+          0xd7dbdef6e20337L, 0xb0f9e2f1253ec7L, 0x1be984510ad240L,
+          0x63ec533f4a6118L, 0xd5e4c5b96ce633L },
+        { 0x1d0b6c34df4a25L, 0xef9486a5a1b554L, 0x2f0e59e47b6ef3L,
+          0x4d8042f2ff84d7L, 0x3e74aa3da359c9L, 0x1baa16fd21c160L,
+          0xb4cff210191cbaL, 0x50032d8ebc6472L }
+    },
+    {
+        { 0xb6833e01fc1b13L, 0x8a8b7ba1a5ad8fL, 0xc0cafa2622b820L,
+          0xc6663af738ed20L, 0xd8944868b18f97L, 0xcf0c1f9774fbe4L,
+          0xeedd4355be814fL, 0xd81c02db57e543L },
+        { 0x5e32afc310bad8L, 0x065bc819b813d1L, 0x8efc5fc3142795L,
+          0x5006514732d59cL, 0x91e39df2b5a3ceL, 0x2ad4477faf4204L,
+          0x1a96b184d9bd4fL, 0xc3fee95a4d9c07L }
+    },
+    {
+        { 0xfac7df06b4ba61L, 0xa6ed551061aaefL, 0x35aa2d6133f609L,
+          0x420cfba20ed13dL, 0x861c63eea03d0cL, 0x75f0c56f936d6eL,
+          0xa25f68f3d9a3d5L, 0xba0b7fecd9f66eL },
+        { 0x292e1354680772L, 0x6f6a2dba73f405L, 0xca6add924ea9e4L,
+          0x81cfd61268daaaL, 0x7a4cb6ce6f147aL, 0x8ec3454bded8f5L,
+          0xc8a893b11d61cbL, 0x2256ffc7656022L }
+    },
+    {
+        { 0x6b33271575cb78L, 0x560d305adcd23eL, 0xeedbd3ad6d834bL,
+          0x614a64a5a31e27L, 0xe40b47647ee0c8L, 0x8ef4ff68bd7c2cL,
+          0xa5297fc0b77727L, 0x8759208baf88adL },
+        { 0x86cfe64918df68L, 0x9d60a73cdd882eL, 0x546b642b953014L,
+          0xbaceae38bbef55L, 0xdf58e43f1c3467L, 0x99a83fee9f9babL,
+          0xcd52cbf57a4a8bL, 0xf744e968ae36ecL }
+    },
+    {
+        { 0xb945869a607124L, 0x810dbe9440e6f6L, 0x9911e60738e381L,
+          0x51df68c343b80bL, 0xe424336f7a3f39L, 0x2d32acb989015cL,
+          0xa69b14931019e8L, 0x8a31a38ec12f93L },
+        { 0x0d0d36997c916aL, 0xdc95f3b8885372L, 0xcf1a2613549040L,
+          0x60f6f5eabe95a2L, 0xa909e9fe141325L, 0x7d598f2355c865L,
+          0x70c6442931a9c9L, 0x2354a85b423850L }
+    },
+    {
+        { 0x4cdd22497f9619L, 0x4776fffc22162eL, 0xee5ec330cd31c2L,
+          0x7c04c10f209bb8L, 0x35bbfde579e211L, 0x0e3832515cdfc2L,
+          0x657e6d3e26ffa7L, 0xc66a7c3c65c604L },
+        { 0x322acd7b45e567L, 0x1589cf0296db9bL, 0x1fd0bd3ba1db73L,
+          0xe8826109337a40L, 0xf505a50b3035c7L, 0x4d5af066ed08d7L,
+          0xb3c376b5eda400L, 0x9c7b7001944748L }
+    },
+    {
+        { 0xd76832570c3716L, 0xda62af0dd540e0L, 0x76b155d6580feaL,
+          0x4f42acc32b5464L, 0x881bb603f5b72bL, 0x09c130ee68b9baL,
+          0x37ede3b5c50342L, 0xce61a9cfd15e7dL },
+        { 0xfff1d8572605d0L, 0x62ac2d3062abc2L, 0xa85e02efbe43ddL,
+          0x859d2baa947020L, 0x2ebc8a9111c20bL, 0x7f590a7a656f66L,
+          0x0e1384316b21a6L, 0x29b30c500c7db6L }
+    },
+    {
+        { 0x61e55e2906b8deL, 0x6a97e96949974dL, 0x24b52b526eef67L,
+          0x512f5361aa595aL, 0x81cc7b83c48fcbL, 0xa64af2328115adL,
+          0x9edf6f93d44b8eL, 0x68d7f7c1fe22e3L },
+        { 0x2b2116a520d151L, 0x66a0b7d6aa3efbL, 0x48ae70a9b0f791L,
+          0xcf12174037db88L, 0x36868cd317d9f3L, 0xb57305922fc344L,
+          0xbaa852646a5d23L, 0xad6569137fc10dL }
+    },
+},
+{
+    {
+        { 0xcf8e5f512c78d5L, 0xeb94d98805cdbdL, 0xad1dcdf2ab50b5L,
+          0xf33c136f33cd31L, 0x0d6226b10aeff5L, 0xf7ff493f2f8fc5L,
+          0x7e520d4df57165L, 0x41fbae505271a7L },
+        { 0x72c898776480baL, 0x260835925f4523L, 0xed36b8d49f5f01L,
+          0x3bc1dcef3d49ebL, 0x30c1c1a4940322L, 0x78c1cda7e0f731L,
+          0x51f2dc86d05a31L, 0x57b0aa807f3522L }
+    },
+    {
+        { 0x7ab628e71f88bcL, 0xcf585f38018f21L, 0xdbbe3a413d64f6L,
+          0x0f86df1ec493a5L, 0x8355e6c7725de9L, 0x3954ffee00fe1eL,
+          0xbb8978f9924e32L, 0x1c192987812714L },
+        { 0x7c4ce3eaabca8bL, 0xf861eb59bf7019L, 0x31a84fc682e541L,
+          0x2307ca9acd1b92L, 0x6f8b6ce4bf2842L, 0xde252accb9f9a9L,
+          0x7f0611d93c46d1L, 0x8e2bd80751dc98L }
+    },
+    {
+        { 0xf2fd8fbe27d54bL, 0x2a1e37ec248071L, 0x2fcc888ab8f49aL,
+          0x42c62a3c18a9e5L, 0xe30290870b2446L, 0x90277fac5ac55dL,
+          0x8d97d56d6dde41L, 0xf4cf8a95db04feL },
+        { 0x3e280f5d30d077L, 0x2c903073cb3293L, 0xe0be2ac24eb0ddL,
+          0xa2d1a498bcb4f0L, 0x16db466cd0cd45L, 0x3b28aa79a80232L,
+          0xdd7e52f17b008eL, 0x20685f2868e4daL }
+    },
+    {
+        { 0x0a68c147c7a486L, 0xd8ef234c429633L, 0x470667bffe7506L,
+          0x55a13c88828d51L, 0x5f327412e44befL, 0x537d92a5929f92L,
+          0x0a01d5b31c5cd5L, 0xb77aa7867eb3d7L },
+        { 0x36ec45f8b82e4dL, 0x6821da0b37b199L, 0x8af37aad7fa94eL,
+          0xf0206421085010L, 0x9b886787e56851L, 0x35f394452948ceL,
+          0x125c2baafc1361L, 0x8a57d0e453e332L }
+    },
+    {
+        { 0xefe99488043664L, 0xb8b8509db1aa55L, 0x1a2e5a9332523fL,
+          0x5e255dd1045c0fL, 0xe68dd8a7ae7180L, 0x55f1cf345bf532L,
+          0xe00722ee63a716L, 0xd1c21386116bacL },
+        { 0x626221f1c6d1f4L, 0x240b8303773278L, 0xe393a0d88def16L,
+          0x229266eca0495cL, 0x7b5c6c9d3e4608L, 0xdc559cb7927190L,
+          0x06afe42c7b3c57L, 0x8a2ad0bb439c9bL }
+    },
+    {
+        { 0xd7360fbffc3e2fL, 0xf721317fbd2e95L, 0x8cacbab5748e69L,
+          0x7c89f279054bb9L, 0xcbe50faaa86881L, 0x7aa05d375206e4L,
+          0x1ea01bcc752c66L, 0x5968cde1f2c2bcL },
+        { 0x487c55f09a853eL, 0x82cbef1e09204bL, 0xad5c492abd8670L,
+          0x7175963f12dcb3L, 0x7a85762bf6aa06L, 0x02e5697f8d5237L,
+          0xccf7d1937c6157L, 0x3b14ca6c2fd59cL }
+    },
+    {
+        { 0x5e610d81b9f77fL, 0x85876d0051b02fL, 0x5d81c63b8020ddL,
+          0xd0b4116d6ce614L, 0x91810e5aa8bf0cL, 0xf27f91fcbf8c66L,
+          0x2e5dc5f38480aeL, 0x0a13ffebec7633L },
+        { 0x61ff6492bf6af8L, 0xe6aef2d641f827L, 0xad5708a5de5f04L,
+          0xe5c3a80cdfee20L, 0x88466e268fcfa2L, 0x8e5bb3ad6e1d7bL,
+          0xa514f06ed236b8L, 0x51c9c7ba5f5274L }
+    },
+    {
+        { 0xa19d228f9bc3d8L, 0xf89c3f03381069L, 0xfee890e5c3f379L,
+          0x3d3ef3d32fb857L, 0x39988495b418ddL, 0x6786f73c46e89aL,
+          0x79691a59e0f12fL, 0x76916bf3bc022bL },
+        { 0xea073b62cd8a0aL, 0x1fbedd4102fdbcL, 0x1888b14cb9d015L,
+          0x98f2cfd76655f7L, 0xb9b591059f0494L, 0xa3dbbe1e6986a3L,
+          0xef016a5eaf2b04L, 0xf671ba7cd2d876L }
+    },
+},
+{
+    {
+        { 0x1dae3bf1ae05e9L, 0x6a029961f21fefL, 0x95df2b97aec3c6L,
+          0x9abbc5ad83189bL, 0xaf994af2d13140L, 0xc3f884686aa406L,
+          0xcd77e5075284c5L, 0x1c1e13d2a9a4d7L },
+        { 0x7f8815d744b89dL, 0xb1891332ba673eL, 0x55ea93cd594570L,
+          0x19c8a18d61b041L, 0x938ebaa8d2c580L, 0x9b4344d05ba078L,
+          0x622da438eaf9b7L, 0x809b8079fea368L }
+    },
+    {
+        { 0x3780e51c33b7a2L, 0xd7a205c387b1c8L, 0x79515f84be60e4L,
+          0xde02a8b1e18277L, 0x4645c96f0d9150L, 0x45f8acbe0b3fd1L,
+          0x5d532ba9b53ac3L, 0x7984dcdb0557c9L },
+        { 0x5ae5ca68a92f01L, 0xd2fbb3c9d569caL, 0x668cc570c297c1L,
+          0xa4829436295e89L, 0xf646bc1a33ad40L, 0x066aaa4c3f425dL,
+          0x23434cdd005de2L, 0x5aca9e9db35af4L }
+    },
+    {
+        { 0x2bca35c6877c56L, 0xab864b4f0ddd7dL, 0x5f6aa74404f46cL,
+          0x72be164539c279L, 0x1b1d73ee0283cfL, 0xe550f46ad583d9L,
+          0x4ac6518e739ad1L, 0x6b6def78d42100L },
+        { 0x4d36b8cfa8468dL, 0x2cb37735a3d7b8L, 0x577f86f5016281L,
+          0xdb6fe5f9124733L, 0xacb6d2ae29e039L, 0x2ab8330580b8a1L,
+          0x130a4ac643b2d0L, 0xa7996e35e6884eL }
+    },
+    {
+        { 0x6fb627760a0aa8L, 0xe046843cbe04f0L, 0xc01d120e6ad443L,
+          0xa42a05cabef2fcL, 0x6b793f112ff09cL, 0x5734ea8a3e5854L,
+          0xe482b36775f0adL, 0x2f4f60df864a34L },
+        { 0xf521c5884f2449L, 0x58734a99186a71L, 0x157f5d5ac5eaccL,
+          0x858d9a4248ee61L, 0x0727e6d48149c3L, 0xd5c3eaaac9ec50L,
+          0xa63a64a20ee9b5L, 0x3f0dfc487be9deL }
+    },
+    {
+        { 0x836349db13e3f4L, 0xebdd0263e9316dL, 0x3fd61e8324fd6cL,
+          0x85dddfa0964f41L, 0x06e72de52add1bL, 0xb752cff8c4a9e2L,
+          0x53b0894fdf09f7L, 0xd5220ab0bc24fdL },
+        { 0x8442b35fb1981aL, 0xa733a373edd701L, 0x42b60c3d0ef089L,
+          0xa1b16ec46e7bcaL, 0xc0df179a09aaf4L, 0xcd4f187638f3a1L,
+          0x9af64f79eab1c2L, 0x86fed79d1d78e3L }
+    },
+    {
+        { 0x42c8d86fe29980L, 0x6657b816575660L, 0x82d52c680f92caL,
+          0x8587af102d42beL, 0xb5151316e8bdf0L, 0x706e2d9c333495L,
+          0xd53601a9673064L, 0x27b1fbb8219099L },
+        { 0x3f0929d705f7c8L, 0xff40b10f3d6e6fL, 0x673c703026af5cL,
+          0x2c1dce4e25a422L, 0x5348bd73dad8b6L, 0xc39b6b6be2c329L,
+          0x47854ffb921084L, 0xb347b8bb391f20L }
+    },
+    {
+        { 0x79fc841eb9b774L, 0xf32da25b4b6c1dL, 0xcbba76bfe492cbL,
+          0x76c51fcd623903L, 0x114cf6fcf0705aL, 0x6b720497815dafL,
+          0x630b362473382eL, 0xbf40c3a9704db5L },
+        { 0xa8a9ddcc5456ebL, 0x2b4472a72f2dc1L, 0x9874444d6d6ef3L,
+          0x27e8d85a0ba5edL, 0x5d225b4194849fL, 0xe852cd6ebaa40dL,
+          0xb669c248d4bf3fL, 0xa8601eb2343991L }
+    },
+    {
+        { 0x8a0485459502d3L, 0xcab27eee269a7bL, 0x41793074875adaL,
+          0x179e685e2405f9L, 0x0d7b6987b28963L, 0x80c9db8422a43eL,
+          0xf5ff318a0f43eeL, 0x7a928054ba7aa7L },
+        { 0xa5c79fe0c0834eL, 0x837ca0d1f849ecL, 0xfe0d7fa628ab7bL,
+          0x94bcb956edd19aL, 0xa18bc932226fbfL, 0x2795379aad54a3L,
+          0xceeacf8371129eL, 0x65ca57fa588be5L }
+    },
+},
+{
+    {
+        { 0x7a578b52caa330L, 0x7c21944d8ca34aL, 0x6c0fbbb6447282L,
+          0xa8a9957f90b2e5L, 0xbbe10666586b71L, 0x716a90249138a2L,
+          0x2fa6034e7ed66dL, 0x56f77ed2b9916aL },
+        { 0x69f1e26bddefb3L, 0xa4978098c08420L, 0xc3377eb09bc184L,
+          0x796ce0cbe6dadeL, 0x3be0625d103bbbL, 0x01be27c992685cL,
+          0xc0e25597755f9fL, 0x165c40d1c0dbfaL }
+    },
+    {
+        { 0xc63a397659c761L, 0x10a0e5b630fbadL, 0xf21e8a6655ac56L,
+          0xe8580fac1181e2L, 0xbfc2d9c0a84b5cL, 0x2cdbaff7afd5d1L,
+          0x95f1182f61e85aL, 0x1173e96719eaf4L },
+        { 0xc06d55ec6de8b9L, 0x1b4c8ebafcbcaaL, 0x52af5cbbc2bbcdL,
+          0x564fab877bcd10L, 0xfd53a18ae85a6eL, 0x225785994c712fL,
+          0x29b11d71352121L, 0xab1cb76c40491aL }
+    },
+    {
+        { 0xb4e8ca8ce32eb4L, 0x7e484acb250b49L, 0x062c6f7a3e31a2L,
+          0x497fd83625d1fcL, 0x98f821c362dda7L, 0xcae1f8f6be3111L,
+          0x9077e955d4fa42L, 0xa589971a65855aL },
+        { 0xda6321d28832a9L, 0xf9ef5dc3936e9eL, 0xa37f117c9797efL,
+          0x0eb3c80db581beL, 0x207c5c4baa0002L, 0xc0401b5f38faa0L,
+          0xceee523d0f1e6eL, 0x8d27a5fd1f0045L }
+    },
+    {
+        { 0x9411063cf0af29L, 0x304385789a6693L, 0x9a9fb8f640145eL,
+          0x7d82fe954832ebL, 0xf2789e1898c520L, 0x448b402f948dc0L,
+          0xeca8fdf68996ddL, 0x22227e9a149b2fL },
+        { 0x63509ff8e62d6aL, 0xe98d81c8c9c57fL, 0xd3874071fe3bedL,
+          0xf1db013539538fL, 0xb04092e48418ceL, 0xbbf8e76d6d9d4dL,
+          0x2ea9cda2cec5aeL, 0x8414b3e5078fa9L }
+    },
+    {
+        { 0x5ad1cdbd68a073L, 0xd4cedafc18b591L, 0x78267078e4c1c9L,
+          0x9b8d9209ca302aL, 0x3101bd2326115bL, 0x6f154b54c2717aL,
+          0x618c31b263e84bL, 0x12c4138bbd6942L },
+        { 0xf9ead2580da426L, 0xe748e9947d9680L, 0x9b396a38a4210eL,
+          0xfaf03ddf4b8f72L, 0xbd94a5266159e7L, 0x5e730491d4c7cbL,
+          0x31d1f9a7910f38L, 0x4fd10ca08d6dd1L }
+    },
+    {
+        { 0x4f510ac9f2331eL, 0xee872dc7e3dcc2L, 0x4a11a32a0a0c73L,
+          0x27e5803aa5a630L, 0xe5ae5037af4a8aL, 0x2dcdeba9fffeb0L,
+          0x8c27748719d91fL, 0xd3b5b62b9cc61cL },
+        { 0x998ac90cca7939L, 0xc22b59864514e5L, 0x950aaa1b35738aL,
+          0x4b208bbdab0264L, 0x6677931a557d2eL, 0x2c696d8f7c17d3L,
+          0x1672d4a3e15c51L, 0x95fab663db0e82L }
+    },
+    {
+        { 0x3d427346ff205eL, 0x7f187d90ea9fbeL, 0xbd9367f466b2afL,
+          0x188e53203daf2fL, 0xefe132927b54d8L, 0x14faf85ef70435L,
+          0xa5061281ec95c4L, 0xad01705c22cba7L },
+        { 0x7d2dfa66197333L, 0xedd7f078b4f6edL, 0xe0cb68575df105L,
+          0x47c9ddb80f76bcL, 0x49ab5319073c54L, 0x845255ae607f44L,
+          0x0b4ed9fcc74b7cL, 0xcfb52d50f5c3a6L }
+    },
+    {
+        { 0x545c7c6c278776L, 0x92a39ae98c30f0L, 0x8aa8c01d2f4680L,
+          0xa5409ed6b7f840L, 0x0c450acdcb24e7L, 0x5da6fb2c5770d9L,
+          0x5b8e8be8658333L, 0xb26bf4a67ea4adL },
+        { 0x2e30c81c7d91faL, 0x6e50a490eeb69fL, 0x9458c2bee4bc26L,
+          0x419acf233be250L, 0x79d6f8187881abL, 0x694565d403b1beL,
+          0x34b3990234fe1dL, 0x60997d72132b38L }
+    },
+},
+{
+    {
+        { 0x00a974126975dcL, 0x42161c46cf94e7L, 0xcc9fe4bc64ed99L,
+          0x020019a4680570L, 0x885595a698da0dL, 0x008444b77dd962L,
+          0xbf3c22da4fea0eL, 0xc4630482c81245L },
+        { 0xcb248c5793ab18L, 0x4dc7a20eb4320bL, 0x9a0906f1572b7dL,
+          0xd5b3019f9ac20fL, 0x79b1bf534520a3L, 0x788dfe869b5322L,
+          0x9a05298455b7e2L, 0x2f4aecb016bca9L }
+    },
+    {
+        { 0x414d3798745618L, 0x64ba22eb7c983cL, 0x9a5d19f9f9d532L,
+          0x81a00d844a80c8L, 0xb9e24f5cae98d6L, 0x6c3769caca965aL,
+          0x50d6081f6e4e6dL, 0x0d9698054422a6L },
+        { 0xbd7e7925cdd790L, 0xcff65da6a35219L, 0x40dc3638b60ebeL,
+          0x84bee7492a50dcL, 0x57d4be415ad65eL, 0xc54256b1a6d1d3L,
+          0x141c64945717ccL, 0x05eb609cd1c736L }
+    },
+    {
+        { 0xfd52eab1e3c7ecL, 0xa4a5eca9f24895L, 0xaaa2a8d79fdb83L,
+          0xd105e6072bdfdaL, 0x59e6ae2681d97eL, 0xfedf8e08e8077fL,
+          0xb06d0ad629e462L, 0x8c7c2d096fa863L },
+        { 0x5eecc4cee8fc91L, 0x5e83ab29e61174L, 0x1fd8925b28c02dL,
+          0x93be5382072864L, 0xda0c88624c984eL, 0xdcf9f0ca008286L,
+          0x1ecb5a6a58ba75L, 0x1d9b890c2e3c83L }
+    },
+    {
+        { 0x19e866eeeee062L, 0x31c1c7f4f7b387L, 0x9be60181c06652L,
+          0xc00a93a2b68bbbL, 0x54c65d69d52b2bL, 0x4591416e8b744aL,
+          0x641bcca9a64ab6L, 0xf22bcb1ab08098L },
+        { 0x3c0db8ff1f726cL, 0x4f5739e9d2e6a6L, 0x5cb669b45c9530L,
+          0x861b04e7b472d0L, 0x3e30515894da77L, 0x3344685c9ac39bL,
+          0x9e1730573bdd29L, 0x9cac12c808dc85L }
+    },
+    {
+        { 0xf152b865e27087L, 0x267bd8590a580eL, 0xba79cec8baafc1L,
+          0x6140ab19442686L, 0xa67090c5b31693L, 0x50a103a28b4117L,
+          0x7722e610ddc08fL, 0x5d19d43e6569b2L },
+        { 0x70e0c525962bf6L, 0x808e316fb5fb02L, 0x3fb80da5b667beL,
+          0x8aa366efcfacecL, 0xcb0b3e7134280eL, 0x0bf1de4cd7d944L,
+          0x0cd23bed092df5L, 0xc9a6a79a153a0cL }
+    },
+    {
+        { 0x1c69ad02d5a4b7L, 0x4bb28d0d9e6f4aL, 0x815308ca984fc6L,
+          0x40929c79037ca5L, 0x0ea2b491bd0357L, 0xec17e5b42aad4eL,
+          0x1f32ade18e7235L, 0xbc60b05a96a9d3L },
+        { 0x3b0229ae20f707L, 0xd63505056bdfadL, 0xac2d922d8b2e1eL,
+          0x92b2998235c748L, 0x6002c3ad766f97L, 0x99198001a2a862L,
+          0x2af7567b58b684L, 0xd8fe707aaafce5L }
+    },
+    {
+        { 0x54487ab5df7a4bL, 0x51cccdec57ccc2L, 0x23943277510b53L,
+          0x3a09f02f555de3L, 0xa696aec1be484dL, 0x56f459f37817a2L,
+          0x8d8f61c623dcb4L, 0xc52223c5335656L },
+        { 0xf634111b49914aL, 0xbf8e1ab8e4f9bbL, 0x2f59578f4dba02L,
+          0x2a94199e004319L, 0x87931f0654d005L, 0x7df57d96fa0814L,
+          0xc8da316a154031L, 0x2a44ac041f658bL }
+    },
+    {
+        { 0xfb5f4f89e34ac6L, 0x0a1b10b97790f2L, 0x58fe4e74b8a06cL,
+          0x10c1710955f27cL, 0x77b798ad5ebe19L, 0xaf1c35b1f1c2dcL,
+          0xc25b8e6a1f8d69L, 0x49cf751f76bf23L },
+        { 0x15cb2db436f7b7L, 0x186d7c27e74d1aL, 0x60731dec00a415L,
+          0xea1e15615f0772L, 0xf02d591714463fL, 0x26a0c6451adeb1L,
+          0x20174cdcc5229eL, 0xb817e50efd512aL }
+    },
+},
+};
+
+static const ge448_precomp base_i[16] = {
+    {
+        { 0x26a82bc70cc05eL, 0x80e18b00938e26L, 0xf72ab66511433bL,
+          0xa3d3a46412ae1aL, 0x0f1767ea6de324L, 0x36da9e14657047L,
+          0xed221d15a622bfL, 0x4f1970c66bed0dL },
+        { 0x08795bf230fa14L, 0x132c4ed7c8ad98L, 0x1ce67c39c4fdbdL,
+          0x05a0c2d73ad3ffL, 0xa3984087789c1eL, 0xc7624bea73736cL,
+          0x248876203756c9L, 0x693f46716eb6bcL }
+    },
+    {
+        { 0x28173286ff2f8fL, 0xb769465da85757L, 0xf7f6271fd6e862L,
+          0x4a3fcfe8daa9cbL, 0xda82c7e2ba077aL, 0x943332241b8b8cL,
+          0x6455bd64316cb6L, 0x0865886b9108afL },
+        { 0x22ac13588ed6fcL, 0x9a68fed02dafb8L, 0x1bdb6767f0bffaL,
+          0xec4e1d58bb3a33L, 0x56c3b9fce43c82L, 0xa6449a4a8d9523L,
+          0xf706cbda7ad43aL, 0xe005a8dbd5125cL }
+    },
+    {
+        { 0xa99d1092030034L, 0x2d8cefc6f950d0L, 0x7a920c3c96f07bL,
+          0x958812808bc0d5L, 0x62ada756d761e8L, 0x0def80cbcf7285L,
+          0x0e2ba7601eedb5L, 0x7a9f9335a48dcbL },
+        { 0xb4731472f435ebL, 0x5512881f225443L, 0xee59d2b33c5840L,
+          0xb698017127d7a4L, 0xb18fced86551f7L, 0x0ade260ca1823aL,
+          0xd3b9109ce4fd58L, 0xadfd751a2517edL }
+    },
+    {
+        { 0xdf9567ceb5eaf7L, 0x110a6b478ac7d7L, 0x2d335014706e0bL,
+          0x0df9c7b0b5a209L, 0xba4223d568e684L, 0xd78af2d8c3719bL,
+          0x77467b9a5291b6L, 0x079748e5c89befL },
+        { 0xe20d3fadac377fL, 0x34e866972b5c09L, 0xd8687a3c40bbb7L,
+          0x7b3946fd2f84c9L, 0xd00e40ca78f50eL, 0xb87594417e7179L,
+          0x9c7373bcb23583L, 0x7ddeda3c90fd69L }
+    },
+    {
+        { 0x3d0def76ab686bL, 0x1a467ec49f7c79L, 0x3e53f4fc8989edL,
+          0x101e344430a0d9L, 0xa3ae7318ad44eeL, 0xaefa6cdae1d134L,
+          0xaa8cd7d824ad4dL, 0xef1650ced584fcL },
+        { 0xa74df674f4754fL, 0xf52cea8ef3fb8bL, 0x47c32d42971140L,
+          0x391c15da256fbbL, 0xc165faba605671L, 0xf2518c687993b9L,
+          0x2daf7acbd5a84dL, 0x1560b6298f12aeL }
+    },
+    {
+        { 0xef4da0254dc10aL, 0x63118655940db8L, 0xe20b14982f2948L,
+          0x67b93775581dbaL, 0x422ee7104f5029L, 0x5d440db5122d34L,
+          0xb1e56d71a4c640L, 0xbf12abbc2408eeL },
+        { 0x0cc9f86016af01L, 0x88366abf3d8cabL, 0x85dda13a2efe12L,
+          0x390df605d00674L, 0xf18f5806d187f7L, 0x28c900ff0c5d20L,
+          0xad308123e01733L, 0x42d35b554bf2fdL }
+    },
+    {
+        { 0x009135f2ffb1f1L, 0x099fc7e8f9c605L, 0xcc67da626bfa5aL,
+          0xc186d12344552bL, 0xb5232501b339e1L, 0x70a544fc9708c5L,
+          0x06baaec1e928e7L, 0x0baedd2ef0f50fL },
+        { 0x535d6d8bf479e5L, 0x156e536e4ec3e9L, 0x3165741ddb9be2L,
+          0x988af7159fd736L, 0x13d8a782e33dddL, 0x54604214e69002L,
+          0x34d56e0804a268L, 0xc59b84f0e52a4cL }
+    },
+    {
+        { 0x525d45f24729d9L, 0x5768aba8712327L, 0xa25e43b43035dbL,
+          0x15a1ee8927ef21L, 0xa785d216056112L, 0x45e2fbfd508af9L,
+          0xb6f721a37ba969L, 0x30d6d8c216d8d3L },
+        { 0x3065e0852074c3L, 0xfa40b4a2a0684eL, 0x851325a763f955L,
+          0xd4ef19c9f25900L, 0x799c869f665756L, 0x7b052223312990L,
+          0xc986c2b28db802L, 0xf48fb8f28ade0aL }
+    },
+    {
+        { 0x1e461731649b68L, 0xa96e5d65beb9dcL, 0x765ddff481935dL,
+          0x6cf132c9f3bf2aL, 0x9f6c5c97c35658L, 0x99cd1394696e60L,
+          0x99fa9249c0d5e4L, 0x1acd0638845a95L },
+        { 0x0b065413636087L, 0xea20e78ea17b7fL, 0x20afc5f6161967L,
+          0xfd6c8a2dc81028L, 0x4ef1357e32c8fdL, 0x8aa400400e4a88L,
+          0xd6fcaef48cb82fL, 0x7ba7c6db3cd4faL }
+    },
+    {
+        { 0xf843473d19c7abL, 0x968e76dc655c4dL, 0x52c87d9c4b9c2fL,
+          0x65f641ae4aa082L, 0x491a39733c3603L, 0xa606ffe5810098L,
+          0x09920e68bf8ad4L, 0x691a0c86db7882L },
+        { 0x5205883a4d3ef5L, 0xee839b7acf2efeL, 0x4b78e2ac00ca66L,
+          0xbe3f071f9fcb91L, 0x61e66c9bf6943aL, 0xe9b4e57061b79dL,
+          0x8d1b01b56c06bdL, 0x0dfa315df76ae5L }
+    },
+    {
+        { 0x803df65f1fd093L, 0x1cd6523489b77eL, 0x2cd2e15c20e295L,
+          0xcd490be9b912d1L, 0xdd9a2ff2e886d2L, 0xa3c836dfe9d72aL,
+          0xfcad5f2298e0c1L, 0xed126e24bcf067L },
+        { 0x1e339533dc81bcL, 0xbea4d76ece6a08L, 0x1d15de3991b252L,
+          0x74cc5cfe6daf97L, 0x5ad343f0826493L, 0x2d38a471064049L,
+          0xf7f47b9ffcfa4dL, 0xef14490418066cL }
+    },
+    {
+        { 0x4e7f86b9bb55abL, 0x310d7853f496a3L, 0xbd682fc0dec42cL,
+          0xbde047a411d32aL, 0xea639b4c5a5ea2L, 0x5052078ba08fa1L,
+          0xc968b2307729f2L, 0x567b5a623d3e28L },
+        { 0x171e825977fbf7L, 0x0319c70be990aaL, 0x8f65023e12cd69L,
+          0x1fb9b19f5015e6L, 0x0083f603568a7cL, 0xba3d30b1f3c5acL,
+          0xe7b509d3d7a988L, 0x2318b99cd0f6b6L }
+    },
+    {
+        { 0x54d3b8793ab2cfL, 0x366abead2d8306L, 0x66e8eb6d7a4977L,
+          0xa61888cae0072eL, 0x9eeeef5dbc3315L, 0x93f09db163e7f5L,
+          0xee9095959ade9aL, 0xaf7f578ce59be0L },
+        { 0x24bfd8d5ece59eL, 0x8aa698b3689523L, 0xa9a65de2de92cfL,
+          0xec11dbca6ad300L, 0x217f3fa09f88caL, 0xf6c33e3b4d6af7L,
+          0xcd3bfa21d86d2dL, 0x1497f835f13f25L }
+    },
+    {
+        { 0xa579568cd03d1dL, 0xd717cdae158af6L, 0x59eda97389a19fL,
+          0xb32c370099e99cL, 0xa2dba91dabb591L, 0x6d697d577c2c97L,
+          0x5423fc2d43fa6dL, 0x56ea8a50b382bfL },
+        { 0x4a987bad80c11aL, 0xe4cde217d590a5L, 0x3dd8860f97e559L,
+          0xff45e2543b593cL, 0x00eb4535343cb5L, 0x06b9b997bbfbddL,
+          0x4da36b716aea24L, 0x247651757a624eL }
+    },
+    {
+        { 0x32207d03474e0dL, 0x3ffbf04b41cc73L, 0x5c4dc45319eb39L,
+          0xfee29be758b463L, 0xcc8a381c30c7a7L, 0x147f4e49fe0e53L,
+          0x05b2e26e35a2deL, 0x4362f0292f3666L },
+        { 0x0476d0c8474b85L, 0x9d8c65fccaf108L, 0xf58d4041d54b6aL,
+          0x3ee6862f38e4b0L, 0x7c7c9d53b44f54L, 0x36a3fd80fb0db5L,
+          0xfcd94ba18a8ac8L, 0xc1b1d568f35c05L }
+    },
+    {
+        { 0x16539fc1bdd30dL, 0x1356e538df4afbL, 0xc0545d85a1aedbL,
+          0xeb2037a489396bL, 0x897fcbd5660894L, 0x02a58a9b7d104aL,
+          0x57fa24cc96b980L, 0xf6448e35bd8946L },
+        { 0xee727418805c83L, 0x10fa274992cfc6L, 0x95141939e66b21L,
+          0xe0ffa44bd08009L, 0x174332220da22bL, 0x4891ff359e6831L,
+          0x407ed73a7d687bL, 0x2fb4e0751d99cfL }
+    },
+};
+#else
+
+/* Reduce scalar mod the order of the curve.
+ * Scalar Will be 114 bytes.
+ *
+ * b  [in]  Scalar to reduce.
+ */
+void sc448_reduce(uint8_t* b)
+{
+    uint32_t d[16];
+    uint64_t t[33];
+    uint64_t c;
+    uint32_t o;
+
+    /* Load from bytes */
+    t[ 0] =  (((int32_t)((b[ 0]        ) >>  0)) <<  0)
+          |  (((int32_t)((b[ 1]        ) >>  0)) <<  8)
+          |  (((int32_t)((b[ 2]        ) >>  0)) << 16)
+          | ((((int32_t)((b[ 3] & 0xf )) >>  0)) << 24);
+    t[ 1] =  (((int32_t)((b[ 3]        ) >>  4)) <<  0)
+          |  (((int32_t)((b[ 4]        ) >>  0)) <<  4)
+          |  (((int32_t)((b[ 5]        ) >>  0)) << 12)
+          |  (((int32_t)((b[ 6]        ) >>  0)) << 20);
+    t[ 2] =  (((int32_t)((b[ 7]        ) >>  0)) <<  0)
+          |  (((int32_t)((b[ 8]        ) >>  0)) <<  8)
+          |  (((int32_t)((b[ 9]        ) >>  0)) << 16)
+          | ((((int32_t)((b[10] & 0xf )) >>  0)) << 24);
+    t[ 3] =  (((int32_t)((b[10]        ) >>  4)) <<  0)
+          |  (((int32_t)((b[11]        ) >>  0)) <<  4)
+          |  (((int32_t)((b[12]        ) >>  0)) << 12)
+          |  (((int32_t)((b[13]        ) >>  0)) << 20);
+    t[ 4] =  (((int32_t)((b[14]        ) >>  0)) <<  0)
+          |  (((int32_t)((b[15]        ) >>  0)) <<  8)
+          |  (((int32_t)((b[16]        ) >>  0)) << 16)
+          | ((((int32_t)((b[17] & 0xf )) >>  0)) << 24);
+    t[ 5] =  (((int32_t)((b[17]        ) >>  4)) <<  0)
+          |  (((int32_t)((b[18]        ) >>  0)) <<  4)
+          |  (((int32_t)((b[19]        ) >>  0)) << 12)
+          |  (((int32_t)((b[20]        ) >>  0)) << 20);
+    t[ 6] =  (((int32_t)((b[21]        ) >>  0)) <<  0)
+          |  (((int32_t)((b[22]        ) >>  0)) <<  8)
+          |  (((int32_t)((b[23]        ) >>  0)) << 16)
+          | ((((int32_t)((b[24] & 0xf )) >>  0)) << 24);
+    t[ 7] =  (((int32_t)((b[24]        ) >>  4)) <<  0)
+          |  (((int32_t)((b[25]        ) >>  0)) <<  4)
+          |  (((int32_t)((b[26]        ) >>  0)) << 12)
+          |  (((int32_t)((b[27]        ) >>  0)) << 20);
+    t[ 8] =  (((int32_t)((b[28]        ) >>  0)) <<  0)
+          |  (((int32_t)((b[29]        ) >>  0)) <<  8)
+          |  (((int32_t)((b[30]        ) >>  0)) << 16)
+          | ((((int32_t)((b[31] & 0xf )) >>  0)) << 24);
+    t[ 9] =  (((int32_t)((b[31]        ) >>  4)) <<  0)
+          |  (((int32_t)((b[32]        ) >>  0)) <<  4)
+          |  (((int32_t)((b[33]        ) >>  0)) << 12)
+          |  (((int32_t)((b[34]        ) >>  0)) << 20);
+    t[10] =  (((int32_t)((b[35]        ) >>  0)) <<  0)
+          |  (((int32_t)((b[36]        ) >>  0)) <<  8)
+          |  (((int32_t)((b[37]        ) >>  0)) << 16)
+          | ((((int32_t)((b[38] & 0xf )) >>  0)) << 24);
+    t[11] =  (((int32_t)((b[38]        ) >>  4)) <<  0)
+          |  (((int32_t)((b[39]        ) >>  0)) <<  4)
+          |  (((int32_t)((b[40]        ) >>  0)) << 12)
+          |  (((int32_t)((b[41]        ) >>  0)) << 20);
+    t[12] =  (((int32_t)((b[42]        ) >>  0)) <<  0)
+          |  (((int32_t)((b[43]        ) >>  0)) <<  8)
+          |  (((int32_t)((b[44]        ) >>  0)) << 16)
+          | ((((int32_t)((b[45] & 0xf )) >>  0)) << 24);
+    t[13] =  (((int32_t)((b[45]        ) >>  4)) <<  0)
+          |  (((int32_t)((b[46]        ) >>  0)) <<  4)
+          |  (((int32_t)((b[47]        ) >>  0)) << 12)
+          |  (((int32_t)((b[48]        ) >>  0)) << 20);
+    t[14] =  (((int32_t)((b[49]        ) >>  0)) <<  0)
+          |  (((int32_t)((b[50]        ) >>  0)) <<  8)
+          |  (((int32_t)((b[51]        ) >>  0)) << 16)
+          | ((((int32_t)((b[52] & 0xf )) >>  0)) << 24);
+    t[15] =  (((int32_t)((b[52]        ) >>  4)) <<  0)
+          |  (((int32_t)((b[53]        ) >>  0)) <<  4)
+          |  (((int32_t)((b[54]        ) >>  0)) << 12)
+          |  (((int32_t)((b[55]        ) >>  0)) << 20);
+    t[16] =  (((int32_t)((b[56]        ) >>  0)) <<  0)
+          |  (((int32_t)((b[57]        ) >>  0)) <<  8)
+          |  (((int32_t)((b[58]        ) >>  0)) << 16)
+          | ((((int32_t)((b[59] & 0xf )) >>  0)) << 24);
+    t[17] =  (((int32_t)((b[59]        ) >>  4)) <<  0)
+          |  (((int32_t)((b[60]        ) >>  0)) <<  4)
+          |  (((int32_t)((b[61]        ) >>  0)) << 12)
+          |  (((int32_t)((b[62]        ) >>  0)) << 20);
+    t[18] =  (((int32_t)((b[63]        ) >>  0)) <<  0)
+          |  (((int32_t)((b[64]        ) >>  0)) <<  8)
+          |  (((int32_t)((b[65]        ) >>  0)) << 16)
+          | ((((int32_t)((b[66] & 0xf )) >>  0)) << 24);
+    t[19] =  (((int32_t)((b[66]        ) >>  4)) <<  0)
+          |  (((int32_t)((b[67]        ) >>  0)) <<  4)
+          |  (((int32_t)((b[68]        ) >>  0)) << 12)
+          |  (((int32_t)((b[69]        ) >>  0)) << 20);
+    t[20] =  (((int32_t)((b[70]        ) >>  0)) <<  0)
+          |  (((int32_t)((b[71]        ) >>  0)) <<  8)
+          |  (((int32_t)((b[72]        ) >>  0)) << 16)
+          | ((((int32_t)((b[73] & 0xf )) >>  0)) << 24);
+    t[21] =  (((int32_t)((b[73]        ) >>  4)) <<  0)
+          |  (((int32_t)((b[74]        ) >>  0)) <<  4)
+          |  (((int32_t)((b[75]        ) >>  0)) << 12)
+          |  (((int32_t)((b[76]        ) >>  0)) << 20);
+    t[22] =  (((int32_t)((b[77]        ) >>  0)) <<  0)
+          |  (((int32_t)((b[78]        ) >>  0)) <<  8)
+          |  (((int32_t)((b[79]        ) >>  0)) << 16)
+          | ((((int32_t)((b[80] & 0xf )) >>  0)) << 24);
+    t[23] =  (((int32_t)((b[80]        ) >>  4)) <<  0)
+          |  (((int32_t)((b[81]        ) >>  0)) <<  4)
+          |  (((int32_t)((b[82]        ) >>  0)) << 12)
+          |  (((int32_t)((b[83]        ) >>  0)) << 20);
+    t[24] =  (((int32_t)((b[84]        ) >>  0)) <<  0)
+          |  (((int32_t)((b[85]        ) >>  0)) <<  8)
+          |  (((int32_t)((b[86]        ) >>  0)) << 16)
+          | ((((int32_t)((b[87] & 0xf )) >>  0)) << 24);
+    t[25] =  (((int32_t)((b[87]        ) >>  4)) <<  0)
+          |  (((int32_t)((b[88]        ) >>  0)) <<  4)
+          |  (((int32_t)((b[89]        ) >>  0)) << 12)
+          |  (((int32_t)((b[90]        ) >>  0)) << 20);
+    t[26] =  (((int32_t)((b[91]        ) >>  0)) <<  0)
+          |  (((int32_t)((b[92]        ) >>  0)) <<  8)
+          |  (((int32_t)((b[93]        ) >>  0)) << 16)
+          | ((((int32_t)((b[94] & 0xf )) >>  0)) << 24);
+    t[27] =  (((int32_t)((b[94]        ) >>  4)) <<  0)
+          |  (((int32_t)((b[95]        ) >>  0)) <<  4)
+          |  (((int32_t)((b[96]        ) >>  0)) << 12)
+          |  (((int32_t)((b[97]        ) >>  0)) << 20);
+    t[28] =  (((int32_t)((b[98]        ) >>  0)) <<  0)
+          |  (((int32_t)((b[99]        ) >>  0)) <<  8)
+          |  (((int32_t)((b[100]        ) >>  0)) << 16)
+          | ((((int32_t)((b[101] & 0xf )) >>  0)) << 24);
+    t[29] =  (((int32_t)((b[101]        ) >>  4)) <<  0)
+          |  (((int32_t)((b[102]        ) >>  0)) <<  4)
+          |  (((int32_t)((b[103]        ) >>  0)) << 12)
+          |  (((int32_t)((b[104]        ) >>  0)) << 20);
+    t[30] =  (((int32_t)((b[105]        ) >>  0)) <<  0)
+          |  (((int32_t)((b[106]        ) >>  0)) <<  8)
+          |  (((int32_t)((b[107]        ) >>  0)) << 16)
+          | ((((int32_t)((b[108] & 0xf )) >>  0)) << 24);
+    t[31] =  (((int32_t)((b[108]        ) >>  4)) <<  0)
+          |  (((int32_t)((b[109]        ) >>  0)) <<  4)
+          |  (((int32_t)((b[110]        ) >>  0)) << 12)
+          |  (((int32_t)((b[111]        ) >>  0)) << 20);
+    t[32] =  (((int32_t)((b[112]        ) >>  0)) <<  0)
+          |  (((int32_t)((b[113]        ) >>  0)) <<  8);
+
+    /* Mod curve order */
+    /* 2^446 - 0x8335dc163bb124b65129c96fde933d8d723a70aadc873d6d54a7bb0d */
+    /* Mod top half of extra words */
+    t[ 8] += (int64_t)0x129eec34 * t[24];
+    t[ 9] += (int64_t)0x21cf5b54 * t[24];
+    t[10] += (int64_t)0x29c2ab70 * t[24];
+    t[11] += (int64_t)0x0f635c8c * t[24];
+    t[12] += (int64_t)0x25bf7a4c * t[24];
+    t[13] += (int64_t)0x2d944a70 * t[24];
+    t[14] += (int64_t)0x18eec490 * t[24];
+    t[15] += (int64_t)0x20cd7704 * t[24];
+    t[ 9] += (int64_t)0x129eec34 * t[25];
+    t[10] += (int64_t)0x21cf5b54 * t[25];
+    t[11] += (int64_t)0x29c2ab70 * t[25];
+    t[12] += (int64_t)0x0f635c8c * t[25];
+    t[13] += (int64_t)0x25bf7a4c * t[25];
+    t[14] += (int64_t)0x2d944a70 * t[25];
+    t[15] += (int64_t)0x18eec490 * t[25];
+    t[16] += (int64_t)0x20cd7704 * t[25];
+    t[10] += (int64_t)0x129eec34 * t[26];
+    t[11] += (int64_t)0x21cf5b54 * t[26];
+    t[12] += (int64_t)0x29c2ab70 * t[26];
+    t[13] += (int64_t)0x0f635c8c * t[26];
+    t[14] += (int64_t)0x25bf7a4c * t[26];
+    t[15] += (int64_t)0x2d944a70 * t[26];
+    t[16] += (int64_t)0x18eec490 * t[26];
+    t[17] += (int64_t)0x20cd7704 * t[26];
+    t[11] += (int64_t)0x129eec34 * t[27];
+    t[12] += (int64_t)0x21cf5b54 * t[27];
+    t[13] += (int64_t)0x29c2ab70 * t[27];
+    t[14] += (int64_t)0x0f635c8c * t[27];
+    t[15] += (int64_t)0x25bf7a4c * t[27];
+    t[16] += (int64_t)0x2d944a70 * t[27];
+    t[17] += (int64_t)0x18eec490 * t[27];
+    t[18] += (int64_t)0x20cd7704 * t[27];
+    t[12] += (int64_t)0x129eec34 * t[28];
+    t[13] += (int64_t)0x21cf5b54 * t[28];
+    t[14] += (int64_t)0x29c2ab70 * t[28];
+    t[15] += (int64_t)0x0f635c8c * t[28];
+    t[16] += (int64_t)0x25bf7a4c * t[28];
+    t[17] += (int64_t)0x2d944a70 * t[28];
+    t[18] += (int64_t)0x18eec490 * t[28];
+    t[19] += (int64_t)0x20cd7704 * t[28];
+    t[13] += (int64_t)0x129eec34 * t[29];
+    t[14] += (int64_t)0x21cf5b54 * t[29];
+    t[15] += (int64_t)0x29c2ab70 * t[29];
+    t[16] += (int64_t)0x0f635c8c * t[29];
+    t[17] += (int64_t)0x25bf7a4c * t[29];
+    t[18] += (int64_t)0x2d944a70 * t[29];
+    t[19] += (int64_t)0x18eec490 * t[29];
+    t[20] += (int64_t)0x20cd7704 * t[29];
+    t[14] += (int64_t)0x129eec34 * t[30];
+    t[15] += (int64_t)0x21cf5b54 * t[30];
+    t[16] += (int64_t)0x29c2ab70 * t[30];
+    t[17] += (int64_t)0x0f635c8c * t[30];
+    t[18] += (int64_t)0x25bf7a4c * t[30];
+    t[19] += (int64_t)0x2d944a70 * t[30];
+    t[20] += (int64_t)0x18eec490 * t[30];
+    t[21] += (int64_t)0x20cd7704 * t[30];
+    t[15] += (int64_t)0x129eec34 * t[31];
+    t[16] += (int64_t)0x21cf5b54 * t[31];
+    t[17] += (int64_t)0x29c2ab70 * t[31];
+    t[18] += (int64_t)0x0f635c8c * t[31];
+    t[19] += (int64_t)0x25bf7a4c * t[31];
+    t[20] += (int64_t)0x2d944a70 * t[31];
+    t[21] += (int64_t)0x18eec490 * t[31];
+    t[22] += (int64_t)0x20cd7704 * t[31];
+    t[16] += (int64_t)0x129eec34 * t[32];
+    t[17] += (int64_t)0x21cf5b54 * t[32];
+    t[18] += (int64_t)0x29c2ab70 * t[32];
+    t[19] += (int64_t)0x0f635c8c * t[32];
+    t[20] += (int64_t)0x25bf7a4c * t[32];
+    t[21] += (int64_t)0x2d944a70 * t[32];
+    t[22] += (int64_t)0x18eec490 * t[32];
+    t[23] += (int64_t)0x20cd7704 * t[32];
+    t[24]  = 0;
+    /* Propagate carries */
+    c = t[ 8] >> 28; t[ 9] += c; t[ 8] = t[ 8] & 0xfffffff;
+    c = t[ 9] >> 28; t[10] += c; t[ 9] = t[ 9] & 0xfffffff;
+    c = t[10] >> 28; t[11] += c; t[10] = t[10] & 0xfffffff;
+    c = t[11] >> 28; t[12] += c; t[11] = t[11] & 0xfffffff;
+    c = t[12] >> 28; t[13] += c; t[12] = t[12] & 0xfffffff;
+    c = t[13] >> 28; t[14] += c; t[13] = t[13] & 0xfffffff;
+    c = t[14] >> 28; t[15] += c; t[14] = t[14] & 0xfffffff;
+    c = t[15] >> 28; t[16] += c; t[15] = t[15] & 0xfffffff;
+    c = t[16] >> 28; t[17] += c; t[16] = t[16] & 0xfffffff;
+    c = t[17] >> 28; t[18] += c; t[17] = t[17] & 0xfffffff;
+    c = t[18] >> 28; t[19] += c; t[18] = t[18] & 0xfffffff;
+    c = t[19] >> 28; t[20] += c; t[19] = t[19] & 0xfffffff;
+    c = t[20] >> 28; t[21] += c; t[20] = t[20] & 0xfffffff;
+    c = t[21] >> 28; t[22] += c; t[21] = t[21] & 0xfffffff;
+    c = t[22] >> 28; t[23] += c; t[22] = t[22] & 0xfffffff;
+    c = t[23] >> 28; t[24] += c; t[23] = t[23] & 0xfffffff;
+    /* Mod bottom half of extra words */
+    t[ 0] += (int64_t)0x129eec34 * t[16];
+    t[ 1] += (int64_t)0x21cf5b54 * t[16];
+    t[ 2] += (int64_t)0x29c2ab70 * t[16];
+    t[ 3] += (int64_t)0x0f635c8c * t[16];
+    t[ 4] += (int64_t)0x25bf7a4c * t[16];
+    t[ 5] += (int64_t)0x2d944a70 * t[16];
+    t[ 6] += (int64_t)0x18eec490 * t[16];
+    t[ 7] += (int64_t)0x20cd7704 * t[16];
+    t[ 1] += (int64_t)0x129eec34 * t[17];
+    t[ 2] += (int64_t)0x21cf5b54 * t[17];
+    t[ 3] += (int64_t)0x29c2ab70 * t[17];
+    t[ 4] += (int64_t)0x0f635c8c * t[17];
+    t[ 5] += (int64_t)0x25bf7a4c * t[17];
+    t[ 6] += (int64_t)0x2d944a70 * t[17];
+    t[ 7] += (int64_t)0x18eec490 * t[17];
+    t[ 8] += (int64_t)0x20cd7704 * t[17];
+    t[ 2] += (int64_t)0x129eec34 * t[18];
+    t[ 3] += (int64_t)0x21cf5b54 * t[18];
+    t[ 4] += (int64_t)0x29c2ab70 * t[18];
+    t[ 5] += (int64_t)0x0f635c8c * t[18];
+    t[ 6] += (int64_t)0x25bf7a4c * t[18];
+    t[ 7] += (int64_t)0x2d944a70 * t[18];
+    t[ 8] += (int64_t)0x18eec490 * t[18];
+    t[ 9] += (int64_t)0x20cd7704 * t[18];
+    t[ 3] += (int64_t)0x129eec34 * t[19];
+    t[ 4] += (int64_t)0x21cf5b54 * t[19];
+    t[ 5] += (int64_t)0x29c2ab70 * t[19];
+    t[ 6] += (int64_t)0x0f635c8c * t[19];
+    t[ 7] += (int64_t)0x25bf7a4c * t[19];
+    t[ 8] += (int64_t)0x2d944a70 * t[19];
+    t[ 9] += (int64_t)0x18eec490 * t[19];
+    t[10] += (int64_t)0x20cd7704 * t[19];
+    t[ 4] += (int64_t)0x129eec34 * t[20];
+    t[ 5] += (int64_t)0x21cf5b54 * t[20];
+    t[ 6] += (int64_t)0x29c2ab70 * t[20];
+    t[ 7] += (int64_t)0x0f635c8c * t[20];
+    t[ 8] += (int64_t)0x25bf7a4c * t[20];
+    t[ 9] += (int64_t)0x2d944a70 * t[20];
+    t[10] += (int64_t)0x18eec490 * t[20];
+    t[11] += (int64_t)0x20cd7704 * t[20];
+    t[ 5] += (int64_t)0x129eec34 * t[21];
+    t[ 6] += (int64_t)0x21cf5b54 * t[21];
+    t[ 7] += (int64_t)0x29c2ab70 * t[21];
+    t[ 8] += (int64_t)0x0f635c8c * t[21];
+    t[ 9] += (int64_t)0x25bf7a4c * t[21];
+    t[10] += (int64_t)0x2d944a70 * t[21];
+    t[11] += (int64_t)0x18eec490 * t[21];
+    t[12] += (int64_t)0x20cd7704 * t[21];
+    t[ 6] += (int64_t)0x129eec34 * t[22];
+    t[ 7] += (int64_t)0x21cf5b54 * t[22];
+    t[ 8] += (int64_t)0x29c2ab70 * t[22];
+    t[ 9] += (int64_t)0x0f635c8c * t[22];
+    t[10] += (int64_t)0x25bf7a4c * t[22];
+    t[11] += (int64_t)0x2d944a70 * t[22];
+    t[12] += (int64_t)0x18eec490 * t[22];
+    t[13] += (int64_t)0x20cd7704 * t[22];
+    t[ 7] += (int64_t)0x129eec34 * t[23];
+    t[ 8] += (int64_t)0x21cf5b54 * t[23];
+    t[ 9] += (int64_t)0x29c2ab70 * t[23];
+    t[10] += (int64_t)0x0f635c8c * t[23];
+    t[11] += (int64_t)0x25bf7a4c * t[23];
+    t[12] += (int64_t)0x2d944a70 * t[23];
+    t[13] += (int64_t)0x18eec490 * t[23];
+    t[14] += (int64_t)0x20cd7704 * t[23];
+    t[ 8] += (int64_t)0x129eec34 * t[24];
+    t[ 9] += (int64_t)0x21cf5b54 * t[24];
+    t[10] += (int64_t)0x29c2ab70 * t[24];
+    t[11] += (int64_t)0x0f635c8c * t[24];
+    t[12] += (int64_t)0x25bf7a4c * t[24];
+    t[13] += (int64_t)0x2d944a70 * t[24];
+    t[14] += (int64_t)0x18eec490 * t[24];
+    t[15] += (int64_t)0x20cd7704 * t[24];
+    t[16]  = 0;
+    /* Propagate carries */
+    c = t[ 0] >> 28; t[ 1] += c; t[ 0] = t[ 0] & 0xfffffff;
+    c = t[ 1] >> 28; t[ 2] += c; t[ 1] = t[ 1] & 0xfffffff;
+    c = t[ 2] >> 28; t[ 3] += c; t[ 2] = t[ 2] & 0xfffffff;
+    c = t[ 3] >> 28; t[ 4] += c; t[ 3] = t[ 3] & 0xfffffff;
+    c = t[ 4] >> 28; t[ 5] += c; t[ 4] = t[ 4] & 0xfffffff;
+    c = t[ 5] >> 28; t[ 6] += c; t[ 5] = t[ 5] & 0xfffffff;
+    c = t[ 6] >> 28; t[ 7] += c; t[ 6] = t[ 6] & 0xfffffff;
+    c = t[ 7] >> 28; t[ 8] += c; t[ 7] = t[ 7] & 0xfffffff;
+    c = t[ 8] >> 28; t[ 9] += c; t[ 8] = t[ 8] & 0xfffffff;
+    c = t[ 9] >> 28; t[10] += c; t[ 9] = t[ 9] & 0xfffffff;
+    c = t[10] >> 28; t[11] += c; t[10] = t[10] & 0xfffffff;
+    c = t[11] >> 28; t[12] += c; t[11] = t[11] & 0xfffffff;
+    c = t[12] >> 28; t[13] += c; t[12] = t[12] & 0xfffffff;
+    c = t[13] >> 28; t[14] += c; t[13] = t[13] & 0xfffffff;
+    c = t[14] >> 28; t[15] += c; t[14] = t[14] & 0xfffffff;
+    c = t[15] >> 28; t[16] += c; t[15] = t[15] & 0xfffffff;
+    t[ 0] += (int64_t)0x129eec34 * t[16];
+    t[ 1] += (int64_t)0x21cf5b54 * t[16];
+    t[ 2] += (int64_t)0x29c2ab70 * t[16];
+    t[ 3] += (int64_t)0x0f635c8c * t[16];
+    t[ 4] += (int64_t)0x25bf7a4c * t[16];
+    t[ 5] += (int64_t)0x2d944a70 * t[16];
+    t[ 6] += (int64_t)0x18eec490 * t[16];
+    t[ 7] += (int64_t)0x20cd7704 * t[16];
+    /* Propagate carries */
+    c = t[ 0] >> 28; t[ 1] += c; d[ 0] = (int32_t)(t[ 0] & 0xfffffff);
+    c = t[ 1] >> 28; t[ 2] += c; d[ 1] = (int32_t)(t[ 1] & 0xfffffff);
+    c = t[ 2] >> 28; t[ 3] += c; d[ 2] = (int32_t)(t[ 2] & 0xfffffff);
+    c = t[ 3] >> 28; t[ 4] += c; d[ 3] = (int32_t)(t[ 3] & 0xfffffff);
+    c = t[ 4] >> 28; t[ 5] += c; d[ 4] = (int32_t)(t[ 4] & 0xfffffff);
+    c = t[ 5] >> 28; t[ 6] += c; d[ 5] = (int32_t)(t[ 5] & 0xfffffff);
+    c = t[ 6] >> 28; t[ 7] += c; d[ 6] = (int32_t)(t[ 6] & 0xfffffff);
+    c = t[ 7] >> 28; t[ 8] += c; d[ 7] = (int32_t)(t[ 7] & 0xfffffff);
+    c = t[ 8] >> 28; t[ 9] += c; d[ 8] = (int32_t)(t[ 8] & 0xfffffff);
+    c = t[ 9] >> 28; t[10] += c; d[ 9] = (int32_t)(t[ 9] & 0xfffffff);
+    c = t[10] >> 28; t[11] += c; d[10] = (int32_t)(t[10] & 0xfffffff);
+    c = t[11] >> 28; t[12] += c; d[11] = (int32_t)(t[11] & 0xfffffff);
+    c = t[12] >> 28; t[13] += c; d[12] = (int32_t)(t[12] & 0xfffffff);
+    c = t[13] >> 28; t[14] += c; d[13] = (int32_t)(t[13] & 0xfffffff);
+    c = t[14] >> 28; t[15] += c; d[14] = (int32_t)(t[14] & 0xfffffff);
+    d[15] = t[15];
+    /* Mod bits over 28 in last word */
+    o = d[15] >> 26; d[15] &= 0x3ffffff;
+    d[ 0] += 0x4a7bb0d * o;
+    d[ 1] += 0x873d6d5 * o;
+    d[ 2] += 0xa70aadc * o;
+    d[ 3] += 0x3d8d723 * o;
+    d[ 4] += 0x96fde93 * o;
+    d[ 5] += 0xb65129c * o;
+    d[ 6] += 0x63bb124 * o;
+    d[ 7] += 0x8335dc1 * o;
+    /* Propagate carries */
+    o = d[ 0] >> 28; d[ 1] += o; d[ 0] = d[ 0] & 0xfffffff;
+    o = d[ 1] >> 28; d[ 2] += o; d[ 1] = d[ 1] & 0xfffffff;
+    o = d[ 2] >> 28; d[ 3] += o; d[ 2] = d[ 2] & 0xfffffff;
+    o = d[ 3] >> 28; d[ 4] += o; d[ 3] = d[ 3] & 0xfffffff;
+    o = d[ 4] >> 28; d[ 5] += o; d[ 4] = d[ 4] & 0xfffffff;
+    o = d[ 5] >> 28; d[ 6] += o; d[ 5] = d[ 5] & 0xfffffff;
+    o = d[ 6] >> 28; d[ 7] += o; d[ 6] = d[ 6] & 0xfffffff;
+    o = d[ 7] >> 28; d[ 8] += o; d[ 7] = d[ 7] & 0xfffffff;
+    o = d[ 8] >> 28; d[ 9] += o; d[ 8] = d[ 8] & 0xfffffff;
+    o = d[ 9] >> 28; d[10] += o; d[ 9] = d[ 9] & 0xfffffff;
+    o = d[10] >> 28; d[11] += o; d[10] = d[10] & 0xfffffff;
+    o = d[11] >> 28; d[12] += o; d[11] = d[11] & 0xfffffff;
+    o = d[12] >> 28; d[13] += o; d[12] = d[12] & 0xfffffff;
+    o = d[13] >> 28; d[14] += o; d[13] = d[13] & 0xfffffff;
+    o = d[14] >> 28; d[15] += o; d[14] = d[14] & 0xfffffff;
+
+    /* Convert to bytes */
+    b[ 0] = (d[0 ] >>  0);
+    b[ 1] = (d[0 ] >>  8);
+    b[ 2] = (d[0 ] >> 16);
+    b[ 3] = (d[0 ] >> 24) + ((d[1 ] >>  0) <<  4);
+    b[ 4] = (d[1 ] >>  4);
+    b[ 5] = (d[1 ] >> 12);
+    b[ 6] = (d[1 ] >> 20);
+    b[ 7] = (d[2 ] >>  0);
+    b[ 8] = (d[2 ] >>  8);
+    b[ 9] = (d[2 ] >> 16);
+    b[10] = (d[2 ] >> 24) + ((d[3 ] >>  0) <<  4);
+    b[11] = (d[3 ] >>  4);
+    b[12] = (d[3 ] >> 12);
+    b[13] = (d[3 ] >> 20);
+    b[14] = (d[4 ] >>  0);
+    b[15] = (d[4 ] >>  8);
+    b[16] = (d[4 ] >> 16);
+    b[17] = (d[4 ] >> 24) + ((d[5 ] >>  0) <<  4);
+    b[18] = (d[5 ] >>  4);
+    b[19] = (d[5 ] >> 12);
+    b[20] = (d[5 ] >> 20);
+    b[21] = (d[6 ] >>  0);
+    b[22] = (d[6 ] >>  8);
+    b[23] = (d[6 ] >> 16);
+    b[24] = (d[6 ] >> 24) + ((d[7 ] >>  0) <<  4);
+    b[25] = (d[7 ] >>  4);
+    b[26] = (d[7 ] >> 12);
+    b[27] = (d[7 ] >> 20);
+    b[28] = (d[8 ] >>  0);
+    b[29] = (d[8 ] >>  8);
+    b[30] = (d[8 ] >> 16);
+    b[31] = (d[8 ] >> 24) + ((d[9 ] >>  0) <<  4);
+    b[32] = (d[9 ] >>  4);
+    b[33] = (d[9 ] >> 12);
+    b[34] = (d[9 ] >> 20);
+    b[35] = (d[10] >>  0);
+    b[36] = (d[10] >>  8);
+    b[37] = (d[10] >> 16);
+    b[38] = (d[10] >> 24) + ((d[11] >>  0) <<  4);
+    b[39] = (d[11] >>  4);
+    b[40] = (d[11] >> 12);
+    b[41] = (d[11] >> 20);
+    b[42] = (d[12] >>  0);
+    b[43] = (d[12] >>  8);
+    b[44] = (d[12] >> 16);
+    b[45] = (d[12] >> 24) + ((d[13] >>  0) <<  4);
+    b[46] = (d[13] >>  4);
+    b[47] = (d[13] >> 12);
+    b[48] = (d[13] >> 20);
+    b[49] = (d[14] >>  0);
+    b[50] = (d[14] >>  8);
+    b[51] = (d[14] >> 16);
+    b[52] = (d[14] >> 24) + ((d[15] >>  0) <<  4);
+    b[53] = (d[15] >>  4);
+    b[54] = (d[15] >> 12);
+    b[55] = (d[15] >> 20);
+    b[56] = 0;
+}
+
+/* Multiply a by b and add d. r = (a * b + d) mod order
+ *
+ * r  [in]  Scalar to hold result.
+ * a  [in]  Scalar to multiply.
+ * b  [in]  Scalar to multiply.
+ * d  [in]  Scalar to add to multiplicative result.
+ */
+void sc448_muladd(uint8_t* r, const uint8_t* a, const uint8_t* b,
+                  const uint8_t* d)
+{
+    uint32_t ad[16], bd[16], dd[16], rd[16];
+    uint64_t t[32];
+    uint64_t c;
+    uint32_t o;
+
+    /* Load from bytes */
+    ad[ 0] =  (((int32_t)((a[ 0]        ) >>  0)) <<  0)
+           |  (((int32_t)((a[ 1]        ) >>  0)) <<  8)
+           |  (((int32_t)((a[ 2]        ) >>  0)) << 16)
+           | ((((int32_t)((a[ 3] & 0xf )) >>  0)) << 24);
+    ad[ 1] =  (((int32_t)((a[ 3]        ) >>  4)) <<  0)
+           |  (((int32_t)((a[ 4]        ) >>  0)) <<  4)
+           |  (((int32_t)((a[ 5]        ) >>  0)) << 12)
+           |  (((int32_t)((a[ 6]        ) >>  0)) << 20);
+    ad[ 2] =  (((int32_t)((a[ 7]        ) >>  0)) <<  0)
+           |  (((int32_t)((a[ 8]        ) >>  0)) <<  8)
+           |  (((int32_t)((a[ 9]        ) >>  0)) << 16)
+           | ((((int32_t)((a[10] & 0xf )) >>  0)) << 24);
+    ad[ 3] =  (((int32_t)((a[10]        ) >>  4)) <<  0)
+           |  (((int32_t)((a[11]        ) >>  0)) <<  4)
+           |  (((int32_t)((a[12]        ) >>  0)) << 12)
+           |  (((int32_t)((a[13]        ) >>  0)) << 20);
+    ad[ 4] =  (((int32_t)((a[14]        ) >>  0)) <<  0)
+           |  (((int32_t)((a[15]        ) >>  0)) <<  8)
+           |  (((int32_t)((a[16]        ) >>  0)) << 16)
+           | ((((int32_t)((a[17] & 0xf )) >>  0)) << 24);
+    ad[ 5] =  (((int32_t)((a[17]        ) >>  4)) <<  0)
+           |  (((int32_t)((a[18]        ) >>  0)) <<  4)
+           |  (((int32_t)((a[19]        ) >>  0)) << 12)
+           |  (((int32_t)((a[20]        ) >>  0)) << 20);
+    ad[ 6] =  (((int32_t)((a[21]        ) >>  0)) <<  0)
+           |  (((int32_t)((a[22]        ) >>  0)) <<  8)
+           |  (((int32_t)((a[23]        ) >>  0)) << 16)
+           | ((((int32_t)((a[24] & 0xf )) >>  0)) << 24);
+    ad[ 7] =  (((int32_t)((a[24]        ) >>  4)) <<  0)
+           |  (((int32_t)((a[25]        ) >>  0)) <<  4)
+           |  (((int32_t)((a[26]        ) >>  0)) << 12)
+           |  (((int32_t)((a[27]        ) >>  0)) << 20);
+    ad[ 8] =  (((int32_t)((a[28]        ) >>  0)) <<  0)
+           |  (((int32_t)((a[29]        ) >>  0)) <<  8)
+           |  (((int32_t)((a[30]        ) >>  0)) << 16)
+           | ((((int32_t)((a[31] & 0xf )) >>  0)) << 24);
+    ad[ 9] =  (((int32_t)((a[31]        ) >>  4)) <<  0)
+           |  (((int32_t)((a[32]        ) >>  0)) <<  4)
+           |  (((int32_t)((a[33]        ) >>  0)) << 12)
+           |  (((int32_t)((a[34]        ) >>  0)) << 20);
+    ad[10] =  (((int32_t)((a[35]        ) >>  0)) <<  0)
+           |  (((int32_t)((a[36]        ) >>  0)) <<  8)
+           |  (((int32_t)((a[37]        ) >>  0)) << 16)
+           | ((((int32_t)((a[38] & 0xf )) >>  0)) << 24);
+    ad[11] =  (((int32_t)((a[38]        ) >>  4)) <<  0)
+           |  (((int32_t)((a[39]        ) >>  0)) <<  4)
+           |  (((int32_t)((a[40]        ) >>  0)) << 12)
+           |  (((int32_t)((a[41]        ) >>  0)) << 20);
+    ad[12] =  (((int32_t)((a[42]        ) >>  0)) <<  0)
+           |  (((int32_t)((a[43]        ) >>  0)) <<  8)
+           |  (((int32_t)((a[44]        ) >>  0)) << 16)
+           | ((((int32_t)((a[45] & 0xf )) >>  0)) << 24);
+    ad[13] =  (((int32_t)((a[45]        ) >>  4)) <<  0)
+           |  (((int32_t)((a[46]        ) >>  0)) <<  4)
+           |  (((int32_t)((a[47]        ) >>  0)) << 12)
+           |  (((int32_t)((a[48]        ) >>  0)) << 20);
+    ad[14] =  (((int32_t)((a[49]        ) >>  0)) <<  0)
+           |  (((int32_t)((a[50]        ) >>  0)) <<  8)
+           |  (((int32_t)((a[51]        ) >>  0)) << 16)
+           | ((((int32_t)((a[52] & 0xf )) >>  0)) << 24);
+    ad[15] =  (((int32_t)((a[52]        ) >>  4)) <<  0)
+           |  (((int32_t)((a[53]        ) >>  0)) <<  4)
+           |  (((int32_t)((a[54]        ) >>  0)) << 12)
+           |  (((int32_t)((a[55]        ) >>  0)) << 20);
+    /* Load from bytes */
+    bd[ 0] =  (((int32_t)((b[ 0]        ) >>  0)) <<  0)
+           |  (((int32_t)((b[ 1]        ) >>  0)) <<  8)
+           |  (((int32_t)((b[ 2]        ) >>  0)) << 16)
+           | ((((int32_t)((b[ 3] & 0xf )) >>  0)) << 24);
+    bd[ 1] =  (((int32_t)((b[ 3]        ) >>  4)) <<  0)
+           |  (((int32_t)((b[ 4]        ) >>  0)) <<  4)
+           |  (((int32_t)((b[ 5]        ) >>  0)) << 12)
+           |  (((int32_t)((b[ 6]        ) >>  0)) << 20);
+    bd[ 2] =  (((int32_t)((b[ 7]        ) >>  0)) <<  0)
+           |  (((int32_t)((b[ 8]        ) >>  0)) <<  8)
+           |  (((int32_t)((b[ 9]        ) >>  0)) << 16)
+           | ((((int32_t)((b[10] & 0xf )) >>  0)) << 24);
+    bd[ 3] =  (((int32_t)((b[10]        ) >>  4)) <<  0)
+           |  (((int32_t)((b[11]        ) >>  0)) <<  4)
+           |  (((int32_t)((b[12]        ) >>  0)) << 12)
+           |  (((int32_t)((b[13]        ) >>  0)) << 20);
+    bd[ 4] =  (((int32_t)((b[14]        ) >>  0)) <<  0)
+           |  (((int32_t)((b[15]        ) >>  0)) <<  8)
+           |  (((int32_t)((b[16]        ) >>  0)) << 16)
+           | ((((int32_t)((b[17] & 0xf )) >>  0)) << 24);
+    bd[ 5] =  (((int32_t)((b[17]        ) >>  4)) <<  0)
+           |  (((int32_t)((b[18]        ) >>  0)) <<  4)
+           |  (((int32_t)((b[19]        ) >>  0)) << 12)
+           |  (((int32_t)((b[20]        ) >>  0)) << 20);
+    bd[ 6] =  (((int32_t)((b[21]        ) >>  0)) <<  0)
+           |  (((int32_t)((b[22]        ) >>  0)) <<  8)
+           |  (((int32_t)((b[23]        ) >>  0)) << 16)
+           | ((((int32_t)((b[24] & 0xf )) >>  0)) << 24);
+    bd[ 7] =  (((int32_t)((b[24]        ) >>  4)) <<  0)
+           |  (((int32_t)((b[25]        ) >>  0)) <<  4)
+           |  (((int32_t)((b[26]        ) >>  0)) << 12)
+           |  (((int32_t)((b[27]        ) >>  0)) << 20);
+    bd[ 8] =  (((int32_t)((b[28]        ) >>  0)) <<  0)
+           |  (((int32_t)((b[29]        ) >>  0)) <<  8)
+           |  (((int32_t)((b[30]        ) >>  0)) << 16)
+           | ((((int32_t)((b[31] & 0xf )) >>  0)) << 24);
+    bd[ 9] =  (((int32_t)((b[31]        ) >>  4)) <<  0)
+           |  (((int32_t)((b[32]        ) >>  0)) <<  4)
+           |  (((int32_t)((b[33]        ) >>  0)) << 12)
+           |  (((int32_t)((b[34]        ) >>  0)) << 20);
+    bd[10] =  (((int32_t)((b[35]        ) >>  0)) <<  0)
+           |  (((int32_t)((b[36]        ) >>  0)) <<  8)
+           |  (((int32_t)((b[37]        ) >>  0)) << 16)
+           | ((((int32_t)((b[38] & 0xf )) >>  0)) << 24);
+    bd[11] =  (((int32_t)((b[38]        ) >>  4)) <<  0)
+           |  (((int32_t)((b[39]        ) >>  0)) <<  4)
+           |  (((int32_t)((b[40]        ) >>  0)) << 12)
+           |  (((int32_t)((b[41]        ) >>  0)) << 20);
+    bd[12] =  (((int32_t)((b[42]        ) >>  0)) <<  0)
+           |  (((int32_t)((b[43]        ) >>  0)) <<  8)
+           |  (((int32_t)((b[44]        ) >>  0)) << 16)
+           | ((((int32_t)((b[45] & 0xf )) >>  0)) << 24);
+    bd[13] =  (((int32_t)((b[45]        ) >>  4)) <<  0)
+           |  (((int32_t)((b[46]        ) >>  0)) <<  4)
+           |  (((int32_t)((b[47]        ) >>  0)) << 12)
+           |  (((int32_t)((b[48]        ) >>  0)) << 20);
+    bd[14] =  (((int32_t)((b[49]        ) >>  0)) <<  0)
+           |  (((int32_t)((b[50]        ) >>  0)) <<  8)
+           |  (((int32_t)((b[51]        ) >>  0)) << 16)
+           | ((((int32_t)((b[52] & 0xf )) >>  0)) << 24);
+    bd[15] =  (((int32_t)((b[52]        ) >>  4)) <<  0)
+           |  (((int32_t)((b[53]        ) >>  0)) <<  4)
+           |  (((int32_t)((b[54]        ) >>  0)) << 12)
+           |  (((int32_t)((b[55]        ) >>  0)) << 20);
+    /* Load from bytes */
+    dd[ 0] =  (((int32_t)((d[ 0]        ) >>  0)) <<  0)
+           |  (((int32_t)((d[ 1]        ) >>  0)) <<  8)
+           |  (((int32_t)((d[ 2]        ) >>  0)) << 16)
+           | ((((int32_t)((d[ 3] & 0xf )) >>  0)) << 24);
+    dd[ 1] =  (((int32_t)((d[ 3]        ) >>  4)) <<  0)
+           |  (((int32_t)((d[ 4]        ) >>  0)) <<  4)
+           |  (((int32_t)((d[ 5]        ) >>  0)) << 12)
+           |  (((int32_t)((d[ 6]        ) >>  0)) << 20);
+    dd[ 2] =  (((int32_t)((d[ 7]        ) >>  0)) <<  0)
+           |  (((int32_t)((d[ 8]        ) >>  0)) <<  8)
+           |  (((int32_t)((d[ 9]        ) >>  0)) << 16)
+           | ((((int32_t)((d[10] & 0xf )) >>  0)) << 24);
+    dd[ 3] =  (((int32_t)((d[10]        ) >>  4)) <<  0)
+           |  (((int32_t)((d[11]        ) >>  0)) <<  4)
+           |  (((int32_t)((d[12]        ) >>  0)) << 12)
+           |  (((int32_t)((d[13]        ) >>  0)) << 20);
+    dd[ 4] =  (((int32_t)((d[14]        ) >>  0)) <<  0)
+           |  (((int32_t)((d[15]        ) >>  0)) <<  8)
+           |  (((int32_t)((d[16]        ) >>  0)) << 16)
+           | ((((int32_t)((d[17] & 0xf )) >>  0)) << 24);
+    dd[ 5] =  (((int32_t)((d[17]        ) >>  4)) <<  0)
+           |  (((int32_t)((d[18]        ) >>  0)) <<  4)
+           |  (((int32_t)((d[19]        ) >>  0)) << 12)
+           |  (((int32_t)((d[20]        ) >>  0)) << 20);
+    dd[ 6] =  (((int32_t)((d[21]        ) >>  0)) <<  0)
+           |  (((int32_t)((d[22]        ) >>  0)) <<  8)
+           |  (((int32_t)((d[23]        ) >>  0)) << 16)
+           | ((((int32_t)((d[24] & 0xf )) >>  0)) << 24);
+    dd[ 7] =  (((int32_t)((d[24]        ) >>  4)) <<  0)
+           |  (((int32_t)((d[25]        ) >>  0)) <<  4)
+           |  (((int32_t)((d[26]        ) >>  0)) << 12)
+           |  (((int32_t)((d[27]        ) >>  0)) << 20);
+    dd[ 8] =  (((int32_t)((d[28]        ) >>  0)) <<  0)
+           |  (((int32_t)((d[29]        ) >>  0)) <<  8)
+           |  (((int32_t)((d[30]        ) >>  0)) << 16)
+           | ((((int32_t)((d[31] & 0xf )) >>  0)) << 24);
+    dd[ 9] =  (((int32_t)((d[31]        ) >>  4)) <<  0)
+           |  (((int32_t)((d[32]        ) >>  0)) <<  4)
+           |  (((int32_t)((d[33]        ) >>  0)) << 12)
+           |  (((int32_t)((d[34]        ) >>  0)) << 20);
+    dd[10] =  (((int32_t)((d[35]        ) >>  0)) <<  0)
+           |  (((int32_t)((d[36]        ) >>  0)) <<  8)
+           |  (((int32_t)((d[37]        ) >>  0)) << 16)
+           | ((((int32_t)((d[38] & 0xf )) >>  0)) << 24);
+    dd[11] =  (((int32_t)((d[38]        ) >>  4)) <<  0)
+           |  (((int32_t)((d[39]        ) >>  0)) <<  4)
+           |  (((int32_t)((d[40]        ) >>  0)) << 12)
+           |  (((int32_t)((d[41]        ) >>  0)) << 20);
+    dd[12] =  (((int32_t)((d[42]        ) >>  0)) <<  0)
+           |  (((int32_t)((d[43]        ) >>  0)) <<  8)
+           |  (((int32_t)((d[44]        ) >>  0)) << 16)
+           | ((((int32_t)((d[45] & 0xf )) >>  0)) << 24);
+    dd[13] =  (((int32_t)((d[45]        ) >>  4)) <<  0)
+           |  (((int32_t)((d[46]        ) >>  0)) <<  4)
+           |  (((int32_t)((d[47]        ) >>  0)) << 12)
+           |  (((int32_t)((d[48]        ) >>  0)) << 20);
+    dd[14] =  (((int32_t)((d[49]        ) >>  0)) <<  0)
+           |  (((int32_t)((d[50]        ) >>  0)) <<  8)
+           |  (((int32_t)((d[51]        ) >>  0)) << 16)
+           | ((((int32_t)((d[52] & 0xf )) >>  0)) << 24);
+    dd[15] =  (((int32_t)((d[52]        ) >>  4)) <<  0)
+           |  (((int32_t)((d[53]        ) >>  0)) <<  4)
+           |  (((int32_t)((d[54]        ) >>  0)) << 12)
+           |  (((int32_t)((d[55]        ) >>  0)) << 20);
+
+    /* a * b + d */
+    t[ 0] =  dd[ 0] + (int64_t)ad[ 0] * bd[ 0];
+    t[ 1] =  dd[ 1] + (int64_t)ad[ 0] * bd[ 1]
+                    + (int64_t)ad[ 1] * bd[ 0];
+    t[ 2] =  dd[ 2] + (int64_t)ad[ 0] * bd[ 2]
+                    + (int64_t)ad[ 1] * bd[ 1]
+                    + (int64_t)ad[ 2] * bd[ 0];
+    t[ 3] =  dd[ 3] + (int64_t)ad[ 0] * bd[ 3]
+                    + (int64_t)ad[ 1] * bd[ 2]
+                    + (int64_t)ad[ 2] * bd[ 1]
+                    + (int64_t)ad[ 3] * bd[ 0];
+    t[ 4] =  dd[ 4] + (int64_t)ad[ 0] * bd[ 4]
+                    + (int64_t)ad[ 1] * bd[ 3]
+                    + (int64_t)ad[ 2] * bd[ 2]
+                    + (int64_t)ad[ 3] * bd[ 1]
+                    + (int64_t)ad[ 4] * bd[ 0];
+    t[ 5] =  dd[ 5] + (int64_t)ad[ 0] * bd[ 5]
+                    + (int64_t)ad[ 1] * bd[ 4]
+                    + (int64_t)ad[ 2] * bd[ 3]
+                    + (int64_t)ad[ 3] * bd[ 2]
+                    + (int64_t)ad[ 4] * bd[ 1]
+                    + (int64_t)ad[ 5] * bd[ 0];
+    t[ 6] =  dd[ 6] + (int64_t)ad[ 0] * bd[ 6]
+                    + (int64_t)ad[ 1] * bd[ 5]
+                    + (int64_t)ad[ 2] * bd[ 4]
+                    + (int64_t)ad[ 3] * bd[ 3]
+                    + (int64_t)ad[ 4] * bd[ 2]
+                    + (int64_t)ad[ 5] * bd[ 1]
+                    + (int64_t)ad[ 6] * bd[ 0];
+    t[ 7] =  dd[ 7] + (int64_t)ad[ 0] * bd[ 7]
+                    + (int64_t)ad[ 1] * bd[ 6]
+                    + (int64_t)ad[ 2] * bd[ 5]
+                    + (int64_t)ad[ 3] * bd[ 4]
+                    + (int64_t)ad[ 4] * bd[ 3]
+                    + (int64_t)ad[ 5] * bd[ 2]
+                    + (int64_t)ad[ 6] * bd[ 1]
+                    + (int64_t)ad[ 7] * bd[ 0];
+    t[ 8] =  dd[ 8] + (int64_t)ad[ 0] * bd[ 8]
+                    + (int64_t)ad[ 1] * bd[ 7]
+                    + (int64_t)ad[ 2] * bd[ 6]
+                    + (int64_t)ad[ 3] * bd[ 5]
+                    + (int64_t)ad[ 4] * bd[ 4]
+                    + (int64_t)ad[ 5] * bd[ 3]
+                    + (int64_t)ad[ 6] * bd[ 2]
+                    + (int64_t)ad[ 7] * bd[ 1]
+                    + (int64_t)ad[ 8] * bd[ 0];
+    t[ 9] =  dd[ 9] + (int64_t)ad[ 0] * bd[ 9]
+                    + (int64_t)ad[ 1] * bd[ 8]
+                    + (int64_t)ad[ 2] * bd[ 7]
+                    + (int64_t)ad[ 3] * bd[ 6]
+                    + (int64_t)ad[ 4] * bd[ 5]
+                    + (int64_t)ad[ 5] * bd[ 4]
+                    + (int64_t)ad[ 6] * bd[ 3]
+                    + (int64_t)ad[ 7] * bd[ 2]
+                    + (int64_t)ad[ 8] * bd[ 1]
+                    + (int64_t)ad[ 9] * bd[ 0];
+    t[10] =  dd[10] + (int64_t)ad[ 0] * bd[10]
+                    + (int64_t)ad[ 1] * bd[ 9]
+                    + (int64_t)ad[ 2] * bd[ 8]
+                    + (int64_t)ad[ 3] * bd[ 7]
+                    + (int64_t)ad[ 4] * bd[ 6]
+                    + (int64_t)ad[ 5] * bd[ 5]
+                    + (int64_t)ad[ 6] * bd[ 4]
+                    + (int64_t)ad[ 7] * bd[ 3]
+                    + (int64_t)ad[ 8] * bd[ 2]
+                    + (int64_t)ad[ 9] * bd[ 1]
+                    + (int64_t)ad[10] * bd[ 0];
+    t[11] =  dd[11] + (int64_t)ad[ 0] * bd[11]
+                    + (int64_t)ad[ 1] * bd[10]
+                    + (int64_t)ad[ 2] * bd[ 9]
+                    + (int64_t)ad[ 3] * bd[ 8]
+                    + (int64_t)ad[ 4] * bd[ 7]
+                    + (int64_t)ad[ 5] * bd[ 6]
+                    + (int64_t)ad[ 6] * bd[ 5]
+                    + (int64_t)ad[ 7] * bd[ 4]
+                    + (int64_t)ad[ 8] * bd[ 3]
+                    + (int64_t)ad[ 9] * bd[ 2]
+                    + (int64_t)ad[10] * bd[ 1]
+                    + (int64_t)ad[11] * bd[ 0];
+    t[12] =  dd[12] + (int64_t)ad[ 0] * bd[12]
+                    + (int64_t)ad[ 1] * bd[11]
+                    + (int64_t)ad[ 2] * bd[10]
+                    + (int64_t)ad[ 3] * bd[ 9]
+                    + (int64_t)ad[ 4] * bd[ 8]
+                    + (int64_t)ad[ 5] * bd[ 7]
+                    + (int64_t)ad[ 6] * bd[ 6]
+                    + (int64_t)ad[ 7] * bd[ 5]
+                    + (int64_t)ad[ 8] * bd[ 4]
+                    + (int64_t)ad[ 9] * bd[ 3]
+                    + (int64_t)ad[10] * bd[ 2]
+                    + (int64_t)ad[11] * bd[ 1]
+                    + (int64_t)ad[12] * bd[ 0];
+    t[13] =  dd[13] + (int64_t)ad[ 0] * bd[13]
+                    + (int64_t)ad[ 1] * bd[12]
+                    + (int64_t)ad[ 2] * bd[11]
+                    + (int64_t)ad[ 3] * bd[10]
+                    + (int64_t)ad[ 4] * bd[ 9]
+                    + (int64_t)ad[ 5] * bd[ 8]
+                    + (int64_t)ad[ 6] * bd[ 7]
+                    + (int64_t)ad[ 7] * bd[ 6]
+                    + (int64_t)ad[ 8] * bd[ 5]
+                    + (int64_t)ad[ 9] * bd[ 4]
+                    + (int64_t)ad[10] * bd[ 3]
+                    + (int64_t)ad[11] * bd[ 2]
+                    + (int64_t)ad[12] * bd[ 1]
+                    + (int64_t)ad[13] * bd[ 0];
+    t[14] =  dd[14] + (int64_t)ad[ 0] * bd[14]
+                    + (int64_t)ad[ 1] * bd[13]
+                    + (int64_t)ad[ 2] * bd[12]
+                    + (int64_t)ad[ 3] * bd[11]
+                    + (int64_t)ad[ 4] * bd[10]
+                    + (int64_t)ad[ 5] * bd[ 9]
+                    + (int64_t)ad[ 6] * bd[ 8]
+                    + (int64_t)ad[ 7] * bd[ 7]
+                    + (int64_t)ad[ 8] * bd[ 6]
+                    + (int64_t)ad[ 9] * bd[ 5]
+                    + (int64_t)ad[10] * bd[ 4]
+                    + (int64_t)ad[11] * bd[ 3]
+                    + (int64_t)ad[12] * bd[ 2]
+                    + (int64_t)ad[13] * bd[ 1]
+                    + (int64_t)ad[14] * bd[ 0];
+    t[15] =  dd[15] + (int64_t)ad[ 0] * bd[15]
+                    + (int64_t)ad[ 1] * bd[14]
+                    + (int64_t)ad[ 2] * bd[13]
+                    + (int64_t)ad[ 3] * bd[12]
+                    + (int64_t)ad[ 4] * bd[11]
+                    + (int64_t)ad[ 5] * bd[10]
+                    + (int64_t)ad[ 6] * bd[ 9]
+                    + (int64_t)ad[ 7] * bd[ 8]
+                    + (int64_t)ad[ 8] * bd[ 7]
+                    + (int64_t)ad[ 9] * bd[ 6]
+                    + (int64_t)ad[10] * bd[ 5]
+                    + (int64_t)ad[11] * bd[ 4]
+                    + (int64_t)ad[12] * bd[ 3]
+                    + (int64_t)ad[13] * bd[ 2]
+                    + (int64_t)ad[14] * bd[ 1]
+                    + (int64_t)ad[15] * bd[ 0];
+    t[16] =           (int64_t)ad[ 1] * bd[15]
+                    + (int64_t)ad[ 2] * bd[14]
+                    + (int64_t)ad[ 3] * bd[13]
+                    + (int64_t)ad[ 4] * bd[12]
+                    + (int64_t)ad[ 5] * bd[11]
+                    + (int64_t)ad[ 6] * bd[10]
+                    + (int64_t)ad[ 7] * bd[ 9]
+                    + (int64_t)ad[ 8] * bd[ 8]
+                    + (int64_t)ad[ 9] * bd[ 7]
+                    + (int64_t)ad[10] * bd[ 6]
+                    + (int64_t)ad[11] * bd[ 5]
+                    + (int64_t)ad[12] * bd[ 4]
+                    + (int64_t)ad[13] * bd[ 3]
+                    + (int64_t)ad[14] * bd[ 2]
+                    + (int64_t)ad[15] * bd[ 1];
+    t[17] =           (int64_t)ad[ 2] * bd[15]
+                    + (int64_t)ad[ 3] * bd[14]
+                    + (int64_t)ad[ 4] * bd[13]
+                    + (int64_t)ad[ 5] * bd[12]
+                    + (int64_t)ad[ 6] * bd[11]
+                    + (int64_t)ad[ 7] * bd[10]
+                    + (int64_t)ad[ 8] * bd[ 9]
+                    + (int64_t)ad[ 9] * bd[ 8]
+                    + (int64_t)ad[10] * bd[ 7]
+                    + (int64_t)ad[11] * bd[ 6]
+                    + (int64_t)ad[12] * bd[ 5]
+                    + (int64_t)ad[13] * bd[ 4]
+                    + (int64_t)ad[14] * bd[ 3]
+                    + (int64_t)ad[15] * bd[ 2];
+    t[18] =           (int64_t)ad[ 3] * bd[15]
+                    + (int64_t)ad[ 4] * bd[14]
+                    + (int64_t)ad[ 5] * bd[13]
+                    + (int64_t)ad[ 6] * bd[12]
+                    + (int64_t)ad[ 7] * bd[11]
+                    + (int64_t)ad[ 8] * bd[10]
+                    + (int64_t)ad[ 9] * bd[ 9]
+                    + (int64_t)ad[10] * bd[ 8]
+                    + (int64_t)ad[11] * bd[ 7]
+                    + (int64_t)ad[12] * bd[ 6]
+                    + (int64_t)ad[13] * bd[ 5]
+                    + (int64_t)ad[14] * bd[ 4]
+                    + (int64_t)ad[15] * bd[ 3];
+    t[19] =           (int64_t)ad[ 4] * bd[15]
+                    + (int64_t)ad[ 5] * bd[14]
+                    + (int64_t)ad[ 6] * bd[13]
+                    + (int64_t)ad[ 7] * bd[12]
+                    + (int64_t)ad[ 8] * bd[11]
+                    + (int64_t)ad[ 9] * bd[10]
+                    + (int64_t)ad[10] * bd[ 9]
+                    + (int64_t)ad[11] * bd[ 8]
+                    + (int64_t)ad[12] * bd[ 7]
+                    + (int64_t)ad[13] * bd[ 6]
+                    + (int64_t)ad[14] * bd[ 5]
+                    + (int64_t)ad[15] * bd[ 4];
+    t[20] =           (int64_t)ad[ 5] * bd[15]
+                    + (int64_t)ad[ 6] * bd[14]
+                    + (int64_t)ad[ 7] * bd[13]
+                    + (int64_t)ad[ 8] * bd[12]
+                    + (int64_t)ad[ 9] * bd[11]
+                    + (int64_t)ad[10] * bd[10]
+                    + (int64_t)ad[11] * bd[ 9]
+                    + (int64_t)ad[12] * bd[ 8]
+                    + (int64_t)ad[13] * bd[ 7]
+                    + (int64_t)ad[14] * bd[ 6]
+                    + (int64_t)ad[15] * bd[ 5];
+    t[21] =           (int64_t)ad[ 6] * bd[15]
+                    + (int64_t)ad[ 7] * bd[14]
+                    + (int64_t)ad[ 8] * bd[13]
+                    + (int64_t)ad[ 9] * bd[12]
+                    + (int64_t)ad[10] * bd[11]
+                    + (int64_t)ad[11] * bd[10]
+                    + (int64_t)ad[12] * bd[ 9]
+                    + (int64_t)ad[13] * bd[ 8]
+                    + (int64_t)ad[14] * bd[ 7]
+                    + (int64_t)ad[15] * bd[ 6];
+    t[22] =           (int64_t)ad[ 7] * bd[15]
+                    + (int64_t)ad[ 8] * bd[14]
+                    + (int64_t)ad[ 9] * bd[13]
+                    + (int64_t)ad[10] * bd[12]
+                    + (int64_t)ad[11] * bd[11]
+                    + (int64_t)ad[12] * bd[10]
+                    + (int64_t)ad[13] * bd[ 9]
+                    + (int64_t)ad[14] * bd[ 8]
+                    + (int64_t)ad[15] * bd[ 7];
+    t[23] =           (int64_t)ad[ 8] * bd[15]
+                    + (int64_t)ad[ 9] * bd[14]
+                    + (int64_t)ad[10] * bd[13]
+                    + (int64_t)ad[11] * bd[12]
+                    + (int64_t)ad[12] * bd[11]
+                    + (int64_t)ad[13] * bd[10]
+                    + (int64_t)ad[14] * bd[ 9]
+                    + (int64_t)ad[15] * bd[ 8];
+    t[24] =           (int64_t)ad[ 9] * bd[15]
+                    + (int64_t)ad[10] * bd[14]
+                    + (int64_t)ad[11] * bd[13]
+                    + (int64_t)ad[12] * bd[12]
+                    + (int64_t)ad[13] * bd[11]
+                    + (int64_t)ad[14] * bd[10]
+                    + (int64_t)ad[15] * bd[ 9];
+    t[25] =           (int64_t)ad[10] * bd[15]
+                    + (int64_t)ad[11] * bd[14]
+                    + (int64_t)ad[12] * bd[13]
+                    + (int64_t)ad[13] * bd[12]
+                    + (int64_t)ad[14] * bd[11]
+                    + (int64_t)ad[15] * bd[10];
+    t[26] =           (int64_t)ad[11] * bd[15]
+                    + (int64_t)ad[12] * bd[14]
+                    + (int64_t)ad[13] * bd[13]
+                    + (int64_t)ad[14] * bd[12]
+                    + (int64_t)ad[15] * bd[11];
+    t[27] =           (int64_t)ad[12] * bd[15]
+                    + (int64_t)ad[13] * bd[14]
+                    + (int64_t)ad[14] * bd[13]
+                    + (int64_t)ad[15] * bd[12];
+    t[28] =           (int64_t)ad[13] * bd[15]
+                    + (int64_t)ad[14] * bd[14]
+                    + (int64_t)ad[15] * bd[13];
+    t[29] =           (int64_t)ad[14] * bd[15]
+                    + (int64_t)ad[15] * bd[14];
+    t[30] =           (int64_t)ad[15] * bd[15];
+    t[31] = 0;
+
+    /* Mod curve order */
+    /* 2^446 - 0x8335dc163bb124b65129c96fde933d8d723a70aadc873d6d54a7bb0d */
+    /* Propagate carries */
+    c = t[ 0] >> 28; t[ 1] += c; t[ 0] = t[ 0] & 0xfffffff;
+    c = t[ 1] >> 28; t[ 2] += c; t[ 1] = t[ 1] & 0xfffffff;
+    c = t[ 2] >> 28; t[ 3] += c; t[ 2] = t[ 2] & 0xfffffff;
+    c = t[ 3] >> 28; t[ 4] += c; t[ 3] = t[ 3] & 0xfffffff;
+    c = t[ 4] >> 28; t[ 5] += c; t[ 4] = t[ 4] & 0xfffffff;
+    c = t[ 5] >> 28; t[ 6] += c; t[ 5] = t[ 5] & 0xfffffff;
+    c = t[ 6] >> 28; t[ 7] += c; t[ 6] = t[ 6] & 0xfffffff;
+    c = t[ 7] >> 28; t[ 8] += c; t[ 7] = t[ 7] & 0xfffffff;
+    c = t[ 8] >> 28; t[ 9] += c; t[ 8] = t[ 8] & 0xfffffff;
+    c = t[ 9] >> 28; t[10] += c; t[ 9] = t[ 9] & 0xfffffff;
+    c = t[10] >> 28; t[11] += c; t[10] = t[10] & 0xfffffff;
+    c = t[11] >> 28; t[12] += c; t[11] = t[11] & 0xfffffff;
+    c = t[12] >> 28; t[13] += c; t[12] = t[12] & 0xfffffff;
+    c = t[13] >> 28; t[14] += c; t[13] = t[13] & 0xfffffff;
+    c = t[14] >> 28; t[15] += c; t[14] = t[14] & 0xfffffff;
+    c = t[15] >> 28; t[16] += c; t[15] = t[15] & 0xfffffff;
+    c = t[16] >> 28; t[17] += c; t[16] = t[16] & 0xfffffff;
+    c = t[17] >> 28; t[18] += c; t[17] = t[17] & 0xfffffff;
+    c = t[18] >> 28; t[19] += c; t[18] = t[18] & 0xfffffff;
+    c = t[19] >> 28; t[20] += c; t[19] = t[19] & 0xfffffff;
+    c = t[20] >> 28; t[21] += c; t[20] = t[20] & 0xfffffff;
+    c = t[21] >> 28; t[22] += c; t[21] = t[21] & 0xfffffff;
+    c = t[22] >> 28; t[23] += c; t[22] = t[22] & 0xfffffff;
+    c = t[23] >> 28; t[24] += c; t[23] = t[23] & 0xfffffff;
+    c = t[24] >> 28; t[25] += c; t[24] = t[24] & 0xfffffff;
+    c = t[25] >> 28; t[26] += c; t[25] = t[25] & 0xfffffff;
+    c = t[26] >> 28; t[27] += c; t[26] = t[26] & 0xfffffff;
+    c = t[27] >> 28; t[28] += c; t[27] = t[27] & 0xfffffff;
+    c = t[28] >> 28; t[29] += c; t[28] = t[28] & 0xfffffff;
+    c = t[29] >> 28; t[30] += c; t[29] = t[29] & 0xfffffff;
+    c = t[30] >> 28; t[31] += c; t[30] = t[30] & 0xfffffff;
+    /* Mod top half of extra words */
+    t[ 8] += (int64_t)0x129eec34 * t[24];
+    t[ 9] += (int64_t)0x21cf5b54 * t[24];
+    t[10] += (int64_t)0x29c2ab70 * t[24];
+    t[11] += (int64_t)0x0f635c8c * t[24];
+    t[12] += (int64_t)0x25bf7a4c * t[24];
+    t[13] += (int64_t)0x2d944a70 * t[24];
+    t[14] += (int64_t)0x18eec490 * t[24];
+    t[15] += (int64_t)0x20cd7704 * t[24];
+    t[ 9] += (int64_t)0x129eec34 * t[25];
+    t[10] += (int64_t)0x21cf5b54 * t[25];
+    t[11] += (int64_t)0x29c2ab70 * t[25];
+    t[12] += (int64_t)0x0f635c8c * t[25];
+    t[13] += (int64_t)0x25bf7a4c * t[25];
+    t[14] += (int64_t)0x2d944a70 * t[25];
+    t[15] += (int64_t)0x18eec490 * t[25];
+    t[16] += (int64_t)0x20cd7704 * t[25];
+    t[10] += (int64_t)0x129eec34 * t[26];
+    t[11] += (int64_t)0x21cf5b54 * t[26];
+    t[12] += (int64_t)0x29c2ab70 * t[26];
+    t[13] += (int64_t)0x0f635c8c * t[26];
+    t[14] += (int64_t)0x25bf7a4c * t[26];
+    t[15] += (int64_t)0x2d944a70 * t[26];
+    t[16] += (int64_t)0x18eec490 * t[26];
+    t[17] += (int64_t)0x20cd7704 * t[26];
+    t[11] += (int64_t)0x129eec34 * t[27];
+    t[12] += (int64_t)0x21cf5b54 * t[27];
+    t[13] += (int64_t)0x29c2ab70 * t[27];
+    t[14] += (int64_t)0x0f635c8c * t[27];
+    t[15] += (int64_t)0x25bf7a4c * t[27];
+    t[16] += (int64_t)0x2d944a70 * t[27];
+    t[17] += (int64_t)0x18eec490 * t[27];
+    t[18] += (int64_t)0x20cd7704 * t[27];
+    t[12] += (int64_t)0x129eec34 * t[28];
+    t[13] += (int64_t)0x21cf5b54 * t[28];
+    t[14] += (int64_t)0x29c2ab70 * t[28];
+    t[15] += (int64_t)0x0f635c8c * t[28];
+    t[16] += (int64_t)0x25bf7a4c * t[28];
+    t[17] += (int64_t)0x2d944a70 * t[28];
+    t[18] += (int64_t)0x18eec490 * t[28];
+    t[19] += (int64_t)0x20cd7704 * t[28];
+    t[13] += (int64_t)0x129eec34 * t[29];
+    t[14] += (int64_t)0x21cf5b54 * t[29];
+    t[15] += (int64_t)0x29c2ab70 * t[29];
+    t[16] += (int64_t)0x0f635c8c * t[29];
+    t[17] += (int64_t)0x25bf7a4c * t[29];
+    t[18] += (int64_t)0x2d944a70 * t[29];
+    t[19] += (int64_t)0x18eec490 * t[29];
+    t[20] += (int64_t)0x20cd7704 * t[29];
+    t[14] += (int64_t)0x129eec34 * t[30];
+    t[15] += (int64_t)0x21cf5b54 * t[30];
+    t[16] += (int64_t)0x29c2ab70 * t[30];
+    t[17] += (int64_t)0x0f635c8c * t[30];
+    t[18] += (int64_t)0x25bf7a4c * t[30];
+    t[19] += (int64_t)0x2d944a70 * t[30];
+    t[20] += (int64_t)0x18eec490 * t[30];
+    t[21] += (int64_t)0x20cd7704 * t[30];
+    t[15] += (int64_t)0x129eec34 * t[31];
+    t[16] += (int64_t)0x21cf5b54 * t[31];
+    t[17] += (int64_t)0x29c2ab70 * t[31];
+    t[18] += (int64_t)0x0f635c8c * t[31];
+    t[19] += (int64_t)0x25bf7a4c * t[31];
+    t[20] += (int64_t)0x2d944a70 * t[31];
+    t[21] += (int64_t)0x18eec490 * t[31];
+    t[22] += (int64_t)0x20cd7704 * t[31];
+    /* Propagate carries */
+    c = t[ 8] >> 28; t[ 9] += c; t[ 8] = t[ 8] & 0xfffffff;
+    c = t[ 9] >> 28; t[10] += c; t[ 9] = t[ 9] & 0xfffffff;
+    c = t[10] >> 28; t[11] += c; t[10] = t[10] & 0xfffffff;
+    c = t[11] >> 28; t[12] += c; t[11] = t[11] & 0xfffffff;
+    c = t[12] >> 28; t[13] += c; t[12] = t[12] & 0xfffffff;
+    c = t[13] >> 28; t[14] += c; t[13] = t[13] & 0xfffffff;
+    c = t[14] >> 28; t[15] += c; t[14] = t[14] & 0xfffffff;
+    c = t[15] >> 28; t[16] += c; t[15] = t[15] & 0xfffffff;
+    c = t[16] >> 28; t[17] += c; t[16] = t[16] & 0xfffffff;
+    c = t[17] >> 28; t[18] += c; t[17] = t[17] & 0xfffffff;
+    c = t[18] >> 28; t[19] += c; t[18] = t[18] & 0xfffffff;
+    c = t[19] >> 28; t[20] += c; t[19] = t[19] & 0xfffffff;
+    c = t[20] >> 28; t[21] += c; t[20] = t[20] & 0xfffffff;
+    c = t[21] >> 28; t[22] += c; t[21] = t[21] & 0xfffffff;
+    c = t[22] >> 28; t[23] += c; t[22] = t[22] & 0xfffffff;
+    /* Mod bottom half of extra words */
+    t[ 0] += (int64_t)0x129eec34 * t[16];
+    t[ 1] += (int64_t)0x21cf5b54 * t[16];
+    t[ 2] += (int64_t)0x29c2ab70 * t[16];
+    t[ 3] += (int64_t)0x0f635c8c * t[16];
+    t[ 4] += (int64_t)0x25bf7a4c * t[16];
+    t[ 5] += (int64_t)0x2d944a70 * t[16];
+    t[ 6] += (int64_t)0x18eec490 * t[16];
+    t[ 7] += (int64_t)0x20cd7704 * t[16];
+    t[ 1] += (int64_t)0x129eec34 * t[17];
+    t[ 2] += (int64_t)0x21cf5b54 * t[17];
+    t[ 3] += (int64_t)0x29c2ab70 * t[17];
+    t[ 4] += (int64_t)0x0f635c8c * t[17];
+    t[ 5] += (int64_t)0x25bf7a4c * t[17];
+    t[ 6] += (int64_t)0x2d944a70 * t[17];
+    t[ 7] += (int64_t)0x18eec490 * t[17];
+    t[ 8] += (int64_t)0x20cd7704 * t[17];
+    t[ 2] += (int64_t)0x129eec34 * t[18];
+    t[ 3] += (int64_t)0x21cf5b54 * t[18];
+    t[ 4] += (int64_t)0x29c2ab70 * t[18];
+    t[ 5] += (int64_t)0x0f635c8c * t[18];
+    t[ 6] += (int64_t)0x25bf7a4c * t[18];
+    t[ 7] += (int64_t)0x2d944a70 * t[18];
+    t[ 8] += (int64_t)0x18eec490 * t[18];
+    t[ 9] += (int64_t)0x20cd7704 * t[18];
+    t[ 3] += (int64_t)0x129eec34 * t[19];
+    t[ 4] += (int64_t)0x21cf5b54 * t[19];
+    t[ 5] += (int64_t)0x29c2ab70 * t[19];
+    t[ 6] += (int64_t)0x0f635c8c * t[19];
+    t[ 7] += (int64_t)0x25bf7a4c * t[19];
+    t[ 8] += (int64_t)0x2d944a70 * t[19];
+    t[ 9] += (int64_t)0x18eec490 * t[19];
+    t[10] += (int64_t)0x20cd7704 * t[19];
+    t[ 4] += (int64_t)0x129eec34 * t[20];
+    t[ 5] += (int64_t)0x21cf5b54 * t[20];
+    t[ 6] += (int64_t)0x29c2ab70 * t[20];
+    t[ 7] += (int64_t)0x0f635c8c * t[20];
+    t[ 8] += (int64_t)0x25bf7a4c * t[20];
+    t[ 9] += (int64_t)0x2d944a70 * t[20];
+    t[10] += (int64_t)0x18eec490 * t[20];
+    t[11] += (int64_t)0x20cd7704 * t[20];
+    t[ 5] += (int64_t)0x129eec34 * t[21];
+    t[ 6] += (int64_t)0x21cf5b54 * t[21];
+    t[ 7] += (int64_t)0x29c2ab70 * t[21];
+    t[ 8] += (int64_t)0x0f635c8c * t[21];
+    t[ 9] += (int64_t)0x25bf7a4c * t[21];
+    t[10] += (int64_t)0x2d944a70 * t[21];
+    t[11] += (int64_t)0x18eec490 * t[21];
+    t[12] += (int64_t)0x20cd7704 * t[21];
+    t[ 6] += (int64_t)0x129eec34 * t[22];
+    t[ 7] += (int64_t)0x21cf5b54 * t[22];
+    t[ 8] += (int64_t)0x29c2ab70 * t[22];
+    t[ 9] += (int64_t)0x0f635c8c * t[22];
+    t[10] += (int64_t)0x25bf7a4c * t[22];
+    t[11] += (int64_t)0x2d944a70 * t[22];
+    t[12] += (int64_t)0x18eec490 * t[22];
+    t[13] += (int64_t)0x20cd7704 * t[22];
+    t[ 7] += (int64_t)0x129eec34 * t[23];
+    t[ 8] += (int64_t)0x21cf5b54 * t[23];
+    t[ 9] += (int64_t)0x29c2ab70 * t[23];
+    t[10] += (int64_t)0x0f635c8c * t[23];
+    t[11] += (int64_t)0x25bf7a4c * t[23];
+    t[12] += (int64_t)0x2d944a70 * t[23];
+    t[13] += (int64_t)0x18eec490 * t[23];
+    t[14] += (int64_t)0x20cd7704 * t[23];
+    /* Propagate carries */
+    c = t[ 0] >> 28; t[ 1] += c; rd[ 0] = (int32_t)(t[ 0] & 0xfffffff);
+    c = t[ 1] >> 28; t[ 2] += c; rd[ 1] = (int32_t)(t[ 1] & 0xfffffff);
+    c = t[ 2] >> 28; t[ 3] += c; rd[ 2] = (int32_t)(t[ 2] & 0xfffffff);
+    c = t[ 3] >> 28; t[ 4] += c; rd[ 3] = (int32_t)(t[ 3] & 0xfffffff);
+    c = t[ 4] >> 28; t[ 5] += c; rd[ 4] = (int32_t)(t[ 4] & 0xfffffff);
+    c = t[ 5] >> 28; t[ 6] += c; rd[ 5] = (int32_t)(t[ 5] & 0xfffffff);
+    c = t[ 6] >> 28; t[ 7] += c; rd[ 6] = (int32_t)(t[ 6] & 0xfffffff);
+    c = t[ 7] >> 28; t[ 8] += c; rd[ 7] = (int32_t)(t[ 7] & 0xfffffff);
+    c = t[ 8] >> 28; t[ 9] += c; rd[ 8] = (int32_t)(t[ 8] & 0xfffffff);
+    c = t[ 9] >> 28; t[10] += c; rd[ 9] = (int32_t)(t[ 9] & 0xfffffff);
+    c = t[10] >> 28; t[11] += c; rd[10] = (int32_t)(t[10] & 0xfffffff);
+    c = t[11] >> 28; t[12] += c; rd[11] = (int32_t)(t[11] & 0xfffffff);
+    c = t[12] >> 28; t[13] += c; rd[12] = (int32_t)(t[12] & 0xfffffff);
+    c = t[13] >> 28; t[14] += c; rd[13] = (int32_t)(t[13] & 0xfffffff);
+    c = t[14] >> 28; t[15] += c; rd[14] = (int32_t)(t[14] & 0xfffffff);
+    rd[15] = t[15];
+    /* Mod bits over 28 in last word */
+    o = rd[15] >> 26; rd[15] &= 0x3ffffff;
+    rd[ 0] += 0x4a7bb0d * o;
+    rd[ 1] += 0x873d6d5 * o;
+    rd[ 2] += 0xa70aadc * o;
+    rd[ 3] += 0x3d8d723 * o;
+    rd[ 4] += 0x96fde93 * o;
+    rd[ 5] += 0xb65129c * o;
+    rd[ 6] += 0x63bb124 * o;
+    rd[ 7] += 0x8335dc1 * o;
+    /* Propagate carries */
+    o = rd[ 0] >> 28; rd[ 1] += o; rd[ 0] = rd[ 0] & 0xfffffff;
+    o = rd[ 1] >> 28; rd[ 2] += o; rd[ 1] = rd[ 1] & 0xfffffff;
+    o = rd[ 2] >> 28; rd[ 3] += o; rd[ 2] = rd[ 2] & 0xfffffff;
+    o = rd[ 3] >> 28; rd[ 4] += o; rd[ 3] = rd[ 3] & 0xfffffff;
+    o = rd[ 4] >> 28; rd[ 5] += o; rd[ 4] = rd[ 4] & 0xfffffff;
+    o = rd[ 5] >> 28; rd[ 6] += o; rd[ 5] = rd[ 5] & 0xfffffff;
+    o = rd[ 6] >> 28; rd[ 7] += o; rd[ 6] = rd[ 6] & 0xfffffff;
+    o = rd[ 7] >> 28; rd[ 8] += o; rd[ 7] = rd[ 7] & 0xfffffff;
+    o = rd[ 8] >> 28; rd[ 9] += o; rd[ 8] = rd[ 8] & 0xfffffff;
+    o = rd[ 9] >> 28; rd[10] += o; rd[ 9] = rd[ 9] & 0xfffffff;
+    o = rd[10] >> 28; rd[11] += o; rd[10] = rd[10] & 0xfffffff;
+    o = rd[11] >> 28; rd[12] += o; rd[11] = rd[11] & 0xfffffff;
+    o = rd[12] >> 28; rd[13] += o; rd[12] = rd[12] & 0xfffffff;
+    o = rd[13] >> 28; rd[14] += o; rd[13] = rd[13] & 0xfffffff;
+    o = rd[14] >> 28; rd[15] += o; rd[14] = rd[14] & 0xfffffff;
+
+    /* Convert to bytes */
+    r[ 0] = (rd[0 ] >>  0);
+    r[ 1] = (rd[0 ] >>  8);
+    r[ 2] = (rd[0 ] >> 16);
+    r[ 3] = (rd[0 ] >> 24) + ((rd[1 ] >>  0) <<  4);
+    r[ 4] = (rd[1 ] >>  4);
+    r[ 5] = (rd[1 ] >> 12);
+    r[ 6] = (rd[1 ] >> 20);
+    r[ 7] = (rd[2 ] >>  0);
+    r[ 8] = (rd[2 ] >>  8);
+    r[ 9] = (rd[2 ] >> 16);
+    r[10] = (rd[2 ] >> 24) + ((rd[3 ] >>  0) <<  4);
+    r[11] = (rd[3 ] >>  4);
+    r[12] = (rd[3 ] >> 12);
+    r[13] = (rd[3 ] >> 20);
+    r[14] = (rd[4 ] >>  0);
+    r[15] = (rd[4 ] >>  8);
+    r[16] = (rd[4 ] >> 16);
+    r[17] = (rd[4 ] >> 24) + ((rd[5 ] >>  0) <<  4);
+    r[18] = (rd[5 ] >>  4);
+    r[19] = (rd[5 ] >> 12);
+    r[20] = (rd[5 ] >> 20);
+    r[21] = (rd[6 ] >>  0);
+    r[22] = (rd[6 ] >>  8);
+    r[23] = (rd[6 ] >> 16);
+    r[24] = (rd[6 ] >> 24) + ((rd[7 ] >>  0) <<  4);
+    r[25] = (rd[7 ] >>  4);
+    r[26] = (rd[7 ] >> 12);
+    r[27] = (rd[7 ] >> 20);
+    r[28] = (rd[8 ] >>  0);
+    r[29] = (rd[8 ] >>  8);
+    r[30] = (rd[8 ] >> 16);
+    r[31] = (rd[8 ] >> 24) + ((rd[9 ] >>  0) <<  4);
+    r[32] = (rd[9 ] >>  4);
+    r[33] = (rd[9 ] >> 12);
+    r[34] = (rd[9 ] >> 20);
+    r[35] = (rd[10] >>  0);
+    r[36] = (rd[10] >>  8);
+    r[37] = (rd[10] >> 16);
+    r[38] = (rd[10] >> 24) + ((rd[11] >>  0) <<  4);
+    r[39] = (rd[11] >>  4);
+    r[40] = (rd[11] >> 12);
+    r[41] = (rd[11] >> 20);
+    r[42] = (rd[12] >>  0);
+    r[43] = (rd[12] >>  8);
+    r[44] = (rd[12] >> 16);
+    r[45] = (rd[12] >> 24) + ((rd[13] >>  0) <<  4);
+    r[46] = (rd[13] >>  4);
+    r[47] = (rd[13] >> 12);
+    r[48] = (rd[13] >> 20);
+    r[49] = (rd[14] >>  0);
+    r[50] = (rd[14] >>  8);
+    r[51] = (rd[14] >> 16);
+    r[52] = (rd[14] >> 24) + ((rd[15] >>  0) <<  4);
+    r[53] = (rd[15] >>  4);
+    r[54] = (rd[15] >> 12);
+    r[55] = (rd[15] >> 20);
+    r[56] = 0;
+}
+
+/* Precomputed multiples of the base point. */
+static const ge448_precomp base[58][8] = {
+{
+    {
+        { 0x70cc05e, 0x26a82bc, 0x0938e26, 0x80e18b0, 0x511433b, 0xf72ab66,
+          0x412ae1a, 0xa3d3a46, 0xa6de324, 0x0f1767e, 0x4657047, 0x36da9e1,
+          0x5a622bf, 0xed221d1, 0x66bed0d, 0x4f1970c },
+        { 0x230fa14, 0x08795bf, 0x7c8ad98, 0x132c4ed, 0x9c4fdbd, 0x1ce67c3,
+          0x73ad3ff, 0x05a0c2d, 0x7789c1e, 0xa398408, 0xa73736c, 0xc7624be,
+          0x03756c9, 0x2488762, 0x16eb6bc, 0x693f467 }
+    },
+    {
+        { 0x5555555, 0x5555555, 0x5555555, 0x5555555, 0x5555555, 0x5555555,
+          0x5555555, 0x5555555, 0xaaaaaa9, 0xaaaaaaa, 0xaaaaaaa, 0xaaaaaaa,
+          0xaaaaaaa, 0xaaaaaaa, 0xaaaaaaa, 0xaaaaaaa },
+        { 0xa9386ed, 0xeafbcde, 0xda06bda, 0xb2bed1c, 0x098bbbc, 0x833a2a3,
+          0x80d6565, 0x8ad8c4b, 0x7e36d72, 0x884dd7b, 0xed7a035, 0xc2b0036,
+          0x6205086, 0x8db359d, 0x34ad704, 0xae05e96 }
+    },
+    {
+        { 0x6ff2f8f, 0x2817328, 0xda85757, 0xb769465, 0xfd6e862, 0xf7f6271,
+          0x8daa9cb, 0x4a3fcfe, 0x2ba077a, 0xda82c7e, 0x41b8b8c, 0x9433322,
+          0x4316cb6, 0x6455bd6, 0xb9108af, 0x0865886 },
+        { 0x88ed6fc, 0x22ac135, 0x02dafb8, 0x9a68fed, 0x7f0bffa, 0x1bdb676,
+          0x8bb3a33, 0xec4e1d5, 0xce43c82, 0x56c3b9f, 0xa8d9523, 0xa6449a4,
+          0xa7ad43a, 0xf706cbd, 0xbd5125c, 0xe005a8d }
+    },
+    {
+        { 0x8ba7f30, 0xce42ac4, 0x9e120e2, 0xe179894, 0x8ba21ae, 0xf1515dd,
+          0x301b7bd, 0x70c74cc, 0x3fda4be, 0x0891c69, 0xa09cf4e, 0x29ea255,
+          0x17226f9, 0x2c1419a, 0xc6c0cce, 0x49dcbc5 },
+        { 0xde51839, 0xe236f86, 0xd4f5b32, 0x44285d0, 0x472b5d4, 0x7ea1ca9,
+          0x1c0d8f9, 0x7b8a5bc, 0x90dc322, 0x57d845c, 0x7c02f04, 0x1b979cb,
+          0x3a5de02, 0x27164b3, 0x4accde5, 0xd49077e }
+    },
+    {
+        { 0x2030034, 0xa99d109, 0x6f950d0, 0x2d8cefc, 0xc96f07b, 0x7a920c3,
+          0x08bc0d5, 0x9588128, 0x6d761e8, 0x62ada75, 0xbcf7285, 0x0def80c,
+          0x01eedb5, 0x0e2ba76, 0x5a48dcb, 0x7a9f933 },
+        { 0x2f435eb, 0xb473147, 0xf225443, 0x5512881, 0x33c5840, 0xee59d2b,
+          0x127d7a4, 0xb698017, 0x86551f7, 0xb18fced, 0xca1823a, 0x0ade260,
+          0xce4fd58, 0xd3b9109, 0xa2517ed, 0xadfd751 }
+    },
+    {
+        { 0xabef79c, 0x7fd7652, 0x443a878, 0x6c20a07, 0x12a7109, 0x5c1840d,
+          0x876451c, 0x4a06e4a, 0xad95f65, 0x3bed0b4, 0x3fb0260, 0x25d2e67,
+          0xaebd971, 0x2e00349, 0x4498b72, 0x54523e0 },
+        { 0x07c7bcc, 0xea5d1da, 0x38ea98c, 0xcce7769, 0x61d2b3e, 0x80284e8,
+          0x6e1ff1b, 0x48de76b, 0x9c58522, 0x7b12186, 0x2765a1a, 0xbfd053a,
+          0x056c667, 0x2d743ec, 0xd8ab61c, 0x3f99b9c }
+    },
+    {
+        { 0xeb5eaf7, 0xdf9567c, 0x78ac7d7, 0x110a6b4, 0x4706e0b, 0x2d33501,
+          0x0b5a209, 0x0df9c7b, 0x568e684, 0xba4223d, 0x8c3719b, 0xd78af2d,
+          0xa5291b6, 0x77467b9, 0x5c89bef, 0x079748e },
+        { 0xdac377f, 0xe20d3fa, 0x72b5c09, 0x34e8669, 0xc40bbb7, 0xd8687a3,
+          0xd2f84c9, 0x7b3946f, 0xa78f50e, 0xd00e40c, 0x17e7179, 0xb875944,
+          0xcb23583, 0x9c7373b, 0xc90fd69, 0x7ddeda3 }
+    },
+    {
+        { 0x153bde0, 0x2538a67, 0x406b696, 0x223aca9, 0x1ad713e, 0xf9080dc,
+          0xd816a64, 0x6c4cb47, 0x5dc8b97, 0xbc28568, 0xc08e2d7, 0xd97b037,
+          0x5d0e66b, 0x5b63fb4, 0x520e8a3, 0xd1f1bc5 },
+        { 0xe69e09b, 0x4eb873c, 0xbc8ee45, 0x1663164, 0xba8d89f, 0x08f7003,
+          0x386ad82, 0x4b98ead, 0xbd94c7b, 0xa4b93b7, 0xc6b38b3, 0x46ba408,
+          0xf3574ff, 0xdae87d1, 0xe9bea9b, 0xc7564f4 }
+    },
+},
+{
+    {
+        { 0x5bfac1c, 0x2e4fdb2, 0xf5f3bca, 0xf0d79aa, 0x20fb7cc, 0xe756b0d,
+          0xb39609a, 0xe3696be, 0x5a5ab58, 0xa019fc3, 0x3b281dd, 0xa2b2485,
+          0x61ac0a2, 0xe3e2be7, 0xeb56730, 0xf19c34f },
+        { 0xa30241e, 0x2d25ce8, 0xb73d7a1, 0xf5661ea, 0xdaac9f4, 0x4611ed0,
+          0x4ced72c, 0xd544234, 0xe92e985, 0xce78f52, 0x4da4aad, 0x6fe5dd4,
+          0x1d363ce, 0xfcaddc6, 0xc9111bf, 0x3beb69c }
+    },
+    {
+        { 0x940ebc9, 0xd2e7660, 0xb17bbe0, 0xe032018, 0x75c0575, 0xad49391,
+          0x21c7f34, 0xdd0b147, 0x3e147e0, 0x52c2ba4, 0x0ee8973, 0x7dd03c6,
+          0xecf2754, 0x5472e8d, 0xd6482bb, 0x17a1cd1 },
+        { 0x8128b3f, 0xdd43b84, 0xea7dd25, 0xf0cae34, 0xff07df2, 0x81ca99f,
+          0x92ebbdc, 0x1c89597, 0x72155e6, 0x45c7a68, 0x39ddd08, 0x907a50e,
+          0xbb2d89b, 0xbe398c2, 0x1b3b536, 0x38063f9 }
+    },
+    {
+        { 0xf843b23, 0x149fafb, 0xac7f22a, 0x00ab582, 0xf2f4d4c, 0xa3b981b,
+          0x4341a22, 0x2ce1a65, 0x7c03b63, 0x68a4074, 0x12f2cf8, 0x63206a2,
+          0x5149741, 0xc9961d3, 0xbc7099e, 0xfb85430 },
+        { 0x90a9e59, 0x9c91072, 0x06de367, 0x734e94a, 0xdb99214, 0x5cf3cbe,
+          0x45b1fb9, 0xc6bce32, 0xdd7be0d, 0x1a82abe, 0xede7d1c, 0xf74976a,
+          0x21503bd, 0x7025b7c, 0x0d096ab, 0xf789491 }
+    },
+    {
+        { 0x555a41b, 0x6bd48bb, 0x67de206, 0xfbdd0d0, 0xdd6dfd1, 0x98bc477,
+          0x3e40b8a, 0x1d0693b, 0xda32ae4, 0x6e15563, 0xfcebaa2, 0x0194a20,
+          0x0980a93, 0xda11615, 0x0109cec, 0x8e11920 },
+        { 0xffb9726, 0x8ea0552, 0x047e44b, 0xeba50a4, 0x60ddf76, 0xc050d24,
+          0xac690e0, 0xe009204, 0x9b18edc, 0x47b8639, 0xc77f23f, 0x2f5b76a,
+          0x0792905, 0x4296c24, 0x06f6dc7, 0x73f6b4a }
+    },
+    {
+        { 0x3b10cad, 0xb6ef9ea, 0xf7c8fce, 0x312843d, 0x8bedf86, 0x5bdcd52,
+          0xf6dd823, 0x2889059, 0x08bfde0, 0x04578e9, 0x123e2e5, 0x3245df3,
+          0x7ee9e3a, 0xbf461d5, 0x6f94ceb, 0xddec2d4 },
+        { 0x145768f, 0x21b43b9, 0xdae962a, 0xe79a8f9, 0xcbb043f, 0xff1972b,
+          0x239649b, 0xe3dcf6d, 0xc533b85, 0xed592bd, 0xdbe22d0, 0x14ff94f,
+          0xf1d8e22, 0x6c4eb87, 0xd18cf6d, 0xd8d4c71 }
+    },
+    {
+        { 0x8d96345, 0xcda666c, 0x836cd21, 0x9ecaa25, 0x984606e, 0x6e885bd,
+          0x804f054, 0x1dd5fef, 0x6959ae4, 0x9dfff6b, 0xc9b55cc, 0x99b9cf8,
+          0x62b9b80, 0xb4716b0, 0x554b128, 0x13ec87c },
+        { 0x75aacc2, 0xe696d1f, 0x87fc5ff, 0xf78c993, 0x3809d42, 0x76c0947,
+          0xb618fa8, 0x99ce62d, 0x2f53341, 0x35e3e02, 0x0db6c5e, 0x62fc1ac,
+          0x00d8b47, 0xa1fb8e6, 0x58f0d1e, 0x0bc1070 }
+    },
+    {
+        { 0x16da513, 0x1f45269, 0xf5cf341, 0x1f2fc04, 0x64d23e0, 0xae92086,
+          0xda8a113, 0x4e33082, 0x1cfc085, 0x2688ec6, 0x6e5327f, 0x6f2e8de,
+          0xb4e48a8, 0x2070db3, 0x3240ade, 0xd662697 },
+        { 0xfbd997b, 0xa6b317f, 0x49e26bd, 0x9fa1b56, 0x8cba0f3, 0xcbf0d25,
+          0x17b4745, 0x4a7791b, 0x5c9e190, 0x25f555b, 0x923ec4c, 0x7cd3940,
+          0xe98f1b6, 0x16f4c6a, 0xbcd4e0f, 0x7962116 }
+    },
+    {
+        { 0x02491e3, 0x8d58fa3, 0x7ab3898, 0x7cf76c6, 0x647ebc7, 0xbc2f657,
+          0xd25f5a3, 0x5f4bfe0, 0xd69505d, 0x503f478, 0x3fb6645, 0x4a889fc,
+          0xfa86b18, 0x33e1bc1, 0x5508dd8, 0xabb234f },
+        { 0x9a05b48, 0x5348e1b, 0x64dc858, 0x57ac5f1, 0xec8a2d3, 0x21f4d38,
+          0xa3a3e9d, 0x5ec6d3c, 0x560a0b8, 0xcd4062e, 0x3433f59, 0x49b74f7,
+          0xcab14e3, 0xefd9d87, 0xeb964f5, 0x858ce7f }
+    },
+},
+{
+    {
+        { 0xeb731b4, 0x7577254, 0x4e2397e, 0x9fff1fb, 0xc821715, 0x749b145,
+          0x2e65e67, 0x40619fe, 0x2e618d8, 0x57b8281, 0x707b83e, 0x063186c,
+          0x31b24a2, 0xcfc80cb, 0xac75169, 0xcca6185 },
+        { 0xb255818, 0x6539f44, 0x0368bce, 0x5895da0, 0x17c7482, 0x841a309,
+          0xb1a9c9e, 0x85469e1, 0xe4f7d9d, 0x05664c0, 0x7b35cc0, 0x8a06318,
+          0xa0e9b0a, 0x214763a, 0x4b26ac2, 0x1bd872c }
+    },
+    {
+        { 0xa93762b, 0x3578f97, 0x72d52bc, 0x434f69a, 0x22cb565, 0xddcca40,
+          0xff20544, 0xa7d1e41, 0x8a66588, 0x823475d, 0x99d7baf, 0x9fc97c7,
+          0x660e421, 0x15542f1, 0x843faf6, 0xa7d1f60 },
+        { 0x4063ccc, 0xbbfaab5, 0xa49855a, 0x3ad9bad, 0x5bddbfe, 0xffd5f1c,
+          0xae87e59, 0x0e419c2, 0xf89956b, 0xdce6ed6, 0xccd8951, 0xf047c21,
+          0xa83c991, 0x6ed4a1b, 0x2d28e0a, 0x85af86e }
+    },
+    {
+        { 0x9ed48a8, 0x04433c4, 0x0bc375d, 0xeffa858, 0xfa6e3b5, 0xfb0e1b2,
+          0xa1aadda, 0x51483a2, 0xf8b2ea8, 0x733448d, 0xf639f0c, 0xaa0513c,
+          0xa23bf84, 0x6bc61a3, 0xdc2430d, 0x3e64f68 },
+        { 0xc5876b1, 0x51bf502, 0x1c0dd2a, 0x6b83375, 0x342914f, 0xe597be1,
+          0xf8e632c, 0x43d5ab0, 0xd62587b, 0x2696715, 0xed34f24, 0xe87d20a,
+          0xe18baf7, 0x25b7e14, 0xe22e084, 0xf5eb753 }
+    },
+    {
+        { 0x24d8295, 0x51da717, 0x18d1340, 0xd478e43, 0x2cf7f66, 0xacf94f4,
+          0x3760711, 0x230d7d1, 0x5abc626, 0x078a66a, 0x6b5f6da, 0xd78b0bd,
+          0x96d1d0b, 0x23a9713, 0x4bd960f, 0x87623d6 },
+        { 0x77db53f, 0x0841a99, 0xf4d03ee, 0x23c1a53, 0x1f95df1, 0x2f62c2e,
+          0x116f4e7, 0xd1e2ec1, 0x34811a9, 0x896d2fe, 0xec8096e, 0xad65e2b,
+          0xb1744a6, 0x09d36f9, 0xff5ddf7, 0x564bac7 }
+    },
+    {
+        { 0xc3f77cb, 0x48b41e2, 0x0968938, 0x5227673, 0xfd9b452, 0xff1b899,
+          0x2e03908, 0x67cf3bf, 0x248a6fb, 0x3731d90, 0x256598f, 0xd800a05,
+          0xbdc8530, 0x347d2f2, 0x7ad08a1, 0xc72a300 },
+        { 0x1d65f73, 0x5e5be74, 0x4206ead, 0x183d4ae, 0xade4013, 0xcb50c1c,
+          0x3102483, 0x39db43d, 0x70d6325, 0x0eb49fa, 0xc1f02b9, 0xa18f6a2,
+          0xdbf5e66, 0x3e6fe30, 0x3a82aa5, 0xac4eeb9 }
+    },
+    {
+        { 0x3613d47, 0x295affd, 0xb56f343, 0x7b7e68a, 0x92b173b, 0x9806296,
+          0xbad35fb, 0x937061e, 0x5c21eea, 0x2501978, 0x787a746, 0xe92721b,
+          0x3651631, 0x463c46c, 0xc6f2d5a, 0x6da4b5d },
+        { 0x6e6d18c, 0xcb67cc1, 0x0010588, 0x1b30d52, 0xdb1d1e8, 0x1bb6ea6,
+          0xad11474, 0x9c6308a, 0x3d19b1c, 0xc316741, 0xbe4fb79, 0xf2e84d7,
+          0xe050f77, 0xeccb873, 0xcc2bf86, 0xf7c8d80 }
+    },
+    {
+        { 0x7ab20e5, 0x16fe2e1, 0xecf3a92, 0x274dead, 0x0972f67, 0x9f43487,
+          0x4605751, 0x9a65a45, 0xb8980b2, 0x9351f07, 0x0eb08a5, 0x412962b,
+          0x733f440, 0xb8c9bfd, 0x1ca250f, 0xac2cd64 },
+        { 0x2ba7d26, 0x68cdd0f, 0x4e0beea, 0xd3d2a4a, 0x9f4a258, 0x50135c1,
+          0xf0d02e4, 0xb475e53, 0x589283a, 0x432d8c6, 0xa0a2b6c, 0x29141bf,
+          0x13704bc, 0xd7379ec, 0x52459bf, 0x831562c }
+    },
+    {
+        { 0xeeec506, 0x676b366, 0x45da557, 0xdd6cad5, 0x77057d2, 0x9de39cb,
+          0xdf05bf1, 0x388c5fe, 0xdfb1f03, 0x6e55650, 0x52126c9, 0xdbceffa,
+          0x3a4a220, 0xe4d187b, 0xeb27020, 0xac914f9 },
+        { 0xd2e5f30, 0x3f4ab98, 0xdd94451, 0x6ae97da, 0x0d80981, 0x64af695,
+          0xf2aa2ce, 0x36b4b90, 0x18fcf59, 0x6adcd7a, 0xc116c81, 0x3ddfe6d,
+          0x549b9e3, 0x661072b, 0xec4584d, 0xd9e3134 }
+    },
+},
+{
+    {
+        { 0xa1e400c, 0x6e46707, 0x551e806, 0xcdc990b, 0x3a07724, 0xfa51251,
+          0x1b3e4f5, 0x500553f, 0xef4dac3, 0x67e8b58, 0x2cb4cc7, 0x958349f,
+          0x7f9143c, 0x948b4ed, 0x2b7822b, 0xe646d09 },
+        { 0x2bc3c26, 0xd185dd5, 0xc837fc9, 0x34ba16e, 0x5a788b7, 0x516d4ba,
+          0x56142b0, 0x72f2de7, 0xf445b3d, 0x5846f61, 0xf4631a1, 0xdaec5c9,
+          0x169ea9b, 0xa10b18d, 0xaf6751b, 0x85d2998 }
+    },
+    {
+        { 0x43ddf31, 0xda0cac4, 0x1860911, 0x0966e17, 0x3cba600, 0x9c3a717,
+          0x571f895, 0x5781880, 0x737ac21, 0x5e2a927, 0x6c253fb, 0x8a46148,
+          0x95ee626, 0xe801cf5, 0x5f84fc0, 0x271166a },
+        { 0xba856bd, 0x306937f, 0xbe80a43, 0x80cb179, 0xffb5980, 0x70393b2,
+          0x660fc64, 0xa8e4a1c, 0xc0d5c98, 0x5078abf, 0xfbd31ff, 0x62ba530,
+          0x9e51b88, 0xda60844, 0x355ae15, 0xdb6ecb0 }
+    },
+    {
+        { 0x23c5d49, 0xbcbb6ea, 0x87959bc, 0x08906ba, 0x0991665, 0x61cc088,
+          0xd90d13c, 0x21d6b41, 0xd03afe9, 0x0c27ac1, 0x5cfea52, 0x159995f,
+          0xbdfe220, 0x4057e20, 0xcbdf058, 0xdd1b349 },
+        { 0x2e37159, 0x0cd6626, 0x3eb0d17, 0x8cea8e4, 0x5bce7f0, 0x553af08,
+          0x5b6511d, 0xb94cb5f, 0x50e0330, 0x7b8d3a5, 0x57ab7e7, 0x4159110,
+          0x6aa886f, 0x320820e, 0xc5b6b81, 0x130d4d6 }
+    },
+    {
+        { 0xc7bb2ed, 0x2f98059, 0xa49bdfb, 0x33ebf4c, 0xb0a675b, 0x04c72a1,
+          0xadb6c14, 0x94f9ea4, 0xcf728c0, 0x03376d8, 0x4c6eb6a, 0x5c059d3,
+          0xeb8da48, 0x0178408, 0x2956817, 0x8bf607b },
+        { 0xceb3d28, 0x7ad2822, 0x37ae653, 0xd07a403, 0xc1e46b2, 0xbc68739,
+          0x9154ba9, 0x15d7cca, 0xa26617d, 0x6b97103, 0xb2e0d28, 0xa610314,
+          0xfd4d363, 0x52a08ba, 0xc7dc2af, 0x80c2638 }
+    },
+    {
+        { 0x3187140, 0x0cde7ef, 0x4b70acd, 0x93b92ca, 0x7a79cdc, 0x5696e50,
+          0x8eaab66, 0x73cc972, 0x8f1b0c7, 0x6b8c5b6, 0x4f7e0b1, 0xb39a318,
+          0x376108a, 0x72cfb0d, 0x98536a7, 0x0c53efc },
+        { 0x24c2f1e, 0x03b52a8, 0x6399b78, 0x717132e, 0x349a85d, 0x31ebd25,
+          0x1a200d4, 0x265ee81, 0x407d7ad, 0x0b1aad2, 0x94d2962, 0x9a9ebc8,
+          0x41171d9, 0x994e6cd, 0x6c8fa83, 0x09178d8 }
+    },
+    {
+        { 0xa2593a1, 0x7d1d238, 0xb38fb19, 0x863e93a, 0xe7712a9, 0xd23a4cc,
+          0x27efcd5, 0x7477b13, 0x1392f6c, 0x3ba69ff, 0xf7bb5a5, 0x63e0c32,
+          0x026effd, 0x20412c0, 0xef424ab, 0xd3ee8e4 },
+        { 0x64e5174, 0x14c0b2d, 0xe58c47b, 0x2a611f2, 0xc1e8635, 0xaa58a06,
+          0xcf17034, 0x1870c3e, 0x83f1bf3, 0xb0d5e34, 0x16c7eb3, 0xb19905c,
+          0x6efa4ca, 0xbf85d62, 0x180f92b, 0xfd16b2f }
+    },
+    {
+        { 0x3adcb48, 0xc0431af, 0xba90496, 0xc9a7a8d, 0x3895294, 0xd765a16,
+          0x551de70, 0xb02a41a, 0x749b8a1, 0xb71b261, 0xc6f3e47, 0x0dfa89e,
+          0x0f5d9ce, 0x392c0d8, 0x31aee3c, 0x43c59d8 },
+        { 0x4d76f49, 0x94bfb6d, 0x27d68a5, 0xe8f5b82, 0x630fd08, 0x78ae1d9,
+          0xce1bdae, 0x1379029, 0x66715dc, 0x9689da0, 0xd3278c7, 0x5d4cb24,
+          0x9e84fbc, 0x77c9833, 0xea1048c, 0xc8478dc }
+    },
+    {
+        { 0x770d2ba, 0xe4b8f31, 0x42ea095, 0x744f652, 0x036f138, 0xd06e090,
+          0x3b078ca, 0xd3a3d5b, 0x78b8417, 0xc7ae541, 0xc738fd7, 0xad6c5d4,
+          0x4676454, 0x6178984, 0x5d9a392, 0xfbf3423 },
+        { 0xfff772f, 0x8e451a7, 0x5ffbead, 0x8605bb7, 0x930d59f, 0x6f75cc1,
+          0x8f3f460, 0xd4f4755, 0x6700c8a, 0xefd2d79, 0x2406421, 0xceb462a,
+          0x9dfe8f1, 0x8ed0f97, 0xd1d7600, 0x0280bf1 }
+    },
+},
+{
+    {
+        { 0xdd9a54d, 0x761c219, 0x86a39c0, 0x1127fcb, 0x4c9bedd, 0x7d0e4f0,
+          0x4d976b6, 0x27c017a, 0xda042cf, 0x800c973, 0x2593f11, 0xe7419af,
+          0xae67960, 0xbd49448, 0x744fd85, 0xd3b60b7 },
+        { 0x61676fe, 0x5e74ed9, 0x39af627, 0x7383ef3, 0x5e62df7, 0x34407e0,
+          0x8bf3196, 0xb053461, 0x583b407, 0xd6b7184, 0x55011be, 0xe3d0685,
+          0x2124b52, 0x94083d0, 0xf780aaf, 0xa908324 }
+    },
+    {
+        { 0x73ec9c3, 0xb27af1a, 0x70fa725, 0xb66ad9f, 0x8cf73e4, 0x07724f5,
+          0x9949358, 0xc3fcd57, 0xda0cc01, 0x06efb79, 0x10597c9, 0x1e977d2,
+          0x703e8d6, 0xcd732be, 0x6d0b69e, 0x6fd29bf },
+        { 0x667128e, 0xca658ac, 0xc7872b3, 0xca0036a, 0x5355837, 0xc969858,
+          0x075cf1c, 0x59f3be8, 0x3809a11, 0x9f1b9b0, 0x9733871, 0x6881ced,
+          0xe902a5f, 0x8cda0fb, 0x4e3871e, 0x4d8c69b }
+    },
+    {
+        { 0xddee82f, 0x5c3bd07, 0x2f9723b, 0xe52dd31, 0x74f1be8, 0xcf87611,
+          0x35f8657, 0xd9ecbd8, 0xfbfea17, 0x4f77393, 0xd78fe2c, 0xec9579f,
+          0x0fb0450, 0x320de92, 0x95d9c47, 0xbfc9b8d },
+        { 0x5e1b4c3, 0x818bd42, 0x40e2c78, 0x0e0c41c, 0xbccb0d0, 0x0f7ce9a,
+          0x5ef81fb, 0xc7e9fa4, 0x73574ad, 0x2561d6f, 0xd2efb0b, 0xa2d8d99,
+          0xe96cd0a, 0xcf8f316, 0x4964807, 0x088f0f1 }
+    },
+    {
+        { 0x45d5a19, 0x0a84989, 0x6c2131f, 0x47ab39c, 0xf3fc35d, 0x5c02824,
+          0x9ee8127, 0x3be77c8, 0xc90b80a, 0xa8491b7, 0xa28aa93, 0x5397631,
+          0x6c0b344, 0x54d6e81, 0x876d0e4, 0x22878be },
+        { 0x6db3bf6, 0xeecb8a4, 0x54577a3, 0x340f295, 0x9a00f85, 0xa779868,
+          0x4bb9147, 0x98465d7, 0xda3c736, 0x9532d7d, 0x7504b20, 0x6d574f1,
+          0xd86e435, 0x6e356f4, 0x4533887, 0x70c2e8d }
+    },
+    {
+        { 0xd293980, 0xdce5a0a, 0x069010e, 0x32d7210, 0x06deaaa, 0x64af59f,
+          0x59239e4, 0xd6b43c4, 0x9199c29, 0x74bf255, 0x11e1e2b, 0x3efff41,
+          0xcb0f8d8, 0x1aa7b5e, 0x989e395, 0x9baa22b },
+        { 0x7b33ac1, 0xf78db80, 0x54ce80a, 0x05a3b43, 0x7bc8e12, 0x371defc,
+          0x1224610, 0x63305a0, 0x6d697ef, 0x028b1ae, 0x1cd8051, 0x7aba39c,
+          0x28ee4b4, 0x76ed7a9, 0x7f99901, 0x31bd02a }
+    },
+    {
+        { 0xf075566, 0xf9dab7a, 0xf56f18b, 0x84e29a5, 0xf64e56d, 0x3a4c45a,
+          0x6a7302d, 0xcf3644a, 0x156b658, 0xfb40808, 0xf96be52, 0xf33ef9c,
+          0xcaa2f08, 0xfe92038, 0xb261894, 0xcfaf2e3 },
+        { 0x224ce3f, 0xf2a0dbc, 0x592eb27, 0xed05009, 0x95889d0, 0x501743f,
+          0x77c95c2, 0xa88a478, 0xdd63da9, 0x86755fb, 0xc7ee828, 0x9024acf,
+          0xf38113b, 0x634b020, 0x6056e64, 0x3c5aacc }
+    },
+    {
+        { 0xa2ef760, 0xe03ff3a, 0xb1c3bac, 0x3b95767, 0x940d754, 0x51ce6aa,
+          0x47a9a3d, 0x7cbac3f, 0x34f8d1a, 0xa864ac4, 0x80dbd47, 0x1eff3f2,
+          0x7ebd5ca, 0xd8ab660, 0x05b07ed, 0xc4df5c4 },
+        { 0xa4f095b, 0x3dc92df, 0x7cdbd9a, 0x5ae36a5, 0x7891e04, 0x7ff2973,
+          0x0a5fe7b, 0x37c0313, 0xaa6e35e, 0x210d7b0, 0xbf200d8, 0x6edfb53,
+          0x84afb85, 0x787b68d, 0x72c6de3, 0x9b5c49b }
+    },
+    {
+        { 0x4010f4e, 0x5185716, 0x0536ebe, 0xe0b144b, 0x887d663, 0xacabb14,
+          0xedf584f, 0xac1caed, 0xaf175a3, 0xb43fb8f, 0xf992a3c, 0x310b6d5,
+          0x85178a4, 0xf2c4aa2, 0x8bd56bf, 0x69c9969 },
+        { 0xa4d972e, 0x73d6372, 0x9583803, 0x3d5bb2e, 0xd891581, 0x7bf7d18,
+          0x568a34a, 0xa5ce5d7, 0x1f45c81, 0x670b433, 0x1f96910, 0x97265a7,
+          0xb07c1ea, 0xdb14eb3, 0xfed447c, 0xdf008ea }
+    },
+},
+{
+    {
+        { 0x00c2f10, 0x0379f5a, 0xd350285, 0xb320b4f, 0x8efdd7d, 0x74e560e,
+          0xf46a140, 0xf2f017e, 0x0f34624, 0x2ced1a6, 0xca08ec9, 0x7c4b4e3,
+          0x5d8bc6b, 0xdffc2a1, 0x527b007, 0xcc8f3f3 },
+        { 0x861fe83, 0x59f8ac4, 0xd03144c, 0x8d48d2c, 0xbfa6dce, 0xa8457d2,
+          0x677c136, 0xd7ed333, 0xc228e18, 0xcb8e219, 0x16ab1e4, 0x5f70bc9,
+          0x3780370, 0x2ae3a3d, 0x88f17ad, 0x9f33654 }
+    },
+    {
+        { 0x960e4bb, 0xeab0710, 0xab9cfd3, 0xc668a78, 0xb0ef946, 0x2e85553,
+          0x8df5df3, 0xa43c4b9, 0x3cb3646, 0x0ecd559, 0x18dbe71, 0x6f543c4,
+          0xf59818b, 0xee7edaa, 0x90911c1, 0xc44e8d2 },
+        { 0x269b509, 0xafb38b1, 0x52afe2c, 0x9e2737c, 0xccfa664, 0x5b2ef02,
+          0xe1cc58b, 0x1e0aeac, 0x5ea134e, 0x37a57e9, 0x83b9fc2, 0xc9c465a,
+          0x6e3ecca, 0x4b9e8c7, 0x9bdbab5, 0xca07dbe }
+    },
+    {
+        { 0xb0d7807, 0xd297f3c, 0xf59ce61, 0xee441a5, 0xb2db844, 0x728553b,
+          0x640e9e0, 0x90f87e5, 0xcb76dff, 0xaa72cbf, 0x4012d57, 0x065c686,
+          0x9678b44, 0xd5ee88f, 0x2177603, 0x3d74b85 },
+        { 0x748b68e, 0x3f9c947, 0x8f44d44, 0x03856d9, 0x462426c, 0xde34b84,
+          0x845ab29, 0xc16d1bb, 0xd2e18de, 0x9df6217, 0xb154643, 0xec6d219,
+          0x2ee0f8f, 0x22a8ec3, 0x91c5175, 0x632ad38 }
+    },
+    {
+        { 0x6869267, 0x19d9d23, 0xfe5532a, 0x628df94, 0x6dc9a01, 0x458d76c,
+          0x2cc39c8, 0x405fe6c, 0xf3a04ba, 0x7dddc67, 0x12500c7, 0xfee6303,
+          0xa50e9de, 0x580b6f0, 0x6090604, 0xfb5918a },
+        { 0x3af6b2d, 0xd715925, 0x1c7d1ec, 0x83d62d6, 0x85858c4, 0x94398c1,
+          0x14bfb64, 0x94643dc, 0xaf7db80, 0x758fa38, 0xa8a1557, 0xe2d7d93,
+          0x3562af1, 0xa569e85, 0x84346aa, 0xd226bdd }
+    },
+    {
+        { 0xd0ccd20, 0xc2d0a5e, 0x5dbc0cf, 0xeb9adb8, 0x26d7e88, 0xe0a29ee,
+          0x84a8e98, 0x8bb39f8, 0x37396ea, 0x511f1c1, 0xc8b2fb3, 0xbc9ec5a,
+          0x090e5bc, 0x299d81c, 0x4cdd587, 0xe1dfe34 },
+        { 0x5e465b7, 0x80f61f4, 0x1bad59e, 0x5699c53, 0xb79ff92, 0x85e92e4,
+          0x9db244c, 0x1e64fce, 0xa22097d, 0x3748574, 0xefff24e, 0xe2aa6b9,
+          0x0a10bc6, 0xb951be7, 0x9067a1c, 0x6685326 }
+    },
+    {
+        { 0xa6114d3, 0xf716ddf, 0x037ec1f, 0x9e515f5, 0x44944a6, 0x7734541,
+          0xaba97cc, 0x1540c4c, 0x8b54bb7, 0xe41e548, 0xcae37bc, 0x4363156,
+          0xf3d2ce8, 0xc384eaf, 0x4c58ba4, 0x72a4f45 },
+        { 0xdcaf3fc, 0x0ceb530, 0x78dcdbb, 0x72d5365, 0xc6320fa, 0x9b44084,
+          0xeb74c70, 0x6262d34, 0x608e6dc, 0x8abac85, 0x10dd38d, 0x82a5264,
+          0xa819b8d, 0xbc39911, 0x03ad0d9, 0xbda15fe }
+    },
+    {
+        { 0xf9dc60b, 0xadbf587, 0x7d846d2, 0xf9d814f, 0xb77bde0, 0xccdd241,
+          0x2242f50, 0x89cb6d7, 0xe6360a8, 0x95c0e3e, 0xdf49713, 0x7c7dd5a,
+          0x57d5814, 0x68e0e49, 0x0c16571, 0x3aa097d },
+        { 0x267d03a, 0xb56b672, 0x8c44af4, 0x4f55708, 0xf3252a5, 0x67c49e7,
+          0xc94a469, 0x871d6cf, 0x01fbfaa, 0x57ae998, 0x48a5d8e, 0x5c0e48f,
+          0x5e240b9, 0xe9bf9c8, 0x99d41ca, 0xa410189 }
+    },
+    {
+        { 0xb2889b4, 0x6beb0c7, 0x9455370, 0x78b7f89, 0x47ca364, 0xd434214,
+          0x9f21e5b, 0xdd9d2da, 0x0a7e4aa, 0xa0c7c18, 0xda1660c, 0x022c0d4,
+          0x5a57002, 0xe1f5c16, 0x518f68f, 0x51c7c9e },
+        { 0x2586502, 0x6d521b6, 0x183ec1b, 0xa0f2cb3, 0xcaa5e16, 0x578b4e0,
+          0x764997f, 0x7bd4fbd, 0x64b1804, 0x7ec56c3, 0x0ee08e4, 0xb75a254,
+          0xdc19080, 0x6bf74a6, 0x97d6e59, 0x6ec793d }
+    },
+},
+{
+    {
+        { 0x0a4beb9, 0x16789d6, 0x9b9c801, 0x512b2cd, 0x8c7bb9c, 0xf8b6d10,
+          0x9ebdc8c, 0xd85651e, 0x9ba971a, 0xc945082, 0x7e1cf78, 0x852d9ea,
+          0x0af01e2, 0x6a45e35, 0x6151dcf, 0xe6cdadf },
+        { 0x2b8c01b, 0xc454bb4, 0x3d54cd2, 0x59e0c49, 0x454d608, 0x8e1e686,
+          0xd8c6103, 0x0dbae4b, 0x6c18b18, 0xa5603a1, 0x3369093, 0x227a6b2,
+          0x5f3de1c, 0xf1e8929, 0x8ab63c5, 0x42f0b58 }
+    },
+    {
+        { 0x5b596d8, 0xf1974cc, 0x44719f0, 0xee8093f, 0xf6f5b54, 0x40ba933,
+          0x2f3d654, 0xd6e5365, 0x26d73b8, 0x9aeb835, 0x0776382, 0x50ed535,
+          0xad43875, 0x3be47d6, 0xc786e48, 0x21d56df },
+        { 0xb73bb39, 0x8a75e18, 0xf265a78, 0x9eba84c, 0x2e772e7, 0x7c02a4d,
+          0x4c1ecd2, 0xf7df6d4, 0x6cef71b, 0xa8d9ea0, 0xcae3b68, 0x86e8f91,
+          0x99efefa, 0x2fd1411, 0x214e6f6, 0x0b36ab2 }
+    },
+    {
+        { 0xbdce61c, 0xd79065c, 0xdecb229, 0xcb562ff, 0x4600849, 0xef5d3d1,
+          0x1d23ac8, 0x348b31b, 0x15c36b8, 0xb2ea699, 0x4822836, 0x268683d,
+          0xc6f0b7d, 0x083edbe, 0x1a7821c, 0xaf4f39d },
+        { 0x4e64841, 0x23be6e8, 0x65bf791, 0xe9e2463, 0x02bfd7c, 0xa3208ac,
+          0xd01357d, 0x231989c, 0x6422ab4, 0x79b8aad, 0x91b8564, 0x57d2b7e,
+          0x8c04421, 0x28ebbcc, 0x7d09c05, 0xdc787d8 }
+    },
+    {
+        { 0x6c7bed5, 0xeb99f62, 0x39cd0e8, 0x326b15f, 0xd860615, 0xd9d53dc,
+          0x1bf4205, 0xdf636e7, 0x0752209, 0x1eaa0bf, 0x4744abb, 0x17ce69a,
+          0xf3ea2fb, 0x474572d, 0x224a7f3, 0xc4f6f73 },
+        { 0x63081b4, 0x7ed86ad, 0x4a20afb, 0xcd4cdc7, 0xb301b2e, 0x7563831,
+          0xe038699, 0x5b4d2b1, 0x802a15f, 0xa15d1fa, 0x13e9172, 0x6687aaf,
+          0xba6da90, 0x3eccd36, 0x7474e83, 0x34e829d }
+    },
+    {
+        { 0x19c9b27, 0x4cea19b, 0x5f52523, 0xa14c37a, 0x726625c, 0x248b16d,
+          0x6cabc21, 0x8c40f9f, 0x32a5c65, 0x918470c, 0x2a98d5b, 0x314056b,
+          0x34a0714, 0x6c974cf, 0x4f6314a, 0x0c8f8a9 },
+        { 0x70bccfd, 0x4844557, 0x740c9fd, 0xf5835db, 0xa21407c, 0x12e59b5,
+          0xdb1689d, 0xbe338e0, 0xdd5e915, 0x5a50ce9, 0xef99f39, 0xb1780e9,
+          0xee4d833, 0x1262b55, 0x89c5340, 0x4be3f22 }
+    },
+    {
+        { 0x6c4b858, 0xbb99b90, 0x550ca53, 0xa7724d1, 0x826962e, 0x7d31f5a,
+          0xa5804da, 0xf239322, 0x0275048, 0x3e11320, 0x3ee4cb6, 0xcbb1bb8,
+          0x1331191, 0xdb86525, 0x7d1d903, 0xb7caf9e },
+        { 0x77d7a9d, 0x06e3b05, 0xb3bbbf5, 0x7a132b0, 0x7c50575, 0xd61fbc5,
+          0xaf4b646, 0x393f712, 0xcb7efe9, 0xef77972, 0x5ea4995, 0x20e6d5d,
+          0xfbbe4c6, 0x0ac23d4, 0xc807f2a, 0x8456617 }
+    },
+    {
+        { 0x5396143, 0x4995fb3, 0xb99dc46, 0xa8b4bd1, 0x4150064, 0x2293e8e,
+          0x22a3545, 0x2f77d49, 0xb2192c4, 0xe866b03, 0x5e0aa38, 0x58b01f0,
+          0x2ed246b, 0xe406b23, 0xed60974, 0x447edb3 },
+        { 0x8869703, 0xf541b33, 0x383420a, 0x6959fe0, 0x4be4e48, 0xd6b39db,
+          0xb5714ef, 0x048f3b4, 0x5d9e4b8, 0x68b4968, 0x2177963, 0xbda8e6c,
+          0xc4211fe, 0x5094e35, 0x2d46d1a, 0xea591c3 }
+    },
+    {
+        { 0x2fef780, 0x3a768ff, 0x32970c6, 0x4218d28, 0xec6da17, 0xce598e4,
+          0xfbb126a, 0xf675645, 0x0427617, 0xb04c23f, 0xe4fce74, 0xc9f93fb,
+          0x3c91b00, 0x44a414b, 0x1d3b3cc, 0x4d982f3 },
+        { 0xb24cce0, 0xb1d40e8, 0x133e73d, 0x5a21c07, 0x0bb589d, 0x6e9358e,
+          0x2399844, 0x39cfb17, 0x166080e, 0x83f7647, 0x450b468, 0xcfe7bf8,
+          0x1e8434f, 0x2a288f7, 0x21a81e3, 0xd39f1e5 }
+    },
+},
+{
+    {
+        { 0x528af6f, 0x78c6f13, 0x94b74d9, 0x0001fe2, 0x01aab44, 0xae77425,
+          0xef0039c, 0x7cbe937, 0x0fa2a67, 0xaf3e4f0, 0xda1378e, 0xe28175f,
+          0x8ccd90e, 0x72adeed, 0x00af22f, 0x16a8ce1 },
+        { 0xcbf63dd, 0x69fae17, 0x9e39e26, 0x6786172, 0xf827a18, 0xe92b3d5,
+          0x8403682, 0x4d75e41, 0x9056a79, 0x01a4fd9, 0x20008f5, 0x89efb2d,
+          0xb78ff15, 0xa2f6918, 0xa3437f5, 0xf41c870 }
+    },
+    {
+        { 0x7be353c, 0xc840ae5, 0x3fb2691, 0x465a5eb, 0x7eba833, 0x34a89f0,
+          0x013346e, 0xf620896, 0xe875df2, 0x563b5f0, 0xfbc44ce, 0x5f7fc8b,
+          0xcfedf9d, 0x22fcb5a, 0x7dc691b, 0x7cf68d4 },
+        { 0x76a103f, 0x37f7c2d, 0xfd87b7d, 0x728a128, 0xccf2132, 0x7db2ad8,
+          0xb100e63, 0xa4c13fe, 0x7b511d5, 0xcd28a51, 0x721ca5c, 0xb910280,
+          0xd84bd52, 0xec1305f, 0x2729791, 0xb964642 }
+    },
+    {
+        { 0x5bc7462, 0x83fccdf, 0xd6f012f, 0x01f3dda, 0x3a6a87c, 0x57f1171,
+          0xff403ac, 0xedb47ce, 0xbaab073, 0x6c184e5, 0x6f0d6a1, 0x5b17c7d,
+          0x3ef2c91, 0x45a4c4f, 0x86a8f41, 0x26c3f7e },
+        { 0xb646514, 0x81a6db0, 0xca8b9ae, 0xf84059f, 0x9f02305, 0xd73dab6,
+          0xc4b7c6c, 0x0de3fae, 0x696df2f, 0x18abb88, 0x75d7740, 0x45dd1b9,
+          0x9ee35bc, 0x3aeccc6, 0xb029f88, 0x478252e }
+    },
+    {
+        { 0x8b2ce15, 0x66bf85b, 0x335709d, 0x1175425, 0x8123874, 0x00169ef,
+          0x9b89868, 0xfd3c18c, 0x775204e, 0xb3612f9, 0xc2cd510, 0x4b8d09d,
+          0x14559ad, 0xafa12e6, 0x9657493, 0x1ddaa88 },
+        { 0x1e77a08, 0x87d700b, 0x14d2e71, 0xaf4cf2f, 0xbf90c94, 0xe00835d,
+          0x6dc8429, 0xb16a6ec, 0xf8a4d92, 0x02a7210, 0x3d0c48d, 0x5a5ab40,
+          0xb5b9bea, 0x0052b3a, 0xe138f89, 0x6242739 }
+    },
+    {
+        { 0x16b2819, 0x7c215d3, 0xfeb9d7a, 0xdacb65e, 0xd833423, 0xc3c569e,
+          0x886a058, 0xbc08435, 0x7e5cb61, 0x132c4db, 0x9422aff, 0x6373a27,
+          0xfca9fc4, 0x43b9d7e, 0xdbe465f, 0xe3319a5 },
+        { 0x0b39da7, 0x51d3687, 0x4b75492, 0xcb6d798, 0xeadd87a, 0x77eb272,
+          0xe0d3f6c, 0xf2fb47d, 0xf9f791c, 0x807fd86, 0x975e885, 0xf01086b,
+          0xb6a3604, 0xf9314b5, 0x67be852, 0x8cd4538 }
+    },
+    {
+        { 0x858f79b, 0x7c1e6b3, 0x938caf9, 0xf0477c4, 0x3e88c44, 0xb311bbf,
+          0x1e3a3c1, 0x9234c09, 0x95a1d4d, 0x531af2b, 0xb8d1c64, 0xf3cc969,
+          0xb51e78d, 0x6f3c328, 0x34e8881, 0x5a1bd6c },
+        { 0x3a9336f, 0x2e31239, 0x5ced897, 0x020f0cc, 0x5fab121, 0x4b45d7b,
+          0x1841210, 0x8068b1c, 0x8349170, 0x1bd85fc, 0x0f97fe5, 0xfe816d8,
+          0x14b84fc, 0x1089818, 0xb93cd48, 0x1d4fabb }
+    },
+    {
+        { 0xaef599e, 0x1f11d45, 0xb09c58a, 0x8d91243, 0xd08c3c3, 0xd2eec7b,
+          0x3b02793, 0x5a6039b, 0x8fb2c00, 0xb27fed5, 0xe8acf5e, 0xb5de44d,
+          0x6e6c698, 0x2c3e0cd, 0x777180d, 0x2f96ed4 },
+        { 0x96d0e36, 0x67de8bf, 0xc9b6d65, 0xd36a2b6, 0x637d59c, 0x8df5d37,
+          0xc8d9878, 0x951899f, 0xb13fcf8, 0x0fa090d, 0x1f5c7b4, 0xa527081,
+          0x513a37a, 0x56a6560, 0x14dc1fe, 0xc6f5530 }
+    },
+    {
+        { 0x94945d6, 0x7f6def7, 0x8cc8832, 0x2f52fe3, 0xa812ff5, 0x0228ad9,
+          0xbb8478a, 0xcd282e5, 0xbe91b07, 0xa0bc9af, 0x11165e2, 0x0360cdc,
+          0x7b857e4, 0xb5240fd, 0xfa36b08, 0x67f1665 },
+        { 0xad2c93f, 0x84ce588, 0xe8ff4c0, 0x94db722, 0x489c8a3, 0xad2edbb,
+          0x7e5f278, 0x6b2d5b8, 0xd1d0798, 0x0265e58, 0x4c5589e, 0xd2c9f26,
+          0x4e4074d, 0xde81f09, 0x303089f, 0xc539595 }
+    },
+},
+{
+    {
+        { 0x83e882c, 0x183492f, 0xb5e6c12, 0x4d58203, 0xefec20b, 0x1ac96c3,
+          0xe1cd15e, 0xabd5a5b, 0xcbbb14b, 0x7e1e242, 0xd0543b3, 0x9f03f45,
+          0xd678158, 0xc94bc47, 0xa446cad, 0x7917be0 },
+        { 0x9b37394, 0x53f2be2, 0x064cc76, 0x0cb0a6c, 0xfba3da3, 0x3a857bc,
+          0x80fcb49, 0xac86bc5, 0x30ab146, 0x9d5336e, 0x5bc1270, 0xafb093d,
+          0xe5c3b6e, 0x996689d, 0xea076ba, 0x55189fa }
+    },
+    {
+        { 0x646ce03, 0x99ef986, 0x30e6100, 0xa155f81, 0x29b6b07, 0x75bef17,
+          0x1de077b, 0xc46f08e, 0x7ed0526, 0xf52fdc5, 0x61a299a, 0xe09d989,
+          0x7b8e93a, 0x9527329, 0x0acd185, 0x11255b5 },
+        { 0x4a6acdd, 0x57919db, 0x4451d74, 0x708a578, 0x283f7b3, 0x5b0bd01,
+          0xc3d9260, 0xe82f40c, 0x82bbdc2, 0x2ab96ec, 0xc164d87, 0x921f680,
+          0xc17a6a9, 0xf0f7883, 0x382a001, 0xc366478 }
+    },
+    {
+        { 0x2e40791, 0x5c9aa07, 0xa0776bf, 0xf0b72d6, 0xeaa50dc, 0x445f9b2,
+          0x6bda47f, 0xa929fa9, 0x3bbfc49, 0x539dc71, 0x006a78b, 0x4f16dd0,
+          0xeef39c7, 0x331ba3d, 0xc34157c, 0xbfa0a24 },
+        { 0x6a3b482, 0x0220beb, 0x6c43885, 0x3164d4d, 0xacdea23, 0xa03bb5d,
+          0x9d8f450, 0xd6b8b5a, 0xbd208fe, 0xd218e65, 0x35c476f, 0x43948ed,
+          0x0a2ed2b, 0x29a0dd8, 0x25295b7, 0xa6ccf33 }
+    },
+    {
+        { 0xac38939, 0xf68f15f, 0xf8010c1, 0xb3dd5a2, 0xa35f141, 0xf7ac290,
+          0x7388574, 0xdc8f3b2, 0xe95fed2, 0x7ec3de1, 0x257ac7d, 0xc625451,
+          0x664e55a, 0x66fc33e, 0x4832ba5, 0xd3968d3 },
+        { 0xc026448, 0x980291b, 0x24da4a5, 0xfcb2125, 0x827a360, 0xbca7df4,
+          0x85ca63b, 0xfcc395c, 0x8e9f733, 0xcf566ec, 0xd465f70, 0x835ee9b,
+          0x372f916, 0xe66d111, 0x04d9211, 0xc066cf9 }
+    },
+    {
+        { 0x8b48818, 0xb9763a3, 0x4288f96, 0xa6d23cc, 0xed3a229, 0xe27fcf5,
+          0xabaff00, 0x6aebf9c, 0x8131cd1, 0xf337503, 0xffabd58, 0x13ad41d,
+          0x861c83b, 0x1bee6af, 0x9c142e7, 0x274fe96 },
+        { 0x9b84b5b, 0x70ebcc9, 0x8191cfc, 0xe1a57d7, 0xcbf00b8, 0x46ccd06,
+          0xefe402d, 0xc233e8e, 0xbeebeb3, 0xb4ab215, 0xbd14e7b, 0xb7424ea,
+          0xa679578, 0x351259a, 0x471d684, 0x6d6d01e }
+    },
+    {
+        { 0x815ae38, 0x755c465, 0x611db56, 0xadc3e85, 0x188dd50, 0x633999b,
+          0xc12d907, 0xfdf7509, 0x238b6af, 0x25bcfde, 0x397f5e7, 0x50d705d,
+          0x944c974, 0xb65f60b, 0x27ac325, 0x8867fc3 },
+        { 0x3763eff, 0x2edc441, 0x341fb63, 0x892c0b3, 0xb3a7f28, 0xb34b83a,
+          0x15c2f18, 0x9aa106d, 0x1bb2277, 0x720bbc6, 0x5cfaefd, 0x637f72a,
+          0xf43e565, 0xf57db6e, 0xb58e772, 0xceb7c67 }
+    },
+    {
+        { 0x6ecc1de, 0x2793da5, 0x38f31b2, 0x4e10974, 0x8781267, 0x4229b4f,
+          0xdec04a1, 0xe5d2272, 0xec17cff, 0x6abb463, 0x0cbb048, 0x28aaa7e,
+          0xd22ef85, 0x41dc081, 0x5e63d0f, 0xcbc361e },
+        { 0xad5dbaa, 0xb78aafc, 0xfc1edc3, 0x0111505, 0x92c7bfa, 0x63ed66d,
+          0xe468919, 0x2982284, 0xb8c0d8c, 0x30f1f21, 0x2685093, 0xf056747,
+          0xf03dd0f, 0x0e085b6, 0x5581e66, 0xa8c8db8 }
+    },
+    {
+        { 0x264ad0c, 0x42009a6, 0x593bef4, 0x13bf2b8, 0x5d4e8b1, 0x1d11190,
+          0xef7bddc, 0xfe3e940, 0x624e62c, 0xa012275, 0x1d6d3cc, 0xcb65924,
+          0xedb7ab6, 0xc7bcc70, 0xb750b1c, 0xff9fafb },
+        { 0x7fea84b, 0xf65df29, 0x90b0e02, 0x17c84a8, 0x301e821, 0xa92a859,
+          0xfb480d1, 0xbee8cb2, 0x59c604e, 0x7010b8c, 0xe803c43, 0x47bf3f4,
+          0x47b3fff, 0xd645142, 0x9f0da13, 0xc4c5dcb }
+    },
+},
+{
+    {
+        { 0xb5253b3, 0x8af700c, 0x206957a, 0x31ca605, 0x3eafdcd, 0x2574439,
+          0xd3ae15e, 0x2ba5ae1, 0x5b82579, 0x710b738, 0x112b95a, 0x145ab57,
+          0x38c55c5, 0x4b133a0, 0x2a16fef, 0xf7559c9 },
+        { 0xd9ba896, 0x70c3e68, 0xc33d07a, 0x475dd32, 0x3a41e40, 0xe084e47,
+          0xfd2e706, 0xddc9382, 0x79510bd, 0x34b7275, 0xa5f901e, 0x5e78a69,
+          0xdcfb823, 0x429dfd7, 0x014f0a3, 0x1d9dc18 }
+    },
+    {
+        { 0xaf403d7, 0x364fcdf, 0xb7d7b34, 0xd9ea4ff, 0xcbb1dac, 0x21a3426,
+          0x143b4f5, 0xfa51052, 0x6df2409, 0x2bca073, 0x8ad7285, 0x7e6985a,
+          0x4aaa27f, 0x3a1a9d0, 0x9fc0c6c, 0x1a815e1 },
+        { 0xbb65bb3, 0xfab6147, 0x33ced0b, 0xa36dc0d, 0x2062d78, 0x26a8859,
+          0x28a5fb7, 0x3438617, 0x4ebb1ad, 0xe82da25, 0xd05aa11, 0x70f5071,
+          0xadaac48, 0x0b7f847, 0x93cb269, 0xeb812bc }
+    },
+    {
+        { 0xf7caccc, 0xcb317cc, 0xcf85098, 0xd3410d9, 0x7f078d7, 0xca68c8d,
+          0xb782efc, 0xfe9e812, 0x5f544b5, 0x32e7c0f, 0x3a7b7f2, 0x44fe95a,
+          0xe91327b, 0xf4f1543, 0x76645ed, 0x27d118d },
+        { 0xd7abc2c, 0x690547c, 0xb53c8af, 0xf64680f, 0x79ea989, 0xbe0cbe0,
+          0xa91af28, 0x6cf0cce, 0x9daa2f9, 0xa3b85a2, 0x91faed0, 0xd4b663c,
+          0xa8b20ba, 0x782c7b7, 0xb8d98ce, 0xf494faf }
+    },
+    {
+        { 0x002f55a, 0x080c0d7, 0x2d6d9dd, 0xf4f8f14, 0x382f025, 0xb326229,
+          0xad28c20, 0x58fd0b5, 0x8d06a15, 0x704b992, 0x7fbd8e4, 0xf4545d9,
+          0xed55581, 0xc32fa63, 0x01ac0fd, 0x3ab7936 },
+        { 0x6099fd1, 0x13ece52, 0x9c79178, 0x776dba8, 0xce26c45, 0x8d28212,
+          0x60d739c, 0x09fddaf, 0xa84826e, 0xf9931ed, 0xb29439e, 0x6e73d90,
+          0x9095e61, 0x94cfefc, 0x802f474, 0x3050d16 }
+    },
+    {
+        { 0x9f6394b, 0x0898f8f, 0x88b0e91, 0x48b8cea, 0x4c1b362, 0x4bc9925,
+          0x827d9ec, 0xe3fccb4, 0xd950d6a, 0x5d4cf9a, 0x39b5b38, 0xa16f1ef,
+          0x620f288, 0x3c76d1d, 0xe119390, 0x9fdd059 },
+        { 0xfb5edf8, 0x7b5de9e, 0x769d14e, 0x3e290b9, 0x6bd10b5, 0x4df3a91,
+          0x82f8f7b, 0xae99bca, 0xc9524af, 0x5481d5d, 0x69504f1, 0xf112e4f,
+          0x51931ec, 0xb048f09, 0x18f51b1, 0xbff876a }
+    },
+    {
+        { 0x46c1c37, 0x932e2a7, 0x9aea4c1, 0x903ad52, 0x8f161f2, 0x717ac91,
+          0xf425e2a, 0xa57d197, 0x7f39e0e, 0xae89dac, 0xbaa2a58, 0x91655c0,
+          0x54836dd, 0xe3dc286, 0xa9ec9e6, 0xb5f0baa },
+        { 0xbdbda04, 0xf7c4662, 0x51059c0, 0xbe5393b, 0xdd95b0f, 0xb16d552,
+          0x1b3bd96, 0xde495b3, 0xc0206c5, 0xb2a6e02, 0x014d3a9, 0x045cc09,
+          0x2a2f490, 0xf66a315, 0xc5dea05, 0x208c108 }
+    },
+    {
+        { 0x65237ea, 0x6e38b68, 0x9f27fc6, 0x93a1303, 0xa95068a, 0x9a6d510,
+          0xe7c9e54, 0x6fbf216, 0x571ac1d, 0x7824290, 0x91c2a0c, 0x8cb23ba,
+          0xc7e434d, 0x611202e, 0x76058b4, 0x8f901bf },
+        { 0x0849588, 0xef0ac05, 0xdd31804, 0xe0d2dde, 0xeb2ca81, 0xaf5417c,
+          0x5d1a509, 0x420ac06, 0x9683bb6, 0x46e345e, 0xf613f7f, 0x6daf635,
+          0x48a9576, 0xc9e8291, 0x176d147, 0x5f9f1d1 }
+    },
+    {
+        { 0x77e9709, 0xd24ae1d, 0x0047b8a, 0x77751dc, 0xc6a1593, 0xe325334,
+          0x671f86a, 0x9baf962, 0xc29a15e, 0x425af6a, 0x2796e33, 0x3108600,
+          0xfc253a5, 0xb6ea78c, 0xafae0ea, 0x4c733e0 },
+        { 0x97c99b9, 0x4b7443a, 0x50203a6, 0xc14e9e4, 0x52680ba, 0xd1bb515,
+          0xd55533a, 0xa56a3ef, 0x169e1a0, 0xa66e38c, 0xeed7da0, 0xb3e4df9,
+          0xddce3d9, 0x022c937, 0xf6e36b4, 0x8552089 }
+    },
+},
+{
+    {
+        { 0xf5cc82e, 0x8e4bf95, 0xc3ed6c9, 0x2ad80c3, 0xc9045e1, 0xf2e5b2c,
+          0x59b06d4, 0x42c9065, 0x7b43b84, 0xc1f7379, 0x72d7992, 0x1710dbf,
+          0x767b41c, 0xe98cf47, 0x7bfb9e9, 0xe713fce },
+        { 0x9fa5134, 0x9f54ae9, 0xde40d0e, 0x3002fd8, 0x9311334, 0xdc282b7,
+          0xbfeb360, 0x5519810, 0x0f96ffe, 0x31539c7, 0xd27777b, 0x04eacc0,
+          0x8ff5053, 0x5982410, 0x32b67ad, 0x5982366 }
+    },
+    {
+        { 0x6bea5c2, 0x6eb4554, 0xd509a33, 0x82cfae0, 0x394bb59, 0x6a69bd8,
+          0x5770ee1, 0x1880d8d, 0x7dacf9e, 0x6351844, 0xf02b891, 0x5b1ecc5,
+          0xb6c9a5a, 0xeb7d900, 0x8897da8, 0xdab8a76 },
+        { 0x98851a6, 0x28c7be5, 0x4d73c3b, 0x0101d4f, 0x5084996, 0x3c2569c,
+          0x280bde0, 0xb9bc911, 0xcd0d4f9, 0x513a22a, 0x2a15f3b, 0xdf2986d,
+          0x2aa4943, 0x231c28f, 0x0333870, 0x29623ad }
+    },
+    {
+        { 0x4084416, 0x2ceb178, 0x49516cd, 0x924cf1c, 0x4be856f, 0x76536c0,
+          0x47a265b, 0x11b59cd, 0x4999494, 0x720dc84, 0x007b795, 0x910f794,
+          0x2d3df83, 0x8434e14, 0xbd478d3, 0x8f53878 },
+        { 0xaeb9c2f, 0xd9b072e, 0xfd8a29f, 0x16f87ea, 0x2fd0de1, 0x8c42f9b,
+          0x0e816ef, 0x916721e, 0x18bde37, 0x2ecb470, 0x2375da2, 0xcde3b7a,
+          0xef94281, 0x30d0657, 0x5cd7af8, 0x5105456 }
+    },
+    {
+        { 0x4bdced3, 0x7230b33, 0x0838569, 0x0c6a3e1, 0xe3493b8, 0xf19c9ec,
+          0x0d97c57, 0xf275927, 0x0c862eb, 0xf14181e, 0x32c72bc, 0xfd3bac1,
+          0xf3be362, 0x620563f, 0x47283b7, 0x672ccaf },
+        { 0x2b7bf16, 0x191e3fa, 0x520dad7, 0xf838633, 0x3629d87, 0xd3dde55,
+          0xaf86ebe, 0x14d8836, 0x221b2ce, 0x3db7dfb, 0x0aed72a, 0x3872abb,
+          0x8c665b7, 0xb60de52, 0x44982cb, 0x89c2596 }
+    },
+    {
+        { 0x4dbba25, 0x799a2de, 0xa42715e, 0xd818aae, 0xf55c362, 0xbc88f4d,
+          0x713c9ae, 0x142a163, 0xfbfb33f, 0x411e8ee, 0x6bb684a, 0x34b4629,
+          0xdc81817, 0x4344bec, 0x17f9d46, 0xcc9573d },
+        { 0xff38a7d, 0xf85f8bc, 0x0caf117, 0xa14bf73, 0x4ba6429, 0x126874f,
+          0xaa5db97, 0xcc9bf22, 0x6aba827, 0x62b56df, 0x9c9772a, 0xfee1cb8,
+          0x177e541, 0xe36838f, 0xadd438f, 0x698815d }
+    },
+    {
+        { 0x38ed1ad, 0xc9fd894, 0x7b6a601, 0x73cd79d, 0x05e8d20, 0x2210e62,
+          0x3592af5, 0x72384ac, 0x763d07e, 0x5ccc079, 0xa5f79eb, 0x2f31a4a,
+          0x2945a95, 0x693f4ed, 0x8056fdc, 0xc712017 },
+        { 0xdf4b09a, 0x361ecd2, 0xb7d929a, 0xa5644ea, 0x3fabe9a, 0x34abc0b,
+          0xe942a8c, 0x1a2473c, 0x6454bc3, 0xe00c924, 0xdff7366, 0xab324bc,
+          0x21b8f99, 0xe1412f1, 0xe33551e, 0x970b572 }
+    },
+    {
+        { 0xbd0a6b5, 0x6ca4cac, 0x921d654, 0x5584787, 0xc809bda, 0x18e5253,
+          0xf0cbe5e, 0x01b32c3, 0x0f987dd, 0xb9aa754, 0x6dfa4db, 0x628f4bb,
+          0x891890b, 0x0255f0b, 0x874e590, 0x25b7df4 },
+        { 0x8ed5f95, 0xbded318, 0xca93023, 0x9dc428d, 0xbccf520, 0xc68f25a,
+          0xe616e6c, 0xc4f3764, 0xa1d9993, 0xd9a57f1, 0x533431b, 0xd1964a5,
+          0x02ab6d0, 0x06cd77f, 0x03e52e0, 0xa660791 }
+    },
+    {
+        { 0x5f72700, 0xab08864, 0x0a1a44e, 0xf77b2ff, 0xc2a24b5, 0x43ebdd8,
+          0x4f564d7, 0xa6d6711, 0xf414160, 0x495df63, 0x76f6de6, 0xf5bacd7,
+          0x7c2b43d, 0x3011aff, 0x3241928, 0xbb1e64c },
+        { 0x5034073, 0xf70c572, 0x68f1e97, 0x891c62a, 0xb22e374, 0xed8eb2e,
+          0x7dbcc2f, 0xd3a53e9, 0xdc8f220, 0x1d06281, 0xace4393, 0x9eef48f,
+          0xd2abecd, 0x96014f5, 0x2653ceb, 0x1da7e09 }
+    },
+},
+{
+    {
+        { 0xd00bc94, 0x7593318, 0xc7262a2, 0x586f3c6, 0x958ad31, 0xea68f52,
+          0xd4e8bed, 0x6707fcc, 0xcb3f9ce, 0xb7e35d6, 0xf4b1be8, 0x2cbb6f7,
+          0x7b41aee, 0xa535268, 0xf7b39b8, 0x1d77845 },
+        { 0xeaf9554, 0xb1f3995, 0xfe9e7d4, 0x3250f70, 0xa00c23c, 0x62e5d1b,
+          0xc10e3bf, 0x5e422f5, 0xc25cec4, 0x7a18039, 0x7cc4d5b, 0xb4e66a1,
+          0x36d0e0c, 0xad7c5f6, 0xa4cf347, 0x9f40b12 }
+    },
+    {
+        { 0x51e3696, 0x697f882, 0xab0a648, 0xc89bc40, 0x9785804, 0x8f261a5,
+          0xb51a2bd, 0x4c7f900, 0x8a2dfcf, 0xd00e7af, 0xb642aeb, 0xf9c534d,
+          0xb63df0e, 0xea2a79f, 0xf2f64a4, 0x392a69a },
+        { 0xc331b6c, 0x0c0f01c, 0x6a5edb5, 0x414bf2e, 0x5068391, 0xfe5ed81,
+          0x62fbc34, 0x0a8078d, 0x54bca98, 0x78a4382, 0x3d727c7, 0xf7a49ae,
+          0xab4dffe, 0x96c1de1, 0x3b9440a, 0x45901f7 }
+    },
+    {
+        { 0xacfe46e, 0x3f1189f, 0x4467443, 0xdca6f46, 0x2eb5bcf, 0xac38542,
+          0x906bf72, 0xb02dce9, 0xfe1d454, 0xdd8cdac, 0x65f7218, 0xc26f04c,
+          0x6ea145d, 0xb474859, 0x5bdb315, 0xc53dc6b },
+        { 0x9ad7197, 0xbe5be74, 0x18b5ecc, 0x627e919, 0x9ea405d, 0x57c889c,
+          0x1a5360b, 0x2e5650c, 0x1b30b27, 0x42290df, 0x5242687, 0x4a07157,
+          0xd379133, 0x553ed1f, 0x01db019, 0xb9d7a07 }
+    },
+    {
+        { 0x56597dc, 0xcfe551c, 0x925ebd6, 0x81af92a, 0xf4e8d57, 0x83efe16,
+          0x1f640d3, 0x61bb431, 0x78b414a, 0xf80440f, 0x6c9e3b4, 0x72f3c63,
+          0x6a03c66, 0xb55f43a, 0xe417037, 0x47a9ded },
+        { 0xdbb612b, 0x1a7e287, 0xdbb9220, 0x895c3c7, 0x6c04764, 0xd50c86e,
+          0x53cf7ca, 0xed52698, 0xf74af55, 0xc78d799, 0xb969ff2, 0xb2ba0f2,
+          0x1c6530b, 0x06d4815, 0x165a575, 0x764a1fe }
+    },
+    {
+        { 0xc1b5ece, 0x4383a3b, 0x54ff148, 0x0563c88, 0x5af796e, 0x9a45279,
+          0x88e9953, 0xffba7c0, 0xb6a3001, 0xfe9fb5e, 0x25b6b19, 0x7950988,
+          0xd81be5e, 0x67c899a, 0x2f9d29b, 0xc89ac8d },
+        { 0x29ab8f7, 0x7c76ba3, 0x6e40f74, 0xb2a18c9, 0x3864d9b, 0x1b5056e,
+          0x9b582b8, 0xdfa503d, 0x7c9c68e, 0xfb03519, 0x6b3c22b, 0xdc50131,
+          0xa6c96ff, 0x38ab231, 0x8cb1c10, 0x4ea527c }
+    },
+    {
+        { 0xc05b4ed, 0xd632f20, 0xb2a032d, 0xe0199fa, 0x26812d7, 0x3732956,
+          0x013df13, 0x2aed855, 0x39f96ac, 0x92ca24b, 0xbb9751a, 0x620273d,
+          0xf7437a1, 0x5d0d21e, 0x077de56, 0x9de2a43 },
+        { 0x11a4674, 0x0569b12, 0x89c3989, 0xfc3923e, 0x2c5c770, 0x3d12704,
+          0x84e8c37, 0x0072b90, 0xac39f9a, 0x7178d4d, 0x778d345, 0x5f8292f,
+          0x77c7307, 0x9e5bf0f, 0xc3a20f5, 0x7691610 }
+    },
+    {
+        { 0x705fe96, 0x7c4ead5, 0xc8e464c, 0x377ec35, 0x7689954, 0x3e5b990,
+          0xa2d31ea, 0xc0f6949, 0xc580671, 0x839d395, 0xb215b09, 0x2f347a6,
+          0x683df83, 0xfdcfa33, 0x6af39a8, 0x6e12cc2 },
+        { 0x13a3bd2, 0xae46ec8, 0x59366f8, 0x03a7d3b, 0xb87aed4, 0xe2029d5,
+          0xfe1b83d, 0xbdc4e43, 0xdb8a1a8, 0x768437c, 0xea0dd7f, 0xe47acc3,
+          0x62a0af4, 0x550e0cc, 0x1a20962, 0xcaf2cbc }
+    },
+    {
+        { 0xf28a78f, 0x5a784f7, 0x07e9724, 0x952a9b5, 0x1bab7a3, 0x8ac5e41,
+          0xb7bc1e1, 0x1251e3f, 0xdc15e22, 0xe360f82, 0x95213f5, 0x3ac72da,
+          0x4dcd47b, 0x65ee9ba, 0x3af5952, 0xdfeab7b },
+        { 0x26fd3c6, 0x34c5c80, 0xf3ac7ee, 0xd977b08, 0x7dba2f6, 0x003bd01,
+          0xac98c8d, 0xcfc5cf8, 0x0e46922, 0x05eb604, 0xfaa9352, 0xc248b17,
+          0x395c7a7, 0xfa41c0f, 0xb71ee44, 0x29931d4 }
+    },
+},
+{
+    {
+        { 0x07861c5, 0xac087bb, 0x5ae8240, 0x3bd37db, 0xf94518f, 0x94c68ec,
+          0xff88a5b, 0xd32a378, 0x9b441d1, 0x42c8aaf, 0xfc07f12, 0x089db70,
+          0xd3d4455, 0x211c386, 0x546b158, 0x1db9af7 },
+        { 0x51bc927, 0xdfd1b65, 0x0733df4, 0x69c0493, 0x2aeb586, 0xdc72cd4,
+          0x823aa13, 0xeebdace, 0x56ad643, 0x51b3b3c, 0xd4e0426, 0xb983a99,
+          0x69c4ecc, 0xa1e5b6c, 0x45e6668, 0x37cd382 }
+    },
+    {
+        { 0x9f73aea, 0x158ce6d, 0x14ff475, 0x36a7749, 0xdc0b018, 0x0d4e424,
+          0x3946f09, 0xc2c4448, 0xfacda62, 0x7a7de3f, 0xb486709, 0x49a19e6,
+          0xdb61da7, 0x65094d8, 0x8f5ee87, 0x09edfd9 },
+        { 0xb37226d, 0xe460fcf, 0x69bf470, 0x3b9d039, 0x247ca22, 0x3d4d511,
+          0xc782cb1, 0xc7248d6, 0x00ad293, 0x91189a0, 0xe8abe75, 0x1244942,
+          0xbf52cdb, 0x9f88d12, 0xbbbcadf, 0x368463e }
+    },
+    {
+        { 0x8074f45, 0x419e4b3, 0x0771c83, 0xd3f8e2e, 0x2e68d34, 0xd2743b4,
+          0xb116a00, 0xc68b7db, 0xd84cc37, 0xfad2cf7, 0xb7a0f4d, 0xcfd27c0,
+          0x190e587, 0x3b9e23f, 0x751ca9e, 0x7bab499 },
+        { 0xa8f12ee, 0x3270861, 0x31b36d5, 0xee1f38d, 0xe4c0eed, 0x748bb31,
+          0x110ebad, 0x9be5c9b, 0xc8b6cb6, 0x728660b, 0x93d914a, 0x7bc9df7,
+          0xc88c859, 0x73a4f2c, 0xb4e7f0e, 0xbe4a2fd }
+    },
+    {
+        { 0xa450e77, 0xe566ff8, 0x6a13aba, 0xb0b4006, 0xcd7dc90, 0x483a510,
+          0x5fa9ccc, 0xb1a2013, 0xa80e67c, 0xeb0b631, 0x020801a, 0x7c34e1f,
+          0xf4e447c, 0x0257dc8, 0x74c6f0f, 0x7abe7d1 },
+        { 0xb19a576, 0xf115a3a, 0x064ca0e, 0x8f0474a, 0x351f99b, 0x999bb6b,
+          0x773edc3, 0x855254b, 0x427d717, 0x49f6c2f, 0x2e0cef2, 0x9f68253,
+          0x2ee34f5, 0x1fe126c, 0x80150f7, 0x1ec2cae }
+    },
+    {
+        { 0xc005b7a, 0x862c5af, 0xec4ef17, 0x61adea7, 0x007b446, 0xf885fd3,
+          0x9b0e30e, 0x25c129d, 0xfeec7e0, 0xbc10f25, 0xdf79ee1, 0x3901ac4,
+          0xfe9e19f, 0xad49db7, 0x360d050, 0xc8624d9 },
+        { 0xbf3260b, 0xc74a576, 0x8c010c2, 0xbde8024, 0x09b6977, 0xf155329,
+          0xd52dcf8, 0x6a5a82e, 0x29b9dfc, 0x4fbf59d, 0xc7b730c, 0x337d049,
+          0x3a89cd4, 0xb3deac6, 0xad2f2eb, 0x1e07595 }
+    },
+    {
+        { 0x3b7c84e, 0xa0b0a4d, 0x8cf2b00, 0xf132c37, 0xeaaa8ec, 0x192814b,
+          0x7b4b5df, 0xe7929f9, 0x42d0ab7, 0xf08a68e, 0x7b60cdd, 0x814afb1,
+          0x7d9c160, 0x78c348c, 0x44db217, 0xf8a9488 },
+        { 0xeaa2578, 0xcdefd88, 0xbd0e260, 0xf717f56, 0x1694d02, 0x7754e13,
+          0x181dbd8, 0x1254c14, 0x6e5f312, 0x0dacdd2, 0xcef87bf, 0xb8abdfb,
+          0xe74e2ea, 0xb985972, 0x002b424, 0x1717621 }
+    },
+    {
+        { 0x162df70, 0x92cc75e, 0x18ee849, 0x1e20c06, 0x26aa590, 0xc036b46,
+          0x4da5155, 0x31be67e, 0xf7213b0, 0x04911b5, 0xbb2e72e, 0x39261d7,
+          0x5c015a3, 0x9e84466, 0x298ae67, 0x2f59fc0 },
+        { 0x1701fcc, 0xa3ea7ba, 0x0ebd651, 0x87a5fa9, 0x301d7b1, 0xa607ed4,
+          0x3b2e271, 0xbd4ec5f, 0xdc4180f, 0x732a1a2, 0xfeaa8c1, 0xbe15d82,
+          0x66f2f3f, 0x1036702, 0x9e79ce8, 0xccfd397 }
+    },
+    {
+        { 0x70a54ad, 0x82ab835, 0xe3bec75, 0x5c1dee8, 0x54b556b, 0xf583ff4,
+          0xf461e60, 0x9220199, 0x87fc4e7, 0xdf61ca8, 0x0776dad, 0x6641fd2,
+          0x8edd061, 0x00c6edd, 0x55f7e87, 0xaf9b142 },
+        { 0x9bbe3ec, 0x73f15e4, 0xf8bc1fa, 0xdd3b788, 0x1b8ff86, 0xb24cc07,
+          0x41be58b, 0x6c260d2, 0x6b10ada, 0xec1c4e3, 0x7fdb985, 0xf6b4209,
+          0xd47c212, 0x0d0ac85, 0x07d78d1, 0x967191c }
+    },
+},
+{
+    {
+        { 0x843d0f3, 0x3b11638, 0xf27f10e, 0x4b89297, 0x863ba2a, 0x477236e,
+          0xadd280c, 0x1949622, 0x04da757, 0x7cd5235, 0x79e4ff7, 0xe0e99d2,
+          0x537da41, 0xb4ef894, 0x5a24ff1, 0xc55dde4 },
+        { 0xb587521, 0x18d8e21, 0x3777833, 0x8010b5d, 0xd3a54c8, 0x4af522d,
+          0x4c0ac13, 0x7cd476b, 0x4099f67, 0x4587e61, 0x605ee64, 0x494d0ed,
+          0xcc80903, 0x3218ba2, 0x0b2e169, 0x5ff56aa }
+    },
+    {
+        { 0x3a06c69, 0x51ec94e, 0x5e65c52, 0xa26d7be, 0xd44ee96, 0x156f113,
+          0xbf5b9b4, 0x70f0968, 0x5f5332d, 0x9b7e469, 0x6703829, 0x36c295f,
+          0xd04f492, 0x1522690, 0x728043b, 0xcf35ca4 },
+        { 0x190a7c3, 0xf9ca3e1, 0xf971b07, 0x53d2413, 0x9c48b49, 0xae59652,
+          0xfefff5c, 0x74672b8, 0xa7643b0, 0x0a3018b, 0x3e9b0a8, 0x51919e8,
+          0xc932fb5, 0x89ad33d, 0x643e687, 0x52a4419 }
+    },
+    {
+        { 0xd2d0acd, 0x7778990, 0x487fdf1, 0x3bdbcce, 0x2b03dd2, 0xdc413ca,
+          0x9a2b7d0, 0x278755b, 0x35ddd7f, 0x4ebb8b5, 0xbcbdb92, 0x0465152,
+          0x671d051, 0x34f22d6, 0x87192b9, 0x1ba04c7 },
+        { 0x83560c1, 0xb1693f4, 0x7d174e9, 0xe08a593, 0x64dc9af, 0x47ffdc4,
+          0xce8126c, 0x1123596, 0x1124628, 0x632d95f, 0xfee7c76, 0x66287ab,
+          0xc552332, 0xb40fe60, 0xe304e1e, 0x3f11729 }
+    },
+    {
+        { 0x5030a8c, 0x97a6ea0, 0x09c27b2, 0x6924198, 0xac9dd5d, 0x3308501,
+          0xbe73fdc, 0x9fed7fa, 0x0535286, 0xea55544, 0x6c9b832, 0xc7c07ab,
+          0xc51b967, 0x178c882, 0x86ee075, 0x6fa0c69 },
+        { 0xb8b5c4a, 0xbaa4a15, 0x3130c0a, 0xf83c0ea, 0x2800331, 0xcf8624b,
+          0x7ccbcb8, 0xade85cd, 0xf08445d, 0x971d7f6, 0x6a546dc, 0xfd480b7,
+          0xc93761c, 0xdc15a38, 0x9d04631, 0xc4c495c }
+    },
+    {
+        { 0x9470efe, 0x5f4cee8, 0x88d93ad, 0x9fe8961, 0xf4e49ce, 0x24783b3,
+          0x52ffb3e, 0x1bc7ed7, 0x6d81e17, 0xa3abe6a, 0x7a333c3, 0xd6bb8b4,
+          0x10a3527, 0x3485c0b, 0x31a9d10, 0x7cddc9c },
+        { 0xc38ca37, 0x0c78112, 0xdd2f8d8, 0x10e249d, 0xc511911, 0x72c88cc,
+          0x29a6c84, 0x4d75b5a, 0xa227b1e, 0xc74b267, 0xf8e35ad, 0x698390c,
+          0xe98d230, 0x8f27edf, 0x6bdc7f4, 0xec922f2 }
+    },
+    {
+        { 0xfc32e11, 0xac34023, 0x47200d1, 0xe0ae2f5, 0xbd98c82, 0xa7c7492,
+          0x7b02154, 0x3910b68, 0xe28ab6d, 0x6fdd06c, 0xd98b012, 0xd3a7e49,
+          0x9f54207, 0x4c1c82b, 0x45c176f, 0xef5bbe6 },
+        { 0xd3e71eb, 0x3d17960, 0x080e70c, 0x90d7e84, 0xbff5d9e, 0x83e6438,
+          0x535d85c, 0x1877e1f, 0xfbb69cc, 0x931ed6e, 0x1247848, 0xcf96265,
+          0x750da4e, 0x76d618b, 0x717fbf6, 0xc076708 }
+    },
+    {
+        { 0xeec5126, 0x80a5ac5, 0x3379c80, 0x6d05dd1, 0x2336d32, 0x514b089,
+          0x6725137, 0x586c006, 0x574f954, 0xab2365a, 0xac7d356, 0x3c89ea0,
+          0x27460ba, 0xf1f2edd, 0xab9870f, 0xf200ddb },
+        { 0xa35e885, 0xc8f1b2c, 0xe6e7550, 0x5d22f86, 0x9554615, 0x24b9a40,
+          0x616314f, 0xcb41107, 0xc976a11, 0xca752f0, 0xa08291a, 0x3e2f839,
+          0xf2c420e, 0x0cff22f, 0x82b9747, 0xafd603e }
+    },
+    {
+        { 0x810a3da, 0xaddeddc, 0xd3a87bf, 0x78b6c2d, 0xde3a04c, 0xbc7020b,
+          0x9b6d045, 0x47ab973, 0x0959358, 0x3b046d6, 0x509ee3e, 0x0f953e7,
+          0x69fc61b, 0x803dc86, 0x893c8d4, 0xcceaec0 },
+        { 0xb048a45, 0x21f8c40, 0xfcaea8a, 0xb535073, 0x90e360b, 0xe712c35,
+          0x8403338, 0x5d0f3f4, 0x7207f2d, 0xe0ea26c, 0xffd9e05, 0x20f6b57,
+          0x4788b00, 0xb97d68e, 0x1889cce, 0xb121554 }
+    },
+},
+{
+    {
+        { 0x464238e, 0x0079817, 0x0d381ca, 0x2110302, 0xd9f01b5, 0x1cc4c6e,
+          0x5a131b1, 0x5e35dc5, 0x06944eb, 0xb61848d, 0x29631a3, 0x83792a0,
+          0xafca0dd, 0xbe1017f, 0x782fcbb, 0x70aaa01 },
+        { 0x99945e7, 0xc63b7a0, 0xc4486c1, 0xe9164ec, 0x885f2c1, 0xb133e35,
+          0xc99ae02, 0x186f0d3, 0x2bf53e6, 0x2fca492, 0x48a02bc, 0xf922aa2,
+          0x0dd3dca, 0x4fe6490, 0xf6a8207, 0xe8c313f }
+    },
+    {
+        { 0x97caf1e, 0xc5b3583, 0x922a4b6, 0xa001922, 0xdf07c95, 0x67e36be,
+          0xb2f4f34, 0xabaa0ae, 0xdedc333, 0x66dc926, 0x38ec5b3, 0x82021c4,
+          0x00ab176, 0x82b4f26, 0x69c45af, 0x1b7c22e },
+        { 0x0924ad9, 0x07b0dbe, 0xa407dde, 0xe030936, 0x26ccd06, 0x66e1ce9,
+          0xe3505a9, 0xb50c108, 0xda98f51, 0x8b921e1, 0x20cf7c7, 0x449ca1a,
+          0xe67d079, 0xadb80c7, 0x834372d, 0x205aa54 }
+    },
+    {
+        { 0x19bf847, 0x1482b48, 0x5906f0f, 0xd6c16ab, 0x23ad060, 0x323fb17,
+          0xc832be7, 0x0346389, 0x2ee45bf, 0xe71b2d8, 0xfb22276, 0x761c37d,
+          0x5d70be2, 0xa9b3334, 0x5a0627a, 0x81a0656 },
+        { 0x99a6282, 0x3377503, 0xd0436f0, 0xafc8d2e, 0xc53342f, 0x22f71d3,
+          0x8939ad3, 0x66ca56d, 0x30e09ba, 0x15a9192, 0xa6de890, 0x261091e,
+          0xe78f2d5, 0x609d700, 0x8eaaf78, 0x8aa52ee }
+    },
+    {
+        { 0xce76258, 0xa398788, 0x494b975, 0x3031d07, 0x043dfe2, 0x4a6d652,
+          0xb4401ec, 0xdb1a849, 0xce8bbcc, 0xf81ebbb, 0x16efe9e, 0x937dd47,
+          0xef85ecc, 0x9c19350, 0x214273b, 0x260d932 },
+        { 0x77bf1a3, 0x1d7e21e, 0xa544eb7, 0x199d689, 0x94ced50, 0x9da5941,
+          0x8a0aeaa, 0x71a60be, 0x26d3b51, 0x183a0ae, 0x8df9728, 0x49f176a,
+          0x3230674, 0x744376e, 0xe25541c, 0xb2cb21a }
+    },
+    {
+        { 0x9a0071f, 0x7a72158, 0xe7d2a6b, 0xe19dd29, 0x55113f0, 0x3deb34e,
+          0xede573b, 0xef1f8eb, 0x5665e37, 0xa8f7ff9, 0xf2d7777, 0xa2c21ea,
+          0x91e2e39, 0x1387afa, 0x7db68f6, 0x04057b9 },
+        { 0x1c241f7, 0x8b9d5ae, 0x8e75993, 0x689588a, 0x5c0e2d4, 0x79585b4,
+          0x7b64974, 0xba1ef16, 0x1c08a75, 0x72685bc, 0xd572edd, 0xf0a5814,
+          0x5ab0e70, 0x71464a3, 0x339aea7, 0xc93c92b }
+    },
+    {
+        { 0x5b8a87d, 0x1917e2a, 0x3a82756, 0xea5db76, 0x6420e2b, 0x5bba2fb,
+          0x019372a, 0x5cc0501, 0xccc5efd, 0xb1ef8be, 0xf49c57d, 0xaf06393,
+          0x87a0bc4, 0x3ab1adf, 0x34fe6b6, 0x2ee4cca },
+        { 0x6b8ba9b, 0xd160668, 0x7efec13, 0xef137d9, 0x50abb76, 0x7b60465,
+          0xf753a00, 0xb40ec2b, 0xeaf8f1d, 0x696ed22, 0xd8ba3d8, 0x398c91f,
+          0x37db313, 0x11f2034, 0xfe5079e, 0xe1ec33b }
+    },
+    {
+        { 0xbdc81f0, 0x8a10c00, 0x6fe8e05, 0x5f39256, 0x14a368e, 0xa595dab,
+          0x38cec6b, 0x32b3181, 0x1b00d00, 0xd77afde, 0x4d9923d, 0x3c97928,
+          0x76e13dd, 0x78f0e7a, 0xbf75675, 0x5ee8e59 },
+        { 0x91b130c, 0x49ec893, 0xa47a441, 0x9416182, 0x76e2ce8, 0x54555b5,
+          0x349c40b, 0xcbdd2fd, 0x9392bbe, 0x10ae737, 0x2e2dab0, 0x270b111,
+          0xaf293f4, 0x5cb7712, 0xd6095c6, 0xfc22a33 }
+    },
+    {
+        { 0x0f15878, 0xdcb5bbd, 0xb6bba48, 0xbcf27ad, 0x7b70eba, 0x979913e,
+          0x158578a, 0x4c0f34b, 0x6ed6088, 0x53f59a7, 0x75b0fc2, 0x19b3b2c,
+          0x0153f3c, 0xad628dc, 0xcec1607, 0x5195a2b },
+        { 0xdfe0f7a, 0x95f8b84, 0x152920b, 0x935c6b0, 0x4da1056, 0x25f9e31,
+          0xb28c229, 0x4910a94, 0x8ee4d6e, 0x54b03b4, 0x694e3ed, 0xc991fc3,
+          0xdbe5709, 0x68c4c26, 0x63d7657, 0xc9cfce4 }
+    },
+},
+{
+    {
+        { 0xf52a44e, 0x21c9227, 0xe85bfbd, 0x7f105a2, 0x6268fc2, 0x887781f,
+          0xa2d7e35, 0x56ee808, 0x2d3930f, 0x14f9de5, 0xdcb561a, 0x4a4e356,
+          0x7f95598, 0x8736226, 0x5f34151, 0x211c342 },
+        { 0x0eaf9cb, 0x8fcb75b, 0x3d60ce2, 0xcc9edf9, 0xa5fe627, 0x54412c9,
+          0x842dd09, 0x6036a72, 0xa6c6099, 0x71ce668, 0x5386764, 0x02b30d7,
+          0x6f18e23, 0xb69bed3, 0xd1de9f4, 0x124c9b1 }
+    },
+    {
+        { 0xe69b531, 0xe8f8d95, 0xaff1049, 0xe1e115e, 0xeddea0c, 0x9087cd1,
+          0x7449916, 0x8ed55a5, 0x7808404, 0x8009f54, 0x17fea55, 0x990f216,
+          0xfe8ecf9, 0x68ba624, 0x56d1f47, 0x8ac2950 },
+        { 0x529dfb0, 0x3257887, 0x244c080, 0xc4a613f, 0x28672fa, 0xabb1ac0,
+          0x31eb291, 0xb2915c5, 0x8fababa, 0x6e368ca, 0x1fde498, 0x6b8c259,
+          0xf2a548c, 0x67724a1, 0xf90409b, 0x6b3b7e8 }
+    },
+    {
+        { 0xfae20aa, 0x5415003, 0x85df5ce, 0x95858a9, 0x0ac6bee, 0x42bc987,
+          0x39ea1a9, 0x8d843c5, 0xb571043, 0x5de200c, 0x1741a33, 0x084fcd5,
+          0x0009d1c, 0xe1ca20c, 0xe957e6d, 0x0271d28 },
+        { 0x9e3be55, 0x84cbf80, 0x1c578c6, 0xc804dda, 0x409a93a, 0xea85489,
+          0x972021d, 0x64a450a, 0xe681312, 0xc6a2161, 0x65bc111, 0x280bff9,
+          0x0f8526f, 0xd358a4b, 0x953a3ab, 0xd967be8 }
+    },
+    {
+        { 0x7dd066c, 0x4c5e615, 0x634c8d4, 0x37afd33, 0x42d8b87, 0xa3ac88a,
+          0x938b607, 0x9681e9b, 0x37fe4c8, 0x7a286ab, 0x2494245, 0xdeee574,
+          0x6af75a8, 0x184b9d3, 0x3670c04, 0x20f696a },
+        { 0xa39e8b9, 0x1340adf, 0x0850b2e, 0x03c1929, 0x2c0e1ef, 0x435ebd4,
+          0x142ee9b, 0x49de18b, 0x3f116f2, 0xb440b27, 0x2214463, 0xd94e9fa,
+          0x6311543, 0x1b0ddd3, 0x991ba3c, 0x1ae042a }
+    },
+    {
+        { 0x5bb47aa, 0xbc322f8, 0x54a5845, 0x9e25625, 0x21115f3, 0x96b65ae,
+          0xbb5757b, 0x46fbed4, 0x4c42dce, 0x18aec4f, 0x8d801f0, 0xc59caf6,
+          0x1205521, 0x9189463, 0x89feb7a, 0x66bd8e0 },
+        { 0xc529ee7, 0x39ebe95, 0x8eadb99, 0x28d8992, 0x6927544, 0x6058c78,
+          0xd3808ec, 0x877e7a5, 0x1c52eaf, 0x8f65111, 0xae221cd, 0xfb59812,
+          0xf890391, 0x22289c6, 0x4966e92, 0xa97695b }
+    },
+    {
+        { 0x6ff10f0, 0xf0a9122, 0xa2a65c8, 0x49a931b, 0xb1d3cb0, 0x3fcebbc,
+          0xca9685f, 0x70eb79b, 0xab38cb6, 0x82520b5, 0x76304c3, 0xccf991b,
+          0xaf8b07c, 0x575aab1, 0x5ed5efb, 0xec8166a },
+        { 0xc8689b1, 0xddc5698, 0xb2e78d7, 0x227c949, 0x8e07d91, 0x6132321,
+          0x22cfd62, 0x658a11d, 0x004dd5f, 0x908fb44, 0x90d21b1, 0xe3d14f0,
+          0xa6a1639, 0x6f3db9d, 0x333a525, 0x09d86c0 }
+    },
+    {
+        { 0x6f043f7, 0xd83eaf0, 0xb52d5f6, 0x88ab648, 0x57144d7, 0x67c664d,
+          0xeafc8b5, 0x55d7644, 0xcceb291, 0x1c89f20, 0x831ac47, 0x51aec7b,
+          0x6148854, 0x51172fa, 0xf6d7bfe, 0x8fabf7e },
+        { 0x477ee27, 0x5910316, 0x20fe61e, 0x5f299dd, 0x42826ab, 0x48079a8,
+          0x22591fa, 0xf4a83ba, 0x55482ec, 0x8fac660, 0x6b65b3b, 0x48fd5f1,
+          0x9fd9e19, 0x4288a7c, 0x9377894, 0x27db819 }
+    },
+    {
+        { 0x7fd9dd6, 0x2936ee4, 0x9ec87c6, 0xcce5f0e, 0xdb6e3b4, 0x15a50e3,
+          0xad701c8, 0x61df105, 0x1dff1f7, 0x3601add, 0xe8a16e1, 0xb761e06,
+          0x1af3f91, 0x4341e02, 0x933fa3f, 0x9156a4a },
+        { 0x54bc01d, 0x9dc46ae, 0x64eb910, 0x605577a, 0x5a59a99, 0x22b99f8,
+          0x0a229d8, 0xab2dbaf, 0x6599364, 0xa8bfb65, 0xe94ebf0, 0x39ed4a5,
+          0x0dbb23e, 0x7b46a1e, 0x8751422, 0x117b195 }
+    },
+},
+{
+    {
+        { 0x423bddf, 0xd19e8fd, 0x387ef59, 0x9d77042, 0x849590a, 0x315cbdd,
+          0x7866c1e, 0xfdc637c, 0x03515a6, 0x72be83d, 0x0376780, 0xd44a4a0,
+          0x19e0c2b, 0x3b96131, 0x7b1a689, 0x023aca3 },
+        { 0x82282ea, 0xf5f3687, 0x8a8b5c7, 0x4471089, 0x17a3066, 0xcd2f00a,
+          0x81ed681, 0x754e112, 0x0bfcefd, 0x9c6c70c, 0x3b6f29b, 0xd6aced0,
+          0x2817a2a, 0xe443d56, 0xe7c0012, 0xe590ef4 }
+    },
+    {
+        { 0x3e62e2a, 0xc2f9676, 0xb2daa26, 0x661816e, 0xdd5f512, 0x3515fd2,
+          0x56b6e75, 0xdc36e27, 0x74cc658, 0x0bdde46, 0x00e7644, 0x1029086,
+          0x1694a09, 0xfdf0045, 0xceac169, 0x454bcb6 },
+        { 0x6481eb6, 0xf4c92ab, 0x09750e7, 0x8b77afa, 0x6362d6d, 0xe6f4231,
+          0xf53a3ae, 0x0d45dee, 0xd7dcf98, 0xdac7aac, 0x125ec4a, 0x628cb7f,
+          0xaec0320, 0x41e8a20, 0xea2e35b, 0x7418c7e }
+    },
+    {
+        { 0xdf40519, 0x4d649ab, 0x3525833, 0x8cb22d4, 0x7a5333f, 0x15f6d13,
+          0x72c23ee, 0x8c3991b, 0x0cd44a3, 0x248b9a5, 0xccc1a75, 0x6b4c4e0,
+          0x15c99a9, 0x3221efb, 0x0a9c504, 0x236d504 },
+        { 0xd559100, 0x401c7fb, 0x07c524d, 0xcf0e075, 0x34a9275, 0x39647c0,
+          0xf7e8683, 0x2355422, 0xb3ae670, 0x3e0a16e, 0xad61b7f, 0x1c83bcb,
+          0x9ca6cbe, 0x491bcb1, 0x5e29458, 0xe668dc4 }
+    },
+    {
+        { 0x219379e, 0xe44c65b, 0xbb607ee, 0x211381b, 0xb7bc6db, 0xd4c7428,
+          0xb76a2e8, 0xba62a03, 0x8bb0b31, 0xe1729c9, 0xc6bbc10, 0x3caeb50,
+          0xb0187aa, 0x6c66727, 0xfb90dcf, 0xbf9d2f0 },
+        { 0x1184dc6, 0xec69350, 0x2698eb5, 0xd58d2a3, 0xa316b07, 0xb366d8d,
+          0x251c017, 0xe1e39bb, 0xadb157f, 0xbe44ba9, 0x8a8b06c, 0xbaa9a9a,
+          0x6e473e1, 0xd0f4635, 0x1d681c6, 0xd25a8f6 }
+    },
+    {
+        { 0xcb102c7, 0xba39d5f, 0xd8aa1eb, 0x66eba21, 0x697fbf4, 0xcc2591a,
+          0x2317f54, 0x5adb579, 0xf76c6f9, 0xa01ae71, 0x5042705, 0x2c525de,
+          0x4f4479f, 0xc8f4272, 0xe6d7a5b, 0x26ab54a },
+        { 0xdc28106, 0xda217b5, 0xeb2ae6a, 0xc7cadea, 0x53ea3b2, 0x0b16094,
+          0xcc6111b, 0xcddcc1c, 0xa7a7beb, 0x5c47aff, 0x0e52dab, 0xf9931bd,
+          0xc6dcf96, 0x5231835, 0xf27ea4e, 0x7095bde }
+    },
+    {
+        { 0xc33b4e2, 0xee8adae, 0x63ceb44, 0x3006651, 0x880b086, 0xf1476fb,
+          0x9569ce8, 0x0703328, 0x238b595, 0x2cabf9a, 0x26c8158, 0x85017bc,
+          0x68d5144, 0x420b5b5, 0xf9c696f, 0xa9f5f1e },
+        { 0xc8fec5a, 0x1409c3a, 0x28e9579, 0x541516f, 0x0e1f446, 0x06573f7,
+          0x2311b96, 0x3e3c706, 0x3c2ffd8, 0x0033f1a, 0xca6711c, 0x8e808fc,
+          0x07aef98, 0x716752d, 0x92525b3, 0x5e53e9a }
+    },
+    {
+        { 0x5a1c29f, 0xce98a42, 0x3ca6dc9, 0xaa70348, 0xedfa48b, 0xe77d822,
+          0x068abca, 0xd2e3455, 0x482cfca, 0xb456e81, 0x7fbfb08, 0xc5aa981,
+          0x8243194, 0x8979f25, 0x2cd043d, 0x727f217 },
+        { 0xaa53923, 0x7cca616, 0xe9bcb72, 0x387c5ae, 0x37580bb, 0x0173fd4,
+          0x75fc0d9, 0xdd7795b, 0x345deae, 0x47d1c37, 0xb0d1c03, 0x2eb5d7f,
+          0x958f002, 0xf7a1b92, 0x8f61b67, 0x7365cf4 }
+    },
+    {
+        { 0x562a5ed, 0x4b22c3b, 0x5c7cd07, 0x711216f, 0x9ba0648, 0x51f72c4,
+          0x0de9e6f, 0xc10d093, 0xfda63ba, 0xaca479b, 0xaf532b0, 0x4722a55,
+          0x7236f39, 0x8d59eb7, 0x4465c34, 0x5cad874 },
+        { 0x722b0c1, 0xa2119e5, 0xf343ea4, 0xb670264, 0xc19f387, 0x6910f02,
+          0x0381fba, 0xcfec5bc, 0x52c0a1d, 0x5f5de0d, 0x6378cb6, 0x4e474d5,
+          0x27e2ba3, 0x2fc8027, 0x159b541, 0xa215da3 }
+    },
+},
+{
+    {
+        { 0x8499895, 0xed53585, 0x65c998d, 0xa0aefd5, 0x2d5a561, 0x210d850,
+          0xa2cd9d6, 0xc2cc23c, 0xc4d297e, 0x2371d46, 0xd18d441, 0x88b2143,
+          0x043993d, 0xbebdad9, 0xad5f28d, 0x6ba91e7 },
+        { 0x3a731f4, 0xc2bb3f1, 0x5d0d5c3, 0xd35cfac, 0x35ac427, 0x9950998,
+          0x5458adb, 0x8938bb5, 0xab26f3b, 0x0bd738c, 0xa28cd8d, 0x56db3d5,
+          0xa1d8b4b, 0x87eb95f, 0xe7f3b4b, 0xd6700ef }
+    },
+    {
+        { 0xea1e57b, 0x962c920, 0x6dded6d, 0xd3be37e, 0x2c96a73, 0xf499b62,
+          0x6c99752, 0x3eaf7b4, 0x025590b, 0xa310c89, 0x721db23, 0x535aa4a,
+          0x19714a0, 0x56ab578, 0xd4048c1, 0xeecb4fa },
+        { 0x470c466, 0x7b79ec4, 0x1383cee, 0xc4e8f2e, 0x5750c45, 0x0f5d776,
+          0x725527d, 0xa3b3bc3, 0x6d00cce, 0x2f5deb6, 0x95a8d81, 0x5d5a0f4,
+          0xe02b824, 0x50a442e, 0x2a11628, 0xafb0446 }
+    },
+    {
+        { 0x0c613de, 0x72b67bc, 0xe6f0b24, 0x0150d4b, 0x8ed289d, 0x847854e,
+          0xa320f88, 0xe08292f, 0x29c6160, 0xd5b6da3, 0x4fb9d06, 0x2a48e2d,
+          0x2de087c, 0x55d9e41, 0x4f02100, 0x65683b5 },
+        { 0xa8886c6, 0x4dc8c2e, 0x20d6114, 0xe966dd2, 0xa57af97, 0x99745eb,
+          0xb854725, 0x23a9a71, 0x621a047, 0x8effe05, 0x049a4be, 0xf16d284,
+          0x5b0660f, 0x95828c2, 0x56e96b0, 0xd5b69ba }
+    },
+    {
+        { 0x4ffa0b8, 0x0b5b424, 0x096cc5e, 0x0585b45, 0xf505d37, 0x413e1ae,
+          0x0c7ab8d, 0xe5652a3, 0x2990120, 0xab32fb7, 0x3f09368, 0x6b8b16e,
+          0xefe128e, 0xbf9fadb, 0x14b7671, 0x85f366b },
+        { 0x090608d, 0xcb2f294, 0xac3045f, 0x25e2769, 0x6131904, 0x069c4f0,
+          0x329a779, 0x1c57cf1, 0xb7cace7, 0x72fe0d5, 0x0897a45, 0x04d9f43,
+          0x359a645, 0xbaf32f6, 0xfa7485a, 0x0fa854f }
+    },
+    {
+        { 0x5f56f60, 0xae3533c, 0x0ad9360, 0x9773bbb, 0x38fbe6b, 0x769b34a,
+          0xffb0c00, 0xb5ba8e9, 0x75472e4, 0xa939318, 0xce5f30f, 0x12cac92,
+          0xa9e7dbc, 0x514fc06, 0x58b4734, 0xd7ca865 },
+        { 0x65a730b, 0xd101ff3, 0xabe70e9, 0x92da451, 0xef7bf4b, 0xfb5f94a,
+          0x1d56c7b, 0x8c3ef4c, 0x8435c10, 0xb085766, 0xe7ed4cc, 0x7fbbbda,
+          0x24f372f, 0x1da6eaf, 0x59b8ae3, 0x0ab2c1f }
+    },
+    {
+        { 0xf10a4b9, 0x63a1a78, 0x0c7e510, 0xbb5278d, 0xf874142, 0x97b224e,
+          0xb2517b1, 0x0a9ff52, 0xc5cd920, 0x1b5a485, 0xa1823b9, 0x1a8e2eb,
+          0x0e914a8, 0x2b088c0, 0xcf13432, 0xe5ec3ad },
+        { 0x6e7e253, 0x0d6ab3e, 0x6f18458, 0x9f0f5cd, 0xf459a6d, 0x839a744,
+          0x1eb15f7, 0xb4b4f94, 0xc72cb14, 0xe0313ac, 0xb20472d, 0x58ee933,
+          0x872543e, 0x5f73d7a, 0x501f067, 0xb1700c5 }
+    },
+    {
+        { 0x085f67f, 0xb70428e, 0x43cabe5, 0x5441d51, 0xe0a6055, 0x4d0e8c2,
+          0x0882e4f, 0x8d39a08, 0xc1cb39d, 0x615bb32, 0xf7a1642, 0x113f18d,
+          0x250681f, 0xbab8cf5, 0x677b72a, 0x3017ba2 },
+        { 0x5a3a876, 0xcd2b6e9, 0x2035a69, 0x0476501, 0xefa2ea0, 0x31d6440,
+          0x56874d5, 0xde8f8d1, 0x0199d4a, 0xcbc71cd, 0xe7f2170, 0xc546b61,
+          0x112c4c3, 0x4e57e4e, 0xd1622ba, 0x58955a8 }
+    },
+    {
+        { 0x04e2f6f, 0x0064cd7, 0xe0edd38, 0xe9d458d, 0x7e0a5c8, 0xeb1a597,
+          0x01fc0a8, 0xe322ece, 0x1032a19, 0x8b9d166, 0xa89de94, 0x3e7b539,
+          0x001c754, 0xfa30262, 0xdb588f6, 0xe33de4d },
+        { 0x954eb94, 0x4dafbdb, 0x0584c1b, 0xbb43648, 0x5dbe29b, 0x622c93e,
+          0xf57b931, 0x968f9e3, 0x0f6453b, 0x98f03be, 0x08f696c, 0xb0ecc7f,
+          0xa505335, 0x5af55f4, 0xfb3fa9b, 0x028533e }
+    },
+},
+{
+    {
+        { 0x27e8d86, 0x3bc8e68, 0x63f105a, 0x4e43b30, 0x4981250, 0x5301b7d,
+          0x9f72fa8, 0x8b0a75e, 0x357348c, 0x88f59db, 0xec4208e, 0x5f0ebb1,
+          0xc043d3b, 0x4712561, 0xc806b97, 0x9e5ded0 },
+        { 0x2121d09, 0xf9bd0a6, 0xe337cd1, 0x1759ecb, 0xe945542, 0xd1acc0e,
+          0xbd2f63a, 0x3683feb, 0xda5dfe9, 0x44f1bcc, 0x707f22f, 0xa3606c9,
+          0x2d96ca5, 0x45ef064, 0x9022df9, 0xfc3107d }
+    },
+    {
+        { 0x44be755, 0xe81320b, 0x5c7c761, 0xdf213d5, 0xb4e5db9, 0xf43d2d5,
+          0x8dedcd2, 0x3bcfd82, 0xd37a9ec, 0xdf368a6, 0xf475a77, 0xfef20ae,
+          0x162c064, 0x22f5894, 0x0142a7d, 0x956bc66 },
+        { 0x7daec78, 0xaaa10e2, 0xb6e9a78, 0x3cb9b72, 0xe383f72, 0xa740bad,
+          0x7759007, 0xc31b401, 0xa7afc50, 0xdada964, 0xfd3d11f, 0x6bf062c,
+          0x5db3679, 0x9470d53, 0x03abf13, 0x3394473 }
+    },
+    {
+        { 0x46e5d7f, 0x533f440, 0x49048c8, 0xd1793e3, 0x1929b94, 0x59e1150,
+          0x8364134, 0xcddbbcb, 0x582774f, 0x795c794, 0xe03081a, 0x114dfc4,
+          0xef54042, 0x541ef68, 0x23f18cd, 0x159295b },
+        { 0x48a2c8c, 0xfb7e2ba, 0xbb6d116, 0xe2d4572, 0xd750b53, 0x7bb0b22,
+          0xd142ee8, 0xc58888c, 0x90c9e2d, 0xd11537a, 0xd02eb9e, 0x77d5858,
+          0xd444a79, 0x1fa4c75, 0xd58a68d, 0xf19b2d3 }
+    },
+    {
+        { 0xeb8b90f, 0x37e5b73, 0x3f2a963, 0x3737f7a, 0x9de35e0, 0x87913fa,
+          0x8731edd, 0xec7f992, 0x219491e, 0x6e6259e, 0x4de236c, 0xb2148a0,
+          0xfdd309b, 0x89700e8, 0x9f0bf80, 0x9ce51e4 },
+        { 0x301f17b, 0xe7ec421, 0x3bc5f4f, 0xa4b570a, 0x1285ee2, 0xc2b1b2a,
+          0xc53db73, 0x5e86bc8, 0xf24fa90, 0xb65fcea, 0x08ab024, 0x9e74c56,
+          0xf9ed877, 0x5c8003d, 0x4a2cbbc, 0xa632e9e }
+    },
+    {
+        { 0xc91c8b5, 0x32a4546, 0xc969363, 0xc122b5a, 0x3648b3a, 0xbbbec5e,
+          0x25143b0, 0xd5a365e, 0x54157ce, 0xcf3e464, 0xf9bab64, 0x9712f04,
+          0x04b4008, 0xc12d43a, 0x2edf1c7, 0x51932d7 },
+        { 0xb2f8470, 0xaef1655, 0x6c24ace, 0xaa8e3f3, 0x6b4e761, 0x7da75da,
+          0xb90bca2, 0xd371827, 0x0afb45c, 0x84db450, 0xef46b5d, 0xae12045,
+          0xd962f98, 0x91639a5, 0x72f2ac0, 0x669cbe6 }
+    },
+    {
+        { 0x83a4356, 0x851bb31, 0x9a1bf15, 0x7d436bf, 0x120b378, 0x46a3f0e,
+          0x3f5b357, 0x9302abc, 0x93fef53, 0x1e06726, 0x5fd2ee9, 0xb12f4a9,
+          0x7de9433, 0x94a884c, 0xa6f2874, 0x2645234 },
+        { 0xcdb8dfa, 0x6fb56f5, 0x9e0ee4e, 0x4a17dfc, 0x83ab01e, 0xe269d83,
+          0xb77c10f, 0xda932da, 0x0321243, 0x463af0c, 0x16fc8a3, 0xbe1d682,
+          0x48b39e3, 0x2eae3ea, 0x3b03e7b, 0x9423021 }
+    },
+    {
+        { 0xb22f28a, 0xaeb507c, 0x49a6b44, 0xa77458b, 0xc03dc17, 0x232ed5a,
+          0x9c61ac6, 0x79dfc16, 0xcd71b93, 0x7c48be9, 0xc429cd9, 0x983d68a,
+          0x98ae2c8, 0x7709c47, 0xa5df075, 0xe4765c0 },
+        { 0x3367f33, 0x23c4deb, 0x37d72a7, 0xbdf2b7e, 0x0af2d26, 0xbaab5c7,
+          0xfd026ab, 0xd609f7f, 0x541b039, 0x23b72b2, 0x83be852, 0x8d06bac,
+          0xcb23d1c, 0x911d4a9, 0xfb0dbd7, 0xeae815c }
+    },
+    {
+        { 0x2c33481, 0x487c35c, 0xb6136db, 0xffab636, 0xa3d3aa4, 0xccd4dae,
+          0xc3704e0, 0x87149bb, 0xc0e8396, 0x9de8119, 0x58e7ca6, 0xd49357a,
+          0x1562d75, 0x6878918, 0x5ab1fad, 0xc745381 },
+        { 0x02c9b91, 0x0f15798, 0xb1ddde5, 0x7ffc3f0, 0x6aae50d, 0xa01d5e0,
+          0xe279873, 0x6a97e65, 0xb5b1b41, 0x4bcf42f, 0x32f5982, 0x1c6410f,
+          0x50701c8, 0xd4f7600, 0x873b90d, 0xff02663 }
+    },
+},
+{
+    {
+        { 0xe5b2de2, 0xdc53ea2, 0x38acecb, 0x94b352d, 0x0d9d5e5, 0x37d960b,
+          0x90bd997, 0xabd868f, 0x35a7376, 0x781668f, 0x10118bf, 0x043d597,
+          0xf57928a, 0xd4da719, 0x983e46c, 0x01942f6 },
+        { 0x728bd76, 0xab97fc8, 0x4b5c1c5, 0x825956b, 0xc82a104, 0x202809f,
+          0xc8e3132, 0xdb63e9c, 0xc2181af, 0xa41c701, 0x43e066a, 0xd280180,
+          0x24044ce, 0xc734e41, 0x505193c, 0x4d9ab23 }
+    },
+    {
+        { 0xf9f0c3f, 0x0bcd42a, 0xb94a218, 0xda21a46, 0x0ffc788, 0xe55243c,
+          0x47a5551, 0x318aae6, 0x79af9cb, 0x8c2938b, 0xec1dce5, 0x5d15232,
+          0x8ad2e5c, 0x3d310ba, 0x94f792a, 0xd3d9724 },
+        { 0x12a9553, 0xdeb4ca1, 0xeb54d9d, 0x2f1ed04, 0x69fb7a1, 0xaa9c9cf,
+          0x54dcd3a, 0xeb73c3a, 0xf5f201f, 0xee3eddc, 0xba7d234, 0x35f9e1c,
+          0xd2e242f, 0x1d1d04c, 0x0df7515, 0x48df9d8 }
+    },
+    {
+        { 0xa81dd9a, 0x4ecc77d, 0x03aa015, 0xa6ac4bb, 0xbbc4fed, 0x7645842,
+          0x9d6cf52, 0x9ae34cd, 0x5917e0b, 0xf8ff033, 0xc2cc175, 0x7c9da37,
+          0xaaacfbe, 0x1e74dcc, 0x7999af8, 0xa8f2df0 },
+        { 0x102a466, 0xd06c4ea, 0xae190dd, 0x2156e87, 0xec4a863, 0xc95db8a,
+          0x244a6fe, 0x49edffd, 0x904f81e, 0x110fae6, 0xa1cd104, 0xbaa3e50,
+          0x0478b65, 0x5bd38a2, 0xdaefbcc, 0x2b57d05 }
+    },
+    {
+        { 0x86f4534, 0x1ce92ba, 0x414f5e3, 0xb2a8592, 0x9979436, 0xdd7a4c6,
+          0x3f0add7, 0x7599aff, 0xe2d4f64, 0xe0ce4d3, 0x401a29f, 0x74475cc,
+          0xa2377d9, 0xaef6541, 0x3f917b6, 0x54048f5 },
+        { 0x05312ec, 0x1b86b22, 0x31493cb, 0x779ba22, 0xaac9320, 0xc718369,
+          0x617fce4, 0xeab01a8, 0xf7187fa, 0x17b1f10, 0xa1aca46, 0xe68eda0,
+          0x2586342, 0x61033fe, 0x0b6ca43, 0xfc14e79 }
+    },
+    {
+        { 0x13d2491, 0x9f22319, 0x7997202, 0x66bdb53, 0x4617f34, 0x0bafb0c,
+          0xf3bb7b3, 0x5917831, 0xb45bddb, 0x6feb2a6, 0x0202c19, 0x08662b3,
+          0x05852f6, 0x0bc2b57, 0x91818c2, 0x2c00fd4 },
+        { 0xda37dac, 0xca7672c, 0x5a30865, 0xfe4c04c, 0x322e92a, 0x5f1399f,
+          0x25b1beb, 0xe7d67ea, 0xdce7f68, 0xe08b014, 0xf2f2b3c, 0x24df52a,
+          0x750ecd1, 0x2028b23, 0xc810a45, 0x9b25d4b }
+    },
+    {
+        { 0x7a9d799, 0xa35b715, 0x01f9c99, 0x6da1eb3, 0xe363ba8, 0x33ef91c,
+          0xce140da, 0x21c0e2e, 0x158cd84, 0xb0b11bf, 0x93da438, 0x6a87442,
+          0x3db585b, 0x924f10d, 0x10c6159, 0xf5ddd73 },
+        { 0x6a74c21, 0xb72dcb8, 0xcc8f79f, 0x6d14198, 0x9c5a8d6, 0x99f4b6c,
+          0x90e135c, 0x0639688, 0x83f6385, 0x330edb8, 0x9079675, 0xe1a5a6b,
+          0xb8f5fe0, 0x6e37fa8, 0x61dca1e, 0x60e2fd9 }
+    },
+    {
+        { 0x66c395e, 0xc6cb403, 0xb51d0f1, 0x03b21a7, 0xe693181, 0xbc478a5,
+          0xc6cff33, 0x0017c2f, 0x39d8d1e, 0x740a5b8, 0x4d9ec6d, 0x3968d66,
+          0xb0ef1b0, 0xfd53738, 0x1ed0a04, 0x73ca8fd },
+        { 0x75ab371, 0x4ace938, 0xddad7e9, 0xd602936, 0x750bcc2, 0x1f5424a,
+          0x68c7a17, 0xfe09b36, 0x58341ec, 0x165f7de, 0x6ce61e5, 0x95b825a,
+          0x66c83c4, 0x9d31e19, 0xcc5887b, 0x65b3e08 }
+    },
+    {
+        { 0x21482d1, 0xd37e932, 0x08b6380, 0x9af6597, 0x7d61e4b, 0x279426a,
+          0x80997ad, 0x80dd0ec, 0xd5b76d4, 0x7239b0d, 0xe76c098, 0x92e6c73,
+          0xeab3e1d, 0xeeb2321, 0xeb1a910, 0xa69c4a7 },
+        { 0x833d9ae, 0x46d6aa7, 0x572b0fe, 0x3ee6957, 0xcdb3d97, 0x44ccbed,
+          0xcbea01b, 0x342f29d, 0x8926876, 0x0d518c5, 0x5585d2c, 0xaaabae7,
+          0xe008f58, 0xc548c77, 0x21fab2c, 0x819e2fa }
+    },
+},
+{
+    {
+        { 0xc16e981, 0x468e149, 0x9ddbb7c, 0x286c790, 0xdb7a38a, 0x2a92d47,
+          0x8a27cb2, 0xde614e6, 0xe5b0ab6, 0x8dc8822, 0xcf48565, 0x38441ae,
+          0x089435b, 0x11ed5c9, 0x82d0d31, 0x2389286 },
+        { 0x72f2f31, 0xc6698d4, 0x56d76af, 0x295242c, 0xeba563b, 0x4099205,
+          0x3ab7384, 0xae7de5a, 0xd0ed86c, 0xccdf127, 0x965c3c3, 0xb9b6d5b,
+          0x2c31ad7, 0xe351a8f, 0xac12f13, 0xa761dd8 }
+    },
+    {
+        { 0xf171ab7, 0xda115dd, 0x401f93d, 0x2de17b1, 0x40964b4, 0x95019ca,
+          0x65ba3c3, 0x169d1f4, 0x0090d08, 0x534a007, 0x82bf410, 0x805c5e2,
+          0x65f8d90, 0x15dfe11, 0xca72456, 0x827a416 },
+        { 0x33a36c4, 0x5af8884, 0xd8ee604, 0x8bfa54c, 0x9ce290f, 0x08fd141,
+          0x287b3a6, 0x2db5e8c, 0x03cdad2, 0xe5be981, 0xbf810b9, 0x155b874,
+          0x670f473, 0x2ae42de, 0x7f74657, 0x2218584 }
+    },
+    {
+        { 0x23ffa43, 0x54b2a50, 0xa24d919, 0xcf87b16, 0x63524e8, 0x1ff5402,
+          0x56d1e54, 0x73c94e0, 0x3899fb5, 0x7651552, 0x18723bf, 0x13a7214,
+          0x3561517, 0x39afbdd, 0x9f2862e, 0x49b790a },
+        { 0x527d2ce, 0xc8c1f4f, 0x7609bb7, 0x1997aec, 0x02a3400, 0x583ad80,
+          0x4f79706, 0xac2374e, 0x21b7183, 0xbf1f9a8, 0x6600fe0, 0x06158ab,
+          0xbd56751, 0xfcc9b2e, 0xddaaec7, 0xe1de5ac }
+    },
+    {
+        { 0x788fdab, 0x230baa1, 0x7d04597, 0xf30860a, 0x99f4caa, 0xa2c7ece,
+          0x6ad065e, 0xbd39f10, 0x3bef7bd, 0xfd92f5d, 0x96d2203, 0x6069fad,
+          0xc4d9e0d, 0xbff38ca, 0x1fda313, 0x419a017 },
+        { 0x572f035, 0x5d77fd8, 0xb282b40, 0x5af99f2, 0x23facff, 0x7257d3b,
+          0x58c90af, 0xf2ee223, 0x9b6a52a, 0xcc2687d, 0x302430e, 0x140892c,
+          0x3ec4f38, 0xa934d5e, 0x3bd18be, 0xc087d7c }
+    },
+    {
+        { 0xa2c5ed7, 0x7e94138, 0x53610bf, 0xbc8ceef, 0xd86f803, 0xe89356b,
+          0x5a55330, 0x9a3a380, 0x11ad648, 0xe894aba, 0xba95918, 0x2e68fba,
+          0xfcad344, 0x643e2ba, 0x61640aa, 0x0dd0256 },
+        { 0xe25cbdd, 0xc02e479, 0x13a1b3f, 0xd78c4d8, 0xcca9692, 0xa6dae8f,
+          0xe5de8a0, 0x3dd91e9, 0x764ea36, 0x78ae0ce, 0x85dbc5e, 0xb4ad999,
+          0xe82a169, 0x967ff23, 0xbaee1fc, 0xaeb26ec }
+    },
+    {
+        { 0x9a6f90c, 0x8c50255, 0x0ea374a, 0x56e7abe, 0x56413b2, 0x675c722,
+          0x946753f, 0xd3fc17e, 0xe235f7c, 0x28c4e1f, 0xb028eb0, 0xe209bcd,
+          0x489fe88, 0x7d0f93a, 0x063706a, 0xb966a2e },
+        { 0x4a30319, 0xb6c228c, 0xca6d674, 0x6868efe, 0x057311a, 0x0610a70,
+          0xbad7f89, 0x0808112, 0x1dd6181, 0x2a2462c, 0xb58e88a, 0x52ed9fe,
+          0x33821a2, 0xbbff16f, 0x17f882a, 0xda53e96 }
+    },
+    {
+        { 0x8c30e5d, 0xb6ffca3, 0x5c905f5, 0xa90f991, 0xd753e88, 0x72fb200,
+          0x7256c6a, 0xe509d4c, 0xd866500, 0x369e552, 0x33cf8ae, 0xee4b7e0,
+          0xefcf6eb, 0x280d954, 0xd557f0e, 0x5b275d3 },
+        { 0xb5cecf8, 0xeb17211, 0xbdb2f8d, 0xd6ad50f, 0x35e04b7, 0x2478c7b,
+          0xac73bd3, 0x97e7143, 0x4817e24, 0x09d6ede, 0x2c405e1, 0x68fea71,
+          0x05f67a1, 0x34adbc9, 0x73edf99, 0xd20ab70 }
+    },
+    {
+        { 0x569f191, 0xe116a96, 0x4d6e29a, 0xb3f0bce, 0xf51dbab, 0x30b9e1a,
+          0x346d276, 0x1dd36f3, 0x0749a27, 0x8315103, 0xab47f70, 0x242f148,
+          0x5585681, 0xe8a5bcf, 0x5ed79ba, 0x8b80184 },
+        { 0x3894ad1, 0xa4042fd, 0x2b88bc6, 0x82f781d, 0xbe4c397, 0x2d34cac,
+          0xdd99c9f, 0x8731aea, 0xef1d382, 0x0f95498, 0xdd0bbc9, 0xcaba2e1,
+          0x54064e8, 0x78889e9, 0x61a8ab9, 0x8cd9c97 }
+    },
+},
+{
+    {
+        { 0xfa0459e, 0xf31f53f, 0x315cd6b, 0xf8742a1, 0xae64e97, 0xabe2f50,
+          0x9b9da48, 0xbd78741, 0x51e526e, 0x4521a33, 0xe10ba45, 0xfa05935,
+          0xe8f903c, 0x5c947e1, 0x5a754ee, 0x0aa47d1 },
+        { 0xd814825, 0xb2849ef, 0x5c9968d, 0x9c2a5d2, 0x04e634c, 0x24dbb26,
+          0xdb38194, 0x33f3a4c, 0xc8a2b6b, 0xe04f609, 0xabbbfdb, 0xcaefd8e,
+          0x404498b, 0x683119a, 0x8b21cbd, 0x24ab7a9 }
+    },
+    {
+        { 0x21fa2dd, 0x6f13269, 0xc10a4bc, 0xd79e61c, 0x4bd6d46, 0xac4b3ce,
+          0xbd3f37b, 0x52459b6, 0xa396966, 0xce0f0a3, 0xa1ed488, 0x050d1d5,
+          0xe0b17fa, 0x1b9c403, 0x04a2e66, 0xee1abd0 },
+        { 0x5cf3e3b, 0x97065c3, 0xbe33441, 0x6513d5f, 0x79047ae, 0xcd34634,
+          0xfd22df1, 0x45cbb1c, 0x967b17c, 0x7a173ae, 0x2223cda, 0x75f5ba7,
+          0xefe0a73, 0xe3d12db, 0xfd7adcf, 0x3b7f94d }
+    },
+    {
+        { 0xf1e9b7d, 0xd596a13, 0x6734e0c, 0x04f5bdd, 0x8be163a, 0x18b694f,
+          0xd959fa3, 0x15620c7, 0x53d2a3b, 0x65fc2c5, 0xc4d36f2, 0xd44a364,
+          0x268ceab, 0xc8b421f, 0xbfe2bd4, 0x564139a },
+        { 0x19d4633, 0xb524610, 0x6346934, 0x5ab3f88, 0x9819422, 0x96691fe,
+          0x8b39b82, 0xdfdec89, 0x97cfb27, 0x84b1c79, 0x4d6d004, 0xe59a98d,
+          0x12c350f, 0x5e5d0c6, 0xd415774, 0xb431220 }
+    },
+    {
+        { 0x6aae0a2, 0x3d0ca73, 0x48c2d8c, 0x7b1991f, 0x5cdae72, 0x00ae856,
+          0xbd55128, 0xdbb6ca0, 0x45c82bf, 0x3c2ab2a, 0x79545ca, 0xea5a559,
+          0xd5927d0, 0xeba9a26, 0x83257fc, 0xb52e401 },
+        { 0xca9650a, 0x55ed517, 0xe3ebff2, 0xbdaa081, 0x9f8831b, 0x8cf7ce4,
+          0x6e3b8d3, 0x1d0b5bd, 0xd8fc869, 0xa314a9f, 0xb892bab, 0x07f2079,
+          0xa0cc9d9, 0xb700dbf, 0x6dc0a39, 0x7105a08 }
+    },
+    {
+        { 0x8c7d901, 0x0c7e05d, 0xaf3182b, 0xa7ff681, 0xf9a0d06, 0xb88e3ca,
+          0xc343b7f, 0xfe20a12, 0x03251f9, 0x9f02577, 0xc40c5eb, 0xf225ded,
+          0xb208ea7, 0x50e0cec, 0xe6eeb65, 0x5b250f0 },
+        { 0x4806b6e, 0x807a153, 0xfa94139, 0xded120a, 0x49366fb, 0x237ddc7,
+          0x5a34bcb, 0xdd3674e, 0x9c4a61d, 0xef6cdff, 0xb2fb896, 0x036194b,
+          0x9528cd9, 0x3865953, 0x6936a52, 0x0723c59 }
+    },
+    {
+        { 0xe17719d, 0x1f84cd5, 0xc73b394, 0x545939b, 0x83e84e7, 0xefbf3c5,
+          0xf77fd66, 0x6cc46f1, 0x1383ab8, 0xa629f59, 0xcd35cd2, 0x9177ffa,
+          0x9dd411b, 0x039187f, 0x7b7eea8, 0xa9cf1cf },
+        { 0xac47e5d, 0xa3b105a, 0xd0a9da4, 0xa755bea, 0x73da15e, 0x50cfbae,
+          0x60b628c, 0x9456cbc, 0x9b7a910, 0x7ffc362, 0xcd6d6a4, 0x30b5924,
+          0x0b04ab6, 0x198629f, 0x624dea9, 0xc74609c }
+    },
+    {
+        { 0xaf12fa6, 0x27d4d77, 0x690aeb2, 0xdd8a216, 0xfe24417, 0xe48fc02,
+          0x720e17e, 0x1970403, 0xce37b42, 0x95013fd, 0xde4bd9b, 0x06817d2,
+          0x63d0ba2, 0xc5863e7, 0xa556f5d, 0xa1bafc0 },
+        { 0x410a78a, 0xf28ec7b, 0x0a01a63, 0x0dcac42, 0xb5bce11, 0xfcd3fa4,
+          0xd278b89, 0x054d7e5, 0x5ce49e3, 0x5195db8, 0x2c73d96, 0x4c0b167,
+          0x20a1bdb, 0xd943077, 0x59c77a7, 0x66fa8b3 }
+    },
+    {
+        { 0xd7462fe, 0xb9e93ae, 0x18dde4f, 0xbfe54b2, 0x3dbb08e, 0xaabb528,
+          0x0e5fc45, 0x8c36702, 0x8e69be3, 0x3502888, 0xc12a11d, 0x6d2efc1,
+          0xf265e30, 0xfce5ceb, 0x5742c7e, 0x58c8bb3 },
+        { 0xccf7fa0, 0x32e89dc, 0xdd020a4, 0xa811f33, 0x5129fe5, 0xa10d620,
+          0xe4ed29b, 0x3841c88, 0xd8b1ea6, 0xf3303a9, 0x1781f58, 0xa9a0cad,
+          0x8f3ef0b, 0x4502b38, 0x74c6d35, 0x2b7587e }
+    },
+},
+{
+    {
+        { 0x23ae7cd, 0xc6eaea1, 0x73c0caa, 0xa1884d4, 0xef1ea88, 0x901e76f,
+          0xa14269d, 0xdb9935c, 0x947f1de, 0xe8b2486, 0xa657588, 0x4ad56f4,
+          0x2913fb1, 0xe768054, 0x37600da, 0x2abff5d },
+        { 0xa81a797, 0xa814813, 0x46acb69, 0x63e76a4, 0x4ab8277, 0xb103839,
+          0x9d8e759, 0x587de34, 0xddf62df, 0xdfaeb8d, 0x9239d49, 0x24fe1cf,
+          0xe130d1c, 0x7de7409, 0x581d070, 0x3ecfef9 }
+    },
+    {
+        { 0xf87c72d, 0x8d177a0, 0x8c6d1de, 0xae7e581, 0x8cece85, 0x0077b5f,
+          0x32d2187, 0x3824838, 0x6db2bd2, 0x49d8b15, 0xc8d85b9, 0xe9e5513,
+          0xe05c53f, 0x63c410c, 0xd86f752, 0xceaf2fb },
+        { 0x93806c5, 0x0b432fe, 0x3d06c75, 0x18eb15d, 0x12cfc02, 0xcaad826,
+          0x1e2d045, 0x581e040, 0x95edcfd, 0xd573cb5, 0xdbc66e3, 0xce71948,
+          0xacc14ea, 0xcf68721, 0x6cac4dc, 0xf68bea2 }
+    },
+    {
+        { 0xcb74da2, 0xd8576af, 0xc433f46, 0x8771c29, 0xe2f5b8e, 0x7315af6,
+          0xba33928, 0xc195481, 0x2fb1f94, 0xb77dcc2, 0xa610f75, 0xcb3e57c,
+          0x53907df, 0xeb2a927, 0x23eff95, 0x916f149 },
+        { 0xb6cd291, 0xbb378e4, 0x2f13ce1, 0xa2a5e2b, 0xbcd00b0, 0xa8a0e60,
+          0x682b75a, 0x5902741, 0x3f65a77, 0xa0882c9, 0xc93cfff, 0x2069f75,
+          0x70c0cb9, 0x1ede405, 0x0d526c4, 0x13840c9 }
+    },
+    {
+        { 0x03ced48, 0xdc2caaa, 0xa0315be, 0x2079219, 0x3b1f642, 0xca49356,
+          0xb0665f2, 0x0202dc7, 0xb7a5238, 0xe5d6bbd, 0x26eab32, 0x36fbd5e,
+          0xf5819b4, 0xb3988f1, 0x4aa4d69, 0x5b15dc8 },
+        { 0x54e5c24, 0xa52feed, 0xe91a797, 0x927471b, 0xd57f677, 0xd119bfd,
+          0x78e4c4f, 0xde38f7b, 0xb150bc3, 0xa7af516, 0x26b76c2, 0x403b21e,
+          0x92300dc, 0x589067d, 0x066802a, 0x04e406a }
+    },
+    {
+        { 0xa9ca9bb, 0x28e7d09, 0xfccf4a0, 0xaa84fd5, 0x635b7ed, 0xdbe9fb8,
+          0xd56fc7c, 0x9ede3f5, 0xb01cb29, 0xa4b5031, 0x7f93703, 0x584299d,
+          0xb6fe825, 0xbd28868, 0x8b9c2d9, 0x1d385d4 },
+        { 0x822be80, 0x6606f4a, 0x626d0fd, 0xb5a0165, 0x14568ad, 0x9920a20,
+          0x1c6d174, 0x7d430f4, 0xe02e9e9, 0xc243e16, 0xa6bd649, 0x367f1d2,
+          0x71b8c36, 0x6939100, 0x4de2984, 0x2ede131 }
+    },
+    {
+        { 0x5beec32, 0xdc78187, 0xa525ff4, 0x1fff0cc, 0x676df34, 0x6e86425,
+          0x3f638e1, 0x2b4e8a6, 0x9b1e59f, 0xc4991d2, 0x1589717, 0x399d001,
+          0xbe041cd, 0x406464e, 0x9e65bb0, 0x901cb3d },
+        { 0xfb42307, 0xf5f4572, 0xf1b7307, 0xf81b3b0, 0xf2094d1, 0x8fb695c,
+          0xdb56f7b, 0x7db4792, 0x5a794e0, 0x36836d5, 0x09bc879, 0x2da477b,
+          0x1887c40, 0x1cdfadb, 0xf2699b6, 0x65dc6c2 }
+    },
+    {
+        { 0x4737972, 0x36f9f21, 0x7a387b0, 0x48f0c8b, 0x39a1d24, 0xa156ed3,
+          0x0fed268, 0x375293a, 0x7ff75cb, 0xf679f48, 0x1cc9e62, 0xd15a00f,
+          0x22c3877, 0x92a7dc7, 0x6fb0ed4, 0xe987063 },
+        { 0x16f5f3c, 0xfd8e59c, 0xaeeb48e, 0x375732e, 0xca1ab42, 0x2dd9213,
+          0x9ffccea, 0xcb06209, 0xb23edfd, 0xfc611f6, 0x99b060e, 0x2716349,
+          0x820de8a, 0xb938b5d, 0xeb49a32, 0x138f6e7 }
+    },
+    {
+        { 0xe485f70, 0x7feda63, 0xeb27b2c, 0x646380a, 0xc4511c7, 0xcf8fe32,
+          0xff9406a, 0x2c68e1e, 0x20b6020, 0xa9f2fd9, 0x3b3e465, 0x1c98fc6,
+          0x93e53aa, 0xb8dac35, 0xa750e96, 0x2fb47b6 },
+        { 0x1950bb3, 0xea373ef, 0x4ac7aec, 0x8156694, 0xb55b931, 0x8d6b3c2,
+          0xb62ef7d, 0x5d13f2d, 0xab9182b, 0x4647f2a, 0x33bf07c, 0x8f56c5a,
+          0xb35a221, 0xc5ab284, 0x5a46a6b, 0x0747ab7 }
+    },
+},
+{
+    {
+        { 0x86b85c5, 0x5b9236c, 0xc482448, 0x5967a0d, 0x7df6ae0, 0x397c955,
+          0x5378f2b, 0xf83ee1c, 0x6e05dd1, 0xf82df65, 0x19d7c8b, 0x4c424f6,
+          0xa6d5f2a, 0xa612550, 0x63c3ebf, 0xfe8482a },
+        { 0x0142c82, 0xcb8d403, 0x3679e6c, 0x08b0662, 0x3eca5ee, 0x3ea5146,
+          0x1370500, 0x089eb3b, 0x5a0d306, 0xcbfb19c, 0x42a65bb, 0x2f68588,
+          0xe51e119, 0xe3e1db5, 0x110895e, 0x2c150e7 }
+    },
+    {
+        { 0xf6d4c4c, 0xf323488, 0x63b87e2, 0x5fc931f, 0x35c759f, 0x8867da0,
+          0x9746d4c, 0xb6f1eff, 0x990be0a, 0x8a8172d, 0x5c407b4, 0x1113eee,
+          0x378ed8a, 0xd80dacf, 0x3fa7fd1, 0x99b57cf },
+        { 0x5176405, 0xf5bb6d9, 0x92e83b5, 0x6b8963a, 0x8a7ef8d, 0xac55b6b,
+          0x6c1fbf0, 0xe73fa12, 0x60148df, 0xdb37560, 0xf3f1fba, 0x72f1a98,
+          0xea550f2, 0x1f71d0a, 0x9544a87, 0xc3ea4f0 }
+    },
+    {
+        { 0x4322bf3, 0x5b09da2, 0x61264e1, 0x2a573d5, 0x803acc4, 0x93cb2e1,
+          0xe502fc6, 0x397b4fb, 0x39e0ebc, 0xddfb212, 0xbbcbc57, 0xeccd8f5,
+          0x4663788, 0x49d3bed, 0x1218df9, 0x37192aa },
+        { 0x2ffa3c6, 0x8a05bc9, 0x23ebf4d, 0xc38c281, 0xfe343a8, 0xc80d547,
+          0x6c63516, 0xa8d5a5b, 0x8d8fa6b, 0xc5d8ce1, 0x24a87c0, 0xeb5e872,
+          0x75bfa23, 0x9806e9e, 0x689469a, 0x11f0889 }
+    },
+    {
+        { 0x8e75666, 0x81005f6, 0xd349505, 0xb84d861, 0x9f321ea, 0xe083282,
+          0xcfa33a1, 0xb751d7a, 0x067c550, 0x793cf6f, 0x1027e56, 0x073a6b2,
+          0x66a6012, 0x53f40ee, 0xc210fa9, 0x70bfaa8 },
+        { 0xe4b5998, 0x1518e39, 0x24b8d9c, 0x8f0b530, 0xafdf923, 0xd91c281,
+          0x24e3f69, 0xc5cfb28, 0x870871f, 0x63a529a, 0x2128dad, 0x3d3e887,
+          0xcb30cce, 0xed658dc, 0xafb7bae, 0xf9373b9 }
+    },
+    {
+        { 0xde58ed2, 0x22d4dbe, 0x03f8789, 0x4fefc1d, 0x344817f, 0x6b0a1fe,
+          0xa56b0b2, 0x96bef40, 0xda249fa, 0x32684ee, 0x524a91b, 0x8298864,
+          0x0c736a1, 0xa958baf, 0xef2f3e5, 0xd033a7d },
+        { 0x43f4d6a, 0x5be3edc, 0x9c89abb, 0x326a39d, 0x55d997a, 0x90c44f7,
+          0x6e966c2, 0x2058106, 0x6548038, 0xdbae490, 0xd473fc1, 0xac7bc97,
+          0x4b2603a, 0xb34488b, 0x5e9bb98, 0x27aea27 }
+    },
+    {
+        { 0x1b88773, 0xa59e728, 0x0c241f6, 0xe2f05d4, 0x4e75749, 0xa56229e,
+          0x1b10705, 0x8f00c0b, 0x19394d3, 0x8559946, 0xaaf5e32, 0x0d7e352,
+          0x787b8ea, 0x526c462, 0xa179d48, 0x89297d9 },
+        { 0xef43892, 0xeff17e6, 0x221f841, 0x17091eb, 0x4a4b848, 0x82f5eb3,
+          0x8eb7b76, 0x6bea477, 0x76c536c, 0x21f2271, 0x96c81bb, 0xd9ef2c8,
+          0x54bf4d3, 0x7c27546, 0xd7c28c8, 0x9dd4662 }
+    },
+    {
+        { 0x20e1a6b, 0xe7fff00, 0xa08d467, 0x26a35c6, 0x3248c91, 0xb3c773d,
+          0xba7d935, 0xa646615, 0xb0d26fa, 0xa91f453, 0x60c6d32, 0xdcf9c34,
+          0x9e3e3dc, 0x6366861, 0xf30f3e2, 0x3012813 },
+        { 0xc2fc61a, 0xac6623d, 0x2bfd2ff, 0x108dc25, 0x231d6ea, 0xd7f5c0d,
+          0xad1107e, 0xa904f9a, 0x0d1e9c8, 0x46941c2, 0xc810cf2, 0xe5b6451,
+          0x4f511d1, 0xaba8e67, 0x08373fe, 0x5b4b94f }
+    },
+    {
+        { 0x849c230, 0x002d4e2, 0xd8ba391, 0x9bed0ef, 0x828e319, 0x745e0c0,
+          0xca58de2, 0xcd40907, 0x1abaa4a, 0x2c87ab1, 0xdb64391, 0x3c17a97,
+          0x86c72d2, 0x36b184e, 0x485f7aa, 0xb03d202 },
+        { 0xde24aba, 0x2b6b79b, 0x2325fb2, 0xdcb7854, 0x66ebae2, 0xf5d1db9,
+          0x903840a, 0x35a4d5b, 0x190e9da, 0x7afeb09, 0x35c1792, 0x1818f6a,
+          0x3faa269, 0x90091fa, 0x2570235, 0xc4ccff6 }
+    },
+},
+{
+    {
+        { 0xec85940, 0xa177619, 0x7ef7eee, 0xfca24db, 0x7a90c11, 0xb2450f3,
+          0xdbf4f85, 0x29d256d, 0x51316c3, 0x920c8d0, 0x04474da, 0x2f7f7ba,
+          0x2ec9a0b, 0x308117f, 0xd0d2085, 0xd0a231a },
+        { 0x7ab641d, 0xf3288fc, 0x9f4fa32, 0xc68bade, 0xbbf8253, 0x768f014,
+          0xc0a33f0, 0x5eff260, 0x6bb93ce, 0xc71b453, 0x680697f, 0xa71d045,
+          0xce72bc3, 0xb62444c, 0xd1379f3, 0x11f03e8 }
+    },
+    {
+        { 0xc16df92, 0x1f54789, 0xe3ed142, 0x874c642, 0xfa2a9f1, 0x6699f60,
+          0x3fecfc1, 0xbd1b8d3, 0x8a3d953, 0x59682d5, 0x4a36b81, 0xf17c021,
+          0x181a666, 0xeb9621d, 0x3cf1ad8, 0x7c2c3ab },
+        { 0xe529f7c, 0xe6888c3, 0xb355315, 0x197b66a, 0x83e31ac, 0x63b558a,
+          0x891c68e, 0x4aa7bc5, 0x592e360, 0xc17d989, 0x1363666, 0xc750a29,
+          0x4909ac0, 0x0d53470, 0x4594a10, 0xd6d0272 }
+    },
+    {
+        { 0x3fbb635, 0x35c541b, 0x5982afa, 0x50016d0, 0x96b0ca0, 0x58ebce4,
+          0x577ea56, 0xb940027, 0xe38480f, 0xf29d305, 0xebd6a2c, 0x43705b0,
+          0xe90c639, 0x0e4acda, 0xf56e05e, 0xbe94a29 },
+        { 0x30659ad, 0xc61f4a0, 0xc402211, 0x39074ad, 0x51b621d, 0xfe0d8d5,
+          0xd1d5222, 0x2d02e8d, 0x46c2683, 0x05ece3c, 0xc689d41, 0xf70705a,
+          0x4d837bf, 0xe3caf44, 0x75ba6d0, 0xfda0584 }
+    },
+    {
+        { 0xcb7d458, 0x1098163, 0xf5ba834, 0x12b645f, 0x28af72c, 0x70a3181,
+          0xf32e5dd, 0x5f4727e, 0x10a21b4, 0x7cbae15, 0x6785389, 0xa80bf80,
+          0xb8f93b7, 0x9827402, 0x08349da, 0xe385f82 },
+        { 0x9589f6e, 0x2d05461, 0xe7c0191, 0x6aa5b26, 0xbd5574d, 0xe79ae12,
+          0x4148e61, 0x5d13f91, 0x13716ff, 0x7b2be0f, 0x80bb81f, 0x82b0fe6,
+          0x3e2569c, 0x697633c, 0x873f8b3, 0x6c1f083 }
+    },
+    {
+        { 0x0be1674, 0x6e26d85, 0xab8044f, 0xe4e47f6, 0x82fc434, 0xfdf46e8,
+          0xc89cadc, 0x639ae2c, 0x4b85bdc, 0x2244a52, 0xb7cf4ea, 0xb1e4790,
+          0x7e0bb8f, 0x51dce03, 0x2716cee, 0xdd14335 },
+        { 0x8e8841d, 0x1c049b4, 0xb97c621, 0x6bf26dc, 0xba01178, 0x21d6255,
+          0x8e4f0e4, 0x477258a, 0x68f8ef1, 0xf5e437e, 0x8b03e1e, 0xd118fbc,
+          0xe1c91b3, 0x3d6bc51, 0xd5b6907, 0xa259486 }
+    },
+    {
+        { 0x7b6f5dc, 0x4159cfc, 0x493694a, 0x05a52b3, 0x83b8883, 0xeeb511c,
+          0x2b06400, 0x19d79e4, 0x738f37e, 0x8e503a2, 0x5a94ad9, 0xa30e579,
+          0x262618d, 0x3981c75, 0x2dcba19, 0x06b6c69 },
+        { 0x4d1b051, 0xd7242ee, 0x3b350c4, 0x6274ccb, 0xf540019, 0x66df0bb,
+          0x5ae12d5, 0x4d66be6, 0x1049cba, 0xcea2960, 0x8df84b3, 0x4047339,
+          0x75a31c8, 0x7d6c96b, 0x874174c, 0xbb80159 }
+    },
+    {
+        { 0x59f1aa4, 0xf0f7be0, 0xdcff451, 0x798f39a, 0x8014e1e, 0x96763ff,
+          0x09cc5ec, 0x03987a8, 0x893650a, 0x4919656, 0x75e24df, 0x92e8eef,
+          0xe89d639, 0x54e97cd, 0x7682cc0, 0x8081d06 },
+        { 0xa8ceb71, 0xb9ef41a, 0xa4d7aaa, 0xb8173a4, 0xc54ee10, 0x93d81b1,
+          0x70a445a, 0xabe1805, 0x64d569d, 0xac0ff97, 0x3e570be, 0x86946b2,
+          0x4180641, 0x8e11dd2, 0x99f67dc, 0x3d0b33c }
+    },
+    {
+        { 0x48bf5a4, 0x2c9637e, 0xccaf112, 0x9fdec19, 0x5c42023, 0xe5cde9d,
+          0x878f0cc, 0x9869620, 0x1fe6eba, 0xcf970a2, 0x54e678b, 0x1df5ec8,
+          0x28d00dd, 0x4667f01, 0xb0b3fa8, 0xfa7260d },
+        { 0xb34239b, 0x6bd2895, 0x2d2a50d, 0x04c8bc5, 0x6cb23e2, 0x14e55ef,
+          0x3a278d5, 0x6440c27, 0x2193046, 0xf4b12e3, 0x5dd4c08, 0x46adf64,
+          0x4656e8c, 0x70e2998, 0xe4acd44, 0xe7b36ea }
+    },
+},
+{
+    {
+        { 0x16cf664, 0xea64a57, 0x26fd357, 0x8497ee4, 0x814e851, 0x44d94b4,
+          0x5a6a2cf, 0xf4aac22, 0x80c301f, 0x947b309, 0x7865383, 0xf390ba1,
+          0xd1773d3, 0x16c4fc6, 0x6227220, 0x61b9814 },
+        { 0x1dd0270, 0x07dd03a, 0x0f160df, 0x290ca82, 0x44ba955, 0x8f22054,
+          0x0b6f1b3, 0x4e85e45, 0xad78089, 0xfd73ce9, 0x2f2cb0e, 0x67c1270,
+          0xee33a61, 0xa7de0d7, 0x6553261, 0x6a811cc }
+    },
+    {
+        { 0x2d0a427, 0x5ef0574, 0x220a341, 0xe8d2e95, 0x8044886, 0xdd28cbf,
+          0xa1aa58b, 0xdad7b4b, 0x8ec901b, 0xb28f373, 0x5bbe3db, 0x1841a93,
+          0xa075fee, 0x8fd7cd1, 0xc0d3cdd, 0x93b603f },
+        { 0x5edd859, 0xca54fd5, 0x64ed687, 0xa4cb05f, 0xed1a3d7, 0x3138668,
+          0xee32be5, 0x1224fda, 0xc80aeb3, 0xf1f532b, 0xe8d4d69, 0xa4f65d0,
+          0x5905fe5, 0xc697a01, 0x6690ce4, 0x514da7a }
+    },
+    {
+        { 0x3de4a55, 0xc7b9af8, 0xb318d93, 0xc79bad7, 0xf5b1c83, 0x1808071,
+          0xb965b16, 0x92112ef, 0x7bb740a, 0x655ab38, 0x384ff87, 0x53dbc8b,
+          0x72dc6f2, 0xd153c28, 0x99c7819, 0x2ec20e1 },
+        { 0x3b854b5, 0x65e46ea, 0xc711db5, 0x272d5ae, 0x26e19e8, 0xfd1bb53,
+          0x3dc0665, 0x33280b8, 0xb8f1c4a, 0x95b986e, 0xa685c4a, 0xa671fc4,
+          0x83bdbbf, 0xa03cbd5, 0xab77544, 0xd329402 }
+    },
+    {
+        { 0x8e62b35, 0x40fa651, 0xf9e55a6, 0x3913b11, 0x5270a41, 0x4e8089b,
+          0x80d1886, 0x565f52a, 0x512749b, 0x93b5f05, 0x141c547, 0x35c869c,
+          0xf86717f, 0x9a44a1a, 0x9c2b2cb, 0x2b9984b },
+        { 0x4952322, 0x61fb607, 0x7af1464, 0x2d4072f, 0x600eb30, 0x9b2fa8c,
+          0xf10668e, 0x6071fb7, 0x90634ca, 0x27cc24d, 0x471d32b, 0x3875bc2,
+          0xa11210c, 0x678590b, 0xfcc5a9a, 0x352b447 }
+    },
+    {
+        { 0x5fa3200, 0x795d541, 0xa92949f, 0xadaa557, 0x3cc88c4, 0x42fff06,
+          0x71b68a5, 0x26d6831, 0xe67ad8c, 0x3286549, 0x86396b2, 0x5bf6363,
+          0xe12c8ea, 0x41229b6, 0x748952e, 0x05320c9 },
+        { 0x900b460, 0xae36b63, 0xf2b6aff, 0x9354ff2, 0x065ee0c, 0x10b810b,
+          0xcc8bb38, 0x4d6925f, 0x7a22f14, 0x31c03fd, 0x57544e8, 0x76b7f44,
+          0xc0eed26, 0x3a9123c, 0xe0cd1cc, 0x77acd67 }
+    },
+    {
+        { 0x07ec527, 0x2e90530, 0x62937cf, 0x32388ef, 0xe229188, 0xa445389,
+          0x33bcebe, 0xa44b68e, 0x4c4e701, 0x5a8722e, 0xcf07e41, 0xfd066e8,
+          0x95fab62, 0xa3c1a4f, 0xe542f24, 0xb4d6a1b },
+        { 0xaf6c9b5, 0xe6a92e4, 0xc83d61d, 0x9452484, 0x0062276, 0x422b55b,
+          0x5279688, 0x261973a, 0x3999fb2, 0xde8be26, 0x7b029ca, 0x64e9628,
+          0x06897d4, 0xd8edfaa, 0x6955511, 0x408319c }
+    },
+    {
+        { 0x50a5632, 0xff6baed, 0x5c5885a, 0x922b7d0, 0x1b45864, 0xdf0f3b3,
+          0xc04340e, 0x27e49c0, 0x122c447, 0x618c566, 0xeafee7e, 0x7863a38,
+          0xb828cb0, 0x7143aff, 0xf9d054e, 0x51fcf4c },
+        { 0x27f5e09, 0xc4a4b31, 0x90be2bd, 0x021f47a, 0x7ab956d, 0x1a06019,
+          0x86ea86b, 0xe77fa15, 0xd550ef3, 0x9ccde87, 0x6532654, 0x7dee53a,
+          0xe826387, 0x8b4f060, 0xad077b5, 0xda38637 }
+    },
+    {
+        { 0x0e9fac8, 0xbc901b3, 0x6fb2a2a, 0xfa08204, 0x5e04efc, 0x92f68ab,
+          0x9ac12d0, 0x184a30a, 0xb25d479, 0x1aa11aa, 0x0f03161, 0x8bc5f4c,
+          0xcfc8817, 0x7e3a083, 0x597f93f, 0x84d9355 },
+        { 0x239abc6, 0xc014478, 0x8d37b04, 0xb226b09, 0xf575789, 0xb056942,
+          0xba745eb, 0x816b95a, 0xb98ddb6, 0x2a49d39, 0x291af81, 0xc41ca26,
+          0xab26347, 0xb3afe99, 0x604b638, 0x59c31bc }
+    },
+},
+{
+    {
+        { 0xc42befd, 0xa16a8b9, 0x2052f00, 0x731c9c9, 0x1f5dfa0, 0x1ad49b4,
+          0xbffce36, 0x7a289e3, 0x0c79cf1, 0x868fac0, 0x86721ab, 0x6d6d284,
+          0xe726c94, 0x590f928, 0x51f3841, 0x0e802cb },
+        { 0x0b694bc, 0x6a6a57a, 0x8120fb8, 0xb9bb0cd, 0x9c05826, 0xad96ac7,
+          0x7768df0, 0x294da8c, 0xb56c6c6, 0xfe32311, 0xae8d050, 0x291c2c6,
+          0xe7db4c9, 0x1c765e7, 0xd65f9f7, 0xe058298 }
+    },
+    {
+        { 0x7e8d345, 0x4bfa85b, 0xde1dfc8, 0xa04ef95, 0x324ace3, 0xb5f7f21,
+          0x574b14a, 0x4b350a1, 0xf8e5c8d, 0x11436bf, 0x7642369, 0x1c789f9,
+          0xfb623ce, 0xeb5e335, 0x442d562, 0x9deacd2 },
+        { 0x531ee71, 0x4ff989f, 0xaacb52a, 0x43e2c49, 0x85bfadc, 0xa763198,
+          0xd0161a0, 0x08b6d5c, 0x541f197, 0x010e3fa, 0x3279a16, 0x83a589e,
+          0x6309f9b, 0xf099137, 0xf1cea10, 0x07c093b }
+    },
+    {
+        { 0x33d2192, 0x1ce3f0f, 0xc37ce73, 0x07b559a, 0x207be27, 0xaa2ad38,
+          0x7ed93de, 0x84f053b, 0x3b98a4b, 0xbc5c797, 0x63aa9b9, 0xc923461,
+          0x231a10c, 0x807cc16, 0xa061209, 0x8ffdf57 },
+        { 0x497070f, 0xa9ca741, 0xd113b3a, 0xf608ec9, 0x8d0384d, 0x5132726,
+          0xf5ec307, 0x96686ac, 0x71c4665, 0x437bbbd, 0x7c379ca, 0xdef09d5,
+          0x621747c, 0xf8be033, 0x8ae8047, 0x2775b37 }
+    },
+    {
+        { 0xb2c4fc2, 0x4009798, 0x203772e, 0x148d7d1, 0xf8423fb, 0x9d9392d,
+          0xaf8cef4, 0xa5bd72e, 0x4380b53, 0x579d58d, 0x8c39d24, 0x2ff88f1,
+          0x5706466, 0x9ca2fbc, 0x1e56af2, 0xb42987d },
+        { 0x5d94ea8, 0xcc2556e, 0x5369d76, 0x4e5c2b3, 0x2a94f9c, 0x5de3574,
+          0x5cb4145, 0x8d068c9, 0x51bfcbf, 0x4d553ff, 0x8a23fce, 0x3ab7164,
+          0xd0fa7f3, 0xc9cb3a9, 0xed9ced1, 0xf81209b }
+    },
+    {
+        { 0xe5b66f5, 0xde7356e, 0xe8a25e0, 0x7b2bf1a, 0x2c9b725, 0x09a444a,
+          0x4906c55, 0xfd8a2f4, 0x82514f3, 0x409cc80, 0x28999a9, 0x47e0099,
+          0x6a312f4, 0x0a582a6, 0xf6723de, 0xf7946f8 },
+        { 0x92d8aff, 0xa55f6ba, 0xa544b1c, 0xb62c3c8, 0x5c16a94, 0xa1d1411,
+          0x2ad5e71, 0xc378319, 0x06b1dd6, 0x13d7847, 0xee7ff55, 0x99005f8,
+          0x8a1e7d8, 0xfb5ea3f, 0xb4cac39, 0xdc7f53c }
+    },
+    {
+        { 0x36e3794, 0x482abaf, 0xc74684f, 0xc23e9e5, 0xf1629be, 0x4544cf6,
+          0x2f40374, 0xd8a8ee5, 0xf433bdb, 0x2eea87f, 0xae9990e, 0x489a99c,
+          0x54b23b6, 0xefc131e, 0x8600270, 0x25fe699 },
+        { 0xc059a7e, 0x03d2d9e, 0x6979c3c, 0xa6445b5, 0x9bfbcea, 0x491a10c,
+          0xe937af1, 0x15b5974, 0x797c7fc, 0x4be8002, 0xfedcfee, 0xbed8a49,
+          0xa9e0691, 0x35751ce, 0x9ef5982, 0xe9a9fa3 }
+    },
+    {
+        { 0x3065de7, 0xeffeaca, 0xac4d4e2, 0x841d544, 0xcaf199f, 0x8144679,
+          0x443967a, 0x98cf4f9, 0xf33183c, 0x8cd57f4, 0xc1b15eb, 0x390832a,
+          0xa53b500, 0xc4b1fea, 0xdff24b5, 0xd762a10 },
+        { 0xb0ee2a9, 0xccd3eed, 0x362d485, 0xa6dd4a9, 0xf1d047a, 0xeb4ff26,
+          0x23860fc, 0xc0771fd, 0x4b64114, 0xdbb4e39, 0x4d29b29, 0x2ff3f24,
+          0x387b365, 0x9cac005, 0xde5994a, 0x05b7aa6 }
+    },
+    {
+        { 0xc03dd63, 0x5e71752, 0xbc74687, 0xad10fe9, 0x54c76ab, 0x51a5b0c,
+          0x1f586d4, 0x763fd50, 0x816048b, 0xc7bd5ce, 0x3f744dc, 0x8fc83d2,
+          0x109df9a, 0x0561802, 0xccf0e43, 0x18fb01f },
+        { 0x038ab23, 0xe4606fc, 0xa664c98, 0x5878f1f, 0x5da7356, 0x3aedbbd,
+          0x516746a, 0x3c578f5, 0x1a17210, 0x259477f, 0x028248f, 0xc7a869d,
+          0x48cbf95, 0x6517a61, 0x3d04d47, 0xbc5f91d }
+    },
+},
+{
+    {
+        { 0x083ca53, 0x15fd9a9, 0x2697ca6, 0x1161da0, 0x56b676c, 0xf516af3,
+          0x75eec13, 0x8a420d5, 0x1a9526b, 0x72d6742, 0x76b463f, 0x8d8c29e,
+          0x8815627, 0x38a4f58, 0xe0650f9, 0xf7e528b },
+        { 0x382edca, 0x2cfa78e, 0xc4ad83c, 0x638d183, 0xe4a0119, 0x96d3b9d,
+          0xa7c1101, 0x5769ccb, 0x2b8d04a, 0xc3b3b79, 0x4951bde, 0x96212f6,
+          0x481161e, 0xad7905a, 0x41c5edf, 0x8fd6762 }
+    },
+    {
+        { 0x39d6cde, 0xf7b0635, 0x115a84a, 0x69d0549, 0xcbd9fe4, 0x4a976c6,
+          0x950ff96, 0xc92953f, 0x654d127, 0x1d7f0fe, 0xda0f75d, 0x7293870,
+          0xcf2277f, 0x7bb3652, 0x834484f, 0x64798c9 },
+        { 0xac3a76c, 0xb94d8bf, 0x7ff776b, 0xf5721a9, 0x2722e31, 0x23a6e9f,
+          0x9a5c034, 0xe9da996, 0x456ebc3, 0xb9bbf83, 0x96956a4, 0x239f58a,
+          0x18b7f00, 0x8b75beb, 0xa51cb97, 0x6c2b5b8 }
+    },
+    {
+        { 0x7eb41f3, 0x78b1c62, 0x17c4352, 0x0638fcf, 0x0c5709c, 0x939edd8,
+          0xedc906c, 0x0a8dfc3, 0xefb01ed, 0x3942f47, 0x49986fe, 0x4c82757,
+          0x4dffa57, 0x792545c, 0x6c3ff26, 0xeee6883 },
+        { 0x12b1218, 0x824d08e, 0x902457f, 0x515a478, 0xbae55b3, 0xc70cc9c,
+          0xbcef9d4, 0x1240737, 0x2f9db7f, 0xf22e616, 0x91f8da2, 0x98c4f02,
+          0xafaaa67, 0xa89219c, 0xe7d27e2, 0xf35fd87 }
+    },
+    {
+        { 0x01b80d0, 0x19b0cd7, 0xf9aebd1, 0x3d7e29d, 0x0477cbc, 0xd39c9ca,
+          0x5ff0d3d, 0xac0f615, 0x520fd01, 0x8a51993, 0xb22d6fb, 0x508ff54,
+          0x318d3ab, 0x8786c47, 0x4a683f8, 0x4312c46 },
+        { 0x95359f6, 0x73b1d39, 0x963011e, 0x0d94fa5, 0x9bfe83e, 0x5723af2,
+          0x6841df3, 0xafa9001, 0xb7c498a, 0x791e92a, 0x7ea4253, 0xbc931ad,
+          0xb783c06, 0x438e016, 0x2ca662b, 0x1347db2 }
+    },
+    {
+        { 0xfbaa861, 0x41df37d, 0x329e4de, 0x98ecb23, 0x507e018, 0xdaf1560,
+          0xb088e32, 0xa902269, 0xe4cab2f, 0xad898a5, 0x02c1e1b, 0xd84e9ed,
+          0x8488af3, 0xc20a5d5, 0x6cc77c6, 0xc7165af },
+        { 0xdeb7461, 0x8526f3a, 0x4a2d332, 0x03577b1, 0xe4760b5, 0x28e469d,
+          0xb276266, 0x442c7f9, 0xf9c90fa, 0x90d5c77, 0x3e211bd, 0x7aa8716,
+          0x5decfd6, 0x56d8ff0, 0xee23e6e, 0xa204b56 }
+    },
+    {
+        { 0x4aceafc, 0x2e4374e, 0x6fcd5e5, 0x978743b, 0xc4855ca, 0xa0f6345,
+          0xe98074b, 0x9bc7e4f, 0xc33d08a, 0x3835d57, 0x6f00566, 0xeec7c8b,
+          0x1acf55c, 0x71628a2, 0x97fb19e, 0x5da3750 },
+        { 0x01a7125, 0x6904a8e, 0xe6e3780, 0xad33c85, 0xc19f94a, 0x1702928,
+          0x7c04b3d, 0xb424ff2, 0x19e2ba3, 0xb212e39, 0xc9af4c9, 0x4cca8e8,
+          0xfd9bf0e, 0x98ab7ae, 0x9799db5, 0x21d245d }
+    },
+    {
+        { 0xec08806, 0x6b034dc, 0xb40f2d9, 0xfd763f2, 0x29cb906, 0x5e16de0,
+          0x8a0e16a, 0x02b7014, 0xe071e12, 0x463c8ee, 0x25ad509, 0x6447281,
+          0xdc0e07a, 0x9ee6f2d, 0x68d4d97, 0x188895c },
+        { 0xb27f971, 0x092fff3, 0xc9b7722, 0xb3c159f, 0x3cae42d, 0xe27d8ff,
+          0xe87071d, 0xf8a5ed6, 0x607ebd2, 0x318388f, 0x53486f1, 0x924967b,
+          0x7c46e1f, 0x7730494, 0xf21d196, 0xf279c60 }
+    },
+    {
+        { 0x84f3201, 0xef2bc03, 0x1f94c51, 0xf8750c7, 0x986ec65, 0xbaa4f5a,
+          0x2732a33, 0x6f8a5de, 0x299e365, 0x0f13d80, 0xe85261f, 0x2709530,
+          0xf527d56, 0x097d922, 0xbe1f3f8, 0x4969687 },
+        { 0x3e1708d, 0x9f3f504, 0x4aa4be4, 0xac67b87, 0x320a87e, 0x75fb042,
+          0x6e2cad6, 0xa361ad3, 0x203e9f6, 0xcb01470, 0xc9b76c6, 0xe3807b7,
+          0xb907c09, 0xf086833, 0x7e85a01, 0xe9bed3c }
+    },
+},
+{
+    {
+        { 0x91780c7, 0xa7ea989, 0xd2476b6, 0x04e4ecc, 0xc494b68, 0x0af9f58,
+          0xdee64fd, 0xe0f269f, 0x021bd26, 0x85a61f6, 0xb5d284b, 0xc265c35,
+          0x3775afd, 0x58755ea, 0x2ecf2c6, 0x617f174 },
+        { 0x5ec556a, 0x50109e2, 0xfd57e39, 0x235366b, 0x44b6b2e, 0x7b3c976,
+          0xb2b7b9c, 0xf7f9e82, 0x0ec6409, 0xb6196ab, 0x0a20d9e, 0x88f1d16,
+          0x586f761, 0xe3be3b4, 0xe26395d, 0x9983c26 }
+    },
+    {
+        { 0x6909ee2, 0x1d7605c, 0x995ec8a, 0xfc4d970, 0xcf2b361, 0x2d82e9d,
+          0x1225f55, 0x07f0ef6, 0xaee9c55, 0xa240c13, 0x5627b54, 0xd449d1e,
+          0x3a44575, 0x07164a7, 0xbd4bd71, 0x61a15fd },
+        { 0xd3a9fe4, 0x30696b9, 0x7e7e326, 0x68308c7, 0xce0b8c8, 0x3ac222b,
+          0x304db8e, 0x83ee319, 0x5e5db0b, 0xeca503b, 0xb1c6539, 0x78a8dce,
+          0x2d256bc, 0x4a8b05e, 0xbd9fd57, 0xa1c3cb8 }
+    },
+    {
+        { 0xd95aa96, 0x5685531, 0x6bd51ff, 0xc6f1174, 0xc9c2343, 0xb38308a,
+          0x2921841, 0x52ee64a, 0x78f3b01, 0x60809c4, 0xae403ac, 0xe297a99,
+          0xcb09a5b, 0x7edc18f, 0x81ac92a, 0x4808bcb },
+        { 0x34dc89a, 0x3ec1bb2, 0x4e39da5, 0x1e8b42e, 0xe526486, 0xde67d5e,
+          0x76f0684, 0x2376548, 0x285a3dd, 0x0a583bd, 0xfe9b009, 0x3d8b87d,
+          0x0413979, 0x45bd736, 0x38a727f, 0xb5d5f90 }
+    },
+    {
+        { 0x4bde3ee, 0x7b8820f, 0x24d5170, 0xea712ef, 0xdf6ec7b, 0x517f88c,
+          0x983ea9a, 0xb15cecf, 0x31a4592, 0x9eeee44, 0xebb013e, 0x786c784,
+          0x1f4e15d, 0x2f06cb3, 0x4f4fda1, 0x5603fd8 },
+        { 0x9e1321f, 0xf6790e9, 0x74a4c09, 0x274c66a, 0x9a41a4e, 0xa4b70b4,
+          0xada5157, 0x7700bdd, 0x51be8dc, 0xe54a60d, 0x1a477e0, 0xfaf9276,
+          0xb027eac, 0x6661c72, 0x280b917, 0x50e2340 }
+    },
+    {
+        { 0x96ec123, 0x635f40f, 0x7a766a4, 0x4a33133, 0xb935587, 0x9ce4416,
+          0x95d97e4, 0xbb6e1f5, 0x9d4197d, 0x2614723, 0x490e896, 0xabd4478,
+          0x8bba895, 0xf6a1b2a, 0x5e27a45, 0x401fa40 },
+        { 0x0620900, 0x7354ba5, 0x385678b, 0xc443a29, 0x53cf5fa, 0x48aba10,
+          0xbbe152d, 0xd67e723, 0x2a63d68, 0x4b858e0, 0x72be4ee, 0x174e1ee,
+          0x9ab8d46, 0xad0fbb3, 0xce17dd7, 0xa0fdffb }
+    },
+    {
+        { 0x9c46fd8, 0xa1ea325, 0x9fb96ef, 0xeca122e, 0x6767acd, 0xf9074a2,
+          0x2787082, 0x9b004a2, 0x7f3ba8e, 0x389f807, 0x0d5aabe, 0x6463de9,
+          0xb090585, 0xf30ceaa, 0x5634ab8, 0x71b31e8 },
+        { 0xaf02aed, 0x0dee65c, 0x20ac252, 0x506886e, 0x86b8a59, 0x0665f78,
+          0xf2bb328, 0xb9b784d, 0xdc6b089, 0x46e443a, 0x66c27fd, 0x3d5de19,
+          0xf0fde70, 0x0419265, 0x2b5c034, 0xed94612 }
+    },
+    {
+        { 0x13b0056, 0x5a52ad2, 0xb909ee3, 0x9fbeb92, 0xbdaab08, 0xb42ba18,
+          0xffc8a77, 0xec127c4, 0xfda906a, 0xc6d2985, 0x994bbe7, 0x5355547,
+          0x9cdfd62, 0xa7470c0, 0xd2e675a, 0x31a3971 },
+        { 0xcc8b356, 0x8d8311c, 0x01b4372, 0xabb0bf8, 0x0294566, 0x33c1cad,
+          0xe07b672, 0xe2e649c, 0x2ae3284, 0x9084d88, 0x1835ce2, 0x7a90d4c,
+          0x809d44c, 0xb4d1cd5, 0x9f0528f, 0x7822714 }
+    },
+    {
+        { 0xbf5844b, 0xca884cf, 0x8524cf9, 0x9dd05c4, 0x36ba889, 0xdbffa19,
+          0x29e7666, 0xef94fdd, 0x3eaf48f, 0x358f81b, 0x1530d56, 0x96734d5,
+          0x4adf9e5, 0x378b2d1, 0x4731f61, 0x2f85046 },
+        { 0x99dcb83, 0xd6ae905, 0x6199239, 0xa4f89e0, 0x8f0f958, 0x6405249,
+          0xcc27707, 0x2866d99, 0xf551c0f, 0x64681a2, 0x4c37080, 0x2c7b0d0,
+          0x00ac301, 0x218925b, 0x54df895, 0x8d57fb3 }
+    },
+},
+{
+    {
+        { 0x809c8d7, 0xdaebde0, 0x0e95ea1, 0x58c761c, 0x00ae5e2, 0xbd99650,
+          0xcd51acd, 0x6117a85, 0x7c55d56, 0xc4424d8, 0xdfbeeaf, 0xe9b1dde,
+          0x0db4791, 0xda98bb5, 0x3fca108, 0xff3a5a6 },
+        { 0x5ccbea1, 0x172fb8e, 0xa9f6cc9, 0x9fe12a7, 0x8967ce2, 0x1de4b0b,
+          0x671dbc6, 0xc1ab60f, 0x5dedcda, 0x338385a, 0x3a043fe, 0x647a420,
+          0x28ebc89, 0xe9abc64, 0x03ba3c8, 0xc357ff0 }
+    },
+    {
+        { 0xde39ebd, 0x37061e7, 0x2be567a, 0xebb9135, 0xd6bb80a, 0xa9a6f6b,
+          0x99f0ba2, 0x039345d, 0x98bbf47, 0x215494e, 0xa2a1ccb, 0xf2cb7a4,
+          0x37f67c9, 0xf51aa10, 0x17fff71, 0xd29c85c },
+        { 0x4d30b87, 0x8d4e4f2, 0x93a8309, 0x20fdf55, 0x757075c, 0x9b9f9cf,
+          0xcd70101, 0x09142ad, 0x766ca55, 0x901d0ee, 0x32e418b, 0x6a5d86a,
+          0xd7fcaec, 0x550ad92, 0xd91b26e, 0x64e8818 }
+    },
+    {
+        { 0x47e5ee5, 0x5cea0f7, 0xbe99699, 0x8ca1d31, 0x5c136c7, 0x52db846,
+          0x90e0d74, 0x8cecb38, 0xede2ad8, 0xb8efe9d, 0xf17ade8, 0x18d6ff8,
+          0x2d66c20, 0xd222735, 0xf2005fd, 0xc46593e },
+        { 0xf7141e1, 0xe5ebe6f, 0xe0126f2, 0xc968315, 0x1cb91b6, 0x95adc73,
+          0x38a6003, 0x753b54c, 0x4230a61, 0xa614125, 0x559fece, 0x23ac6eb,
+          0x3865c23, 0x9816b60, 0x543a570, 0x567014e }
+    },
+    {
+        { 0xdd2b71f, 0xd46091d, 0x97d24ff, 0x3999a5d, 0x1ecff3c, 0xce2a4f1,
+          0x581c6f0, 0xab2687c, 0xcba70b4, 0xa9fb2eb, 0x42093e1, 0x6fde356,
+          0xaee724a, 0x00253ec, 0x2b81bdd, 0xa08ce3c },
+        { 0x935a2b3, 0xa251238, 0x584f750, 0x8cae1d4, 0x988a219, 0x011469e,
+          0x5a6a50e, 0x61f7ed3, 0x01fcebd, 0xe13ebaa, 0x31d8867, 0x794b976,
+          0xcda32e7, 0xf25755c, 0x4564cd1, 0x368a97b }
+    },
+    {
+        { 0xaa3397b, 0x0d22224, 0x38066db, 0x1dbb3e6, 0x0ce8e32, 0xfe0b5ee,
+          0x7bab4dc, 0x09c17c8, 0xf188b64, 0x5cc65dd, 0x211b5fa, 0x74c4abf,
+          0xab0ba86, 0xdcc17b7, 0xa535501, 0xfbdf46f },
+        { 0xaca569e, 0x4775087, 0x06a1718, 0x6575f90, 0xb94de93, 0xb5c45a9,
+          0x8497171, 0x0fc8006, 0x489f7ab, 0x775d965, 0xf5c0c89, 0x8775b58,
+          0x1a06254, 0x05d4e20, 0xb6d73a5, 0x8cab349 }
+    },
+    {
+        { 0x39465b0, 0xca78163, 0x14498fd, 0x3ef9148, 0x6255c11, 0x9ca1f34,
+          0xb7f38f1, 0x389fd15, 0x354b8f3, 0xdac2089, 0xa840a70, 0x82d07fc,
+          0x1dd483a, 0xf53fd73, 0x1590578, 0xa6e4eae },
+        { 0x3c01b77, 0x7bf65af, 0xa75c982, 0x27542f3, 0x716cfce, 0xc5bd947,
+          0x884b9e7, 0xba5fe76, 0xd55725d, 0x39bae14, 0xfae0eab, 0x982f64e,
+          0x7a5293a, 0xcfae662, 0xd60f464, 0x22a25a1 }
+    },
+    {
+        { 0x7dd5e16, 0x74caecc, 0xce7bca3, 0x23678a2, 0x57f1ba1, 0x4673932,
+          0xa4c1697, 0x4eb9948, 0xeaba18d, 0x5d400e8, 0x9807871, 0x128d1c8,
+          0xbff38a6, 0x78f9627, 0xa39d4cc, 0xf80b813 },
+        { 0x31d3aad, 0x8aeefa0, 0x27db664, 0x5042199, 0x4cb6383, 0x244fc69,
+          0x72192a3, 0x3190477, 0xbbfb57b, 0xcc86075, 0x4451511, 0xbae3a13,
+          0xf6174f0, 0x16cf416, 0xd376813, 0xb343cc0 }
+    },
+    {
+        { 0xd1824b7, 0x31ac9b9, 0xec8f61a, 0x6282260, 0xc781765, 0xbbeb9f8,
+          0x2d110da, 0x06ab5c0, 0x47146b8, 0xd583e22, 0x4100d05, 0x79a1608,
+          0xf0a5c95, 0x16dbbb4, 0xe331667, 0xfe2af1d },
+        { 0xaf8710e, 0x26f0364, 0xeec08fe, 0x1cb8c91, 0x1d95e9f, 0x436bce6,
+          0x57944a0, 0xfe9050c, 0x07b626b, 0x5f45acf, 0x9cf1276, 0x48dc93f,
+          0xa05bfb7, 0x4491371, 0x4bcf785, 0x5106304 }
+    },
+},
+{
+    {
+        { 0xed0b3b6, 0xac2e294, 0x671637b, 0x5c5ade6, 0x1140677, 0x2f289ce,
+          0x754eb53, 0xaf446e2, 0x20421ad, 0x70911b7, 0xe0b7556, 0x4b73836,
+          0x2a97827, 0xcadf104, 0x8005bc6, 0x4824e49 },
+        { 0x937c28a, 0xb0eeccd, 0x0c3ee97, 0x1ce061d, 0x9f33faa, 0xcb07631,
+          0xaea66dc, 0x9980bf4, 0xd111d98, 0x2bd0755, 0x7fe4de0, 0x43feaf6,
+          0xb077b2f, 0xe76fb80, 0x5793b04, 0x227dc9f }
+    },
+    {
+        { 0x14f49ba, 0xea24ae5, 0x11436e7, 0xbc39ea6, 0x78485d8, 0x9d7fed2,
+          0xdf8b131, 0xb6ef00c, 0xfdbc7af, 0x0237b4b, 0x64ccd27, 0x08745b5,
+          0xafc5a76, 0xaf8595d, 0x29f5500, 0x43657af },
+        { 0x48470f8, 0x3007183, 0x640fd53, 0x51f91fd, 0xbe15512, 0x859c807,
+          0xab3e9c5, 0x7d1a474, 0x81553e5, 0x5d714d9, 0x6f62310, 0x0757343,
+          0x6b02a62, 0xedc5be0, 0xea47832, 0x5a4b9b7 }
+    },
+    {
+        { 0xe93dbb3, 0x03e0a24, 0xcadc884, 0x25841dc, 0x8d10ad5, 0xabc1a81,
+          0x2042ddd, 0x207e38a, 0xfeba8d8, 0x7fffbdb, 0xa3ec9b5, 0x74efebb,
+          0x0b40a9f, 0x0bc39ca, 0x0267feb, 0x69ee9c9 },
+        { 0xbc62919, 0xd402fac, 0x1cf53c6, 0xe9f8fc1, 0x7cc7d81, 0xe76fa5a,
+          0x96bb19d, 0x4f2d876, 0xadc67c7, 0xd4fb7f9, 0x96702dc, 0x40621d5,
+          0x438f6c5, 0x5b6a98e, 0xf1a1036, 0xa7c64de }
+    },
+    {
+        { 0x9a092c7, 0x84c5e80, 0x11c22b7, 0x9e40e0a, 0xd06c99b, 0x820a091,
+          0xeecca8f, 0x45fdc77, 0x5794f16, 0xfe1b8a3, 0x4ce3d6d, 0x31f7e5b,
+          0x82c74c8, 0xfd5e010, 0xc1f6f7d, 0xfdabf30 },
+        { 0xb9248a0, 0xbfa6017, 0x546b941, 0xe898d30, 0x207ff65, 0x878c492,
+          0xb874e64, 0xbf22e8d, 0x53a547e, 0x43fdb1b, 0x5fbd464, 0xb66deda,
+          0xc7ae1b5, 0x59127a6, 0x6a7515a, 0xa463646 }
+    },
+    {
+        { 0xde9ab2e, 0x22c4e66, 0x0203c58, 0xfaf60c2, 0x0d5c5ed, 0xed2d7bf,
+          0x4ca0f19, 0xdbc16fe, 0x465b979, 0x54e8ef6, 0xa310ef9, 0xe2d64b1,
+          0x3778636, 0xa0f2c95, 0x281883b, 0xf3b4aa4 },
+        { 0x9be6629, 0x4ac9af0, 0x1ca90c5, 0xba455e1, 0x856f492, 0x0147538,
+          0xabd7840, 0xc80db7e, 0x6beb9cd, 0xb3526d9, 0x9d81503, 0x37657fb,
+          0x193cec3, 0x8729a16, 0xd69952a, 0xd9a93fb }
+    },
+    {
+        { 0x94f47c6, 0xfce0175, 0xe366d05, 0x228da21, 0xdc8baf3, 0x27ce0b2,
+          0x6b4a951, 0x8cc660b, 0x384bb01, 0xf678947, 0x44d980c, 0xc629d7d,
+          0xe85e81f, 0x47980e4, 0x1cd723e, 0xa2e636a },
+        { 0x77fb207, 0x6b6ebae, 0x4c92891, 0x7017961, 0xb4d279c, 0x5569541,
+          0x41758cb, 0xbb6b36a, 0x27a8e30, 0xecaa222, 0xb470ad9, 0x8b6746a,
+          0x63e2d3d, 0x4c46017, 0xd3edaec, 0xe19c4ed }
+    },
+    {
+        { 0x34718c8, 0x0b43fec, 0xf33499f, 0x553c407, 0x970d1db, 0x8272efb,
+          0xa8e8d1c, 0x008c62c, 0x63eec45, 0xe4b79d7, 0xf2d71a3, 0x1fd4230,
+          0xa368c36, 0x090fdaf, 0xfca7baa, 0xf62c101 },
+        { 0xd2395b3, 0x1c9e6c8, 0x04c5513, 0x671ed63, 0x299a465, 0x577d933,
+          0x63f9986, 0x286890e, 0xbfc979c, 0xd92a95d, 0x2b51019, 0xcebd79d,
+          0x3d07251, 0xe74d88b, 0x906f9ad, 0x8b6db73 }
+    },
+    {
+        { 0x7b3d90c, 0xc0c43db, 0x4304a06, 0x85d154e, 0xaf2f38e, 0xe8aceef,
+          0x83d9459, 0x5e04293, 0x431afd1, 0x65e5e32, 0xa900a65, 0x9e5f050,
+          0x8a26671, 0xcbaa171, 0x9c93de7, 0x33d0b24 },
+        { 0xd5b6680, 0x3dcbf92, 0x20006f9, 0xc47e5ec, 0x9a51924, 0xc971129,
+          0xcd0ed46, 0x665d9b8, 0xa5fcab6, 0xed2d63f, 0xcfbfc5a, 0xa817eb6,
+          0xb76eb76, 0xb38169f, 0xf11160b, 0x8b93544 }
+    },
+},
+{
+    {
+        { 0x693bdcd, 0x02eca52, 0x2ae01d6, 0xbbf0923, 0x8b44b3e, 0x0b0a2de,
+          0xb250dff, 0xdb82449, 0x6e1c530, 0x0c42b86, 0xa64c2c4, 0xcd226dc,
+          0xf046b5f, 0xcfb2bb1, 0x3fccb0d, 0x97e2fae },
+        { 0x45ed156, 0xdf92907, 0xf641229, 0x224dcb9, 0x5f1f67e, 0x2126abc,
+          0xe9c8a6b, 0xa7eed5a, 0x9857d9b, 0x40abedc, 0xde941c6, 0x3f9c7f6,
+          0xd725ddf, 0x2158d42, 0x8c69543, 0xbdd1015 }
+    },
+    {
+        { 0x8df2fbc, 0xa7dd24e, 0x13d1aee, 0x3adbcfd, 0x13b2177, 0xf6a32d1,
+          0x7a9a14c, 0x89a7232, 0xdc65df9, 0xe3aef43, 0xa64d74c, 0xeaec3e3,
+          0x4fec33b, 0x4d387d8, 0x21a2128, 0xaba2a05 },
+        { 0x6b85e30, 0x2382c22, 0xcd2aad3, 0x4352d85, 0xd9772c4, 0xb0c6001,
+          0x5f3653f, 0x7ed8263, 0x0300f47, 0x3626a6f, 0x6ca7e4e, 0x23909de,
+          0xc154141, 0xb43dd81, 0x7e4bc68, 0x9a49fad }
+    },
+    {
+        { 0x2428f88, 0xa3661df, 0x56e0db2, 0xbe48b02, 0xce79aa9, 0x3cd1871,
+          0x23dddac, 0x90ab871, 0x71871a6, 0x9c58fb9, 0xa34910e, 0xf031f7f,
+          0x81060e4, 0xb501eea, 0x791224e, 0xdb668ba },
+        { 0x6a705bc, 0x240bbcb, 0x2d1865e, 0x7e76fbd, 0x2513641, 0x6e2cd02,
+          0x46365c9, 0xe6c5225, 0xa5a01fb, 0xe46a8b8, 0xb67618b, 0x696fa7b,
+          0x0db6792, 0x418b3b9, 0x7108b9c, 0x7204acd }
+    },
+    {
+        { 0x8456b45, 0xb5a143b, 0xf53b4d9, 0x8a3ab25, 0xe13a570, 0xb112a58,
+          0x81487d2, 0x613ca32, 0x3b1e7c9, 0x837d823, 0xd41e9d5, 0x592bade,
+          0x5cd02f2, 0xdc1893a, 0x8972e23, 0x0879502 },
+        { 0xcb76261, 0x7003c08, 0x332a5e0, 0x14bde9e, 0xcbbd78e, 0x14b2872,
+          0xde238e8, 0x5594061, 0x067466c, 0xad12645, 0xf5e4952, 0xa8d0e64,
+          0xc7f8d06, 0x5b44b82, 0xfb1b828, 0xb51bea8 }
+    },
+    {
+        { 0x3f0dacc, 0xebad685, 0x1cbebbc, 0x5c31b8b, 0xfa5a2dc, 0x6746975,
+          0x31d9faa, 0x2d95965, 0x00fc0e4, 0x343797d, 0x55fe01b, 0x38d821c,
+          0x7323aa0, 0x0bfdb24, 0xf962a8e, 0x42613c4 },
+        { 0xe134bc0, 0x599a211, 0x47a7084, 0x75fa4a1, 0x7f734b5, 0x6e71948,
+          0x6dfca2b, 0xd5ced2d, 0x8aeabd2, 0x9fa0fdc, 0x12361da, 0x5e6b03f,
+          0x5859fcf, 0xad23d31, 0x25a5fc8, 0x3120ef1 }
+    },
+    {
+        { 0x8e9f638, 0x990ef62, 0x626a60c, 0xfdaa240, 0x2abddab, 0x4a3de20,
+          0xd8872b2, 0xd5d10b7, 0x1ea5880, 0xa01b730, 0xa81b9d8, 0x481697f,
+          0x3471ed8, 0x2984153, 0x292d37c, 0xefd73f8 },
+        { 0x9994beb, 0xdda7626, 0x6a4f865, 0xa037703, 0xe5b47d5, 0xda992ec,
+          0xe53edba, 0x912a427, 0x9264e45, 0x6467598, 0xaf71222, 0xd3b68c3,
+          0x6dedc5f, 0x9d3436c, 0x076b2ad, 0x1e027af }
+    },
+    {
+        { 0x4382f4a, 0xd56fca1, 0x8966b7b, 0x83712a4, 0xa4c9ddb, 0xd6b2cf5,
+          0xf602875, 0xa66be29, 0x894f3d0, 0x70e4266, 0xb3195ca, 0x007d220,
+          0x82c74d4, 0xba38d8f, 0xd975cbd, 0xdccc5fc },
+        { 0xc88b38b, 0x03e1610, 0x52e0d8d, 0xeb9f9a1, 0xb646eb7, 0x6a57eca,
+          0xc76b6c1, 0x161641f, 0xbd2e12b, 0xf9025ad, 0x5c0e26d, 0x87c74db,
+          0xbfeca74, 0xed5cb51, 0xe34a08c, 0x603dfb6 }
+    },
+    {
+        { 0xcb03307, 0xc4be728, 0xc2741cc, 0xde34c0e, 0xa74eb17, 0xe01db05,
+          0x8905e4b, 0x1bfce0c, 0xd1b1826, 0xb18830a, 0xe87bbfb, 0xcacbb41,
+          0xd2f1a79, 0x8696842, 0x08c83ea, 0xa80e5fb },
+        { 0x3f1439c, 0xe48f163, 0xcd6987b, 0xc1d4108, 0xb751814, 0x05705c4,
+          0xc1c622d, 0xa9bffd0, 0x46cd053, 0x23de4af, 0x39457c3, 0xf782f5e,
+          0x5e5d243, 0x815276b, 0x6161ae3, 0x3132041 }
+    },
+},
+{
+    {
+        { 0x77f2542, 0x2459661, 0x8372b25, 0x203be7e, 0xee2007b, 0xc7c9426,
+          0x0621799, 0xc564138, 0xc28c3ce, 0xda56589, 0x7afc1e3, 0x13e8a7c,
+          0xe352082, 0xdba81e9, 0x04435c7, 0xf430549 },
+        { 0x691de4a, 0x4d26533, 0xfb777ab, 0x364408c, 0xeae7f88, 0xccdfb43,
+          0xa525b11, 0xbc40f44, 0x3c60627, 0x8e112a5, 0xe17e696, 0x7f7c581,
+          0x1ea774a, 0x0fd7878, 0x0b1f582, 0xd09e632 }
+    },
+    {
+        { 0x70aab15, 0x44390bd, 0x889c3f2, 0x41112bc, 0xd685349, 0x6b02894,
+          0x5584dfe, 0x7103001, 0x1ba7887, 0x373cb1b, 0x2a017c7, 0x53d286c,
+          0x3c81fdc, 0x2ed0388, 0xfbcc6fc, 0x3bfc5e3 },
+        { 0xfd6418d, 0xd38ac6f, 0xbfad89e, 0xc667e96, 0xeab4d66, 0x46f4f77,
+          0x0911293, 0x194c04f, 0x68c48d5, 0x0fd09cf, 0x63cf7f4, 0x6f5b055,
+          0xacd562f, 0x0c0a8c4, 0x36d965d, 0x94c1d83 }
+    },
+    {
+        { 0xcaa127a, 0x94fc8f0, 0xd803690, 0xc762d5d, 0x1ebf0d3, 0x8bfdfd1,
+          0x48eac50, 0xa98cdf2, 0x8b5ff10, 0x3d7365d, 0xc65b4de, 0x20dc29b,
+          0x8ec7c68, 0x62ac28e, 0x90372d2, 0x7f5a132 },
+        { 0x3246658, 0xf3d8a25, 0x9ac202a, 0xa4bebd3, 0x5cc1697, 0x078ede7,
+          0xc8fc022, 0x5525800, 0x5fae77b, 0x302a802, 0x57917b6, 0x0180139,
+          0x864bf55, 0x7c8806d, 0x12f06f1, 0x4e2d878 }
+    },
+    {
+        { 0x3d66e88, 0x8d35118, 0xa91d02a, 0xfb861a1, 0x7850e5f, 0x8c27c2a,
+          0xa5496f6, 0x9fd6399, 0x8080049, 0x52152ae, 0xfd1c2dc, 0x600e2ff,
+          0xffe8b2e, 0xc75902a, 0xe03b175, 0x5c4d2cc },
+        { 0x4f57e78, 0x8ad7c42, 0x1736f87, 0x77cf606, 0xf85038a, 0x2876012,
+          0x1b97b95, 0xff32845, 0x392dfc8, 0x3cc6dd5, 0xa6f5075, 0x72f1363,
+          0x71de894, 0x028ec44, 0x6f45a86, 0x7030f2f }
+    },
+    {
+        { 0x9695817, 0x66400f5, 0xf20ea36, 0xeda0a7d, 0xd394992, 0x855be51,
+          0x8336f62, 0x2d082c1, 0xf28c868, 0x30944dd, 0x0dc86d0, 0xfb5f853,
+          0x564a0bd, 0x9562ae5, 0xb6b9b51, 0x1f7ea12 },
+        { 0xd0a7148, 0x5bd74e0, 0xb91e572, 0x6c8247f, 0x47da498, 0x699aba5,
+          0x1f7c814, 0xed82581, 0x62057b9, 0x434674b, 0x15c15b4, 0x8b4df5e,
+          0xb110081, 0x2a97da1, 0x4c417fe, 0x2a96b0c }
+    },
+    {
+        { 0x237639d, 0x4f75dfc, 0x1db7029, 0xe5ad6bc, 0xb3d28f7, 0xd43e06e,
+          0xe447989, 0x89f3bb5, 0x01a1a6e, 0xc426a2c, 0x315878f, 0x33ea71c,
+          0xb1b5705, 0x8a7784a, 0x77ca811, 0xa59e86e },
+        { 0x36ae155, 0xddb133c, 0x0d51b42, 0x49f1d4c, 0x9d05519, 0x5508082,
+          0x5291816, 0x20e23be, 0x67181ec, 0x35047ec, 0x7aad091, 0x6237dc4,
+          0xe2e25a2, 0xa1d3ce1, 0x0d3db4c, 0x1de0522 }
+    },
+    {
+        { 0xd9fd423, 0xe9a5e19, 0x9801e43, 0x0c2c3d0, 0x28df2da, 0x043c2dd,
+          0xe1ad12a, 0x4eecab4, 0x9615aa5, 0x97e1797, 0xca7bb5e, 0xe57b879,
+          0xcc92619, 0xa2a903c, 0xaa56e93, 0x5cef370 },
+        { 0x7f3232c, 0xbef29fa, 0x2b7ad5c, 0x1cf35ed, 0x3b6077a, 0x35c4893,
+          0x7a1d47d, 0xe065148, 0xce14572, 0xedb4673, 0x0b17629, 0xdc9e98c,
+          0x9a02a5c, 0xef98ebe, 0x11d03c0, 0x1f772e3 }
+    },
+    {
+        { 0x4608f72, 0xcbdbdcd, 0x5a13c6f, 0xb435223, 0x4bb3c21, 0xa6497f6,
+          0x12c15c9, 0x3af2383, 0x6322d11, 0xfbbf4b3, 0xc641775, 0x520a5c6,
+          0xe81e0e1, 0x18cd967, 0x3de3871, 0x980b2c6 },
+        { 0x9ae44a2, 0xfa9db61, 0x176bc56, 0x0281dd2, 0x8a7f817, 0xfd03711,
+          0x4129b30, 0x9c48545, 0x039626d, 0xb439648, 0xe4ada6b, 0x355050e,
+          0x7f5d98c, 0xc9c16d6, 0x18c4d5e, 0xf53ccc3 }
+    },
+},
+{
+    {
+        { 0x3ffb20b, 0x50ae942, 0x6865eb4, 0xa6c0b42, 0x09930f1, 0x4677f7d,
+          0x4a16427, 0x742e0b6, 0xf976f9a, 0x521d18e, 0xa454749, 0x43ac9cf,
+          0xc51f50d, 0xda3a91d, 0xad6f954, 0xf657029 },
+        { 0x6b4f99a, 0xfe5f064, 0x63ad4ce, 0xd92a5d9, 0x2e0e081, 0xfcb5509,
+          0x8d8a858, 0xadc85ab, 0x0632f0f, 0x8e9b966, 0x8d7216d, 0xe7a4f16,
+          0x59c3b99, 0x00a4cc5, 0xba09dc1, 0xed6d0bd }
+    },
+    {
+        { 0x1621beb, 0x7236d14, 0xbc7ca95, 0x1751fd4, 0x2f5319c, 0xaa619d1,
+          0x4e9316f, 0xfc2b15b, 0x9fd4d33, 0x2d1a906, 0x8ced829, 0x28c3bac,
+          0x1dd998f, 0xf2efab5, 0x3b149ed, 0x2c13330 },
+        { 0xf601ac6, 0x65237c9, 0x07d6a45, 0xb54dd65, 0xfb1a4cf, 0xa1ce391,
+          0x115f67e, 0x2957533, 0x465279b, 0x6456da8, 0xa993e02, 0x02890aa,
+          0xb7175e4, 0x6891853, 0x0f3e59b, 0x3fda203 }
+    },
+    {
+        { 0xd8c6e0b, 0xe99fe12, 0x5341c56, 0x7cb07ff, 0xdf77b24, 0xc292c7b,
+          0xca29906, 0xf52dfd0, 0x772f02c, 0x4a6aa26, 0xe1bbd09, 0x26f7684,
+          0xee7c2a8, 0xec56b2b, 0xad4a312, 0x67709e6 },
+        { 0xc570263, 0x99c57b2, 0x2faafae, 0xeb0100b, 0xff25eca, 0x980d5d1,
+          0x82cf936, 0xace35e6, 0x44679ed, 0x5a82ce5, 0x074b81e, 0x5c76a41,
+          0xa00abb1, 0xf36fa43, 0x04ffb2d, 0x0642819 }
+    },
+    {
+        { 0x04bdd28, 0x68f6bc8, 0xb5dc7ad, 0xc311d96, 0xed32e45, 0xff0d646,
+          0xe0f712d, 0xaf3cdc6, 0xd483861, 0xd4508e9, 0x0e1c277, 0xb624be5,
+          0xc5dd841, 0xc510275, 0x298dc02, 0x451c5c3 },
+        { 0xdd34d6b, 0xf87d479, 0xdd06a38, 0xda7f293, 0xb699e9f, 0x575e129,
+          0x215b2cc, 0x79e5fb2, 0x657e690, 0xd280028, 0xe702a71, 0x7fecd09,
+          0xfa13677, 0x85160ab, 0xce65f64, 0x5de3427 }
+    },
+    {
+        { 0xe8fff38, 0x84e4bf6, 0xb358b1c, 0x16f3725, 0x3b472a5, 0x360371c,
+          0x52f217a, 0xe64c061, 0x0501241, 0x8e67379, 0xab2dd96, 0x88e81d6,
+          0x1385604, 0xf3e218a, 0xe84184d, 0x9736caf },
+        { 0xdbb93a3, 0xb55a043, 0x9301088, 0x335088f, 0xb2a4959, 0xcea7a2d,
+          0xb882c33, 0x48e5d4a, 0xad46179, 0x114f09b, 0xb446576, 0x4416467,
+          0x34c6c2f, 0x01cb23e, 0xa02db8a, 0xddebf04 }
+    },
+    {
+        { 0x9bde8a1, 0x36d60cc, 0x676e4ad, 0x20fd2f2, 0x8936581, 0xebdcfb7,
+          0xdbfc2c3, 0x245d0d5, 0xa9f82e5, 0x104c62c, 0xd654d9b, 0x7387457,
+          0xae7f10e, 0xe966777, 0x1d8e582, 0xefeb16f },
+        { 0x70364b5, 0x4faf4f1, 0xd612472, 0x0e1ab58, 0xfed6085, 0x11bbfe7,
+          0xa59a09a, 0xb360a14, 0x722fdb6, 0x61d96e9, 0x94068bd, 0x16a12f1,
+          0xf73c2be, 0x225bf07, 0xc8bd24e, 0x1e64665 }
+    },
+    {
+        { 0x3698c75, 0x27a478a, 0x6202aa2, 0x778ccd3, 0x8d87f1f, 0x0149c63,
+          0x784edae, 0xa660e5f, 0x82adfa8, 0xe0d4d2f, 0x1ba1f9d, 0xf512dd6,
+          0x6245c58, 0x90cfed9, 0x18b53dd, 0x6c3a548 },
+        { 0xbdc094f, 0x833f70c, 0xb1514e7, 0xa5f26f5, 0x1c8cf13, 0x93e7cf5,
+          0x186ec43, 0x1436601, 0xe78170a, 0x81924ac, 0x8694368, 0xcc880a0,
+          0x0b62cbb, 0x2dfa955, 0x96b4a2c, 0x0bc6aa4 }
+    },
+    {
+        { 0x3561aa2, 0x5157a7e, 0x8645c1e, 0x525c500, 0xce7cbb3, 0x22feb4e,
+          0xc89a58b, 0x36d0d25, 0xc9bde9c, 0x43131f7, 0x881f731, 0x74afdda,
+          0x7c8e36a, 0x99ab87c, 0xc1d4fb2, 0xf07a476 },
+        { 0xbebc606, 0x1b82056, 0xfcf089f, 0x95a1e5a, 0x2b55d5c, 0xc5bccfa,
+          0x00eb0b1, 0x8fbc18e, 0x9efb483, 0x93a06fe, 0x2d74c57, 0xcafd725,
+          0x3de4350, 0xc7518f0, 0xc6fd762, 0x9a719bf }
+    },
+},
+{
+    {
+        { 0x2362087, 0x5ee0d83, 0x0b167e8, 0x7f2c0d7, 0x5e0e865, 0xb732789,
+          0x98c4e65, 0xef5b2e8, 0x8fe9cc1, 0x222797d, 0x82d1e15, 0xfe6d73e,
+          0xf62dc4b, 0xc7c0e9c, 0x937ceda, 0x962acfe },
+        { 0xc1e85c7, 0xd763711, 0x2836978, 0x8f2dbbc, 0x8c44e98, 0xbadc055,
+          0xa3e93f8, 0xed63eab, 0x41b55c7, 0x807e857, 0x6d1207b, 0xd51ae5e,
+          0x39d541b, 0xa0ef9a6, 0xa0c56a5, 0x58855f9 }
+    },
+    {
+        { 0x213091d, 0x7d88eaa, 0x45b6a0d, 0xcbdfee7, 0x4f5e077, 0x826a012,
+          0x90f1e4c, 0xb04fc13, 0xaea69aa, 0x1961ac3, 0xd5bb63e, 0x3afb719,
+          0x4ac7e5c, 0x2a37837, 0xc50ca45, 0x78efcc1 },
+        { 0xb8abdef, 0x346e8f0, 0x88095d0, 0x27e3dbd, 0xffc6c22, 0x56d3379,
+          0xfa4b291, 0x67d416c, 0x3b1b373, 0xc3baaf6, 0xdf73bae, 0x0184e1f,
+          0x9167528, 0x38ae8f7, 0x35d6297, 0x7329d4c }
+    },
+    {
+        { 0xf568c52, 0x45d2ac9, 0x9808593, 0x5134814, 0x31b7ed8, 0x0c92d83,
+          0x0876ecd, 0x921327a, 0x052736a, 0xf752d75, 0xbc6b837, 0x7b56487,
+          0xa23b4cc, 0x6b1a320, 0xec0d665, 0x1983937 },
+        { 0x08554ab, 0x2c3017c, 0x366e87f, 0x40ad955, 0x8ed7f02, 0x88c4edf,
+          0x3cc5e6d, 0x64a7db1, 0x2dc978b, 0x5ac91fa, 0x925d2a2, 0x016a20d,
+          0xabb57b4, 0x3604dfe, 0xd7e2e85, 0xc3683ec }
+    },
+    {
+        { 0x4c0c6d0, 0xc47150a, 0xe22adcf, 0x30af45e, 0x022ea4b, 0x39b5acb,
+          0x77203b5, 0xfbe3185, 0x6fd9b59, 0xe5aaa34, 0xdd1c8dc, 0x0062c90,
+          0x54049ac, 0xcf113f3, 0x63a31b5, 0xd8fba4d },
+        { 0x1056a69, 0x73b5488, 0xd780bda, 0x3be6cbc, 0x30ba2b9, 0x5776ec2,
+          0x8e8d6f7, 0xbe883cf, 0x5c2be6f, 0x64efe94, 0xf1ade8d, 0x064f704,
+          0x743110e, 0x41cfd17, 0x4c20abe, 0xaac9411 }
+    },
+    {
+        { 0xf1c1468, 0x91f9192, 0x4563e13, 0x8176e74, 0x0bda15d, 0xa48b5f9,
+          0xda42af6, 0x2a085ae, 0x425c018, 0xfd38ab2, 0x08abafb, 0x2884ba4,
+          0xcbd091d, 0x356f318, 0x817871b, 0x454e450 },
+        { 0x8ada531, 0xe080e81, 0x3152ba8, 0xa40f1eb, 0x0c38eb1, 0x051049f,
+          0xbd45003, 0x37e4bb3, 0x54a01e5, 0x6d09804, 0xeeb824a, 0x6de932f,
+          0xdc93481, 0xccdef37, 0x93a05e8, 0x8633e07 }
+    },
+    {
+        { 0x034675c, 0xbe94256, 0x08db789, 0x376c01d, 0x9af1b6b, 0x8707ee7,
+          0x11bfbac, 0x633b3ef, 0xd06db60, 0x694f33f, 0xbb13407, 0x2a68bfc,
+          0xda27c3a, 0x1c860c9, 0xd701ac3, 0xbca16de },
+        { 0xc59ffd0, 0x2b76cfa, 0x54d718d, 0xf9a1165, 0x67f0878, 0xf86a1db,
+          0xaf34e85, 0xe313e05, 0x3343159, 0xa188811, 0x0bb7ed1, 0xdbe4c3f,
+          0x0c732bc, 0x73b67e8, 0xe74110e, 0xa4e1c87 }
+    },
+    {
+        { 0x5c6770c, 0xce1106b, 0x5c0bcb7, 0x422c70b, 0x8195e7f, 0x32a3990,
+          0x1ccd4aa, 0xa24968d, 0x720e557, 0x8f08ecf, 0x54bcc81, 0x5da10a4,
+          0x6cd846e, 0x9d3c73b, 0x368d065, 0xaeb12c7 },
+        { 0xcf9fd1b, 0x2110859, 0xee2bd6d, 0xd2a4801, 0xe9466ac, 0x376e556,
+          0x3b5aa35, 0x767803b, 0xb8a89ba, 0x343f842, 0x6726bbf, 0x3263cc1,
+          0x25871b0, 0x26caf17, 0x41b8578, 0xef66ad6 }
+    },
+    {
+        { 0x638068c, 0xc9f2249, 0x1ccf9af, 0x96d282c, 0x69b435a, 0x71df30c,
+          0xcb9d5c9, 0x88c943a, 0x2a8f378, 0xbf98ef1, 0x114c6ff, 0xffc1824,
+          0xd52e8c7, 0xda3ad2c, 0x1afcb59, 0xf1222bc },
+        { 0x0ee334a, 0x459e94b, 0x421933a, 0xd4477b8, 0xa1e401e, 0x60fb7b0,
+          0x0d1e330, 0xfde6e82, 0x3233fde, 0xcecfe9b, 0x2e93523, 0x09ec466,
+          0x30775b9, 0xa5ba649, 0xadf80f2, 0xcc397e5 }
+    },
+},
+{
+    {
+        { 0x4ddc8a8, 0x2fe182d, 0xac056bf, 0x88d6e79, 0x0e41e4e, 0xc3ff2d1,
+          0x2c3679f, 0x32ec7f9, 0x4e61051, 0x3561f09, 0x6c6250a, 0x4553f5a,
+          0xdd25c5b, 0x2b765ef, 0x6a1cd7f, 0xe3a40a2 },
+        { 0x5d821dd, 0xb27309b, 0xc2c17ca, 0x950fb8d, 0x8fb0d4c, 0xfeed015,
+          0xf550179, 0x762c479, 0xe095840, 0x306cf44, 0xd379e66, 0x84b413a,
+          0xbb2e4f1, 0xd6e5d5a, 0x94b085d, 0x8bc12b7 }
+    },
+    {
+        { 0x04b5532, 0xc0d4cb8, 0xb9940a6, 0x7a31525, 0x68c69d1, 0x010e7dd,
+          0x2a18c35, 0xd81f29d, 0x3f11e73, 0x08ae770, 0x6e55106, 0x5358f87,
+          0xc960ef5, 0x299e8ca, 0xacfc8dc, 0x89a6fb4 },
+        { 0x6dc7d4a, 0x5996a40, 0xe51b96e, 0x21e5112, 0x09a202b, 0x95b8c3d,
+          0xd441f1f, 0x306ab0f, 0x98d4245, 0x2834fed, 0xd0abbde, 0xc29c387,
+          0xb805c15, 0xf6a9bf1, 0xc4e458d, 0x602f4f8 }
+    },
+    {
+        { 0xe5a893a, 0xf041486, 0x8934327, 0x53b891d, 0x4000758, 0x11e000d,
+          0x662bad9, 0xa4ccde8, 0xb9a1b64, 0xe34d3ed, 0x84e7a6d, 0x72d9675,
+          0x6627be4, 0x773da2f, 0xe835ae3, 0xa11c946 },
+        { 0x650bc15, 0x02e8203, 0xe58b78d, 0x2d35936, 0xf21a3cc, 0xe9cfbe8,
+          0x1049222, 0x55ad831, 0x38fff47, 0xbf99de4, 0x3831db5, 0xebbfd80,
+          0xaf2af42, 0xe990636, 0xb7f5a0e, 0xc26ae52 }
+    },
+    {
+        { 0xfa8f846, 0xb5d85b1, 0xb3b1455, 0x4166489, 0xd36a305, 0x768260d,
+          0x4ff5645, 0xc6a8235, 0xd6e93e5, 0xd241cd8, 0xa406e74, 0xeed9aa1,
+          0x5f600d9, 0x9e96ab0, 0x6eca2a1, 0xa26b8b5 },
+        { 0xd705aef, 0x78321cf, 0xc0161ec, 0xc4fb6b3, 0x5199cf1, 0xdc32441,
+          0xd0a5067, 0x33627d0, 0x15143ee, 0x13490cb, 0x85b4f44, 0x77e0ede,
+          0x394b165, 0x904f12e, 0xefab32d, 0x90f50f5 }
+    },
+    {
+        { 0xbc2de96, 0x4aa0a16, 0xaa9c12b, 0x172596a, 0x60e8a29, 0xd512e1e,
+          0xf637e83, 0x77d35c1, 0xd2aae0b, 0xbb0d141, 0x8c03738, 0x8a878a5,
+          0xab0e525, 0x6d24c01, 0xf760887, 0xb7d3136 },
+        { 0x3f91b7c, 0xdbc3f8f, 0xa8722c0, 0xe7b4bca, 0xda0ae65, 0x3286a91,
+          0x225b084, 0x8372274, 0xae1886c, 0x5884cd5, 0x3a23cf7, 0xb4e63ef,
+          0xf2dd0da, 0xfe5f202, 0x653916c, 0x951fac9 }
+    },
+    {
+        { 0x854fa4e, 0x05e2e8f, 0x1edaf10, 0xf411f94, 0xa0a928d, 0x26cc562,
+          0x4abce65, 0x78fd34e, 0x98a32e2, 0x1d87609, 0x4c37518, 0x85dc76f,
+          0x00e8021, 0xdcaeef5, 0x4e9b2a5, 0x7fcb2f8 },
+        { 0xf382c06, 0x9eba91e, 0x24cae53, 0x2052e85, 0xf5c1519, 0x617336e,
+          0xb4e632b, 0xf1546d5, 0xd7b8ffd, 0xa9edc81, 0x29ab68c, 0xdb2914f,
+          0xdebbaba, 0xe805070, 0xc3b719e, 0x775e53b }
+    },
+    {
+        { 0x065256a, 0xa40e294, 0x8fb031a, 0x9f11386, 0x059667c, 0xac03af8,
+          0x0475f58, 0x432eb3a, 0x01faad0, 0x22332bf, 0xbc57a11, 0xc8132e9,
+          0x3bc3f8b, 0x27d5a17, 0x930bf3e, 0x5471fc6 },
+        { 0xe6bff40, 0xba28bc0, 0x555e564, 0x198d57e, 0x9c65b8f, 0x13ce831,
+          0x5681b51, 0xb0a5c9d, 0xdeb9e11, 0x467588b, 0xbb4250b, 0xf1891a7,
+          0xd12b433, 0x10b938b, 0x24dcda4, 0x0b8c802 }
+    },
+    {
+        { 0xcf332d3, 0xc428703, 0xf2a5b98, 0x9d0053c, 0x7838a15, 0x4e4c620,
+          0xfbf8a43, 0x2e92919, 0x21cd9a5, 0x39ad524, 0x1561588, 0x584ed6c,
+          0x17a95c8, 0x20af305, 0xb70e1c8, 0xa223077 },
+        { 0x2fa4871, 0x679cfea, 0xac633c7, 0x54f2a46, 0x4cdc5f1, 0x6030651,
+          0x75a1dc7, 0xc4facda, 0x2d07d19, 0x710a288, 0x6b44992, 0xd55864e,
+          0x454c5b2, 0x44d4b6c, 0x72f9981, 0x2855d28 }
+    },
+},
+{
+    {
+        { 0xc7b0674, 0x4071b3e, 0xf8794d5, 0x800eb14, 0xbe6783e, 0x70573af,
+          0x7785901, 0xafaa440, 0x405f32c, 0x112d2a1, 0x169b3e2, 0x3761a52,
+          0x842a366, 0xe168b31, 0x9bf4734, 0x5bc322f },
+        { 0x976c4a0, 0x36ef240, 0xfea4e64, 0x066f3d6, 0xa989e57, 0x0e954bd,
+          0xf9466e4, 0xe36ef5e, 0xbeb9226, 0x6bb615a, 0x3d5a2ca, 0x5571e5f,
+          0x4897a86, 0xa86efe2, 0x28a9f77, 0xed7e9cf }
+    },
+    {
+        { 0x1f82c68, 0xdf10c97, 0x3b597e6, 0x796ba1e, 0xe718cbf, 0x1ac77ec,
+          0x410eac8, 0xc8175bb, 0xbc555ef, 0x0cdf9a1, 0x7524e05, 0x6b889f1,
+          0xae26d82, 0x6bf1e61, 0xd2e97d9, 0xb3f6ad5 },
+        { 0xf226487, 0x94dcff9, 0xbe03dde, 0x60e6356, 0x6a3dd7d, 0xda1f93b,
+          0x79ca90c, 0xf1be721, 0x1e6bce5, 0x05ed313, 0xd48af3e, 0xcf50908,
+          0x61e554f, 0x3b0e85c, 0xa2778d3, 0xfe7e35b }
+    },
+    {
+        { 0x75ac5a9, 0x42c5032, 0xda062c2, 0xa66a66d, 0xcaa7023, 0xa4f4f82,
+          0x64b4f86, 0x489d476, 0x97311ad, 0x10b1088, 0x177b2ec, 0x55dd637,
+          0x9a267b1, 0xa5ccff0, 0xff327b0, 0xf07690b },
+        { 0x2250cd2, 0x39162ed, 0x8b255f1, 0x1426de0, 0x1bdd731, 0xf227afd,
+          0xfa4c844, 0x78f8a36, 0x157379c, 0x267a211, 0xcc04acb, 0x3f05f92,
+          0xfc69cae, 0x374496c, 0x16ebfec, 0xbf2c5d0 }
+    },
+    {
+        { 0xd0518d1, 0x605418b, 0x9e1cbc6, 0x3237f80, 0x286c019, 0x37a7005,
+          0xb15af0b, 0xf1fb0e0, 0xaa853c0, 0xfc3b97c, 0xe6beba2, 0x1f48bd0,
+          0xe6a72f1, 0x8e5d7c5, 0x26ebf0c, 0x575e66d },
+        { 0x62eae3d, 0x0994776, 0x96c9c65, 0x53f074f, 0xb81bade, 0x6cfbfdb,
+          0x3fed7d1, 0x98b4efe, 0x38c3382, 0xdaa1123, 0x47b8ec6, 0xdf88b73,
+          0x9504a4f, 0x9b0fe4b, 0xf30c1c3, 0x2e7df4c }
+    },
+    {
+        { 0x2fc1833, 0x25380cb, 0x18d62de, 0xb8e248c, 0xd82f9db, 0x91c8f59,
+          0x2444750, 0x5ec2b20, 0x66b6f74, 0x3f3a1f7, 0xdd7d14d, 0x0180aa9,
+          0x2956b9c, 0xd0a342d, 0x7139873, 0x26e910e },
+        { 0x139e23d, 0x2261dc4, 0xb8343dd, 0x7edb181, 0xb4038dd, 0xfcf1073,
+          0xa3bfea3, 0x88870ef, 0x64a263e, 0x4e98ba9, 0x70811f5, 0x3c6e5dc,
+          0xf86055d, 0x17d28f5, 0x66e4199, 0xca9c276 }
+    },
+    {
+        { 0x964ef8c, 0x0b2d8bd, 0x88e2ba6, 0x5a99b85, 0x04498ce, 0x9e927b2,
+          0x756eb25, 0x9ff20c5, 0x3f27736, 0x97cc27b, 0x4729583, 0xf32dd6d,
+          0x0381a94, 0xbdc2658, 0xef2c06f, 0x70fef15 },
+        { 0x49252cc, 0x50a6191, 0x236b4b9, 0x9eb4a14, 0x8e00f78, 0x9b1b215,
+          0x6ea9c23, 0x27add36, 0xc3a8e79, 0xef61763, 0xd82ce56, 0xed4542f,
+          0x0caed75, 0xa8737e7, 0xd452d76, 0xeca0ac2 }
+    },
+    {
+        { 0x3d082d0, 0x20c0779, 0xc9e9f3b, 0x6e3ce64, 0x75a195f, 0xb3a4dce,
+          0xbdd9f24, 0x3a3c305, 0x8688942, 0xe2545c8, 0x080f32b, 0xa463c82,
+          0x42686b8, 0x4429748, 0x7213866, 0xf50e20d },
+        { 0x3826e74, 0x265ac52, 0x228e8ec, 0x26fba57, 0xe6b3ed8, 0x8a1e1db,
+          0xf0fe65a, 0x7c7b278, 0xc395234, 0x9a6df23, 0x0b0f114, 0x9956206,
+          0xef90837, 0x440c8c4, 0x3645f65, 0x21ad22a }
+    },
+    {
+        { 0xedd31b2, 0x1e023a6, 0x9ff8668, 0xf76d145, 0x17b45c8, 0x9707056,
+          0x1e88e37, 0x0612078, 0x922faac, 0x85c51c8, 0x22756d9, 0x4df392e,
+          0xa03c98e, 0x8907fd0, 0x52ea51c, 0x626f46a },
+        { 0x486c8a2, 0xf8f766a, 0x88ed18c, 0x8c499a2, 0x3c4f0de, 0x44d2dc6,
+          0x6f2a0b6, 0x47dde68, 0x4a973fd, 0x9a655f8, 0x786ac80, 0x3e7124e,
+          0xe8a0574, 0x699e61c, 0x31cdd0d, 0xdf0ba9a }
+    },
+},
+{
+    {
+        { 0xd73e69b, 0x76270ad, 0xc67d38a, 0x991120f, 0x9469f0c, 0x7be5830,
+          0x7db40ac, 0x93aba59, 0x822fc08, 0x2b707bc, 0x69551cd, 0x4199fc0,
+          0xf367324, 0x38deed4, 0x2228787, 0xca518e1 },
+        { 0xd9a9277, 0x72f1bef, 0xe49ae90, 0x57d4aab, 0xdb23478, 0x13810d5,
+          0x9b4b77f, 0x2a8b780, 0x1b4e004, 0xb542f4e, 0x3ec77f0, 0x4080fd0,
+          0xcec6596, 0xb49e9fe, 0x3f16037, 0x20338d3 }
+    },
+    {
+        { 0x53554b0, 0x4adcdae, 0xe04c4db, 0xfea4906, 0x7748233, 0x0808bec,
+          0x47148d7, 0xde7477c, 0x03da38c, 0xdd9124c, 0x25ee8e9, 0x6b25031,
+          0xb0d6161, 0xae67399, 0x82203b6, 0x70c4acd },
+        { 0xd31dae8, 0x9683916, 0x1ac7f69, 0x3477503, 0x988e4ad, 0x9553153,
+          0x53a15e1, 0xb58f411, 0x92ba2dd, 0xb65a2d4, 0xa90169c, 0x7c3efb1,
+          0x6b1747d, 0x210f45e, 0xcff488d, 0x16e8d1b }
+    },
+    {
+        { 0x9d703db, 0x252adf8, 0xfdfeb39, 0x259ac1d, 0x115e806, 0x7faf6af,
+          0xc1aff21, 0x7aaefd6, 0x7c0113d, 0x8054210, 0xe19b4b1, 0x481f1a5,
+          0xfcc8c61, 0x7c17d43, 0xbb0bbbe, 0x8b04452 },
+        { 0x4cebae1, 0xe51e5f5, 0x56a414c, 0x05341ba, 0x7fb8a30, 0x0083a2c,
+          0x77f4952, 0xb4663f2, 0x4bb0074, 0xce72eec, 0xa3584d1, 0x74fdd66,
+          0xb02e076, 0x6b9e58e, 0x3b961f4, 0x5be45d5 }
+    },
+    {
+        { 0x1ab2e0b, 0xc7474f3, 0xf4bf454, 0x2838ccb, 0xf3c3eac, 0x634392e,
+          0x137602b, 0x440e40a, 0xd1ae8e3, 0xeea67e9, 0x77e221e, 0xafdf93a,
+          0x2719a10, 0x3c9f3da, 0x32c8256, 0x466ecef },
+        { 0xf9c432f, 0x1061c19, 0xb1c7d98, 0xa1332d9, 0xa425c2c, 0xbc735f2,
+          0x4b1bccb, 0x1429cdf, 0x6bbb5f9, 0x77b42a1, 0x5955ae4, 0x30078e3,
+          0x21cc315, 0x8acd777, 0xe86fa99, 0xaa90d5f }
+    },
+    {
+        { 0x721115a, 0xfcfd460, 0x08269b8, 0x6a7de3e, 0x96dd47e, 0xe5964a6,
+          0x8dca975, 0x6717cd5, 0x98b149e, 0x7ea4ebe, 0xb7b8057, 0x6f894d5,
+          0x7f30e31, 0xbd6f960, 0x23df092, 0x61ca453 },
+        { 0x9d782f3, 0x32241f9, 0x2abfae2, 0x55173b0, 0xd15bbbd, 0x0abe0ed,
+          0xb438abb, 0xb6d3c0a, 0x9ffa20b, 0x62fb467, 0xd31560a, 0x30926b5,
+          0x2a0aa6d, 0x44bf27c, 0x1a4cb97, 0xf747313 }
+    },
+    {
+        { 0xb0535de, 0xa2f6c0d, 0xc855166, 0xcb02ae1, 0xb3422f0, 0xc699e6b,
+          0x281ba8a, 0x774febe, 0xffabcc7, 0x1d9d24f, 0xfe12ba5, 0x0b31ba1,
+          0x13d0af7, 0x4c86803, 0x2f47160, 0x90640d3 },
+        { 0x5876603, 0xa0c4bf4, 0x950ab08, 0x717f6fa, 0xa710de8, 0xf12bb53,
+          0x6a88f50, 0xc500c61, 0x2645351, 0x0070f99, 0x2446893, 0x57aab5d,
+          0xb68f657, 0xd553fa8, 0x693c55d, 0xe8537c1 }
+    },
+    {
+        { 0x7fc7684, 0x58e86eb, 0xbfc73a9, 0xdf330f7, 0xcc11936, 0x41e337d,
+          0x6e35759, 0x36d9200, 0x3500d8b, 0x0132703, 0x9483354, 0xfa68405,
+          0x667851b, 0xc8f2980, 0x18296b0, 0x538ec89 },
+        { 0xcff55f9, 0xa2a2c4f, 0x60d20bd, 0xb260d4d, 0xd9cc59f, 0x3ed576f,
+          0xd514fcc, 0x4ed8c64, 0xc22b315, 0x37ebfb2, 0x94c212c, 0xca67a36,
+          0x3a1795e, 0x4f8e08c, 0x4e7261f, 0x498f926 }
+    },
+    {
+        { 0xc59b3d4, 0xfea7382, 0x3f2925f, 0xb9942ed, 0x8ea77e8, 0xe4b00dc,
+          0x3cab02e, 0x74a18ec, 0xef16d0b, 0xbbbb752, 0xffab032, 0x639da4f,
+          0x3aa30f0, 0xc371a4a, 0xcaa175b, 0x8e26b22 },
+        { 0x7e2b62e, 0x94e4156, 0x25a794c, 0x7cceea6, 0x479f015, 0x931d2f4,
+          0x90b25b2, 0x946183d, 0x68a2807, 0x1504e97, 0xfa49ddd, 0xa7577d3,
+          0xdd48699, 0x24fc87e, 0x3d7d99c, 0x9edefd6 }
+    },
+},
+{
+    {
+        { 0x0f0b450, 0x0508b34, 0xc36f7f4, 0xe0069a5, 0x2a5a761, 0x2655664,
+          0x848e04d, 0x0193fd8, 0x73fe2e7, 0xc108cf5, 0xfd787d4, 0x05eb0ec,
+          0xff28985, 0x1555ccb, 0x651b995, 0xb5af09f },
+        { 0xe1134be, 0x167d72c, 0x57c669a, 0xd6d98bf, 0x6dd76fa, 0x40fb716,
+          0x2a41b31, 0xeabbf20, 0x09b75b0, 0x300ff0e, 0xd9a0c1e, 0x32b6fad,
+          0x65a80e0, 0x8051883, 0x32110fe, 0x8bef693 }
+    },
+    {
+        { 0xbef47d4, 0x637802f, 0x2d16eaa, 0xfac114b, 0x0415644, 0x7b3f3ab,
+          0x2dd895b, 0x17ab8d1, 0x87195f3, 0x271b7fe, 0xa71f65f, 0xa3f867e,
+          0xc80583a, 0x39ba40c, 0x56e1fcc, 0x6db0672 },
+        { 0x06662a8, 0x4feab4e, 0xc74bd46, 0xc857415, 0x732b126, 0x18032ed,
+          0x7a099ea, 0x87c8aea, 0x36fe0a8, 0xb4a7535, 0x27673f6, 0x33a98da,
+          0x2b8e549, 0x3e40c02, 0x9a4c587, 0x2def1af }
+    },
+    {
+        { 0xa8c9ad9, 0x9618b68, 0x49defda, 0xd70b4aa, 0x5f788ef, 0xae8b138,
+          0xdd523f4, 0x87c3542, 0x5c5b004, 0xe42c705, 0xfa7df57, 0x6303360,
+          0x5f6d068, 0x33e27a7, 0x8ff331a, 0x9b3268e },
+        { 0x23ee0c3, 0x845cc96, 0xac80084, 0x003af70, 0x530c41d, 0x6a9f931,
+          0xbb127f0, 0xa1d7051, 0xca36245, 0x642ce05, 0x0323ee9, 0xc34205b,
+          0xb7b3513, 0x7cc8912, 0x076cbdb, 0x6252cc8 }
+    },
+    {
+        { 0x7089522, 0x10e68a0, 0x58fc658, 0x36c1361, 0x74723a4, 0x490397d,
+          0x519d56c, 0x42692c0, 0xf1ff235, 0x69d251b, 0xc2cbf37, 0xe689d03,
+          0x825b7f4, 0xf04ceba, 0x2281c2e, 0xd6b9bee },
+        { 0xe0043ab, 0xc52ef3f, 0xd1d1be8, 0x351bf28, 0x0f18a5a, 0x277615f,
+          0x5d6800f, 0x31f717f, 0xab922e2, 0xf5fb82d, 0x2d6ae43, 0x99aee2f,
+          0xc63b982, 0x42477fe, 0xa594a01, 0x904aeb1 }
+    },
+    {
+        { 0xeb39974, 0xaa82174, 0x95e6aa0, 0xbc38e61, 0x25c0675, 0x6a3df8a,
+          0xffbe739, 0xf324203, 0xa3f0649, 0xfa5a0b4, 0x7a7a6b8, 0x79c8732,
+          0x40ad3f5, 0xeb65ecd, 0xe4e45c5, 0x718d416 },
+        { 0xe2326fd, 0x029dbf4, 0xe7942f0, 0x0c63416, 0x6f4e678, 0x6d0c728,
+          0xa138601, 0x59f0b10, 0x8d92ea9, 0x8a1d978, 0xc22eca5, 0x9f8d712,
+          0x7b6b96b, 0x7397044, 0xe6fb955, 0xa2d49ee }
+    },
+    {
+        { 0xbf14a19, 0x249f900, 0x63a8cd2, 0xd3522da, 0x86964d2, 0x28a32f3,
+          0xc1fa743, 0xacf712b, 0x0bb94d3, 0x98a9bfc, 0xbc06824, 0x318ece1,
+          0x4fce7f0, 0xfc47675, 0xe4135b7, 0x19caec9 },
+        { 0xc6817bb, 0x6de68a8, 0xf3b6d89, 0x7121960, 0xf5a818e, 0xa7d4261,
+          0x9157455, 0x0c0ba51, 0x450d5ff, 0x78b6acf, 0x4e8649a, 0x198b493,
+          0xfd05da3, 0x0941a3c, 0xdb55951, 0x264ea4a }
+    },
+    {
+        { 0x46e5a31, 0xcfee91c, 0xfff7366, 0x47b6806, 0x5df849d, 0xdb14be4,
+          0xac66cc7, 0x3c5e22b, 0xa5f4769, 0x7f3f284, 0x383be36, 0x4e00815,
+          0x8072b0b, 0x39a9f0b, 0xc7eadd6, 0x9887cd5 },
+        { 0xb659511, 0x7dd8f05, 0xd2e1cb9, 0x15c796d, 0x0d31345, 0xe5edb0c,
+          0x6939c60, 0x2025df0, 0xbf15de1, 0x6314c08, 0x04c7fb5, 0x03c1548,
+          0xbb5d3ed, 0x413337f, 0x477e983, 0xfc20b40 }
+    },
+    {
+        { 0x5db0ef9, 0x7f96880, 0xe9c2a70, 0x05562de, 0x7dae133, 0x071e5bc,
+          0x237fc4a, 0xa8cdd12, 0x4ea492b, 0x6d565e7, 0x381ee52, 0xa17cf94,
+          0x9f5c546, 0x6ab8a4e, 0x40288ef, 0xbb642f3 },
+        { 0x5df5c2d, 0x64e5921, 0xbb906f4, 0x43696e3, 0x74ae46c, 0x73a841a,
+          0xc506b8a, 0xe264883, 0xa1be548, 0x9542e1a, 0x5e81b4a, 0x8938539,
+          0xeaca6ce, 0x5642cfa, 0x806e0f9, 0xed8077b }
+    },
+},
+{
+    {
+        { 0x7e13597, 0x1c776c4, 0x9e584fd, 0x0ec8b28, 0xb8b61e8, 0x0bb6043,
+          0x9cd835b, 0xdcc1748, 0x39fef9a, 0x493e6ac, 0xd133e17, 0xb44eb34,
+          0x71cb6f9, 0xfebcd00, 0xd20eff2, 0xe6cf543 },
+        { 0x0a004c7, 0xf265cad, 0xd35cc12, 0x9b06c9d, 0xcb4ea53, 0x769f985,
+          0x0993434, 0x29160a2, 0x8d939c4, 0xdf8dd10, 0x6711e2f, 0xefa177c,
+          0xcd7a2cd, 0x1695790, 0x77f6642, 0x38da3d7 }
+    },
+    {
+        { 0x6307b74, 0x9bfcfd9, 0xbfdabc3, 0xc26a36d, 0x4abe28e, 0x9341be0,
+          0x73d1387, 0xdb20b52, 0x3d1949c, 0xf8d229c, 0xb8b3a41, 0xf1e0afe,
+          0xed565d0, 0x29c60df, 0x8b43b2c, 0x6930bb5 },
+        { 0xfc0718f, 0x1d76527, 0x1f67189, 0xdb98143, 0x51f32cc, 0x0c62f64,
+          0x8bd35e5, 0x70a6626, 0xc1cece7, 0x1725641, 0xf96f4a4, 0x7f130a8,
+          0xf06ee98, 0x72319e9, 0x67bf9b2, 0x215b738 }
+    },
+    {
+        { 0x0aaddd7, 0x8d1bec2, 0xb8be4f9, 0xfb8b95b, 0xfde1026, 0xeac193e,
+          0x9d5860c, 0xa5edea7, 0x44280d3, 0x4adbaea, 0x38f4798, 0xce8b670,
+          0xec30dea, 0x914c107, 0x000776b, 0xbdc5cf7 },
+        { 0xa206a13, 0xb6fd7d1, 0xdae986e, 0x9941eba, 0x1f1caaa, 0x76c27a8,
+          0x3f108b4, 0x6967c12, 0x4aea2d0, 0x6f11528, 0x144ddac, 0x9bb4319,
+          0xc8ec6fc, 0x1a4d3ea, 0xbf37420, 0xfe4b0b8 }
+    },
+    {
+        { 0xec0ac6f, 0x5d9a4a1, 0xfc7c80d, 0x84b79f2, 0xc14fac3, 0x64222f7,
+          0xc23b3f2, 0xdd9e039, 0xea956bb, 0x4a84abd, 0xebe09dc, 0x370dcba,
+          0xe0eaf82, 0x79a9ea8, 0xaee375f, 0x4cfb60a },
+        { 0x9106827, 0x6a10dbf, 0x43f305b, 0xa3ba5cf, 0xc1bb083, 0x481b885,
+          0xb3117b1, 0x2f52380, 0xddd6791, 0x0066122, 0x63bace3, 0x4f8923e,
+          0xecb88d4, 0x5c5f499, 0x3bac146, 0xfdc780a }
+    },
+    {
+        { 0x7ba1f71, 0x34b70ae, 0x45bd184, 0x9091829, 0xe707313, 0x3b39778,
+          0x6164e91, 0xdeefc5e, 0x4971f39, 0xbb55bed, 0x8dafc8b, 0x7d52339,
+          0xa6adf0f, 0x82391bf, 0xe319522, 0xfd6f90a },
+        { 0xf29bbc9, 0x60fdf77, 0xaaa4030, 0xeff9ed8, 0xf8c0d3f, 0x978e045,
+          0xeed65cd, 0xe0502c3, 0x3cfd4c8, 0x3104d8f, 0xa639005, 0xab1be44,
+          0x9eeab3f, 0xe83f431, 0x451d797, 0x01970e8 }
+    },
+    {
+        { 0x3180f4b, 0xbc972f8, 0x617779d, 0xac053c0, 0x7fa149f, 0x89392c5,
+          0xbcb6263, 0xdc4699b, 0xce12882, 0x0ae8b28, 0xaf1a4dc, 0xdca19a7,
+          0x64e1a74, 0xd3d719f, 0xaffdd5d, 0xbb50201 },
+        { 0x7ac30e9, 0x56f7310, 0x1878900, 0x65cc9c7, 0x27338a3, 0x83f5866,
+          0xac5bb13, 0x122adef, 0x1bcd4d5, 0x97de200, 0xb8aa3a0, 0x6ed3985,
+          0x6821f9b, 0x8680f1d, 0xdda9f98, 0xcb42028 }
+    },
+    {
+        { 0x0ec2db3, 0xcdb0708, 0x3dad1a1, 0xe28c833, 0xde2da07, 0x2093e32,
+          0x83b8987, 0x7317073, 0xf552b8d, 0xad17871, 0x51cf70a, 0x846da98,
+          0x5c4f5e1, 0xf94a16e, 0x0f8348a, 0x8429996 },
+        { 0x98db78a, 0x4bf3f68, 0x3d19b52, 0xad77fa8, 0x8b972dc, 0x6976772,
+          0x5321be0, 0x7dfa35a, 0xdd344a6, 0x9881846, 0xad4e2a8, 0xe550292,
+          0xbc68bf1, 0x8075217, 0x893be15, 0xdd837c4 }
+    },
+    {
+        { 0xd4fab5b, 0x09c931e, 0xb77a0f1, 0xb2dcf08, 0xe0d38a6, 0x7dac5c0,
+          0x0ae73af, 0xa5570b0, 0xf5aed28, 0xc7c19d3, 0x5251e92, 0x575fa6f,
+          0xcdf7275, 0xb843cd6, 0x9a01287, 0xd9d3d8e },
+        { 0xb3c370b, 0xf94e356, 0xfe464b0, 0xc62b99f, 0xa986057, 0x7792650,
+          0xc4b1874, 0xeaa67d5, 0x0b07078, 0xba1ba4d, 0x7a03699, 0xdbf636d,
+          0xedd32a3, 0x1a16c34, 0xa45cb5d, 0x6ce2495 }
+    },
+},
+{
+    {
+        { 0xa684441, 0xd7c4d9a, 0x30cd42a, 0xce62af6, 0x43014c4, 0xcd2669b,
+          0x6f65b24, 0xce7e711, 0x576fa19, 0x1847ce9, 0x9dd8ca6, 0x82585ac,
+          0xb42e1db, 0x3009096, 0x384ab8b, 0x2b2c83e },
+        { 0xb4e9a6e, 0xe171ffc, 0x7374b40, 0x9de4218, 0xdb1d616, 0x5701f9f,
+          0xa3e8cbc, 0x211e122, 0x1e400bf, 0x04e8c1a, 0x0f37159, 0x0297470,
+          0x3df8c28, 0x41775d1, 0x61ac2db, 0xcfaad4a }
+    },
+    {
+        { 0x7dc0f49, 0x6341b4d, 0xf471a53, 0xaff6c2d, 0xfb8e91e, 0x20ec795,
+          0xc3b7b62, 0x4c7a4df, 0xd374938, 0x9f33ff2, 0x3a60f2e, 0x38f8c65,
+          0x2efef73, 0xc1168ac, 0xce408ee, 0x046146f },
+        { 0x308b0c3, 0x9b39ac0, 0x36b8570, 0xe032d61, 0xfc4aacf, 0xee07d8d,
+          0xd5a41dd, 0x0a82acb, 0x7c3d726, 0xbe0ded2, 0xb926ce9, 0xce51d60,
+          0x5806c1e, 0xfa2f7f4, 0x1dec59c, 0xe367c6d }
+    },
+    {
+        { 0xda2547b, 0x64511b6, 0x0761405, 0x76a349c, 0x01223ab, 0x37d6626,
+          0xf4d7c48, 0x0e243c1, 0xda756a0, 0xdc9c8b4, 0xd72e7e9, 0xc7430df,
+          0x27b4210, 0x0eb1308, 0xcf11cbd, 0x7a9c044 },
+        { 0xe8dd150, 0x2c08ff6, 0x2932fc6, 0x18b738c, 0x04513e8, 0x07d5651,
+          0xaa40a17, 0x0ca5cff, 0x01baa8f, 0xd486341, 0xb72b79e, 0xfb20faf,
+          0x654020f, 0x1a051e5, 0x4e17f23, 0xe3b3317 }
+    },
+    {
+        { 0x4de9428, 0x0591048, 0x5abdf97, 0x620542a, 0xa16a4d1, 0xaa0eded,
+          0x6d65bb9, 0xa93f71c, 0xb8dfaf9, 0x88be135, 0x57ca8ee, 0x1d9f4e5,
+          0x26781ad, 0x4c896aa, 0x6c6c49f, 0xd3fbe31 },
+        { 0x2c34c3d, 0x088d852, 0xbadff1e, 0xbb6d645, 0x385450d, 0xe3080b8,
+          0x50ab1f3, 0x5ccc54c, 0xac0657d, 0x4e07e6e, 0xb7ef2c0, 0xa7ba596,
+          0x73a81e9, 0xcceca8a, 0x8284c35, 0xa0b804c }
+    },
+    {
+        { 0xf17a6a2, 0x7c55956, 0x789cfa8, 0xb451d81, 0x2506eaa, 0xdf414e8,
+          0xae96562, 0x6ef40fb, 0x0e0297e, 0x63ea283, 0x73c46fa, 0xf5df26e,
+          0xaac8bce, 0xe00641c, 0x64371f3, 0xc89ed8f },
+        { 0x793202e, 0xd22b08e, 0x875cb50, 0x39a9033, 0xf85ddb4, 0xe64eec0,
+          0x7acf7b5, 0xdce45a7, 0xb9b802d, 0x39d1e71, 0xbd559ac, 0xafdfe7c,
+          0x809eeb5, 0x17ec1f8, 0x4889b8c, 0x8c0e38a }
+    },
+    {
+        { 0x17089da, 0x47eabfe, 0xec90c50, 0x2d18466, 0x5861531, 0xa511aa4,
+          0x8c39b39, 0xebb3d34, 0xf1b5282, 0xa0ac4da, 0xa9dadba, 0xea26be7,
+          0x554d86e, 0x8992ba8, 0xd5f2ef5, 0x7fcbdb6 },
+        { 0x56863e7, 0x320e79b, 0xa7dce2d, 0xeb9d0c0, 0x784cbc6, 0xb9f4031,
+          0x7ac1f81, 0x68823ee, 0x9d87497, 0xa6b6f4f, 0x57f9b6e, 0x83c67b6,
+          0x0fef2a7, 0x3735747, 0x59596e2, 0xf38028f }
+    },
+    {
+        { 0x7e82886, 0x9ea57ab, 0x48c44d5, 0x18221c5, 0x314a24f, 0xbf8e6cf,
+          0xfd025e5, 0x70ff18e, 0x5334468, 0x08d03de, 0x7404fb7, 0x2b206d5,
+          0x55e36b0, 0xb923271, 0xb88ddd9, 0xcc7604a },
+        { 0x4a746f0, 0x3df5152, 0x168e3fc, 0x8fdebd8, 0x7f8c32c, 0xffc550c,
+          0x148743e, 0x1dbbc17, 0xb88e18b, 0xd48af29, 0x750027c, 0x8dca11c,
+          0x1832be3, 0x717f9db, 0x2b06019, 0x22923e0 }
+    },
+    {
+        { 0xc1cc4d3, 0xd4e06f5, 0x2b4f03a, 0x0fa32e3, 0xc4628d0, 0x956b9af,
+          0x939dad1, 0x95c39ce, 0x8a00416, 0x39d41e0, 0x6fb01aa, 0xfd7ff26,
+          0x45af340, 0xc6033d5, 0x8e36584, 0x2f65542 },
+        { 0x8dff960, 0x14cfb1f, 0xda81474, 0x7236ffc, 0xd452d0f, 0xc6a6788,
+          0x77f6094, 0x2ad4a52, 0x07eea74, 0x369d65a, 0xd6229aa, 0x27c6c38,
+          0x8863976, 0xe590e09, 0xb38b142, 0x361ca6e }
+    },
+},
+{
+    {
+        { 0xdfeb7ef, 0x6803413, 0xd3f4fad, 0xb669d71, 0xc941606, 0x5df402a,
+          0x8e6c5b7, 0xe5d1776, 0x92ab236, 0x131bcb3, 0xce2e0e0, 0x7f1fb31,
+          0x9e98c35, 0xa2c020d, 0xf28657b, 0x33b23c0 },
+        { 0x9cf7879, 0xed14e73, 0xb4357b3, 0x10d4867, 0x31e4e04, 0x127cea3,
+          0xaa5f8a7, 0xc60d25f, 0x025b987, 0xfef840a, 0x66f2a0a, 0x78081d6,
+          0xac36198, 0x0fa0b97, 0x134dc9f, 0xe0bb919 }
+    },
+    {
+        { 0xcc32eae, 0xc1d2461, 0x0f79a37, 0x0fdbfdf, 0x1c95f02, 0x70f2bc2,
+          0x372cddf, 0x7d68bec, 0x8439342, 0x44f7817, 0x4843a6c, 0xa3d5678,
+          0x07f8959, 0xbadf77a, 0x73db4ca, 0xf458198 },
+        { 0xd54f805, 0xe8eaaf3, 0xb84c1e7, 0x2f529d1, 0x21e535c, 0x404e32e,
+          0x159b5f5, 0xabac85c, 0xb00466f, 0x4e8e594, 0xc941873, 0x40fcaab,
+          0xbe407c6, 0x3b4e370, 0x5b2e58d, 0xccd5788 }
+    },
+    {
+        { 0x88b74a8, 0x3ee615e, 0xeab4e69, 0xd7d6608, 0xe4ace36, 0x27cf9f1,
+          0x7aebabb, 0x282359e, 0xf6d162f, 0x96e509b, 0xf1a290a, 0xad906f3,
+          0x1314a58, 0xe7d6c4f, 0x218431d, 0xeecffe4 },
+        { 0xe2cfed9, 0xa66e0e9, 0x71f0544, 0xb0887ec, 0xa04c5d7, 0xd34e36b,
+          0xed4392d, 0x094daa5, 0xc8aa925, 0xcda83ad, 0xb979786, 0x1adef91,
+          0xfddc5d6, 0x3124dcb, 0x0b70c14, 0x5cc27ed }
+    },
+    {
+        { 0x0eac2d8, 0x386dbc0, 0xc50ca30, 0xa716ecb, 0x80d9f04, 0x9e3fc05,
+          0xcfeaceb, 0x37dde44, 0xa3522d5, 0xd88d74d, 0x2cf239a, 0x6bb9e9f,
+          0xa7cbfec, 0x9e7fb49, 0x0a5c0ef, 0xe1a75f0 },
+        { 0xfb9229d, 0x6e434e7, 0xc8a79b3, 0x0ec6df5, 0xd3fb311, 0x7046380,
+          0x52e20fa, 0xe957ef0, 0x9ef4614, 0x0f4fe9a, 0x54d8f2b, 0x1b37d9c,
+          0x39d84a2, 0x23b2dc1, 0x724e713, 0xf62c4f6 }
+    },
+    {
+        { 0x747e219, 0xbd6922c, 0x3869b7b, 0x34d1438, 0x96f2272, 0x8c875a5,
+          0x3fe361e, 0xd9602c0, 0x744839f, 0x081348f, 0x61ac1f1, 0x61bd16c,
+          0xd8da4e1, 0x993b727, 0x7741271, 0xbb40ba8 },
+        { 0x81dcfff, 0xe6dcc98, 0x93ce616, 0x9f513f5, 0x618cd8f, 0xdc09683,
+          0x26639be, 0xc3b1d10, 0xc762ee2, 0xe8f149f, 0xb244aae, 0x59f26ef,
+          0x693dd96, 0x3f2de27, 0x9c3a7de, 0xd8b68f7 }
+    },
+    {
+        { 0x970bd5b, 0x6fa20b9, 0x75f6179, 0x87242d7, 0x72d9308, 0xa95a6c6,
+          0x37a8a58, 0x6eb2518, 0xc59562c, 0xfdea12a, 0x20f1fc3, 0x4419c1e,
+          0x9d66788, 0x0c1bd99, 0x32c0547, 0x4b74288 },
+        { 0xdf479ab, 0x4f38acc, 0xc52a942, 0x01f6271, 0x02ca9a7, 0xe3298f4,
+          0xb718fc8, 0x533daca, 0xb093ca8, 0x133602a, 0x8f98104, 0xc04da80,
+          0xaf08620, 0xd0f2e23, 0x178b164, 0x882c817 }
+    },
+    {
+        { 0xec30a71, 0x28e6678, 0xf78aca1, 0xe646879, 0x88fa078, 0x868a64b,
+          0xfee3433, 0x671030a, 0x87c0211, 0xb2a06bb, 0x46c406a, 0x202eca9,
+          0xe4f0f59, 0x64d6284, 0x3c9f907, 0x56ae4a2 },
+        { 0x1dcc100, 0x5abbb56, 0x07c7784, 0x6fef6cf, 0xdb7302d, 0xb6e25cd,
+          0x42980e8, 0xa26785b, 0xfb96801, 0xe7d4043, 0x8e4282b, 0x46df55d,
+          0xc602d6e, 0x9c0a5f5, 0x75dfe29, 0xf065604 }
+    },
+    {
+        { 0x3dcbc90, 0x0e82a1a, 0x656feac, 0xb1ee285, 0x0d3d3b2, 0xfa4353b,
+          0xdd5c5df, 0xc2e7a6e, 0x416ce53, 0x13707e1, 0x87ebc07, 0xc84ce07,
+          0x8a9a834, 0xdd273ce, 0x5e8e1e7, 0x432a617 },
+        { 0xbd0064a, 0xa359670, 0x6534516, 0xc899dd5, 0xdb27169, 0x666560e,
+          0xa19a068, 0x1537b22, 0xeac7527, 0x3420507, 0x6fc13a7, 0x479f25e,
+          0x1bc19b3, 0xc847acc, 0x0b20d45, 0xecdecf0 }
+    },
+},
+{
+    {
+        { 0x4acea57, 0x6f24100, 0xda68597, 0xdace1c6, 0x50ce77f, 0xea7dd41,
+          0x1585884, 0x1aecb84, 0xea4a85c, 0x92ff208, 0x88eebd2, 0xde9433c,
+          0x3f4d289, 0x53cd318, 0x26539af, 0x3970858 },
+        { 0xb827d87, 0x4b57599, 0x3d77638, 0xdc82ac0, 0x52f6e61, 0x6943366,
+          0xad5e8a6, 0xb8fc4b0, 0xf388642, 0x1b6f7dc, 0xa74dd57, 0x6f24533,
+          0x41750cf, 0xc669378, 0x28a37af, 0x06757eb }
+    },
+    {
+        { 0xc133995, 0x0e70d53, 0x7c8c97d, 0x88a5e0c, 0x85f3be3, 0x4e59dbf,
+          0x0e92698, 0x0f364ac, 0xef6940f, 0x3a1e79b, 0xd85d23a, 0xc8a3941,
+          0x9a00e58, 0x143bb99, 0xc6f2f10, 0x61cf7d6 },
+        { 0x85150fe, 0x979c994, 0x59d773f, 0xcfd0df2, 0xaab7bcd, 0xce97b9d,
+          0x6afd8fc, 0xc9fff8e, 0x89a4628, 0x246befd, 0x1567090, 0xf630282,
+          0x6749c58, 0x1539342, 0xa0f3fd3, 0xff47d0e }
+    },
+    {
+        { 0x35f6706, 0x09b0bfd, 0x2c82e69, 0x7464581, 0x50d5fe9, 0xb60729f,
+          0x95c74f1, 0xf133245, 0xbb76c89, 0x33647e3, 0x5a9afcc, 0x0126404,
+          0x0f154ab, 0x46d57ee, 0x25680a4, 0x2efa555 },
+        { 0x5329d90, 0x12ebfc6, 0x79800af, 0xcb37ae5, 0x6f8e310, 0x5bb5349,
+          0xf1bb936, 0x9b59c63, 0xf4610e9, 0x5b49baa, 0x4f2d6ac, 0x2bbeeef,
+          0x0badc67, 0x87ee21e, 0xf1ddfa0, 0x12e2aad }
+    },
+    {
+        { 0xa9109ee, 0x5b4668f, 0x8a6cea2, 0xfa95133, 0x4068e16, 0xe45e6fc,
+          0x0205ed8, 0x8ae9a0c, 0x679b79b, 0x2993b96, 0xed604d3, 0xc6b878f,
+          0x32c77f3, 0x01d0208, 0x495a1ab, 0xd45d890 },
+        { 0x29d2030, 0x99348fa, 0x61f8f7a, 0x961f9a6, 0x674f74b, 0xfd53212,
+          0xb3e72bc, 0x45cee23, 0xb77e2d5, 0x3fccb86, 0x4219cb7, 0xdff0310,
+          0xc056871, 0x233771d, 0x7d2c521, 0x1214e32 }
+    },
+    {
+        { 0xff2a8e1, 0x9f51e15, 0x138bc70, 0x86571c5, 0x0c09d46, 0xbfc4caf,
+          0xc2a0c18, 0x65e33fe, 0x426867d, 0x8214392, 0x80ae4ed, 0x51ce6c0,
+          0xb110de6, 0x6cbe8d7, 0xfd22ea4, 0x7f6e947 },
+        { 0xcadefc4, 0x7373a75, 0xb0c682f, 0x6fca1d2, 0xf3c7c1e, 0xcd2140d,
+          0x558b7a5, 0x8653a37, 0x55eb321, 0x653e74e, 0xc31af73, 0xbe0c6b3,
+          0xf4fc365, 0x3376379, 0x71add4d, 0x3570b37 }
+    },
+    {
+        { 0x83c3494, 0x9061ec1, 0x677bc95, 0xaf2f28d, 0x3bf8768, 0x6fe7279,
+          0x0fa86d8, 0xc5f50e3, 0xa3293ce, 0x6c03060, 0xe2355a6, 0x4d53357,
+          0xe4df931, 0x43a59ea, 0x13b79c6, 0x6f48f5d },
+        { 0xddc5192, 0xa4d073d, 0xa65773f, 0x6d0e318, 0x765de9e, 0x1008792,
+          0x39a0375, 0xa724ed2, 0x97d7c9e, 0x510ff14, 0x5baa863, 0x251f622,
+          0x648a351, 0x86464fe, 0xd50fd91, 0xf85e98f }
+    },
+    {
+        { 0x86ee987, 0x29c9634, 0x10dcc9f, 0x93e8e52, 0xc910b1f, 0xa1fc4d1,
+          0xfeb603e, 0x015acac, 0x0844a5f, 0xc9f25f8, 0x73f4dac, 0x50de93c,
+          0x310a4aa, 0x1758783, 0x358f106, 0x544d570 },
+        { 0x1dc68ca, 0x4eeec7b, 0xe00fbcb, 0x6238e6f, 0xb4e83c9, 0x34d394c,
+          0x2292656, 0x764ffa2, 0xf641f2e, 0x5614cd1, 0x9e07234, 0x4252eb6,
+          0x68d2ba4, 0xcbaef45, 0x8a98b17, 0x8c9c550 }
+    },
+    {
+        { 0x4106140, 0xf235d9d, 0x9eb601e, 0x1bf2fc3, 0x375e0c3, 0x6fb6ca9,
+          0xc0024d2, 0x4bf5492, 0xeb54cc6, 0x3d97093, 0x5c90cb5, 0xc60931f,
+          0xfbe0f1a, 0xfa88808, 0xd33e7d4, 0xc22b83d },
+        { 0xc0abbf5, 0x9cfec53, 0x93723df, 0x52c3f0a, 0x39b96b6, 0x0622b7e,
+          0x1667270, 0x300de28, 0x9ef426a, 0x50b66c7, 0xc6eb295, 0x8849189,
+          0x8914a7e, 0xeaec3a9, 0xc4c99e0, 0x7ed56b0 }
+    },
+},
+{
+    {
+        { 0x687e557, 0x7926403, 0x5310017, 0xa349816, 0xd43a8fd, 0x1b06e91,
+          0x6ac23cb, 0xf201db4, 0x4f48750, 0x6f172ad, 0xe74bd3e, 0x5ed8c8c,
+          0xdaba648, 0x492a654, 0xa9b64ff, 0x123010b },
+        { 0x6e89f93, 0xa83125b, 0x398378a, 0x3a3b0b0, 0x0aebe7c, 0x9622e0b,
+          0x49512a4, 0xb9cbfdc, 0x6aaf12a, 0x13edffd, 0x9f5eafd, 0x555dff5,
+          0x1212efa, 0x3cba6fe, 0xd9bb0f8, 0xd07b744 }
+    },
+    {
+        { 0x9a48920, 0x45732b0, 0x13ff36d, 0xf3080fc, 0xde8f950, 0x9347395,
+          0x382b897, 0x14d025a, 0x04d72ad, 0x60c5a74, 0x11a9c71, 0x30be7e5,
+          0x31ac33a, 0x43ffabd, 0x35cbb14, 0x97b06f3 },
+        { 0x7740de9, 0xe4ff5c5, 0xaacf81e, 0x5fed090, 0xe8b7c9d, 0x97196ee,
+          0x045910b, 0x316dcd1, 0x5ad8c63, 0x7a2b2f5, 0xc5b03bb, 0x674fffd,
+          0xe65953c, 0xc1cd133, 0x0a83556, 0x3c06052 }
+    },
+    {
+        { 0x091c23d, 0x797c3f6, 0x39c9c05, 0x2ea2de3, 0xa31f67c, 0x5d958b4,
+          0xd5f088c, 0xf97afe5, 0x0b37243, 0xbcfbd2a, 0xeca630c, 0xc43ad3e,
+          0x42845e0, 0xb92a337, 0xa9a0f16, 0x970bff7 },
+        { 0x5970a79, 0x8635511, 0xf205928, 0xcee332e, 0xc04c208, 0x2c58d70,
+          0x3f5e5bf, 0xdbfe19a, 0x8e51c56, 0x8f8f2c8, 0x8e2da75, 0xb61f58e,
+          0x624d93f, 0x4046a19, 0xe1f9538, 0x7de64db }
+    },
+    {
+        { 0xc2d850e, 0xd018e1c, 0x63a723c, 0x8cdb643, 0x90a42af, 0x9a65abe,
+          0x16f20cc, 0xfeece96, 0xd5cff56, 0xc906800, 0x3f0deed, 0x0acf23a,
+          0x728dd3a, 0x2143061, 0xb8ce34c, 0x66276e2 },
+        { 0x73cc9c7, 0x23700dc, 0x5b1778b, 0xdb44851, 0x4aab669, 0x330f41e,
+          0xf5282a4, 0x2f5aabc, 0x30f9e01, 0xff837a9, 0x901cc98, 0x1a1eb2f,
+          0xe69bd7f, 0xd3f4ed9, 0x8a72a7d, 0xa6b1141 }
+    },
+    {
+        { 0x9ea3b43, 0x34bde80, 0x5ced6ae, 0x5ddcb70, 0x95a6cb8, 0x8257f5b,
+          0xc77dcb8, 0xaac205d, 0x035b397, 0x77d740d, 0xcf7e0a6, 0xca7847f,
+          0x085601b, 0x9404dd6, 0x457e4f9, 0x0a5046c },
+        { 0xbc11470, 0xcaee868, 0x005c5f6, 0xb118796, 0xec79173, 0xcc04976,
+          0x21f6827, 0x7f51ba7, 0x486ff7e, 0xa8e3f0c, 0xf87838c, 0x327163a,
+          0x6d039fd, 0xcf2883e, 0xdb8b0e2, 0x6fb7ab6 }
+    },
+    {
+        { 0x620d669, 0x8ca5bac, 0xed7caa9, 0xff707c8, 0x927909b, 0xdaefa2b,
+          0x7029da3, 0x1d2f955, 0x6d131a0, 0x52a3ba4, 0x3ab1041, 0xe5a94fd,
+          0x99bc0ae, 0x5089177, 0xfa1bd16, 0xf750354 },
+        { 0x6cd31fd, 0xdd4e83a, 0x92fac84, 0xd335053, 0x1691382, 0xf914cbc,
+          0xda6ade6, 0x669683f, 0x8878513, 0x6944643, 0x4b1a72d, 0x429d3cc,
+          0x61eec36, 0x655c46a, 0x4bc4970, 0x881eded }
+    },
+    {
+        { 0x7ca647f, 0x5b39d37, 0xe917b34, 0x41533c1, 0x7daf734, 0xea2aeb5,
+          0x1286560, 0xf1ef1eb, 0x08e0473, 0x582f2e0, 0x5edc74a, 0x5913d7d,
+          0x3c1e754, 0x588c7ec, 0x7146fe1, 0xbd6db05 },
+        { 0x7634907, 0x3b0d49e, 0xe43b9cc, 0x4c65ce4, 0x2d92d5b, 0xb87e958,
+          0x7ab1519, 0x0513572, 0x8c3aed0, 0x03ec084, 0x561a641, 0x4d7aa21,
+          0x99e92ad, 0xe5f8211, 0x48a457c, 0x379b55f }
+    },
+    {
+        { 0xd6a8442, 0x8317c34, 0xae499da, 0xb0ab4a5, 0x720e8eb, 0xebcb16e,
+          0x9a96908, 0xfd5c563, 0xad23acf, 0xcab4d67, 0xbcdf748, 0xa600a79,
+          0xa2a6a51, 0x18a6340, 0x3aabd69, 0xf2f415c },
+        { 0x747258a, 0xdb38a4f, 0x2e24415, 0xb6ea560, 0xf1f7655, 0xfad1ea9,
+          0xc957684, 0x4e27eb5, 0xb2e1cfc, 0xf8283e1, 0xaa6291c, 0x8f83bd6,
+          0x5619e84, 0x28d23b5, 0x93770a4, 0xb9f34e8 }
+    },
+},
+{
+    {
+        { 0x7515fb1, 0x1bb8437, 0x7b860a6, 0xac73f2a, 0x22b390f, 0x78afdfa,
+          0x66048aa, 0x815502b, 0x85bf620, 0xf513b97, 0x3fc5d7c, 0x2524e65,
+          0x178c969, 0xa10adc0, 0x5391c8d, 0xa1d5396 },
+        { 0xa8bcc45, 0x09fccc5, 0x7710e1e, 0xa1f97d6, 0x897d0a1, 0xd694442,
+          0x5f42400, 0x7030beb, 0x7127908, 0xdebe08c, 0x2187637, 0x96b715c,
+          0xb528129, 0xc598250, 0xa1ccb07, 0x0f62f45 }
+    },
+    {
+        { 0xb765479, 0x8404941, 0x5837dc4, 0xfdecff4, 0xadbd465, 0x1796372,
+          0x3159806, 0x5f84c79, 0x6aaad34, 0x6d2e46b, 0x384b375, 0xd303b4a,
+          0xb392002, 0x440acd5, 0xc475e87, 0x4f2a4a7 },
+        { 0x5606fc2, 0x038e1da, 0x9c2f050, 0x2d821c2, 0xf139db4, 0xc074cb3,
+          0x4ec59be, 0xde2fee7, 0xa84ed59, 0x5a819ee, 0x3e98711, 0xd65c62c,
+          0xb9723c1, 0x72eb440, 0x01be611, 0xb927754 }
+    },
+    {
+        { 0xab9e9fc, 0x929fe64, 0x0bf1e85, 0x04379fd, 0xbc28ee3, 0xb322093,
+          0xe4555e1, 0x78ac4e2, 0xabc5588, 0xdb42b58, 0x77c8b12, 0x1c1b5e1,
+          0x40366c4, 0xf6d78dd, 0xbdae22e, 0xc21ff75 },
+        { 0xa211df2, 0x1e3d28e, 0x3617c0a, 0xc5a65a1, 0x58140d5, 0x3fa02c0,
+          0xb62d10c, 0x155c346, 0xe48268f, 0xc9cf142, 0x1993bc3, 0xdc14083,
+          0x0ee69dc, 0x07c44d4, 0x5e2ac46, 0x6169950 }
+    },
+    {
+        { 0xd0fb585, 0x44e4a51, 0xf1f3ce8, 0x00846be, 0x8e2de1e, 0xedef39a,
+          0x33b3934, 0x430afe3, 0x4337188, 0xac78b05, 0xc9a3f24, 0x0f39de4,
+          0xc9ae6a4, 0x039eddd, 0x8eacd51, 0xf470157 },
+        { 0x9a2f31a, 0x1e39694, 0xb19a8b1, 0xc8a40f4, 0x9d239d8, 0xdddd10c,
+          0x887e066, 0xf974245, 0x3ea28c6, 0xfdb5111, 0xe1122a9, 0xb5af0fb,
+          0x36e0267, 0xd30c89f, 0x74f024c, 0x7b1c0f7 }
+    },
+    {
+        { 0x07a39bf, 0x1ec9956, 0x3a68d15, 0x1c3ecf2, 0x4f59fe9, 0xd8a5c4e,
+          0x271abc3, 0xacb2032, 0x71ef239, 0xbc6bdf0, 0xb39b391, 0x660d7ab,
+          0xb627a0e, 0x2e73bb2, 0x248fc7e, 0x3464d7e },
+        { 0x1666760, 0xaa49249, 0x8582659, 0xa257b6a, 0x5593089, 0xf572cef,
+          0x73ca6bf, 0x2f51bde, 0x764cff5, 0x234b63f, 0xd411a35, 0x29f48ea,
+          0xafe1db1, 0xd837840, 0xd9f4c4b, 0x58ec0b1 }
+    },
+    {
+        { 0x5e6f3dc, 0x8e1deba, 0x06a5ff7, 0xc636cf4, 0xc80ca0f, 0xe172b06,
+          0x5ffb90a, 0x56dc098, 0x9a05e83, 0x895c218, 0x7561ac2, 0x6ddfaec,
+          0x96283a0, 0xaa35749, 0x7e7cd43, 0x6dfb262 },
+        { 0x2c8ca27, 0x6576de5, 0x49018eb, 0x6a4a872, 0x5c34342, 0x00c275c,
+          0xd2d90c4, 0xe34805a, 0xd8743c4, 0x651b161, 0x7312bf3, 0xb3b9d9b,
+          0x0bf7e00, 0x5d4b8e2, 0x78d3d7e, 0x8899bdf }
+    },
+    {
+        { 0xfaa9cd1, 0x9644ad8, 0x6e0e58e, 0x34c98bf, 0x404c637, 0x6022aad,
+          0x7ac013b, 0x2a11a73, 0x5540899, 0x5bdd103, 0x1e022a4, 0x2e67572,
+          0xb834c33, 0xe32045d, 0x2f2d01c, 0x74a260c },
+        { 0xc48841c, 0x20d59e9, 0xe560359, 0x05045dd, 0xac998ac, 0xeba779c,
+          0x00a6218, 0x5bed10c, 0x5327ef4, 0x25d4f8e, 0x4597794, 0xa278474,
+          0x831d11e, 0xefd68ca, 0x934446a, 0x9ad370d }
+    },
+    {
+        { 0x73c92ac, 0x3089b3e, 0x957a75c, 0x0ff3f27, 0xd676f50, 0x843d3d9,
+          0xd496d43, 0xe547a19, 0x8e924a4, 0x68911c9, 0x85b5522, 0xfab38f8,
+          0x83e0ac5, 0x1048811, 0xdc788c4, 0xcaccea9 },
+        { 0xe3c6aad, 0xfbe2e95, 0xb3a6cf1, 0xa7b3992, 0x87d78b1, 0x5302ec5,
+          0x1826100, 0xf589a0e, 0x8610632, 0x2acdb97, 0x9232b26, 0x1e4ea8f,
+          0x9c09a15, 0xb21194e, 0x849b909, 0xab13645 }
+    },
+},
+{
+    {
+        { 0xf3a71c1, 0x92e5d6d, 0x297d661, 0x349ed29, 0x1713fc9, 0xe58bd52,
+          0xb9ddfb5, 0xad999a7, 0x3c28ce0, 0x271c30f, 0x2a9d460, 0xf6cd7dc,
+          0x207dec7, 0xaf728e9, 0xfcb8bf0, 0x9c2a532 },
+        { 0x68bf486, 0xd702184, 0x7ab8ea8, 0x73b45be, 0x1795c93, 0xddfc658,
+          0x6bb8da2, 0x7941660, 0x88e07a2, 0x658f197, 0x26d3d12, 0xa9d5b08,
+          0x9535b52, 0x4d7c95f, 0x268ef8a, 0xad55e25 }
+    },
+    {
+        { 0xa2bc326, 0x94a9b0b, 0x167e5f3, 0x485ecc5, 0xc97fc74, 0x8340bc7,
+          0x07aaa5c, 0x06f882b, 0x849698a, 0x4b57455, 0xb36a0ba, 0xd9281eb,
+          0x8b8108f, 0x8918c6c, 0x5b50d1d, 0xedd1eea },
+        { 0x2a25f50, 0x94d737d, 0x2446ad0, 0x0e5a823, 0x7ced3e2, 0x02a5435,
+          0x4af8ced, 0xb09a92a, 0xeeecef2, 0x85fc498, 0xe71e3d4, 0x06a02b9,
+          0x84bb49a, 0x00ad307, 0x64a5b4a, 0xf61585e }
+    },
+    {
+        { 0xb86a4c9, 0x915f6d8, 0xa861e1f, 0x944bc6b, 0x54465ef, 0x3091ca7,
+          0xeb53a38, 0x11df859, 0x0144679, 0xd44dde5, 0x0994edd, 0x6c8da9a,
+          0x91241ef, 0xeebcebf, 0xc2f6859, 0xc419354 },
+        { 0x49581b6, 0x1f49693, 0xbb26cb4, 0x5712b10, 0xb09fd59, 0x8fcaa41,
+          0x72e22e3, 0xbd39aad, 0xb1199b0, 0xf70e794, 0xc6f863d, 0xdf63c0c,
+          0xee9df4f, 0xd58166f, 0xc45e70b, 0xb9224ea }
+    },
+    {
+        { 0xce525f4, 0x80072fa, 0x66a5502, 0x8597bd6, 0xdbc9725, 0xf65e203,
+          0xf2222a4, 0xeccfbe3, 0x2339834, 0x490aa42, 0x62489e8, 0x1348891,
+          0xa735084, 0xaff3f80, 0xf3f1bd6, 0x69d53d2 },
+        { 0x813341a, 0xb123ffc, 0x1173848, 0x359084c, 0xd29b08d, 0x751425e,
+          0x3890ad4, 0x1edda52, 0x607cf20, 0xb64974c, 0xb42ac7c, 0xa8c8cb8,
+          0xedd42e5, 0xd5cb305, 0x44c090a, 0xf3034dc }
+    },
+    {
+        { 0xbb18e19, 0x428921d, 0xfed2127, 0x4cfd680, 0x92ac8c3, 0x671144d,
+          0x132c894, 0x2121901, 0x7604cd9, 0x25d0e56, 0xafbc2a0, 0xa372223,
+          0x56c16f7, 0xcf98a52, 0xb5459e1, 0x71f129a },
+        { 0xb668b2e, 0xf4afdc5, 0x0c2d410, 0xc5d937a, 0x285d54a, 0xe2cc4af,
+          0x8c53e18, 0x1c82777, 0x69a92f6, 0x270f2c3, 0x616327a, 0x799f9ac,
+          0xd4246f2, 0xce658d9, 0xfb12e36, 0x0fb681f }
+    },
+    {
+        { 0xe0690fe, 0xc5ab11e, 0x3f74249, 0x80261e3, 0x58c1cf2, 0x8eb4b47,
+          0x184ae9b, 0x4895a80, 0xd3e27eb, 0x4a4bdb6, 0xbfd251c, 0xa7a1638,
+          0x417a7e3, 0x29ec144, 0x3f1b960, 0xd073609 },
+        { 0x49c73d1, 0xcb1ed83, 0x8d1945a, 0x33fc84a, 0xe965118, 0x9f668db,
+          0xa82811f, 0x3331743, 0x28ba540, 0xf394dec, 0x654a454, 0x44ce601,
+          0x3623645, 0x240dbb6, 0x2e61048, 0xf07e7f2 }
+    },
+    {
+        { 0x3d45213, 0x7c9f176, 0x9c1f77f, 0x3eefa70, 0x1b48350, 0xde3c3c5,
+          0x9d481a7, 0x4a2bc64, 0x7874f3d, 0xfd4a58a, 0x037b302, 0x96655d4,
+          0x68bf5ab, 0x9452528, 0x75177f6, 0x1b6d46a },
+        { 0xefb8d00, 0x7de6763, 0xa741b7b, 0xb2c1ba7, 0x7bae6ed, 0xcca6af4,
+          0x5b68b3f, 0xe4378ca, 0xaf71948, 0xfb757de, 0xbc6ac99, 0x7f07b5e,
+          0x27d636d, 0x752a568, 0x4b8a34f, 0xc8b7d1d }
+    },
+    {
+        { 0x325331b, 0x76cb78e, 0xadd2eed, 0x41f41c9, 0x5c5f623, 0x03db238,
+          0x7102fa2, 0xbbc1d17, 0x60182ec, 0x80f137a, 0x55adf15, 0xfdd8569,
+          0xe3373dc, 0x4f53f5e, 0x21b669b, 0xec6faf0 },
+        { 0x0b86081, 0x7d4e983, 0xf2d979c, 0x10d3cd9, 0x24a22c8, 0x0f48f58,
+          0x02f99ee, 0x86c540c, 0x5e6c5fc, 0xf4c6654, 0xbc404c8, 0xaf0c588,
+          0x423118a, 0x2e6edbd, 0x0690eab, 0x86e32e9 }
+    },
+},
+{
+    {
+        { 0xdfbfa6f, 0x1d12656, 0x7646018, 0xa498095, 0xc3597d0, 0x2f1071b,
+          0x1dda80a, 0x3df83f9, 0xf3ae449, 0x5853e28, 0x9e19aad, 0xb853d31,
+          0xa0d8a46, 0x863f01b, 0x2fef108, 0xa84fca6 },
+        { 0xfb84de9, 0xbe4c0b7, 0xc0727bf, 0x40a03dc, 0xb18575c, 0x781f841,
+          0x466cddb, 0x6a63045, 0x05dc7a2, 0x6be7582, 0x07ae811, 0x420f87f,
+          0x3bf96c8, 0x2808242, 0x51c6821, 0x723998c }
+    },
+    {
+        { 0x81f5863, 0x38ab641, 0x05ff9e1, 0xd82ecbd, 0xa065856, 0x339c94e,
+          0xa45156d, 0x143054a, 0x065628c, 0xe6d64bf, 0xa938589, 0xe530086,
+          0x385d79b, 0x22d3a49, 0x0ab8245, 0x0b10790 },
+        { 0xca387b5, 0xb0d80fb, 0x35551d7, 0x698206e, 0xa10bb73, 0x199685d,
+          0x9107378, 0xa8e5fa8, 0xd99dbbf, 0x36e5724, 0xd581b03, 0xd67f476,
+          0x88dd1e6, 0x7a15be7, 0xe5baa31, 0x8dac8e4 }
+    },
+    {
+        { 0xe170ef8, 0x4d5d88f, 0x1e9e600, 0xb6ba5de, 0xedeabc5, 0x4a89d41,
+          0x8fac936, 0x737c66b, 0x65c3125, 0x8d05b23, 0xb61b68e, 0x85a5cbc,
+          0x20a6af9, 0x8fea626, 0xd8b50ec, 0x85115de },
+        { 0x6a6f30b, 0x5430c8d, 0x8474295, 0x8bef9cf, 0xbe77f38, 0x0648f5b,
+          0x9e47bd7, 0xfe2b72f, 0x93106e2, 0xad6c5da, 0xfa7a6c3, 0x4fa6f3d,
+          0xb396650, 0xdcd2ed8, 0x1157ef9, 0x7de1cce }
+    },
+    {
+        { 0x1f241d1, 0x70a5f6c, 0x798cd5c, 0x6c354d8, 0x1a729fb, 0x23c7838,
+          0x523cbda, 0xcff8f15, 0x3493697, 0x5683ff4, 0x7534f53, 0xef7dbab,
+          0x2243d53, 0xd7bd08e, 0xf8072a9, 0x6f644cb },
+        { 0xb22db63, 0xac960f9, 0x23af04d, 0xa97f417, 0xd9798af, 0x692b652,
+          0xfedb156, 0x0e35967, 0xdfe6ee8, 0x14b5e50, 0xb411070, 0x7597ede,
+          0x442b3f9, 0x116f3ce, 0x1b2b6db, 0xe9b5ae8 }
+    },
+    {
+        { 0x2315930, 0xf4385ee, 0x27a8740, 0xc8d0298, 0xd934a43, 0x7907a8d,
+          0xc582191, 0x20bc946, 0x6a405e7, 0xa4acb3e, 0x43df2f5, 0x8c1d6c8,
+          0x991f0b5, 0x9df1593, 0x4d9be9d, 0xbb9df98 },
+        { 0x8e4b190, 0x6362008, 0xada3a88, 0xee1421e, 0xf93b027, 0xb84f0cc,
+          0x8e95091, 0x7a5d667, 0xf3e3704, 0x3974462, 0xc593e98, 0xfa6fb5e,
+          0xa6477d2, 0x44b6cf7, 0xb09a562, 0xe885b57 }
+    },
+    {
+        { 0x09a0c02, 0x6e339e9, 0x0e75f29, 0x57afff0, 0xfb7db03, 0x797d8d6,
+          0xd25a236, 0xc6e11a3, 0x0107260, 0x643ce1c, 0x62eae1c, 0xe644ec4,
+          0x3f5a3f5, 0x821d5b8, 0xc0579d6, 0xa8ad453 },
+        { 0x17d43a4, 0x6518ed4, 0x3f87ccd, 0x46e76a5, 0xf9bef95, 0xd6cbaab,
+          0x4f7cbcf, 0x2568832, 0x08476b4, 0x367159a, 0xbe6d324, 0x1d1b401,
+          0xa605026, 0x348cb98, 0x43b6b1e, 0x144f3fe }
+    },
+    {
+        { 0x7b1822c, 0xbabbd78, 0x2aa51f8, 0xd34ba7e, 0x41fbea4, 0x086f1cc,
+          0x746f3d9, 0x96f7eac, 0x281ecaf, 0xad97f26, 0xa14ee2c, 0x751a905,
+          0x0d7335f, 0xb4e7fe9, 0x4892ff0, 0x0d97b8f },
+        { 0x5a5c40e, 0xdb8a315, 0x7ba567b, 0x64e5de7, 0x1eefe88, 0x4f155f7,
+          0xfb6fbf4, 0xe2297e9, 0x6c16be5, 0xfe24bf9, 0xcdd83e2, 0x2251847,
+          0x5eda444, 0x13ac2c8, 0x283275f, 0x49d1b85 }
+    },
+    {
+        { 0x423e08f, 0xca08731, 0x87d2f14, 0x7046bb0, 0x3bc846c, 0x876f10c,
+          0x358fbe3, 0x2202b76, 0x0e26ac6, 0x0d4fc1c, 0xb986881, 0x1fc748b,
+          0x8384a18, 0x609e61c, 0x0d88e00, 0x28a72d6 },
+        { 0x78c6e2f, 0x1332a31, 0xb3526a4, 0x0367919, 0x698fe3e, 0x53989e4,
+          0xb16a99b, 0x14b1145, 0xddbb75f, 0xef9ec80, 0x0e53955, 0x7625624,
+          0x8744ae1, 0x54e087a, 0x672b875, 0xce50e8a }
+    },
+},
+{
+    {
+        { 0xa29629c, 0x4c88b2b, 0x7b2642f, 0x946559c, 0xf7ebe4c, 0x933d432,
+          0x63632c9, 0x97109b6, 0xe53184d, 0x799b3fb, 0x0f069a6, 0xd462871,
+          0x3a68351, 0x0c182a1, 0x9a2437a, 0x974a839 },
+        { 0x2a70278, 0x29f1997, 0xd9c424b, 0x01b98b6, 0x08f4c37, 0xd85a60b,
+          0x2b1da15, 0xcc3523f, 0xddffb0f, 0xf922115, 0xde84ae2, 0xee0fe4d,
+          0x55365be, 0x810440c, 0x1a457e8, 0xd2f6639 }
+    },
+    {
+        { 0xe2ddd05, 0x5e6879f, 0xabdfc61, 0x92a7545, 0xa5cede8, 0x7dedd63,
+          0x70df4bd, 0x8a03b3f, 0x91f6cbb, 0xa5d1f65, 0x10f3fb2, 0x372fde6,
+          0xa9dee05, 0x4537f9e, 0xdf7aa50, 0x7eb85bb },
+        { 0xe8c504d, 0x963edf8, 0xe7bdb6b, 0x53c8dca, 0x6fedf2d, 0xa246e4c,
+          0x0c55bde, 0x7553340, 0x0270a54, 0x2aa748d, 0x05860dd, 0xadb6cf0,
+          0x9b84763, 0x8d31450, 0xeb405ef, 0x626720d }
+    },
+    {
+        { 0x6601328, 0xa3709ae, 0x2ac2478, 0x68e94fd, 0x9d5d247, 0x3879343,
+          0x392c198, 0xfa467af, 0x15df607, 0x49e7b0d, 0x61792a8, 0x8c58122,
+          0x1d3762f, 0x79f7658, 0x244a39d, 0xaa38895 },
+        { 0xc5cd0bc, 0xef60af9, 0xa33b3bb, 0x2b0db53, 0x251015d, 0xe3e0b1f,
+          0xe64489e, 0xc608afc, 0x03651aa, 0xe52b057, 0x1c6f7b9, 0x1dda8b9,
+          0xff41893, 0x833f022, 0x192818c, 0x58eb0a0 }
+    },
+    {
+        { 0xfc7b5a7, 0x6c1300c, 0xa83ab33, 0x6d2ffe1, 0x9c02eef, 0x7b3cd01,
+          0xba60d55, 0x6c64559, 0x19e2f73, 0x2e9c16c, 0xdbe47b1, 0x11b24ae,
+          0x1b8153b, 0xc10a2ee, 0x1e02e1a, 0x35c0e08 },
+        { 0x1dd6f16, 0xa9f470c, 0xf41a290, 0x4ea93b6, 0x25ee03f, 0xac240f8,
+          0xb85aabd, 0x6cd88ad, 0x1be2f8f, 0x378a64a, 0x417bac1, 0xbf254da,
+          0x9231142, 0x7e4e5a5, 0x3b8c057, 0x057aadc }
+    },
+    {
+        { 0x80af479, 0x607c77a, 0x5ccdf74, 0xd3e01ff, 0x101b4c7, 0x9680aaf,
+          0x2fc50a6, 0xd2a7be1, 0xb72d782, 0x92a788d, 0x4640b52, 0x35daf2e,
+          0x39e601c, 0xc170d69, 0x7b25c2f, 0x16e05f5 },
+        { 0x6fe37f8, 0x47a42a6, 0xbeca298, 0xeb74271, 0x179da16, 0x401e11e,
+          0xaa53873, 0xfb8da82, 0x5bb4783, 0xd657d63, 0xfcea0b1, 0x6847758,
+          0x0993154, 0x2f261fb, 0x592853a, 0x868abe3 }
+    },
+    {
+        { 0x35766ab, 0x1a4c543, 0x6f4e4ea, 0xa1c84d6, 0x60ba199, 0x5d737a6,
+          0x98b15a2, 0x4a7b1e2, 0xfd967d3, 0x207877f, 0xc262b4d, 0xcaec82d,
+          0x4f2a37d, 0x0b27849, 0x6ac1711, 0x3478141 },
+        { 0x8fc6856, 0x28e3df1, 0x16d003f, 0xbec03f8, 0xff39ebd, 0x2bd705b,
+          0x2d776d3, 0x1dcb53b, 0x5c0e7ce, 0xabafa7d, 0x4a53332, 0x5b9c8c2,
+          0x9d90214, 0xe9f90d9, 0xc129690, 0x789747e }
+    },
+    {
+        { 0x54e2dfa, 0x94d3c39, 0xafb2a8f, 0x919f406, 0x34e3927, 0x159ef05,
+          0xa165c37, 0xcdb4d14, 0x288f337, 0xa23e5e8, 0x0f90242, 0x95867c0,
+          0xe34e781, 0x2528150, 0x6657b95, 0x104e501 },
+        { 0xbcdda24, 0x695a6c9, 0x23eb5fa, 0x609b995, 0x16a60f8, 0xcbce4f5,
+          0xf084a29, 0xec63f7d, 0x20c811f, 0x3075ada, 0x8c716a1, 0x129a192,
+          0xcd4cd4a, 0xd65f4d4, 0x62188be, 0xe18fa9c }
+    },
+    {
+        { 0xbac60e3, 0x1672757, 0x577144b, 0x525b3b9, 0x887055b, 0x38fc997,
+          0x31e4408, 0x7a77126, 0xcba2fcf, 0x884f173, 0x5962ac0, 0x783cbdc,
+          0x22287dc, 0x4f3ed0a, 0x50e20e6, 0x8a73e34 },
+        { 0xd764583, 0xe7a1cd0, 0x0d58ee6, 0x8997d8d, 0xaa13ed6, 0x0ea08e9,
+          0xcf363cb, 0xed478d0, 0x5b37bf4, 0x068523d, 0x783f13c, 0x8b5a9e8,
+          0x87528a9, 0xde47bbd, 0xcaec313, 0xd6499cc }
+    },
+},
+{
+    {
+        { 0xe09859d, 0x54781bb, 0x7f5e648, 0x89b6e06, 0x7075824, 0xb006dfe,
+          0x0717f68, 0x1731660, 0x0b4efe2, 0x9c86554, 0x5e30d8e, 0xdbdb257,
+          0x3b4d50f, 0xa6a5db1, 0xfa47beb, 0x3b5662c },
+        { 0x89d4a59, 0x9d4091f, 0x550a7dc, 0x790517b, 0xc52965e, 0x19eae96,
+          0xb5ed7a4, 0x1a7b3c5, 0xeb16541, 0x19e9ac6, 0xef66852, 0x5f6262f,
+          0xc4cda27, 0x1b83091, 0x3bf742b, 0xa4adf6f }
+    },
+    {
+        { 0xa5100e7, 0x8cc2365, 0x8592422, 0x3026f50, 0x3d714d0, 0xa4de79a,
+          0x90fcb30, 0xefa0d3f, 0x474ada0, 0x126d559, 0xc94350a, 0xd68fa77,
+          0x0c7cb45, 0xfa80e57, 0x3985fbf, 0xe042bb8 },
+        { 0xfe13dba, 0x51c80f1, 0xcf055d7, 0xeace234, 0x73f95f7, 0x6b8197b,
+          0xdcdbe89, 0x9ca5a89, 0xdfd9896, 0x2124d5f, 0x9e7ca37, 0x7c69556,
+          0x8babb37, 0x58e806a, 0xbaf99ce, 0x91b4cc7 }
+    },
+    {
+        { 0x197e968, 0x874e253, 0x3160668, 0x36277f5, 0x8b95dbe, 0x0b65dda,
+          0xf0872a1, 0x477a792, 0x314268d, 0x03a7e3a, 0x0c805c7, 0xa96c842,
+          0xb7bc4a8, 0xb941968, 0x75db390, 0x79dce30 },
+        { 0x6f4cc14, 0x577d4ef, 0xb5d1107, 0x5b0d205, 0x9f93624, 0x64ff20f,
+          0x5034a2f, 0x0b15e31, 0x8b6f35c, 0x3a0f6bb, 0xe0d0ec5, 0x0399a84,
+          0x0d5d521, 0xd0e5823, 0xcb1dd54, 0xdeb3da1 }
+    },
+    {
+        { 0x182401a, 0x24684ae, 0x21a706f, 0x0b79c1c, 0xd8998af, 0xe1d81f8,
+          0x4bb069f, 0xadf870f, 0xf3dd7aa, 0xd57f85c, 0xe4a40f8, 0x62d8e06,
+          0x8b55aa1, 0x0c5228c, 0xa9c0a1a, 0xc34244a },
+        { 0x68f544e, 0xb5c6cf9, 0xde23ab7, 0xa560533, 0x47c690c, 0xaa55120,
+          0x12aaaa6, 0x20eda5b, 0x751a6a0, 0xea0a49a, 0x2baa272, 0x6d6cfff,
+          0xbf4c28a, 0x95b756e, 0xe6178a4, 0xd747074 }
+    },
+    {
+        { 0x221a94b, 0xa27b453, 0xe635f20, 0xd56ad13, 0x8c95117, 0x03574b0,
+          0xed30b70, 0xf0ee953, 0x957796f, 0xb48d733, 0x58c336b, 0xf5d9583,
+          0x82db529, 0x6170cd8, 0xec9d1ea, 0xcd3ef00 },
+        { 0xe4d105f, 0xd1bea0d, 0xad6a559, 0xd2d670f, 0x52f9690, 0x652d012,
+          0xc2529b0, 0x5f51fb2, 0xe89df2a, 0x5e88bf0, 0xcd686e4, 0x9a90684,
+          0x882c7a1, 0xf519ccd, 0xc2f4d37, 0x933a0df }
+    },
+    {
+        { 0x3f66938, 0x0720a9f, 0xd8149df, 0x99356b6, 0xa3d7f61, 0xb89c419,
+          0x4ba6e31, 0xe658134, 0xab936c8, 0xd130561, 0x40dbef1, 0x0625f6c,
+          0xb6bb847, 0x7b2d6a2, 0x84d506b, 0x3ca8b29 },
+        { 0xfb011b0, 0x6bf729a, 0x33448c9, 0x01c3078, 0x0837420, 0x6ae9508,
+          0xa207fb8, 0xf781a8d, 0x57562a9, 0xcc54d58, 0x858c5ab, 0xc9b7364,
+          0x359908f, 0xdfb5035, 0x9631138, 0x8bf77fd }
+    },
+    {
+        { 0xc13fbb1, 0xf523365, 0x9993ed5, 0x88532ea, 0x5a73492, 0x5318b02,
+          0xe5a8f3c, 0x94bff5c, 0x306c2a0, 0x73f9e61, 0xf2668a3, 0x00abbac,
+          0x076237d, 0x23ce332, 0x34c0f9b, 0xc867f17 },
+        { 0xcfd2136, 0x1e50995, 0xb2b70f8, 0x0026a6e, 0x5077a7d, 0x66cb184,
+          0xa3b498e, 0xc31b2b8, 0x260ec86, 0xc12035b, 0xe1b3df0, 0x1cbee81,
+          0x8d55a42, 0xfd7b804, 0xf47a8c8, 0x912a41c }
+    },
+    {
+        { 0x9e157e3, 0xab9ffe7, 0x44dc158, 0x9cfe46d, 0x8a4a3ef, 0x435551c,
+          0x3b7e3a8, 0x638acc0, 0x49954a7, 0x08a4ebd, 0x13194f7, 0x295390c,
+          0x253892a, 0x3a2b68b, 0x25d5b11, 0xc1662c2 },
+        { 0x3a5d2bb, 0xcfba072, 0xcc327c9, 0xffaf6d3, 0xc67e254, 0x6c6314b,
+          0x2f32208, 0x6661631, 0xbea72e1, 0xf780f97, 0x002122f, 0x495af40,
+          0x7578a99, 0x3562f24, 0x77ce51e, 0x5f479a3 }
+    },
+},
+{
+    {
+        { 0x1a82a12, 0x91a5884, 0x80f3a62, 0xa754175, 0xf73417a, 0x399009f,
+          0x0a8c5cd, 0x2db1fb9, 0xc046d51, 0x82c8912, 0x8f18274, 0x0a3f577,
+          0x26ccae2, 0x2ad0ede, 0x8a4e9c2, 0x7d6bd8b },
+        { 0x4b3de44, 0xaa0d797, 0x96ac9bb, 0xf8658b9, 0x5f6c334, 0x31e7be2,
+          0x4df12c9, 0x23836ce, 0x59eb5c9, 0x029027b, 0x5b8649d, 0x2f22531,
+          0xd907162, 0xa0fdf03, 0x9e80226, 0x101d9df }
+    },
+    {
+        { 0x9a90835, 0xf12037a, 0xf0222a7, 0xd2d0882, 0xc3814e2, 0xeaf8d40,
+          0x8b8146b, 0xa986dc6, 0x8504653, 0x147a331, 0x2feaf67, 0x734e003,
+          0x602bec5, 0x6f27bbf, 0x6a688f3, 0xa1e21f1 },
+        { 0x73c4ae5, 0x5a8eeab, 0xe70b412, 0x4dbaddb, 0xcfd2af1, 0x871ceba,
+          0x7d7a286, 0x1860382, 0xb5bb401, 0x024059d, 0x3c39b73, 0x2557c09,
+          0x6681697, 0xfc5a711, 0x891b57c, 0xf881c0f }
+    },
+    {
+        { 0x8ea191a, 0x3c443f1, 0xd700ad0, 0x76faa58, 0xbe7fcbf, 0x6fe6cfa,
+          0x8990ef7, 0xaefc528, 0x80004cc, 0x44e30fa, 0x6d8ef85, 0xc744adc,
+          0x912df70, 0xafcd931, 0x572a6d8, 0xf62a9d1 },
+        { 0x3219f27, 0x47158a0, 0xad73136, 0x76fb27e, 0xcc2d614, 0x41bb2ad,
+          0xde1ec21, 0x8858cb9, 0x5f15866, 0xab402c4, 0xbc82bbf, 0x6675d5b,
+          0xf1b28d3, 0x4ee9dd6, 0xe373c17, 0x875884f }
+    },
+    {
+        { 0x2a67d36, 0x17806dd, 0x32c9ec1, 0xaa23a86, 0xfc1ee55, 0xd914126,
+          0x653701b, 0xbf8f7bd, 0xea71367, 0x9b0111a, 0xa98e417, 0x61fd4ab,
+          0x561c5a5, 0xeb45298, 0xe7af394, 0x2187b0a },
+        { 0x1616dde, 0x71f12db, 0x07da7b4, 0x0617609, 0x02ddb04, 0x414d376,
+          0x286fb58, 0x1100be7, 0x6f0d95b, 0xd7cf88d, 0x746d703, 0x8539d23,
+          0x4e23d73, 0xdccc9d6, 0xec89680, 0xaeef1d2 }
+    },
+    {
+        { 0x336508d, 0x82ccf1a, 0x5bad150, 0xa128c1f, 0x29a188d, 0x551d8c0,
+          0x771404f, 0xef13dd4, 0xc37b993, 0xdd67696, 0x0dddad2, 0x428c0e2,
+          0x038c94c, 0x222278d, 0x078e3f2, 0x1a24a51 },
+        { 0xedb0db9, 0xd297fe6, 0x8251a87, 0x00988d2, 0xbfaa0d7, 0xbb946f8,
+          0xdf45ea0, 0x380f7b9, 0xafccf5e, 0x8526415, 0xe9ec7bc, 0x909bfbf,
+          0x124755c, 0x2ed7093, 0x89404e2, 0x4368028 }
+    },
+    {
+        { 0x36d9ef1, 0x21b9fa0, 0xe433526, 0xfd64b7c, 0x6544849, 0xd9d7eb7,
+          0xd5b54b3, 0x201620c, 0xbb61159, 0x25fab3d, 0xc53e0d3, 0x90d4eb0,
+          0x9e74772, 0xba09831, 0xec1681c, 0x8749658 },
+        { 0xfec316b, 0xa354349, 0xa743ea2, 0x639a9b1, 0x37c50e6, 0x2e514ca,
+          0xdbaf6c5, 0x9f4a4fd, 0x6f511c9, 0x0df87ef, 0x0c00d95, 0xadd4cef,
+          0xaa1433f, 0x401c0eb, 0xbb38af9, 0x3c3a59e }
+    },
+    {
+        { 0xf0e7dca, 0x8706245, 0x3fb29ca, 0xad238cd, 0x9b7d8f0, 0x0330443,
+          0x154f495, 0xfdcd6e6, 0x7d4ad09, 0xc67e24a, 0x5438390, 0x1b209e8,
+          0xb0c211e, 0xf893b81, 0x7e11e36, 0x1aa86f0 },
+        { 0xedea8b1, 0x2cca3ff, 0x3b306cd, 0x7eedd07, 0x12ee222, 0x78e37bc,
+          0xbc42a1d, 0x257870b, 0x1fbd397, 0x5fb2bb9, 0x09d6c60, 0x4702470,
+          0x20bdc36, 0x11748a3, 0x04280e8, 0x3ff24dc }
+    },
+    {
+        { 0x9839b52, 0x0eb1c67, 0xacfbd32, 0x5bcca27, 0x74898e3, 0xb506c16,
+          0x2489e5e, 0x37d662e, 0xf694887, 0x8dc0731, 0xf43f1dc, 0x571149e,
+          0x66d63dc, 0x6430a37, 0xb50dd70, 0x0d2640e },
+        { 0x3b2675b, 0x2b56149, 0x88c604f, 0x1b48065, 0xaafbabc, 0x55c86a8,
+          0x608aaba, 0xa7b9447, 0x04cad8c, 0xa42f635, 0xcee7788, 0x0f72b1d,
+          0x755d99a, 0x1d68374, 0x5be2531, 0xd7cdd8f }
+    },
+},
+{
+    {
+        { 0xbcdfee1, 0x67873bd, 0xfcd0a3f, 0xa5a0c0a, 0x3cfa3d4, 0x59389f9,
+          0xe1c865c, 0x14e945c, 0x1d588cc, 0x62d2f8e, 0x8e228b4, 0xfd02f8a,
+          0xb42b649, 0x208f791, 0xab397ad, 0x0e0dff1 },
+        { 0x0bc6eb1, 0x30ac3d9, 0x5f313bb, 0xf14f16a, 0xe2a0ad2, 0x70fa447,
+          0x5a0db84, 0x6e40685, 0xe32e1e7, 0xd52282b, 0x15ca330, 0x315a02a,
+          0x867c2fe, 0x9a57a70, 0x0054923, 0x55f0765 }
+    },
+    {
+        { 0xc0cf08f, 0x2d729f6, 0xebaf57f, 0x6b80138, 0x0200c25, 0x6285bcc,
+          0x2cd2ac7, 0xee84519, 0x922778a, 0x28fce4d, 0xcd1011c, 0x761325c,
+          0x5100e47, 0xd01f247, 0xc60d8e1, 0xc7a1665 },
+        { 0x7ceb064, 0x950966d, 0x78420db, 0x0a88e85, 0xe096f29, 0x44f2cfc,
+          0x640f1d2, 0x9d9325f, 0xd2426f1, 0x6a4a81f, 0x9c905ac, 0x3ed6b18,
+          0x008854d, 0xba3c0e2, 0xa0d321b, 0x1df0bd6 }
+    },
+    {
+        { 0x3feb1e7, 0x0117ad6, 0xf1ae02f, 0xa058ba2, 0x31b3f06, 0x5eee5aa,
+          0xafacd4d, 0x540d9d4, 0x1571d91, 0x38992f4, 0xbf2c7de, 0xef2738e,
+          0x92a798d, 0x28bfcab, 0x2286733, 0x37c7c5d },
+        { 0x6470df0, 0xb99936e, 0x8af6a42, 0x3d762d5, 0xc74eec5, 0xa8c357a,
+          0xf13afbc, 0x9917beb, 0xf2dc073, 0x28f0941, 0x6ce7df7, 0x306abf3,
+          0xd6973c8, 0xa3c5f6f, 0x3677632, 0x640209b }
+    },
+    {
+        { 0xe23aef7, 0xee872a2, 0xeb9b08e, 0xb497b6f, 0x3f33c63, 0xfb94d97,
+          0x2b32315, 0x9ea1ff4, 0x49a4166, 0x537b492, 0xab4f8be, 0x89c7fe6,
+          0xdad8f0f, 0xf68007f, 0x71b8474, 0xe56ef0b },
+        { 0x3f333f9, 0x478b2e8, 0xb2607f5, 0x144e718, 0xa4c7ab5, 0x13aa605,
+          0x1d0730d, 0xfc1fc99, 0x5ab3ea1, 0xe7a0437, 0x306d8d3, 0xc59986a,
+          0x702a8b1, 0x24f6111, 0xe040ad2, 0x7741394 }
+    },
+    {
+        { 0x60723a7, 0x34c6a25, 0xf4ea691, 0x8aabd0d, 0x5d7497f, 0x9d676a5,
+          0x7d91fa4, 0x12c0957, 0x6479284, 0x581c7a8, 0xf4fd449, 0xa54f3da,
+          0x4ef44cf, 0x2f89f3c, 0xc9ec97c, 0xfc266b5 },
+        { 0x88b142a, 0xfcd3fbe, 0x4bd69c1, 0x9f3109f, 0xb5f5a6a, 0x08839c0,
+          0x2e68303, 0x63ca850, 0xbba0a74, 0x2f0628d, 0x5d56b54, 0x743cccf,
+          0x13e09fd, 0xbd4b066, 0xde2ba3e, 0x7a8415b }
+    },
+    {
+        { 0xc076ab2, 0x2234a3b, 0x4977a98, 0xd6953e5, 0x31ebe2e, 0xc122158,
+          0xbad78e2, 0x632145f, 0xa5c4b08, 0xd7ba78a, 0x998e32a, 0x6f4ea71,
+          0x3485a63, 0x25900d2, 0x6a5176f, 0x97ac628 },
+        { 0x1093f7b, 0x5df9118, 0xc844563, 0x2bf9829, 0x6272449, 0x525d99d,
+          0xb5c8a18, 0x4281cb5, 0x0544a08, 0x35df278, 0xbaeb8f4, 0xf4c3d2d,
+          0x5230447, 0xc7ff317, 0x5d2fbff, 0x6b4d764 }
+    },
+    {
+        { 0x2b0c9cb, 0x4837f80, 0x8ce8418, 0xb65f816, 0x9fc1428, 0xdf66ea9,
+          0x04ea7e8, 0x9788ee8, 0x8334e3c, 0x9eae900, 0xd6ba1b6, 0xbc91058,
+          0xd7064b6, 0x634aba1, 0x397b368, 0x12d9bb3 },
+        { 0xc413aa8, 0x0645c85, 0xac6b5e3, 0xb09dea6, 0x289a50b, 0x29a620d,
+          0xbbcceb1, 0x104db3b, 0x87b3309, 0x42e4792, 0xec97f01, 0xdfc373e,
+          0xb93f84e, 0xe953f94, 0x052dfbf, 0x3274b7f }
+    },
+    {
+        { 0x1bd6fa9, 0x9d5670a, 0xdb6c4d4, 0xec42fc9, 0x1b42845, 0xaecd4ed,
+          0x1b03549, 0x4eed90e, 0xbbab1fa, 0xeb3225c, 0x28a2816, 0x5345e1d,
+          0x0b77d2a, 0x3741cfa, 0x7ea8caa, 0x712b19f },
+        { 0x661853e, 0x42e6844, 0xe4a6e5d, 0x4cf4126, 0xc3649f6, 0x196a9cf,
+          0xf21b6b1, 0x06621bc, 0x32e29ea, 0x887021c, 0x8c5680f, 0x5703aeb,
+          0x660f6d7, 0x974be24, 0xc71864e, 0xaf09bad }
+    },
+},
+{
+    {
+        { 0xa81b6d3, 0x3483535, 0xca037dc, 0x19e7301, 0x63ddfeb, 0x748cab7,
+          0x6f01a38, 0xe5d87f6, 0x2795cd6, 0xbba4a5c, 0x615c36c, 0x411c5d4,
+          0x706f412, 0xff48efc, 0x4b519df, 0x205bafc },
+        { 0x5227110, 0xfcaa5be, 0x3ad0af0, 0x7832f46, 0x2642b1b, 0x34ef2c4,
+          0x072f822, 0x7bbef7b, 0x923a616, 0x93cb0a8, 0x6d91ba7, 0x5df0236,
+          0x42f7d21, 0x5da94f1, 0xa14e891, 0x3478298 }
+    },
+    {
+        { 0xc831d39, 0xad79a0f, 0x4803c44, 0x24d1948, 0x86aeeb2, 0x4f8a864,
+          0x926f6b9, 0x0ca284b, 0x1acd7cd, 0x501829c, 0x3d12c52, 0x9f6038b,
+          0xf371ef5, 0x77223ab, 0x13bf4de, 0x2e03516 },
+        { 0xb4468cc, 0x7a5a4f2, 0x470ae46, 0xdcea921, 0x11be696, 0xf23b7e8,
+          0x720d6fb, 0xe59ad0d, 0x2983469, 0x9eacac2, 0xc4397ee, 0x4dd4110,
+          0xcbe2675, 0x4ef85bd, 0xaa7c74b, 0xe4999f7 }
+    },
+    {
+        { 0x8ea1e98, 0x031838c, 0x04d96a2, 0x539b383, 0x163956e, 0x5fbdef0,
+          0xce3f52a, 0x6bd4d35, 0x55e897f, 0xe538c23, 0x472dd3f, 0x6078d3a,
+          0xca9f452, 0x590241e, 0xfd7fc07, 0x2bc8495 },
+        { 0xead4c8c, 0x23d0c89, 0x601c66e, 0x1ea55a9, 0x4f5b833, 0x41493c9,
+          0xaa5a978, 0xc49a300, 0x0c69594, 0xc98bdc9, 0xccbdc8c, 0x4e44ced,
+          0x6adccbf, 0xb0d4e91, 0x32c37ae, 0xd56e36b }
+    },
+    {
+        { 0x5b93152, 0x052bd40, 0x4f1dbfa, 0x688b1d4, 0xbe5cc5f, 0xe77ba1a,
+          0xa6ac543, 0x11f8a38, 0xe4bb988, 0x3355fd6, 0xf8dffb4, 0xdf29c5a,
+          0x81f20ee, 0x751f589, 0xda9b7fb, 0x22a0f74 },
+        { 0x6397b49, 0xec8f2bc, 0x3639201, 0xff59fc9, 0xa048264, 0xb7f130a,
+          0xafdc4cc, 0xe156a63, 0xb13acaf, 0x0fd7c34, 0x0cb4999, 0x87698d4,
+          0x7f26f24, 0x6d6ecae, 0x0f296e2, 0xae51fad }
+    },
+    {
+        { 0xdd0f58d, 0xd0ad5eb, 0x5c67880, 0x6ec6a2c, 0x9af1e0f, 0xe1ce034,
+          0x3996d32, 0x0801485, 0x5e69d20, 0x59af51e, 0xaa48ecf, 0x0ef743a,
+          0x7dafcb0, 0x8d3d2ea, 0x89189b6, 0x4ac4fad },
+        { 0xeae97f1, 0x92d91c2, 0x62b4662, 0xef5eca2, 0xb38b10a, 0x440b213,
+          0xfc661da, 0xec90187, 0xf64cf8d, 0x85f3f25, 0x457ad1b, 0xcee53ca,
+          0xf517672, 0x8deed4b, 0x4761828, 0x7706fb3 }
+    },
+    {
+        { 0x17494fe, 0x1577d91, 0x2fd7239, 0x52d29be, 0x0186d37, 0x9a0eef0,
+          0x27fe108, 0x241d0f5, 0xe6fb59f, 0x42824ba, 0x0d48c25, 0xb8d33df,
+          0x47af4b0, 0xfffdb0a, 0x073b0b6, 0x534c601 },
+        { 0x51c033b, 0xe6df359, 0x86c0f94, 0x3e1002b, 0x48fb9b6, 0xa7cb555,
+          0xa7bbff8, 0x999818b, 0x84d8bf2, 0xe4ba3d6, 0x6358f0a, 0x53dbb32,
+          0xf2568e8, 0xeebc1e2, 0xb3e0f68, 0xc6917eb }
+    },
+    {
+        { 0x19f8d13, 0xbe1bbfc, 0x2d4795c, 0xc3951b6, 0xed535a9, 0x9371c49,
+          0x68cebea, 0x77c389f, 0xa141d0e, 0xfc1a947, 0xde44f8b, 0x4b48d7a,
+          0x8580a26, 0x3db1f05, 0x258b5fc, 0xeed1466 },
+        { 0x9854b21, 0x5daa4a1, 0x1ab1ead, 0x5bfa46f, 0x59957eb, 0xc152e35,
+          0xea48ada, 0xdc84277, 0xfc169b5, 0x68709cf, 0x720e617, 0xde50ce3,
+          0xdd9a832, 0xe42f262, 0x2d6ce29, 0xddffd4d }
+    },
+    {
+        { 0x8fa0a56, 0xd5ba557, 0xfafaf4c, 0x0d7d0f1, 0x38b63ed, 0x7666e41,
+          0x5d87f02, 0x04e6513, 0xc958f32, 0xdca8866, 0x3ce2686, 0xaa8486d,
+          0xf1cbcd3, 0xe3785ca, 0x03c8335, 0x8a9b114 },
+        { 0x2e0ef60, 0x5c1dca2, 0x7d3fb20, 0x775af5b, 0x2b373a8, 0xe690ffc,
+          0x28330e6, 0x30fe15d, 0xdd0f393, 0x8a1022b, 0x966a828, 0x6bd7364,
+          0x949208a, 0x8d4b154, 0xb9d9828, 0xfb38c6b }
+    },
+},
+{
+    {
+        { 0x0340ac2, 0x6d19764, 0xecab5ff, 0x969f473, 0xc458e42, 0xead46f7,
+          0x1d00eed, 0x168646a, 0xe0ce0cf, 0xf70c878, 0x8d8d15a, 0xa7291d3,
+          0xfdd10cc, 0x92cf916, 0x24f86d5, 0x6d36134 },
+        { 0x2d5c4b4, 0xba50d17, 0x4626f15, 0xe0af502, 0xd76098a, 0x76f3809,
+          0xd6caaa8, 0x433dc27, 0x70d97a7, 0x72dc67a, 0xf5c7355, 0x935b360,
+          0x179bb31, 0xdbaac93, 0x7ed1a33, 0x7673848 }
+    },
+    {
+        { 0x8f9fa0d, 0x8d1ca66, 0xa02f2bf, 0x4ed95d8, 0xf630d7b, 0xd19fc79,
+          0xf46fa51, 0x0448ec4, 0x623bf3f, 0xb371dd8, 0xd650e94, 0xe94fabc,
+          0xcd90a70, 0x3af3fca, 0x03ce3b7, 0x0f720c4 },
+        { 0xd636c3b, 0x590814c, 0x4469945, 0xcf6928d, 0x484a4c6, 0x5843aaf,
+          0xf9b4722, 0xb5a4c1a, 0x6cfb2f9, 0x25116b3, 0x32c2640, 0xf248cf0,
+          0x27412a1, 0x8cd059e, 0x862fc5d, 0x866d536 }
+    },
+    {
+        { 0x6de4a2e, 0x156e62f, 0xaafcc78, 0x0365af7, 0x19e925e, 0x65c8618,
+          0xf8b2191, 0x4db5c01, 0xad564fa, 0x1fd26d1, 0x19c8610, 0x16bbc53,
+          0x815f262, 0x0718eef, 0x27f83d1, 0x8684f47 },
+        { 0xb0f48db, 0xa30fd28, 0x6ab8278, 0x6fef506, 0x1a652df, 0xd164e77,
+          0xc6ebc8c, 0x5a486f3, 0xdc3132b, 0xb68b498, 0xd73323f, 0x264b6ef,
+          0x69b2262, 0xc261eb6, 0x2a35748, 0xd17015f }
+    },
+    {
+        { 0x7c4bb1d, 0x4241f65, 0xf5187c4, 0x5671702, 0x3973753, 0x8a9449f,
+          0xcc0c0cd, 0x272f772, 0x58e280c, 0x1b7efee, 0x4b5ee9c, 0x7b32349,
+          0x31142a5, 0xf23af47, 0xd62cc9e, 0x80c0e1d },
+        { 0x675ffe3, 0xcbc05bf, 0x258ce3c, 0x66215cf, 0x28c9110, 0xc5d2239,
+          0x2a69bc2, 0x30e12a3, 0x76a9f48, 0x5ef5e80, 0x2329d5f, 0x77964ed,
+          0x8a72cf2, 0xdf81ba5, 0x6e1b365, 0x38ea70d }
+    },
+    {
+        { 0x2f75c80, 0x1b18680, 0x698665a, 0x0c153a0, 0x522e8dd, 0x6f5a7fe,
+          0x8ddfc27, 0x9673866, 0x0d3bdce, 0x7e421d5, 0x25001b2, 0x2d737cf,
+          0x0e8490c, 0x568840f, 0xe30c8da, 0xea2610b },
+        { 0x9561fd4, 0xe7b1bc0, 0x26decb0, 0xeda786c, 0x6a76160, 0x2236990,
+          0x78a3da3, 0x371c714, 0x2a2d9bf, 0x1db8fce, 0x3292f92, 0x59d7b84,
+          0x5a665f9, 0x8097af9, 0x542b7a9, 0x7cb4662 }
+    },
+    {
+        { 0xc6b0c2f, 0xa5c53ae, 0x7312d84, 0xc4b8732, 0xc732736, 0xfc374cb,
+          0x9310cc0, 0xa8d78fe, 0x65d1752, 0xd980e86, 0x6004727, 0xa62692d,
+          0x0146220, 0x5d07928, 0x860fea5, 0xbd1fedb },
+        { 0xb35d111, 0xcbc4f8a, 0x3e32f77, 0x5ba8cdf, 0xb614b93, 0xd5b71ad,
+          0x2f8808d, 0x7b3a2df, 0x6ef2721, 0x09b89c2, 0x47c3030, 0x55a5054,
+          0x2986ae6, 0x2104431, 0x2367d4c, 0x427a011 }
+    },
+    {
+        { 0xc1942d8, 0xe9fe256, 0x96e3546, 0x9e7377d, 0xb0c1744, 0x43e734c,
+          0x211fbca, 0x5f46821, 0x32b6203, 0x44f83dc, 0x6ad1d96, 0x8451308,
+          0x2fbb455, 0x54dd519, 0x2f10089, 0xc2a1822 },
+        { 0x1855bfa, 0x01055a2, 0x77078b4, 0x9e6d7b4, 0x30cea0e, 0x3f8df6d,
+          0x32973f7, 0x81c2150, 0xc0b3d40, 0x17dd761, 0x50d0abe, 0x040424c,
+          0x783deab, 0x5599413, 0x8f3146f, 0xde9271e }
+    },
+    {
+        { 0xaf4a11d, 0x5edfd25, 0x7846783, 0x3a3c530, 0x73edd31, 0xb200868,
+          0xfe0eef8, 0x74e00ec, 0x3dd78c7, 0xba65d2f, 0x71999f1, 0xab13643,
+          0xde9a7e8, 0xfa9be5d, 0x87a8609, 0xeb146ce },
+        { 0x65353e9, 0x76afd65, 0xd51ba1c, 0xfa7023d, 0x37ede4f, 0x7a09f22,
+          0x0ba7a1b, 0xca08576, 0xb99950a, 0xd973882, 0xea5057a, 0xe894266,
+          0x7f55e49, 0xd01c421, 0x5555679, 0x69cfb9c }
+    },
+},
+{
+    {
+        { 0xc5d631a, 0x67867e7, 0x5bcf47b, 0x1de88c5, 0xafd1352, 0x8366d06,
+          0x6e20337, 0xd7dbdef, 0x1253ec7, 0xb0f9e2f, 0x10ad240, 0x1be9845,
+          0xf4a6118, 0x63ec533, 0x96ce633, 0xd5e4c5b },
+        { 0x4df4a25, 0x1d0b6c3, 0x5a1b554, 0xef9486a, 0x47b6ef3, 0x2f0e59e,
+          0x2ff84d7, 0x4d8042f, 0xda359c9, 0x3e74aa3, 0xd21c160, 0x1baa16f,
+          0x0191cba, 0xb4cff21, 0xebc6472, 0x50032d8 }
+    },
+    {
+        { 0x1fc1b13, 0xb6833e0, 0x1a5ad8f, 0x8a8b7ba, 0x622b820, 0xc0cafa2,
+          0x738ed20, 0xc6663af, 0x8b18f97, 0xd894486, 0x774fbe4, 0xcf0c1f9,
+          0x5be814f, 0xeedd435, 0xb57e543, 0xd81c02d },
+        { 0x310bad8, 0x5e32afc, 0x9b813d1, 0x065bc81, 0x3142795, 0x8efc5fc,
+          0x732d59c, 0x5006514, 0x2b5a3ce, 0x91e39df, 0xfaf4204, 0x2ad4477,
+          0x4d9bd4f, 0x1a96b18, 0xa4d9c07, 0xc3fee95 }
+    },
+    {
+        { 0x6b4ba61, 0xfac7df0, 0x061aaef, 0xa6ed551, 0x133f609, 0x35aa2d6,
+          0x20ed13d, 0x420cfba, 0xea03d0c, 0x861c63e, 0xf936d6e, 0x75f0c56,
+          0x3d9a3d5, 0xa25f68f, 0xcd9f66e, 0xba0b7fe },
+        { 0x4680772, 0x292e135, 0xa73f405, 0x6f6a2db, 0x24ea9e4, 0xca6add9,
+          0x268daaa, 0x81cfd61, 0xe6f147a, 0x7a4cb6c, 0xbded8f5, 0x8ec3454,
+          0x11d61cb, 0xc8a893b, 0x7656022, 0x2256ffc }
+    },
+    {
+        { 0x575cb78, 0x6b33271, 0xadcd23e, 0x560d305, 0xd6d834b, 0xeedbd3a,
+          0x5a31e27, 0x614a64a, 0x47ee0c8, 0xe40b476, 0x8bd7c2c, 0x8ef4ff6,
+          0x0b77727, 0xa5297fc, 0xbaf88ad, 0x8759208 },
+        { 0x918df68, 0x86cfe64, 0xcdd882e, 0x9d60a73, 0xb953014, 0x546b642,
+          0x8bbef55, 0xbaceae3, 0xf1c3467, 0xdf58e43, 0xe9f9bab, 0x99a83fe,
+          0x57a4a8b, 0xcd52cbf, 0x8ae36ec, 0xf744e96 }
+    },
+    {
+        { 0xa607124, 0xb945869, 0x440e6f6, 0x810dbe9, 0x738e381, 0x9911e60,
+          0x343b80b, 0x51df68c, 0xf7a3f39, 0xe424336, 0x989015c, 0x2d32acb,
+          0x31019e8, 0xa69b149, 0xec12f93, 0x8a31a38 },
+        { 0x97c916a, 0x0d0d369, 0x8885372, 0xdc95f3b, 0x3549040, 0xcf1a261,
+          0xabe95a2, 0x60f6f5e, 0xe141325, 0xa909e9f, 0x355c865, 0x7d598f2,
+          0x931a9c9, 0x70c6442, 0xb423850, 0x2354a85 }
+    },
+    {
+        { 0x97f9619, 0x4cdd224, 0xc22162e, 0x4776fff, 0x0cd31c2, 0xee5ec33,
+          0xf209bb8, 0x7c04c10, 0x579e211, 0x35bbfde, 0x15cdfc2, 0x0e38325,
+          0xe26ffa7, 0x657e6d3, 0xc65c604, 0xc66a7c3 },
+        { 0xb45e567, 0x322acd7, 0x296db9b, 0x1589cf0, 0xba1db73, 0x1fd0bd3,
+          0x9337a40, 0xe882610, 0xb3035c7, 0xf505a50, 0x6ed08d7, 0x4d5af06,
+          0x5eda400, 0xb3c376b, 0x1944748, 0x9c7b700 }
+    },
+    {
+        { 0x70c3716, 0xd768325, 0xdd540e0, 0xda62af0, 0x6580fea, 0x76b155d,
+          0x32b5464, 0x4f42acc, 0x3f5b72b, 0x881bb60, 0xe68b9ba, 0x09c130e,
+          0x5c50342, 0x37ede3b, 0xfd15e7d, 0xce61a9c },
+        { 0x72605d0, 0xfff1d85, 0x062abc2, 0x62ac2d3, 0xfbe43dd, 0xa85e02e,
+          0xa947020, 0x859d2ba, 0x111c20b, 0x2ebc8a9, 0xa656f66, 0x7f590a7,
+          0x16b21a6, 0x0e13843, 0x00c7db6, 0x29b30c5 }
+    },
+    {
+        { 0x906b8de, 0x61e55e2, 0x949974d, 0x6a97e96, 0x26eef67, 0x24b52b5,
+          0x1aa595a, 0x512f536, 0x3c48fcb, 0x81cc7b8, 0x28115ad, 0xa64af23,
+          0x3d44b8e, 0x9edf6f9, 0x1fe22e3, 0x68d7f7c },
+        { 0x520d151, 0x2b2116a, 0x6aa3efb, 0x66a0b7d, 0x9b0f791, 0x48ae70a,
+          0x037db88, 0xcf12174, 0x317d9f3, 0x36868cd, 0x22fc344, 0xb573059,
+          0x46a5d23, 0xbaa8526, 0x37fc10d, 0xad65691 }
+    },
+},
+{
+    {
+        { 0x12c78d5, 0xcf8e5f5, 0x805cdbd, 0xeb94d98, 0x2ab50b5, 0xad1dcdf,
+          0xf33cd31, 0xf33c136, 0x10aeff5, 0x0d6226b, 0xf2f8fc5, 0xf7ff493,
+          0xdf57165, 0x7e520d4, 0x05271a7, 0x41fbae5 },
+        { 0x76480ba, 0x72c8987, 0x25f4523, 0x2608359, 0x49f5f01, 0xed36b8d,
+          0xf3d49eb, 0x3bc1dce, 0x4940322, 0x30c1c1a, 0x7e0f731, 0x78c1cda,
+          0x6d05a31, 0x51f2dc8, 0x07f3522, 0x57b0aa8 }
+    },
+    {
+        { 0x71f88bc, 0x7ab628e, 0x8018f21, 0xcf585f3, 0x13d64f6, 0xdbbe3a4,
+          0xec493a5, 0x0f86df1, 0x7725de9, 0x8355e6c, 0xe00fe1e, 0x3954ffe,
+          0x9924e32, 0xbb8978f, 0x7812714, 0x1c19298 },
+        { 0xaabca8b, 0x7c4ce3e, 0x9bf7019, 0xf861eb5, 0x682e541, 0x31a84fc,
+          0xacd1b92, 0x2307ca9, 0x4bf2842, 0x6f8b6ce, 0xcb9f9a9, 0xde252ac,
+          0x93c46d1, 0x7f0611d, 0x751dc98, 0x8e2bd80 }
+    },
+    {
+        { 0xe27d54b, 0xf2fd8fb, 0xc248071, 0x2a1e37e, 0xab8f49a, 0x2fcc888,
+          0xc18a9e5, 0x42c62a3, 0x70b2446, 0xe302908, 0xc5ac55d, 0x90277fa,
+          0xd6dde41, 0x8d97d56, 0x5db04fe, 0xf4cf8a9 },
+        { 0xd30d077, 0x3e280f5, 0x3cb3293, 0x2c90307, 0x24eb0dd, 0xe0be2ac,
+          0x8bcb4f0, 0xa2d1a49, 0xcd0cd45, 0x16db466, 0x9a80232, 0x3b28aa7,
+          0x17b008e, 0xdd7e52f, 0x868e4da, 0x20685f2 }
+    },
+    {
+        { 0x7c7a486, 0x0a68c14, 0xc429633, 0xd8ef234, 0xffe7506, 0x470667b,
+          0x8828d51, 0x55a13c8, 0x2e44bef, 0x5f32741, 0x5929f92, 0x537d92a,
+          0x31c5cd5, 0x0a01d5b, 0x67eb3d7, 0xb77aa78 },
+        { 0x8b82e4d, 0x36ec45f, 0xb37b199, 0x6821da0, 0xd7fa94e, 0x8af37aa,
+          0x1085010, 0xf020642, 0x7e56851, 0x9b88678, 0x52948ce, 0x35f3944,
+          0xafc1361, 0x125c2ba, 0x453e332, 0x8a57d0e }
+    },
+    {
+        { 0x8043664, 0xefe9948, 0xdb1aa55, 0xb8b8509, 0x332523f, 0x1a2e5a9,
+          0x1045c0f, 0x5e255dd, 0x7ae7180, 0xe68dd8a, 0x45bf532, 0x55f1cf3,
+          0xe63a716, 0xe00722e, 0x6116bac, 0xd1c2138 },
+        { 0x1c6d1f4, 0x626221f, 0x3773278, 0x240b830, 0x88def16, 0xe393a0d,
+          0xca0495c, 0x229266e, 0xd3e4608, 0x7b5c6c9, 0x7927190, 0xdc559cb,
+          0xc7b3c57, 0x06afe42, 0xb439c9b, 0x8a2ad0b }
+    },
+    {
+        { 0xffc3e2f, 0xd7360fb, 0xfbd2e95, 0xf721317, 0x5748e69, 0x8cacbab,
+          0x9054bb9, 0x7c89f27, 0xaa86881, 0xcbe50fa, 0x75206e4, 0x7aa05d3,
+          0xc752c66, 0x1ea01bc, 0x1f2c2bc, 0x5968cde },
+        { 0x09a853e, 0x487c55f, 0xe09204b, 0x82cbef1, 0xabd8670, 0xad5c492,
+          0xf12dcb3, 0x7175963, 0xbf6aa06, 0x7a85762, 0xf8d5237, 0x02e5697,
+          0x37c6157, 0xccf7d19, 0xc2fd59c, 0x3b14ca6 }
+    },
+    {
+        { 0x1b9f77f, 0x5e610d8, 0x051b02f, 0x85876d0, 0xb8020dd, 0x5d81c63,
+          0xd6ce614, 0xd0b4116, 0xaa8bf0c, 0x91810e5, 0xcbf8c66, 0xf27f91f,
+          0x38480ae, 0x2e5dc5f, 0xbec7633, 0x0a13ffe },
+        { 0x2bf6af8, 0x61ff649, 0x641f827, 0xe6aef2d, 0x5de5f04, 0xad5708a,
+          0xcdfee20, 0xe5c3a80, 0x68fcfa2, 0x88466e2, 0xd6e1d7b, 0x8e5bb3a,
+          0xed236b8, 0xa514f06, 0xa5f5274, 0x51c9c7b }
+    },
+    {
+        { 0xf9bc3d8, 0xa19d228, 0x3381069, 0xf89c3f0, 0x5c3f379, 0xfee890e,
+          0x32fb857, 0x3d3ef3d, 0x5b418dd, 0x3998849, 0xc46e89a, 0x6786f73,
+          0x9e0f12f, 0x79691a5, 0x3bc022b, 0x76916bf },
+        { 0x2cd8a0a, 0xea073b6, 0x102fdbc, 0x1fbedd4, 0xcb9d015, 0x1888b14,
+          0x76655f7, 0x98f2cfd, 0x59f0494, 0xb9b5910, 0xe6986a3, 0xa3dbbe1,
+          0xeaf2b04, 0xef016a5, 0xcd2d876, 0xf671ba7 }
+    },
+},
+{
+    {
+        { 0x1ae05e9, 0x1dae3bf, 0x1f21fef, 0x6a02996, 0x7aec3c6, 0x95df2b9,
+          0xd83189b, 0x9abbc5a, 0x2d13140, 0xaf994af, 0x86aa406, 0xc3f8846,
+          0x75284c5, 0xcd77e50, 0x2a9a4d7, 0x1c1e13d },
+        { 0x744b89d, 0x7f8815d, 0x2ba673e, 0xb189133, 0xd594570, 0x55ea93c,
+          0xd61b041, 0x19c8a18, 0x8d2c580, 0x938ebaa, 0x05ba078, 0x9b4344d,
+          0x8eaf9b7, 0x622da43, 0x9fea368, 0x809b807 }
+    },
+    {
+        { 0xc33b7a2, 0x3780e51, 0x387b1c8, 0xd7a205c, 0x4be60e4, 0x79515f8,
+          0x1e18277, 0xde02a8b, 0xf0d9150, 0x4645c96, 0xe0b3fd1, 0x45f8acb,
+          0x9b53ac3, 0x5d532ba, 0xb0557c9, 0x7984dcd },
+        { 0x8a92f01, 0x5ae5ca6, 0x9d569ca, 0xd2fbb3c, 0x0c297c1, 0x668cc57,
+          0x6295e89, 0xa482943, 0xa33ad40, 0xf646bc1, 0xc3f425d, 0x066aaa4,
+          0xd005de2, 0x23434cd, 0xdb35af4, 0x5aca9e9 }
+    },
+    {
+        { 0x6877c56, 0x2bca35c, 0xf0ddd7d, 0xab864b4, 0x404f46c, 0x5f6aa74,
+          0x539c279, 0x72be164, 0xe0283cf, 0x1b1d73e, 0xad583d9, 0xe550f46,
+          0xe739ad1, 0x4ac6518, 0x8d42100, 0x6b6def7 },
+        { 0xfa8468d, 0x4d36b8c, 0x5a3d7b8, 0x2cb3773, 0x5016281, 0x577f86f,
+          0x9124733, 0xdb6fe5f, 0xe29e039, 0xacb6d2a, 0x580b8a1, 0x2ab8330,
+          0x643b2d0, 0x130a4ac, 0x5e6884e, 0xa7996e3 }
+    },
+    {
+        { 0x60a0aa8, 0x6fb6277, 0xcbe04f0, 0xe046843, 0xe6ad443, 0xc01d120,
+          0xabef2fc, 0xa42a05c, 0x12ff09c, 0x6b793f1, 0xa3e5854, 0x5734ea8,
+          0x775f0ad, 0xe482b36, 0xf864a34, 0x2f4f60d },
+        { 0x84f2449, 0xf521c58, 0x9186a71, 0x58734a9, 0xac5eacc, 0x157f5d5,
+          0x248ee61, 0x858d9a4, 0x48149c3, 0x0727e6d, 0xac9ec50, 0xd5c3eaa,
+          0x20ee9b5, 0xa63a64a, 0x87be9de, 0x3f0dfc4 }
+    },
+    {
+        { 0xb13e3f4, 0x836349d, 0x3e9316d, 0xebdd026, 0x324fd6c, 0x3fd61e8,
+          0x0964f41, 0x85dddfa, 0x52add1b, 0x06e72de, 0x8c4a9e2, 0xb752cff,
+          0xfdf09f7, 0x53b0894, 0x0bc24fd, 0xd5220ab },
+        { 0xfb1981a, 0x8442b35, 0x3edd701, 0xa733a37, 0xd0ef089, 0x42b60c3,
+          0x46e7bca, 0xa1b16ec, 0xa09aaf4, 0xc0df179, 0x638f3a1, 0xcd4f187,
+          0x9eab1c2, 0x9af64f7, 0xd1d78e3, 0x86fed79 }
+    },
+    {
+        { 0xfe29980, 0x42c8d86, 0x6575660, 0x6657b81, 0x80f92ca, 0x82d52c6,
+          0x02d42be, 0x8587af1, 0x6e8bdf0, 0xb515131, 0xc333495, 0x706e2d9,
+          0x9673064, 0xd53601a, 0x8219099, 0x27b1fbb },
+        { 0x705f7c8, 0x3f0929d, 0xf3d6e6f, 0xff40b10, 0x026af5c, 0x673c703,
+          0xe25a422, 0x2c1dce4, 0x3dad8b6, 0x5348bd7, 0xbe2c329, 0xc39b6b6,
+          0xb921084, 0x47854ff, 0xb391f20, 0xb347b8b }
+    },
+    {
+        { 0xeb9b774, 0x79fc841, 0xb4b6c1d, 0xf32da25, 0xfe492cb, 0xcbba76b,
+          0xd623903, 0x76c51fc, 0xcf0705a, 0x114cf6f, 0x7815daf, 0x6b72049,
+          0x473382e, 0x630b362, 0x9704db5, 0xbf40c3a },
+        { 0xc5456eb, 0xa8a9ddc, 0x72f2dc1, 0x2b4472a, 0xd6d6ef3, 0x9874444,
+          0xa0ba5ed, 0x27e8d85, 0x194849f, 0x5d225b4, 0xebaa40d, 0xe852cd6,
+          0x8d4bf3f, 0xb669c24, 0x2343991, 0xa8601eb }
+    },
+    {
+        { 0x59502d3, 0x8a04854, 0xe269a7b, 0xcab27ee, 0x4875ada, 0x4179307,
+          0xe2405f9, 0x179e685, 0x7b28963, 0x0d7b698, 0x422a43e, 0x80c9db8,
+          0xa0f43ee, 0xf5ff318, 0x4ba7aa7, 0x7a92805 },
+        { 0x0c0834e, 0xa5c79fe, 0x1f849ec, 0x837ca0d, 0x628ab7b, 0xfe0d7fa,
+          0x6edd19a, 0x94bcb95, 0x2226fbf, 0xa18bc93, 0xaad54a3, 0x2795379,
+          0x371129e, 0xceeacf8, 0xa588be5, 0x65ca57f }
+    },
+},
+{
+    {
+        { 0x2caa330, 0x7a578b5, 0xd8ca34a, 0x7c21944, 0x6447282, 0x6c0fbbb,
+          0xf90b2e5, 0xa8a9957, 0x6586b71, 0xbbe1066, 0x49138a2, 0x716a902,
+          0xe7ed66d, 0x2fa6034, 0x2b9916a, 0x56f77ed },
+        { 0xbddefb3, 0x69f1e26, 0x8c08420, 0xa497809, 0x09bc184, 0xc3377eb,
+          0xbe6dade, 0x796ce0c, 0xd103bbb, 0x3be0625, 0x992685c, 0x01be27c,
+          0x7755f9f, 0xc0e2559, 0x1c0dbfa, 0x165c40d }
+    },
+    {
+        { 0x659c761, 0xc63a397, 0x630fbad, 0x10a0e5b, 0x655ac56, 0xf21e8a6,
+          0xc1181e2, 0xe8580fa, 0x0a84b5c, 0xbfc2d9c, 0x7afd5d1, 0x2cdbaff,
+          0xf61e85a, 0x95f1182, 0x719eaf4, 0x1173e96 },
+        { 0xc6de8b9, 0xc06d55e, 0xafcbcaa, 0x1b4c8eb, 0xbc2bbcd, 0x52af5cb,
+          0x77bcd10, 0x564fab8, 0xae85a6e, 0xfd53a18, 0x94c712f, 0x2257859,
+          0x1352121, 0x29b11d7, 0xc40491a, 0xab1cb76 }
+    },
+    {
+        { 0xce32eb4, 0xb4e8ca8, 0xb250b49, 0x7e484ac, 0xa3e31a2, 0x062c6f7,
+          0x625d1fc, 0x497fd83, 0x362dda7, 0x98f821c, 0x6be3111, 0xcae1f8f,
+          0x5d4fa42, 0x9077e95, 0xa65855a, 0xa589971 },
+        { 0x28832a9, 0xda6321d, 0x3936e9e, 0xf9ef5dc, 0xc9797ef, 0xa37f117,
+          0xdb581be, 0x0eb3c80, 0xbaa0002, 0x207c5c4, 0xf38faa0, 0xc0401b5,
+          0xd0f1e6e, 0xceee523, 0xd1f0045, 0x8d27a5f }
+    },
+    {
+        { 0xcf0af29, 0x9411063, 0x89a6693, 0x3043857, 0x640145e, 0x9a9fb8f,
+          0x54832eb, 0x7d82fe9, 0x898c520, 0xf2789e1, 0xf948dc0, 0x448b402,
+          0x68996dd, 0xeca8fdf, 0xa149b2f, 0x22227e9 },
+        { 0x8e62d6a, 0x63509ff, 0x8c9c57f, 0xe98d81c, 0x1fe3bed, 0xd387407,
+          0x539538f, 0xf1db013, 0x48418ce, 0xb04092e, 0xd6d9d4d, 0xbbf8e76,
+          0x2cec5ae, 0x2ea9cda, 0x5078fa9, 0x8414b3e }
+    },
+    {
+        { 0xd68a073, 0x5ad1cdb, 0xc18b591, 0xd4cedaf, 0x8e4c1c9, 0x7826707,
+          0x9ca302a, 0x9b8d920, 0x326115b, 0x3101bd2, 0x4c2717a, 0x6f154b5,
+          0x263e84b, 0x618c31b, 0xbbd6942, 0x12c4138 },
+        { 0x80da426, 0xf9ead25, 0x47d9680, 0xe748e99, 0x8a4210e, 0x9b396a3,
+          0xf4b8f72, 0xfaf03dd, 0x66159e7, 0xbd94a52, 0x1d4c7cb, 0x5e73049,
+          0x7910f38, 0x31d1f9a, 0x08d6dd1, 0x4fd10ca }
+    },
+    {
+        { 0x9f2331e, 0x4f510ac, 0x7e3dcc2, 0xee872dc, 0xa0a0c73, 0x4a11a32,
+          0xaa5a630, 0x27e5803, 0x7af4a8a, 0xe5ae503, 0x9fffeb0, 0x2dcdeba,
+          0x719d91f, 0x8c27748, 0xb9cc61c, 0xd3b5b62 },
+        { 0xcca7939, 0x998ac90, 0x64514e5, 0xc22b598, 0xb35738a, 0x950aaa1,
+          0xdab0264, 0x4b208bb, 0xa557d2e, 0x6677931, 0xf7c17d3, 0x2c696d8,
+          0x3e15c51, 0x1672d4a, 0x3db0e82, 0x95fab66 }
+    },
+    {
+        { 0x6ff205e, 0x3d42734, 0x0ea9fbe, 0x7f187d9, 0x466b2af, 0xbd9367f,
+          0x03daf2f, 0x188e532, 0x27b54d8, 0xefe1329, 0xef70435, 0x14faf85,
+          0x1ec95c4, 0xa506128, 0xc22cba7, 0xad01705 },
+        { 0x6197333, 0x7d2dfa6, 0x8b4f6ed, 0xedd7f07, 0x75df105, 0xe0cb685,
+          0x80f76bc, 0x47c9ddb, 0x9073c54, 0x49ab531, 0xe607f44, 0x845255a,
+          0xcc74b7c, 0x0b4ed9f, 0x0f5c3a6, 0xcfb52d5 }
+    },
+    {
+        { 0xc278776, 0x545c7c6, 0x98c30f0, 0x92a39ae, 0xd2f4680, 0x8aa8c01,
+          0x6b7f840, 0xa5409ed, 0xdcb24e7, 0x0c450ac, 0xc5770d9, 0x5da6fb2,
+          0x8658333, 0x5b8e8be, 0x67ea4ad, 0xb26bf4a },
+        { 0xc7d91fa, 0x2e30c81, 0x0eeb69f, 0x6e50a49, 0xee4bc26, 0x9458c2b,
+          0x33be250, 0x419acf2, 0x87881ab, 0x79d6f81, 0x403b1be, 0x694565d,
+          0x234fe1d, 0x34b3990, 0x2132b38, 0x60997d7 }
+    },
+},
+{
+    {
+        { 0x26975dc, 0x00a9741, 0x6cf94e7, 0x42161c4, 0xc64ed99, 0xcc9fe4b,
+          0x4680570, 0x020019a, 0x698da0d, 0x885595a, 0x77dd962, 0x008444b,
+          0xa4fea0e, 0xbf3c22d, 0x2c81245, 0xc463048 },
+        { 0x793ab18, 0xcb248c5, 0xeb4320b, 0x4dc7a20, 0x1572b7d, 0x9a0906f,
+          0xf9ac20f, 0xd5b3019, 0x34520a3, 0x79b1bf5, 0x69b5322, 0x788dfe8,
+          0x455b7e2, 0x9a05298, 0x016bca9, 0x2f4aecb }
+    },
+    {
+        { 0x8745618, 0x414d379, 0xb7c983c, 0x64ba22e, 0x9f9d532, 0x9a5d19f,
+          0x44a80c8, 0x81a00d8, 0xcae98d6, 0xb9e24f5, 0xaca965a, 0x6c3769c,
+          0xf6e4e6d, 0x50d6081, 0x54422a6, 0x0d96980 },
+        { 0x5cdd790, 0xbd7e792, 0x6a35219, 0xcff65da, 0x8b60ebe, 0x40dc363,
+          0x92a50dc, 0x84bee74, 0x15ad65e, 0x57d4be4, 0x1a6d1d3, 0xc54256b,
+          0x45717cc, 0x141c649, 0xcd1c736, 0x05eb609 }
+    },
+    {
+        { 0x1e3c7ec, 0xfd52eab, 0x9f24895, 0xa4a5eca, 0x79fdb83, 0xaaa2a8d,
+          0x72bdfda, 0xd105e60, 0x681d97e, 0x59e6ae2, 0x8e8077f, 0xfedf8e0,
+          0x629e462, 0xb06d0ad, 0x96fa863, 0x8c7c2d0 },
+        { 0xee8fc91, 0x5eecc4c, 0x9e61174, 0x5e83ab2, 0xb28c02d, 0x1fd8925,
+          0x2072864, 0x93be538, 0x24c984e, 0xda0c886, 0xa008286, 0xdcf9f0c,
+          0xa58ba75, 0x1ecb5a6, 0xc2e3c83, 0x1d9b890 }
+    },
+    {
+        { 0xeeee062, 0x19e866e, 0x4f7b387, 0x31c1c7f, 0x1c06652, 0x9be6018,
+          0x2b68bbb, 0xc00a93a, 0x9d52b2b, 0x54c65d6, 0xe8b744a, 0x4591416,
+          0x9a64ab6, 0x641bcca, 0xab08098, 0xf22bcb1 },
+        { 0xf1f726c, 0x3c0db8f, 0x9d2e6a6, 0x4f5739e, 0x45c9530, 0x5cb669b,
+          0x7b472d0, 0x861b04e, 0x894da77, 0x3e30515, 0xc9ac39b, 0x3344685,
+          0x73bdd29, 0x9e17305, 0x808dc85, 0x9cac12c }
+    },
+    {
+        { 0x5e27087, 0xf152b86, 0x90a580e, 0x267bd85, 0x8baafc1, 0xba79cec,
+          0x9442686, 0x6140ab1, 0x5b31693, 0xa67090c, 0x28b4117, 0x50a103a,
+          0x0ddc08f, 0x7722e61, 0xe6569b2, 0x5d19d43 },
+        { 0x5962bf6, 0x70e0c52, 0xfb5fb02, 0x808e316, 0x5b667be, 0x3fb80da,
+          0xfcfacec, 0x8aa366e, 0x134280e, 0xcb0b3e7, 0xcd7d944, 0x0bf1de4,
+          0xd092df5, 0x0cd23be, 0xa153a0c, 0xc9a6a79 }
+    },
+    {
+        { 0x2d5a4b7, 0x1c69ad0, 0xd9e6f4a, 0x4bb28d0, 0xa984fc6, 0x815308c,
+          0x9037ca5, 0x40929c7, 0x1bd0357, 0x0ea2b49, 0x42aad4e, 0xec17e5b,
+          0x18e7235, 0x1f32ade, 0xa96a9d3, 0xbc60b05 },
+        { 0xe20f707, 0x3b0229a, 0x56bdfad, 0xd635050, 0xd8b2e1e, 0xac2d922,
+          0x235c748, 0x92b2998, 0xd766f97, 0x6002c3a, 0x1a2a862, 0x9919800,
+          0xb58b684, 0x2af7567, 0xaaafce5, 0xd8fe707 }
+    },
+    {
+        { 0x5df7a4b, 0x54487ab, 0xc57ccc2, 0x51cccde, 0x7510b53, 0x2394327,
+          0xf555de3, 0x3a09f02, 0x1be484d, 0xa696aec, 0x37817a2, 0x56f459f,
+          0x623dcb4, 0x8d8f61c, 0x5335656, 0xc52223c },
+        { 0xb49914a, 0xf634111, 0x8e4f9bb, 0xbf8e1ab, 0xf4dba02, 0x2f59578,
+          0xe004319, 0x2a94199, 0x654d005, 0x87931f0, 0x6fa0814, 0x7df57d9,
+          0xa154031, 0xc8da316, 0x41f658b, 0x2a44ac0 }
+    },
+    {
+        { 0x9e34ac6, 0xfb5f4f8, 0x97790f2, 0x0a1b10b, 0x4b8a06c, 0x58fe4e7,
+          0x955f27c, 0x10c1710, 0xd5ebe19, 0x77b798a, 0x1f1c2dc, 0xaf1c35b,
+          0xa1f8d69, 0xc25b8e6, 0xf76bf23, 0x49cf751 },
+        { 0x436f7b7, 0x15cb2db, 0x7e74d1a, 0x186d7c2, 0xc00a415, 0x60731de,
+          0x15f0772, 0xea1e156, 0x714463f, 0xf02d591, 0x51adeb1, 0x26a0c64,
+          0xcc5229e, 0x20174cd, 0xefd512a, 0xb817e50 }
+    },
+},
+};
+
+static const ge448_precomp base_i[16] = {
+    {
+        { 0x70cc05e, 0x26a82bc, 0x0938e26, 0x80e18b0, 0x511433b, 0xf72ab66,
+          0x412ae1a, 0xa3d3a46, 0xa6de324, 0x0f1767e, 0x4657047, 0x36da9e1,
+          0x5a622bf, 0xed221d1, 0x66bed0d, 0x4f1970c },
+        { 0x230fa14, 0x08795bf, 0x7c8ad98, 0x132c4ed, 0x9c4fdbd, 0x1ce67c3,
+          0x73ad3ff, 0x05a0c2d, 0x7789c1e, 0xa398408, 0xa73736c, 0xc7624be,
+          0x03756c9, 0x2488762, 0x16eb6bc, 0x693f467 }
+    },
+    {
+        { 0x6ff2f8f, 0x2817328, 0xda85757, 0xb769465, 0xfd6e862, 0xf7f6271,
+          0x8daa9cb, 0x4a3fcfe, 0x2ba077a, 0xda82c7e, 0x41b8b8c, 0x9433322,
+          0x4316cb6, 0x6455bd6, 0xb9108af, 0x0865886 },
+        { 0x88ed6fc, 0x22ac135, 0x02dafb8, 0x9a68fed, 0x7f0bffa, 0x1bdb676,
+          0x8bb3a33, 0xec4e1d5, 0xce43c82, 0x56c3b9f, 0xa8d9523, 0xa6449a4,
+          0xa7ad43a, 0xf706cbd, 0xbd5125c, 0xe005a8d }
+    },
+    {
+        { 0x2030034, 0xa99d109, 0x6f950d0, 0x2d8cefc, 0xc96f07b, 0x7a920c3,
+          0x08bc0d5, 0x9588128, 0x6d761e8, 0x62ada75, 0xbcf7285, 0x0def80c,
+          0x01eedb5, 0x0e2ba76, 0x5a48dcb, 0x7a9f933 },
+        { 0x2f435eb, 0xb473147, 0xf225443, 0x5512881, 0x33c5840, 0xee59d2b,
+          0x127d7a4, 0xb698017, 0x86551f7, 0xb18fced, 0xca1823a, 0x0ade260,
+          0xce4fd58, 0xd3b9109, 0xa2517ed, 0xadfd751 }
+    },
+    {
+        { 0xeb5eaf7, 0xdf9567c, 0x78ac7d7, 0x110a6b4, 0x4706e0b, 0x2d33501,
+          0x0b5a209, 0x0df9c7b, 0x568e684, 0xba4223d, 0x8c3719b, 0xd78af2d,
+          0xa5291b6, 0x77467b9, 0x5c89bef, 0x079748e },
+        { 0xdac377f, 0xe20d3fa, 0x72b5c09, 0x34e8669, 0xc40bbb7, 0xd8687a3,
+          0xd2f84c9, 0x7b3946f, 0xa78f50e, 0xd00e40c, 0x17e7179, 0xb875944,
+          0xcb23583, 0x9c7373b, 0xc90fd69, 0x7ddeda3 }
+    },
+    {
+        { 0x6ab686b, 0x3d0def7, 0x49f7c79, 0x1a467ec, 0xc8989ed, 0x3e53f4f,
+          0x430a0d9, 0x101e344, 0x8ad44ee, 0xa3ae731, 0xae1d134, 0xaefa6cd,
+          0x824ad4d, 0xaa8cd7d, 0xed584fc, 0xef1650c },
+        { 0x4f4754f, 0xa74df67, 0xef3fb8b, 0xf52cea8, 0x2971140, 0x47c32d4,
+          0xa256fbb, 0x391c15d, 0xa605671, 0xc165fab, 0x87993b9, 0xf2518c6,
+          0xbd5a84d, 0x2daf7ac, 0x98f12ae, 0x1560b62 }
+    },
+    {
+        { 0x54dc10a, 0xef4da02, 0x5940db8, 0x6311865, 0x82f2948, 0xe20b149,
+          0x5581dba, 0x67b9377, 0x04f5029, 0x422ee71, 0x5122d34, 0x5d440db,
+          0x1a4c640, 0xb1e56d7, 0xc2408ee, 0xbf12abb },
+        { 0x016af01, 0x0cc9f86, 0xf3d8cab, 0x88366ab, 0xa2efe12, 0x85dda13,
+          0x5d00674, 0x390df60, 0x6d187f7, 0xf18f580, 0xf0c5d20, 0x28c900f,
+          0x3e01733, 0xad30812, 0x54bf2fd, 0x42d35b5 }
+    },
+    {
+        { 0x2ffb1f1, 0x009135f, 0x8f9c605, 0x099fc7e, 0x26bfa5a, 0xcc67da6,
+          0x344552b, 0xc186d12, 0x1b339e1, 0xb523250, 0xc9708c5, 0x70a544f,
+          0x1e928e7, 0x06baaec, 0xef0f50f, 0x0baedd2 },
+        { 0xbf479e5, 0x535d6d8, 0xe4ec3e9, 0x156e536, 0xddb9be2, 0x3165741,
+          0x59fd736, 0x988af71, 0x2e33ddd, 0x13d8a78, 0x4e69002, 0x5460421,
+          0x804a268, 0x34d56e0, 0x0e52a4c, 0xc59b84f }
+    },
+    {
+        { 0x24729d9, 0x525d45f, 0x8712327, 0x5768aba, 0x43035db, 0xa25e43b,
+          0x927ef21, 0x15a1ee8, 0x6056112, 0xa785d21, 0xd508af9, 0x45e2fbf,
+          0x37ba969, 0xb6f721a, 0x216d8d3, 0x30d6d8c },
+        { 0x52074c3, 0x3065e08, 0x2a0684e, 0xfa40b4a, 0x763f955, 0x851325a,
+          0x9f25900, 0xd4ef19c, 0xf665756, 0x799c869, 0x3312990, 0x7b05222,
+          0x28db802, 0xc986c2b, 0x28ade0a, 0xf48fb8f }
+    },
+    {
+        { 0x1649b68, 0x1e46173, 0x5beb9dc, 0xa96e5d6, 0x481935d, 0x765ddff,
+          0x9f3bf2a, 0x6cf132c, 0x7c35658, 0x9f6c5c9, 0x4696e60, 0x99cd139,
+          0x9c0d5e4, 0x99fa924, 0x8845a95, 0x1acd063 },
+        { 0x3636087, 0x0b06541, 0xea17b7f, 0xea20e78, 0x6161967, 0x20afc5f,
+          0xdc81028, 0xfd6c8a2, 0xe32c8fd, 0x4ef1357, 0x00e4a88, 0x8aa4004,
+          0x48cb82f, 0xd6fcaef, 0xb3cd4fa, 0x7ba7c6d }
+    },
+    {
+        { 0xd19c7ab, 0xf843473, 0xc655c4d, 0x968e76d, 0xc4b9c2f, 0x52c87d9,
+          0xe4aa082, 0x65f641a, 0x33c3603, 0x491a397, 0x5810098, 0xa606ffe,
+          0x8bf8ad4, 0x09920e6, 0x6db7882, 0x691a0c8 },
+        { 0xa4d3ef5, 0x5205883, 0xacf2efe, 0xee839b7, 0xc00ca66, 0x4b78e2a,
+          0xf9fcb91, 0xbe3f071, 0xbf6943a, 0x61e66c9, 0x061b79d, 0xe9b4e57,
+          0x56c06bd, 0x8d1b01b, 0xdf76ae5, 0x0dfa315 }
+    },
+    {
+        { 0xf1fd093, 0x803df65, 0x489b77e, 0x1cd6523, 0xc20e295, 0x2cd2e15,
+          0x9b912d1, 0xcd490be, 0x2e886d2, 0xdd9a2ff, 0xfe9d72a, 0xa3c836d,
+          0x298e0c1, 0xfcad5f2, 0x4bcf067, 0xed126e2 },
+        { 0x3dc81bc, 0x1e33953, 0xece6a08, 0xbea4d76, 0x991b252, 0x1d15de3,
+          0xe6daf97, 0x74cc5cf, 0x0826493, 0x5ad343f, 0x1064049, 0x2d38a47,
+          0xffcfa4d, 0xf7f47b9, 0x418066c, 0xef14490 }
+    },
+    {
+        { 0x9bb55ab, 0x4e7f86b, 0x3f496a3, 0x310d785, 0x0dec42c, 0xbd682fc,
+          0x411d32a, 0xbde047a, 0xc5a5ea2, 0xea639b4, 0xba08fa1, 0x5052078,
+          0x07729f2, 0xc968b23, 0x23d3e28, 0x567b5a6 },
+        { 0x977fbf7, 0x171e825, 0xbe990aa, 0x0319c70, 0xe12cd69, 0x8f65023,
+          0xf5015e6, 0x1fb9b19, 0x3568a7c, 0x0083f60, 0x1f3c5ac, 0xba3d30b,
+          0x3d7a988, 0xe7b509d, 0xcd0f6b6, 0x2318b99 }
+    },
+    {
+        { 0x93ab2cf, 0x54d3b87, 0xd2d8306, 0x366abea, 0xd7a4977, 0x66e8eb6,
+          0xae0072e, 0xa61888c, 0xdbc3315, 0x9eeeef5, 0x163e7f5, 0x93f09db,
+          0x59ade9a, 0xee90959, 0xce59be0, 0xaf7f578 },
+        { 0x5ece59e, 0x24bfd8d, 0x3689523, 0x8aa698b, 0x2de92cf, 0xa9a65de,
+          0xa6ad300, 0xec11dbc, 0x09f88ca, 0x217f3fa, 0xb4d6af7, 0xf6c33e3,
+          0x1d86d2d, 0xcd3bfa2, 0x5f13f25, 0x1497f83 }
+    },
+    {
+        { 0xcd03d1d, 0xa579568, 0xe158af6, 0xd717cda, 0x389a19f, 0x59eda97,
+          0x099e99c, 0xb32c370, 0xdabb591, 0xa2dba91, 0x77c2c97, 0x6d697d5,
+          0xd43fa6d, 0x5423fc2, 0x0b382bf, 0x56ea8a5 },
+        { 0xd80c11a, 0x4a987ba, 0x7d590a5, 0xe4cde21, 0xf97e559, 0x3dd8860,
+          0x43b593c, 0xff45e25, 0x5343cb5, 0x00eb453, 0x7bbfbdd, 0x06b9b99,
+          0x16aea24, 0x4da36b7, 0x57a624e, 0x2476517 }
+    },
+    {
+        { 0x3474e0d, 0x32207d0, 0xb41cc73, 0x3ffbf04, 0x319eb39, 0x5c4dc45,
+          0x758b463, 0xfee29be, 0xc30c7a7, 0xcc8a381, 0x9fe0e53, 0x147f4e4,
+          0xe35a2de, 0x05b2e26, 0x92f3666, 0x4362f02 },
+        { 0x8474b85, 0x0476d0c, 0xccaf108, 0x9d8c65f, 0x1d54b6a, 0xf58d404,
+          0xf38e4b0, 0x3ee6862, 0x3b44f54, 0x7c7c9d5, 0x0fb0db5, 0x36a3fd8,
+          0x18a8ac8, 0xfcd94ba, 0x8f35c05, 0xc1b1d56 }
+    },
+    {
+        { 0x1bdd30d, 0x16539fc, 0x8df4afb, 0x1356e53, 0x5a1aedb, 0xc0545d8,
+          0x489396b, 0xeb2037a, 0x5660894, 0x897fcbd, 0xb7d104a, 0x02a58a9,
+          0xc96b980, 0x57fa24c, 0x5bd8946, 0xf6448e3 },
+        { 0x8805c83, 0xee72741, 0x992cfc6, 0x10fa274, 0x9e66b21, 0x9514193,
+          0xbd08009, 0xe0ffa44, 0x20da22b, 0x1743322, 0x59e6831, 0x4891ff3,
+          0xa7d687b, 0x407ed73, 0x51d99cf, 0x2fb4e07 }
+    },
+};
+#endif
+
+/* Set the 0 point.
+ *
+ * p  [in]  Point to set to 0.
+ */
+static WC_INLINE void ge448_0(ge448_p2 *p)
+{
+    fe448_0(p->X);
+    fe448_1(p->Y);
+    fe448_1(p->Z);
+}
+
+/* Set the precompute point to 0.
+ *
+ * p  [in]  Precompute point to set.
+ */
+static void ge448_precomp_0(ge448_precomp *p)
+{
+    fe448_0(p->x);
+    fe448_1(p->y);
+}
+
+/* Double the point on the Twisted Edwards curve. r = 2.p
+ *
+ * r  [in]  Point to hold result.
+ * p  [in]  Point to double.
+ */
+static WC_INLINE void ge448_dbl(ge448_p2 *r,const ge448_p2 *p)
+{
+    ge448 t0[GE448_WORDS];
+    ge448 t1[GE448_WORDS];
+
+    fe448_add(t0, p->X, p->Y);    /* t0 = B1 = X1+Y1     */
+    fe448_reduce(t0);
+    fe448_sqr(t0, t0);            /* t0 = B = (X1+Y1)^2  */
+    fe448_sqr(r->X, p->X);        /* r->X = C = X1^2     */
+    fe448_sqr(r->Y, p->Y);        /* r->Y = D = Y1^2     */
+    fe448_add(t1, r->X, r->Y);    /* t1 = E = C+D        */
+    fe448_reduce(t1);
+    fe448_sub(r->Y, r->X, r->Y);  /* r->Y = Y31 = C-D    */
+    fe448_sqr(r->Z, p->Z);        /* r->Z = H = Z1^2     */
+    fe448_add(r->Z, r->Z, r->Z);  /* r->Z = J1 = 2*H     */
+    fe448_sub(r->Z, t1, r->Z);    /* r->Z = J = E-2*H    */
+    fe448_reduce(r->Z);
+    fe448_sub(r->X, t0, t1);      /* r->X = X31 = B-E    */
+    fe448_mul(r->X, r->X, r->Z);  /* r->X = X3 = (B-E)*J */
+    fe448_mul(r->Y, r->Y, t1);    /* r->Y = Y3 = E*(C-D) */
+    fe448_mul(r->Z, t1, r->Z);    /* r->Z = Z3 = E*J     */
+}
+
+/* Add two point on the Twisted Edwards curve. r = p + q
+ * Second point has z-ordinate of 1.
+ *
+ * r  [in]  Point to hold result.
+ * p  [in]  Point to add.
+ * q  [in]  Point to add.
+ */
+static WC_INLINE void ge448_madd(ge448_p2 *r, const ge448_p2 *p,
+                                 const ge448_precomp *q)
+{
+    ge448 t0[GE448_WORDS];
+    ge448 t1[GE448_WORDS];
+    ge448 t2[GE448_WORDS];
+    ge448 t3[GE448_WORDS];
+    ge448 t4[GE448_WORDS];
+
+                                    /* p->Z = A                   */
+    fe448_mul(t1, p->X, q->x);      /* t1 = C = X1*X2             */
+    fe448_mul(t2, p->Y, q->y);      /* t2 = D = Y1*Y2             */
+    fe448_mul(t3, t1, t2);          /* t3 = E1 = C*D              */
+    fe448_mul39081(t3, t3);         /* t3 = E = d*C*D             */
+    fe448_sqr(t0, p->Z);            /* t0 = B = A^2               */
+    fe448_add(t4, t0, t3);          /* t4 = F = B-(-E)            */
+    fe448_sub(t0, t0, t3);          /* t0 = G = B+(-E)            */
+    fe448_reduce(t0);
+    fe448_add(r->X, p->X, p->Y);    /* r->X = H1 = X1+Y1          */
+    fe448_reduce(r->X);
+    fe448_add(r->Y, q->x, q->y);    /* r->Y = H2 = X2+Y2          */
+    fe448_reduce(r->Y);
+    fe448_mul(r->X, r->X, r->Y);    /* r->X = H = (X1+Y1)*(X2+Y2) */
+    fe448_sub(r->X, r->X, t1);      /* r->X = X31 = H-C           */
+    fe448_sub(r->X, r->X, t2);      /* r->X = X32 = H-C-D         */
+    fe448_reduce(r->X);
+    fe448_mul(r->X, r->X, t4);      /* r->X = X33 = F*(H-C-D)     */
+    fe448_mul(r->X, r->X, p->Z);    /* r->X = X3 = A*F*(H-C-D)    */
+    fe448_sub(r->Y, t2, t1);        /* r->Y = Y31 = D-C           */
+    fe448_reduce(r->Y);
+    fe448_mul(r->Y, r->Y, t0);      /* r->Y = Y32 = G*(D-C)       */
+    fe448_mul(r->Y, r->Y, p->Z);    /* r->Y = Y3 = A*F*(D-C)      */
+    fe448_mul(r->Z, t4, t0);        /* r->Z = Z3 = F*G            */
+}
+
+/* Subtract one point from another on the Twisted Edwards curve. r = p - q
+ * Second point has z-ordinate of 1.
+ *
+ * r  [in]  Point to hold result.
+ * p  [in]  Point to subtract from.
+ * q  [in]  Point to subtract.
+ */
+static WC_INLINE void ge448_msub(ge448_p2 *r, const ge448_p2 *p,
+                                 const ge448_precomp *q)
+{
+    ge448 t0[GE448_WORDS];
+    ge448 t1[GE448_WORDS];
+    ge448 t2[GE448_WORDS];
+    ge448 t3[GE448_WORDS];
+    ge448 t4[GE448_WORDS];
+
+                                    /* p->Z = A                   */
+    fe448_sqr(t0, p->Z);            /* t0 = B = A^2               */
+    fe448_mul(t1, p->X, q->x);      /* t1 = C = X1*X2             */
+    fe448_mul(t2, p->Y, q->y);      /* t2 = D = Y1*Y2             */
+    fe448_mul(t3, t1, t2);          /* t3 = E1 = C*D              */
+    fe448_mul39081(t3, t3);         /* t3 = E = d*C*D             */
+    fe448_sub(t4, t0, t3);          /* t4 = F = B-(--E)           */
+    fe448_add(t0, t0, t3);          /* t0 = G = B+(--E)           */
+    fe448_reduce(t0);
+    fe448_add(r->X, p->X, p->Y);    /* r->X = H1 = X1+Y1          */
+    fe448_reduce(r->X);
+    fe448_sub(r->Y, q->y, q->x);    /* r->Y = H2 = Y2+(-X2)       */
+    fe448_reduce(r->Y);
+    fe448_mul(r->X, r->X, r->Y);    /* r->X = H = (X1+Y1)*(X2+Y2) */
+    fe448_add(r->X, r->X, t1);      /* r->X = X31 = H-(-C)        */
+    fe448_sub(r->X, r->X, t2);      /* r->X = X32 = H-(-C)-D      */
+    fe448_reduce(r->X);
+    fe448_mul(r->X, r->X, t4);      /* r->X = X33 = F*(H-C-D)     */
+    fe448_mul(r->X, r->X, p->Z);    /* r->X = X3 = A*F*(H-C-D)    */
+    fe448_add(r->Y, t2, t1);        /* r->Y = Y31 = D-C           */
+    fe448_reduce(r->Y);
+    fe448_mul(r->Y, r->Y, t0);      /* r->Y = Y32 = G*(D-C)       */
+    fe448_mul(r->Y, r->Y, p->Z);    /* r->Y = Y3 = A*F*(D-C)      */
+    fe448_mul(r->Z, t4, t0);        /* r->Z = Z3 = F*G            */
+}
+
+/* Add two point on the Twisted Edwards curve. r = p + q
+ *
+ * r  [in]  Point to hold result.
+ * p  [in]  Point to add.
+ * q  [in]  Point to add.
+ */
+static WC_INLINE void ge448_add(ge448_p2* r, const ge448_p2* p,
+                                const ge448_p2* q)
+{
+    ge448 t0[GE448_WORDS];
+    ge448 t1[GE448_WORDS];
+    ge448 t2[GE448_WORDS];
+    ge448 t3[GE448_WORDS];
+    ge448 t4[GE448_WORDS];
+
+    fe448_mul(t1, p->X, q->X);      /* t1 = C = X1*X2             */
+    fe448_mul(t2, p->Y, q->Y);      /* t2 = D = Y1*Y2             */
+    fe448_mul(t3, t1, t2);          /* t3 = E1 = C*D              */
+    fe448_mul39081(t3, t3);         /* t3 = E = d*C*D             */
+    fe448_mul(r->Z, p->Z, q->Z);    /* r->Z = A = Z1*Z2           */
+    fe448_sqr(t0, r->Z);            /* t0 = B = A^2               */
+    fe448_add(t4, t0, t3);          /* t4 = F = B-(-E)            */
+    fe448_sub(t0, t0, t3);          /* t0 = G = B+(-E)            */
+    fe448_reduce(t0);
+    fe448_add(r->X, p->X, p->Y);    /* r->X = H1 = X1+Y1          */
+    fe448_reduce(r->X);
+    fe448_add(r->Y, q->X, q->Y);    /* r->Y = H2 = X2+Y2          */
+    fe448_reduce(r->Y);
+    fe448_mul(r->X, r->X, r->Y);    /* r->X = H = (X1+Y1)*(X2+Y2) */
+    fe448_sub(r->X, r->X, t1);      /* r->X = X31 = H-C           */
+    fe448_sub(r->X, r->X, t2);      /* r->X = X32 = H-C-D         */
+    fe448_reduce(r->X);
+    fe448_mul(r->X, r->X, t4);      /* r->X = X33 = F*(H-C-D)     */
+    fe448_mul(r->X, r->X, r->Z);    /* r->X = X3 = A*F*(H-C-D)    */
+    fe448_sub(r->Y, t2, t1);        /* r->Y = Y31 = D-C           */
+    fe448_reduce(r->Y);
+    fe448_mul(r->Y, r->Y, t0);      /* r->Y = Y32 = G*(D-C)       */
+    fe448_mul(r->Y, r->Y, r->Z);    /* r->Y = Y3 = A*F*(D-C)      */
+    fe448_mul(r->Z, t4, t0);        /* r->Z = Z3 = F*G            */
+}
+
+/* Subtract one point from another on the Twisted Edwards curve. r = p - q
+ *
+ * r  [in]  Point to hold result.
+ * p  [in]  Point to subtract from.
+ * q  [in]  Point to subtract.
+ */
+static WC_INLINE void ge448_sub(ge448_p2 *r, const ge448_p2 *p,
+                                const ge448_p2 *q)
+{
+    ge448 t0[GE448_WORDS];
+    ge448 t1[GE448_WORDS];
+    ge448 t2[GE448_WORDS];
+    ge448 t3[GE448_WORDS];
+    ge448 t4[GE448_WORDS];
+
+    fe448_mul(t1, p->X, q->X);      /* t1 = C = X1*X2             */
+    fe448_mul(t2, p->Y, q->Y);      /* t2 = D = Y1*Y2             */
+    fe448_mul(t3, t1, t2);          /* t3 = E1 = C*D              */
+    fe448_mul39081(t3, t3);         /* t3 = E = d*C*D             */
+    fe448_mul(r->Z, p->Z, q->Z);    /* r->Z = A = Z1*Z2           */
+    fe448_sqr(t0, p->Z);            /* t0 = B = A^2               */
+    fe448_sub(t4, t0, t3);          /* t4 = F = B-(--E)           */
+    fe448_add(t0, t0, t3);          /* t0 = G = B+(--E)           */
+    fe448_reduce(t0);
+    fe448_add(r->X, p->X, p->Y);    /* r->X = H1 = X1+Y1          */
+    fe448_reduce(r->X);
+    fe448_sub(r->Y, q->Y, q->X);    /* r->Y = H2 = Y2+(-X2)       */
+    fe448_reduce(r->Y);
+    fe448_mul(r->X, r->X, r->Y);    /* r->X = H = (X1+Y1)*(X2+Y2) */
+    fe448_add(r->X, r->X, t1);      /* r->X = X31 = H-(-C)        */
+    fe448_sub(r->X, r->X, t2);      /* r->X = X32 = H-(-C)-D      */
+    fe448_reduce(r->X);
+    fe448_mul(r->X, r->X, t4);      /* r->X = X33 = F*(H-C-D)     */
+    fe448_mul(r->X, r->X, r->Z);    /* r->X = X3 = A*F*(H-C-D)    */
+    fe448_add(r->Y, t2, t1);        /* r->Y = Y31 = D-C           */
+    fe448_reduce(r->Y);
+    fe448_mul(r->Y, r->Y, t0);      /* r->Y = Y32 = G*(D-C)       */
+    fe448_mul(r->Y, r->Y, r->Z);    /* r->Y = Y3 = A*F*(D-C)      */
+    fe448_mul(r->Z, t4, t0);        /* r->Z = Z3 = F*G            */
+}
+
+/* Convert point to byte array assuming projective ordinates.
+ *
+ * b  [in]  Array of bytes to hold compressed point.
+ * p  [in]  Point to convert.
+ */
+void ge448_to_bytes(uint8_t *b, const ge448_p2 *p)
+{
+    ge448 recip[GE448_WORDS];
+    ge448 x[GE448_WORDS];
+    ge448 y[GE448_WORDS];
+
+    fe448_invert(recip, p->Z);
+    fe448_mul(x, p->X, recip);
+    fe448_mul(y, p->Y, recip);
+    fe448_to_bytes(b, y);
+    b[56] = fe448_isnegative(x) << 7;
+}
+
+/* Convert point to byte array assuming z is 1.
+ *
+ * b  [in]  Array of bytes to hold compressed point.
+ * p  [in]  Point to convert.
+ */
+static void ge448_p2z1_to_bytes(uint8_t *b, const ge448_p2 *p)
+{
+    fe448_to_bytes(b, p->Y);
+    b[56] = fe448_isnegative(p->X) << 7;
+}
+
+/* Compress the point to y-ordinate and negative bit.
+ *
+ * out    [in]  Array of bytes to hold compressed key.
+ * xIn    [in]  The x-ordinate.
+ * yIn    [in]  The y-ordinate.
+ */
+int ge448_compress_key(uint8_t* out, const uint8_t* xIn, const uint8_t* yIn)
+{
+    ge448_p2  g;
+    uint8_t   bArray[ED448_KEY_SIZE];
+    uint32_t  i;
+
+    fe448_from_bytes(g.X, xIn);
+    fe448_from_bytes(g.Y, yIn);
+    fe448_1(g.Z);
+
+    ge448_p2z1_to_bytes(bArray, &g);
+
+    for (i = 0; i < 57; i++) {
+        out[57 - 1 - i] = bArray[i];
+    }
+
+    return 0;
+}
+
+/* Determine whether the value is negative.
+ *
+ * b  [in]  An 8-bit signed value.
+ * returns 1 when negative and 0 otherwise.
+ */
+static uint8_t negative(int8_t b)
+{
+    return ((uint8_t)b) >> 7;
+}
+
+/* Determine whether two values are equal. a == b
+ * Constant time implementation.
+ *
+ * a  [in]  An 8-bit unsigned value.
+ * b  [in]  An 8-bit unsigned value.
+ * returns 1 when equal and 0 otherwise.
+ */
+static uint8_t equal(uint8_t a, uint8_t b)
+{
+    return (uint8_t)(((uint32_t)(a ^ b) - 1) >> 31);
+}
+
+/* Conditional move the point into result point if two values are equal.
+ * Constant time implementation.
+ *
+ * f  [in]  Point to conditionally overwrite.
+ * p  [in]  Point to conditionally copy.
+ * b  [in]  An 8-bit unsigned value.
+ * n  [in]  An 8-bit unsigned value.
+ */
+static WC_INLINE void cmov(ge448_precomp* r, const ge448_precomp* p, uint8_t b,
+                           uint8_t n)
+{
+    b = equal(b, n);
+    fe448_cmov(r->x, p->x, b);
+    fe448_cmov(r->y, p->y, b);
+}
+
+/* Select one of the entries from the precomputed table and negate if required.
+ * Constant time implementation.
+ *
+ * r    [in]  Point to hold chosen point.
+ * pos  [in]  Position of array of entries to choose from.
+ * b    [in]  Index of point to select. -ve value means negate the point.
+ */
+static void ge448_select(ge448_precomp* r, int pos, int8_t b)
+{
+    ge448 minusx[16];
+    uint8_t bnegative = negative(b);
+    uint8_t babs = b - (((-bnegative) & b) << 1);
+
+    ge448_precomp_0(r);
+    cmov(r, &base[pos][0], babs, 1);
+    cmov(r, &base[pos][1], babs, 2);
+    cmov(r, &base[pos][2], babs, 3);
+    cmov(r, &base[pos][3], babs, 4);
+    cmov(r, &base[pos][4], babs, 5);
+    cmov(r, &base[pos][5], babs, 6);
+    cmov(r, &base[pos][6], babs, 7);
+    cmov(r, &base[pos][7], babs, 8);
+    fe448_neg(minusx, r->x);
+    fe448_cmov(r->x, minusx, bnegative);
+}
+
+/* Perform a scalar multiplication of the base point. r = a * base
+ *
+ * r  [in]  Point to hold result.
+ * a  [in]  Scalar to multiply by.
+ */
+void ge448_scalarmult_base(ge448_p2* r, const uint8_t* a)
+{
+    int8_t        carry;
+    ge448_precomp t;
+    int           i;
+    int8_t        e[113];
+
+    carry = 0;
+    for (i = 0; i < 56; ++i) {
+        e[2 * i + 0] = ((a[i] >> 0) & 0xf) + carry;
+        carry = e[2 * i + 0] + 8;
+        carry >>= 4;
+        e[2 * i + 0] -= carry << 4;
+
+        e[2 * i + 1] = ((a[i] >> 4) & 0xf) + carry;
+        carry = e[2 * i + 1] + 8;
+        carry >>= 4;
+        e[2 * i + 1] -= carry << 4;
+    }
+    e[112] = carry;
+    /* each e[i] is between -8 and 8 */
+
+    /* Odd indeces first - sum based on even index so multiply by 16 */
+    ge448_select(&t, 0, e[1]);
+    fe448_copy(r->X, t.x);
+    fe448_copy(r->Y, t.y);
+    fe448_1(r->Z);
+    for (i = 3; i < 112; i += 2) {
+        ge448_select(&t, i / 2, e[i]);
+        ge448_madd(r, r, &t);
+    }
+
+    ge448_dbl(r, r);
+    ge448_dbl(r, r);
+    ge448_dbl(r, r);
+    ge448_dbl(r, r);
+
+    /* Add even indeces */
+    for (i = 0; i <= 112; i += 2) {
+        ge448_select(&t, i / 2, e[i]);
+        ge448_madd(r, r, &t);
+    }
+}
+
+/* Create to a sliding window for the scalar multiplicaton.
+ *
+ * r  [in]  Array of indeces.
+ * a  [in]  Scalar to break up.
+ */
+static void slide(int8_t *r, const uint8_t *a)
+{
+    int i;
+    int b;
+    int k;
+
+    for (i = 0; i < 448; ++i) {
+        r[i] = (a[i >> 3] >> (i & 7)) & 1;
+    }
+
+    for (i = 0; i < 448; ++i) {
+        if (r[i] == 0) {
+            continue;
+        }
+
+        for (b = 1; b <= 7 && i + b < 448; ++b) {
+            if (r[i + b] == 0) {
+                continue;
+            }
+
+            if (r[i] + (r[i + b] << b) <= 31) {
+                r[i] += r[i + b] << b; r[i + b] = 0;
+            }
+            else if (r[i] - (r[i + b] << b) >= -31) {
+                r[i] -= r[i + b] << b;
+                for (k = i + b; k < 448; ++k) {
+                    if (!r[k]) {
+                        r[k] = 1;
+                        break;
+                    }
+                    r[k] = 0;
+                }
+            }
+            else {
+                break;
+            }
+        }
+    }
+}
+
+/* Perform a scalar multplication of the base point and public point.
+ *   r = a * p + b * base
+ * Uses a sliding window of 5 bits.
+ * Not constant time.
+ *
+ * r  [in]  Point to hold result.
+ * a  [in]  Scalar to multiply by.
+ */
+int ge448_double_scalarmult_vartime(ge448_p2 *r, const uint8_t *a,
+                                    const ge448_p2 *p, const uint8_t *b)
+{
+    int8_t       aslide[448];
+    int8_t       bslide[448];
+    ge448_p2     pi[16]; /* p,3p,..,31p */
+    ge448_p2     p2;
+    int          i;
+
+    slide(aslide, a);
+    slide(bslide, b);
+
+    fe448_copy(pi[0].X, p->X);
+    fe448_copy(pi[0].Y, p->Y);
+    fe448_copy(pi[0].Z, p->Z);
+    ge448_dbl(&p2, p);
+    ge448_add(&pi[1], &p2, &pi[0]);
+    ge448_add(&pi[2], &p2, &pi[1]);
+    ge448_add(&pi[3], &p2, &pi[2]);
+    ge448_add(&pi[4], &p2, &pi[3]);
+    ge448_add(&pi[5], &p2, &pi[4]);
+    ge448_add(&pi[6], &p2, &pi[5]);
+    ge448_add(&pi[7], &p2, &pi[6]);
+    ge448_add(&pi[8], &p2, &pi[7]);
+    ge448_add(&pi[9], &p2, &pi[8]);
+    ge448_add(&pi[10], &p2, &pi[9]);
+    ge448_add(&pi[11], &p2, &pi[10]);
+    ge448_add(&pi[12], &p2, &pi[11]);
+    ge448_add(&pi[13], &p2, &pi[12]);
+    ge448_add(&pi[14], &p2, &pi[13]);
+    ge448_add(&pi[15], &p2, &pi[14]);
+
+    ge448_0(r);
+
+    /* Find first index that is not 0. */
+    for (i = 447; i >= 0; --i) {
+        if (aslide[i] || bslide[i]) {
+            break;
+        }
+    }
+
+    for (; i >= 0; --i) {
+        ge448_dbl(r, r);
+
+        if (aslide[i] > 0)
+            ge448_add(r, r, &pi[aslide[i]/2]);
+        else if (aslide[i] < 0)
+            ge448_sub(r, r ,&pi[(-aslide[i])/2]);
+
+        if (bslide[i] > 0)
+            ge448_madd(r, r, &base_i[bslide[i]/2]);
+        else if (bslide[i] < 0)
+            ge448_msub(r, r, &base_i[(-bslide[i])/2]);
+    }
+
+    return 0;
+}
+
+/* Convert compressed point to negative of affine point.
+ * Calculates x from the y and the negative bit.
+ * Not constant time.
+ *
+ * r  [in]  Uncompressed point.
+ * b  [in]  Array of bytes representing point.
+ * returns 0 on success and -1 on failure.
+ */
+int ge448_from_bytes_negate_vartime(ge448_p2 *r, const uint8_t *b)
+{
+    int   ret = 0;
+    ge448 u[GE448_WORDS];
+    ge448 v[GE448_WORDS];
+    ge448 u3[GE448_WORDS];
+    ge448 vxx[GE448_WORDS];
+    ge448 check[GE448_WORDS];
+
+    fe448_from_bytes(r->Y, b);
+    fe448_1(r->Z);
+    fe448_sqr(u, r->Y);                /* u = y^2                      */
+    fe448_mul39081(v, u);              /* v = 39081.y^2                */
+    fe448_sub(u, u, r->Z);             /* u = y^2-1                    */
+    fe448_reduce(u);
+    fe448_add(v, v, r->Z);             /* v = 39081.y^2-1              */
+    fe448_reduce(v);
+    fe448_neg(v, v);                   /* v = -39081.y^2-1 = d.y^2-1   */
+
+    fe448_sqr(r->X, v);                /* x = v^2                      */
+    fe448_mul(r->X, r->X, v);          /* x = v^3                      */
+    fe448_sqr(u3, u);                  /* x = u^2.v^3                  */
+    fe448_mul(r->X, r->X, u3);         /* x = u^2.v^3                  */
+    fe448_mul(u3, u3, u);              /* u3 = u^3                     */
+    fe448_mul(r->X, r->X, u3);         /* x = u^5.v^3                  */
+
+    fe448_pow_2_446_222_1(r->X, r->X); /* x = (u^5.v^3)^((q-3)/4)      */
+    fe448_mul(r->X, r->X, u3);         /* x = u^3(u^5.v^3)^((q-3)/4)   */
+    fe448_mul(r->X, r->X, v);          /* x = u^3.v(u^5.v^3)^((q-3)/4) */
+
+    fe448_sqr(vxx, r->X);
+    fe448_mul(vxx, vxx, v);
+    fe448_sub(check, vxx, u);          /* check = v.x^2-u              */
+    fe448_reduce(check);
+    /* Note; vx^2+u is NOT correct. */
+    if (fe448_isnonzero(check)) {
+        ret = -1;
+    }
+
+    /* Calculating negative of point in bytes - negate only if X is correct. */
+    if (fe448_isnegative(r->X) == (b[56] >> 7)) {
+        fe448_neg(r->X, r->X);
+    }
+
+    return ret;
+}
+
+#endif /* ED448_SMALL */
+#endif /* HAVE_CURVE448 || HAVE_ED448 */
+
--- a/wolfcrypt/src/ge_low_mem.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/ge_low_mem.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* ge_low_mem.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -93,7 +93,7 @@
 
 static word32 lt(word32 a,word32 b) /* 16-bit inputs */
 {
-  unsigned int x = a;
+  word32 x = a;
   x -= (unsigned int) b; /* 0..65535: no; 4294901761..4294967295: yes */
   x >>= 31; /* 0: no; 1: yes */
   return x;
--- a/wolfcrypt/src/ge_operations.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/ge_operations.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* ge_operations.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -42,8 +42,22 @@
     #include <wolfcrypt/src/misc.c>
 #endif
 
+#if defined(CURVED25519_X64)
+    #define CURVED25519_ASM_64BIT
+    #define CURVED25519_ASM
+#endif
+#if defined(WOLFSSL_ARMASM)
+    #if defined(__aarch64__)
+        #define CURVED25519_ASM_64BIT
+    #else
+        #define CURVED25519_ASM_32BIT
+    #endif
+    #define CURVED25519_ASM
+#endif
+
+
 static void ge_p2_0(ge_p2 *);
-#ifndef CURVED25519_X64
+#ifndef CURVED25519_ASM
 static void ge_precomp_0(ge_precomp *);
 #endif
 static void ge_p3_to_p2(ge_p2 *,const ge_p3 *);
@@ -81,6 +95,28 @@
 #define ORDER_4     0x1dea2f
 #define ORDER_5     0xa6f7c
 
+#ifdef CURVED25519_ASM_32BIT
+uint64_t load_3(const unsigned char *in)
+{
+  uint64_t result;
+  result = (uint64_t) in[0];
+  result |= ((uint64_t) in[1]) << 8;
+  result |= ((uint64_t) in[2]) << 16;
+  return result;
+}
+
+
+uint64_t load_4(const unsigned char *in)
+{
+  uint64_t result;
+  result = (uint64_t) in[0];
+  result |= ((uint64_t) in[1]) << 8;
+  result |= ((uint64_t) in[2]) << 16;
+  result |= ((uint64_t) in[3]) << 24;
+  return result;
+}
+#endif
+
 /*
 Input:
   s[0]+256*s[1]+...+256^63*s[63] = s
@@ -927,7 +963,7 @@
 */
 static WC_INLINE void ge_add(ge_p1p1 *r,const ge_p3 *p,const ge_cached *q)
 {
-#ifndef CURVED25519_X64
+#ifndef CURVED25519_ASM
     ge t0;
     fe_add(r->X,p->Y,p->X);
     fe_sub(r->Y,p->Y,p->X);
@@ -947,7 +983,7 @@
 }
 
 
-#ifndef CURVED25519_X64
+#ifndef CURVED25519_ASM
 /* ge_scalar mult base */
 static unsigned char equal(signed char b,signed char c)
 {
@@ -977,1051 +1013,1051 @@
 }
 #endif
 
-#ifdef CURVED25519_X64
+#ifdef CURVED25519_ASM_64BIT
 static const ge_precomp base[64][8] = {
 {
     {
-        { 0x2fbc93c6f58c3b85, 0xcf932dc6fb8c0e19, 0x270b4898643d42c2, 0x07cf9d3a33d4ba65 },
-        { 0x9d103905d740913e, 0xfd399f05d140beb3, 0xa5c18434688f8a09, 0x44fd2f9298f81267 },
-        { 0xdbbd15674b6fbb59, 0x41e13f00eea2a5ea, 0xcdd49d1cc957c6fa, 0x4f0ebe1faf16ecca }
-    },
-    {
-        { 0x9224e7fc933c71d7, 0x9f469d967a0ff5b5, 0x5aa69a65e1d60702, 0x590c063fa87d2e2e },
-        { 0x8a99a56042b4d5a8, 0x8f2b810c4e60acf6, 0xe09e236bb16e37aa, 0x6bb595a669c92555 },
-        { 0x6e347eaadad36802, 0xbaf3599383ee4805, 0x3bcabe10e6076826, 0x49314f0a165ed1b8 }
-    },
-    {
-        { 0xaf25b0a84cee9730, 0x025a8430e8864b8a, 0xc11b50029f016732, 0x7a164e1b9a80f8f4 },
-        { 0x56611fe8a4fcd265, 0x3bd353fde5c1ba7d, 0x8131f31a214bd6bd, 0x2ab91587555bda62 },
-        { 0x9bf211f4f1674834, 0xb84e6b17f62df895, 0xd7de6f075b722a4e, 0x549a04b963bb2a21 }
-    },
-    {
-        { 0x287351b98efc099f, 0x6765c6f47dfd2538, 0xca348d3dfb0a9265, 0x680e910321e58727 },
-        { 0x95fe050a056818bf, 0x327e89715660faa9, 0xc3e8e3cd06a05073, 0x27933f4c7445a49a },
-        { 0xbf1e45ece51426b0, 0xe32bc63d6dba0f94, 0xe42974d58cf852c0, 0x44f079b1b0e64c18 }
-    },
-    {
-        { 0xa212bc4408a5bb33, 0x8d5048c3c75eed02, 0xdd1beb0c5abfec44, 0x2945ccf146e206eb },
-        { 0x7f9182c3a447d6ba, 0xd50014d14b2729b7, 0xe33cf11cb864a087, 0x154a7e73eb1b55f3 },
-        { 0xc832a179e7d003b3, 0x5f729d0a00124d7e, 0x62c1d4a10e6d8ff3, 0x68b8ac5938b27a98 }
-    },
-    {
-        { 0x3a0ceeeb77157131, 0x9b27158900c8af88, 0x8065b668da59a736, 0x51e57bb6a2cc38bd },
-        { 0x499806b67b7d8ca4, 0x575be28427d22739, 0xbb085ce7204553b9, 0x38b64c41ae417884 },
-        { 0x8f9dad91689de3a4, 0x175f2428f8fb9137, 0x050ab5329fcfb988, 0x7865dfa21354c09f }
+        { 0x2fbc93c6f58c3b85, -0x306cd2390473f1e7, 0x270b4898643d42c2, 0x07cf9d3a33d4ba65 },
+        { -0x62efc6fa28bf6ec2, -0x02c660fa2ebf414d, -0x5a3e7bcb977075f7, 0x44fd2f9298f81267 },
+        { -0x2442ea98b49044a7, 0x41e13f00eea2a5ea, -0x322b62e336a83906, 0x4f0ebe1faf16ecca }
+    },
+    {
+        { -0x6ddb18036cc38e29, -0x60b9626985f00a4b, 0x5aa69a65e1d60702, 0x590c063fa87d2e2e },
+        { -0x75665a9fbd4b2a58, -0x70d47ef3b19f530a, -0x1f61dc944e91c856, 0x6bb595a669c92555 },
+        { 0x6e347eaadad36802, -0x450ca66c7c11b7fb, 0x3bcabe10e6076826, 0x49314f0a165ed1b8 }
+    },
+    {
+        { -0x50da4f57b31168d0, 0x025a8430e8864b8a, -0x3ee4affd60fe98ce, 0x7a164e1b9a80f8f4 },
+        { 0x56611fe8a4fcd265, 0x3bd353fde5c1ba7d, -0x7ece0ce5deb42943, 0x2ab91587555bda62 },
+        { -0x640dee0b0e98b7cc, -0x47b194e809d2076b, -0x282190f8a48dd5b2, 0x549a04b963bb2a21 }
+    },
+    {
+        { 0x287351b98efc099f, 0x6765c6f47dfd2538, -0x35cb72c204f56d9b, 0x680e910321e58727 },
+        { -0x6a01faf5fa97e741, 0x327e89715660faa9, -0x3c171c32f95faf8d, 0x27933f4c7445a49a },
+        { -0x40e1ba131aebd950, -0x1cd439c29245f06c, -0x1bd68b2a7307ad40, 0x44f079b1b0e64c18 }
+    },
+    {
+        { -0x5ded43bbf75a44cd, -0x72afb73c38a112fe, -0x22e414f3a54013bc, 0x2945ccf146e206eb },
+        { 0x7f9182c3a447d6ba, -0x2affeb2eb4d8d649, -0x1cc30ee3479b5f79, 0x154a7e73eb1b55f3 },
+        { -0x37cd5e86182ffc4d, 0x5f729d0a00124d7e, 0x62c1d4a10e6d8ff3, 0x68b8ac5938b27a98 }
+    },
+    {
+        { 0x3a0ceeeb77157131, -0x64d8ea76ff375078, -0x7f9a499725a658ca, 0x51e57bb6a2cc38bd },
+        { 0x499806b67b7d8ca4, 0x575be28427d22739, -0x44f7a318dfbaac47, 0x38b64c41ae417884 },
+        { -0x7062526e97621c5c, 0x175f2428f8fb9137, 0x050ab5329fcfb988, 0x7865dfa21354c09f }
     },
     {
         { 0x6b1a5cd0944ea3bf, 0x7470353ab39dc0d2, 0x71b2528228542e49, 0x461bea69283c927e },
-        { 0xba6f2c9aaa3221b1, 0x6ca021533bba23a7, 0x9dea764f92192c3a, 0x1d6edd5d2e5317e0 },
-        { 0x217a8aacab0fda36, 0xa528c6543d3549c8, 0x37d05b8b13ab7568, 0x233cef623a2cbc37 }
-    },
-    {
-        { 0x59b7596604dd3e8f, 0x6cb30377e288702c, 0xb1339c665ed9c323, 0x0915e76061bce52f },
-        { 0xe2a75dedf39234d9, 0x963d7680e1b558f9, 0x2c2741ac6e3c23fb, 0x3a9024a1320e01c3 },
-        { 0xdf7de835a834a37e, 0x8be19cda689857ea, 0x2c1185367167b326, 0x589eb3d9dbefd5c2 }
-    },
-},
-{
-    {
-        { 0x322d04a52d9021f6, 0xb9c19f3375c6bf9c, 0x587a3a4342d20b09, 0x143b1cf8aa64fe61 },
-        { 0x7ec851ca553e2df3, 0xa71284cba64878b3, 0xe6b5e4193288d1e7, 0x4cf210ec5a9a8883 },
-        { 0x9f867c7d968acaab, 0x5f54258e27092729, 0xd0a7d34bea180975, 0x21b546a3374126e1 }
-    },
-    {
-        { 0x490a7a45d185218f, 0x9a15377846049335, 0x0060ea09cc31e1f6, 0x7e041577f86ee965 },
-        { 0xa94ff858a2888343, 0xce0ed4565313ed3c, 0xf55c3dcfb5bf34fa, 0x0a653ca5c9eab371 },
-        { 0x66b2a496ce5b67f3, 0xff5492d8bd569796, 0x503cec294a592cd0, 0x566943650813acb2 }
-    },
-    {
-        { 0x5672f9eb1dabb69d, 0xba70b535afe853fc, 0x47ac0f752796d66d, 0x32a5351794117275 },
-        { 0xb818db0c26620798, 0x5d5c31d9606e354a, 0x0982fa4f00a8cdc7, 0x17e12bcd4653e2d4 },
-        { 0xd3a644a6df648437, 0x703b6559880fbfdd, 0xcb852540ad3a1aa5, 0x0900b3f78e4c6468 }
-    },
-    {
-        { 0xed280fbec816ad31, 0x52d9595bd8e6efe3, 0x0fe71772f6c623f5, 0x4314030b051e293c },
-        { 0x0a851b9f679d651b, 0xe108cb61033342f2, 0xd601f57fe88b30a3, 0x371f3acaed2dd714 },
-        { 0xd560005efbf0bcad, 0x8eb70f2ed1870c5e, 0x201f9033d084e6a0, 0x4c3a5ae1ce7b6670 }
-    },
-    {
-        { 0xbaf875e4c93da0dd, 0xb93282a771b9294d, 0x80d63fb7f4c6c460, 0x6de9c73dea66c181 },
-        { 0x4138a434dcb8fa95, 0x870cf67d6c96840b, 0xde388574297be82c, 0x7c814db27262a55a },
-        { 0x478904d5a04df8f2, 0xfafbae4ab10142d3, 0xf6c8ac63555d0998, 0x5aac4a412f90b104 }
-    },
-    {
-        { 0x603a0d0abd7f5134, 0x8089c932e1d3ae46, 0xdf2591398798bd63, 0x1c145cd274ba0235 },
-        { 0xc64f326b3ac92908, 0x5551b282e663e1e0, 0x476b35f54a1a4b83, 0x1b9da3fe189f68c2 },
-        { 0x32e8386475f3d743, 0x365b8baf6ae5d9ef, 0x825238b6385b681e, 0x234929c1167d65e1 }
-    },
-    {
-        { 0x48145cc21d099fcf, 0x4535c192cc28d7e5, 0x80e7c1e548247e01, 0x4a5f28743b2973ee },
-        { 0x984decaba077ade8, 0x383f77ad19eb389d, 0xc7ec6b7e2954d794, 0x59c77b3aeb7c3a7a },
-        { 0xd3add725225ccf62, 0x911a3381b2152c5d, 0xd8b39fad5b08f87d, 0x6f05606b4799fe3b }
-    },
-    {
-        { 0x5b433149f91b6483, 0xadb5dc655a2cbf62, 0x87fa8412632827b3, 0x60895e91ab49f8d8 },
-        { 0x9ffe9e92177ba962, 0x98aee71d0de5cae1, 0x3ff4ae942d831044, 0x714de12e58533ac8 },
-        { 0xe9ecf2ed0cf86c18, 0xb46d06120735dfd4, 0xbc9da09804b96be7, 0x73e2e62fd96dc26b }
+        { -0x4590d36555cdde4f, 0x6ca021533bba23a7, -0x621589b06de6d3c6, 0x1d6edd5d2e5317e0 },
+        { 0x217a8aacab0fda36, -0x5ad739abc2cab638, 0x37d05b8b13ab7568, 0x233cef623a2cbc37 }
+    },
+    {
+        { 0x59b7596604dd3e8f, 0x6cb30377e288702c, -0x4ecc6399a1263cdd, 0x0915e76061bce52f },
+        { -0x1d58a2120c6dcb27, -0x69c2897f1e4aa707, 0x2c2741ac6e3c23fb, 0x3a9024a1320e01c3 },
+        { -0x208217ca57cb5c82, -0x741e63259767a816, 0x2c1185367167b326, 0x589eb3d9dbefd5c2 }
+    },
+},
+{
+    {
+        { 0x322d04a52d9021f6, -0x463e60cc8a394064, 0x587a3a4342d20b09, 0x143b1cf8aa64fe61 },
+        { 0x7ec851ca553e2df3, -0x58ed7b3459b7874d, -0x194a1be6cd772e19, 0x4cf210ec5a9a8883 },
+        { -0x6079838269753555, 0x5f54258e27092729, -0x2f582cb415e7f68b, 0x21b546a3374126e1 }
+    },
+    {
+        { 0x490a7a45d185218f, -0x65eac887b9fb6ccb, 0x0060ea09cc31e1f6, 0x7e041577f86ee965 },
+        { -0x56b007a75d777cbd, -0x31f12ba9acec12c4, -0x0aa3c2304a40cb06, 0x0a653ca5c9eab371 },
+        { 0x66b2a496ce5b67f3, -0x00ab6d2742a9686a, 0x503cec294a592cd0, 0x566943650813acb2 }
+    },
+    {
+        { 0x5672f9eb1dabb69d, -0x458f4aca5017ac04, 0x47ac0f752796d66d, 0x32a5351794117275 },
+        { -0x47e724f3d99df868, 0x5d5c31d9606e354a, 0x0982fa4f00a8cdc7, 0x17e12bcd4653e2d4 },
+        { -0x2c59bb59209b7bc9, 0x703b6559880fbfdd, -0x347adabf52c5e55b, 0x0900b3f78e4c6468 }
+    },
+    {
+        { -0x12d7f04137e952cf, 0x52d9595bd8e6efe3, 0x0fe71772f6c623f5, 0x4314030b051e293c },
+        { 0x0a851b9f679d651b, -0x1ef7349efcccbd0e, -0x29fe0a801774cf5d, 0x371f3acaed2dd714 },
+        { -0x2a9fffa1040f4353, -0x7148f0d12e78f3a2, 0x201f9033d084e6a0, 0x4c3a5ae1ce7b6670 }
+    },
+    {
+        { -0x45078a1b36c25f23, -0x46cd7d588e46d6b3, -0x7f29c0480b393ba0, 0x6de9c73dea66c181 },
+        { 0x4138a434dcb8fa95, -0x78f3098293697bf5, -0x21c77a8bd68417d4, 0x7c814db27262a55a },
+        { 0x478904d5a04df8f2, -0x050451b54efebd2d, -0x0937539caaa2f668, 0x5aac4a412f90b104 }
+    },
+    {
+        { 0x603a0d0abd7f5134, -0x7f7636cd1e2c51ba, -0x20da6ec67867429d, 0x1c145cd274ba0235 },
+        { -0x39b0cd94c536d6f8, 0x5551b282e663e1e0, 0x476b35f54a1a4b83, 0x1b9da3fe189f68c2 },
+        { 0x32e8386475f3d743, 0x365b8baf6ae5d9ef, -0x7dadc749c7a497e2, 0x234929c1167d65e1 }
+    },
+    {
+        { 0x48145cc21d099fcf, 0x4535c192cc28d7e5, -0x7f183e1ab7db81ff, 0x4a5f28743b2973ee },
+        { -0x67b213545f885218, 0x383f77ad19eb389d, -0x38139481d6ab286c, 0x59c77b3aeb7c3a7a },
+        { -0x2c5228dadda3309e, -0x6ee5cc7e4dead3a3, -0x274c6052a4f70783, 0x6f05606b4799fe3b }
+    },
+    {
+        { 0x5b433149f91b6483, -0x524a239aa5d3409e, -0x78057bed9cd7d84d, 0x60895e91ab49f8d8 },
+        { -0x6001616de884569e, -0x675118e2f21a351f, 0x3ff4ae942d831044, 0x714de12e58533ac8 },
+        { -0x16130d12f30793e8, -0x4b92f9edf8ca202c, -0x43625f67fb469419, 0x73e2e62fd96dc26b }
     },
 },
 {
     {
         { 0x2eccdd0e632f9c1d, 0x51d0b69676893115, 0x52dfb76ba8637a58, 0x6dd37d49a00eef39 },
-        { 0xed5b635449aa515e, 0xa865c49f0bc6823a, 0x850c1fe95b42d1c4, 0x30d76d6f03d315b9 },
-        { 0x6c4444172106e4c7, 0xfb53d680928d7f69, 0xb4739ea4694d3f26, 0x10c697112e864bb0 }
+        { -0x12a49cabb655aea2, -0x579a3b60f4397dc6, -0x7af3e016a4bd2e3c, 0x30d76d6f03d315b9 },
+        { 0x6c4444172106e4c7, -0x04ac297f6d728097, -0x4b8c615b96b2c0da, 0x10c697112e864bb0 }
     },
     {
         { 0x0ca62aa08358c805, 0x6a3d4ae37a204247, 0x7464d3a63b11eddc, 0x03bf9baf550806ef },
         { 0x6493c4277dbe5fde, 0x265d4fad19ad7ea2, 0x0e00dfc846304590, 0x25e61cabed66fe09 },
-        { 0x3f13e128cc586604, 0x6f5873ecb459747e, 0xa0b63dedcc1268f5, 0x566d78634586e22c }
-    },
-    {
-        { 0xa1054285c65a2fd0, 0x6c64112af31667c3, 0x680ae240731aee58, 0x14fba5f34793b22a },
-        { 0x1637a49f9cc10834, 0xbc8e56d5a89bc451, 0x1cb5ec0f7f7fd2db, 0x33975bca5ecc35d9 },
+        { 0x3f13e128cc586604, 0x6f5873ecb459747e, -0x5f49c21233ed970b, 0x566d78634586e22c }
+    },
+    {
+        { -0x5efabd7a39a5d030, 0x6c64112af31667c3, 0x680ae240731aee58, 0x14fba5f34793b22a },
+        { 0x1637a49f9cc10834, -0x4371a92a57643baf, 0x1cb5ec0f7f7fd2db, 0x33975bca5ecc35d9 },
         { 0x3cd746166985f7d4, 0x593e5e84c9c80057, 0x2fc3f2b67b61131e, 0x14829cea83fc526c }
     },
     {
-        { 0x21e70b2f4e71ecb8, 0xe656ddb940a477e3, 0xbf6556cece1d4f80, 0x05fc3bc4535d7b7e },
-        { 0xff437b8497dd95c2, 0x6c744e30aa4eb5a7, 0x9e0c5d613c85e88b, 0x2fd9c71e5f758173 },
+        { 0x21e70b2f4e71ecb8, -0x19a92246bf5b881d, -0x409aa93131e2b080, 0x05fc3bc4535d7b7e },
+        { -0x00bc847b68226a3e, 0x6c744e30aa4eb5a7, -0x61f3a29ec37a1775, 0x2fd9c71e5f758173 },
         { 0x24b8b3ae52afdedd, 0x3495638ced3b30cf, 0x33a4bc83a9be8195, 0x373767475c651f04 }
     },
     {
-        { 0x634095cb14246590, 0xef12144016c15535, 0x9e38140c8910bc60, 0x6bf5905730907c8c },
-        { 0x2fba99fd40d1add9, 0xb307166f96f4d027, 0x4363f05215f03bae, 0x1fbea56c3b18f999 },
+        { 0x634095cb14246590, -0x10edebbfe93eaacb, -0x61c7ebf376ef43a0, 0x6bf5905730907c8c },
+        { 0x2fba99fd40d1add9, -0x4cf8e990690b2fd9, 0x4363f05215f03bae, 0x1fbea56c3b18f999 },
         { 0x0fa778f1e1415b8a, 0x06409ff7bac3a77e, 0x6f52d7b89aa29a50, 0x02521cf67a635a56 }
     },
     {
-        { 0xb1146720772f5ee4, 0xe8f894b196079ace, 0x4af8224d00ac824a, 0x001753d9f7cd6cc4 },
-        { 0x513fee0b0a9d5294, 0x8f98e75c0fdf5a66, 0xd4618688bfe107ce, 0x3fa00a7e71382ced },
-        { 0x3c69232d963ddb34, 0x1dde87dab4973858, 0xaad7d1f9a091f285, 0x12b5fe2fa048edb6 }
-    },
-    {
-        { 0xdf2b7c26ad6f1e92, 0x4b66d323504b8913, 0x8c409dc0751c8bc3, 0x6f7e93c20796c7b8 },
-        { 0x71f0fbc496fce34d, 0x73b9826badf35bed, 0xd2047261ff28c561, 0x749b76f96fb1206f },
-        { 0x1f5af604aea6ae05, 0xc12351f1bee49c99, 0x61a808b5eeff6b66, 0x0fcec10f01e02151 }
+        { -0x4eeb98df88d0a11c, -0x17076b4e69f86532, 0x4af8224d00ac824a, 0x001753d9f7cd6cc4 },
+        { 0x513fee0b0a9d5294, -0x706718a3f020a59a, -0x2b9e7977401ef832, 0x3fa00a7e71382ced },
+        { 0x3c69232d963ddb34, 0x1dde87dab4973858, -0x55282e065f6e0d7b, 0x12b5fe2fa048edb6 }
+    },
+    {
+        { -0x20d483d95290e16e, 0x4b66d323504b8913, -0x73bf623f8ae3743d, 0x6f7e93c20796c7b8 },
+        { 0x71f0fbc496fce34d, 0x73b9826badf35bed, -0x2dfb8d9e00d73a9f, 0x749b76f96fb1206f },
+        { 0x1f5af604aea6ae05, -0x3edcae0e411b6367, 0x61a808b5eeff6b66, 0x0fcec10f01e02151 }
     },
     {
         { 0x3df2d29dc4244e45, 0x2b020e7493d8de0a, 0x6cc8067e820c214d, 0x413779166feab90a },
         { 0x644d58a649fe1e44, 0x21fcaea231ad777e, 0x02441c5a887fd0d2, 0x4901aa7183c511f3 },
-        { 0x08b1b7548c1af8f0, 0xce0f7a7c246299b4, 0xf760b0f91e06d939, 0x41bb887b726d1213 }
-    },
-},
-{
-    {
-        { 0x97d980e0aa39f7d2, 0x35d0384252c6b51c, 0x7d43f49307cd55aa, 0x56bd36cfb78ac362 },
-        { 0x9267806c567c49d8, 0x066d04ccca791e6a, 0xa69f5645e3cc394b, 0x5c95b686a0788cd2 },
-        { 0x2ac519c10d14a954, 0xeaf474b494b5fa90, 0xe6af8382a9f87a5a, 0x0dea6db1879be094 }
+        { 0x08b1b7548c1af8f0, -0x31f08583db9d664c, -0x089f4f06e1f926c7, 0x41bb887b726d1213 }
+    },
+},
+{
+    {
+        { -0x68267f1f55c6082e, 0x35d0384252c6b51c, 0x7d43f49307cd55aa, 0x56bd36cfb78ac362 },
+        { -0x6d987f93a983b628, 0x066d04ccca791e6a, -0x5960a9ba1c33c6b5, 0x5c95b686a0788cd2 },
+        { 0x2ac519c10d14a954, -0x150b8b4b6b4a0570, -0x19507c7d560785a6, 0x0dea6db1879be094 }
     },
     {
         { 0x15baeb74d6a8797a, 0x7ef55cf1fac41732, 0x29001f5a3c8b05c5, 0x0ad7cc8752eaccfb },
-        { 0xaa66bf547344e5ab, 0xda1258888f1b4309, 0x5e87d2b3fd564b2f, 0x5b2c78885483b1dd },
-        { 0x52151362793408cf, 0xeb0f170319963d94, 0xa833b2fa883d9466, 0x093a7fa775003c78 }
-    },
-    {
-        { 0xb8e9604460a91286, 0x7f3fd8047778d3de, 0x67d01e31bf8a5e2d, 0x7b038a06c27b653e },
-        { 0xe5107de63a16d7be, 0xa377ffdc9af332cf, 0x70d5bf18440b677f, 0x6a252b19a4a31403 },
-        { 0x9ed919d5d36990f3, 0x5213aebbdb4eb9f2, 0xc708ea054cb99135, 0x58ded57f72260e56 }
-    },
-    {
-        { 0xda6d53265b0fd48b, 0x8960823193bfa988, 0xd78ac93261d57e28, 0x79f2942d3a5c8143 },
-        { 0x78e79dade9413d77, 0xf257f9d59729e67d, 0x59db910ee37aa7e6, 0x6aa11b5bbb9e039c },
-        { 0x97da2f25b6c88de9, 0x251ba7eaacf20169, 0x09b44f87ef4eb4e4, 0x7d90ab1bbc6a7da5 }
+        { -0x559940ab8cbb1a55, -0x25eda77770e4bcf7, 0x5e87d2b3fd564b2f, 0x5b2c78885483b1dd },
+        { 0x52151362793408cf, -0x14f0e8fce669c26c, -0x57cc4d0577c26b9a, 0x093a7fa775003c78 }
+    },
+    {
+        { -0x47169fbb9f56ed7a, 0x7f3fd8047778d3de, 0x67d01e31bf8a5e2d, 0x7b038a06c27b653e },
+        { -0x1aef8219c5e92842, -0x5c880023650ccd31, 0x70d5bf18440b677f, 0x6a252b19a4a31403 },
+        { -0x6126e62a2c966f0d, 0x5213aebbdb4eb9f2, -0x38f715fab3466ecb, 0x58ded57f72260e56 }
+    },
+    {
+        { -0x2592acd9a4f02b75, -0x769f7dce6c405678, -0x287536cd9e2a81d8, 0x79f2942d3a5c8143 },
+        { 0x78e79dade9413d77, -0x0da8062a68d61983, 0x59db910ee37aa7e6, 0x6aa11b5bbb9e039c },
+        { -0x6825d0da49377217, 0x251ba7eaacf20169, 0x09b44f87ef4eb4e4, 0x7d90ab1bbc6a7da5 }
     },
     {
         { 0x1a07a3f496b3c397, 0x11ceaa188f4e2532, 0x7d9498d5a7751bf0, 0x19ed161f508dd8a0 },
-        { 0x9acca683a7016bfe, 0x90505f4df2c50b6d, 0x6b610d5fcce435aa, 0x19a10d446198ff96 },
-        { 0x560a2cd687dce6ca, 0x7f3568c48664cf4d, 0x8741e95222803a38, 0x483bdab1595653fc }
-    },
-    {
-        { 0xd6cf4d0ab4da80f6, 0x82483e45f8307fe0, 0x05005269ae6f9da4, 0x1c7052909cf7877a },
-        { 0xfa780f148734fa49, 0x106f0b70360534e0, 0x2210776fe3e307bd, 0x3286c109dde6a0fe },
+        { -0x6533597c58fe9402, -0x6fafa0b20d3af493, 0x6b610d5fcce435aa, 0x19a10d446198ff96 },
+        { 0x560a2cd687dce6ca, 0x7f3568c48664cf4d, -0x78be16addd7fc5c8, 0x483bdab1595653fc }
+    },
+    {
+        { -0x2930b2f54b257f0a, -0x7db7c1ba07cf8020, 0x05005269ae6f9da4, 0x1c7052909cf7877a },
+        { -0x0587f0eb78cb05b7, 0x106f0b70360534e0, 0x2210776fe3e307bd, 0x3286c109dde6a0fe },
         { 0x32ee7de2874e98d4, 0x14c362e9b97e0c60, 0x5781dcde6a60a38a, 0x217dd5eaaa7aa840 }
     },
     {
-        { 0x8bdf1fb9be8c0ec8, 0x00bae7f8e30a0282, 0x4963991dad6c4f6c, 0x07058a6e5df6f60a },
-        { 0x9db7c4d0248e1eb0, 0xe07697e14d74bf52, 0x1e6a9b173c562354, 0x7fa7c21f795a4965 },
-        { 0xe9eb02c4db31f67f, 0xed25fd8910bcfb2b, 0x46c8131f5c5cddb4, 0x33b21c13a0cb9bce }
-    },
-    {
-        { 0x9aafb9b05ee38c5b, 0xbf9d2d4e071a13c7, 0x8eee6e6de933290a, 0x1c3bab17ae109717 },
-        { 0x360692f8087d8e31, 0xf4dcc637d27163f7, 0x25a4e62065ea5963, 0x659bf72e5ac160d9 },
+        { -0x7420e0464173f138, 0x00bae7f8e30a0282, 0x4963991dad6c4f6c, 0x07058a6e5df6f60a },
+        { -0x62483b2fdb71e150, -0x1f89681eb28b40ae, 0x1e6a9b173c562354, 0x7fa7c21f795a4965 },
+        { -0x1614fd3b24ce0981, -0x12da0276ef4304d5, 0x46c8131f5c5cddb4, 0x33b21c13a0cb9bce }
+    },
+    {
+        { -0x6550464fa11c73a5, -0x4062d2b1f8e5ec39, -0x7111919216ccd6f6, 0x1c3bab17ae109717 },
+        { 0x360692f8087d8e31, -0x0b2339c82d8e9c09, 0x25a4e62065ea5963, 0x659bf72e5ac160d9 },
         { 0x1c9ab216c7cab7b0, 0x7d65d37407bbc3cc, 0x52744750504a58d5, 0x09f2606b131a2990 }
     },
 },
 {
     {
-        { 0x7e234c597c6691ae, 0x64889d3d0a85b4c8, 0xdae2c90c354afae7, 0x0a871e070c6a9e1d },
+        { 0x7e234c597c6691ae, 0x64889d3d0a85b4c8, -0x251d36f3cab50519, 0x0a871e070c6a9e1d },
         { 0x40e87d44744346be, 0x1d48dad415b52b25, 0x7c3a8a18a13b603e, 0x4eb728c12fcdbdf7 },
         { 0x3301b5994bbc8989, 0x736bae3a5bdd4260, 0x0d61ade219d59e3c, 0x3ee7300f2685d464 }
     },
     {
-        { 0x43fa7947841e7518, 0xe5c6fa59639c46d7, 0xa1065e1de3052b74, 0x7d47c6a2cfb89030 },
-        { 0xf5d255e49e7dd6b7, 0x8016115c610b1eac, 0x3c99975d92e187ca, 0x13815762979125c2 },
-        { 0x3fdad0148ef0d6e0, 0x9d3e749a91546f3c, 0x71ec621026bb8157, 0x148cf58d34c9ec80 }
-    },
-    {
-        { 0xe2572f7d9ae4756d, 0x56c345bb88f3487f, 0x9fd10b6d6960a88d, 0x278febad4eaea1b9 },
+        { 0x43fa7947841e7518, -0x1a3905a69c63b929, -0x5ef9a1e21cfad48c, 0x7d47c6a2cfb89030 },
+        { -0x0a2daa1b61822949, -0x7fe9eea39ef4e154, 0x3c99975d92e187ca, 0x13815762979125c2 },
+        { 0x3fdad0148ef0d6e0, -0x62c18b656eab90c4, 0x71ec621026bb8157, 0x148cf58d34c9ec80 }
+    },
+    {
+        { -0x1da8d082651b8a93, 0x56c345bb88f3487f, -0x602ef492969f5773, 0x278febad4eaea1b9 },
         { 0x46a492f67934f027, 0x469984bef6840aa9, 0x5ca1bc2a89611854, 0x3ff2fa1ebd5dbbd4 },
-        { 0xb1aa681f8c933966, 0x8c21949c20290c98, 0x39115291219d3c52, 0x4104dd02fe9c677b }
-    },
-    {
-        { 0x81214e06db096ab8, 0x21a8b6c90ce44f35, 0x6524c12a409e2af5, 0x0165b5a48efca481 },
-        { 0x72b2bf5e1124422a, 0xa1fa0c3398a33ab5, 0x94cb6101fa52b666, 0x2c863b00afaf53d5 },
-        { 0xf190a474a0846a76, 0x12eff984cd2f7cc0, 0x695e290658aa2b8f, 0x591b67d9bffec8b8 }
-    },
-    {
-        { 0x99b9b3719f18b55d, 0xe465e5faa18c641e, 0x61081136c29f05ed, 0x489b4f867030128b },
+        { -0x4e5597e0736cc69a, -0x73de6b63dfd6f368, 0x39115291219d3c52, 0x4104dd02fe9c677b }
+    },
+    {
+        { -0x7edeb1f924f69548, 0x21a8b6c90ce44f35, 0x6524c12a409e2af5, 0x0165b5a48efca481 },
+        { 0x72b2bf5e1124422a, -0x5e05f3cc675cc54b, -0x6b349efe05ad499a, 0x2c863b00afaf53d5 },
+        { -0x0e6f5b8b5f7b958a, 0x12eff984cd2f7cc0, 0x695e290658aa2b8f, 0x591b67d9bffec8b8 }
+    },
+    {
+        { -0x66464c8e60e74aa3, -0x1b9a1a055e739be2, 0x61081136c29f05ed, 0x489b4f867030128b },
         { 0x312f0d1c80b49bfa, 0x5979515eabf3ec8a, 0x727033c09ef01c88, 0x3de02ec7ca8f7bcb },
-        { 0xd232102d3aeb92ef, 0xe16253b46116a861, 0x3d7eabe7190baa24, 0x49f5fbba496cbebf }
-    },
-    {
-        { 0x155d628c1e9c572e, 0x8a4d86acc5884741, 0x91a352f6515763eb, 0x06a1a6c28867515b },
-        { 0x30949a108a5bcfd4, 0xdc40dd70bc6473eb, 0x92c294c1307c0d1c, 0x5604a86dcbfa6e74 },
-        { 0x7288d1d47c1764b6, 0x72541140e0418b51, 0x9f031a6018acf6d1, 0x20989e89fe2742c6 }
+        { -0x2dcdefd2c5146d11, -0x1e9dac4b9ee9579f, 0x3d7eabe7190baa24, 0x49f5fbba496cbebf }
+    },
+    {
+        { 0x155d628c1e9c572e, -0x75b279533a77b8bf, -0x6e5cad09aea89c15, 0x06a1a6c28867515b },
+        { 0x30949a108a5bcfd4, -0x23bf228f439b8c15, -0x6d3d6b3ecf83f2e4, 0x5604a86dcbfa6e74 },
+        { 0x7288d1d47c1764b6, 0x72541140e0418b51, -0x60fce59fe753092f, 0x20989e89fe2742c6 }
     },
     {
         { 0x1674278b85eaec2e, 0x5621dc077acb2bdf, 0x640a4c1661cbf45a, 0x730b9950f70595d3 },
-        { 0x499777fd3a2dcc7f, 0x32857c2ca54fd892, 0xa279d864d207e3a0, 0x0403ed1d0ca67e29 },
-        { 0xc94b2d35874ec552, 0xc5e6c8cf98246f8d, 0xf7cb46fa16c035ce, 0x5bd7454308303dcc }
-    },
-    {
-        { 0x85c4932115e7792a, 0xc64c89a2bdcdddc9, 0x9d1e3da8ada3d762, 0x5bb7db123067f82c },
+        { 0x499777fd3a2dcc7f, 0x32857c2ca54fd892, -0x5d86279b2df81c60, 0x0403ed1d0ca67e29 },
+        { -0x36b4d2ca78b13aae, -0x3a19373067db9073, -0x0834b905e93fca32, 0x5bd7454308303dcc }
+    },
+    {
+        { -0x7a3b6cdeea1886d6, -0x39b3765d42322237, -0x62e1c257525c289e, 0x5bb7db123067f82c },
         { 0x7f9ad19528b24cc2, 0x7f6b54656335c181, 0x66b8b66e4fc07236, 0x133a78007380ad83 },
         { 0x0961f467c6ca62be, 0x04ec21d6211952ee, 0x182360779bd54770, 0x740dca6d58f0e0d2 }
     },
 },
 {
     {
-        { 0x3906c72aed261ae5, 0x9ab68fd988e100f7, 0xf5e9059af3360197, 0x0e53dc78bf2b6d47 },
+        { 0x3906c72aed261ae5, -0x65497026771eff09, -0x0a16fa650cc9fe69, 0x0e53dc78bf2b6d47 },
         { 0x50b70bf5d3f0af0b, 0x4feaf48ae32e71f7, 0x60e84ed3a55bbd34, 0x00ed489b3f50d1ed },
-        { 0xb90829bf7971877a, 0x5e4444636d17e631, 0x4d05c52e18276893, 0x27632d9a5a4a4af5 }
-    },
-    {
-        { 0xa98285d187eaffdb, 0xa5b4fbbbd8d0a864, 0xb658f27f022663f7, 0x3bbc2b22d99ce282 },
-        { 0xd11ff05154b260ce, 0xd86dc38e72f95270, 0x601fcd0d267cc138, 0x2b67916429e90ccd },
-        { 0xb917c952583c0a58, 0x653ff9b80fe4c6f3, 0x9b0da7d7bcdf3c0c, 0x43a0eeb6ab54d60e }
-    },
-    {
-        { 0x3ac6322357875fe8, 0xd9d4f4ecf5fbcb8f, 0x8dee8493382bb620, 0x50c5eaa14c799fdc },
-        { 0x396966a46d4a5487, 0xf811a18aac2bb3ba, 0x66e4685b5628b26b, 0x70a477029d929b92 },
-        { 0xdd0edc8bd6f2fb3c, 0x54c63aa79cc7b7a0, 0xae0b032b2c8d9f1a, 0x6f9ce107602967fb }
-    },
-    {
-        { 0x139693063520e0b5, 0x437fcf7c88ea03fe, 0xf7d4c40bd3c959bc, 0x699154d1f893ded9 },
-        { 0xad1054b1cde1c22a, 0xc4a8e90248eb32df, 0x5f3e7b33accdc0ea, 0x72364713fc79963e },
-        { 0x315d5c75b4b27526, 0xcccb842d0236daa5, 0x22f0c8a3345fee8e, 0x73975a617d39dbed }
+        { -0x46f7d640868e7886, 0x5e4444636d17e631, 0x4d05c52e18276893, 0x27632d9a5a4a4af5 }
+    },
+    {
+        { -0x567d7a2e78150025, -0x5a4b0444272f579c, -0x49a70d80fdd99c09, 0x3bbc2b22d99ce282 },
+        { -0x2ee00faeab4d9f32, -0x27923c718d06ad90, 0x601fcd0d267cc138, 0x2b67916429e90ccd },
+        { -0x46e836ada7c3f5a8, 0x653ff9b80fe4c6f3, -0x64f258284320c3f4, 0x43a0eeb6ab54d60e }
+    },
+    {
+        { 0x3ac6322357875fe8, -0x262b0b130a043471, -0x72117b6cc7d449e0, 0x50c5eaa14c799fdc },
+        { 0x396966a46d4a5487, -0x07ee5e7553d44c46, 0x66e4685b5628b26b, 0x70a477029d929b92 },
+        { -0x22f12374290d04c4, 0x54c63aa79cc7b7a0, -0x51f4fcd4d37260e6, 0x6f9ce107602967fb }
+    },
+    {
+        { 0x139693063520e0b5, 0x437fcf7c88ea03fe, -0x082b3bf42c36a644, 0x699154d1f893ded9 },
+        { -0x52efab4e321e3dd6, -0x3b5716fdb714cd21, 0x5f3e7b33accdc0ea, 0x72364713fc79963e },
+        { 0x315d5c75b4b27526, -0x33347bd2fdc9255b, 0x22f0c8a3345fee8e, 0x73975a617d39dbed }
     },
     {
         { 0x6f37f392f4433e46, 0x0e19b9a11f566b18, 0x220fb78a1fd1d662, 0x362a4258a381c94d },
-        { 0xe4024df96375da10, 0x78d3251a1830c870, 0x902b1948658cd91c, 0x7e18b10b29b7438a },
-        { 0x9071d9132b6beb2f, 0x0f26e9ad28418247, 0xeab91ec9bdec925d, 0x4be65bc8f48af2de }
-    },
-    {
-        { 0x1d50fba257c26234, 0x7bd4823adeb0678b, 0xc2b0dc6ea6538af5, 0x5665eec6351da73e },
-        { 0x78487feba36e7028, 0x5f3f13001dd8ce34, 0x934fb12d4b30c489, 0x056c244d397f0a2b },
-        { 0xdb3ee00943bfb210, 0x4972018720800ac2, 0x26ab5d6173bd8667, 0x20b209c2ab204938 }
+        { -0x1bfdb2069c8a25f0, 0x78d3251a1830c870, -0x6fd4e6b79a7326e4, 0x7e18b10b29b7438a },
+        { -0x6f8e26ecd49414d1, 0x0f26e9ad28418247, -0x1546e13642136da3, 0x4be65bc8f48af2de }
+    },
+    {
+        { 0x1d50fba257c26234, 0x7bd4823adeb0678b, -0x3d4f239159ac750b, 0x5665eec6351da73e },
+        { 0x78487feba36e7028, 0x5f3f13001dd8ce34, -0x6cb04ed2b4cf3b77, 0x056c244d397f0a2b },
+        { -0x24c11ff6bc404df0, 0x4972018720800ac2, 0x26ab5d6173bd8667, 0x20b209c2ab204938 }
     },
     {
         { 0x1fcca94516bd3289, 0x448d65aa41420428, 0x59c3b7b216a55d62, 0x49992cc64e612cd8 },
-        { 0x549e342ac07fb34b, 0x02d8220821373d93, 0xbc262d70acd1f567, 0x7a92c9fdfbcac784 },
-        { 0x65bd1bea70f801de, 0x1befb7c0fe49e28a, 0xa86306cdb1b2ae4a, 0x3b7ac0cd265c2a09 }
-    },
-    {
-        { 0xf0d54e4f22ed39a7, 0xa2aae91e5608150a, 0xf421b2e9eddae875, 0x31bc531d6b7de992 },
-        { 0x822bee438c01bcec, 0x530cb525c0fbc73b, 0x48519034c1953fe9, 0x265cc261e09a0f5b },
-        { 0xdf3d134da980f971, 0x7a4fb8d1221a22a7, 0x3df7d42035aad6d8, 0x2a14edcc6a1a125e }
-    },
-},
-{
-    {
-        { 0x231a8c570478433c, 0xb7b5270ec281439d, 0xdbaa99eae3d9079f, 0x2c03f5256c2b03d9 },
-        { 0xdf48ee0752cfce4e, 0xc3fffaf306ec08b7, 0x05710b2ab95459c4, 0x161d25fa963ea38d },
+        { 0x549e342ac07fb34b, 0x02d8220821373d93, -0x43d9d28f532e0a99, 0x7a92c9fdfbcac784 },
+        { 0x65bd1bea70f801de, 0x1befb7c0fe49e28a, -0x579cf9324e4d51b6, 0x3b7ac0cd265c2a09 }
+    },
+    {
+        { -0x0f2ab1b0dd12c659, -0x5d5516e1a9f7eaf6, -0x0bde4d161225178b, 0x31bc531d6b7de992 },
+        { -0x7dd411bc73fe4314, 0x530cb525c0fbc73b, 0x48519034c1953fe9, 0x265cc261e09a0f5b },
+        { -0x20c2ecb2567f068f, 0x7a4fb8d1221a22a7, 0x3df7d42035aad6d8, 0x2a14edcc6a1a125e }
+    },
+},
+{
+    {
+        { 0x231a8c570478433c, -0x484ad8f13d7ebc63, -0x245566151c26f861, 0x2c03f5256c2b03d9 },
+        { -0x20b711f8ad3031b2, -0x3c00050cf913f749, 0x05710b2ab95459c4, 0x161d25fa963ea38d },
         { 0x790f18757b53a47d, 0x307b0130cf0c5879, 0x31903d77257ef7f9, 0x699468bdbd96bbaf }
     },
     {
-        { 0xd8dd3de66aa91948, 0x485064c22fc0d2cc, 0x9b48246634fdea2f, 0x293e1c4e6c4a2e3a },
-        { 0xbd1f2f46f4dafecf, 0x7cef0114a47fd6f7, 0xd31ffdda4a47b37f, 0x525219a473905785 },
-        { 0x376e134b925112e1, 0x703778b5dca15da0, 0xb04589af461c3111, 0x5b605c447f032823 }
-    },
-    {
-        { 0x3be9fec6f0e7f04c, 0x866a579e75e34962, 0x5542ef161e1de61a, 0x2f12fef4cc5abdd5 },
-        { 0xb965805920c47c89, 0xe7f0100c923b8fcc, 0x0001256502e2ef77, 0x24a76dcea8aeb3ee },
-        { 0x0a4522b2dfc0c740, 0x10d06e7f40c9a407, 0xc6cf144178cff668, 0x5e607b2518a43790 }
-    },
-    {
-        { 0xa02c431ca596cf14, 0xe3c42d40aed3e400, 0xd24526802e0f26db, 0x201f33139e457068 },
-        { 0x58b31d8f6cdf1818, 0x35cfa74fc36258a2, 0xe1b3ff4f66e61d6e, 0x5067acab6ccdd5f7 },
-        { 0xfd527f6b08039d51, 0x18b14964017c0006, 0xd5220eb02e25a4a8, 0x397cba8862460375 }
-    },
-    {
-        { 0x7815c3fbc81379e7, 0xa6619420dde12af1, 0xffa9c0f885a8fdd5, 0x771b4022c1e1c252 },
-        { 0x30c13093f05959b2, 0xe23aa18de9a97976, 0x222fd491721d5e26, 0x2339d320766e6c3a },
-        { 0xd87dd986513a2fa7, 0xf5ac9b71f9d4cf08, 0xd06bc31b1ea283b3, 0x331a189219971a76 }
-    },
-    {
-        { 0x26512f3a9d7572af, 0x5bcbe28868074a9e, 0x84edc1c11180f7c4, 0x1ac9619ff649a67b },
-        { 0xf5166f45fb4f80c6, 0x9c36c7de61c775cf, 0xe3d4e81b9041d91c, 0x31167c6b83bdfe21 },
-        { 0xf22b3842524b1068, 0x5068343bee9ce987, 0xfc9d71844a6250c8, 0x612436341f08b111 }
-    },
-    {
-        { 0x8b6349e31a2d2638, 0x9ddfb7009bd3fd35, 0x7f8bf1b8a3a06ba4, 0x1522aa3178d90445 },
-        { 0xd99d41db874e898d, 0x09fea5f16c07dc20, 0x793d2c67d00f9bbc, 0x46ebe2309e5eff40 },
-        { 0x2c382f5369614938, 0xdafe409ab72d6d10, 0xe8c83391b646f227, 0x45fe70f50524306c }
+        { -0x2722c2199556e6b8, 0x485064c22fc0d2cc, -0x64b7db99cb0215d1, 0x293e1c4e6c4a2e3a },
+        { -0x42e0d0b90b250131, 0x7cef0114a47fd6f7, -0x2ce00225b5b84c81, 0x525219a473905785 },
+        { 0x376e134b925112e1, 0x703778b5dca15da0, -0x4fba7650b9e3ceef, 0x5b605c447f032823 }
+    },
+    {
+        { 0x3be9fec6f0e7f04c, -0x7995a8618a1cb69e, 0x5542ef161e1de61a, 0x2f12fef4cc5abdd5 },
+        { -0x469a7fa6df3b8377, -0x180feff36dc47034, 0x0001256502e2ef77, 0x24a76dcea8aeb3ee },
+        { 0x0a4522b2dfc0c740, 0x10d06e7f40c9a407, -0x3930ebbe87300998, 0x5e607b2518a43790 }
+    },
+    {
+        { -0x5fd3bce35a6930ec, -0x1c3bd2bf512c1c00, -0x2dbad97fd1f0d925, 0x201f33139e457068 },
+        { 0x58b31d8f6cdf1818, 0x35cfa74fc36258a2, -0x1e4c00b09919e292, 0x5067acab6ccdd5f7 },
+        { -0x02ad8094f7fc62af, 0x18b14964017c0006, -0x2addf14fd1da5b58, 0x397cba8862460375 }
+    },
+    {
+        { 0x7815c3fbc81379e7, -0x599e6bdf221ed50f, -0x00563f077a57022b, 0x771b4022c1e1c252 },
+        { 0x30c13093f05959b2, -0x1dc55e721656868a, 0x222fd491721d5e26, 0x2339d320766e6c3a },
+        { -0x27822679aec5d059, -0x0a53648e062b30f8, -0x2f943ce4e15d7c4d, 0x331a189219971a76 }
+    },
+    {
+        { 0x26512f3a9d7572af, 0x5bcbe28868074a9e, -0x7b123e3eee7f083c, 0x1ac9619ff649a67b },
+        { -0x0ae990ba04b07f3a, -0x63c938219e388a31, -0x1c2b17e46fbe26e4, 0x31167c6b83bdfe21 },
+        { -0x0dd4c7bdadb4ef98, 0x5068343bee9ce987, -0x03628e7bb59daf38, 0x612436341f08b111 }
+    },
+    {
+        { -0x749cb61ce5d2d9c8, -0x622048ff642c02cb, 0x7f8bf1b8a3a06ba4, 0x1522aa3178d90445 },
+        { -0x2662be2478b17673, 0x09fea5f16c07dc20, 0x793d2c67d00f9bbc, 0x46ebe2309e5eff40 },
+        { 0x2c382f5369614938, -0x2501bf6548d292f0, -0x1737cc6e49b90dd9, 0x45fe70f50524306c }
     },
     {
         { 0x62f24920c8951491, 0x05f007c83f630ca2, 0x6fbb45d2f5c9d4b8, 0x16619f6db57a2245 },
-        { 0xda4875a6960c0b8c, 0x5b68d076ef0e2f20, 0x07fb51cf3d0b8fd4, 0x428d1623a0e392d4 },
-        { 0x084f4a4401a308fd, 0xa82219c376a5caac, 0xdeb8de4643d1bc7d, 0x1d81592d60bd38c6 }
-    },
-},
-{
-    {
-        { 0x3a4a369a2f89c8a1, 0x63137a1d7c8de80d, 0xbcac008a78eda015, 0x2cb8b3a5b483b03f },
-        { 0xd833d7beec2a4c38, 0x2c9162830acc20ed, 0xe93a47aa92df7581, 0x702d67a3333c4a81 },
+        { -0x25b78a5969f3f474, 0x5b68d076ef0e2f20, 0x07fb51cf3d0b8fd4, 0x428d1623a0e392d4 },
+        { 0x084f4a4401a308fd, -0x57dde63c895a3554, -0x214721b9bc2e4383, 0x1d81592d60bd38c6 }
+    },
+},
+{
+    {
+        { 0x3a4a369a2f89c8a1, 0x63137a1d7c8de80d, -0x4353ff7587125feb, 0x2cb8b3a5b483b03f },
+        { -0x27cc284113d5b3c8, 0x2c9162830acc20ed, -0x16c5b8556d208a7f, 0x702d67a3333c4a81 },
         { 0x36e417cbcb1b90a1, 0x33b3ddaa7f11794e, 0x3f510808885bc607, 0x24141dc0e6a8020d }
     },
     {
-        { 0x91925dccbd83157d, 0x3ca1205322cc8094, 0x28e57f183f90d6e4, 0x1a4714cede2e767b },
-        { 0x59f73c773fefee9d, 0xb3f1ef89c1cf989d, 0xe35dfb42e02e545f, 0x5766120b47a1b47c },
-        { 0xdb20ba0fb8b6b7ff, 0xb732c3b677511fa1, 0xa92b51c099f02d89, 0x4f3875ad489ca5f1 }
-    },
-    {
-        { 0x79ed13f6ee73eec0, 0xa5c6526d69110bb1, 0xe48928c38603860c, 0x722a1446fd7059f5 },
-        { 0xc7fc762f4932ab22, 0x7ac0edf72f4c3c1b, 0x5f6b55aa9aa895e8, 0x3680274dad0a0081 },
-        { 0xd0959fe9a8cf8819, 0xd0a995508475a99c, 0x6eac173320b09cc5, 0x628ecf04331b1095 }
-    },
-    {
-        { 0x9b41acf85c74ccf1, 0xb673318108265251, 0x99c92aed11adb147, 0x7a47d70d34ecb40f },
-        { 0x98bcb118a9d0ddbc, 0xee449e3408b4802b, 0x87089226b8a6b104, 0x685f349a45c7915d },
-        { 0x60a0c4cbcc43a4f5, 0x775c66ca3677bea9, 0xa17aa1752ff8f5ed, 0x11ded9020e01fdc0 }
-    },
-    {
-        { 0x471f95b03bea93b7, 0x0552d7d43313abd3, 0xbd9370e2e17e3f7b, 0x7b120f1db20e5bec },
-        { 0x890e7809caefe704, 0x8728296de30e8c6c, 0x4c5cd2a392aeb1c9, 0x194263d15771531f },
-        { 0x17d2fb3d86502d7a, 0xb564d84450a69352, 0x7da962c8a60ed75d, 0x00d0f85b318736aa }
-    },
-    {
-        { 0xa6753c1efd7621c1, 0x69c0b4a7445671f5, 0x971f527405b23c11, 0x387bc74851a8c7cd },
-        { 0x978b142e777c84fd, 0xf402644705a8c062, 0xa67ad51be7e612c7, 0x2f7b459698dd6a33 },
-        { 0x81894b4d4a52a9a8, 0xadd93e12f6b8832f, 0x184d8548b61bd638, 0x3f1c62dbd6c9f6cd }
+        { -0x6e6da233427cea83, 0x3ca1205322cc8094, 0x28e57f183f90d6e4, 0x1a4714cede2e767b },
+        { 0x59f73c773fefee9d, -0x4c0e10763e306763, -0x1ca204bd1fd1aba1, 0x5766120b47a1b47c },
+        { -0x24df45f047494801, -0x48cd3c4988aee05f, -0x56d4ae3f660fd277, 0x4f3875ad489ca5f1 }
+    },
+    {
+        { 0x79ed13f6ee73eec0, -0x5a39ad9296eef44f, -0x1b76d73c79fc79f4, 0x722a1446fd7059f5 },
+        { -0x380389d0b6cd54de, 0x7ac0edf72f4c3c1b, 0x5f6b55aa9aa895e8, 0x3680274dad0a0081 },
+        { -0x2f6a6016573077e7, -0x2f566aaf7b8a5664, 0x6eac173320b09cc5, 0x628ecf04331b1095 }
+    },
+    {
+        { -0x64be5307a38b330f, -0x498cce7ef7d9adaf, -0x6636d512ee524eb9, 0x7a47d70d34ecb40f },
+        { -0x67434ee7562f2244, -0x11bb61cbf74b7fd5, -0x78f76dd947594efc, 0x685f349a45c7915d },
+        { 0x60a0c4cbcc43a4f5, 0x775c66ca3677bea9, -0x5e855e8ad0070a13, 0x11ded9020e01fdc0 }
+    },
+    {
+        { 0x471f95b03bea93b7, 0x0552d7d43313abd3, -0x426c8f1d1e81c085, 0x7b120f1db20e5bec },
+        { -0x76f187f6351018fc, -0x78d7d6921cf17394, 0x4c5cd2a392aeb1c9, 0x194263d15771531f },
+        { 0x17d2fb3d86502d7a, -0x4a9b27bbaf596cae, 0x7da962c8a60ed75d, 0x00d0f85b318736aa }
+    },
+    {
+        { -0x598ac3e10289de3f, 0x69c0b4a7445671f5, -0x68e0ad8bfa4dc3ef, 0x387bc74851a8c7cd },
+        { -0x6874ebd188837b03, -0x0bfd9bb8fa573f9e, -0x59852ae41819ed39, 0x2f7b459698dd6a33 },
+        { -0x7e76b4b2b5ad5658, -0x5226c1ed09477cd1, 0x184d8548b61bd638, 0x3f1c62dbd6c9f6cd }
     },
     {
         { 0x3fad3e40148f693d, 0x052656e194eb9a72, 0x2f4dcbfd184f4e2f, 0x406f8db1c482e18b },
-        { 0x2e8f1f0091910c1f, 0xa4df4fe0bff2e12c, 0x60c6560aee927438, 0x6338283facefc8fa },
-        { 0x9e630d2c7f191ee4, 0x4fbf8301bc3ff670, 0x787d8e4e7afb73c4, 0x50d83d5be8f58fa5 }
-    },
-    {
-        { 0xc0accf90b4d3b66d, 0xa7059de561732e60, 0x033d1f7870c6b0ba, 0x584161cd26d946e4 },
-        { 0x85683916c11a1897, 0x2d69a4efe506d008, 0x39af1378f664bd01, 0x65942131361517c6 },
-        { 0xbbf2b1a072d27ca2, 0xbf393c59fbdec704, 0xe98dbbcee262b81e, 0x02eebd0b3029b589 }
-    },
-},
-{
-    {
-        { 0x8765b69f7b85c5e8, 0x6ff0678bd168bab2, 0x3a70e77c1d330f9b, 0x3a5f6d51b0af8e7c },
+        { 0x2e8f1f0091910c1f, -0x5b20b01f400d1ed4, 0x60c6560aee927438, 0x6338283facefc8fa },
+        { -0x619cf2d380e6e11c, 0x4fbf8301bc3ff670, 0x787d8e4e7afb73c4, 0x50d83d5be8f58fa5 }
+    },
+    {
+        { -0x3f53306f4b2c4993, -0x58fa621a9e8cd1a0, 0x033d1f7870c6b0ba, 0x584161cd26d946e4 },
+        { -0x7a97c6e93ee5e769, 0x2d69a4efe506d008, 0x39af1378f664bd01, 0x65942131361517c6 },
+        { -0x440d4e5f8d2d835e, -0x40c6c3a6042138fc, -0x167244311d9d47e2, 0x02eebd0b3029b589 }
+    },
+},
+{
+    {
+        { -0x789a4960847a3a18, 0x6ff0678bd168bab2, 0x3a70e77c1d330f9b, 0x3a5f6d51b0af8e7c },
         { 0x61368756a60dac5f, 0x17e02f6aebabdc57, 0x7f193f2d4cce0f7d, 0x20234a7789ecdcf0 },
-        { 0x76d20db67178b252, 0x071c34f9d51ed160, 0xf62a4a20b3e41170, 0x7cd682353cffe366 }
-    },
-    {
-        { 0xa665cd6068acf4f3, 0x42d92d183cd7e3d3, 0x5759389d336025d9, 0x3ef0253b2b2cd8ff },
-        { 0x0be1a45bd887fab6, 0x2a846a32ba403b6e, 0xd9921012e96e6000, 0x2838c8863bdc0943 },
-        { 0xd16bb0cf4a465030, 0xfa496b4115c577ab, 0x82cfae8af4ab419d, 0x21dcb8a606a82812 }
-    },
-    {
-        { 0x9a8d00fabe7731ba, 0x8203607e629e1889, 0xb2cc023743f3d97f, 0x5d840dbf6c6f678b },
+        { 0x76d20db67178b252, 0x071c34f9d51ed160, -0x09d5b5df4c1bee90, 0x7cd682353cffe366 }
+    },
+    {
+        { -0x599a329f97530b0d, 0x42d92d183cd7e3d3, 0x5759389d336025d9, 0x3ef0253b2b2cd8ff },
+        { 0x0be1a45bd887fab6, 0x2a846a32ba403b6e, -0x266defed1691a000, 0x2838c8863bdc0943 },
+        { -0x2e944f30b5b9afd0, -0x05b694beea3a8855, -0x7d3051750b54be63, 0x21dcb8a606a82812 }
+    },
+    {
+        { -0x6572ff054188ce46, -0x7dfc9f819d61e777, -0x4d33fdc8bc0c2681, 0x5d840dbf6c6f678b },
         { 0x5c6004468c9d9fc8, 0x2540096ed42aa3cb, 0x125b4d4c12ee2f9c, 0x0bc3d08194a31dab },
         { 0x706e380d309fe18b, 0x6eb02da6b9e165c7, 0x57bbba997dae20ab, 0x3a4276232ac196dd }
     },
     {
-        { 0x3bf8c172db447ecb, 0x5fcfc41fc6282dbd, 0x80acffc075aa15fe, 0x0770c9e824e1a9f9 },
-        { 0x4b42432c8a7084fa, 0x898a19e3dfb9e545, 0xbe9f00219c58e45d, 0x1ff177cea16debd1 },
-        { 0xcf61d99a45b5b5fd, 0x860984e91b3a7924, 0xe7300919303e3e89, 0x39f264fd41500b1e }
-    },
-    {
-        { 0xd19b4aabfe097be1, 0xa46dfce1dfe01929, 0xc3c908942ca6f1ff, 0x65c621272c35f14e },
-        { 0xa7ad3417dbe7e29c, 0xbd94376a2b9c139c, 0xa0e91b8e93597ba9, 0x1712d73468889840 },
-        { 0xe72b89f8ce3193dd, 0x4d103356a125c0bb, 0x0419a93d2e1cfe83, 0x22f9800ab19ce272 }
-    },
-    {
-        { 0x42029fdd9a6efdac, 0xb912cebe34a54941, 0x640f64b987bdf37b, 0x4171a4d38598cab4 },
-        { 0x605a368a3e9ef8cb, 0xe3e9c022a5504715, 0x553d48b05f24248f, 0x13f416cd647626e5 },
-        { 0xfa2758aa99c94c8c, 0x23006f6fb000b807, 0xfbd291ddadda5392, 0x508214fa574bd1ab }
-    },
-    {
-        { 0x461a15bb53d003d6, 0xb2102888bcf3c965, 0x27c576756c683a5a, 0x3a7758a4c86cb447 },
-        { 0xc20269153ed6fe4b, 0xa65a6739511d77c4, 0xcbde26462c14af94, 0x22f960ec6faba74b },
+        { 0x3bf8c172db447ecb, 0x5fcfc41fc6282dbd, -0x7f53003f8a55ea02, 0x0770c9e824e1a9f9 },
+        { 0x4b42432c8a7084fa, -0x7675e61c20461abb, -0x4160ffde63a71ba3, 0x1ff177cea16debd1 },
+        { -0x309e2665ba4a4a03, -0x79f67b16e4c586dc, -0x18cff6e6cfc1c177, 0x39f264fd41500b1e }
+    },
+    {
+        { -0x2e64b55401f6841f, -0x5b92031e201fe6d7, -0x3c36f76bd3590e01, 0x65c621272c35f14e },
+        { -0x5852cbe824181d64, -0x426bc895d463ec64, -0x5f16e4716ca68457, 0x1712d73468889840 },
+        { -0x18d4760731ce6c23, 0x4d103356a125c0bb, 0x0419a93d2e1cfe83, 0x22f9800ab19ce272 }
+    },
+    {
+        { 0x42029fdd9a6efdac, -0x46ed3141cb5ab6bf, 0x640f64b987bdf37b, 0x4171a4d38598cab4 },
+        { 0x605a368a3e9ef8cb, -0x1c163fdd5aafb8eb, 0x553d48b05f24248f, 0x13f416cd647626e5 },
+        { -0x05d8a7556636b374, 0x23006f6fb000b807, -0x042d6e225225ac6e, 0x508214fa574bd1ab }
+    },
+    {
+        { 0x461a15bb53d003d6, -0x4defd777430c369b, 0x27c576756c683a5a, 0x3a7758a4c86cb447 },
+        { -0x3dfd96eac12901b5, -0x59a598c6aee2883c, -0x3421d9b9d3eb506c, 0x22f960ec6faba74b },
         { 0x548111f693ae5076, 0x1dae21df1dfd54a6, 0x12248c90f3115e65, 0x5d9fd15f8de7f494 }
     },
     {
-        { 0x3f244d2aeed7521e, 0x8e3a9028432e9615, 0xe164ba772e9c16d4, 0x3bc187fa47eb98d8 },
-        { 0x031408d36d63727f, 0x6a379aefd7c7b533, 0xa9e18fc5ccaee24b, 0x332f35914f8fbed3 },
-        { 0x6d470115ea86c20c, 0x998ab7cb6c46d125, 0xd77832b53a660188, 0x450d81ce906fba03 }
+        { 0x3f244d2aeed7521e, -0x71c56fd7bcd169eb, -0x1e9b4588d163e92c, 0x3bc187fa47eb98d8 },
+        { 0x031408d36d63727f, 0x6a379aefd7c7b533, -0x561e703a33511db5, 0x332f35914f8fbed3 },
+        { 0x6d470115ea86c20c, -0x6675483493b92edb, -0x2887cd4ac599fe78, 0x450d81ce906fba03 }
     },
 },
 {
     {
         { 0x23264d66b2cae0b5, 0x7dbaed33ebca6576, 0x030ebed6f0d24ac8, 0x2a887f78f7635510 },
-        { 0xf8ae4d2ad8453902, 0x7018058ee8db2d1d, 0xaab3995fc7d2c11e, 0x53b16d2324ccca79 },
+        { -0x0751b2d527bac6fe, 0x7018058ee8db2d1d, -0x554c66a0382d3ee2, 0x53b16d2324ccca79 },
         { 0x2a23b9e75c012d4f, 0x0c974651cae1f2ea, 0x2fb63273675d70ca, 0x0ba7250b864403f5 }
     },
     {
-        { 0xdd63589386f86d9c, 0x61699176e13a85a4, 0x2e5111954eaa7d57, 0x32c21b57fb60bdfb },
-        { 0xbb0d18fd029c6421, 0xbc2d142189298f02, 0x8347f8e68b250e96, 0x7b9f2fe8032d71c9 },
-        { 0xd87823cd319e0780, 0xefc4cfc1897775c5, 0x4854fb129a0ab3f7, 0x12c49d417238c371 }
-    },
-    {
-        { 0x09b3a01783799542, 0x626dd08faad5ee3f, 0xba00bceeeb70149f, 0x1421b246a0a444c9 },
-        { 0x0950b533ffe83769, 0x21861c1d8e1d6bd1, 0xf022d8381302e510, 0x2509200c6391cab4 },
-        { 0x4aa43a8e8c24a7c7, 0x04c1f540d8f05ef5, 0xadba5e0c0b3eb9dc, 0x2ab5504448a49ce3 }
-    },
-    {
-        { 0xdc07ac631c5d3afa, 0x58615171f9df8c6c, 0x72a079d89d73e2b0, 0x7301f4ceb4eae15d },
-        { 0x2ed227266f0f5dec, 0x9824ee415ed50824, 0x807bec7c9468d415, 0x7093bae1b521e23f },
-        { 0x6409e759d6722c41, 0xa674e1cf72bf729b, 0xbc0a24eb3c21e569, 0x390167d24ebacb23 }
-    },
-    {
-        { 0xd7bb054ba2f2120b, 0xe2b9ceaeb10589b7, 0x3fe8bac8f3c0edbe, 0x4cbd40767112cb69 },
-        { 0x27f58e3bba353f1c, 0x4c47764dbf6a4361, 0xafbbc4e56e562650, 0x07db2ee6aae1a45d },
+        { -0x229ca76c79079264, 0x61699176e13a85a4, 0x2e5111954eaa7d57, 0x32c21b57fb60bdfb },
+        { -0x44f2e702fd639bdf, -0x43d2ebde76d670fe, -0x7cb8071974daf16a, 0x7b9f2fe8032d71c9 },
+        { -0x2787dc32ce61f880, -0x103b303e76888a3b, 0x4854fb129a0ab3f7, 0x12c49d417238c371 }
+    },
+    {
+        { 0x09b3a01783799542, 0x626dd08faad5ee3f, -0x45ff4311148feb61, 0x1421b246a0a444c9 },
+        { 0x0950b533ffe83769, 0x21861c1d8e1d6bd1, -0x0fdd27c7ecfd1af0, 0x2509200c6391cab4 },
+        { 0x4aa43a8e8c24a7c7, 0x04c1f540d8f05ef5, -0x5245a1f3f4c14624, 0x2ab5504448a49ce3 }
+    },
+    {
+        { -0x23f8539ce3a2c506, 0x58615171f9df8c6c, 0x72a079d89d73e2b0, 0x7301f4ceb4eae15d },
+        { 0x2ed227266f0f5dec, -0x67db11bea12af7dc, -0x7f8413836b972beb, 0x7093bae1b521e23f },
+        { 0x6409e759d6722c41, -0x598b1e308d408d65, -0x43f5db14c3de1a97, 0x390167d24ebacb23 }
+    },
+    {
+        { -0x2844fab45d0dedf5, -0x1d4631514efa7649, 0x3fe8bac8f3c0edbe, 0x4cbd40767112cb69 },
+        { 0x27f58e3bba353f1c, 0x4c47764dbf6a4361, -0x50443b1a91a9d9b0, 0x07db2ee6aae1a45d },
         { 0x0b603cc029c58176, 0x5988e3825cb15d61, 0x2bb61413dcf0ad8d, 0x7b8eec6c74183287 }
     },
     {
-        { 0x32fee570fc386b73, 0xda8b0141da3a8cc7, 0x975ffd0ac8968359, 0x6ee809a1b132a855 },
-        { 0xe4ca40782cd27cb0, 0xdaf9c323fbe967bd, 0xb29bd34a8ad41e9e, 0x72810497626ede4d },
-        { 0x9444bb31fcfd863a, 0x2fe3690a3e4e48c5, 0xdc29c867d088fa25, 0x13bd1e38d173292e }
+        { 0x32fee570fc386b73, -0x2574febe25c57339, -0x68a002f537697ca7, 0x6ee809a1b132a855 },
+        { -0x1b35bf87d32d8350, -0x25063cdc04169843, -0x4d642cb5752be162, 0x72810497626ede4d },
+        { -0x6bbb44ce030279c6, 0x2fe3690a3e4e48c5, -0x23d637982f7705db, 0x13bd1e38d173292e }
     },
     {
         { 0x223fb5cf1dfac521, 0x325c25316f554450, 0x030b98d7659177ac, 0x1ed018b64f88a4bd },
-        { 0xd32b4cd8696149b5, 0xe55937d781d8aab7, 0x0bcb2127ae122b94, 0x41e86fcfb14099b0 },
-        { 0x3630dfa1b802a6b0, 0x880f874742ad3bd5, 0x0af90d6ceec5a4d4, 0x746a247a37cdc5d9 }
-    },
-    {
-        { 0x6eccd85278d941ed, 0x2254ae83d22f7843, 0xc522d02e7bbfcdb7, 0x681e3351bff0e4e2 },
-        { 0xd531b8bd2b7b9af6, 0x5005093537fc5b51, 0x232fcf25c593546d, 0x20a365142bb40f49 },
-        { 0x8b64b59d83034f45, 0x2f8b71f21fa20efb, 0x69249495ba6550e4, 0x539ef98e45d5472b }
-    },
-},
-{
-    {
-        { 0xd074d8961cae743f, 0xf86d18f5ee1c63ed, 0x97bdc55be7f4ed29, 0x4cbad279663ab108 },
-        { 0x6e7bb6a1a6205275, 0xaa4f21d7413c8e83, 0x6f56d155e88f5cb2, 0x2de25d4ba6345be1 },
-        { 0x80d19024a0d71fcd, 0xc525c20afb288af8, 0xb1a3974b5f3a6419, 0x7d7fbcefe2007233 }
-    },
-    {
-        { 0xcd7c5dc5f3c29094, 0xc781a29a2a9105ab, 0x80c61d36421c3058, 0x4f9cd196dcd8d4d7 },
-        { 0xfaef1e6a266b2801, 0x866c68c4d5739f16, 0xf68a2fbc1b03762c, 0x5975435e87b75a8d },
-        { 0x199297d86a7b3768, 0xd0d058241ad17a63, 0xba029cad5c1c0c17, 0x7ccdd084387a0307 }
-    },
-    {
-        { 0x9b0c84186760cc93, 0xcdae007a1ab32a99, 0xa88dec86620bda18, 0x3593ca848190ca44 },
-        { 0xdca6422c6d260417, 0xae153d50948240bd, 0xa9c0c1b4fb68c677, 0x428bd0ed61d0cf53 },
-        { 0x9213189a5e849aa7, 0xd4d8c33565d8facd, 0x8c52545b53fdbbd1, 0x27398308da2d63e6 }
-    },
-    {
-        { 0xb9a10e4c0a702453, 0x0fa25866d57d1bde, 0xffb9d9b5cd27daf7, 0x572c2945492c33fd },
-        { 0x42c38d28435ed413, 0xbd50f3603278ccc9, 0xbb07ab1a79da03ef, 0x269597aebe8c3355 },
-        { 0xc77fc745d6cd30be, 0xe4dfe8d3e3baaefb, 0xa22c8830aa5dda0c, 0x7f985498c05bca80 }
-    },
-    {
-        { 0xd35615520fbf6363, 0x08045a45cf4dfba6, 0xeec24fbc873fa0c2, 0x30f2653cd69b12e7 },
-        { 0x3849ce889f0be117, 0x8005ad1b7b54a288, 0x3da3c39f23fc921c, 0x76c2ec470a31f304 },
-        { 0x8a08c938aac10c85, 0x46179b60db276bcb, 0xa920c01e0e6fac70, 0x2f1273f1596473da }
-    },
-    {
-        { 0x30488bd755a70bc0, 0x06d6b5a4f1d442e7, 0xead1a69ebc596162, 0x38ac1997edc5f784 },
-        { 0x4739fc7c8ae01e11, 0xfd5274904a6aab9f, 0x41d98a8287728f2e, 0x5d9e572ad85b69f2 },
-        { 0x0666b517a751b13b, 0x747d06867e9b858c, 0xacacc011454dde49, 0x22dfcd9cbfe9e69c }
+        { -0x2cd4b327969eb64b, -0x1aa6c8287e275549, 0x0bcb2127ae122b94, 0x41e86fcfb14099b0 },
+        { 0x3630dfa1b802a6b0, -0x77f078b8bd52c42b, 0x0af90d6ceec5a4d4, 0x746a247a37cdc5d9 }
+    },
+    {
+        { 0x6eccd85278d941ed, 0x2254ae83d22f7843, -0x3add2fd184403249, 0x681e3351bff0e4e2 },
+        { -0x2ace4742d484650a, 0x5005093537fc5b51, 0x232fcf25c593546d, 0x20a365142bb40f49 },
+        { -0x749b4a627cfcb0bb, 0x2f8b71f21fa20efb, 0x69249495ba6550e4, 0x539ef98e45d5472b }
+    },
+},
+{
+    {
+        { -0x2f8b2769e3518bc1, -0x0792e70a11e39c13, -0x68423aa4180b12d7, 0x4cbad279663ab108 },
+        { 0x6e7bb6a1a6205275, -0x55b0de28bec3717d, 0x6f56d155e88f5cb2, 0x2de25d4ba6345be1 },
+        { -0x7f2e6fdb5f28e033, -0x3ada3df504d77508, -0x4e5c68b4a0c59be7, 0x7d7fbcefe2007233 }
+    },
+    {
+        { -0x3283a23a0c3d6f6c, -0x387e5d65d56efa55, -0x7f39e2c9bde3cfa8, 0x4f9cd196dcd8d4d7 },
+        { -0x0510e195d994d7ff, -0x7993973b2a8c60ea, -0x0975d043e4fc89d4, 0x5975435e87b75a8d },
+        { 0x199297d86a7b3768, -0x2f2fa7dbe52e859d, -0x45fd6352a3e3f3e9, 0x7ccdd084387a0307 }
+    },
+    {
+        { -0x64f37be7989f336d, -0x3251ff85e54cd567, -0x577213799df425e8, 0x3593ca848190ca44 },
+        { -0x2359bdd392d9fbe9, -0x51eac2af6b7dbf43, -0x563f3e4b04973989, 0x428bd0ed61d0cf53 },
+        { -0x6dece765a17b6559, -0x2b273cca9a270533, -0x73adaba4ac02442f, 0x27398308da2d63e6 }
+    },
+    {
+        { -0x465ef1b3f58fdbad, 0x0fa25866d57d1bde, -0x0046264a32d82509, 0x572c2945492c33fd },
+        { 0x42c38d28435ed413, -0x42af0c9fcd873337, -0x44f854e58625fc11, 0x269597aebe8c3355 },
+        { -0x388038ba2932cf42, -0x1b20172c1c455105, -0x5dd377cf55a225f4, 0x7f985498c05bca80 }
+    },
+    {
+        { -0x2ca9eaadf0409c9d, 0x08045a45cf4dfba6, -0x113db04378c05f3e, 0x30f2653cd69b12e7 },
+        { 0x3849ce889f0be117, -0x7ffa52e484ab5d78, 0x3da3c39f23fc921c, 0x76c2ec470a31f304 },
+        { -0x75f736c7553ef37b, 0x46179b60db276bcb, -0x56df3fe1f1905390, 0x2f1273f1596473da }
+    },
+    {
+        { 0x30488bd755a70bc0, 0x06d6b5a4f1d442e7, -0x152e596143a69e9e, 0x38ac1997edc5f784 },
+        { 0x4739fc7c8ae01e11, -0x02ad8b6fb5955461, 0x41d98a8287728f2e, 0x5d9e572ad85b69f2 },
+        { 0x0666b517a751b13b, 0x747d06867e9b858c, -0x53533feebab221b7, 0x22dfcd9cbfe9e69c }
     },
     {
         { 0x56ec59b4103be0a1, 0x2ee3baecd259f969, 0x797cb29413f5cd32, 0x0fe9877824cde472 },
-        { 0x8ddbd2e0c30d0cd9, 0xad8e665facbb4333, 0x8f6b258c322a961f, 0x6b2916c05448c1c7 },
+        { -0x72242d1f3cf2f327, -0x527199a05344bccd, -0x7094da73cdd569e1, 0x6b2916c05448c1c7 },
         { 0x7edb34d10aba913b, 0x4ea3cd822e6dac0e, 0x66083dff6578f815, 0x4c303f307ff00a17 }
     },
     {
-        { 0x29fc03580dd94500, 0xecd27aa46fbbec93, 0x130a155fc2e2a7f8, 0x416b151ab706a1d5 },
-        { 0xd30a3bd617b28c85, 0xc5d377b739773bea, 0xc6c6e78c1e6a5cbf, 0x0d61b8f78b2ab7c4 },
-        { 0x56a8d7efe9c136b0, 0xbd07e5cd58e44b20, 0xafe62fda1b57e0ab, 0x191a2af74277e8d2 }
-    },
-},
-{
-    {
-        { 0x09d4b60b2fe09a14, 0xc384f0afdbb1747e, 0x58e2ea8978b5fd6e, 0x519ef577b5e09b0a },
-        { 0xd550095bab6f4985, 0x04f4cd5b4fbfaf1a, 0x9d8e2ed12a0c7540, 0x2bc24e04b2212286 },
+        { 0x29fc03580dd94500, -0x132d855b9044136d, 0x130a155fc2e2a7f8, 0x416b151ab706a1d5 },
+        { -0x2cf5c429e84d737b, -0x3a2c8848c688c416, -0x39391873e195a341, 0x0d61b8f78b2ab7c4 },
+        { 0x56a8d7efe9c136b0, -0x42f81a32a71bb4e0, -0x5019d025e4a81f55, 0x191a2af74277e8d2 }
+    },
+},
+{
+    {
+        { 0x09d4b60b2fe09a14, -0x3c7b0f50244e8b82, 0x58e2ea8978b5fd6e, 0x519ef577b5e09b0a },
+        { -0x2aaff6a45490b67b, 0x04f4cd5b4fbfaf1a, -0x6271d12ed5f38ac0, 0x2bc24e04b2212286 },
         { 0x1863d7d91124cca9, 0x7ac08145b88a708e, 0x2bcd7309857031f5, 0x62337a6e8ab8fae5 }
     },
     {
-        { 0xd1ab324e1b3a1273, 0x18947cf181055340, 0x3b5d9567a98c196e, 0x7fa00425802e1e68 },
-        { 0x4bcef17f06ffca16, 0xde06e1db692ae16a, 0x0753702d614f42b0, 0x5f6041b45b9212d0 },
-        { 0x7d531574028c2705, 0x80317d69db0d75fe, 0x30fface8ef8c8ddd, 0x7e9de97bb6c3e998 }
-    },
-    {
-        { 0xf004be62a24d40dd, 0xba0659910452d41f, 0x81c45ee162a44234, 0x4cb829d8a22266ef },
-        { 0x1558967b9e6585a3, 0x97c99ce098e98b92, 0x10af149b6eb3adad, 0x42181fe8f4d38cfa },
+        { -0x2e54cdb1e4c5ed8d, 0x18947cf181055340, 0x3b5d9567a98c196e, 0x7fa00425802e1e68 },
+        { 0x4bcef17f06ffca16, -0x21f91e2496d51e96, 0x0753702d614f42b0, 0x5f6041b45b9212d0 },
+        { 0x7d531574028c2705, -0x7fce829624f28a02, 0x30fface8ef8c8ddd, 0x7e9de97bb6c3e998 }
+    },
+    {
+        { -0x0ffb419d5db2bf23, -0x45f9a66efbad2be1, -0x7e3ba11e9d5bbdcc, 0x4cb829d8a22266ef },
+        { 0x1558967b9e6585a3, -0x6836631f6716746e, 0x10af149b6eb3adad, 0x42181fe8f4d38cfa },
         { 0x1dbcaa8407b86681, 0x081f001e8b26753b, 0x3cd7ce6a84048e81, 0x78af11633f25f22c }
     },
     {
-        { 0x3241c00e7d65318c, 0xe6bee5dcd0e86de7, 0x118b2dc2fbc08c26, 0x680d04a7fc603dc3 },
-        { 0x8416ebd40b50babc, 0x1508722628208bee, 0xa3148fafb9c1c36d, 0x0d07daacd32d7d5d },
-        { 0xf9c2414a695aa3eb, 0xdaa42c4c05a68f21, 0x7c6c23987f93963e, 0x210e8cd30c3954e3 }
-    },
-    {
-        { 0x2b50f16137fe6c26, 0xe102bcd856e404d8, 0x12b0f1414c561f6b, 0x51b17bc8d028ec91 },
-        { 0xac4201f210a71c06, 0x6a65e0aef3bfb021, 0xbc42c35c393632f7, 0x56ea8db1865f0742 },
-        { 0xfff5fb4bcf535119, 0xf4989d79df1108a0, 0xbdfcea659a3ba325, 0x18a11f1174d1a6f2 }
-    },
-    {
-        { 0xfbd63cdad27a5f2c, 0xf00fc4bc8aa106d7, 0x53fb5c1a8e64a430, 0x04eaabe50c1a2e85 },
-        { 0x407375ab3f6bba29, 0x9ec3b6d8991e482e, 0x99c80e82e55f92e9, 0x307c13b6fb0c0ae1 },
-        { 0x24751021cb8ab5e7, 0xfc2344495c5010eb, 0x5f1e717b4e5610a1, 0x44da5f18c2710cd5 }
-    },
-    {
-        { 0x9156fe6b89d8eacc, 0xe6b79451e23126a1, 0xbd7463d93944eb4e, 0x726373f6767203ae },
-        { 0x033cc55ff1b82eb5, 0xb15ae36d411cae52, 0xba40b6198ffbacd3, 0x768edce1532e861f },
-        { 0xe305ca72eb7ef68a, 0x662cf31f70eadb23, 0x18f026fdb4c45b68, 0x513b5384b5d2ecbd }
-    },
-    {
-        { 0x5e2702878af34ceb, 0x900b0409b946d6ae, 0x6512ebf7dabd8512, 0x61d9b76988258f81 },
+        { 0x3241c00e7d65318c, -0x19411a232f179219, 0x118b2dc2fbc08c26, 0x680d04a7fc603dc3 },
+        { -0x7be9142bf4af4544, 0x1508722628208bee, -0x5ceb7050463e3c93, 0x0d07daacd32d7d5d },
+        { -0x063dbeb596a55c15, -0x255bd3b3fa5970df, 0x7c6c23987f93963e, 0x210e8cd30c3954e3 }
+    },
+    {
+        { 0x2b50f16137fe6c26, -0x1efd4327a91bfb28, 0x12b0f1414c561f6b, 0x51b17bc8d028ec91 },
+        { -0x53bdfe0def58e3fa, 0x6a65e0aef3bfb021, -0x43bd3ca3c6c9cd09, 0x56ea8db1865f0742 },
+        { -0x000a04b430acaee7, -0x0b67628620eef760, -0x4203159a65c45cdb, 0x18a11f1174d1a6f2 }
+    },
+    {
+        { -0x0429c3252d85a0d4, -0x0ff03b43755ef929, 0x53fb5c1a8e64a430, 0x04eaabe50c1a2e85 },
+        { 0x407375ab3f6bba29, -0x613c492766e1b7d2, -0x6637f17d1aa06d17, 0x307c13b6fb0c0ae1 },
+        { 0x24751021cb8ab5e7, -0x03dcbbb6a3afef15, 0x5f1e717b4e5610a1, 0x44da5f18c2710cd5 }
+    },
+    {
+        { -0x6ea9019476271534, -0x19486bae1dced95f, -0x428b9c26c6bb14b2, 0x726373f6767203ae },
+        { 0x033cc55ff1b82eb5, -0x4ea51c92bee351ae, -0x45bf49e67004532d, 0x768edce1532e861f },
+        { -0x1cfa358d14810976, 0x662cf31f70eadb23, 0x18f026fdb4c45b68, 0x513b5384b5d2ecbd }
+    },
+    {
+        { 0x5e2702878af34ceb, -0x6ff4fbf646b92952, 0x6512ebf7dabd8512, 0x61d9b76988258f81 },
         { 0x46d46280c729989e, 0x4b93fbd05368a5dd, 0x63df3f81d1765a89, 0x34cebd64b9a0a223 },
-        { 0xa6c5a71349b7d94b, 0xa3f3d15823eb9446, 0x0416fbd277484834, 0x69d45e6f2c70812f }
-    },
-},
-{
-    {
-        { 0x9fe62b434f460efb, 0xded303d4a63607d6, 0xf052210eb7a0da24, 0x237e7dbe00545b93 },
-        { 0xce16f74bc53c1431, 0x2b9725ce2072edde, 0xb8b9c36fb5b23ee7, 0x7e2e0e450b5cc908 },
+        { -0x593a58ecb64826b5, -0x5c0c2ea7dc146bba, 0x0416fbd277484834, 0x69d45e6f2c70812f }
+    },
+},
+{
+    {
+        { -0x6019d4bcb0b9f105, -0x212cfc2b59c9f82a, -0x0faddef1485f25dc, 0x237e7dbe00545b93 },
+        { -0x31e908b43ac3ebcf, 0x2b9725ce2072edde, -0x47463c904a4dc119, 0x7e2e0e450b5cc908 },
         { 0x013575ed6701b430, 0x231094e69f0bfd10, 0x75320f1583e47f22, 0x71afa699b11155e3 }
     },
     {
-        { 0xea423c1c473b50d6, 0x51e87a1f3b38ef10, 0x9b84bf5fb2c9be95, 0x00731fbc78f89a1c },
-        { 0x65ce6f9b3953b61d, 0xc65839eaafa141e6, 0x0f435ffda9f759fe, 0x021142e9c2b1c28e },
-        { 0xe430c71848f81880, 0xbf960c225ecec119, 0xb6dae0836bba15e3, 0x4c4d6f3347e15808 }
+        { -0x15bdc3e3b8c4af2a, 0x51e87a1f3b38ef10, -0x647b40a04d36416b, 0x00731fbc78f89a1c },
+        { 0x65ce6f9b3953b61d, -0x39a7c615505ebe1a, 0x0f435ffda9f759fe, 0x021142e9c2b1c28e },
+        { -0x1bcf38e7b707e780, -0x4069f3dda1313ee7, -0x49251f7c9445ea1d, 0x4c4d6f3347e15808 }
     },
     {
         { 0x2f0cddfc988f1970, 0x6b916227b0b9f51b, 0x6ec7b6c4779176be, 0x38bf9500a88f9fa8 },
-        { 0x18f7eccfc17d1fc9, 0x6c75f5a651403c14, 0xdbde712bf7ee0cdf, 0x193fddaaa7e47a22 },
-        { 0x1fd2c93c37e8876f, 0xa2f61e5a18d1462c, 0x5080f58239241276, 0x6a6fb99ebf0d4969 }
-    },
-    {
-        { 0xeeb122b5b6e423c6, 0x939d7010f286ff8e, 0x90a92a831dcf5d8c, 0x136fda9f42c5eb10 },
-        { 0x6a46c1bb560855eb, 0x2416bb38f893f09d, 0xd71d11378f71acc1, 0x75f76914a31896ea },
-        { 0xf94cdfb1a305bdd1, 0x0f364b9d9ff82c08, 0x2a87d8a5c3bb588a, 0x022183510be8dcba }
-    },
-    {
-        { 0x9d5a710143307a7f, 0xb063de9ec47da45f, 0x22bbfe52be927ad3, 0x1387c441fd40426c },
-        { 0x4af766385ead2d14, 0xa08ed880ca7c5830, 0x0d13a6e610211e3d, 0x6a071ce17b806c03 },
-        { 0xb5d3c3d187978af8, 0x722b5a3d7f0e4413, 0x0d7b4848bb477ca0, 0x3171b26aaf1edc92 }
-    },
-    {
-        { 0xa60db7d8b28a47d1, 0xa6bf14d61770a4f1, 0xd4a1f89353ddbd58, 0x6c514a63344243e9 },
-        { 0xa92f319097564ca8, 0xff7bb84c2275e119, 0x4f55fe37a4875150, 0x221fd4873cf0835a },
-        { 0x2322204f3a156341, 0xfb73e0e9ba0a032d, 0xfce0dd4c410f030e, 0x48daa596fb924aaa }
-    },
-    {
-        { 0x14f61d5dc84c9793, 0x9941f9e3ef418206, 0xcdf5b88f346277ac, 0x58c837fa0e8a79a9 },
-        { 0x6eca8e665ca59cc7, 0xa847254b2e38aca0, 0x31afc708d21e17ce, 0x676dd6fccad84af7 },
-        { 0x0cf9688596fc9058, 0x1ddcbbf37b56a01b, 0xdcc2e77d4935d66a, 0x1c4f73f2c6a57f0a }
-    },
-    {
-        { 0xb36e706efc7c3484, 0x73dfc9b4c3c1cf61, 0xeb1d79c9781cc7e5, 0x70459adb7daf675c },
-        { 0x0e7a4fbd305fa0bb, 0x829d4ce054c663ad, 0xf421c3832fe33848, 0x795ac80d1bf64c42 },
-        { 0x1b91db4991b42bb3, 0x572696234b02dcca, 0x9fdf9ee51f8c78dc, 0x5fe162848ce21fd3 }
-    },
-},
-{
-    {
-        { 0x315c29c795115389, 0xd7e0e507862f74ce, 0x0c4a762185927432, 0x72de6c984a25a1e4 },
-        { 0xe2790aae4d077c41, 0x8b938270db7469a3, 0x6eb632dc8abd16a2, 0x720814ecaa064b72 },
-        { 0xae9ab553bf6aa310, 0x050a50a9806d6e1b, 0x92bb7403adff5139, 0x0394d27645be618b }
-    },
-    {
-        { 0xf5396425b23545a4, 0x15a7a27e98fbb296, 0xab6c52bc636fdd86, 0x79d995a8419334ee },
-        { 0x4d572251857eedf4, 0xe3724edde19e93c5, 0x8a71420e0b797035, 0x3b3c833687abe743 },
-        { 0xcd8a8ea61195dd75, 0xa504d8a81dd9a82f, 0x540dca81a35879b6, 0x60dd16a379c86a8a }
-    },
-    {
-        { 0x3501d6f8153e47b8, 0xb7a9675414a2f60c, 0x112ee8b6455d9523, 0x4e62a3c18112ea8a },
-        { 0x35a2c8487381e559, 0x596ffea6d78082cb, 0xcb9771ebdba7b653, 0x5a08b5019b4da685 },
-        { 0xc8d4ac04516ab786, 0x595af3215295b23d, 0xd6edd234db0230c1, 0x0929efe8825b41cc }
-    },
-    {
-        { 0x8b3172b7ad56651d, 0x01581b7a3fabd717, 0x2dc94df6424df6e4, 0x30376e5d2c29284f },
-        { 0x5f0601d1cbd0f2d3, 0x736e412f6132bb7f, 0x83604432238dde87, 0x1e3a5272f5c0753c },
-        { 0xd2918da78159a59c, 0x6bdc1cd93f0713f3, 0x565f7a934acd6590, 0x53daacec4cb4c128 }
-    },
-    {
-        { 0x99852bc3852cfdb0, 0x2cc12e9559d6ed0b, 0x70f9e2bf9b5ac27b, 0x4f3b8c117959ae99 },
-        { 0x4ca73bd79cc8a7d6, 0x4d4a738f47e9a9b2, 0xf4cbf12942f5fe00, 0x01a13ff9bdbf0752 },
-        { 0x55b6c9c82ff26412, 0x1ac4a8c91fb667a8, 0xd527bfcfeb778bf2, 0x303337da7012a3be }
-    },
-    {
-        { 0x976d3ccbfad2fdd1, 0xcb88839737a640a8, 0x2ff00c1d6734cb25, 0x269ff4dc789c2d2b },
-        { 0x955422228c1c9d7c, 0x01fac1371a9b340f, 0x7e8d9177925b48d7, 0x53f8ad5661b3e31b },
-        { 0x0c003fbdc08d678d, 0x4d982fa37ead2b17, 0xc07e6bcdb2e582f1, 0x296c7291df412a44 }
-    },
-    {
-        { 0xdfb23205dab8b59e, 0x465aeaa0c8092250, 0xd133c1189a725d18, 0x2327370261f117d1 },
-        { 0x7903de2b33daf397, 0xd0ff0619c9a624b3, 0x8a1d252b555b3e18, 0x2b6d581c52e0b7c0 },
-        { 0x3d0543d3623e7986, 0x679414c2c278a354, 0xae43f0cc726196f6, 0x7836c41f8245eaba }
-    },
-    {
-        { 0xca651e848011937c, 0xc6b0c46e6ef41a28, 0xb7021ba75f3f8d52, 0x119dff99ead7b9fd },
-        { 0xe7a254db49e95a81, 0x5192d5d008b0ad73, 0x4d20e5b1d00afc07, 0x5d55f8012cf25f38 },
-        { 0x43eadfcbf4b31d4d, 0xc6503f7411148892, 0xfeee68c5060d3b17, 0x329293b3dd4a0ac8 }
-    },
-},
-{
-    {
-        { 0x2879852d5d7cb208, 0xb8dedd70687df2e7, 0xdc0bffab21687891, 0x2b44c043677daa35 },
-        { 0x4e59214fe194961a, 0x49be7dc70d71cd4f, 0x9300cfd23b50f22d, 0x4789d446fc917232 },
-        { 0x1a1c87ab074eb78e, 0xfac6d18e99daf467, 0x3eacbbcd484f9067, 0x60c52eef2bb9a4e4 }
-    },
-    {
-        { 0x702bc5c27cae6d11, 0x44c7699b54a48cab, 0xefbc4056ba492eb2, 0x70d77248d9b6676d },
-        { 0x0b5d89bc3bfd8bf1, 0xb06b9237c9f3551a, 0x0e4c16b0d53028f5, 0x10bc9c312ccfcaab },
-        { 0xaa8ae84b3ec2a05b, 0x98699ef4ed1781e0, 0x794513e4708e85d1, 0x63755bd3a976f413 }
+        { 0x18f7eccfc17d1fc9, 0x6c75f5a651403c14, -0x24218ed40811f321, 0x193fddaaa7e47a22 },
+        { 0x1fd2c93c37e8876f, -0x5d09e1a5e72eb9d4, 0x5080f58239241276, 0x6a6fb99ebf0d4969 }
+    },
+    {
+        { -0x114edd4a491bdc3a, -0x6c628fef0d790072, -0x6f56d57ce230a274, 0x136fda9f42c5eb10 },
+        { 0x6a46c1bb560855eb, 0x2416bb38f893f09d, -0x28e2eec8708e533f, 0x75f76914a31896ea },
+        { -0x06b3204e5cfa422f, 0x0f364b9d9ff82c08, 0x2a87d8a5c3bb588a, 0x022183510be8dcba }
+    },
+    {
+        { -0x62a58efebccf8581, -0x4f9c21613b825ba1, 0x22bbfe52be927ad3, 0x1387c441fd40426c },
+        { 0x4af766385ead2d14, -0x5f71277f3583a7d0, 0x0d13a6e610211e3d, 0x6a071ce17b806c03 },
+        { -0x4a2c3c2e78687508, 0x722b5a3d7f0e4413, 0x0d7b4848bb477ca0, 0x3171b26aaf1edc92 }
+    },
+    {
+        { -0x59f248274d75b82f, -0x5940eb29e88f5b0f, -0x2b5e076cac2242a8, 0x6c514a63344243e9 },
+        { -0x56d0ce6f68a9b358, -0x008447b3dd8a1ee7, 0x4f55fe37a4875150, 0x221fd4873cf0835a },
+        { 0x2322204f3a156341, -0x048c1f1645f5fcd3, -0x031f22b3bef0fcf2, 0x48daa596fb924aaa }
+    },
+    {
+        { 0x14f61d5dc84c9793, -0x66be061c10be7dfa, -0x320a4770cb9d8854, 0x58c837fa0e8a79a9 },
+        { 0x6eca8e665ca59cc7, -0x57b8dab4d1c75360, 0x31afc708d21e17ce, 0x676dd6fccad84af7 },
+        { 0x0cf9688596fc9058, 0x1ddcbbf37b56a01b, -0x233d1882b6ca2996, 0x1c4f73f2c6a57f0a }
+    },
+    {
+        { -0x4c918f910383cb7c, 0x73dfc9b4c3c1cf61, -0x14e2863687e3381b, 0x70459adb7daf675c },
+        { 0x0e7a4fbd305fa0bb, -0x7d62b31fab399c53, -0x0bde3c7cd01cc7b8, 0x795ac80d1bf64c42 },
+        { 0x1b91db4991b42bb3, 0x572696234b02dcca, -0x6020611ae0738724, 0x5fe162848ce21fd3 }
+    },
+},
+{
+    {
+        { 0x315c29c795115389, -0x281f1af879d08b32, 0x0c4a762185927432, 0x72de6c984a25a1e4 },
+        { -0x1d86f551b2f883bf, -0x746c7d8f248b965d, 0x6eb632dc8abd16a2, 0x720814ecaa064b72 },
+        { -0x51654aac40955cf0, 0x050a50a9806d6e1b, -0x6d448bfc5200aec7, 0x0394d27645be618b }
+    },
+    {
+        { -0x0ac69bda4dcaba5c, 0x15a7a27e98fbb296, -0x5493ad439c90227a, 0x79d995a8419334ee },
+        { 0x4d572251857eedf4, -0x1c8db1221e616c3b, -0x758ebdf1f4868fcb, 0x3b3c833687abe743 },
+        { -0x32757159ee6a228b, -0x5afb2757e22657d1, 0x540dca81a35879b6, 0x60dd16a379c86a8a }
+    },
+    {
+        { 0x3501d6f8153e47b8, -0x485698abeb5d09f4, 0x112ee8b6455d9523, 0x4e62a3c18112ea8a },
+        { 0x35a2c8487381e559, 0x596ffea6d78082cb, -0x34688e14245849ad, 0x5a08b5019b4da685 },
+        { -0x372b53fbae95487a, 0x595af3215295b23d, -0x29122dcb24fdcf3f, 0x0929efe8825b41cc }
+    },
+    {
+        { -0x74ce8d4852a99ae3, 0x01581b7a3fabd717, 0x2dc94df6424df6e4, 0x30376e5d2c29284f },
+        { 0x5f0601d1cbd0f2d3, 0x736e412f6132bb7f, -0x7c9fbbcddc722179, 0x1e3a5272f5c0753c },
+        { -0x2d6e72587ea65a64, 0x6bdc1cd93f0713f3, 0x565f7a934acd6590, 0x53daacec4cb4c128 }
+    },
+    {
+        { -0x667ad43c7ad30250, 0x2cc12e9559d6ed0b, 0x70f9e2bf9b5ac27b, 0x4f3b8c117959ae99 },
+        { 0x4ca73bd79cc8a7d6, 0x4d4a738f47e9a9b2, -0x0b340ed6bd0a0200, 0x01a13ff9bdbf0752 },
+        { 0x55b6c9c82ff26412, 0x1ac4a8c91fb667a8, -0x2ad840301488740e, 0x303337da7012a3be }
+    },
+    {
+        { -0x6892c334052d022f, -0x34777c68c859bf58, 0x2ff00c1d6734cb25, 0x269ff4dc789c2d2b },
+        { -0x6aabdddd73e36284, 0x01fac1371a9b340f, 0x7e8d9177925b48d7, 0x53f8ad5661b3e31b },
+        { 0x0c003fbdc08d678d, 0x4d982fa37ead2b17, -0x3f8194324d1a7d0f, 0x296c7291df412a44 }
+    },
+    {
+        { -0x204dcdfa25474a62, 0x465aeaa0c8092250, -0x2ecc3ee7658da2e8, 0x2327370261f117d1 },
+        { 0x7903de2b33daf397, -0x2f00f9e63659db4d, -0x75e2dad4aaa4c1e8, 0x2b6d581c52e0b7c0 },
+        { 0x3d0543d3623e7986, 0x679414c2c278a354, -0x51bc0f338d9e690a, 0x7836c41f8245eaba }
+    },
+    {
+        { -0x359ae17b7fee6c84, -0x394f3b91910be5d8, -0x48fde458a0c072ae, 0x119dff99ead7b9fd },
+        { -0x185dab24b616a57f, 0x5192d5d008b0ad73, 0x4d20e5b1d00afc07, 0x5d55f8012cf25f38 },
+        { 0x43eadfcbf4b31d4d, -0x39afc08beeeb776e, -0x0111973af9f2c4e9, 0x329293b3dd4a0ac8 }
+    },
+},
+{
+    {
+        { 0x2879852d5d7cb208, -0x4721228f97820d19, -0x23f40054de97876f, 0x2b44c043677daa35 },
+        { 0x4e59214fe194961a, 0x49be7dc70d71cd4f, -0x6cff302dc4af0dd3, 0x4789d446fc917232 },
+        { 0x1a1c87ab074eb78e, -0x05392e7166250b99, 0x3eacbbcd484f9067, 0x60c52eef2bb9a4e4 }
+    },
+    {
+        { 0x702bc5c27cae6d11, 0x44c7699b54a48cab, -0x1043bfa945b6d14e, 0x70d77248d9b6676d },
+        { 0x0b5d89bc3bfd8bf1, -0x4f946dc8360caae6, 0x0e4c16b0d53028f5, 0x10bc9c312ccfcaab },
+        { -0x557517b4c13d5fa5, -0x6796610b12e87e20, 0x794513e4708e85d1, 0x63755bd3a976f413 }
     },
     {
         { 0x3dc7101897f1acb7, 0x5dda7d5ec165bbd8, 0x508e5b9c0fa1020f, 0x2763751737c52a56 },
-        { 0xb55fa03e2ad10853, 0x356f75909ee63569, 0x9ff9f1fdbe69b890, 0x0d8cc1c48bc16f84 },
-        { 0x029402d36eb419a9, 0xf0b44e7e77b460a5, 0xcfa86230d43c4956, 0x70c2dd8a7ad166e7 }
-    },
-    {
-        { 0x91d4967db8ed7e13, 0x74252f0ad776817a, 0xe40982e00d852564, 0x32b8613816a53ce5 },
-        { 0x656194509f6fec0e, 0xee2e7ea946c6518d, 0x9733c1f367e09b5c, 0x2e0fac6363948495 },
-        { 0x79e7f7bee448cd64, 0x6ac83a67087886d0, 0xf89fd4d9a0e4db2e, 0x4179215c735a4f41 }
-    },
-    {
-        { 0xe4ae33b9286bcd34, 0xb7ef7eb6559dd6dc, 0x278b141fb3d38e1f, 0x31fa85662241c286 },
-        { 0x8c7094e7d7dced2a, 0x97fb8ac347d39c70, 0xe13be033a906d902, 0x700344a30cd99d76 },
-        { 0xaf826c422e3622f4, 0xc12029879833502d, 0x9bc1b7e12b389123, 0x24bb2312a9952489 }
-    },
-    {
-        { 0x41f80c2af5f85c6b, 0x687284c304fa6794, 0x8945df99a3ba1bad, 0x0d1d2af9ffeb5d16 },
-        { 0xb1a8ed1732de67c3, 0x3cb49418461b4948, 0x8ebd434376cfbcd2, 0x0fee3e871e188008 },
-        { 0xa9da8aa132621edf, 0x30b822a159226579, 0x4004197ba79ac193, 0x16acd79718531d76 }
-    },
-    {
-        { 0xc959c6c57887b6ad, 0x94e19ead5f90feba, 0x16e24e62a342f504, 0x164ed34b18161700 },
+        { -0x4aa05fc1d52ef7ad, 0x356f75909ee63569, -0x60060e0241964770, 0x0d8cc1c48bc16f84 },
+        { 0x029402d36eb419a9, -0x0f4bb181884b9f5b, -0x30579dcf2bc3b6aa, 0x70c2dd8a7ad166e7 }
+    },
+    {
+        { -0x6e2b6982471281ed, 0x74252f0ad776817a, -0x1bf67d1ff27ada9c, 0x32b8613816a53ce5 },
+        { 0x656194509f6fec0e, -0x11d18156b939ae73, -0x68cc3e0c981f64a4, 0x2e0fac6363948495 },
+        { 0x79e7f7bee448cd64, 0x6ac83a67087886d0, -0x07602b265f1b24d2, 0x4179215c735a4f41 }
+    },
+    {
+        { -0x1b51cc46d79432cc, -0x48108149aa622924, 0x278b141fb3d38e1f, 0x31fa85662241c286 },
+        { -0x738f6b18282312d6, -0x6804753cb82c6390, -0x1ec41fcc56f926fe, 0x700344a30cd99d76 },
+        { -0x507d93bdd1c9dd0c, -0x3edfd67867ccafd3, -0x643e481ed4c76edd, 0x24bb2312a9952489 }
+    },
+    {
+        { 0x41f80c2af5f85c6b, 0x687284c304fa6794, -0x76ba20665c45e453, 0x0d1d2af9ffeb5d16 },
+        { -0x4e5712e8cd21983d, 0x3cb49418461b4948, -0x7142bcbc8930432e, 0x0fee3e871e188008 },
+        { -0x5625755ecd9de121, 0x30b822a159226579, 0x4004197ba79ac193, 0x16acd79718531d76 }
+    },
+    {
+        { -0x36a6393a87784953, -0x6b1e6152a06f0146, 0x16e24e62a342f504, 0x164ed34b18161700 },
         { 0x72df72af2d9b1d3d, 0x63462a36a432245a, 0x3ecea07916b39637, 0x123e0ef6b9302309 },
-        { 0x487ed94c192fe69a, 0x61ae2cea3a911513, 0x877bf6d3b9a4de27, 0x78da0fc61073f3eb }
-    },
-    {
-        { 0xa29f80f1680c3a94, 0x71f77e151ae9e7e6, 0x1100f15848017973, 0x054aa4b316b38ddd },
+        { 0x487ed94c192fe69a, 0x61ae2cea3a911513, -0x7884092c465b21d9, 0x78da0fc61073f3eb }
+    },
+    {
+        { -0x5d607f0e97f3c56c, 0x71f77e151ae9e7e6, 0x1100f15848017973, 0x054aa4b316b38ddd },
         { 0x5bf15d28e52bc66a, 0x2c47e31870f01a8e, 0x2419afbc06c28bdd, 0x2d25deeb256b173a },
-        { 0xdfc8468d19267cb8, 0x0b28789c66e54daf, 0x2aeb1d2a666eec17, 0x134610a6ab7da760 }
-    },
-},
-{
-    {
-        { 0xd91430e0dc028c3c, 0x0eb955a85217c771, 0x4b09e1ed2c99a1fa, 0x42881af2bd6a743c },
-        { 0xcaf55ec27c59b23f, 0x99aeed3e154d04f2, 0x68441d72e14141f4, 0x140345133932a0a2 },
-        { 0x7bfec69aab5cad3d, 0xc23e8cd34cb2cfad, 0x685dd14bfb37d6a2, 0x0ad6d64415677a18 }
+        { -0x2037b972e6d98348, 0x0b28789c66e54daf, 0x2aeb1d2a666eec17, 0x134610a6ab7da760 }
+    },
+},
+{
+    {
+        { -0x26ebcf1f23fd73c4, 0x0eb955a85217c771, 0x4b09e1ed2c99a1fa, 0x42881af2bd6a743c },
+        { -0x350aa13d83a64dc1, -0x665112c1eab2fb0e, 0x68441d72e14141f4, 0x140345133932a0a2 },
+        { 0x7bfec69aab5cad3d, -0x3dc1732cb34d3053, 0x685dd14bfb37d6a2, 0x0ad6d64415677a18 }
     },
     {
         { 0x7914892847927e9f, 0x33dad6ef370aa877, 0x1f8f24fa11122703, 0x5265ac2f2adf9592 },
         { 0x781a439e417becb5, 0x4ac5938cd10e0266, 0x5da385110692ac24, 0x11b065a2ade31233 },
-        { 0x405fdd309afcb346, 0xd9723d4428e63f54, 0x94c01df05f65aaae, 0x43e4dc3ae14c0809 }
-    },
-    {
-        { 0xea6f7ac3adc2c6a3, 0xd0e928f6e9717c94, 0xe2d379ead645eaf5, 0x46dd8785c51ffbbe },
-        { 0xbc12c7f1a938a517, 0x473028ab3180b2e1, 0x3f78571efbcd254a, 0x74e534426ff6f90f },
+        { 0x405fdd309afcb346, -0x268dc2bbd719c0ac, -0x6b3fe20fa09a5552, 0x43e4dc3ae14c0809 }
+    },
+    {
+        { -0x1590853c523d395d, -0x2f16d709168e836c, -0x1d2c861529ba150b, 0x46dd8785c51ffbbe },
+        { -0x43ed380e56c75ae9, 0x473028ab3180b2e1, 0x3f78571efbcd254a, 0x74e534426ff6f90f },
         { 0x709801be375c8898, 0x4b06dab5e3fd8348, 0x75880ced27230714, 0x2b09468fdd2f4c42 }
     },
     {
-        { 0x5b97946582ffa02a, 0xda096a51fea8f549, 0xa06351375f77af9b, 0x1bcfde61201d1e76 },
-        { 0x97c749eeb701cb96, 0x83f438d4b6a369c3, 0x62962b8b9a402cd9, 0x6976c7509888df7b },
-        { 0x4a4a5490246a59a2, 0xd63ebddee87fdd90, 0xd9437c670d2371fa, 0x69e87308d30f8ed6 }
-    },
-    {
-        { 0x0f80bf028bc80303, 0x6aae16b37a18cefb, 0xdd47ea47d72cd6a3, 0x61943588f4ed39aa },
-        { 0x435a8bb15656beb0, 0xf8fac9ba4f4d5bca, 0xb9b278c41548c075, 0x3eb0ef76e892b622 },
-        { 0xd26e5c3e91039f85, 0xc0e9e77df6f33aa9, 0xe8968c5570066a93, 0x3c34d1881faaaddd }
-    },
-    {
-        { 0xbd5b0b8f2fffe0d9, 0x6aa254103ed24fb9, 0x2ac7d7bcb26821c4, 0x605b394b60dca36a },
-        { 0x3f9d2b5ea09f9ec0, 0x1dab3b6fb623a890, 0xa09ba3ea72d926c4, 0x374193513fd8b36d },
-        { 0xb4e856e45a9d1ed2, 0xefe848766c97a9a2, 0xb104cf641e5eee7d, 0x2f50b81c88a71c8f }
-    },
-    {
-        { 0x2b552ca0a7da522a, 0x3230b336449b0250, 0xf2c4c5bca4b99fb9, 0x7b2c674958074a22 },
-        { 0x31723c61fc6811bb, 0x9cb450486211800f, 0x768933d347995753, 0x3491a53502752fcd },
-        { 0xd55165883ed28cdf, 0x12d84fd2d362de39, 0x0a874ad3e3378e4f, 0x000d2b1f7c763e74 }
-    },
-    {
-        { 0x9624778c3e94a8ab, 0x0ad6f3cee9a78bec, 0x948ac7810d743c4f, 0x76627935aaecfccc },
-        { 0x3d420811d06d4a67, 0xbefc048590e0ffe3, 0xf870c6b7bd487bde, 0x6e2a7316319afa28 },
-        { 0x56a8ac24d6d59a9f, 0xc8db753e3096f006, 0x477f41e68f4c5299, 0x588d851cf6c86114 }
-    },
-},
-{
-    {
-        { 0xcd2a65e777d1f515, 0x548991878faa60f1, 0xb1b73bbcdabc06e5, 0x654878cba97cc9fb },
+        { 0x5b97946582ffa02a, -0x25f695ae01570ab7, -0x5f9caec8a0885065, 0x1bcfde61201d1e76 },
+        { -0x6838b61148fe346a, -0x7c0bc72b495c963d, 0x62962b8b9a402cd9, 0x6976c7509888df7b },
+        { 0x4a4a5490246a59a2, -0x29c1422117802270, -0x26bc8398f2dc8e06, 0x69e87308d30f8ed6 }
+    },
+    {
+        { 0x0f80bf028bc80303, 0x6aae16b37a18cefb, -0x22b815b828d3295d, 0x61943588f4ed39aa },
+        { 0x435a8bb15656beb0, -0x07053645b0b2a436, -0x464d873beab73f8b, 0x3eb0ef76e892b622 },
+        { -0x2d91a3c16efc607b, -0x3f161882090cc557, -0x176973aa8ff9956d, 0x3c34d1881faaaddd }
+    },
+    {
+        { -0x42a4f470d0001f27, 0x6aa254103ed24fb9, 0x2ac7d7bcb26821c4, 0x605b394b60dca36a },
+        { 0x3f9d2b5ea09f9ec0, 0x1dab3b6fb623a890, -0x5f645c158d26d93c, 0x374193513fd8b36d },
+        { -0x4b17a91ba562e12e, -0x1017b7899368565e, -0x4efb309be1a11183, 0x2f50b81c88a71c8f }
+    },
+    {
+        { 0x2b552ca0a7da522a, 0x3230b336449b0250, -0x0d3b3a435b466047, 0x7b2c674958074a22 },
+        { 0x31723c61fc6811bb, -0x634bafb79dee7ff1, 0x768933d347995753, 0x3491a53502752fcd },
+        { -0x2aae9a77c12d7321, 0x12d84fd2d362de39, 0x0a874ad3e3378e4f, 0x000d2b1f7c763e74 }
+    },
+    {
+        { -0x69db8873c16b5755, 0x0ad6f3cee9a78bec, -0x6b75387ef28bc3b1, 0x76627935aaecfccc },
+        { 0x3d420811d06d4a67, -0x4103fb7a6f1f001d, -0x078f394842b78422, 0x6e2a7316319afa28 },
+        { 0x56a8ac24d6d59a9f, -0x37248ac1cf690ffa, 0x477f41e68f4c5299, 0x588d851cf6c86114 }
+    },
+},
+{
+    {
+        { -0x32d59a18882e0aeb, 0x548991878faa60f1, -0x4e48c4432543f91b, 0x654878cba97cc9fb },
         { 0x51138ec78df6b0fe, 0x5397da89e575f51b, 0x09207a1d717af1b9, 0x2102fdba2b20d650 },
-        { 0x969ee405055ce6a1, 0x36bca7681251ad29, 0x3a1af517aa7da415, 0x0ad725db29ecb2ba }
-    },
-    {
-        { 0xfec7bc0c9b056f85, 0x537d5268e7f5ffd7, 0x77afc6624312aefa, 0x4f675f5302399fd9 },
-        { 0xdc4267b1834e2457, 0xb67544b570ce1bc5, 0x1af07a0bf7d15ed7, 0x4aefcffb71a03650 },
-        { 0xc32d36360415171e, 0xcd2bef118998483b, 0x870a6eadd0945110, 0x0bccbb72a2a86561 }
-    },
-    {
-        { 0x186d5e4c50fe1296, 0xe0397b82fee89f7e, 0x3bc7f6c5507031b0, 0x6678fd69108f37c2 },
-        { 0x185e962feab1a9c8, 0x86e7e63565147dcd, 0xb092e031bb5b6df2, 0x4024f0ab59d6b73e },
+        { -0x69611bfafaa3195f, 0x36bca7681251ad29, 0x3a1af517aa7da415, 0x0ad725db29ecb2ba }
+    },
+    {
+        { -0x013843f364fa907b, 0x537d5268e7f5ffd7, 0x77afc6624312aefa, 0x4f675f5302399fd9 },
+        { -0x23bd984e7cb1dba9, -0x498abb4a8f31e43b, 0x1af07a0bf7d15ed7, 0x4aefcffb71a03650 },
+        { -0x3cd2c9c9fbeae8e2, -0x32d410ee7667b7c5, -0x78f591522f6baef0, 0x0bccbb72a2a86561 }
+    },
+    {
+        { 0x186d5e4c50fe1296, -0x1fc6847d01176082, 0x3bc7f6c5507031b0, 0x6678fd69108f37c2 },
+        { 0x185e962feab1a9c8, -0x791819ca9aeb8233, -0x4f6d1fce44a4920e, 0x4024f0ab59d6b73e },
         { 0x1586fa31636863c2, 0x07f68c48572d33f2, 0x4f73cc9f789eaefc, 0x2d42e2108ead4701 }
     },
     {
-        { 0x21717b0d0f537593, 0x914e690b131e064c, 0x1bb687ae752ae09f, 0x420bf3a79b423c6e },
-        { 0x97f5131594dfd29b, 0x6155985d313f4c6a, 0xeba13f0708455010, 0x676b2608b8d2d322 },
-        { 0x8138ba651c5b2b47, 0x8671b6ec311b1b80, 0x7bff0cb1bc3135b0, 0x745d2ffa9c0cf1e0 }
-    },
-    {
-        { 0x6036df5721d34e6a, 0xb1db8827997bb3d0, 0xd3c209c3c8756afa, 0x06e15be54c1dc839 },
-        { 0xbf525a1e2bc9c8bd, 0xea5b260826479d81, 0xd511c70edf0155db, 0x1ae23ceb960cf5d0 },
+        { 0x21717b0d0f537593, -0x6eb196f4ece1f9b4, 0x1bb687ae752ae09f, 0x420bf3a79b423c6e },
+        { -0x680aecea6b202d65, 0x6155985d313f4c6a, -0x145ec0f8f7baaff0, 0x676b2608b8d2d322 },
+        { -0x7ec7459ae3a4d4b9, -0x798e4913cee4e480, 0x7bff0cb1bc3135b0, 0x745d2ffa9c0cf1e0 }
+    },
+    {
+        { 0x6036df5721d34e6a, -0x4e2477d866844c30, -0x2c3df63c378a9506, 0x06e15be54c1dc839 },
+        { -0x40ada5e1d4363743, -0x15a4d9f7d9b8627f, -0x2aee38f120feaa25, 0x1ae23ceb960cf5d0 },
         { 0x5b725d871932994a, 0x32351cb5ceb1dab0, 0x7dc41549dab7ca05, 0x58ded861278ec1f7 }
     },
     {
         { 0x2dfb5ba8b6c2c9a8, 0x48eeef8ef52c598c, 0x33809107f12d1573, 0x08ba696b531d5bd8 },
-        { 0xd8173793f266c55c, 0xc8c976c5cc454e49, 0x5ce382f8bc26c3a8, 0x2ff39de85485f6f9 },
-        { 0x77ed3eeec3efc57a, 0x04e05517d4ff4811, 0xea3d7a3ff1a671cb, 0x120633b4947cfe54 }
-    },
-    {
-        { 0x82bd31474912100a, 0xde237b6d7e6fbe06, 0xe11e761911ea79c6, 0x07433be3cb393bde },
+        { -0x27e8c86c0d993aa4, -0x3736893a33bab1b7, 0x5ce382f8bc26c3a8, 0x2ff39de85485f6f9 },
+        { 0x77ed3eeec3efc57a, 0x04e05517d4ff4811, -0x15c285c00e598e35, 0x120633b4947cfe54 }
+    },
+    {
+        { -0x7d42ceb8b6edeff6, -0x21dc8492819041fa, -0x1ee189e6ee15863a, 0x07433be3cb393bde },
         { 0x0b94987891610042, 0x4ee7b13cecebfae8, 0x70be739594f0a4c0, 0x35d30a99b4d59185 },
-        { 0xff7944c05ce997f4, 0x575d3de4b05c51a3, 0x583381fd5a76847c, 0x2d873ede7af6da9f }
-    },
-    {
-        { 0xaa6202e14e5df981, 0xa20d59175015e1f5, 0x18a275d3bae21d6c, 0x0543618a01600253 },
-        { 0x157a316443373409, 0xfab8b7eef4aa81d9, 0xb093fee6f5a64806, 0x2e773654707fa7b6 },
-        { 0x0deabdf4974c23c1, 0xaa6f0a259dce4693, 0x04202cb8a29aba2c, 0x4b1443362d07960d }
-    },
-},
-{
-    {
-        { 0x299b1c3f57c5715e, 0x96cb929e6b686d90, 0x3004806447235ab3, 0x2c435c24a44d9fe1 },
-        { 0x47b837f753242cec, 0x256dc48cc04212f2, 0xe222fbfbe1d928c5, 0x48ea295bad8a2c07 },
+        { -0x0086bb3fa316680c, 0x575d3de4b05c51a3, 0x583381fd5a76847c, 0x2d873ede7af6da9f }
+    },
+    {
+        { -0x559dfd1eb1a2067f, -0x5df2a6e8afea1e0b, 0x18a275d3bae21d6c, 0x0543618a01600253 },
+        { 0x157a316443373409, -0x054748110b557e27, -0x4f6c01190a59b7fa, 0x2e773654707fa7b6 },
+        { 0x0deabdf4974c23c1, -0x5590f5da6231b96d, 0x04202cb8a29aba2c, 0x4b1443362d07960d }
+    },
+},
+{
+    {
+        { 0x299b1c3f57c5715e, -0x69346d6194979270, 0x3004806447235ab3, 0x2c435c24a44d9fe1 },
+        { 0x47b837f753242cec, 0x256dc48cc04212f2, -0x1ddd04041e26d73b, 0x48ea295bad8a2c07 },
         { 0x0607c97c80f8833f, 0x0e851578ca25ec5b, 0x54f7450b161ebb6f, 0x7bcb4792a0def80e }
     },
     {
         { 0x1cecd0a0045224c2, 0x757f1b1b69e53952, 0x775b7a925289f681, 0x1b6cc62016736148 },
-        { 0x8487e3d02bc73659, 0x4baf8445059979df, 0xd17c975adcad6fbf, 0x57369f0bdefc96b6 },
-        { 0xf1a9990175638698, 0x353dd1beeeaa60d3, 0x849471334c9ba488, 0x63fa6e6843ade311 }
-    },
-    {
-        { 0x2195becdd24b5eb7, 0x5e41f18cc0cd44f9, 0xdf28074441ca9ede, 0x07073b98f35b7d67 },
-        { 0xd15c20536597c168, 0x9f73740098d28789, 0x18aee7f13257ba1f, 0x3418bfda07346f14 },
-        { 0xd03c676c4ce530d4, 0x0b64c0473b5df9f4, 0x065cef8b19b3a31e, 0x3084d661533102c9 }
-    },
-    {
-        { 0x9a6ce876760321fd, 0x7fe2b5109eb63ad8, 0x00e7d4ae8ac80592, 0x73d86b7abb6f723a },
-        { 0xe1f6b79ebf8469ad, 0x15801004e2663135, 0x9a498330af74181b, 0x3ba2504f049b673c },
-        { 0x0b52b5606dba5ab6, 0xa9134f0fbbb1edab, 0x30a9520d9b04a635, 0x6813b8f37973e5db }
-    },
-    {
-        { 0xf194ca56f3157e29, 0x136d35705ef528a5, 0xdd4cef778b0599bc, 0x7d5472af24f833ed },
-        { 0x9854b054334127c1, 0x105d047882fbff25, 0xdb49f7f944186f4f, 0x1768e838bed0b900 },
-        { 0xd0ef874daf33da47, 0x00d3be5db6e339f9, 0x3f2a8a2f9c9ceece, 0x5d1aeb792352435a }
-    },
-    {
-        { 0x12c7bfaeb61ba775, 0xb84e621fe263bffd, 0x0b47a5c35c840dcf, 0x7e83be0bccaf8634 },
-        { 0xf59e6bb319cd63ca, 0x670c159221d06839, 0xb06d565b2150cab6, 0x20fb199d104f12a3 },
-        { 0x61943dee6d99c120, 0x86101f2e460b9fe0, 0x6bb2f1518ee8598d, 0x76b76289fcc475cc }
+        { -0x7b781c2fd438c9a7, 0x4baf8445059979df, -0x2e8368a523529041, 0x57369f0bdefc96b6 },
+        { -0x0e5666fe8a9c7968, 0x353dd1beeeaa60d3, -0x7b6b8eccb3645b78, 0x63fa6e6843ade311 }
+    },
+    {
+        { 0x2195becdd24b5eb7, 0x5e41f18cc0cd44f9, -0x20d7f8bbbe356122, 0x07073b98f35b7d67 },
+        { -0x2ea3dfac9a683e98, -0x608c8bff672d7877, 0x18aee7f13257ba1f, 0x3418bfda07346f14 },
+        { -0x2fc39893b31acf2c, 0x0b64c0473b5df9f4, 0x065cef8b19b3a31e, 0x3084d661533102c9 }
+    },
+    {
+        { -0x6593178989fcde03, 0x7fe2b5109eb63ad8, 0x00e7d4ae8ac80592, 0x73d86b7abb6f723a },
+        { -0x1e094861407b9653, 0x15801004e2663135, -0x65b67ccf508be7e5, 0x3ba2504f049b673c },
+        { 0x0b52b5606dba5ab6, -0x56ecb0f0444e1255, 0x30a9520d9b04a635, 0x6813b8f37973e5db }
+    },
+    {
+        { -0x0e6b35a90cea81d7, 0x136d35705ef528a5, -0x22b3108874fa6644, 0x7d5472af24f833ed },
+        { -0x67ab4fabccbed83f, 0x105d047882fbff25, -0x24b60806bbe790b1, 0x1768e838bed0b900 },
+        { -0x2f1078b250cc25b9, 0x00d3be5db6e339f9, 0x3f2a8a2f9c9ceece, 0x5d1aeb792352435a }
+    },
+    {
+        { 0x12c7bfaeb61ba775, -0x47b19de01d9c4003, 0x0b47a5c35c840dcf, 0x7e83be0bccaf8634 },
+        { -0x0a61944ce6329c36, 0x670c159221d06839, -0x4f92a9a4deaf354a, 0x20fb199d104f12a3 },
+        { 0x61943dee6d99c120, -0x79efe0d1b9f46020, 0x6bb2f1518ee8598d, 0x76b76289fcc475cc }
     },
     {
         { 0x4245f1a1522ec0b3, 0x558785b22a75656d, 0x1d485a2548a1b3c0, 0x60959eccd58fe09f },
-        { 0x791b4cc1756286fa, 0xdbced317d74a157c, 0x7e732421ea72bde6, 0x01fe18491131c8e9 },
+        { 0x791b4cc1756286fa, -0x24312ce828b5ea84, 0x7e732421ea72bde6, 0x01fe18491131c8e9 },
         { 0x3ebfeb7ba8ed7a09, 0x49fdc2bbe502789c, 0x44ebce5d3c119428, 0x35e1eb55be947f4a }
     },
     {
-        { 0x14fd6dfa726ccc74, 0x3b084cfe2f53b965, 0xf33ae4f552a2c8b4, 0x59aab07a0d40166a },
-        { 0xdbdae701c5738dd3, 0xf9c6f635b26f1bee, 0x61e96a8042f15ef4, 0x3aa1d11faf60a4d8 },
+        { 0x14fd6dfa726ccc74, 0x3b084cfe2f53b965, -0x0cc51b0aad5d374c, 0x59aab07a0d40166a },
+        { -0x242518fe3a8c722d, -0x063909ca4d90e412, 0x61e96a8042f15ef4, 0x3aa1d11faf60a4d8 },
         { 0x77bcec4c925eac25, 0x1848718460137738, 0x5b374337fea9f451, 0x1865e78ec8e6aa46 }
     },
 },
 {
     {
-        { 0x967c54e91c529ccb, 0x30f6269264c635fb, 0x2747aff478121965, 0x17038418eaf66f5c },
-        { 0xccc4b7c7b66e1f7a, 0x44157e25f50c2f7e, 0x3ef06dfc713eaf1c, 0x582f446752da63f7 },
-        { 0xc6317bd320324ce4, 0xa81042e8a4488bc4, 0xb21ef18b4e5a1364, 0x0c2a1c4bcda28dc9 }
-    },
-    {
-        { 0xedc4814869bd6945, 0x0d6d907dbe1c8d22, 0xc63bd212d55cc5ab, 0x5a6a9b30a314dc83 },
-        { 0xd24dc7d06f1f0447, 0xb2269e3edb87c059, 0xd15b0272fbb2d28f, 0x7c558bd1c6f64877 },
-        { 0xd0ec1524d396463d, 0x12bb628ac35a24f0, 0xa50c3a791cbc5fa4, 0x0404a5ca0afbafc3 }
-    },
-    {
-        { 0x62bc9e1b2a416fd1, 0xb5c6f728e350598b, 0x04343fd83d5d6967, 0x39527516e7f8ee98 },
-        { 0x8c1f40070aa743d6, 0xccbad0cb5b265ee8, 0x574b046b668fd2de, 0x46395bfdcadd9633 },
-        { 0x117fdb2d1a5d9a9c, 0x9c7745bcd1005c2a, 0xefd4bef154d56fea, 0x76579a29e822d016 }
-    },
-    {
-        { 0x333cb51352b434f2, 0xd832284993de80e1, 0xb5512887750d35ce, 0x02c514bb2a2777c1 },
+        { -0x6983ab16e3ad6335, 0x30f6269264c635fb, 0x2747aff478121965, 0x17038418eaf66f5c },
+        { -0x333b48384991e086, 0x44157e25f50c2f7e, 0x3ef06dfc713eaf1c, 0x582f446752da63f7 },
+        { -0x39ce842cdfcdb31c, -0x57efbd175bb7743c, -0x4de10e74b1a5ec9c, 0x0c2a1c4bcda28dc9 }
+    },
+    {
+        { -0x123b7eb7964296bb, 0x0d6d907dbe1c8d22, -0x39c42ded2aa33a55, 0x5a6a9b30a314dc83 },
+        { -0x2db2382f90e0fbb9, -0x4dd961c124783fa7, -0x2ea4fd8d044d2d71, 0x7c558bd1c6f64877 },
+        { -0x2f13eadb2c69b9c3, 0x12bb628ac35a24f0, -0x5af3c586e343a05c, 0x0404a5ca0afbafc3 }
+    },
+    {
+        { 0x62bc9e1b2a416fd1, -0x4a3908d71cafa675, 0x04343fd83d5d6967, 0x39527516e7f8ee98 },
+        { -0x73e0bff8f558bc2a, -0x33452f34a4d9a118, 0x574b046b668fd2de, 0x46395bfdcadd9633 },
+        { 0x117fdb2d1a5d9a9c, -0x6388ba432effa3d6, -0x102b410eab2a9016, 0x76579a29e822d016 }
+    },
+    {
+        { 0x333cb51352b434f2, -0x27cdd7b66c217f1f, -0x4aaed7788af2ca32, 0x02c514bb2a2777c1 },
         { 0x45b68e7e49c02a17, 0x23cd51a2bca9a37f, 0x3ed65f11ec224c1b, 0x43a384dc9e05bdb1 },
-        { 0x684bd5da8bf1b645, 0xfb8bd37ef6b54b53, 0x313916d7a9b0d253, 0x1160920961548059 }
+        { 0x684bd5da8bf1b645, -0x04742c81094ab4ad, 0x313916d7a9b0d253, 0x1160920961548059 }
     },
     {
         { 0x7a385616369b4dcd, 0x75c02ca7655c3563, 0x7dc21bf9d4f18021, 0x2f637d7491e6e042 },
-        { 0xb44d166929dacfaa, 0xda529f4c8413598f, 0xe9ef63ca453d5559, 0x351e125bc5698e0b },
-        { 0xd4b49b461af67bbe, 0xd603037ac8ab8961, 0x71dee19ff9a699fb, 0x7f182d06e7ce2a9a }
-    },
-    {
-        { 0x09454b728e217522, 0xaa58e8f4d484b8d8, 0xd358254d7f46903c, 0x44acc043241c5217 },
-        { 0x7a7c8e64ab0168ec, 0xcb5a4a5515edc543, 0x095519d347cd0eda, 0x67d4ac8c343e93b0 },
-        { 0x1c7d6bbb4f7a5777, 0x8b35fed4918313e1, 0x4adca1c6c96b4684, 0x556d1c8312ad71bd }
-    },
-    {
-        { 0x81f06756b11be821, 0x0faff82310a3f3dd, 0xf8b2d0556a99465d, 0x097abe38cc8c7f05 },
+        { -0x4bb2e996d6253056, -0x25ad60b37beca671, -0x16109c35bac2aaa7, 0x351e125bc5698e0b },
+        { -0x2b4b64b9e5098442, -0x29fcfc853754769f, 0x71dee19ff9a699fb, 0x7f182d06e7ce2a9a }
+    },
+    {
+        { 0x09454b728e217522, -0x55a7170b2b7b4728, -0x2ca7dab280b96fc4, 0x44acc043241c5217 },
+        { 0x7a7c8e64ab0168ec, -0x34a5b5aaea123abd, 0x095519d347cd0eda, 0x67d4ac8c343e93b0 },
+        { 0x1c7d6bbb4f7a5777, -0x74ca012b6e7cec1f, 0x4adca1c6c96b4684, 0x556d1c8312ad71bd }
+    },
+    {
+        { -0x7e0f98a94ee417df, 0x0faff82310a3f3dd, -0x074d2faa9566b9a3, 0x097abe38cc8c7f05 },
         { 0x17ef40e30c8d3982, 0x31f7073e15a3fa34, 0x4f21f3cb0773646e, 0x746c6c6d1d824eff },
         { 0x0c49c9877ea52da4, 0x4c4369559bdc1d43, 0x022c3809f7ccebd2, 0x577e14a34bee84bd }
     },
     {
-        { 0x94fecebebd4dd72b, 0xf46a4fda060f2211, 0x124a5977c0c8d1ff, 0x705304b8fb009295 },
-        { 0xf0e268ac61a73b0a, 0xf2fafa103791a5f5, 0xc1e13e826b6d00e9, 0x60fa7ee96fd78f42 },
-        { 0xb63d1d354d296ec6, 0xf3c3053e5fad31d8, 0x670b958cb4bd42ec, 0x21398e0ca16353fd }
-    },
-},
-{
-    {
-        { 0x86c5fc16861b7e9a, 0xf6a330476a27c451, 0x01667267a1e93597, 0x05ffb9cd6082dfeb },
-        { 0x216ab2ca8da7d2ef, 0x366ad9dd99f42827, 0xae64b9004fdd3c75, 0x403a395b53909e62 },
-        { 0xa617fa9ff53f6139, 0x60f2b5e513e66cb6, 0xd7a8beefb3448aa4, 0x7a2932856f5ea192 }
-    },
-    {
-        { 0xb89c444879639302, 0x4ae4f19350c67f2c, 0xf0b35da8c81af9c6, 0x39d0003546871017 },
-        { 0x0b39d761b02de888, 0x5f550e7ed2414e1f, 0xa6bfa45822e1a940, 0x050a2f7dfd447b99 },
-        { 0x437c3b33a650db77, 0x6bafe81dbac52bb2, 0xfe99402d2db7d318, 0x2b5b7eec372ba6ce }
-    },
-    {
-        { 0xa694404d613ac8f4, 0x500c3c2bfa97e72c, 0x874104d21fcec210, 0x1b205fb38604a8ee },
-        { 0xb3bc4bbd83f50eef, 0x508f0c998c927866, 0x43e76587c8b7e66e, 0x0f7655a3a47f98d9 },
+        { -0x6b01314142b228d5, -0x0b95b025f9f0ddef, 0x124a5977c0c8d1ff, 0x705304b8fb009295 },
+        { -0x0f1d97539e58c4f6, -0x0d0505efc86e5a0b, -0x3e1ec17d9492ff17, 0x60fa7ee96fd78f42 },
+        { -0x49c2e2cab2d6913a, -0x0c3cfac1a052ce28, 0x670b958cb4bd42ec, 0x21398e0ca16353fd }
+    },
+},
+{
+    {
+        { -0x793a03e979e48166, -0x095ccfb895d83baf, 0x01667267a1e93597, 0x05ffb9cd6082dfeb },
+        { 0x216ab2ca8da7d2ef, 0x366ad9dd99f42827, -0x519b46ffb022c38b, 0x403a395b53909e62 },
+        { -0x59e805600ac09ec7, 0x60f2b5e513e66cb6, -0x285741104cbb755c, 0x7a2932856f5ea192 }
+    },
+    {
+        { -0x4763bbb7869c6cfe, 0x4ae4f19350c67f2c, -0x0f4ca25737e5063a, 0x39d0003546871017 },
+        { 0x0b39d761b02de888, 0x5f550e7ed2414e1f, -0x59405ba7dd1e56c0, 0x050a2f7dfd447b99 },
+        { 0x437c3b33a650db77, 0x6bafe81dbac52bb2, -0x0166bfd2d2482ce8, 0x2b5b7eec372ba6ce }
+    },
+    {
+        { -0x596bbfb29ec5370c, 0x500c3c2bfa97e72c, -0x78befb2de0313df0, 0x1b205fb38604a8ee },
+        { -0x4c43b4427c0af111, 0x508f0c998c927866, 0x43e76587c8b7e66e, 0x0f7655a3a47f98d9 },
         { 0x55ecad37d24b133c, 0x441e147d6038c90b, 0x656683a1d62c6fee, 0x0157d5dc87e0ecae }
     },
     {
-        { 0x95265514d71eb524, 0xe603d8815df14593, 0x147cdf410d4de6b7, 0x5293b1730437c850 },
-        { 0xf2a7af510354c13d, 0xd7a0b145aa372b60, 0x2869b96a05a3d470, 0x6528e42d82460173 },
-        { 0x23d0e0814bccf226, 0x92c745cd8196fb93, 0x8b61796c59541e5b, 0x40a44df0c021f978 }
-    },
-    {
-        { 0x86c96e514bc5d095, 0xf20d4098fca6804a, 0x27363d89c826ea5d, 0x39ca36565719cacf },
-        { 0xdaa869894f20ea6a, 0xea14a3d14c620618, 0x6001fccb090bf8be, 0x35f4e822947e9cf0 },
-        { 0x97506f2f6f87b75c, 0xc624aea0034ae070, 0x1ec856e3aad34dd6, 0x055b0be0e440e58f }
-    },
-    {
-        { 0x4d12a04b6ea33da2, 0x57cf4c15e36126dd, 0x90ec9675ee44d967, 0x64ca348d2a985aac },
-        { 0x6469a17d89735d12, 0xdb6f27d5e662b9f1, 0x9fcba3286a395681, 0x363b8004d269af25 },
-        { 0x99588e19e4c4912d, 0xefcc3b4e1ca5ce6b, 0x4522ea60fa5b98d5, 0x7064bbab1de4a819 }
-    },
-    {
-        { 0xa290c06142542129, 0xf2e2c2aebe8d5b90, 0xcf2458db76abfe1b, 0x02157ade83d626bf },
-        { 0xb919e1515a770641, 0xa9a2e2c74e7f8039, 0x7527250b3df23109, 0x756a7330ac27b78b },
-        { 0x3e46972a1b9a038b, 0x2e4ee66a7ee03fb4, 0x81a248776edbb4ca, 0x1a944ee88ecd0563 }
-    },
-    {
-        { 0xbb40a859182362d6, 0xb99f55778a4d1abb, 0x8d18b427758559f6, 0x26c20fe74d26235a },
-        { 0xd5a91d1151039372, 0x2ed377b799ca26de, 0xa17202acfd366b6b, 0x0730291bd6901995 },
+        { -0x6ad9aaeb28e14adc, -0x19fc277ea20eba6d, 0x147cdf410d4de6b7, 0x5293b1730437c850 },
+        { -0x0d5850aefcab3ec3, -0x285f4eba55c8d4a0, 0x2869b96a05a3d470, 0x6528e42d82460173 },
+        { 0x23d0e0814bccf226, -0x6d38ba327e69046d, -0x749e8693a6abe1a5, 0x40a44df0c021f978 }
+    },
+    {
+        { -0x793691aeb43a2f6b, -0x0df2bf6703597fb6, 0x27363d89c826ea5d, 0x39ca36565719cacf },
+        { -0x25579676b0df1596, -0x15eb5c2eb39df9e8, 0x6001fccb090bf8be, 0x35f4e822947e9cf0 },
+        { -0x68af90d0907848a4, -0x39db515ffcb51f90, 0x1ec856e3aad34dd6, 0x055b0be0e440e58f }
+    },
+    {
+        { 0x4d12a04b6ea33da2, 0x57cf4c15e36126dd, -0x6f13698a11bb2699, 0x64ca348d2a985aac },
+        { 0x6469a17d89735d12, -0x2490d82a199d460f, -0x60345cd795c6a97f, 0x363b8004d269af25 },
+        { -0x66a771e61b3b6ed3, -0x1033c4b1e35a3195, 0x4522ea60fa5b98d5, 0x7064bbab1de4a819 }
+    },
+    {
+        { -0x5d6f3f9ebdabded7, -0x0d1d3d514172a470, -0x30dba724895401e5, 0x02157ade83d626bf },
+        { -0x46e61eaea588f9bf, -0x565d1d38b1807fc7, 0x7527250b3df23109, 0x756a7330ac27b78b },
+        { 0x3e46972a1b9a038b, 0x2e4ee66a7ee03fb4, -0x7e5db78891244b36, 0x1a944ee88ecd0563 }
+    },
+    {
+        { -0x44bf57a6e7dc9d2a, -0x4660aa8875b2e545, -0x72e74bd88a7aa60a, 0x26c20fe74d26235a },
+        { -0x2a56e2eeaefc6c8e, 0x2ed377b799ca26de, -0x5e8dfd5302c99495, 0x0730291bd6901995 },
         { 0x648d1d9fe9cc22f5, 0x66bc561928dd577c, 0x47d3ed21652439d1, 0x49d271acedaf8b49 }
     },
 },
 {
     {
-        { 0x2798aaf9b4b75601, 0x5eac72135c8dad72, 0xd2ceaa6161b7a023, 0x1bbfb284e98f7d4e },
-        { 0x89f5058a382b33f3, 0x5ae2ba0bad48c0b4, 0x8f93b503a53db36e, 0x5aa3ed9d95a232e6 },
-        { 0x656777e9c7d96561, 0xcb2b125472c78036, 0x65053299d9506eee, 0x4a07e14e5e8957cc }
-    },
-    {
-        { 0x240b58cdc477a49b, 0xfd38dade6447f017, 0x19928d32a7c86aad, 0x50af7aed84afa081 },
-        { 0x4ee412cb980df999, 0xa315d76f3c6ec771, 0xbba5edde925c77fd, 0x3f0bac391d313402 },
+        { 0x2798aaf9b4b75601, 0x5eac72135c8dad72, -0x2d31559e9e485fdd, 0x1bbfb284e98f7d4e },
+        { -0x760afa75c7d4cc0d, 0x5ae2ba0bad48c0b4, -0x706c4afc5ac24c92, 0x5aa3ed9d95a232e6 },
+        { 0x656777e9c7d96561, -0x34d4edab8d387fca, 0x65053299d9506eee, 0x4a07e14e5e8957cc }
+    },
+    {
+        { 0x240b58cdc477a49b, -0x02c725219bb80fe9, 0x19928d32a7c86aad, 0x50af7aed84afa081 },
+        { 0x4ee412cb980df999, -0x5cea2890c391388f, -0x445a12216da38803, 0x3f0bac391d313402 },
         { 0x6e4fde0115f65be5, 0x29982621216109b2, 0x780205810badd6d9, 0x1921a316baebd006 }
     },
     {
-        { 0xd75aad9ad9f3c18b, 0x566a0eef60b1c19c, 0x3e9a0bac255c0ed9, 0x7b049deca062c7f5 },
-        { 0x89422f7edfb870fc, 0x2c296beb4f76b3bd, 0x0738f1d436c24df7, 0x6458df41e273aeb0 },
-        { 0xdccbe37a35444483, 0x758879330fedbe93, 0x786004c312c5dd87, 0x6093dccbc2950e64 }
-    },
-    {
-        { 0x6bdeeebe6084034b, 0x3199c2b6780fb854, 0x973376abb62d0695, 0x6e3180c98b647d90 },
+        { -0x28a55265260c3e75, 0x566a0eef60b1c19c, 0x3e9a0bac255c0ed9, 0x7b049deca062c7f5 },
+        { -0x76bdd08120478f04, 0x2c296beb4f76b3bd, 0x0738f1d436c24df7, 0x6458df41e273aeb0 },
+        { -0x23341c85cabbbb7d, 0x758879330fedbe93, 0x786004c312c5dd87, 0x6093dccbc2950e64 }
+    },
+    {
+        { 0x6bdeeebe6084034b, 0x3199c2b6780fb854, -0x68cc895449d2f96b, 0x6e3180c98b647d90 },
         { 0x1ff39a8585e0706d, 0x36d0a5d8b3e73933, 0x43b9f2e1718f453b, 0x57d1ea084827a97c },
-        { 0xee7ab6e7a128b071, 0xa4c1596d93a88baa, 0xf7b4de82b2216130, 0x363e999ddd97bd18 }
-    },
-    {
-        { 0x2f1848dce24baec6, 0x769b7255babcaf60, 0x90cb3c6e3cefe931, 0x231f979bc6f9b355 },
-        { 0x96a843c135ee1fc4, 0x976eb35508e4c8cf, 0xb42f6801b58cd330, 0x48ee9b78693a052b },
-        { 0x5c31de4bcc2af3c6, 0xb04bb030fe208d1f, 0xb78d7009c14fb466, 0x079bfa9b08792413 }
-    },
-    {
-        { 0xf3c9ed80a2d54245, 0x0aa08b7877f63952, 0xd76dac63d1085475, 0x1ef4fb159470636b },
-        { 0xe3903a51da300df4, 0x843964233da95ab0, 0xed3cf12d0b356480, 0x038c77f684817194 },
-        { 0x854e5ee65b167bec, 0x59590a4296d0cdc2, 0x72b2df3498102199, 0x575ee92a4a0bff56 }
-    },
-    {
-        { 0x5d46bc450aa4d801, 0xc3af1227a533b9d8, 0x389e3b262b8906c2, 0x200a1e7e382f581b },
-        { 0xd4c080908a182fcf, 0x30e170c299489dbd, 0x05babd5752f733de, 0x43d4e7112cd3fd00 },
-        { 0x518db967eaf93ac5, 0x71bc989b056652c0, 0xfe2b85d9567197f5, 0x050eca52651e4e38 }
-    },
-    {
-        { 0x97ac397660e668ea, 0x9b19bbfe153ab497, 0x4cb179b534eca79f, 0x6151c09fa131ae57 },
-        { 0xc3431ade453f0c9c, 0xe9f5045eff703b9b, 0xfcd97ac9ed847b3d, 0x4b0ee6c21c58f4c6 },
-        { 0x3af55c0dfdf05d96, 0xdd262ee02ab4ee7a, 0x11b2bb8712171709, 0x1fef24fa800f030b }
-    },
-},
-{
-    {
-        { 0xff91a66a90166220, 0xf22552ae5bf1e009, 0x7dff85d87f90df7c, 0x4f620ffe0c736fb9 },
-        { 0xb496123a6b6c6609, 0xa750fe8580ab5938, 0xf471bf39b7c27a5f, 0x507903ce77ac193c },
-        { 0x62f90d65dfde3e34, 0xcf28c592b9fa5fad, 0x99c86ef9c6164510, 0x25d448044a256c84 }
-    },
-    {
-        { 0x2c7c4415c9022b55, 0x56a0d241812eb1fe, 0xf02ea1c9d7b65e0d, 0x4180512fd5323b26 },
-        { 0xbd68230ec7e9b16f, 0x0eb1b9c1c1c5795d, 0x7943c8c495b6b1ff, 0x2f9faf620bbacf5e },
-        { 0xa4ff3e698a48a5db, 0xba6a3806bd95403b, 0x9f7ce1af47d5b65d, 0x15e087e55939d2fb }
-    },
-    {
-        { 0x8894186efb963f38, 0x48a00e80dc639bd5, 0xa4e8092be96c1c99, 0x5a097d54ca573661 },
-        { 0x12207543745c1496, 0xdaff3cfdda38610c, 0xe4e797272c71c34f, 0x39c07b1934bdede9 },
-        { 0x2d45892b17c9e755, 0xd033fd7289308df8, 0x6c2fe9d9525b8bd9, 0x2edbecf1c11cc079 }
-    },
-    {
-        { 0xee0f0fddd087a25f, 0x9c7531555c3e34ee, 0x660c572e8fab3ab5, 0x0854fc44544cd3b2 },
-        { 0x1616a4e3c715a0d2, 0x53623cb0f8341d4d, 0x96ef5329c7e899cb, 0x3d4e8dbba668baa6 },
+        { -0x118549185ed74f8f, -0x5b3ea6926c577456, -0x084b217d4dde9ed0, 0x363e999ddd97bd18 }
+    },
+    {
+        { 0x2f1848dce24baec6, 0x769b7255babcaf60, -0x6f34c391c31016cf, 0x231f979bc6f9b355 },
+        { -0x6957bc3eca11e03c, -0x68914caaf71b3731, -0x4bd097fe4a732cd0, 0x48ee9b78693a052b },
+        { 0x5c31de4bcc2af3c6, -0x4fb44fcf01df72e1, -0x48728ff63eb04b9a, 0x079bfa9b08792413 }
+    },
+    {
+        { -0x0c36127f5d2abdbb, 0x0aa08b7877f63952, -0x2892539c2ef7ab8b, 0x1ef4fb159470636b },
+        { -0x1c6fc5ae25cff20c, -0x7bc69bdcc256a550, -0x12c30ed2f4ca9b80, 0x038c77f684817194 },
+        { -0x7ab1a119a4e98414, 0x59590a4296d0cdc2, 0x72b2df3498102199, 0x575ee92a4a0bff56 }
+    },
+    {
+        { 0x5d46bc450aa4d801, -0x3c50edd85acc4628, 0x389e3b262b8906c2, 0x200a1e7e382f581b },
+        { -0x2b3f7f6f75e7d031, 0x30e170c299489dbd, 0x05babd5752f733de, 0x43d4e7112cd3fd00 },
+        { 0x518db967eaf93ac5, 0x71bc989b056652c0, -0x01d47a26a98e680b, 0x050eca52651e4e38 }
+    },
+    {
+        { -0x6853c6899f199716, -0x64e64401eac54b69, 0x4cb179b534eca79f, 0x6151c09fa131ae57 },
+        { -0x3cbce521bac0f364, -0x160afba1008fc465, -0x03268536127b84c3, 0x4b0ee6c21c58f4c6 },
+        { 0x3af55c0dfdf05d96, -0x22d9d11fd54b1186, 0x11b2bb8712171709, 0x1fef24fa800f030b }
+    },
+},
+{
+    {
+        { -0x006e59956fe99de0, -0x0ddaad51a40e1ff7, 0x7dff85d87f90df7c, 0x4f620ffe0c736fb9 },
+        { -0x4b69edc5949399f7, -0x58af017a7f54a6c8, -0x0b8e40c6483d85a1, 0x507903ce77ac193c },
+        { 0x62f90d65dfde3e34, -0x30d73a6d4605a053, -0x6637910639e9baf0, 0x25d448044a256c84 }
+    },
+    {
+        { 0x2c7c4415c9022b55, 0x56a0d241812eb1fe, -0x0fd15e362849a1f3, 0x4180512fd5323b26 },
+        { -0x4297dcf138164e91, 0x0eb1b9c1c1c5795d, 0x7943c8c495b6b1ff, 0x2f9faf620bbacf5e },
+        { -0x5b00c19675b75a25, -0x4595c7f9426abfc5, -0x60831e50b82a49a3, 0x15e087e55939d2fb }
+    },
+    {
+        { -0x776be7910469c0c8, 0x48a00e80dc639bd5, -0x5b17f6d41693e367, 0x5a097d54ca573661 },
+        { 0x12207543745c1496, -0x2500c30225c79ef4, -0x1b1868d8d38e3cb1, 0x39c07b1934bdede9 },
+        { 0x2d45892b17c9e755, -0x2fcc028d76cf7208, 0x6c2fe9d9525b8bd9, 0x2edbecf1c11cc079 }
+    },
+    {
+        { -0x11f0f0222f785da1, -0x638aceaaa3c1cb12, 0x660c572e8fab3ab5, 0x0854fc44544cd3b2 },
+        { 0x1616a4e3c715a0d2, 0x53623cb0f8341d4d, -0x6910acd638176635, 0x3d4e8dbba668baa6 },
         { 0x61eba0c555edad19, 0x24b533fef0a83de6, 0x3b77042883baa5f8, 0x678f82b898a47e8d }
     },
     {
-        { 0x1e09d94057775696, 0xeed1265c3cd951db, 0xfa9dac2b20bce16f, 0x0f7f76e0e8d089f4 },
-        { 0xb1491d0bd6900c54, 0x3539722c9d132636, 0x4db928920b362bc9, 0x4d7cd1fea68b69df },
-        { 0x36d9ebc5d485b00c, 0xa2596492e4adb365, 0xc1659480c2119ccd, 0x45306349186e0d5f }
-    },
-    {
-        { 0x96a414ec2b072491, 0x1bb2218127a7b65b, 0x6d2849596e8a4af0, 0x65f3b08ccd27765f },
-        { 0x94ddd0c1a6cdff1d, 0x55f6f115e84213ae, 0x6c935f85992fcf6a, 0x067ee0f54a37f16f },
-        { 0xecb29fff199801f7, 0x9d361d1fa2a0f72f, 0x25f11d2375fd2f49, 0x124cefe80fe10fe2 }
-    },
-    {
-        { 0x1518e85b31b16489, 0x8faadcb7db710bfb, 0x39b0bdf4a14ae239, 0x05f4cbea503d20c1 },
-        { 0x4c126cf9d18df255, 0xc1d471e9147a63b6, 0x2c6d3c73f3c93b5f, 0x6be3a6a2e3ff86a2 },
-        { 0xce040e9ec04145bc, 0xc71ff4e208f6834c, 0xbd546e8dab8847a3, 0x64666aa0a4d2aba5 }
-    },
-    {
-        { 0xb0c53bf73337e94c, 0x7cb5697e11e14f15, 0x4b84abac1930c750, 0x28dd4abfe0640468 },
-        { 0x6841435a7c06d912, 0xca123c21bb3f830b, 0xd4b37b27b1cbe278, 0x1d753b84c76f5046 },
+        { 0x1e09d94057775696, -0x112ed9a3c326ae25, -0x056253d4df431e91, 0x0f7f76e0e8d089f4 },
+        { -0x4eb6e2f4296ff3ac, 0x3539722c9d132636, 0x4db928920b362bc9, 0x4d7cd1fea68b69df },
+        { 0x36d9ebc5d485b00c, -0x5da69b6d1b524c9b, -0x3e9a6b7f3dee6333, 0x45306349186e0d5f }
+    },
+    {
+        { -0x695beb13d4f8db6f, 0x1bb2218127a7b65b, 0x6d2849596e8a4af0, 0x65f3b08ccd27765f },
+        { -0x6b222f3e593200e3, 0x55f6f115e84213ae, 0x6c935f85992fcf6a, 0x067ee0f54a37f16f },
+        { -0x134d6000e667fe09, -0x62c9e2e05d5f08d1, 0x25f11d2375fd2f49, 0x124cefe80fe10fe2 }
+    },
+    {
+        { 0x1518e85b31b16489, -0x70552348248ef405, 0x39b0bdf4a14ae239, 0x05f4cbea503d20c1 },
+        { 0x4c126cf9d18df255, -0x3e2b8e16eb859c4a, 0x2c6d3c73f3c93b5f, 0x6be3a6a2e3ff86a2 },
+        { -0x31fbf1613fbeba44, -0x38e00b1df7097cb4, -0x42ab91725477b85d, 0x64666aa0a4d2aba5 }
+    },
+    {
+        { -0x4f3ac408ccc816b4, 0x7cb5697e11e14f15, 0x4b84abac1930c750, 0x28dd4abfe0640468 },
+        { 0x6841435a7c06d912, -0x35edc3de44c07cf5, -0x2b4c84d84e341d88, 0x1d753b84c76f5046 },
         { 0x7dc0b64c44cb9f44, 0x18a3e1ace3925dbf, 0x7a3034862d0457c4, 0x4c498bf78a0c892e }
     },
 },
 {
     {
-        { 0x22d2aff530976b86, 0x8d90b806c2d24604, 0xdca1896c4de5bae5, 0x28005fe6c8340c17 },
-        { 0x37d653fb1aa73196, 0x0f9495303fd76418, 0xad200b09fb3a17b2, 0x544d49292fc8613e },
-        { 0x6aefba9f34528688, 0x5c1bff9425107da1, 0xf75bbbcd66d94b36, 0x72e472930f316dfa }
+        { 0x22d2aff530976b86, -0x726f47f93d2db9fc, -0x235e7693b21a451b, 0x28005fe6c8340c17 },
+        { 0x37d653fb1aa73196, 0x0f9495303fd76418, -0x52dff4f604c5e84e, 0x544d49292fc8613e },
+        { 0x6aefba9f34528688, 0x5c1bff9425107da1, -0x08a444329926b4ca, 0x72e472930f316dfa }
     },
     {
         { 0x07f3f635d32a7627, 0x7aaa4d865f6566f0, 0x3c85e79728d04450, 0x1fee7f000fe06438 },
-        { 0x2695208c9781084f, 0xb1502a0b23450ee1, 0xfd9daea603efde02, 0x5a9d2e8c2733a34c },
-        { 0x765305da03dbf7e5, 0xa4daf2491434cdbd, 0x7b4ad5cdd24a88ec, 0x00f94051ee040543 }
-    },
-    {
-        { 0xd7ef93bb07af9753, 0x583ed0cf3db766a7, 0xce6998bf6e0b1ec5, 0x47b7ffd25dd40452 },
-        { 0x8d356b23c3d330b2, 0xf21c8b9bb0471b06, 0xb36c316c6e42b83c, 0x07d79c7e8beab10d },
-        { 0x87fbfb9cbc08dd12, 0x8a066b3ae1eec29b, 0x0d57242bdb1fc1bf, 0x1c3520a35ea64bb6 }
-    },
-    {
-        { 0xcda86f40216bc059, 0x1fbb231d12bcd87e, 0xb4956a9e17c70990, 0x38750c3b66d12e55 },
-        { 0x80d253a6bccba34a, 0x3e61c3a13838219b, 0x90c3b6019882e396, 0x1c3d05775d0ee66f },
-        { 0x692ef1409422e51a, 0xcbc0c73c2b5df671, 0x21014fe7744ce029, 0x0621e2c7d330487c }
-    },
-    {
-        { 0xb7ae1796b0dbf0f3, 0x54dfafb9e17ce196, 0x25923071e9aaa3b4, 0x5d8e589ca1002e9d },
-        { 0xaf9860cc8259838d, 0x90ea48c1c69f9adc, 0x6526483765581e30, 0x0007d6097bd3a5bc },
-        { 0xc0bf1d950842a94b, 0xb2d3c363588f2e3e, 0x0a961438bb51e2ef, 0x1583d7783c1cbf86 }
-    },
-    {
-        { 0x90034704cc9d28c7, 0x1d1b679ef72cc58f, 0x16e12b5fbe5b8726, 0x4958064e83c5580a },
-        { 0xeceea2ef5da27ae1, 0x597c3a1455670174, 0xc9a62a126609167a, 0x252a5f2e81ed8f70 },
-        { 0x0d2894265066e80d, 0xfcc3f785307c8c6b, 0x1b53da780c1112fd, 0x079c170bd843b388 }
-    },
-    {
-        { 0xcdd6cd50c0d5d056, 0x9af7686dbb03573b, 0x3ca6723ff3c3ef48, 0x6768c0d7317b8acc },
-        { 0x0506ece464fa6fff, 0xbee3431e6205e523, 0x3579422451b8ea42, 0x6dec05e34ac9fb00 },
-        { 0x94b625e5f155c1b3, 0x417bf3a7997b7b91, 0xc22cbddc6d6b2600, 0x51445e14ddcd52f4 }
-    },
-    {
-        { 0x893147ab2bbea455, 0x8c53a24f92079129, 0x4b49f948be30f7a7, 0x12e990086e4fd43d },
-        { 0x57502b4b3b144951, 0x8e67ff6b444bbcb3, 0xb8bd6927166385db, 0x13186f31e39295c8 },
-        { 0xf10c96b37fdfbb2e, 0x9f9a935e121ceaf9, 0xdf1136c43a5b983f, 0x77b2e3f05d3e99af }
-    },
-},
-{
-    {
-        { 0x9532f48fcc5cd29b, 0x2ba851bea3ce3671, 0x32dacaa051122941, 0x478d99d9350004f2 },
-        { 0xfd0d75879cf12657, 0xe82fef94e53a0e29, 0xcc34a7f05bbb4be7, 0x0b251172a50c38a2 },
-        { 0x1d5ad94890bb02c0, 0x50e208b10ec25115, 0xa26a22894ef21702, 0x4dc923343b524805 }
-    },
-    {
-        { 0x3ad3e3ebf36c4975, 0xd75d25a537862125, 0xe873943da025a516, 0x6bbc7cb4c411c847 },
-        { 0xe3828c400f8086b6, 0x3f77e6f7979f0dc8, 0x7ef6de304df42cb4, 0x5265797cb6abd784 },
-        { 0x3c6f9cd1d4a50d56, 0xb6244077c6feab7e, 0x6ff9bf483580972e, 0x00375883b332acfb }
-    },
-    {
-        { 0xc98bec856c75c99c, 0xe44184c000e33cf4, 0x0a676b9bba907634, 0x669e2cb571f379d7 },
-        { 0x0001b2cd28cb0940, 0x63fb51a06f1c24c9, 0xb5ad8691dcd5ca31, 0x67238dbd8c450660 },
-        { 0xcb116b73a49bd308, 0x025aad6b2392729e, 0xb4793efa3f55d9b1, 0x72a1056140678bb9 }
-    },
-    {
-        { 0x0d8d2909e2e505b6, 0x98ca78abc0291230, 0x77ef5569a9b12327, 0x7c77897b81439b47 },
-        { 0xa2b6812b1cc9249d, 0x62866eee21211f58, 0x2cb5c5b85df10ece, 0x03a6b259e263ae00 },
-        { 0xf1c1b5e2de331cb5, 0x5a9f5d8e15fca420, 0x9fa438f17bd932b1, 0x2a381bf01c6146e7 }
-    },
-    {
-        { 0xf7c0be32b534166f, 0x27e6ca6419cf70d4, 0x934df7d7a957a759, 0x5701461dabdec2aa },
-        { 0xac9b9879cfc811c1, 0x8b7d29813756e567, 0x50da4e607c70edfc, 0x5dbca62f884400b6 },
+        { 0x2695208c9781084f, -0x4eafd5f4dcbaf11f, -0x02625159fc1021fe, 0x5a9d2e8c2733a34c },
+        { 0x765305da03dbf7e5, -0x5b250db6ebcb3243, 0x7b4ad5cdd24a88ec, 0x00f94051ee040543 }
+    },
+    {
+        { -0x28106c44f85068ad, 0x583ed0cf3db766a7, -0x3196674091f4e13b, 0x47b7ffd25dd40452 },
+        { -0x72ca94dc3c2ccf4e, -0x0de374644fb8e4fa, -0x4c93ce9391bd47c4, 0x07d79c7e8beab10d },
+        { -0x7804046343f722ee, -0x75f994c51e113d65, 0x0d57242bdb1fc1bf, 0x1c3520a35ea64bb6 }
+    },
+    {
+        { -0x325790bfde943fa7, 0x1fbb231d12bcd87e, -0x4b6a9561e838f670, 0x38750c3b66d12e55 },
+        { -0x7f2dac5943345cb6, 0x3e61c3a13838219b, -0x6f3c49fe677d1c6a, 0x1c3d05775d0ee66f },
+        { 0x692ef1409422e51a, -0x343f38c3d4a2098f, 0x21014fe7744ce029, 0x0621e2c7d330487c }
+    },
+    {
+        { -0x4851e8694f240f0d, 0x54dfafb9e17ce196, 0x25923071e9aaa3b4, 0x5d8e589ca1002e9d },
+        { -0x50679f337da67c73, -0x6f15b73e39606524, 0x6526483765581e30, 0x0007d6097bd3a5bc },
+        { -0x3f40e26af7bd56b5, -0x4d2c3c9ca770d1c2, 0x0a961438bb51e2ef, 0x1583d7783c1cbf86 }
+    },
+    {
+        { -0x6ffcb8fb3362d739, 0x1d1b679ef72cc58f, 0x16e12b5fbe5b8726, 0x4958064e83c5580a },
+        { -0x13115d10a25d851f, 0x597c3a1455670174, -0x3659d5ed99f6e986, 0x252a5f2e81ed8f70 },
+        { 0x0d2894265066e80d, -0x033c087acf837395, 0x1b53da780c1112fd, 0x079c170bd843b388 }
+    },
+    {
+        { -0x322932af3f2a2faa, -0x6508979244fca8c5, 0x3ca6723ff3c3ef48, 0x6768c0d7317b8acc },
+        { 0x0506ece464fa6fff, -0x411cbce19dfa1add, 0x3579422451b8ea42, 0x6dec05e34ac9fb00 },
+        { -0x6b49da1a0eaa3e4d, 0x417bf3a7997b7b91, -0x3dd342239294da00, 0x51445e14ddcd52f4 }
+    },
+    {
+        { -0x76ceb854d4415bab, -0x73ac5db06df86ed7, 0x4b49f948be30f7a7, 0x12e990086e4fd43d },
+        { 0x57502b4b3b144951, -0x71980094bbb4434d, -0x474296d8e99c7a25, 0x13186f31e39295c8 },
+        { -0x0ef3694c802044d2, -0x60656ca1ede31507, -0x20eec93bc5a467c1, 0x77b2e3f05d3e99af }
+    },
+},
+{
+    {
+        { -0x6acd0b7033a32d65, 0x2ba851bea3ce3671, 0x32dacaa051122941, 0x478d99d9350004f2 },
+        { -0x02f28a78630ed9a9, -0x17d0106b1ac5f1d7, -0x33cb580fa444b419, 0x0b251172a50c38a2 },
+        { 0x1d5ad94890bb02c0, 0x50e208b10ec25115, -0x5d95dd76b10de8fe, 0x4dc923343b524805 }
+    },
+    {
+        { 0x3ad3e3ebf36c4975, -0x28a2da5ac879dedb, -0x178c6bc25fda5aea, 0x6bbc7cb4c411c847 },
+        { -0x1c7d73bff07f794a, 0x3f77e6f7979f0dc8, 0x7ef6de304df42cb4, 0x5265797cb6abd784 },
+        { 0x3c6f9cd1d4a50d56, -0x49dbbf8839015482, 0x6ff9bf483580972e, 0x00375883b332acfb }
+    },
+    {
+        { -0x3674137a938a3664, -0x1bbe7b3fff1cc30c, 0x0a676b9bba907634, 0x669e2cb571f379d7 },
+        { 0x0001b2cd28cb0940, 0x63fb51a06f1c24c9, -0x4a52796e232a35cf, 0x67238dbd8c450660 },
+        { -0x34ee948c5b642cf8, 0x025aad6b2392729e, -0x4b86c105c0aa264f, 0x72a1056140678bb9 }
+    },
+    {
+        { 0x0d8d2909e2e505b6, -0x673587543fd6edd0, 0x77ef5569a9b12327, 0x7c77897b81439b47 },
+        { -0x5d497ed4e336db63, 0x62866eee21211f58, 0x2cb5c5b85df10ece, 0x03a6b259e263ae00 },
+        { -0x0e3e4a1d21cce34b, 0x5a9f5d8e15fca420, -0x605bc70e8426cd4f, 0x2a381bf01c6146e7 }
+    },
+    {
+        { -0x083f41cd4acbe991, 0x27e6ca6419cf70d4, -0x6cb2082856a858a7, 0x5701461dabdec2aa },
+        { -0x536467863037ee3f, -0x7482d67ec8a91a99, 0x50da4e607c70edfc, 0x5dbca62f884400b6 },
         { 0x2c6747402c915c25, 0x1bdcd1a80b0d340a, 0x5e5601bd07b43f5f, 0x2555b4e05539a242 }
     },
     {
-        { 0x78409b1d87e463d4, 0xad4da95acdfb639d, 0xec28773755259b9c, 0x69c806e9c31230ab },
-        { 0x6fc09f5266ddd216, 0xdce560a7c8e37048, 0xec65939da2df62fd, 0x7a869ae7e52ed192 },
-        { 0x7b48f57414bb3f22, 0x68c7cee4aedccc88, 0xed2f936179ed80be, 0x25d70b885f77bc4b }
-    },
-    {
-        { 0x98459d29bb1ae4d4, 0x56b9c4c739f954ec, 0x832743f6c29b4b3e, 0x21ea8e2798b6878a },
+        { 0x78409b1d87e463d4, -0x52b256a532049c63, -0x13d788c8aada6464, 0x69c806e9c31230ab },
+        { 0x6fc09f5266ddd216, -0x231a9f58371c8fb8, -0x139a6c625d209d03, 0x7a869ae7e52ed192 },
+        { 0x7b48f57414bb3f22, 0x68c7cee4aedccc88, -0x12d06c9e86127f42, 0x25d70b885f77bc4b }
+    },
+    {
+        { -0x67ba62d644e51b2c, 0x56b9c4c739f954ec, -0x7cd8bc093d64b4c2, 0x21ea8e2798b6878a },
         { 0x4151c3d9762bf4de, 0x083f435f2745d82b, 0x29775a2e0d23ddd5, 0x138e3a6269a5db24 },
-        { 0x87bef4b46a5a7b9c, 0xd2299d1b5fc1d062, 0x82409818dd321648, 0x5c5abeb1e5a2e03d }
-    },
-    {
-        { 0x02cde6de1306a233, 0x7b5a52a2116f8ec7, 0xe1c681f4c1163b5b, 0x241d350660d32643 },
-        { 0x14722af4b73c2ddb, 0xbc470c5f5a05060d, 0x00943eac2581b02e, 0x0e434b3b1f499c8f },
-        { 0x6be4404d0ebc52c7, 0xae46233bb1a791f5, 0x2aec170ed25db42b, 0x1d8dfd966645d694 }
-    },
-},
-{
-    {
-        { 0xd598639c12ddb0a4, 0xa5d19f30c024866b, 0xd17c2f0358fce460, 0x07a195152e095e8a },
-        { 0x296fa9c59c2ec4de, 0xbc8b61bf4f84f3cb, 0x1c7706d917a8f908, 0x63b795fc7ad3255d },
-        { 0xa8368f02389e5fc8, 0x90433b02cf8de43b, 0xafa1fd5dc5412643, 0x3e8fe83d032f0137 }
-    },
-    {
-        { 0x08704c8de8efd13c, 0xdfc51a8e33e03731, 0xa59d5da51260cde3, 0x22d60899a6258c86 },
-        { 0x2f8b15b90570a294, 0x94f2427067084549, 0xde1c5ae161bbfd84, 0x75ba3b797fac4007 },
-        { 0x6239dbc070cdd196, 0x60fe8a8b6c7d8a9a, 0xb38847bceb401260, 0x0904d07b87779e5e }
-    },
-    {
-        { 0xf4322d6648f940b9, 0x06952f0cbd2d0c39, 0x167697ada081f931, 0x6240aacebaf72a6c },
-        { 0xb4ce1fd4ddba919c, 0xcf31db3ec74c8daa, 0x2c63cc63ad86cc51, 0x43e2143fbc1dde07 },
-        { 0xf834749c5ba295a0, 0xd6947c5bca37d25a, 0x66f13ba7e7c9316a, 0x56bdaf238db40cac }
+        { -0x78410b4b95a58464, -0x2dd662e4a03e2f9e, -0x7dbf67e722cde9b8, 0x5c5abeb1e5a2e03d }
+    },
+    {
+        { 0x02cde6de1306a233, 0x7b5a52a2116f8ec7, -0x1e397e0b3ee9c4a5, 0x241d350660d32643 },
+        { 0x14722af4b73c2ddb, -0x43b8f3a0a5faf9f3, 0x00943eac2581b02e, 0x0e434b3b1f499c8f },
+        { 0x6be4404d0ebc52c7, -0x51b9dcc44e586e0b, 0x2aec170ed25db42b, 0x1d8dfd966645d694 }
+    },
+},
+{
+    {
+        { -0x2a679c63ed224f5c, -0x5a2e60cf3fdb7995, -0x2e83d0fca7031ba0, 0x07a195152e095e8a },
+        { 0x296fa9c59c2ec4de, -0x43749e40b07b0c35, 0x1c7706d917a8f908, 0x63b795fc7ad3255d },
+        { -0x57c970fdc761a038, -0x6fbcc4fd30721bc5, -0x505e02a23abed9bd, 0x3e8fe83d032f0137 }
+    },
+    {
+        { 0x08704c8de8efd13c, -0x203ae571cc1fc8cf, -0x5a62a25aed9f321d, 0x22d60899a6258c86 },
+        { 0x2f8b15b90570a294, -0x6b0dbd8f98f7bab7, -0x21e3a51e9e44027c, 0x75ba3b797fac4007 },
+        { 0x6239dbc070cdd196, 0x60fe8a8b6c7d8a9a, -0x4c77b84314bfeda0, 0x0904d07b87779e5e }
+    },
+    {
+        { -0x0bcdd299b706bf47, 0x06952f0cbd2d0c39, 0x167697ada081f931, 0x6240aacebaf72a6c },
+        { -0x4b31e02b22456e64, -0x30ce24c138b37256, 0x2c63cc63ad86cc51, 0x43e2143fbc1dde07 },
+        { -0x07cb8b63a45d6a60, -0x296b83a435c82da6, 0x66f13ba7e7c9316a, 0x56bdaf238db40cac }
     },
     {
         { 0x1310d36cc19d3bb2, 0x062a6bb7622386b9, 0x7c9b8591d7a14f5c, 0x03aa31507e1e5754 },
-        { 0x362ab9e3f53533eb, 0x338568d56eb93d40, 0x9e0e14521d5a5572, 0x1d24a86d83741318 },
-        { 0xf4ec7648ffd4ce1f, 0xe045eaf054ac8c1c, 0x88d225821d09357c, 0x43b261dc9aeb4859 }
-    },
-    {
-        { 0x19513d8b6c951364, 0x94fe7126000bf47b, 0x028d10ddd54f9567, 0x02b4d5e242940964 },
-        { 0xe55b1e1988bb79bb, 0xa09ed07dc17a359d, 0xb02c2ee2603dea33, 0x326055cf5b276bc2 },
-        { 0xb4a155cb28d18df2, 0xeacc4646186ce508, 0xc49cf4936c824389, 0x27a6c809ae5d3410 }
-    },
-    {
-        { 0xcd2c270ac43d6954, 0xdd4a3e576a66cab2, 0x79fa592469d7036c, 0x221503603d8c2599 },
-        { 0x8ba6ebcd1f0db188, 0x37d3d73a675a5be8, 0xf22edfa315f5585a, 0x2cb67174ff60a17e },
-        { 0x59eecdf9390be1d0, 0xa9422044728ce3f1, 0x82891c667a94f0f4, 0x7b1df4b73890f436 }
-    },
-    {
-        { 0x5f2e221807f8f58c, 0xe3555c9fd49409d4, 0xb2aaa88d1fb6a630, 0x68698245d352e03d },
-        { 0xe492f2e0b3b2a224, 0x7c6c9e062b551160, 0x15eb8fe20d7f7b0e, 0x61fcef2658fc5992 },
-        { 0xdbb15d852a18187a, 0xf3e4aad386ddacd7, 0x44bae2810ff6c482, 0x46cf4c473daf01cf }
+        { 0x362ab9e3f53533eb, 0x338568d56eb93d40, -0x61f1ebade2a5aa8e, 0x1d24a86d83741318 },
+        { -0x0b1389b7002b31e1, -0x1fba150fab5373e4, -0x772dda7de2f6ca84, 0x43b261dc9aeb4859 }
+    },
+    {
+        { 0x19513d8b6c951364, -0x6b018ed9fff40b85, 0x028d10ddd54f9567, 0x02b4d5e242940964 },
+        { -0x1aa4e1e677448645, -0x5f612f823e85ca63, -0x4fd3d11d9fc215cd, 0x326055cf5b276bc2 },
+        { -0x4b5eaa34d72e720e, -0x1533b9b9e7931af8, -0x3b630b6c937dbc77, 0x27a6c809ae5d3410 }
+    },
+    {
+        { -0x32d3d8f53bc296ac, -0x22b5c1a89599354e, 0x79fa592469d7036c, 0x221503603d8c2599 },
+        { -0x74591432e0f24e78, 0x37d3d73a675a5be8, -0x0dd1205cea0aa7a6, 0x2cb67174ff60a17e },
+        { 0x59eecdf9390be1d0, -0x56bddfbb8d731c0f, -0x7d76e399856b0f0c, 0x7b1df4b73890f436 }
+    },
+    {
+        { 0x5f2e221807f8f58c, -0x1caaa3602b6bf62c, -0x4d555772e04959d0, 0x68698245d352e03d },
+        { -0x1b6d0d1f4c4d5ddc, 0x7c6c9e062b551160, 0x15eb8fe20d7f7b0e, 0x61fcef2658fc5992 },
+        { -0x244ea27ad5e7e786, -0x0c1b552c79225329, 0x44bae2810ff6c482, 0x46cf4c473daf01cf }
     },
     {
         { 0x213c6ea7f1498140, 0x7c1e7ef8392b4854, 0x2488c38c5629ceba, 0x1065aae50d8cc5bb },
@@ -2031,1640 +2067,4331 @@
 },
 {
     {
-        { 0x7b26e56b9e2d4734, 0xc4c7132b81c61675, 0xef5c9525ec9cde7f, 0x39c80b16e71743ad },
-        { 0x7afcd613efa9d697, 0x0cc45aa41c067959, 0xa56fe104c1fada96, 0x3a73b70472e40365 },
-        { 0x0f196e0d1b826c68, 0xf71ff0e24960e3db, 0x6113167023b7436c, 0x0cf0ea5877da7282 }
-    },
-    {
-        { 0xe332ced43ba6945a, 0xde0b1361e881c05d, 0x1ad40f095e67ed3b, 0x5da8acdab8c63d5d },
-        { 0x196c80a4ddd4ccbd, 0x22e6f55d95f2dd9d, 0xc75e33c740d6c71b, 0x7bb51279cb3c042f },
-        { 0xc4b6664a3a70159f, 0x76194f0f0a904e14, 0xa5614c39a4096c13, 0x6cd0ff50979feced }
-    },
-    {
-        { 0x7fecfabdb04ba18e, 0xd0fc7bfc3bddbcf7, 0xa41d486e057a131c, 0x641a4391f2223a61 },
-        { 0xc0e067e78f4428ac, 0x14835ab0a61135e3, 0xf21d14f338062935, 0x6390a4c8df04849c },
-        { 0xc5c6b95aa606a8db, 0x914b7f9eb06825f1, 0x2a731f6b44fc9eff, 0x30ddf38562705cfc }
-    },
-    {
-        { 0x33bef2bd68bcd52c, 0xc649dbb069482ef2, 0xb5b6ee0c41cb1aee, 0x5c294d270212a7e5 },
-        { 0x4e3dcbdad1bff7f9, 0xc9118e8220645717, 0xbacccebc0f189d56, 0x1b4822e9d4467668 },
-        { 0xab360a7f25563781, 0x2512228a480f7958, 0xc75d05276114b4e3, 0x222d9625d976fe2a }
-    },
-    {
-        { 0x0f94be7e0a344f85, 0xeb2faa8c87f22c38, 0x9ce1e75e4ee16f0f, 0x43e64e5418a08dea },
-        { 0x1c717f85b372ace1, 0x81930e694638bf18, 0x239cad056bc08b58, 0x0b34271c87f8fff4 },
-        { 0x8155e2521a35ce63, 0xbe100d4df912028e, 0xbff80bf8a57ddcec, 0x57342dc96d6bc6e4 }
-    },
-    {
-        { 0xf3c3bcb71e707bf6, 0x351d9b8c7291a762, 0x00502e6edad69a33, 0x522f521f1ec8807f },
-        { 0xefeef065c8ce5998, 0xbf029510b5cbeaa2, 0x8c64a10620b7c458, 0x35134fb231c24855 },
-        { 0x272c1f46f9a3902b, 0xc91ba3b799657bcc, 0xae614b304f8a1c0e, 0x7afcaad70b99017b }
-    },
-    {
-        { 0xa88141ecef842b6b, 0x55e7b14797abe6c5, 0x8c748f9703784ffe, 0x5b50a1f7afcd00b7 },
-        { 0xc25ded54a4b8be41, 0x902d13e11bb0e2dd, 0x41f43233cde82ab2, 0x1085faa5c3aae7cb },
-        { 0x9b840f66f1361315, 0x18462242701003e9, 0x65ed45fae4a25080, 0x0a2862393fda7320 }
-    },
-    {
-        { 0x960e737b6ecb9d17, 0xfaf24948d67ceae1, 0x37e7a9b4d55e1b89, 0x5cb7173cb46c59eb },
+        { 0x7b26e56b9e2d4734, -0x3b38ecd47e39e98b, -0x10a36ada13632181, 0x39c80b16e71743ad },
+        { 0x7afcd613efa9d697, 0x0cc45aa41c067959, -0x5a901efb3e05256a, 0x3a73b70472e40365 },
+        { 0x0f196e0d1b826c68, -0x08e00f1db69f1c25, 0x6113167023b7436c, 0x0cf0ea5877da7282 }
+    },
+    {
+        { -0x1ccd312bc4596ba6, -0x21f4ec9e177e3fa3, 0x1ad40f095e67ed3b, 0x5da8acdab8c63d5d },
+        { 0x196c80a4ddd4ccbd, 0x22e6f55d95f2dd9d, -0x38a1cc38bf2938e5, 0x7bb51279cb3c042f },
+        { -0x3b4999b5c58fea61, 0x76194f0f0a904e14, -0x5a9eb3c65bf693ed, 0x6cd0ff50979feced }
+    },
+    {
+        { 0x7fecfabdb04ba18e, -0x2f038403c4224309, -0x5be2b791fa85ece4, 0x641a4391f2223a61 },
+        { -0x3f1f981870bbd754, 0x14835ab0a61135e3, -0x0de2eb0cc7f9d6cb, 0x6390a4c8df04849c },
+        { -0x3a3946a559f95725, -0x6eb480614f97da0f, 0x2a731f6b44fc9eff, 0x30ddf38562705cfc }
+    },
+    {
+        { 0x33bef2bd68bcd52c, -0x39b6244f96b7d10e, -0x4a4911f3be34e512, 0x5c294d270212a7e5 },
+        { 0x4e3dcbdad1bff7f9, -0x36ee717ddf9ba8e9, -0x45333143f0e762aa, 0x1b4822e9d4467668 },
+        { -0x54c9f580daa9c87f, 0x2512228a480f7958, -0x38a2fad89eeb4b1d, 0x222d9625d976fe2a }
+    },
+    {
+        { 0x0f94be7e0a344f85, -0x14d05573780dd3c8, -0x631e18a1b11e90f1, 0x43e64e5418a08dea },
+        { 0x1c717f85b372ace1, -0x7e6cf196b9c740e8, 0x239cad056bc08b58, 0x0b34271c87f8fff4 },
+        { -0x7eaa1dade5ca319d, -0x41eff2b206edfd72, -0x4007f4075a822314, 0x57342dc96d6bc6e4 }
+    },
+    {
+        { -0x0c3c4348e18f840a, 0x351d9b8c7291a762, 0x00502e6edad69a33, 0x522f521f1ec8807f },
+        { -0x10110f9a3731a668, -0x40fd6aef4a34155e, -0x739b5ef9df483ba8, 0x35134fb231c24855 },
+        { 0x272c1f46f9a3902b, -0x36e45c48669a8434, -0x519eb4cfb075e3f2, 0x7afcaad70b99017b }
+    },
+    {
+        { -0x577ebe13107bd495, 0x55e7b14797abe6c5, -0x738b7068fc87b002, 0x5b50a1f7afcd00b7 },
+        { -0x3da212ab5b4741bf, -0x6fd2ec1ee44f1d23, 0x41f43233cde82ab2, 0x1085faa5c3aae7cb },
+        { -0x647bf0990ec9eceb, 0x18462242701003e9, 0x65ed45fae4a25080, 0x0a2862393fda7320 }
+    },
+    {
+        { -0x69f18c84913462e9, -0x050db6b72983151f, 0x37e7a9b4d55e1b89, 0x5cb7173cb46c59eb },
         { 0x46ab13c8347cbc9d, 0x3849e8d499c12383, 0x4cea314087d64ac9, 0x1f354134b1a29ee7 },
-        { 0x4a89e68b82b7abf0, 0xf41cd9279ba6b7b9, 0x16e6c210e18d876f, 0x7cacdb0f7f1b09c6 }
-    },
-},
-{
-    {
-        { 0xe1014434dcc5caed, 0x47ed5d963c84fb33, 0x70019576ed86a0e7, 0x25b2697bd267f9e4 },
-        { 0x9062b2e0d91a78bc, 0x47c9889cc8509667, 0x9df54a66405070b8, 0x7369e6a92493a1bf },
-        { 0x9d673ffb13986864, 0x3ca5fbd9415dc7b8, 0xe04ecc3bdf273b5e, 0x1420683db54e4cd2 }
-    },
-    {
-        { 0x34eebb6fc1cc5ad0, 0x6a1b0ce99646ac8b, 0xd3b0da49a66bde53, 0x31e83b4161d081c1 },
-        { 0xb478bd1e249dd197, 0x620c35005e58c102, 0xfb02d32fccbaac5c, 0x60b63bebf508a72d },
-        { 0x97e8c7129e062b4f, 0x49e48f4f29320ad8, 0x5bece14b6f18683f, 0x55cf1eb62d550317 }
-    },
-    {
-        { 0x3076b5e37df58c52, 0xd73ab9dde799cc36, 0xbd831ce34913ee20, 0x1a56fbaa62ba0133 },
-        { 0x5879101065c23d58, 0x8b9d086d5094819c, 0xe2402fa912c55fa7, 0x669a6564570891d4 },
-        { 0x943e6b505c9dc9ec, 0x302557bba77c371a, 0x9873ae5641347651, 0x13c4836799c58a5c }
-    },
-    {
-        { 0xc4dcfb6a5d8bd080, 0xdeebc4ec571a4842, 0xd4b2e883b8e55365, 0x50bdc87dc8e5b827 },
-        { 0x423a5d465ab3e1b9, 0xfc13c187c7f13f61, 0x19f83664ecb5b9b6, 0x66f80c93a637b607 },
+        { 0x4a89e68b82b7abf0, -0x0be326d864594847, 0x16e6c210e18d876f, 0x7cacdb0f7f1b09c6 }
+    },
+},
+{
+    {
+        { -0x1efebbcb233a3513, 0x47ed5d963c84fb33, 0x70019576ed86a0e7, 0x25b2697bd267f9e4 },
+        { -0x6f9d4d1f26e58744, 0x47c9889cc8509667, -0x620ab599bfaf8f48, 0x7369e6a92493a1bf },
+        { -0x6298c004ec67979c, 0x3ca5fbd9415dc7b8, -0x1fb133c420d8c4a2, 0x1420683db54e4cd2 }
+    },
+    {
+        { 0x34eebb6fc1cc5ad0, 0x6a1b0ce99646ac8b, -0x2c4f25b6599421ad, 0x31e83b4161d081c1 },
+        { -0x4b8742e1db622e69, 0x620c35005e58c102, -0x04fd2cd0334553a4, 0x60b63bebf508a72d },
+        { -0x681738ed61f9d4b1, 0x49e48f4f29320ad8, 0x5bece14b6f18683f, 0x55cf1eb62d550317 }
+    },
+    {
+        { 0x3076b5e37df58c52, -0x28c54622186633ca, -0x427ce31cb6ec11e0, 0x1a56fbaa62ba0133 },
+        { 0x5879101065c23d58, -0x7462f792af6b7e64, -0x1dbfd056ed3aa059, 0x669a6564570891d4 },
+        { -0x6bc194afa3623614, 0x302557bba77c371a, -0x678c51a9becb89af, 0x13c4836799c58a5c }
+    },
+    {
+        { -0x3b230495a2742f80, -0x21143b13a8e5b7be, -0x2b4d177c471aac9b, 0x50bdc87dc8e5b827 },
+        { 0x423a5d465ab3e1b9, -0x03ec3e78380ec09f, 0x19f83664ecb5b9b6, 0x66f80c93a637b607 },
         { 0x606d37836edfe111, 0x32353e15f011abd9, 0x64b03ac325b73b96, 0x1dd56444725fd5ae }
     },
     {
-        { 0xc297e60008bac89a, 0x7d4cea11eae1c3e0, 0xf3e38be19fe7977c, 0x3a3a450f63a305cd },
-        { 0x8fa47ff83362127d, 0xbc9f6ac471cd7c15, 0x6e71454349220c8b, 0x0e645912219f732e },
-        { 0x078f2f31d8394627, 0x389d3183de94a510, 0xd1e36c6d17996f80, 0x318c8d9393a9a87b }
-    },
-    {
-        { 0x5d669e29ab1dd398, 0xfc921658342d9e3b, 0x55851dfdf35973cd, 0x509a41c325950af6 },
-        { 0xf2745d032afffe19, 0x0c9f3c497f24db66, 0xbc98d3e3ba8598ef, 0x224c7c679a1d5314 },
-        { 0xbdc06edca6f925e9, 0x793ef3f4641b1f33, 0x82ec12809d833e89, 0x05bff02328a11389 }
+        { -0x3d6819fff7453766, 0x7d4cea11eae1c3e0, -0x0c1c741e60186884, 0x3a3a450f63a305cd },
+        { -0x705b8007cc9ded83, -0x4360953b8e3283eb, 0x6e71454349220c8b, 0x0e645912219f732e },
+        { 0x078f2f31d8394627, 0x389d3183de94a510, -0x2e1c9392e8669080, 0x318c8d9393a9a87b }
+    },
+    {
+        { 0x5d669e29ab1dd398, -0x036de9a7cbd261c5, 0x55851dfdf35973cd, 0x509a41c325950af6 },
+        { -0x0d8ba2fcd50001e7, 0x0c9f3c497f24db66, -0x43672c1c457a6711, 0x224c7c679a1d5314 },
+        { -0x423f91235906da17, 0x793ef3f4641b1f33, -0x7d13ed7f627cc177, 0x05bff02328a11389 }
     },
     {
         { 0x6881a0dd0dc512e4, 0x4fe70dc844a5fafe, 0x1f748e6b8f4a5240, 0x576277cdee01a3ea },
-        { 0x3632137023cae00b, 0x544acf0ad1accf59, 0x96741049d21a1c88, 0x780b8cc3fa2a44a7 },
-        { 0x1ef38abc234f305f, 0x9a577fbd1405de08, 0x5e82a51434e62a0d, 0x5ff418726271b7a1 }
-    },
-    {
-        { 0xe5db47e813b69540, 0xf35d2a3b432610e1, 0xac1f26e938781276, 0x29d4db8ca0a0cb69 },
-        { 0x398e080c1789db9d, 0xa7602025f3e778f5, 0xfa98894c06bd035d, 0x106a03dc25a966be },
-        { 0xd9ad0aaf333353d0, 0x38669da5acd309e5, 0x3c57658ac888f7f0, 0x4ab38a51052cbefa }
-    },
-},
-{
-    {
-        { 0xf68fe2e8809de054, 0xe3bc096a9c82bad1, 0x076353d40aadbf45, 0x7b9b1fb5dea1959e },
-        { 0xdfdacbee4324c0e9, 0x054442883f955bb7, 0xdef7aaa8ea31609f, 0x68aee70642287cff },
-        { 0xf01cc8f17471cc0c, 0x95242e37579082bb, 0x27776093d3e46b5f, 0x2d13d55a28bd85fb }
-    },
-    {
-        { 0xbf019cce7aee7a52, 0xa8ded2b6e454ead3, 0x3c619f0b87a8bb19, 0x3619b5d7560916d8 },
-        { 0xfac5d2065b35b8da, 0xa8da8a9a85624bb7, 0xccd2ca913d21cd0f, 0x6b8341ee8bf90d58 },
-        { 0x3579f26b0282c4b2, 0x64d592f24fafefae, 0xb7cded7b28c8c7c0, 0x6a927b6b7173a8d7 }
-    },
-    {
-        { 0x8d7040863ece88eb, 0xf0e307a980eec08c, 0xac2250610d788fda, 0x056d92a43a0d478d },
-        { 0x1f6db24f986e4656, 0x1021c02ed1e9105b, 0xf8ff3fff2cc0a375, 0x1d2a6bf8c6c82592 },
+        { 0x3632137023cae00b, 0x544acf0ad1accf59, -0x698befb62de5e378, 0x780b8cc3fa2a44a7 },
+        { 0x1ef38abc234f305f, -0x65a88042ebfa21f8, 0x5e82a51434e62a0d, 0x5ff418726271b7a1 }
+    },
+    {
+        { -0x1a24b817ec496ac0, -0x0ca2d5c4bcd9ef1f, -0x53e0d916c787ed8a, 0x29d4db8ca0a0cb69 },
+        { 0x398e080c1789db9d, -0x589fdfda0c18870b, -0x056776b3f942fca3, 0x106a03dc25a966be },
+        { -0x2652f550ccccac30, 0x38669da5acd309e5, 0x3c57658ac888f7f0, 0x4ab38a51052cbefa }
+    },
+},
+{
+    {
+        { -0x09701d177f621fac, -0x1c43f695637d452f, 0x076353d40aadbf45, 0x7b9b1fb5dea1959e },
+        { -0x20253411bcdb3f17, 0x054442883f955bb7, -0x2108555715ce9f61, 0x68aee70642287cff },
+        { -0x0fe3370e8b8e33f4, -0x6adbd1c8a86f7d45, 0x27776093d3e46b5f, 0x2d13d55a28bd85fb }
+    },
+    {
+        { -0x40fe6331851185ae, -0x57212d491bab152d, 0x3c619f0b87a8bb19, 0x3619b5d7560916d8 },
+        { -0x053a2df9a4ca4726, -0x572575657a9db449, -0x332d356ec2de32f1, 0x6b8341ee8bf90d58 },
+        { 0x3579f26b0282c4b2, 0x64d592f24fafefae, -0x48321284d7373840, 0x6a927b6b7173a8d7 }
+    },
+    {
+        { -0x728fbf79c1317715, -0x0f1cf8567f113f74, -0x53ddaf9ef2877026, 0x056d92a43a0d478d },
+        { 0x1f6db24f986e4656, 0x1021c02ed1e9105b, -0x0700c000d33f5c8b, 0x1d2a6bf8c6c82592 },
         { 0x1b05a196fc3da5a1, 0x77d7a8c243b59ed0, 0x06da3d6297d17918, 0x66fbb494f12353f7 }
     },
     {
-        { 0xd6d70996f12309d6, 0xdbfb2385e9c3d539, 0x46d602b0f7552411, 0x270a0b0557843e0c },
-        { 0x751a50b9d85c0fb8, 0xd1afdc258bcf097b, 0x2f16a6a38309a969, 0x14ddff9ee5b00659 },
-        { 0x61ff0640a7862bcc, 0x81cac09a5f11abfe, 0x9047830455d12abb, 0x19a4bde1945ae873 }
+        { -0x2928f6690edcf62a, -0x2404dc7a163c2ac7, 0x46d602b0f7552411, 0x270a0b0557843e0c },
+        { 0x751a50b9d85c0fb8, -0x2e5023da7430f685, 0x2f16a6a38309a969, 0x14ddff9ee5b00659 },
+        { 0x61ff0640a7862bcc, -0x7e353f65a0ee5402, -0x6fb87cfbaa2ed545, 0x19a4bde1945ae873 }
     },
     {
         { 0x40c709dec076c49f, 0x657bfaf27f3e53f6, 0x40662331eca042c4, 0x14b375487eb4df04 },
-        { 0x9b9f26f520a6200a, 0x64804443cf13eaf8, 0x8a63673f8631edd3, 0x72bbbce11ed39dc1 },
-        { 0xae853c94ab66dc47, 0xeb62343edf762d6e, 0xf08e0e186fb2f7d1, 0x4f0b1c02700ab37a }
-    },
-    {
-        { 0x79fd21ccc1b2e23f, 0x4ae7c281453df52a, 0xc8172ec9d151486b, 0x68abe9443e0a7534 },
-        { 0xe1706787d81951fa, 0xa10a2c8eb290c77b, 0xe7382fa03ed66773, 0x0a4d84710bcc4b54 },
-        { 0xda12c6c407831dcb, 0x0da230d74d5c510d, 0x4ab1531e6bd404e1, 0x4106b166bcf440ef }
-    },
-    {
-        { 0xa485ccd539e4ecf2, 0x5aa3f3ad0555bab5, 0x145e3439937df82d, 0x1238b51e1214283f },
-        { 0x02e57a421cd23668, 0x4ad9fb5d0eaef6fd, 0x954e6727b1244480, 0x7f792f9d2699f331 },
-        { 0x0b886b925fd4d924, 0x60906f7a3626a80d, 0xecd367b4b98abd12, 0x2876beb1def344cf }
-    },
-    {
-        { 0xd594b3333a8a85f8, 0x4ea37689e78d7d58, 0x73bf9f455e8e351f, 0x5507d7d2bc41ebb4 },
-        { 0xdc84e93563144691, 0x632fe8a0d61f23f4, 0x4caa800612a9a8d5, 0x48f9dbfa0e9918d3 },
-        { 0x1ceb2903299572fc, 0x7c8ccaa29502d0ee, 0x91bfa43411cce67b, 0x5784481964a831e7 }
-    },
-},
-{
-    {
-        { 0xd6cfd1ef5fddc09c, 0xe82b3efdf7575dce, 0x25d56b5d201634c2, 0x3041c6bb04ed2b9b },
-        { 0xda7c2b256768d593, 0x98c1c0574422ca13, 0xf1a80bd5ca0ace1d, 0x29cdd1adc088a690 },
-        { 0x0ff2f2f9d956e148, 0xade797759f356b2e, 0x1a4698bb5f6c025c, 0x104bbd6814049a7b }
-    },
-    {
-        { 0xa95d9a5fd67ff163, 0xe92be69d4cc75681, 0xb7f8024cde20f257, 0x204f2a20fb072df5 },
+        { -0x6460d90adf59dff6, 0x64804443cf13eaf8, -0x759c98c079ce122d, 0x72bbbce11ed39dc1 },
+        { -0x517ac36b549923b9, -0x149dcbc12089d292, -0x0f71f1e7904d082f, 0x4f0b1c02700ab37a }
+    },
+    {
+        { 0x79fd21ccc1b2e23f, 0x4ae7c281453df52a, -0x37e8d1362eaeb795, 0x68abe9443e0a7534 },
+        { -0x1e8f987827e6ae06, -0x5ef5d3714d6f3885, -0x18c7d05fc129988d, 0x0a4d84710bcc4b54 },
+        { -0x25ed393bf87ce235, 0x0da230d74d5c510d, 0x4ab1531e6bd404e1, 0x4106b166bcf440ef }
+    },
+    {
+        { -0x5b7a332ac61b130e, 0x5aa3f3ad0555bab5, 0x145e3439937df82d, 0x1238b51e1214283f },
+        { 0x02e57a421cd23668, 0x4ad9fb5d0eaef6fd, -0x6ab198d84edbbb80, 0x7f792f9d2699f331 },
+        { 0x0b886b925fd4d924, 0x60906f7a3626a80d, -0x132c984b467542ee, 0x2876beb1def344cf }
+    },
+    {
+        { -0x2a6b4cccc5757a08, 0x4ea37689e78d7d58, 0x73bf9f455e8e351f, 0x5507d7d2bc41ebb4 },
+        { -0x237b16ca9cebb96f, 0x632fe8a0d61f23f4, 0x4caa800612a9a8d5, 0x48f9dbfa0e9918d3 },
+        { 0x1ceb2903299572fc, 0x7c8ccaa29502d0ee, -0x6e405bcbee331985, 0x5784481964a831e7 }
+    },
+},
+{
+    {
+        { -0x29302e10a0223f64, -0x17d4c10208a8a232, 0x25d56b5d201634c2, 0x3041c6bb04ed2b9b },
+        { -0x2583d4da98972a6d, -0x673e3fa8bbdd35ed, -0x0e57f42a35f531e3, 0x29cdd1adc088a690 },
+        { 0x0ff2f2f9d956e148, -0x5218688a60ca94d2, 0x1a4698bb5f6c025c, 0x104bbd6814049a7b }
+    },
+    {
+        { -0x56a265a029800e9d, -0x16d41962b338a97f, -0x4807fdb321df0da9, 0x204f2a20fb072df5 },
         { 0x51f0fd3168f1ed67, 0x2c811dcdd86f3bc2, 0x44dc5c4304d2f2de, 0x5be8cc57092a7149 },
-        { 0xc8143b3d30ebb079, 0x7589155abd652e30, 0x653c3c318f6d5c31, 0x2570fb17c279161f }
-    },
-    {
-        { 0x192ea9550bb8245a, 0xc8e6fba88f9050d1, 0x7986ea2d88a4c935, 0x241c5f91de018668 },
-        { 0x3efa367f2cb61575, 0xf5f96f761cd6026c, 0xe8c7142a65b52562, 0x3dcb65ea53030acd },
-        { 0x28d8172940de6caa, 0x8fbf2cf022d9733a, 0x16d7fcdd235b01d1, 0x08420edd5fcdf0e5 }
-    },
-    {
-        { 0x0358c34e04f410ce, 0xb6135b5a276e0685, 0x5d9670c7ebb91521, 0x04d654f321db889c },
-        { 0xcdff20ab8362fa4a, 0x57e118d4e21a3e6e, 0xe3179617fc39e62b, 0x0d9a53efbc1769fd },
+        { -0x37ebc4c2cf144f87, 0x7589155abd652e30, 0x653c3c318f6d5c31, 0x2570fb17c279161f }
+    },
+    {
+        { 0x192ea9550bb8245a, -0x37190457706faf2f, 0x7986ea2d88a4c935, 0x241c5f91de018668 },
+        { 0x3efa367f2cb61575, -0x0a069089e329fd94, -0x1738ebd59a4ada9e, 0x3dcb65ea53030acd },
+        { 0x28d8172940de6caa, -0x7040d30fdd268cc6, 0x16d7fcdd235b01d1, 0x08420edd5fcdf0e5 }
+    },
+    {
+        { 0x0358c34e04f410ce, -0x49eca4a5d891f97b, 0x5d9670c7ebb91521, 0x04d654f321db889c },
+        { -0x3200df547c9d05b6, 0x57e118d4e21a3e6e, -0x1ce869e803c619d5, 0x0d9a53efbc1769fd },
         { 0x5e7dc116ddbdb5d5, 0x2954deb68da5dd2d, 0x1cb608173334a292, 0x4a7a4f2618991ad7 }
     },
     {
-        { 0x24c3b291af372a4b, 0x93da8270718147f2, 0xdd84856486899ef2, 0x4a96314223e0ee33 },
-        { 0xf4a718025fb15f95, 0x3df65f346b5c1b8f, 0xcdfcf08500e01112, 0x11b50c4cddd31848 },
-        { 0xa6e8274408a4ffd6, 0x738e177e9c1576d9, 0x773348b63d02b3f2, 0x4f4bce4dce6bcc51 }
-    },
-    {
-        { 0x30e2616ec49d0b6f, 0xe456718fcaec2317, 0x48eb409bf26b4fa6, 0x3042cee561595f37 },
-        { 0xa71fce5ae2242584, 0x26ea725692f58a9e, 0xd21a09d71cea3cf4, 0x73fcdd14b71c01e6 },
-        { 0x427e7079449bac41, 0x855ae36dbce2310a, 0x4cae76215f841a7c, 0x389e740c9a9ce1d6 }
-    },
-    {
-        { 0xc9bd78f6570eac28, 0xe55b0b3227919ce1, 0x65fc3eaba19b91ed, 0x25c425e5d6263690 },
-        { 0x64fcb3ae34dcb9ce, 0x97500323e348d0ad, 0x45b3f07d62c6381b, 0x61545379465a6788 },
-        { 0x3f3e06a6f1d7de6e, 0x3ef976278e062308, 0x8c14f6264e8a6c77, 0x6539a08915484759 }
-    },
-    {
-        { 0xddc4dbd414bb4a19, 0x19b2bc3c98424f8e, 0x48a89fd736ca7169, 0x0f65320ef019bd90 },
-        { 0xe9d21f74c3d2f773, 0xc150544125c46845, 0x624e5ce8f9b99e33, 0x11c5e4aac5cd186c },
-        { 0xd486d1b1cafde0c6, 0x4f3fe6e3163b5181, 0x59a8af0dfaf2939a, 0x4cabc7bdec33072a }
-    },
-},
-{
-    {
-        { 0xf7c0a19c1a54a044, 0x4a1c5e2477bd9fbb, 0xa6e3ca115af22972, 0x1819bb953f2e9e0d },
-        { 0x16faa8fb532f7428, 0xdbd42ea046a4e272, 0x5337653b8b9ea480, 0x4065947223973f03 },
-        { 0x498fbb795e042e84, 0x7d0dd89a7698b714, 0x8bfb0ba427fe6295, 0x36ba82e721200524 }
-    },
-    {
-        { 0xc8d69d0a57274ed5, 0x45ba803260804b17, 0xdf3cda102255dfac, 0x77d221232709b339 },
-        { 0xd60ecbb74245ec41, 0xfd9be89e34348716, 0xc9240afee42284de, 0x4472f648d0531db4 },
-        { 0x498a6d7064ad94d8, 0xa5b5c8fd9af62263, 0x8ca8ed0545c141f4, 0x2c63bec3662d358c }
-    },
-    {
-        { 0x9a518b3a8586f8bf, 0x9ee71af6cbb196f0, 0xaa0625e6a2385cf2, 0x1deb2176ddd7c8d1 },
-        { 0x7fe60d8bea787955, 0xb9dc117eb5f401b7, 0x91c7c09a19355cce, 0x22692ef59442bedf },
-        { 0x8563d19a2066cf6c, 0x401bfd8c4dcc7cd7, 0xd976a6becd0d8f62, 0x67cfd773a278b05e }
-    },
-    {
-        { 0x2d5fa9855a4e586a, 0x65f8f7a449beab7e, 0xaa074dddf21d33d3, 0x185cba721bcb9dee },
-        { 0x8dec31faef3ee475, 0x99dbff8a9e22fd92, 0x512d11594e26cab1, 0x0cde561eec4310b9 },
-        { 0x93869da3f4e3cb41, 0xbf0392f540f7977e, 0x026204fcd0463b83, 0x3ec91a769eec6eed }
-    },
-    {
-        { 0x0fad2fb7b0a3402f, 0x46615ecbfb69f4a8, 0xf745bcc8c5f8eaa6, 0x7a5fa8794a94e896 },
-        { 0x1e9df75bf78166ad, 0x4dfda838eb0cd7af, 0xba002ed8c1eaf988, 0x13fedb3e11f33cfc },
-        { 0x52958faa13cd67a1, 0x965ee0818bdbb517, 0x16e58daa2e8845b3, 0x357d397d5499da8f }
+        { 0x24c3b291af372a4b, -0x6c257d8f8e7eb80e, -0x227b7a9b7976610e, 0x4a96314223e0ee33 },
+        { -0x0b58e7fda04ea06b, 0x3df65f346b5c1b8f, -0x32030f7aff1feeee, 0x11b50c4cddd31848 },
+        { -0x5917d8bbf75b002a, 0x738e177e9c1576d9, 0x773348b63d02b3f2, 0x4f4bce4dce6bcc51 }
+    },
+    {
+        { 0x30e2616ec49d0b6f, -0x1ba98e703513dce9, 0x48eb409bf26b4fa6, 0x3042cee561595f37 },
+        { -0x58e031a51ddbda7c, 0x26ea725692f58a9e, -0x2de5f628e315c30c, 0x73fcdd14b71c01e6 },
+        { 0x427e7079449bac41, -0x7aa51c92431dcef6, 0x4cae76215f841a7c, 0x389e740c9a9ce1d6 }
+    },
+    {
+        { -0x36428709a8f153d8, -0x1aa4f4cdd86e631f, 0x65fc3eaba19b91ed, 0x25c425e5d6263690 },
+        { 0x64fcb3ae34dcb9ce, -0x68affcdc1cb72f53, 0x45b3f07d62c6381b, 0x61545379465a6788 },
+        { 0x3f3e06a6f1d7de6e, 0x3ef976278e062308, -0x73eb09d9b1759389, 0x6539a08915484759 }
+    },
+    {
+        { -0x223b242beb44b5e7, 0x19b2bc3c98424f8e, 0x48a89fd736ca7169, 0x0f65320ef019bd90 },
+        { -0x162de08b3c2d088d, -0x3eafabbeda3b97bb, 0x624e5ce8f9b99e33, 0x11c5e4aac5cd186c },
+        { -0x2b792e4e35021f3a, 0x4f3fe6e3163b5181, 0x59a8af0dfaf2939a, 0x4cabc7bdec33072a }
+    },
+},
+{
+    {
+        { -0x083f5e63e5ab5fbc, 0x4a1c5e2477bd9fbb, -0x591c35eea50dd68e, 0x1819bb953f2e9e0d },
+        { 0x16faa8fb532f7428, -0x242bd15fb95b1d8e, 0x5337653b8b9ea480, 0x4065947223973f03 },
+        { 0x498fbb795e042e84, 0x7d0dd89a7698b714, -0x7404f45bd8019d6b, 0x36ba82e721200524 }
+    },
+    {
+        { -0x372962f5a8d8b12b, 0x45ba803260804b17, -0x20c325efddaa2054, 0x77d221232709b339 },
+        { -0x29f13448bdba13bf, -0x02641761cbcb78ea, -0x36dbf5011bdd7b22, 0x4472f648d0531db4 },
+        { 0x498a6d7064ad94d8, -0x5a4a37026509dd9d, -0x735712faba3ebe0c, 0x2c63bec3662d358c }
+    },
+    {
+        { -0x65ae74c57a790741, -0x6118e509344e6910, -0x55f9da195dc7a30e, 0x1deb2176ddd7c8d1 },
+        { 0x7fe60d8bea787955, -0x4623ee814a0bfe49, -0x6e383f65e6caa332, 0x22692ef59442bedf },
+        { -0x7a9c2e65df993094, 0x401bfd8c4dcc7cd7, -0x2689594132f2709e, 0x67cfd773a278b05e }
+    },
+    {
+        { 0x2d5fa9855a4e586a, 0x65f8f7a449beab7e, -0x55f8b2220de2cc2d, 0x185cba721bcb9dee },
+        { -0x7213ce0510c11b8b, -0x6624007561dd026e, 0x512d11594e26cab1, 0x0cde561eec4310b9 },
+        { -0x6c79625c0b1c34bf, -0x40fc6d0abf086882, 0x026204fcd0463b83, 0x3ec91a769eec6eed }
+    },
+    {
+        { 0x0fad2fb7b0a3402f, 0x46615ecbfb69f4a8, -0x08ba43373a07155a, 0x7a5fa8794a94e896 },
+        { 0x1e9df75bf78166ad, 0x4dfda838eb0cd7af, -0x45ffd1273e150678, 0x13fedb3e11f33cfc },
+        { 0x52958faa13cd67a1, -0x69a11f7e74244ae9, 0x16e58daa2e8845b3, 0x357d397d5499da8f }
     },
     {
         { 0x481dacb4194bfbf8, 0x4d77e3f1bae58299, 0x1ef4612e7d1372a0, 0x3a8d867e70ff69e1 },
-        { 0x1ebfa05fb0bace6c, 0xc934620c1caf9a1e, 0xcc771cc41d82b61a, 0x2d94a16aa5f74fec },
-        { 0x6f58cd5d55aff958, 0xba3eaa5c75567721, 0x75c123999165227d, 0x69be1343c2f2b35e }
-    },
-    {
-        { 0x82bbbdac684b8de3, 0xa2f4c7d03fca0718, 0x337f92fbe096aaa8, 0x200d4d8c63587376 },
+        { 0x1ebfa05fb0bace6c, -0x36cb9df3e35065e2, -0x3388e33be27d49e6, 0x2d94a16aa5f74fec },
+        { 0x6f58cd5d55aff958, -0x45c155a38aa988df, 0x75c123999165227d, 0x69be1343c2f2b35e }
+    },
+    {
+        { -0x7d44425397b4721d, -0x5d0b382fc035f8e8, 0x337f92fbe096aaa8, 0x200d4d8c63587376 },
         { 0x0e091d5ee197c92a, 0x4f51019f2945119f, 0x143679b9f034e99c, 0x7d88112e4d24c696 },
-        { 0x208aed4b4893b32b, 0x3efbf23ebe59b964, 0xd762deb0dba5e507, 0x69607bd681bd9d94 }
-    },
-    {
-        { 0x3b7f3bd49323a902, 0x7c21b5566b2c6e53, 0xe5ba8ff53a7852a7, 0x28bc77a5838ece00 },
-        { 0xf6be021068de1ce1, 0xe8d518e70edcbc1f, 0xe3effdd01b5505a5, 0x35f63353d3ec3fd0 },
+        { 0x208aed4b4893b32b, 0x3efbf23ebe59b964, -0x289d214f245a1af9, 0x69607bd681bd9d94 }
+    },
+    {
+        { 0x3b7f3bd49323a902, 0x7c21b5566b2c6e53, -0x1a45700ac587ad59, 0x28bc77a5838ece00 },
+        { -0x0941fdef9721e31f, -0x172ae718f12343e1, -0x1c10022fe4aafa5b, 0x35f63353d3ec3fd0 },
         { 0x63ba78a8e25d8036, 0x63651e0094333490, 0x48d82f20288ce532, 0x3a31abfa36b57524 }
     },
 },
 {
     {
-        { 0xc08f788f3f78d289, 0xfe30a72ca1404d9f, 0xf2778bfccf65cc9d, 0x7ee498165acb2021 },
-        { 0x239e9624089c0a2e, 0xc748c4c03afe4738, 0x17dbed2a764fa12a, 0x639b93f0321c8582 },
-        { 0x7bd508e39111a1c3, 0x2b2b90d480907489, 0xe7d2aec2ae72fd19, 0x0edf493c85b602a6 }
-    },
-    {
-        { 0x6767c4d284764113, 0xa090403ff7f5f835, 0x1c8fcffacae6bede, 0x04c00c54d1dfa369 },
-        { 0xaecc8158599b5a68, 0xea574f0febade20e, 0x4fe41d7422b67f07, 0x403b92e3019d4fb4 },
-        { 0x4dc22f818b465cf8, 0x71a0f35a1480eff8, 0xaee8bfad04c7d657, 0x355bb12ab26176f4 }
-    },
-    {
-        { 0xa301dac75a8c7318, 0xed90039db3ceaa11, 0x6f077cbf3bae3f2d, 0x7518eaf8e052ad8e },
-        { 0xa71e64cc7493bbf4, 0xe5bd84d9eca3b0c3, 0x0a6bc50cfa05e785, 0x0f9b8132182ec312 },
-        { 0xa48859c41b7f6c32, 0x0f2d60bcf4383298, 0x1815a929c9b1d1d9, 0x47c3871bbb1755c4 }
-    },
-    {
-        { 0xfbe65d50c85066b0, 0x62ecc4b0b3a299b0, 0xe53754ea441ae8e0, 0x08fea02ce8d48d5f },
-        { 0x5144539771ec4f48, 0xf805b17dc98c5d6e, 0xf762c11a47c3c66b, 0x00b89b85764699dc },
-        { 0x824ddd7668deead0, 0xc86445204b685d23, 0xb514cfcd5d89d665, 0x473829a74f75d537 }
+        { -0x3f708770c0872d77, -0x01cf58d35ebfb261, -0x0d887403309a3363, 0x7ee498165acb2021 },
+        { 0x239e9624089c0a2e, -0x38b73b3fc501b8c8, 0x17dbed2a764fa12a, 0x639b93f0321c8582 },
+        { 0x7bd508e39111a1c3, 0x2b2b90d480907489, -0x182d513d518d02e7, 0x0edf493c85b602a6 }
+    },
+    {
+        { 0x6767c4d284764113, -0x5f6fbfc0080a07cb, 0x1c8fcffacae6bede, 0x04c00c54d1dfa369 },
+        { -0x51337ea7a664a598, -0x15a8b0f014521df2, 0x4fe41d7422b67f07, 0x403b92e3019d4fb4 },
+        { 0x4dc22f818b465cf8, 0x71a0f35a1480eff8, -0x51174052fb3829a9, 0x355bb12ab26176f4 }
+    },
+    {
+        { -0x5cfe2538a5738ce8, -0x126ffc624c3155ef, 0x6f077cbf3bae3f2d, 0x7518eaf8e052ad8e },
+        { -0x58e19b338b6c440c, -0x1a427b26135c4f3d, 0x0a6bc50cfa05e785, 0x0f9b8132182ec312 },
+        { -0x5b77a63be48093ce, 0x0f2d60bcf4383298, 0x1815a929c9b1d1d9, 0x47c3871bbb1755c4 }
+    },
+    {
+        { -0x0419a2af37af9950, 0x62ecc4b0b3a299b0, -0x1ac8ab15bbe51720, 0x08fea02ce8d48d5f },
+        { 0x5144539771ec4f48, -0x07fa4e823673a292, -0x089d3ee5b83c3995, 0x00b89b85764699dc },
+        { -0x7db2228997211530, -0x379bbadfb497a2dd, -0x4aeb3032a276299b, 0x473829a74f75d537 }
     },
     {
         { 0x23d9533aad3902c9, 0x64c2ddceef03588f, 0x15257390cfe12fb4, 0x6c668b4d44e4d390 },
-        { 0x82d2da754679c418, 0xe63bd7d8b2618df0, 0x355eef24ac47eb0a, 0x2078684c4833c6b4 },
-        { 0x3b48cf217a78820c, 0xf76a0ab281273e97, 0xa96c65a78c8eed7b, 0x7411a6054f8a433f }
-    },
-    {
-        { 0x579ae53d18b175b4, 0x68713159f392a102, 0x8455ecba1eef35f5, 0x1ec9a872458c398f },
-        { 0x4d659d32b99dc86d, 0x044cdc75603af115, 0xb34c712cdcc2e488, 0x7c136574fb8134ff },
-        { 0xb8e6a4d400a2509b, 0x9b81d7020bc882b4, 0x57e7cc9bf1957561, 0x3add88a5c7cd6460 }
-    },
-    {
-        { 0x85c298d459393046, 0x8f7e35985ff659ec, 0x1d2ca22af2f66e3a, 0x61ba1131a406a720 },
-        { 0xab895770b635dcf2, 0x02dfef6cf66c1fbc, 0x85530268beb6d187, 0x249929fccc879e74 },
-        { 0xa3d0a0f116959029, 0x023b6b6cba7ebd89, 0x7bf15a3e26783307, 0x5620310cbbd8ece7 }
-    },
-    {
-        { 0x6646b5f477e285d6, 0x40e8ff676c8f6193, 0xa6ec7311abb594dd, 0x7ec846f3658cec4d },
-        { 0x528993434934d643, 0xb9dbf806a51222f5, 0x8f6d878fc3f41c22, 0x37676a2a4d9d9730 },
-        { 0x9b5e8f3f1da22ec7, 0x130f1d776c01cd13, 0x214c8fcfa2989fb8, 0x6daaf723399b9dd5 }
-    },
-},
-{
-    {
-        { 0x81aebbdd2cd13070, 0x962e4325f85a0e9e, 0xde9391aacadffecb, 0x53177fda52c230e6 },
-        { 0x591e4a5610628564, 0x2a4bb87ca8b4df34, 0xde2a2572e7a38e43, 0x3cbdabd9fee5046e },
-        { 0xa7bc970650b9de79, 0x3d12a7fbc301b59b, 0x02652e68d36ae38c, 0x79d739835a6199dc }
-    },
-    {
-        { 0x21c9d9920d591737, 0x9bea41d2e9b46cd6, 0xe20e84200d89bfca, 0x79d99f946eae5ff8 },
-        { 0xd9354df64131c1bd, 0x758094a186ec5822, 0x4464ee12e459f3c2, 0x6c11fce4cb133282 },
-        { 0xf17b483568673205, 0x387deae83caad96c, 0x61b471fd56ffe386, 0x31741195b745a599 }
+        { -0x7d2d258ab9863be8, -0x19c428274d9e7210, 0x355eef24ac47eb0a, 0x2078684c4833c6b4 },
+        { 0x3b48cf217a78820c, -0x0895f54d7ed8c169, -0x56939a5873711285, 0x7411a6054f8a433f }
+    },
+    {
+        { 0x579ae53d18b175b4, 0x68713159f392a102, -0x7baa1345e110ca0b, 0x1ec9a872458c398f },
+        { 0x4d659d32b99dc86d, 0x044cdc75603af115, -0x4cb38ed3233d1b78, 0x7c136574fb8134ff },
+        { -0x47195b2bff5daf65, -0x647e28fdf4377d4c, 0x57e7cc9bf1957561, 0x3add88a5c7cd6460 }
+    },
+    {
+        { -0x7a3d672ba6c6cfba, -0x7081ca67a009a614, 0x1d2ca22af2f66e3a, 0x61ba1131a406a720 },
+        { -0x5476a88f49ca230e, 0x02dfef6cf66c1fbc, -0x7aacfd9741492e79, 0x249929fccc879e74 },
+        { -0x5c2f5f0ee96a6fd7, 0x023b6b6cba7ebd89, 0x7bf15a3e26783307, 0x5620310cbbd8ece7 }
+    },
+    {
+        { 0x6646b5f477e285d6, 0x40e8ff676c8f6193, -0x59138cee544a6b23, 0x7ec846f3658cec4d },
+        { 0x528993434934d643, -0x462407f95aeddd0b, -0x709278703c0be3de, 0x37676a2a4d9d9730 },
+        { -0x64a170c0e25dd139, 0x130f1d776c01cd13, 0x214c8fcfa2989fb8, 0x6daaf723399b9dd5 }
+    },
+},
+{
+    {
+        { -0x7e514422d32ecf90, -0x69d1bcda07a5f162, -0x216c6e5535200135, 0x53177fda52c230e6 },
+        { 0x591e4a5610628564, 0x2a4bb87ca8b4df34, -0x21d5da8d185c71bd, 0x3cbdabd9fee5046e },
+        { -0x584368f9af462187, 0x3d12a7fbc301b59b, 0x02652e68d36ae38c, 0x79d739835a6199dc }
+    },
+    {
+        { 0x21c9d9920d591737, -0x6415be2d164b932a, -0x1df17bdff2764036, 0x79d99f946eae5ff8 },
+        { -0x26cab209bece3e43, 0x758094a186ec5822, 0x4464ee12e459f3c2, 0x6c11fce4cb133282 },
+        { -0x0e84b7ca9798cdfb, 0x387deae83caad96c, 0x61b471fd56ffe386, 0x31741195b745a599 }
     },
     {
         { 0x17f8ba683b02a047, 0x50212096feefb6c8, 0x70139be21556cbe2, 0x203e44a11d98915b },
-        { 0xe8d10190b77a360b, 0x99b983209995e702, 0xbd4fdff8fa0247aa, 0x2772e344e0d36a87 },
-        { 0xd6863eba37b9e39f, 0x105bc169723b5a23, 0x104f6459a65c0762, 0x567951295b4d38d4 }
+        { -0x172efe6f4885c9f5, -0x66467cdf666a18fe, -0x42b0200705fdb856, 0x2772e344e0d36a87 },
+        { -0x2979c145c8461c61, 0x105bc169723b5a23, 0x104f6459a65c0762, 0x567951295b4d38d4 }
     },
     {
         { 0x07242eb30d4b497f, 0x1ef96306b9bccc87, 0x37950934d8116f45, 0x05468d6201405b04 },
-        { 0x535fd60613037524, 0xe210adf6b0fbc26a, 0xac8d0a9b23e990ae, 0x47204d08d72fdbf9 },
-        { 0x00f565a9f93267de, 0xcecfd78dc0d58e8a, 0xa215e2dcf318e28e, 0x4599ee919b633352 }
-    },
-    {
-        { 0xac746d6b861ae579, 0x31ab0650f6aea9dc, 0x241d661140256d4c, 0x2f485e853d21a5de },
-        { 0xd3c220ca70e0e76b, 0xb12bea58ea9f3094, 0x294ddec8c3271282, 0x0c3539e1a1d1d028 },
+        { 0x535fd60613037524, -0x1def52094f043d96, -0x5372f564dc166f52, 0x47204d08d72fdbf9 },
+        { 0x00f565a9f93267de, -0x313028723f2a7176, -0x5dea1d230ce71d72, 0x4599ee919b633352 }
+    },
+    {
+        { -0x538b929479e51a87, 0x31ab0650f6aea9dc, 0x241d661140256d4c, 0x2f485e853d21a5de },
+        { -0x2c3ddf358f1f1895, -0x4ed415a71560cf6c, 0x294ddec8c3271282, 0x0c3539e1a1d1d028 },
         { 0x329744839c0833f3, 0x6fe6257fd2abc484, 0x5327d1814b358817, 0x65712585893fe9bc }
     },
     {
-        { 0x81c29f1bd708ee3f, 0xddcb5a05ae6407d0, 0x97aec1d7d2a3eba7, 0x1590521a91d50831 },
-        { 0x9c102fb732a61161, 0xe48e10dd34d520a8, 0x365c63546f9a9176, 0x32f6fe4c046f6006 },
-        { 0x40a3a11ec7910acc, 0x9013dff8f16d27ae, 0x1a9720d8abb195d4, 0x1bb9fe452ea98463 }
-    },
-    {
-        { 0xcf5e6c95cc36747c, 0x294201536b0bc30d, 0x453ac67cee797af0, 0x5eae6ab32a8bb3c9 },
-        { 0xe9d1d950b3d54f9e, 0x2d5f9cbee00d33c1, 0x51c2c656a04fc6ac, 0x65c091ee3c1cbcc9 },
+        { -0x7e3d60e428f711c1, -0x2234a5fa519bf830, -0x68513e282d5c1459, 0x1590521a91d50831 },
+        { -0x63efd048cd59ee9f, -0x1b71ef22cb2adf58, 0x365c63546f9a9176, 0x32f6fe4c046f6006 },
+        { 0x40a3a11ec7910acc, -0x6fec20070e92d852, 0x1a9720d8abb195d4, 0x1bb9fe452ea98463 }
+    },
+    {
+        { -0x30a1936a33c98b84, 0x294201536b0bc30d, 0x453ac67cee797af0, 0x5eae6ab32a8bb3c9 },
+        { -0x162e26af4c2ab062, 0x2d5f9cbee00d33c1, 0x51c2c656a04fc6ac, 0x65c091ee3c1cbcc9 },
         { 0x7083661114f118ea, 0x2b37b87b94349cad, 0x7273f51cb4e99f40, 0x78a2a95823d75698 }
     },
     {
-        { 0xb4f23c425ef83207, 0xabf894d3c9a934b5, 0xd0708c1339fd87f7, 0x1876789117166130 },
-        { 0xa2b072e95c8c2ace, 0x69cffc96651e9c4b, 0x44328ef842e7b42b, 0x5dd996c122aadeb3 },
-        { 0x925b5ef0670c507c, 0x819bc842b93c33bf, 0x10792e9a70dd003f, 0x59ad4b7a6e28dc74 }
+        { -0x4b0dc3bda107cdf9, -0x54076b2c3656cb4b, -0x2f8f73ecc6027809, 0x1876789117166130 },
+        { -0x5d4f8d16a373d532, 0x69cffc96651e9c4b, 0x44328ef842e7b42b, 0x5dd996c122aadeb3 },
+        { -0x6da4a10f98f3af84, -0x7e6437bd46c3cc41, 0x10792e9a70dd003f, 0x59ad4b7a6e28dc74 }
     },
 },
 {
     {
         { 0x583b04bfacad8ea2, 0x29b743e8148be884, 0x2b1e583b0810c5db, 0x2b5449e58eb3bbaa },
-        { 0x5f3a7562eb3dbe47, 0xf7ea38548ebda0b8, 0x00c3e53145747299, 0x1304e9e71627d551 },
-        { 0x789814d26adc9cfe, 0x3c1bab3f8b48dd0b, 0xda0fe1fff979c60a, 0x4468de2d7c2dd693 }
-    },
-    {
-        { 0x4b9ad8c6f86307ce, 0x21113531435d0c28, 0xd4a866c5657a772c, 0x5da6427e63247352 },
-        { 0x51bb355e9419469e, 0x33e6dc4c23ddc754, 0x93a5b6d6447f9962, 0x6cce7c6ffb44bd63 },
-        { 0x1a94c688deac22ca, 0xb9066ef7bbae1ff8, 0x88ad8c388d59580f, 0x58f29abfe79f2ca8 }
-    },
-    {
-        { 0x4b5a64bf710ecdf6, 0xb14ce538462c293c, 0x3643d056d50b3ab9, 0x6af93724185b4870 },
-        { 0xe90ecfab8de73e68, 0x54036f9f377e76a5, 0xf0495b0bbe015982, 0x577629c4a7f41e36 },
-        { 0x3220024509c6a888, 0xd2e036134b558973, 0x83e236233c33289f, 0x701f25bb0caec18f }
-    },
-    {
-        { 0x9d18f6d97cbec113, 0x844a06e674bfdbe4, 0x20f5b522ac4e60d6, 0x720a5bc050955e51 },
-        { 0xc3a8b0f8e4616ced, 0xf700660e9e25a87d, 0x61e3061ff4bca59c, 0x2e0c92bfbdc40be9 },
-        { 0x0c3f09439b805a35, 0xe84e8b376242abfc, 0x691417f35c229346, 0x0e9b9cbb144ef0ec }
-    },
-    {
-        { 0x8dee9bd55db1beee, 0xc9c3ab370a723fb9, 0x44a8f1bf1c68d791, 0x366d44191cfd3cde },
-        { 0xfbbad48ffb5720ad, 0xee81916bdbf90d0e, 0xd4813152635543bf, 0x221104eb3f337bd8 },
-        { 0x9e3c1743f2bc8c14, 0x2eda26fcb5856c3b, 0xccb82f0e68a7fb97, 0x4167a4e6bc593244 }
-    },
-    {
-        { 0xc2be2665f8ce8fee, 0xe967ff14e880d62c, 0xf12e6e7e2f364eee, 0x34b33370cb7ed2f6 },
+        { 0x5f3a7562eb3dbe47, -0x0815c7ab71425f48, 0x00c3e53145747299, 0x1304e9e71627d551 },
+        { 0x789814d26adc9cfe, 0x3c1bab3f8b48dd0b, -0x25f01e00068639f6, 0x4468de2d7c2dd693 }
+    },
+    {
+        { 0x4b9ad8c6f86307ce, 0x21113531435d0c28, -0x2b57993a9a8588d4, 0x5da6427e63247352 },
+        { 0x51bb355e9419469e, 0x33e6dc4c23ddc754, -0x6c5a4929bb80669e, 0x6cce7c6ffb44bd63 },
+        { 0x1a94c688deac22ca, -0x46f991084451e008, -0x775273c772a6a7f1, 0x58f29abfe79f2ca8 }
+    },
+    {
+        { 0x4b5a64bf710ecdf6, -0x4eb31ac7b9d3d6c4, 0x3643d056d50b3ab9, 0x6af93724185b4870 },
+        { -0x16f130547218c198, 0x54036f9f377e76a5, -0x0fb6a4f441fea67e, 0x577629c4a7f41e36 },
+        { 0x3220024509c6a888, -0x2d1fc9ecb4aa768d, -0x7c1dc9dcc3ccd761, 0x701f25bb0caec18f }
+    },
+    {
+        { -0x62e7092683413eed, -0x7bb5f9198b40241c, 0x20f5b522ac4e60d6, 0x720a5bc050955e51 },
+        { -0x3c574f071b9e9313, -0x08ff99f161da5783, 0x61e3061ff4bca59c, 0x2e0c92bfbdc40be9 },
+        { 0x0c3f09439b805a35, -0x17b174c89dbd5404, 0x691417f35c229346, 0x0e9b9cbb144ef0ec }
+    },
+    {
+        { -0x7211642aa24e4112, -0x363c54c8f58dc047, 0x44a8f1bf1c68d791, 0x366d44191cfd3cde },
+        { -0x04452b7004a8df53, -0x117e6e942406f2f2, -0x2b7ecead9caabc41, 0x221104eb3f337bd8 },
+        { -0x61c3e8bc0d4373ec, 0x2eda26fcb5856c3b, -0x3347d0f197580469, 0x4167a4e6bc593244 }
+    },
+    {
+        { -0x3d41d99a07317012, -0x169800eb177f29d4, -0x0ed19181d0c9b112, 0x34b33370cb7ed2f6 },
         { 0x643b9d2876f62700, 0x5d1d9d400e7668eb, 0x1b4b430321fc0684, 0x7938bb7e2255246a },
-        { 0xcdc591ee8681d6cc, 0xce02109ced85a753, 0xed7485c158808883, 0x1176fc6e2dfe65e4 }
-    },
-    {
-        { 0xdb90e28949770eb8, 0x98fbcc2aacf440a3, 0x21354ffeded7879b, 0x1f6a3e54f26906b6 },
-        { 0xb4af6cd05b9c619b, 0x2ddfc9f4b2a58480, 0x3d4fa502ebe94dc4, 0x08fc3a4c677d5f34 },
-        { 0x60a4c199d30734ea, 0x40c085b631165cd6, 0xe2333e23f7598295, 0x4f2fad0116b900d1 }
-    },
-    {
-        { 0x962cd91db73bb638, 0xe60577aafc129c08, 0x6f619b39f3b61689, 0x3451995f2944ee81 },
-        { 0x44beb24194ae4e54, 0x5f541c511857ef6c, 0xa61e6b2d368d0498, 0x445484a4972ef7ab },
-        { 0x9152fcd09fea7d7c, 0x4a816c94b0935cf6, 0x258e9aaa47285c40, 0x10b89ca6042893b7 }
-    },
-},
-{
-    {
-        { 0xd67cded679d34aa0, 0xcc0b9ec0cc4db39f, 0xa535a456e35d190f, 0x2e05d9eaf61f6fef },
-        { 0x9b2a426e3b646025, 0x32127190385ce4cf, 0xa25cffc2dd6dea45, 0x06409010bea8de75 },
-        { 0xc447901ad61beb59, 0x661f19bce5dc880a, 0x24685482b7ca6827, 0x293c778cefe07f26 }
-    },
-    {
-        { 0x16c795d6a11ff200, 0xcb70d0e2b15815c9, 0x89f293209b5395b5, 0x50b8c2d031e47b4f },
-        { 0x86809e7007069096, 0xaad75b15e4e50189, 0x07f35715a21a0147, 0x0487f3f112815d5e },
+        { -0x323a6e11797e2934, -0x31fdef63127a58ad, -0x128b7a3ea77f777d, 0x1176fc6e2dfe65e4 }
+    },
+    {
+        { -0x246f1d76b688f148, -0x670433d5530bbf5d, 0x21354ffeded7879b, 0x1f6a3e54f26906b6 },
+        { -0x4b50932fa4639e65, 0x2ddfc9f4b2a58480, 0x3d4fa502ebe94dc4, 0x08fc3a4c677d5f34 },
+        { 0x60a4c199d30734ea, 0x40c085b631165cd6, -0x1dccc1dc08a67d6b, 0x4f2fad0116b900d1 }
+    },
+    {
+        { -0x69d326e248c449c8, -0x19fa885503ed63f8, 0x6f619b39f3b61689, 0x3451995f2944ee81 },
+        { 0x44beb24194ae4e54, 0x5f541c511857ef6c, -0x59e194d2c972fb68, 0x445484a4972ef7ab },
+        { -0x6ead032f60158284, 0x4a816c94b0935cf6, 0x258e9aaa47285c40, 0x10b89ca6042893b7 }
+    },
+},
+{
+    {
+        { -0x29832129862cb560, -0x33f4613f33b24c61, -0x5aca5ba91ca2e6f1, 0x2e05d9eaf61f6fef },
+        { -0x64d5bd91c49b9fdb, 0x32127190385ce4cf, -0x5da3003d229215bb, 0x06409010bea8de75 },
+        { -0x3bb86fe529e414a7, 0x661f19bce5dc880a, 0x24685482b7ca6827, 0x293c778cefe07f26 }
+    },
+    {
+        { 0x16c795d6a11ff200, -0x348f2f1d4ea7ea37, -0x760d6cdf64ac6a4b, 0x50b8c2d031e47b4f },
+        { -0x797f618ff8f96f6a, -0x5528a4ea1b1afe77, 0x07f35715a21a0147, 0x0487f3f112815d5e },
         { 0x48350c08068a4962, 0x6ffdd05351092c9a, 0x17af4f4aaf6fc8dd, 0x4b0553b53cdba58b }
     },
     {
-        { 0xbf05211b27c152d4, 0x5ec26849bd1af639, 0x5e0b2caa8e6fab98, 0x054c8bdd50bd0840 },
-        { 0x9c65fcbe1b32ff79, 0xeb75ea9f03b50f9b, 0xfced2a6c6c07e606, 0x35106cd551717908 },
-        { 0x38a0b12f1dcf073d, 0x4b60a8a3b7f6a276, 0xfed5ac25d3404f9a, 0x72e82d5e5505c229 }
+        { -0x40fadee4d83ead2c, 0x5ec26849bd1af639, 0x5e0b2caa8e6fab98, 0x054c8bdd50bd0840 },
+        { -0x639a0341e4cd0087, -0x148a1560fc4af065, -0x0312d59393f819fa, 0x35106cd551717908 },
+        { 0x38a0b12f1dcf073d, 0x4b60a8a3b7f6a276, -0x012a53da2cbfb066, 0x72e82d5e5505c229 }
     },
     {
         { 0x00d9cdfd69771d02, 0x410276cd6cfbf17e, 0x4c45306c1cb12ec7, 0x2857bf1627500861 },
-        { 0x6b0b697ff0d844c8, 0xbb12f85cd979cb49, 0xd2a541c6c1da0f1f, 0x7b7c242958ce7211 },
-        { 0x9f21903f0101689e, 0xd779dfd3bf861005, 0xa122ee5f3deb0f1b, 0x510df84b485a00d4 }
-    },
-    {
-        { 0x24b3c887c70ac15e, 0xb0f3a557fb81b732, 0x9b2cde2fe578cc1b, 0x4cf7ed0703b54f8e },
-        { 0xa54133bb9277a1fa, 0x74ec3b6263991237, 0x1a3c54dc35d2f15a, 0x2d347144e482ba3a },
-        { 0x6bd47c6598fbee0f, 0x9e4733e2ab55be2d, 0x1093f624127610c5, 0x4e05e26ad0a1eaa4 }
-    },
-    {
-        { 0x1833c773e18fe6c0, 0xe3c4711ad3c87265, 0x3bfd3c4f0116b283, 0x1955875eb4cd4db8 },
-        { 0xda9b6b624b531f20, 0x429a760e77509abb, 0xdbe9f522e823cb80, 0x618f1856880c8f82 },
-        { 0x6da6de8f0e399799, 0x7ad61aa440fda178, 0xb32cd8105e3563dd, 0x15f6beae2ae340ae }
-    },
-    {
-        { 0xba9a0f7b9245e215, 0xf368612dd98c0dbb, 0x2e84e4cbf220b020, 0x6ba92fe962d90eda },
-        { 0x862bcb0c31ec3a62, 0x810e2b451138f3c2, 0x788ec4b839dac2a4, 0x28f76867ae2a9281 },
-        { 0x3e4df9655884e2aa, 0xbd62fbdbdbd465a5, 0xd7596caa0de9e524, 0x6e8042ccb2b1b3d7 }
+        { 0x6b0b697ff0d844c8, -0x44ed07a3268634b7, -0x2d5abe393e25f0e1, 0x7b7c242958ce7211 },
+        { -0x60de6fc0fefe9762, -0x2886202c4079effb, -0x5edd11a0c214f0e5, 0x510df84b485a00d4 }
+    },
+    {
+        { 0x24b3c887c70ac15e, -0x4f0c5aa8047e48ce, -0x64d321d01a8733e5, 0x4cf7ed0703b54f8e },
+        { -0x5abecc446d885e06, 0x74ec3b6263991237, 0x1a3c54dc35d2f15a, 0x2d347144e482ba3a },
+        { 0x6bd47c6598fbee0f, -0x61b8cc1d54aa41d3, 0x1093f624127610c5, 0x4e05e26ad0a1eaa4 }
+    },
+    {
+        { 0x1833c773e18fe6c0, -0x1c3b8ee52c378d9b, 0x3bfd3c4f0116b283, 0x1955875eb4cd4db8 },
+        { -0x2564949db4ace0e0, 0x429a760e77509abb, -0x24160add17dc3480, 0x618f1856880c8f82 },
+        { 0x6da6de8f0e399799, 0x7ad61aa440fda178, -0x4cd327efa1ca9c23, 0x15f6beae2ae340ae }
+    },
+    {
+        { -0x4565f0846dba1deb, -0x0c979ed22673f245, 0x2e84e4cbf220b020, 0x6ba92fe962d90eda },
+        { -0x79d434f3ce13c59e, -0x7ef1d4baeec70c3e, 0x788ec4b839dac2a4, 0x28f76867ae2a9281 },
+        { 0x3e4df9655884e2aa, -0x429d0424242b9a5b, -0x28a69355f2161adc, 0x6e8042ccb2b1b3d7 }
     },
     {
         { 0x1530653616521f7e, 0x660d06b896203dba, 0x2d3989bc545f0879, 0x4b5303af78ebd7b0 },
-        { 0xf10d3c29ce28ca6e, 0xbad34540fcb6093d, 0xe7426ed7a2ea2d3f, 0x08af9d4e4ff298b9 },
+        { -0x0ef2c3d631d73592, -0x452cbabf0349f6c3, -0x18bd91285d15d2c1, 0x08af9d4e4ff298b9 },
         { 0x72f8a6c3bebcbde8, 0x4f0fca4adc3a8e89, 0x6fa9d4e8c7bfdf7a, 0x0dcf2d679b624eb7 }
     },
 },
 {
     {
-        { 0x753941be5a45f06e, 0xd07caeed6d9c5f65, 0x11776b9c72ff51b6, 0x17d2d1d9ef0d4da9 },
+        { 0x753941be5a45f06e, -0x2f8351129263a09b, 0x11776b9c72ff51b6, 0x17d2d1d9ef0d4da9 },
         { 0x3d5947499718289c, 0x12ebf8c524533f26, 0x0262bfcb14c3ef15, 0x20b878d577b7518e },
-        { 0x27f2af18073f3e6a, 0xfd3fe519d7521069, 0x22e3b72c3ca60022, 0x72214f63cc65c6a7 }
-    },
-    {
-        { 0x1d9db7b9f43b29c9, 0xd605824a4f518f75, 0xf2c072bd312f9dc4, 0x1f24ac855a1545b0 },
-        { 0xb4e37f405307a693, 0xaba714d72f336795, 0xd6fbd0a773761099, 0x5fdf48c58171cbc9 },
-        { 0x24d608328e9505aa, 0x4748c1d10c1420ee, 0xc7ffe45c06fb25a2, 0x00ba739e2ae395e6 }
-    },
-    {
-        { 0xae4426f5ea88bb26, 0x360679d984973bfb, 0x5c9f030c26694e50, 0x72297de7d518d226 },
-        { 0x592e98de5c8790d6, 0xe5bfb7d345c2a2df, 0x115a3b60f9b49922, 0x03283a3e67ad78f3 },
-        { 0x48241dc7be0cb939, 0x32f19b4d8b633080, 0xd3dfc90d02289308, 0x05e1296846271945 }
-    },
-    {
-        { 0xadbfbbc8242c4550, 0xbcc80cecd03081d9, 0x843566a6f5c8df92, 0x78cf25d38258ce4c },
-        { 0xba82eeb32d9c495a, 0xceefc8fcf12bb97c, 0xb02dabae93b5d1e0, 0x39c00c9c13698d9b },
-        { 0x15ae6b8e31489d68, 0xaa851cab9c2bf087, 0xc9a75a97f04efa05, 0x006b52076b3ff832 }
-    },
-    {
-        { 0xf5cb7e16b9ce082d, 0x3407f14c417abc29, 0xd4b36bce2bf4a7ab, 0x7de2e9561a9f75ce },
-        { 0x29e0cfe19d95781c, 0xb681df18966310e2, 0x57df39d370516b39, 0x4d57e3443bc76122 },
-        { 0xde70d4f4b6a55ecb, 0x4801527f5d85db99, 0xdbc9c440d3ee9a81, 0x6b2a90af1a6029ed }
-    },
-    {
-        { 0x77ebf3245bb2d80a, 0xd8301b472fb9079b, 0xc647e6f24cee7333, 0x465812c8276c2109 },
-        { 0x6923f4fc9ae61e97, 0x5735281de03f5fd1, 0xa764ae43e6edd12d, 0x5fd8f4e9d12d3e4a },
+        { 0x27f2af18073f3e6a, -0x02c01ae628adef97, 0x22e3b72c3ca60022, 0x72214f63cc65c6a7 }
+    },
+    {
+        { 0x1d9db7b9f43b29c9, -0x29fa7db5b0ae708b, -0x0d3f8d42ced0623c, 0x1f24ac855a1545b0 },
+        { -0x4b1c80bfacf8596d, -0x5458eb28d0cc986b, -0x29042f588c89ef67, 0x5fdf48c58171cbc9 },
+        { 0x24d608328e9505aa, 0x4748c1d10c1420ee, -0x38001ba3f904da5e, 0x00ba739e2ae395e6 }
+    },
+    {
+        { -0x51bbd90a157744da, 0x360679d984973bfb, 0x5c9f030c26694e50, 0x72297de7d518d226 },
+        { 0x592e98de5c8790d6, -0x1a40482cba3d5d21, 0x115a3b60f9b49922, 0x03283a3e67ad78f3 },
+        { 0x48241dc7be0cb939, 0x32f19b4d8b633080, -0x2c2036f2fdd76cf8, 0x05e1296846271945 }
+    },
+    {
+        { -0x52404437dbd3bab0, -0x4337f3132fcf7e27, -0x7bca99590a37206e, 0x78cf25d38258ce4c },
+        { -0x457d114cd263b6a6, -0x311037030ed44684, -0x4fd254516c4a2e20, 0x39c00c9c13698d9b },
+        { 0x15ae6b8e31489d68, -0x557ae35463d40f79, -0x3658a5680fb105fb, 0x006b52076b3ff832 }
+    },
+    {
+        { -0x0a3481e94631f7d3, 0x3407f14c417abc29, -0x2b4c9431d40b5855, 0x7de2e9561a9f75ce },
+        { 0x29e0cfe19d95781c, -0x497e20e7699cef1e, 0x57df39d370516b39, 0x4d57e3443bc76122 },
+        { -0x218f2b0b495aa135, 0x4801527f5d85db99, -0x24363bbf2c11657f, 0x6b2a90af1a6029ed }
+    },
+    {
+        { 0x77ebf3245bb2d80a, -0x27cfe4b8d046f865, -0x39b8190db3118ccd, 0x465812c8276c2109 },
+        { 0x6923f4fc9ae61e97, 0x5735281de03f5fd1, -0x589b51bc19122ed3, 0x5fd8f4e9d12d3e4a },
         { 0x4d43beb22a1062d9, 0x7065fb753831dc16, 0x180d4a7bde2968d7, 0x05b32c2b1cb16790 }
     },
     {
-        { 0xf7fca42c7ad58195, 0x3214286e4333f3cc, 0xb6c29d0d340b979d, 0x31771a48567307e1 },
-        { 0xc8c05eccd24da8fd, 0xa1cf1aac05dfef83, 0xdbbeeff27df9cd61, 0x3b5556a37b471e99 },
-        { 0x32b0c524e14dd482, 0xedb351541a2ba4b6, 0xa3d16048282b5af3, 0x4fc079d27a7336eb }
-    },
-    {
-        { 0xdc348b440c86c50d, 0x1337cbc9cc94e651, 0x6422f74d643e3cb9, 0x241170c2bae3cd08 },
-        { 0x51c938b089bf2f7f, 0x2497bd6502dfe9a7, 0xffffc09c7880e453, 0x124567cecaf98e92 },
-        { 0x3ff9ab860ac473b4, 0xf0911dee0113e435, 0x4ae75060ebc6c4af, 0x3f8612966c87000d }
-    },
-},
-{
-    {
-        { 0x529fdffe638c7bf3, 0xdf2b9e60388b4995, 0xe027b34f1bad0249, 0x7bc92fc9b9fa74ed },
-        { 0x0c9c5303f7957be4, 0xa3c31a20e085c145, 0xb0721d71d0850050, 0x0aba390eab0bf2da },
-        { 0x9f97ef2e801ad9f9, 0x83697d5479afda3a, 0xe906b3ffbd596b50, 0x02672b37dd3fb8e0 }
-    },
-    {
-        { 0xee9ba729398ca7f5, 0xeb9ca6257a4849db, 0x29eb29ce7ec544e1, 0x232ca21ef736e2c8 },
-        { 0x48b2ca8b260885e4, 0xa4286bec82b34c1c, 0x937e1a2617f58f74, 0x741d1fcbab2ca2a5 },
-        { 0xbf61423d253fcb17, 0x08803ceafa39eb14, 0xf18602df9851c7af, 0x0400f3a049e3414b }
-    },
-    {
-        { 0x2efba412a06e7b06, 0x146785452c8d2560, 0xdf9713ebd67a91c7, 0x32830ac7157eadf3 },
-        { 0xabce0476ba61c55b, 0x36a3d6d7c4d39716, 0x6eb259d5e8d82d09, 0x0c9176e984d756fb },
+        { -0x08035bd3852a7e6b, 0x3214286e4333f3cc, -0x493d62f2cbf46863, 0x31771a48567307e1 },
+        { -0x373fa1332db25703, -0x5e30e553fa20107d, -0x2441100d8206329f, 0x3b5556a37b471e99 },
+        { 0x32b0c524e14dd482, -0x124caeabe5d45b4a, -0x5c2e9fb7d7d4a50d, 0x4fc079d27a7336eb }
+    },
+    {
+        { -0x23cb74bbf3793af3, 0x1337cbc9cc94e651, 0x6422f74d643e3cb9, 0x241170c2bae3cd08 },
+        { 0x51c938b089bf2f7f, 0x2497bd6502dfe9a7, -0x00003f63877f1bad, 0x124567cecaf98e92 },
+        { 0x3ff9ab860ac473b4, -0x0f6ee211feec1bcb, 0x4ae75060ebc6c4af, 0x3f8612966c87000d }
+    },
+},
+{
+    {
+        { 0x529fdffe638c7bf3, -0x20d4619fc774b66b, -0x1fd84cb0e452fdb7, 0x7bc92fc9b9fa74ed },
+        { 0x0c9c5303f7957be4, -0x5c3ce5df1f7a3ebb, -0x4f8de28e2f7affb0, 0x0aba390eab0bf2da },
+        { -0x606810d17fe52607, -0x7c9682ab865025c6, -0x16f94c0042a694b0, 0x02672b37dd3fb8e0 }
+    },
+    {
+        { -0x116458d6c673580b, -0x146359da85b7b625, 0x29eb29ce7ec544e1, 0x232ca21ef736e2c8 },
+        { 0x48b2ca8b260885e4, -0x5bd794137d4cb3e4, -0x6c81e5d9e80a708c, 0x741d1fcbab2ca2a5 },
+        { -0x409ebdc2dac034e9, 0x08803ceafa39eb14, -0x0e79fd2067ae3851, 0x0400f3a049e3414b }
+    },
+    {
+        { 0x2efba412a06e7b06, 0x146785452c8d2560, -0x2068ec1429856e39, 0x32830ac7157eadf3 },
+        { -0x5431fb89459e3aa5, 0x36a3d6d7c4d39716, 0x6eb259d5e8d82d09, 0x0c9176e984d756fb },
         { 0x0e782a7ab73769e8, 0x04a05d7875b18e2c, 0x29525226ebcceae1, 0x0d794f8383eba820 }
     },
     {
         { 0x7be44ce7a7a2e1ac, 0x411fd93efad1b8b7, 0x1734a1d70d5f7c9b, 0x0d6592233127db16 },
-        { 0xff35f5cb9e1516f4, 0xee805bcf648aae45, 0xf0d73c2bb93a9ef3, 0x097b0bf22092a6c2 },
-        { 0xc48bab1521a9d733, 0xa6c2eaead61abb25, 0x625c6c1cc6cb4305, 0x7fc90fea93eb3a67 }
-    },
-    {
-        { 0xc527deb59c7cb23d, 0x955391695328404e, 0xd64392817ccf2c7a, 0x6ce97dabf7d8fa11 },
+        { -0x00ca0a3461eae90c, -0x117fa4309b7551bb, -0x0f28c3d446c5610d, 0x097b0bf22092a6c2 },
+        { -0x3b7454eade5628cd, -0x593d151529e544db, 0x625c6c1cc6cb4305, 0x7fc90fea93eb3a67 }
+    },
+    {
+        { -0x3ad8214a63834dc3, -0x6aac6e96acd7bfb2, -0x29bc6d7e8330d386, 0x6ce97dabf7d8fa11 },
         { 0x0408f1fe1f5c5926, 0x1a8f2f5e3b258bf4, 0x40a951a2fdc71669, 0x6598ee93c98b577e },
-        { 0x25b5a8e50ef7c48f, 0xeb6034116f2ce532, 0xc5e75173e53de537, 0x73119fa08c12bb03 }
-    },
-    {
-        { 0x7845b94d21f4774d, 0xbf62f16c7897b727, 0x671857c03c56522b, 0x3cd6a85295621212 },
-        { 0xed30129453f1a4cb, 0xbce621c9c8f53787, 0xfacb2b1338bee7b9, 0x3025798a9ea8428c },
-        { 0x3fecde923aeca999, 0xbdaa5b0062e8c12f, 0x67b99dfc96988ade, 0x3f52c02852661036 }
-    },
-    {
-        { 0x9258bf99eec416c6, 0xac8a5017a9d2f671, 0x629549ab16dea4ab, 0x05d0e85c99091569 },
-        { 0xffeaa48e2a1351c6, 0x28624754fa7f53d7, 0x0b5ba9e57582ddf1, 0x60c0104ba696ac59 },
-        { 0x051de020de9cbe97, 0xfa07fc56b50bcf74, 0x378cec9f0f11df65, 0x36853c69ab96de4d }
-    },
-    {
-        { 0x4433c0b0fac5e7be, 0x724bae854c08dcbe, 0xf1f24cc446978f9b, 0x4a0aff6d62825fc8 },
+        { 0x25b5a8e50ef7c48f, -0x149fcbee90d31ace, -0x3a18ae8c1ac21ac9, 0x73119fa08c12bb03 }
+    },
+    {
+        { 0x7845b94d21f4774d, -0x409d0e93876848d9, 0x671857c03c56522b, 0x3cd6a85295621212 },
+        { -0x12cfed6bac0e5b35, -0x4319de36370ac879, -0x0534d4ecc7411847, 0x3025798a9ea8428c },
+        { 0x3fecde923aeca999, -0x4255a4ff9d173ed1, 0x67b99dfc96988ade, 0x3f52c02852661036 }
+    },
+    {
+        { -0x6da74066113be93a, -0x5375afe8562d098f, 0x629549ab16dea4ab, 0x05d0e85c99091569 },
+        { -0x00155b71d5ecae3a, 0x28624754fa7f53d7, 0x0b5ba9e57582ddf1, 0x60c0104ba696ac59 },
+        { 0x051de020de9cbe97, -0x05f803a94af4308c, 0x378cec9f0f11df65, 0x36853c69ab96de4d }
+    },
+    {
+        { 0x4433c0b0fac5e7be, 0x724bae854c08dcbe, -0x0e0db33bb9687065, 0x4a0aff6d62825fc8 },
         { 0x36d9b8de78f39b2d, 0x7f42ed71a847b9ec, 0x241cd1d679bd3fde, 0x6a704fec92fbce6b },
-        { 0xe917fb9e61095301, 0xc102df9402a092f8, 0xbf09e2f5fa66190b, 0x681109bee0dcfe37 }
-    },
-},
-{
-    {
-        { 0x9c18fcfa36048d13, 0x29159db373899ddd, 0xdc9f350b9f92d0aa, 0x26f57eee878a19d4 },
+        { -0x16e804619ef6acff, -0x3efd206bfd5f6d08, -0x40f61d0a0599e6f5, 0x681109bee0dcfe37 }
+    },
+},
+{
+    {
+        { -0x63e70305c9fb72ed, 0x29159db373899ddd, -0x2360caf4606d2f56, 0x26f57eee878a19d4 },
         { 0x559a0cc9782a0dde, 0x551dcdb2ea718385, 0x7f62865b31ef238c, 0x504aa7767973613d },
-        { 0x0cab2cd55687efb1, 0x5180d162247af17b, 0x85c15a344f5a2467, 0x4041943d9dba3069 }
-    },
-    {
-        { 0x4b217743a26caadd, 0x47a6b424648ab7ce, 0xcb1d4f7a03fbc9e3, 0x12d931429800d019 },
-        { 0xc3c0eeba43ebcc96, 0x8d749c9c26ea9caf, 0xd9fa95ee1c77ccc6, 0x1420a1d97684340f },
+        { 0x0cab2cd55687efb1, 0x5180d162247af17b, -0x7a3ea5cbb0a5db99, 0x4041943d9dba3069 }
+    },
+    {
+        { 0x4b217743a26caadd, 0x47a6b424648ab7ce, -0x34e2b085fc04361d, 0x12d931429800d019 },
+        { -0x3c3f1145bc14336a, -0x728b6363d9156351, -0x26056a11e388333a, 0x1420a1d97684340f },
         { 0x00c67799d337594f, 0x5e3c5140b23aa47b, 0x44182854e35ff395, 0x1b4f92314359a012 }
     },
     {
-        { 0x33cf3030a49866b1, 0x251f73d2215f4859, 0xab82aa4051def4f6, 0x5ff191d56f9a23f6 },
+        { 0x33cf3030a49866b1, 0x251f73d2215f4859, -0x547d55bfae210b0a, 0x5ff191d56f9a23f6 },
         { 0x3e5c109d89150951, 0x39cefa912de9696a, 0x20eae43f975f3020, 0x239b572a7f132dae },
-        { 0x819ed433ac2d9068, 0x2883ab795fc98523, 0xef4572805593eb3d, 0x020c526a758f36cb }
-    },
-    {
-        { 0xe931ef59f042cc89, 0x2c589c9d8e124bb6, 0xadc8e18aaec75997, 0x452cfe0a5602c50c },
-        { 0x779834f89ed8dbbc, 0xc8f2aaf9dc7ca46c, 0xa9524cdca3e1b074, 0x02aacc4615313877 },
-        { 0x86a0f7a0647877df, 0xbbc464270e607c9f, 0xab17ea25f1fb11c9, 0x4cfb7d7b304b877b }
-    },
-    {
-        { 0xe28699c29789ef12, 0x2b6ecd71df57190d, 0xc343c857ecc970d0, 0x5b1d4cbc434d3ac5 },
-        { 0x72b43d6cb89b75fe, 0x54c694d99c6adc80, 0xb8c3aa373ee34c9f, 0x14b4622b39075364 },
-        { 0xb6fb2615cc0a9f26, 0x3a4f0e2bb88dcce5, 0x1301498b3369a705, 0x2f98f71258592dd1 }
-    },
-    {
-        { 0x2e12ae444f54a701, 0xfcfe3ef0a9cbd7de, 0xcebf890d75835de0, 0x1d8062e9e7614554 },
-        { 0x0c94a74cb50f9e56, 0x5b1ff4a98e8e1320, 0x9a2acc2182300f67, 0x3a6ae249d806aaf9 },
-        { 0x657ada85a9907c5a, 0x1a0ea8b591b90f62, 0x8d0e1dfbdf34b4e9, 0x298b8ce8aef25ff3 }
-    },
-    {
-        { 0x837a72ea0a2165de, 0x3fab07b40bcf79f6, 0x521636c77738ae70, 0x6ba6271803a7d7dc },
-        { 0x2a927953eff70cb2, 0x4b89c92a79157076, 0x9418457a30a7cf6a, 0x34b8a8404d5ce485 },
-        { 0xc26eecb583693335, 0xd5a813df63b5fefd, 0xa293aa9aa4b22573, 0x71d62bdd465e1c6a }
-    },
-    {
-        { 0xcd2db5dab1f75ef5, 0xd77f95cf16b065f5, 0x14571fea3f49f085, 0x1c333621262b2b3d },
-        { 0x6533cc28d378df80, 0xf6db43790a0fa4b4, 0xe3645ff9f701da5a, 0x74d5f317f3172ba4 },
-        { 0xa86fe55467d9ca81, 0x398b7c752b298c37, 0xda6d0892e3ac623b, 0x4aebcc4547e9d98c }
-    },
-},
-{
-    {
-        { 0x0de9b204a059a445, 0xe15cb4aa4b17ad0f, 0xe1bbec521f79c557, 0x2633f1b9d071081b },
-        { 0x53175a7205d21a77, 0xb0c04422d3b934d4, 0xadd9f24bdd5deadc, 0x074f46e69f10ff8c },
-        { 0xc1fb4177018b9910, 0xa6ea20dc6c0fe140, 0xd661f3e74354c6ff, 0x5ecb72e6f1a3407a }
-    },
-    {
-        { 0xfeeae106e8e86997, 0x9863337f98d09383, 0x9470480eaa06ebef, 0x038b6898d4c5c2d0 },
-        { 0xa515a31b2259fb4e, 0x0960f3972bcac52f, 0xedb52fec8d3454cb, 0x382e2720c476c019 },
-        { 0xf391c51d8ace50a6, 0x3142d0b9ae2d2948, 0xdb4d5a1a7f24ca80, 0x21aeba8b59250ea8 }
+        { -0x7e612bcc53d26f98, 0x2883ab795fc98523, -0x10ba8d7faa6c14c3, 0x020c526a758f36cb }
+    },
+    {
+        { -0x16ce10a60fbd3377, 0x2c589c9d8e124bb6, -0x52371e755138a669, 0x452cfe0a5602c50c },
+        { 0x779834f89ed8dbbc, -0x370d550623835b94, -0x56adb3235c1e4f8c, 0x02aacc4615313877 },
+        { -0x795f085f9b878821, -0x443b9bd8f19f8361, -0x54e815da0e04ee37, 0x4cfb7d7b304b877b }
+    },
+    {
+        { -0x1d79663d687610ee, 0x2b6ecd71df57190d, -0x3cbc37a813368f30, 0x5b1d4cbc434d3ac5 },
+        { 0x72b43d6cb89b75fe, 0x54c694d99c6adc80, -0x473c55c8c11cb361, 0x14b4622b39075364 },
+        { -0x4904d9ea33f560da, 0x3a4f0e2bb88dcce5, 0x1301498b3369a705, 0x2f98f71258592dd1 }
+    },
+    {
+        { 0x2e12ae444f54a701, -0x0301c10f56342822, -0x314076f28a7ca220, 0x1d8062e9e7614554 },
+        { 0x0c94a74cb50f9e56, 0x5b1ff4a98e8e1320, -0x65d533de7dcff099, 0x3a6ae249d806aaf9 },
+        { 0x657ada85a9907c5a, 0x1a0ea8b591b90f62, -0x72f1e20420cb4b17, 0x298b8ce8aef25ff3 }
+    },
+    {
+        { -0x7c858d15f5de9a22, 0x3fab07b40bcf79f6, 0x521636c77738ae70, 0x6ba6271803a7d7dc },
+        { 0x2a927953eff70cb2, 0x4b89c92a79157076, -0x6be7ba85cf583096, 0x34b8a8404d5ce485 },
+        { -0x3d91134a7c96cccb, -0x2a57ec209c4a0103, -0x5d6c55655b4dda8d, 0x71d62bdd465e1c6a }
+    },
+    {
+        { -0x32d24a254e08a10b, -0x28806a30e94f9a0b, 0x14571fea3f49f085, 0x1c333621262b2b3d },
+        { 0x6533cc28d378df80, -0x0924bc86f5f05b4c, -0x1c9ba00608fe25a6, 0x74d5f317f3172ba4 },
+        { -0x57901aab9826357f, 0x398b7c752b298c37, -0x2592f76d1c539dc5, 0x4aebcc4547e9d98c }
+    },
+},
+{
+    {
+        { 0x0de9b204a059a445, -0x1ea34b55b4e852f1, -0x1e4413ade0863aa9, 0x2633f1b9d071081b },
+        { 0x53175a7205d21a77, -0x4f3fbbdd2c46cb2c, -0x52260db422a21524, 0x074f46e69f10ff8c },
+        { -0x3e04be88fe7466f0, -0x5915df2393f01ec0, -0x299e0c18bcab3901, 0x5ecb72e6f1a3407a }
+    },
+    {
+        { -0x01151ef917179669, -0x679ccc80672f6c7d, -0x6b8fb7f155f91411, 0x038b6898d4c5c2d0 },
+        { -0x5aea5ce4dda604b2, 0x0960f3972bcac52f, -0x124ad01372cbab35, 0x382e2720c476c019 },
+        { -0x0c6e3ae27531af5a, 0x3142d0b9ae2d2948, -0x24b2a5e580db3580, 0x21aeba8b59250ea8 }
     },
     {
         { 0x53853600f0087f23, 0x4c461879da7d5784, 0x6af303deb41f6860, 0x0a3c16c5c27c18ed },
         { 0x24f13b34cf405530, 0x3c44ea4a43088af7, 0x5dd5c5170006a482, 0x118eb8f8890b086d },
-        { 0x17e49c17cc947f3d, 0xccc6eda6aac1d27b, 0xdf6092ceb0f08e56, 0x4909b3e22c67c36b }
+        { 0x17e49c17cc947f3d, -0x33391259553e2d85, -0x209f6d314f0f71aa, 0x4909b3e22c67c36b }
     },
     {
         { 0x59a16676706ff64e, 0x10b953dd0d86a53d, 0x5848e1e6ce5c0b96, 0x2d8b78e712780c68 },
-        { 0x9c9c85ea63fe2e89, 0xbe1baf910e9412ec, 0x8f7baa8a86fbfe7b, 0x0fb17f9fef968b6c },
-        { 0x79d5c62eafc3902b, 0x773a215289e80728, 0xc38ae640e10120b9, 0x09ae23717b2b1a6d }
-    },
-    {
-        { 0x10ab8fa1ad32b1d0, 0xe9aced1be2778b24, 0xa8856bc0373de90f, 0x66f35ddddda53996 },
-        { 0xbb6a192a4e4d083c, 0x34ace0630029e192, 0x98245a59aafabaeb, 0x6d9c8a9ada97faac },
-        { 0xd27d9afb24997323, 0x1bb7e07ef6f01d2e, 0x2ba7472df52ecc7f, 0x03019b4f646f9dc8 }
-    },
-    {
-        { 0xaf09b214e6b3dc6b, 0x3f7573b5ad7d2f65, 0xd019d988100a23b0, 0x392b63a58b5c35f7 },
-        { 0x04a186b5565345cd, 0xeee76610bcc4116a, 0x689c73b478fb2a45, 0x387dcbff65697512 },
-        { 0x4093addc9c07c205, 0xc565be15f532c37e, 0x63dbecfd1583402a, 0x61722b4aef2e032e }
-    },
-    {
-        { 0xd6b07a5581cb0e3c, 0x290ff006d9444969, 0x08680b6a16dcda1f, 0x5568d2b75a06de59 },
-        { 0x0012aafeecbd47af, 0x55a266fb1cd46309, 0xf203eb680967c72c, 0x39633944ca3c1429 },
-        { 0x8d0cb88c1b37cfe1, 0x05b6a5a3053818f3, 0xf2e9bc04b787d959, 0x6beba1249add7f64 }
-    },
-    {
-        { 0x5c3cecb943f5a53b, 0x9cc9a61d06c08df2, 0xcfba639a85895447, 0x5a845ae80df09fd5 },
+        { -0x63637a159c01d177, -0x41e4506ef16bed14, -0x7084557579040185, 0x0fb17f9fef968b6c },
+        { 0x79d5c62eafc3902b, 0x773a215289e80728, -0x3c7519bf1efedf47, 0x09ae23717b2b1a6d }
+    },
+    {
+        { 0x10ab8fa1ad32b1d0, -0x165312e41d8874dc, -0x577a943fc8c216f1, 0x66f35ddddda53996 },
+        { -0x4495e6d5b1b2f7c4, 0x34ace0630029e192, -0x67dba5a655054515, 0x6d9c8a9ada97faac },
+        { -0x2d826504db668cdd, 0x1bb7e07ef6f01d2e, 0x2ba7472df52ecc7f, 0x03019b4f646f9dc8 }
+    },
+    {
+        { -0x50f64deb194c2395, 0x3f7573b5ad7d2f65, -0x2fe62677eff5dc50, 0x392b63a58b5c35f7 },
+        { 0x04a186b5565345cd, -0x111899ef433bee96, 0x689c73b478fb2a45, 0x387dcbff65697512 },
+        { 0x4093addc9c07c205, -0x3a9a41ea0acd3c82, 0x63dbecfd1583402a, 0x61722b4aef2e032e }
+    },
+    {
+        { -0x294f85aa7e34f1c4, 0x290ff006d9444969, 0x08680b6a16dcda1f, 0x5568d2b75a06de59 },
+        { 0x0012aafeecbd47af, 0x55a266fb1cd46309, -0x0dfc1497f69838d4, 0x39633944ca3c1429 },
+        { -0x72f34773e4c8301f, 0x05b6a5a3053818f3, -0x0d1643fb487826a7, 0x6beba1249add7f64 }
+    },
+    {
+        { 0x5c3cecb943f5a53b, -0x633659e2f93f720e, -0x30459c657a76abb9, 0x5a845ae80df09fd5 },
         { 0x1d06005ca5b1b143, 0x6d4c6bb87fd1cda2, 0x6ef5967653fcffe7, 0x097c29e8c1ce1ea5 },
-        { 0x4ce97dbe5deb94ca, 0x38d0a4388c709c48, 0xc43eced4a169d097, 0x0a1249fff7e587c3 }
-    },
-},
-{
-    {
-        { 0x0b408d9e7354b610, 0x806b32535ba85b6e, 0xdbe63a034a58a207, 0x173bd9ddc9a1df2c },
-        { 0x12f0071b276d01c9, 0xe7b8bac586c48c70, 0x5308129b71d6fba9, 0x5d88fbf95a3db792 },
-        { 0x2b500f1efe5872df, 0x58d6582ed43918c1, 0xe6ed278ec9673ae0, 0x06e1cd13b19ea319 }
+        { 0x4ce97dbe5deb94ca, 0x38d0a4388c709c48, -0x3bc1312b5e962f69, 0x0a1249fff7e587c3 }
+    },
+},
+{
+    {
+        { 0x0b408d9e7354b610, -0x7f94cdaca457a492, -0x2419c5fcb5a75df9, 0x173bd9ddc9a1df2c },
+        { 0x12f0071b276d01c9, -0x1847453a793b7390, 0x5308129b71d6fba9, 0x5d88fbf95a3db792 },
+        { 0x2b500f1efe5872df, 0x58d6582ed43918c1, -0x1912d8713698c520, 0x06e1cd13b19ea319 }
     },
     {
         { 0x472baf629e5b0353, 0x3baa0b90278d0447, 0x0c785f469643bf27, 0x7f3a6a1a8d837b13 },
         { 0x40d0ad516f166f23, 0x118e32931fab6abe, 0x3fe35e14a04d088e, 0x3080603526e16266 },
-        { 0xf7e644395d3d800b, 0x95a8d555c901edf6, 0x68cd7830592c6339, 0x30d0fded2e51307e }
-    },
-    {
-        { 0x9cb4971e68b84750, 0xa09572296664bbcf, 0x5c8de72672fa412b, 0x4615084351c589d9 },
-        { 0xe0594d1af21233b3, 0x1bdbe78ef0cc4d9c, 0x6965187f8f499a77, 0x0a9214202c099868 },
-        { 0xbc9019c0aeb9a02e, 0x55c7110d16034cae, 0x0e6df501659932ec, 0x3bca0d2895ca5dfe }
-    },
-    {
-        { 0x9c688eb69ecc01bf, 0xf0bc83ada644896f, 0xca2d955f5f7a9fe2, 0x4ea8b4038df28241 },
-        { 0x40f031bc3c5d62a4, 0x19fc8b3ecff07a60, 0x98183da2130fb545, 0x5631deddae8f13cd },
-        { 0x2aed460af1cad202, 0x46305305a48cee83, 0x9121774549f11a5f, 0x24ce0930542ca463 }
-    },
-    {
-        { 0x3fcfa155fdf30b85, 0xd2f7168e36372ea4, 0xb2e064de6492f844, 0x549928a7324f4280 },
-        { 0x1fe890f5fd06c106, 0xb5c468355d8810f2, 0x827808fe6e8caf3e, 0x41d4e3c28a06d74b },
-        { 0xf26e32a763ee1a2e, 0xae91e4b7d25ffdea, 0xbc3bd33bd17f4d69, 0x491b66dec0dcff6a }
-    },
-    {
-        { 0x75f04a8ed0da64a1, 0xed222caf67e2284b, 0x8234a3791f7b7ba4, 0x4cf6b8b0b7018b67 },
-        { 0x98f5b13dc7ea32a7, 0xe3d5f8cc7e16db98, 0xac0abf52cbf8d947, 0x08f338d0c85ee4ac },
-        { 0xc383a821991a73bd, 0xab27bc01df320c7a, 0xc13d331b84777063, 0x530d4a82eb078a99 }
+        { -0x0819bbc6a2c27ff5, -0x6a572aaa36fe120a, 0x68cd7830592c6339, 0x30d0fded2e51307e }
+    },
+    {
+        { -0x634b68e19747b8b0, -0x5f6a8dd6999b4431, 0x5c8de72672fa412b, 0x4615084351c589d9 },
+        { -0x1fa6b2e50dedcc4d, 0x1bdbe78ef0cc4d9c, 0x6965187f8f499a77, 0x0a9214202c099868 },
+        { -0x436fe63f51465fd2, 0x55c7110d16034cae, 0x0e6df501659932ec, 0x3bca0d2895ca5dfe }
+    },
+    {
+        { -0x639771496133fe41, -0x0f437c5259bb7691, -0x35d26aa0a085601e, 0x4ea8b4038df28241 },
+        { 0x40f031bc3c5d62a4, 0x19fc8b3ecff07a60, -0x67e7c25decf04abb, 0x5631deddae8f13cd },
+        { 0x2aed460af1cad202, 0x46305305a48cee83, -0x6ede88bab60ee5a1, 0x24ce0930542ca463 }
+    },
+    {
+        { 0x3fcfa155fdf30b85, -0x2d08e971c9c8d15c, -0x4d1f9b219b6d07bc, 0x549928a7324f4280 },
+        { 0x1fe890f5fd06c106, -0x4a3b97caa277ef0e, -0x7d87f701917350c2, 0x41d4e3c28a06d74b },
+        { -0x0d91cd589c11e5d2, -0x516e1b482da00216, -0x43c42cc42e80b297, 0x491b66dec0dcff6a }
+    },
+    {
+        { 0x75f04a8ed0da64a1, -0x12ddd350981dd7b5, -0x7dcb5c86e084845c, 0x4cf6b8b0b7018b67 },
+        { -0x670a4ec23815cd59, -0x1c2a073381e92468, -0x53f540ad340726b9, 0x08f338d0c85ee4ac },
+        { -0x3c7c57de66e58c43, -0x54d843fe20cdf386, -0x3ec2cce47b888f9d, 0x530d4a82eb078a99 }
     },
     {
         { 0x6d6973456c9abf9e, 0x257fb2fc4900a880, 0x2bacf412c8cfb850, 0x0db3e7e00cbfbd5b },
-        { 0x004c3630e1f94825, 0x7e2d78268cab535a, 0xc7482323cc84ff8b, 0x65ea753f101770b9 },
-        { 0x3d66fc3ee2096363, 0x81d62c7f61b5cb6b, 0x0fbe044213443b1a, 0x02a4ec1921e1a1db }
-    },
-    {
-        { 0xf5c86162f1cf795f, 0x118c861926ee57f2, 0x172124851c063578, 0x36d12b5dec067fcf },
-        { 0x5ce6259a3b24b8a2, 0xb8577acc45afa0b8, 0xcccbe6e88ba07037, 0x3d143c51127809bf },
-        { 0x126d279179154557, 0xd5e48f5cfc783a0a, 0x36bdb6e8df179bac, 0x2ef517885ba82859 }
-    },
-},
-{
-    {
-        { 0x1ea436837c6da1e9, 0xf9c189af1fb9bdbe, 0x303001fcce5dd155, 0x28a7c99ebc57be52 },
-        { 0x88bd438cd11e0d4a, 0x30cb610d43ccf308, 0xe09a0e3791937bcc, 0x4559135b25b1720c },
-        { 0xb8fd9399e8d19e9d, 0x908191cb962423ff, 0xb2b948d747c742a3, 0x37f33226d7fb44c4 }
+        { 0x004c3630e1f94825, 0x7e2d78268cab535a, -0x38b7dcdc337b0075, 0x65ea753f101770b9 },
+        { 0x3d66fc3ee2096363, -0x7e29d3809e4a3495, 0x0fbe044213443b1a, 0x02a4ec1921e1a1db }
+    },
+    {
+        { -0x0a379e9d0e3086a1, 0x118c861926ee57f2, 0x172124851c063578, 0x36d12b5dec067fcf },
+        { 0x5ce6259a3b24b8a2, -0x47a88533ba505f48, -0x33341917745f8fc9, 0x3d143c51127809bf },
+        { 0x126d279179154557, -0x2a1b70a30387c5f6, 0x36bdb6e8df179bac, 0x2ef517885ba82859 }
+    },
+},
+{
+    {
+        { 0x1ea436837c6da1e9, -0x063e7650e0464242, 0x303001fcce5dd155, 0x28a7c99ebc57be52 },
+        { -0x7742bc732ee1f2b6, 0x30cb610d43ccf308, -0x1f65f1c86e6c8434, 0x4559135b25b1720c },
+        { -0x47026c66172e6163, -0x6f7e6e3469dbdc01, -0x4d46b728b838bd5d, 0x37f33226d7fb44c4 }
     },
     {
         { 0x33912553c821b11d, 0x66ed42c241e301df, 0x066fcc11104222fd, 0x307a3b41c192168f },
-        { 0x0dae8767b55f6e08, 0x4a43b3b35b203a02, 0xe3725a6e80af8c79, 0x0f7a7fd1705fa7a3 },
-        { 0x8eeb5d076eb55ce0, 0x2fc536bfaa0d925a, 0xbe81830fdcb6c6e8, 0x556c7045827baf52 }
-    },
-    {
-        { 0xb94b90022bf44406, 0xabd4237eff90b534, 0x7600a960faf86d3a, 0x2f45abdac2322ee3 },
-        { 0x8e2b517302e9d8b7, 0xe3e52269248714e8, 0xbd4fbd774ca960b5, 0x6f4b4199c5ecada9 },
-        { 0x61af4912c8ef8a6a, 0xe58fa4fe43fb6e5e, 0xb5afcc5d6fd427cf, 0x6a5393281e1e11eb }
+        { 0x0dae8767b55f6e08, 0x4a43b3b35b203a02, -0x1c8da5917f507387, 0x0f7a7fd1705fa7a3 },
+        { -0x7114a2f8914aa320, 0x2fc536bfaa0d925a, -0x417e7cf023493918, 0x556c7045827baf52 }
+    },
+    {
+        { -0x46b46ffdd40bbbfa, -0x542bdc81006f4acc, 0x7600a960faf86d3a, 0x2f45abdac2322ee3 },
+        { -0x71d4ae8cfd162749, -0x1c1add96db78eb18, -0x42b04288b3569f4b, 0x6f4b4199c5ecada9 },
+        { 0x61af4912c8ef8a6a, -0x1a705b01bc0491a2, -0x4a5033a2902bd831, 0x6a5393281e1e11eb }
     },
     {
         { 0x0fff04fe149443cf, 0x53cac6d9865cddd7, 0x31385b03531ed1b7, 0x5846a27cacd1039d },
-        { 0xf3da5139a5d1ee89, 0x8145457cff936988, 0x3f622fed00e188c4, 0x0f513815db8b5a3d },
+        { -0x0c25aec65a2e1177, -0x7ebaba83006c9678, 0x3f622fed00e188c4, 0x0f513815db8b5a3d },
         { 0x4ff5cdac1eb08717, 0x67e8b29590f2e9bc, 0x44093b5e237afa99, 0x0d414bed8708b8b2 }
     },
     {
-        { 0x81886a92294ac9e8, 0x23162b45d55547be, 0x94cfbc4403715983, 0x50eb8fdb134bc401 },
-        { 0xcfb68265fd0e75f6, 0xe45b3e28bb90e707, 0x7242a8de9ff92c7a, 0x685b3201933202dd },
-        { 0xc0b73ec6d6b330cd, 0x84e44807132faff1, 0x732b7352c4a5dee1, 0x5d7c7cf1aa7cd2d2 }
-    },
-    {
-        { 0x33d1013e9b73a562, 0x925cef5748ec26e1, 0xa7fce614dd468058, 0x78b0fad41e9aa438 },
-        { 0xaf3b46bf7a4aafa2, 0xb78705ec4d40d411, 0x114f0c6aca7c15e3, 0x3f364faaa9489d4d },
-        { 0xbf56a431ed05b488, 0xa533e66c9c495c7e, 0xe8652baf87f3651a, 0x0241800059d66c33 }
-    },
-    {
-        { 0x28350c7dcf38ea01, 0x7c6cdbc0b2917ab6, 0xace7cfbe857082f7, 0x4d2845aba2d9a1e0 },
-        { 0xceb077fea37a5be4, 0xdb642f02e5a5eeb7, 0xc2e6d0c5471270b8, 0x4771b65538e4529c },
-        { 0xbb537fe0447070de, 0xcba744436dd557df, 0xd3b5a3473600dbcb, 0x4aeabbe6f9ffd7f8 }
-    },
-    {
-        { 0x6a2134bcc4a9c8f2, 0xfbf8fd1c8ace2e37, 0x000ae3049911a0ba, 0x046e3a616bc89b9e },
-        { 0x4630119e40d8f78c, 0xa01a9bc53c710e11, 0x486d2b258910dd79, 0x1e6c47b3db0324e5 },
+        { -0x7e77956dd6b53618, 0x23162b45d55547be, -0x6b3043bbfc8ea67d, 0x50eb8fdb134bc401 },
+        { -0x30497d9a02f18a0a, -0x1ba4c1d7446f18f9, 0x7242a8de9ff92c7a, 0x685b3201933202dd },
+        { -0x3f48c139294ccf33, -0x7b1bb7f8ecd0500f, 0x732b7352c4a5dee1, 0x5d7c7cf1aa7cd2d2 }
+    },
+    {
+        { 0x33d1013e9b73a562, -0x6da310a8b713d91f, -0x580319eb22b97fa8, 0x78b0fad41e9aa438 },
+        { -0x50c4b94085b5505e, -0x4878fa13b2bf2bef, 0x114f0c6aca7c15e3, 0x3f364faaa9489d4d },
+        { -0x40a95bce12fa4b78, -0x5acc199363b6a382, -0x179ad450780c9ae6, 0x0241800059d66c33 }
+    },
+    {
+        { 0x28350c7dcf38ea01, 0x7c6cdbc0b2917ab6, -0x531830417a8f7d09, 0x4d2845aba2d9a1e0 },
+        { -0x314f88015c85a41c, -0x249bd0fd1a5a1149, -0x3d192f3ab8ed8f48, 0x4771b65538e4529c },
+        { -0x44ac801fbb8f8f22, -0x3458bbbc922aa821, -0x2c4a5cb8c9ff2435, 0x4aeabbe6f9ffd7f8 }
+    },
+    {
+        { 0x6a2134bcc4a9c8f2, -0x040702e37531d1c9, 0x000ae3049911a0ba, 0x046e3a616bc89b9e },
+        { 0x4630119e40d8f78c, -0x5fe5643ac38ef1ef, 0x486d2b258910dd79, 0x1e6c47b3db0324e5 },
         { 0x14e65442f03906be, 0x4a019d54e362be2a, 0x68ccdfec8dc230c7, 0x7cfb7e3faf6b861c }
     },
 },
 {
     {
-        { 0x96eebffb305b2f51, 0xd3f938ad889596b8, 0xf0f52dc746d5dd25, 0x57968290bb3a0095 },
-        { 0x4637974e8c58aedc, 0xb9ef22fbabf041a4, 0xe185d956e980718a, 0x2f1b78fab143a8a6 },
-        { 0xf71ab8430a20e101, 0xf393658d24f0ec47, 0xcf7509a86ee2eed1, 0x7dc43e35dc2aa3e1 }
+        { -0x69114004cfa4d0af, -0x2c06c752776a6948, -0x0f0ad238b92a22db, 0x57968290bb3a0095 },
+        { 0x4637974e8c58aedc, -0x4610dd04540fbe5c, -0x1e7a26a9167f8e76, 0x2f1b78fab143a8a6 },
+        { -0x08e547bcf5df1eff, -0x0c6c9a72db0f13b9, -0x308af657911d112f, 0x7dc43e35dc2aa3e1 }
     },
     {
         { 0x5a782a5c273e9718, 0x3576c6995e4efd94, 0x0f2ed8051f237d3e, 0x044fb81d82d50a99 },
-        { 0x85966665887dd9c3, 0xc90f9b314bb05355, 0xc6e08df8ef2079b1, 0x7ef72016758cc12f },
-        { 0xc1df18c5a907e3d9, 0x57b3371dce4c6359, 0xca704534b201bb49, 0x7f79823f9c30dd2e }
+        { -0x7a69999a7782263d, -0x36f064ceb44facab, -0x391f720710df864f, 0x7ef72016758cc12f },
+        { -0x3e20e73a56f81c27, 0x57b3371dce4c6359, -0x358fbacb4dfe44b7, 0x7f79823f9c30dd2e }
     },
     {
         { 0x6a9c1ff068f587ba, 0x0827894e0050c8de, 0x3cbf99557ded5be7, 0x64a9b0431c06d6f0 },
-        { 0x8334d239a3b513e8, 0xc13670d4b91fa8d8, 0x12b54136f590bd33, 0x0a4e0373d784d9b4 },
-        { 0x2eb3d6a15b7d2919, 0xb0b4f6a0d53a8235, 0x7156ce4389a45d47, 0x071a7d0ace18346c }
-    },
-    {
-        { 0xcc0c355220e14431, 0x0d65950709b15141, 0x9af5621b209d5f36, 0x7c69bcf7617755d3 },
-        { 0xd3072daac887ba0b, 0x01262905bfa562ee, 0xcf543002c0ef768b, 0x2c3bcc7146ea7e9c },
-        { 0x07f0d7eb04e8295f, 0x10db18252f50f37d, 0xe951a9a3171798d7, 0x6f5a9a7322aca51d }
-    },
-    {
-        { 0xe729d4eba3d944be, 0x8d9e09408078af9e, 0x4525567a47869c03, 0x02ab9680ee8d3b24 },
-        { 0x8ba1000c2f41c6c5, 0xc49f79c10cfefb9b, 0x4efa47703cc51c9f, 0x494e21a2e147afca },
-        { 0xefa48a85dde50d9a, 0x219a224e0fb9a249, 0xfa091f1dd91ef6d9, 0x6b5d76cbea46bb34 }
-    },
-    {
-        { 0xe0f941171e782522, 0xf1e6ae74036936d3, 0x408b3ea2d0fcc746, 0x16fb869c03dd313e },
-        { 0x8857556cec0cd994, 0x6472dc6f5cd01dba, 0xaf0169148f42b477, 0x0ae333f685277354 },
+        { -0x7ccb2dc65c4aec18, -0x3ec98f2b46e05728, 0x12b54136f590bd33, 0x0a4e0373d784d9b4 },
+        { 0x2eb3d6a15b7d2919, -0x4f4b095f2ac57dcb, 0x7156ce4389a45d47, 0x071a7d0ace18346c }
+    },
+    {
+        { -0x33f3caaddf1ebbcf, 0x0d65950709b15141, -0x650a9de4df62a0ca, 0x7c69bcf7617755d3 },
+        { -0x2cf8d255377845f5, 0x01262905bfa562ee, -0x30abcffd3f108975, 0x2c3bcc7146ea7e9c },
+        { 0x07f0d7eb04e8295f, 0x10db18252f50f37d, -0x16ae565ce8e86729, 0x6f5a9a7322aca51d }
+    },
+    {
+        { -0x18d62b145c26bb42, -0x7261f6bf7f875062, 0x4525567a47869c03, 0x02ab9680ee8d3b24 },
+        { -0x745efff3d0be393b, -0x3b60863ef3010465, 0x4efa47703cc51c9f, 0x494e21a2e147afca },
+        { -0x105b757a221af266, 0x219a224e0fb9a249, -0x05f6e0e226e10927, 0x6b5d76cbea46bb34 }
+    },
+    {
+        { -0x1f06bee8e187dade, -0x0e19518bfc96c92d, 0x408b3ea2d0fcc746, 0x16fb869c03dd313e },
+        { -0x77a8aa9313f3266c, 0x6472dc6f5cd01dba, -0x50fe96eb70bd4b89, 0x0ae333f685277354 },
         { 0x288e199733b60962, 0x24fc72b4d8abe133, 0x4811f7ed0991d03e, 0x3f81e38b8f70d075 }
     },
     {
-        { 0x0adb7f355f17c824, 0x74b923c3d74299a4, 0xd57c3e8bcbf8eaf7, 0x0ad3e2d34cdedc3d },
-        { 0x7f910fcc7ed9affe, 0x545cb8a12465874b, 0xa8397ed24b0c4704, 0x50510fc104f50993 },
-        { 0x6f0c0fc5336e249d, 0x745ede19c331cfd9, 0xf2d6fd0009eefe1c, 0x127c158bf0fa1ebe }
-    },
-    {
-        { 0xdea28fc4ae51b974, 0x1d9973d3744dfe96, 0x6240680b873848a8, 0x4ed82479d167df95 },
-        { 0xf6197c422e9879a2, 0xa44addd452ca3647, 0x9b413fc14b4eaccb, 0x354ef87d07ef4f68 },
-        { 0xfee3b52260c5d975, 0x50352efceb41b0b8, 0x8808ac30a9f6653c, 0x302d92d20539236d }
-    },
-},
-{
-    {
-        { 0x957b8b8b0df53c30, 0x2a1c770a8e60f098, 0xbbc7a670345796de, 0x22a48f9a90c99bc9 },
-        { 0x4c59023fcb3efb7c, 0x6c2fcb99c63c2a94, 0xba4190e2c3c7e084, 0x0e545daea51874d9 },
+        { 0x0adb7f355f17c824, 0x74b923c3d74299a4, -0x2a83c17434071509, 0x0ad3e2d34cdedc3d },
+        { 0x7f910fcc7ed9affe, 0x545cb8a12465874b, -0x57c6812db4f3b8fc, 0x50510fc104f50993 },
+        { 0x6f0c0fc5336e249d, 0x745ede19c331cfd9, -0x0d2902fff61101e4, 0x127c158bf0fa1ebe }
+    },
+    {
+        { -0x215d703b51ae468c, 0x1d9973d3744dfe96, 0x6240680b873848a8, 0x4ed82479d167df95 },
+        { -0x09e683bdd167865e, -0x5bb5222bad35c9b9, -0x64bec03eb4b15335, 0x354ef87d07ef4f68 },
+        { -0x011c4add9f3a268b, 0x50352efceb41b0b8, -0x77f753cf56099ac4, 0x302d92d20539236d }
+    },
+},
+{
+    {
+        { -0x6a847474f20ac3d0, 0x2a1c770a8e60f098, -0x4438598fcba86922, 0x22a48f9a90c99bc9 },
+        { 0x4c59023fcb3efb7c, 0x6c2fcb99c63c2a94, -0x45be6f1d3c381f7c, 0x0e545daea51874d9 },
         { 0x6b7dc0dc8d3fac58, 0x5497cd6ce6e42bfd, 0x542f7d1bf400d305, 0x4159f47f048d9136 }
     },
     {
         { 0x748515a8bbd24839, 0x77128347afb02b55, 0x50ba2ac649a2a17f, 0x060525513ad730f1 },
-        { 0x20ad660839e31e32, 0xf81e1bd58405be50, 0xf8064056f4dabc69, 0x14d23dd4ce71b975 },
-        { 0xf2398e098aa27f82, 0x6d7982bb89a1b024, 0xfa694084214dd24c, 0x71ab966fa32301c3 }
-    },
-    {
-        { 0xb1088a0702809955, 0x43b273ea0b43c391, 0xca9b67aefe0686ed, 0x605eecbf8335f4ed },
-        { 0x2dcbd8e34ded02fc, 0x1151f3ec596f22aa, 0xbca255434e0328da, 0x35768fbe92411b22 },
-        { 0x83200a656c340431, 0x9fcd71678ee59c2f, 0x75d4613f71300f8a, 0x7a912faf60f542f9 }
+        { 0x20ad660839e31e32, -0x07e1e42a7bfa41b0, -0x07f9bfa90b254397, 0x14d23dd4ce71b975 },
+        { -0x0dc671f6755d807e, 0x6d7982bb89a1b024, -0x0596bf7bdeb22db4, 0x71ab966fa32301c3 }
+    },
+    {
+        { -0x4ef775f8fd7f66ab, 0x43b273ea0b43c391, -0x3564985101f97913, 0x605eecbf8335f4ed },
+        { 0x2dcbd8e34ded02fc, 0x1151f3ec596f22aa, -0x435daabcb1fcd726, 0x35768fbe92411b22 },
+        { -0x7cdff59a93cbfbcf, -0x60328e98711a63d1, 0x75d4613f71300f8a, 0x7a912faf60f542f9 }
     },
     {
         { 0x253f4f8dfa2d5597, 0x25e49c405477130c, 0x00c052e5996b1102, 0x33cb966e33bb6c4a },
-        { 0xb204585e5edc1a43, 0x9f0e16ee5897c73c, 0x5b82c0ae4e70483c, 0x624a170e2bddf9be },
-        { 0x597028047f116909, 0x828ac41c1e564467, 0x70417dbde6217387, 0x721627aefbac4384 }
-    },
-    {
-        { 0xfd3097bc410b2f22, 0xf1a05da7b5cfa844, 0x61289a1def57ca74, 0x245ea199bb821902 },
-        { 0x97d03bc38736add5, 0x2f1422afc532b130, 0x3aa68a057101bbc4, 0x4c946cf7e74f9fa7 },
-        { 0xaedca66978d477f8, 0x1898ba3c29117fe1, 0xcf73f983720cbd58, 0x67da12e6b8b56351 }
-    },
-    {
-        { 0x2b7ef3d38ec8308c, 0x828fd7ec71eb94ab, 0x807c3b36c5062abd, 0x0cb64cb831a94141 },
-        { 0x7067e187b4bd6e07, 0x6e8f0203c7d1fe74, 0x93c6aa2f38c85a30, 0x76297d1f3d75a78a },
-        { 0x3030fc33534c6378, 0xb9635c5ce541e861, 0x15d9a9bed9b2c728, 0x49233ea3f3775dcb }
-    },
-    {
-        { 0x7b3985fe1c9f249b, 0x4fd6b2d5a1233293, 0xceb345941adf4d62, 0x6987ff6f542de50c },
-        { 0x629398fa8dbffc3a, 0xe12fe52dd54db455, 0xf3be11dfdaf25295, 0x628b140dce5e7b51 },
-        { 0x47e241428f83753c, 0x6317bebc866af997, 0xdabb5b433d1a9829, 0x074d8d245287fb2d }
+        { -0x4dfba7a1a123e5bd, -0x60f1e911a76838c4, 0x5b82c0ae4e70483c, 0x624a170e2bddf9be },
+        { 0x597028047f116909, -0x7d753be3e1a9bb99, 0x70417dbde6217387, 0x721627aefbac4384 }
+    },
+    {
+        { -0x02cf6843bef4d0de, -0x0e5fa2584a3057bc, 0x61289a1def57ca74, 0x245ea199bb821902 },
+        { -0x682fc43c78c9522b, 0x2f1422afc532b130, 0x3aa68a057101bbc4, 0x4c946cf7e74f9fa7 },
+        { -0x51235996872b8808, 0x1898ba3c29117fe1, -0x308c067c8df342a8, 0x67da12e6b8b56351 }
+    },
+    {
+        { 0x2b7ef3d38ec8308c, -0x7d7028138e146b55, -0x7f83c4c93af9d543, 0x0cb64cb831a94141 },
+        { 0x7067e187b4bd6e07, 0x6e8f0203c7d1fe74, -0x6c3955d0c737a5d0, 0x76297d1f3d75a78a },
+        { 0x3030fc33534c6378, -0x469ca3a31abe179f, 0x15d9a9bed9b2c728, 0x49233ea3f3775dcb }
+    },
+    {
+        { 0x7b3985fe1c9f249b, 0x4fd6b2d5a1233293, -0x314cba6be520b29e, 0x6987ff6f542de50c },
+        { 0x629398fa8dbffc3a, -0x1ed01ad22ab24bab, -0x0c41ee20250dad6b, 0x628b140dce5e7b51 },
+        { 0x47e241428f83753c, 0x6317bebc866af997, -0x2544a4bcc2e567d7, 0x074d8d245287fb2d }
     },
     {
         { 0x481875c6c0e31488, 0x219429b2e22034b4, 0x7223c98a31283b65, 0x3420d60b342277f9 },
-        { 0x8337d9cd440bfc31, 0x729d2ca1af318fd7, 0xa040a4a4772c2070, 0x46002ef03a7349be },
-        { 0xfaa23adeaffe65f7, 0x78261ed45be0764c, 0x441c0a1e2f164403, 0x5aea8e567a87d395 }
+        { -0x7cc82632bbf403cf, 0x729d2ca1af318fd7, -0x5fbf5b5b88d3df90, 0x46002ef03a7349be },
+        { -0x055dc52150019a09, 0x78261ed45be0764c, 0x441c0a1e2f164403, 0x5aea8e567a87d395 }
     },
 },
 {
     {
         { 0x2dbc6fb6e4e0f177, 0x04e1bf29a4bd6a93, 0x5e1966d4787af6e8, 0x0edc5f5eb426d060 },
-        { 0x7813c1a2bca4283d, 0xed62f091a1863dd9, 0xaec7bcb8c268fa86, 0x10e5d3b76f1cae4c },
-        { 0x5453bfd653da8e67, 0xe9dc1eec24a9f641, 0xbf87263b03578a23, 0x45b46c51361cba72 }
-    },
-    {
-        { 0xce9d4ddd8a7fe3e4, 0xab13645676620e30, 0x4b594f7bb30e9958, 0x5c1c0aef321229df },
-        { 0xa9402abf314f7fa1, 0xe257f1dc8e8cf450, 0x1dbbd54b23a8be84, 0x2177bfa36dcb713b },
+        { 0x7813c1a2bca4283d, -0x129d0f6e5e79c227, -0x513843473d97057a, 0x10e5d3b76f1cae4c },
+        { 0x5453bfd653da8e67, -0x1623e113db5609bf, -0x4078d9c4fca875dd, 0x45b46c51361cba72 }
+    },
+    {
+        { -0x3162b22275801c1c, -0x54ec9ba9899df1d0, 0x4b594f7bb30e9958, 0x5c1c0aef321229df },
+        { -0x56bfd540ceb0805f, -0x1da80e2371730bb0, 0x1dbbd54b23a8be84, 0x2177bfa36dcb713b },
         { 0x37081bbcfa79db8f, 0x6048811ec25f59b3, 0x087a76659c832487, 0x4ae619387d8ab5bb }
     },
     {
-        { 0x61117e44985bfb83, 0xfce0462a71963136, 0x83ac3448d425904b, 0x75685abe5ba43d64 },
-        { 0x8ddbf6aa5344a32e, 0x7d88eab4b41b4078, 0x5eb0eb974a130d60, 0x1a00d91b17bf3e03 },
-        { 0x6e960933eb61f2b2, 0x543d0fa8c9ff4952, 0xdf7275107af66569, 0x135529b623b0e6aa }
-    },
-    {
-        { 0xf5c716bce22e83fe, 0xb42beb19e80985c1, 0xec9da63714254aae, 0x5972ea051590a613 },
-        { 0x18f0dbd7add1d518, 0x979f7888cfc11f11, 0x8732e1f07114759b, 0x79b5b81a65ca3a01 },
-        { 0x0fd4ac20dc8f7811, 0x9a9ad294ac4d4fa8, 0xc01b2d64b3360434, 0x4f7e9c95905f3bdb }
-    },
-    {
-        { 0x71c8443d355299fe, 0x8bcd3b1cdbebead7, 0x8092499ef1a49466, 0x1942eec4a144adc8 },
-        { 0x62674bbc5781302e, 0xd8520f3989addc0f, 0x8c2999ae53fbd9c6, 0x31993ad92e638e4c },
+        { 0x61117e44985bfb83, -0x031fb9d58e69ceca, -0x7c53cbb72bda6fb5, 0x75685abe5ba43d64 },
+        { -0x72240955acbb5cd2, 0x7d88eab4b41b4078, 0x5eb0eb974a130d60, 0x1a00d91b17bf3e03 },
+        { 0x6e960933eb61f2b2, 0x543d0fa8c9ff4952, -0x208d8aef85099a97, 0x135529b623b0e6aa }
+    },
+    {
+        { -0x0a38e9431dd17c02, -0x4bd414e617f67a3f, -0x136259c8ebdab552, 0x5972ea051590a613 },
+        { 0x18f0dbd7add1d518, -0x68608777303ee0ef, -0x78cd1e0f8eeb8a65, 0x79b5b81a65ca3a01 },
+        { 0x0fd4ac20dc8f7811, -0x65652d6b53b2b058, -0x3fe4d29b4cc9fbcc, 0x4f7e9c95905f3bdb }
+    },
+    {
+        { 0x71c8443d355299fe, -0x7432c4e324141529, -0x7f6db6610e5b6b9a, 0x1942eec4a144adc8 },
+        { 0x62674bbc5781302e, -0x27adf0c6765223f1, -0x73d66651ac04263a, 0x31993ad92e638e4c },
         { 0x7dac5319ae234992, 0x2c1b3d910cea3e92, 0x553ce494253c1122, 0x2a0a65314ef9ca75 }
     },
     {
-        { 0xcf361acd3c1c793a, 0x2f9ebcac5a35bc3b, 0x60e860e9a8cda6ab, 0x055dc39b6dea1a13 },
-        { 0x2db7937ff7f927c2, 0xdb741f0617d0a635, 0x5982f3a21155af76, 0x4cf6e218647c2ded },
-        { 0xb119227cc28d5bb6, 0x07e24ebc774dffab, 0xa83c78cee4a32c89, 0x121a307710aa24b6 }
-    },
-    {
-        { 0xd659713ec77483c9, 0x88bfe077b82b96af, 0x289e28231097bcd3, 0x527bb94a6ced3a9b },
-        { 0xe4db5d5e9f034a97, 0xe153fc093034bc2d, 0x460546919551d3b1, 0x333fc76c7a40e52d },
+        { -0x30c9e532c3e386c6, 0x2f9ebcac5a35bc3b, 0x60e860e9a8cda6ab, 0x055dc39b6dea1a13 },
+        { 0x2db7937ff7f927c2, -0x248be0f9e82f59cb, 0x5982f3a21155af76, 0x4cf6e218647c2ded },
+        { -0x4ee6dd833d72a44a, 0x07e24ebc774dffab, -0x57c387311b5cd377, 0x121a307710aa24b6 }
+    },
+    {
+        { -0x29a68ec1388b7c37, -0x77401f8847d46951, 0x289e28231097bcd3, 0x527bb94a6ced3a9b },
+        { -0x1b24a2a160fcb569, -0x1eac03f6cfcb43d3, 0x460546919551d3b1, 0x333fc76c7a40e52d },
         { 0x563d992a995b482e, 0x3405d07c6e383801, 0x485035de2f64d8e5, 0x6b89069b20a7a9f7 }
     },
     {
         { 0x4082fa8cb5c7db77, 0x068686f8c734c155, 0x29e6c8d9f6e7a57e, 0x0473d308a7639bcf },
-        { 0x812aa0416270220d, 0x995a89faf9245b4e, 0xffadc4ce5072ef05, 0x23bc2103aa73eb73 },
-        { 0xcaee792603589e05, 0x2b4b421246dcc492, 0x02a1ef74e601a94f, 0x102f73bfde04341a }
-    },
-},
-{
-    {
-        { 0x358ecba293a36247, 0xaf8f9862b268fd65, 0x412f7e9968a01c89, 0x5786f312cd754524 },
-        { 0xb5a2d50c7ec20d3e, 0xc64bdd6ea0c97263, 0x56e89052c1ff734d, 0x4929c6f72b2ffaba },
-        { 0x337788ffca14032c, 0xf3921028447f1ee3, 0x8b14071f231bccad, 0x4c817b4bf2344783 }
-    },
-    {
-        { 0x413ba057a40b4484, 0xba4c2e1a4f5f6a43, 0x614ba0a5aee1d61c, 0x78a1531a8b05dc53 },
-        { 0x0ff853852871b96e, 0xe13e9fab60c3f1bb, 0xeefd595325344402, 0x0a37c37075b7744b },
-        { 0x6cbdf1703ad0562b, 0x8ecf4830c92521a3, 0xdaebd303fd8424e7, 0x72ad82a42e5ec56f }
-    },
-    {
-        { 0xc368939167024bc3, 0x8e69d16d49502fda, 0xfcf2ec3ce45f4b29, 0x065f669ea3b4cbc4 },
+        { -0x7ed55fbe9d8fddf3, -0x66a5760506dba4b2, -0x00523b31af8d10fb, 0x23bc2103aa73eb73 },
+        { -0x351186d9fca761fb, 0x2b4b421246dcc492, 0x02a1ef74e601a94f, 0x102f73bfde04341a }
+    },
+},
+{
+    {
+        { 0x358ecba293a36247, -0x5070679d4d97029b, 0x412f7e9968a01c89, 0x5786f312cd754524 },
+        { -0x4a5d2af3813df2c2, -0x39b422915f368d9d, 0x56e89052c1ff734d, 0x4929c6f72b2ffaba },
+        { 0x337788ffca14032c, -0x0c6defd7bb80e11d, -0x74ebf8e0dce43353, 0x4c817b4bf2344783 }
+    },
+    {
+        { 0x413ba057a40b4484, -0x45b3d1e5b0a095bd, 0x614ba0a5aee1d61c, 0x78a1531a8b05dc53 },
+        { 0x0ff853852871b96e, -0x1ec160549f3c0e45, -0x1102a6acdacbbbfe, 0x0a37c37075b7744b },
+        { 0x6cbdf1703ad0562b, -0x7130b7cf36dade5d, -0x25142cfc027bdb19, 0x72ad82a42e5ec56f }
+    },
+    {
+        { -0x3c976c6e98fdb43d, -0x71962e92b6afd026, -0x030d13c31ba0b4d7, 0x065f669ea3b4cbc4 },
         { 0x3f9e8e35bafb65f6, 0x39d69ec8f27293a1, 0x6cb8cd958cf6a3d0, 0x1734778173adae6d },
-        { 0x8a00aec75532db4d, 0xb869a4e443e31bb1, 0x4a0f8552d3a7f515, 0x19adeb7c303d7c08 }
-    },
-    {
-        { 0x9d05ba7d43c31794, 0x2470c8ff93322526, 0x8323dec816197438, 0x2852709881569b53 },
-        { 0xc720cb6153ead9a3, 0x55b2c97f512b636e, 0xb1e35b5fd40290b1, 0x2fd9ccf13b530ee2 },
-        { 0x07bd475b47f796b8, 0xd2c7b013542c8f54, 0x2dbd23f43b24f87e, 0x6551afd77b0901d6 }
-    },
-    {
-        { 0x68a24ce3a1d5c9ac, 0xbb77a33d10ff6461, 0x0f86ce4425d3166e, 0x56507c0950b9623b },
-        { 0x4546baaf54aac27f, 0xf6f66fecb2a45a28, 0x582d1b5b562bcfe8, 0x44b123f3920f785f },
+        { -0x75ff5138aacd24b3, -0x47965b1bbc1ce44f, 0x4a0f8552d3a7f515, 0x19adeb7c303d7c08 }
+    },
+    {
+        { -0x62fa4582bc3ce86c, 0x2470c8ff93322526, -0x7cdc2137e9e68bc8, 0x2852709881569b53 },
+        { -0x38df349eac15265d, 0x55b2c97f512b636e, -0x4e1ca4a02bfd6f4f, 0x2fd9ccf13b530ee2 },
+        { 0x07bd475b47f796b8, -0x2d384fecabd370ac, 0x2dbd23f43b24f87e, 0x6551afd77b0901d6 }
+    },
+    {
+        { 0x68a24ce3a1d5c9ac, -0x44885cc2ef009b9f, 0x0f86ce4425d3166e, 0x56507c0950b9623b },
+        { 0x4546baaf54aac27f, -0x090990134d5ba5d8, 0x582d1b5b562bcfe8, 0x44b123f3920f785f },
         { 0x1206f0b7d1713e63, 0x353fe3d915bafc74, 0x194ceb970ad9d94d, 0x62fadd7cf9d03ad3 }
     },
     {
-        { 0x3cd7bc61e7ce4594, 0xcd6b35a9b7dd267e, 0xa080abc84366ef27, 0x6ec7c46f59c79711 },
-        { 0xc6b5967b5598a074, 0x5efe91ce8e493e25, 0xd4b72c4549280888, 0x20ef1149a26740c2 },
-        { 0x2f07ad636f09a8a2, 0x8697e6ce24205e7d, 0xc0aefc05ee35a139, 0x15e80958b5f9d897 }
+        { 0x3cd7bc61e7ce4594, -0x3294ca564822d982, -0x5f7f5437bc9910d9, 0x6ec7c46f59c79711 },
+        { -0x394a6984aa675f8c, 0x5efe91ce8e493e25, -0x2b48d3bab6d7f778, 0x20ef1149a26740c2 },
+        { 0x2f07ad636f09a8a2, -0x79681931dbdfa183, -0x3f5103fa11ca5ec7, 0x15e80958b5f9d897 }
     },
     {
         { 0x4dd1ed355bb061c4, 0x42dc0cef941c0700, 0x61305dc1fd86340e, 0x56b2cc930e55a443 },
-        { 0x25a5ef7d0c3e235b, 0x6c39c17fbe134ee7, 0xc774e1342dc5c327, 0x021354b892021f39 },
-        { 0x1df79da6a6bfc5a2, 0x02f3a2749fde4369, 0xb323d9f2cda390a7, 0x7be0847b8774d363 }
-    },
-    {
-        { 0x1466f5af5307fa11, 0x817fcc7ded6c0af2, 0x0a6de44ec3a4a3fb, 0x74071475bc927d0b },
-        { 0x8c99cc5a8b3f55c3, 0x0611d7253fded2a0, 0xed2995ff36b70a36, 0x1f699a54d78a2619 },
-        { 0xe77292f373e7ea8a, 0x296537d2cb045a31, 0x1bd0653ed3274fde, 0x2f9a2c4476bd2966 }
-    },
-},
-{
-    {
-        { 0xa2b4dae0b5511c9a, 0x7ac860292bffff06, 0x981f375df5504234, 0x3f6bd725da4ea12d },
-        { 0xeb18b9ab7f5745c6, 0x023a8aee5787c690, 0xb72712da2df7afa9, 0x36597d25ea5c013d },
-        { 0x734d8d7b106058ac, 0xd940579e6fc6905f, 0x6466f8f99202932d, 0x7b7ecc19da60d6d0 }
-    },
-    {
-        { 0x6dae4a51a77cfa9b, 0x82263654e7a38650, 0x09bbffcd8f2d82db, 0x03bedc661bf5caba },
-        { 0x78c2373c695c690d, 0xdd252e660642906e, 0x951d44444ae12bd2, 0x4235ad7601743956 },
-        { 0x6258cb0d078975f5, 0x492942549189f298, 0xa0cab423e2e36ee4, 0x0e7ce2b0cdf066a1 }
-    },
-    {
-        { 0xfea6fedfd94b70f9, 0xf130c051c1fcba2d, 0x4882d47e7f2fab89, 0x615256138aeceeb5 },
-        { 0xc494643ac48c85a3, 0xfd361df43c6139ad, 0x09db17dd3ae94d48, 0x666e0a5d8fb4674a },
-        { 0x2abbf64e4870cb0d, 0xcd65bcf0aa458b6b, 0x9abe4eba75e8985d, 0x7f0bc810d514dee4 }
-    },
-    {
-        { 0x83ac9dad737213a0, 0x9ff6f8ba2ef72e98, 0x311e2edd43ec6957, 0x1d3a907ddec5ab75 },
-        { 0xb9006ba426f4136f, 0x8d67369e57e03035, 0xcbc8dfd94f463c28, 0x0d1f8dbcf8eedbf5 },
-        { 0xba1693313ed081dc, 0x29329fad851b3480, 0x0128013c030321cb, 0x00011b44a31bfde3 }
-    },
-    {
-        { 0x16561f696a0aa75c, 0xc1bf725c5852bd6a, 0x11a8dd7f9a7966ad, 0x63d988a2d2851026 },
+        { 0x25a5ef7d0c3e235b, 0x6c39c17fbe134ee7, -0x388b1ecbd23a3cd9, 0x021354b892021f39 },
+        { 0x1df79da6a6bfc5a2, 0x02f3a2749fde4369, -0x4cdc260d325c6f59, 0x7be0847b8774d363 }
+    },
+    {
+        { 0x1466f5af5307fa11, -0x7e8033821293f50e, 0x0a6de44ec3a4a3fb, 0x74071475bc927d0b },
+        { -0x736633a574c0aa3d, 0x0611d7253fded2a0, -0x12d66a00c948f5ca, 0x1f699a54d78a2619 },
+        { -0x188d6d0c8c181576, 0x296537d2cb045a31, 0x1bd0653ed3274fde, 0x2f9a2c4476bd2966 }
+    },
+},
+{
+    {
+        { -0x5d4b251f4aaee366, 0x7ac860292bffff06, -0x67e0c8a20aafbdcc, 0x3f6bd725da4ea12d },
+        { -0x14e7465480a8ba3a, 0x023a8aee5787c690, -0x48d8ed25d2085057, 0x36597d25ea5c013d },
+        { 0x734d8d7b106058ac, -0x26bfa86190396fa1, 0x6466f8f99202932d, 0x7b7ecc19da60d6d0 }
+    },
+    {
+        { 0x6dae4a51a77cfa9b, -0x7dd9c9ab185c79b0, 0x09bbffcd8f2d82db, 0x03bedc661bf5caba },
+        { 0x78c2373c695c690d, -0x22dad199f9bd6f92, -0x6ae2bbbbb51ed42e, 0x4235ad7601743956 },
+        { 0x6258cb0d078975f5, 0x492942549189f298, -0x5f354bdc1d1c911c, 0x0e7ce2b0cdf066a1 }
+    },
+    {
+        { -0x0159012026b48f07, -0x0ecf3fae3e0345d3, 0x4882d47e7f2fab89, 0x615256138aeceeb5 },
+        { -0x3b6b9bc53b737a5d, -0x02c9e20bc39ec653, 0x09db17dd3ae94d48, 0x666e0a5d8fb4674a },
+        { 0x2abbf64e4870cb0d, -0x329a430f55ba7495, -0x6541b1458a1767a3, 0x7f0bc810d514dee4 }
+    },
+    {
+        { -0x7c5362528c8dec60, -0x60090745d108d168, 0x311e2edd43ec6957, 0x1d3a907ddec5ab75 },
+        { -0x46ff945bd90bec91, -0x7298c961a81fcfcb, -0x34372026b0b9c3d8, 0x0d1f8dbcf8eedbf5 },
+        { -0x45e96ccec12f7e24, 0x29329fad851b3480, 0x0128013c030321cb, 0x00011b44a31bfde3 }
+    },
+    {
+        { 0x16561f696a0aa75c, -0x3e408da3a7ad4296, 0x11a8dd7f9a7966ad, 0x63d988a2d2851026 },
         { 0x3fdfa06c3fc66c0c, 0x5d40e38e4dd60dd2, 0x7ae38b38268e4d71, 0x3ac48d916e8357e1 },
-        { 0x00120753afbd232e, 0xe92bceb8fdd8f683, 0xf81669b384e72b91, 0x33fad52b2368a066 }
-    },
-    {
-        { 0x8d2cc8d0c422cfe8, 0x072b4f7b05a13acb, 0xa3feb6e6ecf6a56f, 0x3cc355ccb90a71e2 },
-        { 0x540649c6c5e41e16, 0x0af86430333f7735, 0xb2acfcd2f305e746, 0x16c0f429a256dca7 },
-        { 0xe9b69443903e9131, 0xb8a494cb7a5637ce, 0xc87cd1a4baba9244, 0x631eaf426bae7568 }
+        { 0x00120753afbd232e, -0x16d431470227097d, -0x07e9964c7b18d46f, 0x33fad52b2368a066 }
+    },
+    {
+        { -0x72d3372f3bdd3018, 0x072b4f7b05a13acb, -0x5c01491913095a91, 0x3cc355ccb90a71e2 },
+        { 0x540649c6c5e41e16, 0x0af86430333f7735, -0x4d53032d0cfa18ba, 0x16c0f429a256dca7 },
+        { -0x16496bbc6fc16ecf, -0x475b6b3485a9c832, -0x37832e5b45456dbc, 0x631eaf426bae7568 }
     },
     {
         { 0x47d975b9a3700de8, 0x7280c5fbe2f80552, 0x53658f2732e45de1, 0x431f2c7f665f80b5 },
-        { 0xb3e90410da66fe9f, 0x85dd4b526c16e5a6, 0xbc3d97611ef9bf83, 0x5599648b1ea919b5 },
-        { 0xd6026344858f7b19, 0x14ab352fa1ea514a, 0x8900441a2090a9d7, 0x7b04715f91253b26 }
-    },
-    {
-        { 0xb376c280c4e6bac6, 0x970ed3dd6d1d9b0b, 0xb09a9558450bf944, 0x48d0acfa57cde223 },
-        { 0x83edbd28acf6ae43, 0x86357c8b7d5c7ab4, 0xc0404769b7eb2c44, 0x59b37bf5c2f6583f },
-        { 0xb60f26e47dabe671, 0xf1d1a197622f3a37, 0x4208ce7ee9960394, 0x16234191336d3bdb }
-    },
-},
-{
-    {
-        { 0x852dd1fd3d578bbe, 0x2b65ce72c3286108, 0x658c07f4eace2273, 0x0933f804ec38ab40 },
-        { 0xf19aeac733a63aef, 0x2c7fba5d4442454e, 0x5da87aa04795e441, 0x413051e1a4e0b0f5 },
-        { 0xa7ab69798d496476, 0x8121aadefcb5abc8, 0xa5dc12ef7b539472, 0x07fd47065e45351a }
-    },
-    {
-        { 0x304211559ae8e7c3, 0xf281b229944882a5, 0x8a13ac2e378250e4, 0x014afa0954ba48f4 },
-        { 0xc8583c3d258d2bcd, 0x17029a4daf60b73f, 0xfa0fc9d6416a3781, 0x1c1e5fba38b3fb23 },
-        { 0xcb3197001bb3666c, 0x330060524bffecb9, 0x293711991a88233c, 0x291884363d4ed364 }
-    },
-    {
-        { 0xfb9d37c3bc1ab6eb, 0x02be14534d57a240, 0xf4d73415f8a5e1f6, 0x5964f4300ccc8188 },
+        { -0x4c16fbef25990161, -0x7a22b4ad93e91a5a, -0x43c2689ee106407d, 0x5599648b1ea919b5 },
+        { -0x29fd9cbb7a7084e7, 0x14ab352fa1ea514a, -0x76ffbbe5df6f5629, 0x7b04715f91253b26 }
+    },
+    {
+        { -0x4c893d7f3b19453a, -0x68f12c2292e264f5, -0x4f656aa7baf406bc, 0x48d0acfa57cde223 },
+        { -0x7c1242d7530951bd, -0x79ca837482a3854c, -0x3fbfb8964814d3bc, 0x59b37bf5c2f6583f },
+        { -0x49f0d91b8254198f, -0x0e2e5e689dd0c5c9, 0x4208ce7ee9960394, 0x16234191336d3bdb }
+    },
+},
+{
+    {
+        { -0x7ad22e02c2a87442, 0x2b65ce72c3286108, 0x658c07f4eace2273, 0x0933f804ec38ab40 },
+        { -0x0e651538cc59c511, 0x2c7fba5d4442454e, 0x5da87aa04795e441, 0x413051e1a4e0b0f5 },
+        { -0x5854968672b69b8a, -0x7ede5521034a5438, -0x5a23ed1084ac6b8e, 0x07fd47065e45351a }
+    },
+    {
+        { 0x304211559ae8e7c3, -0x0d7e4dd66bb77d5b, -0x75ec53d1c87daf1c, 0x014afa0954ba48f4 },
+        { -0x37a7c3c2da72d433, 0x17029a4daf60b73f, -0x05f03629be95c87f, 0x1c1e5fba38b3fb23 },
+        { -0x34ce68ffe44c9994, 0x330060524bffecb9, 0x293711991a88233c, 0x291884363d4ed364 }
+    },
+    {
+        { -0x0462c83c43e54915, 0x02be14534d57a240, -0x0b28cbea075a1e0a, 0x5964f4300ccc8188 },
         { 0x033c6805dc4babfa, 0x2c15bf5e5596ecc1, 0x1bc70624b59b1d3b, 0x3ede9850a19f0ec5 },
-        { 0xe44a23152d096800, 0x5c08c55970866996, 0xdf2db60a46affb6e, 0x579155c1f856fd89 }
-    },
-    {
-        { 0xb5f16b630817e7a6, 0x808c69233c351026, 0x324a983b54cef201, 0x53c092084a485345 },
-        { 0x96324edd12e0c9ef, 0x468b878df2420297, 0x199a3776a4f573be, 0x1e7fbcf18e91e92a },
-        { 0xd2d41481f1cbafbf, 0x231d2db6716174e5, 0x0b7d7656e2a55c98, 0x3e955cd82aa495f6 }
-    },
-    {
-        { 0xab39f3ef61bb3a3f, 0x8eb400652eb9193e, 0xb5de6ecc38c11f74, 0x654d7e9626f3c49f },
-        { 0xe48f535e3ed15433, 0xd075692a0d7270a3, 0x40fbd21daade6387, 0x14264887cf4495f5 },
-        { 0xe564cfdd5c7d2ceb, 0x82eeafded737ccb9, 0x6107db62d1f9b0ab, 0x0b6baac3b4358dbb }
-    },
-    {
-        { 0x204abad63700a93b, 0xbe0023d3da779373, 0xd85f0346633ab709, 0x00496dc490820412 },
+        { -0x1bb5dcead2f69800, 0x5c08c55970866996, -0x20d249f5b9500492, 0x579155c1f856fd89 }
+    },
+    {
+        { -0x4a0e949cf7e8185a, -0x7f7396dcc3caefda, 0x324a983b54cef201, 0x53c092084a485345 },
+        { -0x69cdb122ed1f3611, 0x468b878df2420297, 0x199a3776a4f573be, 0x1e7fbcf18e91e92a },
+        { -0x2d2beb7e0e345041, 0x231d2db6716174e5, 0x0b7d7656e2a55c98, 0x3e955cd82aa495f6 }
+    },
+    {
+        { -0x54c60c109e44c5c1, -0x714bff9ad146e6c2, -0x4a219133c73ee08c, 0x654d7e9626f3c49f },
+        { -0x1b70aca1c12eabcd, -0x2f8a96d5f28d8f5d, 0x40fbd21daade6387, 0x14264887cf4495f5 },
+        { -0x1a9b3022a382d315, -0x7d11502128c83347, 0x6107db62d1f9b0ab, 0x0b6baac3b4358dbb }
+    },
+    {
+        { 0x204abad63700a93b, -0x41ffdc2c25886c8d, -0x27a0fcb99cc548f7, 0x00496dc490820412 },
         { 0x7ae62bcb8622fe98, 0x47762256ceb891af, 0x1a5a92bcf2e406b4, 0x7d29401784e41501 },
-        { 0x1c74b88dc27e6360, 0x074854268d14850c, 0xa145fb7b3e0dcb30, 0x10843f1b43803b23 }
-    },
-    {
-        { 0xd56f672de324689b, 0xd1da8aedb394a981, 0xdd7b58fe9168cfed, 0x7ce246cd4d56c1e8 },
-        { 0xc5f90455376276dd, 0xce59158dd7645cd9, 0x92f65d511d366b39, 0x11574b6e526996c4 },
-        { 0xb8f4308e7f80be53, 0x5f3cb8cb34a9d397, 0x18a961bd33cc2b2c, 0x710045fb3a9af671 }
-    },
-    {
-        { 0xa03fc862059d699e, 0x2370cfa19a619e69, 0xc4fe3b122f823deb, 0x1d1b056fa7f0844e },
-        { 0x73f93d36101b95eb, 0xfaef33794f6f4486, 0x5651735f8f15e562, 0x7fa3f19058b40da1 },
-        { 0x1bc64631e56bf61f, 0xd379ab106e5382a3, 0x4d58c57e0540168d, 0x566256628442d8e4 }
-    },
-},
-{
-    {
-        { 0xdd499cd61ff38640, 0x29cd9bc3063625a0, 0x51e2d8023dd73dc3, 0x4a25707a203b9231 },
-        { 0xb9e499def6267ff6, 0x7772ca7b742c0843, 0x23a0153fe9a4f2b1, 0x2cdfdfecd5d05006 },
+        { 0x1c74b88dc27e6360, 0x074854268d14850c, -0x5eba0484c1f234d0, 0x10843f1b43803b23 }
+    },
+    {
+        { -0x2a9098d21cdb9765, -0x2e2575124c6b567f, -0x2284a7016e973013, 0x7ce246cd4d56c1e8 },
+        { -0x3a06fbaac89d8923, -0x31a6ea72289ba327, -0x6d09a2aee2c994c7, 0x11574b6e526996c4 },
+        { -0x470bcf71807f41ad, 0x5f3cb8cb34a9d397, 0x18a961bd33cc2b2c, 0x710045fb3a9af671 }
+    },
+    {
+        { -0x5fc0379dfa629662, 0x2370cfa19a619e69, -0x3b01c4edd07dc215, 0x1d1b056fa7f0844e },
+        { 0x73f93d36101b95eb, -0x0510cc86b090bb7a, 0x5651735f8f15e562, 0x7fa3f19058b40da1 },
+        { 0x1bc64631e56bf61f, -0x2c8654ef91ac7d5d, 0x4d58c57e0540168d, 0x566256628442d8e4 }
+    },
+},
+{
+    {
+        { -0x22b66329e00c79c0, 0x29cd9bc3063625a0, 0x51e2d8023dd73dc3, 0x4a25707a203b9231 },
+        { -0x461b662109d9800a, 0x7772ca7b742c0843, 0x23a0153fe9a4f2b1, 0x2cdfdfecd5d05006 },
         { 0x2ab7668a53f6ed6a, 0x304242581dd170a1, 0x4000144c3ae20161, 0x5721896d248e49fc }
     },
     {
         { 0x285d5091a1d0da4e, 0x4baa6fa7b5fe3e08, 0x63e5177ce19393b3, 0x03c935afc4b030fd },
-        { 0x0b6e5517fd181bae, 0x9022629f2bb963b4, 0x5509bce932064625, 0x578edd74f63c13da },
-        { 0x997276c6492b0c3d, 0x47ccc2c4dfe205fc, 0xdcd29b84dd623a3c, 0x3ec2ab590288c7a2 }
-    },
-    {
-        { 0xa7213a09ae32d1cb, 0x0f2b87df40f5c2d5, 0x0baea4c6e81eab29, 0x0e1bf66c6adbac5e },
-        { 0xa1a0d27be4d87bb9, 0xa98b4deb61391aed, 0x99a0ddd073cb9b83, 0x2dd5c25a200fcace },
-        { 0xe2abd5e9792c887e, 0x1a020018cb926d5d, 0xbfba69cdbaae5f1e, 0x730548b35ae88f5f }
-    },
-    {
-        { 0x805b094ba1d6e334, 0xbf3ef17709353f19, 0x423f06cb0622702b, 0x585a2277d87845dd },
-        { 0xc43551a3cba8b8ee, 0x65a26f1db2115f16, 0x760f4f52ab8c3850, 0x3043443b411db8ca },
-        { 0xa18a5f8233d48962, 0x6698c4b5ec78257f, 0xa78e6fa5373e41ff, 0x7656278950ef981f }
-    },
-    {
-        { 0xe17073a3ea86cf9d, 0x3a8cfbb707155fdc, 0x4853e7fc31838a8e, 0x28bbf484b613f616 },
-        { 0x38c3cf59d51fc8c0, 0x9bedd2fd0506b6f2, 0x26bf109fab570e8f, 0x3f4160a8c1b846a6 },
-        { 0xf2612f5c6f136c7c, 0xafead107f6dd11be, 0x527e9ad213de6f33, 0x1e79cb358188f75d }
-    },
-    {
-        { 0x77e953d8f5e08181, 0x84a50c44299dded9, 0xdc6c2d0c864525e5, 0x478ab52d39d1f2f4 },
-        { 0x013436c3eef7e3f1, 0x828b6a7ffe9e10f8, 0x7ff908e5bcf9defc, 0x65d7951b3a3b3831 },
-        { 0x66a6a4d39252d159, 0xe5dde1bc871ac807, 0xb82c6b40a6c1c96f, 0x16d87a411a212214 }
-    },
-    {
-        { 0xfba4d5e2d54e0583, 0xe21fafd72ebd99fa, 0x497ac2736ee9778f, 0x1f990b577a5a6dde },
-        { 0xb3bd7e5a42066215, 0x879be3cd0c5a24c1, 0x57c05db1d6f994b7, 0x28f87c8165f38ca6 },
-        { 0xa3344ead1be8f7d6, 0x7d1e50ebacea798f, 0x77c6569e520de052, 0x45882fe1534d6d3e }
-    },
-    {
-        { 0xd8ac9929943c6fe4, 0xb5f9f161a38392a2, 0x2699db13bec89af3, 0x7dcf843ce405f074 },
+        { 0x0b6e5517fd181bae, -0x6fdd9d60d4469c4c, 0x5509bce932064625, 0x578edd74f63c13da },
+        { -0x668d8939b6d4f3c3, 0x47ccc2c4dfe205fc, -0x232d647b229dc5c4, 0x3ec2ab590288c7a2 }
+    },
+    {
+        { -0x58dec5f651cd2e35, 0x0f2b87df40f5c2d5, 0x0baea4c6e81eab29, 0x0e1bf66c6adbac5e },
+        { -0x5e5f2d841b278447, -0x5674b2149ec6e513, -0x665f222f8c34647d, 0x2dd5c25a200fcace },
+        { -0x1d542a1686d37782, 0x1a020018cb926d5d, -0x404596324551a0e2, 0x730548b35ae88f5f }
+    },
+    {
+        { -0x7fa4f6b45e291ccc, -0x40c10e88f6cac0e7, 0x423f06cb0622702b, 0x585a2277d87845dd },
+        { -0x3bcaae5c34574712, 0x65a26f1db2115f16, 0x760f4f52ab8c3850, 0x3043443b411db8ca },
+        { -0x5e75a07dcc2b769e, 0x6698c4b5ec78257f, -0x5871905ac8c1be01, 0x7656278950ef981f }
+    },
+    {
+        { -0x1e8f8c5c15793063, 0x3a8cfbb707155fdc, 0x4853e7fc31838a8e, 0x28bbf484b613f616 },
+        { 0x38c3cf59d51fc8c0, -0x64122d02faf9490e, 0x26bf109fab570e8f, 0x3f4160a8c1b846a6 },
+        { -0x0d9ed0a390ec9384, -0x50152ef80922ee42, 0x527e9ad213de6f33, 0x1e79cb358188f75d }
+    },
+    {
+        { 0x77e953d8f5e08181, -0x7b5af3bbd6622127, -0x2393d2f379bada1b, 0x478ab52d39d1f2f4 },
+        { 0x013436c3eef7e3f1, -0x7d7495800161ef08, 0x7ff908e5bcf9defc, 0x65d7951b3a3b3831 },
+        { 0x66a6a4d39252d159, -0x1a221e4378e537f9, -0x47d394bf593e3691, 0x16d87a411a212214 }
+    },
+    {
+        { -0x045b2a1d2ab1fa7d, -0x1de05028d1426606, 0x497ac2736ee9778f, 0x1f990b577a5a6dde },
+        { -0x4c4281a5bdf99deb, -0x78641c32f3a5db3f, 0x57c05db1d6f994b7, 0x28f87c8165f38ca6 },
+        { -0x5ccbb152e417082a, 0x7d1e50ebacea798f, 0x77c6569e520de052, 0x45882fe1534d6d3e }
+    },
+    {
+        { -0x275366d66bc3901c, -0x4a060e9e5c7c6d5e, 0x2699db13bec89af3, 0x7dcf843ce405f074 },
         { 0x6669345d757983d6, 0x62b6ed1117aa11a6, 0x7ddd1857985e128f, 0x688fe5b8f626f6dd },
-        { 0x6c90d6484a4732c0, 0xd52143fdca563299, 0xb3be28c3915dc6e1, 0x6739687e7327191b }
-    },
-},
-{
-    {
-        { 0x8ce5aad0c9cb971f, 0x1156aaa99fd54a29, 0x41f7247015af9b78, 0x1fe8cca8420f49aa },
-        { 0x9f65c5ea200814cf, 0x840536e169a31740, 0x8b0ed13925c8b4ad, 0x0080dbafe936361d },
+        { 0x6c90d6484a4732c0, -0x2adebc0235a9cd67, -0x4c41d73c6ea2391f, 0x6739687e7327191b }
+    },
+},
+{
+    {
+        { -0x731a552f363468e1, 0x1156aaa99fd54a29, 0x41f7247015af9b78, 0x1fe8cca8420f49aa },
+        { -0x609a3a15dff7eb31, -0x7bfac91e965ce8c0, -0x74f12ec6da374b53, 0x0080dbafe936361d },
         { 0x72a1848f3c0cc82a, 0x38c560c2877c9e54, 0x5004e228ce554140, 0x042418a103429d71 }
     },
     {
-        { 0x58e84c6f20816247, 0x8db2b2b6e36fd793, 0x977182561d484d85, 0x0822024f8632abd7 },
-        { 0x899dea51abf3ff5f, 0x9b93a8672fc2d8ba, 0x2c38cb97be6ebd5c, 0x114d578497263b5d },
-        { 0xb301bb7c6b1beca3, 0x55393f6dc6eb1375, 0x910d281097b6e4eb, 0x1ad4548d9d479ea3 }
-    },
-    {
-        { 0xa06fe66d0fe9fed3, 0xa8733a401c587909, 0x30d14d800df98953, 0x41ce5876c7b30258 },
-        { 0xcd5a7da0389a48fd, 0xb38fa4aa9a78371e, 0xc6d9761b2cdb8e6c, 0x35cf51dbc97e1443 },
-        { 0x59ac3bc5d670c022, 0xeae67c109b119406, 0x9798bdf0b3782fda, 0x651e3201fd074092 }
-    },
-    {
-        { 0xa57ba4a01efcae9e, 0x769f4beedc308a94, 0xd1f10eeb3603cb2e, 0x4099ce5e7e441278 },
-        { 0xd63d8483ef30c5cf, 0x4cd4b4962361cc0c, 0xee90e500a48426ac, 0x0af51d7d18c14eeb },
-        { 0x1ac98e4f8a5121e9, 0x7dae9544dbfa2fe0, 0x8320aa0dd6430df9, 0x667282652c4a2fb5 }
-    },
-    {
-        { 0xada8b6e02946db23, 0x1c0ce51a7b253ab7, 0x8448c85a66dd485b, 0x7f1fc025d0675adf },
-        { 0x874621f4d86bc9ab, 0xb54c7bbe56fe6fea, 0x077a24257fadc22c, 0x1ab53be419b90d39 },
-        { 0xd8ee1b18319ea6aa, 0x004d88083a21f0da, 0x3bd6aa1d883a4f4b, 0x4db9a3a6dfd9fd14 }
-    },
-    {
-        { 0xd95b00bbcbb77c68, 0xddbc846a91f17849, 0x7cf700aebe28d9b3, 0x5ce1285c85d31f3e },
-        { 0x8ce7b23bb99c0755, 0x35c5d6edc4f50f7a, 0x7e1e2ed2ed9b50c3, 0x36305f16e8934da1 },
-        { 0x31b6972d98b0bde8, 0x7d920706aca6de5b, 0xe67310f8908a659f, 0x50fac2a6efdf0235 }
-    },
-    {
-        { 0x295b1c86f6f449bc, 0x51b2e84a1f0ab4dd, 0xc001cb30aa8e551d, 0x6a28d35944f43662 },
-        { 0xf3d3a9f35b880f5a, 0xedec050cdb03e7c2, 0xa896981ff9f0b1a2, 0x49a4ae2bac5e34a4 },
+        { 0x58e84c6f20816247, -0x724d4d491c90286d, -0x688e7da9e2b7b27b, 0x0822024f8632abd7 },
+        { -0x766215ae540c00a1, -0x646c5798d03d2746, 0x2c38cb97be6ebd5c, 0x114d578497263b5d },
+        { -0x4cfe448394e4135d, 0x55393f6dc6eb1375, -0x6ef2d7ef68491b15, 0x1ad4548d9d479ea3 }
+    },
+    {
+        { -0x5f901992f016012d, -0x578cc5bfe3a786f7, 0x30d14d800df98953, 0x41ce5876c7b30258 },
+        { -0x32a5825fc765b703, -0x4c705b556587c8e2, -0x392689e4d3247194, 0x35cf51dbc97e1443 },
+        { 0x59ac3bc5d670c022, -0x151983ef64ee6bfa, -0x6867420f4c87d026, 0x651e3201fd074092 }
+    },
+    {
+        { -0x5a845b5fe1035162, 0x769f4beedc308a94, -0x2e0ef114c9fc34d2, 0x4099ce5e7e441278 },
+        { -0x29c27b7c10cf3a31, 0x4cd4b4962361cc0c, -0x116f1aff5b7bd954, 0x0af51d7d18c14eeb },
+        { 0x1ac98e4f8a5121e9, 0x7dae9544dbfa2fe0, -0x7cdf55f229bcf207, 0x667282652c4a2fb5 }
+    },
+    {
+        { -0x5257491fd6b924dd, 0x1c0ce51a7b253ab7, -0x7bb737a59922b7a5, 0x7f1fc025d0675adf },
+        { -0x78b9de0b27943655, -0x4ab38441a9019016, 0x077a24257fadc22c, 0x1ab53be419b90d39 },
+        { -0x2711e4e7ce615956, 0x004d88083a21f0da, 0x3bd6aa1d883a4f4b, 0x4db9a3a6dfd9fd14 }
+    },
+    {
+        { -0x26a4ff4434488398, -0x22437b956e0e87b7, 0x7cf700aebe28d9b3, 0x5ce1285c85d31f3e },
+        { -0x73184dc44663f8ab, 0x35c5d6edc4f50f7a, 0x7e1e2ed2ed9b50c3, 0x36305f16e8934da1 },
+        { 0x31b6972d98b0bde8, 0x7d920706aca6de5b, -0x198cef076f759a61, 0x50fac2a6efdf0235 }
+    },
+    {
+        { 0x295b1c86f6f449bc, 0x51b2e84a1f0ab4dd, -0x3ffe34cf5571aae3, 0x6a28d35944f43662 },
+        { -0x0c2c560ca477f0a6, -0x1213faf324fc183e, -0x576967e0060f4e5e, 0x49a4ae2bac5e34a4 },
         { 0x28bb12ee04a740e0, 0x14313bbd9bce8174, 0x72f5b5e4e8c10c40, 0x7cbfb19936adcd5b }
     },
     {
-        { 0x8e793a7acc36e6e0, 0xf9fab7a37d586eed, 0x3a4f9692bae1f4e4, 0x1c14b03eff5f447e },
-        { 0xa311ddc26b89792d, 0x1b30b4c6da512664, 0x0ca77b4ccf150859, 0x1de443df1b009408 },
+        { -0x7186c58533c91920, -0x0605485c82a79113, 0x3a4f9692bae1f4e4, 0x1c14b03eff5f447e },
+        { -0x5cee223d947686d3, 0x1b30b4c6da512664, 0x0ca77b4ccf150859, 0x1de443df1b009408 },
         { 0x19647bd114a85291, 0x57b76cb21034d3af, 0x6329db440f9d6dfa, 0x5ef43e586a571493 }
     },
 },
 {
     {
-        { 0xa66dcc9dc80c1ac0, 0x97a05cf41b38a436, 0xa7ebf3be95dbd7c6, 0x7da0b8f68d7e7dab },
-        { 0xef782014385675a6, 0xa2649f30aafda9e8, 0x4cd1eb505cdfa8cb, 0x46115aba1d4dc0b3 },
-        { 0xd40f1953c3b5da76, 0x1dac6f7321119e9b, 0x03cc6021feb25960, 0x5a5f887e83674b4b }
-    },
-    {
-        { 0x9e9628d3a0a643b9, 0xb5c3cb00e6c32064, 0x9b5302897c2dec32, 0x43e37ae2d5d1c70c },
-        { 0x8f6301cf70a13d11, 0xcfceb815350dd0c4, 0xf70297d4a4bca47e, 0x3669b656e44d1434 },
-        { 0x387e3f06eda6e133, 0x67301d5199a13ac0, 0xbd5ad8f836263811, 0x6a21e6cd4fd5e9be }
-    },
-    {
-        { 0xef4129126699b2e3, 0x71d30847708d1301, 0x325432d01182b0bd, 0x45371b07001e8b36 },
-        { 0xf1c6170a3046e65f, 0x58712a2a00d23524, 0x69dbbd3c8c82b755, 0x586bf9f1a195ff57 },
-        { 0xa6db088d5ef8790b, 0x5278f0dc610937e5, 0xac0349d261a16eb8, 0x0eafb03790e52179 }
-    },
-    {
-        { 0x5140805e0f75ae1d, 0xec02fbe32662cc30, 0x2cebdf1eea92396d, 0x44ae3344c5435bb3 },
-        { 0x960555c13748042f, 0x219a41e6820baa11, 0x1c81f73873486d0c, 0x309acc675a02c661 },
-        { 0x9cf289b9bba543ee, 0xf3760e9d5ac97142, 0x1d82e5c64f9360aa, 0x62d5221b7f94678f }
-    },
-    {
-        { 0x7585d4263af77a3c, 0xdfae7b11fee9144d, 0xa506708059f7193d, 0x14f29a5383922037 },
-        { 0x524c299c18d0936d, 0xc86bb56c8a0c1a0c, 0xa375052edb4a8631, 0x5c0efde4bc754562 },
-        { 0xdf717edc25b2d7f5, 0x21f970db99b53040, 0xda9234b7c3ed4c62, 0x5e72365c7bee093e }
-    },
-    {
-        { 0x7d9339062f08b33e, 0x5b9659e5df9f32be, 0xacff3dad1f9ebdfd, 0x70b20555cb7349b7 },
-        { 0x575bfc074571217f, 0x3779675d0694d95b, 0x9a0a37bbf4191e33, 0x77f1104c47b4eabc },
-        { 0xbe5113c555112c4c, 0x6688423a9a881fcd, 0x446677855e503b47, 0x0e34398f4a06404a }
-    },
-    {
-        { 0x18930b093e4b1928, 0x7de3e10e73f3f640, 0xf43217da73395d6f, 0x6f8aded6ca379c3e },
-        { 0xb67d22d93ecebde8, 0x09b3e84127822f07, 0x743fa61fb05b6d8d, 0x5e5405368a362372 },
-        { 0xe340123dfdb7b29a, 0x487b97e1a21ab291, 0xf9967d02fde6949e, 0x780de72ec8d3de97 }
-    },
-    {
-        { 0x671feaf300f42772, 0x8f72eb2a2a8c41aa, 0x29a17fd797373292, 0x1defc6ad32b587a6 },
+        { -0x5992336237f3e540, -0x685fa30be4c75bca, -0x58140c416a24283a, 0x7da0b8f68d7e7dab },
+        { -0x1087dfebc7a98a5a, -0x5d9b60cf55025618, 0x4cd1eb505cdfa8cb, 0x46115aba1d4dc0b3 },
+        { -0x2bf0e6ac3c4a258a, 0x1dac6f7321119e9b, 0x03cc6021feb25960, 0x5a5f887e83674b4b }
+    },
+    {
+        { -0x6169d72c5f59bc47, -0x4a3c34ff193cdf9c, -0x64acfd7683d213ce, 0x43e37ae2d5d1c70c },
+        { -0x709cfe308f5ec2ef, -0x303147eacaf22f3c, -0x08fd682b5b435b82, 0x3669b656e44d1434 },
+        { 0x387e3f06eda6e133, 0x67301d5199a13ac0, -0x42a52707c9d9c7ef, 0x6a21e6cd4fd5e9be }
+    },
+    {
+        { -0x10bed6ed99664d1d, 0x71d30847708d1301, 0x325432d01182b0bd, 0x45371b07001e8b36 },
+        { -0x0e39e8f5cfb919a1, 0x58712a2a00d23524, 0x69dbbd3c8c82b755, 0x586bf9f1a195ff57 },
+        { -0x5924f772a10786f5, 0x5278f0dc610937e5, -0x53fcb62d9e5e9148, 0x0eafb03790e52179 }
+    },
+    {
+        { 0x5140805e0f75ae1d, -0x13fd041cd99d33d0, 0x2cebdf1eea92396d, 0x44ae3344c5435bb3 },
+        { -0x69faaa3ec8b7fbd1, 0x219a41e6820baa11, 0x1c81f73873486d0c, 0x309acc675a02c661 },
+        { -0x630d7646445abc12, -0x0c89f162a5368ebe, 0x1d82e5c64f9360aa, 0x62d5221b7f94678f }
+    },
+    {
+        { 0x7585d4263af77a3c, -0x205184ee0116ebb3, -0x5af98f7fa608e6c3, 0x14f29a5383922037 },
+        { 0x524c299c18d0936d, -0x37944a9375f3e5f4, -0x5c8afad124b579cf, 0x5c0efde4bc754562 },
+        { -0x208e8123da4d280b, 0x21f970db99b53040, -0x256dcb483c12b39e, 0x5e72365c7bee093e }
+    },
+    {
+        { 0x7d9339062f08b33e, 0x5b9659e5df9f32be, -0x5300c252e0614203, 0x70b20555cb7349b7 },
+        { 0x575bfc074571217f, 0x3779675d0694d95b, -0x65f5c8440be6e1cd, 0x77f1104c47b4eabc },
+        { -0x41aeec3aaaeed3b4, 0x6688423a9a881fcd, 0x446677855e503b47, 0x0e34398f4a06404a }
+    },
+    {
+        { 0x18930b093e4b1928, 0x7de3e10e73f3f640, -0x0bcde8258cc6a291, 0x6f8aded6ca379c3e },
+        { -0x4982dd26c1314218, 0x09b3e84127822f07, 0x743fa61fb05b6d8d, 0x5e5405368a362372 },
+        { -0x1cbfedc202484d66, 0x487b97e1a21ab291, -0x066982fd02196b62, 0x780de72ec8d3de97 }
+    },
+    {
+        { 0x671feaf300f42772, -0x708d14d5d573be56, 0x29a17fd797373292, 0x1defc6ad32b587a6 },
         { 0x0ae28545089ae7bc, 0x388ddecf1c7f4d06, 0x38ac15510a4811b8, 0x0eb28bf671928ce4 },
-        { 0xaf5bbe1aef5195a7, 0x148c1277917b15ed, 0x2991f7fb7ae5da2e, 0x467d201bf8dd2867 }
-    },
-},
-{
-    {
-        { 0x745f9d56296bc318, 0x993580d4d8152e65, 0xb0e5b13f5839e9ce, 0x51fc2b28d43921c0 },
-        { 0x7906ee72f7bd2e6b, 0x05d270d6109abf4e, 0x8d5cfe45b941a8a4, 0x44c218671c974287 },
+        { -0x50a441e510ae6a59, 0x148c1277917b15ed, 0x2991f7fb7ae5da2e, 0x467d201bf8dd2867 }
+    },
+},
+{
+    {
+        { 0x745f9d56296bc318, -0x66ca7f2b27ead19b, -0x4f1a4ec0a7c61632, 0x51fc2b28d43921c0 },
+        { 0x7906ee72f7bd2e6b, 0x05d270d6109abf4e, -0x72a301ba46be575c, 0x44c218671c974287 },
         { 0x1b8fd11795e2a98c, 0x1c4e5ee12b6b6291, 0x5b30e7107424b572, 0x6e6b9de84c4f4ac6 }
     },
     {
-        { 0x6b7c5f10f80cb088, 0x736b54dc56e42151, 0xc2b620a5c6ef99c4, 0x5f4c802cc3a06f42 },
-        { 0xdff25fce4b1de151, 0xd841c0c7e11c4025, 0x2554b3c854749c87, 0x2d292459908e0df9 },
-        { 0x9b65c8f17d0752da, 0x881ce338c77ee800, 0xc3b514f05b62f9e3, 0x66ed5dd5bec10d48 }
-    },
-    {
-        { 0xf0adf3c9cbca047d, 0x81c3b2cbf4552f6b, 0xcfda112d44735f93, 0x1f23a0c77e20048c },
-        { 0x7d38a1c20bb2089d, 0x808334e196ccd412, 0xc4a70b8c6c97d313, 0x2eacf8bc03007f20 },
-        { 0xf235467be5bc1570, 0x03d2d9020dbab38c, 0x27529aa2fcf9e09e, 0x0840bef29d34bc50 }
-    },
-    {
-        { 0xcd54e06b7f37e4eb, 0x8cc15f87f5e96cca, 0xb8248bb0d3597dce, 0x246affa06074400c },
+        { 0x6b7c5f10f80cb088, 0x736b54dc56e42151, -0x3d49df5a3910663c, 0x5f4c802cc3a06f42 },
+        { -0x200da031b4e21eaf, -0x27be3f381ee3bfdb, 0x2554b3c854749c87, 0x2d292459908e0df9 },
+        { -0x649a370e82f8ad26, -0x77e31cc738811800, -0x3c4aeb0fa49d061d, 0x66ed5dd5bec10d48 }
+    },
+    {
+        { -0x0f520c363435fb83, -0x7e3c4d340baad095, -0x3025eed2bb8ca06d, 0x1f23a0c77e20048c },
+        { 0x7d38a1c20bb2089d, -0x7f7ccb1e69332bee, -0x3b58f47393682ced, 0x2eacf8bc03007f20 },
+        { -0x0dcab9841a43ea90, 0x03d2d9020dbab38c, 0x27529aa2fcf9e09e, 0x0840bef29d34bc50 }
+    },
+    {
+        { -0x32ab1f9480c81b15, -0x733ea0780a169336, -0x47db744f2ca68232, 0x246affa06074400c },
         { 0x796dfb35dc10b287, 0x27176bcd5c7ff29d, 0x7f3d43e8c7b24905, 0x0304f5a191c54276 },
-        { 0x37d88e68fbe45321, 0x86097548c0d75032, 0x4e9b13ef894a0d35, 0x25a83cac5753d325 }
-    },
-    {
-        { 0x9f0f66293952b6e2, 0x33db5e0e0934267b, 0xff45252bd609fedc, 0x06be10f5c506e0c9 },
+        { 0x37d88e68fbe45321, -0x79f68ab73f28afce, 0x4e9b13ef894a0d35, 0x25a83cac5753d325 }
+    },
+    {
+        { -0x60f099d6c6ad491e, 0x33db5e0e0934267b, -0x00badad429f60124, 0x06be10f5c506e0c9 },
         { 0x10222f48eed8165e, 0x623fc1234b8bcf3a, 0x1e145c09c221e8f0, 0x7ccfa59fca782630 },
-        { 0x1a9615a9b62a345f, 0x22050c564a52fecc, 0xa7a2788528bc0dfe, 0x5e82770a1a1ee71d }
-    },
-    {
-        { 0xe802e80a42339c74, 0x34175166a7fffae5, 0x34865d1f1c408cae, 0x2cca982c605bc5ee },
-        { 0x35425183ad896a5c, 0xe8673afbe78d52f6, 0x2c66f25f92a35f64, 0x09d04f3b3b86b102 },
-        { 0xfd2d5d35197dbe6e, 0x207c2eea8be4ffa3, 0x2613d8db325ae918, 0x7a325d1727741d3e }
-    },
-    {
-        { 0xecd27d017e2a076a, 0xd788689f1636495e, 0x52a61af0919233e5, 0x2a479df17bb1ae64 },
-        { 0xd036b9bbd16dfde2, 0xa2055757c497a829, 0x8e6cc966a7f12667, 0x4d3b1a791239c180 },
-        { 0x9e5eee8e33db2710, 0x189854ded6c43ca5, 0xa41c22c592718138, 0x27ad5538a43a5e9b }
-    },
-    {
-        { 0xcb5a7d638e47077c, 0x8db7536120a1c059, 0x549e1e4d8bedfdcc, 0x080153b7503b179d },
-        { 0x2746dd4b15350d61, 0xd03fcbc8ee9521b7, 0xe86e365a138672ca, 0x510e987f7e7d89e2 },
-        { 0xdda69d930a3ed3e3, 0x3d386ef1cd60a722, 0xc817ad58bdaa4ee6, 0x23be8d554fe7372a }
-    },
-},
-{
-    {
-        { 0xbc1ef4bd567ae7a9, 0x3f624cb2d64498bd, 0xe41064d22c1f4ec8, 0x2ef9c5a5ba384001 },
-        { 0x95fe919a74ef4fad, 0x3a827becf6a308a2, 0x964e01d309a47b01, 0x71c43c4f5ba3c797 },
-        { 0xb6fd6df6fa9e74cd, 0xf18278bce4af267a, 0x8255b3d0f1ef990e, 0x5a758ca390c5f293 }
-    },
-    {
-        { 0x8ce0918b1d61dc94, 0x8ded36469a813066, 0xd4e6a829afe8aad3, 0x0a738027f639d43f },
-        { 0xa2b72710d9462495, 0x3aa8c6d2d57d5003, 0xe3d400bfa0b487ca, 0x2dbae244b3eb72ec },
-        { 0x980f4a2f57ffe1cc, 0x00670d0de1839843, 0x105c3f4a49fb15fd, 0x2698ca635126a69c }
-    },
-    {
-        { 0x2e3d702f5e3dd90e, 0x9e3f0918e4d25386, 0x5e773ef6024da96a, 0x3c004b0c4afa3332 },
-        { 0xe765318832b0ba78, 0x381831f7925cff8b, 0x08a81b91a0291fcc, 0x1fb43dcc49caeb07 },
-        { 0x9aa946ac06f4b82b, 0x1ca284a5a806c4f3, 0x3ed3265fc6cd4787, 0x6b43fd01cd1fd217 }
-    },
-    {
-        { 0xb5c742583e760ef3, 0x75dc52b9ee0ab990, 0xbf1427c2072b923f, 0x73420b2d6ff0d9f0 },
-        { 0xc7a75d4b4697c544, 0x15fdf848df0fffbf, 0x2868b9ebaa46785a, 0x5a68d7105b52f714 },
-        { 0xaf2cf6cb9e851e06, 0x8f593913c62238c4, 0xda8ab89699fbf373, 0x3db5632fea34bc9e }
-    },
-    {
-        { 0x2e4990b1829825d5, 0xedeaeb873e9a8991, 0xeef03d394c704af8, 0x59197ea495df2b0e },
-        { 0xf46eee2bf75dd9d8, 0x0d17b1f6396759a5, 0x1bf2d131499e7273, 0x04321adf49d75f13 },
-        { 0x04e16019e4e55aae, 0xe77b437a7e2f92e9, 0xc7ce2dc16f159aa4, 0x45eafdc1f4d70cc0 }
-    },
-    {
-        { 0xb60e4624cfccb1ed, 0x59dbc292bd5c0395, 0x31a09d1ddc0481c9, 0x3f73ceea5d56d940 },
-        { 0x698401858045d72b, 0x4c22faa2cf2f0651, 0x941a36656b222dc6, 0x5a5eebc80362dade },
-        { 0xb7a7bfd10a4e8dc6, 0xbe57007e44c9b339, 0x60c1207f1557aefa, 0x26058891266218db }
-    },
-    {
-        { 0x4c818e3cc676e542, 0x5e422c9303ceccad, 0xec07cccab4129f08, 0x0dedfa10b24443b8 },
-        { 0x59f704a68360ff04, 0xc3d93fde7661e6f4, 0x831b2a7312873551, 0x54ad0c2e4e615d57 },
-        { 0xee3b67d5b82b522a, 0x36f163469fa5c1eb, 0xa5b4d2f26ec19fd3, 0x62ecb2baa77a9408 }
-    },
-    {
-        { 0x92072836afb62874, 0x5fcd5e8579e104a5, 0x5aad01adc630a14a, 0x61913d5075663f98 },
-        { 0xe5ed795261152b3d, 0x4962357d0eddd7d1, 0x7482c8d0b96b4c71, 0x2e59f919a966d8be },
-        { 0x0dc62d361a3231da, 0xfa47583294200270, 0x02d801513f9594ce, 0x3ddbc2a131c05d5c }
-    },
-},
-{
-    {
-        { 0xfb735ac2004a35d1, 0x31de0f433a6607c3, 0x7b8591bfc528d599, 0x55be9a25f5bb050c },
-        { 0x3f50a50a4ffb81ef, 0xb1e035093bf420bf, 0x9baa8e1cc6aa2cd0, 0x32239861fa237a40 },
+        { 0x1a9615a9b62a345f, 0x22050c564a52fecc, -0x585d877ad743f202, 0x5e82770a1a1ee71d }
+    },
+    {
+        { -0x17fd17f5bdcc638c, 0x34175166a7fffae5, 0x34865d1f1c408cae, 0x2cca982c605bc5ee },
+        { 0x35425183ad896a5c, -0x1798c5041872ad0a, 0x2c66f25f92a35f64, 0x09d04f3b3b86b102 },
+        { -0x02d2a2cae6824192, 0x207c2eea8be4ffa3, 0x2613d8db325ae918, 0x7a325d1727741d3e }
+    },
+    {
+        { -0x132d82fe81d5f896, -0x28779760e9c9b6a2, 0x52a61af0919233e5, 0x2a479df17bb1ae64 },
+        { -0x2fc946442e92021e, -0x5dfaa8a83b6857d7, -0x71933699580ed999, 0x4d3b1a791239c180 },
+        { -0x61a11171cc24d8f0, 0x189854ded6c43ca5, -0x5be3dd3a6d8e7ec8, 0x27ad5538a43a5e9b }
+    },
+    {
+        { -0x34a5829c71b8f884, -0x7248ac9edf5e3fa7, 0x549e1e4d8bedfdcc, 0x080153b7503b179d },
+        { 0x2746dd4b15350d61, -0x2fc03437116ade49, -0x1791c9a5ec798d36, 0x510e987f7e7d89e2 },
+        { -0x2259626cf5c12c1d, 0x3d386ef1cd60a722, -0x37e852a74255b11a, 0x23be8d554fe7372a }
+    },
+},
+{
+    {
+        { -0x43e10b42a9851857, 0x3f624cb2d64498bd, -0x1bef9b2dd3e0b138, 0x2ef9c5a5ba384001 },
+        { -0x6a016e658b10b053, 0x3a827becf6a308a2, -0x69b1fe2cf65b84ff, 0x71c43c4f5ba3c797 },
+        { -0x4902920905618b33, -0x0e7d87431b50d986, -0x7daa4c2f0e1066f2, 0x5a758ca390c5f293 }
+    },
+    {
+        { -0x731f6e74e29e236c, -0x7212c9b9657ecf9a, -0x2b1957d65017552d, 0x0a738027f639d43f },
+        { -0x5d48d8ef26b9db6b, 0x3aa8c6d2d57d5003, -0x1c2bff405f4b7836, 0x2dbae244b3eb72ec },
+        { -0x67f0b5d0a8001e34, 0x00670d0de1839843, 0x105c3f4a49fb15fd, 0x2698ca635126a69c }
+    },
+    {
+        { 0x2e3d702f5e3dd90e, -0x61c0f6e71b2dac7a, 0x5e773ef6024da96a, 0x3c004b0c4afa3332 },
+        { -0x189ace77cd4f4588, 0x381831f7925cff8b, 0x08a81b91a0291fcc, 0x1fb43dcc49caeb07 },
+        { -0x6556b953f90b47d5, 0x1ca284a5a806c4f3, 0x3ed3265fc6cd4787, 0x6b43fd01cd1fd217 }
+    },
+    {
+        { -0x4a38bda7c189f10d, 0x75dc52b9ee0ab990, -0x40ebd83df8d46dc1, 0x73420b2d6ff0d9f0 },
+        { -0x3858a2b4b9683abc, 0x15fdf848df0fffbf, 0x2868b9ebaa46785a, 0x5a68d7105b52f714 },
+        { -0x50d30934617ae1fa, -0x70a6c6ec39ddc73c, -0x2575476966040c8d, 0x3db5632fea34bc9e }
+    },
+    {
+        { 0x2e4990b1829825d5, -0x12151478c165766f, -0x110fc2c6b38fb508, 0x59197ea495df2b0e },
+        { -0x0b9111d408a22628, 0x0d17b1f6396759a5, 0x1bf2d131499e7273, 0x04321adf49d75f13 },
+        { 0x04e16019e4e55aae, -0x1884bc8581d06d17, -0x3831d23e90ea655c, 0x45eafdc1f4d70cc0 }
+    },
+    {
+        { -0x49f1b9db30334e13, 0x59dbc292bd5c0395, 0x31a09d1ddc0481c9, 0x3f73ceea5d56d940 },
+        { 0x698401858045d72b, 0x4c22faa2cf2f0651, -0x6be5c99a94ddd23a, 0x5a5eebc80362dade },
+        { -0x4858402ef5b1723a, -0x41a8ff81bb364cc7, 0x60c1207f1557aefa, 0x26058891266218db }
+    },
+    {
+        { 0x4c818e3cc676e542, 0x5e422c9303ceccad, -0x13f833354bed60f8, 0x0dedfa10b24443b8 },
+        { 0x59f704a68360ff04, -0x3c26c021899e190c, -0x7ce4d58ced78caaf, 0x54ad0c2e4e615d57 },
+        { -0x11c4982a47d4add6, 0x36f163469fa5c1eb, -0x5a4b2d0d913e602d, 0x62ecb2baa77a9408 }
+    },
+    {
+        { -0x6df8d7c95049d78c, 0x5fcd5e8579e104a5, 0x5aad01adc630a14a, 0x61913d5075663f98 },
+        { -0x1a1286ad9eead4c3, 0x4962357d0eddd7d1, 0x7482c8d0b96b4c71, 0x2e59f919a966d8be },
+        { 0x0dc62d361a3231da, -0x05b8a7cd6bdffd90, 0x02d801513f9594ce, 0x3ddbc2a131c05d5c }
+    },
+},
+{
+    {
+        { -0x048ca53dffb5ca2f, 0x31de0f433a6607c3, 0x7b8591bfc528d599, 0x55be9a25f5bb050c },
+        { 0x3f50a50a4ffb81ef, -0x4e1fcaf6c40bdf41, -0x645571e33955d330, 0x32239861fa237a40 },
         { 0x0d005acd33db3dbf, 0x0111b37c80ac35e2, 0x4892d66c6f88ebeb, 0x770eadb16508fbcd }
     },
     {
-        { 0xf1d3b681a05071b9, 0x2207659a3592ff3a, 0x5f0169297881e40e, 0x16bedd0e86ba374e },
-        { 0x8451f9e05e4e89dd, 0xc06302ffbc793937, 0x5d22749556a6495c, 0x09a6755ca05603fb },
+        { -0x0e2c497e5faf8e47, 0x2207659a3592ff3a, 0x5f0169297881e40e, 0x16bedd0e86ba374e },
+        { -0x7bae061fa1b17623, -0x3f9cfd004386c6c9, 0x5d22749556a6495c, 0x09a6755ca05603fb },
         { 0x5ecccc4f2c2737b5, 0x43b79e0c2dccb703, 0x33e008bc4ec43df3, 0x06c1b840f07566c0 }
     },
     {
-        { 0x69ee9e7f9b02805c, 0xcbff828a547d1640, 0x3d93a869b2430968, 0x46b7b8cd3fe26972 },
+        { 0x69ee9e7f9b02805c, -0x34007d75ab82e9c0, 0x3d93a869b2430968, 0x46b7b8cd3fe26972 },
         { 0x7688a5c6a388f877, 0x02a96c14deb2b6ac, 0x64c9f3431b8c2af8, 0x3628435554a1eed6 },
-        { 0xe9812086fe7eebe0, 0x4cba6be72f515437, 0x1d04168b516efae9, 0x5ea1391043982cb9 }
-    },
-    {
-        { 0x6f2b3be4d5d3b002, 0xafec33d96a09c880, 0x035f73a4a8bcc4cc, 0x22c5b9284662198b },
+        { -0x167edf7901811420, 0x4cba6be72f515437, 0x1d04168b516efae9, 0x5ea1391043982cb9 }
+    },
+    {
+        { 0x6f2b3be4d5d3b002, -0x5013cc2695f63780, 0x035f73a4a8bcc4cc, 0x22c5b9284662198b },
         { 0x49125c9cf4702ee1, 0x4520b71f8b25b32d, 0x33193026501fef7e, 0x656d8997c8d2eb2b },
-        { 0xcb58c8fe433d8939, 0x89a0cb2e6a8d7e50, 0x79ca955309fbbe5a, 0x0c626616cd7fc106 }
-    },
-    {
-        { 0x8fdfc379fbf454b1, 0x45a5a970f1a4b771, 0xac921ef7bad35915, 0x42d088dca81c2192 },
+        { -0x34a73701bcc276c7, -0x765f34d1957281b0, 0x79ca955309fbbe5a, 0x0c626616cd7fc106 }
+    },
+    {
+        { -0x70203c86040bab4f, 0x45a5a970f1a4b771, -0x536de108452ca6eb, 0x42d088dca81c2192 },
         { 0x1ffeb80a4879b61f, 0x6396726e4ada21ed, 0x33c7b093368025ba, 0x471aa0c6f3c31788 },
-        { 0x8fda0f37a0165199, 0x0adadb77c8a0e343, 0x20fbfdfcc875e820, 0x1cf2bea80c2206e7 }
-    },
-    {
-        { 0x982d6e1a02c0412f, 0x90fa4c83db58e8fe, 0x01c2f5bcdcb18bc0, 0x686e0c90216abc66 },
-        { 0xc2ddf1deb36202ac, 0x92a5fe09d2e27aa5, 0x7d1648f6fc09f1d3, 0x74c2cc0513bc4959 },
-        { 0x1fadbadba54395a7, 0xb41a02a0ae0da66a, 0xbf19f598bba37c07, 0x6a12b8acde48430d }
-    },
-    {
-        { 0x793bdd801aaeeb5f, 0x00a2a0aac1518871, 0xe8a373a31f2136b4, 0x48aab888fc91ef19 },
-        { 0xf8daea1f39d495d9, 0x592c190e525f1dfc, 0xdb8cbd04c9991d1b, 0x11f7fda3d88f0cb7 },
-        { 0x041f7e925830f40e, 0x002d6ca979661c06, 0x86dc9ff92b046a2e, 0x760360928b0493d1 }
-    },
-    {
-        { 0xb43108e5695a0b05, 0x6cb00ee8ad37a38b, 0x5edad6eea3537381, 0x3f2602d4b6dc3224 },
-        { 0x21bb41c6120cf9c6, 0xeab2aa12decda59b, 0xc1a72d020aa48b34, 0x215d4d27e87d3b68 },
-        { 0xc8b247b65bcaf19c, 0x49779dc3b1b2c652, 0x89a180bbd5ece2e2, 0x13f098a3cec8e039 }
-    },
-},
-{
-    {
-        { 0xf3aa57a22796bb14, 0x883abab79b07da21, 0xe54be21831a0391c, 0x5ee7fb38d83205f9 },
-        { 0x9adc0ff9ce5ec54b, 0x039c2a6b8c2f130d, 0x028007c7f0f89515, 0x78968314ac04b36b },
-        { 0x538dfdcb41446a8e, 0xa5acfda9434937f9, 0x46af908d263c8c78, 0x61d0633c9bca0d09 }
-    },
-    {
-        { 0xada328bcf8fc73df, 0xee84695da6f037fc, 0x637fb4db38c2a909, 0x5b23ac2df8067bdc },
-        { 0x63744935ffdb2566, 0xc5bd6b89780b68bb, 0x6f1b3280553eec03, 0x6e965fd847aed7f5 },
-        { 0x9ad2b953ee80527b, 0xe88f19aafade6d8d, 0x0e711704150e82cf, 0x79b9bbb9dd95dedc }
-    },
-    {
-        { 0xd1997dae8e9f7374, 0xa032a2f8cfbb0816, 0xcd6cba126d445f0a, 0x1ba811460accb834 },
-        { 0xebb355406a3126c2, 0xd26383a868c8c393, 0x6c0c6429e5b97a82, 0x5065f158c9fd2147 },
-        { 0x708169fb0c429954, 0xe14600acd76ecf67, 0x2eaab98a70e645ba, 0x3981f39e58a4faf2 }
-    },
-    {
-        { 0xc845dfa56de66fde, 0xe152a5002c40483a, 0xe9d2e163c7b4f632, 0x30f4452edcbc1b65 },
+        { -0x7025f0c85fe9ae67, 0x0adadb77c8a0e343, 0x20fbfdfcc875e820, 0x1cf2bea80c2206e7 }
+    },
+    {
+        { -0x67d291e5fd3fbed1, -0x6f05b37c24a71702, 0x01c2f5bcdcb18bc0, 0x686e0c90216abc66 },
+        { -0x3d220e214c9dfd54, -0x6d5a01f62d1d855b, 0x7d1648f6fc09f1d3, 0x74c2cc0513bc4959 },
+        { 0x1fadbadba54395a7, -0x4be5fd5f51f25996, -0x40e60a67445c83f9, 0x6a12b8acde48430d }
+    },
+    {
+        { 0x793bdd801aaeeb5f, 0x00a2a0aac1518871, -0x175c8c5ce0dec94c, 0x48aab888fc91ef19 },
+        { -0x072515e0c62b6a27, 0x592c190e525f1dfc, -0x247342fb3666e2e5, 0x11f7fda3d88f0cb7 },
+        { 0x041f7e925830f40e, 0x002d6ca979661c06, -0x79236006d4fb95d2, 0x760360928b0493d1 }
+    },
+    {
+        { -0x4bcef71a96a5f4fb, 0x6cb00ee8ad37a38b, 0x5edad6eea3537381, 0x3f2602d4b6dc3224 },
+        { 0x21bb41c6120cf9c6, -0x154d55ed21325a65, -0x3e58d2fdf55b74cc, 0x215d4d27e87d3b68 },
+        { -0x374db849a4350e64, 0x49779dc3b1b2c652, -0x765e7f442a131d1e, 0x13f098a3cec8e039 }
+    },
+},
+{
+    {
+        { -0x0c55a85dd86944ec, -0x77c5454864f825df, -0x1ab41de7ce5fc6e4, 0x5ee7fb38d83205f9 },
+        { -0x6523f00631a13ab5, 0x039c2a6b8c2f130d, 0x028007c7f0f89515, 0x78968314ac04b36b },
+        { 0x538dfdcb41446a8e, -0x5a530256bcb6c807, 0x46af908d263c8c78, 0x61d0633c9bca0d09 }
+    },
+    {
+        { -0x525cd74307038c21, -0x117b96a2590fc804, 0x637fb4db38c2a909, 0x5b23ac2df8067bdc },
+        { 0x63744935ffdb2566, -0x3a42947687f49745, 0x6f1b3280553eec03, 0x6e965fd847aed7f5 },
+        { -0x652d46ac117fad85, -0x1770e65505219273, 0x0e711704150e82cf, 0x79b9bbb9dd95dedc }
+    },
+    {
+        { -0x2e66825171608c8c, -0x5fcd5d073044f7ea, -0x329345ed92bba0f6, 0x1ba811460accb834 },
+        { -0x144caabf95ced93e, -0x2d9c7c5797373c6d, 0x6c0c6429e5b97a82, 0x5065f158c9fd2147 },
+        { 0x708169fb0c429954, -0x1eb9ff5328913099, 0x2eaab98a70e645ba, 0x3981f39e58a4faf2 }
+    },
+    {
+        { -0x37ba205a92199022, -0x1ead5affd3bfb7c6, -0x162d1e9c384b09ce, 0x30f4452edcbc1b65 },
         { 0x18fb8a7559230a93, 0x1d168f6960e6f45d, 0x3a85a94514a93cb5, 0x38dc083705acd0fd },
-        { 0x856d2782c5759740, 0xfa134569f99cbecc, 0x8844fc73c0ea4e71, 0x632d9a1a593f2469 }
-    },
-    {
-        { 0xbf09fd11ed0c84a7, 0x63f071810d9f693a, 0x21908c2d57cf8779, 0x3a5a7df28af64ba2 },
-        { 0xf6bb6b15b807cba6, 0x1823c7dfbc54f0d7, 0xbb1d97036e29670b, 0x0b24f48847ed4a57 },
-        { 0xdcdad4be511beac7, 0xa4538075ed26ccf2, 0xe19cff9f005f9a65, 0x34fcf74475481f63 }
-    },
-    {
-        { 0xa5bb1dab78cfaa98, 0x5ceda267190b72f2, 0x9309c9110a92608e, 0x0119a3042fb374b0 },
-        { 0xc197e04c789767ca, 0xb8714dcb38d9467d, 0x55de888283f95fa8, 0x3d3bdc164dfa63f7 },
-        { 0x67a2d89ce8c2177d, 0x669da5f66895d0c1, 0xf56598e5b282a2b0, 0x56c088f1ede20a73 }
-    },
-    {
-        { 0x581b5fac24f38f02, 0xa90be9febae30cbd, 0x9a2169028acf92f0, 0x038b7ea48359038f },
-        { 0x336d3d1110a86e17, 0xd7f388320b75b2fa, 0xf915337625072988, 0x09674c6b99108b87 },
-        { 0x9f4ef82199316ff8, 0x2f49d282eaa78d4f, 0x0971a5ab5aef3174, 0x6e5e31025969eb65 }
-    },
-    {
-        { 0x3304fb0e63066222, 0xfb35068987acba3f, 0xbd1924778c1061a3, 0x3058ad43d1838620 },
-        { 0xb16c62f587e593fb, 0x4999eddeca5d3e71, 0xb491c1e014cc3e6d, 0x08f5114789a8dba8 },
-        { 0x323c0ffde57663d0, 0x05c3df38a22ea610, 0xbdc78abdac994f9a, 0x26549fa4efe3dc99 }
-    },
-},
-{
-    {
-        { 0x04dbbc17f75396b9, 0x69e6a2d7d2f86746, 0xc6409d99f53eabc6, 0x606175f6332e25d2 },
-        { 0x738b38d787ce8f89, 0xb62658e24179a88d, 0x30738c9cf151316d, 0x49128c7f727275c9 },
+        { -0x7a92d87d3a8a68c0, -0x05ecba9606634134, -0x77bb038c3f15b18f, 0x632d9a1a593f2469 }
+    },
+    {
+        { -0x40f602ee12f37b59, 0x63f071810d9f693a, 0x21908c2d57cf8779, 0x3a5a7df28af64ba2 },
+        { -0x094494ea47f8345a, 0x1823c7dfbc54f0d7, -0x44e268fc91d698f5, 0x0b24f48847ed4a57 },
+        { -0x23252b41aee41539, -0x5bac7f8a12d9330e, -0x1e630060ffa0659b, 0x34fcf74475481f63 }
+    },
+    {
+        { -0x5a44e25487305568, 0x5ceda267190b72f2, -0x6cf636eef56d9f72, 0x0119a3042fb374b0 },
+        { -0x3e681fb387689836, -0x478eb234c726b983, 0x55de888283f95fa8, 0x3d3bdc164dfa63f7 },
+        { 0x67a2d89ce8c2177d, 0x669da5f66895d0c1, -0x0a9a671a4d7d5d50, 0x56c088f1ede20a73 }
+    },
+    {
+        { 0x581b5fac24f38f02, -0x56f41601451cf343, -0x65de96fd75306d10, 0x038b7ea48359038f },
+        { 0x336d3d1110a86e17, -0x280c77cdf48a4d06, -0x06eacc89daf8d678, 0x09674c6b99108b87 },
+        { -0x60b107de66ce9008, 0x2f49d282eaa78d4f, 0x0971a5ab5aef3174, 0x6e5e31025969eb65 }
+    },
+    {
+        { 0x3304fb0e63066222, -0x04caf976785345c1, -0x42e6db8873ef9e5d, 0x3058ad43d1838620 },
+        { -0x4e939d0a781a6c05, 0x4999eddeca5d3e71, -0x4b6e3e1feb33c193, 0x08f5114789a8dba8 },
+        { 0x323c0ffde57663d0, 0x05c3df38a22ea610, -0x423875425366b066, 0x26549fa4efe3dc99 }
+    },
+},
+{
+    {
+        { 0x04dbbc17f75396b9, 0x69e6a2d7d2f86746, -0x39bf62660ac1543a, 0x606175f6332e25d2 },
+        { 0x738b38d787ce8f89, -0x49d9a71dbe865773, 0x30738c9cf151316d, 0x49128c7f727275c9 },
         { 0x4021370ef540e7dd, 0x0910d6f5a1f1d0a5, 0x4634aacd5b06b807, 0x6a39e6356944f235 }
     },
     {
-        { 0x1da1965774049e9d, 0xfbcd6ea198fe352b, 0xb1cbcd50cc5236a6, 0x1f5ec83d3f9846e2 },
-        { 0x96cd5640df90f3e7, 0x6c3a760edbfa25ea, 0x24f3ef0959e33cc4, 0x42889e7e530d2e58 },
-        { 0x8efb23c3328ccb75, 0xaf42a207dd876ee9, 0x20fbdadc5dfae796, 0x241e246b06bf9f51 }
+        { 0x1da1965774049e9d, -0x0432915e6701cad5, -0x4e3432af33adc95a, 0x1f5ec83d3f9846e2 },
+        { -0x6932a9bf206f0c19, 0x6c3a760edbfa25ea, 0x24f3ef0959e33cc4, 0x42889e7e530d2e58 },
+        { -0x7104dc3ccd73348b, -0x50bd5df822789117, 0x20fbdadc5dfae796, 0x241e246b06bf9f51 }
     },
     {
         { 0x7eaafc9a6280bbb8, 0x22a70f12f403d809, 0x31ce40bb1bfc8d20, 0x2bc65635e8bd53ee },
         { 0x29e68e57ad6e98f6, 0x4c9260c80b462065, 0x3f00862ea51ebb4b, 0x5bc2c77fb38d9097 },
-        { 0xe8d5dc9fa96bad93, 0xe58fb17dde1947dc, 0x681532ea65185fa3, 0x1fdd6c3b034a7830 }
-    },
-    {
-        { 0x9c13a6a52dd8f7a9, 0x2dbb1f8c3efdcabf, 0x961e32405e08f7b5, 0x48c8a121bbe6c9e5 },
-        { 0x0a64e28c55dc18fe, 0xe3df9e993399ebdd, 0x79ac432370e2e652, 0x35ff7fc33ae4cc0e },
-        { 0xfc415a7c59646445, 0xd224b2d7c128b615, 0x6035c9c905fbb912, 0x42d7a91274429fab }
-    },
-    {
-        { 0xa9a48947933da5bc, 0x4a58920ec2e979ec, 0x96d8800013e5ac4c, 0x453692d74b48b147 },
-        { 0x4e6213e3eaf72ed3, 0x6794981a43acd4e7, 0xff547cde6eb508cb, 0x6fed19dd10fcb532 },
-        { 0xdd775d99a8559c6f, 0xf42a2140df003e24, 0x5223e229da928a66, 0x063f46ba6d38f22c }
-    },
-    {
-        { 0x39843cb737346921, 0xa747fb0738c89447, 0xcb8d8031a245307e, 0x67810f8e6d82f068 },
-        { 0xd2d242895f536694, 0xca33a2c542939b2c, 0x986fada6c7ddb95c, 0x5a152c042f712d5d },
+        { -0x172a23605694526d, -0x1a704e8221e6b824, 0x681532ea65185fa3, 0x1fdd6c3b034a7830 }
+    },
+    {
+        { -0x63ec595ad2270857, 0x2dbb1f8c3efdcabf, -0x69e1cdbfa1f7084b, 0x48c8a121bbe6c9e5 },
+        { 0x0a64e28c55dc18fe, -0x1c206166cc661423, 0x79ac432370e2e652, 0x35ff7fc33ae4cc0e },
+        { -0x03bea583a69b9bbb, -0x2ddb4d283ed749eb, 0x6035c9c905fbb912, 0x42d7a91274429fab }
+    },
+    {
+        { -0x565b76b86cc25a44, 0x4a58920ec2e979ec, -0x69277fffec1a53b4, 0x453692d74b48b147 },
+        { 0x4e6213e3eaf72ed3, 0x6794981a43acd4e7, -0x00ab8321914af735, 0x6fed19dd10fcb532 },
+        { -0x2288a26657aa6391, -0x0bd5debf20ffc1dc, 0x5223e229da928a66, 0x063f46ba6d38f22c }
+    },
+    {
+        { 0x39843cb737346921, -0x58b804f8c7376bb9, -0x34727fce5dbacf82, 0x67810f8e6d82f068 },
+        { -0x2d2dbd76a0ac996c, -0x35cc5d3abd6c64d4, -0x67905259382246a4, 0x5a152c042f712d5d },
         { 0x3eeb8fbcd2287db4, 0x72c7d3a301a03e93, 0x5473e88cbd98265a, 0x7324aa515921b403 }
     },
     {
-        { 0xad23f6dae82354cb, 0x6962502ab6571a6d, 0x9b651636e38e37d1, 0x5cac5005d1a3312f },
-        { 0x857942f46c3cbe8e, 0xa1d364b14730c046, 0x1c8ed914d23c41bf, 0x0838e161eef6d5d2 },
-        { 0x8cc154cce9e39904, 0x5b3a040b84de6846, 0xc4d8a61cb1be5d6e, 0x40fb897bd8861f02 }
-    },
-    {
-        { 0xe57ed8475ab10761, 0x71435e206fd13746, 0x342f824ecd025632, 0x4b16281ea8791e7b },
-        { 0x84c5aa9062de37a1, 0x421da5000d1d96e1, 0x788286306a9242d9, 0x3c5e464a690d10da },
-        { 0xd1c101d50b813381, 0xdee60f1176ee6828, 0x0cb68893383f6409, 0x6183c565f6ff484a }
-    },
-},
-{
-    {
-        { 0xdb468549af3f666e, 0xd77fcf04f14a0ea5, 0x3df23ff7a4ba0c47, 0x3a10dfe132ce3c85 },
-        { 0x741d5a461e6bf9d6, 0x2305b3fc7777a581, 0xd45574a26474d3d9, 0x1926e1dc6401e0ff },
-        { 0xe07f4e8aea17cea0, 0x2fd515463a1fc1fd, 0x175322fd31f2c0f1, 0x1fa1d01d861e5d15 }
+        { -0x52dc092517dcab35, 0x6962502ab6571a6d, -0x649ae9c91c71c82f, 0x5cac5005d1a3312f },
+        { -0x7a86bd0b93c34172, -0x5e2c9b4eb8cf3fba, 0x1c8ed914d23c41bf, 0x0838e161eef6d5d2 },
+        { -0x733eab33161c66fc, 0x5b3a040b84de6846, -0x3b2759e34e41a292, 0x40fb897bd8861f02 }
+    },
+    {
+        { -0x1a8127b8a54ef89f, 0x71435e206fd13746, 0x342f824ecd025632, 0x4b16281ea8791e7b },
+        { -0x7b3a556f9d21c85f, 0x421da5000d1d96e1, 0x788286306a9242d9, 0x3c5e464a690d10da },
+        { -0x2e3efe2af47ecc7f, -0x2119f0ee891197d8, 0x0cb68893383f6409, 0x6183c565f6ff484a }
+    },
+},
+{
+    {
+        { -0x24b97ab650c09992, -0x288030fb0eb5f15b, 0x3df23ff7a4ba0c47, 0x3a10dfe132ce3c85 },
+        { 0x741d5a461e6bf9d6, 0x2305b3fc7777a581, -0x2baa8b5d9b8b2c27, 0x1926e1dc6401e0ff },
+        { -0x1f80b17515e83160, 0x2fd515463a1fc1fd, 0x175322fd31f2c0f1, 0x1fa1d01d861e5d15 }
     },
     {
         { 0x38dcac00d1df94ab, 0x2e712bddd1080de9, 0x7f13e93efdd5e262, 0x73fced18ee9a01e5 },
-        { 0xcc8055947d599832, 0x1e4656da37f15520, 0x99f6f7744e059320, 0x773563bc6a75cf33 },
-        { 0x06b1e90863139cb3, 0xa493da67c5a03ecd, 0x8d77cec8ad638932, 0x1f426b701b864f44 }
-    },
-    {
-        { 0xf17e35c891a12552, 0xb76b8153575e9c76, 0xfa83406f0d9b723e, 0x0b76bb1b3fa7e438 },
-        { 0xefc9264c41911c01, 0xf1a3b7b817a22c25, 0x5875da6bf30f1447, 0x4e1af5271d31b090 },
-        { 0x08b8c1f97f92939b, 0xbe6771cbd444ab6e, 0x22e5646399bb8017, 0x7b6dd61eb772a955 }
-    },
-    {
-        { 0x5730abf9ab01d2c7, 0x16fb76dc40143b18, 0x866cbe65a0cbb281, 0x53fa9b659bff6afe },
-        { 0xb7adc1e850f33d92, 0x7998fa4f608cd5cf, 0xad962dbd8dfc5bdb, 0x703e9bceaf1d2f4f },
-        { 0x6c14c8e994885455, 0x843a5d6665aed4e5, 0x181bb73ebcd65af1, 0x398d93e5c4c61f50 }
-    },
-    {
-        { 0xc3877c60d2e7e3f2, 0x3b34aaa030828bb1, 0x283e26e7739ef138, 0x699c9c9002c30577 },
-        { 0x1c4bd16733e248f3, 0xbd9e128715bf0a5f, 0xd43f8cf0a10b0376, 0x53b09b5ddf191b13 },
-        { 0xf306a7235946f1cc, 0x921718b5cce5d97d, 0x28cdd24781b4e975, 0x51caf30c6fcdd907 }
-    },
-    {
-        { 0x737af99a18ac54c7, 0x903378dcc51cb30f, 0x2b89bc334ce10cc7, 0x12ae29c189f8e99a },
-        { 0xa60ba7427674e00a, 0x630e8570a17a7bf3, 0x3758563dcf3324cc, 0x5504aa292383fdaa },
-        { 0xa99ec0cb1f0d01cf, 0x0dd1efcc3a34f7ae, 0x55ca7521d09c4e22, 0x5fd14fe958eba5ea }
-    },
-    {
-        { 0x3c42fe5ebf93cb8e, 0xbedfa85136d4565f, 0xe0f0859e884220e8, 0x7dd73f960725d128 },
-        { 0xb5dc2ddf2845ab2c, 0x069491b10a7fe993, 0x4daaf3d64002e346, 0x093ff26e586474d1 },
-        { 0xb10d24fe68059829, 0x75730672dbaf23e5, 0x1367253ab457ac29, 0x2f59bcbc86b470a4 }
-    },
-    {
-        { 0x7041d560b691c301, 0x85201b3fadd7e71e, 0x16c2e16311335585, 0x2aa55e3d010828b1 },
-        { 0x83847d429917135f, 0xad1b911f567d03d7, 0x7e7748d9be77aad1, 0x5458b42e2e51af4a },
-        { 0xed5192e60c07444f, 0x42c54e2d74421d10, 0x352b4c82fdb5c864, 0x13e9004a8a768664 }
-    },
-},
-{
-    {
-        { 0x1e6284c5806b467c, 0xc5f6997be75d607b, 0x8b67d958b378d262, 0x3d88d66a81cd8b70 },
-        { 0xcbb5b5556c032bff, 0xdf7191b729297a3a, 0xc1ff7326aded81bb, 0x71ade8bb68be03f5 },
-        { 0x8b767a93204ed789, 0x762fcacb9fa0ae2a, 0x771febcc6dce4887, 0x343062158ff05fb3 }
-    },
-    {
-        { 0xfce219072a7b31b4, 0x4d7adc75aa578016, 0x0ec276a687479324, 0x6d6d9d5d1fda4beb },
-        { 0xe05da1a7e1f5bf49, 0x26457d6dd4736092, 0x77dcb07773cc32f6, 0x0a5d94969cdd5fcd },
-        { 0x22b1a58ae9b08183, 0xfd95d071c15c388b, 0xa9812376850a0517, 0x33384cbabb7f335e }
-    },
-    {
-        { 0x33bc627a26218b8d, 0xea80b21fc7a80c61, 0x9458b12b173e9ee6, 0x076247be0e2f3059 },
-        { 0x3c6fa2680ca2c7b5, 0x1b5082046fb64fda, 0xeb53349c5431d6de, 0x5278b38f6b879c89 },
-        { 0x52e105f61416375a, 0xec97af3685abeba4, 0x26e6b50623a67c36, 0x5cf0e856f3d4fb01 }
-    },
-    {
-        { 0xbeaece313db342a8, 0xcba3635b842db7ee, 0xe88c6620817f13ef, 0x1b9438aa4e76d5c6 },
-        { 0xf6c968731ae8cab4, 0x5e20741ecb4f92c5, 0x2da53be58ccdbc3e, 0x2dddfea269970df7 },
-        { 0x8a50777e166f031a, 0x067b39f10fb7a328, 0x1925c9a6010fbd76, 0x6df9b575cc740905 }
-    },
-    {
-        { 0xecdfc35b48cade41, 0x6a88471fb2328270, 0x740a4a2440a01b6a, 0x471e5796003b5f29 },
-        { 0x42c1192927f6bdcf, 0x8f91917a403d61ca, 0xdc1c5a668b9e1f61, 0x1596047804ec0f8d },
-        { 0xda96bbb3aced37ac, 0x7a2423b5e9208cea, 0x24cc5c3038aebae2, 0x50c356afdc5dae2f }
-    },
-    {
-        { 0xcfed9cdf1b31b964, 0xf486a9858ca51af3, 0x14897265ea8c1f84, 0x784a53dd932acc00 },
-        { 0x09dcbf4341c30318, 0xeeba061183181dce, 0xc179c0cedc1e29a1, 0x1dbf7b89073f35b0 },
-        { 0x2d99f9df14fc4920, 0x76ccb60cc4499fe5, 0xa4132cbbe5cf0003, 0x3f93d82354f000ea }
-    },
-    {
-        { 0xeaac12d179e14978, 0xff923ff3bbebff5e, 0x4af663e40663ce27, 0x0fd381a811a5f5ff },
-        { 0x8183e7689e04ce85, 0x678fb71e04465341, 0xad92058f6688edac, 0x5da350d3532b099a },
-        { 0xf256aceca436df54, 0x108b6168ae69d6e8, 0x20d986cb6b5d036c, 0x655957b9fee2af50 }
-    },
-    {
-        { 0xbdc1409bd002d0ac, 0x66660245b5ccd9a6, 0x82317dc4fade85ec, 0x02fe934b6ad7df0d },
-        { 0xaea8b07fa902030f, 0xf88c766af463d143, 0x15b083663c787a60, 0x08eab1148267a4a8 },
-        { 0xef5cf100cfb7ea74, 0x22897633a1cb42ac, 0xd4ce0c54cef285e2, 0x30408c048a146a55 }
-    },
-},
-{
-    {
-        { 0xbb2e00c9193b877f, 0xece3a890e0dc506b, 0xecf3b7c036de649f, 0x5f46040898de9e1a },
-        { 0x739d8845832fcedb, 0xfa38d6c9ae6bf863, 0x32bc0dcab74ffef7, 0x73937e8814bce45e },
-        { 0xb9037116297bf48d, 0xa9d13b22d4f06834, 0xe19715574696bdc6, 0x2cf8a4e891d5e835 }
-    },
-    {
-        { 0x2cb5487e17d06ba2, 0x24d2381c3950196b, 0xd7659c8185978a30, 0x7a6f7f2891d6a4f6 },
-        { 0x6d93fd8707110f67, 0xdd4c09d37c38b549, 0x7cb16a4cc2736a86, 0x2049bd6e58252a09 },
-        { 0x7d09fd8d6a9aef49, 0xf0ee60be5b3db90b, 0x4c21b52c519ebfd4, 0x6011aadfc545941d }
-    },
-    {
-        { 0x63ded0c802cbf890, 0xfbd098ca0dff6aaa, 0x624d0afdb9b6ed99, 0x69ce18b779340b1e },
-        { 0x5f67926dcf95f83c, 0x7c7e856171289071, 0xd6a1e7f3998f7a5b, 0x6fc5cc1b0b62f9e0 },
-        { 0xd1ef5528b29879cb, 0xdd1aae3cd47e9092, 0x127e0442189f2352, 0x15596b3ae57101f1 }
+        { -0x337faa6b82a667ce, 0x1e4656da37f15520, -0x6609088bb1fa6ce0, 0x773563bc6a75cf33 },
+        { 0x06b1e90863139cb3, -0x5b6c25983a5fc133, -0x72883137529c76ce, 0x1f426b701b864f44 }
+    },
+    {
+        { -0x0e81ca376e5edaae, -0x48947eaca8a1638a, -0x057cbf90f2648dc2, 0x0b76bb1b3fa7e438 },
+        { -0x1036d9b3be6ee3ff, -0x0e5c4847e85dd3db, 0x5875da6bf30f1447, 0x4e1af5271d31b090 },
+        { 0x08b8c1f97f92939b, -0x41988e342bbb5492, 0x22e5646399bb8017, 0x7b6dd61eb772a955 }
+    },
+    {
+        { 0x5730abf9ab01d2c7, 0x16fb76dc40143b18, -0x7993419a5f344d7f, 0x53fa9b659bff6afe },
+        { -0x48523e17af0cc26e, 0x7998fa4f608cd5cf, -0x5269d2427203a425, 0x703e9bceaf1d2f4f },
+        { 0x6c14c8e994885455, -0x7bc5a2999a512b1b, 0x181bb73ebcd65af1, 0x398d93e5c4c61f50 }
+    },
+    {
+        { -0x3c78839f2d181c0e, 0x3b34aaa030828bb1, 0x283e26e7739ef138, 0x699c9c9002c30577 },
+        { 0x1c4bd16733e248f3, -0x4261ed78ea40f5a1, -0x2bc0730f5ef4fc8a, 0x53b09b5ddf191b13 },
+        { -0x0cf958dca6b90e34, -0x6de8e74a331a2683, 0x28cdd24781b4e975, 0x51caf30c6fcdd907 }
+    },
+    {
+        { 0x737af99a18ac54c7, -0x6fcc87233ae34cf1, 0x2b89bc334ce10cc7, 0x12ae29c189f8e99a },
+        { -0x59f458bd898b1ff6, 0x630e8570a17a7bf3, 0x3758563dcf3324cc, 0x5504aa292383fdaa },
+        { -0x56613f34e0f2fe31, 0x0dd1efcc3a34f7ae, 0x55ca7521d09c4e22, 0x5fd14fe958eba5ea }
+    },
+    {
+        { 0x3c42fe5ebf93cb8e, -0x412057aec92ba9a1, -0x1f0f7a6177bddf18, 0x7dd73f960725d128 },
+        { -0x4a23d220d7ba54d4, 0x069491b10a7fe993, 0x4daaf3d64002e346, 0x093ff26e586474d1 },
+        { -0x4ef2db0197fa67d7, 0x75730672dbaf23e5, 0x1367253ab457ac29, 0x2f59bcbc86b470a4 }
+    },
+    {
+        { 0x7041d560b691c301, -0x7adfe4c0522818e2, 0x16c2e16311335585, 0x2aa55e3d010828b1 },
+        { -0x7c7b82bd66e8eca1, -0x52e46ee0a982fc29, 0x7e7748d9be77aad1, 0x5458b42e2e51af4a },
+        { -0x12ae6d19f3f8bbb1, 0x42c54e2d74421d10, 0x352b4c82fdb5c864, 0x13e9004a8a768664 }
+    },
+},
+{
+    {
+        { 0x1e6284c5806b467c, -0x3a09668418a29f85, -0x749826a74c872d9e, 0x3d88d66a81cd8b70 },
+        { -0x344a4aaa93fcd401, -0x208e6e48d6d685c6, -0x3e008cd952127e45, 0x71ade8bb68be03f5 },
+        { -0x7489856cdfb12877, 0x762fcacb9fa0ae2a, 0x771febcc6dce4887, 0x343062158ff05fb3 }
+    },
+    {
+        { -0x031de6f8d584ce4c, 0x4d7adc75aa578016, 0x0ec276a687479324, 0x6d6d9d5d1fda4beb },
+        { -0x1fa25e581e0a40b7, 0x26457d6dd4736092, 0x77dcb07773cc32f6, 0x0a5d94969cdd5fcd },
+        { 0x22b1a58ae9b08183, -0x026a2f8e3ea3c775, -0x567edc897af5fae9, 0x33384cbabb7f335e }
+    },
+    {
+        { 0x33bc627a26218b8d, -0x157f4de03857f39f, -0x6ba74ed4e8c1611a, 0x076247be0e2f3059 },
+        { 0x3c6fa2680ca2c7b5, 0x1b5082046fb64fda, -0x14accb63abce2922, 0x5278b38f6b879c89 },
+        { 0x52e105f61416375a, -0x136850c97a54145c, 0x26e6b50623a67c36, 0x5cf0e856f3d4fb01 }
+    },
+    {
+        { -0x415131cec24cbd58, -0x345c9ca47bd24812, -0x177399df7e80ec11, 0x1b9438aa4e76d5c6 },
+        { -0x0936978ce517354c, 0x5e20741ecb4f92c5, 0x2da53be58ccdbc3e, 0x2dddfea269970df7 },
+        { -0x75af8881e990fce6, 0x067b39f10fb7a328, 0x1925c9a6010fbd76, 0x6df9b575cc740905 }
+    },
+    {
+        { -0x13203ca4b73521bf, 0x6a88471fb2328270, 0x740a4a2440a01b6a, 0x471e5796003b5f29 },
+        { 0x42c1192927f6bdcf, -0x706e6e85bfc29e36, -0x23e3a5997461e09f, 0x1596047804ec0f8d },
+        { -0x2569444c5312c854, 0x7a2423b5e9208cea, 0x24cc5c3038aebae2, 0x50c356afdc5dae2f }
+    },
+    {
+        { -0x30126320e4ce469c, -0x0b79567a735ae50d, 0x14897265ea8c1f84, 0x784a53dd932acc00 },
+        { 0x09dcbf4341c30318, -0x1145f9ee7ce7e232, -0x3e863f3123e1d65f, 0x1dbf7b89073f35b0 },
+        { 0x2d99f9df14fc4920, 0x76ccb60cc4499fe5, -0x5becd3441a30fffd, 0x3f93d82354f000ea }
+    },
+    {
+        { -0x1553ed2e861eb688, -0x006dc00c441400a2, 0x4af663e40663ce27, 0x0fd381a811a5f5ff },
+        { -0x7e7c189761fb317b, 0x678fb71e04465341, -0x526dfa7099771254, 0x5da350d3532b099a },
+        { -0x0da953135bc920ac, 0x108b6168ae69d6e8, 0x20d986cb6b5d036c, 0x655957b9fee2af50 }
+    },
+    {
+        { -0x423ebf642ffd2f54, 0x66660245b5ccd9a6, -0x7dce823b05217a14, 0x02fe934b6ad7df0d },
+        { -0x51574f8056fdfcf1, -0x077389950b9c2ebd, 0x15b083663c787a60, 0x08eab1148267a4a8 },
+        { -0x10a30eff3048158c, 0x22897633a1cb42ac, -0x2b31f3ab310d7a1e, 0x30408c048a146a55 }
+    },
+},
+{
+    {
+        { -0x44d1ff36e6c47881, -0x131c576f1f23af95, -0x130c483fc9219b61, 0x5f46040898de9e1a },
+        { 0x739d8845832fcedb, -0x05c729365194079d, 0x32bc0dcab74ffef7, 0x73937e8814bce45e },
+        { -0x46fc8ee9d6840b73, -0x562ec4dd2b0f97cc, -0x1e68eaa8b969423a, 0x2cf8a4e891d5e835 }
+    },
+    {
+        { 0x2cb5487e17d06ba2, 0x24d2381c3950196b, -0x289a637e7a6875d0, 0x7a6f7f2891d6a4f6 },
+        { 0x6d93fd8707110f67, -0x22b3f62c83c74ab7, 0x7cb16a4cc2736a86, 0x2049bd6e58252a09 },
+        { 0x7d09fd8d6a9aef49, -0x0f119f41a4c246f5, 0x4c21b52c519ebfd4, 0x6011aadfc545941d }
+    },
+    {
+        { 0x63ded0c802cbf890, -0x042f6735f2009556, 0x624d0afdb9b6ed99, 0x69ce18b779340b1e },
+        { 0x5f67926dcf95f83c, 0x7c7e856171289071, -0x295e180c667085a5, 0x6fc5cc1b0b62f9e0 },
+        { -0x2e10aad74d678635, -0x22e551c32b816f6e, 0x127e0442189f2352, 0x15596b3ae57101f1 }
     },
     {
         { 0x09ff31167e5124ca, 0x0be4158bd9c745df, 0x292b7d227ef556e5, 0x3aa4e241afb6d138 },
-        { 0x462739d23f9179a2, 0xff83123197d6ddcf, 0x1307deb553f2148a, 0x0d2237687b5f4dda },
+        { 0x462739d23f9179a2, -0x007cedce68292231, 0x1307deb553f2148a, 0x0d2237687b5f4dda },
         { 0x2cc138bf2a3305f5, 0x48583f8fa2e926c3, 0x083ab1a25549d2eb, 0x32fcaa6e4687a36c }
     },
     {
-        { 0x3207a4732787ccdf, 0x17e31908f213e3f8, 0xd5b2ecd7f60d964e, 0x746f6336c2600be9 },
-        { 0x7bc56e8dc57d9af5, 0x3e0bd2ed9df0bdf2, 0xaac014de22efe4a3, 0x4627e9cefebd6a5c },
-        { 0x3f4af345ab6c971c, 0xe288eb729943731f, 0x33596a8a0344186d, 0x7b4917007ed66293 }
-    },
-    {
-        { 0x54341b28dd53a2dd, 0xaa17905bdf42fc3f, 0x0ff592d94dd2f8f4, 0x1d03620fe08cd37d },
+        { 0x3207a4732787ccdf, 0x17e31908f213e3f8, -0x2a4d132809f269b2, 0x746f6336c2600be9 },
+        { 0x7bc56e8dc57d9af5, 0x3e0bd2ed9df0bdf2, -0x553feb21dd101b5d, 0x4627e9cefebd6a5c },
+        { 0x3f4af345ab6c971c, -0x1d77148d66bc8ce1, 0x33596a8a0344186d, 0x7b4917007ed66293 }
+    },
+    {
+        { 0x54341b28dd53a2dd, -0x55e86fa420bd03c1, 0x0ff592d94dd2f8f4, 0x1d03620fe08cd37d },
         { 0x2d85fb5cab84b064, 0x497810d289f3bc14, 0x476adc447b15ce0c, 0x122ba376f844fd7b },
-        { 0xc20232cda2b4e554, 0x9ed0fd42115d187f, 0x2eabb4be7dd479d9, 0x02c70bf52b68ec4c }
-    },
-    {
-        { 0xace532bf458d72e1, 0x5be768e07cb73cb5, 0x56cf7d94ee8bbde7, 0x6b0697e3feb43a03 },
-        { 0xa287ec4b5d0b2fbb, 0x415c5790074882ca, 0xe044a61ec1d0815c, 0x26334f0a409ef5e0 },
-        { 0xb6c8f04adf62a3c0, 0x3ef000ef076da45d, 0x9c9cb95849f0d2a9, 0x1cc37f43441b2fae }
-    },
-    {
-        { 0xd76656f1c9ceaeb9, 0x1c5b15f818e5656a, 0x26e72832844c2334, 0x3a346f772f196838 },
-        { 0x508f565a5cc7324f, 0xd061c4c0e506a922, 0xfb18abdb5c45ac19, 0x6c6809c10380314a },
-        { 0xd2d55112e2da6ac8, 0xe9bd0331b1e851ed, 0x960746dd8ec67262, 0x05911b9f6ef7c5d0 }
-    },
-},
-{
-    {
-        { 0x01c18980c5fe9f94, 0xcd656769716fd5c8, 0x816045c3d195a086, 0x6e2b7f3266cc7982 },
-        { 0xe9dcd756b637ff2d, 0xec4c348fc987f0c4, 0xced59285f3fbc7b7, 0x3305354793e1ea87 },
-        { 0xcc802468f7c3568f, 0x9de9ba8219974cb3, 0xabb7229cb5b81360, 0x44e2017a6fbeba62 }
-    },
-    {
-        { 0x87f82cf3b6ca6ecd, 0x580f893e18f4a0c2, 0x058930072604e557, 0x6cab6ac256d19c1d },
-        { 0xc4c2a74354dab774, 0x8e5d4c3c4eaf031a, 0xb76c23d242838f17, 0x749a098f68dce4ea },
-        { 0xdcdfe0a02cc1de60, 0x032665ff51c5575b, 0x2c0c32f1073abeeb, 0x6a882014cd7b8606 }
-    },
-    {
-        { 0xd111d17caf4feb6e, 0x050bba42b33aa4a3, 0x17514c3ceeb46c30, 0x54bedb8b1bc27d75 },
-        { 0xa52a92fea4747fb5, 0xdc12a4491fa5ab89, 0xd82da94bb847a4ce, 0x4d77edce9512cc4e },
-        { 0x77c8e14577e2189c, 0xa3e46f6aff99c445, 0x3144dfc86d335343, 0x3a96559e7c4216a9 }
-    },
-    {
-        { 0x4493896880baaa52, 0x4c98afc4f285940e, 0xef4aa79ba45448b6, 0x5278c510a57aae7f },
-        { 0x12550d37f42ad2ee, 0x8b78e00498a1fbf5, 0x5d53078233894cb2, 0x02c84e4e3e498d0c },
-        { 0xa54dd074294c0b94, 0xf55d46b8df18ffb6, 0xf06fecc58dae8366, 0x588657668190d165 }
-    },
-    {
-        { 0xbf5834f03de25cc3, 0xb887c8aed6815496, 0x5105221a9481e892, 0x6760ed19f7723f93 },
-        { 0xd47712311aef7117, 0x50343101229e92c7, 0x7a95e1849d159b97, 0x2449959b8b5d29c9 },
+        { -0x3dfdcd325d4b1aac, -0x612f02bdeea2e781, 0x2eabb4be7dd479d9, 0x02c70bf52b68ec4c }
+    },
+    {
+        { -0x531acd40ba728d1f, 0x5be768e07cb73cb5, 0x56cf7d94ee8bbde7, 0x6b0697e3feb43a03 },
+        { -0x5d7813b4a2f4d045, 0x415c5790074882ca, -0x1fbb59e13e2f7ea4, 0x26334f0a409ef5e0 },
+        { -0x49370fb5209d5c40, 0x3ef000ef076da45d, -0x636346a7b60f2d57, 0x1cc37f43441b2fae }
+    },
+    {
+        { -0x2899a90e36315147, 0x1c5b15f818e5656a, 0x26e72832844c2334, 0x3a346f772f196838 },
+        { 0x508f565a5cc7324f, -0x2f9e3b3f1af956de, -0x04e75424a3ba53e7, 0x6c6809c10380314a },
+        { -0x2d2aaeed1d259538, -0x1642fcce4e17ae13, -0x69f8b92271398d9e, 0x05911b9f6ef7c5d0 }
+    },
+},
+{
+    {
+        { 0x01c18980c5fe9f94, -0x329a98968e902a38, -0x7e9fba3c2e6a5f7a, 0x6e2b7f3266cc7982 },
+        { -0x162328a949c800d3, -0x13b3cb7036780f3c, -0x312a6d7a0c043849, 0x3305354793e1ea87 },
+        { -0x337fdb97083ca971, -0x6216457de668b34d, -0x5448dd634a47eca0, 0x44e2017a6fbeba62 }
+    },
+    {
+        { -0x7807d30c49359133, 0x580f893e18f4a0c2, 0x058930072604e557, 0x6cab6ac256d19c1d },
+        { -0x3b3d58bcab25488c, -0x71a2b3c3b150fce6, -0x4893dc2dbd7c70e9, 0x749a098f68dce4ea },
+        { -0x23201f5fd33e21a0, 0x032665ff51c5575b, 0x2c0c32f1073abeeb, 0x6a882014cd7b8606 }
+    },
+    {
+        { -0x2eee2e8350b01492, 0x050bba42b33aa4a3, 0x17514c3ceeb46c30, 0x54bedb8b1bc27d75 },
+        { -0x5ad56d015b8b804b, -0x23ed5bb6e05a5477, -0x27d256b447b85b32, 0x4d77edce9512cc4e },
+        { 0x77c8e14577e2189c, -0x5c1b909500663bbb, 0x3144dfc86d335343, 0x3a96559e7c4216a9 }
+    },
+    {
+        { 0x4493896880baaa52, 0x4c98afc4f285940e, -0x10b558645babb74a, 0x5278c510a57aae7f },
+        { 0x12550d37f42ad2ee, -0x74871ffb675e040b, 0x5d53078233894cb2, 0x02c84e4e3e498d0c },
+        { -0x5ab22f8bd6b3f46c, -0x0aa2b94720e7004a, -0x0f90133a72517c9a, 0x588657668190d165 }
+    },
+    {
+        { -0x40a7cb0fc21da33d, -0x47783751297eab6a, 0x5105221a9481e892, 0x6760ed19f7723f93 },
+        { -0x2b88edcee5108ee9, 0x50343101229e92c7, 0x7a95e1849d159b97, 0x2449959b8b5d29c9 },
         { 0x669ba3b7ac35e160, 0x2eccf73fba842056, 0x1aec1f17c0804f07, 0x0d96bc031856f4e7 }
     },
     {
-        { 0xb1d534b0cc7505e1, 0x32cd003416c35288, 0xcb36a5800762c29d, 0x5bfe69b9237a0bf8 },
-        { 0x3318be7775c52d82, 0x4cb764b554d0aab9, 0xabcf3d27cc773d91, 0x3bf4d1848123288a },
-        { 0x183eab7e78a151ab, 0xbbe990c999093763, 0xff717d6e4ac7e335, 0x4c5cddb325f39f88 }
-    },
-    {
-        { 0x57750967e7a9f902, 0x2c37fdfc4f5b467e, 0xb261663a3177ba46, 0x3a375e78dc2d532b },
-        { 0xc0f6b74d6190a6eb, 0x20ea81a42db8f4e4, 0xa8bd6f7d97315760, 0x33b1d60262ac7c21 },
-        { 0x8141e72f2d4dddea, 0xe6eafe9862c607c8, 0x23c28458573cafd0, 0x46b9476f4ff97346 }
+        { -0x4e2acb4f338afa1f, 0x32cd003416c35288, -0x34c95a7ff89d3d63, 0x5bfe69b9237a0bf8 },
+        { 0x3318be7775c52d82, 0x4cb764b554d0aab9, -0x5430c2d83388c26f, 0x3bf4d1848123288a },
+        { 0x183eab7e78a151ab, -0x44166f3666f6c89d, -0x008e8291b5381ccb, 0x4c5cddb325f39f88 }
+    },
+    {
+        { 0x57750967e7a9f902, 0x2c37fdfc4f5b467e, -0x4d9e99c5ce8845ba, 0x3a375e78dc2d532b },
+        { -0x3f0948b29e6f5915, 0x20ea81a42db8f4e4, -0x5742908268cea8a0, 0x33b1d60262ac7c21 },
+        { -0x7ebe18d0d2b22216, -0x191501679d39f838, 0x23c28458573cafd0, 0x46b9476f4ff97346 }
     },
     {
         { 0x1215505c0d58359f, 0x2a2013c7fc28c46b, 0x24a0a1af89ea664e, 0x4400b638a1130e1f },
-        { 0x0c1ffea44f901e5c, 0x2b0b6fb72184b782, 0xe587ff910114db88, 0x37130f364785a142 },
+        { 0x0c1ffea44f901e5c, 0x2b0b6fb72184b782, -0x1a78006efeeb2478, 0x37130f364785a142 },
         { 0x3a01b76496ed19c3, 0x31e00ab0ed327230, 0x520a885783ca15b1, 0x06aab9875accbec7 }
     },
 },
 {
     {
         { 0x5349acf3512eeaef, 0x20c141d31cc1cb49, 0x24180c07a99a688d, 0x555ef9d1c64b2d17 },
-        { 0xc1339983f5df0ebb, 0xc0f3758f512c4cac, 0x2cf1130a0bb398e1, 0x6b3cecf9aa270c62 },
+        { -0x3ecc667c0a20f145, -0x3f0c8a70aed3b354, 0x2cf1130a0bb398e1, 0x6b3cecf9aa270c62 },
         { 0x36a770ba3b73bd08, 0x624aef08a3afbf0c, 0x5737ff98b40946f2, 0x675f4de13381749d }
     },
     {
-        { 0xa12ff6d93bdab31d, 0x0725d80f9d652dfe, 0x019c4ff39abe9487, 0x60f450b882cd3c43 },
-        { 0x0e2c52036b1782fc, 0x64816c816cad83b4, 0xd0dcbdd96964073e, 0x13d99df70164c520 },
+        { -0x5ed00926c4254ce3, 0x0725d80f9d652dfe, 0x019c4ff39abe9487, 0x60f450b882cd3c43 },
+        { 0x0e2c52036b1782fc, 0x64816c816cad83b4, -0x2f234226969bf8c2, 0x13d99df70164c520 },
         { 0x014b5ec321e5c0ca, 0x4fcb69c9d719bfa2, 0x4e5f1c18750023a0, 0x1c06de9e55edac80 }
     },
     {
-        { 0xffd52b40ff6d69aa, 0x34530b18dc4049bb, 0x5e4a5c2fa34d9897, 0x78096f8e7d32ba2d },
-        { 0x990f7ad6a33ec4e2, 0x6608f938be2ee08e, 0x9ca143c563284515, 0x4cf38a1fec2db60d },
-        { 0xa0aaaa650dfa5ce7, 0xf9c49e2a48b5478c, 0x4f09cc7d7003725b, 0x373cad3a26091abe }
-    },
-    {
-        { 0xf1bea8fb89ddbbad, 0x3bcb2cbc61aeaecb, 0x8f58a7bb1f9b8d9d, 0x21547eda5112a686 },
-        { 0xb294634d82c9f57c, 0x1fcbfde124934536, 0x9e9c4db3418cdb5a, 0x0040f3d9454419fc },
-        { 0xdefde939fd5986d3, 0xf4272c89510a380c, 0xb72ba407bb3119b9, 0x63550a334a254df4 }
-    },
-    {
-        { 0x9bba584572547b49, 0xf305c6fae2c408e0, 0x60e8fa69c734f18d, 0x39a92bafaa7d767a },
-        { 0x6507d6edb569cf37, 0x178429b00ca52ee1, 0xea7c0090eb6bd65d, 0x3eea62c7daf78f51 },
-        { 0x9d24c713e693274e, 0x5f63857768dbd375, 0x70525560eb8ab39a, 0x68436a0665c9c4cd }
-    },
-    {
-        { 0x1e56d317e820107c, 0xc5266844840ae965, 0xc1e0a1c6320ffc7a, 0x5373669c91611472 },
-        { 0xbc0235e8202f3f27, 0xc75c00e264f975b0, 0x91a4e9d5a38c2416, 0x17b6e7f68ab789f9 },
-        { 0x5d2814ab9a0e5257, 0x908f2084c9cab3fc, 0xafcaf5885b2d1eca, 0x1cb4b5a678f87d11 }
-    },
-    {
-        { 0x6b74aa62a2a007e7, 0xf311e0b0f071c7b1, 0x5707e438000be223, 0x2dc0fd2d82ef6eac },
-        { 0xb664c06b394afc6c, 0x0c88de2498da5fb1, 0x4f8d03164bcad834, 0x330bca78de7434a2 },
-        { 0x982eff841119744e, 0xf9695e962b074724, 0xc58ac14fbfc953fb, 0x3c31be1b369f1cf5 }
-    },
-    {
-        { 0xc168bc93f9cb4272, 0xaeb8711fc7cedb98, 0x7f0e52aa34ac8d7a, 0x41cec1097e7d55bb },
-        { 0xb0f4864d08948aee, 0x07dc19ee91ba1c6f, 0x7975cdaea6aca158, 0x330b61134262d4bb },
-        { 0xf79619d7a26d808a, 0xbb1fd49e1d9e156d, 0x73d7c36cdba1df27, 0x26b44cd91f28777d }
-    },
-},
-{
-    {
-        { 0xaf44842db0285f37, 0x8753189047efc8df, 0x9574e091f820979a, 0x0e378d6069615579 },
+        { -0x002ad4bf00929656, 0x34530b18dc4049bb, 0x5e4a5c2fa34d9897, 0x78096f8e7d32ba2d },
+        { -0x66f085295cc13b1e, 0x6608f938be2ee08e, -0x635ebc3a9cd7baeb, 0x4cf38a1fec2db60d },
+        { -0x5f55559af205a319, -0x063b61d5b74ab874, 0x4f09cc7d7003725b, 0x373cad3a26091abe }
+    },
+    {
+        { -0x0e41570476224453, 0x3bcb2cbc61aeaecb, -0x70a75844e0647263, 0x21547eda5112a686 },
+        { -0x4d6b9cb27d360a84, 0x1fcbfde124934536, -0x6163b24cbe7324a6, 0x0040f3d9454419fc },
+        { -0x210216c602a6792d, -0x0bd8d376aef5c7f4, -0x48d45bf844cee647, 0x63550a334a254df4 }
+    },
+    {
+        { -0x6445a7ba8dab84b7, -0x0cfa39051d3bf720, 0x60e8fa69c734f18d, 0x39a92bafaa7d767a },
+        { 0x6507d6edb569cf37, 0x178429b00ca52ee1, -0x1583ff6f149429a3, 0x3eea62c7daf78f51 },
+        { -0x62db38ec196cd8b2, 0x5f63857768dbd375, 0x70525560eb8ab39a, 0x68436a0665c9c4cd }
+    },
+    {
+        { 0x1e56d317e820107c, -0x3ad997bb7bf5169b, -0x3e1f5e39cdf00386, 0x5373669c91611472 },
+        { -0x43fdca17dfd0c0d9, -0x38a3ff1d9b068a50, -0x6e5b162a5c73dbea, 0x17b6e7f68ab789f9 },
+        { 0x5d2814ab9a0e5257, -0x6f70df7b36354c04, -0x50350a77a4d2e136, 0x1cb4b5a678f87d11 }
+    },
+    {
+        { 0x6b74aa62a2a007e7, -0x0cee1f4f0f8e384f, 0x5707e438000be223, 0x2dc0fd2d82ef6eac },
+        { -0x499b3f94c6b50394, 0x0c88de2498da5fb1, 0x4f8d03164bcad834, 0x330bca78de7434a2 },
+        { -0x67d1007beee68bb2, -0x0696a169d4f8b8dc, -0x3a753eb04036ac05, 0x3c31be1b369f1cf5 }
+    },
+    {
+        { -0x3e97436c0634bd8e, -0x51478ee038312468, 0x7f0e52aa34ac8d7a, 0x41cec1097e7d55bb },
+        { -0x4f0b79b2f76b7512, 0x07dc19ee91ba1c6f, 0x7975cdaea6aca158, 0x330b61134262d4bb },
+        { -0x0869e6285d927f76, -0x44e02b61e261ea93, 0x73d7c36cdba1df27, 0x26b44cd91f28777d }
+    },
+},
+{
+    {
+        { -0x50bb7bd24fd7a0c9, -0x78ace76fb8103721, -0x6a8b1f6e07df6866, 0x0e378d6069615579 },
         { 0x300a9035393aa6d8, 0x2b501131a12bb1cd, 0x7b1ff677f093c222, 0x4309c1f8cab82bad },
-        { 0xd9fa917183075a55, 0x4bdb5ad26b009fdc, 0x7829ad2cd63def0e, 0x078fc54975fd3877 }
-    },
-    {
-        { 0xe2004b5bb833a98a, 0x44775dec2d4c3330, 0x3aa244067eace913, 0x272630e3d58e00a9 },
-        { 0x87dfbd1428878f2d, 0x134636dd1e9421a1, 0x4f17c951257341a3, 0x5df98d4bad296cb8 },
-        { 0xf3678fd0ecc90b54, 0xf001459b12043599, 0x26725fbc3758b89b, 0x4325e4aa73a719ae }
-    },
-    {
-        { 0xed24629acf69f59d, 0x2a4a1ccedd5abbf4, 0x3535ca1f56b2d67b, 0x5d8c68d043b1b42d },
+        { -0x26056e8e7cf8a5ab, 0x4bdb5ad26b009fdc, 0x7829ad2cd63def0e, 0x078fc54975fd3877 }
+    },
+    {
+        { -0x1dffb4a447cc5676, 0x44775dec2d4c3330, 0x3aa244067eace913, 0x272630e3d58e00a9 },
+        { -0x782042ebd77870d3, 0x134636dd1e9421a1, 0x4f17c951257341a3, 0x5df98d4bad296cb8 },
+        { -0x0c98702f1336f4ac, -0x0ffeba64edfbca67, 0x26725fbc3758b89b, 0x4325e4aa73a719ae }
+    },
+    {
+        { -0x12db9d6530960a63, 0x2a4a1ccedd5abbf4, 0x3535ca1f56b2d67b, 0x5d8c68d043b1b42d },
         { 0x657dc6ef433c3493, 0x65375e9f80dbf8c3, 0x47fd2d465b372dae, 0x4966ab79796e7947 },
-        { 0xee332d4de3b42b0a, 0xd84e5a2b16a4601c, 0x78243877078ba3e4, 0x77ed1eb4184ee437 }
-    },
-    {
-        { 0x185d43f89e92ed1a, 0xb04a1eeafe4719c6, 0x499fbe88a6f03f4f, 0x5d8b0d2f3c859bdd },
-        { 0xbfd4e13f201839a0, 0xaeefffe23e3df161, 0xb65b04f06b5d1fe3, 0x52e085fb2b62fbc0 },
-        { 0x124079eaa54cf2ba, 0xd72465eb001b26e7, 0x6843bcfdc97af7fd, 0x0524b42b55eacd02 }
-    },
-    {
-        { 0xbc18dcad9b829eac, 0x23ae7d28b5f579d0, 0xc346122a69384233, 0x1a6110b2e7d4ac89 },
-        { 0xfd0d5dbee45447b0, 0x6cec351a092005ee, 0x99a47844567579cb, 0x59d242a216e7fa45 },
+        { -0x11ccd2b21c4bd4f6, -0x27b1a5d4e95b9fe4, 0x78243877078ba3e4, 0x77ed1eb4184ee437 }
+    },
+    {
+        { 0x185d43f89e92ed1a, -0x4fb5e11501b8e63a, 0x499fbe88a6f03f4f, 0x5d8b0d2f3c859bdd },
+        { -0x402b1ec0dfe7c660, -0x5110001dc1c20e9f, -0x49a4fb0f94a2e01d, 0x52e085fb2b62fbc0 },
+        { 0x124079eaa54cf2ba, -0x28db9a14ffe4d919, 0x6843bcfdc97af7fd, 0x0524b42b55eacd02 }
+    },
+    {
+        { -0x43e72352647d6154, 0x23ae7d28b5f579d0, -0x3cb9edd596c7bdcd, 0x1a6110b2e7d4ac89 },
+        { -0x02f2a2411babb850, 0x6cec351a092005ee, -0x665b87bba98a8635, 0x59d242a216e7fa45 },
         { 0x4f833f6ae66997ac, 0x6849762a361839a4, 0x6985dec1970ab525, 0x53045e89dcb1f546 }
     },
     {
-        { 0x84da3cde8d45fe12, 0xbd42c218e444e2d2, 0xa85196781f7e3598, 0x7642c93f5616e2b2 },
-        { 0xcb8bb346d75353db, 0xfcfcb24bae511e22, 0xcba48d40d50ae6ef, 0x26e3bae5f4f7cb5d },
-        { 0x2323daa74595f8e4, 0xde688c8b857abeb4, 0x3fc48e961c59326e, 0x0b2e73ca15c9b8ba }
-    },
-    {
-        { 0x0e3fbfaf79c03a55, 0x3077af054cbb5acf, 0xd5c55245db3de39f, 0x015e68c1476a4af7 },
-        { 0xd6bb4428c17f5026, 0x9eb27223fb5a9ca7, 0xe37ba5031919c644, 0x21ce380db59a6602 },
-        { 0xc1d5285220066a38, 0x95603e523570aef3, 0x832659a7226b8a4d, 0x5dd689091f8eedc9 }
-    },
-    {
-        { 0x1d022591a5313084, 0xca2d4aaed6270872, 0x86a12b852f0bfd20, 0x56e6c439ad7da748 },
-        { 0xcbac84debfd3c856, 0x1624c348b35ff244, 0xb7f88dca5d9cad07, 0x3b0e574da2c2ebe8 },
-        { 0xc704ff4942bdbae6, 0x5e21ade2b2de1f79, 0xe95db3f35652fad8, 0x0822b5378f08ebc1 }
-    },
-},
-{
-    {
-        { 0xe1b7f29362730383, 0x4b5279ffebca8a2c, 0xdafc778abfd41314, 0x7deb10149c72610f },
-        { 0x51f048478f387475, 0xb25dbcf49cbecb3c, 0x9aab1244d99f2055, 0x2c709e6c1c10a5d6 },
-        { 0xcb62af6a8766ee7a, 0x66cbec045553cd0e, 0x588001380f0be4b5, 0x08e68e9ff62ce2ea }
-    },
-    {
-        { 0x2f2d09d50ab8f2f9, 0xacb9218dc55923df, 0x4a8f342673766cb9, 0x4cb13bd738f719f5 },
-        { 0x34ad500a4bc130ad, 0x8d38db493d0bd49c, 0xa25c3d98500a89be, 0x2f1f3f87eeba3b09 },
-        { 0xf7848c75e515b64a, 0xa59501badb4a9038, 0xc20d313f3f751b50, 0x19a1e353c0ae2ee8 }
-    },
-    {
-        { 0xb42172cdd596bdbd, 0x93e0454398eefc40, 0x9fb15347b44109b5, 0x736bd3990266ae34 },
-        { 0x7d1c7560bafa05c3, 0xb3e1a0a0c6e55e61, 0xe3529718c0d66473, 0x41546b11c20c3486 },
-        { 0x85532d509334b3b4, 0x46fd114b60816573, 0xcc5f5f30425c8375, 0x412295a2b87fab5c }
-    },
-    {
-        { 0x2e655261e293eac6, 0x845a92032133acdb, 0x460975cb7900996b, 0x0760bb8d195add80 },
+        { -0x7b25c32172ba01ee, -0x42bd3de71bbb1d2e, -0x57ae6987e081ca68, 0x7642c93f5616e2b2 },
+        { -0x34744cb928acac25, -0x03034db451aee1de, -0x345b72bf2af51911, 0x26e3bae5f4f7cb5d },
+        { 0x2323daa74595f8e4, -0x219773747a85414c, 0x3fc48e961c59326e, 0x0b2e73ca15c9b8ba }
+    },
+    {
+        { 0x0e3fbfaf79c03a55, 0x3077af054cbb5acf, -0x2a3aadba24c21c61, 0x015e68c1476a4af7 },
+        { -0x2944bbd73e80afda, -0x614d8ddc04a56359, -0x1c845afce6e639bc, 0x21ce380db59a6602 },
+        { -0x3e2ad7addff995c8, -0x6a9fc1adca8f510d, -0x7cd9a658dd9475b3, 0x5dd689091f8eedc9 }
+    },
+    {
+        { 0x1d022591a5313084, -0x35d2b55129d8f78e, -0x795ed47ad0f402e0, 0x56e6c439ad7da748 },
+        { -0x34537b21402c37aa, 0x1624c348b35ff244, -0x48077235a26352f9, 0x3b0e574da2c2ebe8 },
+        { -0x38fb00b6bd42451a, 0x5e21ade2b2de1f79, -0x16a24c0ca9ad0528, 0x0822b5378f08ebc1 }
+    },
+},
+{
+    {
+        { -0x1e480d6c9d8cfc7d, 0x4b5279ffebca8a2c, -0x25038875402becec, 0x7deb10149c72610f },
+        { 0x51f048478f387475, -0x4da2430b634134c4, -0x6554edbb2660dfab, 0x2c709e6c1c10a5d6 },
+        { -0x349d509578991186, 0x66cbec045553cd0e, 0x588001380f0be4b5, 0x08e68e9ff62ce2ea }
+    },
+    {
+        { 0x2f2d09d50ab8f2f9, -0x5346de723aa6dc21, 0x4a8f342673766cb9, 0x4cb13bd738f719f5 },
+        { 0x34ad500a4bc130ad, -0x72c724b6c2f42b64, -0x5da3c267aff57642, 0x2f1f3f87eeba3b09 },
+        { -0x087b738a1aea49b6, -0x5a6afe4524b56fc8, -0x3df2cec0c08ae4b0, 0x19a1e353c0ae2ee8 }
+    },
+    {
+        { -0x4bde8d322a694243, -0x6c1fbabc671103c0, -0x604eacb84bbef64b, 0x736bd3990266ae34 },
+        { 0x7d1c7560bafa05c3, -0x4c1e5f5f391aa19f, -0x1cad68e73f299b8d, 0x41546b11c20c3486 },
+        { -0x7aacd2af6ccb4c4c, 0x46fd114b60816573, -0x33a0a0cfbda37c8b, 0x412295a2b87fab5c }
+    },
+    {
+        { 0x2e655261e293eac6, -0x7ba56dfcdecc5325, 0x460975cb7900996b, 0x0760bb8d195add80 },
         { 0x19c99b88f57ed6e9, 0x5393cb266df8c825, 0x5cee3213b30ad273, 0x14e153ebb52d2e34 },
         { 0x413e1a17cde6818a, 0x57156da9ed69a084, 0x2cbf268f46caccb1, 0x6b34be9bc33ac5f2 }
     },
     {
-        { 0x11fc69656571f2d3, 0xc6c9e845530e737a, 0xe33ae7a2d4fe5035, 0x01b9c7b62e6dd30b },
-        { 0xf3df2f643a78c0b2, 0x4c3e971ef22e027c, 0xec7d1c5e49c1b5a3, 0x2012c18f0922dd2d },
-        { 0x880b55e55ac89d29, 0x1483241f45a0a763, 0x3d36efdfc2e76c1f, 0x08af5b784e4bade8 }
-    },
-    {
-        { 0xe27314d289cc2c4b, 0x4be4bd11a287178d, 0x18d528d6fa3364ce, 0x6423c1d5afd9826e },
-        { 0x283499dc881f2533, 0x9d0525da779323b6, 0x897addfb673441f4, 0x32b79d71163a168d },
-        { 0xcc85f8d9edfcb36a, 0x22bcc28f3746e5f9, 0xe49de338f9e5d3cd, 0x480a5efbc13e2dcc }
-    },
-    {
-        { 0xb6614ce442ce221f, 0x6e199dcc4c053928, 0x663fb4a4dc1cbe03, 0x24b31d47691c8e06 },
+        { 0x11fc69656571f2d3, -0x393617baacf18c86, -0x1cc5185d2b01afcb, 0x01b9c7b62e6dd30b },
+        { -0x0c20d09bc5873f4e, 0x4c3e971ef22e027c, -0x1382e3a1b63e4a5d, 0x2012c18f0922dd2d },
+        { -0x77f4aa1aa53762d7, 0x1483241f45a0a763, 0x3d36efdfc2e76c1f, 0x08af5b784e4bade8 }
+    },
+    {
+        { -0x1d8ceb2d7633d3b5, 0x4be4bd11a287178d, 0x18d528d6fa3364ce, 0x6423c1d5afd9826e },
+        { 0x283499dc881f2533, -0x62fada25886cdc4a, -0x7685220498cbbe0c, 0x32b79d71163a168d },
+        { -0x337a072612034c96, 0x22bcc28f3746e5f9, -0x1b621cc7061a2c33, 0x480a5efbc13e2dcc }
+    },
+    {
+        { -0x499eb31bbd31dde1, 0x6e199dcc4c053928, 0x663fb4a4dc1cbe03, 0x24b31d47691c8e06 },
         { 0x0b51e70b01622071, 0x06b505cf8b1dafc5, 0x2c6bb061ef5aabcd, 0x47aa27600cb7bf31 },
-        { 0x2a541eedc015f8c3, 0x11a4fe7e7c693f7c, 0xf0af66134ea278d6, 0x545b585d14dda094 }
+        { 0x2a541eedc015f8c3, 0x11a4fe7e7c693f7c, -0x0f5099ecb15d872a, 0x545b585d14dda094 }
     },
     {
         { 0x6204e4d0e3b321e1, 0x3baa637a28ff1e95, 0x0b0ccffd5b99bd9e, 0x4d22dc3e64c8d071 },
-        { 0x67bf275ea0d43a0f, 0xade68e34089beebe, 0x4289134cd479e72e, 0x0f62f9c332ba5454 },
-        { 0xfcb46589d63b5f39, 0x5cae6a3f57cbcf61, 0xfebac2d2953afa05, 0x1c0fa01a36371436 }
-    },
-},
-{
-    {
-        { 0xc11ee5e854c53fae, 0x6a0b06c12b4f3ff4, 0x33540f80e0b67a72, 0x15f18fc3cd07e3ef },
-        { 0xe7547449bc7cd692, 0x0f9abeaae6f73ddf, 0x4af01ca700837e29, 0x63ab1b5d3f1bc183 },
-        { 0x32750763b028f48c, 0x06020740556a065f, 0xd53bd812c3495b58, 0x08706c9b865f508d }
-    },
-    {
-        { 0xcc991b4138b41246, 0x243b9c526f9ac26b, 0xb9ef494db7cbabbd, 0x5fba433dd082ed00 },
-        { 0xf37ca2ab3d343dff, 0x1a8c6a2d80abc617, 0x8e49e035d4ccffca, 0x48b46beebaa1d1b9 },
-        { 0x9c49e355c9941ad0, 0xb9734ade74498f84, 0x41c3fed066663e5c, 0x0ecfedf8e8e710b3 }
-    },
-    {
-        { 0x744f7463e9403762, 0xf79a8dee8dfcc9c9, 0x163a649655e4cde3, 0x3b61788db284f435 },
-        { 0x76430f9f9cd470d9, 0xb62acc9ba42f6008, 0x1898297c59adad5e, 0x7789dd2db78c5080 },
-        { 0xb22228190d6ef6b2, 0xa94a66b246ce4bfa, 0x46c1a77a4f0b6cc7, 0x4236ccffeb7338cf }
+        { 0x67bf275ea0d43a0f, -0x521971cbf7641142, 0x4289134cd479e72e, 0x0f62f9c332ba5454 },
+        { -0x034b9a7629c4a0c7, 0x5cae6a3f57cbcf61, -0x01453d2d6ac505fb, 0x1c0fa01a36371436 }
+    },
+},
+{
+    {
+        { -0x3ee11a17ab3ac052, 0x6a0b06c12b4f3ff4, 0x33540f80e0b67a72, 0x15f18fc3cd07e3ef },
+        { -0x18ab8bb64383296e, 0x0f9abeaae6f73ddf, 0x4af01ca700837e29, 0x63ab1b5d3f1bc183 },
+        { 0x32750763b028f48c, 0x06020740556a065f, -0x2ac427ed3cb6a4a8, 0x08706c9b865f508d }
+    },
+    {
+        { -0x3366e4bec74bedba, 0x243b9c526f9ac26b, -0x4610b6b248345443, 0x5fba433dd082ed00 },
+        { -0x0c835d54c2cbc201, 0x1a8c6a2d80abc617, -0x71b61fca2b330036, 0x48b46beebaa1d1b9 },
+        { -0x63b61caa366be530, -0x468cb5218bb6707c, 0x41c3fed066663e5c, 0x0ecfedf8e8e710b3 }
+    },
+    {
+        { 0x744f7463e9403762, -0x0865721172033637, 0x163a649655e4cde3, 0x3b61788db284f435 },
+        { 0x76430f9f9cd470d9, -0x49d533645bd09ff8, 0x1898297c59adad5e, 0x7789dd2db78c5080 },
+        { -0x4dddd7e6f291094e, -0x56b5994db931b406, 0x46c1a77a4f0b6cc7, 0x4236ccffeb7338cf }
     },
     {
         { 0x3bd82dbfda777df6, 0x71b177cc0b98369e, 0x1d0e8463850c3699, 0x5a71945b48e2d1f1 },
-        { 0x8497404d0d55e274, 0x6c6663d9c4ad2b53, 0xec2fb0d9ada95734, 0x2617e120cdb8f73c },
-        { 0x6f203dd5405b4b42, 0x327ec60410b24509, 0x9c347230ac2a8846, 0x77de29fc11ffeb6a }
-    },
-    {
-        { 0x835e138fecced2ca, 0x8c9eaf13ea963b9a, 0xc95fbfc0b2160ea6, 0x575e66f3ad877892 },
-        { 0xb0ac57c983b778a8, 0x53cdcca9d7fe912c, 0x61c2b854ff1f59dc, 0x3a1a2cf0f0de7dac },
-        { 0x99803a27c88fcb3a, 0x345a6789275ec0b0, 0x459789d0ff6c2be5, 0x62f882651e70a8b2 }
-    },
-    {
-        { 0x6d822986698a19e0, 0xdc9821e174d78a71, 0x41a85f31f6cb1f47, 0x352721c2bcda9c51 },
-        { 0x085ae2c759ff1be4, 0x149145c93b0e40b7, 0xc467e7fa7ff27379, 0x4eeecf0ad5c73a95 },
-        { 0x48329952213fc985, 0x1087cf0d368a1746, 0x8e5261b166c15aa5, 0x2d5b2d842ed24c21 }
-    },
-    {
-        { 0x5eb7d13d196ac533, 0x377234ecdb80be2b, 0xe144cffc7cf5ae24, 0x5226bcf9c441acec },
-        { 0x02cfebd9ebd3ded1, 0xd45b217739021974, 0x7576f813fe30a1b7, 0x5691b6f9a34ef6c2 },
-        { 0x79ee6c7223e5b547, 0x6f5f50768330d679, 0xed73e1e96d8adce9, 0x27c3da1e1d8ccc03 }
-    },
-    {
-        { 0x28302e71630ef9f6, 0xc2d4a2032b64cee0, 0x090820304b6292be, 0x5fca747aa82adf18 },
+        { -0x7b68bfb2f2aa1d8c, 0x6c6663d9c4ad2b53, -0x13d04f265256a8cc, 0x2617e120cdb8f73c },
+        { 0x6f203dd5405b4b42, 0x327ec60410b24509, -0x63cb8dcf53d577ba, 0x77de29fc11ffeb6a }
+    },
+    {
+        { -0x7ca1ec7013312d36, -0x736150ec1569c466, -0x36a0403f4de9f15a, 0x575e66f3ad877892 },
+        { -0x4f53a8367c488758, 0x53cdcca9d7fe912c, 0x61c2b854ff1f59dc, 0x3a1a2cf0f0de7dac },
+        { -0x667fc5d8377034c6, 0x345a6789275ec0b0, 0x459789d0ff6c2be5, 0x62f882651e70a8b2 }
+    },
+    {
+        { 0x6d822986698a19e0, -0x2367de1e8b28758f, 0x41a85f31f6cb1f47, 0x352721c2bcda9c51 },
+        { 0x085ae2c759ff1be4, 0x149145c93b0e40b7, -0x3b981805800d8c87, 0x4eeecf0ad5c73a95 },
+        { 0x48329952213fc985, 0x1087cf0d368a1746, -0x71ad9e4e993ea55b, 0x2d5b2d842ed24c21 }
+    },
+    {
+        { 0x5eb7d13d196ac533, 0x377234ecdb80be2b, -0x1ebb3003830a51dc, 0x5226bcf9c441acec },
+        { 0x02cfebd9ebd3ded1, -0x2ba4de88c6fde68c, 0x7576f813fe30a1b7, 0x5691b6f9a34ef6c2 },
+        { 0x79ee6c7223e5b547, 0x6f5f50768330d679, -0x128c1e1692752317, 0x27c3da1e1d8ccc03 }
+    },
+    {
+        { 0x28302e71630ef9f6, -0x3d2b5dfcd49b3120, 0x090820304b6292be, 0x5fca747aa82adf18 },
         { 0x7eb9efb23fe24c74, 0x3e50f49f1651be01, 0x3ea732dc21858dea, 0x17377bd75bb810f9 },
-        { 0x232a03c35c258ea5, 0x86f23a2c6bcb0cf1, 0x3dad8d0d2e442166, 0x04a8933cab76862b }
-    },
-},
-{
-    {
-        { 0x69082b0e8c936a50, 0xf9c9a035c1dac5b6, 0x6fb73e54c4dfb634, 0x4005419b1d2bc140 },
-        { 0xd2c604b622943dff, 0xbc8cbece44cfb3a0, 0x5d254ff397808678, 0x0fa3614f3b1ca6bf },
-        { 0xa003febdb9be82f0, 0x2089c1af3a44ac90, 0xf8499f911954fa8e, 0x1fba218aef40ab42 }
-    },
-    {
-        { 0x4f3e57043e7b0194, 0xa81d3eee08daaf7f, 0xc839c6ab99dcdef1, 0x6c535d13ff7761d5 },
-        { 0xab549448fac8f53e, 0x81f6e89a7ba63741, 0x74fd6c7d6c2b5e01, 0x392e3acaa8c86e42 },
+        { 0x232a03c35c258ea5, -0x790dc5d39434f30f, 0x3dad8d0d2e442166, 0x04a8933cab76862b }
+    },
+},
+{
+    {
+        { 0x69082b0e8c936a50, -0x06365fca3e253a4a, 0x6fb73e54c4dfb634, 0x4005419b1d2bc140 },
+        { -0x2d39fb49dd6bc201, -0x43734131bb304c60, 0x5d254ff397808678, 0x0fa3614f3b1ca6bf },
+        { -0x5ffc014246417d10, 0x2089c1af3a44ac90, -0x07b6606ee6ab0572, 0x1fba218aef40ab42 }
+    },
+    {
+        { 0x4f3e57043e7b0194, -0x57e2c111f7255081, -0x37c639546623210f, 0x6c535d13ff7761d5 },
+        { -0x54ab6bb705370ac2, -0x7e0917658459c8bf, 0x74fd6c7d6c2b5e01, 0x392e3acaa8c86e42 },
         { 0x4cbd34e93e8a35af, 0x2e0781445887e816, 0x19319c76f29ab0ab, 0x25e17fe4d50ac13b }
     },
     {
-        { 0x915f7ff576f121a7, 0xc34a32272fcd87e3, 0xccba2fde4d1be526, 0x6bba828f8969899b },
+        { -0x6ea0800a890ede59, -0x3cb5cdd8d032781d, -0x3345d021b2e41ada, 0x6bba828f8969899b },
         { 0x0a289bd71e04f676, 0x208e1c52d6420f95, 0x5186d8b034691fab, 0x255751442a9fb351 },
-        { 0xe2d1bc6690fe3901, 0x4cb54a18a0997ad5, 0x971d6914af8460d4, 0x559d504f7f6b7be4 }
-    },
-    {
-        { 0x9c4891e7f6d266fd, 0x0744a19b0307781b, 0x88388f1d6061e23b, 0x123ea6a3354bd50e },
-        { 0xa7738378b3eb54d5, 0x1d69d366a5553c7c, 0x0a26cf62f92800ba, 0x01ab12d5807e3217 },
-        { 0x118d189041e32d96, 0xb9ede3c2d8315848, 0x1eab4271d83245d9, 0x4a3961e2c918a154 }
-    },
-    {
-        { 0x0327d644f3233f1e, 0x499a260e34fcf016, 0x83b5a716f2dab979, 0x68aceead9bd4111f },
-        { 0x71dc3be0f8e6bba0, 0xd6cef8347effe30a, 0xa992425fe13a476a, 0x2cd6bce3fb1db763 },
+        { -0x1d2e43996f01c6ff, 0x4cb54a18a0997ad5, -0x68e296eb507b9f2c, 0x559d504f7f6b7be4 }
+    },
+    {
+        { -0x63b76e18092d9903, 0x0744a19b0307781b, -0x77c770e29f9e1dc5, 0x123ea6a3354bd50e },
+        { -0x588c7c874c14ab2b, 0x1d69d366a5553c7c, 0x0a26cf62f92800ba, 0x01ab12d5807e3217 },
+        { 0x118d189041e32d96, -0x46121c3d27cea7b8, 0x1eab4271d83245d9, 0x4a3961e2c918a154 }
+    },
+    {
+        { 0x0327d644f3233f1e, 0x499a260e34fcf016, -0x7c4a58e90d254687, 0x68aceead9bd4111f },
+        { 0x71dc3be0f8e6bba0, -0x293107cb81001cf6, -0x566dbda01ec5b896, 0x2cd6bce3fb1db763 },
         { 0x38b4c90ef3d7c210, 0x308e6e24b7ad040c, 0x3860d9f1b7e73e23, 0x595760d5b508f597 }
     },
     {
-        { 0x882acbebfd022790, 0x89af3305c4115760, 0x65f492e37d3473f4, 0x2cb2c5df54515a2b },
-        { 0x6129bfe104aa6397, 0x8f960008a4a7fccb, 0x3f8bc0897d909458, 0x709fa43edcb291a9 },
-        { 0xeb0a5d8c63fd2aca, 0xd22bc1662e694eff, 0x2723f36ef8cbb03a, 0x70f029ecf0c8131f }
+        { -0x77d5341402fdd870, -0x7650ccfa3beea8a0, 0x65f492e37d3473f4, 0x2cb2c5df54515a2b },
+        { 0x6129bfe104aa6397, -0x7069fff75b580335, 0x3f8bc0897d909458, 0x709fa43edcb291a9 },
+        { -0x14f5a2739c02d536, -0x2dd43e99d196b101, 0x2723f36ef8cbb03a, 0x70f029ecf0c8131f }
     },
     {
         { 0x2a6aafaa5e10b0b9, 0x78f0a370ef041aa9, 0x773efb77aa3ad61f, 0x44eca5a2a74bd9e1 },
-        { 0x461307b32eed3e33, 0xae042f33a45581e7, 0xc94449d3195f0366, 0x0b7d5d8a6c314858 },
-        { 0x25d448327b95d543, 0x70d38300a3340f1d, 0xde1c531c60e1c52b, 0x272224512c7de9e4 }
-    },
-    {
-        { 0xbf7bbb8a42a975fc, 0x8c5c397796ada358, 0xe27fc76fcdedaa48, 0x19735fd7f6bc20a6 },
-        { 0x1abc92af49c5342e, 0xffeed811b2e6fad0, 0xefa28c8dfcc84e29, 0x11b5df18a44cc543 },
-        { 0xe3ab90d042c84266, 0xeb848e0f7f19547e, 0x2503a1d065a497b9, 0x0fef911191df895f }
-    },
-},
-{
-    {
-        { 0x6ab5dcb85b1c16b7, 0x94c0fce83c7b27a5, 0xa4b11c1a735517be, 0x499238d0ba0eafaa },
-        { 0xb1507ca1ab1c6eb9, 0xbd448f3e16b687b3, 0x3455fb7f2c7a91ab, 0x7579229e2f2adec1 },
-        { 0xecf46e527aba8b57, 0x15a08c478bd1647b, 0x7af1c6a65f706fef, 0x6345fa78f03a30d5 }
-    },
-    {
-        { 0x93d3cbe9bdd8f0a4, 0xdb152c1bfd177302, 0x7dbddc6d7f17a875, 0x3e1a71cc8f426efe },
-        { 0xdf02f95f1015e7a1, 0x790ec41da9b40263, 0x4d3a0ea133ea1107, 0x54f70be7e33af8c9 },
-        { 0xc83ca3e390babd62, 0x80ede3670291c833, 0xc88038ccd37900c4, 0x2c5fc0231ec31fa1 }
-    },
-    {
-        { 0xc422e4d102456e65, 0x87414ac1cad47b91, 0x1592e2bba2b6ffdd, 0x75d9d2bff5c2100f },
-        { 0xfeba911717038b4f, 0xe5123721c9deef81, 0x1c97e4e75d0d8834, 0x68afae7a23dc3bc6 },
-        { 0x5bd9b4763626e81c, 0x89966936bca02edd, 0x0a41193d61f077b3, 0x3097a24200ce5471 }
-    },
-    {
-        { 0xa162e7246695c486, 0x131d633435a89607, 0x30521561a0d12a37, 0x56704bada6afb363 },
-        { 0x57427734c7f8b84c, 0xf141a13e01b270e9, 0x02d1adfeb4e564a6, 0x4bb23d92ce83bd48 },
-        { 0xaf6c4aa752f912b9, 0x5e665f6cd86770c8, 0x4c35ac83a3c8cd58, 0x2b7a29c010a58a7e }
-    },
-    {
-        { 0x33810a23bf00086e, 0xafce925ee736ff7c, 0x3d60e670e24922d4, 0x11ce9e714f96061b },
-        { 0xc4007f77d0c1cec3, 0x8d1020b6bac492f8, 0x32ec29d57e69daaf, 0x599408759d95fce0 },
-        { 0x219ef713d815bac1, 0xf141465d485be25c, 0x6d5447cc4e513c51, 0x174926be5ef44393 }
-    },
-    {
-        { 0x3ef5d41593ea022e, 0x5cbcc1a20ed0eed6, 0x8fd24ecf07382c8c, 0x6fa42ead06d8e1ad },
-        { 0xb5deb2f9fc5bd5bb, 0x92daa72ae1d810e1, 0xafc4cfdcb72a1c59, 0x497d78813fc22a24 },
-        { 0xe276824a1f73371f, 0x7f7cf01c4f5b6736, 0x7e201fe304fa46e7, 0x785a36a357808c96 }
+        { 0x461307b32eed3e33, -0x51fbd0cc5baa7e19, -0x36bbb62ce6a0fc9a, 0x0b7d5d8a6c314858 },
+        { 0x25d448327b95d543, 0x70d38300a3340f1d, -0x21e3ace39f1e3ad5, 0x272224512c7de9e4 }
+    },
+    {
+        { -0x40844475bd568a04, -0x73a3c68869525ca8, -0x1d803890321255b8, 0x19735fd7f6bc20a6 },
+        { 0x1abc92af49c5342e, -0x001127ee4d190530, -0x105d73720337b1d7, 0x11b5df18a44cc543 },
+        { -0x1c546f2fbd37bd9a, -0x147b71f080e6ab82, 0x2503a1d065a497b9, 0x0fef911191df895f }
+    },
+},
+{
+    {
+        { 0x6ab5dcb85b1c16b7, -0x6b3f0317c384d85b, -0x5b4ee3e58caae842, 0x499238d0ba0eafaa },
+        { -0x4eaf835e54e39147, -0x42bb70c1e949784d, 0x3455fb7f2c7a91ab, 0x7579229e2f2adec1 },
+        { -0x130b91ad854574a9, 0x15a08c478bd1647b, 0x7af1c6a65f706fef, 0x6345fa78f03a30d5 }
+    },
+    {
+        { -0x6c2c341642270f5c, -0x24ead3e402e88cfe, 0x7dbddc6d7f17a875, 0x3e1a71cc8f426efe },
+        { -0x20fd06a0efea185f, 0x790ec41da9b40263, 0x4d3a0ea133ea1107, 0x54f70be7e33af8c9 },
+        { -0x37c35c1c6f45429e, -0x7f121c98fd6e37cd, -0x377fc7332c86ff3c, 0x2c5fc0231ec31fa1 }
+    },
+    {
+        { -0x3bdd1b2efdba919b, -0x78beb53e352b846f, 0x1592e2bba2b6ffdd, 0x75d9d2bff5c2100f },
+        { -0x01456ee8e8fc74b1, -0x1aedc8de3621107f, 0x1c97e4e75d0d8834, 0x68afae7a23dc3bc6 },
+        { 0x5bd9b4763626e81c, -0x766996c9435fd123, 0x0a41193d61f077b3, 0x3097a24200ce5471 }
+    },
+    {
+        { -0x5e9d18db996a3b7a, 0x131d633435a89607, 0x30521561a0d12a37, 0x56704bada6afb363 },
+        { 0x57427734c7f8b84c, -0x0ebe5ec1fe4d8f17, 0x02d1adfeb4e564a6, 0x4bb23d92ce83bd48 },
+        { -0x5093b558ad06ed47, 0x5e665f6cd86770c8, 0x4c35ac83a3c8cd58, 0x2b7a29c010a58a7e }
+    },
+    {
+        { 0x33810a23bf00086e, -0x50316da118c90084, 0x3d60e670e24922d4, 0x11ce9e714f96061b },
+        { -0x3bff80882f3e313d, -0x72efdf49453b6d08, 0x32ec29d57e69daaf, 0x599408759d95fce0 },
+        { 0x219ef713d815bac1, -0x0ebeb9a2b7a41da4, 0x6d5447cc4e513c51, 0x174926be5ef44393 }
+    },
+    {
+        { 0x3ef5d41593ea022e, 0x5cbcc1a20ed0eed6, -0x702db130f8c7d374, 0x6fa42ead06d8e1ad },
+        { -0x4a214d0603a42a45, -0x6d2558d51e27ef1f, -0x503b302348d5e3a7, 0x497d78813fc22a24 },
+        { -0x1d897db5e08cc8e1, 0x7f7cf01c4f5b6736, 0x7e201fe304fa46e7, 0x785a36a357808c96 }
     },
     {
         { 0x070442985d517bc3, 0x6acd56c7ae653678, 0x00a27983985a7763, 0x5167effae512662b },
-        { 0x825fbdfd63014d2b, 0xc852369c6ca7578b, 0x5b2fcd285c0b5df0, 0x12ab214c58048c8f },
-        { 0xbd4ea9e10f53c4b6, 0x1673dc5f8ac91a14, 0xa8f81a4e2acc1aba, 0x33a92a7924332a25 }
-    },
-    {
-        { 0x7ba95ba0218f2ada, 0xcff42287330fb9ca, 0xdada496d56c6d907, 0x5380c296f4beee54 },
-        { 0x9dd1f49927996c02, 0x0cb3b058e04d1752, 0x1f7e88967fd02c3e, 0x2f964268cb8b3eb1 },
-        { 0x9d4f270466898d0a, 0x3d0987990aff3f7a, 0xd09ef36267daba45, 0x7761455e7b1c669c }
+        { -0x7da042029cfeb2d5, -0x37adc9639358a875, 0x5b2fcd285c0b5df0, 0x12ab214c58048c8f },
+        { -0x42b1561ef0ac3b4a, 0x1673dc5f8ac91a14, -0x5707e5b1d533e546, 0x33a92a7924332a25 }
+    },
+    {
+        { 0x7ba95ba0218f2ada, -0x300bdd78ccf04636, -0x2525b692a93926f9, 0x5380c296f4beee54 },
+        { -0x622e0b66d86693fe, 0x0cb3b058e04d1752, 0x1f7e88967fd02c3e, 0x2f964268cb8b3eb1 },
+        { -0x62b0d8fb997672f6, 0x3d0987990aff3f7a, -0x2f610c9d982545bb, 0x7761455e7b1c669c }
+    },
+},
+};
+#elif defined(CURVED25519_ASM_32BIT)
+static const ge_precomp base[64][8] = {
+{
+    {
+        { -0x0a73c47b, 0x2fbc93c6, -0x0473f1e7, -0x306cd23a, 0x643d42c2, 0x270b4898, 0x33d4ba65, 0x07cf9d3a },
+        { -0x28bf6ec2, -0x62efc6fb, -0x2ebf414d, -0x02c660fb, 0x688f8a09, -0x5a3e7bcc, -0x6707ed99, 0x44fd2f92 },
+        { 0x4b6fbb59, -0x2442ea99, -0x115d5a16, 0x41e13f00, -0x36a83906, -0x322b62e4, -0x50e91336, 0x4f0ebe1f }
+    },
+    {
+        { -0x6cc38e29, -0x6ddb1804, 0x7a0ff5b5, -0x60b9626a, -0x1e29f8fe, 0x5aa69a65, -0x5782d1d2, 0x590c063f },
+        { 0x42b4d5a8, -0x75665aa0, 0x4e60acf6, -0x70d47ef4, -0x4e91c856, -0x1f61dc95, 0x69c92555, 0x6bb595a6 },
+        { -0x252c97fe, 0x6e347eaa, -0x7c11b7fb, -0x450ca66d, -0x19f897da, 0x3bcabe10, 0x165ed1b8, 0x49314f0a }
+    },
+    {
+        { 0x4cee9730, -0x50da4f58, -0x1779b476, 0x025a8430, -0x60fe98ce, -0x3ee4affe, -0x657f070c, 0x7a164e1b },
+        { -0x5b032d9b, 0x56611fe8, -0x1a3e4583, 0x3bd353fd, 0x214bd6bd, -0x7ece0ce6, 0x555bda62, 0x2ab91587 },
+        { -0x0e98b7cc, -0x640dee0c, -0x09d2076b, -0x47b194e9, 0x5b722a4e, -0x282190f9, 0x63bb2a21, 0x549a04b9 }
+    },
+    {
+        { -0x7103f661, 0x287351b9, 0x7dfd2538, 0x6765c6f4, -0x04f56d9b, -0x35cb72c3, 0x21e58727, 0x680e9103 },
+        { 0x056818bf, -0x6a01faf6, 0x5660faa9, 0x327e8971, 0x06a05073, -0x3c171c33, 0x7445a49a, 0x27933f4c },
+        { -0x1aebd950, -0x40e1ba14, 0x6dba0f94, -0x1cd439c3, -0x7307ad40, -0x1bd68b2b, -0x4f19b3e8, 0x44f079b1 }
+    },
+    {
+        { 0x08a5bb33, -0x5ded43bc, -0x38a112fe, -0x72afb73d, 0x5abfec44, -0x22e414f4, 0x46e206eb, 0x2945ccf1 },
+        { -0x5bb82946, 0x7f9182c3, 0x4b2729b7, -0x2affeb2f, -0x479b5f79, -0x1cc30ee4, -0x14e4aa0d, 0x154a7e73 },
+        { -0x182ffc4d, -0x37cd5e87, 0x00124d7e, 0x5f729d0a, 0x0e6d8ff3, 0x62c1d4a1, 0x38b27a98, 0x68b8ac59 }
+    },
+    {
+        { 0x77157131, 0x3a0ceeeb, 0x00c8af88, -0x64d8ea77, -0x25a658ca, -0x7f9a4998, -0x5d33c743, 0x51e57bb6 },
+        { 0x7b7d8ca4, 0x499806b6, 0x27d22739, 0x575be284, 0x204553b9, -0x44f7a319, -0x51be877c, 0x38b64c41 },
+        { 0x689de3a4, -0x7062526f, -0x07046ec9, 0x175f2428, -0x60304678, 0x050ab532, 0x1354c09f, 0x7865dfa2 }
+    },
+    {
+        { -0x6bb15c41, 0x6b1a5cd0, -0x4c623f2e, 0x7470353a, 0x28542e49, 0x71b25282, 0x283c927e, 0x461bea69 },
+        { -0x55cdde4f, -0x4590d366, 0x3bba23a7, 0x6ca02153, -0x6de6d3c6, -0x621589b1, 0x2e5317e0, 0x1d6edd5d },
+        { -0x54f025ca, 0x217a8aac, 0x3d3549c8, -0x5ad739ac, 0x13ab7568, 0x37d05b8b, 0x3a2cbc37, 0x233cef62 }
+    },
+    {
+        { 0x04dd3e8f, 0x59b75966, -0x1d778fd4, 0x6cb30377, 0x5ed9c323, -0x4ecc639a, 0x61bce52f, 0x0915e760 },
+        { -0x0c6dcb27, -0x1d58a213, -0x1e4aa707, -0x69c28980, 0x6e3c23fb, 0x2c2741ac, 0x320e01c3, 0x3a9024a1 },
+        { -0x57cb5c82, -0x208217cb, 0x689857ea, -0x741e6326, 0x7167b326, 0x2c118536, -0x24102a3e, 0x589eb3d9 }
+    },
+},
+{
+    {
+        { 0x2d9021f6, 0x322d04a5, 0x75c6bf9c, -0x463e60cd, 0x42d20b09, 0x587a3a43, -0x559b019f, 0x143b1cf8 },
+        { 0x553e2df3, 0x7ec851ca, -0x59b7874d, -0x58ed7b35, 0x3288d1e7, -0x194a1be7, 0x5a9a8883, 0x4cf210ec },
+        { -0x69753555, -0x60798383, 0x27092729, 0x5f54258e, -0x15e7f68b, -0x2f582cb5, 0x374126e1, 0x21b546a3 }
+    },
+    {
+        { -0x2e7ade71, 0x490a7a45, 0x46049335, -0x65eac888, -0x33ce1e0a, 0x0060ea09, -0x0791169b, 0x7e041577 },
+        { -0x5d777cbd, -0x56b007a8, 0x5313ed3c, -0x31f12baa, -0x4a40cb06, -0x0aa3c231, -0x36154c8f, 0x0a653ca5 },
+        { -0x31a4980d, 0x66b2a496, -0x42a9686a, -0x00ab6d28, 0x4a592cd0, 0x503cec29, 0x0813acb2, 0x56694365 }
+    },
+    {
+        { 0x1dabb69d, 0x5672f9eb, -0x5017ac04, -0x458f4acb, 0x2796d66d, 0x47ac0f75, -0x6bee8d8b, 0x32a53517 },
+        { 0x26620798, -0x47e724f4, 0x606e354a, 0x5d5c31d9, 0x00a8cdc7, 0x0982fa4f, 0x4653e2d4, 0x17e12bcd },
+        { -0x209b7bc9, -0x2c59bb5a, -0x77f04023, 0x703b6559, -0x52c5e55b, -0x347adac0, -0x71b39b98, 0x0900b3f7 }
+    },
+    {
+        { -0x37e952cf, -0x12d7f042, -0x2719101d, 0x52d9595b, -0x0939dc0b, 0x0fe71772, 0x051e293c, 0x4314030b },
+        { 0x679d651b, 0x0a851b9f, 0x033342f2, -0x1ef7349f, -0x1774cf5d, -0x29fe0a81, -0x12d228ec, 0x371f3aca },
+        { -0x040f4353, -0x2a9fffa2, -0x2e78f3a2, -0x7148f0d2, -0x2f7b1960, 0x201f9033, -0x31849990, 0x4c3a5ae1 }
+    },
+    {
+        { -0x36c25f23, -0x45078a1c, 0x71b9294d, -0x46cd7d59, -0x0b393ba0, -0x7f29c049, -0x15993e7f, 0x6de9c73d },
+        { -0x2347056b, 0x4138a434, 0x6c96840b, -0x78f30983, 0x297be82c, -0x21c77a8c, 0x7262a55a, 0x7c814db2 },
+        { -0x5fb2070e, 0x478904d5, -0x4efebd2d, -0x050451b6, 0x555d0998, -0x0937539d, 0x2f90b104, 0x5aac4a41 }
+    },
+    {
+        { -0x4280aecc, 0x603a0d0a, -0x1e2c51ba, -0x7f7636ce, -0x7867429d, -0x20da6ec7, 0x74ba0235, 0x1c145cd2 },
+        { 0x3ac92908, -0x39b0cd95, -0x199c1e20, 0x5551b282, 0x4a1a4b83, 0x476b35f5, 0x189f68c2, 0x1b9da3fe },
+        { 0x75f3d743, 0x32e83864, 0x6ae5d9ef, 0x365b8baf, 0x385b681e, -0x7dadc74a, 0x167d65e1, 0x234929c1 }
+    },
+    {
+        { 0x1d099fcf, 0x48145cc2, -0x33d7281b, 0x4535c192, 0x48247e01, -0x7f183e1b, 0x3b2973ee, 0x4a5f2874 },
+        { -0x5f885218, -0x67b21355, 0x19eb389d, 0x383f77ad, 0x2954d794, -0x38139482, -0x1483c586, 0x59c77b3a },
+        { 0x225ccf62, -0x2c5228db, -0x4dead3a3, -0x6ee5cc7f, 0x5b08f87d, -0x274c6053, 0x4799fe3b, 0x6f05606b }
+    },
+    {
+        { -0x06e49b7d, 0x5b433149, 0x5a2cbf62, -0x524a239b, 0x632827b3, -0x78057bee, -0x54b60728, 0x60895e91 },
+        { 0x177ba962, -0x6001616e, 0x0de5cae1, -0x675118e3, 0x2d831044, 0x3ff4ae94, 0x58533ac8, 0x714de12e },
+        { 0x0cf86c18, -0x16130d13, 0x0735dfd4, -0x4b92f9ee, 0x04b96be7, -0x43625f68, -0x26923d95, 0x73e2e62f }
+    },
+},
+{
+    {
+        { 0x632f9c1d, 0x2eccdd0e, 0x76893115, 0x51d0b696, -0x579c85a8, 0x52dfb76b, -0x5ff110c7, 0x6dd37d49 },
+        { 0x49aa515e, -0x12a49cac, 0x0bc6823a, -0x579a3b61, 0x5b42d1c4, -0x7af3e017, 0x03d315b9, 0x30d76d6f },
+        { 0x2106e4c7, 0x6c444417, -0x6d728097, -0x04ac2980, 0x694d3f26, -0x4b8c615c, 0x2e864bb0, 0x10c69711 }
+    },
+    {
+        { -0x7ca737fb, 0x0ca62aa0, 0x7a204247, 0x6a3d4ae3, 0x3b11eddc, 0x7464d3a6, 0x550806ef, 0x03bf9baf },
+        { 0x7dbe5fde, 0x6493c427, 0x19ad7ea2, 0x265d4fad, 0x46304590, 0x0e00dfc8, -0x129901f7, 0x25e61cab },
+        { -0x33a799fc, 0x3f13e128, -0x4ba68b82, 0x6f5873ec, -0x33ed970b, -0x5f49c213, 0x4586e22c, 0x566d7863 }
+    },
+    {
+        { -0x39a5d030, -0x5efabd7b, -0x0ce9983d, 0x6c64112a, 0x731aee58, 0x680ae240, 0x4793b22a, 0x14fba5f3 },
+        { -0x633ef7cc, 0x1637a49f, -0x57643baf, -0x4371a92b, 0x7f7fd2db, 0x1cb5ec0f, 0x5ecc35d9, 0x33975bca },
+        { 0x6985f7d4, 0x3cd74616, -0x3637ffa9, 0x593e5e84, 0x7b61131e, 0x2fc3f2b6, -0x7c03ad94, 0x14829cea }
+    },
+    {
+        { 0x4e71ecb8, 0x21e70b2f, 0x40a477e3, -0x19a92247, -0x31e2b080, -0x409aa932, 0x535d7b7e, 0x05fc3bc4 },
+        { -0x68226a3e, -0x00bc847c, -0x55b14a59, 0x6c744e30, 0x3c85e88b, -0x61f3a29f, 0x5f758173, 0x2fd9c71e },
+        { 0x52afdedd, 0x24b8b3ae, -0x12c4cf31, 0x3495638c, -0x56417e6b, 0x33a4bc83, 0x5c651f04, 0x37376747 }
+    },
+    {
+        { 0x14246590, 0x634095cb, 0x16c15535, -0x10edebc0, -0x76ef43a0, -0x61c7ebf4, 0x30907c8c, 0x6bf59057 },
+        { 0x40d1add9, 0x2fba99fd, -0x690b2fd9, -0x4cf8e991, 0x15f03bae, 0x4363f052, 0x3b18f999, 0x1fbea56c },
+        { -0x1ebea476, 0x0fa778f1, -0x453c5882, 0x06409ff7, -0x655d65b0, 0x6f52d7b8, 0x7a635a56, 0x02521cf6 }
+    },
+    {
+        { 0x772f5ee4, -0x4eeb98e0, -0x69f86532, -0x17076b4f, 0x00ac824a, 0x4af8224d, -0x0832933c, 0x001753d9 },
+        { 0x0a9d5294, 0x513fee0b, 0x0fdf5a66, -0x706718a4, -0x401ef832, -0x2b9e7978, 0x71382ced, 0x3fa00a7e },
+        { -0x69c224cc, 0x3c69232d, -0x4b68c7a8, 0x1dde87da, -0x5f6e0d7b, -0x55282e07, -0x5fb7124a, 0x12b5fe2f }
+    },
+    {
+        { -0x5290e16e, -0x20d483da, 0x504b8913, 0x4b66d323, 0x751c8bc3, -0x73bf6240, 0x0796c7b8, 0x6f7e93c2 },
+        { -0x69031cb3, 0x71f0fbc4, -0x520ca413, 0x73b9826b, -0x00d73a9f, -0x2dfb8d9f, 0x6fb1206f, 0x749b76f9 },
+        { -0x515951fb, 0x1f5af604, -0x411b6367, -0x3edcae0f, -0x1100949a, 0x61a808b5, 0x01e02151, 0x0fcec10f }
+    },
+    {
+        { -0x3bdbb1bb, 0x3df2d29d, -0x6c2721f6, 0x2b020e74, -0x7df3deb3, 0x6cc8067e, 0x6feab90a, 0x41377916 },
+        { 0x49fe1e44, 0x644d58a6, 0x31ad777e, 0x21fcaea2, -0x77802f2e, 0x02441c5a, -0x7c3aee0d, 0x4901aa71 },
+        { -0x73e50710, 0x08b1b754, 0x246299b4, -0x31f08584, 0x1e06d939, -0x089f4f07, 0x726d1213, 0x41bb887b }
+    },
+},
+{
+    {
+        { -0x55c6082e, -0x68267f20, 0x52c6b51c, 0x35d03842, 0x07cd55aa, 0x7d43f493, -0x48753c9e, 0x56bd36cf },
+        { 0x567c49d8, -0x6d987f94, -0x3586e196, 0x066d04cc, -0x1c33c6b5, -0x5960a9bb, -0x5f87732e, 0x5c95b686 },
+        { 0x0d14a954, 0x2ac519c1, -0x6b4a0570, -0x150b8b4c, -0x560785a6, -0x19507c7e, -0x78641f6c, 0x0dea6db1 }
+    },
+    {
+        { -0x29578686, 0x15baeb74, -0x053be8ce, 0x7ef55cf1, 0x3c8b05c5, 0x29001f5a, 0x52eaccfb, 0x0ad7cc87 },
+        { 0x7344e5ab, -0x559940ac, -0x70e4bcf7, -0x25eda778, -0x02a9b4d1, 0x5e87d2b3, 0x5483b1dd, 0x5b2c7888 },
+        { 0x793408cf, 0x52151362, 0x19963d94, -0x14f0e8fd, -0x77c26b9a, -0x57cc4d06, 0x75003c78, 0x093a7fa7 }
+    },
+    {
+        { 0x60a91286, -0x47169fbc, 0x7778d3de, 0x7f3fd804, -0x4075a1d3, 0x67d01e31, -0x3d849ac2, 0x7b038a06 },
+        { 0x3a16d7be, -0x1aef821a, -0x650ccd31, -0x5c880024, 0x440b677f, 0x70d5bf18, -0x5b5cebfd, 0x6a252b19 },
+        { -0x2c966f0d, -0x6126e62b, -0x24b1460e, 0x5213aebb, 0x4cb99135, -0x38f715fb, 0x72260e56, 0x58ded57f }
+    },
+    {
+        { 0x5b0fd48b, -0x2592acda, -0x6c405678, -0x769f7dcf, 0x61d57e28, -0x287536ce, 0x3a5c8143, 0x79f2942d },
+        { -0x16bec289, 0x78e79dad, -0x68d61983, -0x0da8062b, -0x1c85581a, 0x59db910e, -0x4461fc64, 0x6aa11b5b },
+        { -0x49377217, -0x6825d0db, -0x530dfe97, 0x251ba7ea, -0x10b14b1c, 0x09b44f87, -0x4395825b, 0x7d90ab1b }
+    },
+    {
+        { -0x694c3c69, 0x1a07a3f4, -0x70b1dace, 0x11ceaa18, -0x588ae410, 0x7d9498d5, 0x508dd8a0, 0x19ed161f },
+        { -0x58fe9402, -0x6533597d, -0x0d3af493, -0x6fafa0b3, -0x331bca56, 0x6b610d5f, 0x6198ff96, 0x19a10d44 },
+        { -0x78231936, 0x560a2cd6, -0x799b30b3, 0x7f3568c4, 0x22803a38, -0x78be16ae, 0x595653fc, 0x483bdab1 }
+    },
+    {
+        { -0x4b257f0a, -0x2930b2f6, -0x07cf8020, -0x7db7c1bb, -0x5190625c, 0x05005269, -0x63087886, 0x1c705290 },
+        { -0x78cb05b7, -0x0587f0ec, 0x360534e0, 0x106f0b70, -0x1c1cf843, 0x2210776f, -0x22195f02, 0x3286c109 },
+        { -0x78b1672c, 0x32ee7de2, -0x4681f3a0, 0x14c362e9, 0x6a60a38a, 0x5781dcde, -0x558557c0, 0x217dd5ea }
+    },
+    {
+        { -0x4173f138, -0x7420e047, -0x1cf5fd7e, 0x00bae7f8, -0x5293b094, 0x4963991d, 0x5df6f60a, 0x07058a6e },
+        { 0x248e1eb0, -0x62483b30, 0x4d74bf52, -0x1f89681f, 0x3c562354, 0x1e6a9b17, 0x795a4965, 0x7fa7c21f },
+        { -0x24ce0981, -0x1614fd3c, 0x10bcfb2b, -0x12da0277, 0x5c5cddb4, 0x46c8131f, -0x5f346432, 0x33b21c13 }
+    },
+    {
+        { 0x5ee38c5b, -0x65504650, 0x071a13c7, -0x4062d2b2, -0x16ccd6f6, -0x71119193, -0x51ef68e9, 0x1c3bab17 },
+        { 0x087d8e31, 0x360692f8, -0x2d8e9c09, -0x0b2339c9, 0x65ea5963, 0x25a4e620, 0x5ac160d9, 0x659bf72e },
+        { -0x38354850, 0x1c9ab216, 0x07bbc3cc, 0x7d65d374, 0x504a58d5, 0x52744750, 0x131a2990, 0x09f2606b }
+    },
+},
+{
+    {
+        { 0x7c6691ae, 0x7e234c59, 0x0a85b4c8, 0x64889d3d, 0x354afae7, -0x251d36f4, 0x0c6a9e1d, 0x0a871e07 },
+        { 0x744346be, 0x40e87d44, 0x15b52b25, 0x1d48dad4, -0x5ec49fc2, 0x7c3a8a18, 0x2fcdbdf7, 0x4eb728c1 },
+        { 0x4bbc8989, 0x3301b599, 0x5bdd4260, 0x736bae3a, 0x19d59e3c, 0x0d61ade2, 0x2685d464, 0x3ee7300f }
+    },
+    {
+        { -0x7be18ae8, 0x43fa7947, 0x639c46d7, -0x1a3905a7, -0x1cfad48c, -0x5ef9a1e3, -0x30476fd0, 0x7d47c6a2 },
+        { -0x61822949, -0x0a2daa1c, 0x610b1eac, -0x7fe9eea4, -0x6d1e7836, 0x3c99975d, -0x686eda3e, 0x13815762 },
+        { -0x710f2920, 0x3fdad014, -0x6eab90c4, -0x62c18b66, 0x26bb8157, 0x71ec6210, 0x34c9ec80, 0x148cf58d }
+    },
+    {
+        { -0x651b8a93, -0x1da8d083, -0x770cb781, 0x56c345bb, 0x6960a88d, -0x602ef493, 0x4eaea1b9, 0x278febad },
+        { 0x7934f027, 0x46a492f6, -0x097bf557, 0x469984be, -0x769ee7ac, 0x5ca1bc2a, -0x42a2442c, 0x3ff2fa1e },
+        { -0x736cc69a, -0x4e5597e1, 0x20290c98, -0x73de6b64, 0x219d3c52, 0x39115291, -0x01639885, 0x4104dd02 }
+    },
+    {
+        { -0x24f69548, -0x7edeb1fa, 0x0ce44f35, 0x21a8b6c9, 0x409e2af5, 0x6524c12a, -0x71035b7f, 0x0165b5a4 },
+        { 0x1124422a, 0x72b2bf5e, -0x675cc54b, -0x5e05f3cd, -0x05ad499a, -0x6b349eff, -0x5050ac2b, 0x2c863b00 },
+        { -0x5f7b958a, -0x0e6f5b8c, -0x32d08340, 0x12eff984, 0x58aa2b8f, 0x695e2906, -0x40013748, 0x591b67d9 }
+    },
+    {
+        { -0x60e74aa3, -0x66464c8f, -0x5e739be2, -0x1b9a1a06, -0x3d60fa13, 0x61081136, 0x7030128b, 0x489b4f86 },
+        { -0x7f4b6406, 0x312f0d1c, -0x540c1376, 0x5979515e, -0x610fe378, 0x727033c0, -0x35708435, 0x3de02ec7 },
+        { 0x3aeb92ef, -0x2dcdefd3, 0x6116a861, -0x1e9dac4c, 0x190baa24, 0x3d7eabe7, 0x496cbebf, 0x49f5fbba }
+    },
+    {
+        { 0x1e9c572e, 0x155d628c, -0x3a77b8bf, -0x75b27954, 0x515763eb, -0x6e5cad0a, -0x7798aea5, 0x06a1a6c2 },
+        { -0x75a4302c, 0x30949a10, -0x439b8c15, -0x23bf2290, 0x307c0d1c, -0x6d3d6b3f, -0x3405918c, 0x5604a86d },
+        { 0x7c1764b6, 0x7288d1d4, -0x1fbe74af, 0x72541140, 0x18acf6d1, -0x60fce5a0, -0x01d8bd3a, 0x20989e89 }
+    },
+    {
+        { -0x7a1513d2, 0x1674278b, 0x7acb2bdf, 0x5621dc07, 0x61cbf45a, 0x640a4c16, -0x08fa6a2d, 0x730b9950 },
+        { 0x3a2dcc7f, 0x499777fd, -0x5ab0276e, 0x32857c2c, -0x2df81c60, -0x5d86279c, 0x0ca67e29, 0x0403ed1d },
+        { -0x78b13aae, -0x36b4d2cb, -0x67db9073, -0x3a193731, 0x16c035ce, -0x0834b906, 0x08303dcc, 0x5bd74543 }
+    },
+    {
+        { 0x15e7792a, -0x7a3b6cdf, -0x42322237, -0x39b3765e, -0x525c289e, -0x62e1c258, 0x3067f82c, 0x5bb7db12 },
+        { 0x28b24cc2, 0x7f9ad195, 0x6335c181, 0x7f6b5465, 0x4fc07236, 0x66b8b66e, 0x7380ad83, 0x133a7800 },
+        { -0x39359d42, 0x0961f467, 0x211952ee, 0x04ec21d6, -0x642ab890, 0x18236077, 0x58f0e0d2, 0x740dca6d }
+    },
+},
+{
+    {
+        { -0x12d9e51b, 0x3906c72a, -0x771eff09, -0x65497027, -0x0cc9fe69, -0x0a16fa66, -0x40d492b9, 0x0e53dc78 },
+        { -0x2c0f50f5, 0x50b70bf5, -0x1cd18e09, 0x4feaf48a, -0x5aa442cc, 0x60e84ed3, 0x3f50d1ed, 0x00ed489b },
+        { 0x7971877a, -0x46f7d641, 0x6d17e631, 0x5e444463, 0x18276893, 0x4d05c52e, 0x5a4a4af5, 0x27632d9a }
+    },
+    {
+        { -0x78150025, -0x567d7a2f, -0x272f579c, -0x5a4b0445, 0x022663f7, -0x49a70d81, -0x26631d7e, 0x3bbc2b22 },
+        { 0x54b260ce, -0x2ee00faf, 0x72f95270, -0x27923c72, 0x267cc138, 0x601fcd0d, 0x29e90ccd, 0x2b679164 },
+        { 0x583c0a58, -0x46e836ae, 0x0fe4c6f3, 0x653ff9b8, -0x4320c3f4, -0x64f25829, -0x54ab29f2, 0x43a0eeb6 }
+    },
+    {
+        { 0x57875fe8, 0x3ac63223, -0x0a043471, -0x262b0b14, 0x382bb620, -0x72117b6d, 0x4c799fdc, 0x50c5eaa1 },
+        { 0x6d4a5487, 0x396966a4, -0x53d44c46, -0x07ee5e76, 0x5628b26b, 0x66e4685b, -0x626d646e, 0x70a47702 },
+        { -0x290d04c4, -0x22f12375, -0x63384860, 0x54c63aa7, 0x2c8d9f1a, -0x51f4fcd5, 0x602967fb, 0x6f9ce107 }
+    },
+    {
+        { 0x3520e0b5, 0x13969306, -0x7715fc02, 0x437fcf7c, -0x2c36a644, -0x082b3bf5, -0x076c2127, 0x699154d1 },
+        { -0x321e3dd6, -0x52efab4f, 0x48eb32df, -0x3b5716fe, -0x53323f16, 0x5f3e7b33, -0x038669c2, 0x72364713 },
+        { -0x4b4d8ada, 0x315d5c75, 0x0236daa5, -0x33347bd3, 0x345fee8e, 0x22f0c8a3, 0x7d39dbed, 0x73975a61 }
+    },
+    {
+        { -0x0bbcc1ba, 0x6f37f392, 0x1f566b18, 0x0e19b9a1, 0x1fd1d662, 0x220fb78a, -0x5c7e36b3, 0x362a4258 },
+        { 0x6375da10, -0x1bfdb207, 0x1830c870, 0x78d3251a, 0x658cd91c, -0x6fd4e6b8, 0x29b7438a, 0x7e18b10b },
+        { 0x2b6beb2f, -0x6f8e26ed, 0x28418247, 0x0f26e9ad, -0x42136da3, -0x1546e137, -0x0b750d22, 0x4be65bc8 }
+    },
+    {
+        { 0x57c26234, 0x1d50fba2, -0x214f9875, 0x7bd4823a, -0x59ac750b, -0x3d4f2392, 0x351da73e, 0x5665eec6 },
+        { -0x5c918fd8, 0x78487feb, 0x1dd8ce34, 0x5f3f1300, 0x4b30c489, -0x6cb04ed3, 0x397f0a2b, 0x056c244d },
+        { 0x43bfb210, -0x24c11ff7, 0x20800ac2, 0x49720187, 0x73bd8667, 0x26ab5d61, -0x54dfb6c8, 0x20b209c2 }
+    },
+    {
+        { 0x16bd3289, 0x1fcca945, 0x41420428, 0x448d65aa, 0x16a55d62, 0x59c3b7b2, 0x4e612cd8, 0x49992cc6 },
+        { -0x3f804cb5, 0x549e342a, 0x21373d93, 0x02d82208, -0x532e0a99, -0x43d9d290, -0x0435387c, 0x7a92c9fd },
+        { 0x70f801de, 0x65bd1bea, -0x01b61d76, 0x1befb7c0, -0x4e4d51b6, -0x579cf933, 0x265c2a09, 0x3b7ac0cd }
+    },
+    {
+        { 0x22ed39a7, -0x0f2ab1b1, 0x5608150a, -0x5d5516e2, -0x1225178b, -0x0bde4d17, 0x6b7de992, 0x31bc531d },
+        { -0x73fe4314, -0x7dd411bd, -0x3f0438c5, 0x530cb525, -0x3e6ac017, 0x48519034, -0x1f65f0a5, 0x265cc261 },
+        { -0x567f068f, -0x20c2ecb3, 0x221a22a7, 0x7a4fb8d1, 0x35aad6d8, 0x3df7d420, 0x6a1a125e, 0x2a14edcc }
+    },
+},
+{
+    {
+        { 0x0478433c, 0x231a8c57, -0x3d7ebc63, -0x484ad8f2, -0x1c26f861, -0x24556616, 0x6c2b03d9, 0x2c03f525 },
+        { 0x52cfce4e, -0x20b711f9, 0x06ec08b7, -0x3c00050d, -0x46aba63c, 0x05710b2a, -0x69c15c73, 0x161d25fa },
+        { 0x7b53a47d, 0x790f1875, -0x30f3a787, 0x307b0130, 0x257ef7f9, 0x31903d77, -0x42694451, 0x699468bd }
+    },
+    {
+        { 0x6aa91948, -0x2722c21a, 0x2fc0d2cc, 0x485064c2, 0x34fdea2f, -0x64b7db9a, 0x6c4a2e3a, 0x293e1c4e },
+        { -0x0b250131, -0x42e0d0ba, -0x5b802909, 0x7cef0114, 0x4a47b37f, -0x2ce00226, 0x73905785, 0x525219a4 },
+        { -0x6daeed1f, 0x376e134b, -0x235ea260, 0x703778b5, 0x461c3111, -0x4fba7651, 0x7f032823, 0x5b605c44 }
+    },
+    {
+        { -0x0f180fb4, 0x3be9fec6, 0x75e34962, -0x7995a862, 0x1e1de61a, 0x5542ef16, -0x33a5422b, 0x2f12fef4 },
+        { 0x20c47c89, -0x469a7fa7, -0x6dc47034, -0x180feff4, 0x02e2ef77, 0x00012565, -0x57514c12, 0x24a76dce },
+        { -0x203f38c0, 0x0a4522b2, 0x40c9a407, 0x10d06e7f, 0x78cff668, -0x3930ebbf, 0x18a43790, 0x5e607b25 }
+    },
+    {
+        { -0x5a6930ec, -0x5fd3bce4, -0x512c1c00, -0x1c3bd2c0, 0x2e0f26db, -0x2dbad980, -0x61ba8f98, 0x201f3313 },
+        { 0x6cdf1818, 0x58b31d8f, -0x3c9da75e, 0x35cfa74f, 0x66e61d6e, -0x1e4c00b1, 0x6ccdd5f7, 0x5067acab },
+        { 0x08039d51, -0x02ad8095, 0x017c0006, 0x18b14964, 0x2e25a4a8, -0x2addf150, 0x62460375, 0x397cba88 }
+    },
+    {
+        { -0x37ec8619, 0x7815c3fb, -0x221ed50f, -0x599e6be0, -0x7a57022b, -0x00563f08, -0x3e1e3dae, 0x771b4022 },
+        { -0x0fa6a64e, 0x30c13093, -0x1656868a, -0x1dc55e73, 0x721d5e26, 0x222fd491, 0x766e6c3a, 0x2339d320 },
+        { 0x513a2fa7, -0x2782267a, -0x062b30f8, -0x0a53648f, 0x1ea283b3, -0x2f943ce5, 0x19971a76, 0x331a1892 }
+    },
+    {
+        { -0x628a8d51, 0x26512f3a, 0x68074a9e, 0x5bcbe288, 0x1180f7c4, -0x7b123e3f, -0x09b65985, 0x1ac9619f },
+        { -0x04b07f3a, -0x0ae990bb, 0x61c775cf, -0x63c93822, -0x6fbe26e4, -0x1c2b17e5, -0x7c4201df, 0x31167c6b },
+        { 0x524b1068, -0x0dd4c7be, -0x11631679, 0x5068343b, 0x4a6250c8, -0x03628e7c, 0x1f08b111, 0x61243634 }
+    },
+    {
+        { 0x1a2d2638, -0x749cb61d, -0x642c02cb, -0x62204900, -0x5c5f945c, 0x7f8bf1b8, 0x78d90445, 0x1522aa31 },
+        { -0x78b17673, -0x2662be25, 0x6c07dc20, 0x09fea5f1, -0x2ff06444, 0x793d2c67, -0x61a100c0, 0x46ebe230 },
+        { 0x69614938, 0x2c382f53, -0x48d292f0, -0x2501bf66, -0x49b90dd9, -0x1737cc6f, 0x0524306c, 0x45fe70f5 }
+    },
+    {
+        { -0x376aeb6f, 0x62f24920, 0x3f630ca2, 0x05f007c8, -0x0a362b48, 0x6fbb45d2, -0x4a85ddbb, 0x16619f6d },
+        { -0x69f3f474, -0x25b78a5a, -0x10f1d0e0, 0x5b68d076, 0x3d0b8fd4, 0x07fb51cf, -0x5f1c6d2c, 0x428d1623 },
+        { 0x01a308fd, 0x084f4a44, 0x76a5caac, -0x57dde63d, 0x43d1bc7d, -0x214721ba, 0x60bd38c6, 0x1d81592d }
+    },
+},
+{
+    {
+        { 0x2f89c8a1, 0x3a4a369a, 0x7c8de80d, 0x63137a1d, 0x78eda015, -0x4353ff76, -0x4b7c4fc1, 0x2cb8b3a5 },
+        { -0x13d5b3c8, -0x27cc2842, 0x0acc20ed, 0x2c916283, -0x6d208a7f, -0x16c5b856, 0x333c4a81, 0x702d67a3 },
+        { -0x34e46f5f, 0x36e417cb, 0x7f11794e, 0x33b3ddaa, -0x77a439f9, 0x3f510808, -0x1957fdf3, 0x24141dc0 }
+    },
+    {
+        { -0x427cea83, -0x6e6da234, 0x22cc8094, 0x3ca12053, 0x3f90d6e4, 0x28e57f18, -0x21d18985, 0x1a4714ce },
+        { 0x3fefee9d, 0x59f73c77, -0x3e306763, -0x4c0e1077, -0x1fd1aba1, -0x1ca204be, 0x47a1b47c, 0x5766120b },
+        { -0x47494801, -0x24df45f1, 0x77511fa1, -0x48cd3c4a, -0x660fd277, -0x56d4ae40, 0x489ca5f1, 0x4f3875ad }
+    },
+    {
+        { -0x118c1140, 0x79ed13f6, 0x69110bb1, -0x5a39ad93, -0x79fc79f4, -0x1b76d73d, -0x028fa60b, 0x722a1446 },
+        { 0x4932ab22, -0x380389d1, 0x2f4c3c1b, 0x7ac0edf7, -0x65576a18, 0x5f6b55aa, -0x52f5ff7f, 0x3680274d },
+        { -0x573077e7, -0x2f6a6017, -0x7b8a5664, -0x2f566ab0, 0x20b09cc5, 0x6eac1733, 0x331b1095, 0x628ecf04 }
+    },
+    {
+        { 0x5c74ccf1, -0x64be5308, 0x08265251, -0x498cce7f, 0x11adb147, -0x6636d513, 0x34ecb40f, 0x7a47d70d },
+        { -0x562f2244, -0x67434ee8, 0x08b4802b, -0x11bb61cc, -0x47594efc, -0x78f76dda, 0x45c7915d, 0x685f349a },
+        { -0x33bc5b0b, 0x60a0c4cb, 0x3677bea9, 0x775c66ca, 0x2ff8f5ed, -0x5e855e8b, 0x0e01fdc0, 0x11ded902 }
+    },
+    {
+        { 0x3bea93b7, 0x471f95b0, 0x3313abd3, 0x0552d7d4, -0x1e81c085, -0x426c8f1e, -0x4df1a414, 0x7b120f1d },
+        { -0x351018fc, -0x76f187f7, -0x1cf17394, -0x78d7d693, -0x6d514e37, 0x4c5cd2a3, 0x5771531f, 0x194263d1 },
+        { -0x79afd286, 0x17d2fb3d, 0x50a69352, -0x4a9b27bc, -0x59f128a3, 0x7da962c8, 0x318736aa, 0x00d0f85b }
+    },
+    {
+        { -0x0289de3f, -0x598ac3e2, 0x445671f5, 0x69c0b4a7, 0x05b23c11, -0x68e0ad8c, 0x51a8c7cd, 0x387bc748 },
+        { 0x777c84fd, -0x6874ebd2, 0x05a8c062, -0x0bfd9bb9, -0x1819ed39, -0x59852ae5, -0x672295cd, 0x2f7b4596 },
+        { 0x4a52a9a8, -0x7e76b4b3, -0x09477cd1, -0x5226c1ee, -0x49e429c8, 0x184d8548, -0x29360933, 0x3f1c62db }
+    },
+    {
+        { 0x148f693d, 0x3fad3e40, -0x6b14658e, 0x052656e1, 0x184f4e2f, 0x2f4dcbfd, -0x3b7d1e75, 0x406f8db1 },
+        { -0x6e6ef3e1, 0x2e8f1f00, -0x400d1ed4, -0x5b20b020, -0x116d8bc8, 0x60c6560a, -0x53103706, 0x6338283f },
+        { 0x7f191ee4, -0x619cf2d4, -0x43c00990, 0x4fbf8301, 0x7afb73c4, 0x787d8e4e, -0x170a705b, 0x50d83d5b }
+    },
+    {
+        { -0x4b2c4993, -0x3f533070, 0x61732e60, -0x58fa621b, 0x70c6b0ba, 0x033d1f78, 0x26d946e4, 0x584161cd },
+        { -0x3ee5e769, -0x7a97c6ea, -0x1af92ff8, 0x2d69a4ef, -0x099b42ff, 0x39af1378, 0x361517c6, 0x65942131 },
+        { 0x72d27ca2, -0x440d4e60, -0x042138fc, -0x40c6c3a7, -0x1d9d47e2, -0x16724432, 0x3029b589, 0x02eebd0b }
+    },
+},
+{
+    {
+        { 0x7b85c5e8, -0x789a4961, -0x2e97454e, 0x6ff0678b, 0x1d330f9b, 0x3a70e77c, -0x4f507184, 0x3a5f6d51 },
+        { -0x59f253a1, 0x61368756, -0x145423a9, 0x17e02f6a, 0x4cce0f7d, 0x7f193f2d, -0x76132310, 0x20234a77 },
+        { 0x7178b252, 0x76d20db6, -0x2ae12ea0, 0x071c34f9, -0x4c1bee90, -0x09d5b5e0, 0x3cffe366, 0x7cd68235 }
+    },
+    {
+        { 0x68acf4f3, -0x599a32a0, 0x3cd7e3d3, 0x42d92d18, 0x336025d9, 0x5759389d, 0x2b2cd8ff, 0x3ef0253b },
+        { -0x2778054a, 0x0be1a45b, -0x45bfc492, 0x2a846a32, -0x1691a000, -0x266defee, 0x3bdc0943, 0x2838c886 },
+        { 0x4a465030, -0x2e944f31, 0x15c577ab, -0x05b694bf, -0x0b54be63, -0x7d305176, 0x06a82812, 0x21dcb8a6 }
+    },
+    {
+        { -0x4188ce46, -0x6572ff06, 0x629e1889, -0x7dfc9f82, 0x43f3d97f, -0x4d33fdc9, 0x6c6f678b, 0x5d840dbf },
+        { -0x73626038, 0x5c600446, -0x2bd55c35, 0x2540096e, 0x12ee2f9c, 0x125b4d4c, -0x6b5ce255, 0x0bc3d081 },
+        { 0x309fe18b, 0x706e380d, -0x461e9a39, 0x6eb02da6, 0x7dae20ab, 0x57bbba99, 0x2ac196dd, 0x3a427623 }
+    },
+    {
+        { -0x24bb8135, 0x3bf8c172, -0x39d7d243, 0x5fcfc41f, 0x75aa15fe, -0x7f530040, 0x24e1a9f9, 0x0770c9e8 },
+        { -0x758f7b06, 0x4b42432c, -0x20461abb, -0x7675e61d, -0x63a71ba3, -0x4160ffdf, -0x5e92142f, 0x1ff177ce },
+        { 0x45b5b5fd, -0x309e2666, 0x1b3a7924, -0x79f67b17, 0x303e3e89, -0x18cff6e7, 0x41500b1e, 0x39f264fd }
+    },
+    {
+        { -0x01f6841f, -0x2e64b555, -0x201fe6d7, -0x5b92031f, 0x2ca6f1ff, -0x3c36f76c, 0x2c35f14e, 0x65c62127 },
+        { -0x24181d64, -0x5852cbe9, 0x2b9c139c, -0x426bc896, -0x6ca68457, -0x5f16e472, 0x68889840, 0x1712d734 },
+        { -0x31ce6c23, -0x18d47608, -0x5eda3f45, 0x4d103356, 0x2e1cfe83, 0x0419a93d, -0x4e631d8e, 0x22f9800a }
+    },
+    {
+        { -0x65910254, 0x42029fdd, 0x34a54941, -0x46ed3142, -0x78420c85, 0x640f64b9, -0x7a67354c, 0x4171a4d3 },
+        { 0x3e9ef8cb, 0x605a368a, -0x5aafb8eb, -0x1c163fde, 0x5f24248f, 0x553d48b0, 0x647626e5, 0x13f416cd },
+        { -0x6636b374, -0x05d8a756, -0x4fff47f9, 0x23006f6f, -0x5225ac6e, -0x042d6e23, 0x574bd1ab, 0x508214fa }
+    },
+    {
+        { 0x53d003d6, 0x461a15bb, -0x430c369b, -0x4defd778, 0x6c683a5a, 0x27c57675, -0x37934bb9, 0x3a7758a4 },
+        { 0x3ed6fe4b, -0x3dfd96eb, 0x511d77c4, -0x59a598c7, 0x2c14af94, -0x3421d9ba, 0x6faba74b, 0x22f960ec },
+        { -0x6c51af8a, 0x548111f6, 0x1dfd54a6, 0x1dae21df, -0x0ceea19b, 0x12248c90, -0x72180b6c, 0x5d9fd15f }
+    },
+    {
+        { -0x1128ade2, 0x3f244d2a, 0x432e9615, -0x71c56fd8, 0x2e9c16d4, -0x1e9b4589, 0x47eb98d8, 0x3bc187fa },
+        { 0x6d63727f, 0x031408d3, -0x28384acd, 0x6a379aef, -0x33511db5, -0x561e703b, 0x4f8fbed3, 0x332f3591 },
+        { -0x15793df4, 0x6d470115, 0x6c46d125, -0x66754835, 0x3a660188, -0x2887cd4b, -0x6f9045fd, 0x450d81ce }
+    },
+},
+{
+    {
+        { -0x4d351f4b, 0x23264d66, -0x14359a8a, 0x7dbaed33, -0x0f2db538, 0x030ebed6, -0x089caaf0, 0x2a887f78 },
+        { -0x27bac6fe, -0x0751b2d6, -0x1724d2e3, 0x7018058e, -0x382d3ee2, -0x554c66a1, 0x24ccca79, 0x53b16d23 },
+        { 0x5c012d4f, 0x2a23b9e7, -0x351e0d16, 0x0c974651, 0x675d70ca, 0x2fb63273, -0x79bbfc0b, 0x0ba7250b }
+    },
+    {
+        { -0x79079264, -0x229ca76d, -0x1ec57a5c, 0x61699176, 0x4eaa7d57, 0x2e511195, -0x049f4205, 0x32c21b57 },
+        { 0x029c6421, -0x44f2e703, -0x76d670fe, -0x43d2ebdf, -0x74daf16a, -0x7cb8071a, 0x032d71c9, 0x7b9f2fe8 },
+        { 0x319e0780, -0x2787dc33, -0x76888a3b, -0x103b303f, -0x65f54c09, 0x4854fb12, 0x7238c371, 0x12c49d41 }
+    },
+    {
+        { -0x7c866abe, 0x09b3a017, -0x552a11c1, 0x626dd08f, -0x148feb61, -0x45ff4312, -0x5f5bbb37, 0x1421b246 },
+        { -0x0017c897, 0x0950b533, -0x71e2942f, 0x21861c1d, 0x1302e510, -0x0fdd27c8, 0x6391cab4, 0x2509200c },
+        { -0x73db5839, 0x4aa43a8e, -0x270fa10b, 0x04c1f540, 0x0b3eb9dc, -0x5245a1f4, 0x48a49ce3, 0x2ab55044 }
+    },
+    {
+        { 0x1c5d3afa, -0x23f8539d, -0x06207394, 0x58615171, -0x628c1d50, 0x72a079d8, -0x4b151ea3, 0x7301f4ce },
+        { 0x6f0f5dec, 0x2ed22726, 0x5ed50824, -0x67db11bf, -0x6b972beb, -0x7f841384, -0x4ade1dc1, 0x7093bae1 },
+        { -0x298dd3bf, 0x6409e759, 0x72bf729b, -0x598b1e31, 0x3c21e569, -0x43f5db15, 0x4ebacb23, 0x390167d2 }
+    },
+    {
+        { -0x5d0dedf5, -0x2844fab5, -0x4efa7649, -0x1d463152, -0x0c3f1242, 0x3fe8bac8, 0x7112cb69, 0x4cbd4076 },
+        { -0x45cac0e4, 0x27f58e3b, -0x4095bc9f, 0x4c47764d, 0x6e562650, -0x50443b1b, -0x551e5ba3, 0x07db2ee6 },
+        { 0x29c58176, 0x0b603cc0, 0x5cb15d61, 0x5988e382, -0x230f5273, 0x2bb61413, 0x74183287, 0x7b8eec6c }
+    },
+    {
+        { -0x03c7948d, 0x32fee570, -0x25c57339, -0x2574febf, -0x37697ca7, -0x68a002f6, -0x4ecd57ab, 0x6ee809a1 },
+        { 0x2cd27cb0, -0x1b35bf88, -0x04169843, -0x25063cdd, -0x752be162, -0x4d642cb6, 0x626ede4d, 0x72810497 },
+        { -0x030279c6, -0x6bbb44cf, 0x3e4e48c5, 0x2fe3690a, -0x2f7705db, -0x23d63799, -0x2e8cd6d2, 0x13bd1e38 }
+    },
+    {
+        { 0x1dfac521, 0x223fb5cf, 0x6f554450, 0x325c2531, 0x659177ac, 0x030b98d7, 0x4f88a4bd, 0x1ed018b6 },
+        { 0x696149b5, -0x2cd4b328, -0x7e275549, -0x1aa6c829, -0x51edd46c, 0x0bcb2127, -0x4ebf6650, 0x41e86fcf },
+        { -0x47fd5950, 0x3630dfa1, 0x42ad3bd5, -0x77f078b9, -0x113a5b2c, 0x0af90d6c, 0x37cdc5d9, 0x746a247a }
+    },
+    {
+        { 0x78d941ed, 0x6eccd852, -0x2dd087bd, 0x2254ae83, 0x7bbfcdb7, -0x3add2fd2, -0x400f1b1e, 0x681e3351 },
+        { 0x2b7b9af6, -0x2ace4743, 0x37fc5b51, 0x50050935, -0x3a6cab93, 0x232fcf25, 0x2bb40f49, 0x20a36514 },
+        { -0x7cfcb0bb, -0x749b4a63, 0x1fa20efb, 0x2f8b71f2, -0x459aaf1c, 0x69249495, 0x45d5472b, 0x539ef98e }
+    },
+},
+{
+    {
+        { 0x1cae743f, -0x2f8b276a, -0x11e39c13, -0x0792e70b, -0x180b12d7, -0x68423aa5, 0x663ab108, 0x4cbad279 },
+        { -0x59dfad8b, 0x6e7bb6a1, 0x413c8e83, -0x55b0de29, -0x1770a34e, 0x6f56d155, -0x59cba41f, 0x2de25d4b },
+        { -0x5f28e033, -0x7f2e6fdc, -0x04d77508, -0x3ada3df6, 0x5f3a6419, -0x4e5c68b5, -0x1dff8dcd, 0x7d7fbcef }
+    },
+    {
+        { -0x0c3d6f6c, -0x3283a23b, 0x2a9105ab, -0x387e5d66, 0x421c3058, -0x7f39e2ca, -0x23272b29, 0x4f9cd196 },
+        { 0x266b2801, -0x0510e196, -0x2a8c60ea, -0x7993973c, 0x1b03762c, -0x0975d044, -0x7848a573, 0x5975435e },
+        { 0x6a7b3768, 0x199297d8, 0x1ad17a63, -0x2f2fa7dc, 0x5c1c0c17, -0x45fd6353, 0x387a0307, 0x7ccdd084 }
+    },
+    {
+        { 0x6760cc93, -0x64f37be8, 0x1ab32a99, -0x3251ff86, 0x620bda18, -0x5772137a, -0x7e6f35bc, 0x3593ca84 },
+        { 0x6d260417, -0x2359bdd4, -0x6b7dbf43, -0x51eac2b0, -0x04973989, -0x563f3e4c, 0x61d0cf53, 0x428bd0ed },
+        { 0x5e849aa7, -0x6dece766, 0x65d8facd, -0x2b273ccb, 0x53fdbbd1, -0x73adaba5, -0x25d29c1a, 0x27398308 }
+    },
+    {
+        { 0x0a702453, -0x465ef1b4, -0x2a82e422, 0x0fa25866, -0x32d82509, -0x0046264b, 0x492c33fd, 0x572c2945 },
+        { 0x435ed413, 0x42c38d28, 0x3278ccc9, -0x42af0ca0, 0x79da03ef, -0x44f854e6, -0x4173ccab, 0x269597ae },
+        { -0x2932cf42, -0x388038bb, -0x1c455105, -0x1b20172d, -0x55a225f4, -0x5dd377d0, -0x3fa43580, 0x7f985498 }
+    },
+    {
+        { 0x0fbf6363, -0x2ca9eaae, -0x30b2045a, 0x08045a45, -0x78c05f3e, -0x113db044, -0x2964ed19, 0x30f2653c },
+        { -0x60f41ee9, 0x3849ce88, 0x7b54a288, -0x7ffa52e5, 0x23fc921c, 0x3da3c39f, 0x0a31f304, 0x76c2ec47 },
+        { -0x553ef37b, -0x75f736c8, -0x24d89435, 0x46179b60, 0x0e6fac70, -0x56df3fe2, 0x596473da, 0x2f1273f1 }
+    },
+    {
+        { 0x55a70bc0, 0x30488bd7, -0x0e2bbd19, 0x06d6b5a4, -0x43a69e9e, -0x152e5962, -0x123a087c, 0x38ac1997 },
+        { -0x751fe1ef, 0x4739fc7c, 0x4a6aab9f, -0x02ad8b70, -0x788d70d2, 0x41d98a82, -0x27a4960e, 0x5d9e572a },
+        { -0x58ae4ec5, 0x0666b517, 0x7e9b858c, 0x747d0686, 0x454dde49, -0x53533fef, -0x40161964, 0x22dfcd9c }
+    },
+    {
+        { 0x103be0a1, 0x56ec59b4, -0x2da60697, 0x2ee3baec, 0x13f5cd32, 0x797cb294, 0x24cde472, 0x0fe98778 },
+        { -0x3cf2f327, -0x72242d20, -0x5344bccd, -0x527199a1, 0x322a961f, -0x7094da74, 0x5448c1c7, 0x6b2916c0 },
+        { 0x0aba913b, 0x7edb34d1, 0x2e6dac0e, 0x4ea3cd82, 0x6578f815, 0x66083dff, 0x7ff00a17, 0x4c303f30 }
+    },
+    {
+        { 0x0dd94500, 0x29fc0358, 0x6fbbec93, -0x132d855c, -0x3d1d5808, 0x130a155f, -0x48f95e2b, 0x416b151a },
+        { 0x17b28c85, -0x2cf5c42a, 0x39773bea, -0x3a2c8849, 0x1e6a5cbf, -0x39391874, -0x74d5483c, 0x0d61b8f7 },
+        { -0x163ec950, 0x56a8d7ef, 0x58e44b20, -0x42f81a33, 0x1b57e0ab, -0x5019d026, 0x4277e8d2, 0x191a2af7 }
+    },
+},
+{
+    {
+        { 0x2fe09a14, 0x09d4b60b, -0x244e8b82, -0x3c7b0f51, 0x78b5fd6e, 0x58e2ea89, -0x4a1f64f6, 0x519ef577 },
+        { -0x5490b67b, -0x2aaff6a5, 0x4fbfaf1a, 0x04f4cd5b, 0x2a0c7540, -0x6271d12f, -0x4ddedd7a, 0x2bc24e04 },
+        { 0x1124cca9, 0x1863d7d9, -0x47758f72, 0x7ac08145, -0x7a8fce0b, 0x2bcd7309, -0x7547051b, 0x62337a6e }
+    },
+    {
+        { 0x1b3a1273, -0x2e54cdb2, -0x7efaacc0, 0x18947cf1, -0x5673e692, 0x3b5d9567, -0x7fd1e198, 0x7fa00425 },
+        { 0x06ffca16, 0x4bcef17f, 0x692ae16a, -0x21f91e25, 0x614f42b0, 0x0753702d, 0x5b9212d0, 0x5f6041b4 },
+        { 0x028c2705, 0x7d531574, -0x24f28a02, -0x7fce8297, -0x10737223, 0x30fface8, -0x493c1668, 0x7e9de97b }
+    },
+    {
+        { -0x5db2bf23, -0x0ffb419e, 0x0452d41f, -0x45f9a66f, 0x62a44234, -0x7e3ba11f, -0x5ddd9911, 0x4cb829d8 },
+        { -0x619a7a5d, 0x1558967b, -0x6716746e, -0x68366320, 0x6eb3adad, 0x10af149b, -0x0b2c7306, 0x42181fe8 },
+        { 0x07b86681, 0x1dbcaa84, -0x74d98ac5, 0x081f001e, -0x7bfb717f, 0x3cd7ce6a, 0x3f25f22c, 0x78af1163 }
+    },
+    {
+        { 0x7d65318c, 0x3241c00e, -0x2f179219, -0x19411a24, -0x043f73da, 0x118b2dc2, -0x039fc23d, 0x680d04a7 },
+        { 0x0b50babc, -0x7be9142c, 0x28208bee, 0x15087226, -0x463e3c93, -0x5ceb7051, -0x2cd282a3, 0x0d07daac },
+        { 0x695aa3eb, -0x063dbeb6, 0x05a68f21, -0x255bd3b4, 0x7f93963e, 0x7c6c2398, 0x0c3954e3, 0x210e8cd3 }
+    },
+    {
+        { 0x37fe6c26, 0x2b50f161, 0x56e404d8, -0x1efd4328, 0x4c561f6b, 0x12b0f141, -0x2fd7136f, 0x51b17bc8 },
+        { 0x10a71c06, -0x53bdfe0e, -0x0c404fdf, 0x6a65e0ae, 0x393632f7, -0x43bd3ca4, -0x79a0f8be, 0x56ea8db1 },
+        { -0x30acaee7, -0x000a04b5, -0x20eef760, -0x0b676287, -0x65c45cdb, -0x4203159b, 0x74d1a6f2, 0x18a11f11 }
+    },
+    {
+        { -0x2d85a0d4, -0x0429c326, -0x755ef929, -0x0ff03b44, -0x719b5bd0, 0x53fb5c1a, 0x0c1a2e85, 0x04eaabe5 },
+        { 0x3f6bba29, 0x407375ab, -0x66e1b7d2, -0x613c4928, -0x1aa06d17, -0x6637f17e, -0x04f3f51f, 0x307c13b6 },
+        { -0x34754a19, 0x24751021, 0x5c5010eb, -0x03dcbbb7, 0x4e5610a1, 0x5f1e717b, -0x3d8ef32b, 0x44da5f18 }
+    },
+    {
+        { -0x76271534, -0x6ea90195, -0x1dced95f, -0x19486baf, 0x3944eb4e, -0x428b9c27, 0x767203ae, 0x726373f6 },
+        { -0x0e47d14b, 0x033cc55f, 0x411cae52, -0x4ea51c93, -0x7004532d, -0x45bf49e7, 0x532e861f, 0x768edce1 },
+        { -0x14810976, -0x1cfa358e, 0x70eadb23, 0x662cf31f, -0x4b3ba498, 0x18f026fd, -0x4a2d1343, 0x513b5384 }
+    },
+    {
+        { -0x750cb315, 0x5e270287, -0x46b92952, -0x6ff4fbf7, -0x25427aee, 0x6512ebf7, -0x77da707f, 0x61d9b769 },
+        { -0x38d66762, 0x46d46280, 0x5368a5dd, 0x4b93fbd0, -0x2e89a577, 0x63df3f81, -0x465f5ddd, 0x34cebd64 },
+        { 0x49b7d94b, -0x593a58ed, 0x23eb9446, -0x5c0c2ea8, 0x77484834, 0x0416fbd2, 0x2c70812f, 0x69d45e6f }
+    },
+},
+{
+    {
+        { 0x4f460efb, -0x6019d4bd, -0x59c9f82a, -0x212cfc2c, -0x485f25dc, -0x0faddef2, 0x00545b93, 0x237e7dbe },
+        { -0x3ac3ebcf, -0x31e908b5, 0x2072edde, 0x2b9725ce, -0x4a4dc119, -0x47463c91, 0x0b5cc908, 0x7e2e0e45 },
+        { 0x6701b430, 0x013575ed, -0x60f402f0, 0x231094e6, -0x7c1b80de, 0x75320f15, -0x4eeeaa1d, 0x71afa699 }
+    },
+    {
+        { 0x473b50d6, -0x15bdc3e4, 0x3b38ef10, 0x51e87a1f, -0x4d36416b, -0x647b40a1, 0x78f89a1c, 0x00731fbc },
+        { 0x3953b61d, 0x65ce6f9b, -0x505ebe1a, -0x39a7c616, -0x5608a602, 0x0f435ffd, -0x3d4e3d72, 0x021142e9 },
+        { 0x48f81880, -0x1bcf38e8, 0x5ecec119, -0x4069f3de, 0x6bba15e3, -0x49251f7d, 0x47e15808, 0x4c4d6f33 }
+    },
+    {
+        { -0x6770e690, 0x2f0cddfc, -0x4f460ae5, 0x6b916227, 0x779176be, 0x6ec7b6c4, -0x57706058, 0x38bf9500 },
+        { -0x3e82e037, 0x18f7eccf, 0x51403c14, 0x6c75f5a6, -0x0811f321, -0x24218ed5, -0x581b85de, 0x193fddaa },
+        { 0x37e8876f, 0x1fd2c93c, 0x18d1462c, -0x5d09e1a6, 0x39241276, 0x5080f582, -0x40f2b697, 0x6a6fb99e }
+    },
+    {
+        { -0x491bdc3a, -0x114edd4b, -0x0d790072, -0x6c628ff0, 0x1dcf5d8c, -0x6f56d57d, 0x42c5eb10, 0x136fda9f },
+        { 0x560855eb, 0x6a46c1bb, -0x076c0f63, 0x2416bb38, -0x708e533f, -0x28e2eec9, -0x5ce76916, 0x75f76914 },
+        { -0x5cfa422f, -0x06b3204f, -0x6007d3f8, 0x0f364b9d, -0x3c44a776, 0x2a87d8a5, 0x0be8dcba, 0x02218351 }
+    },
+    {
+        { 0x43307a7f, -0x62a58eff, -0x3b825ba1, -0x4f9c2162, -0x416d852d, 0x22bbfe52, -0x02bfbd94, 0x1387c441 },
+        { 0x5ead2d14, 0x4af76638, -0x3583a7d0, -0x5f712780, 0x10211e3d, 0x0d13a6e6, 0x7b806c03, 0x6a071ce1 },
+        { -0x78687508, -0x4a2c3c2f, 0x7f0e4413, 0x722b5a3d, -0x44b88360, 0x0d7b4848, -0x50e1236e, 0x3171b26a }
+    },
+    {
+        { -0x4d75b82f, -0x59f24828, 0x1770a4f1, -0x5940eb2a, 0x53ddbd58, -0x2b5e076d, 0x344243e9, 0x6c514a63 },
+        { -0x68a9b358, -0x56d0ce70, 0x2275e119, -0x008447b4, -0x5b78aeb0, 0x4f55fe37, 0x3cf0835a, 0x221fd487 },
+        { 0x3a156341, 0x2322204f, -0x45f5fcd3, -0x048c1f17, 0x410f030e, -0x031f22b4, -0x046db556, 0x48daa596 }
+    },
+    {
+        { -0x37b3686d, 0x14f61d5d, -0x10be7dfa, -0x66be061d, 0x346277ac, -0x320a4771, 0x0e8a79a9, 0x58c837fa },
+        { 0x5ca59cc7, 0x6eca8e66, 0x2e38aca0, -0x57b8dab5, -0x2de1e832, 0x31afc708, -0x3527b509, 0x676dd6fc },
+        { -0x69036fa8, 0x0cf96885, 0x7b56a01b, 0x1ddcbbf3, 0x4935d66a, -0x233d1883, -0x395a80f6, 0x1c4f73f2 }
+    },
+    {
+        { -0x0383cb7c, -0x4c918f92, -0x3c3e309f, 0x73dfc9b4, 0x781cc7e5, -0x14e28637, 0x7daf675c, 0x70459adb },
+        { 0x305fa0bb, 0x0e7a4fbd, 0x54c663ad, -0x7d62b320, 0x2fe33848, -0x0bde3c7d, 0x1bf64c42, 0x795ac80d },
+        { -0x6e4bd44d, 0x1b91db49, 0x4b02dcca, 0x57269623, 0x1f8c78dc, -0x6020611b, -0x731de02d, 0x5fe16284 }
+    },
+},
+{
+    {
+        { -0x6aeeac77, 0x315c29c7, -0x79d08b32, -0x281f1af9, -0x7a6d8bce, 0x0c4a7621, 0x4a25a1e4, 0x72de6c98 },
+        { 0x4d077c41, -0x1d86f552, -0x248b965d, -0x746c7d90, -0x7542e95e, 0x6eb632dc, -0x55f9b48e, 0x720814ec },
+        { -0x40955cf0, -0x51654aad, -0x7f9291e5, 0x050a50a9, -0x5200aec7, -0x6d448bfd, 0x45be618b, 0x0394d276 }
+    },
+    {
+        { -0x4dcaba5c, -0x0ac69bdb, -0x67044d6a, 0x15a7a27e, 0x636fdd86, -0x5493ad44, 0x419334ee, 0x79d995a8 },
+        { -0x7a81120c, 0x4d572251, -0x1e616c3b, -0x1c8db123, 0x0b797035, -0x758ebdf2, -0x785418bd, 0x3b3c8336 },
+        { 0x1195dd75, -0x3275715a, 0x1dd9a82f, -0x5afb2758, -0x5ca7864a, 0x540dca81, 0x79c86a8a, 0x60dd16a3 }
+    },
+    {
+        { 0x153e47b8, 0x3501d6f8, 0x14a2f60c, -0x485698ac, 0x455d9523, 0x112ee8b6, -0x7eed1576, 0x4e62a3c1 },
+        { 0x7381e559, 0x35a2c848, -0x287f7d35, 0x596ffea6, -0x245849ad, -0x34688e15, -0x64b2597b, 0x5a08b501 },
+        { 0x516ab786, -0x372b53fc, 0x5295b23d, 0x595af321, -0x24fdcf3f, -0x29122dcc, -0x7da4be34, 0x0929efe8 }
+    },
+    {
+        { -0x52a99ae3, -0x74ce8d49, 0x3fabd717, 0x01581b7a, 0x424df6e4, 0x2dc94df6, 0x2c29284f, 0x30376e5d },
+        { -0x342f0d2d, 0x5f0601d1, 0x6132bb7f, 0x736e412f, 0x238dde87, -0x7c9fbbce, -0x0a3f8ac4, 0x1e3a5272 },
+        { -0x7ea65a64, -0x2d6e7259, 0x3f0713f3, 0x6bdc1cd9, 0x4acd6590, 0x565f7a93, 0x4cb4c128, 0x53daacec }
+    },
+    {
+        { -0x7ad30250, -0x667ad43d, 0x59d6ed0b, 0x2cc12e95, -0x64a53d85, 0x70f9e2bf, 0x7959ae99, 0x4f3b8c11 },
+        { -0x6337582a, 0x4ca73bd7, 0x47e9a9b2, 0x4d4a738f, 0x42f5fe00, -0x0b340ed7, -0x4240f8ae, 0x01a13ff9 },
+        { 0x2ff26412, 0x55b6c9c8, 0x1fb667a8, 0x1ac4a8c9, -0x1488740e, -0x2ad84031, 0x7012a3be, 0x303337da }
+    },
+    {
+        { -0x052d022f, -0x6892c335, 0x37a640a8, -0x34777c69, 0x6734cb25, 0x2ff00c1d, 0x789c2d2b, 0x269ff4dc },
+        { -0x73e36284, -0x6aabddde, 0x1a9b340f, 0x01fac137, -0x6da4b729, 0x7e8d9177, 0x61b3e31b, 0x53f8ad56 },
+        { -0x3f729873, 0x0c003fbd, 0x7ead2b17, 0x4d982fa3, -0x4d1a7d0f, -0x3f819433, -0x20bed5bc, 0x296c7291 }
+    },
+    {
+        { -0x25474a62, -0x204dcdfb, -0x37f6ddb0, 0x465aeaa0, -0x658da2e8, -0x2ecc3ee8, 0x61f117d1, 0x23273702 },
+        { 0x33daf397, 0x7903de2b, -0x3659db4d, -0x2f00f9e7, 0x555b3e18, -0x75e2dad5, 0x52e0b7c0, 0x2b6d581c },
+        { 0x623e7986, 0x3d0543d3, -0x3d875cac, 0x679414c2, 0x726196f6, -0x51bc0f34, -0x7dba1546, 0x7836c41f }
+    },
+    {
+        { -0x7fee6c84, -0x359ae17c, 0x6ef41a28, -0x394f3b92, 0x5f3f8d52, -0x48fde459, -0x15284603, 0x119dff99 },
+        { 0x49e95a81, -0x185dab25, 0x08b0ad73, 0x5192d5d0, -0x2ff503f9, 0x4d20e5b1, 0x2cf25f38, 0x5d55f801 },
+        { -0x0b4ce2b3, 0x43eadfcb, 0x11148892, -0x39afc08c, 0x060d3b17, -0x0111973b, -0x22b5f538, 0x329293b3 }
+    },
+},
+{
+    {
+        { 0x5d7cb208, 0x2879852d, 0x687df2e7, -0x47212290, 0x21687891, -0x23f40055, 0x677daa35, 0x2b44c043 },
+        { -0x1e6b69e6, 0x4e59214f, 0x0d71cd4f, 0x49be7dc7, 0x3b50f22d, -0x6cff302e, -0x036e8dce, 0x4789d446 },
+        { 0x074eb78e, 0x1a1c87ab, -0x66250b99, -0x05392e72, 0x484f9067, 0x3eacbbcd, 0x2bb9a4e4, 0x60c52eef }
+    },
+    {
+        { 0x7cae6d11, 0x702bc5c2, 0x54a48cab, 0x44c7699b, -0x45b6d14e, -0x1043bfaa, -0x26499893, 0x70d77248 },
+        { 0x3bfd8bf1, 0x0b5d89bc, -0x360caae6, -0x4f946dc9, -0x2acfd70b, 0x0e4c16b0, 0x2ccfcaab, 0x10bc9c31 },
+        { 0x3ec2a05b, -0x557517b5, -0x12e87e20, -0x6796610c, 0x708e85d1, 0x794513e4, -0x56890bed, 0x63755bd3 }
+    },
+    {
+        { -0x680e5349, 0x3dc71018, -0x3e9a4428, 0x5dda7d5e, 0x0fa1020f, 0x508e5b9c, 0x37c52a56, 0x27637517 },
+        { 0x2ad10853, -0x4aa05fc2, -0x6119ca97, 0x356f7590, -0x41964770, -0x60060e03, -0x743e907c, 0x0d8cc1c4 },
+        { 0x6eb419a9, 0x029402d3, 0x77b460a5, -0x0f4bb182, -0x2bc3b6aa, -0x30579dd0, 0x7ad166e7, 0x70c2dd8a }
+    },
+    {
+        { -0x471281ed, -0x6e2b6983, -0x28897e86, 0x74252f0a, 0x0d852564, -0x1bf67d20, 0x16a53ce5, 0x32b86138 },
+        { -0x609013f2, 0x65619450, 0x46c6518d, -0x11d18157, 0x67e09b5c, -0x68cc3e0d, 0x63948495, 0x2e0fac63 },
+        { -0x1bb7329c, 0x79e7f7be, 0x087886d0, 0x6ac83a67, -0x5f1b24d2, -0x07602b27, 0x735a4f41, 0x4179215c }
+    },
+    {
+        { 0x286bcd34, -0x1b51cc47, 0x559dd6dc, -0x4810814a, -0x4c2c71e1, 0x278b141f, 0x2241c286, 0x31fa8566 },
+        { -0x282312d6, -0x738f6b19, 0x47d39c70, -0x6804753d, -0x56f926fe, -0x1ec41fcd, 0x0cd99d76, 0x700344a3 },
+        { 0x2e3622f4, -0x507d93be, -0x67ccafd3, -0x3edfd679, 0x2b389123, -0x643e481f, -0x566adb77, 0x24bb2312 }
+    },
+    {
+        { -0x0a07a395, 0x41f80c2a, 0x04fa6794, 0x687284c3, -0x5c45e453, -0x76ba2067, -0x0014a2ea, 0x0d1d2af9 },
+        { 0x32de67c3, -0x4e5712e9, 0x461b4948, 0x3cb49418, 0x76cfbcd2, -0x7142bcbd, 0x1e188008, 0x0fee3e87 },
+        { 0x32621edf, -0x5625755f, 0x59226579, 0x30b822a1, -0x58653e6d, 0x4004197b, 0x18531d76, 0x16acd797 }
+    },
+    {
+        { 0x7887b6ad, -0x36a6393b, 0x5f90feba, -0x6b1e6153, -0x5cbd0afc, 0x16e24e62, 0x18161700, 0x164ed34b },
+        { 0x2d9b1d3d, 0x72df72af, -0x5bcddba6, 0x63462a36, 0x16b39637, 0x3ecea079, -0x46cfdcf7, 0x123e0ef6 },
+        { 0x192fe69a, 0x487ed94c, 0x3a911513, 0x61ae2cea, -0x465b21d9, -0x7884092d, 0x1073f3eb, 0x78da0fc6 }
+    },
+    {
+        { 0x680c3a94, -0x5d607f0f, 0x1ae9e7e6, 0x71f77e15, 0x48017973, 0x1100f158, 0x16b38ddd, 0x054aa4b3 },
+        { -0x1ad43996, 0x5bf15d28, 0x70f01a8e, 0x2c47e318, 0x06c28bdd, 0x2419afbc, 0x256b173a, 0x2d25deeb },
+        { 0x19267cb8, -0x2037b973, 0x66e54daf, 0x0b28789c, 0x666eec17, 0x2aeb1d2a, -0x548258a0, 0x134610a6 }
+    },
+},
+{
+    {
+        { -0x23fd73c4, -0x26ebcf20, 0x5217c771, 0x0eb955a8, 0x2c99a1fa, 0x4b09e1ed, -0x42958bc4, 0x42881af2 },
+        { 0x7c59b23f, -0x350aa13e, 0x154d04f2, -0x665112c2, -0x1ebebe0c, 0x68441d72, 0x3932a0a2, 0x14034513 },
+        { -0x54a352c3, 0x7bfec69a, 0x4cb2cfad, -0x3dc1732d, -0x04c8295e, 0x685dd14b, 0x15677a18, 0x0ad6d644 }
+    },
+    {
+        { 0x47927e9f, 0x79148928, 0x370aa877, 0x33dad6ef, 0x11122703, 0x1f8f24fa, 0x2adf9592, 0x5265ac2f },
+        { 0x417becb5, 0x781a439e, -0x2ef1fd9a, 0x4ac5938c, 0x0692ac24, 0x5da38511, -0x521cedcd, 0x11b065a2 },
+        { -0x65034cba, 0x405fdd30, 0x28e63f54, -0x268dc2bc, 0x5f65aaae, -0x6b3fe210, -0x1eb3f7f7, 0x43e4dc3a }
+    },
+    {
+        { -0x523d395d, -0x1590853d, -0x168e836c, -0x2f16d70a, -0x29ba150b, -0x1d2c8616, -0x3ae00442, 0x46dd8785 },
+        { -0x56c75ae9, -0x43ed380f, 0x3180b2e1, 0x473028ab, -0x0432dab6, 0x3f78571e, 0x6ff6f90f, 0x74e53442 },
+        { 0x375c8898, 0x709801be, -0x1c027cb8, 0x4b06dab5, 0x27230714, 0x75880ced, -0x22d0b3be, 0x2b09468f }
+    },
+    {
+        { -0x7d005fd6, 0x5b979465, -0x01570ab7, -0x25f695af, 0x5f77af9b, -0x5f9caec9, 0x201d1e76, 0x1bcfde61 },
+        { -0x48fe346a, -0x6838b612, -0x495c963d, -0x7c0bc72c, -0x65bfd327, 0x62962b8b, -0x67772085, 0x6976c750 },
+        { 0x246a59a2, 0x4a4a5490, -0x17802270, -0x29c14222, 0x0d2371fa, -0x26bc8399, -0x2cf0712a, 0x69e87308 }
+    },
+    {
+        { -0x7437fcfd, 0x0f80bf02, 0x7a18cefb, 0x6aae16b3, -0x28d3295d, -0x22b815b9, -0x0b12c656, 0x61943588 },
+        { 0x5656beb0, 0x435a8bb1, 0x4f4d5bca, -0x07053646, 0x1548c075, -0x464d873c, -0x176d49de, 0x3eb0ef76 },
+        { -0x6efc607b, -0x2d91a3c2, -0x090cc557, -0x3f161883, 0x70066a93, -0x176973ab, 0x1faaaddd, 0x3c34d188 }
+    },
+    {
+        { 0x2fffe0d9, -0x42a4f471, 0x3ed24fb9, 0x6aa25410, -0x4d97de3c, 0x2ac7d7bc, 0x60dca36a, 0x605b394b },
+        { -0x5f606140, 0x3f9d2b5e, -0x49dc5770, 0x1dab3b6f, 0x72d926c4, -0x5f645c16, 0x3fd8b36d, 0x37419351 },
+        { 0x5a9d1ed2, -0x4b17a91c, 0x6c97a9a2, -0x1017b78a, 0x1e5eee7d, -0x4efb309c, -0x7758e371, 0x2f50b81c }
+    },
+    {
+        { -0x5825add6, 0x2b552ca0, 0x449b0250, 0x3230b336, -0x5b466047, -0x0d3b3a44, 0x58074a22, 0x7b2c6749 },
+        { -0x0397ee45, 0x31723c61, 0x6211800f, -0x634bafb8, 0x47995753, 0x768933d3, 0x02752fcd, 0x3491a535 },
+        { 0x3ed28cdf, -0x2aae9a78, -0x2c9d21c7, 0x12d84fd2, -0x1cc871b1, 0x0a874ad3, 0x7c763e74, 0x000d2b1f }
+    },
+    {
+        { 0x3e94a8ab, -0x69db8874, -0x16587414, 0x0ad6f3ce, 0x0d743c4f, -0x6b75387f, -0x55130334, 0x76627935 },
+        { -0x2f92b599, 0x3d420811, -0x6f1f001d, -0x4103fb7b, -0x42b78422, -0x078f3949, 0x319afa28, 0x6e2a7316 },
+        { -0x292a6561, 0x56a8ac24, 0x3096f006, -0x37248ac2, -0x70b3ad67, 0x477f41e6, -0x09379eec, 0x588d851c }
+    },
+},
+{
+    {
+        { 0x77d1f515, -0x32d59a19, -0x70559f0f, 0x54899187, -0x2543f91b, -0x4e48c444, -0x56833605, 0x654878cb },
+        { -0x72094f02, 0x51138ec7, -0x1a8a0ae5, 0x5397da89, 0x717af1b9, 0x09207a1d, 0x2b20d650, 0x2102fdba },
+        { 0x055ce6a1, -0x69611bfb, 0x1251ad29, 0x36bca768, -0x55825beb, 0x3a1af517, 0x29ecb2ba, 0x0ad725db }
+    },
+    {
+        { -0x64fa907b, -0x013843f4, -0x180a0029, 0x537d5268, 0x4312aefa, 0x77afc662, 0x02399fd9, 0x4f675f53 },
+        { -0x7cb1dba9, -0x23bd984f, 0x70ce1bc5, -0x498abb4b, -0x082ea129, 0x1af07a0b, 0x71a03650, 0x4aefcffb },
+        { 0x0415171e, -0x3cd2c9ca, -0x7667b7c5, -0x32d410ef, -0x2f6baef0, -0x78f59153, -0x5d579a9f, 0x0bccbb72 }
+    },
+    {
+        { 0x50fe1296, 0x186d5e4c, -0x01176082, -0x1fc6847e, 0x507031b0, 0x3bc7f6c5, 0x108f37c2, 0x6678fd69 },
+        { -0x154e5638, 0x185e962f, 0x65147dcd, -0x791819cb, -0x44a4920e, -0x4f6d1fcf, 0x59d6b73e, 0x4024f0ab },
+        { 0x636863c2, 0x1586fa31, 0x572d33f2, 0x07f68c48, 0x789eaefc, 0x4f73cc9f, -0x7152b8ff, 0x2d42e210 }
+    },
+    {
+        { 0x0f537593, 0x21717b0d, 0x131e064c, -0x6eb196f5, 0x752ae09f, 0x1bb687ae, -0x64bdc392, 0x420bf3a7 },
+        { -0x6b202d65, -0x680aeceb, 0x313f4c6a, 0x6155985d, 0x08455010, -0x145ec0f9, -0x472d2cde, 0x676b2608 },
+        { 0x1c5b2b47, -0x7ec7459b, 0x311b1b80, -0x798e4914, -0x43ceca50, 0x7bff0cb1, -0x63f30e20, 0x745d2ffa }
+    },
+    {
+        { 0x21d34e6a, 0x6036df57, -0x66844c30, -0x4e2477d9, -0x378a9506, -0x2c3df63d, 0x4c1dc839, 0x06e15be5 },
+        { 0x2bc9c8bd, -0x40ada5e2, 0x26479d81, -0x15a4d9f8, -0x20feaa25, -0x2aee38f2, -0x69f30a30, 0x1ae23ceb },
+        { 0x1932994a, 0x5b725d87, -0x314e2550, 0x32351cb5, -0x254835fb, 0x7dc41549, 0x278ec1f7, 0x58ded861 }
+    },
+    {
+        { -0x493d3658, 0x2dfb5ba8, -0x0ad3a674, 0x48eeef8e, -0x0ed2ea8d, 0x33809107, 0x531d5bd8, 0x08ba696b },
+        { -0x0d993aa4, -0x27e8c86d, -0x33bab1b7, -0x3736893b, -0x43d93c58, 0x5ce382f8, 0x5485f6f9, 0x2ff39de8 },
+        { -0x3c103a86, 0x77ed3eee, -0x2b00b7ef, 0x04e05517, -0x0e598e35, -0x15c285c1, -0x6b8301ac, 0x120633b4 }
+    },
+    {
+        { 0x4912100a, -0x7d42ceb9, 0x7e6fbe06, -0x21dc8493, 0x11ea79c6, -0x1ee189e7, -0x34c6c422, 0x07433be3 },
+        { -0x6e9effbe, 0x0b949878, -0x13140518, 0x4ee7b13c, -0x6b0f5b40, 0x70be7395, -0x4b2a6e7b, 0x35d30a99 },
+        { 0x5ce997f4, -0x0086bb40, -0x4fa3ae5d, 0x575d3de4, 0x5a76847c, 0x583381fd, 0x7af6da9f, 0x2d873ede }
+    },
+    {
+        { 0x4e5df981, -0x559dfd1f, 0x5015e1f5, -0x5df2a6e9, -0x451de294, 0x18a275d3, 0x01600253, 0x0543618a },
+        { 0x43373409, 0x157a3164, -0x0b557e27, -0x05474812, -0x0a59b7fa, -0x4f6c011a, 0x707fa7b6, 0x2e773654 },
+        { -0x68b3dc3f, 0x0deabdf4, -0x6231b96d, -0x5590f5db, -0x5d6545d4, 0x04202cb8, 0x2d07960d, 0x4b144336 }
+    },
+},
+{
+    {
+        { 0x57c5715e, 0x299b1c3f, 0x6b686d90, -0x69346d62, 0x47235ab3, 0x30048064, -0x5bb2601f, 0x2c435c24 },
+        { 0x53242cec, 0x47b837f7, -0x3fbded0e, 0x256dc48c, -0x1e26d73b, -0x1ddd0405, -0x5275d3f9, 0x48ea295b },
+        { -0x7f077cc1, 0x0607c97c, -0x35da13a5, 0x0e851578, 0x161ebb6f, 0x54f7450b, -0x5f2107f2, 0x7bcb4792 }
+    },
+    {
+        { 0x045224c2, 0x1cecd0a0, 0x69e53952, 0x757f1b1b, 0x5289f681, 0x775b7a92, 0x16736148, 0x1b6cc620 },
+        { 0x2bc73659, -0x7b781c30, 0x059979df, 0x4baf8445, -0x23529041, -0x2e8368a6, -0x2103694a, 0x57369f0b },
+        { 0x75638698, -0x0e5666ff, -0x11559f2d, 0x353dd1be, 0x4c9ba488, -0x7b6b8ecd, 0x43ade311, 0x63fa6e68 }
+    },
+    {
+        { -0x2db4a149, 0x2195becd, -0x3f32bb07, 0x5e41f18c, 0x41ca9ede, -0x20d7f8bc, -0x0ca48299, 0x07073b98 },
+        { 0x6597c168, -0x2ea3dfad, -0x672d7877, -0x608c8c00, 0x3257ba1f, 0x18aee7f1, 0x07346f14, 0x3418bfda },
+        { 0x4ce530d4, -0x2fc39894, 0x3b5df9f4, 0x0b64c047, 0x19b3a31e, 0x065cef8b, 0x533102c9, 0x3084d661 }
+    },
+    {
+        { 0x760321fd, -0x6593178a, -0x6149c528, 0x7fe2b510, -0x7537fa6e, 0x00e7d4ae, -0x44908dc6, 0x73d86b7a },
+        { -0x407b9653, -0x1e094862, -0x1d99cecb, 0x15801004, -0x508be7e5, -0x65b67cd0, 0x049b673c, 0x3ba2504f },
+        { 0x6dba5ab6, 0x0b52b560, -0x444e1255, -0x56ecb0f1, -0x64fb59cb, 0x30a9520d, 0x7973e5db, 0x6813b8f3 }
+    },
+    {
+        { -0x0cea81d7, -0x0e6b35aa, 0x5ef528a5, 0x136d3570, -0x74fa6644, -0x22b31089, 0x24f833ed, 0x7d5472af },
+        { 0x334127c1, -0x67ab4fac, -0x7d0400db, 0x105d0478, 0x44186f4f, -0x24b60807, -0x412f4700, 0x1768e838 },
+        { -0x50cc25b9, -0x2f1078b3, -0x491cc607, 0x00d3be5d, -0x63631132, 0x3f2a8a2f, 0x2352435a, 0x5d1aeb79 }
+    },
+    {
+        { -0x49e4588b, 0x12c7bfae, -0x1d9c4003, -0x47b19de1, 0x5c840dcf, 0x0b47a5c3, -0x335079cc, 0x7e83be0b },
+        { 0x19cd63ca, -0x0a61944d, 0x21d06839, 0x670c1592, 0x2150cab6, -0x4f92a9a5, 0x104f12a3, 0x20fb199d },
+        { 0x6d99c120, 0x61943dee, 0x460b9fe0, -0x79efe0d2, -0x7117a673, 0x6bb2f151, -0x033b8a34, 0x76b76289 }
+    },
+    {
+        { 0x522ec0b3, 0x4245f1a1, 0x2a75656d, 0x558785b2, 0x48a1b3c0, 0x1d485a25, -0x2a701f61, 0x60959ecc },
+        { 0x756286fa, 0x791b4cc1, -0x28b5ea84, -0x24312ce9, -0x158d421a, 0x7e732421, 0x1131c8e9, 0x01fe1849 },
+        { -0x571285f7, 0x3ebfeb7b, -0x1afd8764, 0x49fdc2bb, 0x3c119428, 0x44ebce5d, -0x416b80b6, 0x35e1eb55 }
+    },
+    {
+        { 0x726ccc74, 0x14fd6dfa, 0x2f53b965, 0x3b084cfe, 0x52a2c8b4, -0x0cc51b0b, 0x0d40166a, 0x59aab07a },
+        { -0x3a8c722d, -0x242518ff, -0x4d90e412, -0x063909cb, 0x42f15ef4, 0x61e96a80, -0x509f5b28, 0x3aa1d11f },
+        { -0x6da153db, 0x77bcec4c, 0x60137738, 0x18487184, -0x01560baf, 0x5b374337, -0x371955ba, 0x1865e78e }
+    },
+},
+{
+    {
+        { 0x1c529ccb, -0x6983ab17, 0x64c635fb, 0x30f62692, 0x78121965, 0x2747aff4, -0x150990a4, 0x17038418 },
+        { -0x4991e086, -0x333b4839, -0x0af3d082, 0x44157e25, 0x713eaf1c, 0x3ef06dfc, 0x52da63f7, 0x582f4467 },
+        { 0x20324ce4, -0x39ce842d, -0x5bb7743c, -0x57efbd18, 0x4e5a1364, -0x4de10e75, -0x325d7237, 0x0c2a1c4b }
+    },
+    {
+        { 0x69bd6945, -0x123b7eb8, -0x41e372de, 0x0d6d907d, -0x2aa33a55, -0x39c42dee, -0x5ceb237d, 0x5a6a9b30 },
+        { 0x6f1f0447, -0x2db23830, -0x24783fa7, -0x4dd961c2, -0x044d2d71, -0x2ea4fd8e, -0x3909b789, 0x7c558bd1 },
+        { -0x2c69b9c3, -0x2f13eadc, -0x3ca5db10, 0x12bb628a, 0x1cbc5fa4, -0x5af3c587, 0x0afbafc3, 0x0404a5ca }
+    },
+    {
+        { 0x2a416fd1, 0x62bc9e1b, -0x1cafa675, -0x4a3908d8, 0x3d5d6967, 0x04343fd8, -0x18071168, 0x39527516 },
+        { 0x0aa743d6, -0x73e0bff9, 0x5b265ee8, -0x33452f35, 0x668fd2de, 0x574b046b, -0x352269cd, 0x46395bfd },
+        { 0x1a5d9a9c, 0x117fdb2d, -0x2effa3d6, -0x6388ba44, 0x54d56fea, -0x102b410f, -0x17dd2fea, 0x76579a29 }
+    },
+    {
+        { 0x52b434f2, 0x333cb513, -0x6c217f1f, -0x27cdd7b7, 0x750d35ce, -0x4aaed779, 0x2a2777c1, 0x02c514bb },
+        { 0x49c02a17, 0x45b68e7e, -0x43565c81, 0x23cd51a2, -0x13ddb3e5, 0x3ed65f11, -0x61fa424f, 0x43a384dc },
+        { -0x740e49bb, 0x684bd5da, -0x094ab4ad, -0x04742c82, -0x564f2dad, 0x313916d7, 0x61548059, 0x11609209 }
+    },
+    {
+        { 0x369b4dcd, 0x7a385616, 0x655c3563, 0x75c02ca7, -0x2b0e7fdf, 0x7dc21bf9, -0x6e191fbe, 0x2f637d74 },
+        { 0x29dacfaa, -0x4bb2e997, -0x7beca671, -0x25ad60b4, 0x453d5559, -0x16109c36, -0x3a9671f5, 0x351e125b },
+        { 0x1af67bbe, -0x2b4b64ba, -0x3754769f, -0x29fcfc86, -0x06596605, 0x71dee19f, -0x1831d566, 0x7f182d06 }
+    },
+    {
+        { -0x71de8ade, 0x09454b72, -0x2b7b4728, -0x55a7170c, 0x7f46903c, -0x2ca7dab3, 0x241c5217, 0x44acc043 },
+        { -0x54fe9714, 0x7a7c8e64, 0x15edc543, -0x34a5b5ab, 0x47cd0eda, 0x095519d3, 0x343e93b0, 0x67d4ac8c },
+        { 0x4f7a5777, 0x1c7d6bbb, -0x6e7cec1f, -0x74ca012c, -0x3694b97c, 0x4adca1c6, 0x12ad71bd, 0x556d1c83 }
+    },
+    {
+        { -0x4ee417df, -0x7e0f98aa, 0x10a3f3dd, 0x0faff823, 0x6a99465d, -0x074d2fab, -0x337380fb, 0x097abe38 },
+        { 0x0c8d3982, 0x17ef40e3, 0x15a3fa34, 0x31f7073e, 0x0773646e, 0x4f21f3cb, 0x1d824eff, 0x746c6c6d },
+        { 0x7ea52da4, 0x0c49c987, -0x6423e2bd, 0x4c436955, -0x0833142e, 0x022c3809, 0x4bee84bd, 0x577e14a3 }
+    },
+    {
+        { -0x42b228d5, -0x6b013142, 0x060f2211, -0x0b95b026, -0x3f372e01, 0x124a5977, -0x04ff6d6b, 0x705304b8 },
+        { 0x61a73b0a, -0x0f1d9754, 0x3791a5f5, -0x0d0505f0, 0x6b6d00e9, -0x3e1ec17e, 0x6fd78f42, 0x60fa7ee9 },
+        { 0x4d296ec6, -0x49c2e2cb, 0x5fad31d8, -0x0c3cfac2, -0x4b42bd14, 0x670b958c, -0x5e9cac03, 0x21398e0c }
+    },
+},
+{
+    {
+        { -0x79e48166, -0x793a03ea, 0x6a27c451, -0x095ccfb9, -0x5e16ca69, 0x01667267, 0x6082dfeb, 0x05ffb9cd },
+        { -0x72582d11, 0x216ab2ca, -0x660bd7d9, 0x366ad9dd, 0x4fdd3c75, -0x519b4700, 0x53909e62, 0x403a395b },
+        { -0x0ac09ec7, -0x59e80561, 0x13e66cb6, 0x60f2b5e5, -0x4cbb755c, -0x28574111, 0x6f5ea192, 0x7a293285 }
+    },
+    {
+        { 0x79639302, -0x4763bbb8, 0x50c67f2c, 0x4ae4f193, -0x37e5063a, -0x0f4ca258, 0x46871017, 0x39d00035 },
+        { -0x4fd21778, 0x0b39d761, -0x2dbeb1e1, 0x5f550e7e, 0x22e1a940, -0x59405ba8, -0x02bb8467, 0x050a2f7d },
+        { -0x59af2489, 0x437c3b33, -0x453ad44e, 0x6bafe81d, 0x2db7d318, -0x0166bfd3, 0x372ba6ce, 0x2b5b7eec }
+    },
+    {
+        { 0x613ac8f4, -0x596bbfb3, -0x056818d4, 0x500c3c2b, 0x1fcec210, -0x78befb2e, -0x79fb5712, 0x1b205fb3 },
+        { -0x7c0af111, -0x4c43b443, -0x736d879a, 0x508f0c99, -0x37481992, 0x43e76587, -0x5b806727, 0x0f7655a3 },
+        { -0x2db4ecc4, 0x55ecad37, 0x6038c90b, 0x441e147d, -0x29d39012, 0x656683a1, -0x781f1352, 0x0157d5dc }
+    },
+    {
+        { -0x28e14adc, -0x6ad9aaec, 0x5df14593, -0x19fc277f, 0x0d4de6b7, 0x147cdf41, 0x0437c850, 0x5293b173 },
+        { 0x0354c13d, -0x0d5850af, -0x55c8d4a0, -0x285f4ebb, 0x05a3d470, 0x2869b96a, -0x7db9fe8d, 0x6528e42d },
+        { 0x4bccf226, 0x23d0e081, -0x7e69046d, -0x6d38ba33, 0x59541e5b, -0x749e8694, -0x3fde0688, 0x40a44df0 }
+    },
+    {
+        { 0x4bc5d095, -0x793691af, -0x03597fb6, -0x0df2bf68, -0x37d915a3, 0x27363d89, 0x5719cacf, 0x39ca3656 },
+        { 0x4f20ea6a, -0x25579677, 0x4c620618, -0x15eb5c2f, 0x090bf8be, 0x6001fccb, -0x6b816310, 0x35f4e822 },
+        { 0x6f87b75c, -0x68af90d1, 0x034ae070, -0x39db5160, -0x552cb22a, 0x1ec856e3, -0x1bbf1a71, 0x055b0be0 }
+    },
+    {
+        { 0x6ea33da2, 0x4d12a04b, -0x1c9ed923, 0x57cf4c15, -0x11bb2699, -0x6f13698b, 0x2a985aac, 0x64ca348d },
+        { -0x768ca2ee, 0x6469a17d, -0x199d460f, -0x2490d82b, 0x6a395681, -0x60345cd8, -0x2d9650db, 0x363b8004 },
+        { -0x1b3b6ed3, -0x66a771e7, 0x1ca5ce6b, -0x1033c4b2, -0x05a4672b, 0x4522ea60, 0x1de4a819, 0x7064bbab }
+    },
+    {
+        { 0x42542129, -0x5d6f3f9f, -0x4172a470, -0x0d1d3d52, 0x76abfe1b, -0x30dba725, -0x7c29d941, 0x02157ade },
+        { 0x5a770641, -0x46e61eaf, 0x4e7f8039, -0x565d1d39, 0x3df23109, 0x7527250b, -0x53d84875, 0x756a7330 },
+        { 0x1b9a038b, 0x3e46972a, 0x7ee03fb4, 0x2e4ee66a, 0x6edbb4ca, -0x7e5db789, -0x7132fa9d, 0x1a944ee8 }
+    },
+    {
+        { 0x182362d6, -0x44bf57a7, -0x75b2e545, -0x4660aa89, 0x758559f6, -0x72e74bd9, 0x4d26235a, 0x26c20fe7 },
+        { 0x51039372, -0x2a56e2ef, -0x6635d922, 0x2ed377b7, -0x02c99495, -0x5e8dfd54, -0x296fe66b, 0x0730291b },
+        { -0x1633dd0b, 0x648d1d9f, 0x28dd577c, 0x66bc5619, 0x652439d1, 0x47d3ed21, -0x125074b7, 0x49d271ac }
+    },
+},
+{
+    {
+        { -0x4b48a9ff, 0x2798aaf9, 0x5c8dad72, 0x5eac7213, 0x61b7a023, -0x2d31559f, -0x167082b2, 0x1bbfb284 },
+        { 0x382b33f3, -0x760afa76, -0x52b73f4c, 0x5ae2ba0b, -0x5ac24c92, -0x706c4afd, -0x6a5dcd1a, 0x5aa3ed9d },
+        { -0x38269a9f, 0x656777e9, 0x72c78036, -0x34d4edac, -0x26af9112, 0x65053299, 0x5e8957cc, 0x4a07e14e }
+    },
+    {
+        { -0x3b885b65, 0x240b58cd, 0x6447f017, -0x02c72522, -0x58379553, 0x19928d32, -0x7b505f7f, 0x50af7aed },
+        { -0x67f20667, 0x4ee412cb, 0x3c6ec771, -0x5cea2891, -0x6da38803, -0x445a1222, 0x1d313402, 0x3f0bac39 },
+        { 0x15f65be5, 0x6e4fde01, 0x216109b2, 0x29982621, 0x0badd6d9, 0x78020581, -0x45142ffa, 0x1921a316 }
+    },
+    {
+        { -0x260c3e75, -0x28a55266, 0x60b1c19c, 0x566a0eef, 0x255c0ed9, 0x3e9a0bac, -0x5f9d380b, 0x7b049dec },
+        { -0x20478f04, -0x76bdd082, 0x4f76b3bd, 0x2c296beb, 0x36c24df7, 0x0738f1d4, -0x1d8c5150, 0x6458df41 },
+        { 0x35444483, -0x23341c86, 0x0fedbe93, 0x75887933, 0x12c5dd87, 0x786004c3, -0x3d6af19c, 0x6093dccb }
+    },
+    {
+        { 0x6084034b, 0x6bdeeebe, 0x780fb854, 0x3199c2b6, -0x49d2f96b, -0x68cc8955, -0x749b8270, 0x6e3180c9 },
+        { -0x7a1f8f93, 0x1ff39a85, -0x4c18c6cd, 0x36d0a5d8, 0x718f453b, 0x43b9f2e1, 0x4827a97c, 0x57d1ea08 },
+        { -0x5ed74f8f, -0x11854919, -0x6c577456, -0x5b3ea693, -0x4dde9ed0, -0x084b217e, -0x226842e8, 0x363e999d }
+    },
+    {
+        { -0x1db4513a, 0x2f1848dc, -0x454350a0, 0x769b7255, 0x3cefe931, -0x6f34c392, -0x39064cab, 0x231f979b },
+        { 0x35ee1fc4, -0x6957bc3f, 0x08e4c8cf, -0x68914cab, -0x4a732cd0, -0x4bd097ff, 0x693a052b, 0x48ee9b78 },
+        { -0x33d50c3a, 0x5c31de4b, -0x01df72e1, -0x4fb44fd0, -0x3eb04b9a, -0x48728ff7, 0x08792413, 0x079bfa9b }
+    },
+    {
+        { -0x5d2abdbb, -0x0c361280, 0x77f63952, 0x0aa08b78, -0x2ef7ab8b, -0x2892539d, -0x6b8f9c95, 0x1ef4fb15 },
+        { -0x25cff20c, -0x1c6fc5af, 0x3da95ab0, -0x7bc69bdd, 0x0b356480, -0x12c30ed3, -0x7b7e8e6c, 0x038c77f6 },
+        { 0x5b167bec, -0x7ab1a11a, -0x692f323e, 0x59590a42, -0x67efde67, 0x72b2df34, 0x4a0bff56, 0x575ee92a }
+    },
+    {
+        { 0x0aa4d801, 0x5d46bc45, -0x5acc4628, -0x3c50edd9, 0x2b8906c2, 0x389e3b26, 0x382f581b, 0x200a1e7e },
+        { -0x75e7d031, -0x2b3f7f70, -0x66b76243, 0x30e170c2, 0x52f733de, 0x05babd57, 0x2cd3fd00, 0x43d4e711 },
+        { -0x1506c53b, 0x518db967, 0x056652c0, 0x71bc989b, 0x567197f5, -0x01d47a27, 0x651e4e38, 0x050eca52 }
+    },
+    {
+        { 0x60e668ea, -0x6853c68a, 0x153ab497, -0x64e64402, 0x34eca79f, 0x4cb179b5, -0x5ece51a9, 0x6151c09f },
+        { 0x453f0c9c, -0x3cbce522, -0x008fc465, -0x160afba2, -0x127b84c3, -0x03268537, 0x1c58f4c6, 0x4b0ee6c2 },
+        { -0x020fa26a, 0x3af55c0d, 0x2ab4ee7a, -0x22d9d120, 0x12171709, 0x11b2bb87, -0x7ff0fcf5, 0x1fef24fa }
+    },
+},
+{
+    {
+        { -0x6fe99de0, -0x006e5996, 0x5bf1e009, -0x0ddaad52, 0x7f90df7c, 0x7dff85d8, 0x0c736fb9, 0x4f620ffe },
+        { 0x6b6c6609, -0x4b69edc6, -0x7f54a6c8, -0x58af017b, -0x483d85a1, -0x0b8e40c7, 0x77ac193c, 0x507903ce },
+        { -0x2021c1cc, 0x62f90d65, -0x4605a053, -0x30d73a6e, -0x39e9baf0, -0x66379107, 0x4a256c84, 0x25d44804 }
+    },
+    {
+        { -0x36fdd4ab, 0x2c7c4415, -0x7ed14e02, 0x56a0d241, -0x2849a1f3, -0x0fd15e37, -0x2acdc4da, 0x4180512f },
+        { -0x38164e91, -0x4297dcf2, -0x3e3a86a3, 0x0eb1b9c1, -0x6a494e01, 0x7943c8c4, 0x0bbacf5e, 0x2f9faf62 },
+        { -0x75b75a25, -0x5b00c197, -0x426abfc5, -0x4595c7fa, 0x47d5b65d, -0x60831e51, 0x5939d2fb, 0x15e087e5 }
+    },
+    {
+        { -0x0469c0c8, -0x776be792, -0x239c642b, 0x48a00e80, -0x1693e367, -0x5b17f6d5, -0x35a8c99f, 0x5a097d54 },
+        { 0x745c1496, 0x12207543, -0x25c79ef4, -0x2500c303, 0x2c71c34f, -0x1b1868d9, 0x34bdede9, 0x39c07b19 },
+        { 0x17c9e755, 0x2d45892b, -0x76cf7208, -0x2fcc028e, 0x525b8bd9, 0x6c2fe9d9, -0x3ee33f87, 0x2edbecf1 }
+    },
+    {
+        { -0x2f785da1, -0x11f0f023, 0x5c3e34ee, -0x638aceab, -0x7054c54b, 0x660c572e, 0x544cd3b2, 0x0854fc44 },
+        { -0x38ea5f2e, 0x1616a4e3, -0x07cbe2b3, 0x53623cb0, -0x38176635, -0x6910acd7, -0x5997455a, 0x3d4e8dbb },
+        { 0x55edad19, 0x61eba0c5, -0x0f57c21a, 0x24b533fe, -0x7c455a08, 0x3b770428, -0x675b8173, 0x678f82b8 }
+    },
+    {
+        { 0x57775696, 0x1e09d940, 0x3cd951db, -0x112ed9a4, 0x20bce16f, -0x056253d5, -0x172f760c, 0x0f7f76e0 },
+        { -0x296ff3ac, -0x4eb6e2f5, -0x62ecd9ca, 0x3539722c, 0x0b362bc9, 0x4db92892, -0x59749621, 0x4d7cd1fe },
+        { -0x2b7a4ff4, 0x36d9ebc5, -0x1b524c9b, -0x5da69b6e, -0x3dee6333, -0x3e9a6b80, 0x186e0d5f, 0x45306349 }
+    },
+    {
+        { 0x2b072491, -0x695beb14, 0x27a7b65b, 0x1bb22181, 0x6e8a4af0, 0x6d284959, -0x32d889a1, 0x65f3b08c },
+        { -0x593200e3, -0x6b222f3f, -0x17bdec52, 0x55f6f115, -0x66d03096, 0x6c935f85, 0x4a37f16f, 0x067ee0f5 },
+        { 0x199801f7, -0x134d6001, -0x5d5f08d1, -0x62c9e2e1, 0x75fd2f49, 0x25f11d23, 0x0fe10fe2, 0x124cefe8 }
+    },
+    {
+        { 0x31b16489, 0x1518e85b, -0x248ef405, -0x70552349, -0x5eb51dc7, 0x39b0bdf4, 0x503d20c1, 0x05f4cbea },
+        { -0x2e720dab, 0x4c126cf9, 0x147a63b6, -0x3e2b8e17, -0x0c36c4a1, 0x2c6d3c73, -0x1c00795e, 0x6be3a6a2 },
+        { -0x3fbeba44, -0x31fbf162, 0x08f6834c, -0x38e00b1e, -0x5477b85d, -0x42ab9173, -0x5b2d545b, 0x64666aa0 }
+    },
+    {
+        { 0x3337e94c, -0x4f3ac409, 0x11e14f15, 0x7cb5697e, 0x1930c750, 0x4b84abac, -0x1f9bfb98, 0x28dd4abf },
+        { 0x7c06d912, 0x6841435a, -0x44c07cf5, -0x35edc3df, -0x4e341d88, -0x2b4c84d9, -0x3890afba, 0x1d753b84 },
+        { 0x44cb9f44, 0x7dc0b64c, -0x1c6da241, 0x18a3e1ac, 0x2d0457c4, 0x7a303486, -0x75f376d2, 0x4c498bf7 }
+    },
+},
+{
+    {
+        { 0x30976b86, 0x22d2aff5, -0x3d2db9fc, -0x726f47fa, 0x4de5bae5, -0x235e7694, -0x37cbf3e9, 0x28005fe6 },
+        { 0x1aa73196, 0x37d653fb, 0x3fd76418, 0x0f949530, -0x04c5e84e, -0x52dff4f7, 0x2fc8613e, 0x544d4929 },
+        { 0x34528688, 0x6aefba9f, 0x25107da1, 0x5c1bff94, 0x66d94b36, -0x08a44433, 0x0f316dfa, 0x72e47293 }
+    },
+    {
+        { -0x2cd589d9, 0x07f3f635, 0x5f6566f0, 0x7aaa4d86, 0x28d04450, 0x3c85e797, 0x0fe06438, 0x1fee7f00 },
+        { -0x687ef7b1, 0x2695208c, 0x23450ee1, -0x4eafd5f5, 0x03efde02, -0x0262515a, 0x2733a34c, 0x5a9d2e8c },
+        { 0x03dbf7e5, 0x765305da, 0x1434cdbd, -0x5b250db7, -0x2db57714, 0x7b4ad5cd, -0x11fbfabd, 0x00f94051 }
+    },
+    {
+        { 0x07af9753, -0x28106c45, 0x3db766a7, 0x583ed0cf, 0x6e0b1ec5, -0x31966741, 0x5dd40452, 0x47b7ffd2 },
+        { -0x3c2ccf4e, -0x72ca94dd, -0x4fb8e4fa, -0x0de37465, 0x6e42b83c, -0x4c93ce94, -0x74154ef3, 0x07d79c7e },
+        { -0x43f722ee, -0x78040464, -0x1e113d65, -0x75f994c6, -0x24e03e41, 0x0d57242b, 0x5ea64bb6, 0x1c3520a3 }
+    },
+    {
+        { 0x216bc059, -0x325790c0, 0x12bcd87e, 0x1fbb231d, 0x17c70990, -0x4b6a9562, 0x66d12e55, 0x38750c3b },
+        { -0x43345cb6, -0x7f2dac5a, 0x3838219b, 0x3e61c3a1, -0x677d1c6a, -0x6f3c49ff, 0x5d0ee66f, 0x1c3d0577 },
+        { -0x6bdd1ae6, 0x692ef140, 0x2b5df671, -0x343f38c4, 0x744ce029, 0x21014fe7, -0x2ccfb784, 0x0621e2c7 }
+    },
+    {
+        { -0x4f240f0d, -0x4851e86a, -0x1e831e6a, 0x54dfafb9, -0x16555c4c, 0x25923071, -0x5effd163, 0x5d8e589c },
+        { -0x7da67c73, -0x50679f34, -0x39606524, -0x6f15b73f, 0x65581e30, 0x65264837, 0x7bd3a5bc, 0x0007d609 },
+        { 0x0842a94b, -0x3f40e26b, 0x588f2e3e, -0x4d2c3c9d, -0x44ae1d11, 0x0a961438, 0x3c1cbf86, 0x1583d778 }
+    },
+    {
+        { -0x3362d739, -0x6ffcb8fc, -0x08d33a71, 0x1d1b679e, -0x41a478da, 0x16e12b5f, -0x7c3aa7f6, 0x4958064e },
+        { 0x5da27ae1, -0x13115d11, 0x55670174, 0x597c3a14, 0x6609167a, -0x3659d5ee, -0x7e127090, 0x252a5f2e },
+        { 0x5066e80d, 0x0d289426, 0x307c8c6b, -0x033c087b, 0x0c1112fd, 0x1b53da78, -0x27bc4c78, 0x079c170b }
+    },
+    {
+        { -0x3f2a2faa, -0x322932b0, -0x44fca8c5, -0x65089793, -0x0c3c10b8, 0x3ca6723f, 0x317b8acc, 0x6768c0d7 },
+        { 0x64fa6fff, 0x0506ece4, 0x6205e523, -0x411cbce2, 0x51b8ea42, 0x35794224, 0x4ac9fb00, 0x6dec05e3 },
+        { -0x0eaa3e4d, -0x6b49da1b, -0x6684846f, 0x417bf3a7, 0x6d6b2600, -0x3dd34224, -0x2232ad0c, 0x51445e14 }
+    },
+    {
+        { 0x2bbea455, -0x76ceb855, -0x6df86ed7, -0x73ac5db1, -0x41cf0859, 0x4b49f948, 0x6e4fd43d, 0x12e99008 },
+        { 0x3b144951, 0x57502b4b, 0x444bbcb3, -0x71980095, 0x166385db, -0x474296d9, -0x1c6d6a38, 0x13186f31 },
+        { 0x7fdfbb2e, -0x0ef3694d, 0x121ceaf9, -0x60656ca2, 0x3a5b983f, -0x20eec93c, 0x5d3e99af, 0x77b2e3f0 }
+    },
+},
+{
+    {
+        { -0x33a32d65, -0x6acd0b71, -0x5c31c98f, 0x2ba851be, 0x51122941, 0x32dacaa0, 0x350004f2, 0x478d99d9 },
+        { -0x630ed9a9, -0x02f28a79, -0x1ac5f1d7, -0x17d0106c, 0x5bbb4be7, -0x33cb5810, -0x5af3c75e, 0x0b251172 },
+        { -0x6f44fd40, 0x1d5ad948, 0x0ec25115, 0x50e208b1, 0x4ef21702, -0x5d95dd77, 0x3b524805, 0x4dc92334 }
+    },
+    {
+        { -0x0c93b68b, 0x3ad3e3eb, 0x37862125, -0x28a2da5b, -0x5fda5aea, -0x178c6bc3, -0x3bee37b9, 0x6bbc7cb4 },
+        { 0x0f8086b6, -0x1c7d73c0, -0x6860f238, 0x3f77e6f7, 0x4df42cb4, 0x7ef6de30, -0x4954287c, 0x5265797c },
+        { -0x2b5af2aa, 0x3c6f9cd1, -0x39015482, -0x49dbbf89, 0x3580972e, 0x6ff9bf48, -0x4ccd5305, 0x00375883 }
+    },
+    {
+        { 0x6c75c99c, -0x3674137b, 0x00e33cf4, -0x1bbe7b40, -0x456f89cc, 0x0a676b9b, 0x71f379d7, 0x669e2cb5 },
+        { 0x28cb0940, 0x0001b2cd, 0x6f1c24c9, 0x63fb51a0, -0x232a35cf, -0x4a52796f, -0x73baf9a0, 0x67238dbd },
+        { -0x5b642cf8, -0x34ee948d, 0x2392729e, 0x025aad6b, 0x3f55d9b1, -0x4b86c106, 0x40678bb9, 0x72a10561 }
+    },
+    {
+        { -0x1d1afa4a, 0x0d8d2909, -0x3fd6edd0, -0x67358755, -0x564edcd9, 0x77ef5569, -0x7ebc64b9, 0x7c77897b },
+        { 0x1cc9249d, -0x5d497ed5, 0x21211f58, 0x62866eee, 0x5df10ece, 0x2cb5c5b8, -0x1d9c5200, 0x03a6b259 },
+        { -0x21cce34b, -0x0e3e4a1e, 0x15fca420, 0x5a9f5d8e, 0x7bd932b1, -0x605bc70f, 0x1c6146e7, 0x2a381bf0 }
+    },
+    {
+        { -0x4acbe991, -0x083f41ce, 0x19cf70d4, 0x27e6ca64, -0x56a858a7, -0x6cb20829, -0x54213d56, 0x5701461d },
+        { -0x3037ee3f, -0x53646787, 0x3756e567, -0x7482d67f, 0x7c70edfc, 0x50da4e60, -0x77bbff4a, 0x5dbca62f },
+        { 0x2c915c25, 0x2c674740, 0x0b0d340a, 0x1bdcd1a8, 0x07b43f5f, 0x5e5601bd, 0x5539a242, 0x2555b4e0 }
+    },
+    {
+        { -0x781b9c2c, 0x78409b1d, -0x32049c63, -0x52b256a6, 0x55259b9c, -0x13d788c9, -0x3cedcf55, 0x69c806e9 },
+        { 0x66ddd216, 0x6fc09f52, -0x371c8fb8, -0x231a9f59, -0x5d209d03, -0x139a6c63, -0x1ad12e6e, 0x7a869ae7 },
+        { 0x14bb3f22, 0x7b48f574, -0x51233378, 0x68c7cee4, 0x79ed80be, -0x12d06c9f, 0x5f77bc4b, 0x25d70b88 }
+    },
+    {
+        { -0x44e51b2c, -0x67ba62d7, 0x39f954ec, 0x56b9c4c7, -0x3d64b4c2, -0x7cd8bc0a, -0x67497876, 0x21ea8e27 },
+        { 0x762bf4de, 0x4151c3d9, 0x2745d82b, 0x083f435f, 0x0d23ddd5, 0x29775a2e, 0x69a5db24, 0x138e3a62 },
+        { 0x6a5a7b9c, -0x78410b4c, 0x5fc1d062, -0x2dd662e5, -0x22cde9b8, -0x7dbf67e8, -0x1a5d1fc3, 0x5c5abeb1 }
+    },
+    {
+        { 0x1306a233, 0x02cde6de, 0x116f8ec7, 0x7b5a52a2, -0x3ee9c4a5, -0x1e397e0c, 0x60d32643, 0x241d3506 },
+        { -0x48c3d225, 0x14722af4, 0x5a05060d, -0x43b8f3a1, 0x2581b02e, 0x00943eac, 0x1f499c8f, 0x0e434b3b },
+        { 0x0ebc52c7, 0x6be4404d, -0x4e586e0b, -0x51b9dcc5, -0x2da24bd5, 0x2aec170e, 0x6645d694, 0x1d8dfd96 }
+    },
+},
+{
+    {
+        { 0x12ddb0a4, -0x2a679c64, -0x3fdb7995, -0x5a2e60d0, 0x58fce460, -0x2e83d0fd, 0x2e095e8a, 0x07a19515 },
+        { -0x63d13b22, 0x296fa9c5, 0x4f84f3cb, -0x43749e41, 0x17a8f908, 0x1c7706d9, 0x7ad3255d, 0x63b795fc },
+        { 0x389e5fc8, -0x57c970fe, -0x30721bc5, -0x6fbcc4fe, -0x3abed9bd, -0x505e02a3, 0x032f0137, 0x3e8fe83d }
+    },
+    {
+        { -0x17102ec4, 0x08704c8d, 0x33e03731, -0x203ae572, 0x1260cde3, -0x5a62a25b, -0x59da737a, 0x22d60899 },
+        { 0x0570a294, 0x2f8b15b9, 0x67084549, -0x6b0dbd90, 0x61bbfd84, -0x21e3a51f, 0x7fac4007, 0x75ba3b79 },
+        { 0x70cdd196, 0x6239dbc0, 0x6c7d8a9a, 0x60fe8a8b, -0x14bfeda0, -0x4c77b844, -0x788861a2, 0x0904d07b }
+    },
+    {
+        { 0x48f940b9, -0x0bcdd29a, -0x42d2f3c7, 0x06952f0c, -0x5f7e06cf, 0x167697ad, -0x4508d594, 0x6240aace },
+        { -0x22456e64, -0x4b31e02c, -0x38b37256, -0x30ce24c2, -0x527933af, 0x2c63cc63, -0x43e221f9, 0x43e2143f },
+        { 0x5ba295a0, -0x07cb8b64, -0x35c82da6, -0x296b83a5, -0x1836ce96, 0x66f13ba7, -0x724bf354, 0x56bdaf23 }
+    },
+    {
+        { -0x3e62c44e, 0x1310d36c, 0x622386b9, 0x062a6bb7, -0x285eb0a4, 0x7c9b8591, 0x7e1e5754, 0x03aa3150 },
+        { -0x0acacc15, 0x362ab9e3, 0x6eb93d40, 0x338568d5, 0x1d5a5572, -0x61f1ebae, -0x7c8bece8, 0x1d24a86d },
+        { -0x002b31e1, -0x0b1389b8, 0x54ac8c1c, -0x1fba1510, 0x1d09357c, -0x772dda7e, -0x6514b7a7, 0x43b261dc }
+    },
+    {
+        { 0x6c951364, 0x19513d8b, 0x000bf47b, -0x6b018eda, -0x2ab06a99, 0x028d10dd, 0x42940964, 0x02b4d5e2 },
+        { -0x77448645, -0x1aa4e1e7, -0x3e85ca63, -0x5f612f83, 0x603dea33, -0x4fd3d11e, 0x5b276bc2, 0x326055cf },
+        { 0x28d18df2, -0x4b5eaa35, 0x186ce508, -0x1533b9ba, 0x6c824389, -0x3b630b6d, -0x51a2cbf0, 0x27a6c809 }
+    },
+    {
+        { -0x3bc296ac, -0x32d3d8f6, 0x6a66cab2, -0x22b5c1a9, 0x69d7036c, 0x79fa5924, 0x3d8c2599, 0x22150360 },
+        { 0x1f0db188, -0x74591433, 0x675a5be8, 0x37d3d73a, 0x15f5585a, -0x0dd1205d, -0x009f5e82, 0x2cb67174 },
+        { 0x390be1d0, 0x59eecdf9, 0x728ce3f1, -0x56bddfbc, 0x7a94f0f4, -0x7d76e39a, 0x3890f436, 0x7b1df4b7 }
+    },
+    {
+        { 0x07f8f58c, 0x5f2e2218, -0x2b6bf62c, -0x1caaa361, 0x1fb6a630, -0x4d555773, -0x2cad1fc3, 0x68698245 },
+        { -0x4c4d5ddc, -0x1b6d0d20, 0x2b551160, 0x7c6c9e06, 0x0d7f7b0e, 0x15eb8fe2, 0x58fc5992, 0x61fcef26 },
+        { 0x2a18187a, -0x244ea27b, -0x79225329, -0x0c1b552d, 0x0ff6c482, 0x44bae281, 0x3daf01cf, 0x46cf4c47 }
+    },
+    {
+        { -0x0eb67ec0, 0x213c6ea7, 0x392b4854, 0x7c1e7ef8, 0x5629ceba, 0x2488c38c, 0x0d8cc5bb, 0x1065aae5 },
+        { -0x613b1a07, 0x426525ed, 0x16903303, 0x0e5eda01, -0x341a3524, 0x72b1a7f2, 0x14eb5f40, 0x29387bcd },
+        { -0x20dff2a9, 0x1c2c4525, -0x403598b6, 0x5c3b2dd6, -0x1e7cbfd0, 0x0a07e7b1, 0x4f1ce716, 0x69a198e6 }
+    },
+},
+{
+    {
+        { -0x61d2b8cc, 0x7b26e56b, -0x7e39e98b, -0x3b38ecd5, -0x13632181, -0x10a36adb, -0x18e8bc53, 0x39c80b16 },
+        { -0x10562969, 0x7afcd613, 0x1c067959, 0x0cc45aa4, -0x3e05256a, -0x5a901efc, 0x72e40365, 0x3a73b704 },
+        { 0x1b826c68, 0x0f196e0d, 0x4960e3db, -0x08e00f1e, 0x23b7436c, 0x61131670, 0x77da7282, 0x0cf0ea58 }
+    },
+    {
+        { 0x3ba6945a, -0x1ccd312c, -0x177e3fa3, -0x21f4ec9f, 0x5e67ed3b, 0x1ad40f09, -0x4739c2a3, 0x5da8acda },
+        { -0x222b3343, 0x196c80a4, -0x6a0d2263, 0x22e6f55d, 0x40d6c71b, -0x38a1cc39, -0x34c3fbd1, 0x7bb51279 },
+        { 0x3a70159f, -0x3b4999b6, 0x0a904e14, 0x76194f0f, -0x5bf693ed, -0x5a9eb3c7, -0x68601313, 0x6cd0ff50 }
+    },
+    {
+        { -0x4fb45e72, 0x7fecfabd, 0x3bddbcf7, -0x2f038404, 0x057a131c, -0x5be2b792, -0x0dddc59f, 0x641a4391 },
+        { -0x70bbd754, -0x3f1f9819, -0x59eeca1d, 0x14835ab0, 0x38062935, -0x0de2eb0d, -0x20fb7b64, 0x6390a4c8 },
+        { -0x59f95725, -0x3a3946a6, -0x4f97da0f, -0x6eb48062, 0x44fc9eff, 0x2a731f6b, 0x62705cfc, 0x30ddf385 }
+    },
+    {
+        { 0x68bcd52c, 0x33bef2bd, 0x69482ef2, -0x39b62450, 0x41cb1aee, -0x4a4911f4, 0x0212a7e5, 0x5c294d27 },
+        { -0x2e400807, 0x4e3dcbda, 0x20645717, -0x36ee717e, 0x0f189d56, -0x45333144, -0x2bb98998, 0x1b4822e9 },
+        { 0x25563781, -0x54c9f581, 0x480f7958, 0x2512228a, 0x6114b4e3, -0x38a2fad9, -0x268901d6, 0x222d9625 }
+    },
+    {
+        { 0x0a344f85, 0x0f94be7e, -0x780dd3c8, -0x14d05574, 0x4ee16f0f, -0x631e18a2, 0x18a08dea, 0x43e64e54 },
+        { -0x4c8d531f, 0x1c717f85, 0x4638bf18, -0x7e6cf197, 0x6bc08b58, 0x239cad05, -0x7807000c, 0x0b34271c },
+        { 0x1a35ce63, -0x7eaa1dae, -0x06edfd72, -0x41eff2b3, -0x5a822314, -0x4007f408, 0x6d6bc6e4, 0x57342dc9 }
+    },
+    {
+        { 0x1e707bf6, -0x0c3c4349, 0x7291a762, 0x351d9b8c, -0x252965cd, 0x00502e6e, 0x1ec8807f, 0x522f521f },
+        { -0x3731a668, -0x10110f9b, -0x4a34155e, -0x40fd6af0, 0x20b7c458, -0x739b5efa, 0x31c24855, 0x35134fb2 },
+        { -0x065c6fd5, 0x272c1f46, -0x669a8434, -0x36e45c49, 0x4f8a1c0e, -0x519eb4d0, 0x0b99017b, 0x7afcaad7 }
+    },
+    {
+        { -0x107bd495, -0x577ebe14, -0x6854193b, 0x55e7b147, 0x03784ffe, -0x738b7069, -0x5032ff49, 0x5b50a1f7 },
+        { -0x5b4741bf, -0x3da212ac, 0x1bb0e2dd, -0x6fd2ec1f, -0x3217d54e, 0x41f43233, -0x3c551835, 0x1085faa5 },
+        { -0x0ec9eceb, -0x647bf09a, 0x701003e9, 0x18462242, -0x1b5daf80, 0x65ed45fa, 0x3fda7320, 0x0a286239 }
+    },
+    {
+        { 0x6ecb9d17, -0x69f18c85, -0x2983151f, -0x050db6b8, -0x2aa1e477, 0x37e7a9b4, -0x4b93a615, 0x5cb7173c },
+        { 0x347cbc9d, 0x46ab13c8, -0x663edc7d, 0x3849e8d4, -0x7829b537, 0x4cea3140, -0x4e5d6119, 0x1f354134 },
+        { -0x7d485410, 0x4a89e68b, -0x64594847, -0x0be326d9, -0x1e727891, 0x16e6c210, 0x7f1b09c6, 0x7cacdb0f }
+    },
+},
+{
+    {
+        { -0x233a3513, -0x1efebbcc, 0x3c84fb33, 0x47ed5d96, -0x12795f19, 0x70019576, -0x2d98061c, 0x25b2697b },
+        { -0x26e58744, -0x6f9d4d20, -0x37af6999, 0x47c9889c, 0x405070b8, -0x620ab59a, 0x2493a1bf, 0x7369e6a9 },
+        { 0x13986864, -0x6298c005, 0x415dc7b8, 0x3ca5fbd9, -0x20d8c4a2, -0x1fb133c5, -0x4ab1b32e, 0x1420683d }
+    },
+    {
+        { -0x3e33a530, 0x34eebb6f, -0x69b95375, 0x6a1b0ce9, -0x599421ad, -0x2c4f25b7, 0x61d081c1, 0x31e83b41 },
+        { 0x249dd197, -0x4b8742e2, 0x5e58c102, 0x620c3500, -0x334553a4, -0x04fd2cd1, -0x0af758d3, 0x60b63beb },
+        { -0x61f9d4b1, -0x681738ee, 0x29320ad8, 0x49e48f4f, 0x6f18683f, 0x5bece14b, 0x2d550317, 0x55cf1eb6 }
+    },
+    {
+        { 0x7df58c52, 0x3076b5e3, -0x186633ca, -0x28c54623, 0x4913ee20, -0x427ce31d, 0x62ba0133, 0x1a56fbaa },
+        { 0x65c23d58, 0x58791010, 0x5094819c, -0x7462f793, 0x12c55fa7, -0x1dbfd057, 0x570891d4, 0x669a6564 },
+        { 0x5c9dc9ec, -0x6bc194b0, -0x5883c8e6, 0x302557bb, 0x41347651, -0x678c51aa, -0x663a75a4, 0x13c48367 }
+    },
+    {
+        { 0x5d8bd080, -0x3b230496, 0x571a4842, -0x21143b14, -0x471aac9b, -0x2b4d177d, -0x371a47d9, 0x50bdc87d },
+        { 0x5ab3e1b9, 0x423a5d46, -0x380ec09f, -0x03ec3e79, -0x134a464a, 0x19f83664, -0x59c849f9, 0x66f80c93 },
+        { 0x6edfe111, 0x606d3783, -0x0fee5427, 0x32353e15, 0x25b73b96, 0x64b03ac3, 0x725fd5ae, 0x1dd56444 }
+    },
+    {
+        { 0x08bac89a, -0x3d681a00, -0x151e3c20, 0x7d4cea11, -0x60186884, -0x0c1c741f, 0x63a305cd, 0x3a3a450f },
+        { 0x3362127d, -0x705b8008, 0x71cd7c15, -0x4360953c, 0x49220c8b, 0x6e714543, 0x219f732e, 0x0e645912 },
+        { -0x27c6b9d9, 0x078f2f31, -0x216b5af0, 0x389d3183, 0x17996f80, -0x2e1c9393, -0x6c565785, 0x318c8d93 }
+    },
+    {
+        { -0x54e22c68, 0x5d669e29, 0x342d9e3b, -0x036de9a8, -0x0ca68c33, 0x55851dfd, 0x25950af6, 0x509a41c3 },
+        { 0x2afffe19, -0x0d8ba2fd, 0x7f24db66, 0x0c9f3c49, -0x457a6711, -0x43672c1d, -0x65e2acec, 0x224c7c67 },
+        { -0x5906da17, -0x423f9124, 0x641b1f33, 0x793ef3f4, -0x627cc177, -0x7d13ed80, 0x28a11389, 0x05bff023 }
+    },
+    {
+        { 0x0dc512e4, 0x6881a0dd, 0x44a5fafe, 0x4fe70dc8, -0x70b5adc0, 0x1f748e6b, -0x11fe5c16, 0x576277cd },
+        { 0x23cae00b, 0x36321370, -0x2e5330a7, 0x544acf0a, -0x2de5e378, -0x698befb7, -0x05d5bb59, 0x780b8cc3 },
+        { 0x234f305f, 0x1ef38abc, 0x1405de08, -0x65a88043, 0x34e62a0d, 0x5e82a514, 0x6271b7a1, 0x5ff41872 }
+    },
+    {
+        { 0x13b69540, -0x1a24b818, 0x432610e1, -0x0ca2d5c5, 0x38781276, -0x53e0d917, -0x5f5f3497, 0x29d4db8c },
+        { 0x1789db9d, 0x398e080c, -0x0c18870b, -0x589fdfdb, 0x06bd035d, -0x056776b4, 0x25a966be, 0x106a03dc },
+        { 0x333353d0, -0x2652f551, -0x532cf61b, 0x38669da5, -0x37770810, 0x3c57658a, 0x052cbefa, 0x4ab38a51 }
+    },
+},
+{
+    {
+        { -0x7f621fac, -0x09701d18, -0x637d452f, -0x1c43f696, 0x0aadbf45, 0x076353d4, -0x215e6a62, 0x7b9b1fb5 },
+        { 0x4324c0e9, -0x20253412, 0x3f955bb7, 0x05444288, -0x15ce9f61, -0x21085558, 0x42287cff, 0x68aee706 },
+        { 0x7471cc0c, -0x0fe3370f, 0x579082bb, -0x6adbd1c9, -0x2c1b94a1, 0x27776093, 0x28bd85fb, 0x2d13d55a }
+    },
+    {
+        { 0x7aee7a52, -0x40fe6332, -0x1bab152d, -0x57212d4a, -0x785744e7, 0x3c619f0b, 0x560916d8, 0x3619b5d7 },
+        { 0x5b35b8da, -0x053a2dfa, -0x7a9db449, -0x57257566, 0x3d21cd0f, -0x332d356f, -0x7406f2a8, 0x6b8341ee },
+        { 0x0282c4b2, 0x3579f26b, 0x4fafefae, 0x64d592f2, 0x28c8c7c0, -0x48321285, 0x7173a8d7, 0x6a927b6b }
+    },
+    {
+        { 0x3ece88eb, -0x728fbf7a, -0x7f113f74, -0x0f1cf857, 0x0d788fda, -0x53ddaf9f, 0x3a0d478d, 0x056d92a4 },
+        { -0x6791b9aa, 0x1f6db24f, -0x2e16efa5, 0x1021c02e, 0x2cc0a375, -0x0700c001, -0x3937da6e, 0x1d2a6bf8 },
+        { -0x03c25a5f, 0x1b05a196, 0x43b59ed0, 0x77d7a8c2, -0x682e86e8, 0x06da3d62, -0x0edcac09, 0x66fbb494 }
+    },
+    {
+        { -0x0edcf62a, -0x2928f66a, -0x163c2ac7, -0x2404dc7b, -0x08aadbef, 0x46d602b0, 0x57843e0c, 0x270a0b05 },
+        { -0x27a3f048, 0x751a50b9, -0x7430f685, -0x2e5023db, -0x7cf65697, 0x2f16a6a3, -0x1a4ff9a7, 0x14ddff9e },
+        { -0x5879d434, 0x61ff0640, 0x5f11abfe, -0x7e353f66, 0x55d12abb, -0x6fb87cfc, -0x6ba5178d, 0x19a4bde1 }
+    },
+    {
+        { -0x3f893b61, 0x40c709de, 0x7f3e53f6, 0x657bfaf2, -0x135fbd3c, 0x40662331, 0x7eb4df04, 0x14b37548 },
+        { 0x20a6200a, -0x6460d90b, -0x30ec1508, 0x64804443, -0x79ce122d, -0x759c98c1, 0x1ed39dc1, 0x72bbbce1 },
+        { -0x549923b9, -0x517ac36c, -0x2089d292, -0x149dcbc2, 0x6fb2f7d1, -0x0f71f1e8, 0x700ab37a, 0x4f0b1c02 }
+    },
+    {
+        { -0x3e4d1dc1, 0x79fd21cc, 0x453df52a, 0x4ae7c281, -0x2eaeb795, -0x37e8d137, 0x3e0a7534, 0x68abe944 },
+        { -0x27e6ae06, -0x1e8f9879, -0x4d6f3885, -0x5ef5d372, 0x3ed66773, -0x18c7d060, 0x0bcc4b54, 0x0a4d8471 },
+        { 0x07831dcb, -0x25ed393c, 0x4d5c510d, 0x0da230d7, 0x6bd404e1, 0x4ab1531e, -0x430bbf11, 0x4106b166 }
+    },
+    {
+        { 0x39e4ecf2, -0x5b7a332b, 0x0555bab5, 0x5aa3f3ad, -0x6c8207d3, 0x145e3439, 0x1214283f, 0x1238b51e },
+        { 0x1cd23668, 0x02e57a42, 0x0eaef6fd, 0x4ad9fb5d, -0x4edbbb80, -0x6ab198d9, 0x2699f331, 0x7f792f9d },
+        { 0x5fd4d924, 0x0b886b92, 0x3626a80d, 0x60906f7a, -0x467542ee, -0x132c984c, -0x210cbb31, 0x2876beb1 }
+    },
+    {
+        { 0x3a8a85f8, -0x2a6b4ccd, -0x187282a8, 0x4ea37689, 0x5e8e351f, 0x73bf9f45, -0x43be144c, 0x5507d7d2 },
+        { 0x63144691, -0x237b16cb, -0x29e0dc0c, 0x632fe8a0, 0x12a9a8d5, 0x4caa8006, 0x0e9918d3, 0x48f9dbfa },
+        { 0x299572fc, 0x1ceb2903, -0x6afd2f12, 0x7c8ccaa2, 0x11cce67b, -0x6e405bcc, 0x64a831e7, 0x57844819 }
+    },
+},
+{
+    {
+        { 0x5fddc09c, -0x29302e11, -0x08a8a232, -0x17d4c103, 0x201634c2, 0x25d56b5d, 0x04ed2b9b, 0x3041c6bb },
+        { 0x6768d593, -0x2583d4db, 0x4422ca13, -0x673e3fa9, -0x35f531e3, -0x0e57f42b, -0x3f775970, 0x29cdd1ad },
+        { -0x26a91eb8, 0x0ff2f2f9, -0x60ca94d2, -0x5218688b, 0x5f6c025c, 0x1a4698bb, 0x14049a7b, 0x104bbd68 }
+    },
+    {
+        { -0x29800e9d, -0x56a265a1, 0x4cc75681, -0x16d41963, -0x21df0da9, -0x4807fdb4, -0x04f8d20b, 0x204f2a20 },
+        { 0x68f1ed67, 0x51f0fd31, -0x2790c43e, 0x2c811dcd, 0x04d2f2de, 0x44dc5c43, 0x092a7149, 0x5be8cc57 },
+        { 0x30ebb079, -0x37ebc4c3, -0x429ad1d0, 0x7589155a, -0x7092a3cf, 0x653c3c31, -0x3d86e9e1, 0x2570fb17 }
+    },
+    {
+        { 0x0bb8245a, 0x192ea955, -0x706faf2f, -0x37190458, -0x775b36cb, 0x7986ea2d, -0x21fe7998, 0x241c5f91 },
+        { 0x2cb61575, 0x3efa367f, 0x1cd6026c, -0x0a06908a, 0x65b52562, -0x1738ebd6, 0x53030acd, 0x3dcb65ea },
+        { 0x40de6caa, 0x28d81729, 0x22d9733a, -0x7040d310, 0x235b01d1, 0x16d7fcdd, 0x5fcdf0e5, 0x08420edd }
+    },
+    {
+        { 0x04f410ce, 0x0358c34e, 0x276e0685, -0x49eca4a6, -0x1446eadf, 0x5d9670c7, 0x21db889c, 0x04d654f3 },
+        { -0x7c9d05b6, -0x3200df55, -0x1de5c192, 0x57e118d4, -0x03c619d5, -0x1ce869e9, -0x43e89603, 0x0d9a53ef },
+        { -0x22424a2b, 0x5e7dc116, -0x725a22d3, 0x2954deb6, 0x3334a292, 0x1cb60817, 0x18991ad7, 0x4a7a4f26 }
+    },
+    {
+        { -0x50c8d5b5, 0x24c3b291, 0x718147f2, -0x6c257d90, -0x7976610e, -0x227b7a9c, 0x23e0ee33, 0x4a963142 },
+        { 0x5fb15f95, -0x0b58e7fe, 0x6b5c1b8f, 0x3df65f34, 0x00e01112, -0x32030f7b, -0x222ce7b8, 0x11b50c4c },
+        { 0x08a4ffd6, -0x5917d8bc, -0x63ea8927, 0x738e177e, 0x3d02b3f2, 0x773348b6, -0x319433af, 0x4f4bce4d }
+    },
+    {
+        { -0x3b62f491, 0x30e2616e, -0x3513dce9, -0x1ba98e71, -0x0d94b05a, 0x48eb409b, 0x61595f37, 0x3042cee5 },
+        { -0x1ddbda7c, -0x58e031a6, -0x6d0a7562, 0x26ea7256, 0x1cea3cf4, -0x2de5f629, -0x48e3fe1a, 0x73fcdd14 },
+        { 0x449bac41, 0x427e7079, -0x431dcef6, -0x7aa51c93, 0x5f841a7c, 0x4cae7621, -0x65631e2a, 0x389e740c }
+    },
+    {
+        { 0x570eac28, -0x3642870a, 0x27919ce1, -0x1aa4f4ce, -0x5e646e13, 0x65fc3eab, -0x29d9c970, 0x25c425e5 },
+        { 0x34dcb9ce, 0x64fcb3ae, -0x1cb72f53, -0x68affcdd, 0x62c6381b, 0x45b3f07d, 0x465a6788, 0x61545379 },
+        { -0x0e282192, 0x3f3e06a6, -0x71f9dcf8, 0x3ef97627, 0x4e8a6c77, -0x73eb09da, 0x15484759, 0x6539a089 }
+    },
+    {
+        { 0x14bb4a19, -0x223b242c, -0x67bdb072, 0x19b2bc3c, 0x36ca7169, 0x48a89fd7, -0x0fe64270, 0x0f65320e },
+        { -0x3c2d088d, -0x162de08c, 0x25c46845, -0x3eafabbf, -0x064661cd, 0x624e5ce8, -0x3a32e794, 0x11c5e4aa },
+        { -0x35021f3a, -0x2b792e4f, 0x163b5181, 0x4f3fe6e3, -0x050d6c66, 0x59a8af0d, -0x13ccf8d6, 0x4cabc7bd }
+    },
+},
+{
+    {
+        { 0x1a54a044, -0x083f5e64, 0x77bd9fbb, 0x4a1c5e24, 0x5af22972, -0x591c35ef, 0x3f2e9e0d, 0x1819bb95 },
+        { 0x532f7428, 0x16faa8fb, 0x46a4e272, -0x242bd160, -0x74615b80, 0x5337653b, 0x23973f03, 0x40659472 },
+        { 0x5e042e84, 0x498fbb79, 0x7698b714, 0x7d0dd89a, 0x27fe6295, -0x7404f45c, 0x21200524, 0x36ba82e7 }
+    },
+    {
+        { 0x57274ed5, -0x372962f6, 0x60804b17, 0x45ba8032, 0x2255dfac, -0x20c325f0, 0x2709b339, 0x77d22123 },
+        { 0x4245ec41, -0x29f13449, 0x34348716, -0x02641762, -0x1bdd7b22, -0x36dbf502, -0x2face24c, 0x4472f648 },
+        { 0x64ad94d8, 0x498a6d70, -0x6509dd9d, -0x5a4a3703, 0x45c141f4, -0x735712fb, 0x662d358c, 0x2c63bec3 }
+    },
+    {
+        { -0x7a790741, -0x65ae74c6, -0x344e6910, -0x6118e50a, -0x5dc7a30e, -0x55f9da1a, -0x2228372f, 0x1deb2176 },
+        { -0x158786ab, 0x7fe60d8b, -0x4a0bfe49, -0x4623ee82, 0x19355cce, -0x6e383f66, -0x6bbd4121, 0x22692ef5 },
+        { 0x2066cf6c, -0x7a9c2e66, 0x4dcc7cd7, 0x401bfd8c, -0x32f2709e, -0x26895942, -0x5d874fa2, 0x67cfd773 }
+    },
+    {
+        { 0x5a4e586a, 0x2d5fa985, 0x49beab7e, 0x65f8f7a4, -0x0de2cc2d, -0x55f8b223, 0x1bcb9dee, 0x185cba72 },
+        { -0x10c11b8b, -0x7213ce06, -0x61dd026e, -0x66240076, 0x4e26cab1, 0x512d1159, -0x13bcef47, 0x0cde561e },
+        { -0x0b1c34bf, -0x6c79625d, 0x40f7977e, -0x40fc6d0b, -0x2fb9c47d, 0x026204fc, -0x61139113, 0x3ec91a76 }
+    },
+    {
+        { -0x4f5cbfd1, 0x0fad2fb7, -0x04960b58, 0x46615ecb, -0x3a07155a, -0x08ba4338, 0x4a94e896, 0x7a5fa879 },
+        { -0x087e9953, 0x1e9df75b, -0x14f32851, 0x4dfda838, -0x3e150678, -0x45ffd128, 0x11f33cfc, 0x13fedb3e },
+        { 0x13cd67a1, 0x52958faa, -0x74244ae9, -0x69a11f7f, 0x2e8845b3, 0x16e58daa, 0x5499da8f, 0x357d397d }
+    },
+    {
+        { 0x194bfbf8, 0x481dacb4, -0x451a7d67, 0x4d77e3f1, 0x7d1372a0, 0x1ef4612e, 0x70ff69e1, 0x3a8d867e },
+        { -0x4f453194, 0x1ebfa05f, 0x1caf9a1e, -0x36cb9df4, 0x1d82b61a, -0x3388e33c, -0x5a08b014, 0x2d94a16a },
+        { 0x55aff958, 0x6f58cd5d, 0x75567721, -0x45c155a4, -0x6e9add83, 0x75c12399, -0x3d0d4ca2, 0x69be1343 }
+    },
+    {
+        { 0x684b8de3, -0x7d444254, 0x3fca0718, -0x5d0b3830, -0x1f695558, 0x337f92fb, 0x63587376, 0x200d4d8c },
+        { -0x1e6836d6, 0x0e091d5e, 0x2945119f, 0x4f51019f, -0x0fcb1664, 0x143679b9, 0x4d24c696, 0x7d88112e },
+        { 0x4893b32b, 0x208aed4b, -0x41a6469c, 0x3efbf23e, -0x245a1af9, -0x289d2150, -0x7e42626c, 0x69607bd6 }
+    },
+    {
+        { -0x6cdc56fe, 0x3b7f3bd4, 0x6b2c6e53, 0x7c21b556, 0x3a7852a7, -0x1a45700b, -0x7c713200, 0x28bc77a5 },
+        { 0x68de1ce1, -0x0941fdf0, 0x0edcbc1f, -0x172ae719, 0x1b5505a5, -0x1c100230, -0x2c13c030, 0x35f63353 },
+        { -0x1da27fca, 0x63ba78a8, -0x6bcccb70, 0x63651e00, 0x288ce532, 0x48d82f20, 0x36b57524, 0x3a31abfa }
+    },
+},
+{
+    {
+        { 0x3f78d289, -0x3f708771, -0x5ebfb261, -0x01cf58d4, -0x309a3363, -0x0d887404, 0x5acb2021, 0x7ee49816 },
+        { 0x089c0a2e, 0x239e9624, 0x3afe4738, -0x38b73b40, 0x764fa12a, 0x17dbed2a, 0x321c8582, 0x639b93f0 },
+        { -0x6eee5e3d, 0x7bd508e3, -0x7f6f8b77, 0x2b2b90d4, -0x518d02e7, -0x182d513e, -0x7a49fd5a, 0x0edf493c }
+    },
+    {
+        { -0x7b89beed, 0x6767c4d2, -0x080a07cb, -0x5f6fbfc1, -0x35194122, 0x1c8fcffa, -0x2e205c97, 0x04c00c54 },
+        { 0x599b5a68, -0x51337ea8, -0x14521df2, -0x15a8b0f1, 0x22b67f07, 0x4fe41d74, 0x019d4fb4, 0x403b92e3 },
+        { -0x74b9a308, 0x4dc22f81, 0x1480eff8, 0x71a0f35a, 0x04c7d657, -0x51174053, -0x4d9e890c, 0x355bb12a }
+    },
+    {
+        { 0x5a8c7318, -0x5cfe2539, -0x4c3155ef, -0x126ffc63, 0x3bae3f2d, 0x6f077cbf, -0x1fad5272, 0x7518eaf8 },
+        { 0x7493bbf4, -0x58e19b34, -0x135c4f3d, -0x1a427b27, -0x05fa187b, 0x0a6bc50c, 0x182ec312, 0x0f9b8132 },
+        { 0x1b7f6c32, -0x5b77a63c, -0x0bc7cd68, 0x0f2d60bc, -0x364e2e27, 0x1815a929, -0x44e8aa3c, 0x47c3871b }
+    },
+    {
+        { -0x37af9950, -0x0419a2b0, -0x4c5d6650, 0x62ecc4b0, 0x441ae8e0, -0x1ac8ab16, -0x172b72a1, 0x08fea02c },
+        { 0x71ec4f48, 0x51445397, -0x3673a292, -0x07fa4e83, 0x47c3c66b, -0x089d3ee6, 0x764699dc, 0x00b89b85 },
+        { 0x68deead0, -0x7db2228a, 0x4b685d23, -0x379bbae0, 0x5d89d665, -0x4aeb3033, 0x4f75d537, 0x473829a7 }
+    },
+    {
+        { -0x52c6fd37, 0x23d9533a, -0x10fca771, 0x64c2ddce, -0x301ed04c, 0x15257390, 0x44e4d390, 0x6c668b4d },
+        { 0x4679c418, -0x7d2d258b, -0x4d9e7210, -0x19c42828, -0x53b814f6, 0x355eef24, 0x4833c6b4, 0x2078684c },
+        { 0x7a78820c, 0x3b48cf21, -0x7ed8c169, -0x0895f54e, -0x73711285, -0x56939a59, 0x4f8a433f, 0x7411a605 }
+    },
+    {
+        { 0x18b175b4, 0x579ae53d, -0x0c6d5efe, 0x68713159, 0x1eef35f5, -0x7baa1346, 0x458c398f, 0x1ec9a872 },
+        { -0x46623793, 0x4d659d32, 0x603af115, 0x044cdc75, -0x233d1b78, -0x4cb38ed4, -0x047ecb01, 0x7c136574 },
+        { 0x00a2509b, -0x47195b2c, 0x0bc882b4, -0x647e28fe, -0x0e6a8a9f, 0x57e7cc9b, -0x38329ba0, 0x3add88a5 }
+    },
+    {
+        { 0x59393046, -0x7a3d672c, 0x5ff659ec, -0x7081ca68, -0x0d0991c6, 0x1d2ca22a, -0x5bf958e0, 0x61ba1131 },
+        { -0x49ca230e, -0x5476a890, -0x0993e044, 0x02dfef6c, -0x41492e79, -0x7aacfd98, -0x3378618c, 0x249929fc },
+        { 0x16959029, -0x5c2f5f0f, -0x45814277, 0x023b6b6c, 0x26783307, 0x7bf15a3e, -0x44271319, 0x5620310c }
+    },
+    {
+        { 0x77e285d6, 0x6646b5f4, 0x6c8f6193, 0x40e8ff67, -0x544a6b23, -0x59138cef, 0x658cec4d, 0x7ec846f3 },
+        { 0x4934d643, 0x52899343, -0x5aeddd0b, -0x462407fa, -0x3c0be3de, -0x70927871, 0x4d9d9730, 0x37676a2a },
+        { 0x1da22ec7, -0x64a170c1, 0x6c01cd13, 0x130f1d77, -0x5d676048, 0x214c8fcf, 0x399b9dd5, 0x6daaf723 }
+    },
+},
+{
+    {
+        { 0x2cd13070, -0x7e514423, -0x07a5f162, -0x69d1bcdb, -0x35200135, -0x216c6e56, 0x52c230e6, 0x53177fda },
+        { 0x10628564, 0x591e4a56, -0x574b20cc, 0x2a4bb87c, -0x185c71bd, -0x21d5da8e, -0x011afb92, 0x3cbdabd9 },
+        { 0x50b9de79, -0x584368fa, -0x3cfe4a65, 0x3d12a7fb, -0x2c951c74, 0x02652e68, 0x5a6199dc, 0x79d73983 }
+    },
+    {
+        { 0x0d591737, 0x21c9d992, -0x164b932a, -0x6415be2e, 0x0d89bfca, -0x1df17be0, 0x6eae5ff8, 0x79d99f94 },
+        { 0x4131c1bd, -0x26cab20a, -0x7913a7de, 0x758094a1, -0x1ba60c3e, 0x4464ee12, -0x34eccd7e, 0x6c11fce4 },
+        { 0x68673205, -0x0e84b7cb, 0x3caad96c, 0x387deae8, 0x56ffe386, 0x61b471fd, -0x48ba5a67, 0x31741195 }
+    },
+    {
+        { 0x3b02a047, 0x17f8ba68, -0x01104938, 0x50212096, 0x1556cbe2, 0x70139be2, 0x1d98915b, 0x203e44a1 },
+        { -0x4885c9f5, -0x172efe70, -0x666a18fe, -0x66467ce0, -0x05fdb856, -0x42b02008, -0x1f2c9579, 0x2772e344 },
+        { 0x37b9e39f, -0x2979c146, 0x723b5a23, 0x105bc169, -0x59a3f89e, 0x104f6459, 0x5b4d38d4, 0x56795129 }
+    },
+    {
+        { 0x0d4b497f, 0x07242eb3, -0x46433379, 0x1ef96306, -0x27ee90bb, 0x37950934, 0x01405b04, 0x05468d62 },
+        { 0x13037524, 0x535fd606, -0x4f043d96, -0x1def520a, 0x23e990ae, -0x5372f565, -0x28d02407, 0x47204d08 },
+        { -0x06cd9822, 0x00f565a9, -0x3f2a7176, -0x31302873, -0x0ce71d72, -0x5dea1d24, -0x649cccae, 0x4599ee91 }
+    },
+    {
+        { -0x79e51a87, -0x538b9295, -0x09515624, 0x31ab0650, 0x40256d4c, 0x241d6611, 0x3d21a5de, 0x2f485e85 },
+        { 0x70e0e76b, -0x2c3ddf36, -0x1560cf6c, -0x4ed415a8, -0x3cd8ed7e, 0x294ddec8, -0x5e2e2fd8, 0x0c3539e1 },
+        { -0x63f7cc0d, 0x32974483, -0x2d543b7c, 0x6fe6257f, 0x4b358817, 0x5327d181, -0x76c01644, 0x65712585 }
+    },
+    {
+        { -0x28f711c1, -0x7e3d60e5, -0x519bf830, -0x2234a5fb, -0x2d5c1459, -0x68513e29, -0x6e2af7cf, 0x1590521a },
+        { 0x32a61161, -0x63efd049, 0x34d520a8, -0x1b71ef23, 0x6f9a9176, 0x365c6354, 0x046f6006, 0x32f6fe4c },
+        { -0x386ef534, 0x40a3a11e, -0x0e92d852, -0x6fec2008, -0x544e6a2c, 0x1a9720d8, 0x2ea98463, 0x1bb9fe45 }
+    },
+    {
+        { -0x33c98b84, -0x30a1936b, 0x6b0bc30d, 0x29420153, -0x11868510, 0x453ac67c, 0x2a8bb3c9, 0x5eae6ab3 },
+        { -0x4c2ab062, -0x162e26b0, -0x1ff2cc3f, 0x2d5f9cbe, -0x5fb03954, 0x51c2c656, 0x3c1cbcc9, 0x65c091ee },
+        { 0x14f118ea, 0x70836611, -0x6bcb6353, 0x2b37b87b, -0x4b1660c0, 0x7273f51c, 0x23d75698, 0x78a2a958 }
+    },
+    {
+        { 0x5ef83207, -0x4b0dc3be, -0x3656cb4b, -0x54076b2d, 0x39fd87f7, -0x2f8f73ed, 0x17166130, 0x18767891 },
+        { 0x5c8c2ace, -0x5d4f8d17, 0x651e9c4b, 0x69cffc96, 0x42e7b42b, 0x44328ef8, 0x22aadeb3, 0x5dd996c1 },
+        { 0x670c507c, -0x6da4a110, -0x46c3cc41, -0x7e6437be, 0x70dd003f, 0x10792e9a, 0x6e28dc74, 0x59ad4b7a }
+    },
+},
+{
+    {
+        { -0x5352715e, 0x583b04bf, 0x148be884, 0x29b743e8, 0x0810c5db, 0x2b1e583b, -0x714c4456, 0x2b5449e5 },
+        { -0x14c241b9, 0x5f3a7562, -0x71425f48, -0x0815c7ac, 0x45747299, 0x00c3e531, 0x1627d551, 0x1304e9e7 },
+        { 0x6adc9cfe, 0x789814d2, -0x74b722f5, 0x3c1bab3f, -0x068639f6, -0x25f01e01, 0x7c2dd693, 0x4468de2d }
+    },
+    {
+        { -0x079cf832, 0x4b9ad8c6, 0x435d0c28, 0x21113531, 0x657a772c, -0x2b57993b, 0x63247352, 0x5da6427e },
+        { -0x6be6b962, 0x51bb355e, 0x23ddc754, 0x33e6dc4c, 0x447f9962, -0x6c5a492a, -0x04bb429d, 0x6cce7c6f },
+        { -0x2153dd36, 0x1a94c688, -0x4451e008, -0x46f99109, -0x72a6a7f1, -0x775273c8, -0x1860d358, 0x58f29abf }
+    },
+    {
+        { 0x710ecdf6, 0x4b5a64bf, 0x462c293c, -0x4eb31ac8, -0x2af4c547, 0x3643d056, 0x185b4870, 0x6af93724 },
+        { -0x7218c198, -0x16f13055, 0x377e76a5, 0x54036f9f, -0x41fea67e, -0x0fb6a4f5, -0x580be1ca, 0x577629c4 },
+        { 0x09c6a888, 0x32200245, 0x4b558973, -0x2d1fc9ed, 0x3c33289f, -0x7c1dc9dd, 0x0caec18f, 0x701f25bb }
+    },
+    {
+        { 0x7cbec113, -0x62e70927, 0x74bfdbe4, -0x7bb5f91a, -0x53b19f2a, 0x20f5b522, 0x50955e51, 0x720a5bc0 },
+        { -0x1b9e9313, -0x3c574f08, -0x61da5783, -0x08ff99f2, -0x0b435a64, 0x61e3061f, -0x423bf417, 0x2e0c92bf },
+        { -0x647fa5cb, 0x0c3f0943, 0x6242abfc, -0x17b174c9, 0x5c229346, 0x691417f3, 0x144ef0ec, 0x0e9b9cbb }
+    },
+    {
+        { 0x5db1beee, -0x7211642b, 0x0a723fb9, -0x363c54c9, 0x1c68d791, 0x44a8f1bf, 0x1cfd3cde, 0x366d4419 },
+        { -0x04a8df53, -0x04452b71, -0x2406f2f2, -0x117e6e95, 0x635543bf, -0x2b7eceae, 0x3f337bd8, 0x221104eb },
+        { -0x0d4373ec, -0x61c3e8bd, -0x4a7a93c5, 0x2eda26fc, 0x68a7fb97, -0x3347d0f2, -0x43a6cdbc, 0x4167a4e6 }
+    },
+    {
+        { -0x07317012, -0x3d41d99b, -0x177f29d4, -0x169800ec, 0x2f364eee, -0x0ed19182, -0x34812d0a, 0x34b33370 },
+        { 0x76f62700, 0x643b9d28, 0x0e7668eb, 0x5d1d9d40, 0x21fc0684, 0x1b4b4303, 0x2255246a, 0x7938bb7e },
+        { -0x797e2934, -0x323a6e12, -0x127a58ad, -0x31fdef64, 0x58808883, -0x128b7a3f, 0x2dfe65e4, 0x1176fc6e }
+    },
+    {
+        { 0x49770eb8, -0x246f1d77, -0x530bbf5d, -0x670433d6, -0x21287865, 0x21354ffe, -0x0d96f94a, 0x1f6a3e54 },
+        { 0x5b9c619b, -0x4b509330, -0x4d5a7b80, 0x2ddfc9f4, -0x1416b23c, 0x3d4fa502, 0x677d5f34, 0x08fc3a4c },
+        { -0x2cf8cb16, 0x60a4c199, 0x31165cd6, 0x40c085b6, -0x08a67d6b, -0x1dccc1dd, 0x16b900d1, 0x4f2fad01 }
+    },
+    {
+        { -0x48c449c8, -0x69d326e3, -0x03ed63f8, -0x19fa8856, -0x0c49e977, 0x6f619b39, 0x2944ee81, 0x3451995f },
+        { -0x6b51b1ac, 0x44beb241, 0x1857ef6c, 0x5f541c51, 0x368d0498, -0x59e194d3, -0x68d10855, 0x445484a4 },
+        { -0x60158284, -0x6ead0330, -0x4f6ca30a, 0x4a816c94, 0x47285c40, 0x258e9aaa, 0x042893b7, 0x10b89ca6 }
+    },
+},
+{
+    {
+        { 0x79d34aa0, -0x2983212a, -0x33b24c61, -0x33f46140, -0x1ca2e6f1, -0x5aca5baa, -0x09e09011, 0x2e05d9ea },
+        { 0x3b646025, -0x64d5bd92, 0x385ce4cf, 0x32127190, -0x229215bb, -0x5da3003e, -0x4157218b, 0x06409010 },
+        { -0x29e414a7, -0x3bb86fe6, -0x1a2377f6, 0x661f19bc, -0x483597d9, 0x24685482, -0x101f80da, 0x293c778c }
+    },
+    {
+        { -0x5ee00e00, 0x16c795d6, -0x4ea7ea37, -0x348f2f1e, -0x64ac6a4b, -0x760d6ce0, 0x31e47b4f, 0x50b8c2d0 },
+        { 0x07069096, -0x797f6190, -0x1b1afe77, -0x5528a4eb, -0x5de5feb9, 0x07f35715, 0x12815d5e, 0x0487f3f1 },
+        { 0x068a4962, 0x48350c08, 0x51092c9a, 0x6ffdd053, -0x50903723, 0x17af4f4a, 0x3cdba58b, 0x4b0553b5 }
+    },
+    {
+        { 0x27c152d4, -0x40fadee5, -0x42e509c7, 0x5ec26849, -0x71905468, 0x5e0b2caa, 0x50bd0840, 0x054c8bdd },
+        { 0x1b32ff79, -0x639a0342, 0x03b50f9b, -0x148a1561, 0x6c07e606, -0x0312d594, 0x51717908, 0x35106cd5 },
+        { 0x1dcf073d, 0x38a0b12f, -0x48095d8a, 0x4b60a8a3, -0x2cbfb066, -0x012a53db, 0x5505c229, 0x72e82d5e }
+    },
+    {
+        { 0x69771d02, 0x00d9cdfd, 0x6cfbf17e, 0x410276cd, 0x1cb12ec7, 0x4c45306c, 0x27500861, 0x2857bf16 },
+        { -0x0f27bb38, 0x6b0b697f, -0x268634b7, -0x44ed07a4, -0x3e25f0e1, -0x2d5abe3a, 0x58ce7211, 0x7b7c2429 },
+        { 0x0101689e, -0x60de6fc1, -0x4079effb, -0x2886202d, 0x3deb0f1b, -0x5edd11a1, 0x485a00d4, 0x510df84b }
+    },
+    {
+        { -0x38f53ea2, 0x24b3c887, -0x047e48ce, -0x4f0c5aa9, -0x1a8733e5, -0x64d321d1, 0x03b54f8e, 0x4cf7ed07 },
+        { -0x6d885e06, -0x5abecc45, 0x63991237, 0x74ec3b62, 0x35d2f15a, 0x1a3c54dc, -0x1b7d45c6, 0x2d347144 },
+        { -0x670411f1, 0x6bd47c65, -0x54aa41d3, -0x61b8cc1e, 0x127610c5, 0x1093f624, -0x2f5e155c, 0x4e05e26a }
+    },
+    {
+        { -0x1e701940, 0x1833c773, -0x2c378d9b, -0x1c3b8ee6, 0x0116b283, 0x3bfd3c4f, -0x4b32b248, 0x1955875e },
+        { 0x4b531f20, -0x2564949e, 0x77509abb, 0x429a760e, -0x17dc3480, -0x24160ade, -0x77f3707e, 0x618f1856 },
+        { 0x0e399799, 0x6da6de8f, 0x40fda178, 0x7ad61aa4, 0x5e3563dd, -0x4cd327f0, 0x2ae340ae, 0x15f6beae }
+    },
+    {
+        { -0x6dba1deb, -0x4565f085, -0x2673f245, -0x0c979ed3, -0x0ddf4fe0, 0x2e84e4cb, 0x62d90eda, 0x6ba92fe9 },
+        { 0x31ec3a62, -0x79d434f4, 0x1138f3c2, -0x7ef1d4bb, 0x39dac2a4, 0x788ec4b8, -0x51d56d7f, 0x28f76867 },
+        { 0x5884e2aa, 0x3e4df965, -0x242b9a5b, -0x429d0425, 0x0de9e524, -0x28a69356, -0x4d4e4c29, 0x6e8042cc }
+    },
+    {
+        { 0x16521f7e, 0x15306536, -0x69dfc246, 0x660d06b8, 0x545f0879, 0x2d3989bc, 0x78ebd7b0, 0x4b5303af },
+        { -0x31d73592, -0x0ef2c3d7, -0x0349f6c3, -0x452cbac0, -0x5d15d2c1, -0x18bd9129, 0x4ff298b9, 0x08af9d4e },
+        { -0x41434218, 0x72f8a6c3, -0x23c57177, 0x4f0fca4a, -0x38402086, 0x6fa9d4e8, -0x649db149, 0x0dcf2d67 }
+    },
+},
+{
+    {
+        { 0x5a45f06e, 0x753941be, 0x6d9c5f65, -0x2f835113, 0x72ff51b6, 0x11776b9c, -0x10f2b257, 0x17d2d1d9 },
+        { -0x68e7d764, 0x3d594749, 0x24533f26, 0x12ebf8c5, 0x14c3ef15, 0x0262bfcb, 0x77b7518e, 0x20b878d5 },
+        { 0x073f3e6a, 0x27f2af18, -0x28adef97, -0x02c01ae7, 0x3ca60022, 0x22e3b72c, -0x339a3959, 0x72214f63 }
+    },
+    {
+        { -0x0bc4d637, 0x1d9db7b9, 0x4f518f75, -0x29fa7db6, 0x312f9dc4, -0x0d3f8d43, 0x5a1545b0, 0x1f24ac85 },
+        { 0x5307a693, -0x4b1c80c0, 0x2f336795, -0x5458eb29, 0x73761099, -0x29042f59, -0x7e8e3437, 0x5fdf48c5 },
+        { -0x716afa56, 0x24d60832, 0x0c1420ee, 0x4748c1d1, 0x06fb25a2, -0x38001ba4, 0x2ae395e6, 0x00ba739e }
+    },
+    {
+        { -0x157744da, -0x51bbd90b, -0x7b68c405, 0x360679d9, 0x26694e50, 0x5c9f030c, -0x2ae72dda, 0x72297de7 },
+        { 0x5c8790d6, 0x592e98de, 0x45c2a2df, -0x1a40482d, -0x064b66de, 0x115a3b60, 0x67ad78f3, 0x03283a3e },
+        { -0x41f346c7, 0x48241dc7, -0x749ccf80, 0x32f19b4d, 0x02289308, -0x2c2036f3, 0x46271945, 0x05e12968 }
+    },
+    {
+        { 0x242c4550, -0x52404438, -0x2fcf7e27, -0x4337f314, -0x0a37206e, -0x7bca995a, -0x7da731b4, 0x78cf25d3 },
+        { 0x2d9c495a, -0x457d114d, -0x0ed44684, -0x31103704, -0x6c4a2e20, -0x4fd25452, 0x13698d9b, 0x39c00c9c },
+        { 0x31489d68, 0x15ae6b8e, -0x63d40f79, -0x557ae355, -0x0fb105fb, -0x3658a569, 0x6b3ff832, 0x006b5207 }
+    },
+    {
+        { -0x4631f7d3, -0x0a3481ea, 0x417abc29, 0x3407f14c, 0x2bf4a7ab, -0x2b4c9432, 0x1a9f75ce, 0x7de2e956 },
+        { -0x626a87e4, 0x29e0cfe1, -0x699cef1e, -0x497e20e8, 0x70516b39, 0x57df39d3, 0x3bc76122, 0x4d57e344 },
+        { -0x495aa135, -0x218f2b0c, 0x5d85db99, 0x4801527f, -0x2c11657f, -0x24363bc0, 0x1a6029ed, 0x6b2a90af }
+    },
+    {
+        { 0x5bb2d80a, 0x77ebf324, 0x2fb9079b, -0x27cfe4b9, 0x4cee7333, -0x39b8190e, 0x276c2109, 0x465812c8 },
+        { -0x6519e169, 0x6923f4fc, -0x1fc0a02f, 0x5735281d, -0x19122ed3, -0x589b51bd, -0x2ed2c1b6, 0x5fd8f4e9 },
+        { 0x2a1062d9, 0x4d43beb2, 0x3831dc16, 0x7065fb75, -0x21d69729, 0x180d4a7b, 0x1cb16790, 0x05b32c2b }
+    },
+    {
+        { 0x7ad58195, -0x08035bd4, 0x4333f3cc, 0x3214286e, 0x340b979d, -0x493d62f3, 0x567307e1, 0x31771a48 },
+        { -0x2db25703, -0x373fa134, 0x05dfef83, -0x5e30e554, 0x7df9cd61, -0x2441100e, 0x7b471e99, 0x3b5556a3 },
+        { -0x1eb22b7e, 0x32b0c524, 0x1a2ba4b6, -0x124caeac, 0x282b5af3, -0x5c2e9fb8, 0x7a7336eb, 0x4fc079d2 }
+    },
+    {
+        { 0x0c86c50d, -0x23cb74bc, -0x336b19af, 0x1337cbc9, 0x643e3cb9, 0x6422f74d, -0x451c32f8, 0x241170c2 },
+        { -0x7640d081, 0x51c938b0, 0x02dfe9a7, 0x2497bd65, 0x7880e453, -0x00003f64, -0x3506716e, 0x124567ce },
+        { 0x0ac473b4, 0x3ff9ab86, 0x0113e435, -0x0f6ee212, -0x14393b51, 0x4ae75060, 0x6c87000d, 0x3f861296 }
+    },
+},
+{
+    {
+        { 0x638c7bf3, 0x529fdffe, 0x388b4995, -0x20d461a0, 0x1bad0249, -0x1fd84cb1, -0x46058b13, 0x7bc92fc9 },
+        { -0x086a841c, 0x0c9c5303, -0x1f7a3ebb, -0x5c3ce5e0, -0x2f7affb0, -0x4f8de28f, -0x54f40d26, 0x0aba390e },
+        { -0x7fe52607, -0x606810d2, 0x79afda3a, -0x7c9682ac, -0x42a694b0, -0x16f94c01, -0x22c04720, 0x02672b37 }
+    },
+    {
+        { 0x398ca7f5, -0x116458d7, 0x7a4849db, -0x146359db, 0x7ec544e1, 0x29eb29ce, -0x08c91d38, 0x232ca21e },
+        { 0x260885e4, 0x48b2ca8b, -0x7d4cb3e4, -0x5bd79414, 0x17f58f74, -0x6c81e5da, -0x54d35d5b, 0x741d1fcb },
+        { 0x253fcb17, -0x409ebdc3, -0x05c614ec, 0x08803cea, -0x67ae3851, -0x0e79fd21, 0x49e3414b, 0x0400f3a0 }
+    },
+    {
+        { -0x5f9184fa, 0x2efba412, 0x2c8d2560, 0x14678545, -0x29856e39, -0x2068ec15, 0x157eadf3, 0x32830ac7 },
+        { -0x459e3aa5, -0x5431fb8a, -0x3b2c68ea, 0x36a3d6d7, -0x1727d2f7, 0x6eb259d5, -0x7b28a905, 0x0c9176e9 },
+        { -0x48c89618, 0x0e782a7a, 0x75b18e2c, 0x04a05d78, -0x1433151f, 0x29525226, -0x7c1457e0, 0x0d794f83 }
+    },
+    {
+        { -0x585d1e54, 0x7be44ce7, -0x052e4749, 0x411fd93e, 0x0d5f7c9b, 0x1734a1d7, 0x3127db16, 0x0d659223 },
+        { -0x61eae90c, -0x00ca0a35, 0x648aae45, -0x117fa431, -0x46c5610d, -0x0f28c3d5, 0x2092a6c2, 0x097b0bf2 },
+        { 0x21a9d733, -0x3b7454eb, -0x29e544db, -0x593d1516, -0x3934bcfb, 0x625c6c1c, -0x6c14c599, 0x7fc90fea }
+    },
+    {
+        { -0x63834dc3, -0x3ad8214b, 0x5328404e, -0x6aac6e97, 0x7ccf2c7a, -0x29bc6d7f, -0x082705ef, 0x6ce97dab },
+        { 0x1f5c5926, 0x0408f1fe, 0x3b258bf4, 0x1a8f2f5e, -0x0238e997, 0x40a951a2, -0x3674a882, 0x6598ee93 },
+        { 0x0ef7c48f, 0x25b5a8e5, 0x6f2ce532, -0x149fcbef, -0x1ac21ac9, -0x3a18ae8d, -0x73ed44fd, 0x73119fa0 }
+    },
+    {
+        { 0x21f4774d, 0x7845b94d, 0x7897b727, -0x409d0e94, 0x3c56522b, 0x671857c0, -0x6a9dedee, 0x3cd6a852 },
+        { 0x53f1a4cb, -0x12cfed6c, -0x370ac879, -0x4319de37, 0x38bee7b9, -0x0534d4ed, -0x6157bd74, 0x3025798a },
+        { 0x3aeca999, 0x3fecde92, 0x62e8c12f, -0x4255a500, -0x69677522, 0x67b99dfc, 0x52661036, 0x3f52c028 }
+    },
+    {
+        { -0x113be93a, -0x6da74067, -0x562d098f, -0x5375afe9, 0x16dea4ab, 0x629549ab, -0x66f6ea97, 0x05d0e85c },
+        { 0x2a1351c6, -0x00155b72, -0x0580ac29, 0x28624754, 0x7582ddf1, 0x0b5ba9e5, -0x596953a7, 0x60c0104b },
+        { -0x21634169, 0x051de020, -0x4af4308c, -0x05f803aa, 0x0f11df65, 0x378cec9f, -0x546921b3, 0x36853c69 }
+    },
+    {
+        { -0x053a1842, 0x4433c0b0, 0x4c08dcbe, 0x724bae85, 0x46978f9b, -0x0e0db33c, 0x62825fc8, 0x4a0aff6d },
+        { 0x78f39b2d, 0x36d9b8de, -0x57b84614, 0x7f42ed71, 0x79bd3fde, 0x241cd1d6, -0x6d043195, 0x6a704fec },
+        { 0x61095301, -0x16e80462, 0x02a092f8, -0x3efd206c, -0x0599e6f5, -0x40f61d0b, -0x1f2301c9, 0x681109be }
+    },
+},
+{
+    {
+        { 0x36048d13, -0x63e70306, 0x73899ddd, 0x29159db3, -0x606d2f56, -0x2360caf5, -0x7875e62c, 0x26f57eee },
+        { 0x782a0dde, 0x559a0cc9, -0x158e7c7b, 0x551dcdb2, 0x31ef238c, 0x7f62865b, 0x7973613d, 0x504aa776 },
+        { 0x5687efb1, 0x0cab2cd5, 0x247af17b, 0x5180d162, 0x4f5a2467, -0x7a3ea5cc, -0x6245cf97, 0x4041943d }
+    },
+    {
+        { -0x5d935523, 0x4b217743, 0x648ab7ce, 0x47a6b424, 0x03fbc9e3, -0x34e2b086, -0x67ff2fe7, 0x12d93142 },
+        { 0x43ebcc96, -0x3c3f1146, 0x26ea9caf, -0x728b6364, 0x1c77ccc6, -0x26056a12, 0x7684340f, 0x1420a1d9 },
+        { -0x2cc8a6b1, 0x00c67799, -0x4dc55b85, 0x5e3c5140, -0x1ca00c6b, 0x44182854, 0x4359a012, 0x1b4f9231 }
+    },
+    {
+        { -0x5b67994f, 0x33cf3030, 0x215f4859, 0x251f73d2, 0x51def4f6, -0x547d55c0, 0x6f9a23f6, 0x5ff191d5 },
+        { -0x76eaf6af, 0x3e5c109d, 0x2de9696a, 0x39cefa91, -0x68a0cfe0, 0x20eae43f, 0x7f132dae, 0x239b572a },
+        { -0x53d26f98, -0x7e612bcd, 0x5fc98523, 0x2883ab79, 0x5593eb3d, -0x10ba8d80, 0x758f36cb, 0x020c526a }
+    },
+    {
+        { -0x0fbd3377, -0x16ce10a7, -0x71edb44a, 0x2c589c9d, -0x5138a669, -0x52371e76, 0x5602c50c, 0x452cfe0a },
+        { -0x61272444, 0x779834f8, -0x23835b94, -0x370d5507, -0x5c1e4f8c, -0x56adb324, 0x15313877, 0x02aacc46 },
+        { 0x647877df, -0x795f0860, 0x0e607c9f, -0x443b9bd9, -0x0e04ee37, -0x54e815db, 0x304b877b, 0x4cfb7d7b }
+    },
+    {
+        { -0x687610ee, -0x1d79663e, -0x20a8e6f3, 0x2b6ecd71, -0x13368f30, -0x3cbc37a9, 0x434d3ac5, 0x5b1d4cbc },
+        { -0x47648a02, 0x72b43d6c, -0x63952380, 0x54c694d9, 0x3ee34c9f, -0x473c55c9, 0x39075364, 0x14b4622b },
+        { -0x33f560da, -0x4904d9eb, -0x4772331b, 0x3a4f0e2b, 0x3369a705, 0x1301498b, 0x58592dd1, 0x2f98f712 }
+    },
+    {
+        { 0x4f54a701, 0x2e12ae44, -0x56342822, -0x0301c110, 0x75835de0, -0x314076f3, -0x189ebaac, 0x1d8062e9 },
+        { -0x4af061aa, 0x0c94a74c, -0x7171ece0, 0x5b1ff4a9, -0x7dcff099, -0x65d533df, -0x27f95507, 0x3a6ae249 },
+        { -0x566f83a6, 0x657ada85, -0x6e46f09e, 0x1a0ea8b5, -0x20cb4b17, -0x72f1e205, -0x510da00d, 0x298b8ce8 }
+    },
+    {
+        { 0x0a2165de, -0x7c858d16, 0x0bcf79f6, 0x3fab07b4, 0x7738ae70, 0x521636c7, 0x03a7d7dc, 0x6ba62718 },
+        { -0x1008f34e, 0x2a927953, 0x79157076, 0x4b89c92a, 0x30a7cf6a, -0x6be7ba86, 0x4d5ce485, 0x34b8a840 },
+        { -0x7c96cccb, -0x3d91134b, 0x63b5fefd, -0x2a57ec21, -0x5b4dda8d, -0x5d6c5566, 0x465e1c6a, 0x71d62bdd }
+    },
+    {
+        { -0x4e08a10b, -0x32d24a26, 0x16b065f5, -0x28806a31, 0x3f49f085, 0x14571fea, 0x262b2b3d, 0x1c333621 },
+        { -0x2c872080, 0x6533cc28, 0x0a0fa4b4, -0x0924bc87, -0x08fe25a6, -0x1c9ba007, -0x0ce8d45c, 0x74d5f317 },
+        { 0x67d9ca81, -0x57901aac, 0x2b298c37, 0x398b7c75, -0x1c539dc5, -0x2592f76e, 0x47e9d98c, 0x4aebcc45 }
+    },
+},
+{
+    {
+        { -0x5fa65bbb, 0x0de9b204, 0x4b17ad0f, -0x1ea34b56, 0x1f79c557, -0x1e4413ae, -0x2f8ef7e5, 0x2633f1b9 },
+        { 0x05d21a77, 0x53175a72, -0x2c46cb2c, -0x4f3fbbde, -0x22a21524, -0x52260db5, -0x60ef0074, 0x074f46e6 },
+        { 0x018b9910, -0x3e04be89, 0x6c0fe140, -0x5915df24, 0x4354c6ff, -0x299e0c19, -0x0e5cbf86, 0x5ecb72e6 }
+    },
+    {
+        { -0x17179669, -0x01151efa, -0x672f6c7d, -0x679ccc81, -0x55f91411, -0x6b8fb7f2, -0x2b3a3d30, 0x038b6898 },
+        { 0x2259fb4e, -0x5aea5ce5, 0x2bcac52f, 0x0960f397, -0x72cbab35, -0x124ad014, -0x3b893fe7, 0x382e2720 },
+        { -0x7531af5a, -0x0c6e3ae3, -0x51d2d6b8, 0x3142d0b9, 0x7f24ca80, -0x24b2a5e6, 0x59250ea8, 0x21aeba8b }
+    },
+    {
+        { -0x0ff780dd, 0x53853600, -0x2582a87c, 0x4c461879, -0x4be097a0, 0x6af303de, -0x3d83e713, 0x0a3c16c5 },
+        { -0x30bfaad0, 0x24f13b34, 0x43088af7, 0x3c44ea4a, 0x0006a482, 0x5dd5c517, -0x76f4f793, 0x118eb8f8 },
+        { -0x336b80c3, 0x17e49c17, -0x553e2d85, -0x3339125a, -0x4f0f71aa, -0x209f6d32, 0x2c67c36b, 0x4909b3e2 }
+    },
+    {
+        { 0x706ff64e, 0x59a16676, 0x0d86a53d, 0x10b953dd, -0x31a3f46a, 0x5848e1e6, 0x12780c68, 0x2d8b78e7 },
+        { 0x63fe2e89, -0x63637a16, 0x0e9412ec, -0x41e4506f, -0x79040185, -0x70845576, -0x10697494, 0x0fb17f9f },
+        { -0x503c6fd5, 0x79d5c62e, -0x7617f8d8, 0x773a2152, -0x1efedf47, -0x3c7519c0, 0x7b2b1a6d, 0x09ae2371 }
+    },
+    {
+        { -0x52cd4e30, 0x10ab8fa1, -0x1d8874dc, -0x165312e5, 0x373de90f, -0x577a9440, -0x225ac66a, 0x66f35ddd },
+        { 0x4e4d083c, -0x4495e6d6, 0x0029e192, 0x34ace063, -0x55054515, -0x67dba5a7, -0x25680554, 0x6d9c8a9a },
+        { 0x24997323, -0x2d826505, -0x090fe2d2, 0x1bb7e07e, -0x0ad13381, 0x2ba7472d, 0x646f9dc8, 0x03019b4f }
+    },
+    {
+        { -0x194c2395, -0x50f64dec, -0x5282d09b, 0x3f7573b5, 0x100a23b0, -0x2fe62678, -0x74a3ca09, 0x392b63a5 },
+        { 0x565345cd, 0x04a186b5, -0x433bee96, -0x111899f0, 0x78fb2a45, 0x689c73b4, 0x65697512, 0x387dcbff },
+        { -0x63f83dfb, 0x4093addc, -0x0acd3c82, -0x3a9a41eb, 0x1583402a, 0x63dbecfd, -0x10d1fcd2, 0x61722b4a }
+    },
+    {
+        { -0x7e34f1c4, -0x294f85ab, -0x26bbb697, 0x290ff006, 0x16dcda1f, 0x08680b6a, 0x5a06de59, 0x5568d2b7 },
+        { -0x1342b851, 0x0012aafe, 0x1cd46309, 0x55a266fb, 0x0967c72c, -0x0dfc1498, -0x35c3ebd7, 0x39633944 },
+        { 0x1b37cfe1, -0x72f34774, 0x053818f3, 0x05b6a5a3, -0x487826a7, -0x0d1643fc, -0x6522809c, 0x6beba124 }
+    },
+    {
+        { 0x43f5a53b, 0x5c3cecb9, 0x06c08df2, -0x633659e3, -0x7a76abb9, -0x30459c66, 0x0df09fd5, 0x5a845ae8 },
+        { -0x5a4e4ebd, 0x1d06005c, 0x7fd1cda2, 0x6d4c6bb8, 0x53fcffe7, 0x6ef59676, -0x3e31e15b, 0x097c29e8 },
+        { 0x5deb94ca, 0x4ce97dbe, -0x738f63b8, 0x38d0a438, -0x5e962f69, -0x3bc1312c, -0x081a783d, 0x0a1249ff }
+    },
+},
+{
+    {
+        { 0x7354b610, 0x0b408d9e, 0x5ba85b6e, -0x7f94cdad, 0x4a58a207, -0x2419c5fd, -0x365e20d4, 0x173bd9dd },
+        { 0x276d01c9, 0x12f0071b, -0x793b7390, -0x1847453b, 0x71d6fba9, 0x5308129b, 0x5a3db792, 0x5d88fbf9 },
+        { -0x01a78d21, 0x2b500f1e, -0x2bc6e73f, 0x58d6582e, -0x3698c520, -0x1912d872, -0x4e615ce7, 0x06e1cd13 }
+    },
+    {
+        { -0x61a4fcad, 0x472baf62, 0x278d0447, 0x3baa0b90, -0x69bc40d9, 0x0c785f46, -0x727c84ed, 0x7f3a6a1a },
+        { 0x6f166f23, 0x40d0ad51, 0x1fab6abe, 0x118e3293, -0x5fb2f772, 0x3fe35e14, 0x26e16266, 0x30806035 },
+        { 0x5d3d800b, -0x0819bbc7, -0x36fe120a, -0x6a572aab, 0x592c6339, 0x68cd7830, 0x2e51307e, 0x30d0fded }
+    },
+    {
+        { 0x68b84750, -0x634b68e2, 0x6664bbcf, -0x5f6a8dd7, 0x72fa412b, 0x5c8de726, 0x51c589d9, 0x46150843 },
+        { -0x0dedcc4d, -0x1fa6b2e6, -0x0f33b264, 0x1bdbe78e, -0x70b66589, 0x6965187f, 0x2c099868, 0x0a921420 },
+        { -0x51465fd2, -0x436fe640, 0x16034cae, 0x55c7110d, 0x659932ec, 0x0e6df501, -0x6a35a202, 0x3bca0d28 }
+    },
+    {
+        { -0x6133fe41, -0x6397714a, -0x59bb7691, -0x0f437c53, 0x5f7a9fe2, -0x35d26aa1, -0x720d7dbf, 0x4ea8b403 },
+        { 0x3c5d62a4, 0x40f031bc, -0x300f85a0, 0x19fc8b3e, 0x130fb545, -0x67e7c25e, -0x5170ec33, 0x5631dedd },
+        { -0x0e352dfe, 0x2aed460a, -0x5b73117d, 0x46305305, 0x49f11a5f, -0x6ede88bb, 0x542ca463, 0x24ce0930 }
+    },
+    {
+        { -0x020cf47b, 0x3fcfa155, 0x36372ea4, -0x2d08e972, 0x6492f844, -0x4d1f9b22, 0x324f4280, 0x549928a7 },
+        { -0x02f93efa, 0x1fe890f5, 0x5d8810f2, -0x4a3b97cb, 0x6e8caf3e, -0x7d87f702, -0x75f928b5, 0x41d4e3c2 },
+        { 0x63ee1a2e, -0x0d91cd59, -0x2da00216, -0x516e1b49, -0x2e80b297, -0x43c42cc5, -0x3f230096, 0x491b66de }
+    },
+    {
+        { -0x2f259b5f, 0x75f04a8e, 0x67e2284b, -0x12ddd351, 0x1f7b7ba4, -0x7dcb5c87, -0x48fe7499, 0x4cf6b8b0 },
+        { -0x3815cd59, -0x670a4ec3, 0x7e16db98, -0x1c2a0734, -0x340726b9, -0x53f540ae, -0x37a11b54, 0x08f338d0 },
+        { -0x66e58c43, -0x3c7c57df, -0x20cdf386, -0x54d843ff, -0x7b888f9d, -0x3ec2cce5, -0x14f87567, 0x530d4a82 }
+    },
+    {
+        { 0x6c9abf9e, 0x6d697345, 0x4900a880, 0x257fb2fc, -0x373047b0, 0x2bacf412, 0x0cbfbd5b, 0x0db3e7e0 },
+        { -0x1e06b7db, 0x004c3630, -0x7354aca6, 0x7e2d7826, -0x337b0075, -0x38b7dcdd, 0x101770b9, 0x65ea753f },
+        { -0x1df69c9d, 0x3d66fc3e, 0x61b5cb6b, -0x7e29d381, 0x13443b1a, 0x0fbe0442, 0x21e1a1db, 0x02a4ec19 }
+    },
+    {
+        { -0x0e3086a1, -0x0a379e9e, 0x26ee57f2, 0x118c8619, 0x1c063578, 0x17212485, -0x13f98031, 0x36d12b5d },
+        { 0x3b24b8a2, 0x5ce6259a, 0x45afa0b8, -0x47a88534, -0x745f8fc9, -0x33341918, 0x127809bf, 0x3d143c51 },
+        { 0x79154557, 0x126d2791, -0x0387c5f6, -0x2a1b70a4, -0x20e86454, 0x36bdb6e8, 0x5ba82859, 0x2ef51788 }
+    },
+},
+{
+    {
+        { 0x7c6da1e9, 0x1ea43683, 0x1fb9bdbe, -0x063e7651, -0x31a22eab, 0x303001fc, -0x43a841ae, 0x28a7c99e },
+        { -0x2ee1f2b6, -0x7742bc74, 0x43ccf308, 0x30cb610d, -0x6e6c8434, -0x1f65f1c9, 0x25b1720c, 0x4559135b },
+        { -0x172e6163, -0x47026c67, -0x69dbdc01, -0x6f7e6e35, 0x47c742a3, -0x4d46b729, -0x2804bb3c, 0x37f33226 }
+    },
+    {
+        { -0x37de4ee3, 0x33912553, 0x41e301df, 0x66ed42c2, 0x104222fd, 0x066fcc11, -0x3e6de971, 0x307a3b41 },
+        { -0x4aa091f8, 0x0dae8767, 0x5b203a02, 0x4a43b3b3, -0x7f507387, -0x1c8da592, 0x705fa7a3, 0x0f7a7fd1 },
+        { 0x6eb55ce0, -0x7114a2f9, -0x55f26da6, 0x2fc536bf, -0x23493918, -0x417e7cf1, -0x7d8450ae, 0x556c7045 }
+    },
+    {
+        { 0x2bf44406, -0x46b46ffe, -0x006f4acc, -0x542bdc82, -0x050792c6, 0x7600a960, -0x3dcdd11d, 0x2f45abda },
+        { 0x02e9d8b7, -0x71d4ae8d, 0x248714e8, -0x1c1add97, 0x4ca960b5, -0x42b04289, -0x3a135257, 0x6f4b4199 },
+        { -0x37107596, 0x61af4912, 0x43fb6e5e, -0x1a705b02, 0x6fd427cf, -0x4a5033a3, 0x1e1e11eb, 0x6a539328 }
+    },
+    {
+        { 0x149443cf, 0x0fff04fe, -0x79a32229, 0x53cac6d9, 0x531ed1b7, 0x31385b03, -0x532efc63, 0x5846a27c },
+        { -0x5a2e1177, -0x0c25aec7, -0x006c9678, -0x7ebaba84, 0x00e188c4, 0x3f622fed, -0x2474a5c3, 0x0f513815 },
+        { 0x1eb08717, 0x4ff5cdac, -0x6f0d1644, 0x67e8b295, 0x237afa99, 0x44093b5e, -0x78f7474e, 0x0d414bed }
+    },
+    {
+        { 0x294ac9e8, -0x7e77956e, -0x2aaab842, 0x23162b45, 0x03715983, -0x6b3043bc, 0x134bc401, 0x50eb8fdb },
+        { -0x02f18a0a, -0x30497d9b, -0x446f18f9, -0x1ba4c1d8, -0x6006d386, 0x7242a8de, -0x6ccdfd23, 0x685b3201 },
+        { -0x294ccf33, -0x3f48c13a, 0x132faff1, -0x7b1bb7f9, -0x3b5a211f, 0x732b7352, -0x55832d2e, 0x5d7c7cf1 }
+    },
+    {
+        { -0x648c5a9e, 0x33d1013e, 0x48ec26e1, -0x6da310a9, -0x22b97fa8, -0x580319ec, 0x1e9aa438, 0x78b0fad4 },
+        { 0x7a4aafa2, -0x50c4b941, 0x4d40d411, -0x4878fa14, -0x3583ea1d, 0x114f0c6a, -0x56b762b3, 0x3f364faa },
+        { -0x12fa4b78, -0x40a95bcf, -0x63b6a382, -0x5acc1994, -0x780c9ae6, -0x179ad451, 0x59d66c33, 0x02418000 }
+    },
+    {
+        { -0x30c715ff, 0x28350c7d, -0x4d6e854a, 0x7c6cdbc0, -0x7a8f7d09, -0x53183042, -0x5d265e20, 0x4d2845ab },
+        { -0x5c85a41c, -0x314f8802, -0x1a5a1149, -0x249bd0fe, 0x471270b8, -0x3d192f3b, 0x38e4529c, 0x4771b655 },
+        { 0x447070de, -0x44ac8020, 0x6dd557df, -0x3458bbbd, 0x3600dbcb, -0x2c4a5cb9, -0x06002808, 0x4aeabbe6 }
+    },
+    {
+        { -0x3b56370e, 0x6a2134bc, -0x7531d1c9, -0x040702e4, -0x66ee5f46, 0x000ae304, 0x6bc89b9e, 0x046e3a61 },
+        { 0x40d8f78c, 0x4630119e, 0x3c710e11, -0x5fe5643b, -0x76ef2287, 0x486d2b25, -0x24fcdb1b, 0x1e6c47b3 },
+        { -0x0fc6f942, 0x14e65442, -0x1c9d41d6, 0x4a019d54, -0x723dcf39, 0x68ccdfec, -0x509479e4, 0x7cfb7e3f }
+    },
+},
+{
+    {
+        { 0x305b2f51, -0x69114005, -0x776a6948, -0x2c06c753, 0x46d5dd25, -0x0f0ad239, -0x44c5ff6b, 0x57968290 },
+        { -0x73a75124, 0x4637974e, -0x540fbe5c, -0x4610dd05, -0x167f8e76, -0x1e7a26aa, -0x4ebc575a, 0x2f1b78fa },
+        { 0x0a20e101, -0x08e547bd, 0x24f0ec47, -0x0c6c9a73, 0x6ee2eed1, -0x308af658, -0x23d55c1f, 0x7dc43e35 }
+    },
+    {
+        { 0x273e9718, 0x5a782a5c, 0x5e4efd94, 0x3576c699, 0x1f237d3e, 0x0f2ed805, -0x7d2af567, 0x044fb81d },
+        { -0x7782263d, -0x7a69999b, 0x4bb05355, -0x36f064cf, -0x10df864f, -0x391f7208, 0x758cc12f, 0x7ef72016 },
+        { -0x56f81c27, -0x3e20e73b, -0x31b39ca7, 0x57b3371d, -0x4dfe44b7, -0x358fbacc, -0x63cf22d2, 0x7f79823f }
+    },
+    {
+        { 0x68f587ba, 0x6a9c1ff0, 0x0050c8de, 0x0827894e, 0x7ded5be7, 0x3cbf9955, 0x1c06d6f0, 0x64a9b043 },
+        { -0x5c4aec18, -0x7ccb2dc7, -0x46e05728, -0x3ec98f2c, -0x0a6f42cd, 0x12b54136, -0x287b264c, 0x0a4e0373 },
+        { 0x5b7d2919, 0x2eb3d6a1, -0x2ac57dcb, -0x4f4b0960, -0x765ba2b9, 0x7156ce43, -0x31e7cb94, 0x071a7d0a }
+    },
+    {
+        { 0x20e14431, -0x33f3caae, 0x09b15141, 0x0d659507, 0x209d5f36, -0x650a9de5, 0x617755d3, 0x7c69bcf7 },
+        { -0x377845f5, -0x2cf8d256, -0x405a9d12, 0x01262905, -0x3f108975, -0x30abcffe, 0x46ea7e9c, 0x2c3bcc71 },
+        { 0x04e8295f, 0x07f0d7eb, 0x2f50f37d, 0x10db1825, 0x171798d7, -0x16ae565d, 0x22aca51d, 0x6f5a9a73 }
+    },
+    {
+        { -0x5c26bb42, -0x18d62b15, -0x7f875062, -0x7261f6c0, 0x47869c03, 0x4525567a, -0x1172c4dc, 0x02ab9680 },
+        { 0x2f41c6c5, -0x745efff4, 0x0cfefb9b, -0x3b60863f, 0x3cc51c9f, 0x4efa4770, -0x1eb85036, 0x494e21a2 },
+        { -0x221af266, -0x105b757b, 0x0fb9a249, 0x219a224e, -0x26e10927, -0x05f6e0e3, -0x15b944cc, 0x6b5d76cb }
+    },
+    {
+        { 0x1e782522, -0x1f06bee9, 0x036936d3, -0x0e19518c, -0x2f0338ba, 0x408b3ea2, 0x03dd313e, 0x16fb869c },
+        { -0x13f3266c, -0x77a8aa94, 0x5cd01dba, 0x6472dc6f, -0x70bd4b89, -0x50fe96ec, -0x7ad88cac, 0x0ae333f6 },
+        { 0x33b60962, 0x288e1997, -0x27541ecd, 0x24fc72b4, 0x0991d03e, 0x4811f7ed, -0x708f2f8b, 0x3f81e38b }
+    },
+    {
+        { 0x5f17c824, 0x0adb7f35, -0x28bd665c, 0x74b923c3, -0x34071509, -0x2a83c175, 0x4cdedc3d, 0x0ad3e2d3 },
+        { 0x7ed9affe, 0x7f910fcc, 0x2465874b, 0x545cb8a1, 0x4b0c4704, -0x57c6812e, 0x04f50993, 0x50510fc1 },
+        { 0x336e249d, 0x6f0c0fc5, -0x3cce3027, 0x745ede19, 0x09eefe1c, -0x0d290300, -0x0f05e142, 0x127c158b }
+    },
+    {
+        { -0x51ae468c, -0x215d703c, 0x744dfe96, 0x1d9973d3, -0x78c7b758, 0x6240680b, -0x2e98206b, 0x4ed82479 },
+        { 0x2e9879a2, -0x09e683be, 0x52ca3647, -0x5bb5222c, 0x4b4eaccb, -0x64bec03f, 0x07ef4f68, 0x354ef87d },
+        { 0x60c5d975, -0x011c4ade, -0x14be4f48, 0x50352efc, -0x56099ac4, -0x77f753d0, 0x0539236d, 0x302d92d2 }
+    },
+},
+{
+    {
+        { 0x0df53c30, -0x6a847475, -0x719f0f68, 0x2a1c770a, 0x345796de, -0x44385990, -0x6f366437, 0x22a48f9a },
+        { -0x34c10484, 0x4c59023f, -0x39c3d56c, 0x6c2fcb99, -0x3c381f7c, -0x45be6f1e, -0x5ae78b27, 0x0e545dae },
+        { -0x72c053a8, 0x6b7dc0dc, -0x191bd403, 0x5497cd6c, -0x0bff2cfb, 0x542f7d1b, 0x048d9136, 0x4159f47f }
+    },
+    {
+        { -0x442db7c7, 0x748515a8, -0x504fd4ab, 0x77128347, 0x49a2a17f, 0x50ba2ac6, 0x3ad730f1, 0x06052551 },
+        { 0x39e31e32, 0x20ad6608, -0x7bfa41b0, -0x07e1e42b, -0x0b254397, -0x07f9bfaa, -0x318e468b, 0x14d23dd4 },
+        { -0x755d807e, -0x0dc671f7, -0x765e4fdc, 0x6d7982bb, 0x214dd24c, -0x0596bf7c, -0x5cdcfe3d, 0x71ab966f }
+    },
+    {
+        { 0x02809955, -0x4ef775f9, 0x0b43c391, 0x43b273ea, -0x01f97913, -0x35649852, -0x7cca0b13, 0x605eecbf },
+        { 0x4ded02fc, 0x2dcbd8e3, 0x596f22aa, 0x1151f3ec, 0x4e0328da, -0x435daabd, -0x6dbee4de, 0x35768fbe },
+        { 0x6c340431, -0x7cdff59b, -0x711a63d1, -0x60328e99, 0x71300f8a, 0x75d4613f, 0x60f542f9, 0x7a912faf }
+    },
+    {
+        { -0x05d2aa69, 0x253f4f8d, 0x5477130c, 0x25e49c40, -0x6694eefe, 0x00c052e5, 0x33bb6c4a, 0x33cb966e },
+        { 0x5edc1a43, -0x4dfba7a2, 0x5897c73c, -0x60f1e912, 0x4e70483c, 0x5b82c0ae, 0x2bddf9be, 0x624a170e },
+        { 0x7f116909, 0x59702804, 0x1e564467, -0x7d753be4, -0x19de8c79, 0x70417dbd, -0x0453bc7c, 0x721627ae }
+    },
+    {
+        { 0x410b2f22, -0x02cf6844, -0x4a3057bc, -0x0e5fa259, -0x10a8358c, 0x61289a1d, -0x447de6fe, 0x245ea199 },
+        { -0x78c9522b, -0x682fc43d, -0x3acd4ed0, 0x2f1422af, 0x7101bbc4, 0x3aa68a05, -0x18b06059, 0x4c946cf7 },
+        { 0x78d477f8, -0x51235997, 0x29117fe1, 0x1898ba3c, 0x720cbd58, -0x308c067d, -0x474a9caf, 0x67da12e6 }
+    },
+    {
+        { -0x7137cf74, 0x2b7ef3d3, 0x71eb94ab, -0x7d702814, -0x3af9d543, -0x7f83c4ca, 0x31a94141, 0x0cb64cb8 },
+        { -0x4b4291f9, 0x7067e187, -0x382e018c, 0x6e8f0203, 0x38c85a30, -0x6c3955d1, 0x3d75a78a, 0x76297d1f },
+        { 0x534c6378, 0x3030fc33, -0x1abe179f, -0x469ca3a4, -0x264d38d8, 0x15d9a9be, -0x0c88a235, 0x49233ea3 }
+    },
+    {
+        { 0x1c9f249b, 0x7b3985fe, -0x5edccd6d, 0x4fd6b2d5, 0x1adf4d62, -0x314cba6c, 0x542de50c, 0x6987ff6f },
+        { -0x724003c6, 0x629398fa, -0x2ab24bab, -0x1ed01ad3, -0x250dad6b, -0x0c41ee21, -0x31a184af, 0x628b140d },
+        { -0x707c8ac4, 0x47e24142, -0x79950669, 0x6317bebc, 0x3d1a9829, -0x2544a4bd, 0x5287fb2d, 0x074d8d24 }
+    },
+    {
+        { -0x3f1ceb78, 0x481875c6, -0x1ddfcb4c, 0x219429b2, 0x31283b65, 0x7223c98a, 0x342277f9, 0x3420d60b },
+        { 0x440bfc31, -0x7cc82633, -0x50ce7029, 0x729d2ca1, 0x772c2070, -0x5fbf5b5c, 0x3a7349be, 0x46002ef0 },
+        { -0x50019a09, -0x055dc522, 0x5be0764c, 0x78261ed4, 0x2f164403, 0x441c0a1e, 0x7a87d395, 0x5aea8e56 }
+    },
+},
+{
+    {
+        { -0x1b1f0e89, 0x2dbc6fb6, -0x5b42956d, 0x04e1bf29, 0x787af6e8, 0x5e1966d4, -0x4bd92fa0, 0x0edc5f5e },
+        { -0x435bd7c3, 0x7813c1a2, -0x5e79c227, -0x129d0f6f, -0x3d97057a, -0x51384348, 0x6f1cae4c, 0x10e5d3b7 },
+        { 0x53da8e67, 0x5453bfd6, 0x24a9f641, -0x1623e114, 0x03578a23, -0x4078d9c5, 0x361cba72, 0x45b46c51 }
+    },
+    {
+        { -0x75801c1c, -0x3162b223, 0x76620e30, -0x54ec9baa, -0x4cf166a8, 0x4b594f7b, 0x321229df, 0x5c1c0aef },
+        { 0x314f7fa1, -0x56bfd541, -0x71730bb0, -0x1da80e24, 0x23a8be84, 0x1dbbd54b, 0x6dcb713b, 0x2177bfa3 },
+        { -0x05862471, 0x37081bbc, -0x3da0a64d, 0x6048811e, -0x637cdb79, 0x087a7665, 0x7d8ab5bb, 0x4ae61938 }
+    },
+    {
+        { -0x67a4047d, 0x61117e44, 0x71963136, -0x031fb9d6, -0x2bda6fb5, -0x7c53cbb8, 0x5ba43d64, 0x75685abe },
+        { 0x5344a32e, -0x72240956, -0x4be4bf88, 0x7d88eab4, 0x4a130d60, 0x5eb0eb97, 0x17bf3e03, 0x1a00d91b },
+        { -0x149e0d4e, 0x6e960933, -0x3600b6ae, 0x543d0fa8, 0x7af66569, -0x208d8af0, 0x23b0e6aa, 0x135529b6 }
+    },
+    {
+        { -0x1dd17c02, -0x0a38e944, -0x17f67a3f, -0x4bd414e7, 0x14254aae, -0x136259c9, 0x1590a613, 0x5972ea05 },
+        { -0x522e2ae8, 0x18f0dbd7, -0x303ee0ef, -0x68608778, 0x7114759b, -0x78cd1e10, 0x65ca3a01, 0x79b5b81a },
+        { -0x237087ef, 0x0fd4ac20, -0x53b2b058, -0x65652d6c, -0x4cc9fbcc, -0x3fe4d29c, -0x6fa0c425, 0x4f7e9c95 }
+    },
+    {
+        { 0x355299fe, 0x71c8443d, -0x24141529, -0x7432c4e4, -0x0e5b6b9a, -0x7f6db662, -0x5ebb5238, 0x1942eec4 },
+        { 0x5781302e, 0x62674bbc, -0x765223f1, -0x27adf0c7, 0x53fbd9c6, -0x73d66652, 0x2e638e4c, 0x31993ad9 },
+        { -0x51dcb66e, 0x7dac5319, 0x0cea3e92, 0x2c1b3d91, 0x253c1122, 0x553ce494, 0x4ef9ca75, 0x2a0a6531 }
+    },
+    {
+        { 0x3c1c793a, -0x30c9e533, 0x5a35bc3b, 0x2f9ebcac, -0x57325955, 0x60e860e9, 0x6dea1a13, 0x055dc39b },
+        { -0x0806d83e, 0x2db7937f, 0x17d0a635, -0x248be0fa, 0x1155af76, 0x5982f3a2, 0x647c2ded, 0x4cf6e218 },
+        { -0x3d72a44a, -0x4ee6dd84, 0x774dffab, 0x07e24ebc, -0x1b5cd377, -0x57c38732, 0x10aa24b6, 0x121a3077 }
+    },
+    {
+        { -0x388b7c37, -0x29a68ec2, -0x47d46951, -0x77401f89, 0x1097bcd3, 0x289e2823, 0x6ced3a9b, 0x527bb94a },
+        { -0x60fcb569, -0x1b24a2a2, 0x3034bc2d, -0x1eac03f7, -0x6aae2c4f, 0x46054691, 0x7a40e52d, 0x333fc76c },
+        { -0x66a4b7d2, 0x563d992a, 0x6e383801, 0x3405d07c, 0x2f64d8e5, 0x485035de, 0x20a7a9f7, 0x6b89069b }
+    },
+    {
+        { -0x4a382489, 0x4082fa8c, -0x38cb3eab, 0x068686f8, -0x09185a82, 0x29e6c8d9, -0x589c6431, 0x0473d308 },
+        { 0x6270220d, -0x7ed55fbf, -0x06dba4b2, -0x66a57606, 0x5072ef05, -0x00523b32, -0x558c148d, 0x23bc2103 },
+        { 0x03589e05, -0x351186da, 0x46dcc492, 0x2b4b4212, -0x19fe56b1, 0x02a1ef74, -0x21fbcbe6, 0x102f73bf }
+    },
+},
+{
+    {
+        { -0x6c5c9db9, 0x358ecba2, -0x4d97029b, -0x5070679e, 0x68a01c89, 0x412f7e99, -0x328abadc, 0x5786f312 },
+        { 0x7ec20d3e, -0x4a5d2af4, -0x5f368d9d, -0x39b42292, -0x3e008cb3, 0x56e89052, 0x2b2ffaba, 0x4929c6f7 },
+        { -0x35ebfcd4, 0x337788ff, 0x447f1ee3, -0x0c6defd8, 0x231bccad, -0x74ebf8e1, -0x0dcbb87d, 0x4c817b4b }
+    },
+    {
+        { -0x5bf4bb7c, 0x413ba057, 0x4f5f6a43, -0x45b3d1e6, -0x511e29e4, 0x614ba0a5, -0x74fa23ad, 0x78a1531a },
+        { 0x2871b96e, 0x0ff85385, 0x60c3f1bb, -0x1ec16055, 0x25344402, -0x1102a6ad, 0x75b7744b, 0x0a37c370 },
+        { 0x3ad0562b, 0x6cbdf170, -0x36dade5d, -0x7130b7d0, -0x027bdb19, -0x25142cfd, 0x2e5ec56f, 0x72ad82a4 }
+    },
+    {
+        { 0x67024bc3, -0x3c976c6f, 0x49502fda, -0x71962e93, -0x1ba0b4d7, -0x030d13c4, -0x5c4b343c, 0x065f669e },
+        { -0x45049a0a, 0x3f9e8e35, -0x0d8d6c5f, 0x39d69ec8, -0x73095c30, 0x6cb8cd95, 0x73adae6d, 0x17347781 },
+        { 0x5532db4d, -0x75ff5139, 0x43e31bb1, -0x47965b1c, -0x2c580aeb, 0x4a0f8552, 0x303d7c08, 0x19adeb7c }
+    },
+    {
+        { 0x43c31794, -0x62fa4583, -0x6ccddada, 0x2470c8ff, 0x16197438, -0x7cdc2138, -0x7ea964ad, 0x28527098 },
+        { 0x53ead9a3, -0x38df349f, 0x512b636e, 0x55b2c97f, -0x2bfd6f4f, -0x4e1ca4a1, 0x3b530ee2, 0x2fd9ccf1 },
+        { 0x47f796b8, 0x07bd475b, 0x542c8f54, -0x2d384fed, 0x3b24f87e, 0x2dbd23f4, 0x7b0901d6, 0x6551afd7 }
+    },
+    {
+        { -0x5e2a3654, 0x68a24ce3, 0x10ff6461, -0x44885cc3, 0x25d3166e, 0x0f86ce44, 0x50b9623b, 0x56507c09 },
+        { 0x54aac27f, 0x4546baaf, -0x4d5ba5d8, -0x09099014, 0x562bcfe8, 0x582d1b5b, -0x6df087a1, 0x44b123f3 },
+        { -0x2e8ec19d, 0x1206f0b7, 0x15bafc74, 0x353fe3d9, 0x0ad9d94d, 0x194ceb97, -0x062fc52d, 0x62fadd7c }
+    },
+    {
+        { -0x1831ba6c, 0x3cd7bc61, -0x4822d982, -0x3294ca57, 0x4366ef27, -0x5f7f5438, 0x59c79711, 0x6ec7c46f },
+        { 0x5598a074, -0x394a6985, -0x71b6c1db, 0x5efe91ce, 0x49280888, -0x2b48d3bb, -0x5d98bf3e, 0x20ef1149 },
+        { 0x6f09a8a2, 0x2f07ad63, 0x24205e7d, -0x79681932, -0x11ca5ec7, -0x3f5103fb, -0x4a062769, 0x15e80958 }
+    },
+    {
+        { 0x5bb061c4, 0x4dd1ed35, -0x6be3f900, 0x42dc0cef, -0x0279cbf2, 0x61305dc1, 0x0e55a443, 0x56b2cc93 },
+        { 0x0c3e235b, 0x25a5ef7d, -0x41ecb119, 0x6c39c17f, 0x2dc5c327, -0x388b1ecc, -0x6dfde0c7, 0x021354b8 },
+        { -0x59403a5e, 0x1df79da6, -0x6021bc97, 0x02f3a274, -0x325c6f59, -0x4cdc260e, -0x788b2c9d, 0x7be0847b }
+    },
+    {
+        { 0x5307fa11, 0x1466f5af, -0x1293f50e, -0x7e803383, -0x3c5b5c05, 0x0a6de44e, -0x436d82f5, 0x74071475 },
+        { -0x74c0aa3d, -0x736633a6, 0x3fded2a0, 0x0611d725, 0x36b70a36, -0x12d66a01, -0x2875d9e7, 0x1f699a54 },
+        { 0x73e7ea8a, -0x188d6d0d, -0x34fba5cf, 0x296537d2, -0x2cd8b022, 0x1bd0653e, 0x76bd2966, 0x2f9a2c44 }
+    },
+},
+{
+    {
+        { -0x4aaee366, -0x5d4b2520, 0x2bffff06, 0x7ac86029, -0x0aafbdcc, -0x67e0c8a3, -0x25b15ed3, 0x3f6bd725 },
+        { 0x7f5745c6, -0x14e74655, 0x5787c690, 0x023a8aee, 0x2df7afa9, -0x48d8ed26, -0x15a3fec3, 0x36597d25 },
+        { 0x106058ac, 0x734d8d7b, 0x6fc6905f, -0x26bfa862, -0x6dfd6cd3, 0x6466f8f9, -0x259f2930, 0x7b7ecc19 }
+    },
+    {
+        { -0x58830565, 0x6dae4a51, -0x185c79b0, -0x7dd9c9ac, -0x70d27d25, 0x09bbffcd, 0x1bf5caba, 0x03bedc66 },
+        { 0x695c690d, 0x78c2373c, 0x0642906e, -0x22dad19a, 0x4ae12bd2, -0x6ae2bbbc, 0x01743956, 0x4235ad76 },
+        { 0x078975f5, 0x6258cb0d, -0x6e760d68, 0x49294254, -0x1d1c911c, -0x5f354bdd, -0x320f995f, 0x0e7ce2b0 }
+    },
+    {
+        { -0x26b48f07, -0x01590121, -0x3e0345d3, -0x0ecf3faf, 0x7f2fab89, 0x4882d47e, -0x7513114b, 0x61525613 },
+        { -0x3b737a5d, -0x3b6b9bc6, 0x3c6139ad, -0x02c9e20c, 0x3ae94d48, 0x09db17dd, -0x704b98b6, 0x666e0a5d },
+        { 0x4870cb0d, 0x2abbf64e, -0x55ba7495, -0x329a4310, 0x75e8985d, -0x6541b146, -0x2aeb211c, 0x7f0bc810 }
+    },
+    {
+        { 0x737213a0, -0x7c536253, 0x2ef72e98, -0x60090746, 0x43ec6957, 0x311e2edd, -0x213a548b, 0x1d3a907d },
+        { 0x26f4136f, -0x46ff945c, 0x57e03035, -0x7298c962, 0x4f463c28, -0x34372027, -0x0711240b, 0x0d1f8dbc },
+        { 0x3ed081dc, -0x45e96ccf, -0x7ae4cb80, 0x29329fad, 0x030321cb, 0x0128013c, -0x5ce4021d, 0x00011b44 }
+    },
+    {
+        { 0x6a0aa75c, 0x16561f69, 0x5852bd6a, -0x3e408da4, -0x65869953, 0x11a8dd7f, -0x2d7aefda, 0x63d988a2 },
+        { 0x3fc66c0c, 0x3fdfa06c, 0x4dd60dd2, 0x5d40e38e, 0x268e4d71, 0x7ae38b38, 0x6e8357e1, 0x3ac48d91 },
+        { -0x5042dcd2, 0x00120753, -0x0227097d, -0x16d43148, -0x7b18d46f, -0x07e9964d, 0x2368a066, 0x33fad52b }
+    },
+    {
+        { -0x3bdd3018, -0x72d33730, 0x05a13acb, 0x072b4f7b, -0x13095a91, -0x5c01491a, -0x46f58e1e, 0x3cc355cc },
+        { -0x3a1be1ea, 0x540649c6, 0x333f7735, 0x0af86430, -0x0cfa18ba, -0x4d53032e, -0x5da92359, 0x16c0f429 },
+        { -0x6fc16ecf, -0x16496bbd, 0x7a5637ce, -0x475b6b35, -0x45456dbc, -0x37832e5c, 0x6bae7568, 0x631eaf42 }
+    },
+    {
+        { -0x5c8ff218, 0x47d975b9, -0x1d07faae, 0x7280c5fb, 0x32e45de1, 0x53658f27, 0x665f80b5, 0x431f2c7f },
+        { -0x25990161, -0x4c16fbf0, 0x6c16e5a6, -0x7a22b4ae, 0x1ef9bf83, -0x43c2689f, 0x1ea919b5, 0x5599648b },
+        { -0x7a7084e7, -0x29fd9cbc, -0x5e15aeb6, 0x14ab352f, 0x2090a9d7, -0x76ffbbe6, -0x6edac4da, 0x7b04715f }
+    },
+    {
+        { -0x3b19453a, -0x4c893d80, 0x6d1d9b0b, -0x68f12c23, 0x450bf944, -0x4f656aa8, 0x57cde223, 0x48d0acfa },
+        { -0x530951bd, -0x7c1242d8, 0x7d5c7ab4, -0x79ca8375, -0x4814d3bc, -0x3fbfb897, -0x3d09a7c1, 0x59b37bf5 },
+        { 0x7dabe671, -0x49f0d91c, 0x622f3a37, -0x0e2e5e69, -0x1669fc6c, 0x4208ce7e, 0x336d3bdb, 0x16234191 }
+    },
+},
+{
+    {
+        { 0x3d578bbe, -0x7ad22e03, -0x3cd79ef8, 0x2b65ce72, -0x1531dd8d, 0x658c07f4, -0x13c754c0, 0x0933f804 },
+        { 0x33a63aef, -0x0e651539, 0x4442454e, 0x2c7fba5d, 0x4795e441, 0x5da87aa0, -0x5b1f4f0b, 0x413051e1 },
+        { -0x72b69b8a, -0x58549687, -0x034a5438, -0x7ede5522, 0x7b539472, -0x5a23ed11, 0x5e45351a, 0x07fd4706 }
+    },
+    {
+        { -0x6517183d, 0x30421155, -0x6bb77d5b, -0x0d7e4dd7, 0x378250e4, -0x75ec53d2, 0x54ba48f4, 0x014afa09 },
+        { 0x258d2bcd, -0x37a7c3c3, -0x509f48c1, 0x17029a4d, 0x416a3781, -0x05f0362a, 0x38b3fb23, 0x1c1e5fba },
+        { 0x1bb3666c, -0x34ce6900, 0x4bffecb9, 0x33006052, 0x1a88233c, 0x29371199, 0x3d4ed364, 0x29188436 }
+    },
+    {
+        { -0x43e54915, -0x0462c83d, 0x4d57a240, 0x02be1453, -0x075a1e0a, -0x0b28cbeb, 0x0ccc8188, 0x5964f430 },
+        { -0x23b45406, 0x033c6805, 0x5596ecc1, 0x2c15bf5e, -0x4a64e2c5, 0x1bc70624, -0x5e60f13b, 0x3ede9850 },
+        { 0x2d096800, -0x1bb5dceb, 0x70866996, 0x5c08c559, 0x46affb6e, -0x20d249f6, -0x07a90277, 0x579155c1 }
+    },
+    {
+        { 0x0817e7a6, -0x4a0e949d, 0x3c351026, -0x7f7396dd, 0x54cef201, 0x324a983b, 0x4a485345, 0x53c09208 },
+        { 0x12e0c9ef, -0x69cdb123, -0x0dbdfd69, 0x468b878d, -0x5b0a8c42, 0x199a3776, -0x716e16d6, 0x1e7fbcf1 },
+        { -0x0e345041, -0x2d2beb7f, 0x716174e5, 0x231d2db6, -0x1d5aa368, 0x0b7d7656, 0x2aa495f6, 0x3e955cd8 }
+    },
+    {
+        { 0x61bb3a3f, -0x54c60c11, 0x2eb9193e, -0x714bff9b, 0x38c11f74, -0x4a219134, 0x26f3c49f, 0x654d7e96 },
+        { 0x3ed15433, -0x1b70aca2, 0x0d7270a3, -0x2f8a96d6, -0x55219c79, 0x40fbd21d, -0x30bb6a0b, 0x14264887 },
+        { 0x5c7d2ceb, -0x1a9b3023, -0x28c83347, -0x7d115022, -0x2e064f55, 0x6107db62, -0x4bca7245, 0x0b6baac3 }
+    },
+    {
+        { 0x3700a93b, 0x204abad6, -0x25886c8d, -0x41ffdc2d, 0x633ab709, -0x27a0fcba, -0x6f7dfbee, 0x00496dc4 },
+        { -0x79dd0168, 0x7ae62bcb, -0x31476e51, 0x47762256, -0x0d1bf94c, 0x1a5a92bc, -0x7b1beaff, 0x7d294017 },
+        { -0x3d819ca0, 0x1c74b88d, -0x72eb7af4, 0x07485426, 0x3e0dcb30, -0x5eba0485, 0x43803b23, 0x10843f1b }
+    },
+    {
+        { -0x1cdb9765, -0x2a9098d3, -0x4c6b567f, -0x2e257513, -0x6e973013, -0x2284a702, 0x4d56c1e8, 0x7ce246cd },
+        { 0x376276dd, -0x3a06fbab, -0x289ba327, -0x31a6ea73, 0x1d366b39, -0x6d09a2af, 0x526996c4, 0x11574b6e },
+        { 0x7f80be53, -0x470bcf72, 0x34a9d397, 0x5f3cb8cb, 0x33cc2b2c, 0x18a961bd, 0x3a9af671, 0x710045fb }
+    },
+    {
+        { 0x059d699e, -0x5fc0379e, -0x659e6197, 0x2370cfa1, 0x2f823deb, -0x3b01c4ee, -0x580f7bb2, 0x1d1b056f },
+        { 0x101b95eb, 0x73f93d36, 0x4f6f4486, -0x0510cc87, -0x70ea1a9e, 0x5651735f, 0x58b40da1, 0x7fa3f190 },
+        { -0x1a9409e1, 0x1bc64631, 0x6e5382a3, -0x2c8654f0, 0x0540168d, 0x4d58c57e, -0x7bbd271c, 0x56625662 }
+    },
+},
+{
+    {
+        { 0x1ff38640, -0x22b6632a, 0x063625a0, 0x29cd9bc3, 0x3dd73dc3, 0x51e2d802, 0x203b9231, 0x4a25707a },
+        { -0x09d9800a, -0x461b6622, 0x742c0843, 0x7772ca7b, -0x165b0d4f, 0x23a0153f, -0x2a2faffa, 0x2cdfdfec },
+        { 0x53f6ed6a, 0x2ab7668a, 0x1dd170a1, 0x30424258, 0x3ae20161, 0x4000144c, 0x248e49fc, 0x5721896d }
+    },
+    {
+        { -0x5e2f25b2, 0x285d5091, -0x4a01c1f8, 0x4baa6fa7, -0x1e6c6c4d, 0x63e5177c, -0x3b4fcf03, 0x03c935af },
+        { -0x02e7e452, 0x0b6e5517, 0x2bb963b4, -0x6fdd9d61, 0x32064625, 0x5509bce9, -0x09c3ec26, 0x578edd74 },
+        { 0x492b0c3d, -0x668d893a, -0x201dfa04, 0x47ccc2c4, -0x229dc5c4, -0x232d647c, 0x0288c7a2, 0x3ec2ab59 }
+    },
+    {
+        { -0x51cd2e35, -0x58dec5f7, 0x40f5c2d5, 0x0f2b87df, -0x17e154d7, 0x0baea4c6, 0x6adbac5e, 0x0e1bf66c },
+        { -0x1b278447, -0x5e5f2d85, 0x61391aed, -0x5674b215, 0x73cb9b83, -0x665f2230, 0x200fcace, 0x2dd5c25a },
+        { 0x792c887e, -0x1d542a17, -0x346d92a3, 0x1a020018, -0x4551a0e2, -0x40459633, 0x5ae88f5f, 0x730548b3 }
+    },
+    {
+        { -0x5e291ccc, -0x7fa4f6b5, 0x09353f19, -0x40c10e89, 0x0622702b, 0x423f06cb, -0x2787ba23, 0x585a2277 },
+        { -0x34574712, -0x3bcaae5d, -0x4deea0ea, 0x65a26f1d, -0x5473c7b0, 0x760f4f52, 0x411db8ca, 0x3043443b },
+        { 0x33d48962, -0x5e75a07e, -0x1387da81, 0x6698c4b5, 0x373e41ff, -0x5871905b, 0x50ef981f, 0x76562789 }
+    },
+    {
+        { -0x15793063, -0x1e8f8c5d, 0x07155fdc, 0x3a8cfbb7, 0x31838a8e, 0x4853e7fc, -0x49ec09ea, 0x28bbf484 },
+        { -0x2ae03740, 0x38c3cf59, 0x0506b6f2, -0x64122d03, -0x54a8f171, 0x26bf109f, -0x3e47b95a, 0x3f4160a8 },
+        { 0x6f136c7c, -0x0d9ed0a4, -0x0922ee42, -0x50152ef9, 0x13de6f33, 0x527e9ad2, -0x7e7708a3, 0x1e79cb35 }
+    },
+    {
+        { -0x0a1f7e7f, 0x77e953d8, 0x299dded9, -0x7b5af3bc, -0x79bada1b, -0x2393d2f4, 0x39d1f2f4, 0x478ab52d },
+        { -0x11081c0f, 0x013436c3, -0x0161ef08, -0x7d749581, -0x43062104, 0x7ff908e5, 0x3a3b3831, 0x65d7951b },
+        { -0x6dad2ea7, 0x66a6a4d3, -0x78e537f9, -0x1a221e44, -0x593e3691, -0x47d394c0, 0x1a212214, 0x16d87a41 }
+    },
+    {
+        { -0x2ab1fa7d, -0x045b2a1e, 0x2ebd99fa, -0x1de05029, 0x6ee9778f, 0x497ac273, 0x7a5a6dde, 0x1f990b57 },
+        { 0x42066215, -0x4c4281a6, 0x0c5a24c1, -0x78641c33, -0x29066b49, 0x57c05db1, 0x65f38ca6, 0x28f87c81 },
+        { 0x1be8f7d6, -0x5ccbb153, -0x53158671, 0x7d1e50eb, 0x520de052, 0x77c6569e, 0x534d6d3e, 0x45882fe1 }
+    },
+    {
+        { -0x6bc3901c, -0x275366d7, -0x5c7c6d5e, -0x4a060e9f, -0x4137650d, 0x2699db13, -0x1bfa0f8c, 0x7dcf843c },
+        { 0x757983d6, 0x6669345d, 0x17aa11a6, 0x62b6ed11, -0x67a1ed71, 0x7ddd1857, -0x09d90923, 0x688fe5b8 },
+        { 0x4a4732c0, 0x6c90d648, -0x35a9cd67, -0x2adebc03, -0x6ea2391f, -0x4c41d73d, 0x7327191b, 0x6739687e }
+    },
+},
+{
+    {
+        { -0x363468e1, -0x731a5530, -0x602ab5d7, 0x1156aaa9, 0x15af9b78, 0x41f72470, 0x420f49aa, 0x1fe8cca8 },
+        { 0x200814cf, -0x609a3a16, 0x69a31740, -0x7bfac91f, 0x25c8b4ad, -0x74f12ec7, -0x16c9c9e3, 0x0080dbaf },
+        { 0x3c0cc82a, 0x72a1848f, -0x788361ac, 0x38c560c2, -0x31aabec0, 0x5004e228, 0x03429d71, 0x042418a1 }
+    },
+    {
+        { 0x20816247, 0x58e84c6f, -0x1c90286d, -0x724d4d4a, 0x1d484d85, -0x688e7daa, -0x79cd5429, 0x0822024f },
+        { -0x540c00a1, -0x766215af, 0x2fc2d8ba, -0x646c5799, -0x419142a4, 0x2c38cb97, -0x68d9c4a3, 0x114d5784 },
+        { 0x6b1beca3, -0x4cfe4484, -0x3914ec8b, 0x55393f6d, -0x68491b15, -0x6ef2d7f0, -0x62b8615d, 0x1ad4548d }
+    },
+    {
+        { 0x0fe9fed3, -0x5f901993, 0x1c587909, -0x578cc5c0, 0x0df98953, 0x30d14d80, -0x384cfda8, 0x41ce5876 },
+        { 0x389a48fd, -0x32a58260, -0x6587c8e2, -0x4c705b56, 0x2cdb8e6c, -0x392689e5, -0x3681ebbd, 0x35cf51db },
+        { -0x298f3fde, 0x59ac3bc5, -0x64ee6bfa, -0x151983f0, -0x4c87d026, -0x68674210, -0x02f8bf6e, 0x651e3201 }
+    },
+    {
+        { 0x1efcae9e, -0x5a845b60, -0x23cf756c, 0x769f4bee, 0x3603cb2e, -0x2e0ef115, 0x7e441278, 0x4099ce5e },
+        { -0x10cf3a31, -0x29c27b7d, 0x2361cc0c, 0x4cd4b496, -0x5b7bd954, -0x116f1b00, 0x18c14eeb, 0x0af51d7d },
+        { -0x75aede17, 0x1ac98e4f, -0x2405d020, 0x7dae9544, -0x29bcf207, -0x7cdf55f3, 0x2c4a2fb5, 0x66728265 }
+    },
+    {
+        { 0x2946db23, -0x52574920, 0x7b253ab7, 0x1c0ce51a, 0x66dd485b, -0x7bb737a6, -0x2f98a521, 0x7f1fc025 },
+        { -0x27943655, -0x78b9de0c, 0x56fe6fea, -0x4ab38442, 0x7fadc22c, 0x077a2425, 0x19b90d39, 0x1ab53be4 },
+        { 0x319ea6aa, -0x2711e4e8, 0x3a21f0da, 0x004d8808, -0x77c5b0b5, 0x3bd6aa1d, -0x202602ec, 0x4db9a3a6 }
+    },
+    {
+        { -0x34488398, -0x26a4ff45, -0x6e0e87b7, -0x22437b96, -0x41d7264d, 0x7cf700ae, -0x7a2ce0c2, 0x5ce1285c },
+        { -0x4663f8ab, -0x73184dc5, -0x3b0af086, 0x35c5d6ed, -0x1264af3d, 0x7e1e2ed2, -0x176cb25f, 0x36305f16 },
+        { -0x674f4218, 0x31b6972d, -0x535921a5, 0x7d920706, -0x6f759a61, -0x198cef08, -0x1020fdcb, 0x50fac2a6 }
+    },
+    {
+        { -0x090bb644, 0x295b1c86, 0x1f0ab4dd, 0x51b2e84a, -0x5571aae3, -0x3ffe34d0, 0x44f43662, 0x6a28d359 },
+        { 0x5b880f5a, -0x0c2c560d, -0x24fc183e, -0x1213faf4, -0x060f4e5e, -0x576967e1, -0x53a1cb5c, 0x49a4ae2b },
+        { 0x04a740e0, 0x28bb12ee, -0x64317e8c, 0x14313bbd, -0x173ef3c0, 0x72f5b5e4, 0x36adcd5b, 0x7cbfb199 }
+    },
+    {
+        { -0x33c91920, -0x7186c586, 0x7d586eed, -0x0605485d, -0x451e0b1c, 0x3a4f9692, -0x00a0bb82, 0x1c14b03e },
+        { 0x6b89792d, -0x5cee223e, -0x25aed99c, 0x1b30b4c6, -0x30eaf7a7, 0x0ca77b4c, 0x1b009408, 0x1de443df },
+        { 0x14a85291, 0x19647bd1, 0x1034d3af, 0x57b76cb2, 0x0f9d6dfa, 0x6329db44, 0x6a571493, 0x5ef43e58 }
+    },
+},
+{
+    {
+        { -0x37f3e540, -0x59923363, 0x1b38a436, -0x685fa30c, -0x6a24283a, -0x58140c42, -0x72818255, 0x7da0b8f6 },
+        { 0x385675a6, -0x1087dfec, -0x55025618, -0x5d9b60d0, 0x5cdfa8cb, 0x4cd1eb50, 0x1d4dc0b3, 0x46115aba },
+        { -0x3c4a258a, -0x2bf0e6ad, 0x21119e9b, 0x1dac6f73, -0x014da6a0, 0x03cc6021, -0x7c98b4b5, 0x5a5f887e }
+    },
+    {
+        { -0x5f59bc47, -0x6169d72d, -0x193cdf9c, -0x4a3c3500, 0x7c2dec32, -0x64acfd77, -0x2a2e38f4, 0x43e37ae2 },
+        { 0x70a13d11, -0x709cfe31, 0x350dd0c4, -0x303147eb, -0x5b435b82, -0x08fd682c, -0x1bb2ebcc, 0x3669b656 },
+        { -0x12591ecd, 0x387e3f06, -0x665ec540, 0x67301d51, 0x36263811, -0x42a52708, 0x4fd5e9be, 0x6a21e6cd }
+    },
+    {
+        { 0x6699b2e3, -0x10bed6ee, 0x708d1301, 0x71d30847, 0x1182b0bd, 0x325432d0, 0x001e8b36, 0x45371b07 },
+        { 0x3046e65f, -0x0e39e8f6, 0x00d23524, 0x58712a2a, -0x737d48ab, 0x69dbbd3c, -0x5e6a00a9, 0x586bf9f1 },
+        { 0x5ef8790b, -0x5924f773, 0x610937e5, 0x5278f0dc, 0x61a16eb8, -0x53fcb62e, -0x6f1ade87, 0x0eafb037 }
+    },
+    {
+        { 0x0f75ae1d, 0x5140805e, 0x2662cc30, -0x13fd041d, -0x156dc693, 0x2cebdf1e, -0x3abca44d, 0x44ae3344 },
+        { 0x3748042f, -0x69faaa3f, -0x7df455ef, 0x219a41e6, 0x73486d0c, 0x1c81f738, 0x5a02c661, 0x309acc67 },
+        { -0x445abc12, -0x630d7647, 0x5ac97142, -0x0c89f163, 0x4f9360aa, 0x1d82e5c6, 0x7f94678f, 0x62d5221b }
+    },
+    {
+        { 0x3af77a3c, 0x7585d426, -0x0116ebb3, -0x205184ef, 0x59f7193d, -0x5af98f80, -0x7c6ddfc9, 0x14f29a53 },
+        { 0x18d0936d, 0x524c299c, -0x75f3e5f4, -0x37944a94, -0x24b579cf, -0x5c8afad2, -0x438aba9e, 0x5c0efde4 },
+        { 0x25b2d7f5, -0x208e8124, -0x664acfc0, 0x21f970db, -0x3c12b39e, -0x256dcb49, 0x7bee093e, 0x5e72365c }
+    },
+    {
+        { 0x2f08b33e, 0x7d933906, -0x2060cd42, 0x5b9659e5, 0x1f9ebdfd, -0x5300c253, -0x348cb649, 0x70b20555 },
+        { 0x4571217f, 0x575bfc07, 0x0694d95b, 0x3779675d, -0x0be6e1cd, -0x65f5c845, 0x47b4eabc, 0x77f1104c },
+        { 0x55112c4c, -0x41aeec3b, -0x6577e033, 0x6688423a, 0x5e503b47, 0x44667785, 0x4a06404a, 0x0e34398f }
+    },
+    {
+        { 0x3e4b1928, 0x18930b09, 0x73f3f640, 0x7de3e10e, 0x73395d6f, -0x0bcde826, -0x35c863c2, 0x6f8aded6 },
+        { 0x3ecebde8, -0x4982dd27, 0x27822f07, 0x09b3e841, -0x4fa49273, 0x743fa61f, -0x75c9dc8e, 0x5e540536 },
+        { -0x02484d66, -0x1cbfedc3, -0x5de54d6f, 0x487b97e1, -0x02196b62, -0x066982fe, -0x372c2169, 0x780de72e }
+    },
+    {
+        { 0x00f42772, 0x671feaf3, 0x2a8c41aa, -0x708d14d6, -0x68c8cd6e, 0x29a17fd7, 0x32b587a6, 0x1defc6ad },
+        { 0x089ae7bc, 0x0ae28545, 0x1c7f4d06, 0x388ddecf, 0x0a4811b8, 0x38ac1551, 0x71928ce4, 0x0eb28bf6 },
+        { -0x10ae6a59, -0x50a441e6, -0x6e84ea13, 0x148c1277, 0x7ae5da2e, 0x2991f7fb, -0x0722d799, 0x467d201b }
+    },
+},
+{
+    {
+        { 0x296bc318, 0x745f9d56, -0x27ead19b, -0x66ca7f2c, 0x5839e9ce, -0x4f1a4ec1, -0x2bc6de40, 0x51fc2b28 },
+        { -0x0842d195, 0x7906ee72, 0x109abf4e, 0x05d270d6, -0x46be575c, -0x72a301bb, 0x1c974287, 0x44c21867 },
+        { -0x6a1d5674, 0x1b8fd117, 0x2b6b6291, 0x1c4e5ee1, 0x7424b572, 0x5b30e710, 0x4c4f4ac6, 0x6e6b9de8 }
+    },
+    {
+        { -0x07f34f78, 0x6b7c5f10, 0x56e42151, 0x736b54dc, -0x3910663c, -0x3d49df5b, -0x3c5f90be, 0x5f4c802c },
+        { 0x4b1de151, -0x200da032, -0x1ee3bfdb, -0x27be3f39, 0x54749c87, 0x2554b3c8, -0x6f71f207, 0x2d292459 },
+        { 0x7d0752da, -0x649a370f, -0x38811800, -0x77e31cc8, 0x5b62f9e3, -0x3c4aeb10, -0x413ef2b8, 0x66ed5dd5 }
+    },
+    {
+        { -0x3435fb83, -0x0f520c37, -0x0baad095, -0x7e3c4d35, 0x44735f93, -0x3025eed3, 0x7e20048c, 0x1f23a0c7 },
+        { 0x0bb2089d, 0x7d38a1c2, -0x69332bee, -0x7f7ccb1f, 0x6c97d313, -0x3b58f474, 0x03007f20, 0x2eacf8bc },
+        { -0x1a43ea90, -0x0dcab985, 0x0dbab38c, 0x03d2d902, -0x03061f62, 0x27529aa2, -0x62cb43b0, 0x0840bef2 }
+    },
+    {
+        { 0x7f37e4eb, -0x32ab1f95, -0x0a169336, -0x733ea079, -0x2ca68232, -0x47db7450, 0x6074400c, 0x246affa0 },
+        { -0x23ef4d79, 0x796dfb35, 0x5c7ff29d, 0x27176bcd, -0x384db6fb, 0x7f3d43e8, -0x6e3abd8a, 0x0304f5a1 },
+        { -0x041bacdf, 0x37d88e68, -0x3f28afce, -0x79f68ab8, -0x76b5f2cb, 0x4e9b13ef, 0x5753d325, 0x25a83cac }
+    },
+    {
+        { 0x3952b6e2, -0x60f099d7, 0x0934267b, 0x33db5e0e, -0x29f60124, -0x00badad5, -0x3af91f37, 0x06be10f5 },
+        { -0x1127e9a2, 0x10222f48, 0x4b8bcf3a, 0x623fc123, -0x3dde1710, 0x1e145c09, -0x3587d9d0, 0x7ccfa59f },
+        { -0x49d5cba1, 0x1a9615a9, 0x4a52fecc, 0x22050c56, 0x28bc0dfe, -0x585d877b, 0x1a1ee71d, 0x5e82770a }
+    },
+    {
+        { 0x42339c74, -0x17fd17f6, -0x5800051b, 0x34175166, 0x1c408cae, 0x34865d1f, 0x605bc5ee, 0x2cca982c },
+        { -0x527695a4, 0x35425183, -0x1872ad0a, -0x1798c505, -0x6d5ca09c, 0x2c66f25f, 0x3b86b102, 0x09d04f3b },
+        { 0x197dbe6e, -0x02d2a2cb, -0x741b005d, 0x207c2eea, 0x325ae918, 0x2613d8db, 0x27741d3e, 0x7a325d17 }
+    },
+    {
+        { 0x7e2a076a, -0x132d82ff, 0x1636495e, -0x28779761, -0x6e6dcc1b, 0x52a61af0, 0x7bb1ae64, 0x2a479df1 },
+        { -0x2e92021e, -0x2fc94645, -0x3b6857d7, -0x5dfaa8a9, -0x580ed999, -0x7193369a, 0x1239c180, 0x4d3b1a79 },
+        { 0x33db2710, -0x61a11172, -0x293bc35b, 0x189854de, -0x6d8e7ec8, -0x5be3dd3b, -0x5bc5a165, 0x27ad5538 }
+    },
+    {
+        { -0x71b8f884, -0x34a5829d, 0x20a1c059, -0x7248ac9f, -0x74120234, 0x549e1e4d, 0x503b179d, 0x080153b7 },
+        { 0x15350d61, 0x2746dd4b, -0x116ade49, -0x2fc03438, 0x138672ca, -0x1791c9a6, 0x7e7d89e2, 0x510e987f },
+        { 0x0a3ed3e3, -0x2259626d, -0x329f58de, 0x3d386ef1, -0x4255b11a, -0x37e852a8, 0x4fe7372a, 0x23be8d55 }
+    },
+},
+{
+    {
+        { 0x567ae7a9, -0x43e10b43, -0x29bb6743, 0x3f624cb2, 0x2c1f4ec8, -0x1bef9b2e, -0x45c7bfff, 0x2ef9c5a5 },
+        { 0x74ef4fad, -0x6a016e66, -0x095cf75e, 0x3a827bec, 0x09a47b01, -0x69b1fe2d, 0x5ba3c797, 0x71c43c4f },
+        { -0x05618b33, -0x4902920a, -0x1b50d986, -0x0e7d8744, -0x0e1066f2, -0x7daa4c30, -0x6f3a0d6d, 0x5a758ca3 }
+    },
+    {
+        { 0x1d61dc94, -0x731f6e75, -0x657ecf9a, -0x7212c9ba, -0x5017552d, -0x2b1957d7, -0x09c62bc1, 0x0a738027 },
+        { -0x26b9db6b, -0x5d48d8f0, -0x2a82affd, 0x3aa8c6d2, -0x5f4b7836, -0x1c2bff41, -0x4c148d14, 0x2dbae244 },
+        { 0x57ffe1cc, -0x67f0b5d1, -0x1e7c67bd, 0x00670d0d, 0x49fb15fd, 0x105c3f4a, 0x5126a69c, 0x2698ca63 }
+    },
+    {
+        { 0x5e3dd90e, 0x2e3d702f, -0x1b2dac7a, -0x61c0f6e8, 0x024da96a, 0x5e773ef6, 0x4afa3332, 0x3c004b0c },
+        { 0x32b0ba78, -0x189ace78, -0x6da30075, 0x381831f7, -0x5fd6e034, 0x08a81b91, 0x49caeb07, 0x1fb43dcc },
+        { 0x06f4b82b, -0x6556b954, -0x57f93b0d, 0x1ca284a5, -0x3932b879, 0x3ed3265f, -0x32e02de9, 0x6b43fd01 }
+    },
+    {
+        { 0x3e760ef3, -0x4a38bda8, -0x11f54670, 0x75dc52b9, 0x072b923f, -0x40ebd83e, 0x6ff0d9f0, 0x73420b2d },
+        { 0x4697c544, -0x3858a2b5, -0x20f00041, 0x15fdf848, -0x55b987a6, 0x2868b9eb, 0x5b52f714, 0x5a68d710 },
+        { -0x617ae1fa, -0x50d30935, -0x39ddc73c, -0x70a6c6ed, -0x66040c8d, -0x2575476a, -0x15cb4362, 0x3db5632f }
+    },
+    {
+        { -0x7d67da2b, 0x2e4990b1, 0x3e9a8991, -0x12151479, 0x4c704af8, -0x110fc2c7, -0x6a20d4f2, 0x59197ea4 },
+        { -0x08a22628, -0x0b9111d5, 0x396759a5, 0x0d17b1f6, 0x499e7273, 0x1bf2d131, 0x49d75f13, 0x04321adf },
+        { -0x1b1aa552, 0x04e16019, 0x7e2f92e9, -0x1884bc86, 0x6f159aa4, -0x3831d23f, -0x0b28f340, 0x45eafdc1 }
+    },
+    {
+        { -0x30334e13, -0x49f1b9dc, -0x42a3fc6b, 0x59dbc292, -0x23fb7e37, 0x31a09d1d, 0x5d56d940, 0x3f73ceea },
+        { -0x7fba28d5, 0x69840185, -0x30d0f9af, 0x4c22faa2, 0x6b222dc6, -0x6be5c99b, 0x0362dade, 0x5a5eebc8 },
+        { 0x0a4e8dc6, -0x4858402f, 0x44c9b339, -0x41a8ff82, 0x1557aefa, 0x60c1207f, 0x266218db, 0x26058891 }
+    },
+    {
+        { -0x39891abe, 0x4c818e3c, 0x03ceccad, 0x5e422c93, -0x4bed60f8, -0x13f83336, -0x4dbbbc48, 0x0dedfa10 },
+        { -0x7c9f00fc, 0x59f704a6, 0x7661e6f4, -0x3c26c022, 0x12873551, -0x7ce4d58d, 0x4e615d57, 0x54ad0c2e },
+        { -0x47d4add6, -0x11c4982b, -0x605a3e15, 0x36f16346, 0x6ec19fd3, -0x5a4b2d0e, -0x58856bf8, 0x62ecb2ba }
+    },
+    {
+        { -0x5049d78c, -0x6df8d7ca, 0x79e104a5, 0x5fcd5e85, -0x39cf5eb6, 0x5aad01ad, 0x75663f98, 0x61913d50 },
+        { 0x61152b3d, -0x1a1286ae, 0x0eddd7d1, 0x4962357d, -0x4694b38f, 0x7482c8d0, -0x56992742, 0x2e59f919 },
+        { 0x1a3231da, 0x0dc62d36, -0x6bdffd90, -0x05b8a7ce, 0x3f9594ce, 0x02d80151, 0x31c05d5c, 0x3ddbc2a1 }
+    },
+},
+{
+    {
+        { 0x004a35d1, -0x048ca53e, 0x3a6607c3, 0x31de0f43, -0x3ad72a67, 0x7b8591bf, -0x0a44faf4, 0x55be9a25 },
+        { 0x4ffb81ef, 0x3f50a50a, 0x3bf420bf, -0x4e1fcaf7, -0x3955d330, -0x645571e4, -0x05dc85c0, 0x32239861 },
+        { 0x33db3dbf, 0x0d005acd, -0x7f53ca1e, 0x0111b37c, 0x6f88ebeb, 0x4892d66c, 0x6508fbcd, 0x770eadb1 }
+    },
+    {
+        { -0x5faf8e47, -0x0e2c497f, 0x3592ff3a, 0x2207659a, 0x7881e40e, 0x5f016929, -0x7945c8b2, 0x16bedd0e },
+        { 0x5e4e89dd, -0x7bae0620, -0x4386c6c9, -0x3f9cfd01, 0x56a6495c, 0x5d227495, -0x5fa9fc05, 0x09a6755c },
+        { 0x2c2737b5, 0x5ecccc4f, 0x2dccb703, 0x43b79e0c, 0x4ec43df3, 0x33e008bc, -0x0f8a9940, 0x06c1b840 }
+    },
+    {
+        { -0x64fd7fa4, 0x69ee9e7f, 0x547d1640, -0x34007d76, -0x4dbcf698, 0x3d93a869, 0x3fe26972, 0x46b7b8cd },
+        { -0x5c770789, 0x7688a5c6, -0x214d4954, 0x02a96c14, 0x1b8c2af8, 0x64c9f343, 0x54a1eed6, 0x36284355 },
+        { -0x01811420, -0x167edf7a, 0x2f515437, 0x4cba6be7, 0x516efae9, 0x1d04168b, 0x43982cb9, 0x5ea13910 }
+    },
+    {
+        { -0x2a2c4ffe, 0x6f2b3be4, 0x6a09c880, -0x5013cc27, -0x57433b34, 0x035f73a4, 0x4662198b, 0x22c5b928 },
+        { -0x0b8fd11f, 0x49125c9c, -0x74da4cd3, 0x4520b71f, 0x501fef7e, 0x33193026, -0x372d14d5, 0x656d8997 },
+        { 0x433d8939, -0x34a73702, 0x6a8d7e50, -0x765f34d2, 0x09fbbe5a, 0x79ca9553, -0x32803efa, 0x0c626616 }
+    },
+    {
+        { -0x040bab4f, -0x70203c87, -0x0e5b488f, 0x45a5a970, -0x452ca6eb, -0x536de109, -0x57e3de6e, 0x42d088dc },
+        { 0x4879b61f, 0x1ffeb80a, 0x4ada21ed, 0x6396726e, 0x368025ba, 0x33c7b093, -0x0c3ce878, 0x471aa0c6 },
+        { -0x5fe9ae67, -0x7025f0c9, -0x375f1cbd, 0x0adadb77, -0x378a17e0, 0x20fbfdfc, 0x0c2206e7, 0x1cf2bea8 }
+    },
+    {
+        { 0x02c0412f, -0x67d291e6, -0x24a71702, -0x6f05b37d, -0x234e7440, 0x01c2f5bc, 0x216abc66, 0x686e0c90 },
+        { -0x4c9dfd54, -0x3d220e22, -0x2d1d855b, -0x6d5a01f7, -0x03f60e2d, 0x7d1648f6, 0x13bc4959, 0x74c2cc05 },
+        { -0x5abc6a59, 0x1fadbadb, -0x51f25996, -0x4be5fd60, -0x445c83f9, -0x40e60a68, -0x21b7bcf3, 0x6a12b8ac }
+    },
+    {
+        { 0x1aaeeb5f, 0x793bdd80, -0x3eae778f, 0x00a2a0aa, 0x1f2136b4, -0x175c8c5d, -0x036e10e7, 0x48aab888 },
+        { 0x39d495d9, -0x072515e1, 0x525f1dfc, 0x592c190e, -0x3666e2e5, -0x247342fc, -0x2770f349, 0x11f7fda3 },
+        { 0x5830f40e, 0x041f7e92, 0x79661c06, 0x002d6ca9, 0x2b046a2e, -0x79236007, -0x74fb6c2f, 0x76036092 }
+    },
+    {
+        { 0x695a0b05, -0x4bcef71b, -0x52c85c75, 0x6cb00ee8, -0x5cac8c7f, 0x5edad6ee, -0x4923cddc, 0x3f2602d4 },
+        { 0x120cf9c6, 0x21bb41c6, -0x21325a65, -0x154d55ee, 0x0aa48b34, -0x3e58d2fe, -0x1782c498, 0x215d4d27 },
+        { 0x5bcaf19c, -0x374db84a, -0x4e4d39ae, 0x49779dc3, -0x2a131d1e, -0x765e7f45, -0x31371fc7, 0x13f098a3 }
+    },
+},
+{
+    {
+        { 0x2796bb14, -0x0c55a85e, -0x64f825df, -0x77c54549, 0x31a0391c, -0x1ab41de8, -0x27cdfa07, 0x5ee7fb38 },
+        { -0x31a13ab5, -0x6523f007, -0x73d0ecf3, 0x039c2a6b, -0x0f076aeb, 0x028007c7, -0x53fb4c95, 0x78968314 },
+        { 0x41446a8e, 0x538dfdcb, 0x434937f9, -0x5a530257, 0x263c8c78, 0x46af908d, -0x6435f2f7, 0x61d0633c }
+    },
+    {
+        { -0x07038c21, -0x525cd744, -0x590fc804, -0x117b96a3, 0x38c2a909, 0x637fb4db, -0x07f98424, 0x5b23ac2d },
+        { -0x0024da9a, 0x63744935, 0x780b68bb, -0x3a429477, 0x553eec03, 0x6f1b3280, 0x47aed7f5, 0x6e965fd8 },
+        { -0x117fad85, -0x652d46ad, -0x05219273, -0x1770e656, 0x150e82cf, 0x0e711704, -0x226a2124, 0x79b9bbb9 }
+    },
+    {
+        { -0x71608c8c, -0x2e668252, -0x3044f7ea, -0x5fcd5d08, 0x6d445f0a, -0x329345ee, 0x0accb834, 0x1ba81146 },
+        { 0x6a3126c2, -0x144caac0, 0x68c8c393, -0x2d9c7c58, -0x1a46857e, 0x6c0c6429, -0x3602deb9, 0x5065f158 },
+        { 0x0c429954, 0x708169fb, -0x28913099, -0x1eb9ff54, 0x70e645ba, 0x2eaab98a, 0x58a4faf2, 0x3981f39e }
+    },
+    {
+        { 0x6de66fde, -0x37ba205b, 0x2c40483a, -0x1ead5b00, -0x384b09ce, -0x162d1e9d, -0x2343e49b, 0x30f4452e },
+        { 0x59230a93, 0x18fb8a75, 0x60e6f45d, 0x1d168f69, 0x14a93cb5, 0x3a85a945, 0x05acd0fd, 0x38dc0837 },
+        { -0x3a8a68c0, -0x7a92d87e, -0x06634134, -0x05ecba97, -0x3f15b18f, -0x77bb038d, 0x593f2469, 0x632d9a1a }
+    },
+    {
+        { -0x12f37b59, -0x40f602ef, 0x0d9f693a, 0x63f07181, 0x57cf8779, 0x21908c2d, -0x7509b45e, 0x3a5a7df2 },
+        { -0x47f8345a, -0x094494eb, -0x43ab0f29, 0x1823c7df, 0x6e29670b, -0x44e268fd, 0x47ed4a57, 0x0b24f488 },
+        { 0x511beac7, -0x23252b42, -0x12d9330e, -0x5bac7f8b, 0x005f9a65, -0x1e630061, 0x75481f63, 0x34fcf744 }
+    },
+    {
+        { 0x78cfaa98, -0x5a44e255, 0x190b72f2, 0x5ceda267, 0x0a92608e, -0x6cf636ef, 0x2fb374b0, 0x0119a304 },
+        { 0x789767ca, -0x3e681fb4, 0x38d9467d, -0x478eb235, -0x7c06a058, 0x55de8882, 0x4dfa63f7, 0x3d3bdc16 },
+        { -0x173de883, 0x67a2d89c, 0x6895d0c1, 0x669da5f6, -0x4d7d5d50, -0x0a9a671b, -0x121df58d, 0x56c088f1 }
+    },
+    {
+        { 0x24f38f02, 0x581b5fac, -0x451cf343, -0x56f41602, -0x75306d10, -0x65de96fe, -0x7ca6fc71, 0x038b7ea4 },
+        { 0x10a86e17, 0x336d3d11, 0x0b75b2fa, -0x280c77ce, 0x25072988, -0x06eacc8a, -0x66ef7479, 0x09674c6b },
+        { -0x66ce9008, -0x60b107df, -0x155872b1, 0x2f49d282, 0x5aef3174, 0x0971a5ab, 0x5969eb65, 0x6e5e3102 }
+    },
+    {
+        { 0x63066222, 0x3304fb0e, -0x785345c1, -0x04caf977, -0x73ef9e5d, -0x42e6db89, -0x2e7c79e0, 0x3058ad43 },
+        { -0x781a6c05, -0x4e939d0b, -0x35a2c18f, 0x4999edde, 0x14cc3e6d, -0x4b6e3e20, -0x76572458, 0x08f51147 },
+        { -0x1a899c30, 0x323c0ffd, -0x5dd159f0, 0x05c3df38, -0x5366b066, -0x42387543, -0x101c2367, 0x26549fa4 }
+    },
+},
+{
+    {
+        { -0x08ac6947, 0x04dbbc17, -0x2d0798ba, 0x69e6a2d7, -0x0ac1543a, -0x39bf6267, 0x332e25d2, 0x606175f6 },
+        { -0x78317077, 0x738b38d7, 0x4179a88d, -0x49d9a71e, -0x0eaece93, 0x30738c9c, 0x727275c9, 0x49128c7f },
+        { -0x0abf1823, 0x4021370e, -0x5e0e2f5b, 0x0910d6f5, 0x5b06b807, 0x4634aacd, 0x6944f235, 0x6a39e635 }
+    },
+    {
+        { 0x74049e9d, 0x1da19657, -0x6701cad5, -0x0432915f, -0x33adc95a, -0x4e3432b0, 0x3f9846e2, 0x1f5ec83d },
+        { -0x206f0c19, -0x6932a9c0, -0x2405da16, 0x6c3a760e, 0x59e33cc4, 0x24f3ef09, 0x530d2e58, 0x42889e7e },
+        { 0x328ccb75, -0x7104dc3d, -0x22789117, -0x50bd5df9, 0x5dfae796, 0x20fbdadc, 0x06bf9f51, 0x241e246b }
+    },
+    {
+        { 0x6280bbb8, 0x7eaafc9a, -0x0bfc27f7, 0x22a70f12, 0x1bfc8d20, 0x31ce40bb, -0x1742ac12, 0x2bc65635 },
+        { -0x5291670a, 0x29e68e57, 0x0b462065, 0x4c9260c8, -0x5ae144b5, 0x3f00862e, -0x4c726f69, 0x5bc2c77f },
+        { -0x5694526d, -0x172a2361, -0x21e6b824, -0x1a704e83, 0x65185fa3, 0x681532ea, 0x034a7830, 0x1fdd6c3b }
+    },
+    {
+        { 0x2dd8f7a9, -0x63ec595b, 0x3efdcabf, 0x2dbb1f8c, 0x5e08f7b5, -0x69e1cdc0, -0x4419361b, 0x48c8a121 },
+        { 0x55dc18fe, 0x0a64e28c, 0x3399ebdd, -0x1c206167, 0x70e2e652, 0x79ac4323, 0x3ae4cc0e, 0x35ff7fc3 },
+        { 0x59646445, -0x03bea584, -0x3ed749eb, -0x2ddb4d29, 0x05fbb912, 0x6035c9c9, 0x74429fab, 0x42d7a912 }
+    },
+    {
+        { -0x6cc25a44, -0x565b76b9, -0x3d168614, 0x4a58920e, 0x13e5ac4c, -0x69278000, 0x4b48b147, 0x453692d7 },
+        { -0x1508d12d, 0x4e6213e3, 0x43acd4e7, 0x6794981a, 0x6eb508cb, -0x00ab8322, 0x10fcb532, 0x6fed19dd },
+        { -0x57aa6391, -0x2288a267, -0x20ffc1dc, -0x0bd5dec0, -0x256d759a, 0x5223e229, 0x6d38f22c, 0x063f46ba }
+    },
+    {
+        { 0x37346921, 0x39843cb7, 0x38c89447, -0x58b804f9, -0x5dbacf82, -0x34727fcf, 0x6d82f068, 0x67810f8e },
+        { 0x5f536694, -0x2d2dbd77, 0x42939b2c, -0x35cc5d3b, -0x382246a4, -0x6790525a, 0x2f712d5d, 0x5a152c04 },
+        { -0x2dd7824c, 0x3eeb8fbc, 0x01a03e93, 0x72c7d3a3, -0x4267d9a6, 0x5473e88c, 0x5921b403, 0x7324aa51 }
+    },
+    {
+        { -0x17dcab35, -0x52dc0926, -0x49a8e593, 0x6962502a, -0x1c71c82f, -0x649ae9ca, -0x2e5cced1, 0x5cac5005 },
+        { 0x6c3cbe8e, -0x7a86bd0c, 0x4730c046, -0x5e2c9b4f, -0x2dc3be41, 0x1c8ed914, -0x11092a2e, 0x0838e161 },
+        { -0x161c66fc, -0x733eab34, -0x7b2197ba, 0x5b3a040b, -0x4e41a292, -0x3b2759e4, -0x2779e0fe, 0x40fb897b }
+    },
+    {
+        { 0x5ab10761, -0x1a8127b9, 0x6fd13746, 0x71435e20, -0x32fda9ce, 0x342f824e, -0x5786e185, 0x4b16281e },
+        { 0x62de37a1, -0x7b3a5570, 0x0d1d96e1, 0x421da500, 0x6a9242d9, 0x78828630, 0x690d10da, 0x3c5e464a },
+        { 0x0b813381, -0x2e3efe2b, 0x76ee6828, -0x2119f0ef, 0x383f6409, 0x0cb68893, -0x0900b7b6, 0x6183c565 }
+    },
+},
+{
+    {
+        { -0x50c09992, -0x24b97ab7, -0x0eb5f15b, -0x288030fc, -0x5b45f3b9, 0x3df23ff7, 0x32ce3c85, 0x3a10dfe1 },
+        { 0x1e6bf9d6, 0x741d5a46, 0x7777a581, 0x2305b3fc, 0x6474d3d9, -0x2baa8b5e, 0x6401e0ff, 0x1926e1dc },
+        { -0x15e83160, -0x1f80b176, 0x3a1fc1fd, 0x2fd51546, 0x31f2c0f1, 0x175322fd, -0x79e1a2eb, 0x1fa1d01d }
+    },
+    {
+        { -0x2e206b55, 0x38dcac00, -0x2ef7f217, 0x2e712bdd, -0x022a1d9e, 0x7f13e93e, -0x1165fe1b, 0x73fced18 },
+        { 0x7d599832, -0x337faa6c, 0x37f15520, 0x1e4656da, 0x4e059320, -0x6609088c, 0x6a75cf33, 0x773563bc },
+        { 0x63139cb3, 0x06b1e908, -0x3a5fc133, -0x5b6c2599, -0x529c76ce, -0x72883138, 0x1b864f44, 0x1f426b70 }
+    },
+    {
+        { -0x6e5edaae, -0x0e81ca38, 0x575e9c76, -0x48947ead, 0x0d9b723e, -0x057cbf91, 0x3fa7e438, 0x0b76bb1b },
+        { 0x41911c01, -0x1036d9b4, 0x17a22c25, -0x0e5c4848, -0x0cf0ebb9, 0x5875da6b, 0x1d31b090, 0x4e1af527 },
+        { 0x7f92939b, 0x08b8c1f9, -0x2bbb5492, -0x41988e35, -0x66447fe9, 0x22e56463, -0x488d56ab, 0x7b6dd61e }
+    },
+    {
+        { -0x54fe2d39, 0x5730abf9, 0x40143b18, 0x16fb76dc, -0x5f344d7f, -0x7993419b, -0x64009502, 0x53fa9b65 },
+        { 0x50f33d92, -0x48523e18, 0x608cd5cf, 0x7998fa4f, -0x7203a425, -0x5269d243, -0x50e2d0b1, 0x703e9bce },
+        { -0x6b77abab, 0x6c14c8e9, 0x65aed4e5, -0x7bc5a29a, -0x4329a50f, 0x181bb73e, -0x3b39e0b0, 0x398d93e5 }
+    },
+    {
+        { -0x2d181c0e, -0x3c7883a0, 0x30828bb1, 0x3b34aaa0, 0x739ef138, 0x283e26e7, 0x02c30577, 0x699c9c90 },
+        { 0x33e248f3, 0x1c4bd167, 0x15bf0a5f, -0x4261ed79, -0x5ef4fc8a, -0x2bc07310, -0x20e6e4ed, 0x53b09b5d },
+        { 0x5946f1cc, -0x0cf958dd, -0x331a2683, -0x6de8e74b, -0x7e4b168b, 0x28cdd247, 0x6fcdd907, 0x51caf30c }
+    },
+    {
+        { 0x18ac54c7, 0x737af99a, -0x3ae34cf1, -0x6fcc8724, 0x4ce10cc7, 0x2b89bc33, -0x76071666, 0x12ae29c1 },
+        { 0x7674e00a, -0x59f458be, -0x5e85840d, 0x630e8570, -0x30ccdb34, 0x3758563d, 0x2383fdaa, 0x5504aa29 },
+        { 0x1f0d01cf, -0x56613f35, 0x3a34f7ae, 0x0dd1efcc, -0x2f63b1de, 0x55ca7521, 0x58eba5ea, 0x5fd14fe9 }
+    },
+    {
+        { -0x406c3472, 0x3c42fe5e, 0x36d4565f, -0x412057af, -0x77bddf18, -0x1f0f7a62, 0x0725d128, 0x7dd73f96 },
+        { 0x2845ab2c, -0x4a23d221, 0x0a7fe993, 0x069491b1, 0x4002e346, 0x4daaf3d6, 0x586474d1, 0x093ff26e },
+        { 0x68059829, -0x4ef2db02, -0x2450dc1b, 0x75730672, -0x4ba853d7, 0x1367253a, -0x794b8f5c, 0x2f59bcbc }
+    },
+    {
+        { -0x496e3cff, 0x7041d560, -0x522818e2, -0x7adfe4c1, 0x11335585, 0x16c2e163, 0x010828b1, 0x2aa55e3d },
+        { -0x66e8eca1, -0x7c7b82be, 0x567d03d7, -0x52e46ee1, -0x4188552f, 0x7e7748d9, 0x2e51af4a, 0x5458b42e },
+        { 0x0c07444f, -0x12ae6d1a, 0x74421d10, 0x42c54e2d, -0x024a379c, 0x352b4c82, -0x7589799c, 0x13e9004a }
+    },
+},
+{
+    {
+        { -0x7f94b984, 0x1e6284c5, -0x18a29f85, -0x3a096685, -0x4c872d9e, -0x749826a8, -0x7e327490, 0x3d88d66a },
+        { 0x6c032bff, -0x344a4aab, 0x29297a3a, -0x208e6e49, -0x52127e45, -0x3e008cda, 0x68be03f5, 0x71ade8bb },
+        { 0x204ed789, -0x7489856d, -0x605f51d6, 0x762fcacb, 0x6dce4887, 0x771febcc, -0x700fa04d, 0x34306215 }
+    },
+    {
+        { 0x2a7b31b4, -0x031de6f9, -0x55a87fea, 0x4d7adc75, -0x78b86cdc, 0x0ec276a6, 0x1fda4beb, 0x6d6d9d5d },
+        { -0x1e0a40b7, -0x1fa25e59, -0x2b8c9f6e, 0x26457d6d, 0x73cc32f6, 0x77dcb077, -0x6322a033, 0x0a5d9496 },
+        { -0x164f7e7d, 0x22b1a58a, -0x3ea3c775, -0x026a2f8f, -0x7af5fae9, -0x567edc8a, -0x4480cca2, 0x33384cba }
+    },
+    {
+        { 0x26218b8d, 0x33bc627a, -0x3857f39f, -0x157f4de1, 0x173e9ee6, -0x6ba74ed5, 0x0e2f3059, 0x076247be },
+        { 0x0ca2c7b5, 0x3c6fa268, 0x6fb64fda, 0x1b508204, 0x5431d6de, -0x14accb64, 0x6b879c89, 0x5278b38f },
+        { 0x1416375a, 0x52e105f6, -0x7a54145c, -0x136850ca, 0x23a67c36, 0x26e6b506, -0x0c2b04ff, 0x5cf0e856 }
+    },
+    {
+        { 0x3db342a8, -0x415131cf, -0x7bd24812, -0x345c9ca5, -0x7e80ec11, -0x177399e0, 0x4e76d5c6, 0x1b9438aa },
+        { 0x1ae8cab4, -0x0936978d, -0x34b06d3b, 0x5e20741e, -0x733243c2, 0x2da53be5, 0x69970df7, 0x2dddfea2 },
+        { 0x166f031a, -0x75af8882, 0x0fb7a328, 0x067b39f1, 0x010fbd76, 0x1925c9a6, -0x338bf6fb, 0x6df9b575 }
+    },
+    {
+        { 0x48cade41, -0x13203ca5, -0x4dcd7d90, 0x6a88471f, 0x40a01b6a, 0x740a4a24, 0x003b5f29, 0x471e5796 },
+        { 0x27f6bdcf, 0x42c11929, 0x403d61ca, -0x706e6e86, -0x7461e09f, -0x23e3a59a, 0x04ec0f8d, 0x15960478 },
+        { -0x5312c854, -0x2569444d, -0x16df7316, 0x7a2423b5, 0x38aebae2, 0x24cc5c30, -0x23a251d1, 0x50c356af }
+    },
+    {
+        { 0x1b31b964, -0x30126321, -0x735ae50d, -0x0b79567b, -0x1573e07c, 0x14897265, -0x6cd53400, 0x784a53dd },
+        { 0x41c30318, 0x09dcbf43, -0x7ce7e232, -0x1145f9ef, -0x23e1d65f, -0x3e863f32, 0x073f35b0, 0x1dbf7b89 },
+        { 0x14fc4920, 0x2d99f9df, -0x3bb6601b, 0x76ccb60c, -0x1a30fffd, -0x5becd345, 0x54f000ea, 0x3f93d823 }
+    },
+    {
+        { 0x79e14978, -0x1553ed2f, -0x441400a2, -0x006dc00d, 0x0663ce27, 0x4af663e4, 0x11a5f5ff, 0x0fd381a8 },
+        { -0x61fb317b, -0x7e7c1898, 0x04465341, 0x678fb71e, 0x6688edac, -0x526dfa71, 0x532b099a, 0x5da350d3 },
+        { -0x5bc920ac, -0x0da95314, -0x51962918, 0x108b6168, 0x6b5d036c, 0x20d986cb, -0x011d50b0, 0x655957b9 }
+    },
+    {
+        { -0x2ffd2f54, -0x423ebf65, -0x4a33265a, 0x66660245, -0x05217a14, -0x7dce823c, 0x6ad7df0d, 0x02fe934b },
+        { -0x56fdfcf1, -0x51574f81, -0x0b9c2ebd, -0x07738996, 0x3c787a60, 0x15b08366, -0x7d985b58, 0x08eab114 },
+        { -0x3048158c, -0x10a30f00, -0x5e34bd54, 0x22897633, -0x310d7a1e, -0x2b31f3ac, -0x75eb95ab, 0x30408c04 }
+    },
+},
+{
+    {
+        { 0x193b877f, -0x44d1ff37, -0x1f23af95, -0x131c5770, 0x36de649f, -0x130c4840, -0x672161e6, 0x5f460408 },
+        { -0x7cd03125, 0x739d8845, -0x5194079d, -0x05c72937, -0x48b00109, 0x32bc0dca, 0x14bce45e, 0x73937e88 },
+        { 0x297bf48d, -0x46fc8eea, -0x2b0f97cc, -0x562ec4de, 0x4696bdc6, -0x1e68eaa9, -0x6e2a17cb, 0x2cf8a4e8 }
+    },
+    {
+        { 0x17d06ba2, 0x2cb5487e, 0x3950196b, 0x24d2381c, -0x7a6875d0, -0x289a637f, -0x6e295b0a, 0x7a6f7f28 },
+        { 0x07110f67, 0x6d93fd87, 0x7c38b549, -0x22b3f62d, -0x3d8c957a, 0x7cb16a4c, 0x58252a09, 0x2049bd6e },
+        { 0x6a9aef49, 0x7d09fd8d, 0x5b3db90b, -0x0f119f42, 0x519ebfd4, 0x4c21b52c, -0x3aba6be3, 0x6011aadf }
+    },
+    {
+        { 0x02cbf890, 0x63ded0c8, 0x0dff6aaa, -0x042f6736, -0x46491267, 0x624d0afd, 0x79340b1e, 0x69ce18b7 },
+        { -0x306a07c4, 0x5f67926d, 0x71289071, 0x7c7e8561, -0x667085a5, -0x295e180d, 0x0b62f9e0, 0x6fc5cc1b },
+        { -0x4d678635, -0x2e10aad8, -0x2b816f6e, -0x22e551c4, 0x189f2352, 0x127e0442, -0x1a8efe0f, 0x15596b3a }
+    },
+    {
+        { 0x7e5124ca, 0x09ff3116, -0x2638ba21, 0x0be4158b, 0x7ef556e5, 0x292b7d22, -0x50492ec8, 0x3aa4e241 },
+        { 0x3f9179a2, 0x462739d2, -0x68292231, -0x007cedcf, 0x53f2148a, 0x1307deb5, 0x7b5f4dda, 0x0d223768 },
+        { 0x2a3305f5, 0x2cc138bf, -0x5d16d93d, 0x48583f8f, 0x5549d2eb, 0x083ab1a2, 0x4687a36c, 0x32fcaa6e }
+    },
+    {
+        { 0x2787ccdf, 0x3207a473, -0x0dec1c08, 0x17e31908, -0x09f269b2, -0x2a4d1329, -0x3d9ff417, 0x746f6336 },
+        { -0x3a82650b, 0x7bc56e8d, -0x620f420e, 0x3e0bd2ed, 0x22efe4a3, -0x553feb22, -0x014295a4, 0x4627e9ce },
+        { -0x549368e4, 0x3f4af345, -0x66bc8ce1, -0x1d77148e, 0x0344186d, 0x33596a8a, 0x7ed66293, 0x7b491700 }
+    },
+    {
+        { -0x22ac5d23, 0x54341b28, -0x20bd03c1, -0x55e86fa5, 0x4dd2f8f4, 0x0ff592d9, -0x1f732c83, 0x1d03620f },
+        { -0x547b4f9c, 0x2d85fb5c, -0x760c43ec, 0x497810d2, 0x7b15ce0c, 0x476adc44, -0x07bb0285, 0x122ba376 },
+        { -0x5d4b1aac, -0x3dfdcd33, 0x115d187f, -0x612f02be, 0x7dd479d9, 0x2eabb4be, 0x2b68ec4c, 0x02c70bf5 }
+    },
+    {
+        { 0x458d72e1, -0x531acd41, 0x7cb73cb5, 0x5be768e0, -0x11744219, 0x56cf7d94, -0x014bc5fd, 0x6b0697e3 },
+        { 0x5d0b2fbb, -0x5d7813b5, 0x074882ca, 0x415c5790, -0x3e2f7ea4, -0x1fbb59e2, 0x409ef5e0, 0x26334f0a },
+        { -0x209d5c40, -0x49370fb6, 0x076da45d, 0x3ef000ef, 0x49f0d2a9, -0x636346a8, 0x441b2fae, 0x1cc37f43 }
+    },
+    {
+        { -0x36315147, -0x2899a90f, 0x18e5656a, 0x1c5b15f8, -0x7bb3dccc, 0x26e72832, 0x2f196838, 0x3a346f77 },
+        { 0x5cc7324f, 0x508f565a, -0x1af956de, -0x2f9e3b40, 0x5c45ac19, -0x04e75425, 0x0380314a, 0x6c6809c1 },
+        { -0x1d259538, -0x2d2aaeee, -0x4e17ae13, -0x1642fccf, -0x71398d9e, -0x69f8b923, 0x6ef7c5d0, 0x05911b9f }
+    },
+},
+{
+    {
+        { -0x3a01606c, 0x01c18980, 0x716fd5c8, -0x329a9897, -0x2e6a5f7a, -0x7e9fba3d, 0x66cc7982, 0x6e2b7f32 },
+        { -0x49c800d3, -0x162328aa, -0x36780f3c, -0x13b3cb71, -0x0c043849, -0x312a6d7b, -0x6c1e1579, 0x33053547 },
+        { -0x083ca971, -0x337fdb98, 0x19974cb3, -0x6216457e, -0x4a47eca0, -0x5448dd64, 0x6fbeba62, 0x44e2017a }
+    },
+    {
+        { -0x49359133, -0x7807d30d, 0x18f4a0c2, 0x580f893e, 0x2604e557, 0x05893007, 0x56d19c1d, 0x6cab6ac2 },
+        { 0x54dab774, -0x3b3d58bd, 0x4eaf031a, -0x71a2b3c4, 0x42838f17, -0x4893dc2e, 0x68dce4ea, 0x749a098f },
+        { 0x2cc1de60, -0x23201f60, 0x51c5575b, 0x032665ff, 0x073abeeb, 0x2c0c32f1, -0x328479fa, 0x6a882014 }
+    },
+    {
+        { -0x50b01492, -0x2eee2e84, -0x4cc55b5d, 0x050bba42, -0x114b93d0, 0x17514c3c, 0x1bc27d75, 0x54bedb8b },
+        { -0x5b8b804b, -0x5ad56d02, 0x1fa5ab89, -0x23ed5bb7, -0x47b85b32, -0x27d256b5, -0x6aed33b2, 0x4d77edce },
+        { 0x77e2189c, 0x77c8e145, -0x00663bbb, -0x5c1b9096, 0x6d335343, 0x3144dfc8, 0x7c4216a9, 0x3a96559e }
+    },
+    {
+        { -0x7f4555ae, 0x44938968, -0x0d7a6bf2, 0x4c98afc4, -0x5babb74a, -0x10b55865, -0x5a855181, 0x5278c510 },
+        { -0x0bd52d12, 0x12550d37, -0x675e040b, -0x74871ffc, 0x33894cb2, 0x5d530782, 0x3e498d0c, 0x02c84e4e },
+        { 0x294c0b94, -0x5ab22f8c, -0x20e7004a, -0x0aa2b948, -0x72517c9a, -0x0f90133b, -0x7e6f2e9b, 0x58865766 }
+    },
+    {
+        { 0x3de25cc3, -0x40a7cb10, -0x297eab6a, -0x47783752, -0x6b7e176e, 0x5105221a, -0x088dc06d, 0x6760ed19 },
+        { 0x1aef7117, -0x2b88edcf, 0x229e92c7, 0x50343101, -0x62ea6469, 0x7a95e184, -0x74a2d637, 0x2449959b },
+        { -0x53ca1ea0, 0x669ba3b7, -0x457bdfaa, 0x2eccf73f, -0x3f7fb0f9, 0x1aec1f17, 0x1856f4e7, 0x0d96bc03 }
+    },
+    {
+        { -0x338afa1f, -0x4e2acb50, 0x16c35288, 0x32cd0034, 0x0762c29d, -0x34c95a80, 0x237a0bf8, 0x5bfe69b9 },
+        { 0x75c52d82, 0x3318be77, 0x54d0aab9, 0x4cb764b5, -0x3388c26f, -0x5430c2d9, -0x7edcd776, 0x3bf4d184 },
+        { 0x78a151ab, 0x183eab7e, -0x66f6c89d, -0x44166f37, 0x4ac7e335, -0x008e8292, 0x25f39f88, 0x4c5cddb3 }
+    },
+    {
+        { -0x185606fe, 0x57750967, 0x4f5b467e, 0x2c37fdfc, 0x3177ba46, -0x4d9e99c6, -0x23d2acd5, 0x3a375e78 },
+        { 0x6190a6eb, -0x3f0948b3, 0x2db8f4e4, 0x20ea81a4, -0x68cea8a0, -0x57429083, 0x62ac7c21, 0x33b1d602 },
+        { 0x2d4dddea, -0x7ebe18d1, 0x62c607c8, -0x19150168, 0x573cafd0, 0x23c28458, 0x4ff97346, 0x46b9476f }
+    },
+    {
+        { 0x0d58359f, 0x1215505c, -0x03d73b95, 0x2a2013c7, -0x761599b2, 0x24a0a1af, -0x5eecf1e1, 0x4400b638 },
+        { 0x4f901e5c, 0x0c1ffea4, 0x2184b782, 0x2b0b6fb7, 0x0114db88, -0x1a78006f, 0x4785a142, 0x37130f36 },
+        { -0x6912e63d, 0x3a01b764, -0x12cd8dd0, 0x31e00ab0, -0x7c35ea4f, 0x520a8857, 0x5accbec7, 0x06aab987 }
+    },
+},
+{
+    {
+        { 0x512eeaef, 0x5349acf3, 0x1cc1cb49, 0x20c141d3, -0x56659773, 0x24180c07, -0x39b4d2e9, 0x555ef9d1 },
+        { -0x0a20f145, -0x3ecc667d, 0x512c4cac, -0x3f0c8a71, 0x0bb398e1, 0x2cf1130a, -0x55d8f39e, 0x6b3cecf9 },
+        { 0x3b73bd08, 0x36a770ba, -0x5c5040f4, 0x624aef08, -0x4bf6b90e, 0x5737ff98, 0x3381749d, 0x675f4de1 }
+    },
+    {
+        { 0x3bdab31d, -0x5ed00927, -0x629ad202, 0x0725d80f, -0x65416b79, 0x019c4ff3, -0x7d32c3bd, 0x60f450b8 },
+        { 0x6b1782fc, 0x0e2c5203, 0x6cad83b4, 0x64816c81, 0x6964073e, -0x2f234227, 0x0164c520, 0x13d99df7 },
+        { 0x21e5c0ca, 0x014b5ec3, -0x28e6405e, 0x4fcb69c9, 0x750023a0, 0x4e5f1c18, 0x55edac80, 0x1c06de9e }
+    },
+    {
+        { -0x00929656, -0x002ad4c0, -0x23bfb645, 0x34530b18, -0x5cb26769, 0x5e4a5c2f, 0x7d32ba2d, 0x78096f8e },
+        { -0x5cc13b1e, -0x66f0852a, -0x41d11f72, 0x6608f938, 0x63284515, -0x635ebc3b, -0x13d249f3, 0x4cf38a1f },
+        { 0x0dfa5ce7, -0x5f55559b, 0x48b5478c, -0x063b61d6, 0x7003725b, 0x4f09cc7d, 0x26091abe, 0x373cad3a }
+    },
+    {
+        { -0x76224453, -0x0e415705, 0x61aeaecb, 0x3bcb2cbc, 0x1f9b8d9d, -0x70a75845, 0x5112a686, 0x21547eda },
+        { -0x7d360a84, -0x4d6b9cb3, 0x24934536, 0x1fcbfde1, 0x418cdb5a, -0x6163b24d, 0x454419fc, 0x0040f3d9 },
+        { -0x02a6792d, -0x210216c7, 0x510a380c, -0x0bd8d377, -0x44cee647, -0x48d45bf9, 0x4a254df4, 0x63550a33 }
+    },
+    {
+        { 0x72547b49, -0x6445a7bb, -0x1d3bf720, -0x0cfa3906, -0x38cb0e73, 0x60e8fa69, -0x55828986, 0x39a92baf },
+        { -0x4a9630c9, 0x6507d6ed, 0x0ca52ee1, 0x178429b0, -0x149429a3, -0x1583ff70, -0x250870af, 0x3eea62c7 },
+        { -0x196cd8b2, -0x62db38ed, 0x68dbd375, 0x5f638577, -0x14754c66, 0x70525560, 0x65c9c4cd, 0x68436a06 }
+    },
+    {
+        { -0x17dfef84, 0x1e56d317, -0x7bf5169b, -0x3ad997bc, 0x320ffc7a, -0x3e1f5e3a, -0x6e9eeb8e, 0x5373669c },
+        { 0x202f3f27, -0x43fdca18, 0x64f975b0, -0x38a3ff1e, -0x5c73dbea, -0x6e5b162b, -0x75487607, 0x17b6e7f6 },
+        { -0x65f1ada9, 0x5d2814ab, -0x36354c04, -0x6f70df7c, 0x5b2d1eca, -0x50350a78, 0x78f87d11, 0x1cb4b5a6 }
+    },
+    {
+        { -0x5d5ff819, 0x6b74aa62, -0x0f8e384f, -0x0cee1f50, 0x000be223, 0x5707e438, -0x7d109154, 0x2dc0fd2d },
+        { 0x394afc6c, -0x499b3f95, -0x6725a04f, 0x0c88de24, 0x4bcad834, 0x4f8d0316, -0x218bcb5e, 0x330bca78 },
+        { 0x1119744e, -0x67d1007c, 0x2b074724, -0x0696a16a, -0x4036ac05, -0x3a753eb1, 0x369f1cf5, 0x3c31be1b }
+    },
+    {
+        { -0x0634bd8e, -0x3e97436d, -0x38312468, -0x51478ee1, 0x34ac8d7a, 0x7f0e52aa, 0x7e7d55bb, 0x41cec109 },
+        { 0x08948aee, -0x4f0b79b3, -0x6e45e391, 0x07dc19ee, -0x59535ea8, 0x7975cdae, 0x4262d4bb, 0x330b6113 },
+        { -0x5d927f76, -0x0869e629, 0x1d9e156d, -0x44e02b62, -0x245e20d9, 0x73d7c36c, 0x1f28777d, 0x26b44cd9 }
+    },
+},
+{
+    {
+        { -0x4fd7a0c9, -0x50bb7bd3, 0x47efc8df, -0x78ace770, -0x07df6866, -0x6a8b1f6f, 0x69615579, 0x0e378d60 },
+        { 0x393aa6d8, 0x300a9035, -0x5ed44e33, 0x2b501131, -0x0f6c3dde, 0x7b1ff677, -0x3547d453, 0x4309c1f8 },
+        { -0x7cf8a5ab, -0x26056e8f, 0x6b009fdc, 0x4bdb5ad2, -0x29c210f2, 0x7829ad2c, 0x75fd3877, 0x078fc549 }
+    },
+    {
+        { -0x47cc5676, -0x1dffb4a5, 0x2d4c3330, 0x44775dec, 0x7eace913, 0x3aa24406, -0x2a71ff57, 0x272630e3 },
+        { 0x28878f2d, -0x782042ec, 0x1e9421a1, 0x134636dd, 0x257341a3, 0x4f17c951, -0x52d69348, 0x5df98d4b },
+        { -0x1336f4ac, -0x0c987030, 0x12043599, -0x0ffeba65, 0x3758b89b, 0x26725fbc, 0x73a719ae, 0x4325e4aa }
+    },
+    {
+        { -0x30960a63, -0x12db9d66, -0x22a5440c, 0x2a4a1cce, 0x56b2d67b, 0x3535ca1f, 0x43b1b42d, 0x5d8c68d0 },
+        { 0x433c3493, 0x657dc6ef, -0x7f24073d, 0x65375e9f, 0x5b372dae, 0x47fd2d46, 0x796e7947, 0x4966ab79 },
+        { -0x1c4bd4f6, -0x11ccd2b3, 0x16a4601c, -0x27b1a5d5, 0x078ba3e4, 0x78243877, 0x184ee437, 0x77ed1eb4 }
+    },
+    {
+        { -0x616d12e6, 0x185d43f8, -0x01b8e63a, -0x4fb5e116, -0x590fc0b1, 0x499fbe88, 0x3c859bdd, 0x5d8b0d2f },
+        { 0x201839a0, -0x402b1ec1, 0x3e3df161, -0x5110001e, 0x6b5d1fe3, -0x49a4fb10, 0x2b62fbc0, 0x52e085fb },
+        { -0x5ab30d46, 0x124079ea, 0x001b26e7, -0x28db9a15, -0x36850803, 0x6843bcfd, 0x55eacd02, 0x0524b42b }
+    },
+    {
+        { -0x647d6154, -0x43e72353, -0x4a0a8630, 0x23ae7d28, 0x69384233, -0x3cb9edd6, -0x182b5377, 0x1a6110b2 },
+        { -0x1babb850, -0x02f2a242, 0x092005ee, 0x6cec351a, 0x567579cb, -0x665b87bc, 0x16e7fa45, 0x59d242a2 },
+        { -0x19966854, 0x4f833f6a, 0x361839a4, 0x6849762a, -0x68f54adb, 0x6985dec1, -0x234e0aba, 0x53045e89 }
+    },
+    {
+        { -0x72ba01ee, -0x7b25c322, -0x1bbb1d2e, -0x42bd3de8, 0x1f7e3598, -0x57ae6988, 0x5616e2b2, 0x7642c93f },
+        { -0x28acac25, -0x34744cba, -0x51aee1de, -0x03034db5, -0x2af51911, -0x345b72c0, -0x0b0834a3, 0x26e3bae5 },
+        { 0x4595f8e4, 0x2323daa7, -0x7a85414c, -0x21977375, 0x1c59326e, 0x3fc48e96, 0x15c9b8ba, 0x0b2e73ca }
+    },
+    {
+        { 0x79c03a55, 0x0e3fbfaf, 0x4cbb5acf, 0x3077af05, -0x24c21c61, -0x2a3aadbb, 0x476a4af7, 0x015e68c1 },
+        { -0x3e80afda, -0x2944bbd8, -0x04a56359, -0x614d8ddd, 0x1919c644, -0x1c845afd, -0x4a6599fe, 0x21ce380d },
+        { 0x20066a38, -0x3e2ad7ae, 0x3570aef3, -0x6a9fc1ae, 0x226b8a4d, -0x7cd9a659, 0x1f8eedc9, 0x5dd68909 }
+    },
+    {
+        { -0x5acecf7c, 0x1d022591, -0x29d8f78e, -0x35d2b552, 0x2f0bfd20, -0x795ed47b, -0x528258b8, 0x56e6c439 },
+        { -0x402c37aa, -0x34537b22, -0x4ca00dbc, 0x1624c348, 0x5d9cad07, -0x48077236, -0x5d3d1418, 0x3b0e574d },
+        { 0x42bdbae6, -0x38fb00b7, -0x4d21e087, 0x5e21ade2, 0x5652fad8, -0x16a24c0d, -0x70f7143f, 0x0822b537 }
+    },
+},
+{
+    {
+        { 0x62730383, -0x1e480d6d, -0x143575d4, 0x4b5279ff, -0x402becec, -0x25038876, -0x638d9ef1, 0x7deb1014 },
+        { -0x70c78b8b, 0x51f04847, -0x634134c4, -0x4da2430c, -0x2660dfab, -0x6554edbc, 0x1c10a5d6, 0x2c709e6c },
+        { -0x78991186, -0x349d5096, 0x5553cd0e, 0x66cbec04, 0x0f0be4b5, 0x58800138, -0x09d31d16, 0x08e68e9f }
+    },
+    {
+        { 0x0ab8f2f9, 0x2f2d09d5, -0x3aa6dc21, -0x5346de73, 0x73766cb9, 0x4a8f3426, 0x38f719f5, 0x4cb13bd7 },
+        { 0x4bc130ad, 0x34ad500a, 0x3d0bd49c, -0x72c724b7, 0x500a89be, -0x5da3c268, -0x1145c4f7, 0x2f1f3f87 },
+        { -0x1aea49b6, -0x087b738b, -0x24b56fc8, -0x5a6afe46, 0x3f751b50, -0x3df2cec1, -0x3f51d118, 0x19a1e353 }
+    },
+    {
+        { -0x2a694243, -0x4bde8d33, -0x671103c0, -0x6c1fbabd, -0x4bbef64b, -0x604eacb9, 0x0266ae34, 0x736bd399 },
+        { -0x4505fa3d, 0x7d1c7560, -0x391aa19f, -0x4c1e5f60, -0x3f299b8d, -0x1cad68e8, -0x3df3cb7a, 0x41546b11 },
+        { -0x6ccb4c4c, -0x7aacd2b0, 0x60816573, 0x46fd114b, 0x425c8375, -0x33a0a0d0, -0x478054a4, 0x412295a2 }
+    },
+    {
+        { -0x1d6c153a, 0x2e655261, 0x2133acdb, -0x7ba56dfd, 0x7900996b, 0x460975cb, 0x195add80, 0x0760bb8d },
+        { -0x0a812917, 0x19c99b88, 0x6df8c825, 0x5393cb26, -0x4cf52d8d, 0x5cee3213, -0x4ad2d1cc, 0x14e153eb },
+        { -0x32197e76, 0x413e1a17, -0x12965f7c, 0x57156da9, 0x46caccb1, 0x2cbf268f, -0x3cc53a0e, 0x6b34be9b }
+    },
+    {
+        { 0x6571f2d3, 0x11fc6965, 0x530e737a, -0x393617bb, -0x2b01afcb, -0x1cc5185e, 0x2e6dd30b, 0x01b9c7b6 },
+        { 0x3a78c0b2, -0x0c20d09c, -0x0dd1fd84, 0x4c3e971e, 0x49c1b5a3, -0x1382e3a2, 0x0922dd2d, 0x2012c18f },
+        { 0x5ac89d29, -0x77f4aa1b, 0x45a0a763, 0x1483241f, -0x3d1893e1, 0x3d36efdf, 0x4e4bade8, 0x08af5b78 }
+    },
+    {
+        { -0x7633d3b5, -0x1d8ceb2e, -0x5d78e873, 0x4be4bd11, -0x05cc9b32, 0x18d528d6, -0x50267d92, 0x6423c1d5 },
+        { -0x77e0dacd, 0x283499dc, 0x779323b6, -0x62fada26, 0x673441f4, -0x76852205, 0x163a168d, 0x32b79d71 },
+        { -0x12034c96, -0x337a0727, 0x3746e5f9, 0x22bcc28f, -0x061a2c33, -0x1b621cc8, -0x3ec1d234, 0x480a5efb }
+    },
+    {
+        { 0x42ce221f, -0x499eb31c, 0x4c053928, 0x6e199dcc, -0x23e341fd, 0x663fb4a4, 0x691c8e06, 0x24b31d47 },
+        { 0x01622071, 0x0b51e70b, -0x74e2503b, 0x06b505cf, -0x10a55433, 0x2c6bb061, 0x0cb7bf31, 0x47aa2760 },
+        { -0x3fea073d, 0x2a541eed, 0x7c693f7c, 0x11a4fe7e, 0x4ea278d6, -0x0f5099ed, 0x14dda094, 0x545b585d }
+    },
+    {
+        { -0x1c4cde1f, 0x6204e4d0, 0x28ff1e95, 0x3baa637a, 0x5b99bd9e, 0x0b0ccffd, 0x64c8d071, 0x4d22dc3e },
+        { -0x5f2bc5f1, 0x67bf275e, 0x089beebe, -0x521971cc, -0x2b8618d2, 0x4289134c, 0x32ba5454, 0x0f62f9c3 },
+        { -0x29c4a0c7, -0x034b9a77, 0x57cbcf61, 0x5cae6a3f, -0x6ac505fb, -0x01453d2e, 0x36371436, 0x1c0fa01a }
+    },
+},
+{
+    {
+        { 0x54c53fae, -0x3ee11a18, 0x2b4f3ff4, 0x6a0b06c1, -0x1f49858e, 0x33540f80, -0x32f81c11, 0x15f18fc3 },
+        { -0x4383296e, -0x18ab8bb7, -0x1908c221, 0x0f9abeaa, 0x00837e29, 0x4af01ca7, 0x3f1bc183, 0x63ab1b5d },
+        { -0x4fd70b74, 0x32750763, 0x556a065f, 0x06020740, -0x3cb6a4a8, -0x2ac427ee, -0x79a0af73, 0x08706c9b }
+    },
+    {
+        { 0x38b41246, -0x3366e4bf, 0x6f9ac26b, 0x243b9c52, -0x48345443, -0x4610b6b3, -0x2f7d1300, 0x5fba433d },
+        { 0x3d343dff, -0x0c835d55, -0x7f5439e9, 0x1a8c6a2d, -0x2b330036, -0x71b61fcb, -0x455e2e47, 0x48b46bee },
+        { -0x366be530, -0x63b61cab, 0x74498f84, -0x468cb522, 0x66663e5c, 0x41c3fed0, -0x1718ef4d, 0x0ecfedf8 }
+    },
+    {
+        { -0x16bfc89e, 0x744f7463, -0x72033637, -0x08657212, 0x55e4cde3, 0x163a6496, -0x4d7b0bcb, 0x3b61788d },
+        { -0x632b8f27, 0x76430f9f, -0x5bd09ff8, -0x49d53365, 0x59adad5e, 0x1898297c, -0x4873af80, 0x7789dd2d },
+        { 0x0d6ef6b2, -0x4dddd7e7, 0x46ce4bfa, -0x56b5994e, 0x4f0b6cc7, 0x46c1a77a, -0x148cc731, 0x4236ccff }
+    },
+    {
+        { -0x2588820a, 0x3bd82dbf, 0x0b98369e, 0x71b177cc, -0x7af3c967, 0x1d0e8463, 0x48e2d1f1, 0x5a71945b },
+        { 0x0d55e274, -0x7b68bfb3, -0x3b52d4ad, 0x6c6663d9, -0x5256a8cc, -0x13d04f27, -0x324708c4, 0x2617e120 },
+        { 0x405b4b42, 0x6f203dd5, 0x10b24509, 0x327ec604, -0x53d577ba, -0x63cb8dd0, 0x11ffeb6a, 0x77de29fc }
+    },
+    {
+        { -0x13312d36, -0x7ca1ec71, -0x1569c466, -0x736150ed, -0x4de9f15a, -0x36a04040, -0x5278876e, 0x575e66f3 },
+        { -0x7c488758, -0x4f53a837, -0x28016ed4, 0x53cdcca9, -0x00e0a624, 0x61c2b854, -0x0f218254, 0x3a1a2cf0 },
+        { -0x377034c6, -0x667fc5d9, 0x275ec0b0, 0x345a6789, -0x0093d41b, 0x459789d0, 0x1e70a8b2, 0x62f88265 }
+    },
+    {
+        { 0x698a19e0, 0x6d822986, 0x74d78a71, -0x2367de1f, -0x0934e0b9, 0x41a85f31, -0x432563af, 0x352721c2 },
+        { 0x59ff1be4, 0x085ae2c7, 0x3b0e40b7, 0x149145c9, 0x7ff27379, -0x3b981806, -0x2a38c56b, 0x4eeecf0a },
+        { 0x213fc985, 0x48329952, 0x368a1746, 0x1087cf0d, 0x66c15aa5, -0x71ad9e4f, 0x2ed24c21, 0x2d5b2d84 }
+    },
+    {
+        { 0x196ac533, 0x5eb7d13d, -0x247f41d5, 0x377234ec, 0x7cf5ae24, -0x1ebb3004, -0x3bbe5314, 0x5226bcf9 },
+        { -0x142c212f, 0x02cfebd9, 0x39021974, -0x2ba4de89, -0x01cf5e49, 0x7576f813, -0x5cb1093e, 0x5691b6f9 },
+        { 0x23e5b547, 0x79ee6c72, -0x7ccf2987, 0x6f5f5076, 0x6d8adce9, -0x128c1e17, 0x1d8ccc03, 0x27c3da1e }
+    },
+    {
+        { 0x630ef9f6, 0x28302e71, 0x2b64cee0, -0x3d2b5dfd, 0x4b6292be, 0x09082030, -0x57d520e8, 0x5fca747a },
+        { 0x3fe24c74, 0x7eb9efb2, 0x1651be01, 0x3e50f49f, 0x21858dea, 0x3ea732dc, 0x5bb810f9, 0x17377bd7 },
+        { 0x5c258ea5, 0x232a03c3, 0x6bcb0cf1, -0x790dc5d4, 0x2e442166, 0x3dad8d0d, -0x548979d5, 0x04a8933c }
+    },
+},
+{
+    {
+        { -0x736c95b0, 0x69082b0e, -0x3e253a4a, -0x06365fcb, -0x3b2049cc, 0x6fb73e54, 0x1d2bc140, 0x4005419b },
+        { 0x22943dff, -0x2d39fb4a, 0x44cfb3a0, -0x43734132, -0x687f7988, 0x5d254ff3, 0x3b1ca6bf, 0x0fa3614f },
+        { -0x46417d10, -0x5ffc0143, 0x3a44ac90, 0x2089c1af, 0x1954fa8e, -0x07b6606f, -0x10bf54be, 0x1fba218a }
+    },
+    {
+        { 0x3e7b0194, 0x4f3e5704, 0x08daaf7f, -0x57e2c112, -0x6623210f, -0x37c63955, -0x00889e2b, 0x6c535d13 },
+        { -0x05370ac2, -0x54ab6bb8, 0x7ba63741, -0x7e091766, 0x6c2b5e01, 0x74fd6c7d, -0x573791be, 0x392e3aca },
+        { 0x3e8a35af, 0x4cbd34e9, 0x5887e816, 0x2e078144, -0x0d654f55, 0x19319c76, -0x2af53ec5, 0x25e17fe4 }
+    },
+    {
+        { 0x76f121a7, -0x6ea0800b, 0x2fcd87e3, -0x3cb5cdd9, 0x4d1be526, -0x3345d022, -0x76967665, 0x6bba828f },
+        { 0x1e04f676, 0x0a289bd7, -0x29bdf06b, 0x208e1c52, 0x34691fab, 0x5186d8b0, 0x2a9fb351, 0x25575144 },
+        { -0x6f01c6ff, -0x1d2e439a, -0x5f66852b, 0x4cb54a18, -0x507b9f2c, -0x68e296ec, 0x7f6b7be4, 0x559d504f }
+    },
+    {
+        { -0x092d9903, -0x63b76e19, 0x0307781b, 0x0744a19b, 0x6061e23b, -0x77c770e3, 0x354bd50e, 0x123ea6a3 },
+        { -0x4c14ab2b, -0x588c7c88, -0x5aaac384, 0x1d69d366, -0x06d7ff46, 0x0a26cf62, -0x7f81cde9, 0x01ab12d5 },
+        { 0x41e32d96, 0x118d1890, -0x27cea7b8, -0x46121c3e, -0x27cdba27, 0x1eab4271, -0x36e75eac, 0x4a3961e2 }
+    },
+    {
+        { -0x0cdcc0e2, 0x0327d644, 0x34fcf016, 0x499a260e, -0x0d254687, -0x7c4a58ea, -0x642beee1, 0x68aceead },
+        { -0x07194460, 0x71dc3be0, 0x7effe30a, -0x293107cc, -0x1ec5b896, -0x566dbda1, -0x04e2489d, 0x2cd6bce3 },
+        { -0x0c283df0, 0x38b4c90e, -0x4852fbf4, 0x308e6e24, -0x4818c1dd, 0x3860d9f1, -0x4af70a69, 0x595760d5 }
+    },
+    {
+        { -0x02fdd870, -0x77d53415, -0x3beea8a0, -0x7650ccfb, 0x7d3473f4, 0x65f492e3, 0x54515a2b, 0x2cb2c5df },
+        { 0x04aa6397, 0x6129bfe1, -0x5b580335, -0x7069fff8, 0x7d909458, 0x3f8bc089, -0x234d6e57, 0x709fa43e },
+        { 0x63fd2aca, -0x14f5a274, 0x2e694eff, -0x2dd43e9a, -0x07344fc6, 0x2723f36e, -0x0f37ece1, 0x70f029ec }
+    },
+    {
+        { 0x5e10b0b9, 0x2a6aafaa, -0x10fbe557, 0x78f0a370, -0x55c529e1, 0x773efb77, -0x58b4261f, 0x44eca5a2 },
+        { 0x2eed3e33, 0x461307b3, -0x5baa7e19, -0x51fbd0cd, 0x195f0366, -0x36bbb62d, 0x6c314858, 0x0b7d5d8a },
+        { 0x7b95d543, 0x25d44832, -0x5ccbf0e3, 0x70d38300, 0x60e1c52b, -0x21e3ace4, 0x2c7de9e4, 0x27222451 }
+    },
+    {
+        { 0x42a975fc, -0x40844476, -0x69525ca8, -0x73a3c689, -0x321255b8, -0x1d803891, -0x0943df5a, 0x19735fd7 },
+        { 0x49c5342e, 0x1abc92af, -0x4d190530, -0x001127ef, -0x0337b1d7, -0x105d7373, -0x5bb33abd, 0x11b5df18 },
+        { 0x42c84266, -0x1c546f30, 0x7f19547e, -0x147b71f1, 0x65a497b9, 0x2503a1d0, -0x6e2076a1, 0x0fef9111 }
+    },
+},
+{
+    {
+        { 0x5b1c16b7, 0x6ab5dcb8, 0x3c7b27a5, -0x6b3f0318, 0x735517be, -0x5b4ee3e6, -0x45f15056, 0x499238d0 },
+        { -0x54e39147, -0x4eaf835f, 0x16b687b3, -0x42bb70c2, 0x2c7a91ab, 0x3455fb7f, 0x2f2adec1, 0x7579229e },
+        { 0x7aba8b57, -0x130b91ae, -0x742e9b85, 0x15a08c47, 0x5f706fef, 0x7af1c6a6, -0x0fc5cf2b, 0x6345fa78 }
+    },
+    {
+        { -0x42270f5c, -0x6c2c3417, -0x02e88cfe, -0x24ead3e5, 0x7f17a875, 0x7dbddc6d, -0x70bd9102, 0x3e1a71cc },
+        { 0x1015e7a1, -0x20fd06a1, -0x564bfd9d, 0x790ec41d, 0x33ea1107, 0x4d3a0ea1, -0x1cc50737, 0x54f70be7 },
+        { -0x6f45429e, -0x37c35c1d, 0x0291c833, -0x7f121c99, -0x2c86ff3c, -0x377fc734, 0x1ec31fa1, 0x2c5fc023 }
+    },
+    {
+        { 0x02456e65, -0x3bdd1b2f, -0x352b846f, -0x78beb53f, -0x5d490023, 0x1592e2bb, -0x0a3deff1, 0x75d9d2bf },
+        { 0x17038b4f, -0x01456ee9, -0x3621107f, -0x1aedc8df, 0x5d0d8834, 0x1c97e4e7, 0x23dc3bc6, 0x68afae7a },
+        { 0x3626e81c, 0x5bd9b476, -0x435fd123, -0x766996ca, 0x61f077b3, 0x0a41193d, 0x00ce5471, 0x3097a242 }
+    },
+    {
+        { 0x6695c486, -0x5e9d18dc, 0x35a89607, 0x131d6334, -0x5f2ed5c9, 0x30521561, -0x59504c9d, 0x56704bad },
+        { -0x380747b4, 0x57427734, 0x01b270e9, -0x0ebe5ec2, -0x4b1a9b5a, 0x02d1adfe, -0x317c42b8, 0x4bb23d92 },
+        { 0x52f912b9, -0x5093b559, -0x27988f38, 0x5e665f6c, -0x5c3732a8, 0x4c35ac83, 0x10a58a7e, 0x2b7a29c0 }
+    },
+    {
+        { -0x40fff792, 0x33810a23, -0x18c90084, -0x50316da2, -0x1db6dd2c, 0x3d60e670, 0x4f96061b, 0x11ce9e71 },
+        { -0x2f3e313d, -0x3bff8089, -0x453b6d08, -0x72efdf4a, 0x7e69daaf, 0x32ec29d5, -0x626a0320, 0x59940875 },
+        { -0x27ea453f, 0x219ef713, 0x485be25c, -0x0ebeb9a3, 0x4e513c51, 0x6d5447cc, 0x5ef44393, 0x174926be }
+    },
+    {
+        { -0x6c15fdd2, 0x3ef5d415, 0x0ed0eed6, 0x5cbcc1a2, 0x07382c8c, -0x702db131, 0x06d8e1ad, 0x6fa42ead },
+        { -0x03a42a45, -0x4a214d07, -0x1e27ef1f, -0x6d2558d6, -0x48d5e3a7, -0x503b3024, 0x3fc22a24, 0x497d7881 },
+        { 0x1f73371f, -0x1d897db6, 0x4f5b6736, 0x7f7cf01c, 0x04fa46e7, 0x7e201fe3, 0x57808c96, 0x785a36a3 }
+    },
+    {
+        { 0x5d517bc3, 0x07044298, -0x519ac988, 0x6acd56c7, -0x67a5889d, 0x00a27983, -0x1aed99d5, 0x5167effa },
+        { 0x63014d2b, -0x7da04203, 0x6ca7578b, -0x37adc964, 0x5c0b5df0, 0x5b2fcd28, 0x58048c8f, 0x12ab214c },
+        { 0x0f53c4b6, -0x42b1561f, -0x7536e5ec, 0x1673dc5f, 0x2acc1aba, -0x5707e5b2, 0x24332a25, 0x33a92a79 }
+    },
+    {
+        { 0x218f2ada, 0x7ba95ba0, 0x330fb9ca, -0x300bdd79, 0x56c6d907, -0x2525b693, -0x0b4111ac, 0x5380c296 },
+        { 0x27996c02, -0x622e0b67, -0x1fb2e8ae, 0x0cb3b058, 0x7fd02c3e, 0x1f7e8896, -0x3474c14f, 0x2f964268 },
+        { 0x66898d0a, -0x62b0d8fc, 0x0aff3f7a, 0x3d098799, 0x67daba45, -0x2f610c9e, 0x7b1c669c, 0x7761455e }
     },
 },
 };
@@ -6368,7 +9095,7 @@
 
 static void ge_select(ge_precomp *t,int pos,signed char b)
 {
-#ifndef CURVED25519_X64
+#ifndef CURVED25519_ASM
   ge_precomp minust;
   unsigned char bnegative = negative(b);
   unsigned char babs = b - (((-bnegative) & b) << 1);
@@ -6390,7 +9117,6 @@
 #endif
 }
 
-
 /*
 h = a * B
 where a = a[0]+256*a[1]+...+256^31 a[31]
@@ -6404,7 +9130,7 @@
   signed char e[64];
   signed char carry;
   ge_p1p1 r;
-#ifndef CURVED25519_X64
+#ifndef CURVED25519_ASM
   ge_p2 s;
 #endif
   ge_precomp t;
@@ -6427,7 +9153,7 @@
   e[63] += carry;
   /* each e[i] is between -8 and 8 */
 
-#ifndef CURVED25519_X64
+#ifndef CURVED25519_ASM
   ge_select(&t,0,e[1]);
   fe_sub(h->X, t.yplusx, t.yminusx);
   fe_add(h->Y, t.yplusx, t.yminusx);
@@ -6498,47 +9224,90 @@
     }
 }
 
-#ifdef CURVED25519_X64
+#ifdef CURVED25519_ASM_64BIT
 static const ge_precomp Bi[8] = {
     {
-        { 0x2fbc93c6f58c3b85, 0xcf932dc6fb8c0e19, 0x270b4898643d42c2, 0x7cf9d3a33d4ba65,  },
-        { 0x9d103905d740913e, 0xfd399f05d140beb3, 0xa5c18434688f8a09, 0x44fd2f9298f81267,  },
-        { 0xabc91205877aaa68, 0x26d9e823ccaac49e, 0x5a1b7dcbdd43598c, 0x6f117b689f0c65a8,  },
-    },
-    {
-        { 0xaf25b0a84cee9730, 0x25a8430e8864b8a, 0xc11b50029f016732, 0x7a164e1b9a80f8f4,  },
-        { 0x56611fe8a4fcd265, 0x3bd353fde5c1ba7d, 0x8131f31a214bd6bd, 0x2ab91587555bda62,  },
-        { 0x14ae933f0dd0d889, 0x589423221c35da62, 0xd170e5458cf2db4c, 0x5a2826af12b9b4c6,  },
-    },
-    {
-        { 0xa212bc4408a5bb33, 0x8d5048c3c75eed02, 0xdd1beb0c5abfec44, 0x2945ccf146e206eb,  },
-        { 0x7f9182c3a447d6ba, 0xd50014d14b2729b7, 0xe33cf11cb864a087, 0x154a7e73eb1b55f3,  },
-        { 0xbcbbdbf1812a8285, 0x270e0807d0bdd1fc, 0xb41b670b1bbda72d, 0x43aabe696b3bb69a,  },
+        { 0x2fbc93c6f58c3b85, -0x306cd2390473f1e7, 0x270b4898643d42c2, 0x07cf9d3a33d4ba65,  },
+        { -0x62efc6fa28bf6ec2, -0x02c660fa2ebf414d, -0x5a3e7bcb977075f7, 0x44fd2f9298f81267,  },
+        { -0x5436edfa78855598, 0x26d9e823ccaac49e, 0x5a1b7dcbdd43598c, 0x6f117b689f0c65a8,  },
+    },
+    {
+        { -0x50da4f57b31168d0, 0x025a8430e8864b8a, -0x3ee4affd60fe98ce, 0x7a164e1b9a80f8f4,  },
+        { 0x56611fe8a4fcd265, 0x3bd353fde5c1ba7d, -0x7ece0ce5deb42943, 0x2ab91587555bda62,  },
+        { 0x14ae933f0dd0d889, 0x589423221c35da62, -0x2e8f1aba730d24b4, 0x5a2826af12b9b4c6,  },
+    },
+    {
+        { -0x5ded43bbf75a44cd, -0x72afb73c38a112fe, -0x22e414f3a54013bc, 0x2945ccf146e206eb,  },
+        { 0x7f9182c3a447d6ba, -0x2affeb2eb4d8d649, -0x1cc30ee3479b5f79, 0x154a7e73eb1b55f3,  },
+        { -0x4344240e7ed57d7b, 0x270e0807d0bdd1fc, -0x4be498f4e44258d3, 0x43aabe696b3bb69a,  },
     },
     {
         { 0x6b1a5cd0944ea3bf, 0x7470353ab39dc0d2, 0x71b2528228542e49, 0x461bea69283c927e,  },
-        { 0xba6f2c9aaa3221b1, 0x6ca021533bba23a7, 0x9dea764f92192c3a, 0x1d6edd5d2e5317e0,  },
-        { 0xf1836dc801b8b3a2, 0xb3035f47053ea49a, 0x529c41ba5877adf3, 0x7a9fbb1c6a0f90a7,  },
-    },
-    {
-        { 0x9b2e678aa6a8632f, 0xa6509e6f51bc46c5, 0xceb233c9c686f5b5, 0x34b9ed338add7f59,  },
-        { 0xf36e217e039d8064, 0x98a081b6f520419b, 0x96cbc608e75eb044, 0x49c05a51fadc9c8f,  },
-        { 0x6b4e8bf9045af1b, 0xe2ff83e8a719d22f, 0xaaf6fc2993d4cf16, 0x73c172021b008b06,  },
-    },
-    {
-        { 0x2fbf00848a802ade, 0xe5d9fecf02302e27, 0x113e847117703406, 0x4275aae2546d8faf,  },
-        { 0x315f5b0249864348, 0x3ed6b36977088381, 0xa3a075556a8deb95, 0x18ab598029d5c77f,  },
-        { 0xd82b2cc5fd6089e9, 0x31eb4a13282e4a4, 0x44311199b51a8622, 0x3dc65522b53df948,  },
-    },
-    {
-        { 0xbf70c222a2007f6d, 0xbf84b39ab5bcdedb, 0x537a0e12fb07ba07, 0x234fd7eec346f241,  },
-        { 0x506f013b327fbf93, 0xaefcebc99b776f6b, 0x9d12b232aaad5968, 0x267882d176024a7,  },
-        { 0x5360a119732ea378, 0x2437e6b1df8dd471, 0xa2ef37f891a7e533, 0x497ba6fdaa097863,  },
-    },
-    {
-        { 0x24cecc0313cfeaa0, 0x8648c28d189c246d, 0x2dbdbdfac1f2d4d0, 0x61e22917f12de72b,  },
-        { 0x40bcd86468ccf0b, 0xd3829ba42a9910d6, 0x7508300807b25192, 0x43b5cd4218d05ebf,  },
-        { 0x5d9a762f9bd0b516, 0xeb38af4e373fdeee, 0x32e5a7d93d64270, 0x511d61210ae4d842,  },
+        { -0x4590d36555cdde4f, 0x6ca021533bba23a7, -0x621589b06de6d3c6, 0x1d6edd5d2e5317e0,  },
+        { -0x0e7c9237fe474c5e, -0x4cfca0b8fac15b66, 0x529c41ba5877adf3, 0x7a9fbb1c6a0f90a7,  },
+    },
+    {
+        { -0x64d1987559579cd1, -0x59af6190ae43b93b, -0x314dcc3639790a4b, 0x34b9ed338add7f59,  },
+        { -0x0c91de81fc627f9c, -0x675f7e490adfbe65, -0x693439f718a14fbc, 0x49c05a51fadc9c8f,  },
+        { 0x06b4e8bf9045af1b, -0x1d007c1758e62dd1, -0x550903d66c2b30ea, 0x73c172021b008b06,  },
+    },
+    {
+        { 0x2fbf00848a802ade, -0x1a260130fdcfd1d9, 0x113e847117703406, 0x4275aae2546d8faf,  },
+        { 0x315f5b0249864348, 0x3ed6b36977088381, -0x5c5f8aaa9572146b, 0x18ab598029d5c77f,  },
+        { -0x27d4d33a029f7617, 0x031eb4a13282e4a4, 0x44311199b51a8622, 0x3dc65522b53df948,  },
+    },
+    {
+        { -0x408f3ddd5dff8093, -0x407b4c654a432125, 0x537a0e12fb07ba07, 0x234fd7eec346f241,  },
+        { 0x506f013b327fbf93, -0x5103143664889095, -0x62ed4dcd5552a698, 0x0267882d176024a7,  },
+        { 0x5360a119732ea378, 0x2437e6b1df8dd471, -0x5d10c8076e581acd, 0x497ba6fdaa097863,  },
+    },
+    {
+        { 0x24cecc0313cfeaa0, -0x79b73d72e763db93, 0x2dbdbdfac1f2d4d0, 0x61e22917f12de72b,  },
+        { 0x040bcd86468ccf0b, -0x2c7d645bd566ef2a, 0x7508300807b25192, 0x43b5cd4218d05ebf,  },
+        { 0x5d9a762f9bd0b516, -0x14c750b1c8c02112, 0x032e5a7d93d64270, 0x511d61210ae4d842,  },
+    },
+};
+#elif defined(CURVED25519_ASM_32BIT)
+static const ge_precomp Bi[8] = {
+    {
+        { -0x0a73c47b, 0x2fbc93c6, -0x0473f1e7, -0x306cd23a, 0x643d42c2, 0x270b4898, 0x33d4ba65, 0x07cf9d3a,  },
+        { -0x28bf6ec2, -0x62efc6fb, -0x2ebf414d, -0x02c660fb, 0x688f8a09, -0x5a3e7bcc, -0x6707ed99, 0x44fd2f92,  },
+        { -0x78855598, -0x5436edfb, -0x33553b62, 0x26d9e823, -0x22bca674, 0x5a1b7dcb, -0x60f39a58, 0x6f117b68,  },
+    },
+    {
+        { 0x4cee9730, -0x50da4f58, -0x1779b476, 0x025a8430, -0x60fe98ce, -0x3ee4affe, -0x657f070c, 0x7a164e1b,  },
+        { -0x5b032d9b, 0x56611fe8, -0x1a3e4583, 0x3bd353fd, 0x214bd6bd, -0x7ece0ce6, 0x555bda62, 0x2ab91587,  },
+        { 0x0dd0d889, 0x14ae933f, 0x1c35da62, 0x58942322, -0x730d24b4, -0x2e8f1abb, 0x12b9b4c6, 0x5a2826af,  },
+    },
+    {
+        { 0x08a5bb33, -0x5ded43bc, -0x38a112fe, -0x72afb73d, 0x5abfec44, -0x22e414f4, 0x46e206eb, 0x2945ccf1,  },
+        { -0x5bb82946, 0x7f9182c3, 0x4b2729b7, -0x2affeb2f, -0x479b5f79, -0x1cc30ee4, -0x14e4aa0d, 0x154a7e73,  },
+        { -0x7ed57d7b, -0x4344240f, -0x2f422e04, 0x270e0807, 0x1bbda72d, -0x4be498f5, 0x6b3bb69a, 0x43aabe69,  },
+    },
+    {
+        { -0x6bb15c41, 0x6b1a5cd0, -0x4c623f2e, 0x7470353a, 0x28542e49, 0x71b25282, 0x283c927e, 0x461bea69,  },
+        { -0x55cdde4f, -0x4590d366, 0x3bba23a7, 0x6ca02153, -0x6de6d3c6, -0x621589b1, 0x2e5317e0, 0x1d6edd5d,  },
+        { 0x01b8b3a2, -0x0e7c9238, 0x053ea49a, -0x4cfca0b9, 0x5877adf3, 0x529c41ba, 0x6a0f90a7, 0x7a9fbb1c,  },
+    },
+    {
+        { -0x59579cd1, -0x64d19876, 0x51bc46c5, -0x59af6191, -0x39790a4b, -0x314dcc37, -0x752280a7, 0x34b9ed33,  },
+        { 0x039d8064, -0x0c91de82, -0x0adfbe65, -0x675f7e4a, -0x18a14fbc, -0x693439f8, -0x05236371, 0x49c05a51,  },
+        { -0x6fba50e5, 0x06b4e8bf, -0x58e62dd1, -0x1d007c18, -0x6c2b30ea, -0x550903d7, 0x1b008b06, 0x73c17202,  },
+    },
+    {
+        { -0x757fd522, 0x2fbf0084, 0x02302e27, -0x1a260131, 0x17703406, 0x113e8471, 0x546d8faf, 0x4275aae2,  },
+        { 0x49864348, 0x315f5b02, 0x77088381, 0x3ed6b369, 0x6a8deb95, -0x5c5f8aab, 0x29d5c77f, 0x18ab5980,  },
+        { -0x029f7617, -0x27d4d33b, 0x3282e4a4, 0x031eb4a1, -0x4ae579de, 0x44311199, -0x4ac206b8, 0x3dc65522,  },
+    },
+    {
+        { -0x5dff8093, -0x408f3dde, -0x4a432125, -0x407b4c66, -0x04f845f9, 0x537a0e12, -0x3cb90dbf, 0x234fd7ee,  },
+        { 0x327fbf93, 0x506f013b, -0x64889095, -0x51031437, -0x5552a698, -0x62ed4dce, 0x176024a7, 0x0267882d,  },
+        { 0x732ea378, 0x5360a119, -0x20722b8f, 0x2437e6b1, -0x6e581acd, -0x5d10c808, -0x55f6879d, 0x497ba6fd,  },
+    },
+    {
+        { 0x13cfeaa0, 0x24cecc03, 0x189c246d, -0x79b73d73, -0x3e0d2b30, 0x2dbdbdfa, -0x0ed218d5, 0x61e22917,  },
+        { 0x468ccf0b, 0x040bcd86, 0x2a9910d6, -0x2c7d645c, 0x07b25192, 0x75083008, 0x18d05ebf, 0x43b5cd42,  },
+        { -0x642f4aea, 0x5d9a762f, 0x373fdeee, -0x14c750b2, -0x6c29bd90, 0x032e5a7d, 0x0ae4d842, 0x511d6121,  },
     },
 };
 #elif defined(CURVED25519_128BIT)
@@ -6691,10 +9460,14 @@
   return 0;
 }
 
-#ifdef CURVED25519_X64
+#ifdef CURVED25519_ASM_64BIT
 static const ge d = {
-    0x75eb4dca135978a3, 0x700a4d4141d8ab, 0x8cc740797779e898, 0x52036cee2b6ffe73,
-    };
+    0x75eb4dca135978a3, 0x00700a4d4141d8ab, -0x7338bf8688861768, 0x52036cee2b6ffe73,
+};
+#elif defined(CURVED25519_ASM_32BIT)
+static const ge d = {
+    0x135978a3, 0x75eb4dca, 0x4141d8ab, 0x00700a4d, 0x7779e898, -0x7338bf87, 0x2b6ffe73, 0x52036cee,
+};
 #elif defined(CURVED25519_128BIT)
 static const ge d = {
     0x34dca135978a3, 0x1a8283b156ebd, 0x5e7a26001c029, 0x739c663a03cbb,
@@ -6704,14 +9477,18 @@
 static const ge d = {
 -10913610,13857413,-15372611,6949391,114729,
 -8787816,-6275908,-3247719,-18696448,-12055116
-} ;
+};
 #endif
 
 
-#ifdef CURVED25519_X64
+#ifdef CURVED25519_ASM_64BIT
 static const ge sqrtm1 = {
-    0xc4ee1b274a0ea0b0, 0x2f431806ad2fe478, 0x2b4d00993dfbd7a7, 0x2b8324804fc1df0b,
-    };
+    -0x3b11e4d8b5f15f50, 0x2f431806ad2fe478, 0x2b4d00993dfbd7a7, 0x2b8324804fc1df0b,
+};
+#elif defined(CURVED25519_ASM_32BIT)
+static const ge sqrtm1 = {
+    0x4a0ea0b0, -0x3b11e4d9, -0x52d01b88, 0x2f431806, 0x3dfbd7a7, 0x2b4d0099, 0x4fc1df0b, 0x2b832480,
+};
 #elif defined(CURVED25519_128BIT)
 static const ge sqrtm1 = {
     0x61b274a0ea0b0, 0x0d5a5fc8f189d, 0x7ef5e9cbd0c60, 0x78595a6804c9e,
@@ -6721,7 +9498,7 @@
 static const ge sqrtm1 = {
 -32595792,-7943725,9377950,3500415,12389472,
 -272473,-25146209,-2005654,326686,11406482
-} ;
+};
 #endif
 
 
@@ -6775,7 +9552,7 @@
 
 static WC_INLINE void ge_madd(ge_p1p1 *r,const ge_p3 *p,const ge_precomp *q)
 {
-#ifndef CURVED25519_X64
+#ifndef CURVED25519_ASM
     ge t0;
     fe_add(r->X,p->Y,p->X);
     fe_sub(r->Y,p->Y,p->X);
@@ -6802,7 +9579,7 @@
 
 static WC_INLINE void ge_msub(ge_p1p1 *r,const ge_p3 *p,const ge_precomp *q)
 {
-#ifndef CURVED25519_X64
+#ifndef CURVED25519_ASM
     ge t0;
     fe_add(r->X,p->Y,p->X);
     fe_sub(r->Y,p->Y,p->X);
@@ -6828,7 +9605,7 @@
 
 static void ge_p1p1_to_p2(ge_p2 *r,const ge_p1p1 *p)
 {
-#ifndef CURVED25519_X64
+#ifndef CURVED25519_ASM
   fe_mul(r->X,p->X,p->T);
   fe_mul(r->Y,p->Y,p->Z);
   fe_mul(r->Z,p->Z,p->T);
@@ -6846,7 +9623,7 @@
 
 static WC_INLINE void ge_p1p1_to_p3(ge_p3 *r,const ge_p1p1 *p)
 {
-#ifndef CURVED25519_X64
+#ifndef CURVED25519_ASM
   fe_mul(r->X,p->X,p->T);
   fe_mul(r->Y,p->Y,p->Z);
   fe_mul(r->Z,p->Z,p->T);
@@ -6875,7 +9652,7 @@
 
 static WC_INLINE void ge_p2_dbl(ge_p1p1 *r,const ge_p2 *p)
 {
-#ifndef CURVED25519_X64
+#ifndef CURVED25519_ASM
     ge t0;
     fe_sq(r->X,p->X);
     fe_sq(r->Z,p->Y);
@@ -6912,10 +9689,14 @@
 r = p
 */
 
-#ifdef CURVED25519_X64
+#ifdef CURVED25519_ASM_64BIT
 static const ge d2 = {
-    0xebd69b9426b2f159, 0xe0149a8283b156, 0x198e80f2eef3d130, 0x2406d9dc56dffce7,
-    };
+    -0x1429646bd94d0ea7, 0x00e0149a8283b156, 0x198e80f2eef3d130, 0x2406d9dc56dffce7,
+};
+#elif defined(CURVED25519_ASM_32BIT)
+static const ge d2 = {
+    0x26b2f159, -0x1429646c, -0x7d7c4eaa, 0x00e0149a, -0x110c2ed0, 0x198e80f2, 0x56dffce7, 0x2406d9dc,
+};
 #elif defined(CURVED25519_128BIT)
 static const ge d2 = {
     0x69b9426b2f159, 0x35050762add7a, 0x3cf44c0038052, 0x6738cc7407977,
@@ -6966,7 +9747,7 @@
 }
 
 
-#ifndef CURVED25519_X64
+#ifndef CURVED25519_ASM
 /* ge_precomp_0 */
 static void ge_precomp_0(ge_precomp *h)
 {
@@ -6984,7 +9765,7 @@
 
 static WC_INLINE void ge_sub(ge_p1p1 *r,const ge_p3 *p,const ge_cached *q)
 {
-#ifndef CURVED25519_X64
+#ifndef CURVED25519_ASM
     ge t0;
     fe_add(r->X,p->Y,p->X);
     fe_sub(r->Y,p->Y,p->X);
--- a/wolfcrypt/src/hash.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/hash.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* hash.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -32,34 +32,33 @@
 #endif
 
 #include <wolfssl/wolfcrypt/hash.h>
-
+#include <wolfssl/wolfcrypt/hmac.h>
 
-#if !defined(NO_ASN) || !defined(NO_DH) || defined(HAVE_ECC)
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
 
 #ifdef NO_ASN
 enum Hash_Sum  {
-    MD2h    = 646,
-    MD5h    = 649,
-    SHAh    =  88,
-    SHA224h = 417,
-    SHA256h = 414,
-    SHA384h = 415,
-    SHA512h = 416
+    MD2h      = 646,
+    MD5h      = 649,
+    SHAh      =  88,
+    SHA224h   = 417,
+    SHA256h   = 414,
+    SHA384h   = 415,
+    SHA512h   = 416,
+    SHA3_224h = 420,
+    SHA3_256h = 421,
+    SHA3_384h = 422,
+    SHA3_512h = 423
 };
 #endif /* !NO_ASN */
 
-#ifdef HAVE_SELFTEST
-enum {
-    /* CAVP selftest includes these in hmac.h instead of sha3.h,
-       copied here for that build */
-    WC_SHA3_224_BLOCK_SIZE = 144,
-    WC_SHA3_256_BLOCK_SIZE = 136,
-    WC_SHA3_384_BLOCK_SIZE = 104,
-    WC_SHA3_512_BLOCK_SIZE = 72,
-};
-#endif
-
-
+#if !defined(NO_PWDBASED) || !defined(NO_ASN)
 /* function converts int hash type to enum */
 enum wc_HashType wc_HashTypeConvert(int hashType)
 {
@@ -102,6 +101,20 @@
             eHashType = WC_HASH_TYPE_SHA512;
             break;
     #endif /* WOLFSSL_SHA512 */
+    #ifdef WOLFSSL_SHA3
+        case WC_SHA3_224:
+            eHashType = WC_HASH_TYPE_SHA3_224;
+            break;
+        case WC_SHA3_256:
+            eHashType = WC_HASH_TYPE_SHA3_256;
+            break;
+        case WC_SHA3_384:
+            eHashType = WC_HASH_TYPE_SHA3_384;
+            break;
+        case WC_SHA3_512:
+            eHashType = WC_HASH_TYPE_SHA3_512;
+            break;
+    #endif /* WOLFSSL_SHA3 */
         default:
             eHashType = WC_HASH_TYPE_NONE;
             break;
@@ -114,7 +127,9 @@
 #endif
     return eHashType;
 }
+#endif /* !NO_PWDBASED || !NO_ASN */
 
+#if !defined(NO_ASN) || !defined(NO_DH) || defined(HAVE_ECC)
 
 int wc_HashGetOID(enum wc_HashType hash_type)
 {
@@ -157,14 +172,31 @@
             oid = SHA512h;
         #endif
             break;
+        case WC_HASH_TYPE_SHA3_224:
+        #if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_224)
+            oid = SHA3_224h;
+        #endif
+            break;
+        case WC_HASH_TYPE_SHA3_256:
+        #if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_256)
+            oid = SHA3_256h;
+        #endif
+            break;
+        case WC_HASH_TYPE_SHA3_384:
+        #if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_384)
+            oid = SHA3_384h;
+        #endif
+            break;
+        case WC_HASH_TYPE_SHA3_512:
+        #if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_512)
+            oid = SHA3_512h;
+        #endif
+            break;
 
         /* Not Supported */
         case WC_HASH_TYPE_MD4:
-        case WC_HASH_TYPE_SHA3_224:
-        case WC_HASH_TYPE_SHA3_256:
-        case WC_HASH_TYPE_SHA3_384:
-        case WC_HASH_TYPE_SHA3_512:
         case WC_HASH_TYPE_BLAKE2B:
+        case WC_HASH_TYPE_BLAKE2S:
         case WC_HASH_TYPE_NONE:
         default:
             oid = BAD_FUNC_ARG;
@@ -178,11 +210,11 @@
     enum wc_HashType hash_type = WC_HASH_TYPE_NONE;
     switch (oid)
     {
+    #ifdef WOLFSSL_MD2
         case MD2h:
-        #ifdef WOLFSSL_MD2
             hash_type = WC_HASH_TYPE_MD2;
-        #endif
             break;
+    #endif
         case MD5h:
         #ifndef NO_MD5
             hash_type = WC_HASH_TYPE_MD5;
@@ -213,6 +245,20 @@
             hash_type = WC_HASH_TYPE_SHA512;
         #endif
             break;
+    #ifdef WOLFSSL_SHA3
+        case SHA3_224h:
+            hash_type = WC_HASH_TYPE_SHA3_224;
+            break;
+        case SHA3_256h:
+            hash_type = WC_HASH_TYPE_SHA3_256;
+            break;
+        case SHA3_384h:
+            hash_type = WC_HASH_TYPE_SHA3_384;
+            break;
+        case SHA3_512h:
+            hash_type = WC_HASH_TYPE_SHA3_512;
+            break;
+    #endif /* WOLFSSL_SHA3 */
         default:
             break;
     }
@@ -220,7 +266,7 @@
 }
 #endif /* !NO_ASN || !NO_DH || HAVE_ECC */
 
-
+#ifndef NO_HASH_WRAPPER
 
 /* Get Hash digest size */
 int wc_HashGetDigestSize(enum wc_HashType hash_type)
@@ -275,28 +321,33 @@
             break;
 
         case WC_HASH_TYPE_SHA3_224:
-        #ifdef WOLFSSL_SHA3
+        #if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_224)
             dig_size = WC_SHA3_224_DIGEST_SIZE;
         #endif
             break;
         case WC_HASH_TYPE_SHA3_256:
-        #ifdef WOLFSSL_SHA3
+        #if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_256)
             dig_size = WC_SHA3_256_DIGEST_SIZE;
         #endif
             break;
         case WC_HASH_TYPE_SHA3_384:
-        #ifdef WOLFSSL_SHA3
+        #if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_384)
             dig_size = WC_SHA3_384_DIGEST_SIZE;
         #endif
             break;
         case WC_HASH_TYPE_SHA3_512:
-        #ifdef WOLFSSL_SHA3
+        #if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_512)
             dig_size = WC_SHA3_512_DIGEST_SIZE;
         #endif
             break;
+        case WC_HASH_TYPE_BLAKE2B:
+        case WC_HASH_TYPE_BLAKE2S:
+        #if defined(HAVE_BLAKE2) || defined(HAVE_BLAKE2S)
+            dig_size = BLAKE2S_OUTBYTES;
+        #endif
+            break;
 
         /* Not Supported */
-        case WC_HASH_TYPE_BLAKE2B:
         case WC_HASH_TYPE_NONE:
         default:
             dig_size = BAD_FUNC_ARG;
@@ -359,28 +410,33 @@
             break;
 
         case WC_HASH_TYPE_SHA3_224:
-        #ifdef WOLFSSL_SHA3
+        #if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_224)
             block_size = WC_SHA3_224_BLOCK_SIZE;
         #endif
             break;
         case WC_HASH_TYPE_SHA3_256:
-        #ifdef WOLFSSL_SHA3
+        #if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_256)
             block_size = WC_SHA3_256_BLOCK_SIZE;
         #endif
             break;
         case WC_HASH_TYPE_SHA3_384:
-        #ifdef WOLFSSL_SHA3
+        #if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_384)
             block_size = WC_SHA3_384_BLOCK_SIZE;
         #endif
             break;
         case WC_HASH_TYPE_SHA3_512:
-        #ifdef WOLFSSL_SHA3
+        #if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_512)
             block_size = WC_SHA3_512_BLOCK_SIZE;
         #endif
             break;
+        case WC_HASH_TYPE_BLAKE2B:
+        case WC_HASH_TYPE_BLAKE2S:
+        #if defined(HAVE_BLAKE2) || defined(HAVE_BLAKE2S)
+            block_size = BLAKE2S_BLOCKBYTES;
+        #endif
+            break;
 
         /* Not Supported */
-        case WC_HASH_TYPE_BLAKE2B:
         case WC_HASH_TYPE_NONE:
         default:
             block_size = BAD_FUNC_ARG;
@@ -449,14 +505,32 @@
 #endif
             break;
 
+        case WC_HASH_TYPE_SHA3_224:
+#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_224)
+            ret = wc_Sha3_224Hash(data, data_len, hash);
+#endif
+            break;
+        case WC_HASH_TYPE_SHA3_256:
+#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_256)
+            ret = wc_Sha3_256Hash(data, data_len, hash);
+#endif
+            break;
+        case WC_HASH_TYPE_SHA3_384:
+#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_384)
+            ret = wc_Sha3_384Hash(data, data_len, hash);
+#endif
+            break;
+        case WC_HASH_TYPE_SHA3_512:
+#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_512)
+            ret = wc_Sha3_512Hash(data, data_len, hash);
+#endif
+            break;
+
         /* Not Supported */
         case WC_HASH_TYPE_MD2:
         case WC_HASH_TYPE_MD4:
-        case WC_HASH_TYPE_SHA3_224:
-        case WC_HASH_TYPE_SHA3_256:
-        case WC_HASH_TYPE_SHA3_384:
-        case WC_HASH_TYPE_SHA3_512:
         case WC_HASH_TYPE_BLAKE2B:
+        case WC_HASH_TYPE_BLAKE2S:
         case WC_HASH_TYPE_NONE:
         default:
             ret = BAD_FUNC_ARG;
@@ -465,7 +539,8 @@
     return ret;
 }
 
-int wc_HashInit(wc_HashAlg* hash, enum wc_HashType type)
+int wc_HashInit_ex(wc_HashAlg* hash, enum wc_HashType type, void* heap,
+    int devId)
 {
     int ret = HASH_TYPE_E; /* Default to hash type error */
 
@@ -475,33 +550,53 @@
     switch (type) {
         case WC_HASH_TYPE_MD5:
 #ifndef NO_MD5
-            wc_InitMd5(&hash->md5);
-            ret = 0;
+            ret = wc_InitMd5_ex(&hash->md5, heap, devId);
 #endif
             break;
         case WC_HASH_TYPE_SHA:
 #ifndef NO_SHA
-            ret = wc_InitSha(&hash->sha);
+            ret = wc_InitSha_ex(&hash->sha, heap, devId);
 #endif
             break;
         case WC_HASH_TYPE_SHA224:
 #ifdef WOLFSSL_SHA224
-            ret = wc_InitSha224(&hash->sha224);
+            ret = wc_InitSha224_ex(&hash->sha224, heap, devId);
 #endif
             break;
         case WC_HASH_TYPE_SHA256:
 #ifndef NO_SHA256
-            ret = wc_InitSha256(&hash->sha256);
+            ret = wc_InitSha256_ex(&hash->sha256, heap, devId);
 #endif
             break;
         case WC_HASH_TYPE_SHA384:
 #ifdef WOLFSSL_SHA384
-            ret = wc_InitSha384(&hash->sha384);
+            ret = wc_InitSha384_ex(&hash->sha384, heap, devId);
 #endif
             break;
         case WC_HASH_TYPE_SHA512:
 #ifdef WOLFSSL_SHA512
-            ret = wc_InitSha512(&hash->sha512);
+            ret = wc_InitSha512_ex(&hash->sha512, heap, devId);
+#endif
+            break;
+
+        case WC_HASH_TYPE_SHA3_224:
+#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_224)
+            ret = wc_InitSha3_224(&hash->sha3, heap, devId);
+#endif
+            break;
+        case WC_HASH_TYPE_SHA3_256:
+#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_256)
+            ret = wc_InitSha3_256(&hash->sha3, heap, devId);
+#endif
+            break;
+        case WC_HASH_TYPE_SHA3_384:
+#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_384)
+            ret = wc_InitSha3_384(&hash->sha3, heap, devId);
+#endif
+            break;
+        case WC_HASH_TYPE_SHA3_512:
+#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_512)
+            ret = wc_InitSha3_512(&hash->sha3, heap, devId);
 #endif
             break;
 
@@ -509,11 +604,8 @@
         case WC_HASH_TYPE_MD5_SHA:
         case WC_HASH_TYPE_MD2:
         case WC_HASH_TYPE_MD4:
-        case WC_HASH_TYPE_SHA3_224:
-        case WC_HASH_TYPE_SHA3_256:
-        case WC_HASH_TYPE_SHA3_384:
-        case WC_HASH_TYPE_SHA3_512:
         case WC_HASH_TYPE_BLAKE2B:
+        case WC_HASH_TYPE_BLAKE2S:
         case WC_HASH_TYPE_NONE:
         default:
             ret = BAD_FUNC_ARG;
@@ -522,6 +614,11 @@
     return ret;
 }
 
+int wc_HashInit(wc_HashAlg* hash, enum wc_HashType type)
+{
+    return wc_HashInit_ex(hash, type, NULL, INVALID_DEVID);
+}
+
 int wc_HashUpdate(wc_HashAlg* hash, enum wc_HashType type, const byte* data,
                   word32 dataSz)
 {
@@ -533,15 +630,12 @@
     switch (type) {
         case WC_HASH_TYPE_MD5:
 #ifndef NO_MD5
-            wc_Md5Update(&hash->md5, data, dataSz);
-            ret = 0;
+            ret = wc_Md5Update(&hash->md5, data, dataSz);
 #endif
             break;
         case WC_HASH_TYPE_SHA:
 #ifndef NO_SHA
             ret = wc_ShaUpdate(&hash->sha, data, dataSz);
-            if (ret != 0)
-                return ret;
 #endif
             break;
         case WC_HASH_TYPE_SHA224:
@@ -565,15 +659,33 @@
 #endif
             break;
 
+        case WC_HASH_TYPE_SHA3_224:
+#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_224)
+            ret = wc_Sha3_224_Update(&hash->sha3, data, dataSz);
+#endif
+            break;
+        case WC_HASH_TYPE_SHA3_256:
+#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_256)
+            ret = wc_Sha3_256_Update(&hash->sha3, data, dataSz);
+#endif
+            break;
+        case WC_HASH_TYPE_SHA3_384:
+#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_384)
+            ret = wc_Sha3_384_Update(&hash->sha3, data, dataSz);
+#endif
+            break;
+        case WC_HASH_TYPE_SHA3_512:
+#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_512)
+            ret = wc_Sha3_512_Update(&hash->sha3, data, dataSz);
+#endif
+            break;
+
         /* not supported */
         case WC_HASH_TYPE_MD5_SHA:
         case WC_HASH_TYPE_MD2:
         case WC_HASH_TYPE_MD4:
-        case WC_HASH_TYPE_SHA3_224:
-        case WC_HASH_TYPE_SHA3_256:
-        case WC_HASH_TYPE_SHA3_384:
-        case WC_HASH_TYPE_SHA3_512:
         case WC_HASH_TYPE_BLAKE2B:
+        case WC_HASH_TYPE_BLAKE2S:
         case WC_HASH_TYPE_NONE:
         default:
             ret = BAD_FUNC_ARG;
@@ -592,8 +704,7 @@
     switch (type) {
         case WC_HASH_TYPE_MD5:
 #ifndef NO_MD5
-            wc_Md5Final(&hash->md5, out);
-            ret = 0;
+            ret = wc_Md5Final(&hash->md5, out);
 #endif
             break;
         case WC_HASH_TYPE_SHA:
@@ -622,15 +733,117 @@
 #endif
             break;
 
+        case WC_HASH_TYPE_SHA3_224:
+#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_224)
+            ret = wc_Sha3_224_Final(&hash->sha3, out);
+#endif
+            break;
+        case WC_HASH_TYPE_SHA3_256:
+#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_256)
+            ret = wc_Sha3_256_Final(&hash->sha3, out);
+#endif
+            break;
+        case WC_HASH_TYPE_SHA3_384:
+#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_384)
+            ret = wc_Sha3_384_Final(&hash->sha3, out);
+#endif
+            break;
+        case WC_HASH_TYPE_SHA3_512:
+#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_512)
+            ret = wc_Sha3_512_Final(&hash->sha3, out);
+#endif
+            break;
+
         /* not supported */
         case WC_HASH_TYPE_MD5_SHA:
         case WC_HASH_TYPE_MD2:
         case WC_HASH_TYPE_MD4:
+        case WC_HASH_TYPE_BLAKE2B:
+        case WC_HASH_TYPE_BLAKE2S:
+        case WC_HASH_TYPE_NONE:
+        default:
+            ret = BAD_FUNC_ARG;
+    };
+
+    return ret;
+}
+
+int wc_HashFree(wc_HashAlg* hash, enum wc_HashType type)
+{
+    int ret = HASH_TYPE_E; /* Default to hash type error */
+
+    if (hash == NULL)
+        return BAD_FUNC_ARG;
+
+    switch (type) {
+        case WC_HASH_TYPE_MD5:
+#ifndef NO_MD5
+            wc_Md5Free(&hash->md5);
+            ret = 0;
+#endif
+            break;
+        case WC_HASH_TYPE_SHA:
+#ifndef NO_SHA
+            wc_ShaFree(&hash->sha);
+            ret = 0;
+#endif
+            break;
+        case WC_HASH_TYPE_SHA224:
+#ifdef WOLFSSL_SHA224
+            wc_Sha224Free(&hash->sha224);
+            ret = 0;
+#endif
+            break;
+        case WC_HASH_TYPE_SHA256:
+#ifndef NO_SHA256
+            wc_Sha256Free(&hash->sha256);
+            ret = 0;
+#endif
+            break;
+        case WC_HASH_TYPE_SHA384:
+#ifdef WOLFSSL_SHA384
+            wc_Sha384Free(&hash->sha384);
+            ret = 0;
+#endif
+            break;
+        case WC_HASH_TYPE_SHA512:
+#ifdef WOLFSSL_SHA512
+            wc_Sha512Free(&hash->sha512);
+            ret = 0;
+#endif
+            break;
+
         case WC_HASH_TYPE_SHA3_224:
+#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_224)
+            wc_Sha3_224_Free(&hash->sha3);
+            ret = 0;
+#endif
+            break;
         case WC_HASH_TYPE_SHA3_256:
+#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_256)
+            wc_Sha3_256_Free(&hash->sha3);
+            ret = 0;
+#endif
+            break;
         case WC_HASH_TYPE_SHA3_384:
+#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_384)
+            wc_Sha3_384_Free(&hash->sha3);
+            ret = 0;
+#endif
+            break;
         case WC_HASH_TYPE_SHA3_512:
+#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_512)
+            wc_Sha3_512_Free(&hash->sha3);
+            ret = 0;
+#endif
+            break;
+
+        /* not supported */
+        case WC_HASH_TYPE_MD5_SHA:
+        case WC_HASH_TYPE_MD2:
+        case WC_HASH_TYPE_MD4:
         case WC_HASH_TYPE_BLAKE2B:
+        case WC_HASH_TYPE_BLAKE2S:
         case WC_HASH_TYPE_NONE:
         default:
             ret = BAD_FUNC_ARG;
@@ -639,6 +852,131 @@
     return ret;
 }
 
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+int wc_HashSetFlags(wc_HashAlg* hash, enum wc_HashType type, word32 flags)
+{
+    int ret = HASH_TYPE_E; /* Default to hash type error */
+
+    if (hash == NULL)
+        return BAD_FUNC_ARG;
+
+    switch (type) {
+        case WC_HASH_TYPE_MD5:
+#ifndef NO_MD5
+            ret = wc_Md5SetFlags(&hash->md5, flags);
+#endif
+            break;
+        case WC_HASH_TYPE_SHA:
+#ifndef NO_SHA
+            ret = wc_ShaSetFlags(&hash->sha, flags);
+#endif
+            break;
+        case WC_HASH_TYPE_SHA224:
+#ifdef WOLFSSL_SHA224
+            ret = wc_Sha224SetFlags(&hash->sha224, flags);
+#endif
+            break;
+        case WC_HASH_TYPE_SHA256:
+#ifndef NO_SHA256
+            ret = wc_Sha256SetFlags(&hash->sha256, flags);
+#endif
+            break;
+        case WC_HASH_TYPE_SHA384:
+#ifdef WOLFSSL_SHA384
+            ret = wc_Sha384SetFlags(&hash->sha384, flags);
+#endif
+            break;
+        case WC_HASH_TYPE_SHA512:
+#ifdef WOLFSSL_SHA512
+            ret = wc_Sha512SetFlags(&hash->sha512, flags);
+#endif
+            break;
+
+        case WC_HASH_TYPE_SHA3_224:
+        case WC_HASH_TYPE_SHA3_256:
+        case WC_HASH_TYPE_SHA3_384:
+        case WC_HASH_TYPE_SHA3_512:
+#ifdef WOLFSSL_SHA3
+            ret = wc_Sha3_SetFlags(&hash->sha3, flags);
+#endif
+            break;
+
+        /* not supported */
+        case WC_HASH_TYPE_MD5_SHA:
+        case WC_HASH_TYPE_MD2:
+        case WC_HASH_TYPE_MD4:
+        case WC_HASH_TYPE_BLAKE2B:
+        case WC_HASH_TYPE_BLAKE2S:
+        case WC_HASH_TYPE_NONE:
+        default:
+            ret = BAD_FUNC_ARG;
+    };
+
+    return ret;
+}
+int wc_HashGetFlags(wc_HashAlg* hash, enum wc_HashType type, word32* flags)
+{
+    int ret = HASH_TYPE_E; /* Default to hash type error */
+
+    if (hash == NULL)
+        return BAD_FUNC_ARG;
+
+    switch (type) {
+        case WC_HASH_TYPE_MD5:
+#ifndef NO_MD5
+            ret = wc_Md5GetFlags(&hash->md5, flags);
+#endif
+            break;
+        case WC_HASH_TYPE_SHA:
+#ifndef NO_SHA
+            ret = wc_ShaGetFlags(&hash->sha, flags);
+#endif
+            break;
+        case WC_HASH_TYPE_SHA224:
+#ifdef WOLFSSL_SHA224
+            ret = wc_Sha224GetFlags(&hash->sha224, flags);
+#endif
+            break;
+        case WC_HASH_TYPE_SHA256:
+#ifndef NO_SHA256
+            ret = wc_Sha256GetFlags(&hash->sha256, flags);
+#endif
+            break;
+        case WC_HASH_TYPE_SHA384:
+#ifdef WOLFSSL_SHA384
+            ret = wc_Sha384GetFlags(&hash->sha384, flags);
+#endif
+            break;
+        case WC_HASH_TYPE_SHA512:
+#ifdef WOLFSSL_SHA512
+            ret = wc_Sha512GetFlags(&hash->sha512, flags);
+#endif
+            break;
+
+        case WC_HASH_TYPE_SHA3_224:
+        case WC_HASH_TYPE_SHA3_256:
+        case WC_HASH_TYPE_SHA3_384:
+        case WC_HASH_TYPE_SHA3_512:
+#ifdef WOLFSSL_SHA3
+            ret = wc_Sha3_GetFlags(&hash->sha3, flags);
+#endif
+            break;
+
+        /* not supported */
+        case WC_HASH_TYPE_MD5_SHA:
+        case WC_HASH_TYPE_MD2:
+        case WC_HASH_TYPE_MD4:
+        case WC_HASH_TYPE_BLAKE2B:
+        case WC_HASH_TYPE_BLAKE2S:
+        case WC_HASH_TYPE_NONE:
+        default:
+            ret = BAD_FUNC_ARG;
+    };
+
+    return ret;
+}
+#endif
+
 
 #if !defined(WOLFSSL_TI_HASH)
 
@@ -658,12 +996,17 @@
             return MEMORY_E;
     #endif
 
-        ret = wc_InitMd5(md5);
-        if (ret == 0) {
-            ret = wc_Md5Update(md5, data, len);
-            if (ret == 0) {
-                ret = wc_Md5Final(md5, hash);
+        if ((ret = wc_InitMd5(md5)) != 0) {
+            WOLFSSL_MSG("InitMd5 failed");
+        }
+        else {
+            if ((ret = wc_Md5Update(md5, data, len)) != 0) {
+                WOLFSSL_MSG("Md5Update failed");
             }
+            else if ((ret = wc_Md5Final(md5, hash)) != 0) {
+                WOLFSSL_MSG("Md5Final failed");
+            }
+            wc_Md5Free(md5);
         }
 
     #ifdef WOLFSSL_SMALL_STACK
@@ -691,11 +1034,16 @@
     #endif
 
         if ((ret = wc_InitSha(sha)) != 0) {
-            WOLFSSL_MSG("wc_InitSha failed");
+            WOLFSSL_MSG("InitSha failed");
         }
         else {
-            wc_ShaUpdate(sha, data, len);
-            wc_ShaFinal(sha, hash);
+            if ((ret = wc_ShaUpdate(sha, data, len)) != 0) {
+                WOLFSSL_MSG("ShaUpdate failed");
+            }
+            else if ((ret = wc_ShaFinal(sha, hash)) != 0) {
+                WOLFSSL_MSG("ShaFinal failed");
+            }
+            wc_ShaFree(sha);
         }
 
     #ifdef WOLFSSL_SMALL_STACK
@@ -861,4 +1209,470 @@
         return ret;
     }
 #endif /* WOLFSSL_SHA384 */
+
+#if defined(WOLFSSL_SHA3)
+#if !defined(WOLFSSL_NOSHA3_224)
+    int wc_Sha3_224Hash(const byte* data, word32 len, byte* hash)
+    {
+        int ret = 0;
+    #ifdef WOLFSSL_SMALL_STACK
+        wc_Sha3* sha3;
+    #else
+        wc_Sha3 sha3[1];
+    #endif
+
+    #ifdef WOLFSSL_SMALL_STACK
+        sha3 = (wc_Sha3*)XMALLOC(sizeof(wc_Sha3), NULL,
+            DYNAMIC_TYPE_TMP_BUFFER);
+        if (sha3 == NULL)
+            return MEMORY_E;
+    #endif
+
+        if ((ret = wc_InitSha3_224(sha3, NULL, INVALID_DEVID)) != 0) {
+            WOLFSSL_MSG("InitSha3_224 failed");
+        }
+        else {
+            if ((ret = wc_Sha3_224_Update(sha3, data, len)) != 0) {
+                WOLFSSL_MSG("Sha3_224_Update failed");
+            }
+            else if ((ret = wc_Sha3_224_Final(sha3, hash)) != 0) {
+                WOLFSSL_MSG("Sha3_224_Final failed");
+            }
+            wc_Sha3_224_Free(sha3);
+        }
+
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(sha3, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    #endif
+
+        return ret;
+    }
+#endif /* !WOLFSSL_NOSHA3_224 */
+
+#if !defined(WOLFSSL_NOSHA3_256)
+    int wc_Sha3_256Hash(const byte* data, word32 len, byte* hash)
+    {
+        int ret = 0;
+    #ifdef WOLFSSL_SMALL_STACK
+        wc_Sha3* sha3;
+    #else
+        wc_Sha3 sha3[1];
+    #endif
+
+    #ifdef WOLFSSL_SMALL_STACK
+        sha3 = (wc_Sha3*)XMALLOC(sizeof(wc_Sha3), NULL,
+            DYNAMIC_TYPE_TMP_BUFFER);
+        if (sha3 == NULL)
+            return MEMORY_E;
+    #endif
+
+        if ((ret = wc_InitSha3_256(sha3, NULL, INVALID_DEVID)) != 0) {
+            WOLFSSL_MSG("InitSha3_256 failed");
+        }
+        else {
+            if ((ret = wc_Sha3_256_Update(sha3, data, len)) != 0) {
+                WOLFSSL_MSG("Sha3_256_Update failed");
+            }
+            else if ((ret = wc_Sha3_256_Final(sha3, hash)) != 0) {
+                WOLFSSL_MSG("Sha3_256_Final failed");
+            }
+            wc_Sha3_256_Free(sha3);
+        }
+
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(sha3, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    #endif
+
+        return ret;
+    }
+#endif /* !WOLFSSL_NOSHA3_256 */
+
+#if !defined(WOLFSSL_NOSHA3_384)
+    int wc_Sha3_384Hash(const byte* data, word32 len, byte* hash)
+    {
+        int ret = 0;
+    #ifdef WOLFSSL_SMALL_STACK
+        wc_Sha3* sha3;
+    #else
+        wc_Sha3 sha3[1];
+    #endif
+
+    #ifdef WOLFSSL_SMALL_STACK
+        sha3 = (wc_Sha3*)XMALLOC(sizeof(wc_Sha3), NULL,
+            DYNAMIC_TYPE_TMP_BUFFER);
+        if (sha3 == NULL)
+            return MEMORY_E;
+    #endif
+
+        if ((ret = wc_InitSha3_384(sha3, NULL, INVALID_DEVID)) != 0) {
+            WOLFSSL_MSG("InitSha3_384 failed");
+        }
+        else {
+            if ((ret = wc_Sha3_384_Update(sha3, data, len)) != 0) {
+                WOLFSSL_MSG("Sha3_384_Update failed");
+            }
+            else if ((ret = wc_Sha3_384_Final(sha3, hash)) != 0) {
+                WOLFSSL_MSG("Sha3_384_Final failed");
+            }
+            wc_Sha3_384_Free(sha3);
+        }
+
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(sha3, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    #endif
+
+        return ret;
+    }
+#endif /* !WOLFSSL_NOSHA3_384 */
+
+#if !defined(WOLFSSL_NOSHA3_512)
+    int wc_Sha3_512Hash(const byte* data, word32 len, byte* hash)
+    {
+        int ret = 0;
+    #ifdef WOLFSSL_SMALL_STACK
+        wc_Sha3* sha3;
+    #else
+        wc_Sha3 sha3[1];
+    #endif
+
+    #ifdef WOLFSSL_SMALL_STACK
+        sha3 = (wc_Sha3*)XMALLOC(sizeof(wc_Sha3), NULL,
+            DYNAMIC_TYPE_TMP_BUFFER);
+        if (sha3 == NULL)
+            return MEMORY_E;
+    #endif
+
+        if ((ret = wc_InitSha3_512(sha3, NULL, INVALID_DEVID)) != 0) {
+            WOLFSSL_MSG("InitSha3_512 failed");
+        }
+        else {
+            if ((ret = wc_Sha3_512_Update(sha3, data, len)) != 0) {
+                WOLFSSL_MSG("Sha3_512_Update failed");
+            }
+            else if ((ret = wc_Sha3_512_Final(sha3, hash)) != 0) {
+                WOLFSSL_MSG("Sha3_512_Final failed");
+            }
+            wc_Sha3_512_Free(sha3);
+        }
+
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(sha3, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    #endif
+
+        return ret;
+    }
+#endif /* !WOLFSSL_NOSHA3_512 */
+
+#if defined(WOLFSSL_SHAKE256) && !defined(WOLFSSL_NO_SHAKE256)
+    int wc_Shake256Hash(const byte* data, word32 len, byte* hash,
+                        word32 hashLen)
+    {
+        int ret = 0;
+    #ifdef WOLFSSL_SMALL_STACK
+        wc_Shake* shake;
+    #else
+        wc_Shake shake[1];
+    #endif
+
+    #ifdef WOLFSSL_SMALL_STACK
+        shake = (wc_Shake*)XMALLOC(sizeof(wc_Shake), NULL,
+            DYNAMIC_TYPE_TMP_BUFFER);
+        if (shake == NULL)
+            return MEMORY_E;
+    #endif
+
+        if ((ret = wc_InitShake256(shake, NULL, INVALID_DEVID)) != 0) {
+            WOLFSSL_MSG("InitShake256 failed");
+        }
+        else {
+            if ((ret = wc_Shake256_Update(shake, data, len)) != 0) {
+                WOLFSSL_MSG("Shake256_Update failed");
+            }
+            else if ((ret = wc_Shake256_Final(shake, hash, hashLen)) != 0) {
+                WOLFSSL_MSG("Shake256_Final failed");
+            }
+            wc_Shake256_Free(shake);
+        }
+
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(shake, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    #endif
+
+        return ret;
+    }
+#endif /* WOLFSSL_SHAKE_256 && !WOLFSSL_NO_SHAKE256 */
+#endif /* WOLFSSL_SHA3 */
+
+#endif /* !NO_HASH_WRAPPER */
+
+#ifdef WOLFSSL_HAVE_PRF
+
+#ifdef WOLFSSL_SHA384
+    #define P_HASH_MAX_SIZE WC_SHA384_DIGEST_SIZE
+#else
+    #define P_HASH_MAX_SIZE WC_SHA256_DIGEST_SIZE
+#endif
+
+/* Pseudo Random Function for MD5, SHA-1, SHA-256, or SHA-384 */
+int wc_PRF(byte* result, word32 resLen, const byte* secret,
+                  word32 secLen, const byte* seed, word32 seedLen, int hash,
+                  void* heap, int devId)
+{
+    word32 len = P_HASH_MAX_SIZE;
+    word32 times;
+    word32 lastLen;
+    word32 lastTime;
+    word32 i;
+    word32 idx = 0;
+    int    ret = 0;
+#ifdef WOLFSSL_SMALL_STACK
+    byte*  previous;
+    byte*  current;
+    Hmac*  hmac;
+#else
+    byte   previous[P_HASH_MAX_SIZE];  /* max size */
+    byte   current[P_HASH_MAX_SIZE];   /* max size */
+    Hmac   hmac[1];
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+    previous = (byte*)XMALLOC(P_HASH_MAX_SIZE, heap, DYNAMIC_TYPE_DIGEST);
+    current  = (byte*)XMALLOC(P_HASH_MAX_SIZE, heap, DYNAMIC_TYPE_DIGEST);
+    hmac     = (Hmac*)XMALLOC(sizeof(Hmac),    heap, DYNAMIC_TYPE_HMAC);
+
+    if (previous == NULL || current == NULL || hmac == NULL) {
+        if (previous) XFREE(previous, heap, DYNAMIC_TYPE_DIGEST);
+        if (current)  XFREE(current,  heap, DYNAMIC_TYPE_DIGEST);
+        if (hmac)     XFREE(hmac,     heap, DYNAMIC_TYPE_HMAC);
+
+        return MEMORY_E;
+    }
+#endif
+
+    switch (hash) {
+    #ifndef NO_MD5
+        case md5_mac:
+            hash = WC_MD5;
+            len  = WC_MD5_DIGEST_SIZE;
+        break;
+    #endif
+
+    #ifndef NO_SHA256
+        case sha256_mac:
+            hash = WC_SHA256;
+            len  = WC_SHA256_DIGEST_SIZE;
+        break;
+    #endif
+
+    #ifdef WOLFSSL_SHA384
+        case sha384_mac:
+            hash = WC_SHA384;
+            len  = WC_SHA384_DIGEST_SIZE;
+        break;
+    #endif
+
+    #ifndef NO_SHA
+        case sha_mac:
+        default:
+            hash = WC_SHA;
+            len  = WC_SHA_DIGEST_SIZE;
+        break;
+    #endif
+    }
+
+    times   = resLen / len;
+    lastLen = resLen % len;
+
+    if (lastLen)
+        times += 1;
+
+    lastTime = times - 1;
+
+    ret = wc_HmacInit(hmac, heap, devId);
+    if (ret == 0) {
+        ret = wc_HmacSetKey(hmac, hash, secret, secLen);
+        if (ret == 0)
+            ret = wc_HmacUpdate(hmac, seed, seedLen); /* A0 = seed */
+        if (ret == 0)
+            ret = wc_HmacFinal(hmac, previous);       /* A1 */
+        if (ret == 0) {
+            for (i = 0; i < times; i++) {
+                ret = wc_HmacUpdate(hmac, previous, len);
+                if (ret != 0)
+                    break;
+                ret = wc_HmacUpdate(hmac, seed, seedLen);
+                if (ret != 0)
+                    break;
+                ret = wc_HmacFinal(hmac, current);
+                if (ret != 0)
+                    break;
+
+                if ((i == lastTime) && lastLen)
+                    XMEMCPY(&result[idx], current,
+                                             min(lastLen, P_HASH_MAX_SIZE));
+                else {
+                    XMEMCPY(&result[idx], current, len);
+                    idx += len;
+                    ret = wc_HmacUpdate(hmac, previous, len);
+                    if (ret != 0)
+                        break;
+                    ret = wc_HmacFinal(hmac, previous);
+                    if (ret != 0)
+                        break;
+                }
+            }
+        }
+        wc_HmacFree(hmac);
+    }
+
+    ForceZero(previous,  P_HASH_MAX_SIZE);
+    ForceZero(current,   P_HASH_MAX_SIZE);
+    ForceZero(hmac,      sizeof(Hmac));
+
+#ifdef WOLFSSL_SMALL_STACK
+    XFREE(previous, heap, DYNAMIC_TYPE_DIGEST);
+    XFREE(current,  heap, DYNAMIC_TYPE_DIGEST);
+    XFREE(hmac,     heap, DYNAMIC_TYPE_HMAC);
+#endif
+
+    return ret;
+}
+#undef P_HASH_MAX_SIZE
+
+/* compute PRF (pseudo random function) using SHA1 and MD5 for TLSv1 */
+int wc_PRF_TLSv1(byte* digest, word32 digLen, const byte* secret,
+           word32 secLen, const byte* label, word32 labLen,
+           const byte* seed, word32 seedLen, void* heap, int devId)
+{
+    int    ret  = 0;
+    word32 half = (secLen + 1) / 2;
+
+#ifdef WOLFSSL_SMALL_STACK
+    byte* md5_half;
+    byte* sha_half;
+    byte* md5_result;
+    byte* sha_result;
+#else
+    byte  md5_half[MAX_PRF_HALF];     /* half is real size */
+    byte  sha_half[MAX_PRF_HALF];     /* half is real size */
+    byte  md5_result[MAX_PRF_DIG];    /* digLen is real size */
+    byte  sha_result[MAX_PRF_DIG];    /* digLen is real size */
+#endif
+#if defined(WOLFSSL_ASYNC_CRYPT) && !defined(WC_ASYNC_NO_HASH)
+    DECLARE_VAR(labelSeed, byte, MAX_PRF_LABSEED, heap);
+    if (labelSeed == NULL)
+        return MEMORY_E;
+#else
+    byte labelSeed[MAX_PRF_LABSEED];
+#endif
+
+    if (half > MAX_PRF_HALF ||
+        labLen + seedLen > MAX_PRF_LABSEED ||
+        digLen > MAX_PRF_DIG)
+    {
+    #if defined(WOLFSSL_ASYNC_CRYPT) && !defined(WC_ASYNC_NO_HASH)
+        FREE_VAR(labelSeed, heap);
+    #endif
+        return BUFFER_E;
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    md5_half   = (byte*)XMALLOC(MAX_PRF_HALF,    heap, DYNAMIC_TYPE_DIGEST);
+    sha_half   = (byte*)XMALLOC(MAX_PRF_HALF,    heap, DYNAMIC_TYPE_DIGEST);
+    md5_result = (byte*)XMALLOC(MAX_PRF_DIG,     heap, DYNAMIC_TYPE_DIGEST);
+    sha_result = (byte*)XMALLOC(MAX_PRF_DIG,     heap, DYNAMIC_TYPE_DIGEST);
+
+    if (md5_half == NULL || sha_half == NULL || md5_result == NULL ||
+                                                           sha_result == NULL) {
+        if (md5_half)   XFREE(md5_half,   heap, DYNAMIC_TYPE_DIGEST);
+        if (sha_half)   XFREE(sha_half,   heap, DYNAMIC_TYPE_DIGEST);
+        if (md5_result) XFREE(md5_result, heap, DYNAMIC_TYPE_DIGEST);
+        if (sha_result) XFREE(sha_result, heap, DYNAMIC_TYPE_DIGEST);
+    #if defined(WOLFSSL_ASYNC_CRYPT) && !defined(WC_ASYNC_NO_HASH)
+        FREE_VAR(labelSeed, heap);
+    #endif
+
+        return MEMORY_E;
+    }
+#endif
+
+    XMEMSET(md5_result, 0, digLen);
+    XMEMSET(sha_result, 0, digLen);
+
+    XMEMCPY(md5_half, secret, half);
+    XMEMCPY(sha_half, secret + half - secLen % 2, half);
+
+    XMEMCPY(labelSeed, label, labLen);
+    XMEMCPY(labelSeed + labLen, seed, seedLen);
+
+    if ((ret = wc_PRF(md5_result, digLen, md5_half, half, labelSeed,
+                                labLen + seedLen, md5_mac, heap, devId)) == 0) {
+        if ((ret = wc_PRF(sha_result, digLen, sha_half, half, labelSeed,
+                                labLen + seedLen, sha_mac, heap, devId)) == 0) {
+            /* calculate XOR for TLSv1 PRF */
+            XMEMCPY(digest, md5_result, digLen);
+            xorbuf(digest, sha_result, digLen);
+        }
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    XFREE(md5_half,   heap, DYNAMIC_TYPE_DIGEST);
+    XFREE(sha_half,   heap, DYNAMIC_TYPE_DIGEST);
+    XFREE(md5_result, heap, DYNAMIC_TYPE_DIGEST);
+    XFREE(sha_result, heap, DYNAMIC_TYPE_DIGEST);
+#endif
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && !defined(WC_ASYNC_NO_HASH)
+    FREE_VAR(labelSeed, heap);
+#endif
+
+    return ret;
+}
+
+/* Wrapper for TLS 1.2 and TLSv1 cases to calculate PRF */
+/* In TLS 1.2 case call straight thru to wc_PRF */
+int wc_PRF_TLS(byte* digest, word32 digLen, const byte* secret, word32 secLen,
+            const byte* label, word32 labLen, const byte* seed, word32 seedLen,
+            int useAtLeastSha256, int hash_type, void* heap, int devId)
+{
+    int ret = 0;
+
+    if (useAtLeastSha256) {
+    #if defined(WOLFSSL_ASYNC_CRYPT) && !defined(WC_ASYNC_NO_HASH)
+        DECLARE_VAR(labelSeed, byte, MAX_PRF_LABSEED, heap);
+        if (labelSeed == NULL)
+            return MEMORY_E;
+    #else
+        byte labelSeed[MAX_PRF_LABSEED];
+    #endif
+
+        if (labLen + seedLen > MAX_PRF_LABSEED)
+            return BUFFER_E;
+
+        XMEMCPY(labelSeed, label, labLen);
+        XMEMCPY(labelSeed + labLen, seed, seedLen);
+
+        /* If a cipher suite wants an algorithm better than sha256, it
+         * should use better. */
+        if (hash_type < sha256_mac || hash_type == blake2b_mac)
+            hash_type = sha256_mac;
+        /* compute PRF for MD5, SHA-1, SHA-256, or SHA-384 for TLSv1.2 PRF */
+        ret = wc_PRF(digest, digLen, secret, secLen, labelSeed,
+                     labLen + seedLen, hash_type, heap, devId);
+
+    #if defined(WOLFSSL_ASYNC_CRYPT) && !defined(WC_ASYNC_NO_HASH)
+        FREE_VAR(labelSeed, heap);
+    #endif
+    }
+#ifndef NO_OLD_TLS
+    else {
+        /* compute TLSv1 PRF (pseudo random function using HMAC) */
+        ret = wc_PRF_TLSv1(digest, digLen, secret, secLen, label, labLen, seed,
+                          seedLen, heap, devId);
+    }
+#endif
+
+    return ret;
+}
+#endif /* WOLFSSL_HAVE_PRF */
 
--- a/wolfcrypt/src/hc128.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/hc128.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* hc128.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
--- a/wolfcrypt/src/hmac.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/hmac.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* hmac.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -43,6 +43,10 @@
 
 #include <wolfssl/wolfcrypt/hmac.h>
 
+#ifdef WOLF_CRYPTO_CB
+    #include <wolfssl/wolfcrypt/cryptocb.h>
+#endif
+
 #ifdef NO_INLINE
     #include <wolfssl/wolfcrypt/misc.h>
 #else
@@ -61,8 +65,7 @@
     {
         if (hmac == NULL || (key == NULL && keySz != 0) ||
            !(type == WC_MD5 || type == WC_SHA || type == WC_SHA256 ||
-                type == WC_SHA384 || type == WC_SHA512 ||
-                type == BLAKE2B_ID)) {
+                type == WC_SHA384 || type == WC_SHA512)) {
             return BAD_FUNC_ARG;
         }
 
@@ -127,8 +130,7 @@
             type == WC_SHA224 || type == WC_SHA256 ||
             type == WC_SHA384 || type == WC_SHA512 ||
             type == WC_SHA3_224 || type == WC_SHA3_256 ||
-            type == WC_SHA3_384 || type == WC_SHA3_512 ||
-            type == BLAKE2B_ID)) {
+            type == WC_SHA3_384 || type == WC_SHA3_512)) {
         return BAD_FUNC_ARG;
     }
 
@@ -168,12 +170,6 @@
             break;
     #endif /* WOLFSSL_SHA512 */
 
-    #ifdef HAVE_BLAKE2
-        case BLAKE2B_ID:
-            ret = BLAKE2B_OUTBYTES;
-            break;
-    #endif /* HAVE_BLAKE2 */
-
     #ifdef WOLFSSL_SHA3
         case WC_SHA3_224:
             ret = WC_SHA3_224_DIGEST_SIZE;
@@ -241,26 +237,28 @@
             break;
     #endif /* WOLFSSL_SHA512 */
 
-    #ifdef HAVE_BLAKE2
-        case BLAKE2B_ID:
-            ret = wc_InitBlake2b(&hmac->hash.blake2b, BLAKE2B_256);
-            break;
-    #endif /* HAVE_BLAKE2 */
-
     #ifdef WOLFSSL_SHA3
+    #ifndef WOLFSSL_NOSHA3_224
         case WC_SHA3_224:
             ret = wc_InitSha3_224(&hmac->hash.sha3, heap, INVALID_DEVID);
             break;
+    #endif
+    #ifndef WOLFSSL_NOSHA3_256
         case WC_SHA3_256:
             ret = wc_InitSha3_256(&hmac->hash.sha3, heap, INVALID_DEVID);
             break;
+    #endif
+    #ifndef WOLFSSL_NOSHA3_384
         case WC_SHA3_384:
             ret = wc_InitSha3_384(&hmac->hash.sha3, heap, INVALID_DEVID);
             break;
+    #endif
+    #ifndef WOLFSSL_NOSHA3_512
         case WC_SHA3_512:
             ret = wc_InitSha3_512(&hmac->hash.sha3, heap, INVALID_DEVID);
             break;
     #endif
+    #endif
 
         default:
             ret = BAD_FUNC_ARG;
@@ -291,11 +289,21 @@
             type == WC_SHA224 || type == WC_SHA256 ||
             type == WC_SHA384 || type == WC_SHA512 ||
             type == WC_SHA3_224 || type == WC_SHA3_256 ||
-            type == WC_SHA3_384 || type == WC_SHA3_512 ||
-            type == BLAKE2B_ID)) {
+            type == WC_SHA3_384 || type == WC_SHA3_512)) {
         return BAD_FUNC_ARG;
     }
 
+#ifndef HAVE_FIPS
+    /* if set key has already been run then make sure and free existing */
+    /* This is for async and PIC32MZ situations, and just normally OK,
+       provided the user calls wc_HmacInit() first. That function is not
+       available in FIPS builds. In current FIPS builds, the hashes are
+       not allocating resources. */
+    if (hmac->macType != WC_HASH_TYPE_NONE) {
+        wc_HmacFree(hmac);
+    }
+#endif
+
     hmac->innerHashKeyed = 0;
     hmac->macType = (byte)type;
 
@@ -308,6 +316,11 @@
         return HMAC_MIN_KEYLEN_E;
 #endif
 
+#ifdef WOLF_CRYPTO_CB
+    hmac->keyRaw = key; /* use buffer directly */
+    hmac->keyLen = length;
+#endif
+
     ip = (byte*)hmac->ipad;
     op = (byte*)hmac->opad;
 
@@ -355,7 +368,6 @@
 
     #ifdef WOLFSSL_SHA224
         case WC_SHA224:
-        {
             hmac_block_size = WC_SHA224_BLOCK_SIZE;
             if (length <= WC_SHA224_BLOCK_SIZE) {
                 if (key != NULL) {
@@ -372,13 +384,11 @@
 
                 length = WC_SHA224_DIGEST_SIZE;
             }
-        }
-        break;
+            break;
     #endif /* WOLFSSL_SHA224 */
-
     #ifndef NO_SHA256
         case WC_SHA256:
-    		hmac_block_size = WC_SHA256_BLOCK_SIZE;
+            hmac_block_size = WC_SHA256_BLOCK_SIZE;
             if (length <= WC_SHA256_BLOCK_SIZE) {
                 if (key != NULL) {
                     XMEMCPY(ip, key, length);
@@ -438,28 +448,8 @@
             break;
     #endif /* WOLFSSL_SHA512 */
 
-    #ifdef HAVE_BLAKE2
-        case BLAKE2B_ID:
-            hmac_block_size = BLAKE2B_BLOCKBYTES;
-            if (length <= BLAKE2B_BLOCKBYTES) {
-                if (key != NULL) {
-                    XMEMCPY(ip, key, length);
-                }
-            }
-            else {
-                ret = wc_Blake2bUpdate(&hmac->hash.blake2b, key, length);
-                if (ret != 0)
-                    break;
-                ret = wc_Blake2bFinal(&hmac->hash.blake2b, ip, BLAKE2B_256);
-                if (ret != 0)
-                    break;
-
-                length = BLAKE2B_256;
-            }
-            break;
-    #endif /* HAVE_BLAKE2 */
-
     #ifdef WOLFSSL_SHA3
+    #ifndef WOLFSSL_NOSHA3_224
         case WC_SHA3_224:
             hmac_block_size = WC_SHA3_224_BLOCK_SIZE;
             if (length <= WC_SHA3_224_BLOCK_SIZE) {
@@ -478,6 +468,8 @@
                 length = WC_SHA3_224_DIGEST_SIZE;
             }
             break;
+    #endif
+    #ifndef WOLFSSL_NOSHA3_256
         case WC_SHA3_256:
             hmac_block_size = WC_SHA3_256_BLOCK_SIZE;
             if (length <= WC_SHA3_256_BLOCK_SIZE) {
@@ -496,6 +488,8 @@
                 length = WC_SHA3_256_DIGEST_SIZE;
             }
             break;
+    #endif
+    #ifndef WOLFSSL_NOSHA3_384
         case WC_SHA3_384:
             hmac_block_size = WC_SHA3_384_BLOCK_SIZE;
             if (length <= WC_SHA3_384_BLOCK_SIZE) {
@@ -514,6 +508,8 @@
                 length = WC_SHA3_384_DIGEST_SIZE;
             }
             break;
+    #endif
+    #ifndef WOLFSSL_NOSHA3_512
         case WC_SHA3_512:
             hmac_block_size = WC_SHA3_512_BLOCK_SIZE;
             if (length <= WC_SHA3_512_BLOCK_SIZE) {
@@ -532,6 +528,7 @@
                 length = WC_SHA3_512_DIGEST_SIZE;
             }
             break;
+    #endif
     #endif /* WOLFSSL_SHA3 */
 
         default:
@@ -579,7 +576,7 @@
     #ifndef NO_MD5
         case WC_MD5:
             ret = wc_Md5Update(&hmac->hash.md5, (byte*)hmac->ipad,
-                                                                WC_MD5_BLOCK_SIZE);
+                                                             WC_MD5_BLOCK_SIZE);
             break;
     #endif /* !NO_MD5 */
 
@@ -593,54 +590,54 @@
     #ifdef WOLFSSL_SHA224
         case WC_SHA224:
             ret = wc_Sha224Update(&hmac->hash.sha224, (byte*)hmac->ipad,
-                                                             WC_SHA224_BLOCK_SIZE);
+                                                          WC_SHA224_BLOCK_SIZE);
             break;
     #endif /* WOLFSSL_SHA224 */
-
     #ifndef NO_SHA256
         case WC_SHA256:
             ret = wc_Sha256Update(&hmac->hash.sha256, (byte*)hmac->ipad,
-                                                             WC_SHA256_BLOCK_SIZE);
+                                                          WC_SHA256_BLOCK_SIZE);
             break;
     #endif /* !NO_SHA256 */
 
     #ifdef WOLFSSL_SHA384
         case WC_SHA384:
             ret = wc_Sha384Update(&hmac->hash.sha384, (byte*)hmac->ipad,
-                                                             WC_SHA384_BLOCK_SIZE);
+                                                          WC_SHA384_BLOCK_SIZE);
             break;
     #endif /* WOLFSSL_SHA384 */
     #ifdef WOLFSSL_SHA512
         case WC_SHA512:
             ret = wc_Sha512Update(&hmac->hash.sha512, (byte*)hmac->ipad,
-                                                             WC_SHA512_BLOCK_SIZE);
+                                                          WC_SHA512_BLOCK_SIZE);
             break;
     #endif /* WOLFSSL_SHA512 */
 
-    #ifdef HAVE_BLAKE2
-        case BLAKE2B_ID:
-            ret = wc_Blake2bUpdate(&hmac->hash.blake2b, (byte*)hmac->ipad,
-                                                            BLAKE2B_BLOCKBYTES);
-            break;
-    #endif /* HAVE_BLAKE2 */
-
     #ifdef WOLFSSL_SHA3
+    #ifndef WOLFSSL_NOSHA3_224
         case WC_SHA3_224:
             ret = wc_Sha3_224_Update(&hmac->hash.sha3, (byte*)hmac->ipad,
-                                                       WC_SHA3_224_BLOCK_SIZE);
+                                                        WC_SHA3_224_BLOCK_SIZE);
             break;
+    #endif
+    #ifndef WOLFSSL_NOSHA3_256
         case WC_SHA3_256:
             ret = wc_Sha3_256_Update(&hmac->hash.sha3, (byte*)hmac->ipad,
-                                                       WC_SHA3_256_BLOCK_SIZE);
+                                                        WC_SHA3_256_BLOCK_SIZE);
             break;
+    #endif
+    #ifndef WOLFSSL_NOSHA3_384
         case WC_SHA3_384:
             ret = wc_Sha3_384_Update(&hmac->hash.sha3, (byte*)hmac->ipad,
-                                                       WC_SHA3_384_BLOCK_SIZE);
+                                                        WC_SHA3_384_BLOCK_SIZE);
             break;
+    #endif
+    #ifndef WOLFSSL_NOSHA3_512
         case WC_SHA3_512:
             ret = wc_Sha3_512_Update(&hmac->hash.sha3, (byte*)hmac->ipad,
-                                                       WC_SHA3_512_BLOCK_SIZE);
+                                                        WC_SHA3_512_BLOCK_SIZE);
             break;
+    #endif
     #endif /* WOLFSSL_SHA3 */
 
         default:
@@ -648,7 +645,7 @@
     }
 
     if (ret == 0)
-        hmac->innerHashKeyed = 1;
+        hmac->innerHashKeyed = WC_HMAC_INNER_HASH_KEYED_SW;
 
     return ret;
 }
@@ -662,6 +659,15 @@
         return BAD_FUNC_ARG;
     }
 
+#ifdef WOLF_CRYPTO_CB
+    if (hmac->devId != INVALID_DEVID) {
+        ret = wc_CryptoCb_Hmac(hmac, hmac->macType, msg, length, NULL);
+        if (ret != CRYPTOCB_UNAVAILABLE)
+            return ret;
+        /* fall-through when unavailable */
+        ret = 0; /* reset error code */
+    }
+#endif
 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_HMAC)
     if (hmac->asyncDev.marker == WOLFSSL_ASYNC_MARKER_HMAC) {
     #if defined(HAVE_CAVIUM)
@@ -717,25 +723,27 @@
             break;
     #endif /* WOLFSSL_SHA512 */
 
-    #ifdef HAVE_BLAKE2
-        case BLAKE2B_ID:
-            ret = wc_Blake2bUpdate(&hmac->hash.blake2b, msg, length);
-            break;
-    #endif /* HAVE_BLAKE2 */
-
     #ifdef WOLFSSL_SHA3
+    #ifndef WOLFSSL_NOSHA3_224
         case WC_SHA3_224:
             ret = wc_Sha3_224_Update(&hmac->hash.sha3, msg, length);
             break;
+    #endif
+    #ifndef WOLFSSL_NOSHA3_256
         case WC_SHA3_256:
             ret = wc_Sha3_256_Update(&hmac->hash.sha3, msg, length);
             break;
+    #endif
+    #ifndef WOLFSSL_NOSHA3_384
         case WC_SHA3_384:
             ret = wc_Sha3_384_Update(&hmac->hash.sha3, msg, length);
             break;
+    #endif
+    #ifndef WOLFSSL_NOSHA3_512
         case WC_SHA3_512:
             ret = wc_Sha3_512_Update(&hmac->hash.sha3, msg, length);
             break;
+    #endif
     #endif /* WOLFSSL_SHA3 */
 
         default:
@@ -754,6 +762,14 @@
         return BAD_FUNC_ARG;
     }
 
+#ifdef WOLF_CRYPTO_CB
+    if (hmac->devId != INVALID_DEVID) {
+        ret = wc_CryptoCb_Hmac(hmac, hmac->macType, NULL, 0, hash);
+        if (ret != CRYPTOCB_UNAVAILABLE)
+            return ret;
+        /* fall-through when unavailable */
+    }
+#endif
 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_HMAC)
     if (hmac->asyncDev.marker == WOLFSSL_ASYNC_MARKER_HMAC) {
         int hashLen = wc_HmacSizeByType(hmac->macType);
@@ -784,11 +800,11 @@
             if (ret != 0)
                 break;
             ret = wc_Md5Update(&hmac->hash.md5, (byte*)hmac->opad,
-                                                                WC_MD5_BLOCK_SIZE);
+                                                             WC_MD5_BLOCK_SIZE);
             if (ret != 0)
                 break;
             ret = wc_Md5Update(&hmac->hash.md5, (byte*)hmac->innerHash,
-                                                               WC_MD5_DIGEST_SIZE);
+                                                            WC_MD5_DIGEST_SIZE);
             if (ret != 0)
                 break;
             ret = wc_Md5Final(&hmac->hash.md5, hash);
@@ -814,36 +830,33 @@
 
     #ifdef WOLFSSL_SHA224
         case WC_SHA224:
-        {
             ret = wc_Sha224Final(&hmac->hash.sha224, (byte*)hmac->innerHash);
             if (ret != 0)
                 break;
             ret = wc_Sha224Update(&hmac->hash.sha224, (byte*)hmac->opad,
-                                                             WC_SHA224_BLOCK_SIZE);
+                                                          WC_SHA224_BLOCK_SIZE);
             if (ret != 0)
                 break;
             ret = wc_Sha224Update(&hmac->hash.sha224, (byte*)hmac->innerHash,
-                                                            WC_SHA224_DIGEST_SIZE);
+                                                         WC_SHA224_DIGEST_SIZE);
             if (ret != 0)
                 break;
             ret = wc_Sha224Final(&hmac->hash.sha224, hash);
             if (ret != 0)
                 break;
-        }
-        break;
+            break;
     #endif /* WOLFSSL_SHA224 */
-
     #ifndef NO_SHA256
         case WC_SHA256:
             ret = wc_Sha256Final(&hmac->hash.sha256, (byte*)hmac->innerHash);
             if (ret != 0)
                 break;
             ret = wc_Sha256Update(&hmac->hash.sha256, (byte*)hmac->opad,
-                                                             WC_SHA256_BLOCK_SIZE);
+                                                          WC_SHA256_BLOCK_SIZE);
             if (ret != 0)
                 break;
             ret = wc_Sha256Update(&hmac->hash.sha256, (byte*)hmac->innerHash,
-                                                            WC_SHA256_DIGEST_SIZE);
+                                                         WC_SHA256_DIGEST_SIZE);
             if (ret != 0)
                 break;
             ret = wc_Sha256Final(&hmac->hash.sha256, hash);
@@ -856,11 +869,11 @@
             if (ret != 0)
                 break;
             ret = wc_Sha384Update(&hmac->hash.sha384, (byte*)hmac->opad,
-                                                             WC_SHA384_BLOCK_SIZE);
+                                                          WC_SHA384_BLOCK_SIZE);
             if (ret != 0)
                 break;
             ret = wc_Sha384Update(&hmac->hash.sha384, (byte*)hmac->innerHash,
-                                                            WC_SHA384_DIGEST_SIZE);
+                                                         WC_SHA384_DIGEST_SIZE);
             if (ret != 0)
                 break;
             ret = wc_Sha384Final(&hmac->hash.sha384, hash);
@@ -872,92 +885,82 @@
             if (ret != 0)
                 break;
             ret = wc_Sha512Update(&hmac->hash.sha512, (byte*)hmac->opad,
-                                                             WC_SHA512_BLOCK_SIZE);
+                                                          WC_SHA512_BLOCK_SIZE);
             if (ret != 0)
                 break;
             ret = wc_Sha512Update(&hmac->hash.sha512, (byte*)hmac->innerHash,
-                                                            WC_SHA512_DIGEST_SIZE);
+                                                         WC_SHA512_DIGEST_SIZE);
             if (ret != 0)
                 break;
             ret = wc_Sha512Final(&hmac->hash.sha512, hash);
             break;
     #endif /* WOLFSSL_SHA512 */
 
-    #ifdef HAVE_BLAKE2
-        case BLAKE2B_ID:
-            ret = wc_Blake2bFinal(&hmac->hash.blake2b, (byte*)hmac->innerHash,
-                                                                   BLAKE2B_256);
-            if (ret != 0)
-                break;
-            ret = wc_Blake2bUpdate(&hmac->hash.blake2b, (byte*)hmac->opad,
-                                                            BLAKE2B_BLOCKBYTES);
-            if (ret != 0)
-                break;
-            ret = wc_Blake2bUpdate(&hmac->hash.blake2b, (byte*)hmac->innerHash,
-                                                                   BLAKE2B_256);
-            if (ret != 0)
-                break;
-            ret = wc_Blake2bFinal(&hmac->hash.blake2b, hash, BLAKE2B_256);
-            break;
-    #endif /* HAVE_BLAKE2 */
-
     #ifdef WOLFSSL_SHA3
+    #ifndef WOLFSSL_NOSHA3_224
         case WC_SHA3_224:
             ret = wc_Sha3_224_Final(&hmac->hash.sha3, (byte*)hmac->innerHash);
             if (ret != 0)
                 break;
             ret = wc_Sha3_224_Update(&hmac->hash.sha3, (byte*)hmac->opad,
-                                                       WC_SHA3_224_BLOCK_SIZE);
+                                                        WC_SHA3_224_BLOCK_SIZE);
             if (ret != 0)
                 break;
             ret = wc_Sha3_224_Update(&hmac->hash.sha3, (byte*)hmac->innerHash,
-                                                          WC_SHA3_224_DIGEST_SIZE);
+                                                       WC_SHA3_224_DIGEST_SIZE);
             if (ret != 0)
                 break;
             ret = wc_Sha3_224_Final(&hmac->hash.sha3, hash);
             break;
+    #endif
+    #ifndef WOLFSSL_NOSHA3_256
         case WC_SHA3_256:
             ret = wc_Sha3_256_Final(&hmac->hash.sha3, (byte*)hmac->innerHash);
             if (ret != 0)
                 break;
             ret = wc_Sha3_256_Update(&hmac->hash.sha3, (byte*)hmac->opad,
-                                                       WC_SHA3_256_BLOCK_SIZE);
+                                                        WC_SHA3_256_BLOCK_SIZE);
             if (ret != 0)
                 break;
             ret = wc_Sha3_256_Update(&hmac->hash.sha3, (byte*)hmac->innerHash,
-                                                          WC_SHA3_256_DIGEST_SIZE);
+                                                       WC_SHA3_256_DIGEST_SIZE);
             if (ret != 0)
                 break;
             ret = wc_Sha3_256_Final(&hmac->hash.sha3, hash);
             break;
+    #endif
+    #ifndef WOLFSSL_NOSHA3_384
         case WC_SHA3_384:
             ret = wc_Sha3_384_Final(&hmac->hash.sha3, (byte*)hmac->innerHash);
             if (ret != 0)
                 break;
             ret = wc_Sha3_384_Update(&hmac->hash.sha3, (byte*)hmac->opad,
-                                                       WC_SHA3_384_BLOCK_SIZE);
+                                                        WC_SHA3_384_BLOCK_SIZE);
             if (ret != 0)
                 break;
             ret = wc_Sha3_384_Update(&hmac->hash.sha3, (byte*)hmac->innerHash,
-                                                          WC_SHA3_384_DIGEST_SIZE);
+                                                       WC_SHA3_384_DIGEST_SIZE);
             if (ret != 0)
                 break;
             ret = wc_Sha3_384_Final(&hmac->hash.sha3, hash);
             break;
+    #endif
+    #ifndef WOLFSSL_NOSHA3_512
         case WC_SHA3_512:
             ret = wc_Sha3_512_Final(&hmac->hash.sha3, (byte*)hmac->innerHash);
             if (ret != 0)
                 break;
             ret = wc_Sha3_512_Update(&hmac->hash.sha3, (byte*)hmac->opad,
-                                                       WC_SHA3_512_BLOCK_SIZE);
+                                                        WC_SHA3_512_BLOCK_SIZE);
             if (ret != 0)
                 break;
             ret = wc_Sha3_512_Update(&hmac->hash.sha3, (byte*)hmac->innerHash,
-                                                          WC_SHA3_512_DIGEST_SIZE);
+                                                       WC_SHA3_512_DIGEST_SIZE);
             if (ret != 0)
                 break;
             ret = wc_Sha3_512_Final(&hmac->hash.sha3, hash);
             break;
+    #endif
     #endif /* WOLFSSL_SHA3 */
 
         default:
@@ -982,11 +985,14 @@
         return BAD_FUNC_ARG;
 
     XMEMSET(hmac, 0, sizeof(Hmac));
+    hmac->macType = WC_HASH_TYPE_NONE;
     hmac->heap = heap;
+#ifdef WOLF_CRYPTO_CB
+    hmac->devId = devId;
+    hmac->devCtx = NULL;
+#endif
 
 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_HMAC)
-    hmac->keyLen = 0;
-
     ret = wolfAsync_DevCtxInit(&hmac->asyncDev, WOLFSSL_ASYNC_MARKER_HMAC,
                                                          hmac->heap, devId);
 #else
@@ -996,12 +1002,45 @@
     return ret;
 }
 
+#ifdef HAVE_PKCS11
+int  wc_HmacInit_Id(Hmac* hmac, unsigned char* id, int len, void* heap,
+                    int devId)
+{
+    int ret = 0;
+
+    if (hmac == NULL)
+        ret = BAD_FUNC_ARG;
+    if (ret == 0 && (len < 0 || len > HMAC_MAX_ID_LEN))
+        ret = BUFFER_E;
+
+    if (ret == 0)
+        ret  = wc_HmacInit(hmac, heap, devId);
+    if (ret == 0) {
+        XMEMCPY(hmac->id, id, len);
+        hmac->idLen = len;
+    }
+
+    return ret;
+}
+#endif
+
 /* Free Hmac from use with async device */
 void wc_HmacFree(Hmac* hmac)
 {
     if (hmac == NULL)
         return;
 
+#ifdef WOLF_CRYPTO_CB
+    /* handle cleanup case where final is not called */
+    if (hmac->devId != INVALID_DEVID && hmac->devCtx != NULL) {
+        int  ret;
+        byte finalHash[WC_HMAC_BLOCK_SIZE];
+        ret = wc_CryptoCb_Hmac(hmac, hmac->macType, NULL, 0, finalHash);
+        (void)ret; /* must ignore return code here */
+        (void)finalHash;
+    }
+#endif
+
     switch (hmac->macType) {
     #ifndef NO_MD5
         case WC_MD5:
@@ -1020,7 +1059,6 @@
             wc_Sha224Free(&hmac->hash.sha224);
             break;
     #endif /* WOLFSSL_SHA224 */
-
     #ifndef NO_SHA256
         case WC_SHA256:
             wc_Sha256Free(&hmac->hash.sha256);
@@ -1038,24 +1076,27 @@
             break;
     #endif /* WOLFSSL_SHA512 */
 
-    #ifdef HAVE_BLAKE2
-        case BLAKE2B_ID:
-            break;
-    #endif /* HAVE_BLAKE2 */
-
     #ifdef WOLFSSL_SHA3
+    #ifndef WOLFSSL_NOSHA3_224
         case WC_SHA3_224:
             wc_Sha3_224_Free(&hmac->hash.sha3);
             break;
+    #endif
+    #ifndef WOLFSSL_NOSHA3_256
         case WC_SHA3_256:
             wc_Sha3_256_Free(&hmac->hash.sha3);
             break;
+    #endif
+    #ifndef WOLFSSL_NOSHA3_384
         case WC_SHA3_384:
             wc_Sha3_384_Free(&hmac->hash.sha3);
             break;
+    #endif
+    #ifndef WOLFSSL_NOSHA3_512
         case WC_SHA3_512:
             wc_Sha3_512_Free(&hmac->hash.sha3);
             break;
+    #endif
     #endif /* WOLFSSL_SHA3 */
 
         default:
@@ -1065,6 +1106,42 @@
 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_HMAC)
     wolfAsync_DevCtxFree(&hmac->asyncDev, WOLFSSL_ASYNC_MARKER_HMAC);
 #endif /* WOLFSSL_ASYNC_CRYPT */
+
+    switch (hmac->macType) {
+    #ifndef NO_MD5
+        case WC_MD5:
+            wc_Md5Free(&hmac->hash.md5);
+            break;
+    #endif /* !NO_MD5 */
+
+    #ifndef NO_SHA
+        case WC_SHA:
+            wc_ShaFree(&hmac->hash.sha);
+            break;
+    #endif /* !NO_SHA */
+
+    #ifdef WOLFSSL_SHA224
+        case WC_SHA224:
+            wc_Sha224Free(&hmac->hash.sha224);
+            break;
+    #endif /* WOLFSSL_SHA224 */
+    #ifndef NO_SHA256
+        case WC_SHA256:
+            wc_Sha256Free(&hmac->hash.sha256);
+            break;
+    #endif /* !NO_SHA256 */
+
+    #ifdef WOLFSSL_SHA512
+    #ifdef WOLFSSL_SHA384
+        case WC_SHA384:
+            wc_Sha384Free(&hmac->hash.sha384);
+            break;
+    #endif /* WOLFSSL_SHA384 */
+        case WC_SHA512:
+            wc_Sha512Free(&hmac->hash.sha512);
+            break;
+    #endif /* WOLFSSL_SHA512 */
+    }
 }
 
 int wolfSSL_GetHmacMaxSize(void)
--- a/wolfcrypt/src/idea.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/idea.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* idea.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
--- a/wolfcrypt/src/integer.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/integer.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* integer.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -56,22 +56,29 @@
     #include <stdio.h>
 #endif
 
-#ifndef NO_WOLFSSL_SMALL_STACK
-    #ifndef WOLFSSL_SMALL_STACK
-        #define WOLFSSL_SMALL_STACK
+#ifdef SHOW_GEN
+    #ifndef NO_STDIO_FILESYSTEM
+        #include <stdio.h>
     #endif
 #endif
 
-#ifdef SHOW_GEN
-    #if defined(FREESCALE_MQX) || defined(FREESCALE_KSDK_MQX)
-        #if MQX_USE_IO_OLD
-            #include <fio.h>
-        #else
-            #include <nio.h>
-        #endif
-    #else
-        #include <stdio.h>
-    #endif
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+#ifdef __cplusplus
+    extern "C" {
+#endif
+WOLFSSL_LOCAL int sp_ModExp_1024(mp_int* base, mp_int* exp, mp_int* mod,
+    mp_int* res);
+WOLFSSL_LOCAL int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod,
+    mp_int* res);
+WOLFSSL_LOCAL int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod,
+    mp_int* res);
+WOLFSSL_LOCAL int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod,
+    mp_int* res);
+WOLFSSL_LOCAL int sp_ModExp_4096(mp_int* base, mp_int* exp, mp_int* mod,
+    mp_int* res);
+#ifdef __cplusplus
+    } /* extern "C" */
+#endif
 #endif
 
 /* reverse an array, used for radix code */
@@ -274,7 +281,7 @@
 #ifndef MP_8BIT
         bit = (t.dp[0] & 0x80) != 0;
 #else
-        bit = (t.dp[0] | ((t.dp[1] & 0x01) << 7)) & 0x80 != 0;
+        bit = ((t.dp[0] | ((t.dp[1] & 0x01) << 7)) & 0x80) != 0;
 #endif
         if (mp_div_2d (&t, 8, &t, NULL) != MP_OKAY)
             break;
@@ -321,6 +328,17 @@
   return res;
 }
 
+int mp_to_unsigned_bin_len(mp_int * a, unsigned char *b, int c)
+{
+    int i, len;
+
+    len = mp_unsigned_bin_size(a);
+
+    /* pad front w/ zeros to match length */
+    for (i = 0; i < c - len; i++)
+        b[i] = 0x00;
+    return mp_to_unsigned_bin(a, b + i);
+}
 
 /* creates "a" then copies b into it */
 int mp_init_copy (mp_int * a, mp_int * b)
@@ -544,6 +562,8 @@
     mp_digit r, rr;
     mp_digit D = x;
 
+    if (mp_iszero(c)) return;
+
     /* mask */
     mask = (((mp_digit)1) << D) - 1;
 
@@ -817,9 +837,21 @@
   int dr;
 
   /* modulus P must be positive */
-  if (P->sign == MP_NEG) {
+  if (mp_iszero(P) || P->sign == MP_NEG) {
      return MP_VAL;
   }
+  if (mp_isone(P)) {
+     mp_set(Y, 0);
+     return MP_OKAY;
+  }
+  if (mp_iszero(X)) {
+     mp_set(Y, 1);
+     return MP_OKAY;
+  }
+  if (mp_iszero(G)) {
+     mp_set(Y, 0);
+     return MP_OKAY;
+  }
 
   /* if exponent X is negative we have to recurse */
   if (X->sign == MP_NEG) {
@@ -858,6 +890,12 @@
 #endif
   }
 
+#ifdef BN_MP_EXPTMOD_BASE_2
+  if (G->used == 1 && G->dp[0] == 2) {
+    return mp_exptmod_base_2(X, P, Y);
+  }
+#endif
+
 /* modified diminished radix reduction */
 #if defined(BN_MP_REDUCE_IS_2K_L_C) && defined(BN_MP_REDUCE_2K_L_C) && \
   defined(BN_S_MP_EXPTMOD_C)
@@ -874,6 +912,8 @@
   dr = 0;
 #endif
 
+  (void)dr;
+
 #ifdef BN_MP_REDUCE_IS_2K_C
   /* if not, is it a unrestricted DR modulus? */
   if (dr == 0) {
@@ -899,6 +939,11 @@
 #endif
 }
 
+int mp_exptmod_ex (mp_int * G, mp_int * X, int digits, mp_int * P, mp_int * Y)
+{
+    (void)digits;
+    return mp_exptmod(G, X, P, Y);
+}
 
 /* b = |a|
  *
@@ -929,8 +974,8 @@
 int mp_invmod (mp_int * a, mp_int * b, mp_int * c)
 #endif
 {
-  /* b cannot be negative */
-  if (b->sign == MP_NEG || mp_iszero(b) == MP_YES) {
+  /* b cannot be negative or zero, and can not divide by 0 (1/a mod b) */
+  if (b->sign == MP_NEG || mp_iszero(b) == MP_YES || mp_iszero(a) == MP_YES) {
     return MP_VAL;
   }
 
@@ -1108,23 +1153,28 @@
   /* init temps */
   if ((res = mp_init_multi(&x, &y, &u, &v,
                            &A, &B)) != MP_OKAY) {
-     return res;
+    return res;
   }
 
   /* init rest of tmps temps */
   if ((res = mp_init_multi(&C, &D, 0, 0, 0, 0)) != MP_OKAY) {
-     mp_clear(&x);
-     mp_clear(&y);
-     mp_clear(&u);
-     mp_clear(&v);
-     mp_clear(&A);
-     mp_clear(&B);
-     return res;
+    mp_clear(&x);
+    mp_clear(&y);
+    mp_clear(&u);
+    mp_clear(&v);
+    mp_clear(&A);
+    mp_clear(&B);
+    return res;
   }
 
   /* x = a, y = b */
   if ((res = mp_mod(a, b, &x)) != MP_OKAY) {
-      goto LBL_ERR;
+    goto LBL_ERR;
+  }
+  if (mp_isone(&x)) {
+    mp_set(c, 1);
+    res = MP_OKAY;
+    goto LBL_ERR;
   }
   if ((res = mp_copy (b, &y)) != MP_OKAY) {
     goto LBL_ERR;
@@ -1161,10 +1211,10 @@
     if (mp_isodd (&A) == MP_YES || mp_isodd (&B) == MP_YES) {
       /* A = (A+y)/2, B = (B-x)/2 */
       if ((res = mp_add (&A, &y, &A)) != MP_OKAY) {
-         goto LBL_ERR;
+        goto LBL_ERR;
       }
       if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
-         goto LBL_ERR;
+        goto LBL_ERR;
       }
     }
     /* A = A/2, B = B/2 */
@@ -1186,10 +1236,10 @@
     if (mp_isodd (&C) == MP_YES || mp_isodd (&D) == MP_YES) {
       /* C = (C+y)/2, D = (D-x)/2 */
       if ((res = mp_add (&C, &y, &C)) != MP_OKAY) {
-         goto LBL_ERR;
+        goto LBL_ERR;
       }
       if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
-         goto LBL_ERR;
+        goto LBL_ERR;
       }
     }
     /* C = C/2, D = D/2 */
@@ -1369,7 +1419,7 @@
   return res;
 }
 
-/* chek if a bit is set */
+/* check if a bit is set */
 int mp_is_bit_set (mp_int *a, mp_digit b)
 {
     if ((mp_digit)a->used < b/DIGIT_BIT)
@@ -1857,7 +1907,7 @@
   mp_digit buf, mp;
   int     err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
 #ifdef WOLFSSL_SMALL_STACK
-  mp_int* M = NULL;
+  mp_int* M;
 #else
   mp_int M[TAB_SIZE];
 #endif
@@ -1865,11 +1915,11 @@
    * one of many reduction algorithms without modding the guts of
    * the code with if statements everywhere.
    */
-  int     (*redux)(mp_int*,mp_int*,mp_digit);
+  int     (*redux)(mp_int*,mp_int*,mp_digit) = NULL;
 
 #ifdef WOLFSSL_SMALL_STACK
   M = (mp_int*) XMALLOC(sizeof(mp_int) * TAB_SIZE, NULL,
-                                                       DYNAMIC_TYPE_TMP_BUFFER);
+                                                       DYNAMIC_TYPE_BIGINT);
   if (M == NULL)
     return MP_MEM;
 #endif
@@ -1902,7 +1952,7 @@
   /* init first cell */
   if ((err = mp_init_size(&M[1], P->alloc)) != MP_OKAY) {
 #ifdef WOLFSSL_SMALL_STACK
-     XFREE(M, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+     XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
 #endif
 
      return err;
@@ -1917,7 +1967,7 @@
       mp_clear(&M[1]);
 
 #ifdef WOLFSSL_SMALL_STACK
-      XFREE(M, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+      XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
 #endif
 
       return err;
@@ -1939,7 +1989,7 @@
      /* automatically pick the comba one if available (saves quite a few
         calls/ifs) */
 #ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
-     if (((P->used * 2 + 1) < MP_WARRAY) &&
+     if (((P->used * 2 + 1) < (int)MP_WARRAY) &&
           P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
         redux = fast_mp_montgomery_reduce;
      } else
@@ -1948,9 +1998,6 @@
 #ifdef BN_MP_MONTGOMERY_REDUCE_C
         /* use slower baseline Montgomery method */
         redux = mp_montgomery_reduce;
-#else
-        err = MP_VAL;
-        goto LBL_M;
 #endif
      }
   } else if (redmode == 1) {
@@ -1958,9 +2005,6 @@
      /* setup DR reduction for moduli of the form B**k - b */
      mp_dr_setup(P, &mp);
      redux = mp_dr_reduce;
-#else
-     err = MP_VAL;
-     goto LBL_M;
 #endif
   } else {
 #if defined(BN_MP_REDUCE_2K_SETUP_C) && defined(BN_MP_REDUCE_2K_C)
@@ -1969,10 +2013,12 @@
         goto LBL_M;
      }
      redux = mp_reduce_2k;
-#else
+#endif
+  }
+
+  if (redux == NULL) {
      err = MP_VAL;
      goto LBL_M;
-#endif
   }
 
   /* setup result */
@@ -2159,12 +2205,176 @@
   }
 
 #ifdef WOLFSSL_SMALL_STACK
-  XFREE(M, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+  XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
 #endif
 
   return err;
 }
 
+#ifdef BN_MP_EXPTMOD_BASE_2
+#if DIGIT_BIT < 16
+    #define WINSIZE    3
+#elif DIGIT_BIT < 32
+    #define WINSIZE    4
+#elif DIGIT_BIT < 64
+    #define WINSIZE    5
+#elif DIGIT_BIT < 128
+    #define WINSIZE    6
+#endif
+int mp_exptmod_base_2(mp_int * X, mp_int * P, mp_int * Y)
+{
+  mp_digit buf, mp;
+  int      err = MP_OKAY, bitbuf, bitcpy, bitcnt, digidx, x, y;
+#ifdef WOLFSSL_SMALL_STACK
+  mp_int  *res = NULL;
+#else
+  mp_int   res[1];
+#endif
+  int     (*redux)(mp_int*,mp_int*,mp_digit) = NULL;
+
+  /* automatically pick the comba one if available (saves quite a few
+     calls/ifs) */
+#ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
+  if (((P->used * 2 + 1) < (int)MP_WARRAY) &&
+       P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
+     redux = fast_mp_montgomery_reduce;
+  } else
+#endif
+  {
+#ifdef BN_MP_MONTGOMERY_REDUCE_C
+     /* use slower baseline Montgomery method */
+     redux = mp_montgomery_reduce;
+#else
+     return MP_VAL;
+#endif
+  }
+
+#ifdef WOLFSSL_SMALL_STACK
+  res = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+  if (res == NULL) {
+     return MP_MEM;
+  }
+#endif
+
+  /* now setup montgomery  */
+  if ((err = mp_montgomery_setup(P, &mp)) != MP_OKAY) {
+     goto LBL_M;
+  }
+
+  /* setup result */
+  if ((err = mp_init(res)) != MP_OKAY) {
+     goto LBL_M;
+  }
+
+  /* now we need R mod m */
+  if ((err = mp_montgomery_calc_normalization(res, P)) != MP_OKAY) {
+     goto LBL_RES;
+  }
+
+  /* Get the top bits left over after taking WINSIZE bits starting at the
+   * least-significant.
+   */
+  digidx = X->used - 1;
+  bitcpy = (X->used * DIGIT_BIT) % WINSIZE;
+  if (bitcpy > 0) {
+     bitcnt = (int)DIGIT_BIT - bitcpy;
+     buf    = X->dp[digidx--];
+     bitbuf = (int)(buf >> bitcnt);
+     /* Multiply montgomery representation of 1 by 2 ^ top */
+     err = mp_mul_2d(res, bitbuf, res);
+     if (err != MP_OKAY) {
+        goto LBL_RES;
+     }
+     err = mp_mod(res, P, res);
+     if (err != MP_OKAY) {
+        goto LBL_RES;
+     }
+     /* Move out bits used */
+     buf  <<= bitcpy;
+     bitcnt++;
+  }
+  else {
+     bitcnt = 1;
+     buf    = 0;
+  }
+
+  /* empty window and reset  */
+  bitbuf = 0;
+  bitcpy = 0;
+
+  for (;;) {
+    /* grab next digit as required */
+    if (--bitcnt == 0) {
+      /* if digidx == -1 we are out of digits so break */
+      if (digidx == -1) {
+        break;
+      }
+      /* read next digit and reset bitcnt */
+      buf    = X->dp[digidx--];
+      bitcnt = (int)DIGIT_BIT;
+    }
+
+    /* grab the next msb from the exponent */
+    y       = (int)(buf >> (DIGIT_BIT - 1)) & 1;
+    buf   <<= (mp_digit)1;
+    /* add bit to the window */
+    bitbuf |= (y << (WINSIZE - ++bitcpy));
+
+    if (bitcpy == WINSIZE) {
+      /* ok window is filled so square as required and multiply  */
+      /* square first */
+      for (x = 0; x < WINSIZE; x++) {
+        err = mp_sqr(res, res);
+        if (err != MP_OKAY) {
+          goto LBL_RES;
+        }
+        err = (*redux)(res, P, mp);
+        if (err != MP_OKAY) {
+          goto LBL_RES;
+        }
+      }
+
+      /* then multiply by 2^bitbuf */
+      err = mp_mul_2d(res, bitbuf, res);
+      if (err != MP_OKAY) {
+         goto LBL_RES;
+      }
+      err = mp_mod(res, P, res);
+      if (err != MP_OKAY) {
+         goto LBL_RES;
+      }
+
+      /* empty window and reset */
+      bitcpy = 0;
+      bitbuf = 0;
+    }
+  }
+
+  /* fixup result if Montgomery reduction is used
+   * recall that any value in a Montgomery system is
+   * actually multiplied by R mod n.  So we have
+   * to reduce one more time to cancel out the factor
+   * of R.
+   */
+  err = (*redux)(res, P, mp);
+  if (err != MP_OKAY) {
+     goto LBL_RES;
+  }
+
+  /* swap res with Y */
+  mp_copy(res, Y);
+
+LBL_RES:mp_clear (res);
+LBL_M:
+#ifdef WOLFSSL_SMALL_STACK
+  XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+  return err;
+}
+
+#undef WINSIZE
+#endif /* BN_MP_EXPTMOD_BASE_2 */
+
 
 /* setups the montgomery reduction stuff */
 int mp_montgomery_setup (mp_int * n, mp_digit * rho)
@@ -2278,7 +2488,7 @@
     /* a = a + mu * m * b**i
      *
      * This is computed in place and on the fly.  The multiplication
-     * by b**i is handled by offseting which columns the results
+     * by b**i is handled by offsetting which columns the results
      * are added to.
      *
      * Note the comba method normally doesn't handle carries in the
@@ -2384,7 +2594,7 @@
    * are fixed up in the inner loop.
    */
   digs = n->used * 2 + 1;
-  if ((digs < MP_WARRAY) &&
+  if ((digs < (int)MP_WARRAY) &&
       n->used <
       (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
     return fast_mp_montgomery_reduce (x, n, rho);
@@ -2628,7 +2838,15 @@
 {
     int i = b / DIGIT_BIT, res;
 
-    if (a->used < (int)(i + 1)) {
+    /*
+     * Require:
+     *  bit index b >= 0
+     *  a->alloc == a->used == 0 if a->dp == NULL
+     */
+    if (b < 0 || (a->dp == NULL && (a->alloc != 0 || a->used != 0)))
+        return MP_VAL;
+
+    if (a->dp == NULL || a->used < (int)(i + 1)) {
         /* grow a to accommodate the single bit */
         if ((res = mp_grow (a, i + 1)) != MP_OKAY) {
             return res;
@@ -2786,7 +3004,7 @@
   {
 #ifdef BN_FAST_S_MP_SQR_C
     /* can we use the fast comba multiplier? */
-    if ((a->used * 2 + 1) < MP_WARRAY &&
+    if ((a->used * 2 + 1) < (int)MP_WARRAY &&
          a->used <
          (1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) {
       res = fast_s_mp_sqr (a, b);
@@ -2814,6 +3032,7 @@
   neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
 
   {
+#ifdef BN_FAST_S_MP_MUL_DIGS_C
     /* can we use the fast multiplier?
      *
      * The fast multiplier can be used if the output will
@@ -2822,8 +3041,7 @@
      */
     int     digs = a->used + b->used + 1;
 
-#ifdef BN_FAST_S_MP_MUL_DIGS_C
-    if ((digs < MP_WARRAY) &&
+    if ((digs < (int)MP_WARRAY) &&
         MIN(a->used, b->used) <=
         (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
       res = fast_s_mp_mul_digs (a, b, c, digs);
@@ -3021,7 +3239,7 @@
     }
   }
 
-  if (pa > MP_WARRAY)
+  if (pa > (int)MP_WARRAY)
     return MP_RANGE;  /* TAO range check */
 
 #ifdef WOLFSSL_SMALL_STACK
@@ -3140,7 +3358,7 @@
 
   /* number of output digits to produce */
   pa = MIN(digs, a->used + b->used);
-  if (pa > MP_WARRAY)
+  if (pa > (int)MP_WARRAY)
     return MP_RANGE;  /* TAO range check */
 
 #ifdef WOLFSSL_SMALL_STACK
@@ -3286,7 +3504,7 @@
   mp_digit tmpx, *tmpt, *tmpy;
 
   /* can we use the fast multiplier? */
-  if (((digs) < MP_WARRAY) &&
+  if ((digs < (int)MP_WARRAY) &&
       MIN (a->used, b->used) <
           (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
     return fast_s_mp_mul_digs (a, b, c, digs);
@@ -3794,7 +4012,7 @@
 
   /* can we use the fast multiplier? */
 #ifdef BN_FAST_S_MP_MUL_HIGH_DIGS_C
-  if (((a->used + b->used + 1) < MP_WARRAY)
+  if (((a->used + b->used + 1) < (int)MP_WARRAY)
       && MIN (a->used, b->used) <
       (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
     return fast_s_mp_mul_high_digs (a, b, c, digs);
@@ -3873,7 +4091,7 @@
     }
   }
 
-  if (pa > MP_WARRAY)
+  if (pa > (int)MP_WARRAY)
     return MP_RANGE;  /* TAO range check */
 
 #ifdef WOLFSSL_SMALL_STACK
@@ -3978,7 +4196,8 @@
 }
 
 
-#if defined(WOLFSSL_KEY_GEN) || defined(HAVE_ECC)
+#if defined(WOLFSSL_KEY_GEN) || defined(HAVE_ECC) || !defined(NO_RSA) || \
+    !defined(NO_DSA) | !defined(NO_DH)
 
 /* c = a * a (mod b) */
 int mp_sqrmod (mp_int * a, mp_int * b, mp_int * c)
@@ -4004,7 +4223,8 @@
 
 #if defined(HAVE_ECC) || !defined(NO_PWDBASED) || defined(WOLFSSL_SNIFFER) || \
     defined(WOLFSSL_HAVE_WOLFSCEP) || defined(WOLFSSL_KEY_GEN) || \
-    defined(OPENSSL_EXTRA) || defined(WC_RSA_BLINDING)
+    defined(OPENSSL_EXTRA) || defined(WC_RSA_BLINDING) || \
+    (!defined(NO_RSA) && !defined(NO_RSA_BOUNDS_CHECK))
 
 /* single digit addition */
 int mp_add_d (mp_int* a, mp_digit b, mp_int* c)
@@ -4172,7 +4392,8 @@
 
 
 #if defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || defined(HAVE_ECC) || \
-    defined(DEBUG_WOLFSSL)
+    defined(DEBUG_WOLFSSL) || !defined(NO_RSA) || !defined(NO_DSA) || \
+    !defined(NO_DH)
 
 static const int lnz[16] = {
    4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
@@ -4320,7 +4541,7 @@
 
 #endif /* WOLFSSL_KEY_GEN || HAVE_COMP_KEY || HAVE_ECC || DEBUG_WOLFSSL */
 
-#ifdef WOLFSSL_KEY_GEN
+#if defined(WOLFSSL_KEY_GEN) || !defined(NO_DH) || !defined(NO_DSA) || !defined(NO_RSA)
 
 const mp_digit ltm_prime_tab[PRIME_SIZE] = {
   0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
@@ -4411,9 +4632,30 @@
   if ((err = mp_init (&y)) != MP_OKAY) {
     goto LBL_R;
   }
-  if ((err = mp_exptmod (b, &r, a, &y)) != MP_OKAY) {
-    goto LBL_Y;
-  }
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+#ifndef WOLFSSL_SP_NO_2048
+  if (mp_count_bits(a) == 1024)
+      err = sp_ModExp_1024(b, &r, a, &y);
+  else if (mp_count_bits(a) == 2048)
+      err = sp_ModExp_2048(b, &r, a, &y);
+  else
+#endif
+#ifndef WOLFSSL_SP_NO_3072
+  if (mp_count_bits(a) == 1536)
+      err = sp_ModExp_1536(b, &r, a, &y);
+  else if (mp_count_bits(a) == 3072)
+      err = sp_ModExp_3072(b, &r, a, &y);
+  else
+#endif
+#ifdef WOLFSSL_SP_4096
+  if (mp_count_bits(a) == 4096)
+      err = sp_ModExp_4096(b, &r, a, &y);
+  else
+#endif
+#endif
+      err = mp_exptmod (b, &r, a, &y);
+  if (err != MP_OKAY)
+      goto LBL_Y;
 
   /* if y != 1 and y != n1 do */
   if (mp_cmp_d (&y, 1) != MP_EQ && mp_cmp (&y, &n1) != MP_EQ) {
@@ -4476,6 +4718,173 @@
   return MP_OKAY;
 }
 
+/*
+ * Sets result to 1 if probably prime, 0 otherwise
+ */
+int mp_prime_is_prime (mp_int * a, int t, int *result)
+{
+  mp_int  b;
+  int     ix, err, res;
+
+  /* default to no */
+  *result = MP_NO;
+
+  /* valid value of t? */
+  if (t <= 0 || t > PRIME_SIZE) {
+    return MP_VAL;
+  }
+
+  if (mp_isone(a)) {
+      *result = MP_NO;
+      return MP_OKAY;
+  }
+
+  /* is the input equal to one of the primes in the table? */
+  for (ix = 0; ix < PRIME_SIZE; ix++) {
+      if (mp_cmp_d(a, ltm_prime_tab[ix]) == MP_EQ) {
+         *result = MP_YES;
+         return MP_OKAY;
+      }
+  }
+
+  /* first perform trial division */
+  if ((err = mp_prime_is_divisible (a, &res)) != MP_OKAY) {
+    return err;
+  }
+
+  /* return if it was trivially divisible */
+  if (res == MP_YES) {
+    return MP_OKAY;
+  }
+
+  /* now perform the miller-rabin rounds */
+  if ((err = mp_init (&b)) != MP_OKAY) {
+    return err;
+  }
+
+  for (ix = 0; ix < t; ix++) {
+    /* set the prime */
+    if ((err = mp_set (&b, ltm_prime_tab[ix])) != MP_OKAY) {
+        goto LBL_B;
+    }
+
+    if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) {
+      goto LBL_B;
+    }
+
+    if (res == MP_NO) {
+      goto LBL_B;
+    }
+  }
+
+  /* passed the test */
+  *result = MP_YES;
+LBL_B:mp_clear (&b);
+  return err;
+}
+
+
+/*
+ * Sets result to 1 if probably prime, 0 otherwise
+ */
+int mp_prime_is_prime_ex (mp_int * a, int t, int *result, WC_RNG *rng)
+{
+  mp_int  b, c;
+  int     ix, err, res;
+  byte*   base = NULL;
+  word32  baseSz = 0;
+
+  /* default to no */
+  *result = MP_NO;
+
+  /* valid value of t? */
+  if (t <= 0 || t > PRIME_SIZE) {
+    return MP_VAL;
+  }
+
+  if (mp_isone(a)) {
+    *result = MP_NO;
+    return MP_OKAY;
+  }
+
+  /* is the input equal to one of the primes in the table? */
+  for (ix = 0; ix < PRIME_SIZE; ix++) {
+      if (mp_cmp_d(a, ltm_prime_tab[ix]) == MP_EQ) {
+         *result = MP_YES;
+         return MP_OKAY;
+      }
+  }
+
+  /* first perform trial division */
+  if ((err = mp_prime_is_divisible (a, &res)) != MP_OKAY) {
+    return err;
+  }
+
+  /* return if it was trivially divisible */
+  if (res == MP_YES) {
+    return MP_OKAY;
+  }
+
+  /* now perform the miller-rabin rounds */
+  if ((err = mp_init (&b)) != MP_OKAY) {
+    return err;
+  }
+  if ((err = mp_init (&c)) != MP_OKAY) {
+      mp_clear(&b);
+    return err;
+  }
+
+  baseSz = mp_count_bits(a);
+  baseSz = (baseSz / 8) + ((baseSz % 8) ? 1 : 0);
+
+  base = (byte*)XMALLOC(baseSz, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+  if (base == NULL) {
+      err = MP_MEM;
+      goto LBL_B;
+  }
+
+  if ((err = mp_sub_d(a, 2, &c)) != MP_OKAY) {
+      goto LBL_B;
+  }
+
+ /* now do a miller rabin with up to t random numbers, this should
+  * give a (1/4)^t chance of a false prime. */
+  for (ix = 0; ix < t; ix++) {
+    /* Set a test candidate. */
+    if ((err = wc_RNG_GenerateBlock(rng, base, baseSz)) != 0) {
+        goto LBL_B;
+    }
+
+    if ((err = mp_read_unsigned_bin(&b, base, baseSz)) != MP_OKAY) {
+        goto LBL_B;
+    }
+
+    if (mp_cmp_d(&b, 2) != MP_GT || mp_cmp(&b, &c) != MP_LT) {
+        ix--;
+        continue;
+    }
+
+    if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) {
+      goto LBL_B;
+    }
+
+    if (res == MP_NO) {
+      goto LBL_B;
+    }
+  }
+
+  /* passed the test */
+  *result = MP_YES;
+LBL_B:mp_clear (&b);
+      mp_clear (&c);
+      XFREE(base, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+  return err;
+}
+
+#endif /* WOLFSSL_KEY_GEN NO_DH NO_DSA NO_RSA */
+
+#ifdef WOLFSSL_KEY_GEN
+
 static const int USE_BBS = 1;
 
 int mp_rand_prime(mp_int* N, int len, WC_RNG* rng, void* heap)
@@ -4529,7 +4938,11 @@
         }
 
         /* test */
-        if ((err = mp_prime_is_prime(N, 8, &res)) != MP_OKAY) {
+        /* Running Miller-Rabin up to 3 times gives us a 2^{-80} chance
+         * of a 1024-bit candidate being a false positive, when it is our
+         * prime candidate. (Note 4.49 of Handbook of Applied Cryptography.)
+         * Using 8 because we've always used 8. */
+        if ((err = mp_prime_is_prime_ex(N, 8, &res, rng)) != MP_OKAY) {
             XFREE(buf, heap, DYNAMIC_TYPE_RSA);
             return err;
         }
@@ -4541,66 +4954,6 @@
     return MP_OKAY;
 }
 
-/*
- * Sets result to 1 if probably prime, 0 otherwise
- */
-int mp_prime_is_prime (mp_int * a, int t, int *result)
-{
-  mp_int  b;
-  int     ix, err, res;
-
-  /* default to no */
-  *result = MP_NO;
-
-  /* valid value of t? */
-  if (t <= 0 || t > PRIME_SIZE) {
-    return MP_VAL;
-  }
-
-  /* is the input equal to one of the primes in the table? */
-  for (ix = 0; ix < PRIME_SIZE; ix++) {
-      if (mp_cmp_d(a, ltm_prime_tab[ix]) == MP_EQ) {
-         *result = 1;
-         return MP_OKAY;
-      }
-  }
-
-  /* first perform trial division */
-  if ((err = mp_prime_is_divisible (a, &res)) != MP_OKAY) {
-    return err;
-  }
-
-  /* return if it was trivially divisible */
-  if (res == MP_YES) {
-    return MP_OKAY;
-  }
-
-  /* now perform the miller-rabin rounds */
-  if ((err = mp_init (&b)) != MP_OKAY) {
-    return err;
-  }
-
-  for (ix = 0; ix < t; ix++) {
-    /* set the prime */
-    if ((err = mp_set (&b, ltm_prime_tab[ix])) != MP_OKAY) {
-        goto LBL_B;
-    }
-
-    if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) {
-      goto LBL_B;
-    }
-
-    if (res == MP_NO) {
-      goto LBL_B;
-    }
-  }
-
-  /* passed the test */
-  *result = MP_YES;
-LBL_B:mp_clear (&b);
-  return err;
-}
-
 
 /* computes least common multiple as |a*b|/(a, b) */
 int mp_lcm (mp_int * a, mp_int * b, mp_int * c)
@@ -4723,8 +5076,8 @@
     }
     c->sign = MP_ZPOS;
     res = MP_OKAY;
-LBL_V:mp_clear (&u);
-LBL_U:mp_clear (&v);
+LBL_V:mp_clear (&v);
+LBL_U:mp_clear (&u);
     return res;
 }
 
@@ -4733,7 +5086,7 @@
 
 #if !defined(NO_DSA) || defined(HAVE_ECC) || defined(WOLFSSL_KEY_GEN) || \
     defined(HAVE_COMP_KEY) || defined(WOLFSSL_DEBUG_MATH) || \
-    defined(DEBUG_WOLFSSL)
+    defined(DEBUG_WOLFSSL) || defined(OPENSSL_EXTRA)
 
 /* chars used in radix conversions */
 const char *mp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ\
@@ -4812,9 +5165,7 @@
 }
 #endif /* !defined(NO_DSA) || defined(HAVE_ECC) */
 
-#if defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || \
-    defined(WOLFSSL_DEBUG_MATH) || defined(DEBUG_WOLFSSL) || \
-    defined(WOLFSSL_PUBLIC_MP)
+#ifdef WC_MP_TO_RADIX
 
 /* returns size of ASCII representation */
 int mp_radix_size (mp_int *a, int radix, int *size)
@@ -4912,7 +5263,13 @@
         *str++ = mp_s_rmap[d];
         ++digs;
     }
-
+#ifndef WC_DISABLE_RADIX_ZERO_PAD
+    /* For hexadecimal output, add zero padding when number of digits is odd */
+    if ((digs & 1) && (radix == 16)) {
+        *str++ = mp_s_rmap[0];
+        ++digs;
+    }
+#endif
     /* reverse the digits of the string.  In this case _s points
      * to the first digit [excluding the sign] of the number]
      */
@@ -4954,12 +5311,11 @@
 }
 #endif /* WOLFSSL_DEBUG_MATH */
 
-#endif /* defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || defined(WOLFSSL_DEBUG_MATH) */
+#endif /* WC_MP_TO_RADIX */
 
 #endif /* WOLFSSL_SP_MATH */
 
 #endif /* USE_FAST_MATH */
 
 #endif /* NO_BIG_INT */
-
 
--- a/wolfcrypt/src/logging.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/logging.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* logging.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -26,8 +26,6 @@
 
 #include <wolfssl/wolfcrypt/settings.h>
 
-/* submitted by eof */
-
 #include <wolfssl/wolfcrypt/logging.h>
 #include <wolfssl/wolfcrypt/error-crypt.h>
 #if defined(OPENSSL_EXTRA) && !defined(WOLFCRYPT_ONLY)
@@ -64,6 +62,8 @@
 static double wc_func_start[WC_FUNC_COUNT];
 static double wc_func_time[WC_FUNC_COUNT] = { 0, };
 static const char* wc_func_name[WC_FUNC_COUNT] = {
+    "SendHelloRequest",
+    "DoHelloRequest",
     "SendClientHello",
     "DoClientHello",
     "SendServerHello",
@@ -117,6 +117,11 @@
 static wolfSSL_Logging_cb log_function = NULL;
 static int loggingEnabled = 0;
 
+#if defined(WOLFSSL_APACHE_MYNEWT)
+#include "log/log.h"
+static struct log mynewt_log;
+#endif /* WOLFSSL_APACHE_MYNEWT */
+
 #endif /* DEBUG_WOLFSSL */
 
 
@@ -132,11 +137,24 @@
 #endif
 }
 
+/* allow this to be set to NULL, so logs can be redirected to default output */
+wolfSSL_Logging_cb wolfSSL_GetLoggingCb(void)
+{
+#ifdef DEBUG_WOLFSSL
+    return log_function;
+#else
+    return NULL;
+#endif
+}
+
 
 int wolfSSL_Debugging_ON(void)
 {
 #ifdef DEBUG_WOLFSSL
     loggingEnabled = 1;
+#if defined(WOLFSSL_APACHE_MYNEWT)
+    log_register("wolfcrypt", &mynewt_log, &log_console_handler, NULL, LOG_SYSLEVEL);
+#endif /* WOLFSSL_APACHE_MYNEWT */
     return 0;
 #else
     return NOT_COMPILED_IN;
@@ -193,18 +211,25 @@
 #ifdef DEBUG_WOLFSSL
 
 #if defined(FREESCALE_MQX) || defined(FREESCALE_KSDK_MQX)
-    #if MQX_USE_IO_OLD
-        #include <fio.h>
-    #else
-        #include <nio.h>
-    #endif
+    /* see wc_port.h for fio.h and nio.h includes */
 #elif defined(WOLFSSL_SGX)
     /* Declare sprintf for ocall */
     int sprintf(char* buf, const char *fmt, ...);
+#elif defined(WOLFSSL_DEOS)
 #elif defined(MICRIUM)
-    #include <bsp_ser.h>
+    #if (BSP_SER_COMM_EN  == DEF_ENABLED)
+        #include <bsp_ser.h>
+    #endif
 #elif defined(WOLFSSL_USER_LOG)
     /* user includes their own headers */
+#elif defined(WOLFSSL_ESPIDF)
+    #include "esp_types.h"
+    #include "esp_log.h"
+#elif defined(WOLFSSL_TELIT_M2MB)
+    #include <stdio.h>
+    #include "m2m_log.h"
+#elif defined(WOLFSSL_ANDROID_DEBUG)
+    #include <android/log.h>
 #else
     #include <stdio.h>   /* for default printf stuff */
 #endif
@@ -225,6 +250,8 @@
 
 #elif defined(THREADX) && !defined(THREADX_NO_DC_PRINTF)
         dc_log_printf("%s\n", logMessage);
+#elif defined(WOLFSSL_DEOS)
+        printf("%s\r\n", logMessage);
 #elif defined(MICRIUM)
         BSP_Ser_Printf("%s\r\n", logMessage);
 #elif defined(WOLFSSL_MDK_ARM)
@@ -237,6 +264,16 @@
 #elif defined(MQX_USE_IO_OLD)
         fprintf(_mqxio_stderr, "%s\n", logMessage);
 
+#elif defined(WOLFSSL_APACHE_MYNEWT)
+        LOG_DEBUG(&mynewt_log, LOG_MODULE_DEFAULT, "%s\n", logMessage);
+#elif defined(WOLFSSL_ESPIDF)
+        ESP_LOGI("wolfssl", "%s", logMessage);
+#elif defined(WOLFSSL_ZEPHYR)
+        printk("%s\n", logMessage);
+#elif defined(WOLFSSL_TELIT_M2MB)
+        M2M_LOG_INFO("%s\n", logMessage);
+#elif defined(WOLFSSL_ANDROID_DEBUG)
+        __android_log_print(ANDROID_LOG_VERBOSE, "[wolfSSL]", "%s", logMessage);
 #else
         fprintf(stderr, "%s\n", logMessage);
 #endif
@@ -250,41 +287,52 @@
         wolfssl_log(INFO_LOG , msg);
 }
 
-
+#ifndef LINE_LEN
+#define LINE_LEN 16
+#endif
 void WOLFSSL_BUFFER(const byte* buffer, word32 length)
 {
-    #define LINE_LEN 16
+    int i, buflen = (int)length, bufidx;
+    char line[(LINE_LEN * 4) + 3]; /* \t00..0F | chars...chars\0 */
 
-    if (loggingEnabled) {
-        word32 i;
-        char line[80];
+    if (!loggingEnabled) {
+        return;
+    }
 
-        if (!buffer) {
-            wolfssl_log(INFO_LOG, "\tNULL");
+    if (!buffer) {
+        wolfssl_log(INFO_LOG, "\tNULL");
+        return;
+    }
 
-            return;
-        }
-
-        sprintf(line, "\t");
+    while (buflen > 0) {
+        bufidx = 0;
+        XSNPRINTF(&line[bufidx], sizeof(line)-bufidx, "\t");
+        bufidx++;
 
         for (i = 0; i < LINE_LEN; i++) {
-            if (i < length)
-                sprintf(line + 1 + i * 3,"%02x ", buffer[i]);
-            else
-                sprintf(line + 1 + i * 3, "   ");
+            if (i < buflen) {
+                XSNPRINTF(&line[bufidx], sizeof(line)-bufidx, "%02x ", buffer[i]);
+            }
+            else {
+                XSNPRINTF(&line[bufidx], sizeof(line)-bufidx, "   ");
+            }
+            bufidx += 3;
         }
 
-        sprintf(line + 1 + LINE_LEN * 3, "| ");
+        XSNPRINTF(&line[bufidx], sizeof(line)-bufidx, "| ");
+        bufidx++;
 
-        for (i = 0; i < LINE_LEN; i++)
-            if (i < length)
-                sprintf(line + 3 + LINE_LEN * 3 + i,
+        for (i = 0; i < LINE_LEN; i++) {
+            if (i < buflen) {
+                XSNPRINTF(&line[bufidx], sizeof(line)-bufidx,
                      "%c", 31 < buffer[i] && buffer[i] < 127 ? buffer[i] : '.');
+                bufidx++;
+            }
+        }
 
         wolfssl_log(INFO_LOG, line);
-
-        if (length > LINE_LEN)
-            WOLFSSL_BUFFER(buffer + LINE_LEN, length - LINE_LEN);
+        buffer += LINE_LEN;
+        buflen -= LINE_LEN;
     }
 }
 
@@ -308,18 +356,25 @@
         wolfssl_log(LEAVE_LOG , buffer);
     }
 }
+
+WOLFSSL_API int WOLFSSL_IS_DEBUG_ON(void)
+{
+    return loggingEnabled;
+}
 #endif /* !WOLFSSL_DEBUG_ERRORS_ONLY */
 #endif /* DEBUG_WOLFSSL */
 
 /*
  * When using OPENSSL_EXTRA or DEBUG_WOLFSSL_VERBOSE macro then WOLFSSL_ERROR is
- * mapped to new funtion WOLFSSL_ERROR_LINE which gets the line # and function
+ * mapped to new function WOLFSSL_ERROR_LINE which gets the line # and function
  * name where WOLFSSL_ERROR is called at.
  */
 #if defined(DEBUG_WOLFSSL) || defined(OPENSSL_ALL) || \
-    defined(WOLFSSL_NGINX) || defined(WOLFSSL_HAPROXY)
+    defined(WOLFSSL_NGINX) || defined(WOLFSSL_HAPROXY) || \
+    defined(OPENSSL_EXTRA)
 
-#if defined(OPENSSL_EXTRA) || defined(DEBUG_WOLFSSL_VERBOSE)
+#if (defined(OPENSSL_EXTRA) && !defined(_WIN32) && !defined(NO_ERROR_QUEUE)) \
+    || defined(DEBUG_WOLFSSL_VERBOSE)
 void WOLFSSL_ERROR_LINE(int error, const char* func, unsigned int line,
         const char* file, void* usrCtx)
 #else
@@ -332,7 +387,8 @@
     {
         char buffer[WOLFSSL_MAX_ERROR_SZ];
 
-    #if defined(OPENSSL_EXTRA) || defined(DEBUG_WOLFSSL_VERBOSE)
+    #if (defined(OPENSSL_EXTRA) && !defined(_WIN32) && \
+            !defined(NO_ERROR_QUEUE)) || defined(DEBUG_WOLFSSL_VERBOSE)
         (void)usrCtx; /* a user ctx for future flexibility */
         (void)func;
 
@@ -432,7 +488,7 @@
  * line  : line number that error happened at
  *
  * Returns a negative value in error case, on success returns the nodes error
- * value which is positve (absolute value)
+ * value which is positive (absolute value)
  */
 int wc_PeekErrorNode(int idx, const char **file, const char **reason,
         int *line)
@@ -446,11 +502,6 @@
 
     if (idx < 0) {
         err = wc_last_node;
-        if (err == NULL) {
-            WOLFSSL_MSG("No Errors in queue");
-            wc_UnLockMutex(&debug_mutex);
-            return BAD_STATE_E;
-        }
     }
     else {
         int i;
@@ -466,6 +517,12 @@
         }
     }
 
+    if (err == NULL) {
+        WOLFSSL_MSG("No Errors in queue");
+        wc_UnLockMutex(&debug_mutex);
+        return BAD_STATE_E;
+    }
+
     if (file != NULL) {
         *file = err->file;
     }
@@ -490,7 +547,7 @@
  *
  * file   pointer to file that error was in. Can be NULL to return no file.
  * reason error string giving reason for error. Can be NULL to return no reason.
- * line   retrun line number of where error happened.
+ * line   return line number of where error happened.
  *
  * returns the error value on success and BAD_MUTEX_E or BAD_STATE_E on failure
  */
@@ -536,9 +593,14 @@
  * function. debug_mutex should be locked before a call to this function. */
 int wc_AddErrorNode(int error, int line, char* buf, char* file)
 {
-
+#if defined(NO_ERROR_QUEUE)
+    (void)error;
+    (void)line;
+    (void)buf;
+    (void)file;
+    WOLFSSL_MSG("Error queue turned off, can not add nodes");
+#else
     struct wc_error_queue* err;
-
     err = (struct wc_error_queue*)XMALLOC(
             sizeof(struct wc_error_queue), wc_error_heap, DYNAMIC_TYPE_LOG);
     if (err == NULL) {
@@ -580,6 +642,11 @@
             if (wc_errors != NULL) {
                 /* check for unexpected case before over writing wc_errors */
                 WOLFSSL_MSG("ERROR in adding new node to logging queue!!\n");
+                /* In the event both wc_last_node and wc_errors are NULL, err
+                 * goes unassigned to external wc_errors, wc_last_node. Free
+                 * err in this instance since wc_ClearErrorNodes will not
+                 */
+                XFREE(err, wc_error_heap, DYNAMIC_TYPE_LOG);
             }
             else {
                 wc_errors    = err;
@@ -599,7 +666,7 @@
             }
         }
     }
-
+#endif
     return 0;
 }
 
@@ -626,10 +693,14 @@
     if (current != NULL) {
         if (current->prev != NULL)
             current->prev->next = current->next;
+        if (current->next != NULL)
+            current->next->prev = current->prev;
         if (wc_last_node == current)
             wc_last_node = current->prev;
         if (wc_errors == current)
             wc_errors = current->next;
+        if (wc_current_node == current)
+            wc_current_node = current->next;
         XFREE(current, current->heap, DYNAMIC_TYPE_LOG);
     }
 
@@ -641,7 +712,9 @@
  */
 void wc_ClearErrorNodes(void)
 {
-#if defined(DEBUG_WOLFSSL) || defined(WOLFSSL_NGINX)
+#if defined(DEBUG_WOLFSSL) || defined(WOLFSSL_NGINX) || \
+    defined(OPENSSL_EXTRA) || defined(DEBUG_WOLFSSL_VERBOSE)
+
     if (wc_LockMutex(&debug_mutex) != 0) {
         WOLFSSL_MSG("Lock debug mutex failed");
         return;
@@ -660,8 +733,9 @@
         }
     }
 
-    wc_errors    = NULL;
-    wc_last_node = NULL;
+    wc_errors       = NULL;
+    wc_last_node    = NULL;
+    wc_current_node = NULL;
     wc_UnLockMutex(&debug_mutex);
 #endif /* DEBUG_WOLFSSL || WOLFSSL_NGINX */
 }
@@ -708,40 +782,63 @@
     return 0;
 }
 
-
 #if !defined(NO_FILESYSTEM) && !defined(NO_STDIO_FILESYSTEM)
 /* empties out the error queue into the file */
+static int wc_ERR_dump_to_file (const char *str, size_t len, void *u)
+{
+    XFILE fp = (XFILE ) u;
+    fprintf(fp, "%-*.*s\n", (int)len, (int)len, str);
+    return 0;
+}
+
+/* This callback allows the application to provide a custom error printing
+ * function. */
+void wc_ERR_print_errors_cb(int (*cb)(const char *str, size_t len, void *u),
+                            void *u)
+{
+    WOLFSSL_ENTER("wc_ERR_print_errors_cb");
+
+    if (cb == NULL) {
+        /* Invalid param */
+        return;
+    }
+
+    if (wc_LockMutex(&debug_mutex) != 0)
+    {
+        WOLFSSL_MSG("Lock debug mutex failed");
+    }
+    else
+    {
+        /* free all nodes from error queue and print them to file */
+        struct wc_error_queue *current;
+        struct wc_error_queue *next;
+
+        current = (struct wc_error_queue *)wc_errors;
+        while (current != NULL)
+        {
+            next = current->next;
+            cb(current->error, strlen(current->error), u);
+            XFREE(current, current->heap, DYNAMIC_TYPE_LOG);
+            current = next;
+        }
+
+        /* set global pointers to match having been freed */
+        wc_errors = NULL;
+        wc_last_node = NULL;
+
+        wc_UnLockMutex(&debug_mutex);
+    }
+}
+
 void wc_ERR_print_errors_fp(XFILE fp)
 {
     WOLFSSL_ENTER("wc_ERR_print_errors_fp");
 
-    if (wc_LockMutex(&debug_mutex) != 0) {
-        WOLFSSL_MSG("Lock debug mutex failed");
-    }
-    else {
-        /* free all nodes from error queue and print them to file */
-        {
-            struct wc_error_queue* current;
-            struct wc_error_queue* next;
+    /* Send all errors to the wc_ERR_dump_to_file function */
+    wc_ERR_print_errors_cb(wc_ERR_dump_to_file, fp);
+}
 
-            current = (struct wc_error_queue*)wc_errors;
-            while (current != NULL) {
-                next = current->next;
-                fprintf(fp, "%s\n", current->error);
-                XFREE(current, current->heap, DYNAMIC_TYPE_LOG);
-                current = next;
-            }
-
-            /* set global pointers to match having been freed */
-            wc_errors    = NULL;
-            wc_last_node = NULL;
-        }
-
-        wc_UnLockMutex(&debug_mutex);
-    }
-}
 #endif /* !defined(NO_FILESYSTEM) && !defined(NO_STDIO_FILESYSTEM) */
 
 #endif /* defined(OPENSSL_EXTRA) || defined(DEBUG_WOLFSSL_VERBOSE) */
-
 
--- a/wolfcrypt/src/md2.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/md2.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* md2.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
--- a/wolfcrypt/src/md4.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/md4.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* md4.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
--- a/wolfcrypt/src/md5.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/md5.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* md5.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -22,7 +22,7 @@
 
 
 #ifdef HAVE_CONFIG_H
-    #include <config.h>
+#include <config.h>
 #endif
 
 #include <wolfssl/wolfcrypt/settings.h>
@@ -30,206 +30,254 @@
 #if !defined(NO_MD5)
 
 #if defined(WOLFSSL_TI_HASH)
-    /* #include <wolfcrypt/src/port/ti/ti-hash.c> included by wc_port.c */
+/* #include <wolfcrypt/src/port/ti/ti-hash.c> included by wc_port.c */
 
 #else
 
 #include <wolfssl/wolfcrypt/md5.h>
 #include <wolfssl/wolfcrypt/error-crypt.h>
 #include <wolfssl/wolfcrypt/logging.h>
+#include <wolfssl/wolfcrypt/hash.h>
 
 #ifdef NO_INLINE
-    #include <wolfssl/wolfcrypt/misc.h>
+#include <wolfssl/wolfcrypt/misc.h>
 #else
-    #define WOLFSSL_MISC_INCLUDED
-    #include <wolfcrypt/src/misc.c>
+#define WOLFSSL_MISC_INCLUDED
+#include <wolfcrypt/src/misc.c>
 #endif
 
 
 /* Hardware Acceleration */
 #if defined(STM32_HASH)
 
-    /* Supports CubeMX HAL or Standard Peripheral Library */
-	#define HAVE_MD5_CUST_API
+/* Supports CubeMX HAL or Standard Peripheral Library */
+#define HAVE_MD5_CUST_API
 
-    int wc_InitMd5_ex(wc_Md5* md5, void* heap, int devId)
-    {
-        if (md5 == NULL) {
-            return BAD_FUNC_ARG;
-        }
-
-        (void)devId;
-        (void)heap;
-
-        wc_Stm32_Hash_Init(&md5->stmCtx);
-
-        return 0;
+int wc_InitMd5_ex(wc_Md5* md5, void* heap, int devId)
+{
+    if (md5 == NULL) {
+        return BAD_FUNC_ARG;
     }
 
-    int wc_Md5Update(wc_Md5* md5, const byte* data, word32 len)
-    {
-        int ret;
+    (void)devId;
+    (void)heap;
+
+    wc_Stm32_Hash_Init(&md5->stmCtx);
+
+    return 0;
+}
 
-        if (md5 == NULL || (data == NULL && len > 0)) {
-            return BAD_FUNC_ARG;
-        }
+int wc_Md5Update(wc_Md5* md5, const byte* data, word32 len)
+{
+    int ret;
+
+    if (md5 == NULL || (data == NULL && len > 0)) {
+        return BAD_FUNC_ARG;
+    }
 
-        ret = wolfSSL_CryptHwMutexLock();
-        if (ret == 0) {
-            ret = wc_Stm32_Hash_Update(&md5->stmCtx, HASH_AlgoSelection_MD5,
-                data, len);
-            wolfSSL_CryptHwMutexUnLock();
-        }
-        return ret;
+    ret = wolfSSL_CryptHwMutexLock();
+    if (ret == 0) {
+        ret = wc_Stm32_Hash_Update(&md5->stmCtx, HASH_AlgoSelection_MD5,
+                                   data, len);
+        wolfSSL_CryptHwMutexUnLock();
+    }
+    return ret;
+}
+
+int wc_Md5Final(wc_Md5* md5, byte* hash)
+{
+    int ret;
+
+    if (md5 == NULL || hash == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    ret = wolfSSL_CryptHwMutexLock();
+    if (ret == 0) {
+        ret = wc_Stm32_Hash_Final(&md5->stmCtx, HASH_AlgoSelection_MD5,
+                                  hash, WC_MD5_DIGEST_SIZE);
+        wolfSSL_CryptHwMutexUnLock();
     }
 
-    int wc_Md5Final(wc_Md5* md5, byte* hash)
-    {
-        int ret;
-
-        if (md5 == NULL || hash == NULL) {
-            return BAD_FUNC_ARG;
-        }
+    (void)wc_InitMd5(md5);  /* reset state */
 
-        ret = wolfSSL_CryptHwMutexLock();
-        if (ret == 0) {
-            ret = wc_Stm32_Hash_Final(&md5->stmCtx, HASH_AlgoSelection_MD5,
-                hash, WC_MD5_DIGEST_SIZE);
-            wolfSSL_CryptHwMutexUnLock();
-        }
-
-        (void)wc_InitMd5(md5);  /* reset state */
-
-        return ret;
-    }
+    return ret;
+}
 
 #elif defined(FREESCALE_MMCAU_SHA)
+
+#ifdef FREESCALE_MMCAU_CLASSIC_SHA
     #include "cau_api.h"
-    #define XTRANSFORM(S,B)  Transform((S), (B))
+#else
+    #include "fsl_mmcau.h"
+#endif
+
+#define XTRANSFORM(S,B)       Transform((S), (B))
+#define XTRANSFORM_LEN(S,B,L) Transform_Len((S), (B), (L))
+
+#ifndef WC_HASH_DATA_ALIGNMENT
+    /* these hardware API's require 4 byte (word32) alignment */
+    #define WC_HASH_DATA_ALIGNMENT 4
+#endif
+
+static int Transform(wc_Md5* md5, const byte* data)
+{
+    int ret = wolfSSL_CryptHwMutexLock();
+    if (ret == 0) {
+#ifdef FREESCALE_MMCAU_CLASSIC_SHA
+        cau_md5_hash_n((byte*)data, 1, (unsigned char*)md5->digest);
+#else
+        MMCAU_MD5_HashN((byte*)data, 1, (uint32_t*)md5->digest);
+#endif
+        wolfSSL_CryptHwMutexUnLock();
+    }
+    return ret;
+}
 
-    static int Transform(wc_Md5* md5, byte* data)
-    {
-        int ret = wolfSSL_CryptHwMutexLock();
-        if(ret == 0) {
-        #ifdef FREESCALE_MMCAU_CLASSIC_SHA
-            cau_md5_hash_n(data, 1, (unsigned char*)md5->digest);
-        #else
-            MMCAU_MD5_HashN(data, 1, (uint32_t*)md5->digest);
-        #endif
-            wolfSSL_CryptHwMutexUnLock();
+static int Transform_Len(wc_Md5* md5, const byte* data, word32 len)
+{
+    int ret = wolfSSL_CryptHwMutexLock();
+    if (ret == 0) {
+    #if defined(WC_HASH_DATA_ALIGNMENT) && WC_HASH_DATA_ALIGNMENT > 0
+        if ((size_t)data % WC_HASH_DATA_ALIGNMENT) {
+            /* data pointer is NOT aligned,
+             * so copy and perform one block at a time */
+            byte* local = (byte*)md5->buffer;
+            while (len >= WC_MD5_BLOCK_SIZE) {
+                XMEMCPY(local, data, WC_MD5_BLOCK_SIZE);
+            #ifdef FREESCALE_MMCAU_CLASSIC_SHA
+                cau_md5_hash_n(local, 1, (unsigned char*)md5->digest);
+            #else
+                MMCAU_MD5_HashN(local, 1, (uint32_t*)md5->digest);
+            #endif
+                data += WC_MD5_BLOCK_SIZE;
+                len  -= WC_MD5_BLOCK_SIZE;
+            }
         }
-        return ret;
+        else
+    #endif
+        {
+#ifdef FREESCALE_MMCAU_CLASSIC_SHA
+        cau_md5_hash_n((byte*)data, len / WC_MD5_BLOCK_SIZE,
+            (unsigned char*)md5->digest);
+#else
+        MMCAU_MD5_HashN((byte*)data, len / WC_MD5_BLOCK_SIZE,
+            (uint32_t*)md5->digest);
+#endif
+        }
+        wolfSSL_CryptHwMutexUnLock();
     }
+    return ret;
+}
 
 #elif defined(WOLFSSL_PIC32MZ_HASH)
-    #include <wolfssl/wolfcrypt/port/pic32/pic32mz-crypt.h>
-    #define HAVE_MD5_CUST_API
+#include <wolfssl/wolfcrypt/port/pic32/pic32mz-crypt.h>
+#define HAVE_MD5_CUST_API
 
 #elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_HASH)
-    /* functions implemented in wolfcrypt/src/port/caam/caam_sha.c */
-    #define HAVE_MD5_CUST_API
+/* functions implemented in wolfcrypt/src/port/caam/caam_sha.c */
+#define HAVE_MD5_CUST_API
 #else
-    #define NEED_SOFT_MD5
-
+#define NEED_SOFT_MD5
 #endif /* End Hardware Acceleration */
 
-
 #ifdef NEED_SOFT_MD5
 
-    #define XTRANSFORM(S,B)  Transform((S))
+#define XTRANSFORM(S,B)  Transform((S),(B))
 
-    #define F1(x, y, z) (z ^ (x & (y ^ z)))
-    #define F2(x, y, z) F1(z, x, y)
-    #define F3(x, y, z) (x ^ y ^ z)
-    #define F4(x, y, z) (y ^ (x | ~z))
+#define F1(x, y, z) (z ^ (x & (y ^ z)))
+#define F2(x, y, z) F1(z, x, y)
+#define F3(x, y, z) (x ^ y ^ z)
+#define F4(x, y, z) (y ^ (x | ~z))
 
-    #define MD5STEP(f, w, x, y, z, data, s) \
+#define MD5STEP(f, w, x, y, z, data, s) \
         w = rotlFixed(w + f(x, y, z) + data, s) + x
 
-    static int Transform(wc_Md5* md5)
-    {
-        /* Copy context->state[] to working vars  */
-        word32 a = md5->digest[0];
-        word32 b = md5->digest[1];
-        word32 c = md5->digest[2];
-        word32 d = md5->digest[3];
+static int Transform(wc_Md5* md5, const byte* data)
+{
+    word32* buffer = (word32*)data;
+    /* Copy context->state[] to working vars  */
+    word32 a = md5->digest[0];
+    word32 b = md5->digest[1];
+    word32 c = md5->digest[2];
+    word32 d = md5->digest[3];
 
-        MD5STEP(F1, a, b, c, d, md5->buffer[0]  + 0xd76aa478,  7);
-        MD5STEP(F1, d, a, b, c, md5->buffer[1]  + 0xe8c7b756, 12);
-        MD5STEP(F1, c, d, a, b, md5->buffer[2]  + 0x242070db, 17);
-        MD5STEP(F1, b, c, d, a, md5->buffer[3]  + 0xc1bdceee, 22);
-        MD5STEP(F1, a, b, c, d, md5->buffer[4]  + 0xf57c0faf,  7);
-        MD5STEP(F1, d, a, b, c, md5->buffer[5]  + 0x4787c62a, 12);
-        MD5STEP(F1, c, d, a, b, md5->buffer[6]  + 0xa8304613, 17);
-        MD5STEP(F1, b, c, d, a, md5->buffer[7]  + 0xfd469501, 22);
-        MD5STEP(F1, a, b, c, d, md5->buffer[8]  + 0x698098d8,  7);
-        MD5STEP(F1, d, a, b, c, md5->buffer[9]  + 0x8b44f7af, 12);
-        MD5STEP(F1, c, d, a, b, md5->buffer[10] + 0xffff5bb1, 17);
-        MD5STEP(F1, b, c, d, a, md5->buffer[11] + 0x895cd7be, 22);
-        MD5STEP(F1, a, b, c, d, md5->buffer[12] + 0x6b901122,  7);
-        MD5STEP(F1, d, a, b, c, md5->buffer[13] + 0xfd987193, 12);
-        MD5STEP(F1, c, d, a, b, md5->buffer[14] + 0xa679438e, 17);
-        MD5STEP(F1, b, c, d, a, md5->buffer[15] + 0x49b40821, 22);
+    MD5STEP(F1, a, b, c, d, buffer[0]  + 0xd76aa478,  7);
+    MD5STEP(F1, d, a, b, c, buffer[1]  + 0xe8c7b756, 12);
+    MD5STEP(F1, c, d, a, b, buffer[2]  + 0x242070db, 17);
+    MD5STEP(F1, b, c, d, a, buffer[3]  + 0xc1bdceee, 22);
+    MD5STEP(F1, a, b, c, d, buffer[4]  + 0xf57c0faf,  7);
+    MD5STEP(F1, d, a, b, c, buffer[5]  + 0x4787c62a, 12);
+    MD5STEP(F1, c, d, a, b, buffer[6]  + 0xa8304613, 17);
+    MD5STEP(F1, b, c, d, a, buffer[7]  + 0xfd469501, 22);
+    MD5STEP(F1, a, b, c, d, buffer[8]  + 0x698098d8,  7);
+    MD5STEP(F1, d, a, b, c, buffer[9]  + 0x8b44f7af, 12);
+    MD5STEP(F1, c, d, a, b, buffer[10] + 0xffff5bb1, 17);
+    MD5STEP(F1, b, c, d, a, buffer[11] + 0x895cd7be, 22);
+    MD5STEP(F1, a, b, c, d, buffer[12] + 0x6b901122,  7);
+    MD5STEP(F1, d, a, b, c, buffer[13] + 0xfd987193, 12);
+    MD5STEP(F1, c, d, a, b, buffer[14] + 0xa679438e, 17);
+    MD5STEP(F1, b, c, d, a, buffer[15] + 0x49b40821, 22);
 
-        MD5STEP(F2, a, b, c, d, md5->buffer[1]  + 0xf61e2562,  5);
-        MD5STEP(F2, d, a, b, c, md5->buffer[6]  + 0xc040b340,  9);
-        MD5STEP(F2, c, d, a, b, md5->buffer[11] + 0x265e5a51, 14);
-        MD5STEP(F2, b, c, d, a, md5->buffer[0]  + 0xe9b6c7aa, 20);
-        MD5STEP(F2, a, b, c, d, md5->buffer[5]  + 0xd62f105d,  5);
-        MD5STEP(F2, d, a, b, c, md5->buffer[10] + 0x02441453,  9);
-        MD5STEP(F2, c, d, a, b, md5->buffer[15] + 0xd8a1e681, 14);
-        MD5STEP(F2, b, c, d, a, md5->buffer[4]  + 0xe7d3fbc8, 20);
-        MD5STEP(F2, a, b, c, d, md5->buffer[9]  + 0x21e1cde6,  5);
-        MD5STEP(F2, d, a, b, c, md5->buffer[14] + 0xc33707d6,  9);
-        MD5STEP(F2, c, d, a, b, md5->buffer[3]  + 0xf4d50d87, 14);
-        MD5STEP(F2, b, c, d, a, md5->buffer[8]  + 0x455a14ed, 20);
-        MD5STEP(F2, a, b, c, d, md5->buffer[13] + 0xa9e3e905,  5);
-        MD5STEP(F2, d, a, b, c, md5->buffer[2]  + 0xfcefa3f8,  9);
-        MD5STEP(F2, c, d, a, b, md5->buffer[7]  + 0x676f02d9, 14);
-        MD5STEP(F2, b, c, d, a, md5->buffer[12] + 0x8d2a4c8a, 20);
+    MD5STEP(F2, a, b, c, d, buffer[1]  + 0xf61e2562,  5);
+    MD5STEP(F2, d, a, b, c, buffer[6]  + 0xc040b340,  9);
+    MD5STEP(F2, c, d, a, b, buffer[11] + 0x265e5a51, 14);
+    MD5STEP(F2, b, c, d, a, buffer[0]  + 0xe9b6c7aa, 20);
+    MD5STEP(F2, a, b, c, d, buffer[5]  + 0xd62f105d,  5);
+    MD5STEP(F2, d, a, b, c, buffer[10] + 0x02441453,  9);
+    MD5STEP(F2, c, d, a, b, buffer[15] + 0xd8a1e681, 14);
+    MD5STEP(F2, b, c, d, a, buffer[4]  + 0xe7d3fbc8, 20);
+    MD5STEP(F2, a, b, c, d, buffer[9]  + 0x21e1cde6,  5);
+    MD5STEP(F2, d, a, b, c, buffer[14] + 0xc33707d6,  9);
+    MD5STEP(F2, c, d, a, b, buffer[3]  + 0xf4d50d87, 14);
+    MD5STEP(F2, b, c, d, a, buffer[8]  + 0x455a14ed, 20);
+    MD5STEP(F2, a, b, c, d, buffer[13] + 0xa9e3e905,  5);
+    MD5STEP(F2, d, a, b, c, buffer[2]  + 0xfcefa3f8,  9);
+    MD5STEP(F2, c, d, a, b, buffer[7]  + 0x676f02d9, 14);
+    MD5STEP(F2, b, c, d, a, buffer[12] + 0x8d2a4c8a, 20);
 
-        MD5STEP(F3, a, b, c, d, md5->buffer[5]  + 0xfffa3942,  4);
-        MD5STEP(F3, d, a, b, c, md5->buffer[8]  + 0x8771f681, 11);
-        MD5STEP(F3, c, d, a, b, md5->buffer[11] + 0x6d9d6122, 16);
-        MD5STEP(F3, b, c, d, a, md5->buffer[14] + 0xfde5380c, 23);
-        MD5STEP(F3, a, b, c, d, md5->buffer[1]  + 0xa4beea44,  4);
-        MD5STEP(F3, d, a, b, c, md5->buffer[4]  + 0x4bdecfa9, 11);
-        MD5STEP(F3, c, d, a, b, md5->buffer[7]  + 0xf6bb4b60, 16);
-        MD5STEP(F3, b, c, d, a, md5->buffer[10] + 0xbebfbc70, 23);
-        MD5STEP(F3, a, b, c, d, md5->buffer[13] + 0x289b7ec6,  4);
-        MD5STEP(F3, d, a, b, c, md5->buffer[0]  + 0xeaa127fa, 11);
-        MD5STEP(F3, c, d, a, b, md5->buffer[3]  + 0xd4ef3085, 16);
-        MD5STEP(F3, b, c, d, a, md5->buffer[6]  + 0x04881d05, 23);
-        MD5STEP(F3, a, b, c, d, md5->buffer[9]  + 0xd9d4d039,  4);
-        MD5STEP(F3, d, a, b, c, md5->buffer[12] + 0xe6db99e5, 11);
-        MD5STEP(F3, c, d, a, b, md5->buffer[15] + 0x1fa27cf8, 16);
-        MD5STEP(F3, b, c, d, a, md5->buffer[2]  + 0xc4ac5665, 23);
+    MD5STEP(F3, a, b, c, d, buffer[5]  + 0xfffa3942,  4);
+    MD5STEP(F3, d, a, b, c, buffer[8]  + 0x8771f681, 11);
+    MD5STEP(F3, c, d, a, b, buffer[11] + 0x6d9d6122, 16);
+    MD5STEP(F3, b, c, d, a, buffer[14] + 0xfde5380c, 23);
+    MD5STEP(F3, a, b, c, d, buffer[1]  + 0xa4beea44,  4);
+    MD5STEP(F3, d, a, b, c, buffer[4]  + 0x4bdecfa9, 11);
+    MD5STEP(F3, c, d, a, b, buffer[7]  + 0xf6bb4b60, 16);
+    MD5STEP(F3, b, c, d, a, buffer[10] + 0xbebfbc70, 23);
+    MD5STEP(F3, a, b, c, d, buffer[13] + 0x289b7ec6,  4);
+    MD5STEP(F3, d, a, b, c, buffer[0]  + 0xeaa127fa, 11);
+    MD5STEP(F3, c, d, a, b, buffer[3]  + 0xd4ef3085, 16);
+    MD5STEP(F3, b, c, d, a, buffer[6]  + 0x04881d05, 23);
+    MD5STEP(F3, a, b, c, d, buffer[9]  + 0xd9d4d039,  4);
+    MD5STEP(F3, d, a, b, c, buffer[12] + 0xe6db99e5, 11);
+    MD5STEP(F3, c, d, a, b, buffer[15] + 0x1fa27cf8, 16);
+    MD5STEP(F3, b, c, d, a, buffer[2]  + 0xc4ac5665, 23);
 
-        MD5STEP(F4, a, b, c, d, md5->buffer[0]  + 0xf4292244,  6);
-        MD5STEP(F4, d, a, b, c, md5->buffer[7]  + 0x432aff97, 10);
-        MD5STEP(F4, c, d, a, b, md5->buffer[14] + 0xab9423a7, 15);
-        MD5STEP(F4, b, c, d, a, md5->buffer[5]  + 0xfc93a039, 21);
-        MD5STEP(F4, a, b, c, d, md5->buffer[12] + 0x655b59c3,  6);
-        MD5STEP(F4, d, a, b, c, md5->buffer[3]  + 0x8f0ccc92, 10);
-        MD5STEP(F4, c, d, a, b, md5->buffer[10] + 0xffeff47d, 15);
-        MD5STEP(F4, b, c, d, a, md5->buffer[1]  + 0x85845dd1, 21);
-        MD5STEP(F4, a, b, c, d, md5->buffer[8]  + 0x6fa87e4f,  6);
-        MD5STEP(F4, d, a, b, c, md5->buffer[15] + 0xfe2ce6e0, 10);
-        MD5STEP(F4, c, d, a, b, md5->buffer[6]  + 0xa3014314, 15);
-        MD5STEP(F4, b, c, d, a, md5->buffer[13] + 0x4e0811a1, 21);
-        MD5STEP(F4, a, b, c, d, md5->buffer[4]  + 0xf7537e82,  6);
-        MD5STEP(F4, d, a, b, c, md5->buffer[11] + 0xbd3af235, 10);
-        MD5STEP(F4, c, d, a, b, md5->buffer[2]  + 0x2ad7d2bb, 15);
-        MD5STEP(F4, b, c, d, a, md5->buffer[9]  + 0xeb86d391, 21);
+    MD5STEP(F4, a, b, c, d, buffer[0]  + 0xf4292244,  6);
+    MD5STEP(F4, d, a, b, c, buffer[7]  + 0x432aff97, 10);
+    MD5STEP(F4, c, d, a, b, buffer[14] + 0xab9423a7, 15);
+    MD5STEP(F4, b, c, d, a, buffer[5]  + 0xfc93a039, 21);
+    MD5STEP(F4, a, b, c, d, buffer[12] + 0x655b59c3,  6);
+    MD5STEP(F4, d, a, b, c, buffer[3]  + 0x8f0ccc92, 10);
+    MD5STEP(F4, c, d, a, b, buffer[10] + 0xffeff47d, 15);
+    MD5STEP(F4, b, c, d, a, buffer[1]  + 0x85845dd1, 21);
+    MD5STEP(F4, a, b, c, d, buffer[8]  + 0x6fa87e4f,  6);
+    MD5STEP(F4, d, a, b, c, buffer[15] + 0xfe2ce6e0, 10);
+    MD5STEP(F4, c, d, a, b, buffer[6]  + 0xa3014314, 15);
+    MD5STEP(F4, b, c, d, a, buffer[13] + 0x4e0811a1, 21);
+    MD5STEP(F4, a, b, c, d, buffer[4]  + 0xf7537e82,  6);
+    MD5STEP(F4, d, a, b, c, buffer[11] + 0xbd3af235, 10);
+    MD5STEP(F4, c, d, a, b, buffer[2]  + 0x2ad7d2bb, 15);
+    MD5STEP(F4, b, c, d, a, buffer[9]  + 0xeb86d391, 21);
 
-        /* Add the working vars back into digest state[]  */
-        md5->digest[0] += a;
-        md5->digest[1] += b;
-        md5->digest[2] += c;
-        md5->digest[3] += d;
+    /* Add the working vars back into digest state[]  */
+    md5->digest[0] += a;
+    md5->digest[1] += b;
+    md5->digest[2] += c;
+    md5->digest[3] += d;
 
-        return 0;
-    }
+    return 0;
+}
 #endif /* NEED_SOFT_MD5 */
 
 #ifndef HAVE_MD5_CUST_API
@@ -254,6 +302,9 @@
     md5->buffLen = 0;
     md5->loLen   = 0;
     md5->hiLen   = 0;
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+    md5->flags = 0;
+#endif
 
     return ret;
 }
@@ -273,16 +324,18 @@
 
 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_MD5)
     ret = wolfAsync_DevCtxInit(&md5->asyncDev, WOLFSSL_ASYNC_MARKER_MD5,
-                                                            md5->heap, devId);
+                               md5->heap, devId);
 #else
     (void)devId;
 #endif
     return ret;
 }
 
+/* do block size increments/updates */
 int wc_Md5Update(wc_Md5* md5, const byte* data, word32 len)
 {
     int ret = 0;
+    word32 blocksLen;
     byte* local;
 
     if (md5 == NULL || (data == NULL && len > 0)) {
@@ -291,36 +344,92 @@
 
 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_MD5)
     if (md5->asyncDev.marker == WOLFSSL_ASYNC_MARKER_MD5) {
-    #if defined(HAVE_INTEL_QA)
+#if defined(HAVE_INTEL_QA)
         return IntelQaSymMd5(&md5->asyncDev, NULL, data, len);
-    #endif
+#endif
     }
 #endif /* WOLFSSL_ASYNC_CRYPT */
 
-    /* do block size increments */
-    local = (byte*)md5->buffer;
-
     /* check that internal buffLen is valid */
     if (md5->buffLen >= WC_MD5_BLOCK_SIZE)
         return BUFFER_E;
 
-    while (len) {
-        word32 add = min(len, WC_MD5_BLOCK_SIZE - md5->buffLen);
-        XMEMCPY(&local[md5->buffLen], data, add);
+    if (data == NULL && len == 0) {
+        /* valid, but do nothing */
+        return 0;
+    }
+
+    /* add length for final */
+    AddLength(md5, len);
 
-        md5->buffLen += add;
-        data         += add;
-        len          -= add;
+    local = (byte*)md5->buffer;
+
+    /* process any remainder from previous operation */
+    if (md5->buffLen > 0) {
+        blocksLen = min(len, WC_MD5_BLOCK_SIZE - md5->buffLen);
+        XMEMCPY(&local[md5->buffLen], data, blocksLen);
+
+        md5->buffLen += blocksLen;
+        data         += blocksLen;
+        len          -= blocksLen;
 
         if (md5->buffLen == WC_MD5_BLOCK_SIZE) {
         #if defined(BIG_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
             ByteReverseWords(md5->buffer, md5->buffer, WC_MD5_BLOCK_SIZE);
         #endif
-            XTRANSFORM(md5, local);
-            AddLength(md5, WC_MD5_BLOCK_SIZE);
+
+            ret = XTRANSFORM(md5, (const byte*)local);
+            if (ret != 0)
+                return ret;
+
             md5->buffLen = 0;
         }
     }
+
+    /* process blocks */
+#ifdef XTRANSFORM_LEN
+    /* get number of blocks */
+    /* 64-1 = 0x3F (~ Inverted = 0xFFFFFFC0) */
+    /* len (masked by 0xFFFFFFC0) returns block aligned length */
+    blocksLen = len & ~(WC_MD5_BLOCK_SIZE-1);
+    if (blocksLen > 0) {
+        /* Byte reversal performed in function if required. */
+        XTRANSFORM_LEN(md5, data, blocksLen);
+        data += blocksLen;
+        len  -= blocksLen;
+    }
+#else
+    while (len >= WC_MD5_BLOCK_SIZE) {
+        word32* local32 = md5->buffer;
+        /* optimization to avoid memcpy if data pointer is properly aligned */
+        /* Big Endian requires byte swap, so can't use data directly */
+    #if defined(WC_HASH_DATA_ALIGNMENT) && !defined(BIG_ENDIAN_ORDER)
+        if (((size_t)data % WC_HASH_DATA_ALIGNMENT) == 0) {
+            local32 = (word32*)data;
+        }
+        else
+    #endif
+        {
+            XMEMCPY(local32, data, WC_MD5_BLOCK_SIZE);
+        }
+
+        data += WC_MD5_BLOCK_SIZE;
+        len  -= WC_MD5_BLOCK_SIZE;
+
+    #if defined(BIG_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
+        ByteReverseWords(local32, local32, WC_MD5_BLOCK_SIZE);
+    #endif
+
+        ret = XTRANSFORM(md5, (const byte*)local32);
+    }
+#endif /* XTRANSFORM_LEN */
+
+    /* save remainder */
+    if (len > 0) {
+        XMEMCPY(local, data, len);
+        md5->buffLen = len;
+    }
+
     return ret;
 }
 
@@ -334,15 +443,14 @@
 
 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_MD5)
     if (md5->asyncDev.marker == WOLFSSL_ASYNC_MARKER_MD5) {
-    #if defined(HAVE_INTEL_QA)
+#if defined(HAVE_INTEL_QA)
         return IntelQaSymMd5(&md5->asyncDev, hash, NULL, WC_MD5_DIGEST_SIZE);
-    #endif
+#endif
     }
 #endif /* WOLFSSL_ASYNC_CRYPT */
 
     local = (byte*)md5->buffer;
 
-    AddLength(md5, md5->buffLen);  /* before adding pads */
     local[md5->buffLen++] = 0x80;  /* add 1 */
 
     /* pad with zeros */
@@ -350,9 +458,9 @@
         XMEMSET(&local[md5->buffLen], 0, WC_MD5_BLOCK_SIZE - md5->buffLen);
         md5->buffLen += WC_MD5_BLOCK_SIZE - md5->buffLen;
 
-    #if defined(BIG_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
+#if defined(BIG_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
         ByteReverseWords(md5->buffer, md5->buffer, WC_MD5_BLOCK_SIZE);
-    #endif
+#endif
         XTRANSFORM(md5, local);
         md5->buffLen = 0;
     }
@@ -363,7 +471,7 @@
 #endif
 
     /* put lengths in bits */
-    md5->hiLen = (md5->loLen >> (8*sizeof(md5->loLen) - 3)) +
+    md5->hiLen = (md5->loLen >> (8 * sizeof(md5->loLen) - 3)) +
                  (md5->hiLen << 3);
     md5->loLen = md5->loLen << 3;
 
@@ -399,6 +507,10 @@
 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_MD5)
     wolfAsync_DevCtxFree(&md5->asyncDev, WOLFSSL_ASYNC_MARKER_MD5);
 #endif /* WOLFSSL_ASYNC_CRYPT */
+
+#ifdef WOLFSSL_PIC32MZ_HASH
+    wc_Md5Pic32Free(md5);
+#endif
 }
 
 int wc_Md5GetHash(wc_Md5* md5, byte* hash)
@@ -432,10 +544,30 @@
 #ifdef WOLFSSL_PIC32MZ_HASH
     ret = wc_Pic32HashCopy(&src->cache, &dst->cache);
 #endif
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+    dst->flags |= WC_HASH_FLAG_ISCOPY;
+#endif
 
     return ret;
 }
 
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+int wc_Md5SetFlags(wc_Md5* md5, word32 flags)
+{
+    if (md5) {
+        md5->flags = flags;
+    }
+    return 0;
+}
+int wc_Md5GetFlags(wc_Md5* md5, word32* flags)
+{
+    if (md5 && flags) {
+        *flags = md5->flags;
+    }
+    return 0;
+}
+#endif
+
 #endif /* WOLFSSL_TI_HASH */
 #endif /* NO_MD5 */
 
--- a/wolfcrypt/src/memory.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/memory.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* memory.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -34,13 +34,62 @@
     #define WOLFSSL_MALLOC_CHECK
 #endif
 
+
+/*
+Possible memory options:
+ * NO_WOLFSSL_MEMORY:               Disables wolf memory callback support. When not defined settings.h defines USE_WOLFSSL_MEMORY.
+ * WOLFSSL_STATIC_MEMORY:           Turns on the use of static memory buffers and functions.
+                                        This allows for using static memory instead of dynamic.
+ * WOLFSSL_STATIC_ALIGN:            Define defaults to 16 to indicate static memory alignment.
+ * HAVE_IO_POOL:                    Enables use of static thread safe memory pool for input/output buffers.
+ * XMALLOC_OVERRIDE:                Allows override of the XMALLOC, XFREE and XREALLOC macros.
+ * XMALLOC_USER:                    Allows custom XMALLOC, XFREE and XREALLOC functions to be defined.
+ * WOLFSSL_NO_MALLOC:               Disables the fall-back case to use STDIO malloc/free when no callbacks are set.
+ * WOLFSSL_TRACK_MEMORY:            Enables memory tracking for total stats and list of allocated memory.
+ * WOLFSSL_DEBUG_MEMORY:            Enables extra function and line number args for memory callbacks.
+ * WOLFSSL_DEBUG_MEMORY_PRINT:      Enables printing of each malloc/free.
+ * WOLFSSL_MALLOC_CHECK:            Reports malloc or alignment failure using WOLFSSL_STATIC_ALIGN
+ * WOLFSSL_FORCE_MALLOC_FAIL_TEST:  Used for internal testing to induce random malloc failures.
+ * WOLFSSL_HEAP_TEST:               Used for internal testing of heap hint
+ */
+
+#ifdef WOLFSSL_ZEPHYR
+#undef realloc
+void *z_realloc(void *ptr, size_t size)
+{
+    if (ptr == NULL)
+        ptr = malloc(size);
+    else
+        ptr = realloc(ptr, size);
+
+    return ptr;
+}
+#define realloc z_realloc
+#endif
+
 #ifdef USE_WOLFSSL_MEMORY
 
 #include <wolfssl/wolfcrypt/memory.h>
 #include <wolfssl/wolfcrypt/error-crypt.h>
 #include <wolfssl/wolfcrypt/logging.h>
 
-#if defined(WOLFSSL_MALLOC_CHECK) || defined(WOLFSSL_TRACK_MEMORY_FULL)
+#if defined(WOLFSSL_DEBUG_MEMORY) && defined(WOLFSSL_DEBUG_MEMORY_PRINT)
+#include <stdio.h>
+#endif
+
+#ifdef WOLFSSL_FORCE_MALLOC_FAIL_TEST
+    static int gMemFailCountSeed;
+    static int gMemFailCount;
+    void wolfSSL_SetMemFailCount(int memFailCount)
+    {
+        if (gMemFailCountSeed == 0) {
+            gMemFailCountSeed = memFailCount;
+            gMemFailCount = memFailCount;
+        }
+    }
+#endif
+#if defined(WOLFSSL_MALLOC_CHECK) || defined(WOLFSSL_TRACK_MEMORY_FULL) || \
+                                                     defined(WOLFSSL_MEMORY_LOG)
     #include <stdio.h>
 #endif
 
@@ -54,33 +103,9 @@
                           wolfSSL_Free_cb    ff,
                           wolfSSL_Realloc_cb rf)
 {
-    int res = 0;
-
-    if (mf)
-        malloc_function = mf;
-    else
-        res = BAD_FUNC_ARG;
-
-    if (ff)
-        free_function = ff;
-    else
-        res = BAD_FUNC_ARG;
-
-    if (rf)
-        realloc_function = rf;
-    else
-        res = BAD_FUNC_ARG;
-
-    return res;
-}
-
-int wolfSSL_ResetAllocators(void)
-{
-    /* allow nulls to be set for callbacks to restore defaults */
-    malloc_function = NULL;
-    free_function = NULL;
-    realloc_function = NULL;
-
+    malloc_function = mf;
+    free_function = ff;
+    realloc_function = rf;
     return 0;
 }
 
@@ -118,10 +143,37 @@
     #endif
     }
 
-    #ifdef WOLFSSL_MALLOC_CHECK
-        if (res == NULL)
-            puts("wolfSSL_malloc failed");
-    #endif
+#ifdef WOLFSSL_DEBUG_MEMORY
+#if defined(WOLFSSL_DEBUG_MEMORY_PRINT) && !defined(WOLFSSL_TRACK_MEMORY)
+    printf("Alloc: %p -> %u at %s:%d\n", res, (word32)size, func, line);
+#else
+    (void)func;
+    (void)line;
+#endif
+#endif
+
+#ifdef WOLFSSL_MALLOC_CHECK
+    if (res == NULL)
+        WOLFSSL_MSG("wolfSSL_malloc failed");
+#endif
+
+#ifdef WOLFSSL_FORCE_MALLOC_FAIL_TEST
+    if (res && --gMemFailCount == 0) {
+        printf("\n---FORCED MEM FAIL TEST---\n");
+        if (free_function) {
+        #ifdef WOLFSSL_DEBUG_MEMORY
+            free_function(res, func, line);
+        #else
+            free_function(res);
+        #endif
+        }
+        else {
+            free(res); /* clear */
+        }
+        gMemFailCount = gMemFailCountSeed; /* reset */
+        return NULL;
+    }
+#endif
 
     return res;
 }
@@ -132,6 +184,15 @@
 void wolfSSL_Free(void *ptr)
 #endif
 {
+#ifdef WOLFSSL_DEBUG_MEMORY
+#if defined(WOLFSSL_DEBUG_MEMORY_PRINT) && !defined(WOLFSSL_TRACK_MEMORY)
+    printf("Free: %p at %s:%d\n", ptr, func, line);
+#else
+    (void)func;
+    (void)line;
+#endif
+#endif
+
     if (free_function) {
     #ifdef WOLFSSL_DEBUG_MEMORY
         free_function(ptr, func, line);
@@ -341,7 +402,7 @@
     printf("Allocated %d bytes for static memory @ %p\n", ava, pt);
 #endif
 
-    /* devide into chunks of memory and add them to available list */
+    /* divide into chunks of memory and add them to available list */
     while (ava >= (heap->sizeList[0] + padSz + memSz)) {
         int i;
         /* creating only IO buffers from memory passed in, max TLS is 16k */
@@ -608,6 +669,12 @@
                             mem->ava[i] = pt->next;
                             break;
                         }
+                    #ifdef WOLFSSL_DEBUG_STATIC_MEMORY
+                        else {
+                            printf("Size: %ld, Empty: %d\n", size,
+                                                              mem->sizeList[i]);
+                        }
+                    #endif
                     }
                 }
             }
@@ -650,7 +717,7 @@
 
     #ifdef WOLFSSL_MALLOC_CHECK
         if ((wolfssl_word)res % WOLFSSL_STATIC_ALIGN) {
-            WOLFSSL_MSG("ERROR memory is not alligned");
+            WOLFSSL_MSG("ERROR memory is not aligned");
             res = NULL;
         }
     #endif
@@ -803,6 +870,14 @@
         WOLFSSL_HEAP*      mem  = hint->memory;
         word32 padSz = -(int)sizeof(wc_Memory) & (WOLFSSL_STATIC_ALIGN - 1);
 
+        if (ptr == NULL) {
+        #ifdef WOLFSSL_DEBUG_MEMORY
+            return wolfSSL_Malloc(size, heap, type, func, line);
+        #else
+            return wolfSSL_Malloc(size, heap, type);
+        #endif
+        }
+
         if (wc_LockMutex(&(mem->memory_mutex)) != 0) {
             WOLFSSL_MSG("Bad memory_mutex lock");
             return NULL;
@@ -861,7 +936,7 @@
 
     #ifdef WOLFSSL_MALLOC_CHECK
         if ((wolfssl_word)res % WOLFSSL_STATIC_ALIGN) {
-            WOLFSSL_MSG("ERROR memory is not alligned");
+            WOLFSSL_MSG("ERROR memory is not aligned");
             res = NULL;
         }
     #endif
@@ -952,4 +1027,101 @@
 
 #endif /* HAVE_IO_POOL */
 
+#ifdef WOLFSSL_MEMORY_LOG
+void *xmalloc(size_t n, void* heap, int type, const char* func,
+              const char* file, unsigned int line)
+{
+    void*   p;
+    word32* p32;
+
+    if (malloc_function)
+        p32 = malloc_function(n + sizeof(word32) * 4);
+    else
+        p32 = malloc(n + sizeof(word32) * 4);
+
+    p32[0] = (word32)n;
+    p = (void*)(p32 + 4);
+
+    fprintf(stderr, "Alloc: %p -> %u (%d) at %s:%s:%u\n", p, (word32)n, type,
+                                                              func, file, line);
+
+    (void)heap;
+
+    return p;
+}
+void *xrealloc(void *p, size_t n, void* heap, int type, const char* func,
+               const char* file, unsigned int line)
+{
+    void*   newp = NULL;
+    word32* p32;
+    word32* oldp32 = NULL;
+    word32  oldLen;
+
+    if (p != NULL) {
+        oldp32 = (word32*)p;
+        oldp32 -= 4;
+        oldLen = oldp32[0];
+    }
+
+    if (realloc_function)
+        p32 = realloc_function(oldp32, n + sizeof(word32) * 4);
+    else
+        p32 = realloc(oldp32, n + sizeof(word32) * 4);
+
+    if (p32 != NULL) {
+        p32[0] = (word32)n;
+        newp = (void*)(p32 + 4);
+
+        fprintf(stderr, "Alloc: %p -> %u (%d) at %s:%s:%u\n", newp, (word32)n,
+                                                        type, func, file, line);
+        if (p != NULL) {
+            fprintf(stderr, "Free: %p -> %u (%d) at %s:%s:%u\n", p, oldLen,
+                                                        type, func, file, line);
+        }
+    }
+
+    (void)heap;
+
+    return newp;
+}
+void xfree(void *p, void* heap, int type, const char* func, const char* file,
+           unsigned int line)
+{
+    word32* p32 = (word32*)p;
+
+    if (p != NULL) {
+        p32 -= 4;
+
+        fprintf(stderr, "Free: %p -> %u (%d) at %s:%s:%u\n", p, p32[0], type,
+                                                              func, file, line);
+
+        if (free_function)
+            free_function(p32);
+        else
+            free(p32);
+    }
+
+    (void)heap;
+}
+#endif /* WOLFSSL_MEMORY_LOG */
+
+#ifdef WOLFSSL_STACK_LOG
+/* Note: this code only works with GCC using -finstrument-functions. */
+void __attribute__((no_instrument_function))
+     __cyg_profile_func_enter(void *func,  void *caller)
+{
+    register void* sp asm("sp");
+    fprintf(stderr, "ENTER: %016lx %p\n", (unsigned long)(size_t)func, sp);
+    (void)caller;
+}
+
+void __attribute__((no_instrument_function))
+     __cyg_profile_func_exit(void *func, void *caller)
+{
+    register void* sp asm("sp");
+    fprintf(stderr, "EXIT: %016lx %p\n", (unsigned long)(size_t)func, sp);
+    (void)caller;
+}
+#endif
+
 
--- a/wolfcrypt/src/misc.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/misc.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* misc.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -39,9 +39,9 @@
  */
 
 #ifdef NO_INLINE
-    #define STATIC
+    #define WC_STATIC
 #else
-    #define STATIC static
+    #define WC_STATIC static
 #endif
 
 /* Check for if compiling misc.c when not needed. */
@@ -66,25 +66,25 @@
      * i.e., _rotl and _rotr */
     #pragma intrinsic(_lrotl, _lrotr)
 
-    STATIC WC_INLINE word32 rotlFixed(word32 x, word32 y)
+    WC_STATIC WC_INLINE word32 rotlFixed(word32 x, word32 y)
     {
         return y ? _lrotl(x, y) : x;
     }
 
-    STATIC WC_INLINE word32 rotrFixed(word32 x, word32 y)
+    WC_STATIC WC_INLINE word32 rotrFixed(word32 x, word32 y)
     {
         return y ? _lrotr(x, y) : x;
     }
 
 #else /* generic */
 
-    STATIC WC_INLINE word32 rotlFixed(word32 x, word32 y)
+    WC_STATIC WC_INLINE word32 rotlFixed(word32 x, word32 y)
     {
         return (x << y) | (x >> (sizeof(y) * 8 - y));
     }
 
 
-    STATIC WC_INLINE word32 rotrFixed(word32 x, word32 y)
+    WC_STATIC WC_INLINE word32 rotrFixed(word32 x, word32 y)
     {
         return (x >> y) | (x << (sizeof(y) * 8 - y));
     }
@@ -92,7 +92,7 @@
 #endif
 
 
-STATIC WC_INLINE word32 ByteReverseWord32(word32 value)
+WC_STATIC WC_INLINE word32 ByteReverseWord32(word32 value)
 {
 #ifdef PPC_INTRINSICS
     /* PPC: load reverse indexed instruction */
@@ -116,7 +116,7 @@
 }
 
 
-STATIC WC_INLINE void ByteReverseWords(word32* out, const word32* in,
+WC_STATIC WC_INLINE void ByteReverseWords(word32* out, const word32* in,
                                     word32 byteCount)
 {
     word32 count = byteCount/(word32)sizeof(word32), i;
@@ -127,22 +127,22 @@
 }
 
 
-#ifdef WORD64_AVAILABLE
+#if defined(WORD64_AVAILABLE) && !defined(WOLFSSL_NO_WORD64_OPS)
 
 
-STATIC WC_INLINE word64 rotlFixed64(word64 x, word64 y)
+WC_STATIC WC_INLINE word64 rotlFixed64(word64 x, word64 y)
 {
     return (x << y) | (x >> (sizeof(y) * 8 - y));
 }
 
 
-STATIC WC_INLINE word64 rotrFixed64(word64 x, word64 y)
+WC_STATIC WC_INLINE word64 rotrFixed64(word64 x, word64 y)
 {
     return (x >> y) | (x << (sizeof(y) * 8 - y));
 }
 
 
-STATIC WC_INLINE word64 ByteReverseWord64(word64 value)
+WC_STATIC WC_INLINE word64 ByteReverseWord64(word64 value)
 {
 #if defined(WOLF_ALLOW_BUILTIN) && defined(__GNUC_PREREQ) && __GNUC_PREREQ(4, 3)
     return (word64)__builtin_bswap64(value);
@@ -159,7 +159,7 @@
 }
 
 
-STATIC WC_INLINE void ByteReverseWords64(word64* out, const word64* in,
+WC_STATIC WC_INLINE void ByteReverseWords64(word64* out, const word64* in,
                                       word32 byteCount)
 {
     word32 count = byteCount/(word32)sizeof(word64), i;
@@ -169,10 +169,10 @@
 
 }
 
-#endif /* WORD64_AVAILABLE */
+#endif /* WORD64_AVAILABLE && !WOLFSSL_NO_WORD64_OPS */
 
-
-STATIC WC_INLINE void XorWords(wolfssl_word* r, const wolfssl_word* a, word32 n)
+#ifndef WOLFSSL_NO_XOR_OPS
+WC_STATIC WC_INLINE void XorWords(wolfssl_word* r, const wolfssl_word* a, word32 n)
 {
     word32 i;
 
@@ -180,7 +180,7 @@
 }
 
 
-STATIC WC_INLINE void xorbuf(void* buf, const void* mask, word32 count)
+WC_STATIC WC_INLINE void xorbuf(void* buf, const void* mask, word32 count)
 {
     if (((wolfssl_word)buf | (wolfssl_word)mask | count) % WOLFSSL_WORD_SIZE == 0)
         XorWords( (wolfssl_word*)buf,
@@ -193,10 +193,11 @@
         for (i = 0; i < count; i++) b[i] ^= m[i];
     }
 }
-
+#endif
 
+#ifndef WOLFSSL_NO_FORCE_ZERO
 /* Make sure compiler doesn't skip */
-STATIC WC_INLINE void ForceZero(const void* mem, word32 len)
+WC_STATIC WC_INLINE void ForceZero(const void* mem, word32 len)
 {
     volatile byte* z = (volatile byte*)mem;
 
@@ -217,10 +218,12 @@
 
     while (len--) *z++ = 0;
 }
+#endif
 
 
+#ifndef WOLFSSL_NO_CONST_CMP
 /* check all length bytes for equality, return 0 on success */
-STATIC WC_INLINE int ConstantCompare(const byte* a, const byte* b, int length)
+WC_STATIC WC_INLINE int ConstantCompare(const byte* a, const byte* b, int length)
 {
     int i;
     int compareSum = 0;
@@ -231,6 +234,7 @@
 
     return compareSum;
 }
+#endif
 
 
 #ifndef WOLFSSL_HAVE_MIN
@@ -238,7 +242,7 @@
     #if defined(HAVE_FIPS) && !defined(min) /* so ifdef check passes */
         #define min min
     #endif
-    STATIC WC_INLINE word32 min(word32 a, word32 b)
+    WC_STATIC WC_INLINE word32 min(word32 a, word32 b)
     {
         return a > b ? b : a;
     }
@@ -249,14 +253,15 @@
     #if defined(HAVE_FIPS) && !defined(max) /* so ifdef check passes */
         #define max max
     #endif
-    STATIC WC_INLINE word32 max(word32 a, word32 b)
+    WC_STATIC WC_INLINE word32 max(word32 a, word32 b)
     {
         return a > b ? a : b;
     }
 #endif /* !WOLFSSL_HAVE_MAX */
 
+#ifndef WOLFSSL_NO_INT_ENCODE
 /* converts a 32 bit integer to 24 bit */
-STATIC WC_INLINE void c32to24(word32 in, word24 out)
+WC_STATIC WC_INLINE void c32to24(word32 in, word24 out)
 {
     out[0] = (in >> 16) & 0xff;
     out[1] = (in >>  8) & 0xff;
@@ -264,97 +269,136 @@
 }
 
 /* convert 16 bit integer to opaque */
-STATIC WC_INLINE void c16toa(word16 wc_u16, byte* c)
+WC_STATIC WC_INLINE void c16toa(word16 wc_u16, byte* c)
 {
     c[0] = (wc_u16 >> 8) & 0xff;
     c[1] =  wc_u16 & 0xff;
 }
 
 /* convert 32 bit integer to opaque */
-STATIC WC_INLINE void c32toa(word32 wc_u32, byte* c)
+WC_STATIC WC_INLINE void c32toa(word32 wc_u32, byte* c)
 {
     c[0] = (wc_u32 >> 24) & 0xff;
     c[1] = (wc_u32 >> 16) & 0xff;
     c[2] = (wc_u32 >>  8) & 0xff;
     c[3] =  wc_u32 & 0xff;
 }
+#endif
 
+#ifndef WOLFSSL_NO_INT_DECODE
 /* convert a 24 bit integer into a 32 bit one */
-STATIC WC_INLINE void c24to32(const word24 wc_u24, word32* wc_u32)
+WC_STATIC WC_INLINE void c24to32(const word24 wc_u24, word32* wc_u32)
 {
-    *wc_u32 = (wc_u24[0] << 16) | (wc_u24[1] << 8) | wc_u24[2];
+    *wc_u32 = ((word32)wc_u24[0] << 16) | (wc_u24[1] << 8) | wc_u24[2];
 }
 
 
 /* convert opaque to 24 bit integer */
-STATIC WC_INLINE void ato24(const byte* c, word32* wc_u24)
+WC_STATIC WC_INLINE void ato24(const byte* c, word32* wc_u24)
 {
-    *wc_u24 = (c[0] << 16) | (c[1] << 8) | c[2];
+    *wc_u24 = ((word32)c[0] << 16) | (c[1] << 8) | c[2];
 }
 
 /* convert opaque to 16 bit integer */
-STATIC WC_INLINE void ato16(const byte* c, word16* wc_u16)
+WC_STATIC WC_INLINE void ato16(const byte* c, word16* wc_u16)
 {
     *wc_u16 = (word16) ((c[0] << 8) | (c[1]));
 }
 
 /* convert opaque to 32 bit integer */
-STATIC WC_INLINE void ato32(const byte* c, word32* wc_u32)
+WC_STATIC WC_INLINE void ato32(const byte* c, word32* wc_u32)
 {
-    *wc_u32 = ((word32)c[0] << 24) | (c[1] << 16) | (c[2] << 8) | c[3];
+    *wc_u32 = ((word32)c[0] << 24) | ((word32)c[1] << 16) | (c[2] << 8) | c[3];
 }
 
 
-STATIC WC_INLINE word32 btoi(byte b)
+WC_STATIC WC_INLINE word32 btoi(byte b)
 {
     return (word32)(b - 0x30);
 }
+#endif
 
 
+#ifndef WOLFSSL_NO_CT_OPS
 /* Constant time - mask set when a > b. */
-STATIC WC_INLINE byte ctMaskGT(int a, int b)
+WC_STATIC WC_INLINE byte ctMaskGT(int a, int b)
 {
     return (((word32)a - b - 1) >> 31) - 1;
 }
 
 /* Constant time - mask set when a >= b. */
-STATIC WC_INLINE byte ctMaskGTE(int a, int b)
+WC_STATIC WC_INLINE byte ctMaskGTE(int a, int b)
+{
+    return (((word32)a - b    ) >> 31) - 1;
+}
+
+/* Constant time - mask set when a >= b. */
+WC_STATIC WC_INLINE int ctMaskIntGTE(int a, int b)
 {
     return (((word32)a - b    ) >> 31) - 1;
 }
 
 /* Constant time - mask set when a < b. */
-STATIC WC_INLINE byte ctMaskLT(int a, int b)
+WC_STATIC WC_INLINE byte ctMaskLT(int a, int b)
 {
     return (((word32)b - a - 1) >> 31) - 1;
 }
 
 /* Constant time - mask set when a <= b. */
-STATIC WC_INLINE byte ctMaskLTE(int a, int b)
+WC_STATIC WC_INLINE byte ctMaskLTE(int a, int b)
 {
     return (((word32)b - a    ) >> 31) - 1;
 }
 
 /* Constant time - mask set when a == b. */
-STATIC WC_INLINE byte ctMaskEq(int a, int b)
+WC_STATIC WC_INLINE byte ctMaskEq(int a, int b)
+{
+    return (~ctMaskGT(a, b)) & (~ctMaskLT(a, b));
+}
+
+WC_STATIC WC_INLINE word16 ctMask16GT(int a, int b)
 {
-    return 0 - (a == b);
+    return (((word32)a - b - 1) >> 31) - 1;
+}
+
+WC_STATIC WC_INLINE word16 ctMask16LT(int a, int b)
+{
+    return (((word32)a - b - 1) >> 31) - 1;
 }
 
-/* Constant time - select b when mask is set and a otherwise. */
-STATIC WC_INLINE byte ctMaskSel(byte m, byte a, byte b)
+WC_STATIC WC_INLINE word16 ctMask16Eq(int a, int b)
+{
+    return (~ctMask16GT(a, b)) & (~ctMask16LT(a, b));
+}
+
+/* Constant time - mask set when a != b. */
+WC_STATIC WC_INLINE byte ctMaskNotEq(int a, int b)
 {
-    return (a & ((byte)~(word32)m)) | (b & m);
+    return ctMaskGT(a, b) | ctMaskLT(a, b);
+}
+
+/* Constant time - select a when mask is set and b otherwise. */
+WC_STATIC WC_INLINE byte ctMaskSel(byte m, byte a, byte b)
+{
+    return (b & ((byte)~(word32)m)) | (a & m);
+}
+
+/* Constant time - select integer a when mask is set and integer b otherwise. */
+WC_STATIC WC_INLINE int ctMaskSelInt(byte m, int a, int b)
+{
+    return (b & (~(signed int)(signed char)m)) |
+           (a & ( (signed int)(signed char)m));
 }
 
 /* Constant time - bit set when a <= b. */
-STATIC WC_INLINE byte ctSetLTE(int a, int b)
+WC_STATIC WC_INLINE byte ctSetLTE(int a, int b)
 {
     return ((word32)a - b - 1) >> 31;
 }
+#endif
 
 
-#undef STATIC
+#undef WC_STATIC
 
 #endif /* !WOLFSSL_MISC_INCLUDED && !NO_INLINE */
 
--- a/wolfcrypt/src/pkcs12.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/pkcs12.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* pkcs12.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -26,7 +26,7 @@
 
 #include <wolfssl/wolfcrypt/settings.h>
 
-#if !defined(NO_ASN) && !defined(NO_PWDBASED)
+#if !defined(NO_ASN) && !defined(NO_PWDBASED) && defined(HAVE_PKCS12)
 
 #include <wolfssl/wolfcrypt/asn.h>
 #include <wolfssl/wolfcrypt/asn_public.h>
@@ -60,8 +60,8 @@
     WC_PKCS12_DATA_OBJ_SZ = 11,
 };
 
-/* static const byte WC_PKCS12_ENCRYPTED_OID[] =
-                         {0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x07, 0x06}; */
+static const byte WC_PKCS12_ENCRYPTED_OID[] =
+                         {0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x07, 0x06};
 static const byte WC_PKCS12_DATA_OID[] =
                          {0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x07, 0x01};
 static const byte WC_PKCS12_CertBag_Type1_OID[] =
@@ -79,7 +79,7 @@
     struct ContentInfo* next;
     word32 encC;  /* encryptedContent */
     word32 dataSz;
-    int type; /* DATA / encrypted / envelpoed */
+    int type; /* DATA / encrypted / enveloped */
 } ContentInfo;
 
 
@@ -98,7 +98,7 @@
     word32 oid;
     word32 digestSz;
     word32 saltSz;
-    int itt; /* number of itterations when creating HMAC key */
+    int itt; /* number of iterations when creating HMAC key */
 } MacData;
 
 
@@ -198,6 +198,7 @@
     word32 localIdx = *idx;
     int ret;
     int size = 0;
+    byte tag;
 
     safe = (AuthenticatedSafe*)XMALLOC(sizeof(AuthenticatedSafe), pkcs12->heap,
                                        DYNAMIC_TYPE_PKCS);
@@ -215,7 +216,12 @@
 
     safe->oid = oid;
     /* check tag, length */
-    if (input[localIdx++] != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC)) {
+    if (GetASNTag(input, &localIdx, &tag, maxIdx) < 0) {
+        freeSafe(safe, pkcs12->heap);
+        return ASN_PARSE_E;
+    }
+
+    if (tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC)) {
         WOLFSSL_MSG("Unexpected tag in PKCS12 DER");
         freeSafe(safe, pkcs12->heap);
         return ASN_PARSE_E;
@@ -233,7 +239,12 @@
         case WC_PKCS12_DATA:
             WOLFSSL_MSG("Found PKCS12 OBJECT: DATA");
             /* get octets holding contents */
-            if (input[localIdx++] != ASN_OCTET_STRING) {
+            if (GetASNTag(input, &localIdx, &tag, maxIdx) < 0) {
+                freeSafe(safe, pkcs12->heap);
+                return ASN_PARSE_E;
+            }
+
+            if (tag != ASN_OCTET_STRING) {
                 WOLFSSL_MSG("Wrong tag with content PKCS12 type DATA");
                 freeSafe(safe, pkcs12->heap);
                 return ASN_PARSE_E;
@@ -256,7 +267,7 @@
     *idx = localIdx;
 
     /* an instance of AuthenticatedSafe is created from
-     * ContentInfo's strung together in a SEQUENCE. Here we itterate
+     * ContentInfo's strung together in a SEQUENCE. Here we iterate
      * through the ContentInfo's and add them to our
      * AuthenticatedSafe struct */
     localIdx = 0;
@@ -347,6 +358,7 @@
     word32 curIdx = *idx;
     word32 oid = 0;
     int size, ret;
+    byte tag;
 
     /* Digest Info : Sequence
      *      DigestAlgorithmIdentifier
@@ -380,7 +392,12 @@
 #endif
 
     /* Digest: should be octet type holding digest */
-    if (mem[curIdx++] != ASN_OCTET_STRING) {
+    if (GetASNTag(mem, &curIdx, &tag, totalSz) < 0) {
+        XFREE(mac, pkcs12->heap, DYNAMIC_TYPE_PKCS);
+        return ASN_PARSE_E;
+    }
+
+    if (tag != ASN_OCTET_STRING) {
         WOLFSSL_MSG("Failed to get digest");
         XFREE(mac, pkcs12->heap, DYNAMIC_TYPE_PKCS);
         return ASN_PARSE_E;
@@ -411,12 +428,16 @@
     curIdx += mac->digestSz;
 
     /* get salt, should be octet string */
-    if (mem[curIdx++] != ASN_OCTET_STRING) {
+    if (GetASNTag(mem, &curIdx, &tag, totalSz) < 0) {
+        ERROR_OUT(ASN_PARSE_E, exit_gsd);
+    }
+
+    if (tag != ASN_OCTET_STRING) {
         WOLFSSL_MSG("Failed to get salt");
         ERROR_OUT(ASN_PARSE_E, exit_gsd);
     }
 
-    if ((ret = GetLength(mem, &curIdx, &size, totalSz)) <= 0) {
+    if ((ret = GetLength(mem, &curIdx, &size, totalSz)) < 0) {
         goto exit_gsd;
     }
     mac->saltSz = size;
@@ -449,7 +470,7 @@
     }
 
 #ifdef WOLFSSL_DEBUG_PKCS12
-    printf("\t\tITTERATIONS : %d\n", mac->itt);
+    printf("\t\tITERATIONS : %d\n", mac->itt);
 #endif
 
     *idx = curIdx;
@@ -592,7 +613,7 @@
 
 
 /* Convert DER format stored in der buffer to WC_PKCS12 struct
- * Puts the raw contents of Content Info into structure without completly
+ * Puts the raw contents of Content Info into structure without completely
  * parsing or decoding.
  * der    : pointer to der buffer holding PKCS12
  * derSz  : size of der buffer
@@ -629,7 +650,7 @@
     printf("version = %d\n", version);
 #endif
 
-    if (version != 3) {
+    if (version != WC_PKCS12_VERSION_DEFAULT) {
         WOLFSSL_MSG("PKCS12 unsupported version!");
         return ASN_VERSION_E;
     }
@@ -670,6 +691,219 @@
     return ret;
 }
 
+/* Convert WC_PKCS12 struct to allocated DER buffer.
+ * pkcs12 : non-null pkcs12 pointer
+ * der    : pointer-pointer to der buffer. If NULL space will be
+ *          allocated for der, which must be freed by application.
+ * derSz  : size of buffer passed in when der is not NULL. NULL arg disables
+ *          sanity checks on buffer read/writes. Max size gets set to derSz when
+ *          the "der" buffer passed in is NULL and LENGTH_ONLY_E is returned.
+ * return size of DER on success and negative on failure.
+ */
+int wc_i2d_PKCS12(WC_PKCS12* pkcs12, byte** der, int* derSz)
+{
+    int ret = 0;
+    word32 seqSz = 0, verSz = 0, totalSz = 0, idx = 0, sdBufSz = 0;
+    byte *buf = NULL;
+    byte ver[MAX_VERSION_SZ];
+    byte seq[MAX_SEQ_SZ];
+    byte *sdBuf = NULL;
+
+    if ((pkcs12 == NULL) || (pkcs12->safe == NULL) ||
+            (der == NULL && derSz == NULL)) {
+        return BAD_FUNC_ARG;
+    }
+
+    /* Create the MAC portion */
+    if (pkcs12->signData != NULL) {
+        MacData *mac = (MacData*)pkcs12->signData;
+        word32 innerSz = 0;
+        word32 outerSz = 0;
+
+        /* get exact size */
+        {
+            byte ASNLENGTH[MAX_LENGTH_SZ];
+            byte ASNSHORT[MAX_SHORT_SZ];
+            byte ASNALGO[MAX_ALGO_SZ];
+            word32 tmpIdx = 0;
+
+            /* algo id */
+            innerSz += SetAlgoID(mac->oid, ASNALGO, oidHashType, 0);
+
+            /* Octet string holding digest */
+            innerSz += ASN_TAG_SZ;
+            innerSz += SetLength(mac->digestSz, ASNLENGTH);
+            innerSz += mac->digestSz;
+
+            /* salt */
+            outerSz += ASN_TAG_SZ;
+            outerSz += SetLength(mac->saltSz, ASNLENGTH);
+            outerSz += mac->saltSz;
+
+            /* MAC iterations */
+            outerSz += SetShortInt(ASNSHORT, &tmpIdx, mac->itt, MAX_SHORT_SZ);
+
+            /* sequence of inner data */
+            outerSz += SetSequence(innerSz, seq);
+            outerSz += innerSz;
+        }
+        sdBufSz = outerSz + SetSequence(outerSz, seq);
+        sdBuf = (byte*)XMALLOC(sdBufSz, pkcs12->heap, DYNAMIC_TYPE_PKCS);
+        if (sdBuf == NULL) {
+            ret = MEMORY_E;
+        }
+
+        if (ret == 0) {
+            idx += SetSequence(outerSz, sdBuf);
+            idx += SetSequence(innerSz, &sdBuf[idx]);
+
+            /* Set Algorithm Identifier */
+            {
+                word32 algoIdSz;
+
+                algoIdSz = SetAlgoID(mac->oid, &sdBuf[idx], oidHashType, 0);
+                if (algoIdSz == 0) {
+                    ret = ALGO_ID_E;
+                }
+                else {
+                    idx += algoIdSz;
+                }
+            }
+        }
+
+        if (ret == 0) {
+
+
+            /* Octet string holding digest */
+            idx += SetOctetString(mac->digestSz, &sdBuf[idx]);
+            XMEMCPY(&sdBuf[idx], mac->digest, mac->digestSz);
+            idx += mac->digestSz;
+
+            /* Set salt */
+            idx += SetOctetString(mac->saltSz, &sdBuf[idx]);
+            XMEMCPY(&sdBuf[idx], mac->salt, mac->saltSz);
+            idx += mac->saltSz;
+
+            /* MAC iterations */
+            {
+                int tmpSz;
+                word32 tmpIdx = 0;
+                byte ar[MAX_SHORT_SZ];
+                tmpSz = SetShortInt(ar, &tmpIdx, mac->itt, MAX_SHORT_SZ);
+                if (tmpSz < 0) {
+                    ret = tmpSz;
+                }
+                else {
+                    XMEMCPY(&sdBuf[idx], ar, tmpSz);
+                }
+            }
+            totalSz += sdBufSz;
+        }
+    }
+
+    /* Calculate size of der */
+    if (ret == 0) {
+        totalSz += pkcs12->safe->dataSz;
+
+        totalSz += 4; /* Octet string */
+
+        totalSz += 4; /* Element */
+
+        totalSz += 2 + sizeof(WC_PKCS12_DATA_OID);
+
+        totalSz += 4; /* Seq */
+
+        ret = SetMyVersion(WC_PKCS12_VERSION_DEFAULT, ver, FALSE);
+        if (ret > 0) {
+            verSz = (word32)ret;
+            ret   = 0; /* value larger than 0 is success */
+            totalSz += verSz;
+
+            seqSz = SetSequence(totalSz, seq);
+            totalSz += seqSz;
+
+            /* check if getting length only */
+            if (der == NULL && derSz != NULL) {
+                *derSz = totalSz;
+                XFREE(sdBuf, pkcs12->heap, DYNAMIC_TYPE_PKCS);
+                return LENGTH_ONLY_E;
+            }
+
+            if (*der == NULL) {
+                /* Allocate if requested */
+                buf = (byte*)XMALLOC(totalSz, NULL, DYNAMIC_TYPE_PKCS);
+            }
+            else {
+                buf = *der;
+
+                /* sanity check on buffer size if passed in */
+                if (derSz != NULL) {
+                    if (*derSz < (int)totalSz) {
+                        WOLFSSL_MSG("Buffer passed in is too small");
+                        ret = BUFFER_E;
+                    }
+                }
+            }
+        }
+    }
+
+    if (buf == NULL) {
+        ret = MEMORY_E;
+    }
+
+    if (ret == 0) {
+        idx = 0;
+
+        /* Copy parts to buf */
+        XMEMCPY(&buf[idx], seq, seqSz);
+        idx += seqSz;
+
+        XMEMCPY(&buf[idx], ver, verSz);
+        idx += verSz;
+
+        seqSz = SetSequence(totalSz - sdBufSz - idx - 4, seq);
+        XMEMCPY(&buf[idx], seq, seqSz);
+        idx += seqSz;
+
+        /* OID */
+        idx += SetObjectId(sizeof(WC_PKCS12_DATA_OID), &buf[idx]);
+        XMEMCPY(&buf[idx], WC_PKCS12_DATA_OID, sizeof(WC_PKCS12_DATA_OID));
+        idx += sizeof(WC_PKCS12_DATA_OID);
+
+        /* Element */
+        buf[idx++] = ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC;
+        idx += SetLength(totalSz - sdBufSz - idx - 3, &buf[idx]);
+
+        /* Octet string */
+        idx += SetOctetString(totalSz - sdBufSz - idx - 4, &buf[idx]);
+
+        XMEMCPY(&buf[idx], pkcs12->safe->data, pkcs12->safe->dataSz);
+        idx += pkcs12->safe->dataSz;
+
+        if (pkcs12->signData != NULL) {
+            XMEMCPY(&buf[idx], sdBuf, sdBufSz);
+        }
+
+        if (*der == NULL) {
+            /* Point to start of data allocated for DER */
+            *der = buf;
+        }
+        else {
+            /* Increment pointer to byte past DER */
+            *der = &buf[totalSz];
+        }
+
+        /* Return size of der */
+        ret = totalSz;
+    }
+
+    XFREE(sdBuf, pkcs12->heap, DYNAMIC_TYPE_PKCS);
+    /* Allocation of buf was the last time ret could be a failure,
+     * so no need to free here */
+
+    return ret;
+}
+
 
 /* helper function to free WC_DerCertList */
 void wc_FreeCertList(WC_DerCertList* list, void* heap)
@@ -751,6 +985,7 @@
     byte* buf             = NULL;
     word32 i, oid;
     int ret, pswSz;
+    word32 algId;
 
     WOLFSSL_ENTER("wc_PKCS12_parse");
 
@@ -786,13 +1021,18 @@
         byte*  data;
         word32 idx = 0;
         int    size, totalSz;
+        byte   tag;
 
         if (ci->type == WC_PKCS12_ENCRYPTED_DATA) {
             int number;
 
             WOLFSSL_MSG("Decrypting PKCS12 Content Info Container");
             data = ci->data;
-            if (data[idx++] != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC)) {
+            if (GetASNTag(data, &idx, &tag, ci->dataSz) < 0) {
+                ERROR_OUT(ASN_PARSE_E, exit_pk12par);
+            }
+
+            if (tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC)) {
                 ERROR_OUT(ASN_PARSE_E, exit_pk12par);
             }
             if ((ret = GetLength(data, &idx, &size, ci->dataSz)) < 0) {
@@ -850,13 +1090,21 @@
         else { /* type DATA */
             WOLFSSL_MSG("Parsing PKCS12 DATA Content Info Container");
             data = ci->data;
-            if (data[idx++] != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC)) {
+            if (GetASNTag(data, &idx, &tag, ci->dataSz) < 0) {
+                ERROR_OUT(ASN_PARSE_E, exit_pk12par);
+            }
+
+            if (tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC)) {
                 ERROR_OUT(ASN_PARSE_E, exit_pk12par);
             }
             if ((ret = GetLength(data, &idx, &size, ci->dataSz)) <= 0) {
-                goto exit_pk12par;
+                ERROR_OUT(ASN_PARSE_E, exit_pk12par);
             }
-            if (data[idx++] != ASN_OCTET_STRING) {
+
+            if (GetASNTag(data, &idx, &tag, ci->dataSz) < 0) {
+                ERROR_OUT(ASN_PARSE_E, exit_pk12par);
+            }
+            if (tag != ASN_OCTET_STRING) {
                 ERROR_OUT(ASN_PARSE_E, exit_pk12par);
             }
             if ((ret = GetLength(data, &idx, &size, ci->dataSz)) < 0) {
@@ -886,11 +1134,15 @@
             switch (oid) {
                 case WC_PKCS12_KeyBag: /* 667 */
                     WOLFSSL_MSG("PKCS12 Key Bag found");
-                    if (data[idx++] !=
-                                     (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC)) {
+                    if (GetASNTag(data, &idx, &tag, ci->dataSz) < 0) {
+                        ERROR_OUT(ASN_PARSE_E, exit_pk12par);
+                    }
+                    if (tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC)) {
                         ERROR_OUT(ASN_PARSE_E, exit_pk12par);
                     }
                     if ((ret = GetLength(data, &idx, &size, ci->dataSz)) <= 0) {
+                        if (ret == 0)
+                            ret = ASN_PARSE_E;
                         goto exit_pk12par;
                     }
                     if (*pkey == NULL) {
@@ -900,7 +1152,7 @@
                             ERROR_OUT(MEMORY_E, exit_pk12par);
                         }
                         XMEMCPY(*pkey, data + idx, size);
-                        *pkeySz =  ToTraditional(*pkey, size);
+                        *pkeySz =  ToTraditional_ex(*pkey, size, &algId);
                     }
 
                 #ifdef WOLFSSL_DEBUG_PKCS12
@@ -920,8 +1172,10 @@
                         byte* k;
 
                         WOLFSSL_MSG("PKCS12 Shrouded Key Bag found");
-                        if (data[idx++] !=
-                                     (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC)) {
+                        if (GetASNTag(data, &idx, &tag, ci->dataSz) < 0) {
+                            ERROR_OUT(ASN_PARSE_E, exit_pk12par);
+                        }
+                        if (tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC)) {
                             ERROR_OUT(ASN_PARSE_E, exit_pk12par);
                         }
                         if ((ret = GetLength(data, &idx, &size,
@@ -937,7 +1191,8 @@
                         XMEMCPY(k, data + idx, size);
 
                         /* overwrites input, be warned */
-                        if ((ret = ToTraditionalEnc(k, size, psw, pswSz)) < 0) {
+                        if ((ret = ToTraditionalEnc(k, size, psw, pswSz,
+                                                                 &algId)) < 0) {
                             XFREE(k, pkcs12->heap, DYNAMIC_TYPE_PUBLIC_KEY);
                             goto exit_pk12par;
                         }
@@ -981,8 +1236,10 @@
                 {
                     WC_DerCertList* node;
                     WOLFSSL_MSG("PKCS12 Cert Bag found");
-                    if (data[idx++] !=
-                                     (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC)) {
+                    if (GetASNTag(data, &idx, &tag, ci->dataSz) < 0) {
+                        ERROR_OUT(ASN_PARSE_E, exit_pk12par);
+                    }
+                    if (tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC)) {
                         ERROR_OUT(ASN_PARSE_E, exit_pk12par);
                     }
                     if ((ret = GetLength(data, &idx, &size, ci->dataSz)) < 0) {
@@ -1003,15 +1260,23 @@
                         case WC_PKCS12_CertBag_Type1:  /* 675 */
                             /* type 1 */
                             WOLFSSL_MSG("PKCS12 cert bag type 1");
-                            if (data[idx++] !=
-                                     (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC)) {
+                            if (GetASNTag(data, &idx, &tag, ci->dataSz) < 0) {
+                                ERROR_OUT(ASN_PARSE_E, exit_pk12par);
+                            }
+                            if (tag != (ASN_CONSTRUCTED |
+                                        ASN_CONTEXT_SPECIFIC)) {
                                 ERROR_OUT(ASN_PARSE_E, exit_pk12par);
                             }
                             if ((ret = GetLength(data, &idx, &size, ci->dataSz))
                                                                          <= 0) {
+                                if (ret == 0)
+                                    ret = ASN_PARSE_E;
                                 goto exit_pk12par;
                             }
-                            if (data[idx++] != ASN_OCTET_STRING) {
+                            if (GetASNTag(data, &idx, &tag, ci->dataSz) < 0) {
+                                ERROR_OUT(ASN_PARSE_E, exit_pk12par);
+                            }
+                            if (tag != ASN_OCTET_STRING) {
                                 ERROR_OUT(ASN_PARSE_E, exit_pk12par);
 
                             }
@@ -1106,6 +1371,7 @@
         /* free list, not wanted */
         wc_FreeCertList(certList, pkcs12->heap);
     }
+    (void)tailList; /* not used */
 
     ret = 0; /* success */
 
@@ -1304,7 +1570,7 @@
     XFREE(tmp, heap, DYNAMIC_TYPE_TMP_BUFFER);
     totalSz += length;
 
-    /* set begining sequence */
+    /* set beginning sequence */
     tmpSz = SetSequence(totalSz, out);
     XMEMMOVE(out + tmpSz, out + MAX_SEQ_SZ, totalSz);
 
@@ -1444,7 +1710,8 @@
     word32 length = 0;
     word32 tmpSz;
     word32 encSz;
-    word32 i;
+
+    byte seq[MAX_SEQ_SZ];
 
     WOLFSSL_MSG("encrypting PKCS12 content");
 
@@ -1458,30 +1725,7 @@
      * sequence
      * get object id */
     if (type == WC_PKCS12_ENCRYPTED_DATA) {
-        if (out == NULL) {
-            *outSz = 1 + MAX_LENGTH_SZ + MAX_SEQ_SZ + MAX_VERSION_SZ +
-                MAX_SEQ_SZ + WC_PKCS12_DATA_OBJ_SZ;
-            ret = EncryptContent(NULL, contentSz + MAX_SEQ_SZ, NULL, &encSz,
-                    pass, passSz, vPKCS, vAlgo, NULL, 0, iter, rng, heap);
-            if (ret != LENGTH_ONLY_E) {
-                return ret;
-            }
-
-            *outSz += encSz;
-            return LENGTH_ONLY_E;
-        }
-
-        if (*outSz < (1 + MAX_LENGTH_SZ + MAX_SEQ_SZ + MAX_VERSION_SZ)) {
-            return BUFFER_E;
-        }
-        out[idx++] = (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC); totalSz++;
-
-        /* save room for length and sequence */
-        idx += MAX_LENGTH_SZ;
-        idx += MAX_SEQ_SZ;
-
-        tmpSz = SetMyVersion(0, out + idx, 0);
-        idx += tmpSz; length += tmpSz;
+        word32 outerSz = 0;
 
         encSz = contentSz;
         if ((ret = EncryptContent(NULL, contentSz, NULL, &encSz,
@@ -1491,9 +1735,46 @@
             }
         }
 
-        if (*outSz < (idx + MAX_SEQ_SZ + WC_PKCS12_DATA_OBJ_SZ + encSz)) {
+        /* calculate size */
+        totalSz  = SetObjectId(sizeof(WC_PKCS12_ENCRYPTED_OID), seq);
+        totalSz += sizeof(WC_PKCS12_ENCRYPTED_OID);
+        totalSz += ASN_TAG_SZ;
+
+        length  = SetMyVersion(0, seq, 0);
+        tmpSz   = SetObjectId(sizeof(WC_PKCS12_DATA_OID), seq);
+        tmpSz  += sizeof(WC_PKCS12_DATA_OID);
+        tmpSz  += encSz;
+        length += SetSequence(tmpSz, seq) + tmpSz;
+        outerSz = SetSequence(length, seq) + length;
+
+        totalSz += SetLength(outerSz, seq) + outerSz;
+        if (out == NULL) {
+            *outSz = totalSz + SetSequence(totalSz, seq);
+            return LENGTH_ONLY_E;
+        }
+
+        if (*outSz < totalSz + SetSequence(totalSz, seq)) {
             return BUFFER_E;
         }
+
+        idx = 0;
+        idx += SetSequence(totalSz, out + idx);
+        idx += SetObjectId(sizeof(WC_PKCS12_ENCRYPTED_OID), out + idx);
+        if (idx + sizeof(WC_PKCS12_ENCRYPTED_OID) > *outSz){
+            return BUFFER_E;
+        }
+        XMEMCPY(out + idx, WC_PKCS12_ENCRYPTED_OID,
+                sizeof(WC_PKCS12_ENCRYPTED_OID));
+        idx += sizeof(WC_PKCS12_ENCRYPTED_OID);
+
+        if (idx + 1 > *outSz){
+            return BUFFER_E;
+        }
+        out[idx++] = (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC);
+        idx += SetLength(outerSz, out + idx);
+
+        idx += SetSequence(length, out + idx);
+        idx += SetMyVersion(0, out + idx, 0);
         tmp = (byte*)XMALLOC(encSz, heap, DYNAMIC_TYPE_TMP_BUFFER);
         if (tmp == NULL) {
             return MEMORY_E;
@@ -1517,34 +1798,23 @@
         }
         #endif
 
-        tmpSz = SetSequence(WC_PKCS12_DATA_OBJ_SZ + encSz, out + idx);
-        idx += tmpSz; length += tmpSz;
-
-        out[idx++] = ASN_OBJECT_ID; length++;
-        tmpSz = SetLength(sizeof(WC_PKCS12_DATA_OID), out + idx);
-        idx += tmpSz; length += tmpSz;
-        for (i = 0; i < sizeof(WC_PKCS12_DATA_OID); i++) {
-            out[idx++] = WC_PKCS12_DATA_OID[i]; length++;
+        idx += SetSequence(WC_PKCS12_DATA_OBJ_SZ + encSz, out + idx);
+        idx += SetObjectId(sizeof(WC_PKCS12_DATA_OID), out + idx);
+        if (idx + sizeof(WC_PKCS12_DATA_OID) > *outSz){
+            WOLFSSL_MSG("Buffer not large enough for DATA OID");
+            return BUFFER_E;
         }
+        XMEMCPY(out + idx, WC_PKCS12_DATA_OID, sizeof(WC_PKCS12_DATA_OID));
+        idx += sizeof(WC_PKCS12_DATA_OID);
 
         /* copy over encrypted data */
+        if (idx + encSz > *outSz){
+            return BUFFER_E;
+        }
         XMEMCPY(out + idx, tmp, encSz);
         XFREE(tmp, heap, DYNAMIC_TYPE_TMP_BUFFER);
-        idx += encSz; length += encSz;
-
-        /* rewind and place sequence */
-        idx -= (length + MAX_SEQ_SZ);
-        tmpSz = SetSequence(length, out + idx);
-        XMEMMOVE(out + idx + tmpSz, out + idx + MAX_SEQ_SZ, length);
-        length += tmpSz;
-
-        /* now place length */
-        idx -= MAX_LENGTH_SZ;
-        tmpSz = SetLength(length, out + idx);
-        XMEMMOVE(out + idx + tmpSz, out + idx + MAX_LENGTH_SZ, length);
-        totalSz += length + tmpSz;
-
-        return totalSz;
+        idx += encSz;
+        return idx;
     }
 
     /* DATA
@@ -1554,35 +1824,50 @@
      * length
      * sequence containing all bags */
     if (type == WC_PKCS12_DATA) {
+        /* calculate size */
+        totalSz = SetObjectId(sizeof(WC_PKCS12_DATA_OID), seq);
+        totalSz += sizeof(WC_PKCS12_DATA_OID);
+        totalSz += ASN_TAG_SZ;
+
+        length   = SetOctetString(contentSz, seq);
+        length  += contentSz;
+        totalSz += SetLength(length, seq);
+        totalSz += length;
+
         if (out == NULL) {
-            *outSz = 1 + MAX_LENGTH_SZ + 1 + MAX_LENGTH_SZ + contentSz;
+            *outSz = totalSz + SetSequence(totalSz, seq);
             return LENGTH_ONLY_E;
         }
 
-        if (*outSz < (1 + MAX_LENGTH_SZ + 1 + MAX_LENGTH_SZ + contentSz)) {
+        if (*outSz < (totalSz + SetSequence(totalSz, seq))) {
             return BUFFER_E;
         }
 
-        out[idx++] = (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC);
-        totalSz++;
-
-        /* save room for length */
-        idx += MAX_LENGTH_SZ;
-
-        out[idx++] = ASN_OCTET_STRING; length++;
-        tmpSz = SetLength(contentSz, out + idx);
-        idx += tmpSz; length += tmpSz;
+        /* place data in output buffer */
+        idx  = 0;
+        idx += SetSequence(totalSz, out);
+        idx += SetObjectId(sizeof(WC_PKCS12_DATA_OID), out + idx);
+        if (idx + sizeof(WC_PKCS12_DATA_OID) > *outSz){
+            WOLFSSL_MSG("Buffer not large enough for DATA OID");
+            return BUFFER_E;
+        }
+        XMEMCPY(out + idx, WC_PKCS12_DATA_OID, sizeof(WC_PKCS12_DATA_OID));
+        idx += sizeof(WC_PKCS12_DATA_OID);
 
-        /* sequence containing all bags */
-        XMEMCPY(out + idx, content, contentSz);
-        idx += contentSz; length += contentSz;
+        if (idx + 1 > *outSz){
+            return BUFFER_E;
+        }
+        out[idx++] = (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC);
+        idx += SetLength(length, out + idx);
+        idx += SetOctetString(contentSz, out + idx);
 
-        idx -= (MAX_LENGTH_SZ + length);
-        tmpSz = SetLength(length, out + idx);
-        XMEMMOVE(out + idx + tmpSz, out + idx + MAX_LENGTH_SZ, length);
-        totalSz += length + tmpSz;
+        if (idx + contentSz > *outSz){
+            return BUFFER_E;
+        }
+        XMEMCPY(out + idx, content, contentSz);
+        idx += contentSz;
 
-        return totalSz;
+        return idx;
     }
 
     WOLFSSL_MSG("Unknown/Unsupported content type");
@@ -1590,6 +1875,339 @@
 }
 
 
+/* helper function to create the PKCS12 key content
+ * keyCiSz is output buffer size
+ * returns a pointer to be free'd by caller on success and NULL on failure */
+static byte* PKCS12_create_key_content(WC_PKCS12* pkcs12, int nidKey,
+        word32* keyCiSz, WC_RNG* rng, char* pass, word32 passSz,
+        byte* key, word32 keySz, int iter)
+{
+    byte*  keyBuf;
+    word32 keyBufSz = 0;
+    byte* keyCi = NULL;
+    word32 tmpSz;
+    int ret;
+    int algo;
+    void* heap;
+
+    heap = wc_PKCS12_GetHeap(pkcs12);
+    *keyCiSz = 0;
+    switch (nidKey) {
+        case PBE_SHA1_RC4_128:
+            algo = 1;
+            break;
+
+        case PBE_SHA1_DES:
+            algo = 2;
+            break;
+
+        case PBE_SHA1_DES3:
+            algo = 3;
+            break;
+
+        /* no encryption */
+        case -1:
+            algo = -1;
+            break;
+
+        default:
+            WOLFSSL_MSG("Unknown/Unsupported key encryption");
+            return NULL;
+    }
+
+    /* get max size for key bag */
+    ret = wc_PKCS12_create_key_bag(pkcs12, rng, NULL, &keyBufSz, key, keySz,
+            algo, iter, pass, passSz);
+    if (ret != LENGTH_ONLY_E && ret < 0) {
+        WOLFSSL_MSG("Error getting key bag size");
+        return NULL;
+    }
+
+    /* account for sequence around bag */
+    keyBufSz += MAX_SEQ_SZ;
+    keyBuf = (byte*)XMALLOC(keyBufSz, heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (keyBuf == NULL) {
+        WOLFSSL_MSG("Memory error creating keyBuf buffer");
+        return NULL;
+    }
+
+    ret = wc_PKCS12_create_key_bag(pkcs12, rng, keyBuf + MAX_SEQ_SZ, &keyBufSz,
+            key, keySz, algo, iter, pass, passSz);
+    if (ret < 0) {
+        XFREE(keyBuf, heap, DYNAMIC_TYPE_TMP_BUFFER);
+        WOLFSSL_MSG("Error creating key bag");
+        return NULL;
+    }
+    keyBufSz = ret;
+
+    tmpSz = SetSequence(keyBufSz, keyBuf);
+    XMEMMOVE(keyBuf + tmpSz, keyBuf + MAX_SEQ_SZ, keyBufSz);
+    keyBufSz += tmpSz;
+
+    #ifdef WOLFSSL_DEBUG_PKCS12
+    {
+        word32 i;
+        printf("(size %u) Key Bag = ", keyBufSz);
+        for (i = 0; i < keyBufSz; i++)
+            printf("%02X", keyBuf[i]);
+        printf("\n");
+    }
+    #endif
+    ret = wc_PKCS12_encrypt_content(pkcs12, rng, NULL, keyCiSz,
+            NULL, keyBufSz, algo, pass, passSz, iter, WC_PKCS12_DATA);
+    if (ret != LENGTH_ONLY_E) {
+        XFREE(keyBuf, heap, DYNAMIC_TYPE_TMP_BUFFER);
+        WOLFSSL_MSG("Error getting key encrypt content size");
+        return NULL;
+    }
+    keyCi = (byte*)XMALLOC(*keyCiSz, heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (keyCi == NULL) {
+        XFREE(keyBuf, heap, DYNAMIC_TYPE_TMP_BUFFER);
+        return NULL;
+    }
+
+    ret = wc_PKCS12_encrypt_content(pkcs12, rng, keyCi, keyCiSz,
+            keyBuf, keyBufSz, algo, pass, passSz, iter, WC_PKCS12_DATA);
+    XFREE(keyBuf, heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (ret < 0 ) {
+        XFREE(keyCi, heap, DYNAMIC_TYPE_TMP_BUFFER);
+        WOLFSSL_MSG("Error creating key encrypt content");
+        return NULL;
+    }
+    *keyCiSz = ret;
+
+    #ifdef WOLFSSL_DEBUG_PKCS12
+    {
+        word32 i;
+        printf("(size %u) Key Content Info = ", *keyCiSz);
+        for (i = 0; i < *keyCiSz; i++)
+            printf("%02X", keyCi[i]);
+        printf("\n");
+    }
+    #endif
+
+    (void)heap;
+    return keyCi;
+}
+
+
+/* helper function to create the PKCS12 certificate content
+ * certCiSz is output buffer size
+ * returns a pointer to be free'd by caller on success and NULL on failure */
+static byte* PKCS12_create_cert_content(WC_PKCS12* pkcs12, int nidCert,
+        WC_DerCertList* ca, byte* cert, word32 certSz, word32* certCiSz,
+        WC_RNG* rng, char* pass, word32 passSz, int iter)
+{
+    int algo;
+    int ret;
+    int type;
+
+    byte*  certBuf = NULL;
+    word32 certBufSz;
+    word32 idx;
+    word32 sz;
+    word32 tmpSz;
+
+    byte* certCi;
+    void* heap;
+
+    heap = wc_PKCS12_GetHeap(pkcs12);
+    switch (nidCert) {
+        case PBE_SHA1_RC4_128:
+            type = WC_PKCS12_ENCRYPTED_DATA;
+            algo = 1;
+            break;
+
+        case PBE_SHA1_DES:
+            type = WC_PKCS12_ENCRYPTED_DATA;
+            algo = 2;
+            break;
+
+        case PBE_SHA1_DES3:
+            type = WC_PKCS12_ENCRYPTED_DATA;
+            algo = 3;
+            break;
+
+        case -1:
+            type = WC_PKCS12_DATA;
+            algo = -1;
+            break;
+
+        default:
+            WOLFSSL_MSG("Unknown/Unsupported certificate encryption");
+            return NULL;
+    }
+
+    /* get max size of buffer needed */
+    ret = wc_PKCS12_create_cert_bag(pkcs12, NULL, &certBufSz, cert, certSz);
+    if (ret != LENGTH_ONLY_E) {
+        return NULL;
+    }
+
+    if (ca != NULL) {
+        WC_DerCertList* current = ca;
+        word32 curBufSz = 0;
+
+        /* get max buffer size */
+        while (current != NULL) {
+            ret = wc_PKCS12_create_cert_bag(pkcs12, NULL, &curBufSz,
+                    current->buffer, current->bufferSz);
+            if (ret != LENGTH_ONLY_E) {
+                return NULL;
+            }
+            certBufSz += curBufSz;
+            current    = current->next;
+        }
+    }
+
+    /* account for Sequence that holds all certificate bags */
+    certBufSz += MAX_SEQ_SZ;
+
+    /* completed getting max size, now create buffer and start adding bags */
+    certBuf = (byte*)XMALLOC(certBufSz, heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (certBuf == NULL) {
+        WOLFSSL_MSG("Memory error creating certificate bags");
+        return NULL;
+    }
+
+    idx = 0;
+    idx += MAX_SEQ_SZ;
+
+    sz = certBufSz - idx;
+    if ((ret = wc_PKCS12_create_cert_bag(pkcs12, certBuf + idx, &sz,
+            cert, certSz)) < 0) {
+        XFREE(certBuf, heap, DYNAMIC_TYPE_TMP_BUFFER);
+        return NULL;
+    }
+    idx += ret;
+
+    if (ca != NULL) {
+        WC_DerCertList* current = ca;
+
+        while (current != NULL) {
+            sz = certBufSz - idx;
+            if ((ret = wc_PKCS12_create_cert_bag(pkcs12, certBuf + idx, &sz,
+               current->buffer, current->bufferSz)) < 0) {
+                XFREE(certBuf, heap, DYNAMIC_TYPE_TMP_BUFFER);
+                return NULL;
+            }
+            idx    += ret;
+            current = current->next;
+        }
+    }
+
+    /* set sequence and create encrypted content with all certificate bags */
+    tmpSz = SetSequence(idx - MAX_SEQ_SZ, certBuf);
+    XMEMMOVE(certBuf + tmpSz, certBuf + MAX_SEQ_SZ, idx - MAX_SEQ_SZ);
+    certBufSz = tmpSz + (idx - MAX_SEQ_SZ);
+
+    /* get buffer size needed for content info */
+    ret = wc_PKCS12_encrypt_content(pkcs12, rng, NULL, certCiSz,
+            NULL, certBufSz, algo, pass, passSz, iter, type);
+    if (ret != LENGTH_ONLY_E) {
+        XFREE(certBuf, heap, DYNAMIC_TYPE_TMP_BUFFER);
+        WOLFSSL_LEAVE("wc_PKCS12_create()", ret);
+        return NULL;
+    }
+    certCi = (byte*)XMALLOC(*certCiSz, heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (certCi == NULL) {
+        XFREE(certBuf, heap, DYNAMIC_TYPE_TMP_BUFFER);
+        return NULL;
+    }
+
+    ret = wc_PKCS12_encrypt_content(pkcs12, rng, certCi, certCiSz,
+            certBuf, certBufSz, algo, pass, passSz, iter, type);
+    XFREE(certBuf, heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (ret < 0) {
+        WOLFSSL_LEAVE("wc_PKCS12_create()", ret);
+        return NULL;
+    }
+    *certCiSz = ret;
+
+    #ifdef WOLFSSL_DEBUG_PKCS12
+    {
+        word32 i;
+        printf("(size %u) Encrypted Certificate Content Info = ", *certCiSz);
+        for (i = 0; i < *certCiSz; i++)
+            printf("%02X", certCi[i]);
+        printf("\n");
+    }
+    #endif
+
+    (void)heap;
+    return certCi;
+}
+
+
+/* helper function to create the PKCS12 safe
+ * returns 0 on success */
+static int PKCS12_create_safe(WC_PKCS12* pkcs12, byte* certCi, word32 certCiSz,
+        byte* keyCi, word32 keyCiSz, WC_RNG* rng, char* pass, word32 passSz,
+        int iter)
+{
+    int length;
+    int ret;
+    byte seq[MAX_SEQ_SZ];
+    word32 safeDataSz;
+    word32 innerDataSz;
+    byte *innerData = NULL;
+    byte *safeData  = NULL;
+    word32 idx;
+
+    innerDataSz = certCiSz + keyCiSz+SetSequence(certCiSz + keyCiSz, seq);
+
+    /* add Content Info structs to safe, key first then cert */
+    ret = wc_PKCS12_encrypt_content(pkcs12, rng, NULL, &safeDataSz,
+            NULL, innerDataSz, 0, NULL, 0, 0, WC_PKCS12_DATA);
+    if (ret != LENGTH_ONLY_E) {
+        return ret;
+    }
+
+    safeData = (byte*)XMALLOC(safeDataSz, pkcs12->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (safeData == NULL) {
+        WOLFSSL_MSG("Error malloc'ing safe data buffer");
+        return MEMORY_E;
+    }
+
+    /* create sequence of inner data */
+    innerData = (byte*)XMALLOC(innerDataSz, pkcs12->heap, DYNAMIC_TYPE_PKCS);
+    if (innerData == NULL) {
+        WOLFSSL_MSG("Error malloc'ing inner data buffer");
+        XFREE(safeData, pkcs12->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        return MEMORY_E;
+    }
+    idx  = 0;
+    idx += SetSequence(certCiSz + keyCiSz, innerData);
+    XMEMCPY(innerData + idx, certCi, certCiSz);
+    XMEMCPY(innerData + idx + certCiSz, keyCi, keyCiSz);
+
+    ret = wc_PKCS12_encrypt_content(pkcs12, rng, safeData, &safeDataSz,
+            innerData, innerDataSz, 0, pass, passSz, iter, WC_PKCS12_DATA);
+    XFREE(innerData, pkcs12->heap, DYNAMIC_TYPE_PKCS);
+    if (ret < 0 ) {
+        WOLFSSL_MSG("Error setting data type for safe contents");
+        XFREE(safeData, pkcs12->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        return ret;
+    }
+    idx = 0;
+
+    ret = GetSequence(safeData, &idx, &length, safeDataSz);
+    if (ret < 0) {
+        WOLFSSL_MSG("Error getting first sequence of safe");
+        XFREE(safeData, pkcs12->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        return ret;
+    }
+
+    ret = GetSafeContent(pkcs12, safeData, &idx, safeDataSz);
+    XFREE(safeData, pkcs12->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (ret < 0) {
+        WOLFSSL_MSG("Unable to create safe contents");
+        return ret;
+    }
+    return 0;
+}
+
+
 /*
  * pass : password to use with encryption
  * passSz : size of the password buffer
@@ -1613,27 +2231,15 @@
         byte* key, word32 keySz, byte* cert, word32 certSz, WC_DerCertList* ca,
         int nidKey, int nidCert, int iter, int macIter, int keyType, void* heap)
 {
-    WC_PKCS12*         pkcs12;
-    AuthenticatedSafe* safe;
-    ContentInfo*       ci;
-    WC_RNG rng;
-    int algo;
+    WC_PKCS12* pkcs12;
+    WC_RNG     rng;
     int ret;
-    int type;
-    word32 idx;
-    word32 sz;
-    word32 tmpSz;
 
     byte*  certCi = NULL;
+    byte*  keyCi  = NULL;
     word32 certCiSz;
-    byte*  keyCi;
     word32 keyCiSz;
 
-    byte*  certBuf = NULL;
-    word32 certBufSz;
-    byte*  keyBuf;
-    word32 keyBufSz = 0;
-
     WOLFSSL_ENTER("wc_PKCS12_create()");
 
     if ((ret = wc_InitRng_ex(&rng, heap, INVALID_DEVID)) != 0) {
@@ -1658,320 +2264,35 @@
     }
 
     /**** add private key bag ****/
-    switch (nidKey) {
-        case PBE_SHA1_RC4_128:
-            algo = 1;
-            break;
-
-        case PBE_SHA1_DES:
-            algo = 2;
-            break;
-
-        case PBE_SHA1_DES3:
-            algo = 3;
-            break;
-
-        /* no encryption */
-        case -1:
-            algo = -1;
-            break;
-
-        default:
-            WOLFSSL_MSG("Unknown/Unsupported key encryption");
-            wc_PKCS12_free(pkcs12);
-            wc_FreeRng(&rng);
-            return NULL;
-    }
-
-    /* get max size for key bag */
-    ret = wc_PKCS12_create_key_bag(pkcs12, &rng, NULL, &keyBufSz, key, keySz,
-            algo, iter, pass, passSz);
-    if (ret != LENGTH_ONLY_E && ret < 0) {
+    keyCi = PKCS12_create_key_content(pkcs12, nidKey, &keyCiSz, &rng,
+            pass, passSz, key, keySz, iter);
+    if (keyCi == NULL) {
         wc_PKCS12_free(pkcs12);
         wc_FreeRng(&rng);
-        WOLFSSL_LEAVE("wc_PKCS12_create", ret);
-        return NULL;
-    }
-
-    /* account for sequence around bag */
-    keyBufSz += MAX_SEQ_SZ;
-
-    keyBuf = (byte*)XMALLOC(keyBufSz, heap, DYNAMIC_TYPE_TMP_BUFFER);
-    if (keyBuf == NULL) {
-        wc_PKCS12_free(pkcs12);
-        wc_FreeRng(&rng);
-        WOLFSSL_LEAVE("wc_PKCS12_create", MEMORY_E);
         return NULL;
     }
 
-    ret = wc_PKCS12_create_key_bag(pkcs12, &rng, keyBuf + MAX_SEQ_SZ, &keyBufSz,
-            key, keySz, algo, iter, pass, passSz);
-    if (ret < 0) {
-        wc_PKCS12_free(pkcs12);
-        wc_FreeRng(&rng);
-        XFREE(keyBuf, heap, DYNAMIC_TYPE_TMP_BUFFER);
-        WOLFSSL_LEAVE("wc_PKCS12_create", ret);
-        return NULL;
-    }
-    keyBufSz = ret;
-
-    tmpSz = SetSequence(keyBufSz, keyBuf);
-    XMEMMOVE(keyBuf + tmpSz, keyBuf + MAX_SEQ_SZ, keyBufSz);
-    keyBufSz += tmpSz;
-
-    ret = wc_PKCS12_encrypt_content(pkcs12, &rng, NULL, &keyCiSz,
-            NULL, keyBufSz, algo, pass, passSz, iter, WC_PKCS12_DATA);
-    if (ret != LENGTH_ONLY_E) {
-        wc_PKCS12_free(pkcs12);
-        wc_FreeRng(&rng);
-        XFREE(keyBuf, heap, DYNAMIC_TYPE_TMP_BUFFER);
-        WOLFSSL_LEAVE("wc_PKCS12_create", ret);
-        return NULL;
-    }
-    keyCi = (byte*)XMALLOC(keyCiSz, heap, DYNAMIC_TYPE_TMP_BUFFER);
-    if (keyCi == NULL) {
-        wc_PKCS12_free(pkcs12);
-        wc_FreeRng(&rng);
-        XFREE(keyBuf, heap, DYNAMIC_TYPE_TMP_BUFFER);
-        return NULL;
-    }
-
-    ret = wc_PKCS12_encrypt_content(pkcs12, &rng, keyCi, &keyCiSz,
-            keyBuf, keyBufSz, algo, pass, passSz, iter, WC_PKCS12_DATA);
-    if (ret < 0 ) {
-        wc_PKCS12_free(pkcs12);
-        wc_FreeRng(&rng);
-        XFREE(keyBuf, heap, DYNAMIC_TYPE_TMP_BUFFER);
-        XFREE(keyCi, heap, DYNAMIC_TYPE_TMP_BUFFER);
-        WOLFSSL_LEAVE("wc_PKCS12_create", ret);
-        return NULL;
-    }
-    keyCiSz = ret;
-    XFREE(keyBuf, heap, DYNAMIC_TYPE_TMP_BUFFER);
-
-    #ifdef WOLFSSL_DEBUG_PKCS12
-    {
-        byte* p;
-        for (printf("(size %u) Key Content Info = ", keyCiSz), p = (byte*)keyCi;
-            p < (byte*)keyCi + keyCiSz;
-            printf("%02X", *p), p++);
-        printf("\n");
-    }
-    #endif
-
-
     /**** add main certificate bag and extras ****/
-    switch (nidCert) {
-        case PBE_SHA1_RC4_128:
-            type = WC_PKCS12_ENCRYPTED_DATA;
-            algo = 1;
-            break;
-
-        case PBE_SHA1_DES:
-            type = WC_PKCS12_ENCRYPTED_DATA;
-            algo = 2;
-            break;
-
-        case PBE_SHA1_DES3:
-            type = WC_PKCS12_ENCRYPTED_DATA;
-            algo = 3;
-            break;
-
-        case -1:
-            type = WC_PKCS12_DATA;
-            algo = -1;
-            break;
-
-        default:
-            WOLFSSL_MSG("Unknown/Unsupported certificate encryption");
-            XFREE(keyCi, heap, DYNAMIC_TYPE_TMP_BUFFER);
-            wc_PKCS12_free(pkcs12);
-            wc_FreeRng(&rng);
-            return NULL;
-    }
-
-    /* get max size of buffer needed */
-    ret = wc_PKCS12_create_cert_bag(pkcs12, NULL, &certBufSz, cert, certSz);
-    if (ret != LENGTH_ONLY_E) {
+    certCi = PKCS12_create_cert_content(pkcs12, nidCert, ca, cert, certSz,
+            &certCiSz, &rng, pass, passSz, iter);
+    if (certCi == NULL) {
         XFREE(keyCi, heap, DYNAMIC_TYPE_TMP_BUFFER);
         wc_PKCS12_free(pkcs12);
         wc_FreeRng(&rng);
         return NULL;
     }
 
-    if (ca != NULL) {
-        WC_DerCertList* current = ca;
-        word32 curBufSz = 0;
-
-        /* get max buffer size */
-        while (current != NULL) {
-            ret = wc_PKCS12_create_cert_bag(pkcs12, NULL, &curBufSz,
-                    current->buffer, current->bufferSz);
-            if (ret != LENGTH_ONLY_E) {
-                XFREE(keyCi, heap, DYNAMIC_TYPE_TMP_BUFFER);
-                wc_PKCS12_free(pkcs12);
-                wc_FreeRng(&rng);
-                return NULL;
-            }
-            certBufSz += curBufSz;
-            current    = current->next;
-        }
-    }
-
-    /* account for Sequence that holds all certificate bags */
-    certBufSz += MAX_SEQ_SZ;
-
-    /* completed getting max size, now create buffer and start adding bags */
-    certBuf = (byte*)XMALLOC(certBufSz, heap, DYNAMIC_TYPE_TMP_BUFFER);
-    if (certBuf == NULL) {
-        XFREE(keyCi, heap, DYNAMIC_TYPE_TMP_BUFFER);
-        wc_PKCS12_free(pkcs12);
-        wc_FreeRng(&rng);
-        WOLFSSL_MSG("Memory error creating certificate bags");
-        return NULL;
-    }
-
-    idx = 0;
-    idx += MAX_SEQ_SZ;
-
-    sz = certBufSz - idx;
-    if ((ret = wc_PKCS12_create_cert_bag(pkcs12, certBuf + idx, &sz,
-            cert, certSz)) < 0) {
-        XFREE(keyCi, heap, DYNAMIC_TYPE_TMP_BUFFER);
-        XFREE(certBuf, heap, DYNAMIC_TYPE_TMP_BUFFER);
+    /**** create safe and Content Info ****/
+    ret = PKCS12_create_safe(pkcs12, certCi, certCiSz, keyCi, keyCiSz, &rng,
+            pass, passSz, iter);
+    XFREE(keyCi,  heap, DYNAMIC_TYPE_TMP_BUFFER);
+    XFREE(certCi, heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (ret != 0) {
+        WOLFSSL_MSG("Unable to create PKCS12 safe");
         wc_PKCS12_free(pkcs12);
         wc_FreeRng(&rng);
         return NULL;
     }
-    idx += ret;
-
-    if (ca != NULL) {
-        WC_DerCertList* current = ca;
-
-        while (current != NULL) {
-            sz = certBufSz - idx;
-            if ((ret = wc_PKCS12_create_cert_bag(pkcs12, certBuf + idx, &sz,
-               current->buffer, current->bufferSz)) < 0) {
-                XFREE(keyCi, heap, DYNAMIC_TYPE_TMP_BUFFER);
-                XFREE(certBuf, heap, DYNAMIC_TYPE_TMP_BUFFER);
-                wc_PKCS12_free(pkcs12);
-                wc_FreeRng(&rng);
-                return NULL;
-            }
-            idx    += ret;
-            current = current->next;
-        }
-    }
-
-    /* set sequence and create encrypted content with all certificate bags */
-    tmpSz = SetSequence(idx - MAX_SEQ_SZ, certBuf);
-    XMEMMOVE(certBuf + tmpSz, certBuf + MAX_SEQ_SZ, idx - MAX_SEQ_SZ);
-    certBufSz = tmpSz + (idx - MAX_SEQ_SZ);
-
-    /* get buffer size needed for content info */
-    ret = wc_PKCS12_encrypt_content(pkcs12, &rng, NULL, &certCiSz,
-            NULL, certBufSz, algo, pass, passSz, iter, type);
-    if (ret != LENGTH_ONLY_E) {
-        XFREE(keyCi, heap, DYNAMIC_TYPE_TMP_BUFFER);
-        XFREE(certBuf, heap, DYNAMIC_TYPE_TMP_BUFFER);
-        wc_PKCS12_free(pkcs12);
-        wc_FreeRng(&rng);
-        WOLFSSL_LEAVE("wc_PKCS12_create()", ret);
-        return NULL;
-    }
-    certCi = (byte*)XMALLOC(certCiSz, heap, DYNAMIC_TYPE_TMP_BUFFER);
-    if (certCi == NULL) {
-        XFREE(keyCi, heap, DYNAMIC_TYPE_TMP_BUFFER);
-        XFREE(certBuf, heap, DYNAMIC_TYPE_TMP_BUFFER);
-        wc_PKCS12_free(pkcs12);
-        wc_FreeRng(&rng);
-        return NULL;
-    }
-
-    ret = wc_PKCS12_encrypt_content(pkcs12, &rng, certCi, &certCiSz,
-            certBuf, certBufSz, algo, pass, passSz, iter, type);
-    if (ret < 0) {
-        XFREE(keyCi, heap, DYNAMIC_TYPE_TMP_BUFFER);
-        XFREE(certBuf, heap, DYNAMIC_TYPE_TMP_BUFFER);
-        XFREE(certCi, heap, DYNAMIC_TYPE_TMP_BUFFER);
-        wc_PKCS12_free(pkcs12);
-        wc_FreeRng(&rng);
-        WOLFSSL_LEAVE("wc_PKCS12_create()", ret);
-        return NULL;
-    }
-    certCiSz = ret;
-    XFREE(certBuf, heap, DYNAMIC_TYPE_TMP_BUFFER);
-
-    #ifdef WOLFSSL_DEBUG_PKCS12
-    {
-        byte* p;
-        for (printf("(size %u) Encrypted Certificate Content Info = ",certCiSz),
-                p = (byte*)certCi;
-            p < (byte*)certCi + certCiSz;
-            printf("%02X", *p), p++);
-        printf("\n");
-    }
-    #endif
-
-    /**** create safe and and Content Info ****/
-    safe = (AuthenticatedSafe*)XMALLOC(sizeof(AuthenticatedSafe), heap,
-            DYNAMIC_TYPE_PKCS);
-    if (safe == NULL) {
-        XFREE(keyCi, heap, DYNAMIC_TYPE_TMP_BUFFER);
-        XFREE(certCi, heap, DYNAMIC_TYPE_TMP_BUFFER);
-        wc_PKCS12_free(pkcs12);
-        wc_FreeRng(&rng);
-        return NULL;
-    }
-    pkcs12->safe = safe; /* set so all of safe is free'd with wc_PKCS12_free */
-    XMEMSET(safe, 0, sizeof(AuthenticatedSafe));
-
-    safe->dataSz = certCiSz + keyCiSz;
-    safe->data   = (byte*)XMALLOC(safe->dataSz, heap, DYNAMIC_TYPE_PKCS);
-    if (safe->data == NULL) {
-        XFREE(keyCi, heap, DYNAMIC_TYPE_TMP_BUFFER);
-        XFREE(certCi, heap, DYNAMIC_TYPE_TMP_BUFFER);
-        wc_PKCS12_free(pkcs12);
-        wc_FreeRng(&rng);
-        return NULL;
-    }
-    XMEMCPY(safe->data, certCi, certCiSz);
-    XMEMCPY(safe->data + certCiSz, keyCi, keyCiSz);
-    XFREE(certCi, heap, DYNAMIC_TYPE_TMP_BUFFER);
-    XFREE(keyCi,  heap, DYNAMIC_TYPE_TMP_BUFFER);
-
-    safe->numCI = 2;
-
-    /* add Content Info structs to safe, key first then cert */
-    ci = (ContentInfo*)XMALLOC(sizeof(ContentInfo), heap, DYNAMIC_TYPE_PKCS);
-    if (ci == NULL) {
-        wc_PKCS12_free(pkcs12);
-        wc_FreeRng(&rng);
-        return NULL;
-    }
-    XMEMSET(ci, 0, sizeof(ContentInfo));
-    safe->CI = ci;
-    ci->data = safe->data + certCiSz;
-    ci->dataSz = keyCiSz;
-    ci->type = WC_PKCS12_DATA;
-
-    ci = (ContentInfo*)XMALLOC(sizeof(ContentInfo), heap, DYNAMIC_TYPE_PKCS);
-    if (ci == NULL) {
-        wc_PKCS12_free(pkcs12);
-        wc_FreeRng(&rng);
-        return NULL;
-    }
-    XMEMSET(ci, 0, sizeof(ContentInfo));
-    ci->next = safe->CI;
-    safe->CI = ci;
-    ci->data = safe->data;
-    ci->dataSz = certCiSz;
-    if (nidCert < 0) {
-        ci->type = WC_PKCS12_DATA;
-    }
-    else {
-        ci->type = WC_PKCS12_ENCRYPTED_DATA;
-    }
 
     /* create MAC */
     if (macIter > 0) {
@@ -1982,6 +2303,7 @@
         if (mac == NULL) {
             wc_PKCS12_free(pkcs12);
             wc_FreeRng(&rng);
+            WOLFSSL_MSG("Error malloc'ing mac data buffer");
             return NULL;
         }
         XMEMSET(mac, 0, sizeof(MacData));
@@ -2011,6 +2333,7 @@
         if (mac->salt == NULL) {
             wc_PKCS12_free(pkcs12);
             wc_FreeRng(&rng);
+            WOLFSSL_MSG("Error malloc'ing salt data buffer");
             return NULL;
         }
 
@@ -2020,17 +2343,21 @@
             wc_FreeRng(&rng);
             return NULL;
         }
-        ret = wc_PKCS12_create_mac(pkcs12, safe->data, safe->dataSz,
-                         (const byte*)pass, passSz, digest, WC_MAX_DIGEST_SIZE);
+        ret = wc_PKCS12_create_mac(pkcs12, pkcs12->safe->data,
+                pkcs12->safe->dataSz, (const byte*)pass, passSz, digest,
+                WC_MAX_DIGEST_SIZE);
         if (ret < 0) {
             wc_PKCS12_free(pkcs12);
             wc_FreeRng(&rng);
+            WOLFSSL_MSG("Error creating mac");
+            WOLFSSL_LEAVE("wc_PKCS12_create", ret);
             return NULL;
         }
 
         mac->digestSz = ret;
         mac->digest = (byte*)XMALLOC(ret, heap, DYNAMIC_TYPE_PKCS);
         if (mac->digest == NULL) {
+            WOLFSSL_MSG("Error malloc'ing mac digest buffer");
             wc_PKCS12_free(pkcs12);
             wc_FreeRng(&rng);
             return NULL;
@@ -2073,5 +2400,5 @@
 
 #undef ERROR_OUT
 
-#endif /* !NO_ASN && !NO_PWDBASED */
+#endif /* !NO_ASN && !NO_PWDBASED && HAVE_PKCS12 */
 
--- a/wolfcrypt/src/pkcs7.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/pkcs7.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* pkcs7.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -38,6 +38,12 @@
 #ifdef HAVE_ECC
     #include <wolfssl/wolfcrypt/ecc.h>
 #endif
+#ifdef HAVE_LIBZ
+    #include <wolfssl/wolfcrypt/compress.h>
+#endif
+#ifndef NO_PWDBASED
+    #include <wolfssl/wolfcrypt/pwdbased.h>
+#endif
 #ifdef NO_INLINE
     #include <wolfssl/wolfcrypt/misc.h>
 #else
@@ -45,20 +51,454 @@
     #include <wolfcrypt/src/misc.c>
 #endif
 
-
 /* direction for processing, encoding or decoding */
 typedef enum {
     WC_PKCS7_ENCODE,
     WC_PKCS7_DECODE
 } pkcs7Direction;
 
+#define NO_USER_CHECK 0
+
+/* holds information about the signers */
+struct PKCS7SignerInfo {
+    int version;
+    byte  *sid;
+    word32 sidSz;
+};
+
+
+#ifndef NO_PKCS7_STREAM
+
+#define MAX_PKCS7_STREAM_BUFFER 256
+struct PKCS7State {
+    byte* tmpCert;
+    byte* bufferPt;
+    byte* key;
+    byte* nonce;    /* stored nonce */
+    byte* aad;      /* additional data for AEAD algos */
+    byte* tag;      /* tag data for AEAD algos */
+    byte* content;
+    byte* buffer;   /* main internal read buffer */
+
+    /* stack variables to store for when returning */
+    word32 varOne;
+    int    varTwo;
+    int    varThree;
+
+    word32 vers;
+    word32 idx;      /* index read into current input buffer */
+    word32 maxLen;   /* sanity cap on maximum amount of data to allow
+                      * needed for GetSequence and other calls */
+    word32 length;   /* amount of data stored */
+    word32 bufferSz; /* size of internal buffer */
+    word32 expected; /* next amount of data expected, if needed */
+    word32 totalRd;  /* total amount of bytes read */
+    word32 nonceSz;  /* size of nonce stored */
+    word32 aadSz;    /* size of additional AEAD data */
+    word32 tagSz;    /* size of tag for AEAD */
+    word32 contentSz;
+    byte tmpIv[MAX_CONTENT_IV_SIZE]; /* store IV if needed */
+#ifdef WC_PKCS7_STREAM_DEBUG
+    word32 peakUsed; /* most bytes used for struct at any one time */
+    word32 peakRead; /* most bytes used by read buffer */
+#endif
+    byte   multi:1;  /* flag for if content is in multiple parts */
+    byte   flagOne:1;
+    byte   detached:1; /* flag to indicate detached signature is present */
+};
+
+
+enum PKCS7_MaxLen {
+    PKCS7_DEFAULT_PEEK = 0,
+    PKCS7_SEQ_PEEK
+};
+
+/* creates a PKCS7State structure and returns 0 on success */
+static int wc_PKCS7_CreateStream(PKCS7* pkcs7)
+{
+    WOLFSSL_MSG("creating PKCS7 stream structure");
+    pkcs7->stream = (PKCS7State*)XMALLOC(sizeof(PKCS7State), pkcs7->heap,
+        DYNAMIC_TYPE_PKCS7);
+    if (pkcs7->stream == NULL) {
+        return MEMORY_E;
+    }
+    XMEMSET(pkcs7->stream, 0, sizeof(PKCS7State));
+#ifdef WC_PKCS7_STREAM_DEBUG
+    printf("\nCreating new PKCS#7 stream %p\n", pkcs7->stream);
+#endif
+    return 0;
+}
+
+
+static void wc_PKCS7_ResetStream(PKCS7* pkcs7)
+{
+    if (pkcs7 != NULL && pkcs7->stream != NULL) {
+#ifdef WC_PKCS7_STREAM_DEBUG
+        /* collect final data point in case more was read right before reset */
+        if (pkcs7->stream->length > pkcs7->stream->peakRead) {
+            pkcs7->stream->peakRead = pkcs7->stream->length;
+        }
+        if (pkcs7->stream->bufferSz + pkcs7->stream->aadSz +
+                pkcs7->stream->nonceSz + pkcs7->stream->tagSz >
+                pkcs7->stream->peakUsed) {
+            pkcs7->stream->peakUsed = pkcs7->stream->bufferSz +
+                pkcs7->stream->aadSz + pkcs7->stream->nonceSz +
+                pkcs7->stream->tagSz;
+        }
+
+        /* print out debugging statistics */
+        if (pkcs7->stream->peakUsed > 0 || pkcs7->stream->peakRead > 0) {
+            printf("PKCS#7 STREAM:\n\tPeak heap used by struct = %d"
+                                 "\n\tPeak read buffer bytes   = %d"
+                                 "\n\tTotal bytes read         = %d"
+                                 "\n",
+                   pkcs7->stream->peakUsed, pkcs7->stream->peakRead,
+                   pkcs7->stream->totalRd);
+        }
+        printf("PKCS#7 stream reset : Address [%p]\n", pkcs7->stream);
+    #endif
+
+        /* free any buffers that may be allocated */
+        XFREE(pkcs7->stream->aad, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        XFREE(pkcs7->stream->tag, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        XFREE(pkcs7->stream->nonce, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        XFREE(pkcs7->stream->buffer, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        XFREE(pkcs7->stream->key, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        pkcs7->stream->aad    = NULL;
+        pkcs7->stream->tag    = NULL;
+        pkcs7->stream->nonce  = NULL;
+        pkcs7->stream->buffer = NULL;
+        pkcs7->stream->key    = NULL;
+
+        /* reset values, note that content and tmpCert are saved */
+        pkcs7->stream->maxLen   = 0;
+        pkcs7->stream->length   = 0;
+        pkcs7->stream->idx      = 0;
+        pkcs7->stream->expected = 0;
+        pkcs7->stream->totalRd  = 0;
+        pkcs7->stream->bufferSz = 0;
+
+        pkcs7->stream->multi    = 0;
+        pkcs7->stream->flagOne  = 0;
+        pkcs7->stream->detached = 0;
+        pkcs7->stream->varOne   = 0;
+        pkcs7->stream->varTwo   = 0;
+        pkcs7->stream->varThree = 0;
+    }
+}
+
+
+static void wc_PKCS7_FreeStream(PKCS7* pkcs7)
+{
+    if (pkcs7 != NULL && pkcs7->stream != NULL) {
+        wc_PKCS7_ResetStream(pkcs7);
+
+        XFREE(pkcs7->stream->content, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        XFREE(pkcs7->stream->tmpCert, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        pkcs7->stream->content = NULL;
+        pkcs7->stream->tmpCert = NULL;
+
+        XFREE(pkcs7->stream, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        pkcs7->stream = NULL;
+    }
+}
+
+
+/* used to increase the max size for internal buffer
+ * returns 0 on success  */
+static int wc_PKCS7_GrowStream(PKCS7* pkcs7, word32 newSz)
+{
+    byte* pt;
+
+    pt = (byte*)XMALLOC(newSz, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+    if (pt == NULL) {
+        return MEMORY_E;
+    }
+    XMEMCPY(pt, pkcs7->stream->buffer, pkcs7->stream->bufferSz);
+
+#ifdef WC_PKCS7_STREAM_DEBUG
+    printf("PKCS7 increasing internal stream buffer %d -> %d\n",
+            pkcs7->stream->bufferSz, newSz);
+#endif
+    pkcs7->stream->bufferSz = newSz;
+    XFREE(pkcs7->stream->buffer, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+    pkcs7->stream->buffer = pt;
+    return 0;
+}
+
+
+/* pt gets set to the buffer that is holding data in the case that stream struct
+ *    is used.
+ *
+ * Sets idx to be the current offset into "pt" buffer
+ * returns 0 on success
+ */
+static int wc_PKCS7_AddDataToStream(PKCS7* pkcs7, byte* in, word32 inSz,
+        word32 expected, byte** pt, word32* idx)
+{
+    word32 rdSz = pkcs7->stream->idx;
+
+    /* If the input size minus current index into input buffer is greater than
+     * the expected size then use the input buffer. If data is already stored
+     * in stream buffer or if there is not enough input data available then use
+     * the stream buffer. */
+    if (inSz - rdSz >= expected && pkcs7->stream->length == 0) {
+        /* storing input buffer is not needed */
+        *pt  = in; /* reset in case previously used internal buffer */
+        *idx = rdSz;
+        return 0;
+    }
+
+    /* is there enough stored in buffer already? */
+    if (pkcs7->stream->length >= expected) {
+        *idx = 0; /* start reading from beginning of stream buffer */
+        *pt  = pkcs7->stream->buffer;
+        return 0;
+    }
+
+    /* check if all data has been read from input */
+    if (rdSz >= inSz) {
+        /* no more input to read, reset input index and request more data */
+        pkcs7->stream->idx = 0;
+        return WC_PKCS7_WANT_READ_E;
+    }
+
+    /* try to store input data into stream buffer */
+    if (inSz - rdSz > 0 && pkcs7->stream->length < expected) {
+        int len = min(inSz - rdSz, expected - pkcs7->stream->length);
+
+        /* sanity check that the input buffer is not internal buffer */
+        if (in == pkcs7->stream->buffer) {
+            return WC_PKCS7_WANT_READ_E;
+        }
+
+        /* check if internal buffer size needs to be increased */
+        if (len + pkcs7->stream->length > pkcs7->stream->bufferSz) {
+            int ret = wc_PKCS7_GrowStream(pkcs7, expected);
+            if (ret < 0) {
+                return ret;
+            }
+        }
+        XMEMCPY(pkcs7->stream->buffer + pkcs7->stream->length, in + rdSz, len);
+        pkcs7->stream->length  += len;
+        pkcs7->stream->idx     += len;
+        pkcs7->stream->totalRd += len;
+    }
+
+#ifdef WC_PKCS7_STREAM_DEBUG
+    /* collects memory usage for debugging */
+    if (pkcs7->stream->length > pkcs7->stream->peakRead) {
+        pkcs7->stream->peakRead = pkcs7->stream->length;
+    }
+    if (pkcs7->stream->bufferSz + pkcs7->stream->aadSz + pkcs7->stream->nonceSz +
+        pkcs7->stream->tagSz > pkcs7->stream->peakUsed) {
+        pkcs7->stream->peakUsed = pkcs7->stream->bufferSz +
+           pkcs7->stream->aadSz + pkcs7->stream->nonceSz + pkcs7->stream->tagSz;
+    }
+#endif
+
+    /* if not enough data was read in then request more */
+    if (pkcs7->stream->length < expected) {
+        pkcs7->stream->idx = 0;
+        return WC_PKCS7_WANT_READ_E;
+    }
+
+    /* adjust pointer to read from stored buffer */
+    *idx = 0;
+    *pt  = pkcs7->stream->buffer;
+    return 0;
+}
+
+
+/* Does two things
+ *  1) Tries to get the length from current buffer and set it as max length
+ *  2) Retrieves the set max length
+ *
+ * if no flag value is set then the stored max length is returned.
+ * returns length found on success and defSz if no stored data is found
+ */
+static long wc_PKCS7_GetMaxStream(PKCS7* pkcs7, byte flag, byte* in,
+        word32 defSz)
+{
+    /* check there is a buffer to read from */
+    if (pkcs7) {
+        int     length = 0, ret;
+        word32  idx = 0, maxIdx;
+        byte*   pt;
+
+        if (flag != PKCS7_DEFAULT_PEEK) {
+            if (pkcs7->stream->length > 0) {
+                length = pkcs7->stream->length;
+                pt     = pkcs7->stream->buffer;
+            }
+            else {
+                length = defSz;
+                pt     = in;
+            }
+            maxIdx = (word32)length;
+
+            if (length < MAX_SEQ_SZ) {
+                WOLFSSL_MSG("PKCS7 Error not enough data for SEQ peek\n");
+                return 0;
+            }
+            if (flag == PKCS7_SEQ_PEEK) {
+                if ((ret = GetSequence_ex(pt, &idx, &length, maxIdx,
+                                NO_USER_CHECK)) < 0) {
+                    return ret;
+                }
+
+            #ifdef ASN_BER_TO_DER
+                if (length == 0 && ret == 0) {
+                    idx = 0;
+                    if ((ret = wc_BerToDer(pt, defSz, NULL,
+                                    (word32*)&length)) != LENGTH_ONLY_E) {
+                        return ret;
+                    }
+                }
+            #endif /* ASN_BER_TO_DER */
+                pkcs7->stream->maxLen = length + idx;
+            }
+        }
+
+        if (pkcs7->stream->maxLen == 0) {
+            pkcs7->stream->maxLen = defSz;
+        }
+
+        return pkcs7->stream->maxLen;
+    }
+
+    return defSz;
+}
+
+
+/* setter function for stored variables */
+static void wc_PKCS7_StreamStoreVar(PKCS7* pkcs7, word32 var1, int var2,
+        int var3)
+{
+    if (pkcs7 != NULL && pkcs7->stream != NULL) {
+        pkcs7->stream->varOne   = var1;
+        pkcs7->stream->varTwo   = var2;
+        pkcs7->stream->varThree = var3;
+    }
+}
+
+/* getter function for stored variables */
+static void wc_PKCS7_StreamGetVar(PKCS7* pkcs7, word32* var1, int* var2,
+        int* var3)
+{
+    if (pkcs7 != NULL && pkcs7->stream != NULL) {
+        if (var1 != NULL) *var1 = pkcs7->stream->varOne;
+        if (var2 != NULL) *var2 = pkcs7->stream->varTwo;
+        if (var3 != NULL) *var3 = pkcs7->stream->varThree;
+    }
+}
+
+
+/* common update of index and total read after section complete
+ * returns 0 on success */
+static int wc_PKCS7_StreamEndCase(PKCS7* pkcs7, word32* tmpIdx, word32* idx)
+{
+    int ret = 0;
+
+    if (pkcs7->stream->length > 0) {
+        if (pkcs7->stream->length < *idx) {
+            WOLFSSL_MSG("PKCS7 read too much data from internal buffer");
+            ret = BUFFER_E;
+        }
+        else {
+            XMEMMOVE(pkcs7->stream->buffer, pkcs7->stream->buffer + *idx,
+                 pkcs7->stream->length - *idx);
+            pkcs7->stream->length -= *idx;
+        }
+    }
+    else {
+        pkcs7->stream->totalRd += *idx - *tmpIdx;
+        pkcs7->stream->idx = *idx; /* adjust index into input buffer */
+        *tmpIdx = *idx;
+    }
+
+    return ret;
+}
+#endif /* NO_PKCS7_STREAM */
+
+#ifdef WC_PKCS7_STREAM_DEBUG
+/* used to print out human readable state for debugging */
+static const char* wc_PKCS7_GetStateName(int in)
+{
+    switch (in) {
+        case WC_PKCS7_START: return "WC_PKCS7_START";
+
+        case WC_PKCS7_STAGE2: return "WC_PKCS7_STAGE2";
+        case WC_PKCS7_STAGE3: return "WC_PKCS7_STAGE3";
+        case WC_PKCS7_STAGE4: return "WC_PKCS7_STAGE4";
+        case WC_PKCS7_STAGE5: return "WC_PKCS7_STAGE5";
+        case WC_PKCS7_STAGE6: return "WC_PKCS7_STAGE6";
+
+        /* parse info set */
+        case WC_PKCS7_INFOSET_START:  return "WC_PKCS7_INFOSET_START";
+        case WC_PKCS7_INFOSET_BER:    return "WC_PKCS7_INFOSET_BER";
+        case WC_PKCS7_INFOSET_STAGE1: return "WC_PKCS7_INFOSET_STAGE1";
+        case WC_PKCS7_INFOSET_STAGE2: return "WC_PKCS7_INFOSET_STAGE2";
+        case WC_PKCS7_INFOSET_END:    return "WC_PKCS7_INFOSET_END";
+
+        /* decode enveloped data */
+        case WC_PKCS7_ENV_2: return "WC_PKCS7_ENV_2";
+        case WC_PKCS7_ENV_3: return "WC_PKCS7_ENV_3";
+        case WC_PKCS7_ENV_4: return "WC_PKCS7_ENV_4";
+        case WC_PKCS7_ENV_5: return "WC_PKCS7_ENV_5";
+
+        /* decode auth enveloped */
+        case WC_PKCS7_AUTHENV_2: return "WC_PKCS7_AUTHENV_2";
+        case WC_PKCS7_AUTHENV_3: return "WC_PKCS7_AUTHENV_3";
+        case WC_PKCS7_AUTHENV_4: return "WC_PKCS7_AUTHENV_4";
+        case WC_PKCS7_AUTHENV_5: return "WC_PKCS7_AUTHENV_5";
+        case WC_PKCS7_AUTHENV_6: return "WC_PKCS7_AUTHENV_6";
+        case WC_PKCS7_AUTHENV_ATRB: return "WC_PKCS7_AUTHENV_ATRB";
+        case WC_PKCS7_AUTHENV_ATRBEND: return "WC_PKCS7_AUTHENV_ATRBEND";
+        case WC_PKCS7_AUTHENV_7: return "WC_PKCS7_AUTHENV_7";
+
+        /* decryption state types */
+        case WC_PKCS7_DECRYPT_KTRI:   return "WC_PKCS7_DECRYPT_KTRI";
+        case WC_PKCS7_DECRYPT_KTRI_2: return "WC_PKCS7_DECRYPT_KTRI_2";
+        case WC_PKCS7_DECRYPT_KTRI_3: return "WC_PKCS7_DECRYPT_KTRI_3";
+
+        case WC_PKCS7_DECRYPT_KARI:  return "WC_PKCS7_DECRYPT_KARI";
+        case WC_PKCS7_DECRYPT_KEKRI: return "WC_PKCS7_DECRYPT_KEKRI";
+        case WC_PKCS7_DECRYPT_PWRI:  return "WC_PKCS7_DECRYPT_PWRI";
+        case WC_PKCS7_DECRYPT_ORI:   return "WC_PKCS7_DECRYPT_ORI";
+        case WC_PKCS7_DECRYPT_DONE:  return "WC_PKCS7_DECRYPT_DONE";
+
+        case WC_PKCS7_VERIFY_STAGE2: return "WC_PKCS7_VERIFY_STAGE2";
+        case WC_PKCS7_VERIFY_STAGE3: return "WC_PKCS7_VERIFY_STAGE3";
+        case WC_PKCS7_VERIFY_STAGE4: return "WC_PKCS7_VERIFY_STAGE4";
+        case WC_PKCS7_VERIFY_STAGE5: return "WC_PKCS7_VERIFY_STAGE5";
+        case WC_PKCS7_VERIFY_STAGE6: return "WC_PKCS7_VERIFY_STAGE6";
+
+        default:
+            return "Unknown state";
+    }
+}
+#endif
+
+/* Used to change the PKCS7 state. Having state change as a function allows
+ * for easier debugging */
+static void wc_PKCS7_ChangeState(PKCS7* pkcs7, int newState)
+{
+#ifdef WC_PKCS7_STREAM_DEBUG
+    printf("\tChanging from state [%02d] %s to [%02d] %s\n",
+            pkcs7->state, wc_PKCS7_GetStateName(pkcs7->state),
+            newState, wc_PKCS7_GetStateName(newState));
+#endif
+    pkcs7->state = newState;
+}
+
 #define MAX_PKCS7_DIGEST_SZ (MAX_SEQ_SZ + MAX_ALGO_SZ + \
                              MAX_OCTET_STR_SZ + WC_MAX_DIGEST_SIZE)
 
 
 /* placed ASN.1 contentType OID into *output, return idx on success,
  * 0 upon failure */
-static int wc_SetContentType(int pkcs7TypeOID, byte* output)
+static int wc_SetContentType(int pkcs7TypeOID, byte* output, word32 outputSz)
 {
     /* PKCS#7 content types, RFC 2315, section 14 */
     const byte pkcs7[]              = { 0x2A, 0x86, 0x48, 0x86, 0xF7,
@@ -69,18 +509,34 @@
                                                0x0D, 0x01, 0x07, 0x02};
     const byte envelopedData[]      = { 0x2A, 0x86, 0x48, 0x86, 0xF7,
                                                0x0D, 0x01, 0x07, 0x03 };
+    const byte authEnvelopedData[]  = { 0x2A, 0x86, 0x48, 0x86, 0xF7,
+                                        0x0D, 0x01, 0x09, 0x10, 0x01, 0x17};
     const byte signedAndEnveloped[] = { 0x2A, 0x86, 0x48, 0x86, 0xF7,
                                                0x0D, 0x01, 0x07, 0x04 };
     const byte digestedData[]       = { 0x2A, 0x86, 0x48, 0x86, 0xF7,
                                                0x0D, 0x01, 0x07, 0x05 };
-
 #ifndef NO_PKCS7_ENCRYPTED_DATA
     const byte encryptedData[]      = { 0x2A, 0x86, 0x48, 0x86, 0xF7,
                                                0x0D, 0x01, 0x07, 0x06 };
 #endif
-
-    int idSz;
-    int typeSz = 0, idx = 0;
+    /* FirmwarePkgData (1.2.840.113549.1.9.16.1.16), RFC 4108 */
+    const byte firmwarePkgData[]    = { 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D,
+                                        0x01, 0x09, 0x10, 0x01, 0x10 };
+#if defined(HAVE_LIBZ) && !defined(NO_PKCS7_COMPRESSED_DATA)
+    /* id-ct-compressedData (1.2.840.113549.1.9.16.1.9), RFC 3274 */
+    const byte compressedData[]     = { 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D,
+                                        0x01, 0x09, 0x10, 0x01, 0x09 };
+#endif
+
+#if !defined(NO_PWDBASED) && !defined(NO_SHA)
+    const byte pwriKek[]            = { 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D,
+                                        0x01, 0x09, 0x10, 0x03, 0x09 };
+    const byte pbkdf2[]             = { 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D,
+                                        0x01, 0x05, 0x0C };
+#endif
+
+    int idSz, idx = 0;
+    word32 typeSz = 0;
     const byte* typeName = 0;
     byte ID_Length[MAX_LENGTH_SZ];
 
@@ -105,6 +561,11 @@
             typeName = envelopedData;
             break;
 
+        case AUTH_ENVELOPED_DATA:
+            typeSz = sizeof(authEnvelopedData);
+            typeName = authEnvelopedData;
+            break;
+
         case SIGNED_AND_ENVELOPED_DATA:
             typeSz = sizeof(signedAndEnveloped);
             typeName = signedAndEnveloped;
@@ -121,12 +582,39 @@
             typeName = encryptedData;
             break;
 #endif
+#if defined(HAVE_LIBZ) && !defined(NO_PKCS7_COMPRESSED_DATA)
+        case COMPRESSED_DATA:
+            typeSz = sizeof(compressedData);
+            typeName = compressedData;
+            break;
+#endif
+        case FIRMWARE_PKG_DATA:
+            typeSz = sizeof(firmwarePkgData);
+            typeName = firmwarePkgData;
+            break;
+
+#if !defined(NO_PWDBASED) && !defined(NO_SHA)
+        case PWRI_KEK_WRAP:
+            typeSz = sizeof(pwriKek);
+            typeName = pwriKek;
+            break;
+
+        case PBKDF2_OID:
+            typeSz = sizeof(pbkdf2);
+            typeName = pbkdf2;
+            break;
+#endif
 
         default:
             WOLFSSL_MSG("Unknown PKCS#7 Type");
             return 0;
     };
 
+    if (outputSz < (MAX_LENGTH_SZ + 1 + typeSz)) {
+        WOLFSSL_MSG("CMS content type buffer too small");
+        return BAD_FUNC_ARG;
+    }
+
     idSz  = SetLength(typeSz, ID_Length);
     output[idx++] = ASN_OBJECT_ID;
     XMEMCPY(output + idx, ID_Length, idSz);
@@ -159,12 +647,18 @@
 #ifndef NO_AES
     #ifdef WOLFSSL_AES_128
         case AES128CBCb:
+        case AES128GCMb:
+        case AES128CCMb:
     #endif
     #ifdef WOLFSSL_AES_192
         case AES192CBCb:
+        case AES192GCMb:
+        case AES192CCMb:
     #endif
     #ifdef WOLFSSL_AES_256
         case AES256CBCb:
+        case AES256GCMb:
+        case AES256CCMb:
     #endif
             blockSz = AES_BLOCK_SIZE;
             break;
@@ -193,18 +687,24 @@
 #ifndef NO_AES
     #ifdef WOLFSSL_AES_128
         case AES128CBCb:
+        case AES128GCMb:
+        case AES128CCMb:
         case AES128_WRAP:
             blockKeySz = 16;
             break;
     #endif
     #ifdef WOLFSSL_AES_192
         case AES192CBCb:
+        case AES192GCMb:
+        case AES192CCMb:
         case AES192_WRAP:
             blockKeySz = 24;
             break;
     #endif
     #ifdef WOLFSSL_AES_256
         case AES256CBCb:
+        case AES256GCMb:
+        case AES256CCMb:
         case AES256_WRAP:
             blockKeySz = 32;
             break;
@@ -255,13 +755,17 @@
  */
 int wc_PKCS7_Init(PKCS7* pkcs7, void* heap, int devId)
 {
+    word16 isDynamic;
+
     WOLFSSL_ENTER("wc_PKCS7_Init");
 
     if (pkcs7 == NULL) {
         return BAD_FUNC_ARG;
     }
 
+    isDynamic = pkcs7->isDynamic;
     XMEMSET(pkcs7, 0, sizeof(PKCS7));
+    pkcs7->isDynamic = isDynamic;
 #ifdef WOLFSSL_HEAP_TEST
     pkcs7->heap = (void*)WOLFSSL_HEAP_TEST;
 #else
@@ -273,28 +777,162 @@
 }
 
 
-/* init PKCS7 struct with recipient cert, decode into DecodedCert
+/* Certificate structure holding der pointer, size, and pointer to next
+ * Pkcs7Cert struct. Used when creating SignedData types with multiple
+ * certificates. */
+struct Pkcs7Cert {
+    byte*  der;
+    word32 derSz;
+    Pkcs7Cert* next;
+};
+
+
+/* Linked list of ASN.1 encoded RecipientInfos */
+struct Pkcs7EncodedRecip {
+    byte recip[MAX_RECIP_SZ];
+    word32 recipSz;
+    int recipType;
+    int recipVersion;
+    Pkcs7EncodedRecip* next;
+};
+
+
+/* free all members of Pkcs7Cert linked list */
+static void wc_PKCS7_FreeCertSet(PKCS7* pkcs7)
+{
+    Pkcs7Cert* curr = NULL;
+    Pkcs7Cert* next = NULL;
+
+    if (pkcs7 == NULL)
+        return;
+
+    curr = pkcs7->certList;
+    pkcs7->certList = NULL;
+
+    while (curr != NULL) {
+        next = curr->next;
+        curr->next = NULL;
+        XFREE(curr, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        curr = next;
+    }
+
+    return;
+}
+
+
+/* Get total size of all recipients in recipient list.
+ *
+ * Returns total size of recipients, or negative upon error */
+static int wc_PKCS7_GetRecipientListSize(PKCS7* pkcs7)
+{
+    int totalSz = 0;
+    Pkcs7EncodedRecip* tmp = NULL;
+
+    if (pkcs7 == NULL)
+        return BAD_FUNC_ARG;
+
+    tmp = pkcs7->recipList;
+
+    while (tmp != NULL) {
+        totalSz += tmp->recipSz;
+        tmp = tmp->next;
+    }
+
+    return totalSz;
+}
+
+
+/* free all members of Pkcs7EncodedRecip linked list */
+static void wc_PKCS7_FreeEncodedRecipientSet(PKCS7* pkcs7)
+{
+    Pkcs7EncodedRecip* curr = NULL;
+    Pkcs7EncodedRecip* next = NULL;
+
+    if (pkcs7 == NULL)
+        return;
+
+    curr = pkcs7->recipList;
+    pkcs7->recipList = NULL;
+
+    while (curr != NULL) {
+        next = curr->next;
+        curr->next = NULL;
+        XFREE(curr, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        curr = next;
+    }
+
+    return;
+}
+
+
+/* search through RecipientInfo list for specific type.
+ * return 1 if ANY recipient of type specified is present, otherwise
+ * return 0 */
+static int wc_PKCS7_RecipientListIncludesType(PKCS7* pkcs7, int type)
+{
+    Pkcs7EncodedRecip* tmp = NULL;
+
+    if (pkcs7 == NULL)
+        return BAD_FUNC_ARG;
+
+    tmp = pkcs7->recipList;
+
+    while (tmp != NULL) {
+        if (tmp->recipType == type)
+            return 1;
+
+        tmp = tmp->next;
+    }
+
+    return 0;
+}
+
+
+/* searches through RecipientInfo list, returns 1 if all structure
+ * versions are set to 0, otherwise returns 0 */
+static int wc_PKCS7_RecipientListVersionsAllZero(PKCS7* pkcs7)
+{
+    Pkcs7EncodedRecip* tmp = NULL;
+
+    if (pkcs7 == NULL)
+        return BAD_FUNC_ARG;
+
+    tmp = pkcs7->recipList;
+
+    while (tmp != NULL) {
+        if (tmp->recipVersion != 0)
+            return 0;
+
+        tmp = tmp->next;
+    }
+
+    return 1;
+}
+
+
+/* Init PKCS7 struct with recipient cert, decode into DecodedCert
  * NOTE: keeps previously set pkcs7 heap hint, devId and isDynamic */
-int wc_PKCS7_InitWithCert(PKCS7* pkcs7, byte* cert, word32 certSz)
+int wc_PKCS7_InitWithCert(PKCS7* pkcs7, byte* derCert, word32 derCertSz)
 {
     int ret = 0;
     void* heap;
     int devId;
-    word16 isDynamic;
-
-    if (pkcs7 == NULL || (cert == NULL && certSz != 0)) {
+    Pkcs7Cert* cert;
+    Pkcs7Cert* lastCert;
+
+    if (pkcs7 == NULL || (derCert == NULL && derCertSz != 0)) {
         return BAD_FUNC_ARG;
     }
 
     heap = pkcs7->heap;
     devId = pkcs7->devId;
-    isDynamic = pkcs7->isDynamic;
+    cert = pkcs7->certList;
     ret = wc_PKCS7_Init(pkcs7, heap, devId);
     if (ret != 0)
         return ret;
-    pkcs7->isDynamic = isDynamic;
-
-    if (cert != NULL && certSz > 0) {
+    pkcs7->certList = cert;
+
+    if (derCert != NULL && derCertSz > 0) {
 #ifdef WOLFSSL_SMALL_STACK
         DecodedCert* dCert;
 
@@ -303,14 +941,37 @@
         if (dCert == NULL)
             return MEMORY_E;
 #else
-        DecodedCert stack_dCert;
-        DecodedCert* dCert = &stack_dCert;
-#endif
-
-        pkcs7->singleCert = cert;
-        pkcs7->singleCertSz = certSz;
-        InitDecodedCert(dCert, cert, certSz, pkcs7->heap);
-
+        DecodedCert dCert[1];
+#endif
+
+        pkcs7->singleCert = derCert;
+        pkcs7->singleCertSz = derCertSz;
+        pkcs7->cert[0] = derCert;
+        pkcs7->certSz[0] = derCertSz;
+
+        /* create new Pkcs7Cert for recipient, freed during cleanup */
+        cert = (Pkcs7Cert*)XMALLOC(sizeof(Pkcs7Cert), pkcs7->heap,
+                                   DYNAMIC_TYPE_PKCS7);
+        XMEMSET(cert, 0, sizeof(Pkcs7Cert));
+        cert->der = derCert;
+        cert->derSz = derCertSz;
+        cert->next = NULL;
+
+        /* free existing cert list if existing */
+        wc_PKCS7_FreeCertSet(pkcs7);
+
+        /* add cert to list */
+        if (pkcs7->certList == NULL) {
+            pkcs7->certList = cert;
+        } else {
+           lastCert = pkcs7->certList;
+           while (lastCert->next != NULL) {
+               lastCert = lastCert->next;
+           }
+           lastCert->next = cert;
+        }
+
+        InitDecodedCert(dCert, derCert, derCertSz, pkcs7->heap);
         ret = ParseCert(dCert, CA_TYPE, NO_VERIFY, 0);
         if (ret < 0) {
             FreeDecodedCert(dCert);
@@ -328,6 +989,14 @@
         pkcs7->issuerSz = dCert->issuerRawLen;
         XMEMCPY(pkcs7->issuerSn, dCert->serial, dCert->serialSz);
         pkcs7->issuerSnSz = dCert->serialSz;
+        XMEMCPY(pkcs7->issuerSubjKeyId, dCert->extSubjKeyId, KEYID_SIZE);
+
+        /* default to IssuerAndSerialNumber for SignerIdentifier */
+        pkcs7->sidType = CMS_ISSUER_AND_SERIAL_NUMBER;
+
+        /* free existing recipient list if existing */
+        wc_PKCS7_FreeEncodedRecipientSet(pkcs7);
+
         FreeDecodedCert(dCert);
 
 #ifdef WOLFSSL_SMALL_STACK
@@ -339,6 +1008,45 @@
 }
 
 
+/* Adds one DER-formatted certificate to the internal PKCS7/CMS certificate
+ * list, to be added as part of the certificates CertificateSet. Currently
+ * used in SignedData content type.
+ *
+ * Must be called after wc_PKCS7_Init() or wc_PKCS7_InitWithCert().
+ *
+ * Does not represent the recipient/signer certificate, only certificates that
+ * are part of the certificate chain used to build and verify signer
+ * certificates.
+ *
+ * This API does not currently validate certificates.
+ *
+ * Returns 0 on success, negative upon error */
+int wc_PKCS7_AddCertificate(PKCS7* pkcs7, byte* derCert, word32 derCertSz)
+{
+    Pkcs7Cert* cert;
+
+    if (pkcs7 == NULL || derCert == NULL || derCertSz == 0)
+        return BAD_FUNC_ARG;
+
+    cert = (Pkcs7Cert*)XMALLOC(sizeof(Pkcs7Cert), pkcs7->heap,
+                               DYNAMIC_TYPE_PKCS7);
+    if (cert == NULL)
+        return MEMORY_E;
+
+    cert->der = derCert;
+    cert->derSz = derCertSz;
+
+    if (pkcs7->certList == NULL) {
+        pkcs7->certList = cert;
+    } else {
+        cert->next = pkcs7->certList;
+        pkcs7->certList = cert;
+    }
+
+    return 0;
+}
+
+
 /* free linked list of PKCS7DecodedAttrib structs */
 static void wc_PKCS7_FreeDecodedAttrib(PKCS7DecodedAttrib* attrib, void* heap)
 {
@@ -365,18 +1073,110 @@
 }
 
 
+/* return 0 on success */
+static int wc_PKCS7_SignerInfoNew(PKCS7* pkcs7)
+{
+    if (pkcs7->signerInfo != NULL) {
+        XFREE(pkcs7->signerInfo, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        pkcs7->signerInfo = NULL;
+    }
+
+    pkcs7->signerInfo = (PKCS7SignerInfo*)XMALLOC(sizeof(PKCS7SignerInfo),
+            pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+    if (pkcs7->signerInfo == NULL) {
+        WOLFSSL_MSG("Unable to malloc memory for signer info");
+        return MEMORY_E;
+    }
+    XMEMSET(pkcs7->signerInfo, 0, sizeof(PKCS7SignerInfo));
+    return 0;
+}
+
+
+static void wc_PKCS7_SignerInfoFree(PKCS7* pkcs7)
+{
+    if (pkcs7->signerInfo != NULL) {
+        if (pkcs7->signerInfo->sid != NULL) {
+            XFREE(pkcs7->signerInfo->sid, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+            pkcs7->signerInfo->sid = NULL;
+        }
+        XFREE(pkcs7->signerInfo, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        pkcs7->signerInfo = NULL;
+    }
+}
+
+
+/* free's any current SID and sets it to "in"
+ * returns 0 on success
+ */
+static int wc_PKCS7_SignerInfoSetSID(PKCS7* pkcs7, byte* in, int inSz)
+{
+    if (pkcs7 == NULL || in == NULL || inSz < 0) {
+        return BAD_FUNC_ARG;
+    }
+
+    if (pkcs7->signerInfo->sid != NULL) {
+        XFREE(pkcs7->signerInfo->sid, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        pkcs7->signerInfo->sid = NULL;
+    }
+    pkcs7->signerInfo->sid = (byte*)XMALLOC(inSz, pkcs7->heap,
+            DYNAMIC_TYPE_PKCS7);
+    if (pkcs7->signerInfo->sid == NULL) {
+        return MEMORY_E;
+    }
+    XMEMCPY(pkcs7->signerInfo->sid, in, inSz);
+    pkcs7->signerInfo->sidSz = inSz;
+    return 0;
+}
+
+
 /* releases any memory allocated by a PKCS7 initializer */
 void wc_PKCS7_Free(PKCS7* pkcs7)
 {
     if (pkcs7 == NULL)
         return;
 
+#ifndef NO_PKCS7_STREAM
+    wc_PKCS7_FreeStream(pkcs7);
+#endif
+
+    wc_PKCS7_SignerInfoFree(pkcs7);
     wc_PKCS7_FreeDecodedAttrib(pkcs7->decodedAttrib, pkcs7->heap);
+    wc_PKCS7_FreeCertSet(pkcs7);
 
 #ifdef ASN_BER_TO_DER
     if (pkcs7->der != NULL)
         XFREE(pkcs7->der, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
 #endif
+    if (pkcs7->contentDynamic != NULL)
+        XFREE(pkcs7->contentDynamic, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+
+    if (pkcs7->cek != NULL) {
+        ForceZero(pkcs7->cek, pkcs7->cekSz);
+        XFREE(pkcs7->cek, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+    }
+
+    pkcs7->contentTypeSz = 0;
+
+    if (pkcs7->signature) {
+        XFREE(pkcs7->signature, pkcs7->heap, DYNAMIC_TYPE_SIGNATURE);
+        pkcs7->signature = NULL;
+        pkcs7->signatureSz = 0;
+    }
+    if (pkcs7->plainDigest) {
+        XFREE(pkcs7->plainDigest, pkcs7->heap, DYNAMIC_TYPE_DIGEST);
+        pkcs7->plainDigest = NULL;
+        pkcs7->plainDigestSz = 0;
+    }
+    if (pkcs7->pkcs7Digest) {
+        XFREE(pkcs7->pkcs7Digest, pkcs7->heap, DYNAMIC_TYPE_DIGEST);
+        pkcs7->pkcs7Digest = NULL;
+        pkcs7->pkcs7DigestSz = 0;
+    }
+    if (pkcs7->cachedEncryptedContent != NULL) {
+        XFREE(pkcs7->cachedEncryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        pkcs7->cachedEncryptedContent = NULL;
+        pkcs7->cachedEncryptedContentSz = 0;
+    }
 
     if (pkcs7->isDynamic) {
         pkcs7->isDynamic = 0;
@@ -401,8 +1201,12 @@
         word32 sz  = oidSz;
         word32 idx = 0;
         int    length = 0;
-
-        if (list->oid[idx++] != ASN_OBJECT_ID) {
+        byte   tag;
+
+        if (GetASNTag(list->oid, &idx, &tag, list->oidSz) < 0) {
+            return NULL;
+        }
+        if (tag != ASN_OBJECT_ID) {
             WOLFSSL_MSG("Bad attribute ASN1 syntax");
             return NULL;
         }
@@ -534,20 +1338,24 @@
                 byte signerInfoSet[MAX_SET_SZ];
                     byte signerInfoSeq[MAX_SEQ_SZ];
                         byte signerVersion[MAX_VERSION_SZ];
+                        /* issuerAndSerialNumber ...*/
                         byte issuerSnSeq[MAX_SEQ_SZ];
                             byte issuerName[MAX_SEQ_SZ];
                             byte issuerSn[MAX_SN_SZ];
+                        /* OR subjectKeyIdentifier */
+                        byte issuerSKIDSeq[MAX_SEQ_SZ];
+                            byte issuerSKID[MAX_OCTET_STR_SZ];
                         byte signerDigAlgoId[MAX_ALGO_SZ];
                         byte digEncAlgoId[MAX_ALGO_SZ];
                         byte signedAttribSet[MAX_SET_SZ];
-                            EncodedAttrib signedAttribs[6];
+                            EncodedAttrib signedAttribs[7];
                         byte signerDigest[MAX_OCTET_STR_SZ];
     word32 innerOctetsSz, innerContSeqSz, contentInfoSeqSz;
     word32 outerSeqSz, outerContentSz, innerSeqSz, versionSz, digAlgoIdSetSz,
            singleDigAlgoIdSz, certsSetSz;
     word32 signerInfoSetSz, signerInfoSeqSz, signerVersionSz,
-           issuerSnSeqSz, issuerNameSz, issuerSnSz,
-           signerDigAlgoIdSz, digEncAlgoIdSz, signerDigestSz;
+           issuerSnSeqSz, issuerNameSz, issuerSnSz, issuerSKIDSz,
+           issuerSKIDSeqSz, signerDigAlgoIdSz, digEncAlgoIdSz, signerDigestSz;
     word32 encContentDigestSz, signedAttribsSz, signedAttribsCount,
            signedAttribSetSz;
 } ESD;
@@ -582,12 +1390,112 @@
 }
 
 
-static int FlattenAttributes(byte* output, EncodedAttrib* ea, int eaSz)
-{
-    int i, idx;
-
-    idx = 0;
+typedef struct FlatAttrib {
+    byte* data;
+    word32 dataSz;
+} FlatAttrib;
+
+/* Returns a pointer to FlatAttrib whose members are initialized to 0.
+*  Caller is expected to free.
+*/
+static FlatAttrib* NewAttrib(void* heap)
+{
+    FlatAttrib* fb = (FlatAttrib*) XMALLOC(sizeof(FlatAttrib), heap,
+                                                   DYNAMIC_TYPE_TMP_BUFFER);
+    if (fb != NULL) {
+        ForceZero(fb, sizeof(FlatAttrib));
+    }
+    (void)heap;
+    return fb;
+}
+
+/* Free FlatAttrib array and memory allocated to internal struct members */
+static void FreeAttribArray(PKCS7* pkcs7, FlatAttrib** arr, int rows)
+{
+    int i;
+
+    if (arr) {
+        for (i = 0; i < rows; i++) {
+            if (arr[i]) {
+                if (arr[i]->data) {
+                    ForceZero(arr[i]->data, arr[i]->dataSz);
+                    XFREE(arr[i]->data, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+                }
+                ForceZero(arr[i], sizeof(FlatAttrib));
+                XFREE(arr[i], pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            }
+        }
+        ForceZero(arr, rows);
+        XFREE(arr, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+    (void)pkcs7;
+}
+
+
+/* Sort FlatAttrib array in ascending order */
+static int SortAttribArray(FlatAttrib** arr, int rows)
+{
+    int i, j;
+    word32 minSz, minIdx;
+    FlatAttrib* a   = NULL;
+    FlatAttrib* b   = NULL;
+    FlatAttrib* tmp = NULL;
+
+    if (arr == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    for (i = 0; i < rows; i++) {
+        a = arr[i];
+        minSz = a->dataSz;
+        minIdx = i;
+        for (j = i+1; j < rows; j++) {
+            b = arr[j];
+            if (b->dataSz < minSz) {
+                minSz = b->dataSz;
+                minIdx = j;
+            }
+        }
+        if (minSz < a->dataSz) {
+            /* swap array positions */
+            tmp = arr[i];
+            arr[i] = arr[minIdx];
+            arr[minIdx] = tmp;
+        }
+    }
+
+    return 0;
+}
+
+
+/* Build up array of FlatAttrib structs from EncodedAttrib ones. FlatAttrib
+ * holds flattened DER encoding of each attribute */
+static int FlattenEncodedAttribs(PKCS7* pkcs7, FlatAttrib** derArr, int rows,
+                                 EncodedAttrib* ea, int eaSz)
+{
+    int i, idx, sz;
+    byte* output   = NULL;
+    FlatAttrib* fa = NULL;
+
+    if (pkcs7 == NULL || derArr == NULL || ea == NULL) {
+        WOLFSSL_MSG("Invalid arguments to FlattenEncodedAttribs");
+        return BAD_FUNC_ARG;
+    }
+
+    if (rows != eaSz) {
+        WOLFSSL_MSG("DER array not large enough to hold attribute count");
+        return BAD_FUNC_ARG;
+    }
+
     for (i = 0; i < eaSz; i++) {
+        sz = ea[i].valueSeqSz + ea[i].oidSz + ea[i].valueSetSz + ea[i].valueSz;
+
+        output = (byte*)XMALLOC(sz, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        if (output == NULL) {
+            return MEMORY_E;
+        }
+
+        idx = 0;
         XMEMCPY(output + idx, ea[i].valueSeq, ea[i].valueSeqSz);
         idx += ea[i].valueSeqSz;
         XMEMCPY(output + idx, ea[i].oid, ea[i].oidSz);
@@ -595,8 +1503,69 @@
         XMEMCPY(output + idx, ea[i].valueSet, ea[i].valueSetSz);
         idx += ea[i].valueSetSz;
         XMEMCPY(output + idx, ea[i].value, ea[i].valueSz);
-        idx += ea[i].valueSz;
-    }
+
+        fa = derArr[i];
+        fa->data = output;
+        fa->dataSz = sz;
+    }
+
+    return 0;
+}
+
+
+/* Sort and Flatten EncodedAttrib attributes into output buffer */
+static int FlattenAttributes(PKCS7* pkcs7, byte* output, EncodedAttrib* ea,
+                             int eaSz)
+{
+    int i, idx, ret;
+    FlatAttrib** derArr = NULL;
+    FlatAttrib*  fa     = NULL;
+
+    if (pkcs7 == NULL || output == NULL || ea == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    /* create array of FlatAttrib struct pointers to hold DER attribs */
+    derArr = (FlatAttrib**) XMALLOC(eaSz * sizeof(FlatAttrib*), pkcs7->heap,
+                                    DYNAMIC_TYPE_TMP_BUFFER);
+    if (derArr == NULL) {
+        return MEMORY_E;
+    }
+    XMEMSET(derArr, 0, eaSz * sizeof(FlatAttrib*));
+
+    for (i = 0; i < eaSz; i++) {
+        derArr[i] = NewAttrib(pkcs7->heap);
+        if (derArr[i] == NULL) {
+            FreeAttribArray(pkcs7, derArr, eaSz);
+            return MEMORY_E;
+        }
+        ForceZero(derArr[i], sizeof(FlatAttrib));
+    }
+
+    /* flatten EncodedAttrib into DER byte arrays */
+    ret = FlattenEncodedAttribs(pkcs7, derArr, eaSz, ea, eaSz);
+    if (ret != 0) {
+        FreeAttribArray(pkcs7, derArr, eaSz);
+        return ret;
+    }
+
+    /* SET OF DER signed attributes must be sorted in ascending order */
+    ret = SortAttribArray(derArr, eaSz);
+    if (ret != 0) {
+        FreeAttribArray(pkcs7, derArr, eaSz);
+        return ret;
+    }
+
+    /* copy sorted DER attribute arrays into output buffer */
+    idx = 0;
+    for (i = 0; i < eaSz; i++) {
+        fa = derArr[i];
+        XMEMCPY(output + idx, fa->data, fa->dataSz);
+        idx += fa->dataSz;
+    }
+
+    FreeAttribArray(pkcs7, derArr, eaSz);
+
     return 0;
 }
 
@@ -611,8 +1580,7 @@
 #ifdef WOLFSSL_SMALL_STACK
     RsaKey* privKey;
 #else
-    RsaKey  stack_privKey;
-    RsaKey* privKey = &stack_privKey;
+    RsaKey  privKey[1];
 #endif
 
     if (pkcs7 == NULL || pkcs7->rng == NULL || in == NULL || esd == NULL) {
@@ -638,9 +1606,20 @@
         }
     }
     if (ret == 0) {
-        ret = wc_RsaSSL_Sign(in, inSz, esd->encContentDigest,
-                             sizeof(esd->encContentDigest),
-                             privKey, pkcs7->rng);
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        do {
+            ret = wc_AsyncWait(ret, &privKey->asyncDev,
+                WC_ASYNC_FLAG_CALL_AGAIN);
+            if (ret >= 0)
+    #endif
+            {
+                ret = wc_RsaSSL_Sign(in, inSz, esd->encContentDigest,
+                                     sizeof(esd->encContentDigest),
+                                     privKey, pkcs7->rng);
+            }
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        } while (ret == WC_PENDING_E);
+    #endif
     }
 
     wc_FreeRsaKey(privKey);
@@ -664,8 +1643,7 @@
 #ifdef WOLFSSL_SMALL_STACK
     ecc_key* privKey;
 #else
-    ecc_key  stack_privKey;
-    ecc_key* privKey = &stack_privKey;
+    ecc_key  privKey[1];
 #endif
 
     if (pkcs7 == NULL || pkcs7->rng == NULL || in == NULL || esd == NULL) {
@@ -692,8 +1670,19 @@
     }
     if (ret == 0) {
         outSz = sizeof(esd->encContentDigest);
-        ret = wc_ecc_sign_hash(in, inSz, esd->encContentDigest,
-                               &outSz, pkcs7->rng, privKey);
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        do {
+            ret = wc_AsyncWait(ret, &privKey->asyncDev,
+                WC_ASYNC_FLAG_CALL_AGAIN);
+            if (ret >= 0)
+    #endif
+            {
+                ret = wc_ecc_sign_hash(in, inSz, esd->encContentDigest,
+                                       &outSz, pkcs7->rng, privKey);
+            }
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        } while (ret == WC_PENDING_E);
+    #endif
         if (ret == 0)
             ret = (int)outSz;
     }
@@ -716,41 +1705,88 @@
  *
  * return 0 on success, negative on error */
 static int wc_PKCS7_BuildSignedAttributes(PKCS7* pkcs7, ESD* esd,
-                    byte* contentTypeOid, word32 contentTypeOidSz,
-                    byte* contentType, word32 contentTypeSz,
-                    byte* messageDigestOid, word32 messageDigestOidSz)
+                    const byte* contentType, word32 contentTypeSz,
+                    const byte* contentTypeOid, word32 contentTypeOidSz,
+                    const byte* messageDigestOid, word32 messageDigestOidSz,
+                    const byte* signingTimeOid, word32 signingTimeOidSz,
+                    byte* signingTime, word32 signingTimeSz)
 {
     int hashSz;
-
+#ifdef NO_ASN_TIME
     PKCS7Attrib cannedAttribs[2];
+#else
+    time_t tm;
+    int timeSz;
+    PKCS7Attrib cannedAttribs[3];
+#endif
+    word32 idx = 0;
     word32 cannedAttribsCount;
 
-    if (pkcs7 == NULL || esd == NULL || contentTypeOid == NULL ||
-        contentType == NULL || messageDigestOid == NULL)
-        return BAD_FUNC_ARG;
-
-    hashSz = wc_HashGetDigestSize(esd->hashType);
-    if (hashSz < 0)
-        return hashSz;
-
-    cannedAttribsCount = sizeof(cannedAttribs)/sizeof(PKCS7Attrib);
-
-    cannedAttribs[0].oid     = contentTypeOid;
-    cannedAttribs[0].oidSz   = contentTypeOidSz;
-    cannedAttribs[0].value   = contentType;
-    cannedAttribs[0].valueSz = contentTypeSz;
-    cannedAttribs[1].oid     = messageDigestOid;
-    cannedAttribs[1].oidSz   = messageDigestOidSz;
-    cannedAttribs[1].value   = esd->contentDigest;
-    cannedAttribs[1].valueSz = hashSz + 2;  /* ASN.1 heading */
-
-    esd->signedAttribsCount += cannedAttribsCount;
-    esd->signedAttribsSz += EncodeAttributes(&esd->signedAttribs[0], 2,
-                                         cannedAttribs, cannedAttribsCount);
-
-    esd->signedAttribsCount += pkcs7->signedAttribsSz;
-    esd->signedAttribsSz += EncodeAttributes(&esd->signedAttribs[2], 4,
-                              pkcs7->signedAttribs, pkcs7->signedAttribsSz);
+    if (pkcs7 == NULL || esd == NULL || contentType == NULL ||
+        contentTypeOid == NULL || messageDigestOid == NULL ||
+        signingTimeOid == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    if (pkcs7->skipDefaultSignedAttribs == 0) {
+        hashSz = wc_HashGetDigestSize(esd->hashType);
+        if (hashSz < 0)
+            return hashSz;
+
+    #ifndef NO_ASN_TIME
+        if (signingTime == NULL || signingTimeSz == 0)
+            return BAD_FUNC_ARG;
+
+        tm = XTIME(0);
+        timeSz = GetAsnTimeString(&tm, signingTime, signingTimeSz);
+        if (timeSz < 0)
+            return timeSz;
+    #endif
+
+        cannedAttribsCount = sizeof(cannedAttribs)/sizeof(PKCS7Attrib);
+
+        cannedAttribs[idx].oid     = contentTypeOid;
+        cannedAttribs[idx].oidSz   = contentTypeOidSz;
+        cannedAttribs[idx].value   = contentType;
+        cannedAttribs[idx].valueSz = contentTypeSz;
+        idx++;
+    #ifndef NO_ASN_TIME
+        cannedAttribs[idx].oid     = signingTimeOid;
+        cannedAttribs[idx].oidSz   = signingTimeOidSz;
+        cannedAttribs[idx].value   = signingTime;
+        cannedAttribs[idx].valueSz = timeSz;
+        idx++;
+    #endif
+        cannedAttribs[idx].oid     = messageDigestOid;
+        cannedAttribs[idx].oidSz   = messageDigestOidSz;
+        cannedAttribs[idx].value   = esd->contentDigest;
+        cannedAttribs[idx].valueSz = hashSz + 2;  /* ASN.1 heading */
+
+        esd->signedAttribsCount += cannedAttribsCount;
+        esd->signedAttribsSz += EncodeAttributes(&esd->signedAttribs[0], 3,
+                                             cannedAttribs, cannedAttribsCount);
+    } else {
+        esd->signedAttribsCount = 0;
+        esd->signedAttribsSz = 0;
+    }
+
+    /* add custom signed attributes if set */
+    if (pkcs7->signedAttribsSz > 0 && pkcs7->signedAttribs != NULL) {
+        esd->signedAttribsCount += pkcs7->signedAttribsSz;
+    #ifdef NO_ASN_TIME
+        esd->signedAttribsSz += EncodeAttributes(&esd->signedAttribs[2], 4,
+                                  pkcs7->signedAttribs, pkcs7->signedAttribsSz);
+    #else
+        esd->signedAttribsSz += EncodeAttributes(&esd->signedAttribs[3], 4,
+                                  pkcs7->signedAttribs, pkcs7->signedAttribsSz);
+    #endif
+    }
+
+#ifdef NO_ASN_TIME
+    (void)signingTimeOidSz;
+    (void)signingTime;
+    (void)signingTimeSz;
+#endif
 
     return 0;
 }
@@ -884,7 +1920,7 @@
     if (hashSz < 0)
         return hashSz;
 
-    if (pkcs7->signedAttribsSz != 0) {
+    if (flatSignedAttribsSz != 0) {
 
         if (flatSignedAttribs == NULL)
             return BAD_FUNC_ARG;
@@ -897,16 +1933,14 @@
 
         ret = wc_HashUpdate(&esd->hash, esd->hashType,
                             attribSet, attribSetSz);
-        if (ret < 0)
-            return ret;
-
-        ret = wc_HashUpdate(&esd->hash, esd->hashType,
-                            flatSignedAttribs, flatSignedAttribsSz);
-        if (ret < 0)
-            return ret;
-
-        ret = wc_HashFinal(&esd->hash, esd->hashType,
-                           esd->contentAttribsDigest);
+        if (ret == 0)
+            ret = wc_HashUpdate(&esd->hash, esd->hashType,
+                                flatSignedAttribs, flatSignedAttribsSz);
+        if (ret == 0)
+            ret = wc_HashFinal(&esd->hash, esd->hashType,
+                               esd->contentAttribsDigest);
+        wc_HashFree(&esd->hash, esd->hashType);
+
         if (ret < 0)
             return ret;
 
@@ -943,7 +1977,7 @@
 
 /* build SignedData signature over DigestInfo or content digest
  *
- * pkcs7 - pointer to initizlied PKCS7 struct
+ * pkcs7 - pointer to initialized PKCS7 struct
  * flatSignedAttribs - flattened, signed attributes
  * flatSignedAttribsSz - size of flatSignedAttribs, octets
  * esd - pointer to initialized ESD struct
@@ -954,15 +1988,19 @@
                                              word32 flatSignedAttribsSz,
                                              ESD* esd)
 {
-    int ret;
-#ifdef HAVE_ECC
-    int hashSz;
+    int ret = 0;
+#if defined(HAVE_ECC) || \
+    (defined(HAVE_PKCS7_RSA_RAW_SIGN_CALLBACK) && !defined(NO_RSA))
+    int hashSz = 0;
+#endif
+#if defined(HAVE_PKCS7_RSA_RAW_SIGN_CALLBACK) && !defined(NO_RSA)
+    int hashOID;
 #endif
     word32 digestInfoSz = MAX_PKCS7_DIGEST_SZ;
 #ifdef WOLFSSL_SMALL_STACK
     byte* digestInfo;
 #else
-    byte digestInfo[MAX_PKCS7_DIGEST_SZ];
+    byte  digestInfo[MAX_PKCS7_DIGEST_SZ];
 #endif
 
     if (pkcs7 == NULL || esd == NULL)
@@ -975,6 +2013,7 @@
         return MEMORY_E;
     }
 #endif
+    XMEMSET(digestInfo, 0, digestInfoSz);
 
     ret = wc_PKCS7_BuildDigestInfo(pkcs7, flatSignedAttribs,
                                    flatSignedAttribsSz, esd, digestInfo,
@@ -986,11 +2025,37 @@
         return ret;
     }
 
+#if defined(HAVE_ECC) || \
+    (defined(HAVE_PKCS7_RSA_RAW_SIGN_CALLBACK) && !defined(NO_RSA))
+    /* get digest size from hash type */
+    hashSz = wc_HashGetDigestSize(esd->hashType);
+    if (hashSz < 0) {
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(digestInfo, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    #endif
+        return hashSz;
+    }
+#endif
+
     /* sign digestInfo */
     switch (pkcs7->publicKeyOID) {
 
 #ifndef NO_RSA
         case RSAk:
+        #ifdef HAVE_PKCS7_RSA_RAW_SIGN_CALLBACK
+            if (pkcs7->rsaSignRawDigestCb != NULL) {
+                /* get hash OID */
+                hashOID = wc_HashGetOID(esd->hashType);
+
+                /* user signing plain digest, build DigestInfo themselves */
+                ret = pkcs7->rsaSignRawDigestCb(pkcs7,
+                           esd->contentAttribsDigest, hashSz,
+                           esd->encContentDigest, sizeof(esd->encContentDigest),
+                           pkcs7->privateKey, pkcs7->privateKeySz, pkcs7->devId,
+                           hashOID);
+                break;
+            }
+        #endif
             ret = wc_PKCS7_RsaSign(pkcs7, digestInfo, digestInfoSz, esd);
             break;
 #endif
@@ -999,14 +2064,6 @@
         case ECDSAk:
             /* CMS with ECDSA does not sign DigestInfo structure
              * like PKCS#7 with RSA does */
-            hashSz = wc_HashGetDigestSize(esd->hashType);
-            if (hashSz < 0) {
-            #ifdef WOLFSSL_SMALL_STACK
-                XFREE(digestInfo, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
-            #endif
-                return hashSz;
-            }
-
             ret = wc_PKCS7_EcdsaSign(pkcs7, esd->contentAttribsDigest,
                                      hashSz, esd);
             break;
@@ -1028,49 +2085,50 @@
     return ret;
 }
 
+
 /* build PKCS#7 signedData content type */
-int wc_PKCS7_EncodeSignedData(PKCS7* pkcs7, byte* output, word32 outputSz)
-{
-    static const byte outerOid[] =
-        { ASN_OBJECT_ID, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01,
-                         0x07, 0x02 };
-    static const byte innerOid[] =
-        { ASN_OBJECT_ID, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01,
-                         0x07, 0x01 };
-
-    byte contentTypeOid[] =
+static int PKCS7_EncodeSigned(PKCS7* pkcs7, ESD* esd,
+    const byte* hashBuf, word32 hashSz, byte* output, word32* outputSz,
+    byte* output2, word32* output2Sz)
+{
+    /* contentType OID (1.2.840.113549.1.9.3) */
+    const byte contentTypeOid[] =
             { ASN_OBJECT_ID, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xF7, 0x0d, 0x01,
                              0x09, 0x03 };
-    byte contentType[] =
-            { ASN_OBJECT_ID, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01,
-                             0x07, 0x01 };
-    byte messageDigestOid[] =
+
+    /* messageDigest OID (1.2.840.113549.1.9.4) */
+    const byte messageDigestOid[] =
             { ASN_OBJECT_ID, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01,
                              0x09, 0x04 };
 
-#ifdef WOLFSSL_SMALL_STACK
-    ESD* esd = NULL;
-#else
-    ESD stack_esd;
-    ESD* esd = &stack_esd;
-#endif
+    /* signingTime OID () */
+    byte signingTimeOid[] =
+            { ASN_OBJECT_ID, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01,
+                             0x09, 0x05};
+
+    Pkcs7Cert* certPtr = NULL;
+    word32 certSetSz = 0;
 
     word32 signerInfoSz = 0;
-    word32 totalSz = 0;
+    word32 totalSz, total2Sz;
     int idx = 0, ret = 0;
-    int digEncAlgoId, digEncAlgoType, hashSz;
+    int digEncAlgoId, digEncAlgoType;
     byte* flatSignedAttribs = NULL;
     word32 flatSignedAttribsSz = 0;
-    word32 innerOidSz = sizeof(innerOid);
-    word32 outerOidSz = sizeof(outerOid);
-
-    if (pkcs7 == NULL || pkcs7->content == NULL || pkcs7->contentSz == 0 ||
+
+    byte signedDataOid[MAX_OID_SZ];
+    word32 signedDataOidSz;
+
+    byte signingTime[MAX_TIME_STRING_SZ];
+
+    if (pkcs7 == NULL || pkcs7->contentSz == 0 ||
         pkcs7->encryptOID == 0 || pkcs7->hashOID == 0 || pkcs7->rng == 0 ||
-        pkcs7->singleCert == NULL || pkcs7->singleCertSz == 0 ||
-        output == NULL || outputSz == 0) {
-        return BAD_FUNC_ARG;
-    }
-
+        output == NULL || outputSz == NULL || *outputSz == 0 || hashSz == 0 ||
+        hashBuf == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    /* verify the hash size matches */
 #ifdef WOLFSSL_SMALL_STACK
     esd = (ESD*)XMALLOC(sizeof(ESD), pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
     if (esd == NULL)
@@ -1079,168 +2137,259 @@
 
     XMEMSET(esd, 0, sizeof(ESD));
 
-    esd->hashType = wc_OidGetHash(pkcs7->hashOID);
-    ret = wc_HashGetDigestSize(esd->hashType);
-    if (ret < 0) {
-#ifdef WOLFSSL_SMALL_STACK
-        XFREE(esd, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-        return ret;
-    }
-    hashSz = ret;
-
-    ret = wc_HashInit(&esd->hash, esd->hashType);
-    if (ret != 0) {
+    /* set content type based on contentOID, unless user has set custom one
+       with wc_PKCS7_SetContentType() */
+    if (pkcs7->contentTypeSz == 0) {
+
+        /* default to DATA content type if user has not set */
+        if (pkcs7->contentOID == 0) {
+            pkcs7->contentOID = DATA;
+        }
+
+        ret = wc_SetContentType(pkcs7->contentOID, pkcs7->contentType,
+                                sizeof(pkcs7->contentType));
+        if (ret < 0) {
 #ifdef WOLFSSL_SMALL_STACK
         XFREE(esd, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
 #endif
-        return ret;
-    }
-
-    if (pkcs7->contentSz != 0)
-    {
-        ret = wc_HashUpdate(&esd->hash, esd->hashType,
-                            pkcs7->content, pkcs7->contentSz);
-        if (ret < 0) {
-#ifdef WOLFSSL_SMALL_STACK
-            XFREE(esd, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-        return ret;
-        }
-        esd->contentDigest[0] = ASN_OCTET_STRING;
-        esd->contentDigest[1] = (byte)hashSz;
-        ret = wc_HashFinal(&esd->hash, esd->hashType,
-                           &esd->contentDigest[2]);
-        if (ret < 0) {
-#ifdef WOLFSSL_SMALL_STACK
-            XFREE(esd, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-        return ret;
-        }
-    }
-
-    esd->innerOctetsSz = SetOctetString(pkcs7->contentSz, esd->innerOctets);
-    esd->innerContSeqSz = SetExplicit(0, esd->innerOctetsSz + pkcs7->contentSz,
-                                esd->innerContSeq);
-    esd->contentInfoSeqSz = SetSequence(pkcs7->contentSz + esd->innerOctetsSz +
-                                    innerOidSz + esd->innerContSeqSz,
-                                    esd->contentInfoSeq);
-
-    esd->issuerSnSz = SetSerialNumber(pkcs7->issuerSn, pkcs7->issuerSnSz,
-                                     esd->issuerSn, MAX_SN_SZ);
-    signerInfoSz += esd->issuerSnSz;
-    esd->issuerNameSz = SetSequence(pkcs7->issuerSz, esd->issuerName);
-    signerInfoSz += esd->issuerNameSz + pkcs7->issuerSz;
-    esd->issuerSnSeqSz = SetSequence(signerInfoSz, esd->issuerSnSeq);
-    signerInfoSz += esd->issuerSnSeqSz;
-    esd->signerVersionSz = SetMyVersion(1, esd->signerVersion, 0);
-    signerInfoSz += esd->signerVersionSz;
-    esd->signerDigAlgoIdSz = SetAlgoID(pkcs7->hashOID, esd->signerDigAlgoId,
-                                      oidHashType, 0);
-    signerInfoSz += esd->signerDigAlgoIdSz;
-
-    /* set signatureAlgorithm */
-    ret = wc_PKCS7_SignedDataGetEncAlgoId(pkcs7, &digEncAlgoId,
-                                          &digEncAlgoType);
+            return ret;
+        }
+        pkcs7->contentTypeSz = ret;
+    }
+
+    /* set signedData outer content type */
+    ret = wc_SetContentType(SIGNED_DATA, signedDataOid, sizeof(signedDataOid));
     if (ret < 0) {
 #ifdef WOLFSSL_SMALL_STACK
         XFREE(esd, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
 #endif
         return ret;
     }
-    esd->digEncAlgoIdSz = SetAlgoID(digEncAlgoId, esd->digEncAlgoId,
-                                    digEncAlgoType, 0);
-    signerInfoSz += esd->digEncAlgoIdSz;
-
-    if (pkcs7->signedAttribsSz != 0) {
-
-        /* build up signed attributes */
-        ret = wc_PKCS7_BuildSignedAttributes(pkcs7, esd,
-                                    contentTypeOid, sizeof(contentTypeOid),
-                                    contentType, sizeof(contentType),
-                                    messageDigestOid, sizeof(messageDigestOid));
+    signedDataOidSz = ret;
+
+    if (pkcs7->sidType != DEGENERATE_SID) {
+        esd->hashType = wc_OidGetHash(pkcs7->hashOID);
+        if (wc_HashGetDigestSize(esd->hashType) != (int)hashSz) {
+            WOLFSSL_MSG("hashSz did not match hashOID");
+    #ifdef WOLFSSL_SMALL_STACK
+            XFREE(esd, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    #endif
+            return BUFFER_E;
+        }
+
+        /* include hash */
+        esd->contentDigest[0] = ASN_OCTET_STRING;
+        esd->contentDigest[1] = (byte)hashSz;
+        XMEMCPY(&esd->contentDigest[2], hashBuf, hashSz);
+    }
+
+    if (pkcs7->detached == 1) {
+        /* do not include content if generating detached signature */
+        esd->innerOctetsSz = 0;
+        esd->innerContSeqSz = 0;
+        esd->contentInfoSeqSz = SetSequence(pkcs7->contentTypeSz,
+                                            esd->contentInfoSeq);
+    } else {
+        esd->innerOctetsSz = SetOctetString(pkcs7->contentSz, esd->innerOctets);
+        esd->innerContSeqSz = SetExplicit(0, esd->innerOctetsSz +
+                                    pkcs7->contentSz, esd->innerContSeq);
+        esd->contentInfoSeqSz = SetSequence(pkcs7->contentSz +
+                                    esd->innerOctetsSz + pkcs7->contentTypeSz +
+                                    esd->innerContSeqSz, esd->contentInfoSeq);
+    }
+
+    /* SignerIdentifier */
+    if (pkcs7->sidType == CMS_ISSUER_AND_SERIAL_NUMBER) {
+        /* IssuerAndSerialNumber */
+        esd->issuerSnSz = SetSerialNumber(pkcs7->issuerSn, pkcs7->issuerSnSz,
+                                          esd->issuerSn, MAX_SN_SZ, MAX_SN_SZ);
+        signerInfoSz += esd->issuerSnSz;
+        esd->issuerNameSz = SetSequence(pkcs7->issuerSz, esd->issuerName);
+        signerInfoSz += esd->issuerNameSz + pkcs7->issuerSz;
+        esd->issuerSnSeqSz = SetSequence(signerInfoSz, esd->issuerSnSeq);
+        signerInfoSz += esd->issuerSnSeqSz;
+
+        if (pkcs7->version == 3) {
+            /* RFC 4108 version MUST be 3 for firmware package signer */
+            esd->signerVersionSz = SetMyVersion(3, esd->signerVersion, 0);
+        }
+        else {
+            /* version MUST be 1 otherwise*/
+            esd->signerVersionSz = SetMyVersion(1, esd->signerVersion, 0);
+        }
+
+    } else if (pkcs7->sidType == CMS_SKID) {
+        /* SubjectKeyIdentifier */
+        esd->issuerSKIDSz = SetOctetString(KEYID_SIZE, esd->issuerSKID);
+        esd->issuerSKIDSeqSz = SetExplicit(0, esd->issuerSKIDSz + KEYID_SIZE,
+                                           esd->issuerSKIDSeq);
+        signerInfoSz += (esd->issuerSKIDSz + esd->issuerSKIDSeqSz +
+                         KEYID_SIZE);
+
+        /* version MUST be 3 */
+        esd->signerVersionSz = SetMyVersion(3, esd->signerVersion, 0);
+    } else if (pkcs7->sidType == DEGENERATE_SID) {
+        /* no signer info added */
+    } else {
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(esd, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    #endif
+        return SKID_E;
+    }
+
+    if (pkcs7->sidType != DEGENERATE_SID) {
+        signerInfoSz += esd->signerVersionSz;
+        esd->signerDigAlgoIdSz = SetAlgoID(pkcs7->hashOID, esd->signerDigAlgoId,
+                                          oidHashType, 0);
+        signerInfoSz += esd->signerDigAlgoIdSz;
+
+        /* set signatureAlgorithm */
+        ret = wc_PKCS7_SignedDataGetEncAlgoId(pkcs7, &digEncAlgoId,
+                                              &digEncAlgoType);
         if (ret < 0) {
-#ifdef WOLFSSL_SMALL_STACK
+    #ifdef WOLFSSL_SMALL_STACK
+            XFREE(esd, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    #endif
+            return ret;
+        }
+        esd->digEncAlgoIdSz = SetAlgoID(digEncAlgoId, esd->digEncAlgoId,
+                                        digEncAlgoType, 0);
+        signerInfoSz += esd->digEncAlgoIdSz;
+
+        /* build up signed attributes, include contentType, signingTime, and
+           messageDigest by default */
+        ret = wc_PKCS7_BuildSignedAttributes(pkcs7, esd, pkcs7->contentType,
+                                     pkcs7->contentTypeSz,
+                                     contentTypeOid, sizeof(contentTypeOid),
+                                     messageDigestOid, sizeof(messageDigestOid),
+                                     signingTimeOid, sizeof(signingTimeOid),
+                                     signingTime, sizeof(signingTime));
+        if (ret < 0) {
+        #ifdef WOLFSSL_SMALL_STACK
             XFREE(esd, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-            return MEMORY_E;
-        }
-
-        flatSignedAttribs = (byte*)XMALLOC(esd->signedAttribsSz, pkcs7->heap,
-                                                         DYNAMIC_TYPE_PKCS7);
-        flatSignedAttribsSz = esd->signedAttribsSz;
-        if (flatSignedAttribs == NULL) {
-#ifdef WOLFSSL_SMALL_STACK
+        #endif
+            return ret;
+        }
+
+        if (esd->signedAttribsSz > 0) {
+            flatSignedAttribs = (byte*)XMALLOC(esd->signedAttribsSz, pkcs7->heap,
+                                                             DYNAMIC_TYPE_PKCS7);
+            flatSignedAttribsSz = esd->signedAttribsSz;
+            if (flatSignedAttribs == NULL) {
+            #ifdef WOLFSSL_SMALL_STACK
+                XFREE(esd, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            #endif
+                return MEMORY_E;
+            }
+
+            FlattenAttributes(pkcs7, flatSignedAttribs,
+                                       esd->signedAttribs, esd->signedAttribsCount);
+            esd->signedAttribSetSz = SetImplicit(ASN_SET, 0, esd->signedAttribsSz,
+                                                              esd->signedAttribSet);
+        } else {
+            esd->signedAttribSetSz = 0;
+        }
+
+        /* Calculate the final hash and encrypt it. */
+        ret = wc_PKCS7_SignedDataBuildSignature(pkcs7, flatSignedAttribs,
+                                                flatSignedAttribsSz, esd);
+        if (ret < 0) {
+            if (pkcs7->signedAttribsSz != 0)
+                XFREE(flatSignedAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        #ifdef WOLFSSL_SMALL_STACK
             XFREE(esd, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-            return MEMORY_E;
-        }
-
-        FlattenAttributes(flatSignedAttribs,
-                                   esd->signedAttribs, esd->signedAttribsCount);
-        esd->signedAttribSetSz = SetImplicit(ASN_SET, 0, esd->signedAttribsSz,
-                                                          esd->signedAttribSet);
-    }
-
-    /* Calculate the final hash and encrypt it. */
-    ret = wc_PKCS7_SignedDataBuildSignature(pkcs7, flatSignedAttribs,
-                                            flatSignedAttribsSz, esd);
-    if (ret < 0) {
+        #endif
+            return ret;
+        }
+
+        signerInfoSz += flatSignedAttribsSz + esd->signedAttribSetSz;
+
+        esd->signerDigestSz = SetOctetString(esd->encContentDigestSz,
+                                                                 esd->signerDigest);
+        signerInfoSz += esd->signerDigestSz + esd->encContentDigestSz;
+
+        esd->signerInfoSeqSz = SetSequence(signerInfoSz, esd->signerInfoSeq);
+        signerInfoSz += esd->signerInfoSeqSz;
+    }
+    esd->signerInfoSetSz = SetSet(signerInfoSz, esd->signerInfoSet);
+    signerInfoSz += esd->signerInfoSetSz;
+
+    /* certificates [0] IMPLICIT CertificateSet */
+    /* get total certificates size */
+    certPtr = pkcs7->certList;
+    while (certPtr != NULL) {
+        certSetSz += certPtr->derSz;
+        certPtr = certPtr->next;
+    }
+    certPtr = NULL;
+
+    if (certSetSz > 0)
+        esd->certsSetSz = SetImplicit(ASN_SET, 0, certSetSz, esd->certsSet);
+
+    if (pkcs7->sidType != DEGENERATE_SID) {
+        esd->singleDigAlgoIdSz = SetAlgoID(pkcs7->hashOID, esd->singleDigAlgoId,
+                                      oidHashType, 0);
+    }
+    esd->digAlgoIdSetSz = SetSet(esd->singleDigAlgoIdSz, esd->digAlgoIdSet);
+
+    if (pkcs7->version == 3) {
+        /* RFC 4108 version MUST be 3 for firmware package signer */
+        esd->versionSz = SetMyVersion(3, esd->version, 0);
+    }
+    else {
+        esd->versionSz = SetMyVersion(1, esd->version, 0);
+    }
+
+    totalSz = esd->versionSz + esd->singleDigAlgoIdSz + esd->digAlgoIdSetSz +
+              esd->contentInfoSeqSz + pkcs7->contentTypeSz +
+              esd->innerContSeqSz + esd->innerOctetsSz + pkcs7->contentSz;
+    total2Sz = esd->certsSetSz + certSetSz + signerInfoSz;
+
+    if (pkcs7->detached) {
+        totalSz -= pkcs7->contentSz;
+    }
+
+    esd->innerSeqSz = SetSequence(totalSz + total2Sz, esd->innerSeq);
+    totalSz += esd->innerSeqSz;
+    esd->outerContentSz = SetExplicit(0, totalSz + total2Sz, esd->outerContent);
+    totalSz += esd->outerContentSz + signedDataOidSz;
+    esd->outerSeqSz = SetSequence(totalSz + total2Sz, esd->outerSeq);
+    totalSz += esd->outerSeqSz;
+
+    /* if using header/footer, we are not returning the content */
+    if (output2 && output2Sz) {
+        if (total2Sz > *output2Sz) {
+            if (pkcs7->signedAttribsSz != 0)
+                XFREE(flatSignedAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        #ifdef WOLFSSL_SMALL_STACK
+            XFREE(esd, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        #endif
+            return BUFFER_E;
+        }
+
+        if (!pkcs7->detached) {
+            totalSz -= pkcs7->contentSz;
+        }
+    }
+    else {
+        /* if using single output buffer include content and footer */
+        totalSz += total2Sz;
+    }
+
+    if (totalSz > *outputSz) {
         if (pkcs7->signedAttribsSz != 0)
             XFREE(flatSignedAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
-#ifdef WOLFSSL_SMALL_STACK
+    #ifdef WOLFSSL_SMALL_STACK
         XFREE(esd, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-        return ret;
-    }
-
-    signerInfoSz += flatSignedAttribsSz + esd->signedAttribSetSz;
-
-    esd->signerDigestSz = SetOctetString(esd->encContentDigestSz,
-                                                             esd->signerDigest);
-    signerInfoSz += esd->signerDigestSz + esd->encContentDigestSz;
-
-    esd->signerInfoSeqSz = SetSequence(signerInfoSz, esd->signerInfoSeq);
-    signerInfoSz += esd->signerInfoSeqSz;
-    esd->signerInfoSetSz = SetSet(signerInfoSz, esd->signerInfoSet);
-    signerInfoSz += esd->signerInfoSetSz;
-
-    esd->certsSetSz = SetImplicit(ASN_SET, 0, pkcs7->singleCertSz,
-                                                                 esd->certsSet);
-
-    esd->singleDigAlgoIdSz = SetAlgoID(pkcs7->hashOID, esd->singleDigAlgoId,
-                                      oidHashType, 0);
-    esd->digAlgoIdSetSz = SetSet(esd->singleDigAlgoIdSz, esd->digAlgoIdSet);
-
-
-    esd->versionSz = SetMyVersion(1, esd->version, 0);
-
-    totalSz = esd->versionSz + esd->singleDigAlgoIdSz + esd->digAlgoIdSetSz +
-              esd->contentInfoSeqSz + esd->certsSetSz + pkcs7->singleCertSz +
-              esd->innerOctetsSz + esd->innerContSeqSz +
-              innerOidSz + pkcs7->contentSz +
-              signerInfoSz;
-    esd->innerSeqSz = SetSequence(totalSz, esd->innerSeq);
-    totalSz += esd->innerSeqSz;
-    esd->outerContentSz = SetExplicit(0, totalSz, esd->outerContent);
-    totalSz += esd->outerContentSz + outerOidSz;
-    esd->outerSeqSz = SetSequence(totalSz, esd->outerSeq);
-    totalSz += esd->outerSeqSz;
-
-    if (outputSz < totalSz) {
-        if (pkcs7->signedAttribsSz != 0)
-            XFREE(flatSignedAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
-#ifdef WOLFSSL_SMALL_STACK
-        XFREE(esd, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
+    #endif
         return BUFFER_E;
     }
 
     idx = 0;
     XMEMCPY(output + idx, esd->outerSeq, esd->outerSeqSz);
     idx += esd->outerSeqSz;
-    XMEMCPY(output + idx, outerOid, outerOidSz);
-    idx += outerOidSz;
+    XMEMCPY(output + idx, signedDataOid, signedDataOidSz);
+    idx += signedDataOidSz;
     XMEMCPY(output + idx, esd->outerContent, esd->outerContentSz);
     idx += esd->outerContentSz;
     XMEMCPY(output + idx, esd->innerSeq, esd->innerSeqSz);
@@ -1253,74 +2402,685 @@
     idx += esd->singleDigAlgoIdSz;
     XMEMCPY(output + idx, esd->contentInfoSeq, esd->contentInfoSeqSz);
     idx += esd->contentInfoSeqSz;
-    XMEMCPY(output + idx, innerOid, innerOidSz);
-    idx += innerOidSz;
+    XMEMCPY(output + idx, pkcs7->contentType, pkcs7->contentTypeSz);
+    idx += pkcs7->contentTypeSz;
     XMEMCPY(output + idx, esd->innerContSeq, esd->innerContSeqSz);
     idx += esd->innerContSeqSz;
     XMEMCPY(output + idx, esd->innerOctets, esd->innerOctetsSz);
     idx += esd->innerOctetsSz;
-    XMEMCPY(output + idx, pkcs7->content, pkcs7->contentSz);
-    idx += pkcs7->contentSz;
-    XMEMCPY(output + idx, esd->certsSet, esd->certsSetSz);
+
+    /* support returning header and footer without content */
+    if (output2 && output2Sz) {
+        *outputSz = idx;
+        idx = 0;
+    }
+    else {
+        if (!pkcs7->detached) {
+            XMEMCPY(output + idx, pkcs7->content, pkcs7->contentSz);
+            idx += pkcs7->contentSz;
+        }
+        output2 = output;
+    }
+
+    /* certificates */
+    XMEMCPY(output2 + idx, esd->certsSet, esd->certsSetSz);
     idx += esd->certsSetSz;
-    XMEMCPY(output + idx, pkcs7->singleCert, pkcs7->singleCertSz);
-    idx += pkcs7->singleCertSz;
-    XMEMCPY(output + idx, esd->signerInfoSet, esd->signerInfoSetSz);
+    certPtr = pkcs7->certList;
+    while (certPtr != NULL) {
+        XMEMCPY(output2 + idx, certPtr->der, certPtr->derSz);
+        idx += certPtr->derSz;
+        certPtr = certPtr->next;
+    }
+    wc_PKCS7_FreeCertSet(pkcs7);
+
+    XMEMCPY(output2 + idx, esd->signerInfoSet, esd->signerInfoSetSz);
     idx += esd->signerInfoSetSz;
-    XMEMCPY(output + idx, esd->signerInfoSeq, esd->signerInfoSeqSz);
+    XMEMCPY(output2 + idx, esd->signerInfoSeq, esd->signerInfoSeqSz);
     idx += esd->signerInfoSeqSz;
-    XMEMCPY(output + idx, esd->signerVersion, esd->signerVersionSz);
+    XMEMCPY(output2 + idx, esd->signerVersion, esd->signerVersionSz);
     idx += esd->signerVersionSz;
-    XMEMCPY(output + idx, esd->issuerSnSeq, esd->issuerSnSeqSz);
-    idx += esd->issuerSnSeqSz;
-    XMEMCPY(output + idx, esd->issuerName, esd->issuerNameSz);
-    idx += esd->issuerNameSz;
-    XMEMCPY(output + idx, pkcs7->issuer, pkcs7->issuerSz);
-    idx += pkcs7->issuerSz;
-    XMEMCPY(output + idx, esd->issuerSn, esd->issuerSnSz);
-    idx += esd->issuerSnSz;
-    XMEMCPY(output + idx, esd->signerDigAlgoId, esd->signerDigAlgoIdSz);
+    /* SignerIdentifier */
+    if (pkcs7->sidType == CMS_ISSUER_AND_SERIAL_NUMBER) {
+        /* IssuerAndSerialNumber */
+        XMEMCPY(output2 + idx, esd->issuerSnSeq, esd->issuerSnSeqSz);
+        idx += esd->issuerSnSeqSz;
+        XMEMCPY(output2 + idx, esd->issuerName, esd->issuerNameSz);
+        idx += esd->issuerNameSz;
+        XMEMCPY(output2 + idx, pkcs7->issuer, pkcs7->issuerSz);
+        idx += pkcs7->issuerSz;
+        XMEMCPY(output2 + idx, esd->issuerSn, esd->issuerSnSz);
+        idx += esd->issuerSnSz;
+    } else if (pkcs7->sidType == CMS_SKID) {
+        /* SubjectKeyIdentifier */
+        XMEMCPY(output2 + idx, esd->issuerSKIDSeq, esd->issuerSKIDSeqSz);
+        idx += esd->issuerSKIDSeqSz;
+        XMEMCPY(output2 + idx, esd->issuerSKID, esd->issuerSKIDSz);
+        idx += esd->issuerSKIDSz;
+        XMEMCPY(output2 + idx, pkcs7->issuerSubjKeyId, KEYID_SIZE);
+        idx += KEYID_SIZE;
+    } else if (pkcs7->sidType == DEGENERATE_SID) {
+        /* no signer infos in degenerate case */
+    } else {
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(esd, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    #endif
+        return SKID_E;
+    }
+    XMEMCPY(output2 + idx, esd->signerDigAlgoId, esd->signerDigAlgoIdSz);
     idx += esd->signerDigAlgoIdSz;
 
     /* SignerInfo:Attributes */
     if (flatSignedAttribsSz > 0) {
-        XMEMCPY(output + idx, esd->signedAttribSet, esd->signedAttribSetSz);
+        XMEMCPY(output2 + idx, esd->signedAttribSet, esd->signedAttribSetSz);
         idx += esd->signedAttribSetSz;
-        XMEMCPY(output + idx, flatSignedAttribs, flatSignedAttribsSz);
+        XMEMCPY(output2 + idx, flatSignedAttribs, flatSignedAttribsSz);
         idx += flatSignedAttribsSz;
         XFREE(flatSignedAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
     }
 
-    XMEMCPY(output + idx, esd->digEncAlgoId, esd->digEncAlgoIdSz);
+    XMEMCPY(output2 + idx, esd->digEncAlgoId, esd->digEncAlgoIdSz);
     idx += esd->digEncAlgoIdSz;
-    XMEMCPY(output + idx, esd->signerDigest, esd->signerDigestSz);
+    XMEMCPY(output2 + idx, esd->signerDigest, esd->signerDigestSz);
     idx += esd->signerDigestSz;
-    XMEMCPY(output + idx, esd->encContentDigest, esd->encContentDigestSz);
+    XMEMCPY(output2 + idx, esd->encContentDigest, esd->encContentDigestSz);
     idx += esd->encContentDigestSz;
 
+    if (output2 && output2Sz) {
+        *output2Sz = idx;
+        idx = 0; /* success */
+    }
+    else {
+        *outputSz = idx;
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    XFREE(esd, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+    return idx;
+}
+
+/* hashBuf: The computed digest for the pkcs7->content
+ * hashSz: The size of computed digest for the pkcs7->content based on hashOID
+ * outputHead: The PKCS7 header that goes on top of the raw data signed.
+ * outputFoot: The PKCS7 footer that goes at the end of the raw data signed.
+ * pkcs7->content: Not used
+ * pkcs7->contentSz: Must be provided as actual sign of raw data
+ * return codes: 0=success, negative=error
+ */
+int wc_PKCS7_EncodeSignedData_ex(PKCS7* pkcs7, const byte* hashBuf,
+    word32 hashSz, byte* outputHead, word32* outputHeadSz, byte* outputFoot,
+    word32* outputFootSz)
+{
+    int ret;
+#ifdef WOLFSSL_SMALL_STACK
+    ESD* esd;
+#else
+    ESD  esd[1];
+#endif
+
+    /* other args checked in wc_PKCS7_EncodeSigned_ex */
+    if (pkcs7 == NULL || outputFoot == NULL || outputFootSz == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    esd = (ESD*)XMALLOC(sizeof(ESD), pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (esd == NULL)
+        return MEMORY_E;
+#endif
+
+    XMEMSET(esd, 0, sizeof(ESD));
+
+    ret = PKCS7_EncodeSigned(pkcs7, esd, hashBuf, hashSz,
+        outputHead, outputHeadSz, outputFoot, outputFootSz);
+
+#ifdef WOLFSSL_SMALL_STACK
+    XFREE(esd, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+    return ret;
+}
+
+/* Toggle detached signature mode on/off for PKCS#7/CMS SignedData content type.
+ * By default wolfCrypt includes the data to be signed in the SignedData
+ * bundle. This data can be omitted in the case when a detached signature is
+ * being created. To enable generation of detached signatures, set flag to "1",
+ * otherwise set to "0":
+ *
+ *     flag 1 turns on support
+ *     flag 0 turns off support
+ *
+ * pkcs7 - pointer to initialized PKCS7 structure
+ * flag  - turn on/off detached signature generation (1 or 0)
+ *
+ * Returns 0 on success, negative upon error. */
+int wc_PKCS7_SetDetached(PKCS7* pkcs7, word16 flag)
+{
+    if (pkcs7 == NULL || (flag != 0 && flag != 1))
+        return BAD_FUNC_ARG;
+
+    pkcs7->detached = flag;
+
+    return 0;
+}
+
+/* By default, SignedData bundles have the following signed attributes attached:
+ *     contentType (1.2.840.113549.1.9.3)
+ *     signgingTime (1.2.840.113549.1.9.5)
+ *     messageDigest (1.2.840.113549.1.9.4)
+ *
+ * Calling this API before wc_PKCS7_EncodeSignedData() will disable the
+ * inclusion of those attributes.
+ *
+ * pkcs7 - pointer to initialized PKCS7 structure
+ *
+ * Returns 0 on success, negative upon error. */
+int wc_PKCS7_NoDefaultSignedAttribs(PKCS7* pkcs7)
+{
+    if (pkcs7 == NULL)
+        return BAD_FUNC_ARG;
+
+    pkcs7->skipDefaultSignedAttribs = 1;
+
+    return 0;
+}
+
+/* return codes: >0: Size of signed PKCS7 output buffer, negative: error */
+int wc_PKCS7_EncodeSignedData(PKCS7* pkcs7, byte* output, word32 outputSz)
+{
+    int ret;
+    int hashSz;
+    enum wc_HashType hashType;
+    byte hashBuf[WC_MAX_DIGEST_SIZE];
+#ifdef WOLFSSL_SMALL_STACK
+    ESD* esd;
+#else
+    ESD  esd[1];
+#endif
+
+    /* other args checked in wc_PKCS7_EncodeSigned_ex */
+    if (pkcs7 == NULL || pkcs7->contentSz == 0 || pkcs7->content == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    /* get hash type and size, validate hashOID */
+    hashType = wc_OidGetHash(pkcs7->hashOID);
+    hashSz = wc_HashGetDigestSize(hashType);
+    if (hashSz < 0)
+        return hashSz;
+
+#ifdef WOLFSSL_SMALL_STACK
+    esd = (ESD*)XMALLOC(sizeof(ESD), pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (esd == NULL)
+        return MEMORY_E;
+#endif
+
+    XMEMSET(esd, 0, sizeof(ESD));
+    esd->hashType = hashType;
+
+    /* calculate hash for content */
+    ret = wc_HashInit(&esd->hash, esd->hashType);
+    if (ret == 0) {
+        ret = wc_HashUpdate(&esd->hash, esd->hashType,
+                            pkcs7->content, pkcs7->contentSz);
+        if (ret == 0) {
+            ret = wc_HashFinal(&esd->hash, esd->hashType, hashBuf);
+        }
+        wc_HashFree(&esd->hash, esd->hashType);
+    }
+
+    if (ret == 0) {
+        ret = PKCS7_EncodeSigned(pkcs7, esd, hashBuf, hashSz,
+            output, &outputSz, NULL, NULL);
+    }
+
 #ifdef WOLFSSL_SMALL_STACK
     XFREE(esd, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
 #endif
 
-    return idx;
-}
+    return ret;
+}
+
+
+/* Single-shot API to generate a CMS SignedData bundle that encapsulates a
+ * content of type FirmwarePkgData. Any recipient certificates should be
+ * loaded into the PKCS7 structure prior to calling this function, using
+ * wc_PKCS7_InitWithCert() and/or wc_PKCS7_AddCertificate().
+ *
+ * pkcs7                - pointer to initialized PKCS7 struct
+ * privateKey           - private RSA/ECC key, used for signing SignedData
+ * privateKeySz         - size of privateKey, octets
+ * signOID              - public key algorithm OID, used for sign operation
+ * hashOID              - hash algorithm OID, used for signature generation
+ * content              - content to be encapsulated, of type FirmwarePkgData
+ * contentSz            - size of content, octets
+ * signedAttribs        - optional signed attributes
+ * signedAttribsSz      - number of PKCS7Attrib members in signedAttribs
+ * output               - output buffer for final bundle
+ * outputSz             - size of output buffer, octets
+ *
+ * Returns length of generated bundle on success, negative upon error. */
+int wc_PKCS7_EncodeSignedFPD(PKCS7* pkcs7, byte* privateKey,
+                             word32 privateKeySz, int signOID, int hashOID,
+                             byte* content, word32 contentSz,
+                             PKCS7Attrib* signedAttribs, word32 signedAttribsSz,
+                             byte* output, word32 outputSz)
+{
+    int ret = 0;
+    WC_RNG rng;
+
+    if (pkcs7 == NULL || privateKey == NULL || privateKeySz == 0 ||
+        content == NULL || contentSz == 0 || output == NULL || outputSz == 0)
+        return BAD_FUNC_ARG;
+
+    ret = wc_InitRng(&rng);
+    if (ret != 0)
+        return ret;
+
+    pkcs7->rng = &rng;
+    pkcs7->content = content;
+    pkcs7->contentSz = contentSz;
+    pkcs7->contentOID = FIRMWARE_PKG_DATA;
+    pkcs7->hashOID = hashOID;
+    pkcs7->encryptOID = signOID;
+    pkcs7->privateKey = privateKey;
+    pkcs7->privateKeySz = privateKeySz;
+    pkcs7->signedAttribs = signedAttribs;
+    pkcs7->signedAttribsSz = signedAttribsSz;
+    pkcs7->version = 3;
+
+    ret = wc_PKCS7_EncodeSignedData(pkcs7, output, outputSz);
+    if (ret <= 0) {
+        WOLFSSL_MSG("Error encoding CMS SignedData content type");
+    }
+
+    pkcs7->rng = NULL;
+    wc_FreeRng(&rng);
+
+    return ret;
+}
+
+#ifndef NO_PKCS7_ENCRYPTED_DATA
+
+/* Single-shot API to generate a CMS SignedData bundle that encapsulates a
+ * CMS EncryptedData bundle. Content of inner EncryptedData is set to that
+ * of FirmwarePkgData. Any recipient certificates should be loaded into the
+ * PKCS7 structure prior to calling this function, using wc_PKCS7_InitWithCert()
+ * and/or wc_PKCS7_AddCertificate().
+ *
+ * pkcs7                - pointer to initialized PKCS7 struct
+ * encryptKey           - encryption key used for encrypting EncryptedData
+ * encryptKeySz         - size of encryptKey, octets
+ * privateKey           - private RSA/ECC key, used for signing SignedData
+ * privateKeySz         - size of privateKey, octets
+ * encryptOID           - encryption algorithm OID, to be used as encryption
+ *                        algorithm for EncryptedData
+ * signOID              - public key algorithm OID, to be used for sign
+ *                        operation in SignedData generation
+ * hashOID              - hash algorithm OID, to be used for signature in
+ *                        SignedData generation
+ * content              - content to be encapsulated
+ * contentSz            - size of content, octets
+ * unprotectedAttribs   - optional unprotected attributes, for EncryptedData
+ * unprotectedAttribsSz - number of PKCS7Attrib members in unprotectedAttribs
+ * signedAttribs        - optional signed attributes, for SignedData
+ * signedAttribsSz      - number of PKCS7Attrib members in signedAttribs
+ * output               - output buffer for final bundle
+ * outputSz             - size of output buffer, octets
+ *
+ * Returns length of generated bundle on success, negative upon error. */
+int wc_PKCS7_EncodeSignedEncryptedFPD(PKCS7* pkcs7, byte* encryptKey,
+                                      word32 encryptKeySz, byte* privateKey,
+                                      word32 privateKeySz, int encryptOID,
+                                      int signOID, int hashOID,
+                                      byte* content, word32 contentSz,
+                                      PKCS7Attrib* unprotectedAttribs,
+                                      word32 unprotectedAttribsSz,
+                                      PKCS7Attrib* signedAttribs,
+                                      word32 signedAttribsSz,
+                                      byte* output, word32 outputSz)
+{
+    int ret = 0, encryptedSz = 0;
+    byte* encrypted = NULL;
+    WC_RNG rng;
+
+    if (pkcs7 == NULL || encryptKey == NULL || encryptKeySz == 0 ||
+        privateKey == NULL || privateKeySz == 0 || content == NULL ||
+        contentSz == 0 || output == NULL || outputSz == 0) {
+        return BAD_FUNC_ARG;
+    }
+
+    /* 1: build up EncryptedData using FirmwarePkgData type, use output
+     *    buffer as tmp for storage and to get size */
+
+    /* set struct elements, inner content type is FirmwarePkgData */
+    pkcs7->content = content;
+    pkcs7->contentSz = contentSz;
+    pkcs7->contentOID = FIRMWARE_PKG_DATA;
+    pkcs7->encryptOID = encryptOID;
+    pkcs7->encryptionKey = encryptKey;
+    pkcs7->encryptionKeySz = encryptKeySz;
+    pkcs7->unprotectedAttribs = unprotectedAttribs;
+    pkcs7->unprotectedAttribsSz = unprotectedAttribsSz;
+    pkcs7->version = 3;
+
+    encryptedSz = wc_PKCS7_EncodeEncryptedData(pkcs7, output, outputSz);
+    if (encryptedSz < 0) {
+        WOLFSSL_MSG("Error encoding CMS EncryptedData content type");
+        return encryptedSz;
+    }
+
+    /* save encryptedData, reset output buffer and struct */
+    encrypted = (byte*)XMALLOC(encryptedSz, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+    if (encrypted == NULL) {
+        ForceZero(output, outputSz);
+        return MEMORY_E;
+    }
+
+    XMEMCPY(encrypted, output, encryptedSz);
+    ForceZero(output, outputSz);
+
+    ret = wc_InitRng(&rng);
+    if (ret != 0) {
+        ForceZero(encrypted, encryptedSz);
+        XFREE(encrypted, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        return ret;
+    }
+
+    /* 2: build up SignedData, encapsulating EncryptedData */
+    pkcs7->rng = &rng;
+    pkcs7->content = encrypted;
+    pkcs7->contentSz = encryptedSz;
+    pkcs7->contentOID = ENCRYPTED_DATA;
+    pkcs7->hashOID = hashOID;
+    pkcs7->encryptOID = signOID;
+    pkcs7->privateKey = privateKey;
+    pkcs7->privateKeySz = privateKeySz;
+    pkcs7->signedAttribs = signedAttribs;
+    pkcs7->signedAttribsSz = signedAttribsSz;
+
+    ret = wc_PKCS7_EncodeSignedData(pkcs7, output, outputSz);
+    if (ret <= 0) {
+        WOLFSSL_MSG("Error encoding CMS SignedData content type");
+    }
+
+    ForceZero(encrypted, encryptedSz);
+    XFREE(encrypted, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+    pkcs7->rng = NULL;
+    wc_FreeRng(&rng);
+
+    return ret;
+}
+
+#endif /* NO_PKCS7_ENCRYPTED_DATA */
+
+#if defined(HAVE_LIBZ) && !defined(NO_PKCS7_COMPRESSED_DATA)
+/* Single-shot API to generate a CMS SignedData bundle that encapsulates a
+ * CMS CompressedData bundle. Content of inner CompressedData is set to that
+ * of FirmwarePkgData. Any recipient certificates should be loaded into the
+ * PKCS7 structure prior to calling this function, using wc_PKCS7_InitWithCert()
+ * and/or wc_PKCS7_AddCertificate().
+ *
+ * pkcs7                - pointer to initialized PKCS7 struct
+ * privateKey           - private RSA/ECC key, used for signing SignedData
+ * privateKeySz         - size of privateKey, octets
+ * signOID              - public key algorithm OID, to be used for sign
+ *                        operation in SignedData generation
+ * hashOID              - hash algorithm OID, to be used for signature in
+ *                        SignedData generation
+ * content              - content to be encapsulated
+ * contentSz            - size of content, octets
+ * signedAttribs        - optional signed attributes, for SignedData
+ * signedAttribsSz      - number of PKCS7Attrib members in signedAttribs
+ * output               - output buffer for final bundle
+ * outputSz             - size of output buffer, octets
+ *
+ * Returns length of generated bundle on success, negative upon error. */
+int wc_PKCS7_EncodeSignedCompressedFPD(PKCS7* pkcs7, byte* privateKey,
+                                       word32 privateKeySz, int signOID,
+                                       int hashOID, byte* content,
+                                       word32 contentSz,
+                                       PKCS7Attrib* signedAttribs,
+                                       word32 signedAttribsSz, byte* output,
+                                       word32 outputSz)
+{
+    int ret = 0, compressedSz = 0;
+    byte* compressed = NULL;
+    WC_RNG rng;
+
+    if (pkcs7 == NULL || privateKey == NULL || privateKeySz == 0 ||
+        content == NULL || contentSz == 0 || output == NULL || outputSz == 0) {
+        return BAD_FUNC_ARG;
+    }
+
+    /* 1: build up CompressedData using FirmwarePkgData type, use output
+     *    buffer as tmp for storage and to get size */
+
+    /* set struct elements, inner content type is FirmwarePkgData */
+    pkcs7->content = content;
+    pkcs7->contentSz = contentSz;
+    pkcs7->contentOID = FIRMWARE_PKG_DATA;
+    pkcs7->version = 3;
+
+    compressedSz = wc_PKCS7_EncodeCompressedData(pkcs7, output, outputSz);
+    if (compressedSz < 0) {
+        WOLFSSL_MSG("Error encoding CMS CompressedData content type");
+        return compressedSz;
+    }
+
+    /* save compressedData, reset output buffer and struct */
+    compressed = (byte*)XMALLOC(compressedSz, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+    if (compressed == NULL) {
+        ForceZero(output, outputSz);
+        return MEMORY_E;
+    }
+
+    XMEMCPY(compressed, output, compressedSz);
+    ForceZero(output, outputSz);
+
+    ret = wc_InitRng(&rng);
+    if (ret != 0) {
+        ForceZero(compressed, compressedSz);
+        XFREE(compressed, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        return ret;
+    }
+
+    /* 2: build up SignedData, encapsulating EncryptedData */
+    pkcs7->rng = &rng;
+    pkcs7->content = compressed;
+    pkcs7->contentSz = compressedSz;
+    pkcs7->contentOID = COMPRESSED_DATA;
+    pkcs7->hashOID = hashOID;
+    pkcs7->encryptOID = signOID;
+    pkcs7->privateKey = privateKey;
+    pkcs7->privateKeySz = privateKeySz;
+    pkcs7->signedAttribs = signedAttribs;
+    pkcs7->signedAttribsSz = signedAttribsSz;
+
+    ret = wc_PKCS7_EncodeSignedData(pkcs7, output, outputSz);
+    if (ret <= 0) {
+        WOLFSSL_MSG("Error encoding CMS SignedData content type");
+    }
+
+    ForceZero(compressed, compressedSz);
+    XFREE(compressed, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+    pkcs7->rng = NULL;
+    wc_FreeRng(&rng);
+
+    return ret;
+}
+
+#ifndef NO_PKCS7_ENCRYPTED_DATA
+
+/* Single-shot API to generate a CMS SignedData bundle that encapsulates a
+ * CMS EncryptedData bundle, which then encapsulates a CMS CompressedData
+ * bundle. Content of inner CompressedData is set to that of FirmwarePkgData.
+ * Any recipient certificates should be loaded into the PKCS7 structure prior
+ * to calling this function, using wc_PKCS7_InitWithCert() and/or
+ * wc_PKCS7_AddCertificate().
+ *
+ * pkcs7                - pointer to initialized PKCS7 struct
+ * encryptKey           - encryption key used for encrypting EncryptedData
+ * encryptKeySz         - size of encryptKey, octets
+ * privateKey           - private RSA/ECC key, used for signing SignedData
+ * privateKeySz         - size of privateKey, octets
+ * encryptOID           - encryption algorithm OID, to be used as encryption
+ *                        algorithm for EncryptedData
+ * signOID              - public key algorithm OID, to be used for sign
+ *                        operation in SignedData generation
+ * hashOID              - hash algorithm OID, to be used for signature in
+ *                        SignedData generation
+ * content              - content to be encapsulated
+ * contentSz            - size of content, octets
+ * unprotectedAttribs   - optional unprotected attributes, for EncryptedData
+ * unprotectedAttribsSz - number of PKCS7Attrib members in unprotectedAttribs
+ * signedAttribs        - optional signed attributes, for SignedData
+ * signedAttribsSz      - number of PKCS7Attrib members in signedAttribs
+ * output               - output buffer for final bundle
+ * outputSz             - size of output buffer, octets
+ *
+ * Returns length of generated bundle on success, negative upon error. */
+int  wc_PKCS7_EncodeSignedEncryptedCompressedFPD(PKCS7* pkcs7, byte* encryptKey,
+                                       word32 encryptKeySz, byte* privateKey,
+                                       word32 privateKeySz, int encryptOID,
+                                       int signOID, int hashOID, byte* content,
+                                       word32 contentSz,
+                                       PKCS7Attrib* unprotectedAttribs,
+                                       word32 unprotectedAttribsSz,
+                                       PKCS7Attrib* signedAttribs,
+                                       word32 signedAttribsSz,
+                                       byte* output, word32 outputSz)
+{
+    int ret = 0, compressedSz = 0, encryptedSz = 0;
+    byte* compressed = NULL;
+    byte* encrypted = NULL;
+    WC_RNG rng;
+
+    if (pkcs7 == NULL || encryptKey == NULL || encryptKeySz == 0 ||
+        privateKey == NULL || privateKeySz == 0 || content == NULL ||
+        contentSz == 0 || output == NULL || outputSz == 0) {
+        return BAD_FUNC_ARG;
+    }
+
+    /* 1: build up CompressedData using FirmwarePkgData type, use output
+     *    buffer as tmp for storage and to get size */
+    pkcs7->content = content;
+    pkcs7->contentSz = contentSz;
+    pkcs7->contentOID = FIRMWARE_PKG_DATA;
+    pkcs7->version = 3;
+
+    compressedSz = wc_PKCS7_EncodeCompressedData(pkcs7, output, outputSz);
+    if (compressedSz < 0) {
+        WOLFSSL_MSG("Error encoding CMS CompressedData content type");
+        return compressedSz;
+    }
+
+    /* save compressedData, reset output buffer and struct */
+    compressed = (byte*)XMALLOC(compressedSz, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+    if (compressed == NULL)
+        return MEMORY_E;
+
+    XMEMCPY(compressed, output, compressedSz);
+    ForceZero(output, outputSz);
+
+    /* 2: build up EncryptedData using CompressedData, use output
+     *    buffer as tmp for storage and to get size */
+    pkcs7->content = compressed;
+    pkcs7->contentSz = compressedSz;
+    pkcs7->contentOID = COMPRESSED_DATA;
+    pkcs7->encryptOID = encryptOID;
+    pkcs7->encryptionKey = encryptKey;
+    pkcs7->encryptionKeySz = encryptKeySz;
+    pkcs7->unprotectedAttribs = unprotectedAttribs;
+    pkcs7->unprotectedAttribsSz = unprotectedAttribsSz;
+
+    encryptedSz = wc_PKCS7_EncodeEncryptedData(pkcs7, output, outputSz);
+    if (encryptedSz < 0) {
+        WOLFSSL_MSG("Error encoding CMS EncryptedData content type");
+        XFREE(compressed, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        return encryptedSz;
+    }
+
+    /* save encryptedData, reset output buffer and struct */
+    encrypted = (byte*)XMALLOC(encryptedSz, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+    if (encrypted == NULL) {
+        ForceZero(compressed, compressedSz);
+        XFREE(compressed, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        return MEMORY_E;
+    }
+
+    XMEMCPY(encrypted, output, encryptedSz);
+    ForceZero(compressed, compressedSz);
+    XFREE(compressed, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+    ForceZero(output, outputSz);
+
+    ret = wc_InitRng(&rng);
+    if (ret != 0) {
+        ForceZero(encrypted, encryptedSz);
+        XFREE(encrypted, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        return ret;
+    }
+
+    /* 3: build up SignedData, encapsulating EncryptedData */
+    pkcs7->rng = &rng;
+    pkcs7->content = encrypted;
+    pkcs7->contentSz = encryptedSz;
+    pkcs7->contentOID = ENCRYPTED_DATA;
+    pkcs7->hashOID = hashOID;
+    pkcs7->encryptOID = signOID;
+    pkcs7->privateKey = privateKey;
+    pkcs7->privateKeySz = privateKeySz;
+    pkcs7->signedAttribs = signedAttribs;
+    pkcs7->signedAttribsSz = signedAttribsSz;
+
+    ret = wc_PKCS7_EncodeSignedData(pkcs7, output, outputSz);
+    if (ret <= 0) {
+        WOLFSSL_MSG("Error encoding CMS SignedData content type");
+    }
+
+    ForceZero(encrypted, encryptedSz);
+    XFREE(encrypted, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+    pkcs7->rng = NULL;
+    wc_FreeRng(&rng);
+
+    return ret;
+}
+
+#endif /* !NO_PKCS7_ENCRYPTED_DATA */
+#endif /* HAVE_LIBZ && !NO_PKCS7_COMPRESSED_DATA */
 
 
 #ifndef NO_RSA
 
+#ifdef HAVE_PKCS7_RSA_RAW_SIGN_CALLBACK
+/* register raw RSA sign digest callback */
+int wc_PKCS7_SetRsaSignRawDigestCb(PKCS7* pkcs7, CallbackRsaSignRawDigest cb)
+{
+    if (pkcs7 == NULL || cb == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    pkcs7->rsaSignRawDigestCb = cb;
+
+    return 0;
+}
+#endif
+
 /* returns size of signature put into out, negative on error */
 static int wc_PKCS7_RsaVerify(PKCS7* pkcs7, byte* sig, int sigSz,
                               byte* hash, word32 hashSz)
 {
-    int ret = 0;
-    word32 scratch = 0;
+    int ret = 0, i;
+    word32 scratch = 0, verified = 0;
 #ifdef WOLFSSL_SMALL_STACK
     byte* digest;
     RsaKey* key;
+    DecodedCert* dCert;
 #else
     byte digest[MAX_PKCS7_DIGEST_SZ];
-    RsaKey stack_key;
-    RsaKey* key = &stack_key;
+    RsaKey key[1];
+    DecodedCert stack_dCert;
+    DecodedCert* dCert = &stack_dCert;
 #endif
 
     if (pkcs7 == NULL || sig == NULL || hash == NULL) {
@@ -1330,7 +3090,6 @@
 #ifdef WOLFSSL_SMALL_STACK
     digest = (byte*)XMALLOC(MAX_PKCS7_DIGEST_SZ, pkcs7->heap,
                             DYNAMIC_TYPE_TMP_BUFFER);
-
     if (digest == NULL)
         return MEMORY_E;
 
@@ -1339,41 +3098,88 @@
         XFREE(digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
         return MEMORY_E;
     }
+
+    dCert = (DecodedCert*)XMALLOC(sizeof(DecodedCert), pkcs7->heap,
+                                  DYNAMIC_TYPE_DCERT);
+    if (dCert == NULL) {
+        XFREE(digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(key, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        return MEMORY_E;
+    }
 #endif
 
     XMEMSET(digest, 0, MAX_PKCS7_DIGEST_SZ);
 
-    ret = wc_InitRsaKey_ex(key, pkcs7->heap, pkcs7->devId);
-    if (ret != 0) {
-#ifdef WOLFSSL_SMALL_STACK
-        XFREE(digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
-        XFREE(key,    pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-        return ret;
-    }
-
-    if (wc_RsaPublicKeyDecode(pkcs7->publicKey, &scratch, key,
-                              pkcs7->publicKeySz) < 0) {
-        WOLFSSL_MSG("ASN RSA key decode error");
+    /* loop over certs received in certificates set, try to find one
+     * that will validate signature */
+    for (i = 0; i < MAX_PKCS7_CERTS; i++) {
+
+        verified = 0;
+        scratch  = 0;
+
+        if (pkcs7->certSz[i] == 0)
+            continue;
+
+        ret = wc_InitRsaKey_ex(key, pkcs7->heap, pkcs7->devId);
+        if (ret != 0) {
+#ifdef WOLFSSL_SMALL_STACK
+            XFREE(digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            XFREE(key,    pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            XFREE(dCert,  pkcs7->heap, DYNAMIC_TYPE_DCERT);
+#endif
+            return ret;
+        }
+
+        InitDecodedCert(dCert, pkcs7->cert[i], pkcs7->certSz[i], pkcs7->heap);
+        /* not verifying, only using this to extract public key */
+        ret = ParseCert(dCert, CA_TYPE, NO_VERIFY, 0);
+        if (ret < 0) {
+            WOLFSSL_MSG("ASN RSA cert parse error");
+            FreeDecodedCert(dCert);
+            wc_FreeRsaKey(key);
+            continue;
+        }
+
+        if (wc_RsaPublicKeyDecode(dCert->publicKey, &scratch, key,
+                                  dCert->pubKeySize) < 0) {
+            WOLFSSL_MSG("ASN RSA key decode error");
+            FreeDecodedCert(dCert);
+            wc_FreeRsaKey(key);
+            continue;
+        }
+
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        do {
+            ret = wc_AsyncWait(ret, &key->asyncDev,
+                WC_ASYNC_FLAG_CALL_AGAIN);
+    #endif
+            if (ret >= 0) {
+                ret = wc_RsaSSL_Verify(sig, sigSz, digest, MAX_PKCS7_DIGEST_SZ,
+                    key);
+            }
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        } while (ret == WC_PENDING_E);
+    #endif
+        FreeDecodedCert(dCert);
         wc_FreeRsaKey(key);
-#ifdef WOLFSSL_SMALL_STACK
-        XFREE(digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
-        XFREE(key,    pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-        return PUBLIC_KEY_E;
-    }
-
-    ret = wc_RsaSSL_Verify(sig, sigSz, digest, MAX_PKCS7_DIGEST_SZ, key);
-
-    wc_FreeRsaKey(key);
-
-    if (((int)hashSz != ret) || (XMEMCMP(digest, hash, ret) != 0)) {
+
+        if ((ret > 0) && (hashSz == (word32)ret)) {
+            if (XMEMCMP(digest, hash, hashSz) == 0) {
+                /* found signer that successfully verified signature */
+                verified = 1;
+                break;
+            }
+        }
+    }
+
+    if (verified == 0) {
         ret = SIG_VERIFY_E;
     }
 
 #ifdef WOLFSSL_SMALL_STACK
     XFREE(digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
     XFREE(key,    pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    XFREE(dCert,  pkcs7->heap, DYNAMIC_TYPE_DCERT);
 #endif
 
     return ret;
@@ -1388,15 +3194,18 @@
 static int wc_PKCS7_EcdsaVerify(PKCS7* pkcs7, byte* sig, int sigSz,
                                 byte* hash, word32 hashSz)
 {
-    int ret = 0;
+    int ret = 0, i;
     int res = 0;
+    int verified = 0;
 #ifdef WOLFSSL_SMALL_STACK
     byte* digest;
     ecc_key* key;
+    DecodedCert* dCert;
 #else
     byte digest[MAX_PKCS7_DIGEST_SZ];
-    ecc_key stack_key;
-    ecc_key* key = &stack_key;
+    ecc_key key[1];
+    DecodedCert stack_dCert;
+    DecodedCert* dCert = &stack_dCert;
 #endif
     word32 idx = 0;
 
@@ -1406,7 +3215,6 @@
 #ifdef WOLFSSL_SMALL_STACK
     digest = (byte*)XMALLOC(MAX_PKCS7_DIGEST_SZ, pkcs7->heap,
                             DYNAMIC_TYPE_TMP_BUFFER);
-
     if (digest == NULL)
         return MEMORY_E;
 
@@ -1415,41 +3223,85 @@
         XFREE(digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
         return MEMORY_E;
     }
+
+    dCert = (DecodedCert*)XMALLOC(sizeof(DecodedCert), pkcs7->heap,
+                                  DYNAMIC_TYPE_DCERT);
+    if (dCert == NULL) {
+        XFREE(digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(key,    pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        return MEMORY_E;
+    }
 #endif
 
     XMEMSET(digest, 0, MAX_PKCS7_DIGEST_SZ);
 
-    ret = wc_ecc_init_ex(key, pkcs7->heap, pkcs7->devId);
-    if (ret != 0) {
-#ifdef WOLFSSL_SMALL_STACK
-        XFREE(digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
-        XFREE(key,    pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-        return ret;
-    }
-
-    if (wc_EccPublicKeyDecode(pkcs7->publicKey, &idx, key,
-                                                      pkcs7->publicKeySz) < 0) {
-        WOLFSSL_MSG("ASN ECDSA key decode error");
+    /* loop over certs received in certificates set, try to find one
+     * that will validate signature */
+    for (i = 0; i < MAX_PKCS7_CERTS; i++) {
+
+        verified = 0;
+
+        if (pkcs7->certSz[i] == 0)
+            continue;
+
+        ret = wc_ecc_init_ex(key, pkcs7->heap, pkcs7->devId);
+        if (ret != 0) {
+#ifdef WOLFSSL_SMALL_STACK
+            XFREE(digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            XFREE(key,    pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            XFREE(dCert,  pkcs7->heap, DYNAMIC_TYPE_DCERT);
+#endif
+            return ret;
+        }
+
+        InitDecodedCert(dCert, pkcs7->cert[i], pkcs7->certSz[i], pkcs7->heap);
+        /* not verifying, only using this to extract public key */
+        ret = ParseCert(dCert, CA_TYPE, NO_VERIFY, 0);
+        if (ret < 0) {
+            WOLFSSL_MSG("ASN ECC cert parse error");
+            FreeDecodedCert(dCert);
+            wc_ecc_free(key);
+            continue;
+        }
+
+        if (wc_EccPublicKeyDecode(pkcs7->publicKey, &idx, key,
+                                  pkcs7->publicKeySz) < 0) {
+            WOLFSSL_MSG("ASN ECC key decode error");
+            FreeDecodedCert(dCert);
+            wc_ecc_free(key);
+            continue;
+        }
+
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        do {
+            ret = wc_AsyncWait(ret, &key->asyncDev,
+                WC_ASYNC_FLAG_CALL_AGAIN);
+    #endif
+            if (ret >= 0) {
+                ret = wc_ecc_verify_hash(sig, sigSz, hash, hashSz, &res, key);
+            }
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        } while (ret == WC_PENDING_E);
+    #endif
+
+        FreeDecodedCert(dCert);
         wc_ecc_free(key);
-#ifdef WOLFSSL_SMALL_STACK
-        XFREE(digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
-        XFREE(key,    pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-        return PUBLIC_KEY_E;
-    }
-
-    ret = wc_ecc_verify_hash(sig, sigSz, hash, hashSz, &res, key);
-
-    wc_ecc_free(key);
-
-    if (ret == 0 && res != 1) {
+
+        if (ret == 0 && res == 1) {
+            /* found signer that successfully verified signature */
+            verified = 1;
+            break;
+        }
+    }
+
+    if (verified == 0) {
         ret = SIG_VERIFY_E;
     }
 
 #ifdef WOLFSSL_SMALL_STACK
     XFREE(digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
     XFREE(key,    pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    XFREE(dCert,  pkcs7->heap, DYNAMIC_TYPE_DCERT);
 #endif
 
     return ret;
@@ -1473,10 +3325,11 @@
 static int wc_PKCS7_BuildSignedDataDigest(PKCS7* pkcs7, byte* signedAttrib,
                                       word32 signedAttribSz, byte* pkcs7Digest,
                                       word32* pkcs7DigestSz, byte** plainDigest,
-                                      word32* plainDigestSz)
-{
-    int ret = 0, digIdx = 0, hashSz;
-    word32 attribSetSz;
+                                      word32* plainDigestSz,
+                                      const byte* hashBuf, word32 hashBufSz)
+{
+    int ret = 0, digIdx = 0;
+    word32 attribSetSz = 0, hashSz = 0;
     byte attribSet[MAX_SET_SZ];
     byte digest[WC_MAX_DIGEST_SIZE];
     byte digestInfoSeq[MAX_SEQ_SZ];
@@ -1486,17 +3339,37 @@
 #ifdef WOLFSSL_SMALL_STACK
     byte* digestInfo;
 #else
-    byte digestInfo[MAX_PKCS7_DIGEST_SZ];
+    byte  digestInfo[MAX_PKCS7_DIGEST_SZ];
 #endif
 
     wc_HashAlg hash;
     enum wc_HashType hashType;
 
+    /* check arguments */
     if (pkcs7 == NULL || pkcs7Digest == NULL ||
         pkcs7DigestSz == NULL || plainDigest == NULL) {
         return BAD_FUNC_ARG;
     }
 
+    hashType = wc_OidGetHash(pkcs7->hashOID);
+    ret = wc_HashGetDigestSize(hashType);
+    if (ret < 0)
+        return ret;
+    hashSz = ret;
+
+    if (signedAttribSz > 0) {
+        if (signedAttrib == NULL)
+            return BAD_FUNC_ARG;
+    }
+    else {
+        if (hashBuf && hashBufSz > 0) {
+            if (hashSz != hashBufSz)
+                return BAD_FUNC_ARG;
+        }
+        else if (pkcs7->content == NULL)
+            return BAD_FUNC_ARG;
+    }
+
 #ifdef WOLFSSL_SMALL_STACK
     digestInfo = (byte*)XMALLOC(MAX_PKCS7_DIGEST_SZ, pkcs7->heap,
         DYNAMIC_TYPE_TMP_BUFFER);
@@ -1508,81 +3381,40 @@
     XMEMSET(digest,      0, WC_MAX_DIGEST_SIZE);
     XMEMSET(digestInfo,  0, MAX_PKCS7_DIGEST_SZ);
 
-    hashType = wc_OidGetHash(pkcs7->hashOID);
-    ret = wc_HashGetDigestSize(hashType);
-    if (ret < 0) {
-#ifdef WOLFSSL_SMALL_STACK
-        XFREE(digestInfo, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-        return ret;
-    }
-    hashSz = ret;
 
     /* calculate digest */
-    ret = wc_HashInit(&hash, hashType);
-    if (ret < 0) {
-#ifdef WOLFSSL_SMALL_STACK
-        XFREE(digestInfo, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-        return ret;
-    }
-
-    if (signedAttribSz > 0) {
-
-        if (signedAttrib == NULL) {
-#ifdef WOLFSSL_SMALL_STACK
+    if (hashBuf && hashBufSz > 0 && signedAttribSz == 0) {
+        XMEMCPY(digest, hashBuf, hashBufSz);
+    }
+    else {
+        ret = wc_HashInit(&hash, hashType);
+        if (ret < 0) {
+    #ifdef WOLFSSL_SMALL_STACK
             XFREE(digestInfo, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-            return BAD_FUNC_ARG;
-        }
-
-        attribSetSz = SetSet(signedAttribSz, attribSet);
-        ret = wc_HashUpdate(&hash, hashType, attribSet, attribSetSz);
-        if (ret < 0) {
-#ifdef WOLFSSL_SMALL_STACK
-            XFREE(digestInfo, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-            return ret;
-        }
-
-        ret = wc_HashUpdate(&hash, hashType, signedAttrib, signedAttribSz);
-        if (ret < 0) {
-#ifdef WOLFSSL_SMALL_STACK
-            XFREE(digestInfo, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
+    #endif
             return ret;
         }
 
-        ret = wc_HashFinal(&hash, hashType, digest);
+        if (signedAttribSz > 0) {
+            attribSetSz = SetSet(signedAttribSz, attribSet);
+
+            /* calculate digest */
+            ret = wc_HashUpdate(&hash, hashType, attribSet, attribSetSz);
+            if (ret == 0)
+                ret = wc_HashUpdate(&hash, hashType, signedAttrib, signedAttribSz);
+            if (ret == 0)
+                ret = wc_HashFinal(&hash, hashType, digest);
+        } else {
+            ret = wc_HashUpdate(&hash, hashType, pkcs7->content, pkcs7->contentSz);
+            if (ret == 0)
+                ret = wc_HashFinal(&hash, hashType, digest);
+        }
+
+        wc_HashFree(&hash, hashType);
         if (ret < 0) {
-#ifdef WOLFSSL_SMALL_STACK
-            XFREE(digestInfo, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-            return ret;
-        }
-
-    } else {
-
-        if (pkcs7->content == NULL) {
-#ifdef WOLFSSL_SMALL_STACK
+    #ifdef WOLFSSL_SMALL_STACK
             XFREE(digestInfo, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-            return BAD_FUNC_ARG;
-        }
-
-        ret = wc_HashUpdate(&hash, hashType, pkcs7->content, pkcs7->contentSz);
-        if (ret < 0) {
-#ifdef WOLFSSL_SMALL_STACK
-            XFREE(digestInfo, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-            return ret;
-        }
-
-        ret = wc_HashFinal(&hash, hashType, digest);
-        if (ret < 0) {
-#ifdef WOLFSSL_SMALL_STACK
-            XFREE(digestInfo, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
+    #endif
             return ret;
         }
     }
@@ -1617,6 +3449,127 @@
 }
 
 
+/* Verifies CMS/PKCS7 SignedData content digest matches that which is
+ * included in the messageDigest signed attribute. Only called when
+ * signed attributes are present, otherwise original signature verification
+ * is done over content.
+ *
+ * pkcs7          - pointer to initialized PKCS7 struct
+ * hashBuf        - pointer to user-provided hash buffer, used with
+ *                  wc_PKCS7_VerifySignedData_ex()
+ * hashBufSz      - size of hashBuf, octets
+ *
+ * return 0 on success, negative on error */
+static int wc_PKCS7_VerifyContentMessageDigest(PKCS7* pkcs7,
+                                               const byte* hashBuf,
+                                               word32 hashSz)
+{
+    int ret = 0, digestSz = 0, innerAttribSz = 0;
+    word32 idx = 0;
+    byte* digestBuf = NULL;
+#ifdef WOLFSSL_SMALL_STACK
+    byte* digest = NULL;
+#else
+    byte  digest[MAX_PKCS7_DIGEST_SZ];
+#endif
+    PKCS7DecodedAttrib* attrib;
+    enum wc_HashType hashType;
+
+    /* messageDigest OID (1.2.840.113549.1.9.4) */
+    const byte mdOid[] =
+            { 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x09, 0x04 };
+
+    if (pkcs7 == NULL)
+        return BAD_FUNC_ARG;
+
+    if ((pkcs7->content == NULL || pkcs7->contentSz == 0) &&
+        (hashBuf == NULL || hashSz == 0)) {
+        WOLFSSL_MSG("SignedData bundle has no content or hash to verify");
+        return BAD_FUNC_ARG;
+    }
+
+    /* lookup messageDigest attribute */
+    attrib = findAttrib(pkcs7, mdOid, sizeof(mdOid));
+    if (attrib == NULL) {
+        WOLFSSL_MSG("messageDigest attribute not in bundle, must be when "
+                    "signed attribs are present");
+        return ASN_PARSE_E;
+    }
+
+    /* advance past attrib->value ASN.1 header and length */
+    if (attrib->value == NULL || attrib->valueSz == 0)
+        return ASN_PARSE_E;
+
+    if (attrib->value[idx++] != ASN_OCTET_STRING)
+        return ASN_PARSE_E;
+
+    if (GetLength(attrib->value, &idx, &innerAttribSz, attrib->valueSz) < 0)
+        return ASN_PARSE_E;
+
+    /* get hash type and size */
+    hashType = wc_OidGetHash(pkcs7->hashOID);
+    if (hashType == WC_HASH_TYPE_NONE) {
+        WOLFSSL_MSG("Error getting hash type for PKCS7 content verification");
+        return BAD_FUNC_ARG;
+    }
+
+    /* build content hash if needed, or use existing hash value */
+    if (hashBuf == NULL) {
+
+#ifdef WOLFSSL_SMALL_STACK
+        digest = (byte*)XMALLOC(MAX_PKCS7_DIGEST_SZ, pkcs7->heap,
+                                DYNAMIC_TYPE_TMP_BUFFER);
+        if (digest == NULL)
+            return MEMORY_E;
+#endif
+        XMEMSET(digest, 0, MAX_PKCS7_DIGEST_SZ);
+
+        ret = wc_Hash(hashType, pkcs7->content, pkcs7->contentSz, digest,
+                      MAX_PKCS7_DIGEST_SZ);
+        if (ret < 0) {
+            WOLFSSL_MSG("Error hashing PKCS7 content for verification");
+#ifdef WOLFSSL_SMALL_STACK
+            XFREE(digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+            return ret;
+        }
+
+        digestBuf = digest;
+        digestSz = wc_HashGetDigestSize(hashType);
+        if (digestSz < 0) {
+            WOLFSSL_MSG("Invalid hash type");
+#ifdef WOLFSSL_SMALL_STACK
+            XFREE(digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+            return digestSz;
+        }
+    } else {
+
+        /* user passed in pre-computed hash */
+        digestBuf = (byte*)hashBuf;
+        digestSz  = (int)hashSz;
+    }
+
+    /* compare generated to hash in messageDigest attribute */
+    if ((innerAttribSz != digestSz) ||
+        (XMEMCMP(attrib->value + idx, digestBuf, (word32)digestSz) != 0)) {
+        WOLFSSL_MSG("Content digest does not match messageDigest attrib value");
+#ifdef WOLFSSL_SMALL_STACK
+        XFREE(digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+        return SIG_VERIFY_E;
+    }
+
+    if (hashBuf == NULL) {
+#ifdef WOLFSSL_SMALL_STACK
+        XFREE(digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+    }
+
+    return 0;
+}
+
+
 /* verifies SignedData signature, over either PKCS#7 DigestInfo or
  * content digest.
  *
@@ -1628,8 +3581,9 @@
  *
  * return 0 on success, negative on error */
 static int wc_PKCS7_SignedDataVerifySignature(PKCS7* pkcs7, byte* sig,
-                                              word32 sigSz, byte* signedAttrib,
-                                              word32 signedAttribSz)
+                                             word32 sigSz, byte* signedAttrib,
+                                             word32 signedAttribSz,
+                                             const byte* hashBuf, word32 hashSz)
 {
     int ret = 0;
     word32 plainDigestSz = 0, pkcs7DigestSz;
@@ -1643,19 +3597,33 @@
     if (pkcs7 == NULL)
         return BAD_FUNC_ARG;
 
-#ifdef WOLFSSL_SMALL_STACK
-    pkcs7Digest = (byte*)XMALLOC(MAX_PKCS7_DIGEST_SZ, pkcs7->heap,
+    /* allocate space to build hash */
+    pkcs7DigestSz = MAX_PKCS7_DIGEST_SZ;
+#ifdef WOLFSSL_SMALL_STACK
+    pkcs7Digest = (byte*)XMALLOC(pkcs7DigestSz, pkcs7->heap,
                                  DYNAMIC_TYPE_TMP_BUFFER);
     if (pkcs7Digest == NULL)
         return MEMORY_E;
 #endif
 
+    XMEMSET(pkcs7Digest, 0, pkcs7DigestSz);
+
+    /* verify signed attrib digest matches that of content */
+    if (signedAttrib != NULL) {
+        ret = wc_PKCS7_VerifyContentMessageDigest(pkcs7, hashBuf, hashSz);
+        if (ret != 0) {
+#ifdef WOLFSSL_SMALL_STACK
+            XFREE(pkcs7Digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+            return ret;
+        }
+    }
+
     /* build hash to verify against */
-    pkcs7DigestSz = MAX_PKCS7_DIGEST_SZ;
     ret = wc_PKCS7_BuildSignedDataDigest(pkcs7, signedAttrib,
                                          signedAttribSz, pkcs7Digest,
                                          &pkcs7DigestSz, &plainDigest,
-                                         &plainDigestSz);
+                                         &plainDigestSz, hashBuf, hashSz);
     if (ret < 0) {
 #ifdef WOLFSSL_SMALL_STACK
         XFREE(pkcs7Digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
@@ -1663,6 +3631,76 @@
         return ret;
     }
 
+    /* If no certificates are available then store the signature and hash for
+     * user to verify. Make sure that different return value than success is
+     * returned because the signature was not verified here. */
+    if (ret == 0) {
+        byte haveCert = 0;
+        int  i;
+
+        for (i = 0; i < MAX_PKCS7_CERTS; i++) {
+            if (pkcs7->certSz[i] == 0)
+                continue;
+            haveCert = 1;
+        }
+
+        if (!haveCert) {
+            WOLFSSL_MSG("No certificates in bundle to verify signature");
+
+            /* store signature */
+            XFREE(pkcs7->signature, pkcs7->heap, DYNAMIC_TYPE_SIGNATURE);
+            pkcs7->signature = NULL;
+            pkcs7->signatureSz = 0;
+            pkcs7->signature = (byte*)XMALLOC(sigSz, pkcs7->heap,
+                    DYNAMIC_TYPE_SIGNATURE);
+            if (pkcs7->signature == NULL) {
+            #ifdef WOLFSSL_SMALL_STACK
+                XFREE(pkcs7Digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            #endif
+                return MEMORY_E;
+            }
+            XMEMCPY(pkcs7->signature, sig, sigSz);
+            pkcs7->signatureSz = sigSz;
+
+            /* store plain digest (CMS and ECC) */
+            XFREE(pkcs7->plainDigest, pkcs7->heap, DYNAMIC_TYPE_DIGEST);
+            pkcs7->plainDigest = NULL;
+            pkcs7->plainDigestSz = 0;
+            pkcs7->plainDigest = (byte*)XMALLOC(plainDigestSz, pkcs7->heap,
+                    DYNAMIC_TYPE_DIGEST);
+            if (pkcs7->plainDigest == NULL) {
+            #ifdef WOLFSSL_SMALL_STACK
+                XFREE(pkcs7Digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            #endif
+                return MEMORY_E;
+            }
+            XMEMCPY(pkcs7->plainDigest, plainDigest, plainDigestSz);
+            pkcs7->plainDigestSz = plainDigestSz;
+
+            /* store pkcs7 digest (default RSA) */
+            XFREE(pkcs7->pkcs7Digest, pkcs7->heap, DYNAMIC_TYPE_DIGEST);
+            pkcs7->pkcs7Digest = NULL;
+            pkcs7->pkcs7DigestSz = 0;
+            pkcs7->pkcs7Digest = (byte*)XMALLOC(pkcs7DigestSz, pkcs7->heap,
+                    DYNAMIC_TYPE_DIGEST);
+            if (pkcs7->pkcs7Digest == NULL) {
+            #ifdef WOLFSSL_SMALL_STACK
+                XFREE(pkcs7Digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            #endif
+                return MEMORY_E;
+            }
+            XMEMCPY(pkcs7->pkcs7Digest, pkcs7Digest, pkcs7DigestSz);
+            pkcs7->pkcs7DigestSz = pkcs7DigestSz;
+
+            #ifdef WOLFSSL_SMALL_STACK
+            XFREE(pkcs7Digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            #endif
+            return PKCS7_SIGNEEDS_CHECK;
+        }
+    }
+
+
+
     switch (pkcs7->publicKeyOID) {
 
 #ifndef NO_RSA
@@ -1818,7 +3856,6 @@
         }
         XMEMCPY(attrib->oid, in + oidIdx, attrib->oidSz);
 
-
         /* Get Set that contains the printable string value */
         if (GetSet(in, &idx, &length, inSz) < 0) {
             XFREE(attrib->oid, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
@@ -1857,334 +3894,1234 @@
 }
 
 
-/* Finds the certificates in the message and saves it. */
-int wc_PKCS7_VerifySignedData(PKCS7* pkcs7, byte* pkiMsg, word32 pkiMsgSz)
-{
-    word32 idx, contentType, hashOID, sigOID;
-    int length, version, ret;
-    byte* content = NULL;
-    byte* sig = NULL;
-    byte* cert = NULL;
-    byte* signedAttrib = NULL;
-    int contentSz = 0, sigSz = 0, certSz = 0, signedAttribSz = 0;
-    byte degenerate;
-#ifdef ASN_BER_TO_DER
-    byte* der;
-#endif
-
-    if (pkcs7 == NULL || pkiMsg == NULL || pkiMsgSz == 0)
-        return BAD_FUNC_ARG;
-
-    idx = 0;
-
-    /* Get the contentInfo sequence */
-    if (GetSequence(pkiMsg, &idx, &length, pkiMsgSz) < 0)
-        return ASN_PARSE_E;
-
-    if (length == 0 && pkiMsg[idx-1] == 0x80) {
-#ifdef ASN_BER_TO_DER
-        word32 len = 0;
-
-        ret = wc_BerToDer(pkiMsg, pkiMsgSz, NULL, &len);
-        if (ret != LENGTH_ONLY_E)
-            return ret;
-        pkcs7->der = (byte*)XMALLOC(len, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
-        if (pkcs7->der == NULL)
-            return MEMORY_E;
-        ret = wc_BerToDer(pkiMsg, pkiMsgSz, pkcs7->der, &len);
-        if (ret < 0)
-            return ret;
-
-        pkiMsg = pkcs7->der;
-        pkiMsgSz = len;
-        idx = 0;
-        if (GetSequence(pkiMsg, &idx, &length, pkiMsgSz) < 0)
-            return ASN_PARSE_E;
-#else
-        return BER_INDEF_E;
-#endif
-    }
-
-    /* Get the contentInfo contentType */
-    if (wc_GetContentType(pkiMsg, &idx, &contentType, pkiMsgSz) < 0)
-        return ASN_PARSE_E;
-
-    if (contentType != SIGNED_DATA) {
-        WOLFSSL_MSG("PKCS#7 input not of type SignedData");
-        return PKCS7_OID_E;
-    }
-
-    /* get the ContentInfo content */
-    if (pkiMsg[idx++] != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0))
-        return ASN_PARSE_E;
-
-    if (GetLength(pkiMsg, &idx, &length, pkiMsgSz) < 0)
-        return ASN_PARSE_E;
-
-    /* Get the signedData sequence */
-    if (GetSequence(pkiMsg, &idx, &length, pkiMsgSz) < 0)
-        return ASN_PARSE_E;
-
-    /* Get the version */
-    if (GetMyVersion(pkiMsg, &idx, &version, pkiMsgSz) < 0)
-        return ASN_PARSE_E;
-
-    if (version != 1) {
-        WOLFSSL_MSG("PKCS#7 signedData needs to be of version 1");
-        return ASN_VERSION_E;
-    }
-
-    /* Get the set of DigestAlgorithmIdentifiers */
-    if (GetSet(pkiMsg, &idx, &length, pkiMsgSz) < 0)
-        return ASN_PARSE_E;
-
-    /* Skip the set. */
-    idx += length;
-    degenerate = (length == 0)? 1 : 0;
-
-    /* Get the inner ContentInfo sequence */
-    if (GetSequence(pkiMsg, &idx, &length, pkiMsgSz) < 0)
-        return ASN_PARSE_E;
-
-    /* Get the inner ContentInfo contentType */
-    if (wc_GetContentType(pkiMsg, &idx, &contentType, pkiMsgSz) < 0)
-        return ASN_PARSE_E;
-
-    if (contentType != DATA) {
-        WOLFSSL_MSG("PKCS#7 inner input not of type Data");
-        return PKCS7_OID_E;
-    }
-
-    /* Check for content info, it could be omitted when degenerate */
-    {
-        word32 localIdx = idx;
-        ret = 0;
-        if (pkiMsg[localIdx++] != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0))
-            ret = ASN_PARSE_E;
-
-        if (ret == 0 && GetLength(pkiMsg, &localIdx, &length, pkiMsgSz) <= 0)
+/* option to turn off support for degenerate cases
+ * flag 0 turns off support
+ * flag 1 turns on support
+ *
+ * by default support for SignedData degenerate cases is on
+ */
+void wc_PKCS7_AllowDegenerate(PKCS7* pkcs7, word16 flag)
+{
+    if (pkcs7) {
+        if (flag) { /* flag of 1 turns on support for degenerate */
+            pkcs7->noDegenerate = 0;
+        }
+        else { /* flag of 0 turns off support */
+            pkcs7->noDegenerate = 1;
+        }
+    }
+}
+
+/* Parses through a signerInfo set. Reads buffer "in" from "idxIn" to "idxIn" +
+ * length treating the current "idxIn" plus the length of set as max possible
+ * index.
+ *
+ * In the case that signed attributes are found "signedAttrib" gets set to point
+ *  at their location in the buffer "in". Also in this case signedAttribSz gets
+ *  set to the size of the signedAttrib buffer.
+ *
+ * returns 0 on success
+ */
+static int wc_PKCS7_ParseSignerInfo(PKCS7* pkcs7, byte* in, word32 inSz,
+        word32* idxIn, int degenerate, byte** signedAttrib, int* signedAttribSz)
+{
+    int ret = 0;
+    int length;
+    int version;
+    word32 sigOID = 0, hashOID = 0;
+    word32 idx = *idxIn, localIdx;
+    byte tag;
+
+    WOLFSSL_ENTER("wc_PKCS7_ParseSignerInfo");
+    /* require a signer if degenerate case not allowed */
+    if (inSz == 0 && pkcs7->noDegenerate == 1) {
+        WOLFSSL_MSG("Set to not allow degenerate cases");
+        return PKCS7_NO_SIGNER_E;
+    }
+
+    if (inSz == 0 && degenerate == 0) {
+        WOLFSSL_MSG("PKCS7 signers expected");
+        return PKCS7_NO_SIGNER_E;
+    }
+
+    /* not a degenerate case and there is elements in the set */
+    if (inSz > 0 && degenerate == 0) {
+        ret = wc_PKCS7_SignerInfoNew(pkcs7);
+
+        /* Get the sequence of the first signerInfo */
+        if (ret == 0 && GetSequence(in, &idx, &length, inSz) < 0)
             ret = ASN_PARSE_E;
 
-        if (ret == 0 && pkiMsg[localIdx++] != ASN_OCTET_STRING)
-            ret = ASN_PARSE_E;
-
-        if (ret == 0 && GetLength(pkiMsg, &localIdx, &length, pkiMsgSz) < 0)
-            ret = ASN_PARSE_E;
-
-        /* Save the inner data as the content. */
-        if (length > 0) {
-            /* Local pointer for calculating hashes later */
-            content   = &pkiMsg[localIdx];
-            contentSz = length;
-            localIdx += length;
-        }
-
-        /* update idx if successful */
-        if (ret == 0) {
-            idx = localIdx;
-        }
-    }
-
-    /* If getting the content info failed with non degenerate then return the
-     * error case. Otherwise with a degenerate it is ok if the content
-     * info was omitted */
-    if (!degenerate && ret != 0) {
-        return ret;
-    }
-
-    /* Get the implicit[0] set of certificates */
-    if (pkiMsg[idx] == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0)) {
-        idx++;
-        if (GetLength(pkiMsg, &idx, &length, pkiMsgSz) < 0)
-            return ASN_PARSE_E;
-
-        if (length > 0) {
-            /* At this point, idx is at the first certificate in
-             * a set of certificates. There may be more than one,
-             * or none, or they may be a PKCS 6 extended
-             * certificate. We want to save the first cert if it
-             * is X.509. */
-
-            word32 certIdx = idx;
-
-            if (pkiMsg[certIdx++] == (ASN_CONSTRUCTED | ASN_SEQUENCE)) {
-                if (GetLength(pkiMsg, &certIdx, &certSz, pkiMsgSz) < 0)
-                    return ASN_PARSE_E;
-
-                cert = &pkiMsg[idx];
-                certSz += (certIdx - idx);
-            }
-
-#ifdef ASN_BER_TO_DER
-            der = pkcs7->der;
-#endif
-            /* This will reset PKCS7 structure and then set the certificate */
-            wc_PKCS7_InitWithCert(pkcs7, cert, certSz);
-#ifdef ASN_BER_TO_DER
-            pkcs7->der = der;
-#endif
-
-            /* iterate through any additional certificates */
-            if (MAX_PKCS7_CERTS > 0) {
-                word32 localIdx;
-                int sz = 0;
-                int i;
-
-                pkcs7->cert[0]   = cert;
-                pkcs7->certSz[0] = certSz;
-                certIdx = idx + certSz;
-
-                for (i = 1; i < MAX_PKCS7_CERTS && certIdx + 1 < pkiMsgSz; i++) {
-                    localIdx = certIdx;
-
-                    if (pkiMsg[certIdx++] == (ASN_CONSTRUCTED | ASN_SEQUENCE)) {
-                        if (GetLength(pkiMsg, &certIdx, &sz, pkiMsgSz) < 0)
-                            return ASN_PARSE_E;
-
-                        pkcs7->cert[i]   = &pkiMsg[localIdx];
-                        pkcs7->certSz[i] = sz + (certIdx - localIdx);
-                        certIdx += sz;
-                    }
-                }
-            }
-        }
-        idx += length;
-    }
-
-    /* set content and size after init of PKCS7 structure */
-    pkcs7->content   = content;
-    pkcs7->contentSz = contentSz;
-
-    /* Get the implicit[1] set of crls */
-    if (pkiMsg[idx] == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 1)) {
-        idx++;
-        if (GetLength(pkiMsg, &idx, &length, pkiMsgSz) < 0)
-            return ASN_PARSE_E;
-
-        /* Skip the set */
-        idx += length;
-    }
-
-    /* Get the set of signerInfos */
-    if (GetSet(pkiMsg, &idx, &length, pkiMsgSz) < 0)
-        return ASN_PARSE_E;
-
-    if (length > 0) {
-        /* Get the sequence of the first signerInfo */
-        if (GetSequence(pkiMsg, &idx, &length, pkiMsgSz) < 0)
-            return ASN_PARSE_E;
-
         /* Get the version */
-        if (GetMyVersion(pkiMsg, &idx, &version, pkiMsgSz) < 0)
-            return ASN_PARSE_E;
-
-        if (version != 1) {
-            WOLFSSL_MSG("PKCS#7 signerInfo needs to be of version 1");
-            return ASN_VERSION_E;
-        }
-
-        /* Get the sequence of IssuerAndSerialNumber */
-        if (GetSequence(pkiMsg, &idx, &length, pkiMsgSz) < 0)
-            return ASN_PARSE_E;
-
-        /* Skip it */
-        idx += length;
+        if (ret == 0 && GetMyVersion(in, &idx, &version, inSz) < 0)
+            ret = ASN_PARSE_E;
+
+        if (ret == 0) {
+            pkcs7->signerInfo->version = version;
+        }
+
+        if (ret == 0 && version == 1) {
+            /* Get the sequence of IssuerAndSerialNumber */
+            if (GetSequence(in, &idx, &length, inSz) < 0)
+                ret = ASN_PARSE_E;
+
+            if (ret == 0) {
+                ret = wc_PKCS7_SignerInfoSetSID(pkcs7, in + idx, length);
+                idx += length;
+            }
+
+        } else if (ret == 0 && version == 3) {
+            /* Get the sequence of SubjectKeyIdentifier */
+            if (idx + 1 > inSz)
+                ret = BUFFER_E;
+
+            localIdx = idx;
+            if (ret == 0 && GetASNTag(in, &localIdx, &tag, inSz) == 0 &&
+                   tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0)) {
+                idx++;
+
+                if (GetLength(in, &idx, &length, inSz) <= 0)
+                    ret = ASN_PARSE_E;
+
+                if (ret == 0 && idx + 1 > inSz)
+                    ret = BUFFER_E;
+
+                if (ret == 0 && GetASNTag(in, &idx, &tag, inSz) < 0)
+                    ret = ASN_PARSE_E;
+
+                if (ret == 0 && tag != ASN_OCTET_STRING)
+                    ret = ASN_PARSE_E;
+
+                if (ret == 0 && GetLength(in, &idx, &length, inSz) < 0)
+                    ret = ASN_PARSE_E;
+            }
+            else {
+                /* check if SKID with ASN_CONTEXT_SPECIFIC otherwise in version
+                 * 3 try to get issuerAndSerial */
+                localIdx = idx;
+                if (GetASNTag(in, &localIdx, &tag, inSz) == 0 &&
+                        tag == ASN_CONTEXT_SPECIFIC) {
+                    idx++;
+                    if (ret == 0 && GetLength(in, &idx, &length, inSz) < 0)
+                        ret = ASN_PARSE_E;
+                }
+                else {
+                    if (pkcs7->version != 3) {
+                        WOLFSSL_MSG("Unexpected signer info found with version");
+                        ret = ASN_PARSE_E;
+                    }
+
+                    if (ret == 0 && GetSequence(in, &idx, &length, inSz) < 0)
+                        ret = ASN_PARSE_E;
+                }
+            }
+
+            if (ret == 0) {
+                ret = wc_PKCS7_SignerInfoSetSID(pkcs7, in + idx, length);
+                idx += length;
+            }
+
+        } else {
+            WOLFSSL_MSG("PKCS#7 signerInfo version must be 1 or 3");
+            ret = ASN_VERSION_E;
+        }
 
         /* Get the sequence of digestAlgorithm */
-        if (GetAlgoId(pkiMsg, &idx, &hashOID, oidHashType, pkiMsgSz) < 0) {
-            return ASN_PARSE_E;
+        if (ret == 0 && GetAlgoId(in, &idx, &hashOID, oidHashType, inSz) < 0) {
+            ret = ASN_PARSE_E;
         }
         pkcs7->hashOID = (int)hashOID;
 
         /* Get the IMPLICIT[0] SET OF signedAttributes */
-        if (pkiMsg[idx] == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0)) {
+        localIdx = idx;
+        if (ret == 0 && GetASNTag(in, &localIdx, &tag, inSz) == 0 &&
+                tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0)) {
             idx++;
 
-            if (GetLength(pkiMsg, &idx, &length, pkiMsgSz) < 0)
-                return ASN_PARSE_E;
+            if (GetLength(in, &idx, &length, inSz) < 0)
+                ret = ASN_PARSE_E;
 
             /* save pointer and length */
-            signedAttrib = &pkiMsg[idx];
-            signedAttribSz = length;
-
-            if (wc_PKCS7_ParseAttribs(pkcs7, signedAttrib, signedAttribSz) <0) {
+            *signedAttrib = &in[idx];
+            *signedAttribSz = length;
+
+            if (ret == 0 && wc_PKCS7_ParseAttribs(pkcs7, *signedAttrib,
+                        *signedAttribSz) < 0) {
                 WOLFSSL_MSG("Error parsing signed attributes");
-                return ASN_PARSE_E;
+                ret = ASN_PARSE_E;
             }
 
             idx += length;
         }
 
         /* Get digestEncryptionAlgorithm */
-        if (GetAlgoId(pkiMsg, &idx, &sigOID, oidSigType, pkiMsgSz) < 0) {
-            return ASN_PARSE_E;
+        if (ret == 0 && GetAlgoId(in, &idx, &sigOID, oidSigType, inSz) < 0) {
+            ret = ASN_PARSE_E;
         }
 
         /* store public key type based on digestEncryptionAlgorithm */
-        ret = wc_PKCS7_SetPublicKeyOID(pkcs7, sigOID);
-        if (ret <= 0) {
-            WOLFSSL_MSG("Failed to set public key OID from signature");
+        if (ret == 0) {
+            ret = wc_PKCS7_SetPublicKeyOID(pkcs7, sigOID);
+            if (ret < 0) {
+                WOLFSSL_MSG("Failed to set public key OID from signature");
+            }
+            else {
+                /* if previous return was positive then was success */
+                ret = 0;
+            }
+        }
+    }
+
+    /* update index on success */
+    if (ret == 0) {
+        *idxIn = idx;
+    }
+
+    return ret;
+}
+
+
+/* Finds the certificates in the message and saves it. By default allows
+ * degenerate cases which can have no signer.
+ *
+ * By default expects type SIGNED_DATA (SignedData) which can have any number of
+ * elements in signerInfos collection, including zero. (RFC2315 section 9.1)
+ * When adding support for the case of SignedAndEnvelopedData content types a
+ * signer is required. In this case the PKCS7 flag noDegenerate could be set.
+ */
+static int PKCS7_VerifySignedData(PKCS7* pkcs7, const byte* hashBuf,
+    word32 hashSz, byte* in, word32 inSz,
+    byte* in2, word32 in2Sz)
+{
+    word32 idx, maxIdx = inSz, outerContentType, contentTypeSz = 0, totalSz = 0;
+    int length = 0, version = 0, ret = 0;
+    byte* content = NULL;
+    byte* contentDynamic = NULL;
+    byte* sig = NULL;
+    byte* cert = NULL;
+    byte* signedAttrib = NULL;
+    byte* contentType = NULL;
+    int contentSz = 0, sigSz = 0, certSz = 0, signedAttribSz = 0;
+    word32 localIdx, start;
+    byte degenerate = 0;
+    byte detached = 0;
+    byte tag = 0;
+#ifdef ASN_BER_TO_DER
+    byte* der;
+#endif
+    int multiPart = 0, keepContent;
+    int contentLen = 0;
+
+    byte* pkiMsg    = in;
+    word32 pkiMsgSz = inSz;
+#ifndef NO_PKCS7_STREAM
+    word32 stateIdx = 0;
+    long rc;
+#endif
+
+    byte* pkiMsg2 = in2;
+    word32 pkiMsg2Sz = in2Sz;
+
+    if (pkcs7 == NULL)
+        return BAD_FUNC_ARG;
+
+#ifndef NO_PKCS7_STREAM
+    /* allow for 0 size inputs with stream mode */
+    if (pkiMsg == NULL && pkiMsgSz > 0)
+        return BAD_FUNC_ARG;
+
+#else
+    if (pkiMsg == NULL || pkiMsgSz == 0)
+        return BAD_FUNC_ARG;
+#endif
+
+    if ((hashSz > 0 && hashBuf == NULL) || (pkiMsg2Sz > 0 && pkiMsg2 == NULL)) {
+        return BAD_FUNC_ARG;
+    }
+    idx = 0;
+
+#ifdef ASN_BER_TO_DER
+    if (pkcs7->derSz > 0 && pkcs7->der) {
+        pkiMsg = in = pkcs7->der;
+    }
+#endif
+
+#ifndef NO_PKCS7_STREAM
+    if (pkcs7->stream == NULL) {
+        if ((ret = wc_PKCS7_CreateStream(pkcs7)) != 0) {
             return ret;
         }
-
-        /* Get the signature */
-        if (pkiMsg[idx] == ASN_OCTET_STRING) {
-            idx++;
-
-            if (GetLength(pkiMsg, &idx, &length, pkiMsgSz) < 0)
-                return ASN_PARSE_E;
-
-            /* save pointer and length */
-            sig = &pkiMsg[idx];
-            sigSz = length;
-
+    }
+#endif
+
+    switch (pkcs7->state) {
+        case WC_PKCS7_START:
+        #ifndef NO_PKCS7_STREAM
+            if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, MAX_SEQ_SZ +
+                            MAX_VERSION_SZ + MAX_SEQ_SZ + MAX_LENGTH_SZ +
+                            ASN_TAG_SZ + MAX_OID_SZ + MAX_SEQ_SZ,
+                            &pkiMsg, &idx)) != 0) {
+                break;
+            }
+
+            rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_SEQ_PEEK, in, inSz);
+            if (rc < 0) {
+                ret = (int)rc;
+                break;
+            }
+            pkiMsgSz = (pkcs7->stream->length > 0)? pkcs7->stream->length :inSz;
+        #endif
+
+            /* determine total message size */
+            totalSz = pkiMsgSz;
+            if (pkiMsg2 && pkiMsg2Sz > 0) {
+                totalSz += pkiMsg2Sz + pkcs7->contentSz;
+            }
+
+            /* Get the contentInfo sequence */
+            if (ret == 0 && GetSequence_ex(pkiMsg, &idx, &length, totalSz,
+                        NO_USER_CHECK) < 0)
+                ret = ASN_PARSE_E;
+
+            if (ret == 0 && length == 0 && pkiMsg[idx-1] == 0x80) {
+        #ifdef ASN_BER_TO_DER
+                word32 len = 0;
+
+                ret = wc_BerToDer(pkiMsg, pkiMsgSz, NULL, &len);
+                if (ret != LENGTH_ONLY_E)
+                    return ret;
+                pkcs7->der = (byte*)XMALLOC(len, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+                if (pkcs7->der == NULL)
+                    return MEMORY_E;
+                ret = wc_BerToDer(pkiMsg, pkiMsgSz, pkcs7->der, &len);
+                if (ret < 0)
+                    return ret;
+
+                pkiMsg   = in = pkcs7->der;
+                pkiMsgSz = pkcs7->derSz = len;
+                idx = 0;
+                if (GetSequence_ex(pkiMsg, &idx, &length, pkiMsgSz,
+                            NO_USER_CHECK) < 0)
+                    return ASN_PARSE_E;
+
+            #ifndef NO_PKCS7_STREAM
+                rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_SEQ_PEEK,
+                    pkiMsg, pkiMsgSz);
+                if (rc < 0) {
+                    ret = (int)rc;
+                    break;
+                }
+            #endif
+        #else
+                ret = BER_INDEF_E;
+        #endif
+            }
+
+            /* Get the contentInfo contentType */
+            if (ret == 0 && wc_GetContentType(pkiMsg, &idx, &outerContentType,
+                        pkiMsgSz) < 0)
+                ret = ASN_PARSE_E;
+
+            if (ret == 0 && outerContentType != SIGNED_DATA) {
+                WOLFSSL_MSG("PKCS#7 input not of type SignedData");
+                ret = PKCS7_OID_E;
+            }
+
+            /* get the ContentInfo content */
+            if (ret == 0 && GetASNTag(pkiMsg, &idx, &tag, totalSz) != 0)
+                ret = ASN_PARSE_E;
+
+            if (ret == 0 && tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0))
+                ret = ASN_PARSE_E;
+
+            if (ret == 0 && GetLength_ex(pkiMsg, &idx, &length, totalSz,
+                        NO_USER_CHECK) < 0)
+                ret = ASN_PARSE_E;
+
+            /* Get the signedData sequence */
+            if (ret == 0 && GetSequence_ex(pkiMsg, &idx, &length, totalSz,
+                        NO_USER_CHECK) < 0)
+                ret = ASN_PARSE_E;
+
+            /* Get the version */
+            if (ret == 0 && GetMyVersion(pkiMsg, &idx, &version, pkiMsgSz) < 0)
+                ret = ASN_PARSE_E;
+
+
+            /* version 1 follows RFC 2315 */
+            /* version 3 follows RFC 4108 */
+            if (ret == 0 && (version != 1 && version != 3)) {
+                WOLFSSL_MSG("PKCS#7 signedData needs to be version 1 or 3");
+                ret = ASN_VERSION_E;
+            }
+            pkcs7->version = version;
+
+            /* Get the set of DigestAlgorithmIdentifiers */
+            if (ret == 0 && GetSet(pkiMsg, &idx, &length, pkiMsgSz) < 0)
+                ret = ASN_PARSE_E;
+
+            /* Skip the set. */
             idx += length;
-        }
-
-        pkcs7->content = content;
-        pkcs7->contentSz = contentSz;
-
-        ret = wc_PKCS7_SignedDataVerifySignature(pkcs7, sig, sigSz,
-                                                 signedAttrib, signedAttribSz);
-        if (ret < 0)
-            return ret;
-    }
+            degenerate = (length == 0)? 1 : 0;
+            if (pkcs7->noDegenerate == 1 && degenerate == 1) {
+                ret = PKCS7_NO_SIGNER_E;
+            }
+
+            if (ret != 0)
+                break;
+
+        #ifndef NO_PKCS7_STREAM
+            if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &stateIdx, &idx)) != 0) {
+                break;
+            }
+            if (pkiMsg2 && pkiMsg2Sz > 0) {
+                pkcs7->stream->maxLen += pkiMsg2Sz + pkcs7->contentSz;
+            }
+            wc_PKCS7_StreamStoreVar(pkcs7, totalSz, 0, 0);
+        #endif
+
+            wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_VERIFY_STAGE2);
+            FALL_THROUGH;
+
+        case WC_PKCS7_VERIFY_STAGE2:
+        #ifndef NO_PKCS7_STREAM
+            if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz + in2Sz,
+                           MAX_SEQ_SZ + MAX_OID_SZ + ASN_TAG_SZ + MAX_LENGTH_SZ
+                           + ASN_TAG_SZ + MAX_LENGTH_SZ, &pkiMsg, &idx)) != 0) {
+                break;
+            }
+
+            wc_PKCS7_StreamGetVar(pkcs7, &totalSz, 0, 0);
+            if (pkcs7->stream->length > 0)
+                pkiMsgSz = pkcs7->stream->length;
+        #ifdef ASN_BER_TO_DER
+            else if (pkcs7->der)
+                pkiMsgSz = pkcs7->derSz;
+        #endif
+            else
+                pkiMsgSz = inSz;
+
+        #endif
+            /* Get the inner ContentInfo sequence */
+            if (GetSequence_ex(pkiMsg, &idx, &length, pkiMsgSz,
+                        NO_USER_CHECK) < 0)
+                ret = ASN_PARSE_E;
+
+            /* Get the inner ContentInfo contentType */
+            if (ret == 0) {
+                word32 tmpIdx = idx;
+
+                if (GetASNObjectId(pkiMsg, &idx, &length, pkiMsgSz) != 0)
+                    ret = ASN_PARSE_E;
+
+                contentType = pkiMsg + tmpIdx;
+                contentTypeSz = length + (idx - tmpIdx);
+
+                idx += length;
+            }
+
+            if (ret != 0)
+                break;
+
+            /* Check for content info, it could be omitted when degenerate */
+            localIdx = idx;
+            ret = 0;
+            if (localIdx + 1 > pkiMsgSz) {
+                ret = BUFFER_E;
+                break;
+            }
+
+            if (ret == 0 && GetASNTag(pkiMsg, &localIdx, &tag, pkiMsgSz) != 0)
+                ret = ASN_PARSE_E;
+
+            if (ret == 0 && tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0))
+                ret = ASN_PARSE_E;
+
+            if (ret == 0 && GetLength_ex(pkiMsg, &localIdx, &length, pkiMsgSz,
+                        NO_USER_CHECK) <= 0)
+                ret = ASN_PARSE_E;
+
+            if (localIdx >= pkiMsgSz) {
+                ret = BUFFER_E;
+            }
+
+            /* get length of content in the case that there is multiple parts */
+            if (ret == 0 && GetASNTag(pkiMsg, &localIdx, &tag, pkiMsgSz) < 0)
+                ret = ASN_PARSE_E;
+
+            if (ret == 0 && tag == (ASN_OCTET_STRING | ASN_CONSTRUCTED)) {
+                multiPart = 1;
+
+                /* Get length of all OCTET_STRINGs. */
+                if (GetLength_ex(pkiMsg, &localIdx, &contentLen, pkiMsgSz,
+                            NO_USER_CHECK) < 0)
+                    ret = ASN_PARSE_E;
+
+                /* Check whether there is one OCTET_STRING inside. */
+                start = localIdx;
+                if (localIdx >= pkiMsgSz) {
+                    ret = BUFFER_E;
+                }
+
+                if (ret == 0 && GetASNTag(pkiMsg, &localIdx, &tag, pkiMsgSz)
+                        != 0)
+                    ret = ASN_PARSE_E;
+
+                if (ret == 0 && tag != ASN_OCTET_STRING)
+                    ret = ASN_PARSE_E;
+
+                if (ret == 0 && GetLength_ex(pkiMsg, &localIdx, &length, pkiMsgSz,
+                            NO_USER_CHECK) < 0)
+                    ret = ASN_PARSE_E;
+
+                if (ret == 0) {
+                    /* Use single OCTET_STRING directly. */
+                    if (localIdx - start + length == (word32)contentLen)
+                        multiPart = 0;
+                    localIdx = start;
+                }
+            }
+
+            /* get length of content in case of single part */
+            if (ret == 0 && !multiPart) {
+                if (tag != ASN_OCTET_STRING)
+                    ret = ASN_PARSE_E;
+
+                if (ret == 0 && GetLength_ex(pkiMsg, &localIdx,
+                            &length, pkiMsgSz, NO_USER_CHECK) < 0)
+                    ret = ASN_PARSE_E;
+            }
+
+            /* update idx if successful */
+            if (ret == 0) {
+                /* support using header and footer without content */
+                if (pkiMsg2 && pkiMsg2Sz > 0 && hashBuf && hashSz > 0) {
+                    localIdx = 0;
+                }
+                idx = localIdx;
+            }
+            else {
+
+                /* if pkcs7->content and pkcs7->contentSz are set, try to
+                   process as a detached signature */
+                if (!degenerate &&
+                    (pkcs7->content != NULL && pkcs7->contentSz != 0)) {
+                    detached = 1;
+                }
+
+                if (!degenerate && !detached && ret != 0)
+                    break;
+
+                length = 0; /* no content to read */
+                pkiMsg2   = pkiMsg;
+                pkiMsg2Sz = pkiMsgSz;
+            }
+
+        #ifndef NO_PKCS7_STREAM
+            /* save detached flag value */
+            pkcs7->stream->detached = detached;
+
+            /* save contentType */
+            pkcs7->stream->nonce = (byte*)XMALLOC(contentTypeSz, pkcs7->heap,
+                    DYNAMIC_TYPE_PKCS7);
+            if (pkcs7->stream->nonce == NULL) {
+                ret = MEMORY_E;
+                break;
+            }
+            else {
+                pkcs7->stream->nonceSz = contentTypeSz;
+                XMEMCPY(pkcs7->stream->nonce, contentType, contentTypeSz);
+            }
+
+            /* content expected? */
+            if ((ret == 0 && length > 0) &&
+                !(pkiMsg2 && pkiMsg2Sz > 0 && hashBuf && hashSz > 0)) {
+                pkcs7->stream->expected = length + ASN_TAG_SZ + MAX_LENGTH_SZ;
+            }
+            else {
+                pkcs7->stream->expected = ASN_TAG_SZ + MAX_LENGTH_SZ;
+            }
+
+            if (pkcs7->stream->expected > (pkcs7->stream->maxLen - idx)) {
+                pkcs7->stream->expected = pkcs7->stream->maxLen - idx;
+            }
+
+            if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &stateIdx, &idx)) != 0) {
+                break;
+            }
+            wc_PKCS7_StreamStoreVar(pkcs7, pkiMsg2Sz, localIdx, length);
+
+            /* content length is in multiple parts */
+            if (multiPart) {
+                pkcs7->stream->expected = contentLen + ASN_TAG_SZ;
+            }
+            pkcs7->stream->multi = multiPart;
+
+        #endif
+            wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_VERIFY_STAGE3);
+            FALL_THROUGH;
+
+        case WC_PKCS7_VERIFY_STAGE3:
+        #ifndef NO_PKCS7_STREAM
+            if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz + in2Sz,
+                            pkcs7->stream->expected, &pkiMsg, &idx)) != 0) {
+                break;
+            }
+
+            rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK,
+                    pkiMsg, pkiMsgSz);
+            if (rc < 0) {
+                ret = (int)rc;
+                break;
+            }
+        #ifdef ASN_BER_TO_DER
+            if (pkcs7->derSz != 0)
+                pkiMsgSz = pkcs7->derSz;
+            else
+        #endif
+                pkiMsgSz = (word32)rc;
+            wc_PKCS7_StreamGetVar(pkcs7, &pkiMsg2Sz, (int*)&localIdx, &length);
+
+            if (pkcs7->stream->length > 0) {
+                localIdx = 0;
+            }
+            multiPart = pkcs7->stream->multi;
+            detached  = pkcs7->stream->detached;
+            maxIdx = idx + pkcs7->stream->expected;
+        #endif
+
+            /* Break out before content because it can be optional in degenerate
+             * cases. */
+            if (ret != 0 && !degenerate)
+                break;
+
+            /* get parts of content */
+            if (ret == 0 && multiPart) {
+                int i = 0;
+                keepContent = !(pkiMsg2 && pkiMsg2Sz > 0 && hashBuf && hashSz > 0);
+
+                if (keepContent) {
+                    /* Create a buffer to hold content of OCTET_STRINGs. */
+                    pkcs7->contentDynamic = (byte*)XMALLOC(contentLen, pkcs7->heap,
+                                                            DYNAMIC_TYPE_PKCS7);
+                    if (pkcs7->contentDynamic == NULL)
+                        ret = MEMORY_E;
+                }
+
+                start = localIdx;
+                /* Use the data from each OCTET_STRING. */
+                while (ret == 0 && localIdx < start + contentLen) {
+                    if (GetASNTag(pkiMsg, &localIdx, &tag, totalSz) < 0)
+                        ret = ASN_PARSE_E;
+                    if (ret == 0 && tag != ASN_OCTET_STRING)
+                        ret = ASN_PARSE_E;
+
+                    if (ret == 0 && GetLength(pkiMsg, &localIdx, &length, totalSz) < 0)
+                        ret = ASN_PARSE_E;
+                    if (ret == 0 && length + localIdx > start + contentLen)
+                        ret = ASN_PARSE_E;
+
+                    if (ret == 0) {
+                        if (keepContent) {
+                            XMEMCPY(pkcs7->contentDynamic + i, pkiMsg + localIdx,
+                                                                        length);
+                        }
+                        i += length;
+                        localIdx += length;
+                    }
+                }
+                localIdx = start; /* reset for sanity check, increment later */
+                length = i;
+            }
+
+            /* Save the inner data as the content. */
+            if (ret == 0 && length > 0) {
+                contentSz = length;
+
+                /* support using header and footer without content */
+                if (pkiMsg2 && pkiMsg2Sz > 0 && hashBuf && hashSz > 0) {
+                    /* Content not provided, use provided pkiMsg2 footer */
+                    content = NULL;
+                    localIdx = 0;
+                    if (contentSz != (int)pkcs7->contentSz) {
+                        WOLFSSL_MSG("Data signed does not match contentSz provided");
+                        ret = BUFFER_E;
+                    }
+                }
+                else {
+                    if ((word32)length > pkiMsgSz - localIdx) {
+                        ret = BUFFER_E;
+                    }
+
+                    /* Content pointer for calculating hashes later */
+                    if (ret == 0 && !multiPart) {
+                        content = &pkiMsg[localIdx];
+                    }
+                    if (ret == 0 && multiPart) {
+                        content = pkcs7->contentDynamic;
+                    }
+
+                    if (ret == 0) {
+                        idx += length;
+
+                        pkiMsg2   = pkiMsg;
+                        pkiMsg2Sz = pkiMsgSz;
+                    #ifndef NO_PKCS7_STREAM
+                        pkcs7->stream->varOne = pkiMsg2Sz;
+                        pkcs7->stream->flagOne = 1;
+                    #endif
+                    }
+                }
+            }
+            else {
+                pkiMsg2 = pkiMsg;
+                pkiMsg2Sz = pkiMsgSz;
+            #ifndef NO_PKCS7_STREAM
+                pkcs7->stream->varOne = pkiMsg2Sz;
+                pkcs7->stream->flagOne = 1;
+            #endif
+            }
+
+            /* If getting the content info failed with non degenerate then return the
+             * error case. Otherwise with a degenerate it is ok if the content
+             * info was omitted */
+            if (!degenerate && !detached && (ret != 0)) {
+                break;
+            }
+            else {
+                ret = 0; /* reset ret state on degenerate case */
+            }
+
+        #ifndef NO_PKCS7_STREAM
+            /* save content */
+            if (detached == 1) {
+                /* if detached, use content from user in pkcs7 struct */
+                content = pkcs7->content;
+                contentSz = pkcs7->contentSz;
+            }
+
+            if (content != NULL) {
+                XFREE(pkcs7->stream->content, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+                pkcs7->stream->content = (byte*)XMALLOC(contentSz, pkcs7->heap,
+                        DYNAMIC_TYPE_PKCS7);
+                if (pkcs7->stream->content == NULL) {
+                    ret = MEMORY_E;
+                    break;
+                }
+                else {
+                    XMEMCPY(pkcs7->stream->content, content, contentSz);
+                    pkcs7->stream->contentSz = contentSz;
+                }
+            }
+        #endif /* !NO_PKCS7_STREAM */
+
+            /* Get the implicit[0] set of certificates */
+            if (ret == 0 && idx >= pkiMsg2Sz)
+                ret = BUFFER_E;
+
+            length = 0; /* set length to 0 to check if reading in any certs */
+            localIdx = idx;
+            if (ret == 0 && GetASNTag(pkiMsg2, &localIdx, &tag, pkiMsg2Sz) == 0
+                    && tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0)) {
+                idx++;
+                if (GetLength_ex(pkiMsg2, &idx, &length, maxIdx, NO_USER_CHECK)
+                        < 0)
+                    ret = ASN_PARSE_E;
+            }
+
+            if (ret != 0) {
+                break;
+            }
+        #ifndef NO_PKCS7_STREAM
+            if (in2 && in2Sz > 0 && hashBuf && hashSz > 0) {
+                stateIdx = idx; /* case where all data was read from in2 */
+            }
+
+            if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &stateIdx, &idx)) != 0) {
+                break;
+            }
+            wc_PKCS7_StreamStoreVar(pkcs7, pkiMsg2Sz, 0, length);
+            if (length > 0) {
+                pkcs7->stream->expected = length;
+            }
+            else {
+                pkcs7->stream->expected = MAX_SEQ_SZ;
+                if (pkcs7->stream->expected > (pkcs7->stream->maxLen -
+                                pkcs7->stream->totalRd) + pkcs7->stream->length) {
+                    pkcs7->stream->expected = (pkcs7->stream->maxLen -
+                                pkcs7->stream->totalRd) + pkcs7->stream->length;
+                }
+            }
+        #endif
+            wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_VERIFY_STAGE4);
+            FALL_THROUGH;
+
+        case WC_PKCS7_VERIFY_STAGE4:
+        #ifndef NO_PKCS7_STREAM
+            if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz + in2Sz,
+                            pkcs7->stream->expected, &pkiMsg, &idx)) != 0) {
+                break;
+            }
+
+            wc_PKCS7_StreamGetVar(pkcs7, &pkiMsg2Sz, 0, &length);
+            if (pkcs7->stream->flagOne) {
+                pkiMsg2 = pkiMsg;
+            }
+
+            /* restore content */
+            content   = pkcs7->stream->content;
+            contentSz = pkcs7->stream->contentSz;
+
+            /* restore detached flag */
+            detached = pkcs7->stream->detached;
+
+            /* store certificate if needed */
+            if (length > 0 && in2Sz == 0) {
+                /* free tmpCert if not NULL */
+                XFREE(pkcs7->stream->tmpCert, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+                pkcs7->stream->tmpCert = (byte*)XMALLOC(length,
+                        pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+                if ((pkiMsg2 == NULL) || (pkcs7->stream->tmpCert == NULL)) {
+                    ret = MEMORY_E;
+                    break;
+                }
+                XMEMCPY(pkcs7->stream->tmpCert, pkiMsg2 + idx, length);
+                pkiMsg2 = pkcs7->stream->tmpCert;
+                pkiMsg2Sz = length;
+                idx = 0;
+            }
+        #endif
+
+                if (length > 0) {
+                    /* At this point, idx is at the first certificate in
+                     * a set of certificates. There may be more than one,
+                     * or none, or they may be a PKCS 6 extended
+                     * certificate. We want to save the first cert if it
+                     * is X.509. */
+
+                    word32 certIdx = idx;
+
+                    if (length < MAX_LENGTH_SZ + ASN_TAG_SZ)
+                        ret = BUFFER_E;
+
+                    if (ret == 0)
+                        ret = GetASNTag(pkiMsg2, &certIdx, &tag, pkiMsg2Sz);
+
+                    if (ret == 0 && tag == (ASN_CONSTRUCTED | ASN_SEQUENCE)) {
+                        if (GetLength(pkiMsg2, &certIdx, &certSz, pkiMsg2Sz) < 0)
+                            ret = ASN_PARSE_E;
+
+                        cert = &pkiMsg2[idx];
+                        certSz += (certIdx - idx);
+                        if (certSz > length) {
+                            ret = BUFFER_E;
+                            break;
+                        }
+                    }
+        #ifdef ASN_BER_TO_DER
+                    der = pkcs7->der;
+        #endif
+                    contentDynamic = pkcs7->contentDynamic;
+                    version = pkcs7->version;
+
+
+                    if (ret == 0) {
+                    #ifndef NO_PKCS7_STREAM
+                        PKCS7State* stream = pkcs7->stream;
+                    #endif
+                        /* This will reset PKCS7 structure and then set the
+                         * certificate */
+                        ret = wc_PKCS7_InitWithCert(pkcs7, cert, certSz);
+                    #ifndef NO_PKCS7_STREAM
+                        pkcs7->stream = stream;
+                    #endif
+                    }
+                    pkcs7->contentDynamic = contentDynamic;
+                    pkcs7->version = version;
+        #ifdef ASN_BER_TO_DER
+                    pkcs7->der = der;
+        #endif
+                    if (ret != 0)
+                        break;
+
+                    /* iterate through any additional certificates */
+                    if (ret == 0 && MAX_PKCS7_CERTS > 0) {
+                        int sz = 0;
+                        int i;
+
+                        pkcs7->cert[0]   = cert;
+                        pkcs7->certSz[0] = certSz;
+                        certIdx = idx + certSz;
+
+                        for (i = 1; i < MAX_PKCS7_CERTS &&
+                                certIdx + 1 < pkiMsg2Sz &&
+                                certIdx + 1 < (word32)length; i++) {
+                            localIdx = certIdx;
+
+                            if (ret == 0 && GetASNTag(pkiMsg2, &certIdx, &tag,
+                                        pkiMsg2Sz) < 0) {
+                                ret = ASN_PARSE_E;
+                                break;
+                            }
+
+                            if (ret == 0 &&
+                                    tag == (ASN_CONSTRUCTED | ASN_SEQUENCE)) {
+                                if (GetLength(pkiMsg2, &certIdx, &sz,
+                                            pkiMsg2Sz) < 0) {
+                                    ret = ASN_PARSE_E;
+                                    break;
+                                }
+
+                                pkcs7->cert[i]   = &pkiMsg2[localIdx];
+                                pkcs7->certSz[i] = sz + (certIdx - localIdx);
+                                certIdx += sz;
+                            }
+                        }
+                    }
+                }
+                idx += length;
+
+            if (!detached) {
+                /* set content and size after init of PKCS7 structure */
+                pkcs7->content   = content;
+                pkcs7->contentSz = contentSz;
+            }
+        #ifndef NO_PKCS7_STREAM
+            else {
+                /* save content if detached and using streaming API */
+                if (pkcs7->content != NULL) {
+                    XFREE(pkcs7->stream->content, pkcs7->heap,
+                          DYNAMIC_TYPE_PKCS7);
+                    pkcs7->stream->content = (byte*)XMALLOC(pkcs7->contentSz,
+                                                            pkcs7->heap,
+                                                            DYNAMIC_TYPE_PKCS7);
+                    if (pkcs7->stream->content == NULL) {
+                        ret = MEMORY_E;
+                        break;
+                    }
+                    else {
+                        XMEMCPY(pkcs7->stream->content, pkcs7->content,
+                                contentSz);
+                        pkcs7->stream->contentSz = pkcs7->contentSz;
+                    }
+                }
+            }
+        #endif
+
+            if (ret != 0) {
+                break;
+            }
+        #ifndef NO_PKCS7_STREAM
+            /* factor in that recent idx was in cert buffer. If in2 buffer was
+             * used then don't advance idx. */
+            if (length > 0 && pkcs7->stream->flagOne &&
+                    pkcs7->stream->length == 0) {
+                idx = stateIdx + idx;
+                if (idx > inSz) {
+                    /* index is more than input size */
+                    ret = BUFFER_E;
+                    break;
+                }
+            }
+            else {
+                stateIdx = idx; /* didn't read any from internal buffer */
+            }
+
+            if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &stateIdx, &idx)) != 0) {
+                break;
+            }
+            if (pkcs7->stream->flagOne && pkcs7->stream->length > 0) {
+                idx = stateIdx + idx;
+            }
+
+            pkcs7->stream->expected = MAX_OID_SZ + ASN_TAG_SZ + MAX_LENGTH_SZ +
+                                      MAX_SET_SZ;
+
+            if (pkcs7->stream->expected > (pkcs7->stream->maxLen -
+                                pkcs7->stream->totalRd) + pkcs7->stream->length)
+                pkcs7->stream->expected = (pkcs7->stream->maxLen -
+                                pkcs7->stream->totalRd) + pkcs7->stream->length;
+
+            wc_PKCS7_StreamGetVar(pkcs7, &pkiMsg2Sz,  0, 0);
+            wc_PKCS7_StreamStoreVar(pkcs7, pkiMsg2Sz, 0, length);
+        #endif
+            wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_VERIFY_STAGE5);
+            FALL_THROUGH;
+
+        case WC_PKCS7_VERIFY_STAGE5:
+        #ifndef NO_PKCS7_STREAM
+            if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz + in2Sz,
+                            pkcs7->stream->expected, &pkiMsg, &idx)) != 0) {
+                break;
+            }
+            wc_PKCS7_StreamGetVar(pkcs7, &pkiMsg2Sz, 0, &length);
+            if (pkcs7->stream->flagOne) {
+                pkiMsg2 = pkiMsg;
+            }
+
+            /* restore content type */
+            contentType   = pkcs7->stream->nonce;
+            contentTypeSz = pkcs7->stream->nonceSz;
+
+            maxIdx = idx + pkcs7->stream->expected;
+            if (maxIdx > pkiMsg2Sz) {
+                ret = BUFFER_E;
+                break;
+            }
+            stateIdx = idx;
+        #endif
+
+            /* set contentType and size after init of PKCS7 structure */
+            if (ret == 0 && wc_PKCS7_SetContentType(pkcs7, contentType,
+                        contentTypeSz) < 0)
+                ret = ASN_PARSE_E;
+
+            /* Get the implicit[1] set of crls */
+            if (ret == 0 && idx >= maxIdx)
+                ret = BUFFER_E;
+
+            localIdx = idx;
+            if (ret == 0 && GetASNTag(pkiMsg2, &localIdx, &tag, pkiMsg2Sz) == 0
+                    && tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 1)) {
+                idx++;
+                if (GetLength(pkiMsg2, &idx, &length, pkiMsg2Sz) < 0)
+                    ret = ASN_PARSE_E;
+
+                /* Skip the set */
+                idx += length;
+            }
+
+            /* Get the set of signerInfos */
+            if (ret == 0 && GetSet_ex(pkiMsg2, &idx, &length, maxIdx,
+                        NO_USER_CHECK) < 0)
+                ret = ASN_PARSE_E;
+
+            if (ret != 0)
+                break;
+        #ifndef NO_PKCS7_STREAM
+            if (!pkcs7->stream->flagOne) {
+                stateIdx = idx; /* didn't read any from internal buffer */
+            }
+            if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &stateIdx, &idx)) != 0) {
+                break;
+            }
+            wc_PKCS7_StreamStoreVar(pkcs7, pkiMsg2Sz, 0, length);
+
+            if (in2 && in2Sz > 0 && hashBuf && hashSz > 0) {
+                if (length > 0) {
+                    pkcs7->stream->expected = length;
+                }
+                else {
+                    pkcs7->stream->expected = 0;
+                }
+            }
+            else {
+                /* last state expect the reset of the buffer */
+                pkcs7->stream->expected = (pkcs7->stream->maxLen -
+                    pkcs7->stream->totalRd) + pkcs7->stream->length;
+            }
+
+        #endif
+            wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_VERIFY_STAGE6);
+            FALL_THROUGH;
+
+        case WC_PKCS7_VERIFY_STAGE6:
+        #ifndef NO_PKCS7_STREAM
+            if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz + in2Sz,
+                            pkcs7->stream->expected, &pkiMsg, &idx)) != 0) {
+                break;
+            }
+
+            wc_PKCS7_StreamGetVar(pkcs7, &pkiMsg2Sz, 0, &length);
+            if (pkcs7->stream->flagOne) {
+                pkiMsg2 = pkiMsg;
+            }
+
+            /* restore content */
+            content   = pkcs7->stream->content;
+            contentSz = pkcs7->stream->contentSz;
+        #endif
+
+            ret = wc_PKCS7_ParseSignerInfo(pkcs7, pkiMsg2, pkiMsg2Sz, &idx,
+                    degenerate, &signedAttrib, &signedAttribSz);
+
+            /* parse out the signature if present and verify it */
+            if (ret == 0 && length > 0 && degenerate == 0) {
+                WOLFSSL_MSG("Parsing signature and verifying");
+                if (idx >= pkiMsg2Sz)
+                    ret = BUFFER_E;
+
+                /* Get the signature */
+                localIdx = idx;
+                if (ret == 0 && GetASNTag(pkiMsg2, &localIdx, &tag,
+                            pkiMsg2Sz) == 0 && tag == ASN_OCTET_STRING) {
+                    idx++;
+
+                    if (GetLength(pkiMsg2, &idx, &length, pkiMsg2Sz) < 0)
+                        ret = ASN_PARSE_E;
+
+                    /* save pointer and length */
+                    sig = &pkiMsg2[idx];
+                    sigSz = length;
+
+                    idx += length;
+                }
+
+                pkcs7->content = content;
+                pkcs7->contentSz = contentSz;
+
+                if (ret == 0) {
+                    ret = wc_PKCS7_SignedDataVerifySignature(pkcs7, sig, sigSz,
+                                                   signedAttrib, signedAttribSz,
+                                                   hashBuf, hashSz);
+                }
+            }
+
+            if (ret < 0)
+                break;
+
+            ret = 0; /* success */
+        #ifndef NO_PKCS7_STREAM
+            wc_PKCS7_ResetStream(pkcs7);
+        #endif
+            wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_START);
+            break;
+
+        default:
+            WOLFSSL_MSG("PKCS7 Unknown verify state");
+            ret = BAD_FUNC_ARG;
+    }
+
+    if (ret != 0 && ret != WC_PKCS7_WANT_READ_E) {
+    #ifndef NO_PKCS7_STREAM
+        wc_PKCS7_ResetStream(pkcs7);
+    #endif
+        wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_START);
+    }
+    return ret;
+}
+
+
+/* Gets a copy of the SID parsed from signerInfo. This can be called after
+ * wc_PKCS7_VerifySignedData has been called. SID can be SKID in version 3 case
+ * or issuerAndSerialNumber.
+ *
+ * return 0 on success and LENGTH_ONLY_E if just setting "outSz" for buffer
+ *  length needed.
+ */
+int wc_PKCS7_GetSignerSID(PKCS7* pkcs7, byte* out, word32* outSz)
+{
+    if (outSz == NULL || pkcs7 == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    if (pkcs7->signerInfo == NULL) {
+        WOLFSSL_MSG("Either the bundle had no signers or"
+                "wc_PKCS7_VerifySignedData needs called yet");
+        return PKCS7_NO_SIGNER_E;
+    }
+
+    if (pkcs7->signerInfo->sidSz == 0) {
+        WOLFSSL_MSG("Bundle had no signer SID set");
+        return PKCS7_NO_SIGNER_E;
+    }
+
+    if (out == NULL) {
+        *outSz = pkcs7->signerInfo->sidSz;
+        return LENGTH_ONLY_E;
+    }
+
+    if (*outSz < pkcs7->signerInfo->sidSz) {
+        WOLFSSL_MSG("Buffer being passed in is not large enough for SKID");
+        return BUFFER_E;
+    }
+    XMEMCPY(out, pkcs7->signerInfo->sid, pkcs7->signerInfo->sidSz);
+    *outSz = pkcs7->signerInfo->sidSz;
+    return 0;
+}
+
+
+/* variant that allows computed data hash and header/foot,
+ * which is useful for large data signing */
+int wc_PKCS7_VerifySignedData_ex(PKCS7* pkcs7, const byte* hashBuf,
+    word32 hashSz, byte* pkiMsgHead, word32 pkiMsgHeadSz, byte* pkiMsgFoot,
+    word32 pkiMsgFootSz)
+{
+    return PKCS7_VerifySignedData(pkcs7, hashBuf, hashSz,
+        pkiMsgHead, pkiMsgHeadSz, pkiMsgFoot, pkiMsgFootSz);
+}
+
+int wc_PKCS7_VerifySignedData(PKCS7* pkcs7, byte* pkiMsg, word32 pkiMsgSz)
+{
+    return PKCS7_VerifySignedData(pkcs7, NULL, 0, pkiMsg, pkiMsgSz, NULL, 0);
+}
+
+
+/* Generate random content encryption key, store into pkcs7->cek and
+ * pkcs7->cekSz.
+ *
+ * pkcs7 - pointer to initialized PKCS7 structure
+ * len   - length of key to be generated
+ *
+ * Returns 0 on success, negative upon error */
+static int PKCS7_GenerateContentEncryptionKey(PKCS7* pkcs7, word32 len)
+{
+    int ret;
+    WC_RNG rng;
+    byte* tmpKey;
+
+    if (pkcs7 == NULL || len == 0)
+        return BAD_FUNC_ARG;
+
+    /* if key already exists, don't need to re-generate */
+    if (pkcs7->cek != NULL && pkcs7->cekSz != 0) {
+
+        /* if key exists, but is different size, return error */
+        if (pkcs7->cekSz != len) {
+            WOLFSSL_MSG("Random content-encryption key size is inconsistent "
+                        "between CMS recipients");
+            return WC_KEY_SIZE_E;
+        }
+
+        return 0;
+    }
+
+    /* allocate space for cek */
+    tmpKey = (byte*)XMALLOC(len, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+    if (tmpKey == NULL)
+        return MEMORY_E;
+
+    XMEMSET(tmpKey, 0, len);
+
+    ret = wc_InitRng_ex(&rng, pkcs7->heap, pkcs7->devId);
+    if (ret != 0) {
+        XFREE(tmpKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        return ret;
+    }
+
+    ret = wc_RNG_GenerateBlock(&rng, tmpKey, len);
+    if (ret != 0) {
+        wc_FreeRng(&rng);
+        XFREE(tmpKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        return ret;
+    }
+
+    /* store into PKCS7, memory freed during final cleanup */
+    pkcs7->cek = tmpKey;
+    pkcs7->cekSz = len;
+
+    wc_FreeRng(&rng);
 
     return 0;
 }
 
 
-#ifdef HAVE_ECC
-
-/* KARI == KeyAgreeRecipientInfo (key agreement) */
-typedef struct WC_PKCS7_KARI {
-    DecodedCert* decoded;          /* decoded recip cert */
-    void*    heap;                 /* user heap, points to PKCS7->heap */
-    int      devId;                /* device ID for HW based private key */
-    ecc_key* recipKey;             /* recip key  (pub | priv) */
-    ecc_key* senderKey;            /* sender key (pub | priv) */
-    byte*    senderKeyExport;      /* sender ephemeral key DER */
-    byte*    kek;                  /* key encryption key */
-    byte*    ukm;                  /* OPTIONAL user keying material */
-    byte*    sharedInfo;           /* ECC-CMS-SharedInfo ASN.1 encoded blob */
-    word32   senderKeyExportSz;    /* size of sender ephemeral key DER */
-    word32   kekSz;                /* size of key encryption key */
-    word32   ukmSz;                /* size of user keying material */
-    word32   sharedInfoSz;         /* size of ECC-CMS-SharedInfo encoded */
-    byte     ukmOwner;             /* do we own ukm buffer? 1:yes, 0:no */
-    byte     direction;            /* WC_PKCS7_ENCODE | WC_PKCS7_DECODE */
-    byte     decodedInit : 1;      /* indicates decoded was initialized */
-    byte     recipKeyInit : 1;     /* indicates recipKey was initialized */
-    byte     senderKeyInit : 1;    /* indicates senderKey was initialized */
-} WC_PKCS7_KARI;
-
-
 /* wrap CEK (content encryption key) with KEK, 0 on success, < 0 on error */
-static int wc_PKCS7_KariKeyWrap(byte* cek, word32 cekSz, byte* kek,
-                                word32 kekSz, byte* out, word32 outSz,
-                                int keyWrapAlgo, int direction)
-{
-    int ret;
+static int wc_PKCS7_KeyWrap(byte* cek, word32 cekSz, byte* kek,
+                            word32 kekSz, byte* out, word32 outSz,
+                            int keyWrapAlgo, int direction)
+{
+    int ret = 0;
 
     if (cek == NULL || kek == NULL || out == NULL)
         return BAD_FUNC_ARG;
@@ -2217,7 +5154,6 @@
 
             if (ret <= 0)
                 return ret;
-
             break;
 #endif /* NO_AES */
 
@@ -2234,6 +5170,31 @@
 }
 
 
+#ifdef HAVE_ECC
+
+/* KARI == KeyAgreeRecipientInfo (key agreement) */
+typedef struct WC_PKCS7_KARI {
+    DecodedCert* decoded;          /* decoded recip cert */
+    void*    heap;                 /* user heap, points to PKCS7->heap */
+    int      devId;                /* device ID for HW based private key */
+    ecc_key* recipKey;             /* recip key  (pub | priv) */
+    ecc_key* senderKey;            /* sender key (pub | priv) */
+    byte*    senderKeyExport;      /* sender ephemeral key DER */
+    byte*    kek;                  /* key encryption key */
+    byte*    ukm;                  /* OPTIONAL user keying material */
+    byte*    sharedInfo;           /* ECC-CMS-SharedInfo ASN.1 encoded blob */
+    word32   senderKeyExportSz;    /* size of sender ephemeral key DER */
+    word32   kekSz;                /* size of key encryption key */
+    word32   ukmSz;                /* size of user keying material */
+    word32   sharedInfoSz;         /* size of ECC-CMS-SharedInfo encoded */
+    byte     ukmOwner;             /* do we own ukm buffer? 1:yes, 0:no */
+    byte     direction;            /* WC_PKCS7_ENCODE | WC_PKCS7_DECODE */
+    byte     decodedInit : 1;      /* indicates decoded was initialized */
+    byte     recipKeyInit : 1;     /* indicates recipKey was initialized */
+    byte     senderKeyInit : 1;    /* indicates senderKey was initialized */
+} WC_PKCS7_KARI;
+
+
 /* allocate and create new WC_PKCS7_KARI struct,
  * returns struct pointer on success, NULL on failure */
 static WC_PKCS7_KARI* wc_PKCS7_KariNew(PKCS7* pkcs7, byte direction)
@@ -2371,6 +5332,12 @@
     if (ret < 0)
         return ret;
 
+    /* only supports ECDSA for now */
+    if (kari->decoded->keyOID != ECDSAk) {
+        WOLFSSL_MSG("CMS KARI only supports ECDSA key types");
+        return BAD_FUNC_ARG;
+    }
+
     /* make sure subject key id was read from cert */
     if (kari->decoded->extSubjKeyIdSet == 0) {
         WOLFSSL_MSG("Failed to read subject key ID from recipient cert");
@@ -2418,13 +5385,13 @@
 /* create ephemeral ECC key, places ecc_key in kari->senderKey,
  * DER encoded in kari->senderKeyExport. return 0 on success,
  * negative on error */
-static int wc_PKCS7_KariGenerateEphemeralKey(WC_PKCS7_KARI* kari, WC_RNG* rng)
+static int wc_PKCS7_KariGenerateEphemeralKey(WC_PKCS7_KARI* kari)
 {
     int ret;
+    WC_RNG rng;
 
     if (kari == NULL || kari->decoded == NULL ||
-        kari->recipKey == NULL || kari->recipKey->dp == NULL ||
-        rng == NULL)
+        kari->recipKey == NULL || kari->recipKey->dp == NULL)
         return BAD_FUNC_ARG;
 
     kari->senderKeyExport = (byte*)XMALLOC(kari->decoded->pubKeySize,
@@ -2435,21 +5402,36 @@
     kari->senderKeyExportSz = kari->decoded->pubKeySize;
 
     ret = wc_ecc_init_ex(kari->senderKey, kari->heap, kari->devId);
-    if (ret != 0)
+    if (ret != 0) {
+        XFREE(kari->senderKeyExport, kari->heap, DYNAMIC_TYPE_PKCS7);
         return ret;
+    }
 
     kari->senderKeyInit = 1;
 
-    ret = wc_ecc_make_key_ex(rng, kari->recipKey->dp->size,
+    ret = wc_InitRng_ex(&rng, kari->heap, kari->devId);
+    if (ret != 0) {
+        XFREE(kari->senderKeyExport, kari->heap, DYNAMIC_TYPE_PKCS7);
+        return ret;
+    }
+
+    ret = wc_ecc_make_key_ex(&rng, kari->recipKey->dp->size,
                              kari->senderKey, kari->recipKey->dp->id);
-    if (ret != 0)
+    if (ret != 0) {
+        XFREE(kari->senderKeyExport, kari->heap, DYNAMIC_TYPE_PKCS7);
+        wc_FreeRng(&rng);
         return ret;
+    }
+
+    wc_FreeRng(&rng);
 
     /* dump generated key to X.963 DER for output in CMS bundle */
     ret = wc_ecc_export_x963(kari->senderKey, kari->senderKeyExport,
                              &kari->senderKeyExportSz);
-    if (ret != 0)
+    if (ret != 0) {
+        XFREE(kari->senderKeyExport, kari->heap, DYNAMIC_TYPE_PKCS7);
         return ret;
+    }
 
     return 0;
 }
@@ -2652,16 +5634,24 @@
 }
 
 
-/* create ASN.1 formatted KeyAgreeRecipientInfo (kari) for use with ECDH,
- * return sequence size or negative on error */
-static int wc_CreateKeyAgreeRecipientInfo(PKCS7* pkcs7, const byte* cert,
-                            word32 certSz, int keyAgreeAlgo, int blockKeySz,
-                            int keyWrapAlgo, int keyEncAlgo, WC_RNG* rng,
-                            byte* contentKeyPlain, byte* contentKeyEnc,
-                            int* keyEncSz, byte* out, word32 outSz)
-{
-    int ret = 0, idx = 0;
+/* Encode and add CMS EnvelopedData KARI (KeyAgreeRecipientInfo) RecipientInfo
+ * to CMS/PKCS#7 EnvelopedData structure.
+ *
+ * Returns 0 on success, negative upon error */
+int wc_PKCS7_AddRecipient_KARI(PKCS7* pkcs7, const byte* cert, word32 certSz,
+                               int keyWrapOID, int keyAgreeOID, byte* ukm,
+                               word32 ukmSz, int options)
+{
+    Pkcs7EncodedRecip* recip;
+    Pkcs7EncodedRecip* lastRecip = NULL;
+    WC_PKCS7_KARI* kari = NULL;
+
+    word32 idx = 0;
+    word32 encryptedKeySz = MAX_ENCRYPTED_KEY_SZ;
+
+    int ret = 0;
     int keySz, direction = 0;
+    int blockKeySz = 0;
 
     /* ASN.1 layout */
     int totalSz = 0;
@@ -2701,14 +5691,51 @@
     int encryptedKeyOctetSz = 0;
     byte encryptedKeyOctet[MAX_OCTET_STR_SZ];
 
-    WC_PKCS7_KARI* kari;
-
-    /* only supports ECDSA for now */
-    if (keyAgreeAlgo != ECDSAk)
-        return BAD_FUNC_ARG;
+#ifdef WOLFSSL_SMALL_STACK
+    byte* encryptedKey;
+
+    encryptedKey = (byte*)XMALLOC(MAX_ENCRYPTED_KEY_SZ, pkcs7->heap,
+                                  DYNAMIC_TYPE_TMP_BUFFER);
+    if (encryptedKey == NULL) {
+        return MEMORY_E;
+    }
+#else
+    byte encryptedKey[MAX_ENCRYPTED_KEY_SZ];
+#endif
+
+    /* allocate and init memory for recipient */
+    recip = (Pkcs7EncodedRecip*)XMALLOC(sizeof(Pkcs7EncodedRecip), pkcs7->heap,
+                                 DYNAMIC_TYPE_PKCS7);
+    if (recip == NULL) {
+#ifdef WOLFSSL_SMALL_STACK
+        XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+        return MEMORY_E;
+    }
+    XMEMSET(recip, 0, sizeof(Pkcs7EncodedRecip));
+
+    /* get key size for content-encryption key based on algorithm */
+    blockKeySz = wc_PKCS7_GetOIDKeySize(pkcs7->encryptOID);
+    if (blockKeySz < 0) {
+#ifdef WOLFSSL_SMALL_STACK
+        XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+        XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        return blockKeySz;
+    }
+
+    /* generate random content encryption key, if needed */
+    ret = PKCS7_GenerateContentEncryptionKey(pkcs7, blockKeySz);
+    if (ret < 0) {
+#ifdef WOLFSSL_SMALL_STACK
+        XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+        XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        return ret;
+    }
 
     /* set direction based on keyWrapAlgo */
-    switch (keyWrapAlgo) {
+    switch (keyWrapOID) {
 #ifndef NO_AES
     #ifdef WOLFSSL_AES_128
         case AES128_WRAP:
@@ -2724,17 +5751,26 @@
 #endif
         default:
             WOLFSSL_MSG("Unsupported key wrap algorithm");
+#ifdef WOLFSSL_SMALL_STACK
+            XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+            XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
             return BAD_KEYWRAP_ALG_E;
     }
 
     kari = wc_PKCS7_KariNew(pkcs7, WC_PKCS7_ENCODE);
-    if (kari == NULL)
+    if (kari == NULL) {
+#ifdef WOLFSSL_SMALL_STACK
+        XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+        XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
         return MEMORY_E;
+    }
 
     /* set user keying material if available */
-    if ((pkcs7->ukmSz > 0) && (pkcs7->ukm != NULL)) {
-        kari->ukm = pkcs7->ukm;
-        kari->ukmSz = pkcs7->ukmSz;
+    if (ukmSz > 0 && ukm != NULL) {
+        kari->ukm = ukm;
+        kari->ukmSz = ukmSz;
         kari->ukmOwner = 0;
     }
 
@@ -2742,38 +5778,54 @@
     ret = wc_PKCS7_KariParseRecipCert(kari, cert, certSz, NULL, 0);
     if (ret != 0) {
         wc_PKCS7_KariFree(kari);
+#ifdef WOLFSSL_SMALL_STACK
+        XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+        XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
         return ret;
     }
 
     /* generate sender ephemeral ECC key */
-    ret = wc_PKCS7_KariGenerateEphemeralKey(kari, rng);
+    ret = wc_PKCS7_KariGenerateEphemeralKey(kari);
     if (ret != 0) {
         wc_PKCS7_KariFree(kari);
+#ifdef WOLFSSL_SMALL_STACK
+        XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+        XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
         return ret;
     }
 
     /* generate KEK (key encryption key) */
-    ret = wc_PKCS7_KariGenerateKEK(kari, keyWrapAlgo, keyEncAlgo);
+    ret = wc_PKCS7_KariGenerateKEK(kari, keyWrapOID, keyAgreeOID);
     if (ret != 0) {
         wc_PKCS7_KariFree(kari);
+#ifdef WOLFSSL_SMALL_STACK
+        XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+        XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
         return ret;
     }
 
     /* encrypt CEK with KEK */
-    keySz = wc_PKCS7_KariKeyWrap(contentKeyPlain, blockKeySz, kari->kek,
-                        kari->kekSz, contentKeyEnc, *keyEncSz, keyWrapAlgo,
-                        direction);
+    keySz = wc_PKCS7_KeyWrap(pkcs7->cek, pkcs7->cekSz, kari->kek,
+                             kari->kekSz, encryptedKey, encryptedKeySz,
+                             keyWrapOID, direction);
     if (keySz <= 0) {
         wc_PKCS7_KariFree(kari);
-        return ret;
-    }
-    *keyEncSz = (word32)keySz;
+#ifdef WOLFSSL_SMALL_STACK
+        XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+        XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        return keySz;
+    }
+    encryptedKeySz = (word32)keySz;
 
     /* Start of RecipientEncryptedKeys */
 
     /* EncryptedKey */
-    encryptedKeyOctetSz = SetOctetString(*keyEncSz, encryptedKeyOctet);
-    totalSz += (encryptedKeyOctetSz + *keyEncSz);
+    encryptedKeyOctetSz = SetOctetString(encryptedKeySz, encryptedKeyOctet);
+    totalSz += (encryptedKeyOctetSz + encryptedKeySz);
 
     /* SubjectKeyIdentifier */
     subjKeyIdOctetSz = SetOctetString(KEYID_SIZE, subjKeyIdOctet);
@@ -2806,11 +5858,11 @@
     /* Start of KeyEncryptionAlgorithmIdentifier */
 
     /* KeyWrapAlgorithm */
-    keyWrapAlgSz = SetAlgoID(keyWrapAlgo, keyWrapAlg, oidKeyWrapType, 0);
+    keyWrapAlgSz = SetAlgoID(keyWrapOID, keyWrapAlg, oidKeyWrapType, 0);
     totalSz += keyWrapAlgSz;
 
     /* KeyEncryptionAlgorithmIdentifier */
-    keyEncryptAlgoIdSz = SetAlgoID(keyEncAlgo, keyEncryptAlgoId,
+    keyEncryptAlgoIdSz = SetAlgoID(keyAgreeOID, keyEncryptAlgoId,
                                    oidCmsKeyAgreeType, keyWrapAlgSz);
     totalSz += keyEncryptAlgoIdSz;
 
@@ -2823,8 +5875,11 @@
                                 origPubKeyStr + 1) + 2;
     totalSz += (origPubKeyStrSz + kari->senderKeyExportSz);
 
-    /* Originator AlgorithmIdentifier */
-    origAlgIdSz = SetAlgoID(ECDSAk, origAlgId, oidKeyType, 0);
+    /* Originator AlgorithmIdentifier, params set to NULL for interop
+       compatibility */
+    origAlgIdSz = SetAlgoID(ECDSAk, origAlgId, oidKeyType, 2);
+    origAlgId[origAlgIdSz++] = ASN_TAG_NULL;
+    origAlgId[origAlgIdSz++] = 0;
     totalSz += origAlgIdSz;
 
     /* outer OriginatorPublicKey IMPLICIT [1] */
@@ -2843,67 +5898,94 @@
     /* version, always 3 */
     verSz = SetMyVersion(3, ver, 0);
     totalSz += verSz;
+    recip->recipVersion = 3;
 
     /* outer IMPLICIT [1] kari */
     kariSeqSz = SetImplicit(ASN_SEQUENCE, 1, totalSz, kariSeq);
     totalSz += kariSeqSz;
 
-    if ((word32)totalSz > outSz) {
+    if (totalSz > MAX_RECIP_SZ) {
         WOLFSSL_MSG("KeyAgreeRecipientInfo output buffer too small");
         wc_PKCS7_KariFree(kari);
-
+#ifdef WOLFSSL_SMALL_STACK
+        XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+        XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
         return BUFFER_E;
     }
 
-    XMEMCPY(out + idx, kariSeq, kariSeqSz);
+    XMEMCPY(recip->recip + idx, kariSeq, kariSeqSz);
     idx += kariSeqSz;
-    XMEMCPY(out + idx, ver, verSz);
+    XMEMCPY(recip->recip + idx, ver, verSz);
     idx += verSz;
 
-    XMEMCPY(out + idx, origIdOrKeySeq, origIdOrKeySeqSz);
+    XMEMCPY(recip->recip + idx, origIdOrKeySeq, origIdOrKeySeqSz);
     idx += origIdOrKeySeqSz;
-    XMEMCPY(out + idx, origPubKeySeq, origPubKeySeqSz);
+    XMEMCPY(recip->recip + idx, origPubKeySeq, origPubKeySeqSz);
     idx += origPubKeySeqSz;
-    XMEMCPY(out + idx, origAlgId, origAlgIdSz);
+
+    /* AlgorithmIdentifier with NULL parameter */
+    XMEMCPY(recip->recip + idx, origAlgId, origAlgIdSz);
     idx += origAlgIdSz;
-    XMEMCPY(out + idx, origPubKeyStr, origPubKeyStrSz);
+
+    XMEMCPY(recip->recip + idx, origPubKeyStr, origPubKeyStrSz);
     idx += origPubKeyStrSz;
     /* ephemeral public key */
-    XMEMCPY(out + idx, kari->senderKeyExport, kari->senderKeyExportSz);
+    XMEMCPY(recip->recip + idx, kari->senderKeyExport, kari->senderKeyExportSz);
     idx += kari->senderKeyExportSz;
 
     if (kari->ukmSz > 0) {
-        XMEMCPY(out + idx, ukmExplicitSeq, ukmExplicitSz);
+        XMEMCPY(recip->recip + idx, ukmExplicitSeq, ukmExplicitSz);
         idx += ukmExplicitSz;
-        XMEMCPY(out + idx, ukmOctetStr, ukmOctetSz);
+        XMEMCPY(recip->recip + idx, ukmOctetStr, ukmOctetSz);
         idx += ukmOctetSz;
-        XMEMCPY(out + idx, kari->ukm, kari->ukmSz);
+        XMEMCPY(recip->recip + idx, kari->ukm, kari->ukmSz);
         idx += kari->ukmSz;
     }
 
-    XMEMCPY(out + idx, keyEncryptAlgoId, keyEncryptAlgoIdSz);
+    XMEMCPY(recip->recip + idx, keyEncryptAlgoId, keyEncryptAlgoIdSz);
     idx += keyEncryptAlgoIdSz;
-    XMEMCPY(out + idx, keyWrapAlg, keyWrapAlgSz);
+    XMEMCPY(recip->recip + idx, keyWrapAlg, keyWrapAlgSz);
     idx += keyWrapAlgSz;
 
-    XMEMCPY(out + idx, recipEncKeysSeq, recipEncKeysSeqSz);
+    XMEMCPY(recip->recip + idx, recipEncKeysSeq, recipEncKeysSeqSz);
     idx += recipEncKeysSeqSz;
-    XMEMCPY(out + idx, recipEncKeySeq, recipEncKeySeqSz);
+    XMEMCPY(recip->recip + idx, recipEncKeySeq, recipEncKeySeqSz);
     idx += recipEncKeySeqSz;
-    XMEMCPY(out + idx, recipKeyIdSeq, recipKeyIdSeqSz);
+    XMEMCPY(recip->recip + idx, recipKeyIdSeq, recipKeyIdSeqSz);
     idx += recipKeyIdSeqSz;
-    XMEMCPY(out + idx, subjKeyIdOctet, subjKeyIdOctetSz);
+    XMEMCPY(recip->recip + idx, subjKeyIdOctet, subjKeyIdOctetSz);
     idx += subjKeyIdOctetSz;
     /* subject key id */
-    XMEMCPY(out + idx, kari->decoded->extSubjKeyId, KEYID_SIZE);
+    XMEMCPY(recip->recip + idx, kari->decoded->extSubjKeyId, KEYID_SIZE);
     idx += KEYID_SIZE;
-    XMEMCPY(out + idx, encryptedKeyOctet, encryptedKeyOctetSz);
+    XMEMCPY(recip->recip + idx, encryptedKeyOctet, encryptedKeyOctetSz);
     idx += encryptedKeyOctetSz;
     /* encrypted CEK */
-    XMEMCPY(out + idx, contentKeyEnc, *keyEncSz);
-    idx += *keyEncSz;
+    XMEMCPY(recip->recip + idx, encryptedKey, encryptedKeySz);
+    idx += encryptedKeySz;
 
     wc_PKCS7_KariFree(kari);
+#ifdef WOLFSSL_SMALL_STACK
+    XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+    /* store recipient size */
+    recip->recipSz = idx;
+    recip->recipType = PKCS7_KARI;
+
+    /* add recipient to recip list */
+    if (pkcs7->recipList == NULL) {
+        pkcs7->recipList = recip;
+    } else {
+        lastRecip = pkcs7->recipList;
+        while (lastRecip->next != NULL) {
+            lastRecip = lastRecip->next;
+        }
+        lastRecip->next = recip;
+    }
+
+    (void)options;
 
     return idx;
 }
@@ -2912,18 +5994,25 @@
 
 #ifndef NO_RSA
 
-/* create ASN.1 formatted RecipientInfo structure, returns sequence size */
-static int wc_CreateRecipientInfo(const byte* cert, word32 certSz,
-                                  int keyEncAlgo, int blockKeySz,
-                                  WC_RNG* rng, byte* contentKeyPlain,
-                                  byte* contentKeyEnc, int* keyEncSz,
-                                  byte* out, word32 outSz, void* heap)
-{
+/* Encode and add CMS EnvelopedData KTRI (KeyTransRecipientInfo) RecipientInfo
+ * to CMS/PKCS#7 EnvelopedData structure.
+ *
+ * Returns 0 on success, negative upon error */
+int wc_PKCS7_AddRecipient_KTRI(PKCS7* pkcs7, const byte* cert, word32 certSz,
+                               int options)
+{
+    Pkcs7EncodedRecip* recip = NULL;
+    Pkcs7EncodedRecip* lastRecip = NULL;
+
+    WC_RNG rng;
     word32 idx = 0;
-    int ret = 0, totalSz = 0;
-    int verSz, issuerSz, snSz, keyEncAlgSz;
-    int issuerSeqSz, recipSeqSz, issuerSerialSeqSz;
+    word32 encryptedKeySz = 0;
+
+    int ret = 0, blockKeySz;
+    int verSz = 0, issuerSz = 0, snSz = 0, keyEncAlgSz = 0;
+    int issuerSeqSz = 0, recipSeqSz = 0, issuerSerialSeqSz = 0;
     int encKeyOctetStrSz;
+    int sidType;
 
     byte ver[MAX_VERSION_SZ];
     byte issuerSerialSeq[MAX_SEQ_SZ];
@@ -2931,213 +6020,396 @@
     byte issuerSeq[MAX_SEQ_SZ];
     byte encKeyOctetStr[MAX_OCTET_STR_SZ];
 
-#ifdef WOLFSSL_SMALL_STACK
-    byte *serial;
-    byte *keyAlgArray;
-
+    byte issuerSKIDSeq[MAX_SEQ_SZ];
+    byte issuerSKID[MAX_OCTET_STR_SZ];
+    word32 issuerSKIDSeqSz = 0, issuerSKIDSz = 0;
+
+#ifdef WOLFSSL_SMALL_STACK
+    byte*   serial;
+    byte*   keyAlgArray;
+    byte*   encryptedKey;
     RsaKey* pubKey;
     DecodedCert* decoded;
 
-    serial = (byte*)XMALLOC(MAX_SN_SZ, heap, DYNAMIC_TYPE_TMP_BUFFER);
-    keyAlgArray = (byte*)XMALLOC(MAX_SN_SZ, heap, DYNAMIC_TYPE_TMP_BUFFER);
-    decoded = (DecodedCert*)XMALLOC(sizeof(DecodedCert), heap,
-                                                       DYNAMIC_TYPE_TMP_BUFFER);
-
-    if (decoded == NULL || serial == NULL || keyAlgArray == NULL) {
-        if (serial)      XFREE(serial,      heap, DYNAMIC_TYPE_TMP_BUFFER);
-        if (keyAlgArray) XFREE(keyAlgArray, heap, DYNAMIC_TYPE_TMP_BUFFER);
-        if (decoded)     XFREE(decoded,     heap, DYNAMIC_TYPE_TMP_BUFFER);
+    serial = (byte*)XMALLOC(MAX_SN_SZ, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    keyAlgArray = (byte*)XMALLOC(MAX_SN_SZ, pkcs7->heap,
+                                 DYNAMIC_TYPE_TMP_BUFFER);
+    encryptedKey = (byte*)XMALLOC(MAX_ENCRYPTED_KEY_SZ, pkcs7->heap,
+                                  DYNAMIC_TYPE_TMP_BUFFER);
+    decoded = (DecodedCert*)XMALLOC(sizeof(DecodedCert), pkcs7->heap,
+                                    DYNAMIC_TYPE_TMP_BUFFER);
+
+    if (decoded == NULL || serial == NULL ||
+        encryptedKey == NULL || keyAlgArray == NULL) {
+        if (serial)
+            XFREE(serial, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        if (keyAlgArray)
+            XFREE(keyAlgArray, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        if (encryptedKey)
+            XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        if (decoded)
+            XFREE(decoded, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
         return MEMORY_E;
     }
-
 #else
     byte serial[MAX_SN_SZ];
     byte keyAlgArray[MAX_ALGO_SZ];
-
-    RsaKey stack_pubKey;
-    RsaKey* pubKey = &stack_pubKey;
-    DecodedCert stack_decoded;
-    DecodedCert* decoded = &stack_decoded;
-#endif
-
-    InitDecodedCert(decoded, (byte*)cert, certSz, heap);
+    byte encryptedKey[MAX_ENCRYPTED_KEY_SZ];
+
+    RsaKey pubKey[1];
+    DecodedCert decoded[1];
+#endif
+
+    encryptedKeySz = MAX_ENCRYPTED_KEY_SZ;
+    XMEMSET(encryptedKey, 0, encryptedKeySz);
+
+    /* default to IssuerAndSerialNumber if not set */
+    if (pkcs7->sidType != 0) {
+        sidType = pkcs7->sidType;
+    } else {
+        sidType = CMS_ISSUER_AND_SERIAL_NUMBER;
+    }
+
+    /* allow options to override SubjectIdentifier type if set */
+    if (options & CMS_SKID) {
+        sidType = CMS_SKID;
+    } else if (options & CMS_ISSUER_AND_SERIAL_NUMBER) {
+        sidType = CMS_ISSUER_AND_SERIAL_NUMBER;
+    }
+
+    /* allocate recipient struct */
+    recip = (Pkcs7EncodedRecip*)XMALLOC(sizeof(Pkcs7EncodedRecip), pkcs7->heap,
+                                 DYNAMIC_TYPE_PKCS7);
+    if (recip == NULL) {
+#ifdef WOLFSSL_SMALL_STACK
+        XFREE(serial,       pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(keyAlgArray,  pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(decoded,      pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+        return MEMORY_E;
+    }
+    XMEMSET(recip, 0, sizeof(Pkcs7EncodedRecip));
+
+    /* get key size for content-encryption key based on algorithm */
+    blockKeySz = wc_PKCS7_GetOIDKeySize(pkcs7->encryptOID);
+    if (blockKeySz < 0) {
+#ifdef WOLFSSL_SMALL_STACK
+        XFREE(serial,       pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(keyAlgArray,  pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(decoded,      pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+        XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        return blockKeySz;
+    }
+
+    /* generate random content encryption key, if needed */
+    ret = PKCS7_GenerateContentEncryptionKey(pkcs7, blockKeySz);
+    if (ret < 0) {
+#ifdef WOLFSSL_SMALL_STACK
+        XFREE(serial,       pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(keyAlgArray,  pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(decoded,      pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+        XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        return ret;
+    }
+
+    InitDecodedCert(decoded, (byte*)cert, certSz, pkcs7->heap);
     ret = ParseCert(decoded, CA_TYPE, NO_VERIFY, 0);
     if (ret < 0) {
         FreeDecodedCert(decoded);
 #ifdef WOLFSSL_SMALL_STACK
-        XFREE(serial,      heap, DYNAMIC_TYPE_TMP_BUFFER);
-        XFREE(keyAlgArray, heap, DYNAMIC_TYPE_TMP_BUFFER);
-        XFREE(decoded,     heap, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
+        XFREE(serial,       pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(keyAlgArray,  pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(decoded,      pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+        XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
         return ret;
     }
 
-    /* version */
-    verSz = SetMyVersion(0, ver, 0);
-
-    /* IssuerAndSerialNumber */
-    if (decoded->issuerRaw == NULL || decoded->issuerRawLen == 0) {
-        WOLFSSL_MSG("DecodedCert lacks raw issuer pointer and length");
+    if (sidType == CMS_ISSUER_AND_SERIAL_NUMBER) {
+
+        /* version, must be 0 for IssuerAndSerialNumber */
+        verSz = SetMyVersion(0, ver, 0);
+        recip->recipVersion = 0;
+
+        /* IssuerAndSerialNumber */
+        if (decoded->issuerRaw == NULL || decoded->issuerRawLen == 0) {
+            WOLFSSL_MSG("DecodedCert lacks raw issuer pointer and length");
+            FreeDecodedCert(decoded);
+#ifdef WOLFSSL_SMALL_STACK
+            XFREE(serial,       pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            XFREE(keyAlgArray,  pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            XFREE(decoded,      pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+            XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+            return -1;
+        }
+        issuerSz    = decoded->issuerRawLen;
+        issuerSeqSz = SetSequence(issuerSz, issuerSeq);
+
+        if (decoded->serialSz == 0) {
+            WOLFSSL_MSG("DecodedCert missing serial number");
+            FreeDecodedCert(decoded);
+#ifdef WOLFSSL_SMALL_STACK
+            XFREE(serial,       pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            XFREE(keyAlgArray,  pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            XFREE(decoded,      pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+            XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+            return -1;
+        }
+        snSz = SetSerialNumber(decoded->serial, decoded->serialSz, serial,
+                               MAX_SN_SZ, MAX_SN_SZ);
+
+        issuerSerialSeqSz = SetSequence(issuerSeqSz + issuerSz + snSz,
+                                        issuerSerialSeq);
+
+    } else if (sidType == CMS_SKID) {
+
+        /* version, must be 2 for SubjectKeyIdentifier */
+        verSz = SetMyVersion(2, ver, 0);
+        recip->recipVersion = 2;
+
+        issuerSKIDSz = SetOctetString(KEYID_SIZE, issuerSKID);
+        issuerSKIDSeqSz = SetExplicit(0, issuerSKIDSz + KEYID_SIZE,
+                                      issuerSKIDSeq);
+    } else {
         FreeDecodedCert(decoded);
 #ifdef WOLFSSL_SMALL_STACK
-        XFREE(serial,      heap, DYNAMIC_TYPE_TMP_BUFFER);
-        XFREE(keyAlgArray, heap, DYNAMIC_TYPE_TMP_BUFFER);
-        XFREE(decoded,     heap, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-        return -1;
-    }
-    issuerSz    = decoded->issuerRawLen;
-    issuerSeqSz = SetSequence(issuerSz, issuerSeq);
-
-    if (decoded->serialSz == 0) {
-        WOLFSSL_MSG("DecodedCert missing serial number");
-        FreeDecodedCert(decoded);
-#ifdef WOLFSSL_SMALL_STACK
-        XFREE(serial,      heap, DYNAMIC_TYPE_TMP_BUFFER);
-        XFREE(keyAlgArray, heap, DYNAMIC_TYPE_TMP_BUFFER);
-        XFREE(decoded,     heap, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-        return -1;
-    }
-    snSz = SetSerialNumber(decoded->serial, decoded->serialSz, serial, MAX_SN_SZ);
-
-    issuerSerialSeqSz = SetSequence(issuerSeqSz + issuerSz + snSz,
-                                    issuerSerialSeq);
+        XFREE(serial,       pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(keyAlgArray,  pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(decoded,      pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+        XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        return PKCS7_RECIP_E;
+    }
+
+    pkcs7->publicKeyOID = decoded->keyOID;
 
     /* KeyEncryptionAlgorithmIdentifier, only support RSA now */
-    if (keyEncAlgo != RSAk) {
+    if (pkcs7->publicKeyOID != RSAk) {
         FreeDecodedCert(decoded);
 #ifdef WOLFSSL_SMALL_STACK
-        XFREE(serial,      heap, DYNAMIC_TYPE_TMP_BUFFER);
-        XFREE(keyAlgArray, heap, DYNAMIC_TYPE_TMP_BUFFER);
-        XFREE(decoded,     heap, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
+        XFREE(serial,       pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(keyAlgArray,  pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(decoded,      pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+        XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
         return ALGO_ID_E;
     }
 
-    keyEncAlgSz = SetAlgoID(keyEncAlgo, keyAlgArray, oidKeyType, 0);
+    keyEncAlgSz = SetAlgoID(pkcs7->publicKeyOID, keyAlgArray, oidKeyType, 0);
     if (keyEncAlgSz == 0) {
         FreeDecodedCert(decoded);
 #ifdef WOLFSSL_SMALL_STACK
-        XFREE(serial,      heap, DYNAMIC_TYPE_TMP_BUFFER);
-        XFREE(keyAlgArray, heap, DYNAMIC_TYPE_TMP_BUFFER);
-        XFREE(decoded,     heap, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-        return BAD_FUNC_ARG;
-    }
-
-#ifdef WOLFSSL_SMALL_STACK
-    pubKey = (RsaKey*)XMALLOC(sizeof(RsaKey), heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(serial,       pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(keyAlgArray,  pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(decoded,      pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+        XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        return BAD_FUNC_ARG;
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    pubKey = (RsaKey*)XMALLOC(sizeof(RsaKey), pkcs7->heap,
+            DYNAMIC_TYPE_TMP_BUFFER);
     if (pubKey == NULL) {
         FreeDecodedCert(decoded);
-        XFREE(serial,      heap, DYNAMIC_TYPE_TMP_BUFFER);
-        XFREE(keyAlgArray, heap, DYNAMIC_TYPE_TMP_BUFFER);
-        XFREE(decoded,     heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(serial,       pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(keyAlgArray,  pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(decoded,      pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
         return MEMORY_E;
     }
 #endif
 
     /* EncryptedKey */
-    ret = wc_InitRsaKey_ex(pubKey, heap, INVALID_DEVID);
+    ret = wc_InitRsaKey_ex(pubKey, pkcs7->heap, INVALID_DEVID);
     if (ret != 0) {
         FreeDecodedCert(decoded);
 #ifdef WOLFSSL_SMALL_STACK
-        XFREE(pubKey,      heap, DYNAMIC_TYPE_TMP_BUFFER);
-        XFREE(serial,      heap, DYNAMIC_TYPE_TMP_BUFFER);
-        XFREE(keyAlgArray, heap, DYNAMIC_TYPE_TMP_BUFFER);
-        XFREE(decoded,     heap, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
+        XFREE(pubKey,       pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(serial,       pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(keyAlgArray,  pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(decoded,      pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+        XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
         return ret;
     }
 
     if (wc_RsaPublicKeyDecode(decoded->publicKey, &idx, pubKey,
-                           decoded->pubKeySize) < 0) {
+                              decoded->pubKeySize) < 0) {
         WOLFSSL_MSG("ASN RSA key decode error");
         wc_FreeRsaKey(pubKey);
         FreeDecodedCert(decoded);
 #ifdef WOLFSSL_SMALL_STACK
-        XFREE(pubKey,      heap, DYNAMIC_TYPE_TMP_BUFFER);
-        XFREE(serial,      heap, DYNAMIC_TYPE_TMP_BUFFER);
-        XFREE(keyAlgArray, heap, DYNAMIC_TYPE_TMP_BUFFER);
-        XFREE(decoded,     heap, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
+        XFREE(pubKey,       pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(serial,       pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(keyAlgArray,  pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(decoded,      pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+        XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
         return PUBLIC_KEY_E;
     }
 
-    *keyEncSz = wc_RsaPublicEncrypt(contentKeyPlain, blockKeySz, contentKeyEnc,
-                                 MAX_ENCRYPTED_KEY_SZ, pubKey, rng);
+    ret = wc_InitRng_ex(&rng, pkcs7->heap, pkcs7->devId);
+    if (ret != 0) {
+        wc_FreeRsaKey(pubKey);
+        FreeDecodedCert(decoded);
+#ifdef WOLFSSL_SMALL_STACK
+        XFREE(pubKey,       pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(serial,       pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(keyAlgArray,  pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(decoded,      pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+        XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        return MEMORY_E;
+    }
+
+
+    ret = wc_RsaPublicEncrypt(pkcs7->cek, pkcs7->cekSz, encryptedKey,
+                              encryptedKeySz, pubKey, &rng);
     wc_FreeRsaKey(pubKey);
-
-#ifdef WOLFSSL_SMALL_STACK
-    XFREE(pubKey, heap, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    if (*keyEncSz < 0) {
+    wc_FreeRng(&rng);
+
+#ifdef WOLFSSL_SMALL_STACK
+    XFREE(pubKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+    if (ret < 0) {
         WOLFSSL_MSG("RSA Public Encrypt failed");
         FreeDecodedCert(decoded);
 #ifdef WOLFSSL_SMALL_STACK
-        XFREE(serial,      heap, DYNAMIC_TYPE_TMP_BUFFER);
-        XFREE(keyAlgArray, heap, DYNAMIC_TYPE_TMP_BUFFER);
-        XFREE(decoded,     heap, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-        return *keyEncSz;
-    }
-
-    encKeyOctetStrSz = SetOctetString(*keyEncSz, encKeyOctetStr);
+        XFREE(serial,       pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(keyAlgArray,  pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(decoded,      pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+        XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        return ret;
+    }
+    encryptedKeySz = ret;
+
+    encKeyOctetStrSz = SetOctetString(encryptedKeySz, encKeyOctetStr);
 
     /* RecipientInfo */
-    recipSeqSz = SetSequence(verSz + issuerSerialSeqSz + issuerSeqSz +
-                             issuerSz + snSz + keyEncAlgSz + encKeyOctetStrSz +
-                             *keyEncSz, recipSeq);
-
-    if (recipSeqSz + verSz + issuerSerialSeqSz + issuerSeqSz + snSz +
-        keyEncAlgSz + encKeyOctetStrSz + *keyEncSz > (int)outSz) {
-        WOLFSSL_MSG("RecipientInfo output buffer too small");
-        FreeDecodedCert(decoded);
-#ifdef WOLFSSL_SMALL_STACK
-        XFREE(serial,      heap, DYNAMIC_TYPE_TMP_BUFFER);
-        XFREE(keyAlgArray, heap, DYNAMIC_TYPE_TMP_BUFFER);
-        XFREE(decoded,     heap, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-        return BUFFER_E;
-    }
-
-    XMEMCPY(out + totalSz, recipSeq, recipSeqSz);
-    totalSz += recipSeqSz;
-    XMEMCPY(out + totalSz, ver, verSz);
-    totalSz += verSz;
-    XMEMCPY(out + totalSz, issuerSerialSeq, issuerSerialSeqSz);
-    totalSz += issuerSerialSeqSz;
-    XMEMCPY(out + totalSz, issuerSeq, issuerSeqSz);
-    totalSz += issuerSeqSz;
-    XMEMCPY(out + totalSz, decoded->issuerRaw, issuerSz);
-    totalSz += issuerSz;
-    XMEMCPY(out + totalSz, serial, snSz);
-    totalSz += snSz;
-    XMEMCPY(out + totalSz, keyAlgArray, keyEncAlgSz);
-    totalSz += keyEncAlgSz;
-    XMEMCPY(out + totalSz, encKeyOctetStr, encKeyOctetStrSz);
-    totalSz += encKeyOctetStrSz;
-    XMEMCPY(out + totalSz, contentKeyEnc, *keyEncSz);
-    totalSz += *keyEncSz;
+    if (sidType == CMS_ISSUER_AND_SERIAL_NUMBER) {
+        recipSeqSz = SetSequence(verSz + issuerSerialSeqSz + issuerSeqSz +
+                                 issuerSz + snSz + keyEncAlgSz +
+                                 encKeyOctetStrSz + encryptedKeySz, recipSeq);
+
+        if (recipSeqSz + verSz + issuerSerialSeqSz + issuerSeqSz + snSz +
+            keyEncAlgSz + encKeyOctetStrSz + encryptedKeySz > MAX_RECIP_SZ) {
+            WOLFSSL_MSG("RecipientInfo output buffer too small");
+            FreeDecodedCert(decoded);
+#ifdef WOLFSSL_SMALL_STACK
+            XFREE(serial,       pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            XFREE(keyAlgArray,  pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            XFREE(decoded,      pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+            XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+            return BUFFER_E;
+        }
+
+    } else {
+        recipSeqSz = SetSequence(verSz + issuerSKIDSeqSz + issuerSKIDSz +
+                                 KEYID_SIZE + keyEncAlgSz + encKeyOctetStrSz +
+                                 encryptedKeySz, recipSeq);
+
+        if (recipSeqSz + verSz + issuerSKIDSeqSz + issuerSKIDSz + KEYID_SIZE +
+            keyEncAlgSz + encKeyOctetStrSz + encryptedKeySz > MAX_RECIP_SZ) {
+            WOLFSSL_MSG("RecipientInfo output buffer too small");
+            FreeDecodedCert(decoded);
+#ifdef WOLFSSL_SMALL_STACK
+            XFREE(serial,       pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            XFREE(keyAlgArray,  pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            XFREE(decoded,      pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+            XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+            return BUFFER_E;
+        }
+    }
+
+    idx = 0;
+    XMEMCPY(recip->recip + idx, recipSeq, recipSeqSz);
+    idx += recipSeqSz;
+    XMEMCPY(recip->recip + idx, ver, verSz);
+    idx += verSz;
+    if (sidType == CMS_ISSUER_AND_SERIAL_NUMBER) {
+        XMEMCPY(recip->recip + idx, issuerSerialSeq, issuerSerialSeqSz);
+        idx += issuerSerialSeqSz;
+        XMEMCPY(recip->recip + idx, issuerSeq, issuerSeqSz);
+        idx += issuerSeqSz;
+        XMEMCPY(recip->recip + idx, decoded->issuerRaw, issuerSz);
+        idx += issuerSz;
+        XMEMCPY(recip->recip + idx, serial, snSz);
+        idx += snSz;
+    } else {
+        XMEMCPY(recip->recip + idx, issuerSKIDSeq, issuerSKIDSeqSz);
+        idx += issuerSKIDSeqSz;
+        XMEMCPY(recip->recip + idx, issuerSKID, issuerSKIDSz);
+        idx += issuerSKIDSz;
+        XMEMCPY(recip->recip + idx, pkcs7->issuerSubjKeyId, KEYID_SIZE);
+        idx += KEYID_SIZE;
+    }
+    XMEMCPY(recip->recip + idx, keyAlgArray, keyEncAlgSz);
+    idx += keyEncAlgSz;
+    XMEMCPY(recip->recip + idx, encKeyOctetStr, encKeyOctetStrSz);
+    idx += encKeyOctetStrSz;
+    XMEMCPY(recip->recip + idx, encryptedKey, encryptedKeySz);
+    idx += encryptedKeySz;
 
     FreeDecodedCert(decoded);
 
 #ifdef WOLFSSL_SMALL_STACK
-    XFREE(serial,      heap, DYNAMIC_TYPE_TMP_BUFFER);
-    XFREE(keyAlgArray, heap, DYNAMIC_TYPE_TMP_BUFFER);
-    XFREE(decoded,     heap, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return totalSz;
-}
+    XFREE(serial,       pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    XFREE(keyAlgArray,  pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    XFREE(decoded,      pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+    /* store recipient size */
+    recip->recipSz = idx;
+    recip->recipType = PKCS7_KTRI;
+
+    /* add recipient to recip list */
+    if (pkcs7->recipList == NULL) {
+        pkcs7->recipList = recip;
+    } else {
+        lastRecip = pkcs7->recipList;
+        while (lastRecip->next != NULL) {
+            lastRecip = lastRecip->next;
+        }
+        lastRecip->next = recip;
+    }
+
+    return idx;
+}
+
 #endif /* !NO_RSA */
 
 
 /* encrypt content using encryptOID algo */
 static int wc_PKCS7_EncryptContent(int encryptOID, byte* key, int keySz,
-                                   byte* iv, int ivSz, byte* in, int inSz,
-                                   byte* out)
+                                   byte* iv, int ivSz, byte* aad, word32 aadSz,
+                                   byte* authTag, word32 authTagSz, byte* in,
+                                   int inSz, byte* out)
 {
     int ret;
 #ifndef NO_AES
@@ -3175,12 +6447,67 @@
                     (ivSz  != AES_BLOCK_SIZE) )
                 return BAD_FUNC_ARG;
 
-            ret = wc_AesSetKey(&aes, key, keySz, iv, AES_ENCRYPTION);
-            if (ret == 0)
-                ret = wc_AesCbcEncrypt(&aes, out, in, inSz);
-
+            ret = wc_AesInit(&aes, NULL, INVALID_DEVID);
+            if (ret == 0) {
+                ret = wc_AesSetKey(&aes, key, keySz, iv, AES_ENCRYPTION);
+                if (ret == 0)
+                    ret = wc_AesCbcEncrypt(&aes, out, in, inSz);
+                wc_AesFree(&aes);
+            }
             break;
-#endif
+    #ifdef HAVE_AESGCM
+        #ifdef WOLFSSL_AES_128
+        case AES128GCMb:
+        #endif
+        #ifdef WOLFSSL_AES_192
+        case AES192GCMb:
+        #endif
+        #ifdef WOLFSSL_AES_256
+        case AES256GCMb:
+        #endif
+        #if defined(WOLFSSL_AES_128) || defined(WOLFSSL_AES_192) || \
+            defined(WOLFSSL_AES_256)
+            if (authTag == NULL)
+                return BAD_FUNC_ARG;
+
+            ret = wc_AesInit(&aes, NULL, INVALID_DEVID);
+            if (ret == 0) {
+                ret = wc_AesGcmSetKey(&aes, key, keySz);
+                if (ret == 0)
+                    ret = wc_AesGcmEncrypt(&aes, out, in, inSz, iv, ivSz,
+                                           authTag, authTagSz, aad, aadSz);
+                wc_AesFree(&aes);
+            }
+            break;
+        #endif
+    #endif /* HAVE_AESGCM */
+    #ifdef HAVE_AESCCM
+        #ifdef WOLFSSL_AES_128
+        case AES128CCMb:
+        #endif
+        #ifdef WOLFSSL_AES_192
+        case AES192CCMb:
+        #endif
+        #ifdef WOLFSSL_AES_256
+        case AES256CCMb:
+        #endif
+        #if defined(WOLFSSL_AES_128) || defined(WOLFSSL_AES_192) || \
+            defined(WOLFSSL_AES_256)
+            if (authTag == NULL)
+                return BAD_FUNC_ARG;
+
+            ret = wc_AesInit(&aes, NULL, INVALID_DEVID);
+            if (ret == 0) {
+                ret = wc_AesCcmSetKey(&aes, key, keySz);
+                if (ret == 0)
+                    ret = wc_AesCcmEncrypt(&aes, out, in, inSz, iv, ivSz,
+                                           authTag, authTagSz, aad, aadSz);
+                wc_AesFree(&aes);
+            }
+            break;
+        #endif
+    #endif /* HAVE_AESCCM */
+#endif /* NO_AES */
 #ifndef NO_DES3
         case DESb:
             if (keySz != DES_KEYLEN || ivSz != DES_BLOCK_SIZE)
@@ -3196,10 +6523,13 @@
             if (keySz != DES3_KEYLEN || ivSz != DES_BLOCK_SIZE)
                 return BAD_FUNC_ARG;
 
-            ret = wc_Des3_SetKey(&des3, key, iv, DES_ENCRYPTION);
-            if (ret == 0)
-                ret = wc_Des3_CbcEncrypt(&des3, out, in, inSz);
-
+            ret = wc_Des3Init(&des3, NULL, INVALID_DEVID);
+            if (ret == 0) {
+                ret = wc_Des3_SetKey(&des3, key, iv, DES_ENCRYPTION);
+                if (ret == 0)
+                    ret = wc_Des3_CbcEncrypt(&des3, out, in, inSz);
+                wc_Des3Free(&des3);
+            }
             break;
 #endif
         default:
@@ -3207,14 +6537,21 @@
             return ALGO_ID_E;
     };
 
+#if defined(NO_AES) || (!defined(HAVE_AESGCM) && !defined(HAVE_AESCCM))
+    (void)authTag;
+    (void)authTagSz;
+    (void)aad;
+    (void)aadSz;
+#endif
     return ret;
 }
 
 
-/* decrypt content using encryptOID algo */
-static int wc_PKCS7_DecryptContent(int encryptOID, byte* key, int keySz,
-                                   byte* iv, int ivSz, byte* in, int inSz,
-                                   byte* out)
+/* decrypt content using encryptOID algo
+ * returns 0 on success */
+static int wc_PKCS7_DecryptContent(PKCS7* pkcs7, int encryptOID, byte* key,
+        int keySz, byte* iv, int ivSz, byte* aad, word32 aadSz, byte* authTag,
+        word32 authTagSz, byte* in, int inSz, byte* out)
 {
     int ret;
 #ifndef NO_AES
@@ -3225,7 +6562,16 @@
     Des3 des3;
 #endif
 
-    if (key == NULL || iv == NULL || in == NULL || out == NULL)
+    if (iv == NULL || in == NULL || out == NULL)
+        return BAD_FUNC_ARG;
+
+    if (pkcs7->decryptionCb != NULL) {
+        return pkcs7->decryptionCb(pkcs7, encryptOID, iv, ivSz,
+                                      aad, aadSz, authTag, authTagSz, in,
+                                      inSz, out, pkcs7->decryptionCtx);
+    }
+
+    if (key == NULL)
         return BAD_FUNC_ARG;
 
     switch (encryptOID) {
@@ -3251,13 +6597,67 @@
                 #endif
                     (ivSz  != AES_BLOCK_SIZE) )
                 return BAD_FUNC_ARG;
-
-            ret = wc_AesSetKey(&aes, key, keySz, iv, AES_DECRYPTION);
-            if (ret == 0)
-                ret = wc_AesCbcDecrypt(&aes, out, in, inSz);
-
+            ret = wc_AesInit(&aes, NULL, INVALID_DEVID);
+            if (ret == 0) {
+                ret = wc_AesSetKey(&aes, key, keySz, iv, AES_DECRYPTION);
+                if (ret == 0)
+                    ret = wc_AesCbcDecrypt(&aes, out, in, inSz);
+                wc_AesFree(&aes);
+            }
             break;
-#endif
+    #ifdef HAVE_AESGCM
+        #ifdef WOLFSSL_AES_128
+        case AES128GCMb:
+        #endif
+        #ifdef WOLFSSL_AES_192
+        case AES192GCMb:
+        #endif
+        #ifdef WOLFSSL_AES_256
+        case AES256GCMb:
+        #endif
+        #if defined(WOLFSSL_AES_128) || defined(WOLFSSL_AES_192) || \
+            defined(WOLFSSL_AES_256)
+            if (authTag == NULL)
+                return BAD_FUNC_ARG;
+
+            ret = wc_AesInit(&aes, NULL, INVALID_DEVID);
+            if (ret == 0) {
+                ret = wc_AesGcmSetKey(&aes, key, keySz);
+                if (ret == 0)
+                    ret = wc_AesGcmDecrypt(&aes, out, in, inSz, iv, ivSz,
+                                           authTag, authTagSz, aad, aadSz);
+                wc_AesFree(&aes);
+            }
+            break;
+        #endif
+    #endif /* HAVE_AESGCM */
+    #ifdef HAVE_AESCCM
+        #ifdef WOLFSSL_AES_128
+        case AES128CCMb:
+        #endif
+        #ifdef WOLFSSL_AES_192
+        case AES192CCMb:
+        #endif
+        #ifdef WOLFSSL_AES_256
+        case AES256CCMb:
+        #endif
+        #if defined(WOLFSSL_AES_128) || defined(WOLFSSL_AES_192) || \
+            defined(WOLFSSL_AES_256)
+            if (authTag == NULL)
+                return BAD_FUNC_ARG;
+
+            ret = wc_AesInit(&aes, NULL, INVALID_DEVID);
+            if (ret == 0) {
+                ret = wc_AesCcmSetKey(&aes, key, keySz);
+                if (ret == 0)
+                    ret = wc_AesCcmDecrypt(&aes, out, in, inSz, iv, ivSz,
+                                           authTag, authTagSz, aad, aadSz);
+                wc_AesFree(&aes);
+            }
+            break;
+        #endif
+    #endif /* HAVE_AESCCM */
+#endif /* NO_AES */
 #ifndef NO_DES3
         case DESb:
             if (keySz != DES_KEYLEN || ivSz != DES_BLOCK_SIZE)
@@ -3272,9 +6672,13 @@
             if (keySz != DES3_KEYLEN || ivSz != DES_BLOCK_SIZE)
                 return BAD_FUNC_ARG;
 
-            ret = wc_Des3_SetKey(&des3, key, iv, DES_DECRYPTION);
-            if (ret == 0)
-                ret = wc_Des3_CbcDecrypt(&des3, out, in, inSz);
+            ret = wc_Des3Init(&des3, NULL, INVALID_DEVID);
+            if (ret == 0) {
+                ret = wc_Des3_SetKey(&des3, key, iv, DES_DECRYPTION);
+                if (ret == 0)
+                    ret = wc_Des3_CbcDecrypt(&des3, out, in, inSz);
+                wc_Des3Free(&des3);
+            }
 
             break;
 #endif
@@ -3283,17 +6687,26 @@
             return ALGO_ID_E;
     };
 
+#if defined(NO_AES) || (!defined(HAVE_AESGCM) && !defined(HAVE_AESCCM))
+    (void)authTag;
+    (void)authTagSz;
+    (void)aad;
+    (void)aadSz;
+#endif
+
     return ret;
 }
 
 
-/* generate random IV, place in iv, return 0 on success negative on error */
-static int wc_PKCS7_GenerateIV(PKCS7* pkcs7, WC_RNG* rng, byte* iv, word32 ivSz)
+/* Generate random block, place in out, return 0 on success negative on error.
+ * Used for generation of IV, nonce, etc */
+static int wc_PKCS7_GenerateBlock(PKCS7* pkcs7, WC_RNG* rng, byte* out,
+                                  word32 outSz)
 {
     int ret;
     WC_RNG* rnd = NULL;
 
-    if (iv == NULL || ivSz == 0)
+    if (out == NULL || outSz == 0)
         return BAD_FUNC_ARG;
 
     /* input RNG is optional, init local one if input rng is NULL */
@@ -3312,7 +6725,7 @@
         rnd = rng;
     }
 
-    ret = wc_RNG_GenerateBlock(rnd, iv, ivSz);
+    ret = wc_RNG_GenerateBlock(rnd, out, outSz);
 
     if (rng == NULL) {
         wc_FreeRng(rnd);
@@ -3323,6 +6736,59 @@
 }
 
 
+/* Set default SignerIdentifier type to be used. Is either
+ * IssuerAndSerialNumber or SubjectKeyIdentifier. Encoding defaults to using
+ * IssuerAndSerialNumber unless set with this function or explicitly
+ * overridden via options when adding RecipientInfo type.
+ *
+ * Using the type DEGENERATE_SID skips over signer information. In degenerate
+ * cases there are no signers.
+ *
+ * pkcs7 - pointer to initialized PKCS7 structure
+ * type  - either CMS_ISSUER_AND_SERIAL_NUMBER, CMS_SKID or DEGENERATE_SID
+ *
+ * return 0 on success, negative upon error */
+int wc_PKCS7_SetSignerIdentifierType(PKCS7* pkcs7, int type)
+{
+    if (pkcs7 == NULL)
+        return BAD_FUNC_ARG;
+
+    if (type != CMS_ISSUER_AND_SERIAL_NUMBER &&
+        type != CMS_SKID &&
+        type != DEGENERATE_SID) {
+        return BAD_FUNC_ARG;
+    }
+
+    pkcs7->sidType = type;
+
+    return 0;
+}
+
+
+/* Set custom contentType, currently supported with SignedData type
+ *
+ * pkcs7       - pointer to initialized PKCS7 structure
+ * contentType - pointer to array with ASN.1 encoded OID value
+ * sz          - length of contentType array, octets
+ *
+ * return 0 on success, negative upon error */
+int wc_PKCS7_SetContentType(PKCS7* pkcs7, byte* contentType, word32 sz)
+{
+    if (pkcs7 == NULL || contentType == NULL || sz == 0)
+        return BAD_FUNC_ARG;
+
+    if (sz > MAX_OID_SZ) {
+        WOLFSSL_MSG("input array too large, bounded by MAX_OID_SZ");
+        return BAD_FUNC_ARG;
+    }
+
+    XMEMCPY(pkcs7->contentType, contentType, sz);
+    pkcs7->contentTypeSz = sz;
+
+    return 0;
+}
+
+
 /* return size of padded data, padded to blockSz chunks, or negative on error */
 int wc_PKCS7_GetPadSize(word32 inputSz, word32 blockSz)
 {
@@ -3363,38 +6829,887 @@
 }
 
 
+/* Encode and add CMS EnvelopedData ORI (OtherRecipientInfo) RecipientInfo
+ * to CMS/PKCS#7 EnvelopedData structure.
+ *
+ * Return 0 on success, negative upon error */
+int wc_PKCS7_AddRecipient_ORI(PKCS7* pkcs7, CallbackOriEncrypt oriEncryptCb,
+                              int options)
+{
+    int oriTypeLenSz, blockKeySz, ret;
+    word32 idx, recipSeqSz;
+
+    Pkcs7EncodedRecip* recip = NULL;
+    Pkcs7EncodedRecip* lastRecip = NULL;
+
+    byte recipSeq[MAX_SEQ_SZ];
+    byte oriTypeLen[MAX_LENGTH_SZ];
+
+    byte oriType[MAX_ORI_TYPE_SZ];
+    byte oriValue[MAX_ORI_VALUE_SZ];
+    word32 oriTypeSz = MAX_ORI_TYPE_SZ;
+    word32 oriValueSz = MAX_ORI_VALUE_SZ;
+
+    if (pkcs7 == NULL || oriEncryptCb == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    /* allocate memory for RecipientInfo, KEK, encrypted key */
+    recip = (Pkcs7EncodedRecip*)XMALLOC(sizeof(Pkcs7EncodedRecip),
+                                        pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+    if (recip == NULL)
+        return MEMORY_E;
+    XMEMSET(recip, 0, sizeof(Pkcs7EncodedRecip));
+
+    /* get key size for content-encryption key based on algorithm */
+    blockKeySz = wc_PKCS7_GetOIDKeySize(pkcs7->encryptOID);
+    if (blockKeySz < 0) {
+        XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        return blockKeySz;
+    }
+
+    /* generate random content encryption key, if needed */
+    ret = PKCS7_GenerateContentEncryptionKey(pkcs7, blockKeySz);
+    if (ret < 0) {
+        XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        return ret;
+    }
+
+    /* call user callback to encrypt CEK and get oriType and oriValue
+       values back */
+    ret = oriEncryptCb(pkcs7, pkcs7->cek, pkcs7->cekSz, oriType, &oriTypeSz,
+                       oriValue, &oriValueSz, pkcs7->oriEncryptCtx);
+    if (ret != 0) {
+        XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        return ret;
+    }
+
+    oriTypeLenSz = SetLength(oriTypeSz, oriTypeLen);
+
+    recipSeqSz = SetImplicit(ASN_SEQUENCE, 4, 1 + oriTypeLenSz + oriTypeSz +
+                             oriValueSz, recipSeq);
+
+    idx = 0;
+    XMEMCPY(recip->recip + idx, recipSeq, recipSeqSz);
+    idx += recipSeqSz;
+    /* oriType */
+    recip->recip[idx] = ASN_OBJECT_ID;
+    idx += 1;
+    XMEMCPY(recip->recip + idx, oriTypeLen, oriTypeLenSz);
+    idx += oriTypeLenSz;
+    XMEMCPY(recip->recip + idx, oriType, oriTypeSz);
+    idx += oriTypeSz;
+    /* oriValue, input MUST already be ASN.1 encoded */
+    XMEMCPY(recip->recip + idx, oriValue, oriValueSz);
+    idx += oriValueSz;
+
+    /* store recipient size */
+    recip->recipSz = idx;
+    recip->recipType = PKCS7_ORI;
+    recip->recipVersion = 4;
+
+    /* add recipient to recip list */
+    if (pkcs7->recipList == NULL) {
+        pkcs7->recipList = recip;
+    } else {
+        lastRecip = pkcs7->recipList;
+        while (lastRecip->next != NULL) {
+            lastRecip = lastRecip->next;
+        }
+        lastRecip->next = recip;
+    }
+
+    (void)options;
+
+    return idx;
+}
+
+#if !defined(NO_PWDBASED) && !defined(NO_SHA)
+
+
+static int wc_PKCS7_GenerateKEK_PWRI(PKCS7* pkcs7, byte* passwd, word32 pLen,
+                                     byte* salt, word32 saltSz, int kdfOID,
+                                     int prfOID, int iterations, byte* out,
+                                     word32 outSz)
+{
+    int ret;
+
+    if (pkcs7 == NULL || passwd == NULL || salt == NULL || out == NULL)
+        return BAD_FUNC_ARG;
+
+    switch (kdfOID) {
+
+        case PBKDF2_OID:
+
+            ret = wc_PBKDF2(out, passwd, pLen, salt, saltSz, iterations,
+                            outSz, prfOID);
+            if (ret != 0) {
+                return ret;
+            }
+
+            break;
+
+        default:
+            WOLFSSL_MSG("Unsupported KDF OID");
+            return PKCS7_OID_E;
+    }
+
+    return 0;
+}
+
+
+/* RFC3211 (Section 2.3.1) key wrap algorithm (id-alg-PWRI-KEK).
+ *
+ * Returns output size on success, negative upon error */
+static int wc_PKCS7_PwriKek_KeyWrap(PKCS7* pkcs7, const byte* kek, word32 kekSz,
+                                    const byte* cek, word32 cekSz,
+                                    byte* out, word32 *outSz,
+                                    const byte* iv, word32 ivSz, int algID)
+{
+    WC_RNG rng;
+    int blockSz, outLen, ret;
+    word32 padSz;
+    byte* lastBlock;
+
+    if (kek == NULL || cek == NULL || iv == NULL || outSz == NULL)
+        return BAD_FUNC_ARG;
+
+    /* get encryption algorithm block size */
+    blockSz = wc_PKCS7_GetOIDBlockSize(algID);
+    if (blockSz < 0)
+        return blockSz;
+
+    /* get pad bytes needed to block boundary */
+    padSz = blockSz - ((4 + cekSz) % blockSz);
+    outLen = 4 + cekSz + padSz;
+
+    /* must be at least two blocks long */
+    if (outLen < 2 * blockSz)
+        padSz += blockSz;
+
+    /* if user set out to NULL, give back required length */
+    if (out == NULL) {
+        *outSz = outLen;
+        return LENGTH_ONLY_E;
+    }
+
+    /* verify output buffer is large enough */
+    if (*outSz < (word32)outLen)
+        return BUFFER_E;
+
+    out[0] = cekSz;
+    out[1] = ~cek[0];
+    out[2] = ~cek[1];
+    out[3] = ~cek[2];
+    XMEMCPY(out + 4, cek, cekSz);
+
+    /* random padding of size padSz */
+    ret = wc_InitRng_ex(&rng, pkcs7->heap, pkcs7->devId);
+    if (ret != 0)
+        return ret;
+
+    ret = wc_RNG_GenerateBlock(&rng, out + 4 + cekSz, padSz);
+
+    if (ret == 0) {
+        /* encrypt, normal */
+        ret = wc_PKCS7_EncryptContent(algID, (byte*)kek, kekSz, (byte*)iv,
+                                      ivSz, NULL, 0, NULL, 0, out, outLen, out);
+    }
+
+    if (ret == 0) {
+        /* encrypt again, using last ciphertext block as IV */
+        lastBlock = out + (((outLen / blockSz) - 1) * blockSz);
+        ret = wc_PKCS7_EncryptContent(algID, (byte*)kek, kekSz, lastBlock,
+                                      blockSz, NULL, 0, NULL, 0, out,
+                                      outLen, out);
+    }
+
+    if (ret == 0) {
+        *outSz = outLen;
+    } else {
+        outLen = ret;
+    }
+
+    wc_FreeRng(&rng);
+
+    return outLen;
+}
+
+
+/* RFC3211 (Section 2.3.2) key unwrap algorithm (id-alg-PWRI-KEK).
+ *
+ * Returns cek size on success, negative upon error */
+static int wc_PKCS7_PwriKek_KeyUnWrap(PKCS7* pkcs7, const byte* kek,
+                                      word32 kekSz, const byte* in, word32 inSz,
+                                      byte* out, word32 outSz, const byte* iv,
+                                      word32 ivSz, int algID)
+{
+    int blockSz, cekLen, ret;
+    byte* tmpIv     = NULL;
+    byte* lastBlock = NULL;
+    byte* outTmp    = NULL;
+
+    if (pkcs7 == NULL || kek == NULL || in == NULL ||
+        out == NULL || iv == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    outTmp = (byte*)XMALLOC(inSz, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (outTmp == NULL)
+        return MEMORY_E;
+
+    /* get encryption algorithm block size */
+    blockSz = wc_PKCS7_GetOIDBlockSize(algID);
+    if (blockSz < 0) {
+        XFREE(outTmp, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        return blockSz;
+    }
+
+    /* input needs to be blockSz multiple and at least 2 * blockSz */
+    if (((inSz % blockSz) != 0) || (inSz < (2 * (word32)blockSz))) {
+        WOLFSSL_MSG("PWRI-KEK unwrap input must of block size and >= 2 "
+                    "times block size");
+        XFREE(outTmp, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        return BAD_FUNC_ARG;
+    }
+
+    /* use block out[n-1] as IV to decrypt block out[n] */
+    lastBlock = (byte*)in + inSz - blockSz;
+    tmpIv = lastBlock - blockSz;
+
+    /* decrypt last block */
+    ret = wc_PKCS7_DecryptContent(pkcs7, algID, (byte*)kek, kekSz, tmpIv,
+            blockSz, NULL, 0, NULL, 0, lastBlock, blockSz,
+            outTmp + inSz - blockSz);
+
+    if (ret == 0) {
+        /* using last decrypted block as IV, decrypt [0 ... n-1] blocks */
+        lastBlock = outTmp + inSz - blockSz;
+        ret = wc_PKCS7_DecryptContent(pkcs7, algID, (byte*)kek, kekSz,
+                lastBlock, blockSz, NULL, 0, NULL, 0, (byte*)in, inSz - blockSz,
+                outTmp);
+    }
+
+    if (ret == 0) {
+        /* decrypt using original kek and iv */
+        ret = wc_PKCS7_DecryptContent(pkcs7, algID, (byte*)kek, kekSz,
+                (byte*)iv, ivSz, NULL, 0, NULL, 0, outTmp, inSz, outTmp);
+    }
+
+    if (ret != 0) {
+        ForceZero(outTmp, inSz);
+        XFREE(outTmp, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        return ret;
+    }
+
+    cekLen = outTmp[0];
+
+    /* verify length */
+    if ((word32)cekLen > inSz) {
+        ForceZero(outTmp, inSz);
+        XFREE(outTmp, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        return BAD_FUNC_ARG;
+    }
+
+    /* verify check bytes */
+    if ((outTmp[1] ^ outTmp[4]) != 0xFF ||
+        (outTmp[2] ^ outTmp[5]) != 0xFF ||
+        (outTmp[3] ^ outTmp[6]) != 0xFF) {
+        ForceZero(outTmp, inSz);
+        XFREE(outTmp, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        return BAD_FUNC_ARG;
+    }
+
+    if (outSz < (word32)cekLen) {
+        ForceZero(outTmp, inSz);
+        XFREE(outTmp, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        return BUFFER_E;
+    }
+
+    XMEMCPY(out, outTmp + 4, outTmp[0]);
+    ForceZero(outTmp, inSz);
+    XFREE(outTmp, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+
+    return cekLen;
+}
+
+
+/* Encode and add CMS EnvelopedData PWRI (PasswordRecipientInfo) RecipientInfo
+ * to CMS/PKCS#7 EnvelopedData structure.
+ *
+ * Return 0 on success, negative upon error */
+int wc_PKCS7_AddRecipient_PWRI(PKCS7* pkcs7, byte* passwd, word32 pLen,
+                               byte* salt, word32 saltSz, int kdfOID,
+                               int hashOID, int iterations, int kekEncryptOID,
+                               int options)
+{
+    Pkcs7EncodedRecip* recip = NULL;
+    Pkcs7EncodedRecip* lastRecip = NULL;
+
+    /* PasswordRecipientInfo */
+    byte recipSeq[MAX_SEQ_SZ];
+    byte ver[MAX_VERSION_SZ];
+    word32 recipSeqSz, verSz;
+
+    /* KeyDerivationAlgorithmIdentifier */
+    byte kdfAlgoIdSeq[MAX_SEQ_SZ];
+    byte kdfAlgoId[MAX_OID_SZ];
+    byte kdfParamsSeq[MAX_SEQ_SZ];              /* PBKDF2-params */
+    byte kdfSaltOctetStr[MAX_OCTET_STR_SZ];     /* salt OCTET STRING */
+    byte kdfIterations[MAX_VERSION_SZ];
+    word32 kdfAlgoIdSeqSz, kdfAlgoIdSz;
+    word32 kdfParamsSeqSz, kdfSaltOctetStrSz, kdfIterationsSz;
+    /* OPTIONAL: keyLength, not supported yet */
+    /* OPTIONAL: prf AlgorithIdentifier, not supported yet */
+
+    /* KeyEncryptionAlgorithmIdentifier */
+    byte keyEncAlgoIdSeq[MAX_SEQ_SZ];
+    byte keyEncAlgoId[MAX_OID_SZ];              /* id-alg-PWRI-KEK */
+    byte pwriEncAlgoId[MAX_ALGO_SZ];
+    byte ivOctetString[MAX_OCTET_STR_SZ];
+    word32 keyEncAlgoIdSeqSz, keyEncAlgoIdSz;
+    word32 pwriEncAlgoIdSz, ivOctetStringSz;
+
+    /* EncryptedKey */
+    byte encKeyOctetStr[MAX_OCTET_STR_SZ];
+    word32 encKeyOctetStrSz;
+
+    byte tmpIv[MAX_CONTENT_IV_SIZE];
+    byte* encryptedKey = NULL;
+    byte* kek = NULL;
+
+    int cekKeySz = 0, kekKeySz = 0, kekBlockSz = 0, ret = 0;
+    int encryptOID;
+    word32 idx, totalSz = 0, encryptedKeySz;
+
+    if (pkcs7 == NULL || passwd == NULL || pLen == 0 ||
+        salt == NULL || saltSz == 0) {
+        return BAD_FUNC_ARG;
+    }
+
+    /* allow user to use different KEK encryption algorithm than used for
+     * main content encryption algorithm, if passed in */
+    if (kekEncryptOID != 0) {
+        encryptOID = kekEncryptOID;
+    } else {
+        encryptOID = pkcs7->encryptOID;
+    }
+
+    /* get content-encryption key size, based on algorithm */
+    cekKeySz = wc_PKCS7_GetOIDKeySize(pkcs7->encryptOID);
+    if (cekKeySz < 0)
+        return cekKeySz;
+
+    /* get KEK encryption key size, based on algorithm */
+    if (encryptOID != pkcs7->encryptOID) {
+        kekKeySz = wc_PKCS7_GetOIDKeySize(encryptOID);
+    } else {
+        kekKeySz = cekKeySz;
+    }
+
+    /* get KEK encryption block size */
+    kekBlockSz = wc_PKCS7_GetOIDBlockSize(encryptOID);
+    if (kekBlockSz < 0)
+        return kekBlockSz;
+
+    /* generate random CEK */
+    ret = PKCS7_GenerateContentEncryptionKey(pkcs7, cekKeySz);
+    if (ret < 0)
+        return ret;
+
+    /* generate random IV */
+    ret = wc_PKCS7_GenerateBlock(pkcs7, NULL, tmpIv, kekBlockSz);
+    if (ret != 0)
+        return ret;
+
+    /* allocate memory for RecipientInfo, KEK, encrypted key */
+    recip = (Pkcs7EncodedRecip*)XMALLOC(sizeof(Pkcs7EncodedRecip),
+                                        pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+    if (recip == NULL)
+        return MEMORY_E;
+
+    kek = (byte*)XMALLOC(kekKeySz, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+    if (kek == NULL) {
+        XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        return MEMORY_E;
+    }
+
+    encryptedKey = (byte*)XMALLOC(MAX_ENCRYPTED_KEY_SZ,
+                                  pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+    if (encryptedKey == NULL) {
+        XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        XFREE(kek, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        return MEMORY_E;
+    }
+
+    encryptedKeySz = MAX_ENCRYPTED_KEY_SZ;
+    XMEMSET(recip, 0, sizeof(Pkcs7EncodedRecip));
+    XMEMSET(kek, 0, kekKeySz);
+    XMEMSET(encryptedKey, 0, encryptedKeySz);
+
+    /* generate KEK: expand password into KEK */
+    ret = wc_PKCS7_GenerateKEK_PWRI(pkcs7, passwd, pLen, salt, saltSz,
+                                    kdfOID, hashOID, iterations, kek,
+                                    kekKeySz);
+    if (ret < 0) {
+        XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        XFREE(kek, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        return ret;
+    }
+
+    /* generate encrypted key: encrypt CEK with KEK */
+    ret = wc_PKCS7_PwriKek_KeyWrap(pkcs7, kek, kekKeySz, pkcs7->cek,
+                                   pkcs7->cekSz, encryptedKey, &encryptedKeySz,
+                                   tmpIv, kekBlockSz, encryptOID);
+    if (ret < 0) {
+        XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        XFREE(kek, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        return ret;
+    }
+    encryptedKeySz = ret;
+
+    /* put together encrypted key OCTET STRING */
+    encKeyOctetStrSz = SetOctetString(encryptedKeySz, encKeyOctetStr);
+    totalSz += (encKeyOctetStrSz + encryptedKeySz);
+
+    /* put together IV OCTET STRING */
+    ivOctetStringSz = SetOctetString(kekBlockSz, ivOctetString);
+    totalSz += (ivOctetStringSz + kekBlockSz);
+
+    /* set PWRIAlgorithms AlgorithmIdentifier, adding (ivOctetStringSz +
+       blockKeySz) for IV OCTET STRING */
+    pwriEncAlgoIdSz = SetAlgoID(encryptOID, pwriEncAlgoId,
+                                oidBlkType, ivOctetStringSz + kekBlockSz);
+    totalSz += pwriEncAlgoIdSz;
+
+    /* set KeyEncryptionAlgorithms OID */
+    ret = wc_SetContentType(PWRI_KEK_WRAP, keyEncAlgoId, sizeof(keyEncAlgoId));
+    if (ret <= 0) {
+        XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        XFREE(kek, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        return ret;
+    }
+    keyEncAlgoIdSz = ret;
+    totalSz += keyEncAlgoIdSz;
+
+    /* KeyEncryptionAlgorithm SEQ */
+    keyEncAlgoIdSeqSz = SetSequence(keyEncAlgoIdSz + pwriEncAlgoIdSz +
+                                    ivOctetStringSz + kekBlockSz,
+                                    keyEncAlgoIdSeq);
+    totalSz += keyEncAlgoIdSeqSz;
+
+    /* set KDF salt */
+    kdfSaltOctetStrSz = SetOctetString(saltSz, kdfSaltOctetStr);
+    totalSz += (kdfSaltOctetStrSz + saltSz);
+
+    /* set KDF iteration count */
+    kdfIterationsSz = SetMyVersion(iterations, kdfIterations, 0);
+    totalSz += kdfIterationsSz;
+
+    /* set KDF params SEQ */
+    kdfParamsSeqSz = SetSequence(kdfSaltOctetStrSz + saltSz + kdfIterationsSz,
+                                 kdfParamsSeq);
+    totalSz += kdfParamsSeqSz;
+
+    /* set KDF algo OID */
+    ret = wc_SetContentType(kdfOID, kdfAlgoId, sizeof(kdfAlgoId));
+    if (ret <= 0) {
+        XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        XFREE(kek, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        return ret;
+    }
+    kdfAlgoIdSz = ret;
+    totalSz += kdfAlgoIdSz;
+
+    /* set KeyDerivationAlgorithmIdentifier EXPLICIT [0] SEQ */
+    kdfAlgoIdSeqSz = SetExplicit(0, kdfAlgoIdSz + kdfParamsSeqSz +
+                                 kdfSaltOctetStrSz + saltSz + kdfIterationsSz,
+                                 kdfAlgoIdSeq);
+    totalSz += kdfAlgoIdSeqSz;
+
+    /* set PasswordRecipientInfo CMSVersion, MUST be 0 */
+    verSz = SetMyVersion(0, ver, 0);
+    totalSz += verSz;
+    recip->recipVersion = 0;
+
+    /* set PasswordRecipientInfo SEQ */
+    recipSeqSz = SetImplicit(ASN_SEQUENCE, 3, totalSz, recipSeq);
+    totalSz += recipSeqSz;
+
+    if (totalSz > MAX_RECIP_SZ) {
+        WOLFSSL_MSG("CMS Recipient output buffer too small");
+        XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        XFREE(kek, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        return BUFFER_E;
+    }
+
+    idx = 0;
+    XMEMCPY(recip->recip + idx, recipSeq, recipSeqSz);
+    idx += recipSeqSz;
+    XMEMCPY(recip->recip + idx, ver, verSz);
+    idx += verSz;
+    XMEMCPY(recip->recip + idx, kdfAlgoIdSeq, kdfAlgoIdSeqSz);
+    idx += kdfAlgoIdSeqSz;
+    XMEMCPY(recip->recip + idx, kdfAlgoId, kdfAlgoIdSz);
+    idx += kdfAlgoIdSz;
+    XMEMCPY(recip->recip + idx, kdfParamsSeq, kdfParamsSeqSz);
+    idx += kdfParamsSeqSz;
+    XMEMCPY(recip->recip + idx, kdfSaltOctetStr, kdfSaltOctetStrSz);
+    idx += kdfSaltOctetStrSz;
+    XMEMCPY(recip->recip + idx, salt, saltSz);
+    idx += saltSz;
+    XMEMCPY(recip->recip + idx, kdfIterations, kdfIterationsSz);
+    idx += kdfIterationsSz;
+    XMEMCPY(recip->recip + idx, keyEncAlgoIdSeq, keyEncAlgoIdSeqSz);
+    idx += keyEncAlgoIdSeqSz;
+    XMEMCPY(recip->recip + idx, keyEncAlgoId, keyEncAlgoIdSz);
+    idx += keyEncAlgoIdSz;
+    XMEMCPY(recip->recip + idx, pwriEncAlgoId, pwriEncAlgoIdSz);
+    idx += pwriEncAlgoIdSz;
+    XMEMCPY(recip->recip + idx, ivOctetString, ivOctetStringSz);
+    idx += ivOctetStringSz;
+    XMEMCPY(recip->recip + idx, tmpIv, kekBlockSz);
+    idx += kekBlockSz;
+    XMEMCPY(recip->recip + idx, encKeyOctetStr, encKeyOctetStrSz);
+    idx += encKeyOctetStrSz;
+    XMEMCPY(recip->recip + idx, encryptedKey, encryptedKeySz);
+    idx += encryptedKeySz;
+
+    ForceZero(kek, kekBlockSz);
+    ForceZero(encryptedKey, encryptedKeySz);
+    XFREE(kek, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+    XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+
+    /* store recipient size */
+    recip->recipSz = idx;
+    recip->recipType = PKCS7_PWRI;
+
+    /* add recipient to recip list */
+    if (pkcs7->recipList == NULL) {
+        pkcs7->recipList = recip;
+    } else {
+        lastRecip = pkcs7->recipList;
+        while (lastRecip->next != NULL) {
+            lastRecip = lastRecip->next;
+        }
+        lastRecip->next = recip;
+    }
+
+    (void)options;
+
+    return idx;
+}
+
+/* Import password and KDF settings into a PKCS7 structure. Used for setting
+ * the password info for decryption a EnvelopedData PWRI RecipientInfo.
+ *
+ * Returns 0 on success, negative upon error */
+int wc_PKCS7_SetPassword(PKCS7* pkcs7, byte* passwd, word32 pLen)
+{
+    if (pkcs7 == NULL || passwd == NULL || pLen == 0)
+        return BAD_FUNC_ARG;
+
+    pkcs7->pass = passwd;
+    pkcs7->passSz = pLen;
+
+    return 0;
+}
+
+#endif /* NO_PWDBASED */
+
+
+/* Encode and add CMS EnvelopedData KEKRI (KEKRecipientInfo) RecipientInfo
+ * to CMS/PKCS#7 EnvelopedData structure.
+ *
+ * pkcs7 - pointer to initialized PKCS7 structure
+ * keyWrapOID - OID sum of key wrap algorithm identifier
+ * kek        - key encryption key
+ * kekSz      - size of kek, bytes
+ * keyID      - key-encryption key identifier, pre-distributed to endpoints
+ * keyIDSz    - size of keyID, bytes
+ * timePtr    - pointer to "time_t", which is typically "long" (OPTIONAL)
+ * otherOID   - ASN.1 encoded OID of other attribute (OPTIONAL)
+ * otherOIDSz - size of otherOID, bytes (OPTIONAL)
+ * other      - other attribute (OPTIONAL)
+ * otherSz    - size of other (OPTIONAL)
+ *
+ * Returns 0 on success, negative upon error */
+int wc_PKCS7_AddRecipient_KEKRI(PKCS7* pkcs7, int keyWrapOID, byte* kek,
+                                word32 kekSz, byte* keyId, word32 keyIdSz,
+                                void* timePtr, byte* otherOID,
+                                word32 otherOIDSz, byte* other, word32 otherSz,
+                                int options)
+{
+    Pkcs7EncodedRecip* recip = NULL;
+    Pkcs7EncodedRecip* lastRecip = NULL;
+
+    byte recipSeq[MAX_SEQ_SZ];
+    byte ver[MAX_VERSION_SZ];
+    byte kekIdSeq[MAX_SEQ_SZ];
+    byte kekIdOctetStr[MAX_OCTET_STR_SZ];
+    byte genTime[ASN_GENERALIZED_TIME_SIZE];
+    byte otherAttSeq[MAX_SEQ_SZ];
+    byte encAlgoId[MAX_ALGO_SZ];
+    byte encKeyOctetStr[MAX_OCTET_STR_SZ];
+#ifdef WOLFSSL_SMALL_STACK
+    byte* encryptedKey;
+#else
+    byte encryptedKey[MAX_ENCRYPTED_KEY_SZ];
+#endif
+
+    int blockKeySz = 0, ret = 0, direction;
+    word32 idx = 0;
+    word32 totalSz = 0;
+    word32 recipSeqSz = 0, verSz = 0;
+    word32 kekIdSeqSz = 0, kekIdOctetStrSz = 0;
+    word32 otherAttSeqSz = 0, encAlgoIdSz = 0, encKeyOctetStrSz = 0;
+    int encryptedKeySz;
+
+    int timeSz = 0;
+#ifndef NO_ASN_TIME
+    time_t* tm = NULL;
+#endif
+
+    if (pkcs7 == NULL || kek == NULL || keyId == NULL)
+        return BAD_FUNC_ARG;
+
+    recip = (Pkcs7EncodedRecip*)XMALLOC(sizeof(Pkcs7EncodedRecip), pkcs7->heap,
+                                 DYNAMIC_TYPE_PKCS7);
+    if (recip == NULL)
+        return MEMORY_E;
+
+    XMEMSET(recip, 0, sizeof(Pkcs7EncodedRecip));
+
+    /* get key size for content-encryption key based on algorithm */
+    blockKeySz = wc_PKCS7_GetOIDKeySize(pkcs7->encryptOID);
+    if (blockKeySz < 0) {
+        XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        return blockKeySz;
+    }
+
+    /* generate random content encryption key, if needed */
+    ret = PKCS7_GenerateContentEncryptionKey(pkcs7, blockKeySz);
+    if (ret < 0) {
+        XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        return ret;
+    }
+
+    /* EncryptedKey */
+#ifdef WOLFSSL_SMALL_STACK
+    encryptedKey = (byte*)XMALLOC(MAX_ENCRYPTED_KEY_SZ, pkcs7->heap,
+                                  DYNAMIC_TYPE_PKCS7);
+    if (encryptedKey == NULL) {
+        XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        return MEMORY_E;
+    }
+#endif
+    encryptedKeySz = MAX_ENCRYPTED_KEY_SZ;
+    XMEMSET(encryptedKey, 0, encryptedKeySz);
+
+    #ifndef NO_AES
+        direction = AES_ENCRYPTION;
+    #else
+        direction = DES_ENCRYPTION;
+    #endif
+
+    encryptedKeySz = wc_PKCS7_KeyWrap(pkcs7->cek, pkcs7->cekSz, kek, kekSz,
+                                      encryptedKey, encryptedKeySz, keyWrapOID,
+                                      direction);
+    if (encryptedKeySz < 0) {
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+    #endif
+        XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        return encryptedKeySz;
+    }
+    /* handle a zero size encKey case as WC_KEY_SIZE_E */
+    if (encryptedKeySz == 0 || encryptedKeySz > MAX_ENCRYPTED_KEY_SZ) {
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+    #endif
+        XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        return WC_KEY_SIZE_E;
+    }
+
+    encKeyOctetStrSz = SetOctetString(encryptedKeySz, encKeyOctetStr);
+    totalSz += (encKeyOctetStrSz + encryptedKeySz);
+
+    /* KeyEncryptionAlgorithmIdentifier */
+    encAlgoIdSz = SetAlgoID(keyWrapOID, encAlgoId, oidKeyWrapType, 0);
+    totalSz += encAlgoIdSz;
+
+    /* KEKIdentifier: keyIdentifier */
+    kekIdOctetStrSz = SetOctetString(keyIdSz, kekIdOctetStr);
+    totalSz += (kekIdOctetStrSz + keyIdSz);
+
+    /* KEKIdentifier: GeneralizedTime (OPTIONAL) */
+#ifndef NO_ASN_TIME
+    if (timePtr != NULL) {
+        tm = (time_t*)timePtr;
+        timeSz = GetAsnTimeString(tm, genTime, sizeof(genTime));
+        if (timeSz < 0) {
+            XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        #ifdef WOLFSSL_SMALL_STACK
+            XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        #endif
+            return timeSz;
+        }
+        totalSz += timeSz;
+    }
+#endif
+
+    /* KEKIdentifier: OtherKeyAttribute SEQ (OPTIONAL) */
+    if (other != NULL && otherSz > 0) {
+        otherAttSeqSz = SetSequence(otherOIDSz + otherSz, otherAttSeq);
+        totalSz += otherAttSeqSz + otherOIDSz + otherSz;
+    }
+
+    /* KEKIdentifier SEQ */
+    kekIdSeqSz = SetSequence(kekIdOctetStrSz + keyIdSz + timeSz +
+                             otherAttSeqSz + otherOIDSz + otherSz, kekIdSeq);
+    totalSz += kekIdSeqSz;
+
+    /* version */
+    verSz = SetMyVersion(4, ver, 0);
+    totalSz += verSz;
+    recip->recipVersion = 4;
+
+    /* KEKRecipientInfo SEQ */
+    recipSeqSz = SetImplicit(ASN_SEQUENCE, 2, totalSz, recipSeq);
+    totalSz += recipSeqSz;
+
+    if (totalSz > MAX_RECIP_SZ) {
+        WOLFSSL_MSG("CMS Recipient output buffer too small");
+        XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+    #endif
+        return BUFFER_E;
+    }
+
+    XMEMCPY(recip->recip + idx, recipSeq, recipSeqSz);
+    idx += recipSeqSz;
+    XMEMCPY(recip->recip + idx, ver, verSz);
+    idx += verSz;
+    XMEMCPY(recip->recip + idx, kekIdSeq, kekIdSeqSz);
+    idx += kekIdSeqSz;
+    XMEMCPY(recip->recip + idx, kekIdOctetStr, kekIdOctetStrSz);
+    idx += kekIdOctetStrSz;
+    XMEMCPY(recip->recip + idx, keyId, keyIdSz);
+    idx += keyIdSz;
+    if (timePtr != NULL) {
+        XMEMCPY(recip->recip + idx, genTime, timeSz);
+        idx += timeSz;
+    }
+    if (other != NULL && otherSz > 0) {
+        XMEMCPY(recip->recip + idx, otherAttSeq, otherAttSeqSz);
+        idx += otherAttSeqSz;
+        XMEMCPY(recip->recip + idx, otherOID, otherOIDSz);
+        idx += otherOIDSz;
+        XMEMCPY(recip->recip + idx, other, otherSz);
+        idx += otherSz;
+    }
+    XMEMCPY(recip->recip + idx, encAlgoId, encAlgoIdSz);
+    idx += encAlgoIdSz;
+    XMEMCPY(recip->recip + idx, encKeyOctetStr, encKeyOctetStrSz);
+    idx += encKeyOctetStrSz;
+    XMEMCPY(recip->recip + idx, encryptedKey, encryptedKeySz);
+    idx += encryptedKeySz;
+
+#ifdef WOLFSSL_SMALL_STACK
+    XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+#endif
+
+    /* store recipient size */
+    recip->recipSz = idx;
+    recip->recipType = PKCS7_KEKRI;
+
+    /* add recipient to recip list */
+    if (pkcs7->recipList == NULL) {
+        pkcs7->recipList = recip;
+    } else {
+        lastRecip = pkcs7->recipList;
+        while(lastRecip->next != NULL) {
+            lastRecip = lastRecip->next;
+        }
+        lastRecip->next = recip;
+    }
+
+    (void)options;
+
+    return idx;
+}
+
+
+static int wc_PKCS7_GetCMSVersion(PKCS7* pkcs7, int cmsContentType)
+{
+    int version = -1;
+
+    if (pkcs7 == NULL)
+        return BAD_FUNC_ARG;
+
+    switch (cmsContentType) {
+        case ENVELOPED_DATA:
+
+            /* NOTE: EnvelopedData does not currently support
+               originatorInfo or unprotectedAttributes. When either of these
+               are added, version checking below needs to be updated to match
+               Section 6.1 of RFC 5652 */
+
+            /* if RecipientInfos include pwri or ori, version is 3 */
+            if (wc_PKCS7_RecipientListIncludesType(pkcs7, PKCS7_PWRI) ||
+                wc_PKCS7_RecipientListIncludesType(pkcs7, PKCS7_ORI)) {
+                version = 3;
+                break;
+            }
+
+            /* if unprotectedAttrs is absent AND all RecipientInfo structs
+               are version 0, version is 0 */
+            if (wc_PKCS7_RecipientListVersionsAllZero(pkcs7)) {
+                version = 0;
+                break;
+            }
+
+            /* otherwise, version is 2 */
+            version = 2;
+            break;
+
+        default:
+            break;
+    }
+
+    return version;
+}
+
+
 /* build PKCS#7 envelopedData content type, return enveloped size */
 int wc_PKCS7_EncodeEnvelopedData(PKCS7* pkcs7, byte* output, word32 outputSz)
 {
     int ret, idx = 0;
     int totalSz, padSz, encryptedOutSz;
 
-    int contentInfoSeqSz, outerContentTypeSz, outerContentSz;
+    int contentInfoSeqSz = 0, outerContentTypeSz = 0, outerContentSz;
     byte contentInfoSeq[MAX_SEQ_SZ];
     byte outerContentType[MAX_ALGO_SZ];
     byte outerContent[MAX_SEQ_SZ];
 
+    int kariVersion;
     int envDataSeqSz, verSz;
     byte envDataSeq[MAX_SEQ_SZ];
     byte ver[MAX_VERSION_SZ];
 
     WC_RNG rng;
-    int contentKeyEncSz, blockSz, blockKeySz;
-    byte contentKeyPlain[MAX_CONTENT_KEY_LEN];
-#ifdef WOLFSSL_SMALL_STACK
-    byte* contentKeyEnc;
-#else
-    byte contentKeyEnc[MAX_ENCRYPTED_KEY_SZ];
-#endif
+    int blockSz, blockKeySz;
     byte* plain;
     byte* encryptedContent;
 
+    Pkcs7EncodedRecip* tmpRecip = NULL;
     int recipSz, recipSetSz;
-#ifdef WOLFSSL_SMALL_STACK
-    byte* recip;
-#else
-    byte recip[MAX_RECIP_SZ];
-#endif
     byte recipSet[MAX_SET_SZ];
 
     int encContentOctetSz, encContentSeqSz, contentTypeSz;
@@ -3406,9 +7721,7 @@
     byte ivOctetString[MAX_OCTET_STR_SZ];
     byte encContentOctet[MAX_OCTET_STR_SZ];
 
-    if (pkcs7 == NULL || pkcs7->content == NULL || pkcs7->contentSz == 0 ||
-        pkcs7->encryptOID == 0 || pkcs7->singleCert == NULL ||
-        pkcs7->publicKeyOID == 0)
+    if (pkcs7 == NULL || pkcs7->content == NULL || pkcs7->contentSz == 0)
         return BAD_FUNC_ARG;
 
     if (output == NULL || outputSz == 0)
@@ -3422,107 +7735,88 @@
     if (blockSz < 0)
         return blockSz;
 
-    /* outer content type */
-    outerContentTypeSz = wc_SetContentType(ENVELOPED_DATA, outerContentType);
-
-    /* version, defined as 0 in RFC 2315 */
-#ifdef HAVE_ECC
-    if (pkcs7->publicKeyOID == ECDSAk) {
-        verSz = SetMyVersion(2, ver, 0);
-    } else
-#endif
-    {
-        verSz = SetMyVersion(0, ver, 0);
+    if (pkcs7->contentOID != FIRMWARE_PKG_DATA) {
+        /* outer content type */
+        ret = wc_SetContentType(ENVELOPED_DATA, outerContentType,
+                                sizeof(outerContentType));
+        if (ret < 0)
+            return ret;
+
+        outerContentTypeSz = ret;
     }
 
     /* generate random content encryption key */
+    ret = PKCS7_GenerateContentEncryptionKey(pkcs7, blockKeySz);
+    if (ret != 0) {
+        return ret;
+    }
+
+    /* build RecipientInfo, only if user manually set singleCert and size */
+    if (pkcs7->singleCert != NULL && pkcs7->singleCertSz > 0) {
+        switch (pkcs7->publicKeyOID) {
+        #ifndef NO_RSA
+            case RSAk:
+                ret = wc_PKCS7_AddRecipient_KTRI(pkcs7, pkcs7->singleCert,
+                                                 pkcs7->singleCertSz, 0);
+                break;
+        #endif
+        #ifdef HAVE_ECC
+            case ECDSAk:
+                ret = wc_PKCS7_AddRecipient_KARI(pkcs7, pkcs7->singleCert,
+                                                 pkcs7->singleCertSz,
+                                                 pkcs7->keyWrapOID,
+                                                 pkcs7->keyAgreeOID, pkcs7->ukm,
+                                                 pkcs7->ukmSz, 0);
+                break;
+        #endif
+
+            default:
+                WOLFSSL_MSG("Unsupported RecipientInfo public key type");
+                return BAD_FUNC_ARG;
+        };
+
+        if (ret < 0) {
+            WOLFSSL_MSG("Failed to create RecipientInfo");
+            return ret;
+        }
+    }
+
+    recipSz = wc_PKCS7_GetRecipientListSize(pkcs7);
+    if (recipSz < 0) {
+        return ret;
+
+    } else if (recipSz == 0) {
+        WOLFSSL_MSG("You must add at least one CMS recipient");
+        return PKCS7_RECIP_E;
+    }
+    recipSetSz = SetSet(recipSz, recipSet);
+
+    /* version, defined in Section 6.1 of RFC 5652 */
+    kariVersion = wc_PKCS7_GetCMSVersion(pkcs7, ENVELOPED_DATA);
+    if (kariVersion < 0) {
+        WOLFSSL_MSG("Failed to set CMS EnvelopedData version");
+        return PKCS7_RECIP_E;
+    }
+
+    verSz = SetMyVersion(kariVersion, ver, 0);
+
     ret = wc_InitRng_ex(&rng, pkcs7->heap, pkcs7->devId);
     if (ret != 0)
         return ret;
 
-    ret = wc_RNG_GenerateBlock(&rng, contentKeyPlain, blockKeySz);
-    if (ret != 0) {
-        wc_FreeRng(&rng);
+    /* generate IV for block cipher */
+    ret = wc_PKCS7_GenerateBlock(pkcs7, &rng, tmpIv, blockSz);
+    wc_FreeRng(&rng);
+    if (ret != 0)
         return ret;
-    }
-
-#ifdef WOLFSSL_SMALL_STACK
-    recip         = (byte*)XMALLOC(MAX_RECIP_SZ, pkcs7->heap,
-                                                       DYNAMIC_TYPE_PKCS7);
-    contentKeyEnc = (byte*)XMALLOC(MAX_ENCRYPTED_KEY_SZ, pkcs7->heap,
-                                                       DYNAMIC_TYPE_PKCS7);
-    if (contentKeyEnc == NULL || recip == NULL) {
-        if (recip)         XFREE(recip,         pkcs7->heap, DYNAMIC_TYPE_PKCS7);
-        if (contentKeyEnc) XFREE(contentKeyEnc, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
-        wc_FreeRng(&rng);
-        return MEMORY_E;
-    }
-#endif
-    contentKeyEncSz = MAX_ENCRYPTED_KEY_SZ;
-
-    /* build RecipientInfo, only handle 1 for now */
-    switch (pkcs7->publicKeyOID) {
-#ifndef NO_RSA
-        case RSAk:
-            recipSz = wc_CreateRecipientInfo(pkcs7->singleCert,
-                                    pkcs7->singleCertSz,
-                                    pkcs7->publicKeyOID,
-                                    blockKeySz, &rng, contentKeyPlain,
-                                    contentKeyEnc, &contentKeyEncSz, recip,
-                                    MAX_RECIP_SZ, pkcs7->heap);
-            break;
-#endif
-#ifdef HAVE_ECC
-        case ECDSAk:
-            recipSz = wc_CreateKeyAgreeRecipientInfo(pkcs7, pkcs7->singleCert,
-                                    pkcs7->singleCertSz,
-                                    pkcs7->publicKeyOID,
-                                    blockKeySz, pkcs7->keyWrapOID,
-                                    pkcs7->keyAgreeOID, &rng,
-                                    contentKeyPlain, contentKeyEnc,
-                                    &contentKeyEncSz, recip, MAX_RECIP_SZ);
-            break;
-#endif
-
-        default:
-            WOLFSSL_MSG("Unsupported RecipientInfo public key type");
-            return BAD_FUNC_ARG;
-    };
-
-    ForceZero(contentKeyEnc, MAX_ENCRYPTED_KEY_SZ);
-
-#ifdef WOLFSSL_SMALL_STACK
-    XFREE(contentKeyEnc, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
-#endif
-
-    if (recipSz < 0) {
-        WOLFSSL_MSG("Failed to create RecipientInfo");
-        wc_FreeRng(&rng);
-#ifdef WOLFSSL_SMALL_STACK
-        XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-        return recipSz;
-    }
-    recipSetSz = SetSet(recipSz, recipSet);
-
-    /* generate IV for block cipher */
-    ret = wc_PKCS7_GenerateIV(pkcs7, &rng, tmpIv, blockSz);
-    wc_FreeRng(&rng);
-    if (ret != 0) {
-#ifdef WOLFSSL_SMALL_STACK
-        XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-        return ret;
-    }
 
     /* EncryptedContentInfo */
-    contentTypeSz = wc_SetContentType(pkcs7->contentOID, contentType);
-    if (contentTypeSz == 0) {
-#ifdef WOLFSSL_SMALL_STACK
-        XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-        return BAD_FUNC_ARG;
-    }
+    ret = wc_SetContentType(pkcs7->contentOID, contentType,
+                            sizeof(contentType));
+    if (ret < 0)
+        return ret;
+
+    contentTypeSz = ret;
 
     /* allocate encrypted content buffer and PKCS#7 padding */
     padSz = wc_PKCS7_GetPadSize(pkcs7->contentSz, blockSz);
@@ -3531,8 +7825,7 @@
 
     encryptedOutSz = pkcs7->contentSz + padSz;
 
-    plain = (byte*)XMALLOC(encryptedOutSz, pkcs7->heap,
-                           DYNAMIC_TYPE_PKCS7);
+    plain = (byte*)XMALLOC(encryptedOutSz, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
     if (plain == NULL)
         return MEMORY_E;
 
@@ -3547,9 +7840,6 @@
                                       DYNAMIC_TYPE_PKCS7);
     if (encryptedContent == NULL) {
         XFREE(plain, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
-#ifdef WOLFSSL_SMALL_STACK
-        XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
         return MEMORY_E;
     }
 
@@ -3564,23 +7854,17 @@
     if (contentEncAlgoSz == 0) {
         XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
         XFREE(plain, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
-#ifdef WOLFSSL_SMALL_STACK
-        XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
-#endif
         return BAD_FUNC_ARG;
     }
 
     /* encrypt content */
-    ret = wc_PKCS7_EncryptContent(pkcs7->encryptOID, contentKeyPlain,
-            blockKeySz, tmpIv, blockSz, plain, encryptedOutSz,
-            encryptedContent);
+    ret = wc_PKCS7_EncryptContent(pkcs7->encryptOID, pkcs7->cek,
+            pkcs7->cekSz, tmpIv, blockSz, NULL, 0, NULL, 0, plain,
+            encryptedOutSz, encryptedContent);
 
     if (ret != 0) {
         XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
         XFREE(plain, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
-#ifdef WOLFSSL_SMALL_STACK
-        XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
-#endif
         return ret;
     }
 
@@ -3606,34 +7890,41 @@
     totalSz += outerContentTypeSz;
     totalSz += outerContentSz;
 
-    /* ContentInfo */
-    contentInfoSeqSz = SetSequence(totalSz, contentInfoSeq);
-    totalSz += contentInfoSeqSz;
+    if (pkcs7->contentOID != FIRMWARE_PKG_DATA) {
+        /* ContentInfo */
+        contentInfoSeqSz = SetSequence(totalSz, contentInfoSeq);
+        totalSz += contentInfoSeqSz;
+    }
 
     if (totalSz > (int)outputSz) {
         WOLFSSL_MSG("Pkcs7_encrypt output buffer too small");
         XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
         XFREE(plain, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
-#ifdef WOLFSSL_SMALL_STACK
-        XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
         return BUFFER_E;
     }
 
-    XMEMCPY(output + idx, contentInfoSeq, contentInfoSeqSz);
-    idx += contentInfoSeqSz;
-    XMEMCPY(output + idx, outerContentType, outerContentTypeSz);
-    idx += outerContentTypeSz;
-    XMEMCPY(output + idx, outerContent, outerContentSz);
-    idx += outerContentSz;
+    if (pkcs7->contentOID != FIRMWARE_PKG_DATA) {
+        XMEMCPY(output + idx, contentInfoSeq, contentInfoSeqSz);
+        idx += contentInfoSeqSz;
+        XMEMCPY(output + idx, outerContentType, outerContentTypeSz);
+        idx += outerContentTypeSz;
+        XMEMCPY(output + idx, outerContent, outerContentSz);
+        idx += outerContentSz;
+    }
     XMEMCPY(output + idx, envDataSeq, envDataSeqSz);
     idx += envDataSeqSz;
     XMEMCPY(output + idx, ver, verSz);
     idx += verSz;
     XMEMCPY(output + idx, recipSet, recipSetSz);
     idx += recipSetSz;
-    XMEMCPY(output + idx, recip, recipSz);
-    idx += recipSz;
+    /* copy in recipients from list */
+    tmpRecip = pkcs7->recipList;
+    while (tmpRecip != NULL) {
+        XMEMCPY(output + idx, tmpRecip->recip, tmpRecip->recipSz);
+        idx += tmpRecip->recipSz;
+        tmpRecip = tmpRecip->next;
+    }
+    wc_PKCS7_FreeEncodedRecipientSet(pkcs7);
     XMEMCPY(output + idx, encContentSeq, encContentSeqSz);
     idx += encContentSeqSz;
     XMEMCPY(output + idx, contentType, contentTypeSz);
@@ -3649,187 +7940,335 @@
     XMEMCPY(output + idx, encryptedContent, encryptedOutSz);
     idx += encryptedOutSz;
 
-    ForceZero(contentKeyPlain, MAX_CONTENT_KEY_LEN);
-
     XFREE(plain, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
     XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
 
-#ifdef WOLFSSL_SMALL_STACK
-    XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
     return idx;
 }
 
 #ifndef NO_RSA
 /* decode KeyTransRecipientInfo (ktri), return 0 on success, <0 on error */
-static int wc_PKCS7_DecodeKtri(PKCS7* pkcs7, byte* pkiMsg, word32 pkiMsgSz,
+static int wc_PKCS7_DecryptKtri(PKCS7* pkcs7, byte* in, word32 inSz,
                                word32* idx, byte* decryptedKey,
                                word32* decryptedKeySz, int* recipFound)
 {
-    int length, encryptedKeySz, ret;
-    int keySz;
+    int length, encryptedKeySz = 0, ret = 0;
+    int keySz, version, sidType = 0;
     word32 encOID;
     word32 keyIdx;
     byte   issuerHash[KEYID_SIZE];
-    byte*  outKey = NULL;
-
+    byte*  outKey   = NULL;
+    byte* pkiMsg    = in;
+    word32 pkiMsgSz = inSz;
+    byte   tag;
+
+
+#ifndef NO_PKCS7_STREAM
+    word32 tmpIdx = *idx;
+    long rc;
+#endif
 #ifdef WC_RSA_BLINDING
     WC_RNG rng;
 #endif
 
 #ifdef WOLFSSL_SMALL_STACK
-    mp_int* serialNum;
-    byte* encryptedKey;
-    RsaKey* privKey;
+    mp_int* serialNum  = NULL;
+    byte* encryptedKey = NULL;
+    RsaKey* privKey    = NULL;
 #else
-    mp_int stack_serialNum;
-    mp_int* serialNum = &stack_serialNum;
+    mp_int serialNum[1];
     byte encryptedKey[MAX_ENCRYPTED_KEY_SZ];
-
-    RsaKey stack_privKey;
-    RsaKey* privKey = &stack_privKey;
-#endif
-
-    /* remove IssuerAndSerialNumber */
-    if (GetSequence(pkiMsg, idx, &length, pkiMsgSz) < 0)
-        return ASN_PARSE_E;
-
-    if (GetNameHash(pkiMsg, idx, issuerHash, pkiMsgSz) < 0)
-        return ASN_PARSE_E;
-
-    /* if we found correct recipient, issuer hashes will match */
-    if (XMEMCMP(issuerHash, pkcs7->issuerHash, KEYID_SIZE) == 0) {
-        *recipFound = 1;
-    }
-
-#ifdef WOLFSSL_SMALL_STACK
-    serialNum = (mp_int*)XMALLOC(sizeof(mp_int), pkcs7->heap,
-                                 DYNAMIC_TYPE_TMP_BUFFER);
-    if (serialNum == NULL)
-        return MEMORY_E;
-#endif
-
-    if (GetInt(serialNum, pkiMsg, idx, pkiMsgSz) < 0) {
-#ifdef WOLFSSL_SMALL_STACK
-        XFREE(serialNum, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-        return ASN_PARSE_E;
-    }
-
-    mp_clear(serialNum);
-
-#ifdef WOLFSSL_SMALL_STACK
-    XFREE(serialNum, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    if (GetAlgoId(pkiMsg, idx, &encOID, oidKeyType, pkiMsgSz) < 0)
-        return ASN_PARSE_E;
-
-    /* key encryption algorithm must be RSA for now */
-    if (encOID != RSAk)
-        return ALGO_ID_E;
-
-    /* read encryptedKey */
-#ifdef WOLFSSL_SMALL_STACK
-    encryptedKey = (byte*)XMALLOC(MAX_ENCRYPTED_KEY_SZ, pkcs7->heap,
-                                  DYNAMIC_TYPE_TMP_BUFFER);
-    if (encryptedKey == NULL)
-        return MEMORY_E;
-#endif
-
-    if (pkiMsg[(*idx)++] != ASN_OCTET_STRING) {
-#ifdef WOLFSSL_SMALL_STACK
-        XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-        return ASN_PARSE_E;
-    }
-
-    if (GetLength(pkiMsg, idx, &encryptedKeySz, pkiMsgSz) < 0) {
-#ifdef WOLFSSL_SMALL_STACK
-        XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-        return ASN_PARSE_E;
-    }
-
-    if (*recipFound == 1)
-        XMEMCPY(encryptedKey, &pkiMsg[*idx], encryptedKeySz);
-    *idx += encryptedKeySz;
-
-    /* load private key */
-#ifdef WOLFSSL_SMALL_STACK
-    privKey = (RsaKey*)XMALLOC(sizeof(RsaKey), pkcs7->heap,
-        DYNAMIC_TYPE_TMP_BUFFER);
-    if (privKey == NULL) {
-        XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
-        return MEMORY_E;
-    }
-#endif
-
-    ret = wc_InitRsaKey_ex(privKey, pkcs7->heap, INVALID_DEVID);
-    if (ret != 0) {
-#ifdef WOLFSSL_SMALL_STACK
-        XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
-        XFREE(privKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-        return ret;
-    }
-
-    if (pkcs7->privateKey != NULL && pkcs7->privateKeySz > 0) {
-        keyIdx = 0;
-        ret = wc_RsaPrivateKeyDecode(pkcs7->privateKey, &keyIdx, privKey,
-                                     pkcs7->privateKeySz);
-    }
-    else if (pkcs7->devId == INVALID_DEVID) {
-        ret = BAD_FUNC_ARG;
-    }
-    if (ret != 0) {
-        WOLFSSL_MSG("Failed to decode RSA private key");
-        wc_FreeRsaKey(privKey);
-#ifdef WOLFSSL_SMALL_STACK
-        XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
-        XFREE(privKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-        return ret;
-    }
-
-    /* decrypt encryptedKey */
-    #ifdef WC_RSA_BLINDING
-    ret = wc_InitRng_ex(&rng, pkcs7->heap, pkcs7->devId);
-    if (ret == 0) {
-        ret = wc_RsaSetRNG(privKey, &rng);
-    }
-    #endif
-    if (ret == 0) {
-        keySz = wc_RsaPrivateDecryptInline(encryptedKey, encryptedKeySz,
-                                           &outKey, privKey);
-        #ifdef WC_RSA_BLINDING
-            wc_FreeRng(&rng);
-        #endif
-    } else {
-        keySz = ret;
-    }
-    wc_FreeRsaKey(privKey);
-
-    if (keySz <= 0 || outKey == NULL) {
-        ForceZero(encryptedKey, MAX_ENCRYPTED_KEY_SZ);
-#ifdef WOLFSSL_SMALL_STACK
-        XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
-        XFREE(privKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-        return keySz;
-    } else {
-        *decryptedKeySz = keySz;
-        XMEMCPY(decryptedKey, outKey, keySz);
-        ForceZero(encryptedKey, MAX_ENCRYPTED_KEY_SZ);
-    }
-
-#ifdef WOLFSSL_SMALL_STACK
-    XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
-    XFREE(privKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return 0;
+    RsaKey privKey[1];
+#endif
+
+    switch (pkcs7->state) {
+        case WC_PKCS7_DECRYPT_KTRI:
+        #ifndef NO_PKCS7_STREAM
+            if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, MAX_VERSION_SZ,
+                            &pkiMsg, idx)) != 0) {
+                return ret;
+            }
+
+            rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK,
+                    in, inSz);
+            if (rc < 0) {
+                ret = (int)rc;
+                break;
+            }
+            pkiMsgSz = (word32)rc;
+
+        #endif
+            if (GetMyVersion(pkiMsg, idx, &version, pkiMsgSz) < 0)
+                return ASN_PARSE_E;
+
+            if (version == 0) {
+                sidType = CMS_ISSUER_AND_SERIAL_NUMBER;
+            } else if (version == 2) {
+                sidType = CMS_SKID;
+            } else {
+                return ASN_VERSION_E;
+            }
+
+        #ifndef NO_PKCS7_STREAM
+            if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, idx)) != 0) {
+                    break;
+            }
+            wc_PKCS7_StreamStoreVar(pkcs7, 0, sidType, version);
+
+            /* @TODO getting total amount left because of GetInt call later on
+             * this could be optimized to stream better */
+            pkcs7->stream->expected = (pkcs7->stream->maxLen -
+                                pkcs7->stream->totalRd) + pkcs7->stream->length;
+        #endif
+            wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_DECRYPT_KTRI_2);
+            FALL_THROUGH;
+
+        case WC_PKCS7_DECRYPT_KTRI_2:
+        #ifndef NO_PKCS7_STREAM
+
+            if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, pkcs7->stream->expected,
+                            &pkiMsg, idx)) != 0) {
+                return ret;
+            }
+
+            rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK,
+                    in, inSz);
+            if (rc < 0) {
+                ret = (int)rc;
+                break;
+            }
+            pkiMsgSz = (word32)rc;
+
+            wc_PKCS7_StreamGetVar(pkcs7, NULL, &sidType, &version);
+
+            /* @TODO get expected size for next part, does not account for
+             * GetInt call well */
+            if (pkcs7->stream->expected == MAX_SEQ_SZ) {
+                int sz;
+                word32 lidx;
+
+                if (sidType == CMS_ISSUER_AND_SERIAL_NUMBER) {
+                    lidx = *idx;
+                    ret = GetSequence(pkiMsg, &lidx, &sz, pkiMsgSz);
+                    if (ret < 0)
+                        return ret;
+                }
+                else {
+                    lidx = *idx + ASN_TAG_SZ;
+                    ret = GetLength(pkiMsg, &lidx, &sz, pkiMsgSz);
+                    if (ret < 0)
+                        return ret;
+                }
+
+                pkcs7->stream->expected = sz + MAX_ALGO_SZ + ASN_TAG_SZ +
+                                          MAX_LENGTH_SZ;
+                if (pkcs7->stream->length > 0 &&
+                        pkcs7->stream->length < pkcs7->stream->expected) {
+                    return WC_PKCS7_WANT_READ_E;
+                }
+            }
+        #endif /* !NO_PKCS7_STREAM */
+
+            if (sidType == CMS_ISSUER_AND_SERIAL_NUMBER) {
+
+                /* remove IssuerAndSerialNumber */
+                if (GetSequence(pkiMsg, idx, &length, pkiMsgSz) < 0)
+                    return ASN_PARSE_E;
+
+                if (GetNameHash(pkiMsg, idx, issuerHash, pkiMsgSz) < 0)
+                    return ASN_PARSE_E;
+
+                /* if we found correct recipient, issuer hashes will match */
+                if (XMEMCMP(issuerHash, pkcs7->issuerHash, KEYID_SIZE) == 0) {
+                    *recipFound = 1;
+                }
+
+        #ifdef WOLFSSL_SMALL_STACK
+                serialNum = (mp_int*)XMALLOC(sizeof(mp_int), pkcs7->heap,
+                                             DYNAMIC_TYPE_TMP_BUFFER);
+                if (serialNum == NULL)
+                    return MEMORY_E;
+        #endif
+
+                if (GetInt(serialNum, pkiMsg, idx, pkiMsgSz) < 0) {
+        #ifdef WOLFSSL_SMALL_STACK
+                    XFREE(serialNum, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        #endif
+                    return ASN_PARSE_E;
+                }
+
+                mp_clear(serialNum);
+
+        #ifdef WOLFSSL_SMALL_STACK
+                XFREE(serialNum, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        #endif
+
+            } else {
+                /* remove SubjectKeyIdentifier */
+                if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) < 0)
+                    return ASN_PARSE_E;
+
+                if (tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC))
+                    return ASN_PARSE_E;
+
+                if (GetLength(pkiMsg, idx, &length, pkiMsgSz) < 0)
+                    return ASN_PARSE_E;
+
+                if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) < 0)
+                    return ASN_PARSE_E;
+
+                if (tag != ASN_OCTET_STRING)
+                    return ASN_PARSE_E;
+
+                if (GetLength(pkiMsg, idx, &length, pkiMsgSz) < 0)
+                    return ASN_PARSE_E;
+
+                /* if we found correct recipient, SKID will match */
+                if (XMEMCMP(pkiMsg + (*idx), pkcs7->issuerSubjKeyId,
+                            KEYID_SIZE) == 0) {
+                    *recipFound = 1;
+                }
+                (*idx) += KEYID_SIZE;
+            }
+
+            if (GetAlgoId(pkiMsg, idx, &encOID, oidKeyType, pkiMsgSz) < 0)
+                return ASN_PARSE_E;
+
+            /* key encryption algorithm must be RSA for now */
+            if (encOID != RSAk)
+                return ALGO_ID_E;
+
+            /* read encryptedKey */
+            if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) < 0)
+                return ASN_PARSE_E;
+
+            if (tag != ASN_OCTET_STRING)
+                return ASN_PARSE_E;
+
+            if (GetLength(pkiMsg, idx, &encryptedKeySz, pkiMsgSz) < 0) {
+                return ASN_PARSE_E;
+            }
+            if (encryptedKeySz > MAX_ENCRYPTED_KEY_SZ) {
+               return BUFFER_E;
+            }
+
+        #ifndef NO_PKCS7_STREAM
+            if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, idx)) != 0) {
+                    break;
+            }
+            wc_PKCS7_StreamStoreVar(pkcs7, encryptedKeySz, sidType, version);
+            pkcs7->stream->expected = encryptedKeySz;
+        #endif
+            wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_DECRYPT_KTRI_3);
+            FALL_THROUGH;
+
+        case WC_PKCS7_DECRYPT_KTRI_3:
+        #ifndef NO_PKCS7_STREAM
+            if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz,
+                            pkcs7->stream->expected, &pkiMsg, idx)) != 0) {
+                return ret;
+            }
+            encryptedKeySz = pkcs7->stream->expected;
+        #endif
+
+        #ifdef WOLFSSL_SMALL_STACK
+            encryptedKey = (byte*)XMALLOC(encryptedKeySz, pkcs7->heap,
+                                          DYNAMIC_TYPE_TMP_BUFFER);
+            if (encryptedKey == NULL)
+                return MEMORY_E;
+        #endif
+
+            if (*recipFound == 1)
+                XMEMCPY(encryptedKey, &pkiMsg[*idx], encryptedKeySz);
+            *idx += encryptedKeySz;
+
+            /* load private key */
+        #ifdef WOLFSSL_SMALL_STACK
+            privKey = (RsaKey*)XMALLOC(sizeof(RsaKey), pkcs7->heap,
+                DYNAMIC_TYPE_TMP_BUFFER);
+            if (privKey == NULL) {
+                XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+                return MEMORY_E;
+            }
+        #endif
+
+            ret = wc_InitRsaKey_ex(privKey, pkcs7->heap, INVALID_DEVID);
+            if (ret != 0) {
+        #ifdef WOLFSSL_SMALL_STACK
+                XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+                XFREE(privKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        #endif
+                return ret;
+            }
+
+            if (pkcs7->privateKey != NULL && pkcs7->privateKeySz > 0) {
+                keyIdx = 0;
+                ret = wc_RsaPrivateKeyDecode(pkcs7->privateKey, &keyIdx,
+                        privKey, pkcs7->privateKeySz);
+            }
+            else if (pkcs7->devId == INVALID_DEVID) {
+                ret = BAD_FUNC_ARG;
+            }
+            if (ret != 0) {
+                WOLFSSL_MSG("Failed to decode RSA private key");
+                wc_FreeRsaKey(privKey);
+        #ifdef WOLFSSL_SMALL_STACK
+                XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+                XFREE(privKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        #endif
+                return ret;
+            }
+
+            /* decrypt encryptedKey */
+            #ifdef WC_RSA_BLINDING
+            ret = wc_InitRng_ex(&rng, pkcs7->heap, pkcs7->devId);
+            if (ret == 0) {
+                ret = wc_RsaSetRNG(privKey, &rng);
+            }
+            #endif
+            if (ret == 0) {
+                keySz = wc_RsaPrivateDecryptInline(encryptedKey, encryptedKeySz,
+                                                   &outKey, privKey);
+                #ifdef WC_RSA_BLINDING
+                    wc_FreeRng(&rng);
+                #endif
+            } else {
+                keySz = ret;
+            }
+            wc_FreeRsaKey(privKey);
+
+            if (keySz <= 0 || outKey == NULL) {
+                ForceZero(encryptedKey, encryptedKeySz);
+        #ifdef WOLFSSL_SMALL_STACK
+                XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+                XFREE(privKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        #endif
+                return keySz;
+            } else {
+                *decryptedKeySz = keySz;
+                XMEMCPY(decryptedKey, outKey, keySz);
+                ForceZero(encryptedKey, encryptedKeySz);
+            }
+
+        #ifdef WOLFSSL_SMALL_STACK
+            XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            XFREE(privKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        #endif
+
+        #ifndef NO_PKCS7_STREAM
+            if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, idx)) != 0) {
+                break;
+            }
+        #endif
+            ret = 0; /* success */
+            break;
+
+        default:
+            WOLFSSL_MSG("PKCS7 Unknown KTRI decrypt state");
+            ret = BAD_FUNC_ARG;
+    }
+
+    return ret;
 }
 #endif /* !NO_RSA */
 
@@ -3840,14 +8279,16 @@
                         byte* pkiMsg, word32 pkiMsgSz, word32* idx)
 {
     int ret, length;
-    word32 keyOID;
+    word32 keyOID, oidSum = 0;
+    int curve_id = ECC_CURVE_DEF;
+    byte tag;
 
     if (kari == NULL || pkiMsg == NULL || idx == NULL)
         return BAD_FUNC_ARG;
 
     /* remove OriginatorIdentifierOrKey */
-    if (pkiMsg[*idx] == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0)) {
-        (*idx)++;
+    if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) == 0 &&
+            tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0)) {
         if (GetLength(pkiMsg, idx, &length, pkiMsgSz) < 0)
             return ASN_PARSE_E;
 
@@ -3856,8 +8297,8 @@
     }
 
     /* remove OriginatorPublicKey */
-    if (pkiMsg[*idx] == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 1)) {
-        (*idx)++;
+    if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) == 0 &&
+            tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 1)) {
         if (GetLength(pkiMsg, idx, &length, pkiMsgSz) < 0)
             return ASN_PARSE_E;
 
@@ -3872,14 +8313,29 @@
     if (keyOID != ECDSAk)
         return ASN_PARSE_E;
 
+    /* optional algorithm parameters */
+    ret = GetObjectId(pkiMsg, idx, &oidSum, oidIgnoreType, pkiMsgSz);
+    if (ret == 0) {
+        /* get curve id */
+        curve_id = wc_ecc_get_oid(oidSum, NULL, 0);
+        if (curve_id < 0)
+            return ECC_CURVE_OID_E;
+    }
+
     /* remove ECPoint BIT STRING */
-    if ((pkiMsgSz > (*idx + 1)) && (pkiMsg[(*idx)++] != ASN_BIT_STRING))
+    if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) < 0)
+        return ASN_PARSE_E;
+
+    if (tag != ASN_BIT_STRING)
         return ASN_PARSE_E;
 
     if (GetLength(pkiMsg, idx, &length, pkiMsgSz) < 0)
         return ASN_PARSE_E;
 
-    if ((pkiMsgSz < (*idx + 1)) || (pkiMsg[(*idx)++] != 0x00))
+    if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) < 0)
+        return ASN_EXPECT_0_E;
+
+    if (tag != ASN_OTHER_TYPE)
         return ASN_EXPECT_0_E;
 
     /* get sender ephemeral public ECDSA key */
@@ -3890,11 +8346,16 @@
     kari->senderKeyInit = 1;
 
     /* length-1 for unused bits counter */
-    ret = wc_ecc_import_x963(pkiMsg + (*idx), length - 1, kari->senderKey);
-    if (ret != 0)
-        return ret;
-
-    (*idx) += length - 1;
+    ret = wc_ecc_import_x963_ex(pkiMsg + (*idx), length - 1, kari->senderKey,
+            curve_id);
+    if (ret != 0) {
+        ret = wc_EccPublicKeyDecode(pkiMsg, idx, kari->senderKey, *idx + length - 1);
+        if (ret != 0)
+            return ret;
+    }
+    else {
+        (*idx) += length - 1;
+    }
 
     return 0;
 }
@@ -3907,6 +8368,7 @@
 {
     int length;
     word32 savedIdx;
+    byte tag;
 
     if (kari == NULL || pkiMsg == NULL || idx == NULL)
         return BAD_FUNC_ARG;
@@ -3914,7 +8376,11 @@
     savedIdx = *idx;
 
     /* starts with EXPLICIT [1] */
-    if (pkiMsg[(*idx)++] != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 1)) {
+    if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) < 0) {
+        *idx = savedIdx;
+        return 0;
+    }
+    if (tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 1)) {
         *idx = savedIdx;
         return 0;
     }
@@ -3925,8 +8391,11 @@
     }
 
     /* get OCTET STRING */
-    if ( (pkiMsgSz > ((*idx) + 1)) &&
-         (pkiMsg[(*idx)++] != ASN_OCTET_STRING) ) {
+    if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) < 0) {
+        *idx = savedIdx;
+        return 0;
+    }
+    if (tag != ASN_OCTET_STRING) {
         *idx = savedIdx;
         return 0;
     }
@@ -3956,18 +8425,31 @@
 /* remove ASN.1 KeyEncryptionAlgorithmIdentifier, return 0 on success,
  * < 0 on error */
 static int wc_PKCS7_KariGetKeyEncryptionAlgorithmId(WC_PKCS7_KARI* kari,
-                        byte* pkiMsg, word32 pkiMsgSz, word32* idx,
-                        word32* keyAgreeOID, word32* keyWrapOID)
-{
+        byte* pkiMsg, word32 pkiMsgSz, word32* idx,
+        word32* keyAgreeOID, word32* keyWrapOID)
+{
+    int length = 0;
+    word32 localIdx;
+
     if (kari == NULL || pkiMsg == NULL || idx == NULL ||
         keyAgreeOID == NULL || keyWrapOID == NULL)
         return BAD_FUNC_ARG;
 
+    localIdx = *idx;
+
     /* remove KeyEncryptionAlgorithmIdentifier */
-    if (GetAlgoId(pkiMsg, idx, keyAgreeOID, oidCmsKeyAgreeType,
-                  pkiMsgSz) < 0)
+    if (GetSequence(pkiMsg, &localIdx, &length, pkiMsgSz) < 0)
         return ASN_PARSE_E;
 
+    localIdx = *idx;
+    if (GetAlgoId(pkiMsg, &localIdx, keyAgreeOID, oidCmsKeyAgreeType,
+              pkiMsgSz) < 0) {
+        return ASN_PARSE_E;
+    }
+
+    if (localIdx < *idx + length) {
+        *idx = localIdx;
+    }
     /* remove KeyWrapAlgorithm, stored in parameter of KeyEncAlgoId */
     if (GetAlgoId(pkiMsg, idx, keyWrapOID, oidKeyWrapType, pkiMsgSz) < 0)
         return ASN_PARSE_E;
@@ -3980,18 +8462,21 @@
  * if subject key ID matches, recipFound is set to 1 */
 static int wc_PKCS7_KariGetSubjectKeyIdentifier(WC_PKCS7_KARI* kari,
                         byte* pkiMsg, word32 pkiMsgSz, word32* idx,
-                        int* recipFound)
+                        int* recipFound, byte* rid)
 {
     int length;
-    byte subjKeyId[KEYID_SIZE];
-
-    if (kari == NULL || pkiMsg == NULL || idx == NULL || recipFound == NULL)
+    byte tag;
+
+    if (kari == NULL || pkiMsg == NULL || idx == NULL || recipFound == NULL ||
+            rid == NULL)
         return BAD_FUNC_ARG;
 
     /* remove RecipientKeyIdentifier IMPLICIT [0] */
-    if ( (pkiMsgSz > (*idx + 1)) &&
-         (pkiMsg[(*idx)++] == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0)) ) {
-
+    if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) < 0) {
+        return ASN_PARSE_E;
+    }
+
+    if (tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0)) {
         if (GetLength(pkiMsg, idx, &length, pkiMsgSz) < 0)
             return ASN_PARSE_E;
 
@@ -4000,8 +8485,11 @@
     }
 
     /* remove SubjectKeyIdentifier */
-    if ( (pkiMsgSz > (*idx + 1)) &&
-         (pkiMsg[(*idx)++] != ASN_OCTET_STRING) )
+    if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) < 0) {
+        return ASN_PARSE_E;
+    }
+
+    if (tag != ASN_OCTET_STRING)
         return ASN_PARSE_E;
 
     if (GetLength(pkiMsg, idx, &length, pkiMsgSz) < 0)
@@ -4010,11 +8498,11 @@
     if (length != KEYID_SIZE)
         return ASN_PARSE_E;
 
-    XMEMCPY(subjKeyId, pkiMsg + (*idx), KEYID_SIZE);
+    XMEMCPY(rid, pkiMsg + (*idx), KEYID_SIZE);
     (*idx) += length;
 
     /* subject key id should match if recipient found */
-    if (XMEMCMP(subjKeyId, kari->decoded->extSubjKeyId, KEYID_SIZE) == 0) {
+    if (XMEMCMP(rid, kari->decoded->extSubjKeyId, KEYID_SIZE) == 0) {
         *recipFound = 1;
     }
 
@@ -4026,30 +8514,30 @@
  * if issuer and serial number match, recipFound is set to 1 */
 static int wc_PKCS7_KariGetIssuerAndSerialNumber(WC_PKCS7_KARI* kari,
                         byte* pkiMsg, word32 pkiMsgSz, word32* idx,
-                        int* recipFound)
+                        int* recipFound, byte* rid)
 {
     int length, ret;
-    byte issuerHash[KEYID_SIZE];
 #ifdef WOLFSSL_SMALL_STACK
     mp_int* serial;
     mp_int* recipSerial;
 #else
-    mp_int  stack_serial;
-    mp_int* serial = &stack_serial;
-
-    mp_int  stack_recipSerial;
-    mp_int* recipSerial = &stack_recipSerial;
-#endif
+    mp_int  serial[1];
+    mp_int  recipSerial[1];
+#endif
+
+    if (rid == NULL) {
+        return BAD_FUNC_ARG;
+    }
 
     /* remove IssuerAndSerialNumber */
     if (GetSequence(pkiMsg, idx, &length, pkiMsgSz) < 0)
         return ASN_PARSE_E;
 
-    if (GetNameHash(pkiMsg, idx, issuerHash, pkiMsgSz) < 0)
+    if (GetNameHash(pkiMsg, idx, rid, pkiMsgSz) < 0)
         return ASN_PARSE_E;
 
     /* if we found correct recipient, issuer hashes will match */
-    if (XMEMCMP(issuerHash, kari->decoded->issuerHash, KEYID_SIZE) == 0) {
+    if (XMEMCMP(rid, kari->decoded->issuerHash, KEYID_SIZE) == 0) {
         *recipFound = 1;
     }
 
@@ -4114,10 +8602,12 @@
 static int wc_PKCS7_KariGetRecipientEncryptedKeys(WC_PKCS7_KARI* kari,
                         byte* pkiMsg, word32 pkiMsgSz, word32* idx,
                         int* recipFound, byte* encryptedKey,
-                        int* encryptedKeySz)
+                        int* encryptedKeySz, byte* rid)
 {
     int length;
     int ret = 0;
+    byte tag;
+    word32 localIdx;
 
     if (kari == NULL || pkiMsg == NULL || idx == NULL ||
         recipFound == NULL || encryptedKey == NULL)
@@ -4133,16 +8623,18 @@
 
     /* KeyAgreeRecipientIdentifier is CHOICE of IssuerAndSerialNumber
      * or [0] IMMPLICIT RecipientKeyIdentifier */
-    if ( (pkiMsgSz > (*idx + 1)) &&
-         (pkiMsg[*idx] == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0)) ) {
-
+    localIdx = *idx;
+    if (GetASNTag(pkiMsg, &localIdx, &tag, pkiMsgSz) < 0)
+        return ASN_PARSE_E;
+
+    if (tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0)) {
         /* try to get RecipientKeyIdentifier */
         ret = wc_PKCS7_KariGetSubjectKeyIdentifier(kari, pkiMsg, pkiMsgSz,
-                                                   idx, recipFound);
+                                                   idx, recipFound, rid);
     } else {
         /* try to get IssuerAndSerialNumber */
         ret = wc_PKCS7_KariGetIssuerAndSerialNumber(kari, pkiMsg, pkiMsgSz,
-                                                    idx, recipFound);
+                                                    idx, recipFound, rid);
     }
 
     /* if we don't have either option, malformed CMS */
@@ -4150,8 +8642,10 @@
         return ret;
 
     /* remove EncryptedKey */
-    if ( (pkiMsgSz > (*idx + 1)) &&
-         (pkiMsg[(*idx)++] != ASN_OCTET_STRING) )
+    if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) < 0)
+        return ASN_PARSE_E;
+
+    if (tag != ASN_OCTET_STRING)
         return ASN_PARSE_E;
 
     if (GetLength(pkiMsg, idx, &length, pkiMsgSz) < 0)
@@ -4171,9 +8665,533 @@
 #endif /* HAVE_ECC */
 
 
+int wc_PKCS7_SetOriEncryptCtx(PKCS7* pkcs7, void* ctx)
+{
+    if (pkcs7 == NULL)
+        return BAD_FUNC_ARG;
+
+    pkcs7->oriEncryptCtx = ctx;
+
+    return 0;
+}
+
+
+int wc_PKCS7_SetOriDecryptCtx(PKCS7* pkcs7, void* ctx)
+{
+
+    if (pkcs7 == NULL)
+        return BAD_FUNC_ARG;
+
+    pkcs7->oriDecryptCtx = ctx;
+
+    return 0;
+}
+
+
+int wc_PKCS7_SetOriDecryptCb(PKCS7* pkcs7, CallbackOriDecrypt cb)
+{
+    if (pkcs7 == NULL)
+        return BAD_FUNC_ARG;
+
+    pkcs7->oriDecryptCb = cb;
+
+    return 0;
+}
+
+
+/* return 0 on success */
+int wc_PKCS7_SetWrapCEKCb(PKCS7* pkcs7, CallbackWrapCEK cb)
+{
+    if (pkcs7 == NULL)
+        return BAD_FUNC_ARG;
+
+    pkcs7->wrapCEKCb = cb;
+
+    return 0;
+}
+
+/* Decrypt ASN.1 OtherRecipientInfo (ori), as defined by:
+ *
+ *   OtherRecipientInfo ::= SEQUENCE {
+ *     oriType OBJECT IDENTIFIER,
+ *     oriValue ANY DEFINED BY oriType }
+ *
+ * pkcs7          - pointer to initialized PKCS7 structure
+ * pkiMsg         - pointer to encoded CMS bundle
+ * pkiMsgSz       - size of pkiMsg, bytes
+ * idx            - [IN/OUT] pointer to index into pkiMsg
+ * decryptedKey   - [OUT] output buf for decrypted content encryption key
+ * decryptedKeySz - [IN/OUT] size of buffer, size of decrypted key
+ * recipFound     - [OUT] 1 if recipient has been found, 0 if not
+ *
+ * Return 0 on success, negative upon error.
+ */
+static int wc_PKCS7_DecryptOri(PKCS7* pkcs7, byte* in, word32 inSz,
+                               word32* idx, byte* decryptedKey,
+                               word32* decryptedKeySz, int* recipFound)
+{
+    int ret, seqSz, oriOIDSz;
+    word32 oriValueSz, tmpIdx;
+    byte* oriValue;
+    byte oriOID[MAX_OID_SZ];
+
+    byte* pkiMsg    = in;
+    word32 pkiMsgSz = inSz;
+#ifndef NO_PKCS7_STREAM
+    word32 stateIdx = *idx;
+    long rc;
+#endif
+
+    if (pkcs7->oriDecryptCb == NULL) {
+        WOLFSSL_MSG("You must register an ORI Decrypt callback");
+        return BAD_FUNC_ARG;
+    }
+
+    switch (pkcs7->state) {
+
+        case WC_PKCS7_DECRYPT_ORI:
+        #ifndef NO_PKCS7_STREAM
+            /* @TODO for now just get full buffer, needs divided up */
+            if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz,
+                   (pkcs7->stream->maxLen - pkcs7->stream->totalRd) +
+                   pkcs7->stream->length, &pkiMsg, idx)) != 0) {
+                return ret;
+            }
+
+            rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, in,
+                inSz);
+            if (rc < 0) {
+                ret = (int)rc;
+                break;
+            }
+            pkiMsgSz = (word32)rc;
+        #endif
+            /* get OtherRecipientInfo sequence length */
+            if (GetLength(pkiMsg, idx, &seqSz, pkiMsgSz) < 0)
+                return ASN_PARSE_E;
+
+            tmpIdx = *idx;
+
+            /* remove and store oriType OBJECT IDENTIFIER */
+            if (GetASNObjectId(pkiMsg, idx, &oriOIDSz, pkiMsgSz) != 0)
+                return ASN_PARSE_E;
+
+            XMEMCPY(oriOID, pkiMsg + *idx, oriOIDSz);
+            *idx += oriOIDSz;
+
+            /* get oriValue, increment idx */
+            oriValue = pkiMsg + *idx;
+            oriValueSz = seqSz - (*idx - tmpIdx);
+            *idx += oriValueSz;
+
+            /* pass oriOID and oriValue to user callback, expect back
+               decryptedKey and size */
+            ret = pkcs7->oriDecryptCb(pkcs7, oriOID, (word32)oriOIDSz, oriValue,
+                                      oriValueSz, decryptedKey, decryptedKeySz,
+                                      pkcs7->oriDecryptCtx);
+
+            if (ret != 0 || decryptedKey == NULL || *decryptedKeySz == 0) {
+                /* decrypt operation failed */
+                *recipFound = 0;
+                return PKCS7_RECIP_E;
+            }
+
+            /* mark recipFound, since we only support one RecipientInfo for now */
+            *recipFound = 1;
+
+        #ifndef NO_PKCS7_STREAM
+            if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &stateIdx, idx)) != 0) {
+                break;
+            }
+        #endif
+            ret = 0; /* success */
+            break;
+
+        default:
+            WOLFSSL_MSG("PKCS7 ORI unknown state");
+            ret = BAD_FUNC_ARG;
+
+    }
+
+    return ret;
+}
+
+#if !defined(NO_PWDBASED) && !defined(NO_SHA)
+
+/* decode ASN.1 PasswordRecipientInfo (pwri), return 0 on success,
+ * < 0 on error */
+static int wc_PKCS7_DecryptPwri(PKCS7* pkcs7, byte* in, word32 inSz,
+                               word32* idx, byte* decryptedKey,
+                               word32* decryptedKeySz, int* recipFound)
+{
+    byte* salt;
+    byte* cek;
+    byte* kek;
+
+    byte tmpIv[MAX_CONTENT_IV_SIZE];
+
+    int ret = 0, length, saltSz, iterations, blockSz, kekKeySz;
+    int hashOID = WC_SHA; /* default to SHA1 */
+    word32 kdfAlgoId, pwriEncAlgoId, keyEncAlgoId, cekSz;
+    byte* pkiMsg = in;
+    word32 pkiMsgSz = inSz;
+    byte  tag;
+#ifndef NO_PKCS7_STREAM
+    word32 tmpIdx = *idx;
+    long rc;
+#endif
+
+    switch (pkcs7->state) {
+        case WC_PKCS7_DECRYPT_PWRI:
+        #ifndef NO_PKCS7_STREAM
+            /*@TODO for now just get full buffer, needs divided up */
+            if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz,
+                   (pkcs7->stream->maxLen - pkcs7->stream->totalRd) +
+                   pkcs7->stream->length, &pkiMsg, idx)) != 0) {
+                return ret;
+            }
+
+            rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, in,
+                    inSz);
+            if (rc < 0) {
+                ret = (int)rc;
+                break;
+            }
+            pkiMsgSz = (word32)rc;
+        #endif
+            /* remove KeyDerivationAlgorithmIdentifier */
+            if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) < 0)
+                return ASN_PARSE_E;
+
+            if (tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0))
+                return ASN_PARSE_E;
+
+            if (GetLength(pkiMsg, idx, &length, pkiMsgSz) < 0)
+                return ASN_PARSE_E;
+
+            /* get KeyDerivationAlgorithmIdentifier */
+            if (wc_GetContentType(pkiMsg, idx, &kdfAlgoId, pkiMsgSz) < 0)
+                return ASN_PARSE_E;
+
+            /* get KDF params SEQ */
+            if (GetSequence(pkiMsg, idx, &length, pkiMsgSz) < 0)
+                return ASN_PARSE_E;
+
+            /* get KDF salt OCTET STRING */
+            if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) < 0)
+                return ASN_PARSE_E;
+
+            if (tag != ASN_OCTET_STRING)
+                return ASN_PARSE_E;
+
+            if (GetLength(pkiMsg, idx, &saltSz, pkiMsgSz) < 0)
+                return ASN_PARSE_E;
+
+            salt = (byte*)XMALLOC(saltSz, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+            if (salt == NULL)
+                return MEMORY_E;
+
+            XMEMCPY(salt, pkiMsg + (*idx), saltSz);
+            *idx += saltSz;
+
+            /* get KDF iterations */
+            if (GetMyVersion(pkiMsg, idx, &iterations, pkiMsgSz) < 0) {
+                XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+                return ASN_PARSE_E;
+            }
+
+            /* get KeyEncAlgoId SEQ */
+            if (GetSequence(pkiMsg, idx, &length, pkiMsgSz) < 0) {
+                XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+                return ASN_PARSE_E;
+            }
+
+            /* get KeyEncAlgoId */
+            if (wc_GetContentType(pkiMsg, idx, &keyEncAlgoId, pkiMsgSz) < 0) {
+                XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+                return ASN_PARSE_E;
+            }
+
+            /* get pwriEncAlgoId */
+            if (GetAlgoId(pkiMsg, idx, &pwriEncAlgoId, oidBlkType, pkiMsgSz) < 0) {
+                XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+                return ASN_PARSE_E;
+            }
+
+            blockSz = wc_PKCS7_GetOIDBlockSize(pwriEncAlgoId);
+            if (blockSz < 0) {
+                XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+                return blockSz;
+            }
+
+            /* get content-encryption key size, based on algorithm */
+            kekKeySz = wc_PKCS7_GetOIDKeySize(pwriEncAlgoId);
+            if (kekKeySz < 0) {
+                XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+                return kekKeySz;
+            }
+
+            /* get block cipher IV, stored in OPTIONAL parameter of AlgoID */
+            if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) < 0) {
+                XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+                return ASN_PARSE_E;
+            }
+
+            if (tag != ASN_OCTET_STRING) {
+                XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+                return ASN_PARSE_E;
+            }
+
+            if (GetLength(pkiMsg, idx, &length, pkiMsgSz) < 0) {
+                XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+                return ASN_PARSE_E;
+            }
+
+            if (length != blockSz) {
+                WOLFSSL_MSG("Incorrect IV length, must be of content alg block size");
+                XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+                return ASN_PARSE_E;
+            }
+
+            XMEMCPY(tmpIv, pkiMsg + (*idx), length);
+            *idx += length;
+
+            /* get EncryptedKey */
+            if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) < 0) {
+                XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+                return ASN_PARSE_E;
+            }
+
+            if (tag != ASN_OCTET_STRING) {
+                XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+                return ASN_PARSE_E;
+            }
+
+            if (GetLength(pkiMsg, idx, &length, pkiMsgSz) < 0) {
+                XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+                return ASN_PARSE_E;
+            }
+
+            /* allocate temporary space for decrypted key */
+            cekSz = length;
+            cek = (byte*)XMALLOC(cekSz, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            if (cek == NULL) {
+                XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+                return MEMORY_E;
+            }
+
+            /* generate KEK */
+            kek = (byte*)XMALLOC(kekKeySz, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+            if (kek == NULL) {
+                XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+                XFREE(cek, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+                return MEMORY_E;
+            }
+
+            ret = wc_PKCS7_GenerateKEK_PWRI(pkcs7, pkcs7->pass, pkcs7->passSz,
+                                            salt, saltSz, kdfAlgoId, hashOID,
+                                            iterations, kek, kekKeySz);
+            if (ret < 0) {
+                XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+                XFREE(kek, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+                XFREE(cek, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+                return ASN_PARSE_E;
+            }
+
+            /* decrypt CEK with KEK */
+            ret = wc_PKCS7_PwriKek_KeyUnWrap(pkcs7, kek, kekKeySz,
+                                             pkiMsg + (*idx), length, cek,
+                                             cekSz, tmpIv, blockSz,
+                                             pwriEncAlgoId);
+            if (ret < 0) {
+                XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+                XFREE(kek, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+                XFREE(cek, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+                return ret;
+            }
+            cekSz = ret;
+
+            if (*decryptedKeySz < cekSz) {
+                WOLFSSL_MSG("Decrypted key buffer too small for CEK");
+                XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+                XFREE(kek, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+                XFREE(cek, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+                return BUFFER_E;
+            }
+
+            XMEMCPY(decryptedKey, cek, cekSz);
+            *decryptedKeySz = cekSz;
+
+            XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+            XFREE(kek, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+            XFREE(cek, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+
+            /* mark recipFound, since we only support one RecipientInfo for now */
+            *recipFound = 1;
+            *idx += length;
+        #ifndef NO_PKCS7_STREAM
+            if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, idx)) != 0) {
+                break;
+            }
+        #endif
+            ret = 0; /* success */
+            break;
+
+        default:
+            WOLFSSL_MSG("PKCS7 PWRI unknown state");
+            ret = BAD_FUNC_ARG;
+    }
+
+    return ret;
+}
+
+#endif /* NO_PWDBASED | NO_SHA */
+
+/* decode ASN.1 KEKRecipientInfo (kekri), return 0 on success,
+ * < 0 on error */
+static int wc_PKCS7_DecryptKekri(PKCS7* pkcs7, byte* in, word32 inSz,
+                               word32* idx, byte* decryptedKey,
+                               word32* decryptedKeySz, int* recipFound)
+{
+    int length, keySz, dateLen, direction;
+    byte* keyId = NULL;
+    const byte* datePtr = NULL;
+    byte  dateFormat, tag;
+    word32 keyIdSz, kekIdSz, keyWrapOID, localIdx;
+
+    int ret = 0;
+    byte* pkiMsg    = in;
+    word32 pkiMsgSz = inSz;
+#ifndef NO_PKCS7_STREAM
+    word32 tmpIdx = *idx;
+    long rc;
+#endif
+
+    WOLFSSL_ENTER("wc_PKCS7_DecryptKekri");
+    switch (pkcs7->state) {
+        case WC_PKCS7_DECRYPT_KEKRI:
+        #ifndef NO_PKCS7_STREAM
+            /* @TODO for now just get full buffer, needs divided up */
+            if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz,
+                   (pkcs7->stream->maxLen - pkcs7->stream->totalRd) +
+                   pkcs7->stream->length, &pkiMsg, idx)) != 0) {
+                return ret;
+            }
+
+            rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, in,
+                    inSz);
+            if (rc < 0) {
+                ret = (int)rc;
+                break;
+            }
+            pkiMsgSz = (word32)rc;
+        #endif
+            /* remove KEKIdentifier */
+            if (GetSequence(pkiMsg, idx, &length, pkiMsgSz) < 0)
+                return ASN_PARSE_E;
+
+            kekIdSz = length;
+
+            if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) < 0)
+                return ASN_PARSE_E;
+
+            if (tag != ASN_OCTET_STRING)
+                return ASN_PARSE_E;
+
+            if (GetLength(pkiMsg, idx, &length, pkiMsgSz) < 0)
+                return ASN_PARSE_E;
+
+            /* save keyIdentifier and length */
+            keyId = pkiMsg + *idx;
+            keyIdSz = length;
+            *idx += keyIdSz;
+
+            /* may have OPTIONAL GeneralizedTime */
+            localIdx = *idx;
+            if ((*idx < kekIdSz) && GetASNTag(pkiMsg, &localIdx, &tag,
+                        pkiMsgSz) == 0 && tag == ASN_GENERALIZED_TIME) {
+                if (wc_GetDateInfo(pkiMsg + *idx, pkiMsgSz, &datePtr, &dateFormat,
+                                   &dateLen) != 0) {
+                    return ASN_PARSE_E;
+                }
+                *idx += (dateLen + 1);
+            }
+
+            /* may have OPTIONAL OtherKeyAttribute */
+            localIdx = *idx;
+            if ((*idx < kekIdSz) && GetASNTag(pkiMsg, &localIdx, &tag,
+                            pkiMsgSz) == 0 && tag == (ASN_SEQUENCE |
+                            ASN_CONSTRUCTED)) {
+                if (GetSequence(pkiMsg, idx, &length, pkiMsgSz) < 0)
+                    return ASN_PARSE_E;
+
+                /* skip it */
+                *idx += length;
+            }
+
+            /* get KeyEncryptionAlgorithmIdentifier */
+            if (GetAlgoId(pkiMsg, idx, &keyWrapOID, oidKeyWrapType, pkiMsgSz) < 0)
+                return ASN_PARSE_E;
+
+            /* get EncryptedKey */
+            if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) < 0)
+                return ASN_PARSE_E;
+
+            if (tag != ASN_OCTET_STRING)
+                return ASN_PARSE_E;
+
+            if (GetLength(pkiMsg, idx, &length, pkiMsgSz) < 0)
+                return ASN_PARSE_E;
+
+            #ifndef NO_AES
+                direction = AES_DECRYPTION;
+            #else
+                direction = DES_DECRYPTION;
+            #endif
+
+            /* decrypt CEK with KEK */
+            if (pkcs7->wrapCEKCb) {
+                keySz = pkcs7->wrapCEKCb(pkcs7, pkiMsg + *idx, length, keyId,
+                                     keyIdSz, NULL, 0, decryptedKey,
+                                     *decryptedKeySz, keyWrapOID,
+                                     (int)PKCS7_KEKRI, direction);
+            }
+            else {
+                keySz = wc_PKCS7_KeyWrap(pkiMsg + *idx, length, pkcs7->privateKey,
+                                     pkcs7->privateKeySz, decryptedKey, *decryptedKeySz,
+                                     keyWrapOID, direction);
+            }
+            if (keySz <= 0)
+                return keySz;
+
+            *decryptedKeySz = (word32)keySz;
+
+            /* mark recipFound, since we only support one RecipientInfo for now */
+            *recipFound = 1;
+            *idx += length;
+
+        #ifndef NO_PKCS7_STREAM
+            if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, idx)) != 0) {
+                break;
+            }
+        #endif
+            ret = 0; /* success */
+            break;
+
+        default:
+            WOLFSSL_MSG("PKCS7 KEKRI unknown state");
+            ret = BAD_FUNC_ARG;
+
+    }
+
+    (void)keyId;
+    return ret;
+}
+
+
 /* decode ASN.1 KeyAgreeRecipientInfo (kari), return 0 on success,
  * < 0 on error */
-static int wc_PKCS7_DecodeKari(PKCS7* pkcs7, byte* pkiMsg, word32 pkiMsgSz,
+static int wc_PKCS7_DecryptKari(PKCS7* pkcs7, byte* in, word32 inSz,
                                word32* idx, byte* decryptedKey,
                                word32* decryptedKeySz, int* recipFound)
 {
@@ -4182,152 +9200,253 @@
     int encryptedKeySz;
     int direction = 0;
     word32 keyAgreeOID, keyWrapOID;
+    byte rid[KEYID_SIZE];
 
 #ifdef WOLFSSL_SMALL_STACK
     byte* encryptedKey;
 #else
-    byte encryptedKey[MAX_ENCRYPTED_KEY_SZ];
-#endif
-
-    WC_PKCS7_KARI* kari;
-
-    if (pkcs7 == NULL || pkcs7->singleCert == NULL ||
-        pkcs7->singleCertSz == 0 || pkiMsg == NULL ||
+    byte  encryptedKey[MAX_ENCRYPTED_KEY_SZ];
+#endif
+
+    byte* pkiMsg    = in;
+    word32 pkiMsgSz = inSz;
+#ifndef NO_PKCS7_STREAM
+    word32 tmpIdx = (idx) ? *idx : 0;
+    long rc;
+#endif
+
+    WOLFSSL_ENTER("wc_PKCS7_DecryptKari");
+    if (pkcs7 == NULL || pkiMsg == NULL ||
+            ((pkcs7->singleCert == NULL || pkcs7->singleCertSz == 0) &&
+              pkcs7->wrapCEKCb == NULL) ||
         idx == NULL || decryptedKey == NULL || decryptedKeySz == NULL) {
         return BAD_FUNC_ARG;
     }
 
-    kari = wc_PKCS7_KariNew(pkcs7, WC_PKCS7_DECODE);
-    if (kari == NULL)
-        return MEMORY_E;
-
-#ifdef WOLFSSL_SMALL_STACK
-    encryptedKey = (byte*)XMALLOC(MAX_ENCRYPTED_KEY_SZ, pkcs7->heap,
-                                  DYNAMIC_TYPE_PKCS7);
-    if (encryptedKey == NULL) {
-        wc_PKCS7_KariFree(kari);
-        return MEMORY_E;
-    }
-#endif
-    encryptedKeySz = MAX_ENCRYPTED_KEY_SZ;
-
-    /* parse cert and key */
-    ret = wc_PKCS7_KariParseRecipCert(kari, (byte*)pkcs7->singleCert,
-                                      pkcs7->singleCertSz, pkcs7->privateKey,
-                                      pkcs7->privateKeySz);
-    if (ret != 0) {
-        wc_PKCS7_KariFree(kari);
-        #ifdef WOLFSSL_SMALL_STACK
-            XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
-        #endif
-        return ret;
-    }
-
-    /* remove OriginatorIdentifierOrKey */
-    ret = wc_PKCS7_KariGetOriginatorIdentifierOrKey(kari, pkiMsg,
-                                                    pkiMsgSz, idx);
-    if (ret != 0) {
-        wc_PKCS7_KariFree(kari);
+    switch (pkcs7->state) {
+        case WC_PKCS7_DECRYPT_KARI: {
+        #ifndef NO_PKCS7_STREAM
+            /* @TODO for now just get full buffer, needs divided up */
+            if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz,
+                   (pkcs7->stream->maxLen - pkcs7->stream->totalRd) +
+                   pkcs7->stream->length, &pkiMsg, idx)) != 0) {
+                return ret;
+            }
+
+            rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, in,
+                    inSz);
+            if (rc < 0) {
+                ret = (int)rc;
+                break;
+            }
+            pkiMsgSz = (word32)rc;
+        #endif
+            WC_PKCS7_KARI* kari;
+
+            kari = wc_PKCS7_KariNew(pkcs7, WC_PKCS7_DECODE);
+            if (kari == NULL)
+                return MEMORY_E;
+
         #ifdef WOLFSSL_SMALL_STACK
-            XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
-        #endif
-        return ret;
-    }
-
-    /* try and remove optional UserKeyingMaterial */
-    ret = wc_PKCS7_KariGetUserKeyingMaterial(kari, pkiMsg, pkiMsgSz, idx);
-    if (ret != 0) {
-        wc_PKCS7_KariFree(kari);
-        #ifdef WOLFSSL_SMALL_STACK
-            XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
-        #endif
-        return ret;
-    }
-
-    /* remove KeyEncryptionAlgorithmIdentifier */
-    ret = wc_PKCS7_KariGetKeyEncryptionAlgorithmId(kari, pkiMsg, pkiMsgSz,
-                                                   idx, &keyAgreeOID,
-                                                   &keyWrapOID);
-    if (ret != 0) {
-        wc_PKCS7_KariFree(kari);
-        #ifdef WOLFSSL_SMALL_STACK
-            XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
-        #endif
-        return ret;
-    }
-
-    /* if user has not explicitly set keyAgreeOID, set from one in bundle */
-    if (pkcs7->keyAgreeOID == 0)
-        pkcs7->keyAgreeOID = keyAgreeOID;
-
-    /* set direction based on key wrap algorithm */
-    switch (keyWrapOID) {
-#ifndef NO_AES
-    #ifdef WOLFSSL_AES_128
-        case AES128_WRAP:
-    #endif
-    #ifdef WOLFSSL_AES_192
-        case AES192_WRAP:
-    #endif
-    #ifdef WOLFSSL_AES_256
-        case AES256_WRAP:
-    #endif
-            direction = AES_DECRYPTION;
-            break;
-#endif
-        default:
+            encryptedKey = (byte*)XMALLOC(MAX_ENCRYPTED_KEY_SZ, pkcs7->heap,
+                                          DYNAMIC_TYPE_PKCS7);
+            if (encryptedKey == NULL) {
+                wc_PKCS7_KariFree(kari);
+                return MEMORY_E;
+            }
+        #endif
+            encryptedKeySz = MAX_ENCRYPTED_KEY_SZ;
+
+            /* parse cert and key */
+            if (pkcs7->singleCert != NULL) {
+                ret = wc_PKCS7_KariParseRecipCert(kari, (byte*)pkcs7->singleCert,
+                                              pkcs7->singleCertSz, pkcs7->privateKey,
+                                              pkcs7->privateKeySz);
+                if (ret != 0) {
+                    wc_PKCS7_KariFree(kari);
+                #ifdef WOLFSSL_SMALL_STACK
+                    XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+                #endif
+                    return ret;
+                }
+            }
+
+            /* remove OriginatorIdentifierOrKey */
+            ret = wc_PKCS7_KariGetOriginatorIdentifierOrKey(kari, pkiMsg,
+                                                            pkiMsgSz, idx);
+            if (ret != 0) {
+                wc_PKCS7_KariFree(kari);
+                #ifdef WOLFSSL_SMALL_STACK
+                    XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+                #endif
+                return ret;
+            }
+
+            /* try and remove optional UserKeyingMaterial */
+            ret = wc_PKCS7_KariGetUserKeyingMaterial(kari, pkiMsg, pkiMsgSz, idx);
+            if (ret != 0) {
+                wc_PKCS7_KariFree(kari);
+                #ifdef WOLFSSL_SMALL_STACK
+                    XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+                #endif
+                return ret;
+            }
+
+            /* remove KeyEncryptionAlgorithmIdentifier */
+            ret = wc_PKCS7_KariGetKeyEncryptionAlgorithmId(kari, pkiMsg,
+                    pkiMsgSz, idx, &keyAgreeOID, &keyWrapOID);
+            if (ret != 0) {
+                wc_PKCS7_KariFree(kari);
+                #ifdef WOLFSSL_SMALL_STACK
+                    XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+                #endif
+                return ret;
+            }
+
+            /* if user has not explicitly set keyAgreeOID, set from one in bundle */
+            if (pkcs7->keyAgreeOID == 0)
+                pkcs7->keyAgreeOID = keyAgreeOID;
+
+            /* set direction based on key wrap algorithm */
+            switch (keyWrapOID) {
+        #ifndef NO_AES
+            #ifdef WOLFSSL_AES_128
+                case AES128_WRAP:
+            #endif
+            #ifdef WOLFSSL_AES_192
+                case AES192_WRAP:
+            #endif
+            #ifdef WOLFSSL_AES_256
+                case AES256_WRAP:
+            #endif
+                    direction = AES_DECRYPTION;
+                    break;
+        #endif
+                default:
+                    WOLFSSL_MSG("AES key wrap algorithm unsupported");
+                    if (pkcs7->wrapCEKCb) {
+                        WOLFSSL_MSG("Direction not set!");
+                        break; /* if unwrapping callback is set then do not
+                                * force restriction of supported wrap
+                                * algorithms */
+                    }
+
+                    wc_PKCS7_KariFree(kari);
+                    #ifdef WOLFSSL_SMALL_STACK
+                        XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+                    #endif
+                    return BAD_KEYWRAP_ALG_E;
+            }
+
+            /* remove RecipientEncryptedKeys */
+            ret = wc_PKCS7_KariGetRecipientEncryptedKeys(kari, pkiMsg, pkiMsgSz,
+                           idx, recipFound, encryptedKey, &encryptedKeySz, rid);
+            if (ret != 0) {
+                wc_PKCS7_KariFree(kari);
+                #ifdef WOLFSSL_SMALL_STACK
+                    XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+                #endif
+                return ret;
+            }
+
+            /* decrypt CEK with KEK */
+            if (pkcs7->wrapCEKCb) {
+                word32 tmpKeySz = 0;
+                byte* tmpKeyDer = NULL;
+
+                ret = wc_ecc_export_x963(kari->senderKey, NULL, &tmpKeySz);
+                if (ret != LENGTH_ONLY_E) {
+                    return ret;
+                }
+
+                /* buffer space for algorithm/curve */
+                tmpKeySz += MAX_SEQ_SZ;
+                tmpKeySz += 2 * MAX_ALGO_SZ;
+
+                /* buffer space for public key sequence */
+                tmpKeySz += MAX_SEQ_SZ;
+                tmpKeySz += TRAILING_ZERO;
+
+                tmpKeyDer = (byte*)XMALLOC(tmpKeySz, pkcs7->heap,
+                        DYNAMIC_TYPE_TMP_BUFFER);
+                if (tmpKeyDer == NULL) {
+                    return MEMORY_E;
+                }
+
+                ret = wc_EccPublicKeyToDer(kari->senderKey, tmpKeyDer,
+                                         tmpKeySz, 1);
+                if (ret < 0) {
+                    XFREE(tmpKeyDer, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+                    return ret;
+                }
+                tmpKeySz = (word32)ret;
+
+                keySz = pkcs7->wrapCEKCb(pkcs7, encryptedKey, encryptedKeySz,
+                        rid, KEYID_SIZE, tmpKeyDer, tmpKeySz,
+                        decryptedKey, *decryptedKeySz,
+                        keyWrapOID, (int)PKCS7_KARI, direction);
+                XFREE(tmpKeyDer, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+
+                if (keySz  > 0) {
+                    /* If unwrapping was successful then consider recipient
+                     * found. Checking for NULL singleCert to confirm previous
+                     * SID check was not done */
+                    if (pkcs7->singleCert == NULL)
+                        *recipFound = 1;
+                }
+            }
+            else {
+                /* create KEK */
+                ret = wc_PKCS7_KariGenerateKEK(kari, keyWrapOID, pkcs7->keyAgreeOID);
+                if (ret != 0) {
+                    wc_PKCS7_KariFree(kari);
+                    #ifdef WOLFSSL_SMALL_STACK
+                        XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+                    #endif
+                    return ret;
+                }
+
+                /* decrypt CEK with KEK */
+                keySz = wc_PKCS7_KeyWrap(encryptedKey, encryptedKeySz, kari->kek,
+                                         kari->kekSz, decryptedKey, *decryptedKeySz,
+                                         keyWrapOID, direction);
+            }
+            if (keySz <= 0) {
+                wc_PKCS7_KariFree(kari);
+                #ifdef WOLFSSL_SMALL_STACK
+                    XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+                #endif
+                return keySz;
+            }
+            *decryptedKeySz = (word32)keySz;
+
             wc_PKCS7_KariFree(kari);
             #ifdef WOLFSSL_SMALL_STACK
                 XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
             #endif
-            WOLFSSL_MSG("AES key wrap algorithm unsupported");
-            return BAD_KEYWRAP_ALG_E;
-    }
-
-    /* remove RecipientEncryptedKeys */
-    ret = wc_PKCS7_KariGetRecipientEncryptedKeys(kari, pkiMsg, pkiMsgSz,
-                               idx, recipFound, encryptedKey, &encryptedKeySz);
-    if (ret != 0) {
-        wc_PKCS7_KariFree(kari);
-        #ifdef WOLFSSL_SMALL_STACK
-            XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
-        #endif
-        return ret;
-    }
-
-    /* create KEK */
-    ret = wc_PKCS7_KariGenerateKEK(kari, keyWrapOID, pkcs7->keyAgreeOID);
-    if (ret != 0) {
-        wc_PKCS7_KariFree(kari);
-        #ifdef WOLFSSL_SMALL_STACK
-            XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
-        #endif
-        return ret;
-    }
-
-    /* decrypt CEK with KEK */
-    keySz = wc_PKCS7_KariKeyWrap(encryptedKey, encryptedKeySz, kari->kek,
-                                 kari->kekSz, decryptedKey, *decryptedKeySz,
-                                 keyWrapOID, direction);
-    if (keySz <= 0) {
-        wc_PKCS7_KariFree(kari);
-        #ifdef WOLFSSL_SMALL_STACK
-            XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
-        #endif
-        return keySz;
-    }
-    *decryptedKeySz = (word32)keySz;
-
-    wc_PKCS7_KariFree(kari);
-    #ifdef WOLFSSL_SMALL_STACK
-        XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
-    #endif
-
-    return 0;
-#else
-    (void)pkcs7;
+            #ifndef NO_PKCS7_STREAM
+            if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, idx)) != 0) {
+                break;
+            }
+            #endif
+            ret = 0; /* success */
+        }
+        break;
+
+        default:
+            WOLFSSL_MSG("PKCS7 kari unknown state");
+            ret = BAD_FUNC_ARG;
+
+    }
+
     (void)pkiMsg;
     (void)pkiMsgSz;
+
+    return ret;
+#else
+    (void)in;
+    (void)inSz;
+    (void)pkcs7;
     (void)idx;
     (void)decryptedKey;
     (void)decryptedKeySz;
@@ -4339,12 +9458,19 @@
 
 
 /* decode ASN.1 RecipientInfos SET, return 0 on success, < 0 on error */
-static int wc_PKCS7_DecodeRecipientInfos(PKCS7* pkcs7, byte* pkiMsg,
-                            word32 pkiMsgSz, word32* idx, byte* decryptedKey,
+static int wc_PKCS7_DecryptRecipientInfos(PKCS7* pkcs7, byte* in,
+                            word32  inSz, word32* idx, byte* decryptedKey,
                             word32* decryptedKeySz, int* recipFound)
 {
     word32 savedIdx;
-    int version, ret, length;
+    int version, ret = 0, length;
+    byte* pkiMsg = in;
+    word32 pkiMsgSz = inSz;
+    byte  tag;
+#ifndef NO_PKCS7_STREAM
+    word32 tmpIdx;
+    long rc;
+#endif
 
     if (pkcs7 == NULL || pkiMsg == NULL || idx == NULL ||
         decryptedKey == NULL || decryptedKeySz == NULL ||
@@ -4352,7 +9478,67 @@
         return BAD_FUNC_ARG;
     }
 
+    WOLFSSL_ENTER("wc_PKCS7_DecryptRecipientInfos");
+#ifndef NO_PKCS7_STREAM
+    tmpIdx = *idx;
+#endif
+
+    /* check if in the process of decrypting */
+    switch (pkcs7->state) {
+        case WC_PKCS7_DECRYPT_KTRI:
+        case WC_PKCS7_DECRYPT_KTRI_2:
+        case WC_PKCS7_DECRYPT_KTRI_3:
+        #ifndef NO_RSA
+            ret = wc_PKCS7_DecryptKtri(pkcs7, in, inSz, idx,
+                                      decryptedKey, decryptedKeySz, recipFound);
+        #else
+            return NOT_COMPILED_IN;
+        #endif
+            break;
+
+        case WC_PKCS7_DECRYPT_KARI:
+                ret = wc_PKCS7_DecryptKari(pkcs7, in, inSz, idx,
+                                      decryptedKey, decryptedKeySz, recipFound);
+                break;
+
+        case WC_PKCS7_DECRYPT_KEKRI:
+                ret = wc_PKCS7_DecryptKekri(pkcs7, in, inSz, idx,
+                                      decryptedKey, decryptedKeySz, recipFound);
+                break;
+
+        case WC_PKCS7_DECRYPT_PWRI:
+        #if !defined(NO_PWDBASED) && !defined(NO_SHA)
+                ret = wc_PKCS7_DecryptPwri(pkcs7, in, inSz, idx,
+                                      decryptedKey, decryptedKeySz, recipFound);
+        #else
+                return NOT_COMPILED_IN;
+        #endif
+                break;
+
+        case WC_PKCS7_DECRYPT_ORI:
+            ret = wc_PKCS7_DecryptOri(pkcs7, in, inSz, idx,
+                                      decryptedKey, decryptedKeySz, recipFound);
+            break;
+
+        default:
+            /* not in decrypting state */
+            break;
+    }
+
+    if (ret < 0) {
+        return ret;
+    }
+
     savedIdx = *idx;
+#ifndef NO_PKCS7_STREAM
+    rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, in, inSz);
+    if (rc < 0) {
+        return (int)rc;
+    }
+    pkiMsgSz = (word32)rc;
+    if (pkcs7->stream->length > 0)
+        pkiMsg = pkcs7->stream->buffer;
+#endif
 
     /* when looking for next recipient, use first sequence and version to
      * indicate there is another, if not, move on */
@@ -4362,17 +9548,15 @@
          * last good saved one */
         if (GetSequence(pkiMsg, idx, &length, pkiMsgSz) > 0) {
 
-            if (GetMyVersion(pkiMsg, idx, &version, pkiMsgSz) < 0) {
-                *idx = savedIdx;
-                break;
-            }
-
-            if (version != 0)
-                return ASN_VERSION_E;
-
         #ifndef NO_RSA
             /* found ktri */
-            ret = wc_PKCS7_DecodeKtri(pkcs7, pkiMsg, pkiMsgSz, idx,
+            #ifndef NO_PKCS7_STREAM
+            if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, idx)) != 0) {
+                break;
+            }
+            #endif
+            wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_DECRYPT_KTRI);
+            ret = wc_PKCS7_DecryptKtri(pkcs7, in, inSz, idx,
                                       decryptedKey, decryptedKeySz,
                                       recipFound);
             if (ret != 0)
@@ -4382,11 +9566,18 @@
         #endif
         }
         else {
+            word32 localIdx;
             /* kari is IMPLICIT[1] */
             *idx = savedIdx;
-            if (pkiMsg[*idx] == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 1)) {
+            localIdx = *idx;
+
+            if (GetASNTag(pkiMsg, &localIdx, &tag, pkiMsgSz) != 0) {
+                /* no room for recipient info */
+                break;
+            }
+
+            if (tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 1)) {
                 (*idx)++;
-
                 if (GetLength(pkiMsg, idx, &length, pkiMsgSz) < 0)
                     return ASN_PARSE_E;
 
@@ -4399,13 +9590,96 @@
                     return ASN_VERSION_E;
 
                 /* found kari */
-                ret = wc_PKCS7_DecodeKari(pkcs7, pkiMsg, pkiMsgSz, idx,
+            #ifndef NO_PKCS7_STREAM
+                if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, idx)) != 0) {
+                    break;
+                }
+            #endif
+                wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_DECRYPT_KARI);
+                ret = wc_PKCS7_DecryptKari(pkcs7, in, inSz, idx,
                                           decryptedKey, decryptedKeySz,
                                           recipFound);
                 if (ret != 0)
                     return ret;
-            }
-            else {
+
+            /* kekri is IMPLICIT[2] */
+            } else if (tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 2)) {
+                (*idx)++;
+
+                if (GetLength(pkiMsg, idx, &version, pkiMsgSz) < 0)
+                    return ASN_PARSE_E;
+
+                if (GetMyVersion(pkiMsg, idx, &version, pkiMsgSz) < 0) {
+                    *idx = savedIdx;
+                    break;
+                }
+
+                if (version != 4)
+                    return ASN_VERSION_E;
+
+                /* found kekri */
+            #ifndef NO_PKCS7_STREAM
+                if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, idx)) != 0) {
+                    break;
+                }
+            #endif
+                wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_DECRYPT_KEKRI);
+                ret = wc_PKCS7_DecryptKekri(pkcs7, in, inSz, idx,
+                                           decryptedKey, decryptedKeySz,
+                                           recipFound);
+                if (ret != 0)
+                    return ret;
+
+            /* pwri is IMPLICIT[3] */
+            } else if (tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 3)) {
+        #if !defined(NO_PWDBASED) && !defined(NO_SHA)
+                (*idx)++;
+
+                if (GetLength(pkiMsg, idx, &version, pkiMsgSz) < 0)
+                    return ASN_PARSE_E;
+
+                if (GetMyVersion(pkiMsg, idx, &version, pkiMsgSz) < 0) {
+                    *idx = savedIdx;
+                    break;
+                }
+
+                if (version != 0)
+                    return ASN_VERSION_E;
+
+                /* found pwri */
+            #ifndef NO_PKCS7_STREAM
+                if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, idx)) != 0) {
+                    break;
+                }
+            #endif
+                wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_DECRYPT_PWRI);
+                ret = wc_PKCS7_DecryptPwri(pkcs7, in, inSz, idx,
+                                           decryptedKey, decryptedKeySz,
+                                           recipFound);
+                if (ret != 0)
+                    return ret;
+        #else
+                return NOT_COMPILED_IN;
+        #endif
+
+            /* ori is IMPLICIT[4] */
+            } else if (tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 4)) {
+                (*idx)++;
+
+                /* found ori */
+            #ifndef NO_PKCS7_STREAM
+                if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, idx)) != 0) {
+                    break;
+                }
+            #endif
+                wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_DECRYPT_ORI);
+                ret = wc_PKCS7_DecryptOri(pkcs7, in, inSz, idx,
+                                          decryptedKey, decryptedKeySz,
+                                          recipFound);
+                if (ret != 0)
+                    return ret;
+
+            } else {
                 /* failed to find RecipientInfo, restore idx and continue */
                 *idx = savedIdx;
                 break;
@@ -4416,267 +9690,1843 @@
         savedIdx = *idx;
     }
 
+    return ret;
+}
+
+
+/* Parse encoded EnvelopedData bundle up to RecipientInfo set.
+ *
+ * return size of RecipientInfo SET on success, negative upon error */
+static int wc_PKCS7_ParseToRecipientInfoSet(PKCS7* pkcs7, byte* in,
+                                            word32 inSz, word32* idx,
+                                            int type)
+{
+    int version = 0, length, ret = 0;
+    word32 contentType;
+    byte* pkiMsg = in;
+    word32 pkiMsgSz = inSz;
+    byte  tag;
+#ifndef NO_PKCS7_STREAM
+    word32 tmpIdx = 0;
+    long rc;
+#endif
+
+    if (pkcs7 == NULL || pkiMsg == NULL || pkiMsgSz == 0 || idx == NULL)
+        return BAD_FUNC_ARG;
+
+    if ((type != ENVELOPED_DATA) && (type != AUTH_ENVELOPED_DATA) &&
+            pkcs7->contentOID != FIRMWARE_PKG_DATA)
+        return BAD_FUNC_ARG;
+
+#ifndef NO_PKCS7_STREAM
+    if (pkcs7->stream == NULL) {
+        if ((ret = wc_PKCS7_CreateStream(pkcs7)) != 0) {
+            return ret;
+        }
+    }
+#endif
+
+    switch (pkcs7->state) {
+        case WC_PKCS7_INFOSET_START:
+        case WC_PKCS7_INFOSET_BER:
+        case WC_PKCS7_INFOSET_STAGE1:
+        case WC_PKCS7_INFOSET_STAGE2:
+        case WC_PKCS7_INFOSET_END:
+            break;
+
+        default:
+            WOLFSSL_MSG("Warning, setting PKCS7 info state to start");
+            wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_INFOSET_START);
+    }
+
+    switch (pkcs7->state) {
+        case WC_PKCS7_INFOSET_START:
+        #ifndef NO_PKCS7_STREAM
+            if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, MAX_SEQ_SZ +
+                            ASN_TAG_SZ, &pkiMsg, idx)) != 0) {
+                return ret;
+            }
+
+            rc  = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_SEQ_PEEK, in, inSz);
+            if (rc < 0) {
+                ret = (int)rc;
+                break;
+            }
+            pkiMsgSz = (word32)rc;
+        #endif
+            /* read past ContentInfo, verify type is envelopedData */
+            if (ret == 0 && GetSequence(pkiMsg, idx, &length, pkiMsgSz) < 0)
+            {
+                ret = ASN_PARSE_E;
+            }
+
+            if (ret == 0 && length == 0 && pkiMsg[(*idx)-1] == 0x80) {
+        #ifdef ASN_BER_TO_DER
+                word32 len;
+
+                wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_INFOSET_BER);
+                FALL_THROUGH;
+
+                /* full buffer is needed for conversion */
+                case WC_PKCS7_INFOSET_BER:
+                #ifndef NO_PKCS7_STREAM
+                if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz,
+                            pkcs7->stream->maxLen - pkcs7->stream->length,
+                            &pkiMsg, idx)) != 0) {
+                    return ret;
+                }
+
+                rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK,
+                        in, inSz);
+                if (rc < 0) {
+                    ret = (int)rc;
+                    break;
+                }
+                pkiMsgSz = (word32)rc;
+                #endif
+
+                len = 0;
+
+                ret = wc_BerToDer(pkiMsg, pkiMsgSz, NULL, &len);
+                if (ret != LENGTH_ONLY_E)
+                    return ret;
+                pkcs7->der = (byte*)XMALLOC(len, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+                if (pkcs7->der == NULL)
+                    return MEMORY_E;
+                ret = wc_BerToDer(pkiMsg, pkiMsgSz, pkcs7->der, &len);
+                if (ret < 0)
+                    return ret;
+
+                pkiMsg = in = pkcs7->der;
+                pkiMsgSz = pkcs7->derSz = len;
+                *idx = 0;
+
+                if (GetSequence(pkiMsg, idx, &length, pkiMsgSz) < 0)
+                    return ASN_PARSE_E;
+        #else
+                return BER_INDEF_E;
+        #endif
+            }
+        #ifndef NO_PKCS7_STREAM
+            if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, idx)) != 0) {
+                break;
+            }
+        #endif
+            wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_INFOSET_STAGE1);
+            FALL_THROUGH;
+
+        case WC_PKCS7_INFOSET_STAGE1:
+        #ifndef NO_PKCS7_STREAM
+            if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, MAX_OID_SZ +
+                            MAX_LENGTH_SZ + ASN_TAG_SZ, &pkiMsg, idx)) != 0) {
+                return ret;
+            }
+
+            pkiMsgSz = (pkcs7->stream->length > 0)? pkcs7->stream->length :inSz;
+        #endif
+            if (pkcs7->contentOID != FIRMWARE_PKG_DATA ||
+                    type == AUTH_ENVELOPED_DATA) {
+                if (ret == 0 && wc_GetContentType(pkiMsg, idx, &contentType,
+                            pkiMsgSz) < 0)
+                    ret = ASN_PARSE_E;
+
+                if (ret == 0) {
+                    if (type == ENVELOPED_DATA && contentType != ENVELOPED_DATA) {
+                        WOLFSSL_MSG("PKCS#7 input not of type EnvelopedData");
+                        ret = PKCS7_OID_E;
+                    } else if (type == AUTH_ENVELOPED_DATA &&
+                           contentType != AUTH_ENVELOPED_DATA) {
+                        WOLFSSL_MSG("PKCS#7 input not of type AuthEnvelopedData");
+                        ret = PKCS7_OID_E;
+                    }
+                }
+
+                if (ret == 0 && GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) != 0)
+                    ret = ASN_PARSE_E;
+
+                if (ret == 0 && tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC
+                            | 0))
+                    ret = ASN_PARSE_E;
+
+                if (ret == 0 && GetLength_ex(pkiMsg, idx, &length, pkiMsgSz,
+                            NO_USER_CHECK) < 0)
+                    ret = ASN_PARSE_E;
+            }
+
+            if (ret < 0)
+                break;
+
+        #ifndef NO_PKCS7_STREAM
+            if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, idx)) != 0) {
+                    break;
+            }
+        #endif
+            wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_INFOSET_STAGE2);
+            FALL_THROUGH;
+
+        case WC_PKCS7_INFOSET_STAGE2:
+        #ifndef NO_PKCS7_STREAM
+            if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, MAX_SEQ_SZ +
+                            MAX_VERSION_SZ, &pkiMsg, idx)) != 0) {
+                return ret;
+            }
+
+            rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, in,
+                    inSz);
+            if (rc < 0) {
+                ret = (int)rc;
+                break;
+            }
+            pkiMsgSz = (word32)rc;
+        #endif
+            /* remove EnvelopedData and version */
+            if (pkcs7->contentOID != FIRMWARE_PKG_DATA ||
+                    type == AUTH_ENVELOPED_DATA) {
+                if (ret == 0 && GetSequence(pkiMsg, idx, &length, pkiMsgSz) < 0)
+                    ret = ASN_PARSE_E;
+            }
+
+            if (ret == 0 && GetMyVersion(pkiMsg, idx, &version, pkiMsgSz) < 0)
+                ret = ASN_PARSE_E;
+
+            if (ret < 0)
+                break;
+
+        #ifndef NO_PKCS7_STREAM
+            if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, idx)) != 0) {
+                break;
+            }
+
+            pkcs7->stream->varOne = version;
+        #endif
+            wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_INFOSET_END);
+            FALL_THROUGH;
+
+        case WC_PKCS7_INFOSET_END:
+        #ifndef NO_PKCS7_STREAM
+            if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz,
+                            MAX_SET_SZ, &pkiMsg, idx)) != 0) {
+                return ret;
+            }
+
+            rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, in,
+                    inSz);
+            if (rc < 0) {
+                ret = (int)rc;
+                break;
+            }
+            pkiMsgSz = (word32)rc;
+            version = pkcs7->stream->varOne;
+        #endif
+
+            if (type == ENVELOPED_DATA) {
+                /* TODO :: make this more accurate */
+                if ((pkcs7->publicKeyOID == RSAk &&
+                     (version != 0 && version != 2))
+                #ifdef HAVE_ECC
+                        || (pkcs7->publicKeyOID == ECDSAk &&
+                            (version != 0 && version != 2 && version != 3))
+                #endif
+                        ) {
+                    WOLFSSL_MSG("PKCS#7 envelopedData version incorrect");
+                    ret = ASN_VERSION_E;
+                }
+            } else {
+                /* AuthEnvelopedData version MUST be 0 */
+                if (version != 0) {
+                    WOLFSSL_MSG("PKCS#7 AuthEnvelopedData needs to be of version 0");
+                    ret = ASN_VERSION_E;
+                }
+            }
+
+            /* remove RecipientInfo set, get length of set */
+            if (ret == 0 && GetSet(pkiMsg, idx, &length, pkiMsgSz) < 0)
+                ret = ASN_PARSE_E;
+
+            if (ret < 0)
+                break;
+
+        #ifndef NO_PKCS7_STREAM
+            if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, idx)) != 0) {
+                break;
+            }
+        #endif
+
+            if (ret == 0)
+                ret = length;
+
+            break;
+
+        default:
+            WOLFSSL_MSG("Bad PKCS7 info set state");
+            ret = BAD_FUNC_ARG;
+            break;
+    }
+
+    return ret;
+}
+
+
+/* Import secret/private key into a PKCS7 structure. Used for setting
+ * the secret key for decryption a EnvelopedData KEKRI RecipientInfo.
+ *
+ * Returns 0 on success, negative upon error */
+WOLFSSL_API int wc_PKCS7_SetKey(PKCS7* pkcs7, byte* key, word32 keySz)
+{
+    if (pkcs7 == NULL || key == NULL || keySz == 0)
+        return BAD_FUNC_ARG;
+
+    pkcs7->privateKey = key;
+    pkcs7->privateKeySz = keySz;
+
+    return 0;
+}
+
+
+/* append data to encrypted content cache in PKCS7 structure
+ * return 0 on success, negative on error */
+static int PKCS7_CacheEncryptedContent(PKCS7* pkcs7, byte* in, word32 inSz)
+{
+    byte* oldCache;
+    word32 oldCacheSz;
+
+    if (pkcs7 == NULL || in == NULL)
+        return BAD_FUNC_ARG;
+
+    /* save pointer to old cache */
+    oldCache = pkcs7->cachedEncryptedContent;
+    oldCacheSz = pkcs7->cachedEncryptedContentSz;
+
+    /* re-allocate new buffer to fit appended data */
+    pkcs7->cachedEncryptedContent = (byte*)XMALLOC(oldCacheSz + inSz,
+            pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+    if (pkcs7->cachedEncryptedContent == NULL) {
+        pkcs7->cachedEncryptedContentSz = 0;
+        XFREE(oldCache, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        return MEMORY_E;
+    }
+
+    if (oldCache != NULL) {
+        XMEMCPY(pkcs7->cachedEncryptedContent, oldCache, oldCacheSz);
+    }
+    XMEMCPY(pkcs7->cachedEncryptedContent + oldCacheSz, in, inSz);
+    pkcs7->cachedEncryptedContentSz += inSz;
+
+    XFREE(oldCache, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+
     return 0;
 }
 
 
 /* unwrap and decrypt PKCS#7 envelopedData object, return decoded size */
-WOLFSSL_API int wc_PKCS7_DecodeEnvelopedData(PKCS7* pkcs7, byte* pkiMsg,
-                                         word32 pkiMsgSz, byte* output,
+WOLFSSL_API int wc_PKCS7_DecodeEnvelopedData(PKCS7* pkcs7, byte* in,
+                                         word32 inSz, byte* output,
                                          word32 outputSz)
 {
     int recipFound = 0;
-    int ret, version, length;
+    int ret, length = 0;
     word32 idx = 0;
-    word32 contentType, encOID;
-    word32 decryptedKeySz;
-
-    int expBlockSz, blockKeySz;
-    byte tmpIv[MAX_CONTENT_IV_SIZE];
-
-#ifdef WOLFSSL_SMALL_STACK
-    byte* decryptedKey;
-#else
-    byte decryptedKey[MAX_ENCRYPTED_KEY_SZ];
-#endif
-    int encryptedContentSz;
+#ifndef NO_PKCS7_STREAM
+    word32 tmpIdx = 0;
+    long rc;
+#endif
+    word32 contentType, encOID = 0;
+    word32 decryptedKeySz = MAX_ENCRYPTED_KEY_SZ;
+
+    int expBlockSz = 0, blockKeySz = 0;
+    byte  tmpIvBuf[MAX_CONTENT_IV_SIZE];
+    byte* tmpIv = tmpIvBuf;
+
+    byte* pkiMsg    = in;
+    word32 pkiMsgSz = inSz;
+    byte* decryptedKey = NULL;
+    int encryptedContentTotalSz = 0;
+    int encryptedContentSz = 0;
     byte padLen;
     byte* encryptedContent = NULL;
-    int explicitOctet;
-
-    if (pkcs7 == NULL || pkcs7->singleCert == NULL ||
-        pkcs7->singleCertSz == 0)
+    int explicitOctet = 0;
+    word32 localIdx;
+    byte   tag;
+
+    if (pkcs7 == NULL)
         return BAD_FUNC_ARG;
 
     if (pkiMsg == NULL || pkiMsgSz == 0 ||
         output == NULL || outputSz == 0)
         return BAD_FUNC_ARG;
 
-    /* read past ContentInfo, verify type is envelopedData */
-    if (GetSequence(pkiMsg, &idx, &length, pkiMsgSz) < 0)
-        return ASN_PARSE_E;
-
-    if (length == 0 && pkiMsg[idx-1] == 0x80) {
-#ifdef ASN_BER_TO_DER
-        word32 len = 0;
-
-        ret = wc_BerToDer(pkiMsg, pkiMsgSz, NULL, &len);
-        if (ret != LENGTH_ONLY_E)
-            return ret;
-        pkcs7->der = (byte*)XMALLOC(len, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
-        if (pkcs7->der == NULL)
-            return MEMORY_E;
-        ret = wc_BerToDer(pkiMsg, pkiMsgSz, pkcs7->der, &len);
-        if (ret < 0)
+#ifndef NO_PKCS7_STREAM
+    (void)tmpIv; /* help out static analysis */
+    if (pkcs7->stream == NULL) {
+        if ((ret = wc_PKCS7_CreateStream(pkcs7)) != 0) {
             return ret;
-
-        pkiMsg = pkcs7->der;
-        pkiMsgSz = len;
-        idx = 0;
-        if (GetSequence(pkiMsg, &idx, &length, pkiMsgSz) < 0)
-            return ASN_PARSE_E;
-#else
-        return BER_INDEF_E;
-#endif
-    }
-
-    if (wc_GetContentType(pkiMsg, &idx, &contentType, pkiMsgSz) < 0)
-        return ASN_PARSE_E;
-
-    if (contentType != ENVELOPED_DATA) {
-        WOLFSSL_MSG("PKCS#7 input not of type EnvelopedData");
-        return PKCS7_OID_E;
-    }
-
-    if (pkiMsg[idx++] != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0))
-        return ASN_PARSE_E;
-
-    if (GetLength(pkiMsg, &idx, &length, pkiMsgSz) < 0)
-        return ASN_PARSE_E;
-
-    /* remove EnvelopedData and version */
-    if (GetSequence(pkiMsg, &idx, &length, pkiMsgSz) < 0)
-        return ASN_PARSE_E;
-
-    if (GetMyVersion(pkiMsg, &idx, &version, pkiMsgSz) < 0)
-        return ASN_PARSE_E;
-
-    /* TODO :: make this more accurate */
-    if ((pkcs7->publicKeyOID == RSAk && version != 0)
-    #ifdef HAVE_ECC
-            || (pkcs7->publicKeyOID == ECDSAk && version != 2)
-    #endif
-            ) {
-        WOLFSSL_MSG("PKCS#7 envelopedData needs to be of version 0");
-        return ASN_VERSION_E;
-    }
-
-    /* walk through RecipientInfo set, find correct recipient */
-    if (GetSet(pkiMsg, &idx, &length, pkiMsgSz) < 0)
-        return ASN_PARSE_E;
-
-#ifdef WOLFSSL_SMALL_STACK
-    decryptedKey = (byte*)XMALLOC(MAX_ENCRYPTED_KEY_SZ, pkcs7->heap,
+        }
+    }
+#endif
+
+    switch (pkcs7->state) {
+        case WC_PKCS7_START:
+        case WC_PKCS7_INFOSET_START:
+        case WC_PKCS7_INFOSET_BER:
+        case WC_PKCS7_INFOSET_STAGE1:
+        case WC_PKCS7_INFOSET_STAGE2:
+        case WC_PKCS7_INFOSET_END:
+            ret = wc_PKCS7_ParseToRecipientInfoSet(pkcs7, pkiMsg, pkiMsgSz,
+                    &idx, ENVELOPED_DATA);
+            if (ret < 0) {
+                break;
+            }
+
+        #ifdef ASN_BER_TO_DER
+            /* check if content was BER and has been converted to DER */
+            if (pkcs7->derSz > 0)
+                pkiMsg = in = pkcs7->der;
+        #endif
+
+            decryptedKey = (byte*)XMALLOC(MAX_ENCRYPTED_KEY_SZ, pkcs7->heap,
                                                        DYNAMIC_TYPE_PKCS7);
-    if (decryptedKey == NULL)
-        return MEMORY_E;
-#endif
-    decryptedKeySz = MAX_ENCRYPTED_KEY_SZ;
-
-    ret = wc_PKCS7_DecodeRecipientInfos(pkcs7, pkiMsg, pkiMsgSz, &idx,
+            if (decryptedKey == NULL)
+                return MEMORY_E;
+            wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_ENV_2);
+        #ifndef NO_PKCS7_STREAM
+            tmpIdx = idx;
+            pkcs7->stream->aad = decryptedKey;
+        #endif
+            FALL_THROUGH;
+
+        case WC_PKCS7_ENV_2:
+        #ifndef NO_PKCS7_STREAM
+            /* store up enough buffer for initial info set decode */
+            if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, MAX_LENGTH_SZ +
+                            MAX_VERSION_SZ + ASN_TAG_SZ, &pkiMsg, &idx)) != 0) {
+                return ret;
+            }
+        #endif
+            FALL_THROUGH;
+
+        case WC_PKCS7_DECRYPT_KTRI:
+        case WC_PKCS7_DECRYPT_KTRI_2:
+        case WC_PKCS7_DECRYPT_KTRI_3:
+        case WC_PKCS7_DECRYPT_KARI:
+        case WC_PKCS7_DECRYPT_KEKRI:
+        case WC_PKCS7_DECRYPT_PWRI:
+        case WC_PKCS7_DECRYPT_ORI:
+        #ifndef NO_PKCS7_STREAM
+            decryptedKey   = pkcs7->stream->aad;
+            decryptedKeySz = MAX_ENCRYPTED_KEY_SZ;
+        #endif
+
+            ret = wc_PKCS7_DecryptRecipientInfos(pkcs7, in, inSz, &idx,
                                         decryptedKey, &decryptedKeySz,
                                         &recipFound);
-    if (ret != 0) {
-#ifdef WOLFSSL_SMALL_STACK
-        XFREE(decryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
-#endif
-        return ret;
-    }
-
-    if (recipFound == 0) {
-        WOLFSSL_MSG("No recipient found in envelopedData that matches input");
-#ifdef WOLFSSL_SMALL_STACK
-        XFREE(decryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
-#endif
-        return PKCS7_RECIP_E;
-    }
-
-    /* remove EncryptedContentInfo */
-    if (GetSequence(pkiMsg, &idx, &length, pkiMsgSz) < 0) {
-#ifdef WOLFSSL_SMALL_STACK
-        XFREE(decryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
-#endif
-        return ASN_PARSE_E;
-    }
-
-    if (wc_GetContentType(pkiMsg, &idx, &contentType, pkiMsgSz) < 0) {
-#ifdef WOLFSSL_SMALL_STACK
-        XFREE(decryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
-#endif
-        return ASN_PARSE_E;
-    }
-
-    if (GetAlgoId(pkiMsg, &idx, &encOID, oidBlkType, pkiMsgSz) < 0) {
-#ifdef WOLFSSL_SMALL_STACK
-        XFREE(decryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
-#endif
-        return ASN_PARSE_E;
-    }
-
-    blockKeySz = wc_PKCS7_GetOIDKeySize(encOID);
-    if (blockKeySz < 0) {
-#ifdef WOLFSSL_SMALL_STACK
-        XFREE(decryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
-#endif
-        return blockKeySz;
-    }
-
-    expBlockSz = wc_PKCS7_GetOIDBlockSize(encOID);
-    if (expBlockSz < 0) {
-#ifdef WOLFSSL_SMALL_STACK
-        XFREE(decryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
-#endif
-        return expBlockSz;
-    }
-
-    /* get block cipher IV, stored in OPTIONAL parameter of AlgoID */
-    if (pkiMsg[idx++] != ASN_OCTET_STRING) {
-#ifdef WOLFSSL_SMALL_STACK
-        XFREE(decryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
-#endif
-        return ASN_PARSE_E;
-    }
-
-    if (GetLength(pkiMsg, &idx, &length, pkiMsgSz) < 0) {
-#ifdef WOLFSSL_SMALL_STACK
-        XFREE(decryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
-#endif
-        return ASN_PARSE_E;
-    }
-
-    if (length != expBlockSz) {
-        WOLFSSL_MSG("Incorrect IV length, must be of content alg block size");
-#ifdef WOLFSSL_SMALL_STACK
+            if (ret == 0 && recipFound == 0) {
+                WOLFSSL_MSG("No recipient found in envelopedData that matches input");
+                ret = PKCS7_RECIP_E;
+            }
+
+            if (ret != 0)
+                break;
+        #ifndef NO_PKCS7_STREAM
+            tmpIdx               = idx;
+            pkcs7->stream->aadSz = decryptedKeySz;
+        #endif
+            wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_ENV_3);
+            FALL_THROUGH;
+
+        case WC_PKCS7_ENV_3:
+
+        #ifndef NO_PKCS7_STREAM
+            if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, MAX_LENGTH_SZ +
+                                                MAX_VERSION_SZ + ASN_TAG_SZ +
+                                                MAX_LENGTH_SZ, &pkiMsg, &idx))
+                                                != 0) {
+                return ret;
+            }
+
+            rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, in,
+                    inSz);
+            if (rc < 0) {
+                ret = (int)rc;
+                break;
+            }
+            pkiMsgSz = (word32)rc;
+        #else
+            ret = 0;
+        #endif
+
+            /* remove EncryptedContentInfo */
+            if (GetSequence(pkiMsg, &idx, &length, pkiMsgSz) < 0) {
+                ret = ASN_PARSE_E;
+            }
+
+            if (ret == 0 && wc_GetContentType(pkiMsg, &idx, &contentType,
+                        pkiMsgSz) < 0) {
+                ret = ASN_PARSE_E;
+            }
+
+            if (ret == 0 && GetAlgoId(pkiMsg, &idx, &encOID, oidBlkType,
+                        pkiMsgSz) < 0) {
+                ret = ASN_PARSE_E;
+            }
+
+            blockKeySz = wc_PKCS7_GetOIDKeySize(encOID);
+            if (ret == 0 && blockKeySz < 0) {
+                ret = blockKeySz;
+            }
+
+            expBlockSz = wc_PKCS7_GetOIDBlockSize(encOID);
+            if (ret == 0 && expBlockSz < 0) {
+                ret = expBlockSz;
+            }
+
+            /* get block cipher IV, stored in OPTIONAL parameter of AlgoID */
+            if (ret == 0 && GetASNTag(pkiMsg, &idx, &tag, pkiMsgSz) != 0) {
+                ret = ASN_PARSE_E;
+            }
+
+            if (ret == 0 && tag != ASN_OCTET_STRING) {
+                ret = ASN_PARSE_E;
+            }
+
+            if (ret == 0 && GetLength(pkiMsg, &idx, &length, pkiMsgSz) < 0) {
+                ret = ASN_PARSE_E;
+            }
+
+            if (ret == 0 && length != expBlockSz) {
+                WOLFSSL_MSG("Incorrect IV length, must be of content alg block size");
+                ret = ASN_PARSE_E;
+            }
+
+            if (ret != 0)
+                break;
+        #ifndef NO_PKCS7_STREAM
+            if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, &idx)) != 0) {
+                break;
+            }
+            wc_PKCS7_StreamStoreVar(pkcs7, encOID, expBlockSz, length);
+            pkcs7->stream->contentSz = blockKeySz;
+            pkcs7->stream->expected = length + MAX_LENGTH_SZ + MAX_LENGTH_SZ +
+                ASN_TAG_SZ + ASN_TAG_SZ;
+        #endif
+            wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_ENV_4);
+            FALL_THROUGH;
+
+        case WC_PKCS7_ENV_4:
+
+        #ifndef NO_PKCS7_STREAM
+            if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz,
+                            pkcs7->stream->expected, &pkiMsg, &idx)) != 0) {
+                return ret;
+            }
+
+            rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, in,
+                    inSz);
+            if (rc < 0) {
+                ret = (int)rc;
+                break;
+            }
+            pkiMsgSz = (word32)rc;
+
+            wc_PKCS7_StreamGetVar(pkcs7, 0, 0, &length);
+            tmpIv = pkcs7->stream->tmpIv;
+            if (tmpIv == NULL) {
+                /* check added to help out static analysis tool */
+                ret = MEMORY_E;
+                break;
+            }
+        #else
+            ret = 0;
+        #endif
+
+            XMEMCPY(tmpIv, &pkiMsg[idx], length);
+            idx += length;
+
+            explicitOctet = 0;
+            localIdx = idx;
+            if (GetASNTag(pkiMsg, &localIdx, &tag, pkiMsgSz) == 0 &&
+                    tag == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | 0)) {
+                explicitOctet = 1;
+            }
+
+            /* read encryptedContent, cont[0] */
+            if (tag != (ASN_CONTEXT_SPECIFIC | 0) &&
+                          tag != (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | 0)) {
+                ret = ASN_PARSE_E;
+            }
+            idx++;
+
+            if (ret == 0 && GetLength(pkiMsg, &idx, &encryptedContentTotalSz,
+                                                               pkiMsgSz) <= 0) {
+                ret = ASN_PARSE_E;
+            }
+
+            if (ret != 0)
+                break;
+
+        #ifndef NO_PKCS7_STREAM
+            if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, &idx)) != 0) {
+                break;
+            }
+            pkcs7->stream->expected = encryptedContentTotalSz;
+            wc_PKCS7_StreamGetVar(pkcs7, &encOID, &expBlockSz, 0);
+            wc_PKCS7_StreamStoreVar(pkcs7, encOID, expBlockSz, explicitOctet);
+        #endif
+            wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_ENV_5);
+            FALL_THROUGH;
+
+        case WC_PKCS7_ENV_5:
+
+        #ifndef NO_PKCS7_STREAM
+            if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz,
+                            pkcs7->stream->expected, &pkiMsg, &idx)) != 0) {
+                return ret;
+            }
+
+            wc_PKCS7_StreamGetVar(pkcs7, &encOID, &expBlockSz, &explicitOctet);
+            tmpIv = pkcs7->stream->tmpIv;
+            encryptedContentTotalSz = pkcs7->stream->expected;
+
+            /* restore decrypted key */
+            decryptedKey   = pkcs7->stream->aad;
+            decryptedKeySz = pkcs7->stream->aadSz;
+            blockKeySz = pkcs7->stream->contentSz;
+        #else
+            ret = 0;
+        #endif
+
+            if (explicitOctet) {
+                /* encrypted content may be fragmented into multiple
+                 * consecutive OCTET STRINGs, if so loop through
+                 * collecting and caching encrypted content bytes */
+                localIdx = idx;
+                while (idx < (localIdx + encryptedContentTotalSz)) {
+
+                    if (GetASNTag(pkiMsg, &idx, &tag, pkiMsgSz) < 0) {
+                        ret = ASN_PARSE_E;
+                    }
+
+                    if (ret == 0 && (tag != ASN_OCTET_STRING)) {
+                        ret = ASN_PARSE_E;
+                    }
+
+                    if (ret == 0 && GetLength(pkiMsg, &idx,
+                                &encryptedContentSz, pkiMsgSz) <= 0) {
+                        ret = ASN_PARSE_E;
+                    }
+
+                    if (ret == 0) {
+                        ret = PKCS7_CacheEncryptedContent(pkcs7, &pkiMsg[idx],
+                                                          encryptedContentSz);
+                    }
+
+                    if (ret != 0) {
+                        break;
+                    }
+
+                    /* advance idx past encrypted content */
+                    idx += encryptedContentSz;
+                }
+
+                if (ret != 0) {
+                    break;
+                }
+
+            } else {
+                /* cache encrypted content, no OCTET STRING */
+                ret = PKCS7_CacheEncryptedContent(pkcs7, &pkiMsg[idx],
+                                                  encryptedContentTotalSz);
+                if (ret != 0) {
+                    break;
+                }
+                idx += encryptedContentTotalSz;
+            }
+
+            /* use cached content */
+            encryptedContent = pkcs7->cachedEncryptedContent;
+            encryptedContentSz = pkcs7->cachedEncryptedContentSz;
+
+            /* decrypt encryptedContent */
+            ret = wc_PKCS7_DecryptContent(pkcs7, encOID, decryptedKey,
+                    blockKeySz, tmpIv, expBlockSz, NULL, 0, NULL, 0,
+                    encryptedContent, encryptedContentSz, encryptedContent);
+            if (ret != 0) {
+                break;
+            }
+
+            padLen = encryptedContent[encryptedContentSz-1];
+
+            /* copy plaintext to output */
+            if (padLen > encryptedContentSz ||
+                    (word32)(encryptedContentSz - padLen) > outputSz) {
+                ret = BUFFER_E;
+                break;
+            }
+            XMEMCPY(output, encryptedContent, encryptedContentSz - padLen);
+
+            /* free memory, zero out keys */
+            ForceZero(decryptedKey, MAX_ENCRYPTED_KEY_SZ);
+            XFREE(decryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+            if (pkcs7->cachedEncryptedContent != NULL) {
+                XFREE(pkcs7->cachedEncryptedContent, pkcs7->heap,
+                      DYNAMIC_TYPE_PKCS7);
+                pkcs7->cachedEncryptedContent = NULL;
+                pkcs7->cachedEncryptedContentSz = 0;
+            }
+
+            ret = encryptedContentSz - padLen;
+        #ifndef NO_PKCS7_STREAM
+            pkcs7->stream->aad = NULL;
+            pkcs7->stream->aadSz = 0;
+            wc_PKCS7_ResetStream(pkcs7);
+        #endif
+            wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_START);
+            break;
+
+        default:
+            WOLFSSL_MSG("PKCS#7 unknown decode enveloped state");
+            ret = BAD_FUNC_ARG;
+    }
+
+#ifndef NO_PKCS7_STREAM
+    if (ret < 0 && ret != WC_PKCS7_WANT_READ_E) {
+        wc_PKCS7_ResetStream(pkcs7);
+        wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_START);
+        if (pkcs7->cachedEncryptedContent != NULL) {
+            XFREE(pkcs7->cachedEncryptedContent, pkcs7->heap,
+                  DYNAMIC_TYPE_PKCS7);
+            pkcs7->cachedEncryptedContent = NULL;
+            pkcs7->cachedEncryptedContentSz = 0;
+        }
+    }
+#else
+    if (decryptedKey != NULL && ret < 0) {
+        ForceZero(decryptedKey, MAX_ENCRYPTED_KEY_SZ);
         XFREE(decryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
-#endif
-        return ASN_PARSE_E;
-    }
-
-    XMEMCPY(tmpIv, &pkiMsg[idx], length);
-    idx += length;
-
-    explicitOctet = pkiMsg[idx] == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | 0);
-
-    /* read encryptedContent, cont[0] */
-    if (pkiMsg[idx] != (ASN_CONTEXT_SPECIFIC | 0) &&
-        pkiMsg[idx] != (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | 0)) {
-#ifdef WOLFSSL_SMALL_STACK
-        XFREE(decryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
-#endif
-        return ASN_PARSE_E;
-    }
-    idx++;
-
-    if (GetLength(pkiMsg, &idx, &encryptedContentSz, pkiMsgSz) <= 0) {
-#ifdef WOLFSSL_SMALL_STACK
-        XFREE(decryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
-#endif
-        return ASN_PARSE_E;
-    }
-
-    if (explicitOctet) {
-        if (pkiMsg[idx++] != ASN_OCTET_STRING) {
-#ifdef WOLFSSL_SMALL_STACK
-            XFREE(decryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
-#endif
-            return ASN_PARSE_E;
-        }
-
-        if (GetLength(pkiMsg, &idx, &encryptedContentSz, pkiMsgSz) <= 0) {
-#ifdef WOLFSSL_SMALL_STACK
+    }
+    if (pkcs7->cachedEncryptedContent != NULL && ret < 0) {
+        XFREE(pkcs7->cachedEncryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        pkcs7->cachedEncryptedContent = NULL;
+        pkcs7->cachedEncryptedContentSz = 0;
+    }
+#endif
+    return ret;
+}
+
+
+/* build PKCS#7 authEnvelopedData content type, return enveloped size */
+int wc_PKCS7_EncodeAuthEnvelopedData(PKCS7* pkcs7, byte* output,
+                                     word32 outputSz)
+{
+#if defined(HAVE_AESGCM) || defined(HAVE_AESCCM)
+    int ret, idx = 0;
+    int totalSz, encryptedOutSz;
+
+    int contentInfoSeqSz, outerContentTypeSz, outerContentSz;
+    byte contentInfoSeq[MAX_SEQ_SZ];
+    byte outerContentType[MAX_ALGO_SZ];
+    byte outerContent[MAX_SEQ_SZ];
+
+    int envDataSeqSz, verSz;
+    byte envDataSeq[MAX_SEQ_SZ];
+    byte ver[MAX_VERSION_SZ];
+
+    WC_RNG rng;
+    int blockSz, blockKeySz;
+    byte* encryptedContent;
+
+    Pkcs7EncodedRecip* tmpRecip = NULL;
+    int recipSz, recipSetSz;
+    byte recipSet[MAX_SET_SZ];
+
+    int encContentOctetSz, encContentSeqSz, contentTypeSz;
+    int contentEncAlgoSz, nonceOctetStringSz, macOctetStringSz;
+    byte encContentSeq[MAX_SEQ_SZ];
+    byte contentType[MAX_ALGO_SZ];
+    byte contentEncAlgo[MAX_ALGO_SZ];
+    byte nonceOctetString[MAX_OCTET_STR_SZ];
+    byte encContentOctet[MAX_OCTET_STR_SZ];
+    byte macOctetString[MAX_OCTET_STR_SZ];
+
+    byte authTag[AES_BLOCK_SIZE];
+    byte nonce[GCM_NONCE_MID_SZ];   /* GCM nonce is larger than CCM */
+    byte macInt[MAX_VERSION_SZ];
+    word32 nonceSz = 0, macIntSz = 0;
+
+    /* authAttribs */
+    byte* flatAuthAttribs = NULL;
+    byte authAttribSet[MAX_SET_SZ];
+    EncodedAttrib authAttribs[MAX_AUTH_ATTRIBS_SZ];
+    word32 authAttribsSz = 0, authAttribsCount = 0;
+    word32 authAttribsSetSz = 0;
+
+    byte* aadBuffer = NULL;
+    word32 aadBufferSz = 0;
+    byte authAttribAadSet[MAX_SET_SZ];
+    word32 authAttribsAadSetSz = 0;
+
+    /* unauthAttribs */
+    byte* flatUnauthAttribs = NULL;
+    byte unauthAttribSet[MAX_SET_SZ];
+    EncodedAttrib unauthAttribs[MAX_UNAUTH_ATTRIBS_SZ];
+    word32 unauthAttribsSz = 0, unauthAttribsCount = 0;
+    word32 unauthAttribsSetSz = 0;
+
+
+    PKCS7Attrib contentTypeAttrib;
+    byte contentTypeValue[MAX_OID_SZ];
+    /* contentType OID (1.2.840.113549.1.9.3) */
+    const byte contentTypeOid[] =
+            { ASN_OBJECT_ID, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xF7, 0x0d, 0x01,
+                             0x09, 0x03 };
+
+    if (pkcs7 == NULL || pkcs7->content == NULL || pkcs7->contentSz == 0)
+        return BAD_FUNC_ARG;
+
+    if (output == NULL || outputSz == 0)
+        return BAD_FUNC_ARG;
+
+    switch (pkcs7->encryptOID) {
+#ifdef HAVE_AESGCM
+    #ifdef WOLFSSL_AES_128
+        case AES128GCMb:
+            break;
+    #endif
+    #ifdef WOLFSSL_AES_192
+        case AES192GCMb:
+            break;
+    #endif
+    #ifdef WOLFSSL_AES_256
+        case AES256GCMb:
+            break;
+    #endif
+#endif
+#ifdef HAVE_AESCCM
+    #ifdef WOLFSSL_AES_128
+        case AES128CCMb:
+            break;
+    #endif
+    #ifdef WOLFSSL_AES_192
+        case AES192CCMb:
+            break;
+    #endif
+    #ifdef WOLFSSL_AES_256
+        case AES256CCMb:
+            break;
+    #endif
+#endif
+        default:
+            WOLFSSL_MSG("CMS AuthEnvelopedData must use AES-GCM or AES-CCM");
+            return BAD_FUNC_ARG;
+    }
+
+    blockKeySz = wc_PKCS7_GetOIDKeySize(pkcs7->encryptOID);
+    if (blockKeySz < 0)
+        return blockKeySz;
+
+    blockSz = wc_PKCS7_GetOIDBlockSize(pkcs7->encryptOID);
+    if (blockSz < 0)
+        return blockSz;
+
+    /* outer content type */
+    ret = wc_SetContentType(AUTH_ENVELOPED_DATA, outerContentType,
+                            sizeof(outerContentType));
+    if (ret < 0)
+        return ret;
+
+    outerContentTypeSz = ret;
+
+    /* version, defined as 0 in RFC 5083 */
+    verSz = SetMyVersion(0, ver, 0);
+
+    /* generate random content encryption key */
+    ret = PKCS7_GenerateContentEncryptionKey(pkcs7, blockKeySz);
+    if (ret != 0) {
+        return ret;
+    }
+
+    /* build RecipientInfo, only if user manually set singleCert and size */
+    if (pkcs7->singleCert != NULL && pkcs7->singleCertSz > 0) {
+        switch (pkcs7->publicKeyOID) {
+        #ifndef NO_RSA
+            case RSAk:
+                ret = wc_PKCS7_AddRecipient_KTRI(pkcs7, pkcs7->singleCert,
+                                                 pkcs7->singleCertSz, 0);
+                break;
+        #endif
+        #ifdef HAVE_ECC
+            case ECDSAk:
+                ret = wc_PKCS7_AddRecipient_KARI(pkcs7, pkcs7->singleCert,
+                                                 pkcs7->singleCertSz,
+                                                 pkcs7->keyWrapOID,
+                                                 pkcs7->keyAgreeOID, pkcs7->ukm,
+                                                 pkcs7->ukmSz, 0);
+                break;
+        #endif
+
+            default:
+                WOLFSSL_MSG("Unsupported RecipientInfo public key type");
+                return BAD_FUNC_ARG;
+        };
+
+        if (ret < 0) {
+            WOLFSSL_MSG("Failed to create RecipientInfo");
+            return ret;
+        }
+    }
+
+    recipSz = wc_PKCS7_GetRecipientListSize(pkcs7);
+    if (recipSz < 0) {
+        return ret;
+
+    } else if (recipSz == 0) {
+        WOLFSSL_MSG("You must add at least one CMS recipient");
+        return PKCS7_RECIP_E;
+    }
+    recipSetSz = SetSet(recipSz, recipSet);
+
+    /* generate random nonce and IV for encryption */
+    switch (pkcs7->encryptOID) {
+#ifdef HAVE_AESGCM
+    #ifdef WOLFSSL_AES_128
+        case AES128GCMb:
+            FALL_THROUGH;
+    #endif
+    #ifdef WOLFSSL_AES_192
+        case AES192GCMb:
+            FALL_THROUGH;
+    #endif
+    #ifdef WOLFSSL_AES_256
+        case AES256GCMb:
+    #endif
+    #if defined(WOLFSSL_AES_128) || defined(WOLFSSL_AES_192) || \
+        defined(WOLFSSL_AES_256)
+            /* GCM nonce is GCM_NONCE_MID_SZ (12) */
+            nonceSz = GCM_NONCE_MID_SZ;
+            break;
+    #endif
+#endif /* HAVE_AESGCM */
+#ifdef HAVE_AESCCM
+    #ifdef WOLFSSL_AES_128
+        case AES128CCMb:
+            FALL_THROUGH;
+    #endif
+    #ifdef WOLFSSL_AES_192
+        case AES192CCMb:
+            FALL_THROUGH;
+    #endif
+    #ifdef WOLFSSL_AES_256
+        case AES256CCMb:
+    #endif
+    #if defined(WOLFSSL_AES_128) || defined(WOLFSSL_AES_192) || \
+        defined(WOLFSSL_AES_256)
+            /* CCM nonce is CCM_NONCE_MIN_SZ (7) */
+            nonceSz = CCM_NONCE_MIN_SZ;
+            break;
+    #endif
+#endif /* HAVE_AESCCM */
+    }
+
+    ret = wc_InitRng_ex(&rng, pkcs7->heap, pkcs7->devId);
+    if (ret != 0)
+        return ret;
+
+    ret = wc_PKCS7_GenerateBlock(pkcs7, &rng, nonce, nonceSz);
+    wc_FreeRng(&rng);
+    if (ret != 0) {
+        return ret;
+    }
+
+
+    /* authAttribs: add contentType attrib if needed */
+    if (pkcs7->contentOID != DATA) {
+
+        /* if type is not id-data, contentType attribute MUST be added */
+        contentTypeAttrib.oid = contentTypeOid;
+        contentTypeAttrib.oidSz = sizeof(contentTypeOid);
+
+        /* try to set from contentOID first, known types */
+        ret = wc_SetContentType(pkcs7->contentOID, contentTypeValue,
+                                sizeof(contentTypeValue));
+        if (ret > 0) {
+            contentTypeAttrib.value = contentTypeValue;
+            contentTypeAttrib.valueSz = ret;
+
+        /* otherwise, try to set from custom content type */
+        } else {
+            if (pkcs7->contentTypeSz == 0) {
+                WOLFSSL_MSG("CMS pkcs7->contentType must be set if "
+                            "contentOID is not");
+                return BAD_FUNC_ARG;
+            }
+            contentTypeAttrib.value = pkcs7->contentType;
+            contentTypeAttrib.valueSz = pkcs7->contentTypeSz;
+        }
+
+        authAttribsSz += EncodeAttributes(authAttribs, 1,
+                                          &contentTypeAttrib, 1);
+        authAttribsCount += 1;
+    }
+
+    /* authAttribs: add in user authenticated attributes */
+    if (pkcs7->authAttribs != NULL && pkcs7->authAttribsSz > 0) {
+        authAttribsSz += EncodeAttributes(authAttribs + authAttribsCount,
+                                 MAX_AUTH_ATTRIBS_SZ - authAttribsCount,
+                                 pkcs7->authAttribs,
+                                 pkcs7->authAttribsSz);
+        authAttribsCount += pkcs7->authAttribsSz;
+    }
+
+    /* authAttribs: flatten authAttribs */
+    if (authAttribsSz > 0 && authAttribsCount > 0) {
+        flatAuthAttribs = (byte*)XMALLOC(authAttribsSz, pkcs7->heap,
+                                         DYNAMIC_TYPE_PKCS7);
+        if (flatAuthAttribs == NULL) {
+            return MEMORY_E;
+        }
+
+        FlattenAttributes(pkcs7, flatAuthAttribs, authAttribs,
+                          authAttribsCount);
+
+        authAttribsSetSz = SetImplicit(ASN_SET, 1, authAttribsSz,
+                                       authAttribSet);
+
+        /* From RFC5083, "For the purpose of constructing the AAD, the
+         * IMPLICIT [1] tag in the authAttrs field is not used for the
+         * DER encoding: rather a universal SET OF tag is used. */
+        authAttribsAadSetSz = SetSet(authAttribsSz, authAttribAadSet);
+
+        /* allocate temp buffer to hold alternate attrib encoding for aad */
+        aadBuffer = (byte*)XMALLOC(authAttribsSz + authAttribsAadSetSz,
+                                   pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        if (aadBuffer == NULL) {
+            XFREE(flatAuthAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+            return MEMORY_E;
+        }
+
+        /* build up alternate attrib encoding for aad */
+        aadBufferSz = 0;
+        XMEMCPY(aadBuffer + aadBufferSz, authAttribAadSet, authAttribsAadSetSz);
+        aadBufferSz += authAttribsAadSetSz;
+        XMEMCPY(aadBuffer + aadBufferSz, flatAuthAttribs, authAttribsSz);
+        aadBufferSz += authAttribsSz;
+    }
+
+    /* build up unauthenticated attributes (unauthAttrs) */
+    if (pkcs7->unauthAttribsSz > 0) {
+        unauthAttribsSz = EncodeAttributes(unauthAttribs + unauthAttribsCount,
+                                     MAX_UNAUTH_ATTRIBS_SZ - unauthAttribsCount,
+                                     pkcs7->unauthAttribs,
+                                     pkcs7->unauthAttribsSz);
+        unauthAttribsCount = pkcs7->unauthAttribsSz;
+
+        flatUnauthAttribs = (byte*)XMALLOC(unauthAttribsSz, pkcs7->heap,
+                                            DYNAMIC_TYPE_PKCS7);
+        if (flatUnauthAttribs == NULL) {
+            if (aadBuffer)
+                XFREE(aadBuffer, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            if (flatAuthAttribs)
+                XFREE(flatAuthAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+            return MEMORY_E;
+        }
+
+        FlattenAttributes(pkcs7, flatUnauthAttribs, unauthAttribs,
+                          unauthAttribsCount);
+        unauthAttribsSetSz = SetImplicit(ASN_SET, 2, unauthAttribsSz,
+                                         unauthAttribSet);
+    }
+
+    /* allocate encrypted content buffer */
+    encryptedOutSz = pkcs7->contentSz;
+    encryptedContent = (byte*)XMALLOC(encryptedOutSz, pkcs7->heap,
+                                      DYNAMIC_TYPE_PKCS7);
+    if (encryptedContent == NULL) {
+        if (aadBuffer)
+            XFREE(aadBuffer, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        if (flatUnauthAttribs)
+            XFREE(flatUnauthAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        if (flatAuthAttribs)
+            XFREE(flatAuthAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        return MEMORY_E;
+    }
+
+    /* encrypt content */
+    ret = wc_PKCS7_EncryptContent(pkcs7->encryptOID, pkcs7->cek,
+            pkcs7->cekSz, nonce, nonceSz, aadBuffer, aadBufferSz, authTag,
+            sizeof(authTag), pkcs7->content, encryptedOutSz, encryptedContent);
+
+    if (aadBuffer) {
+        XFREE(aadBuffer, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        aadBuffer = NULL;
+    }
+
+    if (ret != 0) {
+        if (flatUnauthAttribs)
+            XFREE(flatUnauthAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        if (flatAuthAttribs)
+            XFREE(flatAuthAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        return ret;
+    }
+
+    /* EncryptedContentInfo */
+    ret = wc_SetContentType(pkcs7->contentOID, contentType,
+                            sizeof(contentType));
+    if (ret < 0) {
+        if (flatUnauthAttribs)
+            XFREE(flatUnauthAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        if (flatAuthAttribs)
+            XFREE(flatAuthAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        return ret;
+    }
+
+    contentTypeSz = ret;
+
+    /* put together nonce OCTET STRING */
+    nonceOctetStringSz = SetOctetString(nonceSz, nonceOctetString);
+
+    /* put together aes-ICVlen INTEGER */
+    macIntSz = SetMyVersion(sizeof(authTag), macInt, 0);
+
+    /* build up our ContentEncryptionAlgorithmIdentifier sequence,
+     * adding (nonceOctetStringSz + blockSz + macIntSz) for nonce OCTET STRING
+     * and tag size */
+    contentEncAlgoSz = SetAlgoID(pkcs7->encryptOID, contentEncAlgo,
+                                 oidBlkType, nonceOctetStringSz + nonceSz +
+                                 macIntSz);
+
+    if (contentEncAlgoSz == 0) {
+        if (flatUnauthAttribs)
+            XFREE(flatUnauthAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        if (flatAuthAttribs)
+            XFREE(flatAuthAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        return BAD_FUNC_ARG;
+    }
+
+    encContentOctetSz = SetImplicit(ASN_OCTET_STRING, 0, encryptedOutSz,
+                                    encContentOctet);
+
+    encContentSeqSz = SetSequence(contentTypeSz + contentEncAlgoSz +
+                                  nonceOctetStringSz + nonceSz + macIntSz +
+                                  encContentOctetSz + encryptedOutSz,
+                                  encContentSeq);
+
+    macOctetStringSz = SetOctetString(sizeof(authTag), macOctetString);
+
+    /* keep track of sizes for outer wrapper layering */
+    totalSz = verSz + recipSetSz + recipSz + encContentSeqSz + contentTypeSz +
+              contentEncAlgoSz + nonceOctetStringSz + nonceSz + macIntSz +
+              encContentOctetSz + encryptedOutSz + authAttribsSz +
+              authAttribsSetSz + macOctetStringSz + sizeof(authTag) +
+              unauthAttribsSz + unauthAttribsSetSz;
+
+    /* EnvelopedData */
+    envDataSeqSz = SetSequence(totalSz, envDataSeq);
+    totalSz += envDataSeqSz;
+
+    /* outer content */
+    outerContentSz = SetExplicit(0, totalSz, outerContent);
+    totalSz += outerContentTypeSz;
+    totalSz += outerContentSz;
+
+    /* ContentInfo */
+    contentInfoSeqSz = SetSequence(totalSz, contentInfoSeq);
+    totalSz += contentInfoSeqSz;
+
+    if (totalSz > (int)outputSz) {
+        WOLFSSL_MSG("Pkcs7_encrypt output buffer too small");
+        if (flatUnauthAttribs)
+            XFREE(flatUnauthAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        if (flatAuthAttribs)
+            XFREE(flatAuthAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        return BUFFER_E;
+    }
+
+    XMEMCPY(output + idx, contentInfoSeq, contentInfoSeqSz);
+    idx += contentInfoSeqSz;
+    XMEMCPY(output + idx, outerContentType, outerContentTypeSz);
+    idx += outerContentTypeSz;
+    XMEMCPY(output + idx, outerContent, outerContentSz);
+    idx += outerContentSz;
+    XMEMCPY(output + idx, envDataSeq, envDataSeqSz);
+    idx += envDataSeqSz;
+    XMEMCPY(output + idx, ver, verSz);
+    idx += verSz;
+    XMEMCPY(output + idx, recipSet, recipSetSz);
+    idx += recipSetSz;
+    /* copy in recipients from list */
+    tmpRecip = pkcs7->recipList;
+    while (tmpRecip != NULL) {
+        XMEMCPY(output + idx, tmpRecip->recip, tmpRecip->recipSz);
+        idx += tmpRecip->recipSz;
+        tmpRecip = tmpRecip->next;
+    }
+    wc_PKCS7_FreeEncodedRecipientSet(pkcs7);
+    XMEMCPY(output + idx, encContentSeq, encContentSeqSz);
+    idx += encContentSeqSz;
+    XMEMCPY(output + idx, contentType, contentTypeSz);
+    idx += contentTypeSz;
+    XMEMCPY(output + idx, contentEncAlgo, contentEncAlgoSz);
+    idx += contentEncAlgoSz;
+    XMEMCPY(output + idx, nonceOctetString, nonceOctetStringSz);
+    idx += nonceOctetStringSz;
+    XMEMCPY(output + idx, nonce, nonceSz);
+    idx += nonceSz;
+    XMEMCPY(output + idx, macInt, macIntSz);
+    idx += macIntSz;
+    XMEMCPY(output + idx, encContentOctet, encContentOctetSz);
+    idx += encContentOctetSz;
+    XMEMCPY(output + idx, encryptedContent, encryptedOutSz);
+    idx += encryptedOutSz;
+
+    /* authenticated attributes */
+    if (flatAuthAttribs && authAttribsSz > 0) {
+        XMEMCPY(output + idx, authAttribSet, authAttribsSetSz);
+        idx += authAttribsSetSz;
+        XMEMCPY(output + idx, flatAuthAttribs, authAttribsSz);
+        idx += authAttribsSz;
+        XFREE(flatAuthAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+    }
+
+    XMEMCPY(output + idx, macOctetString, macOctetStringSz);
+    idx += macOctetStringSz;
+    XMEMCPY(output + idx, authTag, sizeof(authTag));
+    idx += sizeof(authTag);
+
+    /* unauthenticated attributes */
+    if (unauthAttribsSz > 0) {
+        XMEMCPY(output + idx, unauthAttribSet, unauthAttribsSetSz);
+        idx += unauthAttribsSetSz;
+        XMEMCPY(output + idx, flatUnauthAttribs, unauthAttribsSz);
+        idx += unauthAttribsSz;
+    }
+
+    if (flatUnauthAttribs != NULL) {
+        XFREE(flatUnauthAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+    }
+
+    XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+
+    return idx;
+
+#else
+    WOLFSSL_MSG("AuthEnvelopedData requires AES-GCM or AES-CCM to be enabled");
+    (void)pkcs7;
+    (void)output;
+    (void)outputSz;
+
+    return NOT_COMPILED_IN;
+#endif /* HAVE_AESGCM | HAVE_AESCCM */
+}
+
+
+/* unwrap and decrypt PKCS#7 AuthEnvelopedData object, return decoded size */
+WOLFSSL_API int wc_PKCS7_DecodeAuthEnvelopedData(PKCS7* pkcs7, byte* in,
+                                                 word32 inSz, byte* output,
+                                                 word32 outputSz)
+{
+#if defined(HAVE_AESGCM) || defined(HAVE_AESCCM)
+    int recipFound = 0;
+    int ret = 0, length;
+    word32 idx = 0;
+#ifndef NO_PKCS7_STREAM
+    word32 tmpIdx = 0;
+    long rc;
+#endif
+    word32 contentType, encOID = 0;
+    word32 decryptedKeySz = 0;
+    byte* pkiMsg = in;
+    word32 pkiMsgSz = inSz;
+
+    int expBlockSz = 0, blockKeySz = 0;
+    byte authTag[AES_BLOCK_SIZE];
+    byte nonce[GCM_NONCE_MID_SZ];       /* GCM nonce is larger than CCM */
+    int nonceSz = 0, authTagSz = 0, macSz = 0;
+
+#ifdef WOLFSSL_SMALL_STACK
+    byte* decryptedKey = NULL;
+#else
+    byte  decryptedKey[MAX_ENCRYPTED_KEY_SZ];
+#endif
+    int encryptedContentSz = 0;
+    byte* encryptedContent = NULL;
+    int explicitOctet = 0;
+
+    byte authAttribSetByte = 0;
+    byte* encodedAttribs = NULL;
+    word32 encodedAttribIdx = 0, encodedAttribSz = 0;
+    byte* authAttrib = NULL;
+    int authAttribSz = 0;
+    word32 localIdx;
+    byte tag;
+
+    if (pkcs7 == NULL)
+        return BAD_FUNC_ARG;
+
+    if (pkiMsg == NULL || pkiMsgSz == 0 ||
+        output == NULL || outputSz == 0)
+        return BAD_FUNC_ARG;
+#ifndef NO_PKCS7_STREAM
+    if (pkcs7->stream == NULL) {
+        if ((ret = wc_PKCS7_CreateStream(pkcs7)) != 0) {
+            return ret;
+        }
+    }
+#endif
+
+    switch (pkcs7->state) {
+        case WC_PKCS7_START:
+        case WC_PKCS7_INFOSET_START:
+        case WC_PKCS7_INFOSET_STAGE1:
+        case WC_PKCS7_INFOSET_STAGE2:
+        case WC_PKCS7_INFOSET_END:
+            ret = wc_PKCS7_ParseToRecipientInfoSet(pkcs7, pkiMsg, pkiMsgSz,
+                    &idx, AUTH_ENVELOPED_DATA);
+            if (ret < 0)
+                break;
+
+        #ifndef NO_PKCS7_STREAM
+            tmpIdx = idx;
+        #endif
+            wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_AUTHENV_2);
+            FALL_THROUGH;
+
+        case WC_PKCS7_AUTHENV_2:
+        #ifndef NO_PKCS7_STREAM
+            if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, MAX_LENGTH_SZ +
+                            MAX_VERSION_SZ + ASN_TAG_SZ, &pkiMsg, &idx)) != 0) {
+                break;
+            }
+        #endif
+        #ifdef WOLFSSL_SMALL_STACK
+            decryptedKey = (byte*)XMALLOC(MAX_ENCRYPTED_KEY_SZ, pkcs7->heap,
+                                                               DYNAMIC_TYPE_PKCS7);
+            if (decryptedKey == NULL) {
+                ret = MEMORY_E;
+                break;
+            }
+        #ifndef NO_PKCS7_STREAM
+            pkcs7->stream->key = decryptedKey;
+        #endif
+        #endif
+            FALL_THROUGH;
+
+        case WC_PKCS7_DECRYPT_KTRI:
+        case WC_PKCS7_DECRYPT_KTRI_2:
+        case WC_PKCS7_DECRYPT_KTRI_3:
+        case WC_PKCS7_DECRYPT_KARI:
+        case WC_PKCS7_DECRYPT_KEKRI:
+        case WC_PKCS7_DECRYPT_PWRI:
+        case WC_PKCS7_DECRYPT_ORI:
+
+            decryptedKeySz = MAX_ENCRYPTED_KEY_SZ;
+        #ifdef WOLFSSL_SMALL_STACK
+            #ifndef NO_PKCS7_STREAM
+            decryptedKey = pkcs7->stream->key;
+            #endif
+        #endif
+
+            ret = wc_PKCS7_DecryptRecipientInfos(pkcs7, in, inSz, &idx,
+                                                decryptedKey, &decryptedKeySz,
+                                                &recipFound);
+            if (ret != 0) {
+                break;
+            }
+
+            if (recipFound == 0) {
+                WOLFSSL_MSG("No recipient found in envelopedData that matches input");
+                ret = PKCS7_RECIP_E;
+                break;
+            }
+
+        #ifndef NO_PKCS7_STREAM
+            tmpIdx = idx;
+        #endif
+            wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_AUTHENV_3);
+            FALL_THROUGH;
+
+        case WC_PKCS7_AUTHENV_3:
+        #ifndef NO_PKCS7_STREAM
+            if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, MAX_SEQ_SZ +
+                            MAX_ALGO_SZ + MAX_ALGO_SZ + ASN_TAG_SZ,
+                            &pkiMsg, &idx)) != 0) {
+                break;
+            }
+
+            rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK,
+                in, inSz);
+            if (rc < 0) {
+                ret = (int)rc;
+                break;
+            }
+            pkiMsgSz = (word32)rc;
+        #endif
+
+            /* remove EncryptedContentInfo */
+            if (ret == 0 && GetSequence(pkiMsg, &idx, &length, pkiMsgSz) < 0) {
+                ret = ASN_PARSE_E;
+            }
+
+            if (ret == 0 && wc_GetContentType(pkiMsg, &idx, &contentType,
+                        pkiMsgSz) < 0) {
+                ret = ASN_PARSE_E;
+            }
+
+            if (ret == 0 && GetAlgoId(pkiMsg, &idx, &encOID, oidBlkType,
+                        pkiMsgSz) < 0) {
+                ret = ASN_PARSE_E;
+            }
+
+            blockKeySz = wc_PKCS7_GetOIDKeySize(encOID);
+            if (ret == 0 && blockKeySz < 0) {
+                ret = blockKeySz;
+            }
+
+            expBlockSz = wc_PKCS7_GetOIDBlockSize(encOID);
+            if (ret == 0 && expBlockSz < 0) {
+                ret = expBlockSz;
+            }
+
+            /* get nonce, stored in OPTIONAL parameter of AlgoID */
+            if (ret == 0 && GetASNTag(pkiMsg, &idx, &tag, pkiMsgSz) < 0) {
+                ret = ASN_PARSE_E;
+            }
+
+            if (ret == 0 && tag != ASN_OCTET_STRING) {
+                ret = ASN_PARSE_E;
+            }
+
+            if (ret < 0)
+                break;
+
+        #ifndef NO_PKCS7_STREAM
+            if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, &idx)) != 0) {
+                break;
+            }
+            wc_PKCS7_StreamStoreVar(pkcs7, encOID, blockKeySz, 0);
+        #endif
+            wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_AUTHENV_4);
+            FALL_THROUGH;
+
+        case WC_PKCS7_AUTHENV_4:
+
+        #ifndef NO_PKCS7_STREAM
+            if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, MAX_LENGTH_SZ +
+                            MAX_VERSION_SZ + ASN_TAG_SZ + MAX_LENGTH_SZ,
+                            &pkiMsg, &idx)) != 0) {
+                break;
+            }
+
+            rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, in,
+                    inSz);
+            if (rc < 0) {
+                ret = (int)rc;
+                break;
+            }
+            pkiMsgSz = (word32)rc;
+        #endif
+            if (ret == 0 && GetLength(pkiMsg, &idx, &nonceSz, pkiMsgSz) < 0) {
+                ret = ASN_PARSE_E;
+            }
+
+            if (ret == 0 && nonceSz > (int)sizeof(nonce)) {
+                WOLFSSL_MSG("AuthEnvelopedData nonce too large for buffer");
+                ret = ASN_PARSE_E;
+            }
+
+            if (ret == 0) {
+                XMEMCPY(nonce, &pkiMsg[idx], nonceSz);
+                idx += nonceSz;
+            }
+
+            /* get mac size, also stored in OPTIONAL parameter of AlgoID */
+            if (ret == 0 && GetMyVersion(pkiMsg, &idx, &macSz, pkiMsgSz) < 0) {
+                ret = ASN_PARSE_E;
+            }
+
+            if (ret == 0) {
+                explicitOctet = 0;
+                localIdx = idx;
+                if (GetASNTag(pkiMsg, &localIdx, &tag, pkiMsgSz) == 0 &&
+                        tag == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | 0))
+                    explicitOctet = 1;
+
+                /* read encryptedContent, cont[0] */
+                ret = GetASNTag(pkiMsg, &idx, &tag, pkiMsgSz);
+            }
+
+            if (ret == 0 &&
+                    tag != (ASN_CONTEXT_SPECIFIC | 0) &&
+                    tag != (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | 0)) {
+                ret = ASN_PARSE_E;
+            }
+
+            if (ret == 0 && GetLength(pkiMsg, &idx, &encryptedContentSz,
+                        pkiMsgSz) <= 0) {
+                ret = ASN_PARSE_E;
+            }
+
+            if (explicitOctet) {
+                if (ret == 0 && GetASNTag(pkiMsg, &idx, &tag, pkiMsgSz) < 0) {
+                    ret = ASN_PARSE_E;
+                }
+                if (ret == 0 && tag != ASN_OCTET_STRING) {
+                    ret = ASN_PARSE_E;
+                }
+
+                if (ret == 0 && GetLength(pkiMsg, &idx, &encryptedContentSz,
+                            pkiMsgSz) <= 0) {
+                    ret = ASN_PARSE_E;
+                }
+            }
+
+            if (ret < 0)
+                break;
+
+        #ifndef NO_PKCS7_STREAM
+            if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, &idx)) != 0) {
+                break;
+            }
+
+            /* store nonce for later */
+            if (nonceSz > 0) {
+                pkcs7->stream->nonceSz = nonceSz;
+                pkcs7->stream->nonce = (byte*)XMALLOC(nonceSz, pkcs7->heap,
+                        DYNAMIC_TYPE_PKCS7);
+                if (pkcs7->stream->nonce == NULL) {
+                    ret = MEMORY_E;
+                    break;
+                }
+                else {
+                    XMEMCPY(pkcs7->stream->nonce, nonce, nonceSz);
+                }
+            }
+
+            pkcs7->stream->expected = encryptedContentSz;
+            wc_PKCS7_StreamStoreVar(pkcs7, encOID, blockKeySz,
+                    encryptedContentSz);
+        #endif
+
+            wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_AUTHENV_5);
+            FALL_THROUGH;
+
+        case WC_PKCS7_AUTHENV_5:
+        #ifndef NO_PKCS7_STREAM
+            if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, MAX_LENGTH_SZ +
+                            ASN_TAG_SZ + ASN_TAG_SZ + pkcs7->stream->expected,
+                            &pkiMsg, &idx)) != 0) {
+                break;
+            }
+
+            rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, in,
+                    inSz);
+            if (rc < 0) {
+                ret = (int)rc;
+                break;
+            }
+            pkiMsgSz = (word32)rc;
+
+            encryptedContentSz = pkcs7->stream->expected;
+        #endif
+
+            encryptedContent = (byte*)XMALLOC(encryptedContentSz, pkcs7->heap,
+                                                               DYNAMIC_TYPE_PKCS7);
+            if (ret == 0 && encryptedContent == NULL) {
+                ret = MEMORY_E;
+            }
+
+            if (ret == 0) {
+                XMEMCPY(encryptedContent, &pkiMsg[idx], encryptedContentSz);
+                idx += encryptedContentSz;
+            }
+        #ifndef NO_PKCS7_STREAM
+                pkcs7->stream->bufferPt = encryptedContent;
+        #endif
+
+            /* may have IMPLICIT [1] authenticatedAttributes */
+            localIdx = idx;
+            if (ret == 0 && GetASNTag(pkiMsg, &localIdx, &tag, pkiMsgSz) == 0 &&
+                    tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 1)) {
+                encodedAttribIdx = idx;
+                encodedAttribs = pkiMsg + idx;
+                idx++;
+
+                if (GetLength(pkiMsg, &idx, &length, pkiMsgSz) < 0)
+                    ret = ASN_PARSE_E;
+            #ifndef NO_PKCS7_STREAM
+                pkcs7->stream->expected = length;
+            #endif
+                encodedAttribSz = length + (idx - encodedAttribIdx);
+
+                if (ret != 0)
+                    break;
+
+            #ifndef NO_PKCS7_STREAM
+                if (encodedAttribSz > 0) {
+                    pkcs7->stream->aadSz = encodedAttribSz;
+                    pkcs7->stream->aad = (byte*)XMALLOC(encodedAttribSz,
+                            pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+                    if (pkcs7->stream->aad == NULL) {
+                        ret = MEMORY_E;
+                        break;
+                    }
+                    else {
+                        XMEMCPY(pkcs7->stream->aad, encodedAttribs,
+                                (idx - encodedAttribIdx));
+                    }
+                }
+
+                if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, &idx)) != 0) {
+                    break;
+                }
+            #endif
+                wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_AUTHENV_ATRB);
+            }
+            else {
+            #ifndef NO_PKCS7_STREAM
+                if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, &idx)) != 0) {
+                    break;
+                }
+            #endif
+                goto authenv_atrbend; /* jump over attribute cases */
+            }
+            FALL_THROUGH;
+
+        case WC_PKCS7_AUTHENV_ATRB:
+    #ifndef NO_PKCS7_STREAM
+            if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz,
+                            pkcs7->stream->expected, &pkiMsg, &idx)) != 0) {
+                return ret;
+            }
+
+            length = pkcs7->stream->expected;
+            encodedAttribs = pkcs7->stream->aad;
+    #else
+            length = 0;
+    #endif
+
+            /* save pointer and length */
+            authAttrib = &pkiMsg[idx];
+            authAttribSz = length;
+
+            if (ret == 0 && wc_PKCS7_ParseAttribs(pkcs7, authAttrib, authAttribSz) < 0) {
+                WOLFSSL_MSG("Error parsing authenticated attributes");
+                ret = ASN_PARSE_E;
+                break;
+            }
+
+            idx += length;
+
+    #ifndef NO_PKCS7_STREAM
+            if (encodedAttribSz > 0) {
+                XMEMCPY(pkcs7->stream->aad + (encodedAttribSz - length),
+                        authAttrib, authAttribSz);
+            }
+            if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, &idx)) != 0) {
+                break;
+            }
+
+    #endif
+            wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_AUTHENV_ATRBEND);
+            FALL_THROUGH;
+
+authenv_atrbend:
+        case WC_PKCS7_AUTHENV_ATRBEND:
+        #ifndef NO_PKCS7_STREAM
+            if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, MAX_LENGTH_SZ +
+                            ASN_TAG_SZ, &pkiMsg, &idx)) != 0) {
+                return ret;
+            }
+
+            rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK,
+                in, inSz);
+            if (rc < 0) {
+                ret = (int)rc;
+                break;
+            }
+            pkiMsgSz = (word32)rc;
+
+            if (pkcs7->stream->aadSz > 0) {
+                encodedAttribSz = pkcs7->stream->aadSz;
+                encodedAttribs  = pkcs7->stream->aad;
+            }
+        #endif
+
+
+            /* get authTag OCTET STRING */
+            if (ret == 0 && GetASNTag(pkiMsg, &idx, &tag, pkiMsgSz) < 0) {
+                ret = ASN_PARSE_E;
+            }
+            if (ret == 0 && tag != ASN_OCTET_STRING) {
+                ret = ASN_PARSE_E;
+            }
+
+            if (ret == 0 && GetLength(pkiMsg, &idx, &authTagSz, pkiMsgSz) < 0) {
+                ret = ASN_PARSE_E;
+            }
+
+            if (ret == 0 && authTagSz > (int)sizeof(authTag)) {
+                WOLFSSL_MSG("AuthEnvelopedData authTag too large for buffer");
+                ret = ASN_PARSE_E;
+            }
+
+            if (ret == 0) {
+                XMEMCPY(authTag, &pkiMsg[idx], authTagSz);
+                idx += authTagSz;
+            }
+
+            if (ret == 0 && authAttrib != NULL) {
+                /* temporarily swap authAttribs byte[0] to SET OF instead of
+                 * IMPLICIT [1], for aad calculation */
+                authAttribSetByte = encodedAttribs[0];
+
+                encodedAttribs[0] = ASN_SET | ASN_CONSTRUCTED;
+            }
+
+            if (ret < 0)
+                break;
+
+        #ifndef NO_PKCS7_STREAM
+            if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, &idx)) != 0) {
+                break;
+            }
+            pkcs7->stream->expected = (pkcs7->stream->maxLen -
+                pkcs7->stream->totalRd) + pkcs7->stream->length;
+
+
+            /* store tag for later */
+            if (authTagSz > 0) {
+                pkcs7->stream->tagSz = authTagSz;
+                pkcs7->stream->tag = (byte*)XMALLOC(authTagSz, pkcs7->heap,
+                        DYNAMIC_TYPE_PKCS7);
+                if (pkcs7->stream->tag == NULL) {
+                    ret = MEMORY_E;
+                    break;
+                }
+                else {
+                    XMEMCPY(pkcs7->stream->tag, authTag, authTagSz);
+                }
+            }
+
+        #endif
+            wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_AUTHENV_6);
+            FALL_THROUGH;
+
+        case WC_PKCS7_AUTHENV_6:
+        #ifndef NO_PKCS7_STREAM
+            if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz,
+                            pkcs7->stream->expected, &pkiMsg, &idx)) != 0) {
+                break;
+            }
+
+            /* restore all variables needed */
+            if (pkcs7->stream->nonceSz > 0) {
+                nonceSz = pkcs7->stream->nonceSz;
+                if (nonceSz > GCM_NONCE_MID_SZ) {
+                    WOLFSSL_MSG("PKCS7 saved nonce is too large");
+                    ret = BUFFER_E;
+                    break;
+                }
+                else {
+                    XMEMCPY(nonce, pkcs7->stream->nonce, nonceSz);
+                }
+            }
+
+            if (pkcs7->stream->tagSz > 0) {
+                authTagSz = pkcs7->stream->tagSz;
+                if (authTagSz > AES_BLOCK_SIZE) {
+                    WOLFSSL_MSG("PKCS7 saved tag is too large");
+                    ret = BUFFER_E;
+                    break;
+                }
+                else {
+                    XMEMCPY(authTag, pkcs7->stream->tag, authTagSz);
+                }
+            }
+
+            if (pkcs7->stream->aadSz > 0) {
+                encodedAttribSz = pkcs7->stream->aadSz;
+                encodedAttribs  = pkcs7->stream->aad;
+            }
+
+            wc_PKCS7_StreamGetVar(pkcs7, &encOID, &blockKeySz,
+                                  &encryptedContentSz);
+            encryptedContent   = pkcs7->stream->bufferPt;
+        #ifdef WOLFSSL_SMALL_STACK
+            decryptedKey = pkcs7->stream->key;
+        #endif
+        #endif
+
+            /* decrypt encryptedContent */
+            ret = wc_PKCS7_DecryptContent(pkcs7, encOID, decryptedKey,
+                    blockKeySz, nonce, nonceSz, encodedAttribs, encodedAttribSz,
+                    authTag, authTagSz, encryptedContent, encryptedContentSz,
+                    encryptedContent);
+            if (ret != 0) {
+                XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+                return ret;
+            }
+
+            if (authAttrib != NULL) {
+                /* restore authAttrib IMPLICIT [1] */
+                encodedAttribs[0] = authAttribSetByte;
+            }
+
+            /* copy plaintext to output */
+            XMEMCPY(output, encryptedContent, encryptedContentSz);
+
+            /* free memory, zero out keys */
+            ForceZero(encryptedContent, encryptedContentSz);
+            XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+            ForceZero(decryptedKey, MAX_ENCRYPTED_KEY_SZ);
+        #ifdef WOLFSSL_SMALL_STACK
             XFREE(decryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
-#endif
-            return ASN_PARSE_E;
-        }
-    }
-
-    encryptedContent = (byte*)XMALLOC(encryptedContentSz, pkcs7->heap,
-                                                       DYNAMIC_TYPE_PKCS7);
-    if (encryptedContent == NULL) {
-#ifdef WOLFSSL_SMALL_STACK
+            decryptedKey = NULL;
+        #ifdef WOLFSSL_SMALL_STACK
+            #ifndef NO_PKCS7_STREAM
+            pkcs7->stream->key = NULL;
+            #endif
+        #endif
+        #endif
+            ret = encryptedContentSz;
+        #ifndef NO_PKCS7_STREAM
+            wc_PKCS7_ResetStream(pkcs7);
+        #endif
+            wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_START);
+            break;
+        default:
+            WOLFSSL_MSG("Unknown PKCS7 state");
+            ret = BAD_FUNC_ARG;
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (ret != 0 && ret != WC_PKCS7_WANT_READ_E) {
+        if (decryptedKey != NULL) {
+            ForceZero(decryptedKey, MAX_ENCRYPTED_KEY_SZ);
+        }
         XFREE(decryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
-#endif
-        return MEMORY_E;
-    }
-
-    XMEMCPY(encryptedContent, &pkiMsg[idx], encryptedContentSz);
-
-    /* decrypt encryptedContent */
-    ret = wc_PKCS7_DecryptContent(encOID, decryptedKey, blockKeySz,
-                                  tmpIv, expBlockSz, encryptedContent,
-                                  encryptedContentSz, encryptedContent);
-    if (ret != 0) {
-        XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
-#ifdef WOLFSSL_SMALL_STACK
-        XFREE(decryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
-#endif
-        return ret;
-    }
-
-    padLen = encryptedContent[encryptedContentSz-1];
-
-    /* copy plaintext to output */
-    XMEMCPY(output, encryptedContent, encryptedContentSz - padLen);
-
-    /* free memory, zero out keys */
-    ForceZero(decryptedKey, MAX_ENCRYPTED_KEY_SZ);
-    ForceZero(encryptedContent, encryptedContentSz);
-    XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
-#ifdef WOLFSSL_SMALL_STACK
-    XFREE(decryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
-#endif
-
-    return encryptedContentSz - padLen;
+    }
+#endif
+#ifndef NO_PKCS7_STREAM
+    if (ret != 0 && ret != WC_PKCS7_WANT_READ_E) {
+        wc_PKCS7_ResetStream(pkcs7);
+    }
+#endif
+
+    return ret;
+
+#else
+    WOLFSSL_MSG("AuthEnvelopedData requires AES-GCM or AES-CCM to be enabled");
+    (void)pkcs7;
+    (void)in;
+    (void)inSz;
+    (void)output;
+    (void)outputSz;
+
+    return NOT_COMPILED_IN;
+#endif /* HAVE_AESGCM | HAVE_AESCCM */
 }
 
 
@@ -4703,7 +11553,7 @@
     int encContentOctetSz, encContentSeqSz, contentTypeSz;
     int contentEncAlgoSz, ivOctetStringSz;
     byte encContentSeq[MAX_SEQ_SZ];
-    byte contentType[MAX_ALGO_SZ];
+    byte contentType[MAX_OID_SZ];
     byte contentEncAlgo[MAX_ALGO_SZ];
     byte tmpIv[MAX_CONTENT_IV_SIZE];
     byte ivOctetString[MAX_OCTET_STR_SZ];
@@ -4725,20 +11575,34 @@
     if (output == NULL || outputSz == 0)
         return BAD_FUNC_ARG;
 
-    /* outer content type */
-    outerContentTypeSz = wc_SetContentType(ENCRYPTED_DATA, outerContentType);
-
-    /* version, 2 if unprotectedAttrs present, 0 if absent */
-    if (pkcs7->unprotectedAttribsSz > 0) {
-        verSz = SetMyVersion(2, ver, 0);
-    } else {
+    if (pkcs7->version == 3) {
         verSz = SetMyVersion(0, ver, 0);
+        outerContentTypeSz = 0;
+    }
+    else {
+        /* outer content type */
+        ret = wc_SetContentType(ENCRYPTED_DATA, outerContentType,
+                                sizeof(outerContentType));
+        if (ret < 0)
+            return ret;
+
+        outerContentTypeSz = ret;
+
+        /* version, 2 if unprotectedAttrs present, 0 if absent */
+        if (pkcs7->unprotectedAttribsSz > 0) {
+            verSz = SetMyVersion(2, ver, 0);
+        } else {
+            verSz = SetMyVersion(0, ver, 0);
+        }
     }
 
     /* EncryptedContentInfo */
-    contentTypeSz = wc_SetContentType(pkcs7->contentOID, contentType);
-    if (contentTypeSz == 0)
-        return BAD_FUNC_ARG;
+    ret = wc_SetContentType(pkcs7->contentOID, contentType,
+                            sizeof(contentType));
+    if (ret < 0)
+        return ret;
+
+    contentTypeSz = ret;
 
     /* allocate encrypted content buffer, do PKCS#7 padding */
     blockSz = wc_PKCS7_GetOIDBlockSize(pkcs7->encryptOID);
@@ -4784,7 +11648,8 @@
     }
 
     /* encrypt content */
-    ret = wc_PKCS7_GenerateIV(pkcs7, NULL, tmpIv, blockSz);
+    WOLFSSL_MSG("Encrypting the content");
+    ret = wc_PKCS7_GenerateBlock(pkcs7, NULL, tmpIv, blockSz);
     if (ret != 0) {
         XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
         XFREE(plain, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
@@ -4792,8 +11657,8 @@
     }
 
     ret = wc_PKCS7_EncryptContent(pkcs7->encryptOID, pkcs7->encryptionKey,
-            pkcs7->encryptionKeySz, tmpIv, blockSz, plain, encryptedOutSz,
-            encryptedContent);
+            pkcs7->encryptionKeySz, tmpIv, blockSz, NULL, 0, NULL, 0,
+            plain, encryptedOutSz, encryptedContent);
     if (ret != 0) {
         XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
         XFREE(plain, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
@@ -4839,7 +11704,7 @@
             return MEMORY_E;
         }
 
-        FlattenAttributes(flatAttribs, attribs, attribsCount);
+        FlattenAttributes(pkcs7, flatAttribs, attribs, attribsCount);
         attribsSetSz = SetImplicit(ASN_SET, 1, attribsSz, attribSet);
 
     } else {
@@ -4850,20 +11715,24 @@
     /* keep track of sizes for outer wrapper layering */
     totalSz = verSz + encContentSeqSz + contentTypeSz + contentEncAlgoSz +
               ivOctetStringSz + blockSz + encContentOctetSz + encryptedOutSz +
-              attribsSz + attribsSetSz;;
+              attribsSz + attribsSetSz;
 
     /* EncryptedData */
     encDataSeqSz = SetSequence(totalSz, encDataSeq);
     totalSz += encDataSeqSz;
 
-    /* outer content */
-    outerContentSz = SetExplicit(0, totalSz, outerContent);
-    totalSz += outerContentTypeSz;
-    totalSz += outerContentSz;
-
-    /* ContentInfo */
-    contentInfoSeqSz = SetSequence(totalSz, contentInfoSeq);
-    totalSz += contentInfoSeqSz;
+    if (pkcs7->version != 3) {
+        /* outer content */
+        outerContentSz = SetExplicit(0, totalSz, outerContent);
+        totalSz += outerContentTypeSz;
+        totalSz += outerContentSz;
+        /* ContentInfo */
+        contentInfoSeqSz = SetSequence(totalSz, contentInfoSeq);
+        totalSz += contentInfoSeqSz;
+    } else {
+        contentInfoSeqSz = 0;
+        outerContentSz = 0;
+    }
 
     if (totalSz > (int)outputSz) {
         WOLFSSL_MSG("PKCS#7 output buffer too small");
@@ -4924,6 +11793,7 @@
 {
     int ret, attribLen;
     word32 idx;
+    byte tag;
 
     if (pkcs7 == NULL || pkiMsg == NULL ||
         pkiMsgSz == 0 || inOutIdx == NULL)
@@ -4931,9 +11801,11 @@
 
     idx = *inOutIdx;
 
-    if (pkiMsg[idx] != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 1))
+    if (GetASNTag(pkiMsg, &idx, &tag, pkiMsgSz) < 0)
         return ASN_PARSE_E;
-    idx++;
+
+    if (tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 1))
+        return ASN_PARSE_E;
 
     if (GetLength(pkiMsg, &idx, &attribLen, pkiMsgSz) < 0)
         return ASN_PARSE_E;
@@ -4950,143 +11822,693 @@
 
 
 /* unwrap and decrypt PKCS#7/CMS encrypted-data object, returned decoded size */
-int wc_PKCS7_DecodeEncryptedData(PKCS7* pkcs7, byte* pkiMsg, word32 pkiMsgSz,
+int wc_PKCS7_DecodeEncryptedData(PKCS7* pkcs7, byte* in, word32 inSz,
                                  byte* output, word32 outputSz)
 {
-    int ret, version, length, haveAttribs;
+    int ret = 0, version, length = 0, haveAttribs = 0;
     word32 idx = 0;
+
+#ifndef NO_PKCS7_STREAM
+    word32 tmpIdx = 0;
+    long rc;
+#endif
     word32 contentType, encOID;
 
-    int expBlockSz;
-    byte tmpIv[MAX_CONTENT_IV_SIZE];
-
-    int encryptedContentSz;
+    int expBlockSz = 0;
+    byte tmpIvBuf[MAX_CONTENT_IV_SIZE];
+    byte *tmpIv = tmpIvBuf;
+
+    int encryptedContentSz = 0;
     byte padLen;
     byte* encryptedContent = NULL;
 
-    if (pkcs7 == NULL || pkcs7->encryptionKey == NULL ||
-        pkcs7->encryptionKeySz == 0)
+    byte* pkiMsg = in;
+    word32 pkiMsgSz = inSz;
+    byte  tag;
+
+    if (pkcs7 == NULL ||
+            ((pkcs7->encryptionKey == NULL || pkcs7->encryptionKeySz == 0) &&
+              pkcs7->decryptionCb == NULL))
         return BAD_FUNC_ARG;
 
     if (pkiMsg == NULL || pkiMsgSz == 0 ||
         output == NULL || outputSz == 0)
         return BAD_FUNC_ARG;
-    /* read past ContentInfo, verify type is encrypted-data */
-    if (GetSequence(pkiMsg, &idx, &length, pkiMsgSz) < 0)
-        return ASN_PARSE_E;
-
-    if (wc_GetContentType(pkiMsg, &idx, &contentType, pkiMsgSz) < 0)
-        return ASN_PARSE_E;
-
-    if (contentType != ENCRYPTED_DATA) {
-        WOLFSSL_MSG("PKCS#7 input not of type EncryptedData");
-        return PKCS7_OID_E;
-    }
-
-    if (pkiMsg[idx++] != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0))
-        return ASN_PARSE_E;
-
-    if (GetLength(pkiMsg, &idx, &length, pkiMsgSz) < 0)
-        return ASN_PARSE_E;
-
-    /* remove EncryptedData and version */
+
+#ifndef NO_PKCS7_STREAM
+    (void)tmpIv; /* help out static analysis */
+    if (pkcs7->stream == NULL) {
+        if ((ret = wc_PKCS7_CreateStream(pkcs7)) != 0) {
+            return ret;
+        }
+    }
+#endif
+
+    switch (pkcs7->state) {
+        case WC_PKCS7_START:
+#ifndef NO_PKCS7_STREAM
+            if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, MAX_SEQ_SZ +
+                            MAX_ALGO_SZ, &pkiMsg, &idx)) != 0) {
+                return ret;
+            }
+
+            rc  = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_SEQ_PEEK, in, inSz);
+            if (rc < 0) {
+                ret = (int)rc;
+                break;
+            }
+            pkiMsgSz = (word32)rc;
+#endif
+
+            if (GetSequence(pkiMsg, &idx, &length, pkiMsgSz) < 0)
+                ret = ASN_PARSE_E;
+
+            if (pkcs7->version != 3) { /* ContentInfo not in firmware bundles */
+                /* read past ContentInfo, verify type is encrypted-data */
+                if (ret == 0 && wc_GetContentType(pkiMsg, &idx, &contentType,
+                            pkiMsgSz) < 0)
+                    ret = ASN_PARSE_E;
+
+                if (ret == 0 && contentType != ENCRYPTED_DATA) {
+                    WOLFSSL_MSG("PKCS#7 input not of type EncryptedData");
+                    ret = PKCS7_OID_E;
+                }
+            }
+            if (ret != 0) break;
+#ifndef NO_PKCS7_STREAM
+            if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, &idx)) != 0) {
+                break;
+            }
+#endif
+            wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_STAGE2);
+            FALL_THROUGH;
+            /* end of stage 1 */
+
+        case WC_PKCS7_STAGE2:
+#ifndef NO_PKCS7_STREAM
+            if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz,
+                            MAX_LENGTH_SZ + MAX_SEQ_SZ + ASN_TAG_SZ, &pkiMsg,
+                            &idx)) != 0) {
+                return ret;
+            }
+
+            rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, in,
+                    inSz);
+            if (rc < 0) {
+                ret = (int)rc;
+                break;
+            }
+            pkiMsgSz = (word32)rc;
+#endif
+            if (pkcs7->version != 3) {
+                if (ret == 0 && GetASNTag(pkiMsg, &idx, &tag, pkiMsgSz) < 0)
+                    ret = ASN_PARSE_E;
+                if (ret == 0 && tag !=
+                        (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0))
+                    ret = ASN_PARSE_E;
+
+                if (ret == 0 && GetLength(pkiMsg, &idx, &length, pkiMsgSz) < 0)
+                    ret = ASN_PARSE_E;
+
+                /* remove EncryptedData and version */
+                if (ret == 0 && GetSequence(pkiMsg, &idx, &length, pkiMsgSz) < 0)
+                    ret = ASN_PARSE_E;
+            }
+
+            if (ret != 0) break;
+#ifndef NO_PKCS7_STREAM
+            if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, &idx)) != 0) {
+                break;
+            }
+#endif
+            wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_STAGE3);
+            FALL_THROUGH;
+            /* end of stage 2 */
+
+       case WC_PKCS7_STAGE3:
+#ifndef NO_PKCS7_STREAM
+            if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz,
+                            MAX_VERSION_SZ + MAX_SEQ_SZ + MAX_ALGO_SZ * 2,
+                            &pkiMsg, &idx)) != 0) {
+                return ret;
+            }
+
+            rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, in,
+                    inSz);
+            if (rc < 0) {
+                ret = (int)rc;
+                break;
+            }
+            pkiMsgSz = (word32)rc;
+#endif
+            /* get version, check later */
+            haveAttribs = 0;
+            if (ret == 0 && GetMyVersion(pkiMsg, &idx, &version, pkiMsgSz) < 0)
+                ret = ASN_PARSE_E;
+
+            /* remove EncryptedContentInfo */
+            if (ret == 0 && GetSequence(pkiMsg, &idx, &length, pkiMsgSz) < 0)
+                ret = ASN_PARSE_E;
+
+            if (ret == 0 && wc_GetContentType(pkiMsg, &idx, &contentType,
+                        pkiMsgSz) < 0)
+                ret = ASN_PARSE_E;
+
+            if (ret == 0 && (ret = GetAlgoId(pkiMsg, &idx, &encOID, oidBlkType,
+                        pkiMsgSz)) < 0)
+                ret = ASN_PARSE_E;
+            if (ret == 0 && (expBlockSz = wc_PKCS7_GetOIDBlockSize(encOID)) < 0)
+                ret = expBlockSz;
+
+            if (ret != 0) break;
+#ifndef NO_PKCS7_STREAM
+            /* store expBlockSz for later */
+            pkcs7->stream->varOne = expBlockSz;
+            pkcs7->stream->varTwo = encOID;
+
+            if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, &idx)) != 0) {
+                break;
+            }
+
+            /* store version for later */
+            pkcs7->stream->vers = version;
+#endif
+            wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_STAGE4);
+            FALL_THROUGH;
+            /* end of stage 3 */
+
+        /* get block cipher IV, stored in OPTIONAL parameter of AlgoID */
+       case WC_PKCS7_STAGE4:
+#ifndef NO_PKCS7_STREAM
+            if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz,
+                            ASN_TAG_SZ + MAX_LENGTH_SZ, &pkiMsg, &idx)) != 0) {
+                return ret;
+            }
+
+            rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, in,
+                    inSz);
+            if (rc < 0) {
+                ret = (int)rc;
+                break;
+            }
+            pkiMsgSz = (word32)rc;
+
+            /* restore saved variables */
+            expBlockSz = pkcs7->stream->varOne;
+#endif
+            if (ret == 0 && GetASNTag(pkiMsg, &idx, &tag, pkiMsgSz) < 0)
+                ret = ASN_PARSE_E;
+            if (ret == 0 && tag != ASN_OCTET_STRING)
+                ret = ASN_PARSE_E;
+
+            if (ret == 0 && GetLength(pkiMsg, &idx, &length, pkiMsgSz) < 0)
+                ret = ASN_PARSE_E;
+
+            if (ret == 0 && length != expBlockSz) {
+                WOLFSSL_MSG("Incorrect IV length, must be of content alg block size");
+                ret = ASN_PARSE_E;
+            }
+
+            if (ret != 0) break;
+#ifndef NO_PKCS7_STREAM
+            /* next chunk of data expected should have the IV */
+            pkcs7->stream->expected = length;
+
+            if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, &idx)) != 0) {
+                break;
+            }
+#endif
+            wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_STAGE5);
+            FALL_THROUGH;
+            /* end of stage 4 */
+
+       case WC_PKCS7_STAGE5:
+#ifndef NO_PKCS7_STREAM
+            if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz,
+                            pkcs7->stream->expected + ASN_TAG_SZ +
+                            MAX_LENGTH_SZ, &pkiMsg, &idx)) != 0) {
+                return ret;
+            }
+
+            rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, in,
+                    inSz);
+            if (rc < 0) {
+                ret = (int)rc;
+                break;
+            }
+            pkiMsgSz = (word32)rc;
+
+            /* use IV buffer from stream structure */
+            tmpIv  = pkcs7->stream->tmpIv;
+            length = pkcs7->stream->expected;
+#endif
+            XMEMCPY(tmpIv, &pkiMsg[idx], length);
+            idx += length;
+            /* read encryptedContent, cont[0] */
+            if (ret == 0 && GetASNTag(pkiMsg, &idx, &tag, pkiMsgSz) < 0)
+                ret = ASN_PARSE_E;
+            if (ret == 0 && tag != (ASN_CONTEXT_SPECIFIC | 0))
+                ret = ASN_PARSE_E;
+
+            if (ret == 0 && GetLength(pkiMsg, &idx, &encryptedContentSz,
+                        pkiMsgSz) <= 0)
+                ret = ASN_PARSE_E;
+
+            if (ret < 0)
+                break;
+#ifndef NO_PKCS7_STREAM
+            /* next chunk of data should contain encrypted content */
+            pkcs7->stream->varThree = encryptedContentSz;
+            if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, &idx)) != 0) {
+                break;
+            }
+
+            if (pkcs7->stream->totalRd +  encryptedContentSz < pkiMsgSz) {
+                pkcs7->stream->flagOne = 1;
+            }
+
+            pkcs7->stream->expected = (pkcs7->stream->maxLen -
+                pkcs7->stream->totalRd) + pkcs7->stream->length;
+
+#endif
+            wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_STAGE6);
+            FALL_THROUGH;
+            /* end of stage 5 */
+
+        case WC_PKCS7_STAGE6:
+#ifndef NO_PKCS7_STREAM
+            if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz,
+                            pkcs7->stream->expected, &pkiMsg, &idx)) != 0) {
+                return ret;
+            }
+
+            rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, in,
+                    inSz);
+            if (rc < 0) {
+                ret = (int)rc;
+                break;
+            }
+            pkiMsgSz = (word32)rc;
+
+            /* restore saved variables */
+            expBlockSz = pkcs7->stream->varOne;
+            encOID     = pkcs7->stream->varTwo;
+            encryptedContentSz = pkcs7->stream->varThree;
+            version    = pkcs7->stream->vers;
+            tmpIv      = pkcs7->stream->tmpIv;
+#else
+            encOID = 0;
+#endif
+            if (ret == 0 && (encryptedContent = (byte*)XMALLOC(
+                encryptedContentSz, pkcs7->heap, DYNAMIC_TYPE_PKCS7)) == NULL) {
+                ret = MEMORY_E;
+                break;
+            }
+
+            if (ret == 0) {
+                XMEMCPY(encryptedContent, &pkiMsg[idx], encryptedContentSz);
+                idx += encryptedContentSz;
+
+                /* decrypt encryptedContent */
+                ret = wc_PKCS7_DecryptContent(pkcs7, encOID,
+                            pkcs7->encryptionKey, pkcs7->encryptionKeySz, tmpIv,
+                            expBlockSz, NULL, 0, NULL, 0, encryptedContent,
+                            encryptedContentSz, encryptedContent);
+                if (ret != 0) {
+                    XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+                }
+            }
+
+            if (ret == 0) {
+                padLen = encryptedContent[encryptedContentSz-1];
+
+                if (padLen > encryptedContentSz) {
+                    WOLFSSL_MSG("Bad padding size found");
+                    ret = BUFFER_E;
+                    XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+                    break;
+                }
+
+                /* copy plaintext to output */
+                XMEMCPY(output, encryptedContent, encryptedContentSz - padLen);
+
+                /* get implicit[1] unprotected attributes, optional */
+                wc_PKCS7_FreeDecodedAttrib(pkcs7->decodedAttrib, pkcs7->heap);
+                pkcs7->decodedAttrib = NULL;
+            #ifndef NO_PKCS7_STREAM
+                if (pkcs7->stream->flagOne)
+            #else
+                if (idx < pkiMsgSz)
+            #endif
+                {
+                    haveAttribs = 1;
+
+                    ret = wc_PKCS7_DecodeUnprotectedAttributes(pkcs7, pkiMsg,
+                                                       pkiMsgSz, &idx);
+                    if (ret != 0) {
+                        ForceZero(encryptedContent, encryptedContentSz);
+                        XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+                        ret = ASN_PARSE_E;
+                    }
+                }
+            }
+
+            if (ret == 0) {
+                ForceZero(encryptedContent, encryptedContentSz);
+                XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+
+                /* go back and check the version now that attribs have been processed */
+                if (pkcs7->version == 3 && version != 0) {
+                    WOLFSSL_MSG("Wrong PKCS#7 FirmwareEncryptedData version");
+                    return ASN_VERSION_E;
+                }
+
+                if (pkcs7->version != 3 &&
+                   ((haveAttribs == 0 && version != 0) ||
+                    (haveAttribs == 1 && version != 2))) {
+                    WOLFSSL_MSG("Wrong PKCS#7 EncryptedData version");
+                    return ASN_VERSION_E;
+                }
+                ret = encryptedContentSz - padLen;
+            }
+
+            if (ret != 0) break;
+        #ifndef NO_PKCS7_STREAM
+            wc_PKCS7_ResetStream(pkcs7);
+        #endif
+            wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_START);
+            break;
+
+        default:
+            WOLFSSL_MSG("Error in unknown PKCS#7 Decode Encrypted Data state");
+            return BAD_STATE_E;
+    }
+
+    if (ret != 0) {
+    #ifndef NO_PKCS7_STREAM
+        /* restart in error case */
+        wc_PKCS7_ResetStream(pkcs7);
+    #endif
+        wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_START);
+    }
+    return ret;
+}
+
+
+/* Function to set callback during decryption, this overrides the default
+ * decryption function and can be used for choosing a key at run time based
+ * on the parsed bundle so far.
+ * returns 0 on success
+ */
+int wc_PKCS7_SetDecodeEncryptedCb(PKCS7* pkcs7,
+        CallbackDecryptContent decryptionCb)
+{
+    if (pkcs7 != NULL) {
+        pkcs7->decryptionCb = decryptionCb;
+    }
+    return 0;
+}
+
+
+/* Set an optional user context that gets passed to callback
+ * returns 0 on success
+ */
+int wc_PKCS7_SetDecodeEncryptedCtx(PKCS7* pkcs7, void* ctx)
+{
+    if (pkcs7 != NULL) {
+        pkcs7->decryptionCtx = ctx;
+    }
+    return 0;
+}
+#endif /* NO_PKCS7_ENCRYPTED_DATA */
+
+#if defined(HAVE_LIBZ) && !defined(NO_PKCS7_COMPRESSED_DATA)
+
+/* build PKCS#7 compressedData content type, return encrypted size */
+int wc_PKCS7_EncodeCompressedData(PKCS7* pkcs7, byte* output, word32 outputSz)
+{
+    byte contentInfoSeq[MAX_SEQ_SZ];
+    byte contentInfoTypeOid[MAX_OID_SZ];
+    byte contentInfoContentSeq[MAX_SEQ_SZ]; /* EXPLICIT [0] */
+    byte compressedDataSeq[MAX_SEQ_SZ];
+    byte cmsVersion[MAX_VERSION_SZ];
+    byte compressAlgId[MAX_ALGO_SZ];
+    byte encapContentInfoSeq[MAX_SEQ_SZ];
+    byte contentTypeOid[MAX_OID_SZ];
+    byte contentSeq[MAX_SEQ_SZ];            /* EXPLICIT [0] */
+    byte contentOctetStr[MAX_OCTET_STR_SZ];
+
+    int ret;
+    word32 totalSz, idx;
+    word32 contentInfoSeqSz, contentInfoContentSeqSz, contentInfoTypeOidSz;
+    word32 compressedDataSeqSz, cmsVersionSz, compressAlgIdSz;
+    word32 encapContentInfoSeqSz, contentTypeOidSz, contentSeqSz;
+    word32 contentOctetStrSz;
+
+    byte* compressed;
+    word32 compressedSz;
+
+    if (pkcs7 == NULL || pkcs7->content == NULL || pkcs7->contentSz == 0 ||
+        output == NULL || outputSz == 0) {
+        return BAD_FUNC_ARG;
+    }
+
+    /* allocate space for compressed content. The libz code says the compressed
+     * buffer should be srcSz + 0.1% + 12. */
+    compressedSz = (pkcs7->contentSz + (word32)(pkcs7->contentSz * 0.001) + 12);
+    compressed = (byte*)XMALLOC(compressedSz, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+    if (compressed == NULL) {
+        WOLFSSL_MSG("Error allocating memory for CMS compressed content");
+        return MEMORY_E;
+    }
+
+    /* compress content */
+    ret = wc_Compress(compressed, compressedSz, pkcs7->content,
+                      pkcs7->contentSz, 0);
+    if (ret < 0) {
+        XFREE(compressed, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        return ret;
+    }
+    compressedSz = (word32)ret;
+
+    /* eContent OCTET STRING, working backwards */
+    contentOctetStrSz = SetOctetString(compressedSz, contentOctetStr);
+    totalSz = contentOctetStrSz + compressedSz;
+
+    /* EXPLICIT [0] eContentType */
+    contentSeqSz = SetExplicit(0, totalSz, contentSeq);
+    totalSz += contentSeqSz;
+
+    /* eContentType OBJECT IDENTIFIER */
+    ret = wc_SetContentType(pkcs7->contentOID, contentTypeOid,
+                            sizeof(contentTypeOid));
+    if (ret < 0) {
+        XFREE(compressed, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        return ret;
+    }
+
+    contentTypeOidSz = ret;
+    totalSz += contentTypeOidSz;
+
+    /* EncapsulatedContentInfo SEQUENCE */
+    encapContentInfoSeqSz = SetSequence(totalSz, encapContentInfoSeq);
+    totalSz += encapContentInfoSeqSz;
+
+    /* compressionAlgorithm AlgorithmIdentifier */
+    /* Only supports zlib for compression currently:
+     * id-alg-zlibCompress (1.2.840.113549.1.9.16.3.8) */
+    compressAlgIdSz = SetAlgoID(ZLIBc, compressAlgId, oidCompressType, 0);
+    totalSz += compressAlgIdSz;
+
+    /* version */
+    cmsVersionSz = SetMyVersion(0, cmsVersion, 0);
+    totalSz += cmsVersionSz;
+
+    /* CompressedData SEQUENCE */
+    compressedDataSeqSz = SetSequence(totalSz, compressedDataSeq);
+    totalSz += compressedDataSeqSz;
+
+    /* ContentInfo content EXPLICIT SEQUENCE */
+    contentInfoContentSeqSz = SetExplicit(0, totalSz, contentInfoContentSeq);
+    totalSz += contentInfoContentSeqSz;
+
+    /* ContentInfo ContentType (compressedData) */
+    if (pkcs7->version == 3) {
+        contentInfoTypeOidSz = 0;
+    }
+    else {
+        ret = wc_SetContentType(COMPRESSED_DATA, contentInfoTypeOid,
+                                sizeof(contentInfoTypeOid));
+        if (ret < 0) {
+            XFREE(compressed, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+            return ret;
+        }
+
+        contentInfoTypeOidSz = ret;
+        totalSz += contentInfoTypeOidSz;
+    }
+
+    /* ContentInfo SEQUENCE */
+    contentInfoSeqSz = SetSequence(totalSz, contentInfoSeq);
+    totalSz += contentInfoSeqSz;
+
+    if (outputSz < totalSz) {
+        XFREE(compressed, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        return BUFFER_E;
+    }
+
+    idx = 0;
+    XMEMCPY(output + idx, contentInfoSeq, contentInfoSeqSz);
+    idx += contentInfoSeqSz;
+    XMEMCPY(output + idx, contentInfoTypeOid, contentInfoTypeOidSz);
+    idx += contentInfoTypeOidSz;
+    XMEMCPY(output + idx, contentInfoContentSeq, contentInfoContentSeqSz);
+    idx += contentInfoContentSeqSz;
+    XMEMCPY(output + idx, compressedDataSeq, compressedDataSeqSz);
+    idx += compressedDataSeqSz;
+    XMEMCPY(output + idx, cmsVersion, cmsVersionSz);
+    idx += cmsVersionSz;
+    XMEMCPY(output + idx, compressAlgId, compressAlgIdSz);
+    idx += compressAlgIdSz;
+    XMEMCPY(output + idx, encapContentInfoSeq, encapContentInfoSeqSz);
+    idx += encapContentInfoSeqSz;
+    XMEMCPY(output + idx, contentTypeOid, contentTypeOidSz);
+    idx += contentTypeOidSz;
+    XMEMCPY(output + idx, contentSeq, contentSeqSz);
+    idx += contentSeqSz;
+    XMEMCPY(output + idx, contentOctetStr, contentOctetStrSz);
+    idx += contentOctetStrSz;
+    XMEMCPY(output + idx, compressed, compressedSz);
+    idx += compressedSz;
+
+    XFREE(compressed, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+
+    return idx;
+}
+
+/* unwrap and decompress PKCS#7/CMS compressedData object,
+ * returned decoded size */
+int wc_PKCS7_DecodeCompressedData(PKCS7* pkcs7, byte* pkiMsg, word32 pkiMsgSz,
+                                  byte* output, word32 outputSz)
+{
+    int length, version, ret;
+    word32 idx = 0, algOID, contentType;
+    byte tag;
+
+    byte* decompressed;
+    word32 decompressedSz;
+
+    if (pkcs7 == NULL || pkiMsg == NULL || pkiMsgSz == 0 ||
+        output == NULL || outputSz == 0) {
+        return BAD_FUNC_ARG;
+    }
+
+    /* get ContentInfo SEQUENCE */
     if (GetSequence(pkiMsg, &idx, &length, pkiMsgSz) < 0)
         return ASN_PARSE_E;
 
-    /* get version, check later */
-    haveAttribs = 0;
-    if (GetMyVersion(pkiMsg, &idx, &version, pkiMsgSz) < 0)
-        return ASN_PARSE_E;
-
-    /* remove EncryptedContentInfo */
-    if (GetSequence(pkiMsg, &idx, &length, pkiMsgSz) < 0)
-        return ASN_PARSE_E;
-
-    if (wc_GetContentType(pkiMsg, &idx, &contentType, pkiMsgSz) < 0)
+    if (pkcs7->version != 3) {
+        /* get ContentInfo contentType */
+        if (wc_GetContentType(pkiMsg, &idx, &contentType, pkiMsgSz) < 0)
+            return ASN_PARSE_E;
+
+        if (contentType != COMPRESSED_DATA)
+            return ASN_PARSE_E;
+    }
+
+    /* get ContentInfo content EXPLICIT SEQUENCE */
+    if (GetASNTag(pkiMsg, &idx, &tag, pkiMsgSz) < 0)
         return ASN_PARSE_E;
 
-    if (GetAlgoId(pkiMsg, &idx, &encOID, oidBlkType, pkiMsgSz) < 0)
-        return ASN_PARSE_E;
-
-    expBlockSz = wc_PKCS7_GetOIDBlockSize(encOID);
-    if (expBlockSz < 0)
-        return expBlockSz;
-
-    /* get block cipher IV, stored in OPTIONAL parameter of AlgoID */
-    if (pkiMsg[idx++] != ASN_OCTET_STRING)
+    if (tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0))
         return ASN_PARSE_E;
 
     if (GetLength(pkiMsg, &idx, &length, pkiMsgSz) < 0)
         return ASN_PARSE_E;
 
-    if (length != expBlockSz) {
-        WOLFSSL_MSG("Incorrect IV length, must be of content alg block size");
+    /* get CompressedData SEQUENCE */
+    if (GetSequence(pkiMsg, &idx, &length, pkiMsgSz) < 0)
+        return ASN_PARSE_E;
+
+    /* get version */
+    if (GetMyVersion(pkiMsg, &idx, &version, pkiMsgSz) < 0)
+        return ASN_PARSE_E;
+
+    if (version != 0) {
+        WOLFSSL_MSG("CMS CompressedData version MUST be 0, but is not");
         return ASN_PARSE_E;
     }
 
-    XMEMCPY(tmpIv, &pkiMsg[idx], length);
-    idx += length;
-
-    /* read encryptedContent, cont[0] */
-    if (pkiMsg[idx++] != (ASN_CONTEXT_SPECIFIC | 0))
+    /* get CompressionAlgorithmIdentifier */
+    if (GetAlgoId(pkiMsg, &idx, &algOID, oidIgnoreType, pkiMsgSz) < 0)
+        return ASN_PARSE_E;
+
+    /* Only supports zlib for compression currently:
+     * id-alg-zlibCompress (1.2.840.113549.1.9.16.3.8) */
+    if (algOID != ZLIBc) {
+        WOLFSSL_MSG("CMS CompressedData only supports zlib algorithm");
         return ASN_PARSE_E;
-
-    if (GetLength(pkiMsg, &idx, &encryptedContentSz, pkiMsgSz) <= 0)
+    }
+
+    /* get EncapsulatedContentInfo SEQUENCE */
+    if (GetSequence(pkiMsg, &idx, &length, pkiMsgSz) < 0)
+        return ASN_PARSE_E;
+
+    /* get ContentType OID */
+    if (wc_GetContentType(pkiMsg, &idx, &contentType, pkiMsgSz) < 0)
         return ASN_PARSE_E;
 
-    encryptedContent = (byte*)XMALLOC(encryptedContentSz, pkcs7->heap,
-                                      DYNAMIC_TYPE_PKCS7);
-    if (encryptedContent == NULL)
+    pkcs7->contentOID = contentType;
+
+    /* get eContent EXPLICIT SEQUENCE */
+    if (GetASNTag(pkiMsg, &idx, &tag, pkiMsgSz) < 0)
+        return ASN_PARSE_E;
+
+    if (tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0))
+        return ASN_PARSE_E;
+
+    if (GetLength(pkiMsg, &idx, &length, pkiMsgSz) < 0)
+        return ASN_PARSE_E;
+
+    /* get content OCTET STRING */
+    if (GetASNTag(pkiMsg, &idx, &tag, pkiMsgSz) < 0)
+        return ASN_PARSE_E;
+
+    if (tag != ASN_OCTET_STRING)
+        return ASN_PARSE_E;
+
+    if (GetLength(pkiMsg, &idx, &length, pkiMsgSz) < 0)
+        return ASN_PARSE_E;
+
+    /* allocate space for decompressed data */
+    decompressed = (byte*)XMALLOC(length, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+    if (decompressed == NULL) {
+        WOLFSSL_MSG("Error allocating memory for CMS decompression buffer");
         return MEMORY_E;
-
-    XMEMCPY(encryptedContent, &pkiMsg[idx], encryptedContentSz);
-    idx += encryptedContentSz;
-
-    /* decrypt encryptedContent */
-    ret = wc_PKCS7_DecryptContent(encOID, pkcs7->encryptionKey,
-                                  pkcs7->encryptionKeySz, tmpIv, expBlockSz,
-                                  encryptedContent, encryptedContentSz,
-                                  encryptedContent);
-    if (ret != 0) {
-        XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+    }
+
+    /* decompress content */
+    ret = wc_DeCompress(decompressed, length, &pkiMsg[idx], length);
+    if (ret < 0) {
+        XFREE(decompressed, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
         return ret;
     }
-
-    padLen = encryptedContent[encryptedContentSz-1];
-
-    /* copy plaintext to output */
-    XMEMCPY(output, encryptedContent, encryptedContentSz - padLen);
-
-    /* get implicit[1] unprotected attributes, optional */
-    pkcs7->decodedAttrib = NULL;
-    if (idx < pkiMsgSz) {
-
-        haveAttribs = 1;
-
-        ret = wc_PKCS7_DecodeUnprotectedAttributes(pkcs7, pkiMsg,
-                                                   pkiMsgSz, &idx);
-        if (ret != 0) {
-            ForceZero(encryptedContent, encryptedContentSz);
-            XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
-            return ASN_PARSE_E;
-        }
-    }
-
-    /* go back and check the version now that attribs have been processed */
-    if ((haveAttribs == 0 && version != 0) ||
-        (haveAttribs == 1 && version != 2) ) {
-        XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
-        WOLFSSL_MSG("Wrong PKCS#7 EncryptedData version");
-        return ASN_VERSION_E;
-    }
-
-    ForceZero(encryptedContent, encryptedContentSz);
-    XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
-
-    return encryptedContentSz - padLen;
-}
-
-#endif /* NO_PKCS7_ENCRYPTED_DATA */
+    decompressedSz = (word32)ret;
+
+    /* get content */
+    if (outputSz < decompressedSz) {
+        WOLFSSL_MSG("CMS output buffer too small to hold decompressed data");
+        XFREE(decompressed, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        return BUFFER_E;
+    }
+
+    XMEMCPY(output, decompressed, decompressedSz);
+    XFREE(decompressed, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+
+    return decompressedSz;
+}
+
+#endif /* HAVE_LIBZ && !NO_PKCS7_COMPRESSED_DATA */
 
 #else  /* HAVE_PKCS7 */
 
--- a/wolfcrypt/src/poly1305.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/poly1305.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* poly1305.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -24,6 +24,7 @@
  * and Daniel J. Bernstein
  */
 
+
 #ifdef HAVE_CONFIG_H
     #include <config.h>
 #endif
@@ -56,6 +57,7 @@
 
     #if defined(__GNUC__) && ((__GNUC__ < 4) || \
                               (__GNUC__ == 4 && __GNUC_MINOR__ <= 8))
+        #undef  NO_AVX2_SUPPORT
         #define NO_AVX2_SUPPORT
     #endif
     #if defined(__clang__) && ((__clang_major__ < 3) || \
@@ -117,827 +119,55 @@
 #endif
 
 #ifdef USE_INTEL_SPEEDUP
+#ifdef __cplusplus
+    extern "C" {
+#endif
+
 #ifdef HAVE_INTEL_AVX1
 /* Process one block (16 bytes) of data.
  *
  * ctx  Poly1305 context.
  * m    One block of message data.
  */
-static void poly1305_block_avx(Poly1305* ctx, const unsigned char *m)
-{
-        __asm__ __volatile__ (
-            "movq	(%[ctx]), %%r15\n\t"
-            "movq	24(%[ctx]), %%r8\n\t"
-            "movq	32(%[ctx]), %%r9\n\t"
-            "movq	40(%[ctx]), %%r10\n\t"
-            "xorq	%%rbx, %%rbx\n\t"
-            "movb	%[nfin], %%bl\n\t"
-            "# h += m\n\t"
-            "movq	 (%[m]), %%r11\n\t"
-            "movq	8(%[m]), %%r12\n\t"
-            "addq	%%r11, %%r8\n\t"
-            "adcq	%%r12, %%r9\n\t"
-            "movq	8(%[ctx]), %%rax\n\t"
-            "adcq	%%rbx, %%r10\n\t"
-            "# r[1] * h[0] => rdx, rax ==> t2, t1\n\t"
-            "mulq	%%r8\n\t"
-            "movq	%%rax, %%r12\n\t"
-            "movq	%%rdx, %%r13\n\t"
-            "# r[0] * h[1] => rdx, rax ++> t2, t1\n\t"
-            "movq	%%r15, %%rax\n\t"
-            "mulq	%%r9\n\t"
-            "addq	%%rax, %%r12\n\t"
-            "movq	%%r15, %%rax\n\t"
-            "adcq	%%rdx, %%r13\n\t"
-            "# r[0] * h[0] => rdx, rax ==> t4, t0\n\t"
-            "mulq	%%r8\n\t"
-            "movq	%%rax, %%r11\n\t"
-            "movq	%%rdx, %%r8\n\t"
-            "# r[1] * h[1] => rdx, rax =+> t3, t2\n\t"
-            "movq	8(%[ctx]), %%rax\n\t"
-            "mulq	%%r9\n\t"
-            "#   r[0] * h[2] +> t2\n\t"
-            "addq	352(%[ctx],%%r10,8), %%r13\n\t"
-            "movq	%%rdx, %%r14\n\t"
-            "addq	%%r8, %%r12\n\t"
-            "adcq	%%rax, %%r13\n\t"
-            "#   r[1] * h[2] +> t3\n\t"
-            "adcq	408(%[ctx],%%r10,8), %%r14\n\t"
-            "# r * h in r14, r13, r12, r11 \n\t"
-            "# h = (r * h) mod 2^130 - 5\n\t"
-            "movq	%%r13, %%r10\n\t"
-            "andq	  $-4, %%r13\n\t"
-            "andq	   $3, %%r10\n\t"
-            "addq	%%r13, %%r11\n\t"
-            "movq	%%r13, %%r8\n\t"
-            "adcq	%%r14, %%r12\n\t"
-            "adcq	   $0, %%r10\n\t"
-            "shrdq	   $2, %%r14, %%r8\n\t"
-            "shrq	   $2, %%r14\n\t"
-            "addq	%%r11, %%r8\n\t"
-            "adcq	%%r14, %%r12\n\t"
-            "movq	%%r12, %%r9\n\t"
-            "adcq	   $0, %%r10\n\t"
-            "# h in r10, r9, r8 \n\t"
-            "# Store h to ctx\n\t"
-            "movq       %%r8, 24(%[ctx])\n\t"
-            "movq       %%r9, 32(%[ctx])\n\t"
-            "movq       %%r10, 40(%[ctx])\n\t"
-            :
-            : [m] "r" (m), [ctx] "r" (ctx), [nfin] "m" (ctx->finished)
-            : "rax", "rdx", "r11", "r12", "r13", "r14", "r15", "rbx",
-              "r8", "r9", "r10", "memory"
-        );
-}
-
+extern void poly1305_block_avx(Poly1305* ctx, const unsigned char *m);
 /* Process multiple blocks (n * 16 bytes) of data.
  *
  * ctx    Poly1305 context.
  * m      Blocks of message data.
  * bytes  The number of bytes to process.
  */
-POLY1305_NOINLINE static void poly1305_blocks_avx(Poly1305* ctx,
-                                           const unsigned char* m, size_t bytes)
-{
-        __asm__ __volatile__ (
-            "movq	(%[ctx]), %%r15\n\t"
-            "movq	24(%[ctx]), %%r8\n\t"
-            "movq	32(%[ctx]), %%r9\n\t"
-            "movq	40(%[ctx]), %%r10\n"
-        "L_avx_start:\n\t"
-            "# h += m\n\t"
-            "movq	 (%[m]), %%r11\n\t"
-            "movq	8(%[m]), %%r12\n\t"
-            "addq	%%r11, %%r8\n\t"
-            "adcq	%%r12, %%r9\n\t"
-            "movq	8(%[ctx]), %%rax\n\t"
-            "adcq	$0, %%r10\n\t"
-            "# r[1] * h[0] => rdx, rax ==> t2, t1\n\t"
-            "mulq	%%r8\n\t"
-            "movq	%%rax, %%r12\n\t"
-            "movq	%%rdx, %%r13\n\t"
-            "# r[0] * h[1] => rdx, rax ++> t2, t1\n\t"
-            "movq	%%r15, %%rax\n\t"
-            "mulq	%%r9\n\t"
-            "addq	%%rax, %%r12\n\t"
-            "movq	%%r15, %%rax\n\t"
-            "adcq	%%rdx, %%r13\n\t"
-            "# r[0] * h[0] => rdx, rax ==> t4, t0\n\t"
-            "mulq	%%r8\n\t"
-            "movq	%%rax, %%r11\n\t"
-            "movq	%%rdx, %%r8\n\t"
-            "# r[1] * h[1] => rdx, rax =+> t3, t2\n\t"
-            "movq	8(%[ctx]), %%rax\n\t"
-            "mulq	%%r9\n\t"
-            "#   r[0] * h[2] +> t2\n\t"
-            "addq	360(%[ctx],%%r10,8), %%r13\n\t"
-            "movq	%%rdx, %%r14\n\t"
-            "addq	%%r8, %%r12\n\t"
-            "adcq	%%rax, %%r13\n\t"
-            "#   r[1] * h[2] +> t3\n\t"
-            "adcq	416(%[ctx],%%r10,8), %%r14\n\t"
-            "# r * h in r14, r13, r12, r11 \n\t"
-            "# h = (r * h) mod 2^130 - 5\n\t"
-            "movq	%%r13, %%r10\n\t"
-            "andq	  $-4, %%r13\n\t"
-            "andq	   $3, %%r10\n\t"
-            "addq	%%r13, %%r11\n\t"
-            "movq	%%r13, %%r8\n\t"
-            "adcq	%%r14, %%r12\n\t"
-            "adcq	   $0, %%r10\n\t"
-            "shrdq	   $2, %%r14, %%r8\n\t"
-            "shrq	   $2, %%r14\n\t"
-            "addq	%%r11, %%r8\n\t"
-            "adcq	%%r14, %%r12\n\t"
-            "movq	%%r12, %%r9\n\t"
-            "adcq	   $0, %%r10\n\t"
-            "# h in r10, r9, r8 \n\t"
-            "# Next block from message\n\t"
-            "addq	$16, %[m]\n\t"
-            "subq	$16, %[bytes]\n\t"
-            "cmp        $16, %[bytes]\n\t"
-            "jge	L_avx_start\n\t"
-            "# Store h to ctx\n\t"
-            "movq	%%r8, 24(%[ctx])\n\t"
-            "movq	%%r9, 32(%[ctx])\n\t"
-            "movq	%%r10, 40(%[ctx])\n\t"
-            : [m] "+r" (m), [bytes] "+r" (bytes)
-            : [ctx] "r" (ctx)
-            : "rax", "rdx", "r11", "r12", "r13", "r14", "r15",
-              "r8", "r9", "r10", "memory"
-        );
-}
-
+extern void poly1305_blocks_avx(Poly1305* ctx, const unsigned char* m,
+                                size_t bytes);
 /* Set the key to use when processing data.
  * Initialize the context.
  *
  * ctx  Poly1305 context.
  * key  The key data (16 bytes).
  */
-static void poly1305_setkey_avx(Poly1305* ctx, const byte* key)
-{
-    int i;
-
-    ctx->r[0] = *(word64*)(key + 0) & 0x0ffffffc0fffffffL;
-    ctx->r[1] = *(word64*)(key + 8) & 0x0ffffffc0ffffffcL;
-
-    for (i=0; i<7; i++) {
-        ctx->hm[i + 0] = ctx->r[0] * i;
-        ctx->hm[i + 7] = ctx->r[1] * i;
-    }
-
-    /* h (accumulator) = 0 */
-    ctx->h[0] = 0;
-    ctx->h[1] = 0;
-    ctx->h[2] = 0;
-
-    /* save pad for later */
-    ctx->pad[0] = *(word64*)(key + 16);
-    ctx->pad[1] = *(word64*)(key + 24);
-
-    ctx->leftover = 0;
-    ctx->finished = 1;
-}
-
+extern void poly1305_setkey_avx(Poly1305* ctx, const byte* key);
 /* Calculate the final result - authentication data.
  * Zeros out the private data in the context.
  *
  * ctx  Poly1305 context.
  * mac  Buffer to hold 16 bytes.
  */
-static void poly1305_final_avx(Poly1305* ctx, byte* mac)
-{
-    word64 h0, h1, h2;
-
-    /* process the remaining block */
-    if (ctx->leftover) {
-        size_t i = ctx->leftover;
-        ctx->buffer[i] = 1;
-        for (i = i + 1; i < POLY1305_BLOCK_SIZE; i++)
-            ctx->buffer[i] = 0;
-        ctx->finished = 0;
-        poly1305_block_avx(ctx, ctx->buffer);
-    }
-
-    h0 = ctx->h[0];
-    h1 = ctx->h[1];
-    h2 = ctx->h[2];
-
-    /* h %= p */
-    /* h = (h + pad) */
-    __asm__ __volatile__ (
-        "# mod 2^130 - 5\n\t"
-        "movq	%[h2],  %%r13\n\t"
-        "andq	 $0x3, %[h2]\n\t"
-        "shrq	 $0x2, %%r13\n\t"
-        "leaq	(%%r13, %%r13, 4), %%r13\n\t"
-        "add	 %%r13, %[h0]\n\t"
-        "adc	   $0, %[h1]\n\t"
-        "adc	   $0, %[h2]\n\t"
-        "# Fixup when between (1 << 130) - 1 and (1 << 130) - 5\n\t"
-        "movq	%[h0], %%r13\n\t"
-        "movq	%[h1], %%r14\n\t"
-        "movq	%[h2], %%r15\n\t"
-        "addq	$5, %%r13\n\t"
-        "adcq	$0, %%r14\n\t"
-        "adcq	$0, %%r15\n\t"
-        "movq	%%r15, %%r12\n\t"
-        "andq	$3, %%r15\n\t"
-        "cmpq   $4, %%r12\n\t"
-        "cmove	%%r13, %[h0]\n\t"
-        "cmove	%%r14, %[h1]\n\t"
-        "cmove	%%r15, %[h2]\n\t"
-        "# h += pad\n\t"
-        "add	%[p0], %[h0]\n\t"
-        "adc	%[p1], %[h1]\n\t"
-        "movq	%[h0], (%[m])\n\t"
-        "movq	%[h1], 8(%[m])\n\t"
-        : [h0] "+r" (h0), [h1] "+r" (h1), [h2] "+r" (h2),
-          [p0] "+r" (ctx->pad[0]), [p1] "+r" (ctx->pad[1])
-        : [m] "r" (mac)
-        : "memory", "r15", "r14", "r13", "r12"
-    );
-
-    /* zero out the state */
-    ctx->h[0] = 0;
-    ctx->h[1] = 0;
-    ctx->h[2] = 0;
-    ctx->r[0] = 0;
-    ctx->r[1] = 0;
-    ctx->pad[0] = 0;
-    ctx->pad[1] = 0;
-}
+extern void poly1305_final_avx(Poly1305* ctx, byte* mac);
 #endif
 
 #ifdef HAVE_INTEL_AVX2
-#if defined(_MSC_VER)
-    #define POLY1305_NOINLINE __declspec(noinline)
-#elif defined(__GNUC__)
-    #define POLY1305_NOINLINE __attribute__((noinline))
-#else
-    #define POLY1305_NOINLINE
-#endif
-
-/* Load H into five 256-bit registers.
- *
- * h is the memory location of the data - 26 of 32 bits.
- * h0-h4 the 4 H values with 26 bits stored in 64 for multiply.
- */
-#define LOAD_H(h, h0, h1, h2, h3, h4)  \
-    "vmovdqu	   ("#h"), "#h0"\n\t"  \
-    "vmovdqu	 32("#h"), "#h1"\n\t"  \
-    "vmovdqu	 64("#h"), "#h2"\n\t"  \
-    "vmovdqu	 96("#h"), "#h3"\n\t"  \
-    "vmovdqu	128("#h"), "#h4"\n\t"
-
-/* Store H, five 256-bit registers, packed.
- *
- * h is the memory location of the data - 26 bits in 32.
- * h0-h4 the 4 H values with 26 bits stored in 64.
- * x4 is the xmm register of h4.
- */
-#define STORE_H(h, h0, h1, h2, h3, h4, x4)      \
-    "vmovdqu	 "#h0",    ("#h")\n\t"          \
-    "vmovdqu	 "#h1",  32("#h")\n\t"          \
-    "vmovdqu	 "#h2",  64("#h")\n\t"          \
-    "vmovdqu	 "#h3",  96("#h")\n\t"          \
-    "vmovdqu	 "#h4", 128("#h")\n\t"
-
-/* Load four powers of r into position to be multiplied by the 4 H values.
- *
- * r0-r4 holds the loaded values with 26 bits stored in 64 for multiply.
- * t0-t3 are temporary registers.
- */
-#define LOAD_Rx4(r0, r1, r2, r3, r4,                     \
-                 t0, t1, t2, t3)                         \
-    "vmovdqu		224(%[ctx]), "#r3"\n\t"          \
-    "vmovdqu		256(%[ctx]), "#r2"\n\t"          \
-    "vmovdqu		288(%[ctx]), "#r1"\n\t"          \
-    "vmovdqu		320(%[ctx]), "#r0"\n\t"          \
-    "vpermq		$0xd8, "#r0", "#r0"\n\t"         \
-    "vpermq		$0xd8, "#r1", "#r1"\n\t"         \
-    "vpermq		$0xd8, "#r2", "#r2"\n\t"         \
-    "vpermq		$0xd8, "#r3", "#r3"\n\t"         \
-    "vpunpcklqdq	"#r1", "#r0", "#t0"\n\t"         \
-    "vpunpckhqdq	"#r1", "#r0", "#t1"\n\t"         \
-    "vpunpcklqdq	"#r3", "#r2", "#t2"\n\t"         \
-    "vpunpckhqdq	"#r3", "#r2", "#t3"\n\t"         \
-    "vperm2i128		$0x20, "#t2", "#t0", "#r0"\n\t"  \
-    "vperm2i128		$0x31, "#t2", "#t0", "#r2"\n\t"  \
-    "vperm2i128		$0x20, "#t3", "#t1", "#r4"\n\t"  \
-    "vpsrlq		  $32, "#r0", "#r1"\n\t"         \
-    "vpsrlq		  $32, "#r2", "#r3"\n\t"
-
-/* Load the r^4 value into position to be multiplied by all 4 H values.
- *
- * r4 holds r^4 as five 26 bits each in 32.
- * r0-r4 holds the loaded values with 26 bits stored in 64 for multiply.
- * t0-t1 are temporary registers.
- */
-#define LOAD_R4(r4, r40, r41, r42, r43, r44, \
-                t0, t1)                      \
-    "vmovdqu	"#r4", "#t0"\n\t"            \
-    "vpermq	 $0x0, "#t0", "#r40"\n\t"    \
-    "vpsrlq	  $32, "#t0", "#t1"\n\t"     \
-    "vpermq	$0x55, "#t0", "#r42"\n\t"    \
-    "vpermq	$0xaa, "#t0", "#r44"\n\t"    \
-    "vpermq	 $0x0, "#t1", "#r41"\n\t"    \
-    "vpermq	$0x55, "#t1", "#r43"\n\t"
-
-/* Multiply the top 4 26-bit values in 64 bits of each H by 5 for reduction in
- * multiply.
- *
- * s1-s4 are each 64 bit value in r1-r4 multiplied by 5.
- * r1-r4 are the top 4
- */
-#define MUL5(s1, s2, s3, s4, r1, r2, r3, r4) \
-    "vpslld	   $2, "#r1", "#s1"\n\t"     \
-    "vpslld	   $2, "#r2", "#s2"\n\t"     \
-    "vpslld	   $2, "#r3", "#s3"\n\t"     \
-    "vpslld	   $2, "#r4", "#s4"\n\t"     \
-    "vpaddq	"#s1", "#r1", "#s1"\n\t"     \
-    "vpaddq	"#s2", "#r2", "#s2"\n\t"     \
-    "vpaddq	"#s3", "#r3", "#s3"\n\t"     \
-    "vpaddq	"#s4", "#r4", "#s4"\n\t"
-
-/* Add the 4 H values together.
- * Each 64 bits in a register is 26 bits of one of the H values.
- *
- * h0-h4 contains the 4 H values.
- * t1-t4 are temporary registers.
- */
-#define FINALIZE_H(h0, h1, h2, h3, h4,    \
-                   t0, t1, t2, t3, t4)    \
-    "vpsrldq	$8, "#h0", "#t0"\n\t"     \
-    "vpsrldq	$8, "#h1", "#t1"\n\t"     \
-    "vpsrldq	$8, "#h2", "#t2"\n\t"     \
-    "vpsrldq	$8, "#h3", "#t3"\n\t"     \
-    "vpsrldq	$8, "#h4", "#t4"\n\t"     \
-    "vpaddq	"#h0", "#t0", "#h0"\n\t"  \
-    "vpaddq	"#h1", "#t1", "#h1"\n\t"  \
-    "vpaddq	"#h2", "#t2", "#h2"\n\t"  \
-    "vpaddq	"#h3", "#t3", "#h3"\n\t"  \
-    "vpaddq	"#h4", "#t4", "#h4"\n\t"  \
-    "vpermq	$0x02, "#h0", "#t0"\n\t"  \
-    "vpermq	$0x02, "#h1", "#t1"\n\t"  \
-    "vpermq	$0x02, "#h2", "#t2"\n\t"  \
-    "vpermq	$0x02, "#h3", "#t3"\n\t"  \
-    "vpermq	$0x02, "#h4", "#t4"\n\t"  \
-    "vpaddq	"#h0", "#t0", "#h0"\n\t"  \
-    "vpaddq	"#h1", "#t1", "#h1"\n\t"  \
-    "vpaddq	"#h2", "#t2", "#h2"\n\t"  \
-    "vpaddq	"#h3", "#t3", "#h3"\n\t"  \
-    "vpaddq	"#h4", "#t4", "#h4"\n\t"
-
-/* Move 32 bits from each xmm register to a 32 bit register.
- *
- * x0-x4 are the xmm version of the ymm registers used.
- * t0-t4 are the 32-bit registers to store data in.
- */
-#define MOVE_TO_32(x0, x1, x2, x3, x4,  \
-                   t0, t1, t2, t3, t4)  \
-    "vmovd	"#x0", "#t0"\n\t"       \
-    "vmovd	"#x1", "#t1"\n\t"       \
-    "vmovd	"#x2", "#t2"\n\t"       \
-    "vmovd	"#x3", "#t3"\n\t"       \
-    "vmovd	"#x4", "#t4"\n\t"
-
-/* Multiply using AVX2 instructions.
- * Each register contains up to 32 bits of data in 64 bits.
- * This is a 4 way parallel multiply.
- *
- * h0-h4 contain 4 H values with the 32 bits of each per register.
- * r0-r4 contain the 4 powers of r.
- * s1-s4 contain r1-r4 times 5.
- * t0-t4 and v0-v3 are temporary registers.
- */
-#define MUL_AVX2(h0, h1, h2, h3, h4,        \
-                 r0, r1, r2, r3, r4,        \
-                 s1, s2, s3, s4,            \
-                 t0, t1, t2, t3, t4,        \
-                 v0, v1, v2, v3)            \
-    "vpmuludq	"#s1", "#h4", "#t0"\n\t"    \
-    "vpmuludq	"#s2", "#h3", "#v0"\n\t"    \
-    "vpmuludq	"#s2", "#h4", "#t1"\n\t"    \
-    "vpmuludq	"#s3", "#h3", "#v1"\n\t"    \
-    "vpmuludq	"#s3", "#h4", "#t2"\n\t"    \
-    "vpaddq	"#t0", "#v0", "#t0"\n\t"    \
-    "vpmuludq	"#s3", "#h2", "#v2"\n\t"    \
-    "vpmuludq	"#s4", "#h4", "#t3"\n\t"    \
-    "vpaddq	"#t1", "#v1", "#t1"\n\t"    \
-    "vpmuludq	"#s4", "#h1", "#v3"\n\t"    \
-    "vpmuludq	"#s4", "#h2", "#v0"\n\t"    \
-    "vpaddq	"#t0", "#v2", "#t0"\n\t"    \
-    "vpmuludq	"#s4", "#h3", "#v1"\n\t"    \
-    "vpmuludq	"#r0", "#h3", "#v2"\n\t"    \
-    "vpaddq	"#t0", "#v3", "#t0"\n\t"    \
-    "vpmuludq	"#r0", "#h4", "#t4"\n\t"    \
-    "vpaddq	"#t1", "#v0", "#t1"\n\t"    \
-    "vpmuludq	"#r0", "#h0", "#v3"\n\t"    \
-    "vpaddq	"#t2", "#v1", "#t2"\n\t"    \
-    "vpmuludq	"#r0", "#h1", "#v0"\n\t"    \
-    "vpaddq	"#t3", "#v2", "#t3"\n\t"    \
-    "vpmuludq	"#r0", "#h2", "#v1"\n\t"    \
-    "vpmuludq	"#r1", "#h2", "#v2"\n\t"    \
-    "vpaddq	"#t0", "#v3", "#t0"\n\t"    \
-    "vpmuludq	"#r1", "#h3", "#v3"\n\t"    \
-    "vpaddq	"#t1", "#v0", "#t1"\n\t"    \
-    "vpmuludq	"#r1", "#h0", "#v0"\n\t"    \
-    "vpaddq	"#t2", "#v1", "#t2"\n\t"    \
-    "vpmuludq	"#r1", "#h1", "#v1"\n\t"    \
-    "vpaddq	"#t3", "#v2", "#t3"\n\t"    \
-    "vpmuludq	"#r2", "#h1", "#v2"\n\t"    \
-    "vpaddq	"#t4", "#v3", "#t4"\n\t"    \
-    "vpmuludq	"#r2", "#h2", "#v3"\n\t"    \
-    "vpaddq	"#t1", "#v0", "#t1"\n\t"    \
-    "vpmuludq	"#r2", "#h0", "#v0"\n\t"    \
-    "vpaddq	"#t2", "#v1", "#t2"\n\t"    \
-    "vpmuludq	"#r3", "#h0", "#v1"\n\t"    \
-    "vpaddq	"#t3", "#v2", "#t3"\n\t"    \
-    "vpmuludq	"#r3", "#h1", "#v2"\n\t"    \
-    "vpaddq	"#t4", "#v3", "#t4"\n\t"    \
-    "vpmuludq	"#r4", "#h0", "#v3"\n\t"    \
-    "vpaddq	"#t2", "#v0", "#t2"\n\t"    \
-    "vpaddq	"#t3", "#v1", "#t3"\n\t"    \
-    "vpaddq	"#t4", "#v2", "#t4"\n\t"    \
-    "vpaddq	"#t4", "#v3", "#t4"\n\t"
-
-/* Load the 4 blocks of the message.
- *
- * m the address of the message to load.
- * m0-m4 is the loaded message with 32 bits in 64. Loaded so data is parallel.
- * hi is the high bits of the 4 m (1 << 128 as not final block).
- * z is zero.
- */
-#define LOAD_M(m, m0, m1, m2, m3, m4, hi, z)     \
-    "vmovdqu      (%[m]), "#m0"\n\t"             \
-    "vmovdqu    32(%[m]), "#m1"\n\t"             \
-    "vperm2i128	$0x20, "#m1", "#m0", "#m2"\n\t"  \
-    "vperm2i128	$0x31, "#m1", "#m0", "#m0"\n\t"  \
-    "vpunpckldq	"#m0", "#m2", "#m1"\n\t"         \
-    "vpunpckhdq	"#m0", "#m2", "#m3"\n\t"         \
-    "vpunpckldq	 "#z", "#m1", "#m0"\n\t"         \
-    "vpunpckhdq	 "#z", "#m1", "#m1"\n\t"         \
-    "vpunpckldq	 "#z", "#m3", "#m2"\n\t"         \
-    "vpunpckhdq	 "#z", "#m3", "#m3"\n\t"         \
-    "vmovdqu	"#hi", "#m4"\n\t"                \
-    "vpsllq	   $6, "#m1", "#m1"\n\t"         \
-    "vpsllq	  $12, "#m2", "#m2"\n\t"         \
-    "vpsllq	  $18, "#m3", "#m3"\n\t"
-
-
-/* Multiply using AVX2 instructions - adding with message.
- * Each register contains up to 32 bits of data in 64 bits.
- * This is a 4 way parallel multiply.
- * The message data is loaded first and the multiplication adds into it.
- *
- * h0-h4 contain 4 H values with the 32 bits of each per register.
- * r0-r4 contain the 4 powers of r.
- * s1-s4 contain r1-r4 times 5.
- * t0-t4 and v0-v3 are temporary registers.
- * hi is the high bits of the 4 m (1 << 128 as not final block).
- * z is zero.
- */
-#define MUL_ADD_AVX2(h0, h1, h2, h3, h4,         \
-                     r0, r1, r2, r3, r4,         \
-                     s1, s2, s3, s4,             \
-                     t0, t1, t2, t3, t4,         \
-                     v0, v1, v2, v3,             \
-                     hi, z)                      \
-    "vmovdqu      (%[m]), "#t0"\n\t"             \
-    "vmovdqu    32(%[m]), "#t1"\n\t"             \
-    "vperm2i128	$0x20, "#t1", "#t0", "#t2"\n\t"  \
-    "vperm2i128	$0x31, "#t1", "#t0", "#t0"\n\t"  \
-    "vpunpckldq	"#t0", "#t2", "#t1"\n\t"         \
-    "vpunpckhdq	"#t0", "#t2", "#t3"\n\t"         \
-    "vpunpckldq	 "#z", "#t1", "#t0"\n\t"         \
-    "vpunpckhdq	 "#z", "#t1", "#t1"\n\t"         \
-    "vpunpckldq	 "#z", "#t3", "#t2"\n\t"         \
-    "vpunpckhdq	 "#z", "#t3", "#t3"\n\t"         \
-    "vmovdqu	"#hi", "#t4"\n\t"                \
-    "vpsllq	   $6, "#t1", "#t1"\n\t"         \
-    "vpsllq	  $12, "#t2", "#t2"\n\t"         \
-    "vpsllq	  $18, "#t3", "#t3"\n\t"         \
-    "vpmuludq	"#s1", "#h4", "#v0"\n\t"         \
-    "vpaddq     "#t0", "#v0", "#t0"\n\t"         \
-    "vpmuludq	"#s2", "#h3", "#v0"\n\t"         \
-    "vpmuludq	"#s2", "#h4", "#v1"\n\t"         \
-    "vpaddq     "#t1", "#v1", "#t1"\n\t"         \
-    "vpmuludq	"#s3", "#h3", "#v1"\n\t"         \
-    "vpmuludq	"#s3", "#h4", "#v2"\n\t"         \
-    "vpaddq     "#t2", "#v2", "#t2"\n\t"         \
-    "vpaddq	"#t0", "#v0", "#t0"\n\t"         \
-    "vpmuludq	"#s3", "#h2", "#v2"\n\t"         \
-    "vpmuludq	"#s4", "#h4", "#v3"\n\t"         \
-    "vpaddq     "#t3", "#v3", "#t3"\n\t"         \
-    "vpaddq	"#t1", "#v1", "#t1"\n\t"         \
-    "vpmuludq	"#s4", "#h1", "#v3"\n\t"         \
-    "vpmuludq	"#s4", "#h2", "#v0"\n\t"         \
-    "vpaddq	"#t0", "#v2", "#t0"\n\t"         \
-    "vpmuludq	"#s4", "#h3", "#v1"\n\t"         \
-    "vpmuludq	"#r0", "#h3", "#v2"\n\t"         \
-    "vpaddq	"#t0", "#v3", "#t0"\n\t"         \
-    "vpmuludq	"#r0", "#h4", "#v3"\n\t"         \
-    "vpaddq	"#t4", "#v3", "#t4"\n\t"         \
-    "vpaddq	"#t1", "#v0", "#t1"\n\t"         \
-    "vpmuludq	"#r0", "#h0", "#v3"\n\t"         \
-    "vpaddq	"#t2", "#v1", "#t2"\n\t"         \
-    "vpmuludq	"#r0", "#h1", "#v0"\n\t"         \
-    "vpaddq	"#t3", "#v2", "#t3"\n\t"         \
-    "vpmuludq	"#r0", "#h2", "#v1"\n\t"         \
-    "vpmuludq	"#r1", "#h2", "#v2"\n\t"         \
-    "vpaddq	"#t0", "#v3", "#t0"\n\t"         \
-    "vpmuludq	"#r1", "#h3", "#v3"\n\t"         \
-    "vpaddq	"#t1", "#v0", "#t1"\n\t"         \
-    "vpmuludq	"#r1", "#h0", "#v0"\n\t"         \
-    "vpaddq	"#t2", "#v1", "#t2"\n\t"         \
-    "vpmuludq	"#r1", "#h1", "#v1"\n\t"         \
-    "vpaddq	"#t3", "#v2", "#t3"\n\t"         \
-    "vpmuludq	"#r2", "#h1", "#v2"\n\t"         \
-    "vpaddq	"#t4", "#v3", "#t4"\n\t"         \
-    "vpmuludq	"#r2", "#h2", "#v3"\n\t"         \
-    "vpaddq	"#t1", "#v0", "#t1"\n\t"         \
-    "vpmuludq	"#r2", "#h0", "#v0"\n\t"         \
-    "vpaddq	"#t2", "#v1", "#t2"\n\t"         \
-    "vpmuludq	"#r3", "#h0", "#v1"\n\t"         \
-    "vpaddq	"#t3", "#v2", "#t3"\n\t"         \
-    "vpmuludq	"#r3", "#h1", "#v2"\n\t"         \
-    "vpaddq	"#t4", "#v3", "#t4"\n\t"         \
-    "vpmuludq	"#r4", "#h0", "#v3"\n\t"         \
-    "vpaddq	"#t2", "#v0", "#t2"\n\t"         \
-    "vpaddq	"#t3", "#v1", "#t3"\n\t"         \
-    "vpaddq	"#t4", "#v2", "#t4"\n\t"         \
-    "vpaddq	"#t4", "#v3", "#t4"\n\t"
-
-/* Reduce the 64 bits of data to 26 bits.
- *
- * h0-h4 contain the reduced H values.
- * m0-m4 contain the 4 H values to reduce.
- * t0-t2 are temporaries.
- * mask contains the 26-bit mask for each 64 bit value in the 256 bit register.
- */
-#define REDUCE(h0, h1, h2, h3, h4,          \
-               m0, m1, m2, m3, m4,          \
-               t0, t1, t2, mask)            \
-    "vpsrlq	    $26, "#m0", "#t0"\n\t"  \
-    "vpsrlq	    $26, "#m3", "#t1"\n\t"  \
-    "vpand	"#mask", "#m0", "#m0"\n\t"  \
-    "vpand	"#mask", "#m3", "#m3"\n\t"  \
-    "vpaddq	  "#m1", "#t0", "#m1"\n\t"  \
-    "vpaddq	  "#m4", "#t1", "#m4"\n\t"  \
-                                            \
-    "vpsrlq	    $26, "#m1", "#t0"\n\t"  \
-    "vpsrlq	    $26, "#m4", "#t1"\n\t"  \
-    "vpand	"#mask", "#m1", "#h1"\n\t"  \
-    "vpand	"#mask", "#m4", "#h4"\n\t"  \
-    "vpaddq	  "#m2", "#t0", "#m2"\n\t"  \
-    "vpslld	     $2, "#t1", "#t2"\n\t"  \
-    "vpaddd	  "#t2", "#t1", "#t2"\n\t"  \
-                                            \
-    "vpsrlq	    $26, "#m2", "#t0"\n\t"  \
-    "vpaddq	  "#m0", "#t2", "#m0"\n\t"  \
-    "vpsrlq	    $26, "#m0", "#t1"\n\t"  \
-    "vpand	"#mask", "#m2", "#h2"\n\t"  \
-    "vpand	"#mask", "#m0", "#h0"\n\t"  \
-    "vpaddq	  "#m3", "#t0", "#m3"\n\t"  \
-    "vpaddq	  "#h1", "#t1", "#h1"\n\t"  \
-                                            \
-    "vpsrlq	    $26, "#m3", "#t0"\n\t"  \
-    "vpand	"#mask", "#m3", "#h3"\n\t"  \
-    "vpaddq	  "#h4", "#t0", "#h4"\n\t"  \
-
-
 /* Process multiple blocks (n * 16 bytes) of data.
  *
  * ctx    Poly1305 context.
  * m      Blocks of message data.
  * bytes  The number of bytes to process.
  */
-POLY1305_NOINLINE static void poly1305_blocks_avx2(Poly1305* ctx,
-                                           const unsigned char* m, size_t bytes)
-{
-    ALIGN256 word64 r4[5][4];
-    ALIGN256 word64 s[4][4];
-    register word32 t0 asm("r8") = 0;
-    register word32 t1 asm("r9") = 0;
-    register word32 t2 asm("r10") = 0;
-    register word32 t3 asm("r11") = 0;
-    register word32 t4 asm("r12") = 0;
-    static const word64 mask[4] = { 0x0000000003ffffff, 0x0000000003ffffff,
-                                    0x0000000003ffffff, 0x0000000003ffffff };
-    static const word64 hibit[4] = { 0x1000000, 0x1000000,
-                                     0x1000000, 0x1000000 };
-
-    __asm__ __volatile__ (
-        "vpxor		%%ymm15, %%ymm15, %%ymm15\n\t"
-        "cmpb		$1, %[started]\n\t"
-        "je		L_begin\n\t"
-        "cmpb		$1, %[fin]\n\t"
-        "je		L_begin\n\t"
-        "# Load the message data\n\t"
-        LOAD_M(m, %%ymm0, %%ymm1, %%ymm2, %%ymm3, %%ymm4, %[hibit], %%ymm15)
-        "vmovdqu	%[mask], %%ymm14\n\t"
-        "# Reduce, in place, the message data\n\t"
-        REDUCE(%%ymm0, %%ymm1, %%ymm2, %%ymm3, %%ymm4,
-               %%ymm0, %%ymm1, %%ymm2, %%ymm3, %%ymm4,
-               %%ymm10, %%ymm11, %%ymm12, %%ymm14)
-        "addq		$64, %[m]\n\t"
-        "subq		$64, %[bytes]\n\t"
-        "jz		L_store\n\t"
-        "jmp		L_load_r4\n\t"
-        "\n"
-    "L_begin:\n\t"
-        "# Load the H values.\n\t"
-        LOAD_H(%[h], %%ymm0, %%ymm1, %%ymm2, %%ymm3, %%ymm4)
-        "# Check if there is a power of r to load - otherwise use r^4.\n\t"
-        "cmpb		$0, %[fin]\n\t"
-        "je		L_load_r4\n\t"
-        "\n\t"
-        "# Load the 4 powers of r - r^4, r^3, r^2, r^1.\n\t"
-        LOAD_Rx4(%%ymm5, %%ymm6, %%ymm7, %%ymm8, %%ymm9,
-                 %%ymm10, %%ymm11, %%ymm12, %%ymm13)
-        "jmp		L_mul_5\n\t"
-        "\n"
-     "L_load_r4:\n\t"
-        "# Load r^4 into all four positions.\n\t"
-        LOAD_R4(320(%[ctx]), %%ymm5, %%ymm6, %%ymm7, %%ymm8, %%ymm9,
-                %%ymm13, %%ymm14)
-        "\n"
-    "L_mul_5:\n\t"
-        "# Multiply top 4 26-bit values of all four H by 5\n\t"
-        MUL5(%%ymm10, %%ymm11, %%ymm12, %%ymm13, %%ymm6, %%ymm7, %%ymm8, %%ymm9)
-        "# Store powers of r and multiple of 5 for use in multiply.\n\t"
-        "vmovdqa	%%ymm10,    (%[s])\n\t"
-        "vmovdqa	%%ymm11,  32(%[s])\n\t"
-        "vmovdqa	%%ymm12,  64(%[s])\n\t"
-        "vmovdqa	%%ymm13,  96(%[s])\n\t"
-        "vmovdqa	%%ymm5 ,    (%[r4])\n\t"
-        "vmovdqa	%%ymm6 ,  32(%[r4])\n\t"
-        "vmovdqa	%%ymm7 ,  64(%[r4])\n\t"
-        "vmovdqa	%%ymm8 ,  96(%[r4])\n\t"
-        "vmovdqa	%%ymm9 , 128(%[r4])\n\t"
-        "vmovdqu	%[mask], %%ymm14\n\t"
-        "\n"
-        "# If not finished then loop over data\n\t"
-        "cmpb		$0x1, %[fin]\n\t"
-        "jne		L_start\n\t"
-        "# Do last multiply, reduce, add the four H together and move to\n\t"
-        "# 32-bit registers\n\t"
-        MUL_AVX2(%%ymm0, %%ymm1, %%ymm2, %%ymm3, %%ymm4,
-                 (%[r4]), 32(%[r4]), 64(%[r4]), 96(%[r4]), 128(%[r4]),
-                 (%[s]), 32(%[s]), 64(%[s]), 96(%[s]),
-                 %%ymm5, %%ymm6, %%ymm7, %%ymm8, %%ymm9,
-                 %%ymm10, %%ymm11, %%ymm12, %%ymm13)
-        REDUCE(%%ymm0, %%ymm1, %%ymm2, %%ymm3, %%ymm4,
-               %%ymm5, %%ymm6, %%ymm7, %%ymm8, %%ymm9,
-               %%ymm10, %%ymm11, %%ymm12, %%ymm14)
-        FINALIZE_H(%%ymm0, %%ymm1, %%ymm2, %%ymm3, %%ymm4,
-                   %%ymm5, %%ymm6, %%ymm7, %%ymm8, %%ymm9)
-        MOVE_TO_32(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4,
-                   %[t0], %[t1], %[t2], %[t3], %[t4])
-        "jmp		L_end\n\t"
-        "\n"
-    "L_start:\n\t"
-        MUL_ADD_AVX2(%%ymm0, %%ymm1, %%ymm2, %%ymm3, %%ymm4,
-                     (%[r4]), 32(%[r4]), 64(%[r4]), 96(%[r4]), 128(%[r4]),
-                     (%[s]), 32(%[s]), 64(%[s]), 96(%[s]),
-                     %%ymm5, %%ymm6, %%ymm7, %%ymm8, %%ymm9,
-                     %%ymm10, %%ymm11, %%ymm12, %%ymm13,
-                     %[hibit], %%ymm15)
-        REDUCE(%%ymm0, %%ymm1, %%ymm2, %%ymm3, %%ymm4,
-               %%ymm5, %%ymm6, %%ymm7, %%ymm8, %%ymm9,
-               %%ymm10, %%ymm11, %%ymm12, %%ymm14)
-        "addq		$64, %[m]\n\t"
-        "subq		$64, %[bytes]\n\t"
-        "jnz		L_start\n\t"
-        "\n"
-    "L_store:\n\t"
-        "# Store four H values - state\n\t"
-        STORE_H(%[h], %%ymm0, %%ymm1, %%ymm2, %%ymm3, %%ymm4, %%xmm4)
-        "\n"
-    "L_end:\n\t"
-        : [m] "+r" (m), [bytes] "+r" (bytes),
-          [t0] "+r" (t0), [t1] "+r" (t1), [t2] "+r" (t2),
-          [t3] "+r" (t3), [t4] "+r" (t4)
-        : [ctx] "r" (ctx), [h] "r" (ctx->hh),
-          [r4] "r" (r4), [s] "r" (s),
-          [fin] "m" (ctx->finished), [started] "m" (ctx->started),
-          [mask] "m" (mask), [hibit] "m" (hibit)
-        : "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7",
-          "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15",
-          "memory"
-    );
-
-    if (ctx->finished)
-    {
-        word64 h0, h1, h2, c;
-
-        /* Convert to 64-bit form. */
-        h0 = (((word64)(t1 & 0x3FFFF)) << 26) +  t0;
-        h1 = (((word64)(t3 &   0x3FF)) << 34) +
-             (((word64) t2           ) <<  8) + (t1 >> 18);
-        h2 = (((word64) t4           ) << 16) + (t3 >> 10);
-
-        /* Perform modulur reduction. */
-                     c = (h1 >> 44); h1 &= 0xfffffffffff;
-        h2 += c;     c = (h2 >> 42); h2 &= 0x3ffffffffff;
-        h0 += c * 5; c = (h0 >> 44); h0 &= 0xfffffffffff;
-        h1 += c;     c = (h1 >> 44); h1 &= 0xfffffffffff;
-        h2 += c;     c = (h2 >> 42); h2 &= 0x3ffffffffff;
-        h0 += c * 5; c = (h0 >> 44); h0 &= 0xfffffffffff;
-        h1 += c;
-
-        /* Convert from 42/44/44 to 2/64/64 bits used and store result. */
-        ctx->h[0] =  h0        | (h1 << 44);
-        ctx->h[1] = (h1 >> 20) | (h2 << 24);
-        ctx->h[2] =  h2 >> 40;
-    }
-
-    ctx->started = 1;
-}
-
-/* Multiply two 130-bit numbers in 64-bit registers and reduce.
- * 44 + 44 + 42 = 130 bits
- *
- * r0-r2 are the first operand and the result.
- * a0-a2 are the second operand.
- */
-#define MUL_64(r0, r1, r2, a0, a1, a2)                                       \
-    s1 = a1 * (5 << 2);                                                      \
-    s2 = a2 * (5 << 2);                                                      \
-    MUL(d0, r0, a0); MUL(d, r1, s2); ADD(d0, d); MUL(d, r2, s1); ADD(d0, d); \
-    MUL(d1, r0, a1); MUL(d, r1, a0); ADD(d1, d); MUL(d, r2, s2); ADD(d1, d); \
-    MUL(d2, r0, a2); MUL(d, r1, a1); ADD(d2, d); MUL(d, r2, a0); ADD(d2, d); \
-                                                                             \
-                  c = SHR(d0, 44); r0 = LO(d0) & 0xfffffffffff;              \
-    ADDLO(d1, c); c = SHR(d1, 44); r1 = LO(d1) & 0xfffffffffff;              \
-    ADDLO(d2, c); c = SHR(d2, 42); r2 = LO(d2) & 0x3ffffffffff;              \
-    r0  += c * 5; c = (r0 >> 44);  r0 =    r0  & 0xfffffffffff;              \
-    r1  += c
-
-#define SQR_64(r0, r1, r2)                                      \
-    s2 = r2 * (5 << 2);                                         \
-    MUL(d0, r1, s2); ADD(d0, d0); MUL(d, r0, r0); ADD(d0, d);   \
-    MUL(d1, r0, r1); ADD(d1, d1); MUL(d, r2, s2); ADD(d1, d);   \
-    MUL(d2, r0, r2); ADD(d2, d2); MUL(d, r1, r1); ADD(d2, d);   \
-                                                                \
-                  c = SHR(d0, 44); r0 = LO(d0) & 0xfffffffffff; \
-    ADDLO(d1, c); c = SHR(d1, 44); r1 = LO(d1) & 0xfffffffffff; \
-    ADDLO(d2, c); c = SHR(d2, 42); r2 = LO(d2) & 0x3ffffffffff; \
-    r0  += c * 5; c = (r0 >> 44);  r0 =    r0  & 0xfffffffffff; \
-    r1  += c
-
-/* Store the 130-bit number in 64-bit registers as 26-bit values in 32 bits.
- *
- * r0-r2 contains the 130-bit number in 64-bit registers.
- * r is the address of where to store the 26 of 32 bits result.
- */
-#define CONV_64_TO_32(r0, r1, r2, r)                      \
-    r[0] = (word32)( r0                    ) & 0x3ffffff; \
-    r[1] = (word32)((r0 >> 26) | (r1 << 18)) & 0x3ffffff; \
-    r[2] = (word32)( r1 >> 8               ) & 0x3ffffff; \
-    r[3] = (word32)((r1 >> 34) | (r2 << 10)) & 0x3ffffff; \
-    r[4] = (word32)( r2 >> 16              )
-
+extern void poly1305_blocks_avx2(Poly1305* ctx, const unsigned char* m,
+                                 size_t bytes);
 /* Calculate R^1, R^2, R^3 and R^4 and store them in the context.
  *
  * ctx    Poly1305 context.
  */
-static void poly1305_calc_powers(Poly1305* ctx)
-{
-    word64 r0, r1, r2, t0, t1, c;
-    word64 r20, r21, r22;
-    word64 r30, r31, r32;
-    word64 r40, r41, r42;
-    word64 s1, s2;
-    word128 d0, d1, d2, d;
-
-    t0 = ctx->r[0];
-    t1 = ctx->r[1];
-    r0 = ( t0                    ) & 0xfffffffffff;
-    r1 = ((t0 >> 44) | (t1 << 20)) & 0xfffffffffff;
-    r2 = ((t1 >> 24)             ) & 0x00fffffffff;
-
-    /* Store r^1 */
-    CONV_64_TO_32(r0, r1, r2, ctx->r1);
-
-    /* Calc and store r^2 */
-    r20 = r0; r21 = r1; r22 = r2;
-    SQR_64(r20, r21, r22);
-    CONV_64_TO_32(r20, r21, r22, ctx->r2);
-
-    /* Calc and store r^3 */
-    r30 = r20; r31 = r21; r32 = r22;
-    MUL_64(r30, r31, r32, r0, r1, r2);
-    CONV_64_TO_32(r30, r31, r32, ctx->r3);
-
-    /* Calc and store r^4 */
-    r40 = r20; r41 = r21; r42 = r22;
-    SQR_64(r40, r41, r42);
-    CONV_64_TO_32(r40, r41, r42, ctx->r4);
-
-}
-
+extern void poly1305_calc_powers_avx2(Poly1305* ctx);
 /* Set the key to use when processing data.
  * Initialize the context.
  * Calls AVX set key function as final function calls AVX code.
@@ -945,27 +175,7 @@
  * ctx  Poly1305 context.
  * key  The key data (16 bytes).
  */
-static void poly1305_setkey_avx2(Poly1305* ctx, const byte* key)
-{
-    poly1305_setkey_avx(ctx, key);
-
-    __asm__ __volatile__ (
-        "vpxor		%%ymm0, %%ymm0, %%ymm0\n\t"
-        "vmovdqu	%%ymm0,    (%[hh])\n\t"
-        "vmovdqu	%%ymm0,  32(%[hh])\n\t"
-        "vmovdqu	%%ymm0,  64(%[hh])\n\t"
-        "vmovdqu	%%ymm0,  96(%[hh])\n\t"
-        "vmovdqu	%%ymm0, 128(%[hh])\n\t"
-        :
-        : [hh] "r" (ctx->hh)
-        : "memory", "ymm0"
-    );
-
-    ctx->leftover = 0;
-    ctx->finished = 0;
-    ctx->started = 0;
-}
-
+extern void poly1305_setkey_avx2(Poly1305* ctx, const byte* key);
 /* Calculate the final result - authentication data.
  * Zeros out the private data in the context.
  * Calls AVX final function to quickly process last blocks.
@@ -973,50 +183,15 @@
  * ctx  Poly1305 context.
  * mac  Buffer to hold 16 bytes - authentication data.
  */
-static void poly1305_final_avx2(Poly1305* ctx, byte* mac)
-{
-    int i, j;
-    int l = (int)ctx->leftover;
-
-    ctx->finished = 1;
-    if (ctx->started)
-        poly1305_blocks_avx2(ctx, ctx->buffer, POLY1305_BLOCK_SIZE * 4);
-
-    i = l & ~(POLY1305_BLOCK_SIZE - 1);
-    if (i > 0)
-        poly1305_blocks_avx(ctx, ctx->buffer, i);
-    ctx->leftover -= i;
-    for (j = 0; i < l; i++, j++)
-        ctx->buffer[j] = ctx->buffer[i];
-
-    poly1305_final_avx(ctx, mac);
+extern void poly1305_final_avx2(Poly1305* ctx, byte* mac);
+#endif
 
-    /* zero out the state */
-    __asm__ __volatile__ (
-        "vpxor		%%ymm0, %%ymm0, %%ymm0\n\t"
-        "vmovdqu	%%ymm0,    (%[hh])\n\t"
-        "vmovdqu	%%ymm0,  32(%[hh])\n\t"
-        "vmovdqu	%%ymm0,  64(%[hh])\n\t"
-        "vmovdqu	%%ymm0,  96(%[hh])\n\t"
-        "vmovdqu	%%ymm0, 128(%[hh])\n\t"
-        "vmovdqu	%%ymm0,    (%[r1])\n\t"
-        "vmovdqu	%%ymm0,    (%[r2])\n\t"
-        "vmovdqu	%%ymm0,    (%[r3])\n\t"
-        "vmovdqu	%%ymm0,    (%[r4])\n\t"
-        :
-        : [hh] "r" (ctx->hh), [r1] "r" (ctx->r1), [r2] "r" (ctx->r2),
-          [r3] "r" (ctx->r3), [r4] "r" (ctx->r4)
-        : "memory", "ymm0"
-    );
-
-    ctx->leftover = 0;
-    ctx->finished = 0;
-    ctx->started = 0;
-}
+#ifdef __cplusplus
+    }  /* extern "C" */
 #endif
 
 #elif defined(POLY130564)
-
+#ifndef WOLFSSL_ARMASM
     static word64 U8TO64(const byte* p)
     {
         return
@@ -1040,7 +215,7 @@
         p[6] = (v >> 48) & 0xff;
         p[7] = (v >> 56) & 0xff;
     }
-
+#endif/* WOLFSSL_ARMASM */
 #else /* if not 64 bit then use 32 bit */
 
     static word32 U8TO32(const byte *p)
@@ -1060,18 +235,27 @@
     }
 #endif
 
-
-static void U32TO64(word32 v, byte* p)
+/* convert 32-bit unsigned to little endian 64 bit type as byte array */
+static WC_INLINE void u32tole64(const word32 inLe32, byte outLe64[8])
 {
-    XMEMSET(p, 0, 8);
-    p[0] = (v & 0xFF);
-    p[1] = (v >>  8) & 0xFF;
-    p[2] = (v >> 16) & 0xFF;
-    p[3] = (v >> 24) & 0xFF;
+#ifndef WOLFSSL_X86_64_BUILD
+    outLe64[0] = (byte)(inLe32  & 0x000000FF);
+    outLe64[1] = (byte)((inLe32 & 0x0000FF00) >> 8);
+    outLe64[2] = (byte)((inLe32 & 0x00FF0000) >> 16);
+    outLe64[3] = (byte)((inLe32 & 0xFF000000) >> 24);
+    outLe64[4] = 0;
+    outLe64[5] = 0;
+    outLe64[6] = 0;
+    outLe64[7] = 0;
+#else
+    *(word64*)outLe64 = inLe32;
+#endif
 }
 
-static void poly1305_blocks(Poly1305* ctx, const unsigned char *m,
-                            size_t bytes)
+
+#if !defined(WOLFSSL_ARMASM) || !defined(__aarch64__)
+void poly1305_blocks(Poly1305* ctx, const unsigned char *m,
+                     size_t bytes)
 {
 #ifdef USE_INTEL_SPEEDUP
     /* AVX2 is handled in wc_Poly1305Update. */
@@ -1127,7 +311,7 @@
     ctx->h[2] = h2;
 
 #else /* if not 64 bit then use 32 bit */
-    const word32 hibit = (ctx->finished) ? 0 : (1 << 24); /* 1 << 128 */
+    const word32 hibit = (ctx->finished) ? 0 : ((word32)1 << 24); /* 1 << 128 */
     word32 r0,r1,r2,r3,r4;
     word32 s1,s2,s3,s4;
     word32 h0,h1,h2,h3,h4;
@@ -1194,7 +378,7 @@
 #endif /* end of 64 bit cpu blocks or 32 bit cpu */
 }
 
-static void poly1305_block(Poly1305* ctx, const unsigned char *m)
+void poly1305_block(Poly1305* ctx, const unsigned char *m)
 {
 #ifdef USE_INTEL_SPEEDUP
     /* No call to poly1305_block when AVX2, AVX2 does 4 blocks at a time. */
@@ -1203,11 +387,12 @@
     poly1305_blocks(ctx, m, POLY1305_BLOCK_SIZE);
 #endif
 }
+#endif /* !defined(WOLFSSL_ARMASM) || !defined(__aarch64__) */
 
-
+#if !defined(WOLFSSL_ARMASM) || !defined(__aarch64__)
 int wc_Poly1305SetKey(Poly1305* ctx, const byte* key, word32 keySz)
 {
-#if defined(POLY130564)
+#if defined(POLY130564) && !defined(USE_INTEL_SPEEDUP)
     word64 t0,t1;
 #endif
 
@@ -1291,7 +476,6 @@
     return 0;
 }
 
-
 int wc_Poly1305Final(Poly1305* ctx, byte* mac)
 {
 #ifdef USE_INTEL_SPEEDUP
@@ -1419,10 +603,10 @@
     g1 = h1 + c; c = g1 >> 26; g1 &= 0x3ffffff;
     g2 = h2 + c; c = g2 >> 26; g2 &= 0x3ffffff;
     g3 = h3 + c; c = g3 >> 26; g3 &= 0x3ffffff;
-    g4 = h4 + c - (1 << 26);
+    g4 = h4 + c - ((word32)1 << 26);
 
     /* select h if h < p, or h + -p if h >= p */
-    mask = (g4 >> ((sizeof(word32) * 8) - 1)) - 1;
+    mask = ((word32)g4 >> ((sizeof(word32) * 8) - 1)) - 1;
     g0 &= mask;
     g1 &= mask;
     g2 &= mask;
@@ -1472,6 +656,7 @@
 
     return 0;
 }
+#endif /* !defined(WOLFSSL_ARMASM) || !defined(__aarch64__) */
 
 
 int wc_Poly1305Update(Poly1305* ctx, const byte* m, word32 bytes)
@@ -1510,7 +695,7 @@
                 return 0;
 
             if (!ctx->started)
-                poly1305_calc_powers(ctx);
+                poly1305_calc_powers_avx2(ctx);
             poly1305_blocks_avx2(ctx, ctx->buffer, sizeof(ctx->buffer));
             ctx->leftover = 0;
         }
@@ -1520,7 +705,7 @@
             size_t want = bytes & ~(sizeof(ctx->buffer) - 1);
 
             if (!ctx->started)
-                poly1305_calc_powers(ctx);
+                poly1305_calc_powers_avx2(ctx);
             poly1305_blocks_avx2(ctx, m, want);
             m += want;
             bytes -= (word32)want;
@@ -1572,6 +757,56 @@
     return 0;
 }
 
+/*  Takes a Poly1305 struct that has a key loaded and pads the provided length
+    ctx        : Initialized Poly1305 struct to use
+    lenToPad   : Current number of bytes updated that needs padding to 16
+ */
+int wc_Poly1305_Pad(Poly1305* ctx, word32 lenToPad)
+{
+    int ret = 0;
+    word32 paddingLen;
+    byte padding[WC_POLY1305_PAD_SZ - 1];
+
+    if (ctx == NULL) {
+        return BAD_FUNC_ARG;
+    }
+    if (lenToPad == 0) {
+        return 0; /* nothing needs to be done */
+    }
+
+    XMEMSET(padding, 0, sizeof(padding));
+
+    /* Pad length to 16 bytes */
+    paddingLen = -(int)lenToPad & (WC_POLY1305_PAD_SZ - 1);
+    if (paddingLen > 0) {
+        ret = wc_Poly1305Update(ctx, padding, paddingLen);
+    }
+    return ret;
+}
+
+/*  Takes a Poly1305 struct that has a key loaded and adds the AEAD length
+    encoding in 64-bit little endian
+    aadSz      : Size of the additional authentication data
+    dataSz     : Size of the plaintext or ciphertext
+ */
+int wc_Poly1305_EncodeSizes(Poly1305* ctx, word32 aadSz, word32 dataSz)
+{
+    int ret;
+    byte little64[16]; /* sizeof(word64) * 2 */
+
+    if (ctx == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    XMEMSET(little64, 0, sizeof(little64));
+
+    /* size of additional data and input data as little endian 64 bit types */
+    u32tole64(aadSz,  little64);
+    u32tole64(dataSz, little64 + 8);
+    ret = wc_Poly1305Update(ctx, little64, sizeof(little64));
+
+    return ret;
+}
 
 /*  Takes in an initialized Poly1305 struct that has a key loaded and creates
     a MAC (tag) using recent TLS AEAD padding scheme.
@@ -1588,11 +823,6 @@
                     byte* input, word32 sz, byte* tag, word32 tagSz)
 {
     int ret;
-    byte padding[WC_POLY1305_PAD_SZ - 1];
-    word32 paddingLen;
-    byte little64[16];
-
-    XMEMSET(padding, 0, sizeof(padding));
 
     /* sanity check on arguments */
     if (ctx == NULL || input == NULL || tag == NULL ||
@@ -1609,11 +839,9 @@
         if ((ret = wc_Poly1305Update(ctx, additional, addSz)) != 0) {
             return ret;
         }
-        paddingLen = -((int)addSz) & (WC_POLY1305_PAD_SZ - 1);
-        if (paddingLen) {
-            if ((ret = wc_Poly1305Update(ctx, padding, paddingLen)) != 0) {
-                return ret;
-            }
+        /* pad additional data */
+        if ((ret = wc_Poly1305_Pad(ctx, addSz)) != 0) {
+            return ret;
         }
     }
 
@@ -1621,19 +849,13 @@
     if ((ret = wc_Poly1305Update(ctx, input, sz)) != 0) {
         return ret;
     }
-    paddingLen = -((int)sz) & (WC_POLY1305_PAD_SZ - 1);
-    if (paddingLen) {
-        if ((ret = wc_Poly1305Update(ctx, padding, paddingLen)) != 0) {
-            return ret;
-        }
+    /* pad input data */
+    if ((ret = wc_Poly1305_Pad(ctx, sz)) != 0) {
+        return ret;
     }
 
-    /* size of additional data and input as little endian 64 bit types */
-    U32TO64(addSz, little64);
-    U32TO64(sz, little64 + 8);
-    ret = wc_Poly1305Update(ctx, little64, sizeof(little64));
-    if (ret)
-    {
+    /* encode size of AAD and input data as little endian 64 bit types */
+    if ((ret = wc_Poly1305_EncodeSizes(ctx, addSz, sz)) != 0) {
         return ret;
     }
 
@@ -1644,5 +866,4 @@
 
 }
 #endif /* HAVE_POLY1305 */
-
 
--- a/wolfcrypt/src/pwdbased.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/pwdbased.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* pwdbased.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -42,6 +42,8 @@
 #endif
 
 
+#ifdef HAVE_PBKDF1
+
 /* PKCS#5 v1.5 with non standard extension to optionally derive the extra data (IV) */
 int wc_PBKDF1_ex(byte* key, int keyLen, byte* iv, int ivLen,
     const byte* passwd, int passwdLen, const byte* salt, int saltLen,
@@ -83,7 +85,7 @@
         return MEMORY_E;
 #endif
 
-    err = wc_HashInit(hash, hashT);
+    err = wc_HashInit_ex(hash, hashT, heap, INVALID_DEVID);
     if (err != 0) {
     #ifdef WOLFSSL_SMALL_STACK
         XFREE(hash, heap, DYNAMIC_TYPE_HASHCTX);
@@ -142,6 +144,8 @@
         }
     }
 
+    wc_HashFree(hash, hashT);
+
 #ifdef WOLFSSL_SMALL_STACK
     XFREE(hash, heap, DYNAMIC_TYPE_HASHCTX);
 #endif
@@ -163,9 +167,12 @@
         passwd, pLen, salt, sLen, iterations, hashType, NULL);
 }
 
+#endif /* HAVE_PKCS5 */
 
-int wc_PBKDF2(byte* output, const byte* passwd, int pLen, const byte* salt,
-           int sLen, int iterations, int kLen, int hashType)
+#ifdef HAVE_PBKDF2
+
+int wc_PBKDF2_ex(byte* output, const byte* passwd, int pLen, const byte* salt,
+           int sLen, int iterations, int kLen, int hashType, void* heap, int devId)
 {
     word32 i = 1;
     int    hLen;
@@ -192,15 +199,17 @@
         return BAD_FUNC_ARG;
 
 #ifdef WOLFSSL_SMALL_STACK
-    buffer = (byte*)XMALLOC(WC_MAX_DIGEST_SIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    buffer = (byte*)XMALLOC(WC_MAX_DIGEST_SIZE, heap, DYNAMIC_TYPE_TMP_BUFFER);
     if (buffer == NULL)
         return MEMORY_E;
-    hmac = (Hmac*)XMALLOC(sizeof(Hmac), NULL, DYNAMIC_TYPE_HMAC);
-    if (buffer == NULL)
+    hmac = (Hmac*)XMALLOC(sizeof(Hmac), heap, DYNAMIC_TYPE_HMAC);
+    if (hmac == NULL) {
+        XFREE(buffer, heap, DYNAMIC_TYPE_TMP_BUFFER);
         return MEMORY_E;
+    }
 #endif
 
-    ret = wc_HmacInit(hmac, NULL, INVALID_DEVID);
+    ret = wc_HmacInit(hmac, heap, devId);
     if (ret == 0) {
         /* use int hashType here, since HMAC FIPS uses the old unique value */
         ret = wc_HmacSetKey(hmac, hashType, passwd, pLen);
@@ -254,13 +263,24 @@
     }
 
 #ifdef WOLFSSL_SMALL_STACK
-    XFREE(buffer, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    XFREE(hmac, NULL, DYNAMIC_TYPE_HMAC);
+    XFREE(buffer, heap, DYNAMIC_TYPE_TMP_BUFFER);
+    XFREE(hmac, heap, DYNAMIC_TYPE_HMAC);
 #endif
 
     return ret;
 }
 
+int wc_PBKDF2(byte* output, const byte* passwd, int pLen, const byte* salt,
+           int sLen, int iterations, int kLen, int hashType)
+{
+    return wc_PBKDF2_ex(output, passwd, pLen, salt, sLen, iterations, kLen,
+        hashType, NULL, INVALID_DEVID);
+}
+
+#endif /* HAVE_PBKDF2 */
+
+#ifdef HAVE_PKCS12
+
 /* helper for PKCS12_PBKDF(), does hash operation */
 static int DoPKCS12Hash(int hashType, byte* buffer, word32 totalLen,
                  byte* Ai, word32 u, int iterations)
@@ -308,6 +328,8 @@
             ret = wc_HashFinal(hash, hashT, Ai);
     }
 
+    wc_HashFree(hash, hashT);
+
 #ifdef WOLFSSL_SMALL_STACK
     XFREE(hash, NULL, DYNAMIC_TYPE_HASHCTX);
 #endif
@@ -462,7 +484,7 @@
             else {
                 if (outSz > (int)v) {
                     /* take off MSB */
-                    byte  tmp[129];
+                    byte  tmp[WC_MAX_BLOCK_SIZE + 1];
                     ret = mp_to_unsigned_bin(&res, tmp);
                     XMEMCPY(I + i, tmp + 1, v);
                 }
@@ -496,6 +518,8 @@
     return ret;
 }
 
+#endif /* HAVE_PKCS12 */
+
 #ifdef HAVE_SCRYPT
 /* Rotate the 32-bit value a by b bits to the left.
  *
@@ -674,7 +698,7 @@
  * parallel   The number of parallel mix operations to perform.
  *            (Note: this implementation does not use threads.)
  * dkLen      The length of the derived key in bytes.
- * returns BAD_FUNC_ARG when: parallel not 1, blockSize is too large for cost.
+ * returns BAD_FUNC_ARG when: blockSize is too large for cost.
  */
 int wc_scrypt(byte* output, const byte* passwd, int passLen,
               const byte* salt, int saltLen, int cost, int blockSize,
@@ -691,7 +715,7 @@
     if (blockSize > 8)
         return BAD_FUNC_ARG;
 
-    if (cost < 1 || cost >= 128 * blockSize / 8)
+    if (cost < 1 || cost >= 128 * blockSize / 8 || parallel < 1 || dkLen < 1)
         return BAD_FUNC_ARG;
 
     bSz = 128 * blockSize;
@@ -731,10 +755,42 @@
 
     return ret;
 }
-#endif
 
-#undef WC_MAX_DIGEST_SIZE
+/* Generates an key derived from a password and salt using a memory hard
+ * algorithm.
+ * Implements RFC 7914: scrypt PBKDF.
+ *
+ * output      Derived key.
+ * passwd      Password to derive key from.
+ * passLen     Length of the password.
+ * salt        Key specific data.
+ * saltLen     Length of the salt data.
+ * iterations  Number of iterations to perform. Range: 1 << (1..(128*r/8-1))
+ * blockSize   Number of 128 byte octets in a working block.
+ * parallel    Number of parallel mix operations to perform.
+ *             (Note: this implementation does not use threads.)
+ * dkLen       Length of the derived key in bytes.
+ * returns BAD_FUNC_ARG when: iterations is not a power of 2 or blockSize is too
+ *                            large for iterations.
+ */
+int wc_scrypt_ex(byte* output, const byte* passwd, int passLen,
+                 const byte* salt, int saltLen, word32 iterations,
+                 int blockSize, int parallel, int dkLen)
+{
+    int cost;
+
+    /* Iterations must be a power of 2. */
+    if ((iterations & (iterations - 1)) != 0)
+        return BAD_FUNC_ARG;
+
+    for (cost = -1; iterations != 0; cost++) {
+        iterations >>= 1;
+    }
+
+    return wc_scrypt(output, passwd, passLen, salt, saltLen, cost, blockSize,
+                     parallel, dkLen);
+}
+#endif /* HAVE_SCRYPT */
 
 #endif /* NO_PWDBASED */
-
 
--- a/wolfcrypt/src/rabbit.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/rabbit.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* rabbit.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
--- a/wolfcrypt/src/random.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/random.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* random.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -33,7 +33,7 @@
 */
 
 #if defined(HAVE_FIPS) && \
-	defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)
+    defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)
 
     /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */
     #define FIPS_NO_WRAPPERS
@@ -89,13 +89,12 @@
         return FreeRng_fips(rng);
     }
 
-    int wc_RNG_HealthTest(int reseed,
-                                        const byte* entropyA, word32 entropyASz,
-                                        const byte* entropyB, word32 entropyBSz,
-                                        byte* output, word32 outputSz)
+    int wc_RNG_HealthTest(int reseed, const byte* seedA, word32 seedASz,
+                                      const byte* seedB, word32 seedBSz,
+                                      byte* output, word32 outputSz)
     {
-        return RNG_HealthTest_fips(reseed, entropyA, entropyASz,
-                              entropyB, entropyBSz, output, outputSz);
+        return RNG_HealthTest_fips(reseed, seedA, seedASz,
+                              seedB, seedBSz, output, outputSz);
    }
 #endif /* HAVE_HASHDRBG */
 
@@ -105,6 +104,10 @@
 
 #include <wolfssl/wolfcrypt/sha256.h>
 
+#ifdef WOLF_CRYPTO_CB
+    #include <wolfssl/wolfcrypt/cryptocb.h>
+#endif
+
 #ifdef NO_INLINE
     #include <wolfssl/wolfcrypt/misc.h>
 #else
@@ -131,7 +134,12 @@
     #include "fsl_trng.h"
 #elif defined(FREESCALE_KSDK_2_0_RNGA)
     #include "fsl_rnga.h"
-
+#elif defined(WOLFSSL_WICED)
+    #include "wiced_crypto.h"
+#elif defined(WOLFSSL_NETBURNER)
+    #include <predef.h>
+    #include <basictypes.h>
+    #include <random.h>
 #elif defined(NO_DEV_RANDOM)
 #elif defined(CUSTOM_RAND_GENERATE)
 #elif defined(CUSTOM_RAND_GENERATE_BLOCK)
@@ -141,9 +149,13 @@
 #elif defined(WOLFSSL_IAR_ARM)
 #elif defined(WOLFSSL_ROWLEY_ARM)
 #elif defined(WOLFSSL_EMBOS)
+#elif defined(WOLFSSL_DEOS)
 #elif defined(MICRIUM)
 #elif defined(WOLFSSL_NUCLEUS)
 #elif defined(WOLFSSL_PB)
+#elif defined(WOLFSSL_ZEPHYR)
+#elif defined(WOLFSSL_TELIT_M2MB)
+#elif defined(WOLFSSL_SCE) && !defined(WOLFSSL_SCE_NO_TRNG)
 #else
     /* include headers that may be needed to get good seed */
     #include <fcntl.h>
@@ -177,9 +189,68 @@
 #define OUTPUT_BLOCK_LEN  (WC_SHA256_DIGEST_SIZE)
 #define MAX_REQUEST_LEN   (0x10000)
 #define RESEED_INTERVAL   WC_RESEED_INTERVAL
-#define SECURITY_STRENGTH (2048)
-#define ENTROPY_SZ        (SECURITY_STRENGTH/8)
-#define MAX_ENTROPY_SZ    (ENTROPY_SZ + ENTROPY_SZ/2)
+
+
+/* For FIPS builds, the user should not be adjusting the values. */
+#if defined(HAVE_FIPS) && \
+    defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)
+    #if defined(RNG_SECURITY_STRENGTH) \
+            || defined(ENTROPY_SCALE_FACTOR) \
+            || defined(SEED_BLOCK_SZ)
+
+        #error "Do not change the RNG parameters for FIPS builds."
+    #endif
+#endif
+
+
+/* The security strength for the RNG is the target number of bits of
+ * entropy you are looking for in a seed. */
+#ifndef RNG_SECURITY_STRENGTH
+    #if defined(HAVE_FIPS) && \
+        defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)
+        /* SHA-256 requires a minimum of 256-bits of entropy. The goal
+         * of 1024 will provide 4 times that. */
+        #define RNG_SECURITY_STRENGTH (1024)
+    #else
+        /* If not using FIPS or using old FIPS, set the number down a bit.
+         * More is better, but more is also slower. */
+        #define RNG_SECURITY_STRENGTH (256)
+    #endif
+#endif
+
+#ifndef ENTROPY_SCALE_FACTOR
+    /* The entropy scale factor should be the whole number inverse of the
+     * minimum bits of entropy per bit of NDRNG output. */
+    #if defined(HAVE_INTEL_RDSEED) || defined(HAVE_INTEL_RDRAND)
+        /* The value of 2 applies to Intel's RDSEED which provides about
+         * 0.5 bits minimum of entropy per bit. */
+        #define ENTROPY_SCALE_FACTOR 2
+    #else
+        /* Setting the default to 1. */
+        #define ENTROPY_SCALE_FACTOR 1
+    #endif
+#endif
+
+#ifndef SEED_BLOCK_SZ
+    /* The seed block size, is the size of the output of the underlying NDRNG.
+     * This value is used for testing the output of the NDRNG. */
+    #if defined(HAVE_INTEL_RDSEED) || defined(HAVE_INTEL_RDRAND)
+        /* RDSEED outputs in blocks of 64-bits. */
+        #define SEED_BLOCK_SZ sizeof(word64)
+    #else
+        /* Setting the default to 4. */
+        #define SEED_BLOCK_SZ 4
+    #endif
+#endif
+
+#define SEED_SZ        (RNG_SECURITY_STRENGTH*ENTROPY_SCALE_FACTOR/8)
+
+/* The maximum seed size will be the seed size plus a seed block for the
+ * test, and an additional half of the seed size. This additional half
+ * is in case the user does not supply a nonce. A nonce will be obtained
+ * from the NDRNG. */
+#define MAX_SEED_SZ    (SEED_SZ + SEED_SZ/2 + SEED_BLOCK_SZ)
+
 
 /* Internal return codes */
 #define DRBG_SUCCESS      0
@@ -208,13 +279,13 @@
     drbgInitV
 };
 
-
+/* NOTE: if DRBG struct is changed please update random.h drbg_data size */
 typedef struct DRBG {
     word32 reseedCtr;
     word32 lastBlock;
     byte V[DRBG_SEED_LEN];
     byte C[DRBG_SEED_LEN];
-#ifdef WOLFSSL_ASYNC_CRYPT
+#if defined(WOLFSSL_ASYNC_CRYPT) || defined(WOLF_CRYPTO_CB)
     void* heap;
     int devId;
 #endif
@@ -243,10 +314,16 @@
 #else
     wc_Sha256 sha[1];
 #endif
+#ifdef WC_ASYNC_ENABLE_SHA256
     DECLARE_VAR(digest, byte, WC_SHA256_DIGEST_SIZE, drbg->heap);
+    if (digest == NULL)
+        return MEMORY_E;
+#else
+    byte digest[WC_SHA256_DIGEST_SIZE];
+#endif
 
     (void)drbg;
-#ifdef WOLFSSL_ASYNC_CRYPT
+#ifdef WC_ASYNC_ENABLE_SHA256
     if (digest == NULL)
         return DRBG_FAILURE;
 #endif
@@ -259,7 +336,7 @@
 
     for (i = 0, ctr = 1; i < len; i++, ctr++) {
 #ifndef WOLFSSL_SMALL_STACK_CACHE
-    #ifdef WOLFSSL_ASYNC_CRYPT
+    #if defined(WOLFSSL_ASYNC_CRYPT) || defined(WOLF_CRYPTO_CB)
         ret = wc_InitSha256_ex(sha, drbg->heap, drbg->devId);
     #else
         ret = wc_InitSha256(sha);
@@ -304,23 +381,27 @@
 
     ForceZero(digest, WC_SHA256_DIGEST_SIZE);
 
+#ifdef WC_ASYNC_ENABLE_SHA256
     FREE_VAR(digest, drbg->heap);
+#endif
 
     return (ret == 0) ? DRBG_SUCCESS : DRBG_FAILURE;
 }
 
 /* Returns: DRBG_SUCCESS or DRBG_FAILURE */
-static int Hash_DRBG_Reseed(DRBG* drbg, const byte* entropy, word32 entropySz)
+static int Hash_DRBG_Reseed(DRBG* drbg, const byte* seed, word32 seedSz)
 {
-    byte seed[DRBG_SEED_LEN];
-
-    if (Hash_df(drbg, seed, sizeof(seed), drbgReseed, drbg->V, sizeof(drbg->V),
-                                          entropy, entropySz) != DRBG_SUCCESS) {
+    byte newV[DRBG_SEED_LEN];
+
+    XMEMSET(newV, 0, DRBG_SEED_LEN);
+
+    if (Hash_df(drbg, newV, sizeof(newV), drbgReseed,
+                drbg->V, sizeof(drbg->V), seed, seedSz) != DRBG_SUCCESS) {
         return DRBG_FAILURE;
     }
 
-    XMEMCPY(drbg->V, seed, sizeof(drbg->V));
-    ForceZero(seed, sizeof(seed));
+    XMEMCPY(drbg->V, newV, sizeof(drbg->V));
+    ForceZero(newV, sizeof(newV));
 
     if (Hash_df(drbg, drbg->C, sizeof(drbg->C), drbgInitC, drbg->V,
                                     sizeof(drbg->V), NULL, 0) != DRBG_SUCCESS) {
@@ -334,13 +415,13 @@
 }
 
 /* Returns: DRBG_SUCCESS and DRBG_FAILURE or BAD_FUNC_ARG on fail */
-int wc_RNG_DRBG_Reseed(WC_RNG* rng, const byte* entropy, word32 entropySz)
+int wc_RNG_DRBG_Reseed(WC_RNG* rng, const byte* seed, word32 seedSz)
 {
-    if (rng == NULL || entropy == NULL) {
+    if (rng == NULL || seed == NULL) {
         return BAD_FUNC_ARG;
     }
 
-    return Hash_DRBG_Reseed(rng->drbg, entropy, entropySz);
+    return Hash_DRBG_Reseed(rng->drbg, seed, seedSz);
 }
 
 static WC_INLINE void array_add_one(byte* data, word32 dataSz)
@@ -367,7 +448,13 @@
 #else
     wc_Sha256 sha[1];
 #endif
+#ifdef WC_ASYNC_ENABLE_SHA256
     DECLARE_VAR(digest, byte, WC_SHA256_DIGEST_SIZE, drbg->heap);
+    if (digest == NULL)
+        return MEMORY_E;
+#else
+    byte digest[WC_SHA256_DIGEST_SIZE];
+#endif
 
     /* Special case: outSz is 0 and out is NULL. wc_Generate a block to save for
      * the continuous test. */
@@ -379,7 +466,7 @@
     XMEMCPY(data, V, sizeof(data));
     for (i = 0; i < len; i++) {
 #ifndef WOLFSSL_SMALL_STACK_CACHE
-    #ifdef WOLFSSL_ASYNC_CRYPT
+    #if defined(WOLFSSL_ASYNC_CRYPT) || defined(WOLF_CRYPTO_CB)
         ret = wc_InitSha256_ex(sha, drbg->heap, drbg->devId);
     #else
         ret = wc_InitSha256(sha);
@@ -427,7 +514,9 @@
     }
     ForceZero(data, sizeof(data));
 
+#ifdef WC_ASYNC_ENABLE_SHA256
     FREE_VAR(digest, drbg->heap);
+#endif
 
     return (ret == 0) ? DRBG_SUCCESS : DRBG_FAILURE;
 }
@@ -469,14 +558,20 @@
     if (drbg->reseedCtr == RESEED_INTERVAL) {
         return DRBG_NEED_RESEED;
     } else {
+    #ifdef WC_ASYNC_ENABLE_SHA256
         DECLARE_VAR(digest, byte, WC_SHA256_DIGEST_SIZE, drbg->heap);
+        if (digest == NULL)
+            return MEMORY_E;
+    #else
+        byte digest[WC_SHA256_DIGEST_SIZE];
+    #endif
         type = drbgGenerateH;
         reseedCtr = drbg->reseedCtr;
 
         ret = Hash_gen(drbg, out, outSz, drbg->V);
         if (ret == DRBG_SUCCESS) {
 #ifndef WOLFSSL_SMALL_STACK_CACHE
-        #ifdef WOLFSSL_ASYNC_CRYPT
+        #if defined(WOLFSSL_ASYNC_CRYPT) || defined(WOLF_CRYPTO_CB)
             ret = wc_InitSha256_ex(sha, drbg->heap, drbg->devId);
         #else
             ret = wc_InitSha256(sha);
@@ -506,7 +601,9 @@
             drbg->reseedCtr++;
         }
         ForceZero(digest, WC_SHA256_DIGEST_SIZE);
+    #ifdef WC_ASYNC_ENABLE_SHA256
         FREE_VAR(digest, drbg->heap);
+    #endif
     }
 
     return (ret == 0) ? DRBG_SUCCESS : DRBG_FAILURE;
@@ -517,10 +614,10 @@
                                              const byte* nonce, word32 nonceSz,
                                              void* heap, int devId)
 {
-    int ret = DRBG_FAILURE;
+    int ret;
 
     XMEMSET(drbg, 0, sizeof(DRBG));
-#ifdef WOLFSSL_ASYNC_CRYPT
+#if defined(WOLFSSL_ASYNC_CRYPT) || defined(WOLF_CRYPTO_CB)
     drbg->heap = heap;
     drbg->devId = devId;
 #else
@@ -529,7 +626,7 @@
 #endif
 
 #ifdef WOLFSSL_SMALL_STACK_CACHE
-    #ifdef WOLFSSL_ASYNC_CRYPT
+    #if defined(WOLFSSL_ASYNC_CRYPT) || defined(WOLF_CRYPTO_CB)
         ret = wc_InitSha256_ex(&drbg->sha256, drbg->heap, drbg->devId);
     #else
         ret = wc_InitSha256(&drbg->sha256);
@@ -548,6 +645,9 @@
         drbg->matchCount = 0;
         ret = DRBG_SUCCESS;
     }
+    else {
+        ret = DRBG_FAILURE;
+    }
 
     return ret;
 }
@@ -570,6 +670,29 @@
 
     return (compareSum == 0) ? DRBG_SUCCESS : DRBG_FAILURE;
 }
+
+
+int wc_RNG_TestSeed(const byte* seed, word32 seedSz)
+{
+    int ret = DRBG_SUCCESS;
+
+    /* Check the seed for duplicate words. */
+    word32 seedIdx = 0;
+    word32 scratchSz = min(SEED_BLOCK_SZ, seedSz - SEED_BLOCK_SZ);
+
+    while (seedIdx < seedSz - SEED_BLOCK_SZ) {
+        if (ConstantCompare(seed + seedIdx,
+                            seed + seedIdx + scratchSz,
+                            scratchSz) == 0) {
+
+            ret = DRBG_CONT_FAILURE;
+        }
+        seedIdx += SEED_BLOCK_SZ;
+        scratchSz = min(SEED_BLOCK_SZ, (seedSz - seedIdx));
+    }
+
+    return ret;
+}
 #endif /* HAVE_HASHDRBG */
 /* End NIST DRBG Code */
 
@@ -579,7 +702,7 @@
 {
     int ret = RNG_FAILURE_E;
 #ifdef HAVE_HASHDRBG
-    word32 entropySz = ENTROPY_SZ;
+    word32 seedSz = SEED_SZ + SEED_BLOCK_SZ;
 #endif
 
     (void)nonce;
@@ -596,8 +719,11 @@
 #else
     rng->heap = heap;
 #endif
-#ifdef WOLFSSL_ASYNC_CRYPT
+#if defined(WOLFSSL_ASYNC_CRYPT) || defined(WOLF_CRYPTO_CB)
     rng->devId = devId;
+    #if defined(WOLF_CRYPTO_CB)
+        rng->seed.devId = devId;
+    #endif
 #else
     (void)devId;
 #endif
@@ -628,31 +754,60 @@
 #endif
 
 #ifdef CUSTOM_RAND_GENERATE_BLOCK
-	ret = 0; /* success */
+    ret = 0; /* success */
 #else
 #ifdef HAVE_HASHDRBG
     if (nonceSz == 0)
-        entropySz = MAX_ENTROPY_SZ;
+        seedSz = MAX_SEED_SZ;
 
     if (wc_RNG_HealthTestLocal(0) == 0) {
-        DECLARE_VAR(entropy, byte, MAX_ENTROPY_SZ, rng->heap);
-
+    #ifdef WC_ASYNC_ENABLE_SHA256
+        DECLARE_VAR(seed, byte, MAX_SEED_SZ, rng->heap);
+        if (seed == NULL)
+            return MEMORY_E;
+    #else
+        byte seed[MAX_SEED_SZ];
+    #endif
+
+#if !defined(WOLFSSL_NO_MALLOC) || defined(WOLFSSL_STATIC_MEMORY)
         rng->drbg =
                 (struct DRBG*)XMALLOC(sizeof(DRBG), rng->heap,
                                                           DYNAMIC_TYPE_RNG);
+#else
+        /* compile-time validation of drbg_data size */
+        typedef char drbg_data_test[sizeof(rng->drbg_data) >=
+                sizeof(struct DRBG) ? 1 : -1];
+        (void)sizeof(drbg_data_test);
+        rng->drbg = (struct DRBG*)rng->drbg_data;
+#endif
+
         if (rng->drbg == NULL) {
             ret = MEMORY_E;
         }
-        else if (wc_GenerateSeed(&rng->seed, entropy, entropySz) == 0 &&
-                 Hash_DRBG_Instantiate(rng->drbg, entropy, entropySz,
-                            nonce, nonceSz, rng->heap, devId) == DRBG_SUCCESS) {
-            ret = Hash_DRBG_Generate(rng->drbg, NULL, 0);
+        else {
+            ret = wc_GenerateSeed(&rng->seed, seed, seedSz);
+            if (ret != 0)
+                ret = DRBG_FAILURE;
+            else
+                ret = wc_RNG_TestSeed(seed, seedSz);
+
+            if (ret == DRBG_SUCCESS)
+                 ret = Hash_DRBG_Instantiate(rng->drbg,
+                            seed + SEED_BLOCK_SZ, seedSz - SEED_BLOCK_SZ,
+                            nonce, nonceSz, rng->heap, devId);
+
+            if (ret != DRBG_SUCCESS) {
+            #if !defined(WOLFSSL_NO_MALLOC) || defined(WOLFSSL_STATIC_MEMORY)
+                XFREE(rng->drbg, rng->heap, DYNAMIC_TYPE_RNG);
+            #endif
+                rng->drbg = NULL;
+            }
         }
-        else
-            ret = DRBG_FAILURE;
-
-        ForceZero(entropy, entropySz);
-        FREE_VAR(entropy, rng->heap);
+
+        ForceZero(seed, seedSz);
+    #ifdef WC_ASYNC_ENABLE_SHA256
+        FREE_VAR(seed, rng->heap);
+    #endif
     }
     else
         ret = DRBG_CONT_FAILURE;
@@ -679,6 +834,38 @@
 }
 
 
+WOLFSSL_ABI
+WC_RNG* wc_rng_new(byte* nonce, word32 nonceSz, void* heap)
+{
+    WC_RNG* rng;
+
+    rng = (WC_RNG*)XMALLOC(sizeof(WC_RNG), heap, DYNAMIC_TYPE_RNG);
+    if (rng) {
+        int error = _InitRng(rng, nonce, nonceSz, heap, INVALID_DEVID) != 0;
+        if (error) {
+            XFREE(rng, heap, DYNAMIC_TYPE_RNG);
+            rng = NULL;
+        }
+    }
+
+    return rng;
+}
+
+
+WOLFSSL_ABI
+void wc_rng_free(WC_RNG* rng)
+{
+    if (rng) {
+        void* heap = rng->heap;
+
+        wc_FreeRng(rng);
+        ForceZero(rng, sizeof(WC_RNG));
+        XFREE(rng, heap, DYNAMIC_TYPE_RNG);
+        (void)heap;
+    }
+}
+
+
 int wc_InitRng(WC_RNG* rng)
 {
     return _InitRng(rng, NULL, 0, NULL, INVALID_DEVID);
@@ -705,6 +892,7 @@
 
 
 /* place a generated block in output */
+WOLFSSL_ABI
 int wc_RNG_GenerateBlock(WC_RNG* rng, byte* output, word32 sz)
 {
     int ret;
@@ -712,6 +900,15 @@
     if (rng == NULL || output == NULL)
         return BAD_FUNC_ARG;
 
+#ifdef WOLF_CRYPTO_CB
+    if (rng->devId != INVALID_DEVID) {
+        ret = wc_CryptoCb_RandomBlock(rng, output, sz);
+        if (ret != CRYPTOCB_UNAVAILABLE)
+            return ret;
+        /* fall-through when unavailable */
+    }
+#endif
+
 #ifdef HAVE_INTEL_RDRAND
     if (IS_INTEL_RDRAND(intel_flags))
         return wc_GenerateRand_IntelRD(NULL, output, sz);
@@ -722,7 +919,7 @@
         /* these are blocking */
     #ifdef HAVE_CAVIUM
         return NitroxRngGenerateBlock(rng, output, sz);
-    #elif defined(HAVE_INTEL_QA)
+    #elif defined(HAVE_INTEL_QA) && defined(QAT_ENABLE_RNG)
         return IntelQaDrbg(&rng->asyncDev, output, sz);
     #else
         /* simulator not supported */
@@ -745,20 +942,22 @@
     ret = Hash_DRBG_Generate(rng->drbg, output, sz);
     if (ret == DRBG_NEED_RESEED) {
         if (wc_RNG_HealthTestLocal(1) == 0) {
-            byte entropy[ENTROPY_SZ];
-
-            if (wc_GenerateSeed(&rng->seed, entropy, ENTROPY_SZ) == 0 &&
-                Hash_DRBG_Reseed(rng->drbg, entropy, ENTROPY_SZ)
-                                                              == DRBG_SUCCESS) {
-
-                ret = Hash_DRBG_Generate(rng->drbg, NULL, 0);
-                if (ret == DRBG_SUCCESS)
-                    ret = Hash_DRBG_Generate(rng->drbg, output, sz);
-            }
+            byte newSeed[SEED_SZ + SEED_BLOCK_SZ];
+
+            ret = wc_GenerateSeed(&rng->seed, newSeed,
+                                  SEED_SZ + SEED_BLOCK_SZ);
+            if (ret != 0)
+                ret = DRBG_FAILURE;
             else
-                ret = DRBG_FAILURE;
-
-            ForceZero(entropy, ENTROPY_SZ);
+                ret = wc_RNG_TestSeed(newSeed, SEED_SZ + SEED_BLOCK_SZ);
+
+            if (ret == DRBG_SUCCESS)
+                ret = Hash_DRBG_Reseed(rng->drbg, newSeed + SEED_BLOCK_SZ,
+                                       SEED_SZ);
+            if (ret == DRBG_SUCCESS)
+                ret = Hash_DRBG_Generate(rng->drbg, output, sz);
+
+            ForceZero(newSeed, sizeof(newSeed));
         }
         else
             ret = DRBG_CONT_FAILURE;
@@ -809,7 +1008,9 @@
         if (Hash_DRBG_Uninstantiate(rng->drbg) != DRBG_SUCCESS)
             ret = RNG_FAILURE_E;
 
+    #if !defined(WOLFSSL_NO_MALLOC) || defined(WOLFSSL_STATIC_MEMORY)
         XFREE(rng->drbg, rng->heap, DYNAMIC_TYPE_RNG);
+    #endif
         rng->drbg = NULL;
     }
 
@@ -820,21 +1021,20 @@
 }
 
 #ifdef HAVE_HASHDRBG
-int wc_RNG_HealthTest(int reseed, const byte* entropyA, word32 entropyASz,
-                                  const byte* entropyB, word32 entropyBSz,
+int wc_RNG_HealthTest(int reseed, const byte* seedA, word32 seedASz,
+                                  const byte* seedB, word32 seedBSz,
                                   byte* output, word32 outputSz)
 {
     return wc_RNG_HealthTest_ex(reseed, NULL, 0,
-                                entropyA, entropyASz,
-                                entropyB, entropyBSz,
+                                seedA, seedASz, seedB, seedBSz,
                                 output, outputSz,
                                 NULL, INVALID_DEVID);
 }
 
 
 int wc_RNG_HealthTest_ex(int reseed, const byte* nonce, word32 nonceSz,
-                                  const byte* entropyA, word32 entropyASz,
-                                  const byte* entropyB, word32 entropyBSz,
+                                  const byte* seedA, word32 seedASz,
+                                  const byte* seedB, word32 seedBSz,
                                   byte* output, word32 outputSz,
                                   void* heap, int devId)
 {
@@ -844,11 +1044,11 @@
     DRBG  drbg_var;
 #endif
 
-    if (entropyA == NULL || output == NULL) {
+    if (seedA == NULL || output == NULL) {
         return BAD_FUNC_ARG;
     }
 
-    if (reseed != 0 && entropyB == NULL) {
+    if (reseed != 0 && seedB == NULL) {
         return BAD_FUNC_ARG;
     }
 
@@ -857,7 +1057,7 @@
     }
 
 #ifdef WOLFSSL_SMALL_STACK
-    drbg = (struct DRBG*)XMALLOC(sizeof(DRBG), NULL, DYNAMIC_TYPE_RNG);
+    drbg = (DRBG*)XMALLOC(sizeof(DRBG), NULL, DYNAMIC_TYPE_RNG);
     if (drbg == NULL) {
         return MEMORY_E;
     }
@@ -865,17 +1065,22 @@
     drbg = &drbg_var;
 #endif
 
-    if (Hash_DRBG_Instantiate(drbg, entropyA, entropyASz, nonce, nonceSz,
+    if (Hash_DRBG_Instantiate(drbg, seedA, seedASz, nonce, nonceSz,
                               heap, devId) != 0) {
         goto exit_rng_ht;
     }
 
     if (reseed) {
-        if (Hash_DRBG_Reseed(drbg, entropyB, entropyBSz) != 0) {
+        if (Hash_DRBG_Reseed(drbg, seedB, seedBSz) != 0) {
             goto exit_rng_ht;
         }
     }
 
+    /* This call to generate is prescribed by the NIST DRBGVS
+     * procedure. The results are thrown away. The known
+     * answer test checks the second block of DRBG out of
+     * the generator to ensure the internal state is updated
+     * as expected. */
     if (Hash_DRBG_Generate(drbg, output, outputSz) != 0) {
         goto exit_rng_ht;
     }
@@ -902,14 +1107,14 @@
 }
 
 
-const byte entropyA[] = {
+const byte seedA[] = {
     0x63, 0x36, 0x33, 0x77, 0xe4, 0x1e, 0x86, 0x46, 0x8d, 0xeb, 0x0a, 0xb4,
     0xa8, 0xed, 0x68, 0x3f, 0x6a, 0x13, 0x4e, 0x47, 0xe0, 0x14, 0xc7, 0x00,
     0x45, 0x4e, 0x81, 0xe9, 0x53, 0x58, 0xa5, 0x69, 0x80, 0x8a, 0xa3, 0x8f,
     0x2a, 0x72, 0xa6, 0x23, 0x59, 0x91, 0x5a, 0x9f, 0x8a, 0x04, 0xca, 0x68
 };
 
-const byte reseedEntropyA[] = {
+const byte reseedSeedA[] = {
     0xe6, 0x2b, 0x8a, 0x8e, 0xe8, 0xf1, 0x41, 0xb6, 0x98, 0x05, 0x66, 0xe3,
     0xbf, 0xe3, 0xc0, 0x49, 0x03, 0xda, 0xd4, 0xac, 0x2c, 0xdf, 0x9f, 0x22,
     0x80, 0x01, 0x0a, 0x67, 0x39, 0xbc, 0x83, 0xd3
@@ -929,7 +1134,7 @@
     0xa1, 0x80, 0x18, 0x3a, 0x07, 0xdf, 0xae, 0x17
 };
 
-const byte entropyB[] = {
+const byte seedB[] = {
     0xa6, 0x5a, 0xd0, 0xf3, 0x45, 0xdb, 0x4e, 0x0e, 0xff, 0xe8, 0x75, 0xc3,
     0xa2, 0xe7, 0x1f, 0x42, 0xc7, 0x12, 0x9d, 0x62, 0x0f, 0xf5, 0xc1, 0x19,
     0xa9, 0xef, 0x55, 0xf0, 0x51, 0x85, 0xe0, 0xfb, /* nonce next */
@@ -970,8 +1175,8 @@
 #endif
 
     if (reseed) {
-        ret = wc_RNG_HealthTest(1, entropyA, sizeof(entropyA),
-                                reseedEntropyA, sizeof(reseedEntropyA),
+        ret = wc_RNG_HealthTest(1, seedA, sizeof(seedA),
+                                reseedSeedA, sizeof(reseedSeedA),
                                 check, RNG_HEALTH_TEST_CHECK_SIZE);
         if (ret == 0) {
             if (ConstantCompare(check, outputA,
@@ -980,7 +1185,7 @@
         }
     }
     else {
-        ret = wc_RNG_HealthTest(0, entropyB, sizeof(entropyB),
+        ret = wc_RNG_HealthTest(0, seedB, sizeof(seedB),
                                 NULL, 0,
                                 check, RNG_HEALTH_TEST_CHECK_SIZE);
         if (ret == 0) {
@@ -990,13 +1195,13 @@
         }
 
         /* The previous test cases use a large seed instead of a seed and nonce.
-         * entropyB is actually from a test case with a seed and nonce, and
+         * seedB is actually from a test case with a seed and nonce, and
          * just concatenates them. The pivot point between seed and nonce is
          * byte 32, feed them into the health test separately. */
         if (ret == 0) {
             ret = wc_RNG_HealthTest_ex(0,
-                                    entropyB + 32, sizeof(entropyB) - 32,
-                                    entropyB, 32,
+                                    seedB + 32, sizeof(seedB) - 32,
+                                    seedB, 32,
                                     NULL, 0,
                                     check, RNG_HEALTH_TEST_CHECK_SIZE,
                                     NULL, INVALID_DEVID);
@@ -1333,7 +1538,7 @@
 
 int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
 {
-	int ret = !SGX_SUCCESS;
+    int ret = !SGX_SUCCESS;
 	int i, read_max = 10;
 
 	for (i = 0; i < read_max && ret != SGX_SUCCESS; i++) {
@@ -1348,6 +1553,30 @@
 
 int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
 {
+#ifdef WOLF_CRYPTO_CB
+    int ret;
+
+    if (os != NULL && os->devId != INVALID_DEVID) {
+        ret = wc_CryptoCb_RandomSeed(os, output, sz);
+        if (ret != CRYPTOCB_UNAVAILABLE)
+            return ret;
+        /* fall-through when unavailable */
+    }
+#endif
+
+    #ifdef HAVE_INTEL_RDSEED
+        if (IS_INTEL_RDSEED(intel_flags)) {
+             if (!wc_GenerateSeed_IntelRD(NULL, output, sz)) {
+                 /* success, we're done */
+                 return 0;
+             }
+        #ifdef FORCE_FAILURE_RDSEED
+             /* don't fall back to CryptoAPI */
+             return READ_RAN_E;
+        #endif
+        }
+    #endif /* HAVE_INTEL_RDSEED */
+
     if(!CryptAcquireContext(&os->handle, 0, 0, PROV_RSA_FULL,
                             CRYPT_VERIFYCONTEXT))
         return WINCRYPT_E;
@@ -1366,16 +1595,13 @@
 #include "rtprand.h"   /* rtp_rand () */
 #include "rtptime.h"   /* rtp_get_system_msec() */
 
-
 int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
 {
-    int i;
+    word32 i;
+
     rtp_srand(rtp_get_system_msec());
-
     for (i = 0; i < sz; i++ ) {
         output[i] = rtp_rand() % 256;
-        if ( (i % 8) == 7)
-            rtp_srand(rtp_get_system_msec());
     }
 
     return 0;
@@ -1385,7 +1611,12 @@
 #elif defined(MICROCHIP_PIC32)
 
     #ifdef MICROCHIP_MPLAB_HARMONY
-        #define PIC32_SEED_COUNT _CP0_GET_COUNT
+        #ifdef MICROCHIP_MPLAB_HARMONY_3
+            #include "system/time/sys_time.h"
+            #define PIC32_SEED_COUNT SYS_TIME_CounterGet
+        #else
+            #define PIC32_SEED_COUNT _CP0_GET_COUNT
+        #endif
     #else
         #if !defined(WOLFSSL_MICROCHIP_PIC32MZ)
             #include <peripheral/timer.h>
@@ -1424,10 +1655,11 @@
             RNGCONbits.PLEN = 0x40;
             RNGCONbits.PRNGEN = 1;
             for (i=0; i<5; i++) { /* wait for RNGNUMGEN ready */
-                volatile int x;
+                volatile int x, y;
                 x = RNGNUMGEN1;
-                x = RNGNUMGEN2;
+                y = RNGNUMGEN2;
                 (void)x;
+                (void)y;
             }
             do {
                 rnd32[0] = RNGNUMGEN1;
@@ -1585,39 +1817,75 @@
 
 #elif defined(STM32_RNG)
      /* Generate a RNG seed using the hardware random number generator
-      * on the STM32F2/F4/F7. */
+      * on the STM32F2/F4/F7/L4. */
 
     #ifdef WOLFSSL_STM32_CUBEMX
     int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
     {
+        int ret;
         RNG_HandleTypeDef hrng;
-        int i;
+        word32 i = 0;
         (void)os;
 
+        ret = wolfSSL_CryptHwMutexLock();
+        if (ret != 0) {
+            return ret;
+        }
+
         /* enable RNG clock source */
         __HAL_RCC_RNG_CLK_ENABLE();
 
         /* enable RNG peripheral */
+        XMEMSET(&hrng, 0, sizeof(hrng));
         hrng.Instance = RNG;
         HAL_RNG_Init(&hrng);
 
-        for (i = 0; i < (int)sz; i++) {
-            /* get value */
-            output[i] = (byte)HAL_RNG_GetRandomNumber(&hrng);
+        while (i < sz) {
+            /* If not aligned or there is odd/remainder */
+            if( (i + sizeof(word32)) > sz ||
+                ((wolfssl_word)&output[i] % sizeof(word32)) != 0
+            ) {
+                /* Single byte at a time */
+                uint32_t tmpRng = 0;
+                if (HAL_RNG_GenerateRandomNumber(&hrng, &tmpRng) != HAL_OK) {
+                    wolfSSL_CryptHwMutexUnLock();
+                    return RAN_BLOCK_E;
+                }
+                output[i++] = (byte)tmpRng;
+            }
+            else {
+                /* Use native 32 instruction */
+                if (HAL_RNG_GenerateRandomNumber(&hrng, (uint32_t*)&output[i]) != HAL_OK) {
+                    wolfSSL_CryptHwMutexUnLock();
+                    return RAN_BLOCK_E;
+                }
+                i += sizeof(word32);
+            }
         }
 
+        wolfSSL_CryptHwMutexUnLock();
+
         return 0;
     }
-    #elif defined(WOLFSSL_STM32F427_RNG)
+    #elif defined(WOLFSSL_STM32F427_RNG) || defined(WOLFSSL_STM32_RNG_NOLIB)
 
     /* Generate a RNG seed using the hardware RNG on the STM32F427
      * directly, following steps outlined in STM32F4 Reference
      * Manual (Chapter 24) for STM32F4xx family. */
     int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
     {
-        int i;
+        int ret;
+        word32 i;
         (void)os;
 
+        ret = wolfSSL_CryptHwMutexLock();
+        if (ret != 0) {
+            return ret;
+        }
+
+        /* enable RNG peripheral clock */
+        RCC->AHB2ENR |= RCC_AHB2ENR_RNGEN;
+
         /* enable RNG interrupt, set IE bit in RNG->CR register */
         RNG->CR |= RNG_CR_IE;
 
@@ -1627,10 +1895,12 @@
 
         /* verify no errors, make sure SEIS and CEIS bits are 0
          * in RNG->SR register */
-        if (RNG->SR & (RNG_SR_SECS | RNG_SR_CECS))
+        if (RNG->SR & (RNG_SR_SECS | RNG_SR_CECS)) {
+            wolfSSL_CryptHwMutexUnLock();
             return RNG_FAILURE_E;
-
-        for (i = 0; i < (int)sz; i++) {
+        }
+
+        for (i = 0; i < sz; i++) {
             /* wait until RNG number is ready */
             while ((RNG->SR & RNG_SR_DRDY) == 0) { }
 
@@ -1638,6 +1908,8 @@
             output[i] = RNG->DR;
         }
 
+        wolfSSL_CryptHwMutexUnLock();
+
         return 0;
     }
 
@@ -1646,9 +1918,15 @@
     /* Generate a RNG seed using the STM32 Standard Peripheral Library */
     int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
     {
-        int i;
+        int ret;
+        word32 i;
         (void)os;
 
+        ret = wolfSSL_CryptHwMutexLock();
+        if (ret != 0) {
+            return ret;
+        }
+
         /* enable RNG clock source */
         RCC_AHB2PeriphClockCmd(RCC_AHB2Periph_RNG, ENABLE);
 
@@ -1659,10 +1937,12 @@
         RNG_Cmd(ENABLE);
 
         /* verify no errors with RNG_CLK or Seed */
-        if (RNG_GetFlagStatus(RNG_FLAG_SECS | RNG_FLAG_CECS) != RESET)
-        	return RNG_FAILURE_E;
-
-        for (i = 0; i < (int)sz; i++) {
+        if (RNG_GetFlagStatus(RNG_FLAG_SECS | RNG_FLAG_CECS) != RESET) {
+            wolfSSL_CryptHwMutexUnLock();
+            return RNG_FAILURE_E;
+        }
+
+        for (i = 0; i < sz; i++) {
             /* wait until RNG number is ready */
             while (RNG_GetFlagStatus(RNG_FLAG_DRDY) == RESET) { }
 
@@ -1670,6 +1950,8 @@
             output[i] = RNG_GetRandomNumber();
         }
 
+        wolfSSL_CryptHwMutexUnLock();
+
         return 0;
     }
     #endif /* WOLFSSL_STM32_CUBEMX */
@@ -1725,6 +2007,26 @@
 
     return 0;
 }
+#elif defined(WOLFSSL_DEOS) && !defined(CUSTOM_RAND_GENERATE)
+    #include "stdlib.h"
+
+    #warning "potential for not enough entropy, currently being used for testing Deos"
+    int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+    {
+        int i;
+        int seed = XTIME(0);
+        (void)os;
+
+        for (i = 0; i < sz; i++ ) {
+            output[i] = rand_r(&seed) % 256;
+            if ((i % 8) == 7) {
+                seed = XTIME(0);
+                rand_r(&seed);
+            }
+        }
+
+        return 0;
+    }
 #elif defined(WOLFSSL_VXWORKS)
 
     #include <randomNumGen.h>
@@ -1819,16 +2121,16 @@
 
     int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
     {
-    	int ret = 0;
+        int ret = 0;
 
         (void)os;
-    	if (output == NULL) {
-    		return BUFFER_E;
-    	}
-
-    	ret = atmel_get_random_number(sz, output);
-
-    	return ret;
+        if (output == NULL) {
+            return BUFFER_E;
+        }
+
+        ret = atmel_get_random_number(sz, output);
+
+        return ret;
     }
 
 #elif defined(INTIME_RTOS)
@@ -1849,6 +2151,45 @@
         return ret;
     }
 
+#elif defined(WOLFSSL_WICED)
+    int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+    {
+        int ret;
+        (void)os;
+
+        if (output == NULL || UINT16_MAX < sz) {
+            return BUFFER_E;
+        }
+
+        if ((ret = wiced_crypto_get_random((void*) output, sz) )
+                         != WICED_SUCCESS) {
+            return ret;
+        }
+
+        return ret;
+    }
+
+#elif defined(WOLFSSL_NETBURNER)
+    #warning using NetBurner pseudo random GetRandomByte for seed
+    int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+    {
+        word32 i;
+        (void)os;
+
+        if (output == NULL) {
+            return BUFFER_E;
+        }
+
+        for (i = 0; i < sz; i++) {
+            output[i] = GetRandomByte();
+
+            /* check if was a valid random number */
+            if (!RandomValid())
+                return RNG_FAILURE_E;
+        }
+
+        return 0;
+    }
 #elif defined(IDIRECT_DEV_RANDOM)
 
     extern int getRandom( int sz, unsigned char *output );
@@ -1907,6 +2248,128 @@
         return ret;
     }
 
+#elif defined(WOLFSSL_APACHE_MYNEWT)
+
+    #include <stdlib.h>
+    #include "os/os_time.h"
+    int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+    {
+        int i;
+        srand(os_time_get());
+
+        for (i = 0; i < sz; i++ ) {
+            output[i] = rand() % 256;
+            if ((i % 8) == 7) {
+                srand(os_time_get());
+            }
+        }
+
+        return 0;
+    }
+
+#elif defined(WOLFSSL_ESPIDF)
+    #if defined(WOLFSSL_ESPWROOM32) || defined(WOLFSSL_ESPWROOM32SE)
+        #include <esp_system.h>
+
+        int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+        {
+            word32 rand;
+            while (sz > 0) {
+                word32 len = sizeof(rand);
+                if (sz < len)
+                    len = sz;
+                /* Get one random 32-bit word from hw RNG */
+                rand = esp_random( );
+                XMEMCPY(output, &rand, len);
+                output += len;
+                sz -= len;
+            }
+
+            return 0;
+        }
+    #endif /* end WOLFSSL_ESPWROOM32 */
+
+#elif defined(WOLFSSL_RENESAS_TSIP)
+#if defined(WOLFSSL_RENESA_TSIP_IAREWRX)
+    #include "r_bsp/mcu/all/r_rx_compiler.h"
+#endif
+    #include "r_bsp/platform.h"
+    #include "r_tsip_rx_if.h"
+    
+    int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+    {
+        int ret;
+        uint32_t buffer[4];
+
+        while (sz > 0) {
+            uint32_t len = sizeof(buffer);
+            
+            if (sz < len) {
+                len = sz;
+            }
+            /* return 4 words random number*/
+            ret = R_TSIP_GenerateRandomNumber(buffer);
+            if(ret == TSIP_SUCCESS) {
+                XMEMCPY(output, &buffer, len);
+                output += len;
+                sz -= len;
+            } else
+                return ret;
+        }
+        return ret;
+    }
+
+#elif defined(WOLFSSL_SCE) && !defined(WOLFSSL_SCE_NO_TRNG)
+    #include "hal_data.h"
+
+    #ifndef WOLFSSL_SCE_TRNG_HANDLE
+        #define WOLFSSL_SCE_TRNG_HANDLE g_sce_trng
+    #endif
+
+    int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+    {
+        uint32_t ret;
+        uint32_t blocks;
+        word32   len = sz;
+
+        ret = WOLFSSL_SCE_TRNG_HANDLE.p_api->open(WOLFSSL_SCE_TRNG_HANDLE.p_ctrl,
+                                                  WOLFSSL_SCE_TRNG_HANDLE.p_cfg);
+        if (ret != SSP_SUCCESS && ret != SSP_ERR_CRYPTO_ALREADY_OPEN) {
+            /* error opening TRNG driver */
+            return -1;
+        }
+
+        blocks = sz / sizeof(uint32_t);
+        if (blocks > 0) {
+            ret = WOLFSSL_SCE_TRNG_HANDLE.p_api->read(WOLFSSL_SCE_TRNG_HANDLE.p_ctrl,
+                                                      (uint32_t*)output, blocks);
+            if (ret != SSP_SUCCESS) {
+                return -1;
+            }
+        }
+
+        len = len - (blocks * sizeof(uint32_t));
+        if (len > 0) {
+            uint32_t tmp;
+
+            if (len > sizeof(uint32_t)) {
+                return -1;
+            }
+            ret = WOLFSSL_SCE_TRNG_HANDLE.p_api->read(WOLFSSL_SCE_TRNG_HANDLE.p_ctrl,
+                                                      (uint32_t*)tmp, 1);
+            if (ret != SSP_SUCCESS) {
+                return -1;
+            }
+            XMEMCPY(output + (blocks * sizeof(uint32_t)), (byte*)&tmp, len);
+        }
+
+        ret = WOLFSSL_SCE_TRNG_HANDLE.p_api->close(WOLFSSL_SCE_TRNG_HANDLE.p_ctrl);
+        if (ret != SSP_SUCCESS) {
+            /* error opening TRNG driver */
+            return -1;
+        }
+        return 0;
+    }
 #elif defined(CUSTOM_RAND_GENERATE_BLOCK)
     /* #define CUSTOM_RAND_GENERATE_BLOCK myRngFunc
      * extern int myRngFunc(byte* output, word32 sz);
@@ -1917,7 +2380,8 @@
       defined(WOLFSSL_uITRON4)  || defined(WOLFSSL_uTKERNEL2) || \
       defined(WOLFSSL_LPC43xx)  || defined(WOLFSSL_STM32F2xx) || \
       defined(MBED)             || defined(WOLFSSL_EMBOS) || \
-      defined(WOLFSSL_GENSEED_FORTEST) || defined(WOLFSSL_CHIBIOS)
+      defined(WOLFSSL_GENSEED_FORTEST) || defined(WOLFSSL_CHIBIOS) || \
+      defined(WOLFSSL_CONTIKI)  || defined(WOLFSSL_AZSPHERE)
 
     /* these platforms do not have a default random seed and
        you'll need to implement your own wc_GenerateSeed or define via
@@ -1925,6 +2389,59 @@
 
     #define USE_TEST_GENSEED
 
+#elif defined(WOLFSSL_ZEPHYR)
+
+        #include <entropy.h>
+    #ifndef _POSIX_C_SOURCE
+        #include <posix/time.h>
+    #else
+        #include <sys/time.h>
+    #endif
+
+        int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+        {
+            int ret = 0;
+            word32 rand;
+            while (sz > 0) {
+                word32 len = sizeof(rand);
+                if (sz < len)
+                    len = sz;
+                rand = sys_rand32_get();
+                XMEMCPY(output, &rand, len);
+                output += len;
+                sz -= len;
+            }
+
+            return ret;
+        }
+
+#elif defined(WOLFSSL_TELIT_M2MB)
+
+        #include "stdlib.h"
+        static long get_timestamp(void) {
+            long myTime = 0;
+            INT32 fd = m2mb_rtc_open("/dev/rtc0", 0);
+            if (fd >= 0) {
+                M2MB_RTC_TIMEVAL_T timeval;
+                m2mb_rtc_ioctl(fd, M2MB_RTC_IOCTL_GET_TIMEVAL, &timeval);
+                myTime = timeval.msec;
+                m2mb_rtc_close(fd);
+            }
+            return myTime;
+        }
+        int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+        {
+            int i;
+            srand(get_timestamp());
+            for (i = 0; i < sz; i++ ) {
+                output[i] = rand() % 256;
+                if ((i % 8) == 7) {
+                    srand(get_timestamp());
+                }
+            }
+            return 0;
+        }
+
 #elif defined(NO_DEV_RANDOM)
 
     #error "you need to write an os specific wc_GenerateSeed() here"
@@ -1943,6 +2460,20 @@
     {
         int ret = 0;
 
+        if (os == NULL) {
+            return BAD_FUNC_ARG;
+        }
+
+    #ifdef WOLF_CRYPTO_CB
+        if (os->devId != INVALID_DEVID) {
+            ret = wc_CryptoCb_RandomSeed(os, output, sz);
+            if (ret != CRYPTOCB_UNAVAILABLE)
+                return ret;
+            /* fall-through when unavailable */
+            ret = 0; /* reset error code */
+        }
+    #endif
+
     #ifdef HAVE_INTEL_RDSEED
         if (IS_INTEL_RDSEED(intel_flags)) {
              ret = wc_GenerateSeed_IntelRD(NULL, output, sz);
@@ -2003,7 +2534,6 @@
     #else
         #pragma message("Warning: write a real random seed!!!!, just for testing now")
     #endif
-
     int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
     {
         word32 i;
@@ -2016,8 +2546,8 @@
     }
 #endif
 
+
 /* End wc_GenerateSeed */
-
 #endif /* WC_NO_RNG */
 #endif /* HAVE_FIPS */
 
--- a/wolfcrypt/src/ripemd.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/ripemd.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* ripemd.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
--- a/wolfcrypt/src/rsa.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/rsa.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* rsa.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -43,6 +43,10 @@
 
 #include <wolfssl/wolfcrypt/rsa.h>
 
+#ifdef WOLFSSL_AFALG_XILINX_RSA
+#include <wolfssl/wolfcrypt/port/af_alg/wc_afalg.h>
+#endif
+
 #ifdef WOLFSSL_HAVE_SP_RSA
 #include <wolfssl/wolfcrypt/sp.h>
 #endif
@@ -55,7 +59,9 @@
  * WOLFSSL_KEY_GEN:     Allows Private Key Generation               default: off
  * RSA_LOW_MEM:         NON CRT Private Operations, less memory     default: off
  * WC_NO_RSA_OAEP:      Disables RSA OAEP padding                   default: on (not defined)
-
+ * WC_RSA_NONBLOCK:     Enables support for RSA non-blocking        default: off
+ * WC_RSA_NONBLOCK_TIME:Enables support for time based blocking     default: off
+ *                      time calculation.
 */
 
 /*
@@ -96,6 +102,7 @@
 }
 
 
+#ifndef WOLFSSL_RSA_VERIFY_ONLY
 int  wc_RsaPublicEncrypt(const byte* in, word32 inLen, byte* out,
                                  word32 outLen, RsaKey* key, WC_RNG* rng)
 {
@@ -104,8 +111,10 @@
     }
     return RsaPublicEncrypt_fips(in, inLen, out, outLen, key, rng);
 }
-
-
+#endif
+
+
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
 int  wc_RsaPrivateDecryptInline(byte* in, word32 inLen, byte** out,
                                         RsaKey* key)
 {
@@ -134,6 +143,7 @@
     }
     return RsaSSL_Sign_fips(in, inLen, out, outLen, key, rng);
 }
+#endif
 
 
 int  wc_RsaSSL_VerifyInline(byte* in, word32 inLen, byte** out, RsaKey* key)
@@ -164,6 +174,7 @@
 }
 
 
+#ifndef WOLFSSL_RSA_VERIFY_ONLY
 int wc_RsaFlattenPublicKey(RsaKey* key, byte* a, word32* aSz, byte* b,
                            word32* bSz)
 {
@@ -171,6 +182,7 @@
     /* not specified as fips so not needing _fips */
     return RsaFlattenPublicKey(key, a, aSz, b, bSz);
 }
+#endif
 
 
 #ifdef WOLFSSL_KEY_GEN
@@ -190,8 +202,8 @@
 
 #include <wolfssl/wolfcrypt/random.h>
 #include <wolfssl/wolfcrypt/logging.h>
-#ifdef WOLF_CRYPTO_DEV
-    #include <wolfssl/wolfcrypt/cryptodev.h>
+#ifdef WOLF_CRYPTO_CB
+    #include <wolfssl/wolfcrypt/cryptocb.h>
 #endif
 #ifdef NO_INLINE
     #include <wolfssl/wolfcrypt/misc.h>
@@ -200,8 +212,6 @@
     #include <wolfcrypt/src/misc.c>
 #endif
 
-#define ERROR_OUT(x) { ret = (x); goto done;}
-
 
 enum {
     RSA_STATE_NONE = 0,
@@ -215,21 +225,28 @@
     RSA_STATE_DECRYPT_RES,
 };
 
+
 static void wc_RsaCleanup(RsaKey* key)
 {
+#ifndef WOLFSSL_RSA_VERIFY_INLINE
     if (key && key->data) {
         /* make sure any allocated memory is free'd */
         if (key->dataIsAlloc) {
+        #ifndef WOLFSSL_RSA_PUBLIC_ONLY
             if (key->type == RSA_PRIVATE_DECRYPT ||
                 key->type == RSA_PRIVATE_ENCRYPT) {
                 ForceZero(key->data, key->dataLen);
             }
+        #endif
             XFREE(key->data, key->heap, DYNAMIC_TYPE_WOLF_BIGINT);
             key->dataIsAlloc = 0;
         }
         key->data = NULL;
         key->dataLen = 0;
     }
+#else
+    (void)key;
+#endif
 }
 
 int wc_InitRsaKey_ex(RsaKey* key, void* heap, int devId)
@@ -245,14 +262,16 @@
     key->type = RSA_TYPE_UNKNOWN;
     key->state = RSA_STATE_NONE;
     key->heap = heap;
+#ifndef WOLFSSL_RSA_VERIFY_INLINE
+    key->dataIsAlloc = 0;
     key->data = NULL;
+#endif
     key->dataLen = 0;
-    key->dataIsAlloc = 0;
 #ifdef WC_RSA_BLINDING
     key->rng = NULL;
 #endif
 
-#ifdef WOLF_CRYPTO_DEV
+#ifdef WOLF_CRYPTO_CB
     key->devId = devId;
 #else
     (void)devId;
@@ -272,6 +291,7 @@
     #endif /* WC_ASYNC_ENABLE_RSA */
 #endif /* WOLFSSL_ASYNC_CRYPT */
 
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
     ret = mp_init_multi(&key->n, &key->e, NULL, NULL, NULL, NULL);
     if (ret != MP_OKAY)
         return ret;
@@ -286,12 +306,27 @@
         mp_clear(&key->e);
         return ret;
     }
+#else
+    ret = mp_init(&key->n);
+    if (ret != MP_OKAY)
+        return ret;
+    ret = mp_init(&key->e);
+    if (ret != MP_OKAY) {
+        mp_clear(&key->n);
+        return ret;
+    }
+#endif
 
 #ifdef WOLFSSL_XILINX_CRYPT
     key->pubExp = 0;
     key->mod    = NULL;
 #endif
 
+#ifdef WOLFSSL_AFALG_XILINX_RSA
+    key->alFd = WC_SOCK_NOTSET;
+    key->rdFd = WC_SOCK_NOTSET;
+#endif
+
     return ret;
 }
 
@@ -300,6 +335,29 @@
     return wc_InitRsaKey_ex(key, heap, INVALID_DEVID);
 }
 
+#ifdef HAVE_PKCS11
+int wc_InitRsaKey_Id(RsaKey* key, unsigned char* id, int len, void* heap,
+                     int devId)
+{
+    int ret = 0;
+
+    if (key == NULL)
+        ret = BAD_FUNC_ARG;
+    if (ret == 0 && (len < 0 || len > RSA_MAX_ID_LEN))
+        ret = BUFFER_E;
+
+    if (ret == 0)
+        ret = wc_InitRsaKey_ex(key, heap, devId);
+
+    if (ret == 0 && id != NULL && len != 0) {
+        XMEMCPY(key->id, id, len);
+        key->idLen = len;
+    }
+
+    return ret;
+}
+#endif
+
 
 #ifdef WOLFSSL_XILINX_CRYPT
 #define MAX_E_SIZE 4
@@ -371,10 +429,85 @@
        }
    }
 #endif
-
+    return 0;
+} /* WOLFSSL_XILINX_CRYPT*/
+
+#elif defined(WOLFSSL_CRYPTOCELL)
+
+int wc_InitRsaHw(RsaKey* key)
+{
+    CRYSError_t ret = 0;
+    byte e[3];
+    word32 eSz = sizeof(e);
+    byte n[256];
+    word32 nSz = sizeof(n);
+    byte d[256];
+    word32 dSz = sizeof(d);
+    byte p[128];
+    word32 pSz = sizeof(p);
+    byte q[128];
+    word32 qSz = sizeof(q);
+
+    if (key == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    ret = wc_RsaExportKey(key, e, &eSz, n, &nSz, d, &dSz, p, &pSz, q, &qSz);
+    if (ret != 0)
+        return MP_READ_E;
+
+    ret = CRYS_RSA_Build_PubKey(&key->ctx.pubKey, e, eSz, n, nSz);
+    if (ret != SA_SILIB_RET_OK){
+        WOLFSSL_MSG("CRYS_RSA_Build_PubKey failed");
+        return ret;
+    }
+
+    ret =  CRYS_RSA_Build_PrivKey(&key->ctx.privKey, d, dSz, e, eSz, n, nSz);
+
+    if (ret != SA_SILIB_RET_OK){
+        WOLFSSL_MSG("CRYS_RSA_Build_PrivKey failed");
+        return ret;
+    }
+    key->type = RSA_PRIVATE;
     return 0;
 }
-#endif /* WOLFSSL_XILINX_CRYPT */
+static int cc310_RSA_GenerateKeyPair(RsaKey* key, int size, long e)
+{
+    CRYSError_t             ret = 0;
+    CRYS_RSAKGData_t        KeyGenData;
+    CRYS_RSAKGFipsContext_t FipsCtx;
+    byte ex[3];
+    uint16_t eSz = sizeof(ex);
+    byte n[256];
+    uint16_t nSz = sizeof(n);
+
+    ret = CRYS_RSA_KG_GenerateKeyPair(&wc_rndState,
+                        wc_rndGenVectFunc,
+                        (byte*)&e,
+                        3*sizeof(uint8_t),
+                        size,
+                        &key->ctx.privKey,
+                        &key->ctx.pubKey,
+                        &KeyGenData,
+                        &FipsCtx);
+
+    if (ret != SA_SILIB_RET_OK){
+        WOLFSSL_MSG("CRYS_RSA_KG_GenerateKeyPair failed");
+        return ret;
+    }
+
+    ret = CRYS_RSA_Get_PubKey(&key->ctx.pubKey, ex, &eSz, n, &nSz);
+    if (ret != SA_SILIB_RET_OK){
+        WOLFSSL_MSG("CRYS_RSA_Get_PubKey failed");
+        return ret;
+    }
+    ret = wc_RsaPublicKeyDecodeRaw(n, nSz, ex, eSz, key);
+
+    key->type = RSA_PRIVATE;
+
+    return ret;
+}
+#endif /* WOLFSSL_CRYPTOCELL */
 
 int wc_FreeRsaKey(RsaKey* key)
 {
@@ -390,6 +523,7 @@
     wolfAsync_DevCtxFree(&key->asyncDev, WOLFSSL_ASYNC_MARKER_RSA);
 #endif
 
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
     if (key->type == RSA_PRIVATE) {
 #if defined(WOLFSSL_KEY_GEN) || defined(OPENSSL_EXTRA) || !defined(RSA_LOW_MEM)
         mp_forcezero(&key->u);
@@ -409,6 +543,7 @@
     mp_clear(&key->q);
     mp_clear(&key->p);
     mp_clear(&key->d);
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
 
     /* public part */
     mp_clear(&key->e);
@@ -419,15 +554,31 @@
     key->mod = NULL;
 #endif
 
+#ifdef WOLFSSL_AFALG_XILINX_RSA
+    /* make sure that sockets are closed on cleanup */
+    if (key->alFd > 0) {
+        close(key->alFd);
+        key->alFd = WC_SOCK_NOTSET;
+    }
+    if (key->rdFd > 0) {
+        close(key->rdFd);
+        key->rdFd = WC_SOCK_NOTSET;
+    }
+#endif
+
     return ret;
 }
 
-
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+#if defined(WOLFSSL_KEY_GEN) && !defined(WOLFSSL_NO_RSA_KEY_CHECK)
 /* Check the pair-wise consistency of the RSA key.
  * From NIST SP 800-56B, section 6.4.1.1.
  * Verify that k = (k^e)^d, for some k: 1 < k < n-1. */
 int wc_CheckRsaKey(RsaKey* key)
 {
+#if defined(WOLFSSL_CRYPTOCELL)
+    return 0;
+#endif
 #ifdef WOLFSSL_SMALL_STACK
     mp_int *k = NULL, *tmp = NULL;
 #else
@@ -455,7 +606,7 @@
             ret = MP_READ_E;
     }
 
-#ifdef WOLFSSL_SP_RSA
+#ifdef WOLFSSL_HAVE_SP_RSA
 #ifndef WOLFSSL_SP_NO_2048
     if (mp_count_bits(&key->n) == 2048) {
         ret = sp_ModExp_2048(k, &key->e, &key->n, tmp);
@@ -478,6 +629,17 @@
     }
     else
 #endif
+#ifdef WOLFSSL_SP_4096
+    if (mp_count_bits(&key->n) == 4096) {
+        ret = sp_ModExp_4096(k, &key->e, &key->n, tmp);
+        if (ret != 0)
+            ret = MP_EXPTMOD_E;
+        ret = sp_ModExp_4096(tmp, &key->d, &key->n, tmp);
+        if (ret != 0)
+            ret = MP_EXPTMOD_E;
+    }
+    else
+#endif
 #endif
 #ifdef WOLFSSL_SP_MATH
     {
@@ -502,6 +664,89 @@
             ret = RSA_KEY_PAIR_E;
     }
 
+    /* Check d is less than n. */
+    if (ret == 0 ) {
+        if (mp_cmp(&key->d, &key->n) != MP_LT) {
+            ret = MP_EXPTMOD_E;
+        }
+    }
+    /* Check p*q = n. */
+    if (ret == 0 ) {
+        if (mp_mul(&key->p, &key->q, tmp) != MP_OKAY) {
+            ret = MP_EXPTMOD_E;
+        }
+    }
+    if (ret == 0 ) {
+        if (mp_cmp(&key->n, tmp) != MP_EQ) {
+            ret = MP_EXPTMOD_E;
+        }
+    }
+
+    /* Check dP, dQ and u if they exist */
+    if (ret == 0 && !mp_iszero(&key->dP)) {
+        if (mp_sub_d(&key->p, 1, tmp) != MP_OKAY) {
+            ret = MP_EXPTMOD_E;
+        }
+        /* Check dP <= p-1. */
+        if (ret == 0) {
+            if (mp_cmp(&key->dP, tmp) != MP_LT) {
+                ret = MP_EXPTMOD_E;
+            }
+        }
+        /* Check e*dP mod p-1 = 1. (dP = 1/e mod p-1) */
+        if (ret == 0) {
+            if (mp_mulmod(&key->dP, &key->e, tmp, tmp) != MP_OKAY) {
+                ret = MP_EXPTMOD_E;
+            }
+        }
+        if (ret == 0 ) {
+            if (!mp_isone(tmp)) {
+                ret = MP_EXPTMOD_E;
+            }
+        }
+
+        if (ret == 0) {
+            if (mp_sub_d(&key->q, 1, tmp) != MP_OKAY) {
+                ret = MP_EXPTMOD_E;
+            }
+        }
+        /* Check dQ <= q-1. */
+        if (ret == 0) {
+            if (mp_cmp(&key->dQ, tmp) != MP_LT) {
+                ret = MP_EXPTMOD_E;
+            }
+        }
+        /* Check e*dP mod p-1 = 1. (dQ = 1/e mod q-1) */
+        if (ret == 0) {
+            if (mp_mulmod(&key->dQ, &key->e, tmp, tmp) != MP_OKAY) {
+                ret = MP_EXPTMOD_E;
+            }
+        }
+        if (ret == 0 ) {
+            if (!mp_isone(tmp)) {
+                ret = MP_EXPTMOD_E;
+            }
+        }
+
+        /* Check u <= p. */
+        if (ret == 0) {
+            if (mp_cmp(&key->u, &key->p) != MP_LT) {
+                ret = MP_EXPTMOD_E;
+            }
+        }
+        /* Check u*q mod p = 1. (u = 1/q mod p) */
+        if (ret == 0) {
+            if (mp_mulmod(&key->u, &key->q, &key->p, tmp) != MP_OKAY) {
+                ret = MP_EXPTMOD_E;
+            }
+        }
+        if (ret == 0 ) {
+            if (!mp_isone(tmp)) {
+                ret = MP_EXPTMOD_E;
+            }
+        }
+    }
+
     mp_forcezero(tmp);
     mp_clear(tmp);
     mp_clear(k);
@@ -511,6 +756,8 @@
 
     return ret;
 }
+#endif
+#endif
 
 
 #if !defined(WC_NO_RSA_OAEP) || defined(WC_RSA_PSS)
@@ -521,6 +768,7 @@
    out:   mask output after generation
    outSz: size of output buffer
  */
+#if !defined(NO_SHA) || !defined(NO_SHA256) || defined(WOLFSSL_SHA384) || defined(WOLFSSL_SHA512)
 static int RsaMGF1(enum wc_HashType hType, byte* seed, word32 seedSz,
                                         byte* out, word32 outSz, void* heap)
 {
@@ -594,9 +842,10 @@
 
     return 0;
 }
+#endif /* SHA2 Hashes */
 
 /* helper function to direct which mask generation function is used
-   switeched on type input
+   switched on type input
  */
 static int RsaMGF(int type, byte* seed, word32 seedSz, byte* out,
                                                     word32 outSz, void* heap)
@@ -643,10 +892,12 @@
 
     return ret;
 }
-#endif /* !WC_NO_RSA_OAEP */
+#endif /* !WC_NO_RSA_OAEP || WC_RSA_PSS */
 
 
 /* Padding */
+#ifndef WOLFSSL_RSA_VERIFY_ONLY
+#ifndef WC_NO_RNG
 #ifndef WC_NO_RSA_OAEP
 static int RsaPad_OAEP(const byte* input, word32 inputLen, byte* pkcsBlock,
         word32 pkcsBlockLen, byte padValue, WC_RNG* rng,
@@ -847,58 +1098,136 @@
         word32 pkcsBlockLen, WC_RNG* rng, enum wc_HashType hType, int mgf,
         int saltLen, int bits, void* heap)
 {
-    int   ret;
-    int   hLen, i;
+    int   ret = 0;
+    int   hLen, i, o, maskLen, hiBits;
+    byte* m;
     byte* s;
-    byte* m;
-    byte* h;
-    byte  salt[WC_MAX_DIGEST_SIZE];
+#if defined(WOLFSSL_PSS_LONG_SALT) || defined(WOLFSSL_PSS_SALT_LEN_DISCOVER)
+    #if defined(WOLFSSL_NO_MALLOC) && !defined(WOLFSSL_STATIC_MEMORY)
+        byte salt[RSA_MAX_SIZE/8 + RSA_PSS_PAD_SZ];
+    #else
+        byte* salt = NULL;
+    #endif
+#else
+    byte salt[WC_MAX_DIGEST_SIZE];
+#endif
+
+#if defined(WOLFSSL_PSS_LONG_SALT) || defined(WOLFSSL_PSS_SALT_LEN_DISCOVER)
+    if (pkcsBlockLen > RSA_MAX_SIZE/8) {
+        return MEMORY_E;
+    }
+#endif
 
     hLen = wc_HashGetDigestSize(hType);
     if (hLen < 0)
         return hLen;
 
-    if (saltLen == -1) {
+    hiBits = (bits - 1) & 0x7;
+    if (hiBits == 0) {
+        *(pkcsBlock++) = 0;
+        pkcsBlockLen--;
+    }
+
+    if (saltLen == RSA_PSS_SALT_LEN_DEFAULT) {
         saltLen = hLen;
         #ifdef WOLFSSL_SHA512
             /* See FIPS 186-4 section 5.5 item (e). */
-            if (bits == 1024 && hLen == WC_SHA512_DIGEST_SIZE)
+            if (bits == 1024 && hLen == WC_SHA512_DIGEST_SIZE) {
                 saltLen = RSA_PSS_SALT_MAX_SZ;
+            }
         #endif
     }
-    else if (saltLen > hLen || saltLen < -1)
+#ifndef WOLFSSL_PSS_LONG_SALT
+    else if (saltLen > hLen) {
+        return PSS_SALTLEN_E;
+    }
+#endif
+#ifndef WOLFSSL_PSS_SALT_LEN_DISCOVER
+    else if (saltLen < RSA_PSS_SALT_LEN_DEFAULT) {
+        return PSS_SALTLEN_E;
+    }
+#else
+    else if (saltLen == RSA_PSS_SALT_LEN_DISCOVER) {
+        saltLen = (int)pkcsBlockLen - hLen - 2;
+        if (saltLen < 0) {
+            return PSS_SALTLEN_E;
+        }
+    }
+    else if (saltLen < RSA_PSS_SALT_LEN_DISCOVER) {
+        return PSS_SALTLEN_E;
+    }
+#endif
+    if ((int)pkcsBlockLen - hLen < saltLen + 2) {
         return PSS_SALTLEN_E;
-    if ((int)pkcsBlockLen - hLen < saltLen + 2)
-        return PSS_SALTLEN_E;
-
+    }
+
+    maskLen = pkcsBlockLen - 1 - hLen;
+
+#if defined(WOLFSSL_PSS_LONG_SALT) || defined(WOLFSSL_PSS_SALT_LEN_DISCOVER)
+    #if !defined(WOLFSSL_NO_MALLOC) || defined(WOLFSSL_STATIC_MEMORY)
+        salt = (byte*)XMALLOC(RSA_PSS_PAD_SZ + inputLen + saltLen, heap,
+                                                       DYNAMIC_TYPE_RSA_BUFFER);
+        if (salt == NULL) {
+            return MEMORY_E;
+        }
+    #endif
+    s = m = salt;
+    XMEMSET(m, 0, RSA_PSS_PAD_SZ);
+    m += RSA_PSS_PAD_SZ;
+    XMEMCPY(m, input, inputLen);
+    m += inputLen;
+    o = (int)(m - s);
+    if (saltLen > 0) {
+        ret = wc_RNG_GenerateBlock(rng, m, saltLen);
+        if (ret == 0) {
+            m += saltLen;
+        }
+    }
+#else
     s = m = pkcsBlock;
     XMEMSET(m, 0, RSA_PSS_PAD_SZ);
     m += RSA_PSS_PAD_SZ;
     XMEMCPY(m, input, inputLen);
     m += inputLen;
-    if ((ret = wc_RNG_GenerateBlock(rng, salt, saltLen)) != 0)
-        return ret;
-    XMEMCPY(m, salt, saltLen);
-    m += saltLen;
-
-    h = pkcsBlock + pkcsBlockLen - 1 - hLen;
-    if ((ret = wc_Hash(hType, s, (word32)(m - s), h, hLen)) != 0)
-        return ret;
-    pkcsBlock[pkcsBlockLen - 1] = RSA_PSS_PAD_TERM;
-
-    ret = RsaMGF(mgf, h, hLen, pkcsBlock, pkcsBlockLen - hLen - 1, heap);
-    if (ret != 0)
-        return ret;
-    pkcsBlock[0] &= (1 << ((bits - 1) & 0x7)) - 1;
-
-    m = pkcsBlock + pkcsBlockLen - 1 - saltLen - hLen - 1;
-    *(m++) ^= 0x01;
-    for (i = 0; i < saltLen; i++)
-        m[i] ^= salt[i];
-
-    return 0;
+    o = 0;
+    if (saltLen > 0) {
+        ret = wc_RNG_GenerateBlock(rng, salt, saltLen);
+        if (ret == 0) {
+            XMEMCPY(m, salt, saltLen);
+            m += saltLen;
+        }
+    }
+#endif
+    if (ret == 0) {
+        /* Put Hash at end of pkcsBlock - 1 */
+        ret = wc_Hash(hType, s, (word32)(m - s), pkcsBlock + maskLen, hLen);
+    }
+    if (ret == 0) {
+        pkcsBlock[pkcsBlockLen - 1] = RSA_PSS_PAD_TERM;
+
+        ret = RsaMGF(mgf, pkcsBlock + maskLen, hLen, pkcsBlock, maskLen, heap);
+    }
+    if (ret == 0) {
+        pkcsBlock[0] &= (1 << hiBits) - 1;
+
+        m = pkcsBlock + maskLen - saltLen - 1;
+        *(m++) ^= 0x01;
+        for (i = 0; i < saltLen; i++) {
+            m[i] ^= salt[o + i];
+        }
+    }
+
+#if defined(WOLFSSL_PSS_LONG_SALT) || defined(WOLFSSL_PSS_SALT_LEN_DISCOVER)
+    #if !defined(WOLFSSL_NO_MALLOC) || defined(WOLFSSL_STATIC_MEMORY)
+        if (salt != NULL) {
+            XFREE(salt, heap, DYNAMIC_TYPE_RSA_BUFFER);
+        }
+    #endif
+#endif
+    return ret;
 }
-#endif
+#endif /* WC_RSA_PSS */
+#endif /* !WC_NO_RNG */
 
 static int RsaPad(const byte* input, word32 inputLen, byte* pkcsBlock,
                            word32 pkcsBlockLen, byte padValue, WC_RNG* rng)
@@ -922,6 +1251,7 @@
         XMEMSET(&pkcsBlock[1], 0xFF, pkcsBlockLen - inputLen - 2);
     }
     else {
+#if !defined(WOLFSSL_RSA_VERIFY_ONLY) && !defined(WC_NO_RNG)
         /* pad with non-zero random bytes */
         word32 padLen, i;
         int    ret;
@@ -941,6 +1271,10 @@
         for (i = 1; i < padLen; i++) {
             if (pkcsBlock[i] == 0) pkcsBlock[i] = 0x01;
         }
+#else
+        (void)rng;
+        return RSA_WRONG_TYPE_E;
+#endif
     }
 
     pkcsBlock[pkcsBlockLen-inputLen-1] = 0;     /* separator */
@@ -950,7 +1284,7 @@
 }
 
 /* helper function to direct which padding is used */
-static int wc_RsaPad_ex(const byte* input, word32 inputLen, byte* pkcsBlock,
+int wc_RsaPad_ex(const byte* input, word32 inputLen, byte* pkcsBlock,
     word32 pkcsBlockLen, byte padValue, WC_RNG* rng, int padType,
     enum wc_HashType hType, int mgf, byte* optLabel, word32 labelLen,
     int saltLen, int bits, void* heap)
@@ -965,6 +1299,7 @@
                                                                  padValue, rng);
             break;
 
+#ifndef WC_NO_RNG
     #ifndef WC_NO_RSA_OAEP
         case WC_RSA_OAEP_PAD:
             WOLFSSL_MSG("wolfSSL Using RSA OAEP padding");
@@ -980,6 +1315,7 @@
                                                hType, mgf, saltLen, bits, heap);
             break;
     #endif
+#endif /* !WC_NO_RNG */
 
     #ifdef WC_RSA_NO_PADDING
         case WC_RSA_NO_PAD:
@@ -1004,6 +1340,13 @@
     }
 
     /* silence warning if not used with padding scheme */
+    (void)input;
+    (void)inputLen;
+    (void)pkcsBlock;
+    (void)pkcsBlockLen;
+    (void)padValue;
+    (void)rng;
+    (void)padType;
     (void)hType;
     (void)mgf;
     (void)optLabel;
@@ -1014,6 +1357,7 @@
 
     return ret;
 }
+#endif /* WOLFSSL_RSA_VERIFY_ONLY */
 
 
 /* UnPadding */
@@ -1093,10 +1437,8 @@
     ret += pkcsBlock[idx++] ^ 0x01; /* separator value is 0x01 */
     ret += pkcsBlock[0]     ^ 0x00; /* Y, the first value, should be 0 */
 
-    if (ret != 0) {
-        WOLFSSL_MSG("RsaUnPad_OAEP: Padding Error");
-        return BAD_PADDING_E;
-    }
+    /* Return 0 data length on error. */
+    idx = ctMaskSelInt(ctMaskEq(ret, 0), idx, pkcsBlockLen);
 
     /* adjust pointer to correct location in array and return size of M */
     *output = (byte*)(pkcsBlock + idx);
@@ -1125,59 +1467,120 @@
 {
     int   ret;
     byte* tmp;
-    int   hLen, i;
+    int   hLen, i, maskLen;
+#ifdef WOLFSSL_SHA512
+    int orig_bits = bits;
+#endif
+#if defined(WOLFSSL_NO_MALLOC) && !defined(WOLFSSL_STATIC_MEMORY)
+    byte tmp_buf[RSA_MAX_SIZE/8];
+    tmp = tmp_buf;
+
+    if (pkcsBlockLen > RSA_MAX_SIZE/8) {
+        return MEMORY_E;
+    }
+#endif
 
     hLen = wc_HashGetDigestSize(hType);
     if (hLen < 0)
         return hLen;
-
-    if (saltLen == -1) {
+    bits = (bits - 1) & 0x7;
+    if ((pkcsBlock[0] & (0xff << bits)) != 0) {
+        return BAD_PADDING_E;
+    }
+    if (bits == 0) {
+        pkcsBlock++;
+        pkcsBlockLen--;
+    }
+    maskLen = (int)pkcsBlockLen - 1 - hLen;
+    if (maskLen < 0) {
+        WOLFSSL_MSG("RsaUnPad_PSS: Hash too large");
+        return WC_KEY_SIZE_E;
+    }
+
+    if (saltLen == RSA_PSS_SALT_LEN_DEFAULT) {
         saltLen = hLen;
         #ifdef WOLFSSL_SHA512
             /* See FIPS 186-4 section 5.5 item (e). */
-            if (bits == 1024 && hLen == WC_SHA512_DIGEST_SIZE)
+            if (orig_bits == 1024 && hLen == WC_SHA512_DIGEST_SIZE)
                 saltLen = RSA_PSS_SALT_MAX_SZ;
         #endif
     }
-    else if (saltLen > hLen || saltLen < -1)
+#ifndef WOLFSSL_PSS_LONG_SALT
+    else if (saltLen > hLen)
+        return PSS_SALTLEN_E;
+#endif
+#ifndef WOLFSSL_PSS_SALT_LEN_DISCOVER
+    else if (saltLen < RSA_PSS_SALT_LEN_DEFAULT)
+        return PSS_SALTLEN_E;
+    if (maskLen < saltLen + 1) {
         return PSS_SALTLEN_E;
-    if ((int)pkcsBlockLen - hLen < saltLen + 2)
+    }
+#else
+    else if (saltLen < RSA_PSS_SALT_LEN_DISCOVER)
         return PSS_SALTLEN_E;
+    if (saltLen != RSA_PSS_SALT_LEN_DISCOVER && maskLen < saltLen + 1) {
+        return WC_KEY_SIZE_E;
+    }
+#endif
 
     if (pkcsBlock[pkcsBlockLen - 1] != RSA_PSS_PAD_TERM) {
         WOLFSSL_MSG("RsaUnPad_PSS: Padding Term Error");
         return BAD_PADDING_E;
     }
 
-    tmp = (byte*)XMALLOC(pkcsBlockLen, heap, DYNAMIC_TYPE_RSA_BUFFER);
-    if (tmp == NULL)
+#if !defined(WOLFSSL_NO_MALLOC) || defined(WOLFSSL_STATIC_MEMORY)
+    tmp = (byte*)XMALLOC(maskLen, heap, DYNAMIC_TYPE_RSA_BUFFER);
+    if (tmp == NULL) {
         return MEMORY_E;
-
-    if ((ret = RsaMGF(mgf, pkcsBlock + pkcsBlockLen - 1 - hLen, hLen,
-                                    tmp, pkcsBlockLen - 1 - hLen, heap)) != 0) {
+    }
+#endif
+
+    if ((ret = RsaMGF(mgf, pkcsBlock + maskLen, hLen, tmp, maskLen,
+                                                                  heap)) != 0) {
         XFREE(tmp, heap, DYNAMIC_TYPE_RSA_BUFFER);
         return ret;
     }
 
-    tmp[0] &= (1 << ((bits - 1) & 0x7)) - 1;
-    for (i = 0; i < (int)(pkcsBlockLen - 1 - saltLen - hLen - 1); i++) {
-        if (tmp[i] != pkcsBlock[i]) {
+    tmp[0] &= (1 << bits) - 1;
+    pkcsBlock[0] &= (1 << bits) - 1;
+#ifdef WOLFSSL_PSS_SALT_LEN_DISCOVER
+    if (saltLen == RSA_PSS_SALT_LEN_DISCOVER) {
+        for (i = 0; i < maskLen - 1; i++) {
+            if (tmp[i] != pkcsBlock[i]) {
+                break;
+            }
+        }
+        if (tmp[i] != (pkcsBlock[i] ^ 0x01)) {
             XFREE(tmp, heap, DYNAMIC_TYPE_RSA_BUFFER);
             WOLFSSL_MSG("RsaUnPad_PSS: Padding Error Match");
-            return BAD_PADDING_E;
+            return PSS_SALTLEN_RECOVER_E;
+        }
+        saltLen = maskLen - (i + 1);
+    }
+    else
+#endif
+    {
+        for (i = 0; i < maskLen - 1 - saltLen; i++) {
+            if (tmp[i] != pkcsBlock[i]) {
+                XFREE(tmp, heap, DYNAMIC_TYPE_RSA_BUFFER);
+                WOLFSSL_MSG("RsaUnPad_PSS: Padding Error Match");
+                return PSS_SALTLEN_E;
+            }
+        }
+        if (tmp[i] != (pkcsBlock[i] ^ 0x01)) {
+            XFREE(tmp, heap, DYNAMIC_TYPE_RSA_BUFFER);
+            WOLFSSL_MSG("RsaUnPad_PSS: Padding Error End");
+            return PSS_SALTLEN_E;
         }
     }
-    if (tmp[i] != (pkcsBlock[i] ^ 0x01)) {
-        XFREE(tmp, heap, DYNAMIC_TYPE_RSA_BUFFER);
-        WOLFSSL_MSG("RsaUnPad_PSS: Padding Error End");
-        return BAD_PADDING_E;
-    }
-    for (i++; i < (int)(pkcsBlockLen - 1 - hLen); i++)
+    for (i++; i < maskLen; i++)
         pkcsBlock[i] ^= tmp[i];
 
+#if !defined(WOLFSSL_NO_MALLOC) || defined(WOLFSSL_STATIC_MEMORY)
     XFREE(tmp, heap, DYNAMIC_TYPE_RSA_BUFFER);
-
-    *output = pkcsBlock + pkcsBlockLen - (hLen + saltLen + 1);
+#endif
+
+    *output = pkcsBlock + maskLen - saltLen;
     return saltLen + hLen;
 }
 #endif
@@ -1185,58 +1588,74 @@
 /* UnPad plaintext, set start to *output, return length of plaintext,
  * < 0 on error */
 static int RsaUnPad(const byte *pkcsBlock, unsigned int pkcsBlockLen,
-                                               byte **output, byte padValue)
+                    byte **output, byte padValue)
 {
-    word32 maxOutputLen = (pkcsBlockLen > 10) ? (pkcsBlockLen - 10) : 0;
-    word32 invalid = 0;
-    word32 i = 1;
-    word32 outputLen;
-
-    if (output == NULL || pkcsBlockLen == 0) {
+    int    ret = BAD_FUNC_ARG;
+    word16 i;
+#ifndef WOLFSSL_RSA_VERIFY_ONLY
+    byte   invalid = 0;
+#endif
+
+    if (output == NULL || pkcsBlockLen == 0 || pkcsBlockLen > 0xFFFF) {
         return BAD_FUNC_ARG;
     }
 
-    if (pkcsBlock[0] != 0x0) { /* skip past zero */
-        invalid = 1;
-    }
-    pkcsBlock++; pkcsBlockLen--;
-
-    /* Require block type padValue */
-    invalid = (pkcsBlock[0] != padValue) || invalid;
-
-    /* verify the padding until we find the separator */
     if (padValue == RSA_BLOCK_TYPE_1) {
-        while (i<pkcsBlockLen && pkcsBlock[i++] == 0xFF) {/* Null body */}
-    }
-    else {
-        while (i<pkcsBlockLen && pkcsBlock[i++]) {/* Null body */}
+        /* First byte must be 0x00 and Second byte, block type, 0x01 */
+        if (pkcsBlock[0] != 0 || pkcsBlock[1] != RSA_BLOCK_TYPE_1) {
+            WOLFSSL_MSG("RsaUnPad error, invalid formatting");
+            return RSA_PAD_E;
+        }
+
+        /* check the padding until we find the separator */
+        for (i = 2; i < pkcsBlockLen && pkcsBlock[i++] == 0xFF; ) { }
+
+        /* Minimum of 11 bytes of pre-message data and must have separator. */
+        if (i < RSA_MIN_PAD_SZ || pkcsBlock[i-1] != 0) {
+            WOLFSSL_MSG("RsaUnPad error, bad formatting");
+            return RSA_PAD_E;
+        }
+
+        *output = (byte *)(pkcsBlock + i);
+        ret = pkcsBlockLen - i;
     }
-
-    if (!(i==pkcsBlockLen || pkcsBlock[i-1]==0)) {
-        WOLFSSL_MSG("RsaUnPad error, bad formatting");
-        return RSA_PAD_E;
+#ifndef WOLFSSL_RSA_VERIFY_ONLY
+    else {
+        word16 j;
+        word16 pastSep = 0;
+
+        /* Decrypted with private key - unpad must be constant time. */
+        for (i = 0, j = 2; j < pkcsBlockLen; j++) {
+           /* Update i if not passed the separator and at separator. */
+            i |= (~pastSep) & ctMask16Eq(pkcsBlock[j], 0x00) & (j + 1);
+            pastSep |= ctMask16Eq(pkcsBlock[j], 0x00);
+        }
+
+        /* Minimum of 11 bytes of pre-message data - including leading 0x00. */
+        invalid |= ctMaskLT(i, RSA_MIN_PAD_SZ);
+        /* Must have seen separator. */
+        invalid |= ~pastSep;
+        /* First byte must be 0x00. */
+        invalid |= ctMaskNotEq(pkcsBlock[0], 0x00);
+        /* Check against expected block type: padValue */
+        invalid |= ctMaskNotEq(pkcsBlock[1], padValue);
+
+        *output = (byte *)(pkcsBlock + i);
+        ret = ((int)~invalid) & (pkcsBlockLen - i);
     }
-
-    outputLen = pkcsBlockLen - i;
-    invalid = (outputLen > maxOutputLen) || invalid;
-
-    if (invalid) {
-        WOLFSSL_MSG("RsaUnPad error, invalid formatting");
-        return RSA_PAD_E;
-    }
-
-    *output = (byte *)(pkcsBlock + i);
-    return outputLen;
+#endif
+
+    return ret;
 }
 
 /* helper function to direct unpadding
  *
  * bits is the key modulus size in bits
  */
-static int wc_RsaUnPad_ex(byte* pkcsBlock, word32 pkcsBlockLen, byte** out,
-                          byte padValue, int padType, enum wc_HashType hType,
-                          int mgf, byte* optLabel, word32 labelLen, int saltLen,
-                          int bits, void* heap)
+int wc_RsaUnPad_ex(byte* pkcsBlock, word32 pkcsBlockLen, byte** out,
+                   byte padValue, int padType, enum wc_HashType hType,
+                   int mgf, byte* optLabel, word32 labelLen, int saltLen,
+                   int bits, void* heap)
 {
     int ret;
 
@@ -1268,7 +1687,8 @@
 
             /* In the case of no padding being used check that input is exactly
              * the RSA key length */
-            if (bits <= 0 || pkcsBlockLen != ((word32)bits/WOLFSSL_BIT_SIZE)) {
+            if (bits <= 0 || pkcsBlockLen !=
+                         ((word32)(bits+WOLFSSL_BIT_SIZE-1)/WOLFSSL_BIT_SIZE)) {
                 WOLFSSL_MSG("Bad input size");
                 ret = RSA_PAD_E;
             }
@@ -1308,7 +1728,7 @@
                           word32* outLen, int type, RsaKey* key, WC_RNG* rng)
 {
     int    ret = 0;
-    word32 keyLen, len;
+    word32 keyLen;
     (void)rng;
 
     keyLen = wc_RsaEncryptSize(key);
@@ -1347,15 +1767,254 @@
 }
 #endif /* WOLFSSL_XILINX_CRYPT */
 
+#ifdef WC_RSA_NONBLOCK
+static int wc_RsaFunctionNonBlock(const byte* in, word32 inLen, byte* out,
+                          word32* outLen, int type, RsaKey* key)
+{
+    int    ret = 0;
+    word32 keyLen, len;
+
+    if (key == NULL || key->nb == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    if (key->nb->exptmod.state == TFM_EXPTMOD_NB_INIT) {
+        if (mp_init(&key->nb->tmp) != MP_OKAY) {
+            ret = MP_INIT_E;
+        }
+
+        if (ret == 0) {
+            if (mp_read_unsigned_bin(&key->nb->tmp, (byte*)in, inLen) != MP_OKAY) {
+                ret = MP_READ_E;
+            }
+        }
+    }
+
+    if (ret == 0) {
+        switch(type) {
+        case RSA_PRIVATE_DECRYPT:
+        case RSA_PRIVATE_ENCRYPT:
+            ret = fp_exptmod_nb(&key->nb->exptmod, &key->nb->tmp, &key->d,
+                &key->n, &key->nb->tmp);
+            if (ret == FP_WOULDBLOCK)
+                return ret;
+            if (ret != MP_OKAY)
+                ret = MP_EXPTMOD_E;
+            break;
+
+        case RSA_PUBLIC_ENCRYPT:
+        case RSA_PUBLIC_DECRYPT:
+            ret = fp_exptmod_nb(&key->nb->exptmod, &key->nb->tmp, &key->e,
+                &key->n, &key->nb->tmp);
+            if (ret == FP_WOULDBLOCK)
+                return ret;
+            if (ret != MP_OKAY)
+                ret = MP_EXPTMOD_E;
+            break;
+        default:
+            ret = RSA_WRONG_TYPE_E;
+            break;
+        }
+    }
+
+    if (ret == 0) {
+        keyLen = wc_RsaEncryptSize(key);
+        if (keyLen > *outLen)
+            ret = RSA_BUFFER_E;
+    }
+    if (ret == 0) {
+        len = mp_unsigned_bin_size(&key->nb->tmp);
+
+        /* pad front w/ zeros to match key length */
+        while (len < keyLen) {
+            *out++ = 0x00;
+            len++;
+        }
+
+        *outLen = keyLen;
+
+        /* convert */
+        if (mp_to_unsigned_bin(&key->nb->tmp, out) != MP_OKAY) {
+             ret = MP_TO_E;
+        }
+    }
+
+    mp_clear(&key->nb->tmp);
+
+    return ret;
+}
+#endif /* WC_RSA_NONBLOCK */
+
+#ifdef WOLFSSL_AFALG_XILINX_RSA
+#ifndef ERROR_OUT
+#define ERROR_OUT(x) ret = (x); goto done
+#endif
+
+static const char WC_TYPE_ASYMKEY[] = "skcipher";
+static const char WC_NAME_RSA[] = "xilinx-zynqmp-rsa";
+#ifndef MAX_XILINX_RSA_KEY
+    /* max key size of 4096 bits / 512 bytes */
+    #define MAX_XILINX_RSA_KEY 512
+#endif
+static const byte XILINX_RSA_FLAG[] = {0x1};
+
+
+/* AF_ALG implementation of RSA */
+static int wc_RsaFunctionSync(const byte* in, word32 inLen, byte* out,
+                          word32* outLen, int type, RsaKey* key, WC_RNG* rng)
+{
+    struct msghdr   msg;
+    struct cmsghdr* cmsg;
+    struct iovec      iov;
+    byte*  keyBuf   = NULL;
+    word32 keyBufSz = 0;
+    char cbuf[CMSG_SPACE(4) + CMSG_SPACE(sizeof(struct af_alg_iv) + 1)] = {0};
+    int    ret = 0;
+    int    op  = 0;    /* decryption vs encryption flag */
+    word32 keyLen;
+
+    /* input and output buffer need to be aligned */
+    ALIGN64 byte outBuf[MAX_XILINX_RSA_KEY];
+    ALIGN64 byte inBuf[MAX_XILINX_RSA_KEY];
+
+    XMEMSET(&msg, 0, sizeof(struct msghdr));
+    (void)rng;
+
+    keyLen = wc_RsaEncryptSize(key);
+    if (keyLen > *outLen) {
+        ERROR_OUT(RSA_BUFFER_E);
+    }
+
+    if (keyLen > MAX_XILINX_RSA_KEY) {
+        WOLFSSL_MSG("RSA key size larger than supported");
+        ERROR_OUT(BAD_FUNC_ARG);
+    }
+
+    if ((keyBuf = (byte*)XMALLOC(keyLen * 2, key->heap, DYNAMIC_TYPE_KEY))
+            == NULL) {
+        ERROR_OUT(MEMORY_E);
+    }
+
+    if ((ret = mp_to_unsigned_bin(&(key->n), keyBuf)) != MP_OKAY) {
+        ERROR_OUT(MP_TO_E);
+    }
+
+    switch(type) {
+        case RSA_PRIVATE_DECRYPT:
+        case RSA_PRIVATE_ENCRYPT:
+            op = 1; /* set as decrypt */
+            {
+                keyBufSz = mp_unsigned_bin_size(&(key->d));
+                if ((mp_to_unsigned_bin(&(key->d), keyBuf + keyLen))
+                        != MP_OKAY) {
+                    ERROR_OUT(MP_TO_E);
+                }
+            }
+            break;
+
+        case RSA_PUBLIC_DECRYPT:
+        case RSA_PUBLIC_ENCRYPT: {
+            word32 exp = 0;
+            word32 eSz = mp_unsigned_bin_size(&(key->e));
+            if ((mp_to_unsigned_bin(&(key->e), (byte*)&exp +
+                            (sizeof(word32) - eSz))) != MP_OKAY) {
+                ERROR_OUT(MP_TO_E);
+            }
+            keyBufSz = sizeof(word32);
+            XMEMCPY(keyBuf + keyLen, (byte*)&exp, keyBufSz);
+            break;
+        }
+
+        default:
+            ERROR_OUT(RSA_WRONG_TYPE_E);
+    }
+    keyBufSz += keyLen; /* add size of modulus */
+
+    /* check for existing sockets before creating new ones */
+    if (key->alFd > 0) {
+        close(key->alFd);
+        key->alFd = WC_SOCK_NOTSET;
+    }
+    if (key->rdFd > 0) {
+        close(key->rdFd);
+        key->rdFd = WC_SOCK_NOTSET;
+    }
+
+    /* create new sockets and set the key to use */
+    if ((key->alFd = wc_Afalg_Socket()) < 0) {
+        WOLFSSL_MSG("Unable to create socket");
+        ERROR_OUT(key->alFd);
+    }
+    if ((key->rdFd = wc_Afalg_CreateRead(key->alFd, WC_TYPE_ASYMKEY,
+                    WC_NAME_RSA)) < 0) {
+        WOLFSSL_MSG("Unable to bind and create read/send socket");
+        ERROR_OUT(key->rdFd);
+    }
+    if ((ret = setsockopt(key->alFd, SOL_ALG, ALG_SET_KEY, keyBuf,
+                    keyBufSz)) < 0) {
+        WOLFSSL_MSG("Error setting RSA key");
+        ERROR_OUT(ret);
+    }
+
+    msg.msg_control    = cbuf;
+    msg.msg_controllen = sizeof(cbuf);
+    cmsg = CMSG_FIRSTHDR(&msg);
+    if ((ret = wc_Afalg_SetOp(cmsg, op)) < 0) {
+        ERROR_OUT(ret);
+    }
+
+    /* set flag in IV spot, needed for Xilinx hardware acceleration use */
+    cmsg = CMSG_NXTHDR(&msg, cmsg);
+    if ((ret = wc_Afalg_SetIv(cmsg, (byte*)XILINX_RSA_FLAG,
+                    sizeof(XILINX_RSA_FLAG))) != 0) {
+        ERROR_OUT(ret);
+    }
+
+    /* compose and send msg */
+    XMEMCPY(inBuf, (byte*)in, inLen); /* for alignment */
+    iov.iov_base = inBuf;
+    iov.iov_len  = inLen;
+    msg.msg_iov  = &iov;
+    msg.msg_iovlen = 1;
+    if ((ret = sendmsg(key->rdFd, &msg, 0)) <= 0) {
+        ERROR_OUT(WC_AFALG_SOCK_E);
+    }
+
+    if ((ret = read(key->rdFd, outBuf, inLen)) <= 0) {
+        ERROR_OUT(WC_AFALG_SOCK_E);
+    }
+    XMEMCPY(out, outBuf, ret);
+    *outLen = keyLen;
+
+done:
+    /* clear key data and free buffer */
+    if (keyBuf != NULL) {
+        ForceZero(keyBuf, keyBufSz);
+    }
+    XFREE(keyBuf, key->heap, DYNAMIC_TYPE_KEY);
+
+    if (key->alFd > 0) {
+        close(key->alFd);
+        key->alFd = WC_SOCK_NOTSET;
+    }
+    if (key->rdFd > 0) {
+        close(key->rdFd);
+        key->rdFd = WC_SOCK_NOTSET;
+    }
+
+    return ret;
+}
+
+#else
 static int wc_RsaFunctionSync(const byte* in, word32 inLen, byte* out,
                           word32* outLen, int type, RsaKey* key, WC_RNG* rng)
 {
 #ifndef WOLFSSL_SP_MATH
 #ifdef WOLFSSL_SMALL_STACK
-    mp_int* tmp = NULL;
+    mp_int* tmp;
 #ifdef WC_RSA_BLINDING
-    mp_int* rnd = NULL;
-    mp_int* rndi = NULL;
+    mp_int* rnd;
+    mp_int* rndi;
 #endif
 #else
     mp_int tmp[1];
@@ -1364,13 +2023,14 @@
 #endif
 #endif
     int    ret = 0;
-    word32 keyLen, len;
+    word32 keyLen = 0;
 #endif
 
 #ifdef WOLFSSL_HAVE_SP_RSA
 #ifndef WOLFSSL_SP_NO_2048
     if (mp_count_bits(&key->n) == 2048) {
         switch(type) {
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
         case RSA_PRIVATE_DECRYPT:
         case RSA_PRIVATE_ENCRYPT:
     #ifdef WC_RSA_BLINDING
@@ -1385,6 +2045,7 @@
             return sp_RsaPrivate_2048(in, inLen, &key->d, &key->p, &key->q,
                                       NULL, NULL, NULL, &key->n, out, outLen);
     #endif
+#endif
         case RSA_PUBLIC_ENCRYPT:
         case RSA_PUBLIC_DECRYPT:
             return sp_RsaPublic_2048(in, inLen, &key->e, &key->n, out, outLen);
@@ -1394,6 +2055,7 @@
 #ifndef WOLFSSL_SP_NO_3072
     if (mp_count_bits(&key->n) == 3072) {
         switch(type) {
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
         case RSA_PRIVATE_DECRYPT:
         case RSA_PRIVATE_ENCRYPT:
     #ifdef WC_RSA_BLINDING
@@ -1408,15 +2070,43 @@
             return sp_RsaPrivate_3072(in, inLen, &key->d, &key->p, &key->q,
                                       NULL, NULL, NULL, &key->n, out, outLen);
     #endif
+#endif
         case RSA_PUBLIC_ENCRYPT:
         case RSA_PUBLIC_DECRYPT:
             return sp_RsaPublic_3072(in, inLen, &key->e, &key->n, out, outLen);
         }
     }
 #endif
+#ifdef WOLFSSL_SP_4096
+    if (mp_count_bits(&key->n) == 4096) {
+        switch(type) {
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+        case RSA_PRIVATE_DECRYPT:
+        case RSA_PRIVATE_ENCRYPT:
+    #ifdef WC_RSA_BLINDING
+            if (rng == NULL)
+                return MISSING_RNG_E;
+    #endif
+    #ifndef RSA_LOW_MEM
+            return sp_RsaPrivate_4096(in, inLen, &key->d, &key->p, &key->q,
+                                      &key->dP, &key->dQ, &key->u, &key->n,
+                                      out, outLen);
+    #else
+            return sp_RsaPrivate_4096(in, inLen, &key->d, &key->p, &key->q,
+                                      NULL, NULL, NULL, &key->n, out, outLen);
+    #endif
+#endif
+        case RSA_PUBLIC_ENCRYPT:
+        case RSA_PUBLIC_DECRYPT:
+            return sp_RsaPublic_4096(in, inLen, &key->e, &key->n, out, outLen);
+        }
+    }
+#endif
 #endif /* WOLFSSL_HAVE_SP_RSA */
 
 #ifdef WOLFSSL_SP_MATH
+    (void)rng;
+    WOLFSSL_MSG("SP Key Size Error");
     return WC_KEY_SIZE_E;
 #else
     (void)rng;
@@ -1449,15 +2139,17 @@
     }
 #endif
 
+#ifndef TEST_UNPAD_CONSTANT_TIME
     if (ret == 0 && mp_read_unsigned_bin(tmp, (byte*)in, inLen) != MP_OKAY)
         ret = MP_READ_E;
 
     if (ret == 0) {
         switch(type) {
+    #ifndef WOLFSSL_RSA_PUBLIC_ONLY
         case RSA_PRIVATE_DECRYPT:
         case RSA_PRIVATE_ENCRYPT:
         {
-        #ifdef WC_RSA_BLINDING
+        #if defined(WC_RSA_BLINDING) && !defined(WC_NO_RNG)
             /* blind */
             ret = mp_rand(rnd, get_digit_count(&key->n), rng);
 
@@ -1472,7 +2164,7 @@
             /* tmp = tmp*rnd mod n */
             if (ret == 0 && mp_mulmod(tmp, rnd, &key->n, tmp) != MP_OKAY)
                 ret = MP_MULMOD_E;
-        #endif /* WC_RSA_BLINDING */
+        #endif /* WC_RSA_BLINDING && !WC_NO_RNG */
 
         #ifdef RSA_LOW_MEM      /* half as much memory but twice as slow */
             if (ret == 0 && mp_exptmod(tmp, &key->d, &key->n, tmp) != MP_OKAY)
@@ -1480,7 +2172,7 @@
         #else
             if (ret == 0) {
             #ifdef WOLFSSL_SMALL_STACK
-                mp_int* tmpa = NULL;
+                mp_int* tmpa;
                 mp_int* tmpb = NULL;
             #else
                 mp_int tmpa[1], tmpb[1];
@@ -1488,7 +2180,8 @@
                 int cleara = 0, clearb = 0;
 
             #ifdef WOLFSSL_SMALL_STACK
-                tmpa = XMALLOC(sizeof(mp_int) * 2, key->heap, DYNAMIC_TYPE_RSA);
+                tmpa = (mp_int*)XMALLOC(sizeof(mp_int) * 2,
+                        key->heap, DYNAMIC_TYPE_RSA);
                 if (tmpa != NULL)
                     tmpb = tmpa + 1;
                 else
@@ -1513,7 +2206,7 @@
                 if (ret == 0 && mp_exptmod(tmp, &key->dP, &key->p,
                                                                tmpa) != MP_OKAY)
                     ret = MP_EXPTMOD_E;
-    
+
                 /* tmpb = tmp^dQ mod q */
                 if (ret == 0 && mp_exptmod(tmp, &key->dQ, &key->q,
                                                                tmpb) != MP_OKAY)
@@ -1557,15 +2250,16 @@
 
             break;
         }
+    #endif
         case RSA_PUBLIC_ENCRYPT:
         case RSA_PUBLIC_DECRYPT:
         #ifdef WOLFSSL_XILINX_CRYPT
             ret = wc_RsaFunctionXil(in, inLen, out, outLen, type, key, rng);
         #else
-            if (mp_exptmod(tmp, &key->e, &key->n, tmp) != MP_OKAY)
+            if (mp_exptmod_nct(tmp, &key->e, &key->n, tmp) != MP_OKAY)
                 ret = MP_EXPTMOD_E;
+        #endif
             break;
-        #endif
         default:
             ret = RSA_WRONG_TYPE_E;
             break;
@@ -1578,20 +2272,17 @@
             ret = RSA_BUFFER_E;
     }
     if (ret == 0) {
-        len = mp_unsigned_bin_size(tmp);
-
-        /* pad front w/ zeros to match key length */
-        while (len < keyLen) {
-            *out++ = 0x00;
-            len++;
-        }
-
         *outLen = keyLen;
-
-        /* convert */
-        if (mp_to_unsigned_bin(tmp, out) != MP_OKAY)
+        if (mp_to_unsigned_bin_len(tmp, out, keyLen) != MP_OKAY)
              ret = MP_TO_E;
     }
+#else
+    (void)type;
+    (void)key;
+    (void)keyLen;
+    XMEMCPY(out, in, inLen);
+    *outLen = inLen;
+#endif
 
     mp_clear(tmp);
 #ifdef WOLFSSL_SMALL_STACK
@@ -1609,6 +2300,7 @@
     return ret;
 #endif /* WOLFSSL_SP_MATH */
 }
+#endif
 
 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_RSA)
 static int wc_RsaFunctionAsync(const byte* in, word32 inLen, byte* out,
@@ -1633,6 +2325,7 @@
 #endif /* WOLFSSL_ASYNC_CRYPT_TEST */
 
     switch(type) {
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
     case RSA_PRIVATE_DECRYPT:
     case RSA_PRIVATE_ENCRYPT:
     #ifdef HAVE_CAVIUM
@@ -1657,6 +2350,7 @@
         ret = wc_RsaFunctionSync(in, inLen, out, outLen, type, key, rng);
     #endif
         break;
+#endif
 
     case RSA_PUBLIC_ENCRYPT:
     case RSA_PUBLIC_DECRYPT:
@@ -1768,7 +2462,11 @@
     }
 
     /* if async pending then skip cleanup*/
-    if (ret == WC_PENDING_E) {
+    if (ret == WC_PENDING_E
+    #ifdef WC_RSA_NONBLOCK
+        || ret == FP_WOULDBLOCK
+    #endif
+    ) {
         return ret;
     }
 
@@ -1779,6 +2477,100 @@
 }
 #endif /* WC_RSA_DIRECT || WC_RSA_NO_PADDING */
 
+#if defined(WOLFSSL_CRYPTOCELL)
+static int cc310_RsaPublicEncrypt(const byte* in, word32 inLen, byte* out,
+                            word32 outLen, RsaKey* key)
+{
+    CRYSError_t ret = 0;
+    CRYS_RSAPrimeData_t primeData;
+    int modulusSize = wc_RsaEncryptSize(key);
+
+    /* The out buffer must be at least modulus size bytes long. */
+    if (outLen < modulusSize)
+        return BAD_FUNC_ARG;
+
+    ret = CRYS_RSA_PKCS1v15_Encrypt(&wc_rndState,
+                                    wc_rndGenVectFunc,
+                                    &key->ctx.pubKey,
+                                    &primeData,
+                                    (byte*)in,
+                                    inLen,
+                                    out);
+
+    if (ret != SA_SILIB_RET_OK){
+        WOLFSSL_MSG("CRYS_RSA_PKCS1v15_Encrypt failed");
+        return -1;
+    }
+
+    return modulusSize;
+}
+static int cc310_RsaPublicDecrypt(const byte* in, word32 inLen, byte* out,
+                            word32 outLen, RsaKey* key)
+{
+    CRYSError_t ret = 0;
+    CRYS_RSAPrimeData_t primeData;
+    uint16_t actualOutLen = outLen;
+
+    ret = CRYS_RSA_PKCS1v15_Decrypt(&key->ctx.privKey,
+                                    &primeData,
+                                    (byte*)in,
+                                    inLen,
+                                    out,
+                                    &actualOutLen);
+
+    if (ret != SA_SILIB_RET_OK){
+        WOLFSSL_MSG("CRYS_RSA_PKCS1v15_Decrypt failed");
+        return -1;
+    }
+    return actualOutLen;
+}
+
+int cc310_RsaSSL_Sign(const byte* in, word32 inLen, byte* out,
+                  word32 outLen, RsaKey* key, CRYS_RSA_HASH_OpMode_t mode)
+{
+    CRYSError_t ret = 0;
+    uint16_t actualOutLen = outLen*sizeof(byte);
+    CRYS_RSAPrivUserContext_t  contextPrivate;
+
+    ret =  CRYS_RSA_PKCS1v15_Sign(&wc_rndState,
+                wc_rndGenVectFunc,
+                &contextPrivate,
+                &key->ctx.privKey,
+                mode,
+                (byte*)in,
+                inLen,
+                out,
+                &actualOutLen);
+
+    if (ret != SA_SILIB_RET_OK){
+        WOLFSSL_MSG("CRYS_RSA_PKCS1v15_Sign failed");
+        return -1;
+    }
+    return actualOutLen;
+}
+
+int cc310_RsaSSL_Verify(const byte* in, word32 inLen, byte* sig,
+                               RsaKey* key, CRYS_RSA_HASH_OpMode_t mode)
+{
+    CRYSError_t ret = 0;
+    CRYS_RSAPubUserContext_t contextPub;
+
+    /* verify the signature in the sig pointer */
+    ret =  CRYS_RSA_PKCS1v15_Verify(&contextPub,
+                &key->ctx.pubKey,
+                mode,
+                (byte*)in,
+                inLen,
+                sig);
+
+    if (ret != SA_SILIB_RET_OK){
+        WOLFSSL_MSG("CRYS_RSA_PKCS1v15_Verify failed");
+        return -1;
+    }
+
+    return ret;
+}
+#endif /* WOLFSSL_CRYPTOCELL */
 
 int wc_RsaFunction(const byte* in, word32 inLen, byte* out,
                           word32* outLen, int type, RsaKey* key, WC_RNG* rng)
@@ -1790,22 +2582,24 @@
         return BAD_FUNC_ARG;
     }
 
-#ifdef WOLF_CRYPTO_DEV
+#ifdef WOLF_CRYPTO_CB
     if (key->devId != INVALID_DEVID) {
-        ret = wc_CryptoDev_Rsa(in, inLen, out, outLen, type, key, rng);
-        if (ret != NOT_COMPILED_IN)
+        ret = wc_CryptoCb_Rsa(in, inLen, out, outLen, type, key, rng);
+        if (ret != CRYPTOCB_UNAVAILABLE)
             return ret;
+        /* fall-through when unavailable */
         ret = 0; /* reset error code and try using software */
     }
 #endif
 
+#ifndef TEST_UNPAD_CONSTANT_TIME
 #ifndef NO_RSA_BOUNDS_CHECK
     if (type == RSA_PRIVATE_DECRYPT &&
         key->state == RSA_STATE_DECRYPT_EXPTMOD) {
 
         /* Check that 1 < in < n-1. (Requirement of 800-56B.) */
 #ifdef WOLFSSL_SMALL_STACK
-        mp_int* c = NULL;
+        mp_int* c;
 #else
         mp_int c[1];
 #endif
@@ -1847,6 +2641,7 @@
             return ret;
     }
 #endif /* NO_RSA_BOUNDS_CHECK */
+#endif
 
 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_RSA)
     if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_RSA &&
@@ -1855,12 +2650,22 @@
     }
     else
 #endif
+#ifdef WC_RSA_NONBLOCK
+    if (key->nb) {
+        ret = wc_RsaFunctionNonBlock(in, inLen, out, outLen, type, key);
+    }
+    else
+#endif
     {
         ret = wc_RsaFunctionSync(in, inLen, out, outLen, type, key, rng);
     }
 
     /* handle error */
-    if (ret < 0 && ret != WC_PENDING_E) {
+    if (ret < 0 && ret != WC_PENDING_E
+    #ifdef WC_RSA_NONBLOCK
+        && ret != FP_WOULDBLOCK
+    #endif
+    ) {
         if (ret == MP_EXPTMOD_E) {
             /* This can happen due to incorrectly set FP_MAX_BITS or missing XREALLOC */
             WOLFSSL_MSG("RSA_FUNCTION MP_EXPTMOD_E: memory/config problem");
@@ -1874,6 +2679,7 @@
 }
 
 
+#ifndef WOLFSSL_RSA_VERIFY_ONLY
 /* Internal Wrappers */
 /* Gives the option of choosing padding type
    in : input to be encrypted
@@ -1927,8 +2733,6 @@
     switch (key->state) {
     case RSA_STATE_NONE:
     case RSA_STATE_ENCRYPT_PAD:
-        key->state = RSA_STATE_ENCRYPT_PAD;
-
     #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_RSA) && \
             defined(HAVE_CAVIUM)
         if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_RSA &&
@@ -1947,8 +2751,20 @@
                 return NitroxRsaSSL_Sign(in, inLen, out, outLen, key);
             }
         }
-    #endif
-
+    #elif defined(WOLFSSL_CRYPTOCELL)
+        if (rsa_type == RSA_PUBLIC_ENCRYPT &&
+                                            pad_value == RSA_BLOCK_TYPE_2) {
+
+            return cc310_RsaPublicEncrypt(in, inLen, out, outLen, key);
+        }
+        else if (rsa_type == RSA_PRIVATE_ENCRYPT &&
+                                         pad_value == RSA_BLOCK_TYPE_1) {
+         return cc310_RsaSSL_Sign(in, inLen, out, outLen, key,
+                                  cc310_hashModeRSA(hash, 0));
+        }
+    #endif /* WOLFSSL_CRYPTOCELL */
+
+        key->state = RSA_STATE_ENCRYPT_PAD;
         ret = wc_RsaPad_ex(in, inLen, out, sz, pad_value, rng, pad_type, hash,
                            mgf, label, labelSz, saltLen, mp_count_bits(&key->n),
                            key->heap);
@@ -1957,7 +2773,6 @@
         }
 
         key->state = RSA_STATE_ENCRYPT_EXPTMOD;
-
         FALL_THROUGH;
 
     case RSA_STATE_ENCRYPT_EXPTMOD:
@@ -1984,7 +2799,11 @@
     }
 
     /* if async pending then return and skip done cleanup below */
-    if (ret == WC_PENDING_E) {
+    if (ret == WC_PENDING_E
+    #ifdef WC_RSA_NONBLOCK
+        || ret == FP_WOULDBLOCK
+    #endif
+    ) {
         return ret;
     }
 
@@ -1994,6 +2813,8 @@
     return ret;
 }
 
+#endif
+
 /* Gives the option of choosing padding type
    in : input to be decrypted
    inLen: length of input buffer
@@ -2020,6 +2841,7 @@
                             WC_RNG* rng)
 {
     int ret = RSA_WRONG_TYPE_E;
+    byte* pad = NULL;
 
     if (in == NULL || inLen == 0 || out == NULL || key == NULL) {
         return BAD_FUNC_ARG;
@@ -2027,8 +2849,6 @@
 
     switch (key->state) {
     case RSA_STATE_NONE:
-    case RSA_STATE_DECRYPT_EXPTMOD:
-        key->state = RSA_STATE_DECRYPT_EXPTMOD;
         key->dataLen = inLen;
 
     #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_RSA) && \
@@ -2036,12 +2856,14 @@
         /* Async operations that include padding */
         if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_RSA &&
                                                    pad_type != WC_RSA_PSS_PAD) {
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
             if (rsa_type == RSA_PRIVATE_DECRYPT &&
                                                 pad_value == RSA_BLOCK_TYPE_2) {
                 key->state = RSA_STATE_DECRYPT_RES;
                 key->data = NULL;
                 return NitroxRsaPrivateDecrypt(in, inLen, out, &key->dataLen,
                                                key);
+#endif
             }
             else if (rsa_type == RSA_PUBLIC_DECRYPT &&
                                                 pad_value == RSA_BLOCK_TYPE_1) {
@@ -2050,8 +2872,23 @@
                 return NitroxRsaSSL_Verify(in, inLen, out, &key->dataLen, key);
             }
         }
-    #endif
-
+    #elif defined(WOLFSSL_CRYPTOCELL)
+        if (rsa_type == RSA_PRIVATE_DECRYPT &&
+                                            pad_value == RSA_BLOCK_TYPE_2) {
+            ret = cc310_RsaPublicDecrypt(in, inLen, out, outLen, key);
+            if (outPtr != NULL)
+                *outPtr = out; /* for inline */
+            return ret;
+        }
+        else if (rsa_type == RSA_PUBLIC_DECRYPT &&
+                                            pad_value == RSA_BLOCK_TYPE_1) {
+            return cc310_RsaSSL_Verify(in, inLen, out, key,
+                                       cc310_hashModeRSA(hash, 0));
+        }
+    #endif /* WOLFSSL_CRYPTOCELL */
+
+
+#if !defined(WOLFSSL_RSA_VERIFY_ONLY) && !defined(WOLFSSL_RSA_VERIFY_INLINE)
         /* verify the tmp ptr is NULL, otherwise indicates bad state */
         if (key->data != NULL) {
             ret = BAD_STATE_E;
@@ -2060,7 +2897,8 @@
 
         /* if not doing this inline then allocate a buffer for it */
         if (outPtr == NULL) {
-            key->data = (byte*)XMALLOC(inLen, key->heap, DYNAMIC_TYPE_WOLF_BIGINT);
+            key->data = (byte*)XMALLOC(inLen, key->heap,
+                                                      DYNAMIC_TYPE_WOLF_BIGINT);
             key->dataIsAlloc = 1;
             if (key->data == NULL) {
                 ret = MEMORY_E;
@@ -2071,8 +2909,18 @@
         else {
             key->data = out;
         }
-        ret = wc_RsaFunction(key->data, inLen, key->data, &key->dataLen, rsa_type,
-                                                                      key, rng);
+#endif
+
+        key->state = RSA_STATE_DECRYPT_EXPTMOD;
+        FALL_THROUGH;
+
+    case RSA_STATE_DECRYPT_EXPTMOD:
+#if !defined(WOLFSSL_RSA_VERIFY_ONLY) && !defined(WOLFSSL_RSA_VERIFY_INLINE)
+        ret = wc_RsaFunction(key->data, inLen, key->data, &key->dataLen,
+                                                            rsa_type, key, rng);
+#else
+        ret = wc_RsaFunction(in, inLen, out, &key->dataLen, rsa_type, key, rng);
+#endif
 
         if (ret >= 0 || ret == WC_PENDING_E) {
             key->state = RSA_STATE_DECRYPT_UNPAD;
@@ -2084,42 +2932,71 @@
         FALL_THROUGH;
 
     case RSA_STATE_DECRYPT_UNPAD:
-    {
-        byte* pad = NULL;
+#if !defined(WOLFSSL_RSA_VERIFY_ONLY) && !defined(WOLFSSL_RSA_VERIFY_INLINE)
         ret = wc_RsaUnPad_ex(key->data, key->dataLen, &pad, pad_value, pad_type,
                              hash, mgf, label, labelSz, saltLen,
                              mp_count_bits(&key->n), key->heap);
-        if (ret > 0 && ret <= (int)outLen && pad != NULL) {
+#else
+        ret = wc_RsaUnPad_ex(out, key->dataLen, &pad, pad_value, pad_type, hash,
+                             mgf, label, labelSz, saltLen,
+                             mp_count_bits(&key->n), key->heap);
+#endif
+        if (rsa_type == RSA_PUBLIC_DECRYPT && ret > (int)outLen)
+            ret = RSA_BUFFER_E;
+        else if (ret >= 0 && pad != NULL) {
+#if !defined(WOLFSSL_RSA_VERIFY_ONLY) && !defined(WOLFSSL_RSA_VERIFY_INLINE)
+            signed char c;
+#endif
+
             /* only copy output if not inline */
             if (outPtr == NULL) {
-                XMEMCPY(out, pad, ret);
-            }
-            else {
-                *outPtr = pad;
+#if !defined(WOLFSSL_RSA_VERIFY_ONLY) && !defined(WOLFSSL_RSA_VERIFY_INLINE)
+                if (rsa_type == RSA_PRIVATE_DECRYPT) {
+                    word32 i, j;
+                    int start = (int)((size_t)pad - (size_t)key->data);
+
+                    for (i = 0, j = 0; j < key->dataLen; j++) {
+                        out[i] = key->data[j];
+                        c  = ctMaskGTE(j, start);
+                        c &= ctMaskLT(i, outLen);
+                        /* 0 - no add, -1 add */
+                        i += (word32)((byte)(-c));
+                    }
+                }
+                else
+#endif
+                {
+                    XMEMCPY(out, pad, ret);
+                }
             }
-        }
-        else if (ret >= 0) {
-            ret = RSA_BUFFER_E;
-        }
-        if (ret < 0) {
-            break;
+            else
+                *outPtr = pad;
+
+#if !defined(WOLFSSL_RSA_VERIFY_ONLY)
+            ret = ctMaskSelInt(ctMaskLTE(ret, outLen), ret, RSA_BUFFER_E);
+            ret = ctMaskSelInt(ctMaskNotEq(ret, 0), ret, RSA_BUFFER_E);
+#else
+            if (outLen < (word32)ret)
+                ret = RSA_BUFFER_E;
+#endif
         }
 
         key->state = RSA_STATE_DECRYPT_RES;
-
         FALL_THROUGH;
-    }
+
     case RSA_STATE_DECRYPT_RES:
     #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_RSA) && \
             defined(HAVE_CAVIUM)
         if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_RSA &&
                                                    pad_type != WC_RSA_PSS_PAD) {
-            /* convert result */
-            byte* dataLen = (byte*)&key->dataLen;
-            ret = (dataLen[0] << 8) | (dataLen[1]);
-
-            if (outPtr)
-                *outPtr = in;
+            if (ret > 0) {
+                /* convert result */
+                byte* dataLen = (byte*)&key->dataLen;
+                ret = (dataLen[0] << 8) | (dataLen[1]);
+
+                if (outPtr)
+                    *outPtr = in;
+            }
         }
     #endif
         break;
@@ -2130,7 +3007,11 @@
     }
 
     /* if async pending then return and skip done cleanup below */
-    if (ret == WC_PENDING_E) {
+    if (ret == WC_PENDING_E
+    #ifdef WC_RSA_NONBLOCK
+        || ret == FP_WOULDBLOCK
+    #endif
+    ) {
         return ret;
     }
 
@@ -2141,6 +3022,7 @@
 }
 
 
+#ifndef WOLFSSL_RSA_VERIFY_ONLY
 /* Public RSA Functions */
 int wc_RsaPublicEncrypt(const byte* in, word32 inLen, byte* out, word32 outLen,
                                                      RsaKey* key, WC_RNG* rng)
@@ -2161,13 +3043,17 @@
         RSA_BLOCK_TYPE_2, type, hash, mgf, label, labelSz, 0, rng);
 }
 #endif /* WC_NO_RSA_OAEP */
-
-
+#endif
+
+
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
 int wc_RsaPrivateDecryptInline(byte* in, word32 inLen, byte** out, RsaKey* key)
 {
-    WC_RNG* rng = NULL;
+    WC_RNG* rng;
 #ifdef WC_RSA_BLINDING
     rng = key->rng;
+#else
+    rng = NULL;
 #endif
     return RsaPrivateDecryptEx(in, inLen, in, inLen, out, key,
         RSA_PRIVATE_DECRYPT, RSA_BLOCK_TYPE_2, WC_RSA_PKCSV15_PAD,
@@ -2180,9 +3066,11 @@
                                   RsaKey* key, int type, enum wc_HashType hash,
                                   int mgf, byte* label, word32 labelSz)
 {
-    WC_RNG* rng = NULL;
+    WC_RNG* rng;
 #ifdef WC_RSA_BLINDING
     rng = key->rng;
+#else
+    rng = NULL;
 #endif
     return RsaPrivateDecryptEx(in, inLen, in, inLen, out, key,
         RSA_PRIVATE_DECRYPT, RSA_BLOCK_TYPE_2, type, hash,
@@ -2194,9 +3082,11 @@
 int wc_RsaPrivateDecrypt(const byte* in, word32 inLen, byte* out,
                                                  word32 outLen, RsaKey* key)
 {
-    WC_RNG* rng = NULL;
+    WC_RNG* rng;
 #ifdef WC_RSA_BLINDING
     rng = key->rng;
+#else
+    rng = NULL;
 #endif
     return RsaPrivateDecryptEx((byte*)in, inLen, out, outLen, NULL, key,
         RSA_PRIVATE_DECRYPT, RSA_BLOCK_TYPE_2, WC_RSA_PKCSV15_PAD,
@@ -2209,49 +3099,65 @@
                             enum wc_HashType hash, int mgf, byte* label,
                             word32 labelSz)
 {
-    WC_RNG* rng = NULL;
+    WC_RNG* rng;
 #ifdef WC_RSA_BLINDING
     rng = key->rng;
+#else
+    rng = NULL;
 #endif
     return RsaPrivateDecryptEx((byte*)in, inLen, out, outLen, NULL, key,
         RSA_PRIVATE_DECRYPT, RSA_BLOCK_TYPE_2, type, hash, mgf, label,
         labelSz, 0, rng);
 }
 #endif /* WC_NO_RSA_OAEP || WC_RSA_NO_PADDING */
-
-
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
+
+#if !defined(WOLFSSL_CRYPTOCELL)
 int wc_RsaSSL_VerifyInline(byte* in, word32 inLen, byte** out, RsaKey* key)
 {
-    WC_RNG* rng = NULL;
+    WC_RNG* rng;
 #ifdef WC_RSA_BLINDING
     rng = key->rng;
+#else
+    rng = NULL;
 #endif
     return RsaPrivateDecryptEx(in, inLen, in, inLen, out, key,
         RSA_PUBLIC_DECRYPT, RSA_BLOCK_TYPE_1, WC_RSA_PKCSV15_PAD,
         WC_HASH_TYPE_NONE, WC_MGF1NONE, NULL, 0, 0, rng);
 }
-
+#endif
+
+#ifndef WOLFSSL_RSA_VERIFY_ONLY
 int wc_RsaSSL_Verify(const byte* in, word32 inLen, byte* out, word32 outLen,
                                                                  RsaKey* key)
 {
+    return wc_RsaSSL_Verify_ex(in, inLen, out, outLen, key , WC_RSA_PKCSV15_PAD);
+}
+
+int  wc_RsaSSL_Verify_ex(const byte* in, word32 inLen, byte* out, word32 outLen,
+                         RsaKey* key, int pad_type)
+{
     WC_RNG* rng;
 
     if (key == NULL) {
         return BAD_FUNC_ARG;
     }
 
-    rng = NULL;
 #ifdef WC_RSA_BLINDING
     rng = key->rng;
+#else
+    rng = NULL;
 #endif
+
     return RsaPrivateDecryptEx((byte*)in, inLen, out, outLen, NULL, key,
-        RSA_PUBLIC_DECRYPT, RSA_BLOCK_TYPE_1, WC_RSA_PKCSV15_PAD,
+        RSA_PUBLIC_DECRYPT, RSA_BLOCK_TYPE_1, pad_type,
         WC_HASH_TYPE_NONE, WC_MGF1NONE, NULL, 0, 0, rng);
 }
+#endif
 
 #ifdef WC_RSA_PSS
 /* Verify the message signed with RSA-PSS.
- * The input buffer is reused for the ouput buffer.
+ * The input buffer is reused for the output buffer.
  * Salt length is equal to hash length.
  *
  * in     Buffer holding encrypted data.
@@ -2265,11 +3171,17 @@
 int wc_RsaPSS_VerifyInline(byte* in, word32 inLen, byte** out,
                            enum wc_HashType hash, int mgf, RsaKey* key)
 {
-    return wc_RsaPSS_VerifyInline_ex(in, inLen, out, hash, mgf, -1, key);
+#ifndef WOLFSSL_PSS_SALT_LEN_DISCOVER
+    return wc_RsaPSS_VerifyInline_ex(in, inLen, out, hash, mgf,
+                                                 RSA_PSS_SALT_LEN_DEFAULT, key);
+#else
+    return wc_RsaPSS_VerifyInline_ex(in, inLen, out, hash, mgf,
+                                                RSA_PSS_SALT_LEN_DISCOVER, key);
+#endif
 }
 
 /* Verify the message signed with RSA-PSS.
- * The input buffer is reused for the ouput buffer.
+ * The input buffer is reused for the output buffer.
  *
  * in       Buffer holding encrypted data.
  * inLen    Length of data in buffer.
@@ -2277,17 +3189,20 @@
  * hash     Hash algorithm.
  * mgf      Mask generation function.
  * key      Public RSA key.
- * saltLen  Length of salt used. -1 indicates salt length is the same as the
- *          hash length.
+ * saltLen  Length of salt used. RSA_PSS_SALT_LEN_DEFAULT (-1) indicates salt
+ *          length is the same as the hash length. RSA_PSS_SALT_LEN_DISCOVER
+ *          indicates salt length is determined from the data.
  * returns the length of the PSS data on success and negative indicates failure.
  */
 int wc_RsaPSS_VerifyInline_ex(byte* in, word32 inLen, byte** out,
                               enum wc_HashType hash, int mgf, int saltLen,
                               RsaKey* key)
 {
-    WC_RNG* rng = NULL;
+    WC_RNG* rng;
 #ifdef WC_RSA_BLINDING
     rng = key->rng;
+#else
+    rng = NULL;
 #endif
     return RsaPrivateDecryptEx(in, inLen, in, inLen, out, key,
         RSA_PUBLIC_DECRYPT, RSA_BLOCK_TYPE_1, WC_RSA_PSS_PAD,
@@ -2308,7 +3223,13 @@
 int wc_RsaPSS_Verify(byte* in, word32 inLen, byte* out, word32 outLen,
                      enum wc_HashType hash, int mgf, RsaKey* key)
 {
-    return wc_RsaPSS_Verify_ex(in, inLen, out, outLen, hash, mgf, -1, key);
+#ifndef WOLFSSL_PSS_SALT_LEN_DISCOVER
+    return wc_RsaPSS_Verify_ex(in, inLen, out, outLen, hash, mgf,
+                                                 RSA_PSS_SALT_LEN_DEFAULT, key);
+#else
+    return wc_RsaPSS_Verify_ex(in, inLen, out, outLen, hash, mgf,
+                                                RSA_PSS_SALT_LEN_DISCOVER, key);
+#endif
 }
 
 /* Verify the message signed with RSA-PSS.
@@ -2319,17 +3240,20 @@
  * hash     Hash algorithm.
  * mgf      Mask generation function.
  * key      Public RSA key.
- * saltLen  Length of salt used. -1 indicates salt length is the same as the
- *          hash length.
+ * saltLen  Length of salt used. RSA_PSS_SALT_LEN_DEFAULT (-1) indicates salt
+ *          length is the same as the hash length. RSA_PSS_SALT_LEN_DISCOVER
+ *          indicates salt length is determined from the data.
  * returns the length of the PSS data on success and negative indicates failure.
  */
 int wc_RsaPSS_Verify_ex(byte* in, word32 inLen, byte* out, word32 outLen,
                         enum wc_HashType hash, int mgf, int saltLen,
                         RsaKey* key)
 {
-    WC_RNG* rng = NULL;
+    WC_RNG* rng;
 #ifdef WC_RSA_BLINDING
     rng = key->rng;
+#else
+    rng = NULL;
 #endif
     return RsaPrivateDecryptEx(in, inLen, out, outLen, NULL, key,
         RSA_PUBLIC_DECRYPT, RSA_BLOCK_TYPE_1, WC_RSA_PSS_PAD,
@@ -2362,8 +3286,9 @@
  * sig       Buffer holding PSS data.
  * sigSz     Size of PSS data.
  * hashType  Hash algorithm.
- * saltLen   Length of salt used. -1 indicates salt length is the same as the
- *           hash length.
+ * saltLen   Length of salt used. RSA_PSS_SALT_LEN_DEFAULT (-1) indicates salt
+ *           length is the same as the hash length. RSA_PSS_SALT_LEN_DISCOVER
+ *           indicates salt length is determined from the data.
  * returns BAD_PADDING_E when the PSS data is invalid, BAD_FUNC_ARG when
  * NULL is passed in to in or sig or inSz is not the same as the hash
  * algorithm length and 0 on success.
@@ -2373,33 +3298,68 @@
                               int saltLen, int bits)
 {
     int ret = 0;
+#ifndef WOLFSSL_PSS_LONG_SALT
     byte sigCheck[WC_MAX_DIGEST_SIZE*2 + RSA_PSS_PAD_SZ];
+#else
+    byte *sigCheck = NULL;
+#endif
 
     (void)bits;
 
     if (in == NULL || sig == NULL ||
-                      inSz != (word32)wc_HashGetDigestSize(hashType))
+                               inSz != (word32)wc_HashGetDigestSize(hashType)) {
         ret = BAD_FUNC_ARG;
+    }
 
     if (ret == 0) {
-        if (saltLen == -1) {
+        if (saltLen == RSA_PSS_SALT_LEN_DEFAULT) {
             saltLen = inSz;
             #ifdef WOLFSSL_SHA512
                 /* See FIPS 186-4 section 5.5 item (e). */
-                if (bits == 1024 && inSz == WC_SHA512_DIGEST_SIZE)
+                if (bits == 1024 && inSz == WC_SHA512_DIGEST_SIZE) {
                     saltLen = RSA_PSS_SALT_MAX_SZ;
+                }
             #endif
         }
-        else if (saltLen < -1 || (word32)saltLen > inSz)
+#ifndef WOLFSSL_PSS_LONG_SALT
+        else if ((word32)saltLen > inSz) {
+            ret = PSS_SALTLEN_E;
+        }
+#endif
+#ifndef WOLFSSL_PSS_SALT_LEN_DISCOVER
+        else if (saltLen < RSA_PSS_SALT_LEN_DEFAULT) {
             ret = PSS_SALTLEN_E;
+        }
+#else
+        else if (saltLen == RSA_PSS_SALT_LEN_DISCOVER) {
+            saltLen = sigSz - inSz;
+            if (saltLen < 0) {
+                ret = PSS_SALTLEN_E;
+            }
+        }
+        else if (saltLen < RSA_PSS_SALT_LEN_DISCOVER) {
+            ret = PSS_SALTLEN_E;
+        }
+#endif
     }
 
     /* Sig = Salt | Exp Hash */
     if (ret == 0) {
-        if (sigSz != inSz + saltLen)
-            ret = BAD_PADDING_E;
+        if (sigSz != inSz + saltLen) {
+            ret = PSS_SALTLEN_E;
+        }
     }
 
+#ifdef WOLFSSL_PSS_LONG_SALT
+    if (ret == 0) {
+        sigCheck = (byte*)XMALLOC(RSA_PSS_PAD_SZ + inSz + saltLen, NULL,
+                                                       DYNAMIC_TYPE_RSA_BUFFER);
+        if (sigCheck == NULL) {
+            ret = MEMORY_E;
+        }
+    }
+#endif
+
     /* Exp Hash = HASH(8 * 0x00 | Message Hash | Salt) */
     if (ret == 0) {
         XMEMSET(sigCheck, 0, RSA_PSS_PAD_SZ);
@@ -2415,12 +3375,17 @@
         }
     }
 
+#ifdef WOLFSSL_PSS_LONG_SALT
+    if (sigCheck != NULL) {
+        XFREE(sigCheck, NULL, DYNAMIC_TYPE_RSA_BUFFER);
+    }
+#endif
     return ret;
 }
 
 
 /* Verify the message signed with RSA-PSS.
- * The input buffer is reused for the ouput buffer.
+ * The input buffer is reused for the output buffer.
  * Salt length is equal to hash length.
  *
  * in     Buffer holding encrypted data.
@@ -2512,6 +3477,7 @@
 
 #endif
 
+#if !defined(WOLFSSL_RSA_PUBLIC_ONLY) && !defined(WOLFSSL_RSA_VERIFY_ONLY)
 int wc_RsaSSL_Sign(const byte* in, word32 inLen, byte* out, word32 outLen,
                                                    RsaKey* key, WC_RNG* rng)
 {
@@ -2538,7 +3504,8 @@
 int wc_RsaPSS_Sign(const byte* in, word32 inLen, byte* out, word32 outLen,
                        enum wc_HashType hash, int mgf, RsaKey* key, WC_RNG* rng)
 {
-    return wc_RsaPSS_Sign_ex(in, inLen, out, outLen, hash, mgf, -1, key, rng);
+    return wc_RsaPSS_Sign_ex(in, inLen, out, outLen, hash, mgf,
+                                            RSA_PSS_SALT_LEN_DEFAULT, key, rng);
 }
 
 /* Sign the hash of a message using RSA-PSS.
@@ -2549,8 +3516,9 @@
  * outLen   Size of buffer to write to.
  * hash     Hash algorithm.
  * mgf      Mask generation function.
- * saltLen  Length of salt used. -1 indicates salt length is the same as the
- *          hash length.
+ * saltLen  Length of salt used. RSA_PSS_SALT_LEN_DEFAULT (-1) indicates salt
+ *          length is the same as the hash length. RSA_PSS_SALT_LEN_DISCOVER
+ *          indicates salt length is determined from the data.
  * key      Public RSA key.
  * rng      Random number generator.
  * returns the length of the encrypted signature on success, a negative value
@@ -2565,7 +3533,10 @@
         hash, mgf, NULL, 0, saltLen, rng);
 }
 #endif
-
+#endif
+
+#if !defined(WOLFSSL_RSA_VERIFY_ONLY) || !defined(WOLFSSL_SP_MATH) || \
+                                                             defined(WC_RSA_PSS)
 int wc_RsaEncryptSize(RsaKey* key)
 {
     int ret;
@@ -2574,9 +3545,9 @@
         return BAD_FUNC_ARG;
     }
 
-    ret =  mp_unsigned_bin_size(&key->n);
-
-#ifdef WOLF_CRYPTO_DEV
+    ret = mp_unsigned_bin_size(&key->n);
+
+#ifdef WOLF_CRYPTO_CB
     if (ret == 0 && key->devId != INVALID_DEVID) {
         ret = 2048/8; /* hardware handles, use 2048-bit as default */
     }
@@ -2584,8 +3555,9 @@
 
     return ret;
 }
-
-
+#endif
+
+#ifndef WOLFSSL_RSA_VERIFY_ONLY
 /* flatten RsaKey structure into individual elements (e, n) */
 int wc_RsaFlattenPublicKey(RsaKey* key, byte* e, word32* eSz, byte* n,
                                                                    word32* nSz)
@@ -2614,8 +3586,12 @@
 
     return 0;
 }
-
-
+#endif
+
+#endif /* HAVE_FIPS */
+
+
+#ifndef WOLFSSL_RSA_VERIFY_ONLY
 static int RsaGetValue(mp_int* in, byte* out, word32* outSz)
 {
     word32 sz;
@@ -2651,15 +3627,29 @@
         ret = RsaGetValue(&key->e, e, eSz);
     if (ret == 0)
         ret = RsaGetValue(&key->n, n, nSz);
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
     if (ret == 0)
         ret = RsaGetValue(&key->d, d, dSz);
     if (ret == 0)
         ret = RsaGetValue(&key->p, p, pSz);
     if (ret == 0)
         ret = RsaGetValue(&key->q, q, qSz);
+#else
+    /* no private parts to key */
+    if (d == NULL || p == NULL || q == NULL || dSz == NULL || pSz == NULL
+            || qSz == NULL) {
+        ret = BAD_FUNC_ARG;
+    }
+    else {
+        *dSz = 0;
+        *pSz = 0;
+        *qSz = 0;
+    }
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
 
     return ret;
 }
+#endif
 
 
 #ifdef WOLFSSL_KEY_GEN
@@ -2769,8 +3759,8 @@
 }
 
 
-static int wc_CheckProbablePrime_ex(mp_int* p, mp_int* q, mp_int* e, int nlen,
-                                    int* isPrime)
+static int _CheckProbablePrime(mp_int* p, mp_int* q, mp_int* e, int nlen,
+                                    int* isPrime, WC_RNG* rng)
 {
     int ret;
     mp_int tmp1, tmp2;
@@ -2811,10 +3801,17 @@
     ret = mp_cmp_d(&tmp2, 1);
     if (ret != MP_EQ) goto exit; /* e divides p-1 */
 
-    /* 4.5.1,5.6.1 - Check primality of p with 8 iterations */
-    ret = mp_prime_is_prime(prime, 8, isPrime);
-        /* Performs some divides by a table of primes, and then does M-R,
-         * it sets isPrime as a side-effect. */
+    /* 4.5.1,5.6.1 - Check primality of p with 8 rounds of M-R.
+     * mp_prime_is_prime_ex() performs test divisions against the first 256
+     * prime numbers. After that it performs 8 rounds of M-R using random
+     * bases between 2 and n-2.
+     * mp_prime_is_prime() performs the same test divisions and then does
+     * M-R with the first 8 primes. Both functions set isPrime as a
+     * side-effect. */
+    if (rng != NULL)
+        ret = mp_prime_is_prime_ex(prime, 8, isPrime, rng);
+    else
+        ret = mp_prime_is_prime(prime, 8, isPrime);
     if (ret != MP_OKAY) goto notOkay;
 
 exit:
@@ -2826,11 +3823,10 @@
 }
 
 
-
-int wc_CheckProbablePrime(const byte* pRaw, word32 pRawSz,
+int wc_CheckProbablePrime_ex(const byte* pRaw, word32 pRawSz,
                           const byte* qRaw, word32 qRawSz,
                           const byte* eRaw, word32 eRawSz,
-                          int nlen, int* isPrime)
+                          int nlen, int* isPrime, WC_RNG* rng)
 {
     mp_int p, q, e;
     mp_int* Q = NULL;
@@ -2863,7 +3859,7 @@
         ret = mp_read_unsigned_bin(&e, eRaw, eRawSz);
 
     if (ret == MP_OKAY)
-        ret = wc_CheckProbablePrime_ex(&p, Q, &e, nlen, isPrime);
+        ret = _CheckProbablePrime(&p, Q, &e, nlen, isPrime, rng);
 
     ret = (ret == MP_OKAY) ? 0 : PRIME_GEN_E;
 
@@ -2875,9 +3871,21 @@
 }
 
 
+int wc_CheckProbablePrime(const byte* pRaw, word32 pRawSz,
+                          const byte* qRaw, word32 qRawSz,
+                          const byte* eRaw, word32 eRawSz,
+                          int nlen, int* isPrime)
+{
+    return wc_CheckProbablePrime_ex(pRaw, pRawSz, qRaw, qRawSz,
+                          eRaw, eRawSz, nlen, isPrime, NULL);
+}
+
+#if !defined(HAVE_FIPS) || (defined(HAVE_FIPS) && \
+        defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2))
 /* Make an RSA key for size bits, with e specified, 65537 is a good e */
 int wc_MakeRsaKey(RsaKey* key, int size, long e, WC_RNG* rng)
 {
+#ifndef WC_NO_RNG
     mp_int p, q, tmp1, tmp2, tmp3;
     int err, i, failCount, primeSz, isPrime = 0;
     byte* buf = NULL;
@@ -2891,12 +3899,28 @@
     if (e < 3 || (e & 1) == 0)
         return BAD_FUNC_ARG;
 
-#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_RSA)
+#if defined(WOLFSSL_CRYPTOCELL)
+
+    return cc310_RSA_GenerateKeyPair(key, size, e);
+
+#endif /*WOLFSSL_CRYPTOCELL*/
+
+#ifdef WOLF_CRYPTO_CB
+    if (key->devId != INVALID_DEVID) {
+        int ret = wc_CryptoCb_MakeRsaKey(key, size, e, rng);
+        if (ret != CRYPTOCB_UNAVAILABLE)
+            return ret;
+        /* fall-through when unavailable */
+    }
+#endif
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_RSA) && \
+    defined(WC_ASYNC_ENABLE_RSA_KEYGEN)
     if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_RSA) {
     #ifdef HAVE_CAVIUM
         /* TODO: Not implemented */
     #elif defined(HAVE_INTEL_QA)
-        /* TODO: Not implemented */
+        return IntelQaRsaKeyGen(&key->asyncDev, key, size, e, rng);
     #else
         if (wc_AsyncTestInit(&key->asyncDev, ASYNC_TEST_RSA_MAKE)) {
             WC_ASYNC_TEST* testDev = &key->asyncDev.test;
@@ -2939,7 +3963,6 @@
 #endif
             /* generate value */
             err = wc_RNG_GenerateBlock(rng, buf, primeSz);
-
             if (err == 0) {
                 /* prime lower bound has the MSB set, set it in candidate */
                 buf[0] |= 0x80;
@@ -2950,9 +3973,9 @@
             }
 
             if (err == MP_OKAY)
-                err = wc_CheckProbablePrime_ex(&p, NULL, &tmp3, size, &isPrime);
-
-#ifdef WOLFSSL_FIPS
+                err = _CheckProbablePrime(&p, NULL, &tmp3, size, &isPrime, rng);
+
+#ifdef HAVE_FIPS
             i++;
 #else
             /* Keep the old retry behavior in non-FIPS build. */
@@ -2975,7 +3998,6 @@
 #endif
             /* generate value */
             err = wc_RNG_GenerateBlock(rng, buf, primeSz);
-
             if (err == 0) {
                 /* prime lower bound has the MSB set, set it in candidate */
                 buf[0] |= 0x80;
@@ -2986,9 +4008,9 @@
             }
 
             if (err == MP_OKAY)
-                err = wc_CheckProbablePrime_ex(&p, &q, &tmp3, size, &isPrime);
-
-#ifdef WOLFSSL_FIPS
+                err = _CheckProbablePrime(&p, &q, &tmp3, size, &isPrime, rng);
+
+#ifdef HAVE_FIPS
             i++;
 #else
             /* Keep the old retry behavior in non-FIPS build. */
@@ -3005,46 +4027,101 @@
         XFREE(buf, key->heap, DYNAMIC_TYPE_RSA);
     }
 
+    if (err == MP_OKAY && mp_cmp(&p, &q) < 0) {
+        err = mp_copy(&p, &tmp1);
+        if (err == MP_OKAY)
+            err = mp_copy(&q, &p);
+        if (err == MP_OKAY)
+            mp_copy(&tmp1, &q);
+    }
+
+    /* Setup RsaKey buffers */
     if (err == MP_OKAY)
         err = mp_init_multi(&key->n, &key->e, &key->d, &key->p, &key->q, NULL);
-
     if (err == MP_OKAY)
         err = mp_init_multi(&key->dP, &key->dQ, &key->u, NULL, NULL, NULL);
 
-    if (err == MP_OKAY)
-        err = mp_sub_d(&p, 1, &tmp1);  /* tmp1 = p-1 */
-
-    if (err == MP_OKAY)
-        err = mp_sub_d(&q, 1, &tmp2);  /* tmp2 = q-1 */
-
+    /* Software Key Calculation */
+    if (err == MP_OKAY)                /* tmp1 = p-1 */
+        err = mp_sub_d(&p, 1, &tmp1);
+    if (err == MP_OKAY)                /* tmp2 = q-1 */
+        err = mp_sub_d(&q, 1, &tmp2);
+#ifdef WC_RSA_BLINDING
+    if (err == MP_OKAY)                /* tmp3 = order of n */
+        err = mp_mul(&tmp1, &tmp2, &tmp3);
+#else
+    if (err == MP_OKAY)                /* tmp3 = lcm(p-1, q-1), last loop */
+        err = mp_lcm(&tmp1, &tmp2, &tmp3);
+#endif
+    /* make key */
+    if (err == MP_OKAY)                /* key->e = e */
+        err = mp_set_int(&key->e, (mp_digit)e);
+#ifdef WC_RSA_BLINDING
+    /* Blind the inverse operation with a value that is invertable */
+    if (err == MP_OKAY) {
+        do {
+            err = mp_rand(&key->p, get_digit_count(&tmp3), rng);
+            if (err == MP_OKAY)
+                err = mp_set_bit(&key->p, 0);
+            if (err == MP_OKAY)
+                err = mp_set_bit(&key->p, size - 1);
+            if (err == MP_OKAY)
+                err = mp_gcd(&key->p, &tmp3, &key->q);
+        }
+        while ((err == MP_OKAY) && !mp_isone(&key->q));
+    }
     if (err == MP_OKAY)
-        err = mp_lcm(&tmp1, &tmp2, &tmp3);  /* tmp3 = lcm(p-1, q-1),last loop */
-
-    /* make key */
-    if (err == MP_OKAY)
-        err = mp_set_int(&key->e, (mp_digit)e);  /* key->e = e */
-
+        err = mp_mul_d(&key->p, (mp_digit)e, &key->e);
+#endif
     if (err == MP_OKAY)                /* key->d = 1/e mod lcm(p-1, q-1) */
         err = mp_invmod(&key->e, &tmp3, &key->d);
-
+#ifdef WC_RSA_BLINDING
+    /* Take off blinding from d and reset e */
     if (err == MP_OKAY)
-        err = mp_mul(&p, &q, &key->n);  /* key->n = pq */
-
+        err = mp_mulmod(&key->d, &key->p, &tmp3, &key->d);
     if (err == MP_OKAY)
-        err = mp_mod(&key->d, &tmp1, &key->dP); /* key->dP = d mod(p-1) */
-
+        err = mp_set_int(&key->e, (mp_digit)e);
+#endif
+    if (err == MP_OKAY)                /* key->n = pq */
+        err = mp_mul(&p, &q, &key->n);
+    if (err == MP_OKAY)                /* key->dP = d mod(p-1) */
+        err = mp_mod(&key->d, &tmp1, &key->dP);
+    if (err == MP_OKAY)                /* key->dQ = d mod(q-1) */
+        err = mp_mod(&key->d, &tmp2, &key->dQ);
+#ifdef WOLFSSL_MP_INVMOD_CONSTANT_TIME
+    if (err == MP_OKAY)                /* key->u = 1/q mod p */
+        err = mp_invmod(&q, &p, &key->u);
+#else
     if (err == MP_OKAY)
-        err = mp_mod(&key->d, &tmp2, &key->dQ); /* key->dQ = d mod(q-1) */
-
-    if (err == MP_OKAY)
-        err = mp_invmod(&q, &p, &key->u); /* key->u = 1/q mod p */
-
+        err = mp_sub_d(&p, 2, &tmp3);
+    if (err == MP_OKAY)                /* key->u = 1/q mod p = q^p-2 mod p */
+        err = mp_exptmod(&q, &tmp3 , &p, &key->u);
+#endif
     if (err == MP_OKAY)
         err = mp_copy(&p, &key->p);
-
     if (err == MP_OKAY)
         err = mp_copy(&q, &key->q);
 
+#ifdef HAVE_WOLF_BIGINT
+    /* make sure raw unsigned bin version is available */
+    if (err == MP_OKAY)
+         err = wc_mp_to_bigint(&key->n, &key->n.raw);
+    if (err == MP_OKAY)
+         err = wc_mp_to_bigint(&key->e, &key->e.raw);
+    if (err == MP_OKAY)
+         err = wc_mp_to_bigint(&key->d, &key->d.raw);
+    if (err == MP_OKAY)
+         err = wc_mp_to_bigint(&key->p, &key->p.raw);
+    if (err == MP_OKAY)
+         err = wc_mp_to_bigint(&key->q, &key->q.raw);
+    if (err == MP_OKAY)
+         err = wc_mp_to_bigint(&key->dP, &key->dP.raw);
+    if (err == MP_OKAY)
+         err = wc_mp_to_bigint(&key->dQ, &key->dQ.raw);
+    if (err == MP_OKAY)
+         err = wc_mp_to_bigint(&key->u, &key->u.raw);
+#endif
+
     if (err == MP_OKAY)
         key->type = RSA_PRIVATE;
 
@@ -3054,28 +4131,32 @@
     mp_clear(&p);
     mp_clear(&q);
 
+#if defined(WOLFSSL_KEY_GEN) && !defined(WOLFSSL_NO_RSA_KEY_CHECK)
     /* Perform the pair-wise consistency test on the new key. */
     if (err == 0)
         err = wc_CheckRsaKey(key);
+#endif
 
     if (err != 0) {
         wc_FreeRsaKey(key);
         return err;
     }
 
-#ifdef WOLFSSL_XILINX_CRYPT
+#if defined(WOLFSSL_XILINX_CRYPT) || defined(WOLFSSL_CRYPTOCELL)
     if (wc_InitRsaHw(key) != 0) {
         return BAD_STATE_E;
     }
 #endif
-
     return 0;
+#else
+    return NOT_COMPILED_IN;
+#endif
 }
+#endif /* !FIPS || FIPS_VER >= 2 */
 #endif /* WOLFSSL_KEY_GEN */
 
 
 #ifdef WC_RSA_BLINDING
-
 int wc_RsaSetRNG(RsaKey* key, WC_RNG* rng)
 {
     if (key == NULL)
@@ -3085,12 +4166,37 @@
 
     return 0;
 }
-
 #endif /* WC_RSA_BLINDING */
 
-
-#undef ERROR_OUT
-
-#endif /* HAVE_FIPS */
+#ifdef WC_RSA_NONBLOCK
+int wc_RsaSetNonBlock(RsaKey* key, RsaNb* nb)
+{
+    if (key == NULL)
+        return BAD_FUNC_ARG;
+
+    if (nb) {
+        XMEMSET(nb, 0, sizeof(RsaNb));
+    }
+
+    /* Allow nb == NULL to clear non-block mode */
+    key->nb = nb;
+
+    return 0;
+}
+#ifdef WC_RSA_NONBLOCK_TIME
+int wc_RsaSetNonBlockTime(RsaKey* key, word32 maxBlockUs, word32 cpuMHz)
+{
+    if (key == NULL || key->nb == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    /* calculate maximum number of instructions to block */
+    key->nb->exptmod.maxBlockInst = cpuMHz * maxBlockUs;
+
+    return 0;
+}
+#endif /* WC_RSA_NONBLOCK_TIME */
+#endif /* WC_RSA_NONBLOCK */
+
 #endif /* NO_RSA */
 
--- a/wolfcrypt/src/sha.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/sha.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* sha.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -42,6 +42,11 @@
 
 #include <wolfssl/wolfcrypt/sha.h>
 #include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/hash.h>
+
+#ifdef WOLF_CRYPTO_CB
+    #include <wolfssl/wolfcrypt/cryptocb.h>
+#endif
 
 /* fips wrapper calls, user can call direct */
 #if defined(HAVE_FIPS) && \
@@ -200,7 +205,14 @@
     #endif
 
     #define USE_SHA_SOFTWARE_IMPL /* Only for API's, actual transform is here */
-    #define XTRANSFORM(S,B)   Transform((S),(B))
+
+    #define XTRANSFORM(S,B)       Transform((S),(B))
+    #define XTRANSFORM_LEN(S,B,L) Transform_Len((S),(B),(L))
+
+    #ifndef WC_HASH_DATA_ALIGNMENT
+        /* these hardware API's require 4 byte (word32) alignment */
+        #define WC_HASH_DATA_ALIGNMENT 4
+    #endif
 
     static int InitSha(wc_Sha* sha)
     {
@@ -223,15 +235,50 @@
         return ret;
     }
 
-    static int Transform(wc_Sha* sha, byte* data)
+    static int Transform(wc_Sha* sha, const byte* data)
+    {
+        int ret = wolfSSL_CryptHwMutexLock();
+        if (ret == 0) {
+    #ifdef FREESCALE_MMCAU_CLASSIC_SHA
+            cau_sha1_hash_n((byte*)data, 1, sha->digest);
+    #else
+            MMCAU_SHA1_HashN((byte*)data, 1, (uint32_t*)sha->digest);
+    #endif
+            wolfSSL_CryptHwMutexUnLock();
+        }
+        return ret;
+    }
+
+    static int Transform_Len(wc_Sha* sha, const byte* data, word32 len)
     {
         int ret = wolfSSL_CryptHwMutexLock();
-        if(ret == 0) {
+        if (ret == 0) {
+        #if defined(WC_HASH_DATA_ALIGNMENT) && WC_HASH_DATA_ALIGNMENT > 0
+            if ((size_t)data % WC_HASH_DATA_ALIGNMENT) {
+                /* data pointer is NOT aligned,
+                 * so copy and perform one block at a time */
+                byte* local = (byte*)sha->buffer;
+                while (len >= WC_SHA_BLOCK_SIZE) {
+                    XMEMCPY(local, data, WC_SHA_BLOCK_SIZE);
+                #ifdef FREESCALE_MMCAU_CLASSIC_SHA
+                    cau_sha1_hash_n(local, 1, sha->digest);
+                #else
+                    MMCAU_SHA1_HashN(local, 1, sha->digest);
+                #endif
+                    data += WC_SHA_BLOCK_SIZE;
+                    len  -= WC_SHA_BLOCK_SIZE;
+                }
+            }
+            else
+        #endif
+            {
     #ifdef FREESCALE_MMCAU_CLASSIC_SHA
-            cau_sha1_hash_n(data, 1, sha->digest);
+            cau_sha1_hash_n((byte*)data, len/WC_SHA_BLOCK_SIZE, sha->digest);
     #else
-            MMCAU_SHA1_HashN(data, 1, (uint32_t*)sha->digest);
+            MMCAU_SHA1_HashN((byte*)data, len/WC_SHA_BLOCK_SIZE,
+                (uint32_t*)sha->digest);
     #endif
+            }
             wolfSSL_CryptHwMutexUnLock();
         }
         return ret;
@@ -239,8 +286,49 @@
 
 #elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_HASH)
     /* wolfcrypt/src/port/caam/caam_sha.c */
+
+#elif defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+     !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+
+    #include "wolfssl/wolfcrypt/port/Espressif/esp32-crypt.h"
+
+    #define USE_SHA_SOFTWARE_IMPL
+
+    static int InitSha(wc_Sha* sha)
+    {
+        int ret = 0;
+
+        sha->digest[0] = 0x67452301L;
+        sha->digest[1] = 0xEFCDAB89L;
+        sha->digest[2] = 0x98BADCFEL;
+        sha->digest[3] = 0x10325476L;
+        sha->digest[4] = 0xC3D2E1F0L;
+
+        sha->buffLen = 0;
+        sha->loLen   = 0;
+        sha->hiLen   = 0;
+
+        /* always start firstblock = 1 when using hw engine */
+        sha->ctx.isfirstblock = 1;
+        sha->ctx.sha_type = SHA1;
+        if(sha->ctx.mode == ESP32_SHA_HW){
+            /* release hw engine */
+            esp_sha_hw_unlock();
+        }
+        /* always set mode as INIT
+        *  whether using HW or SW is determined at first call of update()
+        */
+        sha->ctx.mode = ESP32_SHA_INIT;
+
+        return ret;
+    }
+
+#elif defined(WOLFSSL_RENESAS_TSIP_CRYPT) && \
+    !defined(NO_WOLFSSL_RENESAS_TSIP_CRYPT_HASH)
+
+    /* implemented in wolfcrypt/src/port/Renesas/renesas_tsip_sha.c */
+
 #else
-
     /* Software implementation */
     #define USE_SHA_SOFTWARE_IMPL
 
@@ -257,13 +345,14 @@
         sha->buffLen = 0;
         sha->loLen   = 0;
         sha->hiLen   = 0;
+    #if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+        sha->flags = 0;
+    #endif
 
         return ret;
     }
-
 #endif /* End Hardware Acceleration */
 
-
 /* Software implementation */
 #ifdef USE_SHA_SOFTWARE_IMPL
 
@@ -278,7 +367,7 @@
 #ifndef XTRANSFORM
     #define XTRANSFORM(S,B)   Transform((S),(B))
 
-    #define blk0(i) (W[i] = sha->buffer[i])
+    #define blk0(i) (W[i] = *((word32*)&data[i*sizeof(word32)]))
     #define blk1(i) (W[(i)&15] = \
         rotlFixed(W[((i)+13)&15]^W[((i)+8)&15]^W[((i)+2)&15]^W[(i)&15],1))
 
@@ -307,7 +396,7 @@
     #define R4(v,w,x,y,z,i) (z)+= f4((w),(x),(y)) + blk1((i)) + 0xCA62C1D6+ \
         rotlFixed((v),5); (w) = rotlFixed((w),30);
 
-    static void Transform(wc_Sha* sha, byte* data)
+    static int Transform(wc_Sha* sha, const byte* data)
     {
         word32 W[WC_SHA_BLOCK_SIZE / sizeof(word32)];
 
@@ -382,6 +471,8 @@
         sha->digest[4] += e;
 
         (void)data; /* Not used */
+
+        return 0;
     }
 #endif /* !USE_CUSTOM_SHA_TRANSFORM */
 
@@ -394,7 +485,15 @@
         return BAD_FUNC_ARG;
 
     sha->heap = heap;
+#ifdef WOLF_CRYPTO_CB
+    sha->devId = devId;
+#endif
 
+#if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+    !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+    sha->ctx.mode = ESP32_SHA_INIT;
+    sha->ctx.isfirstblock = 1;
+#endif
     ret = InitSha(sha);
     if (ret != 0)
         return ret;
@@ -409,17 +508,26 @@
     return ret;
 }
 
+/* do block size increments/updates */
 int wc_ShaUpdate(wc_Sha* sha, const byte* data, word32 len)
 {
+    int ret = 0;
+    word32 blocksLen;
     byte* local;
 
-    if (sha == NULL ||(data == NULL && len > 0)) {
+    if (sha == NULL || (data == NULL && len > 0)) {
         return BAD_FUNC_ARG;
     }
 
-    /* do block size increments */
-    local = (byte*)sha->buffer;
-
+#ifdef WOLF_CRYPTO_CB
+    if (sha->devId != INVALID_DEVID) {
+        ret = wc_CryptoCb_ShaHash(sha, data, len, NULL);
+        if (ret != CRYPTOCB_UNAVAILABLE)
+            return ret;
+        ret = 0; /* reset ret */
+        /* fall-through when unavailable */
+    }
+#endif
 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA)
     if (sha->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA) {
     #if defined(HAVE_INTEL_QA)
@@ -432,25 +540,107 @@
     if (sha->buffLen >= WC_SHA_BLOCK_SIZE)
         return BUFFER_E;
 
-    while (len) {
-        word32 add = min(len, WC_SHA_BLOCK_SIZE - sha->buffLen);
-        XMEMCPY(&local[sha->buffLen], data, add);
+    if (data == NULL && len == 0) {
+        /* valid, but do nothing */
+        return 0;
+    }
+
+    /* add length for final */
+    AddLength(sha, len);
 
-        sha->buffLen += add;
-        data         += add;
-        len          -= add;
+    local = (byte*)sha->buffer;
+
+    /* process any remainder from previous operation */
+    if (sha->buffLen > 0) {
+        blocksLen = min(len, WC_SHA_BLOCK_SIZE - sha->buffLen);
+        XMEMCPY(&local[sha->buffLen], data, blocksLen);
+
+        sha->buffLen += blocksLen;
+        data         += blocksLen;
+        len          -= blocksLen;
 
         if (sha->buffLen == WC_SHA_BLOCK_SIZE) {
-#if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
+        #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
             ByteReverseWords(sha->buffer, sha->buffer, WC_SHA_BLOCK_SIZE);
-#endif
-            XTRANSFORM(sha, local);
-            AddLength(sha, WC_SHA_BLOCK_SIZE);
+        #endif
+
+        #if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+            !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+            if (sha->ctx.mode == ESP32_SHA_INIT) {
+                esp_sha_try_hw_lock(&sha->ctx);
+            }
+            if (sha->ctx.mode == ESP32_SHA_SW) {
+                ret = XTRANSFORM(sha, (const byte*)local);
+            } else {
+                esp_sha_process(sha, (const byte*)local);
+            }
+        #else
+            ret = XTRANSFORM(sha, (const byte*)local);
+        #endif
+            if (ret != 0)
+                return ret;
+
             sha->buffLen = 0;
         }
     }
 
-    return 0;
+    /* process blocks */
+#ifdef XTRANSFORM_LEN
+    /* get number of blocks */
+    /* 64-1 = 0x3F (~ Inverted = 0xFFFFFFC0) */
+    /* len (masked by 0xFFFFFFC0) returns block aligned length */
+    blocksLen = len & ~(WC_SHA_BLOCK_SIZE-1);
+    if (blocksLen > 0) {
+        /* Byte reversal performed in function if required. */
+        XTRANSFORM_LEN(sha, data, blocksLen);
+        data += blocksLen;
+        len  -= blocksLen;
+    }
+#else
+    while (len >= WC_SHA_BLOCK_SIZE) {
+        word32* local32 = sha->buffer;
+        /* optimization to avoid memcpy if data pointer is properly aligned */
+        /* Little Endian requires byte swap, so can't use data directly */
+    #if defined(WC_HASH_DATA_ALIGNMENT) && !defined(LITTLE_ENDIAN_ORDER)
+        if (((size_t)data % WC_HASH_DATA_ALIGNMENT) == 0) {
+            local32 = (word32*)data;
+        }
+        else
+    #endif
+        {
+            XMEMCPY(local32, data, WC_SHA_BLOCK_SIZE);
+        }
+
+        data += WC_SHA_BLOCK_SIZE;
+        len  -= WC_SHA_BLOCK_SIZE;
+
+    #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
+        ByteReverseWords(local32, local32, WC_SHA_BLOCK_SIZE);
+    #endif
+
+    #if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+        !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+        if (sha->ctx.mode == ESP32_SHA_INIT){
+            esp_sha_try_hw_lock(&sha->ctx);
+        }
+        if (sha->ctx.mode == ESP32_SHA_SW){
+            ret = XTRANSFORM(sha, (const byte*)local32);
+        } else {
+            esp_sha_process(sha, (const byte*)local32);
+        }
+    #else
+        ret = XTRANSFORM(sha, (const byte*)local32);
+    #endif
+    }
+#endif /* XTRANSFORM_LEN */
+
+    /* save remainder */
+    if (len > 0) {
+        XMEMCPY(local, data, len);
+        sha->buffLen = len;
+    }
+
+    return ret;
 }
 
 int wc_ShaFinalRaw(wc_Sha* sha, byte* hash)
@@ -475,6 +665,7 @@
 
 int wc_ShaFinal(wc_Sha* sha, byte* hash)
 {
+    int ret;
     byte* local;
 
     if (sha == NULL || hash == NULL) {
@@ -483,6 +674,15 @@
 
     local = (byte*)sha->buffer;
 
+#ifdef WOLF_CRYPTO_CB
+    if (sha->devId != INVALID_DEVID) {
+        ret = wc_CryptoCb_ShaHash(sha, NULL, 0, hash);
+        if (ret != CRYPTOCB_UNAVAILABLE)
+            return ret;
+        ret = 0; /* reset ret */
+        /* fall-through when unavailable */
+    }
+#endif
 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA)
     if (sha->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA) {
     #if defined(HAVE_INTEL_QA)
@@ -491,8 +691,6 @@
     }
 #endif /* WOLFSSL_ASYNC_CRYPT */
 
-    AddLength(sha, sha->buffLen);  /* before adding pads */
-
     local[sha->buffLen++] = 0x80;  /* add 1 */
 
     /* pad with zeros */
@@ -500,10 +698,26 @@
         XMEMSET(&local[sha->buffLen], 0, WC_SHA_BLOCK_SIZE - sha->buffLen);
         sha->buffLen += WC_SHA_BLOCK_SIZE - sha->buffLen;
 
-#if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
+    #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
         ByteReverseWords(sha->buffer, sha->buffer, WC_SHA_BLOCK_SIZE);
-#endif
-        XTRANSFORM(sha, local);
+    #endif
+
+    #if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+        !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+        if (sha->ctx.mode == ESP32_SHA_INIT) {
+            esp_sha_try_hw_lock(&sha->ctx);
+        }
+        if (sha->ctx.mode == ESP32_SHA_SW) {
+            ret = XTRANSFORM(sha, (const byte*)local);
+        } else {
+            ret = esp_sha_process(sha, (const byte*)local);
+        }
+    #else
+        ret = XTRANSFORM(sha, (const byte*)local);
+    #endif
+        if (ret != 0)
+            return ret;
+
         sha->buffLen = 0;
     }
     XMEMSET(&local[sha->buffLen], 0, WC_SHA_PAD_SIZE - sha->buffLen);
@@ -528,13 +742,29 @@
                      2 * sizeof(word32));
 #endif
 
-    XTRANSFORM(sha, local);
+#if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+    !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+    if (sha->ctx.mode == ESP32_SHA_INIT) {
+        esp_sha_try_hw_lock(&sha->ctx);
+    }
+    if (sha->ctx.mode == ESP32_SHA_SW) {
+        ret = XTRANSFORM(sha, (const byte*)local);
+    } else {
+        ret = esp_sha_digest_process(sha, 1);
+    }
+#else
+    ret = XTRANSFORM(sha, (const byte*)local);
+#endif
+
 #ifdef LITTLE_ENDIAN_ORDER
     ByteReverseWords(sha->digest, sha->digest, WC_SHA_DIGEST_SIZE);
 #endif
+
     XMEMCPY(hash, sha->digest, WC_SHA_DIGEST_SIZE);
 
-    return InitSha(sha); /* reset state */
+    (void)InitSha(sha); /* reset state */
+
+    return ret;
 }
 
 #endif /* USE_SHA_SOFTWARE_IMPL */
@@ -553,12 +783,25 @@
 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA)
     wolfAsync_DevCtxFree(&sha->asyncDev, WOLFSSL_ASYNC_MARKER_SHA);
 #endif /* WOLFSSL_ASYNC_CRYPT */
+
+#ifdef WOLFSSL_PIC32MZ_HASH
+    wc_ShaPic32Free(sha);
+#endif
+#if (defined(WOLFSSL_RENESAS_TSIP_CRYPT) && \
+    !defined(NO_WOLFSSL_RENESAS_TSIP_CRYPT_HASH))
+    if (sha->msg != NULL) {
+        XFREE(sha->msg, sha->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        sha->msg = NULL;
+    }
+#endif
 }
 
 #endif /* !WOLFSSL_TI_HASH */
 #endif /* HAVE_FIPS */
 
 #ifndef WOLFSSL_TI_HASH
+#if !defined(WOLFSSL_RENESAS_TSIP_CRYPT) || \
+    defined(NO_WOLFSSL_RENESAS_TSIP_CRYPT_HASH)
 int wc_ShaGetHash(wc_Sha* sha, byte* hash)
 {
     int ret;
@@ -567,9 +810,24 @@
     if (sha == NULL || hash == NULL)
         return BAD_FUNC_ARG;
 
+#if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+    !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+    if(sha->ctx.mode == ESP32_SHA_INIT){
+        esp_sha_try_hw_lock(&sha->ctx);
+    }
+    if(sha->ctx.mode != ESP32_SHA_SW)
+        esp_sha_digest_process(sha, 0);
+#endif
+
     ret = wc_ShaCopy(sha, &tmpSha);
     if (ret == 0) {
-        ret = wc_ShaFinal(&tmpSha, hash);
+       ret = wc_ShaFinal(&tmpSha, hash);
+#if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+    !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+        sha->ctx.mode = ESP32_SHA_SW;
+#endif
+
+
     }
     return ret;
 }
@@ -589,10 +847,37 @@
 #ifdef WOLFSSL_PIC32MZ_HASH
     ret = wc_Pic32HashCopy(&src->cache, &dst->cache);
 #endif
-
+#if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+    !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+     dst->ctx.mode = src->ctx.mode;
+     dst->ctx.isfirstblock = src->ctx.isfirstblock;
+     dst->ctx.sha_type = src->ctx.sha_type;
+#endif
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+     dst->flags |= WC_HASH_FLAG_ISCOPY;
+#endif
     return ret;
 }
+#endif /* defined(WOLFSSL_RENESAS_TSIP_CRYPT) ... */
 #endif /* !WOLFSSL_TI_HASH */
 
+
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+int wc_ShaSetFlags(wc_Sha* sha, word32 flags)
+{
+    if (sha) {
+        sha->flags = flags;
+    }
+    return 0;
+}
+int wc_ShaGetFlags(wc_Sha* sha, word32* flags)
+{
+    if (sha && flags) {
+        *flags = sha->flags;
+    }
+    return 0;
+}
+#endif
+
 #endif /* !NO_SHA */
 
--- a/wolfcrypt/src/sha256.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/sha256.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* sha256.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -19,15 +19,30 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
  */
 
-
-/* code submitted by raphael.huck@efixo.com */
-
 #ifdef HAVE_CONFIG_H
     #include <config.h>
 #endif
 
 #include <wolfssl/wolfcrypt/settings.h>
 
+/*
+ * SHA256 Build Options:
+ * USE_SLOW_SHA256:            Reduces code size by not partially unrolling
+                                (~2KB smaller and ~25% slower) (default OFF)
+ * WOLFSSL_SHA256_BY_SPEC:     Uses the Ch/Maj based on SHA256 specification
+                                (default ON)
+ * WOLFSSL_SHA256_ALT_CH_MAJ:  Alternate Ch/Maj that is easier for compilers to
+                                optimize and recognize as SHA256 (default OFF)
+ * SHA256_MANY_REGISTERS:      A SHA256 version that keeps all data in registers
+                                and partial unrolled (default OFF)
+ */
+
+/* Default SHA256 to use Ch/Maj based on specification */
+#if !defined(WOLFSSL_SHA256_BY_SPEC) && !defined(WOLFSSL_SHA256_ALT_CH_MAJ)
+    #define WOLFSSL_SHA256_BY_SPEC
+#endif
+
+
 #if !defined(NO_SHA256) && !defined(WOLFSSL_ARMASM)
 
 #if defined(HAVE_FIPS) && \
@@ -45,6 +60,11 @@
 #include <wolfssl/wolfcrypt/sha256.h>
 #include <wolfssl/wolfcrypt/error-crypt.h>
 #include <wolfssl/wolfcrypt/cpuid.h>
+#include <wolfssl/wolfcrypt/hash.h>
+
+#ifdef WOLF_CRYPTO_CB
+    #include <wolfssl/wolfcrypt/cryptocb.h>
+#endif
 
 /* fips wrapper calls, user can call direct */
 #if defined(HAVE_FIPS) && \
@@ -97,6 +117,8 @@
 
 #if defined(WOLFSSL_TI_HASH)
     /* #include <wolfcrypt/src/port/ti/ti-hash.c> included by wc_port.c */
+#elif defined(WOLFSSL_CRYPTOCELL)
+    /* wc_port.c includes wolfcrypt/src/port/arm/cryptoCellHash.c */
 #else
 
 #include <wolfssl/wolfcrypt/logging.h>
@@ -108,12 +130,16 @@
     #include <wolfcrypt/src/misc.c>
 #endif
 
+#ifdef WOLFSSL_DEVCRYPTO_HASH
+    #include <wolfssl/wolfcrypt/port/devcrypto/wc_devcrypto.h>
+#endif
+
+
 
 #if defined(USE_INTEL_SPEEDUP)
-    #define HAVE_INTEL_AVX1
-
     #if defined(__GNUC__) && ((__GNUC__ < 4) || \
                               (__GNUC__ == 4 && __GNUC_MINOR__ <= 8))
+        #undef  NO_AVX2_SUPPORT
         #define NO_AVX2_SUPPORT
     #endif
     #if defined(__clang__) && ((__clang_major__ < 3) || \
@@ -135,7 +161,11 @@
 
 
 #if !defined(WOLFSSL_PIC32MZ_HASH) && !defined(STM32_HASH_SHA2) && \
-    (!defined(WOLFSSL_IMX6_CAAM) || defined(NO_IMX6_CAAM_HASH))
+    (!defined(WOLFSSL_IMX6_CAAM) || defined(NO_IMX6_CAAM_HASH)) && \
+    !defined(WOLFSSL_AFALG_HASH) && !defined(WOLFSSL_DEVCRYPTO_HASH) && \
+    (!defined(WOLFSSL_ESP32WROOM32_CRYPT) || defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)) && \
+    (!defined(WOLFSSL_RENESAS_TSIP_CRYPT) || defined(NO_WOLFSSL_RENESAS_TSIP_HASH))
+
 static int InitSha256(wc_Sha256* sha256)
 {
     int ret = 0;
@@ -156,6 +186,9 @@
     sha256->buffLen = 0;
     sha256->loLen   = 0;
     sha256->hiLen   = 0;
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+    sha256->flags = 0;
+#endif
 
     return ret;
 }
@@ -194,7 +227,7 @@
 
     #if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
         #if defined(HAVE_INTEL_RORX
-             #define RND with rorx instuction
+             #define RND with rorx instruction
         #else
             #define RND
         #endif
@@ -213,7 +246,7 @@
       #define YMM Instructions/inline asm
 
       int Transform_Sha256() {
-          More granural Stitched Message Sched/Round
+          More granular Stitched Message Sched/Round
       }
 
     #endif
@@ -225,29 +258,45 @@
      */
 
     /* #if defined(HAVE_INTEL_AVX1/2) at the tail of sha256 */
-    static int Transform_Sha256(wc_Sha256* sha256);
+    static int Transform_Sha256(wc_Sha256* sha256, const byte* data);
+
+#ifdef __cplusplus
+    extern "C" {
+#endif
+
     #if defined(HAVE_INTEL_AVX1)
-        static int Transform_Sha256_AVX1(wc_Sha256 *sha256);
-        static int Transform_Sha256_AVX1_Len(wc_Sha256* sha256, word32 len);
+        extern int Transform_Sha256_AVX1(wc_Sha256 *sha256, const byte* data);
+        extern int Transform_Sha256_AVX1_Len(wc_Sha256* sha256,
+                                             const byte* data, word32 len);
     #endif
     #if defined(HAVE_INTEL_AVX2)
-        static int Transform_Sha256_AVX2(wc_Sha256 *sha256);
-        static int Transform_Sha256_AVX2_Len(wc_Sha256* sha256, word32 len);
+        extern int Transform_Sha256_AVX2(wc_Sha256 *sha256, const byte* data);
+        extern int Transform_Sha256_AVX2_Len(wc_Sha256* sha256,
+                                             const byte* data, word32 len);
         #ifdef HAVE_INTEL_RORX
-        static int Transform_Sha256_AVX1_RORX(wc_Sha256 *sha256);
-        static int Transform_Sha256_AVX1_RORX_Len(wc_Sha256* sha256, word32 len);
-        static int Transform_Sha256_AVX2_RORX(wc_Sha256 *sha256);
-        static int Transform_Sha256_AVX2_RORX_Len(wc_Sha256* sha256, word32 len);
-        #endif
-    #endif
-    static int (*Transform_Sha256_p)(wc_Sha256* sha256);
+        extern int Transform_Sha256_AVX1_RORX(wc_Sha256 *sha256, const byte* data);
+        extern int Transform_Sha256_AVX1_RORX_Len(wc_Sha256* sha256,
+                                                  const byte* data, word32 len);
+        extern int Transform_Sha256_AVX2_RORX(wc_Sha256 *sha256, const byte* data);
+        extern int Transform_Sha256_AVX2_RORX_Len(wc_Sha256* sha256,
+                                                  const byte* data, word32 len);
+        #endif /* HAVE_INTEL_RORX */
+    #endif /* HAVE_INTEL_AVX2 */
+
+#ifdef __cplusplus
+    }  /* extern "C" */
+#endif
+
+    static int (*Transform_Sha256_p)(wc_Sha256* sha256, const byte* data);
                                                        /* = _Transform_Sha256 */
-    static int (*Transform_Sha256_Len_p)(wc_Sha256* sha256, word32 len);
+    static int (*Transform_Sha256_Len_p)(wc_Sha256* sha256, const byte* data,
+                                         word32 len);
                                                                     /* = NULL */
     static int transform_check = 0;
     static word32 intel_flags;
-    #define XTRANSFORM(S)         (*Transform_Sha256_p)((S))
-    #define XTRANSFORM_LEN(S, L)  (*Transform_Sha256_Len_p)((S),(L))
+
+    #define XTRANSFORM(S, D)         (*Transform_Sha256_p)((S),(D))
+    #define XTRANSFORM_LEN(S, D, L)  (*Transform_Sha256_Len_p)((S),(D),(L))
 
     static void Sha256_SetTransform(void)
     {
@@ -258,7 +307,7 @@
         intel_flags = cpuid_get_flags();
 
     #ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_AVX2(intel_flags)) {
+        if (1 && IS_INTEL_AVX2(intel_flags)) {
         #ifdef HAVE_INTEL_RORX
             if (IS_INTEL_BMI2(intel_flags)) {
                 Transform_Sha256_p = Transform_Sha256_AVX2_RORX;
@@ -302,6 +351,9 @@
             return BAD_FUNC_ARG;
 
         sha256->heap = heap;
+    #ifdef WOLF_CRYPTO_CB
+        sha256->devId = devId;
+    #endif
 
         ret = InitSha256(sha256);
         if (ret != 0)
@@ -339,8 +391,13 @@
         #include "fsl_mmcau.h"
     #endif
 
-    #define XTRANSFORM(S)        Transform_Sha256((S))
-    #define XTRANSFORM_LEN(S,L)  Transform_Sha256_Len((S),(L))
+    #define XTRANSFORM(S, D)         Transform_Sha256((S),(D))
+    #define XTRANSFORM_LEN(S, D, L)  Transform_Sha256_Len((S),(D),(L))
+
+    #ifndef WC_HASH_DATA_ALIGNMENT
+        /* these hardware API's require 4 byte (word32) alignment */
+        #define WC_HASH_DATA_ALIGNMENT 4
+    #endif
 
     int wc_InitSha256_ex(wc_Sha256* sha256, void* heap, int devId)
     {
@@ -353,6 +410,7 @@
         if (ret != 0) {
             return ret;
         }
+
     #ifdef FREESCALE_MMCAU_CLASSIC_SHA
         cau_sha256_initialize_output(sha256->digest);
     #else
@@ -363,24 +421,64 @@
         sha256->buffLen = 0;
         sha256->loLen   = 0;
         sha256->hiLen   = 0;
+    #ifdef WOLFSSL_SMALL_STACK_CACHE
+        sha256->W = NULL;
+    #endif
 
         return ret;
     }
 
-    static int Transform_Sha256(wc_Sha256* sha256)
+    static int Transform_Sha256(wc_Sha256* sha256, const byte* data)
     {
         int ret = wolfSSL_CryptHwMutexLock();
         if (ret == 0) {
     #ifdef FREESCALE_MMCAU_CLASSIC_SHA
-            cau_sha256_hash_n((byte*)sha256->buffer, 1, sha256->digest);
+            cau_sha256_hash_n((byte*)data, 1, sha256->digest);
     #else
-            MMCAU_SHA256_HashN((byte*)sha256->buffer, 1, sha256->digest);
+            MMCAU_SHA256_HashN((byte*)data, 1, sha256->digest);
     #endif
             wolfSSL_CryptHwMutexUnLock();
         }
         return ret;
     }
 
+    static int Transform_Sha256_Len(wc_Sha256* sha256, const byte* data,
+        word32 len)
+    {
+        int ret = wolfSSL_CryptHwMutexLock();
+        if (ret == 0) {
+        #if defined(WC_HASH_DATA_ALIGNMENT) && WC_HASH_DATA_ALIGNMENT > 0
+            if ((size_t)data % WC_HASH_DATA_ALIGNMENT) {
+                /* data pointer is NOT aligned,
+                 * so copy and perform one block at a time */
+                byte* local = (byte*)sha256->buffer;
+                while (len >= WC_SHA256_BLOCK_SIZE) {
+                    XMEMCPY(local, data, WC_SHA256_BLOCK_SIZE);
+                #ifdef FREESCALE_MMCAU_CLASSIC_SHA
+                    cau_sha256_hash_n(local, 1, sha256->digest);
+                #else
+                    MMCAU_SHA256_HashN(local, 1, sha256->digest);
+                #endif
+                    data += WC_SHA256_BLOCK_SIZE;
+                    len  -= WC_SHA256_BLOCK_SIZE;
+                }
+            }
+            else
+        #endif
+            {
+    #ifdef FREESCALE_MMCAU_CLASSIC_SHA
+            cau_sha256_hash_n((byte*)data, len/WC_SHA256_BLOCK_SIZE,
+                sha256->digest);
+    #else
+            MMCAU_SHA256_HashN((byte*)data, len/WC_SHA256_BLOCK_SIZE,
+                sha256->digest);
+    #endif
+            }
+            wolfSSL_CryptHwMutexUnLock();
+        }
+        return ret;
+    }
+
 #elif defined(WOLFSSL_PIC32MZ_HASH)
     #include <wolfssl/wolfcrypt/port/pic32/pic32mz-crypt.h>
 
@@ -439,6 +537,132 @@
 
 #elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_HASH)
     /* functions defined in wolfcrypt/src/port/caam/caam_sha256.c */
+
+#elif defined(WOLFSSL_AFALG_HASH)
+    /* implemented in wolfcrypt/src/port/af_alg/afalg_hash.c */
+
+#elif defined(WOLFSSL_DEVCRYPTO_HASH)
+    /* implemented in wolfcrypt/src/port/devcrypto/devcrypt_hash.c */
+
+#elif defined(WOLFSSL_SCE) && !defined(WOLFSSL_SCE_NO_HASH)
+    #include "hal_data.h"
+
+    #ifndef WOLFSSL_SCE_SHA256_HANDLE
+        #define WOLFSSL_SCE_SHA256_HANDLE g_sce_hash_0
+    #endif
+
+    #define WC_SHA256_DIGEST_WORD_SIZE 16
+    #define XTRANSFORM(S, D) wc_Sha256SCE_XTRANSFORM((S), (D))
+    static int wc_Sha256SCE_XTRANSFORM(wc_Sha256* sha256, const byte* data)
+    {
+        if (WOLFSSL_SCE_GSCE_HANDLE.p_cfg->endian_flag ==
+                CRYPTO_WORD_ENDIAN_LITTLE)
+        {
+            ByteReverseWords((word32*)data, (word32*)data,
+                    WC_SHA256_BLOCK_SIZE);
+            ByteReverseWords(sha256->digest, sha256->digest,
+                    WC_SHA256_DIGEST_SIZE);
+        }
+
+        if (WOLFSSL_SCE_SHA256_HANDLE.p_api->hashUpdate(
+                    WOLFSSL_SCE_SHA256_HANDLE.p_ctrl, (word32*)data,
+                    WC_SHA256_DIGEST_WORD_SIZE, sha256->digest) != SSP_SUCCESS){
+            WOLFSSL_MSG("Unexpected hardware return value");
+            return WC_HW_E;
+        }
+
+        if (WOLFSSL_SCE_GSCE_HANDLE.p_cfg->endian_flag ==
+                CRYPTO_WORD_ENDIAN_LITTLE)
+        {
+            ByteReverseWords((word32*)data, (word32*)data,
+                    WC_SHA256_BLOCK_SIZE);
+            ByteReverseWords(sha256->digest, sha256->digest,
+                    WC_SHA256_DIGEST_SIZE);
+        }
+
+        return 0;
+    }
+
+
+    int wc_InitSha256_ex(wc_Sha256* sha256, void* heap, int devId)
+    {
+        int ret = 0;
+        if (sha256 == NULL)
+            return BAD_FUNC_ARG;
+
+        sha256->heap = heap;
+
+        ret = InitSha256(sha256);
+        if (ret != 0)
+            return ret;
+
+        (void)devId;
+
+        return ret;
+    }
+
+#elif defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+    !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+
+    #define NEED_SOFT_SHA256
+
+    static int InitSha256(wc_Sha256* sha256)
+    {
+        int ret = 0;
+
+        if (sha256 == NULL)
+            return BAD_FUNC_ARG;
+
+        XMEMSET(sha256->digest, 0, sizeof(sha256->digest));
+        sha256->digest[0] = 0x6A09E667L;
+        sha256->digest[1] = 0xBB67AE85L;
+        sha256->digest[2] = 0x3C6EF372L;
+        sha256->digest[3] = 0xA54FF53AL;
+        sha256->digest[4] = 0x510E527FL;
+        sha256->digest[5] = 0x9B05688CL;
+        sha256->digest[6] = 0x1F83D9ABL;
+        sha256->digest[7] = 0x5BE0CD19L;
+
+        sha256->buffLen = 0;
+        sha256->loLen   = 0;
+        sha256->hiLen   = 0;
+
+        /* always start firstblock = 1 when using hw engine */
+        sha256->ctx.isfirstblock = 1;
+        sha256->ctx.sha_type = SHA2_256;
+        if(sha256->ctx.mode == ESP32_SHA_HW) {
+            /* release hw */
+            esp_sha_hw_unlock();
+        }
+        /* always set mode as INIT
+        *  whether using HW or SW is determined at first call of update()
+        */
+        sha256->ctx.mode = ESP32_SHA_INIT;
+
+        return ret;
+    }
+    int wc_InitSha256_ex(wc_Sha256* sha256, void* heap, int devId)
+    {
+        int ret = 0;
+
+        if (sha256 == NULL)
+            return BAD_FUNC_ARG;
+
+        XMEMSET(sha256, 0, sizeof(wc_Sha256));
+        sha256->ctx.mode = ESP32_SHA_INIT;
+        sha256->ctx.isfirstblock = 1;
+        (void)devId;
+
+        ret = InitSha256(sha256);
+
+        return ret;
+    }
+
+#elif defined(WOLFSSL_RENESAS_TSIP_CRYPT) && \
+    !defined(NO_WOLFSSL_RENESAS_TSIP_CRYPT_HASH)
+
+    /* implemented in wolfcrypt/src/port/Renesas/renesas_tsip_sha.c */
+
 #else
     #define NEED_SOFT_SHA256
 
@@ -449,6 +673,10 @@
             return BAD_FUNC_ARG;
 
         sha256->heap = heap;
+    #ifdef WOLF_CRYPTO_CB
+        sha256->devId = devId;
+        sha256->devCtx = NULL;
+    #endif
 
         ret = InitSha256(sha256);
         if (ret != 0)
@@ -487,8 +715,17 @@
         0x90BEFFFAL, 0xA4506CEBL, 0xBEF9A3F7L, 0xC67178F2L
     };
 
+/* Both versions of Ch and Maj are logically the same, but with the second set
+    the compilers can recognize them better for optimization */
+#ifdef WOLFSSL_SHA256_BY_SPEC
+    /* SHA256 math based on specification */
     #define Ch(x,y,z)       ((z) ^ ((x) & ((y) ^ (z))))
     #define Maj(x,y,z)      ((((x) | (y)) & (z)) | ((x) & (y)))
+#else
+    /* SHA256 math reworked for easier compiler optimization */
+    #define Ch(x,y,z)       ((((y) ^ (z)) & (x)) ^ (z))
+    #define Maj(x,y,z)      ((((x) ^ (y)) & ((y) ^ (z))) ^ (y))
+#endif
     #define R(x, n)         (((x) & 0xFFFFFFFFU) >> (n))
 
     #define S(x, n)         rotrFixed(x, n)
@@ -506,18 +743,18 @@
     #define g(i) S[(6-i) & 7]
     #define h(i) S[(7-i) & 7]
 
+    #ifndef XTRANSFORM
+         #define XTRANSFORM(S, D)         Transform_Sha256((S),(D))
+    #endif
+
+#ifndef SHA256_MANY_REGISTERS
     #define RND(j) \
          t0 = h(j) + Sigma1(e(j)) + Ch(e(j), f(j), g(j)) + K[i+j] + W[i+j]; \
          t1 = Sigma0(a(j)) + Maj(a(j), b(j), c(j)); \
          d(j) += t0; \
          h(j)  = t0 + t1
 
-    #ifndef XTRANSFORM
-         #define XTRANSFORM(S)        Transform_Sha256((S))
-         #define XTRANSFORM_LEN(S,L)  Transform_Sha256_Len((S),(L))
-    #endif
-
-    static int Transform_Sha256(wc_Sha256* sha256)
+    static int Transform_Sha256(wc_Sha256* sha256, const byte* data)
     {
         word32 S[8], t0, t1;
         int i;
@@ -526,7 +763,7 @@
         word32* W = sha256->W;
         if (W == NULL) {
             W = (word32*)XMALLOC(sizeof(word32) * WC_SHA256_BLOCK_SIZE, NULL,
-                                                              DYNAMIC_TYPE_RNG);
+                                                           DYNAMIC_TYPE_DIGEST);
             if (W == NULL)
                 return MEMORY_E;
             sha256->W = W;
@@ -546,7 +783,7 @@
             S[i] = sha256->digest[i];
 
         for (i = 0; i < 16; i++)
-            W[i] = sha256->buffer[i];
+            W[i] = *((word32*)&data[i*sizeof(word32)]);
 
         for (i = 16; i < WC_SHA256_BLOCK_SIZE; i++)
             W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15]) + W[i-16];
@@ -577,6 +814,69 @@
     #endif
         return 0;
     }
+#else
+    /* SHA256 version that keeps all data in registers */
+    #define SCHED1(j) (W[j] = *((word32*)&data[j*sizeof(word32)]))
+    #define SCHED(j) (               \
+                   W[ j     & 15] += \
+            Gamma1(W[(j-2)  & 15])+  \
+                   W[(j-7)  & 15] +  \
+            Gamma0(W[(j-15) & 15])   \
+        )
+
+    #define RND1(j) \
+         t0 = h(j) + Sigma1(e(j)) + Ch(e(j), f(j), g(j)) + K[i+j] + SCHED1(j); \
+         t1 = Sigma0(a(j)) + Maj(a(j), b(j), c(j)); \
+         d(j) += t0; \
+         h(j)  = t0 + t1
+    #define RNDN(j) \
+         t0 = h(j) + Sigma1(e(j)) + Ch(e(j), f(j), g(j)) + K[i+j] + SCHED(j); \
+         t1 = Sigma0(a(j)) + Maj(a(j), b(j), c(j)); \
+         d(j) += t0; \
+         h(j)  = t0 + t1
+
+    static int Transform_Sha256(wc_Sha256* sha256, const byte* data)
+    {
+        word32 S[8], t0, t1;
+        int i;
+        word32 W[WC_SHA256_BLOCK_SIZE/sizeof(word32)];
+
+        /* Copy digest to working vars */
+        S[0] = sha256->digest[0];
+        S[1] = sha256->digest[1];
+        S[2] = sha256->digest[2];
+        S[3] = sha256->digest[3];
+        S[4] = sha256->digest[4];
+        S[5] = sha256->digest[5];
+        S[6] = sha256->digest[6];
+        S[7] = sha256->digest[7];
+
+        i = 0;
+        RND1( 0); RND1( 1); RND1( 2); RND1( 3);
+        RND1( 4); RND1( 5); RND1( 6); RND1( 7);
+        RND1( 8); RND1( 9); RND1(10); RND1(11);
+        RND1(12); RND1(13); RND1(14); RND1(15);
+        /* 64 operations, partially loop unrolled */
+        for (i = 16; i < 64; i += 16) {
+            RNDN( 0); RNDN( 1); RNDN( 2); RNDN( 3);
+            RNDN( 4); RNDN( 5); RNDN( 6); RNDN( 7);
+            RNDN( 8); RNDN( 9); RNDN(10); RNDN(11);
+            RNDN(12); RNDN(13); RNDN(14); RNDN(15);
+        }
+
+        /* Add the working vars back into digest */
+        sha256->digest[0] += S[0];
+        sha256->digest[1] += S[1];
+        sha256->digest[2] += S[2];
+        sha256->digest[3] += S[3];
+        sha256->digest[4] += S[4];
+        sha256->digest[5] += S[5];
+        sha256->digest[6] += S[6];
+        sha256->digest[7] += S[7];
+
+        return 0;
+    }
+#endif /* SHA256_MANY_REGISTERS */
 #endif
 /* End wc_ software implementation */
 
@@ -586,13 +886,16 @@
     static WC_INLINE void AddLength(wc_Sha256* sha256, word32 len)
     {
         word32 tmp = sha256->loLen;
-        if ((sha256->loLen += len) < tmp)
+        if ((sha256->loLen += len) < tmp) {
             sha256->hiLen++;                       /* carry low to high */
+        }
     }
 
+    /* do block size increments/updates */
     static WC_INLINE int Sha256Update(wc_Sha256* sha256, const byte* data, word32 len)
     {
         int ret = 0;
+        word32 blocksLen;
         byte* local;
 
         if (sha256 == NULL || (data == NULL && len > 0)) {
@@ -604,102 +907,129 @@
             return 0;
         }
 
-    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA256)
-        if (sha256->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA256) {
-        #if defined(HAVE_INTEL_QA)
-            return IntelQaSymSha256(&sha256->asyncDev, NULL, data, len);
-        #endif
+        /* check that internal buffLen is valid */
+        if (sha256->buffLen >= WC_SHA256_BLOCK_SIZE) {
+            return BUFFER_E;
         }
-    #endif /* WOLFSSL_ASYNC_CRYPT */
 
-        /* do block size increments */
+        /* add length for final */
+        AddLength(sha256, len);
+
         local = (byte*)sha256->buffer;
 
-        /* check that internal buffLen is valid */
-        if (sha256->buffLen >= WC_SHA256_BLOCK_SIZE)
-            return BUFFER_E;
-
+        /* process any remainder from previous operation */
         if (sha256->buffLen > 0) {
-            word32 add = min(len, WC_SHA256_BLOCK_SIZE - sha256->buffLen);
-            XMEMCPY(&local[sha256->buffLen], data, add);
+            blocksLen = min(len, WC_SHA256_BLOCK_SIZE - sha256->buffLen);
+            XMEMCPY(&local[sha256->buffLen], data, blocksLen);
 
-            sha256->buffLen += add;
-            data            += add;
-            len             -= add;
+            sha256->buffLen += blocksLen;
+            data            += blocksLen;
+            len             -= blocksLen;
 
             if (sha256->buffLen == WC_SHA256_BLOCK_SIZE) {
-        #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
-            #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+            #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
+                #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
                 if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags))
-            #endif
+                #endif
                 {
                     ByteReverseWords(sha256->buffer, sha256->buffer,
-                                                          WC_SHA256_BLOCK_SIZE);
+                        WC_SHA256_BLOCK_SIZE);
+                }
+            #endif
+
+            #if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+                !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+                if (sha256->ctx.mode == ESP32_SHA_INIT){
+                    esp_sha_try_hw_lock(&sha256->ctx);
                 }
-        #endif
-                ret = XTRANSFORM(sha256);
-                if (ret == 0) {
-                    AddLength(sha256, WC_SHA256_BLOCK_SIZE);
+                if (sha256->ctx.mode == ESP32_SHA_SW){
+                    ret = XTRANSFORM(sha256, (const byte*)local);
+                } else {
+                    esp_sha256_process(sha256, (const byte*)local);
+                }
+            #else
+                ret = XTRANSFORM(sha256, (const byte*)local);
+            #endif
+
+                if (ret == 0)
                     sha256->buffLen = 0;
-                }
                 else
-                    len = 0;
+                    len = 0; /* error */
             }
         }
 
-    #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
-        if (Transform_Sha256_Len_p != NULL) {
-            word32 blocksLen = len & ~(WC_SHA256_BLOCK_SIZE-1);
-
+        /* process blocks */
+    #ifdef XTRANSFORM_LEN
+        #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+        if (Transform_Sha256_Len_p != NULL)
+        #endif
+        {
+            /* get number of blocks */
+            /* 64-1 = 0x3F (~ Inverted = 0xFFFFFFC0) */
+            /* len (masked by 0xFFFFFFC0) returns block aligned length */
+            blocksLen = len & ~(WC_SHA256_BLOCK_SIZE-1);
             if (blocksLen > 0) {
-                AddLength(sha256, blocksLen);
-                sha256->data = data;
-                /* Byte reversal performed in function if required. */
-                XTRANSFORM_LEN(sha256, blocksLen);
+                /* Byte reversal and alignment handled in function if required */
+                XTRANSFORM_LEN(sha256, data, blocksLen);
                 data += blocksLen;
                 len  -= blocksLen;
             }
         }
+        #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
         else
-    #endif
-    #if !defined(LITTLE_ENDIAN_ORDER) || defined(FREESCALE_MMCAU_SHA) || \
-                            defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+        #endif
+    #endif /* XTRANSFORM_LEN */
+    #if !defined(XTRANSFORM_LEN) || defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
         {
-            word32 blocksLen = len & ~(WC_SHA256_BLOCK_SIZE-1);
-
-            AddLength(sha256, blocksLen);
             while (len >= WC_SHA256_BLOCK_SIZE) {
-                XMEMCPY(local, data, WC_SHA256_BLOCK_SIZE);
+                word32* local32 = sha256->buffer;
+                /* optimization to avoid memcpy if data pointer is properly aligned */
+                /* Intel transform function requires use of sha256->buffer */
+                /* Little Endian requires byte swap, so can't use data directly */
+            #if defined(WC_HASH_DATA_ALIGNMENT) && !defined(LITTLE_ENDIAN_ORDER) && \
+                !defined(HAVE_INTEL_AVX1) && !defined(HAVE_INTEL_AVX2)
+                if (((size_t)data % WC_HASH_DATA_ALIGNMENT) == 0) {
+                    local32 = (word32*)data;
+                }
+                else
+            #endif
+                {
+                    XMEMCPY(local32, data, WC_SHA256_BLOCK_SIZE);
+                }
 
                 data += WC_SHA256_BLOCK_SIZE;
                 len  -= WC_SHA256_BLOCK_SIZE;
 
-                /* Byte reversal performed in function if required. */
-                ret = XTRANSFORM(sha256);
-                if (ret != 0)
-                    break;
-            }
-        }
-    #else
-        {
-            word32 blocksLen = len & ~(WC_SHA256_BLOCK_SIZE-1);
+            #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
+                #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+                if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags))
+                #endif
+                {
+                    ByteReverseWords(local32, local32, WC_SHA256_BLOCK_SIZE);
+                }
+            #endif
 
-            AddLength(sha256, blocksLen);
-            while (len >= WC_SHA256_BLOCK_SIZE) {
-                XMEMCPY(local, data, WC_SHA256_BLOCK_SIZE);
+            #if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+                !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+                if (sha256->ctx.mode == ESP32_SHA_INIT){
+                    esp_sha_try_hw_lock(&sha256->ctx);
+                }
+                if (sha256->ctx.mode == ESP32_SHA_SW){
+                    ret = XTRANSFORM(sha256, (const byte*)local32);
+                } else {
+                    esp_sha256_process(sha256, (const byte*)local32);
+                }
+            #else
+                ret = XTRANSFORM(sha256, (const byte*)local32);
+            #endif
 
-                data += WC_SHA256_BLOCK_SIZE;
-                len  -= WC_SHA256_BLOCK_SIZE;
-
-                ByteReverseWords(sha256->buffer, sha256->buffer,
-                                                          WC_SHA256_BLOCK_SIZE);
-                ret = XTRANSFORM(sha256);
                 if (ret != 0)
                     break;
             }
         }
     #endif
 
+        /* save remainder */
         if (len > 0) {
             XMEMCPY(local, data, len);
             sha256->buffLen = len;
@@ -710,6 +1040,31 @@
 
     int wc_Sha256Update(wc_Sha256* sha256, const byte* data, word32 len)
     {
+        if (sha256 == NULL || (data == NULL && len > 0)) {
+            return BAD_FUNC_ARG;
+        }
+
+        if (data == NULL && len == 0) {
+            /* valid, but do nothing */
+            return 0;
+        }
+
+    #ifdef WOLF_CRYPTO_CB
+        if (sha256->devId != INVALID_DEVID) {
+            int ret = wc_CryptoCb_Sha256Hash(sha256, data, len, NULL);
+            if (ret != CRYPTOCB_UNAVAILABLE)
+                return ret;
+            /* fall-through when unavailable */
+        }
+    #endif
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA256)
+        if (sha256->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA256) {
+        #if defined(HAVE_INTEL_QA)
+            return IntelQaSymSha256(&sha256->asyncDev, NULL, data, len);
+        #endif
+        }
+    #endif /* WOLFSSL_ASYNC_CRYPT */
+
         return Sha256Update(sha256, data, len);
     }
 
@@ -717,14 +1072,14 @@
     {
 
         int ret;
-        byte* local = (byte*)sha256->buffer;
+        byte* local;
 
         if (sha256 == NULL) {
             return BAD_FUNC_ARG;
         }
 
-        AddLength(sha256, sha256->buffLen);  /* before adding pads */
-        local[sha256->buffLen++] = 0x80;     /* add 1 */
+        local = (byte*)sha256->buffer;
+        local[sha256->buffLen++] = 0x80; /* add 1 */
 
         /* pad with zeros */
         if (sha256->buffLen > WC_SHA256_PAD_SIZE) {
@@ -732,25 +1087,36 @@
                 WC_SHA256_BLOCK_SIZE - sha256->buffLen);
             sha256->buffLen += WC_SHA256_BLOCK_SIZE - sha256->buffLen;
 
-            {
         #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
             #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
-                if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags))
+            if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags))
             #endif
-                {
-                    ByteReverseWords(sha256->buffer, sha256->buffer,
-                                                          WC_SHA256_BLOCK_SIZE);
-                }
+            {
+                ByteReverseWords(sha256->buffer, sha256->buffer,
+                                                      WC_SHA256_BLOCK_SIZE);
+            }
         #endif
+
+        #if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+             !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+            if (sha256->ctx.mode == ESP32_SHA_INIT) {
+                esp_sha_try_hw_lock(&sha256->ctx);
             }
-
-            ret = XTRANSFORM(sha256);
+            if (sha256->ctx.mode == ESP32_SHA_SW) {
+                ret = XTRANSFORM(sha256, (const byte*)local);
+            } else {
+                ret = esp_sha256_process(sha256, (const byte*)local);
+            }
+        #else
+            ret = XTRANSFORM(sha256, (const byte*)local);
+        #endif
             if (ret != 0)
                 return ret;
 
             sha256->buffLen = 0;
         }
-        XMEMSET(&local[sha256->buffLen], 0, WC_SHA256_PAD_SIZE - sha256->buffLen);
+        XMEMSET(&local[sha256->buffLen], 0,
+            WC_SHA256_PAD_SIZE - sha256->buffLen);
 
         /* put lengths in bits */
         sha256->hiLen = (sha256->loLen >> (8 * sizeof(sha256->loLen) - 3)) +
@@ -760,12 +1126,12 @@
         /* store lengths */
     #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
         #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
-            if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags))
+        if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags))
         #endif
-            {
-                ByteReverseWords(sha256->buffer, sha256->buffer,
-                    WC_SHA256_BLOCK_SIZE);
-            }
+        {
+            ByteReverseWords(sha256->buffer, sha256->buffer,
+                WC_SHA256_BLOCK_SIZE);
+        }
     #endif
         /* ! length ordering dependent on digest endian type ! */
         XMEMCPY(&local[WC_SHA256_PAD_SIZE], &sha256->hiLen, sizeof(word32));
@@ -776,17 +1142,31 @@
         defined(HAVE_INTEL_AVX2)
         /* Kinetis requires only these bytes reversed */
         #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
-            if (IS_INTEL_AVX1(intel_flags) || IS_INTEL_AVX2(intel_flags))
+        if (IS_INTEL_AVX1(intel_flags) || IS_INTEL_AVX2(intel_flags))
         #endif
-            {
-                ByteReverseWords(
-                    &sha256->buffer[WC_SHA256_PAD_SIZE / sizeof(word32)],
-                    &sha256->buffer[WC_SHA256_PAD_SIZE / sizeof(word32)],
-                    2 * sizeof(word32));
-            }
+        {
+            ByteReverseWords(
+                &sha256->buffer[WC_SHA256_PAD_SIZE / sizeof(word32)],
+                &sha256->buffer[WC_SHA256_PAD_SIZE / sizeof(word32)],
+                2 * sizeof(word32));
+        }
     #endif
 
-        return XTRANSFORM(sha256);
+    #if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+         !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+        if (sha256->ctx.mode == ESP32_SHA_INIT) {
+            esp_sha_try_hw_lock(&sha256->ctx);
+        }
+        if (sha256->ctx.mode == ESP32_SHA_SW) {
+            ret = XTRANSFORM(sha256, (const byte*)local);
+        } else {
+            ret = esp_sha256_digest_process(sha256, 1);
+        }
+    #else
+        ret = XTRANSFORM(sha256, (const byte*)local);
+    #endif
+
+        return ret;
     }
 
     int wc_Sha256FinalRaw(wc_Sha256* sha256, byte* hash)
@@ -818,6 +1198,15 @@
             return BAD_FUNC_ARG;
         }
 
+    #ifdef WOLF_CRYPTO_CB
+        if (sha256->devId != INVALID_DEVID) {
+            ret = wc_CryptoCb_Sha256Hash(sha256, NULL, 0, hash);
+            if (ret != CRYPTOCB_UNAVAILABLE)
+                return ret;
+            /* fall-through when unavailable */
+        }
+    #endif
+
     #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA256)
         if (sha256->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA256) {
         #if defined(HAVE_INTEL_QA)
@@ -841,1680 +1230,6 @@
 
 #endif /* XTRANSFORM */
 
-
-#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
-
-#define _LOAD_DIGEST()                     \
-    "movl	  (%[sha256]), %%r8d \n\t" \
-    "movl	 4(%[sha256]), %%r9d \n\t" \
-    "movl	 8(%[sha256]), %%r10d\n\t" \
-    "movl	12(%[sha256]), %%r11d\n\t" \
-    "movl	16(%[sha256]), %%r12d\n\t" \
-    "movl	20(%[sha256]), %%r13d\n\t" \
-    "movl	24(%[sha256]), %%r14d\n\t" \
-    "movl	28(%[sha256]), %%r15d\n\t"
-
-#define _STORE_ADD_DIGEST()                \
-    "addl	%%r8d ,   (%[sha256])\n\t" \
-    "addl	%%r9d ,  4(%[sha256])\n\t" \
-    "addl	%%r10d,  8(%[sha256])\n\t" \
-    "addl	%%r11d, 12(%[sha256])\n\t" \
-    "addl	%%r12d, 16(%[sha256])\n\t" \
-    "addl	%%r13d, 20(%[sha256])\n\t" \
-    "addl	%%r14d, 24(%[sha256])\n\t" \
-    "addl	%%r15d, 28(%[sha256])\n\t"
-
-#define _ADD_DIGEST()                      \
-    "addl	  (%[sha256]), %%r8d \n\t" \
-    "addl	 4(%[sha256]), %%r9d \n\t" \
-    "addl	 8(%[sha256]), %%r10d\n\t" \
-    "addl	12(%[sha256]), %%r11d\n\t" \
-    "addl	16(%[sha256]), %%r12d\n\t" \
-    "addl	20(%[sha256]), %%r13d\n\t" \
-    "addl	24(%[sha256]), %%r14d\n\t" \
-    "addl	28(%[sha256]), %%r15d\n\t"
-
-#define _STORE_DIGEST()                    \
-    "movl	%%r8d ,   (%[sha256])\n\t" \
-    "movl	%%r9d ,  4(%[sha256])\n\t" \
-    "movl	%%r10d,  8(%[sha256])\n\t" \
-    "movl	%%r11d, 12(%[sha256])\n\t" \
-    "movl	%%r12d, 16(%[sha256])\n\t" \
-    "movl	%%r13d, 20(%[sha256])\n\t" \
-    "movl	%%r14d, 24(%[sha256])\n\t" \
-    "movl	%%r15d, 28(%[sha256])\n\t"
-
-#define LOAD_DIGEST() \
-    _LOAD_DIGEST()
-
-#define STORE_ADD_DIGEST() \
-    _STORE_ADD_DIGEST()
-
-#define ADD_DIGEST() \
-    _ADD_DIGEST()
-
-#define STORE_DIGEST() \
-    _STORE_DIGEST()
-
-
-#define S_0 %r8d
-#define S_1 %r9d
-#define S_2 %r10d
-#define S_3 %r11d
-#define S_4 %r12d
-#define S_5 %r13d
-#define S_6 %r14d
-#define S_7 %r15d
-
-#define L1  "%%edx"
-#define L2  "%%ecx"
-#define L3  "%%eax"
-#define L4  "%%ebx"
-#define WK  "%%rsp"
-
-#define WORK_REGS  "eax", "ebx", "ecx", "edx"
-#define STATE_REGS "r8","r9","r10","r11","r12","r13","r14","r15"
-#define XMM_REGS   "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",    \
-                   "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13"
-
-#if defined(HAVE_INTEL_RORX)
-#define RND_STEP_RORX_0_1(a, b, c, d, e, f, g, h, i) \
-    /* L3 = f */                                     \
-    "movl	%" #f ", " L3 "\n\t"                 \
-    /* L2 = e>>>11 */                                \
-    "rorx	$11, %" #e ", " L2 "\n\t"            \
-    /* h += w_k */                                   \
-    "addl	(" #i ")*4(" WK "), %" #h "\n\t"     \
-
-#define RND_STEP_RORX_0_2(a, b, c, d, e, f, g, h, i) \
-    /* L2 = (e>>>6) ^ (e>>>11) */                    \
-    "xorl	" L1 ", " L2 "\n\t"                  \
-    /* L3 = f ^ g */                                 \
-    "xorl	%" #g ", " L3 "\n\t"                 \
-    /* L1 = e>>>25 */                                \
-    "rorx	$25, %" #e ", " L1 "\n\t"            \
-
-#define RND_STEP_RORX_0_3(a, b, c, d, e, f, g, h, i) \
-    /* L3 = (f ^ g) & e */                           \
-    "andl	%" #e ", " L3 "\n\t"                 \
-    /* L1 = Sigma1(e) */                             \
-    "xorl	" L2 ", " L1 "\n\t"                  \
-    /* L2 = a>>>13 */                                \
-    "rorx	$13, %" #a ", " L2 "\n\t"            \
-
-#define RND_STEP_RORX_0_4(a, b, c, d, e, f, g, h, i) \
-    /* h += Sigma1(e) */                             \
-    "addl	" L1 ", %" #h "\n\t"                 \
-    /* L1 = a>>>2 */                                 \
-    "rorx	$2, %" #a ", " L1 "\n\t"             \
-    /* L3 = Ch(e,f,g) */                             \
-    "xorl	%" #g ", " L3 "\n\t"                 \
-
-#define RND_STEP_RORX_0_5(a, b, c, d, e, f, g, h, i) \
-    /* L2 = (a>>>2) ^ (a>>>13) */                    \
-    "xorl	" L1 ", " L2 "\n\t"                  \
-    /* L1 = a>>>22 */                                \
-    "rorx	$22, %" #a ", " L1 "\n\t"            \
-    /* h += Ch(e,f,g) */                             \
-    "addl	" L3 ", %" #h "\n\t"                 \
-
-#define RND_STEP_RORX_0_6(a, b, c, d, e, f, g, h, i) \
-    /* L1 = Sigma0(a) */                             \
-    "xorl	" L2 ", " L1 "\n\t"                  \
-    /* L3 = b */                                     \
-    "movl	%" #b ", " L3 "\n\t"                 \
-    /* d += h + w_k + Sigma1(e) + Ch(e,f,g) */       \
-    "addl	%" #h ", %" #d "\n\t"                \
-
-#define RND_STEP_RORX_0_7(a, b, c, d, e, f, g, h, i) \
-    /* L3 = a ^ b */                                 \
-    "xorl	%" #a ", " L3 "\n\t"                 \
-    /* h += Sigma0(a) */                             \
-    "addl	" L1 ", %" #h "\n\t"                 \
-    /* L4 = (a ^ b) & (b ^ c) */                     \
-    "andl	" L3 ", " L4 "\n\t"                  \
-
-#define RND_STEP_RORX_0_8(a, b, c, d, e, f, g, h, i) \
-    /* L4 = Maj(a,b,c) */                            \
-    "xorl	%" #b ", " L4 "\n\t"                 \
-    /* L1 = d>>>6 (= e>>>6 next RND) */              \
-    "rorx	$6, %" #d ", " L1 "\n\t"             \
-    /* h += Maj(a,b,c) */                            \
-    "addl	" L4 ", %" #h "\n\t"                 \
-
-#define RND_STEP_RORX_1_1(a, b, c, d, e, f, g, h, i) \
-    /* L4 = f */                                     \
-    "movl	%" #f ", " L4 "\n\t"                 \
-    /* L2 = e>>>11 */                                \
-    "rorx	$11, %" #e ", " L2 "\n\t"            \
-    /* h += w_k */                                   \
-    "addl	(" #i ")*4(" WK "), %" #h "\n\t"     \
-
-#define RND_STEP_RORX_1_2(a, b, c, d, e, f, g, h, i) \
-    /* L2 = (e>>>6) ^ (e>>>11) */                    \
-    "xorl	" L1 ", " L2 "\n\t"                  \
-    /* L4 = f ^ g */                                 \
-    "xorl	%" #g ", " L4 "\n\t"                 \
-    /* L1 = e>>>25 */                                \
-    "rorx	$25, %" #e ", " L1 "\n\t"            \
-
-#define RND_STEP_RORX_1_3(a, b, c, d, e, f, g, h, i) \
-    /* L4 = (f ^ g) & e */                           \
-    "andl	%" #e ", " L4 "\n\t"                 \
-    /* L1 = Sigma1(e) */                             \
-    "xorl	" L2 ", " L1 "\n\t"                  \
-    /* L2 = a>>>13 */                                \
-    "rorx	$13, %" #a ", " L2 "\n\t"            \
-
-#define RND_STEP_RORX_1_4(a, b, c, d, e, f, g, h, i) \
-    /* h += Sigma1(e) */                             \
-    "addl	" L1 ", %" #h "\n\t"                 \
-    /* L1 = a>>>2 */                                 \
-    "rorx	$2, %" #a ", " L1 "\n\t"             \
-    /* L4 = Ch(e,f,g) */                             \
-    "xorl	%" #g ", " L4 "\n\t"                 \
-
-#define RND_STEP_RORX_1_5(a, b, c, d, e, f, g, h, i) \
-    /* L2 = (a>>>2) ^ (a>>>13) */                    \
-    "xorl	" L1 ", " L2 "\n\t"                  \
-    /* L1 = a>>>22 */                                \
-    "rorx	$22, %" #a ", " L1 "\n\t"            \
-    /* h += Ch(e,f,g) */                             \
-    "addl	" L4 ", %" #h "\n\t"                 \
-
-#define RND_STEP_RORX_1_6(a, b, c, d, e, f, g, h, i) \
-    /* L1 = Sigma0(a) */                             \
-    "xorl	" L2 ", " L1 "\n\t"                  \
-    /* L4 = b */                                     \
-    "movl	%" #b ", " L4 "\n\t"                 \
-    /* d += h + w_k + Sigma1(e) + Ch(e,f,g) */       \
-    "addl	%" #h ", %" #d "\n\t"                \
-
-#define RND_STEP_RORX_1_7(a, b, c, d, e, f, g, h, i) \
-    /* L4 = a ^ b */                                 \
-    "xorl	%" #a ", " L4 "\n\t"                 \
-    /* h += Sigma0(a) */                             \
-    "addl	" L1 ", %" #h "\n\t"                 \
-    /* L3 = (a ^ b) & (b ^ c) */                     \
-    "andl	" L4 ", " L3 "\n\t"                  \
-
-#define RND_STEP_RORX_1_8(a, b, c, d, e, f, g, h, i) \
-    /* L3 = Maj(a,b,c) */                            \
-    "xorl	%" #b ", " L3 "\n\t"                 \
-    /* L1 = d>>>6 (= e>>>6 next RND) */              \
-    "rorx	$6, %" #d ", " L1 "\n\t"             \
-    /* h += Maj(a,b,c) */                            \
-    "addl	" L3 ", %" #h "\n\t"                 \
-
-#define _RND_RORX_X_0(a, b, c, d, e, f, g, h, i)     \
-    /* L1 = e>>>6 */                                 \
-    "rorx	$6, %" #e ", " L1 "\n\t"             \
-    /* L2 = e>>>11 */                                \
-    "rorx	$11, %" #e ", " L2 "\n\t"            \
-    /* Prev RND: h += Maj(a,b,c) */                  \
-    "addl	" L3 ", %" #a "\n\t"                 \
-    /* h += w_k */                                   \
-    "addl	(" #i ")*4(" WK "), %" #h "\n\t"     \
-    /* L3 = f */                                     \
-    "movl	%" #f ", " L3 "\n\t"                 \
-    /* L2 = (e>>>6) ^ (e>>>11) */                    \
-    "xorl	" L1 ", " L2 "\n\t"                  \
-    /* L3 = f ^ g */                                 \
-    "xorl	%" #g ", " L3 "\n\t"                 \
-    /* L1 = e>>>25 */                                \
-    "rorx	$25, %" #e ", " L1 "\n\t"            \
-    /* L1 = Sigma1(e) */                             \
-    "xorl	" L2 ", " L1 "\n\t"                  \
-    /* L3 = (f ^ g) & e */                           \
-    "andl	%" #e ", " L3 "\n\t"                 \
-    /* h += Sigma1(e) */                             \
-    "addl	" L1 ", %" #h "\n\t"                 \
-    /* L1 = a>>>2 */                                 \
-    "rorx	$2, %" #a ", " L1 "\n\t"             \
-    /* L2 = a>>>13 */                                \
-    "rorx	$13, %" #a ", " L2 "\n\t"            \
-    /* L3 = Ch(e,f,g) */                             \
-    "xorl	%" #g ", " L3 "\n\t"                 \
-    /* L2 = (a>>>2) ^ (a>>>13) */                    \
-    "xorl	" L1 ", " L2 "\n\t"                  \
-    /* L1 = a>>>22 */                                \
-    "rorx	$22, %" #a ", " L1 "\n\t"            \
-    /* h += Ch(e,f,g) */                             \
-    "addl	" L3 ", %" #h "\n\t"                 \
-    /* L1 = Sigma0(a) */                             \
-    "xorl	" L2 ", " L1 "\n\t"                  \
-    /* L3 = b */                                     \
-    "movl	%" #b ", " L3 "\n\t"                 \
-    /* d += h + w_k + Sigma1(e) + Ch(e,f,g) */       \
-    "addl	%" #h ", %" #d "\n\t"                \
-    /* L3 = a ^ b */                                 \
-    "xorl	%" #a ", " L3 "\n\t"                 \
-    /* L4 = (a ^ b) & (b ^ c) */                     \
-    "andl	" L3 ", " L4 "\n\t"                  \
-    /* h += Sigma0(a) */                             \
-    "addl	" L1 ", %" #h "\n\t"                 \
-    /* L4 = Maj(a,b,c) */                            \
-    "xorl	%" #b ", " L4 "\n\t"                 \
-
-#define _RND_RORX_X_1(a, b, c, d, e, f, g, h, i)     \
-    /* L1 = e>>>6 */                                 \
-    "rorx	$6, %" #e ", " L1 "\n\t"             \
-    /* L2 = e>>>11 */                                \
-    "rorx	$11, %" #e ", " L2 "\n\t"            \
-    /* Prev RND: h += Maj(a,b,c) */                  \
-    "addl	" L4 ", %" #a "\n\t"                 \
-    /* h += w_k */                                   \
-    "addl	(" #i ")*4(" WK "), %" #h "\n\t"     \
-    /* L4 = f */                                     \
-    "movl	%" #f ", " L4 "\n\t"                 \
-    /* L2 = (e>>>6) ^ (e>>>11) */                    \
-    "xorl	" L1 ", " L2 "\n\t"                  \
-    /* L4 = f ^ g */                                 \
-    "xorl	%" #g ", " L4 "\n\t"                 \
-    /* L1 = e>>>25 */                                \
-    "rorx	$25, %" #e ", " L1 "\n\t"            \
-    /* L1 = Sigma1(e) */                             \
-    "xorl	" L2 ", " L1 "\n\t"                  \
-    /* L4 = (f ^ g) & e */                           \
-    "andl	%" #e ", " L4 "\n\t"                 \
-    /* h += Sigma1(e) */                             \
-    "addl	" L1 ", %" #h "\n\t"                 \
-    /* L1 = a>>>2 */                                 \
-    "rorx	$2, %" #a ", " L1 "\n\t"             \
-    /* L2 = a>>>13 */                                \
-    "rorx	$13, %" #a ", " L2 "\n\t"            \
-    /* L4 = Ch(e,f,g) */                             \
-    "xorl	%" #g ", " L4 "\n\t"                 \
-    /* L2 = (a>>>2) ^ (a>>>13) */                    \
-    "xorl	" L1 ", " L2 "\n\t"                  \
-    /* L1 = a>>>22 */                                \
-    "rorx	$22, %" #a ", " L1 "\n\t"            \
-    /* h += Ch(e,f,g) */                             \
-    "addl	" L4 ", %" #h "\n\t"                 \
-    /* L1 = Sigma0(a) */                             \
-    "xorl	" L2 ", " L1 "\n\t"                  \
-    /* L4 = b */                                     \
-    "movl	%" #b ", " L4 "\n\t"                 \
-    /* d += h + w_k + Sigma1(e) + Ch(e,f,g) */       \
-    "addl	%" #h ", %" #d "\n\t"                \
-    /* L4 = a ^ b */                                 \
-    "xorl	%" #a ", " L4 "\n\t"                 \
-    /* L2 = (a ^ b) & (b ^ c) */                     \
-    "andl	" L4 ", " L3 "\n\t"                  \
-    /* h += Sigma0(a) */                             \
-    "addl	" L1 ", %" #h "\n\t"                 \
-    /* L3 = Maj(a,b,c) */                            \
-    "xorl	%" #b ", " L3 "\n\t"                 \
-
-
-#define RND_RORX_X_0(a,b,c,d,e,f,g,h,i) \
-       _RND_RORX_X_0(a,b,c,d,e,f,g,h,i)
-#define RND_RORX_X_1(a,b,c,d,e,f,g,h,i) \
-       _RND_RORX_X_1(a,b,c,d,e,f,g,h,i)
-
-#define RND_RORX_X4(a,b,c,d,e,f,g,h,i)    \
-        RND_RORX_X_0(a,b,c,d,e,f,g,h,i+0) \
-        RND_RORX_X_1(h,a,b,c,d,e,f,g,i+1) \
-        RND_RORX_X_0(g,h,a,b,c,d,e,f,i+2) \
-        RND_RORX_X_1(f,g,h,a,b,c,d,e,i+3)
-
-#endif /* HAVE_INTEL_RORX */
-
-#define RND_STEP_0_1(a,b,c,d,e,f,g,h,i)                               \
-    /* L1 = e>>>14 */                                                 \
-    "rorl	$14, " L1 "\n\t"                                      \
-
-#define RND_STEP_0_2(a,b,c,d,e,f,g,h,i)                               \
-    /* L3 = b */                                                      \
-    "movl	%" #b ", " L3 "\n\t"                                  \
-    /* L2 = f */                                                      \
-    "movl	%" #f ", " L2 "\n\t"                                  \
-    /* h += w_k */                                                    \
-    "addl	(" #i ")*4(" WK "), %" #h "\n\t"                      \
-    /* L2 = f ^ g */                                                  \
-    "xorl	%" #g ", " L2 "\n\t"                                  \
-
-#define RND_STEP_0_3(a,b,c,d,e,f,g,h,i)                               \
-    /* L1 = (e>>>14) ^ e */                                           \
-    "xorl	%" #e ", " L1 "\n\t"                                  \
-    /* L2 = (f ^ g) & e */                                            \
-    "andl	%" #e ", " L2 "\n\t"                                  \
- 
-#define RND_STEP_0_4(a,b,c,d,e,f,g,h,i)                               \
-    /* L1 = ((e>>>14) ^ e) >>> 5 */                                   \
-    "rorl	$5, " L1 "\n\t"                                       \
-    /* L2 = Ch(e,f,g) */                                              \
-    "xorl	%" #g ", " L2 "\n\t"                                  \
-    /* L1 = (((e>>>14) ^ e) >>> 5) ^ e */                             \
-    "xorl	%" #e ", " L1 "\n\t"                                  \
-    /* h += Ch(e,f,g) */                                              \
-    "addl	" L2 ", %" #h "\n\t"                                  \
-
-#define RND_STEP_0_5(a,b,c,d,e,f,g,h,i)                               \
-    /* L1 = ((((e>>>14) ^ e) >>> 5) ^ e) >>> 6 */                     \
-    "rorl	$6, " L1 "\n\t"                                       \
-    /* L3 = a ^ b (= b ^ c of next RND) */                            \
-    "xorl	%" #a ", " L3 "\n\t"                                  \
-    /* h = h + w_k + Sigma1(e) */                                     \
-    "addl	" L1 ", %" #h "\n\t"                                  \
-    /* L2 = a */                                                      \
-    "movl	%" #a ", " L2 "\n\t"                                  \
-
-#define RND_STEP_0_6(a,b,c,d,e,f,g,h,i)                               \
-    /* L3 = (a ^ b) & (b ^ c) */                                      \
-    "andl	" L3 ", " L4 "\n\t"                                   \
-    /* L2 = a>>>9 */                                                  \
-    "rorl	$9, " L2 "\n\t"                                       \
-    /* L2 = (a>>>9) ^ a */                                            \
-    "xorl	%" #a ", " L2 "\n\t"                                  \
-    /* L1 = Maj(a,b,c) */                                             \
-    "xorl	%" #b ", " L4 "\n\t"                                  \
-
-#define RND_STEP_0_7(a,b,c,d,e,f,g,h,i)                               \
-    /* L2 = ((a>>>9) ^ a) >>> 11 */                                   \
-    "rorl	$11, " L2 "\n\t"                                      \
-    /* d += h + w_k + Sigma1(e) + Ch(e,f,g) */                        \
-    "addl	%" #h ", %" #d "\n\t"                                 \
-    /* L2 = (((a>>>9) ^ a) >>> 11) ^ a */                             \
-    "xorl	%" #a ", " L2 "\n\t"                                  \
-    /* h = h + w_k + Sigma1(e) + Ch(e,f,g) + Maj(a,b,c) */            \
-    "addl	" L4 ", %" #h "\n\t"                                  \
-
-#define RND_STEP_0_8(a,b,c,d,e,f,g,h,i)                               \
-    /* L2 = ((((a>>>9) ^ a) >>> 11) ^ a) >>> 2 */                     \
-    "rorl	$2, " L2 "\n\t"                                       \
-    /* L1 = d (e of next RND) */                                      \
-    "movl	%" #d ", " L1 "\n\t"                                  \
-    /* h = h + w_k + Sigma1(e) Sigma0(a) + Ch(e,f,g) + Maj(a,b,c) */  \
-    "addl	" L2 ", %" #h "\n\t"                                  \
-
-#define RND_STEP_1_1(a,b,c,d,e,f,g,h,i)                               \
-    /* L1 = e>>>14 */                                                 \
-    "rorl	$14, " L1 "\n\t"                                      \
- 
-#define RND_STEP_1_2(a,b,c,d,e,f,g,h,i)                               \
-    /* L3 = b */                                                      \
-    "movl	%" #b ", " L4 "\n\t"                                  \
-    /* L2 = f */                                                      \
-    "movl	%" #f ", " L2 "\n\t"                                  \
-    /* h += w_k */                                                    \
-    "addl	(" #i ")*4(" WK "), %" #h "\n\t"                      \
-    /* L2 = f ^ g */                                                  \
-    "xorl	%" #g ", " L2 "\n\t"                                  \
- 
-#define RND_STEP_1_3(a,b,c,d,e,f,g,h,i)                               \
-    /* L1 = (e>>>14) ^ e */                                           \
-    "xorl	%" #e ", " L1 "\n\t"                                  \
-    /* L2 = (f ^ g) & e */                                            \
-    "andl	%" #e ", " L2 "\n\t"                                  \
- 
-#define RND_STEP_1_4(a,b,c,d,e,f,g,h,i)                               \
-    /* L1 = ((e>>>14) ^ e) >>> 5 */                                   \
-    "rorl	$5, " L1 "\n\t"                                       \
-    /* L2 = Ch(e,f,g) */                                              \
-    "xorl	%" #g ", " L2 "\n\t"                                  \
-    /* L1 = (((e>>>14) ^ e) >>> 5) ^ e */                             \
-    "xorl	%" #e ", " L1 "\n\t"                                  \
-    /* h += Ch(e,f,g) */                                              \
-    "addl	" L2 ", %" #h "\n\t"                                  \
-
-#define RND_STEP_1_5(a,b,c,d,e,f,g,h,i)                               \
-    /* L1 = ((((e>>>14) ^ e) >>> 5) ^ e) >>> 6 */                     \
-    "rorl	$6, " L1 "\n\t"                                       \
-    /* L4 = a ^ b (= b ^ c of next RND) */                            \
-    "xorl	%" #a ", " L4 "\n\t"                                  \
-    /* h = h + w_k + Sigma1(e) */                                     \
-    "addl	" L1 ", %" #h "\n\t"                                  \
-    /* L2 = a */                                                      \
-    "movl	%" #a ", " L2 "\n\t"                                  \
-
-#define RND_STEP_1_6(a,b,c,d,e,f,g,h,i)                               \
-    /* L3 = (a ^ b) & (b ^ c)  */                                     \
-    "andl	" L4 ", " L3 "\n\t"                                   \
-    /* L2 = a>>>9 */                                                  \
-    "rorl	$9, " L2 "\n\t"                                       \
-    /* L2 = (a>>>9) ^ a */                                            \
-    "xorl	%" #a ", " L2 "\n\t"                                  \
-    /* L1 = Maj(a,b,c) */                                             \
-    "xorl	%" #b ", " L3 "\n\t"                                  \
-
-#define RND_STEP_1_7(a,b,c,d,e,f,g,h,i)                               \
-    /* L2 = ((a>>>9) ^ a) >>> 11 */                                   \
-    "rorl	$11, " L2 "\n\t"                                      \
-    /* d += h + w_k + Sigma1(e) + Ch(e,f,g) */                        \
-    "addl	%" #h ", %" #d "\n\t"                                 \
-    /* L2 = (((a>>>9) ^ a) >>> 11) ^ a */                             \
-    "xorl	%" #a ", " L2 "\n\t"                                  \
-    /* h = h + w_k + Sigma1(e) + Ch(e,f,g) + Maj(a,b,c) */            \
-    "addl	" L3 ", %" #h "\n\t"                                  \
-
-#define RND_STEP_1_8(a,b,c,d,e,f,g,h,i)                               \
-    /* L2 = ((((a>>>9) ^ a) >>> 11) ^ a) >>> 2 */                     \
-    "rorl	$2, " L2 "\n\t"                                       \
-    /* L1 = d (e of next RND) */                                      \
-    "movl	%" #d ", " L1 "\n\t"                                  \
-    /* h = h + w_k + Sigma1(e) Sigma0(a) + Ch(e,f,g) + Maj(a,b,c) */  \
-    "addl	" L2 ", %" #h "\n\t"                                  \
-
-#define _RND_ALL_0(a,b,c,d,e,f,g,h,i)                                 \
-    /* h += w_k */                                                    \
-    "addl	(" #i ")*4(" WK "), %" #h "\n\t"                      \
-    /* L2 = f */                                                      \
-    "movl	%" #f ", " L2 "\n\t"                                  \
-    /* L3 = b */                                                      \
-    "movl	%" #b ", " L3 "\n\t"                                  \
-    /* L2 = f ^ g */                                                  \
-    "xorl	%" #g ", " L2 "\n\t"                                  \
-    /* L1 = e>>>14 */                                                 \
-    "rorl	$14, " L1 "\n\t"                                      \
-    /* L2 = (f ^ g) & e */                                            \
-    "andl	%" #e ", " L2 "\n\t"                                  \
-    /* L1 = (e>>>14) ^ e */                                           \
-    "xorl	%" #e ", " L1 "\n\t"                                  \
-    /* L2 = Ch(e,f,g) */                                              \
-    "xorl	%" #g ", " L2 "\n\t"                                  \
-    /* L1 = ((e>>>14) ^ e) >>> 5 */                                   \
-    "rorl	$5, " L1 "\n\t"                                       \
-    /* h += Ch(e,f,g) */                                              \
-    "addl	" L2 ", %" #h "\n\t"                                  \
-    /* L1 = (((e>>>14) ^ e) >>> 5) ^ e */                             \
-    "xorl	%" #e ", " L1 "\n\t"                                  \
-    /* L3 = a ^ b */                                                  \
-    "xorl	%" #a ", " L3 "\n\t"                                  \
-    /* L1 = ((((e>>>14) ^ e) >>> 5) ^ e) >>> 6 */                     \
-    "rorl	$6, " L1 "\n\t"                                       \
-    /* L2 = a */                                                      \
-    "movl	%" #a ", " L2 "\n\t"                                  \
-    /* h = h + w_k + Sigma1(e) */                                     \
-    "addl	" L1 ", %" #h "\n\t"                                  \
-    /* L2 = a>>>9 */                                                  \
-    "rorl	$9, " L2 "\n\t"                                       \
-    /* L3 = (a ^ b) & (b ^ c) */                                      \
-    "andl	" L3 ", " L4 "\n\t"                                   \
-    /* L2 = (a>>>9) ^ a */                                            \
-    "xorl	%" #a ", " L2 "\n\t"                                  \
-    /* L1 = Maj(a,b,c) */                                             \
-    "xorl	%" #b ", " L4 "\n\t"                                  \
-    /* L2 = ((a>>>9) ^ a) >>> 11 */                                   \
-    "rorl	$11, " L2 "\n\t"                                      \
-    /* d += h + w_k + Sigma1(e) + Ch(e,f,g) */                        \
-    "addl	%" #h ", %" #d "\n\t"                                 \
-    /* L2 = (((a>>>9) ^ a) >>> 11) ^ a */                             \
-    "xorl	%" #a ", " L2 "\n\t"                                  \
-    /* h = h + w_k + Sigma1(e) + Ch(e,f,g) + Maj(a,b,c) */            \
-    "addl	" L4 ", %" #h "\n\t"                                  \
-    /* L2 = ((((a>>>9) ^ a) >>> 11) ^ a) >>> 2 */                     \
-    "rorl	$2, " L2 "\n\t"                                       \
-    /* L1 = d (e of next RND) */                                      \
-    "movl	%" #d ", " L1 "\n\t"                                  \
-    /* h = h + w_k + Sigma1(e) Sigma0(a) + Ch(e,f,g) + Maj(a,b,c) */  \
-    "addl	" L2 ", %" #h "\n\t"                                  \
-
-#define _RND_ALL_1(a,b,c,d,e,f,g,h,i)                                 \
-    /* h += w_k */                                                    \
-    "addl	(" #i ")*4(" WK "), %" #h "\n\t"                      \
-    /* L2 = f */                                                      \
-    "movl	%" #f ", " L2 "\n\t"                                  \
-    /* L3 = b */                                                      \
-    "movl	%" #b ", " L4 "\n\t"                                  \
-    /* L2 = f ^ g */                                                  \
-    "xorl	%" #g ", " L2 "\n\t"                                  \
-    /* L1 = e>>>14 */                                                 \
-    "rorl	$14, " L1 "\n\t"                                      \
-    /* L2 = (f ^ g) & e */                                            \
-    "andl	%" #e ", " L2 "\n\t"                                  \
-    /* L1 = (e>>>14) ^ e */                                           \
-    "xorl	%" #e ", " L1 "\n\t"                                  \
-    /* L2 = Ch(e,f,g) */                                              \
-    "xorl	%" #g ", " L2 "\n\t"                                  \
-    /* L1 = ((e>>>14) ^ e) >>> 5 */                                   \
-    "rorl	$5, " L1 "\n\t"                                       \
-    /* h += Ch(e,f,g) */                                              \
-    "addl	" L2 ", %" #h "\n\t"                                  \
-    /* L1 = (((e>>>14) ^ e) >>> 5) ^ e */                             \
-    "xorl	%" #e ", " L1 "\n\t"                                  \
-    /* L3 = a ^ b */                                                  \
-    "xorl	%" #a ", " L4 "\n\t"                                  \
-    /* L1 = ((((e>>>14) ^ e) >>> 5) ^ e) >>> 6 */                     \
-    "rorl	$6, " L1 "\n\t"                                       \
-    /* L2 = a */                                                      \
-    "movl	%" #a ", " L2 "\n\t"                                  \
-    /* h = h + w_k + Sigma1(e) */                                     \
-    "addl	" L1 ", %" #h "\n\t"                                  \
-    /* L2 = a>>>9 */                                                  \
-    "rorl	$9, " L2 "\n\t"                                       \
-    /* L3 = (a ^ b) & (b ^ c)  */                                     \
-    "andl	" L4 ", " L3 "\n\t"                                   \
-    /* L2 = (a>>>9) ^ a */                                            \
-    "xorl	%" #a", " L2 "\n\t"                                   \
-    /* L1 = Maj(a,b,c) */                                             \
-    "xorl	%" #b ", " L3 "\n\t"                                  \
-    /* L2 = ((a>>>9) ^ a) >>> 11 */                                   \
-    "rorl	$11, " L2 "\n\t"                                      \
-    /* d += h + w_k + Sigma1(e) + Ch(e,f,g) */                        \
-    "addl	%" #h ", %" #d "\n\t"                                 \
-    /* L2 = (((a>>>9) ^ a) >>> 11) ^ a */                             \
-    "xorl	%" #a ", " L2 "\n\t"                                  \
-    /* h = h + w_k + Sigma1(e) + Ch(e,f,g) + Maj(a,b,c) */            \
-    "addl	" L3 ", %" #h "\n\t"                                  \
-    /* L2 = ((((a>>>9) ^ a) >>> 11) ^ a) >>> 2 */                     \
-    "rorl	$2, " L2 "\n\t"                                       \
-    /* L1 = d (e of next RND) */                                      \
-    "movl	%" #d ", " L1 "\n\t"                                  \
-    /* h = h + w_k + Sigma1(e) Sigma0(a) + Ch(e,f,g) + Maj(a,b,c) */  \
-    "addl	" L2 ", %" #h "\n\t"                                  \
-
-
-#define RND_ALL_0(a, b, c, d, e, f, g, h, i) \
-       _RND_ALL_0(a, b, c, d, e, f, g, h, i)
-#define RND_ALL_1(a, b, c, d, e, f, g, h, i) \
-       _RND_ALL_1(a, b, c, d, e, f, g, h, i)
-
-#define RND_ALL_4(a, b, c, d, e, f, g, h, i)   \
-        RND_ALL_0(a, b, c, d, e, f, g, h, i+0) \
-        RND_ALL_1(h, a, b, c, d, e, f, g, i+1) \
-        RND_ALL_0(g, h, a, b, c, d, e, f, i+2) \
-        RND_ALL_1(f, g, h, a, b, c, d, e, i+3)
-
-#endif  /* defined(HAVE_INTEL_AVX1) ||  defined(HAVE_INTEL_AVX2) */
-
-#if defined(HAVE_INTEL_AVX1) /* inline Assember for Intel AVX1 instructions */
-
-#define _VPALIGNR(op1, op2, op3, op4)                    \
-    "vpalignr	$" #op4", %" #op3", %" #op2", %" #op1"\n\t"
-#define VPALIGNR(op1, op2, op3, op4)                     \
-        _VPALIGNR(op1, op2, op3, op4)
-#define _VPADDD(op1, op2, op3)                           \
-    "vpaddd	%" #op3", %" #op2", %" #op1"\n\t"
-#define VPADDD(op1, op2, op3)                            \
-       _VPADDD(op1, op2, op3)
-#define _VPSRLD(op1, op2, op3)                           \
-    "vpsrld	$" #op3", %" #op2", %" #op1"\n\t"
-#define VPSRLD(op1, op2, op3)        \
-       _VPSRLD(op1, op2, op3)
-#define _VPSRLQ(op1, op2, op3)                           \
-    "vpsrlq	$" #op3", %" #op2", %" #op1"\n\t"
-#define VPSRLQ(op1,op2,op3)        \
-       _VPSRLQ(op1,op2,op3)
-#define _VPSLLD(op1,op2,op3)                             \
-    "vpslld	$" #op3", %" #op2", %" #op1"\n\t"
-#define VPSLLD(op1,op2,op3)        \
-       _VPSLLD(op1,op2,op3)
-#define _VPOR(op1,op2,op3)                               \
-    "vpor	%" #op3", %" #op2", %" #op1"\n\t"
-#define VPOR(op1,op2,op3)          \
-       _VPOR(op1,op2,op3)
-#define _VPXOR(op1,op2,op3)                              \
-    "vpxor	%" #op3", %" #op2", %" #op1"\n\t"
-#define VPXOR(op1,op2,op3)         \
-       _VPXOR(op1,op2,op3)
-#define _VPSHUFD(op1,op2,op3)                            \
-    "vpshufd	$" #op3", %" #op2", %" #op1"\n\t"
-#define VPSHUFD(op1,op2,op3)       \
-       _VPSHUFD(op1,op2,op3)
-#define _VPSHUFB(op1,op2,op3)                            \
-    "vpshufb	%" #op3", %" #op2", %" #op1"\n\t"
-#define VPSHUFB(op1,op2,op3)       \
-       _VPSHUFB(op1,op2,op3)
-#define _VPSLLDQ(op1,op2,op3)                            \
-    "vpslldq	$" #op3", %" #op2", %" #op1"\n\t"
-#define VPSLLDQ(op1,op2,op3)       \
-       _VPSLLDQ(op1,op2,op3)
-
-#define MsgSched(X0,X1,X2,X3,a,b,c,d,e,f,g,h,_i)                           \
-            RND_STEP_0_1(a,b,c,d,e,f,g,h,_i)                               \
-    VPALIGNR (XTMP1, X1, X0, 4)    /* XTMP1 = W[-15] */                    \
-    VPALIGNR (XTMP0, X3, X2, 4)    /* XTMP0 = W[-7] */                     \
-            RND_STEP_0_2(a,b,c,d,e,f,g,h,_i)                               \
-            RND_STEP_0_3(a,b,c,d,e,f,g,h,_i)                               \
-    VPSRLD   (XTMP2, XTMP1, 7)     /* XTMP2 = W[-15] >> 7 */               \
-    VPSLLD   (XTMP3, XTMP1, 25)    /* XTEMP3 = W[-15] << (32-7) */         \
-            RND_STEP_0_4(a,b,c,d,e,f,g,h,_i)                               \
-            RND_STEP_0_5(a,b,c,d,e,f,g,h,_i)                               \
-    VPSRLD   (XTMP4, XTMP1, 18)    /* XTEMP4 = W[-15] >> 18 */             \
-    VPSLLD   (XTMP5, XTMP1, 14)    /* XTEMP5 = W[-15] << (32-18) */        \
-            RND_STEP_0_6(a,b,c,d,e,f,g,h,_i)                               \
-            RND_STEP_0_7(a,b,c,d,e,f,g,h,_i)                               \
-    VPOR     (XTMP2, XTMP3, XTMP2) /* XTMP2 = W[-15] >>> 7 */              \
-    VPOR     (XTMP4, XTMP5, XTMP4) /* XTMP4 = W[-15] >>> 18 */             \
-            RND_STEP_0_8(a,b,c,d,e,f,g,h,_i)                               \
-            RND_STEP_1_1(h,a,b,c,d,e,f,g,_i+1)                             \
-            RND_STEP_1_2(h,a,b,c,d,e,f,g,_i+1)                             \
-    VPSRLD   (XTMP5, XTMP1, 3)     /* XTMP4 = W[-15] >> 3 */               \
-    VPXOR    (XTMP2, XTMP4, XTMP2)                                         \
-                          /* XTMP2 = W[-15] MY_ROR 7 ^ W[-15] MY_ROR 18 */ \
-            RND_STEP_1_3(h,a,b,c,d,e,f,g,_i+1)                             \
-            RND_STEP_1_4(h,a,b,c,d,e,f,g,_i+1)                             \
-    VPXOR    (XTMP1, XTMP5, XTMP2)  /* XTMP1 = s0 */                       \
-    VPSHUFD  (XTMP2, X3, 0b11111010)  /* XTMP2 = W[-2] {BBAA}*/            \
-            RND_STEP_1_5(h,a,b,c,d,e,f,g,_i+1)                             \
-            RND_STEP_1_6(h,a,b,c,d,e,f,g,_i+1)                             \
-    VPSRLD   (XTMP4, XTMP2, 10)      /* XTMP4 = W[-2] >> 10 {BBAA} */      \
-    VPSRLQ   (XTMP3, XTMP2, 19)      /* XTMP3 = W[-2] MY_ROR 19 {xBxA} */  \
-            RND_STEP_1_7(h,a,b,c,d,e,f,g,_i+1)                             \
-            RND_STEP_1_8(h,a,b,c,d,e,f,g,_i+1)                             \
-            RND_STEP_0_1(g,h,a,b,c,d,e,f,_i+2)                             \
-    VPSRLQ   (XTMP2, XTMP2, 17)      /* XTMP2 = W[-2] MY_ROR 17 {xBxA} */  \
-    VPADDD   (XTMP0, XTMP0, X0)                                            \
-            RND_STEP_0_2(g,h,a,b,c,d,e,f,_i+2)                             \
-            RND_STEP_0_3(g,h,a,b,c,d,e,f,_i+2)                             \
-            RND_STEP_0_4(g,h,a,b,c,d,e,f,_i+2)                             \
-    VPXOR    (XTMP2, XTMP3, XTMP2)                                         \
-    VPADDD   (XTMP0, XTMP0, XTMP1)  /* XTMP0 = W[-16] + W[-7] + s0 */      \
-            RND_STEP_0_5(g,h,a,b,c,d,e,f,_i+2)                             \
-    VPXOR    (XTMP4, XTMP4, XTMP2)   /* XTMP4 = s1 {xBxA} */               \
-            RND_STEP_0_6(g,h,a,b,c,d,e,f,_i+2)                             \
-    VPSHUFB  (XTMP4, XTMP4, SHUF_00BA)  /* XTMP4 = s1 {00BA} */            \
-            RND_STEP_0_7(g,h,a,b,c,d,e,f,_i+2)                             \
-    VPADDD   (XTMP0, XTMP0, XTMP4)  /* XTMP0 = {..., ..., W[1], W[0]} */   \
-            RND_STEP_0_8(g,h,a,b,c,d,e,f,_i+2)                             \
-            RND_STEP_1_1(f,g,h,a,b,c,d,e,_i+3)                             \
-    VPSHUFD  (XTMP2, XTMP0, 0b01010000) /* XTMP2 = W[-2] {DDCC} */         \
-            RND_STEP_1_2(f,g,h,a,b,c,d,e,_i+3)                             \
-    VPSRLQ   (XTMP4, XTMP2, 17)      /* XTMP4 = W[-2] MY_ROR 17 {xDxC} */  \
-    VPSRLQ   (XTMP3, XTMP2, 19)       /* XTMP3 = W[-2] MY_ROR 19 {xDxC} */ \
-            RND_STEP_1_3(f,g,h,a,b,c,d,e,_i+3)                             \
-            RND_STEP_1_4(f,g,h,a,b,c,d,e,_i+3)                             \
-    VPSRLD   (XTMP5, XTMP2, 10)       /* XTMP5 = W[-2] >> 10 {DDCC} */     \
-    VPXOR    (XTMP4, XTMP3, XTMP4)                                         \
-            RND_STEP_1_5(f,g,h,a,b,c,d,e,_i+3)                             \
-            RND_STEP_1_6(f,g,h,a,b,c,d,e,_i+3)                             \
-    VPXOR    (XTMP5, XTMP4, XTMP5)   /* XTMP5 = s1 {xDxC} */               \
-            RND_STEP_1_7(f,g,h,a,b,c,d,e,_i+3)                             \
-    VPSHUFB  (XTMP5, XTMP5, SHUF_DC00) /* XTMP5 = s1 {DC00} */             \
-            RND_STEP_1_8(f,g,h,a,b,c,d,e,_i+3)                             \
-    VPADDD   (X0, XTMP5, XTMP0)      /* X0 = {W[3], W[2], W[1], W[0]} */
-
-#if defined(HAVE_INTEL_RORX)
-
-#define MsgSched_RORX(X0,X1,X2,X3,a,b,c,d,e,f,g,h,_i)                      \
-            RND_STEP_RORX_0_1(a,b,c,d,e,f,g,h,_i)                          \
-    VPALIGNR (XTMP0, X3, X2, 4)                                            \
-    VPALIGNR (XTMP1, X1, X0, 4)   /* XTMP1 = W[-15] */                     \
-            RND_STEP_RORX_0_2(a,b,c,d,e,f,g,h,_i)                          \
-            RND_STEP_RORX_0_3(a,b,c,d,e,f,g,h,_i)                          \
-    VPSRLD   (XTMP2, XTMP1, 7)                                             \
-    VPSLLD   (XTMP3, XTMP1, 25) /* VPSLLD   (XTMP3, XTMP1, (32-7)) */      \
-            RND_STEP_RORX_0_4(a,b,c,d,e,f,g,h,_i)                          \
-            RND_STEP_RORX_0_5(a,b,c,d,e,f,g,h,_i)                          \
-    VPSRLD   (XTMP4, XTMP1, 3)  /* XTMP4 = W[-15] >> 3 */                  \
-    VPOR     (XTMP3, XTMP3, XTMP2)  /* XTMP1 = W[-15] MY_ROR 7 */          \
-            RND_STEP_RORX_0_6(a,b,c,d,e,f,g,h,_i)                          \
-            RND_STEP_RORX_0_7(a,b,c,d,e,f,g,h,_i)                          \
-            RND_STEP_RORX_0_8(a,b,c,d,e,f,g,h,_i)                          \
-                                                                           \
-            RND_STEP_RORX_1_1(h,a,b,c,d,e,f,g,_i+1)                        \
-    VPSRLD   (XTMP2, XTMP1,18)                                             \
-            RND_STEP_RORX_1_2(h,a,b,c,d,e,f,g,_i+1)                        \
-    VPSLLD   (XTMP1, XTMP1, 14) /* VPSLLD   (XTMP1, XTMP1, (32-18)) */     \
-            RND_STEP_RORX_1_3(h,a,b,c,d,e,f,g,_i+1)                        \
-    VPXOR    (XTMP3, XTMP3, XTMP1)                                         \
-            RND_STEP_RORX_1_4(h,a,b,c,d,e,f,g,_i+1)                        \
-    VPXOR    (XTMP3, XTMP3, XTMP2)                                         \
-                          /* XTMP1 = W[-15] MY_ROR 7 ^ W[-15] MY_ROR 18 */ \
-            RND_STEP_RORX_1_5(h,a,b,c,d,e,f,g,_i+1)                        \
-    VPSHUFD  (XTMP2, X3, 0b11111010)  /* XTMP2 = W[-2] {BBAA}*/            \
-            RND_STEP_RORX_1_6(h,a,b,c,d,e,f,g,_i+1)                        \
-    VPXOR    (XTMP1, XTMP3, XTMP4)  /* XTMP1 = s0 */                       \
-            RND_STEP_RORX_1_7(h,a,b,c,d,e,f,g,_i+1)                        \
-    VPSRLD   (XTMP4, XTMP2, 10)      /* XTMP4 = W[-2] >> 10 {BBAA} */      \
-            RND_STEP_RORX_1_8(h,a,b,c,d,e,f,g,_i+1)                        \
-                                                                           \
-            RND_STEP_RORX_0_1(g,h,a,b,c,d,e,f,_i+2)                        \
-    VPSRLQ   (XTMP3, XTMP2, 19)      /* XTMP3 = W[-2] MY_ROR 19 {xBxA} */  \
-            RND_STEP_RORX_0_2(g,h,a,b,c,d,e,f,_i+2)                        \
-    VPSRLQ   (XTMP2, XTMP2, 17)      /* XTMP2 = W[-2] MY_ROR 17 {xBxA} */  \
-    VPADDD   (XTMP0, XTMP0, X0)                                            \
-            RND_STEP_RORX_0_3(g,h,a,b,c,d,e,f,_i+2)                        \
-    VPADDD   (XTMP0, XTMP0, XTMP1)  /* XTMP0 = W[-16] + W[-7] + s0 */      \
-            RND_STEP_RORX_0_4(g,h,a,b,c,d,e,f,_i+2)                        \
-    VPXOR    (XTMP2, XTMP2, XTMP3)                                         \
-            RND_STEP_RORX_0_5(g,h,a,b,c,d,e,f,_i+2)                        \
-    VPXOR    (XTMP4, XTMP4, XTMP2)   /* XTMP4 = s1 {xBxA} */               \
-            RND_STEP_RORX_0_6(g,h,a,b,c,d,e,f,_i+2)                        \
-    VPSHUFB  (XTMP4, XTMP4, SHUF_00BA)  /* XTMP4 = s1 {00BA} */            \
-            RND_STEP_RORX_0_7(g,h,a,b,c,d,e,f,_i+2)                        \
-    VPADDD   (XTMP0, XTMP0, XTMP4)  /* XTMP0 = {..., ..., W[1], W[0]} */   \
-            RND_STEP_RORX_0_8(g,h,a,b,c,d,e,f,_i+2)                        \
-                                                                           \
-            RND_STEP_RORX_1_1(f,g,h,a,b,c,d,e,_i+3)                        \
-    VPSHUFD  (XTMP2, XTMP0, 0b01010000) /* XTMP2 = W[-2] {DDCC} */         \
-            RND_STEP_RORX_1_2(f,g,h,a,b,c,d,e,_i+3)                        \
-    VPSRLD   (XTMP5, XTMP2, 10)       /* XTMP5 = W[-2] >> 10 {DDCC} */     \
-            RND_STEP_RORX_1_3(f,g,h,a,b,c,d,e,_i+3)                        \
-    VPSRLQ   (XTMP3, XTMP2, 19)       /* XTMP3 = W[-2] MY_ROR 19 {xDxC} */ \
-            RND_STEP_RORX_1_4(f,g,h,a,b,c,d,e,_i+3)                        \
-    VPSRLQ   (XTMP2, XTMP2, 17)      /* XTMP2 = W[-2] MY_ROR 17 {xDxC} */  \
-            RND_STEP_RORX_1_5(f,g,h,a,b,c,d,e,_i+3)                        \
-    VPXOR    (XTMP2, XTMP2, XTMP3)                                         \
-            RND_STEP_RORX_1_6(f,g,h,a,b,c,d,e,_i+3)                        \
-    VPXOR    (XTMP5, XTMP5, XTMP2)   /* XTMP5 = s1 {xDxC} */               \
-            RND_STEP_RORX_1_7(f,g,h,a,b,c,d,e,_i+3)                        \
-    VPSHUFB  (XTMP5, XTMP5, SHUF_DC00) /* XTMP5 = s1 {DC00} */             \
-            RND_STEP_RORX_1_8(f,g,h,a,b,c,d,e,_i+3)                        \
-    VPADDD   (X0, XTMP5, XTMP0)      /* X0 = {W[3], W[2], W[1], W[0]} */
-
-#endif /* HAVE_INTEL_RORX */
-
-
-#define _W_K_from_buff(X0, X1, X2, X3, BYTE_FLIP_MASK) \
-    "# X0, X1, X2, X3 = W[0..15]\n\t"                  \
-    "vmovdqu	  (%%rax), %" #X0 "\n\t"               \
-    "vmovdqu	16(%%rax), %" #X1 "\n\t"               \
-    VPSHUFB(X0, X0, BYTE_FLIP_MASK)                    \
-    VPSHUFB(X1, X1, BYTE_FLIP_MASK)                    \
-    "vmovdqu	32(%%rax), %" #X2 "\n\t"               \
-    "vmovdqu	48(%%rax), %" #X3 "\n\t"               \
-    VPSHUFB(X2, X2, BYTE_FLIP_MASK)                    \
-    VPSHUFB(X3, X3, BYTE_FLIP_MASK)
-
-#define W_K_from_buff(X0, X1, X2, X3, BYTE_FLIP_MASK) \
-       _W_K_from_buff(X0, X1, X2, X3, BYTE_FLIP_MASK)
-
-
-#define _SET_W_K_XFER_4(i) \
-    "vpaddd	(" #i "*4)+ 0+%[K], %%xmm0, %%xmm4\n\t"  \
-    "vpaddd	(" #i "*4)+16+%[K], %%xmm1, %%xmm5\n\t"  \
-    "vmovdqu	%%xmm4,   (" WK ")\n\t"                  \
-    "vmovdqu	%%xmm5, 16(" WK ")\n\t"                  \
-    "vpaddd	(" #i "*4)+32+%[K], %%xmm2, %%xmm6\n\t"  \
-    "vpaddd	(" #i "*4)+48+%[K], %%xmm3, %%xmm7\n\t"  \
-    "vmovdqu	%%xmm6, 32(" WK ")\n\t"                  \
-    "vmovdqu	%%xmm7, 48(" WK ")\n\t"
-
-#define SET_W_K_XFER_4(i) \
-       _SET_W_K_XFER_4(i)
-
-
-static const ALIGN32 word64 mSHUF_00BA[] =
-    { 0x0b0a090803020100, 0xFFFFFFFFFFFFFFFF }; /* shuffle xBxA -> 00BA */
-static const ALIGN32 word64 mSHUF_DC00[] =
-    { 0xFFFFFFFFFFFFFFFF, 0x0b0a090803020100 }; /* shuffle xDxC -> DC00 */
-static const ALIGN32 word64 mBYTE_FLIP_MASK[] =
-    { 0x0405060700010203, 0x0c0d0e0f08090a0b };
-
-#define _Init_Masks(mask1, mask2, mask3)       \
-    "vmovdqa	%[FLIP], %" #mask1 "\n\t"      \
-    "vmovdqa	%[SHUF00BA], %" #mask2 "\n\t"  \
-    "vmovdqa	%[SHUFDC00], %" #mask3 "\n\t"
-
-#define Init_Masks(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00) \
-       _Init_Masks(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00)
-
-#define X0 %xmm0
-#define X1 %xmm1
-#define X2 %xmm2
-#define X3 %xmm3
-
-#define XTMP0 %xmm4
-#define XTMP1 %xmm5
-#define XTMP2 %xmm6
-#define XTMP3 %xmm7
-#define XTMP4 %xmm8
-#define XTMP5 %xmm9
-#define XFER  %xmm10
-
-#define SHUF_00BA   %xmm11 /* shuffle xBxA -> 00BA */
-#define SHUF_DC00   %xmm12 /* shuffle xDxC -> DC00 */
-#define BYTE_FLIP_MASK  %xmm13
-
-
-SHA256_NOINLINE static int Transform_Sha256_AVX1(wc_Sha256* sha256)
-{
-    __asm__ __volatile__ (
-
-        "subq	$64, %%rsp\n\t"
-
-        "leaq	32(%[sha256]), %%rax\n\t"
-    Init_Masks(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00)
-    LOAD_DIGEST()
-
-    W_K_from_buff(X0, X1, X2, X3, BYTE_FLIP_MASK)
-
-        "movl	%%r9d, " L4 "\n\t"
-        "movl	%%r12d, " L1 "\n\t"
-        "xorl	%%r10d, " L4 "\n\t"
-
-    SET_W_K_XFER_4(0)
-    MsgSched(X0, X1, X2, X3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  0)
-    MsgSched(X1, X2, X3, X0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3,  4)
-    MsgSched(X2, X3, X0, X1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  8)
-    MsgSched(X3, X0, X1, X2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
-
-    SET_W_K_XFER_4(16)
-    MsgSched(X0, X1, X2, X3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  0)
-    MsgSched(X1, X2, X3, X0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3,  4)
-    MsgSched(X2, X3, X0, X1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  8)
-    MsgSched(X3, X0, X1, X2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
-
-    SET_W_K_XFER_4(32)
-    MsgSched(X0, X1, X2, X3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  0)
-    MsgSched(X1, X2, X3, X0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3,  4)
-    MsgSched(X2, X3, X0, X1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  8)
-    MsgSched(X3, X0, X1, X2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
-
-    SET_W_K_XFER_4(48)
-    RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  0)
-    RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3,  4)
-    RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  8)
-    RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
-
-    STORE_ADD_DIGEST()
-
-        "addq	$64, %%rsp\n\t"
-
-        :
-        : [FLIP]     "m" (mBYTE_FLIP_MASK[0]),
-          [SHUF00BA] "m" (mSHUF_00BA[0]),
-          [SHUFDC00] "m" (mSHUF_DC00[0]),
-          [sha256]   "r" (sha256),
-          [K]        "m" (K)
-        : WORK_REGS, STATE_REGS, XMM_REGS, "memory"
-    );
-
-    return 0;
-}
-
-SHA256_NOINLINE static int Transform_Sha256_AVX1_Len(wc_Sha256* sha256,
-                                                     word32 len)
-{
-    __asm__ __volatile__ (
-
-        "subq	$64, %%rsp\n\t"
-        "movq	120(%[sha256]), %%rax\n\t"
-
-    Init_Masks(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00)
-    LOAD_DIGEST()
-
-        "# Start of loop processing a block\n"
-        "1:\n\t"
-
-    W_K_from_buff(X0, X1, X2, X3, BYTE_FLIP_MASK)
-
-        "movl	%%r9d, " L4 "\n\t"
-        "movl	%%r12d, " L1 "\n\t"
-        "xorl	%%r10d, " L4 "\n\t"
-
-    SET_W_K_XFER_4(0)
-    MsgSched(X0, X1, X2, X3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  0)
-    MsgSched(X1, X2, X3, X0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3,  4)
-    MsgSched(X2, X3, X0, X1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  8)
-    MsgSched(X3, X0, X1, X2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
-
-    SET_W_K_XFER_4(16)
-    MsgSched(X0, X1, X2, X3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  0)
-    MsgSched(X1, X2, X3, X0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3,  4)
-    MsgSched(X2, X3, X0, X1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  8)
-    MsgSched(X3, X0, X1, X2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
-
-    SET_W_K_XFER_4(32)
-    MsgSched(X0, X1, X2, X3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  0)
-    MsgSched(X1, X2, X3, X0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3,  4)
-    MsgSched(X2, X3, X0, X1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  8)
-    MsgSched(X3, X0, X1, X2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
-
-    SET_W_K_XFER_4(48)
-    RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  0)
-    RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3,  4)
-    RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  8)
-    RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
-        "movq	120(%[sha256]), %%rax\n\t"
-
-    ADD_DIGEST()
-
-        "addq	$64, %%rax\n\t"
-        "subl	$64, %[len]\n\t"
-
-    STORE_DIGEST()
-
-        "movq	%%rax, 120(%[sha256])\n\t"
-        "jnz	1b\n\t"
-
-        "addq	$64, %%rsp\n\t"
-
-        :
-        : [FLIP]     "m" (mBYTE_FLIP_MASK[0]),
-          [SHUF00BA] "m" (mSHUF_00BA[0]),
-          [SHUFDC00] "m" (mSHUF_DC00[0]),
-          [sha256]   "r" (sha256),
-          [len]      "r" (len),
-          [K]        "m" (K)
-        : WORK_REGS, STATE_REGS, XMM_REGS, "memory"
-    );
-
-    return 0;
-}
-#endif  /* HAVE_INTEL_AVX1 */
-
-#if defined(HAVE_INTEL_AVX2) && defined(HAVE_INTEL_RORX)
-SHA256_NOINLINE static int Transform_Sha256_AVX1_RORX(wc_Sha256* sha256)
-{
-    __asm__ __volatile__ (
-
-        "subq	$64, %%rsp\n\t"
-
-    Init_Masks(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00)
-        "leaq	32(%[sha256]), %%rax\n\t"
-    W_K_from_buff(X0, X1, X2, X3, BYTE_FLIP_MASK)
-
-    LOAD_DIGEST()
-
-    SET_W_K_XFER_4(0)
-        "movl	%%r9d, " L4 "\n\t"
-        "rorx	$6, %%r12d, " L1 "\n\t"
-        "xorl	%%r10d, " L4 "\n\t"
-    MsgSched_RORX(X0, X1, X2, X3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  0)
-    MsgSched_RORX(X1, X2, X3, X0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3,  4)
-    MsgSched_RORX(X2, X3, X0, X1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  8)
-    MsgSched_RORX(X3, X0, X1, X2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
-
-    SET_W_K_XFER_4(16)
-    MsgSched_RORX(X0, X1, X2, X3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  0)
-    MsgSched_RORX(X1, X2, X3, X0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3,  4)
-    MsgSched_RORX(X2, X3, X0, X1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  8)
-    MsgSched_RORX(X3, X0, X1, X2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
-
-    SET_W_K_XFER_4(32)
-    MsgSched_RORX(X0, X1, X2, X3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  0)
-    MsgSched_RORX(X1, X2, X3, X0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3,  4)
-    MsgSched_RORX(X2, X3, X0, X1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  8)
-    MsgSched_RORX(X3, X0, X1, X2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
-
-    SET_W_K_XFER_4(48)
-        "xorl	" L3 ", " L3 "\n\t"
-    RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  0)
-    RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3,  4)
-    RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  8)
-    RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
-        /* Prev RND: h += Maj(a,b,c) */
-        "addl	" L3 ", %%r8d\n\t"
-
-    STORE_ADD_DIGEST()
-
-        "addq	$64, %%rsp\n\t"
-
-        :
-        : [FLIP]     "m" (mBYTE_FLIP_MASK[0]),
-          [SHUF00BA] "m" (mSHUF_00BA[0]),
-          [SHUFDC00] "m" (mSHUF_DC00[0]),
-          [sha256]   "r" (sha256),
-          [K]        "m" (K)
-        : WORK_REGS, STATE_REGS, XMM_REGS, "memory"
-    );
-
-    return 0;
-}
-
-SHA256_NOINLINE static int Transform_Sha256_AVX1_RORX_Len(wc_Sha256* sha256,
-                                                          word32 len)
-{
-    __asm__ __volatile__ (
-
-        "subq	$64, %%rsp\n\t"
-        "movq	120(%[sha256]), %%rax\n\t"
-
-    Init_Masks(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00)
-    LOAD_DIGEST()
-
-        "# Start of loop processing a block\n"
-        "1:\n\t"
-
-    W_K_from_buff(X0, X1, X2, X3, BYTE_FLIP_MASK)
-
-    SET_W_K_XFER_4(0)
-        "movl	%%r9d, " L4 "\n\t"
-        "rorx	$6, %%r12d, " L1 "\n\t"
-        "xorl	%%r10d, " L4 "\n\t"
-    MsgSched_RORX(X0, X1, X2, X3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  0)
-    MsgSched_RORX(X1, X2, X3, X0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3,  4)
-    MsgSched_RORX(X2, X3, X0, X1, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3,  8)
-    MsgSched_RORX(X3, X0, X1, X2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
-
-    SET_W_K_XFER_4(16)
-    MsgSched_RORX(X0, X1, X2, X3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  0)
-    MsgSched_RORX(X1, X2, X3, X0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3,  4)
-    MsgSched_RORX(X2, X3, X0, X1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  8)
-    MsgSched_RORX(X3, X0, X1, X2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
-
-    SET_W_K_XFER_4(32)
-    MsgSched_RORX(X0, X1, X2, X3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  0)
-    MsgSched_RORX(X1, X2, X3, X0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3,  4)
-    MsgSched_RORX(X2, X3, X0, X1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  8)
-    MsgSched_RORX(X3, X0, X1, X2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
-
-    SET_W_K_XFER_4(48)
-        "xorl	" L3 ", " L3 "\n\t"
-        "xorl	" L2 ", " L2 "\n\t"
-    RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  0)
-    RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3,  4)
-    RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  8)
-    RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
-        /* Prev RND: h += Maj(a,b,c) */
-        "addl	" L3 ", %%r8d\n\t"
-        "movq	120(%[sha256]), %%rax\n\t"
-
-    ADD_DIGEST()
-
-        "addq	$64, %%rax\n\t"
-        "subl	$64, %[len]\n\t"
-
-    STORE_DIGEST()
-
-        "movq	%%rax, 120(%[sha256])\n\t"
-        "jnz	1b\n\t"
-
-        "addq	$64, %%rsp\n\t"
-
-        :
-        : [FLIP]     "m" (mBYTE_FLIP_MASK[0]),
-          [SHUF00BA] "m" (mSHUF_00BA[0]),
-          [SHUFDC00] "m" (mSHUF_DC00[0]),
-          [sha256]   "r" (sha256),
-          [len]      "r" (len),
-          [K]        "m" (K)
-        : WORK_REGS, STATE_REGS, XMM_REGS, "memory"
-    );
-
-    return 0;
-}
-#endif /* HAVE_INTEL_AVX2 && HAVE_INTEL_RORX */
-
-
-#if defined(HAVE_INTEL_AVX2)
-#define Y0 %ymm0
-#define Y1 %ymm1
-#define Y2 %ymm2
-#define Y3 %ymm3
-
-#define YTMP0 %ymm4
-#define YTMP1 %ymm5
-#define YTMP2 %ymm6
-#define YTMP3 %ymm7
-#define YTMP4 %ymm8
-#define YTMP5 %ymm9
-#define YXFER %ymm10
-
-#define SHUF_Y_00BA       %ymm11 /* shuffle xBxA -> 00BA */
-#define SHUF_Y_DC00       %ymm12 /* shuffle xDxC -> DC00 */
-#define BYTE_FLIP_Y_MASK  %ymm13
-
-#define YMM_REGS "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", \
-                 "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13"
-
-#define MsgSched_Y(Y0,Y1,Y2,Y3,a,b,c,d,e,f,g,h,_i)                            \
-            RND_STEP_0_1(a,b,c,d,e,f,g,h,_i)                                  \
-    VPALIGNR (YTMP1, Y1, Y0, 4)    /* YTMP1 = W[-15] */                       \
-    VPALIGNR (YTMP0, Y3, Y2, 4)    /* YTMP0 = W[-7] */                        \
-            RND_STEP_0_2(a,b,c,d,e,f,g,h,_i)                                  \
-            RND_STEP_0_3(a,b,c,d,e,f,g,h,_i)                                  \
-    VPSRLD   (YTMP2, YTMP1, 7)     /* YTMP2 = W[-15] >> 7 */                  \
-    VPSLLD   (YTMP3, YTMP1, 25)    /* YTEMP3 = W[-15] << (32-7) */            \
-            RND_STEP_0_4(a,b,c,d,e,f,g,h,_i)                                  \
-            RND_STEP_0_5(a,b,c,d,e,f,g,h,_i)                                  \
-    VPSRLD   (YTMP4, YTMP1, 18)    /* YTEMP4 = W[-15] >> 18 */                \
-    VPSLLD   (YTMP5, YTMP1, 14)    /* YTEMP5 = W[-15] << (32-18) */           \
-            RND_STEP_0_6(a,b,c,d,e,f,g,h,_i)                                  \
-            RND_STEP_0_7(a,b,c,d,e,f,g,h,_i)                                  \
-    VPOR     (YTMP2, YTMP3, YTMP2) /* YTMP2 = W[-15] >>> 7 */                 \
-    VPOR     (YTMP4, YTMP5, YTMP4) /* YTMP4 = W[-15] >>> 18 */                \
-            RND_STEP_0_8(a,b,c,d,e,f,g,h,_i)                                  \
-            RND_STEP_1_1(h,a,b,c,d,e,f,g,_i+1)                                \
-            RND_STEP_1_2(h,a,b,c,d,e,f,g,_i+1)                                \
-    VPSRLD   (YTMP5, YTMP1, 3)     /* YTMP4 = W[-15] >> 3 */                  \
-    VPXOR    (YTMP2, YTMP4, YTMP2) /* YTMP2 = W[-15] >>> 7 ^ W[-15] >>> 18 */ \
-            RND_STEP_1_3(h,a,b,c,d,e,f,g,_i+1)                                \
-            RND_STEP_1_4(h,a,b,c,d,e,f,g,_i+1)                                \
-    VPXOR    (YTMP1, YTMP5, YTMP2)  /* YTMP1 = s0 */                          \
-    VPSHUFD  (YTMP2, Y3, 0b11111010)  /* YTMP2 = W[-2] {BBAA}*/               \
-            RND_STEP_1_5(h,a,b,c,d,e,f,g,_i+1)                                \
-            RND_STEP_1_6(h,a,b,c,d,e,f,g,_i+1)                                \
-    VPSRLD   (YTMP4, YTMP2, 10)      /* YTMP4 = W[-2] >> 10 {BBAA} */         \
-    VPSRLQ   (YTMP3, YTMP2, 19)      /* YTMP3 = W[-2] MY_ROR 19 {xBxA} */     \
-            RND_STEP_1_7(h,a,b,c,d,e,f,g,_i+1)                                \
-            RND_STEP_1_8(h,a,b,c,d,e,f,g,_i+1)                                \
-            RND_STEP_0_1(g,h,a,b,c,d,e,f,_i+2)                                \
-    VPSRLQ   (YTMP2, YTMP2, 17)      /* YTMP2 = W[-2] MY_ROR 17 {xBxA} */     \
-    VPADDD   (YTMP0, YTMP0, Y0)                                               \
-            RND_STEP_0_2(g,h,a,b,c,d,e,f,_i+2)                                \
-            RND_STEP_0_3(g,h,a,b,c,d,e,f,_i+2)                                \
-            RND_STEP_0_4(g,h,a,b,c,d,e,f,_i+2)                                \
-    VPXOR    (YTMP2, YTMP3, YTMP2)                                            \
-    VPADDD   (YTMP0, YTMP0, YTMP1)  /* YTMP0 = W[-16] + W[-7] + s0 */         \
-            RND_STEP_0_5(g,h,a,b,c,d,e,f,_i+2)                                \
-    VPXOR    (YTMP4, YTMP4, YTMP2)   /* YTMP4 = s1 {xBxA} */                  \
-            RND_STEP_0_6(g,h,a,b,c,d,e,f,_i+2)                                \
-    VPSHUFB  (YTMP4, YTMP4, SHUF_Y_00BA)  /* YTMP4 = s1 {00BA} */             \
-            RND_STEP_0_7(g,h,a,b,c,d,e,f,_i+2)                                \
-    VPADDD   (YTMP0, YTMP0, YTMP4)  /* YTMP0 = {..., ..., W[1], W[0]} */      \
-            RND_STEP_0_8(g,h,a,b,c,d,e,f,_i+2)                                \
-            RND_STEP_1_1(f,g,h,a,b,c,d,e,_i+3)                                \
-    VPSHUFD  (YTMP2, YTMP0, 0b01010000) /* YTMP2 = W[-2] {DDCC} */            \
-            RND_STEP_1_2(f,g,h,a,b,c,d,e,_i+3)                                \
-    VPSRLQ   (YTMP4, YTMP2, 17)      /* YTMP4 = W[-2] MY_ROR 17 {xDxC} */     \
-    VPSRLQ   (YTMP3, YTMP2, 19)       /* YTMP3 = W[-2] MY_ROR 19 {xDxC} */    \
-            RND_STEP_1_3(f,g,h,a,b,c,d,e,_i+3)                                \
-            RND_STEP_1_4(f,g,h,a,b,c,d,e,_i+3)                                \
-    VPSRLD   (YTMP5, YTMP2, 10)       /* YTMP5 = W[-2] >> 10 {DDCC} */        \
-    VPXOR    (YTMP4, YTMP3, YTMP4)                                            \
-            RND_STEP_1_5(f,g,h,a,b,c,d,e,_i+3)                                \
-            RND_STEP_1_6(f,g,h,a,b,c,d,e,_i+3)                                \
-    VPXOR    (YTMP5, YTMP4, YTMP5)   /* YTMP5 = s1 {xDxC} */                  \
-            RND_STEP_1_7(f,g,h,a,b,c,d,e,_i+3)                                \
-    VPSHUFB  (YTMP5, YTMP5, SHUF_Y_DC00) /* YTMP5 = s1 {DC00} */              \
-            RND_STEP_1_8(f,g,h,a,b,c,d,e,_i+3)                                \
-    VPADDD   (Y0, YTMP5, YTMP0)      /* Y0 = {W[3], W[2], W[1], W[0]} */
-
-#if defined(HAVE_INTEL_RORX)
-
-#define MsgSched_Y_RORX(Y0,Y1,Y2,Y3,a,b,c,d,e,f,g,h,_i)                       \
-            RND_STEP_RORX_0_1(a,b,c,d,e,f,g,h,_i)                             \
-    VPALIGNR (YTMP1, Y1, Y0, 4)    /* YTMP1 = W[-15] */                       \
-            RND_STEP_RORX_0_2(a,b,c,d,e,f,g,h,_i)                             \
-    VPALIGNR (YTMP0, Y3, Y2, 4)    /* YTMP0 = W[-7] */                        \
-            RND_STEP_RORX_0_3(a,b,c,d,e,f,g,h,_i)                             \
-    VPSRLD   (YTMP2, YTMP1, 7)     /* YTMP2 = W[-15] >> 7 */                  \
-            RND_STEP_RORX_0_4(a,b,c,d,e,f,g,h,_i)                             \
-    VPSLLD   (YTMP3, YTMP1, 25)    /* YTEMP3 = W[-15] << (32-7) */            \
-            RND_STEP_RORX_0_5(a,b,c,d,e,f,g,h,_i)                             \
-    VPSRLD   (YTMP4, YTMP1, 18)    /* YTEMP4 = W[-15] >> 18 */                \
-            RND_STEP_RORX_0_6(a,b,c,d,e,f,g,h,_i)                             \
-    VPSLLD   (YTMP5, YTMP1, 14)    /* YTEMP5 = W[-15] << (32-18) */           \
-            RND_STEP_RORX_0_7(a,b,c,d,e,f,g,h,_i)                             \
-    VPOR     (YTMP2, YTMP2, YTMP3) /* YTMP2 = W[-15] >>> 7 */                 \
-            RND_STEP_RORX_0_8(a,b,c,d,e,f,g,h,_i)                             \
-    VPOR     (YTMP4, YTMP4, YTMP5) /* YTMP4 = W[-15] >>> 18 */                \
-            RND_STEP_RORX_1_1(h,a,b,c,d,e,f,g,_i+1)                           \
-    VPSRLD   (YTMP5, YTMP1, 3)     /* YTMP4 = W[-15] >> 3 */                  \
-            RND_STEP_RORX_1_2(h,a,b,c,d,e,f,g,_i+1)                           \
-    VPXOR    (YTMP2, YTMP2, YTMP4) /* YTMP2 = W[-15] >>> 7 ^ W[-15] >>> 18 */ \
-            RND_STEP_RORX_1_3(h,a,b,c,d,e,f,g,_i+1)                           \
-    VPSHUFD  (YTMP3, Y3, 0b11111010)  /* YTMP2 = W[-2] {BBAA}*/               \
-            RND_STEP_RORX_1_4(h,a,b,c,d,e,f,g,_i+1)                           \
-    VPXOR    (YTMP1, YTMP5, YTMP2)  /* YTMP1 = s0 */                          \
-            RND_STEP_RORX_1_5(h,a,b,c,d,e,f,g,_i+1)                           \
-    VPSRLD   (YTMP4, YTMP3, 10)      /* YTMP4 = W[-2] >> 10 {BBAA} */         \
-            RND_STEP_RORX_1_6(h,a,b,c,d,e,f,g,_i+1)                           \
-    VPSRLQ   (YTMP2, YTMP3, 19)      /* YTMP3 = W[-2] MY_ROR 19 {xBxA} */     \
-            RND_STEP_RORX_1_7(h,a,b,c,d,e,f,g,_i+1)                           \
-    VPSRLQ   (YTMP3, YTMP3, 17)      /* YTMP2 = W[-2] MY_ROR 17 {xBxA} */     \
-            RND_STEP_RORX_1_8(h,a,b,c,d,e,f,g,_i+1)                           \
-    VPADDD   (YTMP0, YTMP0, Y0)                                               \
-            RND_STEP_RORX_0_1(g,h,a,b,c,d,e,f,_i+2)                           \
-    VPXOR    (YTMP2, YTMP2, YTMP3)                                            \
-            RND_STEP_RORX_0_2(g,h,a,b,c,d,e,f,_i+2)                           \
-    VPXOR    (YTMP4, YTMP4, YTMP2)   /* YTMP4 = s1 {xBxA} */                  \
-            RND_STEP_RORX_0_3(g,h,a,b,c,d,e,f,_i+2)                           \
-    VPADDD   (YTMP0, YTMP0, YTMP1)  /* YTMP0 = W[-16] + W[-7] + s0 */         \
-            RND_STEP_RORX_0_4(g,h,a,b,c,d,e,f,_i+2)                           \
-    VPSHUFB  (YTMP4, YTMP4, SHUF_Y_00BA)  /* YTMP4 = s1 {00BA} */             \
-            RND_STEP_RORX_0_5(g,h,a,b,c,d,e,f,_i+2)                           \
-    VPADDD   (YTMP0, YTMP0, YTMP4)  /* YTMP0 = {..., ..., W[1], W[0]} */      \
-            RND_STEP_RORX_0_6(g,h,a,b,c,d,e,f,_i+2)                           \
-    VPSHUFD  (YTMP2, YTMP0, 0b01010000) /* YTMP2 = W[-2] {DDCC} */            \
-            RND_STEP_RORX_0_7(g,h,a,b,c,d,e,f,_i+2)                           \
-            RND_STEP_RORX_0_8(g,h,a,b,c,d,e,f,_i+2)                           \
-    VPSRLQ   (YTMP4, YTMP2, 17)      /* YTMP4 = W[-2] MY_ROR 17 {xDxC} */     \
-            RND_STEP_RORX_1_1(f,g,h,a,b,c,d,e,_i+3)                           \
-    VPSRLQ   (YTMP3, YTMP2, 19)       /* YTMP3 = W[-2] MY_ROR 19 {xDxC} */    \
-            RND_STEP_RORX_1_2(f,g,h,a,b,c,d,e,_i+3)                           \
-    VPSRLD   (YTMP5, YTMP2, 10)       /* YTMP5 = W[-2] >> 10 {DDCC} */        \
-            RND_STEP_RORX_1_3(f,g,h,a,b,c,d,e,_i+3)                           \
-    VPXOR    (YTMP4, YTMP4, YTMP3)                                            \
-            RND_STEP_RORX_1_4(f,g,h,a,b,c,d,e,_i+3)                           \
-    VPXOR    (YTMP5, YTMP5, YTMP4)   /* YTMP5 = s1 {xDxC} */                  \
-            RND_STEP_RORX_1_5(f,g,h,a,b,c,d,e,_i+3)                           \
-            RND_STEP_RORX_1_6(f,g,h,a,b,c,d,e,_i+3)                           \
-    VPSHUFB  (YTMP5, YTMP5, SHUF_Y_DC00) /* YTMP5 = s1 {DC00} */              \
-            RND_STEP_RORX_1_7(f,g,h,a,b,c,d,e,_i+3)                           \
-            RND_STEP_RORX_1_8(f,g,h,a,b,c,d,e,_i+3)                           \
-    VPADDD   (Y0, YTMP5, YTMP0)      /* Y0 = {W[3], W[2], W[1], W[0]} */      \
-
-#endif /* HAVE_INTEL_RORX */
-
-#define _VINSERTI128(op1,op2,op3,op4) \
-    "vinserti128	$" #op4 ", %" #op3 ", %" #op2 ", %" #op1 "\n\t"
-#define VINSERTI128(op1,op2,op3,op4)  \
-       _VINSERTI128(op1,op2,op3,op4)
-
-
-#define _LOAD_W_K_LOW(BYTE_FLIP_MASK, reg)   \
-    "# X0, X1, X2, X3 = W[0..15]\n\t"        \
-    "vmovdqu	  (%%" #reg "), %%xmm0\n\t"  \
-    "vmovdqu	16(%%" #reg "), %%xmm1\n\t"  \
-    VPSHUFB(X0, X0, BYTE_FLIP_MASK)          \
-    VPSHUFB(X1, X1, BYTE_FLIP_MASK)          \
-    "vmovdqu	32(%%" #reg "), %%xmm2\n\t"  \
-    "vmovdqu	48(%%" #reg "), %%xmm3\n\t"  \
-    VPSHUFB(X2, X2, BYTE_FLIP_MASK)          \
-    VPSHUFB(X3, X3, BYTE_FLIP_MASK)
-
-#define LOAD_W_K_LOW(BYTE_FLIP_MASK, reg) \
-       _LOAD_W_K_LOW(BYTE_FLIP_MASK, reg)
-
-
-#define _LOAD_W_K(BYTE_FLIP_Y_MASK, reg)      \
-    "# X0, X1, X2, X3 = W[0..15]\n\t"         \
-    "vmovdqu	   (%%" #reg "), %%xmm0\n\t"  \
-    "vmovdqu	 16(%%" #reg "), %%xmm1\n\t"  \
-    "vmovdqu	 64(%%" #reg "), %%xmm4\n\t"  \
-    "vmovdqu	 80(%%" #reg "), %%xmm5\n\t"  \
-    VINSERTI128(Y0, Y0, XTMP0, 1)             \
-    VINSERTI128(Y1, Y1, XTMP1, 1)             \
-    VPSHUFB(Y0, Y0, BYTE_FLIP_Y_MASK)         \
-    VPSHUFB(Y1, Y1, BYTE_FLIP_Y_MASK)         \
-    "vmovdqu	 32(%%" #reg "), %%xmm2\n\t"  \
-    "vmovdqu	 48(%%" #reg "), %%xmm3\n\t"  \
-    "vmovdqu	 96(%%" #reg "), %%xmm6\n\t"  \
-    "vmovdqu	112(%%" #reg "), %%xmm7\n\t"  \
-    VINSERTI128(Y2, Y2, XTMP2, 1)             \
-    VINSERTI128(Y3, Y3, XTMP3, 1)             \
-    VPSHUFB(Y2, Y2, BYTE_FLIP_Y_MASK)         \
-    VPSHUFB(Y3, Y3, BYTE_FLIP_Y_MASK)
-
-#define LOAD_W_K(BYTE_FLIP_Y_MASK, reg) \
-       _LOAD_W_K(BYTE_FLIP_Y_MASK, reg)
-
-
-#define _SET_W_Y_4(i)  \
-    "vpaddd	(" #i "*8)+ 0+%[K], %%ymm0, %%ymm4\n\t" \
-    "vpaddd	(" #i "*8)+32+%[K], %%ymm1, %%ymm5\n\t" \
-    "vmovdqu	%%ymm4, (" #i "*8)+ 0(" WK ")\n\t"      \
-    "vmovdqu	%%ymm5, (" #i "*8)+32(" WK ")\n\t"      \
-    "vpaddd	(" #i "*8)+64+%[K], %%ymm2, %%ymm4\n\t" \
-    "vpaddd	(" #i "*8)+96+%[K], %%ymm3, %%ymm5\n\t" \
-    "vmovdqu	%%ymm4, (" #i "*8)+64(" WK ")\n\t"      \
-    "vmovdqu	%%ymm5, (" #i "*8)+96(" WK ")\n\t"
-
-#define SET_W_Y_4(i) \
-       _SET_W_Y_4(i)
-
-
-static const ALIGN32 word64 mSHUF_Y_00BA[] =
-    { 0x0b0a090803020100, 0xFFFFFFFFFFFFFFFF,
-      0x0b0a090803020100, 0xFFFFFFFFFFFFFFFF }; /* shuffle xBxA -> 00BA */
-static const ALIGN32 word64 mSHUF_Y_DC00[] =
-    { 0xFFFFFFFFFFFFFFFF, 0x0b0a090803020100,
-      0xFFFFFFFFFFFFFFFF, 0x0b0a090803020100 }; /* shuffle xDxC -> DC00 */
-static const ALIGN32 word64 mBYTE_FLIP_Y_MASK[] =
-    { 0x0405060700010203, 0x0c0d0e0f08090a0b,
-      0x0405060700010203, 0x0c0d0e0f08090a0b };
-
-#define _INIT_MASKS_Y(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00) \
-    "vmovdqa	%[FLIP], %" #BYTE_FLIP_MASK "\n\t"          \
-    "vmovdqa	%[SHUF00BA], %" #SHUF_00BA "\n\t"           \
-    "vmovdqa	%[SHUFDC00], %" #SHUF_DC00 "\n\t"
-
-#define INIT_MASKS_Y(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00) \
-       _INIT_MASKS_Y(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00)
-
-static const ALIGN32 word32 K256[128] = {
-    0x428A2F98L, 0x71374491L, 0xB5C0FBCFL, 0xE9B5DBA5L,
-    0x428A2F98L, 0x71374491L, 0xB5C0FBCFL, 0xE9B5DBA5L,
-    0x3956C25BL, 0x59F111F1L, 0x923F82A4L, 0xAB1C5ED5L,
-    0x3956C25BL, 0x59F111F1L, 0x923F82A4L, 0xAB1C5ED5L,
-    0xD807AA98L, 0x12835B01L, 0x243185BEL, 0x550C7DC3L,
-    0xD807AA98L, 0x12835B01L, 0x243185BEL, 0x550C7DC3L,
-    0x72BE5D74L, 0x80DEB1FEL, 0x9BDC06A7L, 0xC19BF174L,
-    0x72BE5D74L, 0x80DEB1FEL, 0x9BDC06A7L, 0xC19BF174L,
-    0xE49B69C1L, 0xEFBE4786L, 0x0FC19DC6L, 0x240CA1CCL,
-    0xE49B69C1L, 0xEFBE4786L, 0x0FC19DC6L, 0x240CA1CCL,
-    0x2DE92C6FL, 0x4A7484AAL, 0x5CB0A9DCL, 0x76F988DAL,
-    0x2DE92C6FL, 0x4A7484AAL, 0x5CB0A9DCL, 0x76F988DAL,
-    0x983E5152L, 0xA831C66DL, 0xB00327C8L, 0xBF597FC7L,
-    0x983E5152L, 0xA831C66DL, 0xB00327C8L, 0xBF597FC7L,
-    0xC6E00BF3L, 0xD5A79147L, 0x06CA6351L, 0x14292967L,
-    0xC6E00BF3L, 0xD5A79147L, 0x06CA6351L, 0x14292967L,
-    0x27B70A85L, 0x2E1B2138L, 0x4D2C6DFCL, 0x53380D13L,
-    0x27B70A85L, 0x2E1B2138L, 0x4D2C6DFCL, 0x53380D13L,
-    0x650A7354L, 0x766A0ABBL, 0x81C2C92EL, 0x92722C85L,
-    0x650A7354L, 0x766A0ABBL, 0x81C2C92EL, 0x92722C85L,
-    0xA2BFE8A1L, 0xA81A664BL, 0xC24B8B70L, 0xC76C51A3L,
-    0xA2BFE8A1L, 0xA81A664BL, 0xC24B8B70L, 0xC76C51A3L,
-    0xD192E819L, 0xD6990624L, 0xF40E3585L, 0x106AA070L,
-    0xD192E819L, 0xD6990624L, 0xF40E3585L, 0x106AA070L,
-    0x19A4C116L, 0x1E376C08L, 0x2748774CL, 0x34B0BCB5L,
-    0x19A4C116L, 0x1E376C08L, 0x2748774CL, 0x34B0BCB5L,
-    0x391C0CB3L, 0x4ED8AA4AL, 0x5B9CCA4FL, 0x682E6FF3L,
-    0x391C0CB3L, 0x4ED8AA4AL, 0x5B9CCA4FL, 0x682E6FF3L,
-    0x748F82EEL, 0x78A5636FL, 0x84C87814L, 0x8CC70208L,
-    0x748F82EEL, 0x78A5636FL, 0x84C87814L, 0x8CC70208L,
-    0x90BEFFFAL, 0xA4506CEBL, 0xBEF9A3F7L, 0xC67178F2L,
-    0x90BEFFFAL, 0xA4506CEBL, 0xBEF9A3F7L, 0xC67178F2L
-};
-
-SHA256_NOINLINE static int Transform_Sha256_AVX2(wc_Sha256* sha256)
-{
-    __asm__ __volatile__ (
-
-        "subq	$512, %%rsp\n\t"
-        "leaq	32(%[sha256]), %%rax\n\t"
-
-    INIT_MASKS_Y(BYTE_FLIP_MASK, SHUF_Y_00BA, SHUF_Y_DC00)
-    LOAD_DIGEST()
-
-    LOAD_W_K_LOW(BYTE_FLIP_MASK, rax)
-
-        "movl	%%r9d, " L4 "\n\t"
-        "movl	%%r12d, " L1 "\n\t"
-        "xorl	%%r10d, " L4 "\n\t"
-
-    SET_W_Y_4(0)
-    MsgSched_Y(Y0, Y1, Y2, Y3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  0)
-    MsgSched_Y(Y1, Y2, Y3, Y0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3,  8)
-    MsgSched_Y(Y2, Y3, Y0, Y1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 16)
-    MsgSched_Y(Y3, Y0, Y1, Y2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 24)
-
-    SET_W_Y_4(16)
-    MsgSched_Y(Y0, Y1, Y2, Y3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 32)
-    MsgSched_Y(Y1, Y2, Y3, Y0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 40)
-    MsgSched_Y(Y2, Y3, Y0, Y1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 48)
-    MsgSched_Y(Y3, Y0, Y1, Y2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 56)
-
-    SET_W_Y_4(32)
-    MsgSched_Y(Y0, Y1, Y2, Y3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 64)
-    MsgSched_Y(Y1, Y2, Y3, Y0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 72)
-    MsgSched_Y(Y2, Y3, Y0, Y1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 80)
-    MsgSched_Y(Y3, Y0, Y1, Y2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 88)
-
-    SET_W_Y_4(48)
-    RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  96)
-    RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 104)
-    RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 112)
-    RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 120)
-
-    STORE_ADD_DIGEST()
-
-        "addq	$512, %%rsp\n\t"
-
-        :
-        : [FLIP]     "m" (mBYTE_FLIP_MASK[0]),
-          [SHUF00BA] "m" (mSHUF_Y_00BA[0]),
-          [SHUFDC00] "m" (mSHUF_Y_DC00[0]),
-          [sha256]   "r" (sha256),
-          [K]        "m" (K256)
-        : WORK_REGS, STATE_REGS, YMM_REGS, "memory"
-    );
-
-    return 0;
-}
-
-SHA256_NOINLINE static int Transform_Sha256_AVX2_Len(wc_Sha256* sha256,
-                                                     word32 len)
-{
-    if ((len & WC_SHA256_BLOCK_SIZE) != 0) {
-        XMEMCPY(sha256->buffer, sha256->data, WC_SHA256_BLOCK_SIZE);
-        Transform_Sha256_AVX2(sha256);
-        sha256->data += WC_SHA256_BLOCK_SIZE;
-        len -= WC_SHA256_BLOCK_SIZE;
-        if (len == 0)
-            return 0;
-    }
-
-    __asm__ __volatile__ (
-
-        "subq	$512, %%rsp\n\t"
-        "movq	120(%[sha256]), %%rax\n\t"
-
-    INIT_MASKS_Y(BYTE_FLIP_Y_MASK, SHUF_Y_00BA, SHUF_Y_DC00)
-    LOAD_DIGEST()
-
-        "# Start of loop processing two blocks\n"
-        "1:\n\t"
-
-    LOAD_W_K(BYTE_FLIP_Y_MASK, rax)
-
-        "movl	%%r9d, " L4 "\n\t"
-        "movl	%%r12d, " L1 "\n\t"
-        "xorl	%%r10d, " L4 "\n\t"
-
-    SET_W_Y_4(0)
-    MsgSched_Y(Y0, Y1, Y2, Y3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  0)
-    MsgSched_Y(Y1, Y2, Y3, Y0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3,  8)
-    MsgSched_Y(Y2, Y3, Y0, Y1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 16)
-    MsgSched_Y(Y3, Y0, Y1, Y2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 24)
-
-    SET_W_Y_4(16)
-    MsgSched_Y(Y0, Y1, Y2, Y3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 32)
-    MsgSched_Y(Y1, Y2, Y3, Y0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 40)
-    MsgSched_Y(Y2, Y3, Y0, Y1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 48)
-    MsgSched_Y(Y3, Y0, Y1, Y2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 56)
-
-    SET_W_Y_4(32)
-    MsgSched_Y(Y0, Y1, Y2, Y3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 64)
-    MsgSched_Y(Y1, Y2, Y3, Y0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 72)
-    MsgSched_Y(Y2, Y3, Y0, Y1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 80)
-    MsgSched_Y(Y3, Y0, Y1, Y2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 88)
-
-    SET_W_Y_4(48)
-    RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  96)
-    RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 104)
-    RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 112)
-    RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 120)
-
-    ADD_DIGEST()
-    STORE_DIGEST()
-
-        "movl	%%r9d, " L4 "\n\t"
-        "movl	%%r12d, " L1 "\n\t"
-        "xorl	%%r10d, " L4 "\n\t"
-
-    RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,   4)
-    RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3,  12)
-    RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  20)
-    RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3,  28)
-    RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  36)
-    RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3,  44)
-    RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  52)
-    RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3,  60)
-    RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  68)
-    RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3,  76)
-    RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  84)
-    RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3,  92)
-    RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 100)
-    RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 108)
-    RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 116)
-    RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 124)
-
-    ADD_DIGEST()
-
-        "movq	120(%[sha256]), %%rax\n\t"
-        "addq	$128, %%rax\n\t"
-        "subl	$128, %[len]\n\t"
-
-    STORE_DIGEST()
-
-        "movq	%%rax, 120(%[sha256])\n\t"
-        "jnz	1b\n\t"
-
-        "addq	$512, %%rsp\n\t"
-
-        :
-        : [FLIP]     "m" (mBYTE_FLIP_Y_MASK[0]),
-          [SHUF00BA] "m" (mSHUF_Y_00BA[0]),
-          [SHUFDC00] "m" (mSHUF_Y_DC00[0]),
-          [sha256]   "r" (sha256),
-          [len]      "r" (len),
-          [K]        "m" (K256)
-        : WORK_REGS, STATE_REGS, YMM_REGS, "memory"
-    );
-
-    return 0;
-}
-
-#if defined(HAVE_INTEL_RORX)
-SHA256_NOINLINE static int Transform_Sha256_AVX2_RORX(wc_Sha256* sha256)
-{
-    __asm__ __volatile__ (
-
-        "subq	$512, %%rsp\n\t"
-        "leaq	32(%[sha256]), %%rax\n\t"
-
-    INIT_MASKS_Y(BYTE_FLIP_MASK, SHUF_Y_00BA, SHUF_Y_DC00)
-    LOAD_W_K_LOW(BYTE_FLIP_MASK, rax)
-
-    LOAD_DIGEST()
-
-        "movl	%%r9d, " L4 "\n\t"
-        "rorx	$6, %%r12d, " L1 "\n\t"
-        "xorl	%%r10d, " L4 "\n\t"
-
-    SET_W_Y_4(0)
-    MsgSched_Y_RORX(Y0, Y1, Y2, Y3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  0)
-    MsgSched_Y_RORX(Y1, Y2, Y3, Y0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3,  8)
-    MsgSched_Y_RORX(Y2, Y3, Y0, Y1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 16)
-    MsgSched_Y_RORX(Y3, Y0, Y1, Y2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 24)
-
-    SET_W_Y_4(16)
-    MsgSched_Y_RORX(Y0, Y1, Y2, Y3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 32)
-    MsgSched_Y_RORX(Y1, Y2, Y3, Y0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 40)
-    MsgSched_Y_RORX(Y2, Y3, Y0, Y1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 48)
-    MsgSched_Y_RORX(Y3, Y0, Y1, Y2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 56)
-
-    SET_W_Y_4(32)
-    MsgSched_Y_RORX(Y0, Y1, Y2, Y3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 64)
-    MsgSched_Y_RORX(Y1, Y2, Y3, Y0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 72)
-    MsgSched_Y_RORX(Y2, Y3, Y0, Y1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 80)
-    MsgSched_Y_RORX(Y3, Y0, Y1, Y2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 88)
-
-    SET_W_Y_4(48)
-        "xorl	" L3 ", " L3 "\n\t"
-        "xorl	" L2 ", " L2 "\n\t"
-    RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  96)
-    RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 104)
-    RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 112)
-    RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 120)
-        /* Prev RND: h += Maj(a,b,c) */
-        "addl	" L3 ", %%r8d\n\t"
-
-    STORE_ADD_DIGEST()
-
-        "addq	$512, %%rsp\n\t"
-
-        :
-        : [FLIP]     "m" (mBYTE_FLIP_MASK[0]),
-          [SHUF00BA] "m" (mSHUF_Y_00BA[0]),
-          [SHUFDC00] "m" (mSHUF_Y_DC00[0]),
-          [sha256]   "r" (sha256),
-          [K]        "m" (K256)
-        : WORK_REGS, STATE_REGS, YMM_REGS, "memory"
-    );
-
-    return 0;
-}
-
-SHA256_NOINLINE static int Transform_Sha256_AVX2_RORX_Len(wc_Sha256* sha256,
-                                                          word32 len)
-{
-    if ((len & WC_SHA256_BLOCK_SIZE) != 0) {
-        XMEMCPY(sha256->buffer, sha256->data, WC_SHA256_BLOCK_SIZE);
-        Transform_Sha256_AVX2_RORX(sha256);
-        sha256->data += WC_SHA256_BLOCK_SIZE;
-        len -= WC_SHA256_BLOCK_SIZE;
-        if (len == 0)
-            return 0;
-    }
-
-    __asm__ __volatile__ (
-
-        "subq	$512, %%rsp\n\t"
-        "movq	120(%[sha256]), %%rax\n\t"
-
-    INIT_MASKS_Y(BYTE_FLIP_Y_MASK, SHUF_Y_00BA, SHUF_Y_DC00)
-    LOAD_DIGEST()
-
-        "# Start of loop processing two blocks\n"
-        "1:\n\t"
-
-    LOAD_W_K(BYTE_FLIP_Y_MASK, rax)
-
-        "movl	%%r9d, " L4 "\n\t"
-        "rorx	$6, %%r12d, " L1 "\n\t"
-        "xorl	%%r10d, " L4 "\n\t"
-
-    SET_W_Y_4(0)
-    MsgSched_Y_RORX(Y0, Y1, Y2, Y3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  0)
-    MsgSched_Y_RORX(Y1, Y2, Y3, Y0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3,  8)
-    MsgSched_Y_RORX(Y2, Y3, Y0, Y1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 16)
-    MsgSched_Y_RORX(Y3, Y0, Y1, Y2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 24)
-
-    SET_W_Y_4(16)
-    MsgSched_Y_RORX(Y0, Y1, Y2, Y3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 32)
-    MsgSched_Y_RORX(Y1, Y2, Y3, Y0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 40)
-    MsgSched_Y_RORX(Y2, Y3, Y0, Y1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 48)
-    MsgSched_Y_RORX(Y3, Y0, Y1, Y2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 56)
-
-    SET_W_Y_4(32)
-    MsgSched_Y_RORX(Y0, Y1, Y2, Y3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 64)
-    MsgSched_Y_RORX(Y1, Y2, Y3, Y0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 72)
-    MsgSched_Y_RORX(Y2, Y3, Y0, Y1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 80)
-    MsgSched_Y_RORX(Y3, Y0, Y1, Y2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 88)
-
-    SET_W_Y_4(48)
-        "xorl	" L3 ", " L3 "\n\t"
-        "xorl	" L2 ", " L2 "\n\t"
-    RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  96)
-    RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 104)
-    RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 112)
-    RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 120)
-        /* Prev RND: h += Maj(a,b,c) */
-        "addl	" L3 ", %%r8d\n\t"
-        "xorl	" L2 ", " L2 "\n\t"
-
-    ADD_DIGEST()
-    STORE_DIGEST()
-
-        "movl	%%r9d, " L4 "\n\t"
-        "xorl	" L3 ", " L3 "\n\t"
-        "xorl	%%r10d, " L4 "\n\t"
-
-    RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,   4)
-    RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3,  12)
-    RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  20)
-    RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3,  28)
-    RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  36)
-    RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3,  44)
-    RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  52)
-    RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3,  60)
-    RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  68)
-    RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3,  76)
-    RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,  84)
-    RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3,  92)
-    RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 100)
-    RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 108)
-    RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 116)
-    RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 124)
-        /* Prev RND: h += Maj(a,b,c) */
-        "addl	" L3 ", %%r8d\n\t"
-        "movq	120(%[sha256]), %%rax\n\t"
-
-    ADD_DIGEST()
-
-        "addq	$128, %%rax\n\t"
-        "subl	$128, %[len]\n\t"
-
-    STORE_DIGEST()
-
-        "movq	%%rax, 120(%[sha256])\n\t"
-        "jnz	1b\n\t"
-
-        "addq	$512, %%rsp\n\t"
-
-        :
-        : [FLIP]     "m" (mBYTE_FLIP_Y_MASK[0]),
-          [SHUF00BA] "m" (mSHUF_Y_00BA[0]),
-          [SHUFDC00] "m" (mSHUF_Y_DC00[0]),
-          [sha256]   "r" (sha256),
-          [len]      "r" (len),
-          [K]        "m" (K256)
-        : WORK_REGS, STATE_REGS, YMM_REGS, "memory"
-    );
-
-    return 0;
-}
-#endif  /* HAVE_INTEL_RORX */
-#endif  /* HAVE_INTEL_AVX2 */
-
-
 #ifdef WOLFSSL_SHA224
 
 #ifdef STM32_HASH_SHA2
@@ -2572,6 +1287,13 @@
 
 #elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_HASH)
     /* functions defined in wolfcrypt/src/port/caam/caam_sha256.c */
+
+#elif defined(WOLFSSL_AFALG_HASH)
+    #error SHA224 currently not supported with AF_ALG enabled
+
+#elif defined(WOLFSSL_DEVCRYPTO_HASH)
+    /* implemented in wolfcrypt/src/port/devcrypto/devcrypt_hash.c */
+
 #else
 
     #define NEED_SOFT_SHA224
@@ -2602,6 +1324,9 @@
         /* choose best Transform function under this runtime environment */
         Sha256_SetTransform();
     #endif
+    #if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+        sha224->flags = 0;
+    #endif
 
         return ret;
     }
@@ -2699,7 +1424,7 @@
 
 #ifdef WOLFSSL_SMALL_STACK_CACHE
     if (sha224->W != NULL) {
-        XFREE(sha224->W, NULL, DYNAMIC_TYPE_RNG);
+        XFREE(sha224->W, NULL, DYNAMIC_TYPE_DIGEST);
         sha224->W = NULL;
     }
 #endif
@@ -2707,6 +1432,10 @@
     #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA224)
         wolfAsync_DevCtxFree(&sha224->asyncDev, WOLFSSL_ASYNC_MARKER_SHA224);
     #endif /* WOLFSSL_ASYNC_CRYPT */
+
+    #ifdef WOLFSSL_PIC32MZ_HASH
+        wc_Sha256Pic32Free(sha224);
+    #endif
     }
 #endif /* WOLFSSL_SHA224 */
 
@@ -2723,7 +1452,7 @@
 
 #ifdef WOLFSSL_SMALL_STACK_CACHE
     if (sha256->W != NULL) {
-        XFREE(sha256->W, NULL, DYNAMIC_TYPE_RNG);
+        XFREE(sha256->W, NULL, DYNAMIC_TYPE_DIGEST);
         sha256->W = NULL;
     }
 #endif
@@ -2731,6 +1460,31 @@
 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA256)
     wolfAsync_DevCtxFree(&sha256->asyncDev, WOLFSSL_ASYNC_MARKER_SHA256);
 #endif /* WOLFSSL_ASYNC_CRYPT */
+#ifdef WOLFSSL_PIC32MZ_HASH
+    wc_Sha256Pic32Free(sha256);
+#endif
+#if defined(WOLFSSL_AFALG_HASH)
+    if (sha256->alFd > 0) {
+        close(sha256->alFd);
+        sha256->alFd = -1; /* avoid possible double close on socket */
+    }
+    if (sha256->rdFd > 0) {
+        close(sha256->rdFd);
+        sha256->rdFd = -1; /* avoid possible double close on socket */
+    }
+#endif /* WOLFSSL_AFALG_HASH */
+#ifdef WOLFSSL_DEVCRYPTO_HASH
+    wc_DevCryptoFree(&sha256->ctx);
+#endif /* WOLFSSL_DEVCRYPTO */
+#if (defined(WOLFSSL_AFALG_HASH) && defined(WOLFSSL_AFALG_HASH_KEEP)) || \
+    (defined(WOLFSSL_DEVCRYPTO_HASH) && defined(WOLFSSL_DEVCRYPTO_HASH_KEEP)) || \
+    (defined(WOLFSSL_RENESAS_TSIP_CRYPT) && \
+    !defined(NO_WOLFSSL_RENESAS_TSIP_CRYPT_HASH))
+    if (sha256->msg != NULL) {
+        XFREE(sha256->msg, sha256->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        sha256->msg = NULL;
+    }
+#endif
 }
 
 #endif /* !WOLFSSL_TI_HASH */
@@ -2769,11 +1523,44 @@
     #ifdef WOLFSSL_ASYNC_CRYPT
         ret = wolfAsync_DevCopy(&src->asyncDev, &dst->asyncDev);
     #endif
+    #if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+        dst->flags |= WC_HASH_FLAG_ISCOPY;
+    #endif
 
         return ret;
     }
+
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+    int wc_Sha224SetFlags(wc_Sha224* sha224, word32 flags)
+    {
+        if (sha224) {
+            sha224->flags = flags;
+        }
+        return 0;
+    }
+    int wc_Sha224GetFlags(wc_Sha224* sha224, word32* flags)
+    {
+        if (sha224 && flags) {
+            *flags = sha224->flags;
+        }
+        return 0;
+    }
+#endif
+
 #endif /* WOLFSSL_SHA224 */
 
+#ifdef WOLFSSL_AFALG_HASH
+    /* implemented in wolfcrypt/src/port/af_alg/afalg_hash.c */
+
+#elif defined(WOLFSSL_DEVCRYPTO_HASH)
+    /* implemented in wolfcrypt/src/port/devcrypto/devcrypt_hash.c */
+
+#elif defined(WOLFSSL_RENESAS_TSIP_CRYPT) && \
+    !defined(NO_WOLFSSL_RENESAS_TSIP_CRYPT_HASH)
+
+    /* implemented in wolfcrypt/src/port/Renesas/renesas_tsip_sha.c */
+#else
+
 int wc_Sha256GetHash(wc_Sha256* sha256, byte* hash)
 {
     int ret;
@@ -2782,9 +1569,24 @@
     if (sha256 == NULL || hash == NULL)
         return BAD_FUNC_ARG;
 
+#if  defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+    !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+    if(sha256->ctx.mode == ESP32_SHA_INIT){
+        esp_sha_try_hw_lock(&sha256->ctx);
+    }
+    if(sha256->ctx.mode == ESP32_SHA_HW)
+    {
+        esp_sha256_digest_process(sha256, 0);
+    }
+#endif
     ret = wc_Sha256Copy(sha256, &tmpSha256);
     if (ret == 0) {
         ret = wc_Sha256Final(&tmpSha256, hash);
+#if  defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+    !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+        sha256->ctx.mode = ESP32_SHA_SW;
+#endif
+
         wc_Sha256Free(&tmpSha256);
     }
     return ret;
@@ -2807,9 +1609,36 @@
 #ifdef WOLFSSL_PIC32MZ_HASH
     ret = wc_Pic32HashCopy(&src->cache, &dst->cache);
 #endif
+#if  defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+    !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+     dst->ctx.mode = src->ctx.mode;
+     dst->ctx.isfirstblock = src->ctx.isfirstblock;
+     dst->ctx.sha_type = src->ctx.sha_type;
+#endif
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+     dst->flags |= WC_HASH_FLAG_ISCOPY;
+#endif
 
     return ret;
 }
+#endif
+
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+int wc_Sha256SetFlags(wc_Sha256* sha256, word32 flags)
+{
+    if (sha256) {
+        sha256->flags = flags;
+    }
+    return 0;
+}
+int wc_Sha256GetFlags(wc_Sha256* sha256, word32* flags)
+{
+    if (sha256 && flags) {
+        *flags = sha256->flags;
+    }
+    return 0;
+}
+#endif
 #endif /* !WOLFSSL_TI_HASH */
 
 #endif /* NO_SHA256 */
--- a/wolfcrypt/src/sha3.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/sha3.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* sha3.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -26,7 +26,8 @@
 
 #include <wolfssl/wolfcrypt/settings.h>
 
-#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_XILINX_CRYPT)
+#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_XILINX_CRYPT) && \
+   !defined(WOLFSSL_AFALG_XILINX_SHA3)
 
 #if defined(HAVE_FIPS) && \
 	defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)
@@ -42,6 +43,7 @@
 
 #include <wolfssl/wolfcrypt/sha3.h>
 #include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/hash.h>
 
 #ifdef NO_INLINE
     #include <wolfssl/wolfcrypt/misc.h>
@@ -77,7 +79,7 @@
     0x0000000080000001UL, 0x8000000080008008UL
 };
 
-/* Indeces used in swap and rotate operation. */
+/* Indices used in swap and rotate operation. */
 #define K_I_0   10
 #define K_I_1    7
 #define K_I_2   11
@@ -290,7 +292,7 @@
     0x0000000080000001UL, 0x8000000080008008UL
 };
 
-/* Indeces used in swap and rotate operation. */
+/* Indices used in swap and rotate operation. */
 #define KI_0     6
 #define KI_1    12
 #define KI_2    18
@@ -568,6 +570,9 @@
     for (i = 0; i < 25; i++)
         sha3->s[i] = 0;
     sha3->i = 0;
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+    sha3->flags = 0;
+#endif
 
     return 0;
 }
@@ -631,13 +636,18 @@
  * len   Number of bytes in output.
  * returns 0 on success.
  */
-static int Sha3Final(wc_Sha3* sha3, byte* hash, byte p, byte l)
+static int Sha3Final(wc_Sha3* sha3, byte padChar, byte* hash, byte p, byte l)
 {
     byte i;
     byte *s8 = (byte *)sha3->s;
 
     sha3->t[p * 8 - 1]  = 0x00;
-    sha3->t[  sha3->i]  = 0x06;
+#ifdef WOLFSSL_HASH_FLAGS
+    if (p == WC_SHA3_256_COUNT && sha3->flags & WC_HASH_SHA3_KECCAK256) {
+        padChar = 0x01;
+    }
+#endif
+    sha3->t[  sha3->i]  = padChar;
     sha3->t[p * 8 - 1] |= 0x80;
     for (i=sha3->i + 1; i < p * 8 - 1; i++)
         sha3->t[i] = 0;
@@ -692,7 +702,7 @@
  */
 static int wc_Sha3Update(wc_Sha3* sha3, const byte* data, word32 len, byte p)
 {
-    int ret = 0;
+    int ret;
 
     if (sha3 == NULL || (data == NULL && len > 0)) {
         return BAD_FUNC_ARG;
@@ -700,13 +710,19 @@
 
 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA3)
     if (sha3->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA3) {
-    #if defined(HAVE_INTEL_QA)
-        return IntelQaSymSha3(&sha3->asyncDev, NULL, data, len);
+    #if defined(HAVE_INTEL_QA) && defined(QAT_V2)
+        /* QAT only supports SHA3_256 */
+        if (p == WC_SHA3_256_COUNT) {
+            ret = IntelQaSymSha3(&sha3->asyncDev, NULL, data, len);
+            if (ret != NOT_COMPILED_IN)
+                return ret;
+            /* fall-through when unavailable */
+        }
     #endif
     }
 #endif /* WOLFSSL_ASYNC_CRYPT */
 
-    Sha3Update(sha3, data, len, p);
+    ret = Sha3Update(sha3, data, len, p);
 
     return ret;
 }
@@ -729,14 +745,20 @@
 
 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA3)
     if (sha3->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA3) {
-    #if defined(HAVE_INTEL_QA)
-        return IntelQaSymSha3(&sha3->asyncDev, hash, NULL,
-                              SHA3_DIGEST_SIZE);
+    #if defined(HAVE_INTEL_QA) && defined(QAT_V2)
+        /* QAT only supports SHA3_256 */
+        /* QAT SHA-3 only supported on v2 (8970 or later cards) */
+        if (len == WC_SHA3_256_DIGEST_SIZE) {
+            ret = IntelQaSymSha3(&sha3->asyncDev, hash, NULL, len);
+            if (ret != NOT_COMPILED_IN)
+                return ret;
+            /* fall-through when unavailable */
+        }
     #endif
     }
 #endif /* WOLFSSL_ASYNC_CRYPT */
 
-    ret = Sha3Final(sha3, hash, p, len);
+    ret = Sha3Final(sha3, 0x06, hash, p, len);
     if (ret != 0)
         return ret;
 
@@ -780,6 +802,9 @@
 #ifdef WOLFSSL_ASYNC_CRYPT
     ret = wolfAsync_DevCopy(&src->asyncDev, &dst->asyncDev);
 #endif
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+     dst->flags |= WC_HASH_FLAG_ISCOPY;
+#endif
 
     return ret;
 }
@@ -817,7 +842,7 @@
  * devId  Device identifier for asynchronous operation.
  * returns 0 on success.
  */
-WOLFSSL_API int wc_InitSha3_224(wc_Sha3* sha3, void* heap, int devId)
+int wc_InitSha3_224(wc_Sha3* sha3, void* heap, int devId)
 {
     return wc_InitSha3(sha3, heap, devId);
 }
@@ -829,7 +854,7 @@
  * len   Length of the message data.
  * returns 0 on success.
  */
-WOLFSSL_API int wc_Sha3_224_Update(wc_Sha3* sha3, const byte* data, word32 len)
+int wc_Sha3_224_Update(wc_Sha3* sha3, const byte* data, word32 len)
 {
     return wc_Sha3Update(sha3, data, len, WC_SHA3_224_COUNT);
 }
@@ -841,7 +866,7 @@
  * hash  Buffer to hold the hash result. Must be at least 28 bytes.
  * returns 0 on success.
  */
-WOLFSSL_API int wc_Sha3_224_Final(wc_Sha3* sha3, byte* hash)
+int wc_Sha3_224_Final(wc_Sha3* sha3, byte* hash)
 {
     return wc_Sha3Final(sha3, hash, WC_SHA3_224_COUNT, WC_SHA3_224_DIGEST_SIZE);
 }
@@ -852,7 +877,7 @@
  * sha3  wc_Sha3 object holding state.
  * returns 0 on success.
  */
-WOLFSSL_API void wc_Sha3_224_Free(wc_Sha3* sha3)
+void wc_Sha3_224_Free(wc_Sha3* sha3)
 {
     wc_Sha3Free(sha3);
 }
@@ -865,7 +890,7 @@
  * hash  Buffer to hold the hash result. Must be at least 28 bytes.
  * returns 0 on success.
  */
-WOLFSSL_API int wc_Sha3_224_GetHash(wc_Sha3* sha3, byte* hash)
+int wc_Sha3_224_GetHash(wc_Sha3* sha3, byte* hash)
 {
     return wc_Sha3GetHash(sha3, hash, WC_SHA3_224_COUNT, WC_SHA3_224_DIGEST_SIZE);
 }
@@ -876,7 +901,7 @@
  * dst  wc_Sha3 object to copy into.
  * returns 0 on success.
  */
-WOLFSSL_API int wc_Sha3_224_Copy(wc_Sha3* src, wc_Sha3* dst)
+int wc_Sha3_224_Copy(wc_Sha3* src, wc_Sha3* dst)
 {
     return wc_Sha3Copy(src, dst);
 }
@@ -889,7 +914,7 @@
  * devId  Device identifier for asynchronous operation.
  * returns 0 on success.
  */
-WOLFSSL_API int wc_InitSha3_256(wc_Sha3* sha3, void* heap, int devId)
+int wc_InitSha3_256(wc_Sha3* sha3, void* heap, int devId)
 {
     return wc_InitSha3(sha3, heap, devId);
 }
@@ -901,7 +926,7 @@
  * len   Length of the message data.
  * returns 0 on success.
  */
-WOLFSSL_API int wc_Sha3_256_Update(wc_Sha3* sha3, const byte* data, word32 len)
+int wc_Sha3_256_Update(wc_Sha3* sha3, const byte* data, word32 len)
 {
     return wc_Sha3Update(sha3, data, len, WC_SHA3_256_COUNT);
 }
@@ -913,7 +938,7 @@
  * hash  Buffer to hold the hash result. Must be at least 32 bytes.
  * returns 0 on success.
  */
-WOLFSSL_API int wc_Sha3_256_Final(wc_Sha3* sha3, byte* hash)
+int wc_Sha3_256_Final(wc_Sha3* sha3, byte* hash)
 {
     return wc_Sha3Final(sha3, hash, WC_SHA3_256_COUNT, WC_SHA3_256_DIGEST_SIZE);
 }
@@ -924,7 +949,7 @@
  * sha3  wc_Sha3 object holding state.
  * returns 0 on success.
  */
-WOLFSSL_API void wc_Sha3_256_Free(wc_Sha3* sha3)
+void wc_Sha3_256_Free(wc_Sha3* sha3)
 {
     wc_Sha3Free(sha3);
 }
@@ -937,7 +962,7 @@
  * hash  Buffer to hold the hash result. Must be at least 32 bytes.
  * returns 0 on success.
  */
-WOLFSSL_API int wc_Sha3_256_GetHash(wc_Sha3* sha3, byte* hash)
+int wc_Sha3_256_GetHash(wc_Sha3* sha3, byte* hash)
 {
     return wc_Sha3GetHash(sha3, hash, WC_SHA3_256_COUNT, WC_SHA3_256_DIGEST_SIZE);
 }
@@ -948,7 +973,7 @@
  * dst  wc_Sha3 object to copy into.
  * returns 0 on success.
  */
-WOLFSSL_API int wc_Sha3_256_Copy(wc_Sha3* src, wc_Sha3* dst)
+int wc_Sha3_256_Copy(wc_Sha3* src, wc_Sha3* dst)
 {
     return wc_Sha3Copy(src, dst);
 }
@@ -961,7 +986,7 @@
  * devId  Device identifier for asynchronous operation.
  * returns 0 on success.
  */
-WOLFSSL_API int wc_InitSha3_384(wc_Sha3* sha3, void* heap, int devId)
+int wc_InitSha3_384(wc_Sha3* sha3, void* heap, int devId)
 {
     return wc_InitSha3(sha3, heap, devId);
 }
@@ -973,7 +998,7 @@
  * len   Length of the message data.
  * returns 0 on success.
  */
-WOLFSSL_API int wc_Sha3_384_Update(wc_Sha3* sha3, const byte* data, word32 len)
+int wc_Sha3_384_Update(wc_Sha3* sha3, const byte* data, word32 len)
 {
     return wc_Sha3Update(sha3, data, len, WC_SHA3_384_COUNT);
 }
@@ -985,7 +1010,7 @@
  * hash  Buffer to hold the hash result. Must be at least 48 bytes.
  * returns 0 on success.
  */
-WOLFSSL_API int wc_Sha3_384_Final(wc_Sha3* sha3, byte* hash)
+int wc_Sha3_384_Final(wc_Sha3* sha3, byte* hash)
 {
     return wc_Sha3Final(sha3, hash, WC_SHA3_384_COUNT, WC_SHA3_384_DIGEST_SIZE);
 }
@@ -996,7 +1021,7 @@
  * sha3  wc_Sha3 object holding state.
  * returns 0 on success.
  */
-WOLFSSL_API void wc_Sha3_384_Free(wc_Sha3* sha3)
+void wc_Sha3_384_Free(wc_Sha3* sha3)
 {
     wc_Sha3Free(sha3);
 }
@@ -1009,7 +1034,7 @@
  * hash  Buffer to hold the hash result. Must be at least 48 bytes.
  * returns 0 on success.
  */
-WOLFSSL_API int wc_Sha3_384_GetHash(wc_Sha3* sha3, byte* hash)
+int wc_Sha3_384_GetHash(wc_Sha3* sha3, byte* hash)
 {
     return wc_Sha3GetHash(sha3, hash, WC_SHA3_384_COUNT, WC_SHA3_384_DIGEST_SIZE);
 }
@@ -1020,7 +1045,7 @@
  * dst  wc_Sha3 object to copy into.
  * returns 0 on success.
  */
-WOLFSSL_API int wc_Sha3_384_Copy(wc_Sha3* src, wc_Sha3* dst)
+int wc_Sha3_384_Copy(wc_Sha3* src, wc_Sha3* dst)
 {
     return wc_Sha3Copy(src, dst);
 }
@@ -1033,7 +1058,7 @@
  * devId  Device identifier for asynchronous operation.
  * returns 0 on success.
  */
-WOLFSSL_API int wc_InitSha3_512(wc_Sha3* sha3, void* heap, int devId)
+int wc_InitSha3_512(wc_Sha3* sha3, void* heap, int devId)
 {
     return wc_InitSha3(sha3, heap, devId);
 }
@@ -1045,7 +1070,7 @@
  * len   Length of the message data.
  * returns 0 on success.
  */
-WOLFSSL_API int wc_Sha3_512_Update(wc_Sha3* sha3, const byte* data, word32 len)
+int wc_Sha3_512_Update(wc_Sha3* sha3, const byte* data, word32 len)
 {
     return wc_Sha3Update(sha3, data, len, WC_SHA3_512_COUNT);
 }
@@ -1057,7 +1082,7 @@
  * hash  Buffer to hold the hash result. Must be at least 64 bytes.
  * returns 0 on success.
  */
-WOLFSSL_API int wc_Sha3_512_Final(wc_Sha3* sha3, byte* hash)
+int wc_Sha3_512_Final(wc_Sha3* sha3, byte* hash)
 {
     return wc_Sha3Final(sha3, hash, WC_SHA3_512_COUNT, WC_SHA3_512_DIGEST_SIZE);
 }
@@ -1068,7 +1093,7 @@
  * sha3  wc_Sha3 object holding state.
  * returns 0 on success.
  */
-WOLFSSL_API void wc_Sha3_512_Free(wc_Sha3* sha3)
+void wc_Sha3_512_Free(wc_Sha3* sha3)
 {
     wc_Sha3Free(sha3);
 }
@@ -1081,7 +1106,7 @@
  * hash  Buffer to hold the hash result. Must be at least 64 bytes.
  * returns 0 on success.
  */
-WOLFSSL_API int wc_Sha3_512_GetHash(wc_Sha3* sha3, byte* hash)
+int wc_Sha3_512_GetHash(wc_Sha3* sha3, byte* hash)
 {
     return wc_Sha3GetHash(sha3, hash, WC_SHA3_512_COUNT, WC_SHA3_512_DIGEST_SIZE);
 }
@@ -1092,10 +1117,101 @@
  * dst  wc_Sha3 object to copy into.
  * returns 0 on success.
  */
-WOLFSSL_API int wc_Sha3_512_Copy(wc_Sha3* src, wc_Sha3* dst)
+int wc_Sha3_512_Copy(wc_Sha3* src, wc_Sha3* dst)
 {
     return wc_Sha3Copy(src, dst);
 }
 
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+int wc_Sha3_SetFlags(wc_Sha3* sha3, word32 flags)
+{
+    if (sha3) {
+        sha3->flags = flags;
+    }
+    return 0;
+}
+int wc_Sha3_GetFlags(wc_Sha3* sha3, word32* flags)
+{
+    if (sha3 && flags) {
+        *flags = sha3->flags;
+    }
+    return 0;
+}
+#endif
+
+#if defined(WOLFSSL_SHAKE256)
+/* Initialize the state for a Shake256 hash operation.
+ *
+ * shake  wc_Shake object holding state.
+ * heap   Heap reference for dynamic memory allocation. (Used in async ops.)
+ * devId  Device identifier for asynchronous operation.
+ * returns 0 on success.
+ */
+int wc_InitShake256(wc_Shake* shake, void* heap, int devId)
+{
+    return wc_InitSha3(shake, heap, devId);
+}
+
+/* Update the SHAKE256 hash state with message data.
+ *
+ * shake  wc_Shake object holding state.
+ * data  Message data to be hashed.
+ * len   Length of the message data.
+ * returns 0 on success.
+ */
+int wc_Shake256_Update(wc_Shake* shake, const byte* data, word32 len)
+{
+    if (shake == NULL || (data == NULL && len > 0)) {
+         return BAD_FUNC_ARG;
+    }
+
+    return Sha3Update(shake, data, len, WC_SHA3_256_COUNT);
+}
+
+/* Calculate the SHAKE256 hash based on all the message data seen.
+ * The state is initialized ready for a new message to hash.
+ *
+ * shake  wc_Shake object holding state.
+ * hash  Buffer to hold the hash result. Must be at least 64 bytes.
+ * returns 0 on success.
+ */
+int wc_Shake256_Final(wc_Shake* shake, byte* hash, word32 hashLen)
+{
+    int ret;
+
+    if (shake == NULL || hash == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    ret = Sha3Final(shake, 0x1f, hash, WC_SHA3_256_COUNT, hashLen);
+    if (ret != 0)
+        return ret;
+
+    return InitSha3(shake);  /* reset state */
+}
+
+/* Dispose of any dynamically allocated data from the SHAKE256 operation.
+ * (Required for async ops.)
+ *
+ * shake  wc_Shake object holding state.
+ * returns 0 on success.
+ */
+void wc_Shake256_Free(wc_Shake* shake)
+{
+    wc_Sha3Free(shake);
+}
+
+/* Copy the state of the SHA3-512 operation.
+ *
+ * src  wc_Shake object holding state top copy.
+ * dst  wc_Shake object to copy into.
+ * returns 0 on success.
+ */
+int wc_Shake256_Copy(wc_Shake* src, wc_Shake* dst)
+{
+    return wc_Sha3Copy(src, dst);
+}
+#endif
+
 #endif /* WOLFSSL_SHA3 */
 
--- a/wolfcrypt/src/sha512.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/sha512.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* sha512.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -26,7 +26,7 @@
 
 #include <wolfssl/wolfcrypt/settings.h>
 
-#if defined(WOLFSSL_SHA512) || defined(WOLFSSL_SHA384)
+#if (defined(WOLFSSL_SHA512) || defined(WOLFSSL_SHA384)) && !defined(WOLFSSL_ARMASM)
 
 #if defined(HAVE_FIPS) && \
 	defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)
@@ -43,6 +43,7 @@
 #include <wolfssl/wolfcrypt/sha512.h>
 #include <wolfssl/wolfcrypt/error-crypt.h>
 #include <wolfssl/wolfcrypt/cpuid.h>
+#include <wolfssl/wolfcrypt/hash.h>
 
 /* deprecated USE_SLOW_SHA2 (replaced with USE_SLOW_SHA512) */
 #if defined(USE_SLOW_SHA2) && !defined(USE_SLOW_SHA512)
@@ -146,10 +147,9 @@
 
 
 #if defined(USE_INTEL_SPEEDUP)
-    #define HAVE_INTEL_AVX1
-
     #if defined(__GNUC__) && ((__GNUC__ < 4) || \
                               (__GNUC__ == 4 && __GNUC_MINOR__ <= 8))
+        #undef  NO_AVX2_SUPPORT
         #define NO_AVX2_SUPPORT
     #endif
     #if defined(__clang__) && ((__clang_major__ < 3) || \
@@ -187,6 +187,7 @@
 
 #if defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_HASH)
     /* functions defined in wolfcrypt/src/port/caam/caam_sha.c */
+
 #else
 
 #ifdef WOLFSSL_SHA512
@@ -209,6 +210,24 @@
     sha512->loLen   = 0;
     sha512->hiLen   = 0;
 
+#if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+    !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+
+    sha512->ctx.sha_type = SHA2_512;
+     /* always start firstblock = 1 when using hw engine */
+    sha512->ctx.isfirstblock = 1;
+    if(sha512->ctx.mode == ESP32_SHA_HW) {
+        /* release hw */
+        esp_sha_hw_unlock();
+    }
+    /* always set mode as INIT
+    *  whether using HW or SW is determined at first call of update()
+    */
+    sha512->ctx.mode = ESP32_SHA_INIT;
+#endif
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+    sha512->flags = 0;
+#endif
     return 0;
 }
 
@@ -288,22 +307,31 @@
      * supported. Also, let's setup a macro for proper linkage w/o ABI conflicts
      */
 
+#ifdef __cplusplus
+    extern "C" {
+#endif
+
     #if defined(HAVE_INTEL_AVX1)
-        static int Transform_Sha512_AVX1(wc_Sha512 *sha512);
-        static int Transform_Sha512_AVX1_Len(wc_Sha512 *sha512, word32 len);
+        extern int Transform_Sha512_AVX1(wc_Sha512 *sha512);
+        extern int Transform_Sha512_AVX1_Len(wc_Sha512 *sha512, word32 len);
     #endif
     #if defined(HAVE_INTEL_AVX2)
-        static int Transform_Sha512_AVX2(wc_Sha512 *sha512);
-        static int Transform_Sha512_AVX2_Len(wc_Sha512 *sha512, word32 len);
+        extern int Transform_Sha512_AVX2(wc_Sha512 *sha512);
+        extern int Transform_Sha512_AVX2_Len(wc_Sha512 *sha512, word32 len);
         #if defined(HAVE_INTEL_RORX)
-            static int Transform_Sha512_AVX1_RORX(wc_Sha512 *sha512);
-            static int Transform_Sha512_AVX1_RORX_Len(wc_Sha512 *sha512,
+            extern int Transform_Sha512_AVX1_RORX(wc_Sha512 *sha512);
+            extern int Transform_Sha512_AVX1_RORX_Len(wc_Sha512 *sha512,
                                                       word32 len);
-            static int Transform_Sha512_AVX2_RORX(wc_Sha512 *sha512);
-            static int Transform_Sha512_AVX2_RORX_Len(wc_Sha512 *sha512,
+            extern int Transform_Sha512_AVX2_RORX(wc_Sha512 *sha512);
+            extern int Transform_Sha512_AVX2_RORX_Len(wc_Sha512 *sha512,
                                                       word32 len);
         #endif
     #endif
+
+#ifdef __cplusplus
+    }  /* extern "C" */
+#endif
+
     static int _Transform_Sha512(wc_Sha512 *sha512);
     static int (*Transform_Sha512_p)(wc_Sha512* sha512) = _Transform_Sha512;
     static int (*Transform_Sha512_Len_p)(wc_Sha512* sha512, word32 len) = NULL;
@@ -311,7 +339,7 @@
     static int intel_flags;
     #define Transform_Sha512(sha512)     (*Transform_Sha512_p)(sha512)
     #define Transform_Sha512_Len(sha512, len) \
-        (*Transform_Sha512_Len_p)(sha512, len)
+                                          (*Transform_Sha512_Len_p)(sha512, len)
 
     static void Sha512_SetTransform()
     {
@@ -353,56 +381,48 @@
 
         transform_check = 1;
     }
-
-    int wc_InitSha512_ex(wc_Sha512* sha512, void* heap, int devId)
-    {
-        int ret = InitSha512(sha512);
-
-        (void)heap;
-        (void)devId;
-
-        Sha512_SetTransform();
-
-        return ret;
-    }
-
 #endif /* WOLFSSL_SHA512 */
 
 #else
     #define Transform_Sha512(sha512) _Transform_Sha512(sha512)
 
-    #ifdef WOLFSSL_SHA512
+#endif
+
+#ifdef WOLFSSL_SHA512
 
-    int wc_InitSha512_ex(wc_Sha512* sha512, void* heap, int devId)
-    {
-        int ret = 0;
+int wc_InitSha512_ex(wc_Sha512* sha512, void* heap, int devId)
+{
+    int ret = 0;
 
-        if (sha512 == NULL)
-            return BAD_FUNC_ARG;
+    if (sha512 == NULL)
+        return BAD_FUNC_ARG;
 
-        sha512->heap = heap;
+    sha512->heap = heap;
 
-        ret = InitSha512(sha512);
-        if (ret != 0)
-            return ret;
+    ret = InitSha512(sha512);
+    if (ret != 0)
+        return ret;
 
-    #ifdef WOLFSSL_SMALL_STACK_CACHE
-        sha512->W = NULL;
-    #endif
+#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+    Sha512_SetTransform();
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK_CACHE
+    sha512->W = NULL;
+#endif
 
-    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA512)
-        ret = wolfAsync_DevCtxInit(&sha512->asyncDev,
-                            WOLFSSL_ASYNC_MARKER_SHA512, sha512->heap, devId);
-    #else
-        (void)devId;
-    #endif /* WOLFSSL_ASYNC_CRYPT */
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA512)
+    ret = wolfAsync_DevCtxInit(&sha512->asyncDev,
+                        WOLFSSL_ASYNC_MARKER_SHA512, sha512->heap, devId);
+#else
+    (void)devId;
+#endif /* WOLFSSL_ASYNC_CRYPT */
 
-        return ret;
-    }
+    return ret;
+}
 
-    #endif /* WOLFSSL_SHA512 */
+#endif /* WOLFSSL_SHA512 */
 
-#endif /* Hardware Acceleration */
 
 static const word64 K512[80] = {
     W64LIT(0x428a2f98d728ae22), W64LIT(0x7137449123ef65cd),
@@ -563,6 +583,8 @@
     if (sha512->buffLen >= WC_SHA512_BLOCK_SIZE)
         return BUFFER_E;
 
+    AddLength(sha512, len);
+
     if (sha512->buffLen > 0) {
         word32 add = min(len, WC_SHA512_BLOCK_SIZE - sha512->buffLen);
         if (add > 0) {
@@ -579,15 +601,27 @@
             if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags))
         #endif
             {
+        #if !defined(WOLFSSL_ESP32WROOM32_CRYPT) || \
+             defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
                 ByteReverseWords64(sha512->buffer, sha512->buffer,
-                                                          WC_SHA512_BLOCK_SIZE);
+                                                         WC_SHA512_BLOCK_SIZE);
+        #endif
             }
     #endif
+    #if !defined(WOLFSSL_ESP32WROOM32_CRYPT) || \
+         defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
             ret = Transform_Sha512(sha512);
-            if (ret == 0) {
-                AddLength(sha512, WC_SHA512_BLOCK_SIZE);
+    #else
+            if(sha512->ctx.mode == ESP32_SHA_INIT) {
+                esp_sha_try_hw_lock(&sha512->ctx);
+            }
+            ret = esp_sha512_process(sha512);
+            if(ret == 0 && sha512->ctx.mode == ESP32_SHA_SW){
+                ret = Transform_Sha512(sha512);
+            }
+    #endif
+            if (ret == 0)
                 sha512->buffLen = 0;
-            }
             else
                 len = 0;
         }
@@ -598,7 +632,6 @@
         word32 blocksLen = len & ~(WC_SHA512_BLOCK_SIZE-1);
 
         if (blocksLen > 0) {
-            AddLength(sha512, blocksLen);
             sha512->data = data;
             /* Byte reversal performed in function if required. */
             Transform_Sha512_Len(sha512, blocksLen);
@@ -608,18 +641,21 @@
     }
     else
 #endif
-#if !defined(LITTLE_ENDIAN_ORDER) || defined(FREESCALE_MMCAU_SHA) || \
-                            defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+#if !defined(LITTLE_ENDIAN_ORDER) || defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
     {
-        word32 blocksLen = len & ~(WC_SHA512_BLOCK_SIZE-1);
-
-        AddLength(sha512, blocksLen);
         while (len >= WC_SHA512_BLOCK_SIZE) {
             XMEMCPY(local, data, WC_SHA512_BLOCK_SIZE);
 
             data += WC_SHA512_BLOCK_SIZE;
             len  -= WC_SHA512_BLOCK_SIZE;
 
+        #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+            if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags))
+            {
+                ByteReverseWords64(sha512->buffer, sha512->buffer,
+                                                          WC_SHA512_BLOCK_SIZE);
+            }
+        #endif
             /* Byte reversal performed in function if required. */
             ret = Transform_Sha512(sha512);
             if (ret != 0)
@@ -628,18 +664,28 @@
     }
 #else
     {
-        word32 blocksLen = len & ~(WC_SHA512_BLOCK_SIZE-1);
-
-        AddLength(sha512, blocksLen);
         while (len >= WC_SHA512_BLOCK_SIZE) {
             XMEMCPY(local, data, WC_SHA512_BLOCK_SIZE);
 
             data += WC_SHA512_BLOCK_SIZE;
             len  -= WC_SHA512_BLOCK_SIZE;
-
+    #if !defined(WOLFSSL_ESP32WROOM32_CRYPT) || \
+         defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
             ByteReverseWords64(sha512->buffer, sha512->buffer,
-                                                          WC_SHA512_BLOCK_SIZE);
+                                                       WC_SHA512_BLOCK_SIZE);
+    #endif
+    #if !defined(WOLFSSL_ESP32WROOM32_CRYPT) || \
+         defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
             ret = Transform_Sha512(sha512);
+    #else
+            if(sha512->ctx.mode == ESP32_SHA_INIT) {
+                esp_sha_try_hw_lock(&sha512->ctx);
+            }
+            ret = esp_sha512_process(sha512);
+            if(ret == 0 && sha512->ctx.mode == ESP32_SHA_SW){
+                ret = Transform_Sha512(sha512);
+            }
+    #endif
             if (ret != 0)
                 break;
         }
@@ -686,8 +732,6 @@
         return BAD_FUNC_ARG;
     }
 
-    AddLength(sha512, sha512->buffLen);               /* before adding pads */
-
     local[sha512->buffLen++] = 0x80;  /* add 1 */
 
     /* pad with zeros */
@@ -699,11 +743,26 @@
         if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags))
     #endif
         {
+
+       #if !defined(WOLFSSL_ESP32WROOM32_CRYPT) || \
+            defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
             ByteReverseWords64(sha512->buffer,sha512->buffer,
-                                                             WC_SHA512_BLOCK_SIZE);
+                                                         WC_SHA512_BLOCK_SIZE);
+       #endif
         }
 #endif /* LITTLE_ENDIAN_ORDER */
+#if !defined(WOLFSSL_ESP32WROOM32_CRYPT) || \
+     defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
         ret = Transform_Sha512(sha512);
+#else
+       if(sha512->ctx.mode == ESP32_SHA_INIT) {
+            esp_sha_try_hw_lock(&sha512->ctx);
+       }
+        ret = esp_sha512_process(sha512);
+        if(ret == 0 && sha512->ctx.mode == ESP32_SHA_SW){
+            ret = Transform_Sha512(sha512);
+        }
+#endif
         if (ret != 0)
             return ret;
 
@@ -721,19 +780,37 @@
     #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
         if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags))
     #endif
+    #if !defined(WOLFSSL_ESP32WROOM32_CRYPT) || \
+         defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
             ByteReverseWords64(sha512->buffer, sha512->buffer, WC_SHA512_PAD_SIZE);
+    #endif
 #endif
     /* ! length ordering dependent on digest endian type ! */
 
+#if !defined(WOLFSSL_ESP32WROOM32_CRYPT) || \
+     defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
     sha512->buffer[WC_SHA512_BLOCK_SIZE / sizeof(word64) - 2] = sha512->hiLen;
     sha512->buffer[WC_SHA512_BLOCK_SIZE / sizeof(word64) - 1] = sha512->loLen;
+#endif
+
 #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
     if (IS_INTEL_AVX1(intel_flags) || IS_INTEL_AVX2(intel_flags))
         ByteReverseWords64(&(sha512->buffer[WC_SHA512_BLOCK_SIZE / sizeof(word64) - 2]),
                            &(sha512->buffer[WC_SHA512_BLOCK_SIZE / sizeof(word64) - 2]),
                            WC_SHA512_BLOCK_SIZE - WC_SHA512_PAD_SIZE);
 #endif
+#if !defined(WOLFSSL_ESP32WROOM32_CRYPT) || \
+    defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
     ret = Transform_Sha512(sha512);
+#else
+    if(sha512->ctx.mode == ESP32_SHA_INIT) {
+        esp_sha_try_hw_lock(&sha512->ctx);
+    }
+    ret = esp_sha512_digest_process(sha512, 1);
+    if(ret == 0 && sha512->ctx.mode == ESP32_SHA_SW) {
+        ret = Transform_Sha512(sha512);
+    }
+#endif
     if (ret != 0)
         return ret;
 
@@ -793,7 +870,6 @@
     return InitSha512(sha512);  /* reset state */
 }
 
-
 int wc_InitSha512(wc_Sha512* sha512)
 {
     return wc_InitSha512_ex(sha512, NULL, INVALID_DEVID);
@@ -816,1809 +892,8 @@
 #endif /* WOLFSSL_ASYNC_CRYPT */
 }
 
-
-#if defined(HAVE_INTEL_AVX1)
-
-static word64 mBYTE_FLIP_MASK[] =  { 0x0001020304050607, 0x08090a0b0c0d0e0f };
-
-#define W_0     xmm0
-#define W_2     xmm1
-#define W_4     xmm2
-#define W_6     xmm3
-#define W_8     xmm4
-#define W_10    xmm5
-#define W_12    xmm6
-#define W_14    xmm7
-
-#define W_M15   xmm12
-#define W_M7    xmm13
-#define MASK    xmm14
-
-#define XTMP1   xmm8
-#define XTMP2   xmm9
-#define XTMP3   xmm10
-#define XTMP4   xmm11
-
-#define XMM_REGS \
-    "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",       \
-    "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"
-
-#define _VPALIGNR(dest, src1, src2, bits)                               \
-    "vpalignr	$" #bits ", %%" #src2 ", %%" #src1 ", %%" #dest "\n\t"
-#define VPALIGNR(dest, src1, src2, bits) \
-       _VPALIGNR(dest, src1, src2, bits)
-
-#define _V_SHIFT_R(dest, src, bits)                             \
-    "vpsrlq	$" #bits ", %%" #src ", %%" #dest "\n\t"
-#define V_SHIFT_R(dest, src, bits) \
-       _V_SHIFT_R(dest, src, bits)
-
-#define _V_SHIFT_L(dest, src, bits)                             \
-    "vpsllq	$" #bits ", %%" #src ", %%" #dest "\n\t"
-#define V_SHIFT_L(dest, src, bits) \
-       _V_SHIFT_L(dest, src, bits)
-
-#define _V_ADD(dest, src1, src2)                                \
-    "vpaddq	%%" #src1 ", %%" #src2 ", %%" #dest "\n\t"
-#define V_ADD(dest, src1, src2) \
-       _V_ADD(dest, src1, src2)
-
-#define _V_XOR(dest, src1, src2)                                \
-    "vpxor	%%" #src1 ", %%" #src2 ", %%" #dest "\n\t"
-#define V_XOR(dest, src1, src2) \
-       _V_XOR(dest, src1, src2)
-
-#define _V_OR(dest, src1, src2)                                 \
-    "vpor	%%" #src1 ", %%" #src2 ", %%" #dest "\n\t"
-#define V_OR(dest, src1, src2) \
-       _V_OR(dest, src1, src2)
-
-#define RA  %%r8
-#define RB  %%r9
-#define RC  %%r10
-#define RD  %%r11
-#define RE  %%r12
-#define RF  %%r13
-#define RG  %%r14
-#define RH  %%r15
-
-#define STATE_REGS "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
-
-#define L1  "%%rax"
-#define L2  "%%rcx"
-#define L3  "%%rdx"
-#define L4  "%%rbx"
-#define WX  "%%rsp"
-
-#define WORK_REGS "rax", "rbx", "rcx", "rdx"
-
-#define RND_0_1(a,b,c,d,e,f,g,h,i)                   \
-    /* L1 = e >>> 23 */                              \
-    "rorq	 $23, " L1 "\n\t"                    \
-
-#define RND_0_2(a,b,c,d,e,f,g,h,i)                   \
-    /* L3 = a */                                     \
-    "movq	"#a", " L3 "\n\t"                    \
-    /* L2 = f */                                     \
-    "movq	"#f", " L2 "\n\t"                    \
-    /* h += W_X[i] */                                \
-    "addq	("#i")*8(" WX "), "#h"\n\t"          \
-    /* L2 = f ^ g */                                 \
-    "xorq	"#g", " L2 "\n\t"                    \
-
-#define RND_0_2_A(a,b,c,d,e,f,g,h,i)                 \
-    /* L3 = a */                                     \
-    "movq	"#a", " L3 "\n\t"                    \
-    /* L2 = f */                                     \
-    "movq	"#f", " L2 "\n\t"                    \
-
-#define RND_0_2_B(a,b,c,d,e,f,g,h,i)                 \
-    /* h += W_X[i] */                                \
-    "addq	("#i")*8(" WX "), "#h"\n\t"          \
-    /* L2 = f ^ g */                                 \
-    "xorq	"#g", " L2 "\n\t"                    \
-
-#define RND_0_3(a,b,c,d,e,f,g,h,i)                   \
-    /* L1 = (e >>> 23) ^ e */                        \
-    "xorq	"#e", " L1 "\n\t"                    \
-    /* L2 = (f ^ g) & e */                           \
-    "andq	"#e", " L2 "\n\t"                    \
-
-#define RND_0_4(a,b,c,d,e,f,g,h,i)                   \
-    /* L1 = ((e >>> 23) ^ e) >>> 4 */                \
-    "rorq	 $4, " L1 "\n\t"                     \
-    /* L2 = ((f ^ g) & e) ^ g */                     \
-    "xorq	"#g", " L2 "\n\t"                    \
-
-#define RND_0_5(a,b,c,d,e,f,g,h,i)                   \
-    /* L1 = (((e >>> 23) ^ e) >>> 4) ^ e */          \
-    "xorq	"#e", " L1 "\n\t"                    \
-    /* h += Ch(e,f,g) */                             \
-    "addq	" L2 ", "#h"\n\t"                    \
-
-#define RND_0_6(a,b,c,d,e,f,g,h,i)                   \
-    /* L1 = ((((e >>> 23) ^ e) >>> 4) ^ e) >>> 14 */ \
-    "rorq	$14, " L1 "\n\t"                     \
-    /* L3 = a ^ b */                                 \
-    "xorq	"#b", " L3 "\n\t"                    \
-
-#define RND_0_7(a,b,c,d,e,f,g,h,i)                   \
-    /* h += Sigma1(e) */                             \
-    "addq	" L1 ", "#h"\n\t"                    \
-    /* L2 = a */                                     \
-    "movq	"#a", " L2 "\n\t"                    \
-
-#define RND_0_8(a,b,c,d,e,f,g,h,i)                   \
-    /* L4 = (a ^ b) & (b ^ c) */                     \
-    "andq	" L3 ", " L4 "\n\t"                  \
-    /* L2 = a >>> 5 */                               \
-    "rorq	$5, " L2 "\n\t"                      \
-
-#define RND_0_9(a,b,c,d,e,f,g,h,i)                   \
-    /* L2 = (a >>> 5) ^ a */                         \
-    "xorq	"#a", " L2 "\n\t"                    \
-    /* L4 = ((a ^ b) & (b ^ c) ^ b */                \
-    "xorq	"#b", " L4 "\n\t"                    \
-
-#define RND_0_10(a,b,c,d,e,f,g,h,i)                  \
-    /* L2 = ((a >>> 5) ^ a) >>> 6 */                 \
-    "rorq	 $6, " L2 "\n\t"                     \
-    /* d += h */                                     \
-    "addq	"#h", "#d"\n\t"                      \
-
-#define RND_0_11(a,b,c,d,e,f,g,h,i)                  \
-    /* L2 = (((a >>> 5) ^ a) >>> 6) ^ a */           \
-    "xorq	"#a", " L2 "\n\t"                    \
-    /* h += Sigma0(a) */                             \
-    "addq	" L4 ", "#h"\n\t"                    \
-
-#define RND_0_12(a,b,c,d,e,f,g,h,i)                  \
-    /* L2 = ((((a >>> 5) ^ a) >>> 6) ^ a) >>> 28 */  \
-    "rorq	$28, " L2 "\n\t"                     \
-    /* d (= e next RND) */                           \
-    "movq	"#d", " L1 "\n\t"                    \
-    /* h += Maj(a,b,c) */                            \
-    "addq	" L2 ", "#h"\n\t"                    \
-
-#define RND_1_1(a,b,c,d,e,f,g,h,i)                   \
-    /* L1 = e >>> 23 */                              \
-    "rorq	 $23, " L1 "\n\t"                    \
-
-#define RND_1_2(a,b,c,d,e,f,g,h,i)                   \
-    /* L4 = a */                                     \
-    "movq	"#a", " L4 "\n\t"                    \
-    /* L2 = f */                                     \
-    "movq	"#f", " L2 "\n\t"                    \
-    /* h += W_X[i] */                                \
-    "addq	("#i")*8(" WX "), "#h"\n\t"          \
-    /* L2 = f ^ g */                                 \
-    "xorq	"#g", " L2 "\n\t"                    \
-
-#define RND_1_2_A(a,b,c,d,e,f,g,h,i)                 \
-    /* L4 = a */                                     \
-    "movq	"#a", " L4 "\n\t"                    \
-    /* L2 = f */                                     \
-    "movq	"#f", " L2 "\n\t"                    \
-
-#define RND_1_2_B(a,b,c,d,e,f,g,h,i)                 \
-    /* h += W_X[i] */                                \
-    "addq	("#i")*8(" WX "), "#h"\n\t"          \
-    /* L2 = f ^ g */                                 \
-    "xorq	"#g", " L2 "\n\t"                    \
-
-#define RND_1_3(a,b,c,d,e,f,g,h,i)                   \
-    /* L1 = (e >>> 23) ^ e */                        \
-    "xorq	"#e", " L1 "\n\t"                    \
-    /* L2 = (f ^ g) & e */                           \
-    "andq	"#e", " L2 "\n\t"                    \
-
-#define RND_1_4(a,b,c,d,e,f,g,h,i)                   \
-    /* ((e >>> 23) ^ e) >>> 4 */                     \
-    "rorq	 $4, " L1 "\n\t"                     \
-    /* ((f ^ g) & e) ^ g */                          \
-    "xorq	"#g", " L2 "\n\t"                    \
-
-#define RND_1_5(a,b,c,d,e,f,g,h,i)                   \
-    /* (((e >>> 23) ^ e) >>> 4) ^ e */               \
-    "xorq	"#e", " L1 "\n\t"                    \
-    /* h += Ch(e,f,g) */                             \
-    "addq	" L2 ", "#h"\n\t"                    \
-
-#define RND_1_6(a,b,c,d,e,f,g,h,i)                   \
-    /* L1 = ((((e >>> 23) ^ e) >>> 4) ^ e) >>> 14 */ \
-    "rorq	$14, " L1 "\n\t"                     \
-    /* L4 = a ^ b */                                 \
-    "xorq	"#b", " L4 "\n\t"                    \
-
-#define RND_1_7(a,b,c,d,e,f,g,h,i)                   \
-    /* h += Sigma1(e) */                             \
-    "addq	" L1 ", "#h"\n\t"                    \
-    /* L2 = a */                                     \
-    "movq	"#a", " L2 "\n\t"                    \
- 
-#define RND_1_8(a,b,c,d,e,f,g,h,i)                   \
-    /* L3 = (a ^ b) & (b ^ c) */                     \
-    "andq	" L4 ", " L3 "\n\t"                  \
-    /* L2 = a >>> 5 */                               \
-    "rorq	$5, " L2 "\n\t"                      \
-
-#define RND_1_9(a,b,c,d,e,f,g,h,i)                   \
-    /* L2 = (a >>> 5) ^ a */                         \
-    "xorq	"#a", " L2 "\n\t"                    \
-    /* L3 = ((a ^ b) & (b ^ c) ^ b */                \
-    "xorq	"#b", " L3 "\n\t"                    \
-
-#define RND_1_10(a,b,c,d,e,f,g,h,i)                  \
-    /* L2 = ((a >>> 5) ^ a) >>> 6 */                 \
-    "rorq	 $6, " L2 "\n\t"                     \
-    /* d += h */                                     \
-    "addq	"#h", "#d"\n\t"                      \
-
-#define RND_1_11(a,b,c,d,e,f,g,h,i)                  \
-    /* L2 = (((a >>> 5) ^ a) >>> 6) ^ a */           \
-    "xorq	"#a", " L2 "\n\t"                    \
-    /* h += Sigma0(a) */                             \
-    "addq	" L3 ", "#h"\n\t"                    \
-
-#define RND_1_12(a,b,c,d,e,f,g,h,i)                  \
-    /* L2 = ((((a >>> 5) ^ a) >>> 6) ^ a) >>> 28 */  \
-    "rorq	$28, " L2 "\n\t"                     \
-    /* d (= e next RND) */                           \
-    "movq	"#d", " L1 "\n\t"                    \
-    /* h += Maj(a,b,c) */                            \
-    "addq	" L2 ", "#h"\n\t"                    \
-
-
-#define MsgSched2(W_0,W_2,W_4,W_6,W_8,W_10,W_12,W_14,a,b,c,d,e,f,g,h,i) \
-            RND_0_1(a,b,c,d,e,f,g,h,i)                                  \
-    VPALIGNR(W_M15, W_2, W_0, 8)                                        \
-    VPALIGNR(W_M7, W_10, W_8, 8)                                        \
-            RND_0_2(a,b,c,d,e,f,g,h,i)                                  \
-    V_SHIFT_R(XTMP1, W_M15, 1)                                          \
-    V_SHIFT_L(XTMP2, W_M15, 63)                                         \
-            RND_0_3(a,b,c,d,e,f,g,h,i)                                  \
-            RND_0_4(a,b,c,d,e,f,g,h,i)                                  \
-    V_SHIFT_R(XTMP3, W_M15, 8)                                          \
-    V_SHIFT_L(XTMP4, W_M15, 56)                                         \
-            RND_0_5(a,b,c,d,e,f,g,h,i)                                  \
-            RND_0_6(a,b,c,d,e,f,g,h,i)                                  \
-    V_OR(XTMP1, XTMP2, XTMP1)                                           \
-    V_OR(XTMP3, XTMP4, XTMP3)                                           \
-            RND_0_7(a,b,c,d,e,f,g,h,i)                                  \
-            RND_0_8(a,b,c,d,e,f,g,h,i)                                  \
-    V_SHIFT_R(XTMP4, W_M15, 7)                                          \
-    V_XOR(XTMP1, XTMP3, XTMP1)                                          \
-            RND_0_9(a,b,c,d,e,f,g,h,i)                                  \
-            RND_0_10(a,b,c,d,e,f,g,h,i)                                 \
-    V_XOR(XTMP1, XTMP4, XTMP1)                                          \
-    V_ADD(W_0, W_0, W_M7)                                               \
-            RND_0_11(a,b,c,d,e,f,g,h,i)                                 \
-            RND_0_12(a,b,c,d,e,f,g,h,i)                                 \
-            RND_1_1(h,a,b,c,d,e,f,g,i+1)                                \
-    V_ADD(W_0, W_0, XTMP1)                                              \
-            RND_1_2(h,a,b,c,d,e,f,g,i+1)                                \
-    V_SHIFT_R(XTMP1, W_14, 19)                                          \
-    V_SHIFT_L(XTMP2, W_14, 45)                                          \
-            RND_1_3(h,a,b,c,d,e,f,g,i+1)                                \
-            RND_1_4(h,a,b,c,d,e,f,g,i+1)                                \
-    V_SHIFT_R(XTMP3, W_14, 61)                                          \
-    V_SHIFT_L(XTMP4, W_14, 3)                                           \
-            RND_1_5(h,a,b,c,d,e,f,g,i+1)                                \
-            RND_1_6(h,a,b,c,d,e,f,g,i+1)                                \
-            RND_1_7(h,a,b,c,d,e,f,g,i+1)                                \
-    V_OR(XTMP1, XTMP2, XTMP1)                                           \
-    V_OR(XTMP3, XTMP4, XTMP3)                                           \
-            RND_1_8(h,a,b,c,d,e,f,g,i+1)                                \
-            RND_1_9(h,a,b,c,d,e,f,g,i+1)                                \
-    V_XOR(XTMP1, XTMP3, XTMP1)                                          \
-    V_SHIFT_R(XTMP4, W_14, 6)                                           \
-            RND_1_10(h,a,b,c,d,e,f,g,i+1)                               \
-            RND_1_11(h,a,b,c,d,e,f,g,i+1)                               \
-    V_XOR(XTMP1, XTMP4, XTMP1)                                          \
-            RND_1_12(h,a,b,c,d,e,f,g,i+1)                               \
-    V_ADD(W_0, W_0, XTMP1)                                              \
-
-#define RND_ALL_2(a, b, c, d, e, f, g, h, i) \
-    RND_0_1 (a, b, c, d, e, f, g, h, i )     \
-    RND_0_2 (a, b, c, d, e, f, g, h, i )     \
-    RND_0_3 (a, b, c, d, e, f, g, h, i )     \
-    RND_0_4 (a, b, c, d, e, f, g, h, i )     \
-    RND_0_5 (a, b, c, d, e, f, g, h, i )     \
-    RND_0_6 (a, b, c, d, e, f, g, h, i )     \
-    RND_0_7 (a, b, c, d, e, f, g, h, i )     \
-    RND_0_8 (a, b, c, d, e, f, g, h, i )     \
-    RND_0_9 (a, b, c, d, e, f, g, h, i )     \
-    RND_0_10(a, b, c, d, e, f, g, h, i )     \
-    RND_0_11(a, b, c, d, e, f, g, h, i )     \
-    RND_0_12(a, b, c, d, e, f, g, h, i )     \
-    RND_1_1 (h, a, b, c, d, e, f, g, i+1)    \
-    RND_1_2 (h, a, b, c, d, e, f, g, i+1)    \
-    RND_1_3 (h, a, b, c, d, e, f, g, i+1)    \
-    RND_1_4 (h, a, b, c, d, e, f, g, i+1)    \
-    RND_1_5 (h, a, b, c, d, e, f, g, i+1)    \
-    RND_1_6 (h, a, b, c, d, e, f, g, i+1)    \
-    RND_1_7 (h, a, b, c, d, e, f, g, i+1)    \
-    RND_1_8 (h, a, b, c, d, e, f, g, i+1)    \
-    RND_1_9 (h, a, b, c, d, e, f, g, i+1)    \
-    RND_1_10(h, a, b, c, d, e, f, g, i+1)    \
-    RND_1_11(h, a, b, c, d, e, f, g, i+1)    \
-    RND_1_12(h, a, b, c, d, e, f, g, i+1)
-
-
-#if defined(HAVE_INTEL_RORX)
-
-#define RND_RORX_0_1(a, b, c, d, e, f, g, h, i) \
-    /* L1 = e>>>14 */                           \
-    "rorxq	$14, "#e", " L1 "\n\t"          \
-    /* L2 = e>>>18 */                           \
-    "rorxq	$18, "#e", " L2 "\n\t"          \
-    /* Prev RND: h += Maj(a,b,c) */             \
-    "addq	" L3 ", "#a"\n\t"               \
-
-#define RND_RORX_0_2(a, b, c, d, e, f, g, h, i) \
-    /* h += w_k */                              \
-    "addq	("#i")*8(" WX "), "#h"\n\t"     \
-    /* L3 = f */                                \
-    "movq	"#f", " L3 "\n\t"               \
-    /* L2 = (e>>>14) ^ (e>>>18) */              \
-    "xorq	" L1 ", " L2 "\n\t"             \
-
-#define RND_RORX_0_3(a, b, c, d, e, f, g, h, i) \
-    /* L3 = f ^ g */                            \
-    "xorq	"#g", " L3 "\n\t"               \
-    /* L1 = e>>>41 */                           \
-    "rorxq	$41, "#e", " L1 "\n\t"          \
-    /* L1 = Sigma1(e) */                        \
-    "xorq	" L2 ", " L1 "\n\t"             \
-
-#define RND_RORX_0_4(a, b, c, d, e, f, g, h, i) \
-    /* L3 = (f ^ g) & e */                      \
-    "andq	"#e", " L3 "\n\t"               \
-    /* h += Sigma1(e) */                        \
-    "addq	" L1 ", "#h"\n\t"               \
-    /* L1 = a>>>28 */                           \
-    "rorxq	$28, "#a", " L1 "\n\t"          \
-
-#define RND_RORX_0_5(a, b, c, d, e, f, g, h, i) \
-    /* L2 = a>>>34 */                           \
-    "rorxq	$34, "#a", " L2 "\n\t"          \
-    /* L3 = Ch(e,f,g) */                        \
-    "xorq	"#g", " L3 "\n\t"               \
-    /* L2 = (a>>>28) ^ (a>>>34) */              \
-    "xorq	" L1 ", " L2 "\n\t"             \
-
-#define RND_RORX_0_6(a, b, c, d, e, f, g, h, i) \
-    /* L1 = a>>>39 */                           \
-    "rorxq	$39, "#a", " L1 "\n\t"          \
-    /* h += Ch(e,f,g) */                        \
-    "addq	" L3 ", "#h"\n\t"               \
-    /* L1 = Sigma0(a) */                        \
-    "xorq	" L2 ", " L1 "\n\t"             \
-
-#define RND_RORX_0_7(a, b, c, d, e, f, g, h, i) \
-    /* L3 = b */                                \
-    "movq	"#b", " L3 "\n\t"               \
-    /* d += h + w_k + Sigma1(e) + Ch(e,f,g) */  \
-    "addq	"#h", "#d"\n\t"                 \
-    /* L3 = a ^ b */                            \
-    "xorq	"#a", " L3 "\n\t"               \
-
-#define RND_RORX_0_8(a, b, c, d, e, f, g, h, i) \
-    /* L4 = (a ^ b) & (b ^ c) */                \
-    "andq	" L3 ", " L4 "\n\t"             \
-    /* h += Sigma0(a) */                        \
-    "addq	" L1 ", "#h"\n\t"               \
-    /* L4 = Maj(a,b,c) */                       \
-    "xorq	"#b", " L4 "\n\t"               \
-
-#define RND_RORX_1_1(a, b, c, d, e, f, g, h, i) \
-    /* L1 = e>>>14 */                           \
-    "rorxq	$14, "#e", " L1 "\n\t"          \
-    /* L2 = e>>>18 */                           \
-    "rorxq	$18, "#e", " L2 "\n\t"          \
-    /* Prev RND: h += Maj(a,b,c) */             \
-    "addq	" L4 ", "#a"\n\t"               \
-
-#define RND_RORX_1_2(a, b, c, d, e, f, g, h, i) \
-    /* h += w_k */                              \
-    "addq	("#i")*8(" WX "), "#h"\n\t"     \
-    /* L4 = f */                                \
-    "movq	"#f", " L4 "\n\t"               \
-    /* L2 = (e>>>14) ^ (e>>>18) */              \
-    "xorq	" L1 ", " L2 "\n\t"             \
-
-#define RND_RORX_1_3(a, b, c, d, e, f, g, h, i) \
-    /* L4 = f ^ g */                            \
-    "xorq	"#g", " L4 "\n\t"               \
-    /* L1 = e>>>41 */                           \
-    "rorxq	$41, "#e", " L1 "\n\t"          \
-    /* L1 = Sigma1(e) */                        \
-    "xorq	" L2 ", " L1 "\n\t"             \
-
-#define RND_RORX_1_4(a, b, c, d, e, f, g, h, i) \
-    /* L4 = (f ^ g) & e */                      \
-    "andq	"#e", " L4 "\n\t"               \
-    /* h += Sigma1(e) */                        \
-    "addq	" L1 ", "#h"\n\t"               \
-    /* L1 = a>>>28 */                           \
-    "rorxq	$28, "#a", " L1 "\n\t"          \
-
-#define RND_RORX_1_5(a, b, c, d, e, f, g, h, i) \
-    /* L2 = a>>>34 */                           \
-    "rorxq	$34, "#a", " L2 "\n\t"          \
-    /* L4 = Ch(e,f,g) */                        \
-    "xorq	"#g", " L4 "\n\t"               \
-    /* L2 = (a>>>28) ^ (a>>>34) */              \
-    "xorq	" L1 ", " L2 "\n\t"             \
-
-#define RND_RORX_1_6(a, b, c, d, e, f, g, h, i) \
-    /* L1 = a>>>39 */                           \
-    "rorxq	$39, "#a", " L1 "\n\t"          \
-    /* h += Ch(e,f,g) */                        \
-    "addq	" L4 ", "#h"\n\t"               \
-    /* L1 = Sigma0(a) */                        \
-    "xorq	" L2 ", " L1 "\n\t"             \
-
-#define RND_RORX_1_7(a, b, c, d, e, f, g, h, i) \
-    /* L4 = b */                                \
-    "movq	"#b", " L4 "\n\t"               \
-    /* d += h + w_k + Sigma1(e) + Ch(e,f,g) */  \
-    "addq	"#h", "#d"\n\t"                 \
-    /* L4 = a ^ b */                            \
-    "xorq	"#a", " L4 "\n\t"               \
-
-#define RND_RORX_1_8(a, b, c, d, e, f, g, h, i) \
-    /* L2 = (a ^ b) & (b ^ c) */                \
-    "andq	" L4 ", " L3 "\n\t"             \
-    /* h += Sigma0(a) */                        \
-    "addq	" L1 ", "#h"\n\t"               \
-    /* L3 = Maj(a,b,c) */                       \
-    "xorq	"#b", " L3 "\n\t"               \
-
-#define RND_RORX_ALL_2(a, b, c, d, e, f, g, h, i) \
-    RND_RORX_0_1(a, b, c, d, e, f, g, h, i+0)     \
-    RND_RORX_0_2(a, b, c, d, e, f, g, h, i+0)     \
-    RND_RORX_0_3(a, b, c, d, e, f, g, h, i+0)     \
-    RND_RORX_0_4(a, b, c, d, e, f, g, h, i+0)     \
-    RND_RORX_0_5(a, b, c, d, e, f, g, h, i+0)     \
-    RND_RORX_0_6(a, b, c, d, e, f, g, h, i+0)     \
-    RND_RORX_0_7(a, b, c, d, e, f, g, h, i+0)     \
-    RND_RORX_0_8(a, b, c, d, e, f, g, h, i+0)     \
-    RND_RORX_1_1(h, a, b, c, d, e, f, g, i+1)     \
-    RND_RORX_1_2(h, a, b, c, d, e, f, g, i+1)     \
-    RND_RORX_1_3(h, a, b, c, d, e, f, g, i+1)     \
-    RND_RORX_1_4(h, a, b, c, d, e, f, g, i+1)     \
-    RND_RORX_1_5(h, a, b, c, d, e, f, g, i+1)     \
-    RND_RORX_1_6(h, a, b, c, d, e, f, g, i+1)     \
-    RND_RORX_1_7(h, a, b, c, d, e, f, g, i+1)     \
-    RND_RORX_1_8(h, a, b, c, d, e, f, g, i+1)     \
-
-#define RND_RORX_ALL_4(a, b, c, d, e, f, g, h, i) \
-    RND_RORX_ALL_2(a, b, c, d, e, f, g, h, i+0)   \
-    RND_RORX_ALL_2(g, h, a, b, c, d, e, f, i+2)
-
-#define MsgSched_RORX(W_0,W_2,W_4,W_6,W_8,W_10,W_12,W_14,a,b,c,d,e,f,g,h,i) \
-            RND_RORX_0_1(a,b,c,d,e,f,g,h,i)                                 \
-    VPALIGNR(W_M15, W_2, W_0, 8)                                            \
-    VPALIGNR(W_M7, W_10, W_8, 8)                                            \
-            RND_RORX_0_2(a,b,c,d,e,f,g,h,i)                                 \
-    V_SHIFT_R(XTMP1, W_M15, 1)                                              \
-    V_SHIFT_L(XTMP2, W_M15, 63)                                             \
-            RND_RORX_0_3(a,b,c,d,e,f,g,h,i)                                 \
-    V_SHIFT_R(XTMP3, W_M15, 8)                                              \
-    V_SHIFT_L(XTMP4, W_M15, 56)                                             \
-            RND_RORX_0_4(a,b,c,d,e,f,g,h,i)                                 \
-    V_OR(XTMP1, XTMP2, XTMP1)                                               \
-    V_OR(XTMP3, XTMP4, XTMP3)                                               \
-            RND_RORX_0_5(a,b,c,d,e,f,g,h,i)                                 \
-    V_SHIFT_R(XTMP4, W_M15, 7)                                              \
-    V_XOR(XTMP1, XTMP3, XTMP1)                                              \
-            RND_RORX_0_6(a,b,c,d,e,f,g,h,i)                                 \
-    V_XOR(XTMP1, XTMP4, XTMP1)                                              \
-    V_ADD(W_0, W_0, W_M7)                                                   \
-            RND_RORX_0_7(a,b,c,d,e,f,g,h,i)                                 \
-            RND_RORX_0_8(a,b,c,d,e,f,g,h,i)                                 \
-    V_ADD(W_0, W_0, XTMP1)                                                  \
-            RND_RORX_1_1(h,a,b,c,d,e,f,g,i+1)                               \
-    V_SHIFT_R(XTMP1, W_14, 19)                                              \
-    V_SHIFT_L(XTMP2, W_14, 45)                                              \
-            RND_RORX_1_2(h,a,b,c,d,e,f,g,i+1)                               \
-    V_SHIFT_R(XTMP3, W_14, 61)                                              \
-    V_SHIFT_L(XTMP4, W_14, 3)                                               \
-            RND_RORX_1_3(h,a,b,c,d,e,f,g,i+1)                               \
-    V_OR(XTMP1, XTMP2, XTMP1)                                               \
-    V_OR(XTMP3, XTMP4, XTMP3)                                               \
-            RND_RORX_1_4(h,a,b,c,d,e,f,g,i+1)                               \
-            RND_RORX_1_5(h,a,b,c,d,e,f,g,i+1)                               \
-    V_XOR(XTMP1, XTMP3, XTMP1)                                              \
-    V_SHIFT_R(XTMP4, W_14, 6)                                               \
-            RND_RORX_1_6(h,a,b,c,d,e,f,g,i+1)                               \
-            RND_RORX_1_7(h,a,b,c,d,e,f,g,i+1)                               \
-    V_XOR(XTMP1, XTMP4, XTMP1)                                              \
-            RND_RORX_1_8(h,a,b,c,d,e,f,g,i+1)                               \
-    V_ADD(W_0, W_0, XTMP1)                                                  \
-
-#endif
-
-#define _INIT_MASK(mask) \
-    "vmovdqu %[mask], %%" #mask "\n\t"
-#define INIT_MASK(mask) \
-       _INIT_MASK(mask)
-
-#define _LOAD_W_2(i1, i2, xmm1, xmm2, mask, reg)           \
-    "vmovdqu	" #i1 "*16(%%" #reg "), %%" #xmm1 "\n\t"   \
-    "vmovdqu	" #i2 "*16(%%" #reg "), %%" #xmm2 "\n\t"   \
-    "vpshufb	%%" #mask ", %%" #xmm1 ", %%" #xmm1 "\n\t" \
-    "vpshufb	%%" #mask ", %%" #xmm2 ", %%" #xmm2 "\n\t"
-#define LOAD_W_2(i1, i2, xmm1, xmm2, mask, reg) \
-       _LOAD_W_2(i1, i2, xmm1, xmm2, mask, reg)
-
-#define LOAD_W(mask, reg)                           \
-    /* X0..3(xmm4..7), W[0..15] = buffer[0.15];  */ \
-    LOAD_W_2(0, 1, W_0 , W_2 , mask, reg)           \
-    LOAD_W_2(2, 3, W_4 , W_6 , mask, reg)           \
-    LOAD_W_2(4, 5, W_8 , W_10, mask, reg)           \
-    LOAD_W_2(6, 7, W_12, W_14, mask, reg)
-
-#define _SET_W_X_2(xmm0, xmm1, reg, i)                          \
-    "vpaddq	" #i "+ 0(%%" #reg "), %%" #xmm0 ", %%xmm8\n\t" \
-    "vpaddq	" #i "+16(%%" #reg "), %%" #xmm1 ", %%xmm9\n\t" \
-    "vmovdqu	%%xmm8, " #i "+ 0(" WX ")\n\t"                  \
-    "vmovdqu	%%xmm9, " #i "+16(" WX ")\n\t"                  \
-
-#define SET_W_X_2(xmm0, xmm1, reg, i) \
-       _SET_W_X_2(xmm0, xmm1, reg, i)
-
-#define SET_W_X(reg)                \
-    SET_W_X_2(W_0 , W_2 , reg,  0)  \
-    SET_W_X_2(W_4 , W_6 , reg, 32)  \
-    SET_W_X_2(W_8 , W_10, reg, 64)  \
-    SET_W_X_2(W_12, W_14, reg, 96)
-
-#define LOAD_DIGEST()                     \
-    "movq	  (%[sha512]), %%r8 \n\t" \
-    "movq	 8(%[sha512]), %%r9 \n\t" \
-    "movq	16(%[sha512]), %%r10\n\t" \
-    "movq	24(%[sha512]), %%r11\n\t" \
-    "movq	32(%[sha512]), %%r12\n\t" \
-    "movq	40(%[sha512]), %%r13\n\t" \
-    "movq	48(%[sha512]), %%r14\n\t" \
-    "movq	56(%[sha512]), %%r15\n\t"
-
-#define STORE_ADD_DIGEST()                \
-    "addq	 %%r8,   (%[sha512])\n\t" \
-    "addq	 %%r9,  8(%[sha512])\n\t" \
-    "addq	%%r10, 16(%[sha512])\n\t" \
-    "addq	%%r11, 24(%[sha512])\n\t" \
-    "addq	%%r12, 32(%[sha512])\n\t" \
-    "addq	%%r13, 40(%[sha512])\n\t" \
-    "addq	%%r14, 48(%[sha512])\n\t" \
-    "addq	%%r15, 56(%[sha512])\n\t"
-
-#define ADD_DIGEST()                      \
-    "addq	  (%[sha512]), %%r8 \n\t" \
-    "addq	 8(%[sha512]), %%r9 \n\t" \
-    "addq	16(%[sha512]), %%r10\n\t" \
-    "addq	24(%[sha512]), %%r11\n\t" \
-    "addq	32(%[sha512]), %%r12\n\t" \
-    "addq	40(%[sha512]), %%r13\n\t" \
-    "addq	48(%[sha512]), %%r14\n\t" \
-    "addq	56(%[sha512]), %%r15\n\t"
-
-#define STORE_DIGEST()                    \
-    "movq	 %%r8,   (%[sha512])\n\t" \
-    "movq	 %%r9,  8(%[sha512])\n\t" \
-    "movq	%%r10, 16(%[sha512])\n\t" \
-    "movq	%%r11, 24(%[sha512])\n\t" \
-    "movq	%%r12, 32(%[sha512])\n\t" \
-    "movq	%%r13, 40(%[sha512])\n\t" \
-    "movq	%%r14, 48(%[sha512])\n\t" \
-    "movq	%%r15, 56(%[sha512])\n\t"
-
-#endif /* HAVE_INTEL_AVX1 */
-
-
-/***  Transform Body ***/
-#if defined(HAVE_INTEL_AVX1)
-static int Transform_Sha512_AVX1(wc_Sha512* sha512)
-{
-    __asm__ __volatile__ (
-
-        /* 16 Ws plus loop counter. */
-        "subq	$136, %%rsp\n\t"
-        "leaq	64(%[sha512]), %%rax\n\t"
-
-    INIT_MASK(MASK)
-    LOAD_DIGEST()
-
-    LOAD_W(MASK, rax)
-
-        "movl	$4, 16*8(" WX ")\n\t"
-        "leaq	%[K512], %%rsi\n\t"
-        /* b */
-        "movq	%%r9, " L4 "\n\t"
-        /* e */
-        "movq	%%r12, " L1 "\n\t"
-        /* b ^ c */
-        "xorq	%%r10, " L4 "\n\t"
-
-        "# Start of 16 rounds\n"
-        "1:\n\t"
-
-    SET_W_X(rsi)
-
-        "addq	$128, %%rsi\n\t"
-
-    MsgSched2(W_0,W_2,W_4,W_6,W_8,W_10,W_12,W_14,RA,RB,RC,RD,RE,RF,RG,RH, 0)
-    MsgSched2(W_2,W_4,W_6,W_8,W_10,W_12,W_14,W_0,RG,RH,RA,RB,RC,RD,RE,RF, 2)
-    MsgSched2(W_4,W_6,W_8,W_10,W_12,W_14,W_0,W_2,RE,RF,RG,RH,RA,RB,RC,RD, 4)
-    MsgSched2(W_6,W_8,W_10,W_12,W_14,W_0,W_2,W_4,RC,RD,RE,RF,RG,RH,RA,RB, 6)
-    MsgSched2(W_8,W_10,W_12,W_14,W_0,W_2,W_4,W_6,RA,RB,RC,RD,RE,RF,RG,RH, 8)
-    MsgSched2(W_10,W_12,W_14,W_0,W_2,W_4,W_6,W_8,RG,RH,RA,RB,RC,RD,RE,RF,10)
-    MsgSched2(W_12,W_14,W_0,W_2,W_4,W_6,W_8,W_10,RE,RF,RG,RH,RA,RB,RC,RD,12)
-    MsgSched2(W_14,W_0,W_2,W_4,W_6,W_8,W_10,W_12,RC,RD,RE,RF,RG,RH,RA,RB,14)
-
-        "subl	$1, 16*8(" WX ")\n\t"
-        "jne	1b\n\t"
-
-    SET_W_X(rsi)
-
-    RND_ALL_2(RA,RB,RC,RD,RE,RF,RG,RH, 0)
-    RND_ALL_2(RG,RH,RA,RB,RC,RD,RE,RF, 2)
-    RND_ALL_2(RE,RF,RG,RH,RA,RB,RC,RD, 4)
-    RND_ALL_2(RC,RD,RE,RF,RG,RH,RA,RB, 6)
-
-    RND_ALL_2(RA,RB,RC,RD,RE,RF,RG,RH, 8)
-    RND_ALL_2(RG,RH,RA,RB,RC,RD,RE,RF,10)
-    RND_ALL_2(RE,RF,RG,RH,RA,RB,RC,RD,12)
-    RND_ALL_2(RC,RD,RE,RF,RG,RH,RA,RB,14)
-
-    STORE_ADD_DIGEST()
-
-        "addq	$136, %%rsp\n\t"
-
-        :
-        : [mask]   "m" (mBYTE_FLIP_MASK),
-          [sha512] "r" (sha512),
-          [K512]   "m" (K512)
-        : WORK_REGS, STATE_REGS, XMM_REGS, "memory", "rsi"
-    );
-
-    return 0;
-}
-
-static int Transform_Sha512_AVX1_Len(wc_Sha512* sha512, word32 len)
-{
-    __asm__ __volatile__ (
-
-        "movq	224(%[sha512]), %%rsi\n\t"
-        "leaq	%[K512], %%rdx\n\t"
-
-    INIT_MASK(MASK)
-    LOAD_DIGEST()
-
-        "# Start of processing a block\n"
-        "2:\n\t"
-
-        /* 16 Ws plus loop counter and K512. len goes into -4(%rsp).
-         * Debug needs more stack space. */
-        "subq	$256, %%rsp\n\t"
-
-    LOAD_W(MASK, rsi)
-
-        "movl	$4, 16*8(" WX ")\n\t"
-        /* b */
-        "movq	%%r9, " L4 "\n\t"
-        /* e */
-        "movq	%%r12, " L1 "\n\t"
-        /* b ^ c */
-        "xorq	%%r10, " L4 "\n\t"
-
-    SET_W_X(rdx)
-
-        "# Start of 16 rounds\n"
-        "1:\n\t"
-
-        "addq	$128, %%rdx\n\t"
-        "movq	%%rdx, 17*8(%%rsp)\n\t"
-
-    MsgSched2(W_0,W_2,W_4,W_6,W_8,W_10,W_12,W_14,RA,RB,RC,RD,RE,RF,RG,RH, 0)
-    MsgSched2(W_2,W_4,W_6,W_8,W_10,W_12,W_14,W_0,RG,RH,RA,RB,RC,RD,RE,RF, 2)
-    MsgSched2(W_4,W_6,W_8,W_10,W_12,W_14,W_0,W_2,RE,RF,RG,RH,RA,RB,RC,RD, 4)
-    MsgSched2(W_6,W_8,W_10,W_12,W_14,W_0,W_2,W_4,RC,RD,RE,RF,RG,RH,RA,RB, 6)
-    MsgSched2(W_8,W_10,W_12,W_14,W_0,W_2,W_4,W_6,RA,RB,RC,RD,RE,RF,RG,RH, 8)
-    MsgSched2(W_10,W_12,W_14,W_0,W_2,W_4,W_6,W_8,RG,RH,RA,RB,RC,RD,RE,RF,10)
-    MsgSched2(W_12,W_14,W_0,W_2,W_4,W_6,W_8,W_10,RE,RF,RG,RH,RA,RB,RC,RD,12)
-    MsgSched2(W_14,W_0,W_2,W_4,W_6,W_8,W_10,W_12,RC,RD,RE,RF,RG,RH,RA,RB,14)
-
-        "movq	17*8(%%rsp), %%rdx\n\t"
-
-    SET_W_X(rdx)
-
-        "subl	$1, 16*8(" WX ")\n\t"
-        "jne	1b\n\t"
-
-    RND_ALL_2(RA,RB,RC,RD,RE,RF,RG,RH, 0)
-    RND_ALL_2(RG,RH,RA,RB,RC,RD,RE,RF, 2)
-    RND_ALL_2(RE,RF,RG,RH,RA,RB,RC,RD, 4)
-    RND_ALL_2(RC,RD,RE,RF,RG,RH,RA,RB, 6)
-
-    RND_ALL_2(RA,RB,RC,RD,RE,RF,RG,RH, 8)
-    RND_ALL_2(RG,RH,RA,RB,RC,RD,RE,RF,10)
-    RND_ALL_2(RE,RF,RG,RH,RA,RB,RC,RD,12)
-    RND_ALL_2(RC,RD,RE,RF,RG,RH,RA,RB,14)
-
-    ADD_DIGEST()
-
-        "addq	$256, %%rsp\n\t"
-        "leaq	%[K512], %%rdx\n\t"
-        "addq	$128, %%rsi\n\t"
-        "subl	$128, %[len]\n\t"
-
-    STORE_DIGEST()
-
-        "jnz	2b\n\t"
-
-        :
-        : [mask]   "m" (mBYTE_FLIP_MASK),
-          [len]    "m" (len),
-          [sha512] "r" (sha512),
-          [K512]   "m" (K512)
-        : WORK_REGS, STATE_REGS, XMM_REGS, "memory", "rsi"
-    );
-
-    return 0;
-}
-#endif /* HAVE_INTEL_AVX1 */
-
-#if defined(HAVE_INTEL_AVX2) && defined(HAVE_INTEL_RORX)
-static int Transform_Sha512_AVX1_RORX(wc_Sha512* sha512)
-{
-    __asm__ __volatile__ (
-
-        /* 16 Ws plus loop counter and K512. */
-        "subq	$144, %%rsp\n\t"
-        "leaq	64(%[sha512]), %%rax\n\t"
-
-    INIT_MASK(MASK)
-    LOAD_DIGEST()
-
-    LOAD_W(MASK, rax)
-
-        "movl	$4, 16*8(" WX ")\n\t"
-        "leaq	%[K512], %%rsi\n\t"
-        /* L4 = b */
-        "movq	%%r9, " L4 "\n\t"
-        /* L3 = 0 (add to prev h) */
-        "xorq	" L3 ", " L3 "\n\t"
-        /* L4 = b ^ c */
-        "xorq	%%r10, " L4 "\n\t"
-
-    SET_W_X(rsi)
-
-        "# Start of 16 rounds\n"
-        "1:\n\t"
-
-        "addq	$128, %%rsi\n\t"
-
-    MsgSched_RORX(W_0,W_2,W_4,W_6,W_8,W_10,W_12,W_14,RA,RB,RC,RD,RE,RF,RG,RH, 0)
-    MsgSched_RORX(W_2,W_4,W_6,W_8,W_10,W_12,W_14,W_0,RG,RH,RA,RB,RC,RD,RE,RF, 2)
-    MsgSched_RORX(W_4,W_6,W_8,W_10,W_12,W_14,W_0,W_2,RE,RF,RG,RH,RA,RB,RC,RD, 4)
-    MsgSched_RORX(W_6,W_8,W_10,W_12,W_14,W_0,W_2,W_4,RC,RD,RE,RF,RG,RH,RA,RB, 6)
-    MsgSched_RORX(W_8,W_10,W_12,W_14,W_0,W_2,W_4,W_6,RA,RB,RC,RD,RE,RF,RG,RH, 8)
-    MsgSched_RORX(W_10,W_12,W_14,W_0,W_2,W_4,W_6,W_8,RG,RH,RA,RB,RC,RD,RE,RF,10)
-    MsgSched_RORX(W_12,W_14,W_0,W_2,W_4,W_6,W_8,W_10,RE,RF,RG,RH,RA,RB,RC,RD,12)
-    MsgSched_RORX(W_14,W_0,W_2,W_4,W_6,W_8,W_10,W_12,RC,RD,RE,RF,RG,RH,RA,RB,14)
-
-    SET_W_X(rsi)
-
-        "subl	$1, 16*8(" WX ")\n\t"
-        "jne	1b\n\t"
-
-    RND_RORX_ALL_2(RA,RB,RC,RD,RE,RF,RG,RH, 0)
-    RND_RORX_ALL_2(RG,RH,RA,RB,RC,RD,RE,RF, 2)
-    RND_RORX_ALL_2(RE,RF,RG,RH,RA,RB,RC,RD, 4)
-    RND_RORX_ALL_2(RC,RD,RE,RF,RG,RH,RA,RB, 6)
-
-    RND_RORX_ALL_2(RA,RB,RC,RD,RE,RF,RG,RH, 8)
-    RND_RORX_ALL_2(RG,RH,RA,RB,RC,RD,RE,RF,10)
-    RND_RORX_ALL_2(RE,RF,RG,RH,RA,RB,RC,RD,12)
-    RND_RORX_ALL_2(RC,RD,RE,RF,RG,RH,RA,RB,14)
-
-        /* Prev RND: h += Maj(a,b,c) */
-        "addq	" L3 ", %%r8\n\t"
-        "addq	$144, %%rsp\n\t"
-
-    STORE_ADD_DIGEST()
-
-        :
-        : [mask]   "m" (mBYTE_FLIP_MASK),
-          [sha512] "r" (sha512),
-          [K512]   "m" (K512)
-        : WORK_REGS, STATE_REGS, XMM_REGS, "memory", "rsi"
-    );
-
-    return 0;
-}
-
-static int Transform_Sha512_AVX1_RORX_Len(wc_Sha512* sha512, word32 len)
-{
-    __asm__ __volatile__ (
-
-        "movq	224(%[sha512]), %%rsi\n\t"
-        "leaq	%[K512], %%rcx\n\t"
-
-    INIT_MASK(MASK)
-    LOAD_DIGEST()
-
-        "# Start of processing a block\n"
-        "2:\n\t"
-
-        /* 16 Ws plus loop counter and K512. len goes into -4(%rsp).
-         * Debug needs more stack space. */
-        "subq	$256, %%rsp\n\t"
-
-    LOAD_W(MASK, rsi)
-
-        "movl	$4, 16*8(" WX ")\n\t"
-        /* L4 = b */
-        "movq	%%r9, " L4 "\n\t"
-        /* L3 = 0 (add to prev h) */
-        "xorq	" L3 ", " L3 "\n\t"
-        /* L4 = b ^ c */
-        "xorq	%%r10, " L4 "\n\t"
-
-    SET_W_X(rcx)
-
-        "# Start of 16 rounds\n"
-        "1:\n\t"
-
-        "addq	$128, %%rcx\n\t"
-        "movq	%%rcx, 17*8(%%rsp)\n\t"
-
-    MsgSched_RORX(W_0,W_2,W_4,W_6,W_8,W_10,W_12,W_14,RA,RB,RC,RD,RE,RF,RG,RH, 0)
-    MsgSched_RORX(W_2,W_4,W_6,W_8,W_10,W_12,W_14,W_0,RG,RH,RA,RB,RC,RD,RE,RF, 2)
-    MsgSched_RORX(W_4,W_6,W_8,W_10,W_12,W_14,W_0,W_2,RE,RF,RG,RH,RA,RB,RC,RD, 4)
-    MsgSched_RORX(W_6,W_8,W_10,W_12,W_14,W_0,W_2,W_4,RC,RD,RE,RF,RG,RH,RA,RB, 6)
-    MsgSched_RORX(W_8,W_10,W_12,W_14,W_0,W_2,W_4,W_6,RA,RB,RC,RD,RE,RF,RG,RH, 8)
-    MsgSched_RORX(W_10,W_12,W_14,W_0,W_2,W_4,W_6,W_8,RG,RH,RA,RB,RC,RD,RE,RF,10)
-    MsgSched_RORX(W_12,W_14,W_0,W_2,W_4,W_6,W_8,W_10,RE,RF,RG,RH,RA,RB,RC,RD,12)
-    MsgSched_RORX(W_14,W_0,W_2,W_4,W_6,W_8,W_10,W_12,RC,RD,RE,RF,RG,RH,RA,RB,14)
-
-        "movq	17*8(%%rsp), %%rcx\n\t"
-
-    SET_W_X(rcx)
-
-        "subl	$1, 16*8(" WX ")\n\t"
-        "jne	1b\n\t"
-
-    SET_W_X(rcx)
-
-    RND_RORX_ALL_2(RA,RB,RC,RD,RE,RF,RG,RH, 0)
-    RND_RORX_ALL_2(RG,RH,RA,RB,RC,RD,RE,RF, 2)
-    RND_RORX_ALL_2(RE,RF,RG,RH,RA,RB,RC,RD, 4)
-    RND_RORX_ALL_2(RC,RD,RE,RF,RG,RH,RA,RB, 6)
-
-    RND_RORX_ALL_2(RA,RB,RC,RD,RE,RF,RG,RH, 8)
-    RND_RORX_ALL_2(RG,RH,RA,RB,RC,RD,RE,RF,10)
-    RND_RORX_ALL_2(RE,RF,RG,RH,RA,RB,RC,RD,12)
-    RND_RORX_ALL_2(RC,RD,RE,RF,RG,RH,RA,RB,14)
-
-        /* Prev RND: h += Maj(a,b,c) */
-        "addq	" L3 ", %%r8\n\t"
-        "addq	$256, %%rsp\n\t"
-
-    ADD_DIGEST()
-
-        "leaq	%[K512], %%rcx\n\t"
-        "addq	$128, %%rsi\n\t"
-        "subl	$128, %[len]\n\t"
-
-    STORE_DIGEST()
-
-        "jnz	2b\n\t"
-
-        :
-        : [mask]   "m" (mBYTE_FLIP_MASK),
-          [len]    "m" (len),
-          [sha512] "r" (sha512),
-          [K512]   "m" (K512)
-        : WORK_REGS, STATE_REGS, XMM_REGS, "memory", "rsi"
-    );
-
-    return 0;
-}
-#endif /* HAVE_INTEL_AVX2 && HAVE_INTEL_RORX */
-
-#if defined(HAVE_INTEL_AVX2)
-static const unsigned long mBYTE_FLIP_MASK_Y[] =
-   { 0x0001020304050607, 0x08090a0b0c0d0e0f,
-     0x0001020304050607, 0x08090a0b0c0d0e0f };
-
-#define W_Y_0       ymm0
-#define W_Y_4       ymm1
-#define W_Y_8       ymm2
-#define W_Y_12      ymm3
-
-#define X0       xmm0
-#define X1       xmm1
-#define X2       xmm2
-#define X3       xmm3
-#define X4       xmm4
-#define X5       xmm5
-#define X6       xmm6
-#define X7       xmm7
-#define X8       xmm8
-#define X9       xmm9
-#define Y0       ymm0
-#define Y1       ymm1
-#define Y2       ymm2
-#define Y3       ymm3
-#define Y4       ymm4
-#define Y5       ymm5
-#define Y6       ymm6
-#define Y7       ymm7
-
-#define W_Y_M15     ymm12
-#define W_Y_M7      ymm13
-#define W_Y_M2      ymm14
-#define MASK_Y      ymm15
-
-#define YTMP1       ymm8
-#define YTMP2       ymm9
-#define YTMP3       ymm10
-#define YTMP4       ymm11
-
-#define YMM_REGS \
-    "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7",       \
-    "xmm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
-
-#define _VPERM2I128(dest, src1, src2, sel)                             \
-    "vperm2I128	$" #sel ", %%" #src2 ", %%" #src1 ", %%" #dest "\n\t"
-#define VPERM2I128(dest, src1, src2, sel) \
-       _VPERM2I128(dest, src1, src2, sel)
-
-#define _VPERMQ(dest, src, sel)                                        \
-    "vpermq	$" #sel ", %%" #src ", %%" #dest "\n\t"
-#define VPERMQ(dest, src, sel) \
-       _VPERMQ(dest, src, sel)
-
-#define _VPBLENDD(dest, src1, src2, sel)                               \
-    "vpblendd	$" #sel ", %%" #src2 ", %%" #src1 ", %%" #dest "\n\t"
-#define VPBLENDD(dest, src1, src2, sel) \
-       _VPBLENDD(dest, src1, src2, sel)
-
-#define _V_ADD_I(dest, src1, addr, i)                                  \
-    "vpaddq	 "#i"*8(%%" #addr "), %%" #src1 ", %%" #dest "\n\t"
-#define V_ADD_I(dest, src1, addr, i) \
-       _V_ADD_I(dest, src1, addr, i)
-
-#define _VMOVDQU_I(addr, i, src)                                       \
-    "vmovdqu	 %%" #src ", " #i "*8(%%" #addr ")\n\t"
-#define VMOVDQU_I(addr, i, src) \
-       _VMOVDQU_I(addr, i, src)
-
-#define MsgSched4_AVX2(W_Y_0,W_Y_4,W_Y_8,W_Y_12,a,b,c,d,e,f,g,h,i) \
-            RND_0_1(a,b,c,d,e,f,g,h,i)                             \
-    /* W[-13]..W[-15], W[-12] */                                   \
-    VPBLENDD(W_Y_M15, W_Y_0, W_Y_4, 0x03)                          \
-    /* W[-5]..W[-7], W[-4] */                                      \
-    VPBLENDD(W_Y_M7, W_Y_8, W_Y_12, 0x03)                          \
-            RND_0_2(a,b,c,d,e,f,g,h,i)                             \
-            RND_0_3(a,b,c,d,e,f,g,h,i)                             \
-    /* W_Y_M15 = W[-12]..W[-15] */                                 \
-    VPERMQ(W_Y_M15, W_Y_M15, 0x39)                                 \
-            RND_0_4(a,b,c,d,e,f,g,h,i)                             \
-    /* W_Y_M7 = W[-4]..W[-7] */                                    \
-    VPERMQ(W_Y_M7, W_Y_M7, 0x39)                                   \
-            RND_0_5(a,b,c,d,e,f,g,h,i)                             \
-            RND_0_6(a,b,c,d,e,f,g,h,i)                             \
-    /* W[-15] >>  1 */                                             \
-    V_SHIFT_R(YTMP1, W_Y_M15, 1)                                   \
-            RND_0_7(a,b,c,d,e,f,g,h,i)                             \
-    /* W[-15] << 63 */                                             \
-    V_SHIFT_L(YTMP2, W_Y_M15, 63)                                  \
-            RND_0_8(a,b,c,d,e,f,g,h,i)                             \
-    /* W[-15] >>  8 */                                             \
-    V_SHIFT_R(YTMP3, W_Y_M15, 8)                                   \
-            RND_0_9(a,b,c,d,e,f,g,h,i)                             \
-    /* W[-15] << 56 */                                             \
-    V_SHIFT_L(YTMP4, W_Y_M15, 56)                                  \
-            RND_0_10(a,b,c,d,e,f,g,h,i)                            \
-    /* W[-15] >>> 1 */                                             \
-    V_OR(YTMP1, YTMP2, YTMP1)                                      \
-            RND_0_11(a,b,c,d,e,f,g,h,i)                            \
-    /* W[-15] >>> 8 */                                             \
-    V_OR(YTMP3, YTMP4, YTMP3)                                      \
-            RND_0_12(a,b,c,d,e,f,g,h,i)                            \
-            RND_1_1(h,a,b,c,d,e,f,g,i+1)                           \
-    /* W[-15] >> 7 */                                              \
-    V_SHIFT_R(YTMP4, W_Y_M15, 7)                                   \
-            RND_1_2_A(h,a,b,c,d,e,f,g,i+1)                         \
-    /* (W[-15] >>> 1) ^ (W[-15] >>> 8) */                          \
-    V_XOR(YTMP1, YTMP3, YTMP1)                                     \
-            RND_1_2_B(h,a,b,c,d,e,f,g,i+1)                         \
-    /* (W[-15] >>> 1) ^ (W[-15] >>> 8) ^ (W[-15] >> 7) */          \
-    V_XOR(YTMP1, YTMP4, YTMP1)                                     \
-            RND_1_3(h,a,b,c,d,e,f,g,i+1)                           \
-    /* W[0] = W[-16] + W[-7] */                                    \
-    V_ADD(W_Y_0, W_Y_0, W_Y_M7)                                    \
-            RND_1_4(h,a,b,c,d,e,f,g,i+1)                           \
-    /* W[0] = W[-16] + W[-7] + s0(W[-15]) */                       \
-    V_ADD(W_Y_0, W_Y_0, YTMP1)                                     \
-            RND_1_5(h,a,b,c,d,e,f,g,i+1)                           \
-    /* 0, 0, W[-1], W[-2] */                                       \
-    VPERM2I128(W_Y_M2, W_Y_12, W_Y_12, 0x81)                       \
-            RND_1_6(h,a,b,c,d,e,f,g,i+1)                           \
-            RND_1_7(h,a,b,c,d,e,f,g,i+1)                           \
-            RND_1_8(h,a,b,c,d,e,f,g,i+1)                           \
-    /* W[-2] >> 19 */                                              \
-    V_SHIFT_R(YTMP1, W_Y_M2, 19)                                   \
-            RND_1_9(h,a,b,c,d,e,f,g,i+1)                           \
-    /* W[-2] << 45 */                                              \
-    V_SHIFT_L(YTMP2, W_Y_M2, 45)                                   \
-            RND_1_10(h,a,b,c,d,e,f,g,i+1)                          \
-    /* W[-2] >> 61 */                                              \
-    V_SHIFT_R(YTMP3, W_Y_M2, 61)                                   \
-            RND_1_11(h,a,b,c,d,e,f,g,i+1)                          \
-    /* W[-2] <<  3 */                                              \
-    V_SHIFT_L(YTMP4, W_Y_M2, 3)                                    \
-            RND_1_12(h,a,b,c,d,e,f,g,i+1)                          \
-            RND_0_1(g,h,a,b,c,d,e,f,i+2)                           \
-    /* W[-2] >>> 19 */                                             \
-    V_OR(YTMP1, YTMP2, YTMP1)                                      \
-            RND_0_2(g,h,a,b,c,d,e,f,i+2)                           \
-    /* W[-2] >>> 61 */                                             \
-    V_OR(YTMP3, YTMP4, YTMP3)                                      \
-            RND_0_3(g,h,a,b,c,d,e,f,i+2)                           \
-    /* (W[-2] >>> 19) ^ (W[-2] >>> 61) */                          \
-    V_XOR(YTMP1, YTMP3, YTMP1)                                     \
-            RND_0_4(g,h,a,b,c,d,e,f,i+2)                           \
-    /* W[-2] >>  6 */                                              \
-    V_SHIFT_R(YTMP4, W_Y_M2, 6)                                    \
-            RND_0_5(g,h,a,b,c,d,e,f,i+2)                           \
-    /* (W[-2] >>> 19) ^ (W[-2] >>> 61) ^ (W[-2] >> 6) */           \
-    V_XOR(YTMP1, YTMP4, YTMP1)                                     \
-            RND_0_6(g,h,a,b,c,d,e,f,i+2)                           \
-    /* W[0] = W[-16] + W[-7] + s0(W[-15]) + s1(W[-2]) */           \
-    V_ADD(W_Y_0, W_Y_0, YTMP1)                                     \
-            RND_0_7(g,h,a,b,c,d,e,f,i+2)                           \
-            RND_0_8(g,h,a,b,c,d,e,f,i+2)                           \
-    /* W[1], W[0], 0, 0 */                                         \
-    VPERM2I128(W_Y_M2, W_Y_0, W_Y_0, 0x08)                         \
-            RND_0_9(g,h,a,b,c,d,e,f,i+2)                           \
-            RND_0_10(g,h,a,b,c,d,e,f,i+2)                          \
-    /* W[-2] >> 19 */                                              \
-    V_SHIFT_R(YTMP1, W_Y_M2, 19)                                   \
-            RND_0_11(g,h,a,b,c,d,e,f,i+2)                          \
-    /* W[-2] << 45 */                                              \
-    V_SHIFT_L(YTMP2, W_Y_M2, 45)                                   \
-            RND_0_12(g,h,a,b,c,d,e,f,i+2)                          \
-            RND_1_1(f,g,h,a,b,c,d,e,i+3)                           \
-    /* W[-2] >> 61 */                                              \
-    V_SHIFT_R(YTMP3, W_Y_M2, 61)                                   \
-            RND_1_2(f,g,h,a,b,c,d,e,i+3)                           \
-    /* W[-2] <<  3 */                                              \
-    V_SHIFT_L(YTMP4, W_Y_M2, 3)                                    \
-            RND_1_3(f,g,h,a,b,c,d,e,i+3)                           \
-    /* W[-2] >>> 19 */                                             \
-    V_OR(YTMP1, YTMP2, YTMP1)                                      \
-            RND_1_4(f,g,h,a,b,c,d,e,i+3)                           \
-    /* W[-2] >>> 61 */                                             \
-    V_OR(YTMP3, YTMP4, YTMP3)                                      \
-            RND_1_5(f,g,h,a,b,c,d,e,i+3)                           \
-    /* (W[-2] >>> 19) ^ (W[-2] >>> 61) */                          \
-    V_XOR(YTMP1, YTMP3, YTMP1)                                     \
-            RND_1_6(f,g,h,a,b,c,d,e,i+3)                           \
-    /* W[-2] >>  6 */                                              \
-    V_SHIFT_R(YTMP4, W_Y_M2, 6)                                    \
-            RND_1_7(f,g,h,a,b,c,d,e,i+3)                           \
-    /* (W[-2] >>> 19) ^ (W[-2] >>> 61) ^ (W[-2] >> 6) */           \
-    V_XOR(YTMP1, YTMP4, YTMP1)                                     \
-            RND_1_8(f,g,h,a,b,c,d,e,i+3)                           \
-    /* W[0] = W[-16] + W[-7] + s0(W[-15]) + s1(W[-2]) */           \
-    V_ADD(W_Y_0, W_Y_0, YTMP1)                                     \
-            RND_1_9(f,g,h,a,b,c,d,e,i+3)                           \
-            RND_1_10(f,g,h,a,b,c,d,e,i+3)                          \
-            RND_1_11(f,g,h,a,b,c,d,e,i+3)                          \
-            RND_1_12(f,g,h,a,b,c,d,e,i+3)                          \
-
-#define MsgSched2_AVX2(W_0,W_2,W_4,W_6,W_8,W_10,W_12,W_14,a,b,c,d,e,f,g,h,i) \
-            RND_0_1(a,b,c,d,e,f,g,h,i)                                       \
-    VPALIGNR(W_Y_M15, W_2, W_0, 8)                                           \
-    VPALIGNR(W_Y_M7, W_10, W_8, 8)                                           \
-            RND_0_2(a,b,c,d,e,f,g,h,i)                                       \
-    V_SHIFT_R(YTMP1, W_Y_M15, 1)                                             \
-    V_SHIFT_L(YTMP2, W_Y_M15, 63)                                            \
-            RND_0_3(a,b,c,d,e,f,g,h,i)                                       \
-            RND_0_4(a,b,c,d,e,f,g,h,i)                                       \
-    V_SHIFT_R(YTMP3, W_Y_M15, 8)                                             \
-    V_SHIFT_L(YTMP4, W_Y_M15, 56)                                            \
-            RND_0_5(a,b,c,d,e,f,g,h,i)                                       \
-            RND_0_6(a,b,c,d,e,f,g,h,i)                                       \
-    V_OR(YTMP1, YTMP2, YTMP1)                                                \
-    V_OR(YTMP3, YTMP4, YTMP3)                                                \
-            RND_0_7(a,b,c,d,e,f,g,h,i)                                       \
-            RND_0_8(a,b,c,d,e,f,g,h,i)                                       \
-    V_SHIFT_R(YTMP4, W_Y_M15, 7)                                             \
-    V_XOR(YTMP1, YTMP3, YTMP1)                                               \
-            RND_0_9(a,b,c,d,e,f,g,h,i)                                       \
-            RND_0_10(a,b,c,d,e,f,g,h,i)                                      \
-    V_XOR(YTMP1, YTMP4, YTMP1)                                               \
-    V_ADD(W_0, W_0, W_Y_M7)                                                  \
-            RND_0_11(a,b,c,d,e,f,g,h,i)                                      \
-            RND_0_12(a,b,c,d,e,f,g,h,i)                                      \
-            RND_1_1(h,a,b,c,d,e,f,g,i+1)                                     \
-    V_ADD(W_0, W_0, YTMP1)                                                   \
-            RND_1_2(h,a,b,c,d,e,f,g,i+1)                                     \
-    V_SHIFT_R(YTMP1, W_14, 19)                                               \
-    V_SHIFT_L(YTMP2, W_14, 45)                                               \
-            RND_1_3(h,a,b,c,d,e,f,g,i+1)                                     \
-            RND_1_4(h,a,b,c,d,e,f,g,i+1)                                     \
-    V_SHIFT_R(YTMP3, W_14, 61)                                               \
-    V_SHIFT_L(YTMP4, W_14, 3)                                                \
-            RND_1_5(h,a,b,c,d,e,f,g,i+1)                                     \
-            RND_1_6(h,a,b,c,d,e,f,g,i+1)                                     \
-            RND_1_7(h,a,b,c,d,e,f,g,i+1)                                     \
-    V_OR(YTMP1, YTMP2, YTMP1)                                                \
-    V_OR(YTMP3, YTMP4, YTMP3)                                                \
-            RND_1_8(h,a,b,c,d,e,f,g,i+1)                                     \
-            RND_1_9(h,a,b,c,d,e,f,g,i+1)                                     \
-    V_XOR(YTMP1, YTMP3, YTMP1)                                               \
-    V_SHIFT_R(YTMP4, W_14, 6)                                                \
-            RND_1_10(h,a,b,c,d,e,f,g,i+1)                                    \
-            RND_1_11(h,a,b,c,d,e,f,g,i+1)                                    \
-    V_XOR(YTMP1, YTMP4, YTMP1)                                               \
-            RND_1_12(h,a,b,c,d,e,f,g,i+1)                                    \
-    V_ADD(W_0, W_0, YTMP1)                                                   \
-
-#define MsgSched4_AVX2_RORX_SET(W_Y_0,W_Y_4,W_Y_8,W_Y_12,a,b,c,d,e,f,g,h,i) \
-            RND_RORX_0_1(a,b,c,d,e,f,g,h,i)                                 \
-    /* W[-13]..W[-15], W[-12] */                                            \
-    VPBLENDD(W_Y_M15, W_Y_0, W_Y_4, 0x03)                                   \
-    /* W[-5]..W[-7], W[-4] */                                               \
-    VPBLENDD(W_Y_M7, W_Y_8, W_Y_12, 0x03)                                   \
-            RND_RORX_0_2(a,b,c,d,e,f,g,h,i)                                 \
-    /* W_Y_M15 = W[-12]..W[-15] */                                          \
-    VPERMQ(W_Y_M15, W_Y_M15, 0x39)                                          \
-            RND_RORX_0_3(a,b,c,d,e,f,g,h,i)                                 \
-    /* W_Y_M7 = W[-4]..W[-7] */                                             \
-    VPERMQ(W_Y_M7, W_Y_M7, 0x39)                                            \
-            RND_RORX_0_4(a,b,c,d,e,f,g,h,i)                                 \
-    /* W[-15] >>  1 */                                                      \
-    V_SHIFT_R(YTMP1, W_Y_M15, 1)                                            \
-    /* W[-15] << 63 */                                                      \
-    V_SHIFT_L(YTMP2, W_Y_M15, 63)                                           \
-            RND_RORX_0_5(a,b,c,d,e,f,g,h,i)                                 \
-    /* W[-15] >>  8 */                                                      \
-    V_SHIFT_R(YTMP3, W_Y_M15, 8)                                            \
-    /* W[-15] << 56 */                                                      \
-    V_SHIFT_L(YTMP4, W_Y_M15, 56)                                           \
-    /* W[-15] >>> 1 */                                                      \
-    V_OR(YTMP1, YTMP2, YTMP1)                                               \
-    /* W[-15] >>> 8 */                                                      \
-    V_OR(YTMP3, YTMP4, YTMP3)                                               \
-            RND_RORX_0_6(a,b,c,d,e,f,g,h,i)                                 \
-    /* W[-15] >> 7 */                                                       \
-    V_SHIFT_R(YTMP4, W_Y_M15, 7)                                            \
-            RND_RORX_0_7(a,b,c,d,e,f,g,h,i)                                 \
-    /* 0, 0, W[-1], W[-2] */                                                \
-    VPERM2I128(W_Y_M2, W_Y_12, W_Y_12, 0x81)                                \
-            RND_RORX_0_8(a,b,c,d,e,f,g,h,i)                                 \
-            RND_RORX_1_1(h,a,b,c,d,e,f,g,i+1)                               \
-    /* (W[-15] >>> 1) ^ (W[-15] >>> 8) */                                   \
-    V_XOR(YTMP1, YTMP3, YTMP1)                                              \
-            RND_RORX_1_2(h,a,b,c,d,e,f,g,i+1)                               \
-    /* (W[-15] >>> 1) ^ (W[-15] >>> 8) ^ (W[-15] >> 7) */                   \
-    V_XOR(YTMP1, YTMP4, YTMP1)                                              \
-            RND_RORX_1_3(h,a,b,c,d,e,f,g,i+1)                               \
-    /* W[0] = W[-16] + W[-7] */                                             \
-    V_ADD(W_Y_0, W_Y_0, W_Y_M7)                                             \
-    /* W[0] = W[-16] + W[-7] + s0(W[-15]) */                                \
-    V_ADD(W_Y_0, W_Y_0, YTMP1)                                              \
-            RND_RORX_1_4(h,a,b,c,d,e,f,g,i+1)                               \
-    /* W[-2] >> 19 */                                                       \
-    V_SHIFT_R(YTMP1, W_Y_M2, 19)                                            \
-    /* W[-2] << 45 */                                                       \
-    V_SHIFT_L(YTMP2, W_Y_M2, 45)                                            \
-            RND_RORX_1_5(h,a,b,c,d,e,f,g,i+1)                               \
-    /* W[-2] >> 61 */                                                       \
-    V_SHIFT_R(YTMP3, W_Y_M2, 61)                                            \
-    /* W[-2] <<  3 */                                                       \
-    V_SHIFT_L(YTMP4, W_Y_M2, 3)                                             \
-    /* W[-2] >>> 19 */                                                      \
-    V_OR(YTMP1, YTMP2, YTMP1)                                               \
-            RND_RORX_1_6(h,a,b,c,d,e,f,g,i+1)                               \
-    /* W[-2] >>> 61 */                                                      \
-    V_OR(YTMP3, YTMP4, YTMP3)                                               \
-            RND_RORX_1_7(h,a,b,c,d,e,f,g,i+1)                               \
-    /* (W[-2] >>> 19) ^ (W[-2] >>> 61) */                                   \
-    V_XOR(YTMP1, YTMP3, YTMP1)                                              \
-            RND_RORX_1_8(h,a,b,c,d,e,f,g,i+1)                               \
-    /* W[-2] >>  6 */                                                       \
-    V_SHIFT_R(YTMP4, W_Y_M2, 6)                                             \
-            RND_RORX_0_1(g,h,a,b,c,d,e,f,i+2)                               \
-    /* (W[-2] >>> 19) ^ (W[-2] >>> 61) ^ (W[-2] >> 6) */                    \
-    V_XOR(YTMP1, YTMP4, YTMP1)                                              \
-            RND_RORX_0_2(g,h,a,b,c,d,e,f,i+2)                               \
-    /* W[0] = W[-16] + W[-7] + s0(W[-15]) + s1(W[-2]) */                    \
-    V_ADD(W_Y_0, W_Y_0, YTMP1)                                              \
-            RND_RORX_0_3(g,h,a,b,c,d,e,f,i+2)                               \
-    /* W[1], W[0], 0, 0 */                                                  \
-    VPERM2I128(W_Y_M2, W_Y_0, W_Y_0, 0x08)                                  \
-            RND_RORX_0_4(g,h,a,b,c,d,e,f,i+2)                               \
-            RND_RORX_0_5(g,h,a,b,c,d,e,f,i+2)                               \
-    /* W[-2] >> 19 */                                                       \
-    V_SHIFT_R(YTMP1, W_Y_M2, 19)                                            \
-    /* W[-2] << 45 */                                                       \
-    V_SHIFT_L(YTMP2, W_Y_M2, 45)                                            \
-            RND_RORX_0_6(g,h,a,b,c,d,e,f,i+2)                               \
-    /* W[-2] >> 61 */                                                       \
-    V_SHIFT_R(YTMP3, W_Y_M2, 61)                                            \
-    /* W[-2] <<  3 */                                                       \
-    V_SHIFT_L(YTMP4, W_Y_M2, 3)                                             \
-    /* W[-2] >>> 19 */                                                      \
-    V_OR(YTMP1, YTMP2, YTMP1)                                               \
-            RND_RORX_0_7(g,h,a,b,c,d,e,f,i+2)                               \
-    /* W[-2] >>> 61 */                                                      \
-    V_OR(YTMP3, YTMP4, YTMP3)                                               \
-            RND_RORX_0_8(g,h,a,b,c,d,e,f,i+2)                               \
-    /* (W[-2] >>> 19) ^ (W[-2] >>> 61) */                                   \
-    V_XOR(YTMP1, YTMP3, YTMP1)                                              \
-            RND_RORX_1_1(f,g,h,a,b,c,d,e,i+3)                               \
-    /* W[-2] >>  6 */                                                       \
-    V_SHIFT_R(YTMP4, W_Y_M2, 6)                                             \
-            RND_RORX_1_2(f,g,h,a,b,c,d,e,i+3)                               \
-            RND_RORX_1_3(f,g,h,a,b,c,d,e,i+3)                               \
-    /* (W[-2] >>> 19) ^ (W[-2] >>> 61) ^ (W[-2] >> 6) */                    \
-    V_XOR(YTMP1, YTMP4, YTMP1)                                              \
-            RND_RORX_1_4(f,g,h,a,b,c,d,e,i+3)                               \
-            RND_RORX_1_5(f,g,h,a,b,c,d,e,i+3)                               \
-    /* W[0] = W[-16] + W[-7] + s0(W[-15]) + s1(W[-2]) */                    \
-    V_ADD(W_Y_0, W_Y_0, YTMP1)                                              \
-            RND_RORX_1_6(f,g,h,a,b,c,d,e,i+3)                               \
-    V_ADD_I(YTMP1, W_Y_0, rsi, i)                                           \
-            RND_RORX_1_7(f,g,h,a,b,c,d,e,i+3)                               \
-            RND_RORX_1_8(f,g,h,a,b,c,d,e,i+3)                               \
-    VMOVDQU_I(rsp, i, YTMP1)                                                \
-
-#define MsgSched2_AVX2_RORX(W_0,W_2,W_4,W_6,W_8,W_10,W_12,W_14,a,b,c,d,e,  \
-                            f,g,h,i)                                       \
-            RND_RORX_0_1(a,b,c,d,e,f,g,h,i)                                \
-    VPALIGNR(W_Y_M15, W_2, W_0, 8)                                         \
-    VPALIGNR(W_Y_M7, W_10, W_8, 8)                                         \
-            RND_RORX_0_2(a,b,c,d,e,f,g,h,i)                                \
-    V_SHIFT_R(YTMP1, W_Y_M15, 1)                                           \
-    V_SHIFT_L(YTMP2, W_Y_M15, 63)                                          \
-            RND_RORX_0_3(a,b,c,d,e,f,g,h,i)                                \
-    V_SHIFT_R(YTMP3, W_Y_M15, 8)                                           \
-    V_SHIFT_L(YTMP4, W_Y_M15, 56)                                          \
-            RND_RORX_0_4(a,b,c,d,e,f,g,h,i)                                \
-    V_OR(YTMP1, YTMP2, YTMP1)                                              \
-    V_OR(YTMP3, YTMP4, YTMP3)                                              \
-            RND_RORX_0_5(a,b,c,d,e,f,g,h,i)                                \
-    V_SHIFT_R(YTMP4, W_Y_M15, 7)                                           \
-    V_XOR(YTMP1, YTMP3, YTMP1)                                             \
-            RND_RORX_0_6(a,b,c,d,e,f,g,h,i)                                \
-    V_XOR(YTMP1, YTMP4, YTMP1)                                             \
-    V_ADD(W_0, W_0, W_Y_M7)                                                \
-            RND_RORX_0_7(a,b,c,d,e,f,g,h,i)                                \
-            RND_RORX_0_8(a,b,c,d,e,f,g,h,i)                                \
-    V_ADD(W_0, W_0, YTMP1)                                                 \
-            RND_RORX_1_1(h,a,b,c,d,e,f,g,i+1)                              \
-    V_SHIFT_R(YTMP1, W_14, 19)                                             \
-    V_SHIFT_L(YTMP2, W_14, 45)                                             \
-            RND_RORX_1_2(h,a,b,c,d,e,f,g,i+1)                              \
-    V_SHIFT_R(YTMP3, W_14, 61)                                             \
-    V_SHIFT_L(YTMP4, W_14, 3)                                              \
-            RND_RORX_1_3(h,a,b,c,d,e,f,g,i+1)                              \
-    V_OR(YTMP1, YTMP2, YTMP1)                                              \
-    V_OR(YTMP3, YTMP4, YTMP3)                                              \
-            RND_RORX_1_4(h,a,b,c,d,e,f,g,i+1)                              \
-            RND_RORX_1_5(h,a,b,c,d,e,f,g,i+1)                              \
-    V_XOR(YTMP1, YTMP3, YTMP1)                                             \
-    V_SHIFT_R(YTMP4, W_14, 6)                                              \
-            RND_RORX_1_6(h,a,b,c,d,e,f,g,i+1)                              \
-            RND_RORX_1_7(h,a,b,c,d,e,f,g,i+1)                              \
-    V_XOR(YTMP1, YTMP4, YTMP1)                                             \
-            RND_RORX_1_8(h,a,b,c,d,e,f,g,i+1)                              \
-    V_ADD(W_0, W_0, YTMP1)                                                 \
-
-
-#define _INIT_MASK_Y(mask)            \
-    "vmovdqu %[mask], %%"#mask"\n\t"
-#define INIT_MASK_Y(mask) \
-       _INIT_MASK_Y(mask)
-
-/* Load into YMM registers and swap endian. */
-#define _LOAD_BLOCK_W_Y_2(mask, ymm0, ymm1, reg, i)           \
-    /* buffer[0..15] => ymm0..ymm3;  */                       \
-    "vmovdqu	" #i "+ 0(%%" #reg "), %%" #ymm0 "\n\t"       \
-    "vmovdqu	" #i "+32(%%" #reg "), %%" #ymm1 "\n\t"       \
-    "vpshufb	%%" #mask ", %%" #ymm0 ", %%" #ymm0 "\n\t"    \
-    "vpshufb	%%" #mask ", %%" #ymm1 ", %%" #ymm1 "\n\t"
-
-#define LOAD_BLOCK_W_Y_2(mask, ymm1, ymm2, reg, i) \
-       _LOAD_BLOCK_W_Y_2(mask, ymm1, ymm2, reg, i)
-
-#define LOAD_BLOCK_W_Y(mask, reg)                  \
-    LOAD_BLOCK_W_Y_2(mask, W_Y_0, W_Y_4 , reg,  0) \
-    LOAD_BLOCK_W_Y_2(mask, W_Y_8, W_Y_12, reg, 64)
-
-#define _SET_W_Y_2(ymm0, ymm1, ymm2, ymm3, reg, i)                    \
-    "vpaddq	" #i "+ 0(%%" #reg "), %%" #ymm0 ", %%" #ymm2 "\n\t"  \
-    "vpaddq	" #i "+32(%%" #reg "), %%" #ymm1 ", %%" #ymm3 "\n\t"  \
-    "vmovdqu	%%" #ymm2 ", " #i "+ 0(" WX ")\n\t"                   \
-    "vmovdqu	%%" #ymm3 ", " #i "+32(" WX ")\n\t"
-
-#define SET_W_Y_2(ymm0, ymm1, ymm2, ymm3, reg, i) \
-       _SET_W_Y_2(ymm0, ymm1, ymm2, ymm3, reg, i)
-
-#define SET_BLOCK_W_Y(reg)                          \
-    SET_W_Y_2(W_Y_0, W_Y_4 , YTMP1, YTMP2, reg,  0) \
-    SET_W_Y_2(W_Y_8, W_Y_12, YTMP1, YTMP2, reg, 64)
-
-/* Load into YMM registers and swap endian. */
-#define _LOAD_BLOCK2_W_Y_2(mask, Y0, Y1, X0, X1, X8, X9, reg, i)   \
-    "vmovdqu	" #i "+  0(%%" #reg "), %%" #X0 "\n\t"                   \
-    "vmovdqu	" #i "+ 16(%%" #reg "), %%" #X1 "\n\t"                   \
-    "vmovdqu	" #i "+128(%%" #reg "), %%" #X8 "\n\t"                   \
-    "vmovdqu	" #i "+144(%%" #reg "), %%" #X9 "\n\t"                   \
-    "vinserti128	$1, %%" #X8 ", %%" #Y0 ", %%" #Y0 "\n\t"         \
-    "vinserti128	$1, %%" #X9 ", %%" #Y1 ", %%" #Y1 "\n\t"         \
-    "vpshufb	%%" #mask ", %%" #Y0 ", %%" #Y0 "\n\t"                   \
-    "vpshufb	%%" #mask ", %%" #Y1 ", %%" #Y1 "\n\t"
-
-#define LOAD_BLOCK2_W_Y_2(mask, Y0, Y1, X0, X1, X8, X9, reg, i) \
-       _LOAD_BLOCK2_W_Y_2(mask, Y0, Y1, X0, X1, X8, X9, reg, i)
-
-#define LOAD_BLOCK2_W_Y(mask, reg)                           \
-    LOAD_BLOCK2_W_Y_2(mask, Y0, Y1, X0, X1, X8, X9, reg,  0) \
-    LOAD_BLOCK2_W_Y_2(mask, Y2, Y3, X2, X3, X8, X9, reg, 32) \
-    LOAD_BLOCK2_W_Y_2(mask, Y4, Y5, X4, X5, X8, X9, reg, 64) \
-    LOAD_BLOCK2_W_Y_2(mask, Y6, Y7, X6, X7, X8, X9, reg, 96) \
-
-#define SET_BLOCK2_W_Y(reg)                   \
-    SET_W_Y_2(Y0, Y1, YTMP1, YTMP2, reg,   0) \
-    SET_W_Y_2(Y2, Y3, YTMP1, YTMP2, reg,  64) \
-    SET_W_Y_2(Y4, Y5, YTMP1, YTMP2, reg, 128) \
-    SET_W_Y_2(Y6, Y7, YTMP1, YTMP2, reg, 192)
-
-static const word64 K512_AVX2[160] = {
-    W64LIT(0x428a2f98d728ae22), W64LIT(0x7137449123ef65cd),
-    W64LIT(0x428a2f98d728ae22), W64LIT(0x7137449123ef65cd),
-    W64LIT(0xb5c0fbcfec4d3b2f), W64LIT(0xe9b5dba58189dbbc),
-    W64LIT(0xb5c0fbcfec4d3b2f), W64LIT(0xe9b5dba58189dbbc),
-    W64LIT(0x3956c25bf348b538), W64LIT(0x59f111f1b605d019),
-    W64LIT(0x3956c25bf348b538), W64LIT(0x59f111f1b605d019),
-    W64LIT(0x923f82a4af194f9b), W64LIT(0xab1c5ed5da6d8118),
-    W64LIT(0x923f82a4af194f9b), W64LIT(0xab1c5ed5da6d8118),
-    W64LIT(0xd807aa98a3030242), W64LIT(0x12835b0145706fbe),
-    W64LIT(0xd807aa98a3030242), W64LIT(0x12835b0145706fbe),
-    W64LIT(0x243185be4ee4b28c), W64LIT(0x550c7dc3d5ffb4e2),
-    W64LIT(0x243185be4ee4b28c), W64LIT(0x550c7dc3d5ffb4e2),
-    W64LIT(0x72be5d74f27b896f), W64LIT(0x80deb1fe3b1696b1),
-    W64LIT(0x72be5d74f27b896f), W64LIT(0x80deb1fe3b1696b1),
-    W64LIT(0x9bdc06a725c71235), W64LIT(0xc19bf174cf692694),
-    W64LIT(0x9bdc06a725c71235), W64LIT(0xc19bf174cf692694),
-    W64LIT(0xe49b69c19ef14ad2), W64LIT(0xefbe4786384f25e3),
-    W64LIT(0xe49b69c19ef14ad2), W64LIT(0xefbe4786384f25e3),
-    W64LIT(0x0fc19dc68b8cd5b5), W64LIT(0x240ca1cc77ac9c65),
-    W64LIT(0x0fc19dc68b8cd5b5), W64LIT(0x240ca1cc77ac9c65),
-    W64LIT(0x2de92c6f592b0275), W64LIT(0x4a7484aa6ea6e483),
-    W64LIT(0x2de92c6f592b0275), W64LIT(0x4a7484aa6ea6e483),
-    W64LIT(0x5cb0a9dcbd41fbd4), W64LIT(0x76f988da831153b5),
-    W64LIT(0x5cb0a9dcbd41fbd4), W64LIT(0x76f988da831153b5),
-    W64LIT(0x983e5152ee66dfab), W64LIT(0xa831c66d2db43210),
-    W64LIT(0x983e5152ee66dfab), W64LIT(0xa831c66d2db43210),
-    W64LIT(0xb00327c898fb213f), W64LIT(0xbf597fc7beef0ee4),
-    W64LIT(0xb00327c898fb213f), W64LIT(0xbf597fc7beef0ee4),
-    W64LIT(0xc6e00bf33da88fc2), W64LIT(0xd5a79147930aa725),
-    W64LIT(0xc6e00bf33da88fc2), W64LIT(0xd5a79147930aa725),
-    W64LIT(0x06ca6351e003826f), W64LIT(0x142929670a0e6e70),
-    W64LIT(0x06ca6351e003826f), W64LIT(0x142929670a0e6e70),
-    W64LIT(0x27b70a8546d22ffc), W64LIT(0x2e1b21385c26c926),
-    W64LIT(0x27b70a8546d22ffc), W64LIT(0x2e1b21385c26c926),
-    W64LIT(0x4d2c6dfc5ac42aed), W64LIT(0x53380d139d95b3df),
-    W64LIT(0x4d2c6dfc5ac42aed), W64LIT(0x53380d139d95b3df),
-    W64LIT(0x650a73548baf63de), W64LIT(0x766a0abb3c77b2a8),
-    W64LIT(0x650a73548baf63de), W64LIT(0x766a0abb3c77b2a8),
-    W64LIT(0x81c2c92e47edaee6), W64LIT(0x92722c851482353b),
-    W64LIT(0x81c2c92e47edaee6), W64LIT(0x92722c851482353b),
-    W64LIT(0xa2bfe8a14cf10364), W64LIT(0xa81a664bbc423001),
-    W64LIT(0xa2bfe8a14cf10364), W64LIT(0xa81a664bbc423001),
-    W64LIT(0xc24b8b70d0f89791), W64LIT(0xc76c51a30654be30),
-    W64LIT(0xc24b8b70d0f89791), W64LIT(0xc76c51a30654be30),
-    W64LIT(0xd192e819d6ef5218), W64LIT(0xd69906245565a910),
-    W64LIT(0xd192e819d6ef5218), W64LIT(0xd69906245565a910),
-    W64LIT(0xf40e35855771202a), W64LIT(0x106aa07032bbd1b8),
-    W64LIT(0xf40e35855771202a), W64LIT(0x106aa07032bbd1b8),
-    W64LIT(0x19a4c116b8d2d0c8), W64LIT(0x1e376c085141ab53),
-    W64LIT(0x19a4c116b8d2d0c8), W64LIT(0x1e376c085141ab53),
-    W64LIT(0x2748774cdf8eeb99), W64LIT(0x34b0bcb5e19b48a8),
-    W64LIT(0x2748774cdf8eeb99), W64LIT(0x34b0bcb5e19b48a8),
-    W64LIT(0x391c0cb3c5c95a63), W64LIT(0x4ed8aa4ae3418acb),
-    W64LIT(0x391c0cb3c5c95a63), W64LIT(0x4ed8aa4ae3418acb),
-    W64LIT(0x5b9cca4f7763e373), W64LIT(0x682e6ff3d6b2b8a3),
-    W64LIT(0x5b9cca4f7763e373), W64LIT(0x682e6ff3d6b2b8a3),
-    W64LIT(0x748f82ee5defb2fc), W64LIT(0x78a5636f43172f60),
-    W64LIT(0x748f82ee5defb2fc), W64LIT(0x78a5636f43172f60),
-    W64LIT(0x84c87814a1f0ab72), W64LIT(0x8cc702081a6439ec),
-    W64LIT(0x84c87814a1f0ab72), W64LIT(0x8cc702081a6439ec),
-    W64LIT(0x90befffa23631e28), W64LIT(0xa4506cebde82bde9),
-    W64LIT(0x90befffa23631e28), W64LIT(0xa4506cebde82bde9),
-    W64LIT(0xbef9a3f7b2c67915), W64LIT(0xc67178f2e372532b),
-    W64LIT(0xbef9a3f7b2c67915), W64LIT(0xc67178f2e372532b),
-    W64LIT(0xca273eceea26619c), W64LIT(0xd186b8c721c0c207),
-    W64LIT(0xca273eceea26619c), W64LIT(0xd186b8c721c0c207),
-    W64LIT(0xeada7dd6cde0eb1e), W64LIT(0xf57d4f7fee6ed178),
-    W64LIT(0xeada7dd6cde0eb1e), W64LIT(0xf57d4f7fee6ed178),
-    W64LIT(0x06f067aa72176fba), W64LIT(0x0a637dc5a2c898a6),
-    W64LIT(0x06f067aa72176fba), W64LIT(0x0a637dc5a2c898a6),
-    W64LIT(0x113f9804bef90dae), W64LIT(0x1b710b35131c471b),
-    W64LIT(0x113f9804bef90dae), W64LIT(0x1b710b35131c471b),
-    W64LIT(0x28db77f523047d84), W64LIT(0x32caab7b40c72493),
-    W64LIT(0x28db77f523047d84), W64LIT(0x32caab7b40c72493),
-    W64LIT(0x3c9ebe0a15c9bebc), W64LIT(0x431d67c49c100d4c),
-    W64LIT(0x3c9ebe0a15c9bebc), W64LIT(0x431d67c49c100d4c),
-    W64LIT(0x4cc5d4becb3e42b6), W64LIT(0x597f299cfc657e2a),
-    W64LIT(0x4cc5d4becb3e42b6), W64LIT(0x597f299cfc657e2a),
-    W64LIT(0x5fcb6fab3ad6faec), W64LIT(0x6c44198c4a475817),
-    W64LIT(0x5fcb6fab3ad6faec), W64LIT(0x6c44198c4a475817)
-};
-static const word64* K512_AVX2_END = &K512_AVX2[128];
-
-static int Transform_Sha512_AVX2(wc_Sha512* sha512)
-{
-    __asm__ __volatile__ (
-
-        /* 16 Ws plus loop counter and K512. */
-        "subq	$136, %%rsp\n\t"
-        "leaq	64(%[sha512]), %%rax\n\t"
-
-    INIT_MASK(MASK_Y)
-    LOAD_DIGEST()
-
-    LOAD_BLOCK_W_Y(MASK_Y, rax)
-
-        "movl	$4, 16*8(" WX ")\n\t"
-        "leaq	%[K512], %%rsi\n\t"
-        /* b */
-        "movq	%%r9, " L4 "\n\t"
-        /* e */
-        "movq	%%r12, " L1 "\n\t"
-        /* b ^ c */
-        "xorq	%%r10, " L4 "\n\t"
-
-    SET_BLOCK_W_Y(rsi)
-
-        "# Start of 16 rounds\n"
-        "1:\n\t"
-
-        "addq	$128, %%rsi\n\t"
-
-    MsgSched4_AVX2(W_Y_0,W_Y_4,W_Y_8,W_Y_12,RA,RB,RC,RD,RE,RF,RG,RH, 0)
-    MsgSched4_AVX2(W_Y_4,W_Y_8,W_Y_12,W_Y_0,RE,RF,RG,RH,RA,RB,RC,RD, 4)
-    MsgSched4_AVX2(W_Y_8,W_Y_12,W_Y_0,W_Y_4,RA,RB,RC,RD,RE,RF,RG,RH, 8)
-    MsgSched4_AVX2(W_Y_12,W_Y_0,W_Y_4,W_Y_8,RE,RF,RG,RH,RA,RB,RC,RD,12)
-
-    SET_BLOCK_W_Y(rsi)
-
-        "subl	$1, 16*8(" WX ")\n\t"
-        "jne	1b\n\t"
-
-    RND_ALL_2(RA,RB,RC,RD,RE,RF,RG,RH, 0)
-    RND_ALL_2(RG,RH,RA,RB,RC,RD,RE,RF, 2)
-    RND_ALL_2(RE,RF,RG,RH,RA,RB,RC,RD, 4)
-    RND_ALL_2(RC,RD,RE,RF,RG,RH,RA,RB, 6)
-
-    RND_ALL_2(RA,RB,RC,RD,RE,RF,RG,RH, 8)
-    RND_ALL_2(RG,RH,RA,RB,RC,RD,RE,RF,10)
-    RND_ALL_2(RE,RF,RG,RH,RA,RB,RC,RD,12)
-    RND_ALL_2(RC,RD,RE,RF,RG,RH,RA,RB,14)
-
-    STORE_ADD_DIGEST()
-
-        "addq	$136, %%rsp\n\t"
-
-        :
-        : [mask]   "m" (mBYTE_FLIP_MASK_Y),
-          [sha512] "r" (sha512),
-          [K512]   "m" (K512)
-        : WORK_REGS, STATE_REGS, YMM_REGS, "memory", "rsi"
-    );
-
-    return 0;
-}
-
-static int Transform_Sha512_AVX2_Len(wc_Sha512* sha512, word32 len)
-{
-    if ((len & WC_SHA512_BLOCK_SIZE) != 0) {
-        XMEMCPY(sha512->buffer, sha512->data, WC_SHA512_BLOCK_SIZE);
-        Transform_Sha512_AVX2(sha512);
-        sha512->data += WC_SHA512_BLOCK_SIZE;
-        len -= WC_SHA512_BLOCK_SIZE;
-        if (len == 0)
-            return 0;
-    }
-
-    __asm__ __volatile__ (
-
-        "movq	224(%[sha512]), %%rcx\n\t"
-
-    INIT_MASK(MASK_Y)
-    LOAD_DIGEST()
-
-        "# Start of processing two blocks\n"
-        "2:\n\t"
-
-        "subq	$1344, %%rsp\n\t"
-        "leaq	%[K512], %%rsi\n\t"
-
-        /* L4 = b */
-        "movq	%%r9, " L4 "\n\t"
-        /* e */
-        "movq	%%r12, " L1 "\n\t"
-
-    LOAD_BLOCK2_W_Y(MASK_Y, rcx)
-
-        /* L4 = b ^ c */
-        "xorq	%%r10, " L4 "\n\t"
-        "\n"
-        "1:\n\t"
-    SET_BLOCK2_W_Y(rsi)
-    MsgSched2_AVX2(Y0,Y1,Y2,Y3,Y4,Y5,Y6,Y7,RA,RB,RC,RD,RE,RF,RG,RH, 0)
-    MsgSched2_AVX2(Y1,Y2,Y3,Y4,Y5,Y6,Y7,Y0,RG,RH,RA,RB,RC,RD,RE,RF, 4)
-    MsgSched2_AVX2(Y2,Y3,Y4,Y5,Y6,Y7,Y0,Y1,RE,RF,RG,RH,RA,RB,RC,RD, 8)
-    MsgSched2_AVX2(Y3,Y4,Y5,Y6,Y7,Y0,Y1,Y2,RC,RD,RE,RF,RG,RH,RA,RB,12)
-    MsgSched2_AVX2(Y4,Y5,Y6,Y7,Y0,Y1,Y2,Y3,RA,RB,RC,RD,RE,RF,RG,RH,16)
-    MsgSched2_AVX2(Y5,Y6,Y7,Y0,Y1,Y2,Y3,Y4,RG,RH,RA,RB,RC,RD,RE,RF,20)
-    MsgSched2_AVX2(Y6,Y7,Y0,Y1,Y2,Y3,Y4,Y5,RE,RF,RG,RH,RA,RB,RC,RD,24)
-    MsgSched2_AVX2(Y7,Y0,Y1,Y2,Y3,Y4,Y5,Y6,RC,RD,RE,RF,RG,RH,RA,RB,28)
-        "addq	$256, %%rsi\n\t"
-        "addq	$256, %%rsp\n\t"
-        "cmpq	%[K512_END], %%rsi\n\t"
-        "jne	1b\n\t"
-
-    SET_BLOCK2_W_Y(rsi)
-    RND_ALL_2(RA,RB,RC,RD,RE,RF,RG,RH, 0)
-    RND_ALL_2(RG,RH,RA,RB,RC,RD,RE,RF, 4)
-    RND_ALL_2(RE,RF,RG,RH,RA,RB,RC,RD, 8)
-    RND_ALL_2(RC,RD,RE,RF,RG,RH,RA,RB,12)
-
-    RND_ALL_2(RA,RB,RC,RD,RE,RF,RG,RH,16)
-    RND_ALL_2(RG,RH,RA,RB,RC,RD,RE,RF,20)
-    RND_ALL_2(RE,RF,RG,RH,RA,RB,RC,RD,24)
-    RND_ALL_2(RC,RD,RE,RF,RG,RH,RA,RB,28)
-        "subq	$1024, %%rsp\n\t"
-
-    ADD_DIGEST()
-    STORE_DIGEST()
-
-        /* L4 = b */
-        "movq	%%r9, " L4 "\n\t"
-        /* e */
-        "movq	%%r12, " L1 "\n\t"
-        /* L4 = b ^ c */
-        "xorq	%%r10, " L4 "\n\t"
-
-        "movq	$5, %%rsi\n\t"
-        "\n"
-        "3:\n\t"
-    RND_ALL_2(RA,RB,RC,RD,RE,RF,RG,RH, 2)
-    RND_ALL_2(RG,RH,RA,RB,RC,RD,RE,RF, 6)
-    RND_ALL_2(RE,RF,RG,RH,RA,RB,RC,RD,10)
-    RND_ALL_2(RC,RD,RE,RF,RG,RH,RA,RB,14)
-
-    RND_ALL_2(RA,RB,RC,RD,RE,RF,RG,RH,18)
-    RND_ALL_2(RG,RH,RA,RB,RC,RD,RE,RF,22)
-    RND_ALL_2(RE,RF,RG,RH,RA,RB,RC,RD,26)
-    RND_ALL_2(RC,RD,RE,RF,RG,RH,RA,RB,30)
-        "addq	$256, %%rsp\n\t"
-        "subq	$1, %%rsi\n\t"
-        "jnz	3b\n\t"
-
-    ADD_DIGEST()
-
-        "movq	224(%[sha512]), %%rcx\n\t"
-        "addq	$64, %%rsp\n\t"
-        "addq	$256, %%rcx\n\t"
-        "subl	$256, %[len]\n\t"
-        "movq	%%rcx, 224(%[sha512])\n\t"
-
-    STORE_DIGEST()
-
-        "jnz	2b\n\t"
-
-        :
-        : [mask]   "m" (mBYTE_FLIP_MASK_Y),
-          [len]    "m" (len),
-          [sha512] "r" (sha512),
-          [K512]   "m" (K512_AVX2),
-          [K512_END]   "m" (K512_AVX2_END)
-        : WORK_REGS, STATE_REGS, YMM_REGS, "memory", "rsi"
-    );
-
-    return 0;
-}
-
-#ifdef HAVE_INTEL_RORX
-static int Transform_Sha512_AVX2_RORX(wc_Sha512* sha512)
-{
-    __asm__ __volatile__ (
-
-        /* 16 Ws plus loop counter. */
-        "subq	$136, %%rsp\n\t"
-        "leaq	64(%[sha512]), " L2 "\n\t"
-
-    INIT_MASK(MASK_Y)
-    LOAD_DIGEST()
-
-    LOAD_BLOCK_W_Y(MASK_Y, rcx)
-
-        "movl	$4, 16*8(" WX ")\n\t"
-        "leaq	%[K512], %%rsi\n\t"
-        /* b */
-        "movq	%%r9, " L4 "\n\t"
-        /* L3 = 0 (add to prev h) */
-        "xorq	" L3 ", " L3 "\n\t"
-        /* b ^ c */
-        "xorq	%%r10, " L4 "\n\t"
-
-    SET_BLOCK_W_Y(rsi)
-
-        "# Start of 16 rounds\n"
-        "1:\n\t"
-
-        "addq	$128, %%rsi\n\t"
-
-    MsgSched4_AVX2_RORX_SET(W_Y_0,W_Y_4,W_Y_8,W_Y_12,RA,RB,RC,RD,RE,RF,RG,RH, 0)
-    MsgSched4_AVX2_RORX_SET(W_Y_4,W_Y_8,W_Y_12,W_Y_0,RE,RF,RG,RH,RA,RB,RC,RD, 4)
-    MsgSched4_AVX2_RORX_SET(W_Y_8,W_Y_12,W_Y_0,W_Y_4,RA,RB,RC,RD,RE,RF,RG,RH, 8)
-    MsgSched4_AVX2_RORX_SET(W_Y_12,W_Y_0,W_Y_4,W_Y_8,RE,RF,RG,RH,RA,RB,RC,RD,12)
-
-        "subl	$1, 16*8(%%rsp)\n\t"
-        "jnz	1b\n\t"
-
-    RND_RORX_ALL_4(RA,RB,RC,RD,RE,RF,RG,RH, 0)
-    RND_RORX_ALL_4(RE,RF,RG,RH,RA,RB,RC,RD, 4)
-    RND_RORX_ALL_4(RA,RB,RC,RD,RE,RF,RG,RH, 8)
-    RND_RORX_ALL_4(RE,RF,RG,RH,RA,RB,RC,RD,12)
-        /* Prev RND: h += Maj(a,b,c) */
-        "addq	" L3 ", %%r8\n\t"
-        "addq	$136, %%rsp\n\t"
-
-    STORE_ADD_DIGEST()
-
-        :
-        : [mask]   "m" (mBYTE_FLIP_MASK_Y),
-          [sha512] "r" (sha512),
-          [K512]   "m" (K512)
-        : WORK_REGS, STATE_REGS, YMM_REGS, "memory", "rsi"
-    );
-
-    return 0;
-}
-
-static int Transform_Sha512_AVX2_RORX_Len(wc_Sha512* sha512, word32 len)
-{
-    if ((len & WC_SHA512_BLOCK_SIZE) != 0) {
-        XMEMCPY(sha512->buffer, sha512->data, WC_SHA512_BLOCK_SIZE);
-        Transform_Sha512_AVX2_RORX(sha512);
-        sha512->data += WC_SHA512_BLOCK_SIZE;
-        len -= WC_SHA512_BLOCK_SIZE;
-        if (len == 0)
-            return 0;
-    }
-
-    __asm__ __volatile__ (
-
-        "movq	224(%[sha512]), %%rax\n\t"
-
-    INIT_MASK(MASK_Y)
-    LOAD_DIGEST()
-
-        "# Start of processing two blocks\n"
-        "2:\n\t"
-
-        "subq	$1344, %%rsp\n\t"
-        "leaq	%[K512], %%rsi\n\t"
-
-        /* L4 = b */
-        "movq	%%r9, " L4 "\n\t"
-        /* L3 = 0 (add to prev h) */
-        "xorq	" L3 ", " L3 "\n\t"
-
-    LOAD_BLOCK2_W_Y(MASK_Y, rax)
-
-        /* L4 = b ^ c */
-        "xorq	%%r10, " L4 "\n\t"
-        "\n"
-        "1:\n\t"
-    SET_BLOCK2_W_Y(rsi)
-    MsgSched2_AVX2_RORX(Y0,Y1,Y2,Y3,Y4,Y5,Y6,Y7,RA,RB,RC,RD,RE,RF,RG,RH, 0)
-    MsgSched2_AVX2_RORX(Y1,Y2,Y3,Y4,Y5,Y6,Y7,Y0,RG,RH,RA,RB,RC,RD,RE,RF, 4)
-    MsgSched2_AVX2_RORX(Y2,Y3,Y4,Y5,Y6,Y7,Y0,Y1,RE,RF,RG,RH,RA,RB,RC,RD, 8)
-    MsgSched2_AVX2_RORX(Y3,Y4,Y5,Y6,Y7,Y0,Y1,Y2,RC,RD,RE,RF,RG,RH,RA,RB,12)
-    MsgSched2_AVX2_RORX(Y4,Y5,Y6,Y7,Y0,Y1,Y2,Y3,RA,RB,RC,RD,RE,RF,RG,RH,16)
-    MsgSched2_AVX2_RORX(Y5,Y6,Y7,Y0,Y1,Y2,Y3,Y4,RG,RH,RA,RB,RC,RD,RE,RF,20)
-    MsgSched2_AVX2_RORX(Y6,Y7,Y0,Y1,Y2,Y3,Y4,Y5,RE,RF,RG,RH,RA,RB,RC,RD,24)
-    MsgSched2_AVX2_RORX(Y7,Y0,Y1,Y2,Y3,Y4,Y5,Y6,RC,RD,RE,RF,RG,RH,RA,RB,28)
-        "addq	$256, %%rsi\n\t"
-        "addq	$256, %%rsp\n\t"
-        "cmpq	%[K512_END], %%rsi\n\t"
-        "jne	1b\n\t"
-
-    SET_BLOCK2_W_Y(rsi)
-    RND_RORX_ALL_2(RA,RB,RC,RD,RE,RF,RG,RH, 0)
-    RND_RORX_ALL_2(RG,RH,RA,RB,RC,RD,RE,RF, 4)
-    RND_RORX_ALL_2(RE,RF,RG,RH,RA,RB,RC,RD, 8)
-    RND_RORX_ALL_2(RC,RD,RE,RF,RG,RH,RA,RB,12)
-
-    RND_RORX_ALL_2(RA,RB,RC,RD,RE,RF,RG,RH,16)
-    RND_RORX_ALL_2(RG,RH,RA,RB,RC,RD,RE,RF,20)
-    RND_RORX_ALL_2(RE,RF,RG,RH,RA,RB,RC,RD,24)
-    RND_RORX_ALL_2(RC,RD,RE,RF,RG,RH,RA,RB,28)
-        "addq	" L3 ", %%r8\n\t"
-        "subq	$1024, %%rsp\n\t"
-
-    ADD_DIGEST()
-    STORE_DIGEST()
-
-        /* L4 = b */
-        "movq	%%r9, " L4 "\n\t"
-        /* L3 = 0 (add to prev h) */
-        "xorq	" L3 ", " L3 "\n\t"
-        /* L4 = b ^ c */
-        "xorq	%%r10, " L4 "\n\t"
-
-        "movq	$5, %%rsi\n\t"
-        "\n"
-        "3:\n\t"
-    RND_RORX_ALL_2(RA,RB,RC,RD,RE,RF,RG,RH, 2)
-    RND_RORX_ALL_2(RG,RH,RA,RB,RC,RD,RE,RF, 6)
-    RND_RORX_ALL_2(RE,RF,RG,RH,RA,RB,RC,RD,10)
-    RND_RORX_ALL_2(RC,RD,RE,RF,RG,RH,RA,RB,14)
-
-    RND_RORX_ALL_2(RA,RB,RC,RD,RE,RF,RG,RH,18)
-    RND_RORX_ALL_2(RG,RH,RA,RB,RC,RD,RE,RF,22)
-    RND_RORX_ALL_2(RE,RF,RG,RH,RA,RB,RC,RD,26)
-    RND_RORX_ALL_2(RC,RD,RE,RF,RG,RH,RA,RB,30)
-        "addq	$256, %%rsp\n\t"
-        "subq	$1, %%rsi\n\t"
-        "jnz	3b\n\t"
-
-        "addq	" L3 ", %%r8\n\t"
-
-    ADD_DIGEST()
-
-        "movq	224(%[sha512]), %%rax\n\t"
-        "addq	$64, %%rsp\n\t"
-        "addq	$256, %%rax\n\t"
-        "subl	$256, %[len]\n\t"
-        "movq	%%rax, 224(%[sha512])\n\t"
-
-    STORE_DIGEST()
-
-        "jnz	2b\n\t"
-
-        :
-        : [mask]   "m" (mBYTE_FLIP_MASK_Y),
-          [len]    "m" (len),
-          [sha512] "r" (sha512),
-          [K512]   "m" (K512_AVX2),
-          [K512_END]   "m" (K512_AVX2_END)
-        : WORK_REGS, STATE_REGS, YMM_REGS, "memory", "rsi"
-    );
-
-    return 0;
-}
-#endif /* HAVE_INTEL_RORX */
-#endif /* HAVE_INTEL_AVX2 */
-
 #endif /* WOLFSSL_SHA512 */
 
-
 /* -------------------------------------------------------------------------- */
 /* SHA384 */
 /* -------------------------------------------------------------------------- */
@@ -2626,6 +901,7 @@
 
 #if defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_HASH)
     /* functions defined in wolfcrypt/src/port/caam/caam_sha.c */
+
 #else
 
 static int InitSha384(wc_Sha384* sha384)
@@ -2647,6 +923,25 @@
     sha384->loLen   = 0;
     sha384->hiLen   = 0;
 
+#if  defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+    !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+    sha384->ctx.sha_type = SHA2_384;
+     /* always start firstblock = 1 when using hw engine */
+    sha384->ctx.isfirstblock = 1;
+    if(sha384->ctx.mode == ESP32_SHA_HW) {
+        /* release hw */
+        esp_sha_hw_unlock();
+    }
+    /* always set mode as INIT
+    *  whether using HW or SW is determined at first call of update()
+    */
+    sha384->ctx.mode = ESP32_SHA_INIT;
+
+#endif
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+    sha384->flags = 0;
+#endif
+
     return 0;
 }
 
@@ -2715,21 +1010,6 @@
     return InitSha384(sha384);  /* reset state */
 }
 
-
-/* Hardware Acceleration */
-#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
-    int wc_InitSha384_ex(wc_Sha384* sha384, void* heap, int devId)
-    {
-        int ret = InitSha384(sha384);
-
-        (void)heap;
-        (void)devId;
-
-        Sha512_SetTransform();
-
-        return ret;
-    }
-#else
 int wc_InitSha384_ex(wc_Sha384* sha384, void* heap, int devId)
 {
     int ret;
@@ -2743,6 +1023,9 @@
     if (ret != 0)
         return ret;
 
+#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+    Sha512_SetTransform();
+#endif
 #ifdef WOLFSSL_SMALL_STACK_CACHE
     sha384->W = NULL;
 #endif
@@ -2756,7 +1039,7 @@
 
     return ret;
 }
-#endif
+
 #endif /* WOLFSSL_IMX6_CAAM */
 
 int wc_InitSha384(wc_Sha384* sha384)
@@ -2795,9 +1078,22 @@
     if (sha512 == NULL || hash == NULL)
         return BAD_FUNC_ARG;
 
+#if  defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+    !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+    if(sha512->ctx.mode == ESP32_SHA_INIT) {
+        esp_sha_try_hw_lock(&sha512->ctx);
+    }
+    if(sha512->ctx.mode != ESP32_SHA_SW)
+       esp_sha512_digest_process(sha512, 0);
+#endif
+
     ret = wc_Sha512Copy(sha512, &tmpSha512);
     if (ret == 0) {
         ret = wc_Sha512Final(&tmpSha512, hash);
+#if  defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+    !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+        sha512->ctx.mode = ESP32_SHA_SW;;
+#endif
         wc_Sha512Free(&tmpSha512);
     }
     return ret;
@@ -2818,10 +1114,36 @@
 #ifdef WOLFSSL_ASYNC_CRYPT
     ret = wolfAsync_DevCopy(&src->asyncDev, &dst->asyncDev);
 #endif
+#if  defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+    !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+    dst->ctx.mode = src->ctx.mode;
+    dst->ctx.isfirstblock = src->ctx.isfirstblock;
+    dst->ctx.sha_type = src->ctx.sha_type;
+#endif
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+     dst->flags |= WC_HASH_FLAG_ISCOPY;
+#endif
 
     return ret;
 }
 
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+int wc_Sha512SetFlags(wc_Sha512* sha512, word32 flags)
+{
+    if (sha512) {
+        sha512->flags = flags;
+    }
+    return 0;
+}
+int wc_Sha512GetFlags(wc_Sha512* sha512, word32* flags)
+{
+    if (sha512 && flags) {
+        *flags = sha512->flags;
+    }
+    return 0;
+}
+#endif
+
 #endif /* WOLFSSL_SHA512 */
 
 #ifdef WOLFSSL_SHA384
@@ -2833,10 +1155,22 @@
 
     if (sha384 == NULL || hash == NULL)
         return BAD_FUNC_ARG;
-
+#if  defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+    !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+    if(sha384->ctx.mode == ESP32_SHA_INIT) {
+        esp_sha_try_hw_lock(&sha384->ctx);
+    }
+    if(sha384->ctx.mode != ESP32_SHA_SW) {
+        esp_sha512_digest_process(sha384, 0);
+    }
+#endif
     ret = wc_Sha384Copy(sha384, &tmpSha384);
     if (ret == 0) {
         ret = wc_Sha384Final(&tmpSha384, hash);
+#if  defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+    !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+        sha384->ctx.mode = ESP32_SHA_SW;
+#endif
         wc_Sha384Free(&tmpSha384);
     }
     return ret;
@@ -2856,10 +1190,36 @@
 #ifdef WOLFSSL_ASYNC_CRYPT
     ret = wolfAsync_DevCopy(&src->asyncDev, &dst->asyncDev);
 #endif
+#if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+   !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+    dst->ctx.mode = src->ctx.mode;
+    dst->ctx.isfirstblock = src->ctx.isfirstblock;
+    dst->ctx.sha_type = src->ctx.sha_type;
+#endif
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+     dst->flags |= WC_HASH_FLAG_ISCOPY;
+#endif
 
     return ret;
 }
 
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+int wc_Sha384SetFlags(wc_Sha384* sha384, word32 flags)
+{
+    if (sha384) {
+        sha384->flags = flags;
+    }
+    return 0;
+}
+int wc_Sha384GetFlags(wc_Sha384* sha384, word32* flags)
+{
+    if (sha384 && flags) {
+        *flags = sha384->flags;
+    }
+    return 0;
+}
+#endif
+
 #endif /* WOLFSSL_SHA384 */
 
 #endif /* WOLFSSL_SHA512 || WOLFSSL_SHA384 */
--- a/wolfcrypt/src/signature.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/signature.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* signature.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -49,35 +49,23 @@
 #ifndef NO_SIG_WRAPPER
 
 #if !defined(NO_RSA) && !defined(NO_ASN)
-static int wc_SignatureDerEncode(enum wc_HashType hash_type, byte** hash_data,
-    word32* hash_len)
+static int wc_SignatureDerEncode(enum wc_HashType hash_type, byte* hash_data,
+    word32 hash_len, word32* hash_enc_len)
 {
-    int ret = wc_HashGetOID(hash_type);
-    if (ret > 0) {
-        int oid = ret;
+    int ret, oid;
 
-        /* Allocate buffer for hash and max DER encoded */
-        word32 digest_len = *hash_len + MAX_DER_DIGEST_SZ;
-        byte *digest_buf = (byte*)XMALLOC(digest_len, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-        if (digest_buf) {
-            ret = wc_EncodeSignature(digest_buf, *hash_data, *hash_len, oid);
-            if (ret > 0) {
-                digest_len = ret;
-                ret = 0;
+    ret = wc_HashGetOID(hash_type);
+    if (ret < 0) {
+        return ret;
+    }
+    oid = ret;
 
-                /* Replace hash with digest (DER encoding + hash) */
-                XFREE(*hash_data, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-                *hash_data = digest_buf;
-                *hash_len = digest_len;
-            }
-            else {
-                XFREE(digest_buf, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-            }
-        }
-        else {
-            ret = MEMORY_E;
-        }
+    ret = wc_EncodeSignature(hash_data, hash_data, hash_len, oid);
+    if (ret > 0) {
+        *hash_enc_len = ret;
+        ret = 0;
     }
+
     return ret;
 }
 #endif /* !NO_RSA && !NO_ASN */
@@ -138,9 +126,9 @@
     int ret;
 
     /* Check arguments */
-    if (hash_data == NULL || hash_len <= 0 ||
-        sig == NULL || sig_len <= 0 ||
-        key == NULL || key_len <= 0) {
+    if (hash_data == NULL || hash_len == 0 ||
+        sig == NULL || sig_len == 0 ||
+        key == NULL || key_len == 0) {
         return BAD_FUNC_ARG;
     }
 
@@ -188,6 +176,24 @@
         case WC_SIGNATURE_TYPE_RSA:
         {
 #ifndef NO_RSA
+#if defined(WOLFSSL_CRYPTOCELL)
+        /* the signature must propagate to the cryptocell to get verfied */
+        if (sig_type == WC_SIGNATURE_TYPE_RSA_W_ENC) {
+            ret = cc310_RsaSSL_Verify(hash_data, hash_len,(byte*)sig, key,
+                                      CRYS_RSA_HASH_SHA256_mode);
+        }
+        else {
+            ret = cc310_RsaSSL_Verify(hash_data, hash_len,(byte*)sig, key,
+                                      CRYS_RSA_After_SHA256_mode);
+        }
+
+        if (ret != 0) {
+            WOLFSSL_MSG("RSA Signature Verify difference!");
+            ret = SIG_VERIFY_E;
+        }
+
+#else /* WOLFSSL_CRYPTOCELL */
+
             word32 plain_len = hash_len;
             byte *plain_data;
 
@@ -222,6 +228,7 @@
             else {
                 ret = MEMORY_E;
             }
+#endif /* !WOLFSSL_CRYPTOCELL */
 #else
             ret = SIG_TYPE_E;
 #endif
@@ -244,13 +251,17 @@
     const void* key, word32 key_len)
 {
     int ret;
-    word32 hash_len;
-    byte *hash_data = NULL;
+    word32 hash_len, hash_enc_len;
+#if defined(WOLFSSL_SMALL_STACK) || defined(NO_ASN)
+    byte *hash_data;
+#else
+    byte hash_data[MAX_DER_DIGEST_SZ];
+#endif
 
     /* Check arguments */
-    if (data == NULL || data_len <= 0 ||
-        sig == NULL || sig_len <= 0 ||
-        key == NULL || key_len <= 0) {
+    if (data == NULL || data_len == 0 ||
+        sig == NULL || sig_len == 0 ||
+        key == NULL || key_len == 0) {
         return BAD_FUNC_ARG;
     }
 
@@ -266,13 +277,22 @@
         WOLFSSL_MSG("wc_SignatureVerify: Invalid hash type/len");
         return ret;
     }
-    hash_len = ret;
+    hash_enc_len = hash_len = ret;
 
+#ifndef NO_RSA
+    if (sig_type == WC_SIGNATURE_TYPE_RSA_W_ENC) {
+        /* For RSA with ASN.1 encoding include room */
+        hash_enc_len += MAX_DER_DIGEST_ASN_SZ;
+    }
+#endif
+
+#if defined(WOLFSSL_SMALL_STACK) || defined(NO_ASN)
     /* Allocate temporary buffer for hash data */
-    hash_data = (byte*)XMALLOC(hash_len, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    hash_data = (byte*)XMALLOC(hash_enc_len, NULL, DYNAMIC_TYPE_TMP_BUFFER);
     if (hash_data == NULL) {
         return MEMORY_E;
     }
+#endif
 
     /* Perform hash of data */
     ret = wc_Hash(hash_type, data, data_len, hash_data, hash_len);
@@ -282,20 +302,35 @@
         #if defined(NO_RSA) || defined(NO_ASN)
             ret = SIG_TYPE_E;
         #else
-            ret = wc_SignatureDerEncode(hash_type, &hash_data, &hash_len);
+            ret = wc_SignatureDerEncode(hash_type, hash_data, hash_len,
+                &hash_enc_len);
         #endif
         }
 
         if (ret == 0) {
+#if defined(WOLFSSL_CRYPTOCELL)
+            if ((sig_type == WC_SIGNATURE_TYPE_RSA)
+                || (sig_type == WC_SIGNATURE_TYPE_RSA_W_ENC)) {
+                if (sig_type == WC_SIGNATURE_TYPE_RSA_W_ENC) {
+                    ret = cc310_RsaSSL_Verify(hash_data, hash_len, sig, key,
+                                              cc310_hashModeRSA(hash_type, 0));
+                }
+                else {
+                    ret = cc310_RsaSSL_Verify(hash_data, hash_len, sig, key,
+                                              cc310_hashModeRSA(hash_type, 1));
+                }
+            }
+#else
             /* Verify signature using hash */
             ret = wc_SignatureVerifyHash(hash_type, sig_type,
-                hash_data, hash_len, sig, sig_len, key, key_len);
+                hash_data, hash_enc_len, sig, sig_len, key, key_len);
+#endif /* WOLFSSL_CRYPTOCELL */
         }
     }
 
-    if (hash_data) {
-        XFREE(hash_data, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    }
+#if defined(WOLFSSL_SMALL_STACK) || defined(NO_ASN)
+    XFREE(hash_data, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
 
     return ret;
 }
@@ -307,15 +342,25 @@
     byte* sig, word32 *sig_len,
     const void* key, word32 key_len, WC_RNG* rng)
 {
+    return wc_SignatureGenerateHash_ex(hash_type, sig_type, hash_data, hash_len,
+        sig, sig_len, key, key_len, rng, 1);
+}
+
+int wc_SignatureGenerateHash_ex(
+    enum wc_HashType hash_type, enum wc_SignatureType sig_type,
+    const byte* hash_data, word32 hash_len,
+    byte* sig, word32 *sig_len,
+    const void* key, word32 key_len, WC_RNG* rng, int verify)
+{
     int ret;
 
     /* Suppress possible unused arg if all signature types are disabled */
     (void)rng;
 
     /* Check arguments */
-    if (hash_data == NULL || hash_len <= 0 ||
-        sig == NULL || sig_len == NULL || *sig_len <= 0 ||
-        key == NULL || key_len <= 0) {
+    if (hash_data == NULL || hash_len == 0 ||
+        sig == NULL || sig_len == NULL || *sig_len == 0 ||
+        key == NULL || key_len == 0) {
         return BAD_FUNC_ARG;
     }
 
@@ -354,7 +399,17 @@
 
         case WC_SIGNATURE_TYPE_RSA_W_ENC:
         case WC_SIGNATURE_TYPE_RSA:
-#ifndef NO_RSA
+#if !defined(NO_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+    #if defined(WOLFSSL_CRYPTOCELL)
+        if (sig_type == WC_SIGNATURE_TYPE_RSA_W_ENC) {
+            ret = cc310_RsaSSL_Sign(hash_data, hash_len, sig, *sig_len, key,
+                                    cc310_hashModeRSA(hash_type, 0));
+            }
+        else {
+            ret = cc310_RsaSSL_Sign(hash_data, hash_len, sig, *sig_len, key,
+                                    cc310_hashModeRSA(hash_type, 1));
+        }
+    #else
             /* Create signature using provided RSA key */
             do {
             #ifdef WOLFSSL_ASYNC_CRYPT
@@ -365,6 +420,7 @@
                     ret = wc_RsaSSL_Sign(hash_data, hash_len, sig, *sig_len,
                         (RsaKey*)key, rng);
             } while (ret == WC_PENDING_E);
+     #endif /* WOLFSSL_CRYPTOCELL */
             if (ret >= 0) {
                 *sig_len = ret;
                 ret = 0; /* Success */
@@ -380,6 +436,11 @@
             break;
     }
 
+    if (ret == 0 && verify) {
+        ret = wc_SignatureVerifyHash(hash_type, sig_type, hash_data, hash_len,
+            sig, *sig_len, key, key_len);
+    }
+
     return ret;
 }
 
@@ -389,14 +450,28 @@
     byte* sig, word32 *sig_len,
     const void* key, word32 key_len, WC_RNG* rng)
 {
+    return wc_SignatureGenerate_ex(hash_type, sig_type, data, data_len, sig,
+        sig_len, key, key_len, rng, 1);
+}
+
+int wc_SignatureGenerate_ex(
+    enum wc_HashType hash_type, enum wc_SignatureType sig_type,
+    const byte* data, word32 data_len,
+    byte* sig, word32 *sig_len,
+    const void* key, word32 key_len, WC_RNG* rng, int verify)
+{
     int ret;
-    word32 hash_len;
-    byte *hash_data = NULL;
+    word32 hash_len, hash_enc_len;
+#if defined(WOLFSSL_SMALL_STACK) || defined(NO_ASN)
+    byte *hash_data;
+#else
+    byte hash_data[MAX_DER_DIGEST_SZ];
+#endif
 
     /* Check arguments */
-    if (data == NULL || data_len <= 0 ||
-        sig == NULL || sig_len == NULL || *sig_len <= 0 ||
-        key == NULL || key_len <= 0) {
+    if (data == NULL || data_len == 0 ||
+        sig == NULL || sig_len == NULL || *sig_len == 0 ||
+        key == NULL || key_len == 0) {
         return BAD_FUNC_ARG;
     }
 
@@ -412,36 +487,71 @@
         WOLFSSL_MSG("wc_SignatureGenerate: Invalid hash type/len");
         return ret;
     }
-    hash_len = ret;
+    hash_enc_len = hash_len = ret;
 
+#if !defined(NO_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+    if (sig_type == WC_SIGNATURE_TYPE_RSA_W_ENC) {
+        /* For RSA with ASN.1 encoding include room */
+        hash_enc_len += MAX_DER_DIGEST_ASN_SZ;
+    }
+#endif
+
+#if defined(WOLFSSL_SMALL_STACK) || defined(NO_ASN)
     /* Allocate temporary buffer for hash data */
-    hash_data = (byte*)XMALLOC(hash_len, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    hash_data = (byte*)XMALLOC(hash_enc_len, NULL, DYNAMIC_TYPE_TMP_BUFFER);
     if (hash_data == NULL) {
         return MEMORY_E;
     }
+#endif
 
     /* Perform hash of data */
     ret = wc_Hash(hash_type, data, data_len, hash_data, hash_len);
     if (ret == 0) {
         /* Handle RSA with DER encoding */
         if (sig_type == WC_SIGNATURE_TYPE_RSA_W_ENC) {
-        #if defined(NO_RSA) || defined(NO_ASN)
+        #if defined(NO_RSA) || defined(NO_ASN) || \
+                                                defined(WOLFSSL_RSA_PUBLIC_ONLY)
             ret = SIG_TYPE_E;
         #else
-            ret = wc_SignatureDerEncode(hash_type, &hash_data, &hash_len);
+            ret = wc_SignatureDerEncode(hash_type, hash_data, hash_len,
+                &hash_enc_len);
         #endif
         }
+        if (ret == 0) {
+#if defined(WOLFSSL_CRYPTOCELL)
+            if ((sig_type == WC_SIGNATURE_TYPE_RSA)
+                || (sig_type == WC_SIGNATURE_TYPE_RSA_W_ENC)) {
+                if (sig_type == WC_SIGNATURE_TYPE_RSA_W_ENC) {
+                    ret = cc310_RsaSSL_Sign(hash_data, hash_len, sig, *sig_len,
+                                         key, cc310_hashModeRSA(hash_type, 0));
+                }
+                else {
+                    ret = cc310_RsaSSL_Sign(hash_data, hash_len, sig, *sig_len,
+                                         key, cc310_hashModeRSA(hash_type, 1));
+                }
 
-        if (ret == 0) {
+                if (ret == *sig_len) {
+                    ret = 0;
+                }
+             }
+        }
+     }
+#else
             /* Generate signature using hash */
             ret = wc_SignatureGenerateHash(hash_type, sig_type,
-                hash_data, hash_len, sig, sig_len, key, key_len, rng);
+                hash_data, hash_enc_len, sig, sig_len, key, key_len, rng);
         }
     }
 
-    if (hash_data) {
-        XFREE(hash_data, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    if (ret == 0 && verify) {
+        ret = wc_SignatureVerifyHash(hash_type, sig_type, hash_data,
+            hash_enc_len, sig, *sig_len, key, key_len);
     }
+#endif /* WOLFSSL_CRYPTOCELL */
+
+#if defined(WOLFSSL_SMALL_STACK) || defined(NO_ASN)
+    XFREE(hash_data, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
 
     return ret;
 }
--- a/wolfcrypt/src/sp_arm32.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/sp_arm32.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* sp.c
  *
- * Copyright (C) 2006-2018 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -39,8 +39,6 @@
                                     defined(WOLFSSL_HAVE_SP_ECC)
 
 #ifdef RSA_LOW_MEM
-#define SP_RSA_PRIVATE_EXP_D
-
 #ifndef WOLFSSL_SP_SMALL
 #define WOLFSSL_SP_SMALL
 #endif
@@ -51,85 +49,101 @@
 #ifdef WOLFSSL_SP_ARM32_ASM
 #if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
 #ifndef WOLFSSL_SP_NO_2048
-/* Read big endian unsigned byte aray into r.
- *
- * r  A single precision integer.
+/* Read big endian unsigned byte array into r.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
  * a  Byte array.
  * n  Number of bytes in array to read.
  */
-static void sp_2048_from_bin(sp_digit* r, int max, const byte* a, int n)
-{
-    int i, j = 0, s = 0;
+static void sp_2048_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;
 
     r[0] = 0;
     for (i = n-1; i >= 0; i--) {
-        r[j] |= ((sp_digit)a[i]) << s;
-        if (s >= 24) {
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 24U) {
             r[j] &= 0xffffffff;
-            s = 32 - s;
-            if (j + 1 >= max)
+            s = 32U - s;
+            if (j + 1 >= size) {
                 break;
-            r[++j] = a[i] >> s;
-            s = 8 - s;
-        }
-        else
-            s += 8;
-    }
-
-    for (j++; j < max; j++)
+            }
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    for (j++; j < size; j++) {
         r[j] = 0;
+    }
 }
 
 /* Convert an mp_int to an array of sp_digit.
  *
  * r  A single precision integer.
+ * size  Maximum number of bytes to convert
  * a  A multi-precision integer.
  */
-static void sp_2048_from_mp(sp_digit* r, int max, mp_int* a)
+static void sp_2048_from_mp(sp_digit* r, int size, const mp_int* a)
 {
 #if DIGIT_BIT == 32
     int j;
 
     XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
 
-    for (j = a->used; j < max; j++)
+    for (j = a->used; j < size; j++) {
         r[j] = 0;
+    }
 #elif DIGIT_BIT > 32
-    int i, j = 0, s = 0;
+    int i, j = 0;
+    word32 s = 0;
 
     r[0] = 0;
-    for (i = 0; i < a->used && j < max; i++) {
-        r[j] |= a->dp[i] << s;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
         r[j] &= 0xffffffff;
-        s = 32 - s;
-        if (j + 1 >= max)
+        s = 32U - s;
+        if (j + 1 >= size) {
             break;
-        r[++j] = a->dp[i] >> s;
-        while (s + 32 <= DIGIT_BIT) {
-            s += 32;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 32U) <= (word32)DIGIT_BIT) {
+            s += 32U;
             r[j] &= 0xffffffff;
-            if (j + 1 >= max)
+            if (j + 1 >= size) {
                 break;
-            if (s < DIGIT_BIT)
-                r[++j] = a->dp[i] >> s;
-            else
-                r[++j] = 0;
-        }
-        s = DIGIT_BIT - s;
-    }
-
-    for (j++; j < max; j++)
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
         r[j] = 0;
+    }
 #else
     int i, j = 0, s = 0;
 
     r[0] = 0;
-    for (i = 0; i < a->used && j < max; i++) {
+    for (i = 0; i < a->used && j < size; i++) {
         r[j] |= ((sp_digit)a->dp[i]) << s;
         if (s + DIGIT_BIT >= 32) {
             r[j] &= 0xffffffff;
-            if (j + 1 >= max)
+            if (j + 1 >= size) {
                 break;
+            }
             s = 32 - s;
             if (s == DIGIT_BIT) {
                 r[++j] = 0;
@@ -140,16 +154,18 @@
                 s = DIGIT_BIT - s;
             }
         }
-        else
+        else {
             s += DIGIT_BIT;
-    }
-
-    for (j++; j < max; j++)
+        }
+    }
+
+    for (j++; j < size; j++) {
         r[j] = 0;
-#endif
-}
-
-/* Write r as big endian to byte aray.
+    }
+#endif
+}
+
+/* Write r as big endian to byte array.
  * Fixed length number of bytes written: 256
  *
  * r  A single precision integer.
@@ -163,19 +179,26 @@
     a[j] = 0;
     for (i=0; i<64 && j>=0; i++) {
         b = 0;
-        a[j--] |= r[i] << s; b += 8 - s;
-        if (j < 0)
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
             break;
+        }
         while (b < 32) {
-            a[j--] = r[i] >> b; b += 8;
-            if (j < 0)
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
                 break;
+            }
         }
         s = 8 - (b - 32);
-        if (j >= 0)
+        if (j >= 0) {
             a[j] = 0;
-        if (s != 0)
+        }
+        if (s != 0) {
             j++;
+        }
     }
 }
 
@@ -188,16 +211,15 @@
  */
 static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
 {
-    sp_digit tmp[8];
-
-    __asm__ __volatile__ (
+    __asm__ __volatile__ (
+        "sub	sp, sp, #32\n\t"
         "mov	r10, #0\n\t"
         "#  A[0] * B[0]\n\t"
         "ldr	r8, [%[a], #0]\n\t"
         "ldr	r9, [%[b], #0]\n\t"
         "umull	r3, r4, r8, r9\n\t"
         "mov	r5, #0\n\t"
-        "str	r3, [%[tmp]]\n\t"
+        "str	r3, [sp]\n\t"
         "#  A[0] * B[1]\n\t"
         "ldr	r8, [%[a], #0]\n\t"
         "ldr	r9, [%[b], #4]\n\t"
@@ -212,7 +234,7 @@
         "adds	r4, r4, r6\n\t"
         "adcs	r5, r5, r7\n\t"
         "adc	r3, r3, r10\n\t"
-        "str	r4, [%[tmp], #4]\n\t"
+        "str	r4, [sp, #4]\n\t"
         "#  A[0] * B[2]\n\t"
         "ldr	r8, [%[a], #0]\n\t"
         "ldr	r9, [%[b], #8]\n\t"
@@ -234,7 +256,7 @@
         "adds	r5, r5, r6\n\t"
         "adcs	r3, r3, r7\n\t"
         "adc	r4, r4, r10\n\t"
-        "str	r5, [%[tmp], #8]\n\t"
+        "str	r5, [sp, #8]\n\t"
         "#  A[0] * B[3]\n\t"
         "ldr	r8, [%[a], #0]\n\t"
         "ldr	r9, [%[b], #12]\n\t"
@@ -263,7 +285,7 @@
         "adds	r3, r3, r6\n\t"
         "adcs	r4, r4, r7\n\t"
         "adc	r5, r5, r10\n\t"
-        "str	r3, [%[tmp], #12]\n\t"
+        "str	r3, [sp, #12]\n\t"
         "#  A[0] * B[4]\n\t"
         "ldr	r8, [%[a], #0]\n\t"
         "ldr	r9, [%[b], #16]\n\t"
@@ -299,7 +321,7 @@
         "adds	r4, r4, r6\n\t"
         "adcs	r5, r5, r7\n\t"
         "adc	r3, r3, r10\n\t"
-        "str	r4, [%[tmp], #16]\n\t"
+        "str	r4, [sp, #16]\n\t"
         "#  A[0] * B[5]\n\t"
         "ldr	r8, [%[a], #0]\n\t"
         "ldr	r9, [%[b], #20]\n\t"
@@ -342,7 +364,7 @@
         "adds	r5, r5, r6\n\t"
         "adcs	r3, r3, r7\n\t"
         "adc	r4, r4, r10\n\t"
-        "str	r5, [%[tmp], #20]\n\t"
+        "str	r5, [sp, #20]\n\t"
         "#  A[0] * B[6]\n\t"
         "ldr	r8, [%[a], #0]\n\t"
         "ldr	r9, [%[b], #24]\n\t"
@@ -392,7 +414,7 @@
         "adds	r3, r3, r6\n\t"
         "adcs	r4, r4, r7\n\t"
         "adc	r5, r5, r10\n\t"
-        "str	r3, [%[tmp], #24]\n\t"
+        "str	r3, [sp, #24]\n\t"
         "#  A[0] * B[7]\n\t"
         "ldr	r8, [%[a], #0]\n\t"
         "ldr	r9, [%[b], #28]\n\t"
@@ -449,7 +471,7 @@
         "adds	r4, r4, r6\n\t"
         "adcs	r5, r5, r7\n\t"
         "adc	r3, r3, r10\n\t"
-        "str	r4, [%[tmp], #28]\n\t"
+        "str	r4, [sp, #28]\n\t"
         "#  A[1] * B[7]\n\t"
         "ldr	r8, [%[a], #4]\n\t"
         "ldr	r9, [%[b], #28]\n\t"
@@ -653,12 +675,27 @@
         "adc	r3, r3, r7\n\t"
         "str	r5, [%[r], #56]\n\t"
         "str	r3, [%[r], #60]\n\t"
+        "ldr	r3, [sp, #0]\n\t"
+        "ldr	r4, [sp, #4]\n\t"
+        "ldr	r5, [sp, #8]\n\t"
+        "ldr	r6, [sp, #12]\n\t"
+        "str	r3, [%[r], #0]\n\t"
+        "str	r4, [%[r], #4]\n\t"
+        "str	r5, [%[r], #8]\n\t"
+        "str	r6, [%[r], #12]\n\t"
+        "ldr	r3, [sp, #16]\n\t"
+        "ldr	r4, [sp, #20]\n\t"
+        "ldr	r5, [sp, #24]\n\t"
+        "ldr	r6, [sp, #28]\n\t"
+        "str	r3, [%[r], #16]\n\t"
+        "str	r4, [%[r], #20]\n\t"
+        "str	r5, [%[r], #24]\n\t"
+        "str	r6, [%[r], #28]\n\t"
+        "add	sp, sp, #32\n\t"
         :
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [tmp] "r" (tmp)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
     );
-
-    XMEMCPY(r, tmp, sizeof(tmp));
 }
 
 /* Square a and put result in r. (r = a * a)
@@ -668,15 +705,14 @@
  */
 static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a)
 {
-    sp_digit tmp[8];
-
-    __asm__ __volatile__ (
+    __asm__ __volatile__ (
+        "sub	sp, sp, #32\n\t"
         "mov	r14, #0\n\t"
         "#  A[0] * A[0]\n\t"
         "ldr	r10, [%[a], #0]\n\t"
         "umull	r8, r3, r10, r10\n\t"
         "mov	r4, #0\n\t"
-        "str	r8, [%[tmp]]\n\t"
+        "str	r8, [sp]\n\t"
         "#  A[0] * A[1]\n\t"
         "ldr	r10, [%[a], #4]\n\t"
         "ldr	r8, [%[a], #0]\n\t"
@@ -687,7 +723,7 @@
         "adds	r3, r3, r8\n\t"
         "adcs	r4, r4, r9\n\t"
         "adc	r2, r2, r14\n\t"
-        "str	r3, [%[tmp], #4]\n\t"
+        "str	r3, [sp, #4]\n\t"
         "#  A[0] * A[2]\n\t"
         "ldr	r10, [%[a], #8]\n\t"
         "ldr	r8, [%[a], #0]\n\t"
@@ -704,7 +740,7 @@
         "adds	r4, r4, r8\n\t"
         "adcs	r2, r2, r9\n\t"
         "adc	r3, r3, r14\n\t"
-        "str	r4, [%[tmp], #8]\n\t"
+        "str	r4, [sp, #8]\n\t"
         "#  A[0] * A[3]\n\t"
         "ldr	r10, [%[a], #12]\n\t"
         "ldr	r8, [%[a], #0]\n\t"
@@ -725,7 +761,7 @@
         "adds	r2, r2, r8\n\t"
         "adcs	r3, r3, r9\n\t"
         "adc	r4, r4, r14\n\t"
-        "str	r2, [%[tmp], #12]\n\t"
+        "str	r2, [sp, #12]\n\t"
         "#  A[0] * A[4]\n\t"
         "ldr	r10, [%[a], #16]\n\t"
         "ldr	r8, [%[a], #0]\n\t"
@@ -752,7 +788,7 @@
         "adds	r3, r3, r8\n\t"
         "adcs	r4, r4, r9\n\t"
         "adc	r2, r2, r14\n\t"
-        "str	r3, [%[tmp], #16]\n\t"
+        "str	r3, [sp, #16]\n\t"
         "#  A[0] * A[5]\n\t"
         "ldr	r10, [%[a], #20]\n\t"
         "ldr	r8, [%[a], #0]\n\t"
@@ -779,7 +815,7 @@
         "adds	r4, r4, r5\n\t"
         "adcs	r2, r2, r6\n\t"
         "adc	r3, r3, r7\n\t"
-        "str	r4, [%[tmp], #20]\n\t"
+        "str	r4, [sp, #20]\n\t"
         "#  A[0] * A[6]\n\t"
         "ldr	r10, [%[a], #24]\n\t"
         "ldr	r8, [%[a], #0]\n\t"
@@ -812,7 +848,7 @@
         "adds	r2, r2, r5\n\t"
         "adcs	r3, r3, r6\n\t"
         "adc	r4, r4, r7\n\t"
-        "str	r2, [%[tmp], #24]\n\t"
+        "str	r2, [sp, #24]\n\t"
         "#  A[0] * A[7]\n\t"
         "ldr	r10, [%[a], #28]\n\t"
         "ldr	r8, [%[a], #0]\n\t"
@@ -846,7 +882,7 @@
         "adds	r3, r3, r5\n\t"
         "adcs	r4, r4, r6\n\t"
         "adc	r2, r2, r7\n\t"
-        "str	r3, [%[tmp], #28]\n\t"
+        "str	r3, [sp, #28]\n\t"
         "#  A[1] * A[7]\n\t"
         "ldr	r10, [%[a], #28]\n\t"
         "ldr	r8, [%[a], #4]\n\t"
@@ -990,12 +1026,27 @@
         "adc	r2, r2, r9\n\t"
         "str	r4, [%[r], #56]\n\t"
         "str	r2, [%[r], #60]\n\t"
+        "ldr	r2, [sp, #0]\n\t"
+        "ldr	r3, [sp, #4]\n\t"
+        "ldr	r4, [sp, #8]\n\t"
+        "ldr	r8, [sp, #12]\n\t"
+        "str	r2, [%[r], #0]\n\t"
+        "str	r3, [%[r], #4]\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "str	r8, [%[r], #12]\n\t"
+        "ldr	r2, [sp, #16]\n\t"
+        "ldr	r3, [sp, #20]\n\t"
+        "ldr	r4, [sp, #24]\n\t"
+        "ldr	r8, [sp, #28]\n\t"
+        "str	r2, [%[r], #16]\n\t"
+        "str	r3, [%[r], #20]\n\t"
+        "str	r4, [%[r], #24]\n\t"
+        "str	r8, [%[r], #28]\n\t"
+        "add	sp, sp, #32\n\t"
         :
-        : [r] "r" (r), [a] "r" (a), [tmp] "r" (tmp)
+        : [r] "r" (r), [a] "r" (a)
         : "memory", "r2", "r3", "r4", "r8", "r9", "r10", "r8", "r5", "r6", "r7", "r14"
     );
-
-    XMEMCPY(r, tmp, sizeof(tmp));
 }
 
 /* Add b to a into r. (r = a + b)
@@ -1227,13 +1278,14 @@
  * a  A single precision integer.
  * m  Mask to AND against each digit.
  */
-static void sp_2048_mask_8(sp_digit* r, sp_digit* a, sp_digit m)
+static void sp_2048_mask_8(sp_digit* r, const sp_digit* a, sp_digit m)
 {
 #ifdef WOLFSSL_SP_SMALL
     int i;
 
-    for (i=0; i<8; i++)
+    for (i=0; i<8; i++) {
         r[i] = a[i] & m;
+    }
 #else
     r[0] = a[0] & m;
     r[1] = a[1] & m;
@@ -1252,7 +1304,7 @@
  * a  A single precision integer.
  * b  A single precision integer.
  */
-static void sp_2048_mul_16(sp_digit* r, const sp_digit* a,
+SP_NOINLINE static void sp_2048_mul_16(sp_digit* r, const sp_digit* a,
         const sp_digit* b)
 {
     sp_digit* z0 = r;
@@ -1276,7 +1328,7 @@
     u += sp_2048_add_16(r + 8, r + 8, z1);
     r[24] = u;
     XMEMSET(r + 24 + 1, 0, sizeof(sp_digit) * (8 - 1));
-    sp_2048_add_16(r + 16, r + 16, z2);
+    (void)sp_2048_add_16(r + 16, r + 16, z2);
 }
 
 /* Square a and put result in r. (r = a * a)
@@ -1284,7 +1336,7 @@
  * r  A single precision integer.
  * a  A single precision integer.
  */
-static void sp_2048_sqr_16(sp_digit* r, const sp_digit* a)
+SP_NOINLINE static void sp_2048_sqr_16(sp_digit* r, const sp_digit* a)
 {
     sp_digit* z0 = r;
     sp_digit z2[16];
@@ -1303,7 +1355,7 @@
     u += sp_2048_add_16(r + 8, r + 8, z1);
     r[24] = u;
     XMEMSET(r + 24 + 1, 0, sizeof(sp_digit) * (8 - 1));
-    sp_2048_add_16(r + 16, r + 16, z2);
+    (void)sp_2048_add_16(r + 16, r + 16, z2);
 }
 
 /* Sub b from a into a. (a -= b)
@@ -1609,13 +1661,14 @@
  * a  A single precision integer.
  * m  Mask to AND against each digit.
  */
-static void sp_2048_mask_16(sp_digit* r, sp_digit* a, sp_digit m)
+static void sp_2048_mask_16(sp_digit* r, const sp_digit* a, sp_digit m)
 {
 #ifdef WOLFSSL_SP_SMALL
     int i;
 
-    for (i=0; i<16; i++)
+    for (i=0; i<16; i++) {
         r[i] = a[i] & m;
+    }
 #else
     int i;
 
@@ -1638,7 +1691,7 @@
  * a  A single precision integer.
  * b  A single precision integer.
  */
-static void sp_2048_mul_32(sp_digit* r, const sp_digit* a,
+SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a,
         const sp_digit* b)
 {
     sp_digit* z0 = r;
@@ -1662,7 +1715,7 @@
     u += sp_2048_add_32(r + 16, r + 16, z1);
     r[48] = u;
     XMEMSET(r + 48 + 1, 0, sizeof(sp_digit) * (16 - 1));
-    sp_2048_add_32(r + 32, r + 32, z2);
+    (void)sp_2048_add_32(r + 32, r + 32, z2);
 }
 
 /* Square a and put result in r. (r = a * a)
@@ -1670,7 +1723,7 @@
  * r  A single precision integer.
  * a  A single precision integer.
  */
-static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a)
+SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a)
 {
     sp_digit* z0 = r;
     sp_digit z2[32];
@@ -1689,7 +1742,7 @@
     u += sp_2048_add_32(r + 16, r + 16, z1);
     r[48] = u;
     XMEMSET(r + 48 + 1, 0, sizeof(sp_digit) * (16 - 1));
-    sp_2048_add_32(r + 32, r + 32, z2);
+    (void)sp_2048_add_32(r + 32, r + 32, z2);
 }
 
 /* Sub b from a into a. (a -= b)
@@ -2251,13 +2304,14 @@
  * a  A single precision integer.
  * m  Mask to AND against each digit.
  */
-static void sp_2048_mask_32(sp_digit* r, sp_digit* a, sp_digit m)
+static void sp_2048_mask_32(sp_digit* r, const sp_digit* a, sp_digit m)
 {
 #ifdef WOLFSSL_SP_SMALL
     int i;
 
-    for (i=0; i<32; i++)
+    for (i=0; i<32; i++) {
         r[i] = a[i] & m;
+    }
 #else
     int i;
 
@@ -2280,7 +2334,7 @@
  * a  A single precision integer.
  * b  A single precision integer.
  */
-static void sp_2048_mul_64(sp_digit* r, const sp_digit* a,
+SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a,
         const sp_digit* b)
 {
     sp_digit* z0 = r;
@@ -2304,7 +2358,7 @@
     u += sp_2048_add_64(r + 32, r + 32, z1);
     r[96] = u;
     XMEMSET(r + 96 + 1, 0, sizeof(sp_digit) * (32 - 1));
-    sp_2048_add_64(r + 64, r + 64, z2);
+    (void)sp_2048_add_64(r + 64, r + 64, z2);
 }
 
 /* Square a and put result in r. (r = a * a)
@@ -2312,7 +2366,7 @@
  * r  A single precision integer.
  * a  A single precision integer.
  */
-static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a)
+SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a)
 {
     sp_digit* z0 = r;
     sp_digit z2[64];
@@ -2331,10 +2385,10 @@
     u += sp_2048_add_64(r + 32, r + 32, z1);
     r[96] = u;
     XMEMSET(r + 96 + 1, 0, sizeof(sp_digit) * (32 - 1));
-    sp_2048_add_64(r + 64, r + 64, z2);
-}
-
-#endif /* WOLFSSL_SP_SMALL */
+    (void)sp_2048_add_64(r + 64, r + 64, z2);
+}
+
+#endif /* !WOLFSSL_SP_SMALL */
 #ifdef WOLFSSL_SP_SMALL
 /* Add b to a into r. (r = a + b)
  *
@@ -2432,15 +2486,15 @@
  */
 static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b)
 {
-    sp_digit tmp[128];
-
-    __asm__ __volatile__ (
+    __asm__ __volatile__ (
+        "sub	sp, sp, #512\n\t"
         "mov	r5, #0\n\t"
         "mov	r6, #0\n\t"
         "mov	r7, #0\n\t"
         "mov	r8, #0\n\t"
         "\n1:\n\t"
         "subs	r3, r5, #252\n\t"
+        "it	cc\n\t"
         "movcc	r3, #0\n\t"
         "sub	r4, r5, r3\n\t"
         "\n2:\n\t"
@@ -2457,20 +2511,31 @@
         "cmp	r3, r5\n\t"
         "ble	2b\n\t"
         "\n3:\n\t"
-        "str	r6, [%[r], r5]\n\t"
+        "str	r6, [sp, r5]\n\t"
         "mov	r6, r7\n\t"
         "mov	r7, r8\n\t"
         "mov	r8, #0\n\t"
         "add	r5, r5, #4\n\t"
         "cmp	r5, #504\n\t"
         "ble	1b\n\t"
-        "str	r6, [%[r], r5]\n\t"
-        :
-        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+        "str	r6, [sp, r5]\n\t"
+        "\n4:\n\t"
+        "ldr	r6, [sp, #0]\n\t"
+        "ldr	r7, [sp, #4]\n\t"
+        "ldr	r8, [sp, #8]\n\t"
+        "ldr	r3, [sp, #12]\n\t"
+        "str	r6, [%[r], #0]\n\t"
+        "str	r7, [%[r], #4]\n\t"
+        "str	r8, [%[r], #8]\n\t"
+        "str	r3, [%[r], #12]\n\t"
+        "add	sp, sp, #16\n\t"
+        "add	%[r], %[r], #16\n\t"
+        "subs	r5, r5, #16\n\t"
+        "bgt	4b\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
     );
-
-    XMEMCPY(r, tmp, sizeof(tmp));
 }
 
 /* Square a and put result in r. (r = a * a)
@@ -2480,9 +2545,8 @@
  */
 static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a)
 {
-    sp_digit tmp[128];
-
-    __asm__ __volatile__ (
+    __asm__ __volatile__ (
+        "sub	sp, sp, #512\n\t"
         "mov	r12, #0\n\t"
         "mov	r6, #0\n\t"
         "mov	r7, #0\n\t"
@@ -2490,6 +2554,7 @@
         "mov	r5, #0\n\t"
         "\n1:\n\t"
         "subs	r3, r5, #252\n\t"
+        "it	cc\n\t"
         "movcc	r3, r12\n\t"
         "sub	r4, r5, r3\n\t"
         "\n2:\n\t"
@@ -2521,24 +2586,35 @@
         "cmp	r3, r5\n\t"
         "ble	2b\n\t"
         "\n3:\n\t"
-        "str	r6, [%[r], r5]\n\t"
+        "str	r6, [sp, r5]\n\t"
         "mov	r6, r7\n\t"
         "mov	r7, r8\n\t"
         "mov	r8, #0\n\t"
         "add	r5, r5, #4\n\t"
         "cmp	r5, #504\n\t"
         "ble	1b\n\t"
-        "str	r6, [%[r], r5]\n\t"
-        :
-        : [r] "r" (tmp), [a] "r" (a)
+        "str	r6, [sp, r5]\n\t"
+        "\n4:\n\t"
+        "ldr	r6, [sp, #0]\n\t"
+        "ldr	r7, [sp, #4]\n\t"
+        "ldr	r8, [sp, #8]\n\t"
+        "ldr	r3, [sp, #12]\n\t"
+        "str	r6, [%[r], #0]\n\t"
+        "str	r7, [%[r], #4]\n\t"
+        "str	r8, [%[r], #8]\n\t"
+        "str	r3, [%[r], #12]\n\t"
+        "add	sp, sp, #16\n\t"
+        "add	%[r], %[r], #16\n\t"
+        "subs	r5, r5, #16\n\t"
+        "bgt	4b\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a)
         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r9", "r12"
     );
-
-    XMEMCPY(r, tmp, sizeof(tmp));
 }
 
 #endif /* WOLFSSL_SP_SMALL */
-#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA)
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
 #ifdef WOLFSSL_SP_SMALL
 /* AND m into each word of a and store in r.
  *
@@ -2546,12 +2622,13 @@
  * a  A single precision integer.
  * m  Mask to AND against each digit.
  */
-static void sp_2048_mask_32(sp_digit* r, sp_digit* a, sp_digit m)
+static void sp_2048_mask_32(sp_digit* r, const sp_digit* a, sp_digit m)
 {
     int i;
 
-    for (i=0; i<32; i++)
+    for (i=0; i<32; i++) {
         r[i] = a[i] & m;
+    }
 }
 
 #endif /* WOLFSSL_SP_SMALL */
@@ -2652,15 +2729,15 @@
  */
 static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b)
 {
-    sp_digit tmp[64];
-
-    __asm__ __volatile__ (
+    __asm__ __volatile__ (
+        "sub	sp, sp, #256\n\t"
         "mov	r5, #0\n\t"
         "mov	r6, #0\n\t"
         "mov	r7, #0\n\t"
         "mov	r8, #0\n\t"
         "\n1:\n\t"
         "subs	r3, r5, #124\n\t"
+        "it	cc\n\t"
         "movcc	r3, #0\n\t"
         "sub	r4, r5, r3\n\t"
         "\n2:\n\t"
@@ -2677,20 +2754,31 @@
         "cmp	r3, r5\n\t"
         "ble	2b\n\t"
         "\n3:\n\t"
-        "str	r6, [%[r], r5]\n\t"
+        "str	r6, [sp, r5]\n\t"
         "mov	r6, r7\n\t"
         "mov	r7, r8\n\t"
         "mov	r8, #0\n\t"
         "add	r5, r5, #4\n\t"
         "cmp	r5, #248\n\t"
         "ble	1b\n\t"
-        "str	r6, [%[r], r5]\n\t"
-        :
-        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+        "str	r6, [sp, r5]\n\t"
+        "\n4:\n\t"
+        "ldr	r6, [sp, #0]\n\t"
+        "ldr	r7, [sp, #4]\n\t"
+        "ldr	r8, [sp, #8]\n\t"
+        "ldr	r3, [sp, #12]\n\t"
+        "str	r6, [%[r], #0]\n\t"
+        "str	r7, [%[r], #4]\n\t"
+        "str	r8, [%[r], #8]\n\t"
+        "str	r3, [%[r], #12]\n\t"
+        "add	sp, sp, #16\n\t"
+        "add	%[r], %[r], #16\n\t"
+        "subs	r5, r5, #16\n\t"
+        "bgt	4b\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
     );
-
-    XMEMCPY(r, tmp, sizeof(tmp));
 }
 
 /* Square a and put result in r. (r = a * a)
@@ -2700,9 +2788,8 @@
  */
 static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a)
 {
-    sp_digit tmp[64];
-
-    __asm__ __volatile__ (
+    __asm__ __volatile__ (
+        "sub	sp, sp, #256\n\t"
         "mov	r12, #0\n\t"
         "mov	r6, #0\n\t"
         "mov	r7, #0\n\t"
@@ -2710,6 +2797,7 @@
         "mov	r5, #0\n\t"
         "\n1:\n\t"
         "subs	r3, r5, #124\n\t"
+        "it	cc\n\t"
         "movcc	r3, r12\n\t"
         "sub	r4, r5, r3\n\t"
         "\n2:\n\t"
@@ -2741,31 +2829,42 @@
         "cmp	r3, r5\n\t"
         "ble	2b\n\t"
         "\n3:\n\t"
-        "str	r6, [%[r], r5]\n\t"
+        "str	r6, [sp, r5]\n\t"
         "mov	r6, r7\n\t"
         "mov	r7, r8\n\t"
         "mov	r8, #0\n\t"
         "add	r5, r5, #4\n\t"
         "cmp	r5, #248\n\t"
         "ble	1b\n\t"
-        "str	r6, [%[r], r5]\n\t"
-        :
-        : [r] "r" (tmp), [a] "r" (a)
+        "str	r6, [sp, r5]\n\t"
+        "\n4:\n\t"
+        "ldr	r6, [sp, #0]\n\t"
+        "ldr	r7, [sp, #4]\n\t"
+        "ldr	r8, [sp, #8]\n\t"
+        "ldr	r3, [sp, #12]\n\t"
+        "str	r6, [%[r], #0]\n\t"
+        "str	r7, [%[r], #4]\n\t"
+        "str	r8, [%[r], #8]\n\t"
+        "str	r3, [%[r], #12]\n\t"
+        "add	sp, sp, #16\n\t"
+        "add	%[r], %[r], #16\n\t"
+        "subs	r5, r5, #16\n\t"
+        "bgt	4b\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a)
         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r9", "r12"
     );
-
-    XMEMCPY(r, tmp, sizeof(tmp));
 }
 
 #endif /* WOLFSSL_SP_SMALL */
-#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
 
 /* Caclulate the bottom digit of -1/a mod 2^n.
  *
  * a    A single precision number.
  * rho  Bottom word of inverse.
  */
-static void sp_2048_mont_setup(sp_digit* a, sp_digit* rho)
+static void sp_2048_mont_setup(const sp_digit* a, sp_digit* rho)
 {
     sp_digit x, b;
 
@@ -2779,14 +2878,569 @@
     *rho = -x;
 }
 
-#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA)
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision digit.
+ */
+static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a,
+        sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    __asm__ __volatile__ (
+        "mov	r10, #0\n\t"
+        "# A[0] * B\n\t"
+        "ldr	r8, [%[a]]\n\t"
+        "umull	r5, r3, %[b], r8\n\t"
+        "mov	r4, #0\n\t"
+        "str	r5, [%[r]]\n\t"
+        "mov	r5, #0\n\t"
+        "mov	r9, #4\n\t"
+        "1:\n\t"
+        "ldr	r8, [%[a], r9]\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], r9]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "mov	r5, #0\n\t"
+        "add	r9, r9, #4\n\t"
+        "cmp	r9, #256\n\t"
+        "blt	1b\n\t"
+        "str	r3, [%[r], #256]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
+    );
+#else
+    __asm__ __volatile__ (
+        "mov	r10, #0\n\t"
+        "# A[0] * B\n\t"
+        "ldr	r8, [%[a]]\n\t"
+        "umull	r3, r4, %[b], r8\n\t"
+        "mov	r5, #0\n\t"
+        "str	r3, [%[r]]\n\t"
+        "# A[1] * B\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #4]\n\t"
+        "# A[2] * B\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #8]\n\t"
+        "# A[3] * B\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #12]\n\t"
+        "# A[4] * B\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "# A[5] * B\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #20]\n\t"
+        "# A[6] * B\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #24]\n\t"
+        "# A[7] * B\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #28]\n\t"
+        "# A[8] * B\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #32]\n\t"
+        "# A[9] * B\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #36]\n\t"
+        "# A[10] * B\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #40]\n\t"
+        "# A[11] * B\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #44]\n\t"
+        "# A[12] * B\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #48]\n\t"
+        "# A[13] * B\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #52]\n\t"
+        "# A[14] * B\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #56]\n\t"
+        "# A[15] * B\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #60]\n\t"
+        "# A[16] * B\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #64]\n\t"
+        "# A[17] * B\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #68]\n\t"
+        "# A[18] * B\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #72]\n\t"
+        "# A[19] * B\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #76]\n\t"
+        "# A[20] * B\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #80]\n\t"
+        "# A[21] * B\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #84]\n\t"
+        "# A[22] * B\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #88]\n\t"
+        "# A[23] * B\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #92]\n\t"
+        "# A[24] * B\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #96]\n\t"
+        "# A[25] * B\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #100]\n\t"
+        "# A[26] * B\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #104]\n\t"
+        "# A[27] * B\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #108]\n\t"
+        "# A[28] * B\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #112]\n\t"
+        "# A[29] * B\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #116]\n\t"
+        "# A[30] * B\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #120]\n\t"
+        "# A[31] * B\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #124]\n\t"
+        "# A[32] * B\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #128]\n\t"
+        "# A[33] * B\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #132]\n\t"
+        "# A[34] * B\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #136]\n\t"
+        "# A[35] * B\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #140]\n\t"
+        "# A[36] * B\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #144]\n\t"
+        "# A[37] * B\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #148]\n\t"
+        "# A[38] * B\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #152]\n\t"
+        "# A[39] * B\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #156]\n\t"
+        "# A[40] * B\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #160]\n\t"
+        "# A[41] * B\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #164]\n\t"
+        "# A[42] * B\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #168]\n\t"
+        "# A[43] * B\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #172]\n\t"
+        "# A[44] * B\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #176]\n\t"
+        "# A[45] * B\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #180]\n\t"
+        "# A[46] * B\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #184]\n\t"
+        "# A[47] * B\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #188]\n\t"
+        "# A[48] * B\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #192]\n\t"
+        "# A[49] * B\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #196]\n\t"
+        "# A[50] * B\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #200]\n\t"
+        "# A[51] * B\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #204]\n\t"
+        "# A[52] * B\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #208]\n\t"
+        "# A[53] * B\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #212]\n\t"
+        "# A[54] * B\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #216]\n\t"
+        "# A[55] * B\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #220]\n\t"
+        "# A[56] * B\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #224]\n\t"
+        "# A[57] * B\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #228]\n\t"
+        "# A[58] * B\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #232]\n\t"
+        "# A[59] * B\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #236]\n\t"
+        "# A[60] * B\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #240]\n\t"
+        "# A[61] * B\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #244]\n\t"
+        "# A[62] * B\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #248]\n\t"
+        "# A[63] * B\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r3, [%[r], #252]\n\t"
+        "str	r4, [%[r], #256]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
+    );
+#endif
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
 /* r = 2^n mod m where n is the number of bits to reduce by.
  * Given m must be 2048 bits, just need to subtract.
  *
  * r  A single precision number.
- * m  A signle precision number.
- */
-static void sp_2048_mont_norm_32(sp_digit* r, sp_digit* m)
+ * m  A single precision number.
+ */
+static void sp_2048_mont_norm_32(sp_digit* r, const sp_digit* m)
 {
     XMEMSET(r, 0, sizeof(sp_digit) * 32);
 
@@ -2802,7 +3456,7 @@
  * b  A single precision number to subtract.
  * m  Mask value to apply.
  */
-static sp_digit sp_2048_cond_sub_32(sp_digit* r, sp_digit* a, sp_digit* b,
+static sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b,
         sp_digit m)
 {
     sp_digit c = 0;
@@ -3006,7 +3660,7 @@
  * m   The single precision number representing the modulus.
  * mp  The digit representing the negative inverse of m mod 2^n.
  */
-SP_NOINLINE static void sp_2048_mont_reduce_32(sp_digit* a, sp_digit* m,
+SP_NOINLINE static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m,
         sp_digit mp)
 {
     sp_digit ca = 0;
@@ -3331,8 +3985,8 @@
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_2048_mont_mul_32(sp_digit* r, sp_digit* a, sp_digit* b,
-        sp_digit* m, sp_digit mp)
+static void sp_2048_mont_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
 {
     sp_2048_mul_32(r, a, b);
     sp_2048_mont_reduce_32(r, m, mp);
@@ -3345,7 +3999,7 @@
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_2048_mont_sqr_32(sp_digit* r, sp_digit* a, sp_digit* m,
+static void sp_2048_mont_sqr_32(sp_digit* r, const sp_digit* a, const sp_digit* m,
         sp_digit mp)
 {
     sp_2048_sqr_32(r, a);
@@ -3359,7 +4013,7 @@
  * b  A single precision digit.
  */
 static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a,
-        const sp_digit b)
+        sp_digit b)
 {
 #ifdef WOLFSSL_SP_SMALL
     __asm__ __volatile__ (
@@ -3376,7 +4030,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r3, r3, r6\n\t"
         "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
+        "adc	r5, r5, r10\n\t"
         "str	r3, [%[r], r9]\n\t"
         "mov	r3, r4\n\t"
         "mov	r4, r5\n\t"
@@ -3403,7 +4057,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r4, r4, r6\n\t"
         "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
+        "adc	r3, r3, r10\n\t"
         "str	r4, [%[r], #4]\n\t"
         "# A[2] * B\n\t"
         "ldr	r8, [%[a], #8]\n\t"
@@ -3411,7 +4065,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r5, r5, r6\n\t"
         "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
+        "adc	r4, r4, r10\n\t"
         "str	r5, [%[r], #8]\n\t"
         "# A[3] * B\n\t"
         "ldr	r8, [%[a], #12]\n\t"
@@ -3419,7 +4073,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r3, r3, r6\n\t"
         "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
+        "adc	r5, r5, r10\n\t"
         "str	r3, [%[r], #12]\n\t"
         "# A[4] * B\n\t"
         "ldr	r8, [%[a], #16]\n\t"
@@ -3427,7 +4081,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r4, r4, r6\n\t"
         "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
+        "adc	r3, r3, r10\n\t"
         "str	r4, [%[r], #16]\n\t"
         "# A[5] * B\n\t"
         "ldr	r8, [%[a], #20]\n\t"
@@ -3435,7 +4089,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r5, r5, r6\n\t"
         "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
+        "adc	r4, r4, r10\n\t"
         "str	r5, [%[r], #20]\n\t"
         "# A[6] * B\n\t"
         "ldr	r8, [%[a], #24]\n\t"
@@ -3443,7 +4097,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r3, r3, r6\n\t"
         "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
+        "adc	r5, r5, r10\n\t"
         "str	r3, [%[r], #24]\n\t"
         "# A[7] * B\n\t"
         "ldr	r8, [%[a], #28]\n\t"
@@ -3451,7 +4105,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r4, r4, r6\n\t"
         "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
+        "adc	r3, r3, r10\n\t"
         "str	r4, [%[r], #28]\n\t"
         "# A[8] * B\n\t"
         "ldr	r8, [%[a], #32]\n\t"
@@ -3459,7 +4113,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r5, r5, r6\n\t"
         "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
+        "adc	r4, r4, r10\n\t"
         "str	r5, [%[r], #32]\n\t"
         "# A[9] * B\n\t"
         "ldr	r8, [%[a], #36]\n\t"
@@ -3467,7 +4121,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r3, r3, r6\n\t"
         "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
+        "adc	r5, r5, r10\n\t"
         "str	r3, [%[r], #36]\n\t"
         "# A[10] * B\n\t"
         "ldr	r8, [%[a], #40]\n\t"
@@ -3475,7 +4129,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r4, r4, r6\n\t"
         "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
+        "adc	r3, r3, r10\n\t"
         "str	r4, [%[r], #40]\n\t"
         "# A[11] * B\n\t"
         "ldr	r8, [%[a], #44]\n\t"
@@ -3483,7 +4137,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r5, r5, r6\n\t"
         "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
+        "adc	r4, r4, r10\n\t"
         "str	r5, [%[r], #44]\n\t"
         "# A[12] * B\n\t"
         "ldr	r8, [%[a], #48]\n\t"
@@ -3491,7 +4145,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r3, r3, r6\n\t"
         "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
+        "adc	r5, r5, r10\n\t"
         "str	r3, [%[r], #48]\n\t"
         "# A[13] * B\n\t"
         "ldr	r8, [%[a], #52]\n\t"
@@ -3499,7 +4153,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r4, r4, r6\n\t"
         "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
+        "adc	r3, r3, r10\n\t"
         "str	r4, [%[r], #52]\n\t"
         "# A[14] * B\n\t"
         "ldr	r8, [%[a], #56]\n\t"
@@ -3507,7 +4161,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r5, r5, r6\n\t"
         "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
+        "adc	r4, r4, r10\n\t"
         "str	r5, [%[r], #56]\n\t"
         "# A[15] * B\n\t"
         "ldr	r8, [%[a], #60]\n\t"
@@ -3515,7 +4169,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r3, r3, r6\n\t"
         "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
+        "adc	r5, r5, r10\n\t"
         "str	r3, [%[r], #60]\n\t"
         "# A[16] * B\n\t"
         "ldr	r8, [%[a], #64]\n\t"
@@ -3523,7 +4177,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r4, r4, r6\n\t"
         "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
+        "adc	r3, r3, r10\n\t"
         "str	r4, [%[r], #64]\n\t"
         "# A[17] * B\n\t"
         "ldr	r8, [%[a], #68]\n\t"
@@ -3531,7 +4185,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r5, r5, r6\n\t"
         "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
+        "adc	r4, r4, r10\n\t"
         "str	r5, [%[r], #68]\n\t"
         "# A[18] * B\n\t"
         "ldr	r8, [%[a], #72]\n\t"
@@ -3539,7 +4193,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r3, r3, r6\n\t"
         "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
+        "adc	r5, r5, r10\n\t"
         "str	r3, [%[r], #72]\n\t"
         "# A[19] * B\n\t"
         "ldr	r8, [%[a], #76]\n\t"
@@ -3547,7 +4201,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r4, r4, r6\n\t"
         "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
+        "adc	r3, r3, r10\n\t"
         "str	r4, [%[r], #76]\n\t"
         "# A[20] * B\n\t"
         "ldr	r8, [%[a], #80]\n\t"
@@ -3555,7 +4209,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r5, r5, r6\n\t"
         "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
+        "adc	r4, r4, r10\n\t"
         "str	r5, [%[r], #80]\n\t"
         "# A[21] * B\n\t"
         "ldr	r8, [%[a], #84]\n\t"
@@ -3563,7 +4217,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r3, r3, r6\n\t"
         "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
+        "adc	r5, r5, r10\n\t"
         "str	r3, [%[r], #84]\n\t"
         "# A[22] * B\n\t"
         "ldr	r8, [%[a], #88]\n\t"
@@ -3571,7 +4225,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r4, r4, r6\n\t"
         "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
+        "adc	r3, r3, r10\n\t"
         "str	r4, [%[r], #88]\n\t"
         "# A[23] * B\n\t"
         "ldr	r8, [%[a], #92]\n\t"
@@ -3579,7 +4233,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r5, r5, r6\n\t"
         "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
+        "adc	r4, r4, r10\n\t"
         "str	r5, [%[r], #92]\n\t"
         "# A[24] * B\n\t"
         "ldr	r8, [%[a], #96]\n\t"
@@ -3587,7 +4241,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r3, r3, r6\n\t"
         "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
+        "adc	r5, r5, r10\n\t"
         "str	r3, [%[r], #96]\n\t"
         "# A[25] * B\n\t"
         "ldr	r8, [%[a], #100]\n\t"
@@ -3595,7 +4249,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r4, r4, r6\n\t"
         "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
+        "adc	r3, r3, r10\n\t"
         "str	r4, [%[r], #100]\n\t"
         "# A[26] * B\n\t"
         "ldr	r8, [%[a], #104]\n\t"
@@ -3603,7 +4257,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r5, r5, r6\n\t"
         "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
+        "adc	r4, r4, r10\n\t"
         "str	r5, [%[r], #104]\n\t"
         "# A[27] * B\n\t"
         "ldr	r8, [%[a], #108]\n\t"
@@ -3611,7 +4265,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r3, r3, r6\n\t"
         "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
+        "adc	r5, r5, r10\n\t"
         "str	r3, [%[r], #108]\n\t"
         "# A[28] * B\n\t"
         "ldr	r8, [%[a], #112]\n\t"
@@ -3619,7 +4273,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r4, r4, r6\n\t"
         "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
+        "adc	r3, r3, r10\n\t"
         "str	r4, [%[r], #112]\n\t"
         "# A[29] * B\n\t"
         "ldr	r8, [%[a], #116]\n\t"
@@ -3627,7 +4281,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r5, r5, r6\n\t"
         "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
+        "adc	r4, r4, r10\n\t"
         "str	r5, [%[r], #116]\n\t"
         "# A[30] * B\n\t"
         "ldr	r8, [%[a], #120]\n\t"
@@ -3635,7 +4289,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r3, r3, r6\n\t"
         "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
+        "adc	r5, r5, r10\n\t"
         "str	r3, [%[r], #120]\n\t"
         "# A[31] * B\n\t"
         "ldr	r8, [%[a], #124]\n\t"
@@ -3716,11 +4370,12 @@
  * return -ve, 0 or +ve if a is less than, equal to or greater than b
  * respectively.
  */
-static int32_t sp_2048_cmp_32(sp_digit* a, sp_digit* b)
+static int32_t sp_2048_cmp_32(const sp_digit* a, const sp_digit* b)
 {
     sp_digit r = -1;
     sp_digit one = 1;
 
+
 #ifdef WOLFSSL_SP_SMALL
     __asm__ __volatile__ (
         "mov	r7, #0\n\t"
@@ -3732,15 +4387,18 @@
         "and	r4, r4, r3\n\t"
         "and	r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
-        "movne	r3, r7\n\t"
-        "sub	r6, r6, #4\n\t"
-        "bcc	1b\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "subs	r6, r6, #4\n\t"
+        "bcs	1b\n\t"
         "eor	%[r], %[r], r3\n\t"
         : [r] "+r" (r)
         : [a] "r" (a), [b] "r" (b), [one] "r" (one)
-        : "r2", "r3", "r4", "r5", "r6", "r7"
+        : "r3", "r4", "r5", "r6", "r7"
     );
 #else
     __asm__ __volatile__ (
@@ -3751,261 +4409,357 @@
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #120]\n\t"
         "ldr		r5, [%[b], #120]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #116]\n\t"
         "ldr		r5, [%[b], #116]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #112]\n\t"
         "ldr		r5, [%[b], #112]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #108]\n\t"
         "ldr		r5, [%[b], #108]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #104]\n\t"
         "ldr		r5, [%[b], #104]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #100]\n\t"
         "ldr		r5, [%[b], #100]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #96]\n\t"
         "ldr		r5, [%[b], #96]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #92]\n\t"
         "ldr		r5, [%[b], #92]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #88]\n\t"
         "ldr		r5, [%[b], #88]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #84]\n\t"
         "ldr		r5, [%[b], #84]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #80]\n\t"
         "ldr		r5, [%[b], #80]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #76]\n\t"
         "ldr		r5, [%[b], #76]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #72]\n\t"
         "ldr		r5, [%[b], #72]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #68]\n\t"
         "ldr		r5, [%[b], #68]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #64]\n\t"
         "ldr		r5, [%[b], #64]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #60]\n\t"
         "ldr		r5, [%[b], #60]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #56]\n\t"
         "ldr		r5, [%[b], #56]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #52]\n\t"
         "ldr		r5, [%[b], #52]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #48]\n\t"
         "ldr		r5, [%[b], #48]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #44]\n\t"
         "ldr		r5, [%[b], #44]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #40]\n\t"
         "ldr		r5, [%[b], #40]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #36]\n\t"
         "ldr		r5, [%[b], #36]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #32]\n\t"
         "ldr		r5, [%[b], #32]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #28]\n\t"
         "ldr		r5, [%[b], #28]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #24]\n\t"
         "ldr		r5, [%[b], #24]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #20]\n\t"
         "ldr		r5, [%[b], #20]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #16]\n\t"
         "ldr		r5, [%[b], #16]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #12]\n\t"
         "ldr		r5, [%[b], #12]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #8]\n\t"
         "ldr		r5, [%[b], #8]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #4]\n\t"
         "ldr		r5, [%[b], #4]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #0]\n\t"
         "ldr		r5, [%[b], #0]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "eor	%[r], %[r], r3\n\t"
         : [r] "+r" (r)
         : [a] "r" (a), [b] "r" (b), [one] "r" (one)
-        : "r2", "r3", "r4", "r5", "r6", "r7"
+        : "r3", "r4", "r5", "r6", "r7"
     );
 #endif
 
@@ -4021,7 +4775,7 @@
  * r  Remainder from the division.
  * returns MP_OKAY indicating success.
  */
-static WC_INLINE int sp_2048_div_32(sp_digit* a, sp_digit* d, sp_digit* m,
+static WC_INLINE int sp_2048_div_32(const sp_digit* a, const sp_digit* d, sp_digit* m,
         sp_digit* r)
 {
     sp_digit t1[64], t2[33];
@@ -4030,6 +4784,7 @@
 
     (void)m;
 
+
     div = d[31];
     XMEMCPY(t1, a, sizeof(*t1) * 2 * 32);
     for (i=31; i>=0; i--) {
@@ -4045,7 +4800,7 @@
     }
 
     r1 = sp_2048_cmp_32(t1, d) >= 0;
-    sp_2048_cond_sub_32(r, t1, t2, (sp_digit)0 - r1);
+    sp_2048_cond_sub_32(r, t1, d, (sp_digit)0 - r1);
 
     return MP_OKAY;
 }
@@ -4057,7 +4812,7 @@
  * m  A single precision number that is the modulus to reduce with.
  * returns MP_OKAY indicating success.
  */
-static WC_INLINE int sp_2048_mod_32(sp_digit* r, sp_digit* a, sp_digit* m)
+static WC_INLINE int sp_2048_mod_32(sp_digit* r, const sp_digit* a, const sp_digit* m)
 {
     return sp_2048_div_32(a, m, NULL, r);
 }
@@ -4072,8 +4827,8 @@
  * m     A single precision number that is the modulus.
  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
  */
-static int sp_2048_mod_exp_32(sp_digit* r, sp_digit* a, sp_digit* e,
-        int bits, sp_digit* m, int reduceA)
+static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
 {
 #ifndef WOLFSSL_SMALL_STACK
     sp_digit t[16][64];
@@ -4092,27 +4847,28 @@
 #ifdef WOLFSSL_SMALL_STACK
     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 64, NULL,
                             DYNAMIC_TYPE_TMP_BUFFER);
-    if (td == NULL)
+    if (td == NULL) {
         err = MEMORY_E;
-
-    if (err == MP_OKAY) {
-        for (i=0; i<16; i++)
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<16; i++) {
             t[i] = td + i * 64;
+        }
+#endif
         norm = t[0];
-    }
-#else
-    norm = t[0];
-#endif
-
-    if (err == MP_OKAY) {
+
         sp_2048_mont_setup(m, &mp);
         sp_2048_mont_norm_32(norm, m);
 
-        XMEMSET(t[1], 0, sizeof(sp_digit) * 32);
-        if (reduceA) {
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 32U);
+        if (reduceA != 0) {
             err = sp_2048_mod_32(t[1] + 32, a, m);
-            if (err == MP_OKAY)
+            if (err == MP_OKAY) {
                 err = sp_2048_mod_32(t[1], t[1], m);
+            }
         }
         else {
             XMEMCPY(t[1] + 32, a, sizeof(sp_digit) * 32);
@@ -4138,9 +4894,16 @@
 
         i = (bits - 1) / 32;
         n = e[i--];
-        y = n >> 28;
-        n <<= 4;
-        c = 28;
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 4;
+        if (c == 32) {
+            c = 28;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
         XMEMCPY(r, t[y], sizeof(sp_digit) * 32);
         for (; i>=0 || c>=4; ) {
             if (c == 0) {
@@ -4171,7 +4934,7 @@
             sp_2048_mont_mul_32(r, r, t[y], m, mp);
         }
 
-        XMEMSET(&r[32], 0, sizeof(sp_digit) * 32);
+        XMEMSET(&r[32], 0, sizeof(sp_digit) * 32U);
         sp_2048_mont_reduce_32(r, m, mp);
 
         mask = 0 - (sp_2048_cmp_32(r, m) >= 0);
@@ -4179,8 +4942,9 @@
     }
 
 #ifdef WOLFSSL_SMALL_STACK
-    if (td != NULL)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
 #endif
 
     return err;
@@ -4195,8 +4959,8 @@
  * m     A single precision number that is the modulus.
  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
  */
-static int sp_2048_mod_exp_32(sp_digit* r, sp_digit* a, sp_digit* e,
-        int bits, sp_digit* m, int reduceA)
+static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
 {
 #ifndef WOLFSSL_SMALL_STACK
     sp_digit t[32][64];
@@ -4215,27 +4979,28 @@
 #ifdef WOLFSSL_SMALL_STACK
     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 64, NULL,
                             DYNAMIC_TYPE_TMP_BUFFER);
-    if (td == NULL)
+    if (td == NULL) {
         err = MEMORY_E;
-
-    if (err == MP_OKAY) {
-        for (i=0; i<32; i++)
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<32; i++) {
             t[i] = td + i * 64;
+        }
+#endif
         norm = t[0];
-    }
-#else
-    norm = t[0];
-#endif
-
-    if (err == MP_OKAY) {
+
         sp_2048_mont_setup(m, &mp);
         sp_2048_mont_norm_32(norm, m);
 
-        XMEMSET(t[1], 0, sizeof(sp_digit) * 32);
-        if (reduceA) {
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 32U);
+        if (reduceA != 0) {
             err = sp_2048_mod_32(t[1] + 32, a, m);
-            if (err == MP_OKAY)
+            if (err == MP_OKAY) {
                 err = sp_2048_mod_32(t[1], t[1], m);
+            }
         }
         else {
             XMEMCPY(t[1] + 32, a, sizeof(sp_digit) * 32);
@@ -4277,9 +5042,16 @@
 
         i = (bits - 1) / 32;
         n = e[i--];
-        y = n >> 27;
-        n <<= 5;
-        c = 27;
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 5;
+        if (c == 32) {
+            c = 27;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
         XMEMCPY(r, t[y], sizeof(sp_digit) * 32);
         for (; i>=0 || c>=5; ) {
             if (c == 0) {
@@ -4310,14 +5082,8 @@
 
             sp_2048_mont_mul_32(r, r, t[y], m, mp);
         }
-        y = e[0] & 0xf;
-        sp_2048_mont_sqr_32(r, r, m, mp);
-        sp_2048_mont_sqr_32(r, r, m, mp);
-        sp_2048_mont_sqr_32(r, r, m, mp);
-        sp_2048_mont_sqr_32(r, r, m, mp);
-        sp_2048_mont_mul_32(r, r, t[y], m, mp);
-
-        XMEMSET(&r[32], 0, sizeof(sp_digit) * 32);
+
+        XMEMSET(&r[32], 0, sizeof(sp_digit) * 32U);
         sp_2048_mont_reduce_32(r, m, mp);
 
         mask = 0 - (sp_2048_cmp_32(r, m) >= 0);
@@ -4325,23 +5091,25 @@
     }
 
 #ifdef WOLFSSL_SMALL_STACK
-    if (td != NULL)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
 #endif
 
     return err;
 }
 #endif /* WOLFSSL_SP_SMALL */
 
-#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */
-
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
 /* r = 2^n mod m where n is the number of bits to reduce by.
  * Given m must be 2048 bits, just need to subtract.
  *
  * r  A single precision number.
- * m  A signle precision number.
- */
-static void sp_2048_mont_norm_64(sp_digit* r, sp_digit* m)
+ * m  A single precision number.
+ */
+static void sp_2048_mont_norm_64(sp_digit* r, const sp_digit* m)
 {
     XMEMSET(r, 0, sizeof(sp_digit) * 64);
 
@@ -4349,6 +5117,7 @@
     sp_2048_sub_in_place_64(r, m);
 }
 
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
 /* Conditionally subtract b from a using the mask m.
  * m is -1 to subtract and 0 when not copying.
  *
@@ -4357,7 +5126,7 @@
  * b  A single precision number to subtract.
  * m  Mask value to apply.
  */
-static sp_digit sp_2048_cond_sub_64(sp_digit* r, sp_digit* a, sp_digit* b,
+static sp_digit sp_2048_cond_sub_64(sp_digit* r, const sp_digit* a, const sp_digit* b,
         sp_digit m)
 {
     sp_digit c = 0;
@@ -4721,7 +5490,7 @@
  * m   The single precision number representing the modulus.
  * mp  The digit representing the negative inverse of m mod 2^n.
  */
-SP_NOINLINE static void sp_2048_mont_reduce_64(sp_digit* a, sp_digit* m,
+SP_NOINLINE static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m,
         sp_digit mp)
 {
     sp_digit ca = 0;
@@ -5334,8 +6103,8 @@
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_2048_mont_mul_64(sp_digit* r, sp_digit* a, sp_digit* b,
-        sp_digit* m, sp_digit mp)
+static void sp_2048_mont_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
 {
     sp_2048_mul_64(r, a, b);
     sp_2048_mont_reduce_64(r, m, mp);
@@ -5348,568 +6117,14 @@
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_2048_mont_sqr_64(sp_digit* r, sp_digit* a, sp_digit* m,
+static void sp_2048_mont_sqr_64(sp_digit* r, const sp_digit* a, const sp_digit* m,
         sp_digit mp)
 {
     sp_2048_sqr_64(r, a);
     sp_2048_mont_reduce_64(r, m, mp);
 }
 
-/* Mul a by digit b into r. (r = a * b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A single precision digit.
- */
-static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a,
-        const sp_digit b)
-{
-#ifdef WOLFSSL_SP_SMALL
-    __asm__ __volatile__ (
-        "mov	r10, #0\n\t"
-        "# A[0] * B\n\t"
-        "ldr	r8, [%[a]]\n\t"
-        "umull	r5, r3, %[b], r8\n\t"
-        "mov	r4, #0\n\t"
-        "str	r5, [%[r]]\n\t"
-        "mov	r5, #0\n\t"
-        "mov	r9, #4\n\t"
-        "1:\n\t"
-        "ldr	r8, [%[a], r9]\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], r9]\n\t"
-        "mov	r3, r4\n\t"
-        "mov	r4, r5\n\t"
-        "mov	r5, #0\n\t"
-        "add	r9, r9, #4\n\t"
-        "cmp	r9, #256\n\t"
-        "blt	1b\n\t"
-        "str	r3, [%[r], #256]\n\t"
-        :
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
-        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
-    );
-#else
-    __asm__ __volatile__ (
-        "mov	r10, #0\n\t"
-        "# A[0] * B\n\t"
-        "ldr	r8, [%[a]]\n\t"
-        "umull	r3, r4, %[b], r8\n\t"
-        "mov	r5, #0\n\t"
-        "str	r3, [%[r]]\n\t"
-        "# A[1] * B\n\t"
-        "ldr	r8, [%[a], #4]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #4]\n\t"
-        "# A[2] * B\n\t"
-        "ldr	r8, [%[a], #8]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #8]\n\t"
-        "# A[3] * B\n\t"
-        "ldr	r8, [%[a], #12]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #12]\n\t"
-        "# A[4] * B\n\t"
-        "ldr	r8, [%[a], #16]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #16]\n\t"
-        "# A[5] * B\n\t"
-        "ldr	r8, [%[a], #20]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #20]\n\t"
-        "# A[6] * B\n\t"
-        "ldr	r8, [%[a], #24]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #24]\n\t"
-        "# A[7] * B\n\t"
-        "ldr	r8, [%[a], #28]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #28]\n\t"
-        "# A[8] * B\n\t"
-        "ldr	r8, [%[a], #32]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #32]\n\t"
-        "# A[9] * B\n\t"
-        "ldr	r8, [%[a], #36]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #36]\n\t"
-        "# A[10] * B\n\t"
-        "ldr	r8, [%[a], #40]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #40]\n\t"
-        "# A[11] * B\n\t"
-        "ldr	r8, [%[a], #44]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #44]\n\t"
-        "# A[12] * B\n\t"
-        "ldr	r8, [%[a], #48]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #48]\n\t"
-        "# A[13] * B\n\t"
-        "ldr	r8, [%[a], #52]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #52]\n\t"
-        "# A[14] * B\n\t"
-        "ldr	r8, [%[a], #56]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #56]\n\t"
-        "# A[15] * B\n\t"
-        "ldr	r8, [%[a], #60]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #60]\n\t"
-        "# A[16] * B\n\t"
-        "ldr	r8, [%[a], #64]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #64]\n\t"
-        "# A[17] * B\n\t"
-        "ldr	r8, [%[a], #68]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #68]\n\t"
-        "# A[18] * B\n\t"
-        "ldr	r8, [%[a], #72]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #72]\n\t"
-        "# A[19] * B\n\t"
-        "ldr	r8, [%[a], #76]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #76]\n\t"
-        "# A[20] * B\n\t"
-        "ldr	r8, [%[a], #80]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #80]\n\t"
-        "# A[21] * B\n\t"
-        "ldr	r8, [%[a], #84]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #84]\n\t"
-        "# A[22] * B\n\t"
-        "ldr	r8, [%[a], #88]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #88]\n\t"
-        "# A[23] * B\n\t"
-        "ldr	r8, [%[a], #92]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #92]\n\t"
-        "# A[24] * B\n\t"
-        "ldr	r8, [%[a], #96]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #96]\n\t"
-        "# A[25] * B\n\t"
-        "ldr	r8, [%[a], #100]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #100]\n\t"
-        "# A[26] * B\n\t"
-        "ldr	r8, [%[a], #104]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #104]\n\t"
-        "# A[27] * B\n\t"
-        "ldr	r8, [%[a], #108]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #108]\n\t"
-        "# A[28] * B\n\t"
-        "ldr	r8, [%[a], #112]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #112]\n\t"
-        "# A[29] * B\n\t"
-        "ldr	r8, [%[a], #116]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #116]\n\t"
-        "# A[30] * B\n\t"
-        "ldr	r8, [%[a], #120]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #120]\n\t"
-        "# A[31] * B\n\t"
-        "ldr	r8, [%[a], #124]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #124]\n\t"
-        "# A[32] * B\n\t"
-        "ldr	r8, [%[a], #128]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #128]\n\t"
-        "# A[33] * B\n\t"
-        "ldr	r8, [%[a], #132]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #132]\n\t"
-        "# A[34] * B\n\t"
-        "ldr	r8, [%[a], #136]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #136]\n\t"
-        "# A[35] * B\n\t"
-        "ldr	r8, [%[a], #140]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #140]\n\t"
-        "# A[36] * B\n\t"
-        "ldr	r8, [%[a], #144]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #144]\n\t"
-        "# A[37] * B\n\t"
-        "ldr	r8, [%[a], #148]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #148]\n\t"
-        "# A[38] * B\n\t"
-        "ldr	r8, [%[a], #152]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #152]\n\t"
-        "# A[39] * B\n\t"
-        "ldr	r8, [%[a], #156]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #156]\n\t"
-        "# A[40] * B\n\t"
-        "ldr	r8, [%[a], #160]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #160]\n\t"
-        "# A[41] * B\n\t"
-        "ldr	r8, [%[a], #164]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #164]\n\t"
-        "# A[42] * B\n\t"
-        "ldr	r8, [%[a], #168]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #168]\n\t"
-        "# A[43] * B\n\t"
-        "ldr	r8, [%[a], #172]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #172]\n\t"
-        "# A[44] * B\n\t"
-        "ldr	r8, [%[a], #176]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #176]\n\t"
-        "# A[45] * B\n\t"
-        "ldr	r8, [%[a], #180]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #180]\n\t"
-        "# A[46] * B\n\t"
-        "ldr	r8, [%[a], #184]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #184]\n\t"
-        "# A[47] * B\n\t"
-        "ldr	r8, [%[a], #188]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #188]\n\t"
-        "# A[48] * B\n\t"
-        "ldr	r8, [%[a], #192]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #192]\n\t"
-        "# A[49] * B\n\t"
-        "ldr	r8, [%[a], #196]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #196]\n\t"
-        "# A[50] * B\n\t"
-        "ldr	r8, [%[a], #200]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #200]\n\t"
-        "# A[51] * B\n\t"
-        "ldr	r8, [%[a], #204]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #204]\n\t"
-        "# A[52] * B\n\t"
-        "ldr	r8, [%[a], #208]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #208]\n\t"
-        "# A[53] * B\n\t"
-        "ldr	r8, [%[a], #212]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #212]\n\t"
-        "# A[54] * B\n\t"
-        "ldr	r8, [%[a], #216]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #216]\n\t"
-        "# A[55] * B\n\t"
-        "ldr	r8, [%[a], #220]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #220]\n\t"
-        "# A[56] * B\n\t"
-        "ldr	r8, [%[a], #224]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #224]\n\t"
-        "# A[57] * B\n\t"
-        "ldr	r8, [%[a], #228]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #228]\n\t"
-        "# A[58] * B\n\t"
-        "ldr	r8, [%[a], #232]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #232]\n\t"
-        "# A[59] * B\n\t"
-        "ldr	r8, [%[a], #236]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #236]\n\t"
-        "# A[60] * B\n\t"
-        "ldr	r8, [%[a], #240]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #240]\n\t"
-        "# A[61] * B\n\t"
-        "ldr	r8, [%[a], #244]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #244]\n\t"
-        "# A[62] * B\n\t"
-        "ldr	r8, [%[a], #248]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #248]\n\t"
-        "# A[63] * B\n\t"
-        "ldr	r8, [%[a], #252]\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adc	r4, r4, r7\n\t"
-        "str	r3, [%[r], #252]\n\t"
-        "str	r4, [%[r], #256]\n\t"
-        :
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
-        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
-    );
-#endif
-}
-
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
 /* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
  *
  * d1   The high order half of the number to divide.
@@ -5974,13 +6189,14 @@
  * a  A single precision integer.
  * m  Mask to AND against each digit.
  */
-static void sp_2048_mask_64(sp_digit* r, sp_digit* a, sp_digit m)
+static void sp_2048_mask_64(sp_digit* r, const sp_digit* a, sp_digit m)
 {
 #ifdef WOLFSSL_SP_SMALL
     int i;
 
-    for (i=0; i<64; i++)
+    for (i=0; i<64; i++) {
         r[i] = a[i] & m;
+    }
 #else
     int i;
 
@@ -6004,11 +6220,12 @@
  * return -ve, 0 or +ve if a is less than, equal to or greater than b
  * respectively.
  */
-static int32_t sp_2048_cmp_64(sp_digit* a, sp_digit* b)
+static int32_t sp_2048_cmp_64(const sp_digit* a, const sp_digit* b)
 {
     sp_digit r = -1;
     sp_digit one = 1;
 
+
 #ifdef WOLFSSL_SP_SMALL
     __asm__ __volatile__ (
         "mov	r7, #0\n\t"
@@ -6020,15 +6237,18 @@
         "and	r4, r4, r3\n\t"
         "and	r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
-        "movne	r3, r7\n\t"
-        "sub	r6, r6, #4\n\t"
-        "bcc	1b\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "subs	r6, r6, #4\n\t"
+        "bcs	1b\n\t"
         "eor	%[r], %[r], r3\n\t"
         : [r] "+r" (r)
         : [a] "r" (a), [b] "r" (b), [one] "r" (one)
-        : "r2", "r3", "r4", "r5", "r6", "r7"
+        : "r3", "r4", "r5", "r6", "r7"
     );
 #else
     __asm__ __volatile__ (
@@ -6039,517 +6259,709 @@
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #248]\n\t"
         "ldr		r5, [%[b], #248]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #244]\n\t"
         "ldr		r5, [%[b], #244]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #240]\n\t"
         "ldr		r5, [%[b], #240]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #236]\n\t"
         "ldr		r5, [%[b], #236]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #232]\n\t"
         "ldr		r5, [%[b], #232]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #228]\n\t"
         "ldr		r5, [%[b], #228]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #224]\n\t"
         "ldr		r5, [%[b], #224]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #220]\n\t"
         "ldr		r5, [%[b], #220]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #216]\n\t"
         "ldr		r5, [%[b], #216]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #212]\n\t"
         "ldr		r5, [%[b], #212]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #208]\n\t"
         "ldr		r5, [%[b], #208]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #204]\n\t"
         "ldr		r5, [%[b], #204]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #200]\n\t"
         "ldr		r5, [%[b], #200]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #196]\n\t"
         "ldr		r5, [%[b], #196]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #192]\n\t"
         "ldr		r5, [%[b], #192]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #188]\n\t"
         "ldr		r5, [%[b], #188]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #184]\n\t"
         "ldr		r5, [%[b], #184]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #180]\n\t"
         "ldr		r5, [%[b], #180]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #176]\n\t"
         "ldr		r5, [%[b], #176]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #172]\n\t"
         "ldr		r5, [%[b], #172]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #168]\n\t"
         "ldr		r5, [%[b], #168]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #164]\n\t"
         "ldr		r5, [%[b], #164]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #160]\n\t"
         "ldr		r5, [%[b], #160]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #156]\n\t"
         "ldr		r5, [%[b], #156]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #152]\n\t"
         "ldr		r5, [%[b], #152]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #148]\n\t"
         "ldr		r5, [%[b], #148]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #144]\n\t"
         "ldr		r5, [%[b], #144]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #140]\n\t"
         "ldr		r5, [%[b], #140]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #136]\n\t"
         "ldr		r5, [%[b], #136]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #132]\n\t"
         "ldr		r5, [%[b], #132]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #128]\n\t"
         "ldr		r5, [%[b], #128]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #124]\n\t"
         "ldr		r5, [%[b], #124]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #120]\n\t"
         "ldr		r5, [%[b], #120]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #116]\n\t"
         "ldr		r5, [%[b], #116]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #112]\n\t"
         "ldr		r5, [%[b], #112]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #108]\n\t"
         "ldr		r5, [%[b], #108]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #104]\n\t"
         "ldr		r5, [%[b], #104]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #100]\n\t"
         "ldr		r5, [%[b], #100]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #96]\n\t"
         "ldr		r5, [%[b], #96]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #92]\n\t"
         "ldr		r5, [%[b], #92]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #88]\n\t"
         "ldr		r5, [%[b], #88]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #84]\n\t"
         "ldr		r5, [%[b], #84]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #80]\n\t"
         "ldr		r5, [%[b], #80]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #76]\n\t"
         "ldr		r5, [%[b], #76]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #72]\n\t"
         "ldr		r5, [%[b], #72]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #68]\n\t"
         "ldr		r5, [%[b], #68]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #64]\n\t"
         "ldr		r5, [%[b], #64]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #60]\n\t"
         "ldr		r5, [%[b], #60]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #56]\n\t"
         "ldr		r5, [%[b], #56]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #52]\n\t"
         "ldr		r5, [%[b], #52]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #48]\n\t"
         "ldr		r5, [%[b], #48]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #44]\n\t"
         "ldr		r5, [%[b], #44]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #40]\n\t"
         "ldr		r5, [%[b], #40]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #36]\n\t"
         "ldr		r5, [%[b], #36]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #32]\n\t"
         "ldr		r5, [%[b], #32]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #28]\n\t"
         "ldr		r5, [%[b], #28]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #24]\n\t"
         "ldr		r5, [%[b], #24]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #20]\n\t"
         "ldr		r5, [%[b], #20]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #16]\n\t"
         "ldr		r5, [%[b], #16]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #12]\n\t"
         "ldr		r5, [%[b], #12]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #8]\n\t"
         "ldr		r5, [%[b], #8]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #4]\n\t"
         "ldr		r5, [%[b], #4]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #0]\n\t"
         "ldr		r5, [%[b], #0]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "eor	%[r], %[r], r3\n\t"
         : [r] "+r" (r)
         : [a] "r" (a), [b] "r" (b), [one] "r" (one)
-        : "r2", "r3", "r4", "r5", "r6", "r7"
+        : "r3", "r4", "r5", "r6", "r7"
     );
 #endif
 
@@ -6565,7 +6977,7 @@
  * r  Remainder from the division.
  * returns MP_OKAY indicating success.
  */
-static WC_INLINE int sp_2048_div_64(sp_digit* a, sp_digit* d, sp_digit* m,
+static WC_INLINE int sp_2048_div_64(const sp_digit* a, const sp_digit* d, sp_digit* m,
         sp_digit* r)
 {
     sp_digit t1[128], t2[65];
@@ -6574,6 +6986,7 @@
 
     (void)m;
 
+
     div = d[63];
     XMEMCPY(t1, a, sizeof(*t1) * 2 * 64);
     for (i=63; i>=0; i--) {
@@ -6589,7 +7002,7 @@
     }
 
     r1 = sp_2048_cmp_64(t1, d) >= 0;
-    sp_2048_cond_sub_64(r, t1, t2, (sp_digit)0 - r1);
+    sp_2048_cond_sub_64(r, t1, d, (sp_digit)0 - r1);
 
     return MP_OKAY;
 }
@@ -6601,11 +7014,12 @@
  * m  A single precision number that is the modulus to reduce with.
  * returns MP_OKAY indicating success.
  */
-static WC_INLINE int sp_2048_mod_64(sp_digit* r, sp_digit* a, sp_digit* m)
+static WC_INLINE int sp_2048_mod_64(sp_digit* r, const sp_digit* a, const sp_digit* m)
 {
     return sp_2048_div_64(a, m, NULL, r);
 }
 
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
 /* Divide d in a and put remainder into r (m*d + r = a)
  * m is not calculated as it is not needed at this time.
  *
@@ -6615,7 +7029,7 @@
  * r  Remainder from the division.
  * returns MP_OKAY indicating success.
  */
-static WC_INLINE int sp_2048_div_64_cond(sp_digit* a, sp_digit* d, sp_digit* m,
+static WC_INLINE int sp_2048_div_64_cond(const sp_digit* a, const sp_digit* d, sp_digit* m,
         sp_digit* r)
 {
     sp_digit t1[128], t2[65];
@@ -6624,6 +7038,7 @@
 
     (void)m;
 
+
     div = d[63];
     XMEMCPY(t1, a, sizeof(*t1) * 2 * 64);
     for (i=63; i>=0; i--) {
@@ -6640,7 +7055,7 @@
     }
 
     r1 = sp_2048_cmp_64(t1, d) >= 0;
-    sp_2048_cond_sub_64(r, t1, t2, (sp_digit)0 - r1);
+    sp_2048_cond_sub_64(r, t1, d, (sp_digit)0 - r1);
 
     return MP_OKAY;
 }
@@ -6652,12 +7067,13 @@
  * m  A single precision number that is the modulus to reduce with.
  * returns MP_OKAY indicating success.
  */
-static WC_INLINE int sp_2048_mod_64_cond(sp_digit* r, sp_digit* a, sp_digit* m)
+static WC_INLINE int sp_2048_mod_64_cond(sp_digit* r, const sp_digit* a, const sp_digit* m)
 {
     return sp_2048_div_64_cond(a, m, NULL, r);
 }
 
-#if defined(SP_RSA_PRIVATE_EXP_D) || defined(WOLFSSL_HAVE_SP_DH)
+#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
+                                                     defined(WOLFSSL_HAVE_SP_DH)
 #ifdef WOLFSSL_SP_SMALL
 /* Modular exponentiate a to the e mod m. (r = a^e mod m)
  *
@@ -6668,8 +7084,8 @@
  * m     A single precision number that is the modulus.
  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
  */
-static int sp_2048_mod_exp_64(sp_digit* r, sp_digit* a, sp_digit* e,
-        int bits, sp_digit* m, int reduceA)
+static int sp_2048_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
 {
 #ifndef WOLFSSL_SMALL_STACK
     sp_digit t[16][128];
@@ -6688,27 +7104,28 @@
 #ifdef WOLFSSL_SMALL_STACK
     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 128, NULL,
                             DYNAMIC_TYPE_TMP_BUFFER);
-    if (td == NULL)
+    if (td == NULL) {
         err = MEMORY_E;
-
-    if (err == MP_OKAY) {
-        for (i=0; i<16; i++)
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<16; i++) {
             t[i] = td + i * 128;
+        }
+#endif
         norm = t[0];
-    }
-#else
-    norm = t[0];
-#endif
-
-    if (err == MP_OKAY) {
+
         sp_2048_mont_setup(m, &mp);
         sp_2048_mont_norm_64(norm, m);
 
-        XMEMSET(t[1], 0, sizeof(sp_digit) * 64);
-        if (reduceA) {
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 64U);
+        if (reduceA != 0) {
             err = sp_2048_mod_64(t[1] + 64, a, m);
-            if (err == MP_OKAY)
+            if (err == MP_OKAY) {
                 err = sp_2048_mod_64(t[1], t[1], m);
+            }
         }
         else {
             XMEMCPY(t[1] + 64, a, sizeof(sp_digit) * 64);
@@ -6734,9 +7151,16 @@
 
         i = (bits - 1) / 32;
         n = e[i--];
-        y = n >> 28;
-        n <<= 4;
-        c = 28;
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 4;
+        if (c == 32) {
+            c = 28;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
         XMEMCPY(r, t[y], sizeof(sp_digit) * 64);
         for (; i>=0 || c>=4; ) {
             if (c == 0) {
@@ -6767,7 +7191,7 @@
             sp_2048_mont_mul_64(r, r, t[y], m, mp);
         }
 
-        XMEMSET(&r[64], 0, sizeof(sp_digit) * 64);
+        XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U);
         sp_2048_mont_reduce_64(r, m, mp);
 
         mask = 0 - (sp_2048_cmp_64(r, m) >= 0);
@@ -6775,8 +7199,9 @@
     }
 
 #ifdef WOLFSSL_SMALL_STACK
-    if (td != NULL)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
 #endif
 
     return err;
@@ -6791,8 +7216,8 @@
  * m     A single precision number that is the modulus.
  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
  */
-static int sp_2048_mod_exp_64(sp_digit* r, sp_digit* a, sp_digit* e,
-        int bits, sp_digit* m, int reduceA)
+static int sp_2048_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
 {
 #ifndef WOLFSSL_SMALL_STACK
     sp_digit t[32][128];
@@ -6811,27 +7236,28 @@
 #ifdef WOLFSSL_SMALL_STACK
     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 128, NULL,
                             DYNAMIC_TYPE_TMP_BUFFER);
-    if (td == NULL)
+    if (td == NULL) {
         err = MEMORY_E;
-
-    if (err == MP_OKAY) {
-        for (i=0; i<32; i++)
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<32; i++) {
             t[i] = td + i * 128;
+        }
+#endif
         norm = t[0];
-    }
-#else
-    norm = t[0];
-#endif
-
-    if (err == MP_OKAY) {
+
         sp_2048_mont_setup(m, &mp);
         sp_2048_mont_norm_64(norm, m);
 
-        XMEMSET(t[1], 0, sizeof(sp_digit) * 64);
-        if (reduceA) {
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 64U);
+        if (reduceA != 0) {
             err = sp_2048_mod_64(t[1] + 64, a, m);
-            if (err == MP_OKAY)
+            if (err == MP_OKAY) {
                 err = sp_2048_mod_64(t[1], t[1], m);
+            }
         }
         else {
             XMEMCPY(t[1] + 64, a, sizeof(sp_digit) * 64);
@@ -6873,9 +7299,16 @@
 
         i = (bits - 1) / 32;
         n = e[i--];
-        y = n >> 27;
-        n <<= 5;
-        c = 27;
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 5;
+        if (c == 32) {
+            c = 27;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
         XMEMCPY(r, t[y], sizeof(sp_digit) * 64);
         for (; i>=0 || c>=5; ) {
             if (c == 0) {
@@ -6906,13 +7339,8 @@
 
             sp_2048_mont_mul_64(r, r, t[y], m, mp);
         }
-        y = e[0] & 0x7;
-        sp_2048_mont_sqr_64(r, r, m, mp);
-        sp_2048_mont_sqr_64(r, r, m, mp);
-        sp_2048_mont_sqr_64(r, r, m, mp);
-        sp_2048_mont_mul_64(r, r, t[y], m, mp);
-
-        XMEMSET(&r[64], 0, sizeof(sp_digit) * 64);
+
+        XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U);
         sp_2048_mont_reduce_64(r, m, mp);
 
         mask = 0 - (sp_2048_cmp_64(r, m) >= 0);
@@ -6920,14 +7348,15 @@
     }
 
 #ifdef WOLFSSL_SMALL_STACK
-    if (td != NULL)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
 #endif
 
     return err;
 }
 #endif /* WOLFSSL_SP_SMALL */
-#endif /* SP_RSA_PRIVATE_EXP_D || WOLFSSL_HAVE_SP_DH */
+#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */
 
 #ifdef WOLFSSL_HAVE_SP_RSA
 /* RSA public key operation.
@@ -6945,15 +7374,15 @@
 int sp_RsaPublic_2048(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
     byte* out, word32* outLen)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_digit ad[128], md[64], rd[128];
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit a[128], m[64], r[128];
 #else
     sp_digit* d = NULL;
-#endif
     sp_digit* a;
-    sp_digit *ah;
     sp_digit* m;
     sp_digit* r;
+#endif
+    sp_digit *ah;
     sp_digit e[1];
     int err = MP_OKAY;
 
@@ -6963,10 +7392,10 @@
                                                      mp_count_bits(mm) != 2048))
         err = MP_READ_E;
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     if (err == MP_OKAY) {
         d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 5, NULL,
-                               DYNAMIC_TYPE_TMP_BUFFER);
+                                                              DYNAMIC_TYPE_RSA);
         if (d == NULL)
             err = MEMORY_E;
     }
@@ -6975,26 +7404,24 @@
         a = d;
         r = a + 64 * 2;
         m = r + 64 * 2;
+    }
+#endif
+
+    if (err == MP_OKAY) {
         ah = a + 64;
-    }
-#else
-    a = ad;
-    m = md;
-    r = rd;
-    ah = a + 64;
-#endif
-
-    if (err == MP_OKAY) {
+
         sp_2048_from_bin(ah, 64, in, inLen);
 #if DIGIT_BIT >= 32
         e[0] = em->dp[0];
 #else
         e[0] = em->dp[0];
-        if (em->used > 1)
+        if (em->used > 1) {
             e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
-#endif
-        if (e[0] == 0)
+        }
+#endif
+        if (e[0] == 0) {
             err = MP_EXPTMOD_E;
+        }
     }
     if (err == MP_OKAY) {
         sp_2048_from_mp(m, 64, mm);
@@ -7020,25 +7447,30 @@
             err = sp_2048_mod_64_cond(a, a, m);
 
             if (err == MP_OKAY) {
-                for (i=31; i>=0; i--)
-                    if (e[0] >> i)
+                for (i = 31; i >= 0; i--) {
+                    if (e[0] >> i) {
                         break;
+                    }
+                }
 
                 XMEMCPY(r, a, sizeof(sp_digit) * 64);
                 for (i--; i>=0; i--) {
                     sp_2048_mont_sqr_64(r, r, m, mp);
-                    if (((e[0] >> i) & 1) == 1)
+                    if (((e[0] >> i) & 1) == 1) {
                         sp_2048_mont_mul_64(r, r, a, m, mp);
+                    }
                 }
                 XMEMSET(&r[64], 0, sizeof(sp_digit) * 64);
                 sp_2048_mont_reduce_64(r, m, mp);
 
                 for (i = 63; i > 0; i--) {
-                    if (r[i] != m[i])
+                    if (r[i] != m[i]) {
                         break;
+                    }
                 }
-                if (r[i] >= m[i])
+                if (r[i] >= m[i]) {
                     sp_2048_sub_in_place_64(r, m);
+                }
             }
         }
     }
@@ -7048,14 +7480,279 @@
         *outLen = 256;
     }
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (d != NULL)
-        XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
 #endif
 
     return err;
 }
 
+#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
+    sp_digit* a;
+    sp_digit* d = NULL;
+    sp_digit* m;
+    sp_digit* r;
+    int err = MP_OKAY;
+
+    (void)pm;
+    (void)qm;
+    (void)dpm;
+    (void)dqm;
+    (void)qim;
+
+    if (*outLen < 256U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(dm) > 2048) {
+           err = MP_READ_E;
+        }
+        if (inLen > 256) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 2048) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 4, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        a = d + 64;
+        m = a + 128;
+        r = a;
+
+        sp_2048_from_bin(a, 64, in, inLen);
+        sp_2048_from_mp(d, 64, dm);
+        sp_2048_from_mp(m, 64, mm);
+        err = sp_2048_mod_exp_64(r, a, d, 2048, m, 0);
+    }
+    if (err == MP_OKAY) {
+        sp_2048_to_bin(r, out);
+        *outLen = 256;
+    }
+
+    if (d != NULL) {
+        XMEMSET(d, 0, sizeof(sp_digit) * 64);
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+
+    return err;
+#else
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r  A single precision number representing conditional add result.
+ * a  A single precision number to add with.
+ * b  A single precision number to add.
+ * m  Mask value to apply.
+ */
+static sp_digit sp_2048_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        sp_digit m)
+{
+    sp_digit c = 0;
+
+#ifdef WOLFSSL_SP_SMALL
+    __asm__ __volatile__ (
+        "mov	r9, #0\n\t"
+        "mov	r8, #0\n\t"
+        "1:\n\t"
+        "adds	%[c], %[c], #-1\n\t"
+        "ldr	r4, [%[a], r8]\n\t"
+        "ldr	r5, [%[b], r8]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adc	%[c], r9, r9\n\t"
+        "str	r4, [%[r], r8]\n\t"
+        "add	r8, r8, #4\n\t"
+        "cmp	r8, #128\n\t"
+        "blt	1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r4", "r6", "r5", "r7", "r8", "r9"
+    );
+#else
+    __asm__ __volatile__ (
+
+        "mov	r9, #0\n\t"
+        "ldr	r4, [%[a], #0]\n\t"
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r5, [%[b], #0]\n\t"
+        "ldr	r7, [%[b], #4]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #0]\n\t"
+        "str	r6, [%[r], #4]\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "ldr	r6, [%[a], #12]\n\t"
+        "ldr	r5, [%[b], #8]\n\t"
+        "ldr	r7, [%[b], #12]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "str	r6, [%[r], #12]\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "ldr	r6, [%[a], #20]\n\t"
+        "ldr	r5, [%[b], #16]\n\t"
+        "ldr	r7, [%[b], #20]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "str	r6, [%[r], #20]\n\t"
+        "ldr	r4, [%[a], #24]\n\t"
+        "ldr	r6, [%[a], #28]\n\t"
+        "ldr	r5, [%[b], #24]\n\t"
+        "ldr	r7, [%[b], #28]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #24]\n\t"
+        "str	r6, [%[r], #28]\n\t"
+        "ldr	r4, [%[a], #32]\n\t"
+        "ldr	r6, [%[a], #36]\n\t"
+        "ldr	r5, [%[b], #32]\n\t"
+        "ldr	r7, [%[b], #36]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #32]\n\t"
+        "str	r6, [%[r], #36]\n\t"
+        "ldr	r4, [%[a], #40]\n\t"
+        "ldr	r6, [%[a], #44]\n\t"
+        "ldr	r5, [%[b], #40]\n\t"
+        "ldr	r7, [%[b], #44]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #40]\n\t"
+        "str	r6, [%[r], #44]\n\t"
+        "ldr	r4, [%[a], #48]\n\t"
+        "ldr	r6, [%[a], #52]\n\t"
+        "ldr	r5, [%[b], #48]\n\t"
+        "ldr	r7, [%[b], #52]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #48]\n\t"
+        "str	r6, [%[r], #52]\n\t"
+        "ldr	r4, [%[a], #56]\n\t"
+        "ldr	r6, [%[a], #60]\n\t"
+        "ldr	r5, [%[b], #56]\n\t"
+        "ldr	r7, [%[b], #60]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #56]\n\t"
+        "str	r6, [%[r], #60]\n\t"
+        "ldr	r4, [%[a], #64]\n\t"
+        "ldr	r6, [%[a], #68]\n\t"
+        "ldr	r5, [%[b], #64]\n\t"
+        "ldr	r7, [%[b], #68]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #64]\n\t"
+        "str	r6, [%[r], #68]\n\t"
+        "ldr	r4, [%[a], #72]\n\t"
+        "ldr	r6, [%[a], #76]\n\t"
+        "ldr	r5, [%[b], #72]\n\t"
+        "ldr	r7, [%[b], #76]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #72]\n\t"
+        "str	r6, [%[r], #76]\n\t"
+        "ldr	r4, [%[a], #80]\n\t"
+        "ldr	r6, [%[a], #84]\n\t"
+        "ldr	r5, [%[b], #80]\n\t"
+        "ldr	r7, [%[b], #84]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #80]\n\t"
+        "str	r6, [%[r], #84]\n\t"
+        "ldr	r4, [%[a], #88]\n\t"
+        "ldr	r6, [%[a], #92]\n\t"
+        "ldr	r5, [%[b], #88]\n\t"
+        "ldr	r7, [%[b], #92]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #88]\n\t"
+        "str	r6, [%[r], #92]\n\t"
+        "ldr	r4, [%[a], #96]\n\t"
+        "ldr	r6, [%[a], #100]\n\t"
+        "ldr	r5, [%[b], #96]\n\t"
+        "ldr	r7, [%[b], #100]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #96]\n\t"
+        "str	r6, [%[r], #100]\n\t"
+        "ldr	r4, [%[a], #104]\n\t"
+        "ldr	r6, [%[a], #108]\n\t"
+        "ldr	r5, [%[b], #104]\n\t"
+        "ldr	r7, [%[b], #108]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #104]\n\t"
+        "str	r6, [%[r], #108]\n\t"
+        "ldr	r4, [%[a], #112]\n\t"
+        "ldr	r6, [%[a], #116]\n\t"
+        "ldr	r5, [%[b], #112]\n\t"
+        "ldr	r7, [%[b], #116]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #112]\n\t"
+        "str	r6, [%[r], #116]\n\t"
+        "ldr	r4, [%[a], #120]\n\t"
+        "ldr	r6, [%[a], #124]\n\t"
+        "ldr	r5, [%[b], #120]\n\t"
+        "ldr	r7, [%[b], #124]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #120]\n\t"
+        "str	r6, [%[r], #124]\n\t"
+        "adc	%[c], r9, r9\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r4", "r6", "r5", "r7", "r8", "r9"
+    );
+#endif /* WOLFSSL_SP_SMALL */
+
+    return c;
+}
+
 /* RSA private key operation.
  *
  * in      Array of bytes representing the number to exponentiate, base.
@@ -7077,23 +7774,22 @@
     mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
     byte* out, word32* outLen)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_digit ad[64 * 2];
-    sp_digit pd[32], qd[32], dpd[32];
-    sp_digit tmpad[64], tmpbd[64];
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit a[64 * 2];
+    sp_digit p[32], q[32], dp[32];
+    sp_digit tmpa[64], tmpb[64];
 #else
     sp_digit* t = NULL;
-#endif
     sp_digit* a;
     sp_digit* p;
     sp_digit* q;
     sp_digit* dp;
-    sp_digit* dq;
-    sp_digit* qi;
-    sp_digit* tmp;
     sp_digit* tmpa;
     sp_digit* tmpb;
+#endif
     sp_digit* r;
+    sp_digit* qi;
+    sp_digit* dq;
     sp_digit c;
     int err = MP_OKAY;
 
@@ -7105,10 +7801,10 @@
     if (err == MP_OKAY && (inLen > 256 || mp_count_bits(mm) != 2048))
         err = MP_READ_E;
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     if (err == MP_OKAY) {
         t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 11, NULL,
-                               DYNAMIC_TYPE_TMP_BUFFER);
+                                                              DYNAMIC_TYPE_RSA);
         if (t == NULL)
             err = MEMORY_E;
     }
@@ -7120,20 +7816,16 @@
         tmpa = qi + 32;
         tmpb = tmpa + 64;
 
-        tmp = t;
-        r = tmp + 64;
-    }
-#else
-    r = a = ad;
-    p = pd;
-    q = qd;
-    qi = dq = dp = dpd;
-    tmpa = tmpad;
-    tmpb = tmpbd;
-    tmp = a + 64;
-#endif
-
-    if (err == MP_OKAY) {
+        r = t + 64;
+    }
+#else
+#endif
+
+    if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+        r = a;
+        qi = dq = dp;
+#endif
         sp_2048_from_bin(a, 64, in, inLen);
         sp_2048_from_mp(p, 32, pm);
         sp_2048_from_mp(q, 32, qm);
@@ -7148,8 +7840,8 @@
 
     if (err == MP_OKAY) {
         c = sp_2048_sub_in_place_32(tmpa, tmpb);
-        sp_2048_mask_32(tmp, p, c);
-        sp_2048_add_32(tmpa, tmpa, tmp);
+        c += sp_2048_cond_add_32(tmpa, tmpa, p, c);
+        sp_2048_cond_add_32(tmpa, tmpa, p, c);
 
         sp_2048_from_mp(qi, 32, qim);
         sp_2048_mul_32(tmpa, tmpa, qi);
@@ -7165,34 +7857,37 @@
         *outLen = 256;
     }
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     if (t != NULL) {
         XMEMSET(t, 0, sizeof(sp_digit) * 32 * 11);
-        XFREE(t, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    }
-#else
-    XMEMSET(tmpad, 0, sizeof(tmpad));
-    XMEMSET(tmpbd, 0, sizeof(tmpbd));
-    XMEMSET(pd, 0, sizeof(pd));
-    XMEMSET(qd, 0, sizeof(qd));
-    XMEMSET(dpd, 0, sizeof(dpd));
+        XFREE(t, NULL, DYNAMIC_TYPE_RSA);
+    }
+#else
+    XMEMSET(tmpa, 0, sizeof(tmpa));
+    XMEMSET(tmpb, 0, sizeof(tmpb));
+    XMEMSET(p,    0, sizeof(p));
+    XMEMSET(q,    0, sizeof(q));
+    XMEMSET(dp,   0, sizeof(dp));
 #endif
 
     return err;
 }
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
+#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
 #endif /* WOLFSSL_HAVE_SP_RSA */
-#ifdef WOLFSSL_HAVE_SP_DH
+#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
+                                              !defined(WOLFSSL_RSA_PUBLIC_ONLY))
 /* Convert an array of sp_digit to an mp_int.
  *
  * a  A single precision integer.
  * r  A multi-precision integer.
  */
-static int sp_2048_to_mp(sp_digit* a, mp_int* r)
+static int sp_2048_to_mp(const sp_digit* a, mp_int* r)
 {
     int err;
 
     err = mp_grow(r, (2048 + DIGIT_BIT - 1) / DIGIT_BIT);
-    if (err == MP_OKAY) {
+    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
 #if DIGIT_BIT == 32
         XMEMCPY(r->dp, a, sizeof(sp_digit) * 64);
         r->used = 64;
@@ -7202,14 +7897,19 @@
 
         r->dp[0] = 0;
         for (i = 0; i < 64; i++) {
-            r->dp[j] |= a[i] << s;
-            r->dp[j] &= (1l << DIGIT_BIT) - 1;
+            r->dp[j] |= (mp_digit)(a[i] << s);
+            r->dp[j] &= (1L << DIGIT_BIT) - 1;
             s = DIGIT_BIT - s;
-            r->dp[++j] = a[i] >> s;
+            r->dp[++j] = (mp_digit)(a[i] >> s);
             while (s + DIGIT_BIT <= 32) {
                 s += DIGIT_BIT;
-                r->dp[j] &= (1l << DIGIT_BIT) - 1;
-                r->dp[++j] = a[i] >> s;
+                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+                if (s == SP_WORD_SIZE) {
+                    r->dp[j] = 0;
+                }
+                else {
+                    r->dp[j] = (mp_digit)(a[i] >> s);
+                }
             }
             s = 32 - s;
         }
@@ -7222,15 +7922,16 @@
         for (i = 0; i < 64; i++) {
             r->dp[j] |= ((mp_digit)a[i]) << s;
             if (s + 32 >= DIGIT_BIT) {
-    #if DIGIT_BIT < 32
-                r->dp[j] &= (1l << DIGIT_BIT) - 1;
+    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+                r->dp[j] &= (1L << DIGIT_BIT) - 1;
     #endif
                 s = DIGIT_BIT - s;
                 r->dp[++j] = a[i] >> s;
                 s = 32 - s;
             }
-            else
+            else {
                 s += 32;
+            }
         }
         r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT;
         mp_clamp(r);
@@ -7246,7 +7947,7 @@
  * exp   Exponent. MP integer.
  * mod   Modulus. MP integer.
  * res   Result. MP integer.
- * returs 0 on success, MP_READ_E if there are too many bytes in an array
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
  * and MEMORY_E if memory allocation fails.
  */
 int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
@@ -7256,12 +7957,23 @@
     sp_digit* r = b;
     int expBits = mp_count_bits(exp);
 
-    if (mp_count_bits(base) > 2048 || expBits > 2048 ||
-                                                   mp_count_bits(mod) != 2048) {
+    if (mp_count_bits(base) > 2048) {
         err = MP_READ_E;
     }
 
     if (err == MP_OKAY) {
+        if (expBits > 2048) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 2048) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
         sp_2048_from_mp(b, 64, base);
         sp_2048_from_mp(e, 64, exp);
         sp_2048_from_mp(m, 64, mod);
@@ -7278,6 +7990,514 @@
     return err;
 }
 
+#ifdef WOLFSSL_HAVE_SP_DH
+
+#ifdef HAVE_FFDHE_2048
+static void sp_2048_lshift_64(sp_digit* r, sp_digit* a, byte n)
+{
+    __asm__ __volatile__ (
+        "mov	r6, #31\n\t"
+        "sub	r6, r6, %[n]\n\t"
+        "ldr	r3, [%[a], #252]\n\t"
+        "lsr	r4, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r4, r4, r6\n\t"
+        "ldr	r2, [%[a], #248]\n\t"
+        "str	r4, [%[r], #256]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #244]\n\t"
+        "str	r3, [%[r], #252]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #240]\n\t"
+        "str	r2, [%[r], #248]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #236]\n\t"
+        "str	r4, [%[r], #244]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #232]\n\t"
+        "str	r3, [%[r], #240]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #228]\n\t"
+        "str	r2, [%[r], #236]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #224]\n\t"
+        "str	r4, [%[r], #232]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #220]\n\t"
+        "str	r3, [%[r], #228]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #216]\n\t"
+        "str	r2, [%[r], #224]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #212]\n\t"
+        "str	r4, [%[r], #220]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #208]\n\t"
+        "str	r3, [%[r], #216]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #204]\n\t"
+        "str	r2, [%[r], #212]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #200]\n\t"
+        "str	r4, [%[r], #208]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #196]\n\t"
+        "str	r3, [%[r], #204]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #192]\n\t"
+        "str	r2, [%[r], #200]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #188]\n\t"
+        "str	r4, [%[r], #196]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #184]\n\t"
+        "str	r3, [%[r], #192]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #180]\n\t"
+        "str	r2, [%[r], #188]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #176]\n\t"
+        "str	r4, [%[r], #184]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #172]\n\t"
+        "str	r3, [%[r], #180]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #168]\n\t"
+        "str	r2, [%[r], #176]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #164]\n\t"
+        "str	r4, [%[r], #172]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #160]\n\t"
+        "str	r3, [%[r], #168]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #156]\n\t"
+        "str	r2, [%[r], #164]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #152]\n\t"
+        "str	r4, [%[r], #160]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #148]\n\t"
+        "str	r3, [%[r], #156]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #144]\n\t"
+        "str	r2, [%[r], #152]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #140]\n\t"
+        "str	r4, [%[r], #148]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #136]\n\t"
+        "str	r3, [%[r], #144]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #132]\n\t"
+        "str	r2, [%[r], #140]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #128]\n\t"
+        "str	r4, [%[r], #136]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #124]\n\t"
+        "str	r3, [%[r], #132]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #120]\n\t"
+        "str	r2, [%[r], #128]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #116]\n\t"
+        "str	r4, [%[r], #124]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #112]\n\t"
+        "str	r3, [%[r], #120]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #108]\n\t"
+        "str	r2, [%[r], #116]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #104]\n\t"
+        "str	r4, [%[r], #112]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #100]\n\t"
+        "str	r3, [%[r], #108]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #96]\n\t"
+        "str	r2, [%[r], #104]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #92]\n\t"
+        "str	r4, [%[r], #100]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #88]\n\t"
+        "str	r3, [%[r], #96]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #84]\n\t"
+        "str	r2, [%[r], #92]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #80]\n\t"
+        "str	r4, [%[r], #88]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #76]\n\t"
+        "str	r3, [%[r], #84]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #72]\n\t"
+        "str	r2, [%[r], #80]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #68]\n\t"
+        "str	r4, [%[r], #76]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #64]\n\t"
+        "str	r3, [%[r], #72]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #60]\n\t"
+        "str	r2, [%[r], #68]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #56]\n\t"
+        "str	r4, [%[r], #64]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #52]\n\t"
+        "str	r3, [%[r], #60]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #48]\n\t"
+        "str	r2, [%[r], #56]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #44]\n\t"
+        "str	r4, [%[r], #52]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #40]\n\t"
+        "str	r3, [%[r], #48]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #36]\n\t"
+        "str	r2, [%[r], #44]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #32]\n\t"
+        "str	r4, [%[r], #40]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "str	r3, [%[r], #36]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #24]\n\t"
+        "str	r2, [%[r], #32]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #20]\n\t"
+        "str	r4, [%[r], #28]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "str	r3, [%[r], #24]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #12]\n\t"
+        "str	r2, [%[r], #20]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #8]\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "str	r3, [%[r], #12]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #0]\n\t"
+        "str	r2, [%[r], #8]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "str	r3, [%[r]]\n\t"
+        "str	r4, [%[r], #4]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [n] "r" (n)
+        : "memory", "r2", "r3", "r4", "r5", "r6"
+    );
+}
+
+/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
+ *
+ * r     A single precision number that is the result of the operation.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_2048_mod_exp_2_64(sp_digit* r, const sp_digit* e, int bits,
+        const sp_digit* m)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit nd[128];
+    sp_digit td[65];
+#else
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit* tmp;
+    sp_digit mp = 1;
+    sp_digit n, o;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 193, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        norm = td;
+        tmp  = td + 128;
+#else
+        norm = nd;
+        tmp  = td;
+#endif
+
+        sp_2048_mont_setup(m, &mp);
+        sp_2048_mont_norm_64(norm, m);
+
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 5;
+        if (c == 32) {
+            c = 27;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
+        sp_2048_lshift_64(r, norm, y);
+        for (; i>=0 || c>=5; ) {
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 27;
+                n <<= 5;
+                c = 27;
+            }
+            else if (c < 5) {
+                y = n >> 27;
+                n = e[i--];
+                c = 5 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                y = (n >> 27) & 0x1f;
+                n <<= 5;
+                c -= 5;
+            }
+
+            sp_2048_mont_sqr_64(r, r, m, mp);
+            sp_2048_mont_sqr_64(r, r, m, mp);
+            sp_2048_mont_sqr_64(r, r, m, mp);
+            sp_2048_mont_sqr_64(r, r, m, mp);
+            sp_2048_mont_sqr_64(r, r, m, mp);
+
+            sp_2048_lshift_64(r, r, y);
+            sp_2048_mul_d_64(tmp, norm, r[64]);
+            r[64] = 0;
+            o = sp_2048_add_64(r, r, tmp);
+            sp_2048_cond_sub_64(r, r, m, (sp_digit)0 - o);
+        }
+
+        XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U);
+        sp_2048_mont_reduce_64(r, m, mp);
+
+        mask = 0 - (sp_2048_cmp_64(r, m) >= 0);
+        sp_2048_cond_sub_64(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#endif /* HAVE_FFDHE_2048 */
+
 /* Perform the modular exponentiation for Diffie-Hellman.
  *
  * base     Base.
@@ -7287,7 +8507,7 @@
  * out      Buffer to hold big-endian bytes of exponentiation result.
  *          Must be at least 256 bytes long.
  * outLen   Length, in bytes, of exponentiation result.
- * returs 0 on success, MP_READ_E if there are too many bytes in an array
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
  * and MEMORY_E if memory allocation fails.
  */
 int sp_DhExp_2048(mp_int* base, const byte* exp, word32 expLen,
@@ -7298,17 +8518,34 @@
     sp_digit* r = b;
     word32 i;
 
-    if (mp_count_bits(base) > 2048 || expLen > 256 ||
-                                                   mp_count_bits(mod) != 2048) {
+    if (mp_count_bits(base) > 2048) {
         err = MP_READ_E;
     }
 
     if (err == MP_OKAY) {
+        if (expLen > 256) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 2048) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
         sp_2048_from_mp(b, 64, base);
         sp_2048_from_bin(e, 64, exp, expLen);
         sp_2048_from_mp(m, 64, mod);
 
-        err = sp_2048_mod_exp_64(r, b, e, expLen * 8, m, 0);
+    #ifdef HAVE_FFDHE_2048
+        if (base->used == 1 && base->dp[0] == 2 && m[63] == (sp_digit)-1)
+            err = sp_2048_mod_exp_2_64(r, e, expLen * 8, m);
+        else
+    #endif
+            err = sp_2048_mod_exp_64(r, b, e, expLen * 8, m, 0);
+
     }
 
     if (err == MP_OKAY) {
@@ -7325,91 +8562,160 @@
 
     return err;
 }
-
 #endif /* WOLFSSL_HAVE_SP_DH */
 
-#endif /* WOLFSSL_SP_NO_2048 */
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base  Base. MP integer.
+ * exp   Exponent. MP integer.
+ * mod   Modulus. MP integer.
+ * res   Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_1024(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+    int err = MP_OKAY;
+    sp_digit b[64], e[32], m[32];
+    sp_digit* r = b;
+    int expBits = mp_count_bits(exp);
+
+    if (mp_count_bits(base) > 1024) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expBits > 1024) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 1024) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_from_mp(b, 32, base);
+        sp_2048_from_mp(e, 32, exp);
+        sp_2048_from_mp(m, 32, mod);
+
+        err = sp_2048_mod_exp_32(r, b, e, expBits, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        XMEMSET(r + 32, 0, sizeof(*r) * 32U);
+        err = sp_2048_to_mp(r, res);
+        res->used = mod->used;
+        mp_clamp(res);
+    }
+
+    XMEMSET(e, 0, sizeof(e));
+
+    return err;
+}
+
+#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
+
+#endif /* !WOLFSSL_SP_NO_2048 */
 
 #ifndef WOLFSSL_SP_NO_3072
-/* Read big endian unsigned byte aray into r.
- *
- * r  A single precision integer.
+/* Read big endian unsigned byte array into r.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
  * a  Byte array.
  * n  Number of bytes in array to read.
  */
-static void sp_3072_from_bin(sp_digit* r, int max, const byte* a, int n)
-{
-    int i, j = 0, s = 0;
+static void sp_3072_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;
 
     r[0] = 0;
     for (i = n-1; i >= 0; i--) {
-        r[j] |= ((sp_digit)a[i]) << s;
-        if (s >= 24) {
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 24U) {
             r[j] &= 0xffffffff;
-            s = 32 - s;
-            if (j + 1 >= max)
+            s = 32U - s;
+            if (j + 1 >= size) {
                 break;
-            r[++j] = a[i] >> s;
-            s = 8 - s;
-        }
-        else
-            s += 8;
-    }
-
-    for (j++; j < max; j++)
+            }
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    for (j++; j < size; j++) {
         r[j] = 0;
+    }
 }
 
 /* Convert an mp_int to an array of sp_digit.
  *
  * r  A single precision integer.
+ * size  Maximum number of bytes to convert
  * a  A multi-precision integer.
  */
-static void sp_3072_from_mp(sp_digit* r, int max, mp_int* a)
+static void sp_3072_from_mp(sp_digit* r, int size, const mp_int* a)
 {
 #if DIGIT_BIT == 32
     int j;
 
     XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
 
-    for (j = a->used; j < max; j++)
+    for (j = a->used; j < size; j++) {
         r[j] = 0;
+    }
 #elif DIGIT_BIT > 32
-    int i, j = 0, s = 0;
+    int i, j = 0;
+    word32 s = 0;
 
     r[0] = 0;
-    for (i = 0; i < a->used && j < max; i++) {
-        r[j] |= a->dp[i] << s;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
         r[j] &= 0xffffffff;
-        s = 32 - s;
-        if (j + 1 >= max)
+        s = 32U - s;
+        if (j + 1 >= size) {
             break;
-        r[++j] = a->dp[i] >> s;
-        while (s + 32 <= DIGIT_BIT) {
-            s += 32;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 32U) <= (word32)DIGIT_BIT) {
+            s += 32U;
             r[j] &= 0xffffffff;
-            if (j + 1 >= max)
+            if (j + 1 >= size) {
                 break;
-            if (s < DIGIT_BIT)
-                r[++j] = a->dp[i] >> s;
-            else
-                r[++j] = 0;
-        }
-        s = DIGIT_BIT - s;
-    }
-
-    for (j++; j < max; j++)
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
         r[j] = 0;
+    }
 #else
     int i, j = 0, s = 0;
 
     r[0] = 0;
-    for (i = 0; i < a->used && j < max; i++) {
+    for (i = 0; i < a->used && j < size; i++) {
         r[j] |= ((sp_digit)a->dp[i]) << s;
         if (s + DIGIT_BIT >= 32) {
             r[j] &= 0xffffffff;
-            if (j + 1 >= max)
+            if (j + 1 >= size) {
                 break;
+            }
             s = 32 - s;
             if (s == DIGIT_BIT) {
                 r[++j] = 0;
@@ -7420,16 +8726,18 @@
                 s = DIGIT_BIT - s;
             }
         }
-        else
+        else {
             s += DIGIT_BIT;
-    }
-
-    for (j++; j < max; j++)
+        }
+    }
+
+    for (j++; j < size; j++) {
         r[j] = 0;
-#endif
-}
-
-/* Write r as big endian to byte aray.
+    }
+#endif
+}
+
+/* Write r as big endian to byte array.
  * Fixed length number of bytes written: 384
  *
  * r  A single precision integer.
@@ -7443,19 +8751,26 @@
     a[j] = 0;
     for (i=0; i<96 && j>=0; i++) {
         b = 0;
-        a[j--] |= r[i] << s; b += 8 - s;
-        if (j < 0)
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
             break;
+        }
         while (b < 32) {
-            a[j--] = r[i] >> b; b += 8;
-            if (j < 0)
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
                 break;
+            }
         }
         s = 8 - (b - 32);
-        if (j >= 0)
+        if (j >= 0) {
             a[j] = 0;
-        if (s != 0)
+        }
+        if (s != 0) {
             j++;
+        }
     }
 }
 
@@ -7466,18 +8781,17 @@
  * a  A single precision integer.
  * b  A single precision integer.
  */
-static void sp_3072_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
-{
-    sp_digit tmp[8];
-
-    __asm__ __volatile__ (
+static void sp_3072_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+    __asm__ __volatile__ (
+        "sub	sp, sp, #48\n\t"
         "mov	r10, #0\n\t"
         "#  A[0] * B[0]\n\t"
         "ldr	r8, [%[a], #0]\n\t"
         "ldr	r9, [%[b], #0]\n\t"
         "umull	r3, r4, r8, r9\n\t"
         "mov	r5, #0\n\t"
-        "str	r3, [%[tmp]]\n\t"
+        "str	r3, [sp]\n\t"
         "#  A[0] * B[1]\n\t"
         "ldr	r8, [%[a], #0]\n\t"
         "ldr	r9, [%[b], #4]\n\t"
@@ -7492,7 +8806,7 @@
         "adds	r4, r4, r6\n\t"
         "adcs	r5, r5, r7\n\t"
         "adc	r3, r3, r10\n\t"
-        "str	r4, [%[tmp], #4]\n\t"
+        "str	r4, [sp, #4]\n\t"
         "#  A[0] * B[2]\n\t"
         "ldr	r8, [%[a], #0]\n\t"
         "ldr	r9, [%[b], #8]\n\t"
@@ -7514,7 +8828,7 @@
         "adds	r5, r5, r6\n\t"
         "adcs	r3, r3, r7\n\t"
         "adc	r4, r4, r10\n\t"
-        "str	r5, [%[tmp], #8]\n\t"
+        "str	r5, [sp, #8]\n\t"
         "#  A[0] * B[3]\n\t"
         "ldr	r8, [%[a], #0]\n\t"
         "ldr	r9, [%[b], #12]\n\t"
@@ -7543,7 +8857,7 @@
         "adds	r3, r3, r6\n\t"
         "adcs	r4, r4, r7\n\t"
         "adc	r5, r5, r10\n\t"
-        "str	r3, [%[tmp], #12]\n\t"
+        "str	r3, [sp, #12]\n\t"
         "#  A[0] * B[4]\n\t"
         "ldr	r8, [%[a], #0]\n\t"
         "ldr	r9, [%[b], #16]\n\t"
@@ -7579,7 +8893,7 @@
         "adds	r4, r4, r6\n\t"
         "adcs	r5, r5, r7\n\t"
         "adc	r3, r3, r10\n\t"
-        "str	r4, [%[tmp], #16]\n\t"
+        "str	r4, [sp, #16]\n\t"
         "#  A[0] * B[5]\n\t"
         "ldr	r8, [%[a], #0]\n\t"
         "ldr	r9, [%[b], #20]\n\t"
@@ -7622,7 +8936,7 @@
         "adds	r5, r5, r6\n\t"
         "adcs	r3, r3, r7\n\t"
         "adc	r4, r4, r10\n\t"
-        "str	r5, [%[tmp], #20]\n\t"
+        "str	r5, [sp, #20]\n\t"
         "#  A[0] * B[6]\n\t"
         "ldr	r8, [%[a], #0]\n\t"
         "ldr	r9, [%[b], #24]\n\t"
@@ -7672,7 +8986,7 @@
         "adds	r3, r3, r6\n\t"
         "adcs	r4, r4, r7\n\t"
         "adc	r5, r5, r10\n\t"
-        "str	r3, [%[tmp], #24]\n\t"
+        "str	r3, [sp, #24]\n\t"
         "#  A[0] * B[7]\n\t"
         "ldr	r8, [%[a], #0]\n\t"
         "ldr	r9, [%[b], #28]\n\t"
@@ -7729,14 +9043,21 @@
         "adds	r4, r4, r6\n\t"
         "adcs	r5, r5, r7\n\t"
         "adc	r3, r3, r10\n\t"
-        "str	r4, [%[tmp], #28]\n\t"
-        "#  A[1] * B[7]\n\t"
-        "ldr	r8, [%[a], #4]\n\t"
-        "ldr	r9, [%[b], #28]\n\t"
+        "str	r4, [sp, #28]\n\t"
+        "#  A[0] * B[8]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
         "umull	r6, r7, r8, r9\n\t"
         "adds	r5, r5, r6\n\t"
         "adcs	r3, r3, r7\n\t"
         "adc	r4, r10, r10\n\t"
+        "#  A[1] * B[7]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
         "#  A[2] * B[6]\n\t"
         "ldr	r8, [%[a], #8]\n\t"
         "ldr	r9, [%[b], #24]\n\t"
@@ -7779,14 +9100,35 @@
         "adds	r5, r5, r6\n\t"
         "adcs	r3, r3, r7\n\t"
         "adc	r4, r4, r10\n\t"
-        "str	r5, [%[r], #32]\n\t"
-        "#  A[2] * B[7]\n\t"
-        "ldr	r8, [%[a], #8]\n\t"
-        "ldr	r9, [%[b], #28]\n\t"
+        "#  A[8] * B[0]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [sp, #32]\n\t"
+        "#  A[0] * B[9]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
         "umull	r6, r7, r8, r9\n\t"
         "adds	r3, r3, r6\n\t"
         "adcs	r4, r4, r7\n\t"
         "adc	r5, r10, r10\n\t"
+        "#  A[1] * B[8]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[2] * B[7]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
         "#  A[3] * B[6]\n\t"
         "ldr	r8, [%[a], #12]\n\t"
         "ldr	r9, [%[b], #24]\n\t"
@@ -7822,14 +9164,49 @@
         "adds	r3, r3, r6\n\t"
         "adcs	r4, r4, r7\n\t"
         "adc	r5, r5, r10\n\t"
-        "str	r3, [%[r], #36]\n\t"
-        "#  A[3] * B[7]\n\t"
-        "ldr	r8, [%[a], #12]\n\t"
-        "ldr	r9, [%[b], #28]\n\t"
+        "#  A[8] * B[1]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[9] * B[0]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [sp, #36]\n\t"
+        "#  A[0] * B[10]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
         "umull	r6, r7, r8, r9\n\t"
         "adds	r4, r4, r6\n\t"
         "adcs	r5, r5, r7\n\t"
         "adc	r3, r10, r10\n\t"
+        "#  A[1] * B[9]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[2] * B[8]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[3] * B[7]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
         "#  A[4] * B[6]\n\t"
         "ldr	r8, [%[a], #16]\n\t"
         "ldr	r9, [%[b], #24]\n\t"
@@ -7858,14 +9235,63 @@
         "adds	r4, r4, r6\n\t"
         "adcs	r5, r5, r7\n\t"
         "adc	r3, r3, r10\n\t"
-        "str	r4, [%[r], #40]\n\t"
-        "#  A[4] * B[7]\n\t"
-        "ldr	r8, [%[a], #16]\n\t"
-        "ldr	r9, [%[b], #28]\n\t"
+        "#  A[8] * B[2]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[9] * B[1]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[10] * B[0]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [sp, #40]\n\t"
+        "#  A[0] * B[11]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
         "umull	r6, r7, r8, r9\n\t"
         "adds	r5, r5, r6\n\t"
         "adcs	r3, r3, r7\n\t"
         "adc	r4, r10, r10\n\t"
+        "#  A[1] * B[10]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[2] * B[9]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[3] * B[8]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[4] * B[7]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
         "#  A[5] * B[6]\n\t"
         "ldr	r8, [%[a], #20]\n\t"
         "ldr	r9, [%[b], #24]\n\t"
@@ -7887,14 +9313,70 @@
         "adds	r5, r5, r6\n\t"
         "adcs	r3, r3, r7\n\t"
         "adc	r4, r4, r10\n\t"
-        "str	r5, [%[r], #44]\n\t"
-        "#  A[5] * B[7]\n\t"
-        "ldr	r8, [%[a], #20]\n\t"
-        "ldr	r9, [%[b], #28]\n\t"
+        "#  A[8] * B[3]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[9] * B[2]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[10] * B[1]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[11] * B[0]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [sp, #44]\n\t"
+        "#  A[1] * B[11]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
         "umull	r6, r7, r8, r9\n\t"
         "adds	r3, r3, r6\n\t"
         "adcs	r4, r4, r7\n\t"
         "adc	r5, r10, r10\n\t"
+        "#  A[2] * B[10]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[3] * B[9]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[4] * B[8]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[5] * B[7]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
         "#  A[6] * B[6]\n\t"
         "ldr	r8, [%[a], #24]\n\t"
         "ldr	r9, [%[b], #24]\n\t"
@@ -7909,14 +9391,70 @@
         "adds	r3, r3, r6\n\t"
         "adcs	r4, r4, r7\n\t"
         "adc	r5, r5, r10\n\t"
+        "#  A[8] * B[4]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[9] * B[3]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[10] * B[2]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[11] * B[1]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
         "str	r3, [%[r], #48]\n\t"
-        "#  A[6] * B[7]\n\t"
-        "ldr	r8, [%[a], #24]\n\t"
-        "ldr	r9, [%[b], #28]\n\t"
+        "#  A[2] * B[11]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
         "umull	r6, r7, r8, r9\n\t"
         "adds	r4, r4, r6\n\t"
         "adcs	r5, r5, r7\n\t"
         "adc	r3, r10, r10\n\t"
+        "#  A[3] * B[10]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[4] * B[9]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[5] * B[8]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[6] * B[7]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
         "#  A[7] * B[6]\n\t"
         "ldr	r8, [%[a], #28]\n\t"
         "ldr	r9, [%[b], #24]\n\t"
@@ -7924,21 +9462,388 @@
         "adds	r4, r4, r6\n\t"
         "adcs	r5, r5, r7\n\t"
         "adc	r3, r3, r10\n\t"
+        "#  A[8] * B[5]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[9] * B[4]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[10] * B[3]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[11] * B[2]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
         "str	r4, [%[r], #52]\n\t"
+        "#  A[3] * B[11]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[4] * B[10]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[5] * B[9]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[6] * B[8]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
         "#  A[7] * B[7]\n\t"
         "ldr	r8, [%[a], #28]\n\t"
         "ldr	r9, [%[b], #28]\n\t"
         "umull	r6, r7, r8, r9\n\t"
         "adds	r5, r5, r6\n\t"
-        "adc	r3, r3, r7\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[8] * B[6]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[9] * B[5]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[10] * B[4]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[11] * B[3]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
         "str	r5, [%[r], #56]\n\t"
+        "#  A[4] * B[11]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[5] * B[10]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[6] * B[9]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[7] * B[8]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[8] * B[7]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[9] * B[6]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[10] * B[5]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[11] * B[4]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
         "str	r3, [%[r], #60]\n\t"
+        "#  A[5] * B[11]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[6] * B[10]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[7] * B[9]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[8] * B[8]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[9] * B[7]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[10] * B[6]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[11] * B[5]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #64]\n\t"
+        "#  A[6] * B[11]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[7] * B[10]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[8] * B[9]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[9] * B[8]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[10] * B[7]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[11] * B[6]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #68]\n\t"
+        "#  A[7] * B[11]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[8] * B[10]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[9] * B[9]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[10] * B[8]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[11] * B[7]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #72]\n\t"
+        "#  A[8] * B[11]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[9] * B[10]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[10] * B[9]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[11] * B[8]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #76]\n\t"
+        "#  A[9] * B[11]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[10] * B[10]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[11] * B[9]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #80]\n\t"
+        "#  A[10] * B[11]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[11] * B[10]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #84]\n\t"
+        "#  A[11] * B[11]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adc	r5, r5, r7\n\t"
+        "str	r4, [%[r], #88]\n\t"
+        "str	r5, [%[r], #92]\n\t"
+        "ldr	r3, [sp, #0]\n\t"
+        "ldr	r4, [sp, #4]\n\t"
+        "ldr	r5, [sp, #8]\n\t"
+        "ldr	r6, [sp, #12]\n\t"
+        "str	r3, [%[r], #0]\n\t"
+        "str	r4, [%[r], #4]\n\t"
+        "str	r5, [%[r], #8]\n\t"
+        "str	r6, [%[r], #12]\n\t"
+        "ldr	r3, [sp, #16]\n\t"
+        "ldr	r4, [sp, #20]\n\t"
+        "ldr	r5, [sp, #24]\n\t"
+        "ldr	r6, [sp, #28]\n\t"
+        "str	r3, [%[r], #16]\n\t"
+        "str	r4, [%[r], #20]\n\t"
+        "str	r5, [%[r], #24]\n\t"
+        "str	r6, [%[r], #28]\n\t"
+        "ldr	r3, [sp, #32]\n\t"
+        "ldr	r4, [sp, #36]\n\t"
+        "ldr	r5, [sp, #40]\n\t"
+        "ldr	r6, [sp, #44]\n\t"
+        "str	r3, [%[r], #32]\n\t"
+        "str	r4, [%[r], #36]\n\t"
+        "str	r5, [%[r], #40]\n\t"
+        "str	r6, [%[r], #44]\n\t"
+        "add	sp, sp, #48\n\t"
         :
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [tmp] "r" (tmp)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
     );
-
-    XMEMCPY(r, tmp, sizeof(tmp));
 }
 
 /* Square a and put result in r. (r = a * a)
@@ -7946,17 +9851,16 @@
  * r  A single precision integer.
  * a  A single precision integer.
  */
-static void sp_3072_sqr_8(sp_digit* r, const sp_digit* a)
-{
-    sp_digit tmp[8];
-
-    __asm__ __volatile__ (
+static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "sub	sp, sp, #48\n\t"
         "mov	r14, #0\n\t"
         "#  A[0] * A[0]\n\t"
         "ldr	r10, [%[a], #0]\n\t"
         "umull	r8, r3, r10, r10\n\t"
         "mov	r4, #0\n\t"
-        "str	r8, [%[tmp]]\n\t"
+        "str	r8, [sp]\n\t"
         "#  A[0] * A[1]\n\t"
         "ldr	r10, [%[a], #4]\n\t"
         "ldr	r8, [%[a], #0]\n\t"
@@ -7967,7 +9871,7 @@
         "adds	r3, r3, r8\n\t"
         "adcs	r4, r4, r9\n\t"
         "adc	r2, r2, r14\n\t"
-        "str	r3, [%[tmp], #4]\n\t"
+        "str	r3, [sp, #4]\n\t"
         "#  A[0] * A[2]\n\t"
         "ldr	r10, [%[a], #8]\n\t"
         "ldr	r8, [%[a], #0]\n\t"
@@ -7984,7 +9888,7 @@
         "adds	r4, r4, r8\n\t"
         "adcs	r2, r2, r9\n\t"
         "adc	r3, r3, r14\n\t"
-        "str	r4, [%[tmp], #8]\n\t"
+        "str	r4, [sp, #8]\n\t"
         "#  A[0] * A[3]\n\t"
         "ldr	r10, [%[a], #12]\n\t"
         "ldr	r8, [%[a], #0]\n\t"
@@ -8005,7 +9909,7 @@
         "adds	r2, r2, r8\n\t"
         "adcs	r3, r3, r9\n\t"
         "adc	r4, r4, r14\n\t"
-        "str	r2, [%[tmp], #12]\n\t"
+        "str	r2, [sp, #12]\n\t"
         "#  A[0] * A[4]\n\t"
         "ldr	r10, [%[a], #16]\n\t"
         "ldr	r8, [%[a], #0]\n\t"
@@ -8032,7 +9936,7 @@
         "adds	r3, r3, r8\n\t"
         "adcs	r4, r4, r9\n\t"
         "adc	r2, r2, r14\n\t"
-        "str	r3, [%[tmp], #16]\n\t"
+        "str	r3, [sp, #16]\n\t"
         "#  A[0] * A[5]\n\t"
         "ldr	r10, [%[a], #20]\n\t"
         "ldr	r8, [%[a], #0]\n\t"
@@ -8059,7 +9963,7 @@
         "adds	r4, r4, r5\n\t"
         "adcs	r2, r2, r6\n\t"
         "adc	r3, r3, r7\n\t"
-        "str	r4, [%[tmp], #20]\n\t"
+        "str	r4, [sp, #20]\n\t"
         "#  A[0] * A[6]\n\t"
         "ldr	r10, [%[a], #24]\n\t"
         "ldr	r8, [%[a], #0]\n\t"
@@ -8092,7 +9996,7 @@
         "adds	r2, r2, r5\n\t"
         "adcs	r3, r3, r6\n\t"
         "adc	r4, r4, r7\n\t"
-        "str	r2, [%[tmp], #24]\n\t"
+        "str	r2, [sp, #24]\n\t"
         "#  A[0] * A[7]\n\t"
         "ldr	r10, [%[a], #28]\n\t"
         "ldr	r8, [%[a], #0]\n\t"
@@ -8126,13 +10030,20 @@
         "adds	r3, r3, r5\n\t"
         "adcs	r4, r4, r6\n\t"
         "adc	r2, r2, r7\n\t"
-        "str	r3, [%[tmp], #28]\n\t"
+        "str	r3, [sp, #28]\n\t"
+        "#  A[0] * A[8]\n\t"
+        "ldr	r10, [%[a], #32]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
         "#  A[1] * A[7]\n\t"
         "ldr	r10, [%[a], #28]\n\t"
         "ldr	r8, [%[a], #4]\n\t"
-        "umull	r5, r6, r10, r8\n\t"
-        "mov	r3, #0\n\t"
-        "mov	r7, #0\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
         "#  A[2] * A[6]\n\t"
         "ldr	r10, [%[a], #24]\n\t"
         "ldr	r8, [%[a], #8]\n\t"
@@ -8159,13 +10070,27 @@
         "adds	r4, r4, r5\n\t"
         "adcs	r2, r2, r6\n\t"
         "adc	r3, r3, r7\n\t"
-        "str	r4, [%[r], #32]\n\t"
+        "str	r4, [sp, #32]\n\t"
+        "#  A[0] * A[9]\n\t"
+        "ldr	r10, [%[a], #36]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[8]\n\t"
+        "ldr	r10, [%[a], #32]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
         "#  A[2] * A[7]\n\t"
         "ldr	r10, [%[a], #28]\n\t"
         "ldr	r8, [%[a], #8]\n\t"
-        "umull	r5, r6, r10, r8\n\t"
-        "mov	r4, #0\n\t"
-        "mov	r7, #0\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
         "#  A[3] * A[6]\n\t"
         "ldr	r10, [%[a], #24]\n\t"
         "ldr	r8, [%[a], #12]\n\t"
@@ -8186,20 +10111,364 @@
         "adds	r2, r2, r5\n\t"
         "adcs	r3, r3, r6\n\t"
         "adc	r4, r4, r7\n\t"
-        "str	r2, [%[r], #36]\n\t"
+        "str	r2, [sp, #36]\n\t"
+        "#  A[0] * A[10]\n\t"
+        "ldr	r10, [%[a], #40]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r2, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[9]\n\t"
+        "ldr	r10, [%[a], #36]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[8]\n\t"
+        "ldr	r10, [%[a], #32]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
         "#  A[3] * A[7]\n\t"
         "ldr	r10, [%[a], #28]\n\t"
         "ldr	r8, [%[a], #12]\n\t"
         "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[6]\n\t"
+        "ldr	r10, [%[a], #24]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[5]\n\t"
+        "ldr	r10, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	r2, r2, r7\n\t"
+        "str	r3, [sp, #40]\n\t"
+        "#  A[0] * A[11]\n\t"
+        "ldr	r10, [%[a], #44]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[10]\n\t"
+        "ldr	r10, [%[a], #40]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[9]\n\t"
+        "ldr	r10, [%[a], #36]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[8]\n\t"
+        "ldr	r10, [%[a], #32]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[7]\n\t"
+        "ldr	r10, [%[a], #28]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[6]\n\t"
+        "ldr	r10, [%[a], #24]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [sp, #44]\n\t"
+        "#  A[1] * A[11]\n\t"
+        "ldr	r10, [%[a], #44]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[2] * A[10]\n\t"
+        "ldr	r10, [%[a], #40]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[9]\n\t"
+        "ldr	r10, [%[a], #36]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[8]\n\t"
+        "ldr	r10, [%[a], #32]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[7]\n\t"
+        "ldr	r10, [%[a], #28]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[6]\n\t"
+        "ldr	r10, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r2, r2, r5\n\t"
+        "adcs	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r2, [%[r], #48]\n\t"
+        "#  A[2] * A[11]\n\t"
+        "ldr	r10, [%[a], #44]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r2, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[3] * A[10]\n\t"
+        "ldr	r10, [%[a], #40]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[9]\n\t"
+        "ldr	r10, [%[a], #36]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[8]\n\t"
+        "ldr	r10, [%[a], #32]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[7]\n\t"
+        "ldr	r10, [%[a], #28]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	r2, r2, r7\n\t"
+        "str	r3, [%[r], #52]\n\t"
+        "#  A[3] * A[11]\n\t"
+        "ldr	r10, [%[a], #44]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[4] * A[10]\n\t"
+        "ldr	r10, [%[a], #40]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[9]\n\t"
+        "ldr	r10, [%[a], #36]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[8]\n\t"
+        "ldr	r10, [%[a], #32]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[7]\n\t"
+        "ldr	r10, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [%[r], #56]\n\t"
+        "#  A[4] * A[11]\n\t"
+        "ldr	r10, [%[a], #44]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[5] * A[10]\n\t"
+        "ldr	r10, [%[a], #40]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[9]\n\t"
+        "ldr	r10, [%[a], #36]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[8]\n\t"
+        "ldr	r10, [%[a], #32]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r2, r2, r5\n\t"
+        "adcs	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r2, [%[r], #60]\n\t"
+        "#  A[5] * A[11]\n\t"
+        "ldr	r10, [%[a], #44]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r2, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[6] * A[10]\n\t"
+        "ldr	r10, [%[a], #40]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[9]\n\t"
+        "ldr	r10, [%[a], #36]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[8]\n\t"
+        "ldr	r10, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	r2, r2, r7\n\t"
+        "str	r3, [%[r], #64]\n\t"
+        "#  A[6] * A[11]\n\t"
+        "ldr	r10, [%[a], #44]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[7] * A[10]\n\t"
+        "ldr	r10, [%[a], #40]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[9]\n\t"
+        "ldr	r10, [%[a], #36]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [%[r], #68]\n\t"
+        "#  A[7] * A[11]\n\t"
+        "ldr	r10, [%[a], #44]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r2, r2, r8\n\t"
+        "adcs	r3, r3, r9\n\t"
+        "adc	r4, r14, r14\n\t"
+        "adds	r2, r2, r8\n\t"
+        "adcs	r3, r3, r9\n\t"
+        "adc	r4, r4, r14\n\t"
+        "#  A[8] * A[10]\n\t"
+        "ldr	r10, [%[a], #40]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r2, r2, r8\n\t"
+        "adcs	r3, r3, r9\n\t"
+        "adc	r4, r4, r14\n\t"
+        "adds	r2, r2, r8\n\t"
+        "adcs	r3, r3, r9\n\t"
+        "adc	r4, r4, r14\n\t"
+        "#  A[9] * A[9]\n\t"
+        "ldr	r10, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r2, r2, r8\n\t"
+        "adcs	r3, r3, r9\n\t"
+        "adc	r4, r4, r14\n\t"
+        "str	r2, [%[r], #72]\n\t"
+        "#  A[8] * A[11]\n\t"
+        "ldr	r10, [%[a], #44]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
         "adds	r3, r3, r8\n\t"
         "adcs	r4, r4, r9\n\t"
         "adc	r2, r14, r14\n\t"
         "adds	r3, r3, r8\n\t"
         "adcs	r4, r4, r9\n\t"
         "adc	r2, r2, r14\n\t"
-        "#  A[4] * A[6]\n\t"
-        "ldr	r10, [%[a], #24]\n\t"
-        "ldr	r8, [%[a], #16]\n\t"
+        "#  A[9] * A[10]\n\t"
+        "ldr	r10, [%[a], #40]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
         "umull	r8, r9, r10, r8\n\t"
         "adds	r3, r3, r8\n\t"
         "adcs	r4, r4, r9\n\t"
@@ -8207,16 +10476,10 @@
         "adds	r3, r3, r8\n\t"
         "adcs	r4, r4, r9\n\t"
         "adc	r2, r2, r14\n\t"
-        "#  A[5] * A[5]\n\t"
-        "ldr	r10, [%[a], #20]\n\t"
-        "umull	r8, r9, r10, r10\n\t"
-        "adds	r3, r3, r8\n\t"
-        "adcs	r4, r4, r9\n\t"
-        "adc	r2, r2, r14\n\t"
-        "str	r3, [%[r], #40]\n\t"
-        "#  A[4] * A[7]\n\t"
-        "ldr	r10, [%[a], #28]\n\t"
-        "ldr	r8, [%[a], #16]\n\t"
+        "str	r3, [%[r], #76]\n\t"
+        "#  A[9] * A[11]\n\t"
+        "ldr	r10, [%[a], #44]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
         "umull	r8, r9, r10, r8\n\t"
         "adds	r4, r4, r8\n\t"
         "adcs	r2, r2, r9\n\t"
@@ -8224,20 +10487,16 @@
         "adds	r4, r4, r8\n\t"
         "adcs	r2, r2, r9\n\t"
         "adc	r3, r3, r14\n\t"
-        "#  A[5] * A[6]\n\t"
-        "ldr	r10, [%[a], #24]\n\t"
-        "ldr	r8, [%[a], #20]\n\t"
-        "umull	r8, r9, r10, r8\n\t"
+        "#  A[10] * A[10]\n\t"
+        "ldr	r10, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
         "adds	r4, r4, r8\n\t"
         "adcs	r2, r2, r9\n\t"
         "adc	r3, r3, r14\n\t"
-        "adds	r4, r4, r8\n\t"
-        "adcs	r2, r2, r9\n\t"
-        "adc	r3, r3, r14\n\t"
-        "str	r4, [%[r], #44]\n\t"
-        "#  A[5] * A[7]\n\t"
-        "ldr	r10, [%[a], #28]\n\t"
-        "ldr	r8, [%[a], #20]\n\t"
+        "str	r4, [%[r], #80]\n\t"
+        "#  A[10] * A[11]\n\t"
+        "ldr	r10, [%[a], #44]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
         "umull	r8, r9, r10, r8\n\t"
         "adds	r2, r2, r8\n\t"
         "adcs	r3, r3, r9\n\t"
@@ -8245,37 +10504,43 @@
         "adds	r2, r2, r8\n\t"
         "adcs	r3, r3, r9\n\t"
         "adc	r4, r4, r14\n\t"
-        "#  A[6] * A[6]\n\t"
-        "ldr	r10, [%[a], #24]\n\t"
-        "umull	r8, r9, r10, r10\n\t"
-        "adds	r2, r2, r8\n\t"
-        "adcs	r3, r3, r9\n\t"
-        "adc	r4, r4, r14\n\t"
-        "str	r2, [%[r], #48]\n\t"
-        "#  A[6] * A[7]\n\t"
-        "ldr	r10, [%[a], #28]\n\t"
-        "ldr	r8, [%[a], #24]\n\t"
-        "umull	r8, r9, r10, r8\n\t"
-        "adds	r3, r3, r8\n\t"
-        "adcs	r4, r4, r9\n\t"
-        "adc	r2, r14, r14\n\t"
+        "str	r2, [%[r], #84]\n\t"
+        "#  A[11] * A[11]\n\t"
+        "ldr	r10, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
         "adds	r3, r3, r8\n\t"
-        "adcs	r4, r4, r9\n\t"
-        "adc	r2, r2, r14\n\t"
-        "str	r3, [%[r], #52]\n\t"
-        "#  A[7] * A[7]\n\t"
-        "ldr	r10, [%[a], #28]\n\t"
-        "umull	r8, r9, r10, r10\n\t"
-        "adds	r4, r4, r8\n\t"
-        "adc	r2, r2, r9\n\t"
-        "str	r4, [%[r], #56]\n\t"
-        "str	r2, [%[r], #60]\n\t"
+        "adc	r4, r4, r9\n\t"
+        "str	r3, [%[r], #88]\n\t"
+        "str	r4, [%[r], #92]\n\t"
+        "ldr	r2, [sp, #0]\n\t"
+        "ldr	r3, [sp, #4]\n\t"
+        "ldr	r4, [sp, #8]\n\t"
+        "ldr	r8, [sp, #12]\n\t"
+        "str	r2, [%[r], #0]\n\t"
+        "str	r3, [%[r], #4]\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "str	r8, [%[r], #12]\n\t"
+        "ldr	r2, [sp, #16]\n\t"
+        "ldr	r3, [sp, #20]\n\t"
+        "ldr	r4, [sp, #24]\n\t"
+        "ldr	r8, [sp, #28]\n\t"
+        "str	r2, [%[r], #16]\n\t"
+        "str	r3, [%[r], #20]\n\t"
+        "str	r4, [%[r], #24]\n\t"
+        "str	r8, [%[r], #28]\n\t"
+        "ldr	r2, [sp, #32]\n\t"
+        "ldr	r3, [sp, #36]\n\t"
+        "ldr	r4, [sp, #40]\n\t"
+        "ldr	r8, [sp, #44]\n\t"
+        "str	r2, [%[r], #32]\n\t"
+        "str	r3, [%[r], #36]\n\t"
+        "str	r4, [%[r], #40]\n\t"
+        "str	r8, [%[r], #44]\n\t"
+        "add	sp, sp, #48\n\t"
         :
-        : [r] "r" (r), [a] "r" (a), [tmp] "r" (tmp)
+        : [r] "r" (r), [a] "r" (a)
         : "memory", "r2", "r3", "r4", "r8", "r9", "r10", "r8", "r5", "r6", "r7", "r14"
     );
-
-    XMEMCPY(r, tmp, sizeof(tmp));
 }
 
 /* Add b to a into r. (r = a + b)
@@ -8284,7 +10549,7 @@
  * a  A single precision integer.
  * b  A single precision integer.
  */
-static sp_digit sp_3072_add_8(sp_digit* r, const sp_digit* a,
+static sp_digit sp_3072_add_12(sp_digit* r, const sp_digit* a,
         const sp_digit* b)
 {
     sp_digit c = 0;
@@ -8323,6 +10588,22 @@
         "str	r5, [%[r], #20]\n\t"
         "str	r6, [%[r], #24]\n\t"
         "str	r7, [%[r], #28]\n\t"
+        "ldr	r4, [%[a], #32]\n\t"
+        "ldr	r5, [%[a], #36]\n\t"
+        "ldr	r6, [%[a], #40]\n\t"
+        "ldr	r7, [%[a], #44]\n\t"
+        "ldr	r8, [%[b], #32]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "ldr	r10, [%[b], #40]\n\t"
+        "ldr	r14, [%[b], #44]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #32]\n\t"
+        "str	r5, [%[r], #36]\n\t"
+        "str	r6, [%[r], #40]\n\t"
+        "str	r7, [%[r], #44]\n\t"
         "adc	%[c], r12, r12\n\t"
         : [c] "+r" (c)
         : [r] "r" (r), [a] "r" (a), [b] "r" (b)
@@ -8337,7 +10618,7 @@
  * a  A single precision integer and result.
  * b  A single precision integer.
  */
-static sp_digit sp_3072_sub_in_place_16(sp_digit* a, const sp_digit* b)
+static sp_digit sp_3072_sub_in_place_24(sp_digit* a, const sp_digit* b)
 {
     sp_digit c = 0;
 
@@ -8406,6 +10687,38 @@
         "str	r3, [%[a], #52]\n\t"
         "str	r4, [%[a], #56]\n\t"
         "str	r5, [%[a], #60]\n\t"
+        "ldr	r2, [%[a], #64]\n\t"
+        "ldr	r3, [%[a], #68]\n\t"
+        "ldr	r4, [%[a], #72]\n\t"
+        "ldr	r5, [%[a], #76]\n\t"
+        "ldr	r6, [%[b], #64]\n\t"
+        "ldr	r7, [%[b], #68]\n\t"
+        "ldr	r8, [%[b], #72]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #64]\n\t"
+        "str	r3, [%[a], #68]\n\t"
+        "str	r4, [%[a], #72]\n\t"
+        "str	r5, [%[a], #76]\n\t"
+        "ldr	r2, [%[a], #80]\n\t"
+        "ldr	r3, [%[a], #84]\n\t"
+        "ldr	r4, [%[a], #88]\n\t"
+        "ldr	r5, [%[a], #92]\n\t"
+        "ldr	r6, [%[b], #80]\n\t"
+        "ldr	r7, [%[b], #84]\n\t"
+        "ldr	r8, [%[b], #88]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #80]\n\t"
+        "str	r3, [%[a], #84]\n\t"
+        "str	r4, [%[a], #88]\n\t"
+        "str	r5, [%[a], #92]\n\t"
         "sbc	%[c], r9, r9\n\t"
         : [c] "+r" (c)
         : [a] "r" (a), [b] "r" (b)
@@ -8421,327 +10734,7 @@
  * a  A single precision integer.
  * b  A single precision integer.
  */
-static sp_digit sp_3072_add_16(sp_digit* r, const sp_digit* a,
-        const sp_digit* b)
-{
-    sp_digit c = 0;
-
-    __asm__ __volatile__ (
-        "mov	r12, #0\n\t"
-        "ldr	r4, [%[a], #0]\n\t"
-        "ldr	r5, [%[a], #4]\n\t"
-        "ldr	r6, [%[a], #8]\n\t"
-        "ldr	r7, [%[a], #12]\n\t"
-        "ldr	r8, [%[b], #0]\n\t"
-        "ldr	r9, [%[b], #4]\n\t"
-        "ldr	r10, [%[b], #8]\n\t"
-        "ldr	r14, [%[b], #12]\n\t"
-        "adds	r4, r4, r8\n\t"
-        "adcs	r5, r5, r9\n\t"
-        "adcs	r6, r6, r10\n\t"
-        "adcs	r7, r7, r14\n\t"
-        "str	r4, [%[r], #0]\n\t"
-        "str	r5, [%[r], #4]\n\t"
-        "str	r6, [%[r], #8]\n\t"
-        "str	r7, [%[r], #12]\n\t"
-        "ldr	r4, [%[a], #16]\n\t"
-        "ldr	r5, [%[a], #20]\n\t"
-        "ldr	r6, [%[a], #24]\n\t"
-        "ldr	r7, [%[a], #28]\n\t"
-        "ldr	r8, [%[b], #16]\n\t"
-        "ldr	r9, [%[b], #20]\n\t"
-        "ldr	r10, [%[b], #24]\n\t"
-        "ldr	r14, [%[b], #28]\n\t"
-        "adcs	r4, r4, r8\n\t"
-        "adcs	r5, r5, r9\n\t"
-        "adcs	r6, r6, r10\n\t"
-        "adcs	r7, r7, r14\n\t"
-        "str	r4, [%[r], #16]\n\t"
-        "str	r5, [%[r], #20]\n\t"
-        "str	r6, [%[r], #24]\n\t"
-        "str	r7, [%[r], #28]\n\t"
-        "ldr	r4, [%[a], #32]\n\t"
-        "ldr	r5, [%[a], #36]\n\t"
-        "ldr	r6, [%[a], #40]\n\t"
-        "ldr	r7, [%[a], #44]\n\t"
-        "ldr	r8, [%[b], #32]\n\t"
-        "ldr	r9, [%[b], #36]\n\t"
-        "ldr	r10, [%[b], #40]\n\t"
-        "ldr	r14, [%[b], #44]\n\t"
-        "adcs	r4, r4, r8\n\t"
-        "adcs	r5, r5, r9\n\t"
-        "adcs	r6, r6, r10\n\t"
-        "adcs	r7, r7, r14\n\t"
-        "str	r4, [%[r], #32]\n\t"
-        "str	r5, [%[r], #36]\n\t"
-        "str	r6, [%[r], #40]\n\t"
-        "str	r7, [%[r], #44]\n\t"
-        "ldr	r4, [%[a], #48]\n\t"
-        "ldr	r5, [%[a], #52]\n\t"
-        "ldr	r6, [%[a], #56]\n\t"
-        "ldr	r7, [%[a], #60]\n\t"
-        "ldr	r8, [%[b], #48]\n\t"
-        "ldr	r9, [%[b], #52]\n\t"
-        "ldr	r10, [%[b], #56]\n\t"
-        "ldr	r14, [%[b], #60]\n\t"
-        "adcs	r4, r4, r8\n\t"
-        "adcs	r5, r5, r9\n\t"
-        "adcs	r6, r6, r10\n\t"
-        "adcs	r7, r7, r14\n\t"
-        "str	r4, [%[r], #48]\n\t"
-        "str	r5, [%[r], #52]\n\t"
-        "str	r6, [%[r], #56]\n\t"
-        "str	r7, [%[r], #60]\n\t"
-        "adc	%[c], r12, r12\n\t"
-        : [c] "+r" (c)
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
-        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
-    );
-
-    return c;
-}
-
-/* AND m into each word of a and store in r.
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * m  Mask to AND against each digit.
- */
-static void sp_3072_mask_8(sp_digit* r, sp_digit* a, sp_digit m)
-{
-#ifdef WOLFSSL_SP_SMALL
-    int i;
-
-    for (i=0; i<8; i++)
-        r[i] = a[i] & m;
-#else
-    r[0] = a[0] & m;
-    r[1] = a[1] & m;
-    r[2] = a[2] & m;
-    r[3] = a[3] & m;
-    r[4] = a[4] & m;
-    r[5] = a[5] & m;
-    r[6] = a[6] & m;
-    r[7] = a[7] & m;
-#endif
-}
-
-/* Multiply a and b into r. (r = a * b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A single precision integer.
- */
-static void sp_3072_mul_16(sp_digit* r, const sp_digit* a,
-        const sp_digit* b)
-{
-    sp_digit* z0 = r;
-    sp_digit z1[16];
-    sp_digit a1[8];
-    sp_digit b1[8];
-    sp_digit z2[16];
-    sp_digit u, ca, cb;
-
-    ca = sp_3072_add_8(a1, a, &a[8]);
-    cb = sp_3072_add_8(b1, b, &b[8]);
-    u  = ca & cb;
-    sp_3072_mul_8(z1, a1, b1);
-    sp_3072_mul_8(z2, &a[8], &b[8]);
-    sp_3072_mul_8(z0, a, b);
-    sp_3072_mask_8(r + 16, a1, 0 - cb);
-    sp_3072_mask_8(b1, b1, 0 - ca);
-    u += sp_3072_add_8(r + 16, r + 16, b1);
-    u += sp_3072_sub_in_place_16(z1, z2);
-    u += sp_3072_sub_in_place_16(z1, z0);
-    u += sp_3072_add_16(r + 8, r + 8, z1);
-    r[24] = u;
-    XMEMSET(r + 24 + 1, 0, sizeof(sp_digit) * (8 - 1));
-    sp_3072_add_16(r + 16, r + 16, z2);
-}
-
-/* Square a and put result in r. (r = a * a)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- */
-static void sp_3072_sqr_16(sp_digit* r, const sp_digit* a)
-{
-    sp_digit* z0 = r;
-    sp_digit z2[16];
-    sp_digit z1[16];
-    sp_digit a1[8];
-    sp_digit u;
-
-    u = sp_3072_add_8(a1, a, &a[8]);
-    sp_3072_sqr_8(z1, a1);
-    sp_3072_sqr_8(z2, &a[8]);
-    sp_3072_sqr_8(z0, a);
-    sp_3072_mask_8(r + 16, a1, 0 - u);
-    u += sp_3072_add_8(r + 16, r + 16, r + 16);
-    u += sp_3072_sub_in_place_16(z1, z2);
-    u += sp_3072_sub_in_place_16(z1, z0);
-    u += sp_3072_add_16(r + 8, r + 8, z1);
-    r[24] = u;
-    XMEMSET(r + 24 + 1, 0, sizeof(sp_digit) * (8 - 1));
-    sp_3072_add_16(r + 16, r + 16, z2);
-}
-
-/* Sub b from a into r. (r = a - b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A single precision integer.
- */
-static sp_digit sp_3072_sub_32(sp_digit* r, const sp_digit* a,
-        const sp_digit* b)
-{
-    sp_digit c = 0;
-
-    __asm__ __volatile__ (
-        "ldr	r3, [%[a], #0]\n\t"
-        "ldr	r4, [%[a], #4]\n\t"
-        "ldr	r5, [%[a], #8]\n\t"
-        "ldr	r6, [%[a], #12]\n\t"
-        "ldr	r7, [%[b], #0]\n\t"
-        "ldr	r8, [%[b], #4]\n\t"
-        "ldr	r9, [%[b], #8]\n\t"
-        "ldr	r10, [%[b], #12]\n\t"
-        "subs	r3, r3, r7\n\t"
-        "sbcs	r4, r4, r8\n\t"
-        "sbcs	r5, r5, r9\n\t"
-        "sbcs	r6, r6, r10\n\t"
-        "str	r3, [%[r], #0]\n\t"
-        "str	r4, [%[r], #4]\n\t"
-        "str	r5, [%[r], #8]\n\t"
-        "str	r6, [%[r], #12]\n\t"
-        "ldr	r3, [%[a], #16]\n\t"
-        "ldr	r4, [%[a], #20]\n\t"
-        "ldr	r5, [%[a], #24]\n\t"
-        "ldr	r6, [%[a], #28]\n\t"
-        "ldr	r7, [%[b], #16]\n\t"
-        "ldr	r8, [%[b], #20]\n\t"
-        "ldr	r9, [%[b], #24]\n\t"
-        "ldr	r10, [%[b], #28]\n\t"
-        "sbcs	r3, r3, r7\n\t"
-        "sbcs	r4, r4, r8\n\t"
-        "sbcs	r5, r5, r9\n\t"
-        "sbcs	r6, r6, r10\n\t"
-        "str	r3, [%[r], #16]\n\t"
-        "str	r4, [%[r], #20]\n\t"
-        "str	r5, [%[r], #24]\n\t"
-        "str	r6, [%[r], #28]\n\t"
-        "ldr	r3, [%[a], #32]\n\t"
-        "ldr	r4, [%[a], #36]\n\t"
-        "ldr	r5, [%[a], #40]\n\t"
-        "ldr	r6, [%[a], #44]\n\t"
-        "ldr	r7, [%[b], #32]\n\t"
-        "ldr	r8, [%[b], #36]\n\t"
-        "ldr	r9, [%[b], #40]\n\t"
-        "ldr	r10, [%[b], #44]\n\t"
-        "sbcs	r3, r3, r7\n\t"
-        "sbcs	r4, r4, r8\n\t"
-        "sbcs	r5, r5, r9\n\t"
-        "sbcs	r6, r6, r10\n\t"
-        "str	r3, [%[r], #32]\n\t"
-        "str	r4, [%[r], #36]\n\t"
-        "str	r5, [%[r], #40]\n\t"
-        "str	r6, [%[r], #44]\n\t"
-        "ldr	r3, [%[a], #48]\n\t"
-        "ldr	r4, [%[a], #52]\n\t"
-        "ldr	r5, [%[a], #56]\n\t"
-        "ldr	r6, [%[a], #60]\n\t"
-        "ldr	r7, [%[b], #48]\n\t"
-        "ldr	r8, [%[b], #52]\n\t"
-        "ldr	r9, [%[b], #56]\n\t"
-        "ldr	r10, [%[b], #60]\n\t"
-        "sbcs	r3, r3, r7\n\t"
-        "sbcs	r4, r4, r8\n\t"
-        "sbcs	r5, r5, r9\n\t"
-        "sbcs	r6, r6, r10\n\t"
-        "str	r3, [%[r], #48]\n\t"
-        "str	r4, [%[r], #52]\n\t"
-        "str	r5, [%[r], #56]\n\t"
-        "str	r6, [%[r], #60]\n\t"
-        "ldr	r3, [%[a], #64]\n\t"
-        "ldr	r4, [%[a], #68]\n\t"
-        "ldr	r5, [%[a], #72]\n\t"
-        "ldr	r6, [%[a], #76]\n\t"
-        "ldr	r7, [%[b], #64]\n\t"
-        "ldr	r8, [%[b], #68]\n\t"
-        "ldr	r9, [%[b], #72]\n\t"
-        "ldr	r10, [%[b], #76]\n\t"
-        "sbcs	r3, r3, r7\n\t"
-        "sbcs	r4, r4, r8\n\t"
-        "sbcs	r5, r5, r9\n\t"
-        "sbcs	r6, r6, r10\n\t"
-        "str	r3, [%[r], #64]\n\t"
-        "str	r4, [%[r], #68]\n\t"
-        "str	r5, [%[r], #72]\n\t"
-        "str	r6, [%[r], #76]\n\t"
-        "ldr	r3, [%[a], #80]\n\t"
-        "ldr	r4, [%[a], #84]\n\t"
-        "ldr	r5, [%[a], #88]\n\t"
-        "ldr	r6, [%[a], #92]\n\t"
-        "ldr	r7, [%[b], #80]\n\t"
-        "ldr	r8, [%[b], #84]\n\t"
-        "ldr	r9, [%[b], #88]\n\t"
-        "ldr	r10, [%[b], #92]\n\t"
-        "sbcs	r3, r3, r7\n\t"
-        "sbcs	r4, r4, r8\n\t"
-        "sbcs	r5, r5, r9\n\t"
-        "sbcs	r6, r6, r10\n\t"
-        "str	r3, [%[r], #80]\n\t"
-        "str	r4, [%[r], #84]\n\t"
-        "str	r5, [%[r], #88]\n\t"
-        "str	r6, [%[r], #92]\n\t"
-        "ldr	r3, [%[a], #96]\n\t"
-        "ldr	r4, [%[a], #100]\n\t"
-        "ldr	r5, [%[a], #104]\n\t"
-        "ldr	r6, [%[a], #108]\n\t"
-        "ldr	r7, [%[b], #96]\n\t"
-        "ldr	r8, [%[b], #100]\n\t"
-        "ldr	r9, [%[b], #104]\n\t"
-        "ldr	r10, [%[b], #108]\n\t"
-        "sbcs	r3, r3, r7\n\t"
-        "sbcs	r4, r4, r8\n\t"
-        "sbcs	r5, r5, r9\n\t"
-        "sbcs	r6, r6, r10\n\t"
-        "str	r3, [%[r], #96]\n\t"
-        "str	r4, [%[r], #100]\n\t"
-        "str	r5, [%[r], #104]\n\t"
-        "str	r6, [%[r], #108]\n\t"
-        "ldr	r3, [%[a], #112]\n\t"
-        "ldr	r4, [%[a], #116]\n\t"
-        "ldr	r5, [%[a], #120]\n\t"
-        "ldr	r6, [%[a], #124]\n\t"
-        "ldr	r7, [%[b], #112]\n\t"
-        "ldr	r8, [%[b], #116]\n\t"
-        "ldr	r9, [%[b], #120]\n\t"
-        "ldr	r10, [%[b], #124]\n\t"
-        "sbcs	r3, r3, r7\n\t"
-        "sbcs	r4, r4, r8\n\t"
-        "sbcs	r5, r5, r9\n\t"
-        "sbcs	r6, r6, r10\n\t"
-        "str	r3, [%[r], #112]\n\t"
-        "str	r4, [%[r], #116]\n\t"
-        "str	r5, [%[r], #120]\n\t"
-        "str	r6, [%[r], #124]\n\t"
-        "sbc	%[c], %[c], #0\n\t"
-        : [c] "+r" (c)
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
-        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
-    );
-
-    return c;
-}
-
-/* Add b to a into r. (r = a + b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A single precision integer.
- */
-static sp_digit sp_3072_add_32(sp_digit* r, const sp_digit* a,
+static sp_digit sp_3072_add_24(sp_digit* r, const sp_digit* a,
         const sp_digit* b)
 {
     sp_digit c = 0;
@@ -8844,38 +10837,6 @@
         "str	r5, [%[r], #84]\n\t"
         "str	r6, [%[r], #88]\n\t"
         "str	r7, [%[r], #92]\n\t"
-        "ldr	r4, [%[a], #96]\n\t"
-        "ldr	r5, [%[a], #100]\n\t"
-        "ldr	r6, [%[a], #104]\n\t"
-        "ldr	r7, [%[a], #108]\n\t"
-        "ldr	r8, [%[b], #96]\n\t"
-        "ldr	r9, [%[b], #100]\n\t"
-        "ldr	r10, [%[b], #104]\n\t"
-        "ldr	r14, [%[b], #108]\n\t"
-        "adcs	r4, r4, r8\n\t"
-        "adcs	r5, r5, r9\n\t"
-        "adcs	r6, r6, r10\n\t"
-        "adcs	r7, r7, r14\n\t"
-        "str	r4, [%[r], #96]\n\t"
-        "str	r5, [%[r], #100]\n\t"
-        "str	r6, [%[r], #104]\n\t"
-        "str	r7, [%[r], #108]\n\t"
-        "ldr	r4, [%[a], #112]\n\t"
-        "ldr	r5, [%[a], #116]\n\t"
-        "ldr	r6, [%[a], #120]\n\t"
-        "ldr	r7, [%[a], #124]\n\t"
-        "ldr	r8, [%[b], #112]\n\t"
-        "ldr	r9, [%[b], #116]\n\t"
-        "ldr	r10, [%[b], #120]\n\t"
-        "ldr	r14, [%[b], #124]\n\t"
-        "adcs	r4, r4, r8\n\t"
-        "adcs	r5, r5, r9\n\t"
-        "adcs	r6, r6, r10\n\t"
-        "adcs	r7, r7, r14\n\t"
-        "str	r4, [%[r], #112]\n\t"
-        "str	r5, [%[r], #116]\n\t"
-        "str	r6, [%[r], #120]\n\t"
-        "str	r7, [%[r], #124]\n\t"
         "adc	%[c], r12, r12\n\t"
         : [c] "+r" (c)
         : [r] "r" (r), [a] "r" (a), [b] "r" (b)
@@ -8885,96 +10846,305 @@
     return c;
 }
 
+/* AND m into each word of a and store in r.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * m  Mask to AND against each digit.
+ */
+static void sp_3072_mask_12(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<12; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    r[0] = a[0] & m;
+    r[1] = a[1] & m;
+    r[2] = a[2] & m;
+    r[3] = a[3] & m;
+    r[4] = a[4] & m;
+    r[5] = a[5] & m;
+    r[6] = a[6] & m;
+    r[7] = a[7] & m;
+    r[8] = a[8] & m;
+    r[9] = a[9] & m;
+    r[10] = a[10] & m;
+    r[11] = a[11] & m;
+#endif
+}
+
 /* Multiply a and b into r. (r = a * b)
  *
  * r  A single precision integer.
  * a  A single precision integer.
  * b  A single precision integer.
  */
-SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a,
-    const sp_digit* b)
-{
-    sp_digit p0[32];
-    sp_digit p1[32];
-    sp_digit p2[32];
-    sp_digit p3[32];
-    sp_digit p4[32];
-    sp_digit p5[32];
-    sp_digit t0[32];
-    sp_digit t1[32];
-    sp_digit t2[32];
-    sp_digit a0[16];
-    sp_digit a1[16];
-    sp_digit a2[16];
-    sp_digit b0[16];
-    sp_digit b1[16];
-    sp_digit b2[16];
-    sp_3072_add_16(a0, a, &a[16]);
-    sp_3072_add_16(b0, b, &b[16]);
-    sp_3072_add_16(a1, &a[16], &a[32]);
-    sp_3072_add_16(b1, &b[16], &b[32]);
-    sp_3072_add_16(a2, a0, &a[32]);
-    sp_3072_add_16(b2, b0, &b[32]);
-    sp_3072_mul_16(p0, a, b);
-    sp_3072_mul_16(p2, &a[16], &b[16]);
-    sp_3072_mul_16(p4, &a[32], &b[32]);
-    sp_3072_mul_16(p1, a0, b0);
-    sp_3072_mul_16(p3, a1, b1);
-    sp_3072_mul_16(p5, a2, b2);
-    XMEMSET(r, 0, sizeof(*r)*2*48);
-    sp_3072_sub_32(t0, p3, p2);
-    sp_3072_sub_32(t1, p1, p2);
-    sp_3072_sub_32(t2, p5, t0);
-    sp_3072_sub_32(t2, t2, t1);
-    sp_3072_sub_32(t0, t0, p4);
-    sp_3072_sub_32(t1, t1, p0);
-    sp_3072_add_32(r, r, p0);
-    sp_3072_add_32(&r[16], &r[16], t1);
-    sp_3072_add_32(&r[32], &r[32], t2);
-    sp_3072_add_32(&r[48], &r[48], t0);
-    sp_3072_add_32(&r[64], &r[64], p4);
-}
-
-/* Square a into r. (r = a * a)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- */
-SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a)
-{
-    sp_digit p0[32];
-    sp_digit p1[32];
-    sp_digit p2[32];
-    sp_digit p3[32];
-    sp_digit p4[32];
-    sp_digit p5[32];
-    sp_digit t0[32];
-    sp_digit t1[32];
-    sp_digit t2[32];
-    sp_digit a0[16];
-    sp_digit a1[16];
-    sp_digit a2[16];
-    sp_3072_add_16(a0, a, &a[16]);
-    sp_3072_add_16(a1, &a[16], &a[32]);
-    sp_3072_add_16(a2, a0, &a[32]);
-    sp_3072_sqr_16(p0, a);
-    sp_3072_sqr_16(p2, &a[16]);
-    sp_3072_sqr_16(p4, &a[32]);
-    sp_3072_sqr_16(p1, a0);
-    sp_3072_sqr_16(p3, a1);
-    sp_3072_sqr_16(p5, a2);
-    XMEMSET(r, 0, sizeof(*r)*2*48);
-    sp_3072_sub_32(t0, p3, p2);
-    sp_3072_sub_32(t1, p1, p2);
-    sp_3072_sub_32(t2, p5, t0);
-    sp_3072_sub_32(t2, t2, t1);
-    sp_3072_sub_32(t0, t0, p4);
-    sp_3072_sub_32(t1, t1, p0);
-    sp_3072_add_32(r, r, p0);
-    sp_3072_add_32(&r[16], &r[16], t1);
-    sp_3072_add_32(&r[32], &r[32], t2);
-    sp_3072_add_32(&r[48], &r[48], t0);
-    sp_3072_add_32(&r[64], &r[64], p4);
+SP_NOINLINE static void sp_3072_mul_24(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit* z0 = r;
+    sp_digit z1[24];
+    sp_digit a1[12];
+    sp_digit b1[12];
+    sp_digit z2[24];
+    sp_digit u, ca, cb;
+
+    ca = sp_3072_add_12(a1, a, &a[12]);
+    cb = sp_3072_add_12(b1, b, &b[12]);
+    u  = ca & cb;
+    sp_3072_mul_12(z1, a1, b1);
+    sp_3072_mul_12(z2, &a[12], &b[12]);
+    sp_3072_mul_12(z0, a, b);
+    sp_3072_mask_12(r + 24, a1, 0 - cb);
+    sp_3072_mask_12(b1, b1, 0 - ca);
+    u += sp_3072_add_12(r + 24, r + 24, b1);
+    u += sp_3072_sub_in_place_24(z1, z2);
+    u += sp_3072_sub_in_place_24(z1, z0);
+    u += sp_3072_add_24(r + 12, r + 12, z1);
+    r[36] = u;
+    XMEMSET(r + 36 + 1, 0, sizeof(sp_digit) * (12 - 1));
+    (void)sp_3072_add_24(r + 24, r + 24, z2);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_sqr_24(sp_digit* r, const sp_digit* a)
+{
+    sp_digit* z0 = r;
+    sp_digit z2[24];
+    sp_digit z1[24];
+    sp_digit a1[12];
+    sp_digit u;
+
+    u = sp_3072_add_12(a1, a, &a[12]);
+    sp_3072_sqr_12(z1, a1);
+    sp_3072_sqr_12(z2, &a[12]);
+    sp_3072_sqr_12(z0, a);
+    sp_3072_mask_12(r + 24, a1, 0 - u);
+    u += sp_3072_add_12(r + 24, r + 24, r + 24);
+    u += sp_3072_sub_in_place_24(z1, z2);
+    u += sp_3072_sub_in_place_24(z1, z0);
+    u += sp_3072_add_24(r + 12, r + 12, z1);
+    r[36] = u;
+    XMEMSET(r + 36 + 1, 0, sizeof(sp_digit) * (12 - 1));
+    (void)sp_3072_add_24(r + 24, r + 24, z2);
+}
+
+/* Sub b from a into a. (a -= b)
+ *
+ * a  A single precision integer and result.
+ * b  A single precision integer.
+ */
+static sp_digit sp_3072_sub_in_place_48(sp_digit* a, const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldr	r2, [%[a], #0]\n\t"
+        "ldr	r3, [%[a], #4]\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "ldr	r5, [%[a], #12]\n\t"
+        "ldr	r6, [%[b], #0]\n\t"
+        "ldr	r7, [%[b], #4]\n\t"
+        "ldr	r8, [%[b], #8]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "subs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #0]\n\t"
+        "str	r3, [%[a], #4]\n\t"
+        "str	r4, [%[a], #8]\n\t"
+        "str	r5, [%[a], #12]\n\t"
+        "ldr	r2, [%[a], #16]\n\t"
+        "ldr	r3, [%[a], #20]\n\t"
+        "ldr	r4, [%[a], #24]\n\t"
+        "ldr	r5, [%[a], #28]\n\t"
+        "ldr	r6, [%[b], #16]\n\t"
+        "ldr	r7, [%[b], #20]\n\t"
+        "ldr	r8, [%[b], #24]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #16]\n\t"
+        "str	r3, [%[a], #20]\n\t"
+        "str	r4, [%[a], #24]\n\t"
+        "str	r5, [%[a], #28]\n\t"
+        "ldr	r2, [%[a], #32]\n\t"
+        "ldr	r3, [%[a], #36]\n\t"
+        "ldr	r4, [%[a], #40]\n\t"
+        "ldr	r5, [%[a], #44]\n\t"
+        "ldr	r6, [%[b], #32]\n\t"
+        "ldr	r7, [%[b], #36]\n\t"
+        "ldr	r8, [%[b], #40]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #32]\n\t"
+        "str	r3, [%[a], #36]\n\t"
+        "str	r4, [%[a], #40]\n\t"
+        "str	r5, [%[a], #44]\n\t"
+        "ldr	r2, [%[a], #48]\n\t"
+        "ldr	r3, [%[a], #52]\n\t"
+        "ldr	r4, [%[a], #56]\n\t"
+        "ldr	r5, [%[a], #60]\n\t"
+        "ldr	r6, [%[b], #48]\n\t"
+        "ldr	r7, [%[b], #52]\n\t"
+        "ldr	r8, [%[b], #56]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #48]\n\t"
+        "str	r3, [%[a], #52]\n\t"
+        "str	r4, [%[a], #56]\n\t"
+        "str	r5, [%[a], #60]\n\t"
+        "ldr	r2, [%[a], #64]\n\t"
+        "ldr	r3, [%[a], #68]\n\t"
+        "ldr	r4, [%[a], #72]\n\t"
+        "ldr	r5, [%[a], #76]\n\t"
+        "ldr	r6, [%[b], #64]\n\t"
+        "ldr	r7, [%[b], #68]\n\t"
+        "ldr	r8, [%[b], #72]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #64]\n\t"
+        "str	r3, [%[a], #68]\n\t"
+        "str	r4, [%[a], #72]\n\t"
+        "str	r5, [%[a], #76]\n\t"
+        "ldr	r2, [%[a], #80]\n\t"
+        "ldr	r3, [%[a], #84]\n\t"
+        "ldr	r4, [%[a], #88]\n\t"
+        "ldr	r5, [%[a], #92]\n\t"
+        "ldr	r6, [%[b], #80]\n\t"
+        "ldr	r7, [%[b], #84]\n\t"
+        "ldr	r8, [%[b], #88]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #80]\n\t"
+        "str	r3, [%[a], #84]\n\t"
+        "str	r4, [%[a], #88]\n\t"
+        "str	r5, [%[a], #92]\n\t"
+        "ldr	r2, [%[a], #96]\n\t"
+        "ldr	r3, [%[a], #100]\n\t"
+        "ldr	r4, [%[a], #104]\n\t"
+        "ldr	r5, [%[a], #108]\n\t"
+        "ldr	r6, [%[b], #96]\n\t"
+        "ldr	r7, [%[b], #100]\n\t"
+        "ldr	r8, [%[b], #104]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #96]\n\t"
+        "str	r3, [%[a], #100]\n\t"
+        "str	r4, [%[a], #104]\n\t"
+        "str	r5, [%[a], #108]\n\t"
+        "ldr	r2, [%[a], #112]\n\t"
+        "ldr	r3, [%[a], #116]\n\t"
+        "ldr	r4, [%[a], #120]\n\t"
+        "ldr	r5, [%[a], #124]\n\t"
+        "ldr	r6, [%[b], #112]\n\t"
+        "ldr	r7, [%[b], #116]\n\t"
+        "ldr	r8, [%[b], #120]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #112]\n\t"
+        "str	r3, [%[a], #116]\n\t"
+        "str	r4, [%[a], #120]\n\t"
+        "str	r5, [%[a], #124]\n\t"
+        "ldr	r2, [%[a], #128]\n\t"
+        "ldr	r3, [%[a], #132]\n\t"
+        "ldr	r4, [%[a], #136]\n\t"
+        "ldr	r5, [%[a], #140]\n\t"
+        "ldr	r6, [%[b], #128]\n\t"
+        "ldr	r7, [%[b], #132]\n\t"
+        "ldr	r8, [%[b], #136]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #128]\n\t"
+        "str	r3, [%[a], #132]\n\t"
+        "str	r4, [%[a], #136]\n\t"
+        "str	r5, [%[a], #140]\n\t"
+        "ldr	r2, [%[a], #144]\n\t"
+        "ldr	r3, [%[a], #148]\n\t"
+        "ldr	r4, [%[a], #152]\n\t"
+        "ldr	r5, [%[a], #156]\n\t"
+        "ldr	r6, [%[b], #144]\n\t"
+        "ldr	r7, [%[b], #148]\n\t"
+        "ldr	r8, [%[b], #152]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #144]\n\t"
+        "str	r3, [%[a], #148]\n\t"
+        "str	r4, [%[a], #152]\n\t"
+        "str	r5, [%[a], #156]\n\t"
+        "ldr	r2, [%[a], #160]\n\t"
+        "ldr	r3, [%[a], #164]\n\t"
+        "ldr	r4, [%[a], #168]\n\t"
+        "ldr	r5, [%[a], #172]\n\t"
+        "ldr	r6, [%[b], #160]\n\t"
+        "ldr	r7, [%[b], #164]\n\t"
+        "ldr	r8, [%[b], #168]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #160]\n\t"
+        "str	r3, [%[a], #164]\n\t"
+        "str	r4, [%[a], #168]\n\t"
+        "str	r5, [%[a], #172]\n\t"
+        "ldr	r2, [%[a], #176]\n\t"
+        "ldr	r3, [%[a], #180]\n\t"
+        "ldr	r4, [%[a], #184]\n\t"
+        "ldr	r5, [%[a], #188]\n\t"
+        "ldr	r6, [%[b], #176]\n\t"
+        "ldr	r7, [%[b], #180]\n\t"
+        "ldr	r8, [%[b], #184]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #176]\n\t"
+        "str	r3, [%[a], #180]\n\t"
+        "str	r4, [%[a], #184]\n\t"
+        "str	r5, [%[a], #188]\n\t"
+        "sbc	%[c], r9, r9\n\t"
+        : [c] "+r" (c)
+        : [a] "r" (a), [b] "r" (b)
+        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
+    );
+
+    return c;
 }
 
 /* Add b to a into r. (r = a + b)
@@ -9191,6 +11361,96 @@
     return c;
 }
 
+/* AND m into each word of a and store in r.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * m  Mask to AND against each digit.
+ */
+static void sp_3072_mask_24(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<24; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    int i;
+
+    for (i = 0; i < 24; i += 8) {
+        r[i+0] = a[i+0] & m;
+        r[i+1] = a[i+1] & m;
+        r[i+2] = a[i+2] & m;
+        r[i+3] = a[i+3] & m;
+        r[i+4] = a[i+4] & m;
+        r[i+5] = a[i+5] & m;
+        r[i+6] = a[i+6] & m;
+        r[i+7] = a[i+7] & m;
+    }
+#endif
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit* z0 = r;
+    sp_digit z1[48];
+    sp_digit a1[24];
+    sp_digit b1[24];
+    sp_digit z2[48];
+    sp_digit u, ca, cb;
+
+    ca = sp_3072_add_24(a1, a, &a[24]);
+    cb = sp_3072_add_24(b1, b, &b[24]);
+    u  = ca & cb;
+    sp_3072_mul_24(z1, a1, b1);
+    sp_3072_mul_24(z2, &a[24], &b[24]);
+    sp_3072_mul_24(z0, a, b);
+    sp_3072_mask_24(r + 48, a1, 0 - cb);
+    sp_3072_mask_24(b1, b1, 0 - ca);
+    u += sp_3072_add_24(r + 48, r + 48, b1);
+    u += sp_3072_sub_in_place_48(z1, z2);
+    u += sp_3072_sub_in_place_48(z1, z0);
+    u += sp_3072_add_48(r + 24, r + 24, z1);
+    r[72] = u;
+    XMEMSET(r + 72 + 1, 0, sizeof(sp_digit) * (24 - 1));
+    (void)sp_3072_add_48(r + 48, r + 48, z2);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a)
+{
+    sp_digit* z0 = r;
+    sp_digit z2[48];
+    sp_digit z1[48];
+    sp_digit a1[24];
+    sp_digit u;
+
+    u = sp_3072_add_24(a1, a, &a[24]);
+    sp_3072_sqr_24(z1, a1);
+    sp_3072_sqr_24(z2, &a[24]);
+    sp_3072_sqr_24(z0, a);
+    sp_3072_mask_24(r + 48, a1, 0 - u);
+    u += sp_3072_add_24(r + 48, r + 48, r + 48);
+    u += sp_3072_sub_in_place_48(z1, z2);
+    u += sp_3072_sub_in_place_48(z1, z0);
+    u += sp_3072_add_48(r + 24, r + 24, z1);
+    r[72] = u;
+    XMEMSET(r + 72 + 1, 0, sizeof(sp_digit) * (24 - 1));
+    (void)sp_3072_add_48(r + 48, r + 48, z2);
+}
+
 /* Sub b from a into a. (a -= b)
  *
  * a  A single precision integer and result.
@@ -10006,13 +12266,14 @@
  * a  A single precision integer.
  * m  Mask to AND against each digit.
  */
-static void sp_3072_mask_48(sp_digit* r, sp_digit* a, sp_digit m)
+static void sp_3072_mask_48(sp_digit* r, const sp_digit* a, sp_digit m)
 {
 #ifdef WOLFSSL_SP_SMALL
     int i;
 
-    for (i=0; i<48; i++)
+    for (i=0; i<48; i++) {
         r[i] = a[i] & m;
+    }
 #else
     int i;
 
@@ -10035,7 +12296,7 @@
  * a  A single precision integer.
  * b  A single precision integer.
  */
-static void sp_3072_mul_96(sp_digit* r, const sp_digit* a,
+SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a,
         const sp_digit* b)
 {
     sp_digit* z0 = r;
@@ -10059,7 +12320,7 @@
     u += sp_3072_add_96(r + 48, r + 48, z1);
     r[144] = u;
     XMEMSET(r + 144 + 1, 0, sizeof(sp_digit) * (48 - 1));
-    sp_3072_add_96(r + 96, r + 96, z2);
+    (void)sp_3072_add_96(r + 96, r + 96, z2);
 }
 
 /* Square a and put result in r. (r = a * a)
@@ -10067,7 +12328,7 @@
  * r  A single precision integer.
  * a  A single precision integer.
  */
-static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a)
+SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a)
 {
     sp_digit* z0 = r;
     sp_digit z2[96];
@@ -10086,10 +12347,10 @@
     u += sp_3072_add_96(r + 48, r + 48, z1);
     r[144] = u;
     XMEMSET(r + 144 + 1, 0, sizeof(sp_digit) * (48 - 1));
-    sp_3072_add_96(r + 96, r + 96, z2);
-}
-
-#endif /* WOLFSSL_SP_SMALL */
+    (void)sp_3072_add_96(r + 96, r + 96, z2);
+}
+
+#endif /* !WOLFSSL_SP_SMALL */
 #ifdef WOLFSSL_SP_SMALL
 /* Add b to a into r. (r = a + b)
  *
@@ -10187,15 +12448,15 @@
  */
 static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, const sp_digit* b)
 {
-    sp_digit tmp[192];
-
-    __asm__ __volatile__ (
+    __asm__ __volatile__ (
+        "sub	sp, sp, #768\n\t"
         "mov	r5, #0\n\t"
         "mov	r6, #0\n\t"
         "mov	r7, #0\n\t"
         "mov	r8, #0\n\t"
         "\n1:\n\t"
         "subs	r3, r5, #380\n\t"
+        "it	cc\n\t"
         "movcc	r3, #0\n\t"
         "sub	r4, r5, r3\n\t"
         "\n2:\n\t"
@@ -10212,20 +12473,31 @@
         "cmp	r3, r5\n\t"
         "ble	2b\n\t"
         "\n3:\n\t"
-        "str	r6, [%[r], r5]\n\t"
+        "str	r6, [sp, r5]\n\t"
         "mov	r6, r7\n\t"
         "mov	r7, r8\n\t"
         "mov	r8, #0\n\t"
         "add	r5, r5, #4\n\t"
         "cmp	r5, #760\n\t"
         "ble	1b\n\t"
-        "str	r6, [%[r], r5]\n\t"
-        :
-        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+        "str	r6, [sp, r5]\n\t"
+        "\n4:\n\t"
+        "ldr	r6, [sp, #0]\n\t"
+        "ldr	r7, [sp, #4]\n\t"
+        "ldr	r8, [sp, #8]\n\t"
+        "ldr	r3, [sp, #12]\n\t"
+        "str	r6, [%[r], #0]\n\t"
+        "str	r7, [%[r], #4]\n\t"
+        "str	r8, [%[r], #8]\n\t"
+        "str	r3, [%[r], #12]\n\t"
+        "add	sp, sp, #16\n\t"
+        "add	%[r], %[r], #16\n\t"
+        "subs	r5, r5, #16\n\t"
+        "bgt	4b\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
     );
-
-    XMEMCPY(r, tmp, sizeof(tmp));
 }
 
 /* Square a and put result in r. (r = a * a)
@@ -10235,9 +12507,8 @@
  */
 static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a)
 {
-    sp_digit tmp[192];
-
-    __asm__ __volatile__ (
+    __asm__ __volatile__ (
+        "sub	sp, sp, #768\n\t"
         "mov	r12, #0\n\t"
         "mov	r6, #0\n\t"
         "mov	r7, #0\n\t"
@@ -10245,6 +12516,7 @@
         "mov	r5, #0\n\t"
         "\n1:\n\t"
         "subs	r3, r5, #380\n\t"
+        "it	cc\n\t"
         "movcc	r3, r12\n\t"
         "sub	r4, r5, r3\n\t"
         "\n2:\n\t"
@@ -10276,24 +12548,35 @@
         "cmp	r3, r5\n\t"
         "ble	2b\n\t"
         "\n3:\n\t"
-        "str	r6, [%[r], r5]\n\t"
+        "str	r6, [sp, r5]\n\t"
         "mov	r6, r7\n\t"
         "mov	r7, r8\n\t"
         "mov	r8, #0\n\t"
         "add	r5, r5, #4\n\t"
         "cmp	r5, #760\n\t"
         "ble	1b\n\t"
-        "str	r6, [%[r], r5]\n\t"
-        :
-        : [r] "r" (tmp), [a] "r" (a)
+        "str	r6, [sp, r5]\n\t"
+        "\n4:\n\t"
+        "ldr	r6, [sp, #0]\n\t"
+        "ldr	r7, [sp, #4]\n\t"
+        "ldr	r8, [sp, #8]\n\t"
+        "ldr	r3, [sp, #12]\n\t"
+        "str	r6, [%[r], #0]\n\t"
+        "str	r7, [%[r], #4]\n\t"
+        "str	r8, [%[r], #8]\n\t"
+        "str	r3, [%[r], #12]\n\t"
+        "add	sp, sp, #16\n\t"
+        "add	%[r], %[r], #16\n\t"
+        "subs	r5, r5, #16\n\t"
+        "bgt	4b\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a)
         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r9", "r12"
     );
-
-    XMEMCPY(r, tmp, sizeof(tmp));
 }
 
 #endif /* WOLFSSL_SP_SMALL */
-#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA)
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
 #ifdef WOLFSSL_SP_SMALL
 /* AND m into each word of a and store in r.
  *
@@ -10301,12 +12584,13 @@
  * a  A single precision integer.
  * m  Mask to AND against each digit.
  */
-static void sp_3072_mask_48(sp_digit* r, sp_digit* a, sp_digit m)
+static void sp_3072_mask_48(sp_digit* r, const sp_digit* a, sp_digit m)
 {
     int i;
 
-    for (i=0; i<48; i++)
+    for (i=0; i<48; i++) {
         r[i] = a[i] & m;
+    }
 }
 
 #endif /* WOLFSSL_SP_SMALL */
@@ -10356,143 +12640,6 @@
 
 #endif /* WOLFSSL_SP_SMALL */
 #ifdef WOLFSSL_SP_SMALL
-/* Multiply a and b into r. (r = a * b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A single precision integer.
- */
-static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, const sp_digit* b)
-{
-    sp_digit tmp[96];
-
-    __asm__ __volatile__ (
-        "mov	r5, #0\n\t"
-        "mov	r6, #0\n\t"
-        "mov	r7, #0\n\t"
-        "mov	r8, #0\n\t"
-        "\n1:\n\t"
-        "subs	r3, r5, #188\n\t"
-        "movcc	r3, #0\n\t"
-        "sub	r4, r5, r3\n\t"
-        "\n2:\n\t"
-        "ldr	r14, [%[a], r3]\n\t"
-        "ldr	r12, [%[b], r4]\n\t"
-        "umull	r9, r10, r14, r12\n\t"
-        "adds	r6, r6, r9\n\t"
-        "adcs	r7, r7, r10\n\t"
-        "adc	r8, r8, #0\n\t"
-        "add	r3, r3, #4\n\t"
-        "sub	r4, r4, #4\n\t"
-        "cmp	r3, #192\n\t"
-        "beq	3f\n\t"
-        "cmp	r3, r5\n\t"
-        "ble	2b\n\t"
-        "\n3:\n\t"
-        "str	r6, [%[r], r5]\n\t"
-        "mov	r6, r7\n\t"
-        "mov	r7, r8\n\t"
-        "mov	r8, #0\n\t"
-        "add	r5, r5, #4\n\t"
-        "cmp	r5, #376\n\t"
-        "ble	1b\n\t"
-        "str	r6, [%[r], r5]\n\t"
-        :
-        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
-        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
-    );
-
-    XMEMCPY(r, tmp, sizeof(tmp));
-}
-
-/* Square a and put result in r. (r = a * a)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- */
-static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a)
-{
-    sp_digit tmp[96];
-
-    __asm__ __volatile__ (
-        "mov	r12, #0\n\t"
-        "mov	r6, #0\n\t"
-        "mov	r7, #0\n\t"
-        "mov	r8, #0\n\t"
-        "mov	r5, #0\n\t"
-        "\n1:\n\t"
-        "subs	r3, r5, #188\n\t"
-        "movcc	r3, r12\n\t"
-        "sub	r4, r5, r3\n\t"
-        "\n2:\n\t"
-        "cmp	r4, r3\n\t"
-        "beq	4f\n\t"
-        "ldr	r14, [%[a], r3]\n\t"
-        "ldr	r9, [%[a], r4]\n\t"
-        "umull	r9, r10, r14, r9\n\t"
-        "adds	r6, r6, r9\n\t"
-        "adcs	r7, r7, r10\n\t"
-        "adc	r8, r8, r12\n\t"
-        "adds	r6, r6, r9\n\t"
-        "adcs	r7, r7, r10\n\t"
-        "adc	r8, r8, r12\n\t"
-        "bal	5f\n\t"
-        "\n4:\n\t"
-        "ldr	r14, [%[a], r3]\n\t"
-        "umull	r9, r10, r14, r14\n\t"
-        "adds	r6, r6, r9\n\t"
-        "adcs	r7, r7, r10\n\t"
-        "adc	r8, r8, r12\n\t"
-        "\n5:\n\t"
-        "add	r3, r3, #4\n\t"
-        "sub	r4, r4, #4\n\t"
-        "cmp	r3, #192\n\t"
-        "beq	3f\n\t"
-        "cmp	r3, r4\n\t"
-        "bgt	3f\n\t"
-        "cmp	r3, r5\n\t"
-        "ble	2b\n\t"
-        "\n3:\n\t"
-        "str	r6, [%[r], r5]\n\t"
-        "mov	r6, r7\n\t"
-        "mov	r7, r8\n\t"
-        "mov	r8, #0\n\t"
-        "add	r5, r5, #4\n\t"
-        "cmp	r5, #376\n\t"
-        "ble	1b\n\t"
-        "str	r6, [%[r], r5]\n\t"
-        :
-        : [r] "r" (tmp), [a] "r" (a)
-        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r9", "r12"
-    );
-
-    XMEMCPY(r, tmp, sizeof(tmp));
-}
-
-#endif /* WOLFSSL_SP_SMALL */
-#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */
-
-/* Caclulate the bottom digit of -1/a mod 2^n.
- *
- * a    A single precision number.
- * rho  Bottom word of inverse.
- */
-static void sp_3072_mont_setup(sp_digit* a, sp_digit* rho)
-{
-    sp_digit x, b;
-
-    b = a[0];
-    x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
-    x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
-    x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
-    x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
-
-    /* rho = -1/m mod b */
-    *rho = -x;
-}
-
-#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA)
-#ifdef WOLFSSL_SP_SMALL
 /* Sub b from a into a. (a -= b)
  *
  * a  A single precision integer.
@@ -10534,226 +12681,984 @@
     return c;
 }
 
-#else
-/* Sub b from a into a. (a -= b)
- *
- * a  A single precision integer and result.
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
  * b  A single precision integer.
  */
-static sp_digit sp_3072_sub_in_place_48(sp_digit* a, const sp_digit* b)
-{
-    sp_digit c = 0;
-
-    __asm__ __volatile__ (
-        "ldr	r2, [%[a], #0]\n\t"
-        "ldr	r3, [%[a], #4]\n\t"
-        "ldr	r4, [%[a], #8]\n\t"
-        "ldr	r5, [%[a], #12]\n\t"
-        "ldr	r6, [%[b], #0]\n\t"
-        "ldr	r7, [%[b], #4]\n\t"
-        "ldr	r8, [%[b], #8]\n\t"
-        "ldr	r9, [%[b], #12]\n\t"
-        "subs	r2, r2, r6\n\t"
-        "sbcs	r3, r3, r7\n\t"
-        "sbcs	r4, r4, r8\n\t"
-        "sbcs	r5, r5, r9\n\t"
-        "str	r2, [%[a], #0]\n\t"
-        "str	r3, [%[a], #4]\n\t"
-        "str	r4, [%[a], #8]\n\t"
-        "str	r5, [%[a], #12]\n\t"
-        "ldr	r2, [%[a], #16]\n\t"
-        "ldr	r3, [%[a], #20]\n\t"
-        "ldr	r4, [%[a], #24]\n\t"
-        "ldr	r5, [%[a], #28]\n\t"
-        "ldr	r6, [%[b], #16]\n\t"
-        "ldr	r7, [%[b], #20]\n\t"
-        "ldr	r8, [%[b], #24]\n\t"
-        "ldr	r9, [%[b], #28]\n\t"
-        "sbcs	r2, r2, r6\n\t"
-        "sbcs	r3, r3, r7\n\t"
-        "sbcs	r4, r4, r8\n\t"
-        "sbcs	r5, r5, r9\n\t"
-        "str	r2, [%[a], #16]\n\t"
-        "str	r3, [%[a], #20]\n\t"
-        "str	r4, [%[a], #24]\n\t"
-        "str	r5, [%[a], #28]\n\t"
-        "ldr	r2, [%[a], #32]\n\t"
-        "ldr	r3, [%[a], #36]\n\t"
-        "ldr	r4, [%[a], #40]\n\t"
-        "ldr	r5, [%[a], #44]\n\t"
-        "ldr	r6, [%[b], #32]\n\t"
-        "ldr	r7, [%[b], #36]\n\t"
-        "ldr	r8, [%[b], #40]\n\t"
-        "ldr	r9, [%[b], #44]\n\t"
-        "sbcs	r2, r2, r6\n\t"
-        "sbcs	r3, r3, r7\n\t"
-        "sbcs	r4, r4, r8\n\t"
-        "sbcs	r5, r5, r9\n\t"
-        "str	r2, [%[a], #32]\n\t"
-        "str	r3, [%[a], #36]\n\t"
-        "str	r4, [%[a], #40]\n\t"
-        "str	r5, [%[a], #44]\n\t"
-        "ldr	r2, [%[a], #48]\n\t"
-        "ldr	r3, [%[a], #52]\n\t"
-        "ldr	r4, [%[a], #56]\n\t"
-        "ldr	r5, [%[a], #60]\n\t"
-        "ldr	r6, [%[b], #48]\n\t"
-        "ldr	r7, [%[b], #52]\n\t"
-        "ldr	r8, [%[b], #56]\n\t"
-        "ldr	r9, [%[b], #60]\n\t"
-        "sbcs	r2, r2, r6\n\t"
-        "sbcs	r3, r3, r7\n\t"
-        "sbcs	r4, r4, r8\n\t"
-        "sbcs	r5, r5, r9\n\t"
-        "str	r2, [%[a], #48]\n\t"
-        "str	r3, [%[a], #52]\n\t"
-        "str	r4, [%[a], #56]\n\t"
-        "str	r5, [%[a], #60]\n\t"
-        "ldr	r2, [%[a], #64]\n\t"
-        "ldr	r3, [%[a], #68]\n\t"
-        "ldr	r4, [%[a], #72]\n\t"
-        "ldr	r5, [%[a], #76]\n\t"
-        "ldr	r6, [%[b], #64]\n\t"
-        "ldr	r7, [%[b], #68]\n\t"
-        "ldr	r8, [%[b], #72]\n\t"
-        "ldr	r9, [%[b], #76]\n\t"
-        "sbcs	r2, r2, r6\n\t"
-        "sbcs	r3, r3, r7\n\t"
-        "sbcs	r4, r4, r8\n\t"
-        "sbcs	r5, r5, r9\n\t"
-        "str	r2, [%[a], #64]\n\t"
-        "str	r3, [%[a], #68]\n\t"
-        "str	r4, [%[a], #72]\n\t"
-        "str	r5, [%[a], #76]\n\t"
-        "ldr	r2, [%[a], #80]\n\t"
-        "ldr	r3, [%[a], #84]\n\t"
-        "ldr	r4, [%[a], #88]\n\t"
-        "ldr	r5, [%[a], #92]\n\t"
-        "ldr	r6, [%[b], #80]\n\t"
-        "ldr	r7, [%[b], #84]\n\t"
-        "ldr	r8, [%[b], #88]\n\t"
-        "ldr	r9, [%[b], #92]\n\t"
-        "sbcs	r2, r2, r6\n\t"
-        "sbcs	r3, r3, r7\n\t"
-        "sbcs	r4, r4, r8\n\t"
-        "sbcs	r5, r5, r9\n\t"
-        "str	r2, [%[a], #80]\n\t"
-        "str	r3, [%[a], #84]\n\t"
-        "str	r4, [%[a], #88]\n\t"
-        "str	r5, [%[a], #92]\n\t"
-        "ldr	r2, [%[a], #96]\n\t"
-        "ldr	r3, [%[a], #100]\n\t"
-        "ldr	r4, [%[a], #104]\n\t"
-        "ldr	r5, [%[a], #108]\n\t"
-        "ldr	r6, [%[b], #96]\n\t"
-        "ldr	r7, [%[b], #100]\n\t"
-        "ldr	r8, [%[b], #104]\n\t"
-        "ldr	r9, [%[b], #108]\n\t"
-        "sbcs	r2, r2, r6\n\t"
-        "sbcs	r3, r3, r7\n\t"
-        "sbcs	r4, r4, r8\n\t"
-        "sbcs	r5, r5, r9\n\t"
-        "str	r2, [%[a], #96]\n\t"
-        "str	r3, [%[a], #100]\n\t"
-        "str	r4, [%[a], #104]\n\t"
-        "str	r5, [%[a], #108]\n\t"
-        "ldr	r2, [%[a], #112]\n\t"
-        "ldr	r3, [%[a], #116]\n\t"
-        "ldr	r4, [%[a], #120]\n\t"
-        "ldr	r5, [%[a], #124]\n\t"
-        "ldr	r6, [%[b], #112]\n\t"
-        "ldr	r7, [%[b], #116]\n\t"
-        "ldr	r8, [%[b], #120]\n\t"
-        "ldr	r9, [%[b], #124]\n\t"
-        "sbcs	r2, r2, r6\n\t"
-        "sbcs	r3, r3, r7\n\t"
-        "sbcs	r4, r4, r8\n\t"
-        "sbcs	r5, r5, r9\n\t"
-        "str	r2, [%[a], #112]\n\t"
-        "str	r3, [%[a], #116]\n\t"
-        "str	r4, [%[a], #120]\n\t"
-        "str	r5, [%[a], #124]\n\t"
-        "ldr	r2, [%[a], #128]\n\t"
-        "ldr	r3, [%[a], #132]\n\t"
-        "ldr	r4, [%[a], #136]\n\t"
-        "ldr	r5, [%[a], #140]\n\t"
-        "ldr	r6, [%[b], #128]\n\t"
-        "ldr	r7, [%[b], #132]\n\t"
-        "ldr	r8, [%[b], #136]\n\t"
-        "ldr	r9, [%[b], #140]\n\t"
-        "sbcs	r2, r2, r6\n\t"
-        "sbcs	r3, r3, r7\n\t"
-        "sbcs	r4, r4, r8\n\t"
-        "sbcs	r5, r5, r9\n\t"
-        "str	r2, [%[a], #128]\n\t"
-        "str	r3, [%[a], #132]\n\t"
-        "str	r4, [%[a], #136]\n\t"
-        "str	r5, [%[a], #140]\n\t"
-        "ldr	r2, [%[a], #144]\n\t"
-        "ldr	r3, [%[a], #148]\n\t"
-        "ldr	r4, [%[a], #152]\n\t"
-        "ldr	r5, [%[a], #156]\n\t"
-        "ldr	r6, [%[b], #144]\n\t"
-        "ldr	r7, [%[b], #148]\n\t"
-        "ldr	r8, [%[b], #152]\n\t"
-        "ldr	r9, [%[b], #156]\n\t"
-        "sbcs	r2, r2, r6\n\t"
-        "sbcs	r3, r3, r7\n\t"
-        "sbcs	r4, r4, r8\n\t"
-        "sbcs	r5, r5, r9\n\t"
-        "str	r2, [%[a], #144]\n\t"
-        "str	r3, [%[a], #148]\n\t"
-        "str	r4, [%[a], #152]\n\t"
-        "str	r5, [%[a], #156]\n\t"
-        "ldr	r2, [%[a], #160]\n\t"
-        "ldr	r3, [%[a], #164]\n\t"
-        "ldr	r4, [%[a], #168]\n\t"
-        "ldr	r5, [%[a], #172]\n\t"
-        "ldr	r6, [%[b], #160]\n\t"
-        "ldr	r7, [%[b], #164]\n\t"
-        "ldr	r8, [%[b], #168]\n\t"
-        "ldr	r9, [%[b], #172]\n\t"
-        "sbcs	r2, r2, r6\n\t"
-        "sbcs	r3, r3, r7\n\t"
-        "sbcs	r4, r4, r8\n\t"
-        "sbcs	r5, r5, r9\n\t"
-        "str	r2, [%[a], #160]\n\t"
-        "str	r3, [%[a], #164]\n\t"
-        "str	r4, [%[a], #168]\n\t"
-        "str	r5, [%[a], #172]\n\t"
-        "ldr	r2, [%[a], #176]\n\t"
-        "ldr	r3, [%[a], #180]\n\t"
-        "ldr	r4, [%[a], #184]\n\t"
-        "ldr	r5, [%[a], #188]\n\t"
-        "ldr	r6, [%[b], #176]\n\t"
-        "ldr	r7, [%[b], #180]\n\t"
-        "ldr	r8, [%[b], #184]\n\t"
-        "ldr	r9, [%[b], #188]\n\t"
-        "sbcs	r2, r2, r6\n\t"
-        "sbcs	r3, r3, r7\n\t"
-        "sbcs	r4, r4, r8\n\t"
-        "sbcs	r5, r5, r9\n\t"
-        "str	r2, [%[a], #176]\n\t"
-        "str	r3, [%[a], #180]\n\t"
-        "str	r4, [%[a], #184]\n\t"
-        "str	r5, [%[a], #188]\n\t"
-        "sbc	%[c], r9, r9\n\t"
-        : [c] "+r" (c)
+static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+    __asm__ __volatile__ (
+        "sub	sp, sp, #384\n\t"
+        "mov	r5, #0\n\t"
+        "mov	r6, #0\n\t"
+        "mov	r7, #0\n\t"
+        "mov	r8, #0\n\t"
+        "\n1:\n\t"
+        "subs	r3, r5, #188\n\t"
+        "it	cc\n\t"
+        "movcc	r3, #0\n\t"
+        "sub	r4, r5, r3\n\t"
+        "\n2:\n\t"
+        "ldr	r14, [%[a], r3]\n\t"
+        "ldr	r12, [%[b], r4]\n\t"
+        "umull	r9, r10, r14, r12\n\t"
+        "adds	r6, r6, r9\n\t"
+        "adcs	r7, r7, r10\n\t"
+        "adc	r8, r8, #0\n\t"
+        "add	r3, r3, #4\n\t"
+        "sub	r4, r4, #4\n\t"
+        "cmp	r3, #192\n\t"
+        "beq	3f\n\t"
+        "cmp	r3, r5\n\t"
+        "ble	2b\n\t"
+        "\n3:\n\t"
+        "str	r6, [sp, r5]\n\t"
+        "mov	r6, r7\n\t"
+        "mov	r7, r8\n\t"
+        "mov	r8, #0\n\t"
+        "add	r5, r5, #4\n\t"
+        "cmp	r5, #376\n\t"
+        "ble	1b\n\t"
+        "str	r6, [sp, r5]\n\t"
+        "\n4:\n\t"
+        "ldr	r6, [sp, #0]\n\t"
+        "ldr	r7, [sp, #4]\n\t"
+        "ldr	r8, [sp, #8]\n\t"
+        "ldr	r3, [sp, #12]\n\t"
+        "str	r6, [%[r], #0]\n\t"
+        "str	r7, [%[r], #4]\n\t"
+        "str	r8, [%[r], #8]\n\t"
+        "str	r3, [%[r], #12]\n\t"
+        "add	sp, sp, #16\n\t"
+        "add	%[r], %[r], #16\n\t"
+        "subs	r5, r5, #16\n\t"
+        "bgt	4b\n\t"
+        : [r] "+r" (r)
         : [a] "r" (a), [b] "r" (b)
-        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
-    );
-
-    return c;
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+    );
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "sub	sp, sp, #384\n\t"
+        "mov	r12, #0\n\t"
+        "mov	r6, #0\n\t"
+        "mov	r7, #0\n\t"
+        "mov	r8, #0\n\t"
+        "mov	r5, #0\n\t"
+        "\n1:\n\t"
+        "subs	r3, r5, #188\n\t"
+        "it	cc\n\t"
+        "movcc	r3, r12\n\t"
+        "sub	r4, r5, r3\n\t"
+        "\n2:\n\t"
+        "cmp	r4, r3\n\t"
+        "beq	4f\n\t"
+        "ldr	r14, [%[a], r3]\n\t"
+        "ldr	r9, [%[a], r4]\n\t"
+        "umull	r9, r10, r14, r9\n\t"
+        "adds	r6, r6, r9\n\t"
+        "adcs	r7, r7, r10\n\t"
+        "adc	r8, r8, r12\n\t"
+        "adds	r6, r6, r9\n\t"
+        "adcs	r7, r7, r10\n\t"
+        "adc	r8, r8, r12\n\t"
+        "bal	5f\n\t"
+        "\n4:\n\t"
+        "ldr	r14, [%[a], r3]\n\t"
+        "umull	r9, r10, r14, r14\n\t"
+        "adds	r6, r6, r9\n\t"
+        "adcs	r7, r7, r10\n\t"
+        "adc	r8, r8, r12\n\t"
+        "\n5:\n\t"
+        "add	r3, r3, #4\n\t"
+        "sub	r4, r4, #4\n\t"
+        "cmp	r3, #192\n\t"
+        "beq	3f\n\t"
+        "cmp	r3, r4\n\t"
+        "bgt	3f\n\t"
+        "cmp	r3, r5\n\t"
+        "ble	2b\n\t"
+        "\n3:\n\t"
+        "str	r6, [sp, r5]\n\t"
+        "mov	r6, r7\n\t"
+        "mov	r7, r8\n\t"
+        "mov	r8, #0\n\t"
+        "add	r5, r5, #4\n\t"
+        "cmp	r5, #376\n\t"
+        "ble	1b\n\t"
+        "str	r6, [sp, r5]\n\t"
+        "\n4:\n\t"
+        "ldr	r6, [sp, #0]\n\t"
+        "ldr	r7, [sp, #4]\n\t"
+        "ldr	r8, [sp, #8]\n\t"
+        "ldr	r3, [sp, #12]\n\t"
+        "str	r6, [%[r], #0]\n\t"
+        "str	r7, [%[r], #4]\n\t"
+        "str	r8, [%[r], #8]\n\t"
+        "str	r3, [%[r], #12]\n\t"
+        "add	sp, sp, #16\n\t"
+        "add	%[r], %[r], #16\n\t"
+        "subs	r5, r5, #16\n\t"
+        "bgt	4b\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r9", "r12"
+    );
 }
 
 #endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+/* Caclulate the bottom digit of -1/a mod 2^n.
+ *
+ * a    A single precision number.
+ * rho  Bottom word of inverse.
+ */
+static void sp_3072_mont_setup(const sp_digit* a, sp_digit* rho)
+{
+    sp_digit x, b;
+
+    b = a[0];
+    x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
+
+    /* rho = -1/m mod b */
+    *rho = -x;
+}
+
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision digit.
+ */
+static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a,
+        sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    __asm__ __volatile__ (
+        "mov	r10, #0\n\t"
+        "# A[0] * B\n\t"
+        "ldr	r8, [%[a]]\n\t"
+        "umull	r5, r3, %[b], r8\n\t"
+        "mov	r4, #0\n\t"
+        "str	r5, [%[r]]\n\t"
+        "mov	r5, #0\n\t"
+        "mov	r9, #4\n\t"
+        "1:\n\t"
+        "ldr	r8, [%[a], r9]\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], r9]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "mov	r5, #0\n\t"
+        "add	r9, r9, #4\n\t"
+        "cmp	r9, #384\n\t"
+        "blt	1b\n\t"
+        "str	r3, [%[r], #384]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
+    );
+#else
+    __asm__ __volatile__ (
+        "mov	r10, #0\n\t"
+        "# A[0] * B\n\t"
+        "ldr	r8, [%[a]]\n\t"
+        "umull	r3, r4, %[b], r8\n\t"
+        "mov	r5, #0\n\t"
+        "str	r3, [%[r]]\n\t"
+        "# A[1] * B\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #4]\n\t"
+        "# A[2] * B\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #8]\n\t"
+        "# A[3] * B\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #12]\n\t"
+        "# A[4] * B\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "# A[5] * B\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #20]\n\t"
+        "# A[6] * B\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #24]\n\t"
+        "# A[7] * B\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #28]\n\t"
+        "# A[8] * B\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #32]\n\t"
+        "# A[9] * B\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #36]\n\t"
+        "# A[10] * B\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #40]\n\t"
+        "# A[11] * B\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #44]\n\t"
+        "# A[12] * B\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #48]\n\t"
+        "# A[13] * B\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #52]\n\t"
+        "# A[14] * B\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #56]\n\t"
+        "# A[15] * B\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #60]\n\t"
+        "# A[16] * B\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #64]\n\t"
+        "# A[17] * B\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #68]\n\t"
+        "# A[18] * B\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #72]\n\t"
+        "# A[19] * B\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #76]\n\t"
+        "# A[20] * B\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #80]\n\t"
+        "# A[21] * B\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #84]\n\t"
+        "# A[22] * B\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #88]\n\t"
+        "# A[23] * B\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #92]\n\t"
+        "# A[24] * B\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #96]\n\t"
+        "# A[25] * B\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #100]\n\t"
+        "# A[26] * B\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #104]\n\t"
+        "# A[27] * B\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #108]\n\t"
+        "# A[28] * B\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #112]\n\t"
+        "# A[29] * B\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #116]\n\t"
+        "# A[30] * B\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #120]\n\t"
+        "# A[31] * B\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #124]\n\t"
+        "# A[32] * B\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #128]\n\t"
+        "# A[33] * B\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #132]\n\t"
+        "# A[34] * B\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #136]\n\t"
+        "# A[35] * B\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #140]\n\t"
+        "# A[36] * B\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #144]\n\t"
+        "# A[37] * B\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #148]\n\t"
+        "# A[38] * B\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #152]\n\t"
+        "# A[39] * B\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #156]\n\t"
+        "# A[40] * B\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #160]\n\t"
+        "# A[41] * B\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #164]\n\t"
+        "# A[42] * B\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #168]\n\t"
+        "# A[43] * B\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #172]\n\t"
+        "# A[44] * B\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #176]\n\t"
+        "# A[45] * B\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #180]\n\t"
+        "# A[46] * B\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #184]\n\t"
+        "# A[47] * B\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #188]\n\t"
+        "# A[48] * B\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #192]\n\t"
+        "# A[49] * B\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #196]\n\t"
+        "# A[50] * B\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #200]\n\t"
+        "# A[51] * B\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #204]\n\t"
+        "# A[52] * B\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #208]\n\t"
+        "# A[53] * B\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #212]\n\t"
+        "# A[54] * B\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #216]\n\t"
+        "# A[55] * B\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #220]\n\t"
+        "# A[56] * B\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #224]\n\t"
+        "# A[57] * B\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #228]\n\t"
+        "# A[58] * B\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #232]\n\t"
+        "# A[59] * B\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #236]\n\t"
+        "# A[60] * B\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #240]\n\t"
+        "# A[61] * B\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #244]\n\t"
+        "# A[62] * B\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #248]\n\t"
+        "# A[63] * B\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #252]\n\t"
+        "# A[64] * B\n\t"
+        "ldr	r8, [%[a], #256]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #256]\n\t"
+        "# A[65] * B\n\t"
+        "ldr	r8, [%[a], #260]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #260]\n\t"
+        "# A[66] * B\n\t"
+        "ldr	r8, [%[a], #264]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #264]\n\t"
+        "# A[67] * B\n\t"
+        "ldr	r8, [%[a], #268]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #268]\n\t"
+        "# A[68] * B\n\t"
+        "ldr	r8, [%[a], #272]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #272]\n\t"
+        "# A[69] * B\n\t"
+        "ldr	r8, [%[a], #276]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #276]\n\t"
+        "# A[70] * B\n\t"
+        "ldr	r8, [%[a], #280]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #280]\n\t"
+        "# A[71] * B\n\t"
+        "ldr	r8, [%[a], #284]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #284]\n\t"
+        "# A[72] * B\n\t"
+        "ldr	r8, [%[a], #288]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #288]\n\t"
+        "# A[73] * B\n\t"
+        "ldr	r8, [%[a], #292]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #292]\n\t"
+        "# A[74] * B\n\t"
+        "ldr	r8, [%[a], #296]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #296]\n\t"
+        "# A[75] * B\n\t"
+        "ldr	r8, [%[a], #300]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #300]\n\t"
+        "# A[76] * B\n\t"
+        "ldr	r8, [%[a], #304]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #304]\n\t"
+        "# A[77] * B\n\t"
+        "ldr	r8, [%[a], #308]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #308]\n\t"
+        "# A[78] * B\n\t"
+        "ldr	r8, [%[a], #312]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #312]\n\t"
+        "# A[79] * B\n\t"
+        "ldr	r8, [%[a], #316]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #316]\n\t"
+        "# A[80] * B\n\t"
+        "ldr	r8, [%[a], #320]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #320]\n\t"
+        "# A[81] * B\n\t"
+        "ldr	r8, [%[a], #324]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #324]\n\t"
+        "# A[82] * B\n\t"
+        "ldr	r8, [%[a], #328]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #328]\n\t"
+        "# A[83] * B\n\t"
+        "ldr	r8, [%[a], #332]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #332]\n\t"
+        "# A[84] * B\n\t"
+        "ldr	r8, [%[a], #336]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #336]\n\t"
+        "# A[85] * B\n\t"
+        "ldr	r8, [%[a], #340]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #340]\n\t"
+        "# A[86] * B\n\t"
+        "ldr	r8, [%[a], #344]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #344]\n\t"
+        "# A[87] * B\n\t"
+        "ldr	r8, [%[a], #348]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #348]\n\t"
+        "# A[88] * B\n\t"
+        "ldr	r8, [%[a], #352]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #352]\n\t"
+        "# A[89] * B\n\t"
+        "ldr	r8, [%[a], #356]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #356]\n\t"
+        "# A[90] * B\n\t"
+        "ldr	r8, [%[a], #360]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #360]\n\t"
+        "# A[91] * B\n\t"
+        "ldr	r8, [%[a], #364]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #364]\n\t"
+        "# A[92] * B\n\t"
+        "ldr	r8, [%[a], #368]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #368]\n\t"
+        "# A[93] * B\n\t"
+        "ldr	r8, [%[a], #372]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #372]\n\t"
+        "# A[94] * B\n\t"
+        "ldr	r8, [%[a], #376]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #376]\n\t"
+        "# A[95] * B\n\t"
+        "ldr	r8, [%[a], #380]\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r5, [%[r], #380]\n\t"
+        "str	r3, [%[r], #384]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
+    );
+#endif
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
 /* r = 2^n mod m where n is the number of bits to reduce by.
  * Given m must be 3072 bits, just need to subtract.
  *
  * r  A single precision number.
- * m  A signle precision number.
- */
-static void sp_3072_mont_norm_48(sp_digit* r, sp_digit* m)
+ * m  A single precision number.
+ */
+static void sp_3072_mont_norm_48(sp_digit* r, const sp_digit* m)
 {
     XMEMSET(r, 0, sizeof(sp_digit) * 48);
 
@@ -10769,7 +13674,7 @@
  * b  A single precision number to subtract.
  * m  Mask value to apply.
  */
-static sp_digit sp_3072_cond_sub_48(sp_digit* r, sp_digit* a, sp_digit* b,
+static sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a, const sp_digit* b,
         sp_digit m)
 {
     sp_digit c = 0;
@@ -11053,7 +13958,7 @@
  * m   The single precision number representing the modulus.
  * mp  The digit representing the negative inverse of m mod 2^n.
  */
-SP_NOINLINE static void sp_3072_mont_reduce_48(sp_digit* a, sp_digit* m,
+SP_NOINLINE static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m,
         sp_digit mp)
 {
     sp_digit ca = 0;
@@ -11522,8 +14427,8 @@
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_3072_mont_mul_48(sp_digit* r, sp_digit* a, sp_digit* b,
-        sp_digit* m, sp_digit mp)
+static void sp_3072_mont_mul_48(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
 {
     sp_3072_mul_48(r, a, b);
     sp_3072_mont_reduce_48(r, m, mp);
@@ -11536,7 +14441,7 @@
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_3072_mont_sqr_48(sp_digit* r, sp_digit* a, sp_digit* m,
+static void sp_3072_mont_sqr_48(sp_digit* r, const sp_digit* a, const sp_digit* m,
         sp_digit mp)
 {
     sp_3072_sqr_48(r, a);
@@ -11550,7 +14455,7 @@
  * b  A single precision digit.
  */
 static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a,
-        const sp_digit b)
+        sp_digit b)
 {
 #ifdef WOLFSSL_SP_SMALL
     __asm__ __volatile__ (
@@ -11567,7 +14472,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r3, r3, r6\n\t"
         "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
+        "adc	r5, r5, r10\n\t"
         "str	r3, [%[r], r9]\n\t"
         "mov	r3, r4\n\t"
         "mov	r4, r5\n\t"
@@ -11594,7 +14499,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r4, r4, r6\n\t"
         "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
+        "adc	r3, r3, r10\n\t"
         "str	r4, [%[r], #4]\n\t"
         "# A[2] * B\n\t"
         "ldr	r8, [%[a], #8]\n\t"
@@ -11602,7 +14507,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r5, r5, r6\n\t"
         "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
+        "adc	r4, r4, r10\n\t"
         "str	r5, [%[r], #8]\n\t"
         "# A[3] * B\n\t"
         "ldr	r8, [%[a], #12]\n\t"
@@ -11610,7 +14515,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r3, r3, r6\n\t"
         "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
+        "adc	r5, r5, r10\n\t"
         "str	r3, [%[r], #12]\n\t"
         "# A[4] * B\n\t"
         "ldr	r8, [%[a], #16]\n\t"
@@ -11618,7 +14523,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r4, r4, r6\n\t"
         "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
+        "adc	r3, r3, r10\n\t"
         "str	r4, [%[r], #16]\n\t"
         "# A[5] * B\n\t"
         "ldr	r8, [%[a], #20]\n\t"
@@ -11626,7 +14531,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r5, r5, r6\n\t"
         "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
+        "adc	r4, r4, r10\n\t"
         "str	r5, [%[r], #20]\n\t"
         "# A[6] * B\n\t"
         "ldr	r8, [%[a], #24]\n\t"
@@ -11634,7 +14539,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r3, r3, r6\n\t"
         "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
+        "adc	r5, r5, r10\n\t"
         "str	r3, [%[r], #24]\n\t"
         "# A[7] * B\n\t"
         "ldr	r8, [%[a], #28]\n\t"
@@ -11642,7 +14547,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r4, r4, r6\n\t"
         "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
+        "adc	r3, r3, r10\n\t"
         "str	r4, [%[r], #28]\n\t"
         "# A[8] * B\n\t"
         "ldr	r8, [%[a], #32]\n\t"
@@ -11650,7 +14555,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r5, r5, r6\n\t"
         "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
+        "adc	r4, r4, r10\n\t"
         "str	r5, [%[r], #32]\n\t"
         "# A[9] * B\n\t"
         "ldr	r8, [%[a], #36]\n\t"
@@ -11658,7 +14563,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r3, r3, r6\n\t"
         "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
+        "adc	r5, r5, r10\n\t"
         "str	r3, [%[r], #36]\n\t"
         "# A[10] * B\n\t"
         "ldr	r8, [%[a], #40]\n\t"
@@ -11666,7 +14571,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r4, r4, r6\n\t"
         "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
+        "adc	r3, r3, r10\n\t"
         "str	r4, [%[r], #40]\n\t"
         "# A[11] * B\n\t"
         "ldr	r8, [%[a], #44]\n\t"
@@ -11674,7 +14579,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r5, r5, r6\n\t"
         "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
+        "adc	r4, r4, r10\n\t"
         "str	r5, [%[r], #44]\n\t"
         "# A[12] * B\n\t"
         "ldr	r8, [%[a], #48]\n\t"
@@ -11682,7 +14587,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r3, r3, r6\n\t"
         "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
+        "adc	r5, r5, r10\n\t"
         "str	r3, [%[r], #48]\n\t"
         "# A[13] * B\n\t"
         "ldr	r8, [%[a], #52]\n\t"
@@ -11690,7 +14595,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r4, r4, r6\n\t"
         "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
+        "adc	r3, r3, r10\n\t"
         "str	r4, [%[r], #52]\n\t"
         "# A[14] * B\n\t"
         "ldr	r8, [%[a], #56]\n\t"
@@ -11698,7 +14603,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r5, r5, r6\n\t"
         "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
+        "adc	r4, r4, r10\n\t"
         "str	r5, [%[r], #56]\n\t"
         "# A[15] * B\n\t"
         "ldr	r8, [%[a], #60]\n\t"
@@ -11706,7 +14611,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r3, r3, r6\n\t"
         "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
+        "adc	r5, r5, r10\n\t"
         "str	r3, [%[r], #60]\n\t"
         "# A[16] * B\n\t"
         "ldr	r8, [%[a], #64]\n\t"
@@ -11714,7 +14619,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r4, r4, r6\n\t"
         "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
+        "adc	r3, r3, r10\n\t"
         "str	r4, [%[r], #64]\n\t"
         "# A[17] * B\n\t"
         "ldr	r8, [%[a], #68]\n\t"
@@ -11722,7 +14627,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r5, r5, r6\n\t"
         "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
+        "adc	r4, r4, r10\n\t"
         "str	r5, [%[r], #68]\n\t"
         "# A[18] * B\n\t"
         "ldr	r8, [%[a], #72]\n\t"
@@ -11730,7 +14635,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r3, r3, r6\n\t"
         "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
+        "adc	r5, r5, r10\n\t"
         "str	r3, [%[r], #72]\n\t"
         "# A[19] * B\n\t"
         "ldr	r8, [%[a], #76]\n\t"
@@ -11738,7 +14643,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r4, r4, r6\n\t"
         "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
+        "adc	r3, r3, r10\n\t"
         "str	r4, [%[r], #76]\n\t"
         "# A[20] * B\n\t"
         "ldr	r8, [%[a], #80]\n\t"
@@ -11746,7 +14651,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r5, r5, r6\n\t"
         "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
+        "adc	r4, r4, r10\n\t"
         "str	r5, [%[r], #80]\n\t"
         "# A[21] * B\n\t"
         "ldr	r8, [%[a], #84]\n\t"
@@ -11754,7 +14659,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r3, r3, r6\n\t"
         "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
+        "adc	r5, r5, r10\n\t"
         "str	r3, [%[r], #84]\n\t"
         "# A[22] * B\n\t"
         "ldr	r8, [%[a], #88]\n\t"
@@ -11762,7 +14667,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r4, r4, r6\n\t"
         "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
+        "adc	r3, r3, r10\n\t"
         "str	r4, [%[r], #88]\n\t"
         "# A[23] * B\n\t"
         "ldr	r8, [%[a], #92]\n\t"
@@ -11770,7 +14675,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r5, r5, r6\n\t"
         "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
+        "adc	r4, r4, r10\n\t"
         "str	r5, [%[r], #92]\n\t"
         "# A[24] * B\n\t"
         "ldr	r8, [%[a], #96]\n\t"
@@ -11778,7 +14683,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r3, r3, r6\n\t"
         "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
+        "adc	r5, r5, r10\n\t"
         "str	r3, [%[r], #96]\n\t"
         "# A[25] * B\n\t"
         "ldr	r8, [%[a], #100]\n\t"
@@ -11786,7 +14691,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r4, r4, r6\n\t"
         "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
+        "adc	r3, r3, r10\n\t"
         "str	r4, [%[r], #100]\n\t"
         "# A[26] * B\n\t"
         "ldr	r8, [%[a], #104]\n\t"
@@ -11794,7 +14699,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r5, r5, r6\n\t"
         "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
+        "adc	r4, r4, r10\n\t"
         "str	r5, [%[r], #104]\n\t"
         "# A[27] * B\n\t"
         "ldr	r8, [%[a], #108]\n\t"
@@ -11802,7 +14707,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r3, r3, r6\n\t"
         "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
+        "adc	r5, r5, r10\n\t"
         "str	r3, [%[r], #108]\n\t"
         "# A[28] * B\n\t"
         "ldr	r8, [%[a], #112]\n\t"
@@ -11810,7 +14715,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r4, r4, r6\n\t"
         "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
+        "adc	r3, r3, r10\n\t"
         "str	r4, [%[r], #112]\n\t"
         "# A[29] * B\n\t"
         "ldr	r8, [%[a], #116]\n\t"
@@ -11818,7 +14723,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r5, r5, r6\n\t"
         "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
+        "adc	r4, r4, r10\n\t"
         "str	r5, [%[r], #116]\n\t"
         "# A[30] * B\n\t"
         "ldr	r8, [%[a], #120]\n\t"
@@ -11826,7 +14731,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r3, r3, r6\n\t"
         "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
+        "adc	r5, r5, r10\n\t"
         "str	r3, [%[r], #120]\n\t"
         "# A[31] * B\n\t"
         "ldr	r8, [%[a], #124]\n\t"
@@ -11834,7 +14739,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r4, r4, r6\n\t"
         "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
+        "adc	r3, r3, r10\n\t"
         "str	r4, [%[r], #124]\n\t"
         "# A[32] * B\n\t"
         "ldr	r8, [%[a], #128]\n\t"
@@ -11842,7 +14747,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r5, r5, r6\n\t"
         "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
+        "adc	r4, r4, r10\n\t"
         "str	r5, [%[r], #128]\n\t"
         "# A[33] * B\n\t"
         "ldr	r8, [%[a], #132]\n\t"
@@ -11850,7 +14755,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r3, r3, r6\n\t"
         "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
+        "adc	r5, r5, r10\n\t"
         "str	r3, [%[r], #132]\n\t"
         "# A[34] * B\n\t"
         "ldr	r8, [%[a], #136]\n\t"
@@ -11858,7 +14763,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r4, r4, r6\n\t"
         "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
+        "adc	r3, r3, r10\n\t"
         "str	r4, [%[r], #136]\n\t"
         "# A[35] * B\n\t"
         "ldr	r8, [%[a], #140]\n\t"
@@ -11866,7 +14771,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r5, r5, r6\n\t"
         "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
+        "adc	r4, r4, r10\n\t"
         "str	r5, [%[r], #140]\n\t"
         "# A[36] * B\n\t"
         "ldr	r8, [%[a], #144]\n\t"
@@ -11874,7 +14779,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r3, r3, r6\n\t"
         "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
+        "adc	r5, r5, r10\n\t"
         "str	r3, [%[r], #144]\n\t"
         "# A[37] * B\n\t"
         "ldr	r8, [%[a], #148]\n\t"
@@ -11882,7 +14787,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r4, r4, r6\n\t"
         "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
+        "adc	r3, r3, r10\n\t"
         "str	r4, [%[r], #148]\n\t"
         "# A[38] * B\n\t"
         "ldr	r8, [%[a], #152]\n\t"
@@ -11890,7 +14795,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r5, r5, r6\n\t"
         "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
+        "adc	r4, r4, r10\n\t"
         "str	r5, [%[r], #152]\n\t"
         "# A[39] * B\n\t"
         "ldr	r8, [%[a], #156]\n\t"
@@ -11898,7 +14803,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r3, r3, r6\n\t"
         "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
+        "adc	r5, r5, r10\n\t"
         "str	r3, [%[r], #156]\n\t"
         "# A[40] * B\n\t"
         "ldr	r8, [%[a], #160]\n\t"
@@ -11906,7 +14811,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r4, r4, r6\n\t"
         "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
+        "adc	r3, r3, r10\n\t"
         "str	r4, [%[r], #160]\n\t"
         "# A[41] * B\n\t"
         "ldr	r8, [%[a], #164]\n\t"
@@ -11914,7 +14819,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r5, r5, r6\n\t"
         "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
+        "adc	r4, r4, r10\n\t"
         "str	r5, [%[r], #164]\n\t"
         "# A[42] * B\n\t"
         "ldr	r8, [%[a], #168]\n\t"
@@ -11922,7 +14827,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r3, r3, r6\n\t"
         "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
+        "adc	r5, r5, r10\n\t"
         "str	r3, [%[r], #168]\n\t"
         "# A[43] * B\n\t"
         "ldr	r8, [%[a], #172]\n\t"
@@ -11930,7 +14835,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r4, r4, r6\n\t"
         "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
+        "adc	r3, r3, r10\n\t"
         "str	r4, [%[r], #172]\n\t"
         "# A[44] * B\n\t"
         "ldr	r8, [%[a], #176]\n\t"
@@ -11938,7 +14843,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r5, r5, r6\n\t"
         "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
+        "adc	r4, r4, r10\n\t"
         "str	r5, [%[r], #176]\n\t"
         "# A[45] * B\n\t"
         "ldr	r8, [%[a], #180]\n\t"
@@ -11946,7 +14851,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r3, r3, r6\n\t"
         "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
+        "adc	r5, r5, r10\n\t"
         "str	r3, [%[r], #180]\n\t"
         "# A[46] * B\n\t"
         "ldr	r8, [%[a], #184]\n\t"
@@ -11954,7 +14859,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r4, r4, r6\n\t"
         "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
+        "adc	r3, r3, r10\n\t"
         "str	r4, [%[r], #184]\n\t"
         "# A[47] * B\n\t"
         "ldr	r8, [%[a], #188]\n\t"
@@ -12035,11 +14940,12 @@
  * return -ve, 0 or +ve if a is less than, equal to or greater than b
  * respectively.
  */
-static int32_t sp_3072_cmp_48(sp_digit* a, sp_digit* b)
+static int32_t sp_3072_cmp_48(const sp_digit* a, const sp_digit* b)
 {
     sp_digit r = -1;
     sp_digit one = 1;
 
+
 #ifdef WOLFSSL_SP_SMALL
     __asm__ __volatile__ (
         "mov	r7, #0\n\t"
@@ -12051,15 +14957,18 @@
         "and	r4, r4, r3\n\t"
         "and	r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
-        "movne	r3, r7\n\t"
-        "sub	r6, r6, #4\n\t"
-        "bcc	1b\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "subs	r6, r6, #4\n\t"
+        "bcs	1b\n\t"
         "eor	%[r], %[r], r3\n\t"
         : [r] "+r" (r)
         : [a] "r" (a), [b] "r" (b), [one] "r" (one)
-        : "r2", "r3", "r4", "r5", "r6", "r7"
+        : "r3", "r4", "r5", "r6", "r7"
     );
 #else
     __asm__ __volatile__ (
@@ -12070,389 +14979,533 @@
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #184]\n\t"
         "ldr		r5, [%[b], #184]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #180]\n\t"
         "ldr		r5, [%[b], #180]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #176]\n\t"
         "ldr		r5, [%[b], #176]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #172]\n\t"
         "ldr		r5, [%[b], #172]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #168]\n\t"
         "ldr		r5, [%[b], #168]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #164]\n\t"
         "ldr		r5, [%[b], #164]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #160]\n\t"
         "ldr		r5, [%[b], #160]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #156]\n\t"
         "ldr		r5, [%[b], #156]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #152]\n\t"
         "ldr		r5, [%[b], #152]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #148]\n\t"
         "ldr		r5, [%[b], #148]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #144]\n\t"
         "ldr		r5, [%[b], #144]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #140]\n\t"
         "ldr		r5, [%[b], #140]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #136]\n\t"
         "ldr		r5, [%[b], #136]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #132]\n\t"
         "ldr		r5, [%[b], #132]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #128]\n\t"
         "ldr		r5, [%[b], #128]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #124]\n\t"
         "ldr		r5, [%[b], #124]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #120]\n\t"
         "ldr		r5, [%[b], #120]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #116]\n\t"
         "ldr		r5, [%[b], #116]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #112]\n\t"
         "ldr		r5, [%[b], #112]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #108]\n\t"
         "ldr		r5, [%[b], #108]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #104]\n\t"
         "ldr		r5, [%[b], #104]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #100]\n\t"
         "ldr		r5, [%[b], #100]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #96]\n\t"
         "ldr		r5, [%[b], #96]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #92]\n\t"
         "ldr		r5, [%[b], #92]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #88]\n\t"
         "ldr		r5, [%[b], #88]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #84]\n\t"
         "ldr		r5, [%[b], #84]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #80]\n\t"
         "ldr		r5, [%[b], #80]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #76]\n\t"
         "ldr		r5, [%[b], #76]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #72]\n\t"
         "ldr		r5, [%[b], #72]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #68]\n\t"
         "ldr		r5, [%[b], #68]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #64]\n\t"
         "ldr		r5, [%[b], #64]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #60]\n\t"
         "ldr		r5, [%[b], #60]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #56]\n\t"
         "ldr		r5, [%[b], #56]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #52]\n\t"
         "ldr		r5, [%[b], #52]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #48]\n\t"
         "ldr		r5, [%[b], #48]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #44]\n\t"
         "ldr		r5, [%[b], #44]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #40]\n\t"
         "ldr		r5, [%[b], #40]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #36]\n\t"
         "ldr		r5, [%[b], #36]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #32]\n\t"
         "ldr		r5, [%[b], #32]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #28]\n\t"
         "ldr		r5, [%[b], #28]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #24]\n\t"
         "ldr		r5, [%[b], #24]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #20]\n\t"
         "ldr		r5, [%[b], #20]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #16]\n\t"
         "ldr		r5, [%[b], #16]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #12]\n\t"
         "ldr		r5, [%[b], #12]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #8]\n\t"
         "ldr		r5, [%[b], #8]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #4]\n\t"
         "ldr		r5, [%[b], #4]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #0]\n\t"
         "ldr		r5, [%[b], #0]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "eor	%[r], %[r], r3\n\t"
         : [r] "+r" (r)
         : [a] "r" (a), [b] "r" (b), [one] "r" (one)
-        : "r2", "r3", "r4", "r5", "r6", "r7"
+        : "r3", "r4", "r5", "r6", "r7"
     );
 #endif
 
@@ -12468,7 +15521,7 @@
  * r  Remainder from the division.
  * returns MP_OKAY indicating success.
  */
-static WC_INLINE int sp_3072_div_48(sp_digit* a, sp_digit* d, sp_digit* m,
+static WC_INLINE int sp_3072_div_48(const sp_digit* a, const sp_digit* d, sp_digit* m,
         sp_digit* r)
 {
     sp_digit t1[96], t2[49];
@@ -12477,6 +15530,7 @@
 
     (void)m;
 
+
     div = d[47];
     XMEMCPY(t1, a, sizeof(*t1) * 2 * 48);
     for (i=47; i>=0; i--) {
@@ -12492,7 +15546,7 @@
     }
 
     r1 = sp_3072_cmp_48(t1, d) >= 0;
-    sp_3072_cond_sub_48(r, t1, t2, (sp_digit)0 - r1);
+    sp_3072_cond_sub_48(r, t1, d, (sp_digit)0 - r1);
 
     return MP_OKAY;
 }
@@ -12504,7 +15558,7 @@
  * m  A single precision number that is the modulus to reduce with.
  * returns MP_OKAY indicating success.
  */
-static WC_INLINE int sp_3072_mod_48(sp_digit* r, sp_digit* a, sp_digit* m)
+static WC_INLINE int sp_3072_mod_48(sp_digit* r, const sp_digit* a, const sp_digit* m)
 {
     return sp_3072_div_48(a, m, NULL, r);
 }
@@ -12519,8 +15573,8 @@
  * m     A single precision number that is the modulus.
  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
  */
-static int sp_3072_mod_exp_48(sp_digit* r, sp_digit* a, sp_digit* e,
-        int bits, sp_digit* m, int reduceA)
+static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
 {
 #ifndef WOLFSSL_SMALL_STACK
     sp_digit t[16][96];
@@ -12539,27 +15593,28 @@
 #ifdef WOLFSSL_SMALL_STACK
     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 96, NULL,
                             DYNAMIC_TYPE_TMP_BUFFER);
-    if (td == NULL)
+    if (td == NULL) {
         err = MEMORY_E;
-
-    if (err == MP_OKAY) {
-        for (i=0; i<16; i++)
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<16; i++) {
             t[i] = td + i * 96;
+        }
+#endif
         norm = t[0];
-    }
-#else
-    norm = t[0];
-#endif
-
-    if (err == MP_OKAY) {
+
         sp_3072_mont_setup(m, &mp);
         sp_3072_mont_norm_48(norm, m);
 
-        XMEMSET(t[1], 0, sizeof(sp_digit) * 48);
-        if (reduceA) {
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 48U);
+        if (reduceA != 0) {
             err = sp_3072_mod_48(t[1] + 48, a, m);
-            if (err == MP_OKAY)
+            if (err == MP_OKAY) {
                 err = sp_3072_mod_48(t[1], t[1], m);
+            }
         }
         else {
             XMEMCPY(t[1] + 48, a, sizeof(sp_digit) * 48);
@@ -12585,9 +15640,16 @@
 
         i = (bits - 1) / 32;
         n = e[i--];
-        y = n >> 28;
-        n <<= 4;
-        c = 28;
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 4;
+        if (c == 32) {
+            c = 28;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
         XMEMCPY(r, t[y], sizeof(sp_digit) * 48);
         for (; i>=0 || c>=4; ) {
             if (c == 0) {
@@ -12618,7 +15680,7 @@
             sp_3072_mont_mul_48(r, r, t[y], m, mp);
         }
 
-        XMEMSET(&r[48], 0, sizeof(sp_digit) * 48);
+        XMEMSET(&r[48], 0, sizeof(sp_digit) * 48U);
         sp_3072_mont_reduce_48(r, m, mp);
 
         mask = 0 - (sp_3072_cmp_48(r, m) >= 0);
@@ -12626,8 +15688,9 @@
     }
 
 #ifdef WOLFSSL_SMALL_STACK
-    if (td != NULL)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
 #endif
 
     return err;
@@ -12642,8 +15705,8 @@
  * m     A single precision number that is the modulus.
  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
  */
-static int sp_3072_mod_exp_48(sp_digit* r, sp_digit* a, sp_digit* e,
-        int bits, sp_digit* m, int reduceA)
+static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
 {
 #ifndef WOLFSSL_SMALL_STACK
     sp_digit t[32][96];
@@ -12662,27 +15725,28 @@
 #ifdef WOLFSSL_SMALL_STACK
     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 96, NULL,
                             DYNAMIC_TYPE_TMP_BUFFER);
-    if (td == NULL)
+    if (td == NULL) {
         err = MEMORY_E;
-
-    if (err == MP_OKAY) {
-        for (i=0; i<32; i++)
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<32; i++) {
             t[i] = td + i * 96;
+        }
+#endif
         norm = t[0];
-    }
-#else
-    norm = t[0];
-#endif
-
-    if (err == MP_OKAY) {
+
         sp_3072_mont_setup(m, &mp);
         sp_3072_mont_norm_48(norm, m);
 
-        XMEMSET(t[1], 0, sizeof(sp_digit) * 48);
-        if (reduceA) {
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 48U);
+        if (reduceA != 0) {
             err = sp_3072_mod_48(t[1] + 48, a, m);
-            if (err == MP_OKAY)
+            if (err == MP_OKAY) {
                 err = sp_3072_mod_48(t[1], t[1], m);
+            }
         }
         else {
             XMEMCPY(t[1] + 48, a, sizeof(sp_digit) * 48);
@@ -12724,9 +15788,16 @@
 
         i = (bits - 1) / 32;
         n = e[i--];
-        y = n >> 27;
-        n <<= 5;
-        c = 27;
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 5;
+        if (c == 32) {
+            c = 27;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
         XMEMCPY(r, t[y], sizeof(sp_digit) * 48);
         for (; i>=0 || c>=5; ) {
             if (c == 0) {
@@ -12757,11 +15828,8 @@
 
             sp_3072_mont_mul_48(r, r, t[y], m, mp);
         }
-        y = e[0] & 0x1;
-        sp_3072_mont_sqr_48(r, r, m, mp);
-        sp_3072_mont_mul_48(r, r, t[y], m, mp);
-
-        XMEMSET(&r[48], 0, sizeof(sp_digit) * 48);
+
+        XMEMSET(&r[48], 0, sizeof(sp_digit) * 48U);
         sp_3072_mont_reduce_48(r, m, mp);
 
         mask = 0 - (sp_3072_cmp_48(r, m) >= 0);
@@ -12769,23 +15837,25 @@
     }
 
 #ifdef WOLFSSL_SMALL_STACK
-    if (td != NULL)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
 #endif
 
     return err;
 }
 #endif /* WOLFSSL_SP_SMALL */
 
-#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */
-
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
 /* r = 2^n mod m where n is the number of bits to reduce by.
  * Given m must be 3072 bits, just need to subtract.
  *
  * r  A single precision number.
- * m  A signle precision number.
- */
-static void sp_3072_mont_norm_96(sp_digit* r, sp_digit* m)
+ * m  A single precision number.
+ */
+static void sp_3072_mont_norm_96(sp_digit* r, const sp_digit* m)
 {
     XMEMSET(r, 0, sizeof(sp_digit) * 96);
 
@@ -12793,6 +15863,7 @@
     sp_3072_sub_in_place_96(r, m);
 }
 
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
 /* Conditionally subtract b from a using the mask m.
  * m is -1 to subtract and 0 when not copying.
  *
@@ -12801,7 +15872,7 @@
  * b  A single precision number to subtract.
  * m  Mask value to apply.
  */
-static sp_digit sp_3072_cond_sub_96(sp_digit* r, sp_digit* a, sp_digit* b,
+static sp_digit sp_3072_cond_sub_96(sp_digit* r, const sp_digit* a, const sp_digit* b,
         sp_digit m)
 {
     sp_digit c = 0;
@@ -13325,7 +16396,7 @@
  * m   The single precision number representing the modulus.
  * mp  The digit representing the negative inverse of m mod 2^n.
  */
-SP_NOINLINE static void sp_3072_mont_reduce_96(sp_digit* a, sp_digit* m,
+SP_NOINLINE static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m,
         sp_digit mp)
 {
     sp_digit ca = 0;
@@ -14226,8 +17297,8 @@
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_3072_mont_mul_96(sp_digit* r, sp_digit* a, sp_digit* b,
-        sp_digit* m, sp_digit mp)
+static void sp_3072_mont_mul_96(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
 {
     sp_3072_mul_96(r, a, b);
     sp_3072_mont_reduce_96(r, m, mp);
@@ -14240,824 +17311,14 @@
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_3072_mont_sqr_96(sp_digit* r, sp_digit* a, sp_digit* m,
+static void sp_3072_mont_sqr_96(sp_digit* r, const sp_digit* a, const sp_digit* m,
         sp_digit mp)
 {
     sp_3072_sqr_96(r, a);
     sp_3072_mont_reduce_96(r, m, mp);
 }
 
-/* Mul a by digit b into r. (r = a * b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A single precision digit.
- */
-static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a,
-        const sp_digit b)
-{
-#ifdef WOLFSSL_SP_SMALL
-    __asm__ __volatile__ (
-        "mov	r10, #0\n\t"
-        "# A[0] * B\n\t"
-        "ldr	r8, [%[a]]\n\t"
-        "umull	r5, r3, %[b], r8\n\t"
-        "mov	r4, #0\n\t"
-        "str	r5, [%[r]]\n\t"
-        "mov	r5, #0\n\t"
-        "mov	r9, #4\n\t"
-        "1:\n\t"
-        "ldr	r8, [%[a], r9]\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], r9]\n\t"
-        "mov	r3, r4\n\t"
-        "mov	r4, r5\n\t"
-        "mov	r5, #0\n\t"
-        "add	r9, r9, #4\n\t"
-        "cmp	r9, #384\n\t"
-        "blt	1b\n\t"
-        "str	r3, [%[r], #384]\n\t"
-        :
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
-        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
-    );
-#else
-    __asm__ __volatile__ (
-        "mov	r10, #0\n\t"
-        "# A[0] * B\n\t"
-        "ldr	r8, [%[a]]\n\t"
-        "umull	r3, r4, %[b], r8\n\t"
-        "mov	r5, #0\n\t"
-        "str	r3, [%[r]]\n\t"
-        "# A[1] * B\n\t"
-        "ldr	r8, [%[a], #4]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #4]\n\t"
-        "# A[2] * B\n\t"
-        "ldr	r8, [%[a], #8]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #8]\n\t"
-        "# A[3] * B\n\t"
-        "ldr	r8, [%[a], #12]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #12]\n\t"
-        "# A[4] * B\n\t"
-        "ldr	r8, [%[a], #16]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #16]\n\t"
-        "# A[5] * B\n\t"
-        "ldr	r8, [%[a], #20]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #20]\n\t"
-        "# A[6] * B\n\t"
-        "ldr	r8, [%[a], #24]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #24]\n\t"
-        "# A[7] * B\n\t"
-        "ldr	r8, [%[a], #28]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #28]\n\t"
-        "# A[8] * B\n\t"
-        "ldr	r8, [%[a], #32]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #32]\n\t"
-        "# A[9] * B\n\t"
-        "ldr	r8, [%[a], #36]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #36]\n\t"
-        "# A[10] * B\n\t"
-        "ldr	r8, [%[a], #40]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #40]\n\t"
-        "# A[11] * B\n\t"
-        "ldr	r8, [%[a], #44]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #44]\n\t"
-        "# A[12] * B\n\t"
-        "ldr	r8, [%[a], #48]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #48]\n\t"
-        "# A[13] * B\n\t"
-        "ldr	r8, [%[a], #52]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #52]\n\t"
-        "# A[14] * B\n\t"
-        "ldr	r8, [%[a], #56]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #56]\n\t"
-        "# A[15] * B\n\t"
-        "ldr	r8, [%[a], #60]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #60]\n\t"
-        "# A[16] * B\n\t"
-        "ldr	r8, [%[a], #64]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #64]\n\t"
-        "# A[17] * B\n\t"
-        "ldr	r8, [%[a], #68]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #68]\n\t"
-        "# A[18] * B\n\t"
-        "ldr	r8, [%[a], #72]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #72]\n\t"
-        "# A[19] * B\n\t"
-        "ldr	r8, [%[a], #76]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #76]\n\t"
-        "# A[20] * B\n\t"
-        "ldr	r8, [%[a], #80]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #80]\n\t"
-        "# A[21] * B\n\t"
-        "ldr	r8, [%[a], #84]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #84]\n\t"
-        "# A[22] * B\n\t"
-        "ldr	r8, [%[a], #88]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #88]\n\t"
-        "# A[23] * B\n\t"
-        "ldr	r8, [%[a], #92]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #92]\n\t"
-        "# A[24] * B\n\t"
-        "ldr	r8, [%[a], #96]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #96]\n\t"
-        "# A[25] * B\n\t"
-        "ldr	r8, [%[a], #100]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #100]\n\t"
-        "# A[26] * B\n\t"
-        "ldr	r8, [%[a], #104]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #104]\n\t"
-        "# A[27] * B\n\t"
-        "ldr	r8, [%[a], #108]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #108]\n\t"
-        "# A[28] * B\n\t"
-        "ldr	r8, [%[a], #112]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #112]\n\t"
-        "# A[29] * B\n\t"
-        "ldr	r8, [%[a], #116]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #116]\n\t"
-        "# A[30] * B\n\t"
-        "ldr	r8, [%[a], #120]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #120]\n\t"
-        "# A[31] * B\n\t"
-        "ldr	r8, [%[a], #124]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #124]\n\t"
-        "# A[32] * B\n\t"
-        "ldr	r8, [%[a], #128]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #128]\n\t"
-        "# A[33] * B\n\t"
-        "ldr	r8, [%[a], #132]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #132]\n\t"
-        "# A[34] * B\n\t"
-        "ldr	r8, [%[a], #136]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #136]\n\t"
-        "# A[35] * B\n\t"
-        "ldr	r8, [%[a], #140]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #140]\n\t"
-        "# A[36] * B\n\t"
-        "ldr	r8, [%[a], #144]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #144]\n\t"
-        "# A[37] * B\n\t"
-        "ldr	r8, [%[a], #148]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #148]\n\t"
-        "# A[38] * B\n\t"
-        "ldr	r8, [%[a], #152]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #152]\n\t"
-        "# A[39] * B\n\t"
-        "ldr	r8, [%[a], #156]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #156]\n\t"
-        "# A[40] * B\n\t"
-        "ldr	r8, [%[a], #160]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #160]\n\t"
-        "# A[41] * B\n\t"
-        "ldr	r8, [%[a], #164]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #164]\n\t"
-        "# A[42] * B\n\t"
-        "ldr	r8, [%[a], #168]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #168]\n\t"
-        "# A[43] * B\n\t"
-        "ldr	r8, [%[a], #172]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #172]\n\t"
-        "# A[44] * B\n\t"
-        "ldr	r8, [%[a], #176]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #176]\n\t"
-        "# A[45] * B\n\t"
-        "ldr	r8, [%[a], #180]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #180]\n\t"
-        "# A[46] * B\n\t"
-        "ldr	r8, [%[a], #184]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #184]\n\t"
-        "# A[47] * B\n\t"
-        "ldr	r8, [%[a], #188]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #188]\n\t"
-        "# A[48] * B\n\t"
-        "ldr	r8, [%[a], #192]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #192]\n\t"
-        "# A[49] * B\n\t"
-        "ldr	r8, [%[a], #196]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #196]\n\t"
-        "# A[50] * B\n\t"
-        "ldr	r8, [%[a], #200]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #200]\n\t"
-        "# A[51] * B\n\t"
-        "ldr	r8, [%[a], #204]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #204]\n\t"
-        "# A[52] * B\n\t"
-        "ldr	r8, [%[a], #208]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #208]\n\t"
-        "# A[53] * B\n\t"
-        "ldr	r8, [%[a], #212]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #212]\n\t"
-        "# A[54] * B\n\t"
-        "ldr	r8, [%[a], #216]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #216]\n\t"
-        "# A[55] * B\n\t"
-        "ldr	r8, [%[a], #220]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #220]\n\t"
-        "# A[56] * B\n\t"
-        "ldr	r8, [%[a], #224]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #224]\n\t"
-        "# A[57] * B\n\t"
-        "ldr	r8, [%[a], #228]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #228]\n\t"
-        "# A[58] * B\n\t"
-        "ldr	r8, [%[a], #232]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #232]\n\t"
-        "# A[59] * B\n\t"
-        "ldr	r8, [%[a], #236]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #236]\n\t"
-        "# A[60] * B\n\t"
-        "ldr	r8, [%[a], #240]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #240]\n\t"
-        "# A[61] * B\n\t"
-        "ldr	r8, [%[a], #244]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #244]\n\t"
-        "# A[62] * B\n\t"
-        "ldr	r8, [%[a], #248]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #248]\n\t"
-        "# A[63] * B\n\t"
-        "ldr	r8, [%[a], #252]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #252]\n\t"
-        "# A[64] * B\n\t"
-        "ldr	r8, [%[a], #256]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #256]\n\t"
-        "# A[65] * B\n\t"
-        "ldr	r8, [%[a], #260]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #260]\n\t"
-        "# A[66] * B\n\t"
-        "ldr	r8, [%[a], #264]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #264]\n\t"
-        "# A[67] * B\n\t"
-        "ldr	r8, [%[a], #268]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #268]\n\t"
-        "# A[68] * B\n\t"
-        "ldr	r8, [%[a], #272]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #272]\n\t"
-        "# A[69] * B\n\t"
-        "ldr	r8, [%[a], #276]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #276]\n\t"
-        "# A[70] * B\n\t"
-        "ldr	r8, [%[a], #280]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #280]\n\t"
-        "# A[71] * B\n\t"
-        "ldr	r8, [%[a], #284]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #284]\n\t"
-        "# A[72] * B\n\t"
-        "ldr	r8, [%[a], #288]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #288]\n\t"
-        "# A[73] * B\n\t"
-        "ldr	r8, [%[a], #292]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #292]\n\t"
-        "# A[74] * B\n\t"
-        "ldr	r8, [%[a], #296]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #296]\n\t"
-        "# A[75] * B\n\t"
-        "ldr	r8, [%[a], #300]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #300]\n\t"
-        "# A[76] * B\n\t"
-        "ldr	r8, [%[a], #304]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #304]\n\t"
-        "# A[77] * B\n\t"
-        "ldr	r8, [%[a], #308]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #308]\n\t"
-        "# A[78] * B\n\t"
-        "ldr	r8, [%[a], #312]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #312]\n\t"
-        "# A[79] * B\n\t"
-        "ldr	r8, [%[a], #316]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #316]\n\t"
-        "# A[80] * B\n\t"
-        "ldr	r8, [%[a], #320]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #320]\n\t"
-        "# A[81] * B\n\t"
-        "ldr	r8, [%[a], #324]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #324]\n\t"
-        "# A[82] * B\n\t"
-        "ldr	r8, [%[a], #328]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #328]\n\t"
-        "# A[83] * B\n\t"
-        "ldr	r8, [%[a], #332]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #332]\n\t"
-        "# A[84] * B\n\t"
-        "ldr	r8, [%[a], #336]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #336]\n\t"
-        "# A[85] * B\n\t"
-        "ldr	r8, [%[a], #340]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #340]\n\t"
-        "# A[86] * B\n\t"
-        "ldr	r8, [%[a], #344]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #344]\n\t"
-        "# A[87] * B\n\t"
-        "ldr	r8, [%[a], #348]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #348]\n\t"
-        "# A[88] * B\n\t"
-        "ldr	r8, [%[a], #352]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #352]\n\t"
-        "# A[89] * B\n\t"
-        "ldr	r8, [%[a], #356]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #356]\n\t"
-        "# A[90] * B\n\t"
-        "ldr	r8, [%[a], #360]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #360]\n\t"
-        "# A[91] * B\n\t"
-        "ldr	r8, [%[a], #364]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #364]\n\t"
-        "# A[92] * B\n\t"
-        "ldr	r8, [%[a], #368]\n\t"
-        "mov	r4, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
-        "str	r5, [%[r], #368]\n\t"
-        "# A[93] * B\n\t"
-        "ldr	r8, [%[a], #372]\n\t"
-        "mov	r5, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
-        "str	r3, [%[r], #372]\n\t"
-        "# A[94] * B\n\t"
-        "ldr	r8, [%[a], #376]\n\t"
-        "mov	r3, #0\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r4, r4, r6\n\t"
-        "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
-        "str	r4, [%[r], #376]\n\t"
-        "# A[95] * B\n\t"
-        "ldr	r8, [%[a], #380]\n\t"
-        "umull	r6, r7, %[b], r8\n\t"
-        "adds	r5, r5, r6\n\t"
-        "adc	r3, r3, r7\n\t"
-        "str	r5, [%[r], #380]\n\t"
-        "str	r3, [%[r], #384]\n\t"
-        :
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
-        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
-    );
-#endif
-}
-
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
 /* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
  *
  * d1   The high order half of the number to divide.
@@ -15122,13 +17383,14 @@
  * a  A single precision integer.
  * m  Mask to AND against each digit.
  */
-static void sp_3072_mask_96(sp_digit* r, sp_digit* a, sp_digit m)
+static void sp_3072_mask_96(sp_digit* r, const sp_digit* a, sp_digit m)
 {
 #ifdef WOLFSSL_SP_SMALL
     int i;
 
-    for (i=0; i<96; i++)
+    for (i=0; i<96; i++) {
         r[i] = a[i] & m;
+    }
 #else
     int i;
 
@@ -15152,11 +17414,12 @@
  * return -ve, 0 or +ve if a is less than, equal to or greater than b
  * respectively.
  */
-static int32_t sp_3072_cmp_96(sp_digit* a, sp_digit* b)
+static int32_t sp_3072_cmp_96(const sp_digit* a, const sp_digit* b)
 {
     sp_digit r = -1;
     sp_digit one = 1;
 
+
 #ifdef WOLFSSL_SP_SMALL
     __asm__ __volatile__ (
         "mov	r7, #0\n\t"
@@ -15168,15 +17431,18 @@
         "and	r4, r4, r3\n\t"
         "and	r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
-        "movne	r3, r7\n\t"
-        "sub	r6, r6, #4\n\t"
-        "bcc	1b\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "subs	r6, r6, #4\n\t"
+        "bcs	1b\n\t"
         "eor	%[r], %[r], r3\n\t"
         : [r] "+r" (r)
         : [a] "r" (a), [b] "r" (b), [one] "r" (one)
-        : "r2", "r3", "r4", "r5", "r6", "r7"
+        : "r3", "r4", "r5", "r6", "r7"
     );
 #else
     __asm__ __volatile__ (
@@ -15187,773 +17453,1061 @@
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #376]\n\t"
         "ldr		r5, [%[b], #376]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #372]\n\t"
         "ldr		r5, [%[b], #372]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #368]\n\t"
         "ldr		r5, [%[b], #368]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #364]\n\t"
         "ldr		r5, [%[b], #364]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #360]\n\t"
         "ldr		r5, [%[b], #360]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #356]\n\t"
         "ldr		r5, [%[b], #356]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #352]\n\t"
         "ldr		r5, [%[b], #352]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #348]\n\t"
         "ldr		r5, [%[b], #348]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #344]\n\t"
         "ldr		r5, [%[b], #344]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #340]\n\t"
         "ldr		r5, [%[b], #340]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #336]\n\t"
         "ldr		r5, [%[b], #336]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #332]\n\t"
         "ldr		r5, [%[b], #332]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #328]\n\t"
         "ldr		r5, [%[b], #328]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #324]\n\t"
         "ldr		r5, [%[b], #324]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #320]\n\t"
         "ldr		r5, [%[b], #320]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #316]\n\t"
         "ldr		r5, [%[b], #316]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #312]\n\t"
         "ldr		r5, [%[b], #312]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #308]\n\t"
         "ldr		r5, [%[b], #308]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #304]\n\t"
         "ldr		r5, [%[b], #304]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #300]\n\t"
         "ldr		r5, [%[b], #300]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #296]\n\t"
         "ldr		r5, [%[b], #296]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #292]\n\t"
         "ldr		r5, [%[b], #292]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #288]\n\t"
         "ldr		r5, [%[b], #288]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #284]\n\t"
         "ldr		r5, [%[b], #284]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #280]\n\t"
         "ldr		r5, [%[b], #280]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #276]\n\t"
         "ldr		r5, [%[b], #276]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #272]\n\t"
         "ldr		r5, [%[b], #272]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #268]\n\t"
         "ldr		r5, [%[b], #268]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #264]\n\t"
         "ldr		r5, [%[b], #264]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #260]\n\t"
         "ldr		r5, [%[b], #260]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #256]\n\t"
         "ldr		r5, [%[b], #256]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #252]\n\t"
         "ldr		r5, [%[b], #252]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #248]\n\t"
         "ldr		r5, [%[b], #248]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #244]\n\t"
         "ldr		r5, [%[b], #244]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #240]\n\t"
         "ldr		r5, [%[b], #240]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #236]\n\t"
         "ldr		r5, [%[b], #236]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #232]\n\t"
         "ldr		r5, [%[b], #232]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #228]\n\t"
         "ldr		r5, [%[b], #228]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #224]\n\t"
         "ldr		r5, [%[b], #224]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #220]\n\t"
         "ldr		r5, [%[b], #220]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #216]\n\t"
         "ldr		r5, [%[b], #216]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #212]\n\t"
         "ldr		r5, [%[b], #212]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #208]\n\t"
         "ldr		r5, [%[b], #208]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #204]\n\t"
         "ldr		r5, [%[b], #204]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #200]\n\t"
         "ldr		r5, [%[b], #200]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #196]\n\t"
         "ldr		r5, [%[b], #196]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #192]\n\t"
         "ldr		r5, [%[b], #192]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #188]\n\t"
         "ldr		r5, [%[b], #188]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #184]\n\t"
         "ldr		r5, [%[b], #184]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #180]\n\t"
         "ldr		r5, [%[b], #180]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #176]\n\t"
         "ldr		r5, [%[b], #176]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #172]\n\t"
         "ldr		r5, [%[b], #172]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #168]\n\t"
         "ldr		r5, [%[b], #168]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #164]\n\t"
         "ldr		r5, [%[b], #164]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #160]\n\t"
         "ldr		r5, [%[b], #160]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #156]\n\t"
         "ldr		r5, [%[b], #156]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #152]\n\t"
         "ldr		r5, [%[b], #152]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #148]\n\t"
         "ldr		r5, [%[b], #148]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #144]\n\t"
         "ldr		r5, [%[b], #144]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #140]\n\t"
         "ldr		r5, [%[b], #140]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #136]\n\t"
         "ldr		r5, [%[b], #136]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #132]\n\t"
         "ldr		r5, [%[b], #132]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #128]\n\t"
         "ldr		r5, [%[b], #128]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #124]\n\t"
         "ldr		r5, [%[b], #124]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #120]\n\t"
         "ldr		r5, [%[b], #120]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #116]\n\t"
         "ldr		r5, [%[b], #116]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #112]\n\t"
         "ldr		r5, [%[b], #112]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #108]\n\t"
         "ldr		r5, [%[b], #108]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #104]\n\t"
         "ldr		r5, [%[b], #104]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #100]\n\t"
         "ldr		r5, [%[b], #100]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #96]\n\t"
         "ldr		r5, [%[b], #96]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #92]\n\t"
         "ldr		r5, [%[b], #92]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #88]\n\t"
         "ldr		r5, [%[b], #88]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #84]\n\t"
         "ldr		r5, [%[b], #84]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #80]\n\t"
         "ldr		r5, [%[b], #80]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #76]\n\t"
         "ldr		r5, [%[b], #76]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #72]\n\t"
         "ldr		r5, [%[b], #72]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #68]\n\t"
         "ldr		r5, [%[b], #68]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #64]\n\t"
         "ldr		r5, [%[b], #64]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #60]\n\t"
         "ldr		r5, [%[b], #60]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #56]\n\t"
         "ldr		r5, [%[b], #56]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #52]\n\t"
         "ldr		r5, [%[b], #52]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #48]\n\t"
         "ldr		r5, [%[b], #48]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #44]\n\t"
         "ldr		r5, [%[b], #44]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #40]\n\t"
         "ldr		r5, [%[b], #40]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #36]\n\t"
         "ldr		r5, [%[b], #36]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #32]\n\t"
         "ldr		r5, [%[b], #32]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #28]\n\t"
         "ldr		r5, [%[b], #28]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #24]\n\t"
         "ldr		r5, [%[b], #24]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #20]\n\t"
         "ldr		r5, [%[b], #20]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #16]\n\t"
         "ldr		r5, [%[b], #16]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #12]\n\t"
         "ldr		r5, [%[b], #12]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #8]\n\t"
         "ldr		r5, [%[b], #8]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #4]\n\t"
         "ldr		r5, [%[b], #4]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #0]\n\t"
         "ldr		r5, [%[b], #0]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "eor	%[r], %[r], r3\n\t"
         : [r] "+r" (r)
         : [a] "r" (a), [b] "r" (b), [one] "r" (one)
-        : "r2", "r3", "r4", "r5", "r6", "r7"
+        : "r3", "r4", "r5", "r6", "r7"
     );
 #endif
 
@@ -15969,7 +18523,7 @@
  * r  Remainder from the division.
  * returns MP_OKAY indicating success.
  */
-static WC_INLINE int sp_3072_div_96(sp_digit* a, sp_digit* d, sp_digit* m,
+static WC_INLINE int sp_3072_div_96(const sp_digit* a, const sp_digit* d, sp_digit* m,
         sp_digit* r)
 {
     sp_digit t1[192], t2[97];
@@ -15978,6 +18532,7 @@
 
     (void)m;
 
+
     div = d[95];
     XMEMCPY(t1, a, sizeof(*t1) * 2 * 96);
     for (i=95; i>=0; i--) {
@@ -15993,7 +18548,7 @@
     }
 
     r1 = sp_3072_cmp_96(t1, d) >= 0;
-    sp_3072_cond_sub_96(r, t1, t2, (sp_digit)0 - r1);
+    sp_3072_cond_sub_96(r, t1, d, (sp_digit)0 - r1);
 
     return MP_OKAY;
 }
@@ -16005,11 +18560,12 @@
  * m  A single precision number that is the modulus to reduce with.
  * returns MP_OKAY indicating success.
  */
-static WC_INLINE int sp_3072_mod_96(sp_digit* r, sp_digit* a, sp_digit* m)
+static WC_INLINE int sp_3072_mod_96(sp_digit* r, const sp_digit* a, const sp_digit* m)
 {
     return sp_3072_div_96(a, m, NULL, r);
 }
 
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
 /* Divide d in a and put remainder into r (m*d + r = a)
  * m is not calculated as it is not needed at this time.
  *
@@ -16019,7 +18575,7 @@
  * r  Remainder from the division.
  * returns MP_OKAY indicating success.
  */
-static WC_INLINE int sp_3072_div_96_cond(sp_digit* a, sp_digit* d, sp_digit* m,
+static WC_INLINE int sp_3072_div_96_cond(const sp_digit* a, const sp_digit* d, sp_digit* m,
         sp_digit* r)
 {
     sp_digit t1[192], t2[97];
@@ -16028,6 +18584,7 @@
 
     (void)m;
 
+
     div = d[95];
     XMEMCPY(t1, a, sizeof(*t1) * 2 * 96);
     for (i=95; i>=0; i--) {
@@ -16044,7 +18601,7 @@
     }
 
     r1 = sp_3072_cmp_96(t1, d) >= 0;
-    sp_3072_cond_sub_96(r, t1, t2, (sp_digit)0 - r1);
+    sp_3072_cond_sub_96(r, t1, d, (sp_digit)0 - r1);
 
     return MP_OKAY;
 }
@@ -16056,12 +18613,13 @@
  * m  A single precision number that is the modulus to reduce with.
  * returns MP_OKAY indicating success.
  */
-static WC_INLINE int sp_3072_mod_96_cond(sp_digit* r, sp_digit* a, sp_digit* m)
+static WC_INLINE int sp_3072_mod_96_cond(sp_digit* r, const sp_digit* a, const sp_digit* m)
 {
     return sp_3072_div_96_cond(a, m, NULL, r);
 }
 
-#if defined(SP_RSA_PRIVATE_EXP_D) || defined(WOLFSSL_HAVE_SP_DH)
+#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
+                                                     defined(WOLFSSL_HAVE_SP_DH)
 #ifdef WOLFSSL_SP_SMALL
 /* Modular exponentiate a to the e mod m. (r = a^e mod m)
  *
@@ -16072,8 +18630,8 @@
  * m     A single precision number that is the modulus.
  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
  */
-static int sp_3072_mod_exp_96(sp_digit* r, sp_digit* a, sp_digit* e,
-        int bits, sp_digit* m, int reduceA)
+static int sp_3072_mod_exp_96(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
 {
 #ifndef WOLFSSL_SMALL_STACK
     sp_digit t[16][192];
@@ -16092,27 +18650,28 @@
 #ifdef WOLFSSL_SMALL_STACK
     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 192, NULL,
                             DYNAMIC_TYPE_TMP_BUFFER);
-    if (td == NULL)
+    if (td == NULL) {
         err = MEMORY_E;
-
-    if (err == MP_OKAY) {
-        for (i=0; i<16; i++)
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<16; i++) {
             t[i] = td + i * 192;
+        }
+#endif
         norm = t[0];
-    }
-#else
-    norm = t[0];
-#endif
-
-    if (err == MP_OKAY) {
+
         sp_3072_mont_setup(m, &mp);
         sp_3072_mont_norm_96(norm, m);
 
-        XMEMSET(t[1], 0, sizeof(sp_digit) * 96);
-        if (reduceA) {
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 96U);
+        if (reduceA != 0) {
             err = sp_3072_mod_96(t[1] + 96, a, m);
-            if (err == MP_OKAY)
+            if (err == MP_OKAY) {
                 err = sp_3072_mod_96(t[1], t[1], m);
+            }
         }
         else {
             XMEMCPY(t[1] + 96, a, sizeof(sp_digit) * 96);
@@ -16138,9 +18697,16 @@
 
         i = (bits - 1) / 32;
         n = e[i--];
-        y = n >> 28;
-        n <<= 4;
-        c = 28;
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 4;
+        if (c == 32) {
+            c = 28;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
         XMEMCPY(r, t[y], sizeof(sp_digit) * 96);
         for (; i>=0 || c>=4; ) {
             if (c == 0) {
@@ -16171,7 +18737,7 @@
             sp_3072_mont_mul_96(r, r, t[y], m, mp);
         }
 
-        XMEMSET(&r[96], 0, sizeof(sp_digit) * 96);
+        XMEMSET(&r[96], 0, sizeof(sp_digit) * 96U);
         sp_3072_mont_reduce_96(r, m, mp);
 
         mask = 0 - (sp_3072_cmp_96(r, m) >= 0);
@@ -16179,8 +18745,9 @@
     }
 
 #ifdef WOLFSSL_SMALL_STACK
-    if (td != NULL)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
 #endif
 
     return err;
@@ -16195,8 +18762,8 @@
  * m     A single precision number that is the modulus.
  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
  */
-static int sp_3072_mod_exp_96(sp_digit* r, sp_digit* a, sp_digit* e,
-        int bits, sp_digit* m, int reduceA)
+static int sp_3072_mod_exp_96(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
 {
 #ifndef WOLFSSL_SMALL_STACK
     sp_digit t[32][192];
@@ -16215,27 +18782,28 @@
 #ifdef WOLFSSL_SMALL_STACK
     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 192, NULL,
                             DYNAMIC_TYPE_TMP_BUFFER);
-    if (td == NULL)
+    if (td == NULL) {
         err = MEMORY_E;
-
-    if (err == MP_OKAY) {
-        for (i=0; i<32; i++)
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<32; i++) {
             t[i] = td + i * 192;
+        }
+#endif
         norm = t[0];
-    }
-#else
-    norm = t[0];
-#endif
-
-    if (err == MP_OKAY) {
+
         sp_3072_mont_setup(m, &mp);
         sp_3072_mont_norm_96(norm, m);
 
-        XMEMSET(t[1], 0, sizeof(sp_digit) * 96);
-        if (reduceA) {
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 96U);
+        if (reduceA != 0) {
             err = sp_3072_mod_96(t[1] + 96, a, m);
-            if (err == MP_OKAY)
+            if (err == MP_OKAY) {
                 err = sp_3072_mod_96(t[1], t[1], m);
+            }
         }
         else {
             XMEMCPY(t[1] + 96, a, sizeof(sp_digit) * 96);
@@ -16277,9 +18845,16 @@
 
         i = (bits - 1) / 32;
         n = e[i--];
-        y = n >> 27;
-        n <<= 5;
-        c = 27;
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 5;
+        if (c == 32) {
+            c = 27;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
         XMEMCPY(r, t[y], sizeof(sp_digit) * 96);
         for (; i>=0 || c>=5; ) {
             if (c == 0) {
@@ -16310,12 +18885,8 @@
 
             sp_3072_mont_mul_96(r, r, t[y], m, mp);
         }
-        y = e[0] & 0x3;
-        sp_3072_mont_sqr_96(r, r, m, mp);
-        sp_3072_mont_sqr_96(r, r, m, mp);
-        sp_3072_mont_mul_96(r, r, t[y], m, mp);
-
-        XMEMSET(&r[96], 0, sizeof(sp_digit) * 96);
+
+        XMEMSET(&r[96], 0, sizeof(sp_digit) * 96U);
         sp_3072_mont_reduce_96(r, m, mp);
 
         mask = 0 - (sp_3072_cmp_96(r, m) >= 0);
@@ -16323,14 +18894,15 @@
     }
 
 #ifdef WOLFSSL_SMALL_STACK
-    if (td != NULL)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
 #endif
 
     return err;
 }
 #endif /* WOLFSSL_SP_SMALL */
-#endif /* SP_RSA_PRIVATE_EXP_D || WOLFSSL_HAVE_SP_DH */
+#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */
 
 #ifdef WOLFSSL_HAVE_SP_RSA
 /* RSA public key operation.
@@ -16348,15 +18920,15 @@
 int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
     byte* out, word32* outLen)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_digit ad[192], md[96], rd[192];
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit a[192], m[96], r[192];
 #else
     sp_digit* d = NULL;
-#endif
     sp_digit* a;
-    sp_digit *ah;
     sp_digit* m;
     sp_digit* r;
+#endif
+    sp_digit *ah;
     sp_digit e[1];
     int err = MP_OKAY;
 
@@ -16366,10 +18938,10 @@
                                                      mp_count_bits(mm) != 3072))
         err = MP_READ_E;
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     if (err == MP_OKAY) {
         d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 96 * 5, NULL,
-                               DYNAMIC_TYPE_TMP_BUFFER);
+                                                              DYNAMIC_TYPE_RSA);
         if (d == NULL)
             err = MEMORY_E;
     }
@@ -16378,26 +18950,24 @@
         a = d;
         r = a + 96 * 2;
         m = r + 96 * 2;
+    }
+#endif
+
+    if (err == MP_OKAY) {
         ah = a + 96;
-    }
-#else
-    a = ad;
-    m = md;
-    r = rd;
-    ah = a + 96;
-#endif
-
-    if (err == MP_OKAY) {
+
         sp_3072_from_bin(ah, 96, in, inLen);
 #if DIGIT_BIT >= 32
         e[0] = em->dp[0];
 #else
         e[0] = em->dp[0];
-        if (em->used > 1)
+        if (em->used > 1) {
             e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
-#endif
-        if (e[0] == 0)
+        }
+#endif
+        if (e[0] == 0) {
             err = MP_EXPTMOD_E;
+        }
     }
     if (err == MP_OKAY) {
         sp_3072_from_mp(m, 96, mm);
@@ -16423,25 +18993,30 @@
             err = sp_3072_mod_96_cond(a, a, m);
 
             if (err == MP_OKAY) {
-                for (i=31; i>=0; i--)
-                    if (e[0] >> i)
+                for (i = 31; i >= 0; i--) {
+                    if (e[0] >> i) {
                         break;
+                    }
+                }
 
                 XMEMCPY(r, a, sizeof(sp_digit) * 96);
                 for (i--; i>=0; i--) {
                     sp_3072_mont_sqr_96(r, r, m, mp);
-                    if (((e[0] >> i) & 1) == 1)
+                    if (((e[0] >> i) & 1) == 1) {
                         sp_3072_mont_mul_96(r, r, a, m, mp);
+                    }
                 }
                 XMEMSET(&r[96], 0, sizeof(sp_digit) * 96);
                 sp_3072_mont_reduce_96(r, m, mp);
 
                 for (i = 95; i > 0; i--) {
-                    if (r[i] != m[i])
+                    if (r[i] != m[i]) {
                         break;
+                    }
                 }
-                if (r[i] >= m[i])
+                if (r[i] >= m[i]) {
                     sp_3072_sub_in_place_96(r, m);
+                }
             }
         }
     }
@@ -16451,14 +19026,359 @@
         *outLen = 384;
     }
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (d != NULL)
-        XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
 #endif
 
     return err;
 }
 
+#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
+    sp_digit* a;
+    sp_digit* d = NULL;
+    sp_digit* m;
+    sp_digit* r;
+    int err = MP_OKAY;
+
+    (void)pm;
+    (void)qm;
+    (void)dpm;
+    (void)dqm;
+    (void)qim;
+
+    if (*outLen < 384U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(dm) > 3072) {
+           err = MP_READ_E;
+        }
+        if (inLen > 384) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 3072) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 96 * 4, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        a = d + 96;
+        m = a + 192;
+        r = a;
+
+        sp_3072_from_bin(a, 96, in, inLen);
+        sp_3072_from_mp(d, 96, dm);
+        sp_3072_from_mp(m, 96, mm);
+        err = sp_3072_mod_exp_96(r, a, d, 3072, m, 0);
+    }
+    if (err == MP_OKAY) {
+        sp_3072_to_bin(r, out);
+        *outLen = 384;
+    }
+
+    if (d != NULL) {
+        XMEMSET(d, 0, sizeof(sp_digit) * 96);
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+
+    return err;
+#else
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r  A single precision number representing conditional add result.
+ * a  A single precision number to add with.
+ * b  A single precision number to add.
+ * m  Mask value to apply.
+ */
+static sp_digit sp_3072_cond_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        sp_digit m)
+{
+    sp_digit c = 0;
+
+#ifdef WOLFSSL_SP_SMALL
+    __asm__ __volatile__ (
+        "mov	r9, #0\n\t"
+        "mov	r8, #0\n\t"
+        "1:\n\t"
+        "adds	%[c], %[c], #-1\n\t"
+        "ldr	r4, [%[a], r8]\n\t"
+        "ldr	r5, [%[b], r8]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adc	%[c], r9, r9\n\t"
+        "str	r4, [%[r], r8]\n\t"
+        "add	r8, r8, #4\n\t"
+        "cmp	r8, #192\n\t"
+        "blt	1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r4", "r6", "r5", "r7", "r8", "r9"
+    );
+#else
+    __asm__ __volatile__ (
+
+        "mov	r9, #0\n\t"
+        "ldr	r4, [%[a], #0]\n\t"
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r5, [%[b], #0]\n\t"
+        "ldr	r7, [%[b], #4]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #0]\n\t"
+        "str	r6, [%[r], #4]\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "ldr	r6, [%[a], #12]\n\t"
+        "ldr	r5, [%[b], #8]\n\t"
+        "ldr	r7, [%[b], #12]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "str	r6, [%[r], #12]\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "ldr	r6, [%[a], #20]\n\t"
+        "ldr	r5, [%[b], #16]\n\t"
+        "ldr	r7, [%[b], #20]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "str	r6, [%[r], #20]\n\t"
+        "ldr	r4, [%[a], #24]\n\t"
+        "ldr	r6, [%[a], #28]\n\t"
+        "ldr	r5, [%[b], #24]\n\t"
+        "ldr	r7, [%[b], #28]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #24]\n\t"
+        "str	r6, [%[r], #28]\n\t"
+        "ldr	r4, [%[a], #32]\n\t"
+        "ldr	r6, [%[a], #36]\n\t"
+        "ldr	r5, [%[b], #32]\n\t"
+        "ldr	r7, [%[b], #36]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #32]\n\t"
+        "str	r6, [%[r], #36]\n\t"
+        "ldr	r4, [%[a], #40]\n\t"
+        "ldr	r6, [%[a], #44]\n\t"
+        "ldr	r5, [%[b], #40]\n\t"
+        "ldr	r7, [%[b], #44]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #40]\n\t"
+        "str	r6, [%[r], #44]\n\t"
+        "ldr	r4, [%[a], #48]\n\t"
+        "ldr	r6, [%[a], #52]\n\t"
+        "ldr	r5, [%[b], #48]\n\t"
+        "ldr	r7, [%[b], #52]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #48]\n\t"
+        "str	r6, [%[r], #52]\n\t"
+        "ldr	r4, [%[a], #56]\n\t"
+        "ldr	r6, [%[a], #60]\n\t"
+        "ldr	r5, [%[b], #56]\n\t"
+        "ldr	r7, [%[b], #60]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #56]\n\t"
+        "str	r6, [%[r], #60]\n\t"
+        "ldr	r4, [%[a], #64]\n\t"
+        "ldr	r6, [%[a], #68]\n\t"
+        "ldr	r5, [%[b], #64]\n\t"
+        "ldr	r7, [%[b], #68]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #64]\n\t"
+        "str	r6, [%[r], #68]\n\t"
+        "ldr	r4, [%[a], #72]\n\t"
+        "ldr	r6, [%[a], #76]\n\t"
+        "ldr	r5, [%[b], #72]\n\t"
+        "ldr	r7, [%[b], #76]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #72]\n\t"
+        "str	r6, [%[r], #76]\n\t"
+        "ldr	r4, [%[a], #80]\n\t"
+        "ldr	r6, [%[a], #84]\n\t"
+        "ldr	r5, [%[b], #80]\n\t"
+        "ldr	r7, [%[b], #84]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #80]\n\t"
+        "str	r6, [%[r], #84]\n\t"
+        "ldr	r4, [%[a], #88]\n\t"
+        "ldr	r6, [%[a], #92]\n\t"
+        "ldr	r5, [%[b], #88]\n\t"
+        "ldr	r7, [%[b], #92]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #88]\n\t"
+        "str	r6, [%[r], #92]\n\t"
+        "ldr	r4, [%[a], #96]\n\t"
+        "ldr	r6, [%[a], #100]\n\t"
+        "ldr	r5, [%[b], #96]\n\t"
+        "ldr	r7, [%[b], #100]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #96]\n\t"
+        "str	r6, [%[r], #100]\n\t"
+        "ldr	r4, [%[a], #104]\n\t"
+        "ldr	r6, [%[a], #108]\n\t"
+        "ldr	r5, [%[b], #104]\n\t"
+        "ldr	r7, [%[b], #108]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #104]\n\t"
+        "str	r6, [%[r], #108]\n\t"
+        "ldr	r4, [%[a], #112]\n\t"
+        "ldr	r6, [%[a], #116]\n\t"
+        "ldr	r5, [%[b], #112]\n\t"
+        "ldr	r7, [%[b], #116]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #112]\n\t"
+        "str	r6, [%[r], #116]\n\t"
+        "ldr	r4, [%[a], #120]\n\t"
+        "ldr	r6, [%[a], #124]\n\t"
+        "ldr	r5, [%[b], #120]\n\t"
+        "ldr	r7, [%[b], #124]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #120]\n\t"
+        "str	r6, [%[r], #124]\n\t"
+        "ldr	r4, [%[a], #128]\n\t"
+        "ldr	r6, [%[a], #132]\n\t"
+        "ldr	r5, [%[b], #128]\n\t"
+        "ldr	r7, [%[b], #132]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #128]\n\t"
+        "str	r6, [%[r], #132]\n\t"
+        "ldr	r4, [%[a], #136]\n\t"
+        "ldr	r6, [%[a], #140]\n\t"
+        "ldr	r5, [%[b], #136]\n\t"
+        "ldr	r7, [%[b], #140]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #136]\n\t"
+        "str	r6, [%[r], #140]\n\t"
+        "ldr	r4, [%[a], #144]\n\t"
+        "ldr	r6, [%[a], #148]\n\t"
+        "ldr	r5, [%[b], #144]\n\t"
+        "ldr	r7, [%[b], #148]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #144]\n\t"
+        "str	r6, [%[r], #148]\n\t"
+        "ldr	r4, [%[a], #152]\n\t"
+        "ldr	r6, [%[a], #156]\n\t"
+        "ldr	r5, [%[b], #152]\n\t"
+        "ldr	r7, [%[b], #156]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #152]\n\t"
+        "str	r6, [%[r], #156]\n\t"
+        "ldr	r4, [%[a], #160]\n\t"
+        "ldr	r6, [%[a], #164]\n\t"
+        "ldr	r5, [%[b], #160]\n\t"
+        "ldr	r7, [%[b], #164]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #160]\n\t"
+        "str	r6, [%[r], #164]\n\t"
+        "ldr	r4, [%[a], #168]\n\t"
+        "ldr	r6, [%[a], #172]\n\t"
+        "ldr	r5, [%[b], #168]\n\t"
+        "ldr	r7, [%[b], #172]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #168]\n\t"
+        "str	r6, [%[r], #172]\n\t"
+        "ldr	r4, [%[a], #176]\n\t"
+        "ldr	r6, [%[a], #180]\n\t"
+        "ldr	r5, [%[b], #176]\n\t"
+        "ldr	r7, [%[b], #180]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #176]\n\t"
+        "str	r6, [%[r], #180]\n\t"
+        "ldr	r4, [%[a], #184]\n\t"
+        "ldr	r6, [%[a], #188]\n\t"
+        "ldr	r5, [%[b], #184]\n\t"
+        "ldr	r7, [%[b], #188]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #184]\n\t"
+        "str	r6, [%[r], #188]\n\t"
+        "adc	%[c], r9, r9\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r4", "r6", "r5", "r7", "r8", "r9"
+    );
+#endif /* WOLFSSL_SP_SMALL */
+
+    return c;
+}
+
 /* RSA private key operation.
  *
  * in      Array of bytes representing the number to exponentiate, base.
@@ -16480,23 +19400,22 @@
     mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
     byte* out, word32* outLen)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_digit ad[96 * 2];
-    sp_digit pd[48], qd[48], dpd[48];
-    sp_digit tmpad[96], tmpbd[96];
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit a[96 * 2];
+    sp_digit p[48], q[48], dp[48];
+    sp_digit tmpa[96], tmpb[96];
 #else
     sp_digit* t = NULL;
-#endif
     sp_digit* a;
     sp_digit* p;
     sp_digit* q;
     sp_digit* dp;
-    sp_digit* dq;
-    sp_digit* qi;
-    sp_digit* tmp;
     sp_digit* tmpa;
     sp_digit* tmpb;
+#endif
     sp_digit* r;
+    sp_digit* qi;
+    sp_digit* dq;
     sp_digit c;
     int err = MP_OKAY;
 
@@ -16508,10 +19427,10 @@
     if (err == MP_OKAY && (inLen > 384 || mp_count_bits(mm) != 3072))
         err = MP_READ_E;
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     if (err == MP_OKAY) {
         t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 48 * 11, NULL,
-                               DYNAMIC_TYPE_TMP_BUFFER);
+                                                              DYNAMIC_TYPE_RSA);
         if (t == NULL)
             err = MEMORY_E;
     }
@@ -16523,20 +19442,16 @@
         tmpa = qi + 48;
         tmpb = tmpa + 96;
 
-        tmp = t;
-        r = tmp + 96;
-    }
-#else
-    r = a = ad;
-    p = pd;
-    q = qd;
-    qi = dq = dp = dpd;
-    tmpa = tmpad;
-    tmpb = tmpbd;
-    tmp = a + 96;
-#endif
-
-    if (err == MP_OKAY) {
+        r = t + 96;
+    }
+#else
+#endif
+
+    if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+        r = a;
+        qi = dq = dp;
+#endif
         sp_3072_from_bin(a, 96, in, inLen);
         sp_3072_from_mp(p, 48, pm);
         sp_3072_from_mp(q, 48, qm);
@@ -16551,8 +19466,8 @@
 
     if (err == MP_OKAY) {
         c = sp_3072_sub_in_place_48(tmpa, tmpb);
-        sp_3072_mask_48(tmp, p, c);
-        sp_3072_add_48(tmpa, tmpa, tmp);
+        c += sp_3072_cond_add_48(tmpa, tmpa, p, c);
+        sp_3072_cond_add_48(tmpa, tmpa, p, c);
 
         sp_3072_from_mp(qi, 48, qim);
         sp_3072_mul_48(tmpa, tmpa, qi);
@@ -16568,34 +19483,37 @@
         *outLen = 384;
     }
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     if (t != NULL) {
         XMEMSET(t, 0, sizeof(sp_digit) * 48 * 11);
-        XFREE(t, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    }
-#else
-    XMEMSET(tmpad, 0, sizeof(tmpad));
-    XMEMSET(tmpbd, 0, sizeof(tmpbd));
-    XMEMSET(pd, 0, sizeof(pd));
-    XMEMSET(qd, 0, sizeof(qd));
-    XMEMSET(dpd, 0, sizeof(dpd));
+        XFREE(t, NULL, DYNAMIC_TYPE_RSA);
+    }
+#else
+    XMEMSET(tmpa, 0, sizeof(tmpa));
+    XMEMSET(tmpb, 0, sizeof(tmpb));
+    XMEMSET(p,    0, sizeof(p));
+    XMEMSET(q,    0, sizeof(q));
+    XMEMSET(dp,   0, sizeof(dp));
 #endif
 
     return err;
 }
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
+#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
 #endif /* WOLFSSL_HAVE_SP_RSA */
-#ifdef WOLFSSL_HAVE_SP_DH
+#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
+                                              !defined(WOLFSSL_RSA_PUBLIC_ONLY))
 /* Convert an array of sp_digit to an mp_int.
  *
  * a  A single precision integer.
  * r  A multi-precision integer.
  */
-static int sp_3072_to_mp(sp_digit* a, mp_int* r)
+static int sp_3072_to_mp(const sp_digit* a, mp_int* r)
 {
     int err;
 
     err = mp_grow(r, (3072 + DIGIT_BIT - 1) / DIGIT_BIT);
-    if (err == MP_OKAY) {
+    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
 #if DIGIT_BIT == 32
         XMEMCPY(r->dp, a, sizeof(sp_digit) * 96);
         r->used = 96;
@@ -16605,14 +19523,19 @@
 
         r->dp[0] = 0;
         for (i = 0; i < 96; i++) {
-            r->dp[j] |= a[i] << s;
-            r->dp[j] &= (1l << DIGIT_BIT) - 1;
+            r->dp[j] |= (mp_digit)(a[i] << s);
+            r->dp[j] &= (1L << DIGIT_BIT) - 1;
             s = DIGIT_BIT - s;
-            r->dp[++j] = a[i] >> s;
+            r->dp[++j] = (mp_digit)(a[i] >> s);
             while (s + DIGIT_BIT <= 32) {
                 s += DIGIT_BIT;
-                r->dp[j] &= (1l << DIGIT_BIT) - 1;
-                r->dp[++j] = a[i] >> s;
+                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+                if (s == SP_WORD_SIZE) {
+                    r->dp[j] = 0;
+                }
+                else {
+                    r->dp[j] = (mp_digit)(a[i] >> s);
+                }
             }
             s = 32 - s;
         }
@@ -16625,15 +19548,16 @@
         for (i = 0; i < 96; i++) {
             r->dp[j] |= ((mp_digit)a[i]) << s;
             if (s + 32 >= DIGIT_BIT) {
-    #if DIGIT_BIT < 32
-                r->dp[j] &= (1l << DIGIT_BIT) - 1;
+    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+                r->dp[j] &= (1L << DIGIT_BIT) - 1;
     #endif
                 s = DIGIT_BIT - s;
                 r->dp[++j] = a[i] >> s;
                 s = 32 - s;
             }
-            else
+            else {
                 s += 32;
+            }
         }
         r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT;
         mp_clamp(r);
@@ -16649,7 +19573,7 @@
  * exp   Exponent. MP integer.
  * mod   Modulus. MP integer.
  * res   Result. MP integer.
- * returs 0 on success, MP_READ_E if there are too many bytes in an array
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
  * and MEMORY_E if memory allocation fails.
  */
 int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
@@ -16659,12 +19583,23 @@
     sp_digit* r = b;
     int expBits = mp_count_bits(exp);
 
-    if (mp_count_bits(base) > 3072 || expBits > 3072 ||
-                                                   mp_count_bits(mod) != 3072) {
+    if (mp_count_bits(base) > 3072) {
         err = MP_READ_E;
     }
 
     if (err == MP_OKAY) {
+        if (expBits > 3072) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 3072) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
         sp_3072_from_mp(b, 96, base);
         sp_3072_from_mp(e, 96, exp);
         sp_3072_from_mp(m, 96, mod);
@@ -16681,6 +19616,706 @@
     return err;
 }
 
+#ifdef WOLFSSL_HAVE_SP_DH
+
+#ifdef HAVE_FFDHE_3072
+static void sp_3072_lshift_96(sp_digit* r, sp_digit* a, byte n)
+{
+    __asm__ __volatile__ (
+        "mov	r6, #31\n\t"
+        "sub	r6, r6, %[n]\n\t"
+        "ldr	r3, [%[a], #380]\n\t"
+        "lsr	r4, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r4, r4, r6\n\t"
+        "ldr	r2, [%[a], #376]\n\t"
+        "str	r4, [%[r], #384]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #372]\n\t"
+        "str	r3, [%[r], #380]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #368]\n\t"
+        "str	r2, [%[r], #376]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #364]\n\t"
+        "str	r4, [%[r], #372]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #360]\n\t"
+        "str	r3, [%[r], #368]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #356]\n\t"
+        "str	r2, [%[r], #364]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #352]\n\t"
+        "str	r4, [%[r], #360]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #348]\n\t"
+        "str	r3, [%[r], #356]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #344]\n\t"
+        "str	r2, [%[r], #352]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #340]\n\t"
+        "str	r4, [%[r], #348]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #336]\n\t"
+        "str	r3, [%[r], #344]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #332]\n\t"
+        "str	r2, [%[r], #340]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #328]\n\t"
+        "str	r4, [%[r], #336]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #324]\n\t"
+        "str	r3, [%[r], #332]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #320]\n\t"
+        "str	r2, [%[r], #328]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #316]\n\t"
+        "str	r4, [%[r], #324]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #312]\n\t"
+        "str	r3, [%[r], #320]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #308]\n\t"
+        "str	r2, [%[r], #316]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #304]\n\t"
+        "str	r4, [%[r], #312]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #300]\n\t"
+        "str	r3, [%[r], #308]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #296]\n\t"
+        "str	r2, [%[r], #304]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #292]\n\t"
+        "str	r4, [%[r], #300]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #288]\n\t"
+        "str	r3, [%[r], #296]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #284]\n\t"
+        "str	r2, [%[r], #292]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #280]\n\t"
+        "str	r4, [%[r], #288]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #276]\n\t"
+        "str	r3, [%[r], #284]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #272]\n\t"
+        "str	r2, [%[r], #280]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #268]\n\t"
+        "str	r4, [%[r], #276]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #264]\n\t"
+        "str	r3, [%[r], #272]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #260]\n\t"
+        "str	r2, [%[r], #268]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #256]\n\t"
+        "str	r4, [%[r], #264]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #252]\n\t"
+        "str	r3, [%[r], #260]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #248]\n\t"
+        "str	r2, [%[r], #256]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #244]\n\t"
+        "str	r4, [%[r], #252]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #240]\n\t"
+        "str	r3, [%[r], #248]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #236]\n\t"
+        "str	r2, [%[r], #244]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #232]\n\t"
+        "str	r4, [%[r], #240]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #228]\n\t"
+        "str	r3, [%[r], #236]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #224]\n\t"
+        "str	r2, [%[r], #232]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #220]\n\t"
+        "str	r4, [%[r], #228]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #216]\n\t"
+        "str	r3, [%[r], #224]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #212]\n\t"
+        "str	r2, [%[r], #220]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #208]\n\t"
+        "str	r4, [%[r], #216]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #204]\n\t"
+        "str	r3, [%[r], #212]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #200]\n\t"
+        "str	r2, [%[r], #208]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #196]\n\t"
+        "str	r4, [%[r], #204]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #192]\n\t"
+        "str	r3, [%[r], #200]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #188]\n\t"
+        "str	r2, [%[r], #196]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #184]\n\t"
+        "str	r4, [%[r], #192]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #180]\n\t"
+        "str	r3, [%[r], #188]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #176]\n\t"
+        "str	r2, [%[r], #184]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #172]\n\t"
+        "str	r4, [%[r], #180]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #168]\n\t"
+        "str	r3, [%[r], #176]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #164]\n\t"
+        "str	r2, [%[r], #172]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #160]\n\t"
+        "str	r4, [%[r], #168]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #156]\n\t"
+        "str	r3, [%[r], #164]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #152]\n\t"
+        "str	r2, [%[r], #160]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #148]\n\t"
+        "str	r4, [%[r], #156]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #144]\n\t"
+        "str	r3, [%[r], #152]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #140]\n\t"
+        "str	r2, [%[r], #148]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #136]\n\t"
+        "str	r4, [%[r], #144]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #132]\n\t"
+        "str	r3, [%[r], #140]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #128]\n\t"
+        "str	r2, [%[r], #136]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #124]\n\t"
+        "str	r4, [%[r], #132]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #120]\n\t"
+        "str	r3, [%[r], #128]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #116]\n\t"
+        "str	r2, [%[r], #124]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #112]\n\t"
+        "str	r4, [%[r], #120]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #108]\n\t"
+        "str	r3, [%[r], #116]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #104]\n\t"
+        "str	r2, [%[r], #112]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #100]\n\t"
+        "str	r4, [%[r], #108]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #96]\n\t"
+        "str	r3, [%[r], #104]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #92]\n\t"
+        "str	r2, [%[r], #100]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #88]\n\t"
+        "str	r4, [%[r], #96]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #84]\n\t"
+        "str	r3, [%[r], #92]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #80]\n\t"
+        "str	r2, [%[r], #88]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #76]\n\t"
+        "str	r4, [%[r], #84]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #72]\n\t"
+        "str	r3, [%[r], #80]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #68]\n\t"
+        "str	r2, [%[r], #76]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #64]\n\t"
+        "str	r4, [%[r], #72]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #60]\n\t"
+        "str	r3, [%[r], #68]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #56]\n\t"
+        "str	r2, [%[r], #64]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #52]\n\t"
+        "str	r4, [%[r], #60]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #48]\n\t"
+        "str	r3, [%[r], #56]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #44]\n\t"
+        "str	r2, [%[r], #52]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #40]\n\t"
+        "str	r4, [%[r], #48]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #36]\n\t"
+        "str	r3, [%[r], #44]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #32]\n\t"
+        "str	r2, [%[r], #40]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #28]\n\t"
+        "str	r4, [%[r], #36]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #24]\n\t"
+        "str	r3, [%[r], #32]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #20]\n\t"
+        "str	r2, [%[r], #28]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #16]\n\t"
+        "str	r4, [%[r], #24]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "str	r3, [%[r], #20]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #8]\n\t"
+        "str	r2, [%[r], #16]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #4]\n\t"
+        "str	r4, [%[r], #12]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #0]\n\t"
+        "str	r3, [%[r], #8]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "str	r4, [%[r]]\n\t"
+        "str	r2, [%[r], #4]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [n] "r" (n)
+        : "memory", "r2", "r3", "r4", "r5", "r6"
+    );
+}
+
+/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
+ *
+ * r     A single precision number that is the result of the operation.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_3072_mod_exp_2_96(sp_digit* r, const sp_digit* e, int bits,
+        const sp_digit* m)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit nd[192];
+    sp_digit td[97];
+#else
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit* tmp;
+    sp_digit mp = 1;
+    sp_digit n, o;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 289, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        norm = td;
+        tmp  = td + 192;
+#else
+        norm = nd;
+        tmp  = td;
+#endif
+
+        sp_3072_mont_setup(m, &mp);
+        sp_3072_mont_norm_96(norm, m);
+
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 5;
+        if (c == 32) {
+            c = 27;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
+        sp_3072_lshift_96(r, norm, y);
+        for (; i>=0 || c>=5; ) {
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 27;
+                n <<= 5;
+                c = 27;
+            }
+            else if (c < 5) {
+                y = n >> 27;
+                n = e[i--];
+                c = 5 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                y = (n >> 27) & 0x1f;
+                n <<= 5;
+                c -= 5;
+            }
+
+            sp_3072_mont_sqr_96(r, r, m, mp);
+            sp_3072_mont_sqr_96(r, r, m, mp);
+            sp_3072_mont_sqr_96(r, r, m, mp);
+            sp_3072_mont_sqr_96(r, r, m, mp);
+            sp_3072_mont_sqr_96(r, r, m, mp);
+
+            sp_3072_lshift_96(r, r, y);
+            sp_3072_mul_d_96(tmp, norm, r[96]);
+            r[96] = 0;
+            o = sp_3072_add_96(r, r, tmp);
+            sp_3072_cond_sub_96(r, r, m, (sp_digit)0 - o);
+        }
+
+        XMEMSET(&r[96], 0, sizeof(sp_digit) * 96U);
+        sp_3072_mont_reduce_96(r, m, mp);
+
+        mask = 0 - (sp_3072_cmp_96(r, m) >= 0);
+        sp_3072_cond_sub_96(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#endif /* HAVE_FFDHE_3072 */
+
 /* Perform the modular exponentiation for Diffie-Hellman.
  *
  * base     Base.
@@ -16690,7 +20325,7 @@
  * out      Buffer to hold big-endian bytes of exponentiation result.
  *          Must be at least 384 bytes long.
  * outLen   Length, in bytes, of exponentiation result.
- * returs 0 on success, MP_READ_E if there are too many bytes in an array
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
  * and MEMORY_E if memory allocation fails.
  */
 int sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen,
@@ -16701,17 +20336,34 @@
     sp_digit* r = b;
     word32 i;
 
-    if (mp_count_bits(base) > 3072 || expLen > 384 ||
-                                                   mp_count_bits(mod) != 3072) {
+    if (mp_count_bits(base) > 3072) {
         err = MP_READ_E;
     }
 
     if (err == MP_OKAY) {
+        if (expLen > 384) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 3072) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
         sp_3072_from_mp(b, 96, base);
         sp_3072_from_bin(e, 96, exp, expLen);
         sp_3072_from_mp(m, 96, mod);
 
-        err = sp_3072_mod_exp_96(r, b, e, expLen * 8, m, 0);
+    #ifdef HAVE_FFDHE_3072
+        if (base->used == 1 && base->dp[0] == 2 && m[95] == (sp_digit)-1)
+            err = sp_3072_mod_exp_2_96(r, e, expLen * 8, m);
+        else
+    #endif
+            err = sp_3072_mod_exp_96(r, b, e, expLen * 8, m, 0);
+
     }
 
     if (err == MP_OKAY) {
@@ -16728,238 +20380,160 @@
 
     return err;
 }
-
 #endif /* WOLFSSL_HAVE_SP_DH */
 
-#endif /* WOLFSSL_SP_NO_3072 */
-
-#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
-#ifdef WOLFSSL_HAVE_SP_ECC
-#ifndef WOLFSSL_SP_NO_256
-
-/* Point structure to use. */
-typedef struct sp_point {
-    sp_digit x[2 * 8];
-    sp_digit y[2 * 8];
-    sp_digit z[2 * 8];
-    int infinity;
-} sp_point;
-
-/* The modulus (prime) of the curve P256. */
-static sp_digit p256_mod[8] = {
-    0xffffffff,0xffffffff,0xffffffff,0x00000000,0x00000000,0x00000000,
-    0x00000001,0xffffffff
-};
-/* The Montogmery normalizer for modulus of the curve P256. */
-static sp_digit p256_norm_mod[8] = {
-    0x00000001,0x00000000,0x00000000,0xffffffff,0xffffffff,0xffffffff,
-    0xfffffffe,0x00000000
-};
-/* The Montogmery multiplier for modulus of the curve P256. */
-static sp_digit p256_mp_mod = 0x00000001;
-#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
-                                            defined(HAVE_ECC_VERIFY)
-/* The order of the curve P256. */
-static sp_digit p256_order[8] = {
-    0xfc632551,0xf3b9cac2,0xa7179e84,0xbce6faad,0xffffffff,0xffffffff,
-    0x00000000,0xffffffff
-};
-#endif
-/* The order of the curve P256 minus 2. */
-static sp_digit p256_order2[8] = {
-    0xfc63254f,0xf3b9cac2,0xa7179e84,0xbce6faad,0xffffffff,0xffffffff,
-    0x00000000,0xffffffff
-};
-#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
-/* The Montogmery normalizer for order of the curve P256. */
-static sp_digit p256_norm_order[8] = {
-    0x039cdaaf,0x0c46353d,0x58e8617b,0x43190552,0x00000000,0x00000000,
-    0xffffffff,0x00000000
-};
-#endif
-#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
-/* The Montogmery multiplier for order of the curve P256. */
-static sp_digit p256_mp_order = 0xee00bc4f;
-#endif
-/* The base point of curve P256. */
-static sp_point p256_base = {
-    /* X ordinate */
-    {
-        0xd898c296,0xf4a13945,0x2deb33a0,0x77037d81,0x63a440f2,0xf8bce6e5,
-        0xe12c4247,0x6b17d1f2
-    },
-    /* Y ordinate */
-    {
-        0x37bf51f5,0xcbb64068,0x6b315ece,0x2bce3357,0x7c0f9e16,0x8ee7eb4a,
-        0xfe1a7f9b,0x4fe342e2
-    },
-    /* Z ordinate */
-    {
-        0x00000001,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
-        0x00000000,0x00000000
-    },
-    /* infinity */
-    0
-};
-#if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY)
-static sp_digit p256_b[8] = {
-    0x27d2604b,0x3bce3c3e,0xcc53b0f6,0x651d06b0,0x769886bc,0xb3ebbd55,
-    0xaa3a93e7,0x5ac635d8
-};
-#endif
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-/* Allocate memory for point and return error. */
-#define sp_ecc_point_new(heap, sp, p)                                   \
-    ((p = XMALLOC(sizeof(sp_point), heap, DYNAMIC_TYPE_ECC)) == NULL) ? \
-        MEMORY_E : MP_OKAY
-#else
-/* Set pointer to data and return no error. */
-#define sp_ecc_point_new(heap, sp, p)   ((p = &sp) == NULL) ? MEMORY_E : MP_OKAY
-#endif
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-/* If valid pointer then clear point data if requested and free data. */
-#define sp_ecc_point_free(p, clear, heap)     \
-    do {                                      \
-        if (p != NULL) {                      \
-            if (clear)                        \
-                XMEMSET(p, 0, sizeof(*p));    \
-            XFREE(p, heap, DYNAMIC_TYPE_ECC); \
-        }                                     \
-    }                                         \
-    while (0)
-#else
-/* Clear point data if requested. */
-#define sp_ecc_point_free(p, clear, heap) \
-    do {                                  \
-        if (clear)                        \
-            XMEMSET(p, 0, sizeof(*p));    \
-    }                                     \
-    while (0)
-#endif
-
-/* Multiply a number by Montogmery normalizer mod modulus (prime).
- *
- * r  The resulting Montgomery form number.
- * a  The number to convert.
- * m  The modulus (prime).
- */
-static int sp_256_mod_mul_norm_8(sp_digit* r, sp_digit* a, sp_digit* m)
-{
-    int64_t t[8];
-    int64_t a64[8];
-    int64_t o;
-
-    (void)m;
-
-    a64[0] = a[0];
-    a64[1] = a[1];
-    a64[2] = a[2];
-    a64[3] = a[3];
-    a64[4] = a[4];
-    a64[5] = a[5];
-    a64[6] = a[6];
-    a64[7] = a[7];
-
-    /*  1  1  0 -1 -1 -1 -1  0 */
-    t[0] = 0 + a64[0] + a64[1] - a64[3] - a64[4] - a64[5] - a64[6];
-    /*  0  1  1  0 -1 -1 -1 -1 */
-    t[1] = 0 + a64[1] + a64[2] - a64[4] - a64[5] - a64[6] - a64[7];
-    /*  0  0  1  1  0 -1 -1 -1 */
-    t[2] = 0 + a64[2] + a64[3] - a64[5] - a64[6] - a64[7];
-    /* -1 -1  0  2  2  1  0 -1 */
-    t[3] = 0 - a64[0] - a64[1] + 2 * a64[3] + 2 * a64[4] + a64[5] - a64[7];
-    /*  0 -1 -1  0  2  2  1  0 */
-    t[4] = 0 - a64[1] - a64[2] + 2 * a64[4] + 2 * a64[5] + a64[6];
-    /*  0  0 -1 -1  0  2  2  1 */
-    t[5] = 0 - a64[2] - a64[3] + 2 * a64[5] + 2 * a64[6] + a64[7];
-    /* -1 -1  0  0  0  1  3  2 */
-    t[6] = 0 - a64[0] - a64[1] + a64[5] + 3 * a64[6] + 2 * a64[7];
-    /*  1  0 -1 -1 -1 -1  0  3 */
-    t[7] = 0 + a64[0] - a64[2] - a64[3] - a64[4] - a64[5] + 3 * a64[7];
-
-    t[1] += t[0] >> 32; t[0] &= 0xffffffff;
-    t[2] += t[1] >> 32; t[1] &= 0xffffffff;
-    t[3] += t[2] >> 32; t[2] &= 0xffffffff;
-    t[4] += t[3] >> 32; t[3] &= 0xffffffff;
-    t[5] += t[4] >> 32; t[4] &= 0xffffffff;
-    t[6] += t[5] >> 32; t[5] &= 0xffffffff;
-    t[7] += t[6] >> 32; t[6] &= 0xffffffff;
-    o     = t[7] >> 32; t[7] &= 0xffffffff;
-    t[0] += o;
-    t[3] -= o;
-    t[6] -= o;
-    t[7] += o;
-    t[1] += t[0] >> 32; t[0] &= 0xffffffff;
-    t[2] += t[1] >> 32; t[1] &= 0xffffffff;
-    t[3] += t[2] >> 32; t[2] &= 0xffffffff;
-    t[4] += t[3] >> 32; t[3] &= 0xffffffff;
-    t[5] += t[4] >> 32; t[4] &= 0xffffffff;
-    t[6] += t[5] >> 32; t[5] &= 0xffffffff;
-    t[7] += t[6] >> 32; t[6] &= 0xffffffff;
-    r[0] = t[0];
-    r[1] = t[1];
-    r[2] = t[2];
-    r[3] = t[3];
-    r[4] = t[4];
-    r[5] = t[5];
-    r[6] = t[6];
-    r[7] = t[7];
-
-    return MP_OKAY;
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base  Base. MP integer.
+ * exp   Exponent. MP integer.
+ * mod   Modulus. MP integer.
+ * res   Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+    int err = MP_OKAY;
+    sp_digit b[96], e[48], m[48];
+    sp_digit* r = b;
+    int expBits = mp_count_bits(exp);
+
+    if (mp_count_bits(base) > 1536) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expBits > 1536) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 1536) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_3072_from_mp(b, 48, base);
+        sp_3072_from_mp(e, 48, exp);
+        sp_3072_from_mp(m, 48, mod);
+
+        err = sp_3072_mod_exp_48(r, b, e, expBits, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        XMEMSET(r + 48, 0, sizeof(*r) * 48U);
+        err = sp_3072_to_mp(r, res);
+        res->used = mod->used;
+        mp_clamp(res);
+    }
+
+    XMEMSET(e, 0, sizeof(e));
+
+    return err;
+}
+
+#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
+
+#endif /* !WOLFSSL_SP_NO_3072 */
+
+#ifdef WOLFSSL_SP_4096
+/* Read big endian unsigned byte array into r.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
+ * a  Byte array.
+ * n  Number of bytes in array to read.
+ */
+static void sp_4096_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = n-1; i >= 0; i--) {
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 24U) {
+            r[j] &= 0xffffffff;
+            s = 32U - s;
+            if (j + 1 >= size) {
+                break;
+            }
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
 }
 
 /* Convert an mp_int to an array of sp_digit.
  *
  * r  A single precision integer.
+ * size  Maximum number of bytes to convert
  * a  A multi-precision integer.
  */
-static void sp_256_from_mp(sp_digit* r, int max, mp_int* a)
+static void sp_4096_from_mp(sp_digit* r, int size, const mp_int* a)
 {
 #if DIGIT_BIT == 32
     int j;
 
     XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
 
-    for (j = a->used; j < max; j++)
+    for (j = a->used; j < size; j++) {
         r[j] = 0;
+    }
 #elif DIGIT_BIT > 32
-    int i, j = 0, s = 0;
+    int i, j = 0;
+    word32 s = 0;
 
     r[0] = 0;
-    for (i = 0; i < a->used && j < max; i++) {
-        r[j] |= a->dp[i] << s;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
         r[j] &= 0xffffffff;
-        s = 32 - s;
-        if (j + 1 >= max)
+        s = 32U - s;
+        if (j + 1 >= size) {
             break;
-        r[++j] = a->dp[i] >> s;
-        while (s + 32 <= DIGIT_BIT) {
-            s += 32;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 32U) <= (word32)DIGIT_BIT) {
+            s += 32U;
             r[j] &= 0xffffffff;
-            if (j + 1 >= max)
+            if (j + 1 >= size) {
                 break;
-            if (s < DIGIT_BIT)
-                r[++j] = a->dp[i] >> s;
-            else
-                r[++j] = 0;
-        }
-        s = DIGIT_BIT - s;
-    }
-
-    for (j++; j < max; j++)
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
         r[j] = 0;
+    }
 #else
     int i, j = 0, s = 0;
 
     r[0] = 0;
-    for (i = 0; i < a->used && j < max; i++) {
+    for (i = 0; i < a->used && j < size; i++) {
         r[j] |= ((sp_digit)a->dp[i]) << s;
         if (s + DIGIT_BIT >= 32) {
             r[j] &= 0xffffffff;
-            if (j + 1 >= max)
+            if (j + 1 >= size) {
                 break;
+            }
             s = 32 - s;
             if (s == DIGIT_BIT) {
                 r[++j] = 0;
@@ -16970,21 +20544,53185 @@
                 s = DIGIT_BIT - s;
             }
         }
-        else
+        else {
             s += DIGIT_BIT;
-    }
-
-    for (j++; j < max; j++)
+        }
+    }
+
+    for (j++; j < size; j++) {
         r[j] = 0;
-#endif
-}
-
-/* Convert a point of type ecc_point to type sp_point.
- *
- * p   Point of type sp_point (result).
+    }
+#endif
+}
+
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 512
+ *
+ * r  A single precision integer.
+ * a  Byte array.
+ */
+static void sp_4096_to_bin(sp_digit* r, byte* a)
+{
+    int i, j, s = 0, b;
+
+    j = 4096 / 8 - 1;
+    a[j] = 0;
+    for (i=0; i<128 && j>=0; i++) {
+        b = 0;
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
+            break;
+        }
+        while (b < 32) {
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
+                break;
+            }
+        }
+        s = 8 - (b - 32);
+        if (j >= 0) {
+            a[j] = 0;
+        }
+        if (s != 0) {
+            j++;
+        }
+    }
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+static sp_digit sp_4096_add_64(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r12, #0\n\t"
+        "ldr	r4, [%[a], #0]\n\t"
+        "ldr	r5, [%[a], #4]\n\t"
+        "ldr	r6, [%[a], #8]\n\t"
+        "ldr	r7, [%[a], #12]\n\t"
+        "ldr	r8, [%[b], #0]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "ldr	r10, [%[b], #8]\n\t"
+        "ldr	r14, [%[b], #12]\n\t"
+        "adds	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #0]\n\t"
+        "str	r5, [%[r], #4]\n\t"
+        "str	r6, [%[r], #8]\n\t"
+        "str	r7, [%[r], #12]\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "ldr	r5, [%[a], #20]\n\t"
+        "ldr	r6, [%[a], #24]\n\t"
+        "ldr	r7, [%[a], #28]\n\t"
+        "ldr	r8, [%[b], #16]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "ldr	r10, [%[b], #24]\n\t"
+        "ldr	r14, [%[b], #28]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "str	r5, [%[r], #20]\n\t"
+        "str	r6, [%[r], #24]\n\t"
+        "str	r7, [%[r], #28]\n\t"
+        "ldr	r4, [%[a], #32]\n\t"
+        "ldr	r5, [%[a], #36]\n\t"
+        "ldr	r6, [%[a], #40]\n\t"
+        "ldr	r7, [%[a], #44]\n\t"
+        "ldr	r8, [%[b], #32]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "ldr	r10, [%[b], #40]\n\t"
+        "ldr	r14, [%[b], #44]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #32]\n\t"
+        "str	r5, [%[r], #36]\n\t"
+        "str	r6, [%[r], #40]\n\t"
+        "str	r7, [%[r], #44]\n\t"
+        "ldr	r4, [%[a], #48]\n\t"
+        "ldr	r5, [%[a], #52]\n\t"
+        "ldr	r6, [%[a], #56]\n\t"
+        "ldr	r7, [%[a], #60]\n\t"
+        "ldr	r8, [%[b], #48]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "ldr	r10, [%[b], #56]\n\t"
+        "ldr	r14, [%[b], #60]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #48]\n\t"
+        "str	r5, [%[r], #52]\n\t"
+        "str	r6, [%[r], #56]\n\t"
+        "str	r7, [%[r], #60]\n\t"
+        "ldr	r4, [%[a], #64]\n\t"
+        "ldr	r5, [%[a], #68]\n\t"
+        "ldr	r6, [%[a], #72]\n\t"
+        "ldr	r7, [%[a], #76]\n\t"
+        "ldr	r8, [%[b], #64]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "ldr	r10, [%[b], #72]\n\t"
+        "ldr	r14, [%[b], #76]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #64]\n\t"
+        "str	r5, [%[r], #68]\n\t"
+        "str	r6, [%[r], #72]\n\t"
+        "str	r7, [%[r], #76]\n\t"
+        "ldr	r4, [%[a], #80]\n\t"
+        "ldr	r5, [%[a], #84]\n\t"
+        "ldr	r6, [%[a], #88]\n\t"
+        "ldr	r7, [%[a], #92]\n\t"
+        "ldr	r8, [%[b], #80]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "ldr	r10, [%[b], #88]\n\t"
+        "ldr	r14, [%[b], #92]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #80]\n\t"
+        "str	r5, [%[r], #84]\n\t"
+        "str	r6, [%[r], #88]\n\t"
+        "str	r7, [%[r], #92]\n\t"
+        "ldr	r4, [%[a], #96]\n\t"
+        "ldr	r5, [%[a], #100]\n\t"
+        "ldr	r6, [%[a], #104]\n\t"
+        "ldr	r7, [%[a], #108]\n\t"
+        "ldr	r8, [%[b], #96]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "ldr	r10, [%[b], #104]\n\t"
+        "ldr	r14, [%[b], #108]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #96]\n\t"
+        "str	r5, [%[r], #100]\n\t"
+        "str	r6, [%[r], #104]\n\t"
+        "str	r7, [%[r], #108]\n\t"
+        "ldr	r4, [%[a], #112]\n\t"
+        "ldr	r5, [%[a], #116]\n\t"
+        "ldr	r6, [%[a], #120]\n\t"
+        "ldr	r7, [%[a], #124]\n\t"
+        "ldr	r8, [%[b], #112]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "ldr	r10, [%[b], #120]\n\t"
+        "ldr	r14, [%[b], #124]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #112]\n\t"
+        "str	r5, [%[r], #116]\n\t"
+        "str	r6, [%[r], #120]\n\t"
+        "str	r7, [%[r], #124]\n\t"
+        "ldr	r4, [%[a], #128]\n\t"
+        "ldr	r5, [%[a], #132]\n\t"
+        "ldr	r6, [%[a], #136]\n\t"
+        "ldr	r7, [%[a], #140]\n\t"
+        "ldr	r8, [%[b], #128]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "ldr	r10, [%[b], #136]\n\t"
+        "ldr	r14, [%[b], #140]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #128]\n\t"
+        "str	r5, [%[r], #132]\n\t"
+        "str	r6, [%[r], #136]\n\t"
+        "str	r7, [%[r], #140]\n\t"
+        "ldr	r4, [%[a], #144]\n\t"
+        "ldr	r5, [%[a], #148]\n\t"
+        "ldr	r6, [%[a], #152]\n\t"
+        "ldr	r7, [%[a], #156]\n\t"
+        "ldr	r8, [%[b], #144]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "ldr	r10, [%[b], #152]\n\t"
+        "ldr	r14, [%[b], #156]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #144]\n\t"
+        "str	r5, [%[r], #148]\n\t"
+        "str	r6, [%[r], #152]\n\t"
+        "str	r7, [%[r], #156]\n\t"
+        "ldr	r4, [%[a], #160]\n\t"
+        "ldr	r5, [%[a], #164]\n\t"
+        "ldr	r6, [%[a], #168]\n\t"
+        "ldr	r7, [%[a], #172]\n\t"
+        "ldr	r8, [%[b], #160]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "ldr	r10, [%[b], #168]\n\t"
+        "ldr	r14, [%[b], #172]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #160]\n\t"
+        "str	r5, [%[r], #164]\n\t"
+        "str	r6, [%[r], #168]\n\t"
+        "str	r7, [%[r], #172]\n\t"
+        "ldr	r4, [%[a], #176]\n\t"
+        "ldr	r5, [%[a], #180]\n\t"
+        "ldr	r6, [%[a], #184]\n\t"
+        "ldr	r7, [%[a], #188]\n\t"
+        "ldr	r8, [%[b], #176]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "ldr	r10, [%[b], #184]\n\t"
+        "ldr	r14, [%[b], #188]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #176]\n\t"
+        "str	r5, [%[r], #180]\n\t"
+        "str	r6, [%[r], #184]\n\t"
+        "str	r7, [%[r], #188]\n\t"
+        "ldr	r4, [%[a], #192]\n\t"
+        "ldr	r5, [%[a], #196]\n\t"
+        "ldr	r6, [%[a], #200]\n\t"
+        "ldr	r7, [%[a], #204]\n\t"
+        "ldr	r8, [%[b], #192]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "ldr	r10, [%[b], #200]\n\t"
+        "ldr	r14, [%[b], #204]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #192]\n\t"
+        "str	r5, [%[r], #196]\n\t"
+        "str	r6, [%[r], #200]\n\t"
+        "str	r7, [%[r], #204]\n\t"
+        "ldr	r4, [%[a], #208]\n\t"
+        "ldr	r5, [%[a], #212]\n\t"
+        "ldr	r6, [%[a], #216]\n\t"
+        "ldr	r7, [%[a], #220]\n\t"
+        "ldr	r8, [%[b], #208]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "ldr	r10, [%[b], #216]\n\t"
+        "ldr	r14, [%[b], #220]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #208]\n\t"
+        "str	r5, [%[r], #212]\n\t"
+        "str	r6, [%[r], #216]\n\t"
+        "str	r7, [%[r], #220]\n\t"
+        "ldr	r4, [%[a], #224]\n\t"
+        "ldr	r5, [%[a], #228]\n\t"
+        "ldr	r6, [%[a], #232]\n\t"
+        "ldr	r7, [%[a], #236]\n\t"
+        "ldr	r8, [%[b], #224]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "ldr	r10, [%[b], #232]\n\t"
+        "ldr	r14, [%[b], #236]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #224]\n\t"
+        "str	r5, [%[r], #228]\n\t"
+        "str	r6, [%[r], #232]\n\t"
+        "str	r7, [%[r], #236]\n\t"
+        "ldr	r4, [%[a], #240]\n\t"
+        "ldr	r5, [%[a], #244]\n\t"
+        "ldr	r6, [%[a], #248]\n\t"
+        "ldr	r7, [%[a], #252]\n\t"
+        "ldr	r8, [%[b], #240]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "ldr	r10, [%[b], #248]\n\t"
+        "ldr	r14, [%[b], #252]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #240]\n\t"
+        "str	r5, [%[r], #244]\n\t"
+        "str	r6, [%[r], #248]\n\t"
+        "str	r7, [%[r], #252]\n\t"
+        "adc	%[c], r12, r12\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+    );
+
+    return c;
+}
+
+/* Sub b from a into a. (a -= b)
+ *
+ * a  A single precision integer and result.
+ * b  A single precision integer.
+ */
+static sp_digit sp_4096_sub_in_place_128(sp_digit* a, const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldr	r2, [%[a], #0]\n\t"
+        "ldr	r3, [%[a], #4]\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "ldr	r5, [%[a], #12]\n\t"
+        "ldr	r6, [%[b], #0]\n\t"
+        "ldr	r7, [%[b], #4]\n\t"
+        "ldr	r8, [%[b], #8]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "subs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #0]\n\t"
+        "str	r3, [%[a], #4]\n\t"
+        "str	r4, [%[a], #8]\n\t"
+        "str	r5, [%[a], #12]\n\t"
+        "ldr	r2, [%[a], #16]\n\t"
+        "ldr	r3, [%[a], #20]\n\t"
+        "ldr	r4, [%[a], #24]\n\t"
+        "ldr	r5, [%[a], #28]\n\t"
+        "ldr	r6, [%[b], #16]\n\t"
+        "ldr	r7, [%[b], #20]\n\t"
+        "ldr	r8, [%[b], #24]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #16]\n\t"
+        "str	r3, [%[a], #20]\n\t"
+        "str	r4, [%[a], #24]\n\t"
+        "str	r5, [%[a], #28]\n\t"
+        "ldr	r2, [%[a], #32]\n\t"
+        "ldr	r3, [%[a], #36]\n\t"
+        "ldr	r4, [%[a], #40]\n\t"
+        "ldr	r5, [%[a], #44]\n\t"
+        "ldr	r6, [%[b], #32]\n\t"
+        "ldr	r7, [%[b], #36]\n\t"
+        "ldr	r8, [%[b], #40]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #32]\n\t"
+        "str	r3, [%[a], #36]\n\t"
+        "str	r4, [%[a], #40]\n\t"
+        "str	r5, [%[a], #44]\n\t"
+        "ldr	r2, [%[a], #48]\n\t"
+        "ldr	r3, [%[a], #52]\n\t"
+        "ldr	r4, [%[a], #56]\n\t"
+        "ldr	r5, [%[a], #60]\n\t"
+        "ldr	r6, [%[b], #48]\n\t"
+        "ldr	r7, [%[b], #52]\n\t"
+        "ldr	r8, [%[b], #56]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #48]\n\t"
+        "str	r3, [%[a], #52]\n\t"
+        "str	r4, [%[a], #56]\n\t"
+        "str	r5, [%[a], #60]\n\t"
+        "ldr	r2, [%[a], #64]\n\t"
+        "ldr	r3, [%[a], #68]\n\t"
+        "ldr	r4, [%[a], #72]\n\t"
+        "ldr	r5, [%[a], #76]\n\t"
+        "ldr	r6, [%[b], #64]\n\t"
+        "ldr	r7, [%[b], #68]\n\t"
+        "ldr	r8, [%[b], #72]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #64]\n\t"
+        "str	r3, [%[a], #68]\n\t"
+        "str	r4, [%[a], #72]\n\t"
+        "str	r5, [%[a], #76]\n\t"
+        "ldr	r2, [%[a], #80]\n\t"
+        "ldr	r3, [%[a], #84]\n\t"
+        "ldr	r4, [%[a], #88]\n\t"
+        "ldr	r5, [%[a], #92]\n\t"
+        "ldr	r6, [%[b], #80]\n\t"
+        "ldr	r7, [%[b], #84]\n\t"
+        "ldr	r8, [%[b], #88]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #80]\n\t"
+        "str	r3, [%[a], #84]\n\t"
+        "str	r4, [%[a], #88]\n\t"
+        "str	r5, [%[a], #92]\n\t"
+        "ldr	r2, [%[a], #96]\n\t"
+        "ldr	r3, [%[a], #100]\n\t"
+        "ldr	r4, [%[a], #104]\n\t"
+        "ldr	r5, [%[a], #108]\n\t"
+        "ldr	r6, [%[b], #96]\n\t"
+        "ldr	r7, [%[b], #100]\n\t"
+        "ldr	r8, [%[b], #104]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #96]\n\t"
+        "str	r3, [%[a], #100]\n\t"
+        "str	r4, [%[a], #104]\n\t"
+        "str	r5, [%[a], #108]\n\t"
+        "ldr	r2, [%[a], #112]\n\t"
+        "ldr	r3, [%[a], #116]\n\t"
+        "ldr	r4, [%[a], #120]\n\t"
+        "ldr	r5, [%[a], #124]\n\t"
+        "ldr	r6, [%[b], #112]\n\t"
+        "ldr	r7, [%[b], #116]\n\t"
+        "ldr	r8, [%[b], #120]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #112]\n\t"
+        "str	r3, [%[a], #116]\n\t"
+        "str	r4, [%[a], #120]\n\t"
+        "str	r5, [%[a], #124]\n\t"
+        "ldr	r2, [%[a], #128]\n\t"
+        "ldr	r3, [%[a], #132]\n\t"
+        "ldr	r4, [%[a], #136]\n\t"
+        "ldr	r5, [%[a], #140]\n\t"
+        "ldr	r6, [%[b], #128]\n\t"
+        "ldr	r7, [%[b], #132]\n\t"
+        "ldr	r8, [%[b], #136]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #128]\n\t"
+        "str	r3, [%[a], #132]\n\t"
+        "str	r4, [%[a], #136]\n\t"
+        "str	r5, [%[a], #140]\n\t"
+        "ldr	r2, [%[a], #144]\n\t"
+        "ldr	r3, [%[a], #148]\n\t"
+        "ldr	r4, [%[a], #152]\n\t"
+        "ldr	r5, [%[a], #156]\n\t"
+        "ldr	r6, [%[b], #144]\n\t"
+        "ldr	r7, [%[b], #148]\n\t"
+        "ldr	r8, [%[b], #152]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #144]\n\t"
+        "str	r3, [%[a], #148]\n\t"
+        "str	r4, [%[a], #152]\n\t"
+        "str	r5, [%[a], #156]\n\t"
+        "ldr	r2, [%[a], #160]\n\t"
+        "ldr	r3, [%[a], #164]\n\t"
+        "ldr	r4, [%[a], #168]\n\t"
+        "ldr	r5, [%[a], #172]\n\t"
+        "ldr	r6, [%[b], #160]\n\t"
+        "ldr	r7, [%[b], #164]\n\t"
+        "ldr	r8, [%[b], #168]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #160]\n\t"
+        "str	r3, [%[a], #164]\n\t"
+        "str	r4, [%[a], #168]\n\t"
+        "str	r5, [%[a], #172]\n\t"
+        "ldr	r2, [%[a], #176]\n\t"
+        "ldr	r3, [%[a], #180]\n\t"
+        "ldr	r4, [%[a], #184]\n\t"
+        "ldr	r5, [%[a], #188]\n\t"
+        "ldr	r6, [%[b], #176]\n\t"
+        "ldr	r7, [%[b], #180]\n\t"
+        "ldr	r8, [%[b], #184]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #176]\n\t"
+        "str	r3, [%[a], #180]\n\t"
+        "str	r4, [%[a], #184]\n\t"
+        "str	r5, [%[a], #188]\n\t"
+        "ldr	r2, [%[a], #192]\n\t"
+        "ldr	r3, [%[a], #196]\n\t"
+        "ldr	r4, [%[a], #200]\n\t"
+        "ldr	r5, [%[a], #204]\n\t"
+        "ldr	r6, [%[b], #192]\n\t"
+        "ldr	r7, [%[b], #196]\n\t"
+        "ldr	r8, [%[b], #200]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #192]\n\t"
+        "str	r3, [%[a], #196]\n\t"
+        "str	r4, [%[a], #200]\n\t"
+        "str	r5, [%[a], #204]\n\t"
+        "ldr	r2, [%[a], #208]\n\t"
+        "ldr	r3, [%[a], #212]\n\t"
+        "ldr	r4, [%[a], #216]\n\t"
+        "ldr	r5, [%[a], #220]\n\t"
+        "ldr	r6, [%[b], #208]\n\t"
+        "ldr	r7, [%[b], #212]\n\t"
+        "ldr	r8, [%[b], #216]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #208]\n\t"
+        "str	r3, [%[a], #212]\n\t"
+        "str	r4, [%[a], #216]\n\t"
+        "str	r5, [%[a], #220]\n\t"
+        "ldr	r2, [%[a], #224]\n\t"
+        "ldr	r3, [%[a], #228]\n\t"
+        "ldr	r4, [%[a], #232]\n\t"
+        "ldr	r5, [%[a], #236]\n\t"
+        "ldr	r6, [%[b], #224]\n\t"
+        "ldr	r7, [%[b], #228]\n\t"
+        "ldr	r8, [%[b], #232]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #224]\n\t"
+        "str	r3, [%[a], #228]\n\t"
+        "str	r4, [%[a], #232]\n\t"
+        "str	r5, [%[a], #236]\n\t"
+        "ldr	r2, [%[a], #240]\n\t"
+        "ldr	r3, [%[a], #244]\n\t"
+        "ldr	r4, [%[a], #248]\n\t"
+        "ldr	r5, [%[a], #252]\n\t"
+        "ldr	r6, [%[b], #240]\n\t"
+        "ldr	r7, [%[b], #244]\n\t"
+        "ldr	r8, [%[b], #248]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #240]\n\t"
+        "str	r3, [%[a], #244]\n\t"
+        "str	r4, [%[a], #248]\n\t"
+        "str	r5, [%[a], #252]\n\t"
+        "ldr	r2, [%[a], #256]\n\t"
+        "ldr	r3, [%[a], #260]\n\t"
+        "ldr	r4, [%[a], #264]\n\t"
+        "ldr	r5, [%[a], #268]\n\t"
+        "ldr	r6, [%[b], #256]\n\t"
+        "ldr	r7, [%[b], #260]\n\t"
+        "ldr	r8, [%[b], #264]\n\t"
+        "ldr	r9, [%[b], #268]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #256]\n\t"
+        "str	r3, [%[a], #260]\n\t"
+        "str	r4, [%[a], #264]\n\t"
+        "str	r5, [%[a], #268]\n\t"
+        "ldr	r2, [%[a], #272]\n\t"
+        "ldr	r3, [%[a], #276]\n\t"
+        "ldr	r4, [%[a], #280]\n\t"
+        "ldr	r5, [%[a], #284]\n\t"
+        "ldr	r6, [%[b], #272]\n\t"
+        "ldr	r7, [%[b], #276]\n\t"
+        "ldr	r8, [%[b], #280]\n\t"
+        "ldr	r9, [%[b], #284]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #272]\n\t"
+        "str	r3, [%[a], #276]\n\t"
+        "str	r4, [%[a], #280]\n\t"
+        "str	r5, [%[a], #284]\n\t"
+        "ldr	r2, [%[a], #288]\n\t"
+        "ldr	r3, [%[a], #292]\n\t"
+        "ldr	r4, [%[a], #296]\n\t"
+        "ldr	r5, [%[a], #300]\n\t"
+        "ldr	r6, [%[b], #288]\n\t"
+        "ldr	r7, [%[b], #292]\n\t"
+        "ldr	r8, [%[b], #296]\n\t"
+        "ldr	r9, [%[b], #300]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #288]\n\t"
+        "str	r3, [%[a], #292]\n\t"
+        "str	r4, [%[a], #296]\n\t"
+        "str	r5, [%[a], #300]\n\t"
+        "ldr	r2, [%[a], #304]\n\t"
+        "ldr	r3, [%[a], #308]\n\t"
+        "ldr	r4, [%[a], #312]\n\t"
+        "ldr	r5, [%[a], #316]\n\t"
+        "ldr	r6, [%[b], #304]\n\t"
+        "ldr	r7, [%[b], #308]\n\t"
+        "ldr	r8, [%[b], #312]\n\t"
+        "ldr	r9, [%[b], #316]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #304]\n\t"
+        "str	r3, [%[a], #308]\n\t"
+        "str	r4, [%[a], #312]\n\t"
+        "str	r5, [%[a], #316]\n\t"
+        "ldr	r2, [%[a], #320]\n\t"
+        "ldr	r3, [%[a], #324]\n\t"
+        "ldr	r4, [%[a], #328]\n\t"
+        "ldr	r5, [%[a], #332]\n\t"
+        "ldr	r6, [%[b], #320]\n\t"
+        "ldr	r7, [%[b], #324]\n\t"
+        "ldr	r8, [%[b], #328]\n\t"
+        "ldr	r9, [%[b], #332]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #320]\n\t"
+        "str	r3, [%[a], #324]\n\t"
+        "str	r4, [%[a], #328]\n\t"
+        "str	r5, [%[a], #332]\n\t"
+        "ldr	r2, [%[a], #336]\n\t"
+        "ldr	r3, [%[a], #340]\n\t"
+        "ldr	r4, [%[a], #344]\n\t"
+        "ldr	r5, [%[a], #348]\n\t"
+        "ldr	r6, [%[b], #336]\n\t"
+        "ldr	r7, [%[b], #340]\n\t"
+        "ldr	r8, [%[b], #344]\n\t"
+        "ldr	r9, [%[b], #348]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #336]\n\t"
+        "str	r3, [%[a], #340]\n\t"
+        "str	r4, [%[a], #344]\n\t"
+        "str	r5, [%[a], #348]\n\t"
+        "ldr	r2, [%[a], #352]\n\t"
+        "ldr	r3, [%[a], #356]\n\t"
+        "ldr	r4, [%[a], #360]\n\t"
+        "ldr	r5, [%[a], #364]\n\t"
+        "ldr	r6, [%[b], #352]\n\t"
+        "ldr	r7, [%[b], #356]\n\t"
+        "ldr	r8, [%[b], #360]\n\t"
+        "ldr	r9, [%[b], #364]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #352]\n\t"
+        "str	r3, [%[a], #356]\n\t"
+        "str	r4, [%[a], #360]\n\t"
+        "str	r5, [%[a], #364]\n\t"
+        "ldr	r2, [%[a], #368]\n\t"
+        "ldr	r3, [%[a], #372]\n\t"
+        "ldr	r4, [%[a], #376]\n\t"
+        "ldr	r5, [%[a], #380]\n\t"
+        "ldr	r6, [%[b], #368]\n\t"
+        "ldr	r7, [%[b], #372]\n\t"
+        "ldr	r8, [%[b], #376]\n\t"
+        "ldr	r9, [%[b], #380]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #368]\n\t"
+        "str	r3, [%[a], #372]\n\t"
+        "str	r4, [%[a], #376]\n\t"
+        "str	r5, [%[a], #380]\n\t"
+        "ldr	r2, [%[a], #384]\n\t"
+        "ldr	r3, [%[a], #388]\n\t"
+        "ldr	r4, [%[a], #392]\n\t"
+        "ldr	r5, [%[a], #396]\n\t"
+        "ldr	r6, [%[b], #384]\n\t"
+        "ldr	r7, [%[b], #388]\n\t"
+        "ldr	r8, [%[b], #392]\n\t"
+        "ldr	r9, [%[b], #396]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #384]\n\t"
+        "str	r3, [%[a], #388]\n\t"
+        "str	r4, [%[a], #392]\n\t"
+        "str	r5, [%[a], #396]\n\t"
+        "ldr	r2, [%[a], #400]\n\t"
+        "ldr	r3, [%[a], #404]\n\t"
+        "ldr	r4, [%[a], #408]\n\t"
+        "ldr	r5, [%[a], #412]\n\t"
+        "ldr	r6, [%[b], #400]\n\t"
+        "ldr	r7, [%[b], #404]\n\t"
+        "ldr	r8, [%[b], #408]\n\t"
+        "ldr	r9, [%[b], #412]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #400]\n\t"
+        "str	r3, [%[a], #404]\n\t"
+        "str	r4, [%[a], #408]\n\t"
+        "str	r5, [%[a], #412]\n\t"
+        "ldr	r2, [%[a], #416]\n\t"
+        "ldr	r3, [%[a], #420]\n\t"
+        "ldr	r4, [%[a], #424]\n\t"
+        "ldr	r5, [%[a], #428]\n\t"
+        "ldr	r6, [%[b], #416]\n\t"
+        "ldr	r7, [%[b], #420]\n\t"
+        "ldr	r8, [%[b], #424]\n\t"
+        "ldr	r9, [%[b], #428]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #416]\n\t"
+        "str	r3, [%[a], #420]\n\t"
+        "str	r4, [%[a], #424]\n\t"
+        "str	r5, [%[a], #428]\n\t"
+        "ldr	r2, [%[a], #432]\n\t"
+        "ldr	r3, [%[a], #436]\n\t"
+        "ldr	r4, [%[a], #440]\n\t"
+        "ldr	r5, [%[a], #444]\n\t"
+        "ldr	r6, [%[b], #432]\n\t"
+        "ldr	r7, [%[b], #436]\n\t"
+        "ldr	r8, [%[b], #440]\n\t"
+        "ldr	r9, [%[b], #444]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #432]\n\t"
+        "str	r3, [%[a], #436]\n\t"
+        "str	r4, [%[a], #440]\n\t"
+        "str	r5, [%[a], #444]\n\t"
+        "ldr	r2, [%[a], #448]\n\t"
+        "ldr	r3, [%[a], #452]\n\t"
+        "ldr	r4, [%[a], #456]\n\t"
+        "ldr	r5, [%[a], #460]\n\t"
+        "ldr	r6, [%[b], #448]\n\t"
+        "ldr	r7, [%[b], #452]\n\t"
+        "ldr	r8, [%[b], #456]\n\t"
+        "ldr	r9, [%[b], #460]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #448]\n\t"
+        "str	r3, [%[a], #452]\n\t"
+        "str	r4, [%[a], #456]\n\t"
+        "str	r5, [%[a], #460]\n\t"
+        "ldr	r2, [%[a], #464]\n\t"
+        "ldr	r3, [%[a], #468]\n\t"
+        "ldr	r4, [%[a], #472]\n\t"
+        "ldr	r5, [%[a], #476]\n\t"
+        "ldr	r6, [%[b], #464]\n\t"
+        "ldr	r7, [%[b], #468]\n\t"
+        "ldr	r8, [%[b], #472]\n\t"
+        "ldr	r9, [%[b], #476]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #464]\n\t"
+        "str	r3, [%[a], #468]\n\t"
+        "str	r4, [%[a], #472]\n\t"
+        "str	r5, [%[a], #476]\n\t"
+        "ldr	r2, [%[a], #480]\n\t"
+        "ldr	r3, [%[a], #484]\n\t"
+        "ldr	r4, [%[a], #488]\n\t"
+        "ldr	r5, [%[a], #492]\n\t"
+        "ldr	r6, [%[b], #480]\n\t"
+        "ldr	r7, [%[b], #484]\n\t"
+        "ldr	r8, [%[b], #488]\n\t"
+        "ldr	r9, [%[b], #492]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #480]\n\t"
+        "str	r3, [%[a], #484]\n\t"
+        "str	r4, [%[a], #488]\n\t"
+        "str	r5, [%[a], #492]\n\t"
+        "ldr	r2, [%[a], #496]\n\t"
+        "ldr	r3, [%[a], #500]\n\t"
+        "ldr	r4, [%[a], #504]\n\t"
+        "ldr	r5, [%[a], #508]\n\t"
+        "ldr	r6, [%[b], #496]\n\t"
+        "ldr	r7, [%[b], #500]\n\t"
+        "ldr	r8, [%[b], #504]\n\t"
+        "ldr	r9, [%[b], #508]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #496]\n\t"
+        "str	r3, [%[a], #500]\n\t"
+        "str	r4, [%[a], #504]\n\t"
+        "str	r5, [%[a], #508]\n\t"
+        "sbc	%[c], r9, r9\n\t"
+        : [c] "+r" (c)
+        : [a] "r" (a), [b] "r" (b)
+        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
+    );
+
+    return c;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r12, #0\n\t"
+        "ldr	r4, [%[a], #0]\n\t"
+        "ldr	r5, [%[a], #4]\n\t"
+        "ldr	r6, [%[a], #8]\n\t"
+        "ldr	r7, [%[a], #12]\n\t"
+        "ldr	r8, [%[b], #0]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "ldr	r10, [%[b], #8]\n\t"
+        "ldr	r14, [%[b], #12]\n\t"
+        "adds	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #0]\n\t"
+        "str	r5, [%[r], #4]\n\t"
+        "str	r6, [%[r], #8]\n\t"
+        "str	r7, [%[r], #12]\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "ldr	r5, [%[a], #20]\n\t"
+        "ldr	r6, [%[a], #24]\n\t"
+        "ldr	r7, [%[a], #28]\n\t"
+        "ldr	r8, [%[b], #16]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "ldr	r10, [%[b], #24]\n\t"
+        "ldr	r14, [%[b], #28]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "str	r5, [%[r], #20]\n\t"
+        "str	r6, [%[r], #24]\n\t"
+        "str	r7, [%[r], #28]\n\t"
+        "ldr	r4, [%[a], #32]\n\t"
+        "ldr	r5, [%[a], #36]\n\t"
+        "ldr	r6, [%[a], #40]\n\t"
+        "ldr	r7, [%[a], #44]\n\t"
+        "ldr	r8, [%[b], #32]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "ldr	r10, [%[b], #40]\n\t"
+        "ldr	r14, [%[b], #44]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #32]\n\t"
+        "str	r5, [%[r], #36]\n\t"
+        "str	r6, [%[r], #40]\n\t"
+        "str	r7, [%[r], #44]\n\t"
+        "ldr	r4, [%[a], #48]\n\t"
+        "ldr	r5, [%[a], #52]\n\t"
+        "ldr	r6, [%[a], #56]\n\t"
+        "ldr	r7, [%[a], #60]\n\t"
+        "ldr	r8, [%[b], #48]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "ldr	r10, [%[b], #56]\n\t"
+        "ldr	r14, [%[b], #60]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #48]\n\t"
+        "str	r5, [%[r], #52]\n\t"
+        "str	r6, [%[r], #56]\n\t"
+        "str	r7, [%[r], #60]\n\t"
+        "ldr	r4, [%[a], #64]\n\t"
+        "ldr	r5, [%[a], #68]\n\t"
+        "ldr	r6, [%[a], #72]\n\t"
+        "ldr	r7, [%[a], #76]\n\t"
+        "ldr	r8, [%[b], #64]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "ldr	r10, [%[b], #72]\n\t"
+        "ldr	r14, [%[b], #76]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #64]\n\t"
+        "str	r5, [%[r], #68]\n\t"
+        "str	r6, [%[r], #72]\n\t"
+        "str	r7, [%[r], #76]\n\t"
+        "ldr	r4, [%[a], #80]\n\t"
+        "ldr	r5, [%[a], #84]\n\t"
+        "ldr	r6, [%[a], #88]\n\t"
+        "ldr	r7, [%[a], #92]\n\t"
+        "ldr	r8, [%[b], #80]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "ldr	r10, [%[b], #88]\n\t"
+        "ldr	r14, [%[b], #92]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #80]\n\t"
+        "str	r5, [%[r], #84]\n\t"
+        "str	r6, [%[r], #88]\n\t"
+        "str	r7, [%[r], #92]\n\t"
+        "ldr	r4, [%[a], #96]\n\t"
+        "ldr	r5, [%[a], #100]\n\t"
+        "ldr	r6, [%[a], #104]\n\t"
+        "ldr	r7, [%[a], #108]\n\t"
+        "ldr	r8, [%[b], #96]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "ldr	r10, [%[b], #104]\n\t"
+        "ldr	r14, [%[b], #108]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #96]\n\t"
+        "str	r5, [%[r], #100]\n\t"
+        "str	r6, [%[r], #104]\n\t"
+        "str	r7, [%[r], #108]\n\t"
+        "ldr	r4, [%[a], #112]\n\t"
+        "ldr	r5, [%[a], #116]\n\t"
+        "ldr	r6, [%[a], #120]\n\t"
+        "ldr	r7, [%[a], #124]\n\t"
+        "ldr	r8, [%[b], #112]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "ldr	r10, [%[b], #120]\n\t"
+        "ldr	r14, [%[b], #124]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #112]\n\t"
+        "str	r5, [%[r], #116]\n\t"
+        "str	r6, [%[r], #120]\n\t"
+        "str	r7, [%[r], #124]\n\t"
+        "ldr	r4, [%[a], #128]\n\t"
+        "ldr	r5, [%[a], #132]\n\t"
+        "ldr	r6, [%[a], #136]\n\t"
+        "ldr	r7, [%[a], #140]\n\t"
+        "ldr	r8, [%[b], #128]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "ldr	r10, [%[b], #136]\n\t"
+        "ldr	r14, [%[b], #140]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #128]\n\t"
+        "str	r5, [%[r], #132]\n\t"
+        "str	r6, [%[r], #136]\n\t"
+        "str	r7, [%[r], #140]\n\t"
+        "ldr	r4, [%[a], #144]\n\t"
+        "ldr	r5, [%[a], #148]\n\t"
+        "ldr	r6, [%[a], #152]\n\t"
+        "ldr	r7, [%[a], #156]\n\t"
+        "ldr	r8, [%[b], #144]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "ldr	r10, [%[b], #152]\n\t"
+        "ldr	r14, [%[b], #156]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #144]\n\t"
+        "str	r5, [%[r], #148]\n\t"
+        "str	r6, [%[r], #152]\n\t"
+        "str	r7, [%[r], #156]\n\t"
+        "ldr	r4, [%[a], #160]\n\t"
+        "ldr	r5, [%[a], #164]\n\t"
+        "ldr	r6, [%[a], #168]\n\t"
+        "ldr	r7, [%[a], #172]\n\t"
+        "ldr	r8, [%[b], #160]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "ldr	r10, [%[b], #168]\n\t"
+        "ldr	r14, [%[b], #172]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #160]\n\t"
+        "str	r5, [%[r], #164]\n\t"
+        "str	r6, [%[r], #168]\n\t"
+        "str	r7, [%[r], #172]\n\t"
+        "ldr	r4, [%[a], #176]\n\t"
+        "ldr	r5, [%[a], #180]\n\t"
+        "ldr	r6, [%[a], #184]\n\t"
+        "ldr	r7, [%[a], #188]\n\t"
+        "ldr	r8, [%[b], #176]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "ldr	r10, [%[b], #184]\n\t"
+        "ldr	r14, [%[b], #188]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #176]\n\t"
+        "str	r5, [%[r], #180]\n\t"
+        "str	r6, [%[r], #184]\n\t"
+        "str	r7, [%[r], #188]\n\t"
+        "ldr	r4, [%[a], #192]\n\t"
+        "ldr	r5, [%[a], #196]\n\t"
+        "ldr	r6, [%[a], #200]\n\t"
+        "ldr	r7, [%[a], #204]\n\t"
+        "ldr	r8, [%[b], #192]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "ldr	r10, [%[b], #200]\n\t"
+        "ldr	r14, [%[b], #204]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #192]\n\t"
+        "str	r5, [%[r], #196]\n\t"
+        "str	r6, [%[r], #200]\n\t"
+        "str	r7, [%[r], #204]\n\t"
+        "ldr	r4, [%[a], #208]\n\t"
+        "ldr	r5, [%[a], #212]\n\t"
+        "ldr	r6, [%[a], #216]\n\t"
+        "ldr	r7, [%[a], #220]\n\t"
+        "ldr	r8, [%[b], #208]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "ldr	r10, [%[b], #216]\n\t"
+        "ldr	r14, [%[b], #220]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #208]\n\t"
+        "str	r5, [%[r], #212]\n\t"
+        "str	r6, [%[r], #216]\n\t"
+        "str	r7, [%[r], #220]\n\t"
+        "ldr	r4, [%[a], #224]\n\t"
+        "ldr	r5, [%[a], #228]\n\t"
+        "ldr	r6, [%[a], #232]\n\t"
+        "ldr	r7, [%[a], #236]\n\t"
+        "ldr	r8, [%[b], #224]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "ldr	r10, [%[b], #232]\n\t"
+        "ldr	r14, [%[b], #236]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #224]\n\t"
+        "str	r5, [%[r], #228]\n\t"
+        "str	r6, [%[r], #232]\n\t"
+        "str	r7, [%[r], #236]\n\t"
+        "ldr	r4, [%[a], #240]\n\t"
+        "ldr	r5, [%[a], #244]\n\t"
+        "ldr	r6, [%[a], #248]\n\t"
+        "ldr	r7, [%[a], #252]\n\t"
+        "ldr	r8, [%[b], #240]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "ldr	r10, [%[b], #248]\n\t"
+        "ldr	r14, [%[b], #252]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #240]\n\t"
+        "str	r5, [%[r], #244]\n\t"
+        "str	r6, [%[r], #248]\n\t"
+        "str	r7, [%[r], #252]\n\t"
+        "ldr	r4, [%[a], #256]\n\t"
+        "ldr	r5, [%[a], #260]\n\t"
+        "ldr	r6, [%[a], #264]\n\t"
+        "ldr	r7, [%[a], #268]\n\t"
+        "ldr	r8, [%[b], #256]\n\t"
+        "ldr	r9, [%[b], #260]\n\t"
+        "ldr	r10, [%[b], #264]\n\t"
+        "ldr	r14, [%[b], #268]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #256]\n\t"
+        "str	r5, [%[r], #260]\n\t"
+        "str	r6, [%[r], #264]\n\t"
+        "str	r7, [%[r], #268]\n\t"
+        "ldr	r4, [%[a], #272]\n\t"
+        "ldr	r5, [%[a], #276]\n\t"
+        "ldr	r6, [%[a], #280]\n\t"
+        "ldr	r7, [%[a], #284]\n\t"
+        "ldr	r8, [%[b], #272]\n\t"
+        "ldr	r9, [%[b], #276]\n\t"
+        "ldr	r10, [%[b], #280]\n\t"
+        "ldr	r14, [%[b], #284]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #272]\n\t"
+        "str	r5, [%[r], #276]\n\t"
+        "str	r6, [%[r], #280]\n\t"
+        "str	r7, [%[r], #284]\n\t"
+        "ldr	r4, [%[a], #288]\n\t"
+        "ldr	r5, [%[a], #292]\n\t"
+        "ldr	r6, [%[a], #296]\n\t"
+        "ldr	r7, [%[a], #300]\n\t"
+        "ldr	r8, [%[b], #288]\n\t"
+        "ldr	r9, [%[b], #292]\n\t"
+        "ldr	r10, [%[b], #296]\n\t"
+        "ldr	r14, [%[b], #300]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #288]\n\t"
+        "str	r5, [%[r], #292]\n\t"
+        "str	r6, [%[r], #296]\n\t"
+        "str	r7, [%[r], #300]\n\t"
+        "ldr	r4, [%[a], #304]\n\t"
+        "ldr	r5, [%[a], #308]\n\t"
+        "ldr	r6, [%[a], #312]\n\t"
+        "ldr	r7, [%[a], #316]\n\t"
+        "ldr	r8, [%[b], #304]\n\t"
+        "ldr	r9, [%[b], #308]\n\t"
+        "ldr	r10, [%[b], #312]\n\t"
+        "ldr	r14, [%[b], #316]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #304]\n\t"
+        "str	r5, [%[r], #308]\n\t"
+        "str	r6, [%[r], #312]\n\t"
+        "str	r7, [%[r], #316]\n\t"
+        "ldr	r4, [%[a], #320]\n\t"
+        "ldr	r5, [%[a], #324]\n\t"
+        "ldr	r6, [%[a], #328]\n\t"
+        "ldr	r7, [%[a], #332]\n\t"
+        "ldr	r8, [%[b], #320]\n\t"
+        "ldr	r9, [%[b], #324]\n\t"
+        "ldr	r10, [%[b], #328]\n\t"
+        "ldr	r14, [%[b], #332]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #320]\n\t"
+        "str	r5, [%[r], #324]\n\t"
+        "str	r6, [%[r], #328]\n\t"
+        "str	r7, [%[r], #332]\n\t"
+        "ldr	r4, [%[a], #336]\n\t"
+        "ldr	r5, [%[a], #340]\n\t"
+        "ldr	r6, [%[a], #344]\n\t"
+        "ldr	r7, [%[a], #348]\n\t"
+        "ldr	r8, [%[b], #336]\n\t"
+        "ldr	r9, [%[b], #340]\n\t"
+        "ldr	r10, [%[b], #344]\n\t"
+        "ldr	r14, [%[b], #348]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #336]\n\t"
+        "str	r5, [%[r], #340]\n\t"
+        "str	r6, [%[r], #344]\n\t"
+        "str	r7, [%[r], #348]\n\t"
+        "ldr	r4, [%[a], #352]\n\t"
+        "ldr	r5, [%[a], #356]\n\t"
+        "ldr	r6, [%[a], #360]\n\t"
+        "ldr	r7, [%[a], #364]\n\t"
+        "ldr	r8, [%[b], #352]\n\t"
+        "ldr	r9, [%[b], #356]\n\t"
+        "ldr	r10, [%[b], #360]\n\t"
+        "ldr	r14, [%[b], #364]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #352]\n\t"
+        "str	r5, [%[r], #356]\n\t"
+        "str	r6, [%[r], #360]\n\t"
+        "str	r7, [%[r], #364]\n\t"
+        "ldr	r4, [%[a], #368]\n\t"
+        "ldr	r5, [%[a], #372]\n\t"
+        "ldr	r6, [%[a], #376]\n\t"
+        "ldr	r7, [%[a], #380]\n\t"
+        "ldr	r8, [%[b], #368]\n\t"
+        "ldr	r9, [%[b], #372]\n\t"
+        "ldr	r10, [%[b], #376]\n\t"
+        "ldr	r14, [%[b], #380]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #368]\n\t"
+        "str	r5, [%[r], #372]\n\t"
+        "str	r6, [%[r], #376]\n\t"
+        "str	r7, [%[r], #380]\n\t"
+        "ldr	r4, [%[a], #384]\n\t"
+        "ldr	r5, [%[a], #388]\n\t"
+        "ldr	r6, [%[a], #392]\n\t"
+        "ldr	r7, [%[a], #396]\n\t"
+        "ldr	r8, [%[b], #384]\n\t"
+        "ldr	r9, [%[b], #388]\n\t"
+        "ldr	r10, [%[b], #392]\n\t"
+        "ldr	r14, [%[b], #396]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #384]\n\t"
+        "str	r5, [%[r], #388]\n\t"
+        "str	r6, [%[r], #392]\n\t"
+        "str	r7, [%[r], #396]\n\t"
+        "ldr	r4, [%[a], #400]\n\t"
+        "ldr	r5, [%[a], #404]\n\t"
+        "ldr	r6, [%[a], #408]\n\t"
+        "ldr	r7, [%[a], #412]\n\t"
+        "ldr	r8, [%[b], #400]\n\t"
+        "ldr	r9, [%[b], #404]\n\t"
+        "ldr	r10, [%[b], #408]\n\t"
+        "ldr	r14, [%[b], #412]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #400]\n\t"
+        "str	r5, [%[r], #404]\n\t"
+        "str	r6, [%[r], #408]\n\t"
+        "str	r7, [%[r], #412]\n\t"
+        "ldr	r4, [%[a], #416]\n\t"
+        "ldr	r5, [%[a], #420]\n\t"
+        "ldr	r6, [%[a], #424]\n\t"
+        "ldr	r7, [%[a], #428]\n\t"
+        "ldr	r8, [%[b], #416]\n\t"
+        "ldr	r9, [%[b], #420]\n\t"
+        "ldr	r10, [%[b], #424]\n\t"
+        "ldr	r14, [%[b], #428]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #416]\n\t"
+        "str	r5, [%[r], #420]\n\t"
+        "str	r6, [%[r], #424]\n\t"
+        "str	r7, [%[r], #428]\n\t"
+        "ldr	r4, [%[a], #432]\n\t"
+        "ldr	r5, [%[a], #436]\n\t"
+        "ldr	r6, [%[a], #440]\n\t"
+        "ldr	r7, [%[a], #444]\n\t"
+        "ldr	r8, [%[b], #432]\n\t"
+        "ldr	r9, [%[b], #436]\n\t"
+        "ldr	r10, [%[b], #440]\n\t"
+        "ldr	r14, [%[b], #444]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #432]\n\t"
+        "str	r5, [%[r], #436]\n\t"
+        "str	r6, [%[r], #440]\n\t"
+        "str	r7, [%[r], #444]\n\t"
+        "ldr	r4, [%[a], #448]\n\t"
+        "ldr	r5, [%[a], #452]\n\t"
+        "ldr	r6, [%[a], #456]\n\t"
+        "ldr	r7, [%[a], #460]\n\t"
+        "ldr	r8, [%[b], #448]\n\t"
+        "ldr	r9, [%[b], #452]\n\t"
+        "ldr	r10, [%[b], #456]\n\t"
+        "ldr	r14, [%[b], #460]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #448]\n\t"
+        "str	r5, [%[r], #452]\n\t"
+        "str	r6, [%[r], #456]\n\t"
+        "str	r7, [%[r], #460]\n\t"
+        "ldr	r4, [%[a], #464]\n\t"
+        "ldr	r5, [%[a], #468]\n\t"
+        "ldr	r6, [%[a], #472]\n\t"
+        "ldr	r7, [%[a], #476]\n\t"
+        "ldr	r8, [%[b], #464]\n\t"
+        "ldr	r9, [%[b], #468]\n\t"
+        "ldr	r10, [%[b], #472]\n\t"
+        "ldr	r14, [%[b], #476]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #464]\n\t"
+        "str	r5, [%[r], #468]\n\t"
+        "str	r6, [%[r], #472]\n\t"
+        "str	r7, [%[r], #476]\n\t"
+        "ldr	r4, [%[a], #480]\n\t"
+        "ldr	r5, [%[a], #484]\n\t"
+        "ldr	r6, [%[a], #488]\n\t"
+        "ldr	r7, [%[a], #492]\n\t"
+        "ldr	r8, [%[b], #480]\n\t"
+        "ldr	r9, [%[b], #484]\n\t"
+        "ldr	r10, [%[b], #488]\n\t"
+        "ldr	r14, [%[b], #492]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #480]\n\t"
+        "str	r5, [%[r], #484]\n\t"
+        "str	r6, [%[r], #488]\n\t"
+        "str	r7, [%[r], #492]\n\t"
+        "ldr	r4, [%[a], #496]\n\t"
+        "ldr	r5, [%[a], #500]\n\t"
+        "ldr	r6, [%[a], #504]\n\t"
+        "ldr	r7, [%[a], #508]\n\t"
+        "ldr	r8, [%[b], #496]\n\t"
+        "ldr	r9, [%[b], #500]\n\t"
+        "ldr	r10, [%[b], #504]\n\t"
+        "ldr	r14, [%[b], #508]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #496]\n\t"
+        "str	r5, [%[r], #500]\n\t"
+        "str	r6, [%[r], #504]\n\t"
+        "str	r7, [%[r], #508]\n\t"
+        "adc	%[c], r12, r12\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+    );
+
+    return c;
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+static void sp_4096_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+    __asm__ __volatile__ (
+        "sub	sp, sp, #256\n\t"
+        "mov	r10, #0\n\t"
+        "#  A[0] * B[0]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r3, r4, r8, r9\n\t"
+        "mov	r5, #0\n\t"
+        "str	r3, [sp]\n\t"
+        "#  A[0] * B[1]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[1] * B[0]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [sp, #4]\n\t"
+        "#  A[0] * B[2]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[1] * B[1]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[2] * B[0]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [sp, #8]\n\t"
+        "#  A[0] * B[3]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[1] * B[2]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[2] * B[1]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[3] * B[0]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [sp, #12]\n\t"
+        "#  A[0] * B[4]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[1] * B[3]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[2] * B[2]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[3] * B[1]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[4] * B[0]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [sp, #16]\n\t"
+        "#  A[0] * B[5]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[1] * B[4]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[2] * B[3]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[3] * B[2]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[4] * B[1]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[5] * B[0]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [sp, #20]\n\t"
+        "#  A[0] * B[6]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[1] * B[5]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[2] * B[4]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[3] * B[3]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[4] * B[2]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[5] * B[1]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[6] * B[0]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [sp, #24]\n\t"
+        "#  A[0] * B[7]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[1] * B[6]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[2] * B[5]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[3] * B[4]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[4] * B[3]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[5] * B[2]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[6] * B[1]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[7] * B[0]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [sp, #28]\n\t"
+        "#  A[0] * B[8]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[1] * B[7]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[2] * B[6]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[3] * B[5]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[4] * B[4]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[5] * B[3]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[6] * B[2]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[7] * B[1]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[8] * B[0]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [sp, #32]\n\t"
+        "#  A[0] * B[9]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[1] * B[8]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[2] * B[7]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[3] * B[6]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[4] * B[5]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[5] * B[4]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[6] * B[3]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[7] * B[2]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[8] * B[1]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[9] * B[0]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [sp, #36]\n\t"
+        "#  A[0] * B[10]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[1] * B[9]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[2] * B[8]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[3] * B[7]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[4] * B[6]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[5] * B[5]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[6] * B[4]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[7] * B[3]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[8] * B[2]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[9] * B[1]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[10] * B[0]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [sp, #40]\n\t"
+        "#  A[0] * B[11]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[1] * B[10]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[2] * B[9]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[3] * B[8]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[4] * B[7]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[5] * B[6]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[6] * B[5]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[7] * B[4]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[8] * B[3]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[9] * B[2]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[10] * B[1]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[11] * B[0]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [sp, #44]\n\t"
+        "#  A[0] * B[12]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[1] * B[11]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[2] * B[10]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[3] * B[9]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[4] * B[8]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[5] * B[7]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[6] * B[6]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[7] * B[5]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[8] * B[4]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[9] * B[3]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[10] * B[2]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[11] * B[1]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[12] * B[0]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [sp, #48]\n\t"
+        "#  A[0] * B[13]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[1] * B[12]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[2] * B[11]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[3] * B[10]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[4] * B[9]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[5] * B[8]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[6] * B[7]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[7] * B[6]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[8] * B[5]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[9] * B[4]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[10] * B[3]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[11] * B[2]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[12] * B[1]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[13] * B[0]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [sp, #52]\n\t"
+        "#  A[0] * B[14]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[1] * B[13]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[2] * B[12]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[3] * B[11]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[4] * B[10]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[5] * B[9]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[6] * B[8]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[7] * B[7]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[8] * B[6]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[9] * B[5]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[10] * B[4]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[11] * B[3]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[12] * B[2]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[13] * B[1]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[14] * B[0]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [sp, #56]\n\t"
+        "#  A[0] * B[15]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[1] * B[14]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[2] * B[13]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[3] * B[12]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[4] * B[11]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[5] * B[10]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[6] * B[9]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[7] * B[8]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[8] * B[7]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[9] * B[6]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[10] * B[5]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[11] * B[4]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[12] * B[3]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[13] * B[2]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[14] * B[1]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[15] * B[0]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [sp, #60]\n\t"
+        "#  A[0] * B[16]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[1] * B[15]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[2] * B[14]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[3] * B[13]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[4] * B[12]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[5] * B[11]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[6] * B[10]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[7] * B[9]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[8] * B[8]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[9] * B[7]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[10] * B[6]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[11] * B[5]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[12] * B[4]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[13] * B[3]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[14] * B[2]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[15] * B[1]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[16] * B[0]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [sp, #64]\n\t"
+        "#  A[0] * B[17]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[1] * B[16]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[2] * B[15]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[3] * B[14]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[4] * B[13]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[5] * B[12]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[6] * B[11]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[7] * B[10]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[8] * B[9]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[9] * B[8]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[10] * B[7]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[11] * B[6]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[12] * B[5]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[13] * B[4]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[14] * B[3]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[15] * B[2]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[16] * B[1]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[17] * B[0]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [sp, #68]\n\t"
+        "#  A[0] * B[18]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[1] * B[17]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[2] * B[16]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[3] * B[15]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[4] * B[14]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[5] * B[13]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[6] * B[12]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[7] * B[11]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[8] * B[10]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[9] * B[9]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[10] * B[8]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[11] * B[7]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[12] * B[6]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[13] * B[5]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[14] * B[4]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[15] * B[3]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[16] * B[2]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[17] * B[1]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[18] * B[0]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [sp, #72]\n\t"
+        "#  A[0] * B[19]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[1] * B[18]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[2] * B[17]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[3] * B[16]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[4] * B[15]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[5] * B[14]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[6] * B[13]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[7] * B[12]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[8] * B[11]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[9] * B[10]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[10] * B[9]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[11] * B[8]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[12] * B[7]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[13] * B[6]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[14] * B[5]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[15] * B[4]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[16] * B[3]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[17] * B[2]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[18] * B[1]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[19] * B[0]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [sp, #76]\n\t"
+        "#  A[0] * B[20]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[1] * B[19]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[2] * B[18]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[3] * B[17]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[4] * B[16]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[5] * B[15]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[6] * B[14]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[7] * B[13]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[8] * B[12]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[9] * B[11]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[10] * B[10]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[11] * B[9]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[12] * B[8]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[13] * B[7]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[14] * B[6]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[15] * B[5]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[16] * B[4]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[17] * B[3]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[18] * B[2]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[19] * B[1]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[20] * B[0]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [sp, #80]\n\t"
+        "#  A[0] * B[21]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[1] * B[20]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[2] * B[19]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[3] * B[18]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[4] * B[17]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[5] * B[16]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[6] * B[15]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[7] * B[14]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[8] * B[13]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[9] * B[12]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[10] * B[11]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[11] * B[10]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[12] * B[9]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[13] * B[8]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[14] * B[7]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[15] * B[6]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[16] * B[5]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[17] * B[4]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[18] * B[3]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[19] * B[2]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[20] * B[1]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[21] * B[0]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [sp, #84]\n\t"
+        "#  A[0] * B[22]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[1] * B[21]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[2] * B[20]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[3] * B[19]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[4] * B[18]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[5] * B[17]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[6] * B[16]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[7] * B[15]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[8] * B[14]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[9] * B[13]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[10] * B[12]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[11] * B[11]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[12] * B[10]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[13] * B[9]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[14] * B[8]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[15] * B[7]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[16] * B[6]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[17] * B[5]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[18] * B[4]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[19] * B[3]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[20] * B[2]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[21] * B[1]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[22] * B[0]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [sp, #88]\n\t"
+        "#  A[0] * B[23]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[1] * B[22]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[2] * B[21]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[3] * B[20]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[4] * B[19]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[5] * B[18]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[6] * B[17]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[7] * B[16]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[8] * B[15]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[9] * B[14]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[10] * B[13]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[11] * B[12]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[12] * B[11]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[13] * B[10]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[14] * B[9]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[15] * B[8]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[16] * B[7]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[17] * B[6]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[18] * B[5]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[19] * B[4]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[20] * B[3]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[21] * B[2]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[22] * B[1]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[23] * B[0]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [sp, #92]\n\t"
+        "#  A[0] * B[24]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[1] * B[23]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[2] * B[22]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[3] * B[21]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[4] * B[20]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[5] * B[19]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[6] * B[18]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[7] * B[17]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[8] * B[16]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[9] * B[15]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[10] * B[14]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[11] * B[13]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[12] * B[12]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[13] * B[11]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[14] * B[10]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[15] * B[9]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[16] * B[8]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[17] * B[7]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[18] * B[6]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[19] * B[5]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[20] * B[4]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[21] * B[3]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[22] * B[2]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[23] * B[1]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[24] * B[0]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [sp, #96]\n\t"
+        "#  A[0] * B[25]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[1] * B[24]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[2] * B[23]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[3] * B[22]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[4] * B[21]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[5] * B[20]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[6] * B[19]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[7] * B[18]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[8] * B[17]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[9] * B[16]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[10] * B[15]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[11] * B[14]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[12] * B[13]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[13] * B[12]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[14] * B[11]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[15] * B[10]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[16] * B[9]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[17] * B[8]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[18] * B[7]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[19] * B[6]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[20] * B[5]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[21] * B[4]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[22] * B[3]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[23] * B[2]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[24] * B[1]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[25] * B[0]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [sp, #100]\n\t"
+        "#  A[0] * B[26]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[1] * B[25]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[2] * B[24]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[3] * B[23]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[4] * B[22]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[5] * B[21]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[6] * B[20]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[7] * B[19]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[8] * B[18]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[9] * B[17]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[10] * B[16]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[11] * B[15]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[12] * B[14]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[13] * B[13]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[14] * B[12]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[15] * B[11]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[16] * B[10]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[17] * B[9]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[18] * B[8]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[19] * B[7]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[20] * B[6]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[21] * B[5]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[22] * B[4]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[23] * B[3]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[24] * B[2]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[25] * B[1]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[26] * B[0]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [sp, #104]\n\t"
+        "#  A[0] * B[27]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[1] * B[26]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[2] * B[25]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[3] * B[24]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[4] * B[23]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[5] * B[22]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[6] * B[21]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[7] * B[20]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[8] * B[19]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[9] * B[18]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[10] * B[17]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[11] * B[16]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[12] * B[15]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[13] * B[14]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[14] * B[13]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[15] * B[12]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[16] * B[11]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[17] * B[10]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[18] * B[9]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[19] * B[8]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[20] * B[7]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[21] * B[6]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[22] * B[5]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[23] * B[4]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[24] * B[3]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[25] * B[2]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[26] * B[1]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[27] * B[0]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [sp, #108]\n\t"
+        "#  A[0] * B[28]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[1] * B[27]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[2] * B[26]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[3] * B[25]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[4] * B[24]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[5] * B[23]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[6] * B[22]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[7] * B[21]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[8] * B[20]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[9] * B[19]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[10] * B[18]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[11] * B[17]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[12] * B[16]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[13] * B[15]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[14] * B[14]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[15] * B[13]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[16] * B[12]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[17] * B[11]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[18] * B[10]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[19] * B[9]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[20] * B[8]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[21] * B[7]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[22] * B[6]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[23] * B[5]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[24] * B[4]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[25] * B[3]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[26] * B[2]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[27] * B[1]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[28] * B[0]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [sp, #112]\n\t"
+        "#  A[0] * B[29]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[1] * B[28]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[2] * B[27]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[3] * B[26]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[4] * B[25]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[5] * B[24]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[6] * B[23]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[7] * B[22]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[8] * B[21]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[9] * B[20]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[10] * B[19]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[11] * B[18]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[12] * B[17]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[13] * B[16]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[14] * B[15]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[15] * B[14]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[16] * B[13]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[17] * B[12]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[18] * B[11]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[19] * B[10]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[20] * B[9]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[21] * B[8]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[22] * B[7]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[23] * B[6]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[24] * B[5]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[25] * B[4]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[26] * B[3]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[27] * B[2]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[28] * B[1]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[29] * B[0]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [sp, #116]\n\t"
+        "#  A[0] * B[30]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[1] * B[29]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[2] * B[28]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[3] * B[27]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[4] * B[26]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[5] * B[25]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[6] * B[24]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[7] * B[23]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[8] * B[22]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[9] * B[21]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[10] * B[20]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[11] * B[19]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[12] * B[18]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[13] * B[17]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[14] * B[16]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[15] * B[15]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[16] * B[14]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[17] * B[13]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[18] * B[12]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[19] * B[11]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[20] * B[10]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[21] * B[9]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[22] * B[8]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[23] * B[7]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[24] * B[6]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[25] * B[5]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[26] * B[4]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[27] * B[3]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[28] * B[2]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[29] * B[1]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[30] * B[0]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [sp, #120]\n\t"
+        "#  A[0] * B[31]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[1] * B[30]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[2] * B[29]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[3] * B[28]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[4] * B[27]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[5] * B[26]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[6] * B[25]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[7] * B[24]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[8] * B[23]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[9] * B[22]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[10] * B[21]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[11] * B[20]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[12] * B[19]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[13] * B[18]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[14] * B[17]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[15] * B[16]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[16] * B[15]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[17] * B[14]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[18] * B[13]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[19] * B[12]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[20] * B[11]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[21] * B[10]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[22] * B[9]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[23] * B[8]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[24] * B[7]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[25] * B[6]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[26] * B[5]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[27] * B[4]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[28] * B[3]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[29] * B[2]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[30] * B[1]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[31] * B[0]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [sp, #124]\n\t"
+        "#  A[0] * B[32]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[1] * B[31]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[2] * B[30]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[3] * B[29]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[4] * B[28]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[5] * B[27]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[6] * B[26]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[7] * B[25]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[8] * B[24]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[9] * B[23]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[10] * B[22]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[11] * B[21]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[12] * B[20]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[13] * B[19]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[14] * B[18]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[15] * B[17]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[16] * B[16]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[17] * B[15]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[18] * B[14]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[19] * B[13]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[20] * B[12]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[21] * B[11]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[22] * B[10]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[23] * B[9]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[24] * B[8]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[25] * B[7]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[26] * B[6]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[27] * B[5]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[28] * B[4]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[29] * B[3]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[30] * B[2]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[31] * B[1]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[32] * B[0]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [sp, #128]\n\t"
+        "#  A[0] * B[33]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[1] * B[32]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[2] * B[31]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[3] * B[30]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[4] * B[29]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[5] * B[28]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[6] * B[27]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[7] * B[26]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[8] * B[25]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[9] * B[24]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[10] * B[23]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[11] * B[22]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[12] * B[21]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[13] * B[20]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[14] * B[19]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[15] * B[18]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[16] * B[17]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[17] * B[16]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[18] * B[15]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[19] * B[14]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[20] * B[13]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[21] * B[12]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[22] * B[11]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[23] * B[10]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[24] * B[9]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[25] * B[8]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[26] * B[7]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[27] * B[6]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[28] * B[5]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[29] * B[4]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[30] * B[3]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[31] * B[2]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[32] * B[1]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[33] * B[0]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [sp, #132]\n\t"
+        "#  A[0] * B[34]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[1] * B[33]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[2] * B[32]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[3] * B[31]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[4] * B[30]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[5] * B[29]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[6] * B[28]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[7] * B[27]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[8] * B[26]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[9] * B[25]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[10] * B[24]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[11] * B[23]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[12] * B[22]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[13] * B[21]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[14] * B[20]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[15] * B[19]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[16] * B[18]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[17] * B[17]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[18] * B[16]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[19] * B[15]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[20] * B[14]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[21] * B[13]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[22] * B[12]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[23] * B[11]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[24] * B[10]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[25] * B[9]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[26] * B[8]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[27] * B[7]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[28] * B[6]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[29] * B[5]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[30] * B[4]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[31] * B[3]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[32] * B[2]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[33] * B[1]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[34] * B[0]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [sp, #136]\n\t"
+        "#  A[0] * B[35]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[1] * B[34]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[2] * B[33]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[3] * B[32]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[4] * B[31]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[5] * B[30]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[6] * B[29]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[7] * B[28]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[8] * B[27]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[9] * B[26]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[10] * B[25]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[11] * B[24]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[12] * B[23]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[13] * B[22]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[14] * B[21]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[15] * B[20]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[16] * B[19]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[17] * B[18]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[18] * B[17]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[19] * B[16]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[20] * B[15]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[21] * B[14]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[22] * B[13]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[23] * B[12]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[24] * B[11]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[25] * B[10]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[26] * B[9]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[27] * B[8]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[28] * B[7]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[29] * B[6]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[30] * B[5]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[31] * B[4]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[32] * B[3]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[33] * B[2]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[34] * B[1]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[35] * B[0]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [sp, #140]\n\t"
+        "#  A[0] * B[36]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[1] * B[35]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[2] * B[34]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[3] * B[33]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[4] * B[32]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[5] * B[31]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[6] * B[30]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[7] * B[29]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[8] * B[28]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[9] * B[27]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[10] * B[26]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[11] * B[25]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[12] * B[24]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[13] * B[23]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[14] * B[22]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[15] * B[21]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[16] * B[20]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[17] * B[19]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[18] * B[18]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[19] * B[17]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[20] * B[16]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[21] * B[15]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[22] * B[14]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[23] * B[13]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[24] * B[12]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[25] * B[11]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[26] * B[10]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[27] * B[9]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[28] * B[8]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[29] * B[7]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[30] * B[6]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[31] * B[5]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[32] * B[4]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[33] * B[3]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[34] * B[2]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[35] * B[1]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[36] * B[0]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [sp, #144]\n\t"
+        "#  A[0] * B[37]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[1] * B[36]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[2] * B[35]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[3] * B[34]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[4] * B[33]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[5] * B[32]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[6] * B[31]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[7] * B[30]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[8] * B[29]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[9] * B[28]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[10] * B[27]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[11] * B[26]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[12] * B[25]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[13] * B[24]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[14] * B[23]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[15] * B[22]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[16] * B[21]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[17] * B[20]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[18] * B[19]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[19] * B[18]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[20] * B[17]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[21] * B[16]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[22] * B[15]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[23] * B[14]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[24] * B[13]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[25] * B[12]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[26] * B[11]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[27] * B[10]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[28] * B[9]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[29] * B[8]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[30] * B[7]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[31] * B[6]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[32] * B[5]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[33] * B[4]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[34] * B[3]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[35] * B[2]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[36] * B[1]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[37] * B[0]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [sp, #148]\n\t"
+        "#  A[0] * B[38]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[1] * B[37]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[2] * B[36]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[3] * B[35]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[4] * B[34]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[5] * B[33]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[6] * B[32]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[7] * B[31]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[8] * B[30]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[9] * B[29]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[10] * B[28]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[11] * B[27]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[12] * B[26]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[13] * B[25]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[14] * B[24]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[15] * B[23]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[16] * B[22]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[17] * B[21]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[18] * B[20]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[19] * B[19]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[20] * B[18]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[21] * B[17]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[22] * B[16]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[23] * B[15]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[24] * B[14]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[25] * B[13]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[26] * B[12]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[27] * B[11]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[28] * B[10]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[29] * B[9]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[30] * B[8]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[31] * B[7]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[32] * B[6]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[33] * B[5]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[34] * B[4]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[35] * B[3]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[36] * B[2]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[37] * B[1]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[38] * B[0]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [sp, #152]\n\t"
+        "#  A[0] * B[39]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[1] * B[38]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[2] * B[37]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[3] * B[36]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[4] * B[35]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[5] * B[34]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[6] * B[33]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[7] * B[32]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[8] * B[31]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[9] * B[30]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[10] * B[29]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[11] * B[28]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[12] * B[27]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[13] * B[26]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[14] * B[25]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[15] * B[24]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[16] * B[23]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[17] * B[22]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[18] * B[21]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[19] * B[20]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[20] * B[19]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[21] * B[18]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[22] * B[17]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[23] * B[16]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[24] * B[15]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[25] * B[14]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[26] * B[13]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[27] * B[12]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[28] * B[11]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[29] * B[10]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[30] * B[9]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[31] * B[8]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[32] * B[7]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[33] * B[6]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[34] * B[5]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[35] * B[4]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[36] * B[3]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[37] * B[2]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[38] * B[1]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[39] * B[0]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [sp, #156]\n\t"
+        "#  A[0] * B[40]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[1] * B[39]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[2] * B[38]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[3] * B[37]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[4] * B[36]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[5] * B[35]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[6] * B[34]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[7] * B[33]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[8] * B[32]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[9] * B[31]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[10] * B[30]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[11] * B[29]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[12] * B[28]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[13] * B[27]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[14] * B[26]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[15] * B[25]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[16] * B[24]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[17] * B[23]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[18] * B[22]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[19] * B[21]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[20] * B[20]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[21] * B[19]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[22] * B[18]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[23] * B[17]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[24] * B[16]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[25] * B[15]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[26] * B[14]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[27] * B[13]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[28] * B[12]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[29] * B[11]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[30] * B[10]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[31] * B[9]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[32] * B[8]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[33] * B[7]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[34] * B[6]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[35] * B[5]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[36] * B[4]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[37] * B[3]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[38] * B[2]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[39] * B[1]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[40] * B[0]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [sp, #160]\n\t"
+        "#  A[0] * B[41]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[1] * B[40]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[2] * B[39]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[3] * B[38]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[4] * B[37]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[5] * B[36]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[6] * B[35]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[7] * B[34]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[8] * B[33]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[9] * B[32]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[10] * B[31]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[11] * B[30]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[12] * B[29]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[13] * B[28]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[14] * B[27]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[15] * B[26]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[16] * B[25]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[17] * B[24]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[18] * B[23]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[19] * B[22]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[20] * B[21]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[21] * B[20]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[22] * B[19]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[23] * B[18]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[24] * B[17]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[25] * B[16]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[26] * B[15]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[27] * B[14]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[28] * B[13]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[29] * B[12]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[30] * B[11]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[31] * B[10]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[32] * B[9]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[33] * B[8]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[34] * B[7]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[35] * B[6]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[36] * B[5]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[37] * B[4]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[38] * B[3]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[39] * B[2]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[40] * B[1]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[41] * B[0]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [sp, #164]\n\t"
+        "#  A[0] * B[42]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[1] * B[41]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[2] * B[40]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[3] * B[39]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[4] * B[38]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[5] * B[37]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[6] * B[36]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[7] * B[35]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[8] * B[34]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[9] * B[33]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[10] * B[32]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[11] * B[31]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[12] * B[30]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[13] * B[29]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[14] * B[28]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[15] * B[27]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[16] * B[26]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[17] * B[25]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[18] * B[24]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[19] * B[23]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[20] * B[22]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[21] * B[21]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[22] * B[20]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[23] * B[19]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[24] * B[18]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[25] * B[17]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[26] * B[16]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[27] * B[15]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[28] * B[14]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[29] * B[13]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[30] * B[12]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[31] * B[11]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[32] * B[10]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[33] * B[9]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[34] * B[8]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[35] * B[7]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[36] * B[6]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[37] * B[5]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[38] * B[4]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[39] * B[3]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[40] * B[2]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[41] * B[1]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[42] * B[0]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [sp, #168]\n\t"
+        "#  A[0] * B[43]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[1] * B[42]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[2] * B[41]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[3] * B[40]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[4] * B[39]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[5] * B[38]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[6] * B[37]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[7] * B[36]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[8] * B[35]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[9] * B[34]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[10] * B[33]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[11] * B[32]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[12] * B[31]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[13] * B[30]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[14] * B[29]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[15] * B[28]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[16] * B[27]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[17] * B[26]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[18] * B[25]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[19] * B[24]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[20] * B[23]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[21] * B[22]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[22] * B[21]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[23] * B[20]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[24] * B[19]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[25] * B[18]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[26] * B[17]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[27] * B[16]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[28] * B[15]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[29] * B[14]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[30] * B[13]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[31] * B[12]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[32] * B[11]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[33] * B[10]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[34] * B[9]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[35] * B[8]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[36] * B[7]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[37] * B[6]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[38] * B[5]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[39] * B[4]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[40] * B[3]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[41] * B[2]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[42] * B[1]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[43] * B[0]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [sp, #172]\n\t"
+        "#  A[0] * B[44]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[1] * B[43]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[2] * B[42]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[3] * B[41]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[4] * B[40]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[5] * B[39]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[6] * B[38]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[7] * B[37]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[8] * B[36]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[9] * B[35]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[10] * B[34]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[11] * B[33]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[12] * B[32]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[13] * B[31]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[14] * B[30]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[15] * B[29]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[16] * B[28]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[17] * B[27]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[18] * B[26]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[19] * B[25]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[20] * B[24]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[21] * B[23]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[22] * B[22]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[23] * B[21]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[24] * B[20]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[25] * B[19]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[26] * B[18]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[27] * B[17]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[28] * B[16]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[29] * B[15]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[30] * B[14]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[31] * B[13]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[32] * B[12]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[33] * B[11]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[34] * B[10]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[35] * B[9]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[36] * B[8]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[37] * B[7]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[38] * B[6]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[39] * B[5]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[40] * B[4]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[41] * B[3]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[42] * B[2]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[43] * B[1]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[44] * B[0]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [sp, #176]\n\t"
+        "#  A[0] * B[45]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[1] * B[44]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[2] * B[43]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[3] * B[42]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[4] * B[41]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[5] * B[40]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[6] * B[39]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[7] * B[38]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[8] * B[37]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[9] * B[36]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[10] * B[35]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[11] * B[34]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[12] * B[33]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[13] * B[32]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[14] * B[31]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[15] * B[30]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[16] * B[29]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[17] * B[28]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[18] * B[27]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[19] * B[26]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[20] * B[25]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[21] * B[24]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[22] * B[23]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[23] * B[22]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[24] * B[21]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[25] * B[20]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[26] * B[19]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[27] * B[18]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[28] * B[17]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[29] * B[16]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[30] * B[15]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[31] * B[14]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[32] * B[13]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[33] * B[12]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[34] * B[11]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[35] * B[10]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[36] * B[9]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[37] * B[8]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[38] * B[7]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[39] * B[6]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[40] * B[5]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[41] * B[4]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[42] * B[3]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[43] * B[2]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[44] * B[1]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[45] * B[0]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [sp, #180]\n\t"
+        "#  A[0] * B[46]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[1] * B[45]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[2] * B[44]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[3] * B[43]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[4] * B[42]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[5] * B[41]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[6] * B[40]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[7] * B[39]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[8] * B[38]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[9] * B[37]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[10] * B[36]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[11] * B[35]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[12] * B[34]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[13] * B[33]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[14] * B[32]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[15] * B[31]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[16] * B[30]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[17] * B[29]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[18] * B[28]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[19] * B[27]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[20] * B[26]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[21] * B[25]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[22] * B[24]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[23] * B[23]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[24] * B[22]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[25] * B[21]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[26] * B[20]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[27] * B[19]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[28] * B[18]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[29] * B[17]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[30] * B[16]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[31] * B[15]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[32] * B[14]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[33] * B[13]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[34] * B[12]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[35] * B[11]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[36] * B[10]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[37] * B[9]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[38] * B[8]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[39] * B[7]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[40] * B[6]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[41] * B[5]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[42] * B[4]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[43] * B[3]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[44] * B[2]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[45] * B[1]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[46] * B[0]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [sp, #184]\n\t"
+        "#  A[0] * B[47]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[1] * B[46]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[2] * B[45]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[3] * B[44]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[4] * B[43]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[5] * B[42]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[6] * B[41]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[7] * B[40]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[8] * B[39]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[9] * B[38]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[10] * B[37]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[11] * B[36]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[12] * B[35]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[13] * B[34]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[14] * B[33]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[15] * B[32]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[16] * B[31]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[17] * B[30]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[18] * B[29]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[19] * B[28]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[20] * B[27]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[21] * B[26]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[22] * B[25]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[23] * B[24]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[24] * B[23]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[25] * B[22]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[26] * B[21]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[27] * B[20]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[28] * B[19]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[29] * B[18]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[30] * B[17]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[31] * B[16]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[32] * B[15]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[33] * B[14]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[34] * B[13]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[35] * B[12]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[36] * B[11]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[37] * B[10]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[38] * B[9]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[39] * B[8]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[40] * B[7]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[41] * B[6]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[42] * B[5]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[43] * B[4]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[44] * B[3]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[45] * B[2]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[46] * B[1]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[47] * B[0]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [sp, #188]\n\t"
+        "#  A[0] * B[48]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[1] * B[47]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[2] * B[46]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[3] * B[45]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[4] * B[44]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[5] * B[43]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[6] * B[42]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[7] * B[41]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[8] * B[40]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[9] * B[39]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[10] * B[38]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[11] * B[37]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[12] * B[36]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[13] * B[35]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[14] * B[34]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[15] * B[33]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[16] * B[32]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[17] * B[31]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[18] * B[30]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[19] * B[29]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[20] * B[28]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[21] * B[27]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[22] * B[26]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[23] * B[25]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[24] * B[24]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[25] * B[23]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[26] * B[22]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[27] * B[21]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[28] * B[20]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[29] * B[19]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[30] * B[18]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[31] * B[17]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[32] * B[16]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[33] * B[15]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[34] * B[14]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[35] * B[13]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[36] * B[12]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[37] * B[11]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[38] * B[10]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[39] * B[9]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[40] * B[8]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[41] * B[7]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[42] * B[6]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[43] * B[5]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[44] * B[4]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[45] * B[3]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[46] * B[2]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[47] * B[1]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[48] * B[0]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [sp, #192]\n\t"
+        "#  A[0] * B[49]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[1] * B[48]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[2] * B[47]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[3] * B[46]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[4] * B[45]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[5] * B[44]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[6] * B[43]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[7] * B[42]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[8] * B[41]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[9] * B[40]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[10] * B[39]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[11] * B[38]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[12] * B[37]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[13] * B[36]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[14] * B[35]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[15] * B[34]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[16] * B[33]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[17] * B[32]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[18] * B[31]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[19] * B[30]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[20] * B[29]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[21] * B[28]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[22] * B[27]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[23] * B[26]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[24] * B[25]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[25] * B[24]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[26] * B[23]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[27] * B[22]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[28] * B[21]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[29] * B[20]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[30] * B[19]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[31] * B[18]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[32] * B[17]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[33] * B[16]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[34] * B[15]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[35] * B[14]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[36] * B[13]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[37] * B[12]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[38] * B[11]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[39] * B[10]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[40] * B[9]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[41] * B[8]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[42] * B[7]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[43] * B[6]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[44] * B[5]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[45] * B[4]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[46] * B[3]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[47] * B[2]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[48] * B[1]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[49] * B[0]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [sp, #196]\n\t"
+        "#  A[0] * B[50]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[1] * B[49]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[2] * B[48]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[3] * B[47]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[4] * B[46]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[5] * B[45]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[6] * B[44]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[7] * B[43]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[8] * B[42]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[9] * B[41]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[10] * B[40]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[11] * B[39]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[12] * B[38]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[13] * B[37]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[14] * B[36]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[15] * B[35]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[16] * B[34]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[17] * B[33]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[18] * B[32]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[19] * B[31]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[20] * B[30]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[21] * B[29]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[22] * B[28]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[23] * B[27]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[24] * B[26]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[25] * B[25]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[26] * B[24]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[27] * B[23]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[28] * B[22]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[29] * B[21]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[30] * B[20]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[31] * B[19]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[32] * B[18]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[33] * B[17]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[34] * B[16]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[35] * B[15]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[36] * B[14]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[37] * B[13]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[38] * B[12]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[39] * B[11]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[40] * B[10]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[41] * B[9]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[42] * B[8]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[43] * B[7]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[44] * B[6]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[45] * B[5]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[46] * B[4]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[47] * B[3]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[48] * B[2]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[49] * B[1]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[50] * B[0]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [sp, #200]\n\t"
+        "#  A[0] * B[51]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[1] * B[50]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[2] * B[49]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[3] * B[48]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[4] * B[47]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[5] * B[46]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[6] * B[45]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[7] * B[44]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[8] * B[43]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[9] * B[42]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[10] * B[41]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[11] * B[40]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[12] * B[39]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[13] * B[38]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[14] * B[37]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[15] * B[36]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[16] * B[35]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[17] * B[34]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[18] * B[33]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[19] * B[32]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[20] * B[31]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[21] * B[30]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[22] * B[29]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[23] * B[28]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[24] * B[27]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[25] * B[26]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[26] * B[25]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[27] * B[24]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[28] * B[23]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[29] * B[22]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[30] * B[21]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[31] * B[20]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[32] * B[19]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[33] * B[18]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[34] * B[17]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[35] * B[16]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[36] * B[15]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[37] * B[14]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[38] * B[13]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[39] * B[12]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[40] * B[11]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[41] * B[10]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[42] * B[9]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[43] * B[8]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[44] * B[7]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[45] * B[6]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[46] * B[5]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[47] * B[4]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[48] * B[3]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[49] * B[2]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[50] * B[1]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[51] * B[0]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [sp, #204]\n\t"
+        "#  A[0] * B[52]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[1] * B[51]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[2] * B[50]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[3] * B[49]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[4] * B[48]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[5] * B[47]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[6] * B[46]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[7] * B[45]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[8] * B[44]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[9] * B[43]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[10] * B[42]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[11] * B[41]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[12] * B[40]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[13] * B[39]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[14] * B[38]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[15] * B[37]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[16] * B[36]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[17] * B[35]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[18] * B[34]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[19] * B[33]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[20] * B[32]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[21] * B[31]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[22] * B[30]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[23] * B[29]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[24] * B[28]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[25] * B[27]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[26] * B[26]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[27] * B[25]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[28] * B[24]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[29] * B[23]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[30] * B[22]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[31] * B[21]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[32] * B[20]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[33] * B[19]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[34] * B[18]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[35] * B[17]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[36] * B[16]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[37] * B[15]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[38] * B[14]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[39] * B[13]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[40] * B[12]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[41] * B[11]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[42] * B[10]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[43] * B[9]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[44] * B[8]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[45] * B[7]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[46] * B[6]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[47] * B[5]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[48] * B[4]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[49] * B[3]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[50] * B[2]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[51] * B[1]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[52] * B[0]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [sp, #208]\n\t"
+        "#  A[0] * B[53]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[1] * B[52]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[2] * B[51]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[3] * B[50]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[4] * B[49]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[5] * B[48]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[6] * B[47]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[7] * B[46]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[8] * B[45]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[9] * B[44]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[10] * B[43]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[11] * B[42]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[12] * B[41]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[13] * B[40]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[14] * B[39]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[15] * B[38]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[16] * B[37]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[17] * B[36]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[18] * B[35]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[19] * B[34]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[20] * B[33]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[21] * B[32]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[22] * B[31]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[23] * B[30]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[24] * B[29]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[25] * B[28]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[26] * B[27]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[27] * B[26]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[28] * B[25]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[29] * B[24]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[30] * B[23]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[31] * B[22]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[32] * B[21]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[33] * B[20]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[34] * B[19]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[35] * B[18]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[36] * B[17]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[37] * B[16]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[38] * B[15]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[39] * B[14]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[40] * B[13]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[41] * B[12]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[42] * B[11]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[43] * B[10]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[44] * B[9]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[45] * B[8]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[46] * B[7]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[47] * B[6]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[48] * B[5]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[49] * B[4]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[50] * B[3]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[51] * B[2]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[52] * B[1]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[53] * B[0]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [sp, #212]\n\t"
+        "#  A[0] * B[54]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[1] * B[53]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[2] * B[52]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[3] * B[51]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[4] * B[50]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[5] * B[49]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[6] * B[48]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[7] * B[47]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[8] * B[46]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[9] * B[45]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[10] * B[44]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[11] * B[43]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[12] * B[42]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[13] * B[41]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[14] * B[40]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[15] * B[39]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[16] * B[38]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[17] * B[37]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[18] * B[36]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[19] * B[35]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[20] * B[34]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[21] * B[33]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[22] * B[32]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[23] * B[31]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[24] * B[30]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[25] * B[29]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[26] * B[28]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[27] * B[27]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[28] * B[26]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[29] * B[25]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[30] * B[24]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[31] * B[23]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[32] * B[22]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[33] * B[21]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[34] * B[20]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[35] * B[19]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[36] * B[18]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[37] * B[17]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[38] * B[16]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[39] * B[15]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[40] * B[14]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[41] * B[13]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[42] * B[12]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[43] * B[11]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[44] * B[10]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[45] * B[9]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[46] * B[8]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[47] * B[7]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[48] * B[6]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[49] * B[5]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[50] * B[4]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[51] * B[3]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[52] * B[2]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[53] * B[1]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[54] * B[0]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [sp, #216]\n\t"
+        "#  A[0] * B[55]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[1] * B[54]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[2] * B[53]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[3] * B[52]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[4] * B[51]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[5] * B[50]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[6] * B[49]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[7] * B[48]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[8] * B[47]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[9] * B[46]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[10] * B[45]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[11] * B[44]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[12] * B[43]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[13] * B[42]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[14] * B[41]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[15] * B[40]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[16] * B[39]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[17] * B[38]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[18] * B[37]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[19] * B[36]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[20] * B[35]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[21] * B[34]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[22] * B[33]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[23] * B[32]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[24] * B[31]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[25] * B[30]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[26] * B[29]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[27] * B[28]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[28] * B[27]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[29] * B[26]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[30] * B[25]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[31] * B[24]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[32] * B[23]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[33] * B[22]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[34] * B[21]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[35] * B[20]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[36] * B[19]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[37] * B[18]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[38] * B[17]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[39] * B[16]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[40] * B[15]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[41] * B[14]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[42] * B[13]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[43] * B[12]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[44] * B[11]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[45] * B[10]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[46] * B[9]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[47] * B[8]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[48] * B[7]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[49] * B[6]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[50] * B[5]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[51] * B[4]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[52] * B[3]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[53] * B[2]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[54] * B[1]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[55] * B[0]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [sp, #220]\n\t"
+        "#  A[0] * B[56]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[1] * B[55]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[2] * B[54]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[3] * B[53]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[4] * B[52]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[5] * B[51]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[6] * B[50]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[7] * B[49]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[8] * B[48]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[9] * B[47]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[10] * B[46]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[11] * B[45]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[12] * B[44]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[13] * B[43]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[14] * B[42]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[15] * B[41]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[16] * B[40]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[17] * B[39]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[18] * B[38]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[19] * B[37]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[20] * B[36]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[21] * B[35]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[22] * B[34]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[23] * B[33]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[24] * B[32]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[25] * B[31]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[26] * B[30]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[27] * B[29]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[28] * B[28]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[29] * B[27]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[30] * B[26]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[31] * B[25]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[32] * B[24]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[33] * B[23]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[34] * B[22]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[35] * B[21]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[36] * B[20]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[37] * B[19]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[38] * B[18]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[39] * B[17]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[40] * B[16]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[41] * B[15]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[42] * B[14]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[43] * B[13]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[44] * B[12]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[45] * B[11]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[46] * B[10]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[47] * B[9]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[48] * B[8]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[49] * B[7]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[50] * B[6]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[51] * B[5]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[52] * B[4]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[53] * B[3]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[54] * B[2]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[55] * B[1]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[56] * B[0]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [sp, #224]\n\t"
+        "#  A[0] * B[57]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[1] * B[56]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[2] * B[55]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[3] * B[54]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[4] * B[53]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[5] * B[52]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[6] * B[51]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[7] * B[50]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[8] * B[49]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[9] * B[48]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[10] * B[47]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[11] * B[46]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[12] * B[45]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[13] * B[44]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[14] * B[43]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[15] * B[42]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[16] * B[41]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[17] * B[40]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[18] * B[39]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[19] * B[38]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[20] * B[37]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[21] * B[36]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[22] * B[35]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[23] * B[34]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[24] * B[33]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[25] * B[32]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[26] * B[31]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[27] * B[30]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[28] * B[29]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[29] * B[28]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[30] * B[27]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[31] * B[26]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[32] * B[25]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[33] * B[24]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[34] * B[23]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[35] * B[22]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[36] * B[21]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[37] * B[20]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[38] * B[19]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[39] * B[18]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[40] * B[17]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[41] * B[16]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[42] * B[15]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[43] * B[14]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[44] * B[13]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[45] * B[12]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[46] * B[11]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[47] * B[10]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[48] * B[9]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[49] * B[8]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[50] * B[7]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[51] * B[6]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[52] * B[5]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[53] * B[4]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[54] * B[3]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[55] * B[2]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[56] * B[1]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[57] * B[0]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [sp, #228]\n\t"
+        "#  A[0] * B[58]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[1] * B[57]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[2] * B[56]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[3] * B[55]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[4] * B[54]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[5] * B[53]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[6] * B[52]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[7] * B[51]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[8] * B[50]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[9] * B[49]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[10] * B[48]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[11] * B[47]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[12] * B[46]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[13] * B[45]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[14] * B[44]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[15] * B[43]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[16] * B[42]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[17] * B[41]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[18] * B[40]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[19] * B[39]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[20] * B[38]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[21] * B[37]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[22] * B[36]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[23] * B[35]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[24] * B[34]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[25] * B[33]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[26] * B[32]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[27] * B[31]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[28] * B[30]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[29] * B[29]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[30] * B[28]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[31] * B[27]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[32] * B[26]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[33] * B[25]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[34] * B[24]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[35] * B[23]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[36] * B[22]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[37] * B[21]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[38] * B[20]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[39] * B[19]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[40] * B[18]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[41] * B[17]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[42] * B[16]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[43] * B[15]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[44] * B[14]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[45] * B[13]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[46] * B[12]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[47] * B[11]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[48] * B[10]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[49] * B[9]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[50] * B[8]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[51] * B[7]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[52] * B[6]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[53] * B[5]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[54] * B[4]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[55] * B[3]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[56] * B[2]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[57] * B[1]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[58] * B[0]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [sp, #232]\n\t"
+        "#  A[0] * B[59]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[1] * B[58]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[2] * B[57]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[3] * B[56]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[4] * B[55]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[5] * B[54]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[6] * B[53]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[7] * B[52]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[8] * B[51]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[9] * B[50]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[10] * B[49]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[11] * B[48]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[12] * B[47]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[13] * B[46]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[14] * B[45]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[15] * B[44]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[16] * B[43]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[17] * B[42]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[18] * B[41]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[19] * B[40]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[20] * B[39]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[21] * B[38]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[22] * B[37]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[23] * B[36]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[24] * B[35]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[25] * B[34]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[26] * B[33]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[27] * B[32]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[28] * B[31]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[29] * B[30]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[30] * B[29]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[31] * B[28]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[32] * B[27]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[33] * B[26]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[34] * B[25]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[35] * B[24]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[36] * B[23]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[37] * B[22]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[38] * B[21]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[39] * B[20]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[40] * B[19]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[41] * B[18]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[42] * B[17]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[43] * B[16]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[44] * B[15]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[45] * B[14]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[46] * B[13]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[47] * B[12]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[48] * B[11]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[49] * B[10]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[50] * B[9]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[51] * B[8]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[52] * B[7]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[53] * B[6]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[54] * B[5]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[55] * B[4]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[56] * B[3]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[57] * B[2]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[58] * B[1]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[59] * B[0]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [sp, #236]\n\t"
+        "#  A[0] * B[60]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[1] * B[59]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[2] * B[58]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[3] * B[57]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[4] * B[56]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[5] * B[55]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[6] * B[54]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[7] * B[53]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[8] * B[52]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[9] * B[51]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[10] * B[50]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[11] * B[49]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[12] * B[48]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[13] * B[47]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[14] * B[46]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[15] * B[45]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[16] * B[44]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[17] * B[43]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[18] * B[42]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[19] * B[41]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[20] * B[40]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[21] * B[39]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[22] * B[38]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[23] * B[37]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[24] * B[36]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[25] * B[35]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[26] * B[34]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[27] * B[33]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[28] * B[32]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[29] * B[31]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[30] * B[30]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[31] * B[29]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[32] * B[28]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[33] * B[27]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[34] * B[26]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[35] * B[25]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[36] * B[24]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[37] * B[23]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[38] * B[22]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[39] * B[21]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[40] * B[20]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[41] * B[19]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[42] * B[18]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[43] * B[17]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[44] * B[16]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[45] * B[15]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[46] * B[14]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[47] * B[13]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[48] * B[12]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[49] * B[11]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[50] * B[10]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[51] * B[9]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[52] * B[8]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[53] * B[7]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[54] * B[6]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[55] * B[5]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[56] * B[4]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[57] * B[3]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[58] * B[2]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[59] * B[1]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[60] * B[0]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [sp, #240]\n\t"
+        "#  A[0] * B[61]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[1] * B[60]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[2] * B[59]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[3] * B[58]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[4] * B[57]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[5] * B[56]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[6] * B[55]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[7] * B[54]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[8] * B[53]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[9] * B[52]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[10] * B[51]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[11] * B[50]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[12] * B[49]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[13] * B[48]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[14] * B[47]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[15] * B[46]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[16] * B[45]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[17] * B[44]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[18] * B[43]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[19] * B[42]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[20] * B[41]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[21] * B[40]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[22] * B[39]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[23] * B[38]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[24] * B[37]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[25] * B[36]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[26] * B[35]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[27] * B[34]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[28] * B[33]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[29] * B[32]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[30] * B[31]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[31] * B[30]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[32] * B[29]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[33] * B[28]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[34] * B[27]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[35] * B[26]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[36] * B[25]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[37] * B[24]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[38] * B[23]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[39] * B[22]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[40] * B[21]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[41] * B[20]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[42] * B[19]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[43] * B[18]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[44] * B[17]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[45] * B[16]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[46] * B[15]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[47] * B[14]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[48] * B[13]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[49] * B[12]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[50] * B[11]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[51] * B[10]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[52] * B[9]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[53] * B[8]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[54] * B[7]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[55] * B[6]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[56] * B[5]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[57] * B[4]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[58] * B[3]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[59] * B[2]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[60] * B[1]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[61] * B[0]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [sp, #244]\n\t"
+        "#  A[0] * B[62]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[1] * B[61]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[2] * B[60]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[3] * B[59]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[4] * B[58]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[5] * B[57]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[6] * B[56]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[7] * B[55]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[8] * B[54]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[9] * B[53]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[10] * B[52]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[11] * B[51]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[12] * B[50]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[13] * B[49]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[14] * B[48]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[15] * B[47]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[16] * B[46]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[17] * B[45]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[18] * B[44]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[19] * B[43]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[20] * B[42]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[21] * B[41]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[22] * B[40]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[23] * B[39]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[24] * B[38]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[25] * B[37]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[26] * B[36]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[27] * B[35]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[28] * B[34]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[29] * B[33]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[30] * B[32]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[31] * B[31]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[32] * B[30]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[33] * B[29]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[34] * B[28]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[35] * B[27]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[36] * B[26]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[37] * B[25]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[38] * B[24]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[39] * B[23]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[40] * B[22]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[41] * B[21]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[42] * B[20]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[43] * B[19]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[44] * B[18]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[45] * B[17]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[46] * B[16]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[47] * B[15]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[48] * B[14]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[49] * B[13]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[50] * B[12]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[51] * B[11]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[52] * B[10]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[53] * B[9]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[54] * B[8]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[55] * B[7]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[56] * B[6]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[57] * B[5]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[58] * B[4]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[59] * B[3]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[60] * B[2]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[61] * B[1]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[62] * B[0]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [sp, #248]\n\t"
+        "#  A[0] * B[63]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[1] * B[62]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[2] * B[61]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[3] * B[60]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[4] * B[59]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[5] * B[58]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[6] * B[57]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[7] * B[56]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[8] * B[55]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[9] * B[54]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[10] * B[53]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[11] * B[52]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[12] * B[51]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[13] * B[50]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[14] * B[49]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[15] * B[48]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[16] * B[47]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[17] * B[46]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[18] * B[45]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[19] * B[44]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[20] * B[43]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[21] * B[42]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[22] * B[41]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[23] * B[40]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[24] * B[39]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[25] * B[38]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[26] * B[37]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[27] * B[36]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[28] * B[35]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[29] * B[34]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[30] * B[33]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[31] * B[32]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[32] * B[31]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[33] * B[30]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[34] * B[29]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[35] * B[28]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[36] * B[27]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[37] * B[26]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[38] * B[25]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[39] * B[24]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[40] * B[23]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[41] * B[22]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[42] * B[21]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[43] * B[20]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[44] * B[19]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[45] * B[18]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[46] * B[17]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[47] * B[16]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[48] * B[15]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[49] * B[14]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[50] * B[13]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[51] * B[12]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[52] * B[11]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[53] * B[10]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[54] * B[9]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[55] * B[8]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[56] * B[7]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[57] * B[6]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[58] * B[5]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[59] * B[4]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[60] * B[3]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[61] * B[2]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[62] * B[1]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[63] * B[0]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [sp, #252]\n\t"
+        "#  A[1] * B[63]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[2] * B[62]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[3] * B[61]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[4] * B[60]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[5] * B[59]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[6] * B[58]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[7] * B[57]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[8] * B[56]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[9] * B[55]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[10] * B[54]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[11] * B[53]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[12] * B[52]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[13] * B[51]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[14] * B[50]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[15] * B[49]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[16] * B[48]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[17] * B[47]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[18] * B[46]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[19] * B[45]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[20] * B[44]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[21] * B[43]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[22] * B[42]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[23] * B[41]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[24] * B[40]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[25] * B[39]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[26] * B[38]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[27] * B[37]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[28] * B[36]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[29] * B[35]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[30] * B[34]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[31] * B[33]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[32] * B[32]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[33] * B[31]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[34] * B[30]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[35] * B[29]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[36] * B[28]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[37] * B[27]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[38] * B[26]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[39] * B[25]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[40] * B[24]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[41] * B[23]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[42] * B[22]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[43] * B[21]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[44] * B[20]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[45] * B[19]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[46] * B[18]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[47] * B[17]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[48] * B[16]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[49] * B[15]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[50] * B[14]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[51] * B[13]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[52] * B[12]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[53] * B[11]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[54] * B[10]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[55] * B[9]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[56] * B[8]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[57] * B[7]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[58] * B[6]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[59] * B[5]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[60] * B[4]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[61] * B[3]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[62] * B[2]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[63] * B[1]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #256]\n\t"
+        "#  A[2] * B[63]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[3] * B[62]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[4] * B[61]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[5] * B[60]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[6] * B[59]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[7] * B[58]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[8] * B[57]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[9] * B[56]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[10] * B[55]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[11] * B[54]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[12] * B[53]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[13] * B[52]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[14] * B[51]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[15] * B[50]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[16] * B[49]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[17] * B[48]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[18] * B[47]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[19] * B[46]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[20] * B[45]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[21] * B[44]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[22] * B[43]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[23] * B[42]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[24] * B[41]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[25] * B[40]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[26] * B[39]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[27] * B[38]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[28] * B[37]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[29] * B[36]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[30] * B[35]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[31] * B[34]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[32] * B[33]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[33] * B[32]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[34] * B[31]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[35] * B[30]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[36] * B[29]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[37] * B[28]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[38] * B[27]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[39] * B[26]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[40] * B[25]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[41] * B[24]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[42] * B[23]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[43] * B[22]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[44] * B[21]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[45] * B[20]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[46] * B[19]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[47] * B[18]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[48] * B[17]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[49] * B[16]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[50] * B[15]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[51] * B[14]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[52] * B[13]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[53] * B[12]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[54] * B[11]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[55] * B[10]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[56] * B[9]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[57] * B[8]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[58] * B[7]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[59] * B[6]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[60] * B[5]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[61] * B[4]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[62] * B[3]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[63] * B[2]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #260]\n\t"
+        "#  A[3] * B[63]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[4] * B[62]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[5] * B[61]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[6] * B[60]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[7] * B[59]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[8] * B[58]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[9] * B[57]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[10] * B[56]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[11] * B[55]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[12] * B[54]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[13] * B[53]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[14] * B[52]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[15] * B[51]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[16] * B[50]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[17] * B[49]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[18] * B[48]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[19] * B[47]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[20] * B[46]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[21] * B[45]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[22] * B[44]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[23] * B[43]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[24] * B[42]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[25] * B[41]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[26] * B[40]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[27] * B[39]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[28] * B[38]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[29] * B[37]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[30] * B[36]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[31] * B[35]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[32] * B[34]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[33] * B[33]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[34] * B[32]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[35] * B[31]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[36] * B[30]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[37] * B[29]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[38] * B[28]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[39] * B[27]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[40] * B[26]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[41] * B[25]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[42] * B[24]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[43] * B[23]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[44] * B[22]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[45] * B[21]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[46] * B[20]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[47] * B[19]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[48] * B[18]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[49] * B[17]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[50] * B[16]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[51] * B[15]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[52] * B[14]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[53] * B[13]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[54] * B[12]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[55] * B[11]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[56] * B[10]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[57] * B[9]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[58] * B[8]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[59] * B[7]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[60] * B[6]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[61] * B[5]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[62] * B[4]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[63] * B[3]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #264]\n\t"
+        "#  A[4] * B[63]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[5] * B[62]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[6] * B[61]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[7] * B[60]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[8] * B[59]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[9] * B[58]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[10] * B[57]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[11] * B[56]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[12] * B[55]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[13] * B[54]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[14] * B[53]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[15] * B[52]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[16] * B[51]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[17] * B[50]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[18] * B[49]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[19] * B[48]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[20] * B[47]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[21] * B[46]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[22] * B[45]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[23] * B[44]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[24] * B[43]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[25] * B[42]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[26] * B[41]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[27] * B[40]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[28] * B[39]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[29] * B[38]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[30] * B[37]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[31] * B[36]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[32] * B[35]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[33] * B[34]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[34] * B[33]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[35] * B[32]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[36] * B[31]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[37] * B[30]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[38] * B[29]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[39] * B[28]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[40] * B[27]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[41] * B[26]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[42] * B[25]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[43] * B[24]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[44] * B[23]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[45] * B[22]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[46] * B[21]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[47] * B[20]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[48] * B[19]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[49] * B[18]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[50] * B[17]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[51] * B[16]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[52] * B[15]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[53] * B[14]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[54] * B[13]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[55] * B[12]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[56] * B[11]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[57] * B[10]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[58] * B[9]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[59] * B[8]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[60] * B[7]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[61] * B[6]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[62] * B[5]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[63] * B[4]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #268]\n\t"
+        "#  A[5] * B[63]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[6] * B[62]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[7] * B[61]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[8] * B[60]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[9] * B[59]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[10] * B[58]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[11] * B[57]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[12] * B[56]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[13] * B[55]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[14] * B[54]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[15] * B[53]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[16] * B[52]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[17] * B[51]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[18] * B[50]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[19] * B[49]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[20] * B[48]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[21] * B[47]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[22] * B[46]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[23] * B[45]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[24] * B[44]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[25] * B[43]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[26] * B[42]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[27] * B[41]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[28] * B[40]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[29] * B[39]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[30] * B[38]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[31] * B[37]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[32] * B[36]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[33] * B[35]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[34] * B[34]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[35] * B[33]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[36] * B[32]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[37] * B[31]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[38] * B[30]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[39] * B[29]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[40] * B[28]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[41] * B[27]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[42] * B[26]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[43] * B[25]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[44] * B[24]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[45] * B[23]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[46] * B[22]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[47] * B[21]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[48] * B[20]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[49] * B[19]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[50] * B[18]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[51] * B[17]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[52] * B[16]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[53] * B[15]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[54] * B[14]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[55] * B[13]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[56] * B[12]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[57] * B[11]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[58] * B[10]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[59] * B[9]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[60] * B[8]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[61] * B[7]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[62] * B[6]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[63] * B[5]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #272]\n\t"
+        "#  A[6] * B[63]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[7] * B[62]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[8] * B[61]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[9] * B[60]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[10] * B[59]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[11] * B[58]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[12] * B[57]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[13] * B[56]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[14] * B[55]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[15] * B[54]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[16] * B[53]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[17] * B[52]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[18] * B[51]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[19] * B[50]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[20] * B[49]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[21] * B[48]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[22] * B[47]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[23] * B[46]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[24] * B[45]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[25] * B[44]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[26] * B[43]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[27] * B[42]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[28] * B[41]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[29] * B[40]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[30] * B[39]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[31] * B[38]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[32] * B[37]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[33] * B[36]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[34] * B[35]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[35] * B[34]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[36] * B[33]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[37] * B[32]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[38] * B[31]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[39] * B[30]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[40] * B[29]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[41] * B[28]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[42] * B[27]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[43] * B[26]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[44] * B[25]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[45] * B[24]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[46] * B[23]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[47] * B[22]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[48] * B[21]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[49] * B[20]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[50] * B[19]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[51] * B[18]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[52] * B[17]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[53] * B[16]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[54] * B[15]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[55] * B[14]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[56] * B[13]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[57] * B[12]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[58] * B[11]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[59] * B[10]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[60] * B[9]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[61] * B[8]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[62] * B[7]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[63] * B[6]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #276]\n\t"
+        "#  A[7] * B[63]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[8] * B[62]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[9] * B[61]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[10] * B[60]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[11] * B[59]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[12] * B[58]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[13] * B[57]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[14] * B[56]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[15] * B[55]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[16] * B[54]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[17] * B[53]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[18] * B[52]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[19] * B[51]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[20] * B[50]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[21] * B[49]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[22] * B[48]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[23] * B[47]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[24] * B[46]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[25] * B[45]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[26] * B[44]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[27] * B[43]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[28] * B[42]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[29] * B[41]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[30] * B[40]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[31] * B[39]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[32] * B[38]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[33] * B[37]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[34] * B[36]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[35] * B[35]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[36] * B[34]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[37] * B[33]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[38] * B[32]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[39] * B[31]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[40] * B[30]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[41] * B[29]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[42] * B[28]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[43] * B[27]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[44] * B[26]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[45] * B[25]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[46] * B[24]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[47] * B[23]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[48] * B[22]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[49] * B[21]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[50] * B[20]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[51] * B[19]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[52] * B[18]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[53] * B[17]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[54] * B[16]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[55] * B[15]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[56] * B[14]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[57] * B[13]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[58] * B[12]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[59] * B[11]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[60] * B[10]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[61] * B[9]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[62] * B[8]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[63] * B[7]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #280]\n\t"
+        "#  A[8] * B[63]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[9] * B[62]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[10] * B[61]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[11] * B[60]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[12] * B[59]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[13] * B[58]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[14] * B[57]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[15] * B[56]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[16] * B[55]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[17] * B[54]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[18] * B[53]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[19] * B[52]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[20] * B[51]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[21] * B[50]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[22] * B[49]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[23] * B[48]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[24] * B[47]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[25] * B[46]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[26] * B[45]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[27] * B[44]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[28] * B[43]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[29] * B[42]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[30] * B[41]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[31] * B[40]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[32] * B[39]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[33] * B[38]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[34] * B[37]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[35] * B[36]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[36] * B[35]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[37] * B[34]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[38] * B[33]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[39] * B[32]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[40] * B[31]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[41] * B[30]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[42] * B[29]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[43] * B[28]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[44] * B[27]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[45] * B[26]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[46] * B[25]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[47] * B[24]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[48] * B[23]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[49] * B[22]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[50] * B[21]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[51] * B[20]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[52] * B[19]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[53] * B[18]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[54] * B[17]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[55] * B[16]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[56] * B[15]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[57] * B[14]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[58] * B[13]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[59] * B[12]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[60] * B[11]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[61] * B[10]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[62] * B[9]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[63] * B[8]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #284]\n\t"
+        "#  A[9] * B[63]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[10] * B[62]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[11] * B[61]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[12] * B[60]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[13] * B[59]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[14] * B[58]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[15] * B[57]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[16] * B[56]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[17] * B[55]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[18] * B[54]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[19] * B[53]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[20] * B[52]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[21] * B[51]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[22] * B[50]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[23] * B[49]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[24] * B[48]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[25] * B[47]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[26] * B[46]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[27] * B[45]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[28] * B[44]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[29] * B[43]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[30] * B[42]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[31] * B[41]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[32] * B[40]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[33] * B[39]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[34] * B[38]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[35] * B[37]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[36] * B[36]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[37] * B[35]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[38] * B[34]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[39] * B[33]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[40] * B[32]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[41] * B[31]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[42] * B[30]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[43] * B[29]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[44] * B[28]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[45] * B[27]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[46] * B[26]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[47] * B[25]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[48] * B[24]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[49] * B[23]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[50] * B[22]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[51] * B[21]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[52] * B[20]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[53] * B[19]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[54] * B[18]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[55] * B[17]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[56] * B[16]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[57] * B[15]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[58] * B[14]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[59] * B[13]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[60] * B[12]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[61] * B[11]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[62] * B[10]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[63] * B[9]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #288]\n\t"
+        "#  A[10] * B[63]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[11] * B[62]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[12] * B[61]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[13] * B[60]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[14] * B[59]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[15] * B[58]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[16] * B[57]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[17] * B[56]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[18] * B[55]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[19] * B[54]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[20] * B[53]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[21] * B[52]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[22] * B[51]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[23] * B[50]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[24] * B[49]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[25] * B[48]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[26] * B[47]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[27] * B[46]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[28] * B[45]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[29] * B[44]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[30] * B[43]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[31] * B[42]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[32] * B[41]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[33] * B[40]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[34] * B[39]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[35] * B[38]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[36] * B[37]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[37] * B[36]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[38] * B[35]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[39] * B[34]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[40] * B[33]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[41] * B[32]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[42] * B[31]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[43] * B[30]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[44] * B[29]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[45] * B[28]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[46] * B[27]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[47] * B[26]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[48] * B[25]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[49] * B[24]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[50] * B[23]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[51] * B[22]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[52] * B[21]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[53] * B[20]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[54] * B[19]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[55] * B[18]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[56] * B[17]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[57] * B[16]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[58] * B[15]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[59] * B[14]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[60] * B[13]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[61] * B[12]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[62] * B[11]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[63] * B[10]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #292]\n\t"
+        "#  A[11] * B[63]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[12] * B[62]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[13] * B[61]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[14] * B[60]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[15] * B[59]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[16] * B[58]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[17] * B[57]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[18] * B[56]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[19] * B[55]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[20] * B[54]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[21] * B[53]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[22] * B[52]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[23] * B[51]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[24] * B[50]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[25] * B[49]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[26] * B[48]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[27] * B[47]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[28] * B[46]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[29] * B[45]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[30] * B[44]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[31] * B[43]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[32] * B[42]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[33] * B[41]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[34] * B[40]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[35] * B[39]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[36] * B[38]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[37] * B[37]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[38] * B[36]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[39] * B[35]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[40] * B[34]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[41] * B[33]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[42] * B[32]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[43] * B[31]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[44] * B[30]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[45] * B[29]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[46] * B[28]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[47] * B[27]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[48] * B[26]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[49] * B[25]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[50] * B[24]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[51] * B[23]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[52] * B[22]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[53] * B[21]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[54] * B[20]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[55] * B[19]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[56] * B[18]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[57] * B[17]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[58] * B[16]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[59] * B[15]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[60] * B[14]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[61] * B[13]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[62] * B[12]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[63] * B[11]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #296]\n\t"
+        "#  A[12] * B[63]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[13] * B[62]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[14] * B[61]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[15] * B[60]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[16] * B[59]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[17] * B[58]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[18] * B[57]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[19] * B[56]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[20] * B[55]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[21] * B[54]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[22] * B[53]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[23] * B[52]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[24] * B[51]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[25] * B[50]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[26] * B[49]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[27] * B[48]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[28] * B[47]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[29] * B[46]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[30] * B[45]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[31] * B[44]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[32] * B[43]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[33] * B[42]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[34] * B[41]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[35] * B[40]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[36] * B[39]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[37] * B[38]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[38] * B[37]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[39] * B[36]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[40] * B[35]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[41] * B[34]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[42] * B[33]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[43] * B[32]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[44] * B[31]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[45] * B[30]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[46] * B[29]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[47] * B[28]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[48] * B[27]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[49] * B[26]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[50] * B[25]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[51] * B[24]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[52] * B[23]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[53] * B[22]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[54] * B[21]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[55] * B[20]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[56] * B[19]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[57] * B[18]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[58] * B[17]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[59] * B[16]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[60] * B[15]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[61] * B[14]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[62] * B[13]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[63] * B[12]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #48]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #300]\n\t"
+        "#  A[13] * B[63]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[14] * B[62]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[15] * B[61]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[16] * B[60]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[17] * B[59]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[18] * B[58]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[19] * B[57]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[20] * B[56]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[21] * B[55]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[22] * B[54]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[23] * B[53]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[24] * B[52]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[25] * B[51]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[26] * B[50]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[27] * B[49]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[28] * B[48]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[29] * B[47]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[30] * B[46]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[31] * B[45]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[32] * B[44]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[33] * B[43]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[34] * B[42]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[35] * B[41]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[36] * B[40]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[37] * B[39]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[38] * B[38]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[39] * B[37]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[40] * B[36]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[41] * B[35]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[42] * B[34]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[43] * B[33]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[44] * B[32]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[45] * B[31]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[46] * B[30]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[47] * B[29]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[48] * B[28]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[49] * B[27]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[50] * B[26]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[51] * B[25]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[52] * B[24]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[53] * B[23]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[54] * B[22]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[55] * B[21]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[56] * B[20]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[57] * B[19]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[58] * B[18]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[59] * B[17]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[60] * B[16]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[61] * B[15]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[62] * B[14]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[63] * B[13]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #304]\n\t"
+        "#  A[14] * B[63]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[15] * B[62]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[16] * B[61]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[17] * B[60]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[18] * B[59]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[19] * B[58]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[20] * B[57]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[21] * B[56]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[22] * B[55]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[23] * B[54]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[24] * B[53]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[25] * B[52]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[26] * B[51]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[27] * B[50]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[28] * B[49]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[29] * B[48]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[30] * B[47]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[31] * B[46]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[32] * B[45]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[33] * B[44]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[34] * B[43]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[35] * B[42]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[36] * B[41]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[37] * B[40]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[38] * B[39]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[39] * B[38]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[40] * B[37]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[41] * B[36]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[42] * B[35]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[43] * B[34]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[44] * B[33]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[45] * B[32]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[46] * B[31]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[47] * B[30]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[48] * B[29]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[49] * B[28]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[50] * B[27]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[51] * B[26]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[52] * B[25]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[53] * B[24]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[54] * B[23]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[55] * B[22]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[56] * B[21]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[57] * B[20]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[58] * B[19]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[59] * B[18]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[60] * B[17]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[61] * B[16]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[62] * B[15]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[63] * B[14]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #56]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #308]\n\t"
+        "#  A[15] * B[63]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[16] * B[62]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[17] * B[61]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[18] * B[60]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[19] * B[59]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[20] * B[58]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[21] * B[57]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[22] * B[56]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[23] * B[55]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[24] * B[54]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[25] * B[53]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[26] * B[52]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[27] * B[51]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[28] * B[50]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[29] * B[49]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[30] * B[48]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[31] * B[47]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[32] * B[46]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[33] * B[45]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[34] * B[44]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[35] * B[43]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[36] * B[42]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[37] * B[41]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[38] * B[40]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[39] * B[39]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[40] * B[38]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[41] * B[37]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[42] * B[36]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[43] * B[35]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[44] * B[34]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[45] * B[33]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[46] * B[32]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[47] * B[31]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[48] * B[30]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[49] * B[29]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[50] * B[28]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[51] * B[27]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[52] * B[26]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[53] * B[25]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[54] * B[24]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[55] * B[23]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[56] * B[22]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[57] * B[21]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[58] * B[20]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[59] * B[19]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[60] * B[18]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[61] * B[17]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[62] * B[16]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[63] * B[15]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #312]\n\t"
+        "#  A[16] * B[63]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[17] * B[62]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[18] * B[61]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[19] * B[60]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[20] * B[59]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[21] * B[58]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[22] * B[57]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[23] * B[56]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[24] * B[55]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[25] * B[54]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[26] * B[53]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[27] * B[52]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[28] * B[51]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[29] * B[50]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[30] * B[49]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[31] * B[48]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[32] * B[47]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[33] * B[46]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[34] * B[45]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[35] * B[44]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[36] * B[43]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[37] * B[42]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[38] * B[41]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[39] * B[40]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[40] * B[39]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[41] * B[38]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[42] * B[37]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[43] * B[36]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[44] * B[35]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[45] * B[34]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[46] * B[33]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[47] * B[32]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[48] * B[31]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[49] * B[30]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[50] * B[29]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[51] * B[28]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[52] * B[27]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[53] * B[26]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[54] * B[25]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[55] * B[24]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[56] * B[23]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[57] * B[22]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[58] * B[21]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[59] * B[20]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[60] * B[19]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[61] * B[18]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[62] * B[17]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[63] * B[16]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #64]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #316]\n\t"
+        "#  A[17] * B[63]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[18] * B[62]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[19] * B[61]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[20] * B[60]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[21] * B[59]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[22] * B[58]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[23] * B[57]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[24] * B[56]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[25] * B[55]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[26] * B[54]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[27] * B[53]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[28] * B[52]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[29] * B[51]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[30] * B[50]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[31] * B[49]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[32] * B[48]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[33] * B[47]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[34] * B[46]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[35] * B[45]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[36] * B[44]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[37] * B[43]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[38] * B[42]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[39] * B[41]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[40] * B[40]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[41] * B[39]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[42] * B[38]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[43] * B[37]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[44] * B[36]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[45] * B[35]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[46] * B[34]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[47] * B[33]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[48] * B[32]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[49] * B[31]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[50] * B[30]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[51] * B[29]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[52] * B[28]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[53] * B[27]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[54] * B[26]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[55] * B[25]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[56] * B[24]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[57] * B[23]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[58] * B[22]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[59] * B[21]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[60] * B[20]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[61] * B[19]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[62] * B[18]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[63] * B[17]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #68]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #320]\n\t"
+        "#  A[18] * B[63]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[19] * B[62]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[20] * B[61]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[21] * B[60]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[22] * B[59]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[23] * B[58]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[24] * B[57]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[25] * B[56]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[26] * B[55]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[27] * B[54]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[28] * B[53]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[29] * B[52]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[30] * B[51]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[31] * B[50]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[32] * B[49]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[33] * B[48]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[34] * B[47]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[35] * B[46]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[36] * B[45]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[37] * B[44]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[38] * B[43]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[39] * B[42]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[40] * B[41]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[41] * B[40]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[42] * B[39]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[43] * B[38]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[44] * B[37]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[45] * B[36]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[46] * B[35]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[47] * B[34]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[48] * B[33]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[49] * B[32]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[50] * B[31]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[51] * B[30]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[52] * B[29]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[53] * B[28]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[54] * B[27]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[55] * B[26]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[56] * B[25]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[57] * B[24]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[58] * B[23]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[59] * B[22]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[60] * B[21]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[61] * B[20]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[62] * B[19]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[63] * B[18]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #72]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #324]\n\t"
+        "#  A[19] * B[63]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[20] * B[62]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[21] * B[61]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[22] * B[60]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[23] * B[59]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[24] * B[58]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[25] * B[57]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[26] * B[56]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[27] * B[55]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[28] * B[54]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[29] * B[53]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[30] * B[52]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[31] * B[51]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[32] * B[50]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[33] * B[49]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[34] * B[48]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[35] * B[47]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[36] * B[46]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[37] * B[45]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[38] * B[44]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[39] * B[43]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[40] * B[42]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[41] * B[41]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[42] * B[40]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[43] * B[39]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[44] * B[38]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[45] * B[37]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[46] * B[36]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[47] * B[35]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[48] * B[34]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[49] * B[33]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[50] * B[32]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[51] * B[31]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[52] * B[30]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[53] * B[29]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[54] * B[28]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[55] * B[27]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[56] * B[26]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[57] * B[25]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[58] * B[24]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[59] * B[23]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[60] * B[22]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[61] * B[21]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[62] * B[20]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[63] * B[19]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #76]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #328]\n\t"
+        "#  A[20] * B[63]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[21] * B[62]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[22] * B[61]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[23] * B[60]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[24] * B[59]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[25] * B[58]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[26] * B[57]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[27] * B[56]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[28] * B[55]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[29] * B[54]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[30] * B[53]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[31] * B[52]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[32] * B[51]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[33] * B[50]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[34] * B[49]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[35] * B[48]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[36] * B[47]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[37] * B[46]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[38] * B[45]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[39] * B[44]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[40] * B[43]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[41] * B[42]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[42] * B[41]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[43] * B[40]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[44] * B[39]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[45] * B[38]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[46] * B[37]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[47] * B[36]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[48] * B[35]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[49] * B[34]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[50] * B[33]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[51] * B[32]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[52] * B[31]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[53] * B[30]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[54] * B[29]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[55] * B[28]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[56] * B[27]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[57] * B[26]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[58] * B[25]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[59] * B[24]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[60] * B[23]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[61] * B[22]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[62] * B[21]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[63] * B[20]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #80]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #332]\n\t"
+        "#  A[21] * B[63]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[22] * B[62]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[23] * B[61]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[24] * B[60]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[25] * B[59]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[26] * B[58]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[27] * B[57]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[28] * B[56]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[29] * B[55]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[30] * B[54]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[31] * B[53]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[32] * B[52]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[33] * B[51]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[34] * B[50]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[35] * B[49]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[36] * B[48]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[37] * B[47]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[38] * B[46]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[39] * B[45]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[40] * B[44]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[41] * B[43]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[42] * B[42]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[43] * B[41]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[44] * B[40]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[45] * B[39]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[46] * B[38]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[47] * B[37]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[48] * B[36]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[49] * B[35]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[50] * B[34]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[51] * B[33]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[52] * B[32]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[53] * B[31]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[54] * B[30]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[55] * B[29]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[56] * B[28]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[57] * B[27]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[58] * B[26]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[59] * B[25]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[60] * B[24]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[61] * B[23]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[62] * B[22]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[63] * B[21]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #84]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #336]\n\t"
+        "#  A[22] * B[63]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[23] * B[62]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[24] * B[61]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[25] * B[60]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[26] * B[59]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[27] * B[58]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[28] * B[57]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[29] * B[56]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[30] * B[55]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[31] * B[54]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[32] * B[53]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[33] * B[52]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[34] * B[51]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[35] * B[50]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[36] * B[49]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[37] * B[48]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[38] * B[47]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[39] * B[46]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[40] * B[45]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[41] * B[44]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[42] * B[43]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[43] * B[42]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[44] * B[41]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[45] * B[40]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[46] * B[39]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[47] * B[38]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[48] * B[37]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[49] * B[36]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[50] * B[35]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[51] * B[34]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[52] * B[33]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[53] * B[32]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[54] * B[31]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[55] * B[30]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[56] * B[29]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[57] * B[28]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[58] * B[27]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[59] * B[26]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[60] * B[25]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[61] * B[24]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[62] * B[23]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[63] * B[22]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #88]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #340]\n\t"
+        "#  A[23] * B[63]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[24] * B[62]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[25] * B[61]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[26] * B[60]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[27] * B[59]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[28] * B[58]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[29] * B[57]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[30] * B[56]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[31] * B[55]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[32] * B[54]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[33] * B[53]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[34] * B[52]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[35] * B[51]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[36] * B[50]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[37] * B[49]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[38] * B[48]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[39] * B[47]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[40] * B[46]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[41] * B[45]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[42] * B[44]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[43] * B[43]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[44] * B[42]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[45] * B[41]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[46] * B[40]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[47] * B[39]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[48] * B[38]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[49] * B[37]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[50] * B[36]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[51] * B[35]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[52] * B[34]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[53] * B[33]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[54] * B[32]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[55] * B[31]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[56] * B[30]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[57] * B[29]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[58] * B[28]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[59] * B[27]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[60] * B[26]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[61] * B[25]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[62] * B[24]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[63] * B[23]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #92]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #344]\n\t"
+        "#  A[24] * B[63]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[25] * B[62]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[26] * B[61]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[27] * B[60]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[28] * B[59]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[29] * B[58]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[30] * B[57]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[31] * B[56]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[32] * B[55]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[33] * B[54]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[34] * B[53]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[35] * B[52]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[36] * B[51]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[37] * B[50]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[38] * B[49]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[39] * B[48]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[40] * B[47]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[41] * B[46]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[42] * B[45]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[43] * B[44]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[44] * B[43]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[45] * B[42]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[46] * B[41]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[47] * B[40]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[48] * B[39]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[49] * B[38]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[50] * B[37]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[51] * B[36]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[52] * B[35]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[53] * B[34]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[54] * B[33]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[55] * B[32]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[56] * B[31]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[57] * B[30]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[58] * B[29]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[59] * B[28]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[60] * B[27]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[61] * B[26]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[62] * B[25]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[63] * B[24]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #96]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #348]\n\t"
+        "#  A[25] * B[63]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[26] * B[62]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[27] * B[61]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[28] * B[60]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[29] * B[59]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[30] * B[58]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[31] * B[57]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[32] * B[56]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[33] * B[55]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[34] * B[54]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[35] * B[53]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[36] * B[52]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[37] * B[51]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[38] * B[50]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[39] * B[49]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[40] * B[48]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[41] * B[47]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[42] * B[46]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[43] * B[45]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[44] * B[44]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[45] * B[43]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[46] * B[42]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[47] * B[41]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[48] * B[40]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[49] * B[39]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[50] * B[38]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[51] * B[37]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[52] * B[36]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[53] * B[35]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[54] * B[34]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[55] * B[33]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[56] * B[32]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[57] * B[31]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[58] * B[30]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[59] * B[29]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[60] * B[28]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[61] * B[27]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[62] * B[26]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[63] * B[25]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #100]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #352]\n\t"
+        "#  A[26] * B[63]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[27] * B[62]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[28] * B[61]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[29] * B[60]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[30] * B[59]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[31] * B[58]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[32] * B[57]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[33] * B[56]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[34] * B[55]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[35] * B[54]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[36] * B[53]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[37] * B[52]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[38] * B[51]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[39] * B[50]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[40] * B[49]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[41] * B[48]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[42] * B[47]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[43] * B[46]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[44] * B[45]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[45] * B[44]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[46] * B[43]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[47] * B[42]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[48] * B[41]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[49] * B[40]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[50] * B[39]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[51] * B[38]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[52] * B[37]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[53] * B[36]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[54] * B[35]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[55] * B[34]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[56] * B[33]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[57] * B[32]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[58] * B[31]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[59] * B[30]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[60] * B[29]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[61] * B[28]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[62] * B[27]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[63] * B[26]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #104]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #356]\n\t"
+        "#  A[27] * B[63]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[28] * B[62]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[29] * B[61]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[30] * B[60]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[31] * B[59]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[32] * B[58]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[33] * B[57]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[34] * B[56]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[35] * B[55]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[36] * B[54]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[37] * B[53]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[38] * B[52]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[39] * B[51]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[40] * B[50]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[41] * B[49]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[42] * B[48]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[43] * B[47]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[44] * B[46]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[45] * B[45]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[46] * B[44]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[47] * B[43]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[48] * B[42]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[49] * B[41]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[50] * B[40]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[51] * B[39]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[52] * B[38]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[53] * B[37]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[54] * B[36]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[55] * B[35]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[56] * B[34]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[57] * B[33]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[58] * B[32]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[59] * B[31]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[60] * B[30]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[61] * B[29]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[62] * B[28]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[63] * B[27]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #108]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #360]\n\t"
+        "#  A[28] * B[63]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[29] * B[62]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[30] * B[61]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[31] * B[60]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[32] * B[59]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[33] * B[58]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[34] * B[57]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[35] * B[56]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[36] * B[55]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[37] * B[54]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[38] * B[53]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[39] * B[52]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[40] * B[51]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[41] * B[50]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[42] * B[49]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[43] * B[48]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[44] * B[47]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[45] * B[46]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[46] * B[45]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[47] * B[44]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[48] * B[43]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[49] * B[42]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[50] * B[41]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[51] * B[40]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[52] * B[39]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[53] * B[38]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[54] * B[37]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[55] * B[36]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[56] * B[35]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[57] * B[34]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[58] * B[33]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[59] * B[32]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[60] * B[31]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[61] * B[30]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[62] * B[29]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[63] * B[28]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #112]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #364]\n\t"
+        "#  A[29] * B[63]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[30] * B[62]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[31] * B[61]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[32] * B[60]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[33] * B[59]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[34] * B[58]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[35] * B[57]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[36] * B[56]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[37] * B[55]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[38] * B[54]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[39] * B[53]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[40] * B[52]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[41] * B[51]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[42] * B[50]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[43] * B[49]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[44] * B[48]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[45] * B[47]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[46] * B[46]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[47] * B[45]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[48] * B[44]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[49] * B[43]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[50] * B[42]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[51] * B[41]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[52] * B[40]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[53] * B[39]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[54] * B[38]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[55] * B[37]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[56] * B[36]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[57] * B[35]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[58] * B[34]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[59] * B[33]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[60] * B[32]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[61] * B[31]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[62] * B[30]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[63] * B[29]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #116]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #368]\n\t"
+        "#  A[30] * B[63]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[31] * B[62]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[32] * B[61]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[33] * B[60]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[34] * B[59]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[35] * B[58]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[36] * B[57]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[37] * B[56]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[38] * B[55]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[39] * B[54]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[40] * B[53]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[41] * B[52]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[42] * B[51]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[43] * B[50]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[44] * B[49]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[45] * B[48]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[46] * B[47]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[47] * B[46]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[48] * B[45]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[49] * B[44]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[50] * B[43]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[51] * B[42]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[52] * B[41]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[53] * B[40]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[54] * B[39]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[55] * B[38]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[56] * B[37]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[57] * B[36]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[58] * B[35]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[59] * B[34]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[60] * B[33]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[61] * B[32]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[62] * B[31]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[63] * B[30]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #120]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #372]\n\t"
+        "#  A[31] * B[63]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[32] * B[62]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[33] * B[61]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[34] * B[60]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[35] * B[59]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[36] * B[58]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[37] * B[57]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[38] * B[56]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[39] * B[55]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[40] * B[54]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[41] * B[53]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[42] * B[52]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[43] * B[51]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[44] * B[50]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[45] * B[49]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[46] * B[48]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[47] * B[47]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[48] * B[46]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[49] * B[45]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[50] * B[44]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[51] * B[43]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[52] * B[42]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[53] * B[41]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[54] * B[40]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[55] * B[39]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[56] * B[38]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[57] * B[37]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[58] * B[36]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[59] * B[35]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[60] * B[34]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[61] * B[33]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[62] * B[32]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[63] * B[31]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #124]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #376]\n\t"
+        "#  A[32] * B[63]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[33] * B[62]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[34] * B[61]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[35] * B[60]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[36] * B[59]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[37] * B[58]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[38] * B[57]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[39] * B[56]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[40] * B[55]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[41] * B[54]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[42] * B[53]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[43] * B[52]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[44] * B[51]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[45] * B[50]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[46] * B[49]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[47] * B[48]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[48] * B[47]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[49] * B[46]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[50] * B[45]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[51] * B[44]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[52] * B[43]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[53] * B[42]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[54] * B[41]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[55] * B[40]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[56] * B[39]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[57] * B[38]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[58] * B[37]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[59] * B[36]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[60] * B[35]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[61] * B[34]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[62] * B[33]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[63] * B[32]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #128]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #380]\n\t"
+        "#  A[33] * B[63]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[34] * B[62]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[35] * B[61]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[36] * B[60]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[37] * B[59]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[38] * B[58]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[39] * B[57]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[40] * B[56]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[41] * B[55]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[42] * B[54]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[43] * B[53]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[44] * B[52]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[45] * B[51]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[46] * B[50]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[47] * B[49]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[48] * B[48]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[49] * B[47]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[50] * B[46]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[51] * B[45]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[52] * B[44]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[53] * B[43]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[54] * B[42]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[55] * B[41]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[56] * B[40]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[57] * B[39]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[58] * B[38]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[59] * B[37]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[60] * B[36]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[61] * B[35]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[62] * B[34]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[63] * B[33]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #132]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #384]\n\t"
+        "#  A[34] * B[63]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[35] * B[62]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[36] * B[61]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[37] * B[60]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[38] * B[59]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[39] * B[58]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[40] * B[57]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[41] * B[56]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[42] * B[55]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[43] * B[54]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[44] * B[53]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[45] * B[52]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[46] * B[51]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[47] * B[50]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[48] * B[49]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[49] * B[48]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[50] * B[47]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[51] * B[46]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[52] * B[45]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[53] * B[44]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[54] * B[43]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[55] * B[42]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[56] * B[41]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[57] * B[40]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[58] * B[39]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[59] * B[38]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[60] * B[37]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[61] * B[36]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[62] * B[35]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[63] * B[34]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #136]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #388]\n\t"
+        "#  A[35] * B[63]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[36] * B[62]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[37] * B[61]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[38] * B[60]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[39] * B[59]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[40] * B[58]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[41] * B[57]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[42] * B[56]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[43] * B[55]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[44] * B[54]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[45] * B[53]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[46] * B[52]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[47] * B[51]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[48] * B[50]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[49] * B[49]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[50] * B[48]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[51] * B[47]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[52] * B[46]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[53] * B[45]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[54] * B[44]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[55] * B[43]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[56] * B[42]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[57] * B[41]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[58] * B[40]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[59] * B[39]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[60] * B[38]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[61] * B[37]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[62] * B[36]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[63] * B[35]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #140]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #392]\n\t"
+        "#  A[36] * B[63]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[37] * B[62]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[38] * B[61]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[39] * B[60]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[40] * B[59]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[41] * B[58]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[42] * B[57]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[43] * B[56]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[44] * B[55]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[45] * B[54]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[46] * B[53]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[47] * B[52]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[48] * B[51]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[49] * B[50]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[50] * B[49]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[51] * B[48]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[52] * B[47]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[53] * B[46]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[54] * B[45]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[55] * B[44]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[56] * B[43]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[57] * B[42]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[58] * B[41]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[59] * B[40]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[60] * B[39]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[61] * B[38]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[62] * B[37]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[63] * B[36]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #144]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #396]\n\t"
+        "#  A[37] * B[63]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[38] * B[62]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[39] * B[61]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[40] * B[60]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[41] * B[59]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[42] * B[58]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[43] * B[57]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[44] * B[56]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[45] * B[55]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[46] * B[54]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[47] * B[53]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[48] * B[52]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[49] * B[51]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[50] * B[50]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[51] * B[49]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[52] * B[48]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[53] * B[47]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[54] * B[46]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[55] * B[45]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[56] * B[44]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[57] * B[43]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[58] * B[42]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[59] * B[41]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[60] * B[40]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[61] * B[39]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[62] * B[38]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[63] * B[37]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #148]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #400]\n\t"
+        "#  A[38] * B[63]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[39] * B[62]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[40] * B[61]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[41] * B[60]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[42] * B[59]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[43] * B[58]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[44] * B[57]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[45] * B[56]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[46] * B[55]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[47] * B[54]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[48] * B[53]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[49] * B[52]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[50] * B[51]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[51] * B[50]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[52] * B[49]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[53] * B[48]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[54] * B[47]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[55] * B[46]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[56] * B[45]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[57] * B[44]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[58] * B[43]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[59] * B[42]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[60] * B[41]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[61] * B[40]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[62] * B[39]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[63] * B[38]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #152]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #404]\n\t"
+        "#  A[39] * B[63]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[40] * B[62]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[41] * B[61]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[42] * B[60]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[43] * B[59]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[44] * B[58]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[45] * B[57]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[46] * B[56]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[47] * B[55]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[48] * B[54]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[49] * B[53]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[50] * B[52]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[51] * B[51]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[52] * B[50]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[53] * B[49]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[54] * B[48]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[55] * B[47]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[56] * B[46]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[57] * B[45]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[58] * B[44]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[59] * B[43]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[60] * B[42]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[61] * B[41]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[62] * B[40]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[63] * B[39]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #156]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #408]\n\t"
+        "#  A[40] * B[63]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[41] * B[62]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[42] * B[61]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[43] * B[60]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[44] * B[59]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[45] * B[58]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[46] * B[57]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[47] * B[56]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[48] * B[55]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[49] * B[54]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[50] * B[53]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[51] * B[52]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[52] * B[51]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[53] * B[50]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[54] * B[49]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[55] * B[48]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[56] * B[47]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[57] * B[46]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[58] * B[45]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[59] * B[44]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[60] * B[43]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[61] * B[42]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[62] * B[41]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[63] * B[40]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #160]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #412]\n\t"
+        "#  A[41] * B[63]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[42] * B[62]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[43] * B[61]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[44] * B[60]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[45] * B[59]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[46] * B[58]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[47] * B[57]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[48] * B[56]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[49] * B[55]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[50] * B[54]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[51] * B[53]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[52] * B[52]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[53] * B[51]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[54] * B[50]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[55] * B[49]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[56] * B[48]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[57] * B[47]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[58] * B[46]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[59] * B[45]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[60] * B[44]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[61] * B[43]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[62] * B[42]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[63] * B[41]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #164]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #416]\n\t"
+        "#  A[42] * B[63]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[43] * B[62]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[44] * B[61]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[45] * B[60]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[46] * B[59]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[47] * B[58]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[48] * B[57]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[49] * B[56]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[50] * B[55]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[51] * B[54]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[52] * B[53]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[53] * B[52]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[54] * B[51]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[55] * B[50]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[56] * B[49]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[57] * B[48]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[58] * B[47]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[59] * B[46]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[60] * B[45]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[61] * B[44]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[62] * B[43]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[63] * B[42]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #168]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #420]\n\t"
+        "#  A[43] * B[63]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[44] * B[62]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[45] * B[61]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[46] * B[60]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[47] * B[59]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[48] * B[58]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[49] * B[57]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[50] * B[56]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[51] * B[55]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[52] * B[54]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[53] * B[53]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[54] * B[52]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[55] * B[51]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[56] * B[50]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[57] * B[49]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[58] * B[48]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[59] * B[47]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[60] * B[46]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[61] * B[45]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[62] * B[44]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[63] * B[43]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #172]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #424]\n\t"
+        "#  A[44] * B[63]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[45] * B[62]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[46] * B[61]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[47] * B[60]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[48] * B[59]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[49] * B[58]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[50] * B[57]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[51] * B[56]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[52] * B[55]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[53] * B[54]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[54] * B[53]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[55] * B[52]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[56] * B[51]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[57] * B[50]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[58] * B[49]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[59] * B[48]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[60] * B[47]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[61] * B[46]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[62] * B[45]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[63] * B[44]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #176]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #428]\n\t"
+        "#  A[45] * B[63]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[46] * B[62]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[47] * B[61]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[48] * B[60]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[49] * B[59]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[50] * B[58]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[51] * B[57]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[52] * B[56]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[53] * B[55]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[54] * B[54]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[55] * B[53]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[56] * B[52]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[57] * B[51]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[58] * B[50]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[59] * B[49]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[60] * B[48]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[61] * B[47]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[62] * B[46]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[63] * B[45]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #180]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #432]\n\t"
+        "#  A[46] * B[63]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[47] * B[62]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[48] * B[61]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[49] * B[60]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[50] * B[59]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[51] * B[58]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[52] * B[57]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[53] * B[56]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[54] * B[55]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[55] * B[54]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[56] * B[53]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[57] * B[52]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[58] * B[51]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[59] * B[50]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[60] * B[49]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[61] * B[48]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[62] * B[47]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[63] * B[46]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #184]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #436]\n\t"
+        "#  A[47] * B[63]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[48] * B[62]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[49] * B[61]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[50] * B[60]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[51] * B[59]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[52] * B[58]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[53] * B[57]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[54] * B[56]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[55] * B[55]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[56] * B[54]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[57] * B[53]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[58] * B[52]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[59] * B[51]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[60] * B[50]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[61] * B[49]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[62] * B[48]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[63] * B[47]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #188]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #440]\n\t"
+        "#  A[48] * B[63]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[49] * B[62]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[50] * B[61]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[51] * B[60]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[52] * B[59]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[53] * B[58]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[54] * B[57]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[55] * B[56]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[56] * B[55]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[57] * B[54]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[58] * B[53]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[59] * B[52]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[60] * B[51]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[61] * B[50]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[62] * B[49]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[63] * B[48]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #192]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #444]\n\t"
+        "#  A[49] * B[63]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[50] * B[62]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[51] * B[61]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[52] * B[60]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[53] * B[59]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[54] * B[58]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[55] * B[57]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[56] * B[56]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[57] * B[55]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[58] * B[54]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[59] * B[53]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[60] * B[52]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[61] * B[51]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[62] * B[50]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[63] * B[49]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #196]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #448]\n\t"
+        "#  A[50] * B[63]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[51] * B[62]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[52] * B[61]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[53] * B[60]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[54] * B[59]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[55] * B[58]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[56] * B[57]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[57] * B[56]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[58] * B[55]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[59] * B[54]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[60] * B[53]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[61] * B[52]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[62] * B[51]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[63] * B[50]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #200]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #452]\n\t"
+        "#  A[51] * B[63]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[52] * B[62]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[53] * B[61]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[54] * B[60]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[55] * B[59]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[56] * B[58]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[57] * B[57]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[58] * B[56]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[59] * B[55]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[60] * B[54]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[61] * B[53]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[62] * B[52]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[63] * B[51]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #204]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #456]\n\t"
+        "#  A[52] * B[63]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[53] * B[62]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[54] * B[61]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[55] * B[60]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[56] * B[59]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[57] * B[58]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[58] * B[57]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[59] * B[56]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[60] * B[55]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[61] * B[54]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[62] * B[53]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[63] * B[52]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #208]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #460]\n\t"
+        "#  A[53] * B[63]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[54] * B[62]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[55] * B[61]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[56] * B[60]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[57] * B[59]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[58] * B[58]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[59] * B[57]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[60] * B[56]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[61] * B[55]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[62] * B[54]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[63] * B[53]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #212]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #464]\n\t"
+        "#  A[54] * B[63]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[55] * B[62]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[56] * B[61]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[57] * B[60]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[58] * B[59]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[59] * B[58]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[60] * B[57]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[61] * B[56]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[62] * B[55]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[63] * B[54]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #216]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #468]\n\t"
+        "#  A[55] * B[63]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[56] * B[62]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[57] * B[61]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[58] * B[60]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[59] * B[59]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[60] * B[58]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[61] * B[57]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[62] * B[56]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[63] * B[55]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #220]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #472]\n\t"
+        "#  A[56] * B[63]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[57] * B[62]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[58] * B[61]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[59] * B[60]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[60] * B[59]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[61] * B[58]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[62] * B[57]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[63] * B[56]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #224]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #476]\n\t"
+        "#  A[57] * B[63]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[58] * B[62]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[59] * B[61]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[60] * B[60]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[61] * B[59]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[62] * B[58]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[63] * B[57]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #228]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #480]\n\t"
+        "#  A[58] * B[63]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[59] * B[62]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[60] * B[61]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[61] * B[60]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[62] * B[59]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[63] * B[58]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #232]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #484]\n\t"
+        "#  A[59] * B[63]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[60] * B[62]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[61] * B[61]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[62] * B[60]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[63] * B[59]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #236]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #488]\n\t"
+        "#  A[60] * B[63]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[61] * B[62]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[62] * B[61]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[63] * B[60]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #240]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #492]\n\t"
+        "#  A[61] * B[63]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[62] * B[62]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[63] * B[61]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #244]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #496]\n\t"
+        "#  A[62] * B[63]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[63] * B[62]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #248]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #500]\n\t"
+        "#  A[63] * B[63]\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "ldr	r9, [%[b], #252]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r3, [%[r], #504]\n\t"
+        "str	r4, [%[r], #508]\n\t"
+        "ldr	r3, [sp, #0]\n\t"
+        "ldr	r4, [sp, #4]\n\t"
+        "ldr	r5, [sp, #8]\n\t"
+        "ldr	r6, [sp, #12]\n\t"
+        "str	r3, [%[r], #0]\n\t"
+        "str	r4, [%[r], #4]\n\t"
+        "str	r5, [%[r], #8]\n\t"
+        "str	r6, [%[r], #12]\n\t"
+        "ldr	r3, [sp, #16]\n\t"
+        "ldr	r4, [sp, #20]\n\t"
+        "ldr	r5, [sp, #24]\n\t"
+        "ldr	r6, [sp, #28]\n\t"
+        "str	r3, [%[r], #16]\n\t"
+        "str	r4, [%[r], #20]\n\t"
+        "str	r5, [%[r], #24]\n\t"
+        "str	r6, [%[r], #28]\n\t"
+        "ldr	r3, [sp, #32]\n\t"
+        "ldr	r4, [sp, #36]\n\t"
+        "ldr	r5, [sp, #40]\n\t"
+        "ldr	r6, [sp, #44]\n\t"
+        "str	r3, [%[r], #32]\n\t"
+        "str	r4, [%[r], #36]\n\t"
+        "str	r5, [%[r], #40]\n\t"
+        "str	r6, [%[r], #44]\n\t"
+        "ldr	r3, [sp, #48]\n\t"
+        "ldr	r4, [sp, #52]\n\t"
+        "ldr	r5, [sp, #56]\n\t"
+        "ldr	r6, [sp, #60]\n\t"
+        "str	r3, [%[r], #48]\n\t"
+        "str	r4, [%[r], #52]\n\t"
+        "str	r5, [%[r], #56]\n\t"
+        "str	r6, [%[r], #60]\n\t"
+        "ldr	r3, [sp, #64]\n\t"
+        "ldr	r4, [sp, #68]\n\t"
+        "ldr	r5, [sp, #72]\n\t"
+        "ldr	r6, [sp, #76]\n\t"
+        "str	r3, [%[r], #64]\n\t"
+        "str	r4, [%[r], #68]\n\t"
+        "str	r5, [%[r], #72]\n\t"
+        "str	r6, [%[r], #76]\n\t"
+        "ldr	r3, [sp, #80]\n\t"
+        "ldr	r4, [sp, #84]\n\t"
+        "ldr	r5, [sp, #88]\n\t"
+        "ldr	r6, [sp, #92]\n\t"
+        "str	r3, [%[r], #80]\n\t"
+        "str	r4, [%[r], #84]\n\t"
+        "str	r5, [%[r], #88]\n\t"
+        "str	r6, [%[r], #92]\n\t"
+        "ldr	r3, [sp, #96]\n\t"
+        "ldr	r4, [sp, #100]\n\t"
+        "ldr	r5, [sp, #104]\n\t"
+        "ldr	r6, [sp, #108]\n\t"
+        "str	r3, [%[r], #96]\n\t"
+        "str	r4, [%[r], #100]\n\t"
+        "str	r5, [%[r], #104]\n\t"
+        "str	r6, [%[r], #108]\n\t"
+        "ldr	r3, [sp, #112]\n\t"
+        "ldr	r4, [sp, #116]\n\t"
+        "ldr	r5, [sp, #120]\n\t"
+        "ldr	r6, [sp, #124]\n\t"
+        "str	r3, [%[r], #112]\n\t"
+        "str	r4, [%[r], #116]\n\t"
+        "str	r5, [%[r], #120]\n\t"
+        "str	r6, [%[r], #124]\n\t"
+        "ldr	r3, [sp, #128]\n\t"
+        "ldr	r4, [sp, #132]\n\t"
+        "ldr	r5, [sp, #136]\n\t"
+        "ldr	r6, [sp, #140]\n\t"
+        "str	r3, [%[r], #128]\n\t"
+        "str	r4, [%[r], #132]\n\t"
+        "str	r5, [%[r], #136]\n\t"
+        "str	r6, [%[r], #140]\n\t"
+        "ldr	r3, [sp, #144]\n\t"
+        "ldr	r4, [sp, #148]\n\t"
+        "ldr	r5, [sp, #152]\n\t"
+        "ldr	r6, [sp, #156]\n\t"
+        "str	r3, [%[r], #144]\n\t"
+        "str	r4, [%[r], #148]\n\t"
+        "str	r5, [%[r], #152]\n\t"
+        "str	r6, [%[r], #156]\n\t"
+        "ldr	r3, [sp, #160]\n\t"
+        "ldr	r4, [sp, #164]\n\t"
+        "ldr	r5, [sp, #168]\n\t"
+        "ldr	r6, [sp, #172]\n\t"
+        "str	r3, [%[r], #160]\n\t"
+        "str	r4, [%[r], #164]\n\t"
+        "str	r5, [%[r], #168]\n\t"
+        "str	r6, [%[r], #172]\n\t"
+        "ldr	r3, [sp, #176]\n\t"
+        "ldr	r4, [sp, #180]\n\t"
+        "ldr	r5, [sp, #184]\n\t"
+        "ldr	r6, [sp, #188]\n\t"
+        "str	r3, [%[r], #176]\n\t"
+        "str	r4, [%[r], #180]\n\t"
+        "str	r5, [%[r], #184]\n\t"
+        "str	r6, [%[r], #188]\n\t"
+        "ldr	r3, [sp, #192]\n\t"
+        "ldr	r4, [sp, #196]\n\t"
+        "ldr	r5, [sp, #200]\n\t"
+        "ldr	r6, [sp, #204]\n\t"
+        "str	r3, [%[r], #192]\n\t"
+        "str	r4, [%[r], #196]\n\t"
+        "str	r5, [%[r], #200]\n\t"
+        "str	r6, [%[r], #204]\n\t"
+        "ldr	r3, [sp, #208]\n\t"
+        "ldr	r4, [sp, #212]\n\t"
+        "ldr	r5, [sp, #216]\n\t"
+        "ldr	r6, [sp, #220]\n\t"
+        "str	r3, [%[r], #208]\n\t"
+        "str	r4, [%[r], #212]\n\t"
+        "str	r5, [%[r], #216]\n\t"
+        "str	r6, [%[r], #220]\n\t"
+        "ldr	r3, [sp, #224]\n\t"
+        "ldr	r4, [sp, #228]\n\t"
+        "ldr	r5, [sp, #232]\n\t"
+        "ldr	r6, [sp, #236]\n\t"
+        "str	r3, [%[r], #224]\n\t"
+        "str	r4, [%[r], #228]\n\t"
+        "str	r5, [%[r], #232]\n\t"
+        "str	r6, [%[r], #236]\n\t"
+        "ldr	r3, [sp, #240]\n\t"
+        "ldr	r4, [sp, #244]\n\t"
+        "ldr	r5, [sp, #248]\n\t"
+        "ldr	r6, [sp, #252]\n\t"
+        "str	r3, [%[r], #240]\n\t"
+        "str	r4, [%[r], #244]\n\t"
+        "str	r5, [%[r], #248]\n\t"
+        "str	r6, [%[r], #252]\n\t"
+        "add	sp, sp, #256\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
+    );
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * m  Mask to AND against each digit.
+ */
+static void sp_4096_mask_64(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<64; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    int i;
+
+    for (i = 0; i < 64; i += 8) {
+        r[i+0] = a[i+0] & m;
+        r[i+1] = a[i+1] & m;
+        r[i+2] = a[i+2] & m;
+        r[i+3] = a[i+3] & m;
+        r[i+4] = a[i+4] & m;
+        r[i+5] = a[i+5] & m;
+        r[i+6] = a[i+6] & m;
+        r[i+7] = a[i+7] & m;
+    }
+#endif
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_mul_128(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit* z0 = r;
+    sp_digit z1[128];
+    sp_digit a1[64];
+    sp_digit b1[64];
+    sp_digit z2[128];
+    sp_digit u, ca, cb;
+
+    ca = sp_2048_add_64(a1, a, &a[64]);
+    cb = sp_2048_add_64(b1, b, &b[64]);
+    u  = ca & cb;
+    sp_2048_mul_64(z1, a1, b1);
+    sp_2048_mul_64(z2, &a[64], &b[64]);
+    sp_2048_mul_64(z0, a, b);
+    sp_2048_mask_64(r + 128, a1, 0 - cb);
+    sp_2048_mask_64(b1, b1, 0 - ca);
+    u += sp_2048_add_64(r + 128, r + 128, b1);
+    u += sp_4096_sub_in_place_128(z1, z2);
+    u += sp_4096_sub_in_place_128(z1, z0);
+    u += sp_4096_add_128(r + 64, r + 64, z1);
+    r[192] = u;
+    XMEMSET(r + 192 + 1, 0, sizeof(sp_digit) * (64 - 1));
+    (void)sp_4096_add_128(r + 128, r + 128, z2);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+static void sp_4096_sqr_64(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "sub	sp, sp, #256\n\t"
+        "mov	r14, #0\n\t"
+        "#  A[0] * A[0]\n\t"
+        "ldr	r10, [%[a], #0]\n\t"
+        "umull	r8, r3, r10, r10\n\t"
+        "mov	r4, #0\n\t"
+        "str	r8, [sp]\n\t"
+        "#  A[0] * A[1]\n\t"
+        "ldr	r10, [%[a], #4]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r3, r3, r8\n\t"
+        "adcs	r4, r4, r9\n\t"
+        "adc	r2, r14, r14\n\t"
+        "adds	r3, r3, r8\n\t"
+        "adcs	r4, r4, r9\n\t"
+        "adc	r2, r2, r14\n\t"
+        "str	r3, [sp, #4]\n\t"
+        "#  A[0] * A[2]\n\t"
+        "ldr	r10, [%[a], #8]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r4, r4, r8\n\t"
+        "adcs	r2, r2, r9\n\t"
+        "adc	r3, r14, r14\n\t"
+        "adds	r4, r4, r8\n\t"
+        "adcs	r2, r2, r9\n\t"
+        "adc	r3, r3, r14\n\t"
+        "#  A[1] * A[1]\n\t"
+        "ldr	r10, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r4, r4, r8\n\t"
+        "adcs	r2, r2, r9\n\t"
+        "adc	r3, r3, r14\n\t"
+        "str	r4, [sp, #8]\n\t"
+        "#  A[0] * A[3]\n\t"
+        "ldr	r10, [%[a], #12]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r2, r2, r8\n\t"
+        "adcs	r3, r3, r9\n\t"
+        "adc	r4, r14, r14\n\t"
+        "adds	r2, r2, r8\n\t"
+        "adcs	r3, r3, r9\n\t"
+        "adc	r4, r4, r14\n\t"
+        "#  A[1] * A[2]\n\t"
+        "ldr	r10, [%[a], #8]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r2, r2, r8\n\t"
+        "adcs	r3, r3, r9\n\t"
+        "adc	r4, r4, r14\n\t"
+        "adds	r2, r2, r8\n\t"
+        "adcs	r3, r3, r9\n\t"
+        "adc	r4, r4, r14\n\t"
+        "str	r2, [sp, #12]\n\t"
+        "#  A[0] * A[4]\n\t"
+        "ldr	r10, [%[a], #16]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r3, r3, r8\n\t"
+        "adcs	r4, r4, r9\n\t"
+        "adc	r2, r14, r14\n\t"
+        "adds	r3, r3, r8\n\t"
+        "adcs	r4, r4, r9\n\t"
+        "adc	r2, r2, r14\n\t"
+        "#  A[1] * A[3]\n\t"
+        "ldr	r10, [%[a], #12]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r3, r3, r8\n\t"
+        "adcs	r4, r4, r9\n\t"
+        "adc	r2, r2, r14\n\t"
+        "adds	r3, r3, r8\n\t"
+        "adcs	r4, r4, r9\n\t"
+        "adc	r2, r2, r14\n\t"
+        "#  A[2] * A[2]\n\t"
+        "ldr	r10, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r3, r3, r8\n\t"
+        "adcs	r4, r4, r9\n\t"
+        "adc	r2, r2, r14\n\t"
+        "str	r3, [sp, #16]\n\t"
+        "#  A[0] * A[5]\n\t"
+        "ldr	r10, [%[a], #20]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[4]\n\t"
+        "ldr	r10, [%[a], #16]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[3]\n\t"
+        "ldr	r10, [%[a], #12]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [sp, #20]\n\t"
+        "#  A[0] * A[6]\n\t"
+        "ldr	r10, [%[a], #24]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[5]\n\t"
+        "ldr	r10, [%[a], #20]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[4]\n\t"
+        "ldr	r10, [%[a], #16]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[3]\n\t"
+        "ldr	r10, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r2, r2, r5\n\t"
+        "adcs	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r2, [sp, #24]\n\t"
+        "#  A[0] * A[7]\n\t"
+        "ldr	r10, [%[a], #28]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r2, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[6]\n\t"
+        "ldr	r10, [%[a], #24]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[5]\n\t"
+        "ldr	r10, [%[a], #20]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[4]\n\t"
+        "ldr	r10, [%[a], #16]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	r2, r2, r7\n\t"
+        "str	r3, [sp, #28]\n\t"
+        "#  A[0] * A[8]\n\t"
+        "ldr	r10, [%[a], #32]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[7]\n\t"
+        "ldr	r10, [%[a], #28]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[6]\n\t"
+        "ldr	r10, [%[a], #24]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[5]\n\t"
+        "ldr	r10, [%[a], #20]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[4]\n\t"
+        "ldr	r10, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [sp, #32]\n\t"
+        "#  A[0] * A[9]\n\t"
+        "ldr	r10, [%[a], #36]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[8]\n\t"
+        "ldr	r10, [%[a], #32]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[7]\n\t"
+        "ldr	r10, [%[a], #28]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[6]\n\t"
+        "ldr	r10, [%[a], #24]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[5]\n\t"
+        "ldr	r10, [%[a], #20]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r2, r2, r5\n\t"
+        "adcs	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r2, [sp, #36]\n\t"
+        "#  A[0] * A[10]\n\t"
+        "ldr	r10, [%[a], #40]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r2, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[9]\n\t"
+        "ldr	r10, [%[a], #36]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[8]\n\t"
+        "ldr	r10, [%[a], #32]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[7]\n\t"
+        "ldr	r10, [%[a], #28]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[6]\n\t"
+        "ldr	r10, [%[a], #24]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[5]\n\t"
+        "ldr	r10, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	r2, r2, r7\n\t"
+        "str	r3, [sp, #40]\n\t"
+        "#  A[0] * A[11]\n\t"
+        "ldr	r10, [%[a], #44]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[10]\n\t"
+        "ldr	r10, [%[a], #40]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[9]\n\t"
+        "ldr	r10, [%[a], #36]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[8]\n\t"
+        "ldr	r10, [%[a], #32]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[7]\n\t"
+        "ldr	r10, [%[a], #28]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[6]\n\t"
+        "ldr	r10, [%[a], #24]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [sp, #44]\n\t"
+        "#  A[0] * A[12]\n\t"
+        "ldr	r10, [%[a], #48]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[11]\n\t"
+        "ldr	r10, [%[a], #44]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[10]\n\t"
+        "ldr	r10, [%[a], #40]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[9]\n\t"
+        "ldr	r10, [%[a], #36]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[8]\n\t"
+        "ldr	r10, [%[a], #32]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[7]\n\t"
+        "ldr	r10, [%[a], #28]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[6]\n\t"
+        "ldr	r10, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r2, r2, r5\n\t"
+        "adcs	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r2, [sp, #48]\n\t"
+        "#  A[0] * A[13]\n\t"
+        "ldr	r10, [%[a], #52]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r2, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[12]\n\t"
+        "ldr	r10, [%[a], #48]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[11]\n\t"
+        "ldr	r10, [%[a], #44]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[10]\n\t"
+        "ldr	r10, [%[a], #40]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[9]\n\t"
+        "ldr	r10, [%[a], #36]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[8]\n\t"
+        "ldr	r10, [%[a], #32]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[7]\n\t"
+        "ldr	r10, [%[a], #28]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	r2, r2, r7\n\t"
+        "str	r3, [sp, #52]\n\t"
+        "#  A[0] * A[14]\n\t"
+        "ldr	r10, [%[a], #56]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[13]\n\t"
+        "ldr	r10, [%[a], #52]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[12]\n\t"
+        "ldr	r10, [%[a], #48]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[11]\n\t"
+        "ldr	r10, [%[a], #44]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[10]\n\t"
+        "ldr	r10, [%[a], #40]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[9]\n\t"
+        "ldr	r10, [%[a], #36]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[8]\n\t"
+        "ldr	r10, [%[a], #32]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[7]\n\t"
+        "ldr	r10, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [sp, #56]\n\t"
+        "#  A[0] * A[15]\n\t"
+        "ldr	r10, [%[a], #60]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[14]\n\t"
+        "ldr	r10, [%[a], #56]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[13]\n\t"
+        "ldr	r10, [%[a], #52]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[12]\n\t"
+        "ldr	r10, [%[a], #48]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[11]\n\t"
+        "ldr	r10, [%[a], #44]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[10]\n\t"
+        "ldr	r10, [%[a], #40]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[9]\n\t"
+        "ldr	r10, [%[a], #36]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[8]\n\t"
+        "ldr	r10, [%[a], #32]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r2, r2, r5\n\t"
+        "adcs	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r2, [sp, #60]\n\t"
+        "#  A[0] * A[16]\n\t"
+        "ldr	r10, [%[a], #64]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r2, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[15]\n\t"
+        "ldr	r10, [%[a], #60]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[14]\n\t"
+        "ldr	r10, [%[a], #56]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[13]\n\t"
+        "ldr	r10, [%[a], #52]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[12]\n\t"
+        "ldr	r10, [%[a], #48]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[11]\n\t"
+        "ldr	r10, [%[a], #44]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[10]\n\t"
+        "ldr	r10, [%[a], #40]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[9]\n\t"
+        "ldr	r10, [%[a], #36]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[8]\n\t"
+        "ldr	r10, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	r2, r2, r7\n\t"
+        "str	r3, [sp, #64]\n\t"
+        "#  A[0] * A[17]\n\t"
+        "ldr	r10, [%[a], #68]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[16]\n\t"
+        "ldr	r10, [%[a], #64]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[15]\n\t"
+        "ldr	r10, [%[a], #60]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[14]\n\t"
+        "ldr	r10, [%[a], #56]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[13]\n\t"
+        "ldr	r10, [%[a], #52]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[12]\n\t"
+        "ldr	r10, [%[a], #48]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[11]\n\t"
+        "ldr	r10, [%[a], #44]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[10]\n\t"
+        "ldr	r10, [%[a], #40]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[9]\n\t"
+        "ldr	r10, [%[a], #36]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [sp, #68]\n\t"
+        "#  A[0] * A[18]\n\t"
+        "ldr	r10, [%[a], #72]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[17]\n\t"
+        "ldr	r10, [%[a], #68]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[16]\n\t"
+        "ldr	r10, [%[a], #64]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[15]\n\t"
+        "ldr	r10, [%[a], #60]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[14]\n\t"
+        "ldr	r10, [%[a], #56]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[13]\n\t"
+        "ldr	r10, [%[a], #52]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[12]\n\t"
+        "ldr	r10, [%[a], #48]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[11]\n\t"
+        "ldr	r10, [%[a], #44]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[10]\n\t"
+        "ldr	r10, [%[a], #40]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[9]\n\t"
+        "ldr	r10, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r2, r2, r5\n\t"
+        "adcs	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r2, [sp, #72]\n\t"
+        "#  A[0] * A[19]\n\t"
+        "ldr	r10, [%[a], #76]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r2, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[18]\n\t"
+        "ldr	r10, [%[a], #72]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[17]\n\t"
+        "ldr	r10, [%[a], #68]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[16]\n\t"
+        "ldr	r10, [%[a], #64]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[15]\n\t"
+        "ldr	r10, [%[a], #60]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[14]\n\t"
+        "ldr	r10, [%[a], #56]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[13]\n\t"
+        "ldr	r10, [%[a], #52]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[12]\n\t"
+        "ldr	r10, [%[a], #48]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[11]\n\t"
+        "ldr	r10, [%[a], #44]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[10]\n\t"
+        "ldr	r10, [%[a], #40]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	r2, r2, r7\n\t"
+        "str	r3, [sp, #76]\n\t"
+        "#  A[0] * A[20]\n\t"
+        "ldr	r10, [%[a], #80]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[19]\n\t"
+        "ldr	r10, [%[a], #76]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[18]\n\t"
+        "ldr	r10, [%[a], #72]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[17]\n\t"
+        "ldr	r10, [%[a], #68]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[16]\n\t"
+        "ldr	r10, [%[a], #64]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[15]\n\t"
+        "ldr	r10, [%[a], #60]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[14]\n\t"
+        "ldr	r10, [%[a], #56]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[13]\n\t"
+        "ldr	r10, [%[a], #52]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[12]\n\t"
+        "ldr	r10, [%[a], #48]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[11]\n\t"
+        "ldr	r10, [%[a], #44]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[10]\n\t"
+        "ldr	r10, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [sp, #80]\n\t"
+        "#  A[0] * A[21]\n\t"
+        "ldr	r10, [%[a], #84]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[20]\n\t"
+        "ldr	r10, [%[a], #80]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[19]\n\t"
+        "ldr	r10, [%[a], #76]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[18]\n\t"
+        "ldr	r10, [%[a], #72]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[17]\n\t"
+        "ldr	r10, [%[a], #68]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[16]\n\t"
+        "ldr	r10, [%[a], #64]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[15]\n\t"
+        "ldr	r10, [%[a], #60]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[14]\n\t"
+        "ldr	r10, [%[a], #56]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[13]\n\t"
+        "ldr	r10, [%[a], #52]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[12]\n\t"
+        "ldr	r10, [%[a], #48]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[11]\n\t"
+        "ldr	r10, [%[a], #44]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r2, r2, r5\n\t"
+        "adcs	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r2, [sp, #84]\n\t"
+        "#  A[0] * A[22]\n\t"
+        "ldr	r10, [%[a], #88]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r2, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[21]\n\t"
+        "ldr	r10, [%[a], #84]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[20]\n\t"
+        "ldr	r10, [%[a], #80]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[19]\n\t"
+        "ldr	r10, [%[a], #76]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[18]\n\t"
+        "ldr	r10, [%[a], #72]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[17]\n\t"
+        "ldr	r10, [%[a], #68]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[16]\n\t"
+        "ldr	r10, [%[a], #64]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[15]\n\t"
+        "ldr	r10, [%[a], #60]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[14]\n\t"
+        "ldr	r10, [%[a], #56]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[13]\n\t"
+        "ldr	r10, [%[a], #52]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[12]\n\t"
+        "ldr	r10, [%[a], #48]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[11]\n\t"
+        "ldr	r10, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	r2, r2, r7\n\t"
+        "str	r3, [sp, #88]\n\t"
+        "#  A[0] * A[23]\n\t"
+        "ldr	r10, [%[a], #92]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[22]\n\t"
+        "ldr	r10, [%[a], #88]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[21]\n\t"
+        "ldr	r10, [%[a], #84]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[20]\n\t"
+        "ldr	r10, [%[a], #80]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[19]\n\t"
+        "ldr	r10, [%[a], #76]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[18]\n\t"
+        "ldr	r10, [%[a], #72]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[17]\n\t"
+        "ldr	r10, [%[a], #68]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[16]\n\t"
+        "ldr	r10, [%[a], #64]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[15]\n\t"
+        "ldr	r10, [%[a], #60]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[14]\n\t"
+        "ldr	r10, [%[a], #56]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[13]\n\t"
+        "ldr	r10, [%[a], #52]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[12]\n\t"
+        "ldr	r10, [%[a], #48]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [sp, #92]\n\t"
+        "#  A[0] * A[24]\n\t"
+        "ldr	r10, [%[a], #96]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[23]\n\t"
+        "ldr	r10, [%[a], #92]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[22]\n\t"
+        "ldr	r10, [%[a], #88]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[21]\n\t"
+        "ldr	r10, [%[a], #84]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[20]\n\t"
+        "ldr	r10, [%[a], #80]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[19]\n\t"
+        "ldr	r10, [%[a], #76]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[18]\n\t"
+        "ldr	r10, [%[a], #72]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[17]\n\t"
+        "ldr	r10, [%[a], #68]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[16]\n\t"
+        "ldr	r10, [%[a], #64]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[15]\n\t"
+        "ldr	r10, [%[a], #60]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[14]\n\t"
+        "ldr	r10, [%[a], #56]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[13]\n\t"
+        "ldr	r10, [%[a], #52]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[12]\n\t"
+        "ldr	r10, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r2, r2, r5\n\t"
+        "adcs	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r2, [sp, #96]\n\t"
+        "#  A[0] * A[25]\n\t"
+        "ldr	r10, [%[a], #100]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r2, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[24]\n\t"
+        "ldr	r10, [%[a], #96]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[23]\n\t"
+        "ldr	r10, [%[a], #92]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[22]\n\t"
+        "ldr	r10, [%[a], #88]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[21]\n\t"
+        "ldr	r10, [%[a], #84]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[20]\n\t"
+        "ldr	r10, [%[a], #80]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[19]\n\t"
+        "ldr	r10, [%[a], #76]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[18]\n\t"
+        "ldr	r10, [%[a], #72]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[17]\n\t"
+        "ldr	r10, [%[a], #68]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[16]\n\t"
+        "ldr	r10, [%[a], #64]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[15]\n\t"
+        "ldr	r10, [%[a], #60]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[14]\n\t"
+        "ldr	r10, [%[a], #56]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[13]\n\t"
+        "ldr	r10, [%[a], #52]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	r2, r2, r7\n\t"
+        "str	r3, [sp, #100]\n\t"
+        "#  A[0] * A[26]\n\t"
+        "ldr	r10, [%[a], #104]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[25]\n\t"
+        "ldr	r10, [%[a], #100]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[24]\n\t"
+        "ldr	r10, [%[a], #96]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[23]\n\t"
+        "ldr	r10, [%[a], #92]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[22]\n\t"
+        "ldr	r10, [%[a], #88]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[21]\n\t"
+        "ldr	r10, [%[a], #84]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[20]\n\t"
+        "ldr	r10, [%[a], #80]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[19]\n\t"
+        "ldr	r10, [%[a], #76]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[18]\n\t"
+        "ldr	r10, [%[a], #72]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[17]\n\t"
+        "ldr	r10, [%[a], #68]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[16]\n\t"
+        "ldr	r10, [%[a], #64]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[15]\n\t"
+        "ldr	r10, [%[a], #60]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[14]\n\t"
+        "ldr	r10, [%[a], #56]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[13]\n\t"
+        "ldr	r10, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [sp, #104]\n\t"
+        "#  A[0] * A[27]\n\t"
+        "ldr	r10, [%[a], #108]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[26]\n\t"
+        "ldr	r10, [%[a], #104]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[25]\n\t"
+        "ldr	r10, [%[a], #100]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[24]\n\t"
+        "ldr	r10, [%[a], #96]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[23]\n\t"
+        "ldr	r10, [%[a], #92]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[22]\n\t"
+        "ldr	r10, [%[a], #88]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[21]\n\t"
+        "ldr	r10, [%[a], #84]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[20]\n\t"
+        "ldr	r10, [%[a], #80]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[19]\n\t"
+        "ldr	r10, [%[a], #76]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[18]\n\t"
+        "ldr	r10, [%[a], #72]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[17]\n\t"
+        "ldr	r10, [%[a], #68]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[16]\n\t"
+        "ldr	r10, [%[a], #64]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[15]\n\t"
+        "ldr	r10, [%[a], #60]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[14]\n\t"
+        "ldr	r10, [%[a], #56]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r2, r2, r5\n\t"
+        "adcs	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r2, [sp, #108]\n\t"
+        "#  A[0] * A[28]\n\t"
+        "ldr	r10, [%[a], #112]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r2, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[27]\n\t"
+        "ldr	r10, [%[a], #108]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[26]\n\t"
+        "ldr	r10, [%[a], #104]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[25]\n\t"
+        "ldr	r10, [%[a], #100]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[24]\n\t"
+        "ldr	r10, [%[a], #96]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[23]\n\t"
+        "ldr	r10, [%[a], #92]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[22]\n\t"
+        "ldr	r10, [%[a], #88]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[21]\n\t"
+        "ldr	r10, [%[a], #84]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[20]\n\t"
+        "ldr	r10, [%[a], #80]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[19]\n\t"
+        "ldr	r10, [%[a], #76]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[18]\n\t"
+        "ldr	r10, [%[a], #72]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[17]\n\t"
+        "ldr	r10, [%[a], #68]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[16]\n\t"
+        "ldr	r10, [%[a], #64]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[15]\n\t"
+        "ldr	r10, [%[a], #60]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[14] * A[14]\n\t"
+        "ldr	r10, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	r2, r2, r7\n\t"
+        "str	r3, [sp, #112]\n\t"
+        "#  A[0] * A[29]\n\t"
+        "ldr	r10, [%[a], #116]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[28]\n\t"
+        "ldr	r10, [%[a], #112]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[27]\n\t"
+        "ldr	r10, [%[a], #108]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[26]\n\t"
+        "ldr	r10, [%[a], #104]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[25]\n\t"
+        "ldr	r10, [%[a], #100]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[24]\n\t"
+        "ldr	r10, [%[a], #96]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[23]\n\t"
+        "ldr	r10, [%[a], #92]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[22]\n\t"
+        "ldr	r10, [%[a], #88]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[21]\n\t"
+        "ldr	r10, [%[a], #84]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[20]\n\t"
+        "ldr	r10, [%[a], #80]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[19]\n\t"
+        "ldr	r10, [%[a], #76]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[18]\n\t"
+        "ldr	r10, [%[a], #72]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[17]\n\t"
+        "ldr	r10, [%[a], #68]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[16]\n\t"
+        "ldr	r10, [%[a], #64]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[14] * A[15]\n\t"
+        "ldr	r10, [%[a], #60]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [sp, #116]\n\t"
+        "#  A[0] * A[30]\n\t"
+        "ldr	r10, [%[a], #120]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[29]\n\t"
+        "ldr	r10, [%[a], #116]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[28]\n\t"
+        "ldr	r10, [%[a], #112]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[27]\n\t"
+        "ldr	r10, [%[a], #108]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[26]\n\t"
+        "ldr	r10, [%[a], #104]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[25]\n\t"
+        "ldr	r10, [%[a], #100]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[24]\n\t"
+        "ldr	r10, [%[a], #96]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[23]\n\t"
+        "ldr	r10, [%[a], #92]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[22]\n\t"
+        "ldr	r10, [%[a], #88]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[21]\n\t"
+        "ldr	r10, [%[a], #84]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[20]\n\t"
+        "ldr	r10, [%[a], #80]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[19]\n\t"
+        "ldr	r10, [%[a], #76]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[18]\n\t"
+        "ldr	r10, [%[a], #72]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[17]\n\t"
+        "ldr	r10, [%[a], #68]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[14] * A[16]\n\t"
+        "ldr	r10, [%[a], #64]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[15] * A[15]\n\t"
+        "ldr	r10, [%[a], #60]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r2, r2, r5\n\t"
+        "adcs	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r2, [sp, #120]\n\t"
+        "#  A[0] * A[31]\n\t"
+        "ldr	r10, [%[a], #124]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r2, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[30]\n\t"
+        "ldr	r10, [%[a], #120]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[29]\n\t"
+        "ldr	r10, [%[a], #116]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[28]\n\t"
+        "ldr	r10, [%[a], #112]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[27]\n\t"
+        "ldr	r10, [%[a], #108]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[26]\n\t"
+        "ldr	r10, [%[a], #104]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[25]\n\t"
+        "ldr	r10, [%[a], #100]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[24]\n\t"
+        "ldr	r10, [%[a], #96]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[23]\n\t"
+        "ldr	r10, [%[a], #92]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[22]\n\t"
+        "ldr	r10, [%[a], #88]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[21]\n\t"
+        "ldr	r10, [%[a], #84]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[20]\n\t"
+        "ldr	r10, [%[a], #80]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[19]\n\t"
+        "ldr	r10, [%[a], #76]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[18]\n\t"
+        "ldr	r10, [%[a], #72]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[14] * A[17]\n\t"
+        "ldr	r10, [%[a], #68]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[15] * A[16]\n\t"
+        "ldr	r10, [%[a], #64]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	r2, r2, r7\n\t"
+        "str	r3, [sp, #124]\n\t"
+        "#  A[0] * A[32]\n\t"
+        "ldr	r10, [%[a], #128]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[31]\n\t"
+        "ldr	r10, [%[a], #124]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[30]\n\t"
+        "ldr	r10, [%[a], #120]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[29]\n\t"
+        "ldr	r10, [%[a], #116]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[28]\n\t"
+        "ldr	r10, [%[a], #112]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[27]\n\t"
+        "ldr	r10, [%[a], #108]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[26]\n\t"
+        "ldr	r10, [%[a], #104]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[25]\n\t"
+        "ldr	r10, [%[a], #100]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[24]\n\t"
+        "ldr	r10, [%[a], #96]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[23]\n\t"
+        "ldr	r10, [%[a], #92]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[22]\n\t"
+        "ldr	r10, [%[a], #88]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[21]\n\t"
+        "ldr	r10, [%[a], #84]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[20]\n\t"
+        "ldr	r10, [%[a], #80]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[19]\n\t"
+        "ldr	r10, [%[a], #76]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[14] * A[18]\n\t"
+        "ldr	r10, [%[a], #72]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[15] * A[17]\n\t"
+        "ldr	r10, [%[a], #68]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[16] * A[16]\n\t"
+        "ldr	r10, [%[a], #64]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [sp, #128]\n\t"
+        "#  A[0] * A[33]\n\t"
+        "ldr	r10, [%[a], #132]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[32]\n\t"
+        "ldr	r10, [%[a], #128]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[31]\n\t"
+        "ldr	r10, [%[a], #124]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[30]\n\t"
+        "ldr	r10, [%[a], #120]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[29]\n\t"
+        "ldr	r10, [%[a], #116]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[28]\n\t"
+        "ldr	r10, [%[a], #112]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[27]\n\t"
+        "ldr	r10, [%[a], #108]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[26]\n\t"
+        "ldr	r10, [%[a], #104]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[25]\n\t"
+        "ldr	r10, [%[a], #100]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[24]\n\t"
+        "ldr	r10, [%[a], #96]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[23]\n\t"
+        "ldr	r10, [%[a], #92]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[22]\n\t"
+        "ldr	r10, [%[a], #88]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[21]\n\t"
+        "ldr	r10, [%[a], #84]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[20]\n\t"
+        "ldr	r10, [%[a], #80]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[14] * A[19]\n\t"
+        "ldr	r10, [%[a], #76]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[15] * A[18]\n\t"
+        "ldr	r10, [%[a], #72]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[16] * A[17]\n\t"
+        "ldr	r10, [%[a], #68]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r2, r2, r5\n\t"
+        "adcs	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r2, [sp, #132]\n\t"
+        "#  A[0] * A[34]\n\t"
+        "ldr	r10, [%[a], #136]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r2, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[33]\n\t"
+        "ldr	r10, [%[a], #132]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[32]\n\t"
+        "ldr	r10, [%[a], #128]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[31]\n\t"
+        "ldr	r10, [%[a], #124]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[30]\n\t"
+        "ldr	r10, [%[a], #120]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[29]\n\t"
+        "ldr	r10, [%[a], #116]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[28]\n\t"
+        "ldr	r10, [%[a], #112]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[27]\n\t"
+        "ldr	r10, [%[a], #108]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[26]\n\t"
+        "ldr	r10, [%[a], #104]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[25]\n\t"
+        "ldr	r10, [%[a], #100]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[24]\n\t"
+        "ldr	r10, [%[a], #96]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[23]\n\t"
+        "ldr	r10, [%[a], #92]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[22]\n\t"
+        "ldr	r10, [%[a], #88]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[21]\n\t"
+        "ldr	r10, [%[a], #84]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[14] * A[20]\n\t"
+        "ldr	r10, [%[a], #80]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[15] * A[19]\n\t"
+        "ldr	r10, [%[a], #76]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[16] * A[18]\n\t"
+        "ldr	r10, [%[a], #72]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[17] * A[17]\n\t"
+        "ldr	r10, [%[a], #68]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	r2, r2, r7\n\t"
+        "str	r3, [sp, #136]\n\t"
+        "#  A[0] * A[35]\n\t"
+        "ldr	r10, [%[a], #140]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[34]\n\t"
+        "ldr	r10, [%[a], #136]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[33]\n\t"
+        "ldr	r10, [%[a], #132]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[32]\n\t"
+        "ldr	r10, [%[a], #128]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[31]\n\t"
+        "ldr	r10, [%[a], #124]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[30]\n\t"
+        "ldr	r10, [%[a], #120]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[29]\n\t"
+        "ldr	r10, [%[a], #116]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[28]\n\t"
+        "ldr	r10, [%[a], #112]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[27]\n\t"
+        "ldr	r10, [%[a], #108]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[26]\n\t"
+        "ldr	r10, [%[a], #104]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[25]\n\t"
+        "ldr	r10, [%[a], #100]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[24]\n\t"
+        "ldr	r10, [%[a], #96]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[23]\n\t"
+        "ldr	r10, [%[a], #92]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[22]\n\t"
+        "ldr	r10, [%[a], #88]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[14] * A[21]\n\t"
+        "ldr	r10, [%[a], #84]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[15] * A[20]\n\t"
+        "ldr	r10, [%[a], #80]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[16] * A[19]\n\t"
+        "ldr	r10, [%[a], #76]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[17] * A[18]\n\t"
+        "ldr	r10, [%[a], #72]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [sp, #140]\n\t"
+        "#  A[0] * A[36]\n\t"
+        "ldr	r10, [%[a], #144]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[35]\n\t"
+        "ldr	r10, [%[a], #140]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[34]\n\t"
+        "ldr	r10, [%[a], #136]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[33]\n\t"
+        "ldr	r10, [%[a], #132]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[32]\n\t"
+        "ldr	r10, [%[a], #128]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[31]\n\t"
+        "ldr	r10, [%[a], #124]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[30]\n\t"
+        "ldr	r10, [%[a], #120]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[29]\n\t"
+        "ldr	r10, [%[a], #116]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[28]\n\t"
+        "ldr	r10, [%[a], #112]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[27]\n\t"
+        "ldr	r10, [%[a], #108]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[26]\n\t"
+        "ldr	r10, [%[a], #104]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[25]\n\t"
+        "ldr	r10, [%[a], #100]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[24]\n\t"
+        "ldr	r10, [%[a], #96]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[23]\n\t"
+        "ldr	r10, [%[a], #92]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[14] * A[22]\n\t"
+        "ldr	r10, [%[a], #88]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[15] * A[21]\n\t"
+        "ldr	r10, [%[a], #84]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[16] * A[20]\n\t"
+        "ldr	r10, [%[a], #80]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[17] * A[19]\n\t"
+        "ldr	r10, [%[a], #76]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[18] * A[18]\n\t"
+        "ldr	r10, [%[a], #72]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r2, r2, r5\n\t"
+        "adcs	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r2, [sp, #144]\n\t"
+        "#  A[0] * A[37]\n\t"
+        "ldr	r10, [%[a], #148]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r2, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[36]\n\t"
+        "ldr	r10, [%[a], #144]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[35]\n\t"
+        "ldr	r10, [%[a], #140]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[34]\n\t"
+        "ldr	r10, [%[a], #136]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[33]\n\t"
+        "ldr	r10, [%[a], #132]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[32]\n\t"
+        "ldr	r10, [%[a], #128]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[31]\n\t"
+        "ldr	r10, [%[a], #124]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[30]\n\t"
+        "ldr	r10, [%[a], #120]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[29]\n\t"
+        "ldr	r10, [%[a], #116]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[28]\n\t"
+        "ldr	r10, [%[a], #112]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[27]\n\t"
+        "ldr	r10, [%[a], #108]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[26]\n\t"
+        "ldr	r10, [%[a], #104]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[25]\n\t"
+        "ldr	r10, [%[a], #100]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[24]\n\t"
+        "ldr	r10, [%[a], #96]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[14] * A[23]\n\t"
+        "ldr	r10, [%[a], #92]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[15] * A[22]\n\t"
+        "ldr	r10, [%[a], #88]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[16] * A[21]\n\t"
+        "ldr	r10, [%[a], #84]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[17] * A[20]\n\t"
+        "ldr	r10, [%[a], #80]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[18] * A[19]\n\t"
+        "ldr	r10, [%[a], #76]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	r2, r2, r7\n\t"
+        "str	r3, [sp, #148]\n\t"
+        "#  A[0] * A[38]\n\t"
+        "ldr	r10, [%[a], #152]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[37]\n\t"
+        "ldr	r10, [%[a], #148]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[36]\n\t"
+        "ldr	r10, [%[a], #144]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[35]\n\t"
+        "ldr	r10, [%[a], #140]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[34]\n\t"
+        "ldr	r10, [%[a], #136]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[33]\n\t"
+        "ldr	r10, [%[a], #132]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[32]\n\t"
+        "ldr	r10, [%[a], #128]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[31]\n\t"
+        "ldr	r10, [%[a], #124]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[30]\n\t"
+        "ldr	r10, [%[a], #120]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[29]\n\t"
+        "ldr	r10, [%[a], #116]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[28]\n\t"
+        "ldr	r10, [%[a], #112]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[27]\n\t"
+        "ldr	r10, [%[a], #108]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[26]\n\t"
+        "ldr	r10, [%[a], #104]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[25]\n\t"
+        "ldr	r10, [%[a], #100]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[14] * A[24]\n\t"
+        "ldr	r10, [%[a], #96]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[15] * A[23]\n\t"
+        "ldr	r10, [%[a], #92]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[16] * A[22]\n\t"
+        "ldr	r10, [%[a], #88]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[17] * A[21]\n\t"
+        "ldr	r10, [%[a], #84]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[18] * A[20]\n\t"
+        "ldr	r10, [%[a], #80]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[19] * A[19]\n\t"
+        "ldr	r10, [%[a], #76]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [sp, #152]\n\t"
+        "#  A[0] * A[39]\n\t"
+        "ldr	r10, [%[a], #156]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[38]\n\t"
+        "ldr	r10, [%[a], #152]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[37]\n\t"
+        "ldr	r10, [%[a], #148]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[36]\n\t"
+        "ldr	r10, [%[a], #144]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[35]\n\t"
+        "ldr	r10, [%[a], #140]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[34]\n\t"
+        "ldr	r10, [%[a], #136]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[33]\n\t"
+        "ldr	r10, [%[a], #132]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[32]\n\t"
+        "ldr	r10, [%[a], #128]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[31]\n\t"
+        "ldr	r10, [%[a], #124]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[30]\n\t"
+        "ldr	r10, [%[a], #120]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[29]\n\t"
+        "ldr	r10, [%[a], #116]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[28]\n\t"
+        "ldr	r10, [%[a], #112]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[27]\n\t"
+        "ldr	r10, [%[a], #108]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[26]\n\t"
+        "ldr	r10, [%[a], #104]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[14] * A[25]\n\t"
+        "ldr	r10, [%[a], #100]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[15] * A[24]\n\t"
+        "ldr	r10, [%[a], #96]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[16] * A[23]\n\t"
+        "ldr	r10, [%[a], #92]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[17] * A[22]\n\t"
+        "ldr	r10, [%[a], #88]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[18] * A[21]\n\t"
+        "ldr	r10, [%[a], #84]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[19] * A[20]\n\t"
+        "ldr	r10, [%[a], #80]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r2, r2, r5\n\t"
+        "adcs	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r2, [sp, #156]\n\t"
+        "#  A[0] * A[40]\n\t"
+        "ldr	r10, [%[a], #160]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r2, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[39]\n\t"
+        "ldr	r10, [%[a], #156]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[38]\n\t"
+        "ldr	r10, [%[a], #152]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[37]\n\t"
+        "ldr	r10, [%[a], #148]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[36]\n\t"
+        "ldr	r10, [%[a], #144]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[35]\n\t"
+        "ldr	r10, [%[a], #140]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[34]\n\t"
+        "ldr	r10, [%[a], #136]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[33]\n\t"
+        "ldr	r10, [%[a], #132]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[32]\n\t"
+        "ldr	r10, [%[a], #128]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[31]\n\t"
+        "ldr	r10, [%[a], #124]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[30]\n\t"
+        "ldr	r10, [%[a], #120]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[29]\n\t"
+        "ldr	r10, [%[a], #116]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[28]\n\t"
+        "ldr	r10, [%[a], #112]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[27]\n\t"
+        "ldr	r10, [%[a], #108]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[14] * A[26]\n\t"
+        "ldr	r10, [%[a], #104]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[15] * A[25]\n\t"
+        "ldr	r10, [%[a], #100]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[16] * A[24]\n\t"
+        "ldr	r10, [%[a], #96]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[17] * A[23]\n\t"
+        "ldr	r10, [%[a], #92]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[18] * A[22]\n\t"
+        "ldr	r10, [%[a], #88]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[19] * A[21]\n\t"
+        "ldr	r10, [%[a], #84]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[20] * A[20]\n\t"
+        "ldr	r10, [%[a], #80]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	r2, r2, r7\n\t"
+        "str	r3, [sp, #160]\n\t"
+        "#  A[0] * A[41]\n\t"
+        "ldr	r10, [%[a], #164]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[40]\n\t"
+        "ldr	r10, [%[a], #160]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[39]\n\t"
+        "ldr	r10, [%[a], #156]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[38]\n\t"
+        "ldr	r10, [%[a], #152]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[37]\n\t"
+        "ldr	r10, [%[a], #148]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[36]\n\t"
+        "ldr	r10, [%[a], #144]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[35]\n\t"
+        "ldr	r10, [%[a], #140]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[34]\n\t"
+        "ldr	r10, [%[a], #136]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[33]\n\t"
+        "ldr	r10, [%[a], #132]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[32]\n\t"
+        "ldr	r10, [%[a], #128]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[31]\n\t"
+        "ldr	r10, [%[a], #124]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[30]\n\t"
+        "ldr	r10, [%[a], #120]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[29]\n\t"
+        "ldr	r10, [%[a], #116]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[28]\n\t"
+        "ldr	r10, [%[a], #112]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[14] * A[27]\n\t"
+        "ldr	r10, [%[a], #108]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[15] * A[26]\n\t"
+        "ldr	r10, [%[a], #104]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[16] * A[25]\n\t"
+        "ldr	r10, [%[a], #100]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[17] * A[24]\n\t"
+        "ldr	r10, [%[a], #96]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[18] * A[23]\n\t"
+        "ldr	r10, [%[a], #92]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[19] * A[22]\n\t"
+        "ldr	r10, [%[a], #88]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[20] * A[21]\n\t"
+        "ldr	r10, [%[a], #84]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [sp, #164]\n\t"
+        "#  A[0] * A[42]\n\t"
+        "ldr	r10, [%[a], #168]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[41]\n\t"
+        "ldr	r10, [%[a], #164]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[40]\n\t"
+        "ldr	r10, [%[a], #160]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[39]\n\t"
+        "ldr	r10, [%[a], #156]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[38]\n\t"
+        "ldr	r10, [%[a], #152]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[37]\n\t"
+        "ldr	r10, [%[a], #148]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[36]\n\t"
+        "ldr	r10, [%[a], #144]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[35]\n\t"
+        "ldr	r10, [%[a], #140]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[34]\n\t"
+        "ldr	r10, [%[a], #136]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[33]\n\t"
+        "ldr	r10, [%[a], #132]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[32]\n\t"
+        "ldr	r10, [%[a], #128]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[31]\n\t"
+        "ldr	r10, [%[a], #124]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[30]\n\t"
+        "ldr	r10, [%[a], #120]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[29]\n\t"
+        "ldr	r10, [%[a], #116]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[14] * A[28]\n\t"
+        "ldr	r10, [%[a], #112]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[15] * A[27]\n\t"
+        "ldr	r10, [%[a], #108]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[16] * A[26]\n\t"
+        "ldr	r10, [%[a], #104]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[17] * A[25]\n\t"
+        "ldr	r10, [%[a], #100]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[18] * A[24]\n\t"
+        "ldr	r10, [%[a], #96]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[19] * A[23]\n\t"
+        "ldr	r10, [%[a], #92]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[20] * A[22]\n\t"
+        "ldr	r10, [%[a], #88]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[21] * A[21]\n\t"
+        "ldr	r10, [%[a], #84]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r2, r2, r5\n\t"
+        "adcs	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r2, [sp, #168]\n\t"
+        "#  A[0] * A[43]\n\t"
+        "ldr	r10, [%[a], #172]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r2, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[42]\n\t"
+        "ldr	r10, [%[a], #168]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[41]\n\t"
+        "ldr	r10, [%[a], #164]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[40]\n\t"
+        "ldr	r10, [%[a], #160]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[39]\n\t"
+        "ldr	r10, [%[a], #156]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[38]\n\t"
+        "ldr	r10, [%[a], #152]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[37]\n\t"
+        "ldr	r10, [%[a], #148]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[36]\n\t"
+        "ldr	r10, [%[a], #144]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[35]\n\t"
+        "ldr	r10, [%[a], #140]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[34]\n\t"
+        "ldr	r10, [%[a], #136]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[33]\n\t"
+        "ldr	r10, [%[a], #132]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[32]\n\t"
+        "ldr	r10, [%[a], #128]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[31]\n\t"
+        "ldr	r10, [%[a], #124]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[30]\n\t"
+        "ldr	r10, [%[a], #120]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[14] * A[29]\n\t"
+        "ldr	r10, [%[a], #116]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[15] * A[28]\n\t"
+        "ldr	r10, [%[a], #112]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[16] * A[27]\n\t"
+        "ldr	r10, [%[a], #108]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[17] * A[26]\n\t"
+        "ldr	r10, [%[a], #104]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[18] * A[25]\n\t"
+        "ldr	r10, [%[a], #100]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[19] * A[24]\n\t"
+        "ldr	r10, [%[a], #96]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[20] * A[23]\n\t"
+        "ldr	r10, [%[a], #92]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[21] * A[22]\n\t"
+        "ldr	r10, [%[a], #88]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	r2, r2, r7\n\t"
+        "str	r3, [sp, #172]\n\t"
+        "#  A[0] * A[44]\n\t"
+        "ldr	r10, [%[a], #176]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[43]\n\t"
+        "ldr	r10, [%[a], #172]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[42]\n\t"
+        "ldr	r10, [%[a], #168]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[41]\n\t"
+        "ldr	r10, [%[a], #164]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[40]\n\t"
+        "ldr	r10, [%[a], #160]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[39]\n\t"
+        "ldr	r10, [%[a], #156]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[38]\n\t"
+        "ldr	r10, [%[a], #152]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[37]\n\t"
+        "ldr	r10, [%[a], #148]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[36]\n\t"
+        "ldr	r10, [%[a], #144]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[35]\n\t"
+        "ldr	r10, [%[a], #140]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[34]\n\t"
+        "ldr	r10, [%[a], #136]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[33]\n\t"
+        "ldr	r10, [%[a], #132]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[32]\n\t"
+        "ldr	r10, [%[a], #128]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[31]\n\t"
+        "ldr	r10, [%[a], #124]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[14] * A[30]\n\t"
+        "ldr	r10, [%[a], #120]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[15] * A[29]\n\t"
+        "ldr	r10, [%[a], #116]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[16] * A[28]\n\t"
+        "ldr	r10, [%[a], #112]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[17] * A[27]\n\t"
+        "ldr	r10, [%[a], #108]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[18] * A[26]\n\t"
+        "ldr	r10, [%[a], #104]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[19] * A[25]\n\t"
+        "ldr	r10, [%[a], #100]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[20] * A[24]\n\t"
+        "ldr	r10, [%[a], #96]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[21] * A[23]\n\t"
+        "ldr	r10, [%[a], #92]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[22] * A[22]\n\t"
+        "ldr	r10, [%[a], #88]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [sp, #176]\n\t"
+        "#  A[0] * A[45]\n\t"
+        "ldr	r10, [%[a], #180]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[44]\n\t"
+        "ldr	r10, [%[a], #176]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[43]\n\t"
+        "ldr	r10, [%[a], #172]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[42]\n\t"
+        "ldr	r10, [%[a], #168]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[41]\n\t"
+        "ldr	r10, [%[a], #164]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[40]\n\t"
+        "ldr	r10, [%[a], #160]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[39]\n\t"
+        "ldr	r10, [%[a], #156]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[38]\n\t"
+        "ldr	r10, [%[a], #152]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[37]\n\t"
+        "ldr	r10, [%[a], #148]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[36]\n\t"
+        "ldr	r10, [%[a], #144]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[35]\n\t"
+        "ldr	r10, [%[a], #140]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[34]\n\t"
+        "ldr	r10, [%[a], #136]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[33]\n\t"
+        "ldr	r10, [%[a], #132]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[32]\n\t"
+        "ldr	r10, [%[a], #128]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[14] * A[31]\n\t"
+        "ldr	r10, [%[a], #124]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[15] * A[30]\n\t"
+        "ldr	r10, [%[a], #120]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[16] * A[29]\n\t"
+        "ldr	r10, [%[a], #116]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[17] * A[28]\n\t"
+        "ldr	r10, [%[a], #112]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[18] * A[27]\n\t"
+        "ldr	r10, [%[a], #108]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[19] * A[26]\n\t"
+        "ldr	r10, [%[a], #104]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[20] * A[25]\n\t"
+        "ldr	r10, [%[a], #100]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[21] * A[24]\n\t"
+        "ldr	r10, [%[a], #96]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[22] * A[23]\n\t"
+        "ldr	r10, [%[a], #92]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r2, r2, r5\n\t"
+        "adcs	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r2, [sp, #180]\n\t"
+        "#  A[0] * A[46]\n\t"
+        "ldr	r10, [%[a], #184]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r2, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[45]\n\t"
+        "ldr	r10, [%[a], #180]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[44]\n\t"
+        "ldr	r10, [%[a], #176]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[43]\n\t"
+        "ldr	r10, [%[a], #172]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[42]\n\t"
+        "ldr	r10, [%[a], #168]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[41]\n\t"
+        "ldr	r10, [%[a], #164]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[40]\n\t"
+        "ldr	r10, [%[a], #160]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[39]\n\t"
+        "ldr	r10, [%[a], #156]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[38]\n\t"
+        "ldr	r10, [%[a], #152]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[37]\n\t"
+        "ldr	r10, [%[a], #148]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[36]\n\t"
+        "ldr	r10, [%[a], #144]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[35]\n\t"
+        "ldr	r10, [%[a], #140]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[34]\n\t"
+        "ldr	r10, [%[a], #136]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[33]\n\t"
+        "ldr	r10, [%[a], #132]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[14] * A[32]\n\t"
+        "ldr	r10, [%[a], #128]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[15] * A[31]\n\t"
+        "ldr	r10, [%[a], #124]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[16] * A[30]\n\t"
+        "ldr	r10, [%[a], #120]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[17] * A[29]\n\t"
+        "ldr	r10, [%[a], #116]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[18] * A[28]\n\t"
+        "ldr	r10, [%[a], #112]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[19] * A[27]\n\t"
+        "ldr	r10, [%[a], #108]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[20] * A[26]\n\t"
+        "ldr	r10, [%[a], #104]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[21] * A[25]\n\t"
+        "ldr	r10, [%[a], #100]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[22] * A[24]\n\t"
+        "ldr	r10, [%[a], #96]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[23] * A[23]\n\t"
+        "ldr	r10, [%[a], #92]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	r2, r2, r7\n\t"
+        "str	r3, [sp, #184]\n\t"
+        "#  A[0] * A[47]\n\t"
+        "ldr	r10, [%[a], #188]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[46]\n\t"
+        "ldr	r10, [%[a], #184]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[45]\n\t"
+        "ldr	r10, [%[a], #180]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[44]\n\t"
+        "ldr	r10, [%[a], #176]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[43]\n\t"
+        "ldr	r10, [%[a], #172]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[42]\n\t"
+        "ldr	r10, [%[a], #168]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[41]\n\t"
+        "ldr	r10, [%[a], #164]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[40]\n\t"
+        "ldr	r10, [%[a], #160]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[39]\n\t"
+        "ldr	r10, [%[a], #156]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[38]\n\t"
+        "ldr	r10, [%[a], #152]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[37]\n\t"
+        "ldr	r10, [%[a], #148]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[36]\n\t"
+        "ldr	r10, [%[a], #144]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[35]\n\t"
+        "ldr	r10, [%[a], #140]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[34]\n\t"
+        "ldr	r10, [%[a], #136]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[14] * A[33]\n\t"
+        "ldr	r10, [%[a], #132]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[15] * A[32]\n\t"
+        "ldr	r10, [%[a], #128]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[16] * A[31]\n\t"
+        "ldr	r10, [%[a], #124]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[17] * A[30]\n\t"
+        "ldr	r10, [%[a], #120]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[18] * A[29]\n\t"
+        "ldr	r10, [%[a], #116]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[19] * A[28]\n\t"
+        "ldr	r10, [%[a], #112]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[20] * A[27]\n\t"
+        "ldr	r10, [%[a], #108]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[21] * A[26]\n\t"
+        "ldr	r10, [%[a], #104]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[22] * A[25]\n\t"
+        "ldr	r10, [%[a], #100]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[23] * A[24]\n\t"
+        "ldr	r10, [%[a], #96]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [sp, #188]\n\t"
+        "#  A[0] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[47]\n\t"
+        "ldr	r10, [%[a], #188]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[46]\n\t"
+        "ldr	r10, [%[a], #184]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[45]\n\t"
+        "ldr	r10, [%[a], #180]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[44]\n\t"
+        "ldr	r10, [%[a], #176]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[43]\n\t"
+        "ldr	r10, [%[a], #172]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[42]\n\t"
+        "ldr	r10, [%[a], #168]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[41]\n\t"
+        "ldr	r10, [%[a], #164]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[40]\n\t"
+        "ldr	r10, [%[a], #160]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[39]\n\t"
+        "ldr	r10, [%[a], #156]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[38]\n\t"
+        "ldr	r10, [%[a], #152]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[37]\n\t"
+        "ldr	r10, [%[a], #148]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[36]\n\t"
+        "ldr	r10, [%[a], #144]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[35]\n\t"
+        "ldr	r10, [%[a], #140]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[14] * A[34]\n\t"
+        "ldr	r10, [%[a], #136]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[15] * A[33]\n\t"
+        "ldr	r10, [%[a], #132]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[16] * A[32]\n\t"
+        "ldr	r10, [%[a], #128]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[17] * A[31]\n\t"
+        "ldr	r10, [%[a], #124]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[18] * A[30]\n\t"
+        "ldr	r10, [%[a], #120]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[19] * A[29]\n\t"
+        "ldr	r10, [%[a], #116]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[20] * A[28]\n\t"
+        "ldr	r10, [%[a], #112]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[21] * A[27]\n\t"
+        "ldr	r10, [%[a], #108]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[22] * A[26]\n\t"
+        "ldr	r10, [%[a], #104]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[23] * A[25]\n\t"
+        "ldr	r10, [%[a], #100]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[24] * A[24]\n\t"
+        "ldr	r10, [%[a], #96]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r2, r2, r5\n\t"
+        "adcs	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r2, [sp, #192]\n\t"
+        "#  A[0] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r2, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[47]\n\t"
+        "ldr	r10, [%[a], #188]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[46]\n\t"
+        "ldr	r10, [%[a], #184]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[45]\n\t"
+        "ldr	r10, [%[a], #180]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[44]\n\t"
+        "ldr	r10, [%[a], #176]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[43]\n\t"
+        "ldr	r10, [%[a], #172]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[42]\n\t"
+        "ldr	r10, [%[a], #168]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[41]\n\t"
+        "ldr	r10, [%[a], #164]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[40]\n\t"
+        "ldr	r10, [%[a], #160]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[39]\n\t"
+        "ldr	r10, [%[a], #156]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[38]\n\t"
+        "ldr	r10, [%[a], #152]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[37]\n\t"
+        "ldr	r10, [%[a], #148]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[36]\n\t"
+        "ldr	r10, [%[a], #144]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[14] * A[35]\n\t"
+        "ldr	r10, [%[a], #140]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[15] * A[34]\n\t"
+        "ldr	r10, [%[a], #136]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[16] * A[33]\n\t"
+        "ldr	r10, [%[a], #132]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[17] * A[32]\n\t"
+        "ldr	r10, [%[a], #128]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[18] * A[31]\n\t"
+        "ldr	r10, [%[a], #124]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[19] * A[30]\n\t"
+        "ldr	r10, [%[a], #120]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[20] * A[29]\n\t"
+        "ldr	r10, [%[a], #116]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[21] * A[28]\n\t"
+        "ldr	r10, [%[a], #112]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[22] * A[27]\n\t"
+        "ldr	r10, [%[a], #108]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[23] * A[26]\n\t"
+        "ldr	r10, [%[a], #104]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[24] * A[25]\n\t"
+        "ldr	r10, [%[a], #100]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	r2, r2, r7\n\t"
+        "str	r3, [sp, #196]\n\t"
+        "#  A[0] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[47]\n\t"
+        "ldr	r10, [%[a], #188]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[46]\n\t"
+        "ldr	r10, [%[a], #184]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[45]\n\t"
+        "ldr	r10, [%[a], #180]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[44]\n\t"
+        "ldr	r10, [%[a], #176]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[43]\n\t"
+        "ldr	r10, [%[a], #172]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[42]\n\t"
+        "ldr	r10, [%[a], #168]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[41]\n\t"
+        "ldr	r10, [%[a], #164]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[40]\n\t"
+        "ldr	r10, [%[a], #160]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[39]\n\t"
+        "ldr	r10, [%[a], #156]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[38]\n\t"
+        "ldr	r10, [%[a], #152]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[37]\n\t"
+        "ldr	r10, [%[a], #148]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[14] * A[36]\n\t"
+        "ldr	r10, [%[a], #144]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[15] * A[35]\n\t"
+        "ldr	r10, [%[a], #140]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[16] * A[34]\n\t"
+        "ldr	r10, [%[a], #136]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[17] * A[33]\n\t"
+        "ldr	r10, [%[a], #132]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[18] * A[32]\n\t"
+        "ldr	r10, [%[a], #128]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[19] * A[31]\n\t"
+        "ldr	r10, [%[a], #124]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[20] * A[30]\n\t"
+        "ldr	r10, [%[a], #120]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[21] * A[29]\n\t"
+        "ldr	r10, [%[a], #116]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[22] * A[28]\n\t"
+        "ldr	r10, [%[a], #112]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[23] * A[27]\n\t"
+        "ldr	r10, [%[a], #108]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[24] * A[26]\n\t"
+        "ldr	r10, [%[a], #104]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[25] * A[25]\n\t"
+        "ldr	r10, [%[a], #100]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [sp, #200]\n\t"
+        "#  A[0] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[47]\n\t"
+        "ldr	r10, [%[a], #188]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[46]\n\t"
+        "ldr	r10, [%[a], #184]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[45]\n\t"
+        "ldr	r10, [%[a], #180]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[44]\n\t"
+        "ldr	r10, [%[a], #176]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[43]\n\t"
+        "ldr	r10, [%[a], #172]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[42]\n\t"
+        "ldr	r10, [%[a], #168]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[41]\n\t"
+        "ldr	r10, [%[a], #164]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[40]\n\t"
+        "ldr	r10, [%[a], #160]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[39]\n\t"
+        "ldr	r10, [%[a], #156]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[38]\n\t"
+        "ldr	r10, [%[a], #152]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[14] * A[37]\n\t"
+        "ldr	r10, [%[a], #148]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[15] * A[36]\n\t"
+        "ldr	r10, [%[a], #144]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[16] * A[35]\n\t"
+        "ldr	r10, [%[a], #140]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[17] * A[34]\n\t"
+        "ldr	r10, [%[a], #136]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[18] * A[33]\n\t"
+        "ldr	r10, [%[a], #132]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[19] * A[32]\n\t"
+        "ldr	r10, [%[a], #128]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[20] * A[31]\n\t"
+        "ldr	r10, [%[a], #124]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[21] * A[30]\n\t"
+        "ldr	r10, [%[a], #120]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[22] * A[29]\n\t"
+        "ldr	r10, [%[a], #116]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[23] * A[28]\n\t"
+        "ldr	r10, [%[a], #112]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[24] * A[27]\n\t"
+        "ldr	r10, [%[a], #108]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[25] * A[26]\n\t"
+        "ldr	r10, [%[a], #104]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r2, r2, r5\n\t"
+        "adcs	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r2, [sp, #204]\n\t"
+        "#  A[0] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r2, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[47]\n\t"
+        "ldr	r10, [%[a], #188]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[46]\n\t"
+        "ldr	r10, [%[a], #184]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[45]\n\t"
+        "ldr	r10, [%[a], #180]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[44]\n\t"
+        "ldr	r10, [%[a], #176]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[43]\n\t"
+        "ldr	r10, [%[a], #172]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[42]\n\t"
+        "ldr	r10, [%[a], #168]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[41]\n\t"
+        "ldr	r10, [%[a], #164]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[40]\n\t"
+        "ldr	r10, [%[a], #160]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[39]\n\t"
+        "ldr	r10, [%[a], #156]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[14] * A[38]\n\t"
+        "ldr	r10, [%[a], #152]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[15] * A[37]\n\t"
+        "ldr	r10, [%[a], #148]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[16] * A[36]\n\t"
+        "ldr	r10, [%[a], #144]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[17] * A[35]\n\t"
+        "ldr	r10, [%[a], #140]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[18] * A[34]\n\t"
+        "ldr	r10, [%[a], #136]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[19] * A[33]\n\t"
+        "ldr	r10, [%[a], #132]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[20] * A[32]\n\t"
+        "ldr	r10, [%[a], #128]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[21] * A[31]\n\t"
+        "ldr	r10, [%[a], #124]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[22] * A[30]\n\t"
+        "ldr	r10, [%[a], #120]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[23] * A[29]\n\t"
+        "ldr	r10, [%[a], #116]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[24] * A[28]\n\t"
+        "ldr	r10, [%[a], #112]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[25] * A[27]\n\t"
+        "ldr	r10, [%[a], #108]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[26] * A[26]\n\t"
+        "ldr	r10, [%[a], #104]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	r2, r2, r7\n\t"
+        "str	r3, [sp, #208]\n\t"
+        "#  A[0] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[47]\n\t"
+        "ldr	r10, [%[a], #188]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[46]\n\t"
+        "ldr	r10, [%[a], #184]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[45]\n\t"
+        "ldr	r10, [%[a], #180]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[44]\n\t"
+        "ldr	r10, [%[a], #176]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[43]\n\t"
+        "ldr	r10, [%[a], #172]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[42]\n\t"
+        "ldr	r10, [%[a], #168]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[41]\n\t"
+        "ldr	r10, [%[a], #164]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[40]\n\t"
+        "ldr	r10, [%[a], #160]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[14] * A[39]\n\t"
+        "ldr	r10, [%[a], #156]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[15] * A[38]\n\t"
+        "ldr	r10, [%[a], #152]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[16] * A[37]\n\t"
+        "ldr	r10, [%[a], #148]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[17] * A[36]\n\t"
+        "ldr	r10, [%[a], #144]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[18] * A[35]\n\t"
+        "ldr	r10, [%[a], #140]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[19] * A[34]\n\t"
+        "ldr	r10, [%[a], #136]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[20] * A[33]\n\t"
+        "ldr	r10, [%[a], #132]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[21] * A[32]\n\t"
+        "ldr	r10, [%[a], #128]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[22] * A[31]\n\t"
+        "ldr	r10, [%[a], #124]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[23] * A[30]\n\t"
+        "ldr	r10, [%[a], #120]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[24] * A[29]\n\t"
+        "ldr	r10, [%[a], #116]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[25] * A[28]\n\t"
+        "ldr	r10, [%[a], #112]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[26] * A[27]\n\t"
+        "ldr	r10, [%[a], #108]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [sp, #212]\n\t"
+        "#  A[0] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[47]\n\t"
+        "ldr	r10, [%[a], #188]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[46]\n\t"
+        "ldr	r10, [%[a], #184]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[45]\n\t"
+        "ldr	r10, [%[a], #180]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[44]\n\t"
+        "ldr	r10, [%[a], #176]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[43]\n\t"
+        "ldr	r10, [%[a], #172]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[42]\n\t"
+        "ldr	r10, [%[a], #168]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[41]\n\t"
+        "ldr	r10, [%[a], #164]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[14] * A[40]\n\t"
+        "ldr	r10, [%[a], #160]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[15] * A[39]\n\t"
+        "ldr	r10, [%[a], #156]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[16] * A[38]\n\t"
+        "ldr	r10, [%[a], #152]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[17] * A[37]\n\t"
+        "ldr	r10, [%[a], #148]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[18] * A[36]\n\t"
+        "ldr	r10, [%[a], #144]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[19] * A[35]\n\t"
+        "ldr	r10, [%[a], #140]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[20] * A[34]\n\t"
+        "ldr	r10, [%[a], #136]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[21] * A[33]\n\t"
+        "ldr	r10, [%[a], #132]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[22] * A[32]\n\t"
+        "ldr	r10, [%[a], #128]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[23] * A[31]\n\t"
+        "ldr	r10, [%[a], #124]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[24] * A[30]\n\t"
+        "ldr	r10, [%[a], #120]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[25] * A[29]\n\t"
+        "ldr	r10, [%[a], #116]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[26] * A[28]\n\t"
+        "ldr	r10, [%[a], #112]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[27] * A[27]\n\t"
+        "ldr	r10, [%[a], #108]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r2, r2, r5\n\t"
+        "adcs	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r2, [sp, #216]\n\t"
+        "#  A[0] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r2, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[47]\n\t"
+        "ldr	r10, [%[a], #188]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[46]\n\t"
+        "ldr	r10, [%[a], #184]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[45]\n\t"
+        "ldr	r10, [%[a], #180]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[44]\n\t"
+        "ldr	r10, [%[a], #176]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[43]\n\t"
+        "ldr	r10, [%[a], #172]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[42]\n\t"
+        "ldr	r10, [%[a], #168]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[14] * A[41]\n\t"
+        "ldr	r10, [%[a], #164]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[15] * A[40]\n\t"
+        "ldr	r10, [%[a], #160]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[16] * A[39]\n\t"
+        "ldr	r10, [%[a], #156]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[17] * A[38]\n\t"
+        "ldr	r10, [%[a], #152]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[18] * A[37]\n\t"
+        "ldr	r10, [%[a], #148]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[19] * A[36]\n\t"
+        "ldr	r10, [%[a], #144]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[20] * A[35]\n\t"
+        "ldr	r10, [%[a], #140]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[21] * A[34]\n\t"
+        "ldr	r10, [%[a], #136]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[22] * A[33]\n\t"
+        "ldr	r10, [%[a], #132]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[23] * A[32]\n\t"
+        "ldr	r10, [%[a], #128]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[24] * A[31]\n\t"
+        "ldr	r10, [%[a], #124]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[25] * A[30]\n\t"
+        "ldr	r10, [%[a], #120]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[26] * A[29]\n\t"
+        "ldr	r10, [%[a], #116]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[27] * A[28]\n\t"
+        "ldr	r10, [%[a], #112]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	r2, r2, r7\n\t"
+        "str	r3, [sp, #220]\n\t"
+        "#  A[0] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[47]\n\t"
+        "ldr	r10, [%[a], #188]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[46]\n\t"
+        "ldr	r10, [%[a], #184]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[45]\n\t"
+        "ldr	r10, [%[a], #180]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[44]\n\t"
+        "ldr	r10, [%[a], #176]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[43]\n\t"
+        "ldr	r10, [%[a], #172]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[14] * A[42]\n\t"
+        "ldr	r10, [%[a], #168]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[15] * A[41]\n\t"
+        "ldr	r10, [%[a], #164]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[16] * A[40]\n\t"
+        "ldr	r10, [%[a], #160]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[17] * A[39]\n\t"
+        "ldr	r10, [%[a], #156]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[18] * A[38]\n\t"
+        "ldr	r10, [%[a], #152]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[19] * A[37]\n\t"
+        "ldr	r10, [%[a], #148]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[20] * A[36]\n\t"
+        "ldr	r10, [%[a], #144]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[21] * A[35]\n\t"
+        "ldr	r10, [%[a], #140]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[22] * A[34]\n\t"
+        "ldr	r10, [%[a], #136]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[23] * A[33]\n\t"
+        "ldr	r10, [%[a], #132]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[24] * A[32]\n\t"
+        "ldr	r10, [%[a], #128]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[25] * A[31]\n\t"
+        "ldr	r10, [%[a], #124]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[26] * A[30]\n\t"
+        "ldr	r10, [%[a], #120]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[27] * A[29]\n\t"
+        "ldr	r10, [%[a], #116]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[28] * A[28]\n\t"
+        "ldr	r10, [%[a], #112]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [sp, #224]\n\t"
+        "#  A[0] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[47]\n\t"
+        "ldr	r10, [%[a], #188]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[46]\n\t"
+        "ldr	r10, [%[a], #184]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[45]\n\t"
+        "ldr	r10, [%[a], #180]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[44]\n\t"
+        "ldr	r10, [%[a], #176]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[14] * A[43]\n\t"
+        "ldr	r10, [%[a], #172]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[15] * A[42]\n\t"
+        "ldr	r10, [%[a], #168]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[16] * A[41]\n\t"
+        "ldr	r10, [%[a], #164]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[17] * A[40]\n\t"
+        "ldr	r10, [%[a], #160]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[18] * A[39]\n\t"
+        "ldr	r10, [%[a], #156]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[19] * A[38]\n\t"
+        "ldr	r10, [%[a], #152]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[20] * A[37]\n\t"
+        "ldr	r10, [%[a], #148]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[21] * A[36]\n\t"
+        "ldr	r10, [%[a], #144]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[22] * A[35]\n\t"
+        "ldr	r10, [%[a], #140]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[23] * A[34]\n\t"
+        "ldr	r10, [%[a], #136]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[24] * A[33]\n\t"
+        "ldr	r10, [%[a], #132]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[25] * A[32]\n\t"
+        "ldr	r10, [%[a], #128]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[26] * A[31]\n\t"
+        "ldr	r10, [%[a], #124]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[27] * A[30]\n\t"
+        "ldr	r10, [%[a], #120]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[28] * A[29]\n\t"
+        "ldr	r10, [%[a], #116]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r2, r2, r5\n\t"
+        "adcs	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r2, [sp, #228]\n\t"
+        "#  A[0] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r2, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[47]\n\t"
+        "ldr	r10, [%[a], #188]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[46]\n\t"
+        "ldr	r10, [%[a], #184]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[45]\n\t"
+        "ldr	r10, [%[a], #180]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[14] * A[44]\n\t"
+        "ldr	r10, [%[a], #176]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[15] * A[43]\n\t"
+        "ldr	r10, [%[a], #172]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[16] * A[42]\n\t"
+        "ldr	r10, [%[a], #168]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[17] * A[41]\n\t"
+        "ldr	r10, [%[a], #164]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[18] * A[40]\n\t"
+        "ldr	r10, [%[a], #160]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[19] * A[39]\n\t"
+        "ldr	r10, [%[a], #156]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[20] * A[38]\n\t"
+        "ldr	r10, [%[a], #152]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[21] * A[37]\n\t"
+        "ldr	r10, [%[a], #148]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[22] * A[36]\n\t"
+        "ldr	r10, [%[a], #144]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[23] * A[35]\n\t"
+        "ldr	r10, [%[a], #140]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[24] * A[34]\n\t"
+        "ldr	r10, [%[a], #136]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[25] * A[33]\n\t"
+        "ldr	r10, [%[a], #132]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[26] * A[32]\n\t"
+        "ldr	r10, [%[a], #128]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[27] * A[31]\n\t"
+        "ldr	r10, [%[a], #124]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[28] * A[30]\n\t"
+        "ldr	r10, [%[a], #120]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[29] * A[29]\n\t"
+        "ldr	r10, [%[a], #116]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	r2, r2, r7\n\t"
+        "str	r3, [sp, #232]\n\t"
+        "#  A[0] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[47]\n\t"
+        "ldr	r10, [%[a], #188]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[46]\n\t"
+        "ldr	r10, [%[a], #184]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[14] * A[45]\n\t"
+        "ldr	r10, [%[a], #180]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[15] * A[44]\n\t"
+        "ldr	r10, [%[a], #176]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[16] * A[43]\n\t"
+        "ldr	r10, [%[a], #172]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[17] * A[42]\n\t"
+        "ldr	r10, [%[a], #168]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[18] * A[41]\n\t"
+        "ldr	r10, [%[a], #164]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[19] * A[40]\n\t"
+        "ldr	r10, [%[a], #160]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[20] * A[39]\n\t"
+        "ldr	r10, [%[a], #156]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[21] * A[38]\n\t"
+        "ldr	r10, [%[a], #152]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[22] * A[37]\n\t"
+        "ldr	r10, [%[a], #148]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[23] * A[36]\n\t"
+        "ldr	r10, [%[a], #144]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[24] * A[35]\n\t"
+        "ldr	r10, [%[a], #140]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[25] * A[34]\n\t"
+        "ldr	r10, [%[a], #136]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[26] * A[33]\n\t"
+        "ldr	r10, [%[a], #132]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[27] * A[32]\n\t"
+        "ldr	r10, [%[a], #128]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[28] * A[31]\n\t"
+        "ldr	r10, [%[a], #124]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[29] * A[30]\n\t"
+        "ldr	r10, [%[a], #120]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [sp, #236]\n\t"
+        "#  A[0] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[47]\n\t"
+        "ldr	r10, [%[a], #188]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[14] * A[46]\n\t"
+        "ldr	r10, [%[a], #184]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[15] * A[45]\n\t"
+        "ldr	r10, [%[a], #180]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[16] * A[44]\n\t"
+        "ldr	r10, [%[a], #176]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[17] * A[43]\n\t"
+        "ldr	r10, [%[a], #172]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[18] * A[42]\n\t"
+        "ldr	r10, [%[a], #168]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[19] * A[41]\n\t"
+        "ldr	r10, [%[a], #164]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[20] * A[40]\n\t"
+        "ldr	r10, [%[a], #160]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[21] * A[39]\n\t"
+        "ldr	r10, [%[a], #156]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[22] * A[38]\n\t"
+        "ldr	r10, [%[a], #152]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[23] * A[37]\n\t"
+        "ldr	r10, [%[a], #148]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[24] * A[36]\n\t"
+        "ldr	r10, [%[a], #144]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[25] * A[35]\n\t"
+        "ldr	r10, [%[a], #140]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[26] * A[34]\n\t"
+        "ldr	r10, [%[a], #136]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[27] * A[33]\n\t"
+        "ldr	r10, [%[a], #132]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[28] * A[32]\n\t"
+        "ldr	r10, [%[a], #128]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[29] * A[31]\n\t"
+        "ldr	r10, [%[a], #124]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[30] * A[30]\n\t"
+        "ldr	r10, [%[a], #120]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r2, r2, r5\n\t"
+        "adcs	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r2, [sp, #240]\n\t"
+        "#  A[0] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r2, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[14] * A[47]\n\t"
+        "ldr	r10, [%[a], #188]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[15] * A[46]\n\t"
+        "ldr	r10, [%[a], #184]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[16] * A[45]\n\t"
+        "ldr	r10, [%[a], #180]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[17] * A[44]\n\t"
+        "ldr	r10, [%[a], #176]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[18] * A[43]\n\t"
+        "ldr	r10, [%[a], #172]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[19] * A[42]\n\t"
+        "ldr	r10, [%[a], #168]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[20] * A[41]\n\t"
+        "ldr	r10, [%[a], #164]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[21] * A[40]\n\t"
+        "ldr	r10, [%[a], #160]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[22] * A[39]\n\t"
+        "ldr	r10, [%[a], #156]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[23] * A[38]\n\t"
+        "ldr	r10, [%[a], #152]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[24] * A[37]\n\t"
+        "ldr	r10, [%[a], #148]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[25] * A[36]\n\t"
+        "ldr	r10, [%[a], #144]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[26] * A[35]\n\t"
+        "ldr	r10, [%[a], #140]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[27] * A[34]\n\t"
+        "ldr	r10, [%[a], #136]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[28] * A[33]\n\t"
+        "ldr	r10, [%[a], #132]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[29] * A[32]\n\t"
+        "ldr	r10, [%[a], #128]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[30] * A[31]\n\t"
+        "ldr	r10, [%[a], #124]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	r2, r2, r7\n\t"
+        "str	r3, [sp, #244]\n\t"
+        "#  A[0] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[14] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[15] * A[47]\n\t"
+        "ldr	r10, [%[a], #188]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[16] * A[46]\n\t"
+        "ldr	r10, [%[a], #184]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[17] * A[45]\n\t"
+        "ldr	r10, [%[a], #180]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[18] * A[44]\n\t"
+        "ldr	r10, [%[a], #176]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[19] * A[43]\n\t"
+        "ldr	r10, [%[a], #172]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[20] * A[42]\n\t"
+        "ldr	r10, [%[a], #168]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[21] * A[41]\n\t"
+        "ldr	r10, [%[a], #164]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[22] * A[40]\n\t"
+        "ldr	r10, [%[a], #160]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[23] * A[39]\n\t"
+        "ldr	r10, [%[a], #156]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[24] * A[38]\n\t"
+        "ldr	r10, [%[a], #152]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[25] * A[37]\n\t"
+        "ldr	r10, [%[a], #148]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[26] * A[36]\n\t"
+        "ldr	r10, [%[a], #144]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[27] * A[35]\n\t"
+        "ldr	r10, [%[a], #140]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[28] * A[34]\n\t"
+        "ldr	r10, [%[a], #136]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[29] * A[33]\n\t"
+        "ldr	r10, [%[a], #132]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[30] * A[32]\n\t"
+        "ldr	r10, [%[a], #128]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[31] * A[31]\n\t"
+        "ldr	r10, [%[a], #124]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [sp, #248]\n\t"
+        "#  A[0] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[14] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[15] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[16] * A[47]\n\t"
+        "ldr	r10, [%[a], #188]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[17] * A[46]\n\t"
+        "ldr	r10, [%[a], #184]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[18] * A[45]\n\t"
+        "ldr	r10, [%[a], #180]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[19] * A[44]\n\t"
+        "ldr	r10, [%[a], #176]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[20] * A[43]\n\t"
+        "ldr	r10, [%[a], #172]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[21] * A[42]\n\t"
+        "ldr	r10, [%[a], #168]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[22] * A[41]\n\t"
+        "ldr	r10, [%[a], #164]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[23] * A[40]\n\t"
+        "ldr	r10, [%[a], #160]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[24] * A[39]\n\t"
+        "ldr	r10, [%[a], #156]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[25] * A[38]\n\t"
+        "ldr	r10, [%[a], #152]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[26] * A[37]\n\t"
+        "ldr	r10, [%[a], #148]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[27] * A[36]\n\t"
+        "ldr	r10, [%[a], #144]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[28] * A[35]\n\t"
+        "ldr	r10, [%[a], #140]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[29] * A[34]\n\t"
+        "ldr	r10, [%[a], #136]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[30] * A[33]\n\t"
+        "ldr	r10, [%[a], #132]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[31] * A[32]\n\t"
+        "ldr	r10, [%[a], #128]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r2, r2, r5\n\t"
+        "adcs	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r2, [sp, #252]\n\t"
+        "#  A[1] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r2, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[2] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[14] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[15] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[16] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[17] * A[47]\n\t"
+        "ldr	r10, [%[a], #188]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[18] * A[46]\n\t"
+        "ldr	r10, [%[a], #184]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[19] * A[45]\n\t"
+        "ldr	r10, [%[a], #180]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[20] * A[44]\n\t"
+        "ldr	r10, [%[a], #176]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[21] * A[43]\n\t"
+        "ldr	r10, [%[a], #172]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[22] * A[42]\n\t"
+        "ldr	r10, [%[a], #168]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[23] * A[41]\n\t"
+        "ldr	r10, [%[a], #164]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[24] * A[40]\n\t"
+        "ldr	r10, [%[a], #160]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[25] * A[39]\n\t"
+        "ldr	r10, [%[a], #156]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[26] * A[38]\n\t"
+        "ldr	r10, [%[a], #152]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[27] * A[37]\n\t"
+        "ldr	r10, [%[a], #148]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[28] * A[36]\n\t"
+        "ldr	r10, [%[a], #144]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[29] * A[35]\n\t"
+        "ldr	r10, [%[a], #140]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[30] * A[34]\n\t"
+        "ldr	r10, [%[a], #136]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[31] * A[33]\n\t"
+        "ldr	r10, [%[a], #132]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[32] * A[32]\n\t"
+        "ldr	r10, [%[a], #128]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	r2, r2, r7\n\t"
+        "str	r3, [%[r], #256]\n\t"
+        "#  A[2] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[3] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[14] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[15] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[16] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[17] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[18] * A[47]\n\t"
+        "ldr	r10, [%[a], #188]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[19] * A[46]\n\t"
+        "ldr	r10, [%[a], #184]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[20] * A[45]\n\t"
+        "ldr	r10, [%[a], #180]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[21] * A[44]\n\t"
+        "ldr	r10, [%[a], #176]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[22] * A[43]\n\t"
+        "ldr	r10, [%[a], #172]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[23] * A[42]\n\t"
+        "ldr	r10, [%[a], #168]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[24] * A[41]\n\t"
+        "ldr	r10, [%[a], #164]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[25] * A[40]\n\t"
+        "ldr	r10, [%[a], #160]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[26] * A[39]\n\t"
+        "ldr	r10, [%[a], #156]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[27] * A[38]\n\t"
+        "ldr	r10, [%[a], #152]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[28] * A[37]\n\t"
+        "ldr	r10, [%[a], #148]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[29] * A[36]\n\t"
+        "ldr	r10, [%[a], #144]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[30] * A[35]\n\t"
+        "ldr	r10, [%[a], #140]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[31] * A[34]\n\t"
+        "ldr	r10, [%[a], #136]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[32] * A[33]\n\t"
+        "ldr	r10, [%[a], #132]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [%[r], #260]\n\t"
+        "#  A[3] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[4] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[14] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[15] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[16] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[17] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[18] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[19] * A[47]\n\t"
+        "ldr	r10, [%[a], #188]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[20] * A[46]\n\t"
+        "ldr	r10, [%[a], #184]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[21] * A[45]\n\t"
+        "ldr	r10, [%[a], #180]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[22] * A[44]\n\t"
+        "ldr	r10, [%[a], #176]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[23] * A[43]\n\t"
+        "ldr	r10, [%[a], #172]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[24] * A[42]\n\t"
+        "ldr	r10, [%[a], #168]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[25] * A[41]\n\t"
+        "ldr	r10, [%[a], #164]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[26] * A[40]\n\t"
+        "ldr	r10, [%[a], #160]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[27] * A[39]\n\t"
+        "ldr	r10, [%[a], #156]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[28] * A[38]\n\t"
+        "ldr	r10, [%[a], #152]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[29] * A[37]\n\t"
+        "ldr	r10, [%[a], #148]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[30] * A[36]\n\t"
+        "ldr	r10, [%[a], #144]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[31] * A[35]\n\t"
+        "ldr	r10, [%[a], #140]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[32] * A[34]\n\t"
+        "ldr	r10, [%[a], #136]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[33] * A[33]\n\t"
+        "ldr	r10, [%[a], #132]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r2, r2, r5\n\t"
+        "adcs	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r2, [%[r], #264]\n\t"
+        "#  A[4] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r2, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[5] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[14] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[15] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[16] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[17] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[18] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[19] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[20] * A[47]\n\t"
+        "ldr	r10, [%[a], #188]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[21] * A[46]\n\t"
+        "ldr	r10, [%[a], #184]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[22] * A[45]\n\t"
+        "ldr	r10, [%[a], #180]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[23] * A[44]\n\t"
+        "ldr	r10, [%[a], #176]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[24] * A[43]\n\t"
+        "ldr	r10, [%[a], #172]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[25] * A[42]\n\t"
+        "ldr	r10, [%[a], #168]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[26] * A[41]\n\t"
+        "ldr	r10, [%[a], #164]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[27] * A[40]\n\t"
+        "ldr	r10, [%[a], #160]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[28] * A[39]\n\t"
+        "ldr	r10, [%[a], #156]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[29] * A[38]\n\t"
+        "ldr	r10, [%[a], #152]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[30] * A[37]\n\t"
+        "ldr	r10, [%[a], #148]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[31] * A[36]\n\t"
+        "ldr	r10, [%[a], #144]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[32] * A[35]\n\t"
+        "ldr	r10, [%[a], #140]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[33] * A[34]\n\t"
+        "ldr	r10, [%[a], #136]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	r2, r2, r7\n\t"
+        "str	r3, [%[r], #268]\n\t"
+        "#  A[5] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[6] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[14] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[15] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[16] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[17] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[18] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[19] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[20] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[21] * A[47]\n\t"
+        "ldr	r10, [%[a], #188]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[22] * A[46]\n\t"
+        "ldr	r10, [%[a], #184]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[23] * A[45]\n\t"
+        "ldr	r10, [%[a], #180]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[24] * A[44]\n\t"
+        "ldr	r10, [%[a], #176]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[25] * A[43]\n\t"
+        "ldr	r10, [%[a], #172]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[26] * A[42]\n\t"
+        "ldr	r10, [%[a], #168]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[27] * A[41]\n\t"
+        "ldr	r10, [%[a], #164]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[28] * A[40]\n\t"
+        "ldr	r10, [%[a], #160]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[29] * A[39]\n\t"
+        "ldr	r10, [%[a], #156]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[30] * A[38]\n\t"
+        "ldr	r10, [%[a], #152]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[31] * A[37]\n\t"
+        "ldr	r10, [%[a], #148]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[32] * A[36]\n\t"
+        "ldr	r10, [%[a], #144]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[33] * A[35]\n\t"
+        "ldr	r10, [%[a], #140]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[34] * A[34]\n\t"
+        "ldr	r10, [%[a], #136]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [%[r], #272]\n\t"
+        "#  A[6] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[7] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[14] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[15] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[16] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[17] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[18] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[19] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[20] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[21] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[22] * A[47]\n\t"
+        "ldr	r10, [%[a], #188]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[23] * A[46]\n\t"
+        "ldr	r10, [%[a], #184]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[24] * A[45]\n\t"
+        "ldr	r10, [%[a], #180]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[25] * A[44]\n\t"
+        "ldr	r10, [%[a], #176]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[26] * A[43]\n\t"
+        "ldr	r10, [%[a], #172]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[27] * A[42]\n\t"
+        "ldr	r10, [%[a], #168]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[28] * A[41]\n\t"
+        "ldr	r10, [%[a], #164]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[29] * A[40]\n\t"
+        "ldr	r10, [%[a], #160]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[30] * A[39]\n\t"
+        "ldr	r10, [%[a], #156]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[31] * A[38]\n\t"
+        "ldr	r10, [%[a], #152]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[32] * A[37]\n\t"
+        "ldr	r10, [%[a], #148]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[33] * A[36]\n\t"
+        "ldr	r10, [%[a], #144]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[34] * A[35]\n\t"
+        "ldr	r10, [%[a], #140]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r2, r2, r5\n\t"
+        "adcs	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r2, [%[r], #276]\n\t"
+        "#  A[7] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r2, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[8] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[9] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[14] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[15] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[16] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[17] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[18] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[19] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[20] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[21] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[22] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[23] * A[47]\n\t"
+        "ldr	r10, [%[a], #188]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[24] * A[46]\n\t"
+        "ldr	r10, [%[a], #184]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[25] * A[45]\n\t"
+        "ldr	r10, [%[a], #180]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[26] * A[44]\n\t"
+        "ldr	r10, [%[a], #176]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[27] * A[43]\n\t"
+        "ldr	r10, [%[a], #172]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[28] * A[42]\n\t"
+        "ldr	r10, [%[a], #168]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[29] * A[41]\n\t"
+        "ldr	r10, [%[a], #164]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[30] * A[40]\n\t"
+        "ldr	r10, [%[a], #160]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[31] * A[39]\n\t"
+        "ldr	r10, [%[a], #156]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[32] * A[38]\n\t"
+        "ldr	r10, [%[a], #152]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[33] * A[37]\n\t"
+        "ldr	r10, [%[a], #148]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[34] * A[36]\n\t"
+        "ldr	r10, [%[a], #144]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[35] * A[35]\n\t"
+        "ldr	r10, [%[a], #140]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	r2, r2, r7\n\t"
+        "str	r3, [%[r], #280]\n\t"
+        "#  A[8] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[9] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[10] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[14] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[15] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[16] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[17] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[18] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[19] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[20] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[21] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[22] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[23] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[24] * A[47]\n\t"
+        "ldr	r10, [%[a], #188]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[25] * A[46]\n\t"
+        "ldr	r10, [%[a], #184]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[26] * A[45]\n\t"
+        "ldr	r10, [%[a], #180]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[27] * A[44]\n\t"
+        "ldr	r10, [%[a], #176]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[28] * A[43]\n\t"
+        "ldr	r10, [%[a], #172]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[29] * A[42]\n\t"
+        "ldr	r10, [%[a], #168]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[30] * A[41]\n\t"
+        "ldr	r10, [%[a], #164]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[31] * A[40]\n\t"
+        "ldr	r10, [%[a], #160]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[32] * A[39]\n\t"
+        "ldr	r10, [%[a], #156]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[33] * A[38]\n\t"
+        "ldr	r10, [%[a], #152]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[34] * A[37]\n\t"
+        "ldr	r10, [%[a], #148]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[35] * A[36]\n\t"
+        "ldr	r10, [%[a], #144]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [%[r], #284]\n\t"
+        "#  A[9] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[10] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[11] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[14] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[15] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[16] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[17] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[18] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[19] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[20] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[21] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[22] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[23] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[24] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[25] * A[47]\n\t"
+        "ldr	r10, [%[a], #188]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[26] * A[46]\n\t"
+        "ldr	r10, [%[a], #184]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[27] * A[45]\n\t"
+        "ldr	r10, [%[a], #180]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[28] * A[44]\n\t"
+        "ldr	r10, [%[a], #176]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[29] * A[43]\n\t"
+        "ldr	r10, [%[a], #172]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[30] * A[42]\n\t"
+        "ldr	r10, [%[a], #168]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[31] * A[41]\n\t"
+        "ldr	r10, [%[a], #164]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[32] * A[40]\n\t"
+        "ldr	r10, [%[a], #160]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[33] * A[39]\n\t"
+        "ldr	r10, [%[a], #156]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[34] * A[38]\n\t"
+        "ldr	r10, [%[a], #152]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[35] * A[37]\n\t"
+        "ldr	r10, [%[a], #148]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[36] * A[36]\n\t"
+        "ldr	r10, [%[a], #144]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r2, r2, r5\n\t"
+        "adcs	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r2, [%[r], #288]\n\t"
+        "#  A[10] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r2, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[11] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[12] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[14] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[15] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[16] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[17] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[18] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[19] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[20] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[21] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[22] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[23] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[24] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[25] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[26] * A[47]\n\t"
+        "ldr	r10, [%[a], #188]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[27] * A[46]\n\t"
+        "ldr	r10, [%[a], #184]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[28] * A[45]\n\t"
+        "ldr	r10, [%[a], #180]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[29] * A[44]\n\t"
+        "ldr	r10, [%[a], #176]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[30] * A[43]\n\t"
+        "ldr	r10, [%[a], #172]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[31] * A[42]\n\t"
+        "ldr	r10, [%[a], #168]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[32] * A[41]\n\t"
+        "ldr	r10, [%[a], #164]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[33] * A[40]\n\t"
+        "ldr	r10, [%[a], #160]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[34] * A[39]\n\t"
+        "ldr	r10, [%[a], #156]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[35] * A[38]\n\t"
+        "ldr	r10, [%[a], #152]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[36] * A[37]\n\t"
+        "ldr	r10, [%[a], #148]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	r2, r2, r7\n\t"
+        "str	r3, [%[r], #292]\n\t"
+        "#  A[11] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[12] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[13] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[14] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[15] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[16] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[17] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[18] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[19] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[20] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[21] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[22] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[23] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[24] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[25] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[26] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[27] * A[47]\n\t"
+        "ldr	r10, [%[a], #188]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[28] * A[46]\n\t"
+        "ldr	r10, [%[a], #184]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[29] * A[45]\n\t"
+        "ldr	r10, [%[a], #180]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[30] * A[44]\n\t"
+        "ldr	r10, [%[a], #176]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[31] * A[43]\n\t"
+        "ldr	r10, [%[a], #172]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[32] * A[42]\n\t"
+        "ldr	r10, [%[a], #168]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[33] * A[41]\n\t"
+        "ldr	r10, [%[a], #164]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[34] * A[40]\n\t"
+        "ldr	r10, [%[a], #160]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[35] * A[39]\n\t"
+        "ldr	r10, [%[a], #156]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[36] * A[38]\n\t"
+        "ldr	r10, [%[a], #152]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[37] * A[37]\n\t"
+        "ldr	r10, [%[a], #148]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [%[r], #296]\n\t"
+        "#  A[12] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[13] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[14] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[15] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[16] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[17] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[18] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[19] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[20] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[21] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[22] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[23] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[24] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[25] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[26] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[27] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[28] * A[47]\n\t"
+        "ldr	r10, [%[a], #188]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[29] * A[46]\n\t"
+        "ldr	r10, [%[a], #184]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[30] * A[45]\n\t"
+        "ldr	r10, [%[a], #180]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[31] * A[44]\n\t"
+        "ldr	r10, [%[a], #176]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[32] * A[43]\n\t"
+        "ldr	r10, [%[a], #172]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[33] * A[42]\n\t"
+        "ldr	r10, [%[a], #168]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[34] * A[41]\n\t"
+        "ldr	r10, [%[a], #164]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[35] * A[40]\n\t"
+        "ldr	r10, [%[a], #160]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[36] * A[39]\n\t"
+        "ldr	r10, [%[a], #156]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[37] * A[38]\n\t"
+        "ldr	r10, [%[a], #152]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r2, r2, r5\n\t"
+        "adcs	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r2, [%[r], #300]\n\t"
+        "#  A[13] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r2, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[14] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[15] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[16] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[17] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[18] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[19] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[20] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[21] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[22] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[23] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[24] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[25] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[26] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[27] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[28] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[29] * A[47]\n\t"
+        "ldr	r10, [%[a], #188]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[30] * A[46]\n\t"
+        "ldr	r10, [%[a], #184]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[31] * A[45]\n\t"
+        "ldr	r10, [%[a], #180]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[32] * A[44]\n\t"
+        "ldr	r10, [%[a], #176]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[33] * A[43]\n\t"
+        "ldr	r10, [%[a], #172]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[34] * A[42]\n\t"
+        "ldr	r10, [%[a], #168]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[35] * A[41]\n\t"
+        "ldr	r10, [%[a], #164]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[36] * A[40]\n\t"
+        "ldr	r10, [%[a], #160]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[37] * A[39]\n\t"
+        "ldr	r10, [%[a], #156]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[38] * A[38]\n\t"
+        "ldr	r10, [%[a], #152]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	r2, r2, r7\n\t"
+        "str	r3, [%[r], #304]\n\t"
+        "#  A[14] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[15] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[16] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[17] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[18] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[19] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[20] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[21] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[22] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[23] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[24] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[25] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[26] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[27] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[28] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[29] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[30] * A[47]\n\t"
+        "ldr	r10, [%[a], #188]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[31] * A[46]\n\t"
+        "ldr	r10, [%[a], #184]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[32] * A[45]\n\t"
+        "ldr	r10, [%[a], #180]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[33] * A[44]\n\t"
+        "ldr	r10, [%[a], #176]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[34] * A[43]\n\t"
+        "ldr	r10, [%[a], #172]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[35] * A[42]\n\t"
+        "ldr	r10, [%[a], #168]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[36] * A[41]\n\t"
+        "ldr	r10, [%[a], #164]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[37] * A[40]\n\t"
+        "ldr	r10, [%[a], #160]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[38] * A[39]\n\t"
+        "ldr	r10, [%[a], #156]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [%[r], #308]\n\t"
+        "#  A[15] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[16] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[17] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[18] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[19] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[20] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[21] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[22] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[23] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[24] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[25] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[26] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[27] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[28] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[29] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[30] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[31] * A[47]\n\t"
+        "ldr	r10, [%[a], #188]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[32] * A[46]\n\t"
+        "ldr	r10, [%[a], #184]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[33] * A[45]\n\t"
+        "ldr	r10, [%[a], #180]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[34] * A[44]\n\t"
+        "ldr	r10, [%[a], #176]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[35] * A[43]\n\t"
+        "ldr	r10, [%[a], #172]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[36] * A[42]\n\t"
+        "ldr	r10, [%[a], #168]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[37] * A[41]\n\t"
+        "ldr	r10, [%[a], #164]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[38] * A[40]\n\t"
+        "ldr	r10, [%[a], #160]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[39] * A[39]\n\t"
+        "ldr	r10, [%[a], #156]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r2, r2, r5\n\t"
+        "adcs	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r2, [%[r], #312]\n\t"
+        "#  A[16] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r2, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[17] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[18] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[19] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[20] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[21] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[22] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[23] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[24] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[25] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[26] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[27] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[28] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[29] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[30] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[31] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[32] * A[47]\n\t"
+        "ldr	r10, [%[a], #188]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[33] * A[46]\n\t"
+        "ldr	r10, [%[a], #184]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[34] * A[45]\n\t"
+        "ldr	r10, [%[a], #180]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[35] * A[44]\n\t"
+        "ldr	r10, [%[a], #176]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[36] * A[43]\n\t"
+        "ldr	r10, [%[a], #172]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[37] * A[42]\n\t"
+        "ldr	r10, [%[a], #168]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[38] * A[41]\n\t"
+        "ldr	r10, [%[a], #164]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[39] * A[40]\n\t"
+        "ldr	r10, [%[a], #160]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	r2, r2, r7\n\t"
+        "str	r3, [%[r], #316]\n\t"
+        "#  A[17] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[18] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[19] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[20] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[21] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[22] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[23] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[24] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[25] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[26] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[27] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[28] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[29] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[30] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[31] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[32] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[33] * A[47]\n\t"
+        "ldr	r10, [%[a], #188]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[34] * A[46]\n\t"
+        "ldr	r10, [%[a], #184]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[35] * A[45]\n\t"
+        "ldr	r10, [%[a], #180]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[36] * A[44]\n\t"
+        "ldr	r10, [%[a], #176]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[37] * A[43]\n\t"
+        "ldr	r10, [%[a], #172]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[38] * A[42]\n\t"
+        "ldr	r10, [%[a], #168]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[39] * A[41]\n\t"
+        "ldr	r10, [%[a], #164]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[40] * A[40]\n\t"
+        "ldr	r10, [%[a], #160]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [%[r], #320]\n\t"
+        "#  A[18] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[19] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[20] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[21] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[22] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[23] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[24] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[25] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[26] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[27] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[28] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[29] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[30] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[31] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[32] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[33] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[34] * A[47]\n\t"
+        "ldr	r10, [%[a], #188]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[35] * A[46]\n\t"
+        "ldr	r10, [%[a], #184]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[36] * A[45]\n\t"
+        "ldr	r10, [%[a], #180]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[37] * A[44]\n\t"
+        "ldr	r10, [%[a], #176]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[38] * A[43]\n\t"
+        "ldr	r10, [%[a], #172]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[39] * A[42]\n\t"
+        "ldr	r10, [%[a], #168]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[40] * A[41]\n\t"
+        "ldr	r10, [%[a], #164]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r2, r2, r5\n\t"
+        "adcs	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r2, [%[r], #324]\n\t"
+        "#  A[19] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r2, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[20] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[21] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[22] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[23] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[24] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[25] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[26] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[27] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[28] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[29] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[30] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[31] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[32] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[33] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[34] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[35] * A[47]\n\t"
+        "ldr	r10, [%[a], #188]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[36] * A[46]\n\t"
+        "ldr	r10, [%[a], #184]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[37] * A[45]\n\t"
+        "ldr	r10, [%[a], #180]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[38] * A[44]\n\t"
+        "ldr	r10, [%[a], #176]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[39] * A[43]\n\t"
+        "ldr	r10, [%[a], #172]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[40] * A[42]\n\t"
+        "ldr	r10, [%[a], #168]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[41] * A[41]\n\t"
+        "ldr	r10, [%[a], #164]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	r2, r2, r7\n\t"
+        "str	r3, [%[r], #328]\n\t"
+        "#  A[20] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[21] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[22] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[23] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[24] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[25] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[26] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[27] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[28] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[29] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[30] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[31] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[32] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[33] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[34] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[35] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[36] * A[47]\n\t"
+        "ldr	r10, [%[a], #188]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[37] * A[46]\n\t"
+        "ldr	r10, [%[a], #184]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[38] * A[45]\n\t"
+        "ldr	r10, [%[a], #180]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[39] * A[44]\n\t"
+        "ldr	r10, [%[a], #176]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[40] * A[43]\n\t"
+        "ldr	r10, [%[a], #172]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[41] * A[42]\n\t"
+        "ldr	r10, [%[a], #168]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [%[r], #332]\n\t"
+        "#  A[21] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[22] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[23] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[24] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[25] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[26] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[27] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[28] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[29] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[30] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[31] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[32] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[33] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[34] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[35] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[36] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[37] * A[47]\n\t"
+        "ldr	r10, [%[a], #188]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[38] * A[46]\n\t"
+        "ldr	r10, [%[a], #184]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[39] * A[45]\n\t"
+        "ldr	r10, [%[a], #180]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[40] * A[44]\n\t"
+        "ldr	r10, [%[a], #176]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[41] * A[43]\n\t"
+        "ldr	r10, [%[a], #172]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[42] * A[42]\n\t"
+        "ldr	r10, [%[a], #168]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r2, r2, r5\n\t"
+        "adcs	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r2, [%[r], #336]\n\t"
+        "#  A[22] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r2, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[23] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[24] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[25] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[26] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[27] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[28] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[29] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[30] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[31] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[32] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[33] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[34] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[35] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[36] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[37] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[38] * A[47]\n\t"
+        "ldr	r10, [%[a], #188]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[39] * A[46]\n\t"
+        "ldr	r10, [%[a], #184]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[40] * A[45]\n\t"
+        "ldr	r10, [%[a], #180]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[41] * A[44]\n\t"
+        "ldr	r10, [%[a], #176]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[42] * A[43]\n\t"
+        "ldr	r10, [%[a], #172]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	r2, r2, r7\n\t"
+        "str	r3, [%[r], #340]\n\t"
+        "#  A[23] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[24] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[25] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[26] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[27] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[28] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[29] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[30] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[31] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[32] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[33] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[34] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[35] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[36] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[37] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[38] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[39] * A[47]\n\t"
+        "ldr	r10, [%[a], #188]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[40] * A[46]\n\t"
+        "ldr	r10, [%[a], #184]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[41] * A[45]\n\t"
+        "ldr	r10, [%[a], #180]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[42] * A[44]\n\t"
+        "ldr	r10, [%[a], #176]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[43] * A[43]\n\t"
+        "ldr	r10, [%[a], #172]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [%[r], #344]\n\t"
+        "#  A[24] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[25] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[26] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[27] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[28] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[29] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[30] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[31] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[32] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[33] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[34] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[35] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[36] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[37] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[38] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[39] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[40] * A[47]\n\t"
+        "ldr	r10, [%[a], #188]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[41] * A[46]\n\t"
+        "ldr	r10, [%[a], #184]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[42] * A[45]\n\t"
+        "ldr	r10, [%[a], #180]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[43] * A[44]\n\t"
+        "ldr	r10, [%[a], #176]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r2, r2, r5\n\t"
+        "adcs	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r2, [%[r], #348]\n\t"
+        "#  A[25] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r2, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[26] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[27] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[28] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[29] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[30] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[31] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[32] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[33] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[34] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[35] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[36] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[37] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[38] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[39] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[40] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[41] * A[47]\n\t"
+        "ldr	r10, [%[a], #188]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[42] * A[46]\n\t"
+        "ldr	r10, [%[a], #184]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[43] * A[45]\n\t"
+        "ldr	r10, [%[a], #180]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[44] * A[44]\n\t"
+        "ldr	r10, [%[a], #176]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	r2, r2, r7\n\t"
+        "str	r3, [%[r], #352]\n\t"
+        "#  A[26] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[27] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[28] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[29] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[30] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[31] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[32] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[33] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[34] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[35] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[36] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[37] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[38] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[39] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[40] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[41] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[42] * A[47]\n\t"
+        "ldr	r10, [%[a], #188]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[43] * A[46]\n\t"
+        "ldr	r10, [%[a], #184]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[44] * A[45]\n\t"
+        "ldr	r10, [%[a], #180]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [%[r], #356]\n\t"
+        "#  A[27] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[28] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[29] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[30] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[31] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[32] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[33] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[34] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[35] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[36] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[37] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[38] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[39] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[40] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[41] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[42] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[43] * A[47]\n\t"
+        "ldr	r10, [%[a], #188]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[44] * A[46]\n\t"
+        "ldr	r10, [%[a], #184]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[45] * A[45]\n\t"
+        "ldr	r10, [%[a], #180]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r2, r2, r5\n\t"
+        "adcs	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r2, [%[r], #360]\n\t"
+        "#  A[28] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r2, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[29] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[30] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[31] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[32] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[33] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[34] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[35] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[36] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[37] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[38] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[39] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[40] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[41] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[42] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[43] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[44] * A[47]\n\t"
+        "ldr	r10, [%[a], #188]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[45] * A[46]\n\t"
+        "ldr	r10, [%[a], #184]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	r2, r2, r7\n\t"
+        "str	r3, [%[r], #364]\n\t"
+        "#  A[29] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[30] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[31] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[32] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[33] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[34] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[35] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[36] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[37] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[38] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[39] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[40] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[41] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[42] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[43] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[44] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[45] * A[47]\n\t"
+        "ldr	r10, [%[a], #188]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[46] * A[46]\n\t"
+        "ldr	r10, [%[a], #184]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [%[r], #368]\n\t"
+        "#  A[30] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[31] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[32] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[33] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[34] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[35] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[36] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[37] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[38] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[39] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[40] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[41] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[42] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[43] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[44] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[45] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[46] * A[47]\n\t"
+        "ldr	r10, [%[a], #188]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r2, r2, r5\n\t"
+        "adcs	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r2, [%[r], #372]\n\t"
+        "#  A[31] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r2, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[32] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[33] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[34] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[35] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[36] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[37] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[38] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[39] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[40] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[41] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[42] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[43] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[44] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[45] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[46] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[47] * A[47]\n\t"
+        "ldr	r10, [%[a], #188]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	r2, r2, r7\n\t"
+        "str	r3, [%[r], #376]\n\t"
+        "#  A[32] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[33] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[34] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[35] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[36] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[37] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[38] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[39] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[40] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[41] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[42] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[43] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[44] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[45] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[46] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[47] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [%[r], #380]\n\t"
+        "#  A[33] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[34] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[35] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[36] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[37] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[38] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[39] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[40] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[41] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[42] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[43] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[44] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[45] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[46] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[47] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[48] * A[48]\n\t"
+        "ldr	r10, [%[a], #192]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r2, r2, r5\n\t"
+        "adcs	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r2, [%[r], #384]\n\t"
+        "#  A[34] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r2, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[35] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[36] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[37] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[38] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[39] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[40] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[41] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[42] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[43] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[44] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[45] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[46] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[47] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[48] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	r2, r2, r7\n\t"
+        "str	r3, [%[r], #388]\n\t"
+        "#  A[35] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[36] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[37] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[38] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[39] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[40] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[41] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[42] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[43] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[44] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[45] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[46] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[47] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[48] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[49] * A[49]\n\t"
+        "ldr	r10, [%[a], #196]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [%[r], #392]\n\t"
+        "#  A[36] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[37] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[38] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[39] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[40] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[41] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[42] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[43] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[44] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[45] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[46] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[47] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[48] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[49] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r2, r2, r5\n\t"
+        "adcs	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r2, [%[r], #396]\n\t"
+        "#  A[37] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r2, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[38] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[39] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[40] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[41] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[42] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[43] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[44] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[45] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[46] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[47] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[48] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[49] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[50] * A[50]\n\t"
+        "ldr	r10, [%[a], #200]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	r2, r2, r7\n\t"
+        "str	r3, [%[r], #400]\n\t"
+        "#  A[38] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[39] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[40] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[41] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[42] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[43] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[44] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[45] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[46] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[47] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[48] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[49] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[50] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [%[r], #404]\n\t"
+        "#  A[39] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[40] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[41] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[42] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[43] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[44] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[45] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[46] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[47] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[48] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[49] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[50] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[51] * A[51]\n\t"
+        "ldr	r10, [%[a], #204]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r2, r2, r5\n\t"
+        "adcs	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r2, [%[r], #408]\n\t"
+        "#  A[40] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r2, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[41] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[42] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[43] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[44] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[45] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[46] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[47] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[48] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[49] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[50] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[51] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	r2, r2, r7\n\t"
+        "str	r3, [%[r], #412]\n\t"
+        "#  A[41] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[42] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[43] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[44] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[45] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[46] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[47] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[48] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[49] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[50] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[51] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[52] * A[52]\n\t"
+        "ldr	r10, [%[a], #208]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [%[r], #416]\n\t"
+        "#  A[42] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[43] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[44] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[45] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[46] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[47] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[48] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[49] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[50] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[51] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[52] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r2, r2, r5\n\t"
+        "adcs	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r2, [%[r], #420]\n\t"
+        "#  A[43] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r2, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[44] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[45] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[46] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[47] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[48] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[49] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[50] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[51] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[52] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[53] * A[53]\n\t"
+        "ldr	r10, [%[a], #212]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	r2, r2, r7\n\t"
+        "str	r3, [%[r], #424]\n\t"
+        "#  A[44] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[45] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[46] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[47] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[48] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[49] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[50] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[51] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[52] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[53] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [%[r], #428]\n\t"
+        "#  A[45] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[46] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[47] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[48] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[49] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[50] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[51] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[52] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[53] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[54] * A[54]\n\t"
+        "ldr	r10, [%[a], #216]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r2, r2, r5\n\t"
+        "adcs	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r2, [%[r], #432]\n\t"
+        "#  A[46] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r2, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[47] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[48] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[49] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[50] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[51] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[52] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[53] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[54] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	r2, r2, r7\n\t"
+        "str	r3, [%[r], #436]\n\t"
+        "#  A[47] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[48] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[49] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[50] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[51] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[52] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[53] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[54] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[55] * A[55]\n\t"
+        "ldr	r10, [%[a], #220]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [%[r], #440]\n\t"
+        "#  A[48] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[49] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[50] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[51] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[52] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[53] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[54] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[55] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r2, r2, r5\n\t"
+        "adcs	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r2, [%[r], #444]\n\t"
+        "#  A[49] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r2, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[50] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[51] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[52] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[53] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[54] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[55] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[56] * A[56]\n\t"
+        "ldr	r10, [%[a], #224]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	r2, r2, r7\n\t"
+        "str	r3, [%[r], #448]\n\t"
+        "#  A[50] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[51] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[52] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[53] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[54] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[55] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[56] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [%[r], #452]\n\t"
+        "#  A[51] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[52] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[53] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[54] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[55] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[56] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[57] * A[57]\n\t"
+        "ldr	r10, [%[a], #228]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r2, r2, r5\n\t"
+        "adcs	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r2, [%[r], #456]\n\t"
+        "#  A[52] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r2, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[53] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[54] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[55] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[56] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[57] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	r2, r2, r7\n\t"
+        "str	r3, [%[r], #460]\n\t"
+        "#  A[53] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[54] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[55] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[56] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[57] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[58] * A[58]\n\t"
+        "ldr	r10, [%[a], #232]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [%[r], #464]\n\t"
+        "#  A[54] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[55] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[56] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[57] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[58] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r2, r2, r5\n\t"
+        "adcs	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r2, [%[r], #468]\n\t"
+        "#  A[55] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r2, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[56] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[57] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[58] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[59] * A[59]\n\t"
+        "ldr	r10, [%[a], #236]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	r2, r2, r7\n\t"
+        "str	r3, [%[r], #472]\n\t"
+        "#  A[56] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[57] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[58] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[59] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [%[r], #476]\n\t"
+        "#  A[57] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[58] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[59] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[60] * A[60]\n\t"
+        "ldr	r10, [%[a], #240]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r2, r2, r5\n\t"
+        "adcs	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r2, [%[r], #480]\n\t"
+        "#  A[58] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r2, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[59] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[60] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	r2, r2, r7\n\t"
+        "str	r3, [%[r], #484]\n\t"
+        "#  A[59] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r4, r4, r8\n\t"
+        "adcs	r2, r2, r9\n\t"
+        "adc	r3, r14, r14\n\t"
+        "adds	r4, r4, r8\n\t"
+        "adcs	r2, r2, r9\n\t"
+        "adc	r3, r3, r14\n\t"
+        "#  A[60] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r4, r4, r8\n\t"
+        "adcs	r2, r2, r9\n\t"
+        "adc	r3, r3, r14\n\t"
+        "adds	r4, r4, r8\n\t"
+        "adcs	r2, r2, r9\n\t"
+        "adc	r3, r3, r14\n\t"
+        "#  A[61] * A[61]\n\t"
+        "ldr	r10, [%[a], #244]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r4, r4, r8\n\t"
+        "adcs	r2, r2, r9\n\t"
+        "adc	r3, r3, r14\n\t"
+        "str	r4, [%[r], #488]\n\t"
+        "#  A[60] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r2, r2, r8\n\t"
+        "adcs	r3, r3, r9\n\t"
+        "adc	r4, r14, r14\n\t"
+        "adds	r2, r2, r8\n\t"
+        "adcs	r3, r3, r9\n\t"
+        "adc	r4, r4, r14\n\t"
+        "#  A[61] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r2, r2, r8\n\t"
+        "adcs	r3, r3, r9\n\t"
+        "adc	r4, r4, r14\n\t"
+        "adds	r2, r2, r8\n\t"
+        "adcs	r3, r3, r9\n\t"
+        "adc	r4, r4, r14\n\t"
+        "str	r2, [%[r], #492]\n\t"
+        "#  A[61] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r3, r3, r8\n\t"
+        "adcs	r4, r4, r9\n\t"
+        "adc	r2, r14, r14\n\t"
+        "adds	r3, r3, r8\n\t"
+        "adcs	r4, r4, r9\n\t"
+        "adc	r2, r2, r14\n\t"
+        "#  A[62] * A[62]\n\t"
+        "ldr	r10, [%[a], #248]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r3, r3, r8\n\t"
+        "adcs	r4, r4, r9\n\t"
+        "adc	r2, r2, r14\n\t"
+        "str	r3, [%[r], #496]\n\t"
+        "#  A[62] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r4, r4, r8\n\t"
+        "adcs	r2, r2, r9\n\t"
+        "adc	r3, r14, r14\n\t"
+        "adds	r4, r4, r8\n\t"
+        "adcs	r2, r2, r9\n\t"
+        "adc	r3, r3, r14\n\t"
+        "str	r4, [%[r], #500]\n\t"
+        "#  A[63] * A[63]\n\t"
+        "ldr	r10, [%[a], #252]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r2, r2, r8\n\t"
+        "adc	r3, r3, r9\n\t"
+        "str	r2, [%[r], #504]\n\t"
+        "str	r3, [%[r], #508]\n\t"
+        "ldr	r2, [sp, #0]\n\t"
+        "ldr	r3, [sp, #4]\n\t"
+        "ldr	r4, [sp, #8]\n\t"
+        "ldr	r8, [sp, #12]\n\t"
+        "str	r2, [%[r], #0]\n\t"
+        "str	r3, [%[r], #4]\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "str	r8, [%[r], #12]\n\t"
+        "ldr	r2, [sp, #16]\n\t"
+        "ldr	r3, [sp, #20]\n\t"
+        "ldr	r4, [sp, #24]\n\t"
+        "ldr	r8, [sp, #28]\n\t"
+        "str	r2, [%[r], #16]\n\t"
+        "str	r3, [%[r], #20]\n\t"
+        "str	r4, [%[r], #24]\n\t"
+        "str	r8, [%[r], #28]\n\t"
+        "ldr	r2, [sp, #32]\n\t"
+        "ldr	r3, [sp, #36]\n\t"
+        "ldr	r4, [sp, #40]\n\t"
+        "ldr	r8, [sp, #44]\n\t"
+        "str	r2, [%[r], #32]\n\t"
+        "str	r3, [%[r], #36]\n\t"
+        "str	r4, [%[r], #40]\n\t"
+        "str	r8, [%[r], #44]\n\t"
+        "ldr	r2, [sp, #48]\n\t"
+        "ldr	r3, [sp, #52]\n\t"
+        "ldr	r4, [sp, #56]\n\t"
+        "ldr	r8, [sp, #60]\n\t"
+        "str	r2, [%[r], #48]\n\t"
+        "str	r3, [%[r], #52]\n\t"
+        "str	r4, [%[r], #56]\n\t"
+        "str	r8, [%[r], #60]\n\t"
+        "ldr	r2, [sp, #64]\n\t"
+        "ldr	r3, [sp, #68]\n\t"
+        "ldr	r4, [sp, #72]\n\t"
+        "ldr	r8, [sp, #76]\n\t"
+        "str	r2, [%[r], #64]\n\t"
+        "str	r3, [%[r], #68]\n\t"
+        "str	r4, [%[r], #72]\n\t"
+        "str	r8, [%[r], #76]\n\t"
+        "ldr	r2, [sp, #80]\n\t"
+        "ldr	r3, [sp, #84]\n\t"
+        "ldr	r4, [sp, #88]\n\t"
+        "ldr	r8, [sp, #92]\n\t"
+        "str	r2, [%[r], #80]\n\t"
+        "str	r3, [%[r], #84]\n\t"
+        "str	r4, [%[r], #88]\n\t"
+        "str	r8, [%[r], #92]\n\t"
+        "ldr	r2, [sp, #96]\n\t"
+        "ldr	r3, [sp, #100]\n\t"
+        "ldr	r4, [sp, #104]\n\t"
+        "ldr	r8, [sp, #108]\n\t"
+        "str	r2, [%[r], #96]\n\t"
+        "str	r3, [%[r], #100]\n\t"
+        "str	r4, [%[r], #104]\n\t"
+        "str	r8, [%[r], #108]\n\t"
+        "ldr	r2, [sp, #112]\n\t"
+        "ldr	r3, [sp, #116]\n\t"
+        "ldr	r4, [sp, #120]\n\t"
+        "ldr	r8, [sp, #124]\n\t"
+        "str	r2, [%[r], #112]\n\t"
+        "str	r3, [%[r], #116]\n\t"
+        "str	r4, [%[r], #120]\n\t"
+        "str	r8, [%[r], #124]\n\t"
+        "ldr	r2, [sp, #128]\n\t"
+        "ldr	r3, [sp, #132]\n\t"
+        "ldr	r4, [sp, #136]\n\t"
+        "ldr	r8, [sp, #140]\n\t"
+        "str	r2, [%[r], #128]\n\t"
+        "str	r3, [%[r], #132]\n\t"
+        "str	r4, [%[r], #136]\n\t"
+        "str	r8, [%[r], #140]\n\t"
+        "ldr	r2, [sp, #144]\n\t"
+        "ldr	r3, [sp, #148]\n\t"
+        "ldr	r4, [sp, #152]\n\t"
+        "ldr	r8, [sp, #156]\n\t"
+        "str	r2, [%[r], #144]\n\t"
+        "str	r3, [%[r], #148]\n\t"
+        "str	r4, [%[r], #152]\n\t"
+        "str	r8, [%[r], #156]\n\t"
+        "ldr	r2, [sp, #160]\n\t"
+        "ldr	r3, [sp, #164]\n\t"
+        "ldr	r4, [sp, #168]\n\t"
+        "ldr	r8, [sp, #172]\n\t"
+        "str	r2, [%[r], #160]\n\t"
+        "str	r3, [%[r], #164]\n\t"
+        "str	r4, [%[r], #168]\n\t"
+        "str	r8, [%[r], #172]\n\t"
+        "ldr	r2, [sp, #176]\n\t"
+        "ldr	r3, [sp, #180]\n\t"
+        "ldr	r4, [sp, #184]\n\t"
+        "ldr	r8, [sp, #188]\n\t"
+        "str	r2, [%[r], #176]\n\t"
+        "str	r3, [%[r], #180]\n\t"
+        "str	r4, [%[r], #184]\n\t"
+        "str	r8, [%[r], #188]\n\t"
+        "ldr	r2, [sp, #192]\n\t"
+        "ldr	r3, [sp, #196]\n\t"
+        "ldr	r4, [sp, #200]\n\t"
+        "ldr	r8, [sp, #204]\n\t"
+        "str	r2, [%[r], #192]\n\t"
+        "str	r3, [%[r], #196]\n\t"
+        "str	r4, [%[r], #200]\n\t"
+        "str	r8, [%[r], #204]\n\t"
+        "ldr	r2, [sp, #208]\n\t"
+        "ldr	r3, [sp, #212]\n\t"
+        "ldr	r4, [sp, #216]\n\t"
+        "ldr	r8, [sp, #220]\n\t"
+        "str	r2, [%[r], #208]\n\t"
+        "str	r3, [%[r], #212]\n\t"
+        "str	r4, [%[r], #216]\n\t"
+        "str	r8, [%[r], #220]\n\t"
+        "ldr	r2, [sp, #224]\n\t"
+        "ldr	r3, [sp, #228]\n\t"
+        "ldr	r4, [sp, #232]\n\t"
+        "ldr	r8, [sp, #236]\n\t"
+        "str	r2, [%[r], #224]\n\t"
+        "str	r3, [%[r], #228]\n\t"
+        "str	r4, [%[r], #232]\n\t"
+        "str	r8, [%[r], #236]\n\t"
+        "ldr	r2, [sp, #240]\n\t"
+        "ldr	r3, [sp, #244]\n\t"
+        "ldr	r4, [sp, #248]\n\t"
+        "ldr	r8, [sp, #252]\n\t"
+        "str	r2, [%[r], #240]\n\t"
+        "str	r3, [%[r], #244]\n\t"
+        "str	r4, [%[r], #248]\n\t"
+        "str	r8, [%[r], #252]\n\t"
+        "add	sp, sp, #256\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r2", "r3", "r4", "r8", "r9", "r10", "r8", "r5", "r6", "r7", "r14"
+    );
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a)
+{
+    sp_digit* z0 = r;
+    sp_digit z2[128];
+    sp_digit z1[128];
+    sp_digit a1[64];
+    sp_digit u;
+
+    u = sp_2048_add_64(a1, a, &a[64]);
+    sp_2048_sqr_64(z1, a1);
+    sp_2048_sqr_64(z2, &a[64]);
+    sp_2048_sqr_64(z0, a);
+    sp_2048_mask_64(r + 128, a1, 0 - u);
+    u += sp_2048_add_64(r + 128, r + 128, r + 128);
+    u += sp_4096_sub_in_place_128(z1, z2);
+    u += sp_4096_sub_in_place_128(z1, z0);
+    u += sp_4096_add_128(r + 64, r + 64, z1);
+    r[192] = u;
+    XMEMSET(r + 192 + 1, 0, sizeof(sp_digit) * (64 - 1));
+    (void)sp_4096_add_128(r + 128, r + 128, z2);
+}
+
+#endif /* !WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "add	r12, %[a], #512\n\t"
+        "\n1:\n\t"
+        "adds	%[c], %[c], #-1\n\t"
+        "ldr	r4, [%[a]], #4\n\t"
+        "ldr	r5, [%[a]], #4\n\t"
+        "ldr	r6, [%[a]], #4\n\t"
+        "ldr	r7, [%[a]], #4\n\t"
+        "ldr	r8, [%[b]], #4\n\t"
+        "ldr	r9, [%[b]], #4\n\t"
+        "ldr	r10, [%[b]], #4\n\t"
+        "ldr	r14, [%[b]], #4\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r]], #4\n\t"
+        "str	r5, [%[r]], #4\n\t"
+        "str	r6, [%[r]], #4\n\t"
+        "str	r7, [%[r]], #4\n\t"
+        "mov	r4, #0\n\t"
+        "adc	%[c], r4, #0\n\t"
+        "cmp	%[a], r12\n\t"
+        "bne	1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into a. (a -= b)
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+static sp_digit sp_4096_sub_in_place_128(sp_digit* a, const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r14, #0\n\t"
+        "add	r12, %[a], #512\n\t"
+        "\n1:\n\t"
+        "subs	%[c], r14, %[c]\n\t"
+        "ldr	r3, [%[a]]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[a], #8]\n\t"
+        "ldr	r6, [%[a], #12]\n\t"
+        "ldr	r7, [%[b]], #4\n\t"
+        "ldr	r8, [%[b]], #4\n\t"
+        "ldr	r9, [%[b]], #4\n\t"
+        "ldr	r10, [%[b]], #4\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "sbcs	r6, r6, r10\n\t"
+        "str	r3, [%[a]], #4\n\t"
+        "str	r4, [%[a]], #4\n\t"
+        "str	r5, [%[a]], #4\n\t"
+        "str	r6, [%[a]], #4\n\t"
+        "sbc	%[c], r14, r14\n\t"
+        "cmp	%[a], r12\n\t"
+        "bne	1b\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+static void sp_4096_mul_128(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+    __asm__ __volatile__ (
+        "sub	sp, sp, #1024\n\t"
+        "mov	r5, #0\n\t"
+        "mov	r6, #0\n\t"
+        "mov	r7, #0\n\t"
+        "mov	r8, #0\n\t"
+        "\n1:\n\t"
+        "subs	r3, r5, #508\n\t"
+        "it	cc\n\t"
+        "movcc	r3, #0\n\t"
+        "sub	r4, r5, r3\n\t"
+        "\n2:\n\t"
+        "ldr	r14, [%[a], r3]\n\t"
+        "ldr	r12, [%[b], r4]\n\t"
+        "umull	r9, r10, r14, r12\n\t"
+        "adds	r6, r6, r9\n\t"
+        "adcs	r7, r7, r10\n\t"
+        "adc	r8, r8, #0\n\t"
+        "add	r3, r3, #4\n\t"
+        "sub	r4, r4, #4\n\t"
+        "cmp	r3, #512\n\t"
+        "beq	3f\n\t"
+        "cmp	r3, r5\n\t"
+        "ble	2b\n\t"
+        "\n3:\n\t"
+        "str	r6, [sp, r5]\n\t"
+        "mov	r6, r7\n\t"
+        "mov	r7, r8\n\t"
+        "mov	r8, #0\n\t"
+        "add	r5, r5, #4\n\t"
+        "cmp	r5, #1016\n\t"
+        "ble	1b\n\t"
+        "str	r6, [sp, r5]\n\t"
+        "\n4:\n\t"
+        "ldr	r6, [sp, #0]\n\t"
+        "ldr	r7, [sp, #4]\n\t"
+        "ldr	r8, [sp, #8]\n\t"
+        "ldr	r3, [sp, #12]\n\t"
+        "str	r6, [%[r], #0]\n\t"
+        "str	r7, [%[r], #4]\n\t"
+        "str	r8, [%[r], #8]\n\t"
+        "str	r3, [%[r], #12]\n\t"
+        "add	sp, sp, #16\n\t"
+        "add	%[r], %[r], #16\n\t"
+        "subs	r5, r5, #16\n\t"
+        "bgt	4b\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+    );
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "sub	sp, sp, #1024\n\t"
+        "mov	r12, #0\n\t"
+        "mov	r6, #0\n\t"
+        "mov	r7, #0\n\t"
+        "mov	r8, #0\n\t"
+        "mov	r5, #0\n\t"
+        "\n1:\n\t"
+        "subs	r3, r5, #508\n\t"
+        "it	cc\n\t"
+        "movcc	r3, r12\n\t"
+        "sub	r4, r5, r3\n\t"
+        "\n2:\n\t"
+        "cmp	r4, r3\n\t"
+        "beq	4f\n\t"
+        "ldr	r14, [%[a], r3]\n\t"
+        "ldr	r9, [%[a], r4]\n\t"
+        "umull	r9, r10, r14, r9\n\t"
+        "adds	r6, r6, r9\n\t"
+        "adcs	r7, r7, r10\n\t"
+        "adc	r8, r8, r12\n\t"
+        "adds	r6, r6, r9\n\t"
+        "adcs	r7, r7, r10\n\t"
+        "adc	r8, r8, r12\n\t"
+        "bal	5f\n\t"
+        "\n4:\n\t"
+        "ldr	r14, [%[a], r3]\n\t"
+        "umull	r9, r10, r14, r14\n\t"
+        "adds	r6, r6, r9\n\t"
+        "adcs	r7, r7, r10\n\t"
+        "adc	r8, r8, r12\n\t"
+        "\n5:\n\t"
+        "add	r3, r3, #4\n\t"
+        "sub	r4, r4, #4\n\t"
+        "cmp	r3, #512\n\t"
+        "beq	3f\n\t"
+        "cmp	r3, r4\n\t"
+        "bgt	3f\n\t"
+        "cmp	r3, r5\n\t"
+        "ble	2b\n\t"
+        "\n3:\n\t"
+        "str	r6, [sp, r5]\n\t"
+        "mov	r6, r7\n\t"
+        "mov	r7, r8\n\t"
+        "mov	r8, #0\n\t"
+        "add	r5, r5, #4\n\t"
+        "cmp	r5, #1016\n\t"
+        "ble	1b\n\t"
+        "str	r6, [sp, r5]\n\t"
+        "\n4:\n\t"
+        "ldr	r6, [sp, #0]\n\t"
+        "ldr	r7, [sp, #4]\n\t"
+        "ldr	r8, [sp, #8]\n\t"
+        "ldr	r3, [sp, #12]\n\t"
+        "str	r6, [%[r], #0]\n\t"
+        "str	r7, [%[r], #4]\n\t"
+        "str	r8, [%[r], #8]\n\t"
+        "str	r3, [%[r], #12]\n\t"
+        "add	sp, sp, #16\n\t"
+        "add	%[r], %[r], #16\n\t"
+        "subs	r5, r5, #16\n\t"
+        "bgt	4b\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r9", "r12"
+    );
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Caclulate the bottom digit of -1/a mod 2^n.
+ *
+ * a    A single precision number.
+ * rho  Bottom word of inverse.
+ */
+static void sp_4096_mont_setup(const sp_digit* a, sp_digit* rho)
+{
+    sp_digit x, b;
+
+    b = a[0];
+    x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
+
+    /* rho = -1/m mod b */
+    *rho = -x;
+}
+
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision digit.
+ */
+static void sp_4096_mul_d_128(sp_digit* r, const sp_digit* a,
+        sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    __asm__ __volatile__ (
+        "mov	r10, #0\n\t"
+        "# A[0] * B\n\t"
+        "ldr	r8, [%[a]]\n\t"
+        "umull	r5, r3, %[b], r8\n\t"
+        "mov	r4, #0\n\t"
+        "str	r5, [%[r]]\n\t"
+        "mov	r5, #0\n\t"
+        "mov	r9, #4\n\t"
+        "1:\n\t"
+        "ldr	r8, [%[a], r9]\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], r9]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "mov	r5, #0\n\t"
+        "add	r9, r9, #4\n\t"
+        "cmp	r9, #512\n\t"
+        "blt	1b\n\t"
+        "str	r3, [%[r], #512]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
+    );
+#else
+    __asm__ __volatile__ (
+        "mov	r10, #0\n\t"
+        "# A[0] * B\n\t"
+        "ldr	r8, [%[a]]\n\t"
+        "umull	r3, r4, %[b], r8\n\t"
+        "mov	r5, #0\n\t"
+        "str	r3, [%[r]]\n\t"
+        "# A[1] * B\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #4]\n\t"
+        "# A[2] * B\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #8]\n\t"
+        "# A[3] * B\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #12]\n\t"
+        "# A[4] * B\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "# A[5] * B\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #20]\n\t"
+        "# A[6] * B\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #24]\n\t"
+        "# A[7] * B\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #28]\n\t"
+        "# A[8] * B\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #32]\n\t"
+        "# A[9] * B\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #36]\n\t"
+        "# A[10] * B\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #40]\n\t"
+        "# A[11] * B\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #44]\n\t"
+        "# A[12] * B\n\t"
+        "ldr	r8, [%[a], #48]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #48]\n\t"
+        "# A[13] * B\n\t"
+        "ldr	r8, [%[a], #52]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #52]\n\t"
+        "# A[14] * B\n\t"
+        "ldr	r8, [%[a], #56]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #56]\n\t"
+        "# A[15] * B\n\t"
+        "ldr	r8, [%[a], #60]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #60]\n\t"
+        "# A[16] * B\n\t"
+        "ldr	r8, [%[a], #64]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #64]\n\t"
+        "# A[17] * B\n\t"
+        "ldr	r8, [%[a], #68]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #68]\n\t"
+        "# A[18] * B\n\t"
+        "ldr	r8, [%[a], #72]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #72]\n\t"
+        "# A[19] * B\n\t"
+        "ldr	r8, [%[a], #76]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #76]\n\t"
+        "# A[20] * B\n\t"
+        "ldr	r8, [%[a], #80]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #80]\n\t"
+        "# A[21] * B\n\t"
+        "ldr	r8, [%[a], #84]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #84]\n\t"
+        "# A[22] * B\n\t"
+        "ldr	r8, [%[a], #88]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #88]\n\t"
+        "# A[23] * B\n\t"
+        "ldr	r8, [%[a], #92]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #92]\n\t"
+        "# A[24] * B\n\t"
+        "ldr	r8, [%[a], #96]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #96]\n\t"
+        "# A[25] * B\n\t"
+        "ldr	r8, [%[a], #100]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #100]\n\t"
+        "# A[26] * B\n\t"
+        "ldr	r8, [%[a], #104]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #104]\n\t"
+        "# A[27] * B\n\t"
+        "ldr	r8, [%[a], #108]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #108]\n\t"
+        "# A[28] * B\n\t"
+        "ldr	r8, [%[a], #112]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #112]\n\t"
+        "# A[29] * B\n\t"
+        "ldr	r8, [%[a], #116]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #116]\n\t"
+        "# A[30] * B\n\t"
+        "ldr	r8, [%[a], #120]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #120]\n\t"
+        "# A[31] * B\n\t"
+        "ldr	r8, [%[a], #124]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #124]\n\t"
+        "# A[32] * B\n\t"
+        "ldr	r8, [%[a], #128]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #128]\n\t"
+        "# A[33] * B\n\t"
+        "ldr	r8, [%[a], #132]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #132]\n\t"
+        "# A[34] * B\n\t"
+        "ldr	r8, [%[a], #136]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #136]\n\t"
+        "# A[35] * B\n\t"
+        "ldr	r8, [%[a], #140]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #140]\n\t"
+        "# A[36] * B\n\t"
+        "ldr	r8, [%[a], #144]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #144]\n\t"
+        "# A[37] * B\n\t"
+        "ldr	r8, [%[a], #148]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #148]\n\t"
+        "# A[38] * B\n\t"
+        "ldr	r8, [%[a], #152]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #152]\n\t"
+        "# A[39] * B\n\t"
+        "ldr	r8, [%[a], #156]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #156]\n\t"
+        "# A[40] * B\n\t"
+        "ldr	r8, [%[a], #160]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #160]\n\t"
+        "# A[41] * B\n\t"
+        "ldr	r8, [%[a], #164]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #164]\n\t"
+        "# A[42] * B\n\t"
+        "ldr	r8, [%[a], #168]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #168]\n\t"
+        "# A[43] * B\n\t"
+        "ldr	r8, [%[a], #172]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #172]\n\t"
+        "# A[44] * B\n\t"
+        "ldr	r8, [%[a], #176]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #176]\n\t"
+        "# A[45] * B\n\t"
+        "ldr	r8, [%[a], #180]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #180]\n\t"
+        "# A[46] * B\n\t"
+        "ldr	r8, [%[a], #184]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #184]\n\t"
+        "# A[47] * B\n\t"
+        "ldr	r8, [%[a], #188]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #188]\n\t"
+        "# A[48] * B\n\t"
+        "ldr	r8, [%[a], #192]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #192]\n\t"
+        "# A[49] * B\n\t"
+        "ldr	r8, [%[a], #196]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #196]\n\t"
+        "# A[50] * B\n\t"
+        "ldr	r8, [%[a], #200]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #200]\n\t"
+        "# A[51] * B\n\t"
+        "ldr	r8, [%[a], #204]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #204]\n\t"
+        "# A[52] * B\n\t"
+        "ldr	r8, [%[a], #208]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #208]\n\t"
+        "# A[53] * B\n\t"
+        "ldr	r8, [%[a], #212]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #212]\n\t"
+        "# A[54] * B\n\t"
+        "ldr	r8, [%[a], #216]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #216]\n\t"
+        "# A[55] * B\n\t"
+        "ldr	r8, [%[a], #220]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #220]\n\t"
+        "# A[56] * B\n\t"
+        "ldr	r8, [%[a], #224]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #224]\n\t"
+        "# A[57] * B\n\t"
+        "ldr	r8, [%[a], #228]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #228]\n\t"
+        "# A[58] * B\n\t"
+        "ldr	r8, [%[a], #232]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #232]\n\t"
+        "# A[59] * B\n\t"
+        "ldr	r8, [%[a], #236]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #236]\n\t"
+        "# A[60] * B\n\t"
+        "ldr	r8, [%[a], #240]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #240]\n\t"
+        "# A[61] * B\n\t"
+        "ldr	r8, [%[a], #244]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #244]\n\t"
+        "# A[62] * B\n\t"
+        "ldr	r8, [%[a], #248]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #248]\n\t"
+        "# A[63] * B\n\t"
+        "ldr	r8, [%[a], #252]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #252]\n\t"
+        "# A[64] * B\n\t"
+        "ldr	r8, [%[a], #256]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #256]\n\t"
+        "# A[65] * B\n\t"
+        "ldr	r8, [%[a], #260]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #260]\n\t"
+        "# A[66] * B\n\t"
+        "ldr	r8, [%[a], #264]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #264]\n\t"
+        "# A[67] * B\n\t"
+        "ldr	r8, [%[a], #268]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #268]\n\t"
+        "# A[68] * B\n\t"
+        "ldr	r8, [%[a], #272]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #272]\n\t"
+        "# A[69] * B\n\t"
+        "ldr	r8, [%[a], #276]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #276]\n\t"
+        "# A[70] * B\n\t"
+        "ldr	r8, [%[a], #280]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #280]\n\t"
+        "# A[71] * B\n\t"
+        "ldr	r8, [%[a], #284]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #284]\n\t"
+        "# A[72] * B\n\t"
+        "ldr	r8, [%[a], #288]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #288]\n\t"
+        "# A[73] * B\n\t"
+        "ldr	r8, [%[a], #292]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #292]\n\t"
+        "# A[74] * B\n\t"
+        "ldr	r8, [%[a], #296]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #296]\n\t"
+        "# A[75] * B\n\t"
+        "ldr	r8, [%[a], #300]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #300]\n\t"
+        "# A[76] * B\n\t"
+        "ldr	r8, [%[a], #304]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #304]\n\t"
+        "# A[77] * B\n\t"
+        "ldr	r8, [%[a], #308]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #308]\n\t"
+        "# A[78] * B\n\t"
+        "ldr	r8, [%[a], #312]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #312]\n\t"
+        "# A[79] * B\n\t"
+        "ldr	r8, [%[a], #316]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #316]\n\t"
+        "# A[80] * B\n\t"
+        "ldr	r8, [%[a], #320]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #320]\n\t"
+        "# A[81] * B\n\t"
+        "ldr	r8, [%[a], #324]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #324]\n\t"
+        "# A[82] * B\n\t"
+        "ldr	r8, [%[a], #328]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #328]\n\t"
+        "# A[83] * B\n\t"
+        "ldr	r8, [%[a], #332]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #332]\n\t"
+        "# A[84] * B\n\t"
+        "ldr	r8, [%[a], #336]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #336]\n\t"
+        "# A[85] * B\n\t"
+        "ldr	r8, [%[a], #340]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #340]\n\t"
+        "# A[86] * B\n\t"
+        "ldr	r8, [%[a], #344]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #344]\n\t"
+        "# A[87] * B\n\t"
+        "ldr	r8, [%[a], #348]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #348]\n\t"
+        "# A[88] * B\n\t"
+        "ldr	r8, [%[a], #352]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #352]\n\t"
+        "# A[89] * B\n\t"
+        "ldr	r8, [%[a], #356]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #356]\n\t"
+        "# A[90] * B\n\t"
+        "ldr	r8, [%[a], #360]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #360]\n\t"
+        "# A[91] * B\n\t"
+        "ldr	r8, [%[a], #364]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #364]\n\t"
+        "# A[92] * B\n\t"
+        "ldr	r8, [%[a], #368]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #368]\n\t"
+        "# A[93] * B\n\t"
+        "ldr	r8, [%[a], #372]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #372]\n\t"
+        "# A[94] * B\n\t"
+        "ldr	r8, [%[a], #376]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #376]\n\t"
+        "# A[95] * B\n\t"
+        "ldr	r8, [%[a], #380]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #380]\n\t"
+        "# A[96] * B\n\t"
+        "ldr	r8, [%[a], #384]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #384]\n\t"
+        "# A[97] * B\n\t"
+        "ldr	r8, [%[a], #388]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #388]\n\t"
+        "# A[98] * B\n\t"
+        "ldr	r8, [%[a], #392]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #392]\n\t"
+        "# A[99] * B\n\t"
+        "ldr	r8, [%[a], #396]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #396]\n\t"
+        "# A[100] * B\n\t"
+        "ldr	r8, [%[a], #400]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #400]\n\t"
+        "# A[101] * B\n\t"
+        "ldr	r8, [%[a], #404]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #404]\n\t"
+        "# A[102] * B\n\t"
+        "ldr	r8, [%[a], #408]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #408]\n\t"
+        "# A[103] * B\n\t"
+        "ldr	r8, [%[a], #412]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #412]\n\t"
+        "# A[104] * B\n\t"
+        "ldr	r8, [%[a], #416]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #416]\n\t"
+        "# A[105] * B\n\t"
+        "ldr	r8, [%[a], #420]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #420]\n\t"
+        "# A[106] * B\n\t"
+        "ldr	r8, [%[a], #424]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #424]\n\t"
+        "# A[107] * B\n\t"
+        "ldr	r8, [%[a], #428]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #428]\n\t"
+        "# A[108] * B\n\t"
+        "ldr	r8, [%[a], #432]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #432]\n\t"
+        "# A[109] * B\n\t"
+        "ldr	r8, [%[a], #436]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #436]\n\t"
+        "# A[110] * B\n\t"
+        "ldr	r8, [%[a], #440]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #440]\n\t"
+        "# A[111] * B\n\t"
+        "ldr	r8, [%[a], #444]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #444]\n\t"
+        "# A[112] * B\n\t"
+        "ldr	r8, [%[a], #448]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #448]\n\t"
+        "# A[113] * B\n\t"
+        "ldr	r8, [%[a], #452]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #452]\n\t"
+        "# A[114] * B\n\t"
+        "ldr	r8, [%[a], #456]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #456]\n\t"
+        "# A[115] * B\n\t"
+        "ldr	r8, [%[a], #460]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #460]\n\t"
+        "# A[116] * B\n\t"
+        "ldr	r8, [%[a], #464]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #464]\n\t"
+        "# A[117] * B\n\t"
+        "ldr	r8, [%[a], #468]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #468]\n\t"
+        "# A[118] * B\n\t"
+        "ldr	r8, [%[a], #472]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #472]\n\t"
+        "# A[119] * B\n\t"
+        "ldr	r8, [%[a], #476]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #476]\n\t"
+        "# A[120] * B\n\t"
+        "ldr	r8, [%[a], #480]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #480]\n\t"
+        "# A[121] * B\n\t"
+        "ldr	r8, [%[a], #484]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #484]\n\t"
+        "# A[122] * B\n\t"
+        "ldr	r8, [%[a], #488]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #488]\n\t"
+        "# A[123] * B\n\t"
+        "ldr	r8, [%[a], #492]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #492]\n\t"
+        "# A[124] * B\n\t"
+        "ldr	r8, [%[a], #496]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #496]\n\t"
+        "# A[125] * B\n\t"
+        "ldr	r8, [%[a], #500]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #500]\n\t"
+        "# A[126] * B\n\t"
+        "ldr	r8, [%[a], #504]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #504]\n\t"
+        "# A[127] * B\n\t"
+        "ldr	r8, [%[a], #508]\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adc	r5, r5, r7\n\t"
+        "str	r4, [%[r], #508]\n\t"
+        "str	r5, [%[r], #512]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
+    );
+#endif
+}
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 4096 bits, just need to subtract.
+ *
+ * r  A single precision number.
+ * m  A single precision number.
+ */
+static void sp_4096_mont_norm_128(sp_digit* r, const sp_digit* m)
+{
+    XMEMSET(r, 0, sizeof(sp_digit) * 128);
+
+    /* r = 2^n mod m */
+    sp_4096_sub_in_place_128(r, m);
+}
+
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * r  A single precision number representing condition subtract result.
+ * a  A single precision number to subtract from.
+ * b  A single precision number to subtract.
+ * m  Mask value to apply.
+ */
+static sp_digit sp_4096_cond_sub_128(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        sp_digit m)
+{
+    sp_digit c = 0;
+
+#ifdef WOLFSSL_SP_SMALL
+    __asm__ __volatile__ (
+        "mov	r9, #0\n\t"
+        "mov	r8, #0\n\t"
+        "1:\n\t"
+        "subs	%[c], r9, %[c]\n\t"
+        "ldr	r4, [%[a], r8]\n\t"
+        "ldr	r5, [%[b], r8]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbc	%[c], r9, r9\n\t"
+        "str	r4, [%[r], r8]\n\t"
+        "add	r8, r8, #4\n\t"
+        "cmp	r8, #512\n\t"
+        "blt	1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r4", "r6", "r5", "r7", "r8", "r9"
+    );
+#else
+    __asm__ __volatile__ (
+
+        "mov	r9, #0\n\t"
+        "ldr	r4, [%[a], #0]\n\t"
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r5, [%[b], #0]\n\t"
+        "ldr	r7, [%[b], #4]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "subs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #0]\n\t"
+        "str	r6, [%[r], #4]\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "ldr	r6, [%[a], #12]\n\t"
+        "ldr	r5, [%[b], #8]\n\t"
+        "ldr	r7, [%[b], #12]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "str	r6, [%[r], #12]\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "ldr	r6, [%[a], #20]\n\t"
+        "ldr	r5, [%[b], #16]\n\t"
+        "ldr	r7, [%[b], #20]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "str	r6, [%[r], #20]\n\t"
+        "ldr	r4, [%[a], #24]\n\t"
+        "ldr	r6, [%[a], #28]\n\t"
+        "ldr	r5, [%[b], #24]\n\t"
+        "ldr	r7, [%[b], #28]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #24]\n\t"
+        "str	r6, [%[r], #28]\n\t"
+        "ldr	r4, [%[a], #32]\n\t"
+        "ldr	r6, [%[a], #36]\n\t"
+        "ldr	r5, [%[b], #32]\n\t"
+        "ldr	r7, [%[b], #36]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #32]\n\t"
+        "str	r6, [%[r], #36]\n\t"
+        "ldr	r4, [%[a], #40]\n\t"
+        "ldr	r6, [%[a], #44]\n\t"
+        "ldr	r5, [%[b], #40]\n\t"
+        "ldr	r7, [%[b], #44]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #40]\n\t"
+        "str	r6, [%[r], #44]\n\t"
+        "ldr	r4, [%[a], #48]\n\t"
+        "ldr	r6, [%[a], #52]\n\t"
+        "ldr	r5, [%[b], #48]\n\t"
+        "ldr	r7, [%[b], #52]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #48]\n\t"
+        "str	r6, [%[r], #52]\n\t"
+        "ldr	r4, [%[a], #56]\n\t"
+        "ldr	r6, [%[a], #60]\n\t"
+        "ldr	r5, [%[b], #56]\n\t"
+        "ldr	r7, [%[b], #60]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #56]\n\t"
+        "str	r6, [%[r], #60]\n\t"
+        "ldr	r4, [%[a], #64]\n\t"
+        "ldr	r6, [%[a], #68]\n\t"
+        "ldr	r5, [%[b], #64]\n\t"
+        "ldr	r7, [%[b], #68]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #64]\n\t"
+        "str	r6, [%[r], #68]\n\t"
+        "ldr	r4, [%[a], #72]\n\t"
+        "ldr	r6, [%[a], #76]\n\t"
+        "ldr	r5, [%[b], #72]\n\t"
+        "ldr	r7, [%[b], #76]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #72]\n\t"
+        "str	r6, [%[r], #76]\n\t"
+        "ldr	r4, [%[a], #80]\n\t"
+        "ldr	r6, [%[a], #84]\n\t"
+        "ldr	r5, [%[b], #80]\n\t"
+        "ldr	r7, [%[b], #84]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #80]\n\t"
+        "str	r6, [%[r], #84]\n\t"
+        "ldr	r4, [%[a], #88]\n\t"
+        "ldr	r6, [%[a], #92]\n\t"
+        "ldr	r5, [%[b], #88]\n\t"
+        "ldr	r7, [%[b], #92]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #88]\n\t"
+        "str	r6, [%[r], #92]\n\t"
+        "ldr	r4, [%[a], #96]\n\t"
+        "ldr	r6, [%[a], #100]\n\t"
+        "ldr	r5, [%[b], #96]\n\t"
+        "ldr	r7, [%[b], #100]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #96]\n\t"
+        "str	r6, [%[r], #100]\n\t"
+        "ldr	r4, [%[a], #104]\n\t"
+        "ldr	r6, [%[a], #108]\n\t"
+        "ldr	r5, [%[b], #104]\n\t"
+        "ldr	r7, [%[b], #108]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #104]\n\t"
+        "str	r6, [%[r], #108]\n\t"
+        "ldr	r4, [%[a], #112]\n\t"
+        "ldr	r6, [%[a], #116]\n\t"
+        "ldr	r5, [%[b], #112]\n\t"
+        "ldr	r7, [%[b], #116]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #112]\n\t"
+        "str	r6, [%[r], #116]\n\t"
+        "ldr	r4, [%[a], #120]\n\t"
+        "ldr	r6, [%[a], #124]\n\t"
+        "ldr	r5, [%[b], #120]\n\t"
+        "ldr	r7, [%[b], #124]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #120]\n\t"
+        "str	r6, [%[r], #124]\n\t"
+        "ldr	r4, [%[a], #128]\n\t"
+        "ldr	r6, [%[a], #132]\n\t"
+        "ldr	r5, [%[b], #128]\n\t"
+        "ldr	r7, [%[b], #132]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #128]\n\t"
+        "str	r6, [%[r], #132]\n\t"
+        "ldr	r4, [%[a], #136]\n\t"
+        "ldr	r6, [%[a], #140]\n\t"
+        "ldr	r5, [%[b], #136]\n\t"
+        "ldr	r7, [%[b], #140]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #136]\n\t"
+        "str	r6, [%[r], #140]\n\t"
+        "ldr	r4, [%[a], #144]\n\t"
+        "ldr	r6, [%[a], #148]\n\t"
+        "ldr	r5, [%[b], #144]\n\t"
+        "ldr	r7, [%[b], #148]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #144]\n\t"
+        "str	r6, [%[r], #148]\n\t"
+        "ldr	r4, [%[a], #152]\n\t"
+        "ldr	r6, [%[a], #156]\n\t"
+        "ldr	r5, [%[b], #152]\n\t"
+        "ldr	r7, [%[b], #156]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #152]\n\t"
+        "str	r6, [%[r], #156]\n\t"
+        "ldr	r4, [%[a], #160]\n\t"
+        "ldr	r6, [%[a], #164]\n\t"
+        "ldr	r5, [%[b], #160]\n\t"
+        "ldr	r7, [%[b], #164]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #160]\n\t"
+        "str	r6, [%[r], #164]\n\t"
+        "ldr	r4, [%[a], #168]\n\t"
+        "ldr	r6, [%[a], #172]\n\t"
+        "ldr	r5, [%[b], #168]\n\t"
+        "ldr	r7, [%[b], #172]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #168]\n\t"
+        "str	r6, [%[r], #172]\n\t"
+        "ldr	r4, [%[a], #176]\n\t"
+        "ldr	r6, [%[a], #180]\n\t"
+        "ldr	r5, [%[b], #176]\n\t"
+        "ldr	r7, [%[b], #180]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #176]\n\t"
+        "str	r6, [%[r], #180]\n\t"
+        "ldr	r4, [%[a], #184]\n\t"
+        "ldr	r6, [%[a], #188]\n\t"
+        "ldr	r5, [%[b], #184]\n\t"
+        "ldr	r7, [%[b], #188]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #184]\n\t"
+        "str	r6, [%[r], #188]\n\t"
+        "ldr	r4, [%[a], #192]\n\t"
+        "ldr	r6, [%[a], #196]\n\t"
+        "ldr	r5, [%[b], #192]\n\t"
+        "ldr	r7, [%[b], #196]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #192]\n\t"
+        "str	r6, [%[r], #196]\n\t"
+        "ldr	r4, [%[a], #200]\n\t"
+        "ldr	r6, [%[a], #204]\n\t"
+        "ldr	r5, [%[b], #200]\n\t"
+        "ldr	r7, [%[b], #204]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #200]\n\t"
+        "str	r6, [%[r], #204]\n\t"
+        "ldr	r4, [%[a], #208]\n\t"
+        "ldr	r6, [%[a], #212]\n\t"
+        "ldr	r5, [%[b], #208]\n\t"
+        "ldr	r7, [%[b], #212]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #208]\n\t"
+        "str	r6, [%[r], #212]\n\t"
+        "ldr	r4, [%[a], #216]\n\t"
+        "ldr	r6, [%[a], #220]\n\t"
+        "ldr	r5, [%[b], #216]\n\t"
+        "ldr	r7, [%[b], #220]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #216]\n\t"
+        "str	r6, [%[r], #220]\n\t"
+        "ldr	r4, [%[a], #224]\n\t"
+        "ldr	r6, [%[a], #228]\n\t"
+        "ldr	r5, [%[b], #224]\n\t"
+        "ldr	r7, [%[b], #228]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #224]\n\t"
+        "str	r6, [%[r], #228]\n\t"
+        "ldr	r4, [%[a], #232]\n\t"
+        "ldr	r6, [%[a], #236]\n\t"
+        "ldr	r5, [%[b], #232]\n\t"
+        "ldr	r7, [%[b], #236]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #232]\n\t"
+        "str	r6, [%[r], #236]\n\t"
+        "ldr	r4, [%[a], #240]\n\t"
+        "ldr	r6, [%[a], #244]\n\t"
+        "ldr	r5, [%[b], #240]\n\t"
+        "ldr	r7, [%[b], #244]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #240]\n\t"
+        "str	r6, [%[r], #244]\n\t"
+        "ldr	r4, [%[a], #248]\n\t"
+        "ldr	r6, [%[a], #252]\n\t"
+        "ldr	r5, [%[b], #248]\n\t"
+        "ldr	r7, [%[b], #252]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #248]\n\t"
+        "str	r6, [%[r], #252]\n\t"
+        "ldr	r4, [%[a], #256]\n\t"
+        "ldr	r6, [%[a], #260]\n\t"
+        "ldr	r5, [%[b], #256]\n\t"
+        "ldr	r7, [%[b], #260]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #256]\n\t"
+        "str	r6, [%[r], #260]\n\t"
+        "ldr	r4, [%[a], #264]\n\t"
+        "ldr	r6, [%[a], #268]\n\t"
+        "ldr	r5, [%[b], #264]\n\t"
+        "ldr	r7, [%[b], #268]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #264]\n\t"
+        "str	r6, [%[r], #268]\n\t"
+        "ldr	r4, [%[a], #272]\n\t"
+        "ldr	r6, [%[a], #276]\n\t"
+        "ldr	r5, [%[b], #272]\n\t"
+        "ldr	r7, [%[b], #276]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #272]\n\t"
+        "str	r6, [%[r], #276]\n\t"
+        "ldr	r4, [%[a], #280]\n\t"
+        "ldr	r6, [%[a], #284]\n\t"
+        "ldr	r5, [%[b], #280]\n\t"
+        "ldr	r7, [%[b], #284]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #280]\n\t"
+        "str	r6, [%[r], #284]\n\t"
+        "ldr	r4, [%[a], #288]\n\t"
+        "ldr	r6, [%[a], #292]\n\t"
+        "ldr	r5, [%[b], #288]\n\t"
+        "ldr	r7, [%[b], #292]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #288]\n\t"
+        "str	r6, [%[r], #292]\n\t"
+        "ldr	r4, [%[a], #296]\n\t"
+        "ldr	r6, [%[a], #300]\n\t"
+        "ldr	r5, [%[b], #296]\n\t"
+        "ldr	r7, [%[b], #300]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #296]\n\t"
+        "str	r6, [%[r], #300]\n\t"
+        "ldr	r4, [%[a], #304]\n\t"
+        "ldr	r6, [%[a], #308]\n\t"
+        "ldr	r5, [%[b], #304]\n\t"
+        "ldr	r7, [%[b], #308]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #304]\n\t"
+        "str	r6, [%[r], #308]\n\t"
+        "ldr	r4, [%[a], #312]\n\t"
+        "ldr	r6, [%[a], #316]\n\t"
+        "ldr	r5, [%[b], #312]\n\t"
+        "ldr	r7, [%[b], #316]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #312]\n\t"
+        "str	r6, [%[r], #316]\n\t"
+        "ldr	r4, [%[a], #320]\n\t"
+        "ldr	r6, [%[a], #324]\n\t"
+        "ldr	r5, [%[b], #320]\n\t"
+        "ldr	r7, [%[b], #324]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #320]\n\t"
+        "str	r6, [%[r], #324]\n\t"
+        "ldr	r4, [%[a], #328]\n\t"
+        "ldr	r6, [%[a], #332]\n\t"
+        "ldr	r5, [%[b], #328]\n\t"
+        "ldr	r7, [%[b], #332]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #328]\n\t"
+        "str	r6, [%[r], #332]\n\t"
+        "ldr	r4, [%[a], #336]\n\t"
+        "ldr	r6, [%[a], #340]\n\t"
+        "ldr	r5, [%[b], #336]\n\t"
+        "ldr	r7, [%[b], #340]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #336]\n\t"
+        "str	r6, [%[r], #340]\n\t"
+        "ldr	r4, [%[a], #344]\n\t"
+        "ldr	r6, [%[a], #348]\n\t"
+        "ldr	r5, [%[b], #344]\n\t"
+        "ldr	r7, [%[b], #348]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #344]\n\t"
+        "str	r6, [%[r], #348]\n\t"
+        "ldr	r4, [%[a], #352]\n\t"
+        "ldr	r6, [%[a], #356]\n\t"
+        "ldr	r5, [%[b], #352]\n\t"
+        "ldr	r7, [%[b], #356]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #352]\n\t"
+        "str	r6, [%[r], #356]\n\t"
+        "ldr	r4, [%[a], #360]\n\t"
+        "ldr	r6, [%[a], #364]\n\t"
+        "ldr	r5, [%[b], #360]\n\t"
+        "ldr	r7, [%[b], #364]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #360]\n\t"
+        "str	r6, [%[r], #364]\n\t"
+        "ldr	r4, [%[a], #368]\n\t"
+        "ldr	r6, [%[a], #372]\n\t"
+        "ldr	r5, [%[b], #368]\n\t"
+        "ldr	r7, [%[b], #372]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #368]\n\t"
+        "str	r6, [%[r], #372]\n\t"
+        "ldr	r4, [%[a], #376]\n\t"
+        "ldr	r6, [%[a], #380]\n\t"
+        "ldr	r5, [%[b], #376]\n\t"
+        "ldr	r7, [%[b], #380]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #376]\n\t"
+        "str	r6, [%[r], #380]\n\t"
+        "ldr	r4, [%[a], #384]\n\t"
+        "ldr	r6, [%[a], #388]\n\t"
+        "ldr	r5, [%[b], #384]\n\t"
+        "ldr	r7, [%[b], #388]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #384]\n\t"
+        "str	r6, [%[r], #388]\n\t"
+        "ldr	r4, [%[a], #392]\n\t"
+        "ldr	r6, [%[a], #396]\n\t"
+        "ldr	r5, [%[b], #392]\n\t"
+        "ldr	r7, [%[b], #396]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #392]\n\t"
+        "str	r6, [%[r], #396]\n\t"
+        "ldr	r4, [%[a], #400]\n\t"
+        "ldr	r6, [%[a], #404]\n\t"
+        "ldr	r5, [%[b], #400]\n\t"
+        "ldr	r7, [%[b], #404]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #400]\n\t"
+        "str	r6, [%[r], #404]\n\t"
+        "ldr	r4, [%[a], #408]\n\t"
+        "ldr	r6, [%[a], #412]\n\t"
+        "ldr	r5, [%[b], #408]\n\t"
+        "ldr	r7, [%[b], #412]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #408]\n\t"
+        "str	r6, [%[r], #412]\n\t"
+        "ldr	r4, [%[a], #416]\n\t"
+        "ldr	r6, [%[a], #420]\n\t"
+        "ldr	r5, [%[b], #416]\n\t"
+        "ldr	r7, [%[b], #420]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #416]\n\t"
+        "str	r6, [%[r], #420]\n\t"
+        "ldr	r4, [%[a], #424]\n\t"
+        "ldr	r6, [%[a], #428]\n\t"
+        "ldr	r5, [%[b], #424]\n\t"
+        "ldr	r7, [%[b], #428]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #424]\n\t"
+        "str	r6, [%[r], #428]\n\t"
+        "ldr	r4, [%[a], #432]\n\t"
+        "ldr	r6, [%[a], #436]\n\t"
+        "ldr	r5, [%[b], #432]\n\t"
+        "ldr	r7, [%[b], #436]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #432]\n\t"
+        "str	r6, [%[r], #436]\n\t"
+        "ldr	r4, [%[a], #440]\n\t"
+        "ldr	r6, [%[a], #444]\n\t"
+        "ldr	r5, [%[b], #440]\n\t"
+        "ldr	r7, [%[b], #444]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #440]\n\t"
+        "str	r6, [%[r], #444]\n\t"
+        "ldr	r4, [%[a], #448]\n\t"
+        "ldr	r6, [%[a], #452]\n\t"
+        "ldr	r5, [%[b], #448]\n\t"
+        "ldr	r7, [%[b], #452]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #448]\n\t"
+        "str	r6, [%[r], #452]\n\t"
+        "ldr	r4, [%[a], #456]\n\t"
+        "ldr	r6, [%[a], #460]\n\t"
+        "ldr	r5, [%[b], #456]\n\t"
+        "ldr	r7, [%[b], #460]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #456]\n\t"
+        "str	r6, [%[r], #460]\n\t"
+        "ldr	r4, [%[a], #464]\n\t"
+        "ldr	r6, [%[a], #468]\n\t"
+        "ldr	r5, [%[b], #464]\n\t"
+        "ldr	r7, [%[b], #468]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #464]\n\t"
+        "str	r6, [%[r], #468]\n\t"
+        "ldr	r4, [%[a], #472]\n\t"
+        "ldr	r6, [%[a], #476]\n\t"
+        "ldr	r5, [%[b], #472]\n\t"
+        "ldr	r7, [%[b], #476]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #472]\n\t"
+        "str	r6, [%[r], #476]\n\t"
+        "ldr	r4, [%[a], #480]\n\t"
+        "ldr	r6, [%[a], #484]\n\t"
+        "ldr	r5, [%[b], #480]\n\t"
+        "ldr	r7, [%[b], #484]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #480]\n\t"
+        "str	r6, [%[r], #484]\n\t"
+        "ldr	r4, [%[a], #488]\n\t"
+        "ldr	r6, [%[a], #492]\n\t"
+        "ldr	r5, [%[b], #488]\n\t"
+        "ldr	r7, [%[b], #492]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #488]\n\t"
+        "str	r6, [%[r], #492]\n\t"
+        "ldr	r4, [%[a], #496]\n\t"
+        "ldr	r6, [%[a], #500]\n\t"
+        "ldr	r5, [%[b], #496]\n\t"
+        "ldr	r7, [%[b], #500]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #496]\n\t"
+        "str	r6, [%[r], #500]\n\t"
+        "ldr	r4, [%[a], #504]\n\t"
+        "ldr	r6, [%[a], #508]\n\t"
+        "ldr	r5, [%[b], #504]\n\t"
+        "ldr	r7, [%[b], #508]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #504]\n\t"
+        "str	r6, [%[r], #508]\n\t"
+        "sbc	%[c], r9, r9\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r4", "r6", "r5", "r7", "r8", "r9"
+    );
+#endif /* WOLFSSL_SP_SMALL */
+
+    return c;
+}
+
+/* Reduce the number back to 4096 bits using Montgomery reduction.
+ *
+ * a   A single precision number to reduce in place.
+ * m   The single precision number representing the modulus.
+ * mp  The digit representing the negative inverse of m mod 2^n.
+ */
+SP_NOINLINE static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_digit ca = 0;
+
+    __asm__ __volatile__ (
+        "# i = 0\n\t"
+        "mov	r12, #0\n\t"
+        "ldr	r10, [%[a], #0]\n\t"
+        "ldr	r14, [%[a], #4]\n\t"
+        "\n1:\n\t"
+        "# mu = a[i] * mp\n\t"
+        "mul	r8, %[mp], r10\n\t"
+        "# a[i+0] += m[0] * mu\n\t"
+        "ldr	r7, [%[m], #0]\n\t"
+        "ldr	r9, [%[a], #0]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r10, r10, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "# a[i+1] += m[1] * mu\n\t"
+        "ldr	r7, [%[m], #4]\n\t"
+        "ldr	r9, [%[a], #4]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r10, r14, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r10, r10, r5\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+2] += m[2] * mu\n\t"
+        "ldr	r7, [%[m], #8]\n\t"
+        "ldr	r14, [%[a], #8]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r14, r14, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r14, r14, r4\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+3] += m[3] * mu\n\t"
+        "ldr	r7, [%[m], #12]\n\t"
+        "ldr	r9, [%[a], #12]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #12]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+4] += m[4] * mu\n\t"
+        "ldr	r7, [%[m], #16]\n\t"
+        "ldr	r9, [%[a], #16]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #16]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+5] += m[5] * mu\n\t"
+        "ldr	r7, [%[m], #20]\n\t"
+        "ldr	r9, [%[a], #20]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #20]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+6] += m[6] * mu\n\t"
+        "ldr	r7, [%[m], #24]\n\t"
+        "ldr	r9, [%[a], #24]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #24]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+7] += m[7] * mu\n\t"
+        "ldr	r7, [%[m], #28]\n\t"
+        "ldr	r9, [%[a], #28]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #28]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+8] += m[8] * mu\n\t"
+        "ldr	r7, [%[m], #32]\n\t"
+        "ldr	r9, [%[a], #32]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #32]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+9] += m[9] * mu\n\t"
+        "ldr	r7, [%[m], #36]\n\t"
+        "ldr	r9, [%[a], #36]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #36]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+10] += m[10] * mu\n\t"
+        "ldr	r7, [%[m], #40]\n\t"
+        "ldr	r9, [%[a], #40]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #40]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+11] += m[11] * mu\n\t"
+        "ldr	r7, [%[m], #44]\n\t"
+        "ldr	r9, [%[a], #44]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #44]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+12] += m[12] * mu\n\t"
+        "ldr	r7, [%[m], #48]\n\t"
+        "ldr	r9, [%[a], #48]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #48]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+13] += m[13] * mu\n\t"
+        "ldr	r7, [%[m], #52]\n\t"
+        "ldr	r9, [%[a], #52]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #52]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+14] += m[14] * mu\n\t"
+        "ldr	r7, [%[m], #56]\n\t"
+        "ldr	r9, [%[a], #56]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #56]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+15] += m[15] * mu\n\t"
+        "ldr	r7, [%[m], #60]\n\t"
+        "ldr	r9, [%[a], #60]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #60]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+16] += m[16] * mu\n\t"
+        "ldr	r7, [%[m], #64]\n\t"
+        "ldr	r9, [%[a], #64]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #64]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+17] += m[17] * mu\n\t"
+        "ldr	r7, [%[m], #68]\n\t"
+        "ldr	r9, [%[a], #68]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #68]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+18] += m[18] * mu\n\t"
+        "ldr	r7, [%[m], #72]\n\t"
+        "ldr	r9, [%[a], #72]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #72]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+19] += m[19] * mu\n\t"
+        "ldr	r7, [%[m], #76]\n\t"
+        "ldr	r9, [%[a], #76]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #76]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+20] += m[20] * mu\n\t"
+        "ldr	r7, [%[m], #80]\n\t"
+        "ldr	r9, [%[a], #80]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #80]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+21] += m[21] * mu\n\t"
+        "ldr	r7, [%[m], #84]\n\t"
+        "ldr	r9, [%[a], #84]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #84]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+22] += m[22] * mu\n\t"
+        "ldr	r7, [%[m], #88]\n\t"
+        "ldr	r9, [%[a], #88]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #88]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+23] += m[23] * mu\n\t"
+        "ldr	r7, [%[m], #92]\n\t"
+        "ldr	r9, [%[a], #92]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #92]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+24] += m[24] * mu\n\t"
+        "ldr	r7, [%[m], #96]\n\t"
+        "ldr	r9, [%[a], #96]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #96]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+25] += m[25] * mu\n\t"
+        "ldr	r7, [%[m], #100]\n\t"
+        "ldr	r9, [%[a], #100]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #100]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+26] += m[26] * mu\n\t"
+        "ldr	r7, [%[m], #104]\n\t"
+        "ldr	r9, [%[a], #104]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #104]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+27] += m[27] * mu\n\t"
+        "ldr	r7, [%[m], #108]\n\t"
+        "ldr	r9, [%[a], #108]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #108]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+28] += m[28] * mu\n\t"
+        "ldr	r7, [%[m], #112]\n\t"
+        "ldr	r9, [%[a], #112]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #112]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+29] += m[29] * mu\n\t"
+        "ldr	r7, [%[m], #116]\n\t"
+        "ldr	r9, [%[a], #116]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #116]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+30] += m[30] * mu\n\t"
+        "ldr	r7, [%[m], #120]\n\t"
+        "ldr	r9, [%[a], #120]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #120]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+31] += m[31] * mu\n\t"
+        "ldr	r7, [%[m], #124]\n\t"
+        "ldr	r9, [%[a], #124]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #124]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+32] += m[32] * mu\n\t"
+        "ldr	r7, [%[m], #128]\n\t"
+        "ldr	r9, [%[a], #128]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #128]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+33] += m[33] * mu\n\t"
+        "ldr	r7, [%[m], #132]\n\t"
+        "ldr	r9, [%[a], #132]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #132]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+34] += m[34] * mu\n\t"
+        "ldr	r7, [%[m], #136]\n\t"
+        "ldr	r9, [%[a], #136]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #136]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+35] += m[35] * mu\n\t"
+        "ldr	r7, [%[m], #140]\n\t"
+        "ldr	r9, [%[a], #140]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #140]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+36] += m[36] * mu\n\t"
+        "ldr	r7, [%[m], #144]\n\t"
+        "ldr	r9, [%[a], #144]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #144]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+37] += m[37] * mu\n\t"
+        "ldr	r7, [%[m], #148]\n\t"
+        "ldr	r9, [%[a], #148]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #148]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+38] += m[38] * mu\n\t"
+        "ldr	r7, [%[m], #152]\n\t"
+        "ldr	r9, [%[a], #152]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #152]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+39] += m[39] * mu\n\t"
+        "ldr	r7, [%[m], #156]\n\t"
+        "ldr	r9, [%[a], #156]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #156]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+40] += m[40] * mu\n\t"
+        "ldr	r7, [%[m], #160]\n\t"
+        "ldr	r9, [%[a], #160]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #160]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+41] += m[41] * mu\n\t"
+        "ldr	r7, [%[m], #164]\n\t"
+        "ldr	r9, [%[a], #164]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #164]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+42] += m[42] * mu\n\t"
+        "ldr	r7, [%[m], #168]\n\t"
+        "ldr	r9, [%[a], #168]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #168]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+43] += m[43] * mu\n\t"
+        "ldr	r7, [%[m], #172]\n\t"
+        "ldr	r9, [%[a], #172]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #172]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+44] += m[44] * mu\n\t"
+        "ldr	r7, [%[m], #176]\n\t"
+        "ldr	r9, [%[a], #176]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #176]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+45] += m[45] * mu\n\t"
+        "ldr	r7, [%[m], #180]\n\t"
+        "ldr	r9, [%[a], #180]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #180]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+46] += m[46] * mu\n\t"
+        "ldr	r7, [%[m], #184]\n\t"
+        "ldr	r9, [%[a], #184]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #184]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+47] += m[47] * mu\n\t"
+        "ldr	r7, [%[m], #188]\n\t"
+        "ldr	r9, [%[a], #188]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #188]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+48] += m[48] * mu\n\t"
+        "ldr	r7, [%[m], #192]\n\t"
+        "ldr	r9, [%[a], #192]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #192]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+49] += m[49] * mu\n\t"
+        "ldr	r7, [%[m], #196]\n\t"
+        "ldr	r9, [%[a], #196]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #196]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+50] += m[50] * mu\n\t"
+        "ldr	r7, [%[m], #200]\n\t"
+        "ldr	r9, [%[a], #200]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #200]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+51] += m[51] * mu\n\t"
+        "ldr	r7, [%[m], #204]\n\t"
+        "ldr	r9, [%[a], #204]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #204]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+52] += m[52] * mu\n\t"
+        "ldr	r7, [%[m], #208]\n\t"
+        "ldr	r9, [%[a], #208]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #208]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+53] += m[53] * mu\n\t"
+        "ldr	r7, [%[m], #212]\n\t"
+        "ldr	r9, [%[a], #212]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #212]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+54] += m[54] * mu\n\t"
+        "ldr	r7, [%[m], #216]\n\t"
+        "ldr	r9, [%[a], #216]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #216]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+55] += m[55] * mu\n\t"
+        "ldr	r7, [%[m], #220]\n\t"
+        "ldr	r9, [%[a], #220]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #220]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+56] += m[56] * mu\n\t"
+        "ldr	r7, [%[m], #224]\n\t"
+        "ldr	r9, [%[a], #224]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #224]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+57] += m[57] * mu\n\t"
+        "ldr	r7, [%[m], #228]\n\t"
+        "ldr	r9, [%[a], #228]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #228]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+58] += m[58] * mu\n\t"
+        "ldr	r7, [%[m], #232]\n\t"
+        "ldr	r9, [%[a], #232]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #232]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+59] += m[59] * mu\n\t"
+        "ldr	r7, [%[m], #236]\n\t"
+        "ldr	r9, [%[a], #236]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #236]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+60] += m[60] * mu\n\t"
+        "ldr	r7, [%[m], #240]\n\t"
+        "ldr	r9, [%[a], #240]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #240]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+61] += m[61] * mu\n\t"
+        "ldr	r7, [%[m], #244]\n\t"
+        "ldr	r9, [%[a], #244]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #244]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+62] += m[62] * mu\n\t"
+        "ldr	r7, [%[m], #248]\n\t"
+        "ldr	r9, [%[a], #248]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #248]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+63] += m[63] * mu\n\t"
+        "ldr	r7, [%[m], #252]\n\t"
+        "ldr	r9, [%[a], #252]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #252]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+64] += m[64] * mu\n\t"
+        "ldr	r7, [%[m], #256]\n\t"
+        "ldr	r9, [%[a], #256]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #256]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+65] += m[65] * mu\n\t"
+        "ldr	r7, [%[m], #260]\n\t"
+        "ldr	r9, [%[a], #260]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #260]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+66] += m[66] * mu\n\t"
+        "ldr	r7, [%[m], #264]\n\t"
+        "ldr	r9, [%[a], #264]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #264]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+67] += m[67] * mu\n\t"
+        "ldr	r7, [%[m], #268]\n\t"
+        "ldr	r9, [%[a], #268]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #268]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+68] += m[68] * mu\n\t"
+        "ldr	r7, [%[m], #272]\n\t"
+        "ldr	r9, [%[a], #272]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #272]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+69] += m[69] * mu\n\t"
+        "ldr	r7, [%[m], #276]\n\t"
+        "ldr	r9, [%[a], #276]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #276]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+70] += m[70] * mu\n\t"
+        "ldr	r7, [%[m], #280]\n\t"
+        "ldr	r9, [%[a], #280]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #280]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+71] += m[71] * mu\n\t"
+        "ldr	r7, [%[m], #284]\n\t"
+        "ldr	r9, [%[a], #284]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #284]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+72] += m[72] * mu\n\t"
+        "ldr	r7, [%[m], #288]\n\t"
+        "ldr	r9, [%[a], #288]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #288]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+73] += m[73] * mu\n\t"
+        "ldr	r7, [%[m], #292]\n\t"
+        "ldr	r9, [%[a], #292]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #292]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+74] += m[74] * mu\n\t"
+        "ldr	r7, [%[m], #296]\n\t"
+        "ldr	r9, [%[a], #296]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #296]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+75] += m[75] * mu\n\t"
+        "ldr	r7, [%[m], #300]\n\t"
+        "ldr	r9, [%[a], #300]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #300]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+76] += m[76] * mu\n\t"
+        "ldr	r7, [%[m], #304]\n\t"
+        "ldr	r9, [%[a], #304]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #304]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+77] += m[77] * mu\n\t"
+        "ldr	r7, [%[m], #308]\n\t"
+        "ldr	r9, [%[a], #308]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #308]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+78] += m[78] * mu\n\t"
+        "ldr	r7, [%[m], #312]\n\t"
+        "ldr	r9, [%[a], #312]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #312]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+79] += m[79] * mu\n\t"
+        "ldr	r7, [%[m], #316]\n\t"
+        "ldr	r9, [%[a], #316]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #316]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+80] += m[80] * mu\n\t"
+        "ldr	r7, [%[m], #320]\n\t"
+        "ldr	r9, [%[a], #320]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #320]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+81] += m[81] * mu\n\t"
+        "ldr	r7, [%[m], #324]\n\t"
+        "ldr	r9, [%[a], #324]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #324]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+82] += m[82] * mu\n\t"
+        "ldr	r7, [%[m], #328]\n\t"
+        "ldr	r9, [%[a], #328]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #328]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+83] += m[83] * mu\n\t"
+        "ldr	r7, [%[m], #332]\n\t"
+        "ldr	r9, [%[a], #332]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #332]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+84] += m[84] * mu\n\t"
+        "ldr	r7, [%[m], #336]\n\t"
+        "ldr	r9, [%[a], #336]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #336]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+85] += m[85] * mu\n\t"
+        "ldr	r7, [%[m], #340]\n\t"
+        "ldr	r9, [%[a], #340]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #340]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+86] += m[86] * mu\n\t"
+        "ldr	r7, [%[m], #344]\n\t"
+        "ldr	r9, [%[a], #344]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #344]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+87] += m[87] * mu\n\t"
+        "ldr	r7, [%[m], #348]\n\t"
+        "ldr	r9, [%[a], #348]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #348]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+88] += m[88] * mu\n\t"
+        "ldr	r7, [%[m], #352]\n\t"
+        "ldr	r9, [%[a], #352]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #352]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+89] += m[89] * mu\n\t"
+        "ldr	r7, [%[m], #356]\n\t"
+        "ldr	r9, [%[a], #356]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #356]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+90] += m[90] * mu\n\t"
+        "ldr	r7, [%[m], #360]\n\t"
+        "ldr	r9, [%[a], #360]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #360]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+91] += m[91] * mu\n\t"
+        "ldr	r7, [%[m], #364]\n\t"
+        "ldr	r9, [%[a], #364]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #364]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+92] += m[92] * mu\n\t"
+        "ldr	r7, [%[m], #368]\n\t"
+        "ldr	r9, [%[a], #368]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #368]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+93] += m[93] * mu\n\t"
+        "ldr	r7, [%[m], #372]\n\t"
+        "ldr	r9, [%[a], #372]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #372]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+94] += m[94] * mu\n\t"
+        "ldr	r7, [%[m], #376]\n\t"
+        "ldr	r9, [%[a], #376]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #376]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+95] += m[95] * mu\n\t"
+        "ldr	r7, [%[m], #380]\n\t"
+        "ldr	r9, [%[a], #380]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #380]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+96] += m[96] * mu\n\t"
+        "ldr	r7, [%[m], #384]\n\t"
+        "ldr	r9, [%[a], #384]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #384]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+97] += m[97] * mu\n\t"
+        "ldr	r7, [%[m], #388]\n\t"
+        "ldr	r9, [%[a], #388]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #388]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+98] += m[98] * mu\n\t"
+        "ldr	r7, [%[m], #392]\n\t"
+        "ldr	r9, [%[a], #392]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #392]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+99] += m[99] * mu\n\t"
+        "ldr	r7, [%[m], #396]\n\t"
+        "ldr	r9, [%[a], #396]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #396]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+100] += m[100] * mu\n\t"
+        "ldr	r7, [%[m], #400]\n\t"
+        "ldr	r9, [%[a], #400]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #400]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+101] += m[101] * mu\n\t"
+        "ldr	r7, [%[m], #404]\n\t"
+        "ldr	r9, [%[a], #404]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #404]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+102] += m[102] * mu\n\t"
+        "ldr	r7, [%[m], #408]\n\t"
+        "ldr	r9, [%[a], #408]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #408]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+103] += m[103] * mu\n\t"
+        "ldr	r7, [%[m], #412]\n\t"
+        "ldr	r9, [%[a], #412]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #412]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+104] += m[104] * mu\n\t"
+        "ldr	r7, [%[m], #416]\n\t"
+        "ldr	r9, [%[a], #416]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #416]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+105] += m[105] * mu\n\t"
+        "ldr	r7, [%[m], #420]\n\t"
+        "ldr	r9, [%[a], #420]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #420]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+106] += m[106] * mu\n\t"
+        "ldr	r7, [%[m], #424]\n\t"
+        "ldr	r9, [%[a], #424]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #424]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+107] += m[107] * mu\n\t"
+        "ldr	r7, [%[m], #428]\n\t"
+        "ldr	r9, [%[a], #428]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #428]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+108] += m[108] * mu\n\t"
+        "ldr	r7, [%[m], #432]\n\t"
+        "ldr	r9, [%[a], #432]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #432]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+109] += m[109] * mu\n\t"
+        "ldr	r7, [%[m], #436]\n\t"
+        "ldr	r9, [%[a], #436]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #436]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+110] += m[110] * mu\n\t"
+        "ldr	r7, [%[m], #440]\n\t"
+        "ldr	r9, [%[a], #440]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #440]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+111] += m[111] * mu\n\t"
+        "ldr	r7, [%[m], #444]\n\t"
+        "ldr	r9, [%[a], #444]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #444]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+112] += m[112] * mu\n\t"
+        "ldr	r7, [%[m], #448]\n\t"
+        "ldr	r9, [%[a], #448]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #448]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+113] += m[113] * mu\n\t"
+        "ldr	r7, [%[m], #452]\n\t"
+        "ldr	r9, [%[a], #452]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #452]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+114] += m[114] * mu\n\t"
+        "ldr	r7, [%[m], #456]\n\t"
+        "ldr	r9, [%[a], #456]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #456]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+115] += m[115] * mu\n\t"
+        "ldr	r7, [%[m], #460]\n\t"
+        "ldr	r9, [%[a], #460]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #460]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+116] += m[116] * mu\n\t"
+        "ldr	r7, [%[m], #464]\n\t"
+        "ldr	r9, [%[a], #464]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #464]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+117] += m[117] * mu\n\t"
+        "ldr	r7, [%[m], #468]\n\t"
+        "ldr	r9, [%[a], #468]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #468]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+118] += m[118] * mu\n\t"
+        "ldr	r7, [%[m], #472]\n\t"
+        "ldr	r9, [%[a], #472]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #472]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+119] += m[119] * mu\n\t"
+        "ldr	r7, [%[m], #476]\n\t"
+        "ldr	r9, [%[a], #476]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #476]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+120] += m[120] * mu\n\t"
+        "ldr	r7, [%[m], #480]\n\t"
+        "ldr	r9, [%[a], #480]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #480]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+121] += m[121] * mu\n\t"
+        "ldr	r7, [%[m], #484]\n\t"
+        "ldr	r9, [%[a], #484]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #484]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+122] += m[122] * mu\n\t"
+        "ldr	r7, [%[m], #488]\n\t"
+        "ldr	r9, [%[a], #488]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #488]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+123] += m[123] * mu\n\t"
+        "ldr	r7, [%[m], #492]\n\t"
+        "ldr	r9, [%[a], #492]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #492]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+124] += m[124] * mu\n\t"
+        "ldr	r7, [%[m], #496]\n\t"
+        "ldr	r9, [%[a], #496]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #496]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+125] += m[125] * mu\n\t"
+        "ldr	r7, [%[m], #500]\n\t"
+        "ldr	r9, [%[a], #500]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #500]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+126] += m[126] * mu\n\t"
+        "ldr	r7, [%[m], #504]\n\t"
+        "ldr	r9, [%[a], #504]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #504]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+127] += m[127] * mu\n\t"
+        "ldr	r7, [%[m], #508]\n\t"
+        "ldr   r9, [%[a], #508]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r7, r7, %[ca]\n\t"
+        "mov	%[ca], #0\n\t"
+        "adc	%[ca], %[ca], %[ca]\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #508]\n\t"
+        "ldr	r9, [%[a], #512]\n\t"
+        "adcs	r9, r9, r7\n\t"
+        "str	r9, [%[a], #512]\n\t"
+        "adc	%[ca], %[ca], #0\n\t"
+        "# i += 1\n\t"
+        "add	%[a], %[a], #4\n\t"
+        "add	r12, r12, #4\n\t"
+        "cmp	r12, #512\n\t"
+        "blt	1b\n\t"
+        "str	r10, [%[a], #0]\n\t"
+        "str	r14, [%[a], #4]\n\t"
+        : [ca] "+r" (ca), [a] "+r" (a)
+        : [m] "r" (m), [mp] "r" (mp)
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+    );
+
+    sp_4096_cond_sub_128(a - 128, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montogmery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r   Result of multiplication.
+ * a   First number to multiply in Montogmery form.
+ * b   Second number to multiply in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_4096_mont_mul_128(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_4096_mul_128(r, a, b);
+    sp_4096_mont_reduce_128(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_4096_mont_sqr_128(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_4096_sqr_128(r, a);
+    sp_4096_mont_reduce_128(r, m, mp);
+}
+
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
+ *
+ * d1   The high order half of the number to divide.
+ * d0   The low order half of the number to divide.
+ * div  The dividend.
+ * returns the result of the division.
+ *
+ * Note that this is an approximate div. It may give an answer 1 larger.
+ */
+static sp_digit div_4096_word_128(sp_digit d1, sp_digit d0, sp_digit div)
+{
+    sp_digit r = 0;
+
+    __asm__ __volatile__ (
+        "lsr	r5, %[div], #1\n\t"
+        "add	r5, r5, #1\n\t"
+        "mov	r6, %[d0]\n\t"
+        "mov	r7, %[d1]\n\t"
+        "# Do top 32\n\t"
+        "subs	r8, r5, r7\n\t"
+        "sbc	r8, r8, r8\n\t"
+        "add	%[r], %[r], %[r]\n\t"
+        "sub	%[r], %[r], r8\n\t"
+        "and	r8, r8, r5\n\t"
+        "subs	r7, r7, r8\n\t"
+        "# Next 30 bits\n\t"
+        "mov	r4, #29\n\t"
+        "1:\n\t"
+        "movs	r6, r6, lsl #1\n\t"
+        "adc	r7, r7, r7\n\t"
+        "subs	r8, r5, r7\n\t"
+        "sbc	r8, r8, r8\n\t"
+        "add	%[r], %[r], %[r]\n\t"
+        "sub	%[r], %[r], r8\n\t"
+        "and	r8, r8, r5\n\t"
+        "subs	r7, r7, r8\n\t"
+        "subs	r4, r4, #1\n\t"
+        "bpl	1b\n\t"
+        "add	%[r], %[r], %[r]\n\t"
+        "add	%[r], %[r], #1\n\t"
+        "umull	r4, r5, %[r], %[div]\n\t"
+        "subs	r4, %[d0], r4\n\t"
+        "sbc	r5, %[d1], r5\n\t"
+        "add	%[r], %[r], r5\n\t"
+        "umull	r4, r5, %[r], %[div]\n\t"
+        "subs	r4, %[d0], r4\n\t"
+        "sbc	r5, %[d1], r5\n\t"
+        "add	%[r], %[r], r5\n\t"
+        "subs	r8, %[div], r4\n\t"
+        "sbc	r8, r8, r8\n\t"
+        "sub	%[r], %[r], r8\n\t"
+        : [r] "+r" (r)
+        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+        : "r4", "r5", "r6", "r7", "r8"
+    );
+    return r;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * m  Mask to AND against each digit.
+ */
+static void sp_4096_mask_128(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<128; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    int i;
+
+    for (i = 0; i < 128; i += 8) {
+        r[i+0] = a[i+0] & m;
+        r[i+1] = a[i+1] & m;
+        r[i+2] = a[i+2] & m;
+        r[i+3] = a[i+3] & m;
+        r[i+4] = a[i+4] & m;
+        r[i+5] = a[i+5] & m;
+        r[i+6] = a[i+6] & m;
+        r[i+7] = a[i+7] & m;
+    }
+#endif
+}
+
+/* Compare a with b in constant time.
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+static int32_t sp_4096_cmp_128(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = -1;
+    sp_digit one = 1;
+
+
+#ifdef WOLFSSL_SP_SMALL
+    __asm__ __volatile__ (
+        "mov	r7, #0\n\t"
+        "mov	r3, #-1\n\t"
+        "mov	r6, #508\n\t"
+        "1:\n\t"
+        "ldr	r4, [%[a], r6]\n\t"
+        "ldr	r5, [%[b], r6]\n\t"
+        "and	r4, r4, r3\n\t"
+        "and	r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "subs	r6, r6, #4\n\t"
+        "bcs	1b\n\t"
+        "eor	%[r], %[r], r3\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b), [one] "r" (one)
+        : "r3", "r4", "r5", "r6", "r7"
+    );
+#else
+    __asm__ __volatile__ (
+        "mov	r7, #0\n\t"
+        "mov	r3, #-1\n\t"
+        "ldr		r4, [%[a], #508]\n\t"
+        "ldr		r5, [%[b], #508]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #504]\n\t"
+        "ldr		r5, [%[b], #504]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #500]\n\t"
+        "ldr		r5, [%[b], #500]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #496]\n\t"
+        "ldr		r5, [%[b], #496]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #492]\n\t"
+        "ldr		r5, [%[b], #492]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #488]\n\t"
+        "ldr		r5, [%[b], #488]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #484]\n\t"
+        "ldr		r5, [%[b], #484]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #480]\n\t"
+        "ldr		r5, [%[b], #480]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #476]\n\t"
+        "ldr		r5, [%[b], #476]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #472]\n\t"
+        "ldr		r5, [%[b], #472]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #468]\n\t"
+        "ldr		r5, [%[b], #468]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #464]\n\t"
+        "ldr		r5, [%[b], #464]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #460]\n\t"
+        "ldr		r5, [%[b], #460]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #456]\n\t"
+        "ldr		r5, [%[b], #456]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #452]\n\t"
+        "ldr		r5, [%[b], #452]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #448]\n\t"
+        "ldr		r5, [%[b], #448]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #444]\n\t"
+        "ldr		r5, [%[b], #444]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #440]\n\t"
+        "ldr		r5, [%[b], #440]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #436]\n\t"
+        "ldr		r5, [%[b], #436]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #432]\n\t"
+        "ldr		r5, [%[b], #432]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #428]\n\t"
+        "ldr		r5, [%[b], #428]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #424]\n\t"
+        "ldr		r5, [%[b], #424]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #420]\n\t"
+        "ldr		r5, [%[b], #420]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #416]\n\t"
+        "ldr		r5, [%[b], #416]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #412]\n\t"
+        "ldr		r5, [%[b], #412]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #408]\n\t"
+        "ldr		r5, [%[b], #408]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #404]\n\t"
+        "ldr		r5, [%[b], #404]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #400]\n\t"
+        "ldr		r5, [%[b], #400]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #396]\n\t"
+        "ldr		r5, [%[b], #396]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #392]\n\t"
+        "ldr		r5, [%[b], #392]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #388]\n\t"
+        "ldr		r5, [%[b], #388]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #384]\n\t"
+        "ldr		r5, [%[b], #384]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #380]\n\t"
+        "ldr		r5, [%[b], #380]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #376]\n\t"
+        "ldr		r5, [%[b], #376]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #372]\n\t"
+        "ldr		r5, [%[b], #372]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #368]\n\t"
+        "ldr		r5, [%[b], #368]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #364]\n\t"
+        "ldr		r5, [%[b], #364]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #360]\n\t"
+        "ldr		r5, [%[b], #360]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #356]\n\t"
+        "ldr		r5, [%[b], #356]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #352]\n\t"
+        "ldr		r5, [%[b], #352]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #348]\n\t"
+        "ldr		r5, [%[b], #348]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #344]\n\t"
+        "ldr		r5, [%[b], #344]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #340]\n\t"
+        "ldr		r5, [%[b], #340]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #336]\n\t"
+        "ldr		r5, [%[b], #336]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #332]\n\t"
+        "ldr		r5, [%[b], #332]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #328]\n\t"
+        "ldr		r5, [%[b], #328]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #324]\n\t"
+        "ldr		r5, [%[b], #324]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #320]\n\t"
+        "ldr		r5, [%[b], #320]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #316]\n\t"
+        "ldr		r5, [%[b], #316]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #312]\n\t"
+        "ldr		r5, [%[b], #312]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #308]\n\t"
+        "ldr		r5, [%[b], #308]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #304]\n\t"
+        "ldr		r5, [%[b], #304]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #300]\n\t"
+        "ldr		r5, [%[b], #300]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #296]\n\t"
+        "ldr		r5, [%[b], #296]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #292]\n\t"
+        "ldr		r5, [%[b], #292]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #288]\n\t"
+        "ldr		r5, [%[b], #288]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #284]\n\t"
+        "ldr		r5, [%[b], #284]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #280]\n\t"
+        "ldr		r5, [%[b], #280]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #276]\n\t"
+        "ldr		r5, [%[b], #276]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #272]\n\t"
+        "ldr		r5, [%[b], #272]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #268]\n\t"
+        "ldr		r5, [%[b], #268]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #264]\n\t"
+        "ldr		r5, [%[b], #264]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #260]\n\t"
+        "ldr		r5, [%[b], #260]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #256]\n\t"
+        "ldr		r5, [%[b], #256]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #252]\n\t"
+        "ldr		r5, [%[b], #252]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #248]\n\t"
+        "ldr		r5, [%[b], #248]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #244]\n\t"
+        "ldr		r5, [%[b], #244]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #240]\n\t"
+        "ldr		r5, [%[b], #240]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #236]\n\t"
+        "ldr		r5, [%[b], #236]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #232]\n\t"
+        "ldr		r5, [%[b], #232]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #228]\n\t"
+        "ldr		r5, [%[b], #228]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #224]\n\t"
+        "ldr		r5, [%[b], #224]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #220]\n\t"
+        "ldr		r5, [%[b], #220]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #216]\n\t"
+        "ldr		r5, [%[b], #216]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #212]\n\t"
+        "ldr		r5, [%[b], #212]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #208]\n\t"
+        "ldr		r5, [%[b], #208]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #204]\n\t"
+        "ldr		r5, [%[b], #204]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #200]\n\t"
+        "ldr		r5, [%[b], #200]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #196]\n\t"
+        "ldr		r5, [%[b], #196]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #192]\n\t"
+        "ldr		r5, [%[b], #192]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #188]\n\t"
+        "ldr		r5, [%[b], #188]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #184]\n\t"
+        "ldr		r5, [%[b], #184]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #180]\n\t"
+        "ldr		r5, [%[b], #180]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #176]\n\t"
+        "ldr		r5, [%[b], #176]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #172]\n\t"
+        "ldr		r5, [%[b], #172]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #168]\n\t"
+        "ldr		r5, [%[b], #168]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #164]\n\t"
+        "ldr		r5, [%[b], #164]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #160]\n\t"
+        "ldr		r5, [%[b], #160]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #156]\n\t"
+        "ldr		r5, [%[b], #156]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #152]\n\t"
+        "ldr		r5, [%[b], #152]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #148]\n\t"
+        "ldr		r5, [%[b], #148]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #144]\n\t"
+        "ldr		r5, [%[b], #144]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #140]\n\t"
+        "ldr		r5, [%[b], #140]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #136]\n\t"
+        "ldr		r5, [%[b], #136]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #132]\n\t"
+        "ldr		r5, [%[b], #132]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #128]\n\t"
+        "ldr		r5, [%[b], #128]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #124]\n\t"
+        "ldr		r5, [%[b], #124]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #120]\n\t"
+        "ldr		r5, [%[b], #120]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #116]\n\t"
+        "ldr		r5, [%[b], #116]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #112]\n\t"
+        "ldr		r5, [%[b], #112]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #108]\n\t"
+        "ldr		r5, [%[b], #108]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #104]\n\t"
+        "ldr		r5, [%[b], #104]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #100]\n\t"
+        "ldr		r5, [%[b], #100]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #96]\n\t"
+        "ldr		r5, [%[b], #96]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #92]\n\t"
+        "ldr		r5, [%[b], #92]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #88]\n\t"
+        "ldr		r5, [%[b], #88]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #84]\n\t"
+        "ldr		r5, [%[b], #84]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #80]\n\t"
+        "ldr		r5, [%[b], #80]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #76]\n\t"
+        "ldr		r5, [%[b], #76]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #72]\n\t"
+        "ldr		r5, [%[b], #72]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #68]\n\t"
+        "ldr		r5, [%[b], #68]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #64]\n\t"
+        "ldr		r5, [%[b], #64]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #60]\n\t"
+        "ldr		r5, [%[b], #60]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #56]\n\t"
+        "ldr		r5, [%[b], #56]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #52]\n\t"
+        "ldr		r5, [%[b], #52]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #48]\n\t"
+        "ldr		r5, [%[b], #48]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #44]\n\t"
+        "ldr		r5, [%[b], #44]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #40]\n\t"
+        "ldr		r5, [%[b], #40]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #36]\n\t"
+        "ldr		r5, [%[b], #36]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #32]\n\t"
+        "ldr		r5, [%[b], #32]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #28]\n\t"
+        "ldr		r5, [%[b], #28]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #24]\n\t"
+        "ldr		r5, [%[b], #24]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #20]\n\t"
+        "ldr		r5, [%[b], #20]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #16]\n\t"
+        "ldr		r5, [%[b], #16]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #12]\n\t"
+        "ldr		r5, [%[b], #12]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #8]\n\t"
+        "ldr		r5, [%[b], #8]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #4]\n\t"
+        "ldr		r5, [%[b], #4]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #0]\n\t"
+        "ldr		r5, [%[b], #0]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "eor	%[r], %[r], r3\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b), [one] "r" (one)
+        : "r3", "r4", "r5", "r6", "r7"
+    );
+#endif
+
+    return r;
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Nmber to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_4096_div_128(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[256], t2[129];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+
+    div = d[127];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 128);
+    for (i=127; i>=0; i--) {
+        r1 = div_4096_word_128(t1[128 + i], t1[128 + i - 1], div);
+
+        sp_4096_mul_d_128(t2, d, r1);
+        t1[128 + i] += sp_4096_sub_in_place_128(&t1[i], t2);
+        t1[128 + i] -= t2[128];
+        sp_4096_mask_128(t2, d, t1[128 + i]);
+        t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], t2);
+        sp_4096_mask_128(t2, d, t1[128 + i]);
+        t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], t2);
+    }
+
+    r1 = sp_4096_cmp_128(t1, d) >= 0;
+    sp_4096_cond_sub_128(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_4096_mod_128(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_4096_div_128(a, m, NULL, r);
+}
+
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Nmber to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_4096_div_128_cond(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[256], t2[129];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+
+    div = d[127];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 128);
+    for (i=127; i>=0; i--) {
+        r1 = div_4096_word_128(t1[128 + i], t1[128 + i - 1], div);
+
+        sp_4096_mul_d_128(t2, d, r1);
+        t1[128 + i] += sp_4096_sub_in_place_128(&t1[i], t2);
+        t1[128 + i] -= t2[128];
+        if (t1[128 + i] != 0) {
+            t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], d);
+            if (t1[128 + i] != 0)
+                t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], d);
+        }
+    }
+
+    r1 = sp_4096_cmp_128(t1, d) >= 0;
+    sp_4096_cond_sub_128(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_4096_mod_128_cond(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_4096_div_128_cond(a, m, NULL, r);
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
+                                                     defined(WOLFSSL_HAVE_SP_DH)
+#ifdef WOLFSSL_SP_SMALL
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * r     A single precision number that is the result of the operation.
+ * a     A single precision number being exponentiated.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_4096_mod_exp_128(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[16][256];
+#else
+    sp_digit* t[16];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 256, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<16; i++) {
+            t[i] = td + i * 256;
+        }
+#endif
+        norm = t[0];
+
+        sp_4096_mont_setup(m, &mp);
+        sp_4096_mont_norm_128(norm, m);
+
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 128U);
+        if (reduceA != 0) {
+            err = sp_4096_mod_128(t[1] + 128, a, m);
+            if (err == MP_OKAY) {
+                err = sp_4096_mod_128(t[1], t[1], m);
+            }
+        }
+        else {
+            XMEMCPY(t[1] + 128, a, sizeof(sp_digit) * 128);
+            err = sp_4096_mod_128(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_mont_sqr_128(t[ 2], t[ 1], m, mp);
+        sp_4096_mont_mul_128(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_4096_mont_sqr_128(t[ 4], t[ 2], m, mp);
+        sp_4096_mont_mul_128(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_4096_mont_sqr_128(t[ 6], t[ 3], m, mp);
+        sp_4096_mont_mul_128(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_4096_mont_sqr_128(t[ 8], t[ 4], m, mp);
+        sp_4096_mont_mul_128(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_4096_mont_sqr_128(t[10], t[ 5], m, mp);
+        sp_4096_mont_mul_128(t[11], t[ 6], t[ 5], m, mp);
+        sp_4096_mont_sqr_128(t[12], t[ 6], m, mp);
+        sp_4096_mont_mul_128(t[13], t[ 7], t[ 6], m, mp);
+        sp_4096_mont_sqr_128(t[14], t[ 7], m, mp);
+        sp_4096_mont_mul_128(t[15], t[ 8], t[ 7], m, mp);
+
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 4;
+        if (c == 32) {
+            c = 28;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 128);
+        for (; i>=0 || c>=4; ) {
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 28;
+                n <<= 4;
+                c = 28;
+            }
+            else if (c < 4) {
+                y = n >> 28;
+                n = e[i--];
+                c = 4 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                y = (n >> 28) & 0xf;
+                n <<= 4;
+                c -= 4;
+            }
+
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+
+            sp_4096_mont_mul_128(r, r, t[y], m, mp);
+        }
+
+        XMEMSET(&r[128], 0, sizeof(sp_digit) * 128U);
+        sp_4096_mont_reduce_128(r, m, mp);
+
+        mask = 0 - (sp_4096_cmp_128(r, m) >= 0);
+        sp_4096_cond_sub_128(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#else
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * r     A single precision number that is the result of the operation.
+ * a     A single precision number being exponentiated.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_4096_mod_exp_128(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[32][256];
+#else
+    sp_digit* t[32];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 256, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<32; i++) {
+            t[i] = td + i * 256;
+        }
+#endif
+        norm = t[0];
+
+        sp_4096_mont_setup(m, &mp);
+        sp_4096_mont_norm_128(norm, m);
+
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 128U);
+        if (reduceA != 0) {
+            err = sp_4096_mod_128(t[1] + 128, a, m);
+            if (err == MP_OKAY) {
+                err = sp_4096_mod_128(t[1], t[1], m);
+            }
+        }
+        else {
+            XMEMCPY(t[1] + 128, a, sizeof(sp_digit) * 128);
+            err = sp_4096_mod_128(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_mont_sqr_128(t[ 2], t[ 1], m, mp);
+        sp_4096_mont_mul_128(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_4096_mont_sqr_128(t[ 4], t[ 2], m, mp);
+        sp_4096_mont_mul_128(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_4096_mont_sqr_128(t[ 6], t[ 3], m, mp);
+        sp_4096_mont_mul_128(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_4096_mont_sqr_128(t[ 8], t[ 4], m, mp);
+        sp_4096_mont_mul_128(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_4096_mont_sqr_128(t[10], t[ 5], m, mp);
+        sp_4096_mont_mul_128(t[11], t[ 6], t[ 5], m, mp);
+        sp_4096_mont_sqr_128(t[12], t[ 6], m, mp);
+        sp_4096_mont_mul_128(t[13], t[ 7], t[ 6], m, mp);
+        sp_4096_mont_sqr_128(t[14], t[ 7], m, mp);
+        sp_4096_mont_mul_128(t[15], t[ 8], t[ 7], m, mp);
+        sp_4096_mont_sqr_128(t[16], t[ 8], m, mp);
+        sp_4096_mont_mul_128(t[17], t[ 9], t[ 8], m, mp);
+        sp_4096_mont_sqr_128(t[18], t[ 9], m, mp);
+        sp_4096_mont_mul_128(t[19], t[10], t[ 9], m, mp);
+        sp_4096_mont_sqr_128(t[20], t[10], m, mp);
+        sp_4096_mont_mul_128(t[21], t[11], t[10], m, mp);
+        sp_4096_mont_sqr_128(t[22], t[11], m, mp);
+        sp_4096_mont_mul_128(t[23], t[12], t[11], m, mp);
+        sp_4096_mont_sqr_128(t[24], t[12], m, mp);
+        sp_4096_mont_mul_128(t[25], t[13], t[12], m, mp);
+        sp_4096_mont_sqr_128(t[26], t[13], m, mp);
+        sp_4096_mont_mul_128(t[27], t[14], t[13], m, mp);
+        sp_4096_mont_sqr_128(t[28], t[14], m, mp);
+        sp_4096_mont_mul_128(t[29], t[15], t[14], m, mp);
+        sp_4096_mont_sqr_128(t[30], t[15], m, mp);
+        sp_4096_mont_mul_128(t[31], t[16], t[15], m, mp);
+
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 5;
+        if (c == 32) {
+            c = 27;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 128);
+        for (; i>=0 || c>=5; ) {
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 27;
+                n <<= 5;
+                c = 27;
+            }
+            else if (c < 5) {
+                y = n >> 27;
+                n = e[i--];
+                c = 5 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                y = (n >> 27) & 0x1f;
+                n <<= 5;
+                c -= 5;
+            }
+
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+
+            sp_4096_mont_mul_128(r, r, t[y], m, mp);
+        }
+
+        XMEMSET(&r[128], 0, sizeof(sp_digit) * 128U);
+        sp_4096_mont_reduce_128(r, m, mp);
+
+        mask = 0 - (sp_4096_cmp_128(r, m) >= 0);
+        sp_4096_cond_sub_128(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */
+
+#ifdef WOLFSSL_HAVE_SP_RSA
+/* RSA public key operation.
+ *
+ * in      Array of bytes representing the number to exponentiate, base.
+ * inLen   Number of bytes in base.
+ * em      Public exponent.
+ * mm      Modulus.
+ * out     Buffer to hold big-endian bytes of exponentiation result.
+ *         Must be at least 512 bytes long.
+ * outLen  Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPublic_4096(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
+    byte* out, word32* outLen)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit a[256], m[128], r[256];
+#else
+    sp_digit* d = NULL;
+    sp_digit* a;
+    sp_digit* m;
+    sp_digit* r;
+#endif
+    sp_digit *ah;
+    sp_digit e[1];
+    int err = MP_OKAY;
+
+    if (*outLen < 512)
+        err = MP_TO_E;
+    if (err == MP_OKAY && (mp_count_bits(em) > 32 || inLen > 512 ||
+                                                     mp_count_bits(mm) != 4096))
+        err = MP_READ_E;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 128 * 5, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (d == NULL)
+            err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        a = d;
+        r = a + 128 * 2;
+        m = r + 128 * 2;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        ah = a + 128;
+
+        sp_4096_from_bin(ah, 128, in, inLen);
+#if DIGIT_BIT >= 32
+        e[0] = em->dp[0];
+#else
+        e[0] = em->dp[0];
+        if (em->used > 1) {
+            e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
+        }
+#endif
+        if (e[0] == 0) {
+            err = MP_EXPTMOD_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        sp_4096_from_mp(m, 128, mm);
+
+        if (e[0] == 0x3) {
+            if (err == MP_OKAY) {
+                sp_4096_sqr_128(r, ah);
+                err = sp_4096_mod_128_cond(r, r, m);
+            }
+            if (err == MP_OKAY) {
+                sp_4096_mul_128(r, ah, r);
+                err = sp_4096_mod_128_cond(r, r, m);
+            }
+        }
+        else {
+            int i;
+            sp_digit mp;
+
+            sp_4096_mont_setup(m, &mp);
+
+            /* Convert to Montgomery form. */
+            XMEMSET(a, 0, sizeof(sp_digit) * 128);
+            err = sp_4096_mod_128_cond(a, a, m);
+
+            if (err == MP_OKAY) {
+                for (i = 31; i >= 0; i--) {
+                    if (e[0] >> i) {
+                        break;
+                    }
+                }
+
+                XMEMCPY(r, a, sizeof(sp_digit) * 128);
+                for (i--; i>=0; i--) {
+                    sp_4096_mont_sqr_128(r, r, m, mp);
+                    if (((e[0] >> i) & 1) == 1) {
+                        sp_4096_mont_mul_128(r, r, a, m, mp);
+                    }
+                }
+                XMEMSET(&r[128], 0, sizeof(sp_digit) * 128);
+                sp_4096_mont_reduce_128(r, m, mp);
+
+                for (i = 127; i > 0; i--) {
+                    if (r[i] != m[i]) {
+                        break;
+                    }
+                }
+                if (r[i] >= m[i]) {
+                    sp_4096_sub_in_place_128(r, m);
+                }
+            }
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+#endif
+
+    return err;
+}
+
+#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
+    sp_digit* a;
+    sp_digit* d = NULL;
+    sp_digit* m;
+    sp_digit* r;
+    int err = MP_OKAY;
+
+    (void)pm;
+    (void)qm;
+    (void)dpm;
+    (void)dqm;
+    (void)qim;
+
+    if (*outLen < 512U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(dm) > 4096) {
+           err = MP_READ_E;
+        }
+        if (inLen > 512) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 128 * 4, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        a = d + 128;
+        m = a + 256;
+        r = a;
+
+        sp_4096_from_bin(a, 128, in, inLen);
+        sp_4096_from_mp(d, 128, dm);
+        sp_4096_from_mp(m, 128, mm);
+        err = sp_4096_mod_exp_128(r, a, d, 4096, m, 0);
+    }
+    if (err == MP_OKAY) {
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+    }
+
+    if (d != NULL) {
+        XMEMSET(d, 0, sizeof(sp_digit) * 128);
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+
+    return err;
+#else
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r  A single precision number representing conditional add result.
+ * a  A single precision number to add with.
+ * b  A single precision number to add.
+ * m  Mask value to apply.
+ */
+static sp_digit sp_4096_cond_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        sp_digit m)
+{
+    sp_digit c = 0;
+
+#ifdef WOLFSSL_SP_SMALL
+    __asm__ __volatile__ (
+        "mov	r9, #0\n\t"
+        "mov	r8, #0\n\t"
+        "1:\n\t"
+        "adds	%[c], %[c], #-1\n\t"
+        "ldr	r4, [%[a], r8]\n\t"
+        "ldr	r5, [%[b], r8]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adc	%[c], r9, r9\n\t"
+        "str	r4, [%[r], r8]\n\t"
+        "add	r8, r8, #4\n\t"
+        "cmp	r8, #256\n\t"
+        "blt	1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r4", "r6", "r5", "r7", "r8", "r9"
+    );
+#else
+    __asm__ __volatile__ (
+
+        "mov	r9, #0\n\t"
+        "ldr	r4, [%[a], #0]\n\t"
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r5, [%[b], #0]\n\t"
+        "ldr	r7, [%[b], #4]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #0]\n\t"
+        "str	r6, [%[r], #4]\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "ldr	r6, [%[a], #12]\n\t"
+        "ldr	r5, [%[b], #8]\n\t"
+        "ldr	r7, [%[b], #12]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "str	r6, [%[r], #12]\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "ldr	r6, [%[a], #20]\n\t"
+        "ldr	r5, [%[b], #16]\n\t"
+        "ldr	r7, [%[b], #20]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "str	r6, [%[r], #20]\n\t"
+        "ldr	r4, [%[a], #24]\n\t"
+        "ldr	r6, [%[a], #28]\n\t"
+        "ldr	r5, [%[b], #24]\n\t"
+        "ldr	r7, [%[b], #28]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #24]\n\t"
+        "str	r6, [%[r], #28]\n\t"
+        "ldr	r4, [%[a], #32]\n\t"
+        "ldr	r6, [%[a], #36]\n\t"
+        "ldr	r5, [%[b], #32]\n\t"
+        "ldr	r7, [%[b], #36]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #32]\n\t"
+        "str	r6, [%[r], #36]\n\t"
+        "ldr	r4, [%[a], #40]\n\t"
+        "ldr	r6, [%[a], #44]\n\t"
+        "ldr	r5, [%[b], #40]\n\t"
+        "ldr	r7, [%[b], #44]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #40]\n\t"
+        "str	r6, [%[r], #44]\n\t"
+        "ldr	r4, [%[a], #48]\n\t"
+        "ldr	r6, [%[a], #52]\n\t"
+        "ldr	r5, [%[b], #48]\n\t"
+        "ldr	r7, [%[b], #52]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #48]\n\t"
+        "str	r6, [%[r], #52]\n\t"
+        "ldr	r4, [%[a], #56]\n\t"
+        "ldr	r6, [%[a], #60]\n\t"
+        "ldr	r5, [%[b], #56]\n\t"
+        "ldr	r7, [%[b], #60]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #56]\n\t"
+        "str	r6, [%[r], #60]\n\t"
+        "ldr	r4, [%[a], #64]\n\t"
+        "ldr	r6, [%[a], #68]\n\t"
+        "ldr	r5, [%[b], #64]\n\t"
+        "ldr	r7, [%[b], #68]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #64]\n\t"
+        "str	r6, [%[r], #68]\n\t"
+        "ldr	r4, [%[a], #72]\n\t"
+        "ldr	r6, [%[a], #76]\n\t"
+        "ldr	r5, [%[b], #72]\n\t"
+        "ldr	r7, [%[b], #76]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #72]\n\t"
+        "str	r6, [%[r], #76]\n\t"
+        "ldr	r4, [%[a], #80]\n\t"
+        "ldr	r6, [%[a], #84]\n\t"
+        "ldr	r5, [%[b], #80]\n\t"
+        "ldr	r7, [%[b], #84]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #80]\n\t"
+        "str	r6, [%[r], #84]\n\t"
+        "ldr	r4, [%[a], #88]\n\t"
+        "ldr	r6, [%[a], #92]\n\t"
+        "ldr	r5, [%[b], #88]\n\t"
+        "ldr	r7, [%[b], #92]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #88]\n\t"
+        "str	r6, [%[r], #92]\n\t"
+        "ldr	r4, [%[a], #96]\n\t"
+        "ldr	r6, [%[a], #100]\n\t"
+        "ldr	r5, [%[b], #96]\n\t"
+        "ldr	r7, [%[b], #100]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #96]\n\t"
+        "str	r6, [%[r], #100]\n\t"
+        "ldr	r4, [%[a], #104]\n\t"
+        "ldr	r6, [%[a], #108]\n\t"
+        "ldr	r5, [%[b], #104]\n\t"
+        "ldr	r7, [%[b], #108]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #104]\n\t"
+        "str	r6, [%[r], #108]\n\t"
+        "ldr	r4, [%[a], #112]\n\t"
+        "ldr	r6, [%[a], #116]\n\t"
+        "ldr	r5, [%[b], #112]\n\t"
+        "ldr	r7, [%[b], #116]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #112]\n\t"
+        "str	r6, [%[r], #116]\n\t"
+        "ldr	r4, [%[a], #120]\n\t"
+        "ldr	r6, [%[a], #124]\n\t"
+        "ldr	r5, [%[b], #120]\n\t"
+        "ldr	r7, [%[b], #124]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #120]\n\t"
+        "str	r6, [%[r], #124]\n\t"
+        "ldr	r4, [%[a], #128]\n\t"
+        "ldr	r6, [%[a], #132]\n\t"
+        "ldr	r5, [%[b], #128]\n\t"
+        "ldr	r7, [%[b], #132]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #128]\n\t"
+        "str	r6, [%[r], #132]\n\t"
+        "ldr	r4, [%[a], #136]\n\t"
+        "ldr	r6, [%[a], #140]\n\t"
+        "ldr	r5, [%[b], #136]\n\t"
+        "ldr	r7, [%[b], #140]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #136]\n\t"
+        "str	r6, [%[r], #140]\n\t"
+        "ldr	r4, [%[a], #144]\n\t"
+        "ldr	r6, [%[a], #148]\n\t"
+        "ldr	r5, [%[b], #144]\n\t"
+        "ldr	r7, [%[b], #148]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #144]\n\t"
+        "str	r6, [%[r], #148]\n\t"
+        "ldr	r4, [%[a], #152]\n\t"
+        "ldr	r6, [%[a], #156]\n\t"
+        "ldr	r5, [%[b], #152]\n\t"
+        "ldr	r7, [%[b], #156]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #152]\n\t"
+        "str	r6, [%[r], #156]\n\t"
+        "ldr	r4, [%[a], #160]\n\t"
+        "ldr	r6, [%[a], #164]\n\t"
+        "ldr	r5, [%[b], #160]\n\t"
+        "ldr	r7, [%[b], #164]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #160]\n\t"
+        "str	r6, [%[r], #164]\n\t"
+        "ldr	r4, [%[a], #168]\n\t"
+        "ldr	r6, [%[a], #172]\n\t"
+        "ldr	r5, [%[b], #168]\n\t"
+        "ldr	r7, [%[b], #172]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #168]\n\t"
+        "str	r6, [%[r], #172]\n\t"
+        "ldr	r4, [%[a], #176]\n\t"
+        "ldr	r6, [%[a], #180]\n\t"
+        "ldr	r5, [%[b], #176]\n\t"
+        "ldr	r7, [%[b], #180]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #176]\n\t"
+        "str	r6, [%[r], #180]\n\t"
+        "ldr	r4, [%[a], #184]\n\t"
+        "ldr	r6, [%[a], #188]\n\t"
+        "ldr	r5, [%[b], #184]\n\t"
+        "ldr	r7, [%[b], #188]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #184]\n\t"
+        "str	r6, [%[r], #188]\n\t"
+        "ldr	r4, [%[a], #192]\n\t"
+        "ldr	r6, [%[a], #196]\n\t"
+        "ldr	r5, [%[b], #192]\n\t"
+        "ldr	r7, [%[b], #196]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #192]\n\t"
+        "str	r6, [%[r], #196]\n\t"
+        "ldr	r4, [%[a], #200]\n\t"
+        "ldr	r6, [%[a], #204]\n\t"
+        "ldr	r5, [%[b], #200]\n\t"
+        "ldr	r7, [%[b], #204]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #200]\n\t"
+        "str	r6, [%[r], #204]\n\t"
+        "ldr	r4, [%[a], #208]\n\t"
+        "ldr	r6, [%[a], #212]\n\t"
+        "ldr	r5, [%[b], #208]\n\t"
+        "ldr	r7, [%[b], #212]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #208]\n\t"
+        "str	r6, [%[r], #212]\n\t"
+        "ldr	r4, [%[a], #216]\n\t"
+        "ldr	r6, [%[a], #220]\n\t"
+        "ldr	r5, [%[b], #216]\n\t"
+        "ldr	r7, [%[b], #220]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #216]\n\t"
+        "str	r6, [%[r], #220]\n\t"
+        "ldr	r4, [%[a], #224]\n\t"
+        "ldr	r6, [%[a], #228]\n\t"
+        "ldr	r5, [%[b], #224]\n\t"
+        "ldr	r7, [%[b], #228]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #224]\n\t"
+        "str	r6, [%[r], #228]\n\t"
+        "ldr	r4, [%[a], #232]\n\t"
+        "ldr	r6, [%[a], #236]\n\t"
+        "ldr	r5, [%[b], #232]\n\t"
+        "ldr	r7, [%[b], #236]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #232]\n\t"
+        "str	r6, [%[r], #236]\n\t"
+        "ldr	r4, [%[a], #240]\n\t"
+        "ldr	r6, [%[a], #244]\n\t"
+        "ldr	r5, [%[b], #240]\n\t"
+        "ldr	r7, [%[b], #244]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #240]\n\t"
+        "str	r6, [%[r], #244]\n\t"
+        "ldr	r4, [%[a], #248]\n\t"
+        "ldr	r6, [%[a], #252]\n\t"
+        "ldr	r5, [%[b], #248]\n\t"
+        "ldr	r7, [%[b], #252]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #248]\n\t"
+        "str	r6, [%[r], #252]\n\t"
+        "adc	%[c], r9, r9\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r4", "r6", "r5", "r7", "r8", "r9"
+    );
+#endif /* WOLFSSL_SP_SMALL */
+
+    return c;
+}
+
+/* RSA private key operation.
+ *
+ * in      Array of bytes representing the number to exponentiate, base.
+ * inLen   Number of bytes in base.
+ * dm      Private exponent.
+ * pm      First prime.
+ * qm      Second prime.
+ * dpm     First prime's CRT exponent.
+ * dqm     Second prime's CRT exponent.
+ * qim     Inverse of second prime mod p.
+ * mm      Modulus.
+ * out     Buffer to hold big-endian bytes of exponentiation result.
+ *         Must be at least 512 bytes long.
+ * outLen  Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm,
+    mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
+    byte* out, word32* outLen)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit a[128 * 2];
+    sp_digit p[64], q[64], dp[64];
+    sp_digit tmpa[128], tmpb[128];
+#else
+    sp_digit* t = NULL;
+    sp_digit* a;
+    sp_digit* p;
+    sp_digit* q;
+    sp_digit* dp;
+    sp_digit* tmpa;
+    sp_digit* tmpb;
+#endif
+    sp_digit* r;
+    sp_digit* qi;
+    sp_digit* dq;
+    sp_digit c;
+    int err = MP_OKAY;
+
+    (void)dm;
+    (void)mm;
+
+    if (*outLen < 512)
+        err = MP_TO_E;
+    if (err == MP_OKAY && (inLen > 512 || mp_count_bits(mm) != 4096))
+        err = MP_READ_E;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 11, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (t == NULL)
+            err = MEMORY_E;
+    }
+    if (err == MP_OKAY) {
+        a = t;
+        p = a + 128 * 2;
+        q = p + 64;
+        qi = dq = dp = q + 64;
+        tmpa = qi + 64;
+        tmpb = tmpa + 128;
+
+        r = t + 128;
+    }
+#else
+#endif
+
+    if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+        r = a;
+        qi = dq = dp;
+#endif
+        sp_4096_from_bin(a, 128, in, inLen);
+        sp_4096_from_mp(p, 64, pm);
+        sp_4096_from_mp(q, 64, qm);
+        sp_4096_from_mp(dp, 64, dpm);
+
+        err = sp_2048_mod_exp_64(tmpa, a, dp, 2048, p, 1);
+    }
+    if (err == MP_OKAY) {
+        sp_4096_from_mp(dq, 64, dqm);
+        err = sp_2048_mod_exp_64(tmpb, a, dq, 2048, q, 1);
+    }
+
+    if (err == MP_OKAY) {
+        c = sp_2048_sub_in_place_64(tmpa, tmpb);
+        c += sp_4096_cond_add_64(tmpa, tmpa, p, c);
+        sp_4096_cond_add_64(tmpa, tmpa, p, c);
+
+        sp_2048_from_mp(qi, 64, qim);
+        sp_2048_mul_64(tmpa, tmpa, qi);
+        err = sp_2048_mod_64(tmpa, tmpa, p);
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_mul_64(tmpa, q, tmpa);
+        XMEMSET(&tmpb[64], 0, sizeof(sp_digit) * 64);
+        sp_4096_add_128(r, tmpb, tmpa);
+
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        XMEMSET(t, 0, sizeof(sp_digit) * 64 * 11);
+        XFREE(t, NULL, DYNAMIC_TYPE_RSA);
+    }
+#else
+    XMEMSET(tmpa, 0, sizeof(tmpa));
+    XMEMSET(tmpb, 0, sizeof(tmpb));
+    XMEMSET(p,    0, sizeof(p));
+    XMEMSET(q,    0, sizeof(q));
+    XMEMSET(dp,   0, sizeof(dp));
+#endif
+
+    return err;
+}
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
+#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
+#endif /* WOLFSSL_HAVE_SP_RSA */
+#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
+                                              !defined(WOLFSSL_RSA_PUBLIC_ONLY))
+/* Convert an array of sp_digit to an mp_int.
+ *
+ * a  A single precision integer.
+ * r  A multi-precision integer.
+ */
+static int sp_4096_to_mp(const sp_digit* a, mp_int* r)
+{
+    int err;
+
+    err = mp_grow(r, (4096 + DIGIT_BIT - 1) / DIGIT_BIT);
+    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 32
+        XMEMCPY(r->dp, a, sizeof(sp_digit) * 128);
+        r->used = 128;
+        mp_clamp(r);
+#elif DIGIT_BIT < 32
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 128; i++) {
+            r->dp[j] |= (mp_digit)(a[i] << s);
+            r->dp[j] &= (1L << DIGIT_BIT) - 1;
+            s = DIGIT_BIT - s;
+            r->dp[++j] = (mp_digit)(a[i] >> s);
+            while (s + DIGIT_BIT <= 32) {
+                s += DIGIT_BIT;
+                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+                if (s == SP_WORD_SIZE) {
+                    r->dp[j] = 0;
+                }
+                else {
+                    r->dp[j] = (mp_digit)(a[i] >> s);
+                }
+            }
+            s = 32 - s;
+        }
+        r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#else
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 128; i++) {
+            r->dp[j] |= ((mp_digit)a[i]) << s;
+            if (s + 32 >= DIGIT_BIT) {
+    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+                r->dp[j] &= (1L << DIGIT_BIT) - 1;
+    #endif
+                s = DIGIT_BIT - s;
+                r->dp[++j] = a[i] >> s;
+                s = 32 - s;
+            }
+            else {
+                s += 32;
+            }
+        }
+        r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#endif
+    }
+
+    return err;
+}
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base  Base. MP integer.
+ * exp   Exponent. MP integer.
+ * mod   Modulus. MP integer.
+ * res   Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_4096(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+    int err = MP_OKAY;
+    sp_digit b[256], e[128], m[128];
+    sp_digit* r = b;
+    int expBits = mp_count_bits(exp);
+
+    if (mp_count_bits(base) > 4096) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expBits > 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_from_mp(b, 128, base);
+        sp_4096_from_mp(e, 128, exp);
+        sp_4096_from_mp(m, 128, mod);
+
+        err = sp_4096_mod_exp_128(r, b, e, expBits, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_4096_to_mp(r, res);
+    }
+
+    XMEMSET(e, 0, sizeof(e));
+
+    return err;
+}
+
+#ifdef WOLFSSL_HAVE_SP_DH
+
+#ifdef HAVE_FFDHE_4096
+static void sp_4096_lshift_128(sp_digit* r, sp_digit* a, byte n)
+{
+    __asm__ __volatile__ (
+        "mov	r6, #31\n\t"
+        "sub	r6, r6, %[n]\n\t"
+        "ldr	r3, [%[a], #508]\n\t"
+        "lsr	r4, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r4, r4, r6\n\t"
+        "ldr	r2, [%[a], #504]\n\t"
+        "str	r4, [%[r], #512]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #500]\n\t"
+        "str	r3, [%[r], #508]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #496]\n\t"
+        "str	r2, [%[r], #504]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #492]\n\t"
+        "str	r4, [%[r], #500]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #488]\n\t"
+        "str	r3, [%[r], #496]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #484]\n\t"
+        "str	r2, [%[r], #492]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #480]\n\t"
+        "str	r4, [%[r], #488]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #476]\n\t"
+        "str	r3, [%[r], #484]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #472]\n\t"
+        "str	r2, [%[r], #480]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #468]\n\t"
+        "str	r4, [%[r], #476]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #464]\n\t"
+        "str	r3, [%[r], #472]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #460]\n\t"
+        "str	r2, [%[r], #468]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #456]\n\t"
+        "str	r4, [%[r], #464]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #452]\n\t"
+        "str	r3, [%[r], #460]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #448]\n\t"
+        "str	r2, [%[r], #456]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #444]\n\t"
+        "str	r4, [%[r], #452]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #440]\n\t"
+        "str	r3, [%[r], #448]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #436]\n\t"
+        "str	r2, [%[r], #444]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #432]\n\t"
+        "str	r4, [%[r], #440]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #428]\n\t"
+        "str	r3, [%[r], #436]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #424]\n\t"
+        "str	r2, [%[r], #432]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #420]\n\t"
+        "str	r4, [%[r], #428]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #416]\n\t"
+        "str	r3, [%[r], #424]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #412]\n\t"
+        "str	r2, [%[r], #420]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #408]\n\t"
+        "str	r4, [%[r], #416]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #404]\n\t"
+        "str	r3, [%[r], #412]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #400]\n\t"
+        "str	r2, [%[r], #408]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #396]\n\t"
+        "str	r4, [%[r], #404]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #392]\n\t"
+        "str	r3, [%[r], #400]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #388]\n\t"
+        "str	r2, [%[r], #396]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #384]\n\t"
+        "str	r4, [%[r], #392]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #380]\n\t"
+        "str	r3, [%[r], #388]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #376]\n\t"
+        "str	r2, [%[r], #384]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #372]\n\t"
+        "str	r4, [%[r], #380]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #368]\n\t"
+        "str	r3, [%[r], #376]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #364]\n\t"
+        "str	r2, [%[r], #372]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #360]\n\t"
+        "str	r4, [%[r], #368]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #356]\n\t"
+        "str	r3, [%[r], #364]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #352]\n\t"
+        "str	r2, [%[r], #360]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #348]\n\t"
+        "str	r4, [%[r], #356]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #344]\n\t"
+        "str	r3, [%[r], #352]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #340]\n\t"
+        "str	r2, [%[r], #348]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #336]\n\t"
+        "str	r4, [%[r], #344]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #332]\n\t"
+        "str	r3, [%[r], #340]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #328]\n\t"
+        "str	r2, [%[r], #336]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #324]\n\t"
+        "str	r4, [%[r], #332]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #320]\n\t"
+        "str	r3, [%[r], #328]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #316]\n\t"
+        "str	r2, [%[r], #324]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #312]\n\t"
+        "str	r4, [%[r], #320]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #308]\n\t"
+        "str	r3, [%[r], #316]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #304]\n\t"
+        "str	r2, [%[r], #312]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #300]\n\t"
+        "str	r4, [%[r], #308]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #296]\n\t"
+        "str	r3, [%[r], #304]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #292]\n\t"
+        "str	r2, [%[r], #300]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #288]\n\t"
+        "str	r4, [%[r], #296]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #284]\n\t"
+        "str	r3, [%[r], #292]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #280]\n\t"
+        "str	r2, [%[r], #288]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #276]\n\t"
+        "str	r4, [%[r], #284]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #272]\n\t"
+        "str	r3, [%[r], #280]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #268]\n\t"
+        "str	r2, [%[r], #276]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #264]\n\t"
+        "str	r4, [%[r], #272]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #260]\n\t"
+        "str	r3, [%[r], #268]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #256]\n\t"
+        "str	r2, [%[r], #264]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #252]\n\t"
+        "str	r4, [%[r], #260]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #248]\n\t"
+        "str	r3, [%[r], #256]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #244]\n\t"
+        "str	r2, [%[r], #252]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #240]\n\t"
+        "str	r4, [%[r], #248]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #236]\n\t"
+        "str	r3, [%[r], #244]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #232]\n\t"
+        "str	r2, [%[r], #240]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #228]\n\t"
+        "str	r4, [%[r], #236]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #224]\n\t"
+        "str	r3, [%[r], #232]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #220]\n\t"
+        "str	r2, [%[r], #228]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #216]\n\t"
+        "str	r4, [%[r], #224]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #212]\n\t"
+        "str	r3, [%[r], #220]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #208]\n\t"
+        "str	r2, [%[r], #216]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #204]\n\t"
+        "str	r4, [%[r], #212]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #200]\n\t"
+        "str	r3, [%[r], #208]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #196]\n\t"
+        "str	r2, [%[r], #204]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #192]\n\t"
+        "str	r4, [%[r], #200]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #188]\n\t"
+        "str	r3, [%[r], #196]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #184]\n\t"
+        "str	r2, [%[r], #192]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #180]\n\t"
+        "str	r4, [%[r], #188]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #176]\n\t"
+        "str	r3, [%[r], #184]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #172]\n\t"
+        "str	r2, [%[r], #180]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #168]\n\t"
+        "str	r4, [%[r], #176]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #164]\n\t"
+        "str	r3, [%[r], #172]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #160]\n\t"
+        "str	r2, [%[r], #168]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #156]\n\t"
+        "str	r4, [%[r], #164]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #152]\n\t"
+        "str	r3, [%[r], #160]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #148]\n\t"
+        "str	r2, [%[r], #156]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #144]\n\t"
+        "str	r4, [%[r], #152]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #140]\n\t"
+        "str	r3, [%[r], #148]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #136]\n\t"
+        "str	r2, [%[r], #144]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #132]\n\t"
+        "str	r4, [%[r], #140]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #128]\n\t"
+        "str	r3, [%[r], #136]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #124]\n\t"
+        "str	r2, [%[r], #132]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #120]\n\t"
+        "str	r4, [%[r], #128]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #116]\n\t"
+        "str	r3, [%[r], #124]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #112]\n\t"
+        "str	r2, [%[r], #120]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #108]\n\t"
+        "str	r4, [%[r], #116]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #104]\n\t"
+        "str	r3, [%[r], #112]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #100]\n\t"
+        "str	r2, [%[r], #108]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #96]\n\t"
+        "str	r4, [%[r], #104]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #92]\n\t"
+        "str	r3, [%[r], #100]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #88]\n\t"
+        "str	r2, [%[r], #96]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #84]\n\t"
+        "str	r4, [%[r], #92]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #80]\n\t"
+        "str	r3, [%[r], #88]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #76]\n\t"
+        "str	r2, [%[r], #84]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #72]\n\t"
+        "str	r4, [%[r], #80]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #68]\n\t"
+        "str	r3, [%[r], #76]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #64]\n\t"
+        "str	r2, [%[r], #72]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #60]\n\t"
+        "str	r4, [%[r], #68]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #56]\n\t"
+        "str	r3, [%[r], #64]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #52]\n\t"
+        "str	r2, [%[r], #60]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #48]\n\t"
+        "str	r4, [%[r], #56]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #44]\n\t"
+        "str	r3, [%[r], #52]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #40]\n\t"
+        "str	r2, [%[r], #48]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #36]\n\t"
+        "str	r4, [%[r], #44]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #32]\n\t"
+        "str	r3, [%[r], #40]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #28]\n\t"
+        "str	r2, [%[r], #36]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #24]\n\t"
+        "str	r4, [%[r], #32]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "str	r3, [%[r], #28]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #16]\n\t"
+        "str	r2, [%[r], #24]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #12]\n\t"
+        "str	r4, [%[r], #20]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "str	r3, [%[r], #16]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #4]\n\t"
+        "str	r2, [%[r], #12]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #0]\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "str	r2, [%[r]]\n\t"
+        "str	r3, [%[r], #4]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [n] "r" (n)
+        : "memory", "r2", "r3", "r4", "r5", "r6"
+    );
+}
+
+/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
+ *
+ * r     A single precision number that is the result of the operation.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_4096_mod_exp_2_128(sp_digit* r, const sp_digit* e, int bits,
+        const sp_digit* m)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit nd[256];
+    sp_digit td[129];
+#else
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit* tmp;
+    sp_digit mp = 1;
+    sp_digit n, o;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 385, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        norm = td;
+        tmp  = td + 256;
+#else
+        norm = nd;
+        tmp  = td;
+#endif
+
+        sp_4096_mont_setup(m, &mp);
+        sp_4096_mont_norm_128(norm, m);
+
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 5;
+        if (c == 32) {
+            c = 27;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
+        sp_4096_lshift_128(r, norm, y);
+        for (; i>=0 || c>=5; ) {
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 27;
+                n <<= 5;
+                c = 27;
+            }
+            else if (c < 5) {
+                y = n >> 27;
+                n = e[i--];
+                c = 5 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                y = (n >> 27) & 0x1f;
+                n <<= 5;
+                c -= 5;
+            }
+
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+
+            sp_4096_lshift_128(r, r, y);
+            sp_4096_mul_d_128(tmp, norm, r[128]);
+            r[128] = 0;
+            o = sp_4096_add_128(r, r, tmp);
+            sp_4096_cond_sub_128(r, r, m, (sp_digit)0 - o);
+        }
+
+        XMEMSET(&r[128], 0, sizeof(sp_digit) * 128U);
+        sp_4096_mont_reduce_128(r, m, mp);
+
+        mask = 0 - (sp_4096_cmp_128(r, m) >= 0);
+        sp_4096_cond_sub_128(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#endif /* HAVE_FFDHE_4096 */
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base     Base.
+ * exp      Array of bytes that is the exponent.
+ * expLen   Length of data, in bytes, in exponent.
+ * mod      Modulus.
+ * out      Buffer to hold big-endian bytes of exponentiation result.
+ *          Must be at least 512 bytes long.
+ * outLen   Length, in bytes, of exponentiation result.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_DhExp_4096(mp_int* base, const byte* exp, word32 expLen,
+    mp_int* mod, byte* out, word32* outLen)
+{
+    int err = MP_OKAY;
+    sp_digit b[256], e[128], m[128];
+    sp_digit* r = b;
+    word32 i;
+
+    if (mp_count_bits(base) > 4096) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expLen > 512) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_from_mp(b, 128, base);
+        sp_4096_from_bin(e, 128, exp, expLen);
+        sp_4096_from_mp(m, 128, mod);
+
+    #ifdef HAVE_FFDHE_4096
+        if (base->used == 1 && base->dp[0] == 2 && m[127] == (sp_digit)-1)
+            err = sp_4096_mod_exp_2_128(r, e, expLen * 8, m);
+        else
+    #endif
+            err = sp_4096_mod_exp_128(r, b, e, expLen * 8, m, 0);
+
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+        for (i=0; i<512 && out[i] == 0; i++) {
+        }
+        *outLen -= i;
+        XMEMMOVE(out, out + i, *outLen);
+
+    }
+
+    XMEMSET(e, 0, sizeof(e));
+
+    return err;
+}
+#endif /* WOLFSSL_HAVE_SP_DH */
+
+#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
+
+#endif /* WOLFSSL_SP_4096 */
+
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
+#ifdef WOLFSSL_HAVE_SP_ECC
+#ifndef WOLFSSL_SP_NO_256
+
+/* Point structure to use. */
+typedef struct sp_point_256 {
+    sp_digit x[2 * 8];
+    sp_digit y[2 * 8];
+    sp_digit z[2 * 8];
+    int infinity;
+} sp_point_256;
+
+/* The modulus (prime) of the curve P256. */
+static const sp_digit p256_mod[8] = {
+    0xffffffff,0xffffffff,0xffffffff,0x00000000,0x00000000,0x00000000,
+    0x00000001,0xffffffff
+};
+/* The Montogmery normalizer for modulus of the curve P256. */
+static const sp_digit p256_norm_mod[8] = {
+    0x00000001,0x00000000,0x00000000,0xffffffff,0xffffffff,0xffffffff,
+    0xfffffffe,0x00000000
+};
+/* The Montogmery multiplier for modulus of the curve P256. */
+static const sp_digit p256_mp_mod = 0x00000001;
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+                                            defined(HAVE_ECC_VERIFY)
+/* The order of the curve P256. */
+static const sp_digit p256_order[8] = {
+    0xfc632551,0xf3b9cac2,0xa7179e84,0xbce6faad,0xffffffff,0xffffffff,
+    0x00000000,0xffffffff
+};
+#endif
+/* The order of the curve P256 minus 2. */
+static const sp_digit p256_order2[8] = {
+    0xfc63254f,0xf3b9cac2,0xa7179e84,0xbce6faad,0xffffffff,0xffffffff,
+    0x00000000,0xffffffff
+};
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montogmery normalizer for order of the curve P256. */
+static const sp_digit p256_norm_order[8] = {
+    0x039cdaaf,0x0c46353d,0x58e8617b,0x43190552,0x00000000,0x00000000,
+    0xffffffff,0x00000000
+};
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montogmery multiplier for order of the curve P256. */
+static const sp_digit p256_mp_order = 0xee00bc4f;
+#endif
+/* The base point of curve P256. */
+static const sp_point_256 p256_base = {
+    /* X ordinate */
+    {
+        0xd898c296,0xf4a13945,0x2deb33a0,0x77037d81,0x63a440f2,0xf8bce6e5,
+        0xe12c4247,0x6b17d1f2,
+        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* Y ordinate */
+    {
+        0x37bf51f5,0xcbb64068,0x6b315ece,0x2bce3357,0x7c0f9e16,0x8ee7eb4a,
+        0xfe1a7f9b,0x4fe342e2,
+        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* Z ordinate */
+    {
+        0x00000001,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
+        0x00000000,0x00000000,
+        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* infinity */
+    0
+};
+#if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY)
+static const sp_digit p256_b[8] = {
+    0x27d2604b,0x3bce3c3e,0xcc53b0f6,0x651d06b0,0x769886bc,0xb3ebbd55,
+    0xaa3a93e7,0x5ac635d8
+};
+#endif
+
+static int sp_256_point_new_ex_8(void* heap, sp_point_256* sp, sp_point_256** p)
+{
+    int ret = MP_OKAY;
+    (void)heap;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    (void)sp;
+    *p = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, DYNAMIC_TYPE_ECC);
+#else
+    *p = sp;
+#endif
+    if (*p == NULL) {
+        ret = MEMORY_E;
+    }
+    return ret;
+}
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* Allocate memory for point and return error. */
+#define sp_256_point_new_8(heap, sp, p) sp_256_point_new_ex_8((heap), NULL, &(p))
+#else
+/* Set pointer to data and return no error. */
+#define sp_256_point_new_8(heap, sp, p) sp_256_point_new_ex_8((heap), &(sp), &(p))
+#endif
+
+
+static void sp_256_point_free_8(sp_point_256* p, int clear, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* If valid pointer then clear point data if requested and free data. */
+    if (p != NULL) {
+        if (clear != 0) {
+            XMEMSET(p, 0, sizeof(*p));
+        }
+        XFREE(p, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+/* Clear point data if requested. */
+    if (clear != 0) {
+        XMEMSET(p, 0, sizeof(*p));
+    }
+#endif
+    (void)heap;
+}
+
+/* Multiply a number by Montogmery normalizer mod modulus (prime).
+ *
+ * r  The resulting Montgomery form number.
+ * a  The number to convert.
+ * m  The modulus (prime).
+ */
+static int sp_256_mod_mul_norm_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    (void)m;
+
+    __asm__ __volatile__ (
+        "sub	sp, sp, #24\n\t"
+        "ldr	r2, [%[a], #0]\n\t"
+        "ldr	r3, [%[a], #4]\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "ldr	r5, [%[a], #12]\n\t"
+        "ldr	r6, [%[a], #16]\n\t"
+        "ldr	r7, [%[a], #20]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[a], #28]\n\t"
+        "# Clear overflow and underflow\n\t"
+        "mov	r14, #0\n\t"
+        "mov	r12, #0\n\t"
+        "# t[0] =  1  1  0 -1 -1 -1 -1  0\n\t"
+        "adds	r10, r2, r3\n\t"
+        "adc	r14, r14, #0\n\t"
+        "subs	r10, r10, r5\n\t"
+        "sbc	r12, r12, #0\n\t"
+        "subs	r10, r10, r6\n\t"
+        "sbc	r12, r12, #0\n\t"
+        "subs	r10, r10, r7\n\t"
+        "sbc	r12, r12, #0\n\t"
+        "subs	r10, r10, r8\n\t"
+        "sbc	r12, r12, #0\n\t"
+        "# Store t[0]\n\t"
+        "str	r10, [sp, #0]\n\t"
+        "neg	r12, r12\n\t"
+        "mov	r10, #0\n\t"
+        "# t[1] =  0  1  1  0 -1 -1 -1 -1\n\t"
+        "adds	r14, r14, r3\n\t"
+        "adc	r10, r10, #0\n\t"
+        "adds	r14, r14, r4\n\t"
+        "adc	r10, r10, #0\n\t"
+        "subs	r14, r14, r12\n\t"
+        "mov	r12, #0\n\t"
+        "sbc	r12, r12, #0\n\t"
+        "subs	r14, r14, r6\n\t"
+        "sbc	r12, r12, #0\n\t"
+        "subs	r14, r14, r7\n\t"
+        "sbc	r12, r12, #0\n\t"
+        "subs	r14, r14, r8\n\t"
+        "sbc	r12, r12, #0\n\t"
+        "subs	r14, r14, r9\n\t"
+        "sbc	r12, r12, #0\n\t"
+        "# Store t[1]\n\t"
+        "str	r14, [sp, #4]\n\t"
+        "neg	r12, r12\n\t"
+        "mov	r14, #0\n\t"
+        "# t[2] =  0  0  1  1  0 -1 -1 -1\n\t"
+        "adds	r10, r10, r4\n\t"
+        "adc	r14, r14, #0\n\t"
+        "adds	r10, r10, r5\n\t"
+        "adc	r14, r14, #0\n\t"
+        "subs	r10, r10, r12\n\t"
+        "mov	r12, #0\n\t"
+        "sbc	r12, r12, #0\n\t"
+        "subs	r10, r10, r7\n\t"
+        "sbc	r12, r12, #0\n\t"
+        "subs	r10, r10, r8\n\t"
+        "sbc	r12, r12, #0\n\t"
+        "subs	r10, r10, r9\n\t"
+        "sbc	r12, r12, #0\n\t"
+        "# Store t[2]\n\t"
+        "str	r10, [sp, #8]\n\t"
+        "neg	r12, r12\n\t"
+        "mov	r10, #0\n\t"
+        "# t[3] = -1 -1  0  2  2  1  0 -1\n\t"
+        "adds	r14, r14, r5\n\t"
+        "adc	r10, r10, #0\n\t"
+        "adds	r14, r14, r5\n\t"
+        "adc	r10, r10, #0\n\t"
+        "adds	r14, r14, r6\n\t"
+        "adc	r10, r10, #0\n\t"
+        "adds	r14, r14, r6\n\t"
+        "adc	r10, r10, #0\n\t"
+        "adds	r14, r14, r7\n\t"
+        "adc	r10, r10, #0\n\t"
+        "subs	r14, r14, r12\n\t"
+        "mov	r12, #0\n\t"
+        "sbc	r12, r12, #0\n\t"
+        "subs	r14, r14, r2\n\t"
+        "sbc	r12, r12, #0\n\t"
+        "subs	r14, r14, r3\n\t"
+        "sbc	r12, r12, #0\n\t"
+        "subs	r14, r14, r9\n\t"
+        "sbc	r12, r12, #0\n\t"
+        "# Store t[3]\n\t"
+        "str	r14, [sp, #12]\n\t"
+        "neg	r12, r12\n\t"
+        "mov	r14, #0\n\t"
+        "# t[4] =  0 -1 -1  0  2  2  1  0\n\t"
+        "adds	r10, r10, r6\n\t"
+        "adc	r14, r14, #0\n\t"
+        "adds	r10, r10, r6\n\t"
+        "adc	r14, r14, #0\n\t"
+        "adds	r10, r10, r7\n\t"
+        "adc	r14, r14, #0\n\t"
+        "adds	r10, r10, r7\n\t"
+        "adc	r14, r14, #0\n\t"
+        "adds	r10, r10, r8\n\t"
+        "adc	r14, r14, #0\n\t"
+        "subs	r10, r10, r12\n\t"
+        "mov	r12, #0\n\t"
+        "sbc	r12, r12, #0\n\t"
+        "subs	r10, r10, r3\n\t"
+        "sbc	r12, r12, #0\n\t"
+        "subs	r10, r10, r4\n\t"
+        "sbc	r12, r12, #0\n\t"
+        "# Store t[4]\n\t"
+        "str	r10, [sp, #16]\n\t"
+        "neg	r12, r12\n\t"
+        "mov	r10, #0\n\t"
+        "# t[5] =  0  0 -1 -1  0  2  2  1\n\t"
+        "adds	r14, r14, r7\n\t"
+        "adc	r10, r10, #0\n\t"
+        "adds	r14, r14, r7\n\t"
+        "adc	r10, r10, #0\n\t"
+        "adds	r14, r14, r8\n\t"
+        "adc	r10, r10, #0\n\t"
+        "adds	r14, r14, r8\n\t"
+        "adc	r10, r10, #0\n\t"
+        "adds	r14, r14, r9\n\t"
+        "adc	r10, r10, #0\n\t"
+        "subs	r14, r14, r12\n\t"
+        "mov	r12, #0\n\t"
+        "sbc	r12, r12, #0\n\t"
+        "subs	r14, r14, r4\n\t"
+        "sbc	r12, r12, #0\n\t"
+        "subs	r14, r14, r5\n\t"
+        "sbc	r12, r12, #0\n\t"
+        "# Store t[5]\n\t"
+        "str	r14, [sp, #20]\n\t"
+        "neg	r12, r12\n\t"
+        "mov	r14, #0\n\t"
+        "# t[6] = -1 -1  0  0  0  1  3  2\n\t"
+        "adds	r10, r10, r7\n\t"
+        "adc	r14, r14, #0\n\t"
+        "adds	r10, r10, r8\n\t"
+        "adc	r14, r14, #0\n\t"
+        "adds	r10, r10, r8\n\t"
+        "adc	r14, r14, #0\n\t"
+        "adds	r10, r10, r8\n\t"
+        "adc	r14, r14, #0\n\t"
+        "adds	r10, r10, r9\n\t"
+        "adc	r14, r14, #0\n\t"
+        "adds	r10, r10, r9\n\t"
+        "adc	r14, r14, #0\n\t"
+        "subs	r10, r10, r12\n\t"
+        "mov	r12, #0\n\t"
+        "sbc	r12, r12, #0\n\t"
+        "subs	r10, r10, r2\n\t"
+        "sbc	r12, r12, #0\n\t"
+        "subs	r10, r10, r3\n\t"
+        "sbc	r12, r12, #0\n\t"
+        "# Store t[6]\n\t"
+        "mov	r8, r10\n\t"
+        "neg	r12, r12\n\t"
+        "mov	r10, #0\n\t"
+        "# t[7] =  1  0 -1 -1 -1 -1  0  3\n\t"
+        "adds	r14, r14, r2\n\t"
+        "adc	r10, r10, #0\n\t"
+        "adds	r14, r14, r9\n\t"
+        "adc	r10, r10, #0\n\t"
+        "adds	r14, r14, r9\n\t"
+        "adc	r10, r10, #0\n\t"
+        "adds	r14, r14, r9\n\t"
+        "adc	r10, r10, #0\n\t"
+        "subs	r14, r14, r12\n\t"
+        "mov	r12, #0\n\t"
+        "sbc	r12, r12, #0\n\t"
+        "subs	r14, r14, r4\n\t"
+        "sbc	r12, r12, #0\n\t"
+        "subs	r14, r14, r5\n\t"
+        "sbc	r12, r12, #0\n\t"
+        "subs	r14, r14, r6\n\t"
+        "sbc	r12, r12, #0\n\t"
+        "subs	r14, r14, r7\n\t"
+        "sbc	r12, r12, #0\n\t"
+        "# Store t[7]\n\t"
+        "# Load intermediate\n\t"
+        "ldr	r2, [sp, #0]\n\t"
+        "ldr	r3, [sp, #4]\n\t"
+        "ldr	r4, [sp, #8]\n\t"
+        "ldr	r5, [sp, #12]\n\t"
+        "ldr	r6, [sp, #16]\n\t"
+        "ldr	r7, [sp, #20]\n\t"
+        "neg	r12, r12\n\t"
+        "# Add overflow\n\t"
+        "# Subtract underflow - add neg underflow\n\t"
+        "adds	r2, r2, r10\n\t"
+        "adcs	r3, r3, #0\n\t"
+        "adcs	r4, r4, #0\n\t"
+        "adds	r5, r5, r12\n\t"
+        "adcs	r6, r6, #0\n\t"
+        "adcs	r7, r7, #0\n\t"
+        "adcs	r8, r8, r12\n\t"
+        "adc	r14, r14, r10\n\t"
+        "# Subtract overflow\n\t"
+        "# Add underflow - subtract neg underflow\n\t"
+        "subs	r2, r2, r12\n\t"
+        "sbcs	r3, r3, #0\n\t"
+        "sbcs	r4, r4, #0\n\t"
+        "subs	r5, r5, r10\n\t"
+        "sbcs	r6, r6, #0\n\t"
+        "sbcs	r7, r7, #0\n\t"
+        "sbcs	r8, r8, r10\n\t"
+        "sbc	r14, r14, r12\n\t"
+        "# Store result\n\t"
+        "str	r2, [%[r], #0]\n\t"
+        "str	r3, [%[r], #4]\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "str	r5, [%[r], #12]\n\t"
+        "str	r6, [%[r], #16]\n\t"
+        "str	r7, [%[r], #20]\n\t"
+        "str	r8, [%[r], #24]\n\t"
+        "str	r14, [%[r], #28]\n\t"
+        "add	sp, sp, #24\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+    );
+
+    return MP_OKAY;
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
+ * a  A multi-precision integer.
+ */
+static void sp_256_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 32
+    int j;
+
+    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+    for (j = a->used; j < size; j++) {
+        r[j] = 0;
+    }
+#elif DIGIT_BIT > 32
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
+        r[j] &= 0xffffffff;
+        s = 32U - s;
+        if (j + 1 >= size) {
+            break;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 32U) <= (word32)DIGIT_BIT) {
+            s += 32U;
+            r[j] &= 0xffffffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#else
+    int i, j = 0, s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i]) << s;
+        if (s + DIGIT_BIT >= 32) {
+            r[j] &= 0xffffffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            s = 32 - s;
+            if (s == DIGIT_BIT) {
+                r[++j] = 0;
+                s = 0;
+            }
+            else {
+                r[++j] = a->dp[i] >> s;
+                s = DIGIT_BIT - s;
+            }
+        }
+        else {
+            s += DIGIT_BIT;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#endif
+}
+
+/* Convert a point of type ecc_point to type sp_point_256.
+ *
+ * p   Point of type sp_point_256 (result).
  * pm  Point of type ecc_point.
  */
-static void sp_256_point_from_ecc_point_8(sp_point* p, ecc_point* pm)
+static void sp_256_point_from_ecc_point_8(sp_point_256* p, const ecc_point* pm)
 {
     XMEMSET(p->x, 0, sizeof(p->x));
     XMEMSET(p->y, 0, sizeof(p->y));
@@ -17000,12 +73738,12 @@
  * a  A single precision integer.
  * r  A multi-precision integer.
  */
-static int sp_256_to_mp(sp_digit* a, mp_int* r)
+static int sp_256_to_mp(const sp_digit* a, mp_int* r)
 {
     int err;
 
     err = mp_grow(r, (256 + DIGIT_BIT - 1) / DIGIT_BIT);
-    if (err == MP_OKAY) {
+    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
 #if DIGIT_BIT == 32
         XMEMCPY(r->dp, a, sizeof(sp_digit) * 8);
         r->used = 8;
@@ -17015,14 +73753,19 @@
 
         r->dp[0] = 0;
         for (i = 0; i < 8; i++) {
-            r->dp[j] |= a[i] << s;
-            r->dp[j] &= (1l << DIGIT_BIT) - 1;
+            r->dp[j] |= (mp_digit)(a[i] << s);
+            r->dp[j] &= (1L << DIGIT_BIT) - 1;
             s = DIGIT_BIT - s;
-            r->dp[++j] = a[i] >> s;
+            r->dp[++j] = (mp_digit)(a[i] >> s);
             while (s + DIGIT_BIT <= 32) {
                 s += DIGIT_BIT;
-                r->dp[j] &= (1l << DIGIT_BIT) - 1;
-                r->dp[++j] = a[i] >> s;
+                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+                if (s == SP_WORD_SIZE) {
+                    r->dp[j] = 0;
+                }
+                else {
+                    r->dp[j] = (mp_digit)(a[i] >> s);
+                }
             }
             s = 32 - s;
         }
@@ -17035,15 +73778,16 @@
         for (i = 0; i < 8; i++) {
             r->dp[j] |= ((mp_digit)a[i]) << s;
             if (s + 32 >= DIGIT_BIT) {
-    #if DIGIT_BIT < 32
-                r->dp[j] &= (1l << DIGIT_BIT) - 1;
+    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+                r->dp[j] &= (1L << DIGIT_BIT) - 1;
     #endif
                 s = DIGIT_BIT - s;
                 r->dp[++j] = a[i] >> s;
                 s = 32 - s;
             }
-            else
+            else {
                 s += 32;
+            }
         }
         r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT;
         mp_clamp(r);
@@ -17053,26 +73797,1299 @@
     return err;
 }
 
-/* Convert a point of type sp_point to type ecc_point.
- *
- * p   Point of type sp_point.
+/* Convert a point of type sp_point_256 to type ecc_point.
+ *
+ * p   Point of type sp_point_256.
  * pm  Point of type ecc_point (result).
  * returns MEMORY_E when allocation of memory in ecc_point fails otherwise
  * MP_OKAY.
  */
-static int sp_256_point_to_ecc_point_8(sp_point* p, ecc_point* pm)
+static int sp_256_point_to_ecc_point_8(const sp_point_256* p, ecc_point* pm)
 {
     int err;
 
     err = sp_256_to_mp(p->x, pm->x);
-    if (err == MP_OKAY)
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(p->y, pm->y);
-    if (err == MP_OKAY)
+    }
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(p->z, pm->z);
+    }
 
     return err;
 }
 
+/* Multiply two Montogmery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r   Result of multiplication.
+ * a   First number to multiply in Montogmery form.
+ * b   Second number to multiply in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+SP_NOINLINE static void sp_256_mont_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    (void)mp;
+    (void)m;
+
+    __asm__ __volatile__ (
+        "sub	sp, sp, #68\n\t"
+        "mov	r5, #0\n\t"
+        "#  A[0] * B[0]\n\t"
+        "ldr	r6, [%[a], #0]\n\t"
+        "ldr	r7, [%[b], #0]\n\t"
+        "umull	r8, r9, r6, r7\n\t"
+        "str	r8, [sp, #0]\n\t"
+        "#  A[0] * B[1]\n\t"
+        "ldr	r6, [%[a], #0]\n\t"
+        "ldr	r7, [%[b], #4]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r9, r3, r9\n\t"
+        "adc	r10, r4, #0\n\t"
+        "#  A[1] * B[0]\n\t"
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r7, [%[b], #0]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r9, r3, r9\n\t"
+        "adcs	r10, r4, r10\n\t"
+        "adc	r14, r5, #0\n\t"
+        "str	r9, [sp, #4]\n\t"
+        "#  A[0] * B[2]\n\t"
+        "ldr	r6, [%[a], #0]\n\t"
+        "ldr	r7, [%[b], #8]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r10, r3, r10\n\t"
+        "adc	r14, r4, r14\n\t"
+        "#  A[1] * B[1]\n\t"
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r7, [%[b], #4]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r10, r3, r10\n\t"
+        "adcs	r14, r4, r14\n\t"
+        "adc	r8, r5, #0\n\t"
+        "#  A[2] * B[0]\n\t"
+        "ldr	r6, [%[a], #8]\n\t"
+        "ldr	r7, [%[b], #0]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r10, r3, r10\n\t"
+        "adcs	r14, r4, r14\n\t"
+        "adc	r8, r5, r8\n\t"
+        "str	r10, [sp, #8]\n\t"
+        "#  A[0] * B[3]\n\t"
+        "ldr	r6, [%[a], #0]\n\t"
+        "ldr	r7, [%[b], #12]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r14, r3, r14\n\t"
+        "adcs	r8, r4, r8\n\t"
+        "adc	r9, r5, #0\n\t"
+        "#  A[1] * B[2]\n\t"
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r7, [%[b], #8]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r14, r3, r14\n\t"
+        "adcs	r8, r4, r8\n\t"
+        "adc	r9, r5, r9\n\t"
+        "#  A[2] * B[1]\n\t"
+        "ldr	r6, [%[a], #8]\n\t"
+        "ldr	r7, [%[b], #4]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r14, r3, r14\n\t"
+        "adcs	r8, r4, r8\n\t"
+        "adc	r9, r5, r9\n\t"
+        "#  A[3] * B[0]\n\t"
+        "ldr	r6, [%[a], #12]\n\t"
+        "ldr	r7, [%[b], #0]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r14, r3, r14\n\t"
+        "adcs	r8, r4, r8\n\t"
+        "adc	r9, r5, r9\n\t"
+        "str	r14, [sp, #12]\n\t"
+        "#  A[0] * B[4]\n\t"
+        "ldr	r6, [%[a], #0]\n\t"
+        "ldr	r7, [%[b], #16]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r8, r3, r8\n\t"
+        "adcs	r9, r4, r9\n\t"
+        "adc	r10, r5, #0\n\t"
+        "#  A[1] * B[3]\n\t"
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r7, [%[b], #12]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r8, r3, r8\n\t"
+        "adcs	r9, r4, r9\n\t"
+        "adc	r10, r5, r10\n\t"
+        "#  A[2] * B[2]\n\t"
+        "ldr	r6, [%[a], #8]\n\t"
+        "ldr	r7, [%[b], #8]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r8, r3, r8\n\t"
+        "adcs	r9, r4, r9\n\t"
+        "adc	r10, r5, r10\n\t"
+        "#  A[3] * B[1]\n\t"
+        "ldr	r6, [%[a], #12]\n\t"
+        "ldr	r7, [%[b], #4]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r8, r3, r8\n\t"
+        "adcs	r9, r4, r9\n\t"
+        "adc	r10, r5, r10\n\t"
+        "#  A[4] * B[0]\n\t"
+        "ldr	r6, [%[a], #16]\n\t"
+        "ldr	r7, [%[b], #0]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r8, r3, r8\n\t"
+        "adcs	r9, r4, r9\n\t"
+        "adc	r10, r5, r10\n\t"
+        "str	r8, [sp, #16]\n\t"
+        "#  A[0] * B[5]\n\t"
+        "ldr	r6, [%[a], #0]\n\t"
+        "ldr	r7, [%[b], #20]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r9, r3, r9\n\t"
+        "adcs	r10, r4, r10\n\t"
+        "adc	r14, r5, #0\n\t"
+        "#  A[1] * B[4]\n\t"
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r7, [%[b], #16]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r9, r3, r9\n\t"
+        "adcs	r10, r4, r10\n\t"
+        "adc	r14, r5, r14\n\t"
+        "#  A[2] * B[3]\n\t"
+        "ldr	r6, [%[a], #8]\n\t"
+        "ldr	r7, [%[b], #12]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r9, r3, r9\n\t"
+        "adcs	r10, r4, r10\n\t"
+        "adc	r14, r5, r14\n\t"
+        "#  A[3] * B[2]\n\t"
+        "ldr	r6, [%[a], #12]\n\t"
+        "ldr	r7, [%[b], #8]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r9, r3, r9\n\t"
+        "adcs	r10, r4, r10\n\t"
+        "adc	r14, r5, r14\n\t"
+        "#  A[4] * B[1]\n\t"
+        "ldr	r6, [%[a], #16]\n\t"
+        "ldr	r7, [%[b], #4]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r9, r3, r9\n\t"
+        "adcs	r10, r4, r10\n\t"
+        "adc	r14, r5, r14\n\t"
+        "#  A[5] * B[0]\n\t"
+        "ldr	r6, [%[a], #20]\n\t"
+        "ldr	r7, [%[b], #0]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r9, r3, r9\n\t"
+        "adcs	r10, r4, r10\n\t"
+        "adc	r14, r5, r14\n\t"
+        "str	r9, [sp, #20]\n\t"
+        "#  A[0] * B[6]\n\t"
+        "ldr	r6, [%[a], #0]\n\t"
+        "ldr	r7, [%[b], #24]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r10, r3, r10\n\t"
+        "adcs	r14, r4, r14\n\t"
+        "adc	r8, r5, #0\n\t"
+        "#  A[1] * B[5]\n\t"
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r7, [%[b], #20]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r10, r3, r10\n\t"
+        "adcs	r14, r4, r14\n\t"
+        "adc	r8, r5, r8\n\t"
+        "#  A[2] * B[4]\n\t"
+        "ldr	r6, [%[a], #8]\n\t"
+        "ldr	r7, [%[b], #16]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r10, r3, r10\n\t"
+        "adcs	r14, r4, r14\n\t"
+        "adc	r8, r5, r8\n\t"
+        "#  A[3] * B[3]\n\t"
+        "ldr	r6, [%[a], #12]\n\t"
+        "ldr	r7, [%[b], #12]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r10, r3, r10\n\t"
+        "adcs	r14, r4, r14\n\t"
+        "adc	r8, r5, r8\n\t"
+        "#  A[4] * B[2]\n\t"
+        "ldr	r6, [%[a], #16]\n\t"
+        "ldr	r7, [%[b], #8]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r10, r3, r10\n\t"
+        "adcs	r14, r4, r14\n\t"
+        "adc	r8, r5, r8\n\t"
+        "#  A[5] * B[1]\n\t"
+        "ldr	r6, [%[a], #20]\n\t"
+        "ldr	r7, [%[b], #4]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r10, r3, r10\n\t"
+        "adcs	r14, r4, r14\n\t"
+        "adc	r8, r5, r8\n\t"
+        "#  A[6] * B[0]\n\t"
+        "ldr	r6, [%[a], #24]\n\t"
+        "ldr	r7, [%[b], #0]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r10, r3, r10\n\t"
+        "adcs	r14, r4, r14\n\t"
+        "adc	r8, r5, r8\n\t"
+        "str	r10, [sp, #24]\n\t"
+        "#  A[0] * B[7]\n\t"
+        "ldr	r6, [%[a], #0]\n\t"
+        "ldr	r7, [%[b], #28]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r14, r3, r14\n\t"
+        "adcs	r8, r4, r8\n\t"
+        "adc	r9, r5, #0\n\t"
+        "#  A[1] * B[6]\n\t"
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r7, [%[b], #24]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r14, r3, r14\n\t"
+        "adcs	r8, r4, r8\n\t"
+        "adc	r9, r5, r9\n\t"
+        "#  A[2] * B[5]\n\t"
+        "ldr	r6, [%[a], #8]\n\t"
+        "ldr	r7, [%[b], #20]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r14, r3, r14\n\t"
+        "adcs	r8, r4, r8\n\t"
+        "adc	r9, r5, r9\n\t"
+        "#  A[3] * B[4]\n\t"
+        "ldr	r6, [%[a], #12]\n\t"
+        "ldr	r7, [%[b], #16]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r14, r3, r14\n\t"
+        "adcs	r8, r4, r8\n\t"
+        "adc	r9, r5, r9\n\t"
+        "#  A[4] * B[3]\n\t"
+        "ldr	r6, [%[a], #16]\n\t"
+        "ldr	r7, [%[b], #12]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r14, r3, r14\n\t"
+        "adcs	r8, r4, r8\n\t"
+        "adc	r9, r5, r9\n\t"
+        "#  A[5] * B[2]\n\t"
+        "ldr	r6, [%[a], #20]\n\t"
+        "ldr	r7, [%[b], #8]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r14, r3, r14\n\t"
+        "adcs	r8, r4, r8\n\t"
+        "adc	r9, r5, r9\n\t"
+        "#  A[6] * B[1]\n\t"
+        "ldr	r6, [%[a], #24]\n\t"
+        "ldr	r7, [%[b], #4]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r14, r3, r14\n\t"
+        "adcs	r8, r4, r8\n\t"
+        "adc	r9, r5, r9\n\t"
+        "#  A[7] * B[0]\n\t"
+        "ldr	r6, [%[a], #28]\n\t"
+        "ldr	r7, [%[b], #0]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r14, r3, r14\n\t"
+        "adcs	r8, r4, r8\n\t"
+        "adc	r9, r5, r9\n\t"
+        "str	r14, [sp, #28]\n\t"
+        "#  A[1] * B[7]\n\t"
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r7, [%[b], #28]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r8, r3, r8\n\t"
+        "adcs	r9, r4, r9\n\t"
+        "adc	r10, r5, #0\n\t"
+        "#  A[2] * B[6]\n\t"
+        "ldr	r6, [%[a], #8]\n\t"
+        "ldr	r7, [%[b], #24]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r8, r3, r8\n\t"
+        "adcs	r9, r4, r9\n\t"
+        "adc	r10, r5, r10\n\t"
+        "#  A[3] * B[5]\n\t"
+        "ldr	r6, [%[a], #12]\n\t"
+        "ldr	r7, [%[b], #20]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r8, r3, r8\n\t"
+        "adcs	r9, r4, r9\n\t"
+        "adc	r10, r5, r10\n\t"
+        "#  A[4] * B[4]\n\t"
+        "ldr	r6, [%[a], #16]\n\t"
+        "ldr	r7, [%[b], #16]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r8, r3, r8\n\t"
+        "adcs	r9, r4, r9\n\t"
+        "adc	r10, r5, r10\n\t"
+        "#  A[5] * B[3]\n\t"
+        "ldr	r6, [%[a], #20]\n\t"
+        "ldr	r7, [%[b], #12]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r8, r3, r8\n\t"
+        "adcs	r9, r4, r9\n\t"
+        "adc	r10, r5, r10\n\t"
+        "#  A[6] * B[2]\n\t"
+        "ldr	r6, [%[a], #24]\n\t"
+        "ldr	r7, [%[b], #8]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r8, r3, r8\n\t"
+        "adcs	r9, r4, r9\n\t"
+        "adc	r10, r5, r10\n\t"
+        "#  A[7] * B[1]\n\t"
+        "ldr	r6, [%[a], #28]\n\t"
+        "ldr	r7, [%[b], #4]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r8, r3, r8\n\t"
+        "adcs	r9, r4, r9\n\t"
+        "adc	r10, r5, r10\n\t"
+        "str	r8, [sp, #32]\n\t"
+        "#  A[2] * B[7]\n\t"
+        "ldr	r6, [%[a], #8]\n\t"
+        "ldr	r7, [%[b], #28]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r9, r3, r9\n\t"
+        "adcs	r10, r4, r10\n\t"
+        "adc	r14, r5, #0\n\t"
+        "#  A[3] * B[6]\n\t"
+        "ldr	r6, [%[a], #12]\n\t"
+        "ldr	r7, [%[b], #24]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r9, r3, r9\n\t"
+        "adcs	r10, r4, r10\n\t"
+        "adc	r14, r5, r14\n\t"
+        "#  A[4] * B[5]\n\t"
+        "ldr	r6, [%[a], #16]\n\t"
+        "ldr	r7, [%[b], #20]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r9, r3, r9\n\t"
+        "adcs	r10, r4, r10\n\t"
+        "adc	r14, r5, r14\n\t"
+        "#  A[5] * B[4]\n\t"
+        "ldr	r6, [%[a], #20]\n\t"
+        "ldr	r7, [%[b], #16]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r9, r3, r9\n\t"
+        "adcs	r10, r4, r10\n\t"
+        "adc	r14, r5, r14\n\t"
+        "#  A[6] * B[3]\n\t"
+        "ldr	r6, [%[a], #24]\n\t"
+        "ldr	r7, [%[b], #12]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r9, r3, r9\n\t"
+        "adcs	r10, r4, r10\n\t"
+        "adc	r14, r5, r14\n\t"
+        "#  A[7] * B[2]\n\t"
+        "ldr	r6, [%[a], #28]\n\t"
+        "ldr	r7, [%[b], #8]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r9, r3, r9\n\t"
+        "adcs	r10, r4, r10\n\t"
+        "adc	r14, r5, r14\n\t"
+        "str	r9, [sp, #36]\n\t"
+        "#  A[3] * B[7]\n\t"
+        "ldr	r6, [%[a], #12]\n\t"
+        "ldr	r7, [%[b], #28]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r10, r3, r10\n\t"
+        "adcs	r14, r4, r14\n\t"
+        "adc	r8, r5, #0\n\t"
+        "#  A[4] * B[6]\n\t"
+        "ldr	r6, [%[a], #16]\n\t"
+        "ldr	r7, [%[b], #24]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r10, r3, r10\n\t"
+        "adcs	r14, r4, r14\n\t"
+        "adc	r8, r5, r8\n\t"
+        "#  A[5] * B[5]\n\t"
+        "ldr	r6, [%[a], #20]\n\t"
+        "ldr	r7, [%[b], #20]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r10, r3, r10\n\t"
+        "adcs	r14, r4, r14\n\t"
+        "adc	r8, r5, r8\n\t"
+        "#  A[6] * B[4]\n\t"
+        "ldr	r6, [%[a], #24]\n\t"
+        "ldr	r7, [%[b], #16]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r10, r3, r10\n\t"
+        "adcs	r14, r4, r14\n\t"
+        "adc	r8, r5, r8\n\t"
+        "#  A[7] * B[3]\n\t"
+        "ldr	r6, [%[a], #28]\n\t"
+        "ldr	r7, [%[b], #12]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r10, r3, r10\n\t"
+        "adcs	r14, r4, r14\n\t"
+        "adc	r8, r5, r8\n\t"
+        "str	r10, [sp, #40]\n\t"
+        "#  A[4] * B[7]\n\t"
+        "ldr	r6, [%[a], #16]\n\t"
+        "ldr	r7, [%[b], #28]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r14, r3, r14\n\t"
+        "adcs	r8, r4, r8\n\t"
+        "adc	r9, r5, #0\n\t"
+        "#  A[5] * B[6]\n\t"
+        "ldr	r6, [%[a], #20]\n\t"
+        "ldr	r7, [%[b], #24]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r14, r3, r14\n\t"
+        "adcs	r8, r4, r8\n\t"
+        "adc	r9, r5, r9\n\t"
+        "#  A[6] * B[5]\n\t"
+        "ldr	r6, [%[a], #24]\n\t"
+        "ldr	r7, [%[b], #20]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r14, r3, r14\n\t"
+        "adcs	r8, r4, r8\n\t"
+        "adc	r9, r5, r9\n\t"
+        "#  A[7] * B[4]\n\t"
+        "ldr	r6, [%[a], #28]\n\t"
+        "ldr	r7, [%[b], #16]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r14, r3, r14\n\t"
+        "adcs	r8, r4, r8\n\t"
+        "adc	r9, r5, r9\n\t"
+        "str	r14, [sp, #44]\n\t"
+        "#  A[5] * B[7]\n\t"
+        "ldr	r6, [%[a], #20]\n\t"
+        "ldr	r7, [%[b], #28]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r8, r3, r8\n\t"
+        "adcs	r9, r4, r9\n\t"
+        "adc	r10, r5, #0\n\t"
+        "#  A[6] * B[6]\n\t"
+        "ldr	r6, [%[a], #24]\n\t"
+        "ldr	r7, [%[b], #24]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r8, r3, r8\n\t"
+        "adcs	r9, r4, r9\n\t"
+        "adc	r10, r5, r10\n\t"
+        "#  A[7] * B[5]\n\t"
+        "ldr	r6, [%[a], #28]\n\t"
+        "ldr	r7, [%[b], #20]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r8, r3, r8\n\t"
+        "adcs	r9, r4, r9\n\t"
+        "adc	r10, r5, r10\n\t"
+        "#  A[6] * B[7]\n\t"
+        "ldr	r6, [%[a], #24]\n\t"
+        "ldr	r7, [%[b], #28]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r9, r3, r9\n\t"
+        "adcs	r10, r4, r10\n\t"
+        "adc	r14, r5, #0\n\t"
+        "#  A[7] * B[6]\n\t"
+        "ldr	r6, [%[a], #28]\n\t"
+        "ldr	r7, [%[b], #24]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r9, r3, r9\n\t"
+        "adcs	r10, r4, r10\n\t"
+        "adc	r14, r5, r14\n\t"
+        "#  A[7] * B[7]\n\t"
+        "ldr	r6, [%[a], #28]\n\t"
+        "ldr	r7, [%[b], #28]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r10, r3, r10\n\t"
+        "adc	r14, r4, r14\n\t"
+        "str	r8, [sp, #48]\n\t"
+        "str	r9, [sp, #52]\n\t"
+        "str	r10, [sp, #56]\n\t"
+        "str	r14, [sp, #60]\n\t"
+        "# Start Reduction\n\t"
+        "ldr	r4, [sp, #0]\n\t"
+        "ldr	r5, [sp, #4]\n\t"
+        "ldr	r6, [sp, #8]\n\t"
+        "ldr	r7, [sp, #12]\n\t"
+        "ldr	r8, [sp, #16]\n\t"
+        "ldr	r9, [sp, #20]\n\t"
+        "ldr	r10, [sp, #24]\n\t"
+        "ldr	r14, [sp, #28]\n\t"
+        "# mu = a[0]-a[7] + a[0]-a[4] << 96 + (a[0]-a[1] * 2) << 192\n\t"
+        "#    - a[0] << 224\n\t"
+        "#   + (a[0]-a[1] * 2) << (6 * 32)\n\t"
+        "adds	r10, r10, r4\n\t"
+        "adc	r14, r14, r5\n\t"
+        "adds	r10, r10, r4\n\t"
+        "adc	r14, r14, r5\n\t"
+        "#   - a[0] << (7 * 32)\n\t"
+        "sub	r14, r14, r4\n\t"
+        "#   + a[0]-a[4] << (3 * 32)\n\t"
+        "mov	%[a], r7\n\t"
+        "mov	%[b], r8\n\t"
+        "adds	r7, r7, r4\n\t"
+        "adcs	r8, r8, r5\n\t"
+        "adcs	r9, r9, r6\n\t"
+        "adcs	r10, r10, %[a]\n\t"
+        "adc	r14, r14, %[b]\n\t"
+        "str	r4, [sp, #0]\n\t"
+        "str	r5, [sp, #4]\n\t"
+        "str	r6, [sp, #8]\n\t"
+        "str	r7, [sp, #12]\n\t"
+        "str	r8, [sp, #16]\n\t"
+        "str	r9, [sp, #20]\n\t"
+        "# a += mu * m\n\t"
+        "#   += mu * ((1 << 256) - (1 << 224) + (1 << 192) + (1 << 96) - 1)\n\t"
+        "mov	%[a], #0\n\t"
+        "# a[6] +=        t[0] + t[3]\n\t"
+        "ldr	r3, [sp, #24]\n\t"
+        "adds	r3, r3, r4\n\t"
+        "adc	%[b], %[a], #0\n\t"
+        "adds	r3, r3, r7\n\t"
+        "adc	%[b], %[b], #0\n\t"
+        "str	r10, [sp, #24]\n\t"
+        "# a[7] +=        t[1] + t[4]\n\t"
+        "ldr	r3, [sp, #28]\n\t"
+        "adds	r3, r3, %[b]\n\t"
+        "adc	%[b], %[a], #0\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adc	%[b], %[b], #0\n\t"
+        "adds	r3, r3, r8\n\t"
+        "adc	%[b], %[b], #0\n\t"
+        "str	r14, [sp, #28]\n\t"
+        "str	r3, [sp, #64]\n\t"
+        "# a[8] += t[0] + t[2] + t[5]\n\t"
+        "ldr	r3, [sp, #32]\n\t"
+        "adds	r3, r3, %[b]\n\t"
+        "adc	%[b], %[a], #0\n\t"
+        "adds	r3, r3, r4\n\t"
+        "adc	%[b], %[b], #0\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adc	%[b], %[b], #0\n\t"
+        "adds	r3, r3, r9\n\t"
+        "adc	%[b], %[b], #0\n\t"
+        "str	r3, [sp, #32]\n\t"
+        "# a[9]  += t[1] + t[3] + t[6]\n\t"
+        "# a[10] += t[2] + t[4] + t[7]\n\t"
+        "ldr	r3, [sp, #36]\n\t"
+        "ldr	r4, [sp, #40]\n\t"
+        "adds	r3, r3, %[b]\n\t"
+        "adcs	r4, r4, #0\n\t"
+        "adc	%[b], %[a], #0\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	%[b], %[b], #0\n\t"
+        "adds	r3, r3, r7\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adc	%[b], %[b], #0\n\t"
+        "adds	r3, r3, r10\n\t"
+        "adcs	r4, r4, r14\n\t"
+        "adc	%[b], %[b], #0\n\t"
+        "str	r3, [sp, #36]\n\t"
+        "str	r4, [sp, #40]\n\t"
+        "# a[11] += t[3] + t[5]\n\t"
+        "# a[12] += t[4] + t[6]\n\t"
+        "# a[13] += t[5] + t[7]\n\t"
+        "# a[14] += t[6]\n\t"
+        "ldr	r3, [sp, #44]\n\t"
+        "ldr	r4, [sp, #48]\n\t"
+        "ldr	r5, [sp, #52]\n\t"
+        "ldr	r6, [sp, #56]\n\t"
+        "adds	r3, r3, %[b]\n\t"
+        "adcs	r4, r4, #0\n\t"
+        "adcs	r5, r5, #0\n\t"
+        "adcs	r6, r6, #0\n\t"
+        "adc	%[b], %[a], #0\n\t"
+        "adds	r3, r3, r7\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adc	%[b], %[b], #0\n\t"
+        "adds	r3, r3, r9\n\t"
+        "adcs	r4, r4, r10\n\t"
+        "adcs	r5, r5, r14\n\t"
+        "adcs	r6, r6, #0\n\t"
+        "adc	%[b], %[b], #0\n\t"
+        "str	r3, [sp, #44]\n\t"
+        "str	r4, [sp, #48]\n\t"
+        "str	r5, [sp, #52]\n\t"
+        "str	r6, [sp, #56]\n\t"
+        "# a[15] += t[7]\n\t"
+        "ldr	r3, [sp, #60]\n\t"
+        "adds	r3, r3, %[b]\n\t"
+        "adc	%[b], %[a], #0\n\t"
+        "adds	r3, r3, r14\n\t"
+        "adc	%[b], %[b], #0\n\t"
+        "str	r3, [sp, #60]\n\t"
+        "ldr	r3, [sp, #64]\n\t"
+        "ldr	r4, [sp, #32]\n\t"
+        "ldr	r5, [sp, #36]\n\t"
+        "ldr	r6, [sp, #40]\n\t"
+        "ldr	r8, [sp, #0]\n\t"
+        "ldr	r9, [sp, #4]\n\t"
+        "ldr	r10, [sp, #8]\n\t"
+        "ldr	r14, [sp, #12]\n\t"
+        "subs	r3, r3, r8\n\t"
+        "sbcs	r4, r4, r9\n\t"
+        "sbcs	r5, r5, r10\n\t"
+        "sbcs	r6, r6, r14\n\t"
+        "str	r4, [sp, #32]\n\t"
+        "str	r5, [sp, #36]\n\t"
+        "str	r6, [sp, #40]\n\t"
+        "ldr	r3, [sp, #44]\n\t"
+        "ldr	r4, [sp, #48]\n\t"
+        "ldr	r5, [sp, #52]\n\t"
+        "ldr	r6, [sp, #56]\n\t"
+        "ldr	r7, [sp, #60]\n\t"
+        "ldr	r8, [sp, #16]\n\t"
+        "ldr	r9, [sp, #20]\n\t"
+        "ldr	r10, [sp, #24]\n\t"
+        "ldr	r14, [sp, #28]\n\t"
+        "sbcs	r3, r3, r8\n\t"
+        "sbcs	r4, r4, r9\n\t"
+        "sbcs	r5, r5, r10\n\t"
+        "sbcs	r6, r6, r14\n\t"
+        "sbc	r7, r7, #0\n\t"
+        "str	r3, [sp, #44]\n\t"
+        "str	r4, [sp, #48]\n\t"
+        "str	r5, [sp, #52]\n\t"
+        "str	r6, [sp, #56]\n\t"
+        "str	r7, [sp, #60]\n\t"
+        "# mask m and sub from result if overflow\n\t"
+        "sub	%[b], %[a], %[b]\n\t"
+        "and	%[a], %[b], #1\n\t"
+        "ldr	r3, [sp, #32]\n\t"
+        "ldr	r4, [sp, #36]\n\t"
+        "ldr	r5, [sp, #40]\n\t"
+        "ldr	r6, [sp, #44]\n\t"
+        "ldr	r7, [sp, #48]\n\t"
+        "ldr	r8, [sp, #52]\n\t"
+        "ldr	r9, [sp, #56]\n\t"
+        "ldr	r10, [sp, #60]\n\t"
+        "subs	r3, r3, %[b]\n\t"
+        "sbcs	r4, r4, %[b]\n\t"
+        "sbcs	r5, r5, %[b]\n\t"
+        "sbcs	r6, r6, #0\n\t"
+        "sbcs	r7, r7, #0\n\t"
+        "sbcs	r8, r8, #0\n\t"
+        "sbcs	r9, r9, %[a]\n\t"
+        "sbc	r10, r10, %[b]\n\t"
+        "str	r3, [%[r], #0]\n\t"
+        "str	r4, [%[r], #4]\n\t"
+        "str	r5, [%[r], #8]\n\t"
+        "str	r6, [%[r], #12]\n\t"
+        "str	r7, [%[r], #16]\n\t"
+        "str	r8, [%[r], #20]\n\t"
+        "str	r9, [%[r], #24]\n\t"
+        "str	r10, [%[r], #28]\n\t"
+        "add	sp, sp, #68\n\t"
+        : [a] "+r" (a), [b] "+r" (b)
+        : [r] "r" (r)
+        : "memory", "r8", "r9", "r10", "r14", "r3", "r4", "r5", "r6", "r7"
+    );
+}
+
+/* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+SP_NOINLINE static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    (void)mp;
+    (void)m;
+
+    __asm__ __volatile__ (
+        "sub	sp, sp, #68\n\t"
+        "mov	r5, #0\n\t"
+        "#  A[0] * A[1]\n\t"
+        "ldr	r6, [%[a], #0]\n\t"
+        "ldr	r7, [%[a], #4]\n\t"
+        "umull	r9, r10, r6, r7\n\t"
+        "str	r9, [sp, #4]\n\t"
+        "#  A[0] * A[2]\n\t"
+        "ldr	r6, [%[a], #0]\n\t"
+        "ldr	r7, [%[a], #8]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r10, r3, r10\n\t"
+        "adc	r14, r4, #0\n\t"
+        "str	r10, [sp, #8]\n\t"
+        "#  A[0] * A[3]\n\t"
+        "ldr	r6, [%[a], #0]\n\t"
+        "ldr	r7, [%[a], #12]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r14, r3, r14\n\t"
+        "adc	r8, r4, #0\n\t"
+        "#  A[1] * A[2]\n\t"
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r7, [%[a], #8]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r14, r3, r14\n\t"
+        "adcs	r8, r4, r8\n\t"
+        "adc	r9, r5, #0\n\t"
+        "str	r14, [sp, #12]\n\t"
+        "#  A[0] * A[4]\n\t"
+        "ldr	r6, [%[a], #0]\n\t"
+        "ldr	r7, [%[a], #16]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r8, r3, r8\n\t"
+        "adc	r9, r4, r9\n\t"
+        "#  A[1] * A[3]\n\t"
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r7, [%[a], #12]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r8, r3, r8\n\t"
+        "adcs	r9, r4, r9\n\t"
+        "adc	r10, r5, #0\n\t"
+        "str	r8, [sp, #16]\n\t"
+        "#  A[0] * A[5]\n\t"
+        "ldr	r6, [%[a], #0]\n\t"
+        "ldr	r7, [%[a], #20]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r9, r3, r9\n\t"
+        "adc	r10, r4, r10\n\t"
+        "#  A[1] * A[4]\n\t"
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r7, [%[a], #16]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r9, r3, r9\n\t"
+        "adcs	r10, r4, r10\n\t"
+        "adc	r14, r5, #0\n\t"
+        "#  A[2] * A[3]\n\t"
+        "ldr	r6, [%[a], #8]\n\t"
+        "ldr	r7, [%[a], #12]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r9, r3, r9\n\t"
+        "adcs	r10, r4, r10\n\t"
+        "adc	r14, r5, r14\n\t"
+        "str	r9, [sp, #20]\n\t"
+        "#  A[0] * A[6]\n\t"
+        "ldr	r6, [%[a], #0]\n\t"
+        "ldr	r7, [%[a], #24]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r10, r3, r10\n\t"
+        "adcs	r14, r4, r14\n\t"
+        "adc	r8, r5, #0\n\t"
+        "#  A[1] * A[5]\n\t"
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r7, [%[a], #20]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r10, r3, r10\n\t"
+        "adcs	r14, r4, r14\n\t"
+        "adc	r8, r5, r8\n\t"
+        "#  A[2] * A[4]\n\t"
+        "ldr	r6, [%[a], #8]\n\t"
+        "ldr	r7, [%[a], #16]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r10, r3, r10\n\t"
+        "adcs	r14, r4, r14\n\t"
+        "adc	r8, r5, r8\n\t"
+        "str	r10, [sp, #24]\n\t"
+        "#  A[0] * A[7]\n\t"
+        "ldr	r6, [%[a], #0]\n\t"
+        "ldr	r7, [%[a], #28]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r14, r3, r14\n\t"
+        "adcs	r8, r4, r8\n\t"
+        "adc	r9, r5, #0\n\t"
+        "#  A[1] * A[6]\n\t"
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r7, [%[a], #24]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r14, r3, r14\n\t"
+        "adcs	r8, r4, r8\n\t"
+        "adc	r9, r5, r9\n\t"
+        "#  A[2] * A[5]\n\t"
+        "ldr	r6, [%[a], #8]\n\t"
+        "ldr	r7, [%[a], #20]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r14, r3, r14\n\t"
+        "adcs	r8, r4, r8\n\t"
+        "adc	r9, r5, r9\n\t"
+        "#  A[3] * A[4]\n\t"
+        "ldr	r6, [%[a], #12]\n\t"
+        "ldr	r7, [%[a], #16]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r14, r3, r14\n\t"
+        "adcs	r8, r4, r8\n\t"
+        "adc	r9, r5, r9\n\t"
+        "str	r14, [sp, #28]\n\t"
+        "#  A[1] * A[7]\n\t"
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r7, [%[a], #28]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r8, r3, r8\n\t"
+        "adcs	r9, r4, r9\n\t"
+        "adc	r10, r5, #0\n\t"
+        "#  A[2] * A[6]\n\t"
+        "ldr	r6, [%[a], #8]\n\t"
+        "ldr	r7, [%[a], #24]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r8, r3, r8\n\t"
+        "adcs	r9, r4, r9\n\t"
+        "adc	r10, r5, r10\n\t"
+        "#  A[3] * A[5]\n\t"
+        "ldr	r6, [%[a], #12]\n\t"
+        "ldr	r7, [%[a], #20]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r8, r3, r8\n\t"
+        "adcs	r9, r4, r9\n\t"
+        "adc	r10, r5, r10\n\t"
+        "str	r8, [sp, #32]\n\t"
+        "#  A[2] * A[7]\n\t"
+        "ldr	r6, [%[a], #8]\n\t"
+        "ldr	r7, [%[a], #28]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r9, r3, r9\n\t"
+        "adcs	r10, r4, r10\n\t"
+        "adc	r14, r5, #0\n\t"
+        "#  A[3] * A[6]\n\t"
+        "ldr	r6, [%[a], #12]\n\t"
+        "ldr	r7, [%[a], #24]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r9, r3, r9\n\t"
+        "adcs	r10, r4, r10\n\t"
+        "adc	r14, r5, r14\n\t"
+        "#  A[4] * A[5]\n\t"
+        "ldr	r6, [%[a], #16]\n\t"
+        "ldr	r7, [%[a], #20]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r9, r3, r9\n\t"
+        "adcs	r10, r4, r10\n\t"
+        "adc	r14, r5, r14\n\t"
+        "str	r9, [sp, #36]\n\t"
+        "#  A[3] * A[7]\n\t"
+        "ldr	r6, [%[a], #12]\n\t"
+        "ldr	r7, [%[a], #28]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r10, r3, r10\n\t"
+        "adcs	r14, r4, r14\n\t"
+        "adc	r8, r5, #0\n\t"
+        "#  A[4] * A[6]\n\t"
+        "ldr	r6, [%[a], #16]\n\t"
+        "ldr	r7, [%[a], #24]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r10, r3, r10\n\t"
+        "adcs	r14, r4, r14\n\t"
+        "adc	r8, r5, r8\n\t"
+        "str	r10, [sp, #40]\n\t"
+        "#  A[4] * A[7]\n\t"
+        "ldr	r6, [%[a], #16]\n\t"
+        "ldr	r7, [%[a], #28]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r14, r3, r14\n\t"
+        "adcs	r8, r4, r8\n\t"
+        "adc	r9, r5, #0\n\t"
+        "#  A[5] * A[6]\n\t"
+        "ldr	r6, [%[a], #20]\n\t"
+        "ldr	r7, [%[a], #24]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r14, r3, r14\n\t"
+        "adcs	r8, r4, r8\n\t"
+        "adc	r9, r5, r9\n\t"
+        "str	r14, [sp, #44]\n\t"
+        "#  A[5] * A[7]\n\t"
+        "ldr	r6, [%[a], #20]\n\t"
+        "ldr	r7, [%[a], #28]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r8, r3, r8\n\t"
+        "adcs	r9, r4, r9\n\t"
+        "adc	r10, r5, #0\n\t"
+        "str	r8, [sp, #48]\n\t"
+        "#  A[6] * A[7]\n\t"
+        "ldr	r6, [%[a], #24]\n\t"
+        "ldr	r7, [%[a], #28]\n\t"
+        "umull	r3, r4, r6, r7\n\t"
+        "adds	r9, r3, r9\n\t"
+        "adc	r10, r4, r10\n\t"
+        "str	r9, [sp, #52]\n\t"
+        "str	r10, [sp, #56]\n\t"
+        "# Double\n\t"
+        "ldr	r4, [sp, #4]\n\t"
+        "ldr	r6, [sp, #8]\n\t"
+        "ldr	r7, [sp, #12]\n\t"
+        "ldr	r8, [sp, #16]\n\t"
+        "ldr	r9, [sp, #20]\n\t"
+        "ldr	r10, [sp, #24]\n\t"
+        "ldr	r14, [sp, #28]\n\t"
+        "ldr	r12, [sp, #32]\n\t"
+        "ldr	r3, [sp, #36]\n\t"
+        "adds	r4, r4, r4\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adcs	r7, r7, r7\n\t"
+        "adcs	r8, r8, r8\n\t"
+        "adcs	r9, r9, r9\n\t"
+        "adcs	r10, r10, r10\n\t"
+        "adcs	r14, r14, r14\n\t"
+        "adcs	r12, r12, r12\n\t"
+        "adcs	r3, r3, r3\n\t"
+        "str	r4, [sp, #4]\n\t"
+        "str	r6, [sp, #8]\n\t"
+        "str	r7, [sp, #12]\n\t"
+        "str	r8, [sp, #16]\n\t"
+        "str	r9, [sp, #20]\n\t"
+        "str	r10, [sp, #24]\n\t"
+        "str	r14, [sp, #28]\n\t"
+        "str	r12, [sp, #32]\n\t"
+        "str	r3, [sp, #36]\n\t"
+        "ldr	r4, [sp, #40]\n\t"
+        "ldr	r6, [sp, #44]\n\t"
+        "ldr	r7, [sp, #48]\n\t"
+        "ldr	r8, [sp, #52]\n\t"
+        "ldr	r9, [sp, #56]\n\t"
+        "adcs	r4, r4, r4\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adcs	r7, r7, r7\n\t"
+        "adcs	r8, r8, r8\n\t"
+        "adcs	r9, r9, r9\n\t"
+        "str	r4, [sp, #40]\n\t"
+        "str	r6, [sp, #44]\n\t"
+        "str	r7, [sp, #48]\n\t"
+        "str	r8, [sp, #52]\n\t"
+        "str	r9, [sp, #56]\n\t"
+        "adc	r10, r5, #0\n\t"
+        "str	r10, [sp, #60]\n\t"
+        "ldr	r4, [sp, #4]\n\t"
+        "ldr	r5, [sp, #8]\n\t"
+        "ldr	r12, [sp, #12]\n\t"
+        "#  A[0] * A[0]\n\t"
+        "ldr	r6, [%[a], #0]\n\t"
+        "umull	r8, r9, r6, r6\n\t"
+        "#  A[1] * A[1]\n\t"
+        "ldr	r6, [%[a], #4]\n\t"
+        "umull	r10, r14, r6, r6\n\t"
+        "adds	r9, r9, r4\n\t"
+        "adcs	r10, r10, r5\n\t"
+        "adcs	r14, r14, r12\n\t"
+        "str	r8, [sp, #0]\n\t"
+        "str	r9, [sp, #4]\n\t"
+        "str	r10, [sp, #8]\n\t"
+        "str	r14, [sp, #12]\n\t"
+        "ldr	r3, [sp, #16]\n\t"
+        "ldr	r4, [sp, #20]\n\t"
+        "ldr	r5, [sp, #24]\n\t"
+        "ldr	r12, [sp, #28]\n\t"
+        "#  A[2] * A[2]\n\t"
+        "ldr	r6, [%[a], #8]\n\t"
+        "umull	r8, r9, r6, r6\n\t"
+        "#  A[3] * A[3]\n\t"
+        "ldr	r6, [%[a], #12]\n\t"
+        "umull	r10, r14, r6, r6\n\t"
+        "adcs	r8, r8, r3\n\t"
+        "adcs	r9, r9, r4\n\t"
+        "adcs	r10, r10, r5\n\t"
+        "adcs	r14, r14, r12\n\t"
+        "str	r8, [sp, #16]\n\t"
+        "str	r9, [sp, #20]\n\t"
+        "str	r10, [sp, #24]\n\t"
+        "str	r14, [sp, #28]\n\t"
+        "ldr	r3, [sp, #32]\n\t"
+        "ldr	r4, [sp, #36]\n\t"
+        "ldr	r5, [sp, #40]\n\t"
+        "ldr	r12, [sp, #44]\n\t"
+        "#  A[4] * A[4]\n\t"
+        "ldr	r6, [%[a], #16]\n\t"
+        "umull	r8, r9, r6, r6\n\t"
+        "#  A[5] * A[5]\n\t"
+        "ldr	r6, [%[a], #20]\n\t"
+        "umull	r10, r14, r6, r6\n\t"
+        "adcs	r8, r8, r3\n\t"
+        "adcs	r9, r9, r4\n\t"
+        "adcs	r10, r10, r5\n\t"
+        "adcs	r14, r14, r12\n\t"
+        "str	r8, [sp, #32]\n\t"
+        "str	r9, [sp, #36]\n\t"
+        "str	r10, [sp, #40]\n\t"
+        "str	r14, [sp, #44]\n\t"
+        "ldr	r3, [sp, #48]\n\t"
+        "ldr	r4, [sp, #52]\n\t"
+        "ldr	r5, [sp, #56]\n\t"
+        "ldr	r12, [sp, #60]\n\t"
+        "#  A[6] * A[6]\n\t"
+        "ldr	r6, [%[a], #24]\n\t"
+        "umull	r8, r9, r6, r6\n\t"
+        "#  A[7] * A[7]\n\t"
+        "ldr	r6, [%[a], #28]\n\t"
+        "umull	r10, r14, r6, r6\n\t"
+        "adcs	r8, r8, r3\n\t"
+        "adcs	r9, r9, r4\n\t"
+        "adcs	r10, r10, r5\n\t"
+        "adc	r14, r14, r12\n\t"
+        "str	r8, [sp, #48]\n\t"
+        "str	r9, [sp, #52]\n\t"
+        "str	r10, [sp, #56]\n\t"
+        "str	r14, [sp, #60]\n\t"
+        "# Start Reduction\n\t"
+        "ldr	r4, [sp, #0]\n\t"
+        "ldr	r5, [sp, #4]\n\t"
+        "ldr	r6, [sp, #8]\n\t"
+        "ldr	r7, [sp, #12]\n\t"
+        "ldr	r8, [sp, #16]\n\t"
+        "ldr	r9, [sp, #20]\n\t"
+        "ldr	r10, [sp, #24]\n\t"
+        "ldr	r14, [sp, #28]\n\t"
+        "# mu = a[0]-a[7] + a[0]-a[4] << 96 + (a[0]-a[1] * 2) << 192\n\t"
+        "#    - a[0] << 224\n\t"
+        "#   + (a[0]-a[1] * 2) << (6 * 32)\n\t"
+        "adds	r10, r10, r4\n\t"
+        "adc	r14, r14, r5\n\t"
+        "adds	r10, r10, r4\n\t"
+        "adc	r14, r14, r5\n\t"
+        "#   - a[0] << (7 * 32)\n\t"
+        "sub	r14, r14, r4\n\t"
+        "#   + a[0]-a[4] << (3 * 32)\n\t"
+        "mov	%[a], r7\n\t"
+        "mov	r12, r8\n\t"
+        "adds	r7, r7, r4\n\t"
+        "adcs	r8, r8, r5\n\t"
+        "adcs	r9, r9, r6\n\t"
+        "adcs	r10, r10, %[a]\n\t"
+        "adc	r14, r14, r12\n\t"
+        "str	r4, [sp, #0]\n\t"
+        "str	r5, [sp, #4]\n\t"
+        "str	r6, [sp, #8]\n\t"
+        "str	r7, [sp, #12]\n\t"
+        "str	r8, [sp, #16]\n\t"
+        "str	r9, [sp, #20]\n\t"
+        "# a += mu * m\n\t"
+        "#   += mu * ((1 << 256) - (1 << 224) + (1 << 192) + (1 << 96) - 1)\n\t"
+        "mov	%[a], #0\n\t"
+        "# a[6] +=        t[0] + t[3]\n\t"
+        "ldr	r3, [sp, #24]\n\t"
+        "adds	r3, r3, r4\n\t"
+        "adc	r12, %[a], #0\n\t"
+        "adds	r3, r3, r7\n\t"
+        "adc	r12, r12, #0\n\t"
+        "str	r10, [sp, #24]\n\t"
+        "# a[7] +=        t[1] + t[4]\n\t"
+        "ldr	r3, [sp, #28]\n\t"
+        "adds	r3, r3, r12\n\t"
+        "adc	r12, %[a], #0\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adc	r12, r12, #0\n\t"
+        "adds	r3, r3, r8\n\t"
+        "adc	r12, r12, #0\n\t"
+        "str	r14, [sp, #28]\n\t"
+        "str	r3, [sp, #64]\n\t"
+        "# a[8] += t[0] + t[2] + t[5]\n\t"
+        "ldr	r3, [sp, #32]\n\t"
+        "adds	r3, r3, r12\n\t"
+        "adc	r12, %[a], #0\n\t"
+        "adds	r3, r3, r4\n\t"
+        "adc	r12, r12, #0\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adc	r12, r12, #0\n\t"
+        "adds	r3, r3, r9\n\t"
+        "adc	r12, r12, #0\n\t"
+        "str	r3, [sp, #32]\n\t"
+        "# a[9]  += t[1] + t[3] + t[6]\n\t"
+        "# a[10] += t[2] + t[4] + t[7]\n\t"
+        "ldr	r3, [sp, #36]\n\t"
+        "ldr	r4, [sp, #40]\n\t"
+        "adds	r3, r3, r12\n\t"
+        "adcs	r4, r4, #0\n\t"
+        "adc	r12, %[a], #0\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	r12, r12, #0\n\t"
+        "adds	r3, r3, r7\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adc	r12, r12, #0\n\t"
+        "adds	r3, r3, r10\n\t"
+        "adcs	r4, r4, r14\n\t"
+        "adc	r12, r12, #0\n\t"
+        "str	r3, [sp, #36]\n\t"
+        "str	r4, [sp, #40]\n\t"
+        "# a[11] += t[3] + t[5]\n\t"
+        "# a[12] += t[4] + t[6]\n\t"
+        "# a[13] += t[5] + t[7]\n\t"
+        "# a[14] += t[6]\n\t"
+        "ldr	r3, [sp, #44]\n\t"
+        "ldr	r4, [sp, #48]\n\t"
+        "ldr	r5, [sp, #52]\n\t"
+        "ldr	r6, [sp, #56]\n\t"
+        "adds	r3, r3, r12\n\t"
+        "adcs	r4, r4, #0\n\t"
+        "adcs	r5, r5, #0\n\t"
+        "adcs	r6, r6, #0\n\t"
+        "adc	r12, %[a], #0\n\t"
+        "adds	r3, r3, r7\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adc	r12, r12, #0\n\t"
+        "adds	r3, r3, r9\n\t"
+        "adcs	r4, r4, r10\n\t"
+        "adcs	r5, r5, r14\n\t"
+        "adcs	r6, r6, #0\n\t"
+        "adc	r12, r12, #0\n\t"
+        "str	r3, [sp, #44]\n\t"
+        "str	r4, [sp, #48]\n\t"
+        "str	r5, [sp, #52]\n\t"
+        "str	r6, [sp, #56]\n\t"
+        "# a[15] += t[7]\n\t"
+        "ldr	r3, [sp, #60]\n\t"
+        "adds	r3, r3, r12\n\t"
+        "adc	r12, %[a], #0\n\t"
+        "adds	r3, r3, r14\n\t"
+        "adc	r12, r12, #0\n\t"
+        "str	r3, [sp, #60]\n\t"
+        "ldr	r3, [sp, #64]\n\t"
+        "ldr	r4, [sp, #32]\n\t"
+        "ldr	r5, [sp, #36]\n\t"
+        "ldr	r6, [sp, #40]\n\t"
+        "ldr	r8, [sp, #0]\n\t"
+        "ldr	r9, [sp, #4]\n\t"
+        "ldr	r10, [sp, #8]\n\t"
+        "ldr	r14, [sp, #12]\n\t"
+        "subs	r3, r3, r8\n\t"
+        "sbcs	r4, r4, r9\n\t"
+        "sbcs	r5, r5, r10\n\t"
+        "sbcs	r6, r6, r14\n\t"
+        "str	r4, [sp, #32]\n\t"
+        "str	r5, [sp, #36]\n\t"
+        "str	r6, [sp, #40]\n\t"
+        "ldr	r3, [sp, #44]\n\t"
+        "ldr	r4, [sp, #48]\n\t"
+        "ldr	r5, [sp, #52]\n\t"
+        "ldr	r6, [sp, #56]\n\t"
+        "ldr	r7, [sp, #60]\n\t"
+        "ldr	r8, [sp, #16]\n\t"
+        "ldr	r9, [sp, #20]\n\t"
+        "ldr	r10, [sp, #24]\n\t"
+        "ldr	r14, [sp, #28]\n\t"
+        "sbcs	r3, r3, r8\n\t"
+        "sbcs	r4, r4, r9\n\t"
+        "sbcs	r5, r5, r10\n\t"
+        "sbcs	r6, r6, r14\n\t"
+        "sbc	r7, r7, #0\n\t"
+        "str	r3, [sp, #44]\n\t"
+        "str	r4, [sp, #48]\n\t"
+        "str	r5, [sp, #52]\n\t"
+        "str	r6, [sp, #56]\n\t"
+        "str	r7, [sp, #60]\n\t"
+        "# mask m and sub from result if overflow\n\t"
+        "sub	r12, %[a], r12\n\t"
+        "and	%[a], r12, #1\n\t"
+        "ldr	r3, [sp, #32]\n\t"
+        "ldr	r4, [sp, #36]\n\t"
+        "ldr	r5, [sp, #40]\n\t"
+        "ldr	r6, [sp, #44]\n\t"
+        "ldr	r7, [sp, #48]\n\t"
+        "ldr	r8, [sp, #52]\n\t"
+        "ldr	r9, [sp, #56]\n\t"
+        "ldr	r10, [sp, #60]\n\t"
+        "subs	r3, r3, r12\n\t"
+        "sbcs	r4, r4, r12\n\t"
+        "sbcs	r5, r5, r12\n\t"
+        "sbcs	r6, r6, #0\n\t"
+        "sbcs	r7, r7, #0\n\t"
+        "sbcs	r8, r8, #0\n\t"
+        "sbcs	r9, r9, %[a]\n\t"
+        "sbc	r10, r10, r12\n\t"
+        "str	r3, [%[r], #0]\n\t"
+        "str	r4, [%[r], #4]\n\t"
+        "str	r5, [%[r], #8]\n\t"
+        "str	r6, [%[r], #12]\n\t"
+        "str	r7, [%[r], #16]\n\t"
+        "str	r8, [%[r], #20]\n\t"
+        "str	r9, [%[r], #24]\n\t"
+        "str	r10, [%[r], #28]\n\t"
+        "add	sp, sp, #68\n\t"
+        : [a] "+r" (a)
+        : [r] "r" (r)
+        : "memory", "r8", "r9", "r10", "r14", "r3", "r4", "r5", "r6", "r7", "r12"
+    );
+}
+
+#if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY)
+/* Square the Montgomery form number a number of times. (r = a ^ n mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montogmery form.
+ * n   Number of times to square.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_256_mont_sqr_n_8(sp_digit* r, const sp_digit* a, int n,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_256_mont_sqr_8(r, a, m, mp);
+    for (; n > 1; n--) {
+        sp_256_mont_sqr_8(r, r, m, mp);
+    }
+}
+
+#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */
+#ifdef WOLFSSL_SP_SMALL
+/* Mod-2 for the P256 curve. */
+static const uint32_t p256_mod_minus_2[8] = {
+    0xfffffffdU,0xffffffffU,0xffffffffU,0x00000000U,0x00000000U,0x00000000U,
+    0x00000001U,0xffffffffU
+};
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the modulus (prime) of the
+ * P256 curve. (r = 1 / a mod m)
+ *
+ * r   Inverse result.
+ * a   Number to invert.
+ * td  Temporary data.
+ */
+static void sp_256_mont_inv_8(sp_digit* r, const sp_digit* a, sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    XMEMCPY(t, a, sizeof(sp_digit) * 8);
+    for (i=254; i>=0; i--) {
+        sp_256_mont_sqr_8(t, t, p256_mod, p256_mp_mod);
+        if (p256_mod_minus_2[i / 32] & ((sp_digit)1 << (i % 32)))
+            sp_256_mont_mul_8(t, t, a, p256_mod, p256_mp_mod);
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 8);
+#else
+    sp_digit* t1 = td;
+    sp_digit* t2 = td + 2 * 8;
+    sp_digit* t3 = td + 4 * 8;
+    /* 0x2 */
+    sp_256_mont_sqr_8(t1, a, p256_mod, p256_mp_mod);
+    /* 0x3 */
+    sp_256_mont_mul_8(t2, t1, a, p256_mod, p256_mp_mod);
+    /* 0xc */
+    sp_256_mont_sqr_n_8(t1, t2, 2, p256_mod, p256_mp_mod);
+    /* 0xd */
+    sp_256_mont_mul_8(t3, t1, a, p256_mod, p256_mp_mod);
+    /* 0xf */
+    sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xf0 */
+    sp_256_mont_sqr_n_8(t1, t2, 4, p256_mod, p256_mp_mod);
+    /* 0xfd */
+    sp_256_mont_mul_8(t3, t3, t1, p256_mod, p256_mp_mod);
+    /* 0xff */
+    sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xff00 */
+    sp_256_mont_sqr_n_8(t1, t2, 8, p256_mod, p256_mp_mod);
+    /* 0xfffd */
+    sp_256_mont_mul_8(t3, t3, t1, p256_mod, p256_mp_mod);
+    /* 0xffff */
+    sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xffff0000 */
+    sp_256_mont_sqr_n_8(t1, t2, 16, p256_mod, p256_mp_mod);
+    /* 0xfffffffd */
+    sp_256_mont_mul_8(t3, t3, t1, p256_mod, p256_mp_mod);
+    /* 0xffffffff */
+    sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000000 */
+    sp_256_mont_sqr_n_8(t1, t2, 32, p256_mod, p256_mp_mod);
+    /* 0xffffffffffffffff */
+    sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001 */
+    sp_256_mont_mul_8(r, t1, a, p256_mod, p256_mp_mod);
+    /* 0xffffffff000000010000000000000000000000000000000000000000 */
+    sp_256_mont_sqr_n_8(r, r, 160, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001000000000000000000000000ffffffffffffffff */
+    sp_256_mont_mul_8(r, r, t2, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001000000000000000000000000ffffffffffffffff00000000 */
+    sp_256_mont_sqr_n_8(r, r, 32, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001000000000000000000000000fffffffffffffffffffffffd */
+    sp_256_mont_mul_8(r, r, t3, p256_mod, p256_mp_mod);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
 /* Compare a with b in constant time.
  *
  * a  A single precision integer.
@@ -17080,11 +75097,12 @@
  * return -ve, 0 or +ve if a is less than, equal to or greater than b
  * respectively.
  */
-static int32_t sp_256_cmp_8(sp_digit* a, sp_digit* b)
+static int32_t sp_256_cmp_8(const sp_digit* a, const sp_digit* b)
 {
     sp_digit r = -1;
     sp_digit one = 1;
 
+
 #ifdef WOLFSSL_SP_SMALL
     __asm__ __volatile__ (
         "mov	r7, #0\n\t"
@@ -17096,15 +75114,18 @@
         "and	r4, r4, r3\n\t"
         "and	r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
-        "movne	r3, r7\n\t"
-        "sub	r6, r6, #4\n\t"
-        "bcc	1b\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "subs	r6, r6, #4\n\t"
+        "bcs	1b\n\t"
         "eor	%[r], %[r], r3\n\t"
         : [r] "+r" (r)
         : [a] "r" (a), [b] "r" (b), [one] "r" (one)
-        : "r2", "r3", "r4", "r5", "r6", "r7"
+        : "r3", "r4", "r5", "r6", "r7"
     );
 #else
     __asm__ __volatile__ (
@@ -17115,69 +75136,93 @@
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #24]\n\t"
         "ldr		r5, [%[b], #24]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #20]\n\t"
         "ldr		r5, [%[b], #20]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #16]\n\t"
         "ldr		r5, [%[b], #16]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #12]\n\t"
         "ldr		r5, [%[b], #12]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #8]\n\t"
         "ldr		r5, [%[b], #8]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #4]\n\t"
         "ldr		r5, [%[b], #4]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "ldr		r4, [%[a], #0]\n\t"
         "ldr		r5, [%[b], #0]\n\t"
         "and		r4, r4, r3\n\t"
         "and		r5, r5, r3\n\t"
         "subs	r4, r4, r5\n\t"
-        "movhi	%[r], %[one]\n\t"
-        "movlo	%[r], r3\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
         "movne	r3, r7\n\t"
         "eor	%[r], %[r], r3\n\t"
         : [r] "+r" (r)
         : [a] "r" (a), [b] "r" (b), [one] "r" (one)
-        : "r2", "r3", "r4", "r5", "r6", "r7"
+        : "r3", "r4", "r5", "r6", "r7"
     );
 #endif
 
@@ -17198,7 +75243,7 @@
  * b  A single precision number to subtract.
  * m  Mask value to apply.
  */
-static sp_digit sp_256_cond_sub_8(sp_digit* r, sp_digit* a, sp_digit* b,
+static sp_digit sp_256_cond_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b,
         sp_digit m)
 {
     sp_digit c = 0;
@@ -17276,13 +75321,15 @@
     return c;
 }
 
+#define sp_256_mont_reduce_order_8    sp_256_mont_reduce_8
+
 /* Reduce the number back to 256 bits using Montgomery reduction.
  *
  * a   A single precision number to reduce in place.
  * m   The single precision number representing the modulus.
  * mp  The digit representing the negative inverse of m mod 2^n.
  */
-SP_NOINLINE static void sp_256_mont_reduce_8(sp_digit* a, sp_digit* m,
+SP_NOINLINE static void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m,
         sp_digit mp)
 {
     sp_digit ca = 0;
@@ -17382,1287 +75429,13 @@
     sp_256_cond_sub_8(a - 8, a, m, (sp_digit)0 - ca);
 }
 
-/* Multiply two Montogmery form numbers mod the modulus (prime).
- * (r = a * b mod m)
- *
- * r   Result of multiplication.
- * a   First number to multiply in Montogmery form.
- * b   Second number to multiply in Montogmery form.
- * m   Modulus (prime).
- * mp  Montogmery mulitplier.
- */
-SP_NOINLINE static void sp_256_mont_mul_8(sp_digit* r, sp_digit* a, sp_digit* b,
-        sp_digit* m, sp_digit mp)
-{
-    sp_digit tmp[9];
-
-    (void)mp;
-    (void)m;
-
-    __asm__ __volatile__ (
-        "mov	r5, #0\n\t"
-        "#  A[0] * B[0]\n\t"
-        "ldr	r6, [%[a], #0]\n\t"
-        "ldr	r7, [%[b], #0]\n\t"
-        "umull	r8, r9, r6, r7\n\t"
-        "str	r8, [%[tmp], #0]\n\t"
-        "#  A[0] * B[1]\n\t"
-        "ldr	r6, [%[a], #0]\n\t"
-        "ldr	r7, [%[b], #4]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r9, r3, r9\n\t"
-        "adc	r10, r4, #0\n\t"
-        "#  A[1] * B[0]\n\t"
-        "ldr	r6, [%[a], #4]\n\t"
-        "ldr	r7, [%[b], #0]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r9, r3, r9\n\t"
-        "adcs	r10, r4, r10\n\t"
-        "adc	r14, r5, #0\n\t"
-        "str	r9, [%[tmp], #4]\n\t"
-        "#  A[0] * B[2]\n\t"
-        "ldr	r6, [%[a], #0]\n\t"
-        "ldr	r7, [%[b], #8]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r10, r3, r10\n\t"
-        "adc	r14, r4, r14\n\t"
-        "#  A[1] * B[1]\n\t"
-        "ldr	r6, [%[a], #4]\n\t"
-        "ldr	r7, [%[b], #4]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r10, r3, r10\n\t"
-        "adcs	r14, r4, r14\n\t"
-        "adc	r8, r5, #0\n\t"
-        "#  A[2] * B[0]\n\t"
-        "ldr	r6, [%[a], #8]\n\t"
-        "ldr	r7, [%[b], #0]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r10, r3, r10\n\t"
-        "adcs	r14, r4, r14\n\t"
-        "adc	r8, r5, r8\n\t"
-        "str	r10, [%[tmp], #8]\n\t"
-        "#  A[0] * B[3]\n\t"
-        "ldr	r6, [%[a], #0]\n\t"
-        "ldr	r7, [%[b], #12]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r14, r3, r14\n\t"
-        "adcs	r8, r4, r8\n\t"
-        "adc	r9, r5, #0\n\t"
-        "#  A[1] * B[2]\n\t"
-        "ldr	r6, [%[a], #4]\n\t"
-        "ldr	r7, [%[b], #8]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r14, r3, r14\n\t"
-        "adcs	r8, r4, r8\n\t"
-        "adc	r9, r5, r9\n\t"
-        "#  A[2] * B[1]\n\t"
-        "ldr	r6, [%[a], #8]\n\t"
-        "ldr	r7, [%[b], #4]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r14, r3, r14\n\t"
-        "adcs	r8, r4, r8\n\t"
-        "adc	r9, r5, r9\n\t"
-        "#  A[3] * B[0]\n\t"
-        "ldr	r6, [%[a], #12]\n\t"
-        "ldr	r7, [%[b], #0]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r14, r3, r14\n\t"
-        "adcs	r8, r4, r8\n\t"
-        "adc	r9, r5, r9\n\t"
-        "str	r14, [%[tmp], #12]\n\t"
-        "#  A[0] * B[4]\n\t"
-        "ldr	r6, [%[a], #0]\n\t"
-        "ldr	r7, [%[b], #16]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r8, r3, r8\n\t"
-        "adcs	r9, r4, r9\n\t"
-        "adc	r10, r5, #0\n\t"
-        "#  A[1] * B[3]\n\t"
-        "ldr	r6, [%[a], #4]\n\t"
-        "ldr	r7, [%[b], #12]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r8, r3, r8\n\t"
-        "adcs	r9, r4, r9\n\t"
-        "adc	r10, r5, r10\n\t"
-        "#  A[2] * B[2]\n\t"
-        "ldr	r6, [%[a], #8]\n\t"
-        "ldr	r7, [%[b], #8]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r8, r3, r8\n\t"
-        "adcs	r9, r4, r9\n\t"
-        "adc	r10, r5, r10\n\t"
-        "#  A[3] * B[1]\n\t"
-        "ldr	r6, [%[a], #12]\n\t"
-        "ldr	r7, [%[b], #4]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r8, r3, r8\n\t"
-        "adcs	r9, r4, r9\n\t"
-        "adc	r10, r5, r10\n\t"
-        "#  A[4] * B[0]\n\t"
-        "ldr	r6, [%[a], #16]\n\t"
-        "ldr	r7, [%[b], #0]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r8, r3, r8\n\t"
-        "adcs	r9, r4, r9\n\t"
-        "adc	r10, r5, r10\n\t"
-        "str	r8, [%[tmp], #16]\n\t"
-        "#  A[0] * B[5]\n\t"
-        "ldr	r6, [%[a], #0]\n\t"
-        "ldr	r7, [%[b], #20]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r9, r3, r9\n\t"
-        "adcs	r10, r4, r10\n\t"
-        "adc	r14, r5, #0\n\t"
-        "#  A[1] * B[4]\n\t"
-        "ldr	r6, [%[a], #4]\n\t"
-        "ldr	r7, [%[b], #16]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r9, r3, r9\n\t"
-        "adcs	r10, r4, r10\n\t"
-        "adc	r14, r5, r14\n\t"
-        "#  A[2] * B[3]\n\t"
-        "ldr	r6, [%[a], #8]\n\t"
-        "ldr	r7, [%[b], #12]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r9, r3, r9\n\t"
-        "adcs	r10, r4, r10\n\t"
-        "adc	r14, r5, r14\n\t"
-        "#  A[3] * B[2]\n\t"
-        "ldr	r6, [%[a], #12]\n\t"
-        "ldr	r7, [%[b], #8]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r9, r3, r9\n\t"
-        "adcs	r10, r4, r10\n\t"
-        "adc	r14, r5, r14\n\t"
-        "#  A[4] * B[1]\n\t"
-        "ldr	r6, [%[a], #16]\n\t"
-        "ldr	r7, [%[b], #4]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r9, r3, r9\n\t"
-        "adcs	r10, r4, r10\n\t"
-        "adc	r14, r5, r14\n\t"
-        "#  A[5] * B[0]\n\t"
-        "ldr	r6, [%[a], #20]\n\t"
-        "ldr	r7, [%[b], #0]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r9, r3, r9\n\t"
-        "adcs	r10, r4, r10\n\t"
-        "adc	r14, r5, r14\n\t"
-        "str	r9, [%[tmp], #20]\n\t"
-        "#  A[0] * B[6]\n\t"
-        "ldr	r6, [%[a], #0]\n\t"
-        "ldr	r7, [%[b], #24]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r10, r3, r10\n\t"
-        "adcs	r14, r4, r14\n\t"
-        "adc	r8, r5, #0\n\t"
-        "#  A[1] * B[5]\n\t"
-        "ldr	r6, [%[a], #4]\n\t"
-        "ldr	r7, [%[b], #20]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r10, r3, r10\n\t"
-        "adcs	r14, r4, r14\n\t"
-        "adc	r8, r5, r8\n\t"
-        "#  A[2] * B[4]\n\t"
-        "ldr	r6, [%[a], #8]\n\t"
-        "ldr	r7, [%[b], #16]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r10, r3, r10\n\t"
-        "adcs	r14, r4, r14\n\t"
-        "adc	r8, r5, r8\n\t"
-        "#  A[3] * B[3]\n\t"
-        "ldr	r6, [%[a], #12]\n\t"
-        "ldr	r7, [%[b], #12]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r10, r3, r10\n\t"
-        "adcs	r14, r4, r14\n\t"
-        "adc	r8, r5, r8\n\t"
-        "#  A[4] * B[2]\n\t"
-        "ldr	r6, [%[a], #16]\n\t"
-        "ldr	r7, [%[b], #8]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r10, r3, r10\n\t"
-        "adcs	r14, r4, r14\n\t"
-        "adc	r8, r5, r8\n\t"
-        "#  A[5] * B[1]\n\t"
-        "ldr	r6, [%[a], #20]\n\t"
-        "ldr	r7, [%[b], #4]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r10, r3, r10\n\t"
-        "adcs	r14, r4, r14\n\t"
-        "adc	r8, r5, r8\n\t"
-        "#  A[6] * B[0]\n\t"
-        "ldr	r6, [%[a], #24]\n\t"
-        "ldr	r7, [%[b], #0]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r10, r3, r10\n\t"
-        "adcs	r14, r4, r14\n\t"
-        "adc	r8, r5, r8\n\t"
-        "str	r10, [%[tmp], #24]\n\t"
-        "#  A[0] * B[7]\n\t"
-        "ldr	r6, [%[a], #0]\n\t"
-        "ldr	r7, [%[b], #28]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r14, r3, r14\n\t"
-        "adcs	r8, r4, r8\n\t"
-        "adc	r9, r5, #0\n\t"
-        "#  A[1] * B[6]\n\t"
-        "ldr	r6, [%[a], #4]\n\t"
-        "ldr	r7, [%[b], #24]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r14, r3, r14\n\t"
-        "adcs	r8, r4, r8\n\t"
-        "adc	r9, r5, r9\n\t"
-        "#  A[2] * B[5]\n\t"
-        "ldr	r6, [%[a], #8]\n\t"
-        "ldr	r7, [%[b], #20]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r14, r3, r14\n\t"
-        "adcs	r8, r4, r8\n\t"
-        "adc	r9, r5, r9\n\t"
-        "#  A[3] * B[4]\n\t"
-        "ldr	r6, [%[a], #12]\n\t"
-        "ldr	r7, [%[b], #16]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r14, r3, r14\n\t"
-        "adcs	r8, r4, r8\n\t"
-        "adc	r9, r5, r9\n\t"
-        "#  A[4] * B[3]\n\t"
-        "ldr	r6, [%[a], #16]\n\t"
-        "ldr	r7, [%[b], #12]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r14, r3, r14\n\t"
-        "adcs	r8, r4, r8\n\t"
-        "adc	r9, r5, r9\n\t"
-        "#  A[5] * B[2]\n\t"
-        "ldr	r6, [%[a], #20]\n\t"
-        "ldr	r7, [%[b], #8]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r14, r3, r14\n\t"
-        "adcs	r8, r4, r8\n\t"
-        "adc	r9, r5, r9\n\t"
-        "#  A[6] * B[1]\n\t"
-        "ldr	r6, [%[a], #24]\n\t"
-        "ldr	r7, [%[b], #4]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r14, r3, r14\n\t"
-        "adcs	r8, r4, r8\n\t"
-        "adc	r9, r5, r9\n\t"
-        "#  A[7] * B[0]\n\t"
-        "ldr	r6, [%[a], #28]\n\t"
-        "ldr	r7, [%[b], #0]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r14, r3, r14\n\t"
-        "adcs	r8, r4, r8\n\t"
-        "adc	r9, r5, r9\n\t"
-        "str	r14, [%[tmp], #28]\n\t"
-        "#  A[1] * B[7]\n\t"
-        "ldr	r6, [%[a], #4]\n\t"
-        "ldr	r7, [%[b], #28]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r8, r3, r8\n\t"
-        "adcs	r9, r4, r9\n\t"
-        "adc	r10, r5, #0\n\t"
-        "#  A[2] * B[6]\n\t"
-        "ldr	r6, [%[a], #8]\n\t"
-        "ldr	r7, [%[b], #24]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r8, r3, r8\n\t"
-        "adcs	r9, r4, r9\n\t"
-        "adc	r10, r5, r10\n\t"
-        "#  A[3] * B[5]\n\t"
-        "ldr	r6, [%[a], #12]\n\t"
-        "ldr	r7, [%[b], #20]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r8, r3, r8\n\t"
-        "adcs	r9, r4, r9\n\t"
-        "adc	r10, r5, r10\n\t"
-        "#  A[4] * B[4]\n\t"
-        "ldr	r6, [%[a], #16]\n\t"
-        "ldr	r7, [%[b], #16]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r8, r3, r8\n\t"
-        "adcs	r9, r4, r9\n\t"
-        "adc	r10, r5, r10\n\t"
-        "#  A[5] * B[3]\n\t"
-        "ldr	r6, [%[a], #20]\n\t"
-        "ldr	r7, [%[b], #12]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r8, r3, r8\n\t"
-        "adcs	r9, r4, r9\n\t"
-        "adc	r10, r5, r10\n\t"
-        "#  A[6] * B[2]\n\t"
-        "ldr	r6, [%[a], #24]\n\t"
-        "ldr	r7, [%[b], #8]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r8, r3, r8\n\t"
-        "adcs	r9, r4, r9\n\t"
-        "adc	r10, r5, r10\n\t"
-        "#  A[7] * B[1]\n\t"
-        "ldr	r6, [%[a], #28]\n\t"
-        "ldr	r7, [%[b], #4]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r8, r3, r8\n\t"
-        "adcs	r9, r4, r9\n\t"
-        "adc	r10, r5, r10\n\t"
-        "str	r8, [%[r], #0]\n\t"
-        "#  A[2] * B[7]\n\t"
-        "ldr	r6, [%[a], #8]\n\t"
-        "ldr	r7, [%[b], #28]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r9, r3, r9\n\t"
-        "adcs	r10, r4, r10\n\t"
-        "adc	r14, r5, #0\n\t"
-        "#  A[3] * B[6]\n\t"
-        "ldr	r6, [%[a], #12]\n\t"
-        "ldr	r7, [%[b], #24]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r9, r3, r9\n\t"
-        "adcs	r10, r4, r10\n\t"
-        "adc	r14, r5, r14\n\t"
-        "#  A[4] * B[5]\n\t"
-        "ldr	r6, [%[a], #16]\n\t"
-        "ldr	r7, [%[b], #20]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r9, r3, r9\n\t"
-        "adcs	r10, r4, r10\n\t"
-        "adc	r14, r5, r14\n\t"
-        "#  A[5] * B[4]\n\t"
-        "ldr	r6, [%[a], #20]\n\t"
-        "ldr	r7, [%[b], #16]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r9, r3, r9\n\t"
-        "adcs	r10, r4, r10\n\t"
-        "adc	r14, r5, r14\n\t"
-        "#  A[6] * B[3]\n\t"
-        "ldr	r6, [%[a], #24]\n\t"
-        "ldr	r7, [%[b], #12]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r9, r3, r9\n\t"
-        "adcs	r10, r4, r10\n\t"
-        "adc	r14, r5, r14\n\t"
-        "#  A[7] * B[2]\n\t"
-        "ldr	r6, [%[a], #28]\n\t"
-        "ldr	r7, [%[b], #8]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r9, r3, r9\n\t"
-        "adcs	r10, r4, r10\n\t"
-        "adc	r14, r5, r14\n\t"
-        "str	r9, [%[r], #4]\n\t"
-        "#  A[3] * B[7]\n\t"
-        "ldr	r6, [%[a], #12]\n\t"
-        "ldr	r7, [%[b], #28]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r10, r3, r10\n\t"
-        "adcs	r14, r4, r14\n\t"
-        "adc	r8, r5, #0\n\t"
-        "#  A[4] * B[6]\n\t"
-        "ldr	r6, [%[a], #16]\n\t"
-        "ldr	r7, [%[b], #24]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r10, r3, r10\n\t"
-        "adcs	r14, r4, r14\n\t"
-        "adc	r8, r5, r8\n\t"
-        "#  A[5] * B[5]\n\t"
-        "ldr	r6, [%[a], #20]\n\t"
-        "ldr	r7, [%[b], #20]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r10, r3, r10\n\t"
-        "adcs	r14, r4, r14\n\t"
-        "adc	r8, r5, r8\n\t"
-        "#  A[6] * B[4]\n\t"
-        "ldr	r6, [%[a], #24]\n\t"
-        "ldr	r7, [%[b], #16]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r10, r3, r10\n\t"
-        "adcs	r14, r4, r14\n\t"
-        "adc	r8, r5, r8\n\t"
-        "#  A[7] * B[3]\n\t"
-        "ldr	r6, [%[a], #28]\n\t"
-        "ldr	r7, [%[b], #12]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r10, r3, r10\n\t"
-        "adcs	r14, r4, r14\n\t"
-        "adc	r8, r5, r8\n\t"
-        "str	r10, [%[r], #8]\n\t"
-        "#  A[4] * B[7]\n\t"
-        "ldr	r6, [%[a], #16]\n\t"
-        "ldr	r7, [%[b], #28]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r14, r3, r14\n\t"
-        "adcs	r8, r4, r8\n\t"
-        "adc	r9, r5, #0\n\t"
-        "#  A[5] * B[6]\n\t"
-        "ldr	r6, [%[a], #20]\n\t"
-        "ldr	r7, [%[b], #24]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r14, r3, r14\n\t"
-        "adcs	r8, r4, r8\n\t"
-        "adc	r9, r5, r9\n\t"
-        "#  A[6] * B[5]\n\t"
-        "ldr	r6, [%[a], #24]\n\t"
-        "ldr	r7, [%[b], #20]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r14, r3, r14\n\t"
-        "adcs	r8, r4, r8\n\t"
-        "adc	r9, r5, r9\n\t"
-        "#  A[7] * B[4]\n\t"
-        "ldr	r6, [%[a], #28]\n\t"
-        "ldr	r7, [%[b], #16]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r14, r3, r14\n\t"
-        "adcs	r8, r4, r8\n\t"
-        "adc	r9, r5, r9\n\t"
-        "str	r14, [%[r], #12]\n\t"
-        "#  A[5] * B[7]\n\t"
-        "ldr	r6, [%[a], #20]\n\t"
-        "ldr	r7, [%[b], #28]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r8, r3, r8\n\t"
-        "adcs	r9, r4, r9\n\t"
-        "adc	r10, r5, #0\n\t"
-        "#  A[6] * B[6]\n\t"
-        "ldr	r6, [%[a], #24]\n\t"
-        "ldr	r7, [%[b], #24]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r8, r3, r8\n\t"
-        "adcs	r9, r4, r9\n\t"
-        "adc	r10, r5, r10\n\t"
-        "#  A[7] * B[5]\n\t"
-        "ldr	r6, [%[a], #28]\n\t"
-        "ldr	r7, [%[b], #20]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r8, r3, r8\n\t"
-        "adcs	r9, r4, r9\n\t"
-        "adc	r10, r5, r10\n\t"
-        "#  A[6] * B[7]\n\t"
-        "ldr	r6, [%[a], #24]\n\t"
-        "ldr	r7, [%[b], #28]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r9, r3, r9\n\t"
-        "adcs	r10, r4, r10\n\t"
-        "adc	r14, r5, #0\n\t"
-        "#  A[7] * B[6]\n\t"
-        "ldr	r6, [%[a], #28]\n\t"
-        "ldr	r7, [%[b], #24]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r9, r3, r9\n\t"
-        "adcs	r10, r4, r10\n\t"
-        "adc	r14, r5, r14\n\t"
-        "#  A[7] * B[7]\n\t"
-        "ldr	r6, [%[a], #28]\n\t"
-        "ldr	r7, [%[b], #28]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r10, r3, r10\n\t"
-        "adc	r14, r4, r14\n\t"
-        "str	r8, [%[r], #16]\n\t"
-        "str	r9, [%[r], #20]\n\t"
-        "str	r10, [%[r], #24]\n\t"
-        "str	r14, [%[r], #28]\n\t"
-        "# Start Reduction\n\t"
-        "ldr	r4, [%[tmp], #0]\n\t"
-        "ldr	r5, [%[tmp], #4]\n\t"
-        "ldr	r6, [%[tmp], #8]\n\t"
-        "ldr	r7, [%[tmp], #12]\n\t"
-        "ldr	r8, [%[tmp], #16]\n\t"
-        "ldr	r9, [%[tmp], #20]\n\t"
-        "ldr	r10, [%[tmp], #24]\n\t"
-        "ldr	r14, [%[tmp], #28]\n\t"
-        "# mu = a[0]-a[7] + a[0]-a[4] << 96 + (a[0]-a[1] * 2) << 192\n\t"
-        "#    - a[0] << 224\n\t"
-        "#   + (a[0]-a[1] * 2) << (6 * 32)\n\t"
-        "adds	r10, r10, r4\n\t"
-        "adc	r14, r14, r5\n\t"
-        "adds	r10, r10, r4\n\t"
-        "adc	r14, r14, r5\n\t"
-        "#   - a[0] << (7 * 32)\n\t"
-        "sub	r14, r14, r4\n\t"
-        "#   + a[0]-a[4] << (3 * 32)\n\t"
-        "mov	%[a], r7\n\t"
-        "mov	%[b], r8\n\t"
-        "adds	r7, r7, r4\n\t"
-        "adcs	r8, r8, r5\n\t"
-        "adcs	r9, r9, r6\n\t"
-        "adcs	r10, r10, %[a]\n\t"
-        "adc	r14, r14, %[b]\n\t"
-        "str	r4, [%[tmp], #0]\n\t"
-        "str	r5, [%[tmp], #4]\n\t"
-        "str	r6, [%[tmp], #8]\n\t"
-        "str	r7, [%[tmp], #12]\n\t"
-        "str	r8, [%[tmp], #16]\n\t"
-        "str	r9, [%[tmp], #20]\n\t"
-        "# a += mu * m\n\t"
-        "#   += mu * ((1 << 256) - (1 << 224) + (1 << 192) + (1 << 96) - 1)\n\t"
-        "mov	%[a], #0\n\t"
-        "# a[6] +=        t[0] + t[3]\n\t"
-        "ldr	r3, [%[tmp], #24]\n\t"
-        "adds	r3, r3, r4\n\t"
-        "adc	%[b], %[a], #0\n\t"
-        "adds	r3, r3, r7\n\t"
-        "adc	%[b], %[b], #0\n\t"
-        "str	r10, [%[tmp], #24]\n\t"
-        "# a[7] +=        t[1] + t[4]\n\t"
-        "ldr	r3, [%[tmp], #28]\n\t"
-        "adds	r3, r3, %[b]\n\t"
-        "adc	%[b], %[a], #0\n\t"
-        "adds	r3, r3, r5\n\t"
-        "adc	%[b], %[b], #0\n\t"
-        "adds	r3, r3, r8\n\t"
-        "adc	%[b], %[b], #0\n\t"
-        "str	r14, [%[tmp], #28]\n\t"
-        "str	r3, [%[tmp], #32]\n\t"
-        "# a[8] += t[0] + t[2] + t[5]\n\t"
-        "ldr	r3, [%[r], #0]\n\t"
-        "adds	r3, r3, %[b]\n\t"
-        "adc	%[b], %[a], #0\n\t"
-        "adds	r3, r3, r4\n\t"
-        "adc	%[b], %[b], #0\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adc	%[b], %[b], #0\n\t"
-        "adds	r3, r3, r9\n\t"
-        "adc	%[b], %[b], #0\n\t"
-        "str	r3, [%[r], #0]\n\t"
-        "# a[9]  += t[1] + t[3] + t[6]\n\t"
-        "# a[10] += t[2] + t[4] + t[7]\n\t"
-        "ldr	r3, [%[r], #4]\n\t"
-        "ldr	r4, [%[r], #8]\n\t"
-        "adds	r3, r3, %[b]\n\t"
-        "adcs	r4, r4, #0\n\t"
-        "adc	%[b], %[a], #0\n\t"
-        "adds	r3, r3, r5\n\t"
-        "adcs	r4, r4, r6\n\t"
-        "adc	%[b], %[b], #0\n\t"
-        "adds	r3, r3, r7\n\t"
-        "adcs	r4, r4, r8\n\t"
-        "adc	%[b], %[b], #0\n\t"
-        "adds	r3, r3, r10\n\t"
-        "adcs	r4, r4, r14\n\t"
-        "adc	%[b], %[b], #0\n\t"
-        "str	r3, [%[r], #4]\n\t"
-        "str	r4, [%[r], #8]\n\t"
-        "# a[11] += t[3] + t[5]\n\t"
-        "# a[12] += t[4] + t[6]\n\t"
-        "# a[13] += t[5] + t[7]\n\t"
-        "# a[14] += t[6]\n\t"
-        "ldr	r3, [%[r], #12]\n\t"
-        "ldr	r4, [%[r], #16]\n\t"
-        "ldr	r5, [%[r], #20]\n\t"
-        "ldr	r6, [%[r], #24]\n\t"
-        "adds	r3, r3, %[b]\n\t"
-        "adcs	r4, r4, #0\n\t"
-        "adcs	r5, r5, #0\n\t"
-        "adcs	r6, r6, #0\n\t"
-        "adc	%[b], %[a], #0\n\t"
-        "adds	r3, r3, r7\n\t"
-        "adcs	r4, r4, r8\n\t"
-        "adcs	r5, r5, r9\n\t"
-        "adcs	r6, r6, r10\n\t"
-        "adc	%[b], %[b], #0\n\t"
-        "adds	r3, r3, r9\n\t"
-        "adcs	r4, r4, r10\n\t"
-        "adcs	r5, r5, r14\n\t"
-        "adcs	r6, r6, #0\n\t"
-        "adc	%[b], %[b], #0\n\t"
-        "str	r3, [%[r], #12]\n\t"
-        "str	r4, [%[r], #16]\n\t"
-        "str	r5, [%[r], #20]\n\t"
-        "str	r6, [%[r], #24]\n\t"
-        "# a[15] += t[7]\n\t"
-        "ldr	r3, [%[r], #28]\n\t"
-        "adds	r3, r3, %[b]\n\t"
-        "adc	%[b], %[a], #0\n\t"
-        "adds	r3, r3, r14\n\t"
-        "adc	%[b], %[b], #0\n\t"
-        "str	r3, [%[r], #28]\n\t"
-        "ldr	r3, [%[tmp], #32]\n\t"
-        "ldr	r4, [%[r], #0]\n\t"
-        "ldr	r5, [%[r], #4]\n\t"
-        "ldr	r6, [%[r], #8]\n\t"
-        "ldr	r8, [%[tmp], #0]\n\t"
-        "ldr	r9, [%[tmp], #4]\n\t"
-        "ldr	r10, [%[tmp], #8]\n\t"
-        "ldr	r14, [%[tmp], #12]\n\t"
-        "subs	r3, r3, r8\n\t"
-        "sbcs	r4, r4, r9\n\t"
-        "sbcs	r5, r5, r10\n\t"
-        "sbcs	r6, r6, r14\n\t"
-        "str	r4, [%[r], #0]\n\t"
-        "str	r5, [%[r], #4]\n\t"
-        "str	r6, [%[r], #8]\n\t"
-        "ldr	r3, [%[r], #12]\n\t"
-        "ldr	r4, [%[r], #16]\n\t"
-        "ldr	r5, [%[r], #20]\n\t"
-        "ldr	r6, [%[r], #24]\n\t"
-        "ldr	r7, [%[r], #28]\n\t"
-        "ldr	r8, [%[tmp], #16]\n\t"
-        "ldr	r9, [%[tmp], #20]\n\t"
-        "ldr	r10, [%[tmp], #24]\n\t"
-        "ldr	r14, [%[tmp], #28]\n\t"
-        "sbcs	r3, r3, r8\n\t"
-        "sbcs	r4, r4, r9\n\t"
-        "sbcs	r5, r5, r10\n\t"
-        "sbcs	r6, r6, r14\n\t"
-        "sbc	r7, r7, #0\n\t"
-        "str	r3, [%[r], #12]\n\t"
-        "str	r4, [%[r], #16]\n\t"
-        "str	r5, [%[r], #20]\n\t"
-        "str	r6, [%[r], #24]\n\t"
-        "str	r7, [%[r], #28]\n\t"
-        "# mask m and sub from result if overflow\n\t"
-        "sub	%[b], %[a], %[b]\n\t"
-        "and	%[a], %[b], #1\n\t"
-        "ldr	r3, [%[r], #0]\n\t"
-        "ldr	r4, [%[r], #4]\n\t"
-        "ldr	r5, [%[r], #8]\n\t"
-        "ldr	r6, [%[r], #12]\n\t"
-        "ldr	r7, [%[r], #16]\n\t"
-        "ldr	r8, [%[r], #20]\n\t"
-        "ldr	r9, [%[r], #24]\n\t"
-        "ldr	r10, [%[r], #28]\n\t"
-        "subs	r3, r3, %[b]\n\t"
-        "sbcs	r4, r4, %[b]\n\t"
-        "sbcs	r5, r5, %[b]\n\t"
-        "sbcs	r6, r6, #0\n\t"
-        "sbcs	r7, r7, #0\n\t"
-        "sbcs	r8, r8, #0\n\t"
-        "sbcs	r9, r9, %[a]\n\t"
-        "sbc	r10, r10, %[b]\n\t"
-        "str	r3, [%[r], #0]\n\t"
-        "str	r4, [%[r], #4]\n\t"
-        "str	r5, [%[r], #8]\n\t"
-        "str	r6, [%[r], #12]\n\t"
-        "str	r7, [%[r], #16]\n\t"
-        "str	r8, [%[r], #20]\n\t"
-        "str	r9, [%[r], #24]\n\t"
-        "str	r10, [%[r], #28]\n\t"
-        : [a] "+r" (a), [b] "+r" (b)
-        : [r] "r" (r), [tmp] "r" (tmp)
-        : "memory", "r8", "r9", "r10", "r14", "r3", "r4", "r5", "r6", "r7"
-    );
-}
-
-/* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m)
- *
- * r   Result of squaring.
- * a   Number to square in Montogmery form.
- * m   Modulus (prime).
- * mp  Montogmery mulitplier.
- */
-SP_NOINLINE static void sp_256_mont_sqr_8(sp_digit* r, sp_digit* a, sp_digit* m,
-        sp_digit mp)
-{
-    sp_digit tmp[16];
-
-    (void)mp;
-    (void)m;
-
-    __asm__ __volatile__ (
-        "mov	r5, #0\n\t"
-        "#  A[0] * A[1]\n\t"
-        "ldr	r6, [%[a], #0]\n\t"
-        "ldr	r7, [%[a], #4]\n\t"
-        "umull	r9, r10, r6, r7\n\t"
-        "str	r9, [%[tmp], #4]\n\t"
-        "#  A[0] * A[2]\n\t"
-        "ldr	r6, [%[a], #0]\n\t"
-        "ldr	r7, [%[a], #8]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r10, r3, r10\n\t"
-        "adc	r14, r4, #0\n\t"
-        "str	r10, [%[tmp], #8]\n\t"
-        "#  A[0] * A[3]\n\t"
-        "ldr	r6, [%[a], #0]\n\t"
-        "ldr	r7, [%[a], #12]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r14, r3, r14\n\t"
-        "adc	r8, r4, #0\n\t"
-        "#  A[1] * A[2]\n\t"
-        "ldr	r6, [%[a], #4]\n\t"
-        "ldr	r7, [%[a], #8]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r14, r3, r14\n\t"
-        "adcs	r8, r4, r8\n\t"
-        "adc	r9, r5, #0\n\t"
-        "str	r14, [%[tmp], #12]\n\t"
-        "#  A[0] * A[4]\n\t"
-        "ldr	r6, [%[a], #0]\n\t"
-        "ldr	r7, [%[a], #16]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r8, r3, r8\n\t"
-        "adc	r9, r4, r9\n\t"
-        "#  A[1] * A[3]\n\t"
-        "ldr	r6, [%[a], #4]\n\t"
-        "ldr	r7, [%[a], #12]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r8, r3, r8\n\t"
-        "adcs	r9, r4, r9\n\t"
-        "adc	r10, r5, #0\n\t"
-        "str	r8, [%[tmp], #16]\n\t"
-        "#  A[0] * A[5]\n\t"
-        "ldr	r6, [%[a], #0]\n\t"
-        "ldr	r7, [%[a], #20]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r9, r3, r9\n\t"
-        "adc	r10, r4, r10\n\t"
-        "#  A[1] * A[4]\n\t"
-        "ldr	r6, [%[a], #4]\n\t"
-        "ldr	r7, [%[a], #16]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r9, r3, r9\n\t"
-        "adcs	r10, r4, r10\n\t"
-        "adc	r14, r5, #0\n\t"
-        "#  A[2] * A[3]\n\t"
-        "ldr	r6, [%[a], #8]\n\t"
-        "ldr	r7, [%[a], #12]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r9, r3, r9\n\t"
-        "adcs	r10, r4, r10\n\t"
-        "adc	r14, r5, r14\n\t"
-        "str	r9, [%[tmp], #20]\n\t"
-        "#  A[0] * A[6]\n\t"
-        "ldr	r6, [%[a], #0]\n\t"
-        "ldr	r7, [%[a], #24]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r10, r3, r10\n\t"
-        "adcs	r14, r4, r14\n\t"
-        "adc	r8, r5, #0\n\t"
-        "#  A[1] * A[5]\n\t"
-        "ldr	r6, [%[a], #4]\n\t"
-        "ldr	r7, [%[a], #20]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r10, r3, r10\n\t"
-        "adcs	r14, r4, r14\n\t"
-        "adc	r8, r5, r8\n\t"
-        "#  A[2] * A[4]\n\t"
-        "ldr	r6, [%[a], #8]\n\t"
-        "ldr	r7, [%[a], #16]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r10, r3, r10\n\t"
-        "adcs	r14, r4, r14\n\t"
-        "adc	r8, r5, r8\n\t"
-        "str	r10, [%[tmp], #24]\n\t"
-        "#  A[0] * A[7]\n\t"
-        "ldr	r6, [%[a], #0]\n\t"
-        "ldr	r7, [%[a], #28]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r14, r3, r14\n\t"
-        "adcs	r8, r4, r8\n\t"
-        "adc	r9, r5, #0\n\t"
-        "#  A[1] * A[6]\n\t"
-        "ldr	r6, [%[a], #4]\n\t"
-        "ldr	r7, [%[a], #24]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r14, r3, r14\n\t"
-        "adcs	r8, r4, r8\n\t"
-        "adc	r9, r5, r9\n\t"
-        "#  A[2] * A[5]\n\t"
-        "ldr	r6, [%[a], #8]\n\t"
-        "ldr	r7, [%[a], #20]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r14, r3, r14\n\t"
-        "adcs	r8, r4, r8\n\t"
-        "adc	r9, r5, r9\n\t"
-        "#  A[3] * A[4]\n\t"
-        "ldr	r6, [%[a], #12]\n\t"
-        "ldr	r7, [%[a], #16]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r14, r3, r14\n\t"
-        "adcs	r8, r4, r8\n\t"
-        "adc	r9, r5, r9\n\t"
-        "str	r14, [%[tmp], #28]\n\t"
-        "#  A[1] * A[7]\n\t"
-        "ldr	r6, [%[a], #4]\n\t"
-        "ldr	r7, [%[a], #28]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r8, r3, r8\n\t"
-        "adcs	r9, r4, r9\n\t"
-        "adc	r10, r5, #0\n\t"
-        "#  A[2] * A[6]\n\t"
-        "ldr	r6, [%[a], #8]\n\t"
-        "ldr	r7, [%[a], #24]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r8, r3, r8\n\t"
-        "adcs	r9, r4, r9\n\t"
-        "adc	r10, r5, r10\n\t"
-        "#  A[3] * A[5]\n\t"
-        "ldr	r6, [%[a], #12]\n\t"
-        "ldr	r7, [%[a], #20]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r8, r3, r8\n\t"
-        "adcs	r9, r4, r9\n\t"
-        "adc	r10, r5, r10\n\t"
-        "str	r8, [%[tmp], #32]\n\t"
-        "#  A[2] * A[7]\n\t"
-        "ldr	r6, [%[a], #8]\n\t"
-        "ldr	r7, [%[a], #28]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r9, r3, r9\n\t"
-        "adcs	r10, r4, r10\n\t"
-        "adc	r14, r5, #0\n\t"
-        "#  A[3] * A[6]\n\t"
-        "ldr	r6, [%[a], #12]\n\t"
-        "ldr	r7, [%[a], #24]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r9, r3, r9\n\t"
-        "adcs	r10, r4, r10\n\t"
-        "adc	r14, r5, r14\n\t"
-        "#  A[4] * A[5]\n\t"
-        "ldr	r6, [%[a], #16]\n\t"
-        "ldr	r7, [%[a], #20]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r9, r3, r9\n\t"
-        "adcs	r10, r4, r10\n\t"
-        "adc	r14, r5, r14\n\t"
-        "str	r9, [%[tmp], #36]\n\t"
-        "#  A[3] * A[7]\n\t"
-        "ldr	r6, [%[a], #12]\n\t"
-        "ldr	r7, [%[a], #28]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r10, r3, r10\n\t"
-        "adcs	r14, r4, r14\n\t"
-        "adc	r8, r5, #0\n\t"
-        "#  A[4] * A[6]\n\t"
-        "ldr	r6, [%[a], #16]\n\t"
-        "ldr	r7, [%[a], #24]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r10, r3, r10\n\t"
-        "adcs	r14, r4, r14\n\t"
-        "adc	r8, r5, r8\n\t"
-        "str	r10, [%[tmp], #40]\n\t"
-        "#  A[4] * A[7]\n\t"
-        "ldr	r6, [%[a], #16]\n\t"
-        "ldr	r7, [%[a], #28]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r14, r3, r14\n\t"
-        "adcs	r8, r4, r8\n\t"
-        "adc	r9, r5, #0\n\t"
-        "#  A[5] * A[6]\n\t"
-        "ldr	r6, [%[a], #20]\n\t"
-        "ldr	r7, [%[a], #24]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r14, r3, r14\n\t"
-        "adcs	r8, r4, r8\n\t"
-        "adc	r9, r5, r9\n\t"
-        "str	r14, [%[tmp], #44]\n\t"
-        "#  A[5] * A[7]\n\t"
-        "ldr	r6, [%[a], #20]\n\t"
-        "ldr	r7, [%[a], #28]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r8, r3, r8\n\t"
-        "adcs	r9, r4, r9\n\t"
-        "adc	r10, r5, #0\n\t"
-        "str	r8, [%[tmp], #48]\n\t"
-        "#  A[6] * A[7]\n\t"
-        "ldr	r6, [%[a], #24]\n\t"
-        "ldr	r7, [%[a], #28]\n\t"
-        "umull	r3, r4, r6, r7\n\t"
-        "adds	r9, r3, r9\n\t"
-        "adc	r10, r4, r10\n\t"
-        "str	r9, [%[tmp], #52]\n\t"
-        "str	r10, [%[tmp], #56]\n\t"
-        "# Double\n\t"
-        "ldr	r4, [%[tmp], #4]\n\t"
-        "ldr	r6, [%[tmp], #8]\n\t"
-        "ldr	r7, [%[tmp], #12]\n\t"
-        "ldr	r8, [%[tmp], #16]\n\t"
-        "ldr	r9, [%[tmp], #20]\n\t"
-        "ldr	r10, [%[tmp], #24]\n\t"
-        "ldr	r14, [%[tmp], #28]\n\t"
-        "ldr	r12, [%[tmp], #32]\n\t"
-        "ldr	r3, [%[tmp], #36]\n\t"
-        "adds	r4, r4, r4\n\t"
-        "adcs	r6, r6, r6\n\t"
-        "adcs	r7, r7, r7\n\t"
-        "adcs	r8, r8, r8\n\t"
-        "adcs	r9, r9, r9\n\t"
-        "adcs	r10, r10, r10\n\t"
-        "adcs	r14, r14, r14\n\t"
-        "adcs	r12, r12, r12\n\t"
-        "adcs	r3, r3, r3\n\t"
-        "str	r4, [%[tmp], #4]\n\t"
-        "str	r6, [%[tmp], #8]\n\t"
-        "str	r7, [%[tmp], #12]\n\t"
-        "str	r8, [%[tmp], #16]\n\t"
-        "str	r9, [%[tmp], #20]\n\t"
-        "str	r10, [%[tmp], #24]\n\t"
-        "str	r14, [%[tmp], #28]\n\t"
-        "str	r12, [%[tmp], #32]\n\t"
-        "str	r3, [%[tmp], #36]\n\t"
-        "ldr	r4, [%[tmp], #40]\n\t"
-        "ldr	r6, [%[tmp], #44]\n\t"
-        "ldr	r7, [%[tmp], #48]\n\t"
-        "ldr	r8, [%[tmp], #52]\n\t"
-        "ldr	r9, [%[tmp], #56]\n\t"
-        "adcs	r4, r4, r4\n\t"
-        "adcs	r6, r6, r6\n\t"
-        "adcs	r7, r7, r7\n\t"
-        "adcs	r8, r8, r8\n\t"
-        "adcs	r9, r9, r9\n\t"
-        "str	r4, [%[tmp], #40]\n\t"
-        "str	r6, [%[tmp], #44]\n\t"
-        "str	r7, [%[tmp], #48]\n\t"
-        "str	r8, [%[tmp], #52]\n\t"
-        "str	r9, [%[tmp], #56]\n\t"
-        "adc	r10, r5, #0\n\t"
-        "str	r10, [%[tmp], #60]\n\t"
-        "ldr	r4, [%[tmp], #4]\n\t"
-        "ldr	r5, [%[tmp], #8]\n\t"
-        "ldr	r12, [%[tmp], #12]\n\t"
-        "#  A[0] * A[0]\n\t"
-        "ldr	r6, [%[a], #0]\n\t"
-        "umull	r8, r9, r6, r6\n\t"
-        "#  A[1] * A[1]\n\t"
-        "ldr	r6, [%[a], #4]\n\t"
-        "umull	r10, r14, r6, r6\n\t"
-        "adds	r9, r9, r4\n\t"
-        "adcs	r10, r10, r5\n\t"
-        "adcs	r14, r14, r12\n\t"
-        "str	r8, [%[tmp], #0]\n\t"
-        "str	r9, [%[tmp], #4]\n\t"
-        "str	r10, [%[tmp], #8]\n\t"
-        "str	r14, [%[tmp], #12]\n\t"
-        "ldr	r3, [%[tmp], #16]\n\t"
-        "ldr	r4, [%[tmp], #20]\n\t"
-        "ldr	r5, [%[tmp], #24]\n\t"
-        "ldr	r12, [%[tmp], #28]\n\t"
-        "#  A[2] * A[2]\n\t"
-        "ldr	r6, [%[a], #8]\n\t"
-        "umull	r8, r9, r6, r6\n\t"
-        "#  A[3] * A[3]\n\t"
-        "ldr	r6, [%[a], #12]\n\t"
-        "umull	r10, r14, r6, r6\n\t"
-        "adcs	r8, r8, r3\n\t"
-        "adcs	r9, r9, r4\n\t"
-        "adcs	r10, r10, r5\n\t"
-        "adcs	r14, r14, r12\n\t"
-        "str	r8, [%[tmp], #16]\n\t"
-        "str	r9, [%[tmp], #20]\n\t"
-        "str	r10, [%[tmp], #24]\n\t"
-        "str	r14, [%[tmp], #28]\n\t"
-        "ldr	r3, [%[tmp], #32]\n\t"
-        "ldr	r4, [%[tmp], #36]\n\t"
-        "ldr	r5, [%[tmp], #40]\n\t"
-        "ldr	r12, [%[tmp], #44]\n\t"
-        "#  A[4] * A[4]\n\t"
-        "ldr	r6, [%[a], #16]\n\t"
-        "umull	r8, r9, r6, r6\n\t"
-        "#  A[5] * A[5]\n\t"
-        "ldr	r6, [%[a], #20]\n\t"
-        "umull	r10, r14, r6, r6\n\t"
-        "adcs	r8, r8, r3\n\t"
-        "adcs	r9, r9, r4\n\t"
-        "adcs	r10, r10, r5\n\t"
-        "adcs	r14, r14, r12\n\t"
-        "str	r8, [%[r], #0]\n\t"
-        "str	r9, [%[r], #4]\n\t"
-        "str	r10, [%[r], #8]\n\t"
-        "str	r14, [%[r], #12]\n\t"
-        "ldr	r3, [%[tmp], #48]\n\t"
-        "ldr	r4, [%[tmp], #52]\n\t"
-        "ldr	r5, [%[tmp], #56]\n\t"
-        "ldr	r12, [%[tmp], #60]\n\t"
-        "#  A[6] * A[6]\n\t"
-        "ldr	r6, [%[a], #24]\n\t"
-        "umull	r8, r9, r6, r6\n\t"
-        "#  A[7] * A[7]\n\t"
-        "ldr	r6, [%[a], #28]\n\t"
-        "umull	r10, r14, r6, r6\n\t"
-        "adcs	r8, r8, r3\n\t"
-        "adcs	r9, r9, r4\n\t"
-        "adcs	r10, r10, r5\n\t"
-        "adc	r14, r14, r12\n\t"
-        "str	r8, [%[r], #16]\n\t"
-        "str	r9, [%[r], #20]\n\t"
-        "str	r10, [%[r], #24]\n\t"
-        "str	r14, [%[r], #28]\n\t"
-        "# Start Reduction\n\t"
-        "ldr	r4, [%[tmp], #0]\n\t"
-        "ldr	r5, [%[tmp], #4]\n\t"
-        "ldr	r6, [%[tmp], #8]\n\t"
-        "ldr	r7, [%[tmp], #12]\n\t"
-        "ldr	r8, [%[tmp], #16]\n\t"
-        "ldr	r9, [%[tmp], #20]\n\t"
-        "ldr	r10, [%[tmp], #24]\n\t"
-        "ldr	r14, [%[tmp], #28]\n\t"
-        "# mu = a[0]-a[7] + a[0]-a[4] << 96 + (a[0]-a[1] * 2) << 192\n\t"
-        "#    - a[0] << 224\n\t"
-        "#   + (a[0]-a[1] * 2) << (6 * 32)\n\t"
-        "adds	r10, r10, r4\n\t"
-        "adc	r14, r14, r5\n\t"
-        "adds	r10, r10, r4\n\t"
-        "adc	r14, r14, r5\n\t"
-        "#   - a[0] << (7 * 32)\n\t"
-        "sub	r14, r14, r4\n\t"
-        "#   + a[0]-a[4] << (3 * 32)\n\t"
-        "mov	%[a], r7\n\t"
-        "mov	r12, r8\n\t"
-        "adds	r7, r7, r4\n\t"
-        "adcs	r8, r8, r5\n\t"
-        "adcs	r9, r9, r6\n\t"
-        "adcs	r10, r10, %[a]\n\t"
-        "adc	r14, r14, r12\n\t"
-        "str	r4, [%[tmp], #0]\n\t"
-        "str	r5, [%[tmp], #4]\n\t"
-        "str	r6, [%[tmp], #8]\n\t"
-        "str	r7, [%[tmp], #12]\n\t"
-        "str	r8, [%[tmp], #16]\n\t"
-        "str	r9, [%[tmp], #20]\n\t"
-        "# a += mu * m\n\t"
-        "#   += mu * ((1 << 256) - (1 << 224) + (1 << 192) + (1 << 96) - 1)\n\t"
-        "mov	%[a], #0\n\t"
-        "# a[6] +=        t[0] + t[3]\n\t"
-        "ldr	r3, [%[tmp], #24]\n\t"
-        "adds	r3, r3, r4\n\t"
-        "adc	r12, %[a], #0\n\t"
-        "adds	r3, r3, r7\n\t"
-        "adc	r12, r12, #0\n\t"
-        "str	r10, [%[tmp], #24]\n\t"
-        "# a[7] +=        t[1] + t[4]\n\t"
-        "ldr	r3, [%[tmp], #28]\n\t"
-        "adds	r3, r3, r12\n\t"
-        "adc	r12, %[a], #0\n\t"
-        "adds	r3, r3, r5\n\t"
-        "adc	r12, r12, #0\n\t"
-        "adds	r3, r3, r8\n\t"
-        "adc	r12, r12, #0\n\t"
-        "str	r14, [%[tmp], #28]\n\t"
-        "str	r3, [%[tmp], #32]\n\t"
-        "# a[8] += t[0] + t[2] + t[5]\n\t"
-        "ldr	r3, [%[r], #0]\n\t"
-        "adds	r3, r3, r12\n\t"
-        "adc	r12, %[a], #0\n\t"
-        "adds	r3, r3, r4\n\t"
-        "adc	r12, r12, #0\n\t"
-        "adds	r3, r3, r6\n\t"
-        "adc	r12, r12, #0\n\t"
-        "adds	r3, r3, r9\n\t"
-        "adc	r12, r12, #0\n\t"
-        "str	r3, [%[r], #0]\n\t"
-        "# a[9]  += t[1] + t[3] + t[6]\n\t"
-        "# a[10] += t[2] + t[4] + t[7]\n\t"
-        "ldr	r3, [%[r], #4]\n\t"
-        "ldr	r4, [%[r], #8]\n\t"
-        "adds	r3, r3, r12\n\t"
-        "adcs	r4, r4, #0\n\t"
-        "adc	r12, %[a], #0\n\t"
-        "adds	r3, r3, r5\n\t"
-        "adcs	r4, r4, r6\n\t"
-        "adc	r12, r12, #0\n\t"
-        "adds	r3, r3, r7\n\t"
-        "adcs	r4, r4, r8\n\t"
-        "adc	r12, r12, #0\n\t"
-        "adds	r3, r3, r10\n\t"
-        "adcs	r4, r4, r14\n\t"
-        "adc	r12, r12, #0\n\t"
-        "str	r3, [%[r], #4]\n\t"
-        "str	r4, [%[r], #8]\n\t"
-        "# a[11] += t[3] + t[5]\n\t"
-        "# a[12] += t[4] + t[6]\n\t"
-        "# a[13] += t[5] + t[7]\n\t"
-        "# a[14] += t[6]\n\t"
-        "ldr	r3, [%[r], #12]\n\t"
-        "ldr	r4, [%[r], #16]\n\t"
-        "ldr	r5, [%[r], #20]\n\t"
-        "ldr	r6, [%[r], #24]\n\t"
-        "adds	r3, r3, r12\n\t"
-        "adcs	r4, r4, #0\n\t"
-        "adcs	r5, r5, #0\n\t"
-        "adcs	r6, r6, #0\n\t"
-        "adc	r12, %[a], #0\n\t"
-        "adds	r3, r3, r7\n\t"
-        "adcs	r4, r4, r8\n\t"
-        "adcs	r5, r5, r9\n\t"
-        "adcs	r6, r6, r10\n\t"
-        "adc	r12, r12, #0\n\t"
-        "adds	r3, r3, r9\n\t"
-        "adcs	r4, r4, r10\n\t"
-        "adcs	r5, r5, r14\n\t"
-        "adcs	r6, r6, #0\n\t"
-        "adc	r12, r12, #0\n\t"
-        "str	r3, [%[r], #12]\n\t"
-        "str	r4, [%[r], #16]\n\t"
-        "str	r5, [%[r], #20]\n\t"
-        "str	r6, [%[r], #24]\n\t"
-        "# a[15] += t[7]\n\t"
-        "ldr	r3, [%[r], #28]\n\t"
-        "adds	r3, r3, r12\n\t"
-        "adc	r12, %[a], #0\n\t"
-        "adds	r3, r3, r14\n\t"
-        "adc	r12, r12, #0\n\t"
-        "str	r3, [%[r], #28]\n\t"
-        "ldr	r3, [%[tmp], #32]\n\t"
-        "ldr	r4, [%[r], #0]\n\t"
-        "ldr	r5, [%[r], #4]\n\t"
-        "ldr	r6, [%[r], #8]\n\t"
-        "ldr	r8, [%[tmp], #0]\n\t"
-        "ldr	r9, [%[tmp], #4]\n\t"
-        "ldr	r10, [%[tmp], #8]\n\t"
-        "ldr	r14, [%[tmp], #12]\n\t"
-        "subs	r3, r3, r8\n\t"
-        "sbcs	r4, r4, r9\n\t"
-        "sbcs	r5, r5, r10\n\t"
-        "sbcs	r6, r6, r14\n\t"
-        "str	r4, [%[r], #0]\n\t"
-        "str	r5, [%[r], #4]\n\t"
-        "str	r6, [%[r], #8]\n\t"
-        "ldr	r3, [%[r], #12]\n\t"
-        "ldr	r4, [%[r], #16]\n\t"
-        "ldr	r5, [%[r], #20]\n\t"
-        "ldr	r6, [%[r], #24]\n\t"
-        "ldr	r7, [%[r], #28]\n\t"
-        "ldr	r8, [%[tmp], #16]\n\t"
-        "ldr	r9, [%[tmp], #20]\n\t"
-        "ldr	r10, [%[tmp], #24]\n\t"
-        "ldr	r14, [%[tmp], #28]\n\t"
-        "sbcs	r3, r3, r8\n\t"
-        "sbcs	r4, r4, r9\n\t"
-        "sbcs	r5, r5, r10\n\t"
-        "sbcs	r6, r6, r14\n\t"
-        "sbc	r7, r7, #0\n\t"
-        "str	r3, [%[r], #12]\n\t"
-        "str	r4, [%[r], #16]\n\t"
-        "str	r5, [%[r], #20]\n\t"
-        "str	r6, [%[r], #24]\n\t"
-        "str	r7, [%[r], #28]\n\t"
-        "# mask m and sub from result if overflow\n\t"
-        "sub	r12, %[a], r12\n\t"
-        "and	%[a], r12, #1\n\t"
-        "ldr	r3, [%[r], #0]\n\t"
-        "ldr	r4, [%[r], #4]\n\t"
-        "ldr	r5, [%[r], #8]\n\t"
-        "ldr	r6, [%[r], #12]\n\t"
-        "ldr	r7, [%[r], #16]\n\t"
-        "ldr	r8, [%[r], #20]\n\t"
-        "ldr	r9, [%[r], #24]\n\t"
-        "ldr	r10, [%[r], #28]\n\t"
-        "subs	r3, r3, r12\n\t"
-        "sbcs	r4, r4, r12\n\t"
-        "sbcs	r5, r5, r12\n\t"
-        "sbcs	r6, r6, #0\n\t"
-        "sbcs	r7, r7, #0\n\t"
-        "sbcs	r8, r8, #0\n\t"
-        "sbcs	r9, r9, %[a]\n\t"
-        "sbc	r10, r10, r12\n\t"
-        "str	r3, [%[r], #0]\n\t"
-        "str	r4, [%[r], #4]\n\t"
-        "str	r5, [%[r], #8]\n\t"
-        "str	r6, [%[r], #12]\n\t"
-        "str	r7, [%[r], #16]\n\t"
-        "str	r8, [%[r], #20]\n\t"
-        "str	r9, [%[r], #24]\n\t"
-        "str	r10, [%[r], #28]\n\t"
-        : [a] "+r" (a)
-        : [r] "r" (r), [tmp] "r" (tmp)
-        : "memory", "r8", "r9", "r10", "r14", "r3", "r4", "r5", "r6", "r7", "r12"
-    );
-}
-
-#ifndef WOLFSSL_SP_SMALL
-/* Square the Montgomery form number a number of times. (r = a ^ n mod m)
- *
- * r   Result of squaring.
- * a   Number to square in Montogmery form.
- * n   Number of times to square.
- * m   Modulus (prime).
- * mp  Montogmery mulitplier.
- */
-static void sp_256_mont_sqr_n_8(sp_digit* r, sp_digit* a, int n,
-        sp_digit* m, sp_digit mp)
-{
-    sp_256_mont_sqr_8(r, a, m, mp);
-    for (; n > 1; n--)
-        sp_256_mont_sqr_8(r, r, m, mp);
-}
-
-#else
-/* Mod-2 for the P256 curve. */
-static const uint32_t p256_mod_2[8] = {
-    0xfffffffd,0xffffffff,0xffffffff,0x00000000,0x00000000,0x00000000,
-    0x00000001,0xffffffff
-};
-#endif /* !WOLFSSL_SP_SMALL */
-
-/* Invert the number, in Montgomery form, modulo the modulus (prime) of the
- * P256 curve. (r = 1 / a mod m)
- *
- * r   Inverse result.
- * a   Number to invert.
- * td  Temporary data.
- */
-static void sp_256_mont_inv_8(sp_digit* r, sp_digit* a, sp_digit* td)
-{
-#ifdef WOLFSSL_SP_SMALL
-    sp_digit* t = td;
-    int i;
-
-    XMEMCPY(t, a, sizeof(sp_digit) * 8);
-    for (i=254; i>=0; i--) {
-        sp_256_mont_sqr_8(t, t, p256_mod, p256_mp_mod);
-        if (p256_mod_2[i / 32] & ((sp_digit)1 << (i % 32)))
-            sp_256_mont_mul_8(t, t, a, p256_mod, p256_mp_mod);
-    }
-    XMEMCPY(r, t, sizeof(sp_digit) * 8);
-#else
-    sp_digit* t = td;
-    sp_digit* t2 = td + 2 * 8;
-    sp_digit* t3 = td + 4 * 8;
-
-    /* t = a^2 */
-    sp_256_mont_sqr_8(t, a, p256_mod, p256_mp_mod);
-    /* t = a^3 = t * a */
-    sp_256_mont_mul_8(t, t, a, p256_mod, p256_mp_mod);
-    /* t2= a^c = t ^ 2 ^ 2 */
-    sp_256_mont_sqr_n_8(t2, t, 2, p256_mod, p256_mp_mod);
-    /* t3= a^d = t2 * a */
-    sp_256_mont_mul_8(t3, t2, a, p256_mod, p256_mp_mod);
-    /* t = a^f = t2 * t */
-    sp_256_mont_mul_8(t, t2, t, p256_mod, p256_mp_mod);
-    /* t2= a^f0 = t ^ 2 ^ 4 */
-    sp_256_mont_sqr_n_8(t2, t, 4, p256_mod, p256_mp_mod);
-    /* t3= a^fd = t2 * t3 */
-    sp_256_mont_mul_8(t3, t2, t3, p256_mod, p256_mp_mod);
-    /* t = a^ff = t2 * t */
-    sp_256_mont_mul_8(t, t2, t, p256_mod, p256_mp_mod);
-    /* t2= a^ff00 = t ^ 2 ^ 8 */
-    sp_256_mont_sqr_n_8(t2, t, 8, p256_mod, p256_mp_mod);
-    /* t3= a^fffd = t2 * t3 */
-    sp_256_mont_mul_8(t3, t2, t3, p256_mod, p256_mp_mod);
-    /* t = a^ffff = t2 * t */
-    sp_256_mont_mul_8(t, t2, t, p256_mod, p256_mp_mod);
-    /* t2= a^ffff0000 = t ^ 2 ^ 16 */
-    sp_256_mont_sqr_n_8(t2, t, 16, p256_mod, p256_mp_mod);
-    /* t3= a^fffffffd = t2 * t3 */
-    sp_256_mont_mul_8(t3, t2, t3, p256_mod, p256_mp_mod);
-    /* t = a^ffffffff = t2 * t */
-    sp_256_mont_mul_8(t, t2, t, p256_mod, p256_mp_mod);
-    /* t = a^ffffffff00000000 = t ^ 2 ^ 32  */
-    sp_256_mont_sqr_n_8(t2, t, 32, p256_mod, p256_mp_mod);
-    /* t2= a^ffffffffffffffff = t2 * t */
-    sp_256_mont_mul_8(t, t2, t, p256_mod, p256_mp_mod);
-    /* t2= a^ffffffff00000001 = t2 * a */
-    sp_256_mont_mul_8(t2, t2, a, p256_mod, p256_mp_mod);
-    /* t2= a^ffffffff000000010000000000000000000000000000000000000000
-     *   = t2 ^ 2 ^ 160 */
-    sp_256_mont_sqr_n_8(t2, t2, 160, p256_mod, p256_mp_mod);
-    /* t2= a^ffffffff00000001000000000000000000000000ffffffffffffffff
-     *   = t2 * t */
-    sp_256_mont_mul_8(t2, t2, t, p256_mod, p256_mp_mod);
-    /* t2= a^ffffffff00000001000000000000000000000000ffffffffffffffff00000000
-     *   = t2 ^ 2 ^ 32 */
-    sp_256_mont_sqr_n_8(t2, t2, 32, p256_mod, p256_mp_mod);
-    /* r = a^ffffffff00000001000000000000000000000000fffffffffffffffffffffffd
-     *   = t2 * t3 */
-    sp_256_mont_mul_8(r, t2, t3, p256_mod, p256_mp_mod);
-#endif /* WOLFSSL_SP_SMALL */
-}
-
-/* Map the Montgomery form projective co-ordinate point to an affine point.
- *
- * r  Resulting affine co-ordinate point.
- * p  Montgomery form projective co-ordinate point.
+/* Map the Montgomery form projective coordinate point to an affine point.
+ *
+ * r  Resulting affine coordinate point.
+ * p  Montgomery form projective coordinate point.
  * t  Temporary ordinate data.
  */
-static void sp_256_map_8(sp_point* r, sp_point* p, sp_digit* t)
+static void sp_256_map_8(sp_point_256* r, const sp_point_256* p, sp_digit* t)
 {
     sp_digit* t1 = t;
     sp_digit* t2 = t + 2*8;
@@ -18675,20 +75448,22 @@
 
     /* x /= z^2 */
     sp_256_mont_mul_8(r->x, p->x, t2, p256_mod, p256_mp_mod);
-    XMEMSET(r->x + 8, 0, sizeof(r->x) / 2);
+    XMEMSET(r->x + 8, 0, sizeof(r->x) / 2U);
     sp_256_mont_reduce_8(r->x, p256_mod, p256_mp_mod);
     /* Reduce x to less than modulus */
     n = sp_256_cmp_8(r->x, p256_mod);
-    sp_256_cond_sub_8(r->x, r->x, p256_mod, 0 - (n >= 0));
+    sp_256_cond_sub_8(r->x, r->x, p256_mod, 0 - ((n >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
     sp_256_norm_8(r->x);
 
     /* y /= z^3 */
     sp_256_mont_mul_8(r->y, p->y, t1, p256_mod, p256_mp_mod);
-    XMEMSET(r->y + 8, 0, sizeof(r->y) / 2);
+    XMEMSET(r->y + 8, 0, sizeof(r->y) / 2U);
     sp_256_mont_reduce_8(r->y, p256_mod, p256_mp_mod);
     /* Reduce y to less than modulus */
     n = sp_256_cmp_8(r->y, p256_mod);
-    sp_256_cond_sub_8(r->y, r->y, p256_mod, 0 - (n >= 0));
+    sp_256_cond_sub_8(r->y, r->y, p256_mod, 0 - ((n >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
     sp_256_norm_8(r->y);
 
     XMEMSET(r->z, 0, sizeof(r->z));
@@ -18803,8 +75578,8 @@
  * b   Second number to add in Montogmery form.
  * m   Modulus (prime).
  */
-static void sp_256_mont_add_8(sp_digit* r, sp_digit* a, sp_digit* b,
-        sp_digit* m)
+static void sp_256_mont_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
 {
     (void)m;
 
@@ -18873,7 +75648,7 @@
  * a   Number to double in Montogmery form.
  * m   Modulus (prime).
  */
-static void sp_256_mont_dbl_8(sp_digit* r, sp_digit* a, sp_digit* m)
+static void sp_256_mont_dbl_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
 {
     (void)m;
 
@@ -18926,7 +75701,7 @@
  * a   Number to triple in Montogmery form.
  * m   Modulus (prime).
  */
-static void sp_256_mont_tpl_8(sp_digit* r, sp_digit* a, sp_digit* m)
+static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
 {
     (void)m;
 
@@ -19024,8 +75799,8 @@
  * b   Number to subtract with in Montogmery form.
  * m   Modulus (prime).
  */
-static void sp_256_mont_sub_8(sp_digit* r, sp_digit* a, sp_digit* b,
-        sp_digit* m)
+static void sp_256_mont_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
 {
     (void)m;
 
@@ -19093,7 +75868,7 @@
  * a  Number to divide.
  * m  Modulus (prime).
  */
-static void sp_256_div2_8(sp_digit* r, sp_digit* a, sp_digit* m)
+static void sp_256_div2_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
 {
     __asm__ __volatile__ (
         "mov	r10, #0\n\t"
@@ -19164,50 +75939,38 @@
  * p  Point to double.
  * t  Temporary ordinate data.
  */
-static void sp_256_proj_point_dbl_8(sp_point* r, sp_point* p, sp_digit* t)
-{
-    sp_point *rp[2];
-    sp_point tp;
+static void sp_256_proj_point_dbl_8(sp_point_256* r, const sp_point_256* p, sp_digit* t)
+{
     sp_digit* t1 = t;
     sp_digit* t2 = t + 2*8;
     sp_digit* x;
     sp_digit* y;
     sp_digit* z;
-    int i;
-
-    /* When infinity don't double point passed in - constant time. */
-    rp[0] = r;
-    rp[1] = &tp;
-    x = rp[p->infinity]->x;
-    y = rp[p->infinity]->y;
-    z = rp[p->infinity]->z;
-    /* Put point to double into result - good for infinty. */
+
+    x = r->x;
+    y = r->y;
+    z = r->z;
+    /* Put infinity into result. */
     if (r != p) {
-        for (i=0; i<8; i++)
-            r->x[i] = p->x[i];
-        for (i=0; i<8; i++)
-            r->y[i] = p->y[i];
-        for (i=0; i<8; i++)
-            r->z[i] = p->z[i];
         r->infinity = p->infinity;
     }
 
     /* T1 = Z * Z */
-    sp_256_mont_sqr_8(t1, z, p256_mod, p256_mp_mod);
+    sp_256_mont_sqr_8(t1, p->z, p256_mod, p256_mp_mod);
     /* Z = Y * Z */
-    sp_256_mont_mul_8(z, y, z, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_8(z, p->y, p->z, p256_mod, p256_mp_mod);
     /* Z = 2Z */
     sp_256_mont_dbl_8(z, z, p256_mod);
     /* T2 = X - T1 */
-    sp_256_mont_sub_8(t2, x, t1, p256_mod);
+    sp_256_mont_sub_8(t2, p->x, t1, p256_mod);
     /* T1 = X + T1 */
-    sp_256_mont_add_8(t1, x, t1, p256_mod);
+    sp_256_mont_add_8(t1, p->x, t1, p256_mod);
     /* T2 = T1 * T2 */
     sp_256_mont_mul_8(t2, t1, t2, p256_mod, p256_mp_mod);
     /* T1 = 3T2 */
     sp_256_mont_tpl_8(t1, t2, p256_mod);
     /* Y = 2Y */
-    sp_256_mont_dbl_8(y, y, p256_mod);
+    sp_256_mont_dbl_8(y, p->y, p256_mod);
     /* Y = Y * Y */
     sp_256_mont_sqr_8(y, y, p256_mod, p256_mp_mod);
     /* T2 = Y * Y */
@@ -19215,9 +75978,9 @@
     /* T2 = T2/2 */
     sp_256_div2_8(t2, t2, p256_mod);
     /* Y = Y * X */
-    sp_256_mont_mul_8(y, y, x, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_8(y, y, p->x, p256_mod, p256_mp_mod);
     /* X = T1 * T1 */
-    sp_256_mont_mul_8(x, t1, t1, p256_mod, p256_mp_mod);
+    sp_256_mont_sqr_8(x, t1, p256_mod, p256_mp_mod);
     /* X = X - Y */
     sp_256_mont_sub_8(x, x, y, p256_mod);
     /* X = X - Y */
@@ -19228,7 +75991,6 @@
     sp_256_mont_mul_8(y, y, t1, p256_mod, p256_mp_mod);
     /* Y = Y - T2 */
     sp_256_mont_sub_8(y, y, t2, p256_mod);
-
 }
 
 #ifdef WOLFSSL_SP_SMALL
@@ -19345,16 +76107,15 @@
 /* Add two Montgomery form projective points.
  *
  * r  Result of addition.
- * p  Frist point to add.
+ * p  First point to add.
  * q  Second point to add.
  * t  Temporary ordinate data.
  */
-static void sp_256_proj_point_add_8(sp_point* r, sp_point* p, sp_point* q,
+static void sp_256_proj_point_add_8(sp_point_256* r, const sp_point_256* p, const sp_point_256* q,
         sp_digit* t)
 {
-    sp_point *ap[2];
-    sp_point *rp[2];
-    sp_point tp;
+    const sp_point_256* ap[2];
+    sp_point_256* rp[2];
     sp_digit* t1 = t;
     sp_digit* t2 = t + 2*8;
     sp_digit* t3 = t + 4*8;
@@ -19367,34 +76128,39 @@
 
     /* Ensure only the first point is the same as the result. */
     if (q == r) {
-        sp_point* a = p;
+        const sp_point_256* a = p;
         p = q;
         q = a;
     }
 
     /* Check double */
-    sp_256_sub_8(t1, p256_mod, q->y);
+    (void)sp_256_sub_8(t1, p256_mod, q->y);
     sp_256_norm_8(t1);
-    if (sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) &
-        (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, t1))) {
+    if ((sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) &
+        (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, t1))) != 0) {
         sp_256_proj_point_dbl_8(r, p, t);
     }
     else {
         rp[0] = r;
-        rp[1] = &tp;
-        XMEMSET(&tp, 0, sizeof(tp));
+
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point_256));
         x = rp[p->infinity | q->infinity]->x;
         y = rp[p->infinity | q->infinity]->y;
         z = rp[p->infinity | q->infinity]->z;
 
         ap[0] = p;
         ap[1] = q;
-        for (i=0; i<8; i++)
+        for (i=0; i<8; i++) {
             r->x[i] = ap[p->infinity]->x[i];
-        for (i=0; i<8; i++)
+        }
+        for (i=0; i<8; i++) {
             r->y[i] = ap[p->infinity]->y[i];
-        for (i=0; i<8; i++)
+        }
+        for (i=0; i<8; i++) {
             r->z[i] = ap[p->infinity]->z[i];
+        }
         r->infinity = ap[p->infinity]->infinity;
 
         /* U1 = X1*Z2^2 */
@@ -19433,7 +76199,7 @@
 }
 
 /* Multiply the point by the scalar and return the result.
- * If map is true then convert result to affine co-ordinates.
+ * If map is true then convert result to affine coordinates.
  *
  * r     Resulting point.
  * g     Point to multiply.
@@ -19442,16 +76208,16 @@
  * heap  Heap to use for allocation.
  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
  */
-static int sp_256_ecc_mulmod_fast_8(sp_point* r, sp_point* g, sp_digit* k,
+static int sp_256_ecc_mulmod_fast_8(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
         int map, void* heap)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point td[16];
-    sp_point rtd;
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 td[16];
+    sp_point_256 rtd;
     sp_digit tmpd[2 * 8 * 5];
 #endif
-    sp_point* t;
-    sp_point* rt;
+    sp_point_256* t;
+    sp_point_256* rt;
     sp_digit* tmp;
     sp_digit n;
     int i;
@@ -19460,9 +76226,9 @@
 
     (void)heap;
 
-    err = sp_ecc_point_new(heap, rtd, rt);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    t = (sp_point*)XMALLOC(sizeof(sp_point) * 16, heap, DYNAMIC_TYPE_ECC);
+    err = sp_256_point_new_8(heap, rtd, rt);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 16, heap, DYNAMIC_TYPE_ECC);
     if (t == NULL)
         err = MEMORY_E;
     tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, heap,
@@ -19479,9 +76245,9 @@
         XMEMSET(&t[0], 0, sizeof(t[0]));
         t[0].infinity = 1;
         /* t[1] = {g->x, g->y, g->z} * norm */
-        sp_256_mod_mul_norm_8(t[1].x, g->x, p256_mod);
-        sp_256_mod_mul_norm_8(t[1].y, g->y, p256_mod);
-        sp_256_mod_mul_norm_8(t[1].z, g->z, p256_mod);
+        (void)sp_256_mod_mul_norm_8(t[1].x, g->x, p256_mod);
+        (void)sp_256_mod_mul_norm_8(t[1].y, g->y, p256_mod);
+        (void)sp_256_mod_mul_norm_8(t[1].z, g->z, p256_mod);
         t[1].infinity = 0;
         sp_256_proj_point_dbl_8(&t[ 2], &t[ 1], tmp);
         t[ 2].infinity = 0;
@@ -19516,11 +76282,11 @@
         n = k[i+1] << 0;
         c = 28;
         y = n >> 28;
-        XMEMCPY(rt, &t[y], sizeof(sp_point));
+        XMEMCPY(rt, &t[y], sizeof(sp_point_256));
         n <<= 4;
         for (; i>=0 || c>=4; ) {
             if (c < 4) {
-                n |= k[i--] << (0 - c);
+                n |= k[i--];
                 c += 32;
             }
             y = (n >> 28) & 0xf;
@@ -19535,36 +76301,37 @@
             sp_256_proj_point_add_8(rt, rt, &t[y], tmp);
         }
 
-        if (map)
+        if (map != 0) {
             sp_256_map_8(r, rt, tmp);
-        else
-            XMEMCPY(r, rt, sizeof(sp_point));
-    }
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_256));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     if (tmp != NULL) {
         XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 8 * 5);
         XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
     }
     if (t != NULL) {
-        XMEMSET(t, 0, sizeof(sp_point) * 16);
+        XMEMSET(t, 0, sizeof(sp_point_256) * 16);
         XFREE(t, heap, DYNAMIC_TYPE_ECC);
     }
 #else
     ForceZero(tmpd, sizeof(tmpd));
     ForceZero(td, sizeof(td));
 #endif
-    sp_ecc_point_free(rt, 1, heap);
+    sp_256_point_free_8(rt, 1, heap);
 
     return err;
 }
 
 /* A table entry for pre-computed points. */
-typedef struct sp_table_entry {
+typedef struct sp_table_entry_256 {
     sp_digit x[8];
     sp_digit y[8];
-    byte infinity;
-} sp_table_entry;
+} sp_table_entry_256;
 
 #ifdef FP_ECC
 /* Double the Montgomery form projective point p a number of times.
@@ -19574,11 +76341,8 @@
  * n  Number of times to double
  * t  Temporary ordinate data.
  */
-static void sp_256_proj_point_dbl_n_8(sp_point* r, sp_point* p, int n,
-        sp_digit* t)
-{
-    sp_point *rp[2];
-    sp_point tp;
+static void sp_256_proj_point_dbl_n_8(sp_point_256* p, int n, sp_digit* t)
+{
     sp_digit* w = t;
     sp_digit* a = t + 2*8;
     sp_digit* b = t + 4*8;
@@ -19587,54 +76351,73 @@
     sp_digit* x;
     sp_digit* y;
     sp_digit* z;
-    int i;
-
-    rp[0] = r;
-    rp[1] = &tp;
-    x = rp[p->infinity]->x;
-    y = rp[p->infinity]->y;
-    z = rp[p->infinity]->z;
-    if (r != p) {
-        for (i=0; i<8; i++)
-            r->x[i] = p->x[i];
-        for (i=0; i<8; i++)
-            r->y[i] = p->y[i];
-        for (i=0; i<8; i++)
-            r->z[i] = p->z[i];
-        r->infinity = p->infinity;
-    }
+
+    x = p->x;
+    y = p->y;
+    z = p->z;
 
     /* Y = 2*Y */
     sp_256_mont_dbl_8(y, y, p256_mod);
     /* W = Z^4 */
     sp_256_mont_sqr_8(w, z, p256_mod, p256_mp_mod);
     sp_256_mont_sqr_8(w, w, p256_mod, p256_mp_mod);
-    while (n--) {
+
+#ifndef WOLFSSL_SP_SMALL
+    while (--n > 0)
+#else
+    while (--n >= 0)
+#endif
+    {
         /* A = 3*(X^2 - W) */
         sp_256_mont_sqr_8(t1, x, p256_mod, p256_mp_mod);
         sp_256_mont_sub_8(t1, t1, w, p256_mod);
         sp_256_mont_tpl_8(a, t1, p256_mod);
         /* B = X*Y^2 */
-        sp_256_mont_sqr_8(t2, y, p256_mod, p256_mp_mod);
-        sp_256_mont_mul_8(b, t2, x, p256_mod, p256_mp_mod);
+        sp_256_mont_sqr_8(t1, y, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(b, t1, x, p256_mod, p256_mp_mod);
         /* X = A^2 - 2B */
         sp_256_mont_sqr_8(x, a, p256_mod, p256_mp_mod);
-        sp_256_mont_dbl_8(t1, b, p256_mod);
-        sp_256_mont_sub_8(x, x, t1, p256_mod);
+        sp_256_mont_dbl_8(t2, b, p256_mod);
+        sp_256_mont_sub_8(x, x, t2, p256_mod);
         /* Z = Z*Y */
         sp_256_mont_mul_8(z, z, y, p256_mod, p256_mp_mod);
         /* t2 = Y^4 */
-        sp_256_mont_sqr_8(t2, t2, p256_mod, p256_mp_mod);
-        if (n) {
+        sp_256_mont_sqr_8(t1, t1, p256_mod, p256_mp_mod);
+#ifdef WOLFSSL_SP_SMALL
+        if (n != 0)
+#endif
+        {
             /* W = W*Y^4 */
-            sp_256_mont_mul_8(w, w, t2, p256_mod, p256_mp_mod);
+            sp_256_mont_mul_8(w, w, t1, p256_mod, p256_mp_mod);
         }
         /* y = 2*A*(B - X) - Y^4 */
         sp_256_mont_sub_8(y, b, x, p256_mod);
         sp_256_mont_mul_8(y, y, a, p256_mod, p256_mp_mod);
         sp_256_mont_dbl_8(y, y, p256_mod);
-        sp_256_mont_sub_8(y, y, t2, p256_mod);
-    }
+        sp_256_mont_sub_8(y, y, t1, p256_mod);
+    }
+#ifndef WOLFSSL_SP_SMALL
+    /* A = 3*(X^2 - W) */
+    sp_256_mont_sqr_8(t1, x, p256_mod, p256_mp_mod);
+    sp_256_mont_sub_8(t1, t1, w, p256_mod);
+    sp_256_mont_tpl_8(a, t1, p256_mod);
+    /* B = X*Y^2 */
+    sp_256_mont_sqr_8(t1, y, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_8(b, t1, x, p256_mod, p256_mp_mod);
+    /* X = A^2 - 2B */
+    sp_256_mont_sqr_8(x, a, p256_mod, p256_mp_mod);
+    sp_256_mont_dbl_8(t2, b, p256_mod);
+    sp_256_mont_sub_8(x, x, t2, p256_mod);
+    /* Z = Z*Y */
+    sp_256_mont_mul_8(z, z, y, p256_mod, p256_mp_mod);
+    /* t2 = Y^4 */
+    sp_256_mont_sqr_8(t1, t1, p256_mod, p256_mp_mod);
+    /* y = 2*A*(B - X) - Y^4 */
+    sp_256_mont_sub_8(y, b, x, p256_mod);
+    sp_256_mont_mul_8(y, y, a, p256_mod, p256_mp_mod);
+    sp_256_mont_dbl_8(y, y, p256_mod);
+    sp_256_mont_sub_8(y, y, t1, p256_mod);
+#endif
     /* Y = Y/2 */
     sp_256_div2_8(y, y, p256_mod);
 }
@@ -19645,16 +76428,15 @@
  * Only the first point can be the same pointer as the result point.
  *
  * r  Result of addition.
- * p  Frist point to add.
+ * p  First point to add.
  * q  Second point to add.
  * t  Temporary ordinate data.
  */
-static void sp_256_proj_point_add_qz1_8(sp_point* r, sp_point* p,
-        sp_point* q, sp_digit* t)
-{
-    sp_point *ap[2];
-    sp_point *rp[2];
-    sp_point tp;
+static void sp_256_proj_point_add_qz1_8(sp_point_256* r, const sp_point_256* p,
+        const sp_point_256* q, sp_digit* t)
+{
+    const sp_point_256* ap[2];
+    sp_point_256* rp[2];
     sp_digit* t1 = t;
     sp_digit* t2 = t + 2*8;
     sp_digit* t3 = t + 4*8;
@@ -19666,28 +76448,33 @@
     int i;
 
     /* Check double */
-    sp_256_sub_8(t1, p256_mod, q->y);
+    (void)sp_256_sub_8(t1, p256_mod, q->y);
     sp_256_norm_8(t1);
-    if (sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) &
-        (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, t1))) {
+    if ((sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) &
+        (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, t1))) != 0) {
         sp_256_proj_point_dbl_8(r, p, t);
     }
     else {
         rp[0] = r;
-        rp[1] = &tp;
-        XMEMSET(&tp, 0, sizeof(tp));
+
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point_256));
         x = rp[p->infinity | q->infinity]->x;
         y = rp[p->infinity | q->infinity]->y;
         z = rp[p->infinity | q->infinity]->z;
 
         ap[0] = p;
         ap[1] = q;
-        for (i=0; i<8; i++)
+        for (i=0; i<8; i++) {
             r->x[i] = ap[p->infinity]->x[i];
-        for (i=0; i<8; i++)
+        }
+        for (i=0; i<8; i++) {
             r->y[i] = ap[p->infinity]->y[i];
-        for (i=0; i<8; i++)
+        }
+        for (i=0; i<8; i++) {
             r->z[i] = ap[p->infinity]->z[i];
+        }
         r->infinity = ap[p->infinity]->infinity;
 
         /* U2 = X2*Z1^2 */
@@ -19724,9 +76511,9 @@
  * Ordinates are in Montgomery form.
  *
  * a  Point to convert.
- * t  Temprorary data.
- */
-static void sp_256_proj_to_affine_8(sp_point* a, sp_digit* t)
+ * t  Temporary data.
+ */
+static void sp_256_proj_to_affine_8(sp_point_256* a, sp_digit* t)
 {
     sp_digit* t1 = t;
     sp_digit* t2 = t + 2 * 8;
@@ -19746,35 +76533,40 @@
  *
  * a      The base point.
  * table  Place to store generated point data.
- * tmp    Temprorary data.
+ * tmp    Temporary data.
  * heap  Heap to use for allocation.
  */
-static int sp_256_gen_stripe_table_8(sp_point* a,
-        sp_table_entry* table, sp_digit* tmp, void* heap)
-{
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point td, s1d, s2d;
-#endif
-    sp_point* t;
-    sp_point* s1 = NULL;
-    sp_point* s2 = NULL;
+static int sp_256_gen_stripe_table_8(const sp_point_256* a,
+        sp_table_entry_256* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 td, s1d, s2d;
+#endif
+    sp_point_256* t;
+    sp_point_256* s1 = NULL;
+    sp_point_256* s2 = NULL;
     int i, j;
     int err;
 
     (void)heap;
 
-    err = sp_ecc_point_new(heap, td, t);
-    if (err == MP_OKAY)
-        err = sp_ecc_point_new(heap, s1d, s1);
-    if (err == MP_OKAY)
-        err = sp_ecc_point_new(heap, s2d, s2);
-
-    if (err == MP_OKAY)
+    err = sp_256_point_new_8(heap, td, t);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, s1d, s1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, s2d, s2);
+    }
+
+    if (err == MP_OKAY) {
         err = sp_256_mod_mul_norm_8(t->x, a->x, p256_mod);
-    if (err == MP_OKAY)
+    }
+    if (err == MP_OKAY) {
         err = sp_256_mod_mul_norm_8(t->y, a->y, p256_mod);
-    if (err == MP_OKAY)
+    }
+    if (err == MP_OKAY) {
         err = sp_256_mod_mul_norm_8(t->z, a->z, p256_mod);
+    }
     if (err == MP_OKAY) {
         t->infinity = 0;
         sp_256_proj_to_affine_8(t, tmp);
@@ -19785,19 +76577,16 @@
         s2->infinity = 0;
 
         /* table[0] = {0, 0, infinity} */
-        XMEMSET(&table[0], 0, sizeof(sp_table_entry));
-        table[0].infinity = 1;
+        XMEMSET(&table[0], 0, sizeof(sp_table_entry_256));
         /* table[1] = Affine version of 'a' in Montgomery form */
         XMEMCPY(table[1].x, t->x, sizeof(table->x));
         XMEMCPY(table[1].y, t->y, sizeof(table->y));
-        table[1].infinity = 0;
 
         for (i=1; i<4; i++) {
-            sp_256_proj_point_dbl_n_8(t, t, 64, tmp);
+            sp_256_proj_point_dbl_n_8(t, 64, tmp);
             sp_256_proj_to_affine_8(t, tmp);
             XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
             XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
-            table[1<<i].infinity = 0;
         }
 
         for (i=1; i<4; i++) {
@@ -19810,21 +76599,20 @@
                 sp_256_proj_to_affine_8(t, tmp);
                 XMEMCPY(table[j].x, t->x, sizeof(table->x));
                 XMEMCPY(table[j].y, t->y, sizeof(table->y));
-                table[j].infinity = 0;
-            }
-        }
-    }
-
-    sp_ecc_point_free(s2, 0, heap);
-    sp_ecc_point_free(s1, 0, heap);
-    sp_ecc_point_free( t, 0, heap);
+            }
+        }
+    }
+
+    sp_256_point_free_8(s2, 0, heap);
+    sp_256_point_free_8(s1, 0, heap);
+    sp_256_point_free_8( t, 0, heap);
 
     return err;
 }
 
 #endif /* FP_ECC */
 /* Multiply the point by the scalar and return the result.
- * If map is true then convert result to affine co-ordinates.
+ * If map is true then convert result to affine coordinates.
  *
  * r     Resulting point.
  * k     Scalar to multiply by.
@@ -19832,16 +76620,16 @@
  * heap  Heap to use for allocation.
  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
  */
-static int sp_256_ecc_mulmod_stripe_8(sp_point* r, sp_point* g,
-        sp_table_entry* table, sp_digit* k, int map, void* heap)
-{
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point rtd;
-    sp_point pd;
+static int sp_256_ecc_mulmod_stripe_8(sp_point_256* r, const sp_point_256* g,
+        const sp_table_entry_256* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 rtd;
+    sp_point_256 pd;
     sp_digit td[2 * 8 * 5];
 #endif
-    sp_point* rt;
-    sp_point* p = NULL;
+    sp_point_256* rt;
+    sp_point_256* p = NULL;
     sp_digit* t;
     int i, j;
     int y, x;
@@ -19850,14 +76638,17 @@
     (void)g;
     (void)heap;
 
-    err = sp_ecc_point_new(heap, rtd, rt);
-    if (err == MP_OKAY)
-        err = sp_ecc_point_new(heap, pd, p);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+
+    err = sp_256_point_new_8(heap, rtd, rt);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, heap,
                            DYNAMIC_TYPE_ECC);
-    if (t == NULL)
+    if (t == NULL) {
         err = MEMORY_E;
+    }
 #else
     t = td;
 #endif
@@ -19867,35 +76658,40 @@
         XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
 
         y = 0;
-        for (j=0,x=63; j<4; j++,x+=32)
+        for (j=0,x=63; j<4; j++,x+=64) {
             y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+        }
         XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
         XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
-        rt->infinity = table[y].infinity;
+        rt->infinity = !y;
         for (i=62; i>=0; i--) {
             y = 0;
-            for (j=0,x=i; j<4; j++,x+=64)
+            for (j=0,x=i; j<4; j++,x+=64) {
                 y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+            }
 
             sp_256_proj_point_dbl_8(rt, rt, t);
             XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
             XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
-            p->infinity = table[y].infinity;
+            p->infinity = !y;
             sp_256_proj_point_add_qz1_8(rt, rt, p, t);
         }
 
-        if (map)
+        if (map != 0) {
             sp_256_map_8(r, rt, t);
-        else
-            XMEMCPY(r, rt, sizeof(sp_point));
-    }
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (t != NULL)
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_256));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
         XFREE(t, heap, DYNAMIC_TYPE_ECC);
-#endif
-    sp_ecc_point_free(p, 0, heap);
-    sp_ecc_point_free(rt, 0, heap);
+    }
+#endif
+    sp_256_point_free_8(p, 0, heap);
+    sp_256_point_free_8(rt, 0, heap);
 
     return err;
 }
@@ -19905,43 +76701,43 @@
     #define FP_ENTRIES 16
 #endif
 
-typedef struct sp_cache_t {
+typedef struct sp_cache_256_t {
     sp_digit x[8];
     sp_digit y[8];
-    sp_table_entry table[16];
+    sp_table_entry_256 table[16];
     uint32_t cnt;
     int set;
-} sp_cache_t;
-
-static THREAD_LS_T sp_cache_t sp_cache[FP_ENTRIES];
-static THREAD_LS_T int sp_cache_last = -1;
-static THREAD_LS_T int sp_cache_inited = 0;
+} sp_cache_256_t;
+
+static THREAD_LS_T sp_cache_256_t sp_cache_256[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_256_last = -1;
+static THREAD_LS_T int sp_cache_256_inited = 0;
 
 #ifndef HAVE_THREAD_LS
-    static volatile int initCacheMutex = 0;
-    static wolfSSL_Mutex sp_cache_lock;
-#endif
-
-static void sp_ecc_get_cache(sp_point* g, sp_cache_t** cache)
+    static volatile int initCacheMutex_256 = 0;
+    static wolfSSL_Mutex sp_cache_256_lock;
+#endif
+
+static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache)
 {
     int i, j;
     uint32_t least;
 
-    if (sp_cache_inited == 0) {
+    if (sp_cache_256_inited == 0) {
         for (i=0; i<FP_ENTRIES; i++) {
-            sp_cache[i].set = 0;
-        }
-        sp_cache_inited = 1;
+            sp_cache_256[i].set = 0;
+        }
+        sp_cache_256_inited = 1;
     }
 
     /* Compare point with those in cache. */
     for (i=0; i<FP_ENTRIES; i++) {
-        if (!sp_cache[i].set)
+        if (!sp_cache_256[i].set)
             continue;
 
-        if (sp_256_cmp_equal_8(g->x, sp_cache[i].x) & 
-                           sp_256_cmp_equal_8(g->y, sp_cache[i].y)) {
-            sp_cache[i].cnt++;
+        if (sp_256_cmp_equal_8(g->x, sp_cache_256[i].x) &
+                           sp_256_cmp_equal_8(g->y, sp_cache_256[i].y)) {
+            sp_cache_256[i].cnt++;
             break;
         }
     }
@@ -19949,37 +76745,37 @@
     /* No match. */
     if (i == FP_ENTRIES) {
         /* Find empty entry. */
-        i = (sp_cache_last + 1) % FP_ENTRIES;
-        for (; i != sp_cache_last; i=(i+1)%FP_ENTRIES) {
-            if (!sp_cache[i].set) {
+        i = (sp_cache_256_last + 1) % FP_ENTRIES;
+        for (; i != sp_cache_256_last; i=(i+1)%FP_ENTRIES) {
+            if (!sp_cache_256[i].set) {
                 break;
             }
         }
 
         /* Evict least used. */
-        if (i == sp_cache_last) {
-            least = sp_cache[0].cnt;
+        if (i == sp_cache_256_last) {
+            least = sp_cache_256[0].cnt;
             for (j=1; j<FP_ENTRIES; j++) {
-                if (sp_cache[j].cnt < least) {
+                if (sp_cache_256[j].cnt < least) {
                     i = j;
-                    least = sp_cache[i].cnt;
+                    least = sp_cache_256[i].cnt;
                 }
             }
         }
 
-        XMEMCPY(sp_cache[i].x, g->x, sizeof(sp_cache[i].x));
-        XMEMCPY(sp_cache[i].y, g->y, sizeof(sp_cache[i].y));
-        sp_cache[i].set = 1;
-        sp_cache[i].cnt = 1;
-    }
-
-    *cache = &sp_cache[i];
-    sp_cache_last = i;
+        XMEMCPY(sp_cache_256[i].x, g->x, sizeof(sp_cache_256[i].x));
+        XMEMCPY(sp_cache_256[i].y, g->y, sizeof(sp_cache_256[i].y));
+        sp_cache_256[i].set = 1;
+        sp_cache_256[i].cnt = 1;
+    }
+
+    *cache = &sp_cache_256[i];
+    sp_cache_256_last = i;
 }
 #endif /* FP_ECC */
 
 /* Multiply the base point of P256 by the scalar and return the result.
- * If map is true then convert result to affine co-ordinates.
+ * If map is true then convert result to affine coordinates.
  *
  * r     Resulting point.
  * g     Point to multiply.
@@ -19988,32 +76784,32 @@
  * heap  Heap to use for allocation.
  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
  */
-static int sp_256_ecc_mulmod_8(sp_point* r, sp_point* g, sp_digit* k,
+static int sp_256_ecc_mulmod_8(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
         int map, void* heap)
 {
 #ifndef FP_ECC
     return sp_256_ecc_mulmod_fast_8(r, g, k, map, heap);
 #else
     sp_digit tmp[2 * 8 * 5];
-    sp_cache_t* cache;
+    sp_cache_256_t* cache;
     int err = MP_OKAY;
 
 #ifndef HAVE_THREAD_LS
-    if (initCacheMutex == 0) {
-         wc_InitMutex(&sp_cache_lock);
-         initCacheMutex = 1;
-    }
-    if (wc_LockMutex(&sp_cache_lock) != 0)
+    if (initCacheMutex_256 == 0) {
+         wc_InitMutex(&sp_cache_256_lock);
+         initCacheMutex_256 = 1;
+    }
+    if (wc_LockMutex(&sp_cache_256_lock) != 0)
        err = BAD_MUTEX_E;
 #endif /* HAVE_THREAD_LS */
 
     if (err == MP_OKAY) {
-        sp_ecc_get_cache(g, &cache);
+        sp_ecc_get_cache_256(g, &cache);
         if (cache->cnt == 2)
             sp_256_gen_stripe_table_8(g, cache->table, tmp, heap);
 
 #ifndef HAVE_THREAD_LS
-        wc_UnLockMutex(&sp_cache_lock);
+        wc_UnLockMutex(&sp_cache_256_lock);
 #endif /* HAVE_THREAD_LS */
 
         if (cache->cnt < 2) {
@@ -20035,35 +76831,40 @@
  *
  * a      The base point.
  * table  Place to store generated point data.
- * tmp    Temprorary data.
+ * tmp    Temporary data.
  * heap  Heap to use for allocation.
  */
-static int sp_256_gen_stripe_table_8(sp_point* a,
-        sp_table_entry* table, sp_digit* tmp, void* heap)
-{
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point td, s1d, s2d;
-#endif
-    sp_point* t;
-    sp_point* s1 = NULL;
-    sp_point* s2 = NULL;
+static int sp_256_gen_stripe_table_8(const sp_point_256* a,
+        sp_table_entry_256* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 td, s1d, s2d;
+#endif
+    sp_point_256* t;
+    sp_point_256* s1 = NULL;
+    sp_point_256* s2 = NULL;
     int i, j;
     int err;
 
     (void)heap;
 
-    err = sp_ecc_point_new(heap, td, t);
-    if (err == MP_OKAY)
-        err = sp_ecc_point_new(heap, s1d, s1);
-    if (err == MP_OKAY)
-        err = sp_ecc_point_new(heap, s2d, s2);
-
-    if (err == MP_OKAY)
+    err = sp_256_point_new_8(heap, td, t);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, s1d, s1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, s2d, s2);
+    }
+
+    if (err == MP_OKAY) {
         err = sp_256_mod_mul_norm_8(t->x, a->x, p256_mod);
-    if (err == MP_OKAY)
+    }
+    if (err == MP_OKAY) {
         err = sp_256_mod_mul_norm_8(t->y, a->y, p256_mod);
-    if (err == MP_OKAY)
+    }
+    if (err == MP_OKAY) {
         err = sp_256_mod_mul_norm_8(t->z, a->z, p256_mod);
+    }
     if (err == MP_OKAY) {
         t->infinity = 0;
         sp_256_proj_to_affine_8(t, tmp);
@@ -20074,19 +76875,16 @@
         s2->infinity = 0;
 
         /* table[0] = {0, 0, infinity} */
-        XMEMSET(&table[0], 0, sizeof(sp_table_entry));
-        table[0].infinity = 1;
+        XMEMSET(&table[0], 0, sizeof(sp_table_entry_256));
         /* table[1] = Affine version of 'a' in Montgomery form */
         XMEMCPY(table[1].x, t->x, sizeof(table->x));
         XMEMCPY(table[1].y, t->y, sizeof(table->y));
-        table[1].infinity = 0;
 
         for (i=1; i<8; i++) {
-            sp_256_proj_point_dbl_n_8(t, t, 32, tmp);
+            sp_256_proj_point_dbl_n_8(t, 32, tmp);
             sp_256_proj_to_affine_8(t, tmp);
             XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
             XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
-            table[1<<i].infinity = 0;
         }
 
         for (i=1; i<8; i++) {
@@ -20099,21 +76897,20 @@
                 sp_256_proj_to_affine_8(t, tmp);
                 XMEMCPY(table[j].x, t->x, sizeof(table->x));
                 XMEMCPY(table[j].y, t->y, sizeof(table->y));
-                table[j].infinity = 0;
-            }
-        }
-    }
-
-    sp_ecc_point_free(s2, 0, heap);
-    sp_ecc_point_free(s1, 0, heap);
-    sp_ecc_point_free( t, 0, heap);
+            }
+        }
+    }
+
+    sp_256_point_free_8(s2, 0, heap);
+    sp_256_point_free_8(s1, 0, heap);
+    sp_256_point_free_8( t, 0, heap);
 
     return err;
 }
 
 #endif /* FP_ECC */
 /* Multiply the point by the scalar and return the result.
- * If map is true then convert result to affine co-ordinates.
+ * If map is true then convert result to affine coordinates.
  *
  * r     Resulting point.
  * k     Scalar to multiply by.
@@ -20121,16 +76918,16 @@
  * heap  Heap to use for allocation.
  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
  */
-static int sp_256_ecc_mulmod_stripe_8(sp_point* r, sp_point* g,
-        sp_table_entry* table, sp_digit* k, int map, void* heap)
-{
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point rtd;
-    sp_point pd;
+static int sp_256_ecc_mulmod_stripe_8(sp_point_256* r, const sp_point_256* g,
+        const sp_table_entry_256* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 rtd;
+    sp_point_256 pd;
     sp_digit td[2 * 8 * 5];
 #endif
-    sp_point* rt;
-    sp_point* p = NULL;
+    sp_point_256* rt;
+    sp_point_256* p = NULL;
     sp_digit* t;
     int i, j;
     int y, x;
@@ -20139,14 +76936,17 @@
     (void)g;
     (void)heap;
 
-    err = sp_ecc_point_new(heap, rtd, rt);
-    if (err == MP_OKAY)
-        err = sp_ecc_point_new(heap, pd, p);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+
+    err = sp_256_point_new_8(heap, rtd, rt);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, heap,
                            DYNAMIC_TYPE_ECC);
-    if (t == NULL)
+    if (t == NULL) {
         err = MEMORY_E;
+    }
 #else
     t = td;
 #endif
@@ -20156,35 +76956,40 @@
         XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
 
         y = 0;
-        for (j=0,x=31; j<8; j++,x+=32)
+        for (j=0,x=31; j<8; j++,x+=32) {
             y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+        }
         XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
         XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
-        rt->infinity = table[y].infinity;
+        rt->infinity = !y;
         for (i=30; i>=0; i--) {
             y = 0;
-            for (j=0,x=i; j<8; j++,x+=32)
+            for (j=0,x=i; j<8; j++,x+=32) {
                 y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+            }
 
             sp_256_proj_point_dbl_8(rt, rt, t);
             XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
             XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
-            p->infinity = table[y].infinity;
+            p->infinity = !y;
             sp_256_proj_point_add_qz1_8(rt, rt, p, t);
         }
 
-        if (map)
+        if (map != 0) {
             sp_256_map_8(r, rt, t);
-        else
-            XMEMCPY(r, rt, sizeof(sp_point));
-    }
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (t != NULL)
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_256));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
         XFREE(t, heap, DYNAMIC_TYPE_ECC);
-#endif
-    sp_ecc_point_free(p, 0, heap);
-    sp_ecc_point_free(rt, 0, heap);
+    }
+#endif
+    sp_256_point_free_8(p, 0, heap);
+    sp_256_point_free_8(rt, 0, heap);
 
     return err;
 }
@@ -20194,43 +76999,43 @@
     #define FP_ENTRIES 16
 #endif
 
-typedef struct sp_cache_t {
+typedef struct sp_cache_256_t {
     sp_digit x[8];
     sp_digit y[8];
-    sp_table_entry table[256];
+    sp_table_entry_256 table[256];
     uint32_t cnt;
     int set;
-} sp_cache_t;
-
-static THREAD_LS_T sp_cache_t sp_cache[FP_ENTRIES];
-static THREAD_LS_T int sp_cache_last = -1;
-static THREAD_LS_T int sp_cache_inited = 0;
+} sp_cache_256_t;
+
+static THREAD_LS_T sp_cache_256_t sp_cache_256[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_256_last = -1;
+static THREAD_LS_T int sp_cache_256_inited = 0;
 
 #ifndef HAVE_THREAD_LS
-    static volatile int initCacheMutex = 0;
-    static wolfSSL_Mutex sp_cache_lock;
-#endif
-
-static void sp_ecc_get_cache(sp_point* g, sp_cache_t** cache)
+    static volatile int initCacheMutex_256 = 0;
+    static wolfSSL_Mutex sp_cache_256_lock;
+#endif
+
+static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache)
 {
     int i, j;
     uint32_t least;
 
-    if (sp_cache_inited == 0) {
+    if (sp_cache_256_inited == 0) {
         for (i=0; i<FP_ENTRIES; i++) {
-            sp_cache[i].set = 0;
-        }
-        sp_cache_inited = 1;
+            sp_cache_256[i].set = 0;
+        }
+        sp_cache_256_inited = 1;
     }
 
     /* Compare point with those in cache. */
     for (i=0; i<FP_ENTRIES; i++) {
-        if (!sp_cache[i].set)
+        if (!sp_cache_256[i].set)
             continue;
 
-        if (sp_256_cmp_equal_8(g->x, sp_cache[i].x) & 
-                           sp_256_cmp_equal_8(g->y, sp_cache[i].y)) {
-            sp_cache[i].cnt++;
+        if (sp_256_cmp_equal_8(g->x, sp_cache_256[i].x) &
+                           sp_256_cmp_equal_8(g->y, sp_cache_256[i].y)) {
+            sp_cache_256[i].cnt++;
             break;
         }
     }
@@ -20238,37 +77043,37 @@
     /* No match. */
     if (i == FP_ENTRIES) {
         /* Find empty entry. */
-        i = (sp_cache_last + 1) % FP_ENTRIES;
-        for (; i != sp_cache_last; i=(i+1)%FP_ENTRIES) {
-            if (!sp_cache[i].set) {
+        i = (sp_cache_256_last + 1) % FP_ENTRIES;
+        for (; i != sp_cache_256_last; i=(i+1)%FP_ENTRIES) {
+            if (!sp_cache_256[i].set) {
                 break;
             }
         }
 
         /* Evict least used. */
-        if (i == sp_cache_last) {
-            least = sp_cache[0].cnt;
+        if (i == sp_cache_256_last) {
+            least = sp_cache_256[0].cnt;
             for (j=1; j<FP_ENTRIES; j++) {
-                if (sp_cache[j].cnt < least) {
+                if (sp_cache_256[j].cnt < least) {
                     i = j;
-                    least = sp_cache[i].cnt;
+                    least = sp_cache_256[i].cnt;
                 }
             }
         }
 
-        XMEMCPY(sp_cache[i].x, g->x, sizeof(sp_cache[i].x));
-        XMEMCPY(sp_cache[i].y, g->y, sizeof(sp_cache[i].y));
-        sp_cache[i].set = 1;
-        sp_cache[i].cnt = 1;
-    }
-
-    *cache = &sp_cache[i];
-    sp_cache_last = i;
+        XMEMCPY(sp_cache_256[i].x, g->x, sizeof(sp_cache_256[i].x));
+        XMEMCPY(sp_cache_256[i].y, g->y, sizeof(sp_cache_256[i].y));
+        sp_cache_256[i].set = 1;
+        sp_cache_256[i].cnt = 1;
+    }
+
+    *cache = &sp_cache_256[i];
+    sp_cache_256_last = i;
 }
 #endif /* FP_ECC */
 
 /* Multiply the base point of P256 by the scalar and return the result.
- * If map is true then convert result to affine co-ordinates.
+ * If map is true then convert result to affine coordinates.
  *
  * r     Resulting point.
  * g     Point to multiply.
@@ -20277,32 +77082,32 @@
  * heap  Heap to use for allocation.
  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
  */
-static int sp_256_ecc_mulmod_8(sp_point* r, sp_point* g, sp_digit* k,
+static int sp_256_ecc_mulmod_8(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
         int map, void* heap)
 {
 #ifndef FP_ECC
     return sp_256_ecc_mulmod_fast_8(r, g, k, map, heap);
 #else
     sp_digit tmp[2 * 8 * 5];
-    sp_cache_t* cache;
+    sp_cache_256_t* cache;
     int err = MP_OKAY;
 
 #ifndef HAVE_THREAD_LS
-    if (initCacheMutex == 0) {
-         wc_InitMutex(&sp_cache_lock);
-         initCacheMutex = 1;
-    }
-    if (wc_LockMutex(&sp_cache_lock) != 0)
+    if (initCacheMutex_256 == 0) {
+         wc_InitMutex(&sp_cache_256_lock);
+         initCacheMutex_256 = 1;
+    }
+    if (wc_LockMutex(&sp_cache_256_lock) != 0)
        err = BAD_MUTEX_E;
 #endif /* HAVE_THREAD_LS */
 
     if (err == MP_OKAY) {
-        sp_ecc_get_cache(g, &cache);
+        sp_ecc_get_cache_256(g, &cache);
         if (cache->cnt == 2)
             sp_256_gen_stripe_table_8(g, cache->table, tmp, heap);
 
 #ifndef HAVE_THREAD_LS
-        wc_UnLockMutex(&sp_cache_lock);
+        wc_UnLockMutex(&sp_cache_256_lock);
 #endif /* HAVE_THREAD_LS */
 
         if (cache->cnt < 2) {
@@ -20320,7 +77125,7 @@
 
 #endif /* WOLFSSL_SP_SMALL */
 /* Multiply the point by the scalar and return the result.
- * If map is true then convert result to affine co-ordinates.
+ * If map is true then convert result to affine coordinates.
  *
  * km    Scalar to multiply by.
  * p     Point to multiply.
@@ -20332,21 +77137,19 @@
 int sp_ecc_mulmod_256(mp_int* km, ecc_point* gm, ecc_point* r, int map,
         void* heap)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point p;
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 p;
     sp_digit kd[8];
 #endif
-    sp_point* point;
+    sp_point_256* point;
     sp_digit* k = NULL;
     int err = MP_OKAY;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    err = sp_ecc_point_new(heap, p, point);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        k = XMALLOC(sizeof(sp_digit) * 8, heap, DYNAMIC_TYPE_ECC);
+
+    err = sp_256_point_new_8(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
+                                                              DYNAMIC_TYPE_ECC);
         if (k == NULL)
             err = MEMORY_E;
     }
@@ -20357,125 +77160,106 @@
         sp_256_from_mp(k, 8, km);
         sp_256_point_from_ecc_point_8(point, gm);
 
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_256_ecc_mulmod_avx2_8(point, point, k, map, heap);
-        else
-#endif
             err = sp_256_ecc_mulmod_8(point, point, k, map, heap);
     }
-    if (err == MP_OKAY)
+    if (err == MP_OKAY) {
         err = sp_256_point_to_ecc_point_8(point, r);
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (k != NULL)
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
         XFREE(k, heap, DYNAMIC_TYPE_ECC);
-#endif
-    sp_ecc_point_free(point, 0, heap);
+    }
+#endif
+    sp_256_point_free_8(point, 0, heap);
 
     return err;
 }
 
 #ifdef WOLFSSL_SP_SMALL
-static sp_table_entry p256_table[16] = {
+static const sp_table_entry_256 p256_table[16] = {
     /* 0 */
     { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-      { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-      1 },
+      { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
     /* 1 */
     { { 0x18a9143c,0x79e730d4,0x5fedb601,0x75ba95fc,0x77622510,0x79fb732b,
         0xa53755c6,0x18905f76 },
       { 0xce95560a,0xddf25357,0xba19e45c,0x8b4ab8e4,0xdd21f325,0xd2e88688,
-        0x25885d85,0x8571ff18 },
-      0 },
+        0x25885d85,0x8571ff18 } },
     /* 2 */
     { { 0x16a0d2bb,0x4f922fc5,0x1a623499,0x0d5cc16c,0x57c62c8b,0x9241cf3a,
         0xfd1b667f,0x2f5e6961 },
       { 0xf5a01797,0x5c15c70b,0x60956192,0x3d20b44d,0x071fdb52,0x04911b37,
-        0x8d6f0f7b,0xf648f916 },
-      0 },
+        0x8d6f0f7b,0xf648f916 } },
     /* 3 */
     { { 0xe137bbbc,0x9e566847,0x8a6a0bec,0xe434469e,0x79d73463,0xb1c42761,
         0x133d0015,0x5abe0285 },
       { 0xc04c7dab,0x92aa837c,0x43260c07,0x573d9f4c,0x78e6cc37,0x0c931562,
-        0x6b6f7383,0x94bb725b },
-      0 },
+        0x6b6f7383,0x94bb725b } },
     /* 4 */
     { { 0xbfe20925,0x62a8c244,0x8fdce867,0x91c19ac3,0xdd387063,0x5a96a5d5,
         0x21d324f6,0x61d587d4 },
       { 0xa37173ea,0xe87673a2,0x53778b65,0x23848008,0x05bab43e,0x10f8441e,
-        0x4621efbe,0xfa11fe12 },
-      0 },
+        0x4621efbe,0xfa11fe12 } },
     /* 5 */
     { { 0x2cb19ffd,0x1c891f2b,0xb1923c23,0x01ba8d5b,0x8ac5ca8e,0xb6d03d67,
         0x1f13bedc,0x586eb04c },
       { 0x27e8ed09,0x0c35c6e5,0x1819ede2,0x1e81a33c,0x56c652fa,0x278fd6c0,
-        0x70864f11,0x19d5ac08 },
-      0 },
+        0x70864f11,0x19d5ac08 } },
     /* 6 */
     { { 0xd2b533d5,0x62577734,0xa1bdddc0,0x673b8af6,0xa79ec293,0x577e7c9a,
         0xc3b266b1,0xbb6de651 },
       { 0xb65259b3,0xe7e9303a,0xd03a7480,0xd6a0afd3,0x9b3cfc27,0xc5ac83d1,
-        0x5d18b99b,0x60b4619a },
-      0 },
+        0x5d18b99b,0x60b4619a } },
     /* 7 */
     { { 0x1ae5aa1c,0xbd6a38e1,0x49e73658,0xb8b7652b,0xee5f87ed,0x0b130014,
         0xaeebffcd,0x9d0f27b2 },
       { 0x7a730a55,0xca924631,0xddbbc83a,0x9c955b2f,0xac019a71,0x07c1dfe0,
-        0x356ec48d,0x244a566d },
-      0 },
+        0x356ec48d,0x244a566d } },
     /* 8 */
     { { 0xf4f8b16a,0x56f8410e,0xc47b266a,0x97241afe,0x6d9c87c1,0x0a406b8e,
         0xcd42ab1b,0x803f3e02 },
       { 0x04dbec69,0x7f0309a8,0x3bbad05f,0xa83b85f7,0xad8e197f,0xc6097273,
-        0x5067adc1,0xc097440e },
-      0 },
+        0x5067adc1,0xc097440e } },
     /* 9 */
     { { 0xc379ab34,0x846a56f2,0x841df8d1,0xa8ee068b,0x176c68ef,0x20314459,
         0x915f1f30,0xf1af32d5 },
       { 0x5d75bd50,0x99c37531,0xf72f67bc,0x837cffba,0x48d7723f,0x0613a418,
-        0xe2d41c8b,0x23d0f130 },
-      0 },
+        0xe2d41c8b,0x23d0f130 } },
     /* 10 */
     { { 0xd5be5a2b,0xed93e225,0x5934f3c6,0x6fe79983,0x22626ffc,0x43140926,
         0x7990216a,0x50bbb4d9 },
       { 0xe57ec63e,0x378191c6,0x181dcdb2,0x65422c40,0x0236e0f6,0x41a8099b,
-        0x01fe49c3,0x2b100118 },
-      0 },
+        0x01fe49c3,0x2b100118 } },
     /* 11 */
     { { 0x9b391593,0xfc68b5c5,0x598270fc,0xc385f5a2,0xd19adcbb,0x7144f3aa,
         0x83fbae0c,0xdd558999 },
       { 0x74b82ff4,0x93b88b8e,0x71e734c9,0xd2e03c40,0x43c0322a,0x9a7a9eaf,
-        0x149d6041,0xe6e4c551 },
-      0 },
+        0x149d6041,0xe6e4c551 } },
     /* 12 */
     { { 0x80ec21fe,0x5fe14bfe,0xc255be82,0xf6ce116a,0x2f4a5d67,0x98bc5a07,
         0xdb7e63af,0xfad27148 },
       { 0x29ab05b3,0x90c0b6ac,0x4e251ae6,0x37a9a83c,0xc2aade7d,0x0a7dc875,
-        0x9f0e1a84,0x77387de3 },
-      0 },
+        0x9f0e1a84,0x77387de3 } },
     /* 13 */
     { { 0xa56c0dd7,0x1e9ecc49,0x46086c74,0xa5cffcd8,0xf505aece,0x8f7a1408,
         0xbef0c47e,0xb37b85c0 },
       { 0xcc0e6a8f,0x3596b6e4,0x6b388f23,0xfd6d4bbf,0xc39cef4e,0xaba453fa,
-        0xf9f628d5,0x9c135ac8 },
-      0 },
+        0xf9f628d5,0x9c135ac8 } },
     /* 14 */
     { { 0x95c8f8be,0x0a1c7294,0x3bf362bf,0x2961c480,0xdf63d4ac,0x9e418403,
         0x91ece900,0xc109f9cb },
       { 0x58945705,0xc2d095d0,0xddeb85c0,0xb9083d96,0x7a40449b,0x84692b8d,
-        0x2eee1ee1,0x9bc3344f },
-      0 },
+        0x2eee1ee1,0x9bc3344f } },
     /* 15 */
     { { 0x42913074,0x0d5ae356,0x48a542b1,0x55491b27,0xb310732a,0x469ca665,
         0x5f1a4cc1,0x29591d52 },
       { 0xb84f983f,0xe76f5b6b,0x9f5f84e1,0xbe7eef41,0x80baa189,0x1200d496,
-        0x18ef332c,0x6376551f },
-      0 },
+        0x18ef332c,0x6376551f } },
 };
 
 /* Multiply the base point of P256 by the scalar and return the result.
- * If map is true then convert result to affine co-ordinates.
+ * If map is true then convert result to affine coordinates.
  *
  * r     Resulting point.
  * k     Scalar to multiply by.
@@ -20483,7 +77267,7 @@
  * heap  Heap to use for allocation.
  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
  */
-static int sp_256_ecc_mulmod_base_8(sp_point* r, sp_digit* k,
+static int sp_256_ecc_mulmod_base_8(sp_point_256* r, const sp_digit* k,
         int map, void* heap)
 {
     return sp_256_ecc_mulmod_stripe_8(r, &p256_base, p256_table,
@@ -20491,1545 +77275,1289 @@
 }
 
 #else
-static sp_table_entry p256_table[256] = {
+static const sp_table_entry_256 p256_table[256] = {
     /* 0 */
     { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-      { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-      1 },
+      { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
     /* 1 */
     { { 0x18a9143c,0x79e730d4,0x5fedb601,0x75ba95fc,0x77622510,0x79fb732b,
         0xa53755c6,0x18905f76 },
       { 0xce95560a,0xddf25357,0xba19e45c,0x8b4ab8e4,0xdd21f325,0xd2e88688,
-        0x25885d85,0x8571ff18 },
-      0 },
+        0x25885d85,0x8571ff18 } },
     /* 2 */
     { { 0x4147519a,0x20288602,0x26b372f0,0xd0981eac,0xa785ebc8,0xa9d4a7ca,
         0xdbdf58e9,0xd953c50d },
       { 0xfd590f8f,0x9d6361cc,0x44e6c917,0x72e9626b,0x22eb64cf,0x7fd96110,
-        0x9eb288f3,0x863ebb7e },
-      0 },
+        0x9eb288f3,0x863ebb7e } },
     /* 3 */
     { { 0x5cdb6485,0x7856b623,0x2f0a2f97,0x808f0ea2,0x4f7e300b,0x3e68d954,
         0xb5ff80a0,0x00076055 },
       { 0x838d2010,0x7634eb9b,0x3243708a,0x54014fbb,0x842a6606,0xe0e47d39,
-        0x34373ee0,0x83087761 },
-      0 },
+        0x34373ee0,0x83087761 } },
     /* 4 */
     { { 0x16a0d2bb,0x4f922fc5,0x1a623499,0x0d5cc16c,0x57c62c8b,0x9241cf3a,
         0xfd1b667f,0x2f5e6961 },
       { 0xf5a01797,0x5c15c70b,0x60956192,0x3d20b44d,0x071fdb52,0x04911b37,
-        0x8d6f0f7b,0xf648f916 },
-      0 },
+        0x8d6f0f7b,0xf648f916 } },
     /* 5 */
     { { 0xe137bbbc,0x9e566847,0x8a6a0bec,0xe434469e,0x79d73463,0xb1c42761,
         0x133d0015,0x5abe0285 },
       { 0xc04c7dab,0x92aa837c,0x43260c07,0x573d9f4c,0x78e6cc37,0x0c931562,
-        0x6b6f7383,0x94bb725b },
-      0 },
+        0x6b6f7383,0x94bb725b } },
     /* 6 */
     { { 0x720f141c,0xbbf9b48f,0x2df5bc74,0x6199b3cd,0x411045c4,0xdc3f6129,
         0x2f7dc4ef,0xcdd6bbcb },
       { 0xeaf436fd,0xcca6700b,0xb99326be,0x6f647f6d,0x014f2522,0x0c0fa792,
-        0x4bdae5f6,0xa361bebd },
-      0 },
+        0x4bdae5f6,0xa361bebd } },
     /* 7 */
     { { 0x597c13c7,0x28aa2558,0x50b7c3e1,0xc38d635f,0xf3c09d1d,0x07039aec,
         0xc4b5292c,0xba12ca09 },
       { 0x59f91dfd,0x9e408fa4,0xceea07fb,0x3af43b66,0x9d780b29,0x1eceb089,
-        0x701fef4b,0x53ebb99d },
-      0 },
+        0x701fef4b,0x53ebb99d } },
     /* 8 */
     { { 0xb0e63d34,0x4fe7ee31,0xa9e54fab,0xf4600572,0xd5e7b5a4,0xc0493334,
         0x06d54831,0x8589fb92 },
       { 0x6583553a,0xaa70f5cc,0xe25649e5,0x0879094a,0x10044652,0xcc904507,
-        0x02541c4f,0xebb0696d },
-      0 },
+        0x02541c4f,0xebb0696d } },
     /* 9 */
     { { 0xac1647c5,0x4616ca15,0xc4cf5799,0xb8127d47,0x764dfbac,0xdc666aa3,
         0xd1b27da3,0xeb2820cb },
       { 0x6a87e008,0x9406f8d8,0x922378f3,0xd87dfa9d,0x80ccecb2,0x56ed2e42,
-        0x55a7da1d,0x1f28289b },
-      0 },
+        0x55a7da1d,0x1f28289b } },
     /* 10 */
     { { 0x3b89da99,0xabbaa0c0,0xb8284022,0xa6f2d79e,0xb81c05e8,0x27847862,
         0x05e54d63,0x337a4b59 },
       { 0x21f7794a,0x3c67500d,0x7d6d7f61,0x207005b7,0x04cfd6e8,0x0a5a3781,
-        0xf4c2fbd6,0x0d65e0d5 },
-      0 },
+        0xf4c2fbd6,0x0d65e0d5 } },
     /* 11 */
     { { 0xb5275d38,0xd9d09bbe,0x0be0a358,0x4268a745,0x973eb265,0xf0762ff4,
         0x52f4a232,0xc23da242 },
       { 0x0b94520c,0x5da1b84f,0xb05bd78e,0x09666763,0x94d29ea1,0x3a4dcb86,
-        0xc790cff1,0x19de3b8c },
-      0 },
+        0xc790cff1,0x19de3b8c } },
     /* 12 */
     { { 0x26c5fe04,0x183a716c,0x3bba1bdb,0x3b28de0b,0xa4cb712c,0x7432c586,
         0x91fccbfd,0xe34dcbd4 },
       { 0xaaa58403,0xb408d46b,0x82e97a53,0x9a697486,0x36aaa8af,0x9e390127,
-        0x7b4e0f7f,0xe7641f44 },
-      0 },
+        0x7b4e0f7f,0xe7641f44 } },
     /* 13 */
     { { 0xdf64ba59,0x7d753941,0x0b0242fc,0xd33f10ec,0xa1581859,0x4f06dfc6,
         0x052a57bf,0x4a12df57 },
       { 0x9439dbd0,0xbfa6338f,0xbde53e1f,0xd3c24bd4,0x21f1b314,0xfd5e4ffa,
-        0xbb5bea46,0x6af5aa93 },
-      0 },
+        0xbb5bea46,0x6af5aa93 } },
     /* 14 */
     { { 0x10c91999,0xda10b699,0x2a580491,0x0a24b440,0xb8cc2090,0x3e0094b4,
         0x66a44013,0x5fe3475a },
       { 0xf93e7b4b,0xb0f8cabd,0x7c23f91a,0x292b501a,0xcd1e6263,0x42e889ae,
-        0xecfea916,0xb544e308 },
-      0 },
+        0xecfea916,0xb544e308 } },
     /* 15 */
     { { 0x16ddfdce,0x6478c6e9,0xf89179e6,0x2c329166,0x4d4e67e1,0x4e8d6e76,
         0xa6b0c20b,0xe0b6b2bd },
       { 0xbb7efb57,0x0d312df2,0x790c4007,0x1aac0dde,0x679bc944,0xf90336ad,
-        0x25a63774,0x71c023de },
-      0 },
+        0x25a63774,0x71c023de } },
     /* 16 */
     { { 0xbfe20925,0x62a8c244,0x8fdce867,0x91c19ac3,0xdd387063,0x5a96a5d5,
         0x21d324f6,0x61d587d4 },
       { 0xa37173ea,0xe87673a2,0x53778b65,0x23848008,0x05bab43e,0x10f8441e,
-        0x4621efbe,0xfa11fe12 },
-      0 },
+        0x4621efbe,0xfa11fe12 } },
     /* 17 */
     { { 0x2cb19ffd,0x1c891f2b,0xb1923c23,0x01ba8d5b,0x8ac5ca8e,0xb6d03d67,
         0x1f13bedc,0x586eb04c },
       { 0x27e8ed09,0x0c35c6e5,0x1819ede2,0x1e81a33c,0x56c652fa,0x278fd6c0,
-        0x70864f11,0x19d5ac08 },
-      0 },
+        0x70864f11,0x19d5ac08 } },
     /* 18 */
     { { 0x309a4e1f,0x1e99f581,0xe9270074,0xab7de71b,0xefd28d20,0x26a5ef0b,
         0x7f9c563f,0xe7c0073f },
       { 0x0ef59f76,0x1f6d663a,0x20fcb050,0x669b3b54,0x7a6602d4,0xc08c1f7a,
-        0xc65b3c0a,0xe08504fe },
-      0 },
+        0xc65b3c0a,0xe08504fe } },
     /* 19 */
     { { 0xa031b3ca,0xf098f68d,0xe6da6d66,0x6d1cab9e,0x94f246e8,0x5bfd81fa,
         0x5b0996b4,0x78f01882 },
       { 0x3a25787f,0xb7eefde4,0x1dccac9b,0x8016f80d,0xb35bfc36,0x0cea4877,
-        0x7e94747a,0x43a773b8 },
-      0 },
+        0x7e94747a,0x43a773b8 } },
     /* 20 */
     { { 0xd2b533d5,0x62577734,0xa1bdddc0,0x673b8af6,0xa79ec293,0x577e7c9a,
         0xc3b266b1,0xbb6de651 },
       { 0xb65259b3,0xe7e9303a,0xd03a7480,0xd6a0afd3,0x9b3cfc27,0xc5ac83d1,
-        0x5d18b99b,0x60b4619a },
-      0 },
+        0x5d18b99b,0x60b4619a } },
     /* 21 */
     { { 0x1ae5aa1c,0xbd6a38e1,0x49e73658,0xb8b7652b,0xee5f87ed,0x0b130014,
         0xaeebffcd,0x9d0f27b2 },
       { 0x7a730a55,0xca924631,0xddbbc83a,0x9c955b2f,0xac019a71,0x07c1dfe0,
-        0x356ec48d,0x244a566d },
-      0 },
+        0x356ec48d,0x244a566d } },
     /* 22 */
     { { 0xeacf1f96,0x6db0394a,0x024c271c,0x9f2122a9,0x82cbd3b9,0x2626ac1b,
         0x3581ef69,0x45e58c87 },
       { 0xa38f9dbc,0xd3ff479d,0xe888a040,0xa8aaf146,0x46e0bed7,0x945adfb2,
-        0xc1e4b7a4,0xc040e21c },
-      0 },
+        0xc1e4b7a4,0xc040e21c } },
     /* 23 */
     { { 0x6f8117b6,0x847af000,0x73a35433,0x651969ff,0x1d9475eb,0x482b3576,
         0x682c6ec7,0x1cdf5c97 },
       { 0x11f04839,0x7db775b4,0x48de1698,0x7dbeacf4,0xb70b3219,0xb2921dd1,
-        0xa92dff3d,0x046755f8 },
-      0 },
+        0xa92dff3d,0x046755f8 } },
     /* 24 */
     { { 0xbce8ffcd,0xcc8ac5d2,0x2fe61a82,0x0d53c48b,0x7202d6c7,0xf6f16172,
         0x3b83a5f3,0x046e5e11 },
       { 0xd8007f01,0xe7b8ff64,0x5af43183,0x7fb1ef12,0x35e1a03c,0x045c5ea6,
-        0x303d005b,0x6e0106c3 },
-      0 },
+        0x303d005b,0x6e0106c3 } },
     /* 25 */
     { { 0x88dd73b1,0x48c73584,0x995ed0d9,0x7670708f,0xc56a2ab7,0x38385ea8,
         0xe901cf1f,0x442594ed },
       { 0x12d4b65b,0xf8faa2c9,0x96c90c37,0x94c2343b,0x5e978d1f,0xd326e4a1,
-        0x4c2ee68e,0xa796fa51 },
-      0 },
+        0x4c2ee68e,0xa796fa51 } },
     /* 26 */
     { { 0x823addd7,0x359fb604,0xe56693b3,0x9e2a6183,0x3cbf3c80,0xf885b78e,
         0xc69766e9,0xe4ad2da9 },
       { 0x8e048a61,0x357f7f42,0xc092d9a0,0x082d198c,0xc03ed8ef,0xfc3a1af4,
-        0xc37b5143,0xc5e94046 },
-      0 },
+        0xc37b5143,0xc5e94046 } },
     /* 27 */
     { { 0x2be75f9e,0x476a538c,0xcb123a78,0x6fd1a9e8,0xb109c04b,0xd85e4df0,
         0xdb464747,0x63283daf },
       { 0xbaf2df15,0xce728cf7,0x0ad9a7f4,0xe592c455,0xe834bcc3,0xfab226ad,
-        0x1981a938,0x68bd19ab },
-      0 },
+        0x1981a938,0x68bd19ab } },
     /* 28 */
     { { 0x1887d659,0xc08ead51,0xb359305a,0x3374d5f4,0xcfe74fe3,0x96986981,
         0x3c6fdfd6,0x495292f5 },
       { 0x1acec896,0x4a878c9e,0xec5b4484,0xd964b210,0x664d60a7,0x6696f7e2,
-        0x26036837,0x0ec7530d },
-      0 },
+        0x26036837,0x0ec7530d } },
     /* 29 */
     { { 0xad2687bb,0x2da13a05,0xf32e21fa,0xa1f83b6a,0x1dd4607b,0x390f5ef5,
         0x64863f0b,0x0f6207a6 },
       { 0x0f138233,0xbd67e3bb,0x272aa718,0xdd66b96c,0x26ec88ae,0x8ed00407,
-        0x08ed6dcf,0xff0db072 },
-      0 },
+        0x08ed6dcf,0xff0db072 } },
     /* 30 */
     { { 0x4c95d553,0x749fa101,0x5d680a8a,0xa44052fd,0xff3b566f,0x183b4317,
         0x88740ea3,0x313b513c },
       { 0x08d11549,0xb402e2ac,0xb4dee21c,0x071ee10b,0x47f2320e,0x26b987dd,
-        0x86f19f81,0x2d3abcf9 },
-      0 },
+        0x86f19f81,0x2d3abcf9 } },
     /* 31 */
     { { 0x815581a2,0x4c288501,0x632211af,0x9a0a6d56,0x0cab2e99,0x19ba7a0f,
         0xded98cdf,0xc036fa10 },
       { 0xc1fbd009,0x29ae08ba,0x06d15816,0x0b68b190,0x9b9e0d8f,0xc2eb3277,
-        0xb6d40194,0xa6b2a2c4 },
-      0 },
+        0xb6d40194,0xa6b2a2c4 } },
     /* 32 */
     { { 0x6d3549cf,0xd433e50f,0xfacd665e,0x6f33696f,0xce11fcb4,0x695bfdac,
         0xaf7c9860,0x810ee252 },
       { 0x7159bb2c,0x65450fe1,0x758b357b,0xf7dfbebe,0xd69fea72,0x2b057e74,
-        0x92731745,0xd485717a },
-      0 },
+        0x92731745,0xd485717a } },
     /* 33 */
     { { 0xf0cb5a98,0x11741a8a,0x1f3110bf,0xd3da8f93,0xab382adf,0x1994e2cb,
         0x2f9a604e,0x6a6045a7 },
       { 0xa2b2411d,0x170c0d3f,0x510e96e0,0xbe0eb83e,0x8865b3cc,0x3bcc9f73,
-        0xf9e15790,0xd3e45cfa },
-      0 },
+        0xf9e15790,0xd3e45cfa } },
     /* 34 */
     { { 0xe83f7669,0xce1f69bb,0x72877d6b,0x09f8ae82,0x3244278d,0x9548ae54,
         0xe3c2c19c,0x207755de },
       { 0x6fef1945,0x87bd61d9,0xb12d28c3,0x18813cef,0x72df64aa,0x9fbcd1d6,
-        0x7154b00d,0x48dc5ee5 },
-      0 },
+        0x7154b00d,0x48dc5ee5 } },
     /* 35 */
     { { 0xf7e5a199,0x123790bf,0x989ccbb7,0xe0efb8cf,0x0a519c79,0xc27a2bfe,
         0xdff6f445,0xf2fb0aed },
       { 0xf0b5025f,0x41c09575,0x40fa9f22,0x550543d7,0x380bfbd0,0x8fa3c8ad,
-        0xdb28d525,0xa13e9015 },
-      0 },
+        0xdb28d525,0xa13e9015 } },
     /* 36 */
     { { 0xa2b65cbc,0xf9f7a350,0x2a464226,0x0b04b972,0xe23f07a1,0x265ce241,
         0x1497526f,0x2bf0d6b0 },
       { 0x4b216fb7,0xd3d4dd3f,0xfbdda26a,0xf7d7b867,0x6708505c,0xaeb7b83f,
-        0x162fe89f,0x42a94a5a },
-      0 },
+        0x162fe89f,0x42a94a5a } },
     /* 37 */
     { { 0xeaadf191,0x5846ad0b,0x25a268d7,0x0f8a4890,0x494dc1f6,0xe8603050,
         0xc65ede3d,0x2c2dd969 },
       { 0x93849c17,0x6d02171d,0x1da250dd,0x460488ba,0x3c3a5485,0x4810c706,
-        0x42c56dbc,0xf437fa1f },
-      0 },
+        0x42c56dbc,0xf437fa1f } },
     /* 38 */
     { { 0x4a0f7dab,0x6aa0d714,0x1776e9ac,0x0f049793,0xf5f39786,0x52c0a050,
         0x54707aa8,0xaaf45b33 },
       { 0xc18d364a,0x85e37c33,0x3e497165,0xd40b9b06,0x15ec5444,0xf4171681,
-        0xf4f272bc,0xcdf6310d },
-      0 },
+        0xf4f272bc,0xcdf6310d } },
     /* 39 */
     { { 0x8ea8b7ef,0x7473c623,0x85bc2287,0x08e93518,0x2bda8e34,0x41956772,
         0xda9e2ff2,0xf0d008ba },
       { 0x2414d3b1,0x2912671d,0xb019ea76,0xb3754985,0x453bcbdb,0x5c61b96d,
-        0xca887b8b,0x5bd5c2f5 },
-      0 },
+        0xca887b8b,0x5bd5c2f5 } },
     /* 40 */
     { { 0xf49a3154,0xef0f469e,0x6e2b2e9a,0x3e85a595,0xaa924a9c,0x45aaec1e,
         0xa09e4719,0xaa12dfc8 },
       { 0x4df69f1d,0x26f27227,0xa2ff5e73,0xe0e4c82c,0xb7a9dd44,0xb9d8ce73,
-        0xe48ca901,0x6c036e73 },
-      0 },
+        0xe48ca901,0x6c036e73 } },
     /* 41 */
     { { 0x0f6e3138,0x5cfae12a,0x25ad345a,0x6966ef00,0x45672bc5,0x8993c64b,
         0x96afbe24,0x292ff658 },
       { 0x5e213402,0xd5250d44,0x4392c9fe,0xf6580e27,0xda1c72e8,0x097b397f,
-        0x311b7276,0x644e0c90 },
-      0 },
+        0x311b7276,0x644e0c90 } },
     /* 42 */
     { { 0xa47153f0,0xe1e421e1,0x920418c9,0xb86c3b79,0x705d7672,0x93bdce87,
         0xcab79a77,0xf25ae793 },
       { 0x6d869d0c,0x1f3194a3,0x4986c264,0x9d55c882,0x096e945e,0x49fb5ea3,
-        0x13db0a3e,0x39b8e653 },
-      0 },
+        0x13db0a3e,0x39b8e653 } },
     /* 43 */
     { { 0xb6fd2e59,0x37754200,0x9255c98f,0x35e2c066,0x0e2a5739,0xd9dab21a,
         0x0f19db06,0x39122f2f },
       { 0x03cad53c,0xcfbce1e0,0xe65c17e3,0x225b2c0f,0x9aa13877,0x72baf1d2,
-        0xce80ff8d,0x8de80af8 },
-      0 },
+        0xce80ff8d,0x8de80af8 } },
     /* 44 */
     { { 0x207bbb76,0xafbea8d9,0x21782758,0x921c7e7c,0x1c0436b1,0xdfa2b74b,
         0x2e368c04,0x87194906 },
       { 0xa3993df5,0xb5f928bb,0xf3b3d26a,0x639d75b5,0x85b55050,0x011aa78a,
-        0x5b74fde1,0xfc315e6a },
-      0 },
+        0x5b74fde1,0xfc315e6a } },
     /* 45 */
     { { 0xe8d6ecfa,0x561fd41a,0x1aec7f86,0x5f8c44f6,0x4924741d,0x98452a7b,
         0xee389088,0xe6d4a7ad },
       { 0x4593c75d,0x60552ed1,0xdd271162,0x70a70da4,0x7ba2c7db,0xd2aede93,
-        0x9be2ae57,0x35dfaf9a },
-      0 },
+        0x9be2ae57,0x35dfaf9a } },
     /* 46 */
     { { 0xaa736636,0x6b956fcd,0xae2cab7e,0x09f51d97,0x0f349966,0xfb10bf41,
         0x1c830d2b,0x1da5c7d7 },
       { 0x3cce6825,0x5c41e483,0xf9573c3b,0x15ad118f,0xf23036b8,0xa28552c7,
-        0xdbf4b9d6,0x7077c0fd },
-      0 },
+        0xdbf4b9d6,0x7077c0fd } },
     /* 47 */
     { { 0x46b9661c,0xbf63ff8d,0x0d2cfd71,0xa1dfd36b,0xa847f8f7,0x0373e140,
         0xe50efe44,0x53a8632e },
       { 0x696d8051,0x0976ff68,0xc74f468a,0xdaec0c95,0x5e4e26bd,0x62994dc3,
-        0x34e1fcc1,0x028ca76d },
-      0 },
+        0x34e1fcc1,0x028ca76d } },
     /* 48 */
     { { 0xfc9877ee,0xd11d47dc,0x801d0002,0xc8b36210,0x54c260b6,0xd002c117,
         0x6962f046,0x04c17cd8 },
       { 0xb0daddf5,0x6d9bd094,0x24ce55c0,0xbea23575,0x72da03b5,0x663356e6,
-        0xfed97474,0xf7ba4de9 },
-      0 },
+        0xfed97474,0xf7ba4de9 } },
     /* 49 */
     { { 0xebe1263f,0xd0dbfa34,0x71ae7ce6,0x55763735,0x82a6f523,0xd2440553,
         0x52131c41,0xe31f9600 },
       { 0xea6b6ec6,0xd1bb9216,0x73c2fc44,0x37a1d12e,0x89d0a294,0xc10e7eac,
-        0xce34d47b,0xaa3a6259 },
-      0 },
+        0xce34d47b,0xaa3a6259 } },
     /* 50 */
     { { 0x36f3dcd3,0xfbcf9df5,0xd2bf7360,0x6ceded50,0xdf504f5b,0x491710fa,
         0x7e79daee,0x2398dd62 },
       { 0x6d09569e,0xcf4705a3,0x5149f769,0xea0619bb,0x35f6034c,0xff9c0377,
-        0x1c046210,0x5717f5b2 },
-      0 },
+        0x1c046210,0x5717f5b2 } },
     /* 51 */
     { { 0x21dd895e,0x9fe229c9,0x40c28451,0x8e518500,0x1d637ecd,0xfa13d239,
         0x0e3c28de,0x660a2c56 },
       { 0xd67fcbd0,0x9cca88ae,0x0ea9f096,0xc8472478,0x72e92b4d,0x32b2f481,
-        0x4f522453,0x624ee54c },
-      0 },
+        0x4f522453,0x624ee54c } },
     /* 52 */
     { { 0xd897eccc,0x09549ce4,0x3f9880aa,0x4d49d1d9,0x043a7c20,0x723c2423,
         0x92bdfbc0,0x4f392afb },
       { 0x7de44fd9,0x6969f8fa,0x57b32156,0xb66cfbe4,0x368ebc3c,0xdb2fa803,
-        0xccdb399c,0x8a3e7977 },
-      0 },
+        0xccdb399c,0x8a3e7977 } },
     /* 53 */
     { { 0x06c4b125,0xdde1881f,0xf6e3ca8c,0xae34e300,0x5c7a13e9,0xef6999de,
         0x70c24404,0x3888d023 },
       { 0x44f91081,0x76280356,0x5f015504,0x3d9fcf61,0x632cd36e,0x1827edc8,
-        0x18102336,0xa5e62e47 },
-      0 },
+        0x18102336,0xa5e62e47 } },
     /* 54 */
     { { 0x2facd6c8,0x1a825ee3,0x54bcbc66,0x699c6354,0x98df9931,0x0ce3edf7,
         0x466a5adc,0x2c4768e6 },
       { 0x90a64bc9,0xb346ff8c,0xe4779f5c,0x630a6020,0xbc05e884,0xd949d064,
-        0xf9e652a0,0x7b5e6441 },
-      0 },
+        0xf9e652a0,0x7b5e6441 } },
     /* 55 */
     { { 0x1d28444a,0x2169422c,0xbe136a39,0xe996c5d8,0xfb0c7fce,0x2387afe5,
         0x0c8d744a,0xb8af73cb },
       { 0x338b86fd,0x5fde83aa,0xa58a5cff,0xfee3f158,0x20ac9433,0xc9ee8f6f,
-        0x7f3f0895,0xa036395f },
-      0 },
+        0x7f3f0895,0xa036395f } },
     /* 56 */
     { { 0xa10f7770,0x8c73c6bb,0xa12a0e24,0xa6f16d81,0x51bc2b9f,0x100df682,
         0x875fb533,0x4be36b01 },
       { 0x9fb56dbb,0x9226086e,0x07e7a4f8,0x306fef8b,0x66d52f20,0xeeaccc05,
-        0x1bdc00c0,0x8cbc9a87 },
-      0 },
+        0x1bdc00c0,0x8cbc9a87 } },
     /* 57 */
     { { 0xc0dac4ab,0xe131895c,0x712ff112,0xa874a440,0x6a1cee57,0x6332ae7c,
         0x0c0835f8,0x44e7553e },
       { 0x7734002d,0x6d503fff,0x0b34425c,0x9d35cb8b,0x0e8738b5,0x95f70276,
-        0x5eb8fc18,0x470a683a },
-      0 },
+        0x5eb8fc18,0x470a683a } },
     /* 58 */
     { { 0x90513482,0x81b761dc,0x01e9276a,0x0287202a,0x0ce73083,0xcda441ee,
         0xc63dc6ef,0x16410690 },
       { 0x6d06a2ed,0xf5034a06,0x189b100b,0xdd4d7745,0xab8218c9,0xd914ae72,
-        0x7abcbb4f,0xd73479fd },
-      0 },
+        0x7abcbb4f,0xd73479fd } },
     /* 59 */
     { { 0x5ad4c6e5,0x7edefb16,0x5b06d04d,0x262cf08f,0x8575cb14,0x12ed5bb1,
         0x0771666b,0x816469e3 },
       { 0x561e291e,0xd7ab9d79,0xc1de1661,0xeb9daf22,0x135e0513,0xf49827eb,
-        0xf0dd3f9c,0x0a36dd23 },
-      0 },
+        0xf0dd3f9c,0x0a36dd23 } },
     /* 60 */
     { { 0x41d5533c,0x098d32c7,0x8684628f,0x7c5f5a9e,0xe349bd11,0x39a228ad,
         0xfdbab118,0xe331dfd6 },
       { 0x6bcc6ed8,0x5100ab68,0xef7a260e,0x7160c3bd,0xbce850d7,0x9063d9a7,
-        0x492e3389,0xd3b4782a },
-      0 },
+        0x492e3389,0xd3b4782a } },
     /* 61 */
     { { 0xf3821f90,0xa149b6e8,0x66eb7aad,0x92edd9ed,0x1a013116,0x0bb66953,
         0x4c86a5bd,0x7281275a },
       { 0xd3ff47e5,0x503858f7,0x61016441,0x5e1616bc,0x7dfd9bb1,0x62b0f11a,
-        0xce145059,0x2c062e7e },
-      0 },
+        0xce145059,0x2c062e7e } },
     /* 62 */
     { { 0x0159ac2e,0xa76f996f,0xcbdb2713,0x281e7736,0x08e46047,0x2ad6d288,
         0x2c4e7ef1,0x282a35f9 },
       { 0xc0ce5cd2,0x9c354b1e,0x1379c229,0xcf99efc9,0x3e82c11e,0x992caf38,
-        0x554d2abd,0xc71cd513 },
-      0 },
+        0x554d2abd,0xc71cd513 } },
     /* 63 */
     { { 0x09b578f4,0x4885de9c,0xe3affa7a,0x1884e258,0x59182f1f,0x8f76b1b7,
         0xcf47f3a3,0xc50f6740 },
       { 0x374b68ea,0xa9c4adf3,0x69965fe2,0xa406f323,0x85a53050,0x2f86a222,
-        0x212958dc,0xb9ecb3a7 },
-      0 },
+        0x212958dc,0xb9ecb3a7 } },
     /* 64 */
     { { 0xf4f8b16a,0x56f8410e,0xc47b266a,0x97241afe,0x6d9c87c1,0x0a406b8e,
         0xcd42ab1b,0x803f3e02 },
       { 0x04dbec69,0x7f0309a8,0x3bbad05f,0xa83b85f7,0xad8e197f,0xc6097273,
-        0x5067adc1,0xc097440e },
-      0 },
+        0x5067adc1,0xc097440e } },
     /* 65 */
     { { 0xc379ab34,0x846a56f2,0x841df8d1,0xa8ee068b,0x176c68ef,0x20314459,
         0x915f1f30,0xf1af32d5 },
       { 0x5d75bd50,0x99c37531,0xf72f67bc,0x837cffba,0x48d7723f,0x0613a418,
-        0xe2d41c8b,0x23d0f130 },
-      0 },
+        0xe2d41c8b,0x23d0f130 } },
     /* 66 */
     { { 0xf41500d9,0x857ab6ed,0xfcbeada8,0x0d890ae5,0x89725951,0x52fe8648,
         0xc0a3fadd,0xb0288dd6 },
       { 0x650bcb08,0x85320f30,0x695d6e16,0x71af6313,0xb989aa76,0x31f520a7,
-        0xf408c8d2,0xffd3724f },
-      0 },
+        0xf408c8d2,0xffd3724f } },
     /* 67 */
     { { 0xb458e6cb,0x53968e64,0x317a5d28,0x992dad20,0x7aa75f56,0x3814ae0b,
         0xd78c26df,0xf5590f4a },
       { 0xcf0ba55a,0x0fc24bd3,0x0c778bae,0x0fc4724a,0x683b674a,0x1ce9864f,
-        0xf6f74a20,0x18d6da54 },
-      0 },
+        0xf6f74a20,0x18d6da54 } },
     /* 68 */
     { { 0xd5be5a2b,0xed93e225,0x5934f3c6,0x6fe79983,0x22626ffc,0x43140926,
         0x7990216a,0x50bbb4d9 },
       { 0xe57ec63e,0x378191c6,0x181dcdb2,0x65422c40,0x0236e0f6,0x41a8099b,
-        0x01fe49c3,0x2b100118 },
-      0 },
+        0x01fe49c3,0x2b100118 } },
     /* 69 */
     { { 0x9b391593,0xfc68b5c5,0x598270fc,0xc385f5a2,0xd19adcbb,0x7144f3aa,
         0x83fbae0c,0xdd558999 },
       { 0x74b82ff4,0x93b88b8e,0x71e734c9,0xd2e03c40,0x43c0322a,0x9a7a9eaf,
-        0x149d6041,0xe6e4c551 },
-      0 },
+        0x149d6041,0xe6e4c551 } },
     /* 70 */
     { { 0x1e9af288,0x55f655bb,0xf7ada931,0x647e1a64,0xcb2820e5,0x43697e4b,
         0x07ed56ff,0x51e00db1 },
       { 0x771c327e,0x43d169b8,0x4a96c2ad,0x29cdb20b,0x3deb4779,0xc07d51f5,
-        0x49829177,0xe22f4241 },
-      0 },
+        0x49829177,0xe22f4241 } },
     /* 71 */
     { { 0x635f1abb,0xcd45e8f4,0x68538874,0x7edc0cb5,0xb5a8034d,0xc9472c1f,
         0x52dc48c9,0xf709373d },
       { 0xa8af30d6,0x401966bb,0xf137b69c,0x95bf5f4a,0x9361c47e,0x3966162a,
-        0xe7275b11,0xbd52d288 },
-      0 },
+        0xe7275b11,0xbd52d288 } },
     /* 72 */
     { { 0x9c5fa877,0xab155c7a,0x7d3a3d48,0x17dad672,0x73d189d8,0x43f43f9e,
         0xc8aa77a6,0xa0d0f8e4 },
       { 0xcc94f92d,0x0bbeafd8,0x0c4ddb3a,0xd818c8be,0xb82eba14,0x22cc65f8,
-        0x946d6a00,0xa56c78c7 },
-      0 },
+        0x946d6a00,0xa56c78c7 } },
     /* 73 */
     { { 0x0dd09529,0x2962391b,0x3daddfcf,0x803e0ea6,0x5b5bf481,0x2c77351f,
         0x731a367a,0xd8befdf8 },
       { 0xfc0157f4,0xab919d42,0xfec8e650,0xf51caed7,0x02d48b0a,0xcdf9cb40,
-        0xce9f6478,0x854a68a5 },
-      0 },
+        0xce9f6478,0x854a68a5 } },
     /* 74 */
     { { 0x63506ea5,0xdc35f67b,0xa4fe0d66,0x9286c489,0xfe95cd4d,0x3f101d3b,
         0x98846a95,0x5cacea0b },
       { 0x9ceac44d,0xa90df60c,0x354d1c3a,0x3db29af4,0xad5dbabe,0x08dd3de8,
-        0x35e4efa9,0xe4982d12 },
-      0 },
+        0x35e4efa9,0xe4982d12 } },
     /* 75 */
     { { 0xc34cd55e,0x23104a22,0x2680d132,0x58695bb3,0x1fa1d943,0xfb345afa,
         0x16b20499,0x8046b7f6 },
       { 0x38e7d098,0xb533581e,0xf46f0b70,0xd7f61e8d,0x44cb78c4,0x30dea9ea,
-        0x9082af55,0xeb17ca7b },
-      0 },
+        0x9082af55,0xeb17ca7b } },
     /* 76 */
     { { 0x76a145b9,0x1751b598,0xc1bc71ec,0xa5cf6b0f,0x392715bb,0xd3e03565,
         0xfab5e131,0x097b00ba },
       { 0x565f69e1,0xaa66c8e9,0xb5be5199,0x77e8f75a,0xda4fd984,0x6033ba11,
-        0xafdbcc9e,0xf95c747b },
-      0 },
+        0xafdbcc9e,0xf95c747b } },
     /* 77 */
     { { 0xbebae45e,0x558f01d3,0xc4bc6955,0xa8ebe9f0,0xdbc64fc6,0xaeb705b1,
         0x566ed837,0x3512601e },
       { 0xfa1161cd,0x9336f1e1,0x4c65ef87,0x328ab8d5,0x724f21e5,0x4757eee2,
-        0x6068ab6b,0x0ef97123 },
-      0 },
+        0x6068ab6b,0x0ef97123 } },
     /* 78 */
     { { 0x54ca4226,0x02598cf7,0xf8642c8e,0x5eede138,0x468e1790,0x48963f74,
         0x3b4fbc95,0xfc16d933 },
       { 0xe7c800ca,0xbe96fb31,0x2678adaa,0x13806331,0x6ff3e8b5,0x3d624497,
-        0xb95d7a17,0x14ca4af1 },
-      0 },
+        0xb95d7a17,0x14ca4af1 } },
     /* 79 */
     { { 0xbd2f81d5,0x7a4771ba,0x01f7d196,0x1a5f9d69,0xcad9c907,0xd898bef7,
         0xf59c231d,0x4057b063 },
       { 0x89c05c0a,0xbffd82fe,0x1dc0df85,0xe4911c6f,0xa35a16db,0x3befccae,
-        0xf1330b13,0x1c3b5d64 },
-      0 },
+        0xf1330b13,0x1c3b5d64 } },
     /* 80 */
     { { 0x80ec21fe,0x5fe14bfe,0xc255be82,0xf6ce116a,0x2f4a5d67,0x98bc5a07,
         0xdb7e63af,0xfad27148 },
       { 0x29ab05b3,0x90c0b6ac,0x4e251ae6,0x37a9a83c,0xc2aade7d,0x0a7dc875,
-        0x9f0e1a84,0x77387de3 },
-      0 },
+        0x9f0e1a84,0x77387de3 } },
     /* 81 */
     { { 0xa56c0dd7,0x1e9ecc49,0x46086c74,0xa5cffcd8,0xf505aece,0x8f7a1408,
         0xbef0c47e,0xb37b85c0 },
       { 0xcc0e6a8f,0x3596b6e4,0x6b388f23,0xfd6d4bbf,0xc39cef4e,0xaba453fa,
-        0xf9f628d5,0x9c135ac8 },
-      0 },
+        0xf9f628d5,0x9c135ac8 } },
     /* 82 */
     { { 0x84e35743,0x32aa3202,0x85a3cdef,0x320d6ab1,0x1df19819,0xb821b176,
         0xc433851f,0x5721361f },
       { 0x71fc9168,0x1f0db36a,0x5e5c403c,0x5f98ba73,0x37bcd8f5,0xf64ca87e,
-        0xe6bb11bd,0xdcbac3c9 },
-      0 },
+        0xe6bb11bd,0xdcbac3c9 } },
     /* 83 */
     { { 0x4518cbe2,0xf01d9968,0x9c9eb04e,0xd242fc18,0xe47feebf,0x727663c7,
         0x2d626862,0xb8c1c89e },
       { 0xc8e1d569,0x51a58bdd,0xb7d88cd0,0x563809c8,0xf11f31eb,0x26c27fd9,
-        0x2f9422d4,0x5d23bbda },
-      0 },
+        0x2f9422d4,0x5d23bbda } },
     /* 84 */
     { { 0x95c8f8be,0x0a1c7294,0x3bf362bf,0x2961c480,0xdf63d4ac,0x9e418403,
         0x91ece900,0xc109f9cb },
       { 0x58945705,0xc2d095d0,0xddeb85c0,0xb9083d96,0x7a40449b,0x84692b8d,
-        0x2eee1ee1,0x9bc3344f },
-      0 },
+        0x2eee1ee1,0x9bc3344f } },
     /* 85 */
     { { 0x42913074,0x0d5ae356,0x48a542b1,0x55491b27,0xb310732a,0x469ca665,
         0x5f1a4cc1,0x29591d52 },
       { 0xb84f983f,0xe76f5b6b,0x9f5f84e1,0xbe7eef41,0x80baa189,0x1200d496,
-        0x18ef332c,0x6376551f },
-      0 },
+        0x18ef332c,0x6376551f } },
     /* 86 */
     { { 0x562976cc,0xbda5f14e,0x0ef12c38,0x22bca3e6,0x6cca9852,0xbbfa3064,
         0x08e2987a,0xbdb79dc8 },
       { 0xcb06a772,0xfd2cb5c9,0xfe536dce,0x38f475aa,0x7c2b5db8,0xc2a3e022,
-        0xadd3c14a,0x8ee86001 },
-      0 },
+        0xadd3c14a,0x8ee86001 } },
     /* 87 */
     { { 0xa4ade873,0xcbe96981,0xc4fba48c,0x7ee9aa4d,0x5a054ba5,0x2cee2899,
         0x6f77aa4b,0x92e51d7a },
       { 0x7190a34d,0x948bafa8,0xf6bd1ed1,0xd698f75b,0x0caf1144,0xd00ee6e3,
-        0x0a56aaaa,0x5182f86f },
-      0 },
+        0x0a56aaaa,0x5182f86f } },
     /* 88 */
     { { 0x7a4cc99c,0xfba6212c,0x3e6d9ca1,0xff609b68,0x5ac98c5a,0x5dbb27cb,
         0x4073a6f2,0x91dcab5d },
       { 0x5f575a70,0x01b6cc3d,0x6f8d87fa,0x0cb36139,0x89981736,0x165d4e8c,
-        0x97974f2b,0x17a0cedb },
-      0 },
+        0x97974f2b,0x17a0cedb } },
     /* 89 */
     { { 0x076c8d3a,0x38861e2a,0x210f924b,0x701aad39,0x13a835d9,0x94d0eae4,
         0x7f4cdf41,0x2e8ce36c },
       { 0x037a862b,0x91273dab,0x60e4c8fa,0x01ba9bb7,0x33baf2dd,0xf9645388,
-        0x34f668f3,0xf4ccc6cb },
-      0 },
+        0x34f668f3,0xf4ccc6cb } },
     /* 90 */
     { { 0xf1f79687,0x44ef525c,0x92efa815,0x7c595495,0xa5c78d29,0xe1231741,
         0x9a0df3c9,0xac0db488 },
       { 0xdf01747f,0x86bfc711,0xef17df13,0x592b9358,0x5ccb6bb5,0xe5880e4f,
-        0x94c974a2,0x95a64a61 },
-      0 },
+        0x94c974a2,0x95a64a61 } },
     /* 91 */
     { { 0xc15a4c93,0x72c1efda,0x82585141,0x40269b73,0x16cb0bad,0x6a8dfb1c,
         0x29210677,0x231e54ba },
       { 0x8ae6d2dc,0xa70df917,0x39112918,0x4d6aa63f,0x5e5b7223,0xf627726b,
-        0xd8a731e1,0xab0be032 },
-      0 },
+        0xd8a731e1,0xab0be032 } },
     /* 92 */
     { { 0x8d131f2d,0x097ad0e9,0x3b04f101,0x637f09e3,0xd5e9a748,0x1ac86196,
         0x2cf6a679,0xf1bcc880 },
       { 0xe8daacb4,0x25c69140,0x60f65009,0x3c4e4055,0x477937a6,0x591cc8fc,
-        0x5aebb271,0x85169469 },
-      0 },
+        0x5aebb271,0x85169469 } },
     /* 93 */
     { { 0xf1dcf593,0xde35c143,0xb018be3b,0x78202b29,0x9bdd9d3d,0xe9cdadc2,
         0xdaad55d8,0x8f67d9d2 },
       { 0x7481ea5f,0x84111656,0xe34c590c,0xe7d2dde9,0x05053fa8,0xffdd43f4,
-        0xc0728b5d,0xf84572b9 },
-      0 },
+        0xc0728b5d,0xf84572b9 } },
     /* 94 */
     { { 0x97af71c9,0x5e1a7a71,0x7a736565,0xa1449444,0x0e1d5063,0xa1b4ae07,
         0x616b2c19,0xedee2710 },
       { 0x11734121,0xb2f034f5,0x4a25e9f0,0x1cac6e55,0xa40c2ecf,0x8dc148f3,
-        0x44ebd7f4,0x9fd27e9b },
-      0 },
+        0x44ebd7f4,0x9fd27e9b } },
     /* 95 */
     { { 0xf6e2cb16,0x3cc7658a,0xfe5919b6,0xe3eb7d2c,0x168d5583,0x5a8c5816,
         0x958ff387,0xa40c2fb6 },
       { 0xfedcc158,0x8c9ec560,0x55f23056,0x7ad804c6,0x9a307e12,0xd9396704,
-        0x7dc6decf,0x99bc9bb8 },
-      0 },
+        0x7dc6decf,0x99bc9bb8 } },
     /* 96 */
     { { 0x927dafc6,0x84a9521d,0x5c09cd19,0x52c1fb69,0xf9366dde,0x9d9581a0,
         0xa16d7e64,0x9abe210b },
       { 0x48915220,0x480af84a,0x4dd816c6,0xfa73176a,0x1681ca5a,0xc7d53987,
-        0x87f344b0,0x7881c257 },
-      0 },
+        0x87f344b0,0x7881c257 } },
     /* 97 */
     { { 0xe0bcf3ff,0x93399b51,0x127f74f6,0x0d02cbc5,0xdd01d968,0x8fb465a2,
         0xa30e8940,0x15e6e319 },
       { 0x3e0e05f4,0x646d6e0d,0x43588404,0xfad7bddc,0xc4f850d3,0xbe61c7d1,
-        0x191172ce,0x0e55facf },
-      0 },
+        0x191172ce,0x0e55facf } },
     /* 98 */
     { { 0xf8787564,0x7e9d9806,0x31e85ce6,0x1a331721,0xb819e8d6,0x6b0158ca,
         0x6fe96577,0xd73d0976 },
       { 0x1eb7206e,0x42483425,0xc618bb42,0xa519290f,0x5e30a520,0x5dcbb859,
-        0x8f15a50b,0x9250a374 },
-      0 },
+        0x8f15a50b,0x9250a374 } },
     /* 99 */
     { { 0xbe577410,0xcaff08f8,0x5077a8c6,0xfd408a03,0xec0a63a4,0xf1f63289,
         0xc1cc8c0b,0x77414082 },
       { 0xeb0991cd,0x05a40fa6,0x49fdc296,0xc1ca0866,0xb324fd40,0x3a68a3c7,
-        0x12eb20b9,0x8cb04f4d },
-      0 },
+        0x12eb20b9,0x8cb04f4d } },
     /* 100 */
     { { 0x6906171c,0xb1c2d055,0xb0240c3f,0x9073e9cd,0xd8906841,0xdb8e6b4f,
         0x47123b51,0xe4e429ef },
       { 0x38ec36f4,0x0b8dd53c,0xff4b6a27,0xf9d2dc01,0x879a9a48,0x5d066e07,
-        0x3c6e6552,0x37bca2ff },
-      0 },
+        0x3c6e6552,0x37bca2ff } },
     /* 101 */
     { { 0xdf562470,0x4cd2e3c7,0xc0964ac9,0x44f272a2,0x80c793be,0x7c6d5df9,
         0x3002b22a,0x59913edc },
       { 0x5750592a,0x7a139a83,0xe783de02,0x99e01d80,0xea05d64f,0xcf8c0375,
-        0xb013e226,0x43786e4a },
-      0 },
+        0xb013e226,0x43786e4a } },
     /* 102 */
     { { 0x9e56b5a6,0xff32b0ed,0xd9fc68f9,0x0750d9a6,0x597846a7,0xec15e845,
         0xb7e79e7a,0x8638ca98 },
       { 0x0afc24b2,0x2f5ae096,0x4dace8f2,0x05398eaf,0xaecba78f,0x3b765dd0,
-        0x7b3aa6f0,0x1ecdd36a },
-      0 },
+        0x7b3aa6f0,0x1ecdd36a } },
     /* 103 */
     { { 0x6c5ff2f3,0x5d3acd62,0x2873a978,0xa2d516c0,0xd2110d54,0xad94c9fa,
         0xd459f32d,0xd85d0f85 },
       { 0x10b11da3,0x9f700b8d,0xa78318c4,0xd2c22c30,0x9208decd,0x556988f4,
-        0xb4ed3c62,0xa04f19c3 },
-      0 },
+        0xb4ed3c62,0xa04f19c3 } },
     /* 104 */
     { { 0xed7f93bd,0x087924c8,0x392f51f6,0xcb64ac5d,0x821b71af,0x7cae330a,
         0x5c0950b0,0x92b2eeea },
       { 0x85b6e235,0x85ac4c94,0x2936c0f0,0xab2ca4a9,0xe0508891,0x80faa6b3,
-        0x5834276c,0x1ee78221 },
-      0 },
+        0x5834276c,0x1ee78221 } },
     /* 105 */
     { { 0xe63e79f7,0xa60a2e00,0xf399d906,0xf590e7b2,0x6607c09d,0x9021054a,
         0x57a6e150,0xf3f2ced8 },
       { 0xf10d9b55,0x200510f3,0xd8642648,0x9d2fcfac,0xe8bd0e7c,0xe5631aa7,
-        0x3da3e210,0x0f56a454 },
-      0 },
+        0x3da3e210,0x0f56a454 } },
     /* 106 */
     { { 0x1043e0df,0x5b21bffa,0x9c007e6d,0x6c74b6cc,0xd4a8517a,0x1a656ec0,
         0x1969e263,0xbd8f1741 },
       { 0xbeb7494a,0x8a9bbb86,0x45f3b838,0x1567d46f,0xa4e5a79a,0xdf7a12a7,
-        0x30ccfa09,0x2d1a1c35 },
-      0 },
+        0x30ccfa09,0x2d1a1c35 } },
     /* 107 */
     { { 0x506508da,0x192e3813,0xa1d795a7,0x336180c4,0x7a9944b3,0xcddb5949,
         0xb91fba46,0xa107a65e },
       { 0x0f94d639,0xe6d1d1c5,0x8a58b7d7,0x8b4af375,0xbd37ca1c,0x1a7c5584,
-        0xf87a9af2,0x183d760a },
-      0 },
+        0xf87a9af2,0x183d760a } },
     /* 108 */
     { { 0x0dde59a4,0x29d69711,0x0e8bef87,0xf1ad8d07,0x4f2ebe78,0x229b4963,
         0xc269d754,0x1d44179d },
       { 0x8390d30e,0xb32dc0cf,0x0de8110c,0x0a3b2753,0x2bc0339a,0x31af1dc5,
-        0x9606d262,0x771f9cc2 },
-      0 },
+        0x9606d262,0x771f9cc2 } },
     /* 109 */
     { { 0x85040739,0x99993e77,0x8026a939,0x44539db9,0xf5f8fc26,0xcf40f6f2,
         0x0362718e,0x64427a31 },
       { 0x85428aa8,0x4f4f2d87,0xebfb49a8,0x7b7adc3f,0xf23d01ac,0x201b2c6d,
-        0x6ae90d6d,0x49d9b749 },
-      0 },
+        0x6ae90d6d,0x49d9b749 } },
     /* 110 */
     { { 0x435d1099,0xcc78d8bc,0x8e8d1a08,0x2adbcd4e,0x2cb68a41,0x02c2e2a0,
         0x3f605445,0x9037d81b },
       { 0x074c7b61,0x7cdbac27,0x57bfd72e,0xfe2031ab,0x596d5352,0x61ccec96,
-        0x7cc0639c,0x08c3de6a },
-      0 },
+        0x7cc0639c,0x08c3de6a } },
     /* 111 */
     { { 0xf6d552ab,0x20fdd020,0x05cd81f1,0x56baff98,0x91351291,0x06fb7c3e,
         0x45796b2f,0xc6909442 },
       { 0x41231bd1,0x17b3ae9c,0x5cc58205,0x1eac6e87,0xf9d6a122,0x208837ab,
-        0xcafe3ac0,0x3fa3db02 },
-      0 },
+        0xcafe3ac0,0x3fa3db02 } },
     /* 112 */
     { { 0x05058880,0xd75a3e65,0x643943f2,0x7da365ef,0xfab24925,0x4147861c,
         0xfdb808ff,0xc5c4bdb0 },
       { 0xb272b56b,0x73513e34,0x11b9043a,0xc8327e95,0xf8844969,0xfd8ce37d,
-        0x46c2b6b5,0x2d56db94 },
-      0 },
+        0x46c2b6b5,0x2d56db94 } },
     /* 113 */
     { { 0xff46ac6b,0x2461782f,0x07a2e425,0xd19f7926,0x09a48de1,0xfafea3c4,
         0xe503ba42,0x0f56bd9d },
       { 0x345cda49,0x137d4ed1,0x816f299d,0x821158fc,0xaeb43402,0xe7c6a54a,
-        0x1173b5f1,0x4003bb9d },
-      0 },
+        0x1173b5f1,0x4003bb9d } },
     /* 114 */
     { { 0xa0803387,0x3b8e8189,0x39cbd404,0xece115f5,0xd2877f21,0x4297208d,
         0xa07f2f9e,0x53765522 },
       { 0xa8a4182d,0xa4980a21,0x3219df79,0xa2bbd07a,0x1a19a2d4,0x674d0a2e,
-        0x6c5d4549,0x7a056f58 },
-      0 },
+        0x6c5d4549,0x7a056f58 } },
     /* 115 */
     { { 0x9d8a2a47,0x646b2558,0xc3df2773,0x5b582948,0xabf0d539,0x51ec000e,
         0x7a1a2675,0x77d482f1 },
       { 0x87853948,0xb8a1bd95,0x6cfbffee,0xa6f817bd,0x80681e47,0xab6ec057,
-        0x2b38b0e4,0x4115012b },
-      0 },
+        0x2b38b0e4,0x4115012b } },
     /* 116 */
     { { 0x6de28ced,0x3c73f0f4,0x9b13ec47,0x1d5da760,0x6e5c6392,0x61b8ce9e,
         0xfbea0946,0xcdf04572 },
       { 0x6c53c3b0,0x1cb3c58b,0x447b843c,0x97fe3c10,0x2cb9780e,0xfb2b8ae1,
-        0x97383109,0xee703dda },
-      0 },
+        0x97383109,0xee703dda } },
     /* 117 */
     { { 0xff57e43a,0x34515140,0xb1b811b8,0xd44660d3,0x8f42b986,0x2b3b5dff,
         0xa162ce21,0x2a0ad89d },
       { 0x6bc277ba,0x64e4a694,0xc141c276,0xc788c954,0xcabf6274,0x141aa64c,
-        0xac2b4659,0xd62d0b67 },
-      0 },
+        0xac2b4659,0xd62d0b67 } },
     /* 118 */
     { { 0x2c054ac4,0x39c5d87b,0xf27df788,0x57005859,0xb18128d6,0xedf7cbf3,
         0x991c2426,0xb39a23f2 },
       { 0xf0b16ae5,0x95284a15,0xa136f51b,0x0c6a05b1,0xf2700783,0x1d63c137,
-        0xc0674cc5,0x04ed0092 },
-      0 },
+        0xc0674cc5,0x04ed0092 } },
     /* 119 */
     { { 0x9ae90393,0x1f4185d1,0x4a3d64e6,0x3047b429,0x9854fc14,0xae0001a6,
         0x0177c387,0xa0a91fc1 },
       { 0xae2c831e,0xff0a3f01,0x2b727e16,0xbb76ae82,0x5a3075b4,0x8f12c8a1,
-        0x9ed20c41,0x084cf988 },
-      0 },
+        0x9ed20c41,0x084cf988 } },
     /* 120 */
     { { 0xfca6becf,0xd98509de,0x7dffb328,0x2fceae80,0x4778e8b9,0x5d8a15c4,
         0x73abf77e,0xd57955b2 },
       { 0x31b5d4f1,0x210da79e,0x3cfa7a1c,0xaa52f04b,0xdc27c20b,0xd4d12089,
-        0x02d141f1,0x8e14ea42 },
-      0 },
+        0x02d141f1,0x8e14ea42 } },
     /* 121 */
     { { 0xf2897042,0xeed50345,0x43402c4a,0x8d05331f,0xc8bdfb21,0xc8d9c194,
         0x2aa4d158,0x597e1a37 },
       { 0xcf0bd68c,0x0327ec1a,0xab024945,0x6d4be0dc,0xc9fe3e84,0x5b9c8d7a,
-        0x199b4dea,0xca3f0236 },
-      0 },
+        0x199b4dea,0xca3f0236 } },
     /* 122 */
     { { 0x6170bd20,0x592a10b5,0x6d3f5de7,0x0ea897f1,0x44b2ade2,0xa3363ff1,
         0x309c07e4,0xbde7fd7e },
       { 0xb8f5432c,0x516bb6d2,0xe043444b,0x210dc1cb,0xf8f95b5a,0x3db01e6f,
-        0x0a7dd198,0xb623ad0e },
-      0 },
+        0x0a7dd198,0xb623ad0e } },
     /* 123 */
     { { 0x60c7b65b,0xa75bd675,0x23a4a289,0xab8c5590,0xd7b26795,0xf8220fd0,
         0x58ec137b,0xd6aa2e46 },
       { 0x5138bb85,0x10abc00b,0xd833a95c,0x8c31d121,0x1702a32e,0xb24ff00b,
-        0x2dcc513a,0x111662e0 },
-      0 },
+        0x2dcc513a,0x111662e0 } },
     /* 124 */
     { { 0xefb42b87,0x78114015,0x1b6c4dff,0xbd9f5d70,0xa7d7c129,0x66ecccd7,
         0x94b750f8,0xdb3ee1cb },
       { 0xf34837cf,0xb26f3db0,0xb9578d4f,0xe7eed18b,0x7c56657d,0x5d2cdf93,
-        0x52206a59,0x886a6442 },
-      0 },
+        0x52206a59,0x886a6442 } },
     /* 125 */
     { { 0x65b569ea,0x3c234cfb,0xf72119c1,0x20011141,0xa15a619e,0x8badc85d,
         0x018a17bc,0xa70cf4eb },
       { 0x8c4a6a65,0x224f97ae,0x0134378f,0x36e5cf27,0x4f7e0960,0xbe3a609e,
-        0xd1747b77,0xaa4772ab },
-      0 },
+        0xd1747b77,0xaa4772ab } },
     /* 126 */
     { { 0x7aa60cc0,0x67676131,0x0368115f,0xc7916361,0xbbc1bb5a,0xded98bb4,
         0x30faf974,0x611a6ddc },
       { 0xc15ee47a,0x30e78cbc,0x4e0d96a5,0x2e896282,0x3dd9ed88,0x36f35adf,
-        0x16429c88,0x5cfffaf8 },
-      0 },
+        0x16429c88,0x5cfffaf8 } },
     /* 127 */
     { { 0x9b7a99cd,0xc0d54cff,0x843c45a1,0x7bf3b99d,0x62c739e1,0x038a908f,
         0x7dc1994c,0x6e5a6b23 },
       { 0x0ba5db77,0xef8b454e,0xacf60d63,0xb7b8807f,0x76608378,0xe591c0c6,
-        0x242dabcc,0x481a238d },
-      0 },
+        0x242dabcc,0x481a238d } },
     /* 128 */
     { { 0x35d0b34a,0xe3417bc0,0x8327c0a7,0x440b386b,0xac0362d1,0x8fb7262d,
         0xe0cdf943,0x2c41114c },
       { 0xad95a0b1,0x2ba5cef1,0x67d54362,0xc09b37a8,0x01e486c9,0x26d6cdd2,
-        0x42ff9297,0x20477abf },
-      0 },
+        0x42ff9297,0x20477abf } },
     /* 129 */
     { { 0x18d65dbf,0x2f75173c,0x339edad8,0x77bf940e,0xdcf1001c,0x7022d26b,
         0xc77396b6,0xac66409a },
       { 0xc6261cc3,0x8b0bb36f,0x190e7e90,0x213f7bc9,0xa45e6c10,0x6541ceba,
-        0xcc122f85,0xce8e6975 },
-      0 },
+        0xcc122f85,0xce8e6975 } },
     /* 130 */
     { { 0xbc0a67d2,0x0f121b41,0x444d248a,0x62d4760a,0x659b4737,0x0e044f1d,
         0x250bb4a8,0x08fde365 },
       { 0x848bf287,0xaceec3da,0xd3369d6e,0xc2a62182,0x92449482,0x3582dfdc,
-        0x565d6cd7,0x2f7e2fd2 },
-      0 },
+        0x565d6cd7,0x2f7e2fd2 } },
     /* 131 */
     { { 0xc3770fa7,0xae4b92db,0x379043f9,0x095e8d5c,0x17761171,0x54f34e9d,
         0x907702ae,0xc65be92e },
       { 0xf6fd0a40,0x2758a303,0xbcce784b,0xe7d822e3,0x4f9767bf,0x7ae4f585,
-        0xd1193b3a,0x4bff8e47 },
-      0 },
+        0xd1193b3a,0x4bff8e47 } },
     /* 132 */
     { { 0x00ff1480,0xcd41d21f,0x0754db16,0x2ab8fb7d,0xbbe0f3ea,0xac81d2ef,
         0x5772967d,0x3e4e4ae6 },
       { 0x3c5303e6,0x7e18f36d,0x92262397,0x3bd9994b,0x1324c3c0,0x9ed70e26,
-        0x58ec6028,0x5388aefd },
-      0 },
+        0x58ec6028,0x5388aefd } },
     /* 133 */
     { { 0x5e5d7713,0xad1317eb,0x75de49da,0x09b985ee,0xc74fb261,0x32f5bc4f,
         0x4f75be0e,0x5cf908d1 },
       { 0x8e657b12,0x76043510,0xb96ed9e6,0xbfd421a5,0x8970ccc2,0x0e29f51f,
-        0x60f00ce2,0xa698ba40 },
-      0 },
+        0x60f00ce2,0xa698ba40 } },
     /* 134 */
     { { 0xef748fec,0x73db1686,0x7e9d2cf9,0xe6e755a2,0xce265eff,0x630b6544,
         0x7aebad8d,0xb142ef8a },
       { 0x17d5770a,0xad31af9f,0x2cb3412f,0x66af3b67,0xdf3359de,0x6bd60d1b,
-        0x58515075,0xd1896a96 },
-      0 },
+        0x58515075,0xd1896a96 } },
     /* 135 */
     { { 0x33c41c08,0xec5957ab,0x5468e2e1,0x87de94ac,0xac472f6c,0x18816b73,
         0x7981da39,0x267b0e0b },
       { 0x8e62b988,0x6e554e5d,0x116d21e7,0xd8ddc755,0x3d2a6f99,0x4610faf0,
-        0xa1119393,0xb54e287a },
-      0 },
+        0xa1119393,0xb54e287a } },
     /* 136 */
     { { 0x178a876b,0x0a0122b5,0x085104b4,0x51ff96ff,0x14f29f76,0x050b31ab,
         0x5f87d4e6,0x84abb28b },
       { 0x8270790a,0xd5ed439f,0x85e3f46b,0x2d6cb59d,0x6c1e2212,0x75f55c1b,
-        0x17655640,0xe5436f67 },
-      0 },
+        0x17655640,0xe5436f67 } },
     /* 137 */
     { { 0x2286e8d5,0x53f9025e,0x864453be,0x353c95b4,0xe408e3a0,0xd832f5bd,
         0x5b9ce99e,0x0404f68b },
       { 0xa781e8e5,0xcad33bde,0x163c2f5b,0x3cdf5018,0x0119caa3,0x57576960,
-        0x0ac1c701,0x3a4263df },
-      0 },
+        0x0ac1c701,0x3a4263df } },
     /* 138 */
     { { 0x9aeb596d,0xc2965ecc,0x023c92b4,0x01ea03e7,0x2e013961,0x4704b4b6,
         0x905ea367,0x0ca8fd3f },
       { 0x551b2b61,0x92523a42,0x390fcd06,0x1eb7a89c,0x0392a63e,0xe7f1d2be,
-        0x4ddb0c33,0x96dca264 },
-      0 },
+        0x4ddb0c33,0x96dca264 } },
     /* 139 */
     { { 0x387510af,0x203bb43a,0xa9a36a01,0x846feaa8,0x2f950378,0xd23a5770,
         0x3aad59dc,0x4363e212 },
       { 0x40246a47,0xca43a1c7,0xe55dd24d,0xb362b8d2,0x5d8faf96,0xf9b08604,
-        0xd8bb98c4,0x840e115c },
-      0 },
+        0xd8bb98c4,0x840e115c } },
     /* 140 */
     { { 0x1023e8a7,0xf12205e2,0xd8dc7a0b,0xc808a8cd,0x163a5ddf,0xe292a272,
         0x30ded6d4,0x5e0d6abd },
       { 0x7cfc0f64,0x07a721c2,0x0e55ed88,0x42eec01d,0x1d1f9db2,0x26a7bef9,
-        0x2945a25a,0x7dea48f4 },
-      0 },
+        0x2945a25a,0x7dea48f4 } },
     /* 141 */
     { { 0xe5060a81,0xabdf6f1c,0xf8f95615,0xe79f9c72,0x06ac268b,0xcfd36c54,
         0xebfd16d1,0xabc2a2be },
       { 0xd3e2eac7,0x8ac66f91,0xd2dd0466,0x6f10ba63,0x0282d31b,0x6790e377,
-        0x6c7eefc1,0x4ea35394 },
-      0 },
+        0x6c7eefc1,0x4ea35394 } },
     /* 142 */
     { { 0x5266309d,0xed8a2f8d,0x81945a3e,0x0a51c6c0,0x578c5dc1,0xcecaf45a,
         0x1c94ffc3,0x3a76e689 },
       { 0x7d7b0d0f,0x9aace8a4,0x8f584a5f,0x963ace96,0x4e697fbe,0x51a30c72,
-        0x465e6464,0x8212a10a },
-      0 },
+        0x465e6464,0x8212a10a } },
     /* 143 */
     { { 0xcfab8caa,0xef7c61c3,0x0e142390,0x18eb8e84,0x7e9733ca,0xcd1dff67,
         0x599cb164,0xaa7cab71 },
       { 0xbc837bd1,0x02fc9273,0xc36af5d7,0xc06407d0,0xf423da49,0x17621292,
-        0xfe0617c3,0x40e38073 },
-      0 },
+        0xfe0617c3,0x40e38073 } },
     /* 144 */
     { { 0xa7bf9b7c,0xf4f80824,0x3fbe30d0,0x365d2320,0x97cf9ce3,0xbfbe5320,
         0xb3055526,0xe3604700 },
       { 0x6cc6c2c7,0x4dcb9911,0xba4cbee6,0x72683708,0x637ad9ec,0xdcded434,
-        0xa3dee15f,0x6542d677 },
-      0 },
+        0xa3dee15f,0x6542d677 } },
     /* 145 */
     { { 0x7b6c377a,0x3f32b6d0,0x903448be,0x6cb03847,0x20da8af7,0xd6fdd3a8,
         0x09bb6f21,0xa6534aee },
       { 0x1035facf,0x30a1780d,0x9dcb47e6,0x35e55a33,0xc447f393,0x6ea50fe1,
-        0xdc9aef22,0xf3cb672f },
-      0 },
+        0xdc9aef22,0xf3cb672f } },
     /* 146 */
     { { 0x3b55fd83,0xeb3719fe,0x875ddd10,0xe0d7a46c,0x05cea784,0x33ac9fa9,
         0xaae870e7,0x7cafaa2e },
       { 0x1d53b338,0x9b814d04,0xef87e6c6,0xe0acc0a0,0x11672b0f,0xfb93d108,
-        0xb9bd522e,0x0aab13c1 },
-      0 },
+        0xb9bd522e,0x0aab13c1 } },
     /* 147 */
     { { 0xd2681297,0xddcce278,0xb509546a,0xcb350eb1,0x7661aaf2,0x2dc43173,
         0x847012e9,0x4b91a602 },
       { 0x72f8ddcf,0xdcff1095,0x9a911af4,0x08ebf61e,0xc372430e,0x48f4360a,
-        0x72321cab,0x49534c53 },
-      0 },
+        0x72321cab,0x49534c53 } },
     /* 148 */
     { { 0xf07b7e9d,0x83df7d71,0x13cd516f,0xa478efa3,0x6c047ee3,0x78ef264b,
         0xd65ac5ee,0xcaf46c4f },
       { 0x92aa8266,0xa04d0c77,0x913684bb,0xedf45466,0xae4b16b0,0x56e65168,
-        0x04c6770f,0x14ce9e57 },
-      0 },
+        0x04c6770f,0x14ce9e57 } },
     /* 149 */
     { { 0x965e8f91,0x99445e3e,0xcb0f2492,0xd3aca1ba,0x90c8a0a0,0xd31cc70f,
         0x3e4c9a71,0x1bb708a5 },
       { 0x558bdd7a,0xd5ca9e69,0x018a26b1,0x734a0508,0x4c9cf1ec,0xb093aa71,
-        0xda300102,0xf9d126f2 },
-      0 },
+        0xda300102,0xf9d126f2 } },
     /* 150 */
     { { 0xaff9563e,0x749bca7a,0xb49914a0,0xdd077afe,0xbf5f1671,0xe27a0311,
         0x729ecc69,0x807afcb9 },
       { 0xc9b08b77,0x7f8a9337,0x443c7e38,0x86c3a785,0x476fd8ba,0x85fafa59,
-        0x6568cd8c,0x751adcd1 },
-      0 },
+        0x6568cd8c,0x751adcd1 } },
     /* 151 */
     { { 0x10715c0d,0x8aea38b4,0x8f7697f7,0xd113ea71,0x93fbf06d,0x665eab14,
         0x2537743f,0x29ec4468 },
       { 0xb50bebbc,0x3d94719c,0xe4505422,0x399ee5bf,0x8d2dedb1,0x90cd5b3a,
-        0x92a4077d,0xff9370e3 },
-      0 },
+        0x92a4077d,0xff9370e3 } },
     /* 152 */
     { { 0xc6b75b65,0x59a2d69b,0x266651c5,0x4188f8d5,0x3de9d7d2,0x28a9f33e,
         0xa2a9d01a,0x9776478b },
       { 0x929af2c7,0x8852622d,0x4e690923,0x334f5d6d,0xa89a51e9,0xce6cc7e5,
-        0xac2f82fa,0x74a6313f },
-      0 },
+        0xac2f82fa,0x74a6313f } },
     /* 153 */
     { { 0xb75f079c,0xb2f4dfdd,0x18e36fbb,0x85b07c95,0xe7cd36dd,0x1b6cfcf0,
         0x0ff4863d,0xab75be15 },
       { 0x173fc9b7,0x81b367c0,0xd2594fd0,0xb90a7420,0xc4091236,0x15fdbf03,
-        0x0b4459f6,0x4ebeac2e },
-      0 },
+        0x0b4459f6,0x4ebeac2e } },
     /* 154 */
     { { 0x5c9f2c53,0xeb6c5fe7,0x8eae9411,0xd2522011,0xf95ac5d8,0xc8887633,
         0x2c1baffc,0xdf99887b },
       { 0x850aaecb,0xbb78eed2,0x01d6a272,0x9d49181b,0xb1cdbcac,0x978dd511,
-        0x779f4058,0x27b040a7 },
-      0 },
+        0x779f4058,0x27b040a7 } },
     /* 155 */
     { { 0xf73b2eb2,0x90405db7,0x8e1b2118,0xe0df8508,0x5962327e,0x501b7152,
         0xe4cfa3f5,0xb393dd37 },
       { 0x3fd75165,0xa1230e7b,0xbcd33554,0xd66344c2,0x0f7b5022,0x6c36f1be,
-        0xd0463419,0x09588c12 },
-      0 },
+        0xd0463419,0x09588c12 } },
     /* 156 */
     { { 0x02601c3b,0xe086093f,0xcf5c335f,0xfb0252f8,0x894aff28,0x955cf280,
         0xdb9f648b,0x81c879a9 },
       { 0xc6f56c51,0x040e687c,0x3f17618c,0xfed47169,0x9059353b,0x44f88a41,
-        0x5fc11bc4,0xfa0d48f5 },
-      0 },
+        0x5fc11bc4,0xfa0d48f5 } },
     /* 157 */
     { { 0xe1608e4d,0xbc6e1c9d,0x3582822c,0x010dda11,0x157ec2d7,0xf6b7ddc1,
         0xb6a367d6,0x8ea0e156 },
       { 0x2383b3b4,0xa354e02f,0x3f01f53c,0x69966b94,0x2de03ca5,0x4ff6632b,
-        0xfa00b5ac,0x3f5ab924 },
-      0 },
+        0xfa00b5ac,0x3f5ab924 } },
     /* 158 */
     { { 0x59739efb,0x337bb0d9,0xe7ebec0d,0xc751b0f4,0x411a67d1,0x2da52dd6,
         0x2b74256e,0x8bc76887 },
       { 0x82d3d253,0xa5be3b72,0xf58d779f,0xa9f679a1,0xe16767bb,0xa1cac168,
-        0x60fcf34f,0xb386f190 },
-      0 },
+        0x60fcf34f,0xb386f190 } },
     /* 159 */
     { { 0x2fedcfc2,0x31f3c135,0x62f8af0d,0x5396bf62,0xe57288c2,0x9a02b4ea,
         0x1b069c4d,0x4cb460f7 },
       { 0x5b8095ea,0xae67b4d3,0x6fc07603,0x92bbf859,0xb614a165,0xe1475f66,
-        0x95ef5223,0x52c0d508 },
-      0 },
+        0x95ef5223,0x52c0d508 } },
     /* 160 */
     { { 0x15339848,0x231c210e,0x70778c8d,0xe87a28e8,0x6956e170,0x9d1de661,
         0x2bb09c0b,0x4ac3c938 },
       { 0x6998987d,0x19be0551,0xae09f4d6,0x8b2376c4,0x1a3f933d,0x1de0b765,
-        0xe39705f4,0x380d94c7 },
-      0 },
+        0xe39705f4,0x380d94c7 } },
     /* 161 */
     { { 0x81542e75,0x01a355aa,0xee01b9b7,0x96c724a1,0x624d7087,0x6b3a2977,
         0xde2637af,0x2ce3e171 },
       { 0xf5d5bc1a,0xcfefeb49,0x2777e2b5,0xa655607e,0x9513756c,0x4feaac2f,
-        0x0b624e4d,0x2e6cd852 },
-      0 },
+        0x0b624e4d,0x2e6cd852 } },
     /* 162 */
     { { 0x8c31c31d,0x3685954b,0x5bf21a0c,0x68533d00,0x75c79ec9,0x0bd7626e,
         0x42c69d54,0xca177547 },
       { 0xf6d2dbb2,0xcc6edaff,0x174a9d18,0xfd0d8cbd,0xaa4578e8,0x875e8793,
-        0x9cab2ce6,0xa976a713 },
-      0 },
+        0x9cab2ce6,0xa976a713 } },
     /* 163 */
     { { 0x93fb353d,0x0a651f1b,0x57fcfa72,0xd75cab8b,0x31b15281,0xaa88cfa7,
         0x0a1f4999,0x8720a717 },
       { 0x693e1b90,0x8c3e8d37,0x16f6dfc3,0xd345dc0b,0xb52a8742,0x8ea8d00a,
-        0xc769893c,0x9719ef29 },
-      0 },
+        0xc769893c,0x9719ef29 } },
     /* 164 */
     { { 0x58e35909,0x820eed8d,0x33ddc116,0x9366d8dc,0x6e205026,0xd7f999d0,
         0xe15704c1,0xa5072976 },
       { 0xc4e70b2e,0x002a37ea,0x6890aa8a,0x84dcf657,0x645b2a5c,0xcd71bf18,
-        0xf7b77725,0x99389c9d },
-      0 },
+        0xf7b77725,0x99389c9d } },
     /* 165 */
     { { 0x7ada7a4b,0x238c08f2,0xfd389366,0x3abe9d03,0x766f512c,0x6b672e89,
         0x202c82e4,0xa88806aa },
       { 0xd380184e,0x6602044a,0x126a8b85,0xa8cb78c4,0xad844f17,0x79d670c0,
-        0x4738dcfe,0x0043bffb },
-      0 },
+        0x4738dcfe,0x0043bffb } },
     /* 166 */
     { { 0x36d5192e,0x8d59b5dc,0x4590b2af,0xacf885d3,0x11601781,0x83566d0a,
         0xba6c4866,0x52f3ef01 },
       { 0x0edcb64d,0x3986732a,0x8068379f,0x0a482c23,0x7040f309,0x16cbe5fa,
-        0x9ef27e75,0x3296bd89 },
-      0 },
+        0x9ef27e75,0x3296bd89 } },
     /* 167 */
     { { 0x454d81d7,0x476aba89,0x51eb9b3c,0x9eade7ef,0x81c57986,0x619a21cd,
         0xaee571e9,0x3b90febf },
       { 0x5496f7cb,0x9393023e,0x7fb51bc4,0x55be41d8,0x99beb5ce,0x03f1dd48,
-        0x9f810b18,0x6e88069d },
-      0 },
+        0x9f810b18,0x6e88069d } },
     /* 168 */
     { { 0xb43ea1db,0xce37ab11,0x5259d292,0x0a7ff1a9,0x8f84f186,0x851b0221,
         0xdefaad13,0xa7222bea },
       { 0x2b0a9144,0xa2ac78ec,0xf2fa59c5,0x5a024051,0x6147ce38,0x91d1eca5,
-        0xbc2ac690,0xbe94d523 },
-      0 },
+        0xbc2ac690,0xbe94d523 } },
     /* 169 */
     { { 0x0b226ce7,0x72f4945e,0x967e8b70,0xb8afd747,0x85a6c63e,0xedea46f1,
         0x9be8c766,0x7782defe },
       { 0x3db38626,0x760d2aa4,0x76f67ad1,0x460ae787,0x54499cdb,0x341b86fc,
-        0xa2892e4b,0x03838567 },
-      0 },
+        0xa2892e4b,0x03838567 } },
     /* 170 */
     { { 0x79ec1a0f,0x2d8daefd,0xceb39c97,0x3bbcd6fd,0x58f61a95,0xf5575ffc,
         0xadf7b420,0xdbd986c4 },
       { 0x15f39eb7,0x81aa8814,0xb98d976c,0x6ee2fcf5,0xcf2f717d,0x5465475d,
-        0x6860bbd0,0x8e24d3c4 },
-      0 },
+        0x6860bbd0,0x8e24d3c4 } },
     /* 171 */
     { { 0x9a587390,0x749d8e54,0x0cbec588,0x12bb194f,0xb25983c6,0x46e07da4,
         0x407bafc8,0x541a99c4 },
       { 0x624c8842,0xdb241692,0xd86c05ff,0x6044c12a,0x4f7fcf62,0xc59d14b4,
-        0xf57d35d1,0xc0092c49 },
-      0 },
+        0xf57d35d1,0xc0092c49 } },
     /* 172 */
     { { 0xdf2e61ef,0xd3cc75c3,0x2e1b35ca,0x7e8841c8,0x909f29f4,0xc62d30d1,
         0x7286944d,0x75e40634 },
       { 0xbbc237d0,0xe7d41fc5,0xec4f01c9,0xc9537bf0,0x282bd534,0x91c51a16,
-        0xc7848586,0x5b7cb658 },
-      0 },
+        0xc7848586,0x5b7cb658 } },
     /* 173 */
     { { 0x8a28ead1,0x964a7084,0xfd3b47f6,0x802dc508,0x767e5b39,0x9ae4bfd1,
         0x8df097a1,0x7ae13eba },
       { 0xeadd384e,0xfd216ef8,0xb6b2ff06,0x0361a2d9,0x4bcdb5f3,0x204b9878,
-        0xe2a8e3fd,0x787d8074 },
-      0 },
+        0xe2a8e3fd,0x787d8074 } },
     /* 174 */
     { { 0x757fbb1c,0xc5e25d6b,0xca201deb,0xe47bddb2,0x6d2233ff,0x4a55e9a3,
         0x9ef28484,0x5c222819 },
       { 0x88315250,0x773d4a85,0x827097c1,0x21b21a2b,0xdef5d33f,0xab7c4ea1,
-        0xbaf0f2b0,0xe45d37ab },
-      0 },
+        0xbaf0f2b0,0xe45d37ab } },
     /* 175 */
     { { 0x28511c8a,0xd2df1e34,0xbdca6cd3,0xebb229c8,0x627c39a7,0x578a71a7,
         0x84dfb9d3,0xed7bc122 },
       { 0x93dea561,0xcf22a6df,0xd48f0ed1,0x5443f18d,0x5bad23e8,0xd8b86140,
-        0x45ca6d27,0xaac97cc9 },
-      0 },
+        0x45ca6d27,0xaac97cc9 } },
     /* 176 */
     { { 0xa16bd00a,0xeb54ea74,0xf5c0bcc1,0xd839e9ad,0x1f9bfc06,0x092bb7f1,
         0x1163dc4e,0x318f97b3 },
       { 0xc30d7138,0xecc0c5be,0xabc30220,0x44e8df23,0xb0223606,0x2bb7972f,
-        0x9a84ff4d,0xfa41faa1 },
-      0 },
+        0x9a84ff4d,0xfa41faa1 } },
     /* 177 */
     { { 0xa6642269,0x4402d974,0x9bb783bd,0xc81814ce,0x7941e60b,0x398d38e4,
         0x1d26e9e2,0x38bb6b2c },
       { 0x6a577f87,0xc64e4a25,0xdc11fe1c,0x8b52d253,0x62280728,0xff336abf,
-        0xce7601a5,0x94dd0905 },
-      0 },
+        0xce7601a5,0x94dd0905 } },
     /* 178 */
     { { 0xde93f92a,0x156cf7dc,0x89b5f315,0xa01333cb,0xc995e750,0x02404df9,
         0xd25c2ae9,0x92077867 },
       { 0x0bf39d44,0xe2471e01,0x96bb53d7,0x5f2c9020,0x5c9c3d8f,0x4c44b7b3,
-        0xd29beb51,0x81e8428b },
-      0 },
+        0xd29beb51,0x81e8428b } },
     /* 179 */
     { { 0xc477199f,0x6dd9c2ba,0x6b5ecdd9,0x8cb8eeee,0xee40fd0e,0x8af7db3f,
         0xdbbfa4b1,0x1b94ab62 },
       { 0xce47f143,0x44f0d8b3,0x63f46163,0x51e623fc,0xcc599383,0xf18f270f,
-        0x055590ee,0x06a38e28 },
-      0 },
+        0x055590ee,0x06a38e28 } },
     /* 180 */
     { { 0xb3355b49,0x2e5b0139,0xb4ebf99b,0x20e26560,0xd269f3dc,0xc08ffa6b,
         0x83d9d4f8,0xa7b36c20 },
       { 0x1b3e8830,0x64d15c3a,0xa89f9c0b,0xd5fceae1,0xe2d16930,0xcfeee4a2,
-        0xa2822a20,0xbe54c6b4 },
-      0 },
+        0xa2822a20,0xbe54c6b4 } },
     /* 181 */
     { { 0x8d91167c,0xd6cdb3df,0xe7a6625e,0x517c3f79,0x346ac7f4,0x7105648f,
         0xeae022bb,0xbf30a5ab },
       { 0x93828a68,0x8e7785be,0x7f3ef036,0x5161c332,0x592146b2,0xe11b5feb,
-        0x2732d13a,0xd1c820de },
-      0 },
+        0x2732d13a,0xd1c820de } },
     /* 182 */
     { { 0x9038b363,0x043e1347,0x6b05e519,0x58c11f54,0x6026cad1,0x4fe57abe,
         0x68a18da3,0xb7d17bed },
       { 0xe29c2559,0x44ca5891,0x5bfffd84,0x4f7a0376,0x74e46948,0x498de4af,
-        0x6412cc64,0x3997fd5e },
-      0 },
+        0x6412cc64,0x3997fd5e } },
     /* 183 */
     { { 0x8bd61507,0xf2074682,0x34a64d2a,0x29e132d5,0x8a8a15e3,0xffeddfb0,
         0x3c6c13e8,0x0eeb8929 },
       { 0xa7e259f8,0xe9b69a3e,0xd13e7e67,0xce1db7e6,0xad1fa685,0x277318f6,
-        0xc922b6ef,0x228916f8 },
-      0 },
+        0xc922b6ef,0x228916f8 } },
     /* 184 */
     { { 0x0a12ab5b,0x959ae25b,0x957bc136,0xcc11171f,0xd16e2b0c,0x8058429e,
         0x6e93097e,0xec05ad1d },
       { 0xac3f3708,0x157ba5be,0x30b59d77,0x31baf935,0x118234e5,0x47b55237,
-        0x7ff11b37,0x7d314156 },
-      0 },
+        0x7ff11b37,0x7d314156 } },
     /* 185 */
     { { 0xf6dfefab,0x7bd9c05c,0xdcb37707,0xbe2f2268,0x3a38bb95,0xe53ead97,
         0x9bc1d7a3,0xe9ce66fc },
       { 0x6f6a02a1,0x75aa1576,0x60e600ed,0x38c087df,0x68cdc1b9,0xf8947f34,
-        0x72280651,0xd9650b01 },
-      0 },
+        0x72280651,0xd9650b01 } },
     /* 186 */
     { { 0x5a057e60,0x504b4c4a,0x8def25e4,0xcbccc3be,0x17c1ccbd,0xa6353208,
         0x804eb7a2,0x14d6699a },
       { 0xdb1f411a,0x2c8a8415,0xf80d769c,0x09fbaf0b,0x1c2f77ad,0xb4deef90,
-        0x0d43598a,0x6f4c6841 },
-      0 },
+        0x0d43598a,0x6f4c6841 } },
     /* 187 */
     { { 0x96c24a96,0x8726df4e,0xfcbd99a3,0x534dbc85,0x8b2ae30a,0x3c466ef2,
         0x61189abb,0x4c4350fd },
       { 0xf855b8da,0x2967f716,0x463c38a1,0x41a42394,0xeae93343,0xc37e1413,
-        0x5a3118b5,0xa726d242 },
-      0 },
+        0x5a3118b5,0xa726d242 } },
     /* 188 */
     { { 0x948c1086,0xdae6b3ee,0xcbd3a2e1,0xf1de503d,0x03d022f3,0x3f35ed3f,
         0xcc6cf392,0x13639e82 },
       { 0xcdafaa86,0x9ac938fb,0x2654a258,0xf45bc5fb,0x45051329,0x1963b26e,
-        0xc1a335a3,0xca9365e1 },
-      0 },
+        0xc1a335a3,0xca9365e1 } },
     /* 189 */
     { { 0x4c3b2d20,0x3615ac75,0x904e241b,0x742a5417,0xcc9d071d,0xb08521c4,
         0x970b72a5,0x9ce29c34 },
       { 0x6d3e0ad6,0x8cc81f73,0xf2f8434c,0x8060da9e,0x6ce862d9,0x35ed1d1a,
-        0xab42af98,0x48c4abd7 },
-      0 },
+        0xab42af98,0x48c4abd7 } },
     /* 190 */
     { { 0x40c7485a,0xd221b0cc,0xe5274dbf,0xead455bb,0x9263d2e8,0x493c7698,
         0xf67b33cb,0x78017c32 },
       { 0x930cb5ee,0xb9d35769,0x0c408ed2,0xc0d14e94,0x272f1a4d,0xf8b7bf55,
-        0xde5c1c04,0x53cd0454 },
-      0 },
+        0xde5c1c04,0x53cd0454 } },
     /* 191 */
     { { 0x5d28ccac,0xbcd585fa,0x005b746e,0x5f823e56,0xcd0123aa,0x7c79f0a1,
         0xd3d7fa8f,0xeea465c1 },
       { 0x0551803b,0x7810659f,0x7ce6af70,0x6c0b599f,0x29288e70,0x4195a770,
-        0x7ae69193,0x1b6e42a4 },
-      0 },
+        0x7ae69193,0x1b6e42a4 } },
     /* 192 */
     { { 0xf67d04c3,0x2e80937c,0x89eeb811,0x1e312be2,0x92594d60,0x56b5d887,
         0x187fbd3d,0x0224da14 },
       { 0x0c5fe36f,0x87abb863,0x4ef51f5f,0x580f3c60,0xb3b429ec,0x964fb1bf,
-        0x42bfff33,0x60838ef0 },
-      0 },
+        0x42bfff33,0x60838ef0 } },
     /* 193 */
     { { 0x7e0bbe99,0x432cb2f2,0x04aa39ee,0x7bda44f3,0x9fa93903,0x5f497c7a,
         0x2d331643,0x636eb202 },
       { 0x93ae00aa,0xfcfd0e61,0x31ae6d2f,0x875a00fe,0x9f93901c,0xf43658a2,
-        0x39218bac,0x8844eeb6 },
-      0 },
+        0x39218bac,0x8844eeb6 } },
     /* 194 */
     { { 0x6b3bae58,0x114171d2,0x17e39f3e,0x7db3df71,0x81a8eada,0xcd37bc7f,
         0x51fb789e,0x27ba83dc },
       { 0xfbf54de5,0xa7df439f,0xb5fe1a71,0x7277030b,0xdb297a48,0x42ee8e35,
-        0x87f3a4ab,0xadb62d34 },
-      0 },
+        0x87f3a4ab,0xadb62d34 } },
     /* 195 */
     { { 0xa175df2a,0x9b1168a2,0x618c32e9,0x082aa04f,0x146b0916,0xc9e4f2e7,
         0x75e7c8b2,0xb990fd76 },
       { 0x4df37313,0x0829d96b,0xd0b40789,0x1c205579,0x78087711,0x66c9ae4a,
-        0x4d10d18d,0x81707ef9 },
-      0 },
+        0x4d10d18d,0x81707ef9 } },
     /* 196 */
     { { 0x03d6ff96,0x97d7cab2,0x0d843360,0x5b851bfc,0xd042db4b,0x268823c4,
         0xd5a8aa5c,0x3792daea },
       { 0x941afa0b,0x52818865,0x42d83671,0xf3e9e741,0x5be4e0a7,0x17c82527,
-        0x94b001ba,0x5abd635e },
-      0 },
+        0x94b001ba,0x5abd635e } },
     /* 197 */
     { { 0x0ac4927c,0x727fa84e,0xa7c8cf23,0xe3886035,0x4adca0df,0xa4bcd5ea,
         0x846ab610,0x5995bf21 },
       { 0x829dfa33,0xe90f860b,0x958fc18b,0xcaafe2ae,0x78630366,0x9b3baf44,
-        0xd483411e,0x44c32ca2 },
-      0 },
+        0xd483411e,0x44c32ca2 } },
     /* 198 */
     { { 0xe40ed80c,0xa74a97f1,0x31d2ca82,0x5f938cb1,0x7c2d6ad9,0x53f2124b,
         0x8082a54c,0x1f2162fb },
       { 0x720b173e,0x7e467cc5,0x085f12f9,0x40e8a666,0x4c9d65dc,0x8cebc20e,
-        0xc3e907c9,0x8f1d402b },
-      0 },
+        0xc3e907c9,0x8f1d402b } },
     /* 199 */
     { { 0xfbc4058a,0x4f592f9c,0x292f5670,0xb15e14b6,0xbc1d8c57,0xc55cfe37,
         0x926edbf9,0xb1980f43 },
       { 0x32c76b09,0x98c33e09,0x33b07f78,0x1df5279d,0x863bb461,0x6f08ead4,
-        0x37448e45,0x2828ad9b },
-      0 },
+        0x37448e45,0x2828ad9b } },
     /* 200 */
     { { 0xc4cf4ac5,0x696722c4,0xdde64afb,0xf5ac1a3f,0xe0890832,0x0551baa2,
         0x5a14b390,0x4973f127 },
       { 0x322eac5d,0xe59d8335,0x0bd9b568,0x5e07eef5,0xa2588393,0xab36720f,
-        0xdb168ac7,0x6dac8ed0 },
-      0 },
+        0xdb168ac7,0x6dac8ed0 } },
     /* 201 */
     { { 0xeda835ef,0xf7b545ae,0x1d10ed51,0x4aa113d2,0x13741b09,0x035a65e0,
         0x20b9de4c,0x4b23ef59 },
       { 0x3c4c7341,0xe82bb680,0x3f58bc37,0xd457706d,0xa51e3ee8,0x73527863,
-        0xddf49a4e,0x4dd71534 },
-      0 },
+        0xddf49a4e,0x4dd71534 } },
     /* 202 */
     { { 0x95476cd9,0xbf944672,0xe31a725b,0x648d072f,0xfc4b67e0,0x1441c8b8,
         0x2f4a4dbb,0xfd317000 },
       { 0x8995d0e1,0x1cb43ff4,0x0ef729aa,0x76e695d1,0x41798982,0xe0d5f976,
-        0x9569f365,0x14fac58c },
-      0 },
+        0x9569f365,0x14fac58c } },
     /* 203 */
     { { 0xf312ae18,0xad9a0065,0xfcc93fc9,0x51958dc0,0x8a7d2846,0xd9a14240,
         0x36abda50,0xed7c7651 },
       { 0x25d4abbc,0x46270f1a,0xf1a113ea,0x9b5dd8f3,0x5b51952f,0xc609b075,
-        0x4d2e9f53,0xfefcb7f7 },
-      0 },
+        0x4d2e9f53,0xfefcb7f7 } },
     /* 204 */
     { { 0xba119185,0xbd09497a,0xaac45ba4,0xd54e8c30,0xaa521179,0x492479de,
         0x87e0d80b,0x1801a57e },
       { 0xfcafffb0,0x073d3f8d,0xae255240,0x6cf33c0b,0x5b5fdfbc,0x781d763b,
-        0x1ead1064,0x9f8fc11e },
-      0 },
+        0x1ead1064,0x9f8fc11e } },
     /* 205 */
     { { 0x5e69544c,0x1583a171,0xf04b7813,0x0eaf8567,0x278a4c32,0x1e22a8fd,
         0x3d3a69a9,0xa9d3809d },
       { 0x59a2da3b,0x936c2c2c,0x1895c847,0x38ccbcf6,0x63d50869,0x5e65244e,
-        0xe1178ef7,0x3006b9ae },
-      0 },
+        0xe1178ef7,0x3006b9ae } },
     /* 206 */
     { { 0xc9eead28,0x0bb1f2b0,0x89f4dfbc,0x7eef635d,0xb2ce8939,0x074757fd,
         0x45f8f761,0x0ab85fd7 },
       { 0x3e5b4549,0xecda7c93,0x97922f21,0x4be2bb5c,0xb43b8040,0x261a1274,
-        0x11e942c2,0xb122d675 },
-      0 },
+        0x11e942c2,0xb122d675 } },
     /* 207 */
     { { 0x66a5ae7a,0x3be607be,0x76adcbe3,0x01e703fa,0x4eb6e5c5,0xaf904301,
         0x097dbaec,0x9f599dc1 },
       { 0x0ff250ed,0x6d75b718,0x349a20dc,0x8eb91574,0x10b227a3,0x425605a4,
-        0x8a294b78,0x7d5528e0 },
-      0 },
+        0x8a294b78,0x7d5528e0 } },
     /* 208 */
     { { 0x20c26def,0xf0f58f66,0x582b2d1e,0x025585ea,0x01ce3881,0xfbe7d79b,
         0x303f1730,0x28ccea01 },
       { 0x79644ba5,0xd1dabcd1,0x06fff0b8,0x1fc643e8,0x66b3e17b,0xa60a76fc,
-        0xa1d013bf,0xc18baf48 },
-      0 },
+        0xa1d013bf,0xc18baf48 } },
     /* 209 */
     { { 0x5dc4216d,0x34e638c8,0x206142ac,0x00c01067,0x95f5064a,0xd453a171,
         0xb7a9596b,0x9def809d },
       { 0x67ab8d2c,0x41e8642e,0x6237a2b6,0xb4240433,0x64c4218b,0x7d506a6d,
-        0x68808ce5,0x0357f8b0 },
-      0 },
+        0x68808ce5,0x0357f8b0 } },
     /* 210 */
     { { 0x4cd2cc88,0x8e9dbe64,0xf0b8f39d,0xcc61c28d,0xcd30a0c8,0x4a309874,
         0x1b489887,0xe4a01add },
       { 0xf57cd8f9,0x2ed1eeac,0xbd594c48,0x1b767d3e,0x7bd2f787,0xa7295c71,
-        0xce10cc30,0x466d7d79 },
-      0 },
+        0xce10cc30,0x466d7d79 } },
     /* 211 */
     { { 0x9dada2c7,0x47d31892,0x8f9aa27d,0x4fa0a6c3,0x820a59e1,0x90e4fd28,
         0x451ead1a,0xc672a522 },
       { 0x5d86b655,0x30607cc8,0xf9ad4af1,0xf0235d3b,0x571172a6,0x99a08680,
-        0xf2a67513,0x5e3d64fa },
-      0 },
+        0xf2a67513,0x5e3d64fa } },
     /* 212 */
     { { 0x9b3b4416,0xaa6410c7,0xeab26d99,0xcd8fcf85,0xdb656a74,0x5ebff74a,
         0xeb8e42fc,0x6c8a7a95 },
       { 0xb02a63bd,0x10c60ba7,0x8b8f0047,0x6b2f2303,0x312d90b0,0x8c6c3738,
-        0xad82ca91,0x348ae422 },
-      0 },
+        0xad82ca91,0x348ae422 } },
     /* 213 */
     { { 0x5ccda2fb,0x7f474663,0x8e0726d2,0x22accaa1,0x492b1f20,0x85adf782,
         0xd9ef2d2e,0xc1074de0 },
       { 0xae9a65b3,0xfcf3ce44,0x05d7151b,0xfd71e4ac,0xce6a9788,0xd4711f50,
-        0xc9e54ffc,0xfbadfbdb },
-      0 },
+        0xc9e54ffc,0xfbadfbdb } },
     /* 214 */
     { { 0x20a99363,0x1713f1cd,0x6cf22775,0xb915658f,0x24d359b2,0x968175cd,
         0x83716fcd,0xb7f976b4 },
       { 0x5d6dbf74,0x5758e24d,0x71c3af36,0x8d23bafd,0x0243dfe3,0x48f47760,
-        0xcafcc805,0xf4d41b2e },
-      0 },
+        0xcafcc805,0xf4d41b2e } },
     /* 215 */
     { { 0xfdabd48d,0x51f1cf28,0x32c078a4,0xce81be36,0x117146e9,0x6ace2974,
         0xe0160f10,0x180824ea },
       { 0x66e58358,0x0387698b,0xce6ca358,0x63568752,0x5e41e6c5,0x82380e34,
-        0x83cf6d25,0x67e5f639 },
-      0 },
+        0x83cf6d25,0x67e5f639 } },
     /* 216 */
     { { 0xcf4899ef,0xf89ccb8d,0x9ebb44c0,0x949015f0,0xb2598ec9,0x546f9276,
         0x04c11fc6,0x9fef789a },
       { 0x53d2a071,0x6d367ecf,0xa4519b09,0xb10e1a7f,0x611e2eef,0xca6b3fb0,
-        0xa99c4e20,0xbc80c181 },
-      0 },
+        0xa99c4e20,0xbc80c181 } },
     /* 217 */
     { { 0xe5eb82e6,0x972536f8,0xf56cb920,0x1a484fc7,0x50b5da5e,0xc78e2171,
         0x9f8cdf10,0x49270e62 },
       { 0xea6b50ad,0x1a39b7bb,0xa2388ffc,0x9a0284c1,0x8107197b,0x5403eb17,
-        0x61372f7f,0xd2ee52f9 },
-      0 },
+        0x61372f7f,0xd2ee52f9 } },
     /* 218 */
     { { 0x88e0362a,0xd37cd285,0x8fa5d94d,0x442fa8a7,0xa434a526,0xaff836e5,
         0xe5abb733,0xdfb478be },
       { 0x673eede6,0xa91f1ce7,0x2b5b2f04,0xa5390ad4,0x5530da2f,0x5e66f7bf,
-        0x08df473a,0xd9a140b4 },
-      0 },
+        0x08df473a,0xd9a140b4 } },
     /* 219 */
     { { 0x6e8ea498,0x0e0221b5,0x3563ee09,0x62347829,0x335d2ade,0xe06b8391,
         0x623f4b1a,0x760c058d },
       { 0xc198aa79,0x0b89b58c,0xf07aba7f,0xf74890d2,0xfde2556a,0x4e204110,
-        0x8f190409,0x7141982d },
-      0 },
+        0x8f190409,0x7141982d } },
     /* 220 */
     { { 0x4d4b0f45,0x6f0a0e33,0x392a94e1,0xd9280b38,0xb3c61d5e,0x3af324c6,
         0x89d54e47,0x3af9d1ce },
       { 0x20930371,0xfd8f7981,0x21c17097,0xeda2664c,0xdc42309b,0x0e9545dc,
-        0x73957dd6,0xb1f815c3 },
-      0 },
+        0x73957dd6,0xb1f815c3 } },
     /* 221 */
     { { 0x89fec44a,0x84faa78e,0x3caa4caf,0xc8c2ae47,0xc1b6a624,0x691c807d,
         0x1543f052,0xa41aed14 },
       { 0x7d5ffe04,0x42435399,0x625b6e20,0x8bacb2df,0x87817775,0x85d660be,
-        0x86fb60ef,0xd6e9c1dd },
-      0 },
+        0x86fb60ef,0xd6e9c1dd } },
     /* 222 */
     { { 0xc6853264,0x3aa2e97e,0xe2304a0b,0x771533b7,0xb8eae9be,0x1b912bb7,
         0xae9bf8c2,0x9c9c6e10 },
       { 0xe030b74c,0xa2309a59,0x6a631e90,0x4ed7494d,0xa49b79f2,0x89f44b23,
-        0x40fa61b6,0x566bd596 },
-      0 },
+        0x40fa61b6,0x566bd596 } },
     /* 223 */
     { { 0xc18061f3,0x066c0118,0x7c83fc70,0x190b25d3,0x27273245,0xf05fc8e0,
         0xf525345e,0xcf2c7390 },
       { 0x10eb30cf,0xa09bceb4,0x0d77703a,0xcfd2ebba,0x150ff255,0xe842c43a,
-        0x8aa20979,0x02f51755 },
-      0 },
+        0x8aa20979,0x02f51755 } },
     /* 224 */
     { { 0xaddb7d07,0x396ef794,0x24455500,0x0b4fc742,0xc78aa3ce,0xfaff8eac,
         0xe8d4d97d,0x14e9ada5 },
       { 0x2f7079e2,0xdaa480a1,0xe4b0800e,0x45baa3cd,0x7838157d,0x01765e2d,
-        0x8e9d9ae8,0xa0ad4fab },
-      0 },
+        0x8e9d9ae8,0xa0ad4fab } },
     /* 225 */
     { { 0x4a653618,0x0bfb7621,0x31eaaa5f,0x1872813c,0x44949d5e,0x1553e737,
         0x6e56ed1e,0xbcd530b8 },
       { 0x32e9c47b,0x169be853,0xb50059ab,0xdc2776fe,0x192bfbb4,0xcdba9761,
-        0x6979341d,0x909283cf },
-      0 },
+        0x6979341d,0x909283cf } },
     /* 226 */
     { { 0x76e81a13,0x67b00324,0x62171239,0x9bee1a99,0xd32e19d6,0x08ed361b,
         0xace1549a,0x35eeb7c9 },
       { 0x7e4e5bdc,0x1280ae5a,0xb6ceec6e,0x2dcd2cd3,0x6e266bc1,0x52e4224c,
-        0x448ae864,0x9a8b2cf4 },
-      0 },
+        0x448ae864,0x9a8b2cf4 } },
     /* 227 */
     { { 0x09d03b59,0xf6471bf2,0xb65af2ab,0xc90e62a3,0xebd5eec9,0xff7ff168,
         0xd4491379,0x6bdb60f4 },
       { 0x8a55bc30,0xdadafebc,0x10097fe0,0xc79ead16,0x4c1e3bdd,0x42e19741,
-        0x94ba08a9,0x01ec3cfd },
-      0 },
+        0x94ba08a9,0x01ec3cfd } },
     /* 228 */
     { { 0xdc9485c2,0xba6277eb,0x22fb10c7,0x48cc9a79,0x70a28d8a,0x4f61d60f,
         0x475464f6,0xd1acb1c0 },
       { 0x26f36612,0xd26902b1,0xe0618d8b,0x59c3a44e,0x308357ee,0x4df8a813,
-        0x405626c2,0x7dcd079d },
-      0 },
+        0x405626c2,0x7dcd079d } },
     /* 229 */
     { { 0xf05a4b48,0x5ce7d4d3,0x37230772,0xadcd2952,0x812a915a,0xd18f7971,
         0x377d19b8,0x0bf53589 },
       { 0x6c68ea73,0x35ecd95a,0x823a584d,0xc7f3bbca,0xf473a723,0x9fb674c6,
-        0xe16686fc,0xd28be4d9 },
-      0 },
+        0xe16686fc,0xd28be4d9 } },
     /* 230 */
     { { 0x38fa8e4b,0x5d2b9906,0x893fd8fc,0x559f186e,0x436fb6fc,0x3a6de2aa,
         0x510f88ce,0xd76007aa },
       { 0x523a4988,0x2d10aab6,0x74dd0273,0xb455cf44,0xa3407278,0x7f467082,
-        0xb303bb01,0xf2b52f68 },
-      0 },
+        0xb303bb01,0xf2b52f68 } },
     /* 231 */
     { { 0x9835b4ca,0x0d57eafa,0xbb669cbc,0x2d2232fc,0xc6643198,0x8eeeb680,
         0xcc5aed3a,0xd8dbe98e },
       { 0xc5a02709,0xcba9be3f,0xf5ba1fa8,0x30be68e5,0xf10ea852,0xfebd43cd,
-        0xee559705,0xe01593a3 },
-      0 },
+        0xee559705,0xe01593a3 } },
     /* 232 */
     { { 0xea75a0a6,0xd3e5af50,0x57858033,0x512226ac,0xd0176406,0x6fe6d50f,
         0xaeb8ef06,0xafec07b1 },
       { 0x80bb0a31,0x7fb99567,0x37309aae,0x6f1af3cc,0x01abf389,0x9153a15a,
-        0x6e2dbfdd,0xa71b9354 },
-      0 },
+        0x6e2dbfdd,0xa71b9354 } },
     /* 233 */
     { { 0x18f593d2,0xbf8e12e0,0xa078122b,0xd1a90428,0x0ba4f2ad,0x150505db,
         0x628523d9,0x53a2005c },
       { 0xe7f2b935,0x07c8b639,0xc182961a,0x2bff975a,0x7518ca2c,0x86bceea7,
-        0x3d588e3d,0xbf47d19b },
-      0 },
+        0x3d588e3d,0xbf47d19b } },
     /* 234 */
     { { 0xdd7665d5,0x672967a7,0x2f2f4de5,0x4e303057,0x80d4903f,0x144005ae,
         0x39c9a1b6,0x001c2c7f },
       { 0x69efc6d6,0x143a8014,0x7bc7a724,0xc810bdaa,0xa78150a4,0x5f65670b,
-        0x86ffb99b,0xfdadf8e7 },
-      0 },
+        0x86ffb99b,0xfdadf8e7 } },
     /* 235 */
     { { 0xffc00785,0xfd38cb88,0x3b48eb67,0x77fa7591,0xbf368fbc,0x0454d055,
         0x5aa43c94,0x3a838e4d },
       { 0x3e97bb9a,0x56166329,0x441d94d9,0x9eb93363,0x0adb2a83,0x515591a6,
-        0x873e1da3,0x3cdb8257 },
-      0 },
+        0x873e1da3,0x3cdb8257 } },
     /* 236 */
     { { 0x7de77eab,0x137140a9,0x41648109,0xf7e1c50d,0xceb1d0df,0x762dcad2,
         0xf1f57fba,0x5a60cc89 },
       { 0x40d45673,0x80b36382,0x5913c655,0x1b82be19,0xdd64b741,0x057284b8,
-        0xdbfd8fc0,0x922ff56f },
-      0 },
+        0xdbfd8fc0,0x922ff56f } },
     /* 237 */
     { { 0xc9a129a1,0x1b265dee,0xcc284e04,0xa5b1ce57,0xcebfbe3c,0x04380c46,
         0xf6c5cd62,0x72919a7d },
       { 0x8fb90f9a,0x298f453a,0x88e4031b,0xd719c00b,0x796f1856,0xe32c0e77,
-        0x3624089a,0x5e791780 },
-      0 },
+        0x3624089a,0x5e791780 } },
     /* 238 */
     { { 0x7f63cdfb,0x5c16ec55,0xf1cae4fd,0x8e6a3571,0x560597ca,0xfce26bea,
         0xe24c2fab,0x4e0a5371 },
       { 0xa5765357,0x276a40d3,0x0d73a2b4,0x3c89af44,0x41d11a32,0xb8f370ae,
-        0xd56604ee,0xf5ff7818 },
-      0 },
+        0xd56604ee,0xf5ff7818 } },
     /* 239 */
     { { 0x1a09df21,0xfbf3e3fe,0xe66e8e47,0x26d5d28e,0x29c89015,0x2096bd0a,
         0x533f5e64,0xe41df0e9 },
       { 0xb3ba9e3f,0x305fda40,0x2604d895,0xf2340ceb,0x7f0367c7,0x0866e192,
-        0xac4f155f,0x8edd7d6e },
-      0 },
+        0xac4f155f,0x8edd7d6e } },
     /* 240 */
     { { 0x0bfc8ff3,0xc9a1dc0e,0xe936f42f,0x14efd82b,0xcca381ef,0x67016f7c,
         0xed8aee96,0x1432c1ca },
       { 0x70b23c26,0xec684829,0x0735b273,0xa64fe873,0xeaef0f5a,0xe389f6e5,
-        0x5ac8d2c6,0xcaef480b },
-      0 },
+        0x5ac8d2c6,0xcaef480b } },
     /* 241 */
     { { 0x75315922,0x5245c978,0x3063cca5,0xd8295171,0xb64ef2cb,0xf3ce60d0,
         0x8efae236,0xd0ba177e },
       { 0xb1b3af60,0x53a9ae8f,0x3d2da20e,0x1a796ae5,0xdf9eef28,0x01d63605,
-        0x1c54ae16,0xf31c957c },
-      0 },
+        0x1c54ae16,0xf31c957c } },
     /* 242 */
     { { 0x49cc4597,0xc0f58d52,0xbae0a028,0xdc5015b0,0x734a814a,0xefc5fc55,
         0x96e17c3a,0x013404cb },
       { 0xc9a824bf,0xb29e2585,0x001eaed7,0xd593185e,0x61ef68ac,0x8d6ee682,
-        0x91933e6c,0x6f377c4b },
-      0 },
+        0x91933e6c,0x6f377c4b } },
     /* 243 */
     { { 0xa8333fd2,0x9f93bad1,0x5a2a95b8,0xa8930202,0xeaf75ace,0x211e5037,
         0xd2d09506,0x6dba3e4e },
       { 0xd04399cd,0xa48ef98c,0xe6b73ade,0x1811c66e,0xc17ecaf3,0x72f60752,
-        0x3becf4a7,0xf13cf342 },
-      0 },
+        0x3becf4a7,0xf13cf342 } },
     /* 244 */
     { { 0xa919e2eb,0xceeb9ec0,0xf62c0f68,0x83a9a195,0x7aba2299,0xcfba3bb6,
         0x274bbad3,0xc83fa9a9 },
       { 0x62fa1ce0,0x0d7d1b0b,0x3418efbf,0xe58b60f5,0x52706f04,0xbfa8ef9e,
-        0x5d702683,0xb49d70f4 },
-      0 },
+        0x5d702683,0xb49d70f4 } },
     /* 245 */
     { { 0xfad5513b,0x914c7510,0xb1751e2d,0x05f32eec,0xd9fb9d59,0x6d850418,
         0x0c30f1cf,0x59cfadbb },
       { 0x55cb7fd6,0xe167ac23,0x820426a3,0x249367b8,0x90a78864,0xeaeec58c,
-        0x354a4b67,0x5babf362 },
-      0 },
+        0x354a4b67,0x5babf362 } },
     /* 246 */
     { { 0xee424865,0x37c981d1,0xf2e5577f,0x8b002878,0xb9e0c058,0x702970f1,
         0x9026c8f0,0x6188c6a7 },
       { 0xd0f244da,0x06f9a19b,0xfb080873,0x1ecced5c,0x9f213637,0x35470f9b,
-        0xdf50b9d9,0x993fe475 },
-      0 },
+        0xdf50b9d9,0x993fe475 } },
     /* 247 */
     { { 0x9b2c3609,0x68e31cdf,0x2c46d4ea,0x84eb19c0,0x9a775101,0x7ac9ec1a,
         0x4c80616b,0x81f76466 },
       { 0x75fbe978,0x1d7c2a5a,0xf183b356,0x6743fed3,0x501dd2bf,0x838d1f04,
-        0x5fe9060d,0x564a812a },
-      0 },
+        0x5fe9060d,0x564a812a } },
     /* 248 */
     { { 0xfa817d1d,0x7a5a64f4,0xbea82e0f,0x55f96844,0xcd57f9aa,0xb5ff5a0f,
         0x00e51d6c,0x226bf3cf },
       { 0x2f2833cf,0xd6d1a9f9,0x4f4f89a8,0x20a0a35a,0x8f3f7f77,0x11536c49,
-        0xff257836,0x68779f47 },
-      0 },
+        0xff257836,0x68779f47 } },
     /* 249 */
     { { 0x73043d08,0x79b0c1c1,0x1fc020fa,0xa5446774,0x9a6d26d0,0xd3767e28,
         0xeb092e0b,0x97bcb0d1 },
       { 0xf32ed3c3,0x2ab6eaa8,0xb281bc48,0xc8a4f151,0xbfa178f3,0x4d1bf4f3,
-        0x0a784655,0xa872ffe8 },
-      0 },
+        0x0a784655,0xa872ffe8 } },
     /* 250 */
     { { 0xa32b2086,0xb1ab7935,0x8160f486,0xe1eb710e,0x3b6ae6be,0x9bd0cd91,
         0xb732a36a,0x02812bfc },
       { 0xcf605318,0xa63fd7ca,0xfdfd6d1d,0x646e5d50,0x2102d619,0xa1d68398,
-        0xfe5396af,0x07391cc9 },
-      0 },
+        0xfe5396af,0x07391cc9 } },
     /* 251 */
     { { 0x8b80d02b,0xc50157f0,0x62877f7f,0x6b8333d1,0x78d542ae,0x7aca1af8,
         0x7e6d2a08,0x355d2adc },
       { 0x287386e1,0xb41f335a,0xf8e43275,0xfd272a94,0xe79989ea,0x286ca2cd,
-        0x7c2a3a79,0x3dc2b1e3 },
-      0 },
+        0x7c2a3a79,0x3dc2b1e3 } },
     /* 252 */
     { { 0x04581352,0xd689d21c,0x376782be,0x0a00c825,0x9fed701f,0x203bd590,
         0x3ccd846b,0xc4786910 },
       { 0x24c768ed,0x5dba7708,0x6841f657,0x72feea02,0x6accce0e,0x73313ed5,
-        0xd5bb4d32,0xccc42968 },
-      0 },
+        0xd5bb4d32,0xccc42968 } },
     /* 253 */
     { { 0x3d7620b9,0x94e50de1,0x5992a56a,0xd89a5c8a,0x675487c9,0xdc007640,
         0xaa4871cf,0xe147eb42 },
       { 0xacf3ae46,0x274ab4ee,0x50350fbe,0xfd4936fb,0x48c840ea,0xdf2afe47,
-        0x080e96e3,0x239ac047 },
-      0 },
+        0x080e96e3,0x239ac047 } },
     /* 254 */
     { { 0x2bfee8d4,0x481d1f35,0xfa7b0fec,0xce80b5cf,0x2ce9af3c,0x105c4c9e,
         0xf5f7e59d,0xc55fa1a3 },
       { 0x8257c227,0x3186f14e,0x342be00b,0xc5b1653f,0xaa904fb2,0x09afc998,
-        0xd4f4b699,0x094cd99c },
-      0 },
+        0xd4f4b699,0x094cd99c } },
     /* 255 */
     { { 0xd703beba,0x8a981c84,0x32ceb291,0x8631d150,0xe3bd49ec,0xa445f2c9,
         0x42abad33,0xb90a30b6 },
       { 0xb4a5abf9,0xb465404f,0x75db7603,0x004750c3,0xca35d89f,0x6f9a42cc,
-        0x1b7924f7,0x019f8b9a },
-      0 },
+        0x1b7924f7,0x019f8b9a } },
 };
 
 /* Multiply the base point of P256 by the scalar and return the result.
- * If map is true then convert result to affine co-ordinates.
+ * If map is true then convert result to affine coordinates.
  *
  * r     Resulting point.
  * k     Scalar to multiply by.
@@ -22037,7 +78565,7 @@
  * heap  Heap to use for allocation.
  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
  */
-static int sp_256_ecc_mulmod_base_8(sp_point* r, sp_digit* k,
+static int sp_256_ecc_mulmod_base_8(sp_point_256* r, const sp_digit* k,
         int map, void* heap)
 {
     return sp_256_ecc_mulmod_stripe_8(r, &p256_base, p256_table,
@@ -22047,7 +78575,7 @@
 #endif
 
 /* Multiply the base point of P256 by the scalar and return the result.
- * If map is true then convert result to affine co-ordinates.
+ * If map is true then convert result to affine coordinates.
  *
  * km    Scalar to multiply by.
  * r     Resulting point.
@@ -22057,23 +78585,22 @@
  */
 int sp_ecc_mulmod_base_256(mp_int* km, ecc_point* r, int map, void* heap)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point p;
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 p;
     sp_digit kd[8];
 #endif
-    sp_point* point;
+    sp_point_256* point;
     sp_digit* k = NULL;
     int err = MP_OKAY;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    err = sp_ecc_point_new(heap, p, point);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        k = XMALLOC(sizeof(sp_digit) * 8, heap, DYNAMIC_TYPE_ECC);
-        if (k == NULL)
+
+    err = sp_256_point_new_8(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
             err = MEMORY_E;
+        }
     }
 #else
     k = kd;
@@ -22081,26 +78608,24 @@
     if (err == MP_OKAY) {
         sp_256_from_mp(k, 8, km);
 
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_256_ecc_mulmod_base_avx2_8(point, k, map, heap);
-        else
-#endif
             err = sp_256_ecc_mulmod_base_8(point, k, map, heap);
     }
-    if (err == MP_OKAY)
+    if (err == MP_OKAY) {
         err = sp_256_point_to_ecc_point_8(point, r);
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (k != NULL)
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
         XFREE(k, heap, DYNAMIC_TYPE_ECC);
-#endif
-    sp_ecc_point_free(point, 0, heap);
+    }
+#endif
+    sp_256_point_free_8(point, 0, heap);
 
     return err;
 }
 
-#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN)
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+                                                        defined(HAVE_ECC_VERIFY)
 /* Returns 1 if the number of zero.
  * Implementation is constant time.
  *
@@ -22112,10 +78637,9 @@
     return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7]) == 0;
 }
 
-#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN */
+#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
 /* Add 1 to a. (a = a + 1)
  *
- * r  A single precision integer.
  * a  A single precision integer.
  */
 static void sp_256_add_one_8(sp_digit* a)
@@ -22151,33 +78675,38 @@
     );
 }
 
-/* Read big endian unsigned byte aray into r.
- *
- * r  A single precision integer.
+/* Read big endian unsigned byte array into r.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
  * a  Byte array.
  * n  Number of bytes in array to read.
  */
-static void sp_256_from_bin(sp_digit* r, int max, const byte* a, int n)
-{
-    int i, j = 0, s = 0;
+static void sp_256_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;
 
     r[0] = 0;
     for (i = n-1; i >= 0; i--) {
-        r[j] |= ((sp_digit)a[i]) << s;
-        if (s >= 24) {
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 24U) {
             r[j] &= 0xffffffff;
-            s = 32 - s;
-            if (j + 1 >= max)
+            s = 32U - s;
+            if (j + 1 >= size) {
                 break;
-            r[++j] = a[i] >> s;
-            s = 8 - s;
-        }
-        else
-            s += 8;
-    }
-
-    for (j++; j < max; j++)
+            }
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    for (j++; j < size; j++) {
         r[j] = 0;
+    }
 }
 
 /* Generates a scalar that is in the range 1..order-1.
@@ -22195,7 +78724,7 @@
     do {
         err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf));
         if (err == 0) {
-            sp_256_from_bin(k, 8, buf, sizeof(buf));
+            sp_256_from_bin(k, 8, buf, (int)sizeof(buf));
             if (sp_256_cmp_8(k, p256_order2) < 0) {
                 sp_256_add_one_8(k);
                 break;
@@ -22218,87 +78747,80 @@
  */
 int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point p;
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 p;
     sp_digit kd[8];
 #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
-    sp_point inf;
-#endif
-#endif
-    sp_point* point;
+    sp_point_256 inf;
+#endif
+#endif
+    sp_point_256* point;
     sp_digit* k = NULL;
 #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
-    sp_point* infinity;
+    sp_point_256* infinity;
 #endif
     int err;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
 
     (void)heap;
 
-    err = sp_ecc_point_new(heap, p, point);
+    err = sp_256_point_new_8(heap, p, point);
 #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
-    if (err == MP_OKAY)
-        err = sp_ecc_point_new(heap, inf, infinity);
-#endif
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        k = XMALLOC(sizeof(sp_digit) * 8, heap, DYNAMIC_TYPE_ECC);
-        if (k == NULL)
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, inf, infinity);
+    }
+#endif
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
             err = MEMORY_E;
+        }
     }
 #else
     k = kd;
 #endif
 
-    if (err == MP_OKAY)
+    if (err == MP_OKAY) {
         err = sp_256_ecc_gen_k_8(rng, k);
-    if (err == MP_OKAY) {
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_256_ecc_mulmod_base_avx2_8(point, k, 1, NULL);
-        else
-#endif
+    }
+    if (err == MP_OKAY) {
             err = sp_256_ecc_mulmod_base_8(point, k, 1, NULL);
     }
 
 #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
     if (err == MP_OKAY) {
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
-            err = sp_256_ecc_mulmod_avx2_8(infinity, point, p256_order, 1,
-                                                                          NULL);
-        }
-        else
-#endif
             err = sp_256_ecc_mulmod_8(infinity, point, p256_order, 1, NULL);
     }
     if (err == MP_OKAY) {
-        if (!sp_256_iszero_8(point->x) || !sp_256_iszero_8(point->y))
+        if ((sp_256_iszero_8(point->x) == 0) || (sp_256_iszero_8(point->y) == 0)) {
             err = ECC_INF_E;
-    }
-#endif
-
-    if (err == MP_OKAY)
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(k, priv);
-    if (err == MP_OKAY)
+    }
+    if (err == MP_OKAY) {
         err = sp_256_point_to_ecc_point_8(point, pub);
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (k != NULL)
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
         XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
 #endif
 #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
-    sp_ecc_point_free(infinity, 1, heap);
-#endif
-    sp_ecc_point_free(point, 1, heap);
+    sp_256_point_free_8(infinity, 1, heap);
+#endif
+    sp_256_point_free_8(point, 1, heap);
 
     return err;
 }
 
 #ifdef HAVE_ECC_DHE
-/* Write r as big endian to byte aray.
+/* Write r as big endian to byte array.
  * Fixed length number of bytes written: 32
  *
  * r  A single precision integer.
@@ -22312,19 +78834,26 @@
     a[j] = 0;
     for (i=0; i<8 && j>=0; i++) {
         b = 0;
-        a[j--] |= r[i] << s; b += 8 - s;
-        if (j < 0)
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
             break;
+        }
         while (b < 32) {
-            a[j--] = r[i] >> b; b += 8;
-            if (j < 0)
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
                 break;
+            }
         }
         s = 8 - (b - 32);
-        if (j >= 0)
+        if (j >= 0) {
             a[j] = 0;
-        if (s != 0)
+        }
+        if (s != 0) {
             j++;
+        }
     }
 }
 
@@ -22343,25 +78872,25 @@
 int sp_ecc_secret_gen_256(mp_int* priv, ecc_point* pub, byte* out,
                           word32* outLen, void* heap)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point p;
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 p;
     sp_digit kd[8];
 #endif
-    sp_point* point = NULL;
+    sp_point_256* point = NULL;
     sp_digit* k = NULL;
     int err = MP_OKAY;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    if (*outLen < 32)
+
+    if (*outLen < 32U) {
         err = BUFFER_E;
-
-    if (err == MP_OKAY)
-        err = sp_ecc_point_new(heap, p, point);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        k = XMALLOC(sizeof(sp_digit) * 8, heap, DYNAMIC_TYPE_ECC);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, p, point);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
+                                                              DYNAMIC_TYPE_ECC);
         if (k == NULL)
             err = MEMORY_E;
     }
@@ -22372,11 +78901,6 @@
     if (err == MP_OKAY) {
         sp_256_from_mp(k, 8, priv);
         sp_256_point_from_ecc_point_8(point, pub);
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_256_ecc_mulmod_avx2_8(point, point, k, 1, heap);
-        else
-#endif
             err = sp_256_ecc_mulmod_8(point, point, k, 1, heap);
     }
     if (err == MP_OKAY) {
@@ -22384,11 +78908,12 @@
         *outLen = 32;
     }
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (k != NULL)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
         XFREE(k, heap, DYNAMIC_TYPE_ECC);
-#endif
-    sp_ecc_point_free(point, 0, heap);
+    }
+#endif
+    sp_256_point_free_8(point, 0, heap);
 
     return err;
 }
@@ -22404,15 +78929,15 @@
  */
 static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
 {
-    sp_digit tmp[16];
-
-    __asm__ __volatile__ (
+    __asm__ __volatile__ (
+        "sub	sp, sp, #64\n\t"
         "mov	r5, #0\n\t"
         "mov	r6, #0\n\t"
         "mov	r7, #0\n\t"
         "mov	r8, #0\n\t"
         "\n1:\n\t"
         "subs	r3, r5, #28\n\t"
+        "it	cc\n\t"
         "movcc	r3, #0\n\t"
         "sub	r4, r5, r3\n\t"
         "\n2:\n\t"
@@ -22429,20 +78954,31 @@
         "cmp	r3, r5\n\t"
         "ble	2b\n\t"
         "\n3:\n\t"
-        "str	r6, [%[r], r5]\n\t"
+        "str	r6, [sp, r5]\n\t"
         "mov	r6, r7\n\t"
         "mov	r7, r8\n\t"
         "mov	r8, #0\n\t"
         "add	r5, r5, #4\n\t"
         "cmp	r5, #56\n\t"
         "ble	1b\n\t"
-        "str	r6, [%[r], r5]\n\t"
-        :
-        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+        "str	r6, [sp, r5]\n\t"
+        "\n4:\n\t"
+        "ldr	r6, [sp, #0]\n\t"
+        "ldr	r7, [sp, #4]\n\t"
+        "ldr	r8, [sp, #8]\n\t"
+        "ldr	r3, [sp, #12]\n\t"
+        "str	r6, [%[r], #0]\n\t"
+        "str	r7, [%[r], #4]\n\t"
+        "str	r8, [%[r], #8]\n\t"
+        "str	r3, [%[r], #12]\n\t"
+        "add	sp, sp, #16\n\t"
+        "add	%[r], %[r], #16\n\t"
+        "subs	r5, r5, #16\n\t"
+        "bgt	4b\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
     );
-
-    XMEMCPY(r, tmp, sizeof(tmp));
 }
 
 #else
@@ -22454,16 +78990,15 @@
  */
 static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
 {
-    sp_digit tmp[8];
-
-    __asm__ __volatile__ (
+    __asm__ __volatile__ (
+        "sub	sp, sp, #32\n\t"
         "mov	r10, #0\n\t"
         "#  A[0] * B[0]\n\t"
         "ldr	r8, [%[a], #0]\n\t"
         "ldr	r9, [%[b], #0]\n\t"
         "umull	r3, r4, r8, r9\n\t"
         "mov	r5, #0\n\t"
-        "str	r3, [%[tmp]]\n\t"
+        "str	r3, [sp]\n\t"
         "#  A[0] * B[1]\n\t"
         "ldr	r8, [%[a], #0]\n\t"
         "ldr	r9, [%[b], #4]\n\t"
@@ -22478,7 +79013,7 @@
         "adds	r4, r4, r6\n\t"
         "adcs	r5, r5, r7\n\t"
         "adc	r3, r3, r10\n\t"
-        "str	r4, [%[tmp], #4]\n\t"
+        "str	r4, [sp, #4]\n\t"
         "#  A[0] * B[2]\n\t"
         "ldr	r8, [%[a], #0]\n\t"
         "ldr	r9, [%[b], #8]\n\t"
@@ -22500,7 +79035,7 @@
         "adds	r5, r5, r6\n\t"
         "adcs	r3, r3, r7\n\t"
         "adc	r4, r4, r10\n\t"
-        "str	r5, [%[tmp], #8]\n\t"
+        "str	r5, [sp, #8]\n\t"
         "#  A[0] * B[3]\n\t"
         "ldr	r8, [%[a], #0]\n\t"
         "ldr	r9, [%[b], #12]\n\t"
@@ -22529,7 +79064,7 @@
         "adds	r3, r3, r6\n\t"
         "adcs	r4, r4, r7\n\t"
         "adc	r5, r5, r10\n\t"
-        "str	r3, [%[tmp], #12]\n\t"
+        "str	r3, [sp, #12]\n\t"
         "#  A[0] * B[4]\n\t"
         "ldr	r8, [%[a], #0]\n\t"
         "ldr	r9, [%[b], #16]\n\t"
@@ -22565,7 +79100,7 @@
         "adds	r4, r4, r6\n\t"
         "adcs	r5, r5, r7\n\t"
         "adc	r3, r3, r10\n\t"
-        "str	r4, [%[tmp], #16]\n\t"
+        "str	r4, [sp, #16]\n\t"
         "#  A[0] * B[5]\n\t"
         "ldr	r8, [%[a], #0]\n\t"
         "ldr	r9, [%[b], #20]\n\t"
@@ -22608,7 +79143,7 @@
         "adds	r5, r5, r6\n\t"
         "adcs	r3, r3, r7\n\t"
         "adc	r4, r4, r10\n\t"
-        "str	r5, [%[tmp], #20]\n\t"
+        "str	r5, [sp, #20]\n\t"
         "#  A[0] * B[6]\n\t"
         "ldr	r8, [%[a], #0]\n\t"
         "ldr	r9, [%[b], #24]\n\t"
@@ -22658,7 +79193,7 @@
         "adds	r3, r3, r6\n\t"
         "adcs	r4, r4, r7\n\t"
         "adc	r5, r5, r10\n\t"
-        "str	r3, [%[tmp], #24]\n\t"
+        "str	r3, [sp, #24]\n\t"
         "#  A[0] * B[7]\n\t"
         "ldr	r8, [%[a], #0]\n\t"
         "ldr	r9, [%[b], #28]\n\t"
@@ -22715,7 +79250,7 @@
         "adds	r4, r4, r6\n\t"
         "adcs	r5, r5, r7\n\t"
         "adc	r3, r3, r10\n\t"
-        "str	r4, [%[tmp], #28]\n\t"
+        "str	r4, [sp, #28]\n\t"
         "#  A[1] * B[7]\n\t"
         "ldr	r8, [%[a], #4]\n\t"
         "ldr	r9, [%[b], #28]\n\t"
@@ -22919,17 +79454,30 @@
         "adc	r3, r3, r7\n\t"
         "str	r5, [%[r], #56]\n\t"
         "str	r3, [%[r], #60]\n\t"
+        "ldr	r3, [sp, #0]\n\t"
+        "ldr	r4, [sp, #4]\n\t"
+        "ldr	r5, [sp, #8]\n\t"
+        "ldr	r6, [sp, #12]\n\t"
+        "str	r3, [%[r], #0]\n\t"
+        "str	r4, [%[r], #4]\n\t"
+        "str	r5, [%[r], #8]\n\t"
+        "str	r6, [%[r], #12]\n\t"
+        "ldr	r3, [sp, #16]\n\t"
+        "ldr	r4, [sp, #20]\n\t"
+        "ldr	r5, [sp, #24]\n\t"
+        "ldr	r6, [sp, #28]\n\t"
+        "str	r3, [%[r], #16]\n\t"
+        "str	r4, [%[r], #20]\n\t"
+        "str	r5, [%[r], #24]\n\t"
+        "str	r6, [%[r], #28]\n\t"
+        "add	sp, sp, #32\n\t"
         :
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [tmp] "r" (tmp)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
     );
-
-    XMEMCPY(r, tmp, sizeof(tmp));
 }
 
 #endif /* WOLFSSL_SP_SMALL */
-#ifdef HAVE_INTEL_AVX2
-#endif /* HAVE_INTEL_AVX2 */
 #endif
 #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
 #ifdef WOLFSSL_SP_SMALL
@@ -23034,7 +79582,7 @@
  * b  A single precision digit.
  */
 static void sp_256_mul_d_8(sp_digit* r, const sp_digit* a,
-        const sp_digit b)
+        sp_digit b)
 {
 #ifdef WOLFSSL_SP_SMALL
     __asm__ __volatile__ (
@@ -23051,7 +79599,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r3, r3, r6\n\t"
         "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
+        "adc	r5, r5, r10\n\t"
         "str	r3, [%[r], r9]\n\t"
         "mov	r3, r4\n\t"
         "mov	r4, r5\n\t"
@@ -23078,7 +79626,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r4, r4, r6\n\t"
         "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
+        "adc	r3, r3, r10\n\t"
         "str	r4, [%[r], #4]\n\t"
         "# A[2] * B\n\t"
         "ldr	r8, [%[a], #8]\n\t"
@@ -23086,7 +79634,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r5, r5, r6\n\t"
         "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
+        "adc	r4, r4, r10\n\t"
         "str	r5, [%[r], #8]\n\t"
         "# A[3] * B\n\t"
         "ldr	r8, [%[a], #12]\n\t"
@@ -23094,7 +79642,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r3, r3, r6\n\t"
         "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
+        "adc	r5, r5, r10\n\t"
         "str	r3, [%[r], #12]\n\t"
         "# A[4] * B\n\t"
         "ldr	r8, [%[a], #16]\n\t"
@@ -23102,7 +79650,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r4, r4, r6\n\t"
         "adcs	r5, r5, r7\n\t"
-        "adc	r3, r10, r10\n\t"
+        "adc	r3, r3, r10\n\t"
         "str	r4, [%[r], #16]\n\t"
         "# A[5] * B\n\t"
         "ldr	r8, [%[a], #20]\n\t"
@@ -23110,7 +79658,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r5, r5, r6\n\t"
         "adcs	r3, r3, r7\n\t"
-        "adc	r4, r10, r10\n\t"
+        "adc	r4, r4, r10\n\t"
         "str	r5, [%[r], #20]\n\t"
         "# A[6] * B\n\t"
         "ldr	r8, [%[a], #24]\n\t"
@@ -23118,7 +79666,7 @@
         "umull	r6, r7, %[b], r8\n\t"
         "adds	r3, r3, r6\n\t"
         "adcs	r4, r4, r7\n\t"
-        "adc	r5, r10, r10\n\t"
+        "adc	r5, r5, r10\n\t"
         "str	r3, [%[r], #24]\n\t"
         "# A[7] * B\n\t"
         "ldr	r8, [%[a], #28]\n\t"
@@ -23198,13 +79746,14 @@
  * a  A single precision integer.
  * m  Mask to AND against each digit.
  */
-static void sp_256_mask_8(sp_digit* r, sp_digit* a, sp_digit m)
+static void sp_256_mask_8(sp_digit* r, const sp_digit* a, sp_digit m)
 {
 #ifdef WOLFSSL_SP_SMALL
     int i;
 
-    for (i=0; i<8; i++)
+    for (i=0; i<8; i++) {
         r[i] = a[i] & m;
+    }
 #else
     r[0] = a[0] & m;
     r[1] = a[1] & m;
@@ -23226,7 +79775,7 @@
  * r  Remainder from the division.
  * returns MP_OKAY indicating success.
  */
-static WC_INLINE int sp_256_div_8(sp_digit* a, sp_digit* d, sp_digit* m,
+static WC_INLINE int sp_256_div_8(const sp_digit* a, const sp_digit* d, sp_digit* m,
         sp_digit* r)
 {
     sp_digit t1[16], t2[9];
@@ -23235,6 +79784,7 @@
 
     (void)m;
 
+
     div = d[7];
     XMEMCPY(t1, a, sizeof(*t1) * 2 * 8);
     for (i=7; i>=0; i--) {
@@ -23250,7 +79800,7 @@
     }
 
     r1 = sp_256_cmp_8(t1, d) >= 0;
-    sp_256_cond_sub_8(r, t1, t2, (sp_digit)0 - r1);
+    sp_256_cond_sub_8(r, t1, d, (sp_digit)0 - r1);
 
     return MP_OKAY;
 }
@@ -23262,7 +79812,7 @@
  * m  A single precision number that is the modulus to reduce with.
  * returns MP_OKAY indicating success.
  */
-static WC_INLINE int sp_256_mod_8(sp_digit* r, sp_digit* a, sp_digit* m)
+static WC_INLINE int sp_256_mod_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
 {
     return sp_256_div_8(a, m, NULL, r);
 }
@@ -23277,9 +79827,8 @@
  */
 static void sp_256_sqr_8(sp_digit* r, const sp_digit* a)
 {
-    sp_digit tmp[16];
-
-    __asm__ __volatile__ (
+    __asm__ __volatile__ (
+        "sub	sp, sp, #64\n\t"
         "mov	r12, #0\n\t"
         "mov	r6, #0\n\t"
         "mov	r7, #0\n\t"
@@ -23287,6 +79836,7 @@
         "mov	r5, #0\n\t"
         "\n1:\n\t"
         "subs	r3, r5, #28\n\t"
+        "it	cc\n\t"
         "movcc	r3, r12\n\t"
         "sub	r4, r5, r3\n\t"
         "\n2:\n\t"
@@ -23318,20 +79868,31 @@
         "cmp	r3, r5\n\t"
         "ble	2b\n\t"
         "\n3:\n\t"
-        "str	r6, [%[r], r5]\n\t"
+        "str	r6, [sp, r5]\n\t"
         "mov	r6, r7\n\t"
         "mov	r7, r8\n\t"
         "mov	r8, #0\n\t"
         "add	r5, r5, #4\n\t"
         "cmp	r5, #56\n\t"
         "ble	1b\n\t"
-        "str	r6, [%[r], r5]\n\t"
-        :
-        : [r] "r" (tmp), [a] "r" (a)
+        "str	r6, [sp, r5]\n\t"
+        "\n4:\n\t"
+        "ldr	r6, [sp, #0]\n\t"
+        "ldr	r7, [sp, #4]\n\t"
+        "ldr	r8, [sp, #8]\n\t"
+        "ldr	r3, [sp, #12]\n\t"
+        "str	r6, [%[r], #0]\n\t"
+        "str	r7, [%[r], #4]\n\t"
+        "str	r8, [%[r], #8]\n\t"
+        "str	r3, [%[r], #12]\n\t"
+        "add	sp, sp, #16\n\t"
+        "add	%[r], %[r], #16\n\t"
+        "subs	r5, r5, #16\n\t"
+        "bgt	4b\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a)
         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r9", "r12"
     );
-
-    XMEMCPY(r, tmp, sizeof(tmp));
 }
 
 #else
@@ -23342,15 +79903,14 @@
  */
 static void sp_256_sqr_8(sp_digit* r, const sp_digit* a)
 {
-    sp_digit tmp[8];
-
-    __asm__ __volatile__ (
+    __asm__ __volatile__ (
+        "sub	sp, sp, #32\n\t"
         "mov	r14, #0\n\t"
         "#  A[0] * A[0]\n\t"
         "ldr	r10, [%[a], #0]\n\t"
         "umull	r8, r3, r10, r10\n\t"
         "mov	r4, #0\n\t"
-        "str	r8, [%[tmp]]\n\t"
+        "str	r8, [sp]\n\t"
         "#  A[0] * A[1]\n\t"
         "ldr	r10, [%[a], #4]\n\t"
         "ldr	r8, [%[a], #0]\n\t"
@@ -23361,7 +79921,7 @@
         "adds	r3, r3, r8\n\t"
         "adcs	r4, r4, r9\n\t"
         "adc	r2, r2, r14\n\t"
-        "str	r3, [%[tmp], #4]\n\t"
+        "str	r3, [sp, #4]\n\t"
         "#  A[0] * A[2]\n\t"
         "ldr	r10, [%[a], #8]\n\t"
         "ldr	r8, [%[a], #0]\n\t"
@@ -23378,7 +79938,7 @@
         "adds	r4, r4, r8\n\t"
         "adcs	r2, r2, r9\n\t"
         "adc	r3, r3, r14\n\t"
-        "str	r4, [%[tmp], #8]\n\t"
+        "str	r4, [sp, #8]\n\t"
         "#  A[0] * A[3]\n\t"
         "ldr	r10, [%[a], #12]\n\t"
         "ldr	r8, [%[a], #0]\n\t"
@@ -23399,7 +79959,7 @@
         "adds	r2, r2, r8\n\t"
         "adcs	r3, r3, r9\n\t"
         "adc	r4, r4, r14\n\t"
-        "str	r2, [%[tmp], #12]\n\t"
+        "str	r2, [sp, #12]\n\t"
         "#  A[0] * A[4]\n\t"
         "ldr	r10, [%[a], #16]\n\t"
         "ldr	r8, [%[a], #0]\n\t"
@@ -23426,7 +79986,7 @@
         "adds	r3, r3, r8\n\t"
         "adcs	r4, r4, r9\n\t"
         "adc	r2, r2, r14\n\t"
-        "str	r3, [%[tmp], #16]\n\t"
+        "str	r3, [sp, #16]\n\t"
         "#  A[0] * A[5]\n\t"
         "ldr	r10, [%[a], #20]\n\t"
         "ldr	r8, [%[a], #0]\n\t"
@@ -23453,7 +80013,7 @@
         "adds	r4, r4, r5\n\t"
         "adcs	r2, r2, r6\n\t"
         "adc	r3, r3, r7\n\t"
-        "str	r4, [%[tmp], #20]\n\t"
+        "str	r4, [sp, #20]\n\t"
         "#  A[0] * A[6]\n\t"
         "ldr	r10, [%[a], #24]\n\t"
         "ldr	r8, [%[a], #0]\n\t"
@@ -23486,7 +80046,7 @@
         "adds	r2, r2, r5\n\t"
         "adcs	r3, r3, r6\n\t"
         "adc	r4, r4, r7\n\t"
-        "str	r2, [%[tmp], #24]\n\t"
+        "str	r2, [sp, #24]\n\t"
         "#  A[0] * A[7]\n\t"
         "ldr	r10, [%[a], #28]\n\t"
         "ldr	r8, [%[a], #0]\n\t"
@@ -23520,7 +80080,7 @@
         "adds	r3, r3, r5\n\t"
         "adcs	r4, r4, r6\n\t"
         "adc	r2, r2, r7\n\t"
-        "str	r3, [%[tmp], #28]\n\t"
+        "str	r3, [sp, #28]\n\t"
         "#  A[1] * A[7]\n\t"
         "ldr	r10, [%[a], #28]\n\t"
         "ldr	r8, [%[a], #4]\n\t"
@@ -23664,25 +80224,40 @@
         "adc	r2, r2, r9\n\t"
         "str	r4, [%[r], #56]\n\t"
         "str	r2, [%[r], #60]\n\t"
+        "ldr	r2, [sp, #0]\n\t"
+        "ldr	r3, [sp, #4]\n\t"
+        "ldr	r4, [sp, #8]\n\t"
+        "ldr	r8, [sp, #12]\n\t"
+        "str	r2, [%[r], #0]\n\t"
+        "str	r3, [%[r], #4]\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "str	r8, [%[r], #12]\n\t"
+        "ldr	r2, [sp, #16]\n\t"
+        "ldr	r3, [sp, #20]\n\t"
+        "ldr	r4, [sp, #24]\n\t"
+        "ldr	r8, [sp, #28]\n\t"
+        "str	r2, [%[r], #16]\n\t"
+        "str	r3, [%[r], #20]\n\t"
+        "str	r4, [%[r], #24]\n\t"
+        "str	r8, [%[r], #28]\n\t"
+        "add	sp, sp, #32\n\t"
         :
-        : [r] "r" (r), [a] "r" (a), [tmp] "r" (tmp)
+        : [r] "r" (r), [a] "r" (a)
         : "memory", "r2", "r3", "r4", "r8", "r9", "r10", "r8", "r5", "r6", "r7", "r14"
     );
-
-    XMEMCPY(r, tmp, sizeof(tmp));
 }
 
 #endif /* WOLFSSL_SP_SMALL */
 #ifdef WOLFSSL_SP_SMALL
 /* Order-2 for the P256 curve. */
-static const uint32_t p256_order_2[8] = {
-    0xfc63254f,0xf3b9cac2,0xa7179e84,0xbce6faad,0xffffffff,0xffffffff,
-    0x00000000,0xffffffff
+static const uint32_t p256_order_minus_2[8] = {
+    0xfc63254fU,0xf3b9cac2U,0xa7179e84U,0xbce6faadU,0xffffffffU,0xffffffffU,
+    0x00000000U,0xffffffffU
 };
 #else
 /* The low half of the order-2 of the P256 curve. */
 static const uint32_t p256_order_low[4] = {
-    0xfc63254f,0xf3b9cac2,0xa7179e84,0xbce6faad
+    0xfc63254fU,0xf3b9cac2U,0xa7179e84U,0xbce6faadU
 };
 #endif /* WOLFSSL_SP_SMALL */
 
@@ -23692,10 +80267,10 @@
  * a  First operand of the multiplication.
  * b  Second operand of the multiplication.
  */
-static void sp_256_mont_mul_order_8(sp_digit* r, sp_digit* a, sp_digit* b)
+static void sp_256_mont_mul_order_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
 {
     sp_256_mul_8(r, a, b);
-    sp_256_mont_reduce_8(r, p256_order, p256_mp_order);
+    sp_256_mont_reduce_order_8(r, p256_order, p256_mp_order);
 }
 
 /* Square number mod the order of P256 curve. (r = a * a mod order)
@@ -23703,10 +80278,10 @@
  * r  Result of the squaring.
  * a  Number to square.
  */
-static void sp_256_mont_sqr_order_8(sp_digit* r, sp_digit* a)
+static void sp_256_mont_sqr_order_8(sp_digit* r, const sp_digit* a)
 {
     sp_256_sqr_8(r, a);
-    sp_256_mont_reduce_8(r, p256_order, p256_mp_order);
+    sp_256_mont_reduce_order_8(r, p256_order, p256_mp_order);
 }
 
 #ifndef WOLFSSL_SP_SMALL
@@ -23716,13 +80291,14 @@
  * r  Result of the squaring.
  * a  Number to square.
  */
-static void sp_256_mont_sqr_n_order_8(sp_digit* r, sp_digit* a, int n)
+static void sp_256_mont_sqr_n_order_8(sp_digit* r, const sp_digit* a, int n)
 {
     int i;
 
     sp_256_mont_sqr_order_8(r, a);
-    for (i=1; i<n; i++)
+    for (i=1; i<n; i++) {
         sp_256_mont_sqr_order_8(r, r);
+    }
 }
 #endif /* !WOLFSSL_SP_SMALL */
 
@@ -23733,7 +80309,7 @@
  * a   Number to invert.
  * td  Temporary data.
  */
-static void sp_256_mont_inv_order_8(sp_digit* r, sp_digit* a,
+static void sp_256_mont_inv_order_8(sp_digit* r, const sp_digit* a,
         sp_digit* td)
 {
 #ifdef WOLFSSL_SP_SMALL
@@ -23743,10 +80319,11 @@
     XMEMCPY(t, a, sizeof(sp_digit) * 8);
     for (i=254; i>=0; i--) {
         sp_256_mont_sqr_order_8(t, t);
-        if (p256_order_2[i / 32] & ((sp_digit)1 << (i % 32)))
+        if ((p256_order_minus_2[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
             sp_256_mont_mul_order_8(t, t, a);
-    }
-    XMEMCPY(r, t, sizeof(sp_digit) * 8);
+        }
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 8U);
 #else
     sp_digit* t = td;
     sp_digit* t2 = td + 2 * 8;
@@ -23784,8 +80361,9 @@
     /* t2= a^ffffffff00000000ffffffffffffffffbce6 */
     for (i=127; i>=112; i--) {
         sp_256_mont_sqr_order_8(t2, t2);
-        if (p256_order_low[i / 32] & ((sp_digit)1 << (i % 32)))
+        if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
             sp_256_mont_mul_order_8(t2, t2, a);
+        }
     }
     /* t2= a^ffffffff00000000ffffffffffffffffbce6f */
     sp_256_mont_sqr_n_order_8(t2, t2, 4);
@@ -23793,8 +80371,9 @@
     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84 */
     for (i=107; i>=64; i--) {
         sp_256_mont_sqr_order_8(t2, t2);
-        if (p256_order_low[i / 32] & ((sp_digit)1 << (i % 32)))
+        if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
             sp_256_mont_mul_order_8(t2, t2, a);
+        }
     }
     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f */
     sp_256_mont_sqr_n_order_8(t2, t2, 4);
@@ -23802,8 +80381,9 @@
     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2 */
     for (i=59; i>=32; i--) {
         sp_256_mont_sqr_order_8(t2, t2);
-        if (p256_order_low[i / 32] & ((sp_digit)1 << (i % 32)))
+        if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
             sp_256_mont_mul_order_8(t2, t2, a);
+        }
     }
     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2f */
     sp_256_mont_sqr_n_order_8(t2, t2, 4);
@@ -23811,8 +80391,9 @@
     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254 */
     for (i=27; i>=0; i--) {
         sp_256_mont_sqr_order_8(t2, t2);
-        if (p256_order_low[i / 32] & ((sp_digit)1 << (i % 32)))
+        if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
             sp_256_mont_mul_order_8(t2, t2, a);
+        }
     }
     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632540 */
     sp_256_mont_sqr_n_order_8(t2, t2, 4);
@@ -23821,143 +80402,6 @@
 #endif /* WOLFSSL_SP_SMALL */
 }
 
-#ifdef HAVE_INTEL_AVX2
-/* Multiply two number mod the order of P256 curve. (r = a * b mod order)
- *
- * r  Result of the multiplication.
- * a  First operand of the multiplication.
- * b  Second operand of the multiplication.
- */
-static void sp_256_mont_mul_order_avx2_8(sp_digit* r, sp_digit* a, sp_digit* b)
-{
-    sp_256_mul_avx2_8(r, a, b);
-    sp_256_mont_reduce_avx2_8(r, p256_order, p256_mp_order);
-}
-
-/* Square number mod the order of P256 curve. (r = a * a mod order)
- *
- * r  Result of the squaring.
- * a  Number to square.
- */
-static void sp_256_mont_sqr_order_avx2_8(sp_digit* r, sp_digit* a)
-{
-    sp_256_sqr_avx2_8(r, a);
-    sp_256_mont_reduce_avx2_8(r, p256_order, p256_mp_order);
-}
-
-#ifndef WOLFSSL_SP_SMALL
-/* Square number mod the order of P256 curve a number of times.
- * (r = a ^ n mod order)
- *
- * r  Result of the squaring.
- * a  Number to square.
- */
-static void sp_256_mont_sqr_n_order_avx2_8(sp_digit* r, sp_digit* a, int n)
-{
-    int i;
-
-    sp_256_mont_sqr_order_avx2_8(r, a);
-    for (i=1; i<n; i++)
-        sp_256_mont_sqr_order_avx2_8(r, r);
-}
-#endif /* !WOLFSSL_SP_SMALL */
-
-/* Invert the number, in Montgomery form, modulo the order of the P256 curve.
- * (r = 1 / a mod order)
- *
- * r   Inverse result.
- * a   Number to invert.
- * td  Temporary data.
- */
-static void sp_256_mont_inv_order_avx2_8(sp_digit* r, sp_digit* a,
-        sp_digit* td)
-{
-#ifdef WOLFSSL_SP_SMALL
-    sp_digit* t = td;
-    int i;
-
-    XMEMCPY(t, a, sizeof(sp_digit) * 8);
-    for (i=254; i>=0; i--) {
-        sp_256_mont_sqr_order_avx2_8(t, t);
-        if (p256_order_2[i / 32] & ((sp_digit)1 << (i % 32)))
-            sp_256_mont_mul_order_avx2_8(t, t, a);
-    }
-    XMEMCPY(r, t, sizeof(sp_digit) * 8);
-#else
-    sp_digit* t = td;
-    sp_digit* t2 = td + 2 * 8;
-    sp_digit* t3 = td + 4 * 8;
-    int i;
-
-    /* t = a^2 */
-    sp_256_mont_sqr_order_avx2_8(t, a);
-    /* t = a^3 = t * a */
-    sp_256_mont_mul_order_avx2_8(t, t, a);
-    /* t2= a^c = t ^ 2 ^ 2 */
-    sp_256_mont_sqr_n_order_avx2_8(t2, t, 2);
-    /* t3= a^f = t2 * t */
-    sp_256_mont_mul_order_avx2_8(t3, t2, t);
-    /* t2= a^f0 = t3 ^ 2 ^ 4 */
-    sp_256_mont_sqr_n_order_avx2_8(t2, t3, 4);
-    /* t = a^ff = t2 * t3 */
-    sp_256_mont_mul_order_avx2_8(t, t2, t3);
-    /* t3= a^ff00 = t ^ 2 ^ 8 */
-    sp_256_mont_sqr_n_order_avx2_8(t2, t, 8);
-    /* t = a^ffff = t2 * t */
-    sp_256_mont_mul_order_avx2_8(t, t2, t);
-    /* t2= a^ffff0000 = t ^ 2 ^ 16 */
-    sp_256_mont_sqr_n_order_avx2_8(t2, t, 16);
-    /* t = a^ffffffff = t2 * t */
-    sp_256_mont_mul_order_avx2_8(t, t2, t);
-    /* t2= a^ffffffff0000000000000000 = t ^ 2 ^ 64  */
-    sp_256_mont_sqr_n_order_avx2_8(t2, t, 64);
-    /* t2= a^ffffffff00000000ffffffff = t2 * t */
-    sp_256_mont_mul_order_avx2_8(t2, t2, t);
-    /* t2= a^ffffffff00000000ffffffff00000000 = t2 ^ 2 ^ 32  */
-    sp_256_mont_sqr_n_order_avx2_8(t2, t2, 32);
-    /* t2= a^ffffffff00000000ffffffffffffffff = t2 * t */
-    sp_256_mont_mul_order_avx2_8(t2, t2, t);
-    /* t2= a^ffffffff00000000ffffffffffffffffbce6 */
-    for (i=127; i>=112; i--) {
-        sp_256_mont_sqr_order_avx2_8(t2, t2);
-        if (p256_order_low[i / 32] & ((sp_digit)1 << (i % 32)))
-            sp_256_mont_mul_order_avx2_8(t2, t2, a);
-    }
-    /* t2= a^ffffffff00000000ffffffffffffffffbce6f */
-    sp_256_mont_sqr_n_order_avx2_8(t2, t2, 4);
-    sp_256_mont_mul_order_avx2_8(t2, t2, t3);
-    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84 */
-    for (i=107; i>=64; i--) {
-        sp_256_mont_sqr_order_avx2_8(t2, t2);
-        if (p256_order_low[i / 32] & ((sp_digit)1 << (i % 32)))
-            sp_256_mont_mul_order_avx2_8(t2, t2, a);
-    }
-    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f */
-    sp_256_mont_sqr_n_order_avx2_8(t2, t2, 4);
-    sp_256_mont_mul_order_avx2_8(t2, t2, t3);
-    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2 */
-    for (i=59; i>=32; i--) {
-        sp_256_mont_sqr_order_avx2_8(t2, t2);
-        if (p256_order_low[i / 32] & ((sp_digit)1 << (i % 32)))
-            sp_256_mont_mul_order_avx2_8(t2, t2, a);
-    }
-    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2f */
-    sp_256_mont_sqr_n_order_avx2_8(t2, t2, 4);
-    sp_256_mont_mul_order_avx2_8(t2, t2, t3);
-    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254 */
-    for (i=27; i>=0; i--) {
-        sp_256_mont_sqr_order_avx2_8(t2, t2);
-        if (p256_order_low[i / 32] & ((sp_digit)1 << (i % 32)))
-            sp_256_mont_mul_order_avx2_8(t2, t2, a);
-    }
-    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632540 */
-    sp_256_mont_sqr_n_order_avx2_8(t2, t2, 4);
-    /* r = a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f */
-    sp_256_mont_mul_order_avx2_8(r, t2, t3);
-#endif /* WOLFSSL_SP_SMALL */
-}
-
-#endif /* HAVE_INTEL_AVX2 */
 #endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
 #ifdef HAVE_ECC_SIGN
 #ifndef SP_ECC_MAX_SIG_GEN
@@ -23981,114 +80425,102 @@
  * MP_OKAY on success.
  */
 int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
-                    mp_int* rm, mp_int* sm, void* heap)
-{
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    sp_digit* d;
+                    mp_int* rm, mp_int* sm, mp_int* km, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
 #else
     sp_digit ed[2*8];
     sp_digit xd[2*8];
     sp_digit kd[2*8];
     sp_digit rd[2*8];
     sp_digit td[3 * 2*8];
-    sp_point p;
+    sp_point_256 p;
 #endif
     sp_digit* e = NULL;
     sp_digit* x = NULL;
     sp_digit* k = NULL;
     sp_digit* r = NULL;
     sp_digit* tmp = NULL;
-    sp_point* point = NULL;
+    sp_point_256* point = NULL;
     sp_digit carry;
-    sp_digit* s;
-    sp_digit* kInv;
+    sp_digit* s = NULL;
+    sp_digit* kInv = NULL;
     int err = MP_OKAY;
     int32_t c;
     int i;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
 
     (void)heap;
 
-    err = sp_ecc_point_new(heap, p, point);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        d = XMALLOC(sizeof(sp_digit) * 7 * 2 * 8, heap, DYNAMIC_TYPE_ECC);
-        if (d != NULL) {
-            e = d + 0 * 8;
-            x = d + 2 * 8;
-            k = d + 4 * 8;
-            r = d + 6 * 8;
-            tmp = d + 8 * 8;
-        }
-        else
+    err = sp_256_point_new_8(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 8, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
             err = MEMORY_E;
-    }
-#else
-    e = ed;
-    x = xd;
-    k = kd;
-    r = rd;
-    tmp = td;
-#endif
-    s = e;
-    kInv = k;
-
-    if (err == MP_OKAY) {
-        if (hashLen > 32)
-            hashLen = 32;
-
-        sp_256_from_bin(e, 8, hash, hashLen);
-        sp_256_from_mp(x, 8, priv);
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        e = d + 0 * 8;
+        x = d + 2 * 8;
+        k = d + 4 * 8;
+        r = d + 6 * 8;
+        tmp = d + 8 * 8;
+#else
+        e = ed;
+        x = xd;
+        k = kd;
+        r = rd;
+        tmp = td;
+#endif
+        s = e;
+        kInv = k;
+
+        if (hashLen > 32U) {
+            hashLen = 32U;
+        }
+
+        sp_256_from_bin(e, 8, hash, (int)hashLen);
     }
 
     for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
+        sp_256_from_mp(x, 8, priv);
+
         /* New random point. */
-        err = sp_256_ecc_gen_k_8(rng, k);
+        if (km == NULL || mp_iszero(km)) {
+            err = sp_256_ecc_gen_k_8(rng, k);
+        }
+        else {
+            sp_256_from_mp(k, 8, km);
+            mp_zero(km);
+        }
         if (err == MP_OKAY) {
-#ifdef HAVE_INTEL_AVX2
-            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-                err = sp_256_ecc_mulmod_base_avx2_8(point, k, 1, heap);
-            else
-#endif
                 err = sp_256_ecc_mulmod_base_8(point, k, 1, NULL);
         }
 
         if (err == MP_OKAY) {
             /* r = point->x mod order */
-            XMEMCPY(r, point->x, sizeof(sp_digit) * 8);
+            XMEMCPY(r, point->x, sizeof(sp_digit) * 8U);
             sp_256_norm_8(r);
             c = sp_256_cmp_8(r, p256_order);
-            sp_256_cond_sub_8(r, r, p256_order, 0 - (c >= 0));
+            sp_256_cond_sub_8(r, r, p256_order, 0L - (sp_digit)(c >= 0));
             sp_256_norm_8(r);
 
             /* Conv k to Montgomery form (mod order) */
-#ifdef HAVE_INTEL_AVX2
-            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-                sp_256_mul_avx2_8(k, k, p256_norm_order);
-            else
-#endif
                 sp_256_mul_8(k, k, p256_norm_order);
             err = sp_256_mod_8(k, k, p256_order);
         }
         if (err == MP_OKAY) {
             sp_256_norm_8(k);
             /* kInv = 1/k mod order */
-#ifdef HAVE_INTEL_AVX2
-            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-                sp_256_mont_inv_order_avx2_8(kInv, k, tmp);
-            else
-#endif
                 sp_256_mont_inv_order_8(kInv, k, tmp);
             sp_256_norm_8(kInv);
 
             /* s = r * x + e */
-#ifdef HAVE_INTEL_AVX2
-            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-                sp_256_mul_avx2_8(x, x, r);
-            else
-#endif
                 sp_256_mul_8(x, x, r);
             err = sp_256_mod_8(x, x, p256_order);
         }
@@ -24098,46 +80530,45 @@
             sp_256_cond_sub_8(s, s, p256_order, 0 - carry);
             sp_256_norm_8(s);
             c = sp_256_cmp_8(s, p256_order);
-            sp_256_cond_sub_8(s, s, p256_order, 0 - (c >= 0));
+            sp_256_cond_sub_8(s, s, p256_order, 0L - (sp_digit)(c >= 0));
             sp_256_norm_8(s);
 
             /* s = s * k^-1 mod order */
-#ifdef HAVE_INTEL_AVX2
-            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-                sp_256_mont_mul_order_avx2_8(s, s, kInv);
-            else
-#endif
                 sp_256_mont_mul_order_8(s, s, kInv);
             sp_256_norm_8(s);
 
             /* Check that signature is usable. */
-            if (!sp_256_iszero_8(s))
+            if (sp_256_iszero_8(s) == 0) {
                 break;
-        }
-    }
-
-    if (i == 0)
+            }
+        }
+    }
+
+    if (i == 0) {
         err = RNG_FAILURE_E;
-
-    if (err == MP_OKAY)
+    }
+
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(r, rm);
-    if (err == MP_OKAY)
+    }
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(s, sm);
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     if (d != NULL) {
         XMEMSET(d, 0, sizeof(sp_digit) * 8 * 8);
         XFREE(d, heap, DYNAMIC_TYPE_ECC);
     }
 #else
-    XMEMSET(e, 0, sizeof(sp_digit) * 2 * 8);
-    XMEMSET(x, 0, sizeof(sp_digit) * 2 * 8);
-    XMEMSET(k, 0, sizeof(sp_digit) * 2 * 8);
-    XMEMSET(r, 0, sizeof(sp_digit) * 2 * 8);
-    XMEMSET(r, 0, sizeof(sp_digit) * 2 * 8);
-    XMEMSET(tmp, 0, sizeof(sp_digit) * 3 * 2*8);
-#endif
-    sp_ecc_point_free(point, 1, heap);
+    XMEMSET(e, 0, sizeof(sp_digit) * 2U * 8U);
+    XMEMSET(x, 0, sizeof(sp_digit) * 2U * 8U);
+    XMEMSET(k, 0, sizeof(sp_digit) * 2U * 8U);
+    XMEMSET(r, 0, sizeof(sp_digit) * 2U * 8U);
+    XMEMSET(r, 0, sizeof(sp_digit) * 2U * 8U);
+    XMEMSET(tmp, 0, sizeof(sp_digit) * 3U * 2U * 8U);
+#endif
+    sp_256_point_free_8(point, 1, heap);
 
     return err;
 }
@@ -24167,109 +80598,104 @@
 int sp_ecc_verify_256(const byte* hash, word32 hashLen, mp_int* pX,
     mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap)
 {
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit* d = NULL;
 #else
     sp_digit u1d[2*8];
     sp_digit u2d[2*8];
     sp_digit sd[2*8];
     sp_digit tmpd[2*8 * 5];
-    sp_point p1d;
-    sp_point p2d;
-#endif
-    sp_digit* u1;
-    sp_digit* u2;
-    sp_digit* s;
-    sp_digit* tmp;
-    sp_point* p1;
-    sp_point* p2 = NULL;
+    sp_point_256 p1d;
+    sp_point_256 p2d;
+#endif
+    sp_digit* u1 = NULL;
+    sp_digit* u2 = NULL;
+    sp_digit* s = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_256* p1;
+    sp_point_256* p2 = NULL;
     sp_digit carry;
     int32_t c;
     int err;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    err = sp_ecc_point_new(heap, p1d, p1);
-    if (err == MP_OKAY)
-        err = sp_ecc_point_new(heap, p2d, p2);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        d = XMALLOC(sizeof(sp_digit) * 16 * 8, heap, DYNAMIC_TYPE_ECC);
-        if (d != NULL) {
-            u1  = d + 0 * 8;
-            u2  = d + 2 * 8;
-            s   = d + 4 * 8;
-            tmp = d + 6 * 8;
-        }
-        else
+
+    err = sp_256_point_new_8(heap, p1d, p1);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, p2d, p2);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 8, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
             err = MEMORY_E;
-    }
-#else
-    u1 = u1d;
-    u2 = u2d;
-    s  = sd;
-    tmp = tmpd;
-#endif
-
-    if (err == MP_OKAY) {
-        if (hashLen > 32)
-            hashLen = 32;
-
-        sp_256_from_bin(u1, 8, hash, hashLen);
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        u1  = d + 0 * 8;
+        u2  = d + 2 * 8;
+        s   = d + 4 * 8;
+        tmp = d + 6 * 8;
+#else
+        u1 = u1d;
+        u2 = u2d;
+        s  = sd;
+        tmp = tmpd;
+#endif
+
+        if (hashLen > 32U) {
+            hashLen = 32U;
+        }
+
+        sp_256_from_bin(u1, 8, hash, (int)hashLen);
         sp_256_from_mp(u2, 8, r);
         sp_256_from_mp(s, 8, sm);
         sp_256_from_mp(p2->x, 8, pX);
         sp_256_from_mp(p2->y, 8, pY);
         sp_256_from_mp(p2->z, 8, pZ);
 
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            sp_256_mul_avx2_8(s, s, p256_norm_order);
-        else
-#endif
+        {
             sp_256_mul_8(s, s, p256_norm_order);
+        }
         err = sp_256_mod_8(s, s, p256_order);
     }
     if (err == MP_OKAY) {
         sp_256_norm_8(s);
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
-            sp_256_mont_inv_order_avx2_8(s, s, tmp);
-            sp_256_mont_mul_order_avx2_8(u1, u1, s);
-            sp_256_mont_mul_order_avx2_8(u2, u2, s);
-        }
-        else
-#endif
         {
             sp_256_mont_inv_order_8(s, s, tmp);
             sp_256_mont_mul_order_8(u1, u1, s);
             sp_256_mont_mul_order_8(u2, u2, s);
         }
 
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_256_ecc_mulmod_base_avx2_8(p1, u1, 0, heap);
-        else
-#endif
             err = sp_256_ecc_mulmod_base_8(p1, u1, 0, heap);
     }
     if (err == MP_OKAY) {
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_256_ecc_mulmod_avx2_8(p2, p2, u2, 0, heap);
-        else
-#endif
             err = sp_256_ecc_mulmod_8(p2, p2, u2, 0, heap);
     }
 
     if (err == MP_OKAY) {
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            sp_256_proj_point_add_avx2_8(p1, p1, p2, tmp);
-        else
-#endif
+        {
             sp_256_proj_point_add_8(p1, p1, p2, tmp);
+            if (sp_256_iszero_8(p1->z)) {
+                if (sp_256_iszero_8(p1->x) && sp_256_iszero_8(p1->y)) {
+                    sp_256_proj_point_dbl_8(p1, p2, tmp);
+                }
+                else {
+                    /* Y ordinate is not used from here - don't set. */
+                    p1->x[0] = 0;
+                    p1->x[1] = 0;
+                    p1->x[2] = 0;
+                    p1->x[3] = 0;
+                    p1->x[4] = 0;
+                    p1->x[5] = 0;
+                    p1->x[6] = 0;
+                    p1->x[7] = 0;
+                    XMEMCPY(p1->z, p256_norm_mod, sizeof(p256_norm_mod));
+                }
+            }
+        }
 
         /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
         /* Reload r and convert to Montgomery form. */
@@ -24281,13 +80707,13 @@
         /* u1 = r.z'.z' mod prime */
         sp_256_mont_sqr_8(p1->z, p1->z, p256_mod, p256_mp_mod);
         sp_256_mont_mul_8(u1, u2, p1->z, p256_mod, p256_mp_mod);
-        *res = sp_256_cmp_8(p1->x, u1) == 0;
+        *res = (int)(sp_256_cmp_8(p1->x, u1) == 0);
         if (*res == 0) {
             /* Reload r and add order. */
             sp_256_from_mp(u2, 8, r);
             carry = sp_256_add_8(u2, u2, p256_order);
             /* Carry means result is greater than mod and is not valid. */
-            if (!carry) {
+            if (carry == 0) {
                 sp_256_norm_8(u2);
 
                 /* Compare with mod and if greater or equal then not valid. */
@@ -24299,19 +80725,19 @@
                         /* u1 = (r + 1*order).z'.z' mod prime */
                         sp_256_mont_mul_8(u1, u2, p1->z, p256_mod,
                                                                   p256_mp_mod);
-                        *res = sp_256_cmp_8(p1->x, u2) == 0;
+                        *res = (int)(sp_256_cmp_8(p1->x, u1) == 0);
                     }
                 }
             }
         }
     }
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     if (d != NULL)
         XFREE(d, heap, DYNAMIC_TYPE_ECC);
 #endif
-    sp_ecc_point_free(p1, 0, heap);
-    sp_ecc_point_free(p2, 0, heap);
+    sp_256_point_free_8(p1, 0, heap);
+    sp_256_point_free_8(p2, 0, heap);
 
     return err;
 }
@@ -24325,9 +80751,9 @@
  * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
  * not on the curve and MP_OKAY otherwise.
  */
-static int sp_256_ecc_is_point_8(sp_point* point, void* heap)
-{
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+static int sp_256_ecc_is_point_8(sp_point_256* point, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit* d = NULL;
 #else
     sp_digit t1d[2*8];
@@ -24337,42 +80763,46 @@
     sp_digit* t2;
     int err = MP_OKAY;
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    d = XMALLOC(sizeof(sp_digit) * 8 * 4, heap, DYNAMIC_TYPE_ECC);
-    if (d != NULL) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8 * 4, heap, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
         t1 = d + 0 * 8;
         t2 = d + 2 * 8;
-    }
-    else
-        err = MEMORY_E;
-#else
-    (void)heap;
-
-    t1 = t1d;
-    t2 = t2d;
-#endif
-
-    if (err == MP_OKAY) {
+#else
+        (void)heap;
+
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
         sp_256_sqr_8(t1, point->y);
-        sp_256_mod_8(t1, t1, p256_mod);
+        (void)sp_256_mod_8(t1, t1, p256_mod);
         sp_256_sqr_8(t2, point->x);
-        sp_256_mod_8(t2, t2, p256_mod);
+        (void)sp_256_mod_8(t2, t2, p256_mod);
         sp_256_mul_8(t2, t2, point->x);
-        sp_256_mod_8(t2, t2, p256_mod);
-	sp_256_sub_8(t2, p256_mod, t2);
+        (void)sp_256_mod_8(t2, t2, p256_mod);
+        (void)sp_256_sub_8(t2, p256_mod, t2);
         sp_256_mont_add_8(t1, t1, t2, p256_mod);
 
         sp_256_mont_add_8(t1, t1, point->x, p256_mod);
         sp_256_mont_add_8(t1, t1, point->x, p256_mod);
         sp_256_mont_add_8(t1, t1, point->x, p256_mod);
 
-        if (sp_256_cmp_8(t1, p256_b) != 0)
+        if (sp_256_cmp_8(t1, p256_b) != 0) {
             err = MP_VAL;
-    }
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (d != NULL)
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
         XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
 #endif
 
     return err;
@@ -24387,23 +80817,23 @@
  */
 int sp_ecc_is_point_256(mp_int* pX, mp_int* pY)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point pubd;
-#endif
-    sp_point* pub;
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 pubd;
+#endif
+    sp_point_256* pub;
     byte one[1] = { 1 };
     int err;
 
-    err = sp_ecc_point_new(NULL, pubd, pub);
+    err = sp_256_point_new_8(NULL, pubd, pub);
     if (err == MP_OKAY) {
         sp_256_from_mp(pub->x, 8, pX);
         sp_256_from_mp(pub->y, 8, pY);
-        sp_256_from_bin(pub->z, 8, one, sizeof(one));
+        sp_256_from_bin(pub->z, 8, one, (int)sizeof(one));
 
         err = sp_256_ecc_is_point_8(pub, NULL);
     }
 
-    sp_ecc_point_free(pub, 0, NULL);
+    sp_256_point_free_8(pub, 0, NULL);
 
     return err;
 }
@@ -24421,50 +80851,54 @@
  */
 int sp_ecc_check_key_256(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit privd[8];
-    sp_point pubd;
-    sp_point pd;
+    sp_point_256 pubd;
+    sp_point_256 pd;
 #endif
     sp_digit* priv = NULL;
-    sp_point* pub;
-    sp_point* p = NULL;
+    sp_point_256* pub;
+    sp_point_256* p = NULL;
     byte one[1] = { 1 };
     int err;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    err = sp_ecc_point_new(heap, pubd, pub);
-    if (err == MP_OKAY)
-        err = sp_ecc_point_new(heap, pd, p);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        priv = XMALLOC(sizeof(sp_digit) * 8, heap, DYNAMIC_TYPE_ECC);
-        if (priv == NULL)
+
+    err = sp_256_point_new_8(heap, pubd, pub);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (priv == NULL) {
             err = MEMORY_E;
-    }
-#else
-    priv = privd;
-#endif
-
-    if (err == MP_OKAY) {
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+        priv = privd;
+#endif
+
         sp_256_from_mp(pub->x, 8, pX);
         sp_256_from_mp(pub->y, 8, pY);
-        sp_256_from_bin(pub->z, 8, one, sizeof(one));
+        sp_256_from_bin(pub->z, 8, one, (int)sizeof(one));
         sp_256_from_mp(priv, 8, privm);
 
         /* Check point at infinitiy. */
-        if (sp_256_iszero_8(pub->x) &&
-            sp_256_iszero_8(pub->y))
+        if ((sp_256_iszero_8(pub->x) != 0) &&
+            (sp_256_iszero_8(pub->y) != 0)) {
             err = ECC_INF_E;
+        }
     }
 
     if (err == MP_OKAY) {
         /* Check range of X and Y */
         if (sp_256_cmp_8(pub->x, p256_mod) >= 0 ||
-            sp_256_cmp_8(pub->y, p256_mod) >= 0)
+            sp_256_cmp_8(pub->y, p256_mod) >= 0) {
             err = ECC_OUT_OF_RANGE_E;
+        }
     }
 
     if (err == MP_OKAY) {
@@ -24474,28 +80908,18 @@
 
     if (err == MP_OKAY) {
         /* Point * order = infinity */
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_256_ecc_mulmod_avx2_8(p, pub, p256_order, 1, heap);
-        else
-#endif
             err = sp_256_ecc_mulmod_8(p, pub, p256_order, 1, heap);
     }
     if (err == MP_OKAY) {
         /* Check result is infinity */
-        if (!sp_256_iszero_8(p->x) ||
-            !sp_256_iszero_8(p->y)) {
+        if ((sp_256_iszero_8(p->x) == 0) ||
+            (sp_256_iszero_8(p->y) == 0)) {
             err = ECC_INF_E;
         }
     }
 
     if (err == MP_OKAY) {
         /* Base * private = point */
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_256_ecc_mulmod_base_avx2_8(p, priv, 1, heap);
-        else
-#endif
             err = sp_256_ecc_mulmod_base_8(p, priv, 1, heap);
     }
     if (err == MP_OKAY) {
@@ -24506,12 +80930,13 @@
         }
     }
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (priv != NULL)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (priv != NULL) {
         XFREE(priv, heap, DYNAMIC_TYPE_ECC);
-#endif
-    sp_ecc_point_free(p, 0, heap);
-    sp_ecc_point_free(pub, 0, heap);
+    }
+#endif
+    sp_256_point_free_8(p, 0, heap);
+    sp_256_point_free_8(pub, 0, heap);
 
     return err;
 }
@@ -24535,27 +80960,27 @@
                               mp_int* qX, mp_int* qY, mp_int* qZ,
                               mp_int* rX, mp_int* rY, mp_int* rZ)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit tmpd[2 * 8 * 5];
-    sp_point pd;
-    sp_point qd;
+    sp_point_256 pd;
+    sp_point_256 qd;
 #endif
     sp_digit* tmp;
-    sp_point* p;
-    sp_point* q = NULL;
+    sp_point_256* p;
+    sp_point_256* q = NULL;
     int err;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    err = sp_ecc_point_new(NULL, pd, p);
-    if (err == MP_OKAY)
-        err = sp_ecc_point_new(NULL, qd, q);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        tmp = XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, NULL, DYNAMIC_TYPE_ECC);
-        if (tmp == NULL)
+
+    err = sp_256_point_new_8(NULL, pd, p);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(NULL, qd, q);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, NULL,
+                                                              DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
             err = MEMORY_E;
+        }
     }
 #else
     tmp = tmpd;
@@ -24569,27 +80994,26 @@
         sp_256_from_mp(q->y, 8, qY);
         sp_256_from_mp(q->z, 8, qZ);
 
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            sp_256_proj_point_add_avx2_8(p, p, q, tmp);
-        else
-#endif
             sp_256_proj_point_add_8(p, p, q, tmp);
     }
 
-    if (err == MP_OKAY)
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(p->x, rX);
-    if (err == MP_OKAY)
+    }
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(p->y, rY);
-    if (err == MP_OKAY)
+    }
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(p->z, rZ);
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (tmp != NULL)
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
         XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
-#endif
-    sp_ecc_point_free(q, 0, NULL);
-    sp_ecc_point_free(p, 0, NULL);
+    }
+#endif
+    sp_256_point_free_8(q, 0, NULL);
+    sp_256_point_free_8(p, 0, NULL);
 
     return err;
 }
@@ -24608,23 +81032,22 @@
 int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
                               mp_int* rX, mp_int* rY, mp_int* rZ)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit tmpd[2 * 8 * 2];
-    sp_point pd;
+    sp_point_256 pd;
 #endif
     sp_digit* tmp;
-    sp_point* p;
+    sp_point_256* p;
     int err;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    err = sp_ecc_point_new(NULL, pd, p);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        tmp = XMALLOC(sizeof(sp_digit) * 2 * 8 * 2, NULL, DYNAMIC_TYPE_ECC);
-        if (tmp == NULL)
+
+    err = sp_256_point_new_8(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 2, NULL,
+                                                              DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
             err = MEMORY_E;
+        }
     }
 #else
     tmp = tmpd;
@@ -24635,26 +81058,25 @@
         sp_256_from_mp(p->y, 8, pY);
         sp_256_from_mp(p->z, 8, pZ);
 
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            sp_256_proj_point_dbl_avx2_8(p, p, tmp);
-        else
-#endif
             sp_256_proj_point_dbl_8(p, p, tmp);
     }
 
-    if (err == MP_OKAY)
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(p->x, rX);
-    if (err == MP_OKAY)
+    }
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(p->y, rY);
-    if (err == MP_OKAY)
+    }
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(p->z, rZ);
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (tmp != NULL)
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
         XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
-#endif
-    sp_ecc_point_free(p, 0, NULL);
+    }
+#endif
+    sp_256_point_free_8(p, 0, NULL);
 
     return err;
 }
@@ -24669,20 +81091,22 @@
  */
 int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit tmpd[2 * 8 * 4];
-    sp_point pd;
+    sp_point_256 pd;
 #endif
     sp_digit* tmp;
-    sp_point* p;
+    sp_point_256* p;
     int err;
 
-    err = sp_ecc_point_new(NULL, pd, p);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        tmp = XMALLOC(sizeof(sp_digit) * 2 * 8 * 4, NULL, DYNAMIC_TYPE_ECC);
-        if (tmp == NULL)
+    err = sp_256_point_new_8(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 4, NULL,
+                                                              DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
             err = MEMORY_E;
+        }
     }
 #else
     tmp = tmpd;
@@ -24695,18 +81119,22 @@
         sp_256_map_8(p, p, tmp);
     }
 
-    if (err == MP_OKAY)
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(p->x, pX);
-    if (err == MP_OKAY)
+    }
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(p->y, pY);
-    if (err == MP_OKAY)
+    }
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(p->z, pZ);
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (tmp != NULL)
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
         XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
-#endif
-    sp_ecc_point_free(p, 0, NULL);
+    }
+#endif
+    sp_256_point_free_8(p, 0, NULL);
 
     return err;
 }
@@ -24719,7 +81147,7 @@
  */
 static int sp_256_mont_sqrt_8(sp_digit* y)
 {
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit* d;
 #else
     sp_digit t1d[2 * 8];
@@ -24728,58 +81156,23 @@
     sp_digit* t1;
     sp_digit* t2;
     int err = MP_OKAY;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    d = XMALLOC(sizeof(sp_digit) * 4 * 8, NULL, DYNAMIC_TYPE_ECC);
-    if (d != NULL) {
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 8, NULL, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
         t1 = d + 0 * 8;
         t2 = d + 2 * 8;
-    }
-    else
-        err = MEMORY_E;
-#else
-    t1 = t1d;
-    t2 = t2d;
-#endif
-
-    if (err == MP_OKAY) {
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
-            /* t2 = y ^ 0x2 */
-            sp_256_mont_sqr_avx2_8(t2, y, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0x3 */
-            sp_256_mont_mul_avx2_8(t1, t2, y, p256_mod, p256_mp_mod);
-            /* t2 = y ^ 0xc */
-            sp_256_mont_sqr_n_avx2_8(t2, t1, 2, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0xf */
-            sp_256_mont_mul_avx2_8(t1, t1, t2, p256_mod, p256_mp_mod);
-            /* t2 = y ^ 0xf0 */
-            sp_256_mont_sqr_n_avx2_8(t2, t1, 4, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0xff */
-            sp_256_mont_mul_avx2_8(t1, t1, t2, p256_mod, p256_mp_mod);
-            /* t2 = y ^ 0xff00 */
-            sp_256_mont_sqr_n_avx2_8(t2, t1, 8, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0xffff */
-            sp_256_mont_mul_avx2_8(t1, t1, t2, p256_mod, p256_mp_mod);
-            /* t2 = y ^ 0xffff0000 */
-            sp_256_mont_sqr_n_avx2_8(t2, t1, 16, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0xffffffff */
-            sp_256_mont_mul_avx2_8(t1, t1, t2, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0xffffffff00000000 */
-            sp_256_mont_sqr_n_avx2_8(t1, t1, 32, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0xffffffff00000001 */
-            sp_256_mont_mul_avx2_8(t1, t1, y, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0xffffffff00000001000000000000000000000000 */
-            sp_256_mont_sqr_n_avx2_8(t1, t1, 96, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0xffffffff00000001000000000000000000000001 */
-            sp_256_mont_mul_avx2_8(t1, t1, y, p256_mod, p256_mp_mod);
-            sp_256_mont_sqr_n_avx2_8(y, t1, 94, p256_mod, p256_mp_mod);
-        }
-        else
-#endif
+#else
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
         {
             /* t2 = y ^ 0x2 */
             sp_256_mont_sqr_8(t2, y, p256_mod, p256_mp_mod);
@@ -24813,14 +81206,16 @@
         }
     }
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (d != NULL)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
         XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+    }
 #endif
 
     return err;
 }
 
+
 /* Uncompress the point given the X ordinate.
  *
  * xm    X ordinate.
@@ -24830,47 +81225,37 @@
  */
 int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym)
 {
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit* d;
 #else
     sp_digit xd[2 * 8];
     sp_digit yd[2 * 8];
 #endif
-    sp_digit* x;
-    sp_digit* y;
+    sp_digit* x = NULL;
+    sp_digit* y = NULL;
     int err = MP_OKAY;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    d = XMALLOC(sizeof(sp_digit) * 4 * 8, NULL, DYNAMIC_TYPE_ECC);
-    if (d != NULL) {
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 8, NULL, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
         x = d + 0 * 8;
         y = d + 2 * 8;
-    }
-    else
-        err = MEMORY_E;
-#else
-    x = xd;
-    y = yd;
-#endif
-
-    if (err == MP_OKAY) {
+#else
+        x = xd;
+        y = yd;
+#endif
+
         sp_256_from_mp(x, 8, xm);
-
         err = sp_256_mod_mul_norm_8(x, x, p256_mod);
     }
-
     if (err == MP_OKAY) {
         /* y = x^3 */
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
-            sp_256_mont_sqr_avx2_8(y, x, p256_mod, p256_mp_mod);
-            sp_256_mont_mul_avx2_8(y, y, x, p256_mod, p256_mp_mod);
-        }
-        else
-#endif
         {
             sp_256_mont_sqr_8(y, x, p256_mod, p256_mp_mod);
             sp_256_mont_mul_8(y, y, x, p256_mod, p256_mp_mod);
@@ -24888,23 +81273,7785 @@
         err = sp_256_mont_sqrt_8(y);
     }
     if (err == MP_OKAY) {
-        XMEMSET(y + 8, 0, 8 * sizeof(sp_digit));
+        XMEMSET(y + 8, 0, 8U * sizeof(sp_digit));
         sp_256_mont_reduce_8(y, p256_mod, p256_mp_mod);
-        if (((y[0] ^ odd) & 1) != 0)
+        if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) {
             sp_256_mont_sub_8(y, p256_mod, y, p256_mod);
+        }
 
         err = sp_256_to_mp(y, ym);
     }
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (d != NULL)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
         XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+#endif
+#endif /* !WOLFSSL_SP_NO_256 */
+#ifdef WOLFSSL_SP_384
+
+/* Point structure to use. */
+typedef struct sp_point_384 {
+    sp_digit x[2 * 12];
+    sp_digit y[2 * 12];
+    sp_digit z[2 * 12];
+    int infinity;
+} sp_point_384;
+
+/* The modulus (prime) of the curve P384. */
+static const sp_digit p384_mod[12] = {
+    0xffffffff,0x00000000,0x00000000,0xffffffff,0xfffffffe,0xffffffff,
+    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff
+};
+/* The Montogmery normalizer for modulus of the curve P384. */
+static const sp_digit p384_norm_mod[12] = {
+    0x00000001,0xffffffff,0xffffffff,0x00000000,0x00000001,0x00000000,
+    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000
+};
+/* The Montogmery multiplier for modulus of the curve P384. */
+static sp_digit p384_mp_mod = 0x00000001;
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+                                            defined(HAVE_ECC_VERIFY)
+/* The order of the curve P384. */
+static const sp_digit p384_order[12] = {
+    0xccc52973,0xecec196a,0x48b0a77a,0x581a0db2,0xf4372ddf,0xc7634d81,
+    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff
+};
+#endif
+/* The order of the curve P384 minus 2. */
+static const sp_digit p384_order2[12] = {
+    0xccc52971,0xecec196a,0x48b0a77a,0x581a0db2,0xf4372ddf,0xc7634d81,
+    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff
+};
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montogmery normalizer for order of the curve P384. */
+static const sp_digit p384_norm_order[12] = {
+    0x333ad68d,0x1313e695,0xb74f5885,0xa7e5f24d,0x0bc8d220,0x389cb27e,
+    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000
+};
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montogmery multiplier for order of the curve P384. */
+static sp_digit p384_mp_order = 0xe88fdc45;
+#endif
+/* The base point of curve P384. */
+static const sp_point_384 p384_base = {
+    /* X ordinate */
+    {
+        0x72760ab7,0x3a545e38,0xbf55296c,0x5502f25d,0x82542a38,0x59f741e0,
+        0x8ba79b98,0x6e1d3b62,0xf320ad74,0x8eb1c71e,0xbe8b0537,0xaa87ca22,
+        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* Y ordinate */
+    {
+        0x90ea0e5f,0x7a431d7c,0x1d7e819d,0x0a60b1ce,0xb5f0b8c0,0xe9da3113,
+        0x289a147c,0xf8f41dbd,0x9292dc29,0x5d9e98bf,0x96262c6f,0x3617de4a,
+        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* Z ordinate */
+    {
+        0x00000001,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
+        0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
+        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* infinity */
+    0
+};
+#if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY)
+static const sp_digit p384_b[12] = {
+    0xd3ec2aef,0x2a85c8ed,0x8a2ed19d,0xc656398d,0x5013875a,0x0314088f,
+    0xfe814112,0x181d9c6e,0xe3f82d19,0x988e056b,0xe23ee7e4,0xb3312fa7
+};
+#endif
+
+static int sp_384_point_new_ex_12(void* heap, sp_point_384* sp, sp_point_384** p)
+{
+    int ret = MP_OKAY;
+    (void)heap;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    (void)sp;
+    *p = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, DYNAMIC_TYPE_ECC);
+#else
+    *p = sp;
+#endif
+    if (*p == NULL) {
+        ret = MEMORY_E;
+    }
+    return ret;
+}
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* Allocate memory for point and return error. */
+#define sp_384_point_new_12(heap, sp, p) sp_384_point_new_ex_12((heap), NULL, &(p))
+#else
+/* Set pointer to data and return no error. */
+#define sp_384_point_new_12(heap, sp, p) sp_384_point_new_ex_12((heap), &(sp), &(p))
+#endif
+
+
+static void sp_384_point_free_12(sp_point_384* p, int clear, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* If valid pointer then clear point data if requested and free data. */
+    if (p != NULL) {
+        if (clear != 0) {
+            XMEMSET(p, 0, sizeof(*p));
+        }
+        XFREE(p, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+/* Clear point data if requested. */
+    if (clear != 0) {
+        XMEMSET(p, 0, sizeof(*p));
+    }
+#endif
+    (void)heap;
+}
+
+/* Multiply a number by Montogmery normalizer mod modulus (prime).
+ *
+ * r  The resulting Montgomery form number.
+ * a  The number to convert.
+ * m  The modulus (prime).
+ * returns MEMORY_E when memory allocation fails and MP_OKAY otherwise.
+ */
+static int sp_384_mod_mul_norm_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    int64_t* t;
+#else
+    int64_t t[12];
+#endif
+    int64_t o;
+    int err = MP_OKAY;
+
+    (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (int64_t*)XMALLOC(sizeof(int64_t) * 12, NULL, DYNAMIC_TYPE_ECC);
+    if (t == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        /*  1  0  0  0  0  0  0  0  1  1  0 -1 */
+        t[0] = 0 + (uint64_t)a[0] + (uint64_t)a[8] + (uint64_t)a[9] - (uint64_t)a[11];
+        /* -1  1  0  0  0  0  0  0 -1  0  1  1 */
+        t[1] = 0 - (uint64_t)a[0] + (uint64_t)a[1] - (uint64_t)a[8] + (uint64_t)a[10] + (uint64_t)a[11];
+        /*  0 -1  1  0  0  0  0  0  0 -1  0  1 */
+        t[2] = 0 - (uint64_t)a[1] + (uint64_t)a[2] - (uint64_t)a[9] + (uint64_t)a[11];
+        /*  1  0 -1  1  0  0  0  0  1  1 -1 -1 */
+        t[3] = 0 + (uint64_t)a[0] - (uint64_t)a[2] + (uint64_t)a[3] + (uint64_t)a[8] + (uint64_t)a[9] - (uint64_t)a[10] - (uint64_t)a[11];
+        /*  1  1  0 -1  1  0  0  0  1  2  1 -2 */
+        t[4] = 0 + (uint64_t)a[0] + (uint64_t)a[1] - (uint64_t)a[3] + (uint64_t)a[4] + (uint64_t)a[8] + 2 * (uint64_t)a[9] + (uint64_t)a[10] -  2 * (uint64_t)a[11];
+        /*  0  1  1  0 -1  1  0  0  0  1  2  1 */
+        t[5] = 0 + (uint64_t)a[1] + (uint64_t)a[2] - (uint64_t)a[4] + (uint64_t)a[5] + (uint64_t)a[9] + 2 * (uint64_t)a[10] + (uint64_t)a[11];
+        /*  0  0  1  1  0 -1  1  0  0  0  1  2 */
+        t[6] = 0 + (uint64_t)a[2] + (uint64_t)a[3] - (uint64_t)a[5] + (uint64_t)a[6] + (uint64_t)a[10] + 2 * (uint64_t)a[11];
+        /*  0  0  0  1  1  0 -1  1  0  0  0  1 */
+        t[7] = 0 + (uint64_t)a[3] + (uint64_t)a[4] - (uint64_t)a[6] + (uint64_t)a[7] + (uint64_t)a[11];
+        /*  0  0  0  0  1  1  0 -1  1  0  0  0 */
+        t[8] = 0 + (uint64_t)a[4] + (uint64_t)a[5] - (uint64_t)a[7] + (uint64_t)a[8];
+        /*  0  0  0  0  0  1  1  0 -1  1  0  0 */
+        t[9] = 0 + (uint64_t)a[5] + (uint64_t)a[6] - (uint64_t)a[8] + (uint64_t)a[9];
+        /*  0  0  0  0  0  0  1  1  0 -1  1  0 */
+        t[10] = 0 + (uint64_t)a[6] + (uint64_t)a[7] - (uint64_t)a[9] + (uint64_t)a[10];
+        /*  0  0  0  0  0  0  0  1  1  0 -1  1 */
+        t[11] = 0 + (uint64_t)a[7] + (uint64_t)a[8] - (uint64_t)a[10] + (uint64_t)a[11];
+
+        t[1] += t[0] >> 32; t[0] &= 0xffffffff;
+        t[2] += t[1] >> 32; t[1] &= 0xffffffff;
+        t[3] += t[2] >> 32; t[2] &= 0xffffffff;
+        t[4] += t[3] >> 32; t[3] &= 0xffffffff;
+        t[5] += t[4] >> 32; t[4] &= 0xffffffff;
+        t[6] += t[5] >> 32; t[5] &= 0xffffffff;
+        t[7] += t[6] >> 32; t[6] &= 0xffffffff;
+        t[8] += t[7] >> 32; t[7] &= 0xffffffff;
+        t[9] += t[8] >> 32; t[8] &= 0xffffffff;
+        t[10] += t[9] >> 32; t[9] &= 0xffffffff;
+        t[11] += t[10] >> 32; t[10] &= 0xffffffff;
+        o     = t[11] >> 32; t[11] &= 0xffffffff;
+        t[0] += o;
+        t[1] -= o;
+        t[3] += o;
+        t[4] += o;
+        t[1] += t[0] >> 32; t[0] &= 0xffffffff;
+        t[2] += t[1] >> 32; t[1] &= 0xffffffff;
+        t[3] += t[2] >> 32; t[2] &= 0xffffffff;
+        t[4] += t[3] >> 32; t[3] &= 0xffffffff;
+        t[5] += t[4] >> 32; t[4] &= 0xffffffff;
+        t[6] += t[5] >> 32; t[5] &= 0xffffffff;
+        t[7] += t[6] >> 32; t[6] &= 0xffffffff;
+        t[8] += t[7] >> 32; t[7] &= 0xffffffff;
+        t[9] += t[8] >> 32; t[8] &= 0xffffffff;
+        t[10] += t[9] >> 32; t[9] &= 0xffffffff;
+        t[11] += t[10] >> 32; t[10] &= 0xffffffff;
+
+        r[0] = t[0];
+        r[1] = t[1];
+        r[2] = t[2];
+        r[3] = t[3];
+        r[4] = t[4];
+        r[5] = t[5];
+        r[6] = t[6];
+        r[7] = t[7];
+        r[8] = t[8];
+        r[9] = t[9];
+        r[10] = t[10];
+        r[11] = t[11];
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL)
+        XFREE(t, NULL, DYNAMIC_TYPE_ECC);
 #endif
 
     return err;
 }
-#endif
-#endif /* WOLFSSL_SP_NO_256 */
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
+ * a  A multi-precision integer.
+ */
+static void sp_384_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 32
+    int j;
+
+    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+    for (j = a->used; j < size; j++) {
+        r[j] = 0;
+    }
+#elif DIGIT_BIT > 32
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
+        r[j] &= 0xffffffff;
+        s = 32U - s;
+        if (j + 1 >= size) {
+            break;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 32U) <= (word32)DIGIT_BIT) {
+            s += 32U;
+            r[j] &= 0xffffffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#else
+    int i, j = 0, s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i]) << s;
+        if (s + DIGIT_BIT >= 32) {
+            r[j] &= 0xffffffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            s = 32 - s;
+            if (s == DIGIT_BIT) {
+                r[++j] = 0;
+                s = 0;
+            }
+            else {
+                r[++j] = a->dp[i] >> s;
+                s = DIGIT_BIT - s;
+            }
+        }
+        else {
+            s += DIGIT_BIT;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#endif
+}
+
+/* Convert a point of type ecc_point to type sp_point_384.
+ *
+ * p   Point of type sp_point_384 (result).
+ * pm  Point of type ecc_point.
+ */
+static void sp_384_point_from_ecc_point_12(sp_point_384* p, const ecc_point* pm)
+{
+    XMEMSET(p->x, 0, sizeof(p->x));
+    XMEMSET(p->y, 0, sizeof(p->y));
+    XMEMSET(p->z, 0, sizeof(p->z));
+    sp_384_from_mp(p->x, 12, pm->x);
+    sp_384_from_mp(p->y, 12, pm->y);
+    sp_384_from_mp(p->z, 12, pm->z);
+    p->infinity = 0;
+}
+
+/* Convert an array of sp_digit to an mp_int.
+ *
+ * a  A single precision integer.
+ * r  A multi-precision integer.
+ */
+static int sp_384_to_mp(const sp_digit* a, mp_int* r)
+{
+    int err;
+
+    err = mp_grow(r, (384 + DIGIT_BIT - 1) / DIGIT_BIT);
+    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 32
+        XMEMCPY(r->dp, a, sizeof(sp_digit) * 12);
+        r->used = 12;
+        mp_clamp(r);
+#elif DIGIT_BIT < 32
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 12; i++) {
+            r->dp[j] |= (mp_digit)(a[i] << s);
+            r->dp[j] &= (1L << DIGIT_BIT) - 1;
+            s = DIGIT_BIT - s;
+            r->dp[++j] = (mp_digit)(a[i] >> s);
+            while (s + DIGIT_BIT <= 32) {
+                s += DIGIT_BIT;
+                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+                if (s == SP_WORD_SIZE) {
+                    r->dp[j] = 0;
+                }
+                else {
+                    r->dp[j] = (mp_digit)(a[i] >> s);
+                }
+            }
+            s = 32 - s;
+        }
+        r->used = (384 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#else
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 12; i++) {
+            r->dp[j] |= ((mp_digit)a[i]) << s;
+            if (s + 32 >= DIGIT_BIT) {
+    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+                r->dp[j] &= (1L << DIGIT_BIT) - 1;
+    #endif
+                s = DIGIT_BIT - s;
+                r->dp[++j] = a[i] >> s;
+                s = 32 - s;
+            }
+            else {
+                s += 32;
+            }
+        }
+        r->used = (384 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#endif
+    }
+
+    return err;
+}
+
+/* Convert a point of type sp_point_384 to type ecc_point.
+ *
+ * p   Point of type sp_point_384.
+ * pm  Point of type ecc_point (result).
+ * returns MEMORY_E when allocation of memory in ecc_point fails otherwise
+ * MP_OKAY.
+ */
+static int sp_384_point_to_ecc_point_12(const sp_point_384* p, ecc_point* pm)
+{
+    int err;
+
+    err = sp_384_to_mp(p->x, pm->x);
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->y, pm->y);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->z, pm->z);
+    }
+
+    return err;
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+static void sp_384_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+    __asm__ __volatile__ (
+        "sub	sp, sp, #96\n\t"
+        "mov	r5, #0\n\t"
+        "mov	r6, #0\n\t"
+        "mov	r7, #0\n\t"
+        "mov	r8, #0\n\t"
+        "\n1:\n\t"
+        "subs	r3, r5, #44\n\t"
+        "it	cc\n\t"
+        "movcc	r3, #0\n\t"
+        "sub	r4, r5, r3\n\t"
+        "\n2:\n\t"
+        "ldr	r14, [%[a], r3]\n\t"
+        "ldr	r12, [%[b], r4]\n\t"
+        "umull	r9, r10, r14, r12\n\t"
+        "adds	r6, r6, r9\n\t"
+        "adcs	r7, r7, r10\n\t"
+        "adc	r8, r8, #0\n\t"
+        "add	r3, r3, #4\n\t"
+        "sub	r4, r4, #4\n\t"
+        "cmp	r3, #48\n\t"
+        "beq	3f\n\t"
+        "cmp	r3, r5\n\t"
+        "ble	2b\n\t"
+        "\n3:\n\t"
+        "str	r6, [sp, r5]\n\t"
+        "mov	r6, r7\n\t"
+        "mov	r7, r8\n\t"
+        "mov	r8, #0\n\t"
+        "add	r5, r5, #4\n\t"
+        "cmp	r5, #88\n\t"
+        "ble	1b\n\t"
+        "str	r6, [sp, r5]\n\t"
+        "\n4:\n\t"
+        "ldr	r6, [sp, #0]\n\t"
+        "ldr	r7, [sp, #4]\n\t"
+        "ldr	r8, [sp, #8]\n\t"
+        "ldr	r3, [sp, #12]\n\t"
+        "str	r6, [%[r], #0]\n\t"
+        "str	r7, [%[r], #4]\n\t"
+        "str	r8, [%[r], #8]\n\t"
+        "str	r3, [%[r], #12]\n\t"
+        "add	sp, sp, #16\n\t"
+        "add	%[r], %[r], #16\n\t"
+        "subs	r5, r5, #16\n\t"
+        "bgt	4b\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+    );
+}
+
+#else
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+static void sp_384_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+    __asm__ __volatile__ (
+        "sub	sp, sp, #48\n\t"
+        "mov	r10, #0\n\t"
+        "#  A[0] * B[0]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r3, r4, r8, r9\n\t"
+        "mov	r5, #0\n\t"
+        "str	r3, [sp]\n\t"
+        "#  A[0] * B[1]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[1] * B[0]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [sp, #4]\n\t"
+        "#  A[0] * B[2]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[1] * B[1]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[2] * B[0]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [sp, #8]\n\t"
+        "#  A[0] * B[3]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[1] * B[2]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[2] * B[1]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[3] * B[0]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [sp, #12]\n\t"
+        "#  A[0] * B[4]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[1] * B[3]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[2] * B[2]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[3] * B[1]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[4] * B[0]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [sp, #16]\n\t"
+        "#  A[0] * B[5]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[1] * B[4]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[2] * B[3]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[3] * B[2]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[4] * B[1]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[5] * B[0]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [sp, #20]\n\t"
+        "#  A[0] * B[6]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[1] * B[5]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[2] * B[4]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[3] * B[3]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[4] * B[2]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[5] * B[1]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[6] * B[0]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [sp, #24]\n\t"
+        "#  A[0] * B[7]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[1] * B[6]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[2] * B[5]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[3] * B[4]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[4] * B[3]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[5] * B[2]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[6] * B[1]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[7] * B[0]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [sp, #28]\n\t"
+        "#  A[0] * B[8]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[1] * B[7]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[2] * B[6]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[3] * B[5]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[4] * B[4]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[5] * B[3]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[6] * B[2]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[7] * B[1]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[8] * B[0]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [sp, #32]\n\t"
+        "#  A[0] * B[9]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[1] * B[8]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[2] * B[7]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[3] * B[6]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[4] * B[5]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[5] * B[4]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[6] * B[3]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[7] * B[2]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[8] * B[1]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[9] * B[0]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [sp, #36]\n\t"
+        "#  A[0] * B[10]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[1] * B[9]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[2] * B[8]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[3] * B[7]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[4] * B[6]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[5] * B[5]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[6] * B[4]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[7] * B[3]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[8] * B[2]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[9] * B[1]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[10] * B[0]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [sp, #40]\n\t"
+        "#  A[0] * B[11]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[1] * B[10]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[2] * B[9]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[3] * B[8]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[4] * B[7]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[5] * B[6]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[6] * B[5]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[7] * B[4]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[8] * B[3]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[9] * B[2]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[10] * B[1]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[11] * B[0]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [sp, #44]\n\t"
+        "#  A[1] * B[11]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[2] * B[10]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[3] * B[9]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[4] * B[8]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[5] * B[7]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[6] * B[6]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[7] * B[5]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[8] * B[4]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[9] * B[3]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[10] * B[2]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[11] * B[1]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #48]\n\t"
+        "#  A[2] * B[11]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[3] * B[10]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[4] * B[9]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[5] * B[8]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[6] * B[7]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[7] * B[6]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[8] * B[5]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[9] * B[4]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[10] * B[3]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[11] * B[2]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #52]\n\t"
+        "#  A[3] * B[11]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[4] * B[10]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[5] * B[9]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[6] * B[8]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[7] * B[7]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[8] * B[6]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[9] * B[5]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[10] * B[4]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[11] * B[3]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #56]\n\t"
+        "#  A[4] * B[11]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[5] * B[10]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[6] * B[9]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[7] * B[8]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[8] * B[7]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[9] * B[6]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[10] * B[5]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[11] * B[4]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #60]\n\t"
+        "#  A[5] * B[11]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[6] * B[10]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[7] * B[9]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[8] * B[8]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[9] * B[7]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[10] * B[6]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[11] * B[5]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #64]\n\t"
+        "#  A[6] * B[11]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[7] * B[10]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[8] * B[9]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[9] * B[8]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[10] * B[7]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[11] * B[6]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #68]\n\t"
+        "#  A[7] * B[11]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[8] * B[10]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[9] * B[9]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[10] * B[8]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[11] * B[7]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #72]\n\t"
+        "#  A[8] * B[11]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[9] * B[10]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[10] * B[9]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[11] * B[8]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #32]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #76]\n\t"
+        "#  A[9] * B[11]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[10] * B[10]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[11] * B[9]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #80]\n\t"
+        "#  A[10] * B[11]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[11] * B[10]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #84]\n\t"
+        "#  A[11] * B[11]\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adc	r5, r5, r7\n\t"
+        "str	r4, [%[r], #88]\n\t"
+        "str	r5, [%[r], #92]\n\t"
+        "ldr	r3, [sp, #0]\n\t"
+        "ldr	r4, [sp, #4]\n\t"
+        "ldr	r5, [sp, #8]\n\t"
+        "ldr	r6, [sp, #12]\n\t"
+        "str	r3, [%[r], #0]\n\t"
+        "str	r4, [%[r], #4]\n\t"
+        "str	r5, [%[r], #8]\n\t"
+        "str	r6, [%[r], #12]\n\t"
+        "ldr	r3, [sp, #16]\n\t"
+        "ldr	r4, [sp, #20]\n\t"
+        "ldr	r5, [sp, #24]\n\t"
+        "ldr	r6, [sp, #28]\n\t"
+        "str	r3, [%[r], #16]\n\t"
+        "str	r4, [%[r], #20]\n\t"
+        "str	r5, [%[r], #24]\n\t"
+        "str	r6, [%[r], #28]\n\t"
+        "ldr	r3, [sp, #32]\n\t"
+        "ldr	r4, [sp, #36]\n\t"
+        "ldr	r5, [sp, #40]\n\t"
+        "ldr	r6, [sp, #44]\n\t"
+        "str	r3, [%[r], #32]\n\t"
+        "str	r4, [%[r], #36]\n\t"
+        "str	r5, [%[r], #40]\n\t"
+        "str	r6, [%[r], #44]\n\t"
+        "add	sp, sp, #48\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
+    );
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * r  A single precision number representing condition subtract result.
+ * a  A single precision number to subtract from.
+ * b  A single precision number to subtract.
+ * m  Mask value to apply.
+ */
+static sp_digit sp_384_cond_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        sp_digit m)
+{
+    sp_digit c = 0;
+
+#ifdef WOLFSSL_SP_SMALL
+    __asm__ __volatile__ (
+        "mov	r9, #0\n\t"
+        "mov	r8, #0\n\t"
+        "1:\n\t"
+        "subs	%[c], r9, %[c]\n\t"
+        "ldr	r4, [%[a], r8]\n\t"
+        "ldr	r5, [%[b], r8]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbc	%[c], r9, r9\n\t"
+        "str	r4, [%[r], r8]\n\t"
+        "add	r8, r8, #4\n\t"
+        "cmp	r8, #48\n\t"
+        "blt	1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r4", "r6", "r5", "r7", "r8", "r9"
+    );
+#else
+    __asm__ __volatile__ (
+
+        "mov	r9, #0\n\t"
+        "ldr	r4, [%[a], #0]\n\t"
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r5, [%[b], #0]\n\t"
+        "ldr	r7, [%[b], #4]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "subs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #0]\n\t"
+        "str	r6, [%[r], #4]\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "ldr	r6, [%[a], #12]\n\t"
+        "ldr	r5, [%[b], #8]\n\t"
+        "ldr	r7, [%[b], #12]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "str	r6, [%[r], #12]\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "ldr	r6, [%[a], #20]\n\t"
+        "ldr	r5, [%[b], #16]\n\t"
+        "ldr	r7, [%[b], #20]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "str	r6, [%[r], #20]\n\t"
+        "ldr	r4, [%[a], #24]\n\t"
+        "ldr	r6, [%[a], #28]\n\t"
+        "ldr	r5, [%[b], #24]\n\t"
+        "ldr	r7, [%[b], #28]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #24]\n\t"
+        "str	r6, [%[r], #28]\n\t"
+        "ldr	r4, [%[a], #32]\n\t"
+        "ldr	r6, [%[a], #36]\n\t"
+        "ldr	r5, [%[b], #32]\n\t"
+        "ldr	r7, [%[b], #36]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #32]\n\t"
+        "str	r6, [%[r], #36]\n\t"
+        "ldr	r4, [%[a], #40]\n\t"
+        "ldr	r6, [%[a], #44]\n\t"
+        "ldr	r5, [%[b], #40]\n\t"
+        "ldr	r7, [%[b], #44]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #40]\n\t"
+        "str	r6, [%[r], #44]\n\t"
+        "sbc	%[c], r9, r9\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r4", "r6", "r5", "r7", "r8", "r9"
+    );
+#endif /* WOLFSSL_SP_SMALL */
+
+    return c;
+}
+
+#define sp_384_mont_reduce_order_12   sp_384_mont_reduce_12
+
+/* Reduce the number back to 384 bits using Montgomery reduction.
+ *
+ * a   A single precision number to reduce in place.
+ * m   The single precision number representing the modulus.
+ * mp  The digit representing the negative inverse of m mod 2^n.
+ */
+SP_NOINLINE static void sp_384_mont_reduce_12(sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_digit ca = 0;
+
+    __asm__ __volatile__ (
+        "# i = 0\n\t"
+        "mov	r12, #0\n\t"
+        "ldr	r10, [%[a], #0]\n\t"
+        "ldr	r14, [%[a], #4]\n\t"
+        "\n1:\n\t"
+        "# mu = a[i] * mp\n\t"
+        "mul	r8, %[mp], r10\n\t"
+        "# a[i+0] += m[0] * mu\n\t"
+        "ldr	r7, [%[m], #0]\n\t"
+        "ldr	r9, [%[a], #0]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r10, r10, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "# a[i+1] += m[1] * mu\n\t"
+        "ldr	r7, [%[m], #4]\n\t"
+        "ldr	r9, [%[a], #4]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r10, r14, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r10, r10, r5\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+2] += m[2] * mu\n\t"
+        "ldr	r7, [%[m], #8]\n\t"
+        "ldr	r14, [%[a], #8]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r14, r14, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r14, r14, r4\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+3] += m[3] * mu\n\t"
+        "ldr	r7, [%[m], #12]\n\t"
+        "ldr	r9, [%[a], #12]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #12]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+4] += m[4] * mu\n\t"
+        "ldr	r7, [%[m], #16]\n\t"
+        "ldr	r9, [%[a], #16]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #16]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+5] += m[5] * mu\n\t"
+        "ldr	r7, [%[m], #20]\n\t"
+        "ldr	r9, [%[a], #20]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #20]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+6] += m[6] * mu\n\t"
+        "ldr	r7, [%[m], #24]\n\t"
+        "ldr	r9, [%[a], #24]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #24]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+7] += m[7] * mu\n\t"
+        "ldr	r7, [%[m], #28]\n\t"
+        "ldr	r9, [%[a], #28]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #28]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+8] += m[8] * mu\n\t"
+        "ldr	r7, [%[m], #32]\n\t"
+        "ldr	r9, [%[a], #32]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #32]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+9] += m[9] * mu\n\t"
+        "ldr	r7, [%[m], #36]\n\t"
+        "ldr	r9, [%[a], #36]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #36]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+10] += m[10] * mu\n\t"
+        "ldr	r7, [%[m], #40]\n\t"
+        "ldr	r9, [%[a], #40]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #40]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+11] += m[11] * mu\n\t"
+        "ldr	r7, [%[m], #44]\n\t"
+        "ldr   r9, [%[a], #44]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r7, r7, %[ca]\n\t"
+        "mov	%[ca], #0\n\t"
+        "adc	%[ca], %[ca], %[ca]\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #44]\n\t"
+        "ldr	r9, [%[a], #48]\n\t"
+        "adcs	r9, r9, r7\n\t"
+        "str	r9, [%[a], #48]\n\t"
+        "adc	%[ca], %[ca], #0\n\t"
+        "# i += 1\n\t"
+        "add	%[a], %[a], #4\n\t"
+        "add	r12, r12, #4\n\t"
+        "cmp	r12, #48\n\t"
+        "blt	1b\n\t"
+        "str	r10, [%[a], #0]\n\t"
+        "str	r14, [%[a], #4]\n\t"
+        : [ca] "+r" (ca), [a] "+r" (a)
+        : [m] "r" (m), [mp] "r" (mp)
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+    );
+
+    sp_384_cond_sub_12(a - 12, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montogmery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r   Result of multiplication.
+ * a   First number to multiply in Montogmery form.
+ * b   Second number to multiply in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_384_mont_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_384_mul_12(r, a, b);
+    sp_384_mont_reduce_12(r, m, mp);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+static void sp_384_sqr_12(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "sub	sp, sp, #96\n\t"
+        "mov	r12, #0\n\t"
+        "mov	r6, #0\n\t"
+        "mov	r7, #0\n\t"
+        "mov	r8, #0\n\t"
+        "mov	r5, #0\n\t"
+        "\n1:\n\t"
+        "subs	r3, r5, #44\n\t"
+        "it	cc\n\t"
+        "movcc	r3, r12\n\t"
+        "sub	r4, r5, r3\n\t"
+        "\n2:\n\t"
+        "cmp	r4, r3\n\t"
+        "beq	4f\n\t"
+        "ldr	r14, [%[a], r3]\n\t"
+        "ldr	r9, [%[a], r4]\n\t"
+        "umull	r9, r10, r14, r9\n\t"
+        "adds	r6, r6, r9\n\t"
+        "adcs	r7, r7, r10\n\t"
+        "adc	r8, r8, r12\n\t"
+        "adds	r6, r6, r9\n\t"
+        "adcs	r7, r7, r10\n\t"
+        "adc	r8, r8, r12\n\t"
+        "bal	5f\n\t"
+        "\n4:\n\t"
+        "ldr	r14, [%[a], r3]\n\t"
+        "umull	r9, r10, r14, r14\n\t"
+        "adds	r6, r6, r9\n\t"
+        "adcs	r7, r7, r10\n\t"
+        "adc	r8, r8, r12\n\t"
+        "\n5:\n\t"
+        "add	r3, r3, #4\n\t"
+        "sub	r4, r4, #4\n\t"
+        "cmp	r3, #48\n\t"
+        "beq	3f\n\t"
+        "cmp	r3, r4\n\t"
+        "bgt	3f\n\t"
+        "cmp	r3, r5\n\t"
+        "ble	2b\n\t"
+        "\n3:\n\t"
+        "str	r6, [sp, r5]\n\t"
+        "mov	r6, r7\n\t"
+        "mov	r7, r8\n\t"
+        "mov	r8, #0\n\t"
+        "add	r5, r5, #4\n\t"
+        "cmp	r5, #88\n\t"
+        "ble	1b\n\t"
+        "str	r6, [sp, r5]\n\t"
+        "\n4:\n\t"
+        "ldr	r6, [sp, #0]\n\t"
+        "ldr	r7, [sp, #4]\n\t"
+        "ldr	r8, [sp, #8]\n\t"
+        "ldr	r3, [sp, #12]\n\t"
+        "str	r6, [%[r], #0]\n\t"
+        "str	r7, [%[r], #4]\n\t"
+        "str	r8, [%[r], #8]\n\t"
+        "str	r3, [%[r], #12]\n\t"
+        "add	sp, sp, #16\n\t"
+        "add	%[r], %[r], #16\n\t"
+        "subs	r5, r5, #16\n\t"
+        "bgt	4b\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r9", "r12"
+    );
+}
+
+#else
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+static void sp_384_sqr_12(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "sub	sp, sp, #48\n\t"
+        "mov	r14, #0\n\t"
+        "#  A[0] * A[0]\n\t"
+        "ldr	r10, [%[a], #0]\n\t"
+        "umull	r8, r3, r10, r10\n\t"
+        "mov	r4, #0\n\t"
+        "str	r8, [sp]\n\t"
+        "#  A[0] * A[1]\n\t"
+        "ldr	r10, [%[a], #4]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r3, r3, r8\n\t"
+        "adcs	r4, r4, r9\n\t"
+        "adc	r2, r14, r14\n\t"
+        "adds	r3, r3, r8\n\t"
+        "adcs	r4, r4, r9\n\t"
+        "adc	r2, r2, r14\n\t"
+        "str	r3, [sp, #4]\n\t"
+        "#  A[0] * A[2]\n\t"
+        "ldr	r10, [%[a], #8]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r4, r4, r8\n\t"
+        "adcs	r2, r2, r9\n\t"
+        "adc	r3, r14, r14\n\t"
+        "adds	r4, r4, r8\n\t"
+        "adcs	r2, r2, r9\n\t"
+        "adc	r3, r3, r14\n\t"
+        "#  A[1] * A[1]\n\t"
+        "ldr	r10, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r4, r4, r8\n\t"
+        "adcs	r2, r2, r9\n\t"
+        "adc	r3, r3, r14\n\t"
+        "str	r4, [sp, #8]\n\t"
+        "#  A[0] * A[3]\n\t"
+        "ldr	r10, [%[a], #12]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r2, r2, r8\n\t"
+        "adcs	r3, r3, r9\n\t"
+        "adc	r4, r14, r14\n\t"
+        "adds	r2, r2, r8\n\t"
+        "adcs	r3, r3, r9\n\t"
+        "adc	r4, r4, r14\n\t"
+        "#  A[1] * A[2]\n\t"
+        "ldr	r10, [%[a], #8]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r2, r2, r8\n\t"
+        "adcs	r3, r3, r9\n\t"
+        "adc	r4, r4, r14\n\t"
+        "adds	r2, r2, r8\n\t"
+        "adcs	r3, r3, r9\n\t"
+        "adc	r4, r4, r14\n\t"
+        "str	r2, [sp, #12]\n\t"
+        "#  A[0] * A[4]\n\t"
+        "ldr	r10, [%[a], #16]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r3, r3, r8\n\t"
+        "adcs	r4, r4, r9\n\t"
+        "adc	r2, r14, r14\n\t"
+        "adds	r3, r3, r8\n\t"
+        "adcs	r4, r4, r9\n\t"
+        "adc	r2, r2, r14\n\t"
+        "#  A[1] * A[3]\n\t"
+        "ldr	r10, [%[a], #12]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r3, r3, r8\n\t"
+        "adcs	r4, r4, r9\n\t"
+        "adc	r2, r2, r14\n\t"
+        "adds	r3, r3, r8\n\t"
+        "adcs	r4, r4, r9\n\t"
+        "adc	r2, r2, r14\n\t"
+        "#  A[2] * A[2]\n\t"
+        "ldr	r10, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r3, r3, r8\n\t"
+        "adcs	r4, r4, r9\n\t"
+        "adc	r2, r2, r14\n\t"
+        "str	r3, [sp, #16]\n\t"
+        "#  A[0] * A[5]\n\t"
+        "ldr	r10, [%[a], #20]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[4]\n\t"
+        "ldr	r10, [%[a], #16]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[3]\n\t"
+        "ldr	r10, [%[a], #12]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [sp, #20]\n\t"
+        "#  A[0] * A[6]\n\t"
+        "ldr	r10, [%[a], #24]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[5]\n\t"
+        "ldr	r10, [%[a], #20]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[4]\n\t"
+        "ldr	r10, [%[a], #16]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[3]\n\t"
+        "ldr	r10, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r2, r2, r5\n\t"
+        "adcs	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r2, [sp, #24]\n\t"
+        "#  A[0] * A[7]\n\t"
+        "ldr	r10, [%[a], #28]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r2, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[6]\n\t"
+        "ldr	r10, [%[a], #24]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[5]\n\t"
+        "ldr	r10, [%[a], #20]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[4]\n\t"
+        "ldr	r10, [%[a], #16]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	r2, r2, r7\n\t"
+        "str	r3, [sp, #28]\n\t"
+        "#  A[0] * A[8]\n\t"
+        "ldr	r10, [%[a], #32]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[7]\n\t"
+        "ldr	r10, [%[a], #28]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[6]\n\t"
+        "ldr	r10, [%[a], #24]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[5]\n\t"
+        "ldr	r10, [%[a], #20]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[4]\n\t"
+        "ldr	r10, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [sp, #32]\n\t"
+        "#  A[0] * A[9]\n\t"
+        "ldr	r10, [%[a], #36]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[8]\n\t"
+        "ldr	r10, [%[a], #32]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[7]\n\t"
+        "ldr	r10, [%[a], #28]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[6]\n\t"
+        "ldr	r10, [%[a], #24]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[5]\n\t"
+        "ldr	r10, [%[a], #20]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r2, r2, r5\n\t"
+        "adcs	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r2, [sp, #36]\n\t"
+        "#  A[0] * A[10]\n\t"
+        "ldr	r10, [%[a], #40]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r2, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[9]\n\t"
+        "ldr	r10, [%[a], #36]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[8]\n\t"
+        "ldr	r10, [%[a], #32]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[7]\n\t"
+        "ldr	r10, [%[a], #28]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[6]\n\t"
+        "ldr	r10, [%[a], #24]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[5]\n\t"
+        "ldr	r10, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	r2, r2, r7\n\t"
+        "str	r3, [sp, #40]\n\t"
+        "#  A[0] * A[11]\n\t"
+        "ldr	r10, [%[a], #44]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[10]\n\t"
+        "ldr	r10, [%[a], #40]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[9]\n\t"
+        "ldr	r10, [%[a], #36]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[8]\n\t"
+        "ldr	r10, [%[a], #32]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[7]\n\t"
+        "ldr	r10, [%[a], #28]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[6]\n\t"
+        "ldr	r10, [%[a], #24]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [sp, #44]\n\t"
+        "#  A[1] * A[11]\n\t"
+        "ldr	r10, [%[a], #44]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[2] * A[10]\n\t"
+        "ldr	r10, [%[a], #40]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[9]\n\t"
+        "ldr	r10, [%[a], #36]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[8]\n\t"
+        "ldr	r10, [%[a], #32]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[7]\n\t"
+        "ldr	r10, [%[a], #28]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[6]\n\t"
+        "ldr	r10, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r2, r2, r5\n\t"
+        "adcs	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r2, [%[r], #48]\n\t"
+        "#  A[2] * A[11]\n\t"
+        "ldr	r10, [%[a], #44]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r2, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[3] * A[10]\n\t"
+        "ldr	r10, [%[a], #40]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[9]\n\t"
+        "ldr	r10, [%[a], #36]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[8]\n\t"
+        "ldr	r10, [%[a], #32]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[7]\n\t"
+        "ldr	r10, [%[a], #28]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	r2, r2, r7\n\t"
+        "str	r3, [%[r], #52]\n\t"
+        "#  A[3] * A[11]\n\t"
+        "ldr	r10, [%[a], #44]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[4] * A[10]\n\t"
+        "ldr	r10, [%[a], #40]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[5] * A[9]\n\t"
+        "ldr	r10, [%[a], #36]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[8]\n\t"
+        "ldr	r10, [%[a], #32]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[7]\n\t"
+        "ldr	r10, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [%[r], #56]\n\t"
+        "#  A[4] * A[11]\n\t"
+        "ldr	r10, [%[a], #44]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[5] * A[10]\n\t"
+        "ldr	r10, [%[a], #40]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[6] * A[9]\n\t"
+        "ldr	r10, [%[a], #36]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[8]\n\t"
+        "ldr	r10, [%[a], #32]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r2, r2, r5\n\t"
+        "adcs	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r2, [%[r], #60]\n\t"
+        "#  A[5] * A[11]\n\t"
+        "ldr	r10, [%[a], #44]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r2, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[6] * A[10]\n\t"
+        "ldr	r10, [%[a], #40]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[7] * A[9]\n\t"
+        "ldr	r10, [%[a], #36]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[8]\n\t"
+        "ldr	r10, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	r2, r2, r7\n\t"
+        "str	r3, [%[r], #64]\n\t"
+        "#  A[6] * A[11]\n\t"
+        "ldr	r10, [%[a], #44]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[7] * A[10]\n\t"
+        "ldr	r10, [%[a], #40]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[8] * A[9]\n\t"
+        "ldr	r10, [%[a], #36]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [%[r], #68]\n\t"
+        "#  A[7] * A[11]\n\t"
+        "ldr	r10, [%[a], #44]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r2, r2, r8\n\t"
+        "adcs	r3, r3, r9\n\t"
+        "adc	r4, r14, r14\n\t"
+        "adds	r2, r2, r8\n\t"
+        "adcs	r3, r3, r9\n\t"
+        "adc	r4, r4, r14\n\t"
+        "#  A[8] * A[10]\n\t"
+        "ldr	r10, [%[a], #40]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r2, r2, r8\n\t"
+        "adcs	r3, r3, r9\n\t"
+        "adc	r4, r4, r14\n\t"
+        "adds	r2, r2, r8\n\t"
+        "adcs	r3, r3, r9\n\t"
+        "adc	r4, r4, r14\n\t"
+        "#  A[9] * A[9]\n\t"
+        "ldr	r10, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r2, r2, r8\n\t"
+        "adcs	r3, r3, r9\n\t"
+        "adc	r4, r4, r14\n\t"
+        "str	r2, [%[r], #72]\n\t"
+        "#  A[8] * A[11]\n\t"
+        "ldr	r10, [%[a], #44]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r3, r3, r8\n\t"
+        "adcs	r4, r4, r9\n\t"
+        "adc	r2, r14, r14\n\t"
+        "adds	r3, r3, r8\n\t"
+        "adcs	r4, r4, r9\n\t"
+        "adc	r2, r2, r14\n\t"
+        "#  A[9] * A[10]\n\t"
+        "ldr	r10, [%[a], #40]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r3, r3, r8\n\t"
+        "adcs	r4, r4, r9\n\t"
+        "adc	r2, r2, r14\n\t"
+        "adds	r3, r3, r8\n\t"
+        "adcs	r4, r4, r9\n\t"
+        "adc	r2, r2, r14\n\t"
+        "str	r3, [%[r], #76]\n\t"
+        "#  A[9] * A[11]\n\t"
+        "ldr	r10, [%[a], #44]\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r4, r4, r8\n\t"
+        "adcs	r2, r2, r9\n\t"
+        "adc	r3, r14, r14\n\t"
+        "adds	r4, r4, r8\n\t"
+        "adcs	r2, r2, r9\n\t"
+        "adc	r3, r3, r14\n\t"
+        "#  A[10] * A[10]\n\t"
+        "ldr	r10, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r4, r4, r8\n\t"
+        "adcs	r2, r2, r9\n\t"
+        "adc	r3, r3, r14\n\t"
+        "str	r4, [%[r], #80]\n\t"
+        "#  A[10] * A[11]\n\t"
+        "ldr	r10, [%[a], #44]\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r2, r2, r8\n\t"
+        "adcs	r3, r3, r9\n\t"
+        "adc	r4, r14, r14\n\t"
+        "adds	r2, r2, r8\n\t"
+        "adcs	r3, r3, r9\n\t"
+        "adc	r4, r4, r14\n\t"
+        "str	r2, [%[r], #84]\n\t"
+        "#  A[11] * A[11]\n\t"
+        "ldr	r10, [%[a], #44]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r3, r3, r8\n\t"
+        "adc	r4, r4, r9\n\t"
+        "str	r3, [%[r], #88]\n\t"
+        "str	r4, [%[r], #92]\n\t"
+        "ldr	r2, [sp, #0]\n\t"
+        "ldr	r3, [sp, #4]\n\t"
+        "ldr	r4, [sp, #8]\n\t"
+        "ldr	r8, [sp, #12]\n\t"
+        "str	r2, [%[r], #0]\n\t"
+        "str	r3, [%[r], #4]\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "str	r8, [%[r], #12]\n\t"
+        "ldr	r2, [sp, #16]\n\t"
+        "ldr	r3, [sp, #20]\n\t"
+        "ldr	r4, [sp, #24]\n\t"
+        "ldr	r8, [sp, #28]\n\t"
+        "str	r2, [%[r], #16]\n\t"
+        "str	r3, [%[r], #20]\n\t"
+        "str	r4, [%[r], #24]\n\t"
+        "str	r8, [%[r], #28]\n\t"
+        "ldr	r2, [sp, #32]\n\t"
+        "ldr	r3, [sp, #36]\n\t"
+        "ldr	r4, [sp, #40]\n\t"
+        "ldr	r8, [sp, #44]\n\t"
+        "str	r2, [%[r], #32]\n\t"
+        "str	r3, [%[r], #36]\n\t"
+        "str	r4, [%[r], #40]\n\t"
+        "str	r8, [%[r], #44]\n\t"
+        "add	sp, sp, #48\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r2", "r3", "r4", "r8", "r9", "r10", "r8", "r5", "r6", "r7", "r14"
+    );
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_384_mont_sqr_12(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_384_sqr_12(r, a);
+    sp_384_mont_reduce_12(r, m, mp);
+}
+
+#if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY)
+/* Square the Montgomery form number a number of times. (r = a ^ n mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montogmery form.
+ * n   Number of times to square.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_384_mont_sqr_n_12(sp_digit* r, const sp_digit* a, int n,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_384_mont_sqr_12(r, a, m, mp);
+    for (; n > 1; n--) {
+        sp_384_mont_sqr_12(r, r, m, mp);
+    }
+}
+
+#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */
+#ifdef WOLFSSL_SP_SMALL
+/* Mod-2 for the P384 curve. */
+static const uint32_t p384_mod_minus_2[12] = {
+    0xfffffffdU,0x00000000U,0x00000000U,0xffffffffU,0xfffffffeU,0xffffffffU,
+    0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU
+};
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the modulus (prime) of the
+ * P384 curve. (r = 1 / a mod m)
+ *
+ * r   Inverse result.
+ * a   Number to invert.
+ * td  Temporary data.
+ */
+static void sp_384_mont_inv_12(sp_digit* r, const sp_digit* a, sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    XMEMCPY(t, a, sizeof(sp_digit) * 12);
+    for (i=382; i>=0; i--) {
+        sp_384_mont_sqr_12(t, t, p384_mod, p384_mp_mod);
+        if (p384_mod_minus_2[i / 32] & ((sp_digit)1 << (i % 32)))
+            sp_384_mont_mul_12(t, t, a, p384_mod, p384_mp_mod);
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 12);
+#else
+    sp_digit* t1 = td;
+    sp_digit* t2 = td + 2 * 12;
+    sp_digit* t3 = td + 4 * 12;
+    sp_digit* t4 = td + 6 * 12;
+    sp_digit* t5 = td + 8 * 12;
+
+    /* 0x2 */
+    sp_384_mont_sqr_12(t1, a, p384_mod, p384_mp_mod);
+    /* 0x3 */
+    sp_384_mont_mul_12(t5, t1, a, p384_mod, p384_mp_mod);
+    /* 0xc */
+    sp_384_mont_sqr_n_12(t1, t5, 2, p384_mod, p384_mp_mod);
+    /* 0xf */
+    sp_384_mont_mul_12(t2, t5, t1, p384_mod, p384_mp_mod);
+    /* 0x1e */
+    sp_384_mont_sqr_12(t1, t2, p384_mod, p384_mp_mod);
+    /* 0x1f */
+    sp_384_mont_mul_12(t4, t1, a, p384_mod, p384_mp_mod);
+    /* 0x3e0 */
+    sp_384_mont_sqr_n_12(t1, t4, 5, p384_mod, p384_mp_mod);
+    /* 0x3ff */
+    sp_384_mont_mul_12(t2, t4, t1, p384_mod, p384_mp_mod);
+    /* 0x7fe0 */
+    sp_384_mont_sqr_n_12(t1, t2, 5, p384_mod, p384_mp_mod);
+    /* 0x7fff */
+    sp_384_mont_mul_12(t4, t4, t1, p384_mod, p384_mp_mod);
+    /* 0x3fff8000 */
+    sp_384_mont_sqr_n_12(t1, t4, 15, p384_mod, p384_mp_mod);
+    /* 0x3fffffff */
+    sp_384_mont_mul_12(t2, t4, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffc */
+    sp_384_mont_sqr_n_12(t3, t2, 2, p384_mod, p384_mp_mod);
+    /* 0xfffffffd */
+    sp_384_mont_mul_12(r, t3, a, p384_mod, p384_mp_mod);
+    /* 0xffffffff */
+    sp_384_mont_mul_12(t3, t5, t3, p384_mod, p384_mp_mod);
+    /* 0xfffffffc0000000 */
+    sp_384_mont_sqr_n_12(t1, t2, 30, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffff */
+    sp_384_mont_mul_12(t2, t2, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffff000000000000000 */
+    sp_384_mont_sqr_n_12(t1, t2, 60, p384_mod, p384_mp_mod);
+    /* 0xffffffffffffffffffffffffffffff */
+    sp_384_mont_mul_12(t2, t2, t1, p384_mod, p384_mp_mod);
+    /* 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */
+    sp_384_mont_sqr_n_12(t1, t2, 120, p384_mod, p384_mp_mod);
+    /* 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+    sp_384_mont_mul_12(t2, t2, t1, p384_mod, p384_mp_mod);
+    /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */
+    sp_384_mont_sqr_n_12(t1, t2, 15, p384_mod, p384_mp_mod);
+    /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+    sp_384_mont_mul_12(t2, t4, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe00000000 */
+    sp_384_mont_sqr_n_12(t1, t2, 33, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff */
+    sp_384_mont_mul_12(t2, t3, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff000000000000000000000000 */
+    sp_384_mont_sqr_n_12(t1, t2, 96, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000fffffffd */
+    sp_384_mont_mul_12(r, r, t1, p384_mod, p384_mp_mod);
+
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Compare a with b in constant time.
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+static int32_t sp_384_cmp_12(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = -1;
+    sp_digit one = 1;
+
+
+#ifdef WOLFSSL_SP_SMALL
+    __asm__ __volatile__ (
+        "mov	r7, #0\n\t"
+        "mov	r3, #-1\n\t"
+        "mov	r6, #44\n\t"
+        "1:\n\t"
+        "ldr	r4, [%[a], r6]\n\t"
+        "ldr	r5, [%[b], r6]\n\t"
+        "and	r4, r4, r3\n\t"
+        "and	r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "subs	r6, r6, #4\n\t"
+        "bcs	1b\n\t"
+        "eor	%[r], %[r], r3\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b), [one] "r" (one)
+        : "r3", "r4", "r5", "r6", "r7"
+    );
+#else
+    __asm__ __volatile__ (
+        "mov	r7, #0\n\t"
+        "mov	r3, #-1\n\t"
+        "ldr		r4, [%[a], #44]\n\t"
+        "ldr		r5, [%[b], #44]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #40]\n\t"
+        "ldr		r5, [%[b], #40]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #36]\n\t"
+        "ldr		r5, [%[b], #36]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #32]\n\t"
+        "ldr		r5, [%[b], #32]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #28]\n\t"
+        "ldr		r5, [%[b], #28]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #24]\n\t"
+        "ldr		r5, [%[b], #24]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #20]\n\t"
+        "ldr		r5, [%[b], #20]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #16]\n\t"
+        "ldr		r5, [%[b], #16]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #12]\n\t"
+        "ldr		r5, [%[b], #12]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #8]\n\t"
+        "ldr		r5, [%[b], #8]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #4]\n\t"
+        "ldr		r5, [%[b], #4]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr		r4, [%[a], #0]\n\t"
+        "ldr		r5, [%[b], #0]\n\t"
+        "and		r4, r4, r3\n\t"
+        "and		r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "eor	%[r], %[r], r3\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b), [one] "r" (one)
+        : "r3", "r4", "r5", "r6", "r7"
+    );
+#endif
+
+    return r;
+}
+
+/* Normalize the values in each word to 32.
+ *
+ * a  Array of sp_digit to normalize.
+ */
+#define sp_384_norm_12(a)
+
+/* Map the Montgomery form projective coordinate point to an affine point.
+ *
+ * r  Resulting affine coordinate point.
+ * p  Montgomery form projective coordinate point.
+ * t  Temporary ordinate data.
+ */
+static void sp_384_map_12(sp_point_384* r, const sp_point_384* p, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*12;
+    int32_t n;
+
+    sp_384_mont_inv_12(t1, p->z, t + 2*12);
+
+    sp_384_mont_sqr_12(t2, t1, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_12(t1, t2, t1, p384_mod, p384_mp_mod);
+
+    /* x /= z^2 */
+    sp_384_mont_mul_12(r->x, p->x, t2, p384_mod, p384_mp_mod);
+    XMEMSET(r->x + 12, 0, sizeof(r->x) / 2U);
+    sp_384_mont_reduce_12(r->x, p384_mod, p384_mp_mod);
+    /* Reduce x to less than modulus */
+    n = sp_384_cmp_12(r->x, p384_mod);
+    sp_384_cond_sub_12(r->x, r->x, p384_mod, 0 - ((n >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_384_norm_12(r->x);
+
+    /* y /= z^3 */
+    sp_384_mont_mul_12(r->y, p->y, t1, p384_mod, p384_mp_mod);
+    XMEMSET(r->y + 12, 0, sizeof(r->y) / 2U);
+    sp_384_mont_reduce_12(r->y, p384_mod, p384_mp_mod);
+    /* Reduce y to less than modulus */
+    n = sp_384_cmp_12(r->y, p384_mod);
+    sp_384_cond_sub_12(r->y, r->y, p384_mod, 0 - ((n >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_384_norm_12(r->y);
+
+    XMEMSET(r->z, 0, sizeof(r->z));
+    r->z[0] = 1;
+
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "add	r12, %[a], #48\n\t"
+        "\n1:\n\t"
+        "adds	%[c], %[c], #-1\n\t"
+        "ldr	r4, [%[a]], #4\n\t"
+        "ldr	r5, [%[a]], #4\n\t"
+        "ldr	r6, [%[a]], #4\n\t"
+        "ldr	r7, [%[a]], #4\n\t"
+        "ldr	r8, [%[b]], #4\n\t"
+        "ldr	r9, [%[b]], #4\n\t"
+        "ldr	r10, [%[b]], #4\n\t"
+        "ldr	r14, [%[b]], #4\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r]], #4\n\t"
+        "str	r5, [%[r]], #4\n\t"
+        "str	r6, [%[r]], #4\n\t"
+        "str	r7, [%[r]], #4\n\t"
+        "mov	r4, #0\n\t"
+        "adc	%[c], r4, #0\n\t"
+        "cmp	%[a], r12\n\t"
+        "bne	1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+    );
+
+    return c;
+}
+
+#else
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r12, #0\n\t"
+        "ldr	r4, [%[a], #0]\n\t"
+        "ldr	r5, [%[a], #4]\n\t"
+        "ldr	r6, [%[a], #8]\n\t"
+        "ldr	r7, [%[a], #12]\n\t"
+        "ldr	r8, [%[b], #0]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "ldr	r10, [%[b], #8]\n\t"
+        "ldr	r14, [%[b], #12]\n\t"
+        "adds	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #0]\n\t"
+        "str	r5, [%[r], #4]\n\t"
+        "str	r6, [%[r], #8]\n\t"
+        "str	r7, [%[r], #12]\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "ldr	r5, [%[a], #20]\n\t"
+        "ldr	r6, [%[a], #24]\n\t"
+        "ldr	r7, [%[a], #28]\n\t"
+        "ldr	r8, [%[b], #16]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "ldr	r10, [%[b], #24]\n\t"
+        "ldr	r14, [%[b], #28]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "str	r5, [%[r], #20]\n\t"
+        "str	r6, [%[r], #24]\n\t"
+        "str	r7, [%[r], #28]\n\t"
+        "ldr	r4, [%[a], #32]\n\t"
+        "ldr	r5, [%[a], #36]\n\t"
+        "ldr	r6, [%[a], #40]\n\t"
+        "ldr	r7, [%[a], #44]\n\t"
+        "ldr	r8, [%[b], #32]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "ldr	r10, [%[b], #40]\n\t"
+        "ldr	r14, [%[b], #44]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #32]\n\t"
+        "str	r5, [%[r], #36]\n\t"
+        "str	r6, [%[r], #40]\n\t"
+        "str	r7, [%[r], #44]\n\t"
+        "adc	%[c], r12, r12\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Add two Montgomery form numbers (r = a + b % m).
+ *
+ * r   Result of addition.
+ * a   First number to add in Montogmery form.
+ * b   Second number to add in Montogmery form.
+ * m   Modulus (prime).
+ */
+static void sp_384_mont_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    sp_digit o;
+
+    o = sp_384_add_12(r, a, b);
+    sp_384_cond_sub_12(r, r, m, 0 - o);
+}
+
+/* Double a Montgomery form number (r = a + a % m).
+ *
+ * r   Result of doubling.
+ * a   Number to double in Montogmery form.
+ * m   Modulus (prime).
+ */
+static void sp_384_mont_dbl_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    sp_digit o;
+
+    o = sp_384_add_12(r, a, a);
+    sp_384_cond_sub_12(r, r, m, 0 - o);
+}
+
+/* Triple a Montgomery form number (r = a + a + a % m).
+ *
+ * r   Result of Tripling.
+ * a   Number to triple in Montogmery form.
+ * m   Modulus (prime).
+ */
+static void sp_384_mont_tpl_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    sp_digit o;
+
+    o = sp_384_add_12(r, a, a);
+    sp_384_cond_sub_12(r, r, m, 0 - o);
+    o = sp_384_add_12(r, r, a);
+    sp_384_cond_sub_12(r, r, m, 0 - o);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "add	r12, %[a], #48\n\t"
+        "\n1:\n\t"
+        "rsbs	%[c], %[c], #0\n\t"
+        "ldr	r4, [%[a]], #4\n\t"
+        "ldr	r5, [%[a]], #4\n\t"
+        "ldr	r6, [%[a]], #4\n\t"
+        "ldr	r7, [%[a]], #4\n\t"
+        "ldr	r8, [%[b]], #4\n\t"
+        "ldr	r9, [%[b]], #4\n\t"
+        "ldr	r10, [%[b]], #4\n\t"
+        "ldr	r14, [%[b]], #4\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "sbcs	r6, r6, r10\n\t"
+        "sbcs	r7, r7, r14\n\t"
+        "str	r4, [%[r]], #4\n\t"
+        "str	r5, [%[r]], #4\n\t"
+        "str	r6, [%[r]], #4\n\t"
+        "str	r7, [%[r]], #4\n\t"
+        "sbc	%[c], r4, r4\n\t"
+        "cmp	%[a], r12\n\t"
+        "bne	1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+    );
+
+    return c;
+}
+
+#else
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldr	r3, [%[a], #0]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[a], #8]\n\t"
+        "ldr	r6, [%[a], #12]\n\t"
+        "ldr	r7, [%[b], #0]\n\t"
+        "ldr	r8, [%[b], #4]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "ldr	r10, [%[b], #12]\n\t"
+        "subs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "sbcs	r6, r6, r10\n\t"
+        "str	r3, [%[r], #0]\n\t"
+        "str	r4, [%[r], #4]\n\t"
+        "str	r5, [%[r], #8]\n\t"
+        "str	r6, [%[r], #12]\n\t"
+        "ldr	r3, [%[a], #16]\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "ldr	r5, [%[a], #24]\n\t"
+        "ldr	r6, [%[a], #28]\n\t"
+        "ldr	r7, [%[b], #16]\n\t"
+        "ldr	r8, [%[b], #20]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "ldr	r10, [%[b], #28]\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "sbcs	r6, r6, r10\n\t"
+        "str	r3, [%[r], #16]\n\t"
+        "str	r4, [%[r], #20]\n\t"
+        "str	r5, [%[r], #24]\n\t"
+        "str	r6, [%[r], #28]\n\t"
+        "ldr	r3, [%[a], #32]\n\t"
+        "ldr	r4, [%[a], #36]\n\t"
+        "ldr	r5, [%[a], #40]\n\t"
+        "ldr	r6, [%[a], #44]\n\t"
+        "ldr	r7, [%[b], #32]\n\t"
+        "ldr	r8, [%[b], #36]\n\t"
+        "ldr	r9, [%[b], #40]\n\t"
+        "ldr	r10, [%[b], #44]\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "sbcs	r6, r6, r10\n\t"
+        "str	r3, [%[r], #32]\n\t"
+        "str	r4, [%[r], #36]\n\t"
+        "str	r5, [%[r], #40]\n\t"
+        "str	r6, [%[r], #44]\n\t"
+        "sbc	%[c], %[c], #0\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r  A single precision number representing conditional add result.
+ * a  A single precision number to add with.
+ * b  A single precision number to add.
+ * m  Mask value to apply.
+ */
+static sp_digit sp_384_cond_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        sp_digit m)
+{
+    sp_digit c = 0;
+
+#ifdef WOLFSSL_SP_SMALL
+    __asm__ __volatile__ (
+        "mov	r9, #0\n\t"
+        "mov	r8, #0\n\t"
+        "1:\n\t"
+        "adds	%[c], %[c], #-1\n\t"
+        "ldr	r4, [%[a], r8]\n\t"
+        "ldr	r5, [%[b], r8]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adc	%[c], r9, r9\n\t"
+        "str	r4, [%[r], r8]\n\t"
+        "add	r8, r8, #4\n\t"
+        "cmp	r8, #48\n\t"
+        "blt	1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r4", "r6", "r5", "r7", "r8", "r9"
+    );
+#else
+    __asm__ __volatile__ (
+
+        "mov	r9, #0\n\t"
+        "ldr	r4, [%[a], #0]\n\t"
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r5, [%[b], #0]\n\t"
+        "ldr	r7, [%[b], #4]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #0]\n\t"
+        "str	r6, [%[r], #4]\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "ldr	r6, [%[a], #12]\n\t"
+        "ldr	r5, [%[b], #8]\n\t"
+        "ldr	r7, [%[b], #12]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "str	r6, [%[r], #12]\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "ldr	r6, [%[a], #20]\n\t"
+        "ldr	r5, [%[b], #16]\n\t"
+        "ldr	r7, [%[b], #20]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "str	r6, [%[r], #20]\n\t"
+        "ldr	r4, [%[a], #24]\n\t"
+        "ldr	r6, [%[a], #28]\n\t"
+        "ldr	r5, [%[b], #24]\n\t"
+        "ldr	r7, [%[b], #28]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #24]\n\t"
+        "str	r6, [%[r], #28]\n\t"
+        "ldr	r4, [%[a], #32]\n\t"
+        "ldr	r6, [%[a], #36]\n\t"
+        "ldr	r5, [%[b], #32]\n\t"
+        "ldr	r7, [%[b], #36]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #32]\n\t"
+        "str	r6, [%[r], #36]\n\t"
+        "ldr	r4, [%[a], #40]\n\t"
+        "ldr	r6, [%[a], #44]\n\t"
+        "ldr	r5, [%[b], #40]\n\t"
+        "ldr	r7, [%[b], #44]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #40]\n\t"
+        "str	r6, [%[r], #44]\n\t"
+        "adc	%[c], r9, r9\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r4", "r6", "r5", "r7", "r8", "r9"
+    );
+#endif /* WOLFSSL_SP_SMALL */
+
+    return c;
+}
+
+/* Subtract two Montgomery form numbers (r = a - b % m).
+ *
+ * r   Result of subtration.
+ * a   Number to subtract from in Montogmery form.
+ * b   Number to subtract with in Montogmery form.
+ * m   Modulus (prime).
+ */
+static void sp_384_mont_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    sp_digit o;
+
+    o = sp_384_sub_12(r, a, b);
+    sp_384_cond_add_12(r, r, m, o);
+}
+
+static void sp_384_rshift1_12(sp_digit* r, sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "ldr	r2, [%[a]]\n\t"
+        "ldr	r3, [%[a], #4]\n\t"
+        "lsr	r2, r2, #1\n\t"
+        "orr	r2, r2, r3, lsl #31\n\t"
+        "lsr	r3, r3, #1\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "str	r2, [%[r], #0]\n\t"
+        "orr	r3, r3, r4, lsl #31\n\t"
+        "lsr	r4, r4, #1\n\t"
+        "ldr	r2, [%[a], #12]\n\t"
+        "str	r3, [%[r], #4]\n\t"
+        "orr	r4, r4, r2, lsl #31\n\t"
+        "lsr	r2, r2, #1\n\t"
+        "ldr	r3, [%[a], #16]\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "orr	r2, r2, r3, lsl #31\n\t"
+        "lsr	r3, r3, #1\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "str	r2, [%[r], #12]\n\t"
+        "orr	r3, r3, r4, lsl #31\n\t"
+        "lsr	r4, r4, #1\n\t"
+        "ldr	r2, [%[a], #24]\n\t"
+        "str	r3, [%[r], #16]\n\t"
+        "orr	r4, r4, r2, lsl #31\n\t"
+        "lsr	r2, r2, #1\n\t"
+        "ldr	r3, [%[a], #28]\n\t"
+        "str	r4, [%[r], #20]\n\t"
+        "orr	r2, r2, r3, lsl #31\n\t"
+        "lsr	r3, r3, #1\n\t"
+        "ldr	r4, [%[a], #32]\n\t"
+        "str	r2, [%[r], #24]\n\t"
+        "orr	r3, r3, r4, lsl #31\n\t"
+        "lsr	r4, r4, #1\n\t"
+        "ldr	r2, [%[a], #36]\n\t"
+        "str	r3, [%[r], #28]\n\t"
+        "orr	r4, r4, r2, lsl #31\n\t"
+        "lsr	r2, r2, #1\n\t"
+        "ldr	r3, [%[a], #40]\n\t"
+        "str	r4, [%[r], #32]\n\t"
+        "orr	r2, r2, r3, lsl #31\n\t"
+        "lsr	r3, r3, #1\n\t"
+        "ldr	r4, [%[a], #44]\n\t"
+        "str	r2, [%[r], #36]\n\t"
+        "orr	r3, r3, r4, lsl #31\n\t"
+        "lsr	r4, r4, #1\n\t"
+        "str	r3, [%[r], #40]\n\t"
+        "str	r4, [%[r], #44]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r2", "r3", "r4"
+    );
+}
+
+/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
+ *
+ * r  Result of division by 2.
+ * a  Number to divide.
+ * m  Modulus (prime).
+ */
+static void sp_384_div2_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    sp_digit o;
+
+    o = sp_384_cond_add_12(r, a, m, 0 - (a[0] & 1));
+    sp_384_rshift1_12(r, r);
+    r[11] |= o << 31;
+}
+
+/* Double the Montgomery form projective point p.
+ *
+ * r  Result of doubling point.
+ * p  Point to double.
+ * t  Temporary ordinate data.
+ */
+static void sp_384_proj_point_dbl_12(sp_point_384* r, const sp_point_384* p, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*12;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+
+    x = r->x;
+    y = r->y;
+    z = r->z;
+    /* Put infinity into result. */
+    if (r != p) {
+        r->infinity = p->infinity;
+    }
+
+    /* T1 = Z * Z */
+    sp_384_mont_sqr_12(t1, p->z, p384_mod, p384_mp_mod);
+    /* Z = Y * Z */
+    sp_384_mont_mul_12(z, p->y, p->z, p384_mod, p384_mp_mod);
+    /* Z = 2Z */
+    sp_384_mont_dbl_12(z, z, p384_mod);
+    /* T2 = X - T1 */
+    sp_384_mont_sub_12(t2, p->x, t1, p384_mod);
+    /* T1 = X + T1 */
+    sp_384_mont_add_12(t1, p->x, t1, p384_mod);
+    /* T2 = T1 * T2 */
+    sp_384_mont_mul_12(t2, t1, t2, p384_mod, p384_mp_mod);
+    /* T1 = 3T2 */
+    sp_384_mont_tpl_12(t1, t2, p384_mod);
+    /* Y = 2Y */
+    sp_384_mont_dbl_12(y, p->y, p384_mod);
+    /* Y = Y * Y */
+    sp_384_mont_sqr_12(y, y, p384_mod, p384_mp_mod);
+    /* T2 = Y * Y */
+    sp_384_mont_sqr_12(t2, y, p384_mod, p384_mp_mod);
+    /* T2 = T2/2 */
+    sp_384_div2_12(t2, t2, p384_mod);
+    /* Y = Y * X */
+    sp_384_mont_mul_12(y, y, p->x, p384_mod, p384_mp_mod);
+    /* X = T1 * T1 */
+    sp_384_mont_sqr_12(x, t1, p384_mod, p384_mp_mod);
+    /* X = X - Y */
+    sp_384_mont_sub_12(x, x, y, p384_mod);
+    /* X = X - Y */
+    sp_384_mont_sub_12(x, x, y, p384_mod);
+    /* Y = Y - X */
+    sp_384_mont_sub_12(y, y, x, p384_mod);
+    /* Y = Y * T1 */
+    sp_384_mont_mul_12(y, y, t1, p384_mod, p384_mp_mod);
+    /* Y = Y - T2 */
+    sp_384_mont_sub_12(y, y, t2, p384_mod);
+}
+
+/* Compare two numbers to determine if they are equal.
+ * Constant time implementation.
+ *
+ * a  First number to compare.
+ * b  Second number to compare.
+ * returns 1 when equal and 0 otherwise.
+ */
+static int sp_384_cmp_equal_12(const sp_digit* a, const sp_digit* b)
+{
+    return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3]) |
+            (a[4] ^ b[4]) | (a[5] ^ b[5]) | (a[6] ^ b[6]) | (a[7] ^ b[7]) |
+            (a[8] ^ b[8]) | (a[9] ^ b[9]) | (a[10] ^ b[10]) | (a[11] ^ b[11])) == 0;
+}
+
+/* Add two Montgomery form projective points.
+ *
+ * r  Result of addition.
+ * p  First point to add.
+ * q  Second point to add.
+ * t  Temporary ordinate data.
+ */
+static void sp_384_proj_point_add_12(sp_point_384* r, const sp_point_384* p, const sp_point_384* q,
+        sp_digit* t)
+{
+    const sp_point_384* ap[2];
+    sp_point_384* rp[2];
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*12;
+    sp_digit* t3 = t + 4*12;
+    sp_digit* t4 = t + 6*12;
+    sp_digit* t5 = t + 8*12;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+    int i;
+
+    /* Ensure only the first point is the same as the result. */
+    if (q == r) {
+        const sp_point_384* a = p;
+        p = q;
+        q = a;
+    }
+
+    /* Check double */
+    (void)sp_384_sub_12(t1, p384_mod, q->y);
+    sp_384_norm_12(t1);
+    if ((sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) &
+        (sp_384_cmp_equal_12(p->y, q->y) | sp_384_cmp_equal_12(p->y, t1))) != 0) {
+        sp_384_proj_point_dbl_12(r, p, t);
+    }
+    else {
+        rp[0] = r;
+
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point_384));
+        x = rp[p->infinity | q->infinity]->x;
+        y = rp[p->infinity | q->infinity]->y;
+        z = rp[p->infinity | q->infinity]->z;
+
+        ap[0] = p;
+        ap[1] = q;
+        for (i=0; i<12; i++) {
+            r->x[i] = ap[p->infinity]->x[i];
+        }
+        for (i=0; i<12; i++) {
+            r->y[i] = ap[p->infinity]->y[i];
+        }
+        for (i=0; i<12; i++) {
+            r->z[i] = ap[p->infinity]->z[i];
+        }
+        r->infinity = ap[p->infinity]->infinity;
+
+        /* U1 = X1*Z2^2 */
+        sp_384_mont_sqr_12(t1, q->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t3, t1, q->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t1, t1, x, p384_mod, p384_mp_mod);
+        /* U2 = X2*Z1^2 */
+        sp_384_mont_sqr_12(t2, z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t4, t2, z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t2, t2, q->x, p384_mod, p384_mp_mod);
+        /* S1 = Y1*Z2^3 */
+        sp_384_mont_mul_12(t3, t3, y, p384_mod, p384_mp_mod);
+        /* S2 = Y2*Z1^3 */
+        sp_384_mont_mul_12(t4, t4, q->y, p384_mod, p384_mp_mod);
+        /* H = U2 - U1 */
+        sp_384_mont_sub_12(t2, t2, t1, p384_mod);
+        /* R = S2 - S1 */
+        sp_384_mont_sub_12(t4, t4, t3, p384_mod);
+        /* Z3 = H*Z1*Z2 */
+        sp_384_mont_mul_12(z, z, q->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(z, z, t2, p384_mod, p384_mp_mod);
+        /* X3 = R^2 - H^3 - 2*U1*H^2 */
+        sp_384_mont_sqr_12(x, t4, p384_mod, p384_mp_mod);
+        sp_384_mont_sqr_12(t5, t2, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(y, t1, t5, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t5, t5, t2, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_12(x, x, t5, p384_mod);
+        sp_384_mont_dbl_12(t1, y, p384_mod);
+        sp_384_mont_sub_12(x, x, t1, p384_mod);
+        /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
+        sp_384_mont_sub_12(y, y, x, p384_mod);
+        sp_384_mont_mul_12(y, y, t4, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t5, t5, t3, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_12(y, y, t5, p384_mod);
+    }
+}
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_fast_12(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
+        int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 td[16];
+    sp_point_384 rtd;
+    sp_digit tmpd[2 * 12 * 6];
+#endif
+    sp_point_384* t;
+    sp_point_384* rt;
+    sp_digit* tmp;
+    sp_digit n;
+    int i;
+    int c, y;
+    int err;
+
+    (void)heap;
+
+    err = sp_384_point_new_12(heap, rtd, rt);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 16, heap, DYNAMIC_TYPE_ECC);
+    if (t == NULL)
+        err = MEMORY_E;
+    tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, heap,
+                             DYNAMIC_TYPE_ECC);
+    if (tmp == NULL)
+        err = MEMORY_E;
+#else
+    t = td;
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        /* t[0] = {0, 0, 1} * norm */
+        XMEMSET(&t[0], 0, sizeof(t[0]));
+        t[0].infinity = 1;
+        /* t[1] = {g->x, g->y, g->z} * norm */
+        (void)sp_384_mod_mul_norm_12(t[1].x, g->x, p384_mod);
+        (void)sp_384_mod_mul_norm_12(t[1].y, g->y, p384_mod);
+        (void)sp_384_mod_mul_norm_12(t[1].z, g->z, p384_mod);
+        t[1].infinity = 0;
+        sp_384_proj_point_dbl_12(&t[ 2], &t[ 1], tmp);
+        t[ 2].infinity = 0;
+        sp_384_proj_point_add_12(&t[ 3], &t[ 2], &t[ 1], tmp);
+        t[ 3].infinity = 0;
+        sp_384_proj_point_dbl_12(&t[ 4], &t[ 2], tmp);
+        t[ 4].infinity = 0;
+        sp_384_proj_point_add_12(&t[ 5], &t[ 3], &t[ 2], tmp);
+        t[ 5].infinity = 0;
+        sp_384_proj_point_dbl_12(&t[ 6], &t[ 3], tmp);
+        t[ 6].infinity = 0;
+        sp_384_proj_point_add_12(&t[ 7], &t[ 4], &t[ 3], tmp);
+        t[ 7].infinity = 0;
+        sp_384_proj_point_dbl_12(&t[ 8], &t[ 4], tmp);
+        t[ 8].infinity = 0;
+        sp_384_proj_point_add_12(&t[ 9], &t[ 5], &t[ 4], tmp);
+        t[ 9].infinity = 0;
+        sp_384_proj_point_dbl_12(&t[10], &t[ 5], tmp);
+        t[10].infinity = 0;
+        sp_384_proj_point_add_12(&t[11], &t[ 6], &t[ 5], tmp);
+        t[11].infinity = 0;
+        sp_384_proj_point_dbl_12(&t[12], &t[ 6], tmp);
+        t[12].infinity = 0;
+        sp_384_proj_point_add_12(&t[13], &t[ 7], &t[ 6], tmp);
+        t[13].infinity = 0;
+        sp_384_proj_point_dbl_12(&t[14], &t[ 7], tmp);
+        t[14].infinity = 0;
+        sp_384_proj_point_add_12(&t[15], &t[ 8], &t[ 7], tmp);
+        t[15].infinity = 0;
+
+        i = 10;
+        n = k[i+1] << 0;
+        c = 28;
+        y = n >> 28;
+        XMEMCPY(rt, &t[y], sizeof(sp_point_384));
+        n <<= 4;
+        for (; i>=0 || c>=4; ) {
+            if (c < 4) {
+                n |= k[i--];
+                c += 32;
+            }
+            y = (n >> 28) & 0xf;
+            n <<= 4;
+            c -= 4;
+
+            sp_384_proj_point_dbl_12(rt, rt, tmp);
+            sp_384_proj_point_dbl_12(rt, rt, tmp);
+            sp_384_proj_point_dbl_12(rt, rt, tmp);
+            sp_384_proj_point_dbl_12(rt, rt, tmp);
+
+            sp_384_proj_point_add_12(rt, rt, &t[y], tmp);
+        }
+
+        if (map != 0) {
+            sp_384_map_12(r, rt, tmp);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_384));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 12 * 6);
+        XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+    }
+    if (t != NULL) {
+        XMEMSET(t, 0, sizeof(sp_point_384) * 16);
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    ForceZero(tmpd, sizeof(tmpd));
+    ForceZero(td, sizeof(td));
+#endif
+    sp_384_point_free_12(rt, 1, heap);
+
+    return err;
+}
+
+/* A table entry for pre-computed points. */
+typedef struct sp_table_entry_384 {
+    sp_digit x[12];
+    sp_digit y[12];
+} sp_table_entry_384;
+
+#ifdef FP_ECC
+/* Double the Montgomery form projective point p a number of times.
+ *
+ * r  Result of repeated doubling of point.
+ * p  Point to double.
+ * n  Number of times to double
+ * t  Temporary ordinate data.
+ */
+static void sp_384_proj_point_dbl_n_12(sp_point_384* p, int n, sp_digit* t)
+{
+    sp_digit* w = t;
+    sp_digit* a = t + 2*12;
+    sp_digit* b = t + 4*12;
+    sp_digit* t1 = t + 6*12;
+    sp_digit* t2 = t + 8*12;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+
+    x = p->x;
+    y = p->y;
+    z = p->z;
+
+    /* Y = 2*Y */
+    sp_384_mont_dbl_12(y, y, p384_mod);
+    /* W = Z^4 */
+    sp_384_mont_sqr_12(w, z, p384_mod, p384_mp_mod);
+    sp_384_mont_sqr_12(w, w, p384_mod, p384_mp_mod);
+
+#ifndef WOLFSSL_SP_SMALL
+    while (--n > 0)
+#else
+    while (--n >= 0)
+#endif
+    {
+        /* A = 3*(X^2 - W) */
+        sp_384_mont_sqr_12(t1, x, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_12(t1, t1, w, p384_mod);
+        sp_384_mont_tpl_12(a, t1, p384_mod);
+        /* B = X*Y^2 */
+        sp_384_mont_sqr_12(t1, y, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(b, t1, x, p384_mod, p384_mp_mod);
+        /* X = A^2 - 2B */
+        sp_384_mont_sqr_12(x, a, p384_mod, p384_mp_mod);
+        sp_384_mont_dbl_12(t2, b, p384_mod);
+        sp_384_mont_sub_12(x, x, t2, p384_mod);
+        /* Z = Z*Y */
+        sp_384_mont_mul_12(z, z, y, p384_mod, p384_mp_mod);
+        /* t2 = Y^4 */
+        sp_384_mont_sqr_12(t1, t1, p384_mod, p384_mp_mod);
+#ifdef WOLFSSL_SP_SMALL
+        if (n != 0)
+#endif
+        {
+            /* W = W*Y^4 */
+            sp_384_mont_mul_12(w, w, t1, p384_mod, p384_mp_mod);
+        }
+        /* y = 2*A*(B - X) - Y^4 */
+        sp_384_mont_sub_12(y, b, x, p384_mod);
+        sp_384_mont_mul_12(y, y, a, p384_mod, p384_mp_mod);
+        sp_384_mont_dbl_12(y, y, p384_mod);
+        sp_384_mont_sub_12(y, y, t1, p384_mod);
+    }
+#ifndef WOLFSSL_SP_SMALL
+    /* A = 3*(X^2 - W) */
+    sp_384_mont_sqr_12(t1, x, p384_mod, p384_mp_mod);
+    sp_384_mont_sub_12(t1, t1, w, p384_mod);
+    sp_384_mont_tpl_12(a, t1, p384_mod);
+    /* B = X*Y^2 */
+    sp_384_mont_sqr_12(t1, y, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_12(b, t1, x, p384_mod, p384_mp_mod);
+    /* X = A^2 - 2B */
+    sp_384_mont_sqr_12(x, a, p384_mod, p384_mp_mod);
+    sp_384_mont_dbl_12(t2, b, p384_mod);
+    sp_384_mont_sub_12(x, x, t2, p384_mod);
+    /* Z = Z*Y */
+    sp_384_mont_mul_12(z, z, y, p384_mod, p384_mp_mod);
+    /* t2 = Y^4 */
+    sp_384_mont_sqr_12(t1, t1, p384_mod, p384_mp_mod);
+    /* y = 2*A*(B - X) - Y^4 */
+    sp_384_mont_sub_12(y, b, x, p384_mod);
+    sp_384_mont_mul_12(y, y, a, p384_mod, p384_mp_mod);
+    sp_384_mont_dbl_12(y, y, p384_mod);
+    sp_384_mont_sub_12(y, y, t1, p384_mod);
+#endif
+    /* Y = Y/2 */
+    sp_384_div2_12(y, y, p384_mod);
+}
+
+#endif /* FP_ECC */
+/* Add two Montgomery form projective points. The second point has a q value of
+ * one.
+ * Only the first point can be the same pointer as the result point.
+ *
+ * r  Result of addition.
+ * p  First point to add.
+ * q  Second point to add.
+ * t  Temporary ordinate data.
+ */
+static void sp_384_proj_point_add_qz1_12(sp_point_384* r, const sp_point_384* p,
+        const sp_point_384* q, sp_digit* t)
+{
+    const sp_point_384* ap[2];
+    sp_point_384* rp[2];
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*12;
+    sp_digit* t3 = t + 4*12;
+    sp_digit* t4 = t + 6*12;
+    sp_digit* t5 = t + 8*12;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+    int i;
+
+    /* Check double */
+    (void)sp_384_sub_12(t1, p384_mod, q->y);
+    sp_384_norm_12(t1);
+    if ((sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) &
+        (sp_384_cmp_equal_12(p->y, q->y) | sp_384_cmp_equal_12(p->y, t1))) != 0) {
+        sp_384_proj_point_dbl_12(r, p, t);
+    }
+    else {
+        rp[0] = r;
+
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point_384));
+        x = rp[p->infinity | q->infinity]->x;
+        y = rp[p->infinity | q->infinity]->y;
+        z = rp[p->infinity | q->infinity]->z;
+
+        ap[0] = p;
+        ap[1] = q;
+        for (i=0; i<12; i++) {
+            r->x[i] = ap[p->infinity]->x[i];
+        }
+        for (i=0; i<12; i++) {
+            r->y[i] = ap[p->infinity]->y[i];
+        }
+        for (i=0; i<12; i++) {
+            r->z[i] = ap[p->infinity]->z[i];
+        }
+        r->infinity = ap[p->infinity]->infinity;
+
+        /* U2 = X2*Z1^2 */
+        sp_384_mont_sqr_12(t2, z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t4, t2, z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t2, t2, q->x, p384_mod, p384_mp_mod);
+        /* S2 = Y2*Z1^3 */
+        sp_384_mont_mul_12(t4, t4, q->y, p384_mod, p384_mp_mod);
+        /* H = U2 - X1 */
+        sp_384_mont_sub_12(t2, t2, x, p384_mod);
+        /* R = S2 - Y1 */
+        sp_384_mont_sub_12(t4, t4, y, p384_mod);
+        /* Z3 = H*Z1 */
+        sp_384_mont_mul_12(z, z, t2, p384_mod, p384_mp_mod);
+        /* X3 = R^2 - H^3 - 2*X1*H^2 */
+        sp_384_mont_sqr_12(t1, t4, p384_mod, p384_mp_mod);
+        sp_384_mont_sqr_12(t5, t2, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t3, x, t5, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t5, t5, t2, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_12(x, t1, t5, p384_mod);
+        sp_384_mont_dbl_12(t1, t3, p384_mod);
+        sp_384_mont_sub_12(x, x, t1, p384_mod);
+        /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */
+        sp_384_mont_sub_12(t3, t3, x, p384_mod);
+        sp_384_mont_mul_12(t3, t3, t4, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t5, t5, y, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_12(y, t3, t5, p384_mod);
+    }
+}
+
+#ifdef WOLFSSL_SP_SMALL
+#ifdef FP_ECC
+/* Convert the projective point to affine.
+ * Ordinates are in Montgomery form.
+ *
+ * a  Point to convert.
+ * t  Temporary data.
+ */
+static void sp_384_proj_to_affine_12(sp_point_384* a, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2 * 12;
+    sp_digit* tmp = t + 4 * 12;
+
+    sp_384_mont_inv_12(t1, a->z, tmp);
+
+    sp_384_mont_sqr_12(t2, t1, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_12(t1, t2, t1, p384_mod, p384_mp_mod);
+
+    sp_384_mont_mul_12(a->x, a->x, t2, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_12(a->y, a->y, t1, p384_mod, p384_mp_mod);
+    XMEMCPY(a->z, p384_norm_mod, sizeof(p384_norm_mod));
+}
+
+/* Generate the pre-computed table of points for the base point.
+ *
+ * a      The base point.
+ * table  Place to store generated point data.
+ * tmp    Temporary data.
+ * heap  Heap to use for allocation.
+ */
+static int sp_384_gen_stripe_table_12(const sp_point_384* a,
+        sp_table_entry_384* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 td, s1d, s2d;
+#endif
+    sp_point_384* t;
+    sp_point_384* s1 = NULL;
+    sp_point_384* s2 = NULL;
+    int i, j;
+    int err;
+
+    (void)heap;
+
+    err = sp_384_point_new_12(heap, td, t);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, s1d, s1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, s2d, s2);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_12(t->x, a->x, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_12(t->y, a->y, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_12(t->z, a->z, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        t->infinity = 0;
+        sp_384_proj_to_affine_12(t, tmp);
+
+        XMEMCPY(s1->z, p384_norm_mod, sizeof(p384_norm_mod));
+        s1->infinity = 0;
+        XMEMCPY(s2->z, p384_norm_mod, sizeof(p384_norm_mod));
+        s2->infinity = 0;
+
+        /* table[0] = {0, 0, infinity} */
+        XMEMSET(&table[0], 0, sizeof(sp_table_entry_384));
+        /* table[1] = Affine version of 'a' in Montgomery form */
+        XMEMCPY(table[1].x, t->x, sizeof(table->x));
+        XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+        for (i=1; i<4; i++) {
+            sp_384_proj_point_dbl_n_12(t, 96, tmp);
+            sp_384_proj_to_affine_12(t, tmp);
+            XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+            XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+        }
+
+        for (i=1; i<4; i++) {
+            XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+            XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+            for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+                XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+                XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+                sp_384_proj_point_add_qz1_12(t, s1, s2, tmp);
+                sp_384_proj_to_affine_12(t, tmp);
+                XMEMCPY(table[j].x, t->x, sizeof(table->x));
+                XMEMCPY(table[j].y, t->y, sizeof(table->y));
+            }
+        }
+    }
+
+    sp_384_point_free_12(s2, 0, heap);
+    sp_384_point_free_12(s1, 0, heap);
+    sp_384_point_free_12( t, 0, heap);
+
+    return err;
+}
+
+#endif /* FP_ECC */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_stripe_12(sp_point_384* r, const sp_point_384* g,
+        const sp_table_entry_384* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 rtd;
+    sp_point_384 pd;
+    sp_digit td[2 * 12 * 6];
+#endif
+    sp_point_384* rt;
+    sp_point_384* p = NULL;
+    sp_digit* t;
+    int i, j;
+    int y, x;
+    int err;
+
+    (void)g;
+    (void)heap;
+
+
+    err = sp_384_point_new_12(heap, rtd, rt);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, heap,
+                           DYNAMIC_TYPE_ECC);
+    if (t == NULL) {
+        err = MEMORY_E;
+    }
+#else
+    t = td;
+#endif
+
+    if (err == MP_OKAY) {
+        XMEMCPY(p->z, p384_norm_mod, sizeof(p384_norm_mod));
+        XMEMCPY(rt->z, p384_norm_mod, sizeof(p384_norm_mod));
+
+        y = 0;
+        for (j=0,x=95; j<4; j++,x+=96) {
+            y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+        }
+        XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+        XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+        rt->infinity = !y;
+        for (i=94; i>=0; i--) {
+            y = 0;
+            for (j=0,x=i; j<4; j++,x+=96) {
+                y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+            }
+
+            sp_384_proj_point_dbl_12(rt, rt, t);
+            XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+            XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+            p->infinity = !y;
+            sp_384_proj_point_add_qz1_12(rt, rt, p, t);
+        }
+
+        if (map != 0) {
+            sp_384_map_12(r, rt, t);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_384));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(p, 0, heap);
+    sp_384_point_free_12(rt, 0, heap);
+
+    return err;
+}
+
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+    #define FP_ENTRIES 16
+#endif
+
+typedef struct sp_cache_384_t {
+    sp_digit x[12];
+    sp_digit y[12];
+    sp_table_entry_384 table[16];
+    uint32_t cnt;
+    int set;
+} sp_cache_384_t;
+
+static THREAD_LS_T sp_cache_384_t sp_cache_384[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_384_last = -1;
+static THREAD_LS_T int sp_cache_384_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+    static volatile int initCacheMutex_384 = 0;
+    static wolfSSL_Mutex sp_cache_384_lock;
+#endif
+
+static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache)
+{
+    int i, j;
+    uint32_t least;
+
+    if (sp_cache_384_inited == 0) {
+        for (i=0; i<FP_ENTRIES; i++) {
+            sp_cache_384[i].set = 0;
+        }
+        sp_cache_384_inited = 1;
+    }
+
+    /* Compare point with those in cache. */
+    for (i=0; i<FP_ENTRIES; i++) {
+        if (!sp_cache_384[i].set)
+            continue;
+
+        if (sp_384_cmp_equal_12(g->x, sp_cache_384[i].x) &
+                           sp_384_cmp_equal_12(g->y, sp_cache_384[i].y)) {
+            sp_cache_384[i].cnt++;
+            break;
+        }
+    }
+
+    /* No match. */
+    if (i == FP_ENTRIES) {
+        /* Find empty entry. */
+        i = (sp_cache_384_last + 1) % FP_ENTRIES;
+        for (; i != sp_cache_384_last; i=(i+1)%FP_ENTRIES) {
+            if (!sp_cache_384[i].set) {
+                break;
+            }
+        }
+
+        /* Evict least used. */
+        if (i == sp_cache_384_last) {
+            least = sp_cache_384[0].cnt;
+            for (j=1; j<FP_ENTRIES; j++) {
+                if (sp_cache_384[j].cnt < least) {
+                    i = j;
+                    least = sp_cache_384[i].cnt;
+                }
+            }
+        }
+
+        XMEMCPY(sp_cache_384[i].x, g->x, sizeof(sp_cache_384[i].x));
+        XMEMCPY(sp_cache_384[i].y, g->y, sizeof(sp_cache_384[i].y));
+        sp_cache_384[i].set = 1;
+        sp_cache_384[i].cnt = 1;
+    }
+
+    *cache = &sp_cache_384[i];
+    sp_cache_384_last = i;
+}
+#endif /* FP_ECC */
+
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_12(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
+        int map, void* heap)
+{
+#ifndef FP_ECC
+    return sp_384_ecc_mulmod_fast_12(r, g, k, map, heap);
+#else
+    sp_digit tmp[2 * 12 * 7];
+    sp_cache_384_t* cache;
+    int err = MP_OKAY;
+
+#ifndef HAVE_THREAD_LS
+    if (initCacheMutex_384 == 0) {
+         wc_InitMutex(&sp_cache_384_lock);
+         initCacheMutex_384 = 1;
+    }
+    if (wc_LockMutex(&sp_cache_384_lock) != 0)
+       err = BAD_MUTEX_E;
+#endif /* HAVE_THREAD_LS */
+
+    if (err == MP_OKAY) {
+        sp_ecc_get_cache_384(g, &cache);
+        if (cache->cnt == 2)
+            sp_384_gen_stripe_table_12(g, cache->table, tmp, heap);
+
+#ifndef HAVE_THREAD_LS
+        wc_UnLockMutex(&sp_cache_384_lock);
+#endif /* HAVE_THREAD_LS */
+
+        if (cache->cnt < 2) {
+            err = sp_384_ecc_mulmod_fast_12(r, g, k, map, heap);
+        }
+        else {
+            err = sp_384_ecc_mulmod_stripe_12(r, g, cache->table, k,
+                    map, heap);
+        }
+    }
+
+    return err;
+#endif
+}
+
+#else
+#ifdef FP_ECC
+/* Generate the pre-computed table of points for the base point.
+ *
+ * a      The base point.
+ * table  Place to store generated point data.
+ * tmp    Temporary data.
+ * heap  Heap to use for allocation.
+ */
+static int sp_384_gen_stripe_table_12(const sp_point_384* a,
+        sp_table_entry_384* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 td, s1d, s2d;
+#endif
+    sp_point_384* t;
+    sp_point_384* s1 = NULL;
+    sp_point_384* s2 = NULL;
+    int i, j;
+    int err;
+
+    (void)heap;
+
+    err = sp_384_point_new_12(heap, td, t);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, s1d, s1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, s2d, s2);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_12(t->x, a->x, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_12(t->y, a->y, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_12(t->z, a->z, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        t->infinity = 0;
+        sp_384_proj_to_affine_12(t, tmp);
+
+        XMEMCPY(s1->z, p384_norm_mod, sizeof(p384_norm_mod));
+        s1->infinity = 0;
+        XMEMCPY(s2->z, p384_norm_mod, sizeof(p384_norm_mod));
+        s2->infinity = 0;
+
+        /* table[0] = {0, 0, infinity} */
+        XMEMSET(&table[0], 0, sizeof(sp_table_entry_384));
+        /* table[1] = Affine version of 'a' in Montgomery form */
+        XMEMCPY(table[1].x, t->x, sizeof(table->x));
+        XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+        for (i=1; i<8; i++) {
+            sp_384_proj_point_dbl_n_12(t, 48, tmp);
+            sp_384_proj_to_affine_12(t, tmp);
+            XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+            XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+        }
+
+        for (i=1; i<8; i++) {
+            XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+            XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+            for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+                XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+                XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+                sp_384_proj_point_add_qz1_12(t, s1, s2, tmp);
+                sp_384_proj_to_affine_12(t, tmp);
+                XMEMCPY(table[j].x, t->x, sizeof(table->x));
+                XMEMCPY(table[j].y, t->y, sizeof(table->y));
+            }
+        }
+    }
+
+    sp_384_point_free_12(s2, 0, heap);
+    sp_384_point_free_12(s1, 0, heap);
+    sp_384_point_free_12( t, 0, heap);
+
+    return err;
+}
+
+#endif /* FP_ECC */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_stripe_12(sp_point_384* r, const sp_point_384* g,
+        const sp_table_entry_384* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 rtd;
+    sp_point_384 pd;
+    sp_digit td[2 * 12 * 6];
+#endif
+    sp_point_384* rt;
+    sp_point_384* p = NULL;
+    sp_digit* t;
+    int i, j;
+    int y, x;
+    int err;
+
+    (void)g;
+    (void)heap;
+
+
+    err = sp_384_point_new_12(heap, rtd, rt);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, heap,
+                           DYNAMIC_TYPE_ECC);
+    if (t == NULL) {
+        err = MEMORY_E;
+    }
+#else
+    t = td;
+#endif
+
+    if (err == MP_OKAY) {
+        XMEMCPY(p->z, p384_norm_mod, sizeof(p384_norm_mod));
+        XMEMCPY(rt->z, p384_norm_mod, sizeof(p384_norm_mod));
+
+        y = 0;
+        for (j=0,x=47; j<8; j++,x+=48) {
+            y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+        }
+        XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+        XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+        rt->infinity = !y;
+        for (i=46; i>=0; i--) {
+            y = 0;
+            for (j=0,x=i; j<8; j++,x+=48) {
+                y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+            }
+
+            sp_384_proj_point_dbl_12(rt, rt, t);
+            XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+            XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+            p->infinity = !y;
+            sp_384_proj_point_add_qz1_12(rt, rt, p, t);
+        }
+
+        if (map != 0) {
+            sp_384_map_12(r, rt, t);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_384));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(p, 0, heap);
+    sp_384_point_free_12(rt, 0, heap);
+
+    return err;
+}
+
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+    #define FP_ENTRIES 16
+#endif
+
+typedef struct sp_cache_384_t {
+    sp_digit x[12];
+    sp_digit y[12];
+    sp_table_entry_384 table[256];
+    uint32_t cnt;
+    int set;
+} sp_cache_384_t;
+
+static THREAD_LS_T sp_cache_384_t sp_cache_384[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_384_last = -1;
+static THREAD_LS_T int sp_cache_384_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+    static volatile int initCacheMutex_384 = 0;
+    static wolfSSL_Mutex sp_cache_384_lock;
+#endif
+
+static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache)
+{
+    int i, j;
+    uint32_t least;
+
+    if (sp_cache_384_inited == 0) {
+        for (i=0; i<FP_ENTRIES; i++) {
+            sp_cache_384[i].set = 0;
+        }
+        sp_cache_384_inited = 1;
+    }
+
+    /* Compare point with those in cache. */
+    for (i=0; i<FP_ENTRIES; i++) {
+        if (!sp_cache_384[i].set)
+            continue;
+
+        if (sp_384_cmp_equal_12(g->x, sp_cache_384[i].x) &
+                           sp_384_cmp_equal_12(g->y, sp_cache_384[i].y)) {
+            sp_cache_384[i].cnt++;
+            break;
+        }
+    }
+
+    /* No match. */
+    if (i == FP_ENTRIES) {
+        /* Find empty entry. */
+        i = (sp_cache_384_last + 1) % FP_ENTRIES;
+        for (; i != sp_cache_384_last; i=(i+1)%FP_ENTRIES) {
+            if (!sp_cache_384[i].set) {
+                break;
+            }
+        }
+
+        /* Evict least used. */
+        if (i == sp_cache_384_last) {
+            least = sp_cache_384[0].cnt;
+            for (j=1; j<FP_ENTRIES; j++) {
+                if (sp_cache_384[j].cnt < least) {
+                    i = j;
+                    least = sp_cache_384[i].cnt;
+                }
+            }
+        }
+
+        XMEMCPY(sp_cache_384[i].x, g->x, sizeof(sp_cache_384[i].x));
+        XMEMCPY(sp_cache_384[i].y, g->y, sizeof(sp_cache_384[i].y));
+        sp_cache_384[i].set = 1;
+        sp_cache_384[i].cnt = 1;
+    }
+
+    *cache = &sp_cache_384[i];
+    sp_cache_384_last = i;
+}
+#endif /* FP_ECC */
+
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_12(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
+        int map, void* heap)
+{
+#ifndef FP_ECC
+    return sp_384_ecc_mulmod_fast_12(r, g, k, map, heap);
+#else
+    sp_digit tmp[2 * 12 * 7];
+    sp_cache_384_t* cache;
+    int err = MP_OKAY;
+
+#ifndef HAVE_THREAD_LS
+    if (initCacheMutex_384 == 0) {
+         wc_InitMutex(&sp_cache_384_lock);
+         initCacheMutex_384 = 1;
+    }
+    if (wc_LockMutex(&sp_cache_384_lock) != 0)
+       err = BAD_MUTEX_E;
+#endif /* HAVE_THREAD_LS */
+
+    if (err == MP_OKAY) {
+        sp_ecc_get_cache_384(g, &cache);
+        if (cache->cnt == 2)
+            sp_384_gen_stripe_table_12(g, cache->table, tmp, heap);
+
+#ifndef HAVE_THREAD_LS
+        wc_UnLockMutex(&sp_cache_384_lock);
+#endif /* HAVE_THREAD_LS */
+
+        if (cache->cnt < 2) {
+            err = sp_384_ecc_mulmod_fast_12(r, g, k, map, heap);
+        }
+        else {
+            err = sp_384_ecc_mulmod_stripe_12(r, g, cache->table, k,
+                    map, heap);
+        }
+    }
+
+    return err;
+#endif
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * km    Scalar to multiply by.
+ * p     Point to multiply.
+ * r     Resulting point.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_384(mp_int* km, ecc_point* gm, ecc_point* r, int map,
+        void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[12];
+#endif
+    sp_point_384* point;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    err = sp_384_point_new_12(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL)
+            err = MEMORY_E;
+    }
+#else
+    k = kd;
+#endif
+    if (err == MP_OKAY) {
+        sp_384_from_mp(k, 12, km);
+        sp_384_point_from_ecc_point_12(point, gm);
+
+            err = sp_384_ecc_mulmod_12(point, point, k, map, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_to_ecc_point_12(point, r);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(point, 0, heap);
+
+    return err;
+}
+
+#ifdef WOLFSSL_SP_SMALL
+static const sp_table_entry_384 p384_table[16] = {
+    /* 0 */
+    { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 */
+    { { 0x49c0b528,0x3dd07566,0xa0d6ce38,0x20e378e2,0x541b4d6e,0x879c3afc,
+        0x59a30eff,0x64548684,0x614ede2b,0x812ff723,0x299e1513,0x4d3aadc2 },
+      { 0x4b03a4fe,0x23043dad,0x7bb4a9ac,0xa1bfa8bf,0x2e83b050,0x8bade756,
+        0x68f4ffd9,0xc6c35219,0x3969a840,0xdd800226,0x5a15c5e9,0x2b78abc2 } },
+    /* 2 */
+    { { 0xf26feef9,0x24480c57,0x3a0e1240,0xc31a2694,0x273e2bc7,0x735002c3,
+        0x3ef1ed4c,0x8c42e9c5,0x7f4948e8,0x028babf6,0x8a978632,0x6a502f43 },
+      { 0xb74536fe,0xf5f13a46,0xd8a9f0eb,0x1d218bab,0x37232768,0x30f36bcc,
+        0x576e8c18,0xc5317b31,0x9bbcb766,0xef1d57a6,0xb3e3d4dc,0x917c4930 } },
+    /* 3 */
+    { { 0xe349ddd0,0x11426e2e,0x9b2fc250,0x9f117ef9,0xec0174a6,0xff36b480,
+        0x18458466,0x4f4bde76,0x05806049,0x2f2edb6d,0x19dfca92,0x8adc75d1 },
+      { 0xb7d5a7ce,0xa619d097,0xa34411e9,0x874275e5,0x0da4b4ef,0x5403e047,
+        0x77901d8f,0x2ebaafd9,0xa747170f,0x5e63ebce,0x7f9d8036,0x12a36944 } },
+    /* 4 */
+    { { 0x2f9fbe67,0x378205de,0x7f728e44,0xc4afcb83,0x682e00f1,0xdbcec06c,
+        0x114d5423,0xf2a145c3,0x7a52463e,0xa01d9874,0x7d717b0a,0xfc0935b1 },
+      { 0xd4d01f95,0x9653bc4f,0x9560ad34,0x9aa83ea8,0xaf8e3f3f,0xf77943dc,
+        0xe86fe16e,0x70774a10,0xbf9ffdcf,0x6b62e6f1,0x588745c9,0x8a72f39e } },
+    /* 5 */
+    { { 0x2341c342,0x73ade4da,0xea704422,0xdd326e54,0x3741cef3,0x336c7d98,
+        0x59e61549,0x1eafa00d,0xbd9a3efd,0xcd3ed892,0xc5c6c7e4,0x03faf26c },
+      { 0x3045f8ac,0x087e2fcf,0x174f1e73,0x14a65532,0xfe0af9a7,0x2cf84f28,
+        0x2cdc935b,0xddfd7a84,0x6929c895,0x4c0f117b,0x4c8bcfcc,0x356572d6 } },
+    /* 6 */
+    { { 0x3f3b236f,0xfab08607,0x81e221da,0x19e9d41d,0x3927b428,0xf3f6571e,
+        0x7550f1f6,0x4348a933,0xa85e62f0,0x7167b996,0x7f5452bf,0x62d43759 },
+      { 0xf2955926,0xd85feb9e,0x6df78353,0x440a561f,0x9ca36b59,0x389668ec,
+        0xa22da016,0x052bf1a1,0xf6093254,0xbdfbff72,0xe22209f3,0x94e50f28 } },
+    /* 7 */
+    { { 0x3062e8af,0x90b2e5b3,0xe8a3d369,0xa8572375,0x201db7b1,0x3fe1b00b,
+        0xee651aa2,0xe926def0,0xb9b10ad7,0x6542c9be,0xa2fcbe74,0x098e309b },
+      { 0xfff1d63f,0x779deeb3,0x20bfd374,0x23d0e80a,0x8768f797,0x8452bb3b,
+        0x1f952856,0xcf75bb4d,0x29ea3faa,0x8fe6b400,0x81373a53,0x12bd3e40 } },
+    /* 8 */
+    { { 0x16973cf4,0x070d34e1,0x7e4f34f7,0x20aee08b,0x5eb8ad29,0x269af9b9,
+        0xa6a45dda,0xdde0a036,0x63df41e0,0xa18b528e,0xa260df2a,0x03cc71b2 },
+      { 0xa06b1dd7,0x24a6770a,0x9d2675d3,0x5bfa9c11,0x96844432,0x73c1e2a1,
+        0x131a6cf0,0x3660558d,0x2ee79454,0xb0289c83,0xc6d8ddcd,0xa6aefb01 } },
+    /* 9 */
+    { { 0x01ab5245,0xba1464b4,0xc48d93ff,0x9b8d0b6d,0x93ad272c,0x939867dc,
+        0xae9fdc77,0xbebe085e,0x894ea8bd,0x73ae5103,0x39ac22e1,0x740fc89a },
+      { 0x28e23b23,0x5e28b0a3,0xe13104d0,0x2352722e,0xb0a2640d,0xf4667a18,
+        0x49bb37c3,0xac74a72e,0xe81e183a,0x79f734f0,0x3fd9c0eb,0xbffe5b6c } },
+    /* 10 */
+    { { 0x00623f3b,0x03cf2922,0x5f29ebff,0x095c7111,0x80aa6823,0x42d72247,
+        0x7458c0b0,0x044c7ba1,0x0959ec20,0xca62f7ef,0xf8ca929f,0x40ae2ab7 },
+      { 0xa927b102,0xb8c5377a,0xdc031771,0x398a86a0,0xc216a406,0x04908f9d,
+        0x918d3300,0xb423a73a,0xe0b94739,0x634b0ff1,0x2d69f697,0xe29de725 } },
+    /* 11 */
+    { { 0x8435af04,0x744d1400,0xfec192da,0x5f255b1d,0x336dc542,0x1f17dc12,
+        0x636a68a8,0x5c90c2a7,0x7704ca1e,0x960c9eb7,0x6fb3d65a,0x9de8cf1e },
+      { 0x511d3d06,0xc60fee0d,0xf9eb52c7,0x466e2313,0x206b0914,0x743c0f5f,
+        0x2191aa4d,0x42f55bac,0xffebdbc2,0xcefc7c8f,0xe6e8ed1c,0xd4fa6081 } },
+    /* 12 */
+    { { 0x98683186,0x867db639,0xddcc4ea9,0xfb5cf424,0xd4f0e7bd,0xcc9a7ffe,
+        0x7a779f7e,0x7c57f71c,0xd6b25ef2,0x90774079,0xb4081680,0x90eae903 },
+      { 0x0ee1fceb,0xdf2aae5e,0xe86c1a1f,0x3ff1da24,0xca193edf,0x80f587d6,
+        0xdc9b9d6a,0xa5695523,0x85920303,0x7b840900,0xba6dbdef,0x1efa4dfc } },
+    /* 13 */
+    { { 0xe0540015,0xfbd838f9,0xc39077dc,0x2c323946,0xad619124,0x8b1fb9e6,
+        0x0ca62ea8,0x9612440c,0x2dbe00ff,0x9ad9b52c,0xae197643,0xf52abaa1 },
+      { 0x2cac32ad,0xd0e89894,0x62a98f91,0xdfb79e42,0x276f55cb,0x65452ecf,
+        0x7ad23e12,0xdb1ac0d2,0xde4986f0,0xf68c5f6a,0x82ce327d,0x389ac37b } },
+    /* 14 */
+    { { 0xb8a9e8c9,0xcd96866d,0x5bb8091e,0xa11963b8,0x045b3cd2,0xc7f90d53,
+        0x80f36504,0x755a72b5,0x21d3751c,0x46f8b399,0x53c193de,0x4bffdc91 },
+      { 0xb89554e7,0xcd15c049,0xf7a26be6,0x353c6754,0xbd41d970,0x79602370,
+        0x12b176c0,0xde16470b,0x40c8809d,0x56ba1175,0xe435fb1e,0xe2db35c3 } },
+    /* 15 */
+    { { 0x6328e33f,0xd71e4aab,0xaf8136d1,0x5486782b,0x86d57231,0x07a4995f,
+        0x1651a968,0xf1f0a5bd,0x76803b6d,0xa5dc5b24,0x42dda935,0x5c587cbc },
+      { 0xbae8b4c0,0x2b6cdb32,0xb1331138,0x66d1598b,0x5d7e9614,0x4a23b2d2,
+        0x74a8c05d,0x93e402a6,0xda7ce82e,0x45ac94e6,0xe463d465,0xeb9f8281 } },
+};
+
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_base_12(sp_point_384* r, const sp_digit* k,
+        int map, void* heap)
+{
+    return sp_384_ecc_mulmod_stripe_12(r, &p384_base, p384_table,
+                                      k, map, heap);
+}
+
+#else
+static const sp_table_entry_384 p384_table[256] = {
+    /* 0 */
+    { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 */
+    { { 0x49c0b528,0x3dd07566,0xa0d6ce38,0x20e378e2,0x541b4d6e,0x879c3afc,
+        0x59a30eff,0x64548684,0x614ede2b,0x812ff723,0x299e1513,0x4d3aadc2 },
+      { 0x4b03a4fe,0x23043dad,0x7bb4a9ac,0xa1bfa8bf,0x2e83b050,0x8bade756,
+        0x68f4ffd9,0xc6c35219,0x3969a840,0xdd800226,0x5a15c5e9,0x2b78abc2 } },
+    /* 2 */
+    { { 0x2b0c535b,0x29864753,0x70506296,0x90dd6953,0x216ab9ac,0x038cd6b4,
+        0xbe12d76a,0x3df9b7b7,0x5f347bdb,0x13f4d978,0x13e94489,0x222c5c9c },
+      { 0x2680dc64,0x5f8e796f,0x58352417,0x120e7cb7,0xd10740b8,0x254b5d8a,
+        0x5337dee6,0xc38b8efb,0x94f02247,0xf688c2e1,0x6c25bc4c,0x7b5c75f3 } },
+    /* 3 */
+    { { 0x9edffea5,0xe26a3cc3,0x37d7e9fc,0x35bbfd1c,0x9bde3ef6,0xf0e7700d,
+        0x1a538f5a,0x0380eb47,0x05bf9eb3,0x2e9da8bb,0x1a460c3e,0xdbb93c73 },
+      { 0xf526b605,0x37dba260,0xfd785537,0x95d4978e,0xed72a04a,0x24ed793a,
+        0x76005b1a,0x26948377,0x9e681f82,0x99f557b9,0xd64954ef,0xae5f9557 } },
+    /* 4 */
+    { { 0xf26feef9,0x24480c57,0x3a0e1240,0xc31a2694,0x273e2bc7,0x735002c3,
+        0x3ef1ed4c,0x8c42e9c5,0x7f4948e8,0x028babf6,0x8a978632,0x6a502f43 },
+      { 0xb74536fe,0xf5f13a46,0xd8a9f0eb,0x1d218bab,0x37232768,0x30f36bcc,
+        0x576e8c18,0xc5317b31,0x9bbcb766,0xef1d57a6,0xb3e3d4dc,0x917c4930 } },
+    /* 5 */
+    { { 0xe349ddd0,0x11426e2e,0x9b2fc250,0x9f117ef9,0xec0174a6,0xff36b480,
+        0x18458466,0x4f4bde76,0x05806049,0x2f2edb6d,0x19dfca92,0x8adc75d1 },
+      { 0xb7d5a7ce,0xa619d097,0xa34411e9,0x874275e5,0x0da4b4ef,0x5403e047,
+        0x77901d8f,0x2ebaafd9,0xa747170f,0x5e63ebce,0x7f9d8036,0x12a36944 } },
+    /* 6 */
+    { { 0x4fc52870,0x28f9c07a,0x1a53a961,0xce0b3748,0x0e1828d9,0xd550fa18,
+        0x6adb225a,0xa24abaf7,0x6e58a348,0xd11ed0a5,0x948acb62,0xf3d811e6 },
+      { 0x4c61ed22,0x8618dd77,0x80b47c9d,0x0bb747f9,0xde6b8559,0x22bf796f,
+        0x680a21e9,0xfdfd1c6d,0x2af2c9dd,0xc0db1577,0xc1e90f3d,0xa09379e6 } },
+    /* 7 */
+    { { 0xe085c629,0x386c66ef,0x095bc89a,0x5fc2a461,0x203f4b41,0x1353d631,
+        0x7e4bd8f5,0x7ca1972b,0xa7df8ce9,0xb077380a,0xee7e4ea3,0xd8a90389 },
+      { 0xe7b14461,0x1bc74dc7,0x0c9c4f78,0xdc2cb014,0x84ef0a10,0x52b4b3a6,
+        0x20327fe2,0xbde6ea5d,0x660f9615,0xb71ec435,0xb8ad8173,0xeede5a04 } },
+    /* 8 */
+    { { 0x893b9a2d,0x5584cbb3,0x00850c5d,0x820c660b,0x7df2d43d,0x4126d826,
+        0x0109e801,0xdd5bbbf0,0x38172f1c,0x85b92ee3,0xf31430d9,0x609d4f93 },
+      { 0xeadaf9d6,0x1e059a07,0x0f125fb0,0x70e6536c,0x560f20e7,0xd6220751,
+        0x7aaf3a9a,0xa59489ae,0x64bae14e,0x7b70e2f6,0x76d08249,0x0dd03701 } },
+    /* 9 */
+    { { 0x8510521f,0x4cc13be8,0xf724cc17,0x87315ba9,0x353dc263,0xb49d83bb,
+        0x0c279257,0x8b677efe,0xc93c9537,0x510a1c1c,0xa4702c99,0x33e30cd8 },
+      { 0x2208353f,0xf0ffc89d,0xced42b2b,0x0170fa8d,0x26e2a5f5,0x090851ed,
+        0xecb52c96,0x81276455,0x7fe1adf4,0x0646c4e1,0xb0868eab,0x513f047e } },
+    /* 10 */
+    { { 0xdf5bdf53,0xc07611f4,0x58b11a6d,0x45d331a7,0x1c4ee394,0x58965daf,
+        0x5a5878d1,0xba8bebe7,0x82dd3025,0xaecc0a18,0xa923eb8b,0xcf2a3899 },
+      { 0xd24fd048,0xf98c9281,0x8bbb025d,0x841bfb59,0xc9ab9d53,0xb8ddf8ce,
+        0x7fef044e,0x538a4cb6,0x23236662,0x092ac21f,0x0b66f065,0xa919d385 } },
+    /* 11 */
+    { { 0x85d480d8,0x3db03b40,0x1b287a7d,0x8cd9f479,0x4a8f3bae,0x8f24dc75,
+        0x3db41892,0x482eb800,0x9c56e0f5,0x38bf9eb3,0x9a91dc6f,0x8b977320 },
+      { 0x7209cfc2,0xa31b05b2,0x05b2db70,0x4c49bf85,0xd619527b,0x56462498,
+        0x1fac51ba,0x3fe51039,0xab4b8342,0xfb04f55e,0x04c6eabf,0xc07c10dc } },
+    /* 12 */
+    { { 0xdb32f048,0xad22fe4c,0x475ed6df,0x5f23bf91,0xaa66b6cb,0xa50ce0c0,
+        0xf03405c0,0xdf627a89,0xf95e2d6a,0x3674837d,0xba42e64e,0x081c95b6 },
+      { 0xe71d6ceb,0xeba3e036,0x6c6b0271,0xb45bcccf,0x0684701d,0x67b47e63,
+        0xe712523f,0x60f8f942,0x5cd47adc,0x82423472,0x87649cbb,0x83027d79 } },
+    /* 13 */
+    { { 0x3615b0b8,0xb3929ea6,0xa54dac41,0xb41441fd,0xb5b6a368,0x8995d556,
+        0x167ef05e,0xa80d4529,0x6d25a27f,0xf6bcb4a1,0x7bd55b68,0x210d6a4c },
+      { 0x25351130,0xf3804abb,0x903e37eb,0x1d2df699,0x084c25c8,0x5f201efc,
+        0xa1c68e91,0x31a28c87,0x563f62a5,0x81dad253,0xd6c415d4,0x5dd6de70 } },
+    /* 14 */
+    { { 0x846612ce,0x29f470fd,0xda18d997,0x986f3eec,0x2f34af86,0x6b84c161,
+        0x46ddaf8b,0x5ef0a408,0xe49e795f,0x14405a00,0xaa2f7a37,0x5f491b16 },
+      { 0xdb41b38d,0xc7f07ae4,0x18fbfcaa,0xef7d119e,0x14443b19,0x3a18e076,
+        0x79a19926,0x4356841a,0xe2226fbe,0x91f4a91c,0x3cc88721,0xdc77248c } },
+    /* 15 */
+    { { 0xe4b1ec9d,0xd570ff1a,0xe7eef706,0x21d23e0e,0xca19e086,0x3cde40f4,
+        0xcd4bb270,0x7d6523c4,0xbf13aa6c,0x16c1f06c,0xd14c4b60,0x5aa7245a },
+      { 0x44b74de8,0x37f81467,0x620a934e,0x839e7a17,0xde8b1aa1,0xf74d14e8,
+        0xf30d75e2,0x8789fa51,0xc81c261e,0x09b24052,0x33c565ee,0x654e2678 } },
+    /* 16 */
+    { { 0x2f9fbe67,0x378205de,0x7f728e44,0xc4afcb83,0x682e00f1,0xdbcec06c,
+        0x114d5423,0xf2a145c3,0x7a52463e,0xa01d9874,0x7d717b0a,0xfc0935b1 },
+      { 0xd4d01f95,0x9653bc4f,0x9560ad34,0x9aa83ea8,0xaf8e3f3f,0xf77943dc,
+        0xe86fe16e,0x70774a10,0xbf9ffdcf,0x6b62e6f1,0x588745c9,0x8a72f39e } },
+    /* 17 */
+    { { 0x2341c342,0x73ade4da,0xea704422,0xdd326e54,0x3741cef3,0x336c7d98,
+        0x59e61549,0x1eafa00d,0xbd9a3efd,0xcd3ed892,0xc5c6c7e4,0x03faf26c },
+      { 0x3045f8ac,0x087e2fcf,0x174f1e73,0x14a65532,0xfe0af9a7,0x2cf84f28,
+        0x2cdc935b,0xddfd7a84,0x6929c895,0x4c0f117b,0x4c8bcfcc,0x356572d6 } },
+    /* 18 */
+    { { 0x7d8c1bba,0x7ecbac01,0x90b0f3d5,0x6058f9c3,0xf6197d0f,0xaee116e3,
+        0x4033b128,0xc4dd7068,0xc209b983,0xf084dba6,0x831dbc4a,0x97c7c2cf },
+      { 0xf96010e8,0x2f4e61dd,0x529faa17,0xd97e4e20,0x69d37f20,0x4ee66660,
+        0x3d366d72,0xccc139ed,0x13488e0f,0x690b6ee2,0xf3a6d533,0x7cad1dc5 } },
+    /* 19 */
+    { { 0xda57a41f,0x660a9a81,0xec0039b6,0xe74a0412,0x5e1dad15,0x42343c6b,
+        0x46681d4c,0x284f3ff5,0x63749e89,0xb51087f1,0x6f9f2f13,0x070f23cc },
+      { 0x5d186e14,0x542211da,0xfddb0dff,0x84748f37,0xdb1f4180,0x41a3aab4,
+        0xa6402d0e,0x25ed667b,0x02f58355,0x2f2924a9,0xfa44a689,0x5844ee7c } },
+    /* 20 */
+    { { 0x3f3b236f,0xfab08607,0x81e221da,0x19e9d41d,0x3927b428,0xf3f6571e,
+        0x7550f1f6,0x4348a933,0xa85e62f0,0x7167b996,0x7f5452bf,0x62d43759 },
+      { 0xf2955926,0xd85feb9e,0x6df78353,0x440a561f,0x9ca36b59,0x389668ec,
+        0xa22da016,0x052bf1a1,0xf6093254,0xbdfbff72,0xe22209f3,0x94e50f28 } },
+    /* 21 */
+    { { 0x3062e8af,0x90b2e5b3,0xe8a3d369,0xa8572375,0x201db7b1,0x3fe1b00b,
+        0xee651aa2,0xe926def0,0xb9b10ad7,0x6542c9be,0xa2fcbe74,0x098e309b },
+      { 0xfff1d63f,0x779deeb3,0x20bfd374,0x23d0e80a,0x8768f797,0x8452bb3b,
+        0x1f952856,0xcf75bb4d,0x29ea3faa,0x8fe6b400,0x81373a53,0x12bd3e40 } },
+    /* 22 */
+    { { 0x104cbba5,0xc023780d,0xfa35dd4c,0x6207e747,0x1ca9b6a3,0x35c23928,
+        0x97987b10,0x4ff19be8,0x8022eee8,0xb8476bbf,0xd3bbe74d,0xaa0a4a14 },
+      { 0x187d4543,0x20f94331,0x79f6e066,0x32153870,0xac7e82e1,0x83b0f74e,
+        0x828f06ab,0xa7748ba2,0xc26ef35f,0xc5f0298a,0x8e9a7dbd,0x0f0c5070 } },
+    /* 23 */
+    { { 0xdef029dd,0x0c5c244c,0x850661b8,0x3dabc687,0xfe11d981,0x9992b865,
+        0x6274dbad,0xe9801b8f,0x098da242,0xe54e6319,0x91a53d08,0x9929a91a },
+      { 0x35285887,0x37bffd72,0xf1418102,0xbc759425,0xfd2e6e20,0x9280cc35,
+        0xfbc42ee5,0x735c600c,0x8837619a,0xb7ad2864,0xa778c57b,0xa3627231 } },
+    /* 24 */
+    { { 0x91361ed8,0xae799b5c,0x6c63366c,0x47d71b75,0x1b265a6a,0x54cdd521,
+        0x98d77b74,0xe0215a59,0xbab29db0,0x4424d9b7,0x7fd9e536,0x8b0ffacc },
+      { 0x37b5d9ef,0x46d85d12,0xbfa91747,0x5b106d62,0x5f99ba2d,0xed0479f8,
+        0x1d104de4,0x0e6f3923,0x25e8983f,0x83a84c84,0xf8105a70,0xa9507e0a } },
+    /* 25 */
+    { { 0x14cf381c,0xf6c68a6e,0xc22e31cc,0xaf9d27bd,0xaa8a5ccb,0x23568d4d,
+        0xe338e4d2,0xe431eec0,0x8f52ad1f,0xf1a828fe,0xe86acd80,0xdb6a0579 },
+      { 0x4507832a,0x2885672e,0x887e5289,0x73fc275f,0x05610d08,0x65f80278,
+        0x075ff5b0,0x8d9b4554,0x09f712b5,0x3a8e8fb1,0x2ebe9cf2,0x39f0ac86 } },
+    /* 26 */
+    { { 0x4c52edf5,0xd8fabf78,0xa589ae53,0xdcd737e5,0xd791ab17,0x94918bf0,
+        0xbcff06c9,0xb5fbd956,0xdca46d45,0xf6d3032e,0x41a3e486,0x2cdff7e1 },
+      { 0x61f47ec8,0x6674b3ba,0xeef84608,0x8a882163,0x4c687f90,0xa257c705,
+        0xf6cdf227,0xe30cb2ed,0x7f6ea846,0x2c4c64ca,0xcc6bcd3c,0x186fa17c } },
+    /* 27 */
+    { { 0x1dfcb91e,0x48a3f536,0x646d358a,0x83595e13,0x91128798,0xbd15827b,
+        0x2187757a,0x3ce612b8,0x61bd7372,0x873150a1,0xb662f568,0xf4684530 },
+      { 0x401896f6,0x8833950b,0x77f3e090,0xe11cb89a,0x48e7f4a5,0xb2f12cac,
+        0xf606677e,0x313dd769,0x16579f93,0xfdcf08b3,0x46b8f22b,0x6429cec9 } },
+    /* 28 */
+    { { 0xbb75f9a4,0x4984dd54,0x29d3b570,0x4aef06b9,0x3d6e4c1e,0xb5f84ca2,
+        0xb083ef35,0x24c61c11,0x392ca9ff,0xce4a7392,0x6730a800,0x865d6517 },
+      { 0x722b4a2b,0xca3dfe76,0x7b083e0e,0x12c04bf9,0x1b86b8a5,0x803ce5b5,
+        0x6a7e3e0c,0x3fc7632d,0xc81adbe4,0xc89970c2,0x120e16b1,0x3cbcd3ad } },
+    /* 29 */
+    { { 0xec30ce93,0xfbfb4cc7,0xb72720a2,0x10ed6c7d,0x47b55500,0xec675bf7,
+        0x333ff7c3,0x90725903,0x5075bfc0,0xc7c3973e,0x07acf31b,0xb049ecb0 },
+      { 0x4f58839c,0xb4076eaf,0xa2b05e4f,0x101896da,0xab40c66e,0x3f6033b0,
+        0xc8d864ba,0x19ee9eeb,0x47bf6d2a,0xeb6cf155,0xf826477d,0x8e5a9663 } },
+    /* 30 */
+    { { 0xf7fbd5e1,0x69e62fdd,0x76912b1d,0x38ecfe54,0xd1da3bfb,0x845a3d56,
+        0x1c86f0d4,0x0494950e,0x3bc36ce8,0x83cadbf9,0x4fccc8d1,0x41fce572 },
+      { 0x8332c144,0x05f939c2,0x0871e46e,0xb17f248b,0x66e8aff6,0x3d8534e2,
+        0x3b85c629,0x1d06f1dc,0xa3131b73,0xdb06a32e,0x8b3f64e5,0xf295184d } },
+    /* 31 */
+    { { 0x36ddc103,0xd9653ff7,0x95ef606f,0x25f43e37,0xfe06dce8,0x09e301fc,
+        0x30b6eebf,0x85af2341,0x0ff56b20,0x79b12b53,0xfe9a3c6b,0x9b4fb499 },
+      { 0x51d27ac2,0x0154f892,0x56ca5389,0xd33167e3,0xafc065a6,0x7828ec1f,
+        0x7f746c9b,0x0959a258,0x0c44f837,0xb18f1be3,0xc4132fdb,0xa7946117 } },
+    /* 32 */
+    { { 0x5e3c647b,0xc0426b77,0x8cf05348,0xbfcbd939,0x172c0d3d,0x31d312e3,
+        0xee754737,0x5f49fde6,0x6da7ee61,0x895530f0,0xe8b3a5fb,0xcf281b0a },
+      { 0x41b8a543,0xfd149735,0x3080dd30,0x41a625a7,0x653908cf,0xe2baae07,
+        0xba02a278,0xc3d01436,0x7b21b8f8,0xa0d0222e,0xd7ec1297,0xfdc270e9 } },
+    /* 33 */
+    { { 0xbc7f41d6,0x00873c0c,0x1b7ad641,0xd976113e,0x238443fb,0x2a536ff4,
+        0x41e62e45,0x030d00e2,0x5f545fc6,0x532e9867,0x8e91208c,0xcd033108 },
+      { 0x9797612c,0xd1a04c99,0xeea674e2,0xd4393e02,0xe19742a1,0xd56fa69e,
+        0x85f0590e,0xdd2ab480,0x48a2243d,0xa5cefc52,0x54383f41,0x48cc67b6 } },
+    /* 34 */
+    { { 0xfc14ab48,0x4e50430e,0x26706a74,0x195b7f4f,0xcc881ff6,0x2fe8a228,
+        0xd945013d,0xb1b968e2,0x4b92162b,0x936aa579,0x364e754a,0x4fb766b7 },
+      { 0x31e1ff7f,0x13f93bca,0xce4f2691,0x696eb5ca,0xa2b09e02,0xff754bf8,
+        0xe58e3ff8,0x58f13c9c,0x1678c0b0,0xb757346f,0xa86692b3,0xd54200db } },
+    /* 35 */
+    { { 0x6dda1265,0x9a030bbd,0xe89718dd,0xf7b4f3fc,0x936065b8,0xa6a4931f,
+        0x5f72241c,0xbce72d87,0x65775857,0x6cbb51cb,0x4e993675,0xc7161815 },
+      { 0x2ee32189,0xe81a0f79,0x277dc0b2,0xef2fab26,0xb71f469f,0x9e64f6fe,
+        0xdfdaf859,0xb448ce33,0xbe6b5df1,0x3f5c1c4c,0x1de45f7b,0xfb8dfb00 } },
+    /* 36 */
+    { { 0x4d5bb921,0xc7345fa7,0x4d2b667e,0x5c7e04be,0x282d7a3e,0x47ed3a80,
+        0x7e47b2a4,0x5c2777f8,0x08488e2e,0x89b3b100,0xb2eb5b45,0x9aad77c2 },
+      { 0xdaac34ae,0xd681bca7,0x26afb326,0x2452e4e5,0x41a1ee14,0x0c887924,
+        0xc2407ade,0x743b04d4,0xfc17a2ac,0xcb5e999b,0x4a701a06,0x4dca2f82 } },
+    /* 37 */
+    { { 0x1127bc1a,0x68e31ca6,0x17ead3be,0xa3edd59b,0xe25f5a15,0x67b6b645,
+        0xa420e15e,0x76221794,0x4b1e872e,0x794fd83b,0xb2dece1b,0x7cab3f03 },
+      { 0xca9b3586,0x7119bf15,0x4d250bd7,0xa5545924,0xcc6bcf24,0x173633ea,
+        0xb1b6f884,0x9bd308c2,0x447d38c3,0x3bae06f5,0xf341fe1c,0x54dcc135 } },
+    /* 38 */
+    { { 0x943caf0d,0x56d3598d,0x225ff133,0xce044ea9,0x563fadea,0x9edf6a7c,
+        0x73e8dc27,0x632eb944,0x3190dcab,0x814b467e,0x6dbb1e31,0x2d4f4f31 },
+      { 0xa143b7ca,0x8d69811c,0xde7cf950,0x4ec1ac32,0x37b5fe82,0x223ab5fd,
+        0x9390f1d9,0xe82616e4,0x75804610,0xabff4b20,0x875b08f0,0x11b9be15 } },
+    /* 39 */
+    { { 0x3bbe682c,0x4ae31a3d,0x74eef2dd,0xbc7c5d26,0x3c47dd40,0x92afd10a,
+        0xc14ab9e1,0xec7e0a3b,0xb2e495e4,0x6a6c3dd1,0x309bcd85,0x085ee5e9 },
+      { 0x8c2e67fd,0xf381a908,0xe261eaf2,0x32083a80,0x96deee15,0x0fcd6a49,
+        0x5e524c79,0xe3b8fb03,0x1d5b08b9,0x8dc360d9,0x7f26719f,0x3a06e2c8 } },
+    /* 40 */
+    { { 0x7237cac0,0x5cd9f5a8,0x43586794,0x93f0b59d,0xe94f6c4e,0x4384a764,
+        0xb62782d3,0x8304ed2b,0xcde06015,0x0b8db8b3,0x5dbe190f,0x4336dd53 },
+      { 0x92ab473a,0x57443553,0xbe5ed046,0x031c7275,0x21909aa4,0x3e78678c,
+        0x99202ddb,0x4ab7e04f,0x6977e635,0x2648d206,0x093198be,0xd427d184 } },
+    /* 41 */
+    { { 0x0f9b5a31,0x822848f5,0xbaadb62a,0xbb003468,0x3357559c,0x233a0472,
+        0x79aee843,0x49ef6880,0xaeb9e1e3,0xa89867a0,0x1f6f9a55,0xc151931b },
+      { 0xad74251e,0xd264eb0b,0x4abf295e,0x37b9b263,0x04960d10,0xb600921b,
+        0x4da77dc0,0x0de53dbc,0xd2b18697,0x01d9bab3,0xf7156ddf,0xad54ec7a } },
+    /* 42 */
+    { { 0x79efdc58,0x8e74dc35,0x4ff68ddb,0x456bd369,0xd32096a5,0x724e74cc,
+        0x386783d0,0xe41cff42,0x7c70d8a4,0xa04c7f21,0xe61a19a2,0x41199d2f },
+      { 0x29c05dd2,0xd389a3e0,0xe7e3fda9,0x535f2a6b,0x7c2b4df8,0x26ecf72d,
+        0xfe745294,0x678275f4,0x9d23f519,0x6319c9cc,0x88048fc4,0x1e05a02d } },
+    /* 43 */
+    { { 0xd4d5ffe8,0x75cc8e2e,0xdbea17f2,0xf8bb4896,0xcee3cb4a,0x35059790,
+        0xa47c6165,0x4c06ee85,0x92935d2f,0xf98fff25,0x32ffd7c7,0x34c4a572 },
+      { 0xea0376a2,0xc4b14806,0x4f115e02,0x2ea5e750,0x1e55d7c0,0x532d76e2,
+        0xf31044da,0x68dc9411,0x71b77993,0x9272e465,0x93a8cfd5,0xadaa38bb } },
+    /* 44 */
+    { { 0x7d4ed72a,0x4bf0c712,0xba1f79a3,0xda0e9264,0xf4c39ea4,0x48c0258b,
+        0x2a715138,0xa5394ed8,0xbf06c660,0x4af511ce,0xec5c37cd,0xfcebceef },
+      { 0x779ae8c1,0xf23b75aa,0xad1e606e,0xdeff59cc,0x22755c82,0xf3f526fd,
+        0xbb32cefd,0x64c5ab44,0x915bdefd,0xa96e11a2,0x1143813e,0xab19746a } },
+    /* 45 */
+    { { 0xec837d7d,0x43c78585,0xb8ee0ba4,0xca5b6fbc,0xd5dbb5ee,0x34e924d9,
+        0xbb4f1ca5,0x3f4fa104,0x398640f7,0x15458b72,0xd7f407ea,0x4231faa9 },
+      { 0xf96e6896,0x53e0661e,0xd03b0f9d,0x554e4c69,0x9c7858d1,0xd4fcb07b,
+        0x52cb04fa,0x7e952793,0x8974e7f7,0x5f5f1574,0x6b6d57c8,0x2e3fa558 } },
+    /* 46 */
+    { { 0x6a9951a8,0x42cd4803,0x42792ad0,0xa8b15b88,0xabb29a73,0x18e8bcf9,
+        0x409933e8,0xbfd9a092,0xefb88dc4,0x760a3594,0x40724458,0x14418863 },
+      { 0x99caedc7,0x162a56ee,0x91d101c9,0x8fb12ecd,0x393202da,0xea671967,
+        0xa4ccd796,0x1aac8c4a,0x1cf185a8,0x7db05036,0x8cfd095a,0x0c9f86cd } },
+    /* 47 */
+    { { 0x10b2a556,0x9a728147,0x327b70b2,0x767ca964,0x5e3799b7,0x04ed9e12,
+        0x22a3eb2a,0x6781d2dc,0x0d9450ac,0x5bd116eb,0xa7ebe08a,0xeccac1fc },
+      { 0xdc2d6e94,0xde68444f,0x35ecf21b,0x3621f429,0x29e03a2c,0x14e2d543,
+        0x7d3e7f0a,0x53e42cd5,0x73ed00b9,0xbba26c09,0xc57d2272,0x00297c39 } },
+    /* 48 */
+    { { 0xb8243a7d,0x3aaaab10,0x8fa58c5b,0x6eeef93e,0x9ae7f764,0xf866fca3,
+        0x61ab04d3,0x64105a26,0x03945d66,0xa3578d8a,0x791b848c,0xb08cd3e4 },
+      { 0x756d2411,0x45edc5f8,0xa755128c,0xd4a790d9,0x49e5f6a0,0xc2cf0963,
+        0xf649beaa,0xc66d267d,0x8467039e,0x3ce6d968,0x42f7816f,0x50046c6b } },
+    /* 49 */
+    { { 0x66425043,0x92ae1602,0xf08db890,0x1ff66afd,0x8f162ce5,0x386f5a7f,
+        0xfcf5598f,0x18d2dea0,0x1a8ca18e,0x78372b3a,0x8cd0e6f7,0xdf0d20eb },
+      { 0x75bb4045,0x7edd5e1d,0xb96d94b7,0x252a47ce,0x2c626776,0xbdb29358,
+        0x40dd1031,0x853c3943,0x7d5f47fd,0x9dc9becf,0xbae4044a,0x27c2302f } },
+    /* 50 */
+    { { 0x8f2d49ce,0x2d1d208a,0x162df0a2,0x0d91aa02,0x09a07f65,0x9c5cce87,
+        0x84339012,0xdf07238b,0x419442cd,0x5028e2c8,0x72062aba,0x2dcbd358 },
+      { 0xe4680967,0xb5fbc3cb,0x9f92d72c,0x2a7bc645,0x116c369d,0x806c76e1,
+        0x3177e8d8,0x5c50677a,0x4569df57,0x753739eb,0x36c3f40b,0x2d481ef6 } },
+    /* 51 */
+    { { 0xfea1103e,0x1a2d39fd,0x95f81b17,0xeaae5592,0xf59b264a,0xdbd0aa18,
+        0xcb592ee0,0x90c39c1a,0x9750cca3,0xdf62f80d,0xdf97cc6c,0xda4d8283 },
+      { 0x1e201067,0x0a6dd346,0x69fb1f6b,0x1531f859,0x1d60121f,0x4895e552,
+        0x4c041c91,0x0b21aab0,0xbcc1ccf8,0x9d896c46,0x3141bde7,0xd24da3b3 } },
+    /* 52 */
+    { { 0x53b0a354,0x575a0537,0x0c6ddcd8,0x392ff2f4,0x56157b94,0x0b8e8cff,
+        0x3b1b80d1,0x073e57bd,0x3fedee15,0x2a75e0f0,0xaa8e6f19,0x752380e4 },
+      { 0x6558ffe9,0x1f4e227c,0x19ec5415,0x3a348618,0xf7997085,0xab382d5e,
+        0xddc46ac2,0x5e6deaff,0xfc8d094c,0xe5144078,0xf60e37c6,0xf674fe51 } },
+    /* 53 */
+    { { 0xaf63408f,0x6fb87ae5,0xcd75a737,0xa39c36a9,0xcf4c618d,0x7833313f,
+        0xf034c88d,0xfbcd4482,0x39b35288,0x4469a761,0x66b5d9c9,0x77a711c5 },
+      { 0x944f8d65,0x4a695dc7,0x161aaba8,0xe6da5f65,0x24601669,0x8654e9c3,
+        0x28ae7491,0xbc8b93f5,0x8f5580d8,0x5f1d1e83,0xcea32cc8,0x8ccf9a1a } },
+    /* 54 */
+    { { 0x7196fee2,0x28ab110c,0x874c8945,0x75799d63,0x29aedadd,0xa2629348,
+        0x2be88ff4,0x9714cc7b,0xd58d60d6,0xf71293cf,0x32a564e9,0xda6b6cb3 },
+      { 0x3dd821c2,0xf43fddb1,0x90dd323d,0xf2f2785f,0x048489f8,0x91246419,
+        0xd24c6749,0x61660f26,0xc803c15c,0x961d9e8c,0xfaadc4c9,0x631c6158 } },
+    /* 55 */
+    { { 0xfd752366,0xacf2ebe0,0x139be88b,0xb93c340e,0x0f20179e,0x98f66485,
+        0xff1da785,0x14820254,0x4f85c16e,0x5278e276,0x7aab1913,0xa246ee45 },
+      { 0x53763b33,0x43861eb4,0x45c0bc0d,0xc49f03fc,0xad6b1ea1,0xafff16bc,
+        0x6fd49c99,0xce33908b,0xf7fde8c3,0x5c51e9bf,0xff142c5e,0x076a7a39 } },
+    /* 56 */
+    { { 0x9e338d10,0x04639dfe,0xf42b411b,0x8ee6996f,0xa875cef2,0x960461d1,
+        0x95b4d0ba,0x1057b6d6,0xa906e0bc,0x27639252,0xe1c20f8a,0x2c19f09a },
+      { 0xeef4c43d,0x5b8fc3f0,0x07a84aa9,0xe2e1b1a8,0x835d2bdb,0x5f455528,
+        0x207132dd,0x0f4aee4d,0x3907f675,0xe9f8338c,0x0e0531f0,0x7a874dc9 } },
+    /* 57 */
+    { { 0x97c27050,0x84b22d45,0x59e70bf8,0xbd0b8df7,0x79738b9b,0xb4d67405,
+        0xcd917c4f,0x47f4d5f5,0x13ce6e33,0x9099c4ce,0x521d0f8b,0x942bfd39 },
+      { 0xa43b566d,0x5028f0f6,0x21bff7de,0xaf6e8669,0xc44232cd,0x83f6f856,
+        0xf915069a,0x65680579,0xecfecb85,0xd12095a2,0xdb01ba16,0xcf7f06ae } },
+    /* 58 */
+    { { 0x8ef96c80,0x0f56e3c4,0x3ddb609c,0xd521f2b3,0x7dc1450d,0x2be94102,
+        0x02a91fe2,0x2d21a071,0x1efa37de,0x2e6f74fa,0x156c28a1,0x9a9a90b8 },
+      { 0x9dc7dfcb,0xc54ea9ea,0x2c2c1d62,0xc74e66fc,0x49d3e067,0x9f23f967,
+        0x54dd38ad,0x1c7c3a46,0x5946cee3,0xc7005884,0x45cc045d,0x89856368 } },
+    /* 59 */
+    { { 0xfce73946,0x29da7cd4,0x23168563,0x8f697db5,0xcba92ec6,0x8e235e9c,
+        0x9f91d3ea,0x55d4655f,0xaa50a6cd,0xf3689f23,0x21e6a1a0,0xdcf21c26 },
+      { 0x61b818bf,0xcffbc82e,0xda47a243,0xc74a2f96,0x8bc1a0cf,0x234e980a,
+        0x7929cb6d,0xf35fd6b5,0xefe17d6c,0x81468e12,0x58b2dafb,0xddea6ae5 } },
+    /* 60 */
+    { { 0x7e787b2e,0x294de887,0x39a9310d,0x258acc1f,0xac14265d,0x92d9714a,
+        0x708b48a0,0x18b5591c,0xe1abbf71,0x27cc6bb0,0x568307b9,0xc0581fa3 },
+      { 0xf24d4d58,0x9e0f58a3,0xe0ce2327,0xfebe9bb8,0x9d1be702,0x91fd6a41,
+        0xfacac993,0x9a7d8a45,0x9e50d66d,0xabc0a08c,0x06498201,0x02c342f7 } },
+    /* 61 */
+    { { 0x157bdbc2,0xccd71407,0xad0e1605,0x72fa89c6,0xb92a015f,0xb1d3da2b,
+        0xa0a3fe56,0x8ad9e7cd,0x24f06737,0x160edcbd,0x61275be6,0x79d4db33 },
+      { 0x5f3497c4,0xd3d31fd9,0x04192fb0,0x8cafeaee,0x13a50af3,0xe13ca745,
+        0x8c85aae5,0x18826167,0x9eb556ff,0xce06cea8,0xbdb549f3,0x2eef1995 } },
+    /* 62 */
+    { { 0x50596edc,0x8ed7d3eb,0x905243a2,0xaa359362,0xa4b6d02b,0xa212c2c2,
+        0xc4fbec68,0x611fd727,0xb84f733d,0x8a0b8ff7,0x5f0daf0e,0xd85a6b90 },
+      { 0xd4091cf7,0x60e899f5,0x2eff2768,0x4fef2b67,0x10c33964,0xc1f195cb,
+        0x93626a8f,0x8275d369,0x0d6c840a,0xc77904f4,0x7a868acd,0x88d8b7fd } },
+    /* 63 */
+    { { 0x7bd98425,0x85f23723,0xc70b154e,0xd4463992,0x96687a2e,0xcbb00ee2,
+        0xc83214fd,0x905fdbf7,0x13593684,0x2019d293,0xef51218e,0x0428c393 },
+      { 0x981e909a,0x40c7623f,0x7be192da,0x92513385,0x4010907e,0x48fe480f,
+        0x3120b459,0xdd7a187c,0xa1fd8f3c,0xc9d7702d,0xe358efc5,0x66e4753b } },
+    /* 64 */
+    { { 0x16973cf4,0x070d34e1,0x7e4f34f7,0x20aee08b,0x5eb8ad29,0x269af9b9,
+        0xa6a45dda,0xdde0a036,0x63df41e0,0xa18b528e,0xa260df2a,0x03cc71b2 },
+      { 0xa06b1dd7,0x24a6770a,0x9d2675d3,0x5bfa9c11,0x96844432,0x73c1e2a1,
+        0x131a6cf0,0x3660558d,0x2ee79454,0xb0289c83,0xc6d8ddcd,0xa6aefb01 } },
+    /* 65 */
+    { { 0x01ab5245,0xba1464b4,0xc48d93ff,0x9b8d0b6d,0x93ad272c,0x939867dc,
+        0xae9fdc77,0xbebe085e,0x894ea8bd,0x73ae5103,0x39ac22e1,0x740fc89a },
+      { 0x28e23b23,0x5e28b0a3,0xe13104d0,0x2352722e,0xb0a2640d,0xf4667a18,
+        0x49bb37c3,0xac74a72e,0xe81e183a,0x79f734f0,0x3fd9c0eb,0xbffe5b6c } },
+    /* 66 */
+    { { 0xc6a2123f,0xb1a358f5,0xfe28df6d,0x927b2d95,0xf199d2f9,0x89702753,
+        0x1a3f82dc,0x0a73754c,0x777affe1,0x063d029d,0xdae6d34d,0x5439817e },
+      { 0x6b8b83c4,0xf7979eef,0x9d945682,0x615cb214,0xc5e57eae,0x8f0e4fac,
+        0x113047dd,0x042b89b8,0x93f36508,0x888356dc,0x5fd1f32f,0xbf008d18 } },
+    /* 67 */
+    { { 0x4e8068db,0x8012aa24,0xa5729a47,0xc72cc641,0x43f0691d,0x3c33df2c,
+        0x1d92145f,0xfa057347,0xb97f7946,0xaefc0f2f,0x2f8121bf,0x813d75cb },
+      { 0x4383bba6,0x05613c72,0xa4224b3f,0xa924ce70,0x5f2179a6,0xe59cecbe,
+        0x79f62b61,0x78e2e8aa,0x53ad8079,0x3ac2cc3b,0xd8f4fa96,0x55518d71 } },
+    /* 68 */
+    { { 0x00623f3b,0x03cf2922,0x5f29ebff,0x095c7111,0x80aa6823,0x42d72247,
+        0x7458c0b0,0x044c7ba1,0x0959ec20,0xca62f7ef,0xf8ca929f,0x40ae2ab7 },
+      { 0xa927b102,0xb8c5377a,0xdc031771,0x398a86a0,0xc216a406,0x04908f9d,
+        0x918d3300,0xb423a73a,0xe0b94739,0x634b0ff1,0x2d69f697,0xe29de725 } },
+    /* 69 */
+    { { 0x8435af04,0x744d1400,0xfec192da,0x5f255b1d,0x336dc542,0x1f17dc12,
+        0x636a68a8,0x5c90c2a7,0x7704ca1e,0x960c9eb7,0x6fb3d65a,0x9de8cf1e },
+      { 0x511d3d06,0xc60fee0d,0xf9eb52c7,0x466e2313,0x206b0914,0x743c0f5f,
+        0x2191aa4d,0x42f55bac,0xffebdbc2,0xcefc7c8f,0xe6e8ed1c,0xd4fa6081 } },
+    /* 70 */
+    { { 0xb0ab9645,0xb5e405d3,0xd5f1f711,0xaeec7f98,0x585c2a6e,0x8ad42311,
+        0x512c6944,0x045acb9e,0xa90db1c6,0xae106c4e,0x898e6563,0xb89f33d5 },
+      { 0x7fed2ce4,0x43b07cd9,0xdd815b20,0xf9934e17,0x0a81a349,0x6778d4d5,
+        0x52918061,0x9e616ade,0xd7e67112,0xfa06db06,0x88488091,0x1da23cf1 } },
+    /* 71 */
+    { { 0x42f2c4b5,0x821c46b3,0x66059e47,0x931513ef,0x66f50cd1,0x7030ae43,
+        0x43e7b127,0x43b536c9,0x5fca5360,0x006258cf,0x6b557abf,0xe4e3ee79 },
+      { 0x24c8b22f,0xbb6b3900,0xfcbf1054,0x2eb5e2c1,0x567492af,0x937b18c9,
+        0xacf53957,0xf09432e4,0x1dbf3a56,0x585f5a9d,0xbe0887cf,0xf86751fd } },
+    /* 72 */
+    { { 0x9d10e0b2,0x157399cb,0x60dc51b7,0x1c0d5956,0x1f583090,0x1d496b8a,
+        0x88590484,0x6658bc26,0x03213f28,0x88c08ab7,0x7ae58de4,0x8d2e0f73 },
+      { 0x486cfee6,0x9b79bc95,0xe9e5bc57,0x036a26c7,0xcd8ae97a,0x1ad03601,
+        0xff3a0494,0x06907f87,0x2c7eb584,0x078f4bbf,0x7e8d0a5a,0xe3731bf5 } },
+    /* 73 */
+    { { 0xe1cd0abe,0x72f2282b,0x87efefa2,0xd4f9015e,0x6c3834bd,0x9d189806,
+        0xb8a29ced,0x9c8cdcc1,0xfee82ebc,0x0601b9f4,0x7206a756,0x371052bc },
+      { 0x46f32562,0x76fa1092,0x17351bb4,0xdaad534c,0xb3636bb5,0xc3d64c37,
+        0x45d54e00,0x038a8c51,0x32c09e7c,0x301e6180,0x95735151,0x9764eae7 } },
+    /* 74 */
+    { { 0xcbd5256a,0x8791b19f,0x6ca13a3b,0x4007e0f2,0x4cf06904,0x03b79460,
+        0xb6c17589,0xb18a9c22,0x81d45908,0xa1cb7d7d,0x21bb68f1,0x6e13fa9d },
+      { 0xa71e6e16,0x47183c62,0xe18749ed,0x5cf0ef8e,0x2e5ed409,0x2c9c7f9b,
+        0xe6e117e1,0x042eeacc,0x13fb5a7f,0xb86d4816,0xc9e5feb1,0xea1cf0ed } },
+    /* 75 */
+    { { 0xcea4cc9b,0x6e6573c9,0xafcec8f3,0x5417961d,0xa438b6f6,0x804bf02a,
+        0xdcd4ea88,0xb894b03c,0x3799571f,0xd0f807e9,0x862156e8,0x3466a7f5 },
+      { 0x56515664,0x51e59acd,0xa3c5eb0b,0x55b0f93c,0x6a4279db,0x84a06b02,
+        0xc5fae08e,0x5c850579,0xa663a1a2,0xcf07b8db,0xf46ffc8d,0x49a36bbc } },
+    /* 76 */
+    { { 0x46d93106,0xe47f5acc,0xaa897c9c,0x65b7ade0,0x12d7e4be,0x37cf4c94,
+        0xd4b2caa9,0xa2ae9b80,0xe60357a3,0x5e7ce09c,0xc8ecd5f9,0x29f77667 },
+      { 0xa8a0b1c5,0xdf6868f5,0x62978ad8,0x240858cf,0xdc0002a1,0x0f7ac101,
+        0xffe9aa05,0x1d28a9d7,0x5b962c97,0x744984d6,0x3d28c8b2,0xa8a7c00b } },
+    /* 77 */
+    { { 0xae11a338,0x7c58a852,0xd1af96e7,0xa78613f1,0x5355cc73,0x7e9767d2,
+        0x792a2de6,0x6ba37009,0x124386b2,0x7d60f618,0x11157674,0xab09b531 },
+      { 0x98eb9dd0,0x95a04841,0x15070328,0xe6c17acc,0x489c6e49,0xafc6da45,
+        0xbb211530,0xab45a60a,0x7d7ea933,0xc58d6592,0x095642c6,0xa3ef3c65 } },
+    /* 78 */
+    { { 0xdf010879,0x89d420e9,0x39576179,0x9d25255d,0xe39513b6,0x9cdefd50,
+        0xd5d1c313,0xe4efe45b,0x3f7af771,0xc0149de7,0x340ab06b,0x55a6b4f4 },
+      { 0xebeaf771,0xf1325251,0x878d4288,0x2ab44128,0x18e05afe,0xfcd5832e,
+        0xcc1fb62b,0xef52a348,0xc1c4792a,0x2bd08274,0x877c6dc7,0x345c5846 } },
+    /* 79 */
+    { { 0xbea65e90,0xde15ceb0,0x2416d99c,0x0987f72b,0xfd863dec,0x44db578d,
+        0xac6a3578,0xf617b74b,0xdb48e999,0x9e62bd7a,0xeab1a1be,0x877cae61 },
+      { 0x3a358610,0x23adddaa,0x325e2b07,0x2fc4d6d1,0x1585754e,0x897198f5,
+        0xb392b584,0xf741852c,0xb55f7de1,0x9927804c,0x1aa8efae,0xe9e6c4ed } },
+    /* 80 */
+    { { 0x98683186,0x867db639,0xddcc4ea9,0xfb5cf424,0xd4f0e7bd,0xcc9a7ffe,
+        0x7a779f7e,0x7c57f71c,0xd6b25ef2,0x90774079,0xb4081680,0x90eae903 },
+      { 0x0ee1fceb,0xdf2aae5e,0xe86c1a1f,0x3ff1da24,0xca193edf,0x80f587d6,
+        0xdc9b9d6a,0xa5695523,0x85920303,0x7b840900,0xba6dbdef,0x1efa4dfc } },
+    /* 81 */
+    { { 0xe0540015,0xfbd838f9,0xc39077dc,0x2c323946,0xad619124,0x8b1fb9e6,
+        0x0ca62ea8,0x9612440c,0x2dbe00ff,0x9ad9b52c,0xae197643,0xf52abaa1 },
+      { 0x2cac32ad,0xd0e89894,0x62a98f91,0xdfb79e42,0x276f55cb,0x65452ecf,
+        0x7ad23e12,0xdb1ac0d2,0xde4986f0,0xf68c5f6a,0x82ce327d,0x389ac37b } },
+    /* 82 */
+    { { 0xf8e60f5b,0x511188b4,0x48aa2ada,0x7fe67015,0x381abca2,0xdb333cb8,
+        0xdaf3fc97,0xb15e6d9d,0x36aabc03,0x4b24f6eb,0x72a748b4,0xc59789df },
+      { 0x29cf5279,0x26fcb8a5,0x01ad9a6c,0x7a3c6bfc,0x4b8bac9b,0x866cf88d,
+        0x9c80d041,0xf4c89989,0x70add148,0xf0a04241,0x45d81a41,0x5a02f479 } },
+    /* 83 */
+    { { 0xc1c90202,0xfa5c877c,0xf8ac7570,0xd099d440,0xd17881f7,0x428a5b1b,
+        0x5b2501d7,0x61e267db,0xf2e4465b,0xf889bf04,0x76aa4cb8,0x4da3ae08 },
+      { 0xe3e66861,0x3ef0fe26,0x3318b86d,0x5e772953,0x747396df,0xc3c35fbc,
+        0x439ffd37,0x5115a29c,0xb2d70374,0xbfc4bd97,0x56246b9d,0x088630ea } },
+    /* 84 */
+    { { 0xb8a9e8c9,0xcd96866d,0x5bb8091e,0xa11963b8,0x045b3cd2,0xc7f90d53,
+        0x80f36504,0x755a72b5,0x21d3751c,0x46f8b399,0x53c193de,0x4bffdc91 },
+      { 0xb89554e7,0xcd15c049,0xf7a26be6,0x353c6754,0xbd41d970,0x79602370,
+        0x12b176c0,0xde16470b,0x40c8809d,0x56ba1175,0xe435fb1e,0xe2db35c3 } },
+    /* 85 */
+    { { 0x6328e33f,0xd71e4aab,0xaf8136d1,0x5486782b,0x86d57231,0x07a4995f,
+        0x1651a968,0xf1f0a5bd,0x76803b6d,0xa5dc5b24,0x42dda935,0x5c587cbc },
+      { 0xbae8b4c0,0x2b6cdb32,0xb1331138,0x66d1598b,0x5d7e9614,0x4a23b2d2,
+        0x74a8c05d,0x93e402a6,0xda7ce82e,0x45ac94e6,0xe463d465,0xeb9f8281 } },
+    /* 86 */
+    { { 0xfecf5b9b,0x34e0f9d1,0xf206966a,0xa115b12b,0x1eaa0534,0x5591cf3b,
+        0xfb1558f9,0x5f0293cb,0x1bc703a5,0x1c8507a4,0x862c1f81,0x92e6b81c },
+      { 0xcdaf24e3,0xcc9ebc66,0x72fcfc70,0x68917ecd,0x8157ba48,0x6dc9a930,
+        0xb06ab2b2,0x5d425c08,0x36e929c4,0x362f8ce7,0x62e89324,0x09f6f57c } },
+    /* 87 */
+    { { 0xd29375fb,0x1c7d6b78,0xe35d1157,0xfabd851e,0x4243ea47,0xf6f62dcd,
+        0x8fe30b0f,0x1dd92460,0xffc6e709,0x08166dfa,0x0881e6a7,0xc6c4c693 },
+      { 0xd6a53fb0,0x20368f87,0x9eb4d1f9,0x38718e9f,0xafd7e790,0x03f08acd,
+        0x72fe2a1c,0x0835eb44,0x88076e5d,0x7e050903,0xa638e731,0x538f765e } },
+    /* 88 */
+    { { 0xc2663b4b,0x0e0249d9,0x47cd38dd,0xe700ab5b,0x2c46559f,0xb192559d,
+        0x4bcde66d,0x8f9f74a8,0x3e2aced5,0xad161523,0x3dd03a5b,0xc155c047 },
+      { 0x3be454eb,0x346a8799,0x83b7dccd,0x66ee94db,0xab9d2abe,0x1f6d8378,
+        0x7733f355,0x4a396dd2,0xf53553c2,0x419bd40a,0x731dd943,0xd0ead98d } },
+    /* 89 */
+    { { 0xec142408,0x908e0b0e,0x4114b310,0x98943cb9,0x1742b1d7,0x03dbf7d8,
+        0x693412f4,0xd270df6b,0x8f69e20c,0xc5065494,0x697e43a1,0xa76a90c3 },
+      { 0x4624825a,0xe0fa3384,0x8acc34c2,0x82e48c0b,0xe9a14f2b,0x7b24bd14,
+        0x4db30803,0x4f5dd5e2,0x932da0a3,0x0c77a9e7,0x74c653dc,0x20db90f2 } },
+    /* 90 */
+    { { 0x0e6c5fd9,0x261179b7,0x6c982eea,0xf8bec123,0xd4957b7e,0x47683338,
+        0x0a72f66a,0xcc47e664,0x1bad9350,0xbd54bf6a,0xf454e95a,0xdfbf4c6a },
+      { 0x6907f4fa,0x3f7a7afa,0x865ca735,0x7311fae0,0x2a496ada,0x24737ab8,
+        0x15feb79b,0x13e425f1,0xa1b93c21,0xe9e97c50,0x4ddd3eb5,0xb26b6eac } },
+    /* 91 */
+    { { 0x2a2e5f2b,0x81cab9f5,0xbf385ac4,0xf93caf29,0xc909963a,0xf4bf35c3,
+        0x74c9143c,0x081e7300,0xc281b4c5,0x3ea57fa8,0x9b340741,0xe497905c },
+      { 0x55ab3cfb,0xf556dd8a,0x518db6ad,0xd444b96b,0x5ef4b955,0x34f5425a,
+        0xecd26aa3,0xdda7a3ac,0xda655e97,0xb57da11b,0xc2024c70,0x02da3eff } },
+    /* 92 */
+    { { 0x6481d0d9,0xe24b0036,0x818fdfe2,0x3740dbe5,0x190fda00,0xc1fc1f45,
+        0x3cf27fde,0x329c9280,0x6934f43e,0x7435cb53,0x7884e8fe,0x2b505a5d },
+      { 0x711adcc9,0x6cfcc6a6,0x531e21e1,0xf034325c,0x9b2a8a99,0xa2f4a967,
+        0x3c21bdff,0x9d5f3842,0x31b57d66,0xb25c7811,0x0b8093b9,0xdb5344d8 } },
+    /* 93 */
+    { { 0xae50a2f5,0x0d72e667,0xe4a861d1,0x9b7f8d8a,0x330df1cb,0xa129f70f,
+        0xe04fefc3,0xe90aa5d7,0xe72c3ae1,0xff561ecb,0xcdb955fa,0x0d8fb428 },
+      { 0xd7663784,0xd2235f73,0x7e2c456a,0xc05baec6,0x2adbfccc,0xe5c292e4,
+        0xefb110d5,0x4fd17988,0xd19d49f3,0x27e57734,0x84f679fe,0x188ac4ce } },
+    /* 94 */
+    { { 0xa796c53e,0x7ee344cf,0x0868009b,0xbbf6074d,0x474a1295,0x1f1594f7,
+        0xac11632d,0x66776edc,0x04e2fa5a,0x1862278b,0xc854a89a,0x52665cf2 },
+      { 0x8104ab58,0x7e376464,0x7204fd6d,0x16775913,0x44ea1199,0x86ca06a5,
+        0x1c9240dd,0xaa3f765b,0x24746149,0x5f8501a9,0xdcd251d7,0x7b982e30 } },
+    /* 95 */
+    { { 0xc15f3060,0xe44e9efc,0xa87ebbe6,0x5ad62f2e,0xc79500d4,0x36499d41,
+        0x336fa9d1,0xa66d6dc0,0x5afd3b1f,0xf8afc495,0xe5c9822b,0x1d8ccb24 },
+      { 0x79d7584b,0x4031422b,0xea3f20dd,0xc54a0580,0x958468c5,0x3f837c8f,
+        0xfbea7735,0x3d82f110,0x7dffe2fc,0x679a8778,0x20704803,0x48eba63b } },
+    /* 96 */
+    { { 0xdf46e2f6,0x89b10d41,0x19514367,0x13ab57f8,0x1d469c87,0x067372b9,
+        0x4f6c5798,0x0c195afa,0x272c9acf,0xea43a12a,0x678abdac,0x9dadd8cb },
+      { 0xe182579a,0xcce56c6b,0x2d26c2d8,0x86febadb,0x2a44745c,0x1c668ee1,
+        0x98dc047a,0x580acd86,0x51b9ec2d,0x5a2b79cc,0x4054f6a0,0x007da608 } },
+    /* 97 */
+    { { 0x17b00dd0,0x9e3ca352,0x0e81a7a6,0x046779cb,0xd482d871,0xb999fef3,
+        0xd9233fbc,0xe6f38134,0xf48cd0e0,0x112c3001,0x3c6c66ae,0x934e7576 },
+      { 0xd73234dc,0xb44d4fc3,0x864eafc1,0xfcae2062,0x26bef21a,0x843afe25,
+        0xf3b75fdf,0x61355107,0x794c2e6b,0x8367a5aa,0x8548a372,0x3d2629b1 } },
+    /* 98 */
+    { { 0x437cfaf8,0x6230618f,0x2032c299,0x5b8742cb,0x2293643a,0x949f7247,
+        0x09464f79,0xb8040f1a,0x4f254143,0x049462d2,0x366c7e76,0xabd6b522 },
+      { 0xd5338f55,0x119b392b,0x01495a0c,0x1a80a9ce,0xf8d7537e,0xf3118ca7,
+        0x6bf4b762,0xb715adc2,0xa8482b6c,0x24506165,0x96a7c84d,0xd958d7c6 } },
+    /* 99 */
+    { { 0xbdc21f31,0x9ad8aa87,0x8063e58c,0xadb3cab4,0xb07dd7b8,0xefd86283,
+        0x1be7c6b4,0xc7b9b762,0x015582de,0x2ef58741,0x299addf3,0xc970c52e },
+      { 0x22f24d66,0x78f02e2a,0x74cc100a,0xefec1d10,0x09316e1a,0xaf2a6a39,
+        0x5849dd49,0xce7c2205,0x96bffc4c,0x9c1fe75c,0x7ba06ec0,0xcad98fd2 } },
+    /* 100 */
+    { { 0xb648b73e,0xed76e2d0,0x1cfd285e,0xa9f92ce5,0x2ed13de1,0xa8c86c06,
+        0xa5191a93,0x1d3a574e,0x1ad1b8bf,0x385cdf8b,0x47d2cfe3,0xbbecc28a },
+      { 0x69cec548,0x98d326c0,0xf240a0b2,0x4f5bc1dd,0x29057236,0x241a7062,
+        0xc68294a4,0x0fc6e9c5,0xa319f17a,0x4d04838b,0x9ffc1c6f,0x8b612cf1 } },
+    /* 101 */
+    { { 0x4c3830eb,0x9bb0b501,0x8ee0d0c5,0x3d08f83c,0x79ba9389,0xa4a62642,
+        0x9cbc2914,0x5d5d4044,0x074c46f0,0xae9eb83e,0x74ead7d6,0x63bb758f },
+      { 0xc6bb29e0,0x1c40d2ea,0x4b02f41e,0x95aa2d87,0x53cb199a,0x92989175,
+        0x51584f6d,0xdd91bafe,0x31a1aaec,0x3715efb9,0x46780f9e,0xc1b6ae5b } },
+    /* 102 */
+    { { 0x42772f41,0xcded3e4b,0x3bcb79d1,0x3a700d5d,0x80feee60,0x4430d50e,
+        0xf5e5d4bb,0x444ef1fc,0xe6e358ff,0xc660194f,0x6a91b43c,0xe68a2f32 },
+      { 0x977fe4d2,0x5842775c,0x7e2a41eb,0x78fdef5c,0xff8df00e,0x5f3bec02,
+        0x5852525d,0xf4b840cd,0x4e6988bd,0x0870483a,0xcc64b837,0x39499e39 } },
+    /* 103 */
+    { { 0xb08df5fe,0xfc05de80,0x63ba0362,0x0c12957c,0xd5cf1428,0xea379414,
+        0x54ef6216,0xc559132a,0xb9e65cf8,0x33d5f12f,0x1695d663,0x09c60278 },
+      { 0x61f7a2fb,0x3ac1ced4,0xd4f5eeb8,0xdd838444,0x8318fcad,0x82a38c6c,
+        0xe9f1a864,0x315be2e5,0x442daf47,0x317b5771,0x95aa5f9e,0x81b5904a } },
+    /* 104 */
+    { { 0x8b21d232,0x6b6b1c50,0x8c2cba75,0x87f3dbc0,0xae9f0faf,0xa7e74b46,
+        0xbb7b8079,0x036a0985,0x8d974a25,0x4f185b90,0xd9af5ec9,0x5aa7cef0 },
+      { 0x57dcfffc,0xe0566a70,0xb8453225,0x6ea311da,0x23368aa9,0x72ea1a8d,
+        0x48cd552d,0xed9b2083,0xc80ea435,0xb987967c,0x6c104173,0xad735c75 } },
+    /* 105 */
+    { { 0xcee76ef4,0xaea85ab3,0xaf1d2b93,0x44997444,0xeacb923f,0x0851929b,
+        0x51e3bc0c,0xb080b590,0x59be68a2,0xc4ee1d86,0x64b26cda,0xf00de219 },
+      { 0xf2e90d4d,0x8d7fb5c0,0x77d9ec64,0x00e219a7,0x5d1c491c,0xc4e6febd,
+        0x1a8f4585,0x080e3754,0x48d2af9c,0x4a9b86c8,0xb6679851,0x2ed70db6 } },
+    /* 106 */
+    { { 0x586f25cb,0xaee44116,0xa0fcf70f,0xf7b6861f,0x18a350e8,0x55d2cd20,
+        0x92dc286f,0x861bf3e5,0x6226aba7,0x9ab18ffa,0xa9857b03,0xd15827be },
+      { 0x92e6acef,0x26c1f547,0xac1fbac3,0x422c63c8,0xfcbfd71d,0xa2d8760d,
+        0xb2511224,0x35f6a539,0x048d1a21,0xbaa88fa1,0xebf999db,0x49f1abe9 } },
+    /* 107 */
+    { { 0xf7492b73,0x16f9f4f4,0xcb392b1a,0xcf28ec1e,0x69ca6ffc,0x45b130d4,
+        0xb72efa58,0x28ba8d40,0x5ca066f5,0xace987c7,0x4ad022eb,0x3e399246 },
+      { 0x752555bb,0x63a2d84e,0x9c2ae394,0xaaa93b4a,0xc89539ca,0xcd80424e,
+        0xaa119a99,0x6d6b5a6d,0x379f2629,0xbd50334c,0xef3cc7d3,0x899e925e } },
+    /* 108 */
+    { { 0xbf825dc4,0xb7ff3651,0x40b9c462,0x0f741cc4,0x5cc4fb5b,0x771ff5a9,
+        0x47fd56fe,0xcb9e9c9b,0x5626c0d3,0xbdf053db,0xf7e14098,0xa97ce675 },
+      { 0x6c934f5e,0x68afe5a3,0xccefc46f,0x6cd5e148,0xd7a88586,0xc7758570,
+        0xdd558d40,0x49978f5e,0x64ae00c1,0xa1d5088a,0xf1d65bb2,0x58f2a720 } },
+    /* 109 */
+    { { 0x3e4daedb,0x66fdda4a,0x65d1b052,0x38318c12,0x4c4bbf5c,0x28d910a2,
+        0x78a9cd14,0x762fe5c4,0xd2cc0aee,0x08e5ebaa,0xca0c654c,0xd2cdf257 },
+      { 0x08b717d2,0x48f7c58b,0x386cd07a,0x3807184a,0xae7d0112,0x3240f626,
+        0xc43917b0,0x03e9361b,0x20aea018,0xf261a876,0x7e1e6372,0x53f556a4 } },
+    /* 110 */
+    { { 0x2f512a90,0xc84cee56,0x1b0ea9f1,0x24b3c004,0xe26cc1ea,0x0ee15d2d,
+        0xf0c9ef7d,0xd848762c,0xd5341435,0x1026e9c5,0xfdb16b31,0x8f5b73dc },
+      { 0xd2c75d95,0x1f69bef2,0xbe064dda,0x8d33d581,0x57ed35e6,0x8c024c12,
+        0xc309c281,0xf8d435f9,0xd6960193,0xfd295061,0xe9e49541,0x66618d78 } },
+    /* 111 */
+    { { 0x8ce382de,0x571cfd45,0xde900dde,0x175806ee,0x34aba3b5,0x61849965,
+        0xde7aec95,0xe899778a,0xff4aa97f,0xe8f00f6e,0x010b0c6d,0xae971cb5 },
+      { 0x3af788f1,0x1827eebc,0xe413fe2d,0xd46229ff,0x4741c9b4,0x8a15455b,
+        0xf8e424eb,0x5f02e690,0xdae87712,0x40a1202e,0x64944f6d,0x49b3bda2 } },
+    /* 112 */
+    { { 0x035b2d69,0xd63c6067,0x6bed91b0,0xb507150d,0x7afb39b2,0x1f35f82f,
+        0x16012b66,0xb9bd9c01,0xed0a5f50,0x00d97960,0x2716f7c9,0xed705451 },
+      { 0x127abdb4,0x1576eff4,0xf01e701c,0x6850d698,0x3fc87e2f,0x9fa7d749,
+        0xb0ce3e48,0x0b6bcc6f,0xf7d8c1c0,0xf4fbe1f5,0x02719cc6,0xcf75230e } },
+    /* 113 */
+    { { 0x722d94ed,0x6761d6c2,0x3718820e,0xd1ec3f21,0x25d0e7c6,0x65a40b70,
+        0xbaf3cf31,0xd67f830e,0xb93ea430,0x633b3807,0x0bc96c69,0x17faa0ea },
+      { 0xdf866b98,0xe6bf3482,0xa9db52d4,0x205c1ee9,0xff9ab869,0x51ef9bbd,
+        0x75eeb985,0x3863dad1,0xd3cf442a,0xef216c3b,0xf9c8e321,0x3fb228e3 } },
+    /* 114 */
+    { { 0x0760ac07,0x94f9b70c,0x9d79bf4d,0xf3c9ccae,0xc5ffc83d,0x73cea084,
+        0xdc49c38e,0xef50f943,0xbc9e7330,0xf467a2ae,0x44ea7fba,0x5ee534b6 },
+      { 0x03609e7f,0x20cb6272,0x62fdc9f0,0x09844355,0x0f1457f7,0xaf5c8e58,
+        0xb4b25941,0xd1f50a6c,0x2ec82395,0x77cb247c,0xda3dca33,0xa5f3e1e5 } },
+    /* 115 */
+    { { 0x7d85fa94,0x023489d6,0x2db9ce47,0x0ba40537,0xaed7aad1,0x0fdf7a1f,
+        0x9a4ccb40,0xa57b0d73,0x5b18967c,0x48fcec99,0xb7274d24,0xf30b5b6e },
+      { 0xc81c5338,0x7ccb4773,0xa3ed6bd0,0xb85639e6,0x1d56eada,0x7d9df95f,
+        0x0a1607ad,0xe256d57f,0x957574d6,0x6da7ffdc,0x01c7a8c4,0x65f84046 } },
+    /* 116 */
+    { { 0xcba1e7f1,0x8d45d0cb,0x02b55f64,0xef0a08c0,0x17e19892,0x771ca31b,
+        0x4885907e,0xe1843ecb,0x364ce16a,0x67797ebc,0x8df4b338,0x816d2b2d },
+      { 0x39aa8671,0xe870b0e5,0xc102b5f5,0x9f0db3e4,0x1720c697,0x34296659,
+        0x613c0d2a,0x0ad4c89e,0x418ddd61,0x1af900b2,0xd336e20e,0xe087ca72 } },
+    /* 117 */
+    { { 0xaba10079,0x222831ff,0x6d64fff2,0x0dc5f87b,0x3e8cb330,0x44547907,
+        0x702a33fb,0xe815aaa2,0x5fba3215,0x338d6b2e,0x79f549c8,0x0f7535cb },
+      { 0x2ee95923,0x471ecd97,0xc6d1c09f,0x1e868b37,0xc666ef4e,0x2bc7b8ec,
+        0x808a4bfc,0xf5416589,0x3fbc4d2e,0xf23e9ee2,0x2d75125b,0x4357236c } },
+    /* 118 */
+    { { 0xba9cdb1b,0xfe176d95,0x2f82791e,0x45a1ca01,0x4de4cca2,0x97654af2,
+        0x5cc4bcb9,0xbdbf9d0e,0xad97ac0a,0xf6a7df50,0x61359fd6,0xc52112b0 },
+      { 0x4f05eae3,0x696d9ce3,0xe943ac2b,0x903adc02,0x0848be17,0xa9075347,
+        0x2a3973e5,0x1e20f170,0x6feb67e9,0xe1aacc1c,0xe16bc6b9,0x2ca0ac32 } },
+    /* 119 */
+    { { 0xef871eb5,0xffea12e4,0xa8bf0a7a,0x94c2f25d,0x78134eaa,0x4d1e4c2a,
+        0x0360fb10,0x11ed16fb,0x85fc11be,0x4029b6db,0xf4d390fa,0x5e9f7ab7 },
+      { 0x30646612,0x5076d72f,0xdda1d0d8,0xa0afed1d,0x85a1d103,0x29022257,
+        0x4e276bcd,0xcb499e17,0x51246c3d,0x16d1da71,0x589a0443,0xc72d56d3 } },
+    /* 120 */
+    { { 0xdae5bb45,0xdf5ffc74,0x261bd6dc,0x99068c4a,0xaa98ec7b,0xdc0afa7a,
+        0xf121e96d,0xedd2ee00,0x1414045c,0x163cc7be,0x335af50e,0xb0b1bbce },
+      { 0x01a06293,0xd440d785,0x6552e644,0xcdebab7c,0x8c757e46,0x48cb8dbc,
+        0x3cabe3cb,0x81f9cf78,0xb123f59a,0xddd02611,0xeeb3784d,0x3dc7b88e } },
+    /* 121 */
+    { { 0xc4741456,0xe1b8d398,0x6032a121,0xa9dfa902,0x1263245b,0x1cbfc86d,
+        0x5244718c,0xf411c762,0x05b0fc54,0x96521d54,0xdbaa4985,0x1afab46e },
+      { 0x8674b4ad,0xa75902ba,0x5ad87d12,0x486b43ad,0x36e0d099,0x72b1c736,
+        0xbb6cd6d6,0x39890e07,0x59bace4e,0x8128999c,0x7b535e33,0xd8da430b } },
+    /* 122 */
+    { { 0xc6b75791,0x39f65642,0x21806bfb,0x050947a6,0x1362ef84,0x0ca3e370,
+        0x8c3d2391,0x9bc60aed,0x732e1ddc,0x9b488671,0xa98ee077,0x12d10d9e },
+      { 0x3651b7dc,0xb6f2822d,0x80abd138,0x6345a5ba,0x472d3c84,0x62033262,
+        0xacc57527,0xd54a1d40,0x424447cb,0x6ea46b3a,0x2fb1a496,0x5bc41057 } },
+    /* 123 */
+    { { 0xa751cd0e,0xe70c57a3,0xeba3c7d6,0x190d8419,0x9d47d55a,0xb1c3bee7,
+        0xf912c6d8,0xda941266,0x407a6ad6,0x12e9aacc,0x6e838911,0xd6ce5f11 },
+      { 0x70e1f2ce,0x063ca97b,0x8213d434,0xa3e47c72,0x84df810a,0xa016e241,
+        0xdfd881a4,0x688ad7b0,0xa89bf0ad,0xa37d99fc,0xa23c2d23,0xd8e3f339 } },
+    /* 124 */
+    { { 0x750bed6f,0xbdf53163,0x83e68b0a,0x808abc32,0x5bb08a33,0x85a36627,
+        0x6b0e4abe,0xf72a3a0f,0xfaf0c6ad,0xf7716d19,0x5379b25f,0x22dcc020 },
+      { 0xf9a56e11,0x7400bf8d,0x56a47f21,0x6cb8bad7,0x7a6eb644,0x7c97176f,
+        0xd1f5b646,0xe8fd84f7,0x44ddb054,0x98320a94,0x1dde86f5,0x07071ba3 } },
+    /* 125 */
+    { { 0x98f8fcb9,0x6fdfa0e5,0x94d0d70c,0x89cec8e0,0x106d20a8,0xa0899397,
+        0xba8acc9c,0x915bfb9a,0x5507e01c,0x1370c94b,0x8a821ffb,0x83246a60 },
+      { 0xbe3c378f,0xa8273a9f,0x35a25be9,0x7e544789,0x4dd929d7,0x6cfa4972,
+        0x365bd878,0x987fed9d,0x5c29a7ae,0x4982ac94,0x5ddd7ec5,0x4589a5d7 } },
+    /* 126 */
+    { { 0xa95540a9,0x9fabb174,0x0162c5b0,0x7cfb886f,0xea3dee18,0x17be766b,
+        0xe88e624c,0xff7da41f,0x8b919c38,0xad0b71eb,0xf31ff9a9,0x86a522e0 },
+      { 0x868bc259,0xbc8e6f72,0x3ccef9e4,0x6130c638,0x9a466555,0x09f1f454,
+        0x19b2bfb4,0x8e6c0f09,0x0ca7bb22,0x945c46c9,0x4dafb67b,0xacd87168 } },
+    /* 127 */
+    { { 0x10c53841,0x090c72ca,0x55a4fced,0xc20ae01b,0xe10234ad,0x03f7ebd5,
+        0x85892064,0xb3f42a6a,0xb4a14722,0xbdbc30c0,0x8ca124cc,0x971bc437 },
+      { 0x517ff2ff,0x6f79f46d,0xecba947b,0x6a9c96e2,0x62925122,0x5e79f2f4,
+        0x6a4e91f1,0x30a96bb1,0x2d4c72da,0x1147c923,0x5811e4df,0x65bc311f } },
+    /* 128 */
+    { { 0x139b3239,0x87c7dd7d,0x4d833bae,0x8b57824e,0x9fff0015,0xbcbc4878,
+        0x909eaf1a,0x8ffcef8b,0xf1443a78,0x9905f4ee,0xe15cbfed,0x020dd4a2 },
+      { 0xa306d695,0xca2969ec,0xb93caf60,0xdf940cad,0x87ea6e39,0x67f7fab7,
+        0xf98c4fe5,0x0d0ee10f,0xc19cb91e,0xc646879a,0x7d1d7ab4,0x4b4ea50c } },
+    /* 129 */
+    { { 0x7a0db57e,0x19e40945,0x9a8c9702,0xe6017cad,0x1be5cff9,0xdbf739e5,
+        0xa7a938a2,0x3646b3cd,0x68350dfc,0x04511085,0x56e098b5,0xad3bd6f3 },
+      { 0xee2e3e3e,0x935ebabf,0x473926cb,0xfbd01702,0x9e9fb5aa,0x7c735b02,
+        0x2e3feff0,0xc52a1b85,0x046b405a,0x9199abd3,0x39039971,0xe306fcec } },
+    /* 130 */
+    { { 0x23e4712c,0xd6d9aec8,0xc3c198ee,0x7ca8376c,0x31bebd8a,0xe6d83187,
+        0xd88bfef3,0xed57aff3,0xcf44edc7,0x72a645ee,0x5cbb1517,0xd4e63d0b },
+      { 0xceee0ecf,0x98ce7a1c,0x5383ee8e,0x8f012633,0xa6b455e8,0x3b879078,
+        0xc7658c06,0xcbcd3d96,0x0783336a,0x721d6fe7,0x5a677136,0xf21a7263 } },
+    /* 131 */
+    { { 0x9586ba11,0x19d8b3cd,0x8a5c0480,0xd9e0aeb2,0x2230ef5c,0xe4261dbf,
+        0x02e6bf09,0x095a9dee,0x80dc7784,0x8963723c,0x145157b1,0x5c97dbaf },
+      { 0x4bc4503e,0x97e74434,0x85a6b370,0x0fb1cb31,0xcd205d4b,0x3e8df2be,
+        0xf8f765da,0x497dd1bc,0x6c988a1a,0x92ef95c7,0x64dc4cfa,0x3f924baa } },
+    /* 132 */
+    { { 0x7268b448,0x6bf1b8dd,0xefd79b94,0xd4c28ba1,0xe4e3551f,0x2fa1f8c8,
+        0x5c9187a9,0x769e3ad4,0x40326c0d,0x28843b4d,0x50d5d669,0xfefc8094 },
+      { 0x90339366,0x30c85bfd,0x5ccf6c3a,0x4eeb56f1,0x28ccd1dc,0x0e72b149,
+        0xf2ce978e,0x73ee85b5,0x3165bb23,0xcdeb2bf3,0x4e410abf,0x8106c923 } },
+    /* 133 */
+    { { 0x7d02f4ee,0xc8df0161,0x18e21225,0x8a781547,0x6acf9e40,0x4ea895eb,
+        0x6e5a633d,0x8b000cb5,0x7e981ffb,0xf31d86d5,0x4475bc32,0xf5c8029c },
+      { 0x1b568973,0x764561ce,0xa62996ec,0x2f809b81,0xda085408,0x9e513d64,
+        0xe61ce309,0xc27d815d,0x272999e0,0x0da6ff99,0xfead73f7,0xbd284779 } },
+    /* 134 */
+    { { 0x9b1cdf2b,0x6033c2f9,0xbc5fa151,0x2a99cf06,0x12177b3b,0x7d27d259,
+        0xc4485483,0xb1f15273,0x102e2297,0x5fd57d81,0xc7f6acb7,0x3d43e017 },
+      { 0x3a70eb28,0x41a8bb0b,0x3e80b06b,0x67de2d8e,0x70c28de5,0x09245a41,
+        0xa7b26023,0xad7dbcb1,0x2cbc6c1e,0x70b08a35,0x9b33041f,0xb504fb66 } },
+    /* 135 */
+    { { 0xf97a27c2,0xa8e85ab5,0xc10a011b,0x6ac5ec8b,0xffbcf161,0x55745533,
+        0x65790a60,0x01780e85,0x99ee75b0,0xe451bf85,0x39c29881,0x8907a63b },
+      { 0x260189ed,0x76d46738,0x47bd35cb,0x284a4436,0x20cab61e,0xd74e8c40,
+        0x416cf20a,0x6264bf8c,0x5fd820ce,0xfa5a6c95,0xf24bb5fc,0xfa7154d0 } },
+    /* 136 */
+    { { 0x9b3f5034,0x18482cec,0xcd9e68fd,0x962d445a,0x95746f23,0x266fb1d6,
+        0x58c94a4b,0xc66ade5a,0xed68a5b6,0xdbbda826,0x7ab0d6ae,0x05664a4d },
+      { 0x025e32fc,0xbcd4fe51,0xa96df252,0x61a5aebf,0x31592a31,0xd88a07e2,
+        0x98905517,0x5d9d94de,0x5fd440e7,0x96bb4010,0xe807db4c,0x1b0c47a2 } },
+    /* 137 */
+    { { 0x08223878,0x5c2a6ac8,0xe65a5558,0xba08c269,0x9bbc27fd,0xd22b1b9b,
+        0x72b9607d,0x919171bf,0xe588dc58,0x9ab455f9,0x23662d93,0x6d54916e },
+      { 0x3b1de0c1,0x8da8e938,0x804f278f,0xa84d186a,0xd3461695,0xbf4988cc,
+        0xe10eb0cb,0xf5eae3be,0xbf2a66ed,0x1ff8b68f,0xc305b570,0xa68daf67 } },
+    /* 138 */
+    { { 0x44b2e045,0xc1004cff,0x4b1c05d4,0x91b5e136,0x88a48a07,0x53ae4090,
+        0xea11bb1a,0x73fb2995,0x3d93a4ea,0x32048570,0x3bfc8a5f,0xcce45de8 },
+      { 0xc2b3106e,0xaff4a97e,0xb6848b4f,0x9069c630,0xed76241c,0xeda837a6,
+        0x6cc3f6cf,0x8a0daf13,0x3da018a8,0x199d049d,0xd9093ba3,0xf867c6b1 } },
+    /* 139 */
+    { { 0x56527296,0xe4d42a56,0xce71178d,0xae26c73d,0x6c251664,0x70a0adac,
+        0x5dc0ae1d,0x813483ae,0xdaab2daf,0x7574eacd,0xc2d55f4f,0xc56b52dc },
+      { 0x95f32923,0x872bc167,0x5bdd2a89,0x4be17581,0xa7699f00,0x9b57f1e7,
+        0x3ac2de02,0x5fcd9c72,0x92377739,0x83af3ba1,0xfc50b97f,0xa64d4e2b } },
+    /* 140 */
+    { { 0x0e552b40,0x2172dae2,0xd34d52e8,0x62f49725,0x07958f98,0x7930ee40,
+        0x751fdd74,0x56da2a90,0xf53e48c3,0xf1192834,0x8e53c343,0x34d2ac26 },
+      { 0x13111286,0x1073c218,0xda9d9827,0x201dac14,0xee95d378,0xec2c29db,
+        0x1f3ee0b1,0x9316f119,0x544ce71c,0x7890c9f0,0x27612127,0xd77138af } },
+    /* 141 */
+    { { 0x3b4ad1cd,0x78045e6d,0x4aa49bc1,0xcd86b94e,0xfd677a16,0x57e51f1d,
+        0xfa613697,0xd9290935,0x34f4d893,0x7a3f9593,0x5d5fcf9b,0x8c9c248b },
+      { 0x6f70d4e9,0x9f23a482,0x63190ae9,0x17273454,0x5b081a48,0x4bdd7c13,
+        0x28d65271,0x1e2de389,0xe5841d1f,0x0bbaaa25,0x746772e5,0xc4c18a79 } },
+    /* 142 */
+    { { 0x593375ac,0x10ee2681,0x7dd5e113,0x4f3288be,0x240f3538,0x9a97b2fb,
+        0x1de6b1e2,0xfa11089f,0x1351bc58,0x516da562,0x2dfa85b5,0x573b6119 },
+      { 0x6cba7df5,0x89e96683,0x8c28ab40,0xf299be15,0xad43fcbf,0xe91c9348,
+        0x9a1cefb3,0xe9bbc7cc,0x738b2775,0xc8add876,0x775eaa01,0x6e3b1f2e } },
+    /* 143 */
+    { { 0xb677788b,0x0365a888,0x3fd6173c,0x634ae8c4,0x9e498dbe,0x30498761,
+        0xc8f779ab,0x08c43e6d,0x4c09aca9,0x068ae384,0x2018d170,0x2380c70b },
+      { 0xa297c5ec,0xcf77fbc3,0xca457948,0xdacbc853,0x336bec7e,0x3690de04,
+        0x14eec461,0x26bbac64,0x1f713abf,0xd1c23c7e,0xe6fd569e,0xf08bbfcd } },
+    /* 144 */
+    { { 0x84770ee3,0x5f8163f4,0x744a1706,0x0e0c7f94,0xe1b2d46d,0x9c8f05f7,
+        0xd01fd99a,0x417eafe7,0x11440e5b,0x2ba15df5,0x91a6fbcf,0xdc5c552a },
+      { 0xa270f721,0x86271d74,0xa004485b,0x32c0a075,0x8defa075,0x9d1a87e3,
+        0xbf0d20fe,0xb590a7ac,0x8feda1f5,0x430c41c2,0x58f6ec24,0x454d2879 } },
+    /* 145 */
+    { { 0x7c525435,0x52b7a635,0x37c4bdbc,0x3d9ef57f,0xdffcc475,0x2bb93e9e,
+        0x7710f3be,0xf7b8ba98,0x21b727de,0x42ee86da,0x2e490d01,0x55ac3f19 },
+      { 0xc0c1c390,0x487e3a6e,0x446cde7b,0x036fb345,0x496ae951,0x089eb276,
+        0x71ed1234,0xedfed4d9,0x900f0b46,0x661b0dd5,0x8582f0d3,0x11bd6f1b } },
+    /* 146 */
+    { { 0x076bc9d1,0x5cf9350f,0xcf3cd2c3,0x15d903be,0x25af031c,0x21cfc8c2,
+        0x8b1cc657,0xe0ad3248,0x70014e87,0xdd9fb963,0x297f1658,0xf0f3a5a1 },
+      { 0xf1f703aa,0xbb908fba,0x2f6760ba,0x2f9cc420,0x66a38b51,0x00ceec66,
+        0x05d645da,0x4deda330,0xf7de3394,0xb9cf5c72,0x1ad4c906,0xaeef6502 } },
+    /* 147 */
+    { { 0x7a19045d,0x0583c8b1,0xd052824c,0xae7c3102,0xff6cfa58,0x2a234979,
+        0x62c733c0,0xfe9dffc9,0x9c0c4b09,0x3a7fa250,0x4fe21805,0x516437bb },
+      { 0xc2a23ddb,0x9454e3d5,0x289c104e,0x0726d887,0x4fd15243,0x8977d918,
+        0x6d7790ba,0xc559e73f,0x465af85f,0x8fd3e87d,0x5feee46b,0xa2615c74 } },
+    /* 148 */
+    { { 0x4335167d,0xc8d607a8,0xe0f5c887,0x8b42d804,0x398d11f9,0x5f9f13df,
+        0x20740c67,0x5aaa5087,0xa3d9234b,0x83da9a6a,0x2a54bad1,0xbd3a5c4e },
+      { 0x2db0f658,0xdd13914c,0x5a3f373a,0x29dcb66e,0x5245a72b,0xbfd62df5,
+        0x91e40847,0x19d18023,0xb136b1ae,0xd9df74db,0x3f93bc5b,0x72a06b6b } },
+    /* 149 */
+    { { 0xad19d96f,0x6da19ec3,0xfb2a4099,0xb342daa4,0x662271ea,0x0e61633a,
+        0xce8c054b,0x3bcece81,0x8bd62dc6,0x7cc8e061,0xee578d8b,0xae189e19 },
+      { 0xdced1eed,0x73e7a25d,0x7875d3ab,0xc1257f0a,0x1cfef026,0x2cb2d5a2,
+        0xb1fdf61c,0xd98ef39b,0x24e83e6c,0xcd8e6f69,0xc7b7088b,0xd71e7076 } },
+    /* 150 */
+    { { 0x9d4245bf,0x33936830,0x2ac2953b,0x22d96217,0x56c3c3cd,0xb3bf5a82,
+        0x0d0699e8,0x50c9be91,0x8f366459,0xec094463,0x513b7c35,0x6c056dba },
+      { 0x045ab0e3,0x687a6a83,0x445c9295,0x8d40b57f,0xa16f5954,0x0f345048,
+        0x3d8f0a87,0x64b5c639,0x9f71c5e2,0x106353a2,0x874f0dd4,0xdd58b475 } },
+    /* 151 */
+    { { 0x62230c72,0x67ec084f,0x481385e3,0xf14f6cca,0x4cda7774,0xf58bb407,
+        0xaa2dbb6b,0xe15011b1,0x0c035ab1,0xd488369d,0x8245f2fd,0xef83c24a },
+      { 0x9fdc2538,0xfb57328f,0x191fe46a,0x79808293,0x32ede548,0xe28f5c44,
+        0xea1a022c,0x1b3cda99,0x3df2ec7f,0x39e639b7,0x760e9a18,0x77b6272b } },
+    /* 152 */
+    { { 0xa65d56d5,0x2b1d51bd,0x7ea696e0,0x3a9b71f9,0x9904f4c4,0x95250ecc,
+        0xe75774b7,0x8bc4d6eb,0xeaeeb9aa,0x0e343f8a,0x930e04cb,0xc473c1d1 },
+      { 0x064cd8ae,0x282321b1,0x5562221c,0xf4b4371e,0xd1bf1221,0xc1cc81ec,
+        0xe2c8082f,0xa52a07a9,0xba64a958,0x350d8e59,0x6fb32c9a,0x29e4f3de } },
+    /* 153 */
+    { { 0xba89aaa5,0x0aa9d56c,0xc4c6059e,0xf0208ac0,0xbd6ddca4,0x7400d9c6,
+        0xf2c2f74a,0xb384e475,0xb1562dd3,0x4c1061fc,0x2e153b8d,0x3924e248 },
+      { 0x849808ab,0xf38b8d98,0xa491aa36,0x29bf3260,0x88220ede,0x85159ada,
+        0xbe5bc422,0x8b47915b,0xd7300967,0xa934d72e,0x2e515d0d,0xc4f30398 } },
+    /* 154 */
+    { { 0x1b1de38b,0xe3e9ee42,0x42636760,0xa124e25a,0x90165b1a,0x90bf73c0,
+        0x146434c5,0x21802a34,0x2e1fa109,0x54aa83f2,0xed9c51e9,0x1d4bd03c },
+      { 0x798751e6,0xc2d96a38,0x8c3507f5,0xed27235f,0xc8c24f88,0xb5fb80e2,
+        0xd37f4f78,0xf873eefa,0xf224ba96,0x7229fd74,0x9edd7149,0x9dcd9199 } },
+    /* 155 */
+    { { 0x4e94f22a,0xee9f81a6,0xf71ec341,0xe5609892,0xa998284e,0x6c818ddd,
+        0x3b54b098,0x9fd47295,0x0e8a7cc9,0x47a6ac03,0xb207a382,0xde684e5e },
+      { 0x2b6b956b,0x4bdd1ecd,0xf01b3583,0x09084414,0x55233b14,0xe2f80b32,
+        0xef5ebc5e,0x5a0fec54,0xbf8b29a2,0x74cf25e6,0x7f29e014,0x1c757fa0 } },
+    /* 156 */
+    { { 0xeb0fdfe4,0x1bcb5c4a,0xf0899367,0xd7c649b3,0x05bc083b,0xaef68e3f,
+        0xa78aa607,0x57a06e46,0x21223a44,0xa2136ecc,0x52f5a50b,0x89bd6484 },
+      { 0x4455f15a,0x724411b9,0x08a9c0fd,0x23dfa970,0x6db63bef,0x7b0da4d1,
+        0xfb162443,0x6f8a7ec1,0xe98284fb,0xc1ac9cee,0x33566022,0x085a582b } },
+    /* 157 */
+    { { 0xec1f138a,0x15cb61f9,0x668f0c28,0x11c9a230,0xdf93f38f,0xac829729,
+        0x4048848d,0xcef25698,0x2bba8fbf,0x3f686da0,0x111c619a,0xed5fea78 },
+      { 0xd6d1c833,0x9b4f73bc,0x86e7bf80,0x50951606,0x042b1d51,0xa2a73508,
+        0x5fb89ec2,0x9ef6ea49,0x5ef8b892,0xf1008ce9,0x9ae8568b,0x78a7e684 } },
+    /* 158 */
+    { { 0x10470cd8,0x3fe83a7c,0xf86df000,0x92734682,0xda9409b5,0xb5dac06b,
+        0x94939c5f,0x1e7a9660,0x5cc116dc,0xdec6c150,0x66bac8cc,0x1a52b408 },
+      { 0x6e864045,0x5303a365,0x9139efc1,0x45eae72a,0x6f31d54f,0x83bec646,
+        0x6e958a6d,0x2fb4a86f,0x4ff44030,0x6760718e,0xe91ae0df,0x008117e3 } },
+    /* 159 */
+    { { 0x384310a2,0x5d5833ba,0x1fd6c9fc,0xbdfb4edc,0x849c4fb8,0xb9a4f102,
+        0x581c1e1f,0xe5fb239a,0xd0a9746d,0xba44b2e7,0x3bd942b9,0x78f7b768 },
+      { 0xc87607ae,0x076c8ca1,0xd5caaa7e,0x82b23c2e,0x2763e461,0x6a581f39,
+        0x3886df11,0xca8a5e4a,0x264e7f22,0xc87e90cf,0x215cfcfc,0x04f74870 } },
+    /* 160 */
+    { { 0x141d161c,0x5285d116,0x93c4ed17,0x67cd2e0e,0x7c36187e,0x12c62a64,
+        0xed2584ca,0xf5329539,0x42fbbd69,0xc4c777c4,0x1bdfc50a,0x107de776 },
+      { 0xe96beebd,0x9976dcc5,0xa865a151,0xbe2aff95,0x9d8872af,0x0e0a9da1,
+        0xa63c17cc,0x5e357a3d,0xe15cc67c,0xd31fdfd8,0x7970c6d8,0xc44bbefd } },
+    /* 161 */
+    { { 0x4c0c62f1,0x703f83e2,0x4e195572,0x9b1e28ee,0xfe26cced,0x6a82858b,
+        0xc43638fa,0xd381c84b,0xa5ba43d8,0x94f72867,0x10b82743,0x3b4a783d },
+      { 0x7576451e,0xee1ad7b5,0x14b6b5c8,0xc3d0b597,0xfcacc1b8,0x3dc30954,
+        0x472c9d7b,0x55df110e,0x02f8a328,0x97c86ed7,0x88dc098f,0xd0433413 } },
+    /* 162 */
+    { { 0x2ca8f2fe,0x1a60d152,0x491bd41f,0x61640948,0x58dfe035,0x6dae29a5,
+        0x278e4863,0x9a615bea,0x9ad7c8e5,0xbbdb4477,0x2ceac2fc,0x1c706630 },
+      { 0x99699b4b,0x5e2b54c6,0x239e17e8,0xb509ca6d,0xea063a82,0x728165fe,
+        0xb6a22e02,0x6b5e609d,0xb26ee1df,0x12813905,0x439491fa,0x07b9f722 } },
+    /* 163 */
+    { { 0x48ff4e49,0x1592ec14,0x6d644129,0x3e4e9f17,0x1156acc0,0x7acf8288,
+        0xbb092b0b,0x5aa34ba8,0x7d38393d,0xcd0f9022,0xea4f8187,0x416724dd },
+      { 0xc0139e73,0x3c4e641c,0x91e4d87d,0xe0fe46cf,0xcab61f8a,0xedb3c792,
+        0xd3868753,0x4cb46de4,0x20f1098a,0xe449c21d,0xf5b8ea6e,0x5e5fd059 } },
+    /* 164 */
+    { { 0x75856031,0x7fcadd46,0xeaf2fbd0,0x89c7a4cd,0x7a87c480,0x1af523ce,
+        0x61d9ae90,0xe5fc1095,0xbcdb95f5,0x3fb5864f,0xbb5b2c7d,0xbeb5188e },
+      { 0x3ae65825,0x3d1563c3,0x0e57d641,0x116854c4,0x1942ebd3,0x11f73d34,
+        0xc06955b3,0x24dc5904,0x995a0a62,0x8a0d4c83,0x5d577b7d,0xfb26b86d } },
+    /* 165 */
+    { { 0xc686ae17,0xc53108e7,0xd1c1da56,0x9090d739,0x9aec50ae,0x4583b013,
+        0xa49a6ab2,0xdd9a088b,0xf382f850,0x28192eea,0xf5fe910e,0xcc8df756 },
+      { 0x9cab7630,0x877823a3,0xfb8e7fc1,0x64984a9a,0x364bfc16,0x5448ef9c,
+        0xc44e2a9a,0xbbb4f871,0x435c95e9,0x901a41ab,0xaaa50a06,0xc6c23e5f } },
+    /* 166 */
+    { { 0x9034d8dd,0xb78016c1,0x0b13e79b,0x856bb44b,0xb3241a05,0x85c6409a,
+        0x2d78ed21,0x8d2fe19a,0x726eddf2,0xdcc7c26d,0x25104f04,0x3ccaff5f },
+      { 0x6b21f843,0x397d7edc,0xe975de4c,0xda88e4dd,0x4f5ab69e,0x5273d396,
+        0x9aae6cc0,0x537680e3,0x3e6f9461,0xf749cce5,0x957bffd3,0x021ddbd9 } },
+    /* 167 */
+    { { 0x777233cf,0x7b64585f,0x0942a6f0,0xfe6771f6,0xdfe6eef0,0x636aba7a,
+        0x86038029,0x63bbeb56,0xde8fcf36,0xacee5842,0xd4a20524,0x48d9aa99 },
+      { 0x0da5e57a,0xcff7a74c,0xe549d6c9,0xc232593c,0xf0f2287b,0x68504bcc,
+        0xbc8360b5,0x6d7d098d,0x5b402f41,0xeac5f149,0xb87d1bf1,0x61936f11 } },
+    /* 168 */
+    { { 0xb8153a9d,0xaa9da167,0x9e83ecf0,0xa49fe3ac,0x1b661384,0x14c18f8e,
+        0x38434de1,0x61c24dab,0x283dae96,0x3d973c3a,0x82754fc9,0xc99baa01 },
+      { 0x4c26b1e3,0x477d198f,0xa7516202,0x12e8e186,0x362addfa,0x386e52f6,
+        0xc3962853,0x31e8f695,0x6aaedb60,0xdec2af13,0x29cf74ac,0xfcfdb4c6 } },
+    /* 169 */
+    { { 0xcca40298,0x6b3ee958,0xf2f5d195,0xc3878153,0xed2eae5b,0x0c565630,
+        0x3a697cf2,0xd089b37e,0xad5029ea,0xc2ed2ac7,0x0f0dda6a,0x7e5cdfad },
+      { 0xd9b86202,0xf98426df,0x4335e054,0xed1960b1,0x3f14639e,0x1fdb0246,
+        0x0db6c670,0x17f709c3,0x773421e1,0xbfc687ae,0x26c1a8ac,0x13fefc4a } },
+    /* 170 */
+    { { 0x7ffa0a5f,0xe361a198,0xc63fe109,0xf4b26102,0x6c74e111,0x264acbc5,
+        0x77abebaf,0x4af445fa,0x24cddb75,0x448c4fdd,0x44506eea,0x0b13157d },
+      { 0x72e9993d,0x22a6b159,0x85e5ecbe,0x2c3c57e4,0xfd83e1a1,0xa673560b,
+        0xc3b8c83b,0x6be23f82,0x40bbe38e,0x40b13a96,0xad17399b,0x66eea033 } },
+    /* 171 */
+    { { 0xb4c6c693,0x49fc6e95,0x36af7d38,0xefc735de,0x35fe42fc,0xe053343d,
+        0x6a9ab7c3,0xf0aa427c,0x4a0fcb24,0xc79f0436,0x93ebbc50,0x16287243 },
+      { 0x16927e1e,0x5c3d6bd0,0x673b984c,0x40158ed2,0x4cd48b9a,0xa7f86fc8,
+        0x60ea282d,0x1643eda6,0xe2a1beed,0x45b393ea,0x19571a94,0x664c839e } },
+    /* 172 */
+    { { 0x27eeaf94,0x57745750,0xea99e1e7,0x2875c925,0x5086adea,0xc127e7ba,
+        0x86fe424f,0x765252a0,0x2b6c0281,0x1143cc6c,0xd671312d,0xc9bb2989 },
+      { 0x51acb0a5,0x880c337c,0xd3c60f78,0xa3710915,0x9262b6ed,0x496113c0,
+        0x9ce48182,0x5d25d9f8,0xb3813586,0x53b6ad72,0x4c0e159c,0x0ea3bebc } },
+    /* 173 */
+    { { 0xc5e49bea,0xcaba450a,0x7c05da59,0x684e5415,0xde7ac36c,0xa2e9cab9,
+        0x2e6f957b,0x4ca79b5f,0x09b817b1,0xef7b0247,0x7d89df0f,0xeb304990 },
+      { 0x46fe5096,0x508f7307,0x2e04eaaf,0x695810e8,0x3512f76c,0x88ef1bd9,
+        0x3ebca06b,0x77661351,0xccf158b7,0xf7d4863a,0x94ee57da,0xb2a81e44 } },
+    /* 174 */
+    { { 0x6d53e6ba,0xff288e5b,0x14484ea2,0xa90de1a9,0xed33c8ec,0x2fadb60c,
+        0x28b66a40,0x579d6ef3,0xec24372d,0x4f2dd6dd,0x1d66ec7d,0xe9e33fc9 },
+      { 0x039eab6e,0x110899d2,0x3e97bb5e,0xa31a667a,0xcfdce68e,0x6200166d,
+        0x5137d54b,0xbe83ebae,0x4800acdf,0x085f7d87,0x0c6f8c86,0xcf4ab133 } },
+    /* 175 */
+    { { 0x931e08fb,0x03f65845,0x1506e2c0,0x6438551e,0x9c36961f,0x5791f0dc,
+        0xe3dcc916,0x68107b29,0xf495d2ca,0x83242374,0x6ee5895b,0xd8cfb663 },
+      { 0xa0349b1b,0x525e0f16,0x4a0fab86,0x33cd2c6c,0x2af8dda9,0x46c12ee8,
+        0x71e97ad3,0x7cc424ba,0x37621eb0,0x69766ddf,0xa5f0d390,0x95565f56 } },
+    /* 176 */
+    { { 0x1a0f5e94,0xe0e7bbf2,0x1d82d327,0xf771e115,0xceb111fa,0x10033e3d,
+        0xd3426638,0xd269744d,0x00d01ef6,0xbdf2d9da,0xa049ceaf,0x1cb80c71 },
+      { 0x9e21c677,0x17f18328,0x19c8f98b,0x6452af05,0x80b67997,0x35b9c5f7,
+        0x40f8f3d4,0x5c2e1cbe,0x66d667ca,0x43f91656,0xcf9d6e79,0x9faaa059 } },
+    /* 177 */
+    { { 0x0a078fe6,0x8ad24618,0x464fd1dd,0xf6cc73e6,0xc3e37448,0x4d2ce34d,
+        0xe3271b5f,0x624950c5,0xefc5af72,0x62910f5e,0xaa132bc6,0x8b585bf8 },
+      { 0xa839327f,0x11723985,0x4aac252f,0x34e2d27d,0x6296cc4e,0x402f59ef,
+        0x47053de9,0x00ae055c,0x28b4f09b,0xfc22a972,0xfa0c180e,0xa9e86264 } },
+    /* 178 */
+    { { 0xbc310ecc,0x0b7b6224,0x67fa14ed,0x8a1a74f1,0x7214395c,0x87dd0960,
+        0xf5c91128,0xdf1b3d09,0x86b264a8,0x39ff23c6,0x3e58d4c5,0xdc2d49d0 },
+      { 0xa9d6f501,0x2152b7d3,0xc04094f7,0xf4c32e24,0xd938990f,0xc6366596,
+        0x94fb207f,0x084d078f,0x328594cb,0xfd99f1d7,0xcb2d96b3,0x36defa64 } },
+    /* 179 */
+    { { 0x13ed7cbe,0x4619b781,0x9784bd0e,0x95e50015,0x2c7705fe,0x2a32251c,
+        0x5f0dd083,0xa376af99,0x0361a45b,0x55425c6c,0x1f291e7b,0x812d2cef },
+      { 0x5fd94972,0xccf581a0,0xe56dc383,0x26e20e39,0x63dbfbf0,0x0093685d,
+        0x36b8c575,0x1fc164cc,0x390ef5e7,0xb9c5ab81,0x26908c66,0x40086beb } },
+    /* 180 */
+    { { 0x37e3c115,0xe5e54f79,0xc1445a8a,0x69b8ee8c,0xb7659709,0x79aedff2,
+        0x1b46fbe6,0xe288e163,0xd18d7bb7,0xdb4844f0,0x48aa6424,0xe0ea23d0 },
+      { 0xf3d80a73,0x714c0e4e,0x3bd64f98,0x87a0aa9e,0x2ec63080,0x8844b8a8,
+        0x255d81a3,0xe0ac9c30,0x455397fc,0x86151237,0x2f820155,0x0b979464 } },
+    /* 181 */
+    { { 0x4ae03080,0x127a255a,0x580a89fb,0x232306b4,0x6416f539,0x04e8cd6a,
+        0x13b02a0e,0xaeb70dee,0x4c09684a,0xa3038cf8,0x28e433ee,0xa710ec3c },
+      { 0x681b1f7d,0x77a72567,0x2fc28170,0x86fbce95,0xf5735ac8,0xd3408683,
+        0x6bd68e93,0x3a324e2a,0xc027d155,0x7ec74353,0xd4427177,0xab60354c } },
+    /* 182 */
+    { { 0xef4c209d,0x32a5342a,0x08d62704,0x2ba75274,0xc825d5fe,0x4bb4af6f,
+        0xd28e7ff1,0x1c3919ce,0xde0340f6,0x1dfc2fdc,0x29f33ba9,0xc6580baf },
+      { 0x41d442cb,0xae121e75,0x3a4724e4,0x4c7727fd,0x524f3474,0xe556d6a4,
+        0x785642a2,0x87e13cc7,0xa17845fd,0x182efbb1,0x4e144857,0xdcec0cf1 } },
+    /* 183 */
+    { { 0xe9539819,0x1cb89541,0x9d94dbf1,0xc8cb3b4f,0x417da578,0x1d353f63,
+        0x8053a09e,0xb7a697fb,0xc35d8b78,0x8d841731,0xb656a7a9,0x85748d6f },
+      { 0xc1859c5d,0x1fd03947,0x535d22a2,0x6ce965c1,0x0ca3aadc,0x1966a13e,
+        0x4fb14eff,0x9802e41d,0x76dd3fcd,0xa9048cbb,0xe9455bba,0x89b182b5 } },
+    /* 184 */
+    { { 0x43360710,0xd777ad6a,0x55e9936b,0x841287ef,0x04a21b24,0xbaf5c670,
+        0x35ad86f1,0xf2c0725f,0xc707e72e,0x338fa650,0xd8883e52,0x2bf8ed2e },
+      { 0xb56e0d6a,0xb0212cf4,0x6843290c,0x50537e12,0x98b3dc6f,0xd8b184a1,
+        0x0210b722,0xd2be9a35,0x559781ee,0x407406db,0x0bc18534,0x5a78d591 } },
+    /* 185 */
+    { { 0xd748b02c,0x4d57aa2a,0xa12b3b95,0xbe5b3451,0x64711258,0xadca7a45,
+        0x322153db,0x597e091a,0x32eb1eab,0xf3271006,0x2873f301,0xbd9adcba },
+      { 0x38543f7f,0xd1dc79d1,0x921b1fef,0x00022092,0x1e5df8ed,0x86db3ef5,
+        0x9e6b944a,0x888cae04,0x791a32b4,0x71bd29ec,0xa6d1c13e,0xd3516206 } },
+    /* 186 */
+    { { 0x55924f43,0x2ef6b952,0x4f9de8d5,0xd2f401ae,0xadc68042,0xfc73e8d7,
+        0x0d9d1bb4,0x627ea70c,0xbbf35679,0xc3bb3e3e,0xd882dee4,0x7e8a254a },
+      { 0xb5924407,0x08906f50,0xa1ad444a,0xf14a0e61,0x65f3738e,0xaa0efa21,
+        0xae71f161,0xd60c7dd6,0xf175894d,0x9e8390fa,0x149f4c00,0xd115cd20 } },
+    /* 187 */
+    { { 0xa52abf77,0x2f2e2c1d,0x54232568,0xc2a0dca5,0x54966dcc,0xed423ea2,
+        0xcd0dd039,0xe48c93c7,0x176405c7,0x1e54a225,0x70d58f2e,0x1efb5b16 },
+      { 0x94fb1471,0xa751f9d9,0x67d2941d,0xfdb31e1f,0x53733698,0xa6c74eb2,
+        0x89a0f64a,0xd3155d11,0xa4b8d2b6,0x4414cfe4,0xf7a8e9e3,0x8d5a4be8 } },
+    /* 188 */
+    { { 0x52669e98,0x5c96b4d4,0x8fd42a03,0x4547f922,0xd285174e,0xcf5c1319,
+        0x064bffa0,0x805cd1ae,0x246d27e7,0x50e8bc4f,0xd5781e11,0xf89ef98f },
+      { 0xdee0b63f,0xb4ff95f6,0x222663a4,0xad850047,0x4d23ce9c,0x02691860,
+        0x50019f59,0x3e5309ce,0x69a508ae,0x27e6f722,0x267ba52c,0xe9376652 } },
+    /* 189 */
+    { { 0xc0368708,0xa04d289c,0x5e306e1d,0xc458872f,0x33112fea,0x76fa23de,
+        0x6efde42e,0x718e3974,0x1d206091,0xf0c98cdc,0x14a71987,0x5fa3ca62 },
+      { 0xdcaa9f2a,0xeee8188b,0x589a860d,0x312cc732,0xc63aeb1f,0xf9808dd6,
+        0x4ea62b53,0x70fd43db,0x890b6e97,0x2c2bfe34,0xfa426aa6,0x105f863c } },
+    /* 190 */
+    { { 0xb38059ad,0x0b29795d,0x90647ea0,0x5686b77e,0xdb473a3e,0xeff0470e,
+        0xf9b6d1e2,0x278d2340,0xbd594ec7,0xebbff95b,0xd3a7f23d,0xf4b72334 },
+      { 0xa5a83f0b,0x2a285980,0x9716a8b3,0x0786c41a,0x22511812,0x138901bd,
+        0xe2fede6e,0xd1b55221,0xdf4eb590,0x0806e264,0x762e462e,0x6c4c897e } },
+    /* 191 */
+    { { 0xb4b41d9d,0xd10b905f,0x4523a65b,0x826ca466,0xb699fa37,0x535bbd13,
+        0x73bc8f90,0x5b9933d7,0xcd2118ad,0x9332d61f,0xd4a65fd0,0x158c693e },
+      { 0xe6806e63,0x4ddfb2a8,0xb5de651b,0xe31ed3ec,0x819bc69a,0xf9460e51,
+        0x2c76b1f8,0x6229c0d6,0x901970a3,0xbb78f231,0x9cee72b8,0x31f3820f } },
+    /* 192 */
+    { { 0xc09e1c72,0xe931caf2,0x12990cf4,0x0715f298,0x943262d8,0x33aad81d,
+        0x73048d3f,0x5d292b7a,0xdc7415f6,0xb152aaa4,0x0fd19587,0xc3d10fd9 },
+      { 0x75ddadd0,0xf76b35c5,0x1e7b694c,0x9f5f4a51,0xc0663025,0x2f1ab7eb,
+        0x920260b0,0x01c9cc87,0x05d39da6,0xc4b1f61a,0xeb4a9c4e,0x6dcd76c4 } },
+    /* 193 */
+    { { 0xfdc83f01,0x0ba0916f,0x9553e4f9,0x354c8b44,0xffc5e622,0xa6cc511a,
+        0xe95be787,0xb954726a,0x75b41a62,0xcb048115,0xebfde989,0xfa2ae6cd },
+      { 0x0f24659a,0x6376bbc7,0x4c289c43,0x13a999fd,0xec9abd8b,0xc7134184,
+        0xa789ab04,0x28c02bf6,0xd3e526ec,0xff841ebc,0x640893a8,0x442b191e } },
+    /* 194 */
+    { { 0xfa2b6e20,0x4cac6c62,0xf6d69861,0x97f29e9b,0xbc96d12d,0x228ab1db,
+        0x5e8e108d,0x6eb91327,0x40771245,0xd4b3d4d1,0xca8a803a,0x61b20623 },
+      { 0xa6a560b1,0x2c2f3b41,0x3859fcf4,0x879e1d40,0x024dbfc3,0x7cdb5145,
+        0x3bfa5315,0x55d08f15,0xaa93823a,0x2f57d773,0xc6a2c9a2,0xa97f259c } },
+    /* 195 */
+    { { 0xe58edbbb,0xc306317b,0x79dfdf13,0x25ade51c,0x16d83dd6,0x6b5beaf1,
+        0x1dd8f925,0xe8038a44,0xb2a87b6b,0x7f00143c,0xf5b438de,0xa885d00d },
+      { 0xcf9e48bd,0xe9f76790,0xa5162768,0xf0bdf9f0,0xad7b57cb,0x0436709f,
+        0xf7c15db7,0x7e151c12,0x5d90ee3b,0x3514f022,0x2c361a8d,0x2e84e803 } },
+    /* 196 */
+    { { 0x563ec8d8,0x2277607d,0xe3934cb7,0xa661811f,0xf58fd5de,0x3ca72e7a,
+        0x62294c6a,0x7989da04,0xf6bbefe9,0x88b3708b,0x53ed7c82,0x0d524cf7 },
+      { 0x2f30c073,0x69f699ca,0x9dc1dcf3,0xf0fa264b,0x05f0aaf6,0x44ca4568,
+        0xd19b9baf,0x0f5b23c7,0xeabd1107,0x39193f41,0x2a7c9b83,0x9e3e10ad } },
+    /* 197 */
+    { { 0xd4ae972f,0xa90824f0,0xc6e846e7,0x43eef02b,0x29d2160a,0x7e460612,
+        0xfe604e91,0x29a178ac,0x4eb184b2,0x23056f04,0xeb54cdf4,0x4fcad55f },
+      { 0xae728d15,0xa0ff96f3,0xc6a00331,0x8a2680c6,0x7ee52556,0x5f84cae0,
+        0xc5a65dad,0x5e462c3a,0xe2d23f4f,0x5d2b81df,0xc5b1eb07,0x6e47301b } },
+    /* 198 */
+    { { 0xaf8219b9,0x77411d68,0x51b1907a,0xcb883ce6,0x101383b5,0x25c87e57,
+        0x982f970d,0x9c7d9859,0x118305d2,0xaa6abca5,0x9013a5db,0x725fed2f },
+      { 0xababd109,0x487cdbaf,0x87586528,0xc0f8cf56,0x8ad58254,0xa02591e6,
+        0xdebbd526,0xc071b1d1,0x961e7e31,0x927dfe8b,0x9263dfe1,0x55f895f9 } },
+    /* 199 */
+    { { 0xb175645b,0xf899b00d,0xb65b4b92,0x51f3a627,0xb67399ef,0xa2f3ac8d,
+        0xe400bc20,0xe717867f,0x1967b952,0x42cc9020,0x3ecd1de1,0x3d596751 },
+      { 0xdb979775,0xd41ebcde,0x6a2e7e88,0x99ba61bc,0x321504f2,0x039149a5,
+        0x27ba2fad,0xe7dc2314,0xb57d8368,0x9f556308,0x57da80a7,0x2b6d16c9 } },
+    /* 200 */
+    { { 0x279ad982,0x84af5e76,0x9c8b81a6,0x9bb4c92d,0x0e698e67,0xd79ad44e,
+        0x265fc167,0xe8be9048,0x0c3a4ccc,0xf135f7e6,0xb8863a33,0xa0a10d38 },
+      { 0xd386efd9,0xe197247c,0xb52346c2,0x0eefd3f9,0x78607bc8,0xc22415f9,
+        0x508674ce,0xa2a8f862,0xc8c9d607,0xa72ad09e,0x50fa764f,0xcd9f0ede } },
+    /* 201 */
+    { { 0xd1a46d4d,0x063391c7,0x9eb01693,0x2df51c11,0x849e83de,0xc5849800,
+        0x8ad08382,0x48fd09aa,0xaa742736,0xa405d873,0xe1f9600c,0xee49e61e },
+      { 0x48c76f73,0xd76676be,0x01274b2a,0xd9c100f6,0x83f8718d,0x110bb67c,
+        0x02fc0d73,0xec85a420,0x744656ad,0xc0449e1e,0x37d9939b,0x28ce7376 } },
+    /* 202 */
+    { { 0x44544ac7,0x97e9af72,0xba010426,0xf2c658d5,0xfb3adfbd,0x732dec39,
+        0xa2df0b07,0xd12faf91,0x2171e208,0x8ac26725,0x5b24fa54,0xf820cdc8 },
+      { 0x94f4cf77,0x307a6eea,0x944a33c6,0x18c783d2,0x0b741ac5,0x4b939d4c,
+        0x3ffbb6e4,0x1d7acd15,0x7a255e44,0x06a24858,0xce336d50,0x14fbc494 } },
+    /* 203 */
+    { { 0x51584e3c,0x9b920c0c,0xf7e54027,0xc7733c59,0x88422bbe,0xe24ce139,
+        0x523bd6ab,0x11ada812,0xb88e6def,0xde068800,0xfe8c582d,0x7b872671 },
+      { 0x7de53510,0x4e746f28,0xf7971968,0x492f8b99,0x7d928ac2,0x1ec80bc7,
+        0x432eb1b5,0xb3913e48,0x32028f6e,0xad084866,0x8fc2f38b,0x122bb835 } },
+    /* 204 */
+    { { 0x3b0b29c3,0x0a9f3b1e,0x4fa44151,0x837b6432,0x17b28ea7,0xb9905c92,
+        0x98451750,0xf39bc937,0xce8b6da1,0xcd383c24,0x010620b2,0x299f57db },
+      { 0x58afdce3,0x7b6ac396,0x3d05ef47,0xa15206b3,0xb9bb02ff,0xa0ae37e2,
+        0x9db3964c,0x107760ab,0x67954bea,0xe29de9a0,0x431c3f82,0x446a1ad8 } },
+    /* 205 */
+    { { 0x5c6b8195,0xc6fecea0,0xf49e71b9,0xd744a7c5,0x177a7ae7,0xa8e96acc,
+        0x358773a7,0x1a05746c,0x37567369,0xa4162146,0x87d1c971,0xaa0217f7 },
+      { 0x77fd3226,0x61e9d158,0xe4f600be,0x0f6f2304,0x7a6dff07,0xa9c4cebc,
+        0x09f12a24,0xd15afa01,0x8c863ee9,0x2bbadb22,0xe5eb8c78,0xa28290e4 } },
+    /* 206 */
+    { { 0x3e9de330,0x55b87fa0,0x195c145b,0x12b26066,0xa920bef0,0xe08536e0,
+        0x4d195adc,0x7bff6f2c,0x945f4187,0x7f319e9d,0xf892ce47,0xf9848863 },
+      { 0x4fe37657,0xd0efc1d3,0x5cf0e45a,0x3c58de82,0x8b0ccbbe,0x626ad21a,
+        0xaf952fc5,0xd2a31208,0xeb437357,0x81791995,0x98e95d4f,0x5f19d30f } },
+    /* 207 */
+    { { 0x0e6865bb,0x72e83d9a,0xf63456a6,0x22f5af3b,0x463c8d9e,0x409e9c73,
+        0xdfe6970e,0x40e9e578,0x711b91ca,0x876b6efa,0x942625a3,0x895512cf },
+      { 0xcb4e462b,0x84c8eda8,0x4412e7c8,0x84c0154a,0xceb7b71f,0x04325db1,
+        0x66f70877,0x1537dde3,0x1992b9ac,0xf3a09399,0xd498ae77,0xa7316606 } },
+    /* 208 */
+    { { 0xcad260f5,0x13990d2f,0xeec0e8c0,0x76c3be29,0x0f7bd7d5,0x7dc5bee0,
+        0xefebda4b,0x9be167d2,0x9122b87e,0xcce3dde6,0x82b5415c,0x75a28b09 },
+      { 0xe84607a6,0xf6810bcd,0x6f4dbf0d,0xc6d58128,0x1b4dafeb,0xfead577d,
+        0x066b28eb,0x9bc440b2,0x8b17e84b,0x53f1da97,0xcda9a575,0x0459504b } },
+    /* 209 */
+    { { 0x329e5836,0x13e39a02,0xf717269d,0x2c9e7d51,0xf26c963b,0xc5ac58d6,
+        0x79967bf5,0x3b0c6c43,0x55908d9d,0x60bbea3f,0xf07c9ad1,0xd84811e7 },
+      { 0x5bd20e4a,0xfe7609a7,0x0a70baa8,0xe4325dd2,0xb3600386,0x3711f370,
+        0xd0924302,0x97f9562f,0x4acc4436,0x040dc0c3,0xde79cdd4,0xfd6d725c } },
+    /* 210 */
+    { { 0xcf13eafb,0xb3efd0e3,0x5aa0ae5f,0x21009cbb,0x79022279,0xe480c553,
+        0xb2fc9a6d,0x755cf334,0x07096ae7,0x8564a5bf,0xbd238139,0xddd649d0 },
+      { 0x8a045041,0xd0de10b1,0xc957d572,0x6e05b413,0x4e0fb25c,0x5c5ff806,
+        0x641162fb,0xd933179b,0xe57439f9,0x42d48485,0x8a8d72aa,0x70c5bd0a } },
+    /* 211 */
+    { { 0x97bdf646,0xa7671738,0xab329f7c,0xaa1485b4,0xf8f25fdf,0xce3e11d6,
+        0xc6221824,0x76a3fc7e,0xf3924740,0x045f281f,0x96d13a9a,0x24557d4e },
+      { 0xdd4c27cd,0x875c804b,0x0f5c7fea,0x11c5f0f4,0xdc55ff7e,0xac8c880b,
+        0x1103f101,0x2acddec5,0xf99faa89,0x38341a21,0xce9d6b57,0xc7b67a2c } },
+    /* 212 */
+    { { 0x8e357586,0x9a0d724f,0xdf648da0,0x1d7f4ff5,0xfdee62a5,0x9c3e6c9b,
+        0x0389b372,0x0499cef0,0x98eab879,0xe904050d,0x6c051617,0xe8eef1b6 },
+      { 0xc37e3ca9,0xebf5bfeb,0xa4e0b91d,0x7c5e946d,0x2c4bea28,0x79097314,
+        0xee67b2b7,0x81f6c109,0xdafc5ede,0xaf237d9b,0x2abb04c7,0xd2e60201 } },
+    /* 213 */
+    { { 0x8a4f57bf,0x6156060c,0xff11182a,0xf9758696,0x6296ef00,0x8336773c,
+        0xff666899,0x9c054bce,0x719cd11c,0xd6a11611,0xdbe1acfa,0x9824a641 },
+      { 0xba89fd01,0x0b7b7a5f,0x889f79d8,0xf8d3b809,0xf578285c,0xc5e1ea08,
+        0xae6d8288,0x7ac74536,0x7521ef5f,0x5d37a200,0xb260a25d,0x5ecc4184 } },
+    /* 214 */
+    { { 0xa708c8d3,0xddcebb19,0xc63f81ec,0xe63ed04f,0x11873f95,0xd045f5a0,
+        0x79f276d5,0x3b5ad544,0x425ae5b3,0x81272a3d,0x10ce1605,0x8bfeb501 },
+      { 0x888228bf,0x4233809c,0xb2aff7df,0x4bd82acf,0x0cbd4a7f,0x9c68f180,
+        0x6b44323d,0xfcd77124,0x891db957,0x60c0fcf6,0x04da8f7f,0xcfbb4d89 } },
+    /* 215 */
+    { { 0x3b26139a,0x9a6a5df9,0xb2cc7eb8,0x3e076a83,0x5a964bcd,0x47a8e82d,
+        0xb9278d6b,0x8a4e2a39,0xe4443549,0x93506c98,0xf1e0d566,0x06497a8f },
+      { 0x2b1efa05,0x3dee8d99,0x45393e33,0x2da63ca8,0xcf0579ad,0xa4af7277,
+        0x3236d8ea,0xaf4b4639,0x32b617f5,0x6ccad95b,0xb88bb124,0xce76d8b8 } },
+    /* 216 */
+    { { 0x083843dc,0x63d2537a,0x1e4153b4,0x89eb3514,0xea9afc94,0x5175ebc4,
+        0x8ed1aed7,0x7a652580,0xd85e8297,0x67295611,0xb584b73d,0x8dd2d68b },
+      { 0x0133c3a4,0x237139e6,0x4bd278ea,0x9de838ab,0xc062fcd9,0xe829b072,
+        0x63ba8706,0x70730d4f,0xd3cd05ec,0x6080483f,0x0c85f84d,0x872ab5b8 } },
+    /* 217 */
+    { { 0x999d4d49,0xfc0776d3,0xec3f45e7,0xa3eb59de,0x0dae1fc1,0xbc990e44,
+        0xa15371ff,0x33596b1e,0x9bc7ab25,0xd447dcb2,0x35979582,0xcd5b63e9 },
+      { 0x77d1ff11,0xae3366fa,0xedee6903,0x59f28f05,0xa4433bf2,0x6f43fed1,
+        0xdf9ce00e,0x15409c9b,0xaca9c5dc,0x21b5cded,0x82d7bdb4,0xf9f33595 } },
+    /* 218 */
+    { { 0x9422c792,0x95944378,0xc958b8bf,0x239ea923,0xdf076541,0x4b61a247,
+        0xbb9fc544,0x4d29ce85,0x0b424559,0x9a692a67,0x0e486900,0x6e0ca5a0 },
+      { 0x85b3bece,0x6b79a782,0xc61f9892,0x41f35e39,0xae747f82,0xff82099a,
+        0xd0ca59d6,0x58c8ae3f,0x99406b5f,0x4ac930e2,0x9df24243,0x2ce04eb9 } },
+    /* 219 */
+    { { 0x1ac37b82,0x4366b994,0x25b04d83,0xff0c728d,0x19c47b7c,0x1f551361,
+        0xbeff13e7,0xdbf2d5ed,0xe12a683d,0xf78efd51,0x989cf9c4,0x82cd85b9 },
+      { 0xe0cb5d37,0xe23c6db6,0x72ee1a15,0x818aeebd,0x28771b14,0x8212aafd,
+        0x1def817d,0x7bc221d9,0x9445c51f,0xdac403a2,0x12c3746b,0x711b0517 } },
+    /* 220 */
+    { { 0x5ea99ecc,0x0ed9ed48,0xb8cab5e1,0xf799500d,0xb570cbdc,0xa8ec87dc,
+        0xd35dfaec,0x52cfb2c2,0x6e4d80a4,0x8d31fae2,0xdcdeabe5,0xe6a37dc9 },
+      { 0x1deca452,0x5d365a34,0x0d68b44e,0x09a5f8a5,0xa60744b1,0x59238ea5,
+        0xbb4249e9,0xf2fedc0d,0xa909b2e3,0xe395c74e,0x39388250,0xe156d1a5 } },
+    /* 221 */
+    { { 0x47181ae9,0xd796b3d0,0x44197808,0xbaf44ba8,0x34cf3fac,0xe6933094,
+        0xc3bd5c46,0x41aa6ade,0xeed947c6,0x4fda75d8,0x9ea5a525,0xacd9d412 },
+      { 0xd430301b,0x65cc55a3,0x7b52ea49,0x3c9a5bcf,0x159507f0,0x22d319cf,
+        0xde74a8dd,0x2ee0b9b5,0x877ac2b6,0x20c26a1e,0x92e7c314,0x387d73da } },
+    /* 222 */
+    { { 0x8cd3fdac,0x13c4833e,0x332e5b8e,0x76fcd473,0xe2fe1fd3,0xff671b4b,
+        0x5d98d8ec,0x4d734e8b,0x514bbc11,0xb1ead3c6,0x7b390494,0xd14ca858 },
+      { 0x5d2d37e9,0x95a443af,0x00464622,0x73c6ea73,0x15755044,0xa44aeb4b,
+        0xfab58fee,0xba3f8575,0xdc680a6f,0x9779dbc9,0x7b37ddfc,0xe1ee5f5a } },
+    /* 223 */
+    { { 0x12d29f46,0xcd0b4648,0x0ed53137,0x93295b0b,0x80bef6c9,0xbfe26094,
+        0x54248b00,0xa6565788,0x80e7f9c4,0x69c43fca,0xbe141ea1,0x2190837b },
+      { 0xa1b26cfb,0x875e159a,0x7affe852,0x90ca9f87,0x92ca598e,0x15e6550d,
+        0x1938ad11,0xe3e0945d,0x366ef937,0xef7636bb,0xb39869e5,0xb6034d0b } },
+    /* 224 */
+    { { 0x26d8356e,0x4d255e30,0xd314626f,0xf83666ed,0xd0c8ed64,0x421ddf61,
+        0x26677b61,0x96e473c5,0x9e9b18b3,0xdad4af7e,0xa9393f75,0xfceffd4a },
+      { 0x11c731d5,0x843138a1,0xb2f141d9,0x05bcb3a1,0x617b7671,0x20e1fa95,
+        0x88ccec7b,0xbefce812,0x90f1b568,0x582073dc,0x1f055cb7,0xf572261a } },
+    /* 225 */
+    { { 0x36973088,0xf3148277,0x86a9f980,0xc008e708,0xe046c261,0x1b795947,
+        0xca76bca0,0xdf1e6a7d,0x71acddf0,0xabafd886,0x1364d8f4,0xff7054d9 },
+      { 0xe2260594,0x2cf63547,0xd73b277e,0x468a5372,0xef9bd35e,0xc7419e24,
+        0x24043cc3,0x2b4a1c20,0x890b39cd,0xa28f047a,0x46f9a2e3,0xdca2cea1 } },
+    /* 226 */
+    { { 0x53277538,0xab788736,0xcf697738,0xa734e225,0x6b22e2c1,0x66ee1d1e,
+        0xebe1d212,0x2c615389,0x02bb0766,0xf36cad40,0x3e64f207,0x120885c3 },
+      { 0x90fbfec2,0x59e77d56,0xd7a574ae,0xf9e781aa,0x5d045e53,0x801410b0,
+        0xa91b5f0e,0xd3b5f0aa,0x7fbb3521,0xb3d1df00,0xc72bee9a,0x11c4b33e } },
+    /* 227 */
+    { { 0x83c3a7f3,0xd32b9832,0x88d8a354,0x8083abcf,0x50f4ec5a,0xdeb16404,
+        0x641e2907,0x18d747f0,0xf1bbf03e,0x4e8978ae,0x88a0cd89,0x932447dc },
+      { 0xcf3d5897,0x561e0feb,0x13600e6d,0xfc3a682f,0xd16a6b73,0xc78b9d73,
+        0xd29bf580,0xe713fede,0x08d69e5c,0x0a225223,0x1ff7fda4,0x3a924a57 } },
+    /* 228 */
+    { { 0xb4093bee,0xfb64554c,0xa58c6ec0,0xa6d65a25,0x43d0ed37,0x4126994d,
+        0x55152d44,0xa5689a51,0x284caa8d,0xb8e5ea8c,0xd1f25538,0x33f05d4f },
+      { 0x1b615d6e,0xe0fdfe09,0x705507da,0x2ded7e8f,0x17bbcc80,0xdd5631e5,
+        0x267fd11f,0x4f87453e,0xff89d62d,0xc6da723f,0xe3cda21d,0x55cbcae2 } },
+    /* 229 */
+    { { 0x6b4e84f3,0x336bc94e,0x4ef72c35,0x72863031,0xeeb57f99,0x6d85fdee,
+        0xa42ece1b,0x7f4e3272,0x36f0320a,0x7f86cbb5,0x923331e6,0xf09b6a2b },
+      { 0x56778435,0x21d3ecf1,0x8323b2d2,0x2977ba99,0x1704bc0f,0x6a1b57fb,
+        0x389f048a,0xd777cf8b,0xac6b42cd,0x9ce2174f,0x09e6c55a,0x404e2bff } },
+    /* 230 */
+    { { 0x204c5ddb,0x9b9b135e,0x3eff550e,0x9dbfe044,0xec3be0f6,0x35eab4bf,
+        0x0a43e56f,0x8b4c3f0d,0x0e73f9b3,0x4c1c6673,0x2c78c905,0x92ed38bd },
+      { 0xa386e27c,0xc7003f6a,0xaced8507,0xb9c4f46f,0x59df5464,0xea024ec8,
+        0x429572ea,0x4af96152,0xe1fc1194,0x279cd5e2,0x281e358c,0xaa376a03 } },
+    /* 231 */
+    { { 0x3cdbc95c,0x07859223,0xef2e337a,0xaae1aa6a,0x472a8544,0xc040108d,
+        0x8d037b7d,0x80c853e6,0x8c7eee24,0xd221315c,0x8ee47752,0x195d3856 },
+      { 0xdacd7fbe,0xd4b1ba03,0xd3e0c52b,0x4b5ac61e,0x6aab7b52,0x68d3c052,
+        0x660e3fea,0xf0d7248c,0x3145efb4,0xafdb3f89,0x8f40936d,0xa73fd9a3 } },
+    /* 232 */
+    { { 0xbb1b17ce,0x891b9ef3,0xc6127f31,0x14023667,0x305521fd,0x12b2e58d,
+        0xe3508088,0x3a47e449,0xff751507,0xe49fc84b,0x5310d16e,0x4023f722 },
+      { 0xb73399fa,0xa608e5ed,0xd532aa3e,0xf12632d8,0x845e8415,0x13a2758e,
+        0x1fc2d861,0xae4b6f85,0x339d02f2,0x3879f5b1,0x80d99ebd,0x446d22a6 } },
+    /* 233 */
+    { { 0x4be164f1,0x0f502302,0x88b81920,0x8d09d2d6,0x984aceff,0x514056f1,
+        0x75e9e80d,0xa5c4ddf0,0xdf496a93,0x38cb47e6,0x38df6bf7,0x899e1d6b },
+      { 0xb59eb2a6,0x69e87e88,0x9b47f38b,0x280d9d63,0x3654e955,0x599411ea,
+        0x969aa581,0xcf8dd4fd,0x530742a7,0xff5c2baf,0x1a373085,0xa4391536 } },
+    /* 234 */
+    { { 0xa8a4bdd2,0x6ace72a3,0xb68ef702,0xc656cdd1,0x90c4dad8,0xd4a33e7e,
+        0x9d951c50,0x4aece08a,0x085d68e6,0xea8005ae,0x6f7502b8,0xfdd7a7d7 },
+      { 0x98d6fa45,0xce6fb0a6,0x1104eb8c,0x228f8672,0xda09d7dc,0xd23d8787,
+        0x2ae93065,0x5521428b,0xea56c366,0x95faba3d,0x0a88aca5,0xedbe5039 } },
+    /* 235 */
+    { { 0xbfb26c82,0xd64da0ad,0x952c2f9c,0xe5d70b3c,0xf7e77f68,0xf5e8f365,
+        0x08f2d695,0x7234e002,0xd12e7be6,0xfaf900ee,0x4acf734e,0x27dc6934 },
+      { 0xc260a46a,0x80e4ff5e,0x2dc31c28,0x7da5ebce,0xca69f552,0x485c5d73,
+        0x69cc84c2,0xcdfb6b29,0xed6d4eca,0x031c5afe,0x22247637,0xc7bbf4c8 } },
+    /* 236 */
+    { { 0x49fe01b2,0x9d5b72c7,0x793a91b8,0x34785186,0xcf460438,0xa3ba3c54,
+        0x3ab21b6f,0x73e8e43d,0xbe57b8ab,0x50cde8e0,0xdd204264,0x6488b3a7 },
+      { 0xdddc4582,0xa9e398b3,0x5bec46fe,0x1698c1a9,0x156d3843,0x7f1446ef,
+        0x770329a2,0x3fd25dd8,0x2c710668,0x05b1221a,0xa72ee6cf,0x65b2dc2a } },
+    /* 237 */
+    { { 0xcd021d63,0x21a885f7,0xfea61f08,0x3f344b15,0xc5cf73e6,0xad5ba6dd,
+        0x227a8b23,0x154d0d8f,0xdc559311,0x9b74373c,0x98620fa1,0x4feab715 },
+      { 0x7d9ec924,0x5098938e,0x6d47e550,0x84d54a5e,0x1b617506,0x1a2d1bdc,
+        0x615868a4,0x99fe1782,0x3005a924,0x171da780,0x7d8f79b6,0xa70bf5ed } },
+    /* 238 */
+    { { 0xfe2216c5,0x0bc1250d,0x7601b351,0x2c37e250,0xd6f06b7e,0xb6300175,
+        0x8bfeb9b7,0x4dde8ca1,0xb82f843d,0x4f210432,0xb1ac0afd,0x8d70e2f9 },
+      { 0xaae91abb,0x25c73b78,0x863028f2,0x0230dca3,0xe5cf30b7,0x8b923ecf,
+        0x5506f265,0xed754ec2,0x729a5e39,0x8e41b88c,0xbabf889b,0xee67cec2 } },
+    /* 239 */
+    { { 0x1be46c65,0xe183acf5,0xe7565d7a,0x9789538f,0xd9627b4e,0x87873391,
+        0x9f1d9187,0xbf4ac4c1,0x4691f5c8,0x5db99f63,0x74a1fb98,0xa68df803 },
+      { 0xbf92b5fa,0x3c448ed1,0x3e0bdc32,0xa098c841,0x79bf016c,0x8e74cd55,
+        0x115e244d,0x5df0d09c,0x3410b66e,0x9418ad01,0x17a02130,0x8b6124cb } },
+    /* 240 */
+    { { 0xc26e3392,0x425ec3af,0xa1722e00,0xc07f8470,0xe2356b43,0xdcc28190,
+        0xb1ef59a6,0x4ed97dff,0xc63028c1,0xc22b3ad1,0x68c18988,0x070723c2 },
+      { 0x4cf49e7d,0x70da302f,0x3f12a522,0xc5e87c93,0x18594148,0x74acdd1d,
+        0xca74124c,0xad5f73ab,0xd69fd478,0xe72e4a3e,0x7b117cc3,0x61593868 } },
+    /* 241 */
+    { { 0xa9aa0486,0x7b7b9577,0xa063d557,0x6e41fb35,0xda9047d7,0xb017d5c7,
+        0x68a87ba9,0x8c748280,0xdf08ad93,0xab45fa5c,0x4c288a28,0xcd9fb217 },
+      { 0x5747843d,0x59544642,0xa56111e3,0x34d64c6c,0x4bfce8d5,0x12e47ea1,
+        0x6169267f,0x17740e05,0xeed03fb5,0x5c49438e,0x4fc3f513,0x9da30add } },
+    /* 242 */
+    { { 0xccfa5200,0xc4e85282,0x6a19b13d,0x2707608f,0xf5726e2f,0xdcb9a53d,
+        0xe9427de5,0x612407c9,0xd54d582a,0x3e5a17e1,0x655ae118,0xb99877de },
+      { 0x015254de,0x6f0e972b,0xf0a6f7c5,0x92a56db1,0xa656f8b2,0xd297e4e1,
+        0xad981983,0x99fe0052,0x07cfed84,0xd3652d2f,0x843c1738,0xc784352e } },
+    /* 243 */
+    { { 0x7e9b2d8a,0x6ee90af0,0x57cf1964,0xac8d7018,0x71f28efc,0xf6ed9031,
+        0x6812b20e,0x7f70d5a9,0xf1c61eee,0x27b557f4,0xc6263758,0xf1c9bd57 },
+      { 0x2a1a6194,0x5cf7d014,0x1890ab84,0xdd614e0b,0x0e93c2a6,0x3ef9de10,
+        0xe0cd91c5,0xf98cf575,0x14befc32,0x504ec0c6,0x6279d68c,0xd0513a66 } },
+    /* 244 */
+    { { 0xa859fb6a,0xa8eadbad,0xdb283666,0xcf8346e7,0x3e22e355,0x7b35e61a,
+        0x99639c6b,0x293ece2c,0x56f241c8,0xfa0162e2,0xbf7a1dda,0xd2e6c7b9 },
+      { 0x40075e63,0xd0de6253,0xf9ec8286,0x2405aa61,0x8fe45494,0x2237830a,
+        0x364e9c8c,0x4fd01ac7,0x904ba750,0x4d9c3d21,0xaf1b520b,0xd589be14 } },
+    /* 245 */
+    { { 0x4662e53b,0x13576a4f,0xf9077676,0x35ec2f51,0x97c0af97,0x66297d13,
+        0x9e598b58,0xed3201fe,0x5e70f604,0x49bc752a,0xbb12d951,0xb54af535 },
+      { 0x212c1c76,0x36ea4c2b,0xeb250dfd,0x18f5bbc7,0x9a0a1a46,0xa0d466cc,
+        0xdac2d917,0x52564da4,0x8e95fab5,0x206559f4,0x9ca67a33,0x7487c190 } },
+    /* 246 */
+    { { 0xdde98e9c,0x75abfe37,0x2a411199,0x99b90b26,0xdcdb1f7c,0x1b410996,
+        0x8b3b5675,0xab346f11,0xf1f8ae1e,0x04852193,0x6b8b98c1,0x1ec4d227 },
+      { 0x45452baa,0xba3bc926,0xacc4a572,0x387d1858,0xe51f171e,0x9478eff6,
+        0x931e1c00,0xf357077d,0xe54c8ca8,0xffee77cd,0x551dc9a4,0xfb4892ff } },
+    /* 247 */
+    { { 0x2db8dff8,0x5b1bdad0,0x5a2285a2,0xd462f4fd,0xda00b461,0x1d6aad8e,
+        0x41306d1b,0x43fbefcf,0x6a13fe19,0x428e86f3,0x17f89404,0xc8b2f118 },
+      { 0xf0d51afb,0x762528aa,0x549b1d06,0xa3e2fea4,0xea3ddf66,0x86fad8f2,
+        0x4fbdd206,0x0d9ccc4b,0xc189ff5a,0xcde97d4c,0x199f19a6,0xc36793d6 } },
+    /* 248 */
+    { { 0x51b85197,0xea38909b,0xb4c92895,0xffb17dd0,0x1ddb3f3f,0x0eb0878b,
+        0xc57cf0f2,0xb05d28ff,0x1abd57e2,0xd8bde2e7,0xc40c1b20,0x7f2be28d },
+      { 0x299a2d48,0x6554dca2,0x8377982d,0x5130ba2e,0x1071971a,0x8863205f,
+        0x7cf2825d,0x15ee6282,0x03748f2b,0xd4b6c57f,0x430385a0,0xa9e3f4da } },
+    /* 249 */
+    { { 0x83fbc9c6,0x33eb7cec,0x4541777e,0x24a311c7,0x4f0767fc,0xc81377f7,
+        0x4ab702da,0x12adae36,0x2a779696,0xb7fcb6db,0x01cea6ad,0x4a6fb284 },
+      { 0xcdfc73de,0x5e8b1d2a,0x1b02fd32,0xd0efae8d,0xd81d8519,0x3f99c190,
+        0xfc808971,0x3c18f7fa,0x51b7ae7b,0x41f713e7,0xf07fc3f8,0x0a4b3435 } },
+    /* 250 */
+    { { 0x019b7d2e,0x7dda3c4c,0xd4dc4b89,0x631c8d1a,0x1cdb313c,0x5489cd6e,
+        0x4c07bb06,0xd44aed10,0x75f000d1,0x8f97e13a,0xdda5df4d,0x0e9ee64f },
+      { 0x3e346910,0xeaa99f3b,0xfa294ad7,0x622f6921,0x0d0b2fe9,0x22aaa20d,
+        0x1e5881ba,0x4fed2f99,0xc1571802,0x9af3b2d6,0xdc7ee17c,0x919e67a8 } },
+    /* 251 */
+    { { 0x76250533,0xc724fe4c,0x7d817ef8,0x8a2080e5,0x172c9751,0xa2afb0f4,
+        0x17c0702e,0x9b10cdeb,0xc9b7e3e9,0xbf3975e3,0x1cd0cdc5,0x206117df },
+      { 0xbe05ebd5,0xfb049e61,0x16c782c0,0xeb0bb55c,0xab7fed09,0x13a331b8,
+        0x632863f0,0xf6c58b1d,0x4d3b6195,0x6264ef6e,0x9a53f116,0x92c51b63 } },
+    /* 252 */
+    { { 0x288b364d,0xa57c7bc8,0x7b41e5c4,0x4a562e08,0x698a9a11,0x699d21c6,
+        0xf3f849b9,0xa4ed9581,0x9eb726ba,0xa223eef3,0xcc2884f9,0x13159c23 },
+      { 0x3a3f4963,0x73931e58,0x0ada6a81,0x96500389,0x5ab2950b,0x3ee8a1c6,
+        0x775fab52,0xeedf4949,0x4f2671b6,0x63d652e1,0x3c4e2f55,0xfed4491c } },
+    /* 253 */
+    { { 0xf4eb453e,0x335eadc3,0xcadd1a5b,0x5ff74b63,0x5d84a91a,0x6933d0d7,
+        0xb49ba337,0x9ca3eeb9,0xc04c15b8,0x1f6facce,0xdc09a7e4,0x4ef19326 },
+      { 0x3dca3233,0x53d2d324,0xa2259d4b,0x0ee40590,0x5546f002,0x18c22edb,
+        0x09ea6b71,0x92429801,0xb0e91e61,0xaada0add,0x99963c50,0x5fe53ef4 } },
+    /* 254 */
+    { { 0x90c28c65,0x372dd06b,0x119ce47d,0x1765242c,0x6b22fc82,0xc041fb80,
+        0xb0a7ccc1,0x667edf07,0x1261bece,0xc79599e7,0x19cff22a,0xbc69d9ba },
+      { 0x13c06819,0x009d77cd,0xe282b79d,0x635a66ae,0x225b1be8,0x4edac4a6,
+        0x524008f9,0x57d4f4e4,0xb056af84,0xee299ac5,0x3a0bc386,0xcc38444c } },
+    /* 255 */
+    { { 0xcd4c2356,0x490643b1,0x750547be,0x740a4851,0xd4944c04,0x643eaf29,
+        0x299a98a0,0xba572479,0xee05fdf9,0x48b29f16,0x089b2d7b,0x33fb4f61 },
+      { 0xa950f955,0x86704902,0xfedc3ddf,0x97e1034d,0x05fbb6a2,0x211320b6,
+        0x432299bb,0x23d7b93f,0x8590e4a3,0x1fe1a057,0xf58c0ce6,0x8e1d0586 } },
+};
+
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_base_12(sp_point_384* r, const sp_digit* k,
+        int map, void* heap)
+{
+    return sp_384_ecc_mulmod_stripe_12(r, &p384_base, p384_table,
+                                      k, map, heap);
+}
+
+#endif
+
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * km    Scalar to multiply by.
+ * r     Resulting point.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_base_384(mp_int* km, ecc_point* r, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[12];
+#endif
+    sp_point_384* point;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    err = sp_384_point_new_12(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+    if (err == MP_OKAY) {
+        sp_384_from_mp(k, 12, km);
+
+            err = sp_384_ecc_mulmod_base_12(point, k, map, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_to_ecc_point_12(point, r);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(point, 0, heap);
+
+    return err;
+}
+
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+                                                        defined(HAVE_ECC_VERIFY)
+/* Returns 1 if the number of zero.
+ * Implementation is constant time.
+ *
+ * a  Number to check.
+ * returns 1 if the number is zero and 0 otherwise.
+ */
+static int sp_384_iszero_12(const sp_digit* a)
+{
+    return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] |
+            a[8] | a[9] | a[10] | a[11]) == 0;
+}
+
+#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+/* Add 1 to a. (a = a + 1)
+ *
+ * a  A single precision integer.
+ */
+static void sp_384_add_one_12(sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "ldr	r1, [%[a], #0]\n\t"
+        "ldr	r2, [%[a], #4]\n\t"
+        "ldr	r3, [%[a], #8]\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "adds	r1, r1, #1\n\t"
+        "adcs	r2, r2, #0\n\t"
+        "adcs	r3, r3, #0\n\t"
+        "adcs	r4, r4, #0\n\t"
+        "str	r1, [%[a], #0]\n\t"
+        "str	r2, [%[a], #4]\n\t"
+        "str	r3, [%[a], #8]\n\t"
+        "str	r4, [%[a], #12]\n\t"
+        "ldr	r1, [%[a], #16]\n\t"
+        "ldr	r2, [%[a], #20]\n\t"
+        "ldr	r3, [%[a], #24]\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "adcs	r1, r1, #0\n\t"
+        "adcs	r2, r2, #0\n\t"
+        "adcs	r3, r3, #0\n\t"
+        "adcs	r4, r4, #0\n\t"
+        "str	r1, [%[a], #16]\n\t"
+        "str	r2, [%[a], #20]\n\t"
+        "str	r3, [%[a], #24]\n\t"
+        "str	r4, [%[a], #28]\n\t"
+        "ldr	r1, [%[a], #32]\n\t"
+        "ldr	r2, [%[a], #36]\n\t"
+        "ldr	r3, [%[a], #40]\n\t"
+        "ldr	r4, [%[a], #44]\n\t"
+        "adcs	r1, r1, #0\n\t"
+        "adcs	r2, r2, #0\n\t"
+        "adcs	r3, r3, #0\n\t"
+        "adcs	r4, r4, #0\n\t"
+        "str	r1, [%[a], #32]\n\t"
+        "str	r2, [%[a], #36]\n\t"
+        "str	r3, [%[a], #40]\n\t"
+        "str	r4, [%[a], #44]\n\t"
+        :
+        : [a] "r" (a)
+        : "memory", "r1", "r2", "r3", "r4"
+    );
+}
+
+/* Read big endian unsigned byte array into r.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
+ * a  Byte array.
+ * n  Number of bytes in array to read.
+ */
+static void sp_384_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = n-1; i >= 0; i--) {
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 24U) {
+            r[j] &= 0xffffffff;
+            s = 32U - s;
+            if (j + 1 >= size) {
+                break;
+            }
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+}
+
+/* Generates a scalar that is in the range 1..order-1.
+ *
+ * rng  Random number generator.
+ * k    Scalar value.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+static int sp_384_ecc_gen_k_12(WC_RNG* rng, sp_digit* k)
+{
+    int err;
+    byte buf[48];
+
+    do {
+        err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf));
+        if (err == 0) {
+            sp_384_from_bin(k, 12, buf, (int)sizeof(buf));
+            if (sp_384_cmp_12(k, p384_order2) < 0) {
+                sp_384_add_one_12(k);
+                break;
+            }
+        }
+    }
+    while (err == 0);
+
+    return err;
+}
+
+/* Makes a random EC key pair.
+ *
+ * rng   Random number generator.
+ * priv  Generated private value.
+ * pub   Generated public point.
+ * heap  Heap to use for allocation.
+ * returns ECC_INF_E when the point does not have the correct order, RNG
+ * failures, MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_make_key_384(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[12];
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_384 inf;
+#endif
+#endif
+    sp_point_384* point;
+    sp_digit* k = NULL;
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_384* infinity;
+#endif
+    int err;
+
+    (void)heap;
+
+    err = sp_384_point_new_12(heap, p, point);
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, inf, infinity);
+    }
+#endif
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+
+    if (err == MP_OKAY) {
+        err = sp_384_ecc_gen_k_12(rng, k);
+    }
+    if (err == MP_OKAY) {
+            err = sp_384_ecc_mulmod_base_12(point, k, 1, NULL);
+    }
+
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    if (err == MP_OKAY) {
+            err = sp_384_ecc_mulmod_12(infinity, point, p384_order, 1, NULL);
+    }
+    if (err == MP_OKAY) {
+        if ((sp_384_iszero_12(point->x) == 0) || (sp_384_iszero_12(point->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(k, priv);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_to_ecc_point_12(point, pub);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_384_point_free_12(infinity, 1, heap);
+#endif
+    sp_384_point_free_12(point, 1, heap);
+
+    return err;
+}
+
+#ifdef HAVE_ECC_DHE
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 48
+ *
+ * r  A single precision integer.
+ * a  Byte array.
+ */
+static void sp_384_to_bin(sp_digit* r, byte* a)
+{
+    int i, j, s = 0, b;
+
+    j = 384 / 8 - 1;
+    a[j] = 0;
+    for (i=0; i<12 && j>=0; i++) {
+        b = 0;
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
+            break;
+        }
+        while (b < 32) {
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
+                break;
+            }
+        }
+        s = 8 - (b - 32);
+        if (j >= 0) {
+            a[j] = 0;
+        }
+        if (s != 0) {
+            j++;
+        }
+    }
+}
+
+/* Multiply the point by the scalar and serialize the X ordinate.
+ * The number is 0 padded to maximum size on output.
+ *
+ * priv    Scalar to multiply the point by.
+ * pub     Point to multiply.
+ * out     Buffer to hold X ordinate.
+ * outLen  On entry, size of the buffer in bytes.
+ *         On exit, length of data in buffer in bytes.
+ * heap    Heap to use for allocation.
+ * returns BUFFER_E if the buffer is to small for output size,
+ * MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_secret_gen_384(mp_int* priv, ecc_point* pub, byte* out,
+                          word32* outLen, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[12];
+#endif
+    sp_point_384* point = NULL;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    if (*outLen < 48U) {
+        err = BUFFER_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, p, point);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL)
+            err = MEMORY_E;
+    }
+#else
+    k = kd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_384_from_mp(k, 12, priv);
+        sp_384_point_from_ecc_point_12(point, pub);
+            err = sp_384_ecc_mulmod_12(point, point, k, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        sp_384_to_bin(point->x, out);
+        *outLen = 48;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(point, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_DHE */
+
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into a. (a -= b)
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+static sp_digit sp_384_sub_in_place_12(sp_digit* a, const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r14, #0\n\t"
+        "add	r12, %[a], #48\n\t"
+        "\n1:\n\t"
+        "subs	%[c], r14, %[c]\n\t"
+        "ldr	r3, [%[a]]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[a], #8]\n\t"
+        "ldr	r6, [%[a], #12]\n\t"
+        "ldr	r7, [%[b]], #4\n\t"
+        "ldr	r8, [%[b]], #4\n\t"
+        "ldr	r9, [%[b]], #4\n\t"
+        "ldr	r10, [%[b]], #4\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "sbcs	r6, r6, r10\n\t"
+        "str	r3, [%[a]], #4\n\t"
+        "str	r4, [%[a]], #4\n\t"
+        "str	r5, [%[a]], #4\n\t"
+        "str	r6, [%[a]], #4\n\t"
+        "sbc	%[c], r14, r14\n\t"
+        "cmp	%[a], r12\n\t"
+        "bne	1b\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14"
+    );
+
+    return c;
+}
+
+#else
+/* Sub b from a into a. (a -= b)
+ *
+ * a  A single precision integer and result.
+ * b  A single precision integer.
+ */
+static sp_digit sp_384_sub_in_place_12(sp_digit* a, const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldr	r2, [%[a], #0]\n\t"
+        "ldr	r3, [%[a], #4]\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "ldr	r5, [%[a], #12]\n\t"
+        "ldr	r6, [%[b], #0]\n\t"
+        "ldr	r7, [%[b], #4]\n\t"
+        "ldr	r8, [%[b], #8]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "subs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #0]\n\t"
+        "str	r3, [%[a], #4]\n\t"
+        "str	r4, [%[a], #8]\n\t"
+        "str	r5, [%[a], #12]\n\t"
+        "ldr	r2, [%[a], #16]\n\t"
+        "ldr	r3, [%[a], #20]\n\t"
+        "ldr	r4, [%[a], #24]\n\t"
+        "ldr	r5, [%[a], #28]\n\t"
+        "ldr	r6, [%[b], #16]\n\t"
+        "ldr	r7, [%[b], #20]\n\t"
+        "ldr	r8, [%[b], #24]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #16]\n\t"
+        "str	r3, [%[a], #20]\n\t"
+        "str	r4, [%[a], #24]\n\t"
+        "str	r5, [%[a], #28]\n\t"
+        "ldr	r2, [%[a], #32]\n\t"
+        "ldr	r3, [%[a], #36]\n\t"
+        "ldr	r4, [%[a], #40]\n\t"
+        "ldr	r5, [%[a], #44]\n\t"
+        "ldr	r6, [%[b], #32]\n\t"
+        "ldr	r7, [%[b], #36]\n\t"
+        "ldr	r8, [%[b], #40]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #32]\n\t"
+        "str	r3, [%[a], #36]\n\t"
+        "str	r4, [%[a], #40]\n\t"
+        "str	r5, [%[a], #44]\n\t"
+        "sbc	%[c], r9, r9\n\t"
+        : [c] "+r" (c)
+        : [a] "r" (a), [b] "r" (b)
+        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision digit.
+ */
+static void sp_384_mul_d_12(sp_digit* r, const sp_digit* a,
+        sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    __asm__ __volatile__ (
+        "mov	r10, #0\n\t"
+        "# A[0] * B\n\t"
+        "ldr	r8, [%[a]]\n\t"
+        "umull	r5, r3, %[b], r8\n\t"
+        "mov	r4, #0\n\t"
+        "str	r5, [%[r]]\n\t"
+        "mov	r5, #0\n\t"
+        "mov	r9, #4\n\t"
+        "1:\n\t"
+        "ldr	r8, [%[a], r9]\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], r9]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "mov	r5, #0\n\t"
+        "add	r9, r9, #4\n\t"
+        "cmp	r9, #48\n\t"
+        "blt	1b\n\t"
+        "str	r3, [%[r], #48]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
+    );
+#else
+    __asm__ __volatile__ (
+        "mov	r10, #0\n\t"
+        "# A[0] * B\n\t"
+        "ldr	r8, [%[a]]\n\t"
+        "umull	r3, r4, %[b], r8\n\t"
+        "mov	r5, #0\n\t"
+        "str	r3, [%[r]]\n\t"
+        "# A[1] * B\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #4]\n\t"
+        "# A[2] * B\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #8]\n\t"
+        "# A[3] * B\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #12]\n\t"
+        "# A[4] * B\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "# A[5] * B\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #20]\n\t"
+        "# A[6] * B\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #24]\n\t"
+        "# A[7] * B\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #28]\n\t"
+        "# A[8] * B\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "mov	r4, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #32]\n\t"
+        "# A[9] * B\n\t"
+        "ldr	r8, [%[a], #36]\n\t"
+        "mov	r5, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #36]\n\t"
+        "# A[10] * B\n\t"
+        "ldr	r8, [%[a], #40]\n\t"
+        "mov	r3, #0\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #40]\n\t"
+        "# A[11] * B\n\t"
+        "ldr	r8, [%[a], #44]\n\t"
+        "umull	r6, r7, %[b], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r5, [%[r], #44]\n\t"
+        "str	r3, [%[r], #48]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
+    );
+#endif
+}
+
+/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
+ *
+ * d1   The high order half of the number to divide.
+ * d0   The low order half of the number to divide.
+ * div  The dividend.
+ * returns the result of the division.
+ *
+ * Note that this is an approximate div. It may give an answer 1 larger.
+ */
+static sp_digit div_384_word_12(sp_digit d1, sp_digit d0, sp_digit div)
+{
+    sp_digit r = 0;
+
+    __asm__ __volatile__ (
+        "lsr	r5, %[div], #1\n\t"
+        "add	r5, r5, #1\n\t"
+        "mov	r6, %[d0]\n\t"
+        "mov	r7, %[d1]\n\t"
+        "# Do top 32\n\t"
+        "subs	r8, r5, r7\n\t"
+        "sbc	r8, r8, r8\n\t"
+        "add	%[r], %[r], %[r]\n\t"
+        "sub	%[r], %[r], r8\n\t"
+        "and	r8, r8, r5\n\t"
+        "subs	r7, r7, r8\n\t"
+        "# Next 30 bits\n\t"
+        "mov	r4, #29\n\t"
+        "1:\n\t"
+        "movs	r6, r6, lsl #1\n\t"
+        "adc	r7, r7, r7\n\t"
+        "subs	r8, r5, r7\n\t"
+        "sbc	r8, r8, r8\n\t"
+        "add	%[r], %[r], %[r]\n\t"
+        "sub	%[r], %[r], r8\n\t"
+        "and	r8, r8, r5\n\t"
+        "subs	r7, r7, r8\n\t"
+        "subs	r4, r4, #1\n\t"
+        "bpl	1b\n\t"
+        "add	%[r], %[r], %[r]\n\t"
+        "add	%[r], %[r], #1\n\t"
+        "umull	r4, r5, %[r], %[div]\n\t"
+        "subs	r4, %[d0], r4\n\t"
+        "sbc	r5, %[d1], r5\n\t"
+        "add	%[r], %[r], r5\n\t"
+        "umull	r4, r5, %[r], %[div]\n\t"
+        "subs	r4, %[d0], r4\n\t"
+        "sbc	r5, %[d1], r5\n\t"
+        "add	%[r], %[r], r5\n\t"
+        "subs	r8, %[div], r4\n\t"
+        "sbc	r8, r8, r8\n\t"
+        "sub	%[r], %[r], r8\n\t"
+        : [r] "+r" (r)
+        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+        : "r4", "r5", "r6", "r7", "r8"
+    );
+    return r;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * m  Mask to AND against each digit.
+ */
+static void sp_384_mask_12(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<12; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    r[0] = a[0] & m;
+    r[1] = a[1] & m;
+    r[2] = a[2] & m;
+    r[3] = a[3] & m;
+    r[4] = a[4] & m;
+    r[5] = a[5] & m;
+    r[6] = a[6] & m;
+    r[7] = a[7] & m;
+    r[8] = a[8] & m;
+    r[9] = a[9] & m;
+    r[10] = a[10] & m;
+    r[11] = a[11] & m;
+#endif
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Nmber to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_384_div_12(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[24], t2[13];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+
+    div = d[11];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 12);
+    for (i=11; i>=0; i--) {
+        r1 = div_384_word_12(t1[12 + i], t1[12 + i - 1], div);
+
+        sp_384_mul_d_12(t2, d, r1);
+        t1[12 + i] += sp_384_sub_in_place_12(&t1[i], t2);
+        t1[12 + i] -= t2[12];
+        sp_384_mask_12(t2, d, t1[12 + i]);
+        t1[12 + i] += sp_384_add_12(&t1[i], &t1[i], t2);
+        sp_384_mask_12(t2, d, t1[12 + i]);
+        t1[12 + i] += sp_384_add_12(&t1[i], &t1[i], t2);
+    }
+
+    r1 = sp_384_cmp_12(t1, d) >= 0;
+    sp_384_cond_sub_12(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_384_mod_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_384_div_12(a, m, NULL, r);
+}
+
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
+/* Order-2 for the P384 curve. */
+static const uint32_t p384_order_minus_2[12] = {
+    0xccc52971U,0xecec196aU,0x48b0a77aU,0x581a0db2U,0xf4372ddfU,0xc7634d81U,
+    0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU
+};
+#else
+/* The low half of the order-2 of the P384 curve. */
+static const uint32_t p384_order_low[6] = {
+    0xccc52971U,0xecec196aU,0x48b0a77aU,0x581a0db2U,0xf4372ddfU,0xc7634d81U
+    
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Multiply two number mod the order of P384 curve. (r = a * b mod order)
+ *
+ * r  Result of the multiplication.
+ * a  First operand of the multiplication.
+ * b  Second operand of the multiplication.
+ */
+static void sp_384_mont_mul_order_12(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+    sp_384_mul_12(r, a, b);
+    sp_384_mont_reduce_order_12(r, p384_order, p384_mp_order);
+}
+
+/* Square number mod the order of P384 curve. (r = a * a mod order)
+ *
+ * r  Result of the squaring.
+ * a  Number to square.
+ */
+static void sp_384_mont_sqr_order_12(sp_digit* r, const sp_digit* a)
+{
+    sp_384_sqr_12(r, a);
+    sp_384_mont_reduce_order_12(r, p384_order, p384_mp_order);
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Square number mod the order of P384 curve a number of times.
+ * (r = a ^ n mod order)
+ *
+ * r  Result of the squaring.
+ * a  Number to square.
+ */
+static void sp_384_mont_sqr_n_order_12(sp_digit* r, const sp_digit* a, int n)
+{
+    int i;
+
+    sp_384_mont_sqr_order_12(r, a);
+    for (i=1; i<n; i++) {
+        sp_384_mont_sqr_order_12(r, r);
+    }
+}
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the order of the P384 curve.
+ * (r = 1 / a mod order)
+ *
+ * r   Inverse result.
+ * a   Number to invert.
+ * td  Temporary data.
+ */
+static void sp_384_mont_inv_order_12(sp_digit* r, const sp_digit* a,
+        sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    XMEMCPY(t, a, sizeof(sp_digit) * 12);
+    for (i=382; i>=0; i--) {
+        sp_384_mont_sqr_order_12(t, t);
+        if ((p384_order_minus_2[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_384_mont_mul_order_12(t, t, a);
+        }
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 12U);
+#else
+    sp_digit* t = td;
+    sp_digit* t2 = td + 2 * 12;
+    sp_digit* t3 = td + 4 * 12;
+    int i;
+
+    /* t = a^2 */
+    sp_384_mont_sqr_order_12(t, a);
+    /* t = a^3 = t * a */
+    sp_384_mont_mul_order_12(t, t, a);
+    /* t2= a^c = t ^ 2 ^ 2 */
+    sp_384_mont_sqr_n_order_12(t2, t, 2);
+    /* t = a^f = t2 * t */
+    sp_384_mont_mul_order_12(t, t2, t);
+    /* t2= a^f0 = t ^ 2 ^ 4 */
+    sp_384_mont_sqr_n_order_12(t2, t, 4);
+    /* t = a^ff = t2 * t */
+    sp_384_mont_mul_order_12(t, t2, t);
+    /* t2= a^ff00 = t ^ 2 ^ 8 */
+    sp_384_mont_sqr_n_order_12(t2, t, 8);
+    /* t3= a^ffff = t2 * t */
+    sp_384_mont_mul_order_12(t3, t2, t);
+    /* t2= a^ffff0000 = t3 ^ 2 ^ 16 */
+    sp_384_mont_sqr_n_order_12(t2, t3, 16);
+    /* t = a^ffffffff = t2 * t3 */
+    sp_384_mont_mul_order_12(t, t2, t3);
+    /* t2= a^ffffffff0000 = t ^ 2 ^ 16  */
+    sp_384_mont_sqr_n_order_12(t2, t, 16);
+    /* t = a^ffffffffffff = t2 * t3 */
+    sp_384_mont_mul_order_12(t, t2, t3);
+    /* t2= a^ffffffffffff000000000000 = t ^ 2 ^ 48  */
+    sp_384_mont_sqr_n_order_12(t2, t, 48);
+    /* t= a^fffffffffffffffffffffffff = t2 * t */
+    sp_384_mont_mul_order_12(t, t2, t);
+    /* t2= a^ffffffffffffffffffffffff000000000000000000000000 */
+    sp_384_mont_sqr_n_order_12(t2, t, 96);
+    /* t2= a^ffffffffffffffffffffffffffffffffffffffffffffffff = t2 * t */
+    sp_384_mont_mul_order_12(t2, t2, t);
+    for (i=191; i>=1; i--) {
+        sp_384_mont_sqr_order_12(t2, t2);
+        if (((sp_digit)p384_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_384_mont_mul_order_12(t2, t2, a);
+        }
+    }
+    sp_384_mont_sqr_order_12(t2, t2);
+    sp_384_mont_mul_order_12(r, t2, a);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+#ifdef HAVE_ECC_SIGN
+#ifndef SP_ECC_MAX_SIG_GEN
+#define SP_ECC_MAX_SIG_GEN  64
+#endif
+
+/* Sign the hash using the private key.
+ *   e = [hash, 384 bits] from binary
+ *   r = (k.G)->x mod order
+ *   s = (r * x + e) / k mod order
+ * The hash is truncated to the first 384 bits.
+ *
+ * hash     Hash to sign.
+ * hashLen  Length of the hash data.
+ * rng      Random number generator.
+ * priv     Private part of key - scalar.
+ * rm       First part of result as an mp_int.
+ * sm       Sirst part of result as an mp_int.
+ * heap     Heap to use for allocation.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
+                    mp_int* rm, mp_int* sm, mp_int* km, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit ed[2*12];
+    sp_digit xd[2*12];
+    sp_digit kd[2*12];
+    sp_digit rd[2*12];
+    sp_digit td[3 * 2*12];
+    sp_point_384 p;
+#endif
+    sp_digit* e = NULL;
+    sp_digit* x = NULL;
+    sp_digit* k = NULL;
+    sp_digit* r = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_384* point = NULL;
+    sp_digit carry;
+    sp_digit* s = NULL;
+    sp_digit* kInv = NULL;
+    int err = MP_OKAY;
+    int32_t c;
+    int i;
+
+    (void)heap;
+
+    err = sp_384_point_new_12(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 12, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        e = d + 0 * 12;
+        x = d + 2 * 12;
+        k = d + 4 * 12;
+        r = d + 6 * 12;
+        tmp = d + 8 * 12;
+#else
+        e = ed;
+        x = xd;
+        k = kd;
+        r = rd;
+        tmp = td;
+#endif
+        s = e;
+        kInv = k;
+
+        if (hashLen > 48U) {
+            hashLen = 48U;
+        }
+
+        sp_384_from_bin(e, 12, hash, (int)hashLen);
+    }
+
+    for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
+        sp_384_from_mp(x, 12, priv);
+
+        /* New random point. */
+        if (km == NULL || mp_iszero(km)) {
+            err = sp_384_ecc_gen_k_12(rng, k);
+        }
+        else {
+            sp_384_from_mp(k, 12, km);
+            mp_zero(km);
+        }
+        if (err == MP_OKAY) {
+                err = sp_384_ecc_mulmod_base_12(point, k, 1, NULL);
+        }
+
+        if (err == MP_OKAY) {
+            /* r = point->x mod order */
+            XMEMCPY(r, point->x, sizeof(sp_digit) * 12U);
+            sp_384_norm_12(r);
+            c = sp_384_cmp_12(r, p384_order);
+            sp_384_cond_sub_12(r, r, p384_order, 0L - (sp_digit)(c >= 0));
+            sp_384_norm_12(r);
+
+            /* Conv k to Montgomery form (mod order) */
+                sp_384_mul_12(k, k, p384_norm_order);
+            err = sp_384_mod_12(k, k, p384_order);
+        }
+        if (err == MP_OKAY) {
+            sp_384_norm_12(k);
+            /* kInv = 1/k mod order */
+                sp_384_mont_inv_order_12(kInv, k, tmp);
+            sp_384_norm_12(kInv);
+
+            /* s = r * x + e */
+                sp_384_mul_12(x, x, r);
+            err = sp_384_mod_12(x, x, p384_order);
+        }
+        if (err == MP_OKAY) {
+            sp_384_norm_12(x);
+            carry = sp_384_add_12(s, e, x);
+            sp_384_cond_sub_12(s, s, p384_order, 0 - carry);
+            sp_384_norm_12(s);
+            c = sp_384_cmp_12(s, p384_order);
+            sp_384_cond_sub_12(s, s, p384_order, 0L - (sp_digit)(c >= 0));
+            sp_384_norm_12(s);
+
+            /* s = s * k^-1 mod order */
+                sp_384_mont_mul_order_12(s, s, kInv);
+            sp_384_norm_12(s);
+
+            /* Check that signature is usable. */
+            if (sp_384_iszero_12(s) == 0) {
+                break;
+            }
+        }
+    }
+
+    if (i == 0) {
+        err = RNG_FAILURE_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(r, rm);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(s, sm);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XMEMSET(d, 0, sizeof(sp_digit) * 8 * 12);
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    XMEMSET(e, 0, sizeof(sp_digit) * 2U * 12U);
+    XMEMSET(x, 0, sizeof(sp_digit) * 2U * 12U);
+    XMEMSET(k, 0, sizeof(sp_digit) * 2U * 12U);
+    XMEMSET(r, 0, sizeof(sp_digit) * 2U * 12U);
+    XMEMSET(r, 0, sizeof(sp_digit) * 2U * 12U);
+    XMEMSET(tmp, 0, sizeof(sp_digit) * 3U * 2U * 12U);
+#endif
+    sp_384_point_free_12(point, 1, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_SIGN */
+
+#ifdef HAVE_ECC_VERIFY
+/* Verify the signature values with the hash and public key.
+ *   e = Truncate(hash, 384)
+ *   u1 = e/s mod order
+ *   u2 = r/s mod order
+ *   r == (u1.G + u2.Q)->x mod order
+ * Optimization: Leave point in projective form.
+ *   (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
+ *   (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
+ * The hash is truncated to the first 384 bits.
+ *
+ * hash     Hash to sign.
+ * hashLen  Length of the hash data.
+ * rng      Random number generator.
+ * priv     Private part of key - scalar.
+ * rm       First part of result as an mp_int.
+ * sm       Sirst part of result as an mp_int.
+ * heap     Heap to use for allocation.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+int sp_ecc_verify_384(const byte* hash, word32 hashLen, mp_int* pX,
+    mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit u1d[2*12];
+    sp_digit u2d[2*12];
+    sp_digit sd[2*12];
+    sp_digit tmpd[2*12 * 5];
+    sp_point_384 p1d;
+    sp_point_384 p2d;
+#endif
+    sp_digit* u1 = NULL;
+    sp_digit* u2 = NULL;
+    sp_digit* s = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_384* p1;
+    sp_point_384* p2 = NULL;
+    sp_digit carry;
+    int32_t c;
+    int err;
+
+    err = sp_384_point_new_12(heap, p1d, p1);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, p2d, p2);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 12, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        u1  = d + 0 * 12;
+        u2  = d + 2 * 12;
+        s   = d + 4 * 12;
+        tmp = d + 6 * 12;
+#else
+        u1 = u1d;
+        u2 = u2d;
+        s  = sd;
+        tmp = tmpd;
+#endif
+
+        if (hashLen > 48U) {
+            hashLen = 48U;
+        }
+
+        sp_384_from_bin(u1, 12, hash, (int)hashLen);
+        sp_384_from_mp(u2, 12, r);
+        sp_384_from_mp(s, 12, sm);
+        sp_384_from_mp(p2->x, 12, pX);
+        sp_384_from_mp(p2->y, 12, pY);
+        sp_384_from_mp(p2->z, 12, pZ);
+
+        {
+            sp_384_mul_12(s, s, p384_norm_order);
+        }
+        err = sp_384_mod_12(s, s, p384_order);
+    }
+    if (err == MP_OKAY) {
+        sp_384_norm_12(s);
+        {
+            sp_384_mont_inv_order_12(s, s, tmp);
+            sp_384_mont_mul_order_12(u1, u1, s);
+            sp_384_mont_mul_order_12(u2, u2, s);
+        }
+
+            err = sp_384_ecc_mulmod_base_12(p1, u1, 0, heap);
+    }
+    if (err == MP_OKAY) {
+            err = sp_384_ecc_mulmod_12(p2, p2, u2, 0, heap);
+    }
+
+    if (err == MP_OKAY) {
+        {
+            sp_384_proj_point_add_12(p1, p1, p2, tmp);
+            if (sp_384_iszero_12(p1->z)) {
+                if (sp_384_iszero_12(p1->x) && sp_384_iszero_12(p1->y)) {
+                    sp_384_proj_point_dbl_12(p1, p2, tmp);
+                }
+                else {
+                    /* Y ordinate is not used from here - don't set. */
+                    p1->x[0] = 0;
+                    p1->x[1] = 0;
+                    p1->x[2] = 0;
+                    p1->x[3] = 0;
+                    p1->x[4] = 0;
+                    p1->x[5] = 0;
+                    p1->x[6] = 0;
+                    p1->x[7] = 0;
+                    p1->x[8] = 0;
+                    p1->x[9] = 0;
+                    p1->x[10] = 0;
+                    p1->x[11] = 0;
+                    XMEMCPY(p1->z, p384_norm_mod, sizeof(p384_norm_mod));
+                }
+            }
+        }
+
+        /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
+        /* Reload r and convert to Montgomery form. */
+        sp_384_from_mp(u2, 12, r);
+        err = sp_384_mod_mul_norm_12(u2, u2, p384_mod);
+    }
+
+    if (err == MP_OKAY) {
+        /* u1 = r.z'.z' mod prime */
+        sp_384_mont_sqr_12(p1->z, p1->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(u1, u2, p1->z, p384_mod, p384_mp_mod);
+        *res = (int)(sp_384_cmp_12(p1->x, u1) == 0);
+        if (*res == 0) {
+            /* Reload r and add order. */
+            sp_384_from_mp(u2, 12, r);
+            carry = sp_384_add_12(u2, u2, p384_order);
+            /* Carry means result is greater than mod and is not valid. */
+            if (carry == 0) {
+                sp_384_norm_12(u2);
+
+                /* Compare with mod and if greater or equal then not valid. */
+                c = sp_384_cmp_12(u2, p384_mod);
+                if (c < 0) {
+                    /* Convert to Montogomery form */
+                    err = sp_384_mod_mul_norm_12(u2, u2, p384_mod);
+                    if (err == MP_OKAY) {
+                        /* u1 = (r + 1*order).z'.z' mod prime */
+                        sp_384_mont_mul_12(u1, u2, p1->z, p384_mod,
+                                                                  p384_mp_mod);
+                        *res = (int)(sp_384_cmp_12(p1->x, u1) == 0);
+                    }
+                }
+            }
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL)
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+#endif
+    sp_384_point_free_12(p1, 0, heap);
+    sp_384_point_free_12(p2, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_VERIFY */
+
+#ifdef HAVE_ECC_CHECK_KEY
+/* Check that the x and y oridinates are a valid point on the curve.
+ *
+ * point  EC point.
+ * heap   Heap to use if dynamically allocating.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+static int sp_384_ecc_is_point_12(sp_point_384* point, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit t1d[2*12];
+    sp_digit t2d[2*12];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12 * 4, heap, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = d + 0 * 12;
+        t2 = d + 2 * 12;
+#else
+        (void)heap;
+
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
+        sp_384_sqr_12(t1, point->y);
+        (void)sp_384_mod_12(t1, t1, p384_mod);
+        sp_384_sqr_12(t2, point->x);
+        (void)sp_384_mod_12(t2, t2, p384_mod);
+        sp_384_mul_12(t2, t2, point->x);
+        (void)sp_384_mod_12(t2, t2, p384_mod);
+        (void)sp_384_sub_12(t2, p384_mod, t2);
+        sp_384_mont_add_12(t1, t1, t2, p384_mod);
+
+        sp_384_mont_add_12(t1, t1, point->x, p384_mod);
+        sp_384_mont_add_12(t1, t1, point->x, p384_mod);
+        sp_384_mont_add_12(t1, t1, point->x, p384_mod);
+
+        if (sp_384_cmp_12(t1, p384_b) != 0) {
+            err = MP_VAL;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+
+/* Check that the x and y oridinates are a valid point on the curve.
+ *
+ * pX  X ordinate of EC point.
+ * pY  Y ordinate of EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+int sp_ecc_is_point_384(mp_int* pX, mp_int* pY)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 pubd;
+#endif
+    sp_point_384* pub;
+    byte one[1] = { 1 };
+    int err;
+
+    err = sp_384_point_new_12(NULL, pubd, pub);
+    if (err == MP_OKAY) {
+        sp_384_from_mp(pub->x, 12, pX);
+        sp_384_from_mp(pub->y, 12, pY);
+        sp_384_from_bin(pub->z, 12, one, (int)sizeof(one));
+
+        err = sp_384_ecc_is_point_12(pub, NULL);
+    }
+
+    sp_384_point_free_12(pub, 0, NULL);
+
+    return err;
+}
+
+/* Check that the private scalar generates the EC point (px, py), the point is
+ * on the curve and the point has the correct order.
+ *
+ * pX     X ordinate of EC point.
+ * pY     Y ordinate of EC point.
+ * privm  Private scalar that generates EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve, ECC_INF_E if the point does not have the correct order,
+ * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and
+ * MP_OKAY otherwise.
+ */
+int sp_ecc_check_key_384(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit privd[12];
+    sp_point_384 pubd;
+    sp_point_384 pd;
+#endif
+    sp_digit* priv = NULL;
+    sp_point_384* pub;
+    sp_point_384* p = NULL;
+    byte one[1] = { 1 };
+    int err;
+
+    err = sp_384_point_new_12(heap, pubd, pub);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (priv == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+        priv = privd;
+#endif
+
+        sp_384_from_mp(pub->x, 12, pX);
+        sp_384_from_mp(pub->y, 12, pY);
+        sp_384_from_bin(pub->z, 12, one, (int)sizeof(one));
+        sp_384_from_mp(priv, 12, privm);
+
+        /* Check point at infinitiy. */
+        if ((sp_384_iszero_12(pub->x) != 0) &&
+            (sp_384_iszero_12(pub->y) != 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check range of X and Y */
+        if (sp_384_cmp_12(pub->x, p384_mod) >= 0 ||
+            sp_384_cmp_12(pub->y, p384_mod) >= 0) {
+            err = ECC_OUT_OF_RANGE_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check point is on curve */
+        err = sp_384_ecc_is_point_12(pub, heap);
+    }
+
+    if (err == MP_OKAY) {
+        /* Point * order = infinity */
+            err = sp_384_ecc_mulmod_12(p, pub, p384_order, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is infinity */
+        if ((sp_384_iszero_12(p->x) == 0) ||
+            (sp_384_iszero_12(p->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Base * private = point */
+            err = sp_384_ecc_mulmod_base_12(p, priv, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is public key */
+        if (sp_384_cmp_12(p->x, pub->x) != 0 ||
+            sp_384_cmp_12(p->y, pub->y) != 0) {
+            err = ECC_PRIV_KEY_E;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (priv != NULL) {
+        XFREE(priv, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(p, 0, heap);
+    sp_384_point_free_12(pub, 0, heap);
+
+    return err;
+}
+#endif
+#ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
+/* Add two projective EC points together.
+ * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ)
+ *
+ * pX   First EC point's X ordinate.
+ * pY   First EC point's Y ordinate.
+ * pZ   First EC point's Z ordinate.
+ * qX   Second EC point's X ordinate.
+ * qY   Second EC point's Y ordinate.
+ * qZ   Second EC point's Z ordinate.
+ * rX   Resultant EC point's X ordinate.
+ * rY   Resultant EC point's Y ordinate.
+ * rZ   Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_add_point_384(mp_int* pX, mp_int* pY, mp_int* pZ,
+                              mp_int* qX, mp_int* qY, mp_int* qZ,
+                              mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit tmpd[2 * 12 * 5];
+    sp_point_384 pd;
+    sp_point_384 qd;
+#endif
+    sp_digit* tmp;
+    sp_point_384* p;
+    sp_point_384* q = NULL;
+    int err;
+
+    err = sp_384_point_new_12(NULL, pd, p);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(NULL, qd, q);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 5, NULL,
+                                                              DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_384_from_mp(p->x, 12, pX);
+        sp_384_from_mp(p->y, 12, pY);
+        sp_384_from_mp(p->z, 12, pZ);
+        sp_384_from_mp(q->x, 12, qX);
+        sp_384_from_mp(q->y, 12, qY);
+        sp_384_from_mp(q->z, 12, qZ);
+
+            sp_384_proj_point_add_12(p, p, q, tmp);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->x, rX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->y, rY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->z, rZ);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(q, 0, NULL);
+    sp_384_point_free_12(p, 0, NULL);
+
+    return err;
+}
+
+/* Double a projective EC point.
+ * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ)
+ *
+ * pX   EC point's X ordinate.
+ * pY   EC point's Y ordinate.
+ * pZ   EC point's Z ordinate.
+ * rX   Resultant EC point's X ordinate.
+ * rY   Resultant EC point's Y ordinate.
+ * rZ   Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_dbl_point_384(mp_int* pX, mp_int* pY, mp_int* pZ,
+                              mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit tmpd[2 * 12 * 2];
+    sp_point_384 pd;
+#endif
+    sp_digit* tmp;
+    sp_point_384* p;
+    int err;
+
+    err = sp_384_point_new_12(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 2, NULL,
+                                                              DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_384_from_mp(p->x, 12, pX);
+        sp_384_from_mp(p->y, 12, pY);
+        sp_384_from_mp(p->z, 12, pZ);
+
+            sp_384_proj_point_dbl_12(p, p, tmp);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->x, rX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->y, rY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->z, rZ);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(p, 0, NULL);
+
+    return err;
+}
+
+/* Map a projective EC point to affine in place.
+ * pZ will be one.
+ *
+ * pX   EC point's X ordinate.
+ * pY   EC point's Y ordinate.
+ * pZ   EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_map_384(mp_int* pX, mp_int* pY, mp_int* pZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit tmpd[2 * 12 * 6];
+    sp_point_384 pd;
+#endif
+    sp_digit* tmp;
+    sp_point_384* p;
+    int err;
+
+    err = sp_384_point_new_12(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, NULL,
+                                                              DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+    if (err == MP_OKAY) {
+        sp_384_from_mp(p->x, 12, pX);
+        sp_384_from_mp(p->y, 12, pY);
+        sp_384_from_mp(p->z, 12, pZ);
+
+        sp_384_map_12(p, p, tmp);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->x, pX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->y, pY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->z, pZ);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(p, 0, NULL);
+
+    return err;
+}
+#endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */
+#ifdef HAVE_COMP_KEY
+/* Find the square root of a number mod the prime of the curve.
+ *
+ * y  The number to operate on and the result.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+static int sp_384_mont_sqrt_12(sp_digit* y)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d;
+#else
+    sp_digit t1d[2 * 12];
+    sp_digit t2d[2 * 12];
+    sp_digit t3d[2 * 12];
+    sp_digit t4d[2 * 12];
+    sp_digit t5d[2 * 12];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    sp_digit* t3;
+    sp_digit* t4;
+    sp_digit* t5;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5 * 2 * 12, NULL, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = d + 0 * 12;
+        t2 = d + 2 * 12;
+        t3 = d + 4 * 12;
+        t4 = d + 6 * 12;
+        t5 = d + 8 * 12;
+#else
+        t1 = t1d;
+        t2 = t2d;
+        t3 = t3d;
+        t4 = t4d;
+        t5 = t5d;
+#endif
+
+        {
+            /* t2 = y ^ 0x2 */
+            sp_384_mont_sqr_12(t2, y, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x3 */
+            sp_384_mont_mul_12(t1, t2, y, p384_mod, p384_mp_mod);
+            /* t5 = y ^ 0xc */
+            sp_384_mont_sqr_n_12(t5, t1, 2, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xf */
+            sp_384_mont_mul_12(t1, t1, t5, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x1e */
+            sp_384_mont_sqr_12(t2, t1, p384_mod, p384_mp_mod);
+            /* t3 = y ^ 0x1f */
+            sp_384_mont_mul_12(t3, t2, y, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3e0 */
+            sp_384_mont_sqr_n_12(t2, t3, 5, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x3ff */
+            sp_384_mont_mul_12(t1, t3, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x7fe0 */
+            sp_384_mont_sqr_n_12(t2, t1, 5, p384_mod, p384_mp_mod);
+            /* t3 = y ^ 0x7fff */
+            sp_384_mont_mul_12(t3, t3, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3fff800 */
+            sp_384_mont_sqr_n_12(t2, t3, 15, p384_mod, p384_mp_mod);
+            /* t4 = y ^ 0x3ffffff */
+            sp_384_mont_mul_12(t4, t3, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0xffffffc000000 */
+            sp_384_mont_sqr_n_12(t2, t4, 30, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xfffffffffffff */
+            sp_384_mont_mul_12(t1, t4, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0xfffffffffffffff000000000000000 */
+            sp_384_mont_sqr_n_12(t2, t1, 60, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xffffffffffffffffffffffffffffff */
+            sp_384_mont_mul_12(t1, t1, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */
+            sp_384_mont_sqr_n_12(t2, t1, 120, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+            sp_384_mont_mul_12(t1, t1, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */
+            sp_384_mont_sqr_n_12(t2, t1, 15, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+            sp_384_mont_mul_12(t1, t3, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff80000000 */
+            sp_384_mont_sqr_n_12(t2, t1, 31, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff */
+            sp_384_mont_mul_12(t1, t4, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff0 */
+            sp_384_mont_sqr_n_12(t2, t1, 4, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc */
+            sp_384_mont_mul_12(t1, t5, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000 */
+            sp_384_mont_sqr_n_12(t2, t1, 62, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000001 */
+            sp_384_mont_mul_12(t1, y, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc00000000000000040000000 */
+            sp_384_mont_sqr_n_12(y, t1, 30, p384_mod, p384_mp_mod);
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+
+
+/* Uncompress the point given the X ordinate.
+ *
+ * xm    X ordinate.
+ * odd   Whether the Y ordinate is odd.
+ * ym    Calculated Y ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_uncompress_384(mp_int* xm, int odd, mp_int* ym)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d;
+#else
+    sp_digit xd[2 * 12];
+    sp_digit yd[2 * 12];
+#endif
+    sp_digit* x = NULL;
+    sp_digit* y = NULL;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 12, NULL, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        x = d + 0 * 12;
+        y = d + 2 * 12;
+#else
+        x = xd;
+        y = yd;
+#endif
+
+        sp_384_from_mp(x, 12, xm);
+        err = sp_384_mod_mul_norm_12(x, x, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        /* y = x^3 */
+        {
+            sp_384_mont_sqr_12(y, x, p384_mod, p384_mp_mod);
+            sp_384_mont_mul_12(y, y, x, p384_mod, p384_mp_mod);
+        }
+        /* y = x^3 - 3x */
+        sp_384_mont_sub_12(y, y, x, p384_mod);
+        sp_384_mont_sub_12(y, y, x, p384_mod);
+        sp_384_mont_sub_12(y, y, x, p384_mod);
+        /* y = x^3 - 3x + b */
+        err = sp_384_mod_mul_norm_12(x, p384_b, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        sp_384_mont_add_12(y, y, x, p384_mod);
+        /* y = sqrt(x^3 - 3x + b) */
+        err = sp_384_mont_sqrt_12(y);
+    }
+    if (err == MP_OKAY) {
+        XMEMSET(y + 12, 0, 12U * sizeof(sp_digit));
+        sp_384_mont_reduce_12(y, p384_mod, p384_mp_mod);
+        if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) {
+            sp_384_mont_sub_12(y, p384_mod, y, p384_mod);
+        }
+
+        err = sp_384_to_mp(y, ym);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+#endif
+#endif /* WOLFSSL_SP_384 */
 #endif /* WOLFSSL_HAVE_SP_ECC */
 #endif /* WOLFSSL_SP_ARM32_ASM */
 #endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH || WOLFSSL_HAVE_SP_ECC */
--- a/wolfcrypt/src/sp_arm64.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/sp_arm64.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* sp.c
  *
- * Copyright (C) 2006-2018 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -39,8 +39,6 @@
                                     defined(WOLFSSL_HAVE_SP_ECC)
 
 #ifdef RSA_LOW_MEM
-#define SP_RSA_PRIVATE_EXP_D
-
 #ifndef WOLFSSL_SP_SMALL
 #define WOLFSSL_SP_SMALL
 #endif
@@ -51,85 +49,112 @@
 #ifdef WOLFSSL_SP_ARM64_ASM
 #if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
 #ifndef WOLFSSL_SP_NO_2048
-/* Read big endian unsigned byte aray into r.
- *
- * r  A single precision integer.
+/* Read big endian unsigned byte array into r.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
  * a  Byte array.
  * n  Number of bytes in array to read.
  */
-static void sp_2048_from_bin(sp_digit* r, int max, const byte* a, int n)
-{
-    int i, j = 0, s = 0;
-
-    r[0] = 0;
-    for (i = n-1; i >= 0; i--) {
-        r[j] |= ((sp_digit)a[i]) << s;
-        if (s >= 56) {
-            r[j] &= 0xffffffffffffffffl;
-            s = 64 - s;
-            if (j + 1 >= max)
-                break;
-            r[++j] = a[i] >> s;
-            s = 8 - s;
-        }
-        else
-            s += 8;
-    }
-
-    for (j++; j < max; j++)
+static void sp_2048_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j;
+    byte* d;
+
+    for (i = n - 1,j = 0; i >= 7; i -= 8) {
+        r[j]  = ((sp_digit)a[i - 0] <<  0) |
+                ((sp_digit)a[i - 1] <<  8) |
+                ((sp_digit)a[i - 2] << 16) |
+                ((sp_digit)a[i - 3] << 24) |
+                ((sp_digit)a[i - 4] << 32) |
+                ((sp_digit)a[i - 5] << 40) |
+                ((sp_digit)a[i - 6] << 48) |
+                ((sp_digit)a[i - 7] << 56);
+        j++;
+    }
+
+    if (i >= 0) {
         r[j] = 0;
+
+        d = (byte*)r;
+        switch (i) {
+            case 6: d[n - 1 - 6] = a[6]; //fallthrough
+            case 5: d[n - 1 - 5] = a[5]; //fallthrough
+            case 4: d[n - 1 - 4] = a[4]; //fallthrough
+            case 3: d[n - 1 - 3] = a[3]; //fallthrough
+            case 2: d[n - 1 - 2] = a[2]; //fallthrough
+            case 1: d[n - 1 - 1] = a[1]; //fallthrough
+            case 0: d[n - 1 - 0] = a[0]; //fallthrough
+        }
+        j++;
+    }
+
+    for (; j < size; j++) {
+        r[j] = 0;
+    }
 }
 
 /* Convert an mp_int to an array of sp_digit.
  *
  * r  A single precision integer.
+ * size  Maximum number of bytes to convert
  * a  A multi-precision integer.
  */
-static void sp_2048_from_mp(sp_digit* r, int max, mp_int* a)
+static void sp_2048_from_mp(sp_digit* r, int size, const mp_int* a)
 {
 #if DIGIT_BIT == 64
     int j;
 
     XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
 
-    for (j = a->used; j < max; j++)
+    for (j = a->used; j < size; j++) {
         r[j] = 0;
+    }
 #elif DIGIT_BIT > 64
-    int i, j = 0, s = 0;
+    int i, j = 0;
+    word32 s = 0;
 
     r[0] = 0;
-    for (i = 0; i < a->used && j < max; i++) {
-        r[j] |= a->dp[i] << s;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
         r[j] &= 0xffffffffffffffffl;
-        s = 64 - s;
-        if (j + 1 >= max)
+        s = 64U - s;
+        if (j + 1 >= size) {
             break;
-        r[++j] = a->dp[i] >> s;
-        while (s + 64 <= DIGIT_BIT) {
-            s += 64;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 64U) <= (word32)DIGIT_BIT) {
+            s += 64U;
             r[j] &= 0xffffffffffffffffl;
-            if (j + 1 >= max)
+            if (j + 1 >= size) {
                 break;
-            if (s < DIGIT_BIT)
-                r[++j] = a->dp[i] >> s;
-            else
-                r[++j] = 0;
-        }
-        s = DIGIT_BIT - s;
-    }
-
-    for (j++; j < max; j++)
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
         r[j] = 0;
+    }
 #else
     int i, j = 0, s = 0;
 
     r[0] = 0;
-    for (i = 0; i < a->used && j < max; i++) {
+    for (i = 0; i < a->used && j < size; i++) {
         r[j] |= ((sp_digit)a->dp[i]) << s;
         if (s + DIGIT_BIT >= 64) {
             r[j] &= 0xffffffffffffffffl;
-            if (j + 1 >= max)
+            if (j + 1 >= size) {
                 break;
+            }
             s = 64 - s;
             if (s == DIGIT_BIT) {
                 r[++j] = 0;
@@ -140,16 +165,18 @@
                 s = DIGIT_BIT - s;
             }
         }
-        else
+        else {
             s += DIGIT_BIT;
-    }
-
-    for (j++; j < max; j++)
+        }
+    }
+
+    for (j++; j < size; j++) {
         r[j] = 0;
-#endif
-}
-
-/* Write r as big endian to byte aray.
+    }
+#endif
+}
+
+/* Write r as big endian to byte array.
  * Fixed length number of bytes written: 256
  *
  * r  A single precision integer.
@@ -157,25 +184,17 @@
  */
 static void sp_2048_to_bin(sp_digit* r, byte* a)
 {
-    int i, j, s = 0, b;
-
-    j = 2048 / 8 - 1;
-    a[j] = 0;
-    for (i=0; i<32 && j>=0; i++) {
-        b = 0;
-        a[j--] |= r[i] << s; b += 8 - s;
-        if (j < 0)
-            break;
-        while (b < 64) {
-            a[j--] = r[i] >> b; b += 8;
-            if (j < 0)
-                break;
-        }
-        s = 8 - (b - 64);
-        if (j >= 0)
-            a[j] = 0;
-        if (s != 0)
-            j++;
+    int i, j;
+
+    for (i = 31, j = 0; i >= 0; i--) {
+        a[j++] = r[i] >> 56;
+        a[j++] = r[i] >> 48;
+        a[j++] = r[i] >> 40;
+        a[j++] = r[i] >> 32;
+        a[j++] = r[i] >> 24;
+        a[j++] = r[i] >> 16;
+        a[j++] = r[i] >> 8;
+        a[j++] = r[i] >> 0;
     }
 }
 
@@ -191,414 +210,420 @@
     sp_digit tmp[8];
 
     __asm__ __volatile__ (
-        "ldp	x8, x9, [%[a], 0]\n\t"
-        "ldp	x10, x11, [%[a], 16]\n\t"
-        "ldp	x12, x13, [%[a], 32]\n\t"
-        "ldp	x14, x15, [%[a], 48]\n\t"
-        "ldp	x16, x17, [%[b], 0]\n\t"
-        "ldp	x18, x19, [%[b], 16]\n\t"
-        "ldp	x20, x21, [%[b], 32]\n\t"
-        "ldp	x22, x23, [%[b], 48]\n\t"
+        "ldp	x9, x10, [%[a], 0]\n\t"
+        "ldp	x11, x12, [%[a], 16]\n\t"
+        "ldp	x13, x14, [%[a], 32]\n\t"
+        "ldp	x15, x16, [%[a], 48]\n\t"
+        "ldp	x17, x19, [%[b], 0]\n\t"
+        "ldp	x20, x21, [%[b], 16]\n\t"
+        "ldp	x22, x23, [%[b], 32]\n\t"
+        "ldp	x24, x25, [%[b], 48]\n\t"
         "#  A[0] * B[0]\n\t"
-        "mul	x3, x8, x16\n\t"
-        "umulh	x4, x8, x16\n\t"
-        "str	x3, [%[tmp]]\n\t"
+        "mul	x4, x9, x17\n\t"
+        "umulh	x5, x9, x17\n\t"
+        "str	x4, [%[tmp]]\n\t"
         "#  A[0] * B[1]\n\t"
-        "mul	x6, x8, x17\n\t"
-        "umulh	x7, x8, x17\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adc	x5, xzr, x7\n\t"
+        "mul	x7, x9, x19\n\t"
+        "umulh	x8, x9, x19\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[1] * B[0]\n\t"
-        "mul	x6, x9, x16\n\t"
-        "umulh	x7, x9, x16\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, xzr, xzr\n\t"
-        "str	x4, [%[tmp], 8]\n\t"
+        "mul	x7, x10, x17\n\t"
+        "adc	x6, xzr, x8\n\t"
+        "umulh	x8, x10, x17\n\t"
+        "adds	x5, x5, x7\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "str	x5, [%[tmp], 8]\n\t"
+        "adc	x4, xzr, xzr\n\t"
         "#  A[0] * B[2]\n\t"
-        "mul	x6, x8, x18\n\t"
-        "umulh	x7, x8, x18\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, xzr, xzr\n\t"
+        "mul	x7, x9, x20\n\t"
+        "umulh	x8, x9, x20\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[1] * B[1]\n\t"
-        "mul	x6, x9, x17\n\t"
-        "umulh	x7, x9, x17\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "mul	x7, x10, x19\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "umulh	x8, x10, x19\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[2] * B[0]\n\t"
-        "mul	x6, x10, x16\n\t"
-        "umulh	x7, x10, x16\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
-        "str	x5, [%[tmp], 16]\n\t"
+        "mul	x7, x11, x17\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "umulh	x8, x11, x17\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "adds	x6, x6, x7\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "str	x6, [%[tmp], 16]\n\t"
+        "adc	x5, x5, xzr\n\t"
         "#  A[0] * B[3]\n\t"
-        "mul	x6, x8, x19\n\t"
-        "umulh	x7, x8, x19\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, xzr, xzr\n\t"
+        "mul	x7, x9, x21\n\t"
+        "umulh	x8, x9, x21\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[1] * B[2]\n\t"
-        "mul	x6, x9, x18\n\t"
-        "umulh	x7, x9, x18\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "mul	x7, x10, x20\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "umulh	x8, x10, x20\n\t"
+        "adc	x6, xzr, xzr\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[2] * B[1]\n\t"
-        "mul	x6, x10, x17\n\t"
-        "umulh	x7, x10, x17\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "mul	x7, x11, x19\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "umulh	x8, x11, x19\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[3] * B[0]\n\t"
-        "mul	x6, x11, x16\n\t"
-        "umulh	x7, x11, x16\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
-        "str	x3, [%[tmp], 24]\n\t"
+        "mul	x7, x12, x17\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "umulh	x8, x12, x17\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "adds	x4, x4, x7\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "str	x4, [%[tmp], 24]\n\t"
+        "adc	x6, x6, xzr\n\t"
         "#  A[0] * B[4]\n\t"
-        "mul	x6, x8, x20\n\t"
-        "umulh	x7, x8, x20\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, xzr, xzr\n\t"
+        "mul	x7, x9, x22\n\t"
+        "umulh	x8, x9, x22\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[1] * B[3]\n\t"
-        "mul	x6, x9, x19\n\t"
-        "umulh	x7, x9, x19\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
+        "mul	x7, x10, x21\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "umulh	x8, x10, x21\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[2] * B[2]\n\t"
-        "mul	x6, x10, x18\n\t"
-        "umulh	x7, x10, x18\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
+        "mul	x7, x11, x20\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "umulh	x8, x11, x20\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[3] * B[1]\n\t"
-        "mul	x6, x11, x17\n\t"
-        "umulh	x7, x11, x17\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
+        "mul	x7, x12, x19\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "umulh	x8, x12, x19\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[4] * B[0]\n\t"
-        "mul	x6, x12, x16\n\t"
-        "umulh	x7, x12, x16\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
-        "str	x4, [%[tmp], 32]\n\t"
+        "mul	x7, x13, x17\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "umulh	x8, x13, x17\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "adds	x5, x5, x7\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "str	x5, [%[tmp], 32]\n\t"
+        "adc	x4, x4, xzr\n\t"
         "#  A[0] * B[5]\n\t"
-        "mul	x6, x8, x21\n\t"
-        "umulh	x7, x8, x21\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, xzr, xzr\n\t"
+        "mul	x7, x9, x23\n\t"
+        "umulh	x8, x9, x23\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[1] * B[4]\n\t"
-        "mul	x6, x9, x20\n\t"
-        "umulh	x7, x9, x20\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "mul	x7, x10, x22\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "umulh	x8, x10, x22\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[2] * B[3]\n\t"
-        "mul	x6, x10, x19\n\t"
-        "umulh	x7, x10, x19\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "mul	x7, x11, x21\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "umulh	x8, x11, x21\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[3] * B[2]\n\t"
-        "mul	x6, x11, x18\n\t"
-        "umulh	x7, x11, x18\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "mul	x7, x12, x20\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "umulh	x8, x12, x20\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[4] * B[1]\n\t"
-        "mul	x6, x12, x17\n\t"
-        "umulh	x7, x12, x17\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "mul	x7, x13, x19\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "umulh	x8, x13, x19\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[5] * B[0]\n\t"
-        "mul	x6, x13, x16\n\t"
-        "umulh	x7, x13, x16\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
-        "str	x5, [%[tmp], 40]\n\t"
+        "mul	x7, x14, x17\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "umulh	x8, x14, x17\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "adds	x6, x6, x7\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "str	x6, [%[tmp], 40]\n\t"
+        "adc	x5, x5, xzr\n\t"
         "#  A[0] * B[6]\n\t"
-        "mul	x6, x8, x22\n\t"
-        "umulh	x7, x8, x22\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, xzr, xzr\n\t"
+        "mul	x7, x9, x24\n\t"
+        "umulh	x8, x9, x24\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[1] * B[5]\n\t"
-        "mul	x6, x9, x21\n\t"
-        "umulh	x7, x9, x21\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "mul	x7, x10, x23\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "umulh	x8, x10, x23\n\t"
+        "adc	x6, xzr, xzr\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[2] * B[4]\n\t"
-        "mul	x6, x10, x20\n\t"
-        "umulh	x7, x10, x20\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "mul	x7, x11, x22\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "umulh	x8, x11, x22\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[3] * B[3]\n\t"
-        "mul	x6, x11, x19\n\t"
-        "umulh	x7, x11, x19\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "mul	x7, x12, x21\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "umulh	x8, x12, x21\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[4] * B[2]\n\t"
-        "mul	x6, x12, x18\n\t"
-        "umulh	x7, x12, x18\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "mul	x7, x13, x20\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "umulh	x8, x13, x20\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[5] * B[1]\n\t"
-        "mul	x6, x13, x17\n\t"
-        "umulh	x7, x13, x17\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "mul	x7, x14, x19\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "umulh	x8, x14, x19\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[6] * B[0]\n\t"
-        "mul	x6, x14, x16\n\t"
-        "umulh	x7, x14, x16\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
-        "str	x3, [%[tmp], 48]\n\t"
+        "mul	x7, x15, x17\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "umulh	x8, x15, x17\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "adds	x4, x4, x7\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "str	x4, [%[tmp], 48]\n\t"
+        "adc	x6, x6, xzr\n\t"
         "#  A[0] * B[7]\n\t"
-        "mul	x6, x8, x23\n\t"
-        "umulh	x7, x8, x23\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, xzr, xzr\n\t"
+        "mul	x7, x9, x25\n\t"
+        "umulh	x8, x9, x25\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[1] * B[6]\n\t"
-        "mul	x6, x9, x22\n\t"
-        "umulh	x7, x9, x22\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
+        "mul	x7, x10, x24\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "umulh	x8, x10, x24\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[2] * B[5]\n\t"
-        "mul	x6, x10, x21\n\t"
-        "umulh	x7, x10, x21\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
+        "mul	x7, x11, x23\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "umulh	x8, x11, x23\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[3] * B[4]\n\t"
-        "mul	x6, x11, x20\n\t"
-        "umulh	x7, x11, x20\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
+        "mul	x7, x12, x22\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "umulh	x8, x12, x22\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[4] * B[3]\n\t"
-        "mul	x6, x12, x19\n\t"
-        "umulh	x7, x12, x19\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
+        "mul	x7, x13, x21\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "umulh	x8, x13, x21\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[5] * B[2]\n\t"
-        "mul	x6, x13, x18\n\t"
-        "umulh	x7, x13, x18\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
+        "mul	x7, x14, x20\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "umulh	x8, x14, x20\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[6] * B[1]\n\t"
-        "mul	x6, x14, x17\n\t"
-        "umulh	x7, x14, x17\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
+        "mul	x7, x15, x19\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "umulh	x8, x15, x19\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[7] * B[0]\n\t"
-        "mul	x6, x15, x16\n\t"
-        "umulh	x7, x15, x16\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
-        "str	x4, [%[tmp], 56]\n\t"
+        "mul	x7, x16, x17\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "umulh	x8, x16, x17\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "adds	x5, x5, x7\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "str	x5, [%[tmp], 56]\n\t"
+        "adc	x4, x4, xzr\n\t"
         "#  A[1] * B[7]\n\t"
-        "mul	x6, x9, x23\n\t"
-        "umulh	x7, x9, x23\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, xzr, xzr\n\t"
+        "mul	x7, x10, x25\n\t"
+        "umulh	x8, x10, x25\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[2] * B[6]\n\t"
-        "mul	x6, x10, x22\n\t"
-        "umulh	x7, x10, x22\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "mul	x7, x11, x24\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "umulh	x8, x11, x24\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[3] * B[5]\n\t"
-        "mul	x6, x11, x21\n\t"
-        "umulh	x7, x11, x21\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "mul	x7, x12, x23\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "umulh	x8, x12, x23\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[4] * B[4]\n\t"
-        "mul	x6, x12, x20\n\t"
-        "umulh	x7, x12, x20\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "mul	x7, x13, x22\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "umulh	x8, x13, x22\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[5] * B[3]\n\t"
-        "mul	x6, x13, x19\n\t"
-        "umulh	x7, x13, x19\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "mul	x7, x14, x21\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "umulh	x8, x14, x21\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[6] * B[2]\n\t"
-        "mul	x6, x14, x18\n\t"
-        "umulh	x7, x14, x18\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "mul	x7, x15, x20\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "umulh	x8, x15, x20\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[7] * B[1]\n\t"
-        "mul	x6, x15, x17\n\t"
-        "umulh	x7, x15, x17\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
-        "str	x5, [%[r], 64]\n\t"
+        "mul	x7, x16, x19\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "umulh	x8, x16, x19\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "adds	x6, x6, x7\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "str	x6, [%[r], 64]\n\t"
+        "adc	x5, x5, xzr\n\t"
         "#  A[2] * B[7]\n\t"
-        "mul	x6, x10, x23\n\t"
-        "umulh	x7, x10, x23\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, xzr, xzr\n\t"
+        "mul	x7, x11, x25\n\t"
+        "umulh	x8, x11, x25\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[3] * B[6]\n\t"
-        "mul	x6, x11, x22\n\t"
-        "umulh	x7, x11, x22\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "mul	x7, x12, x24\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "umulh	x8, x12, x24\n\t"
+        "adc	x6, xzr, xzr\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[4] * B[5]\n\t"
-        "mul	x6, x12, x21\n\t"
-        "umulh	x7, x12, x21\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "mul	x7, x13, x23\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "umulh	x8, x13, x23\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[5] * B[4]\n\t"
-        "mul	x6, x13, x20\n\t"
-        "umulh	x7, x13, x20\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "mul	x7, x14, x22\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "umulh	x8, x14, x22\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[6] * B[3]\n\t"
-        "mul	x6, x14, x19\n\t"
-        "umulh	x7, x14, x19\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "mul	x7, x15, x21\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "umulh	x8, x15, x21\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[7] * B[2]\n\t"
-        "mul	x6, x15, x18\n\t"
-        "umulh	x7, x15, x18\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
-        "str	x3, [%[r], 72]\n\t"
+        "mul	x7, x16, x20\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "umulh	x8, x16, x20\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "adds	x4, x4, x7\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "str	x4, [%[r], 72]\n\t"
+        "adc	x6, x6, xzr\n\t"
         "#  A[3] * B[7]\n\t"
-        "mul	x6, x11, x23\n\t"
-        "umulh	x7, x11, x23\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, xzr, xzr\n\t"
+        "mul	x7, x12, x25\n\t"
+        "umulh	x8, x12, x25\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[4] * B[6]\n\t"
-        "mul	x6, x12, x22\n\t"
-        "umulh	x7, x12, x22\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
+        "mul	x7, x13, x24\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "umulh	x8, x13, x24\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[5] * B[5]\n\t"
-        "mul	x6, x13, x21\n\t"
-        "umulh	x7, x13, x21\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
+        "mul	x7, x14, x23\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "umulh	x8, x14, x23\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[6] * B[4]\n\t"
-        "mul	x6, x14, x20\n\t"
-        "umulh	x7, x14, x20\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
+        "mul	x7, x15, x22\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "umulh	x8, x15, x22\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[7] * B[3]\n\t"
-        "mul	x6, x15, x19\n\t"
-        "umulh	x7, x15, x19\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
-        "str	x4, [%[r], 80]\n\t"
+        "mul	x7, x16, x21\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "umulh	x8, x16, x21\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "adds	x5, x5, x7\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "str	x5, [%[r], 80]\n\t"
+        "adc	x4, x4, xzr\n\t"
         "#  A[4] * B[7]\n\t"
-        "mul	x6, x12, x23\n\t"
-        "umulh	x7, x12, x23\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, xzr, xzr\n\t"
+        "mul	x7, x13, x25\n\t"
+        "umulh	x8, x13, x25\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[5] * B[6]\n\t"
-        "mul	x6, x13, x22\n\t"
-        "umulh	x7, x13, x22\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "mul	x7, x14, x24\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "umulh	x8, x14, x24\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[6] * B[5]\n\t"
-        "mul	x6, x14, x21\n\t"
-        "umulh	x7, x14, x21\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "mul	x7, x15, x23\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "umulh	x8, x15, x23\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[7] * B[4]\n\t"
-        "mul	x6, x15, x20\n\t"
-        "umulh	x7, x15, x20\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
-        "str	x5, [%[r], 88]\n\t"
+        "mul	x7, x16, x22\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "umulh	x8, x16, x22\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "adds	x6, x6, x7\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "str	x6, [%[r], 88]\n\t"
+        "adc	x5, x5, xzr\n\t"
         "#  A[5] * B[7]\n\t"
-        "mul	x6, x13, x23\n\t"
-        "umulh	x7, x13, x23\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, xzr, xzr\n\t"
+        "mul	x7, x14, x25\n\t"
+        "umulh	x8, x14, x25\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[6] * B[6]\n\t"
-        "mul	x6, x14, x22\n\t"
-        "umulh	x7, x14, x22\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "mul	x7, x15, x24\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "umulh	x8, x15, x24\n\t"
+        "adc	x6, xzr, xzr\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[7] * B[5]\n\t"
-        "mul	x6, x15, x21\n\t"
-        "umulh	x7, x15, x21\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
-        "str	x3, [%[r], 96]\n\t"
+        "mul	x7, x16, x23\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "umulh	x8, x16, x23\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "adds	x4, x4, x7\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "str	x4, [%[r], 96]\n\t"
+        "adc	x6, x6, xzr\n\t"
         "#  A[6] * B[7]\n\t"
-        "mul	x6, x14, x23\n\t"
-        "umulh	x7, x14, x23\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, xzr, xzr\n\t"
+        "mul	x7, x15, x25\n\t"
+        "umulh	x8, x15, x25\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[7] * B[6]\n\t"
-        "mul	x6, x15, x22\n\t"
-        "umulh	x7, x15, x22\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
-        "str	x4, [%[r], 104]\n\t"
+        "mul	x7, x16, x24\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "umulh	x8, x16, x24\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x5, x5, x7\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "str	x5, [%[r], 104]\n\t"
+        "adc	x4, x4, xzr\n\t"
         "#  A[7] * B[7]\n\t"
-        "mul	x6, x15, x23\n\t"
-        "umulh	x7, x15, x23\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adc	x3, x3, x7\n\t"
-        "stp	x5, x3, [%[r], 112]\n\t"
+        "mul	x7, x16, x25\n\t"
+        "umulh	x8, x16, x25\n\t"
+        "adds	x6, x6, x7\n\t"
+        "adc	x4, x4, x8\n\t"
+        "stp	x6, x4, [%[r], 112]\n\t"
+        "ldp	x9, x10, [%[tmp], 0]\n\t"
+        "ldp	x11, x12, [%[tmp], 16]\n\t"
+        "ldp	x13, x14, [%[tmp], 32]\n\t"
+        "ldp	x15, x16, [%[tmp], 48]\n\t"
+        "stp	x9, x10, [%[r], 0]\n\t"
+        "stp	x11, x12, [%[r], 16]\n\t"
+        "stp	x13, x14, [%[r], 32]\n\t"
+        "stp	x15, x16, [%[r], 48]\n\t"
         :
         : [r] "r" (r), [a] "r" (a), [b] "r" (b), [tmp] "r" (tmp)
-        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23"
-    );
-
-    XMEMCPY(r, tmp, sizeof(tmp));
+        : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25"
+    );
 }
 
 /* Square a and put result in r. (r = a * a)
@@ -608,308 +633,238 @@
  */
 static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a)
 {
-    sp_digit tmp[8];
-
-    __asm__ __volatile__ (
-        "ldp	x10, x11, [%[a], 0]\n\t"
-        "ldp	x12, x13, [%[a], 16]\n\t"
-        "ldp	x14, x15, [%[a], 32]\n\t"
-        "ldp	x16, x17, [%[a], 48]\n\t"
-        "#  A[0] * A[0]\n\t"
-        "mul	x2, x10, x10\n\t"
-        "umulh	x3, x10, x10\n\t"
-        "str	x2, [%[tmp]]\n\t"
-        "mov	x4, 0\n\t"
+    __asm__ __volatile__ (
+        "ldp       x22, x23, [%[a], 0]\n\t"
+        "ldp       x24, x25, [%[a], 16]\n\t"
+        "ldp       x26, x27, [%[a], 32]\n\t"
+        "ldp       x28, x29, [%[a], 48]\n\t"
         "#  A[0] * A[1]\n\t"
-        "mul	x8, x10, x11\n\t"
-        "umulh	x9, x10, x11\n\t"
-        "adds	x3, x3, x8\n\t"
-        "adcs	x4, x4, x9\n\t"
-        "adc	x2, xzr, xzr\n\t"
-        "adds	x3, x3, x8\n\t"
-        "adcs	x4, x4, x9\n\t"
-        "adc	x2, x2, xzr\n\t"
-        "str	x3, [%[tmp], 8]\n\t"
+        "mul	x3, x22, x23\n\t"
+        "umulh	x7, x22, x23\n\t"
         "#  A[0] * A[2]\n\t"
-        "mul	x8, x10, x12\n\t"
-        "umulh	x9, x10, x12\n\t"
-        "adds	x4, x4, x8\n\t"
-        "adcs	x2, x2, x9\n\t"
-        "adc	x3, xzr, xzr\n\t"
-        "adds	x4, x4, x8\n\t"
-        "adcs	x2, x2, x9\n\t"
-        "adc	x3, x3, xzr\n\t"
-        "#  A[1] * A[1]\n\t"
-        "mul	x8, x11, x11\n\t"
-        "umulh	x9, x11, x11\n\t"
-        "adds	x4, x4, x8\n\t"
-        "adcs	x2, x2, x9\n\t"
-        "adc	x3, x3, xzr\n\t"
-        "str	x4, [%[tmp], 16]\n\t"
+        "mul	x4, x22, x24\n\t"
+        "umulh	x5, x22, x24\n\t"
+        "adds	x7, x7, x4\n\t"
         "#  A[0] * A[3]\n\t"
-        "mul	x8, x10, x13\n\t"
-        "umulh	x9, x10, x13\n\t"
-        "adds	x2, x2, x8\n\t"
-        "adcs	x3, x3, x9\n\t"
-        "adc	x4, xzr, xzr\n\t"
-        "adds	x2, x2, x8\n\t"
-        "adcs	x3, x3, x9\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "mul	x4, x22, x25\n\t"
+        "adc	x8, xzr, x5\n\t"
+        "umulh	x5, x22, x25\n\t"
+        "adds	x8, x8, x4\n\t"
         "#  A[1] * A[2]\n\t"
-        "mul	x8, x11, x12\n\t"
-        "umulh	x9, x11, x12\n\t"
-        "adds	x2, x2, x8\n\t"
-        "adcs	x3, x3, x9\n\t"
-        "adc	x4, x4, xzr\n\t"
-        "adds	x2, x2, x8\n\t"
-        "adcs	x3, x3, x9\n\t"
-        "adc	x4, x4, xzr\n\t"
-        "str	x2, [%[tmp], 24]\n\t"
+        "mul	x4, x23, x24\n\t"
+        "adc	x9, xzr, x5\n\t"
+        "umulh	x5, x23, x24\n\t"
+        "adds	x8, x8, x4\n\t"
         "#  A[0] * A[4]\n\t"
-        "mul	x8, x10, x14\n\t"
-        "umulh	x9, x10, x14\n\t"
-        "adds	x3, x3, x8\n\t"
-        "adcs	x4, x4, x9\n\t"
-        "adc	x2, xzr, xzr\n\t"
-        "adds	x3, x3, x8\n\t"
-        "adcs	x4, x4, x9\n\t"
-        "adc	x2, x2, xzr\n\t"
+        "mul	x4, x22, x26\n\t"
+        "adcs	x9, x9, x5\n\t"
+        "umulh	x5, x22, x26\n\t"
+        "adc	x10, xzr, xzr\n\t"
+        "adds	x9, x9, x4\n\t"
         "#  A[1] * A[3]\n\t"
-        "mul	x8, x11, x13\n\t"
-        "umulh	x9, x11, x13\n\t"
-        "adds	x3, x3, x8\n\t"
-        "adcs	x4, x4, x9\n\t"
-        "adc	x2, x2, xzr\n\t"
-        "adds	x3, x3, x8\n\t"
-        "adcs	x4, x4, x9\n\t"
-        "adc	x2, x2, xzr\n\t"
-        "#  A[2] * A[2]\n\t"
-        "mul	x8, x12, x12\n\t"
-        "umulh	x9, x12, x12\n\t"
-        "adds	x3, x3, x8\n\t"
-        "adcs	x4, x4, x9\n\t"
-        "adc	x2, x2, xzr\n\t"
-        "str	x3, [%[tmp], 32]\n\t"
+        "mul	x4, x23, x25\n\t"
+        "adc	x10, x10, x5\n\t"
+        "umulh	x5, x23, x25\n\t"
+        "adds	x9, x9, x4\n\t"
         "#  A[0] * A[5]\n\t"
-        "mul	x5, x10, x15\n\t"
-        "umulh	x6, x10, x15\n\t"
-        "mov	x3, 0\n\t"
-        "mov	x7, 0\n\t"
+        "mul	x4, x22, x27\n\t"
+        "adcs	x10, x10, x5\n\t"
+        "umulh	x5, x22, x27\n\t"
+        "adc	x11, xzr, xzr\n\t"
+        "adds	x10, x10, x4\n\t"
         "#  A[1] * A[4]\n\t"
-        "mul	x8, x11, x14\n\t"
-        "umulh	x9, x11, x14\n\t"
-        "adds	x5, x5, x8\n\t"
-        "adcs	x6, x6, x9\n\t"
-        "adc	x7, x7, xzr\n\t"
+        "mul	x4, x23, x26\n\t"
+        "adc	x11, x11, x5\n\t"
+        "umulh	x5, x23, x26\n\t"
+        "adds	x10, x10, x4\n\t"
         "#  A[2] * A[3]\n\t"
-        "mul	x8, x12, x13\n\t"
-        "umulh	x9, x12, x13\n\t"
-        "adds	x5, x5, x8\n\t"
-        "adcs	x6, x6, x9\n\t"
-        "adc	x7, x7, xzr\n\t"
-        "adds	x5, x5, x5\n\t"
-        "adcs	x6, x6, x6\n\t"
-        "adc	x7, x7, x7\n\t"
-        "adds	x4, x4, x5\n\t"
-        "adcs	x2, x2, x6\n\t"
-        "adc	x3, x3, x7\n\t"
-        "str	x4, [%[tmp], 40]\n\t"
+        "mul	x4, x24, x25\n\t"
+        "adcs	x11, x11, x5\n\t"
+        "umulh	x5, x24, x25\n\t"
+        "adc	x12, xzr, xzr\n\t"
+        "adds	x10, x10, x4\n\t"
         "#  A[0] * A[6]\n\t"
-        "mul	x5, x10, x16\n\t"
-        "umulh	x6, x10, x16\n\t"
-        "mov	x4, 0\n\t"
-        "mov	x7, 0\n\t"
+        "mul	x4, x22, x28\n\t"
+        "adcs	x11, x11, x5\n\t"
+        "umulh	x5, x22, x28\n\t"
+        "adc	x12, x12, xzr\n\t"
+        "adds	x11, x11, x4\n\t"
         "#  A[1] * A[5]\n\t"
-        "mul	x8, x11, x15\n\t"
-        "umulh	x9, x11, x15\n\t"
-        "adds	x5, x5, x8\n\t"
-        "adcs	x6, x6, x9\n\t"
-        "adc	x7, x7, xzr\n\t"
+        "mul	x4, x23, x27\n\t"
+        "adcs	x12, x12, x5\n\t"
+        "umulh	x5, x23, x27\n\t"
+        "adc	x13, xzr, xzr\n\t"
+        "adds	x11, x11, x4\n\t"
         "#  A[2] * A[4]\n\t"
-        "mul	x8, x12, x14\n\t"
-        "umulh	x9, x12, x14\n\t"
-        "adds	x5, x5, x8\n\t"
-        "adcs	x6, x6, x9\n\t"
-        "adc	x7, x7, xzr\n\t"
-        "#  A[3] * A[3]\n\t"
-        "mul	x8, x13, x13\n\t"
-        "umulh	x9, x13, x13\n\t"
-        "adds	x5, x5, x5\n\t"
-        "adcs	x6, x6, x6\n\t"
-        "adc	x7, x7, x7\n\t"
-        "adds	x5, x5, x8\n\t"
-        "adcs	x6, x6, x9\n\t"
-        "adc	x7, x7, xzr\n\t"
-        "adds	x2, x2, x5\n\t"
-        "adcs	x3, x3, x6\n\t"
-        "adc	x4, x4, x7\n\t"
-        "str	x2, [%[tmp], 48]\n\t"
+        "mul	x4, x24, x26\n\t"
+        "adcs	x12, x12, x5\n\t"
+        "umulh	x5, x24, x26\n\t"
+        "adc	x13, x13, xzr\n\t"
+        "adds	x11, x11, x4\n\t"
         "#  A[0] * A[7]\n\t"
-        "mul	x5, x10, x17\n\t"
-        "umulh	x6, x10, x17\n\t"
-        "mov	x2, 0\n\t"
-        "mov	x7, 0\n\t"
+        "mul	x4, x22, x29\n\t"
+        "adcs	x12, x12, x5\n\t"
+        "umulh	x5, x22, x29\n\t"
+        "adc	x13, x13, xzr\n\t"
+        "adds	x12, x12, x4\n\t"
         "#  A[1] * A[6]\n\t"
-        "mul	x8, x11, x16\n\t"
-        "umulh	x9, x11, x16\n\t"
-        "adds	x5, x5, x8\n\t"
-        "adcs	x6, x6, x9\n\t"
-        "adc	x7, x7, xzr\n\t"
+        "mul	x4, x23, x28\n\t"
+        "adcs	x13, x13, x5\n\t"
+        "umulh	x5, x23, x28\n\t"
+        "adc	x14, xzr, xzr\n\t"
+        "adds	x12, x12, x4\n\t"
         "#  A[2] * A[5]\n\t"
-        "mul	x8, x12, x15\n\t"
-        "umulh	x9, x12, x15\n\t"
-        "adds	x5, x5, x8\n\t"
-        "adcs	x6, x6, x9\n\t"
-        "adc	x7, x7, xzr\n\t"
+        "mul	x4, x24, x27\n\t"
+        "adcs	x13, x13, x5\n\t"
+        "umulh	x5, x24, x27\n\t"
+        "adc	x14, x14, xzr\n\t"
+        "adds	x12, x12, x4\n\t"
         "#  A[3] * A[4]\n\t"
-        "mul	x8, x13, x14\n\t"
-        "umulh	x9, x13, x14\n\t"
-        "adds	x5, x5, x8\n\t"
-        "adcs	x6, x6, x9\n\t"
-        "adc	x7, x7, xzr\n\t"
-        "adds	x5, x5, x5\n\t"
-        "adcs	x6, x6, x6\n\t"
-        "adc	x7, x7, x7\n\t"
-        "adds	x3, x3, x5\n\t"
-        "adcs	x4, x4, x6\n\t"
-        "adc	x2, x2, x7\n\t"
-        "str	x3, [%[tmp], 56]\n\t"
+        "mul	x4, x25, x26\n\t"
+        "adcs	x13, x13, x5\n\t"
+        "umulh	x5, x25, x26\n\t"
+        "adc	x14, x14, xzr\n\t"
+        "adds	x12, x12, x4\n\t"
         "#  A[1] * A[7]\n\t"
-        "mul	x5, x11, x17\n\t"
-        "umulh	x6, x11, x17\n\t"
-        "mov	x3, 0\n\t"
-        "mov	x7, 0\n\t"
+        "mul	x4, x23, x29\n\t"
+        "adcs	x13, x13, x5\n\t"
+        "umulh	x5, x23, x29\n\t"
+        "adc	x14, x14, xzr\n\t"
+        "adds	x13, x13, x4\n\t"
         "#  A[2] * A[6]\n\t"
-        "mul	x8, x12, x16\n\t"
-        "umulh	x9, x12, x16\n\t"
-        "adds	x5, x5, x8\n\t"
-        "adcs	x6, x6, x9\n\t"
-        "adc	x7, x7, xzr\n\t"
+        "mul	x4, x24, x28\n\t"
+        "adcs	x14, x14, x5\n\t"
+        "umulh	x5, x24, x28\n\t"
+        "adc	x15, xzr, xzr\n\t"
+        "adds	x13, x13, x4\n\t"
         "#  A[3] * A[5]\n\t"
-        "mul	x8, x13, x15\n\t"
-        "umulh	x9, x13, x15\n\t"
-        "adds	x5, x5, x8\n\t"
-        "adcs	x6, x6, x9\n\t"
-        "adc	x7, x7, xzr\n\t"
-        "#  A[4] * A[4]\n\t"
-        "mul	x8, x14, x14\n\t"
-        "umulh	x9, x14, x14\n\t"
-        "adds	x5, x5, x5\n\t"
-        "adcs	x6, x6, x6\n\t"
-        "adc	x7, x7, x7\n\t"
-        "adds	x5, x5, x8\n\t"
-        "adcs	x6, x6, x9\n\t"
-        "adc	x7, x7, xzr\n\t"
-        "adds	x4, x4, x5\n\t"
-        "adcs	x2, x2, x6\n\t"
-        "adc	x3, x3, x7\n\t"
-        "str	x4, [%[r], 64]\n\t"
+        "mul	x4, x25, x27\n\t"
+        "adcs	x14, x14, x5\n\t"
+        "umulh	x5, x25, x27\n\t"
+        "adc	x15, x15, xzr\n\t"
+        "adds	x13, x13, x4\n\t"
         "#  A[2] * A[7]\n\t"
-        "mul	x5, x12, x17\n\t"
-        "umulh	x6, x12, x17\n\t"
-        "mov	x4, 0\n\t"
-        "mov	x7, 0\n\t"
+        "mul	x4, x24, x29\n\t"
+        "adcs	x14, x14, x5\n\t"
+        "umulh	x5, x24, x29\n\t"
+        "adc	x15, x15, xzr\n\t"
+        "adds	x14, x14, x4\n\t"
         "#  A[3] * A[6]\n\t"
-        "mul	x8, x13, x16\n\t"
-        "umulh	x9, x13, x16\n\t"
-        "adds	x5, x5, x8\n\t"
-        "adcs	x6, x6, x9\n\t"
-        "adc	x7, x7, xzr\n\t"
+        "mul	x4, x25, x28\n\t"
+        "adcs	x15, x15, x5\n\t"
+        "umulh	x5, x25, x28\n\t"
+        "adc	x16, xzr, xzr\n\t"
+        "adds	x14, x14, x4\n\t"
         "#  A[4] * A[5]\n\t"
-        "mul	x8, x14, x15\n\t"
-        "umulh	x9, x14, x15\n\t"
-        "adds	x5, x5, x8\n\t"
-        "adcs	x6, x6, x9\n\t"
-        "adc	x7, x7, xzr\n\t"
-        "adds	x5, x5, x5\n\t"
-        "adcs	x6, x6, x6\n\t"
-        "adc	x7, x7, x7\n\t"
-        "adds	x2, x2, x5\n\t"
-        "adcs	x3, x3, x6\n\t"
-        "adc	x4, x4, x7\n\t"
-        "str	x2, [%[r], 72]\n\t"
+        "mul	x4, x26, x27\n\t"
+        "adcs	x15, x15, x5\n\t"
+        "umulh	x5, x26, x27\n\t"
+        "adc	x16, x16, xzr\n\t"
+        "adds	x14, x14, x4\n\t"
         "#  A[3] * A[7]\n\t"
-        "mul	x8, x13, x17\n\t"
-        "umulh	x9, x13, x17\n\t"
-        "adds	x3, x3, x8\n\t"
-        "adcs	x4, x4, x9\n\t"
-        "adc	x2, xzr, xzr\n\t"
-        "adds	x3, x3, x8\n\t"
-        "adcs	x4, x4, x9\n\t"
-        "adc	x2, x2, xzr\n\t"
+        "mul	x4, x25, x29\n\t"
+        "adcs	x15, x15, x5\n\t"
+        "umulh	x5, x25, x29\n\t"
+        "adc	x16, x16, xzr\n\t"
+        "adds	x15, x15, x4\n\t"
         "#  A[4] * A[6]\n\t"
-        "mul	x8, x14, x16\n\t"
-        "umulh	x9, x14, x16\n\t"
-        "adds	x3, x3, x8\n\t"
-        "adcs	x4, x4, x9\n\t"
-        "adc	x2, x2, xzr\n\t"
-        "adds	x3, x3, x8\n\t"
-        "adcs	x4, x4, x9\n\t"
-        "adc	x2, x2, xzr\n\t"
-        "#  A[5] * A[5]\n\t"
-        "mul	x8, x15, x15\n\t"
-        "umulh	x9, x15, x15\n\t"
-        "adds	x3, x3, x8\n\t"
-        "adcs	x4, x4, x9\n\t"
-        "adc	x2, x2, xzr\n\t"
-        "str	x3, [%[r], 80]\n\t"
+        "mul	x4, x26, x28\n\t"
+        "adcs	x16, x16, x5\n\t"
+        "umulh	x5, x26, x28\n\t"
+        "adc	x17, xzr, xzr\n\t"
+        "adds	x15, x15, x4\n\t"
         "#  A[4] * A[7]\n\t"
-        "mul	x8, x14, x17\n\t"
-        "umulh	x9, x14, x17\n\t"
-        "adds	x4, x4, x8\n\t"
-        "adcs	x2, x2, x9\n\t"
-        "adc	x3, xzr, xzr\n\t"
-        "adds	x4, x4, x8\n\t"
-        "adcs	x2, x2, x9\n\t"
-        "adc	x3, x3, xzr\n\t"
+        "mul	x4, x26, x29\n\t"
+        "adcs	x16, x16, x5\n\t"
+        "umulh	x5, x26, x29\n\t"
+        "adc	x17, x17, xzr\n\t"
+        "adds	x16, x16, x4\n\t"
         "#  A[5] * A[6]\n\t"
-        "mul	x8, x15, x16\n\t"
-        "umulh	x9, x15, x16\n\t"
-        "adds	x4, x4, x8\n\t"
-        "adcs	x2, x2, x9\n\t"
-        "adc	x3, x3, xzr\n\t"
-        "adds	x4, x4, x8\n\t"
-        "adcs	x2, x2, x9\n\t"
-        "adc	x3, x3, xzr\n\t"
-        "str	x4, [%[r], 88]\n\t"
+        "mul	x4, x27, x28\n\t"
+        "adcs	x17, x17, x5\n\t"
+        "umulh	x5, x27, x28\n\t"
+        "adc	x19, xzr, xzr\n\t"
+        "adds	x16, x16, x4\n\t"
         "#  A[5] * A[7]\n\t"
-        "mul	x8, x15, x17\n\t"
-        "umulh	x9, x15, x17\n\t"
-        "adds	x2, x2, x8\n\t"
-        "adcs	x3, x3, x9\n\t"
-        "adc	x4, xzr, xzr\n\t"
-        "adds	x2, x2, x8\n\t"
-        "adcs	x3, x3, x9\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "mul	x4, x27, x29\n\t"
+        "adcs	x17, x17, x5\n\t"
+        "umulh	x5, x27, x29\n\t"
+        "adc	x19, x19, xzr\n\t"
+        "adds	x17, x17, x4\n\t"
+        "#  A[6] * A[7]\n\t"
+        "mul	x4, x28, x29\n\t"
+        "adcs	x19, x19, x5\n\t"
+        "umulh	x5, x28, x29\n\t"
+        "adc	x20, xzr, xzr\n\t"
+        "adds	x19, x19, x4\n\t"
+        "adc	x20, x20, x5\n\t"
+        "# Double\n\t"
+        "adds	x3, x3, x3\n\t"
+        "adcs	x7, x7, x7\n\t"
+        "adcs	x8, x8, x8\n\t"
+        "adcs	x9, x9, x9\n\t"
+        "adcs	x10, x10, x10\n\t"
+        "adcs	x11, x11, x11\n\t"
+        "adcs	x12, x12, x12\n\t"
+        "adcs	x13, x13, x13\n\t"
+        "adcs	x14, x14, x14\n\t"
+        "adcs	x15, x15, x15\n\t"
+        "adcs	x16, x16, x16\n\t"
+        "adcs	x17, x17, x17\n\t"
+        "adcs	x19, x19, x19\n\t"
+        "#  A[0] * A[0]\n\t"
+        "mul	x2, x22, x22\n\t"
+        "adcs	x20, x20, x20\n\t"
+        "umulh	x4, x22, x22\n\t"
+        "cset  x21, cs\n\t"
+        "#  A[1] * A[1]\n\t"
+        "mul	x5, x23, x23\n\t"
+        "adds	x3, x3, x4\n\t"
+        "umulh	x6, x23, x23\n\t"
+        "adcs	x7, x7, x5\n\t"
+        "#  A[2] * A[2]\n\t"
+        "mul	x4, x24, x24\n\t"
+        "adcs	x8, x8, x6\n\t"
+        "umulh	x5, x24, x24\n\t"
+        "adcs	x9, x9, x4\n\t"
+        "#  A[3] * A[3]\n\t"
+        "mul	x6, x25, x25\n\t"
+        "adcs	x10, x10, x5\n\t"
+        "umulh	x4, x25, x25\n\t"
+        "adcs	x11, x11, x6\n\t"
+        "#  A[4] * A[4]\n\t"
+        "mul	x5, x26, x26\n\t"
+        "adcs	x12, x12, x4\n\t"
+        "umulh	x6, x26, x26\n\t"
+        "adcs	x13, x13, x5\n\t"
+        "#  A[5] * A[5]\n\t"
+        "mul	x4, x27, x27\n\t"
+        "adcs	x14, x14, x6\n\t"
+        "umulh	x5, x27, x27\n\t"
+        "adcs	x15, x15, x4\n\t"
         "#  A[6] * A[6]\n\t"
-        "mul	x8, x16, x16\n\t"
-        "umulh	x9, x16, x16\n\t"
-        "adds	x2, x2, x8\n\t"
-        "adcs	x3, x3, x9\n\t"
-        "adc	x4, x4, xzr\n\t"
-        "str	x2, [%[r], 96]\n\t"
-        "#  A[6] * A[7]\n\t"
-        "mul	x8, x16, x17\n\t"
-        "umulh	x9, x16, x17\n\t"
-        "adds	x3, x3, x8\n\t"
-        "adcs	x4, x4, x9\n\t"
-        "adc	x2, xzr, xzr\n\t"
-        "adds	x3, x3, x8\n\t"
-        "adcs	x4, x4, x9\n\t"
-        "adc	x2, x2, xzr\n\t"
-        "str	x3, [%[r], 104]\n\t"
+        "mul	x6, x28, x28\n\t"
+        "adcs	x16, x16, x5\n\t"
+        "umulh	x4, x28, x28\n\t"
+        "adcs	x17, x17, x6\n\t"
         "#  A[7] * A[7]\n\t"
-        "mul	x8, x17, x17\n\t"
-        "umulh	x9, x17, x17\n\t"
-        "adds	x4, x4, x8\n\t"
-        "adc	x2, x2, x9\n\t"
-        "stp	x4, x2, [%[r], 112]\n\t"
-        :
-        : [r] "r" (r), [a] "r" (a), [tmp] "r" (tmp)
-        : "memory", "x2", "x3", "x4", "x8", "x9", "x10", "x5", "x6", "x7", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17"
-    );
-
-    XMEMCPY(r, tmp, sizeof(tmp));
+        "mul	x5, x29, x29\n\t"
+        "adcs	x19, x19, x4\n\t"
+        "umulh	x6, x29, x29\n\t"
+        "adcs	x20, x20, x5\n\t"
+        "stp	x2, x3, [%[r], 0]\n\t"
+        "adc	x21, x21, x6\n\t"
+        "stp	x7, x8, [%[r], 16]\n\t"
+        "stp	x9, x10, [%[r], 32]\n\t"
+        "stp	x11, x12, [%[r], 48]\n\t"
+        "stp	x13, x14, [%[r], 64]\n\t"
+        "stp	x15, x16, [%[r], 80]\n\t"
+        "stp	x17, x19, [%[r], 96]\n\t"
+        "stp	x20, x21, [%[r], 112]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "x4", "x5", "x6", "x2", "x3", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "x29"
+    );
 }
 
 /* Add b to a into r. (r = a + b)
@@ -921,36 +876,34 @@
 static sp_digit sp_2048_add_8(sp_digit* r, const sp_digit* a,
         const sp_digit* b)
 {
-    sp_digit c = 0;
-
     __asm__ __volatile__ (
         "ldp	x3, x4, [%[a], 0]\n\t"
-        "ldp	x5, x6, [%[a], 16]\n\t"
         "ldp	x7, x8, [%[b], 0]\n\t"
-        "ldp	x9, x10, [%[b], 16]\n\t"
         "adds	x3, x3, x7\n\t"
-        "adcs	x4, x4, x8\n\t"
-        "adcs	x5, x5, x9\n\t"
-        "adcs	x6, x6, x10\n\t"
+        "ldp	x5, x6, [%[a], 16]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 16]\n\t"
+        "adcs	x5, x5, x9\n\t"
         "stp	x3, x4, [%[r], 0]\n\t"
+        "adcs	x6, x6, x10\n\t"
         "stp	x5, x6, [%[r], 16]\n\t"
         "ldp	x3, x4, [%[a], 32]\n\t"
+        "ldp	x7, x8, [%[b], 32]\n\t"
+        "adcs	x3, x3, x7\n\t"
         "ldp	x5, x6, [%[a], 48]\n\t"
-        "ldp	x7, x8, [%[b], 32]\n\t"
+        "adcs	x4, x4, x8\n\t"
         "ldp	x9, x10, [%[b], 48]\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adcs	x4, x4, x8\n\t"
-        "adcs	x5, x5, x9\n\t"
-        "adcs	x6, x6, x10\n\t"
+        "adcs	x5, x5, x9\n\t"
         "stp	x3, x4, [%[r], 32]\n\t"
+        "adcs	x6, x6, x10\n\t"
         "stp	x5, x6, [%[r], 48]\n\t"
-        "cset	%[c], cs\n\t"
-        : [c] "+r" (c)
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        "cset	%[r], cs\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
         : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10"
     );
 
-    return c;
+    return (sp_digit)r;
 }
 
 /* Sub b from a into a. (a -= b)
@@ -960,56 +913,54 @@
  */
 static sp_digit sp_2048_sub_in_place_16(sp_digit* a, const sp_digit* b)
 {
-    sp_digit c = 0;
-
     __asm__ __volatile__ (
         "ldp	x2, x3, [%[a], 0]\n\t"
-        "ldp	x4, x5, [%[a], 16]\n\t"
         "ldp	x6, x7, [%[b], 0]\n\t"
-        "ldp	x8, x9, [%[b], 16]\n\t"
         "subs	x2, x2, x6\n\t"
-        "sbcs	x3, x3, x7\n\t"
-        "sbcs	x4, x4, x8\n\t"
-        "sbcs	x5, x5, x9\n\t"
+        "ldp	x4, x5, [%[a], 16]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x8, x9, [%[b], 16]\n\t"
+        "sbcs	x4, x4, x8\n\t"
         "stp	x2, x3, [%[a], 0]\n\t"
+        "sbcs	x5, x5, x9\n\t"
         "stp	x4, x5, [%[a], 16]\n\t"
         "ldp	x2, x3, [%[a], 32]\n\t"
+        "ldp	x6, x7, [%[b], 32]\n\t"
+        "sbcs	x2, x2, x6\n\t"
         "ldp	x4, x5, [%[a], 48]\n\t"
-        "ldp	x6, x7, [%[b], 32]\n\t"
+        "sbcs	x3, x3, x7\n\t"
         "ldp	x8, x9, [%[b], 48]\n\t"
-        "sbcs	x2, x2, x6\n\t"
-        "sbcs	x3, x3, x7\n\t"
-        "sbcs	x4, x4, x8\n\t"
-        "sbcs	x5, x5, x9\n\t"
+        "sbcs	x4, x4, x8\n\t"
         "stp	x2, x3, [%[a], 32]\n\t"
+        "sbcs	x5, x5, x9\n\t"
         "stp	x4, x5, [%[a], 48]\n\t"
         "ldp	x2, x3, [%[a], 64]\n\t"
+        "ldp	x6, x7, [%[b], 64]\n\t"
+        "sbcs	x2, x2, x6\n\t"
         "ldp	x4, x5, [%[a], 80]\n\t"
-        "ldp	x6, x7, [%[b], 64]\n\t"
+        "sbcs	x3, x3, x7\n\t"
         "ldp	x8, x9, [%[b], 80]\n\t"
-        "sbcs	x2, x2, x6\n\t"
-        "sbcs	x3, x3, x7\n\t"
-        "sbcs	x4, x4, x8\n\t"
-        "sbcs	x5, x5, x9\n\t"
+        "sbcs	x4, x4, x8\n\t"
         "stp	x2, x3, [%[a], 64]\n\t"
+        "sbcs	x5, x5, x9\n\t"
         "stp	x4, x5, [%[a], 80]\n\t"
         "ldp	x2, x3, [%[a], 96]\n\t"
+        "ldp	x6, x7, [%[b], 96]\n\t"
+        "sbcs	x2, x2, x6\n\t"
         "ldp	x4, x5, [%[a], 112]\n\t"
-        "ldp	x6, x7, [%[b], 96]\n\t"
+        "sbcs	x3, x3, x7\n\t"
         "ldp	x8, x9, [%[b], 112]\n\t"
-        "sbcs	x2, x2, x6\n\t"
-        "sbcs	x3, x3, x7\n\t"
-        "sbcs	x4, x4, x8\n\t"
-        "sbcs	x5, x5, x9\n\t"
+        "sbcs	x4, x4, x8\n\t"
         "stp	x2, x3, [%[a], 96]\n\t"
+        "sbcs	x5, x5, x9\n\t"
         "stp	x4, x5, [%[a], 112]\n\t"
-        "csetm	%[c], cc\n\t"
-        : [c] "+r" (c)
-        : [a] "r" (a), [b] "r" (b)
+        "csetm	%[a], cc\n\t"
+        : [a] "+r" (a)
+        : [b] "r" (b)
         : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
     );
 
-    return c;
+    return (sp_digit)a;
 }
 
 /* Add b to a into r. (r = a + b)
@@ -1021,56 +972,54 @@
 static sp_digit sp_2048_add_16(sp_digit* r, const sp_digit* a,
         const sp_digit* b)
 {
-    sp_digit c = 0;
-
     __asm__ __volatile__ (
         "ldp	x3, x4, [%[a], 0]\n\t"
-        "ldp	x5, x6, [%[a], 16]\n\t"
         "ldp	x7, x8, [%[b], 0]\n\t"
-        "ldp	x9, x10, [%[b], 16]\n\t"
         "adds	x3, x3, x7\n\t"
-        "adcs	x4, x4, x8\n\t"
-        "adcs	x5, x5, x9\n\t"
-        "adcs	x6, x6, x10\n\t"
+        "ldp	x5, x6, [%[a], 16]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 16]\n\t"
+        "adcs	x5, x5, x9\n\t"
         "stp	x3, x4, [%[r], 0]\n\t"
+        "adcs	x6, x6, x10\n\t"
         "stp	x5, x6, [%[r], 16]\n\t"
         "ldp	x3, x4, [%[a], 32]\n\t"
+        "ldp	x7, x8, [%[b], 32]\n\t"
+        "adcs	x3, x3, x7\n\t"
         "ldp	x5, x6, [%[a], 48]\n\t"
-        "ldp	x7, x8, [%[b], 32]\n\t"
+        "adcs	x4, x4, x8\n\t"
         "ldp	x9, x10, [%[b], 48]\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adcs	x4, x4, x8\n\t"
-        "adcs	x5, x5, x9\n\t"
-        "adcs	x6, x6, x10\n\t"
+        "adcs	x5, x5, x9\n\t"
         "stp	x3, x4, [%[r], 32]\n\t"
+        "adcs	x6, x6, x10\n\t"
         "stp	x5, x6, [%[r], 48]\n\t"
         "ldp	x3, x4, [%[a], 64]\n\t"
+        "ldp	x7, x8, [%[b], 64]\n\t"
+        "adcs	x3, x3, x7\n\t"
         "ldp	x5, x6, [%[a], 80]\n\t"
-        "ldp	x7, x8, [%[b], 64]\n\t"
+        "adcs	x4, x4, x8\n\t"
         "ldp	x9, x10, [%[b], 80]\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adcs	x4, x4, x8\n\t"
-        "adcs	x5, x5, x9\n\t"
-        "adcs	x6, x6, x10\n\t"
+        "adcs	x5, x5, x9\n\t"
         "stp	x3, x4, [%[r], 64]\n\t"
+        "adcs	x6, x6, x10\n\t"
         "stp	x5, x6, [%[r], 80]\n\t"
         "ldp	x3, x4, [%[a], 96]\n\t"
+        "ldp	x7, x8, [%[b], 96]\n\t"
+        "adcs	x3, x3, x7\n\t"
         "ldp	x5, x6, [%[a], 112]\n\t"
-        "ldp	x7, x8, [%[b], 96]\n\t"
+        "adcs	x4, x4, x8\n\t"
         "ldp	x9, x10, [%[b], 112]\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adcs	x4, x4, x8\n\t"
-        "adcs	x5, x5, x9\n\t"
-        "adcs	x6, x6, x10\n\t"
+        "adcs	x5, x5, x9\n\t"
         "stp	x3, x4, [%[r], 96]\n\t"
+        "adcs	x6, x6, x10\n\t"
         "stp	x5, x6, [%[r], 112]\n\t"
-        "cset	%[c], cs\n\t"
-        : [c] "+r" (c)
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        "cset	%[r], cs\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
         : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10"
     );
 
-    return c;
+    return (sp_digit)r;
 }
 
 /* AND m into each word of a and store in r.
@@ -1079,13 +1028,14 @@
  * a  A single precision integer.
  * m  Mask to AND against each digit.
  */
-static void sp_2048_mask_8(sp_digit* r, sp_digit* a, sp_digit m)
-{
-#ifdef WOLFSSL_SP_SMALL
-    int i;
-
-    for (i=0; i<8; i++)
+static void sp_2048_mask_8(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<8; i++) {
         r[i] = a[i] & m;
+    }
 #else
     r[0] = a[0] & m;
     r[1] = a[1] & m;
@@ -1098,13 +1048,45 @@
 #endif
 }
 
+/* Add digit to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+static void sp_2048_add_zero_8(sp_digit* r, const sp_digit* a,
+        const sp_digit d)
+{
+    __asm__ __volatile__ (
+        "ldp	x3, x4, [%[a], 0]\n\t"
+        "ldp	x5, x6, [%[a], 16]\n\t"
+        "adds	x3, x3, %[d]\n\t"
+        "adcs	x4, x4, xzr\n\t"
+        "adcs	x5, x5, xzr\n\t"
+        "stp	x3, x4, [%[r], 0]\n\t"
+        "adcs	x6, x6, xzr\n\t"
+        "stp	x5, x6, [%[r], 16]\n\t"
+        "ldp	x3, x4, [%[a], 32]\n\t"
+        "ldp	x5, x6, [%[a], 48]\n\t"
+        "adcs	x3, x3, xzr\n\t"
+        "adcs	x4, x4, xzr\n\t"
+        "adcs	x5, x5, xzr\n\t"
+        "stp	x3, x4, [%[r], 32]\n\t"
+        "adcs	x6, x6, xzr\n\t"
+        "stp	x5, x6, [%[r], 48]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [d] "r" (d)
+        : "memory", "x3", "x4", "x5", "x6"
+    );
+}
+
 /* Multiply a and b into r. (r = a * b)
  *
  * r  A single precision integer.
  * a  A single precision integer.
  * b  A single precision integer.
  */
-static void sp_2048_mul_16(sp_digit* r, const sp_digit* a,
+SP_NOINLINE static void sp_2048_mul_16(sp_digit* r, const sp_digit* a,
         const sp_digit* b)
 {
     sp_digit* z0 = r;
@@ -1126,17 +1108,86 @@
     u += sp_2048_sub_in_place_16(z1, z2);
     u += sp_2048_sub_in_place_16(z1, z0);
     u += sp_2048_add_16(r + 8, r + 8, z1);
-    r[24] = u;
-    XMEMSET(r + 24 + 1, 0, sizeof(sp_digit) * (8 - 1));
-    sp_2048_add_16(r + 16, r + 16, z2);
-}
-
+    u += sp_2048_add_8(r + 16, r + 16, z2);
+    sp_2048_add_zero_8(r + 24, z2 + 8, u);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Double a into r. (r = a + a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+static sp_digit sp_2048_dbl_8(sp_digit* r, const sp_digit* a)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "add	x11, %[a], 64\n\t"
+        "\n1:\n\t"
+        "adds	%[c], %[c], #-1\n\t"
+        "ldp	x3, x4, [%[a]], #16\n\t"
+        "ldp	x5, x6, [%[a]], #16\n\t"
+        "adcs	x3, x3, x3\n\t"
+        "adcs	x4, x4, x4\n\t"
+        "adcs	x5, x5, x5\n\t"
+        "stp	x3, x4, [%[r]], #16\n\t"
+        "adcs	x6, x6, x6\n\t"
+        "stp	x5, x6, [%[r]], #16\n\t"
+        "cset	%[c], cs\n\t"
+        "cmp	%[a], x11\n\t"
+        "b.ne	1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a)
+        :
+        : "memory", "x3", "x4", "x5", "x6", "x11"
+    );
+
+    return c;
+}
+
+#else
+/* Double a into r. (r = a + a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+static sp_digit sp_2048_dbl_8(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "ldp	x3, x4, [%[a], 0]\n\t"
+        "adds	x3, x3, x3\n\t"
+        "ldr	x5, [%[a], 16]\n\t"
+        "adcs	x4, x4, x4\n\t"
+        "ldr	x6, [%[a], 24]\n\t"
+        "adcs	x5, x5, x5\n\t"
+        "stp	x3, x4, [%[r], 0]\n\t"
+        "adcs	x6, x6, x6\n\t"
+        "stp	x5, x6, [%[r], 16]\n\t"
+        "ldp	x3, x4, [%[a], 32]\n\t"
+        "adcs	x3, x3, x3\n\t"
+        "ldr	x5, [%[a], 48]\n\t"
+        "adcs	x4, x4, x4\n\t"
+        "ldr	x6, [%[a], 56]\n\t"
+        "adcs	x5, x5, x5\n\t"
+        "stp	x3, x4, [%[r], 32]\n\t"
+        "adcs	x6, x6, x6\n\t"
+        "stp	x5, x6, [%[r], 48]\n\t"
+        "cset	%[r], cs\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a)
+        : "memory", "x3", "x4", "x5", "x6"
+    );
+
+    return (sp_digit)r;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
 /* Square a and put result in r. (r = a * a)
  *
  * r  A single precision integer.
  * a  A single precision integer.
  */
-static void sp_2048_sqr_16(sp_digit* r, const sp_digit* a)
+SP_NOINLINE static void sp_2048_sqr_16(sp_digit* r, const sp_digit* a)
 {
     sp_digit* z0 = r;
     sp_digit z2[16];
@@ -1149,13 +1200,13 @@
     sp_2048_sqr_8(z2, &a[8]);
     sp_2048_sqr_8(z0, a);
     sp_2048_mask_8(r + 16, a1, 0 - u);
-    u += sp_2048_add_8(r + 16, r + 16, r + 16);
+    u += sp_2048_dbl_8(r + 16, r + 16);
     u += sp_2048_sub_in_place_16(z1, z2);
     u += sp_2048_sub_in_place_16(z1, z0);
     u += sp_2048_add_16(r + 8, r + 8, z1);
-    r[24] = u;
-    XMEMSET(r + 24 + 1, 0, sizeof(sp_digit) * (8 - 1));
-    sp_2048_add_16(r + 16, r + 16, z2);
+    u += sp_2048_add_8(r + 16, r + 16, z2);
+    sp_2048_add_zero_8(r + 24, z2 + 8, u);
+    
 }
 
 /* Sub b from a into a. (a -= b)
@@ -1165,96 +1216,94 @@
  */
 static sp_digit sp_2048_sub_in_place_32(sp_digit* a, const sp_digit* b)
 {
-    sp_digit c = 0;
-
     __asm__ __volatile__ (
         "ldp	x2, x3, [%[a], 0]\n\t"
-        "ldp	x4, x5, [%[a], 16]\n\t"
         "ldp	x6, x7, [%[b], 0]\n\t"
-        "ldp	x8, x9, [%[b], 16]\n\t"
         "subs	x2, x2, x6\n\t"
-        "sbcs	x3, x3, x7\n\t"
-        "sbcs	x4, x4, x8\n\t"
-        "sbcs	x5, x5, x9\n\t"
+        "ldp	x4, x5, [%[a], 16]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x8, x9, [%[b], 16]\n\t"
+        "sbcs	x4, x4, x8\n\t"
         "stp	x2, x3, [%[a], 0]\n\t"
+        "sbcs	x5, x5, x9\n\t"
         "stp	x4, x5, [%[a], 16]\n\t"
         "ldp	x2, x3, [%[a], 32]\n\t"
+        "ldp	x6, x7, [%[b], 32]\n\t"
+        "sbcs	x2, x2, x6\n\t"
         "ldp	x4, x5, [%[a], 48]\n\t"
-        "ldp	x6, x7, [%[b], 32]\n\t"
+        "sbcs	x3, x3, x7\n\t"
         "ldp	x8, x9, [%[b], 48]\n\t"
-        "sbcs	x2, x2, x6\n\t"
-        "sbcs	x3, x3, x7\n\t"
-        "sbcs	x4, x4, x8\n\t"
-        "sbcs	x5, x5, x9\n\t"
+        "sbcs	x4, x4, x8\n\t"
         "stp	x2, x3, [%[a], 32]\n\t"
+        "sbcs	x5, x5, x9\n\t"
         "stp	x4, x5, [%[a], 48]\n\t"
         "ldp	x2, x3, [%[a], 64]\n\t"
+        "ldp	x6, x7, [%[b], 64]\n\t"
+        "sbcs	x2, x2, x6\n\t"
         "ldp	x4, x5, [%[a], 80]\n\t"
-        "ldp	x6, x7, [%[b], 64]\n\t"
+        "sbcs	x3, x3, x7\n\t"
         "ldp	x8, x9, [%[b], 80]\n\t"
-        "sbcs	x2, x2, x6\n\t"
-        "sbcs	x3, x3, x7\n\t"
-        "sbcs	x4, x4, x8\n\t"
-        "sbcs	x5, x5, x9\n\t"
+        "sbcs	x4, x4, x8\n\t"
         "stp	x2, x3, [%[a], 64]\n\t"
+        "sbcs	x5, x5, x9\n\t"
         "stp	x4, x5, [%[a], 80]\n\t"
         "ldp	x2, x3, [%[a], 96]\n\t"
+        "ldp	x6, x7, [%[b], 96]\n\t"
+        "sbcs	x2, x2, x6\n\t"
         "ldp	x4, x5, [%[a], 112]\n\t"
-        "ldp	x6, x7, [%[b], 96]\n\t"
+        "sbcs	x3, x3, x7\n\t"
         "ldp	x8, x9, [%[b], 112]\n\t"
-        "sbcs	x2, x2, x6\n\t"
-        "sbcs	x3, x3, x7\n\t"
-        "sbcs	x4, x4, x8\n\t"
-        "sbcs	x5, x5, x9\n\t"
+        "sbcs	x4, x4, x8\n\t"
         "stp	x2, x3, [%[a], 96]\n\t"
+        "sbcs	x5, x5, x9\n\t"
         "stp	x4, x5, [%[a], 112]\n\t"
         "ldp	x2, x3, [%[a], 128]\n\t"
+        "ldp	x6, x7, [%[b], 128]\n\t"
+        "sbcs	x2, x2, x6\n\t"
         "ldp	x4, x5, [%[a], 144]\n\t"
-        "ldp	x6, x7, [%[b], 128]\n\t"
+        "sbcs	x3, x3, x7\n\t"
         "ldp	x8, x9, [%[b], 144]\n\t"
-        "sbcs	x2, x2, x6\n\t"
-        "sbcs	x3, x3, x7\n\t"
-        "sbcs	x4, x4, x8\n\t"
-        "sbcs	x5, x5, x9\n\t"
+        "sbcs	x4, x4, x8\n\t"
         "stp	x2, x3, [%[a], 128]\n\t"
+        "sbcs	x5, x5, x9\n\t"
         "stp	x4, x5, [%[a], 144]\n\t"
         "ldp	x2, x3, [%[a], 160]\n\t"
+        "ldp	x6, x7, [%[b], 160]\n\t"
+        "sbcs	x2, x2, x6\n\t"
         "ldp	x4, x5, [%[a], 176]\n\t"
-        "ldp	x6, x7, [%[b], 160]\n\t"
+        "sbcs	x3, x3, x7\n\t"
         "ldp	x8, x9, [%[b], 176]\n\t"
-        "sbcs	x2, x2, x6\n\t"
-        "sbcs	x3, x3, x7\n\t"
-        "sbcs	x4, x4, x8\n\t"
-        "sbcs	x5, x5, x9\n\t"
+        "sbcs	x4, x4, x8\n\t"
         "stp	x2, x3, [%[a], 160]\n\t"
+        "sbcs	x5, x5, x9\n\t"
         "stp	x4, x5, [%[a], 176]\n\t"
         "ldp	x2, x3, [%[a], 192]\n\t"
+        "ldp	x6, x7, [%[b], 192]\n\t"
+        "sbcs	x2, x2, x6\n\t"
         "ldp	x4, x5, [%[a], 208]\n\t"
-        "ldp	x6, x7, [%[b], 192]\n\t"
+        "sbcs	x3, x3, x7\n\t"
         "ldp	x8, x9, [%[b], 208]\n\t"
-        "sbcs	x2, x2, x6\n\t"
-        "sbcs	x3, x3, x7\n\t"
-        "sbcs	x4, x4, x8\n\t"
-        "sbcs	x5, x5, x9\n\t"
+        "sbcs	x4, x4, x8\n\t"
         "stp	x2, x3, [%[a], 192]\n\t"
+        "sbcs	x5, x5, x9\n\t"
         "stp	x4, x5, [%[a], 208]\n\t"
         "ldp	x2, x3, [%[a], 224]\n\t"
+        "ldp	x6, x7, [%[b], 224]\n\t"
+        "sbcs	x2, x2, x6\n\t"
         "ldp	x4, x5, [%[a], 240]\n\t"
-        "ldp	x6, x7, [%[b], 224]\n\t"
+        "sbcs	x3, x3, x7\n\t"
         "ldp	x8, x9, [%[b], 240]\n\t"
-        "sbcs	x2, x2, x6\n\t"
-        "sbcs	x3, x3, x7\n\t"
-        "sbcs	x4, x4, x8\n\t"
-        "sbcs	x5, x5, x9\n\t"
+        "sbcs	x4, x4, x8\n\t"
         "stp	x2, x3, [%[a], 224]\n\t"
+        "sbcs	x5, x5, x9\n\t"
         "stp	x4, x5, [%[a], 240]\n\t"
-        "csetm	%[c], cc\n\t"
-        : [c] "+r" (c)
-        : [a] "r" (a), [b] "r" (b)
+        "csetm	%[a], cc\n\t"
+        : [a] "+r" (a)
+        : [b] "r" (b)
         : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
     );
 
-    return c;
+    return (sp_digit)a;
 }
 
 /* Add b to a into r. (r = a + b)
@@ -1266,96 +1315,94 @@
 static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a,
         const sp_digit* b)
 {
-    sp_digit c = 0;
-
     __asm__ __volatile__ (
         "ldp	x3, x4, [%[a], 0]\n\t"
-        "ldp	x5, x6, [%[a], 16]\n\t"
         "ldp	x7, x8, [%[b], 0]\n\t"
-        "ldp	x9, x10, [%[b], 16]\n\t"
         "adds	x3, x3, x7\n\t"
-        "adcs	x4, x4, x8\n\t"
-        "adcs	x5, x5, x9\n\t"
-        "adcs	x6, x6, x10\n\t"
+        "ldp	x5, x6, [%[a], 16]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 16]\n\t"
+        "adcs	x5, x5, x9\n\t"
         "stp	x3, x4, [%[r], 0]\n\t"
+        "adcs	x6, x6, x10\n\t"
         "stp	x5, x6, [%[r], 16]\n\t"
         "ldp	x3, x4, [%[a], 32]\n\t"
+        "ldp	x7, x8, [%[b], 32]\n\t"
+        "adcs	x3, x3, x7\n\t"
         "ldp	x5, x6, [%[a], 48]\n\t"
-        "ldp	x7, x8, [%[b], 32]\n\t"
+        "adcs	x4, x4, x8\n\t"
         "ldp	x9, x10, [%[b], 48]\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adcs	x4, x4, x8\n\t"
-        "adcs	x5, x5, x9\n\t"
-        "adcs	x6, x6, x10\n\t"
+        "adcs	x5, x5, x9\n\t"
         "stp	x3, x4, [%[r], 32]\n\t"
+        "adcs	x6, x6, x10\n\t"
         "stp	x5, x6, [%[r], 48]\n\t"
         "ldp	x3, x4, [%[a], 64]\n\t"
+        "ldp	x7, x8, [%[b], 64]\n\t"
+        "adcs	x3, x3, x7\n\t"
         "ldp	x5, x6, [%[a], 80]\n\t"
-        "ldp	x7, x8, [%[b], 64]\n\t"
+        "adcs	x4, x4, x8\n\t"
         "ldp	x9, x10, [%[b], 80]\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adcs	x4, x4, x8\n\t"
-        "adcs	x5, x5, x9\n\t"
-        "adcs	x6, x6, x10\n\t"
+        "adcs	x5, x5, x9\n\t"
         "stp	x3, x4, [%[r], 64]\n\t"
+        "adcs	x6, x6, x10\n\t"
         "stp	x5, x6, [%[r], 80]\n\t"
         "ldp	x3, x4, [%[a], 96]\n\t"
+        "ldp	x7, x8, [%[b], 96]\n\t"
+        "adcs	x3, x3, x7\n\t"
         "ldp	x5, x6, [%[a], 112]\n\t"
-        "ldp	x7, x8, [%[b], 96]\n\t"
+        "adcs	x4, x4, x8\n\t"
         "ldp	x9, x10, [%[b], 112]\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adcs	x4, x4, x8\n\t"
-        "adcs	x5, x5, x9\n\t"
-        "adcs	x6, x6, x10\n\t"
+        "adcs	x5, x5, x9\n\t"
         "stp	x3, x4, [%[r], 96]\n\t"
+        "adcs	x6, x6, x10\n\t"
         "stp	x5, x6, [%[r], 112]\n\t"
         "ldp	x3, x4, [%[a], 128]\n\t"
+        "ldp	x7, x8, [%[b], 128]\n\t"
+        "adcs	x3, x3, x7\n\t"
         "ldp	x5, x6, [%[a], 144]\n\t"
-        "ldp	x7, x8, [%[b], 128]\n\t"
+        "adcs	x4, x4, x8\n\t"
         "ldp	x9, x10, [%[b], 144]\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adcs	x4, x4, x8\n\t"
-        "adcs	x5, x5, x9\n\t"
-        "adcs	x6, x6, x10\n\t"
+        "adcs	x5, x5, x9\n\t"
         "stp	x3, x4, [%[r], 128]\n\t"
+        "adcs	x6, x6, x10\n\t"
         "stp	x5, x6, [%[r], 144]\n\t"
         "ldp	x3, x4, [%[a], 160]\n\t"
+        "ldp	x7, x8, [%[b], 160]\n\t"
+        "adcs	x3, x3, x7\n\t"
         "ldp	x5, x6, [%[a], 176]\n\t"
-        "ldp	x7, x8, [%[b], 160]\n\t"
+        "adcs	x4, x4, x8\n\t"
         "ldp	x9, x10, [%[b], 176]\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adcs	x4, x4, x8\n\t"
-        "adcs	x5, x5, x9\n\t"
-        "adcs	x6, x6, x10\n\t"
+        "adcs	x5, x5, x9\n\t"
         "stp	x3, x4, [%[r], 160]\n\t"
+        "adcs	x6, x6, x10\n\t"
         "stp	x5, x6, [%[r], 176]\n\t"
         "ldp	x3, x4, [%[a], 192]\n\t"
+        "ldp	x7, x8, [%[b], 192]\n\t"
+        "adcs	x3, x3, x7\n\t"
         "ldp	x5, x6, [%[a], 208]\n\t"
-        "ldp	x7, x8, [%[b], 192]\n\t"
+        "adcs	x4, x4, x8\n\t"
         "ldp	x9, x10, [%[b], 208]\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adcs	x4, x4, x8\n\t"
-        "adcs	x5, x5, x9\n\t"
-        "adcs	x6, x6, x10\n\t"
+        "adcs	x5, x5, x9\n\t"
         "stp	x3, x4, [%[r], 192]\n\t"
+        "adcs	x6, x6, x10\n\t"
         "stp	x5, x6, [%[r], 208]\n\t"
         "ldp	x3, x4, [%[a], 224]\n\t"
+        "ldp	x7, x8, [%[b], 224]\n\t"
+        "adcs	x3, x3, x7\n\t"
         "ldp	x5, x6, [%[a], 240]\n\t"
-        "ldp	x7, x8, [%[b], 224]\n\t"
+        "adcs	x4, x4, x8\n\t"
         "ldp	x9, x10, [%[b], 240]\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adcs	x4, x4, x8\n\t"
-        "adcs	x5, x5, x9\n\t"
-        "adcs	x6, x6, x10\n\t"
+        "adcs	x5, x5, x9\n\t"
         "stp	x3, x4, [%[r], 224]\n\t"
+        "adcs	x6, x6, x10\n\t"
         "stp	x5, x6, [%[r], 240]\n\t"
-        "cset	%[c], cs\n\t"
-        : [c] "+r" (c)
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        "cset	%[r], cs\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
         : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10"
     );
 
-    return c;
+    return (sp_digit)r;
 }
 
 /* AND m into each word of a and store in r.
@@ -1364,13 +1411,14 @@
  * a  A single precision integer.
  * m  Mask to AND against each digit.
  */
-static void sp_2048_mask_16(sp_digit* r, sp_digit* a, sp_digit m)
-{
-#ifdef WOLFSSL_SP_SMALL
-    int i;
-
-    for (i=0; i<16; i++)
+static void sp_2048_mask_16(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<16; i++) {
         r[i] = a[i] & m;
+    }
 #else
     int i;
 
@@ -1387,13 +1435,61 @@
 #endif
 }
 
+/* Add digit to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+static void sp_2048_add_zero_16(sp_digit* r, const sp_digit* a,
+        const sp_digit d)
+{
+    __asm__ __volatile__ (
+        "ldp	x3, x4, [%[a], 0]\n\t"
+        "ldp	x5, x6, [%[a], 16]\n\t"
+        "adds	x3, x3, %[d]\n\t"
+        "adcs	x4, x4, xzr\n\t"
+        "adcs	x5, x5, xzr\n\t"
+        "stp	x3, x4, [%[r], 0]\n\t"
+        "adcs	x6, x6, xzr\n\t"
+        "stp	x5, x6, [%[r], 16]\n\t"
+        "ldp	x3, x4, [%[a], 32]\n\t"
+        "ldp	x5, x6, [%[a], 48]\n\t"
+        "adcs	x3, x3, xzr\n\t"
+        "adcs	x4, x4, xzr\n\t"
+        "adcs	x5, x5, xzr\n\t"
+        "stp	x3, x4, [%[r], 32]\n\t"
+        "adcs	x6, x6, xzr\n\t"
+        "stp	x5, x6, [%[r], 48]\n\t"
+        "ldp	x3, x4, [%[a], 64]\n\t"
+        "ldp	x5, x6, [%[a], 80]\n\t"
+        "adcs	x3, x3, xzr\n\t"
+        "adcs	x4, x4, xzr\n\t"
+        "adcs	x5, x5, xzr\n\t"
+        "stp	x3, x4, [%[r], 64]\n\t"
+        "adcs	x6, x6, xzr\n\t"
+        "stp	x5, x6, [%[r], 80]\n\t"
+        "ldp	x3, x4, [%[a], 96]\n\t"
+        "ldp	x5, x6, [%[a], 112]\n\t"
+        "adcs	x3, x3, xzr\n\t"
+        "adcs	x4, x4, xzr\n\t"
+        "adcs	x5, x5, xzr\n\t"
+        "stp	x3, x4, [%[r], 96]\n\t"
+        "adcs	x6, x6, xzr\n\t"
+        "stp	x5, x6, [%[r], 112]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [d] "r" (d)
+        : "memory", "x3", "x4", "x5", "x6"
+    );
+}
+
 /* Multiply a and b into r. (r = a * b)
  *
  * r  A single precision integer.
  * a  A single precision integer.
  * b  A single precision integer.
  */
-static void sp_2048_mul_32(sp_digit* r, const sp_digit* a,
+SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a,
         const sp_digit* b)
 {
     sp_digit* z0 = r;
@@ -1415,17 +1511,104 @@
     u += sp_2048_sub_in_place_32(z1, z2);
     u += sp_2048_sub_in_place_32(z1, z0);
     u += sp_2048_add_32(r + 16, r + 16, z1);
-    r[48] = u;
-    XMEMSET(r + 48 + 1, 0, sizeof(sp_digit) * (16 - 1));
-    sp_2048_add_32(r + 32, r + 32, z2);
-}
-
+    u += sp_2048_add_16(r + 32, r + 32, z2);
+    sp_2048_add_zero_16(r + 48, z2 + 16, u);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Double a into r. (r = a + a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+static sp_digit sp_2048_dbl_16(sp_digit* r, const sp_digit* a)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "add	x11, %[a], 128\n\t"
+        "\n1:\n\t"
+        "adds	%[c], %[c], #-1\n\t"
+        "ldp	x3, x4, [%[a]], #16\n\t"
+        "ldp	x5, x6, [%[a]], #16\n\t"
+        "adcs	x3, x3, x3\n\t"
+        "adcs	x4, x4, x4\n\t"
+        "adcs	x5, x5, x5\n\t"
+        "stp	x3, x4, [%[r]], #16\n\t"
+        "adcs	x6, x6, x6\n\t"
+        "stp	x5, x6, [%[r]], #16\n\t"
+        "cset	%[c], cs\n\t"
+        "cmp	%[a], x11\n\t"
+        "b.ne	1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a)
+        :
+        : "memory", "x3", "x4", "x5", "x6", "x11"
+    );
+
+    return c;
+}
+
+#else
+/* Double a into r. (r = a + a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+static sp_digit sp_2048_dbl_16(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "ldp	x3, x4, [%[a], 0]\n\t"
+        "adds	x3, x3, x3\n\t"
+        "ldr	x5, [%[a], 16]\n\t"
+        "adcs	x4, x4, x4\n\t"
+        "ldr	x6, [%[a], 24]\n\t"
+        "adcs	x5, x5, x5\n\t"
+        "stp	x3, x4, [%[r], 0]\n\t"
+        "adcs	x6, x6, x6\n\t"
+        "stp	x5, x6, [%[r], 16]\n\t"
+        "ldp	x3, x4, [%[a], 32]\n\t"
+        "adcs	x3, x3, x3\n\t"
+        "ldr	x5, [%[a], 48]\n\t"
+        "adcs	x4, x4, x4\n\t"
+        "ldr	x6, [%[a], 56]\n\t"
+        "adcs	x5, x5, x5\n\t"
+        "stp	x3, x4, [%[r], 32]\n\t"
+        "adcs	x6, x6, x6\n\t"
+        "stp	x5, x6, [%[r], 48]\n\t"
+        "ldp	x3, x4, [%[a], 64]\n\t"
+        "adcs	x3, x3, x3\n\t"
+        "ldr	x5, [%[a], 80]\n\t"
+        "adcs	x4, x4, x4\n\t"
+        "ldr	x6, [%[a], 88]\n\t"
+        "adcs	x5, x5, x5\n\t"
+        "stp	x3, x4, [%[r], 64]\n\t"
+        "adcs	x6, x6, x6\n\t"
+        "stp	x5, x6, [%[r], 80]\n\t"
+        "ldp	x3, x4, [%[a], 96]\n\t"
+        "adcs	x3, x3, x3\n\t"
+        "ldr	x5, [%[a], 112]\n\t"
+        "adcs	x4, x4, x4\n\t"
+        "ldr	x6, [%[a], 120]\n\t"
+        "adcs	x5, x5, x5\n\t"
+        "stp	x3, x4, [%[r], 96]\n\t"
+        "adcs	x6, x6, x6\n\t"
+        "stp	x5, x6, [%[r], 112]\n\t"
+        "cset	%[r], cs\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a)
+        : "memory", "x3", "x4", "x5", "x6"
+    );
+
+    return (sp_digit)r;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
 /* Square a and put result in r. (r = a * a)
  *
  * r  A single precision integer.
  * a  A single precision integer.
  */
-static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a)
+SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a)
 {
     sp_digit* z0 = r;
     sp_digit z2[32];
@@ -1438,16 +1621,16 @@
     sp_2048_sqr_16(z2, &a[16]);
     sp_2048_sqr_16(z0, a);
     sp_2048_mask_16(r + 32, a1, 0 - u);
-    u += sp_2048_add_16(r + 32, r + 32, r + 32);
+    u += sp_2048_dbl_16(r + 32, r + 32);
     u += sp_2048_sub_in_place_32(z1, z2);
     u += sp_2048_sub_in_place_32(z1, z0);
     u += sp_2048_add_32(r + 16, r + 16, z1);
-    r[48] = u;
-    XMEMSET(r + 48 + 1, 0, sizeof(sp_digit) * (16 - 1));
-    sp_2048_add_32(r + 32, r + 32, z2);
-}
-
-#endif /* WOLFSSL_SP_SMALL */
+    u += sp_2048_add_16(r + 32, r + 32, z2);
+    sp_2048_add_zero_16(r + 48, z2 + 16, u);
+    
+}
+
+#endif /* !WOLFSSL_SP_SMALL */
 #ifdef WOLFSSL_SP_SMALL
 /* Add b to a into r. (r = a + b)
  *
@@ -1467,12 +1650,12 @@
         "ldp	x3, x4, [%[a]], #16\n\t"
         "ldp	x5, x6, [%[a]], #16\n\t"
         "ldp	x7, x8, [%[b]], #16\n\t"
+        "adcs	x3, x3, x7\n\t"
         "ldp	x9, x10, [%[b]], #16\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adcs	x4, x4, x8\n\t"
-        "adcs	x5, x5, x9\n\t"
-        "adcs	x6, x6, x10\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "adcs	x5, x5, x9\n\t"
         "stp	x3, x4, [%[r]], #16\n\t"
+        "adcs	x6, x6, x10\n\t"
         "stp	x5, x6, [%[r]], #16\n\t"
         "cset	%[c], cs\n\t"
         "cmp	%[a], x11\n\t"
@@ -1503,12 +1686,12 @@
         "ldp	x2, x3, [%[a]]\n\t"
         "ldp	x4, x5, [%[a], #16]\n\t"
         "ldp	x6, x7, [%[b]], #16\n\t"
+        "sbcs	x2, x2, x6\n\t"
         "ldp	x8, x9, [%[b]], #16\n\t"
-        "sbcs	x2, x2, x6\n\t"
-        "sbcs	x3, x3, x7\n\t"
-        "sbcs	x4, x4, x8\n\t"
-        "sbcs	x5, x5, x9\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "sbcs	x4, x4, x8\n\t"
         "stp	x2, x3, [%[a]], #16\n\t"
+        "sbcs	x5, x5, x9\n\t"
         "stp	x4, x5, [%[a]], #16\n\t"
         "csetm	%[c], cc\n\t"
         "cmp	%[a], x10\n\t"
@@ -1639,7 +1822,7 @@
 }
 
 #endif /* WOLFSSL_SP_SMALL */
-#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA)
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
 #ifdef WOLFSSL_SP_SMALL
 /* AND m into each word of a and store in r.
  *
@@ -1647,12 +1830,13 @@
  * a  A single precision integer.
  * m  Mask to AND against each digit.
  */
-static void sp_2048_mask_16(sp_digit* r, sp_digit* a, sp_digit m)
-{
-    int i;
-
-    for (i=0; i<16; i++)
+static void sp_2048_mask_16(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+    int i;
+
+    for (i=0; i<16; i++) {
         r[i] = a[i] & m;
+    }
 }
 
 #endif /* WOLFSSL_SP_SMALL */
@@ -1675,12 +1859,12 @@
         "ldp	x3, x4, [%[a]], #16\n\t"
         "ldp	x5, x6, [%[a]], #16\n\t"
         "ldp	x7, x8, [%[b]], #16\n\t"
+        "adcs	x3, x3, x7\n\t"
         "ldp	x9, x10, [%[b]], #16\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adcs	x4, x4, x8\n\t"
-        "adcs	x5, x5, x9\n\t"
-        "adcs	x6, x6, x10\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "adcs	x5, x5, x9\n\t"
         "stp	x3, x4, [%[r]], #16\n\t"
+        "adcs	x6, x6, x10\n\t"
         "stp	x5, x6, [%[r]], #16\n\t"
         "cset	%[c], cs\n\t"
         "cmp	%[a], x11\n\t"
@@ -1711,12 +1895,12 @@
         "ldp	x2, x3, [%[a]]\n\t"
         "ldp	x4, x5, [%[a], #16]\n\t"
         "ldp	x6, x7, [%[b]], #16\n\t"
+        "sbcs	x2, x2, x6\n\t"
         "ldp	x8, x9, [%[b]], #16\n\t"
-        "sbcs	x2, x2, x6\n\t"
-        "sbcs	x3, x3, x7\n\t"
-        "sbcs	x4, x4, x8\n\t"
-        "sbcs	x5, x5, x9\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "sbcs	x4, x4, x8\n\t"
         "stp	x2, x3, [%[a]], #16\n\t"
+        "sbcs	x5, x5, x9\n\t"
         "stp	x4, x5, [%[a]], #16\n\t"
         "csetm	%[c], cc\n\t"
         "cmp	%[a], x10\n\t"
@@ -1847,14 +2031,14 @@
 }
 
 #endif /* WOLFSSL_SP_SMALL */
-#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
 
 /* Caclulate the bottom digit of -1/a mod 2^n.
  *
  * a    A single precision number.
  * rho  Bottom word of inverse.
  */
-static void sp_2048_mont_setup(sp_digit* a, sp_digit* rho)
+static void sp_2048_mont_setup(const sp_digit* a, sp_digit* rho)
 {
     sp_digit x, b;
 
@@ -1869,14 +2053,328 @@
     *rho = -x;
 }
 
-#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA)
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision digit.
+ */
+static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a,
+        sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    __asm__ __volatile__ (
+        "# A[0] * B\n\t"
+        "ldr	x8, [%[a]]\n\t"
+        "mul	x5, %[b], x8\n\t"
+        "umulh	x3, %[b], x8\n\t"
+        "mov	x4, 0\n\t"
+        "str	x5, [%[r]]\n\t"
+        "mov	x5, 0\n\t"
+        "mov	x9, #8\n\t"
+        "1:\n\t"
+        "ldr	x8, [%[a], x9]\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adds	x3, x3, x6\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "str	x3, [%[r], x9]\n\t"
+        "mov	x3, x4\n\t"
+        "mov	x4, x5\n\t"
+        "mov	x5, #0\n\t"
+        "add	x9, x9, #8\n\t"
+        "cmp	x9, 256\n\t"
+        "b.lt	1b\n\t"
+        "str	x3, [%[r], 256]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
+    );
+#else
+    __asm__ __volatile__ (
+        "# A[0] * B\n\t"
+        "ldp	x8, x9, [%[a]]\n\t"
+        "mul	x3, %[b], x8\n\t"
+        "umulh	x4, %[b], x8\n\t"
+        "mov	x5, 0\n\t"
+        "# A[1] * B\n\t"
+        "str	x3, [%[r]]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[2] * B\n\t"
+        "ldp	x8, x9, [%[a], 16]\n\t"
+        "str	x4, [%[r], 8]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "# A[3] * B\n\t"
+        "str	x5, [%[r], 16]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
+        "# A[4] * B\n\t"
+        "ldp	x8, x9, [%[a], 32]\n\t"
+        "str	x3, [%[r], 24]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[5] * B\n\t"
+        "str	x4, [%[r], 32]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "# A[6] * B\n\t"
+        "ldp	x8, x9, [%[a], 48]\n\t"
+        "str	x5, [%[r], 40]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
+        "# A[7] * B\n\t"
+        "str	x3, [%[r], 48]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[8] * B\n\t"
+        "ldp	x8, x9, [%[a], 64]\n\t"
+        "str	x4, [%[r], 56]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "# A[9] * B\n\t"
+        "str	x5, [%[r], 64]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
+        "# A[10] * B\n\t"
+        "ldp	x8, x9, [%[a], 80]\n\t"
+        "str	x3, [%[r], 72]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[11] * B\n\t"
+        "str	x4, [%[r], 80]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "# A[12] * B\n\t"
+        "ldp	x8, x9, [%[a], 96]\n\t"
+        "str	x5, [%[r], 88]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
+        "# A[13] * B\n\t"
+        "str	x3, [%[r], 96]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[14] * B\n\t"
+        "ldp	x8, x9, [%[a], 112]\n\t"
+        "str	x4, [%[r], 104]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "# A[15] * B\n\t"
+        "str	x5, [%[r], 112]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
+        "# A[16] * B\n\t"
+        "ldp	x8, x9, [%[a], 128]\n\t"
+        "str	x3, [%[r], 120]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[17] * B\n\t"
+        "str	x4, [%[r], 128]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "# A[18] * B\n\t"
+        "ldp	x8, x9, [%[a], 144]\n\t"
+        "str	x5, [%[r], 136]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
+        "# A[19] * B\n\t"
+        "str	x3, [%[r], 144]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[20] * B\n\t"
+        "ldp	x8, x9, [%[a], 160]\n\t"
+        "str	x4, [%[r], 152]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "# A[21] * B\n\t"
+        "str	x5, [%[r], 160]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
+        "# A[22] * B\n\t"
+        "ldp	x8, x9, [%[a], 176]\n\t"
+        "str	x3, [%[r], 168]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[23] * B\n\t"
+        "str	x4, [%[r], 176]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "# A[24] * B\n\t"
+        "ldp	x8, x9, [%[a], 192]\n\t"
+        "str	x5, [%[r], 184]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
+        "# A[25] * B\n\t"
+        "str	x3, [%[r], 192]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[26] * B\n\t"
+        "ldp	x8, x9, [%[a], 208]\n\t"
+        "str	x4, [%[r], 200]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "# A[27] * B\n\t"
+        "str	x5, [%[r], 208]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
+        "# A[28] * B\n\t"
+        "ldp	x8, x9, [%[a], 224]\n\t"
+        "str	x3, [%[r], 216]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[29] * B\n\t"
+        "str	x4, [%[r], 224]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "# A[30] * B\n\t"
+        "ldp	x8, x9, [%[a], 240]\n\t"
+        "str	x5, [%[r], 232]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
+        "# A[31] * B\n\t"
+        "str	x3, [%[r], 240]\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
+        "adc	x5, x5, x7\n\t"
+        "stp	x4, x5, [%[r], 248]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
+    );
+#endif
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
 /* r = 2^n mod m where n is the number of bits to reduce by.
  * Given m must be 2048 bits, just need to subtract.
  *
  * r  A single precision number.
- * m  A signle precision number.
- */
-static void sp_2048_mont_norm_16(sp_digit* r, sp_digit* m)
+ * m  A single precision number.
+ */
+static void sp_2048_mont_norm_16(sp_digit* r, const sp_digit* m)
 {
     XMEMSET(r, 0, sizeof(sp_digit) * 16);
 
@@ -1892,12 +2390,12 @@
  * b  A single precision number to subtract.
  * m  Mask value to apply.
  */
-static sp_digit sp_2048_cond_sub_16(sp_digit* r, sp_digit* a, sp_digit* b,
+static sp_digit sp_2048_cond_sub_16(sp_digit* r, const sp_digit* a, const sp_digit* b,
         sp_digit m)
 {
+#ifdef WOLFSSL_SP_SMALL
     sp_digit c = 0;
 
-#ifdef WOLFSSL_SP_SMALL
     __asm__ __volatile__ (
         "mov	x8, #0\n\t"
         "1:\n\t"
@@ -1913,99 +2411,77 @@
         "b.lt	1b\n\t"
         : [c] "+r" (c)
         : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
-        : "memory", "x4", "x6", "x5", "x7", "x8"
-    );
-#else
-    __asm__ __volatile__ (
-
-        "ldr		x4, [%[a], 0]\n\t"
-        "ldr		x6, [%[a], 8]\n\t"
-        "ldr		x5, [%[b], 0]\n\t"
-        "ldr		x7, [%[b], 8]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "subs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 0]\n\t"
-        "str		x6, [%[r], 8]\n\t"
-        "ldr		x4, [%[a], 16]\n\t"
-        "ldr		x6, [%[a], 24]\n\t"
-        "ldr		x5, [%[b], 16]\n\t"
-        "ldr		x7, [%[b], 24]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 16]\n\t"
-        "str		x6, [%[r], 24]\n\t"
-        "ldr		x4, [%[a], 32]\n\t"
-        "ldr		x6, [%[a], 40]\n\t"
-        "ldr		x5, [%[b], 32]\n\t"
-        "ldr		x7, [%[b], 40]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 32]\n\t"
-        "str		x6, [%[r], 40]\n\t"
-        "ldr		x4, [%[a], 48]\n\t"
-        "ldr		x6, [%[a], 56]\n\t"
-        "ldr		x5, [%[b], 48]\n\t"
-        "ldr		x7, [%[b], 56]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 48]\n\t"
-        "str		x6, [%[r], 56]\n\t"
-        "ldr		x4, [%[a], 64]\n\t"
-        "ldr		x6, [%[a], 72]\n\t"
-        "ldr		x5, [%[b], 64]\n\t"
-        "ldr		x7, [%[b], 72]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 64]\n\t"
-        "str		x6, [%[r], 72]\n\t"
-        "ldr		x4, [%[a], 80]\n\t"
-        "ldr		x6, [%[a], 88]\n\t"
-        "ldr		x5, [%[b], 80]\n\t"
-        "ldr		x7, [%[b], 88]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 80]\n\t"
-        "str		x6, [%[r], 88]\n\t"
-        "ldr		x4, [%[a], 96]\n\t"
-        "ldr		x6, [%[a], 104]\n\t"
-        "ldr		x5, [%[b], 96]\n\t"
-        "ldr		x7, [%[b], 104]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 96]\n\t"
-        "str		x6, [%[r], 104]\n\t"
-        "ldr		x4, [%[a], 112]\n\t"
-        "ldr		x6, [%[a], 120]\n\t"
-        "ldr		x5, [%[b], 112]\n\t"
-        "ldr		x7, [%[b], 120]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 112]\n\t"
-        "str		x6, [%[r], 120]\n\t"
-        "csetm	%[c], cc\n\t"
-        : [c] "+r" (c)
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
-        : "memory", "x4", "x6", "x5", "x7", "x8"
-    );
-#endif /* WOLFSSL_SP_SMALL */
+        : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12"
+    );
 
     return c;
+#else
+    __asm__ __volatile__ (
+
+        "ldp	x5, x7, [%[b], 0]\n\t"
+        "ldp	x11, x12, [%[b], 16]\n\t"
+        "ldp	x4, x6, [%[a], 0]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 16]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "subs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "sbcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 0]\n\t"
+        "sbcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 16]\n\t"
+        "ldp	x5, x7, [%[b], 32]\n\t"
+        "ldp	x11, x12, [%[b], 48]\n\t"
+        "ldp	x4, x6, [%[a], 32]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 48]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "sbcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "sbcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 32]\n\t"
+        "sbcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 48]\n\t"
+        "ldp	x5, x7, [%[b], 64]\n\t"
+        "ldp	x11, x12, [%[b], 80]\n\t"
+        "ldp	x4, x6, [%[a], 64]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 80]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "sbcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "sbcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 64]\n\t"
+        "sbcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 80]\n\t"
+        "ldp	x5, x7, [%[b], 96]\n\t"
+        "ldp	x11, x12, [%[b], 112]\n\t"
+        "ldp	x4, x6, [%[a], 96]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 112]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "sbcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "sbcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 96]\n\t"
+        "sbcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 112]\n\t"
+        "csetm	%[r], cc\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12"
+    );
+
+    return (sp_digit)r;
+#endif /* WOLFSSL_SP_SMALL */
 }
 
 /* Reduce the number back to 2048 bits using Montgomery reduction.
@@ -2014,179 +2490,175 @@
  * m   The single precision number representing the modulus.
  * mp  The digit representing the negative inverse of m mod 2^n.
  */
-SP_NOINLINE static void sp_2048_mont_reduce_16(sp_digit* a, sp_digit* m,
+SP_NOINLINE static void sp_2048_mont_reduce_16(sp_digit* a, const sp_digit* m,
         sp_digit mp)
 {
     sp_digit ca = 0;
 
     __asm__ __volatile__ (
-        "ldp       x12, x13, [%[m], 0]\n\t"
-        "ldp       x14, x15, [%[m], 16]\n\t"
-        "ldp       x16, x17, [%[m], 32]\n\t"
-        "ldp       x18, x19, [%[m], 48]\n\t"
-        "ldp       x20, x21, [%[m], 64]\n\t"
-        "ldp       x22, x23, [%[m], 80]\n\t"
-        "ldp       x24, x25, [%[m], 96]\n\t"
-        "ldp       x26, x27, [%[m], 112]\n\t"
-        "# i = 0\n\t"
-        "mov	x3, 0\n\t"
-        "ldp	x10, x11, [%[a], 0]\n\t"
+        "ldp	x14, x15, [%[m], 0]\n\t"
+        "ldp	x16, x17, [%[m], 16]\n\t"
+        "ldp	x19, x20, [%[m], 32]\n\t"
+        "ldp	x21, x22, [%[m], 48]\n\t"
+        "ldp	x23, x24, [%[m], 64]\n\t"
+        "ldp	x25, x26, [%[m], 80]\n\t"
+        "ldp	x27, x28, [%[m], 96]\n\t"
+        "# i = 16\n\t"
+        "mov	x4, 16\n\t"
+        "ldp	x12, x13, [%[a], 0]\n\t"
         "\n1:\n\t"
         "# mu = a[i] * mp\n\t"
-        "mul	x8, %[mp], x10\n\t"
+        "mul	x9, %[mp], x12\n\t"
         "# a[i+0] += m[0] * mu\n\t"
-        "ldr	x9, [%[a], 0]\n\t"
-        "mul		x6, x12, x8\n\t"
-        "umulh	x7, x12, x8\n\t"
-        "adds	x10, x10, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
+        "mul	x7, x14, x9\n\t"
+        "umulh	x8, x14, x9\n\t"
+        "adds	x12, x12, x7\n\t"
         "# a[i+1] += m[1] * mu\n\t"
-        "ldr	x9, [%[a], 8]\n\t"
-        "mul		x6, x13, x8\n\t"
-        "umulh	x7, x13, x8\n\t"
-        "adds	x10, x11, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x10, x10, x5\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "mul	x7, x15, x9\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "umulh	x8, x15, x9\n\t"
+        "adds	x12, x13, x7\n\t"
         "# a[i+2] += m[2] * mu\n\t"
-        "ldr	x11, [%[a], 16]\n\t"
-        "mul		x6, x14, x8\n\t"
-        "umulh	x7, x14, x8\n\t"
-        "adds	x11, x11, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x11, x11, x4\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x13, [%[a], 16]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "mul	x7, x16, x9\n\t"
+        "adds	x12, x12, x6\n\t"
+        "umulh	x8, x16, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "adds	x13, x13, x7\n\t"
         "# a[i+3] += m[3] * mu\n\t"
-        "ldr	x9, [%[a], 24]\n\t"
-        "mul		x6, x15, x8\n\t"
-        "umulh	x7, x15, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 24]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 24]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "mul	x7, x17, x9\n\t"
+        "adds	x13, x13, x5\n\t"
+        "umulh	x8, x17, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+4] += m[4] * mu\n\t"
-        "ldr	x9, [%[a], 32]\n\t"
-        "mul		x6, x16, x8\n\t"
-        "umulh	x7, x16, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 32]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 32]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x19, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x19, x9\n\t"
+        "str	x10, [%[a], 24]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+5] += m[5] * mu\n\t"
-        "ldr	x9, [%[a], 40]\n\t"
-        "mul		x6, x17, x8\n\t"
-        "umulh	x7, x17, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 40]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 40]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x20, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x20, x9\n\t"
+        "str	x11, [%[a], 32]\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+6] += m[6] * mu\n\t"
-        "ldr	x9, [%[a], 48]\n\t"
-        "mul		x6, x18, x8\n\t"
-        "umulh	x7, x18, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 48]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 48]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x21, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x21, x9\n\t"
+        "str	x10, [%[a], 40]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+7] += m[7] * mu\n\t"
-        "ldr	x9, [%[a], 56]\n\t"
-        "mul		x6, x19, x8\n\t"
-        "umulh	x7, x19, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 56]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 56]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x22, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x22, x9\n\t"
+        "str	x11, [%[a], 48]\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+8] += m[8] * mu\n\t"
-        "ldr	x9, [%[a], 64]\n\t"
-        "mul		x6, x20, x8\n\t"
-        "umulh	x7, x20, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 64]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 64]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x23, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x23, x9\n\t"
+        "str	x10, [%[a], 56]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+9] += m[9] * mu\n\t"
-        "ldr	x9, [%[a], 72]\n\t"
-        "mul		x6, x21, x8\n\t"
-        "umulh	x7, x21, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 72]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 72]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x24, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x24, x9\n\t"
+        "str	x11, [%[a], 64]\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+10] += m[10] * mu\n\t"
-        "ldr	x9, [%[a], 80]\n\t"
-        "mul		x6, x22, x8\n\t"
-        "umulh	x7, x22, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 80]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 80]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x25, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x25, x9\n\t"
+        "str	x10, [%[a], 72]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+11] += m[11] * mu\n\t"
-        "ldr	x9, [%[a], 88]\n\t"
-        "mul		x6, x23, x8\n\t"
-        "umulh	x7, x23, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 88]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 88]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x26, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x26, x9\n\t"
+        "str	x11, [%[a], 80]\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+12] += m[12] * mu\n\t"
-        "ldr	x9, [%[a], 96]\n\t"
-        "mul		x6, x24, x8\n\t"
-        "umulh	x7, x24, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 96]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 96]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x27, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x27, x9\n\t"
+        "str	x10, [%[a], 88]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+13] += m[13] * mu\n\t"
-        "ldr	x9, [%[a], 104]\n\t"
-        "mul		x6, x25, x8\n\t"
-        "umulh	x7, x25, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 104]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 104]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x28, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x28, x9\n\t"
+        "str	x11, [%[a], 96]\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+14] += m[14] * mu\n\t"
-        "ldr	x9, [%[a], 112]\n\t"
-        "mul		x6, x26, x8\n\t"
-        "umulh	x7, x26, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 112]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 112]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 112]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 104]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+15] += m[15] * mu\n\t"
-        "ldr	x9, [%[a], 120]\n\t"
-        "mul	x6, x27, x8\n\t"
-        "umulh	x7, x27, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x7, x7, %[ca]\n\t"
+        "ldr	x10, [%[a], 120]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 120]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "adds	x6, x6, x7\n\t"
+        "adcs	x8, x8, %[ca]\n\t"
+        "str	x11, [%[a], 112]\n\t"
         "cset  %[ca], cs\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 120]\n\t"
-        "ldr	x9, [%[a], 128]\n\t"
-        "adcs	x9, x9, x7\n\t"
-        "str	x9, [%[a], 128]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "ldr	x11, [%[a], 128]\n\t"
+        "str	x10, [%[a], 120]\n\t"
+        "adcs	x11, x11, x8\n\t"
+        "str	x11, [%[a], 128]\n\t"
         "adc	%[ca], %[ca], xzr\n\t"
-        "# i += 1\n\t"
+        "subs	x4, x4, 1\n\t"
         "add	%[a], %[a], 8\n\t"
-        "add	x3, x3, 8\n\t"
-        "cmp	x3, 128\n\t"
-        "blt	1b\n\t"
-        "str	x10, [%[a], 0]\n\t"
-        "str	x11, [%[a], 8]\n\t"
+        "bne	1b\n\t"
+        "stp	x12, x13, [%[a], 0]\n\t"
         : [ca] "+r" (ca), [a] "+r" (a)
         : [m] "r" (m), [mp] "r" (mp)
-        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27"
+        : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28"
     );
 
     sp_2048_cond_sub_16(a - 16, a, m, (sp_digit)0 - ca);
@@ -2201,8 +2673,8 @@
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_2048_mont_mul_16(sp_digit* r, sp_digit* a, sp_digit* b,
-        sp_digit* m, sp_digit mp)
+static void sp_2048_mont_mul_16(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
 {
     sp_2048_mul_16(r, a, b);
     sp_2048_mont_reduce_16(r, m, mp);
@@ -2215,7 +2687,7 @@
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_2048_mont_sqr_16(sp_digit* r, sp_digit* a, sp_digit* m,
+static void sp_2048_mont_sqr_16(sp_digit* r, const sp_digit* a, const sp_digit* m,
         sp_digit mp)
 {
     sp_2048_sqr_16(r, a);
@@ -2229,7 +2701,7 @@
  * b  A single precision digit.
  */
 static void sp_2048_mul_d_16(sp_digit* r, const sp_digit* a,
-        const sp_digit b)
+        sp_digit b)
 {
 #ifdef WOLFSSL_SP_SMALL
     __asm__ __volatile__ (
@@ -2258,153 +2730,144 @@
         "str	x3, [%[r], 128]\n\t"
         :
         : [r] "r" (r), [a] "r" (a), [b] "r" (b)
-        : "memory", "x3", "x4", "x5", "x6", "x7", "x8"
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
     );
 #else
     __asm__ __volatile__ (
         "# A[0] * B\n\t"
-        "ldr	x8, [%[a]]\n\t"
+        "ldp	x8, x9, [%[a]]\n\t"
         "mul	x3, %[b], x8\n\t"
         "umulh	x4, %[b], x8\n\t"
         "mov	x5, 0\n\t"
+        "# A[1] * B\n\t"
         "str	x3, [%[r]]\n\t"
-        "# A[1] * B\n\t"
-        "ldr		x8, [%[a], 8]\n\t"
-        "mov		x3, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc		x3, xzr, xzr\n\t"
-        "str		x4, [%[r], 8]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adds	x4, x4, x6\n\t"
         "# A[2] * B\n\t"
-        "ldr		x8, [%[a], 16]\n\t"
-        "mov		x4, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc		x4, xzr, xzr\n\t"
-        "str		x5, [%[r], 16]\n\t"
+        "ldp	x8, x9, [%[a], 16]\n\t"
+        "str	x4, [%[r], 8]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
         "# A[3] * B\n\t"
-        "ldr		x8, [%[a], 24]\n\t"
-        "mov		x5, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc		x5, xzr, xzr\n\t"
-        "str		x3, [%[r], 24]\n\t"
+        "str	x5, [%[r], 16]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
         "# A[4] * B\n\t"
-        "ldr		x8, [%[a], 32]\n\t"
-        "mov		x3, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc		x3, xzr, xzr\n\t"
-        "str		x4, [%[r], 32]\n\t"
+        "ldp	x8, x9, [%[a], 32]\n\t"
+        "str	x3, [%[r], 24]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
         "# A[5] * B\n\t"
-        "ldr		x8, [%[a], 40]\n\t"
-        "mov		x4, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc		x4, xzr, xzr\n\t"
-        "str		x5, [%[r], 40]\n\t"
+        "str	x4, [%[r], 32]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
         "# A[6] * B\n\t"
-        "ldr		x8, [%[a], 48]\n\t"
-        "mov		x5, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc		x5, xzr, xzr\n\t"
-        "str		x3, [%[r], 48]\n\t"
+        "ldp	x8, x9, [%[a], 48]\n\t"
+        "str	x5, [%[r], 40]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
         "# A[7] * B\n\t"
-        "ldr		x8, [%[a], 56]\n\t"
-        "mov		x3, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc		x3, xzr, xzr\n\t"
-        "str		x4, [%[r], 56]\n\t"
+        "str	x3, [%[r], 48]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
         "# A[8] * B\n\t"
-        "ldr		x8, [%[a], 64]\n\t"
-        "mov		x4, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc		x4, xzr, xzr\n\t"
-        "str		x5, [%[r], 64]\n\t"
+        "ldp	x8, x9, [%[a], 64]\n\t"
+        "str	x4, [%[r], 56]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
         "# A[9] * B\n\t"
-        "ldr		x8, [%[a], 72]\n\t"
-        "mov		x5, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc		x5, xzr, xzr\n\t"
-        "str		x3, [%[r], 72]\n\t"
+        "str	x5, [%[r], 64]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
         "# A[10] * B\n\t"
-        "ldr		x8, [%[a], 80]\n\t"
-        "mov		x3, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc		x3, xzr, xzr\n\t"
-        "str		x4, [%[r], 80]\n\t"
+        "ldp	x8, x9, [%[a], 80]\n\t"
+        "str	x3, [%[r], 72]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
         "# A[11] * B\n\t"
-        "ldr		x8, [%[a], 88]\n\t"
-        "mov		x4, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc		x4, xzr, xzr\n\t"
-        "str		x5, [%[r], 88]\n\t"
+        "str	x4, [%[r], 80]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
         "# A[12] * B\n\t"
-        "ldr		x8, [%[a], 96]\n\t"
-        "mov		x5, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc		x5, xzr, xzr\n\t"
-        "str		x3, [%[r], 96]\n\t"
+        "ldp	x8, x9, [%[a], 96]\n\t"
+        "str	x5, [%[r], 88]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
         "# A[13] * B\n\t"
-        "ldr		x8, [%[a], 104]\n\t"
-        "mov		x3, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc		x3, xzr, xzr\n\t"
-        "str		x4, [%[r], 104]\n\t"
+        "str	x3, [%[r], 96]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
         "# A[14] * B\n\t"
-        "ldr		x8, [%[a], 112]\n\t"
-        "mov		x4, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc		x4, xzr, xzr\n\t"
-        "str		x5, [%[r], 112]\n\t"
+        "ldp	x8, x9, [%[a], 112]\n\t"
+        "str	x4, [%[r], 104]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
         "# A[15] * B\n\t"
-        "ldr	x8, [%[a], 120]\n\t"
-        "mul	x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
+        "str	x5, [%[r], 112]\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x4, xzr, xzr\n\t"
         "adds	x3, x3, x6\n\t"
         "adc	x4, x4, x7\n\t"
-        "str	x3, [%[r], 120]\n\t"
-        "str	x4, [%[r], 128]\n\t"
+        "stp	x3, x4, [%[r], 120]\n\t"
         :
         : [r] "r" (r), [a] "r" (a), [b] "r" (b)
-        : "memory", "x3", "x4", "x5", "x6", "x7", "x8"
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
     );
 #endif
 }
@@ -2439,8 +2902,8 @@
         "subs	%[d0], %[d0], x4\n\t"
         "sbc	%[d1], %[d1], x3\n\t"
 
-        "lsl	x3, %[d1], 32\n\t"
-        "orr	x3, x3, %[d0], lsr 32\n\t"
+        "lsr	x3, %[d0], 32\n\t"
+        "orr	x3, x3, %[d1], lsl 32\n\t"
 
         "udiv	x3, x3, x5\n\t"
         "add	x6, x6, x3\n\t"
@@ -2449,21 +2912,16 @@
         "subs	%[d0], %[d0], x4\n\t"
         "sbc	%[d1], %[d1], x3\n\t"
 
-        "lsl	x3, %[d1], 32\n\t"
-        "orr	x3, x3, %[d0], lsr 32\n\t"
+        "lsr	x3, %[d0], 32\n\t"
+        "orr	x3, x3, %[d1], lsl 32\n\t"
 
         "udiv	x3, x3, x5\n\t"
         "add	x6, x6, x3\n\t"
         "mul	x4, %[div], x3\n\t"
-        "umulh	x3, %[div], x3\n\t"
-        "subs	%[d0], %[d0], x4\n\t"
-        "sbc	%[d1], %[d1], x3\n\t"
+        "sub	%[d0], %[d0], x4\n\t"
 
         "udiv	x3, %[d0], %[div]\n\t"
-        "add	x6, x6, x3\n\t"
-        "mul	x3, %[div], x3\n\t"
-        "sub	%[d0], %[d0], x3\n\t"
-        "mov	%[r], x6\n\t"
+        "add	%[r], x6, x3\n\t"
 
         : [r] "=r" (r)
         : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
@@ -2480,170 +2938,155 @@
  * return -ve, 0 or +ve if a is less than, equal to or greater than b
  * respectively.
  */
-static int64_t sp_2048_cmp_16(sp_digit* a, sp_digit* b)
-{
-    sp_digit r = -1;
-    sp_digit one = 1;
-
-#ifdef WOLFSSL_SP_SMALL
-    __asm__ __volatile__ (
-        "mov	x3, -1\n\t"
-        "mov	x6, 120\n\t"
+static int64_t sp_2048_cmp_16(const sp_digit* a, const sp_digit* b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    __asm__ __volatile__ (
+        "mov	x2, -1\n\t"
+        "mov	x3, 1\n\t"
+        "mov	x4, -1\n\t"
+        "mov	x5, 120\n\t"
         "1:\n\t"
-        "ldr	x4, [%[a], x6]\n\t"
-        "ldr	x5, [%[b], x6]\n\t"
-        "and	x4, x4, x3\n\t"
-        "and	x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "sub	x6, x6, #8\n\t"
-        "b.cc	1b\n\t"
-        "eor	%[r], %[r], x3\n\t"
-        : [r] "+r" (r)
-        : [a] "r" (a), [b] "r" (b), [one] "r" (one)
-        : "x2", "x3", "x4", "x5", "x6"
-    );
-#else
-    __asm__ __volatile__ (
-        "mov	x3, -1\n\t"
-        "ldr		x4, [%[a], 120]\n\t"
-        "ldr		x5, [%[b], 120]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 112]\n\t"
-        "ldr		x5, [%[b], 112]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 104]\n\t"
-        "ldr		x5, [%[b], 104]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 96]\n\t"
-        "ldr		x5, [%[b], 96]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 88]\n\t"
-        "ldr		x5, [%[b], 88]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 80]\n\t"
-        "ldr		x5, [%[b], 80]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 72]\n\t"
-        "ldr		x5, [%[b], 72]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 64]\n\t"
-        "ldr		x5, [%[b], 64]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 56]\n\t"
-        "ldr		x5, [%[b], 56]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 48]\n\t"
-        "ldr		x5, [%[b], 48]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 40]\n\t"
-        "ldr		x5, [%[b], 40]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 32]\n\t"
-        "ldr		x5, [%[b], 32]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 24]\n\t"
-        "ldr		x5, [%[b], 24]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 16]\n\t"
-        "ldr		x5, [%[b], 16]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 8]\n\t"
-        "ldr		x5, [%[b], 8]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 0]\n\t"
-        "ldr		x5, [%[b], 0]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "eor	%[r], %[r], x3\n\t"
-        : [r] "+r" (r)
-        : [a] "r" (a), [b] "r" (b), [one] "r" (one)
-        : "x2", "x3", "x4", "x5", "x6"
-    );
-#endif
-
-    return r;
+        "ldr	x6, [%[a], x5]\n\t"
+        "ldr	x7, [%[b], x5]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x6, x6, x7\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "subs	x5, x5, #8\n\t"
+        "b.cs	1b\n\t"
+        "eor	%[a], x2, x4\n\t"
+        : [a] "+r" (a)
+        : [b] "r" (b)
+        : "x2", "x3", "x4", "x5", "x6", "x7", "x8"
+    );
+#else
+    __asm__ __volatile__ (
+        "mov	x2, -1\n\t"
+        "mov	x3, 1\n\t"
+        "mov  x4, -1\n\t"
+        "ldp	x5, x6, [%[a], 112]\n\t"
+        "ldp	x7, x8, [%[b], 112]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 96]\n\t"
+        "ldp	x7, x8, [%[b], 96]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 80]\n\t"
+        "ldp	x7, x8, [%[b], 80]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 64]\n\t"
+        "ldp	x7, x8, [%[b], 64]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 48]\n\t"
+        "ldp	x7, x8, [%[b], 48]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 32]\n\t"
+        "ldp	x7, x8, [%[b], 32]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 16]\n\t"
+        "ldp	x7, x8, [%[b], 16]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 0]\n\t"
+        "ldp	x7, x8, [%[b], 0]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "eor	%[a], x2, x4\n\t"
+        : [a] "+r" (a)
+        : [b] "r" (b)
+        : "x2", "x3", "x4", "x5", "x6", "x7", "x8"
+    );
+#endif
+
+    return (int64_t)a;
 }
 
 /* Divide d in a and put remainder into r (m*d + r = a)
@@ -2655,7 +3098,7 @@
  * r  Remainder from the division.
  * returns MP_OKAY indicating success.
  */
-static WC_INLINE int sp_2048_div_16(sp_digit* a, sp_digit* d, sp_digit* m,
+static WC_INLINE int sp_2048_div_16(const sp_digit* a, const sp_digit* d, sp_digit* m,
         sp_digit* r)
 {
     sp_digit t1[32], t2[17];
@@ -2679,7 +3122,7 @@
     }
 
     r1 = sp_2048_cmp_16(t1, d) >= 0;
-    sp_2048_cond_sub_16(r, t1, t2, (sp_digit)0 - r1);
+    sp_2048_cond_sub_16(r, t1, d, (sp_digit)0 - r1);
 
     return MP_OKAY;
 }
@@ -2691,7 +3134,7 @@
  * m  A single precision number that is the modulus to reduce with.
  * returns MP_OKAY indicating success.
  */
-static WC_INLINE int sp_2048_mod_16(sp_digit* r, sp_digit* a, sp_digit* m)
+static WC_INLINE int sp_2048_mod_16(sp_digit* r, const sp_digit* a, const sp_digit* m)
 {
     return sp_2048_div_16(a, m, NULL, r);
 }
@@ -2706,8 +3149,8 @@
  * m     A single precision number that is the modulus.
  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
  */
-static int sp_2048_mod_exp_16(sp_digit* r, sp_digit* a, sp_digit* e,
-        int bits, sp_digit* m, int reduceA)
+static int sp_2048_mod_exp_16(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
 {
 #ifndef WOLFSSL_SMALL_STACK
     sp_digit t[16][32];
@@ -2726,27 +3169,28 @@
 #ifdef WOLFSSL_SMALL_STACK
     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 32, NULL,
                             DYNAMIC_TYPE_TMP_BUFFER);
-    if (td == NULL)
+    if (td == NULL) {
         err = MEMORY_E;
-
-    if (err == MP_OKAY) {
-        for (i=0; i<16; i++)
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<16; i++) {
             t[i] = td + i * 32;
+        }
+#endif
         norm = t[0];
-    }
-#else
-    norm = t[0];
-#endif
-
-    if (err == MP_OKAY) {
+
         sp_2048_mont_setup(m, &mp);
         sp_2048_mont_norm_16(norm, m);
 
-        XMEMSET(t[1], 0, sizeof(sp_digit) * 16);
-        if (reduceA) {
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 16U);
+        if (reduceA != 0) {
             err = sp_2048_mod_16(t[1] + 16, a, m);
-            if (err == MP_OKAY)
+            if (err == MP_OKAY) {
                 err = sp_2048_mod_16(t[1], t[1], m);
+            }
         }
         else {
             XMEMCPY(t[1] + 16, a, sizeof(sp_digit) * 16);
@@ -2772,9 +3216,16 @@
 
         i = (bits - 1) / 64;
         n = e[i--];
-        y = n >> 60;
-        n <<= 4;
-        c = 60;
+        c = bits & 63;
+        if (c == 0) {
+            c = 64;
+        }
+        c -= bits % 4;
+        if (c == 64) {
+            c = 60;
+        }
+        y = (int)(n >> c);
+        n <<= 64 - c;
         XMEMCPY(r, t[y], sizeof(sp_digit) * 16);
         for (; i>=0 || c>=4; ) {
             if (c == 0) {
@@ -2805,7 +3256,7 @@
             sp_2048_mont_mul_16(r, r, t[y], m, mp);
         }
 
-        XMEMSET(&r[16], 0, sizeof(sp_digit) * 16);
+        XMEMSET(&r[16], 0, sizeof(sp_digit) * 16U);
         sp_2048_mont_reduce_16(r, m, mp);
 
         mask = 0 - (sp_2048_cmp_16(r, m) >= 0);
@@ -2813,8 +3264,9 @@
     }
 
 #ifdef WOLFSSL_SMALL_STACK
-    if (td != NULL)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
 #endif
 
     return err;
@@ -2829,8 +3281,8 @@
  * m     A single precision number that is the modulus.
  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
  */
-static int sp_2048_mod_exp_16(sp_digit* r, sp_digit* a, sp_digit* e,
-        int bits, sp_digit* m, int reduceA)
+static int sp_2048_mod_exp_16(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
 {
 #ifndef WOLFSSL_SMALL_STACK
     sp_digit t[32][32];
@@ -2849,27 +3301,28 @@
 #ifdef WOLFSSL_SMALL_STACK
     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 32, NULL,
                             DYNAMIC_TYPE_TMP_BUFFER);
-    if (td == NULL)
+    if (td == NULL) {
         err = MEMORY_E;
-
-    if (err == MP_OKAY) {
-        for (i=0; i<32; i++)
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<32; i++) {
             t[i] = td + i * 32;
+        }
+#endif
         norm = t[0];
-    }
-#else
-    norm = t[0];
-#endif
-
-    if (err == MP_OKAY) {
+
         sp_2048_mont_setup(m, &mp);
         sp_2048_mont_norm_16(norm, m);
 
-        XMEMSET(t[1], 0, sizeof(sp_digit) * 16);
-        if (reduceA) {
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 16U);
+        if (reduceA != 0) {
             err = sp_2048_mod_16(t[1] + 16, a, m);
-            if (err == MP_OKAY)
+            if (err == MP_OKAY) {
                 err = sp_2048_mod_16(t[1], t[1], m);
+            }
         }
         else {
             XMEMCPY(t[1] + 16, a, sizeof(sp_digit) * 16);
@@ -2911,9 +3364,16 @@
 
         i = (bits - 1) / 64;
         n = e[i--];
-        y = n >> 59;
-        n <<= 5;
-        c = 59;
+        c = bits & 63;
+        if (c == 0) {
+            c = 64;
+        }
+        c -= bits % 5;
+        if (c == 64) {
+            c = 59;
+        }
+        y = (int)(n >> c);
+        n <<= 64 - c;
         XMEMCPY(r, t[y], sizeof(sp_digit) * 16);
         for (; i>=0 || c>=5; ) {
             if (c == 0) {
@@ -2944,14 +3404,8 @@
 
             sp_2048_mont_mul_16(r, r, t[y], m, mp);
         }
-        y = e[0] & 0xf;
-        sp_2048_mont_sqr_16(r, r, m, mp);
-        sp_2048_mont_sqr_16(r, r, m, mp);
-        sp_2048_mont_sqr_16(r, r, m, mp);
-        sp_2048_mont_sqr_16(r, r, m, mp);
-        sp_2048_mont_mul_16(r, r, t[y], m, mp);
-
-        XMEMSET(&r[16], 0, sizeof(sp_digit) * 16);
+
+        XMEMSET(&r[16], 0, sizeof(sp_digit) * 16U);
         sp_2048_mont_reduce_16(r, m, mp);
 
         mask = 0 - (sp_2048_cmp_16(r, m) >= 0);
@@ -2959,23 +3413,25 @@
     }
 
 #ifdef WOLFSSL_SMALL_STACK
-    if (td != NULL)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return err;
-}
-#endif /* WOLFSSL_SP_SMALL */
-
-#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */
-
+    }
+#endif
+
+    return err;
+}
+#endif /* WOLFSSL_SP_SMALL */
+
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
 /* r = 2^n mod m where n is the number of bits to reduce by.
  * Given m must be 2048 bits, just need to subtract.
  *
  * r  A single precision number.
- * m  A signle precision number.
- */
-static void sp_2048_mont_norm_32(sp_digit* r, sp_digit* m)
+ * m  A single precision number.
+ */
+static void sp_2048_mont_norm_32(sp_digit* r, const sp_digit* m)
 {
     XMEMSET(r, 0, sizeof(sp_digit) * 32);
 
@@ -2983,6 +3439,7 @@
     sp_2048_sub_in_place_32(r, m);
 }
 
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
 /* Conditionally subtract b from a using the mask m.
  * m is -1 to subtract and 0 when not copying.
  *
@@ -2991,12 +3448,12 @@
  * b  A single precision number to subtract.
  * m  Mask value to apply.
  */
-static sp_digit sp_2048_cond_sub_32(sp_digit* r, sp_digit* a, sp_digit* b,
+static sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b,
         sp_digit m)
 {
+#ifdef WOLFSSL_SP_SMALL
     sp_digit c = 0;
 
-#ifdef WOLFSSL_SP_SMALL
     __asm__ __volatile__ (
         "mov	x8, #0\n\t"
         "1:\n\t"
@@ -3012,179 +3469,133 @@
         "b.lt	1b\n\t"
         : [c] "+r" (c)
         : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
-        : "memory", "x4", "x6", "x5", "x7", "x8"
-    );
-#else
-    __asm__ __volatile__ (
-
-        "ldr		x4, [%[a], 0]\n\t"
-        "ldr		x6, [%[a], 8]\n\t"
-        "ldr		x5, [%[b], 0]\n\t"
-        "ldr		x7, [%[b], 8]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "subs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 0]\n\t"
-        "str		x6, [%[r], 8]\n\t"
-        "ldr		x4, [%[a], 16]\n\t"
-        "ldr		x6, [%[a], 24]\n\t"
-        "ldr		x5, [%[b], 16]\n\t"
-        "ldr		x7, [%[b], 24]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 16]\n\t"
-        "str		x6, [%[r], 24]\n\t"
-        "ldr		x4, [%[a], 32]\n\t"
-        "ldr		x6, [%[a], 40]\n\t"
-        "ldr		x5, [%[b], 32]\n\t"
-        "ldr		x7, [%[b], 40]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 32]\n\t"
-        "str		x6, [%[r], 40]\n\t"
-        "ldr		x4, [%[a], 48]\n\t"
-        "ldr		x6, [%[a], 56]\n\t"
-        "ldr		x5, [%[b], 48]\n\t"
-        "ldr		x7, [%[b], 56]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 48]\n\t"
-        "str		x6, [%[r], 56]\n\t"
-        "ldr		x4, [%[a], 64]\n\t"
-        "ldr		x6, [%[a], 72]\n\t"
-        "ldr		x5, [%[b], 64]\n\t"
-        "ldr		x7, [%[b], 72]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 64]\n\t"
-        "str		x6, [%[r], 72]\n\t"
-        "ldr		x4, [%[a], 80]\n\t"
-        "ldr		x6, [%[a], 88]\n\t"
-        "ldr		x5, [%[b], 80]\n\t"
-        "ldr		x7, [%[b], 88]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 80]\n\t"
-        "str		x6, [%[r], 88]\n\t"
-        "ldr		x4, [%[a], 96]\n\t"
-        "ldr		x6, [%[a], 104]\n\t"
-        "ldr		x5, [%[b], 96]\n\t"
-        "ldr		x7, [%[b], 104]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 96]\n\t"
-        "str		x6, [%[r], 104]\n\t"
-        "ldr		x4, [%[a], 112]\n\t"
-        "ldr		x6, [%[a], 120]\n\t"
-        "ldr		x5, [%[b], 112]\n\t"
-        "ldr		x7, [%[b], 120]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 112]\n\t"
-        "str		x6, [%[r], 120]\n\t"
-        "ldr		x4, [%[a], 128]\n\t"
-        "ldr		x6, [%[a], 136]\n\t"
-        "ldr		x5, [%[b], 128]\n\t"
-        "ldr		x7, [%[b], 136]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 128]\n\t"
-        "str		x6, [%[r], 136]\n\t"
-        "ldr		x4, [%[a], 144]\n\t"
-        "ldr		x6, [%[a], 152]\n\t"
-        "ldr		x5, [%[b], 144]\n\t"
-        "ldr		x7, [%[b], 152]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 144]\n\t"
-        "str		x6, [%[r], 152]\n\t"
-        "ldr		x4, [%[a], 160]\n\t"
-        "ldr		x6, [%[a], 168]\n\t"
-        "ldr		x5, [%[b], 160]\n\t"
-        "ldr		x7, [%[b], 168]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 160]\n\t"
-        "str		x6, [%[r], 168]\n\t"
-        "ldr		x4, [%[a], 176]\n\t"
-        "ldr		x6, [%[a], 184]\n\t"
-        "ldr		x5, [%[b], 176]\n\t"
-        "ldr		x7, [%[b], 184]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 176]\n\t"
-        "str		x6, [%[r], 184]\n\t"
-        "ldr		x4, [%[a], 192]\n\t"
-        "ldr		x6, [%[a], 200]\n\t"
-        "ldr		x5, [%[b], 192]\n\t"
-        "ldr		x7, [%[b], 200]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 192]\n\t"
-        "str		x6, [%[r], 200]\n\t"
-        "ldr		x4, [%[a], 208]\n\t"
-        "ldr		x6, [%[a], 216]\n\t"
-        "ldr		x5, [%[b], 208]\n\t"
-        "ldr		x7, [%[b], 216]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 208]\n\t"
-        "str		x6, [%[r], 216]\n\t"
-        "ldr		x4, [%[a], 224]\n\t"
-        "ldr		x6, [%[a], 232]\n\t"
-        "ldr		x5, [%[b], 224]\n\t"
-        "ldr		x7, [%[b], 232]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 224]\n\t"
-        "str		x6, [%[r], 232]\n\t"
-        "ldr		x4, [%[a], 240]\n\t"
-        "ldr		x6, [%[a], 248]\n\t"
-        "ldr		x5, [%[b], 240]\n\t"
-        "ldr		x7, [%[b], 248]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 240]\n\t"
-        "str		x6, [%[r], 248]\n\t"
-        "csetm	%[c], cc\n\t"
-        : [c] "+r" (c)
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
-        : "memory", "x4", "x6", "x5", "x7", "x8"
-    );
-#endif /* WOLFSSL_SP_SMALL */
+        : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12"
+    );
 
     return c;
+#else
+    __asm__ __volatile__ (
+
+        "ldp	x5, x7, [%[b], 0]\n\t"
+        "ldp	x11, x12, [%[b], 16]\n\t"
+        "ldp	x4, x6, [%[a], 0]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 16]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "subs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "sbcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 0]\n\t"
+        "sbcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 16]\n\t"
+        "ldp	x5, x7, [%[b], 32]\n\t"
+        "ldp	x11, x12, [%[b], 48]\n\t"
+        "ldp	x4, x6, [%[a], 32]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 48]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "sbcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "sbcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 32]\n\t"
+        "sbcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 48]\n\t"
+        "ldp	x5, x7, [%[b], 64]\n\t"
+        "ldp	x11, x12, [%[b], 80]\n\t"
+        "ldp	x4, x6, [%[a], 64]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 80]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "sbcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "sbcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 64]\n\t"
+        "sbcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 80]\n\t"
+        "ldp	x5, x7, [%[b], 96]\n\t"
+        "ldp	x11, x12, [%[b], 112]\n\t"
+        "ldp	x4, x6, [%[a], 96]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 112]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "sbcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "sbcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 96]\n\t"
+        "sbcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 112]\n\t"
+        "ldp	x5, x7, [%[b], 128]\n\t"
+        "ldp	x11, x12, [%[b], 144]\n\t"
+        "ldp	x4, x6, [%[a], 128]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 144]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "sbcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "sbcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 128]\n\t"
+        "sbcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 144]\n\t"
+        "ldp	x5, x7, [%[b], 160]\n\t"
+        "ldp	x11, x12, [%[b], 176]\n\t"
+        "ldp	x4, x6, [%[a], 160]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 176]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "sbcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "sbcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 160]\n\t"
+        "sbcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 176]\n\t"
+        "ldp	x5, x7, [%[b], 192]\n\t"
+        "ldp	x11, x12, [%[b], 208]\n\t"
+        "ldp	x4, x6, [%[a], 192]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 208]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "sbcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "sbcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 192]\n\t"
+        "sbcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 208]\n\t"
+        "ldp	x5, x7, [%[b], 224]\n\t"
+        "ldp	x11, x12, [%[b], 240]\n\t"
+        "ldp	x4, x6, [%[a], 224]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 240]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "sbcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "sbcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 224]\n\t"
+        "sbcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 240]\n\t"
+        "csetm	%[r], cc\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12"
+    );
+
+    return (sp_digit)r;
+#endif /* WOLFSSL_SP_SMALL */
 }
 
 /* Reduce the number back to 2048 bits using Montgomery reduction.
@@ -3193,339 +3604,335 @@
  * m   The single precision number representing the modulus.
  * mp  The digit representing the negative inverse of m mod 2^n.
  */
-SP_NOINLINE static void sp_2048_mont_reduce_32(sp_digit* a, sp_digit* m,
+SP_NOINLINE static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m,
         sp_digit mp)
 {
     sp_digit ca = 0;
 
     __asm__ __volatile__ (
-        "ldp       x12, x13, [%[m], 0]\n\t"
-        "ldp       x14, x15, [%[m], 16]\n\t"
-        "ldp       x16, x17, [%[m], 32]\n\t"
-        "ldp       x18, x19, [%[m], 48]\n\t"
-        "ldp       x20, x21, [%[m], 64]\n\t"
-        "ldp       x22, x23, [%[m], 80]\n\t"
-        "ldp       x24, x25, [%[m], 96]\n\t"
-        "ldp       x26, x27, [%[m], 112]\n\t"
-        "# i = 0\n\t"
-        "mov	x3, 0\n\t"
-        "ldp	x10, x11, [%[a], 0]\n\t"
+        "ldp	x14, x15, [%[m], 0]\n\t"
+        "ldp	x16, x17, [%[m], 16]\n\t"
+        "ldp	x19, x20, [%[m], 32]\n\t"
+        "ldp	x21, x22, [%[m], 48]\n\t"
+        "ldp	x23, x24, [%[m], 64]\n\t"
+        "ldp	x25, x26, [%[m], 80]\n\t"
+        "ldp	x27, x28, [%[m], 96]\n\t"
+        "# i = 32\n\t"
+        "mov	x4, 32\n\t"
+        "ldp	x12, x13, [%[a], 0]\n\t"
         "\n1:\n\t"
         "# mu = a[i] * mp\n\t"
-        "mul	x8, %[mp], x10\n\t"
+        "mul	x9, %[mp], x12\n\t"
         "# a[i+0] += m[0] * mu\n\t"
-        "ldr	x9, [%[a], 0]\n\t"
-        "mul		x6, x12, x8\n\t"
-        "umulh	x7, x12, x8\n\t"
-        "adds	x10, x10, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
+        "mul	x7, x14, x9\n\t"
+        "umulh	x8, x14, x9\n\t"
+        "adds	x12, x12, x7\n\t"
         "# a[i+1] += m[1] * mu\n\t"
-        "ldr	x9, [%[a], 8]\n\t"
-        "mul		x6, x13, x8\n\t"
-        "umulh	x7, x13, x8\n\t"
-        "adds	x10, x11, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x10, x10, x5\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "mul	x7, x15, x9\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "umulh	x8, x15, x9\n\t"
+        "adds	x12, x13, x7\n\t"
         "# a[i+2] += m[2] * mu\n\t"
-        "ldr	x11, [%[a], 16]\n\t"
-        "mul		x6, x14, x8\n\t"
-        "umulh	x7, x14, x8\n\t"
-        "adds	x11, x11, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x11, x11, x4\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x13, [%[a], 16]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "mul	x7, x16, x9\n\t"
+        "adds	x12, x12, x6\n\t"
+        "umulh	x8, x16, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "adds	x13, x13, x7\n\t"
         "# a[i+3] += m[3] * mu\n\t"
-        "ldr	x9, [%[a], 24]\n\t"
-        "mul		x6, x15, x8\n\t"
-        "umulh	x7, x15, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 24]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 24]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "mul	x7, x17, x9\n\t"
+        "adds	x13, x13, x5\n\t"
+        "umulh	x8, x17, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+4] += m[4] * mu\n\t"
-        "ldr	x9, [%[a], 32]\n\t"
-        "mul		x6, x16, x8\n\t"
-        "umulh	x7, x16, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 32]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 32]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x19, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x19, x9\n\t"
+        "str	x10, [%[a], 24]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+5] += m[5] * mu\n\t"
-        "ldr	x9, [%[a], 40]\n\t"
-        "mul		x6, x17, x8\n\t"
-        "umulh	x7, x17, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 40]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 40]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x20, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x20, x9\n\t"
+        "str	x11, [%[a], 32]\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+6] += m[6] * mu\n\t"
-        "ldr	x9, [%[a], 48]\n\t"
-        "mul		x6, x18, x8\n\t"
-        "umulh	x7, x18, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 48]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 48]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x21, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x21, x9\n\t"
+        "str	x10, [%[a], 40]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+7] += m[7] * mu\n\t"
-        "ldr	x9, [%[a], 56]\n\t"
-        "mul		x6, x19, x8\n\t"
-        "umulh	x7, x19, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 56]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 56]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x22, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x22, x9\n\t"
+        "str	x11, [%[a], 48]\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+8] += m[8] * mu\n\t"
-        "ldr	x9, [%[a], 64]\n\t"
-        "mul		x6, x20, x8\n\t"
-        "umulh	x7, x20, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 64]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 64]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x23, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x23, x9\n\t"
+        "str	x10, [%[a], 56]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+9] += m[9] * mu\n\t"
-        "ldr	x9, [%[a], 72]\n\t"
-        "mul		x6, x21, x8\n\t"
-        "umulh	x7, x21, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 72]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 72]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x24, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x24, x9\n\t"
+        "str	x11, [%[a], 64]\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+10] += m[10] * mu\n\t"
-        "ldr	x9, [%[a], 80]\n\t"
-        "mul		x6, x22, x8\n\t"
-        "umulh	x7, x22, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 80]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 80]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x25, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x25, x9\n\t"
+        "str	x10, [%[a], 72]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+11] += m[11] * mu\n\t"
-        "ldr	x9, [%[a], 88]\n\t"
-        "mul		x6, x23, x8\n\t"
-        "umulh	x7, x23, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 88]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 88]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x26, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x26, x9\n\t"
+        "str	x11, [%[a], 80]\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+12] += m[12] * mu\n\t"
-        "ldr	x9, [%[a], 96]\n\t"
-        "mul		x6, x24, x8\n\t"
-        "umulh	x7, x24, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 96]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 96]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x27, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x27, x9\n\t"
+        "str	x10, [%[a], 88]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+13] += m[13] * mu\n\t"
-        "ldr	x9, [%[a], 104]\n\t"
-        "mul		x6, x25, x8\n\t"
-        "umulh	x7, x25, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 104]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 104]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x28, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x28, x9\n\t"
+        "str	x11, [%[a], 96]\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+14] += m[14] * mu\n\t"
-        "ldr	x9, [%[a], 112]\n\t"
-        "mul		x6, x26, x8\n\t"
-        "umulh	x7, x26, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 112]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 112]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 112]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 104]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+15] += m[15] * mu\n\t"
-        "ldr	x9, [%[a], 120]\n\t"
-        "mul		x6, x27, x8\n\t"
-        "umulh	x7, x27, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 120]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 120]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 120]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 112]\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+16] += m[16] * mu\n\t"
-        "ldr		x7, [%[m], 128]\n\t"
-        "ldr	x9, [%[a], 128]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 128]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 128]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 128]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 120]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+17] += m[17] * mu\n\t"
-        "ldr		x7, [%[m], 136]\n\t"
-        "ldr	x9, [%[a], 136]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 136]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 136]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 136]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 128]\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+18] += m[18] * mu\n\t"
-        "ldr		x7, [%[m], 144]\n\t"
-        "ldr	x9, [%[a], 144]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 144]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 144]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 144]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 136]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+19] += m[19] * mu\n\t"
-        "ldr		x7, [%[m], 152]\n\t"
-        "ldr	x9, [%[a], 152]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 152]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 152]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 152]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 144]\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+20] += m[20] * mu\n\t"
-        "ldr		x7, [%[m], 160]\n\t"
-        "ldr	x9, [%[a], 160]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 160]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 160]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 160]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 152]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+21] += m[21] * mu\n\t"
-        "ldr		x7, [%[m], 168]\n\t"
-        "ldr	x9, [%[a], 168]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 168]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 168]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 168]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 160]\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+22] += m[22] * mu\n\t"
-        "ldr		x7, [%[m], 176]\n\t"
-        "ldr	x9, [%[a], 176]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 176]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 176]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 176]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 168]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+23] += m[23] * mu\n\t"
-        "ldr		x7, [%[m], 184]\n\t"
-        "ldr	x9, [%[a], 184]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 184]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 184]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 184]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 176]\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+24] += m[24] * mu\n\t"
-        "ldr		x7, [%[m], 192]\n\t"
-        "ldr	x9, [%[a], 192]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 192]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 192]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 192]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 184]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+25] += m[25] * mu\n\t"
-        "ldr		x7, [%[m], 200]\n\t"
-        "ldr	x9, [%[a], 200]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 200]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 200]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 200]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 192]\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+26] += m[26] * mu\n\t"
-        "ldr		x7, [%[m], 208]\n\t"
-        "ldr	x9, [%[a], 208]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 208]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 208]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 208]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 200]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+27] += m[27] * mu\n\t"
-        "ldr		x7, [%[m], 216]\n\t"
-        "ldr	x9, [%[a], 216]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 216]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 216]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 216]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 208]\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+28] += m[28] * mu\n\t"
-        "ldr		x7, [%[m], 224]\n\t"
-        "ldr	x9, [%[a], 224]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 224]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 224]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 224]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 216]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+29] += m[29] * mu\n\t"
-        "ldr		x7, [%[m], 232]\n\t"
-        "ldr	x9, [%[a], 232]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 232]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 232]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 232]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 224]\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+30] += m[30] * mu\n\t"
-        "ldr		x7, [%[m], 240]\n\t"
-        "ldr	x9, [%[a], 240]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 240]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 240]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 240]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 232]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+31] += m[31] * mu\n\t"
-        "ldr	x7, [%[m], 248]\n\t"
-        "ldr	x9, [%[a], 248]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x7, x7, %[ca]\n\t"
+        "ldr	x10, [%[a], 248]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 248]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "adds	x6, x6, x7\n\t"
+        "adcs	x8, x8, %[ca]\n\t"
+        "str	x11, [%[a], 240]\n\t"
         "cset  %[ca], cs\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 248]\n\t"
-        "ldr	x9, [%[a], 256]\n\t"
-        "adcs	x9, x9, x7\n\t"
-        "str	x9, [%[a], 256]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "ldr	x11, [%[a], 256]\n\t"
+        "str	x10, [%[a], 248]\n\t"
+        "adcs	x11, x11, x8\n\t"
+        "str	x11, [%[a], 256]\n\t"
         "adc	%[ca], %[ca], xzr\n\t"
-        "# i += 1\n\t"
+        "subs	x4, x4, 1\n\t"
         "add	%[a], %[a], 8\n\t"
-        "add	x3, x3, 8\n\t"
-        "cmp	x3, 256\n\t"
-        "blt	1b\n\t"
-        "str	x10, [%[a], 0]\n\t"
-        "str	x11, [%[a], 8]\n\t"
+        "bne	1b\n\t"
+        "stp	x12, x13, [%[a], 0]\n\t"
         : [ca] "+r" (ca), [a] "+r" (a)
         : [m] "r" (m), [mp] "r" (mp)
-        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27"
+        : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28"
     );
 
     sp_2048_cond_sub_32(a - 32, a, m, (sp_digit)0 - ca);
@@ -3540,8 +3947,8 @@
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_2048_mont_mul_32(sp_digit* r, sp_digit* a, sp_digit* b,
-        sp_digit* m, sp_digit mp)
+static void sp_2048_mont_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
 {
     sp_2048_mul_32(r, a, b);
     sp_2048_mont_reduce_32(r, m, mp);
@@ -3554,344 +3961,13 @@
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_2048_mont_sqr_32(sp_digit* r, sp_digit* a, sp_digit* m,
+static void sp_2048_mont_sqr_32(sp_digit* r, const sp_digit* a, const sp_digit* m,
         sp_digit mp)
 {
     sp_2048_sqr_32(r, a);
     sp_2048_mont_reduce_32(r, m, mp);
 }
 
-/* Mul a by digit b into r. (r = a * b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A single precision digit.
- */
-static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a,
-        const sp_digit b)
-{
-#ifdef WOLFSSL_SP_SMALL
-    __asm__ __volatile__ (
-        "# A[0] * B\n\t"
-        "ldr	x8, [%[a]]\n\t"
-        "mul	x5, %[b], x8\n\t"
-        "umulh	x3, %[b], x8\n\t"
-        "mov	x4, 0\n\t"
-        "str	x5, [%[r]]\n\t"
-        "mov	x5, 0\n\t"
-        "mov	x9, #8\n\t"
-        "1:\n\t"
-        "ldr	x8, [%[a], x9]\n\t"
-        "mul	x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, xzr, xzr\n\t"
-        "str	x3, [%[r], x9]\n\t"
-        "mov	x3, x4\n\t"
-        "mov	x4, x5\n\t"
-        "mov	x5, #0\n\t"
-        "add	x9, x9, #8\n\t"
-        "cmp	x9, 256\n\t"
-        "b.lt	1b\n\t"
-        "str	x3, [%[r], 256]\n\t"
-        :
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
-        : "memory", "x3", "x4", "x5", "x6", "x7", "x8"
-    );
-#else
-    __asm__ __volatile__ (
-        "# A[0] * B\n\t"
-        "ldr	x8, [%[a]]\n\t"
-        "mul	x3, %[b], x8\n\t"
-        "umulh	x4, %[b], x8\n\t"
-        "mov	x5, 0\n\t"
-        "str	x3, [%[r]]\n\t"
-        "# A[1] * B\n\t"
-        "ldr		x8, [%[a], 8]\n\t"
-        "mov		x3, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc		x3, xzr, xzr\n\t"
-        "str		x4, [%[r], 8]\n\t"
-        "# A[2] * B\n\t"
-        "ldr		x8, [%[a], 16]\n\t"
-        "mov		x4, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc		x4, xzr, xzr\n\t"
-        "str		x5, [%[r], 16]\n\t"
-        "# A[3] * B\n\t"
-        "ldr		x8, [%[a], 24]\n\t"
-        "mov		x5, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc		x5, xzr, xzr\n\t"
-        "str		x3, [%[r], 24]\n\t"
-        "# A[4] * B\n\t"
-        "ldr		x8, [%[a], 32]\n\t"
-        "mov		x3, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc		x3, xzr, xzr\n\t"
-        "str		x4, [%[r], 32]\n\t"
-        "# A[5] * B\n\t"
-        "ldr		x8, [%[a], 40]\n\t"
-        "mov		x4, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc		x4, xzr, xzr\n\t"
-        "str		x5, [%[r], 40]\n\t"
-        "# A[6] * B\n\t"
-        "ldr		x8, [%[a], 48]\n\t"
-        "mov		x5, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc		x5, xzr, xzr\n\t"
-        "str		x3, [%[r], 48]\n\t"
-        "# A[7] * B\n\t"
-        "ldr		x8, [%[a], 56]\n\t"
-        "mov		x3, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc		x3, xzr, xzr\n\t"
-        "str		x4, [%[r], 56]\n\t"
-        "# A[8] * B\n\t"
-        "ldr		x8, [%[a], 64]\n\t"
-        "mov		x4, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc		x4, xzr, xzr\n\t"
-        "str		x5, [%[r], 64]\n\t"
-        "# A[9] * B\n\t"
-        "ldr		x8, [%[a], 72]\n\t"
-        "mov		x5, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc		x5, xzr, xzr\n\t"
-        "str		x3, [%[r], 72]\n\t"
-        "# A[10] * B\n\t"
-        "ldr		x8, [%[a], 80]\n\t"
-        "mov		x3, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc		x3, xzr, xzr\n\t"
-        "str		x4, [%[r], 80]\n\t"
-        "# A[11] * B\n\t"
-        "ldr		x8, [%[a], 88]\n\t"
-        "mov		x4, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc		x4, xzr, xzr\n\t"
-        "str		x5, [%[r], 88]\n\t"
-        "# A[12] * B\n\t"
-        "ldr		x8, [%[a], 96]\n\t"
-        "mov		x5, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc		x5, xzr, xzr\n\t"
-        "str		x3, [%[r], 96]\n\t"
-        "# A[13] * B\n\t"
-        "ldr		x8, [%[a], 104]\n\t"
-        "mov		x3, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc		x3, xzr, xzr\n\t"
-        "str		x4, [%[r], 104]\n\t"
-        "# A[14] * B\n\t"
-        "ldr		x8, [%[a], 112]\n\t"
-        "mov		x4, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc		x4, xzr, xzr\n\t"
-        "str		x5, [%[r], 112]\n\t"
-        "# A[15] * B\n\t"
-        "ldr		x8, [%[a], 120]\n\t"
-        "mov		x5, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc		x5, xzr, xzr\n\t"
-        "str		x3, [%[r], 120]\n\t"
-        "# A[16] * B\n\t"
-        "ldr		x8, [%[a], 128]\n\t"
-        "mov		x3, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc		x3, xzr, xzr\n\t"
-        "str		x4, [%[r], 128]\n\t"
-        "# A[17] * B\n\t"
-        "ldr		x8, [%[a], 136]\n\t"
-        "mov		x4, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc		x4, xzr, xzr\n\t"
-        "str		x5, [%[r], 136]\n\t"
-        "# A[18] * B\n\t"
-        "ldr		x8, [%[a], 144]\n\t"
-        "mov		x5, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc		x5, xzr, xzr\n\t"
-        "str		x3, [%[r], 144]\n\t"
-        "# A[19] * B\n\t"
-        "ldr		x8, [%[a], 152]\n\t"
-        "mov		x3, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc		x3, xzr, xzr\n\t"
-        "str		x4, [%[r], 152]\n\t"
-        "# A[20] * B\n\t"
-        "ldr		x8, [%[a], 160]\n\t"
-        "mov		x4, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc		x4, xzr, xzr\n\t"
-        "str		x5, [%[r], 160]\n\t"
-        "# A[21] * B\n\t"
-        "ldr		x8, [%[a], 168]\n\t"
-        "mov		x5, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc		x5, xzr, xzr\n\t"
-        "str		x3, [%[r], 168]\n\t"
-        "# A[22] * B\n\t"
-        "ldr		x8, [%[a], 176]\n\t"
-        "mov		x3, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc		x3, xzr, xzr\n\t"
-        "str		x4, [%[r], 176]\n\t"
-        "# A[23] * B\n\t"
-        "ldr		x8, [%[a], 184]\n\t"
-        "mov		x4, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc		x4, xzr, xzr\n\t"
-        "str		x5, [%[r], 184]\n\t"
-        "# A[24] * B\n\t"
-        "ldr		x8, [%[a], 192]\n\t"
-        "mov		x5, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc		x5, xzr, xzr\n\t"
-        "str		x3, [%[r], 192]\n\t"
-        "# A[25] * B\n\t"
-        "ldr		x8, [%[a], 200]\n\t"
-        "mov		x3, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc		x3, xzr, xzr\n\t"
-        "str		x4, [%[r], 200]\n\t"
-        "# A[26] * B\n\t"
-        "ldr		x8, [%[a], 208]\n\t"
-        "mov		x4, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc		x4, xzr, xzr\n\t"
-        "str		x5, [%[r], 208]\n\t"
-        "# A[27] * B\n\t"
-        "ldr		x8, [%[a], 216]\n\t"
-        "mov		x5, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc		x5, xzr, xzr\n\t"
-        "str		x3, [%[r], 216]\n\t"
-        "# A[28] * B\n\t"
-        "ldr		x8, [%[a], 224]\n\t"
-        "mov		x3, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc		x3, xzr, xzr\n\t"
-        "str		x4, [%[r], 224]\n\t"
-        "# A[29] * B\n\t"
-        "ldr		x8, [%[a], 232]\n\t"
-        "mov		x4, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc		x4, xzr, xzr\n\t"
-        "str		x5, [%[r], 232]\n\t"
-        "# A[30] * B\n\t"
-        "ldr		x8, [%[a], 240]\n\t"
-        "mov		x5, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc		x5, xzr, xzr\n\t"
-        "str		x3, [%[r], 240]\n\t"
-        "# A[31] * B\n\t"
-        "ldr	x8, [%[a], 248]\n\t"
-        "mul	x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adc	x5, x5, x7\n\t"
-        "str	x4, [%[r], 248]\n\t"
-        "str	x5, [%[r], 256]\n\t"
-        :
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
-        : "memory", "x3", "x4", "x5", "x6", "x7", "x8"
-    );
-#endif
-}
-
 /* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
  *
  * d1   The high order half of the number to divide.
@@ -3922,8 +3998,8 @@
         "subs	%[d0], %[d0], x4\n\t"
         "sbc	%[d1], %[d1], x3\n\t"
 
-        "lsl	x3, %[d1], 32\n\t"
-        "orr	x3, x3, %[d0], lsr 32\n\t"
+        "lsr	x3, %[d0], 32\n\t"
+        "orr	x3, x3, %[d1], lsl 32\n\t"
 
         "udiv	x3, x3, x5\n\t"
         "add	x6, x6, x3\n\t"
@@ -3932,21 +4008,16 @@
         "subs	%[d0], %[d0], x4\n\t"
         "sbc	%[d1], %[d1], x3\n\t"
 
-        "lsl	x3, %[d1], 32\n\t"
-        "orr	x3, x3, %[d0], lsr 32\n\t"
+        "lsr	x3, %[d0], 32\n\t"
+        "orr	x3, x3, %[d1], lsl 32\n\t"
 
         "udiv	x3, x3, x5\n\t"
         "add	x6, x6, x3\n\t"
         "mul	x4, %[div], x3\n\t"
-        "umulh	x3, %[div], x3\n\t"
-        "subs	%[d0], %[d0], x4\n\t"
-        "sbc	%[d1], %[d1], x3\n\t"
+        "sub	%[d0], %[d0], x4\n\t"
 
         "udiv	x3, %[d0], %[div]\n\t"
-        "add	x6, x6, x3\n\t"
-        "mul	x3, %[div], x3\n\t"
-        "sub	%[d0], %[d0], x3\n\t"
-        "mov	%[r], x6\n\t"
+        "add	%[r], x6, x3\n\t"
 
         : [r] "=r" (r)
         : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
@@ -3962,13 +4033,14 @@
  * a  A single precision integer.
  * m  Mask to AND against each digit.
  */
-static void sp_2048_mask_32(sp_digit* r, sp_digit* a, sp_digit m)
-{
-#ifdef WOLFSSL_SP_SMALL
-    int i;
-
-    for (i=0; i<32; i++)
+static void sp_2048_mask_32(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<32; i++) {
         r[i] = a[i] & m;
+    }
 #else
     int i;
 
@@ -3992,298 +4064,267 @@
  * return -ve, 0 or +ve if a is less than, equal to or greater than b
  * respectively.
  */
-static int64_t sp_2048_cmp_32(sp_digit* a, sp_digit* b)
-{
-    sp_digit r = -1;
-    sp_digit one = 1;
-
-#ifdef WOLFSSL_SP_SMALL
-    __asm__ __volatile__ (
-        "mov	x3, -1\n\t"
-        "mov	x6, 248\n\t"
+static int64_t sp_2048_cmp_32(const sp_digit* a, const sp_digit* b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    __asm__ __volatile__ (
+        "mov	x2, -1\n\t"
+        "mov	x3, 1\n\t"
+        "mov	x4, -1\n\t"
+        "mov	x5, 248\n\t"
         "1:\n\t"
-        "ldr	x4, [%[a], x6]\n\t"
-        "ldr	x5, [%[b], x6]\n\t"
-        "and	x4, x4, x3\n\t"
-        "and	x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "sub	x6, x6, #8\n\t"
-        "b.cc	1b\n\t"
-        "eor	%[r], %[r], x3\n\t"
-        : [r] "+r" (r)
-        : [a] "r" (a), [b] "r" (b), [one] "r" (one)
-        : "x2", "x3", "x4", "x5", "x6"
-    );
-#else
-    __asm__ __volatile__ (
-        "mov	x3, -1\n\t"
-        "ldr		x4, [%[a], 248]\n\t"
-        "ldr		x5, [%[b], 248]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 240]\n\t"
-        "ldr		x5, [%[b], 240]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 232]\n\t"
-        "ldr		x5, [%[b], 232]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 224]\n\t"
-        "ldr		x5, [%[b], 224]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 216]\n\t"
-        "ldr		x5, [%[b], 216]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 208]\n\t"
-        "ldr		x5, [%[b], 208]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 200]\n\t"
-        "ldr		x5, [%[b], 200]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 192]\n\t"
-        "ldr		x5, [%[b], 192]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 184]\n\t"
-        "ldr		x5, [%[b], 184]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 176]\n\t"
-        "ldr		x5, [%[b], 176]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 168]\n\t"
-        "ldr		x5, [%[b], 168]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 160]\n\t"
-        "ldr		x5, [%[b], 160]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 152]\n\t"
-        "ldr		x5, [%[b], 152]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 144]\n\t"
-        "ldr		x5, [%[b], 144]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 136]\n\t"
-        "ldr		x5, [%[b], 136]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 128]\n\t"
-        "ldr		x5, [%[b], 128]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 120]\n\t"
-        "ldr		x5, [%[b], 120]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 112]\n\t"
-        "ldr		x5, [%[b], 112]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 104]\n\t"
-        "ldr		x5, [%[b], 104]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 96]\n\t"
-        "ldr		x5, [%[b], 96]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 88]\n\t"
-        "ldr		x5, [%[b], 88]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 80]\n\t"
-        "ldr		x5, [%[b], 80]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 72]\n\t"
-        "ldr		x5, [%[b], 72]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 64]\n\t"
-        "ldr		x5, [%[b], 64]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 56]\n\t"
-        "ldr		x5, [%[b], 56]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 48]\n\t"
-        "ldr		x5, [%[b], 48]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 40]\n\t"
-        "ldr		x5, [%[b], 40]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 32]\n\t"
-        "ldr		x5, [%[b], 32]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 24]\n\t"
-        "ldr		x5, [%[b], 24]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 16]\n\t"
-        "ldr		x5, [%[b], 16]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 8]\n\t"
-        "ldr		x5, [%[b], 8]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 0]\n\t"
-        "ldr		x5, [%[b], 0]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "eor	%[r], %[r], x3\n\t"
-        : [r] "+r" (r)
-        : [a] "r" (a), [b] "r" (b), [one] "r" (one)
-        : "x2", "x3", "x4", "x5", "x6"
-    );
-#endif
-
-    return r;
+        "ldr	x6, [%[a], x5]\n\t"
+        "ldr	x7, [%[b], x5]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x6, x6, x7\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "subs	x5, x5, #8\n\t"
+        "b.cs	1b\n\t"
+        "eor	%[a], x2, x4\n\t"
+        : [a] "+r" (a)
+        : [b] "r" (b)
+        : "x2", "x3", "x4", "x5", "x6", "x7", "x8"
+    );
+#else
+    __asm__ __volatile__ (
+        "mov	x2, -1\n\t"
+        "mov	x3, 1\n\t"
+        "mov  x4, -1\n\t"
+        "ldp	x5, x6, [%[a], 240]\n\t"
+        "ldp	x7, x8, [%[b], 240]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 224]\n\t"
+        "ldp	x7, x8, [%[b], 224]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 208]\n\t"
+        "ldp	x7, x8, [%[b], 208]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 192]\n\t"
+        "ldp	x7, x8, [%[b], 192]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 176]\n\t"
+        "ldp	x7, x8, [%[b], 176]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 160]\n\t"
+        "ldp	x7, x8, [%[b], 160]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 144]\n\t"
+        "ldp	x7, x8, [%[b], 144]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 128]\n\t"
+        "ldp	x7, x8, [%[b], 128]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 112]\n\t"
+        "ldp	x7, x8, [%[b], 112]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 96]\n\t"
+        "ldp	x7, x8, [%[b], 96]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 80]\n\t"
+        "ldp	x7, x8, [%[b], 80]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 64]\n\t"
+        "ldp	x7, x8, [%[b], 64]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 48]\n\t"
+        "ldp	x7, x8, [%[b], 48]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 32]\n\t"
+        "ldp	x7, x8, [%[b], 32]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 16]\n\t"
+        "ldp	x7, x8, [%[b], 16]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 0]\n\t"
+        "ldp	x7, x8, [%[b], 0]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "eor	%[a], x2, x4\n\t"
+        : [a] "+r" (a)
+        : [b] "r" (b)
+        : "x2", "x3", "x4", "x5", "x6", "x7", "x8"
+    );
+#endif
+
+    return (int64_t)a;
 }
 
 /* Divide d in a and put remainder into r (m*d + r = a)
@@ -4295,7 +4336,7 @@
  * r  Remainder from the division.
  * returns MP_OKAY indicating success.
  */
-static WC_INLINE int sp_2048_div_32(sp_digit* a, sp_digit* d, sp_digit* m,
+static WC_INLINE int sp_2048_div_32(const sp_digit* a, const sp_digit* d, sp_digit* m,
         sp_digit* r)
 {
     sp_digit t1[64], t2[33];
@@ -4319,7 +4360,7 @@
     }
 
     r1 = sp_2048_cmp_32(t1, d) >= 0;
-    sp_2048_cond_sub_32(r, t1, t2, (sp_digit)0 - r1);
+    sp_2048_cond_sub_32(r, t1, d, (sp_digit)0 - r1);
 
     return MP_OKAY;
 }
@@ -4331,11 +4372,149 @@
  * m  A single precision number that is the modulus to reduce with.
  * returns MP_OKAY indicating success.
  */
-static WC_INLINE int sp_2048_mod_32(sp_digit* r, sp_digit* a, sp_digit* m)
+static WC_INLINE int sp_2048_mod_32(sp_digit* r, const sp_digit* a, const sp_digit* m)
 {
     return sp_2048_div_32(a, m, NULL, r);
 }
 
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+static sp_digit sp_2048_sub_32(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "add	x11, %[a], 256\n\t"
+        "\n1:\n\t"
+        "subs	%[c], xzr, %[c]\n\t"
+        "ldp	x3, x4, [%[a]], #16\n\t"
+        "ldp	x5, x6, [%[a]], #16\n\t"
+        "ldp	x7, x8, [%[b]], #16\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x9, x10, [%[b]], #16\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r]], #16\n\t"
+        "sbcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r]], #16\n\t"
+        "csetm	%[c], cc\n\t"
+        "cmp	%[a], x11\n\t"
+        "b.ne	1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11"
+    );
+
+    return c;
+}
+
+#else
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+static sp_digit sp_2048_sub_32(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    __asm__ __volatile__ (
+        "ldp	x3, x4, [%[a], 0]\n\t"
+        "ldp	x7, x8, [%[b], 0]\n\t"
+        "subs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 16]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 16]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 0]\n\t"
+        "sbcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 16]\n\t"
+        "ldp	x3, x4, [%[a], 32]\n\t"
+        "ldp	x7, x8, [%[b], 32]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 48]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 48]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 32]\n\t"
+        "sbcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 48]\n\t"
+        "ldp	x3, x4, [%[a], 64]\n\t"
+        "ldp	x7, x8, [%[b], 64]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 80]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 80]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 64]\n\t"
+        "sbcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 80]\n\t"
+        "ldp	x3, x4, [%[a], 96]\n\t"
+        "ldp	x7, x8, [%[b], 96]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 112]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 112]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 96]\n\t"
+        "sbcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 112]\n\t"
+        "ldp	x3, x4, [%[a], 128]\n\t"
+        "ldp	x7, x8, [%[b], 128]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 144]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 144]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 128]\n\t"
+        "sbcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 144]\n\t"
+        "ldp	x3, x4, [%[a], 160]\n\t"
+        "ldp	x7, x8, [%[b], 160]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 176]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 176]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 160]\n\t"
+        "sbcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 176]\n\t"
+        "ldp	x3, x4, [%[a], 192]\n\t"
+        "ldp	x7, x8, [%[b], 192]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 208]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 208]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 192]\n\t"
+        "sbcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 208]\n\t"
+        "ldp	x3, x4, [%[a], 224]\n\t"
+        "ldp	x7, x8, [%[b], 224]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 240]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 240]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 224]\n\t"
+        "sbcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 240]\n\t"
+        "csetm	%[r], cc\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10"
+    );
+
+    return (sp_digit)r;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
 /* Divide d in a and put remainder into r (m*d + r = a)
  * m is not calculated as it is not needed at this time.
  *
@@ -4345,7 +4524,7 @@
  * r  Remainder from the division.
  * returns MP_OKAY indicating success.
  */
-static WC_INLINE int sp_2048_div_32_cond(sp_digit* a, sp_digit* d, sp_digit* m,
+static WC_INLINE int sp_2048_div_32_cond(const sp_digit* a, const sp_digit* d, sp_digit* m,
         sp_digit* r)
 {
     sp_digit t1[64], t2[33];
@@ -4369,8 +4548,16 @@
         }
     }
 
-    r1 = sp_2048_cmp_32(t1, d) >= 0;
-    sp_2048_cond_sub_32(r, t1, t2, (sp_digit)0 - r1);
+    for (i = 31; i > 0; i--) {
+        if (t1[i] != d[i])
+            break;
+    }
+    if (t1[i] >= d[i]) {
+        sp_2048_sub_32(r, t1, d);
+    }
+    else {
+        XMEMCPY(r, t1, sizeof(*t1) * 32);
+    }
 
     return MP_OKAY;
 }
@@ -4382,12 +4569,13 @@
  * m  A single precision number that is the modulus to reduce with.
  * returns MP_OKAY indicating success.
  */
-static WC_INLINE int sp_2048_mod_32_cond(sp_digit* r, sp_digit* a, sp_digit* m)
+static WC_INLINE int sp_2048_mod_32_cond(sp_digit* r, const sp_digit* a, const sp_digit* m)
 {
     return sp_2048_div_32_cond(a, m, NULL, r);
 }
 
-#if defined(SP_RSA_PRIVATE_EXP_D) || defined(WOLFSSL_HAVE_SP_DH)
+#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
+                                                     defined(WOLFSSL_HAVE_SP_DH)
 #ifdef WOLFSSL_SP_SMALL
 /* Modular exponentiate a to the e mod m. (r = a^e mod m)
  *
@@ -4398,8 +4586,8 @@
  * m     A single precision number that is the modulus.
  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
  */
-static int sp_2048_mod_exp_32(sp_digit* r, sp_digit* a, sp_digit* e,
-        int bits, sp_digit* m, int reduceA)
+static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
 {
 #ifndef WOLFSSL_SMALL_STACK
     sp_digit t[16][64];
@@ -4418,27 +4606,28 @@
 #ifdef WOLFSSL_SMALL_STACK
     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 64, NULL,
                             DYNAMIC_TYPE_TMP_BUFFER);
-    if (td == NULL)
+    if (td == NULL) {
         err = MEMORY_E;
-
-    if (err == MP_OKAY) {
-        for (i=0; i<16; i++)
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<16; i++) {
             t[i] = td + i * 64;
+        }
+#endif
         norm = t[0];
-    }
-#else
-    norm = t[0];
-#endif
-
-    if (err == MP_OKAY) {
+
         sp_2048_mont_setup(m, &mp);
         sp_2048_mont_norm_32(norm, m);
 
-        XMEMSET(t[1], 0, sizeof(sp_digit) * 32);
-        if (reduceA) {
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 32U);
+        if (reduceA != 0) {
             err = sp_2048_mod_32(t[1] + 32, a, m);
-            if (err == MP_OKAY)
+            if (err == MP_OKAY) {
                 err = sp_2048_mod_32(t[1], t[1], m);
+            }
         }
         else {
             XMEMCPY(t[1] + 32, a, sizeof(sp_digit) * 32);
@@ -4464,9 +4653,16 @@
 
         i = (bits - 1) / 64;
         n = e[i--];
-        y = n >> 60;
-        n <<= 4;
-        c = 60;
+        c = bits & 63;
+        if (c == 0) {
+            c = 64;
+        }
+        c -= bits % 4;
+        if (c == 64) {
+            c = 60;
+        }
+        y = (int)(n >> c);
+        n <<= 64 - c;
         XMEMCPY(r, t[y], sizeof(sp_digit) * 32);
         for (; i>=0 || c>=4; ) {
             if (c == 0) {
@@ -4497,7 +4693,7 @@
             sp_2048_mont_mul_32(r, r, t[y], m, mp);
         }
 
-        XMEMSET(&r[32], 0, sizeof(sp_digit) * 32);
+        XMEMSET(&r[32], 0, sizeof(sp_digit) * 32U);
         sp_2048_mont_reduce_32(r, m, mp);
 
         mask = 0 - (sp_2048_cmp_32(r, m) >= 0);
@@ -4505,8 +4701,9 @@
     }
 
 #ifdef WOLFSSL_SMALL_STACK
-    if (td != NULL)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
 #endif
 
     return err;
@@ -4521,8 +4718,8 @@
  * m     A single precision number that is the modulus.
  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
  */
-static int sp_2048_mod_exp_32(sp_digit* r, sp_digit* a, sp_digit* e,
-        int bits, sp_digit* m, int reduceA)
+static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
 {
 #ifndef WOLFSSL_SMALL_STACK
     sp_digit t[32][64];
@@ -4541,27 +4738,28 @@
 #ifdef WOLFSSL_SMALL_STACK
     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 64, NULL,
                             DYNAMIC_TYPE_TMP_BUFFER);
-    if (td == NULL)
+    if (td == NULL) {
         err = MEMORY_E;
-
-    if (err == MP_OKAY) {
-        for (i=0; i<32; i++)
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<32; i++) {
             t[i] = td + i * 64;
+        }
+#endif
         norm = t[0];
-    }
-#else
-    norm = t[0];
-#endif
-
-    if (err == MP_OKAY) {
+
         sp_2048_mont_setup(m, &mp);
         sp_2048_mont_norm_32(norm, m);
 
-        XMEMSET(t[1], 0, sizeof(sp_digit) * 32);
-        if (reduceA) {
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 32U);
+        if (reduceA != 0) {
             err = sp_2048_mod_32(t[1] + 32, a, m);
-            if (err == MP_OKAY)
+            if (err == MP_OKAY) {
                 err = sp_2048_mod_32(t[1], t[1], m);
+            }
         }
         else {
             XMEMCPY(t[1] + 32, a, sizeof(sp_digit) * 32);
@@ -4603,9 +4801,16 @@
 
         i = (bits - 1) / 64;
         n = e[i--];
-        y = n >> 59;
-        n <<= 5;
-        c = 59;
+        c = bits & 63;
+        if (c == 0) {
+            c = 64;
+        }
+        c -= bits % 5;
+        if (c == 64) {
+            c = 59;
+        }
+        y = (int)(n >> c);
+        n <<= 64 - c;
         XMEMCPY(r, t[y], sizeof(sp_digit) * 32);
         for (; i>=0 || c>=5; ) {
             if (c == 0) {
@@ -4636,13 +4841,8 @@
 
             sp_2048_mont_mul_32(r, r, t[y], m, mp);
         }
-        y = e[0] & 0x7;
-        sp_2048_mont_sqr_32(r, r, m, mp);
-        sp_2048_mont_sqr_32(r, r, m, mp);
-        sp_2048_mont_sqr_32(r, r, m, mp);
-        sp_2048_mont_mul_32(r, r, t[y], m, mp);
-
-        XMEMSET(&r[32], 0, sizeof(sp_digit) * 32);
+
+        XMEMSET(&r[32], 0, sizeof(sp_digit) * 32U);
         sp_2048_mont_reduce_32(r, m, mp);
 
         mask = 0 - (sp_2048_cmp_32(r, m) >= 0);
@@ -4650,14 +4850,15 @@
     }
 
 #ifdef WOLFSSL_SMALL_STACK
-    if (td != NULL)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return err;
-}
-#endif /* WOLFSSL_SP_SMALL */
-#endif /* SP_RSA_PRIVATE_EXP_D || WOLFSSL_HAVE_SP_DH */
+    }
+#endif
+
+    return err;
+}
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */
 
 #ifdef WOLFSSL_HAVE_SP_RSA
 /* RSA public key operation.
@@ -4675,15 +4876,15 @@
 int sp_RsaPublic_2048(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
     byte* out, word32* outLen)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_digit ad[64], md[32], rd[64];
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit a[64], m[32], r[64];
 #else
     sp_digit* d = NULL;
-#endif
     sp_digit* a;
-    sp_digit *ah;
     sp_digit* m;
     sp_digit* r;
+#endif
+    sp_digit *ah;
     sp_digit e[1];
     int err = MP_OKAY;
 
@@ -4693,10 +4894,10 @@
                                                      mp_count_bits(mm) != 2048))
         err = MP_READ_E;
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     if (err == MP_OKAY) {
         d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 5, NULL,
-                               DYNAMIC_TYPE_TMP_BUFFER);
+                                                              DYNAMIC_TYPE_RSA);
         if (d == NULL)
             err = MEMORY_E;
     }
@@ -4705,26 +4906,24 @@
         a = d;
         r = a + 32 * 2;
         m = r + 32 * 2;
+    }
+#endif
+
+    if (err == MP_OKAY) {
         ah = a + 32;
-    }
-#else
-    a = ad;
-    m = md;
-    r = rd;
-    ah = a + 32;
-#endif
-
-    if (err == MP_OKAY) {
+
         sp_2048_from_bin(ah, 32, in, inLen);
 #if DIGIT_BIT >= 64
         e[0] = em->dp[0];
 #else
         e[0] = em->dp[0];
-        if (em->used > 1)
+        if (em->used > 1) {
             e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
-#endif
-        if (e[0] == 0)
+        }
+#endif
+        if (e[0] == 0) {
             err = MP_EXPTMOD_E;
+        }
     }
     if (err == MP_OKAY) {
         sp_2048_from_mp(m, 32, mm);
@@ -4750,25 +4949,30 @@
             err = sp_2048_mod_32_cond(a, a, m);
 
             if (err == MP_OKAY) {
-                for (i=63; i>=0; i--)
-                    if (e[0] >> i)
+                for (i = 63; i >= 0; i--) {
+                    if (e[0] >> i) {
                         break;
+                    }
+                }
 
                 XMEMCPY(r, a, sizeof(sp_digit) * 32);
                 for (i--; i>=0; i--) {
                     sp_2048_mont_sqr_32(r, r, m, mp);
-                    if (((e[0] >> i) & 1) == 1)
+                    if (((e[0] >> i) & 1) == 1) {
                         sp_2048_mont_mul_32(r, r, a, m, mp);
+                    }
                 }
                 XMEMSET(&r[32], 0, sizeof(sp_digit) * 32);
                 sp_2048_mont_reduce_32(r, m, mp);
 
                 for (i = 31; i > 0; i--) {
-                    if (r[i] != m[i])
+                    if (r[i] != m[i]) {
                         break;
+                    }
                 }
-                if (r[i] >= m[i])
+                if (r[i] >= m[i]) {
                     sp_2048_sub_in_place_32(r, m);
+                }
             }
         }
     }
@@ -4778,12 +4982,173 @@
         *outLen = 256;
     }
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (d != NULL)
-        XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return err;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+#endif
+
+    return err;
+}
+
+#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
+    sp_digit* a;
+    sp_digit* d = NULL;
+    sp_digit* m;
+    sp_digit* r;
+    int err = MP_OKAY;
+
+    (void)pm;
+    (void)qm;
+    (void)dpm;
+    (void)dqm;
+    (void)qim;
+
+    if (*outLen < 256U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(dm) > 2048) {
+           err = MP_READ_E;
+        }
+        if (inLen > 256) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 2048) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 4, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        a = d + 32;
+        m = a + 64;
+        r = a;
+
+        sp_2048_from_bin(a, 32, in, inLen);
+        sp_2048_from_mp(d, 32, dm);
+        sp_2048_from_mp(m, 32, mm);
+        err = sp_2048_mod_exp_32(r, a, d, 2048, m, 0);
+    }
+    if (err == MP_OKAY) {
+        sp_2048_to_bin(r, out);
+        *outLen = 256;
+    }
+
+    if (d != NULL) {
+        XMEMSET(d, 0, sizeof(sp_digit) * 32);
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+
+    return err;
+#else
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r  A single precision number representing conditional add result.
+ * a  A single precision number to add with.
+ * b  A single precision number to add.
+ * m  Mask value to apply.
+ */
+static sp_digit sp_2048_cond_add_16(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	x8, #0\n\t"
+        "1:\n\t"
+        "adds	%[c], %[c], #-1\n\t"
+        "ldr	x4, [%[a], x8]\n\t"
+        "ldr	x5, [%[b], x8]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "adcs	x4, x4, x5\n\t"
+        "cset	%[c], cs\n\t"
+        "str	x4, [%[r], x8]\n\t"
+        "add	x8, x8, #8\n\t"
+        "cmp	x8, 128\n\t"
+        "b.lt	1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12"
+    );
+
+    return c;
+#else
+    __asm__ __volatile__ (
+
+        "ldp	x5, x7, [%[b], 0]\n\t"
+        "ldp	x11, x12, [%[b], 16]\n\t"
+        "ldp	x4, x6, [%[a], 0]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 16]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "adds	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "adcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "adcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 0]\n\t"
+        "adcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 16]\n\t"
+        "ldp	x5, x7, [%[b], 32]\n\t"
+        "ldp	x11, x12, [%[b], 48]\n\t"
+        "ldp	x4, x6, [%[a], 32]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 48]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "adcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "adcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "adcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 32]\n\t"
+        "adcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 48]\n\t"
+        "ldp	x5, x7, [%[b], 64]\n\t"
+        "ldp	x11, x12, [%[b], 80]\n\t"
+        "ldp	x4, x6, [%[a], 64]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 80]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "adcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "adcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "adcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 64]\n\t"
+        "adcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 80]\n\t"
+        "ldp	x5, x7, [%[b], 96]\n\t"
+        "ldp	x11, x12, [%[b], 112]\n\t"
+        "ldp	x4, x6, [%[a], 96]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 112]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "adcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "adcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "adcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 96]\n\t"
+        "adcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 112]\n\t"
+        "cset	%[r], cs\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12"
+    );
+
+    return (sp_digit)r;
+#endif /* WOLFSSL_SP_SMALL */
 }
 
 /* RSA private key operation.
@@ -4807,23 +5172,22 @@
     mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
     byte* out, word32* outLen)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_digit ad[32 * 2];
-    sp_digit pd[16], qd[16], dpd[16];
-    sp_digit tmpad[32], tmpbd[32];
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit a[32 * 2];
+    sp_digit p[16], q[16], dp[16];
+    sp_digit tmpa[32], tmpb[32];
 #else
     sp_digit* t = NULL;
-#endif
     sp_digit* a;
     sp_digit* p;
     sp_digit* q;
     sp_digit* dp;
-    sp_digit* dq;
-    sp_digit* qi;
-    sp_digit* tmp;
     sp_digit* tmpa;
     sp_digit* tmpb;
+#endif
     sp_digit* r;
+    sp_digit* qi;
+    sp_digit* dq;
     sp_digit c;
     int err = MP_OKAY;
 
@@ -4835,10 +5199,10 @@
     if (err == MP_OKAY && (inLen > 256 || mp_count_bits(mm) != 2048))
         err = MP_READ_E;
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     if (err == MP_OKAY) {
         t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 11, NULL,
-                               DYNAMIC_TYPE_TMP_BUFFER);
+                                                              DYNAMIC_TYPE_RSA);
         if (t == NULL)
             err = MEMORY_E;
     }
@@ -4850,20 +5214,16 @@
         tmpa = qi + 16;
         tmpb = tmpa + 32;
 
-        tmp = t;
-        r = tmp + 32;
-    }
-#else
-    r = a = ad;
-    p = pd;
-    q = qd;
-    qi = dq = dp = dpd;
-    tmpa = tmpad;
-    tmpb = tmpbd;
-    tmp = a + 32;
-#endif
-
-    if (err == MP_OKAY) {
+        r = t + 32;
+    }
+#else
+#endif
+
+    if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+        r = a;
+        qi = dq = dp;
+#endif
         sp_2048_from_bin(a, 32, in, inLen);
         sp_2048_from_mp(p, 16, pm);
         sp_2048_from_mp(q, 16, qm);
@@ -4878,8 +5238,8 @@
 
     if (err == MP_OKAY) {
         c = sp_2048_sub_in_place_16(tmpa, tmpb);
-        sp_2048_mask_16(tmp, p, c);
-        sp_2048_add_16(tmpa, tmpa, tmp);
+        c += sp_2048_cond_add_16(tmpa, tmpa, p, c);
+        sp_2048_cond_add_16(tmpa, tmpa, p, c);
 
         sp_2048_from_mp(qi, 16, qim);
         sp_2048_mul_16(tmpa, tmpa, qi);
@@ -4895,34 +5255,37 @@
         *outLen = 256;
     }
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     if (t != NULL) {
         XMEMSET(t, 0, sizeof(sp_digit) * 16 * 11);
-        XFREE(t, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    }
-#else
-    XMEMSET(tmpad, 0, sizeof(tmpad));
-    XMEMSET(tmpbd, 0, sizeof(tmpbd));
-    XMEMSET(pd, 0, sizeof(pd));
-    XMEMSET(qd, 0, sizeof(qd));
-    XMEMSET(dpd, 0, sizeof(dpd));
-#endif
-
-    return err;
-}
+        XFREE(t, NULL, DYNAMIC_TYPE_RSA);
+    }
+#else
+    XMEMSET(tmpa, 0, sizeof(tmpa));
+    XMEMSET(tmpb, 0, sizeof(tmpb));
+    XMEMSET(p,    0, sizeof(p));
+    XMEMSET(q,    0, sizeof(q));
+    XMEMSET(dp,   0, sizeof(dp));
+#endif
+
+    return err;
+}
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
+#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
 #endif /* WOLFSSL_HAVE_SP_RSA */
-#ifdef WOLFSSL_HAVE_SP_DH
+#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
+                                              !defined(WOLFSSL_RSA_PUBLIC_ONLY))
 /* Convert an array of sp_digit to an mp_int.
  *
  * a  A single precision integer.
  * r  A multi-precision integer.
  */
-static int sp_2048_to_mp(sp_digit* a, mp_int* r)
+static int sp_2048_to_mp(const sp_digit* a, mp_int* r)
 {
     int err;
 
     err = mp_grow(r, (2048 + DIGIT_BIT - 1) / DIGIT_BIT);
-    if (err == MP_OKAY) {
+    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
 #if DIGIT_BIT == 64
         XMEMCPY(r->dp, a, sizeof(sp_digit) * 32);
         r->used = 32;
@@ -4932,14 +5295,19 @@
 
         r->dp[0] = 0;
         for (i = 0; i < 32; i++) {
-            r->dp[j] |= a[i] << s;
-            r->dp[j] &= (1l << DIGIT_BIT) - 1;
+            r->dp[j] |= (mp_digit)(a[i] << s);
+            r->dp[j] &= (1L << DIGIT_BIT) - 1;
             s = DIGIT_BIT - s;
-            r->dp[++j] = a[i] >> s;
+            r->dp[++j] = (mp_digit)(a[i] >> s);
             while (s + DIGIT_BIT <= 64) {
                 s += DIGIT_BIT;
-                r->dp[j] &= (1l << DIGIT_BIT) - 1;
-                r->dp[++j] = a[i] >> s;
+                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+                if (s == SP_WORD_SIZE) {
+                    r->dp[j] = 0;
+                }
+                else {
+                    r->dp[j] = (mp_digit)(a[i] >> s);
+                }
             }
             s = 64 - s;
         }
@@ -4952,15 +5320,16 @@
         for (i = 0; i < 32; i++) {
             r->dp[j] |= ((mp_digit)a[i]) << s;
             if (s + 64 >= DIGIT_BIT) {
-    #if DIGIT_BIT < 64
-                r->dp[j] &= (1l << DIGIT_BIT) - 1;
+    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+                r->dp[j] &= (1L << DIGIT_BIT) - 1;
     #endif
                 s = DIGIT_BIT - s;
                 r->dp[++j] = a[i] >> s;
                 s = 64 - s;
             }
-            else
+            else {
                 s += 64;
+            }
         }
         r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT;
         mp_clamp(r);
@@ -4976,7 +5345,7 @@
  * exp   Exponent. MP integer.
  * mod   Modulus. MP integer.
  * res   Result. MP integer.
- * returs 0 on success, MP_READ_E if there are too many bytes in an array
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
  * and MEMORY_E if memory allocation fails.
  */
 int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
@@ -4986,12 +5355,23 @@
     sp_digit* r = b;
     int expBits = mp_count_bits(exp);
 
-    if (mp_count_bits(base) > 2048 || expBits > 2048 ||
-                                                   mp_count_bits(mod) != 2048) {
+    if (mp_count_bits(base) > 2048) {
         err = MP_READ_E;
     }
 
     if (err == MP_OKAY) {
+        if (expBits > 2048) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 2048) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
         sp_2048_from_mp(b, 32, base);
         sp_2048_from_mp(e, 32, exp);
         sp_2048_from_mp(m, 32, mod);
@@ -5008,6 +5388,323 @@
     return err;
 }
 
+#ifdef WOLFSSL_HAVE_SP_DH
+
+#ifdef HAVE_FFDHE_2048
+static void sp_2048_lshift_32(sp_digit* r, sp_digit* a, byte n)
+{
+    __asm__ __volatile__ (
+        "mov	x6, 63\n\t"
+        "sub	x6, x6, %[n]\n\t"
+        "ldr	x3, [%[a], 248]\n\t"
+        "lsr	x4, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x4, x4, x6\n\t"
+        "ldr	x2, [%[a], 240]\n\t"
+        "str	x4, [%[r], 256]\n\t"
+        "lsr	x5, x2, 1\n\t"
+        "lsl	x2, x2, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x3, x3, x5\n\t"
+        "ldr	x4, [%[a], 232]\n\t"
+        "str	x3, [%[r], 248]\n\t"
+        "lsr	x5, x4, 1\n\t"
+        "lsl	x4, x4, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x2, x2, x5\n\t"
+        "ldr	x3, [%[a], 224]\n\t"
+        "str	x2, [%[r], 240]\n\t"
+        "lsr	x5, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x4, x4, x5\n\t"
+        "ldr	x2, [%[a], 216]\n\t"
+        "str	x4, [%[r], 232]\n\t"
+        "lsr	x5, x2, 1\n\t"
+        "lsl	x2, x2, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x3, x3, x5\n\t"
+        "ldr	x4, [%[a], 208]\n\t"
+        "str	x3, [%[r], 224]\n\t"
+        "lsr	x5, x4, 1\n\t"
+        "lsl	x4, x4, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x2, x2, x5\n\t"
+        "ldr	x3, [%[a], 200]\n\t"
+        "str	x2, [%[r], 216]\n\t"
+        "lsr	x5, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x4, x4, x5\n\t"
+        "ldr	x2, [%[a], 192]\n\t"
+        "str	x4, [%[r], 208]\n\t"
+        "lsr	x5, x2, 1\n\t"
+        "lsl	x2, x2, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x3, x3, x5\n\t"
+        "ldr	x4, [%[a], 184]\n\t"
+        "str	x3, [%[r], 200]\n\t"
+        "lsr	x5, x4, 1\n\t"
+        "lsl	x4, x4, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x2, x2, x5\n\t"
+        "ldr	x3, [%[a], 176]\n\t"
+        "str	x2, [%[r], 192]\n\t"
+        "lsr	x5, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x4, x4, x5\n\t"
+        "ldr	x2, [%[a], 168]\n\t"
+        "str	x4, [%[r], 184]\n\t"
+        "lsr	x5, x2, 1\n\t"
+        "lsl	x2, x2, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x3, x3, x5\n\t"
+        "ldr	x4, [%[a], 160]\n\t"
+        "str	x3, [%[r], 176]\n\t"
+        "lsr	x5, x4, 1\n\t"
+        "lsl	x4, x4, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x2, x2, x5\n\t"
+        "ldr	x3, [%[a], 152]\n\t"
+        "str	x2, [%[r], 168]\n\t"
+        "lsr	x5, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x4, x4, x5\n\t"
+        "ldr	x2, [%[a], 144]\n\t"
+        "str	x4, [%[r], 160]\n\t"
+        "lsr	x5, x2, 1\n\t"
+        "lsl	x2, x2, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x3, x3, x5\n\t"
+        "ldr	x4, [%[a], 136]\n\t"
+        "str	x3, [%[r], 152]\n\t"
+        "lsr	x5, x4, 1\n\t"
+        "lsl	x4, x4, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x2, x2, x5\n\t"
+        "ldr	x3, [%[a], 128]\n\t"
+        "str	x2, [%[r], 144]\n\t"
+        "lsr	x5, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x4, x4, x5\n\t"
+        "ldr	x2, [%[a], 120]\n\t"
+        "str	x4, [%[r], 136]\n\t"
+        "lsr	x5, x2, 1\n\t"
+        "lsl	x2, x2, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x3, x3, x5\n\t"
+        "ldr	x4, [%[a], 112]\n\t"
+        "str	x3, [%[r], 128]\n\t"
+        "lsr	x5, x4, 1\n\t"
+        "lsl	x4, x4, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x2, x2, x5\n\t"
+        "ldr	x3, [%[a], 104]\n\t"
+        "str	x2, [%[r], 120]\n\t"
+        "lsr	x5, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x4, x4, x5\n\t"
+        "ldr	x2, [%[a], 96]\n\t"
+        "str	x4, [%[r], 112]\n\t"
+        "lsr	x5, x2, 1\n\t"
+        "lsl	x2, x2, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x3, x3, x5\n\t"
+        "ldr	x4, [%[a], 88]\n\t"
+        "str	x3, [%[r], 104]\n\t"
+        "lsr	x5, x4, 1\n\t"
+        "lsl	x4, x4, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x2, x2, x5\n\t"
+        "ldr	x3, [%[a], 80]\n\t"
+        "str	x2, [%[r], 96]\n\t"
+        "lsr	x5, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x4, x4, x5\n\t"
+        "ldr	x2, [%[a], 72]\n\t"
+        "str	x4, [%[r], 88]\n\t"
+        "lsr	x5, x2, 1\n\t"
+        "lsl	x2, x2, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x3, x3, x5\n\t"
+        "ldr	x4, [%[a], 64]\n\t"
+        "str	x3, [%[r], 80]\n\t"
+        "lsr	x5, x4, 1\n\t"
+        "lsl	x4, x4, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x2, x2, x5\n\t"
+        "ldr	x3, [%[a], 56]\n\t"
+        "str	x2, [%[r], 72]\n\t"
+        "lsr	x5, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x4, x4, x5\n\t"
+        "ldr	x2, [%[a], 48]\n\t"
+        "str	x4, [%[r], 64]\n\t"
+        "lsr	x5, x2, 1\n\t"
+        "lsl	x2, x2, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x3, x3, x5\n\t"
+        "ldr	x4, [%[a], 40]\n\t"
+        "str	x3, [%[r], 56]\n\t"
+        "lsr	x5, x4, 1\n\t"
+        "lsl	x4, x4, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x2, x2, x5\n\t"
+        "ldr	x3, [%[a], 32]\n\t"
+        "str	x2, [%[r], 48]\n\t"
+        "lsr	x5, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x4, x4, x5\n\t"
+        "ldr	x2, [%[a], 24]\n\t"
+        "str	x4, [%[r], 40]\n\t"
+        "lsr	x5, x2, 1\n\t"
+        "lsl	x2, x2, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x3, x3, x5\n\t"
+        "ldr	x4, [%[a], 16]\n\t"
+        "str	x3, [%[r], 32]\n\t"
+        "lsr	x5, x4, 1\n\t"
+        "lsl	x4, x4, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x2, x2, x5\n\t"
+        "ldr	x3, [%[a], 8]\n\t"
+        "str	x2, [%[r], 24]\n\t"
+        "lsr	x5, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x4, x4, x5\n\t"
+        "ldr	x2, [%[a], 0]\n\t"
+        "str	x4, [%[r], 16]\n\t"
+        "lsr	x5, x2, 1\n\t"
+        "lsl	x2, x2, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x3, x3, x5\n\t"
+        "str	x2, [%[r]]\n\t"
+        "str	x3, [%[r], 8]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [n] "r" (n)
+        : "memory", "x2", "x3", "x4", "x5", "x6"
+    );
+}
+
+/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
+ *
+ * r     A single precision number that is the result of the operation.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_2048_mod_exp_2_32(sp_digit* r, const sp_digit* e, int bits,
+        const sp_digit* m)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit nd[64];
+    sp_digit td[33];
+#else
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit* tmp;
+    sp_digit mp = 1;
+    sp_digit n, o;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 97, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        norm = td;
+        tmp  = td + 64;
+#else
+        norm = nd;
+        tmp  = td;
+#endif
+
+        sp_2048_mont_setup(m, &mp);
+        sp_2048_mont_norm_32(norm, m);
+
+        i = (bits - 1) / 64;
+        n = e[i--];
+        c = bits & 63;
+        if (c == 0) {
+            c = 64;
+        }
+        c -= bits % 6;
+        if (c == 64) {
+            c = 58;
+        }
+        y = (int)(n >> c);
+        n <<= 64 - c;
+        sp_2048_lshift_32(r, norm, y);
+        for (; i>=0 || c>=6; ) {
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 58;
+                n <<= 6;
+                c = 58;
+            }
+            else if (c < 6) {
+                y = n >> 58;
+                n = e[i--];
+                c = 6 - c;
+                y |= n >> (64 - c);
+                n <<= c;
+                c = 64 - c;
+            }
+            else {
+                y = (n >> 58) & 0x3f;
+                n <<= 6;
+                c -= 6;
+            }
+
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+
+            sp_2048_lshift_32(r, r, y);
+            sp_2048_mul_d_32(tmp, norm, r[32]);
+            r[32] = 0;
+            o = sp_2048_add_32(r, r, tmp);
+            sp_2048_cond_sub_32(r, r, m, (sp_digit)0 - o);
+        }
+
+        XMEMSET(&r[32], 0, sizeof(sp_digit) * 32U);
+        sp_2048_mont_reduce_32(r, m, mp);
+
+        mask = 0 - (sp_2048_cmp_32(r, m) >= 0);
+        sp_2048_cond_sub_32(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#endif /* HAVE_FFDHE_2048 */
+
 /* Perform the modular exponentiation for Diffie-Hellman.
  *
  * base     Base.
@@ -5017,7 +5714,7 @@
  * out      Buffer to hold big-endian bytes of exponentiation result.
  *          Must be at least 256 bytes long.
  * outLen   Length, in bytes, of exponentiation result.
- * returs 0 on success, MP_READ_E if there are too many bytes in an array
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
  * and MEMORY_E if memory allocation fails.
  */
 int sp_DhExp_2048(mp_int* base, const byte* exp, word32 expLen,
@@ -5028,17 +5725,34 @@
     sp_digit* r = b;
     word32 i;
 
-    if (mp_count_bits(base) > 2048 || expLen > 256 ||
-                                                   mp_count_bits(mod) != 2048) {
+    if (mp_count_bits(base) > 2048) {
         err = MP_READ_E;
     }
 
     if (err == MP_OKAY) {
+        if (expLen > 256) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 2048) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
         sp_2048_from_mp(b, 32, base);
         sp_2048_from_bin(e, 32, exp, expLen);
         sp_2048_from_mp(m, 32, mod);
 
-        err = sp_2048_mod_exp_32(r, b, e, expLen * 8, m, 0);
+    #ifdef HAVE_FFDHE_2048
+        if (base->used == 1 && base->dp[0] == 2 && m[31] == (sp_digit)-1)
+            err = sp_2048_mod_exp_2_32(r, e, expLen * 8, m);
+        else
+    #endif
+            err = sp_2048_mod_exp_32(r, b, e, expLen * 8, m, 0);
+
     }
 
     if (err == MP_OKAY) {
@@ -5055,91 +5769,171 @@
 
     return err;
 }
-
 #endif /* WOLFSSL_HAVE_SP_DH */
 
-#endif /* WOLFSSL_SP_NO_2048 */
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base  Base. MP integer.
+ * exp   Exponent. MP integer.
+ * mod   Modulus. MP integer.
+ * res   Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_1024(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+    int err = MP_OKAY;
+    sp_digit b[32], e[16], m[16];
+    sp_digit* r = b;
+    int expBits = mp_count_bits(exp);
+
+    if (mp_count_bits(base) > 1024) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expBits > 1024) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 1024) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_from_mp(b, 16, base);
+        sp_2048_from_mp(e, 16, exp);
+        sp_2048_from_mp(m, 16, mod);
+
+        err = sp_2048_mod_exp_16(r, b, e, expBits, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        XMEMSET(r + 16, 0, sizeof(*r) * 16U);
+        err = sp_2048_to_mp(r, res);
+        res->used = mod->used;
+        mp_clamp(res);
+    }
+
+    XMEMSET(e, 0, sizeof(e));
+
+    return err;
+}
+
+#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
+
+#endif /* !WOLFSSL_SP_NO_2048 */
 
 #ifndef WOLFSSL_SP_NO_3072
-/* Read big endian unsigned byte aray into r.
- *
- * r  A single precision integer.
+/* Read big endian unsigned byte array into r.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
  * a  Byte array.
  * n  Number of bytes in array to read.
  */
-static void sp_3072_from_bin(sp_digit* r, int max, const byte* a, int n)
-{
-    int i, j = 0, s = 0;
-
-    r[0] = 0;
-    for (i = n-1; i >= 0; i--) {
-        r[j] |= ((sp_digit)a[i]) << s;
-        if (s >= 56) {
-            r[j] &= 0xffffffffffffffffl;
-            s = 64 - s;
-            if (j + 1 >= max)
-                break;
-            r[++j] = a[i] >> s;
-            s = 8 - s;
-        }
-        else
-            s += 8;
-    }
-
-    for (j++; j < max; j++)
+static void sp_3072_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j;
+    byte* d;
+
+    for (i = n - 1,j = 0; i >= 7; i -= 8) {
+        r[j]  = ((sp_digit)a[i - 0] <<  0) |
+                ((sp_digit)a[i - 1] <<  8) |
+                ((sp_digit)a[i - 2] << 16) |
+                ((sp_digit)a[i - 3] << 24) |
+                ((sp_digit)a[i - 4] << 32) |
+                ((sp_digit)a[i - 5] << 40) |
+                ((sp_digit)a[i - 6] << 48) |
+                ((sp_digit)a[i - 7] << 56);
+        j++;
+    }
+
+    if (i >= 0) {
         r[j] = 0;
+
+        d = (byte*)r;
+        switch (i) {
+            case 6: d[n - 1 - 6] = a[6]; //fallthrough
+            case 5: d[n - 1 - 5] = a[5]; //fallthrough
+            case 4: d[n - 1 - 4] = a[4]; //fallthrough
+            case 3: d[n - 1 - 3] = a[3]; //fallthrough
+            case 2: d[n - 1 - 2] = a[2]; //fallthrough
+            case 1: d[n - 1 - 1] = a[1]; //fallthrough
+            case 0: d[n - 1 - 0] = a[0]; //fallthrough
+        }
+        j++;
+    }
+
+    for (; j < size; j++) {
+        r[j] = 0;
+    }
 }
 
 /* Convert an mp_int to an array of sp_digit.
  *
  * r  A single precision integer.
+ * size  Maximum number of bytes to convert
  * a  A multi-precision integer.
  */
-static void sp_3072_from_mp(sp_digit* r, int max, mp_int* a)
+static void sp_3072_from_mp(sp_digit* r, int size, const mp_int* a)
 {
 #if DIGIT_BIT == 64
     int j;
 
     XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
 
-    for (j = a->used; j < max; j++)
+    for (j = a->used; j < size; j++) {
         r[j] = 0;
+    }
 #elif DIGIT_BIT > 64
-    int i, j = 0, s = 0;
+    int i, j = 0;
+    word32 s = 0;
 
     r[0] = 0;
-    for (i = 0; i < a->used && j < max; i++) {
-        r[j] |= a->dp[i] << s;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
         r[j] &= 0xffffffffffffffffl;
-        s = 64 - s;
-        if (j + 1 >= max)
+        s = 64U - s;
+        if (j + 1 >= size) {
             break;
-        r[++j] = a->dp[i] >> s;
-        while (s + 64 <= DIGIT_BIT) {
-            s += 64;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 64U) <= (word32)DIGIT_BIT) {
+            s += 64U;
             r[j] &= 0xffffffffffffffffl;
-            if (j + 1 >= max)
+            if (j + 1 >= size) {
                 break;
-            if (s < DIGIT_BIT)
-                r[++j] = a->dp[i] >> s;
-            else
-                r[++j] = 0;
-        }
-        s = DIGIT_BIT - s;
-    }
-
-    for (j++; j < max; j++)
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
         r[j] = 0;
+    }
 #else
     int i, j = 0, s = 0;
 
     r[0] = 0;
-    for (i = 0; i < a->used && j < max; i++) {
+    for (i = 0; i < a->used && j < size; i++) {
         r[j] |= ((sp_digit)a->dp[i]) << s;
         if (s + DIGIT_BIT >= 64) {
             r[j] &= 0xffffffffffffffffl;
-            if (j + 1 >= max)
+            if (j + 1 >= size) {
                 break;
+            }
             s = 64 - s;
             if (s == DIGIT_BIT) {
                 r[++j] = 0;
@@ -5150,16 +5944,18 @@
                 s = DIGIT_BIT - s;
             }
         }
-        else
+        else {
             s += DIGIT_BIT;
-    }
-
-    for (j++; j < max; j++)
+        }
+    }
+
+    for (j++; j < size; j++) {
         r[j] = 0;
-#endif
-}
-
-/* Write r as big endian to byte aray.
+    }
+#endif
+}
+
+/* Write r as big endian to byte array.
  * Fixed length number of bytes written: 384
  *
  * r  A single precision integer.
@@ -5167,25 +5963,17 @@
  */
 static void sp_3072_to_bin(sp_digit* r, byte* a)
 {
-    int i, j, s = 0, b;
-
-    j = 3072 / 8 - 1;
-    a[j] = 0;
-    for (i=0; i<48 && j>=0; i++) {
-        b = 0;
-        a[j--] |= r[i] << s; b += 8 - s;
-        if (j < 0)
-            break;
-        while (b < 64) {
-            a[j--] = r[i] >> b; b += 8;
-            if (j < 0)
-                break;
-        }
-        s = 8 - (b - 64);
-        if (j >= 0)
-            a[j] = 0;
-        if (s != 0)
-            j++;
+    int i, j;
+
+    for (i = 47, j = 0; i >= 0; i--) {
+        a[j++] = r[i] >> 56;
+        a[j++] = r[i] >> 48;
+        a[j++] = r[i] >> 40;
+        a[j++] = r[i] >> 32;
+        a[j++] = r[i] >> 24;
+        a[j++] = r[i] >> 16;
+        a[j++] = r[i] >> 8;
+        a[j++] = r[i] >> 0;
     }
 }
 
@@ -5201,1184 +5989,1056 @@
     sp_digit tmp[12];
 
     __asm__ __volatile__ (
+        "ldp	x10, x11, [%[a], 0]\n\t"
+        "ldp	x12, x13, [%[a], 16]\n\t"
+        "ldp	x14, x15, [%[a], 32]\n\t"
+        "ldp	x16, x17, [%[a], 48]\n\t"
+        "ldp	x19, x20, [%[a], 64]\n\t"
+        "ldp	x21, x22, [%[a], 80]\n\t"
         "#  A[0] * B[0]\n\t"
-        "ldr	x7, [%[a], 0]\n\t"
-        "ldr	x8, [%[b], 0]\n\t"
-        "mul	x3, x7, x8\n\t"
-        "umulh	x4, x7, x8\n\t"
-        "mov	x5, 0\n\t"
-        "str	x3, [%[tmp]]\n\t"
+        "ldr	x9, [%[b], 0]\n\t"
+        "mul	x4, x10, x9\n\t"
+        "umulh	x5, x10, x9\n\t"
+        "mov	x6, 0\n\t"
+        "str	x4, [%[tmp]]\n\t"
         "#  A[0] * B[1]\n\t"
-        "ldr	x7, [%[a], 0]\n\t"
-        "ldr	x8, [%[b], 8]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, xzr, xzr\n\t"
+        "ldr	x9, [%[b], 8]\n\t"
+        "mul	x7, x10, x9\n\t"
+        "umulh	x8, x10, x9\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[1] * B[0]\n\t"
-        "ldr	x7, [%[a], 8]\n\t"
-        "ldr	x8, [%[b], 0]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
-        "str	x4, [%[tmp], 8]\n\t"
+        "ldr	x9, [%[b], 0]\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "mul	x7, x11, x9\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "umulh	x8, x11, x9\n\t"
+        "adds	x5, x5, x7\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "str	x5, [%[tmp], 8]\n\t"
+        "adc	x4, x4, xzr\n\t"
         "#  A[0] * B[2]\n\t"
-        "ldr	x7, [%[a], 0]\n\t"
-        "ldr	x8, [%[b], 16]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, xzr, xzr\n\t"
+        "ldr	x9, [%[b], 16]\n\t"
+        "mul	x7, x10, x9\n\t"
+        "umulh	x8, x10, x9\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[1] * B[1]\n\t"
-        "ldr	x7, [%[a], 8]\n\t"
-        "ldr	x8, [%[b], 8]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x9, [%[b], 8]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "mul	x7, x11, x9\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "umulh	x8, x11, x9\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[2] * B[0]\n\t"
-        "ldr	x7, [%[a], 16]\n\t"
-        "ldr	x8, [%[b], 0]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
-        "str	x5, [%[tmp], 16]\n\t"
+        "ldr	x9, [%[b], 0]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "mul	x7, x12, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x12, x9\n\t"
+        "adds	x6, x6, x7\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "str	x6, [%[tmp], 16]\n\t"
+        "adc	x5, x5, xzr\n\t"
         "#  A[0] * B[3]\n\t"
-        "ldr	x7, [%[a], 0]\n\t"
-        "ldr	x8, [%[b], 24]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, xzr, xzr\n\t"
+        "ldr	x9, [%[b], 24]\n\t"
+        "mul	x7, x10, x9\n\t"
+        "umulh	x8, x10, x9\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[1] * B[2]\n\t"
-        "ldr	x7, [%[a], 8]\n\t"
-        "ldr	x8, [%[b], 16]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x9, [%[b], 16]\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "mul	x7, x11, x9\n\t"
+        "adc	x6, xzr, xzr\n\t"
+        "umulh	x8, x11, x9\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[2] * B[1]\n\t"
-        "ldr	x7, [%[a], 16]\n\t"
-        "ldr	x8, [%[b], 8]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x9, [%[b], 8]\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "mul	x7, x12, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x12, x9\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[3] * B[0]\n\t"
-        "ldr	x7, [%[a], 24]\n\t"
-        "ldr	x8, [%[b], 0]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
-        "str	x3, [%[tmp], 24]\n\t"
+        "ldr	x9, [%[b], 0]\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "mul	x7, x13, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x13, x9\n\t"
+        "adds	x4, x4, x7\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "str	x4, [%[tmp], 24]\n\t"
+        "adc	x6, x6, xzr\n\t"
         "#  A[0] * B[4]\n\t"
-        "ldr	x7, [%[a], 0]\n\t"
-        "ldr	x8, [%[b], 32]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, xzr, xzr\n\t"
+        "ldr	x9, [%[b], 32]\n\t"
+        "mul	x7, x10, x9\n\t"
+        "umulh	x8, x10, x9\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[1] * B[3]\n\t"
-        "ldr	x7, [%[a], 8]\n\t"
-        "ldr	x8, [%[b], 24]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
+        "ldr	x9, [%[b], 24]\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "mul	x7, x11, x9\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "umulh	x8, x11, x9\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[2] * B[2]\n\t"
-        "ldr	x7, [%[a], 16]\n\t"
-        "ldr	x8, [%[b], 16]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
+        "ldr	x9, [%[b], 16]\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "mul	x7, x12, x9\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "umulh	x8, x12, x9\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[3] * B[1]\n\t"
-        "ldr	x7, [%[a], 24]\n\t"
-        "ldr	x8, [%[b], 8]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
+        "ldr	x9, [%[b], 8]\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "mul	x7, x13, x9\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "umulh	x8, x13, x9\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[4] * B[0]\n\t"
-        "ldr	x7, [%[a], 32]\n\t"
-        "ldr	x8, [%[b], 0]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
-        "str	x4, [%[tmp], 32]\n\t"
+        "ldr	x9, [%[b], 0]\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "mul	x7, x14, x9\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "umulh	x8, x14, x9\n\t"
+        "adds	x5, x5, x7\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "str	x5, [%[tmp], 32]\n\t"
+        "adc	x4, x4, xzr\n\t"
         "#  A[0] * B[5]\n\t"
-        "ldr	x7, [%[a], 0]\n\t"
-        "ldr	x8, [%[b], 40]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, xzr, xzr\n\t"
+        "ldr	x9, [%[b], 40]\n\t"
+        "mul	x7, x10, x9\n\t"
+        "umulh	x8, x10, x9\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[1] * B[4]\n\t"
-        "ldr	x7, [%[a], 8]\n\t"
-        "ldr	x8, [%[b], 32]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x9, [%[b], 32]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "mul	x7, x11, x9\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "umulh	x8, x11, x9\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[2] * B[3]\n\t"
-        "ldr	x7, [%[a], 16]\n\t"
-        "ldr	x8, [%[b], 24]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x9, [%[b], 24]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "mul	x7, x12, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x12, x9\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[3] * B[2]\n\t"
-        "ldr	x7, [%[a], 24]\n\t"
-        "ldr	x8, [%[b], 16]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x9, [%[b], 16]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "mul	x7, x13, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x13, x9\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[4] * B[1]\n\t"
-        "ldr	x7, [%[a], 32]\n\t"
-        "ldr	x8, [%[b], 8]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x9, [%[b], 8]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "mul	x7, x14, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x14, x9\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[5] * B[0]\n\t"
-        "ldr	x7, [%[a], 40]\n\t"
-        "ldr	x8, [%[b], 0]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
-        "str	x5, [%[tmp], 40]\n\t"
+        "ldr	x9, [%[b], 0]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "mul	x7, x15, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x15, x9\n\t"
+        "adds	x6, x6, x7\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "str	x6, [%[tmp], 40]\n\t"
+        "adc	x5, x5, xzr\n\t"
         "#  A[0] * B[6]\n\t"
-        "ldr	x7, [%[a], 0]\n\t"
-        "ldr	x8, [%[b], 48]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, xzr, xzr\n\t"
+        "ldr	x9, [%[b], 48]\n\t"
+        "mul	x7, x10, x9\n\t"
+        "umulh	x8, x10, x9\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[1] * B[5]\n\t"
-        "ldr	x7, [%[a], 8]\n\t"
-        "ldr	x8, [%[b], 40]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x9, [%[b], 40]\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "mul	x7, x11, x9\n\t"
+        "adc	x6, xzr, xzr\n\t"
+        "umulh	x8, x11, x9\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[2] * B[4]\n\t"
-        "ldr	x7, [%[a], 16]\n\t"
-        "ldr	x8, [%[b], 32]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x9, [%[b], 32]\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "mul	x7, x12, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x12, x9\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[3] * B[3]\n\t"
-        "ldr	x7, [%[a], 24]\n\t"
-        "ldr	x8, [%[b], 24]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x9, [%[b], 24]\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "mul	x7, x13, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x13, x9\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[4] * B[2]\n\t"
-        "ldr	x7, [%[a], 32]\n\t"
-        "ldr	x8, [%[b], 16]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x9, [%[b], 16]\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "mul	x7, x14, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x14, x9\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[5] * B[1]\n\t"
-        "ldr	x7, [%[a], 40]\n\t"
-        "ldr	x8, [%[b], 8]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x9, [%[b], 8]\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "mul	x7, x15, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x15, x9\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[6] * B[0]\n\t"
-        "ldr	x7, [%[a], 48]\n\t"
-        "ldr	x8, [%[b], 0]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
-        "str	x3, [%[tmp], 48]\n\t"
+        "ldr	x9, [%[b], 0]\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "mul	x7, x16, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x16, x9\n\t"
+        "adds	x4, x4, x7\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "str	x4, [%[tmp], 48]\n\t"
+        "adc	x6, x6, xzr\n\t"
         "#  A[0] * B[7]\n\t"
-        "ldr	x7, [%[a], 0]\n\t"
-        "ldr	x8, [%[b], 56]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, xzr, xzr\n\t"
+        "ldr	x9, [%[b], 56]\n\t"
+        "mul	x7, x10, x9\n\t"
+        "umulh	x8, x10, x9\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[1] * B[6]\n\t"
-        "ldr	x7, [%[a], 8]\n\t"
-        "ldr	x8, [%[b], 48]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
+        "ldr	x9, [%[b], 48]\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "mul	x7, x11, x9\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "umulh	x8, x11, x9\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[2] * B[5]\n\t"
-        "ldr	x7, [%[a], 16]\n\t"
-        "ldr	x8, [%[b], 40]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
+        "ldr	x9, [%[b], 40]\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "mul	x7, x12, x9\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "umulh	x8, x12, x9\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[3] * B[4]\n\t"
-        "ldr	x7, [%[a], 24]\n\t"
-        "ldr	x8, [%[b], 32]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
+        "ldr	x9, [%[b], 32]\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "mul	x7, x13, x9\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "umulh	x8, x13, x9\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[4] * B[3]\n\t"
-        "ldr	x7, [%[a], 32]\n\t"
-        "ldr	x8, [%[b], 24]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
+        "ldr	x9, [%[b], 24]\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "mul	x7, x14, x9\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "umulh	x8, x14, x9\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[5] * B[2]\n\t"
-        "ldr	x7, [%[a], 40]\n\t"
-        "ldr	x8, [%[b], 16]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
+        "ldr	x9, [%[b], 16]\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "mul	x7, x15, x9\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "umulh	x8, x15, x9\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[6] * B[1]\n\t"
-        "ldr	x7, [%[a], 48]\n\t"
-        "ldr	x8, [%[b], 8]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
+        "ldr	x9, [%[b], 8]\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "mul	x7, x16, x9\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "umulh	x8, x16, x9\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[7] * B[0]\n\t"
-        "ldr	x7, [%[a], 56]\n\t"
-        "ldr	x8, [%[b], 0]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
-        "str	x4, [%[tmp], 56]\n\t"
+        "ldr	x9, [%[b], 0]\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "mul	x7, x17, x9\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "umulh	x8, x17, x9\n\t"
+        "adds	x5, x5, x7\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "str	x5, [%[tmp], 56]\n\t"
+        "adc	x4, x4, xzr\n\t"
         "#  A[0] * B[8]\n\t"
-        "ldr	x7, [%[a], 0]\n\t"
-        "ldr	x8, [%[b], 64]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, xzr, xzr\n\t"
+        "ldr	x9, [%[b], 64]\n\t"
+        "mul	x7, x10, x9\n\t"
+        "umulh	x8, x10, x9\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[1] * B[7]\n\t"
-        "ldr	x7, [%[a], 8]\n\t"
-        "ldr	x8, [%[b], 56]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x9, [%[b], 56]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "mul	x7, x11, x9\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "umulh	x8, x11, x9\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[2] * B[6]\n\t"
-        "ldr	x7, [%[a], 16]\n\t"
-        "ldr	x8, [%[b], 48]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x9, [%[b], 48]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "mul	x7, x12, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x12, x9\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[3] * B[5]\n\t"
-        "ldr	x7, [%[a], 24]\n\t"
-        "ldr	x8, [%[b], 40]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x9, [%[b], 40]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "mul	x7, x13, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x13, x9\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[4] * B[4]\n\t"
-        "ldr	x7, [%[a], 32]\n\t"
-        "ldr	x8, [%[b], 32]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x9, [%[b], 32]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "mul	x7, x14, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x14, x9\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[5] * B[3]\n\t"
-        "ldr	x7, [%[a], 40]\n\t"
-        "ldr	x8, [%[b], 24]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x9, [%[b], 24]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "mul	x7, x15, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x15, x9\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[6] * B[2]\n\t"
-        "ldr	x7, [%[a], 48]\n\t"
-        "ldr	x8, [%[b], 16]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x9, [%[b], 16]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "mul	x7, x16, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x16, x9\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[7] * B[1]\n\t"
-        "ldr	x7, [%[a], 56]\n\t"
-        "ldr	x8, [%[b], 8]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x9, [%[b], 8]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "mul	x7, x17, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x17, x9\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[8] * B[0]\n\t"
-        "ldr	x7, [%[a], 64]\n\t"
-        "ldr	x8, [%[b], 0]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
-        "str	x5, [%[tmp], 64]\n\t"
+        "ldr	x9, [%[b], 0]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "mul	x7, x19, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x19, x9\n\t"
+        "adds	x6, x6, x7\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "str	x6, [%[tmp], 64]\n\t"
+        "adc	x5, x5, xzr\n\t"
         "#  A[0] * B[9]\n\t"
-        "ldr	x7, [%[a], 0]\n\t"
-        "ldr	x8, [%[b], 72]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, xzr, xzr\n\t"
+        "ldr	x9, [%[b], 72]\n\t"
+        "mul	x7, x10, x9\n\t"
+        "umulh	x8, x10, x9\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[1] * B[8]\n\t"
-        "ldr	x7, [%[a], 8]\n\t"
-        "ldr	x8, [%[b], 64]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x9, [%[b], 64]\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "mul	x7, x11, x9\n\t"
+        "adc	x6, xzr, xzr\n\t"
+        "umulh	x8, x11, x9\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[2] * B[7]\n\t"
-        "ldr	x7, [%[a], 16]\n\t"
-        "ldr	x8, [%[b], 56]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x9, [%[b], 56]\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "mul	x7, x12, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x12, x9\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[3] * B[6]\n\t"
-        "ldr	x7, [%[a], 24]\n\t"
-        "ldr	x8, [%[b], 48]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x9, [%[b], 48]\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "mul	x7, x13, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x13, x9\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[4] * B[5]\n\t"
-        "ldr	x7, [%[a], 32]\n\t"
-        "ldr	x8, [%[b], 40]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x9, [%[b], 40]\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "mul	x7, x14, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x14, x9\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[5] * B[4]\n\t"
-        "ldr	x7, [%[a], 40]\n\t"
-        "ldr	x8, [%[b], 32]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x9, [%[b], 32]\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "mul	x7, x15, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x15, x9\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[6] * B[3]\n\t"
-        "ldr	x7, [%[a], 48]\n\t"
-        "ldr	x8, [%[b], 24]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x9, [%[b], 24]\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "mul	x7, x16, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x16, x9\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[7] * B[2]\n\t"
-        "ldr	x7, [%[a], 56]\n\t"
-        "ldr	x8, [%[b], 16]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x9, [%[b], 16]\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "mul	x7, x17, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x17, x9\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[8] * B[1]\n\t"
-        "ldr	x7, [%[a], 64]\n\t"
-        "ldr	x8, [%[b], 8]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x9, [%[b], 8]\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "mul	x7, x19, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x19, x9\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[9] * B[0]\n\t"
-        "ldr	x7, [%[a], 72]\n\t"
-        "ldr	x8, [%[b], 0]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
-        "str	x3, [%[tmp], 72]\n\t"
+        "ldr	x9, [%[b], 0]\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "mul	x7, x20, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x20, x9\n\t"
+        "adds	x4, x4, x7\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "str	x4, [%[tmp], 72]\n\t"
+        "adc	x6, x6, xzr\n\t"
         "#  A[0] * B[10]\n\t"
-        "ldr	x7, [%[a], 0]\n\t"
-        "ldr	x8, [%[b], 80]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, xzr, xzr\n\t"
+        "ldr	x9, [%[b], 80]\n\t"
+        "mul	x7, x10, x9\n\t"
+        "umulh	x8, x10, x9\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[1] * B[9]\n\t"
-        "ldr	x7, [%[a], 8]\n\t"
-        "ldr	x8, [%[b], 72]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
+        "ldr	x9, [%[b], 72]\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "mul	x7, x11, x9\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "umulh	x8, x11, x9\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[2] * B[8]\n\t"
-        "ldr	x7, [%[a], 16]\n\t"
-        "ldr	x8, [%[b], 64]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
+        "ldr	x9, [%[b], 64]\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "mul	x7, x12, x9\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "umulh	x8, x12, x9\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[3] * B[7]\n\t"
-        "ldr	x7, [%[a], 24]\n\t"
-        "ldr	x8, [%[b], 56]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
+        "ldr	x9, [%[b], 56]\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "mul	x7, x13, x9\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "umulh	x8, x13, x9\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[4] * B[6]\n\t"
-        "ldr	x7, [%[a], 32]\n\t"
-        "ldr	x8, [%[b], 48]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
+        "ldr	x9, [%[b], 48]\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "mul	x7, x14, x9\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "umulh	x8, x14, x9\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[5] * B[5]\n\t"
-        "ldr	x7, [%[a], 40]\n\t"
-        "ldr	x8, [%[b], 40]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
+        "ldr	x9, [%[b], 40]\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "mul	x7, x15, x9\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "umulh	x8, x15, x9\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[6] * B[4]\n\t"
-        "ldr	x7, [%[a], 48]\n\t"
-        "ldr	x8, [%[b], 32]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
+        "ldr	x9, [%[b], 32]\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "mul	x7, x16, x9\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "umulh	x8, x16, x9\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[7] * B[3]\n\t"
-        "ldr	x7, [%[a], 56]\n\t"
-        "ldr	x8, [%[b], 24]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
+        "ldr	x9, [%[b], 24]\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "mul	x7, x17, x9\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "umulh	x8, x17, x9\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[8] * B[2]\n\t"
-        "ldr	x7, [%[a], 64]\n\t"
-        "ldr	x8, [%[b], 16]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
+        "ldr	x9, [%[b], 16]\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "mul	x7, x19, x9\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "umulh	x8, x19, x9\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[9] * B[1]\n\t"
-        "ldr	x7, [%[a], 72]\n\t"
-        "ldr	x8, [%[b], 8]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
+        "ldr	x9, [%[b], 8]\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "mul	x7, x20, x9\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "umulh	x8, x20, x9\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[10] * B[0]\n\t"
-        "ldr	x7, [%[a], 80]\n\t"
-        "ldr	x8, [%[b], 0]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
-        "str	x4, [%[tmp], 80]\n\t"
+        "ldr	x9, [%[b], 0]\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "mul	x7, x21, x9\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "umulh	x8, x21, x9\n\t"
+        "adds	x5, x5, x7\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "str	x5, [%[tmp], 80]\n\t"
+        "adc	x4, x4, xzr\n\t"
         "#  A[0] * B[11]\n\t"
-        "ldr	x7, [%[a], 0]\n\t"
-        "ldr	x8, [%[b], 88]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, xzr, xzr\n\t"
+        "ldr	x9, [%[b], 88]\n\t"
+        "mul	x7, x10, x9\n\t"
+        "umulh	x8, x10, x9\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[1] * B[10]\n\t"
-        "ldr	x7, [%[a], 8]\n\t"
-        "ldr	x8, [%[b], 80]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x9, [%[b], 80]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "mul	x7, x11, x9\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "umulh	x8, x11, x9\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[2] * B[9]\n\t"
-        "ldr	x7, [%[a], 16]\n\t"
-        "ldr	x8, [%[b], 72]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x9, [%[b], 72]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "mul	x7, x12, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x12, x9\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[3] * B[8]\n\t"
-        "ldr	x7, [%[a], 24]\n\t"
-        "ldr	x8, [%[b], 64]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x9, [%[b], 64]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "mul	x7, x13, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x13, x9\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[4] * B[7]\n\t"
-        "ldr	x7, [%[a], 32]\n\t"
-        "ldr	x8, [%[b], 56]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x9, [%[b], 56]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "mul	x7, x14, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x14, x9\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[5] * B[6]\n\t"
-        "ldr	x7, [%[a], 40]\n\t"
-        "ldr	x8, [%[b], 48]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x9, [%[b], 48]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "mul	x7, x15, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x15, x9\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[6] * B[5]\n\t"
-        "ldr	x7, [%[a], 48]\n\t"
-        "ldr	x8, [%[b], 40]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x9, [%[b], 40]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "mul	x7, x16, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x16, x9\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[7] * B[4]\n\t"
-        "ldr	x7, [%[a], 56]\n\t"
-        "ldr	x8, [%[b], 32]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x9, [%[b], 32]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "mul	x7, x17, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x17, x9\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[8] * B[3]\n\t"
-        "ldr	x7, [%[a], 64]\n\t"
-        "ldr	x8, [%[b], 24]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x9, [%[b], 24]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "mul	x7, x19, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x19, x9\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[9] * B[2]\n\t"
-        "ldr	x7, [%[a], 72]\n\t"
-        "ldr	x8, [%[b], 16]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x9, [%[b], 16]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "mul	x7, x20, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x20, x9\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[10] * B[1]\n\t"
-        "ldr	x7, [%[a], 80]\n\t"
-        "ldr	x8, [%[b], 8]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x9, [%[b], 8]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "mul	x7, x21, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x21, x9\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[11] * B[0]\n\t"
-        "ldr	x7, [%[a], 88]\n\t"
-        "ldr	x8, [%[b], 0]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
-        "str	x5, [%[tmp], 88]\n\t"
+        "ldr	x9, [%[b], 0]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "mul	x7, x22, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x22, x9\n\t"
+        "adds	x6, x6, x7\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "str	x6, [%[tmp], 88]\n\t"
+        "adc	x5, x5, xzr\n\t"
         "#  A[1] * B[11]\n\t"
-        "ldr	x7, [%[a], 8]\n\t"
-        "ldr	x8, [%[b], 88]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, xzr, xzr\n\t"
+        "ldr	x9, [%[b], 88]\n\t"
+        "mul	x7, x11, x9\n\t"
+        "umulh	x8, x11, x9\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[2] * B[10]\n\t"
-        "ldr	x7, [%[a], 16]\n\t"
-        "ldr	x8, [%[b], 80]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x9, [%[b], 80]\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "mul	x7, x12, x9\n\t"
+        "adc	x6, xzr, xzr\n\t"
+        "umulh	x8, x12, x9\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[3] * B[9]\n\t"
-        "ldr	x7, [%[a], 24]\n\t"
-        "ldr	x8, [%[b], 72]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x9, [%[b], 72]\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "mul	x7, x13, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x13, x9\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[4] * B[8]\n\t"
-        "ldr	x7, [%[a], 32]\n\t"
-        "ldr	x8, [%[b], 64]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x9, [%[b], 64]\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "mul	x7, x14, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x14, x9\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[5] * B[7]\n\t"
-        "ldr	x7, [%[a], 40]\n\t"
-        "ldr	x8, [%[b], 56]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x9, [%[b], 56]\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "mul	x7, x15, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x15, x9\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[6] * B[6]\n\t"
-        "ldr	x7, [%[a], 48]\n\t"
-        "ldr	x8, [%[b], 48]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x9, [%[b], 48]\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "mul	x7, x16, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x16, x9\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[7] * B[5]\n\t"
-        "ldr	x7, [%[a], 56]\n\t"
-        "ldr	x8, [%[b], 40]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x9, [%[b], 40]\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "mul	x7, x17, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x17, x9\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[8] * B[4]\n\t"
-        "ldr	x7, [%[a], 64]\n\t"
-        "ldr	x8, [%[b], 32]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x9, [%[b], 32]\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "mul	x7, x19, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x19, x9\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[9] * B[3]\n\t"
-        "ldr	x7, [%[a], 72]\n\t"
-        "ldr	x8, [%[b], 24]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x9, [%[b], 24]\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "mul	x7, x20, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x20, x9\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[10] * B[2]\n\t"
-        "ldr	x7, [%[a], 80]\n\t"
-        "ldr	x8, [%[b], 16]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x9, [%[b], 16]\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "mul	x7, x21, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x21, x9\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[11] * B[1]\n\t"
-        "ldr	x7, [%[a], 88]\n\t"
-        "ldr	x8, [%[b], 8]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
-        "str	x3, [%[r], 96]\n\t"
+        "ldr	x9, [%[b], 8]\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "mul	x7, x22, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x22, x9\n\t"
+        "adds	x4, x4, x7\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "str	x4, [%[r], 96]\n\t"
+        "adc	x6, x6, xzr\n\t"
         "#  A[2] * B[11]\n\t"
-        "ldr	x7, [%[a], 16]\n\t"
-        "ldr	x8, [%[b], 88]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, xzr, xzr\n\t"
+        "ldr	x9, [%[b], 88]\n\t"
+        "mul	x7, x12, x9\n\t"
+        "umulh	x8, x12, x9\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[3] * B[10]\n\t"
-        "ldr	x7, [%[a], 24]\n\t"
-        "ldr	x8, [%[b], 80]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
+        "ldr	x9, [%[b], 80]\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "mul	x7, x13, x9\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "umulh	x8, x13, x9\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[4] * B[9]\n\t"
-        "ldr	x7, [%[a], 32]\n\t"
-        "ldr	x8, [%[b], 72]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
+        "ldr	x9, [%[b], 72]\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "mul	x7, x14, x9\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "umulh	x8, x14, x9\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[5] * B[8]\n\t"
-        "ldr	x7, [%[a], 40]\n\t"
-        "ldr	x8, [%[b], 64]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
+        "ldr	x9, [%[b], 64]\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "mul	x7, x15, x9\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "umulh	x8, x15, x9\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[6] * B[7]\n\t"
-        "ldr	x7, [%[a], 48]\n\t"
-        "ldr	x8, [%[b], 56]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
+        "ldr	x9, [%[b], 56]\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "mul	x7, x16, x9\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "umulh	x8, x16, x9\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[7] * B[6]\n\t"
-        "ldr	x7, [%[a], 56]\n\t"
-        "ldr	x8, [%[b], 48]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
+        "ldr	x9, [%[b], 48]\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "mul	x7, x17, x9\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "umulh	x8, x17, x9\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[8] * B[5]\n\t"
-        "ldr	x7, [%[a], 64]\n\t"
-        "ldr	x8, [%[b], 40]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
+        "ldr	x9, [%[b], 40]\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "mul	x7, x19, x9\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "umulh	x8, x19, x9\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[9] * B[4]\n\t"
-        "ldr	x7, [%[a], 72]\n\t"
-        "ldr	x8, [%[b], 32]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
+        "ldr	x9, [%[b], 32]\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "mul	x7, x20, x9\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "umulh	x8, x20, x9\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[10] * B[3]\n\t"
-        "ldr	x7, [%[a], 80]\n\t"
-        "ldr	x8, [%[b], 24]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
+        "ldr	x9, [%[b], 24]\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "mul	x7, x21, x9\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "umulh	x8, x21, x9\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[11] * B[2]\n\t"
-        "ldr	x7, [%[a], 88]\n\t"
-        "ldr	x8, [%[b], 16]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
-        "str	x4, [%[r], 104]\n\t"
+        "ldr	x9, [%[b], 16]\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "mul	x7, x22, x9\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "umulh	x8, x22, x9\n\t"
+        "adds	x5, x5, x7\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "str	x5, [%[r], 104]\n\t"
+        "adc	x4, x4, xzr\n\t"
         "#  A[3] * B[11]\n\t"
-        "ldr	x7, [%[a], 24]\n\t"
-        "ldr	x8, [%[b], 88]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, xzr, xzr\n\t"
+        "ldr	x9, [%[b], 88]\n\t"
+        "mul	x7, x13, x9\n\t"
+        "umulh	x8, x13, x9\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[4] * B[10]\n\t"
-        "ldr	x7, [%[a], 32]\n\t"
-        "ldr	x8, [%[b], 80]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x9, [%[b], 80]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "mul	x7, x14, x9\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "umulh	x8, x14, x9\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[5] * B[9]\n\t"
-        "ldr	x7, [%[a], 40]\n\t"
-        "ldr	x8, [%[b], 72]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x9, [%[b], 72]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "mul	x7, x15, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x15, x9\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[6] * B[8]\n\t"
-        "ldr	x7, [%[a], 48]\n\t"
-        "ldr	x8, [%[b], 64]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x9, [%[b], 64]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "mul	x7, x16, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x16, x9\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[7] * B[7]\n\t"
-        "ldr	x7, [%[a], 56]\n\t"
-        "ldr	x8, [%[b], 56]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x9, [%[b], 56]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "mul	x7, x17, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x17, x9\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[8] * B[6]\n\t"
-        "ldr	x7, [%[a], 64]\n\t"
-        "ldr	x8, [%[b], 48]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x9, [%[b], 48]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "mul	x7, x19, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x19, x9\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[9] * B[5]\n\t"
-        "ldr	x7, [%[a], 72]\n\t"
-        "ldr	x8, [%[b], 40]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x9, [%[b], 40]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "mul	x7, x20, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x20, x9\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[10] * B[4]\n\t"
-        "ldr	x7, [%[a], 80]\n\t"
-        "ldr	x8, [%[b], 32]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x9, [%[b], 32]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "mul	x7, x21, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x21, x9\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[11] * B[3]\n\t"
-        "ldr	x7, [%[a], 88]\n\t"
-        "ldr	x8, [%[b], 24]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
-        "str	x5, [%[r], 112]\n\t"
+        "ldr	x9, [%[b], 24]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "mul	x7, x22, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x22, x9\n\t"
+        "adds	x6, x6, x7\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "str	x6, [%[r], 112]\n\t"
+        "adc	x5, x5, xzr\n\t"
         "#  A[4] * B[11]\n\t"
-        "ldr	x7, [%[a], 32]\n\t"
-        "ldr	x8, [%[b], 88]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, xzr, xzr\n\t"
+        "ldr	x9, [%[b], 88]\n\t"
+        "mul	x7, x14, x9\n\t"
+        "umulh	x8, x14, x9\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[5] * B[10]\n\t"
-        "ldr	x7, [%[a], 40]\n\t"
-        "ldr	x8, [%[b], 80]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x9, [%[b], 80]\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "mul	x7, x15, x9\n\t"
+        "adc	x6, xzr, xzr\n\t"
+        "umulh	x8, x15, x9\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[6] * B[9]\n\t"
-        "ldr	x7, [%[a], 48]\n\t"
-        "ldr	x8, [%[b], 72]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x9, [%[b], 72]\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "mul	x7, x16, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x16, x9\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[7] * B[8]\n\t"
-        "ldr	x7, [%[a], 56]\n\t"
-        "ldr	x8, [%[b], 64]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x9, [%[b], 64]\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "mul	x7, x17, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x17, x9\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[8] * B[7]\n\t"
-        "ldr	x7, [%[a], 64]\n\t"
-        "ldr	x8, [%[b], 56]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x9, [%[b], 56]\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "mul	x7, x19, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x19, x9\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[9] * B[6]\n\t"
-        "ldr	x7, [%[a], 72]\n\t"
-        "ldr	x8, [%[b], 48]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x9, [%[b], 48]\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "mul	x7, x20, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x20, x9\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[10] * B[5]\n\t"
-        "ldr	x7, [%[a], 80]\n\t"
-        "ldr	x8, [%[b], 40]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x9, [%[b], 40]\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "mul	x7, x21, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x21, x9\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[11] * B[4]\n\t"
-        "ldr	x7, [%[a], 88]\n\t"
-        "ldr	x8, [%[b], 32]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
-        "str	x3, [%[r], 120]\n\t"
+        "ldr	x9, [%[b], 32]\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "mul	x7, x22, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x22, x9\n\t"
+        "adds	x4, x4, x7\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "str	x4, [%[r], 120]\n\t"
+        "adc	x6, x6, xzr\n\t"
         "#  A[5] * B[11]\n\t"
-        "ldr	x7, [%[a], 40]\n\t"
-        "ldr	x8, [%[b], 88]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, xzr, xzr\n\t"
+        "ldr	x9, [%[b], 88]\n\t"
+        "mul	x7, x15, x9\n\t"
+        "umulh	x8, x15, x9\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[6] * B[10]\n\t"
-        "ldr	x7, [%[a], 48]\n\t"
-        "ldr	x8, [%[b], 80]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
+        "ldr	x9, [%[b], 80]\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "mul	x7, x16, x9\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "umulh	x8, x16, x9\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[7] * B[9]\n\t"
-        "ldr	x7, [%[a], 56]\n\t"
-        "ldr	x8, [%[b], 72]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
+        "ldr	x9, [%[b], 72]\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "mul	x7, x17, x9\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "umulh	x8, x17, x9\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[8] * B[8]\n\t"
-        "ldr	x7, [%[a], 64]\n\t"
-        "ldr	x8, [%[b], 64]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
+        "ldr	x9, [%[b], 64]\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "mul	x7, x19, x9\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "umulh	x8, x19, x9\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[9] * B[7]\n\t"
-        "ldr	x7, [%[a], 72]\n\t"
-        "ldr	x8, [%[b], 56]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
+        "ldr	x9, [%[b], 56]\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "mul	x7, x20, x9\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "umulh	x8, x20, x9\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[10] * B[6]\n\t"
-        "ldr	x7, [%[a], 80]\n\t"
-        "ldr	x8, [%[b], 48]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
+        "ldr	x9, [%[b], 48]\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "mul	x7, x21, x9\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "umulh	x8, x21, x9\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[11] * B[5]\n\t"
-        "ldr	x7, [%[a], 88]\n\t"
-        "ldr	x8, [%[b], 40]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
-        "str	x4, [%[r], 128]\n\t"
+        "ldr	x9, [%[b], 40]\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "mul	x7, x22, x9\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "umulh	x8, x22, x9\n\t"
+        "adds	x5, x5, x7\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "str	x5, [%[r], 128]\n\t"
+        "adc	x4, x4, xzr\n\t"
         "#  A[6] * B[11]\n\t"
-        "ldr	x7, [%[a], 48]\n\t"
-        "ldr	x8, [%[b], 88]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, xzr, xzr\n\t"
+        "ldr	x9, [%[b], 88]\n\t"
+        "mul	x7, x16, x9\n\t"
+        "umulh	x8, x16, x9\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[7] * B[10]\n\t"
-        "ldr	x7, [%[a], 56]\n\t"
-        "ldr	x8, [%[b], 80]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x9, [%[b], 80]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "mul	x7, x17, x9\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "umulh	x8, x17, x9\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[8] * B[9]\n\t"
-        "ldr	x7, [%[a], 64]\n\t"
-        "ldr	x8, [%[b], 72]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x9, [%[b], 72]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "mul	x7, x19, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x19, x9\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[9] * B[8]\n\t"
-        "ldr	x7, [%[a], 72]\n\t"
-        "ldr	x8, [%[b], 64]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x9, [%[b], 64]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "mul	x7, x20, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x20, x9\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[10] * B[7]\n\t"
-        "ldr	x7, [%[a], 80]\n\t"
-        "ldr	x8, [%[b], 56]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x9, [%[b], 56]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "mul	x7, x21, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x21, x9\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[11] * B[6]\n\t"
-        "ldr	x7, [%[a], 88]\n\t"
-        "ldr	x8, [%[b], 48]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
-        "str	x5, [%[r], 136]\n\t"
+        "ldr	x9, [%[b], 48]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "mul	x7, x22, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x22, x9\n\t"
+        "adds	x6, x6, x7\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "str	x6, [%[r], 136]\n\t"
+        "adc	x5, x5, xzr\n\t"
         "#  A[7] * B[11]\n\t"
-        "ldr	x7, [%[a], 56]\n\t"
-        "ldr	x8, [%[b], 88]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, xzr, xzr\n\t"
+        "ldr	x9, [%[b], 88]\n\t"
+        "mul	x7, x17, x9\n\t"
+        "umulh	x8, x17, x9\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[8] * B[10]\n\t"
-        "ldr	x7, [%[a], 64]\n\t"
-        "ldr	x8, [%[b], 80]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x9, [%[b], 80]\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "mul	x7, x19, x9\n\t"
+        "adc	x6, xzr, xzr\n\t"
+        "umulh	x8, x19, x9\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[9] * B[9]\n\t"
-        "ldr	x7, [%[a], 72]\n\t"
-        "ldr	x8, [%[b], 72]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x9, [%[b], 72]\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "mul	x7, x20, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x20, x9\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[10] * B[8]\n\t"
-        "ldr	x7, [%[a], 80]\n\t"
-        "ldr	x8, [%[b], 64]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x9, [%[b], 64]\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "mul	x7, x21, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x21, x9\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[11] * B[7]\n\t"
-        "ldr	x7, [%[a], 88]\n\t"
-        "ldr	x8, [%[b], 56]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
-        "str	x3, [%[r], 144]\n\t"
+        "ldr	x9, [%[b], 56]\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "mul	x7, x22, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x22, x9\n\t"
+        "adds	x4, x4, x7\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "str	x4, [%[r], 144]\n\t"
+        "adc	x6, x6, xzr\n\t"
         "#  A[8] * B[11]\n\t"
-        "ldr	x7, [%[a], 64]\n\t"
-        "ldr	x8, [%[b], 88]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, xzr, xzr\n\t"
+        "ldr	x9, [%[b], 88]\n\t"
+        "mul	x7, x19, x9\n\t"
+        "umulh	x8, x19, x9\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[9] * B[10]\n\t"
-        "ldr	x7, [%[a], 72]\n\t"
-        "ldr	x8, [%[b], 80]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
+        "ldr	x9, [%[b], 80]\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "mul	x7, x20, x9\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "umulh	x8, x20, x9\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[10] * B[9]\n\t"
-        "ldr	x7, [%[a], 80]\n\t"
-        "ldr	x8, [%[b], 72]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
+        "ldr	x9, [%[b], 72]\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "mul	x7, x21, x9\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "umulh	x8, x21, x9\n\t"
+        "adds	x5, x5, x7\n\t"
         "#  A[11] * B[8]\n\t"
-        "ldr	x7, [%[a], 88]\n\t"
-        "ldr	x8, [%[b], 64]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
-        "str	x4, [%[r], 152]\n\t"
+        "ldr	x9, [%[b], 64]\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "mul	x7, x22, x9\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "umulh	x8, x22, x9\n\t"
+        "adds	x5, x5, x7\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "str	x5, [%[r], 152]\n\t"
+        "adc	x4, x4, xzr\n\t"
         "#  A[9] * B[11]\n\t"
-        "ldr	x7, [%[a], 72]\n\t"
-        "ldr	x8, [%[b], 88]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, xzr, xzr\n\t"
+        "ldr	x9, [%[b], 88]\n\t"
+        "mul	x7, x20, x9\n\t"
+        "umulh	x8, x20, x9\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[10] * B[10]\n\t"
-        "ldr	x7, [%[a], 80]\n\t"
-        "ldr	x8, [%[b], 80]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x9, [%[b], 80]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "mul	x7, x21, x9\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "umulh	x8, x21, x9\n\t"
+        "adds	x6, x6, x7\n\t"
         "#  A[11] * B[9]\n\t"
-        "ldr	x7, [%[a], 88]\n\t"
-        "ldr	x8, [%[b], 72]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
-        "str	x5, [%[r], 160]\n\t"
+        "ldr	x9, [%[b], 72]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "mul	x7, x22, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x22, x9\n\t"
+        "adds	x6, x6, x7\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "str	x6, [%[r], 160]\n\t"
+        "adc	x5, x5, xzr\n\t"
         "#  A[10] * B[11]\n\t"
-        "ldr	x7, [%[a], 80]\n\t"
-        "ldr	x8, [%[b], 88]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, xzr, xzr\n\t"
+        "ldr	x9, [%[b], 88]\n\t"
+        "mul	x7, x21, x9\n\t"
+        "umulh	x8, x21, x9\n\t"
+        "adds	x4, x4, x7\n\t"
         "#  A[11] * B[10]\n\t"
-        "ldr	x7, [%[a], 88]\n\t"
-        "ldr	x8, [%[b], 80]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
-        "str	x3, [%[r], 168]\n\t"
+        "ldr	x9, [%[b], 80]\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "mul	x7, x22, x9\n\t"
+        "adc	x6, xzr, xzr\n\t"
+        "umulh	x8, x22, x9\n\t"
+        "adds	x4, x4, x7\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "str	x4, [%[r], 168]\n\t"
+        "adc	x6, x6, xzr\n\t"
         "#  A[11] * B[11]\n\t"
-        "ldr	x7, [%[a], 88]\n\t"
-        "ldr	x8, [%[b], 88]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adc	x5, x5, x7\n\t"
-        "stp	x4, x5, [%[r], 176]\n\t"
+        "ldr	x9, [%[b], 88]\n\t"
+        "mul	x7, x22, x9\n\t"
+        "umulh	x8, x22, x9\n\t"
+        "adds	x5, x5, x7\n\t"
+        "adc	x6, x6, x8\n\t"
+        "stp	x5, x6, [%[r], 176]\n\t"
+        "ldp	x10, x11, [%[tmp], 0]\n\t"
+        "ldp	x12, x13, [%[tmp], 16]\n\t"
+        "ldp	x14, x15, [%[tmp], 32]\n\t"
+        "ldp	x16, x17, [%[tmp], 48]\n\t"
+        "ldp	x19, x20, [%[tmp], 64]\n\t"
+        "ldp	x21, x22, [%[tmp], 80]\n\t"
+        "stp	x10, x11, [%[r], 0]\n\t"
+        "stp	x12, x13, [%[r], 16]\n\t"
+        "stp	x14, x15, [%[r], 32]\n\t"
+        "stp	x16, x17, [%[r], 48]\n\t"
+        "stp	x19, x20, [%[r], 64]\n\t"
+        "stp	x21, x22, [%[r], 80]\n\t"
         :
         : [r] "r" (r), [a] "r" (a), [b] "r" (b), [tmp] "r" (tmp)
-        : "memory", "x3", "x4", "x5", "x6", "x7", "x8"
-    );
-
-    XMEMCPY(r, tmp, sizeof(tmp));
+        : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22"
+    );
 }
 
 /* Square a and put result in r. (r = a * a)
@@ -6388,19 +7048,17 @@
  */
 static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a)
 {
-    sp_digit tmp[12];
-
     __asm__ __volatile__ (
         "ldp	x10, x11, [%[a], 0]\n\t"
         "ldp	x12, x13, [%[a], 16]\n\t"
         "ldp	x14, x15, [%[a], 32]\n\t"
         "ldp	x16, x17, [%[a], 48]\n\t"
-        "ldp	x18, x19, [%[a], 64]\n\t"
-        "ldp	x20, x21, [%[a], 80]\n\t"
+        "ldp	x19, x20, [%[a], 64]\n\t"
+        "ldp	x21, x22, [%[a], 80]\n\t"
         "#  A[0] * A[0]\n\t"
         "mul	x2, x10, x10\n\t"
         "umulh	x3, x10, x10\n\t"
-        "str	x2, [%[tmp]]\n\t"
+        "str	x2, [%[r]]\n\t"
         "mov	x4, 0\n\t"
         "#  A[0] * A[1]\n\t"
         "mul	x8, x10, x11\n\t"
@@ -6409,83 +7067,83 @@
         "adcs	x4, x4, x9\n\t"
         "adc	x2, xzr, xzr\n\t"
         "adds	x3, x3, x8\n\t"
-        "adcs	x4, x4, x9\n\t"
-        "adc	x2, x2, xzr\n\t"
-        "str	x3, [%[tmp], 8]\n\t"
+        "str	x3, [%[r], 8]\n\t"
         "#  A[0] * A[2]\n\t"
         "mul	x8, x10, x12\n\t"
+        "adcs	x4, x4, x9\n\t"
         "umulh	x9, x10, x12\n\t"
+        "adc	x2, x2, xzr\n\t"
         "adds	x4, x4, x8\n\t"
         "adcs	x2, x2, x9\n\t"
         "adc	x3, xzr, xzr\n\t"
         "adds	x4, x4, x8\n\t"
-        "adcs	x2, x2, x9\n\t"
-        "adc	x3, x3, xzr\n\t"
         "#  A[1] * A[1]\n\t"
         "mul	x8, x11, x11\n\t"
+        "adcs	x2, x2, x9\n\t"
         "umulh	x9, x11, x11\n\t"
+        "adc	x3, x3, xzr\n\t"
         "adds	x4, x4, x8\n\t"
-        "adcs	x2, x2, x9\n\t"
-        "adc	x3, x3, xzr\n\t"
-        "str	x4, [%[tmp], 16]\n\t"
+        "str	x4, [%[r], 16]\n\t"
         "#  A[0] * A[3]\n\t"
         "mul	x8, x10, x13\n\t"
+        "adcs	x2, x2, x9\n\t"
         "umulh	x9, x10, x13\n\t"
+        "adc	x3, x3, xzr\n\t"
         "adds	x2, x2, x8\n\t"
         "adcs	x3, x3, x9\n\t"
         "adc	x4, xzr, xzr\n\t"
         "adds	x2, x2, x8\n\t"
-        "adcs	x3, x3, x9\n\t"
-        "adc	x4, x4, xzr\n\t"
         "#  A[1] * A[2]\n\t"
         "mul	x8, x11, x12\n\t"
+        "adcs	x3, x3, x9\n\t"
         "umulh	x9, x11, x12\n\t"
+        "adc	x4, x4, xzr\n\t"
         "adds	x2, x2, x8\n\t"
         "adcs	x3, x3, x9\n\t"
         "adc	x4, x4, xzr\n\t"
         "adds	x2, x2, x8\n\t"
-        "adcs	x3, x3, x9\n\t"
-        "adc	x4, x4, xzr\n\t"
-        "str	x2, [%[tmp], 24]\n\t"
+        "str	x2, [%[r], 24]\n\t"
         "#  A[0] * A[4]\n\t"
         "mul	x8, x10, x14\n\t"
+        "adcs	x3, x3, x9\n\t"
         "umulh	x9, x10, x14\n\t"
+        "adc	x4, x4, xzr\n\t"
         "adds	x3, x3, x8\n\t"
         "adcs	x4, x4, x9\n\t"
         "adc	x2, xzr, xzr\n\t"
         "adds	x3, x3, x8\n\t"
-        "adcs	x4, x4, x9\n\t"
-        "adc	x2, x2, xzr\n\t"
         "#  A[1] * A[3]\n\t"
         "mul	x8, x11, x13\n\t"
+        "adcs	x4, x4, x9\n\t"
         "umulh	x9, x11, x13\n\t"
-        "adds	x3, x3, x8\n\t"
-        "adcs	x4, x4, x9\n\t"
         "adc	x2, x2, xzr\n\t"
         "adds	x3, x3, x8\n\t"
         "adcs	x4, x4, x9\n\t"
         "adc	x2, x2, xzr\n\t"
+        "adds	x3, x3, x8\n\t"
         "#  A[2] * A[2]\n\t"
         "mul	x8, x12, x12\n\t"
+        "adcs	x4, x4, x9\n\t"
         "umulh	x9, x12, x12\n\t"
+        "adc	x2, x2, xzr\n\t"
         "adds	x3, x3, x8\n\t"
-        "adcs	x4, x4, x9\n\t"
-        "adc	x2, x2, xzr\n\t"
-        "str	x3, [%[tmp], 32]\n\t"
+        "str	x3, [%[r], 32]\n\t"
         "#  A[0] * A[5]\n\t"
         "mul	x5, x10, x15\n\t"
+        "adcs	x4, x4, x9\n\t"
         "umulh	x6, x10, x15\n\t"
+        "adc	x2, x2, xzr\n\t"
         "mov	x3, 0\n\t"
         "mov	x7, 0\n\t"
         "#  A[1] * A[4]\n\t"
         "mul	x8, x11, x14\n\t"
         "umulh	x9, x11, x14\n\t"
         "adds	x5, x5, x8\n\t"
-        "adcs	x6, x6, x9\n\t"
-        "adc	x7, x7, xzr\n\t"
         "#  A[2] * A[3]\n\t"
         "mul	x8, x12, x13\n\t"
+        "adcs	x6, x6, x9\n\t"
         "umulh	x9, x12, x13\n\t"
+        "adc	x7, x7, xzr\n\t"
         "adds	x5, x5, x8\n\t"
         "adcs	x6, x6, x9\n\t"
         "adc	x7, x7, xzr\n\t"
@@ -6495,7 +7153,7 @@
         "adds	x4, x4, x5\n\t"
         "adcs	x2, x2, x6\n\t"
         "adc	x3, x3, x7\n\t"
-        "str	x4, [%[tmp], 40]\n\t"
+        "str	x4, [%[r], 40]\n\t"
         "#  A[0] * A[6]\n\t"
         "mul	x5, x10, x16\n\t"
         "umulh	x6, x10, x16\n\t"
@@ -6505,17 +7163,17 @@
         "mul	x8, x11, x15\n\t"
         "umulh	x9, x11, x15\n\t"
         "adds	x5, x5, x8\n\t"
-        "adcs	x6, x6, x9\n\t"
-        "adc	x7, x7, xzr\n\t"
         "#  A[2] * A[4]\n\t"
         "mul	x8, x12, x14\n\t"
+        "adcs	x6, x6, x9\n\t"
         "umulh	x9, x12, x14\n\t"
-        "adds	x5, x5, x8\n\t"
-        "adcs	x6, x6, x9\n\t"
-        "adc	x7, x7, xzr\n\t"
+        "adc	x7, x7, xzr\n\t"
+        "adds	x5, x5, x8\n\t"
         "#  A[3] * A[3]\n\t"
         "mul	x8, x13, x13\n\t"
+        "adcs	x6, x6, x9\n\t"
         "umulh	x9, x13, x13\n\t"
+        "adc	x7, x7, xzr\n\t"
         "adds	x5, x5, x5\n\t"
         "adcs	x6, x6, x6\n\t"
         "adc	x7, x7, x7\n\t"
@@ -6525,7 +7183,7 @@
         "adds	x2, x2, x5\n\t"
         "adcs	x3, x3, x6\n\t"
         "adc	x4, x4, x7\n\t"
-        "str	x2, [%[tmp], 48]\n\t"
+        "str	x2, [%[r], 48]\n\t"
         "#  A[0] * A[7]\n\t"
         "mul	x5, x10, x17\n\t"
         "umulh	x6, x10, x17\n\t"
@@ -6535,17 +7193,17 @@
         "mul	x8, x11, x16\n\t"
         "umulh	x9, x11, x16\n\t"
         "adds	x5, x5, x8\n\t"
-        "adcs	x6, x6, x9\n\t"
-        "adc	x7, x7, xzr\n\t"
         "#  A[2] * A[5]\n\t"
         "mul	x8, x12, x15\n\t"
+        "adcs	x6, x6, x9\n\t"
         "umulh	x9, x12, x15\n\t"
-        "adds	x5, x5, x8\n\t"
-        "adcs	x6, x6, x9\n\t"
-        "adc	x7, x7, xzr\n\t"
+        "adc	x7, x7, xzr\n\t"
+        "adds	x5, x5, x8\n\t"
         "#  A[3] * A[4]\n\t"
         "mul	x8, x13, x14\n\t"
+        "adcs	x6, x6, x9\n\t"
         "umulh	x9, x13, x14\n\t"
+        "adc	x7, x7, xzr\n\t"
         "adds	x5, x5, x8\n\t"
         "adcs	x6, x6, x9\n\t"
         "adc	x7, x7, xzr\n\t"
@@ -6555,33 +7213,33 @@
         "adds	x3, x3, x5\n\t"
         "adcs	x4, x4, x6\n\t"
         "adc	x2, x2, x7\n\t"
-        "str	x3, [%[tmp], 56]\n\t"
+        "str	x3, [%[r], 56]\n\t"
         "#  A[0] * A[8]\n\t"
-        "mul	x5, x10, x18\n\t"
-        "umulh	x6, x10, x18\n\t"
+        "mul	x5, x10, x19\n\t"
+        "umulh	x6, x10, x19\n\t"
         "mov	x3, 0\n\t"
         "mov	x7, 0\n\t"
         "#  A[1] * A[7]\n\t"
         "mul	x8, x11, x17\n\t"
         "umulh	x9, x11, x17\n\t"
         "adds	x5, x5, x8\n\t"
-        "adcs	x6, x6, x9\n\t"
-        "adc	x7, x7, xzr\n\t"
         "#  A[2] * A[6]\n\t"
         "mul	x8, x12, x16\n\t"
+        "adcs	x6, x6, x9\n\t"
         "umulh	x9, x12, x16\n\t"
-        "adds	x5, x5, x8\n\t"
-        "adcs	x6, x6, x9\n\t"
-        "adc	x7, x7, xzr\n\t"
+        "adc	x7, x7, xzr\n\t"
+        "adds	x5, x5, x8\n\t"
         "#  A[3] * A[5]\n\t"
         "mul	x8, x13, x15\n\t"
+        "adcs	x6, x6, x9\n\t"
         "umulh	x9, x13, x15\n\t"
-        "adds	x5, x5, x8\n\t"
-        "adcs	x6, x6, x9\n\t"
-        "adc	x7, x7, xzr\n\t"
+        "adc	x7, x7, xzr\n\t"
+        "adds	x5, x5, x8\n\t"
         "#  A[4] * A[4]\n\t"
         "mul	x8, x14, x14\n\t"
+        "adcs	x6, x6, x9\n\t"
         "umulh	x9, x14, x14\n\t"
+        "adc	x7, x7, xzr\n\t"
         "adds	x5, x5, x5\n\t"
         "adcs	x6, x6, x6\n\t"
         "adc	x7, x7, x7\n\t"
@@ -6591,33 +7249,33 @@
         "adds	x4, x4, x5\n\t"
         "adcs	x2, x2, x6\n\t"
         "adc	x3, x3, x7\n\t"
-        "str	x4, [%[tmp], 64]\n\t"
+        "str	x4, [%[r], 64]\n\t"
         "#  A[0] * A[9]\n\t"
-        "mul	x5, x10, x19\n\t"
-        "umulh	x6, x10, x19\n\t"
+        "mul	x5, x10, x20\n\t"
+        "umulh	x6, x10, x20\n\t"
         "mov	x4, 0\n\t"
         "mov	x7, 0\n\t"
         "#  A[1] * A[8]\n\t"
-        "mul	x8, x11, x18\n\t"
-        "umulh	x9, x11, x18\n\t"
-        "adds	x5, x5, x8\n\t"
-        "adcs	x6, x6, x9\n\t"
-        "adc	x7, x7, xzr\n\t"
+        "mul	x8, x11, x19\n\t"
+        "umulh	x9, x11, x19\n\t"
+        "adds	x5, x5, x8\n\t"
         "#  A[2] * A[7]\n\t"
         "mul	x8, x12, x17\n\t"
+        "adcs	x6, x6, x9\n\t"
         "umulh	x9, x12, x17\n\t"
-        "adds	x5, x5, x8\n\t"
-        "adcs	x6, x6, x9\n\t"
-        "adc	x7, x7, xzr\n\t"
+        "adc	x7, x7, xzr\n\t"
+        "adds	x5, x5, x8\n\t"
         "#  A[3] * A[6]\n\t"
         "mul	x8, x13, x16\n\t"
+        "adcs	x6, x6, x9\n\t"
         "umulh	x9, x13, x16\n\t"
-        "adds	x5, x5, x8\n\t"
-        "adcs	x6, x6, x9\n\t"
-        "adc	x7, x7, xzr\n\t"
+        "adc	x7, x7, xzr\n\t"
+        "adds	x5, x5, x8\n\t"
         "#  A[4] * A[5]\n\t"
         "mul	x8, x14, x15\n\t"
+        "adcs	x6, x6, x9\n\t"
         "umulh	x9, x14, x15\n\t"
+        "adc	x7, x7, xzr\n\t"
         "adds	x5, x5, x8\n\t"
         "adcs	x6, x6, x9\n\t"
         "adc	x7, x7, xzr\n\t"
@@ -6627,39 +7285,39 @@
         "adds	x2, x2, x5\n\t"
         "adcs	x3, x3, x6\n\t"
         "adc	x4, x4, x7\n\t"
-        "str	x2, [%[tmp], 72]\n\t"
+        "str	x2, [%[r], 72]\n\t"
         "#  A[0] * A[10]\n\t"
-        "mul	x5, x10, x20\n\t"
-        "umulh	x6, x10, x20\n\t"
+        "mul	x5, x10, x21\n\t"
+        "umulh	x6, x10, x21\n\t"
         "mov	x2, 0\n\t"
         "mov	x7, 0\n\t"
         "#  A[1] * A[9]\n\t"
-        "mul	x8, x11, x19\n\t"
-        "umulh	x9, x11, x19\n\t"
-        "adds	x5, x5, x8\n\t"
-        "adcs	x6, x6, x9\n\t"
-        "adc	x7, x7, xzr\n\t"
+        "mul	x8, x11, x20\n\t"
+        "umulh	x9, x11, x20\n\t"
+        "adds	x5, x5, x8\n\t"
         "#  A[2] * A[8]\n\t"
-        "mul	x8, x12, x18\n\t"
-        "umulh	x9, x12, x18\n\t"
-        "adds	x5, x5, x8\n\t"
-        "adcs	x6, x6, x9\n\t"
-        "adc	x7, x7, xzr\n\t"
+        "mul	x8, x12, x19\n\t"
+        "adcs	x6, x6, x9\n\t"
+        "umulh	x9, x12, x19\n\t"
+        "adc	x7, x7, xzr\n\t"
+        "adds	x5, x5, x8\n\t"
         "#  A[3] * A[7]\n\t"
         "mul	x8, x13, x17\n\t"
+        "adcs	x6, x6, x9\n\t"
         "umulh	x9, x13, x17\n\t"
-        "adds	x5, x5, x8\n\t"
-        "adcs	x6, x6, x9\n\t"
-        "adc	x7, x7, xzr\n\t"
+        "adc	x7, x7, xzr\n\t"
+        "adds	x5, x5, x8\n\t"
         "#  A[4] * A[6]\n\t"
         "mul	x8, x14, x16\n\t"
+        "adcs	x6, x6, x9\n\t"
         "umulh	x9, x14, x16\n\t"
-        "adds	x5, x5, x8\n\t"
-        "adcs	x6, x6, x9\n\t"
-        "adc	x7, x7, xzr\n\t"
+        "adc	x7, x7, xzr\n\t"
+        "adds	x5, x5, x8\n\t"
         "#  A[5] * A[5]\n\t"
         "mul	x8, x15, x15\n\t"
+        "adcs	x6, x6, x9\n\t"
         "umulh	x9, x15, x15\n\t"
+        "adc	x7, x7, xzr\n\t"
         "adds	x5, x5, x5\n\t"
         "adcs	x6, x6, x6\n\t"
         "adc	x7, x7, x7\n\t"
@@ -6669,39 +7327,39 @@
         "adds	x3, x3, x5\n\t"
         "adcs	x4, x4, x6\n\t"
         "adc	x2, x2, x7\n\t"
-        "str	x3, [%[tmp], 80]\n\t"
+        "str	x3, [%[r], 80]\n\t"
         "#  A[0] * A[11]\n\t"
-        "mul	x5, x10, x21\n\t"
-        "umulh	x6, x10, x21\n\t"
+        "mul	x5, x10, x22\n\t"
+        "umulh	x6, x10, x22\n\t"
         "mov	x3, 0\n\t"
         "mov	x7, 0\n\t"
         "#  A[1] * A[10]\n\t"
-        "mul	x8, x11, x20\n\t"
-        "umulh	x9, x11, x20\n\t"
-        "adds	x5, x5, x8\n\t"
-        "adcs	x6, x6, x9\n\t"
-        "adc	x7, x7, xzr\n\t"
+        "mul	x8, x11, x21\n\t"
+        "umulh	x9, x11, x21\n\t"
+        "adds	x5, x5, x8\n\t"
         "#  A[2] * A[9]\n\t"
-        "mul	x8, x12, x19\n\t"
-        "umulh	x9, x12, x19\n\t"
-        "adds	x5, x5, x8\n\t"
-        "adcs	x6, x6, x9\n\t"
-        "adc	x7, x7, xzr\n\t"
+        "mul	x8, x12, x20\n\t"
+        "adcs	x6, x6, x9\n\t"
+        "umulh	x9, x12, x20\n\t"
+        "adc	x7, x7, xzr\n\t"
+        "adds	x5, x5, x8\n\t"
         "#  A[3] * A[8]\n\t"
-        "mul	x8, x13, x18\n\t"
-        "umulh	x9, x13, x18\n\t"
-        "adds	x5, x5, x8\n\t"
-        "adcs	x6, x6, x9\n\t"
-        "adc	x7, x7, xzr\n\t"
+        "mul	x8, x13, x19\n\t"
+        "adcs	x6, x6, x9\n\t"
+        "umulh	x9, x13, x19\n\t"
+        "adc	x7, x7, xzr\n\t"
+        "adds	x5, x5, x8\n\t"
         "#  A[4] * A[7]\n\t"
         "mul	x8, x14, x17\n\t"
+        "adcs	x6, x6, x9\n\t"
         "umulh	x9, x14, x17\n\t"
-        "adds	x5, x5, x8\n\t"
-        "adcs	x6, x6, x9\n\t"
-        "adc	x7, x7, xzr\n\t"
+        "adc	x7, x7, xzr\n\t"
+        "adds	x5, x5, x8\n\t"
         "#  A[5] * A[6]\n\t"
         "mul	x8, x15, x16\n\t"
+        "adcs	x6, x6, x9\n\t"
         "umulh	x9, x15, x16\n\t"
+        "adc	x7, x7, xzr\n\t"
         "adds	x5, x5, x8\n\t"
         "adcs	x6, x6, x9\n\t"
         "adc	x7, x7, xzr\n\t"
@@ -6711,39 +7369,39 @@
         "adds	x4, x4, x5\n\t"
         "adcs	x2, x2, x6\n\t"
         "adc	x3, x3, x7\n\t"
-        "str	x4, [%[tmp], 88]\n\t"
+        "str	x4, [%[r], 88]\n\t"
         "#  A[1] * A[11]\n\t"
-        "mul	x5, x11, x21\n\t"
-        "umulh	x6, x11, x21\n\t"
+        "mul	x5, x11, x22\n\t"
+        "umulh	x6, x11, x22\n\t"
         "mov	x4, 0\n\t"
         "mov	x7, 0\n\t"
         "#  A[2] * A[10]\n\t"
-        "mul	x8, x12, x20\n\t"
-        "umulh	x9, x12, x20\n\t"
-        "adds	x5, x5, x8\n\t"
-        "adcs	x6, x6, x9\n\t"
-        "adc	x7, x7, xzr\n\t"
+        "mul	x8, x12, x21\n\t"
+        "umulh	x9, x12, x21\n\t"
+        "adds	x5, x5, x8\n\t"
         "#  A[3] * A[9]\n\t"
-        "mul	x8, x13, x19\n\t"
-        "umulh	x9, x13, x19\n\t"
-        "adds	x5, x5, x8\n\t"
-        "adcs	x6, x6, x9\n\t"
-        "adc	x7, x7, xzr\n\t"
+        "mul	x8, x13, x20\n\t"
+        "adcs	x6, x6, x9\n\t"
+        "umulh	x9, x13, x20\n\t"
+        "adc	x7, x7, xzr\n\t"
+        "adds	x5, x5, x8\n\t"
         "#  A[4] * A[8]\n\t"
-        "mul	x8, x14, x18\n\t"
-        "umulh	x9, x14, x18\n\t"
-        "adds	x5, x5, x8\n\t"
-        "adcs	x6, x6, x9\n\t"
-        "adc	x7, x7, xzr\n\t"
+        "mul	x8, x14, x19\n\t"
+        "adcs	x6, x6, x9\n\t"
+        "umulh	x9, x14, x19\n\t"
+        "adc	x7, x7, xzr\n\t"
+        "adds	x5, x5, x8\n\t"
         "#  A[5] * A[7]\n\t"
         "mul	x8, x15, x17\n\t"
+        "adcs	x6, x6, x9\n\t"
         "umulh	x9, x15, x17\n\t"
-        "adds	x5, x5, x8\n\t"
-        "adcs	x6, x6, x9\n\t"
-        "adc	x7, x7, xzr\n\t"
+        "adc	x7, x7, xzr\n\t"
+        "adds	x5, x5, x8\n\t"
         "#  A[6] * A[6]\n\t"
         "mul	x8, x16, x16\n\t"
+        "adcs	x6, x6, x9\n\t"
         "umulh	x9, x16, x16\n\t"
+        "adc	x7, x7, xzr\n\t"
         "adds	x5, x5, x5\n\t"
         "adcs	x6, x6, x6\n\t"
         "adc	x7, x7, x7\n\t"
@@ -6755,31 +7413,31 @@
         "adc	x4, x4, x7\n\t"
         "str	x2, [%[r], 96]\n\t"
         "#  A[2] * A[11]\n\t"
-        "mul	x5, x12, x21\n\t"
-        "umulh	x6, x12, x21\n\t"
+        "mul	x5, x12, x22\n\t"
+        "umulh	x6, x12, x22\n\t"
         "mov	x2, 0\n\t"
         "mov	x7, 0\n\t"
         "#  A[3] * A[10]\n\t"
-        "mul	x8, x13, x20\n\t"
-        "umulh	x9, x13, x20\n\t"
-        "adds	x5, x5, x8\n\t"
-        "adcs	x6, x6, x9\n\t"
-        "adc	x7, x7, xzr\n\t"
+        "mul	x8, x13, x21\n\t"
+        "umulh	x9, x13, x21\n\t"
+        "adds	x5, x5, x8\n\t"
         "#  A[4] * A[9]\n\t"
-        "mul	x8, x14, x19\n\t"
-        "umulh	x9, x14, x19\n\t"
-        "adds	x5, x5, x8\n\t"
-        "adcs	x6, x6, x9\n\t"
-        "adc	x7, x7, xzr\n\t"
+        "mul	x8, x14, x20\n\t"
+        "adcs	x6, x6, x9\n\t"
+        "umulh	x9, x14, x20\n\t"
+        "adc	x7, x7, xzr\n\t"
+        "adds	x5, x5, x8\n\t"
         "#  A[5] * A[8]\n\t"
-        "mul	x8, x15, x18\n\t"
-        "umulh	x9, x15, x18\n\t"
-        "adds	x5, x5, x8\n\t"
-        "adcs	x6, x6, x9\n\t"
-        "adc	x7, x7, xzr\n\t"
+        "mul	x8, x15, x19\n\t"
+        "adcs	x6, x6, x9\n\t"
+        "umulh	x9, x15, x19\n\t"
+        "adc	x7, x7, xzr\n\t"
+        "adds	x5, x5, x8\n\t"
         "#  A[6] * A[7]\n\t"
         "mul	x8, x16, x17\n\t"
+        "adcs	x6, x6, x9\n\t"
         "umulh	x9, x16, x17\n\t"
+        "adc	x7, x7, xzr\n\t"
         "adds	x5, x5, x8\n\t"
         "adcs	x6, x6, x9\n\t"
         "adc	x7, x7, xzr\n\t"
@@ -6791,31 +7449,31 @@
         "adc	x2, x2, x7\n\t"
         "str	x3, [%[r], 104]\n\t"
         "#  A[3] * A[11]\n\t"
-        "mul	x5, x13, x21\n\t"
-        "umulh	x6, x13, x21\n\t"
+        "mul	x5, x13, x22\n\t"
+        "umulh	x6, x13, x22\n\t"
         "mov	x3, 0\n\t"
         "mov	x7, 0\n\t"
         "#  A[4] * A[10]\n\t"
-        "mul	x8, x14, x20\n\t"
-        "umulh	x9, x14, x20\n\t"
-        "adds	x5, x5, x8\n\t"
-        "adcs	x6, x6, x9\n\t"
-        "adc	x7, x7, xzr\n\t"
+        "mul	x8, x14, x21\n\t"
+        "umulh	x9, x14, x21\n\t"
+        "adds	x5, x5, x8\n\t"
         "#  A[5] * A[9]\n\t"
-        "mul	x8, x15, x19\n\t"
-        "umulh	x9, x15, x19\n\t"
-        "adds	x5, x5, x8\n\t"
-        "adcs	x6, x6, x9\n\t"
-        "adc	x7, x7, xzr\n\t"
+        "mul	x8, x15, x20\n\t"
+        "adcs	x6, x6, x9\n\t"
+        "umulh	x9, x15, x20\n\t"
+        "adc	x7, x7, xzr\n\t"
+        "adds	x5, x5, x8\n\t"
         "#  A[6] * A[8]\n\t"
-        "mul	x8, x16, x18\n\t"
-        "umulh	x9, x16, x18\n\t"
-        "adds	x5, x5, x8\n\t"
-        "adcs	x6, x6, x9\n\t"
-        "adc	x7, x7, xzr\n\t"
+        "mul	x8, x16, x19\n\t"
+        "adcs	x6, x6, x9\n\t"
+        "umulh	x9, x16, x19\n\t"
+        "adc	x7, x7, xzr\n\t"
+        "adds	x5, x5, x8\n\t"
         "#  A[7] * A[7]\n\t"
         "mul	x8, x17, x17\n\t"
+        "adcs	x6, x6, x9\n\t"
         "umulh	x9, x17, x17\n\t"
+        "adc	x7, x7, xzr\n\t"
         "adds	x5, x5, x5\n\t"
         "adcs	x6, x6, x6\n\t"
         "adc	x7, x7, x7\n\t"
@@ -6827,25 +7485,25 @@
         "adc	x3, x3, x7\n\t"
         "str	x4, [%[r], 112]\n\t"
         "#  A[4] * A[11]\n\t"
-        "mul	x5, x14, x21\n\t"
-        "umulh	x6, x14, x21\n\t"
+        "mul	x5, x14, x22\n\t"
+        "umulh	x6, x14, x22\n\t"
         "mov	x4, 0\n\t"
         "mov	x7, 0\n\t"
         "#  A[5] * A[10]\n\t"
-        "mul	x8, x15, x20\n\t"
-        "umulh	x9, x15, x20\n\t"
-        "adds	x5, x5, x8\n\t"
-        "adcs	x6, x6, x9\n\t"
-        "adc	x7, x7, xzr\n\t"
+        "mul	x8, x15, x21\n\t"
+        "umulh	x9, x15, x21\n\t"
+        "adds	x5, x5, x8\n\t"
         "#  A[6] * A[9]\n\t"
-        "mul	x8, x16, x19\n\t"
-        "umulh	x9, x16, x19\n\t"
-        "adds	x5, x5, x8\n\t"
-        "adcs	x6, x6, x9\n\t"
-        "adc	x7, x7, xzr\n\t"
+        "mul	x8, x16, x20\n\t"
+        "adcs	x6, x6, x9\n\t"
+        "umulh	x9, x16, x20\n\t"
+        "adc	x7, x7, xzr\n\t"
+        "adds	x5, x5, x8\n\t"
         "#  A[7] * A[8]\n\t"
-        "mul	x8, x17, x18\n\t"
-        "umulh	x9, x17, x18\n\t"
+        "mul	x8, x17, x19\n\t"
+        "adcs	x6, x6, x9\n\t"
+        "umulh	x9, x17, x19\n\t"
+        "adc	x7, x7, xzr\n\t"
         "adds	x5, x5, x8\n\t"
         "adcs	x6, x6, x9\n\t"
         "adc	x7, x7, xzr\n\t"
@@ -6857,25 +7515,25 @@
         "adc	x4, x4, x7\n\t"
         "str	x2, [%[r], 120]\n\t"
         "#  A[5] * A[11]\n\t"
-        "mul	x5, x15, x21\n\t"
-        "umulh	x6, x15, x21\n\t"
+        "mul	x5, x15, x22\n\t"
+        "umulh	x6, x15, x22\n\t"
         "mov	x2, 0\n\t"
         "mov	x7, 0\n\t"
         "#  A[6] * A[10]\n\t"
-        "mul	x8, x16, x20\n\t"
-        "umulh	x9, x16, x20\n\t"
-        "adds	x5, x5, x8\n\t"
-        "adcs	x6, x6, x9\n\t"
-        "adc	x7, x7, xzr\n\t"
+        "mul	x8, x16, x21\n\t"
+        "umulh	x9, x16, x21\n\t"
+        "adds	x5, x5, x8\n\t"
         "#  A[7] * A[9]\n\t"
-        "mul	x8, x17, x19\n\t"
-        "umulh	x9, x17, x19\n\t"
-        "adds	x5, x5, x8\n\t"
-        "adcs	x6, x6, x9\n\t"
-        "adc	x7, x7, xzr\n\t"
+        "mul	x8, x17, x20\n\t"
+        "adcs	x6, x6, x9\n\t"
+        "umulh	x9, x17, x20\n\t"
+        "adc	x7, x7, xzr\n\t"
+        "adds	x5, x5, x8\n\t"
         "#  A[8] * A[8]\n\t"
-        "mul	x8, x18, x18\n\t"
-        "umulh	x9, x18, x18\n\t"
+        "mul	x8, x19, x19\n\t"
+        "adcs	x6, x6, x9\n\t"
+        "umulh	x9, x19, x19\n\t"
+        "adc	x7, x7, xzr\n\t"
         "adds	x5, x5, x5\n\t"
         "adcs	x6, x6, x6\n\t"
         "adc	x7, x7, x7\n\t"
@@ -6887,19 +7545,19 @@
         "adc	x2, x2, x7\n\t"
         "str	x3, [%[r], 128]\n\t"
         "#  A[6] * A[11]\n\t"
-        "mul	x5, x16, x21\n\t"
-        "umulh	x6, x16, x21\n\t"
+        "mul	x5, x16, x22\n\t"
+        "umulh	x6, x16, x22\n\t"
         "mov	x3, 0\n\t"
         "mov	x7, 0\n\t"
         "#  A[7] * A[10]\n\t"
-        "mul	x8, x17, x20\n\t"
-        "umulh	x9, x17, x20\n\t"
-        "adds	x5, x5, x8\n\t"
-        "adcs	x6, x6, x9\n\t"
-        "adc	x7, x7, xzr\n\t"
+        "mul	x8, x17, x21\n\t"
+        "umulh	x9, x17, x21\n\t"
+        "adds	x5, x5, x8\n\t"
         "#  A[8] * A[9]\n\t"
-        "mul	x8, x18, x19\n\t"
-        "umulh	x9, x18, x19\n\t"
+        "mul	x8, x19, x20\n\t"
+        "adcs	x6, x6, x9\n\t"
+        "umulh	x9, x19, x20\n\t"
+        "adc	x7, x7, xzr\n\t"
         "adds	x5, x5, x8\n\t"
         "adcs	x6, x6, x9\n\t"
         "adc	x7, x7, xzr\n\t"
@@ -6911,87 +7569,85 @@
         "adc	x3, x3, x7\n\t"
         "str	x4, [%[r], 136]\n\t"
         "#  A[7] * A[11]\n\t"
-        "mul	x8, x17, x21\n\t"
-        "umulh	x9, x17, x21\n\t"
+        "mul	x8, x17, x22\n\t"
+        "umulh	x9, x17, x22\n\t"
         "adds	x2, x2, x8\n\t"
         "adcs	x3, x3, x9\n\t"
         "adc	x4, xzr, xzr\n\t"
         "adds	x2, x2, x8\n\t"
+        "#  A[8] * A[10]\n\t"
+        "mul	x8, x19, x21\n\t"
         "adcs	x3, x3, x9\n\t"
-        "adc	x4, x4, xzr\n\t"
-        "#  A[8] * A[10]\n\t"
-        "mul	x8, x18, x20\n\t"
-        "umulh	x9, x18, x20\n\t"
+        "umulh	x9, x19, x21\n\t"
+        "adc	x4, x4, xzr\n\t"
         "adds	x2, x2, x8\n\t"
         "adcs	x3, x3, x9\n\t"
         "adc	x4, x4, xzr\n\t"
         "adds	x2, x2, x8\n\t"
+        "#  A[9] * A[9]\n\t"
+        "mul	x8, x20, x20\n\t"
         "adcs	x3, x3, x9\n\t"
-        "adc	x4, x4, xzr\n\t"
-        "#  A[9] * A[9]\n\t"
-        "mul	x8, x19, x19\n\t"
-        "umulh	x9, x19, x19\n\t"
+        "umulh	x9, x20, x20\n\t"
+        "adc	x4, x4, xzr\n\t"
         "adds	x2, x2, x8\n\t"
-        "adcs	x3, x3, x9\n\t"
-        "adc	x4, x4, xzr\n\t"
         "str	x2, [%[r], 144]\n\t"
         "#  A[8] * A[11]\n\t"
-        "mul	x8, x18, x21\n\t"
-        "umulh	x9, x18, x21\n\t"
+        "mul	x8, x19, x22\n\t"
+        "adcs	x3, x3, x9\n\t"
+        "umulh	x9, x19, x22\n\t"
+        "adc	x4, x4, xzr\n\t"
         "adds	x3, x3, x8\n\t"
         "adcs	x4, x4, x9\n\t"
         "adc	x2, xzr, xzr\n\t"
         "adds	x3, x3, x8\n\t"
+        "#  A[9] * A[10]\n\t"
+        "mul	x8, x20, x21\n\t"
         "adcs	x4, x4, x9\n\t"
+        "umulh	x9, x20, x21\n\t"
         "adc	x2, x2, xzr\n\t"
-        "#  A[9] * A[10]\n\t"
-        "mul	x8, x19, x20\n\t"
-        "umulh	x9, x19, x20\n\t"
         "adds	x3, x3, x8\n\t"
         "adcs	x4, x4, x9\n\t"
         "adc	x2, x2, xzr\n\t"
         "adds	x3, x3, x8\n\t"
-        "adcs	x4, x4, x9\n\t"
-        "adc	x2, x2, xzr\n\t"
         "str	x3, [%[r], 152]\n\t"
         "#  A[9] * A[11]\n\t"
-        "mul	x8, x19, x21\n\t"
-        "umulh	x9, x19, x21\n\t"
+        "mul	x8, x20, x22\n\t"
+        "adcs	x4, x4, x9\n\t"
+        "umulh	x9, x20, x22\n\t"
+        "adc	x2, x2, xzr\n\t"
         "adds	x4, x4, x8\n\t"
         "adcs	x2, x2, x9\n\t"
         "adc	x3, xzr, xzr\n\t"
         "adds	x4, x4, x8\n\t"
+        "#  A[10] * A[10]\n\t"
+        "mul	x8, x21, x21\n\t"
         "adcs	x2, x2, x9\n\t"
+        "umulh	x9, x21, x21\n\t"
         "adc	x3, x3, xzr\n\t"
-        "#  A[10] * A[10]\n\t"
-        "mul	x8, x20, x20\n\t"
-        "umulh	x9, x20, x20\n\t"
         "adds	x4, x4, x8\n\t"
-        "adcs	x2, x2, x9\n\t"
-        "adc	x3, x3, xzr\n\t"
         "str	x4, [%[r], 160]\n\t"
         "#  A[10] * A[11]\n\t"
-        "mul	x8, x20, x21\n\t"
-        "umulh	x9, x20, x21\n\t"
+        "mul	x8, x21, x22\n\t"
+        "adcs	x2, x2, x9\n\t"
+        "umulh	x9, x21, x22\n\t"
+        "adc	x3, x3, xzr\n\t"
         "adds	x2, x2, x8\n\t"
         "adcs	x3, x3, x9\n\t"
         "adc	x4, xzr, xzr\n\t"
         "adds	x2, x2, x8\n\t"
-        "adcs	x3, x3, x9\n\t"
-        "adc	x4, x4, xzr\n\t"
         "str	x2, [%[r], 168]\n\t"
         "#  A[11] * A[11]\n\t"
-        "mul	x8, x21, x21\n\t"
-        "umulh	x9, x21, x21\n\t"
+        "mul	x8, x22, x22\n\t"
+        "adcs	x3, x3, x9\n\t"
+        "umulh	x9, x22, x22\n\t"
+        "adc	x4, x4, xzr\n\t"
         "adds	x3, x3, x8\n\t"
         "adc	x4, x4, x9\n\t"
         "stp	x3, x4, [%[r], 176]\n\t"
         :
-        : [r] "r" (r), [a] "r" (a), [tmp] "r" (tmp)
-        : "memory", "x2", "x3", "x4", "x8", "x9", "x10", "x5", "x6", "x7", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x18", "x19", "x20", "x21"
-    );
-
-    XMEMCPY(r, tmp, sizeof(tmp));
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "x2", "x3", "x4", "x8", "x9", "x10", "x5", "x6", "x7", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22"
+    );
 }
 
 /* Add b to a into r. (r = a + b)
@@ -7003,46 +7659,44 @@
 static sp_digit sp_3072_add_12(sp_digit* r, const sp_digit* a,
         const sp_digit* b)
 {
-    sp_digit c = 0;
-
     __asm__ __volatile__ (
         "ldp	x3, x4, [%[a], 0]\n\t"
-        "ldp	x5, x6, [%[a], 16]\n\t"
         "ldp	x7, x8, [%[b], 0]\n\t"
-        "ldp	x9, x10, [%[b], 16]\n\t"
         "adds	x3, x3, x7\n\t"
-        "adcs	x4, x4, x8\n\t"
-        "adcs	x5, x5, x9\n\t"
-        "adcs	x6, x6, x10\n\t"
+        "ldp	x5, x6, [%[a], 16]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 16]\n\t"
+        "adcs	x5, x5, x9\n\t"
         "stp	x3, x4, [%[r], 0]\n\t"
+        "adcs	x6, x6, x10\n\t"
         "stp	x5, x6, [%[r], 16]\n\t"
         "ldp	x3, x4, [%[a], 32]\n\t"
+        "ldp	x7, x8, [%[b], 32]\n\t"
+        "adcs	x3, x3, x7\n\t"
         "ldp	x5, x6, [%[a], 48]\n\t"
-        "ldp	x7, x8, [%[b], 32]\n\t"
+        "adcs	x4, x4, x8\n\t"
         "ldp	x9, x10, [%[b], 48]\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adcs	x4, x4, x8\n\t"
-        "adcs	x5, x5, x9\n\t"
-        "adcs	x6, x6, x10\n\t"
+        "adcs	x5, x5, x9\n\t"
         "stp	x3, x4, [%[r], 32]\n\t"
+        "adcs	x6, x6, x10\n\t"
         "stp	x5, x6, [%[r], 48]\n\t"
         "ldp	x3, x4, [%[a], 64]\n\t"
+        "ldp	x7, x8, [%[b], 64]\n\t"
+        "adcs	x3, x3, x7\n\t"
         "ldp	x5, x6, [%[a], 80]\n\t"
-        "ldp	x7, x8, [%[b], 64]\n\t"
+        "adcs	x4, x4, x8\n\t"
         "ldp	x9, x10, [%[b], 80]\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adcs	x4, x4, x8\n\t"
-        "adcs	x5, x5, x9\n\t"
-        "adcs	x6, x6, x10\n\t"
+        "adcs	x5, x5, x9\n\t"
         "stp	x3, x4, [%[r], 64]\n\t"
+        "adcs	x6, x6, x10\n\t"
         "stp	x5, x6, [%[r], 80]\n\t"
-        "cset	%[c], cs\n\t"
-        : [c] "+r" (c)
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        "cset	%[r], cs\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
         : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10"
     );
 
-    return c;
+    return (sp_digit)r;
 }
 
 /* Sub b from a into a. (a -= b)
@@ -7052,76 +7706,74 @@
  */
 static sp_digit sp_3072_sub_in_place_24(sp_digit* a, const sp_digit* b)
 {
-    sp_digit c = 0;
-
     __asm__ __volatile__ (
         "ldp	x2, x3, [%[a], 0]\n\t"
-        "ldp	x4, x5, [%[a], 16]\n\t"
         "ldp	x6, x7, [%[b], 0]\n\t"
-        "ldp	x8, x9, [%[b], 16]\n\t"
         "subs	x2, x2, x6\n\t"
-        "sbcs	x3, x3, x7\n\t"
-        "sbcs	x4, x4, x8\n\t"
-        "sbcs	x5, x5, x9\n\t"
+        "ldp	x4, x5, [%[a], 16]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x8, x9, [%[b], 16]\n\t"
+        "sbcs	x4, x4, x8\n\t"
         "stp	x2, x3, [%[a], 0]\n\t"
+        "sbcs	x5, x5, x9\n\t"
         "stp	x4, x5, [%[a], 16]\n\t"
         "ldp	x2, x3, [%[a], 32]\n\t"
+        "ldp	x6, x7, [%[b], 32]\n\t"
+        "sbcs	x2, x2, x6\n\t"
         "ldp	x4, x5, [%[a], 48]\n\t"
-        "ldp	x6, x7, [%[b], 32]\n\t"
+        "sbcs	x3, x3, x7\n\t"
         "ldp	x8, x9, [%[b], 48]\n\t"
-        "sbcs	x2, x2, x6\n\t"
-        "sbcs	x3, x3, x7\n\t"
-        "sbcs	x4, x4, x8\n\t"
-        "sbcs	x5, x5, x9\n\t"
+        "sbcs	x4, x4, x8\n\t"
         "stp	x2, x3, [%[a], 32]\n\t"
+        "sbcs	x5, x5, x9\n\t"
         "stp	x4, x5, [%[a], 48]\n\t"
         "ldp	x2, x3, [%[a], 64]\n\t"
+        "ldp	x6, x7, [%[b], 64]\n\t"
+        "sbcs	x2, x2, x6\n\t"
         "ldp	x4, x5, [%[a], 80]\n\t"
-        "ldp	x6, x7, [%[b], 64]\n\t"
+        "sbcs	x3, x3, x7\n\t"
         "ldp	x8, x9, [%[b], 80]\n\t"
-        "sbcs	x2, x2, x6\n\t"
-        "sbcs	x3, x3, x7\n\t"
-        "sbcs	x4, x4, x8\n\t"
-        "sbcs	x5, x5, x9\n\t"
+        "sbcs	x4, x4, x8\n\t"
         "stp	x2, x3, [%[a], 64]\n\t"
+        "sbcs	x5, x5, x9\n\t"
         "stp	x4, x5, [%[a], 80]\n\t"
         "ldp	x2, x3, [%[a], 96]\n\t"
+        "ldp	x6, x7, [%[b], 96]\n\t"
+        "sbcs	x2, x2, x6\n\t"
         "ldp	x4, x5, [%[a], 112]\n\t"
-        "ldp	x6, x7, [%[b], 96]\n\t"
+        "sbcs	x3, x3, x7\n\t"
         "ldp	x8, x9, [%[b], 112]\n\t"
-        "sbcs	x2, x2, x6\n\t"
-        "sbcs	x3, x3, x7\n\t"
-        "sbcs	x4, x4, x8\n\t"
-        "sbcs	x5, x5, x9\n\t"
+        "sbcs	x4, x4, x8\n\t"
         "stp	x2, x3, [%[a], 96]\n\t"
+        "sbcs	x5, x5, x9\n\t"
         "stp	x4, x5, [%[a], 112]\n\t"
         "ldp	x2, x3, [%[a], 128]\n\t"
+        "ldp	x6, x7, [%[b], 128]\n\t"
+        "sbcs	x2, x2, x6\n\t"
         "ldp	x4, x5, [%[a], 144]\n\t"
-        "ldp	x6, x7, [%[b], 128]\n\t"
+        "sbcs	x3, x3, x7\n\t"
         "ldp	x8, x9, [%[b], 144]\n\t"
-        "sbcs	x2, x2, x6\n\t"
-        "sbcs	x3, x3, x7\n\t"
-        "sbcs	x4, x4, x8\n\t"
-        "sbcs	x5, x5, x9\n\t"
+        "sbcs	x4, x4, x8\n\t"
         "stp	x2, x3, [%[a], 128]\n\t"
+        "sbcs	x5, x5, x9\n\t"
         "stp	x4, x5, [%[a], 144]\n\t"
         "ldp	x2, x3, [%[a], 160]\n\t"
+        "ldp	x6, x7, [%[b], 160]\n\t"
+        "sbcs	x2, x2, x6\n\t"
         "ldp	x4, x5, [%[a], 176]\n\t"
-        "ldp	x6, x7, [%[b], 160]\n\t"
+        "sbcs	x3, x3, x7\n\t"
         "ldp	x8, x9, [%[b], 176]\n\t"
-        "sbcs	x2, x2, x6\n\t"
-        "sbcs	x3, x3, x7\n\t"
-        "sbcs	x4, x4, x8\n\t"
-        "sbcs	x5, x5, x9\n\t"
+        "sbcs	x4, x4, x8\n\t"
         "stp	x2, x3, [%[a], 160]\n\t"
+        "sbcs	x5, x5, x9\n\t"
         "stp	x4, x5, [%[a], 176]\n\t"
-        "csetm	%[c], cc\n\t"
-        : [c] "+r" (c)
-        : [a] "r" (a), [b] "r" (b)
+        "csetm	%[a], cc\n\t"
+        : [a] "+r" (a)
+        : [b] "r" (b)
         : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
     );
 
-    return c;
+    return (sp_digit)a;
 }
 
 /* Add b to a into r. (r = a + b)
@@ -7133,76 +7785,74 @@
 static sp_digit sp_3072_add_24(sp_digit* r, const sp_digit* a,
         const sp_digit* b)
 {
-    sp_digit c = 0;
-
     __asm__ __volatile__ (
         "ldp	x3, x4, [%[a], 0]\n\t"
-        "ldp	x5, x6, [%[a], 16]\n\t"
         "ldp	x7, x8, [%[b], 0]\n\t"
-        "ldp	x9, x10, [%[b], 16]\n\t"
         "adds	x3, x3, x7\n\t"
-        "adcs	x4, x4, x8\n\t"
-        "adcs	x5, x5, x9\n\t"
-        "adcs	x6, x6, x10\n\t"
+        "ldp	x5, x6, [%[a], 16]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 16]\n\t"
+        "adcs	x5, x5, x9\n\t"
         "stp	x3, x4, [%[r], 0]\n\t"
+        "adcs	x6, x6, x10\n\t"
         "stp	x5, x6, [%[r], 16]\n\t"
         "ldp	x3, x4, [%[a], 32]\n\t"
+        "ldp	x7, x8, [%[b], 32]\n\t"
+        "adcs	x3, x3, x7\n\t"
         "ldp	x5, x6, [%[a], 48]\n\t"
-        "ldp	x7, x8, [%[b], 32]\n\t"
+        "adcs	x4, x4, x8\n\t"
         "ldp	x9, x10, [%[b], 48]\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adcs	x4, x4, x8\n\t"
-        "adcs	x5, x5, x9\n\t"
-        "adcs	x6, x6, x10\n\t"
+        "adcs	x5, x5, x9\n\t"
         "stp	x3, x4, [%[r], 32]\n\t"
+        "adcs	x6, x6, x10\n\t"
         "stp	x5, x6, [%[r], 48]\n\t"
         "ldp	x3, x4, [%[a], 64]\n\t"
+        "ldp	x7, x8, [%[b], 64]\n\t"
+        "adcs	x3, x3, x7\n\t"
         "ldp	x5, x6, [%[a], 80]\n\t"
-        "ldp	x7, x8, [%[b], 64]\n\t"
+        "adcs	x4, x4, x8\n\t"
         "ldp	x9, x10, [%[b], 80]\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adcs	x4, x4, x8\n\t"
-        "adcs	x5, x5, x9\n\t"
-        "adcs	x6, x6, x10\n\t"
+        "adcs	x5, x5, x9\n\t"
         "stp	x3, x4, [%[r], 64]\n\t"
+        "adcs	x6, x6, x10\n\t"
         "stp	x5, x6, [%[r], 80]\n\t"
         "ldp	x3, x4, [%[a], 96]\n\t"
+        "ldp	x7, x8, [%[b], 96]\n\t"
+        "adcs	x3, x3, x7\n\t"
         "ldp	x5, x6, [%[a], 112]\n\t"
-        "ldp	x7, x8, [%[b], 96]\n\t"
+        "adcs	x4, x4, x8\n\t"
         "ldp	x9, x10, [%[b], 112]\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adcs	x4, x4, x8\n\t"
-        "adcs	x5, x5, x9\n\t"
-        "adcs	x6, x6, x10\n\t"
+        "adcs	x5, x5, x9\n\t"
         "stp	x3, x4, [%[r], 96]\n\t"
+        "adcs	x6, x6, x10\n\t"
         "stp	x5, x6, [%[r], 112]\n\t"
         "ldp	x3, x4, [%[a], 128]\n\t"
+        "ldp	x7, x8, [%[b], 128]\n\t"
+        "adcs	x3, x3, x7\n\t"
         "ldp	x5, x6, [%[a], 144]\n\t"
-        "ldp	x7, x8, [%[b], 128]\n\t"
+        "adcs	x4, x4, x8\n\t"
         "ldp	x9, x10, [%[b], 144]\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adcs	x4, x4, x8\n\t"
-        "adcs	x5, x5, x9\n\t"
-        "adcs	x6, x6, x10\n\t"
+        "adcs	x5, x5, x9\n\t"
         "stp	x3, x4, [%[r], 128]\n\t"
+        "adcs	x6, x6, x10\n\t"
         "stp	x5, x6, [%[r], 144]\n\t"
         "ldp	x3, x4, [%[a], 160]\n\t"
+        "ldp	x7, x8, [%[b], 160]\n\t"
+        "adcs	x3, x3, x7\n\t"
         "ldp	x5, x6, [%[a], 176]\n\t"
-        "ldp	x7, x8, [%[b], 160]\n\t"
+        "adcs	x4, x4, x8\n\t"
         "ldp	x9, x10, [%[b], 176]\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adcs	x4, x4, x8\n\t"
-        "adcs	x5, x5, x9\n\t"
-        "adcs	x6, x6, x10\n\t"
+        "adcs	x5, x5, x9\n\t"
         "stp	x3, x4, [%[r], 160]\n\t"
+        "adcs	x6, x6, x10\n\t"
         "stp	x5, x6, [%[r], 176]\n\t"
-        "cset	%[c], cs\n\t"
-        : [c] "+r" (c)
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        "cset	%[r], cs\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
         : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10"
     );
 
-    return c;
+    return (sp_digit)r;
 }
 
 /* AND m into each word of a and store in r.
@@ -7211,13 +7861,14 @@
  * a  A single precision integer.
  * m  Mask to AND against each digit.
  */
-static void sp_3072_mask_12(sp_digit* r, sp_digit* a, sp_digit m)
-{
-#ifdef WOLFSSL_SP_SMALL
-    int i;
-
-    for (i=0; i<12; i++)
+static void sp_3072_mask_12(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<12; i++) {
         r[i] = a[i] & m;
+    }
 #else
     r[0] = a[0] & m;
     r[1] = a[1] & m;
@@ -7234,13 +7885,53 @@
 #endif
 }
 
+/* Add digit to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+static void sp_3072_add_zero_12(sp_digit* r, const sp_digit* a,
+        const sp_digit d)
+{
+    __asm__ __volatile__ (
+        "ldp	x3, x4, [%[a], 0]\n\t"
+        "ldp	x5, x6, [%[a], 16]\n\t"
+        "adds	x3, x3, %[d]\n\t"
+        "adcs	x4, x4, xzr\n\t"
+        "adcs	x5, x5, xzr\n\t"
+        "stp	x3, x4, [%[r], 0]\n\t"
+        "adcs	x6, x6, xzr\n\t"
+        "stp	x5, x6, [%[r], 16]\n\t"
+        "ldp	x3, x4, [%[a], 32]\n\t"
+        "ldp	x5, x6, [%[a], 48]\n\t"
+        "adcs	x3, x3, xzr\n\t"
+        "adcs	x4, x4, xzr\n\t"
+        "adcs	x5, x5, xzr\n\t"
+        "stp	x3, x4, [%[r], 32]\n\t"
+        "adcs	x6, x6, xzr\n\t"
+        "stp	x5, x6, [%[r], 48]\n\t"
+        "ldp	x3, x4, [%[a], 64]\n\t"
+        "ldp	x5, x6, [%[a], 80]\n\t"
+        "adcs	x3, x3, xzr\n\t"
+        "adcs	x4, x4, xzr\n\t"
+        "adcs	x5, x5, xzr\n\t"
+        "stp	x3, x4, [%[r], 64]\n\t"
+        "adcs	x6, x6, xzr\n\t"
+        "stp	x5, x6, [%[r], 80]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [d] "r" (d)
+        : "memory", "x3", "x4", "x5", "x6"
+    );
+}
+
 /* Multiply a and b into r. (r = a * b)
  *
  * r  A single precision integer.
  * a  A single precision integer.
  * b  A single precision integer.
  */
-static void sp_3072_mul_24(sp_digit* r, const sp_digit* a,
+SP_NOINLINE static void sp_3072_mul_24(sp_digit* r, const sp_digit* a,
         const sp_digit* b)
 {
     sp_digit* z0 = r;
@@ -7262,17 +7953,95 @@
     u += sp_3072_sub_in_place_24(z1, z2);
     u += sp_3072_sub_in_place_24(z1, z0);
     u += sp_3072_add_24(r + 12, r + 12, z1);
-    r[36] = u;
-    XMEMSET(r + 36 + 1, 0, sizeof(sp_digit) * (12 - 1));
-    sp_3072_add_24(r + 24, r + 24, z2);
-}
-
+    u += sp_3072_add_12(r + 24, r + 24, z2);
+    sp_3072_add_zero_12(r + 36, z2 + 12, u);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Double a into r. (r = a + a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+static sp_digit sp_3072_dbl_12(sp_digit* r, const sp_digit* a)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "add	x11, %[a], 96\n\t"
+        "\n1:\n\t"
+        "adds	%[c], %[c], #-1\n\t"
+        "ldp	x3, x4, [%[a]], #16\n\t"
+        "ldp	x5, x6, [%[a]], #16\n\t"
+        "adcs	x3, x3, x3\n\t"
+        "adcs	x4, x4, x4\n\t"
+        "adcs	x5, x5, x5\n\t"
+        "stp	x3, x4, [%[r]], #16\n\t"
+        "adcs	x6, x6, x6\n\t"
+        "stp	x5, x6, [%[r]], #16\n\t"
+        "cset	%[c], cs\n\t"
+        "cmp	%[a], x11\n\t"
+        "b.ne	1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a)
+        :
+        : "memory", "x3", "x4", "x5", "x6", "x11"
+    );
+
+    return c;
+}
+
+#else
+/* Double a into r. (r = a + a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+static sp_digit sp_3072_dbl_12(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "ldp	x3, x4, [%[a], 0]\n\t"
+        "adds	x3, x3, x3\n\t"
+        "ldr	x5, [%[a], 16]\n\t"
+        "adcs	x4, x4, x4\n\t"
+        "ldr	x6, [%[a], 24]\n\t"
+        "adcs	x5, x5, x5\n\t"
+        "stp	x3, x4, [%[r], 0]\n\t"
+        "adcs	x6, x6, x6\n\t"
+        "stp	x5, x6, [%[r], 16]\n\t"
+        "ldp	x3, x4, [%[a], 32]\n\t"
+        "adcs	x3, x3, x3\n\t"
+        "ldr	x5, [%[a], 48]\n\t"
+        "adcs	x4, x4, x4\n\t"
+        "ldr	x6, [%[a], 56]\n\t"
+        "adcs	x5, x5, x5\n\t"
+        "stp	x3, x4, [%[r], 32]\n\t"
+        "adcs	x6, x6, x6\n\t"
+        "stp	x5, x6, [%[r], 48]\n\t"
+        "ldp	x3, x4, [%[a], 64]\n\t"
+        "adcs	x3, x3, x3\n\t"
+        "ldr	x5, [%[a], 80]\n\t"
+        "adcs	x4, x4, x4\n\t"
+        "ldr	x6, [%[a], 88]\n\t"
+        "adcs	x5, x5, x5\n\t"
+        "stp	x3, x4, [%[r], 64]\n\t"
+        "adcs	x6, x6, x6\n\t"
+        "stp	x5, x6, [%[r], 80]\n\t"
+        "cset	%[r], cs\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a)
+        : "memory", "x3", "x4", "x5", "x6"
+    );
+
+    return (sp_digit)r;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
 /* Square a and put result in r. (r = a * a)
  *
  * r  A single precision integer.
  * a  A single precision integer.
  */
-static void sp_3072_sqr_24(sp_digit* r, const sp_digit* a)
+SP_NOINLINE static void sp_3072_sqr_24(sp_digit* r, const sp_digit* a)
 {
     sp_digit* z0 = r;
     sp_digit z2[24];
@@ -7285,13 +8054,13 @@
     sp_3072_sqr_12(z2, &a[12]);
     sp_3072_sqr_12(z0, a);
     sp_3072_mask_12(r + 24, a1, 0 - u);
-    u += sp_3072_add_12(r + 24, r + 24, r + 24);
+    u += sp_3072_dbl_12(r + 24, r + 24);
     u += sp_3072_sub_in_place_24(z1, z2);
     u += sp_3072_sub_in_place_24(z1, z0);
     u += sp_3072_add_24(r + 12, r + 12, z1);
-    r[36] = u;
-    XMEMSET(r + 36 + 1, 0, sizeof(sp_digit) * (12 - 1));
-    sp_3072_add_24(r + 24, r + 24, z2);
+    u += sp_3072_add_12(r + 24, r + 24, z2);
+    sp_3072_add_zero_12(r + 36, z2 + 12, u);
+    
 }
 
 /* Sub b from a into a. (a -= b)
@@ -7301,136 +8070,134 @@
  */
 static sp_digit sp_3072_sub_in_place_48(sp_digit* a, const sp_digit* b)
 {
-    sp_digit c = 0;
-
     __asm__ __volatile__ (
         "ldp	x2, x3, [%[a], 0]\n\t"
-        "ldp	x4, x5, [%[a], 16]\n\t"
         "ldp	x6, x7, [%[b], 0]\n\t"
-        "ldp	x8, x9, [%[b], 16]\n\t"
         "subs	x2, x2, x6\n\t"
-        "sbcs	x3, x3, x7\n\t"
-        "sbcs	x4, x4, x8\n\t"
-        "sbcs	x5, x5, x9\n\t"
+        "ldp	x4, x5, [%[a], 16]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x8, x9, [%[b], 16]\n\t"
+        "sbcs	x4, x4, x8\n\t"
         "stp	x2, x3, [%[a], 0]\n\t"
+        "sbcs	x5, x5, x9\n\t"
         "stp	x4, x5, [%[a], 16]\n\t"
         "ldp	x2, x3, [%[a], 32]\n\t"
+        "ldp	x6, x7, [%[b], 32]\n\t"
+        "sbcs	x2, x2, x6\n\t"
         "ldp	x4, x5, [%[a], 48]\n\t"
-        "ldp	x6, x7, [%[b], 32]\n\t"
+        "sbcs	x3, x3, x7\n\t"
         "ldp	x8, x9, [%[b], 48]\n\t"
-        "sbcs	x2, x2, x6\n\t"
-        "sbcs	x3, x3, x7\n\t"
-        "sbcs	x4, x4, x8\n\t"
-        "sbcs	x5, x5, x9\n\t"
+        "sbcs	x4, x4, x8\n\t"
         "stp	x2, x3, [%[a], 32]\n\t"
+        "sbcs	x5, x5, x9\n\t"
         "stp	x4, x5, [%[a], 48]\n\t"
         "ldp	x2, x3, [%[a], 64]\n\t"
+        "ldp	x6, x7, [%[b], 64]\n\t"
+        "sbcs	x2, x2, x6\n\t"
         "ldp	x4, x5, [%[a], 80]\n\t"
-        "ldp	x6, x7, [%[b], 64]\n\t"
+        "sbcs	x3, x3, x7\n\t"
         "ldp	x8, x9, [%[b], 80]\n\t"
-        "sbcs	x2, x2, x6\n\t"
-        "sbcs	x3, x3, x7\n\t"
-        "sbcs	x4, x4, x8\n\t"
-        "sbcs	x5, x5, x9\n\t"
+        "sbcs	x4, x4, x8\n\t"
         "stp	x2, x3, [%[a], 64]\n\t"
+        "sbcs	x5, x5, x9\n\t"
         "stp	x4, x5, [%[a], 80]\n\t"
         "ldp	x2, x3, [%[a], 96]\n\t"
+        "ldp	x6, x7, [%[b], 96]\n\t"
+        "sbcs	x2, x2, x6\n\t"
         "ldp	x4, x5, [%[a], 112]\n\t"
-        "ldp	x6, x7, [%[b], 96]\n\t"
+        "sbcs	x3, x3, x7\n\t"
         "ldp	x8, x9, [%[b], 112]\n\t"
-        "sbcs	x2, x2, x6\n\t"
-        "sbcs	x3, x3, x7\n\t"
-        "sbcs	x4, x4, x8\n\t"
-        "sbcs	x5, x5, x9\n\t"
+        "sbcs	x4, x4, x8\n\t"
         "stp	x2, x3, [%[a], 96]\n\t"
+        "sbcs	x5, x5, x9\n\t"
         "stp	x4, x5, [%[a], 112]\n\t"
         "ldp	x2, x3, [%[a], 128]\n\t"
+        "ldp	x6, x7, [%[b], 128]\n\t"
+        "sbcs	x2, x2, x6\n\t"
         "ldp	x4, x5, [%[a], 144]\n\t"
-        "ldp	x6, x7, [%[b], 128]\n\t"
+        "sbcs	x3, x3, x7\n\t"
         "ldp	x8, x9, [%[b], 144]\n\t"
-        "sbcs	x2, x2, x6\n\t"
-        "sbcs	x3, x3, x7\n\t"
-        "sbcs	x4, x4, x8\n\t"
-        "sbcs	x5, x5, x9\n\t"
+        "sbcs	x4, x4, x8\n\t"
         "stp	x2, x3, [%[a], 128]\n\t"
+        "sbcs	x5, x5, x9\n\t"
         "stp	x4, x5, [%[a], 144]\n\t"
         "ldp	x2, x3, [%[a], 160]\n\t"
+        "ldp	x6, x7, [%[b], 160]\n\t"
+        "sbcs	x2, x2, x6\n\t"
         "ldp	x4, x5, [%[a], 176]\n\t"
-        "ldp	x6, x7, [%[b], 160]\n\t"
+        "sbcs	x3, x3, x7\n\t"
         "ldp	x8, x9, [%[b], 176]\n\t"
-        "sbcs	x2, x2, x6\n\t"
-        "sbcs	x3, x3, x7\n\t"
-        "sbcs	x4, x4, x8\n\t"
-        "sbcs	x5, x5, x9\n\t"
+        "sbcs	x4, x4, x8\n\t"
         "stp	x2, x3, [%[a], 160]\n\t"
+        "sbcs	x5, x5, x9\n\t"
         "stp	x4, x5, [%[a], 176]\n\t"
         "ldp	x2, x3, [%[a], 192]\n\t"
+        "ldp	x6, x7, [%[b], 192]\n\t"
+        "sbcs	x2, x2, x6\n\t"
         "ldp	x4, x5, [%[a], 208]\n\t"
-        "ldp	x6, x7, [%[b], 192]\n\t"
+        "sbcs	x3, x3, x7\n\t"
         "ldp	x8, x9, [%[b], 208]\n\t"
-        "sbcs	x2, x2, x6\n\t"
-        "sbcs	x3, x3, x7\n\t"
-        "sbcs	x4, x4, x8\n\t"
-        "sbcs	x5, x5, x9\n\t"
+        "sbcs	x4, x4, x8\n\t"
         "stp	x2, x3, [%[a], 192]\n\t"
+        "sbcs	x5, x5, x9\n\t"
         "stp	x4, x5, [%[a], 208]\n\t"
         "ldp	x2, x3, [%[a], 224]\n\t"
+        "ldp	x6, x7, [%[b], 224]\n\t"
+        "sbcs	x2, x2, x6\n\t"
         "ldp	x4, x5, [%[a], 240]\n\t"
-        "ldp	x6, x7, [%[b], 224]\n\t"
+        "sbcs	x3, x3, x7\n\t"
         "ldp	x8, x9, [%[b], 240]\n\t"
-        "sbcs	x2, x2, x6\n\t"
-        "sbcs	x3, x3, x7\n\t"
-        "sbcs	x4, x4, x8\n\t"
-        "sbcs	x5, x5, x9\n\t"
+        "sbcs	x4, x4, x8\n\t"
         "stp	x2, x3, [%[a], 224]\n\t"
+        "sbcs	x5, x5, x9\n\t"
         "stp	x4, x5, [%[a], 240]\n\t"
         "ldp	x2, x3, [%[a], 256]\n\t"
+        "ldp	x6, x7, [%[b], 256]\n\t"
+        "sbcs	x2, x2, x6\n\t"
         "ldp	x4, x5, [%[a], 272]\n\t"
-        "ldp	x6, x7, [%[b], 256]\n\t"
+        "sbcs	x3, x3, x7\n\t"
         "ldp	x8, x9, [%[b], 272]\n\t"
-        "sbcs	x2, x2, x6\n\t"
-        "sbcs	x3, x3, x7\n\t"
-        "sbcs	x4, x4, x8\n\t"
-        "sbcs	x5, x5, x9\n\t"
+        "sbcs	x4, x4, x8\n\t"
         "stp	x2, x3, [%[a], 256]\n\t"
+        "sbcs	x5, x5, x9\n\t"
         "stp	x4, x5, [%[a], 272]\n\t"
         "ldp	x2, x3, [%[a], 288]\n\t"
+        "ldp	x6, x7, [%[b], 288]\n\t"
+        "sbcs	x2, x2, x6\n\t"
         "ldp	x4, x5, [%[a], 304]\n\t"
-        "ldp	x6, x7, [%[b], 288]\n\t"
+        "sbcs	x3, x3, x7\n\t"
         "ldp	x8, x9, [%[b], 304]\n\t"
-        "sbcs	x2, x2, x6\n\t"
-        "sbcs	x3, x3, x7\n\t"
-        "sbcs	x4, x4, x8\n\t"
-        "sbcs	x5, x5, x9\n\t"
+        "sbcs	x4, x4, x8\n\t"
         "stp	x2, x3, [%[a], 288]\n\t"
+        "sbcs	x5, x5, x9\n\t"
         "stp	x4, x5, [%[a], 304]\n\t"
         "ldp	x2, x3, [%[a], 320]\n\t"
+        "ldp	x6, x7, [%[b], 320]\n\t"
+        "sbcs	x2, x2, x6\n\t"
         "ldp	x4, x5, [%[a], 336]\n\t"
-        "ldp	x6, x7, [%[b], 320]\n\t"
+        "sbcs	x3, x3, x7\n\t"
         "ldp	x8, x9, [%[b], 336]\n\t"
-        "sbcs	x2, x2, x6\n\t"
-        "sbcs	x3, x3, x7\n\t"
-        "sbcs	x4, x4, x8\n\t"
-        "sbcs	x5, x5, x9\n\t"
+        "sbcs	x4, x4, x8\n\t"
         "stp	x2, x3, [%[a], 320]\n\t"
+        "sbcs	x5, x5, x9\n\t"
         "stp	x4, x5, [%[a], 336]\n\t"
         "ldp	x2, x3, [%[a], 352]\n\t"
+        "ldp	x6, x7, [%[b], 352]\n\t"
+        "sbcs	x2, x2, x6\n\t"
         "ldp	x4, x5, [%[a], 368]\n\t"
-        "ldp	x6, x7, [%[b], 352]\n\t"
+        "sbcs	x3, x3, x7\n\t"
         "ldp	x8, x9, [%[b], 368]\n\t"
-        "sbcs	x2, x2, x6\n\t"
-        "sbcs	x3, x3, x7\n\t"
-        "sbcs	x4, x4, x8\n\t"
-        "sbcs	x5, x5, x9\n\t"
+        "sbcs	x4, x4, x8\n\t"
         "stp	x2, x3, [%[a], 352]\n\t"
+        "sbcs	x5, x5, x9\n\t"
         "stp	x4, x5, [%[a], 368]\n\t"
-        "csetm	%[c], cc\n\t"
-        : [c] "+r" (c)
-        : [a] "r" (a), [b] "r" (b)
+        "csetm	%[a], cc\n\t"
+        : [a] "+r" (a)
+        : [b] "r" (b)
         : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
     );
 
-    return c;
+    return (sp_digit)a;
 }
 
 /* Add b to a into r. (r = a + b)
@@ -7442,136 +8209,134 @@
 static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a,
         const sp_digit* b)
 {
-    sp_digit c = 0;
-
     __asm__ __volatile__ (
         "ldp	x3, x4, [%[a], 0]\n\t"
-        "ldp	x5, x6, [%[a], 16]\n\t"
         "ldp	x7, x8, [%[b], 0]\n\t"
-        "ldp	x9, x10, [%[b], 16]\n\t"
         "adds	x3, x3, x7\n\t"
-        "adcs	x4, x4, x8\n\t"
-        "adcs	x5, x5, x9\n\t"
-        "adcs	x6, x6, x10\n\t"
+        "ldp	x5, x6, [%[a], 16]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 16]\n\t"
+        "adcs	x5, x5, x9\n\t"
         "stp	x3, x4, [%[r], 0]\n\t"
+        "adcs	x6, x6, x10\n\t"
         "stp	x5, x6, [%[r], 16]\n\t"
         "ldp	x3, x4, [%[a], 32]\n\t"
+        "ldp	x7, x8, [%[b], 32]\n\t"
+        "adcs	x3, x3, x7\n\t"
         "ldp	x5, x6, [%[a], 48]\n\t"
-        "ldp	x7, x8, [%[b], 32]\n\t"
+        "adcs	x4, x4, x8\n\t"
         "ldp	x9, x10, [%[b], 48]\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adcs	x4, x4, x8\n\t"
-        "adcs	x5, x5, x9\n\t"
-        "adcs	x6, x6, x10\n\t"
+        "adcs	x5, x5, x9\n\t"
         "stp	x3, x4, [%[r], 32]\n\t"
+        "adcs	x6, x6, x10\n\t"
         "stp	x5, x6, [%[r], 48]\n\t"
         "ldp	x3, x4, [%[a], 64]\n\t"
+        "ldp	x7, x8, [%[b], 64]\n\t"
+        "adcs	x3, x3, x7\n\t"
         "ldp	x5, x6, [%[a], 80]\n\t"
-        "ldp	x7, x8, [%[b], 64]\n\t"
+        "adcs	x4, x4, x8\n\t"
         "ldp	x9, x10, [%[b], 80]\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adcs	x4, x4, x8\n\t"
-        "adcs	x5, x5, x9\n\t"
-        "adcs	x6, x6, x10\n\t"
+        "adcs	x5, x5, x9\n\t"
         "stp	x3, x4, [%[r], 64]\n\t"
+        "adcs	x6, x6, x10\n\t"
         "stp	x5, x6, [%[r], 80]\n\t"
         "ldp	x3, x4, [%[a], 96]\n\t"
+        "ldp	x7, x8, [%[b], 96]\n\t"
+        "adcs	x3, x3, x7\n\t"
         "ldp	x5, x6, [%[a], 112]\n\t"
-        "ldp	x7, x8, [%[b], 96]\n\t"
+        "adcs	x4, x4, x8\n\t"
         "ldp	x9, x10, [%[b], 112]\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adcs	x4, x4, x8\n\t"
-        "adcs	x5, x5, x9\n\t"
-        "adcs	x6, x6, x10\n\t"
+        "adcs	x5, x5, x9\n\t"
         "stp	x3, x4, [%[r], 96]\n\t"
+        "adcs	x6, x6, x10\n\t"
         "stp	x5, x6, [%[r], 112]\n\t"
         "ldp	x3, x4, [%[a], 128]\n\t"
+        "ldp	x7, x8, [%[b], 128]\n\t"
+        "adcs	x3, x3, x7\n\t"
         "ldp	x5, x6, [%[a], 144]\n\t"
-        "ldp	x7, x8, [%[b], 128]\n\t"
+        "adcs	x4, x4, x8\n\t"
         "ldp	x9, x10, [%[b], 144]\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adcs	x4, x4, x8\n\t"
-        "adcs	x5, x5, x9\n\t"
-        "adcs	x6, x6, x10\n\t"
+        "adcs	x5, x5, x9\n\t"
         "stp	x3, x4, [%[r], 128]\n\t"
+        "adcs	x6, x6, x10\n\t"
         "stp	x5, x6, [%[r], 144]\n\t"
         "ldp	x3, x4, [%[a], 160]\n\t"
+        "ldp	x7, x8, [%[b], 160]\n\t"
+        "adcs	x3, x3, x7\n\t"
         "ldp	x5, x6, [%[a], 176]\n\t"
-        "ldp	x7, x8, [%[b], 160]\n\t"
+        "adcs	x4, x4, x8\n\t"
         "ldp	x9, x10, [%[b], 176]\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adcs	x4, x4, x8\n\t"
-        "adcs	x5, x5, x9\n\t"
-        "adcs	x6, x6, x10\n\t"
+        "adcs	x5, x5, x9\n\t"
         "stp	x3, x4, [%[r], 160]\n\t"
+        "adcs	x6, x6, x10\n\t"
         "stp	x5, x6, [%[r], 176]\n\t"
         "ldp	x3, x4, [%[a], 192]\n\t"
+        "ldp	x7, x8, [%[b], 192]\n\t"
+        "adcs	x3, x3, x7\n\t"
         "ldp	x5, x6, [%[a], 208]\n\t"
-        "ldp	x7, x8, [%[b], 192]\n\t"
+        "adcs	x4, x4, x8\n\t"
         "ldp	x9, x10, [%[b], 208]\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adcs	x4, x4, x8\n\t"
-        "adcs	x5, x5, x9\n\t"
-        "adcs	x6, x6, x10\n\t"
+        "adcs	x5, x5, x9\n\t"
         "stp	x3, x4, [%[r], 192]\n\t"
+        "adcs	x6, x6, x10\n\t"
         "stp	x5, x6, [%[r], 208]\n\t"
         "ldp	x3, x4, [%[a], 224]\n\t"
+        "ldp	x7, x8, [%[b], 224]\n\t"
+        "adcs	x3, x3, x7\n\t"
         "ldp	x5, x6, [%[a], 240]\n\t"
-        "ldp	x7, x8, [%[b], 224]\n\t"
+        "adcs	x4, x4, x8\n\t"
         "ldp	x9, x10, [%[b], 240]\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adcs	x4, x4, x8\n\t"
-        "adcs	x5, x5, x9\n\t"
-        "adcs	x6, x6, x10\n\t"
+        "adcs	x5, x5, x9\n\t"
         "stp	x3, x4, [%[r], 224]\n\t"
+        "adcs	x6, x6, x10\n\t"
         "stp	x5, x6, [%[r], 240]\n\t"
         "ldp	x3, x4, [%[a], 256]\n\t"
+        "ldp	x7, x8, [%[b], 256]\n\t"
+        "adcs	x3, x3, x7\n\t"
         "ldp	x5, x6, [%[a], 272]\n\t"
-        "ldp	x7, x8, [%[b], 256]\n\t"
+        "adcs	x4, x4, x8\n\t"
         "ldp	x9, x10, [%[b], 272]\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adcs	x4, x4, x8\n\t"
-        "adcs	x5, x5, x9\n\t"
-        "adcs	x6, x6, x10\n\t"
+        "adcs	x5, x5, x9\n\t"
         "stp	x3, x4, [%[r], 256]\n\t"
+        "adcs	x6, x6, x10\n\t"
         "stp	x5, x6, [%[r], 272]\n\t"
         "ldp	x3, x4, [%[a], 288]\n\t"
+        "ldp	x7, x8, [%[b], 288]\n\t"
+        "adcs	x3, x3, x7\n\t"
         "ldp	x5, x6, [%[a], 304]\n\t"
-        "ldp	x7, x8, [%[b], 288]\n\t"
+        "adcs	x4, x4, x8\n\t"
         "ldp	x9, x10, [%[b], 304]\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adcs	x4, x4, x8\n\t"
-        "adcs	x5, x5, x9\n\t"
-        "adcs	x6, x6, x10\n\t"
+        "adcs	x5, x5, x9\n\t"
         "stp	x3, x4, [%[r], 288]\n\t"
+        "adcs	x6, x6, x10\n\t"
         "stp	x5, x6, [%[r], 304]\n\t"
         "ldp	x3, x4, [%[a], 320]\n\t"
+        "ldp	x7, x8, [%[b], 320]\n\t"
+        "adcs	x3, x3, x7\n\t"
         "ldp	x5, x6, [%[a], 336]\n\t"
-        "ldp	x7, x8, [%[b], 320]\n\t"
+        "adcs	x4, x4, x8\n\t"
         "ldp	x9, x10, [%[b], 336]\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adcs	x4, x4, x8\n\t"
-        "adcs	x5, x5, x9\n\t"
-        "adcs	x6, x6, x10\n\t"
+        "adcs	x5, x5, x9\n\t"
         "stp	x3, x4, [%[r], 320]\n\t"
+        "adcs	x6, x6, x10\n\t"
         "stp	x5, x6, [%[r], 336]\n\t"
         "ldp	x3, x4, [%[a], 352]\n\t"
+        "ldp	x7, x8, [%[b], 352]\n\t"
+        "adcs	x3, x3, x7\n\t"
         "ldp	x5, x6, [%[a], 368]\n\t"
-        "ldp	x7, x8, [%[b], 352]\n\t"
+        "adcs	x4, x4, x8\n\t"
         "ldp	x9, x10, [%[b], 368]\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adcs	x4, x4, x8\n\t"
-        "adcs	x5, x5, x9\n\t"
-        "adcs	x6, x6, x10\n\t"
+        "adcs	x5, x5, x9\n\t"
         "stp	x3, x4, [%[r], 352]\n\t"
+        "adcs	x6, x6, x10\n\t"
         "stp	x5, x6, [%[r], 368]\n\t"
-        "cset	%[c], cs\n\t"
-        : [c] "+r" (c)
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        "cset	%[r], cs\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
         : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10"
     );
 
-    return c;
+    return (sp_digit)r;
 }
 
 /* AND m into each word of a and store in r.
@@ -7580,13 +8345,14 @@
  * a  A single precision integer.
  * m  Mask to AND against each digit.
  */
-static void sp_3072_mask_24(sp_digit* r, sp_digit* a, sp_digit m)
-{
-#ifdef WOLFSSL_SP_SMALL
-    int i;
-
-    for (i=0; i<24; i++)
+static void sp_3072_mask_24(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<24; i++) {
         r[i] = a[i] & m;
+    }
 #else
     int i;
 
@@ -7603,13 +8369,77 @@
 #endif
 }
 
+/* Add digit to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+static void sp_3072_add_zero_24(sp_digit* r, const sp_digit* a,
+        const sp_digit d)
+{
+    __asm__ __volatile__ (
+        "ldp	x3, x4, [%[a], 0]\n\t"
+        "ldp	x5, x6, [%[a], 16]\n\t"
+        "adds	x3, x3, %[d]\n\t"
+        "adcs	x4, x4, xzr\n\t"
+        "adcs	x5, x5, xzr\n\t"
+        "stp	x3, x4, [%[r], 0]\n\t"
+        "adcs	x6, x6, xzr\n\t"
+        "stp	x5, x6, [%[r], 16]\n\t"
+        "ldp	x3, x4, [%[a], 32]\n\t"
+        "ldp	x5, x6, [%[a], 48]\n\t"
+        "adcs	x3, x3, xzr\n\t"
+        "adcs	x4, x4, xzr\n\t"
+        "adcs	x5, x5, xzr\n\t"
+        "stp	x3, x4, [%[r], 32]\n\t"
+        "adcs	x6, x6, xzr\n\t"
+        "stp	x5, x6, [%[r], 48]\n\t"
+        "ldp	x3, x4, [%[a], 64]\n\t"
+        "ldp	x5, x6, [%[a], 80]\n\t"
+        "adcs	x3, x3, xzr\n\t"
+        "adcs	x4, x4, xzr\n\t"
+        "adcs	x5, x5, xzr\n\t"
+        "stp	x3, x4, [%[r], 64]\n\t"
+        "adcs	x6, x6, xzr\n\t"
+        "stp	x5, x6, [%[r], 80]\n\t"
+        "ldp	x3, x4, [%[a], 96]\n\t"
+        "ldp	x5, x6, [%[a], 112]\n\t"
+        "adcs	x3, x3, xzr\n\t"
+        "adcs	x4, x4, xzr\n\t"
+        "adcs	x5, x5, xzr\n\t"
+        "stp	x3, x4, [%[r], 96]\n\t"
+        "adcs	x6, x6, xzr\n\t"
+        "stp	x5, x6, [%[r], 112]\n\t"
+        "ldp	x3, x4, [%[a], 128]\n\t"
+        "ldp	x5, x6, [%[a], 144]\n\t"
+        "adcs	x3, x3, xzr\n\t"
+        "adcs	x4, x4, xzr\n\t"
+        "adcs	x5, x5, xzr\n\t"
+        "stp	x3, x4, [%[r], 128]\n\t"
+        "adcs	x6, x6, xzr\n\t"
+        "stp	x5, x6, [%[r], 144]\n\t"
+        "ldp	x3, x4, [%[a], 160]\n\t"
+        "ldp	x5, x6, [%[a], 176]\n\t"
+        "adcs	x3, x3, xzr\n\t"
+        "adcs	x4, x4, xzr\n\t"
+        "adcs	x5, x5, xzr\n\t"
+        "stp	x3, x4, [%[r], 160]\n\t"
+        "adcs	x6, x6, xzr\n\t"
+        "stp	x5, x6, [%[r], 176]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [d] "r" (d)
+        : "memory", "x3", "x4", "x5", "x6"
+    );
+}
+
 /* Multiply a and b into r. (r = a * b)
  *
  * r  A single precision integer.
  * a  A single precision integer.
  * b  A single precision integer.
  */
-static void sp_3072_mul_48(sp_digit* r, const sp_digit* a,
+SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a,
         const sp_digit* b)
 {
     sp_digit* z0 = r;
@@ -7631,17 +8461,122 @@
     u += sp_3072_sub_in_place_48(z1, z2);
     u += sp_3072_sub_in_place_48(z1, z0);
     u += sp_3072_add_48(r + 24, r + 24, z1);
-    r[72] = u;
-    XMEMSET(r + 72 + 1, 0, sizeof(sp_digit) * (24 - 1));
-    sp_3072_add_48(r + 48, r + 48, z2);
-}
-
+    u += sp_3072_add_24(r + 48, r + 48, z2);
+    sp_3072_add_zero_24(r + 72, z2 + 24, u);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Double a into r. (r = a + a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+static sp_digit sp_3072_dbl_24(sp_digit* r, const sp_digit* a)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "add	x11, %[a], 192\n\t"
+        "\n1:\n\t"
+        "adds	%[c], %[c], #-1\n\t"
+        "ldp	x3, x4, [%[a]], #16\n\t"
+        "ldp	x5, x6, [%[a]], #16\n\t"
+        "adcs	x3, x3, x3\n\t"
+        "adcs	x4, x4, x4\n\t"
+        "adcs	x5, x5, x5\n\t"
+        "stp	x3, x4, [%[r]], #16\n\t"
+        "adcs	x6, x6, x6\n\t"
+        "stp	x5, x6, [%[r]], #16\n\t"
+        "cset	%[c], cs\n\t"
+        "cmp	%[a], x11\n\t"
+        "b.ne	1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a)
+        :
+        : "memory", "x3", "x4", "x5", "x6", "x11"
+    );
+
+    return c;
+}
+
+#else
+/* Double a into r. (r = a + a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+static sp_digit sp_3072_dbl_24(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "ldp	x3, x4, [%[a], 0]\n\t"
+        "adds	x3, x3, x3\n\t"
+        "ldr	x5, [%[a], 16]\n\t"
+        "adcs	x4, x4, x4\n\t"
+        "ldr	x6, [%[a], 24]\n\t"
+        "adcs	x5, x5, x5\n\t"
+        "stp	x3, x4, [%[r], 0]\n\t"
+        "adcs	x6, x6, x6\n\t"
+        "stp	x5, x6, [%[r], 16]\n\t"
+        "ldp	x3, x4, [%[a], 32]\n\t"
+        "adcs	x3, x3, x3\n\t"
+        "ldr	x5, [%[a], 48]\n\t"
+        "adcs	x4, x4, x4\n\t"
+        "ldr	x6, [%[a], 56]\n\t"
+        "adcs	x5, x5, x5\n\t"
+        "stp	x3, x4, [%[r], 32]\n\t"
+        "adcs	x6, x6, x6\n\t"
+        "stp	x5, x6, [%[r], 48]\n\t"
+        "ldp	x3, x4, [%[a], 64]\n\t"
+        "adcs	x3, x3, x3\n\t"
+        "ldr	x5, [%[a], 80]\n\t"
+        "adcs	x4, x4, x4\n\t"
+        "ldr	x6, [%[a], 88]\n\t"
+        "adcs	x5, x5, x5\n\t"
+        "stp	x3, x4, [%[r], 64]\n\t"
+        "adcs	x6, x6, x6\n\t"
+        "stp	x5, x6, [%[r], 80]\n\t"
+        "ldp	x3, x4, [%[a], 96]\n\t"
+        "adcs	x3, x3, x3\n\t"
+        "ldr	x5, [%[a], 112]\n\t"
+        "adcs	x4, x4, x4\n\t"
+        "ldr	x6, [%[a], 120]\n\t"
+        "adcs	x5, x5, x5\n\t"
+        "stp	x3, x4, [%[r], 96]\n\t"
+        "adcs	x6, x6, x6\n\t"
+        "stp	x5, x6, [%[r], 112]\n\t"
+        "ldp	x3, x4, [%[a], 128]\n\t"
+        "adcs	x3, x3, x3\n\t"
+        "ldr	x5, [%[a], 144]\n\t"
+        "adcs	x4, x4, x4\n\t"
+        "ldr	x6, [%[a], 152]\n\t"
+        "adcs	x5, x5, x5\n\t"
+        "stp	x3, x4, [%[r], 128]\n\t"
+        "adcs	x6, x6, x6\n\t"
+        "stp	x5, x6, [%[r], 144]\n\t"
+        "ldp	x3, x4, [%[a], 160]\n\t"
+        "adcs	x3, x3, x3\n\t"
+        "ldr	x5, [%[a], 176]\n\t"
+        "adcs	x4, x4, x4\n\t"
+        "ldr	x6, [%[a], 184]\n\t"
+        "adcs	x5, x5, x5\n\t"
+        "stp	x3, x4, [%[r], 160]\n\t"
+        "adcs	x6, x6, x6\n\t"
+        "stp	x5, x6, [%[r], 176]\n\t"
+        "cset	%[r], cs\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a)
+        : "memory", "x3", "x4", "x5", "x6"
+    );
+
+    return (sp_digit)r;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
 /* Square a and put result in r. (r = a * a)
  *
  * r  A single precision integer.
  * a  A single precision integer.
  */
-static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a)
+SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a)
 {
     sp_digit* z0 = r;
     sp_digit z2[48];
@@ -7654,16 +8589,16 @@
     sp_3072_sqr_24(z2, &a[24]);
     sp_3072_sqr_24(z0, a);
     sp_3072_mask_24(r + 48, a1, 0 - u);
-    u += sp_3072_add_24(r + 48, r + 48, r + 48);
+    u += sp_3072_dbl_24(r + 48, r + 48);
     u += sp_3072_sub_in_place_48(z1, z2);
     u += sp_3072_sub_in_place_48(z1, z0);
     u += sp_3072_add_48(r + 24, r + 24, z1);
-    r[72] = u;
-    XMEMSET(r + 72 + 1, 0, sizeof(sp_digit) * (24 - 1));
-    sp_3072_add_48(r + 48, r + 48, z2);
-}
-
-#endif /* WOLFSSL_SP_SMALL */
+    u += sp_3072_add_24(r + 48, r + 48, z2);
+    sp_3072_add_zero_24(r + 72, z2 + 24, u);
+    
+}
+
+#endif /* !WOLFSSL_SP_SMALL */
 #ifdef WOLFSSL_SP_SMALL
 /* Add b to a into r. (r = a + b)
  *
@@ -7683,12 +8618,12 @@
         "ldp	x3, x4, [%[a]], #16\n\t"
         "ldp	x5, x6, [%[a]], #16\n\t"
         "ldp	x7, x8, [%[b]], #16\n\t"
+        "adcs	x3, x3, x7\n\t"
         "ldp	x9, x10, [%[b]], #16\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adcs	x4, x4, x8\n\t"
-        "adcs	x5, x5, x9\n\t"
-        "adcs	x6, x6, x10\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "adcs	x5, x5, x9\n\t"
         "stp	x3, x4, [%[r]], #16\n\t"
+        "adcs	x6, x6, x10\n\t"
         "stp	x5, x6, [%[r]], #16\n\t"
         "cset	%[c], cs\n\t"
         "cmp	%[a], x11\n\t"
@@ -7719,12 +8654,12 @@
         "ldp	x2, x3, [%[a]]\n\t"
         "ldp	x4, x5, [%[a], #16]\n\t"
         "ldp	x6, x7, [%[b]], #16\n\t"
+        "sbcs	x2, x2, x6\n\t"
         "ldp	x8, x9, [%[b]], #16\n\t"
-        "sbcs	x2, x2, x6\n\t"
-        "sbcs	x3, x3, x7\n\t"
-        "sbcs	x4, x4, x8\n\t"
-        "sbcs	x5, x5, x9\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "sbcs	x4, x4, x8\n\t"
         "stp	x2, x3, [%[a]], #16\n\t"
+        "sbcs	x5, x5, x9\n\t"
         "stp	x4, x5, [%[a]], #16\n\t"
         "csetm	%[c], cc\n\t"
         "cmp	%[a], x10\n\t"
@@ -7855,7 +8790,7 @@
 }
 
 #endif /* WOLFSSL_SP_SMALL */
-#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA)
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
 #ifdef WOLFSSL_SP_SMALL
 /* AND m into each word of a and store in r.
  *
@@ -7863,12 +8798,13 @@
  * a  A single precision integer.
  * m  Mask to AND against each digit.
  */
-static void sp_3072_mask_24(sp_digit* r, sp_digit* a, sp_digit m)
-{
-    int i;
-
-    for (i=0; i<24; i++)
+static void sp_3072_mask_24(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+    int i;
+
+    for (i=0; i<24; i++) {
         r[i] = a[i] & m;
+    }
 }
 
 #endif /* WOLFSSL_SP_SMALL */
@@ -7891,12 +8827,12 @@
         "ldp	x3, x4, [%[a]], #16\n\t"
         "ldp	x5, x6, [%[a]], #16\n\t"
         "ldp	x7, x8, [%[b]], #16\n\t"
+        "adcs	x3, x3, x7\n\t"
         "ldp	x9, x10, [%[b]], #16\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adcs	x4, x4, x8\n\t"
-        "adcs	x5, x5, x9\n\t"
-        "adcs	x6, x6, x10\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "adcs	x5, x5, x9\n\t"
         "stp	x3, x4, [%[r]], #16\n\t"
+        "adcs	x6, x6, x10\n\t"
         "stp	x5, x6, [%[r]], #16\n\t"
         "cset	%[c], cs\n\t"
         "cmp	%[a], x11\n\t"
@@ -7927,12 +8863,12 @@
         "ldp	x2, x3, [%[a]]\n\t"
         "ldp	x4, x5, [%[a], #16]\n\t"
         "ldp	x6, x7, [%[b]], #16\n\t"
+        "sbcs	x2, x2, x6\n\t"
         "ldp	x8, x9, [%[b]], #16\n\t"
-        "sbcs	x2, x2, x6\n\t"
-        "sbcs	x3, x3, x7\n\t"
-        "sbcs	x4, x4, x8\n\t"
-        "sbcs	x5, x5, x9\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "sbcs	x4, x4, x8\n\t"
         "stp	x2, x3, [%[a]], #16\n\t"
+        "sbcs	x5, x5, x9\n\t"
         "stp	x4, x5, [%[a]], #16\n\t"
         "csetm	%[c], cc\n\t"
         "cmp	%[a], x10\n\t"
@@ -8063,14 +8999,14 @@
 }
 
 #endif /* WOLFSSL_SP_SMALL */
-#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
 
 /* Caclulate the bottom digit of -1/a mod 2^n.
  *
  * a    A single precision number.
  * rho  Bottom word of inverse.
  */
-static void sp_3072_mont_setup(sp_digit* a, sp_digit* rho)
+static void sp_3072_mont_setup(const sp_digit* a, sp_digit* rho)
 {
     sp_digit x, b;
 
@@ -8085,14 +9021,464 @@
     *rho = -x;
 }
 
-#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA)
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision digit.
+ */
+static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a,
+        sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    __asm__ __volatile__ (
+        "# A[0] * B\n\t"
+        "ldr	x8, [%[a]]\n\t"
+        "mul	x5, %[b], x8\n\t"
+        "umulh	x3, %[b], x8\n\t"
+        "mov	x4, 0\n\t"
+        "str	x5, [%[r]]\n\t"
+        "mov	x5, 0\n\t"
+        "mov	x9, #8\n\t"
+        "1:\n\t"
+        "ldr	x8, [%[a], x9]\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adds	x3, x3, x6\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "str	x3, [%[r], x9]\n\t"
+        "mov	x3, x4\n\t"
+        "mov	x4, x5\n\t"
+        "mov	x5, #0\n\t"
+        "add	x9, x9, #8\n\t"
+        "cmp	x9, 384\n\t"
+        "b.lt	1b\n\t"
+        "str	x3, [%[r], 384]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
+    );
+#else
+    __asm__ __volatile__ (
+        "# A[0] * B\n\t"
+        "ldp	x8, x9, [%[a]]\n\t"
+        "mul	x3, %[b], x8\n\t"
+        "umulh	x4, %[b], x8\n\t"
+        "mov	x5, 0\n\t"
+        "# A[1] * B\n\t"
+        "str	x3, [%[r]]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[2] * B\n\t"
+        "ldp	x8, x9, [%[a], 16]\n\t"
+        "str	x4, [%[r], 8]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "# A[3] * B\n\t"
+        "str	x5, [%[r], 16]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
+        "# A[4] * B\n\t"
+        "ldp	x8, x9, [%[a], 32]\n\t"
+        "str	x3, [%[r], 24]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[5] * B\n\t"
+        "str	x4, [%[r], 32]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "# A[6] * B\n\t"
+        "ldp	x8, x9, [%[a], 48]\n\t"
+        "str	x5, [%[r], 40]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
+        "# A[7] * B\n\t"
+        "str	x3, [%[r], 48]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[8] * B\n\t"
+        "ldp	x8, x9, [%[a], 64]\n\t"
+        "str	x4, [%[r], 56]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "# A[9] * B\n\t"
+        "str	x5, [%[r], 64]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
+        "# A[10] * B\n\t"
+        "ldp	x8, x9, [%[a], 80]\n\t"
+        "str	x3, [%[r], 72]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[11] * B\n\t"
+        "str	x4, [%[r], 80]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "# A[12] * B\n\t"
+        "ldp	x8, x9, [%[a], 96]\n\t"
+        "str	x5, [%[r], 88]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
+        "# A[13] * B\n\t"
+        "str	x3, [%[r], 96]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[14] * B\n\t"
+        "ldp	x8, x9, [%[a], 112]\n\t"
+        "str	x4, [%[r], 104]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "# A[15] * B\n\t"
+        "str	x5, [%[r], 112]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
+        "# A[16] * B\n\t"
+        "ldp	x8, x9, [%[a], 128]\n\t"
+        "str	x3, [%[r], 120]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[17] * B\n\t"
+        "str	x4, [%[r], 128]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "# A[18] * B\n\t"
+        "ldp	x8, x9, [%[a], 144]\n\t"
+        "str	x5, [%[r], 136]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
+        "# A[19] * B\n\t"
+        "str	x3, [%[r], 144]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[20] * B\n\t"
+        "ldp	x8, x9, [%[a], 160]\n\t"
+        "str	x4, [%[r], 152]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "# A[21] * B\n\t"
+        "str	x5, [%[r], 160]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
+        "# A[22] * B\n\t"
+        "ldp	x8, x9, [%[a], 176]\n\t"
+        "str	x3, [%[r], 168]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[23] * B\n\t"
+        "str	x4, [%[r], 176]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "# A[24] * B\n\t"
+        "ldp	x8, x9, [%[a], 192]\n\t"
+        "str	x5, [%[r], 184]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
+        "# A[25] * B\n\t"
+        "str	x3, [%[r], 192]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[26] * B\n\t"
+        "ldp	x8, x9, [%[a], 208]\n\t"
+        "str	x4, [%[r], 200]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "# A[27] * B\n\t"
+        "str	x5, [%[r], 208]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
+        "# A[28] * B\n\t"
+        "ldp	x8, x9, [%[a], 224]\n\t"
+        "str	x3, [%[r], 216]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[29] * B\n\t"
+        "str	x4, [%[r], 224]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "# A[30] * B\n\t"
+        "ldp	x8, x9, [%[a], 240]\n\t"
+        "str	x5, [%[r], 232]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
+        "# A[31] * B\n\t"
+        "str	x3, [%[r], 240]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[32] * B\n\t"
+        "ldp	x8, x9, [%[a], 256]\n\t"
+        "str	x4, [%[r], 248]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "# A[33] * B\n\t"
+        "str	x5, [%[r], 256]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
+        "# A[34] * B\n\t"
+        "ldp	x8, x9, [%[a], 272]\n\t"
+        "str	x3, [%[r], 264]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[35] * B\n\t"
+        "str	x4, [%[r], 272]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "# A[36] * B\n\t"
+        "ldp	x8, x9, [%[a], 288]\n\t"
+        "str	x5, [%[r], 280]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
+        "# A[37] * B\n\t"
+        "str	x3, [%[r], 288]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[38] * B\n\t"
+        "ldp	x8, x9, [%[a], 304]\n\t"
+        "str	x4, [%[r], 296]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "# A[39] * B\n\t"
+        "str	x5, [%[r], 304]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
+        "# A[40] * B\n\t"
+        "ldp	x8, x9, [%[a], 320]\n\t"
+        "str	x3, [%[r], 312]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[41] * B\n\t"
+        "str	x4, [%[r], 320]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "# A[42] * B\n\t"
+        "ldp	x8, x9, [%[a], 336]\n\t"
+        "str	x5, [%[r], 328]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
+        "# A[43] * B\n\t"
+        "str	x3, [%[r], 336]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[44] * B\n\t"
+        "ldp	x8, x9, [%[a], 352]\n\t"
+        "str	x4, [%[r], 344]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "# A[45] * B\n\t"
+        "str	x5, [%[r], 352]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
+        "# A[46] * B\n\t"
+        "ldp	x8, x9, [%[a], 368]\n\t"
+        "str	x3, [%[r], 360]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[47] * B\n\t"
+        "str	x4, [%[r], 368]\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "adc	x3, x3, x7\n\t"
+        "stp	x5, x3, [%[r], 376]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
+    );
+#endif
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
 /* r = 2^n mod m where n is the number of bits to reduce by.
  * Given m must be 3072 bits, just need to subtract.
  *
  * r  A single precision number.
- * m  A signle precision number.
- */
-static void sp_3072_mont_norm_24(sp_digit* r, sp_digit* m)
+ * m  A single precision number.
+ */
+static void sp_3072_mont_norm_24(sp_digit* r, const sp_digit* m)
 {
     XMEMSET(r, 0, sizeof(sp_digit) * 24);
 
@@ -8108,12 +9494,12 @@
  * b  A single precision number to subtract.
  * m  Mask value to apply.
  */
-static sp_digit sp_3072_cond_sub_24(sp_digit* r, sp_digit* a, sp_digit* b,
+static sp_digit sp_3072_cond_sub_24(sp_digit* r, const sp_digit* a, const sp_digit* b,
         sp_digit m)
 {
+#ifdef WOLFSSL_SP_SMALL
     sp_digit c = 0;
 
-#ifdef WOLFSSL_SP_SMALL
     __asm__ __volatile__ (
         "mov	x8, #0\n\t"
         "1:\n\t"
@@ -8129,139 +9515,105 @@
         "b.lt	1b\n\t"
         : [c] "+r" (c)
         : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
-        : "memory", "x4", "x6", "x5", "x7", "x8"
-    );
-#else
-    __asm__ __volatile__ (
-
-        "ldr		x4, [%[a], 0]\n\t"
-        "ldr		x6, [%[a], 8]\n\t"
-        "ldr		x5, [%[b], 0]\n\t"
-        "ldr		x7, [%[b], 8]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "subs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 0]\n\t"
-        "str		x6, [%[r], 8]\n\t"
-        "ldr		x4, [%[a], 16]\n\t"
-        "ldr		x6, [%[a], 24]\n\t"
-        "ldr		x5, [%[b], 16]\n\t"
-        "ldr		x7, [%[b], 24]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 16]\n\t"
-        "str		x6, [%[r], 24]\n\t"
-        "ldr		x4, [%[a], 32]\n\t"
-        "ldr		x6, [%[a], 40]\n\t"
-        "ldr		x5, [%[b], 32]\n\t"
-        "ldr		x7, [%[b], 40]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 32]\n\t"
-        "str		x6, [%[r], 40]\n\t"
-        "ldr		x4, [%[a], 48]\n\t"
-        "ldr		x6, [%[a], 56]\n\t"
-        "ldr		x5, [%[b], 48]\n\t"
-        "ldr		x7, [%[b], 56]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 48]\n\t"
-        "str		x6, [%[r], 56]\n\t"
-        "ldr		x4, [%[a], 64]\n\t"
-        "ldr		x6, [%[a], 72]\n\t"
-        "ldr		x5, [%[b], 64]\n\t"
-        "ldr		x7, [%[b], 72]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 64]\n\t"
-        "str		x6, [%[r], 72]\n\t"
-        "ldr		x4, [%[a], 80]\n\t"
-        "ldr		x6, [%[a], 88]\n\t"
-        "ldr		x5, [%[b], 80]\n\t"
-        "ldr		x7, [%[b], 88]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 80]\n\t"
-        "str		x6, [%[r], 88]\n\t"
-        "ldr		x4, [%[a], 96]\n\t"
-        "ldr		x6, [%[a], 104]\n\t"
-        "ldr		x5, [%[b], 96]\n\t"
-        "ldr		x7, [%[b], 104]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 96]\n\t"
-        "str		x6, [%[r], 104]\n\t"
-        "ldr		x4, [%[a], 112]\n\t"
-        "ldr		x6, [%[a], 120]\n\t"
-        "ldr		x5, [%[b], 112]\n\t"
-        "ldr		x7, [%[b], 120]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 112]\n\t"
-        "str		x6, [%[r], 120]\n\t"
-        "ldr		x4, [%[a], 128]\n\t"
-        "ldr		x6, [%[a], 136]\n\t"
-        "ldr		x5, [%[b], 128]\n\t"
-        "ldr		x7, [%[b], 136]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 128]\n\t"
-        "str		x6, [%[r], 136]\n\t"
-        "ldr		x4, [%[a], 144]\n\t"
-        "ldr		x6, [%[a], 152]\n\t"
-        "ldr		x5, [%[b], 144]\n\t"
-        "ldr		x7, [%[b], 152]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 144]\n\t"
-        "str		x6, [%[r], 152]\n\t"
-        "ldr		x4, [%[a], 160]\n\t"
-        "ldr		x6, [%[a], 168]\n\t"
-        "ldr		x5, [%[b], 160]\n\t"
-        "ldr		x7, [%[b], 168]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 160]\n\t"
-        "str		x6, [%[r], 168]\n\t"
-        "ldr		x4, [%[a], 176]\n\t"
-        "ldr		x6, [%[a], 184]\n\t"
-        "ldr		x5, [%[b], 176]\n\t"
-        "ldr		x7, [%[b], 184]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 176]\n\t"
-        "str		x6, [%[r], 184]\n\t"
-        "csetm	%[c], cc\n\t"
-        : [c] "+r" (c)
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
-        : "memory", "x4", "x6", "x5", "x7", "x8"
-    );
-#endif /* WOLFSSL_SP_SMALL */
+        : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12"
+    );
 
     return c;
+#else
+    __asm__ __volatile__ (
+
+        "ldp	x5, x7, [%[b], 0]\n\t"
+        "ldp	x11, x12, [%[b], 16]\n\t"
+        "ldp	x4, x6, [%[a], 0]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 16]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "subs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "sbcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 0]\n\t"
+        "sbcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 16]\n\t"
+        "ldp	x5, x7, [%[b], 32]\n\t"
+        "ldp	x11, x12, [%[b], 48]\n\t"
+        "ldp	x4, x6, [%[a], 32]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 48]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "sbcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "sbcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 32]\n\t"
+        "sbcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 48]\n\t"
+        "ldp	x5, x7, [%[b], 64]\n\t"
+        "ldp	x11, x12, [%[b], 80]\n\t"
+        "ldp	x4, x6, [%[a], 64]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 80]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "sbcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "sbcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 64]\n\t"
+        "sbcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 80]\n\t"
+        "ldp	x5, x7, [%[b], 96]\n\t"
+        "ldp	x11, x12, [%[b], 112]\n\t"
+        "ldp	x4, x6, [%[a], 96]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 112]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "sbcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "sbcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 96]\n\t"
+        "sbcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 112]\n\t"
+        "ldp	x5, x7, [%[b], 128]\n\t"
+        "ldp	x11, x12, [%[b], 144]\n\t"
+        "ldp	x4, x6, [%[a], 128]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 144]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "sbcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "sbcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 128]\n\t"
+        "sbcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 144]\n\t"
+        "ldp	x5, x7, [%[b], 160]\n\t"
+        "ldp	x11, x12, [%[b], 176]\n\t"
+        "ldp	x4, x6, [%[a], 160]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 176]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "sbcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "sbcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 160]\n\t"
+        "sbcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 176]\n\t"
+        "csetm	%[r], cc\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12"
+    );
+
+    return (sp_digit)r;
+#endif /* WOLFSSL_SP_SMALL */
 }
 
 /* Reduce the number back to 3072 bits using Montgomery reduction.
@@ -8270,259 +9622,255 @@
  * m   The single precision number representing the modulus.
  * mp  The digit representing the negative inverse of m mod 2^n.
  */
-SP_NOINLINE static void sp_3072_mont_reduce_24(sp_digit* a, sp_digit* m,
+SP_NOINLINE static void sp_3072_mont_reduce_24(sp_digit* a, const sp_digit* m,
         sp_digit mp)
 {
     sp_digit ca = 0;
 
     __asm__ __volatile__ (
-        "ldp       x12, x13, [%[m], 0]\n\t"
-        "ldp       x14, x15, [%[m], 16]\n\t"
-        "ldp       x16, x17, [%[m], 32]\n\t"
-        "ldp       x18, x19, [%[m], 48]\n\t"
-        "ldp       x20, x21, [%[m], 64]\n\t"
-        "ldp       x22, x23, [%[m], 80]\n\t"
-        "ldp       x24, x25, [%[m], 96]\n\t"
-        "ldp       x26, x27, [%[m], 112]\n\t"
-        "# i = 0\n\t"
-        "mov	x3, 0\n\t"
-        "ldp	x10, x11, [%[a], 0]\n\t"
+        "ldp	x14, x15, [%[m], 0]\n\t"
+        "ldp	x16, x17, [%[m], 16]\n\t"
+        "ldp	x19, x20, [%[m], 32]\n\t"
+        "ldp	x21, x22, [%[m], 48]\n\t"
+        "ldp	x23, x24, [%[m], 64]\n\t"
+        "ldp	x25, x26, [%[m], 80]\n\t"
+        "ldp	x27, x28, [%[m], 96]\n\t"
+        "# i = 24\n\t"
+        "mov	x4, 24\n\t"
+        "ldp	x12, x13, [%[a], 0]\n\t"
         "\n1:\n\t"
         "# mu = a[i] * mp\n\t"
-        "mul	x8, %[mp], x10\n\t"
+        "mul	x9, %[mp], x12\n\t"
         "# a[i+0] += m[0] * mu\n\t"
-        "ldr	x9, [%[a], 0]\n\t"
-        "mul		x6, x12, x8\n\t"
-        "umulh	x7, x12, x8\n\t"
-        "adds	x10, x10, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
+        "mul	x7, x14, x9\n\t"
+        "umulh	x8, x14, x9\n\t"
+        "adds	x12, x12, x7\n\t"
         "# a[i+1] += m[1] * mu\n\t"
-        "ldr	x9, [%[a], 8]\n\t"
-        "mul		x6, x13, x8\n\t"
-        "umulh	x7, x13, x8\n\t"
-        "adds	x10, x11, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x10, x10, x5\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "mul	x7, x15, x9\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "umulh	x8, x15, x9\n\t"
+        "adds	x12, x13, x7\n\t"
         "# a[i+2] += m[2] * mu\n\t"
-        "ldr	x11, [%[a], 16]\n\t"
-        "mul		x6, x14, x8\n\t"
-        "umulh	x7, x14, x8\n\t"
-        "adds	x11, x11, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x11, x11, x4\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x13, [%[a], 16]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "mul	x7, x16, x9\n\t"
+        "adds	x12, x12, x6\n\t"
+        "umulh	x8, x16, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "adds	x13, x13, x7\n\t"
         "# a[i+3] += m[3] * mu\n\t"
-        "ldr	x9, [%[a], 24]\n\t"
-        "mul		x6, x15, x8\n\t"
-        "umulh	x7, x15, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 24]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 24]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "mul	x7, x17, x9\n\t"
+        "adds	x13, x13, x5\n\t"
+        "umulh	x8, x17, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+4] += m[4] * mu\n\t"
-        "ldr	x9, [%[a], 32]\n\t"
-        "mul		x6, x16, x8\n\t"
-        "umulh	x7, x16, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 32]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 32]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x19, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x19, x9\n\t"
+        "str	x10, [%[a], 24]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+5] += m[5] * mu\n\t"
-        "ldr	x9, [%[a], 40]\n\t"
-        "mul		x6, x17, x8\n\t"
-        "umulh	x7, x17, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 40]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 40]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x20, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x20, x9\n\t"
+        "str	x11, [%[a], 32]\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+6] += m[6] * mu\n\t"
-        "ldr	x9, [%[a], 48]\n\t"
-        "mul		x6, x18, x8\n\t"
-        "umulh	x7, x18, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 48]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 48]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x21, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x21, x9\n\t"
+        "str	x10, [%[a], 40]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+7] += m[7] * mu\n\t"
-        "ldr	x9, [%[a], 56]\n\t"
-        "mul		x6, x19, x8\n\t"
-        "umulh	x7, x19, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 56]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 56]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x22, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x22, x9\n\t"
+        "str	x11, [%[a], 48]\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+8] += m[8] * mu\n\t"
-        "ldr	x9, [%[a], 64]\n\t"
-        "mul		x6, x20, x8\n\t"
-        "umulh	x7, x20, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 64]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 64]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x23, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x23, x9\n\t"
+        "str	x10, [%[a], 56]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+9] += m[9] * mu\n\t"
-        "ldr	x9, [%[a], 72]\n\t"
-        "mul		x6, x21, x8\n\t"
-        "umulh	x7, x21, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 72]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 72]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x24, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x24, x9\n\t"
+        "str	x11, [%[a], 64]\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+10] += m[10] * mu\n\t"
-        "ldr	x9, [%[a], 80]\n\t"
-        "mul		x6, x22, x8\n\t"
-        "umulh	x7, x22, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 80]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 80]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x25, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x25, x9\n\t"
+        "str	x10, [%[a], 72]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+11] += m[11] * mu\n\t"
-        "ldr	x9, [%[a], 88]\n\t"
-        "mul		x6, x23, x8\n\t"
-        "umulh	x7, x23, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 88]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 88]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x26, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x26, x9\n\t"
+        "str	x11, [%[a], 80]\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+12] += m[12] * mu\n\t"
-        "ldr	x9, [%[a], 96]\n\t"
-        "mul		x6, x24, x8\n\t"
-        "umulh	x7, x24, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 96]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 96]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x27, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x27, x9\n\t"
+        "str	x10, [%[a], 88]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+13] += m[13] * mu\n\t"
-        "ldr	x9, [%[a], 104]\n\t"
-        "mul		x6, x25, x8\n\t"
-        "umulh	x7, x25, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 104]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 104]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x28, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x28, x9\n\t"
+        "str	x11, [%[a], 96]\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+14] += m[14] * mu\n\t"
-        "ldr	x9, [%[a], 112]\n\t"
-        "mul		x6, x26, x8\n\t"
-        "umulh	x7, x26, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 112]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 112]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 112]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 104]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+15] += m[15] * mu\n\t"
-        "ldr	x9, [%[a], 120]\n\t"
-        "mul		x6, x27, x8\n\t"
-        "umulh	x7, x27, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 120]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 120]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 120]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 112]\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+16] += m[16] * mu\n\t"
-        "ldr		x7, [%[m], 128]\n\t"
-        "ldr	x9, [%[a], 128]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 128]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 128]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 128]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 120]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+17] += m[17] * mu\n\t"
-        "ldr		x7, [%[m], 136]\n\t"
-        "ldr	x9, [%[a], 136]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 136]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 136]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 136]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 128]\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+18] += m[18] * mu\n\t"
-        "ldr		x7, [%[m], 144]\n\t"
-        "ldr	x9, [%[a], 144]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 144]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 144]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 144]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 136]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+19] += m[19] * mu\n\t"
-        "ldr		x7, [%[m], 152]\n\t"
-        "ldr	x9, [%[a], 152]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 152]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 152]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 152]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 144]\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+20] += m[20] * mu\n\t"
-        "ldr		x7, [%[m], 160]\n\t"
-        "ldr	x9, [%[a], 160]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 160]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 160]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 160]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 152]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+21] += m[21] * mu\n\t"
-        "ldr		x7, [%[m], 168]\n\t"
-        "ldr	x9, [%[a], 168]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 168]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 168]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 168]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 160]\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+22] += m[22] * mu\n\t"
-        "ldr		x7, [%[m], 176]\n\t"
-        "ldr	x9, [%[a], 176]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 176]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 176]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 176]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 168]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+23] += m[23] * mu\n\t"
-        "ldr	x7, [%[m], 184]\n\t"
-        "ldr	x9, [%[a], 184]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x7, x7, %[ca]\n\t"
+        "ldr	x10, [%[a], 184]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 184]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "adds	x6, x6, x7\n\t"
+        "adcs	x8, x8, %[ca]\n\t"
+        "str	x11, [%[a], 176]\n\t"
         "cset  %[ca], cs\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 184]\n\t"
-        "ldr	x9, [%[a], 192]\n\t"
-        "adcs	x9, x9, x7\n\t"
-        "str	x9, [%[a], 192]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "ldr	x11, [%[a], 192]\n\t"
+        "str	x10, [%[a], 184]\n\t"
+        "adcs	x11, x11, x8\n\t"
+        "str	x11, [%[a], 192]\n\t"
         "adc	%[ca], %[ca], xzr\n\t"
-        "# i += 1\n\t"
+        "subs	x4, x4, 1\n\t"
         "add	%[a], %[a], 8\n\t"
-        "add	x3, x3, 8\n\t"
-        "cmp	x3, 192\n\t"
-        "blt	1b\n\t"
-        "str	x10, [%[a], 0]\n\t"
-        "str	x11, [%[a], 8]\n\t"
+        "bne	1b\n\t"
+        "stp	x12, x13, [%[a], 0]\n\t"
         : [ca] "+r" (ca), [a] "+r" (a)
         : [m] "r" (m), [mp] "r" (mp)
-        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27"
+        : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28"
     );
 
     sp_3072_cond_sub_24(a - 24, a, m, (sp_digit)0 - ca);
@@ -8537,8 +9885,8 @@
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_3072_mont_mul_24(sp_digit* r, sp_digit* a, sp_digit* b,
-        sp_digit* m, sp_digit mp)
+static void sp_3072_mont_mul_24(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
 {
     sp_3072_mul_24(r, a, b);
     sp_3072_mont_reduce_24(r, m, mp);
@@ -8551,7 +9899,7 @@
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_3072_mont_sqr_24(sp_digit* r, sp_digit* a, sp_digit* m,
+static void sp_3072_mont_sqr_24(sp_digit* r, const sp_digit* a, const sp_digit* m,
         sp_digit mp)
 {
     sp_3072_sqr_24(r, a);
@@ -8565,7 +9913,7 @@
  * b  A single precision digit.
  */
 static void sp_3072_mul_d_24(sp_digit* r, const sp_digit* a,
-        const sp_digit b)
+        sp_digit b)
 {
 #ifdef WOLFSSL_SP_SMALL
     __asm__ __volatile__ (
@@ -8594,225 +9942,212 @@
         "str	x3, [%[r], 192]\n\t"
         :
         : [r] "r" (r), [a] "r" (a), [b] "r" (b)
-        : "memory", "x3", "x4", "x5", "x6", "x7", "x8"
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
     );
 #else
     __asm__ __volatile__ (
         "# A[0] * B\n\t"
-        "ldr	x8, [%[a]]\n\t"
+        "ldp	x8, x9, [%[a]]\n\t"
         "mul	x3, %[b], x8\n\t"
         "umulh	x4, %[b], x8\n\t"
         "mov	x5, 0\n\t"
+        "# A[1] * B\n\t"
         "str	x3, [%[r]]\n\t"
-        "# A[1] * B\n\t"
-        "ldr		x8, [%[a], 8]\n\t"
-        "mov		x3, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc		x3, xzr, xzr\n\t"
-        "str		x4, [%[r], 8]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adds	x4, x4, x6\n\t"
         "# A[2] * B\n\t"
-        "ldr		x8, [%[a], 16]\n\t"
-        "mov		x4, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc		x4, xzr, xzr\n\t"
-        "str		x5, [%[r], 16]\n\t"
+        "ldp	x8, x9, [%[a], 16]\n\t"
+        "str	x4, [%[r], 8]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
         "# A[3] * B\n\t"
-        "ldr		x8, [%[a], 24]\n\t"
-        "mov		x5, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc		x5, xzr, xzr\n\t"
-        "str		x3, [%[r], 24]\n\t"
+        "str	x5, [%[r], 16]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
         "# A[4] * B\n\t"
-        "ldr		x8, [%[a], 32]\n\t"
-        "mov		x3, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc		x3, xzr, xzr\n\t"
-        "str		x4, [%[r], 32]\n\t"
+        "ldp	x8, x9, [%[a], 32]\n\t"
+        "str	x3, [%[r], 24]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
         "# A[5] * B\n\t"
-        "ldr		x8, [%[a], 40]\n\t"
-        "mov		x4, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc		x4, xzr, xzr\n\t"
-        "str		x5, [%[r], 40]\n\t"
+        "str	x4, [%[r], 32]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
         "# A[6] * B\n\t"
-        "ldr		x8, [%[a], 48]\n\t"
-        "mov		x5, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc		x5, xzr, xzr\n\t"
-        "str		x3, [%[r], 48]\n\t"
+        "ldp	x8, x9, [%[a], 48]\n\t"
+        "str	x5, [%[r], 40]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
         "# A[7] * B\n\t"
-        "ldr		x8, [%[a], 56]\n\t"
-        "mov		x3, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc		x3, xzr, xzr\n\t"
-        "str		x4, [%[r], 56]\n\t"
+        "str	x3, [%[r], 48]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
         "# A[8] * B\n\t"
-        "ldr		x8, [%[a], 64]\n\t"
-        "mov		x4, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc		x4, xzr, xzr\n\t"
-        "str		x5, [%[r], 64]\n\t"
+        "ldp	x8, x9, [%[a], 64]\n\t"
+        "str	x4, [%[r], 56]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
         "# A[9] * B\n\t"
-        "ldr		x8, [%[a], 72]\n\t"
-        "mov		x5, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc		x5, xzr, xzr\n\t"
-        "str		x3, [%[r], 72]\n\t"
+        "str	x5, [%[r], 64]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
         "# A[10] * B\n\t"
-        "ldr		x8, [%[a], 80]\n\t"
-        "mov		x3, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc		x3, xzr, xzr\n\t"
-        "str		x4, [%[r], 80]\n\t"
+        "ldp	x8, x9, [%[a], 80]\n\t"
+        "str	x3, [%[r], 72]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
         "# A[11] * B\n\t"
-        "ldr		x8, [%[a], 88]\n\t"
-        "mov		x4, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc		x4, xzr, xzr\n\t"
-        "str		x5, [%[r], 88]\n\t"
+        "str	x4, [%[r], 80]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
         "# A[12] * B\n\t"
-        "ldr		x8, [%[a], 96]\n\t"
-        "mov		x5, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc		x5, xzr, xzr\n\t"
-        "str		x3, [%[r], 96]\n\t"
+        "ldp	x8, x9, [%[a], 96]\n\t"
+        "str	x5, [%[r], 88]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
         "# A[13] * B\n\t"
-        "ldr		x8, [%[a], 104]\n\t"
-        "mov		x3, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc		x3, xzr, xzr\n\t"
-        "str		x4, [%[r], 104]\n\t"
+        "str	x3, [%[r], 96]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
         "# A[14] * B\n\t"
-        "ldr		x8, [%[a], 112]\n\t"
-        "mov		x4, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc		x4, xzr, xzr\n\t"
-        "str		x5, [%[r], 112]\n\t"
+        "ldp	x8, x9, [%[a], 112]\n\t"
+        "str	x4, [%[r], 104]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
         "# A[15] * B\n\t"
-        "ldr		x8, [%[a], 120]\n\t"
-        "mov		x5, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc		x5, xzr, xzr\n\t"
-        "str		x3, [%[r], 120]\n\t"
+        "str	x5, [%[r], 112]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
         "# A[16] * B\n\t"
-        "ldr		x8, [%[a], 128]\n\t"
-        "mov		x3, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc		x3, xzr, xzr\n\t"
-        "str		x4, [%[r], 128]\n\t"
+        "ldp	x8, x9, [%[a], 128]\n\t"
+        "str	x3, [%[r], 120]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
         "# A[17] * B\n\t"
-        "ldr		x8, [%[a], 136]\n\t"
-        "mov		x4, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc		x4, xzr, xzr\n\t"
-        "str		x5, [%[r], 136]\n\t"
+        "str	x4, [%[r], 128]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
         "# A[18] * B\n\t"
-        "ldr		x8, [%[a], 144]\n\t"
-        "mov		x5, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc		x5, xzr, xzr\n\t"
-        "str		x3, [%[r], 144]\n\t"
+        "ldp	x8, x9, [%[a], 144]\n\t"
+        "str	x5, [%[r], 136]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
         "# A[19] * B\n\t"
-        "ldr		x8, [%[a], 152]\n\t"
-        "mov		x3, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc		x3, xzr, xzr\n\t"
-        "str		x4, [%[r], 152]\n\t"
+        "str	x3, [%[r], 144]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
         "# A[20] * B\n\t"
-        "ldr		x8, [%[a], 160]\n\t"
-        "mov		x4, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc		x4, xzr, xzr\n\t"
-        "str		x5, [%[r], 160]\n\t"
+        "ldp	x8, x9, [%[a], 160]\n\t"
+        "str	x4, [%[r], 152]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
         "# A[21] * B\n\t"
-        "ldr		x8, [%[a], 168]\n\t"
-        "mov		x5, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc		x5, xzr, xzr\n\t"
-        "str		x3, [%[r], 168]\n\t"
+        "str	x5, [%[r], 160]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
         "# A[22] * B\n\t"
-        "ldr		x8, [%[a], 176]\n\t"
-        "mov		x3, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc		x3, xzr, xzr\n\t"
-        "str		x4, [%[r], 176]\n\t"
+        "ldp	x8, x9, [%[a], 176]\n\t"
+        "str	x3, [%[r], 168]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
         "# A[23] * B\n\t"
-        "ldr	x8, [%[a], 184]\n\t"
-        "mul	x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
+        "str	x4, [%[r], 176]\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x3, xzr, xzr\n\t"
         "adds	x5, x5, x6\n\t"
         "adc	x3, x3, x7\n\t"
-        "str	x5, [%[r], 184]\n\t"
-        "str	x3, [%[r], 192]\n\t"
+        "stp	x5, x3, [%[r], 184]\n\t"
         :
         : [r] "r" (r), [a] "r" (a), [b] "r" (b)
-        : "memory", "x3", "x4", "x5", "x6", "x7", "x8"
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
     );
 #endif
 }
@@ -8847,8 +10182,8 @@
         "subs	%[d0], %[d0], x4\n\t"
         "sbc	%[d1], %[d1], x3\n\t"
 
-        "lsl	x3, %[d1], 32\n\t"
-        "orr	x3, x3, %[d0], lsr 32\n\t"
+        "lsr	x3, %[d0], 32\n\t"
+        "orr	x3, x3, %[d1], lsl 32\n\t"
 
         "udiv	x3, x3, x5\n\t"
         "add	x6, x6, x3\n\t"
@@ -8857,21 +10192,16 @@
         "subs	%[d0], %[d0], x4\n\t"
         "sbc	%[d1], %[d1], x3\n\t"
 
-        "lsl	x3, %[d1], 32\n\t"
-        "orr	x3, x3, %[d0], lsr 32\n\t"
+        "lsr	x3, %[d0], 32\n\t"
+        "orr	x3, x3, %[d1], lsl 32\n\t"
 
         "udiv	x3, x3, x5\n\t"
         "add	x6, x6, x3\n\t"
         "mul	x4, %[div], x3\n\t"
-        "umulh	x3, %[div], x3\n\t"
-        "subs	%[d0], %[d0], x4\n\t"
-        "sbc	%[d1], %[d1], x3\n\t"
+        "sub	%[d0], %[d0], x4\n\t"
 
         "udiv	x3, %[d0], %[div]\n\t"
-        "add	x6, x6, x3\n\t"
-        "mul	x3, %[div], x3\n\t"
-        "sub	%[d0], %[d0], x3\n\t"
-        "mov	%[r], x6\n\t"
+        "add	%[r], x6, x3\n\t"
 
         : [r] "=r" (r)
         : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
@@ -8888,234 +10218,211 @@
  * return -ve, 0 or +ve if a is less than, equal to or greater than b
  * respectively.
  */
-static int64_t sp_3072_cmp_24(sp_digit* a, sp_digit* b)
-{
-    sp_digit r = -1;
-    sp_digit one = 1;
-
-#ifdef WOLFSSL_SP_SMALL
-    __asm__ __volatile__ (
-        "mov	x3, -1\n\t"
-        "mov	x6, 184\n\t"
+static int64_t sp_3072_cmp_24(const sp_digit* a, const sp_digit* b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    __asm__ __volatile__ (
+        "mov	x2, -1\n\t"
+        "mov	x3, 1\n\t"
+        "mov	x4, -1\n\t"
+        "mov	x5, 184\n\t"
         "1:\n\t"
-        "ldr	x4, [%[a], x6]\n\t"
-        "ldr	x5, [%[b], x6]\n\t"
-        "and	x4, x4, x3\n\t"
-        "and	x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "sub	x6, x6, #8\n\t"
-        "b.cc	1b\n\t"
-        "eor	%[r], %[r], x3\n\t"
-        : [r] "+r" (r)
-        : [a] "r" (a), [b] "r" (b), [one] "r" (one)
-        : "x2", "x3", "x4", "x5", "x6"
-    );
-#else
-    __asm__ __volatile__ (
-        "mov	x3, -1\n\t"
-        "ldr		x4, [%[a], 184]\n\t"
-        "ldr		x5, [%[b], 184]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 176]\n\t"
-        "ldr		x5, [%[b], 176]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 168]\n\t"
-        "ldr		x5, [%[b], 168]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 160]\n\t"
-        "ldr		x5, [%[b], 160]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 152]\n\t"
-        "ldr		x5, [%[b], 152]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 144]\n\t"
-        "ldr		x5, [%[b], 144]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 136]\n\t"
-        "ldr		x5, [%[b], 136]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 128]\n\t"
-        "ldr		x5, [%[b], 128]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 120]\n\t"
-        "ldr		x5, [%[b], 120]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 112]\n\t"
-        "ldr		x5, [%[b], 112]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 104]\n\t"
-        "ldr		x5, [%[b], 104]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 96]\n\t"
-        "ldr		x5, [%[b], 96]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 88]\n\t"
-        "ldr		x5, [%[b], 88]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 80]\n\t"
-        "ldr		x5, [%[b], 80]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 72]\n\t"
-        "ldr		x5, [%[b], 72]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 64]\n\t"
-        "ldr		x5, [%[b], 64]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 56]\n\t"
-        "ldr		x5, [%[b], 56]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 48]\n\t"
-        "ldr		x5, [%[b], 48]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 40]\n\t"
-        "ldr		x5, [%[b], 40]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 32]\n\t"
-        "ldr		x5, [%[b], 32]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 24]\n\t"
-        "ldr		x5, [%[b], 24]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 16]\n\t"
-        "ldr		x5, [%[b], 16]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 8]\n\t"
-        "ldr		x5, [%[b], 8]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 0]\n\t"
-        "ldr		x5, [%[b], 0]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "eor	%[r], %[r], x3\n\t"
-        : [r] "+r" (r)
-        : [a] "r" (a), [b] "r" (b), [one] "r" (one)
-        : "x2", "x3", "x4", "x5", "x6"
-    );
-#endif
-
-    return r;
+        "ldr	x6, [%[a], x5]\n\t"
+        "ldr	x7, [%[b], x5]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x6, x6, x7\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "subs	x5, x5, #8\n\t"
+        "b.cs	1b\n\t"
+        "eor	%[a], x2, x4\n\t"
+        : [a] "+r" (a)
+        : [b] "r" (b)
+        : "x2", "x3", "x4", "x5", "x6", "x7", "x8"
+    );
+#else
+    __asm__ __volatile__ (
+        "mov	x2, -1\n\t"
+        "mov	x3, 1\n\t"
+        "mov  x4, -1\n\t"
+        "ldp	x5, x6, [%[a], 176]\n\t"
+        "ldp	x7, x8, [%[b], 176]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 160]\n\t"
+        "ldp	x7, x8, [%[b], 160]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 144]\n\t"
+        "ldp	x7, x8, [%[b], 144]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 128]\n\t"
+        "ldp	x7, x8, [%[b], 128]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 112]\n\t"
+        "ldp	x7, x8, [%[b], 112]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 96]\n\t"
+        "ldp	x7, x8, [%[b], 96]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 80]\n\t"
+        "ldp	x7, x8, [%[b], 80]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 64]\n\t"
+        "ldp	x7, x8, [%[b], 64]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 48]\n\t"
+        "ldp	x7, x8, [%[b], 48]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 32]\n\t"
+        "ldp	x7, x8, [%[b], 32]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 16]\n\t"
+        "ldp	x7, x8, [%[b], 16]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 0]\n\t"
+        "ldp	x7, x8, [%[b], 0]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "eor	%[a], x2, x4\n\t"
+        : [a] "+r" (a)
+        : [b] "r" (b)
+        : "x2", "x3", "x4", "x5", "x6", "x7", "x8"
+    );
+#endif
+
+    return (int64_t)a;
 }
 
 /* Divide d in a and put remainder into r (m*d + r = a)
@@ -9127,7 +10434,7 @@
  * r  Remainder from the division.
  * returns MP_OKAY indicating success.
  */
-static WC_INLINE int sp_3072_div_24(sp_digit* a, sp_digit* d, sp_digit* m,
+static WC_INLINE int sp_3072_div_24(const sp_digit* a, const sp_digit* d, sp_digit* m,
         sp_digit* r)
 {
     sp_digit t1[48], t2[25];
@@ -9151,7 +10458,7 @@
     }
 
     r1 = sp_3072_cmp_24(t1, d) >= 0;
-    sp_3072_cond_sub_24(r, t1, t2, (sp_digit)0 - r1);
+    sp_3072_cond_sub_24(r, t1, d, (sp_digit)0 - r1);
 
     return MP_OKAY;
 }
@@ -9163,7 +10470,7 @@
  * m  A single precision number that is the modulus to reduce with.
  * returns MP_OKAY indicating success.
  */
-static WC_INLINE int sp_3072_mod_24(sp_digit* r, sp_digit* a, sp_digit* m)
+static WC_INLINE int sp_3072_mod_24(sp_digit* r, const sp_digit* a, const sp_digit* m)
 {
     return sp_3072_div_24(a, m, NULL, r);
 }
@@ -9178,8 +10485,8 @@
  * m     A single precision number that is the modulus.
  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
  */
-static int sp_3072_mod_exp_24(sp_digit* r, sp_digit* a, sp_digit* e,
-        int bits, sp_digit* m, int reduceA)
+static int sp_3072_mod_exp_24(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
 {
 #ifndef WOLFSSL_SMALL_STACK
     sp_digit t[16][48];
@@ -9198,27 +10505,28 @@
 #ifdef WOLFSSL_SMALL_STACK
     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 48, NULL,
                             DYNAMIC_TYPE_TMP_BUFFER);
-    if (td == NULL)
+    if (td == NULL) {
         err = MEMORY_E;
-
-    if (err == MP_OKAY) {
-        for (i=0; i<16; i++)
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<16; i++) {
             t[i] = td + i * 48;
+        }
+#endif
         norm = t[0];
-    }
-#else
-    norm = t[0];
-#endif
-
-    if (err == MP_OKAY) {
+
         sp_3072_mont_setup(m, &mp);
         sp_3072_mont_norm_24(norm, m);
 
-        XMEMSET(t[1], 0, sizeof(sp_digit) * 24);
-        if (reduceA) {
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 24U);
+        if (reduceA != 0) {
             err = sp_3072_mod_24(t[1] + 24, a, m);
-            if (err == MP_OKAY)
+            if (err == MP_OKAY) {
                 err = sp_3072_mod_24(t[1], t[1], m);
+            }
         }
         else {
             XMEMCPY(t[1] + 24, a, sizeof(sp_digit) * 24);
@@ -9244,9 +10552,16 @@
 
         i = (bits - 1) / 64;
         n = e[i--];
-        y = n >> 60;
-        n <<= 4;
-        c = 60;
+        c = bits & 63;
+        if (c == 0) {
+            c = 64;
+        }
+        c -= bits % 4;
+        if (c == 64) {
+            c = 60;
+        }
+        y = (int)(n >> c);
+        n <<= 64 - c;
         XMEMCPY(r, t[y], sizeof(sp_digit) * 24);
         for (; i>=0 || c>=4; ) {
             if (c == 0) {
@@ -9277,7 +10592,7 @@
             sp_3072_mont_mul_24(r, r, t[y], m, mp);
         }
 
-        XMEMSET(&r[24], 0, sizeof(sp_digit) * 24);
+        XMEMSET(&r[24], 0, sizeof(sp_digit) * 24U);
         sp_3072_mont_reduce_24(r, m, mp);
 
         mask = 0 - (sp_3072_cmp_24(r, m) >= 0);
@@ -9285,8 +10600,9 @@
     }
 
 #ifdef WOLFSSL_SMALL_STACK
-    if (td != NULL)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
 #endif
 
     return err;
@@ -9301,8 +10617,8 @@
  * m     A single precision number that is the modulus.
  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
  */
-static int sp_3072_mod_exp_24(sp_digit* r, sp_digit* a, sp_digit* e,
-        int bits, sp_digit* m, int reduceA)
+static int sp_3072_mod_exp_24(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
 {
 #ifndef WOLFSSL_SMALL_STACK
     sp_digit t[32][48];
@@ -9321,27 +10637,28 @@
 #ifdef WOLFSSL_SMALL_STACK
     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 48, NULL,
                             DYNAMIC_TYPE_TMP_BUFFER);
-    if (td == NULL)
+    if (td == NULL) {
         err = MEMORY_E;
-
-    if (err == MP_OKAY) {
-        for (i=0; i<32; i++)
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<32; i++) {
             t[i] = td + i * 48;
+        }
+#endif
         norm = t[0];
-    }
-#else
-    norm = t[0];
-#endif
-
-    if (err == MP_OKAY) {
+
         sp_3072_mont_setup(m, &mp);
         sp_3072_mont_norm_24(norm, m);
 
-        XMEMSET(t[1], 0, sizeof(sp_digit) * 24);
-        if (reduceA) {
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 24U);
+        if (reduceA != 0) {
             err = sp_3072_mod_24(t[1] + 24, a, m);
-            if (err == MP_OKAY)
+            if (err == MP_OKAY) {
                 err = sp_3072_mod_24(t[1], t[1], m);
+            }
         }
         else {
             XMEMCPY(t[1] + 24, a, sizeof(sp_digit) * 24);
@@ -9383,9 +10700,16 @@
 
         i = (bits - 1) / 64;
         n = e[i--];
-        y = n >> 59;
-        n <<= 5;
-        c = 59;
+        c = bits & 63;
+        if (c == 0) {
+            c = 64;
+        }
+        c -= bits % 5;
+        if (c == 64) {
+            c = 59;
+        }
+        y = (int)(n >> c);
+        n <<= 64 - c;
         XMEMCPY(r, t[y], sizeof(sp_digit) * 24);
         for (; i>=0 || c>=5; ) {
             if (c == 0) {
@@ -9416,11 +10740,8 @@
 
             sp_3072_mont_mul_24(r, r, t[y], m, mp);
         }
-        y = e[0] & 0x1;
-        sp_3072_mont_sqr_24(r, r, m, mp);
-        sp_3072_mont_mul_24(r, r, t[y], m, mp);
-
-        XMEMSET(&r[24], 0, sizeof(sp_digit) * 24);
+
+        XMEMSET(&r[24], 0, sizeof(sp_digit) * 24U);
         sp_3072_mont_reduce_24(r, m, mp);
 
         mask = 0 - (sp_3072_cmp_24(r, m) >= 0);
@@ -9428,23 +10749,25 @@
     }
 
 #ifdef WOLFSSL_SMALL_STACK
-    if (td != NULL)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return err;
-}
-#endif /* WOLFSSL_SP_SMALL */
-
-#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */
-
+    }
+#endif
+
+    return err;
+}
+#endif /* WOLFSSL_SP_SMALL */
+
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
 /* r = 2^n mod m where n is the number of bits to reduce by.
  * Given m must be 3072 bits, just need to subtract.
  *
  * r  A single precision number.
- * m  A signle precision number.
- */
-static void sp_3072_mont_norm_48(sp_digit* r, sp_digit* m)
+ * m  A single precision number.
+ */
+static void sp_3072_mont_norm_48(sp_digit* r, const sp_digit* m)
 {
     XMEMSET(r, 0, sizeof(sp_digit) * 48);
 
@@ -9452,6 +10775,7 @@
     sp_3072_sub_in_place_48(r, m);
 }
 
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
 /* Conditionally subtract b from a using the mask m.
  * m is -1 to subtract and 0 when not copying.
  *
@@ -9460,12 +10784,12 @@
  * b  A single precision number to subtract.
  * m  Mask value to apply.
  */
-static sp_digit sp_3072_cond_sub_48(sp_digit* r, sp_digit* a, sp_digit* b,
+static sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a, const sp_digit* b,
         sp_digit m)
 {
+#ifdef WOLFSSL_SP_SMALL
     sp_digit c = 0;
 
-#ifdef WOLFSSL_SP_SMALL
     __asm__ __volatile__ (
         "mov	x8, #0\n\t"
         "1:\n\t"
@@ -9481,259 +10805,189 @@
         "b.lt	1b\n\t"
         : [c] "+r" (c)
         : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
-        : "memory", "x4", "x6", "x5", "x7", "x8"
-    );
-#else
-    __asm__ __volatile__ (
-
-        "ldr		x4, [%[a], 0]\n\t"
-        "ldr		x6, [%[a], 8]\n\t"
-        "ldr		x5, [%[b], 0]\n\t"
-        "ldr		x7, [%[b], 8]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "subs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 0]\n\t"
-        "str		x6, [%[r], 8]\n\t"
-        "ldr		x4, [%[a], 16]\n\t"
-        "ldr		x6, [%[a], 24]\n\t"
-        "ldr		x5, [%[b], 16]\n\t"
-        "ldr		x7, [%[b], 24]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 16]\n\t"
-        "str		x6, [%[r], 24]\n\t"
-        "ldr		x4, [%[a], 32]\n\t"
-        "ldr		x6, [%[a], 40]\n\t"
-        "ldr		x5, [%[b], 32]\n\t"
-        "ldr		x7, [%[b], 40]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 32]\n\t"
-        "str		x6, [%[r], 40]\n\t"
-        "ldr		x4, [%[a], 48]\n\t"
-        "ldr		x6, [%[a], 56]\n\t"
-        "ldr		x5, [%[b], 48]\n\t"
-        "ldr		x7, [%[b], 56]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 48]\n\t"
-        "str		x6, [%[r], 56]\n\t"
-        "ldr		x4, [%[a], 64]\n\t"
-        "ldr		x6, [%[a], 72]\n\t"
-        "ldr		x5, [%[b], 64]\n\t"
-        "ldr		x7, [%[b], 72]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 64]\n\t"
-        "str		x6, [%[r], 72]\n\t"
-        "ldr		x4, [%[a], 80]\n\t"
-        "ldr		x6, [%[a], 88]\n\t"
-        "ldr		x5, [%[b], 80]\n\t"
-        "ldr		x7, [%[b], 88]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 80]\n\t"
-        "str		x6, [%[r], 88]\n\t"
-        "ldr		x4, [%[a], 96]\n\t"
-        "ldr		x6, [%[a], 104]\n\t"
-        "ldr		x5, [%[b], 96]\n\t"
-        "ldr		x7, [%[b], 104]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 96]\n\t"
-        "str		x6, [%[r], 104]\n\t"
-        "ldr		x4, [%[a], 112]\n\t"
-        "ldr		x6, [%[a], 120]\n\t"
-        "ldr		x5, [%[b], 112]\n\t"
-        "ldr		x7, [%[b], 120]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 112]\n\t"
-        "str		x6, [%[r], 120]\n\t"
-        "ldr		x4, [%[a], 128]\n\t"
-        "ldr		x6, [%[a], 136]\n\t"
-        "ldr		x5, [%[b], 128]\n\t"
-        "ldr		x7, [%[b], 136]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 128]\n\t"
-        "str		x6, [%[r], 136]\n\t"
-        "ldr		x4, [%[a], 144]\n\t"
-        "ldr		x6, [%[a], 152]\n\t"
-        "ldr		x5, [%[b], 144]\n\t"
-        "ldr		x7, [%[b], 152]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 144]\n\t"
-        "str		x6, [%[r], 152]\n\t"
-        "ldr		x4, [%[a], 160]\n\t"
-        "ldr		x6, [%[a], 168]\n\t"
-        "ldr		x5, [%[b], 160]\n\t"
-        "ldr		x7, [%[b], 168]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 160]\n\t"
-        "str		x6, [%[r], 168]\n\t"
-        "ldr		x4, [%[a], 176]\n\t"
-        "ldr		x6, [%[a], 184]\n\t"
-        "ldr		x5, [%[b], 176]\n\t"
-        "ldr		x7, [%[b], 184]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 176]\n\t"
-        "str		x6, [%[r], 184]\n\t"
-        "ldr		x4, [%[a], 192]\n\t"
-        "ldr		x6, [%[a], 200]\n\t"
-        "ldr		x5, [%[b], 192]\n\t"
-        "ldr		x7, [%[b], 200]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 192]\n\t"
-        "str		x6, [%[r], 200]\n\t"
-        "ldr		x4, [%[a], 208]\n\t"
-        "ldr		x6, [%[a], 216]\n\t"
-        "ldr		x5, [%[b], 208]\n\t"
-        "ldr		x7, [%[b], 216]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 208]\n\t"
-        "str		x6, [%[r], 216]\n\t"
-        "ldr		x4, [%[a], 224]\n\t"
-        "ldr		x6, [%[a], 232]\n\t"
-        "ldr		x5, [%[b], 224]\n\t"
-        "ldr		x7, [%[b], 232]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 224]\n\t"
-        "str		x6, [%[r], 232]\n\t"
-        "ldr		x4, [%[a], 240]\n\t"
-        "ldr		x6, [%[a], 248]\n\t"
-        "ldr		x5, [%[b], 240]\n\t"
-        "ldr		x7, [%[b], 248]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 240]\n\t"
-        "str		x6, [%[r], 248]\n\t"
-        "ldr		x4, [%[a], 256]\n\t"
-        "ldr		x6, [%[a], 264]\n\t"
-        "ldr		x5, [%[b], 256]\n\t"
-        "ldr		x7, [%[b], 264]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 256]\n\t"
-        "str		x6, [%[r], 264]\n\t"
-        "ldr		x4, [%[a], 272]\n\t"
-        "ldr		x6, [%[a], 280]\n\t"
-        "ldr		x5, [%[b], 272]\n\t"
-        "ldr		x7, [%[b], 280]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 272]\n\t"
-        "str		x6, [%[r], 280]\n\t"
-        "ldr		x4, [%[a], 288]\n\t"
-        "ldr		x6, [%[a], 296]\n\t"
-        "ldr		x5, [%[b], 288]\n\t"
-        "ldr		x7, [%[b], 296]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 288]\n\t"
-        "str		x6, [%[r], 296]\n\t"
-        "ldr		x4, [%[a], 304]\n\t"
-        "ldr		x6, [%[a], 312]\n\t"
-        "ldr		x5, [%[b], 304]\n\t"
-        "ldr		x7, [%[b], 312]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 304]\n\t"
-        "str		x6, [%[r], 312]\n\t"
-        "ldr		x4, [%[a], 320]\n\t"
-        "ldr		x6, [%[a], 328]\n\t"
-        "ldr		x5, [%[b], 320]\n\t"
-        "ldr		x7, [%[b], 328]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 320]\n\t"
-        "str		x6, [%[r], 328]\n\t"
-        "ldr		x4, [%[a], 336]\n\t"
-        "ldr		x6, [%[a], 344]\n\t"
-        "ldr		x5, [%[b], 336]\n\t"
-        "ldr		x7, [%[b], 344]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 336]\n\t"
-        "str		x6, [%[r], 344]\n\t"
-        "ldr		x4, [%[a], 352]\n\t"
-        "ldr		x6, [%[a], 360]\n\t"
-        "ldr		x5, [%[b], 352]\n\t"
-        "ldr		x7, [%[b], 360]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 352]\n\t"
-        "str		x6, [%[r], 360]\n\t"
-        "ldr		x4, [%[a], 368]\n\t"
-        "ldr		x6, [%[a], 376]\n\t"
-        "ldr		x5, [%[b], 368]\n\t"
-        "ldr		x7, [%[b], 376]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 368]\n\t"
-        "str		x6, [%[r], 376]\n\t"
-        "csetm	%[c], cc\n\t"
-        : [c] "+r" (c)
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
-        : "memory", "x4", "x6", "x5", "x7", "x8"
-    );
-#endif /* WOLFSSL_SP_SMALL */
+        : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12"
+    );
 
     return c;
+#else
+    __asm__ __volatile__ (
+
+        "ldp	x5, x7, [%[b], 0]\n\t"
+        "ldp	x11, x12, [%[b], 16]\n\t"
+        "ldp	x4, x6, [%[a], 0]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 16]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "subs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "sbcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 0]\n\t"
+        "sbcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 16]\n\t"
+        "ldp	x5, x7, [%[b], 32]\n\t"
+        "ldp	x11, x12, [%[b], 48]\n\t"
+        "ldp	x4, x6, [%[a], 32]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 48]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "sbcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "sbcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 32]\n\t"
+        "sbcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 48]\n\t"
+        "ldp	x5, x7, [%[b], 64]\n\t"
+        "ldp	x11, x12, [%[b], 80]\n\t"
+        "ldp	x4, x6, [%[a], 64]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 80]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "sbcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "sbcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 64]\n\t"
+        "sbcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 80]\n\t"
+        "ldp	x5, x7, [%[b], 96]\n\t"
+        "ldp	x11, x12, [%[b], 112]\n\t"
+        "ldp	x4, x6, [%[a], 96]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 112]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "sbcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "sbcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 96]\n\t"
+        "sbcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 112]\n\t"
+        "ldp	x5, x7, [%[b], 128]\n\t"
+        "ldp	x11, x12, [%[b], 144]\n\t"
+        "ldp	x4, x6, [%[a], 128]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 144]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "sbcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "sbcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 128]\n\t"
+        "sbcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 144]\n\t"
+        "ldp	x5, x7, [%[b], 160]\n\t"
+        "ldp	x11, x12, [%[b], 176]\n\t"
+        "ldp	x4, x6, [%[a], 160]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 176]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "sbcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "sbcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 160]\n\t"
+        "sbcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 176]\n\t"
+        "ldp	x5, x7, [%[b], 192]\n\t"
+        "ldp	x11, x12, [%[b], 208]\n\t"
+        "ldp	x4, x6, [%[a], 192]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 208]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "sbcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "sbcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 192]\n\t"
+        "sbcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 208]\n\t"
+        "ldp	x5, x7, [%[b], 224]\n\t"
+        "ldp	x11, x12, [%[b], 240]\n\t"
+        "ldp	x4, x6, [%[a], 224]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 240]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "sbcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "sbcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 224]\n\t"
+        "sbcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 240]\n\t"
+        "ldp	x5, x7, [%[b], 256]\n\t"
+        "ldp	x11, x12, [%[b], 272]\n\t"
+        "ldp	x4, x6, [%[a], 256]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 272]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "sbcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "sbcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 256]\n\t"
+        "sbcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 272]\n\t"
+        "ldp	x5, x7, [%[b], 288]\n\t"
+        "ldp	x11, x12, [%[b], 304]\n\t"
+        "ldp	x4, x6, [%[a], 288]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 304]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "sbcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "sbcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 288]\n\t"
+        "sbcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 304]\n\t"
+        "ldp	x5, x7, [%[b], 320]\n\t"
+        "ldp	x11, x12, [%[b], 336]\n\t"
+        "ldp	x4, x6, [%[a], 320]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 336]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "sbcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "sbcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 320]\n\t"
+        "sbcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 336]\n\t"
+        "ldp	x5, x7, [%[b], 352]\n\t"
+        "ldp	x11, x12, [%[b], 368]\n\t"
+        "ldp	x4, x6, [%[a], 352]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 368]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "sbcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "sbcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 352]\n\t"
+        "sbcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 368]\n\t"
+        "csetm	%[r], cc\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12"
+    );
+
+    return (sp_digit)r;
+#endif /* WOLFSSL_SP_SMALL */
 }
 
 /* Reduce the number back to 3072 bits using Montgomery reduction.
@@ -9742,499 +10996,495 @@
  * m   The single precision number representing the modulus.
  * mp  The digit representing the negative inverse of m mod 2^n.
  */
-SP_NOINLINE static void sp_3072_mont_reduce_48(sp_digit* a, sp_digit* m,
+SP_NOINLINE static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m,
         sp_digit mp)
 {
     sp_digit ca = 0;
 
     __asm__ __volatile__ (
-        "ldp       x12, x13, [%[m], 0]\n\t"
-        "ldp       x14, x15, [%[m], 16]\n\t"
-        "ldp       x16, x17, [%[m], 32]\n\t"
-        "ldp       x18, x19, [%[m], 48]\n\t"
-        "ldp       x20, x21, [%[m], 64]\n\t"
-        "ldp       x22, x23, [%[m], 80]\n\t"
-        "ldp       x24, x25, [%[m], 96]\n\t"
-        "ldp       x26, x27, [%[m], 112]\n\t"
-        "# i = 0\n\t"
-        "mov	x3, 0\n\t"
-        "ldp	x10, x11, [%[a], 0]\n\t"
+        "ldp	x14, x15, [%[m], 0]\n\t"
+        "ldp	x16, x17, [%[m], 16]\n\t"
+        "ldp	x19, x20, [%[m], 32]\n\t"
+        "ldp	x21, x22, [%[m], 48]\n\t"
+        "ldp	x23, x24, [%[m], 64]\n\t"
+        "ldp	x25, x26, [%[m], 80]\n\t"
+        "ldp	x27, x28, [%[m], 96]\n\t"
+        "# i = 48\n\t"
+        "mov	x4, 48\n\t"
+        "ldp	x12, x13, [%[a], 0]\n\t"
         "\n1:\n\t"
         "# mu = a[i] * mp\n\t"
-        "mul	x8, %[mp], x10\n\t"
+        "mul	x9, %[mp], x12\n\t"
         "# a[i+0] += m[0] * mu\n\t"
-        "ldr	x9, [%[a], 0]\n\t"
-        "mul		x6, x12, x8\n\t"
-        "umulh	x7, x12, x8\n\t"
-        "adds	x10, x10, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
+        "mul	x7, x14, x9\n\t"
+        "umulh	x8, x14, x9\n\t"
+        "adds	x12, x12, x7\n\t"
         "# a[i+1] += m[1] * mu\n\t"
-        "ldr	x9, [%[a], 8]\n\t"
-        "mul		x6, x13, x8\n\t"
-        "umulh	x7, x13, x8\n\t"
-        "adds	x10, x11, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x10, x10, x5\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "mul	x7, x15, x9\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "umulh	x8, x15, x9\n\t"
+        "adds	x12, x13, x7\n\t"
         "# a[i+2] += m[2] * mu\n\t"
-        "ldr	x11, [%[a], 16]\n\t"
-        "mul		x6, x14, x8\n\t"
-        "umulh	x7, x14, x8\n\t"
-        "adds	x11, x11, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x11, x11, x4\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x13, [%[a], 16]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "mul	x7, x16, x9\n\t"
+        "adds	x12, x12, x6\n\t"
+        "umulh	x8, x16, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "adds	x13, x13, x7\n\t"
         "# a[i+3] += m[3] * mu\n\t"
-        "ldr	x9, [%[a], 24]\n\t"
-        "mul		x6, x15, x8\n\t"
-        "umulh	x7, x15, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 24]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 24]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "mul	x7, x17, x9\n\t"
+        "adds	x13, x13, x5\n\t"
+        "umulh	x8, x17, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+4] += m[4] * mu\n\t"
-        "ldr	x9, [%[a], 32]\n\t"
-        "mul		x6, x16, x8\n\t"
-        "umulh	x7, x16, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 32]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 32]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x19, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x19, x9\n\t"
+        "str	x10, [%[a], 24]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+5] += m[5] * mu\n\t"
-        "ldr	x9, [%[a], 40]\n\t"
-        "mul		x6, x17, x8\n\t"
-        "umulh	x7, x17, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 40]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 40]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x20, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x20, x9\n\t"
+        "str	x11, [%[a], 32]\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+6] += m[6] * mu\n\t"
-        "ldr	x9, [%[a], 48]\n\t"
-        "mul		x6, x18, x8\n\t"
-        "umulh	x7, x18, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 48]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 48]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x21, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x21, x9\n\t"
+        "str	x10, [%[a], 40]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+7] += m[7] * mu\n\t"
-        "ldr	x9, [%[a], 56]\n\t"
-        "mul		x6, x19, x8\n\t"
-        "umulh	x7, x19, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 56]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 56]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x22, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x22, x9\n\t"
+        "str	x11, [%[a], 48]\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+8] += m[8] * mu\n\t"
-        "ldr	x9, [%[a], 64]\n\t"
-        "mul		x6, x20, x8\n\t"
-        "umulh	x7, x20, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 64]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 64]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x23, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x23, x9\n\t"
+        "str	x10, [%[a], 56]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+9] += m[9] * mu\n\t"
-        "ldr	x9, [%[a], 72]\n\t"
-        "mul		x6, x21, x8\n\t"
-        "umulh	x7, x21, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 72]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 72]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x24, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x24, x9\n\t"
+        "str	x11, [%[a], 64]\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+10] += m[10] * mu\n\t"
-        "ldr	x9, [%[a], 80]\n\t"
-        "mul		x6, x22, x8\n\t"
-        "umulh	x7, x22, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 80]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 80]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x25, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x25, x9\n\t"
+        "str	x10, [%[a], 72]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+11] += m[11] * mu\n\t"
-        "ldr	x9, [%[a], 88]\n\t"
-        "mul		x6, x23, x8\n\t"
-        "umulh	x7, x23, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 88]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 88]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x26, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x26, x9\n\t"
+        "str	x11, [%[a], 80]\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+12] += m[12] * mu\n\t"
-        "ldr	x9, [%[a], 96]\n\t"
-        "mul		x6, x24, x8\n\t"
-        "umulh	x7, x24, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 96]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 96]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x27, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x27, x9\n\t"
+        "str	x10, [%[a], 88]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+13] += m[13] * mu\n\t"
-        "ldr	x9, [%[a], 104]\n\t"
-        "mul		x6, x25, x8\n\t"
-        "umulh	x7, x25, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 104]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 104]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x28, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x28, x9\n\t"
+        "str	x11, [%[a], 96]\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+14] += m[14] * mu\n\t"
-        "ldr	x9, [%[a], 112]\n\t"
-        "mul		x6, x26, x8\n\t"
-        "umulh	x7, x26, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 112]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 112]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 112]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 104]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+15] += m[15] * mu\n\t"
-        "ldr	x9, [%[a], 120]\n\t"
-        "mul		x6, x27, x8\n\t"
-        "umulh	x7, x27, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 120]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 120]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 120]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 112]\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+16] += m[16] * mu\n\t"
-        "ldr		x7, [%[m], 128]\n\t"
-        "ldr	x9, [%[a], 128]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 128]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 128]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 128]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 120]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+17] += m[17] * mu\n\t"
-        "ldr		x7, [%[m], 136]\n\t"
-        "ldr	x9, [%[a], 136]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 136]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 136]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 136]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 128]\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+18] += m[18] * mu\n\t"
-        "ldr		x7, [%[m], 144]\n\t"
-        "ldr	x9, [%[a], 144]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 144]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 144]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 144]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 136]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+19] += m[19] * mu\n\t"
-        "ldr		x7, [%[m], 152]\n\t"
-        "ldr	x9, [%[a], 152]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 152]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 152]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 152]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 144]\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+20] += m[20] * mu\n\t"
-        "ldr		x7, [%[m], 160]\n\t"
-        "ldr	x9, [%[a], 160]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 160]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 160]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 160]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 152]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+21] += m[21] * mu\n\t"
-        "ldr		x7, [%[m], 168]\n\t"
-        "ldr	x9, [%[a], 168]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 168]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 168]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 168]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 160]\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+22] += m[22] * mu\n\t"
-        "ldr		x7, [%[m], 176]\n\t"
-        "ldr	x9, [%[a], 176]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 176]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 176]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 176]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 168]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+23] += m[23] * mu\n\t"
-        "ldr		x7, [%[m], 184]\n\t"
-        "ldr	x9, [%[a], 184]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 184]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 184]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 184]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 176]\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+24] += m[24] * mu\n\t"
-        "ldr		x7, [%[m], 192]\n\t"
-        "ldr	x9, [%[a], 192]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 192]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 192]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 192]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 184]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+25] += m[25] * mu\n\t"
-        "ldr		x7, [%[m], 200]\n\t"
-        "ldr	x9, [%[a], 200]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 200]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 200]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 200]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 192]\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+26] += m[26] * mu\n\t"
-        "ldr		x7, [%[m], 208]\n\t"
-        "ldr	x9, [%[a], 208]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 208]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 208]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 208]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 200]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+27] += m[27] * mu\n\t"
-        "ldr		x7, [%[m], 216]\n\t"
-        "ldr	x9, [%[a], 216]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 216]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 216]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 216]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 208]\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+28] += m[28] * mu\n\t"
-        "ldr		x7, [%[m], 224]\n\t"
-        "ldr	x9, [%[a], 224]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 224]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 224]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 224]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 216]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+29] += m[29] * mu\n\t"
-        "ldr		x7, [%[m], 232]\n\t"
-        "ldr	x9, [%[a], 232]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 232]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 232]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 232]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 224]\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+30] += m[30] * mu\n\t"
-        "ldr		x7, [%[m], 240]\n\t"
-        "ldr	x9, [%[a], 240]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 240]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 240]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 240]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 232]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+31] += m[31] * mu\n\t"
-        "ldr		x7, [%[m], 248]\n\t"
-        "ldr	x9, [%[a], 248]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 248]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 248]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 248]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 240]\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+32] += m[32] * mu\n\t"
-        "ldr		x7, [%[m], 256]\n\t"
-        "ldr	x9, [%[a], 256]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 256]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 256]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 256]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 248]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+33] += m[33] * mu\n\t"
-        "ldr		x7, [%[m], 264]\n\t"
-        "ldr	x9, [%[a], 264]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 264]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 264]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 264]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 256]\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+34] += m[34] * mu\n\t"
-        "ldr		x7, [%[m], 272]\n\t"
-        "ldr	x9, [%[a], 272]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 272]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 272]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 272]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 264]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+35] += m[35] * mu\n\t"
-        "ldr		x7, [%[m], 280]\n\t"
-        "ldr	x9, [%[a], 280]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 280]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 280]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 280]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 272]\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+36] += m[36] * mu\n\t"
-        "ldr		x7, [%[m], 288]\n\t"
-        "ldr	x9, [%[a], 288]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 288]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 288]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 288]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 280]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+37] += m[37] * mu\n\t"
-        "ldr		x7, [%[m], 296]\n\t"
-        "ldr	x9, [%[a], 296]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 296]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 296]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 296]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 288]\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+38] += m[38] * mu\n\t"
-        "ldr		x7, [%[m], 304]\n\t"
-        "ldr	x9, [%[a], 304]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 304]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 304]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 304]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 296]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+39] += m[39] * mu\n\t"
-        "ldr		x7, [%[m], 312]\n\t"
-        "ldr	x9, [%[a], 312]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 312]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 312]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 312]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 304]\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+40] += m[40] * mu\n\t"
-        "ldr		x7, [%[m], 320]\n\t"
-        "ldr	x9, [%[a], 320]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 320]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 320]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 320]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 312]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+41] += m[41] * mu\n\t"
-        "ldr		x7, [%[m], 328]\n\t"
-        "ldr	x9, [%[a], 328]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 328]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 328]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 328]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 320]\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+42] += m[42] * mu\n\t"
-        "ldr		x7, [%[m], 336]\n\t"
-        "ldr	x9, [%[a], 336]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 336]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 336]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 336]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 328]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+43] += m[43] * mu\n\t"
-        "ldr		x7, [%[m], 344]\n\t"
-        "ldr	x9, [%[a], 344]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 344]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 344]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 344]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 336]\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+44] += m[44] * mu\n\t"
-        "ldr		x7, [%[m], 352]\n\t"
-        "ldr	x9, [%[a], 352]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 352]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 352]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 352]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 344]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+45] += m[45] * mu\n\t"
-        "ldr		x7, [%[m], 360]\n\t"
-        "ldr	x9, [%[a], 360]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x4, x7, xzr\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 360]\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "ldr	x10, [%[a], 360]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 360]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 352]\n\t"
+        "adds	x10, x10, x7\n\t"
         "# a[i+46] += m[46] * mu\n\t"
-        "ldr		x7, [%[m], 368]\n\t"
-        "ldr	x9, [%[a], 368]\n\t"
-        "mul		x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x9, x9, x6\n\t"
-        "adc	x5, x7, xzr\n\t"
-        "adds	x9, x9, x4\n\t"
-        "str	x9, [%[a], 368]\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "ldr	x11, [%[a], 368]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 368]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 360]\n\t"
+        "adds	x11, x11, x7\n\t"
         "# a[i+47] += m[47] * mu\n\t"
-        "ldr	x7, [%[m], 376]\n\t"
-        "ldr	x9, [%[a], 376]\n\t"
-        "mul	x6, x7, x8\n\t"
-        "umulh	x7, x7, x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x7, x7, %[ca]\n\t"
+        "ldr	x10, [%[a], 376]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 376]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "adds	x6, x6, x7\n\t"
+        "adcs	x8, x8, %[ca]\n\t"
+        "str	x11, [%[a], 368]\n\t"
         "cset  %[ca], cs\n\t"
-        "adds	x9, x9, x5\n\t"
-        "str	x9, [%[a], 376]\n\t"
-        "ldr	x9, [%[a], 384]\n\t"
-        "adcs	x9, x9, x7\n\t"
-        "str	x9, [%[a], 384]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "ldr	x11, [%[a], 384]\n\t"
+        "str	x10, [%[a], 376]\n\t"
+        "adcs	x11, x11, x8\n\t"
+        "str	x11, [%[a], 384]\n\t"
         "adc	%[ca], %[ca], xzr\n\t"
-        "# i += 1\n\t"
+        "subs	x4, x4, 1\n\t"
         "add	%[a], %[a], 8\n\t"
-        "add	x3, x3, 8\n\t"
-        "cmp	x3, 384\n\t"
-        "blt	1b\n\t"
-        "str	x10, [%[a], 0]\n\t"
-        "str	x11, [%[a], 8]\n\t"
+        "bne	1b\n\t"
+        "stp	x12, x13, [%[a], 0]\n\t"
         : [ca] "+r" (ca), [a] "+r" (a)
         : [m] "r" (m), [mp] "r" (mp)
-        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27"
+        : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28"
     );
 
     sp_3072_cond_sub_48(a - 48, a, m, (sp_digit)0 - ca);
@@ -10249,8 +11499,8 @@
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_3072_mont_mul_48(sp_digit* r, sp_digit* a, sp_digit* b,
-        sp_digit* m, sp_digit mp)
+static void sp_3072_mont_mul_48(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
 {
     sp_3072_mul_48(r, a, b);
     sp_3072_mont_reduce_48(r, m, mp);
@@ -10263,488 +11513,13 @@
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_3072_mont_sqr_48(sp_digit* r, sp_digit* a, sp_digit* m,
+static void sp_3072_mont_sqr_48(sp_digit* r, const sp_digit* a, const sp_digit* m,
         sp_digit mp)
 {
     sp_3072_sqr_48(r, a);
     sp_3072_mont_reduce_48(r, m, mp);
 }
 
-/* Mul a by digit b into r. (r = a * b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A single precision digit.
- */
-static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a,
-        const sp_digit b)
-{
-#ifdef WOLFSSL_SP_SMALL
-    __asm__ __volatile__ (
-        "# A[0] * B\n\t"
-        "ldr	x8, [%[a]]\n\t"
-        "mul	x5, %[b], x8\n\t"
-        "umulh	x3, %[b], x8\n\t"
-        "mov	x4, 0\n\t"
-        "str	x5, [%[r]]\n\t"
-        "mov	x5, 0\n\t"
-        "mov	x9, #8\n\t"
-        "1:\n\t"
-        "ldr	x8, [%[a], x9]\n\t"
-        "mul	x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, xzr, xzr\n\t"
-        "str	x3, [%[r], x9]\n\t"
-        "mov	x3, x4\n\t"
-        "mov	x4, x5\n\t"
-        "mov	x5, #0\n\t"
-        "add	x9, x9, #8\n\t"
-        "cmp	x9, 384\n\t"
-        "b.lt	1b\n\t"
-        "str	x3, [%[r], 384]\n\t"
-        :
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
-        : "memory", "x3", "x4", "x5", "x6", "x7", "x8"
-    );
-#else
-    __asm__ __volatile__ (
-        "# A[0] * B\n\t"
-        "ldr	x8, [%[a]]\n\t"
-        "mul	x3, %[b], x8\n\t"
-        "umulh	x4, %[b], x8\n\t"
-        "mov	x5, 0\n\t"
-        "str	x3, [%[r]]\n\t"
-        "# A[1] * B\n\t"
-        "ldr		x8, [%[a], 8]\n\t"
-        "mov		x3, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc		x3, xzr, xzr\n\t"
-        "str		x4, [%[r], 8]\n\t"
-        "# A[2] * B\n\t"
-        "ldr		x8, [%[a], 16]\n\t"
-        "mov		x4, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc		x4, xzr, xzr\n\t"
-        "str		x5, [%[r], 16]\n\t"
-        "# A[3] * B\n\t"
-        "ldr		x8, [%[a], 24]\n\t"
-        "mov		x5, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc		x5, xzr, xzr\n\t"
-        "str		x3, [%[r], 24]\n\t"
-        "# A[4] * B\n\t"
-        "ldr		x8, [%[a], 32]\n\t"
-        "mov		x3, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc		x3, xzr, xzr\n\t"
-        "str		x4, [%[r], 32]\n\t"
-        "# A[5] * B\n\t"
-        "ldr		x8, [%[a], 40]\n\t"
-        "mov		x4, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc		x4, xzr, xzr\n\t"
-        "str		x5, [%[r], 40]\n\t"
-        "# A[6] * B\n\t"
-        "ldr		x8, [%[a], 48]\n\t"
-        "mov		x5, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc		x5, xzr, xzr\n\t"
-        "str		x3, [%[r], 48]\n\t"
-        "# A[7] * B\n\t"
-        "ldr		x8, [%[a], 56]\n\t"
-        "mov		x3, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc		x3, xzr, xzr\n\t"
-        "str		x4, [%[r], 56]\n\t"
-        "# A[8] * B\n\t"
-        "ldr		x8, [%[a], 64]\n\t"
-        "mov		x4, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc		x4, xzr, xzr\n\t"
-        "str		x5, [%[r], 64]\n\t"
-        "# A[9] * B\n\t"
-        "ldr		x8, [%[a], 72]\n\t"
-        "mov		x5, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc		x5, xzr, xzr\n\t"
-        "str		x3, [%[r], 72]\n\t"
-        "# A[10] * B\n\t"
-        "ldr		x8, [%[a], 80]\n\t"
-        "mov		x3, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc		x3, xzr, xzr\n\t"
-        "str		x4, [%[r], 80]\n\t"
-        "# A[11] * B\n\t"
-        "ldr		x8, [%[a], 88]\n\t"
-        "mov		x4, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc		x4, xzr, xzr\n\t"
-        "str		x5, [%[r], 88]\n\t"
-        "# A[12] * B\n\t"
-        "ldr		x8, [%[a], 96]\n\t"
-        "mov		x5, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc		x5, xzr, xzr\n\t"
-        "str		x3, [%[r], 96]\n\t"
-        "# A[13] * B\n\t"
-        "ldr		x8, [%[a], 104]\n\t"
-        "mov		x3, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc		x3, xzr, xzr\n\t"
-        "str		x4, [%[r], 104]\n\t"
-        "# A[14] * B\n\t"
-        "ldr		x8, [%[a], 112]\n\t"
-        "mov		x4, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc		x4, xzr, xzr\n\t"
-        "str		x5, [%[r], 112]\n\t"
-        "# A[15] * B\n\t"
-        "ldr		x8, [%[a], 120]\n\t"
-        "mov		x5, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc		x5, xzr, xzr\n\t"
-        "str		x3, [%[r], 120]\n\t"
-        "# A[16] * B\n\t"
-        "ldr		x8, [%[a], 128]\n\t"
-        "mov		x3, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc		x3, xzr, xzr\n\t"
-        "str		x4, [%[r], 128]\n\t"
-        "# A[17] * B\n\t"
-        "ldr		x8, [%[a], 136]\n\t"
-        "mov		x4, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc		x4, xzr, xzr\n\t"
-        "str		x5, [%[r], 136]\n\t"
-        "# A[18] * B\n\t"
-        "ldr		x8, [%[a], 144]\n\t"
-        "mov		x5, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc		x5, xzr, xzr\n\t"
-        "str		x3, [%[r], 144]\n\t"
-        "# A[19] * B\n\t"
-        "ldr		x8, [%[a], 152]\n\t"
-        "mov		x3, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc		x3, xzr, xzr\n\t"
-        "str		x4, [%[r], 152]\n\t"
-        "# A[20] * B\n\t"
-        "ldr		x8, [%[a], 160]\n\t"
-        "mov		x4, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc		x4, xzr, xzr\n\t"
-        "str		x5, [%[r], 160]\n\t"
-        "# A[21] * B\n\t"
-        "ldr		x8, [%[a], 168]\n\t"
-        "mov		x5, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc		x5, xzr, xzr\n\t"
-        "str		x3, [%[r], 168]\n\t"
-        "# A[22] * B\n\t"
-        "ldr		x8, [%[a], 176]\n\t"
-        "mov		x3, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc		x3, xzr, xzr\n\t"
-        "str		x4, [%[r], 176]\n\t"
-        "# A[23] * B\n\t"
-        "ldr		x8, [%[a], 184]\n\t"
-        "mov		x4, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc		x4, xzr, xzr\n\t"
-        "str		x5, [%[r], 184]\n\t"
-        "# A[24] * B\n\t"
-        "ldr		x8, [%[a], 192]\n\t"
-        "mov		x5, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc		x5, xzr, xzr\n\t"
-        "str		x3, [%[r], 192]\n\t"
-        "# A[25] * B\n\t"
-        "ldr		x8, [%[a], 200]\n\t"
-        "mov		x3, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc		x3, xzr, xzr\n\t"
-        "str		x4, [%[r], 200]\n\t"
-        "# A[26] * B\n\t"
-        "ldr		x8, [%[a], 208]\n\t"
-        "mov		x4, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc		x4, xzr, xzr\n\t"
-        "str		x5, [%[r], 208]\n\t"
-        "# A[27] * B\n\t"
-        "ldr		x8, [%[a], 216]\n\t"
-        "mov		x5, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc		x5, xzr, xzr\n\t"
-        "str		x3, [%[r], 216]\n\t"
-        "# A[28] * B\n\t"
-        "ldr		x8, [%[a], 224]\n\t"
-        "mov		x3, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc		x3, xzr, xzr\n\t"
-        "str		x4, [%[r], 224]\n\t"
-        "# A[29] * B\n\t"
-        "ldr		x8, [%[a], 232]\n\t"
-        "mov		x4, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc		x4, xzr, xzr\n\t"
-        "str		x5, [%[r], 232]\n\t"
-        "# A[30] * B\n\t"
-        "ldr		x8, [%[a], 240]\n\t"
-        "mov		x5, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc		x5, xzr, xzr\n\t"
-        "str		x3, [%[r], 240]\n\t"
-        "# A[31] * B\n\t"
-        "ldr		x8, [%[a], 248]\n\t"
-        "mov		x3, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc		x3, xzr, xzr\n\t"
-        "str		x4, [%[r], 248]\n\t"
-        "# A[32] * B\n\t"
-        "ldr		x8, [%[a], 256]\n\t"
-        "mov		x4, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc		x4, xzr, xzr\n\t"
-        "str		x5, [%[r], 256]\n\t"
-        "# A[33] * B\n\t"
-        "ldr		x8, [%[a], 264]\n\t"
-        "mov		x5, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc		x5, xzr, xzr\n\t"
-        "str		x3, [%[r], 264]\n\t"
-        "# A[34] * B\n\t"
-        "ldr		x8, [%[a], 272]\n\t"
-        "mov		x3, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc		x3, xzr, xzr\n\t"
-        "str		x4, [%[r], 272]\n\t"
-        "# A[35] * B\n\t"
-        "ldr		x8, [%[a], 280]\n\t"
-        "mov		x4, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc		x4, xzr, xzr\n\t"
-        "str		x5, [%[r], 280]\n\t"
-        "# A[36] * B\n\t"
-        "ldr		x8, [%[a], 288]\n\t"
-        "mov		x5, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc		x5, xzr, xzr\n\t"
-        "str		x3, [%[r], 288]\n\t"
-        "# A[37] * B\n\t"
-        "ldr		x8, [%[a], 296]\n\t"
-        "mov		x3, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc		x3, xzr, xzr\n\t"
-        "str		x4, [%[r], 296]\n\t"
-        "# A[38] * B\n\t"
-        "ldr		x8, [%[a], 304]\n\t"
-        "mov		x4, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc		x4, xzr, xzr\n\t"
-        "str		x5, [%[r], 304]\n\t"
-        "# A[39] * B\n\t"
-        "ldr		x8, [%[a], 312]\n\t"
-        "mov		x5, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc		x5, xzr, xzr\n\t"
-        "str		x3, [%[r], 312]\n\t"
-        "# A[40] * B\n\t"
-        "ldr		x8, [%[a], 320]\n\t"
-        "mov		x3, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc		x3, xzr, xzr\n\t"
-        "str		x4, [%[r], 320]\n\t"
-        "# A[41] * B\n\t"
-        "ldr		x8, [%[a], 328]\n\t"
-        "mov		x4, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc		x4, xzr, xzr\n\t"
-        "str		x5, [%[r], 328]\n\t"
-        "# A[42] * B\n\t"
-        "ldr		x8, [%[a], 336]\n\t"
-        "mov		x5, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc		x5, xzr, xzr\n\t"
-        "str		x3, [%[r], 336]\n\t"
-        "# A[43] * B\n\t"
-        "ldr		x8, [%[a], 344]\n\t"
-        "mov		x3, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc		x3, xzr, xzr\n\t"
-        "str		x4, [%[r], 344]\n\t"
-        "# A[44] * B\n\t"
-        "ldr		x8, [%[a], 352]\n\t"
-        "mov		x4, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc		x4, xzr, xzr\n\t"
-        "str		x5, [%[r], 352]\n\t"
-        "# A[45] * B\n\t"
-        "ldr		x8, [%[a], 360]\n\t"
-        "mov		x5, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc		x5, xzr, xzr\n\t"
-        "str		x3, [%[r], 360]\n\t"
-        "# A[46] * B\n\t"
-        "ldr		x8, [%[a], 368]\n\t"
-        "mov		x3, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc		x3, xzr, xzr\n\t"
-        "str		x4, [%[r], 368]\n\t"
-        "# A[47] * B\n\t"
-        "ldr	x8, [%[a], 376]\n\t"
-        "mul	x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adc	x3, x3, x7\n\t"
-        "str	x5, [%[r], 376]\n\t"
-        "str	x3, [%[r], 384]\n\t"
-        :
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
-        : "memory", "x3", "x4", "x5", "x6", "x7", "x8"
-    );
-#endif
-}
-
 /* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
  *
  * d1   The high order half of the number to divide.
@@ -10775,8 +11550,8 @@
         "subs	%[d0], %[d0], x4\n\t"
         "sbc	%[d1], %[d1], x3\n\t"
 
-        "lsl	x3, %[d1], 32\n\t"
-        "orr	x3, x3, %[d0], lsr 32\n\t"
+        "lsr	x3, %[d0], 32\n\t"
+        "orr	x3, x3, %[d1], lsl 32\n\t"
 
         "udiv	x3, x3, x5\n\t"
         "add	x6, x6, x3\n\t"
@@ -10785,21 +11560,16 @@
         "subs	%[d0], %[d0], x4\n\t"
         "sbc	%[d1], %[d1], x3\n\t"
 
-        "lsl	x3, %[d1], 32\n\t"
-        "orr	x3, x3, %[d0], lsr 32\n\t"
+        "lsr	x3, %[d0], 32\n\t"
+        "orr	x3, x3, %[d1], lsl 32\n\t"
 
         "udiv	x3, x3, x5\n\t"
         "add	x6, x6, x3\n\t"
         "mul	x4, %[div], x3\n\t"
-        "umulh	x3, %[div], x3\n\t"
-        "subs	%[d0], %[d0], x4\n\t"
-        "sbc	%[d1], %[d1], x3\n\t"
+        "sub	%[d0], %[d0], x4\n\t"
 
         "udiv	x3, %[d0], %[div]\n\t"
-        "add	x6, x6, x3\n\t"
-        "mul	x3, %[div], x3\n\t"
-        "sub	%[d0], %[d0], x3\n\t"
-        "mov	%[r], x6\n\t"
+        "add	%[r], x6, x3\n\t"
 
         : [r] "=r" (r)
         : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
@@ -10815,13 +11585,14 @@
  * a  A single precision integer.
  * m  Mask to AND against each digit.
  */
-static void sp_3072_mask_48(sp_digit* r, sp_digit* a, sp_digit m)
-{
-#ifdef WOLFSSL_SP_SMALL
-    int i;
-
-    for (i=0; i<48; i++)
+static void sp_3072_mask_48(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<48; i++) {
         r[i] = a[i] & m;
+    }
 #else
     int i;
 
@@ -10845,426 +11616,379 @@
  * return -ve, 0 or +ve if a is less than, equal to or greater than b
  * respectively.
  */
-static int64_t sp_3072_cmp_48(sp_digit* a, sp_digit* b)
-{
-    sp_digit r = -1;
-    sp_digit one = 1;
-
-#ifdef WOLFSSL_SP_SMALL
-    __asm__ __volatile__ (
-        "mov	x3, -1\n\t"
-        "mov	x6, 376\n\t"
+static int64_t sp_3072_cmp_48(const sp_digit* a, const sp_digit* b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    __asm__ __volatile__ (
+        "mov	x2, -1\n\t"
+        "mov	x3, 1\n\t"
+        "mov	x4, -1\n\t"
+        "mov	x5, 376\n\t"
         "1:\n\t"
-        "ldr	x4, [%[a], x6]\n\t"
-        "ldr	x5, [%[b], x6]\n\t"
-        "and	x4, x4, x3\n\t"
-        "and	x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "sub	x6, x6, #8\n\t"
-        "b.cc	1b\n\t"
-        "eor	%[r], %[r], x3\n\t"
-        : [r] "+r" (r)
-        : [a] "r" (a), [b] "r" (b), [one] "r" (one)
-        : "x2", "x3", "x4", "x5", "x6"
-    );
-#else
-    __asm__ __volatile__ (
-        "mov	x3, -1\n\t"
-        "ldr		x4, [%[a], 376]\n\t"
-        "ldr		x5, [%[b], 376]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 368]\n\t"
-        "ldr		x5, [%[b], 368]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 360]\n\t"
-        "ldr		x5, [%[b], 360]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 352]\n\t"
-        "ldr		x5, [%[b], 352]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 344]\n\t"
-        "ldr		x5, [%[b], 344]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 336]\n\t"
-        "ldr		x5, [%[b], 336]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 328]\n\t"
-        "ldr		x5, [%[b], 328]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 320]\n\t"
-        "ldr		x5, [%[b], 320]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 312]\n\t"
-        "ldr		x5, [%[b], 312]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 304]\n\t"
-        "ldr		x5, [%[b], 304]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 296]\n\t"
-        "ldr		x5, [%[b], 296]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 288]\n\t"
-        "ldr		x5, [%[b], 288]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 280]\n\t"
-        "ldr		x5, [%[b], 280]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 272]\n\t"
-        "ldr		x5, [%[b], 272]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 264]\n\t"
-        "ldr		x5, [%[b], 264]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 256]\n\t"
-        "ldr		x5, [%[b], 256]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 248]\n\t"
-        "ldr		x5, [%[b], 248]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 240]\n\t"
-        "ldr		x5, [%[b], 240]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 232]\n\t"
-        "ldr		x5, [%[b], 232]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 224]\n\t"
-        "ldr		x5, [%[b], 224]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 216]\n\t"
-        "ldr		x5, [%[b], 216]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 208]\n\t"
-        "ldr		x5, [%[b], 208]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 200]\n\t"
-        "ldr		x5, [%[b], 200]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 192]\n\t"
-        "ldr		x5, [%[b], 192]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 184]\n\t"
-        "ldr		x5, [%[b], 184]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 176]\n\t"
-        "ldr		x5, [%[b], 176]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 168]\n\t"
-        "ldr		x5, [%[b], 168]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 160]\n\t"
-        "ldr		x5, [%[b], 160]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 152]\n\t"
-        "ldr		x5, [%[b], 152]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 144]\n\t"
-        "ldr		x5, [%[b], 144]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 136]\n\t"
-        "ldr		x5, [%[b], 136]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 128]\n\t"
-        "ldr		x5, [%[b], 128]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 120]\n\t"
-        "ldr		x5, [%[b], 120]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 112]\n\t"
-        "ldr		x5, [%[b], 112]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 104]\n\t"
-        "ldr		x5, [%[b], 104]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 96]\n\t"
-        "ldr		x5, [%[b], 96]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 88]\n\t"
-        "ldr		x5, [%[b], 88]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 80]\n\t"
-        "ldr		x5, [%[b], 80]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 72]\n\t"
-        "ldr		x5, [%[b], 72]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 64]\n\t"
-        "ldr		x5, [%[b], 64]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 56]\n\t"
-        "ldr		x5, [%[b], 56]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 48]\n\t"
-        "ldr		x5, [%[b], 48]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 40]\n\t"
-        "ldr		x5, [%[b], 40]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 32]\n\t"
-        "ldr		x5, [%[b], 32]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 24]\n\t"
-        "ldr		x5, [%[b], 24]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 16]\n\t"
-        "ldr		x5, [%[b], 16]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 8]\n\t"
-        "ldr		x5, [%[b], 8]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 0]\n\t"
-        "ldr		x5, [%[b], 0]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "eor	%[r], %[r], x3\n\t"
-        : [r] "+r" (r)
-        : [a] "r" (a), [b] "r" (b), [one] "r" (one)
-        : "x2", "x3", "x4", "x5", "x6"
-    );
-#endif
-
-    return r;
+        "ldr	x6, [%[a], x5]\n\t"
+        "ldr	x7, [%[b], x5]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x6, x6, x7\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "subs	x5, x5, #8\n\t"
+        "b.cs	1b\n\t"
+        "eor	%[a], x2, x4\n\t"
+        : [a] "+r" (a)
+        : [b] "r" (b)
+        : "x2", "x3", "x4", "x5", "x6", "x7", "x8"
+    );
+#else
+    __asm__ __volatile__ (
+        "mov	x2, -1\n\t"
+        "mov	x3, 1\n\t"
+        "mov  x4, -1\n\t"
+        "ldp	x5, x6, [%[a], 368]\n\t"
+        "ldp	x7, x8, [%[b], 368]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 352]\n\t"
+        "ldp	x7, x8, [%[b], 352]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 336]\n\t"
+        "ldp	x7, x8, [%[b], 336]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 320]\n\t"
+        "ldp	x7, x8, [%[b], 320]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 304]\n\t"
+        "ldp	x7, x8, [%[b], 304]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 288]\n\t"
+        "ldp	x7, x8, [%[b], 288]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 272]\n\t"
+        "ldp	x7, x8, [%[b], 272]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 256]\n\t"
+        "ldp	x7, x8, [%[b], 256]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 240]\n\t"
+        "ldp	x7, x8, [%[b], 240]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 224]\n\t"
+        "ldp	x7, x8, [%[b], 224]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 208]\n\t"
+        "ldp	x7, x8, [%[b], 208]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 192]\n\t"
+        "ldp	x7, x8, [%[b], 192]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 176]\n\t"
+        "ldp	x7, x8, [%[b], 176]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 160]\n\t"
+        "ldp	x7, x8, [%[b], 160]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 144]\n\t"
+        "ldp	x7, x8, [%[b], 144]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 128]\n\t"
+        "ldp	x7, x8, [%[b], 128]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 112]\n\t"
+        "ldp	x7, x8, [%[b], 112]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 96]\n\t"
+        "ldp	x7, x8, [%[b], 96]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 80]\n\t"
+        "ldp	x7, x8, [%[b], 80]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 64]\n\t"
+        "ldp	x7, x8, [%[b], 64]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 48]\n\t"
+        "ldp	x7, x8, [%[b], 48]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 32]\n\t"
+        "ldp	x7, x8, [%[b], 32]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 16]\n\t"
+        "ldp	x7, x8, [%[b], 16]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 0]\n\t"
+        "ldp	x7, x8, [%[b], 0]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "eor	%[a], x2, x4\n\t"
+        : [a] "+r" (a)
+        : [b] "r" (b)
+        : "x2", "x3", "x4", "x5", "x6", "x7", "x8"
+    );
+#endif
+
+    return (int64_t)a;
 }
 
 /* Divide d in a and put remainder into r (m*d + r = a)
@@ -11276,7 +12000,7 @@
  * r  Remainder from the division.
  * returns MP_OKAY indicating success.
  */
-static WC_INLINE int sp_3072_div_48(sp_digit* a, sp_digit* d, sp_digit* m,
+static WC_INLINE int sp_3072_div_48(const sp_digit* a, const sp_digit* d, sp_digit* m,
         sp_digit* r)
 {
     sp_digit t1[96], t2[49];
@@ -11300,7 +12024,7 @@
     }
 
     r1 = sp_3072_cmp_48(t1, d) >= 0;
-    sp_3072_cond_sub_48(r, t1, t2, (sp_digit)0 - r1);
+    sp_3072_cond_sub_48(r, t1, d, (sp_digit)0 - r1);
 
     return MP_OKAY;
 }
@@ -11312,11 +12036,189 @@
  * m  A single precision number that is the modulus to reduce with.
  * returns MP_OKAY indicating success.
  */
-static WC_INLINE int sp_3072_mod_48(sp_digit* r, sp_digit* a, sp_digit* m)
+static WC_INLINE int sp_3072_mod_48(sp_digit* r, const sp_digit* a, const sp_digit* m)
 {
     return sp_3072_div_48(a, m, NULL, r);
 }
 
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+static sp_digit sp_3072_sub_48(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "add	x11, %[a], 384\n\t"
+        "\n1:\n\t"
+        "subs	%[c], xzr, %[c]\n\t"
+        "ldp	x3, x4, [%[a]], #16\n\t"
+        "ldp	x5, x6, [%[a]], #16\n\t"
+        "ldp	x7, x8, [%[b]], #16\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x9, x10, [%[b]], #16\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r]], #16\n\t"
+        "sbcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r]], #16\n\t"
+        "csetm	%[c], cc\n\t"
+        "cmp	%[a], x11\n\t"
+        "b.ne	1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11"
+    );
+
+    return c;
+}
+
+#else
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+static sp_digit sp_3072_sub_48(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    __asm__ __volatile__ (
+        "ldp	x3, x4, [%[a], 0]\n\t"
+        "ldp	x7, x8, [%[b], 0]\n\t"
+        "subs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 16]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 16]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 0]\n\t"
+        "sbcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 16]\n\t"
+        "ldp	x3, x4, [%[a], 32]\n\t"
+        "ldp	x7, x8, [%[b], 32]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 48]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 48]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 32]\n\t"
+        "sbcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 48]\n\t"
+        "ldp	x3, x4, [%[a], 64]\n\t"
+        "ldp	x7, x8, [%[b], 64]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 80]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 80]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 64]\n\t"
+        "sbcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 80]\n\t"
+        "ldp	x3, x4, [%[a], 96]\n\t"
+        "ldp	x7, x8, [%[b], 96]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 112]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 112]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 96]\n\t"
+        "sbcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 112]\n\t"
+        "ldp	x3, x4, [%[a], 128]\n\t"
+        "ldp	x7, x8, [%[b], 128]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 144]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 144]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 128]\n\t"
+        "sbcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 144]\n\t"
+        "ldp	x3, x4, [%[a], 160]\n\t"
+        "ldp	x7, x8, [%[b], 160]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 176]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 176]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 160]\n\t"
+        "sbcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 176]\n\t"
+        "ldp	x3, x4, [%[a], 192]\n\t"
+        "ldp	x7, x8, [%[b], 192]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 208]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 208]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 192]\n\t"
+        "sbcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 208]\n\t"
+        "ldp	x3, x4, [%[a], 224]\n\t"
+        "ldp	x7, x8, [%[b], 224]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 240]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 240]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 224]\n\t"
+        "sbcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 240]\n\t"
+        "ldp	x3, x4, [%[a], 256]\n\t"
+        "ldp	x7, x8, [%[b], 256]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 272]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 272]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 256]\n\t"
+        "sbcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 272]\n\t"
+        "ldp	x3, x4, [%[a], 288]\n\t"
+        "ldp	x7, x8, [%[b], 288]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 304]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 304]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 288]\n\t"
+        "sbcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 304]\n\t"
+        "ldp	x3, x4, [%[a], 320]\n\t"
+        "ldp	x7, x8, [%[b], 320]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 336]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 336]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 320]\n\t"
+        "sbcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 336]\n\t"
+        "ldp	x3, x4, [%[a], 352]\n\t"
+        "ldp	x7, x8, [%[b], 352]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 368]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 368]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 352]\n\t"
+        "sbcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 368]\n\t"
+        "csetm	%[r], cc\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10"
+    );
+
+    return (sp_digit)r;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
 /* Divide d in a and put remainder into r (m*d + r = a)
  * m is not calculated as it is not needed at this time.
  *
@@ -11326,7 +12228,7 @@
  * r  Remainder from the division.
  * returns MP_OKAY indicating success.
  */
-static WC_INLINE int sp_3072_div_48_cond(sp_digit* a, sp_digit* d, sp_digit* m,
+static WC_INLINE int sp_3072_div_48_cond(const sp_digit* a, const sp_digit* d, sp_digit* m,
         sp_digit* r)
 {
     sp_digit t1[96], t2[49];
@@ -11350,8 +12252,16 @@
         }
     }
 
-    r1 = sp_3072_cmp_48(t1, d) >= 0;
-    sp_3072_cond_sub_48(r, t1, t2, (sp_digit)0 - r1);
+    for (i = 47; i > 0; i--) {
+        if (t1[i] != d[i])
+            break;
+    }
+    if (t1[i] >= d[i]) {
+        sp_3072_sub_48(r, t1, d);
+    }
+    else {
+        XMEMCPY(r, t1, sizeof(*t1) * 48);
+    }
 
     return MP_OKAY;
 }
@@ -11363,12 +12273,13 @@
  * m  A single precision number that is the modulus to reduce with.
  * returns MP_OKAY indicating success.
  */
-static WC_INLINE int sp_3072_mod_48_cond(sp_digit* r, sp_digit* a, sp_digit* m)
+static WC_INLINE int sp_3072_mod_48_cond(sp_digit* r, const sp_digit* a, const sp_digit* m)
 {
     return sp_3072_div_48_cond(a, m, NULL, r);
 }
 
-#if defined(SP_RSA_PRIVATE_EXP_D) || defined(WOLFSSL_HAVE_SP_DH)
+#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
+                                                     defined(WOLFSSL_HAVE_SP_DH)
 #ifdef WOLFSSL_SP_SMALL
 /* Modular exponentiate a to the e mod m. (r = a^e mod m)
  *
@@ -11379,8 +12290,8 @@
  * m     A single precision number that is the modulus.
  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
  */
-static int sp_3072_mod_exp_48(sp_digit* r, sp_digit* a, sp_digit* e,
-        int bits, sp_digit* m, int reduceA)
+static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
 {
 #ifndef WOLFSSL_SMALL_STACK
     sp_digit t[16][96];
@@ -11399,27 +12310,28 @@
 #ifdef WOLFSSL_SMALL_STACK
     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 96, NULL,
                             DYNAMIC_TYPE_TMP_BUFFER);
-    if (td == NULL)
+    if (td == NULL) {
         err = MEMORY_E;
-
-    if (err == MP_OKAY) {
-        for (i=0; i<16; i++)
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<16; i++) {
             t[i] = td + i * 96;
+        }
+#endif
         norm = t[0];
-    }
-#else
-    norm = t[0];
-#endif
-
-    if (err == MP_OKAY) {
+
         sp_3072_mont_setup(m, &mp);
         sp_3072_mont_norm_48(norm, m);
 
-        XMEMSET(t[1], 0, sizeof(sp_digit) * 48);
-        if (reduceA) {
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 48U);
+        if (reduceA != 0) {
             err = sp_3072_mod_48(t[1] + 48, a, m);
-            if (err == MP_OKAY)
+            if (err == MP_OKAY) {
                 err = sp_3072_mod_48(t[1], t[1], m);
+            }
         }
         else {
             XMEMCPY(t[1] + 48, a, sizeof(sp_digit) * 48);
@@ -11445,9 +12357,16 @@
 
         i = (bits - 1) / 64;
         n = e[i--];
-        y = n >> 60;
-        n <<= 4;
-        c = 60;
+        c = bits & 63;
+        if (c == 0) {
+            c = 64;
+        }
+        c -= bits % 4;
+        if (c == 64) {
+            c = 60;
+        }
+        y = (int)(n >> c);
+        n <<= 64 - c;
         XMEMCPY(r, t[y], sizeof(sp_digit) * 48);
         for (; i>=0 || c>=4; ) {
             if (c == 0) {
@@ -11478,7 +12397,7 @@
             sp_3072_mont_mul_48(r, r, t[y], m, mp);
         }
 
-        XMEMSET(&r[48], 0, sizeof(sp_digit) * 48);
+        XMEMSET(&r[48], 0, sizeof(sp_digit) * 48U);
         sp_3072_mont_reduce_48(r, m, mp);
 
         mask = 0 - (sp_3072_cmp_48(r, m) >= 0);
@@ -11486,8 +12405,9 @@
     }
 
 #ifdef WOLFSSL_SMALL_STACK
-    if (td != NULL)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
 #endif
 
     return err;
@@ -11502,8 +12422,8 @@
  * m     A single precision number that is the modulus.
  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
  */
-static int sp_3072_mod_exp_48(sp_digit* r, sp_digit* a, sp_digit* e,
-        int bits, sp_digit* m, int reduceA)
+static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
 {
 #ifndef WOLFSSL_SMALL_STACK
     sp_digit t[32][96];
@@ -11522,27 +12442,28 @@
 #ifdef WOLFSSL_SMALL_STACK
     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 96, NULL,
                             DYNAMIC_TYPE_TMP_BUFFER);
-    if (td == NULL)
+    if (td == NULL) {
         err = MEMORY_E;
-
-    if (err == MP_OKAY) {
-        for (i=0; i<32; i++)
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<32; i++) {
             t[i] = td + i * 96;
+        }
+#endif
         norm = t[0];
-    }
-#else
-    norm = t[0];
-#endif
-
-    if (err == MP_OKAY) {
+
         sp_3072_mont_setup(m, &mp);
         sp_3072_mont_norm_48(norm, m);
 
-        XMEMSET(t[1], 0, sizeof(sp_digit) * 48);
-        if (reduceA) {
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 48U);
+        if (reduceA != 0) {
             err = sp_3072_mod_48(t[1] + 48, a, m);
-            if (err == MP_OKAY)
+            if (err == MP_OKAY) {
                 err = sp_3072_mod_48(t[1], t[1], m);
+            }
         }
         else {
             XMEMCPY(t[1] + 48, a, sizeof(sp_digit) * 48);
@@ -11584,9 +12505,16 @@
 
         i = (bits - 1) / 64;
         n = e[i--];
-        y = n >> 59;
-        n <<= 5;
-        c = 59;
+        c = bits & 63;
+        if (c == 0) {
+            c = 64;
+        }
+        c -= bits % 5;
+        if (c == 64) {
+            c = 59;
+        }
+        y = (int)(n >> c);
+        n <<= 64 - c;
         XMEMCPY(r, t[y], sizeof(sp_digit) * 48);
         for (; i>=0 || c>=5; ) {
             if (c == 0) {
@@ -11617,12 +12545,8 @@
 
             sp_3072_mont_mul_48(r, r, t[y], m, mp);
         }
-        y = e[0] & 0x3;
-        sp_3072_mont_sqr_48(r, r, m, mp);
-        sp_3072_mont_sqr_48(r, r, m, mp);
-        sp_3072_mont_mul_48(r, r, t[y], m, mp);
-
-        XMEMSET(&r[48], 0, sizeof(sp_digit) * 48);
+
+        XMEMSET(&r[48], 0, sizeof(sp_digit) * 48U);
         sp_3072_mont_reduce_48(r, m, mp);
 
         mask = 0 - (sp_3072_cmp_48(r, m) >= 0);
@@ -11630,14 +12554,15 @@
     }
 
 #ifdef WOLFSSL_SMALL_STACK
-    if (td != NULL)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return err;
-}
-#endif /* WOLFSSL_SP_SMALL */
-#endif /* SP_RSA_PRIVATE_EXP_D || WOLFSSL_HAVE_SP_DH */
+    }
+#endif
+
+    return err;
+}
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */
 
 #ifdef WOLFSSL_HAVE_SP_RSA
 /* RSA public key operation.
@@ -11655,15 +12580,15 @@
 int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
     byte* out, word32* outLen)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_digit ad[96], md[48], rd[96];
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit a[96], m[48], r[96];
 #else
     sp_digit* d = NULL;
-#endif
     sp_digit* a;
-    sp_digit *ah;
     sp_digit* m;
     sp_digit* r;
+#endif
+    sp_digit *ah;
     sp_digit e[1];
     int err = MP_OKAY;
 
@@ -11673,10 +12598,10 @@
                                                      mp_count_bits(mm) != 3072))
         err = MP_READ_E;
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     if (err == MP_OKAY) {
         d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 48 * 5, NULL,
-                               DYNAMIC_TYPE_TMP_BUFFER);
+                                                              DYNAMIC_TYPE_RSA);
         if (d == NULL)
             err = MEMORY_E;
     }
@@ -11685,26 +12610,24 @@
         a = d;
         r = a + 48 * 2;
         m = r + 48 * 2;
+    }
+#endif
+
+    if (err == MP_OKAY) {
         ah = a + 48;
-    }
-#else
-    a = ad;
-    m = md;
-    r = rd;
-    ah = a + 48;
-#endif
-
-    if (err == MP_OKAY) {
+
         sp_3072_from_bin(ah, 48, in, inLen);
 #if DIGIT_BIT >= 64
         e[0] = em->dp[0];
 #else
         e[0] = em->dp[0];
-        if (em->used > 1)
+        if (em->used > 1) {
             e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
-#endif
-        if (e[0] == 0)
+        }
+#endif
+        if (e[0] == 0) {
             err = MP_EXPTMOD_E;
+        }
     }
     if (err == MP_OKAY) {
         sp_3072_from_mp(m, 48, mm);
@@ -11730,25 +12653,30 @@
             err = sp_3072_mod_48_cond(a, a, m);
 
             if (err == MP_OKAY) {
-                for (i=63; i>=0; i--)
-                    if (e[0] >> i)
+                for (i = 63; i >= 0; i--) {
+                    if (e[0] >> i) {
                         break;
+                    }
+                }
 
                 XMEMCPY(r, a, sizeof(sp_digit) * 48);
                 for (i--; i>=0; i--) {
                     sp_3072_mont_sqr_48(r, r, m, mp);
-                    if (((e[0] >> i) & 1) == 1)
+                    if (((e[0] >> i) & 1) == 1) {
                         sp_3072_mont_mul_48(r, r, a, m, mp);
+                    }
                 }
                 XMEMSET(&r[48], 0, sizeof(sp_digit) * 48);
                 sp_3072_mont_reduce_48(r, m, mp);
 
                 for (i = 47; i > 0; i--) {
-                    if (r[i] != m[i])
+                    if (r[i] != m[i]) {
                         break;
+                    }
                 }
-                if (r[i] >= m[i])
+                if (r[i] >= m[i]) {
                     sp_3072_sub_in_place_48(r, m);
+                }
             }
         }
     }
@@ -11758,12 +12686,201 @@
         *outLen = 384;
     }
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (d != NULL)
-        XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return err;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+#endif
+
+    return err;
+}
+
+#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
+    sp_digit* a;
+    sp_digit* d = NULL;
+    sp_digit* m;
+    sp_digit* r;
+    int err = MP_OKAY;
+
+    (void)pm;
+    (void)qm;
+    (void)dpm;
+    (void)dqm;
+    (void)qim;
+
+    if (*outLen < 384U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(dm) > 3072) {
+           err = MP_READ_E;
+        }
+        if (inLen > 384) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 3072) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 48 * 4, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        a = d + 48;
+        m = a + 96;
+        r = a;
+
+        sp_3072_from_bin(a, 48, in, inLen);
+        sp_3072_from_mp(d, 48, dm);
+        sp_3072_from_mp(m, 48, mm);
+        err = sp_3072_mod_exp_48(r, a, d, 3072, m, 0);
+    }
+    if (err == MP_OKAY) {
+        sp_3072_to_bin(r, out);
+        *outLen = 384;
+    }
+
+    if (d != NULL) {
+        XMEMSET(d, 0, sizeof(sp_digit) * 48);
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+
+    return err;
+#else
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r  A single precision number representing conditional add result.
+ * a  A single precision number to add with.
+ * b  A single precision number to add.
+ * m  Mask value to apply.
+ */
+static sp_digit sp_3072_cond_add_24(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	x8, #0\n\t"
+        "1:\n\t"
+        "adds	%[c], %[c], #-1\n\t"
+        "ldr	x4, [%[a], x8]\n\t"
+        "ldr	x5, [%[b], x8]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "adcs	x4, x4, x5\n\t"
+        "cset	%[c], cs\n\t"
+        "str	x4, [%[r], x8]\n\t"
+        "add	x8, x8, #8\n\t"
+        "cmp	x8, 192\n\t"
+        "b.lt	1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12"
+    );
+
+    return c;
+#else
+    __asm__ __volatile__ (
+
+        "ldp	x5, x7, [%[b], 0]\n\t"
+        "ldp	x11, x12, [%[b], 16]\n\t"
+        "ldp	x4, x6, [%[a], 0]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 16]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "adds	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "adcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "adcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 0]\n\t"
+        "adcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 16]\n\t"
+        "ldp	x5, x7, [%[b], 32]\n\t"
+        "ldp	x11, x12, [%[b], 48]\n\t"
+        "ldp	x4, x6, [%[a], 32]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 48]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "adcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "adcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "adcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 32]\n\t"
+        "adcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 48]\n\t"
+        "ldp	x5, x7, [%[b], 64]\n\t"
+        "ldp	x11, x12, [%[b], 80]\n\t"
+        "ldp	x4, x6, [%[a], 64]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 80]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "adcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "adcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "adcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 64]\n\t"
+        "adcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 80]\n\t"
+        "ldp	x5, x7, [%[b], 96]\n\t"
+        "ldp	x11, x12, [%[b], 112]\n\t"
+        "ldp	x4, x6, [%[a], 96]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 112]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "adcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "adcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "adcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 96]\n\t"
+        "adcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 112]\n\t"
+        "ldp	x5, x7, [%[b], 128]\n\t"
+        "ldp	x11, x12, [%[b], 144]\n\t"
+        "ldp	x4, x6, [%[a], 128]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 144]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "adcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "adcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "adcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 128]\n\t"
+        "adcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 144]\n\t"
+        "ldp	x5, x7, [%[b], 160]\n\t"
+        "ldp	x11, x12, [%[b], 176]\n\t"
+        "ldp	x4, x6, [%[a], 160]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 176]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "adcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "adcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "adcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 160]\n\t"
+        "adcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 176]\n\t"
+        "cset	%[r], cs\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12"
+    );
+
+    return (sp_digit)r;
+#endif /* WOLFSSL_SP_SMALL */
 }
 
 /* RSA private key operation.
@@ -11787,23 +12904,22 @@
     mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
     byte* out, word32* outLen)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_digit ad[48 * 2];
-    sp_digit pd[24], qd[24], dpd[24];
-    sp_digit tmpad[48], tmpbd[48];
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit a[48 * 2];
+    sp_digit p[24], q[24], dp[24];
+    sp_digit tmpa[48], tmpb[48];
 #else
     sp_digit* t = NULL;
-#endif
     sp_digit* a;
     sp_digit* p;
     sp_digit* q;
     sp_digit* dp;
-    sp_digit* dq;
-    sp_digit* qi;
-    sp_digit* tmp;
     sp_digit* tmpa;
     sp_digit* tmpb;
+#endif
     sp_digit* r;
+    sp_digit* qi;
+    sp_digit* dq;
     sp_digit c;
     int err = MP_OKAY;
 
@@ -11815,10 +12931,10 @@
     if (err == MP_OKAY && (inLen > 384 || mp_count_bits(mm) != 3072))
         err = MP_READ_E;
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     if (err == MP_OKAY) {
         t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 24 * 11, NULL,
-                               DYNAMIC_TYPE_TMP_BUFFER);
+                                                              DYNAMIC_TYPE_RSA);
         if (t == NULL)
             err = MEMORY_E;
     }
@@ -11830,20 +12946,16 @@
         tmpa = qi + 24;
         tmpb = tmpa + 48;
 
-        tmp = t;
-        r = tmp + 48;
-    }
-#else
-    r = a = ad;
-    p = pd;
-    q = qd;
-    qi = dq = dp = dpd;
-    tmpa = tmpad;
-    tmpb = tmpbd;
-    tmp = a + 48;
-#endif
-
-    if (err == MP_OKAY) {
+        r = t + 48;
+    }
+#else
+#endif
+
+    if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+        r = a;
+        qi = dq = dp;
+#endif
         sp_3072_from_bin(a, 48, in, inLen);
         sp_3072_from_mp(p, 24, pm);
         sp_3072_from_mp(q, 24, qm);
@@ -11858,8 +12970,8 @@
 
     if (err == MP_OKAY) {
         c = sp_3072_sub_in_place_24(tmpa, tmpb);
-        sp_3072_mask_24(tmp, p, c);
-        sp_3072_add_24(tmpa, tmpa, tmp);
+        c += sp_3072_cond_add_24(tmpa, tmpa, p, c);
+        sp_3072_cond_add_24(tmpa, tmpa, p, c);
 
         sp_3072_from_mp(qi, 24, qim);
         sp_3072_mul_24(tmpa, tmpa, qi);
@@ -11875,34 +12987,37 @@
         *outLen = 384;
     }
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     if (t != NULL) {
         XMEMSET(t, 0, sizeof(sp_digit) * 24 * 11);
-        XFREE(t, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    }
-#else
-    XMEMSET(tmpad, 0, sizeof(tmpad));
-    XMEMSET(tmpbd, 0, sizeof(tmpbd));
-    XMEMSET(pd, 0, sizeof(pd));
-    XMEMSET(qd, 0, sizeof(qd));
-    XMEMSET(dpd, 0, sizeof(dpd));
-#endif
-
-    return err;
-}
+        XFREE(t, NULL, DYNAMIC_TYPE_RSA);
+    }
+#else
+    XMEMSET(tmpa, 0, sizeof(tmpa));
+    XMEMSET(tmpb, 0, sizeof(tmpb));
+    XMEMSET(p,    0, sizeof(p));
+    XMEMSET(q,    0, sizeof(q));
+    XMEMSET(dp,   0, sizeof(dp));
+#endif
+
+    return err;
+}
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
+#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
 #endif /* WOLFSSL_HAVE_SP_RSA */
-#ifdef WOLFSSL_HAVE_SP_DH
+#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
+                                              !defined(WOLFSSL_RSA_PUBLIC_ONLY))
 /* Convert an array of sp_digit to an mp_int.
  *
  * a  A single precision integer.
  * r  A multi-precision integer.
  */
-static int sp_3072_to_mp(sp_digit* a, mp_int* r)
+static int sp_3072_to_mp(const sp_digit* a, mp_int* r)
 {
     int err;
 
     err = mp_grow(r, (3072 + DIGIT_BIT - 1) / DIGIT_BIT);
-    if (err == MP_OKAY) {
+    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
 #if DIGIT_BIT == 64
         XMEMCPY(r->dp, a, sizeof(sp_digit) * 48);
         r->used = 48;
@@ -11912,14 +13027,19 @@
 
         r->dp[0] = 0;
         for (i = 0; i < 48; i++) {
-            r->dp[j] |= a[i] << s;
-            r->dp[j] &= (1l << DIGIT_BIT) - 1;
+            r->dp[j] |= (mp_digit)(a[i] << s);
+            r->dp[j] &= (1L << DIGIT_BIT) - 1;
             s = DIGIT_BIT - s;
-            r->dp[++j] = a[i] >> s;
+            r->dp[++j] = (mp_digit)(a[i] >> s);
             while (s + DIGIT_BIT <= 64) {
                 s += DIGIT_BIT;
-                r->dp[j] &= (1l << DIGIT_BIT) - 1;
-                r->dp[++j] = a[i] >> s;
+                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+                if (s == SP_WORD_SIZE) {
+                    r->dp[j] = 0;
+                }
+                else {
+                    r->dp[j] = (mp_digit)(a[i] >> s);
+                }
             }
             s = 64 - s;
         }
@@ -11932,15 +13052,16 @@
         for (i = 0; i < 48; i++) {
             r->dp[j] |= ((mp_digit)a[i]) << s;
             if (s + 64 >= DIGIT_BIT) {
-    #if DIGIT_BIT < 64
-                r->dp[j] &= (1l << DIGIT_BIT) - 1;
+    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+                r->dp[j] &= (1L << DIGIT_BIT) - 1;
     #endif
                 s = DIGIT_BIT - s;
                 r->dp[++j] = a[i] >> s;
                 s = 64 - s;
             }
-            else
+            else {
                 s += 64;
+            }
         }
         r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT;
         mp_clamp(r);
@@ -11956,7 +13077,7 @@
  * exp   Exponent. MP integer.
  * mod   Modulus. MP integer.
  * res   Result. MP integer.
- * returs 0 on success, MP_READ_E if there are too many bytes in an array
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
  * and MEMORY_E if memory allocation fails.
  */
 int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
@@ -11966,12 +13087,23 @@
     sp_digit* r = b;
     int expBits = mp_count_bits(exp);
 
-    if (mp_count_bits(base) > 3072 || expBits > 3072 ||
-                                                   mp_count_bits(mod) != 3072) {
+    if (mp_count_bits(base) > 3072) {
         err = MP_READ_E;
     }
 
     if (err == MP_OKAY) {
+        if (expBits > 3072) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 3072) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
         sp_3072_from_mp(b, 48, base);
         sp_3072_from_mp(e, 48, exp);
         sp_3072_from_mp(m, 48, mod);
@@ -11988,6 +13120,419 @@
     return err;
 }
 
+#ifdef WOLFSSL_HAVE_SP_DH
+
+#ifdef HAVE_FFDHE_3072
+static void sp_3072_lshift_48(sp_digit* r, sp_digit* a, byte n)
+{
+    __asm__ __volatile__ (
+        "mov	x6, 63\n\t"
+        "sub	x6, x6, %[n]\n\t"
+        "ldr	x3, [%[a], 376]\n\t"
+        "lsr	x4, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x4, x4, x6\n\t"
+        "ldr	x2, [%[a], 368]\n\t"
+        "str	x4, [%[r], 384]\n\t"
+        "lsr	x5, x2, 1\n\t"
+        "lsl	x2, x2, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x3, x3, x5\n\t"
+        "ldr	x4, [%[a], 360]\n\t"
+        "str	x3, [%[r], 376]\n\t"
+        "lsr	x5, x4, 1\n\t"
+        "lsl	x4, x4, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x2, x2, x5\n\t"
+        "ldr	x3, [%[a], 352]\n\t"
+        "str	x2, [%[r], 368]\n\t"
+        "lsr	x5, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x4, x4, x5\n\t"
+        "ldr	x2, [%[a], 344]\n\t"
+        "str	x4, [%[r], 360]\n\t"
+        "lsr	x5, x2, 1\n\t"
+        "lsl	x2, x2, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x3, x3, x5\n\t"
+        "ldr	x4, [%[a], 336]\n\t"
+        "str	x3, [%[r], 352]\n\t"
+        "lsr	x5, x4, 1\n\t"
+        "lsl	x4, x4, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x2, x2, x5\n\t"
+        "ldr	x3, [%[a], 328]\n\t"
+        "str	x2, [%[r], 344]\n\t"
+        "lsr	x5, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x4, x4, x5\n\t"
+        "ldr	x2, [%[a], 320]\n\t"
+        "str	x4, [%[r], 336]\n\t"
+        "lsr	x5, x2, 1\n\t"
+        "lsl	x2, x2, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x3, x3, x5\n\t"
+        "ldr	x4, [%[a], 312]\n\t"
+        "str	x3, [%[r], 328]\n\t"
+        "lsr	x5, x4, 1\n\t"
+        "lsl	x4, x4, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x2, x2, x5\n\t"
+        "ldr	x3, [%[a], 304]\n\t"
+        "str	x2, [%[r], 320]\n\t"
+        "lsr	x5, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x4, x4, x5\n\t"
+        "ldr	x2, [%[a], 296]\n\t"
+        "str	x4, [%[r], 312]\n\t"
+        "lsr	x5, x2, 1\n\t"
+        "lsl	x2, x2, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x3, x3, x5\n\t"
+        "ldr	x4, [%[a], 288]\n\t"
+        "str	x3, [%[r], 304]\n\t"
+        "lsr	x5, x4, 1\n\t"
+        "lsl	x4, x4, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x2, x2, x5\n\t"
+        "ldr	x3, [%[a], 280]\n\t"
+        "str	x2, [%[r], 296]\n\t"
+        "lsr	x5, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x4, x4, x5\n\t"
+        "ldr	x2, [%[a], 272]\n\t"
+        "str	x4, [%[r], 288]\n\t"
+        "lsr	x5, x2, 1\n\t"
+        "lsl	x2, x2, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x3, x3, x5\n\t"
+        "ldr	x4, [%[a], 264]\n\t"
+        "str	x3, [%[r], 280]\n\t"
+        "lsr	x5, x4, 1\n\t"
+        "lsl	x4, x4, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x2, x2, x5\n\t"
+        "ldr	x3, [%[a], 256]\n\t"
+        "str	x2, [%[r], 272]\n\t"
+        "lsr	x5, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x4, x4, x5\n\t"
+        "ldr	x2, [%[a], 248]\n\t"
+        "str	x4, [%[r], 264]\n\t"
+        "lsr	x5, x2, 1\n\t"
+        "lsl	x2, x2, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x3, x3, x5\n\t"
+        "ldr	x4, [%[a], 240]\n\t"
+        "str	x3, [%[r], 256]\n\t"
+        "lsr	x5, x4, 1\n\t"
+        "lsl	x4, x4, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x2, x2, x5\n\t"
+        "ldr	x3, [%[a], 232]\n\t"
+        "str	x2, [%[r], 248]\n\t"
+        "lsr	x5, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x4, x4, x5\n\t"
+        "ldr	x2, [%[a], 224]\n\t"
+        "str	x4, [%[r], 240]\n\t"
+        "lsr	x5, x2, 1\n\t"
+        "lsl	x2, x2, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x3, x3, x5\n\t"
+        "ldr	x4, [%[a], 216]\n\t"
+        "str	x3, [%[r], 232]\n\t"
+        "lsr	x5, x4, 1\n\t"
+        "lsl	x4, x4, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x2, x2, x5\n\t"
+        "ldr	x3, [%[a], 208]\n\t"
+        "str	x2, [%[r], 224]\n\t"
+        "lsr	x5, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x4, x4, x5\n\t"
+        "ldr	x2, [%[a], 200]\n\t"
+        "str	x4, [%[r], 216]\n\t"
+        "lsr	x5, x2, 1\n\t"
+        "lsl	x2, x2, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x3, x3, x5\n\t"
+        "ldr	x4, [%[a], 192]\n\t"
+        "str	x3, [%[r], 208]\n\t"
+        "lsr	x5, x4, 1\n\t"
+        "lsl	x4, x4, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x2, x2, x5\n\t"
+        "ldr	x3, [%[a], 184]\n\t"
+        "str	x2, [%[r], 200]\n\t"
+        "lsr	x5, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x4, x4, x5\n\t"
+        "ldr	x2, [%[a], 176]\n\t"
+        "str	x4, [%[r], 192]\n\t"
+        "lsr	x5, x2, 1\n\t"
+        "lsl	x2, x2, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x3, x3, x5\n\t"
+        "ldr	x4, [%[a], 168]\n\t"
+        "str	x3, [%[r], 184]\n\t"
+        "lsr	x5, x4, 1\n\t"
+        "lsl	x4, x4, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x2, x2, x5\n\t"
+        "ldr	x3, [%[a], 160]\n\t"
+        "str	x2, [%[r], 176]\n\t"
+        "lsr	x5, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x4, x4, x5\n\t"
+        "ldr	x2, [%[a], 152]\n\t"
+        "str	x4, [%[r], 168]\n\t"
+        "lsr	x5, x2, 1\n\t"
+        "lsl	x2, x2, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x3, x3, x5\n\t"
+        "ldr	x4, [%[a], 144]\n\t"
+        "str	x3, [%[r], 160]\n\t"
+        "lsr	x5, x4, 1\n\t"
+        "lsl	x4, x4, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x2, x2, x5\n\t"
+        "ldr	x3, [%[a], 136]\n\t"
+        "str	x2, [%[r], 152]\n\t"
+        "lsr	x5, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x4, x4, x5\n\t"
+        "ldr	x2, [%[a], 128]\n\t"
+        "str	x4, [%[r], 144]\n\t"
+        "lsr	x5, x2, 1\n\t"
+        "lsl	x2, x2, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x3, x3, x5\n\t"
+        "ldr	x4, [%[a], 120]\n\t"
+        "str	x3, [%[r], 136]\n\t"
+        "lsr	x5, x4, 1\n\t"
+        "lsl	x4, x4, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x2, x2, x5\n\t"
+        "ldr	x3, [%[a], 112]\n\t"
+        "str	x2, [%[r], 128]\n\t"
+        "lsr	x5, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x4, x4, x5\n\t"
+        "ldr	x2, [%[a], 104]\n\t"
+        "str	x4, [%[r], 120]\n\t"
+        "lsr	x5, x2, 1\n\t"
+        "lsl	x2, x2, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x3, x3, x5\n\t"
+        "ldr	x4, [%[a], 96]\n\t"
+        "str	x3, [%[r], 112]\n\t"
+        "lsr	x5, x4, 1\n\t"
+        "lsl	x4, x4, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x2, x2, x5\n\t"
+        "ldr	x3, [%[a], 88]\n\t"
+        "str	x2, [%[r], 104]\n\t"
+        "lsr	x5, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x4, x4, x5\n\t"
+        "ldr	x2, [%[a], 80]\n\t"
+        "str	x4, [%[r], 96]\n\t"
+        "lsr	x5, x2, 1\n\t"
+        "lsl	x2, x2, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x3, x3, x5\n\t"
+        "ldr	x4, [%[a], 72]\n\t"
+        "str	x3, [%[r], 88]\n\t"
+        "lsr	x5, x4, 1\n\t"
+        "lsl	x4, x4, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x2, x2, x5\n\t"
+        "ldr	x3, [%[a], 64]\n\t"
+        "str	x2, [%[r], 80]\n\t"
+        "lsr	x5, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x4, x4, x5\n\t"
+        "ldr	x2, [%[a], 56]\n\t"
+        "str	x4, [%[r], 72]\n\t"
+        "lsr	x5, x2, 1\n\t"
+        "lsl	x2, x2, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x3, x3, x5\n\t"
+        "ldr	x4, [%[a], 48]\n\t"
+        "str	x3, [%[r], 64]\n\t"
+        "lsr	x5, x4, 1\n\t"
+        "lsl	x4, x4, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x2, x2, x5\n\t"
+        "ldr	x3, [%[a], 40]\n\t"
+        "str	x2, [%[r], 56]\n\t"
+        "lsr	x5, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x4, x4, x5\n\t"
+        "ldr	x2, [%[a], 32]\n\t"
+        "str	x4, [%[r], 48]\n\t"
+        "lsr	x5, x2, 1\n\t"
+        "lsl	x2, x2, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x3, x3, x5\n\t"
+        "ldr	x4, [%[a], 24]\n\t"
+        "str	x3, [%[r], 40]\n\t"
+        "lsr	x5, x4, 1\n\t"
+        "lsl	x4, x4, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x2, x2, x5\n\t"
+        "ldr	x3, [%[a], 16]\n\t"
+        "str	x2, [%[r], 32]\n\t"
+        "lsr	x5, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x4, x4, x5\n\t"
+        "ldr	x2, [%[a], 8]\n\t"
+        "str	x4, [%[r], 24]\n\t"
+        "lsr	x5, x2, 1\n\t"
+        "lsl	x2, x2, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x3, x3, x5\n\t"
+        "ldr	x4, [%[a], 0]\n\t"
+        "str	x3, [%[r], 16]\n\t"
+        "lsr	x5, x4, 1\n\t"
+        "lsl	x4, x4, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x2, x2, x5\n\t"
+        "str	x4, [%[r]]\n\t"
+        "str	x2, [%[r], 8]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [n] "r" (n)
+        : "memory", "x2", "x3", "x4", "x5", "x6"
+    );
+}
+
+/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
+ *
+ * r     A single precision number that is the result of the operation.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_3072_mod_exp_2_48(sp_digit* r, const sp_digit* e, int bits,
+        const sp_digit* m)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit nd[96];
+    sp_digit td[49];
+#else
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit* tmp;
+    sp_digit mp = 1;
+    sp_digit n, o;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 145, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        norm = td;
+        tmp  = td + 96;
+#else
+        norm = nd;
+        tmp  = td;
+#endif
+
+        sp_3072_mont_setup(m, &mp);
+        sp_3072_mont_norm_48(norm, m);
+
+        i = (bits - 1) / 64;
+        n = e[i--];
+        c = bits & 63;
+        if (c == 0) {
+            c = 64;
+        }
+        c -= bits % 6;
+        if (c == 64) {
+            c = 58;
+        }
+        y = (int)(n >> c);
+        n <<= 64 - c;
+        sp_3072_lshift_48(r, norm, y);
+        for (; i>=0 || c>=6; ) {
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 58;
+                n <<= 6;
+                c = 58;
+            }
+            else if (c < 6) {
+                y = n >> 58;
+                n = e[i--];
+                c = 6 - c;
+                y |= n >> (64 - c);
+                n <<= c;
+                c = 64 - c;
+            }
+            else {
+                y = (n >> 58) & 0x3f;
+                n <<= 6;
+                c -= 6;
+            }
+
+            sp_3072_mont_sqr_48(r, r, m, mp);
+            sp_3072_mont_sqr_48(r, r, m, mp);
+            sp_3072_mont_sqr_48(r, r, m, mp);
+            sp_3072_mont_sqr_48(r, r, m, mp);
+            sp_3072_mont_sqr_48(r, r, m, mp);
+            sp_3072_mont_sqr_48(r, r, m, mp);
+
+            sp_3072_lshift_48(r, r, y);
+            sp_3072_mul_d_48(tmp, norm, r[48]);
+            r[48] = 0;
+            o = sp_3072_add_48(r, r, tmp);
+            sp_3072_cond_sub_48(r, r, m, (sp_digit)0 - o);
+        }
+
+        XMEMSET(&r[48], 0, sizeof(sp_digit) * 48U);
+        sp_3072_mont_reduce_48(r, m, mp);
+
+        mask = 0 - (sp_3072_cmp_48(r, m) >= 0);
+        sp_3072_cond_sub_48(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#endif /* HAVE_FFDHE_3072 */
+
 /* Perform the modular exponentiation for Diffie-Hellman.
  *
  * base     Base.
@@ -11997,7 +13542,7 @@
  * out      Buffer to hold big-endian bytes of exponentiation result.
  *          Must be at least 384 bytes long.
  * outLen   Length, in bytes, of exponentiation result.
- * returs 0 on success, MP_READ_E if there are too many bytes in an array
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
  * and MEMORY_E if memory allocation fails.
  */
 int sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen,
@@ -12008,17 +13553,34 @@
     sp_digit* r = b;
     word32 i;
 
-    if (mp_count_bits(base) > 3072 || expLen > 384 ||
-                                                   mp_count_bits(mod) != 3072) {
+    if (mp_count_bits(base) > 3072) {
         err = MP_READ_E;
     }
 
     if (err == MP_OKAY) {
+        if (expLen > 384) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 3072) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
         sp_3072_from_mp(b, 48, base);
         sp_3072_from_bin(e, 48, exp, expLen);
         sp_3072_from_mp(m, 48, mod);
 
-        err = sp_3072_mod_exp_48(r, b, e, expLen * 8, m, 0);
+    #ifdef HAVE_FFDHE_3072
+        if (base->used == 1 && base->dp[0] == 2 && m[47] == (sp_digit)-1)
+            err = sp_3072_mod_exp_2_48(r, e, expLen * 8, m);
+        else
+    #endif
+            err = sp_3072_mod_exp_48(r, b, e, expLen * 8, m, 0);
+
     }
 
     if (err == MP_OKAY) {
@@ -12035,118 +13597,5189 @@
 
     return err;
 }
-
 #endif /* WOLFSSL_HAVE_SP_DH */
 
-#endif /* WOLFSSL_SP_NO_3072 */
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base  Base. MP integer.
+ * exp   Exponent. MP integer.
+ * mod   Modulus. MP integer.
+ * res   Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+    int err = MP_OKAY;
+    sp_digit b[48], e[24], m[24];
+    sp_digit* r = b;
+    int expBits = mp_count_bits(exp);
+
+    if (mp_count_bits(base) > 1536) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expBits > 1536) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 1536) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_3072_from_mp(b, 24, base);
+        sp_3072_from_mp(e, 24, exp);
+        sp_3072_from_mp(m, 24, mod);
+
+        err = sp_3072_mod_exp_24(r, b, e, expBits, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        XMEMSET(r + 24, 0, sizeof(*r) * 24U);
+        err = sp_3072_to_mp(r, res);
+        res->used = mod->used;
+        mp_clamp(res);
+    }
+
+    XMEMSET(e, 0, sizeof(e));
+
+    return err;
+}
+
+#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
+
+#endif /* !WOLFSSL_SP_NO_3072 */
+
+#ifdef WOLFSSL_SP_4096
+/* Read big endian unsigned byte array into r.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
+ * a  Byte array.
+ * n  Number of bytes in array to read.
+ */
+static void sp_4096_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j;
+    byte* d;
+
+    for (i = n - 1,j = 0; i >= 7; i -= 8) {
+        r[j]  = ((sp_digit)a[i - 0] <<  0) |
+                ((sp_digit)a[i - 1] <<  8) |
+                ((sp_digit)a[i - 2] << 16) |
+                ((sp_digit)a[i - 3] << 24) |
+                ((sp_digit)a[i - 4] << 32) |
+                ((sp_digit)a[i - 5] << 40) |
+                ((sp_digit)a[i - 6] << 48) |
+                ((sp_digit)a[i - 7] << 56);
+        j++;
+    }
+
+    if (i >= 0) {
+        r[j] = 0;
+
+        d = (byte*)r;
+        switch (i) {
+            case 6: d[n - 1 - 6] = a[6]; //fallthrough
+            case 5: d[n - 1 - 5] = a[5]; //fallthrough
+            case 4: d[n - 1 - 4] = a[4]; //fallthrough
+            case 3: d[n - 1 - 3] = a[3]; //fallthrough
+            case 2: d[n - 1 - 2] = a[2]; //fallthrough
+            case 1: d[n - 1 - 1] = a[1]; //fallthrough
+            case 0: d[n - 1 - 0] = a[0]; //fallthrough
+        }
+        j++;
+    }
+
+    for (; j < size; j++) {
+        r[j] = 0;
+    }
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
+ * a  A multi-precision integer.
+ */
+static void sp_4096_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 64
+    int j;
+
+    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+    for (j = a->used; j < size; j++) {
+        r[j] = 0;
+    }
+#elif DIGIT_BIT > 64
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
+        r[j] &= 0xffffffffffffffffl;
+        s = 64U - s;
+        if (j + 1 >= size) {
+            break;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 64U) <= (word32)DIGIT_BIT) {
+            s += 64U;
+            r[j] &= 0xffffffffffffffffl;
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#else
+    int i, j = 0, s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i]) << s;
+        if (s + DIGIT_BIT >= 64) {
+            r[j] &= 0xffffffffffffffffl;
+            if (j + 1 >= size) {
+                break;
+            }
+            s = 64 - s;
+            if (s == DIGIT_BIT) {
+                r[++j] = 0;
+                s = 0;
+            }
+            else {
+                r[++j] = a->dp[i] >> s;
+                s = DIGIT_BIT - s;
+            }
+        }
+        else {
+            s += DIGIT_BIT;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#endif
+}
+
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 512
+ *
+ * r  A single precision integer.
+ * a  Byte array.
+ */
+static void sp_4096_to_bin(sp_digit* r, byte* a)
+{
+    int i, j;
+
+    for (i = 63, j = 0; i >= 0; i--) {
+        a[j++] = r[i] >> 56;
+        a[j++] = r[i] >> 48;
+        a[j++] = r[i] >> 40;
+        a[j++] = r[i] >> 32;
+        a[j++] = r[i] >> 24;
+        a[j++] = r[i] >> 16;
+        a[j++] = r[i] >> 8;
+        a[j++] = r[i] >> 0;
+    }
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+static sp_digit sp_4096_add_32(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    __asm__ __volatile__ (
+        "ldp	x3, x4, [%[a], 0]\n\t"
+        "ldp	x7, x8, [%[b], 0]\n\t"
+        "adds	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 16]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 16]\n\t"
+        "adcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 0]\n\t"
+        "adcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 16]\n\t"
+        "ldp	x3, x4, [%[a], 32]\n\t"
+        "ldp	x7, x8, [%[b], 32]\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 48]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 48]\n\t"
+        "adcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 32]\n\t"
+        "adcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 48]\n\t"
+        "ldp	x3, x4, [%[a], 64]\n\t"
+        "ldp	x7, x8, [%[b], 64]\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 80]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 80]\n\t"
+        "adcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 64]\n\t"
+        "adcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 80]\n\t"
+        "ldp	x3, x4, [%[a], 96]\n\t"
+        "ldp	x7, x8, [%[b], 96]\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 112]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 112]\n\t"
+        "adcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 96]\n\t"
+        "adcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 112]\n\t"
+        "ldp	x3, x4, [%[a], 128]\n\t"
+        "ldp	x7, x8, [%[b], 128]\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 144]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 144]\n\t"
+        "adcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 128]\n\t"
+        "adcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 144]\n\t"
+        "ldp	x3, x4, [%[a], 160]\n\t"
+        "ldp	x7, x8, [%[b], 160]\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 176]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 176]\n\t"
+        "adcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 160]\n\t"
+        "adcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 176]\n\t"
+        "ldp	x3, x4, [%[a], 192]\n\t"
+        "ldp	x7, x8, [%[b], 192]\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 208]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 208]\n\t"
+        "adcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 192]\n\t"
+        "adcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 208]\n\t"
+        "ldp	x3, x4, [%[a], 224]\n\t"
+        "ldp	x7, x8, [%[b], 224]\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 240]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 240]\n\t"
+        "adcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 224]\n\t"
+        "adcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 240]\n\t"
+        "cset	%[r], cs\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10"
+    );
+
+    return (sp_digit)r;
+}
+
+/* Sub b from a into a. (a -= b)
+ *
+ * a  A single precision integer and result.
+ * b  A single precision integer.
+ */
+static sp_digit sp_4096_sub_in_place_64(sp_digit* a, const sp_digit* b)
+{
+    __asm__ __volatile__ (
+        "ldp	x2, x3, [%[a], 0]\n\t"
+        "ldp	x6, x7, [%[b], 0]\n\t"
+        "subs	x2, x2, x6\n\t"
+        "ldp	x4, x5, [%[a], 16]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x8, x9, [%[b], 16]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "stp	x2, x3, [%[a], 0]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x4, x5, [%[a], 16]\n\t"
+        "ldp	x2, x3, [%[a], 32]\n\t"
+        "ldp	x6, x7, [%[b], 32]\n\t"
+        "sbcs	x2, x2, x6\n\t"
+        "ldp	x4, x5, [%[a], 48]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x8, x9, [%[b], 48]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "stp	x2, x3, [%[a], 32]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x4, x5, [%[a], 48]\n\t"
+        "ldp	x2, x3, [%[a], 64]\n\t"
+        "ldp	x6, x7, [%[b], 64]\n\t"
+        "sbcs	x2, x2, x6\n\t"
+        "ldp	x4, x5, [%[a], 80]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x8, x9, [%[b], 80]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "stp	x2, x3, [%[a], 64]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x4, x5, [%[a], 80]\n\t"
+        "ldp	x2, x3, [%[a], 96]\n\t"
+        "ldp	x6, x7, [%[b], 96]\n\t"
+        "sbcs	x2, x2, x6\n\t"
+        "ldp	x4, x5, [%[a], 112]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x8, x9, [%[b], 112]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "stp	x2, x3, [%[a], 96]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x4, x5, [%[a], 112]\n\t"
+        "ldp	x2, x3, [%[a], 128]\n\t"
+        "ldp	x6, x7, [%[b], 128]\n\t"
+        "sbcs	x2, x2, x6\n\t"
+        "ldp	x4, x5, [%[a], 144]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x8, x9, [%[b], 144]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "stp	x2, x3, [%[a], 128]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x4, x5, [%[a], 144]\n\t"
+        "ldp	x2, x3, [%[a], 160]\n\t"
+        "ldp	x6, x7, [%[b], 160]\n\t"
+        "sbcs	x2, x2, x6\n\t"
+        "ldp	x4, x5, [%[a], 176]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x8, x9, [%[b], 176]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "stp	x2, x3, [%[a], 160]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x4, x5, [%[a], 176]\n\t"
+        "ldp	x2, x3, [%[a], 192]\n\t"
+        "ldp	x6, x7, [%[b], 192]\n\t"
+        "sbcs	x2, x2, x6\n\t"
+        "ldp	x4, x5, [%[a], 208]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x8, x9, [%[b], 208]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "stp	x2, x3, [%[a], 192]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x4, x5, [%[a], 208]\n\t"
+        "ldp	x2, x3, [%[a], 224]\n\t"
+        "ldp	x6, x7, [%[b], 224]\n\t"
+        "sbcs	x2, x2, x6\n\t"
+        "ldp	x4, x5, [%[a], 240]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x8, x9, [%[b], 240]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "stp	x2, x3, [%[a], 224]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x4, x5, [%[a], 240]\n\t"
+        "ldp	x2, x3, [%[a], 256]\n\t"
+        "ldp	x6, x7, [%[b], 256]\n\t"
+        "sbcs	x2, x2, x6\n\t"
+        "ldp	x4, x5, [%[a], 272]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x8, x9, [%[b], 272]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "stp	x2, x3, [%[a], 256]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x4, x5, [%[a], 272]\n\t"
+        "ldp	x2, x3, [%[a], 288]\n\t"
+        "ldp	x6, x7, [%[b], 288]\n\t"
+        "sbcs	x2, x2, x6\n\t"
+        "ldp	x4, x5, [%[a], 304]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x8, x9, [%[b], 304]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "stp	x2, x3, [%[a], 288]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x4, x5, [%[a], 304]\n\t"
+        "ldp	x2, x3, [%[a], 320]\n\t"
+        "ldp	x6, x7, [%[b], 320]\n\t"
+        "sbcs	x2, x2, x6\n\t"
+        "ldp	x4, x5, [%[a], 336]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x8, x9, [%[b], 336]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "stp	x2, x3, [%[a], 320]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x4, x5, [%[a], 336]\n\t"
+        "ldp	x2, x3, [%[a], 352]\n\t"
+        "ldp	x6, x7, [%[b], 352]\n\t"
+        "sbcs	x2, x2, x6\n\t"
+        "ldp	x4, x5, [%[a], 368]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x8, x9, [%[b], 368]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "stp	x2, x3, [%[a], 352]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x4, x5, [%[a], 368]\n\t"
+        "ldp	x2, x3, [%[a], 384]\n\t"
+        "ldp	x6, x7, [%[b], 384]\n\t"
+        "sbcs	x2, x2, x6\n\t"
+        "ldp	x4, x5, [%[a], 400]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x8, x9, [%[b], 400]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "stp	x2, x3, [%[a], 384]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x4, x5, [%[a], 400]\n\t"
+        "ldp	x2, x3, [%[a], 416]\n\t"
+        "ldp	x6, x7, [%[b], 416]\n\t"
+        "sbcs	x2, x2, x6\n\t"
+        "ldp	x4, x5, [%[a], 432]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x8, x9, [%[b], 432]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "stp	x2, x3, [%[a], 416]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x4, x5, [%[a], 432]\n\t"
+        "ldp	x2, x3, [%[a], 448]\n\t"
+        "ldp	x6, x7, [%[b], 448]\n\t"
+        "sbcs	x2, x2, x6\n\t"
+        "ldp	x4, x5, [%[a], 464]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x8, x9, [%[b], 464]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "stp	x2, x3, [%[a], 448]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x4, x5, [%[a], 464]\n\t"
+        "ldp	x2, x3, [%[a], 480]\n\t"
+        "ldp	x6, x7, [%[b], 480]\n\t"
+        "sbcs	x2, x2, x6\n\t"
+        "ldp	x4, x5, [%[a], 496]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x8, x9, [%[b], 496]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "stp	x2, x3, [%[a], 480]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x4, x5, [%[a], 496]\n\t"
+        "csetm	%[a], cc\n\t"
+        : [a] "+r" (a)
+        : [b] "r" (b)
+        : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
+    );
+
+    return (sp_digit)a;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+static sp_digit sp_4096_add_64(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    __asm__ __volatile__ (
+        "ldp	x3, x4, [%[a], 0]\n\t"
+        "ldp	x7, x8, [%[b], 0]\n\t"
+        "adds	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 16]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 16]\n\t"
+        "adcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 0]\n\t"
+        "adcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 16]\n\t"
+        "ldp	x3, x4, [%[a], 32]\n\t"
+        "ldp	x7, x8, [%[b], 32]\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 48]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 48]\n\t"
+        "adcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 32]\n\t"
+        "adcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 48]\n\t"
+        "ldp	x3, x4, [%[a], 64]\n\t"
+        "ldp	x7, x8, [%[b], 64]\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 80]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 80]\n\t"
+        "adcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 64]\n\t"
+        "adcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 80]\n\t"
+        "ldp	x3, x4, [%[a], 96]\n\t"
+        "ldp	x7, x8, [%[b], 96]\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 112]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 112]\n\t"
+        "adcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 96]\n\t"
+        "adcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 112]\n\t"
+        "ldp	x3, x4, [%[a], 128]\n\t"
+        "ldp	x7, x8, [%[b], 128]\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 144]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 144]\n\t"
+        "adcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 128]\n\t"
+        "adcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 144]\n\t"
+        "ldp	x3, x4, [%[a], 160]\n\t"
+        "ldp	x7, x8, [%[b], 160]\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 176]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 176]\n\t"
+        "adcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 160]\n\t"
+        "adcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 176]\n\t"
+        "ldp	x3, x4, [%[a], 192]\n\t"
+        "ldp	x7, x8, [%[b], 192]\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 208]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 208]\n\t"
+        "adcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 192]\n\t"
+        "adcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 208]\n\t"
+        "ldp	x3, x4, [%[a], 224]\n\t"
+        "ldp	x7, x8, [%[b], 224]\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 240]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 240]\n\t"
+        "adcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 224]\n\t"
+        "adcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 240]\n\t"
+        "ldp	x3, x4, [%[a], 256]\n\t"
+        "ldp	x7, x8, [%[b], 256]\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 272]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 272]\n\t"
+        "adcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 256]\n\t"
+        "adcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 272]\n\t"
+        "ldp	x3, x4, [%[a], 288]\n\t"
+        "ldp	x7, x8, [%[b], 288]\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 304]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 304]\n\t"
+        "adcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 288]\n\t"
+        "adcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 304]\n\t"
+        "ldp	x3, x4, [%[a], 320]\n\t"
+        "ldp	x7, x8, [%[b], 320]\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 336]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 336]\n\t"
+        "adcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 320]\n\t"
+        "adcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 336]\n\t"
+        "ldp	x3, x4, [%[a], 352]\n\t"
+        "ldp	x7, x8, [%[b], 352]\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 368]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 368]\n\t"
+        "adcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 352]\n\t"
+        "adcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 368]\n\t"
+        "ldp	x3, x4, [%[a], 384]\n\t"
+        "ldp	x7, x8, [%[b], 384]\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 400]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 400]\n\t"
+        "adcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 384]\n\t"
+        "adcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 400]\n\t"
+        "ldp	x3, x4, [%[a], 416]\n\t"
+        "ldp	x7, x8, [%[b], 416]\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 432]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 432]\n\t"
+        "adcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 416]\n\t"
+        "adcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 432]\n\t"
+        "ldp	x3, x4, [%[a], 448]\n\t"
+        "ldp	x7, x8, [%[b], 448]\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 464]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 464]\n\t"
+        "adcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 448]\n\t"
+        "adcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 464]\n\t"
+        "ldp	x3, x4, [%[a], 480]\n\t"
+        "ldp	x7, x8, [%[b], 480]\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 496]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 496]\n\t"
+        "adcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 480]\n\t"
+        "adcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 496]\n\t"
+        "cset	%[r], cs\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10"
+    );
+
+    return (sp_digit)r;
+}
+
+/* Add digit to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+static void sp_4096_add_zero_32(sp_digit* r, const sp_digit* a,
+        const sp_digit d)
+{
+    __asm__ __volatile__ (
+        "ldp	x3, x4, [%[a], 0]\n\t"
+        "ldp	x5, x6, [%[a], 16]\n\t"
+        "adds	x3, x3, %[d]\n\t"
+        "adcs	x4, x4, xzr\n\t"
+        "adcs	x5, x5, xzr\n\t"
+        "stp	x3, x4, [%[r], 0]\n\t"
+        "adcs	x6, x6, xzr\n\t"
+        "stp	x5, x6, [%[r], 16]\n\t"
+        "ldp	x3, x4, [%[a], 32]\n\t"
+        "ldp	x5, x6, [%[a], 48]\n\t"
+        "adcs	x3, x3, xzr\n\t"
+        "adcs	x4, x4, xzr\n\t"
+        "adcs	x5, x5, xzr\n\t"
+        "stp	x3, x4, [%[r], 32]\n\t"
+        "adcs	x6, x6, xzr\n\t"
+        "stp	x5, x6, [%[r], 48]\n\t"
+        "ldp	x3, x4, [%[a], 64]\n\t"
+        "ldp	x5, x6, [%[a], 80]\n\t"
+        "adcs	x3, x3, xzr\n\t"
+        "adcs	x4, x4, xzr\n\t"
+        "adcs	x5, x5, xzr\n\t"
+        "stp	x3, x4, [%[r], 64]\n\t"
+        "adcs	x6, x6, xzr\n\t"
+        "stp	x5, x6, [%[r], 80]\n\t"
+        "ldp	x3, x4, [%[a], 96]\n\t"
+        "ldp	x5, x6, [%[a], 112]\n\t"
+        "adcs	x3, x3, xzr\n\t"
+        "adcs	x4, x4, xzr\n\t"
+        "adcs	x5, x5, xzr\n\t"
+        "stp	x3, x4, [%[r], 96]\n\t"
+        "adcs	x6, x6, xzr\n\t"
+        "stp	x5, x6, [%[r], 112]\n\t"
+        "ldp	x3, x4, [%[a], 128]\n\t"
+        "ldp	x5, x6, [%[a], 144]\n\t"
+        "adcs	x3, x3, xzr\n\t"
+        "adcs	x4, x4, xzr\n\t"
+        "adcs	x5, x5, xzr\n\t"
+        "stp	x3, x4, [%[r], 128]\n\t"
+        "adcs	x6, x6, xzr\n\t"
+        "stp	x5, x6, [%[r], 144]\n\t"
+        "ldp	x3, x4, [%[a], 160]\n\t"
+        "ldp	x5, x6, [%[a], 176]\n\t"
+        "adcs	x3, x3, xzr\n\t"
+        "adcs	x4, x4, xzr\n\t"
+        "adcs	x5, x5, xzr\n\t"
+        "stp	x3, x4, [%[r], 160]\n\t"
+        "adcs	x6, x6, xzr\n\t"
+        "stp	x5, x6, [%[r], 176]\n\t"
+        "ldp	x3, x4, [%[a], 192]\n\t"
+        "ldp	x5, x6, [%[a], 208]\n\t"
+        "adcs	x3, x3, xzr\n\t"
+        "adcs	x4, x4, xzr\n\t"
+        "adcs	x5, x5, xzr\n\t"
+        "stp	x3, x4, [%[r], 192]\n\t"
+        "adcs	x6, x6, xzr\n\t"
+        "stp	x5, x6, [%[r], 208]\n\t"
+        "ldp	x3, x4, [%[a], 224]\n\t"
+        "ldp	x5, x6, [%[a], 240]\n\t"
+        "adcs	x3, x3, xzr\n\t"
+        "adcs	x4, x4, xzr\n\t"
+        "adcs	x5, x5, xzr\n\t"
+        "stp	x3, x4, [%[r], 224]\n\t"
+        "adcs	x6, x6, xzr\n\t"
+        "stp	x5, x6, [%[r], 240]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [d] "r" (d)
+        : "memory", "x3", "x4", "x5", "x6"
+    );
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_mul_64(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit* z0 = r;
+    sp_digit z1[64];
+    sp_digit a1[32];
+    sp_digit b1[32];
+    sp_digit z2[64];
+    sp_digit u, ca, cb;
+
+    ca = sp_2048_add_32(a1, a, &a[32]);
+    cb = sp_2048_add_32(b1, b, &b[32]);
+    u  = ca & cb;
+    sp_2048_mul_32(z1, a1, b1);
+    sp_2048_mul_32(z2, &a[32], &b[32]);
+    sp_2048_mul_32(z0, a, b);
+    sp_2048_mask_32(r + 64, a1, 0 - cb);
+    sp_2048_mask_32(b1, b1, 0 - ca);
+    u += sp_2048_add_32(r + 64, r + 64, b1);
+    u += sp_4096_sub_in_place_64(z1, z2);
+    u += sp_4096_sub_in_place_64(z1, z0);
+    u += sp_4096_add_64(r + 32, r + 32, z1);
+    u += sp_4096_add_32(r + 64, r + 64, z2);
+    sp_4096_add_zero_32(r + 96, z2 + 32, u);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Double a into r. (r = a + a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+static sp_digit sp_2048_dbl_32(sp_digit* r, const sp_digit* a)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "add	x11, %[a], 256\n\t"
+        "\n1:\n\t"
+        "adds	%[c], %[c], #-1\n\t"
+        "ldp	x3, x4, [%[a]], #16\n\t"
+        "ldp	x5, x6, [%[a]], #16\n\t"
+        "adcs	x3, x3, x3\n\t"
+        "adcs	x4, x4, x4\n\t"
+        "adcs	x5, x5, x5\n\t"
+        "stp	x3, x4, [%[r]], #16\n\t"
+        "adcs	x6, x6, x6\n\t"
+        "stp	x5, x6, [%[r]], #16\n\t"
+        "cset	%[c], cs\n\t"
+        "cmp	%[a], x11\n\t"
+        "b.ne	1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a)
+        :
+        : "memory", "x3", "x4", "x5", "x6", "x11"
+    );
+
+    return c;
+}
+
+#else
+/* Double a into r. (r = a + a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+static sp_digit sp_2048_dbl_32(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "ldp	x3, x4, [%[a], 0]\n\t"
+        "adds	x3, x3, x3\n\t"
+        "ldr	x5, [%[a], 16]\n\t"
+        "adcs	x4, x4, x4\n\t"
+        "ldr	x6, [%[a], 24]\n\t"
+        "adcs	x5, x5, x5\n\t"
+        "stp	x3, x4, [%[r], 0]\n\t"
+        "adcs	x6, x6, x6\n\t"
+        "stp	x5, x6, [%[r], 16]\n\t"
+        "ldp	x3, x4, [%[a], 32]\n\t"
+        "adcs	x3, x3, x3\n\t"
+        "ldr	x5, [%[a], 48]\n\t"
+        "adcs	x4, x4, x4\n\t"
+        "ldr	x6, [%[a], 56]\n\t"
+        "adcs	x5, x5, x5\n\t"
+        "stp	x3, x4, [%[r], 32]\n\t"
+        "adcs	x6, x6, x6\n\t"
+        "stp	x5, x6, [%[r], 48]\n\t"
+        "ldp	x3, x4, [%[a], 64]\n\t"
+        "adcs	x3, x3, x3\n\t"
+        "ldr	x5, [%[a], 80]\n\t"
+        "adcs	x4, x4, x4\n\t"
+        "ldr	x6, [%[a], 88]\n\t"
+        "adcs	x5, x5, x5\n\t"
+        "stp	x3, x4, [%[r], 64]\n\t"
+        "adcs	x6, x6, x6\n\t"
+        "stp	x5, x6, [%[r], 80]\n\t"
+        "ldp	x3, x4, [%[a], 96]\n\t"
+        "adcs	x3, x3, x3\n\t"
+        "ldr	x5, [%[a], 112]\n\t"
+        "adcs	x4, x4, x4\n\t"
+        "ldr	x6, [%[a], 120]\n\t"
+        "adcs	x5, x5, x5\n\t"
+        "stp	x3, x4, [%[r], 96]\n\t"
+        "adcs	x6, x6, x6\n\t"
+        "stp	x5, x6, [%[r], 112]\n\t"
+        "ldp	x3, x4, [%[a], 128]\n\t"
+        "adcs	x3, x3, x3\n\t"
+        "ldr	x5, [%[a], 144]\n\t"
+        "adcs	x4, x4, x4\n\t"
+        "ldr	x6, [%[a], 152]\n\t"
+        "adcs	x5, x5, x5\n\t"
+        "stp	x3, x4, [%[r], 128]\n\t"
+        "adcs	x6, x6, x6\n\t"
+        "stp	x5, x6, [%[r], 144]\n\t"
+        "ldp	x3, x4, [%[a], 160]\n\t"
+        "adcs	x3, x3, x3\n\t"
+        "ldr	x5, [%[a], 176]\n\t"
+        "adcs	x4, x4, x4\n\t"
+        "ldr	x6, [%[a], 184]\n\t"
+        "adcs	x5, x5, x5\n\t"
+        "stp	x3, x4, [%[r], 160]\n\t"
+        "adcs	x6, x6, x6\n\t"
+        "stp	x5, x6, [%[r], 176]\n\t"
+        "ldp	x3, x4, [%[a], 192]\n\t"
+        "adcs	x3, x3, x3\n\t"
+        "ldr	x5, [%[a], 208]\n\t"
+        "adcs	x4, x4, x4\n\t"
+        "ldr	x6, [%[a], 216]\n\t"
+        "adcs	x5, x5, x5\n\t"
+        "stp	x3, x4, [%[r], 192]\n\t"
+        "adcs	x6, x6, x6\n\t"
+        "stp	x5, x6, [%[r], 208]\n\t"
+        "ldp	x3, x4, [%[a], 224]\n\t"
+        "adcs	x3, x3, x3\n\t"
+        "ldr	x5, [%[a], 240]\n\t"
+        "adcs	x4, x4, x4\n\t"
+        "ldr	x6, [%[a], 248]\n\t"
+        "adcs	x5, x5, x5\n\t"
+        "stp	x3, x4, [%[r], 224]\n\t"
+        "adcs	x6, x6, x6\n\t"
+        "stp	x5, x6, [%[r], 240]\n\t"
+        "cset	%[r], cs\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a)
+        : "memory", "x3", "x4", "x5", "x6"
+    );
+
+    return (sp_digit)r;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_sqr_64(sp_digit* r, const sp_digit* a)
+{
+    sp_digit* z0 = r;
+    sp_digit z2[64];
+    sp_digit z1[64];
+    sp_digit a1[32];
+    sp_digit u;
+
+    u = sp_2048_add_32(a1, a, &a[32]);
+    sp_2048_sqr_32(z1, a1);
+    sp_2048_sqr_32(z2, &a[32]);
+    sp_2048_sqr_32(z0, a);
+    sp_2048_mask_32(r + 64, a1, 0 - u);
+    u += sp_2048_dbl_32(r + 64, r + 64);
+    u += sp_4096_sub_in_place_64(z1, z2);
+    u += sp_4096_sub_in_place_64(z1, z0);
+    u += sp_4096_add_64(r + 32, r + 32, z1);
+    u += sp_4096_add_32(r + 64, r + 64, z2);
+    sp_4096_add_zero_32(r + 96, z2 + 32, u);
+    
+}
+
+#endif /* !WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+static sp_digit sp_4096_add_64(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "add	x11, %[a], 512\n\t"
+        "\n1:\n\t"
+        "adds	%[c], %[c], #-1\n\t"
+        "ldp	x3, x4, [%[a]], #16\n\t"
+        "ldp	x5, x6, [%[a]], #16\n\t"
+        "ldp	x7, x8, [%[b]], #16\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "ldp	x9, x10, [%[b]], #16\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "adcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r]], #16\n\t"
+        "adcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r]], #16\n\t"
+        "cset	%[c], cs\n\t"
+        "cmp	%[a], x11\n\t"
+        "b.ne	1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into a. (a -= b)
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+static sp_digit sp_4096_sub_in_place_64(sp_digit* a, const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "add	x10, %[a], 512\n\t"
+        "\n1:\n\t"
+        "subs	%[c], xzr, %[c]\n\t"
+        "ldp	x2, x3, [%[a]]\n\t"
+        "ldp	x4, x5, [%[a], #16]\n\t"
+        "ldp	x6, x7, [%[b]], #16\n\t"
+        "sbcs	x2, x2, x6\n\t"
+        "ldp	x8, x9, [%[b]], #16\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "stp	x2, x3, [%[a]], #16\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x4, x5, [%[a]], #16\n\t"
+        "csetm	%[c], cc\n\t"
+        "cmp	%[a], x10\n\t"
+        "b.ne	1b\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+static void sp_4096_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+    sp_digit tmp[128];
+
+    __asm__ __volatile__ (
+        "mov	x5, 0\n\t"
+        "mov	x6, 0\n\t"
+        "mov	x7, 0\n\t"
+        "mov	x8, 0\n\t"
+        "\n1:\n\t"
+        "subs	x3, x5, 504\n\t"
+        "csel	x3, xzr, x3, cc\n\t"
+        "sub	x4, x5, x3\n\t"
+        "\n2:\n\t"
+        "ldr	x10, [%[a], x3]\n\t"
+        "ldr	x11, [%[b], x4]\n\t"
+        "mul	x9, x10, x11\n\t"
+        "umulh	x10, x10, x11\n\t"
+        "adds	x6, x6, x9\n\t"
+        "adcs	x7, x7, x10\n\t"
+        "adc	x8, x8, xzr\n\t"
+        "add	x3, x3, #8\n\t"
+        "sub	x4, x4, #8\n\t"
+        "cmp	x3, 512\n\t"
+        "b.eq	3f\n\t"
+        "cmp	x3, x5\n\t"
+        "b.le	2b\n\t"
+        "\n3:\n\t"
+        "str	x6, [%[r], x5]\n\t"
+        "mov	x6, x7\n\t"
+        "mov	x7, x8\n\t"
+        "mov	x8, #0\n\t"
+        "add	x5, x5, #8\n\t"
+        "cmp	x5, 1008\n\t"
+        "b.le	1b\n\t"
+        "str	x6, [%[r], x5]\n\t"
+        :
+        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11"
+    );
+
+    XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+static void sp_4096_sqr_64(sp_digit* r, const sp_digit* a)
+{
+    sp_digit tmp[128];
+
+    __asm__ __volatile__ (
+        "mov	x6, 0\n\t"
+        "mov	x7, 0\n\t"
+        "mov	x8, 0\n\t"
+        "mov	x5, 0\n\t"
+        "\n1:\n\t"
+        "subs	x3, x5, 504\n\t"
+        "csel	x3, xzr, x3, cc\n\t"
+        "sub	x4, x5, x3\n\t"
+        "\n2:\n\t"
+        "cmp	x4, x3\n\t"
+        "b.eq	4f\n\t"
+        "ldr	x10, [%[a], x3]\n\t"
+        "ldr	x11, [%[a], x4]\n\t"
+        "mul	x9, x10, x11\n\t"
+        "umulh	x10, x10, x11\n\t"
+        "adds	x6, x6, x9\n\t"
+        "adcs	x7, x7, x10\n\t"
+        "adc	x8, x8, xzr\n\t"
+        "adds	x6, x6, x9\n\t"
+        "adcs	x7, x7, x10\n\t"
+        "adc	x8, x8, xzr\n\t"
+        "b.al	5f\n\t"
+        "\n4:\n\t"
+        "ldr	x10, [%[a], x3]\n\t"
+        "mul	x9, x10, x10\n\t"
+        "umulh	x10, x10, x10\n\t"
+        "adds	x6, x6, x9\n\t"
+        "adcs	x7, x7, x10\n\t"
+        "adc	x8, x8, xzr\n\t"
+        "\n5:\n\t"
+        "add	x3, x3, #8\n\t"
+        "sub	x4, x4, #8\n\t"
+        "cmp	x3, 512\n\t"
+        "b.eq	3f\n\t"
+        "cmp	x3, x4\n\t"
+        "b.gt	3f\n\t"
+        "cmp	x3, x5\n\t"
+        "b.le	2b\n\t"
+        "\n3:\n\t"
+        "str	x6, [%[r], x5]\n\t"
+        "mov	x6, x7\n\t"
+        "mov	x7, x8\n\t"
+        "mov	x8, #0\n\t"
+        "add	x5, x5, #8\n\t"
+        "cmp	x5, 1008\n\t"
+        "b.le	1b\n\t"
+        "str	x6, [%[r], x5]\n\t"
+        :
+        : [r] "r" (tmp), [a] "r" (a)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11"
+    );
+
+    XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Caclulate the bottom digit of -1/a mod 2^n.
+ *
+ * a    A single precision number.
+ * rho  Bottom word of inverse.
+ */
+static void sp_4096_mont_setup(const sp_digit* a, sp_digit* rho)
+{
+    sp_digit x, b;
+
+    b = a[0];
+    x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**64 */
+
+    /* rho = -1/m mod b */
+    *rho = -x;
+}
+
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision digit.
+ */
+static void sp_4096_mul_d_64(sp_digit* r, const sp_digit* a,
+        sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    __asm__ __volatile__ (
+        "# A[0] * B\n\t"
+        "ldr	x8, [%[a]]\n\t"
+        "mul	x5, %[b], x8\n\t"
+        "umulh	x3, %[b], x8\n\t"
+        "mov	x4, 0\n\t"
+        "str	x5, [%[r]]\n\t"
+        "mov	x5, 0\n\t"
+        "mov	x9, #8\n\t"
+        "1:\n\t"
+        "ldr	x8, [%[a], x9]\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adds	x3, x3, x6\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "str	x3, [%[r], x9]\n\t"
+        "mov	x3, x4\n\t"
+        "mov	x4, x5\n\t"
+        "mov	x5, #0\n\t"
+        "add	x9, x9, #8\n\t"
+        "cmp	x9, 512\n\t"
+        "b.lt	1b\n\t"
+        "str	x3, [%[r], 512]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
+    );
+#else
+    __asm__ __volatile__ (
+        "# A[0] * B\n\t"
+        "ldp	x8, x9, [%[a]]\n\t"
+        "mul	x3, %[b], x8\n\t"
+        "umulh	x4, %[b], x8\n\t"
+        "mov	x5, 0\n\t"
+        "# A[1] * B\n\t"
+        "str	x3, [%[r]]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[2] * B\n\t"
+        "ldp	x8, x9, [%[a], 16]\n\t"
+        "str	x4, [%[r], 8]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "# A[3] * B\n\t"
+        "str	x5, [%[r], 16]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
+        "# A[4] * B\n\t"
+        "ldp	x8, x9, [%[a], 32]\n\t"
+        "str	x3, [%[r], 24]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[5] * B\n\t"
+        "str	x4, [%[r], 32]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "# A[6] * B\n\t"
+        "ldp	x8, x9, [%[a], 48]\n\t"
+        "str	x5, [%[r], 40]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
+        "# A[7] * B\n\t"
+        "str	x3, [%[r], 48]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[8] * B\n\t"
+        "ldp	x8, x9, [%[a], 64]\n\t"
+        "str	x4, [%[r], 56]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "# A[9] * B\n\t"
+        "str	x5, [%[r], 64]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
+        "# A[10] * B\n\t"
+        "ldp	x8, x9, [%[a], 80]\n\t"
+        "str	x3, [%[r], 72]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[11] * B\n\t"
+        "str	x4, [%[r], 80]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "# A[12] * B\n\t"
+        "ldp	x8, x9, [%[a], 96]\n\t"
+        "str	x5, [%[r], 88]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
+        "# A[13] * B\n\t"
+        "str	x3, [%[r], 96]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[14] * B\n\t"
+        "ldp	x8, x9, [%[a], 112]\n\t"
+        "str	x4, [%[r], 104]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "# A[15] * B\n\t"
+        "str	x5, [%[r], 112]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
+        "# A[16] * B\n\t"
+        "ldp	x8, x9, [%[a], 128]\n\t"
+        "str	x3, [%[r], 120]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[17] * B\n\t"
+        "str	x4, [%[r], 128]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "# A[18] * B\n\t"
+        "ldp	x8, x9, [%[a], 144]\n\t"
+        "str	x5, [%[r], 136]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
+        "# A[19] * B\n\t"
+        "str	x3, [%[r], 144]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[20] * B\n\t"
+        "ldp	x8, x9, [%[a], 160]\n\t"
+        "str	x4, [%[r], 152]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "# A[21] * B\n\t"
+        "str	x5, [%[r], 160]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
+        "# A[22] * B\n\t"
+        "ldp	x8, x9, [%[a], 176]\n\t"
+        "str	x3, [%[r], 168]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[23] * B\n\t"
+        "str	x4, [%[r], 176]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "# A[24] * B\n\t"
+        "ldp	x8, x9, [%[a], 192]\n\t"
+        "str	x5, [%[r], 184]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
+        "# A[25] * B\n\t"
+        "str	x3, [%[r], 192]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[26] * B\n\t"
+        "ldp	x8, x9, [%[a], 208]\n\t"
+        "str	x4, [%[r], 200]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "# A[27] * B\n\t"
+        "str	x5, [%[r], 208]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
+        "# A[28] * B\n\t"
+        "ldp	x8, x9, [%[a], 224]\n\t"
+        "str	x3, [%[r], 216]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[29] * B\n\t"
+        "str	x4, [%[r], 224]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "# A[30] * B\n\t"
+        "ldp	x8, x9, [%[a], 240]\n\t"
+        "str	x5, [%[r], 232]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
+        "# A[31] * B\n\t"
+        "str	x3, [%[r], 240]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[32] * B\n\t"
+        "ldp	x8, x9, [%[a], 256]\n\t"
+        "str	x4, [%[r], 248]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "# A[33] * B\n\t"
+        "str	x5, [%[r], 256]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
+        "# A[34] * B\n\t"
+        "ldp	x8, x9, [%[a], 272]\n\t"
+        "str	x3, [%[r], 264]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[35] * B\n\t"
+        "str	x4, [%[r], 272]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "# A[36] * B\n\t"
+        "ldp	x8, x9, [%[a], 288]\n\t"
+        "str	x5, [%[r], 280]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
+        "# A[37] * B\n\t"
+        "str	x3, [%[r], 288]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[38] * B\n\t"
+        "ldp	x8, x9, [%[a], 304]\n\t"
+        "str	x4, [%[r], 296]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "# A[39] * B\n\t"
+        "str	x5, [%[r], 304]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
+        "# A[40] * B\n\t"
+        "ldp	x8, x9, [%[a], 320]\n\t"
+        "str	x3, [%[r], 312]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[41] * B\n\t"
+        "str	x4, [%[r], 320]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "# A[42] * B\n\t"
+        "ldp	x8, x9, [%[a], 336]\n\t"
+        "str	x5, [%[r], 328]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
+        "# A[43] * B\n\t"
+        "str	x3, [%[r], 336]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[44] * B\n\t"
+        "ldp	x8, x9, [%[a], 352]\n\t"
+        "str	x4, [%[r], 344]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "# A[45] * B\n\t"
+        "str	x5, [%[r], 352]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
+        "# A[46] * B\n\t"
+        "ldp	x8, x9, [%[a], 368]\n\t"
+        "str	x3, [%[r], 360]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[47] * B\n\t"
+        "str	x4, [%[r], 368]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "# A[48] * B\n\t"
+        "ldp	x8, x9, [%[a], 384]\n\t"
+        "str	x5, [%[r], 376]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
+        "# A[49] * B\n\t"
+        "str	x3, [%[r], 384]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[50] * B\n\t"
+        "ldp	x8, x9, [%[a], 400]\n\t"
+        "str	x4, [%[r], 392]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "# A[51] * B\n\t"
+        "str	x5, [%[r], 400]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
+        "# A[52] * B\n\t"
+        "ldp	x8, x9, [%[a], 416]\n\t"
+        "str	x3, [%[r], 408]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[53] * B\n\t"
+        "str	x4, [%[r], 416]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "# A[54] * B\n\t"
+        "ldp	x8, x9, [%[a], 432]\n\t"
+        "str	x5, [%[r], 424]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
+        "# A[55] * B\n\t"
+        "str	x3, [%[r], 432]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[56] * B\n\t"
+        "ldp	x8, x9, [%[a], 448]\n\t"
+        "str	x4, [%[r], 440]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "# A[57] * B\n\t"
+        "str	x5, [%[r], 448]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
+        "# A[58] * B\n\t"
+        "ldp	x8, x9, [%[a], 464]\n\t"
+        "str	x3, [%[r], 456]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[59] * B\n\t"
+        "str	x4, [%[r], 464]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "# A[60] * B\n\t"
+        "ldp	x8, x9, [%[a], 480]\n\t"
+        "str	x5, [%[r], 472]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
+        "# A[61] * B\n\t"
+        "str	x3, [%[r], 480]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[62] * B\n\t"
+        "ldp	x8, x9, [%[a], 496]\n\t"
+        "str	x4, [%[r], 488]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "# A[63] * B\n\t"
+        "str	x5, [%[r], 496]\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
+        "adc	x4, x4, x7\n\t"
+        "stp	x3, x4, [%[r], 504]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
+    );
+#endif
+}
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 4096 bits, just need to subtract.
+ *
+ * r  A single precision number.
+ * m  A single precision number.
+ */
+static void sp_4096_mont_norm_64(sp_digit* r, const sp_digit* m)
+{
+    XMEMSET(r, 0, sizeof(sp_digit) * 64);
+
+    /* r = 2^n mod m */
+    sp_4096_sub_in_place_64(r, m);
+}
+
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * r  A single precision number representing condition subtract result.
+ * a  A single precision number to subtract from.
+ * b  A single precision number to subtract.
+ * m  Mask value to apply.
+ */
+static sp_digit sp_4096_cond_sub_64(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	x8, #0\n\t"
+        "1:\n\t"
+        "subs	%[c], xzr, %[c]\n\t"
+        "ldr	x4, [%[a], x8]\n\t"
+        "ldr	x5, [%[b], x8]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "sbcs	x4, x4, x5\n\t"
+        "csetm	%[c], cc\n\t"
+        "str	x4, [%[r], x8]\n\t"
+        "add	x8, x8, #8\n\t"
+        "cmp	x8, 512\n\t"
+        "b.lt	1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12"
+    );
+
+    return c;
+#else
+    __asm__ __volatile__ (
+
+        "ldp	x5, x7, [%[b], 0]\n\t"
+        "ldp	x11, x12, [%[b], 16]\n\t"
+        "ldp	x4, x6, [%[a], 0]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 16]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "subs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "sbcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 0]\n\t"
+        "sbcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 16]\n\t"
+        "ldp	x5, x7, [%[b], 32]\n\t"
+        "ldp	x11, x12, [%[b], 48]\n\t"
+        "ldp	x4, x6, [%[a], 32]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 48]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "sbcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "sbcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 32]\n\t"
+        "sbcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 48]\n\t"
+        "ldp	x5, x7, [%[b], 64]\n\t"
+        "ldp	x11, x12, [%[b], 80]\n\t"
+        "ldp	x4, x6, [%[a], 64]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 80]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "sbcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "sbcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 64]\n\t"
+        "sbcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 80]\n\t"
+        "ldp	x5, x7, [%[b], 96]\n\t"
+        "ldp	x11, x12, [%[b], 112]\n\t"
+        "ldp	x4, x6, [%[a], 96]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 112]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "sbcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "sbcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 96]\n\t"
+        "sbcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 112]\n\t"
+        "ldp	x5, x7, [%[b], 128]\n\t"
+        "ldp	x11, x12, [%[b], 144]\n\t"
+        "ldp	x4, x6, [%[a], 128]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 144]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "sbcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "sbcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 128]\n\t"
+        "sbcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 144]\n\t"
+        "ldp	x5, x7, [%[b], 160]\n\t"
+        "ldp	x11, x12, [%[b], 176]\n\t"
+        "ldp	x4, x6, [%[a], 160]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 176]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "sbcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "sbcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 160]\n\t"
+        "sbcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 176]\n\t"
+        "ldp	x5, x7, [%[b], 192]\n\t"
+        "ldp	x11, x12, [%[b], 208]\n\t"
+        "ldp	x4, x6, [%[a], 192]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 208]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "sbcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "sbcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 192]\n\t"
+        "sbcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 208]\n\t"
+        "ldp	x5, x7, [%[b], 224]\n\t"
+        "ldp	x11, x12, [%[b], 240]\n\t"
+        "ldp	x4, x6, [%[a], 224]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 240]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "sbcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "sbcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 224]\n\t"
+        "sbcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 240]\n\t"
+        "ldp	x5, x7, [%[b], 256]\n\t"
+        "ldp	x11, x12, [%[b], 272]\n\t"
+        "ldp	x4, x6, [%[a], 256]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 272]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "sbcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "sbcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 256]\n\t"
+        "sbcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 272]\n\t"
+        "ldp	x5, x7, [%[b], 288]\n\t"
+        "ldp	x11, x12, [%[b], 304]\n\t"
+        "ldp	x4, x6, [%[a], 288]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 304]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "sbcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "sbcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 288]\n\t"
+        "sbcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 304]\n\t"
+        "ldp	x5, x7, [%[b], 320]\n\t"
+        "ldp	x11, x12, [%[b], 336]\n\t"
+        "ldp	x4, x6, [%[a], 320]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 336]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "sbcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "sbcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 320]\n\t"
+        "sbcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 336]\n\t"
+        "ldp	x5, x7, [%[b], 352]\n\t"
+        "ldp	x11, x12, [%[b], 368]\n\t"
+        "ldp	x4, x6, [%[a], 352]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 368]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "sbcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "sbcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 352]\n\t"
+        "sbcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 368]\n\t"
+        "ldp	x5, x7, [%[b], 384]\n\t"
+        "ldp	x11, x12, [%[b], 400]\n\t"
+        "ldp	x4, x6, [%[a], 384]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 400]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "sbcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "sbcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 384]\n\t"
+        "sbcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 400]\n\t"
+        "ldp	x5, x7, [%[b], 416]\n\t"
+        "ldp	x11, x12, [%[b], 432]\n\t"
+        "ldp	x4, x6, [%[a], 416]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 432]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "sbcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "sbcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 416]\n\t"
+        "sbcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 432]\n\t"
+        "ldp	x5, x7, [%[b], 448]\n\t"
+        "ldp	x11, x12, [%[b], 464]\n\t"
+        "ldp	x4, x6, [%[a], 448]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 464]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "sbcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "sbcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 448]\n\t"
+        "sbcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 464]\n\t"
+        "ldp	x5, x7, [%[b], 480]\n\t"
+        "ldp	x11, x12, [%[b], 496]\n\t"
+        "ldp	x4, x6, [%[a], 480]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 496]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "sbcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "sbcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 480]\n\t"
+        "sbcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 496]\n\t"
+        "csetm	%[r], cc\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12"
+    );
+
+    return (sp_digit)r;
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Reduce the number back to 4096 bits using Montgomery reduction.
+ *
+ * a   A single precision number to reduce in place.
+ * m   The single precision number representing the modulus.
+ * mp  The digit representing the negative inverse of m mod 2^n.
+ */
+SP_NOINLINE static void sp_4096_mont_reduce_64(sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_digit ca = 0;
+
+    __asm__ __volatile__ (
+        "ldp	x14, x15, [%[m], 0]\n\t"
+        "ldp	x16, x17, [%[m], 16]\n\t"
+        "ldp	x19, x20, [%[m], 32]\n\t"
+        "ldp	x21, x22, [%[m], 48]\n\t"
+        "ldp	x23, x24, [%[m], 64]\n\t"
+        "ldp	x25, x26, [%[m], 80]\n\t"
+        "ldp	x27, x28, [%[m], 96]\n\t"
+        "# i = 64\n\t"
+        "mov	x4, 64\n\t"
+        "ldp	x12, x13, [%[a], 0]\n\t"
+        "\n1:\n\t"
+        "# mu = a[i] * mp\n\t"
+        "mul	x9, %[mp], x12\n\t"
+        "# a[i+0] += m[0] * mu\n\t"
+        "mul	x7, x14, x9\n\t"
+        "umulh	x8, x14, x9\n\t"
+        "adds	x12, x12, x7\n\t"
+        "# a[i+1] += m[1] * mu\n\t"
+        "mul	x7, x15, x9\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "umulh	x8, x15, x9\n\t"
+        "adds	x12, x13, x7\n\t"
+        "# a[i+2] += m[2] * mu\n\t"
+        "ldr	x13, [%[a], 16]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "mul	x7, x16, x9\n\t"
+        "adds	x12, x12, x6\n\t"
+        "umulh	x8, x16, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "adds	x13, x13, x7\n\t"
+        "# a[i+3] += m[3] * mu\n\t"
+        "ldr	x10, [%[a], 24]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "mul	x7, x17, x9\n\t"
+        "adds	x13, x13, x5\n\t"
+        "umulh	x8, x17, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "adds	x10, x10, x7\n\t"
+        "# a[i+4] += m[4] * mu\n\t"
+        "ldr	x11, [%[a], 32]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x19, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x19, x9\n\t"
+        "str	x10, [%[a], 24]\n\t"
+        "adds	x11, x11, x7\n\t"
+        "# a[i+5] += m[5] * mu\n\t"
+        "ldr	x10, [%[a], 40]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x20, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x20, x9\n\t"
+        "str	x11, [%[a], 32]\n\t"
+        "adds	x10, x10, x7\n\t"
+        "# a[i+6] += m[6] * mu\n\t"
+        "ldr	x11, [%[a], 48]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x21, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x21, x9\n\t"
+        "str	x10, [%[a], 40]\n\t"
+        "adds	x11, x11, x7\n\t"
+        "# a[i+7] += m[7] * mu\n\t"
+        "ldr	x10, [%[a], 56]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x22, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x22, x9\n\t"
+        "str	x11, [%[a], 48]\n\t"
+        "adds	x10, x10, x7\n\t"
+        "# a[i+8] += m[8] * mu\n\t"
+        "ldr	x11, [%[a], 64]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x23, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x23, x9\n\t"
+        "str	x10, [%[a], 56]\n\t"
+        "adds	x11, x11, x7\n\t"
+        "# a[i+9] += m[9] * mu\n\t"
+        "ldr	x10, [%[a], 72]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x24, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x24, x9\n\t"
+        "str	x11, [%[a], 64]\n\t"
+        "adds	x10, x10, x7\n\t"
+        "# a[i+10] += m[10] * mu\n\t"
+        "ldr	x11, [%[a], 80]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x25, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x25, x9\n\t"
+        "str	x10, [%[a], 72]\n\t"
+        "adds	x11, x11, x7\n\t"
+        "# a[i+11] += m[11] * mu\n\t"
+        "ldr	x10, [%[a], 88]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x26, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x26, x9\n\t"
+        "str	x11, [%[a], 80]\n\t"
+        "adds	x10, x10, x7\n\t"
+        "# a[i+12] += m[12] * mu\n\t"
+        "ldr	x11, [%[a], 96]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x27, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x27, x9\n\t"
+        "str	x10, [%[a], 88]\n\t"
+        "adds	x11, x11, x7\n\t"
+        "# a[i+13] += m[13] * mu\n\t"
+        "ldr	x10, [%[a], 104]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x28, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x28, x9\n\t"
+        "str	x11, [%[a], 96]\n\t"
+        "adds	x10, x10, x7\n\t"
+        "# a[i+14] += m[14] * mu\n\t"
+        "ldr	x11, [%[a], 112]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 112]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 104]\n\t"
+        "adds	x11, x11, x7\n\t"
+        "# a[i+15] += m[15] * mu\n\t"
+        "ldr	x10, [%[a], 120]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 120]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 112]\n\t"
+        "adds	x10, x10, x7\n\t"
+        "# a[i+16] += m[16] * mu\n\t"
+        "ldr	x11, [%[a], 128]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 128]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 120]\n\t"
+        "adds	x11, x11, x7\n\t"
+        "# a[i+17] += m[17] * mu\n\t"
+        "ldr	x10, [%[a], 136]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 136]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 128]\n\t"
+        "adds	x10, x10, x7\n\t"
+        "# a[i+18] += m[18] * mu\n\t"
+        "ldr	x11, [%[a], 144]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 144]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 136]\n\t"
+        "adds	x11, x11, x7\n\t"
+        "# a[i+19] += m[19] * mu\n\t"
+        "ldr	x10, [%[a], 152]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 152]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 144]\n\t"
+        "adds	x10, x10, x7\n\t"
+        "# a[i+20] += m[20] * mu\n\t"
+        "ldr	x11, [%[a], 160]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 160]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 152]\n\t"
+        "adds	x11, x11, x7\n\t"
+        "# a[i+21] += m[21] * mu\n\t"
+        "ldr	x10, [%[a], 168]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 168]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 160]\n\t"
+        "adds	x10, x10, x7\n\t"
+        "# a[i+22] += m[22] * mu\n\t"
+        "ldr	x11, [%[a], 176]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 176]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 168]\n\t"
+        "adds	x11, x11, x7\n\t"
+        "# a[i+23] += m[23] * mu\n\t"
+        "ldr	x10, [%[a], 184]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 184]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 176]\n\t"
+        "adds	x10, x10, x7\n\t"
+        "# a[i+24] += m[24] * mu\n\t"
+        "ldr	x11, [%[a], 192]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 192]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 184]\n\t"
+        "adds	x11, x11, x7\n\t"
+        "# a[i+25] += m[25] * mu\n\t"
+        "ldr	x10, [%[a], 200]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 200]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 192]\n\t"
+        "adds	x10, x10, x7\n\t"
+        "# a[i+26] += m[26] * mu\n\t"
+        "ldr	x11, [%[a], 208]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 208]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 200]\n\t"
+        "adds	x11, x11, x7\n\t"
+        "# a[i+27] += m[27] * mu\n\t"
+        "ldr	x10, [%[a], 216]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 216]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 208]\n\t"
+        "adds	x10, x10, x7\n\t"
+        "# a[i+28] += m[28] * mu\n\t"
+        "ldr	x11, [%[a], 224]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 224]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 216]\n\t"
+        "adds	x11, x11, x7\n\t"
+        "# a[i+29] += m[29] * mu\n\t"
+        "ldr	x10, [%[a], 232]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 232]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 224]\n\t"
+        "adds	x10, x10, x7\n\t"
+        "# a[i+30] += m[30] * mu\n\t"
+        "ldr	x11, [%[a], 240]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 240]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 232]\n\t"
+        "adds	x11, x11, x7\n\t"
+        "# a[i+31] += m[31] * mu\n\t"
+        "ldr	x10, [%[a], 248]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 248]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 240]\n\t"
+        "adds	x10, x10, x7\n\t"
+        "# a[i+32] += m[32] * mu\n\t"
+        "ldr	x11, [%[a], 256]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 256]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 248]\n\t"
+        "adds	x11, x11, x7\n\t"
+        "# a[i+33] += m[33] * mu\n\t"
+        "ldr	x10, [%[a], 264]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 264]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 256]\n\t"
+        "adds	x10, x10, x7\n\t"
+        "# a[i+34] += m[34] * mu\n\t"
+        "ldr	x11, [%[a], 272]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 272]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 264]\n\t"
+        "adds	x11, x11, x7\n\t"
+        "# a[i+35] += m[35] * mu\n\t"
+        "ldr	x10, [%[a], 280]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 280]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 272]\n\t"
+        "adds	x10, x10, x7\n\t"
+        "# a[i+36] += m[36] * mu\n\t"
+        "ldr	x11, [%[a], 288]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 288]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 280]\n\t"
+        "adds	x11, x11, x7\n\t"
+        "# a[i+37] += m[37] * mu\n\t"
+        "ldr	x10, [%[a], 296]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 296]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 288]\n\t"
+        "adds	x10, x10, x7\n\t"
+        "# a[i+38] += m[38] * mu\n\t"
+        "ldr	x11, [%[a], 304]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 304]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 296]\n\t"
+        "adds	x11, x11, x7\n\t"
+        "# a[i+39] += m[39] * mu\n\t"
+        "ldr	x10, [%[a], 312]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 312]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 304]\n\t"
+        "adds	x10, x10, x7\n\t"
+        "# a[i+40] += m[40] * mu\n\t"
+        "ldr	x11, [%[a], 320]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 320]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 312]\n\t"
+        "adds	x11, x11, x7\n\t"
+        "# a[i+41] += m[41] * mu\n\t"
+        "ldr	x10, [%[a], 328]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 328]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 320]\n\t"
+        "adds	x10, x10, x7\n\t"
+        "# a[i+42] += m[42] * mu\n\t"
+        "ldr	x11, [%[a], 336]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 336]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 328]\n\t"
+        "adds	x11, x11, x7\n\t"
+        "# a[i+43] += m[43] * mu\n\t"
+        "ldr	x10, [%[a], 344]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 344]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 336]\n\t"
+        "adds	x10, x10, x7\n\t"
+        "# a[i+44] += m[44] * mu\n\t"
+        "ldr	x11, [%[a], 352]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 352]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 344]\n\t"
+        "adds	x11, x11, x7\n\t"
+        "# a[i+45] += m[45] * mu\n\t"
+        "ldr	x10, [%[a], 360]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 360]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 352]\n\t"
+        "adds	x10, x10, x7\n\t"
+        "# a[i+46] += m[46] * mu\n\t"
+        "ldr	x11, [%[a], 368]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 368]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 360]\n\t"
+        "adds	x11, x11, x7\n\t"
+        "# a[i+47] += m[47] * mu\n\t"
+        "ldr	x10, [%[a], 376]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 376]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 368]\n\t"
+        "adds	x10, x10, x7\n\t"
+        "# a[i+48] += m[48] * mu\n\t"
+        "ldr	x11, [%[a], 384]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 384]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 376]\n\t"
+        "adds	x11, x11, x7\n\t"
+        "# a[i+49] += m[49] * mu\n\t"
+        "ldr	x10, [%[a], 392]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 392]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 384]\n\t"
+        "adds	x10, x10, x7\n\t"
+        "# a[i+50] += m[50] * mu\n\t"
+        "ldr	x11, [%[a], 400]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 400]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 392]\n\t"
+        "adds	x11, x11, x7\n\t"
+        "# a[i+51] += m[51] * mu\n\t"
+        "ldr	x10, [%[a], 408]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 408]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 400]\n\t"
+        "adds	x10, x10, x7\n\t"
+        "# a[i+52] += m[52] * mu\n\t"
+        "ldr	x11, [%[a], 416]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 416]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 408]\n\t"
+        "adds	x11, x11, x7\n\t"
+        "# a[i+53] += m[53] * mu\n\t"
+        "ldr	x10, [%[a], 424]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 424]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 416]\n\t"
+        "adds	x10, x10, x7\n\t"
+        "# a[i+54] += m[54] * mu\n\t"
+        "ldr	x11, [%[a], 432]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 432]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 424]\n\t"
+        "adds	x11, x11, x7\n\t"
+        "# a[i+55] += m[55] * mu\n\t"
+        "ldr	x10, [%[a], 440]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 440]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 432]\n\t"
+        "adds	x10, x10, x7\n\t"
+        "# a[i+56] += m[56] * mu\n\t"
+        "ldr	x11, [%[a], 448]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 448]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 440]\n\t"
+        "adds	x11, x11, x7\n\t"
+        "# a[i+57] += m[57] * mu\n\t"
+        "ldr	x10, [%[a], 456]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 456]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 448]\n\t"
+        "adds	x10, x10, x7\n\t"
+        "# a[i+58] += m[58] * mu\n\t"
+        "ldr	x11, [%[a], 464]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 464]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 456]\n\t"
+        "adds	x11, x11, x7\n\t"
+        "# a[i+59] += m[59] * mu\n\t"
+        "ldr	x10, [%[a], 472]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 472]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 464]\n\t"
+        "adds	x10, x10, x7\n\t"
+        "# a[i+60] += m[60] * mu\n\t"
+        "ldr	x11, [%[a], 480]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 480]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 472]\n\t"
+        "adds	x11, x11, x7\n\t"
+        "# a[i+61] += m[61] * mu\n\t"
+        "ldr	x10, [%[a], 488]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 488]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x11, [%[a], 480]\n\t"
+        "adds	x10, x10, x7\n\t"
+        "# a[i+62] += m[62] * mu\n\t"
+        "ldr	x11, [%[a], 496]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "ldr	x8, [%[m], 496]\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "str	x10, [%[a], 488]\n\t"
+        "adds	x11, x11, x7\n\t"
+        "# a[i+63] += m[63] * mu\n\t"
+        "ldr	x10, [%[a], 504]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "ldr	x8, [%[m], 504]\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x8, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x8, x9\n\t"
+        "adds	x6, x6, x7\n\t"
+        "adcs	x8, x8, %[ca]\n\t"
+        "str	x11, [%[a], 496]\n\t"
+        "cset  %[ca], cs\n\t"
+        "adds	x10, x10, x6\n\t"
+        "ldr	x11, [%[a], 512]\n\t"
+        "str	x10, [%[a], 504]\n\t"
+        "adcs	x11, x11, x8\n\t"
+        "str	x11, [%[a], 512]\n\t"
+        "adc	%[ca], %[ca], xzr\n\t"
+        "subs	x4, x4, 1\n\t"
+        "add	%[a], %[a], 8\n\t"
+        "bne	1b\n\t"
+        "stp	x12, x13, [%[a], 0]\n\t"
+        : [ca] "+r" (ca), [a] "+r" (a)
+        : [m] "r" (m), [mp] "r" (mp)
+        : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28"
+    );
+
+    sp_4096_cond_sub_64(a - 64, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montogmery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r   Result of multiplication.
+ * a   First number to multiply in Montogmery form.
+ * b   Second number to multiply in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_4096_mont_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_4096_mul_64(r, a, b);
+    sp_4096_mont_reduce_64(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_4096_mont_sqr_64(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_4096_sqr_64(r, a);
+    sp_4096_mont_reduce_64(r, m, mp);
+}
+
+/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
+ *
+ * d1   The high order half of the number to divide.
+ * d0   The low order half of the number to divide.
+ * div  The dividend.
+ * returns the result of the division.
+ */
+static sp_digit div_4096_word_64(sp_digit d1, sp_digit d0, sp_digit div)
+{
+    sp_digit r;
+
+    __asm__ __volatile__ (
+        "lsr	x5, %[div], 32\n\t"
+        "add	x5, x5, 1\n\t"
+
+        "udiv	x3, %[d1], x5\n\t"
+        "lsl	x6, x3, 32\n\t"
+        "mul	x4, %[div], x6\n\t"
+        "umulh	x3, %[div], x6\n\t"
+        "subs	%[d0], %[d0], x4\n\t"
+        "sbc	%[d1], %[d1], x3\n\t"
+
+        "udiv	x3, %[d1], x5\n\t"
+        "lsl	x3, x3, 32\n\t"
+        "add	x6, x6, x3\n\t"
+        "mul	x4, %[div], x3\n\t"
+        "umulh	x3, %[div], x3\n\t"
+        "subs	%[d0], %[d0], x4\n\t"
+        "sbc	%[d1], %[d1], x3\n\t"
+
+        "lsr	x3, %[d0], 32\n\t"
+        "orr	x3, x3, %[d1], lsl 32\n\t"
+
+        "udiv	x3, x3, x5\n\t"
+        "add	x6, x6, x3\n\t"
+        "mul	x4, %[div], x3\n\t"
+        "umulh	x3, %[div], x3\n\t"
+        "subs	%[d0], %[d0], x4\n\t"
+        "sbc	%[d1], %[d1], x3\n\t"
+
+        "lsr	x3, %[d0], 32\n\t"
+        "orr	x3, x3, %[d1], lsl 32\n\t"
+
+        "udiv	x3, x3, x5\n\t"
+        "add	x6, x6, x3\n\t"
+        "mul	x4, %[div], x3\n\t"
+        "sub	%[d0], %[d0], x4\n\t"
+
+        "udiv	x3, %[d0], %[div]\n\t"
+        "add	%[r], x6, x3\n\t"
+
+        : [r] "=r" (r)
+        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+        : "x3", "x4", "x5", "x6"
+    );
+
+    return r;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * m  Mask to AND against each digit.
+ */
+static void sp_4096_mask_64(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<64; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    int i;
+
+    for (i = 0; i < 64; i += 8) {
+        r[i+0] = a[i+0] & m;
+        r[i+1] = a[i+1] & m;
+        r[i+2] = a[i+2] & m;
+        r[i+3] = a[i+3] & m;
+        r[i+4] = a[i+4] & m;
+        r[i+5] = a[i+5] & m;
+        r[i+6] = a[i+6] & m;
+        r[i+7] = a[i+7] & m;
+    }
+#endif
+}
+
+/* Compare a with b in constant time.
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+static int64_t sp_4096_cmp_64(const sp_digit* a, const sp_digit* b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    __asm__ __volatile__ (
+        "mov	x2, -1\n\t"
+        "mov	x3, 1\n\t"
+        "mov	x4, -1\n\t"
+        "mov	x5, 504\n\t"
+        "1:\n\t"
+        "ldr	x6, [%[a], x5]\n\t"
+        "ldr	x7, [%[b], x5]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x6, x6, x7\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "subs	x5, x5, #8\n\t"
+        "b.cs	1b\n\t"
+        "eor	%[a], x2, x4\n\t"
+        : [a] "+r" (a)
+        : [b] "r" (b)
+        : "x2", "x3", "x4", "x5", "x6", "x7", "x8"
+    );
+#else
+    __asm__ __volatile__ (
+        "mov	x2, -1\n\t"
+        "mov	x3, 1\n\t"
+        "mov  x4, -1\n\t"
+        "ldp	x5, x6, [%[a], 496]\n\t"
+        "ldp	x7, x8, [%[b], 496]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 480]\n\t"
+        "ldp	x7, x8, [%[b], 480]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 464]\n\t"
+        "ldp	x7, x8, [%[b], 464]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 448]\n\t"
+        "ldp	x7, x8, [%[b], 448]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 432]\n\t"
+        "ldp	x7, x8, [%[b], 432]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 416]\n\t"
+        "ldp	x7, x8, [%[b], 416]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 400]\n\t"
+        "ldp	x7, x8, [%[b], 400]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 384]\n\t"
+        "ldp	x7, x8, [%[b], 384]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 368]\n\t"
+        "ldp	x7, x8, [%[b], 368]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 352]\n\t"
+        "ldp	x7, x8, [%[b], 352]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 336]\n\t"
+        "ldp	x7, x8, [%[b], 336]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 320]\n\t"
+        "ldp	x7, x8, [%[b], 320]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 304]\n\t"
+        "ldp	x7, x8, [%[b], 304]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 288]\n\t"
+        "ldp	x7, x8, [%[b], 288]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 272]\n\t"
+        "ldp	x7, x8, [%[b], 272]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 256]\n\t"
+        "ldp	x7, x8, [%[b], 256]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 240]\n\t"
+        "ldp	x7, x8, [%[b], 240]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 224]\n\t"
+        "ldp	x7, x8, [%[b], 224]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 208]\n\t"
+        "ldp	x7, x8, [%[b], 208]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 192]\n\t"
+        "ldp	x7, x8, [%[b], 192]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 176]\n\t"
+        "ldp	x7, x8, [%[b], 176]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 160]\n\t"
+        "ldp	x7, x8, [%[b], 160]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 144]\n\t"
+        "ldp	x7, x8, [%[b], 144]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 128]\n\t"
+        "ldp	x7, x8, [%[b], 128]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 112]\n\t"
+        "ldp	x7, x8, [%[b], 112]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 96]\n\t"
+        "ldp	x7, x8, [%[b], 96]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 80]\n\t"
+        "ldp	x7, x8, [%[b], 80]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 64]\n\t"
+        "ldp	x7, x8, [%[b], 64]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 48]\n\t"
+        "ldp	x7, x8, [%[b], 48]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 32]\n\t"
+        "ldp	x7, x8, [%[b], 32]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 16]\n\t"
+        "ldp	x7, x8, [%[b], 16]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "ldp	x5, x6, [%[a], 0]\n\t"
+        "ldp	x7, x8, [%[b], 0]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x8, x8, x4\n\t"
+        "subs	x6, x6, x8\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x5, x5, x7\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "eor	%[a], x2, x4\n\t"
+        : [a] "+r" (a)
+        : [b] "r" (b)
+        : "x2", "x3", "x4", "x5", "x6", "x7", "x8"
+    );
+#endif
+
+    return (int64_t)a;
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Nmber to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_4096_div_64(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[128], t2[65];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    div = d[63];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 64);
+    for (i=63; i>=0; i--) {
+        r1 = div_4096_word_64(t1[64 + i], t1[64 + i - 1], div);
+
+        sp_4096_mul_d_64(t2, d, r1);
+        t1[64 + i] += sp_4096_sub_in_place_64(&t1[i], t2);
+        t1[64 + i] -= t2[64];
+        sp_4096_mask_64(t2, d, t1[64 + i]);
+        t1[64 + i] += sp_4096_add_64(&t1[i], &t1[i], t2);
+        sp_4096_mask_64(t2, d, t1[64 + i]);
+        t1[64 + i] += sp_4096_add_64(&t1[i], &t1[i], t2);
+    }
+
+    r1 = sp_4096_cmp_64(t1, d) >= 0;
+    sp_4096_cond_sub_64(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_4096_mod_64(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_4096_div_64(a, m, NULL, r);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+static sp_digit sp_4096_sub_64(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "add	x11, %[a], 512\n\t"
+        "\n1:\n\t"
+        "subs	%[c], xzr, %[c]\n\t"
+        "ldp	x3, x4, [%[a]], #16\n\t"
+        "ldp	x5, x6, [%[a]], #16\n\t"
+        "ldp	x7, x8, [%[b]], #16\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x9, x10, [%[b]], #16\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r]], #16\n\t"
+        "sbcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r]], #16\n\t"
+        "csetm	%[c], cc\n\t"
+        "cmp	%[a], x11\n\t"
+        "b.ne	1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11"
+    );
+
+    return c;
+}
+
+#else
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+static sp_digit sp_4096_sub_64(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    __asm__ __volatile__ (
+        "ldp	x3, x4, [%[a], 0]\n\t"
+        "ldp	x7, x8, [%[b], 0]\n\t"
+        "subs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 16]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 16]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 0]\n\t"
+        "sbcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 16]\n\t"
+        "ldp	x3, x4, [%[a], 32]\n\t"
+        "ldp	x7, x8, [%[b], 32]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 48]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 48]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 32]\n\t"
+        "sbcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 48]\n\t"
+        "ldp	x3, x4, [%[a], 64]\n\t"
+        "ldp	x7, x8, [%[b], 64]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 80]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 80]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 64]\n\t"
+        "sbcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 80]\n\t"
+        "ldp	x3, x4, [%[a], 96]\n\t"
+        "ldp	x7, x8, [%[b], 96]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 112]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 112]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 96]\n\t"
+        "sbcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 112]\n\t"
+        "ldp	x3, x4, [%[a], 128]\n\t"
+        "ldp	x7, x8, [%[b], 128]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 144]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 144]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 128]\n\t"
+        "sbcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 144]\n\t"
+        "ldp	x3, x4, [%[a], 160]\n\t"
+        "ldp	x7, x8, [%[b], 160]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 176]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 176]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 160]\n\t"
+        "sbcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 176]\n\t"
+        "ldp	x3, x4, [%[a], 192]\n\t"
+        "ldp	x7, x8, [%[b], 192]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 208]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 208]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 192]\n\t"
+        "sbcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 208]\n\t"
+        "ldp	x3, x4, [%[a], 224]\n\t"
+        "ldp	x7, x8, [%[b], 224]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 240]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 240]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 224]\n\t"
+        "sbcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 240]\n\t"
+        "ldp	x3, x4, [%[a], 256]\n\t"
+        "ldp	x7, x8, [%[b], 256]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 272]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 272]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 256]\n\t"
+        "sbcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 272]\n\t"
+        "ldp	x3, x4, [%[a], 288]\n\t"
+        "ldp	x7, x8, [%[b], 288]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 304]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 304]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 288]\n\t"
+        "sbcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 304]\n\t"
+        "ldp	x3, x4, [%[a], 320]\n\t"
+        "ldp	x7, x8, [%[b], 320]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 336]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 336]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 320]\n\t"
+        "sbcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 336]\n\t"
+        "ldp	x3, x4, [%[a], 352]\n\t"
+        "ldp	x7, x8, [%[b], 352]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 368]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 368]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 352]\n\t"
+        "sbcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 368]\n\t"
+        "ldp	x3, x4, [%[a], 384]\n\t"
+        "ldp	x7, x8, [%[b], 384]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 400]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 400]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 384]\n\t"
+        "sbcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 400]\n\t"
+        "ldp	x3, x4, [%[a], 416]\n\t"
+        "ldp	x7, x8, [%[b], 416]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 432]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 432]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 416]\n\t"
+        "sbcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 432]\n\t"
+        "ldp	x3, x4, [%[a], 448]\n\t"
+        "ldp	x7, x8, [%[b], 448]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 464]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 464]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 448]\n\t"
+        "sbcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 464]\n\t"
+        "ldp	x3, x4, [%[a], 480]\n\t"
+        "ldp	x7, x8, [%[b], 480]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 496]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 496]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 480]\n\t"
+        "sbcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 496]\n\t"
+        "csetm	%[r], cc\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10"
+    );
+
+    return (sp_digit)r;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Nmber to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_4096_div_64_cond(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[128], t2[65];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    div = d[63];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 64);
+    for (i=63; i>=0; i--) {
+        r1 = div_4096_word_64(t1[64 + i], t1[64 + i - 1], div);
+
+        sp_4096_mul_d_64(t2, d, r1);
+        t1[64 + i] += sp_4096_sub_in_place_64(&t1[i], t2);
+        t1[64 + i] -= t2[64];
+        if (t1[64 + i] != 0) {
+            t1[64 + i] += sp_4096_add_64(&t1[i], &t1[i], d);
+            if (t1[64 + i] != 0)
+                t1[64 + i] += sp_4096_add_64(&t1[i], &t1[i], d);
+        }
+    }
+
+    for (i = 63; i > 0; i--) {
+        if (t1[i] != d[i])
+            break;
+    }
+    if (t1[i] >= d[i]) {
+        sp_4096_sub_64(r, t1, d);
+    }
+    else {
+        XMEMCPY(r, t1, sizeof(*t1) * 64);
+    }
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_4096_mod_64_cond(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_4096_div_64_cond(a, m, NULL, r);
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
+                                                     defined(WOLFSSL_HAVE_SP_DH)
+#ifdef WOLFSSL_SP_SMALL
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * r     A single precision number that is the result of the operation.
+ * a     A single precision number being exponentiated.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_4096_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[16][128];
+#else
+    sp_digit* t[16];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 128, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<16; i++) {
+            t[i] = td + i * 128;
+        }
+#endif
+        norm = t[0];
+
+        sp_4096_mont_setup(m, &mp);
+        sp_4096_mont_norm_64(norm, m);
+
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 64U);
+        if (reduceA != 0) {
+            err = sp_4096_mod_64(t[1] + 64, a, m);
+            if (err == MP_OKAY) {
+                err = sp_4096_mod_64(t[1], t[1], m);
+            }
+        }
+        else {
+            XMEMCPY(t[1] + 64, a, sizeof(sp_digit) * 64);
+            err = sp_4096_mod_64(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_mont_sqr_64(t[ 2], t[ 1], m, mp);
+        sp_4096_mont_mul_64(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_4096_mont_sqr_64(t[ 4], t[ 2], m, mp);
+        sp_4096_mont_mul_64(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_4096_mont_sqr_64(t[ 6], t[ 3], m, mp);
+        sp_4096_mont_mul_64(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_4096_mont_sqr_64(t[ 8], t[ 4], m, mp);
+        sp_4096_mont_mul_64(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_4096_mont_sqr_64(t[10], t[ 5], m, mp);
+        sp_4096_mont_mul_64(t[11], t[ 6], t[ 5], m, mp);
+        sp_4096_mont_sqr_64(t[12], t[ 6], m, mp);
+        sp_4096_mont_mul_64(t[13], t[ 7], t[ 6], m, mp);
+        sp_4096_mont_sqr_64(t[14], t[ 7], m, mp);
+        sp_4096_mont_mul_64(t[15], t[ 8], t[ 7], m, mp);
+
+        i = (bits - 1) / 64;
+        n = e[i--];
+        c = bits & 63;
+        if (c == 0) {
+            c = 64;
+        }
+        c -= bits % 4;
+        if (c == 64) {
+            c = 60;
+        }
+        y = (int)(n >> c);
+        n <<= 64 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 64);
+        for (; i>=0 || c>=4; ) {
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 60;
+                n <<= 4;
+                c = 60;
+            }
+            else if (c < 4) {
+                y = n >> 60;
+                n = e[i--];
+                c = 4 - c;
+                y |= n >> (64 - c);
+                n <<= c;
+                c = 64 - c;
+            }
+            else {
+                y = (n >> 60) & 0xf;
+                n <<= 4;
+                c -= 4;
+            }
+
+            sp_4096_mont_sqr_64(r, r, m, mp);
+            sp_4096_mont_sqr_64(r, r, m, mp);
+            sp_4096_mont_sqr_64(r, r, m, mp);
+            sp_4096_mont_sqr_64(r, r, m, mp);
+
+            sp_4096_mont_mul_64(r, r, t[y], m, mp);
+        }
+
+        XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U);
+        sp_4096_mont_reduce_64(r, m, mp);
+
+        mask = 0 - (sp_4096_cmp_64(r, m) >= 0);
+        sp_4096_cond_sub_64(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#else
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * r     A single precision number that is the result of the operation.
+ * a     A single precision number being exponentiated.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_4096_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[32][128];
+#else
+    sp_digit* t[32];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 128, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<32; i++) {
+            t[i] = td + i * 128;
+        }
+#endif
+        norm = t[0];
+
+        sp_4096_mont_setup(m, &mp);
+        sp_4096_mont_norm_64(norm, m);
+
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 64U);
+        if (reduceA != 0) {
+            err = sp_4096_mod_64(t[1] + 64, a, m);
+            if (err == MP_OKAY) {
+                err = sp_4096_mod_64(t[1], t[1], m);
+            }
+        }
+        else {
+            XMEMCPY(t[1] + 64, a, sizeof(sp_digit) * 64);
+            err = sp_4096_mod_64(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_mont_sqr_64(t[ 2], t[ 1], m, mp);
+        sp_4096_mont_mul_64(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_4096_mont_sqr_64(t[ 4], t[ 2], m, mp);
+        sp_4096_mont_mul_64(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_4096_mont_sqr_64(t[ 6], t[ 3], m, mp);
+        sp_4096_mont_mul_64(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_4096_mont_sqr_64(t[ 8], t[ 4], m, mp);
+        sp_4096_mont_mul_64(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_4096_mont_sqr_64(t[10], t[ 5], m, mp);
+        sp_4096_mont_mul_64(t[11], t[ 6], t[ 5], m, mp);
+        sp_4096_mont_sqr_64(t[12], t[ 6], m, mp);
+        sp_4096_mont_mul_64(t[13], t[ 7], t[ 6], m, mp);
+        sp_4096_mont_sqr_64(t[14], t[ 7], m, mp);
+        sp_4096_mont_mul_64(t[15], t[ 8], t[ 7], m, mp);
+        sp_4096_mont_sqr_64(t[16], t[ 8], m, mp);
+        sp_4096_mont_mul_64(t[17], t[ 9], t[ 8], m, mp);
+        sp_4096_mont_sqr_64(t[18], t[ 9], m, mp);
+        sp_4096_mont_mul_64(t[19], t[10], t[ 9], m, mp);
+        sp_4096_mont_sqr_64(t[20], t[10], m, mp);
+        sp_4096_mont_mul_64(t[21], t[11], t[10], m, mp);
+        sp_4096_mont_sqr_64(t[22], t[11], m, mp);
+        sp_4096_mont_mul_64(t[23], t[12], t[11], m, mp);
+        sp_4096_mont_sqr_64(t[24], t[12], m, mp);
+        sp_4096_mont_mul_64(t[25], t[13], t[12], m, mp);
+        sp_4096_mont_sqr_64(t[26], t[13], m, mp);
+        sp_4096_mont_mul_64(t[27], t[14], t[13], m, mp);
+        sp_4096_mont_sqr_64(t[28], t[14], m, mp);
+        sp_4096_mont_mul_64(t[29], t[15], t[14], m, mp);
+        sp_4096_mont_sqr_64(t[30], t[15], m, mp);
+        sp_4096_mont_mul_64(t[31], t[16], t[15], m, mp);
+
+        i = (bits - 1) / 64;
+        n = e[i--];
+        c = bits & 63;
+        if (c == 0) {
+            c = 64;
+        }
+        c -= bits % 5;
+        if (c == 64) {
+            c = 59;
+        }
+        y = (int)(n >> c);
+        n <<= 64 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 64);
+        for (; i>=0 || c>=5; ) {
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 59;
+                n <<= 5;
+                c = 59;
+            }
+            else if (c < 5) {
+                y = n >> 59;
+                n = e[i--];
+                c = 5 - c;
+                y |= n >> (64 - c);
+                n <<= c;
+                c = 64 - c;
+            }
+            else {
+                y = (n >> 59) & 0x1f;
+                n <<= 5;
+                c -= 5;
+            }
+
+            sp_4096_mont_sqr_64(r, r, m, mp);
+            sp_4096_mont_sqr_64(r, r, m, mp);
+            sp_4096_mont_sqr_64(r, r, m, mp);
+            sp_4096_mont_sqr_64(r, r, m, mp);
+            sp_4096_mont_sqr_64(r, r, m, mp);
+
+            sp_4096_mont_mul_64(r, r, t[y], m, mp);
+        }
+
+        XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U);
+        sp_4096_mont_reduce_64(r, m, mp);
+
+        mask = 0 - (sp_4096_cmp_64(r, m) >= 0);
+        sp_4096_cond_sub_64(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */
+
+#ifdef WOLFSSL_HAVE_SP_RSA
+/* RSA public key operation.
+ *
+ * in      Array of bytes representing the number to exponentiate, base.
+ * inLen   Number of bytes in base.
+ * em      Public exponent.
+ * mm      Modulus.
+ * out     Buffer to hold big-endian bytes of exponentiation result.
+ *         Must be at least 512 bytes long.
+ * outLen  Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPublic_4096(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
+    byte* out, word32* outLen)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit a[128], m[64], r[128];
+#else
+    sp_digit* d = NULL;
+    sp_digit* a;
+    sp_digit* m;
+    sp_digit* r;
+#endif
+    sp_digit *ah;
+    sp_digit e[1];
+    int err = MP_OKAY;
+
+    if (*outLen < 512)
+        err = MP_TO_E;
+    if (err == MP_OKAY && (mp_count_bits(em) > 64 || inLen > 512 ||
+                                                     mp_count_bits(mm) != 4096))
+        err = MP_READ_E;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 5, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (d == NULL)
+            err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        a = d;
+        r = a + 64 * 2;
+        m = r + 64 * 2;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        ah = a + 64;
+
+        sp_4096_from_bin(ah, 64, in, inLen);
+#if DIGIT_BIT >= 64
+        e[0] = em->dp[0];
+#else
+        e[0] = em->dp[0];
+        if (em->used > 1) {
+            e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
+        }
+#endif
+        if (e[0] == 0) {
+            err = MP_EXPTMOD_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        sp_4096_from_mp(m, 64, mm);
+
+        if (e[0] == 0x3) {
+            if (err == MP_OKAY) {
+                sp_4096_sqr_64(r, ah);
+                err = sp_4096_mod_64_cond(r, r, m);
+            }
+            if (err == MP_OKAY) {
+                sp_4096_mul_64(r, ah, r);
+                err = sp_4096_mod_64_cond(r, r, m);
+            }
+        }
+        else {
+            int i;
+            sp_digit mp;
+
+            sp_4096_mont_setup(m, &mp);
+
+            /* Convert to Montgomery form. */
+            XMEMSET(a, 0, sizeof(sp_digit) * 64);
+            err = sp_4096_mod_64_cond(a, a, m);
+
+            if (err == MP_OKAY) {
+                for (i = 63; i >= 0; i--) {
+                    if (e[0] >> i) {
+                        break;
+                    }
+                }
+
+                XMEMCPY(r, a, sizeof(sp_digit) * 64);
+                for (i--; i>=0; i--) {
+                    sp_4096_mont_sqr_64(r, r, m, mp);
+                    if (((e[0] >> i) & 1) == 1) {
+                        sp_4096_mont_mul_64(r, r, a, m, mp);
+                    }
+                }
+                XMEMSET(&r[64], 0, sizeof(sp_digit) * 64);
+                sp_4096_mont_reduce_64(r, m, mp);
+
+                for (i = 63; i > 0; i--) {
+                    if (r[i] != m[i]) {
+                        break;
+                    }
+                }
+                if (r[i] >= m[i]) {
+                    sp_4096_sub_in_place_64(r, m);
+                }
+            }
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+#endif
+
+    return err;
+}
+
+#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
+    sp_digit* a;
+    sp_digit* d = NULL;
+    sp_digit* m;
+    sp_digit* r;
+    int err = MP_OKAY;
+
+    (void)pm;
+    (void)qm;
+    (void)dpm;
+    (void)dqm;
+    (void)qim;
+
+    if (*outLen < 512U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(dm) > 4096) {
+           err = MP_READ_E;
+        }
+        if (inLen > 512) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 4, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        a = d + 64;
+        m = a + 128;
+        r = a;
+
+        sp_4096_from_bin(a, 64, in, inLen);
+        sp_4096_from_mp(d, 64, dm);
+        sp_4096_from_mp(m, 64, mm);
+        err = sp_4096_mod_exp_64(r, a, d, 4096, m, 0);
+    }
+    if (err == MP_OKAY) {
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+    }
+
+    if (d != NULL) {
+        XMEMSET(d, 0, sizeof(sp_digit) * 64);
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+
+    return err;
+#else
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r  A single precision number representing conditional add result.
+ * a  A single precision number to add with.
+ * b  A single precision number to add.
+ * m  Mask value to apply.
+ */
+static sp_digit sp_4096_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	x8, #0\n\t"
+        "1:\n\t"
+        "adds	%[c], %[c], #-1\n\t"
+        "ldr	x4, [%[a], x8]\n\t"
+        "ldr	x5, [%[b], x8]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "adcs	x4, x4, x5\n\t"
+        "cset	%[c], cs\n\t"
+        "str	x4, [%[r], x8]\n\t"
+        "add	x8, x8, #8\n\t"
+        "cmp	x8, 256\n\t"
+        "b.lt	1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12"
+    );
+
+    return c;
+#else
+    __asm__ __volatile__ (
+
+        "ldp	x5, x7, [%[b], 0]\n\t"
+        "ldp	x11, x12, [%[b], 16]\n\t"
+        "ldp	x4, x6, [%[a], 0]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 16]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "adds	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "adcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "adcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 0]\n\t"
+        "adcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 16]\n\t"
+        "ldp	x5, x7, [%[b], 32]\n\t"
+        "ldp	x11, x12, [%[b], 48]\n\t"
+        "ldp	x4, x6, [%[a], 32]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 48]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "adcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "adcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "adcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 32]\n\t"
+        "adcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 48]\n\t"
+        "ldp	x5, x7, [%[b], 64]\n\t"
+        "ldp	x11, x12, [%[b], 80]\n\t"
+        "ldp	x4, x6, [%[a], 64]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 80]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "adcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "adcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "adcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 64]\n\t"
+        "adcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 80]\n\t"
+        "ldp	x5, x7, [%[b], 96]\n\t"
+        "ldp	x11, x12, [%[b], 112]\n\t"
+        "ldp	x4, x6, [%[a], 96]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 112]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "adcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "adcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "adcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 96]\n\t"
+        "adcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 112]\n\t"
+        "ldp	x5, x7, [%[b], 128]\n\t"
+        "ldp	x11, x12, [%[b], 144]\n\t"
+        "ldp	x4, x6, [%[a], 128]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 144]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "adcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "adcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "adcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 128]\n\t"
+        "adcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 144]\n\t"
+        "ldp	x5, x7, [%[b], 160]\n\t"
+        "ldp	x11, x12, [%[b], 176]\n\t"
+        "ldp	x4, x6, [%[a], 160]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 176]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "adcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "adcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "adcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 160]\n\t"
+        "adcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 176]\n\t"
+        "ldp	x5, x7, [%[b], 192]\n\t"
+        "ldp	x11, x12, [%[b], 208]\n\t"
+        "ldp	x4, x6, [%[a], 192]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 208]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "adcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "adcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "adcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 192]\n\t"
+        "adcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 208]\n\t"
+        "ldp	x5, x7, [%[b], 224]\n\t"
+        "ldp	x11, x12, [%[b], 240]\n\t"
+        "ldp	x4, x6, [%[a], 224]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 240]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "adcs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "adcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "adcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 224]\n\t"
+        "adcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 240]\n\t"
+        "cset	%[r], cs\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12"
+    );
+
+    return (sp_digit)r;
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* RSA private key operation.
+ *
+ * in      Array of bytes representing the number to exponentiate, base.
+ * inLen   Number of bytes in base.
+ * dm      Private exponent.
+ * pm      First prime.
+ * qm      Second prime.
+ * dpm     First prime's CRT exponent.
+ * dqm     Second prime's CRT exponent.
+ * qim     Inverse of second prime mod p.
+ * mm      Modulus.
+ * out     Buffer to hold big-endian bytes of exponentiation result.
+ *         Must be at least 512 bytes long.
+ * outLen  Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm,
+    mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
+    byte* out, word32* outLen)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit a[64 * 2];
+    sp_digit p[32], q[32], dp[32];
+    sp_digit tmpa[64], tmpb[64];
+#else
+    sp_digit* t = NULL;
+    sp_digit* a;
+    sp_digit* p;
+    sp_digit* q;
+    sp_digit* dp;
+    sp_digit* tmpa;
+    sp_digit* tmpb;
+#endif
+    sp_digit* r;
+    sp_digit* qi;
+    sp_digit* dq;
+    sp_digit c;
+    int err = MP_OKAY;
+
+    (void)dm;
+    (void)mm;
+
+    if (*outLen < 512)
+        err = MP_TO_E;
+    if (err == MP_OKAY && (inLen > 512 || mp_count_bits(mm) != 4096))
+        err = MP_READ_E;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 11, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (t == NULL)
+            err = MEMORY_E;
+    }
+    if (err == MP_OKAY) {
+        a = t;
+        p = a + 64 * 2;
+        q = p + 32;
+        qi = dq = dp = q + 32;
+        tmpa = qi + 32;
+        tmpb = tmpa + 64;
+
+        r = t + 64;
+    }
+#else
+#endif
+
+    if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+        r = a;
+        qi = dq = dp;
+#endif
+        sp_4096_from_bin(a, 64, in, inLen);
+        sp_4096_from_mp(p, 32, pm);
+        sp_4096_from_mp(q, 32, qm);
+        sp_4096_from_mp(dp, 32, dpm);
+
+        err = sp_2048_mod_exp_32(tmpa, a, dp, 2048, p, 1);
+    }
+    if (err == MP_OKAY) {
+        sp_4096_from_mp(dq, 32, dqm);
+        err = sp_2048_mod_exp_32(tmpb, a, dq, 2048, q, 1);
+    }
+
+    if (err == MP_OKAY) {
+        c = sp_2048_sub_in_place_32(tmpa, tmpb);
+        c += sp_4096_cond_add_32(tmpa, tmpa, p, c);
+        sp_4096_cond_add_32(tmpa, tmpa, p, c);
+
+        sp_2048_from_mp(qi, 32, qim);
+        sp_2048_mul_32(tmpa, tmpa, qi);
+        err = sp_2048_mod_32(tmpa, tmpa, p);
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_mul_32(tmpa, q, tmpa);
+        XMEMSET(&tmpb[32], 0, sizeof(sp_digit) * 32);
+        sp_4096_add_64(r, tmpb, tmpa);
+
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        XMEMSET(t, 0, sizeof(sp_digit) * 32 * 11);
+        XFREE(t, NULL, DYNAMIC_TYPE_RSA);
+    }
+#else
+    XMEMSET(tmpa, 0, sizeof(tmpa));
+    XMEMSET(tmpb, 0, sizeof(tmpb));
+    XMEMSET(p,    0, sizeof(p));
+    XMEMSET(q,    0, sizeof(q));
+    XMEMSET(dp,   0, sizeof(dp));
+#endif
+
+    return err;
+}
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
+#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
+#endif /* WOLFSSL_HAVE_SP_RSA */
+#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
+                                              !defined(WOLFSSL_RSA_PUBLIC_ONLY))
+/* Convert an array of sp_digit to an mp_int.
+ *
+ * a  A single precision integer.
+ * r  A multi-precision integer.
+ */
+static int sp_4096_to_mp(const sp_digit* a, mp_int* r)
+{
+    int err;
+
+    err = mp_grow(r, (4096 + DIGIT_BIT - 1) / DIGIT_BIT);
+    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 64
+        XMEMCPY(r->dp, a, sizeof(sp_digit) * 64);
+        r->used = 64;
+        mp_clamp(r);
+#elif DIGIT_BIT < 64
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 64; i++) {
+            r->dp[j] |= (mp_digit)(a[i] << s);
+            r->dp[j] &= (1L << DIGIT_BIT) - 1;
+            s = DIGIT_BIT - s;
+            r->dp[++j] = (mp_digit)(a[i] >> s);
+            while (s + DIGIT_BIT <= 64) {
+                s += DIGIT_BIT;
+                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+                if (s == SP_WORD_SIZE) {
+                    r->dp[j] = 0;
+                }
+                else {
+                    r->dp[j] = (mp_digit)(a[i] >> s);
+                }
+            }
+            s = 64 - s;
+        }
+        r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#else
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 64; i++) {
+            r->dp[j] |= ((mp_digit)a[i]) << s;
+            if (s + 64 >= DIGIT_BIT) {
+    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+                r->dp[j] &= (1L << DIGIT_BIT) - 1;
+    #endif
+                s = DIGIT_BIT - s;
+                r->dp[++j] = a[i] >> s;
+                s = 64 - s;
+            }
+            else {
+                s += 64;
+            }
+        }
+        r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#endif
+    }
+
+    return err;
+}
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base  Base. MP integer.
+ * exp   Exponent. MP integer.
+ * mod   Modulus. MP integer.
+ * res   Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_4096(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+    int err = MP_OKAY;
+    sp_digit b[128], e[64], m[64];
+    sp_digit* r = b;
+    int expBits = mp_count_bits(exp);
+
+    if (mp_count_bits(base) > 4096) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expBits > 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_from_mp(b, 64, base);
+        sp_4096_from_mp(e, 64, exp);
+        sp_4096_from_mp(m, 64, mod);
+
+        err = sp_4096_mod_exp_64(r, b, e, expBits, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_4096_to_mp(r, res);
+    }
+
+    XMEMSET(e, 0, sizeof(e));
+
+    return err;
+}
+
+#ifdef WOLFSSL_HAVE_SP_DH
+
+#ifdef HAVE_FFDHE_4096
+static void sp_4096_lshift_64(sp_digit* r, sp_digit* a, byte n)
+{
+    __asm__ __volatile__ (
+        "mov	x6, 63\n\t"
+        "sub	x6, x6, %[n]\n\t"
+        "ldr	x3, [%[a], 504]\n\t"
+        "lsr	x4, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x4, x4, x6\n\t"
+        "ldr	x2, [%[a], 496]\n\t"
+        "str	x4, [%[r], 512]\n\t"
+        "lsr	x5, x2, 1\n\t"
+        "lsl	x2, x2, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x3, x3, x5\n\t"
+        "ldr	x4, [%[a], 488]\n\t"
+        "str	x3, [%[r], 504]\n\t"
+        "lsr	x5, x4, 1\n\t"
+        "lsl	x4, x4, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x2, x2, x5\n\t"
+        "ldr	x3, [%[a], 480]\n\t"
+        "str	x2, [%[r], 496]\n\t"
+        "lsr	x5, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x4, x4, x5\n\t"
+        "ldr	x2, [%[a], 472]\n\t"
+        "str	x4, [%[r], 488]\n\t"
+        "lsr	x5, x2, 1\n\t"
+        "lsl	x2, x2, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x3, x3, x5\n\t"
+        "ldr	x4, [%[a], 464]\n\t"
+        "str	x3, [%[r], 480]\n\t"
+        "lsr	x5, x4, 1\n\t"
+        "lsl	x4, x4, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x2, x2, x5\n\t"
+        "ldr	x3, [%[a], 456]\n\t"
+        "str	x2, [%[r], 472]\n\t"
+        "lsr	x5, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x4, x4, x5\n\t"
+        "ldr	x2, [%[a], 448]\n\t"
+        "str	x4, [%[r], 464]\n\t"
+        "lsr	x5, x2, 1\n\t"
+        "lsl	x2, x2, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x3, x3, x5\n\t"
+        "ldr	x4, [%[a], 440]\n\t"
+        "str	x3, [%[r], 456]\n\t"
+        "lsr	x5, x4, 1\n\t"
+        "lsl	x4, x4, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x2, x2, x5\n\t"
+        "ldr	x3, [%[a], 432]\n\t"
+        "str	x2, [%[r], 448]\n\t"
+        "lsr	x5, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x4, x4, x5\n\t"
+        "ldr	x2, [%[a], 424]\n\t"
+        "str	x4, [%[r], 440]\n\t"
+        "lsr	x5, x2, 1\n\t"
+        "lsl	x2, x2, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x3, x3, x5\n\t"
+        "ldr	x4, [%[a], 416]\n\t"
+        "str	x3, [%[r], 432]\n\t"
+        "lsr	x5, x4, 1\n\t"
+        "lsl	x4, x4, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x2, x2, x5\n\t"
+        "ldr	x3, [%[a], 408]\n\t"
+        "str	x2, [%[r], 424]\n\t"
+        "lsr	x5, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x4, x4, x5\n\t"
+        "ldr	x2, [%[a], 400]\n\t"
+        "str	x4, [%[r], 416]\n\t"
+        "lsr	x5, x2, 1\n\t"
+        "lsl	x2, x2, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x3, x3, x5\n\t"
+        "ldr	x4, [%[a], 392]\n\t"
+        "str	x3, [%[r], 408]\n\t"
+        "lsr	x5, x4, 1\n\t"
+        "lsl	x4, x4, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x2, x2, x5\n\t"
+        "ldr	x3, [%[a], 384]\n\t"
+        "str	x2, [%[r], 400]\n\t"
+        "lsr	x5, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x4, x4, x5\n\t"
+        "ldr	x2, [%[a], 376]\n\t"
+        "str	x4, [%[r], 392]\n\t"
+        "lsr	x5, x2, 1\n\t"
+        "lsl	x2, x2, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x3, x3, x5\n\t"
+        "ldr	x4, [%[a], 368]\n\t"
+        "str	x3, [%[r], 384]\n\t"
+        "lsr	x5, x4, 1\n\t"
+        "lsl	x4, x4, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x2, x2, x5\n\t"
+        "ldr	x3, [%[a], 360]\n\t"
+        "str	x2, [%[r], 376]\n\t"
+        "lsr	x5, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x4, x4, x5\n\t"
+        "ldr	x2, [%[a], 352]\n\t"
+        "str	x4, [%[r], 368]\n\t"
+        "lsr	x5, x2, 1\n\t"
+        "lsl	x2, x2, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x3, x3, x5\n\t"
+        "ldr	x4, [%[a], 344]\n\t"
+        "str	x3, [%[r], 360]\n\t"
+        "lsr	x5, x4, 1\n\t"
+        "lsl	x4, x4, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x2, x2, x5\n\t"
+        "ldr	x3, [%[a], 336]\n\t"
+        "str	x2, [%[r], 352]\n\t"
+        "lsr	x5, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x4, x4, x5\n\t"
+        "ldr	x2, [%[a], 328]\n\t"
+        "str	x4, [%[r], 344]\n\t"
+        "lsr	x5, x2, 1\n\t"
+        "lsl	x2, x2, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x3, x3, x5\n\t"
+        "ldr	x4, [%[a], 320]\n\t"
+        "str	x3, [%[r], 336]\n\t"
+        "lsr	x5, x4, 1\n\t"
+        "lsl	x4, x4, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x2, x2, x5\n\t"
+        "ldr	x3, [%[a], 312]\n\t"
+        "str	x2, [%[r], 328]\n\t"
+        "lsr	x5, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x4, x4, x5\n\t"
+        "ldr	x2, [%[a], 304]\n\t"
+        "str	x4, [%[r], 320]\n\t"
+        "lsr	x5, x2, 1\n\t"
+        "lsl	x2, x2, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x3, x3, x5\n\t"
+        "ldr	x4, [%[a], 296]\n\t"
+        "str	x3, [%[r], 312]\n\t"
+        "lsr	x5, x4, 1\n\t"
+        "lsl	x4, x4, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x2, x2, x5\n\t"
+        "ldr	x3, [%[a], 288]\n\t"
+        "str	x2, [%[r], 304]\n\t"
+        "lsr	x5, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x4, x4, x5\n\t"
+        "ldr	x2, [%[a], 280]\n\t"
+        "str	x4, [%[r], 296]\n\t"
+        "lsr	x5, x2, 1\n\t"
+        "lsl	x2, x2, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x3, x3, x5\n\t"
+        "ldr	x4, [%[a], 272]\n\t"
+        "str	x3, [%[r], 288]\n\t"
+        "lsr	x5, x4, 1\n\t"
+        "lsl	x4, x4, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x2, x2, x5\n\t"
+        "ldr	x3, [%[a], 264]\n\t"
+        "str	x2, [%[r], 280]\n\t"
+        "lsr	x5, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x4, x4, x5\n\t"
+        "ldr	x2, [%[a], 256]\n\t"
+        "str	x4, [%[r], 272]\n\t"
+        "lsr	x5, x2, 1\n\t"
+        "lsl	x2, x2, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x3, x3, x5\n\t"
+        "ldr	x4, [%[a], 248]\n\t"
+        "str	x3, [%[r], 264]\n\t"
+        "lsr	x5, x4, 1\n\t"
+        "lsl	x4, x4, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x2, x2, x5\n\t"
+        "ldr	x3, [%[a], 240]\n\t"
+        "str	x2, [%[r], 256]\n\t"
+        "lsr	x5, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x4, x4, x5\n\t"
+        "ldr	x2, [%[a], 232]\n\t"
+        "str	x4, [%[r], 248]\n\t"
+        "lsr	x5, x2, 1\n\t"
+        "lsl	x2, x2, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x3, x3, x5\n\t"
+        "ldr	x4, [%[a], 224]\n\t"
+        "str	x3, [%[r], 240]\n\t"
+        "lsr	x5, x4, 1\n\t"
+        "lsl	x4, x4, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x2, x2, x5\n\t"
+        "ldr	x3, [%[a], 216]\n\t"
+        "str	x2, [%[r], 232]\n\t"
+        "lsr	x5, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x4, x4, x5\n\t"
+        "ldr	x2, [%[a], 208]\n\t"
+        "str	x4, [%[r], 224]\n\t"
+        "lsr	x5, x2, 1\n\t"
+        "lsl	x2, x2, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x3, x3, x5\n\t"
+        "ldr	x4, [%[a], 200]\n\t"
+        "str	x3, [%[r], 216]\n\t"
+        "lsr	x5, x4, 1\n\t"
+        "lsl	x4, x4, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x2, x2, x5\n\t"
+        "ldr	x3, [%[a], 192]\n\t"
+        "str	x2, [%[r], 208]\n\t"
+        "lsr	x5, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x4, x4, x5\n\t"
+        "ldr	x2, [%[a], 184]\n\t"
+        "str	x4, [%[r], 200]\n\t"
+        "lsr	x5, x2, 1\n\t"
+        "lsl	x2, x2, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x3, x3, x5\n\t"
+        "ldr	x4, [%[a], 176]\n\t"
+        "str	x3, [%[r], 192]\n\t"
+        "lsr	x5, x4, 1\n\t"
+        "lsl	x4, x4, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x2, x2, x5\n\t"
+        "ldr	x3, [%[a], 168]\n\t"
+        "str	x2, [%[r], 184]\n\t"
+        "lsr	x5, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x4, x4, x5\n\t"
+        "ldr	x2, [%[a], 160]\n\t"
+        "str	x4, [%[r], 176]\n\t"
+        "lsr	x5, x2, 1\n\t"
+        "lsl	x2, x2, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x3, x3, x5\n\t"
+        "ldr	x4, [%[a], 152]\n\t"
+        "str	x3, [%[r], 168]\n\t"
+        "lsr	x5, x4, 1\n\t"
+        "lsl	x4, x4, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x2, x2, x5\n\t"
+        "ldr	x3, [%[a], 144]\n\t"
+        "str	x2, [%[r], 160]\n\t"
+        "lsr	x5, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x4, x4, x5\n\t"
+        "ldr	x2, [%[a], 136]\n\t"
+        "str	x4, [%[r], 152]\n\t"
+        "lsr	x5, x2, 1\n\t"
+        "lsl	x2, x2, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x3, x3, x5\n\t"
+        "ldr	x4, [%[a], 128]\n\t"
+        "str	x3, [%[r], 144]\n\t"
+        "lsr	x5, x4, 1\n\t"
+        "lsl	x4, x4, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x2, x2, x5\n\t"
+        "ldr	x3, [%[a], 120]\n\t"
+        "str	x2, [%[r], 136]\n\t"
+        "lsr	x5, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x4, x4, x5\n\t"
+        "ldr	x2, [%[a], 112]\n\t"
+        "str	x4, [%[r], 128]\n\t"
+        "lsr	x5, x2, 1\n\t"
+        "lsl	x2, x2, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x3, x3, x5\n\t"
+        "ldr	x4, [%[a], 104]\n\t"
+        "str	x3, [%[r], 120]\n\t"
+        "lsr	x5, x4, 1\n\t"
+        "lsl	x4, x4, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x2, x2, x5\n\t"
+        "ldr	x3, [%[a], 96]\n\t"
+        "str	x2, [%[r], 112]\n\t"
+        "lsr	x5, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x4, x4, x5\n\t"
+        "ldr	x2, [%[a], 88]\n\t"
+        "str	x4, [%[r], 104]\n\t"
+        "lsr	x5, x2, 1\n\t"
+        "lsl	x2, x2, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x3, x3, x5\n\t"
+        "ldr	x4, [%[a], 80]\n\t"
+        "str	x3, [%[r], 96]\n\t"
+        "lsr	x5, x4, 1\n\t"
+        "lsl	x4, x4, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x2, x2, x5\n\t"
+        "ldr	x3, [%[a], 72]\n\t"
+        "str	x2, [%[r], 88]\n\t"
+        "lsr	x5, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x4, x4, x5\n\t"
+        "ldr	x2, [%[a], 64]\n\t"
+        "str	x4, [%[r], 80]\n\t"
+        "lsr	x5, x2, 1\n\t"
+        "lsl	x2, x2, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x3, x3, x5\n\t"
+        "ldr	x4, [%[a], 56]\n\t"
+        "str	x3, [%[r], 72]\n\t"
+        "lsr	x5, x4, 1\n\t"
+        "lsl	x4, x4, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x2, x2, x5\n\t"
+        "ldr	x3, [%[a], 48]\n\t"
+        "str	x2, [%[r], 64]\n\t"
+        "lsr	x5, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x4, x4, x5\n\t"
+        "ldr	x2, [%[a], 40]\n\t"
+        "str	x4, [%[r], 56]\n\t"
+        "lsr	x5, x2, 1\n\t"
+        "lsl	x2, x2, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x3, x3, x5\n\t"
+        "ldr	x4, [%[a], 32]\n\t"
+        "str	x3, [%[r], 48]\n\t"
+        "lsr	x5, x4, 1\n\t"
+        "lsl	x4, x4, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x2, x2, x5\n\t"
+        "ldr	x3, [%[a], 24]\n\t"
+        "str	x2, [%[r], 40]\n\t"
+        "lsr	x5, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x4, x4, x5\n\t"
+        "ldr	x2, [%[a], 16]\n\t"
+        "str	x4, [%[r], 32]\n\t"
+        "lsr	x5, x2, 1\n\t"
+        "lsl	x2, x2, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x3, x3, x5\n\t"
+        "ldr	x4, [%[a], 8]\n\t"
+        "str	x3, [%[r], 24]\n\t"
+        "lsr	x5, x4, 1\n\t"
+        "lsl	x4, x4, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x2, x2, x5\n\t"
+        "ldr	x3, [%[a], 0]\n\t"
+        "str	x2, [%[r], 16]\n\t"
+        "lsr	x5, x3, 1\n\t"
+        "lsl	x3, x3, %[n]\n\t"
+        "lsr	x5, x5, x6\n\t"
+        "orr	x4, x4, x5\n\t"
+        "str	x3, [%[r]]\n\t"
+        "str	x4, [%[r], 8]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [n] "r" (n)
+        : "memory", "x2", "x3", "x4", "x5", "x6"
+    );
+}
+
+/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
+ *
+ * r     A single precision number that is the result of the operation.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_4096_mod_exp_2_64(sp_digit* r, const sp_digit* e, int bits,
+        const sp_digit* m)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit nd[128];
+    sp_digit td[65];
+#else
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit* tmp;
+    sp_digit mp = 1;
+    sp_digit n, o;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 193, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        norm = td;
+        tmp  = td + 128;
+#else
+        norm = nd;
+        tmp  = td;
+#endif
+
+        sp_4096_mont_setup(m, &mp);
+        sp_4096_mont_norm_64(norm, m);
+
+        i = (bits - 1) / 64;
+        n = e[i--];
+        c = bits & 63;
+        if (c == 0) {
+            c = 64;
+        }
+        c -= bits % 6;
+        if (c == 64) {
+            c = 58;
+        }
+        y = (int)(n >> c);
+        n <<= 64 - c;
+        sp_4096_lshift_64(r, norm, y);
+        for (; i>=0 || c>=6; ) {
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 58;
+                n <<= 6;
+                c = 58;
+            }
+            else if (c < 6) {
+                y = n >> 58;
+                n = e[i--];
+                c = 6 - c;
+                y |= n >> (64 - c);
+                n <<= c;
+                c = 64 - c;
+            }
+            else {
+                y = (n >> 58) & 0x3f;
+                n <<= 6;
+                c -= 6;
+            }
+
+            sp_4096_mont_sqr_64(r, r, m, mp);
+            sp_4096_mont_sqr_64(r, r, m, mp);
+            sp_4096_mont_sqr_64(r, r, m, mp);
+            sp_4096_mont_sqr_64(r, r, m, mp);
+            sp_4096_mont_sqr_64(r, r, m, mp);
+            sp_4096_mont_sqr_64(r, r, m, mp);
+
+            sp_4096_lshift_64(r, r, y);
+            sp_4096_mul_d_64(tmp, norm, r[64]);
+            r[64] = 0;
+            o = sp_4096_add_64(r, r, tmp);
+            sp_4096_cond_sub_64(r, r, m, (sp_digit)0 - o);
+        }
+
+        XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U);
+        sp_4096_mont_reduce_64(r, m, mp);
+
+        mask = 0 - (sp_4096_cmp_64(r, m) >= 0);
+        sp_4096_cond_sub_64(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#endif /* HAVE_FFDHE_4096 */
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base     Base.
+ * exp      Array of bytes that is the exponent.
+ * expLen   Length of data, in bytes, in exponent.
+ * mod      Modulus.
+ * out      Buffer to hold big-endian bytes of exponentiation result.
+ *          Must be at least 512 bytes long.
+ * outLen   Length, in bytes, of exponentiation result.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_DhExp_4096(mp_int* base, const byte* exp, word32 expLen,
+    mp_int* mod, byte* out, word32* outLen)
+{
+    int err = MP_OKAY;
+    sp_digit b[128], e[64], m[64];
+    sp_digit* r = b;
+    word32 i;
+
+    if (mp_count_bits(base) > 4096) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expLen > 512) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_from_mp(b, 64, base);
+        sp_4096_from_bin(e, 64, exp, expLen);
+        sp_4096_from_mp(m, 64, mod);
+
+    #ifdef HAVE_FFDHE_4096
+        if (base->used == 1 && base->dp[0] == 2 && m[63] == (sp_digit)-1)
+            err = sp_4096_mod_exp_2_64(r, e, expLen * 8, m);
+        else
+    #endif
+            err = sp_4096_mod_exp_64(r, b, e, expLen * 8, m, 0);
+
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+        for (i=0; i<512 && out[i] == 0; i++) {
+        }
+        *outLen -= i;
+        XMEMMOVE(out, out + i, *outLen);
+
+    }
+
+    XMEMSET(e, 0, sizeof(e));
+
+    return err;
+}
+#endif /* WOLFSSL_HAVE_SP_DH */
+
+#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
+
+#endif /* WOLFSSL_SP_4096 */
 
 #endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
 #ifdef WOLFSSL_HAVE_SP_ECC
 #ifndef WOLFSSL_SP_NO_256
 
 /* Point structure to use. */
-typedef struct sp_point {
+typedef struct sp_point_256 {
     sp_digit x[2 * 4];
     sp_digit y[2 * 4];
     sp_digit z[2 * 4];
     int infinity;
-} sp_point;
+} sp_point_256;
 
 /* The modulus (prime) of the curve P256. */
-static sp_digit p256_mod[4] = {
-    0xffffffffffffffffl,0x00000000ffffffffl,0x0000000000000000l,
-    0xffffffff00000001l
+static const sp_digit p256_mod[4] = {
+    0xffffffffffffffffL,0x00000000ffffffffL,0x0000000000000000L,
+    0xffffffff00000001L
 };
 /* The Montogmery normalizer for modulus of the curve P256. */
-static sp_digit p256_norm_mod[4] = {
-    0x0000000000000001l,0xffffffff00000000l,0xffffffffffffffffl,
-    0x00000000fffffffel
+static const sp_digit p256_norm_mod[4] = {
+    0x0000000000000001L,0xffffffff00000000L,0xffffffffffffffffL,
+    0x00000000fffffffeL
 };
 /* The Montogmery multiplier for modulus of the curve P256. */
-static sp_digit p256_mp_mod = 0x0000000000000001;
+static const sp_digit p256_mp_mod = 0x0000000000000001;
 #if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
                                             defined(HAVE_ECC_VERIFY)
 /* The order of the curve P256. */
-static sp_digit p256_order[4] = {
-    0xf3b9cac2fc632551l,0xbce6faada7179e84l,0xffffffffffffffffl,
-    0xffffffff00000000l
+static const sp_digit p256_order[4] = {
+    0xf3b9cac2fc632551L,0xbce6faada7179e84L,0xffffffffffffffffL,
+    0xffffffff00000000L
 };
 #endif
 /* The order of the curve P256 minus 2. */
-static sp_digit p256_order2[4] = {
-    0xf3b9cac2fc63254fl,0xbce6faada7179e84l,0xffffffffffffffffl,
-    0xffffffff00000000l
+static const sp_digit p256_order2[4] = {
+    0xf3b9cac2fc63254fL,0xbce6faada7179e84L,0xffffffffffffffffL,
+    0xffffffff00000000L
 };
 #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
 /* The Montogmery normalizer for order of the curve P256. */
-static sp_digit p256_norm_order[4] = {
-    0x0c46353d039cdaafl,0x4319055258e8617bl,0x0000000000000000l,
-    0x00000000ffffffffl
+static const sp_digit p256_norm_order[4] = {
+    0x0c46353d039cdaafL,0x4319055258e8617bL,0x0000000000000000L,
+    0x00000000ffffffffL
 };
 #endif
 #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
 /* The Montogmery multiplier for order of the curve P256. */
-static sp_digit p256_mp_order = 0xccd1c8aaee00bc4fl;
+static const sp_digit p256_mp_order = 0xccd1c8aaee00bc4fL;
 #endif
 #ifdef WOLFSSL_SP_SMALL
 /* The base point of curve P256. */
-static sp_point p256_base = {
+static const sp_point_256 p256_base = {
     /* X ordinate */
     {
-        0xf4a13945d898c296l,0x77037d812deb33a0l,0xf8bce6e563a440f2l,
-        0x6b17d1f2e12c4247l
+        0xf4a13945d898c296L,0x77037d812deb33a0L,0xf8bce6e563a440f2L,
+        0x6b17d1f2e12c4247L,
+        0L, 0L, 0L, 0L
     },
     /* Y ordinate */
     {
-        0xcbb6406837bf51f5l,0x2bce33576b315ecel,0x8ee7eb4a7c0f9e16l,
-        0x4fe342e2fe1a7f9bl
+        0xcbb6406837bf51f5L,0x2bce33576b315eceL,0x8ee7eb4a7c0f9e16L,
+        0x4fe342e2fe1a7f9bL,
+        0L, 0L, 0L, 0L
     },
     /* Z ordinate */
     {
-        0x0000000000000001l,0x0000000000000000l,0x0000000000000000l,
-        0x0000000000000000l
+        0x0000000000000001L,0x0000000000000000L,0x0000000000000000L,
+        0x0000000000000000L,
+        0L, 0L, 0L, 0L
     },
     /* infinity */
     0
 };
 #endif /* WOLFSSL_SP_SMALL */
 #if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY)
-static sp_digit p256_b[4] = {
-    0x3bce3c3e27d2604bl,0x651d06b0cc53b0f6l,0xb3ebbd55769886bcl,
-    0x5ac635d8aa3a93e7l
+static const sp_digit p256_b[4] = {
+    0x3bce3c3e27d2604bL,0x651d06b0cc53b0f6L,0xb3ebbd55769886bcL,
+    0x5ac635d8aa3a93e7L
 };
 #endif
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+static int sp_256_point_new_ex_4(void* heap, sp_point_256* sp, sp_point_256** p)
+{
+    int ret = MP_OKAY;
+    (void)heap;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    (void)sp;
+    *p = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, DYNAMIC_TYPE_ECC);
+#else
+    *p = sp;
+#endif
+    if (*p == NULL) {
+        ret = MEMORY_E;
+    }
+    return ret;
+}
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
 /* Allocate memory for point and return error. */
-#define sp_ecc_point_new(heap, sp, p)                                   \
-    ((p = XMALLOC(sizeof(sp_point), heap, DYNAMIC_TYPE_ECC)) == NULL) ? \
-        MEMORY_E : MP_OKAY
+#define sp_256_point_new_4(heap, sp, p) sp_256_point_new_ex_4((heap), NULL, &(p))
 #else
 /* Set pointer to data and return no error. */
-#define sp_ecc_point_new(heap, sp, p)   ((p = &sp) == NULL) ? MEMORY_E : MP_OKAY
-#endif
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#define sp_256_point_new_4(heap, sp, p) sp_256_point_new_ex_4((heap), &(sp), &(p))
+#endif
+
+
+static void sp_256_point_free_4(sp_point_256* p, int clear, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
 /* If valid pointer then clear point data if requested and free data. */
-#define sp_ecc_point_free(p, clear, heap)     \
-    do {                                      \
-        if (p != NULL) {                      \
-            if (clear)                        \
-                XMEMSET(p, 0, sizeof(*p));    \
-            XFREE(p, heap, DYNAMIC_TYPE_ECC); \
-        }                                     \
-    }                                         \
-    while (0)
+    if (p != NULL) {
+        if (clear != 0) {
+            XMEMSET(p, 0, sizeof(*p));
+        }
+        XFREE(p, heap, DYNAMIC_TYPE_ECC);
+    }
 #else
 /* Clear point data if requested. */
-#define sp_ecc_point_free(p, clear, heap) \
-    do {                                  \
-        if (clear)                        \
-            XMEMSET(p, 0, sizeof(*p));    \
-    }                                     \
-    while (0)
-#endif
+    if (clear != 0) {
+        XMEMSET(p, 0, sizeof(*p));
+    }
+#endif
+    (void)heap;
+}
 
 /* Multiply a number by Montogmery normalizer mod modulus (prime).
  *
@@ -12154,7 +18787,7 @@
  * a  The number to convert.
  * m  The modulus (prime).
  */
-static int sp_256_mod_mul_norm_4(sp_digit* r, sp_digit* a, sp_digit* m)
+static int sp_256_mod_mul_norm_4(sp_digit* r, const sp_digit* a, const sp_digit* m)
 {
     int64_t t[8];
     int64_t a32[8];
@@ -12218,53 +18851,64 @@
 /* Convert an mp_int to an array of sp_digit.
  *
  * r  A single precision integer.
+ * size  Maximum number of bytes to convert
  * a  A multi-precision integer.
  */
-static void sp_256_from_mp(sp_digit* r, int max, mp_int* a)
+static void sp_256_from_mp(sp_digit* r, int size, const mp_int* a)
 {
 #if DIGIT_BIT == 64
     int j;
 
     XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
 
-    for (j = a->used; j < max; j++)
+    for (j = a->used; j < size; j++) {
         r[j] = 0;
+    }
 #elif DIGIT_BIT > 64
-    int i, j = 0, s = 0;
+    int i, j = 0;
+    word32 s = 0;
 
     r[0] = 0;
-    for (i = 0; i < a->used && j < max; i++) {
-        r[j] |= a->dp[i] << s;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
         r[j] &= 0xffffffffffffffffl;
-        s = 64 - s;
-        if (j + 1 >= max)
+        s = 64U - s;
+        if (j + 1 >= size) {
             break;
-        r[++j] = a->dp[i] >> s;
-        while (s + 64 <= DIGIT_BIT) {
-            s += 64;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 64U) <= (word32)DIGIT_BIT) {
+            s += 64U;
             r[j] &= 0xffffffffffffffffl;
-            if (j + 1 >= max)
+            if (j + 1 >= size) {
                 break;
-            if (s < DIGIT_BIT)
-                r[++j] = a->dp[i] >> s;
-            else
-                r[++j] = 0;
-        }
-        s = DIGIT_BIT - s;
-    }
-
-    for (j++; j < max; j++)
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
         r[j] = 0;
+    }
 #else
     int i, j = 0, s = 0;
 
     r[0] = 0;
-    for (i = 0; i < a->used && j < max; i++) {
+    for (i = 0; i < a->used && j < size; i++) {
         r[j] |= ((sp_digit)a->dp[i]) << s;
         if (s + DIGIT_BIT >= 64) {
             r[j] &= 0xffffffffffffffffl;
-            if (j + 1 >= max)
+            if (j + 1 >= size) {
                 break;
+            }
             s = 64 - s;
             if (s == DIGIT_BIT) {
                 r[++j] = 0;
@@ -12275,21 +18919,23 @@
                 s = DIGIT_BIT - s;
             }
         }
-        else
+        else {
             s += DIGIT_BIT;
-    }
-
-    for (j++; j < max; j++)
+        }
+    }
+
+    for (j++; j < size; j++) {
         r[j] = 0;
-#endif
-}
-
-/* Convert a point of type ecc_point to type sp_point.
- *
- * p   Point of type sp_point (result).
+    }
+#endif
+}
+
+/* Convert a point of type ecc_point to type sp_point_256.
+ *
+ * p   Point of type sp_point_256 (result).
  * pm  Point of type ecc_point.
  */
-static void sp_256_point_from_ecc_point_4(sp_point* p, ecc_point* pm)
+static void sp_256_point_from_ecc_point_4(sp_point_256* p, const ecc_point* pm)
 {
     XMEMSET(p->x, 0, sizeof(p->x));
     XMEMSET(p->y, 0, sizeof(p->y));
@@ -12305,12 +18951,12 @@
  * a  A single precision integer.
  * r  A multi-precision integer.
  */
-static int sp_256_to_mp(sp_digit* a, mp_int* r)
+static int sp_256_to_mp(const sp_digit* a, mp_int* r)
 {
     int err;
 
     err = mp_grow(r, (256 + DIGIT_BIT - 1) / DIGIT_BIT);
-    if (err == MP_OKAY) {
+    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
 #if DIGIT_BIT == 64
         XMEMCPY(r->dp, a, sizeof(sp_digit) * 4);
         r->used = 4;
@@ -12320,14 +18966,19 @@
 
         r->dp[0] = 0;
         for (i = 0; i < 4; i++) {
-            r->dp[j] |= a[i] << s;
-            r->dp[j] &= (1l << DIGIT_BIT) - 1;
+            r->dp[j] |= (mp_digit)(a[i] << s);
+            r->dp[j] &= (1L << DIGIT_BIT) - 1;
             s = DIGIT_BIT - s;
-            r->dp[++j] = a[i] >> s;
+            r->dp[++j] = (mp_digit)(a[i] >> s);
             while (s + DIGIT_BIT <= 64) {
                 s += DIGIT_BIT;
-                r->dp[j] &= (1l << DIGIT_BIT) - 1;
-                r->dp[++j] = a[i] >> s;
+                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+                if (s == SP_WORD_SIZE) {
+                    r->dp[j] = 0;
+                }
+                else {
+                    r->dp[j] = (mp_digit)(a[i] >> s);
+                }
             }
             s = 64 - s;
         }
@@ -12340,15 +18991,16 @@
         for (i = 0; i < 4; i++) {
             r->dp[j] |= ((mp_digit)a[i]) << s;
             if (s + 64 >= DIGIT_BIT) {
-    #if DIGIT_BIT < 64
-                r->dp[j] &= (1l << DIGIT_BIT) - 1;
+    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+                r->dp[j] &= (1L << DIGIT_BIT) - 1;
     #endif
                 s = DIGIT_BIT - s;
                 r->dp[++j] = a[i] >> s;
                 s = 64 - s;
             }
-            else
+            else {
                 s += 64;
+            }
         }
         r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT;
         mp_clamp(r);
@@ -12358,22 +19010,24 @@
     return err;
 }
 
-/* Convert a point of type sp_point to type ecc_point.
- *
- * p   Point of type sp_point.
+/* Convert a point of type sp_point_256 to type ecc_point.
+ *
+ * p   Point of type sp_point_256.
  * pm  Point of type ecc_point (result).
  * returns MEMORY_E when allocation of memory in ecc_point fails otherwise
  * MP_OKAY.
  */
-static int sp_256_point_to_ecc_point_4(sp_point* p, ecc_point* pm)
+static int sp_256_point_to_ecc_point_4(const sp_point_256* p, ecc_point* pm)
 {
     int err;
 
     err = sp_256_to_mp(p->x, pm->x);
-    if (err == MP_OKAY)
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(p->y, pm->y);
-    if (err == MP_OKAY)
+    }
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(p->z, pm->z);
+    }
 
     return err;
 }
@@ -12385,15 +19039,15 @@
  * a  A single precision number to copy.
  * m  Mask value to apply.
  */
-static void sp_256_cond_copy_4(sp_digit* r, const sp_digit* a, const sp_digit m)
+static void sp_256_cond_copy_4(sp_digit* r, const sp_digit* a, sp_digit m)
 {
     __asm__ __volatile__ (
         "ldp	x3, x4, [%[r], 0]\n\t"
+        "ldp	x7, x8, [%[a], 0]\n\t"
+        "eor	x7, x7, x3\n\t"
         "ldp	x5, x6, [%[r], 16]\n\t"
-        "ldp	x7, x8, [%[a], 0]\n\t"
+        "eor	x8, x8, x4\n\t"
         "ldp	x9, x10, [%[a], 16]\n\t"
-        "eor	x7, x7, x3\n\t"
-        "eor	x8, x8, x4\n\t"
         "eor	x9, x9, x5\n\t"
         "eor	x10, x10, x6\n\t"
         "and	x7, x7, %[m]\n\t"
@@ -12403,8 +19057,8 @@
         "eor	x3, x3, x7\n\t"
         "eor	x4, x4, x8\n\t"
         "eor	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 0]\n\t"
         "eor	x6, x6, x10\n\t"
-        "stp	x3, x4, [%[r], 0]\n\t"
         "stp	x5, x6, [%[r], 16]\n\t"
         :
         : [r] "r" (r), [a] "r" (a), [m] "r" (m)
@@ -12412,6 +19066,445 @@
     );
 }
 
+/* Multiply two Montogmery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r   Result of multiplication.
+ * a   First number to multiply in Montogmery form.
+ * b   Second number to multiply in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+SP_NOINLINE static void sp_256_mont_mul_4(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    (void)m;
+    (void)mp;
+
+    __asm__ __volatile__ (
+        "ldp       x16, x17, [%[a], 0]\n\t"
+        "ldp       x21, x22, [%[b], 0]\n\t"
+        "#  A[0] * B[0]\n\t"
+        "mul       x8, x16, x21\n\t"
+        "ldr       x19, [%[a], 16]\n\t"
+        "umulh     x9, x16, x21\n\t"
+        "ldr       x23, [%[b], 16]\n\t"
+        "#  A[0] * B[1]\n\t"
+        "mul       x4, x16, x22\n\t"
+        "ldr       x20, [%[a], 24]\n\t"
+        "umulh     x5, x16, x22\n\t"
+        "ldr       x24, [%[b], 24]\n\t"
+        "adds  x9, x9, x4\n\t"
+        "#  A[1] * B[0]\n\t"
+        "mul       x4, x17, x21\n\t"
+        "adc   x10, xzr, x5\n\t"
+        "umulh     x5, x17, x21\n\t"
+        "adds  x9, x9, x4\n\t"
+        "#  A[0] * B[2]\n\t"
+        "mul       x4, x16, x23\n\t"
+        "adcs   x10, x10, x5\n\t"
+        "umulh     x5, x16, x23\n\t"
+        "adc     x11, xzr, xzr\n\t"
+        "adds  x10, x10, x4\n\t"
+        "#  A[1] * B[1]\n\t"
+        "mul       x4, x17, x22\n\t"
+        "adc   x11, x11, x5\n\t"
+        "umulh     x5, x17, x22\n\t"
+        "adds  x10, x10, x4\n\t"
+        "#  A[2] * B[0]\n\t"
+        "mul       x4, x19, x21\n\t"
+        "adcs   x11, x11, x5\n\t"
+        "umulh     x5, x19, x21\n\t"
+        "adc     x12, xzr, xzr\n\t"
+        "adds  x10, x10, x4\n\t"
+        "#  A[0] * B[3]\n\t"
+        "mul       x4, x16, x24\n\t"
+        "adcs   x11, x11, x5\n\t"
+        "umulh     x5, x16, x24\n\t"
+        "adc     x12, x12, xzr\n\t"
+        "adds  x11, x11, x4\n\t"
+        "#  A[1] * B[2]\n\t"
+        "mul       x4, x17, x23\n\t"
+        "adcs   x12, x12, x5\n\t"
+        "umulh     x5, x17, x23\n\t"
+        "adc     x13, xzr, xzr\n\t"
+        "adds  x11, x11, x4\n\t"
+        "#  A[2] * B[1]\n\t"
+        "mul       x4, x19, x22\n\t"
+        "adcs   x12, x12, x5\n\t"
+        "umulh     x5, x19, x22\n\t"
+        "adc     x13, x13, xzr\n\t"
+        "adds  x11, x11, x4\n\t"
+        "#  A[3] * B[0]\n\t"
+        "mul       x4, x20, x21\n\t"
+        "adcs   x12, x12, x5\n\t"
+        "umulh     x5, x20, x21\n\t"
+        "adc     x13, x13, xzr\n\t"
+        "adds  x11, x11, x4\n\t"
+        "#  A[1] * B[3]\n\t"
+        "mul       x4, x17, x24\n\t"
+        "adcs   x12, x12, x5\n\t"
+        "umulh     x5, x17, x24\n\t"
+        "adc     x13, x13, xzr\n\t"
+        "adds  x12, x12, x4\n\t"
+        "#  A[2] * B[2]\n\t"
+        "mul       x4, x19, x23\n\t"
+        "adcs   x13, x13, x5\n\t"
+        "umulh     x5, x19, x23\n\t"
+        "adc     x14, xzr, xzr\n\t"
+        "adds  x12, x12, x4\n\t"
+        "#  A[3] * B[1]\n\t"
+        "mul       x4, x20, x22\n\t"
+        "adcs   x13, x13, x5\n\t"
+        "umulh     x5, x20, x22\n\t"
+        "adc     x14, x14, xzr\n\t"
+        "adds  x12, x12, x4\n\t"
+        "#  A[2] * B[3]\n\t"
+        "mul       x4, x19, x24\n\t"
+        "adcs   x13, x13, x5\n\t"
+        "umulh     x5, x19, x24\n\t"
+        "adc     x14, x14, xzr\n\t"
+        "adds  x13, x13, x4\n\t"
+        "#  A[3] * B[2]\n\t"
+        "mul       x4, x20, x23\n\t"
+        "adcs   x14, x14, x5\n\t"
+        "umulh     x5, x20, x23\n\t"
+        "adc     x15, xzr, xzr\n\t"
+        "adds  x13, x13, x4\n\t"
+        "#  A[3] * B[3]\n\t"
+        "mul       x4, x20, x24\n\t"
+        "adcs   x14, x14, x5\n\t"
+        "umulh     x5, x20, x24\n\t"
+        "adc     x15, x15, xzr\n\t"
+        "adds  x14, x14, x4\n\t"
+        "mov	x4, x8\n\t"
+        "adc   x15, x15, x5\n\t"
+        "# Start Reduction\n\t"
+        "mov	x5, x9\n\t"
+        "mov	x6, x10\n\t"
+        "# mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192\n\t"
+        "#    - a[0] << 32 << 192\n\t"
+        "#   + (a[0] * 2) << 192\n\t"
+        "#   a[0]-a[2] << 32\n\t"
+        "lsl	x10, x10, 32\n\t"
+        "add	x7, x11, x8\n\t"
+        "eor	x10, x10, x9, lsr #32\n\t"
+        "lsl	x9, x9, 32\n\t"
+        "add	x7, x7, x8\n\t"
+        "eor	x9, x9, x8, lsr #32\n\t"
+        "#   + a[0]-a[2] << 32 << 64\n\t"
+        "#   - a[0] << 32 << 192\n\t"
+        "adds	x5, x5, x8, lsl #32\n\t"
+        "sub	x7, x7, x8, lsl #32\n\t"
+        "adcs	x6, x6, x9\n\t"
+        "adc	x7, x7, x10\n\t"
+        "# a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu\n\t"
+        "#   a += mu << 256\n\t"
+        "adds	x12, x12, x4\n\t"
+        "adcs	x13, x13, x5\n\t"
+        "adcs	x14, x14, x6\n\t"
+        "adcs	x15, x15, x7\n\t"
+        "cset	x8, cs\n\t"
+        "#   a += mu << 192\n\t"
+        "# mu <<= 32\n\t"
+        "#   a += (mu << 32) << 64\n\t"
+        "adds	x11, x11, x4\n\t"
+        "adcs	x12, x12, x5\n\t"
+        "adcs	x13, x13, x6\n\t"
+        "lsr	x16, x7, 32\n\t"
+        "adcs	x14, x14, x7\n\t"
+        "lsl	x7, x7, 32\n\t"
+        "adcs	x15, x15, xzr\n\t"
+        "eor	x7, x7, x6, lsr #32\n\t"
+        "adc	x8, x8, xzr\n\t"
+        "lsl	x6, x6, 32\n\t"
+        "eor	x6, x6, x5, lsr #32\n\t"
+        "adds	x11, x11, x6\n\t"
+        "lsl	x5, x5, 32\n\t"
+        "adcs	x12, x12, x7\n\t"
+        "eor	x5, x5, x4, lsr #32\n\t"
+        "adcs	x13, x13, x16\n\t"
+        "lsl	x4, x4, 32\n\t"
+        "adcs	x14, x14, xzr\n\t"
+        "adcs	x15, x15, xzr\n\t"
+        "adc	x8, x8, xzr\n\t"
+        "#   a -= (mu << 32) << 192\n\t"
+        "subs	x11, x11, x4\n\t"
+        "sbcs	x12, x12, x5\n\t"
+        "sbcs	x13, x13, x6\n\t"
+        "sub	x8, xzr, x8\n\t"
+        "sbcs	x14, x14, x7\n\t"
+        "sub	x8, x8, #1\n\t"
+        "sbcs	x15, x15, x16\n\t"
+        "mov	x19, 0xffffffff00000001\n\t"
+        "adc	x8, x8, xzr\n\t"
+        "# mask m and sub from result if overflow\n\t"
+        "#  m[0] = -1 & mask = mask\n\t"
+        "subs	x12, x12, x8\n\t"
+        "#  m[1] = 0xffffffff & mask = mask >> 32 as mask is all 1s or 0s\n\t"
+        "lsr	x17, x8, 32\n\t"
+        "sbcs	x13, x13, x17\n\t"
+        "and	x19, x19, x8\n\t"
+        "#  m[2] =  0 & mask = 0\n\t"
+        "sbcs	x14, x14, xzr\n\t"
+        "stp	x12, x13, [%[r], 0]\n\t"
+        "#  m[3] =  0xffffffff00000001 & mask\n\t"
+        "sbc	x15, x15, x19\n\t"
+        "stp	x14, x15, [%[r], 16]\n\t"
+        : [a] "+r" (a), [b] "+r" (b)
+        : [r] "r" (r)
+        : "memory", "x4", "x5", "x6", "x7", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15"
+    );
+}
+
+/* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+SP_NOINLINE static void sp_256_mont_sqr_4(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    (void)m;
+    (void)mp;
+
+    __asm__ __volatile__ (
+        "ldp       x16, x17, [%[a], 0]\n\t"
+        "#  A[0] * A[1]\n\t"
+        "mul	x9, x16, x17\n\t"
+        "ldr       x19, [%[a], 16]\n\t"
+        "umulh	x10, x16, x17\n\t"
+        "ldr       x20, [%[a], 24]\n\t"
+        "#  A[0] * A[2]\n\t"
+        "mul	x4, x16, x19\n\t"
+        "umulh	x5, x16, x19\n\t"
+        "adds	x10, x10, x4\n\t"
+        "#  A[0] * A[3]\n\t"
+        "mul	x4, x16, x20\n\t"
+        "adc	x11, xzr, x5\n\t"
+        "umulh	x5, x16, x20\n\t"
+        "adds	x11, x11, x4\n\t"
+        "#  A[1] * A[2]\n\t"
+        "mul	x4, x17, x19\n\t"
+        "adc	x12, xzr, x5\n\t"
+        "umulh	x5, x17, x19\n\t"
+        "adds	x11, x11, x4\n\t"
+        "#  A[1] * A[3]\n\t"
+        "mul	x4, x17, x20\n\t"
+        "adcs	x12, x12, x5\n\t"
+        "umulh	x5, x17, x20\n\t"
+        "adc	x13, xzr, xzr\n\t"
+        "adds	x12, x12, x4\n\t"
+        "#  A[2] * A[3]\n\t"
+        "mul	x4, x19, x20\n\t"
+        "adc	x13, x13, x5\n\t"
+        "umulh	x5, x19, x20\n\t"
+        "adds	x13, x13, x4\n\t"
+        "adc	x14, xzr, x5\n\t"
+        "# Double\n\t"
+        "adds	x9, x9, x9\n\t"
+        "adcs	x10, x10, x10\n\t"
+        "adcs	x11, x11, x11\n\t"
+        "adcs	x12, x12, x12\n\t"
+        "adcs	x13, x13, x13\n\t"
+        "#  A[0] * A[0]\n\t"
+        "mul	x8, x16, x16\n\t"
+        "adcs	x14, x14, x14\n\t"
+        "umulh	x3, x16, x16\n\t"
+        "cset	x15, cs\n\t"
+        "#  A[1] * A[1]\n\t"
+        "mul	x4, x17, x17\n\t"
+        "adds	x9, x9, x3\n\t"
+        "umulh	x5, x17, x17\n\t"
+        "adcs	x10, x10, x4\n\t"
+        "#  A[2] * A[2]\n\t"
+        "mul	x6, x19, x19\n\t"
+        "adcs	x11, x11, x5\n\t"
+        "umulh	x7, x19, x19\n\t"
+        "adcs	x12, x12, x6\n\t"
+        "#  A[3] * A[3]\n\t"
+        "mul	x16, x20, x20\n\t"
+        "adcs	x13, x13, x7\n\t"
+        "umulh	x17, x20, x20\n\t"
+        "adcs	x14, x14, x16\n\t"
+        "mov	x3, x8\n\t"
+        "adc	x15, x15, x17\n\t"
+        "# Start Reduction\n\t"
+        "mov	x4, x9\n\t"
+        "mov	x5, x10\n\t"
+        "# mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192\n\t"
+        "#    - a[0] << 32 << 192\n\t"
+        "#   + (a[0] * 2) << 192\n\t"
+        "#   a[0]-a[2] << 32\n\t"
+        "lsl	x10, x10, 32\n\t"
+        "add	x6, x11, x8\n\t"
+        "eor	x10, x10, x9, lsr #32\n\t"
+        "lsl	x9, x9, 32\n\t"
+        "add	x6, x6, x8\n\t"
+        "eor	x9, x9, x8, lsr #32\n\t"
+        "#   + a[0]-a[2] << 32 << 64\n\t"
+        "#   - a[0] << 32 << 192\n\t"
+        "adds	x4, x4, x8, lsl #32\n\t"
+        "sub	x6, x6, x8, lsl #32\n\t"
+        "adcs	x5, x5, x9\n\t"
+        "adc	x6, x6, x10\n\t"
+        "# a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu\n\t"
+        "#   a += mu << 256\n\t"
+        "adds	x12, x12, x3\n\t"
+        "adcs	x13, x13, x4\n\t"
+        "adcs	x14, x14, x5\n\t"
+        "adcs	x15, x15, x6\n\t"
+        "cset	x8, cs\n\t"
+        "#   a += mu << 192\n\t"
+        "# mu <<= 32\n\t"
+        "#   a += (mu << 32) << 64\n\t"
+        "adds	x11, x11, x3\n\t"
+        "adcs	x12, x12, x4\n\t"
+        "adcs	x13, x13, x5\n\t"
+        "lsr	x7, x6, 32\n\t"
+        "adcs	x14, x14, x6\n\t"
+        "lsl	x6, x6, 32\n\t"
+        "adcs	x15, x15, xzr\n\t"
+        "eor	x6, x6, x5, lsr #32\n\t"
+        "adc	x8, x8, xzr\n\t"
+        "lsl	x5, x5, 32\n\t"
+        "eor	x5, x5, x4, lsr #32\n\t"
+        "adds	x11, x11, x5\n\t"
+        "lsl	x4, x4, 32\n\t"
+        "adcs	x12, x12, x6\n\t"
+        "eor	x4, x4, x3, lsr #32\n\t"
+        "adcs	x13, x13, x7\n\t"
+        "lsl	x3, x3, 32\n\t"
+        "adcs	x14, x14, xzr\n\t"
+        "adcs	x15, x15, xzr\n\t"
+        "adc	x8, x8, xzr\n\t"
+        "#   a -= (mu << 32) << 192\n\t"
+        "subs	x11, x11, x3\n\t"
+        "sbcs	x12, x12, x4\n\t"
+        "sbcs	x13, x13, x5\n\t"
+        "sub	x8, xzr, x8\n\t"
+        "sbcs	x14, x14, x6\n\t"
+        "sub	x8, x8, #1\n\t"
+        "sbcs	x15, x15, x7\n\t"
+        "mov	x17, 0xffffffff00000001\n\t"
+        "adc	x8, x8, xzr\n\t"
+        "# mask m and sub from result if overflow\n\t"
+        "#  m[0] = -1 & mask = mask\n\t"
+        "subs	x12, x12, x8\n\t"
+        "#  m[1] = 0xffffffff & mask = mask >> 32 as mask is all 1s or 0s\n\t"
+        "lsr	x16, x8, 32\n\t"
+        "sbcs	x13, x13, x16\n\t"
+        "and	x17, x17, x8\n\t"
+        "#  m[2] =  0 & mask = 0\n\t"
+        "sbcs	x14, x14, xzr\n\t"
+        "stp	x12, x13, [%[r], 0]\n\t"
+        "#  m[3] =  0xffffffff00000001 & mask\n\t"
+        "sbc	x15, x15, x17\n\t"
+        "stp	x14, x15, [%[r], 16]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20"
+    );
+}
+
+#if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY)
+/* Square the Montgomery form number a number of times. (r = a ^ n mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montogmery form.
+ * n   Number of times to square.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_256_mont_sqr_n_4(sp_digit* r, const sp_digit* a, int n,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_256_mont_sqr_4(r, a, m, mp);
+    for (; n > 1; n--) {
+        sp_256_mont_sqr_4(r, r, m, mp);
+    }
+}
+
+#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */
+#ifdef WOLFSSL_SP_SMALL
+/* Mod-2 for the P256 curve. */
+static const uint64_t p256_mod_minus_2[4] = {
+    0xfffffffffffffffdU,0x00000000ffffffffU,0x0000000000000000U,
+    0xffffffff00000001U
+};
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the modulus (prime) of the
+ * P256 curve. (r = 1 / a mod m)
+ *
+ * r   Inverse result.
+ * a   Number to invert.
+ * td  Temporary data.
+ */
+static void sp_256_mont_inv_4(sp_digit* r, const sp_digit* a, sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    XMEMCPY(t, a, sizeof(sp_digit) * 4);
+    for (i=254; i>=0; i--) {
+        sp_256_mont_sqr_4(t, t, p256_mod, p256_mp_mod);
+        if (p256_mod_minus_2[i / 64] & ((sp_digit)1 << (i % 64)))
+            sp_256_mont_mul_4(t, t, a, p256_mod, p256_mp_mod);
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 4);
+#else
+    sp_digit* t1 = td;
+    sp_digit* t2 = td + 2 * 4;
+    sp_digit* t3 = td + 4 * 4;
+    /* 0x2 */
+    sp_256_mont_sqr_4(t1, a, p256_mod, p256_mp_mod);
+    /* 0x3 */
+    sp_256_mont_mul_4(t2, t1, a, p256_mod, p256_mp_mod);
+    /* 0xc */
+    sp_256_mont_sqr_n_4(t1, t2, 2, p256_mod, p256_mp_mod);
+    /* 0xd */
+    sp_256_mont_mul_4(t3, t1, a, p256_mod, p256_mp_mod);
+    /* 0xf */
+    sp_256_mont_mul_4(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xf0 */
+    sp_256_mont_sqr_n_4(t1, t2, 4, p256_mod, p256_mp_mod);
+    /* 0xfd */
+    sp_256_mont_mul_4(t3, t3, t1, p256_mod, p256_mp_mod);
+    /* 0xff */
+    sp_256_mont_mul_4(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xff00 */
+    sp_256_mont_sqr_n_4(t1, t2, 8, p256_mod, p256_mp_mod);
+    /* 0xfffd */
+    sp_256_mont_mul_4(t3, t3, t1, p256_mod, p256_mp_mod);
+    /* 0xffff */
+    sp_256_mont_mul_4(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xffff0000 */
+    sp_256_mont_sqr_n_4(t1, t2, 16, p256_mod, p256_mp_mod);
+    /* 0xfffffffd */
+    sp_256_mont_mul_4(t3, t3, t1, p256_mod, p256_mp_mod);
+    /* 0xffffffff */
+    sp_256_mont_mul_4(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000000 */
+    sp_256_mont_sqr_n_4(t1, t2, 32, p256_mod, p256_mp_mod);
+    /* 0xffffffffffffffff */
+    sp_256_mont_mul_4(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001 */
+    sp_256_mont_mul_4(r, t1, a, p256_mod, p256_mp_mod);
+    /* 0xffffffff000000010000000000000000000000000000000000000000 */
+    sp_256_mont_sqr_n_4(r, r, 160, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001000000000000000000000000ffffffffffffffff */
+    sp_256_mont_mul_4(r, r, t2, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001000000000000000000000000ffffffffffffffff00000000 */
+    sp_256_mont_sqr_n_4(r, r, 32, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001000000000000000000000000fffffffffffffffffffffffd */
+    sp_256_mont_mul_4(r, r, t3, p256_mod, p256_mp_mod);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
 /* Compare a with b in constant time.
  *
  * a  A single precision integer.
@@ -12419,74 +19512,71 @@
  * return -ve, 0 or +ve if a is less than, equal to or greater than b
  * respectively.
  */
-static int64_t sp_256_cmp_4(sp_digit* a, sp_digit* b)
-{
-    sp_digit r = -1;
-    sp_digit one = 1;
-
-#ifdef WOLFSSL_SP_SMALL
-    __asm__ __volatile__ (
-        "mov	x3, -1\n\t"
-        "mov	x6, 24\n\t"
+static int64_t sp_256_cmp_4(const sp_digit* a, const sp_digit* b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    __asm__ __volatile__ (
+        "mov	x2, -1\n\t"
+        "mov	x3, 1\n\t"
+        "mov	x4, -1\n\t"
+        "mov	x5, 24\n\t"
         "1:\n\t"
-        "ldr	x4, [%[a], x6]\n\t"
-        "ldr	x5, [%[b], x6]\n\t"
-        "and	x4, x4, x3\n\t"
-        "and	x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "sub	x6, x6, #8\n\t"
-        "b.cc	1b\n\t"
-        "eor	%[r], %[r], x3\n\t"
-        : [r] "+r" (r)
-        : [a] "r" (a), [b] "r" (b), [one] "r" (one)
-        : "x2", "x3", "x4", "x5", "x6"
-    );
-#else
-    __asm__ __volatile__ (
-        "mov	x3, -1\n\t"
-        "ldr		x4, [%[a], 24]\n\t"
-        "ldr		x5, [%[b], 24]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 16]\n\t"
-        "ldr		x5, [%[b], 16]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 8]\n\t"
-        "ldr		x5, [%[b], 8]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "ldr		x4, [%[a], 0]\n\t"
-        "ldr		x5, [%[b], 0]\n\t"
-        "and		x4, x4, x3\n\t"
-        "and		x5, x5, x3\n\t"
-        "subs	x4, x4, x5\n\t"
-        "csel	%[r], %[one], %[r], hi\n\t"
-        "csel	%[r], x3, %[r], lo\n\t"
-        "csel	x3, x3, xzr, eq\n\t"
-        "eor	%[r], %[r], x3\n\t"
-        : [r] "+r" (r)
-        : [a] "r" (a), [b] "r" (b), [one] "r" (one)
-        : "x2", "x3", "x4", "x5", "x6"
-    );
-#endif
-
-    return r;
+        "ldr	x6, [%[a], x5]\n\t"
+        "ldr	x7, [%[b], x5]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x6, x6, x7\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "subs	x5, x5, #8\n\t"
+        "b.cs	1b\n\t"
+        "eor	%[a], x2, x4\n\t"
+        : [a] "+r" (a)
+        : [b] "r" (b)
+        : "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12"
+    );
+#else
+    __asm__ __volatile__ (
+        "mov	x2, -1\n\t"
+        "mov	x3, 1\n\t"
+        "mov	x4, -1\n\t"
+        "ldp	x5, x6, [%[a], 0]\n\t"
+        "ldp	x7, x8, [%[a], 16]\n\t"
+        "ldp	x9, x10, [%[b], 0]\n\t"
+        "ldp	x11, x12, [%[b], 16]\n\t"
+        "and	x8, x8, x4\n\t"
+        "and	x12, x12, x4\n\t"
+        "subs	x8, x8, x12\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x7, x7, x4\n\t"
+        "and	x11, x11, x4\n\t"
+        "subs	x7, x7, x11\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x10, x10, x4\n\t"
+        "subs	x6, x6, x10\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x9, x9, x4\n\t"
+        "subs	x5, x5, x9\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "eor	%[a], x2, x4\n\t"
+        : [a] "+r" (a)
+        : [b] "r" (b)
+        : "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12"
+    );
+#endif
+
+    return (int64_t)a;
 }
 
 /* Normalize the values in each word to 64.
@@ -12503,40 +19593,32 @@
  * b  A single precision number to subtract.
  * m  Mask value to apply.
  */
-static sp_digit sp_256_cond_sub_4(sp_digit* r, sp_digit* a, sp_digit* b,
+static sp_digit sp_256_cond_sub_4(sp_digit* r, const sp_digit* a, const sp_digit* b,
         sp_digit m)
 {
-    sp_digit c = 0;
-
-    __asm__ __volatile__ (
-
-        "ldr		x4, [%[a], 0]\n\t"
-        "ldr		x6, [%[a], 8]\n\t"
-        "ldr		x5, [%[b], 0]\n\t"
-        "ldr		x7, [%[b], 8]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
+    __asm__ __volatile__ (
+
+        "ldp	x5, x7, [%[b], 0]\n\t"
+        "ldp	x11, x12, [%[b], 16]\n\t"
+        "ldp	x4, x6, [%[a], 0]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 16]\n\t"
+        "and	x7, x7, %[m]\n\t"
         "subs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 0]\n\t"
-        "str		x6, [%[r], 8]\n\t"
-        "ldr		x4, [%[a], 16]\n\t"
-        "ldr		x6, [%[a], 24]\n\t"
-        "ldr		x5, [%[b], 16]\n\t"
-        "ldr		x7, [%[b], 24]\n\t"
-        "and		x5, x5, %[m]\n\t"
-        "and		x7, x7, %[m]\n\t"
-        "sbcs	x4, x4, x5\n\t"
-        "sbcs	x6, x6, x7\n\t"
-        "str		x4, [%[r], 16]\n\t"
-        "str		x6, [%[r], 24]\n\t"
-        "csetm	%[c], cc\n\t"
-        : [c] "+r" (c)
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
-        : "memory", "x4", "x6", "x5", "x7", "x8"
-    );
-
-    return c;
+        "and	x11, x11, %[m]\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "sbcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 0]\n\t"
+        "sbcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 16]\n\t"
+        "csetm	%[r], cc\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12"
+    );
+
+    return (sp_digit)r;
 }
 
 /* Sub b from a into r. (r = a - b)
@@ -12548,27 +19630,27 @@
 static sp_digit sp_256_sub_4(sp_digit* r, const sp_digit* a,
         const sp_digit* b)
 {
-    sp_digit c = 0;
-
     __asm__ __volatile__ (
         "ldp	x3, x4, [%[a], 0]\n\t"
-        "ldp	x5, x6, [%[a], 16]\n\t"
         "ldp	x7, x8, [%[b], 0]\n\t"
-        "ldp	x9, x10, [%[b], 16]\n\t"
         "subs	x3, x3, x7\n\t"
-        "sbcs	x4, x4, x8\n\t"
-        "sbcs	x5, x5, x9\n\t"
-        "sbcs	x6, x6, x10\n\t"
+        "ldp	x5, x6, [%[a], 16]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 16]\n\t"
+        "sbcs	x5, x5, x9\n\t"
         "stp	x3, x4, [%[r], 0]\n\t"
+        "sbcs	x6, x6, x10\n\t"
         "stp	x5, x6, [%[r], 16]\n\t"
-        "csetm	%[c], cc\n\t"
-        : [c] "+r" (c)
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        "csetm	%[r], cc\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
         : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10"
     );
 
-    return c;
-}
+    return (sp_digit)r;
+}
+
+#define sp_256_mont_reduce_order_4    sp_256_mont_reduce_4
 
 /* Reduce the number back to 256 bits using Montgomery reduction.
  *
@@ -12576,564 +19658,163 @@
  * m   The single precision number representing the modulus.
  * mp  The digit representing the negative inverse of m mod 2^n.
  */
-SP_NOINLINE static void sp_256_mont_reduce_4(sp_digit* a, sp_digit* m,
+SP_NOINLINE static void sp_256_mont_reduce_4(sp_digit* a, const sp_digit* m,
         sp_digit mp)
 {
     __asm__ __volatile__ (
-        "# i = 0\n\t"
-        "mov	x9, xzr\n\t"
+        "ldp	x9, x10, [%[a], 0]\n\t"
+        "ldp	x11, x12, [%[a], 16]\n\t"
+        "ldp	x17, x19, [%[m], 0]\n\t"
+        "ldp	x20, x21, [%[m], 16]\n\t"
         "mov	x8, xzr\n\t"
-        "mov	x6, %[a]\n\t"
-        "\n1:\n\t"
-        "# mu = a[i] * mp\n\t"
-        "ldr	x5, [x6, 0]\n\t"
-        "mov	x7, x5\n\t"
-        "mul	x5, %[mp], x5\n\t"
-        "# a[i+0] += m[0] * mu\n\t"
-        "ldr	x4, [%[m], 0]\n\t"
-        "ldr	x11, [%[m], 8]\n\t"
-        "mul	x3, x4, x5\n\t"
-        "umulh	x10, x4, x5\n\t"
-        "adds	x7, x7, x3\n\t"
-        "str	x7, [x6, 0]\n\t"
-        "adc	x10, x10, xzr\n\t"
-        "# a[i+1] += m[1] * mu\n\t"
-        "mul	x3, x11, x5\n\t"
-        "umulh	x12, x11, x5\n\t"
-        "ldr	x11, [%[m], 16]\n\t"
-        "ldr	x7, [x6, 8]\n\t"
-        "adds	x3, x3, x10\n\t"
-        "adc	x12, x12, xzr\n\t"
-        "adds	x7, x7, x3\n\t"
-        "str	x7, [x6, 8]\n\t"
-        "adc	x12, x12, xzr\n\t"
-        "# a[i+2] += m[2] * mu\n\t"
-        "mul	x3, x11, x5\n\t"
-        "umulh	x10, x11, x5\n\t"
-        "ldr	x11, [%[m], 24]\n\t"
-        "ldr	x7, [x6, 16]\n\t"
-        "adds	x3, x3, x12\n\t"
-        "adc	x10, x10, xzr\n\t"
-        "adds	x7, x7, x3\n\t"
-        "str	x7, [x6, 16]\n\t"
-        "adc	x10, x10, xzr\n\t"
-        "# a[i+3] += m[3] * mu\n\t"
-        "mul	x3, x11, x5\n\t"
-        "umulh	x4, x11, x5\n\t"
-        "ldr	x7, [x6, 24]\n\t"
-        "ldr	x12, [x6, 32]\n\t"
-        "adds	x3, x3, x10\n\t"
-        "adcs	x4, x4, x9\n\t"
-        "cset	x9, cs\n\t"
-        "adds	x7, x7, x3\n\t"
-        "str	x7, [x6, 24]\n\t"
-        "adcs	x12, x12, x4\n\t"
-        "str	x12, [x6, 32]\n\t"
-        "adc	x9, x9, xzr\n\t"
-        "# i += 1\n\t"
-        "add	x6, x6, 8\n\t"
-        "add	x8, x8, 8\n\t"
-        "cmp	x8, 32\n\t"
-        "b.lt	1b\n\t"
-        "ldr	x5, [%[a], 32]\n\t"
-        "ldr	x6, [%[a], 40]\n\t"
-        "ldr	x7, [%[a], 48]\n\t"
-        "ldr	x8, [%[a], 56]\n\t"
-        "sub	x3, xzr, x9\n\t"
-        "ldr	x9, [%[m], 0]\n\t"
-        "ldr	x10, [%[m], 8]\n\t"
-        "ldr	x11, [%[m], 16]\n\t"
-        "ldr	x12, [%[m], 24]\n\t"
-        "and	x9, x9, x3\n\t"
-        "and	x10, x10, x3\n\t"
-        "and	x11, x11, x3\n\t"
-        "and	x12, x12, x3\n\t"
-        "subs	x5, x5, x9\n\t"
-        "sbcs	x6, x6, x10\n\t"
-        "sbcs	x7, x7, x11\n\t"
-        "sbc	x8, x8, x12\n\t"
-        "str	x5, [%[a], 0]\n\t"
-        "str	x6, [%[a], 8]\n\t"
-        "str	x7, [%[a], 16]\n\t"
-        "str	x8, [%[a], 24]\n\t"
+        "# mu = a[0] * mp\n\t"
+        "mul	x5, %[mp], x9\n\t"
+        "ldr	x13, [%[a], 32]\n\t"
+        "# a[0+0] += m[0] * mu\n\t"
+        "mul	x3, x17, x5\n\t"
+        "ldr	x14, [%[a], 40]\n\t"
+        "umulh	x6, x17, x5\n\t"
+        "ldr	x15, [%[a], 48]\n\t"
+        "adds	x9, x9, x3\n\t"
+        "ldr	x16, [%[a], 56]\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "# a[0+1] += m[1] * mu\n\t"
+        "mul	x3, x19, x5\n\t"
+        "umulh	x7, x19, x5\n\t"
+        "adds	x3, x3, x6\n\t"
+        "adc	x7, x7, xzr\n\t"
+        "adds	x10, x10, x3\n\t"
+        "adc	x7, x7, xzr\n\t"
+        "# a[0+2] += m[2] * mu\n\t"
+        "mul	x3, x20, x5\n\t"
+        "umulh	x6, x20, x5\n\t"
+        "adds	x3, x3, x7\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "adds	x11, x11, x3\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "# a[0+3] += m[3] * mu\n\t"
+        "mul	x3, x21, x5\n\t"
+        "umulh	x4, x21, x5\n\t"
+        "adds	x3, x3, x6\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "cset	x8, cs\n\t"
+        "adds	x12, x12, x3\n\t"
+        "adcs	x13, x13, x4\n\t"
+        "adc	x8, x8, xzr\n\t"
+        "# mu = a[1] * mp\n\t"
+        "mul	x5, %[mp], x10\n\t"
+        "# a[1+0] += m[0] * mu\n\t"
+        "mul	x3, x17, x5\n\t"
+        "umulh	x6, x17, x5\n\t"
+        "adds	x10, x10, x3\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "# a[1+1] += m[1] * mu\n\t"
+        "mul	x3, x19, x5\n\t"
+        "umulh	x7, x19, x5\n\t"
+        "adds	x3, x3, x6\n\t"
+        "adc	x7, x7, xzr\n\t"
+        "adds	x11, x11, x3\n\t"
+        "adc	x7, x7, xzr\n\t"
+        "# a[1+2] += m[2] * mu\n\t"
+        "mul	x3, x20, x5\n\t"
+        "umulh	x6, x20, x5\n\t"
+        "adds	x3, x3, x7\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "adds	x12, x12, x3\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "# a[1+3] += m[3] * mu\n\t"
+        "mul	x3, x21, x5\n\t"
+        "umulh	x4, x21, x5\n\t"
+        "adds	x3, x3, x6\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "cset	x8, cs\n\t"
+        "adds	x13, x13, x3\n\t"
+        "adcs	x14, x14, x4\n\t"
+        "adc	x8, x8, xzr\n\t"
+        "# mu = a[2] * mp\n\t"
+        "mul	x5, %[mp], x11\n\t"
+        "# a[2+0] += m[0] * mu\n\t"
+        "mul	x3, x17, x5\n\t"
+        "umulh	x6, x17, x5\n\t"
+        "adds	x11, x11, x3\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "# a[2+1] += m[1] * mu\n\t"
+        "mul	x3, x19, x5\n\t"
+        "umulh	x7, x19, x5\n\t"
+        "adds	x3, x3, x6\n\t"
+        "adc	x7, x7, xzr\n\t"
+        "adds	x12, x12, x3\n\t"
+        "adc	x7, x7, xzr\n\t"
+        "# a[2+2] += m[2] * mu\n\t"
+        "mul	x3, x20, x5\n\t"
+        "umulh	x6, x20, x5\n\t"
+        "adds	x3, x3, x7\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "adds	x13, x13, x3\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "# a[2+3] += m[3] * mu\n\t"
+        "mul	x3, x21, x5\n\t"
+        "umulh	x4, x21, x5\n\t"
+        "adds	x3, x3, x6\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "cset	x8, cs\n\t"
+        "adds	x14, x14, x3\n\t"
+        "adcs	x15, x15, x4\n\t"
+        "adc	x8, x8, xzr\n\t"
+        "# mu = a[3] * mp\n\t"
+        "mul	x5, %[mp], x12\n\t"
+        "# a[3+0] += m[0] * mu\n\t"
+        "mul	x3, x17, x5\n\t"
+        "umulh	x6, x17, x5\n\t"
+        "adds	x12, x12, x3\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "# a[3+1] += m[1] * mu\n\t"
+        "mul	x3, x19, x5\n\t"
+        "umulh	x7, x19, x5\n\t"
+        "adds	x3, x3, x6\n\t"
+        "adc	x7, x7, xzr\n\t"
+        "adds	x13, x13, x3\n\t"
+        "adc	x7, x7, xzr\n\t"
+        "# a[3+2] += m[2] * mu\n\t"
+        "mul	x3, x20, x5\n\t"
+        "umulh	x6, x20, x5\n\t"
+        "adds	x3, x3, x7\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "adds	x14, x14, x3\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "# a[3+3] += m[3] * mu\n\t"
+        "mul	x3, x21, x5\n\t"
+        "umulh	x4, x21, x5\n\t"
+        "adds	x3, x3, x6\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "cset	x8, cs\n\t"
+        "adds	x15, x15, x3\n\t"
+        "adcs	x16, x16, x4\n\t"
+        "adc	x8, x8, xzr\n\t"
+        "sub	x3, xzr, x8\n\t"
+        "and	x17, x17, x3\n\t"
+        "and	x19, x19, x3\n\t"
+        "and	x20, x20, x3\n\t"
+        "and	x21, x21, x3\n\t"
+        "subs	x13, x13, x17\n\t"
+        "sbcs	x14, x14, x19\n\t"
+        "sbcs	x15, x15, x20\n\t"
+        "stp	x13, x14, [%[a], 0]\n\t"
+        "sbc	x16, x16, x21\n\t"
+        "stp	x15, x16, [%[a], 16]\n\t"
         :
         : [a] "r" (a), [m] "r" (m), [mp] "r" (mp)
-        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11",
-          "x12"
-    );
-}
-
-/* Multiply two Montogmery form numbers mod the modulus (prime).
- * (r = a * b mod m)
- *
- * r   Result of multiplication.
- * a   First number to multiply in Montogmery form.
- * b   Second number to multiply in Montogmery form.
- * m   Modulus (prime).
- * mp  Montogmery mulitplier.
- */
-SP_NOINLINE static void sp_256_mont_mul_4(sp_digit* r, sp_digit* a, sp_digit* b,
-        sp_digit* m, sp_digit mp)
-{
-    (void)mp;
-
-    __asm__ __volatile__ (
-        "ldr	x18, [%[a], 0]\n\t"
-        "ldr	x19, [%[a], 8]\n\t"
-        "ldr	x20, [%[a], 16]\n\t"
-        "ldr	x21, [%[a], 24]\n\t"
-        "ldr	x22, [%[b], 0]\n\t"
-        "ldr	x23, [%[b], 8]\n\t"
-        "ldr	x24, [%[b], 16]\n\t"
-        "ldr	x25, [%[b], 24]\n\t"
-        "#  A[0] * B[0]\n\t"
-        "mul	x10, x18, x22\n\t"
-        "umulh	x11, x18, x22\n\t"
-        "#  A[0] * B[1]\n\t"
-        "mul	x5, x18, x23\n\t"
-        "umulh	x6, x18, x23\n\t"
-        "adds	x11, x11, x5\n\t"
-        "adc	x12, xzr, x6\n\t"
-        "#  A[1] * B[0]\n\t"
-        "mul	x5, x19, x22\n\t"
-        "umulh	x6, x19, x22\n\t"
-        "adds	x11, x11, x5\n\t"
-        "adcs	x12, x12, x6\n\t"
-        "adc	x13, xzr, xzr\n\t"
-        "#  A[0] * B[2]\n\t"
-        "mul	x5, x18, x24\n\t"
-        "umulh	x6, x18, x24\n\t"
-        "adds	x12, x12, x5\n\t"
-        "adc	x13, x13, x6\n\t"
-        "#  A[1] * B[1]\n\t"
-        "mul	x5, x19, x23\n\t"
-        "umulh	x6, x19, x23\n\t"
-        "adds	x12, x12, x5\n\t"
-        "adcs	x13, x13, x6\n\t"
-        "adc	x14, xzr, xzr\n\t"
-        "#  A[2] * B[0]\n\t"
-        "mul	x5, x20, x22\n\t"
-        "umulh	x6, x20, x22\n\t"
-        "adds	x12, x12, x5\n\t"
-        "adcs	x13, x13, x6\n\t"
-        "adc	x14, x14, xzr\n\t"
-        "#  A[0] * B[3]\n\t"
-        "mul	x5, x18, x25\n\t"
-        "umulh	x6, x18, x25\n\t"
-        "adds	x13, x13, x5\n\t"
-        "adcs	x14, x14, x6\n\t"
-        "adc	x15, xzr, xzr\n\t"
-        "#  A[1] * B[2]\n\t"
-        "mul	x5, x19, x24\n\t"
-        "umulh	x6, x19, x24\n\t"
-        "adds	x13, x13, x5\n\t"
-        "adcs	x14, x14, x6\n\t"
-        "adc	x15, x15, xzr\n\t"
-        "#  A[2] * B[1]\n\t"
-        "mul	x5, x20, x23\n\t"
-        "umulh	x6, x20, x23\n\t"
-        "adds	x13, x13, x5\n\t"
-        "adcs	x14, x14, x6\n\t"
-        "adc	x15, x15, xzr\n\t"
-        "#  A[3] * B[0]\n\t"
-        "mul	x5, x21, x22\n\t"
-        "umulh	x6, x21, x22\n\t"
-        "adds	x13, x13, x5\n\t"
-        "adcs	x14, x14, x6\n\t"
-        "adc	x15, x15, xzr\n\t"
-        "#  A[1] * B[3]\n\t"
-        "mul	x5, x19, x25\n\t"
-        "umulh	x6, x19, x25\n\t"
-        "adds	x14, x14, x5\n\t"
-        "adcs	x15, x15, x6\n\t"
-        "adc	x16, xzr, xzr\n\t"
-        "#  A[2] * B[2]\n\t"
-        "mul	x5, x20, x24\n\t"
-        "umulh	x6, x20, x24\n\t"
-        "adds	x14, x14, x5\n\t"
-        "adcs	x15, x15, x6\n\t"
-        "adc	x16, x16, xzr\n\t"
-        "#  A[3] * B[1]\n\t"
-        "mul	x5, x21, x23\n\t"
-        "umulh	x6, x21, x23\n\t"
-        "adds	x14, x14, x5\n\t"
-        "adcs	x15, x15, x6\n\t"
-        "adc	x16, x16, xzr\n\t"
-        "#  A[2] * B[3]\n\t"
-        "mul	x5, x20, x25\n\t"
-        "umulh	x6, x20, x25\n\t"
-        "adds	x15, x15, x5\n\t"
-        "adcs	x16, x16, x6\n\t"
-        "adc	x17, xzr, xzr\n\t"
-        "#  A[3] * B[2]\n\t"
-        "mul	x5, x21, x24\n\t"
-        "umulh	x6, x21, x24\n\t"
-        "adds	x15, x15, x5\n\t"
-        "adcs	x16, x16, x6\n\t"
-        "adc	x17, x17, xzr\n\t"
-        "#  A[3] * B[3]\n\t"
-        "mul	x5, x21, x25\n\t"
-        "umulh	x6, x21, x25\n\t"
-        "adds	x16, x16, x5\n\t"
-        "adc	x17, x17, x6\n\t"
-        "# Start Reduction\n\t"
-        "mov	x5, x10\n\t"
-        "mov	x6, x11\n\t"
-        "mov	x7, x12\n\t"
-        "mov	x8, x13\n\t"
-        "# mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192\n\t"
-        "#    - a[0] << 32 << 192\n\t"
-        "#   + (a[0] * 2) << 192\n\t"
-        "add	x8, x8, x10\n\t"
-        "add	x8, x8, x10\n\t"
-        "#   a[0]-a[2] << 32\n\t"
-        "lsl	x10, x10, 32\n\t"
-        "lsr	x18, x5, 32\n\t"
-        "lsl	x11, x6, 32\n\t"
-        "lsr	x19, x6, 32\n\t"
-        "lsl	x12, x7, 32\n\t"
-        "eor	x11, x11, x18\n\t"
-        "eor	x12, x12, x19\n\t"
-        "#   - a[0] << 32 << 192\n\t"
-        "sub	x8, x8, x10\n\t"
-        "#   + a[0]-a[2] << 32 << 64\n\t"
-        "adds	x6, x6, x10\n\t"
-        "adcs	x7, x7, x11\n\t"
-        "adc	x8, x8, x12\n\t"
-        "# a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu\n\t"
-        "#   a += mu << 256\n\t"
-        "adds	x14, x14, x5\n\t"
-        "adcs	x15, x15, x6\n\t"
-        "adcs	x16, x16, x7\n\t"
-        "adcs	x17, x17, x8\n\t"
-        "csetm	x10, cs\n\t"
-        "#   a += mu << 192\n\t"
-        "adds	x13, x13, x5\n\t"
-        "adcs	x14, x14, x6\n\t"
-        "adcs	x15, x15, x7\n\t"
-        "adcs	x16, x16, x8\n\t"
-        "adcs	x17, x17, xzr\n\t"
-        "csetm	x20, cs\n\t"
-        "add	x10, x10, x20\n\t"
-        "# mu <<= 32\n\t"
-        "lsr	x9, x8, 32\n\t"
-        "lsr	x18, x5, 32\n\t"
-        "lsl	x5, x5, 32\n\t"
-        "lsr	x19, x6, 32\n\t"
-        "lsl	x6, x6, 32\n\t"
-        "lsr	x20, x7, 32\n\t"
-        "lsl	x7, x7, 32\n\t"
-        "lsl	x8, x8, 32\n\t"
-        "eor	x6, x6, x18\n\t"
-        "eor	x7, x7, x19\n\t"
-        "eor	x8, x8, x20\n\t"
-        "#   a += (mu << 32) << 64\n\t"
-        "adds	x13, x13, x7\n\t"
-        "adcs	x14, x14, x8\n\t"
-        "adcs	x15, x15, x9\n\t"
-        "adcs	x16, x16, xzr\n\t"
-        "adcs	x17, x17, xzr\n\t"
-        "csetm	x20, cs\n\t"
-        "add	x10, x10, x20\n\t"
-        "#   a -= (mu << 32) << 192\n\t"
-        "subs	x13, x13, x5\n\t"
-        "mov	x18, 0xffffffff\n\t"
-        "sbcs	x14, x14, x6\n\t"
-        "mov	x19, 0xffffffff00000001\n\t"
-        "sbcs	x15, x15, x7\n\t"
-        "sbcs	x16, x16, x8\n\t"
-        "sbcs	x17, x17, x9\n\t"
-        "cset	x20, cc\n\t"
-        "add	x10, x10, x20\n\t"
-        "# mask m and sub from result if overflow\n\t"
-        "#  m[0] = -1 & mask = mask\n\t"
-        "and	x18, x18, x10\n\t"
-        "#  m[2] =  0 & mask = 0\n\t"
-        "and	x19, x19, x10\n\t"
-        "subs	x14, x14, x10\n\t"
-        "sbcs	x15, x15, x18\n\t"
-        "sbcs	x16, x16, xzr\n\t"
-        "sbc	x17, x17, x19\n\t"
-        "str	x14, [%[r], 0]\n\t"
-        "str	x15, [%[r], 8]\n\t"
-        "str	x16, [%[r], 16]\n\t"
-        "str	x17, [%[r], 24]\n\t"
-        : [m] "+r" (m), [a] "+r" (a), [b] "+r" (b)
-        : [r] "r" (r)
-        : "memory", "x5", "x6", "x7", "x8", "x9",
-          "x18", "x19", "x20", "x21",
-          "x22", "x23", "x24", "x25",
-          "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17"
-    );
-}
-
-/* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m)
- *
- * r   Result of squaring.
- * a   Number to square in Montogmery form.
- * m   Modulus (prime).
- * mp  Montogmery mulitplier.
- */
-SP_NOINLINE static void sp_256_mont_sqr_4(sp_digit* r, sp_digit* a, sp_digit* m,
-        sp_digit mp)
-{
-    __asm__ __volatile__ (
-        "ldr	x18, [%[a], 0]\n\t"
-        "ldr	x19, [%[a], 8]\n\t"
-        "ldr	x20, [%[a], 16]\n\t"
-        "ldr	x21, [%[a], 24]\n\t"
-        "#  A[0] * A[1]\n\t"
-        "mul	x11, x18, x19\n\t"
-        "umulh	x12, x18, x19\n\t"
-        "#  A[0] * A[2]\n\t"
-        "mul	x5, x18, x20\n\t"
-        "umulh	x6, x18, x20\n\t"
-        "adds	x12, x12, x5\n\t"
-        "adc	x13, xzr, x6\n\t"
-        "#  A[0] * A[3]\n\t"
-        "mul	x5, x18, x21\n\t"
-        "umulh	x6, x18, x21\n\t"
-        "adds	x13, x13, x5\n\t"
-        "adc	x14, xzr, x6\n\t"
-        "#  A[1] * A[2]\n\t"
-        "mul	x5, x19, x20\n\t"
-        "umulh	x6, x19, x20\n\t"
-        "adds	x13, x13, x5\n\t"
-        "adcs	x14, x14, x6\n\t"
-        "adc	x15, xzr, xzr\n\t"
-        "#  A[1] * A[3]\n\t"
-        "mul	x5, x19, x21\n\t"
-        "umulh	x6, x19, x21\n\t"
-        "adds	x14, x14, x5\n\t"
-        "adc	x15, x15, x6\n\t"
-        "#  A[2] * A[3]\n\t"
-        "mul	x5, x20, x21\n\t"
-        "umulh	x6, x20, x21\n\t"
-        "adds	x15, x15, x5\n\t"
-        "adc	x16, xzr, x6\n\t"
-        "# Double\n\t"
-        "adds	x11, x11, x11\n\t"
-        "adcs	x12, x12, x12\n\t"
-        "adcs	x13, x13, x13\n\t"
-        "adcs	x14, x14, x14\n\t"
-        "adcs	x15, x15, x15\n\t"
-        "adcs	x16, x16, x16\n\t"
-        "cset	x17, cs\n\t"
-        "#  A[0] * A[0]\n\t"
-        "mul	x10, x18, x18\n\t"
-        "umulh	x4, x18, x18\n\t"
-        "#  A[1] * A[1]\n\t"
-        "mul	x5, x19, x19\n\t"
-        "umulh	x6, x19, x19\n\t"
-        "#  A[2] * A[2]\n\t"
-        "mul	x7, x20, x20\n\t"
-        "umulh	x8, x20, x20\n\t"
-        "#  A[3] * A[3]\n\t"
-        "mul	x9, x21, x21\n\t"
-        "umulh	x18, x21, x21\n\t"
-        "adds	x11, x11, x4\n\t"
-        "adcs	x12, x12, x5\n\t"
-        "adcs	x13, x13, x6\n\t"
-        "adcs	x14, x14, x7\n\t"
-        "adcs	x15, x15, x8\n\t"
-        "adcs	x16, x16, x9\n\t"
-        "adc	x17, x17, x18\n\t"
-        "# Start Reduction\n\t"
-        "mov	x5, x10\n\t"
-        "mov	x6, x11\n\t"
-        "mov	x7, x12\n\t"
-        "mov	x8, x13\n\t"
-        "# mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192\n\t"
-        "#    - a[0] << 32 << 192\n\t"
-        "#   + (a[0] * 2) << 192\n\t"
-        "add	x8, x8, x10\n\t"
-        "add	x8, x8, x10\n\t"
-        "#   a[0]-a[2] << 32\n\t"
-        "lsl	x10, x10, 32\n\t"
-        "lsr	x18, x5, 32\n\t"
-        "lsl	x11, x6, 32\n\t"
-        "lsr	x19, x6, 32\n\t"
-        "lsl	x12, x7, 32\n\t"
-        "eor	x11, x11, x18\n\t"
-        "eor	x12, x12, x19\n\t"
-        "#   - a[0] << 32 << 192\n\t"
-        "sub	x8, x8, x10\n\t"
-        "#   + a[0]-a[2] << 32 << 64\n\t"
-        "adds	x6, x6, x10\n\t"
-        "adcs	x7, x7, x11\n\t"
-        "adc	x8, x8, x12\n\t"
-        "# a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu\n\t"
-        "#   a += mu << 256\n\t"
-        "adds	x14, x14, x5\n\t"
-        "adcs	x15, x15, x6\n\t"
-        "adcs	x16, x16, x7\n\t"
-        "adcs	x17, x17, x8\n\t"
-        "csetm	x10, cs\n\t"
-        "#   a += mu << 192\n\t"
-        "adds	x13, x13, x5\n\t"
-        "adcs	x14, x14, x6\n\t"
-        "adcs	x15, x15, x7\n\t"
-        "adcs	x16, x16, x8\n\t"
-        "adcs	x17, x17, xzr\n\t"
-        "csetm	x20, cs\n\t"
-        "add	x10, x10, x20\n\t"
-        "# mu <<= 32\n\t"
-        "lsr	x9, x8, 32\n\t"
-        "lsr	x18, x5, 32\n\t"
-        "lsl	x5, x5, 32\n\t"
-        "lsr	x19, x6, 32\n\t"
-        "lsl	x6, x6, 32\n\t"
-        "lsr	x20, x7, 32\n\t"
-        "lsl	x7, x7, 32\n\t"
-        "lsl	x8, x8, 32\n\t"
-        "eor	x6, x6, x18\n\t"
-        "eor	x7, x7, x19\n\t"
-        "eor	x8, x8, x20\n\t"
-        "#   a += (mu << 32) << 64\n\t"
-        "adds	x13, x13, x7\n\t"
-        "adcs	x14, x14, x8\n\t"
-        "adcs	x15, x15, x9\n\t"
-        "adcs	x16, x16, xzr\n\t"
-        "adcs	x17, x17, xzr\n\t"
-        "csetm	x20, cs\n\t"
-        "add	x10, x10, x20\n\t"
-        "#   a -= (mu << 32) << 192\n\t"
-        "subs	x13, x13, x5\n\t"
-        "mov	x18, 0xffffffff\n\t"
-        "sbcs	x14, x14, x6\n\t"
-        "mov	x19, 0xffffffff00000001\n\t"
-        "sbcs	x15, x15, x7\n\t"
-        "sbcs	x16, x16, x8\n\t"
-        "sbcs	x17, x17, x9\n\t"
-        "cset	x20, cc\n\t"
-        "add	x10, x10, x20\n\t"
-        "# mask m and sub from result if overflow\n\t"
-        "#  m[0] = -1 & mask = mask\n\t"
-        "and	x18, x18, x10\n\t"
-        "#  m[2] =  0 & mask = 0\n\t"
-        "and	x19, x19, x10\n\t"
-        "subs	x14, x14, x10\n\t"
-        "sbcs	x15, x15, x18\n\t"
-        "sbcs	x16, x16, xzr\n\t"
-        "sbc	x17, x17, x19\n\t"
-        "str	x14, [%[r], 0]\n\t"
-        "str	x15, [%[r], 8]\n\t"
-        "str	x16, [%[r], 16]\n\t"
-        "str	x17, [%[r], 24]\n\t"
-        : [m] "+r" (m), [a] "+r" (a), [mp] "+r" (mp)
-        : [r] "r" (r)
-        : "memory", "x4", "x5", "x6", "x7", "x8", "x9",
-          "x18", "x19", "x20", "x21",
-          "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17"
-    );
-}
-
-#ifndef WOLFSSL_SP_SMALL
-/* Square the Montgomery form number a number of times. (r = a ^ n mod m)
- *
- * r   Result of squaring.
- * a   Number to square in Montogmery form.
- * n   Number of times to square.
- * m   Modulus (prime).
- * mp  Montogmery mulitplier.
- */
-static void sp_256_mont_sqr_n_4(sp_digit* r, sp_digit* a, int n,
-        sp_digit* m, sp_digit mp)
-{
-    sp_256_mont_sqr_4(r, a, m, mp);
-    for (; n > 1; n--)
-        sp_256_mont_sqr_4(r, r, m, mp);
-}
-
-#else
-/* Mod-2 for the P256 curve. */
-static const uint64_t p256_mod_2[4] = {
-    0xfffffffffffffffd,0x00000000ffffffff,0x0000000000000000,
-    0xffffffff00000001
-};
-#endif /* !WOLFSSL_SP_SMALL */
-
-/* Invert the number, in Montgomery form, modulo the modulus (prime) of the
- * P256 curve. (r = 1 / a mod m)
- *
- * r   Inverse result.
- * a   Number to invert.
- * td  Temporary data.
- */
-static void sp_256_mont_inv_4(sp_digit* r, sp_digit* a, sp_digit* td)
-{
-#ifdef WOLFSSL_SP_SMALL
-    sp_digit* t = td;
-    int i;
-
-    XMEMCPY(t, a, sizeof(sp_digit) * 4);
-    for (i=254; i>=0; i--) {
-        sp_256_mont_sqr_4(t, t, p256_mod, p256_mp_mod);
-        if (p256_mod_2[i / 64] & ((sp_digit)1 << (i % 64)))
-            sp_256_mont_mul_4(t, t, a, p256_mod, p256_mp_mod);
-    }
-    XMEMCPY(r, t, sizeof(sp_digit) * 4);
-#else
-    sp_digit* t = td;
-    sp_digit* t2 = td + 2 * 4;
-    sp_digit* t3 = td + 4 * 4;
-
-    /* t = a^2 */
-    sp_256_mont_sqr_4(t, a, p256_mod, p256_mp_mod);
-    /* t = a^3 = t * a */
-    sp_256_mont_mul_4(t, t, a, p256_mod, p256_mp_mod);
-    /* t2= a^c = t ^ 2 ^ 2 */
-    sp_256_mont_sqr_n_4(t2, t, 2, p256_mod, p256_mp_mod);
-    /* t3= a^d = t2 * a */
-    sp_256_mont_mul_4(t3, t2, a, p256_mod, p256_mp_mod);
-    /* t = a^f = t2 * t */
-    sp_256_mont_mul_4(t, t2, t, p256_mod, p256_mp_mod);
-    /* t2= a^f0 = t ^ 2 ^ 4 */
-    sp_256_mont_sqr_n_4(t2, t, 4, p256_mod, p256_mp_mod);
-    /* t3= a^fd = t2 * t3 */
-    sp_256_mont_mul_4(t3, t2, t3, p256_mod, p256_mp_mod);
-    /* t = a^ff = t2 * t */
-    sp_256_mont_mul_4(t, t2, t, p256_mod, p256_mp_mod);
-    /* t2= a^ff00 = t ^ 2 ^ 8 */
-    sp_256_mont_sqr_n_4(t2, t, 8, p256_mod, p256_mp_mod);
-    /* t3= a^fffd = t2 * t3 */
-    sp_256_mont_mul_4(t3, t2, t3, p256_mod, p256_mp_mod);
-    /* t = a^ffff = t2 * t */
-    sp_256_mont_mul_4(t, t2, t, p256_mod, p256_mp_mod);
-    /* t2= a^ffff0000 = t ^ 2 ^ 16 */
-    sp_256_mont_sqr_n_4(t2, t, 16, p256_mod, p256_mp_mod);
-    /* t3= a^fffffffd = t2 * t3 */
-    sp_256_mont_mul_4(t3, t2, t3, p256_mod, p256_mp_mod);
-    /* t = a^ffffffff = t2 * t */
-    sp_256_mont_mul_4(t, t2, t, p256_mod, p256_mp_mod);
-    /* t = a^ffffffff00000000 = t ^ 2 ^ 32  */
-    sp_256_mont_sqr_n_4(t2, t, 32, p256_mod, p256_mp_mod);
-    /* t2= a^ffffffffffffffff = t2 * t */
-    sp_256_mont_mul_4(t, t2, t, p256_mod, p256_mp_mod);
-    /* t2= a^ffffffff00000001 = t2 * a */
-    sp_256_mont_mul_4(t2, t2, a, p256_mod, p256_mp_mod);
-    /* t2= a^ffffffff000000010000000000000000000000000000000000000000
-     *   = t2 ^ 2 ^ 160 */
-    sp_256_mont_sqr_n_4(t2, t2, 160, p256_mod, p256_mp_mod);
-    /* t2= a^ffffffff00000001000000000000000000000000ffffffffffffffff
-     *   = t2 * t */
-    sp_256_mont_mul_4(t2, t2, t, p256_mod, p256_mp_mod);
-    /* t2= a^ffffffff00000001000000000000000000000000ffffffffffffffff00000000
-     *   = t2 ^ 2 ^ 32 */
-    sp_256_mont_sqr_n_4(t2, t2, 32, p256_mod, p256_mp_mod);
-    /* r = a^ffffffff00000001000000000000000000000000fffffffffffffffffffffffd
-     *   = t2 * t3 */
-    sp_256_mont_mul_4(r, t2, t3, p256_mod, p256_mp_mod);
-#endif /* WOLFSSL_SP_SMALL */
-}
-
-/* Map the Montgomery form projective co-ordinate point to an affine point.
- *
- * r  Resulting affine co-ordinate point.
- * p  Montgomery form projective co-ordinate point.
+        : "memory", "x3", "x4", "x5", "x8", "x6", "x7", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21"
+    );
+}
+
+/* Map the Montgomery form projective coordinate point to an affine point.
+ *
+ * r  Resulting affine coordinate point.
+ * p  Montgomery form projective coordinate point.
  * t  Temporary ordinate data.
  */
-static void sp_256_map_4(sp_point* r, sp_point* p, sp_digit* t)
+static void sp_256_map_4(sp_point_256* r, const sp_point_256* p, sp_digit* t)
 {
     sp_digit* t1 = t;
     sp_digit* t2 = t + 2*4;
@@ -13146,20 +19827,22 @@
 
     /* x /= z^2 */
     sp_256_mont_mul_4(r->x, p->x, t2, p256_mod, p256_mp_mod);
-    XMEMSET(r->x + 4, 0, sizeof(r->x) / 2);
+    XMEMSET(r->x + 4, 0, sizeof(r->x) / 2U);
     sp_256_mont_reduce_4(r->x, p256_mod, p256_mp_mod);
     /* Reduce x to less than modulus */
     n = sp_256_cmp_4(r->x, p256_mod);
-    sp_256_cond_sub_4(r->x, r->x, p256_mod, 0 - (n >= 0));
+    sp_256_cond_sub_4(r->x, r->x, p256_mod, 0 - ((n >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
     sp_256_norm_4(r->x);
 
     /* y /= z^3 */
     sp_256_mont_mul_4(r->y, p->y, t1, p256_mod, p256_mp_mod);
-    XMEMSET(r->y + 4, 0, sizeof(r->y) / 2);
+    XMEMSET(r->y + 4, 0, sizeof(r->y) / 2U);
     sp_256_mont_reduce_4(r->y, p256_mod, p256_mp_mod);
     /* Reduce y to less than modulus */
     n = sp_256_cmp_4(r->y, p256_mod);
-    sp_256_cond_sub_4(r->y, r->y, p256_mod, 0 - (n >= 0));
+    sp_256_cond_sub_4(r->y, r->y, p256_mod, 0 - ((n >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
     sp_256_norm_4(r->y);
 
     XMEMSET(r->z, 0, sizeof(r->z));
@@ -13174,35 +19857,28 @@
  * b   Second number to add in Montogmery form.
  * m   Modulus (prime).
  */
-static void sp_256_mont_add_4(sp_digit* r, sp_digit* a, sp_digit* b,
-        sp_digit* m)
-{
-    __asm__ __volatile__ (
-        "ldr	x4, [%[a],0]\n\t"
-        "ldr	x5, [%[a],8]\n\t"
-        "ldr	x6, [%[a],16]\n\t"
-        "ldr	x7, [%[a],24]\n\t"
-        "ldr	x8, [%[b],0]\n\t"
-        "ldr	x9, [%[b],8]\n\t"
-        "ldr	x10, [%[b],16]\n\t"
-        "ldr	x11, [%[b],24]\n\t"
+static void sp_256_mont_add_4(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    __asm__ __volatile__ (
+        "ldp	x4, x5, [%[a], 0]\n\t"
+        "ldp	x8, x9, [%[b], 0]\n\t"
         "adds	x4, x4, x8\n\t"
-        "adcs	x5, x5, x9\n\t"
-        "mov	x12, 0xffffffff\n\t"
+        "ldp	x6, x7, [%[a], 16]\n\t"
+        "adcs	x5, x5, x9\n\t"
+        "ldp	x10, x11, [%[b], 16]\n\t"
         "adcs	x6, x6, x10\n\t"
         "adcs	x7, x7, x11\n\t"
         "mov	x13, 0xffffffff00000001\n\t"
         "csetm	x14, cs\n\t"
-        "and	x12, x12, x14\n\t"
-        "and	x13, x13, x14\n\t"
         "subs	x4, x4, x14\n\t"
+        "lsr	x12, x14, 32\n\t"
         "sbcs	x5, x5, x12\n\t"
-        "str	x4, [%[r],0]\n\t"
+        "and	x13, x13, x14\n\t"
         "sbcs	x6, x6, xzr\n\t"
-        "str	x5, [%[r],8]\n\t"
+        "stp	x4, x5, [%[r],0]\n\t"
         "sbc	x7, x7, x13\n\t"
-        "str	x6, [%[r],16]\n\t"
-        "str	x7, [%[r],24]\n\t"
+        "stp	x6, x7, [%[r],16]\n\t"
         :
         : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
         : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14"
@@ -13215,30 +19891,25 @@
  * a   Number to double in Montogmery form.
  * m   Modulus (prime).
  */
-static void sp_256_mont_dbl_4(sp_digit* r, sp_digit* a, sp_digit* m)
-{
-    __asm__ __volatile__ (
-        "ldr	x3, [%[a]]\n\t"
-        "ldr	x4, [%[a],8]\n\t"
-        "ldr	x5, [%[a],16]\n\t"
-        "ldr	x6, [%[a],24]\n\t"
+static void sp_256_mont_dbl_4(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    __asm__ __volatile__ (
+        "ldp	x3, x4, [%[a]]\n\t"
+        "ldp	x5, x6, [%[a],16]\n\t"
         "adds	x3, x3, x3\n\t"
         "adcs	x4, x4, x4\n\t"
-        "mov	x7, 0xffffffff\n\t"
         "adcs	x5, x5, x5\n\t"
+        "adcs	x6, x6, x6\n\t"
         "mov	x8, 0xffffffff00000001\n\t"
-        "adcs	x6, x6, x6\n\t"
         "csetm	x9, cs\n\t"
-        "and	x7, x7, x9\n\t"
-        "and	x8, x8, x9\n\t"
         "subs	x3, x3, x9\n\t"
+        "lsr	x7, x9, 32\n\t"
         "sbcs	x4, x4, x7\n\t"
-        "str	x3, [%[r],0]\n\t"
+        "and	x8, x8, x9\n\t"
         "sbcs	x5, x5, xzr\n\t"
-        "str	x4, [%[r],8]\n\t"
+        "stp	x3, x4, [%[r],0]\n\t"
         "sbc	x6, x6, x8\n\t"
-        "str	x5, [%[r],16]\n\t"
-        "str	x6, [%[r],24]\n\t"
+        "stp	x5, x6, [%[r],16]\n\t"
         :
         : [r] "r" (r), [a] "r" (a)
         : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
@@ -13253,43 +19924,38 @@
  * a   Number to triple in Montogmery form.
  * m   Modulus (prime).
  */
-static void sp_256_mont_tpl_4(sp_digit* r, sp_digit* a, sp_digit* m)
-{
-    __asm__ __volatile__ (
-        "ldr	x10, [%[a]]\n\t"
-        "ldr	x11, [%[a],8]\n\t"
-        "ldr	x12, [%[a],16]\n\t"
-        "ldr	x13, [%[a],24]\n\t"
+static void sp_256_mont_tpl_4(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    __asm__ __volatile__ (
+        "ldp	x10, x11, [%[a]]\n\t"
         "adds	x3, x10, x10\n\t"
+        "ldr	x12, [%[a], 16]\n\t"
         "adcs	x4, x11, x11\n\t"
-        "mov	x7, 0xffffffff\n\t"
+        "ldr	x13, [%[a], 24]\n\t"
         "adcs	x5, x12, x12\n\t"
+        "adcs	x6, x13, x13\n\t"
         "mov	x8, 0xffffffff00000001\n\t"
-        "adcs	x6, x13, x13\n\t"
         "csetm	x9, cs\n\t"
-        "and	x7, x7, x9\n\t"
+        "subs	x3, x3, x9\n\t"
+        "lsr	x7, x9, 32\n\t"
+        "sbcs	x4, x4, x7\n\t"
         "and	x8, x8, x9\n\t"
-        "subs	x3, x3, x9\n\t"
-        "sbcs	x4, x4, x7\n\t"
         "sbcs	x5, x5, xzr\n\t"
         "sbc	x6, x6, x8\n\t"
         "adds	x3, x3, x10\n\t"
         "adcs	x4, x4, x11\n\t"
-        "mov	x7, 0xffffffff\n\t"
         "adcs	x5, x5, x12\n\t"
-        "mov	x8, 0xffffffff00000001\n\t"
         "adcs	x6, x6, x13\n\t"
+        "mov	x8, 0xffffffff00000001\n\t"
         "csetm	x9, cs\n\t"
-        "and	x7, x7, x9\n\t"
-        "and	x8, x8, x9\n\t"
         "subs	x3, x3, x9\n\t"
+        "lsr	x7, x9, 32\n\t"
         "sbcs	x4, x4, x7\n\t"
+        "and	x8, x8, x9\n\t"
         "sbcs	x5, x5, xzr\n\t"
+        "stp	x3, x4, [%[r], 0]\n\t"
         "sbc	x6, x6, x8\n\t"
-        "str	x3, [%[r], 0]\n\t"
-        "str	x4, [%[r], 8]\n\t"
-        "str	x5, [%[r], 16]\n\t"
-        "str	x6, [%[r], 24]\n\t"
+        "stp	x5, x6, [%[r], 16]\n\t"
         :
         : [r] "r" (r), [a] "r" (a)
         : "memory", "x10", "x11", "x12", "x13", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
@@ -13305,35 +19971,28 @@
  * b   Number to subtract with in Montogmery form.
  * m   Modulus (prime).
  */
-static void sp_256_mont_sub_4(sp_digit* r, sp_digit* a, sp_digit* b,
-        sp_digit* m)
-{
-    __asm__ __volatile__ (
-        "ldr	x4, [%[a],0]\n\t"
-        "ldr	x5, [%[a],8]\n\t"
-        "ldr	x6, [%[a],16]\n\t"
-        "ldr	x7, [%[a],24]\n\t"
-        "ldr	x8, [%[b],0]\n\t"
-        "ldr	x9, [%[b],8]\n\t"
-        "ldr	x10, [%[b],16]\n\t"
-        "ldr	x11, [%[b],24]\n\t"
+static void sp_256_mont_sub_4(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    __asm__ __volatile__ (
+        "ldp	x4, x5, [%[a], 0]\n\t"
+        "ldp	x8, x9, [%[b], 0]\n\t"
         "subs	x4, x4, x8\n\t"
-        "sbcs	x5, x5, x9\n\t"
-        "mov	x12, 0xffffffff\n\t"
+        "ldp	x6, x7, [%[a], 16]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "ldp	x10, x11, [%[b], 16]\n\t"
         "sbcs	x6, x6, x10\n\t"
         "sbcs	x7, x7, x11\n\t"
         "mov	x13, 0xffffffff00000001\n\t"
         "csetm	x14, cc\n\t"
-        "and	x12, x12, x14\n\t"
-        "and	x13, x13, x14\n\t"
         "adds	x4, x4, x14\n\t"
+        "lsr	x12, x14, 32\n\t"
         "adcs	x5, x5, x12\n\t"
-        "str	x4, [%[r],0]\n\t"
+        "and	x13, x13, x14\n\t"
         "adcs	x6, x6, xzr\n\t"
-        "str	x5, [%[r],8]\n\t"
+        "stp	x4, x5, [%[r],0]\n\t"
         "adc	x7, x7, x13\n\t"
-        "str	x6, [%[r],16]\n\t"
-        "str	x7, [%[r],24]\n\t"
+        "stp	x6, x7, [%[r],16]\n\t"
         :
         : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
         : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14"
@@ -13346,34 +20005,30 @@
  * a  Number to divide.
  * m  Modulus (prime).
  */
-static void sp_256_div2_4(sp_digit* r, sp_digit* a, sp_digit* m)
-{
-    __asm__ __volatile__ (
-        "ldr	x3, [%[a], 0]\n\t"
-        "ldr	x4, [%[a], 8]\n\t"
-        "ldr	x5, [%[a], 16]\n\t"
-        "ldr	x6, [%[a], 24]\n\t"
+static void sp_256_div2_4(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    __asm__ __volatile__ (
+        "ldp	x3, x4, [%[a], 0]\n\t"
         "and	x9, x3, 1\n\t"
+        "ldp	x5, x6, [%[a], 16]\n\t"
         "sub	x10, xzr, x9\n\t"
-        "and	x7, x10, 0xffffffff\n\t"
+        "lsr	x7, x10, 32\n\t"
+        "adds	x3, x3, x10\n\t"
         "and	x8, x10, 0xffffffff00000001\n\t"
-        "adds	x3, x3, x10\n\t"
-        "adcs	x4, x4, x7\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "lsr	x3, x3, 1\n\t"
         "adcs	x5, x5, xzr\n\t"
-        "adcs	x6, x6, x8\n\t"
+        "lsr	x7, x4, 1\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "lsr	x8, x5, 1\n\t"
         "cset	x9, cs\n\t"
-        "lsr	x3, x3, 1\n\t"
-        "lsr	x7, x4, 1\n\t"
-        "lsr	x8, x5, 1\n\t"
         "lsr	x10, x6, 1\n\t"
         "orr	x3, x3, x4, lsl 63\n\t"
         "orr	x4, x7, x5, lsl 63\n\t"
         "orr	x5, x8, x6, lsl 63\n\t"
+        "stp	x3, x4, [%[r], 0]\n\t"
         "orr	x6, x10, x9, lsl 63\n\t"
-        "str	x3, [%[r], 0]\n\t"
-        "str	x4, [%[r], 8]\n\t"
-        "str	x5, [%[r], 16]\n\t"
-        "str	x6, [%[r], 24]\n\t"
+        "stp	x5, x6, [%[r], 16]\n\t"
         :
         : [r] "r" (r), [a] "r" (a), [m] "r" (m)
         : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10"
@@ -13387,50 +20042,38 @@
  * p  Point to double.
  * t  Temporary ordinate data.
  */
-static void sp_256_proj_point_dbl_4(sp_point* r, sp_point* p, sp_digit* t)
-{
-    sp_point *rp[2];
-    sp_point tp;
+static void sp_256_proj_point_dbl_4(sp_point_256* r, const sp_point_256* p, sp_digit* t)
+{
     sp_digit* t1 = t;
     sp_digit* t2 = t + 2*4;
     sp_digit* x;
     sp_digit* y;
     sp_digit* z;
-    int i;
-
-    /* When infinity don't double point passed in - constant time. */
-    rp[0] = r;
-    rp[1] = &tp;
-    x = rp[p->infinity]->x;
-    y = rp[p->infinity]->y;
-    z = rp[p->infinity]->z;
-    /* Put point to double into result - good for infinty. */
+
+    x = r->x;
+    y = r->y;
+    z = r->z;
+    /* Put infinity into result. */
     if (r != p) {
-        for (i=0; i<4; i++)
-            r->x[i] = p->x[i];
-        for (i=0; i<4; i++)
-            r->y[i] = p->y[i];
-        for (i=0; i<4; i++)
-            r->z[i] = p->z[i];
         r->infinity = p->infinity;
     }
 
     /* T1 = Z * Z */
-    sp_256_mont_sqr_4(t1, z, p256_mod, p256_mp_mod);
+    sp_256_mont_sqr_4(t1, p->z, p256_mod, p256_mp_mod);
     /* Z = Y * Z */
-    sp_256_mont_mul_4(z, y, z, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_4(z, p->y, p->z, p256_mod, p256_mp_mod);
     /* Z = 2Z */
     sp_256_mont_dbl_4(z, z, p256_mod);
     /* T2 = X - T1 */
-    sp_256_mont_sub_4(t2, x, t1, p256_mod);
+    sp_256_mont_sub_4(t2, p->x, t1, p256_mod);
     /* T1 = X + T1 */
-    sp_256_mont_add_4(t1, x, t1, p256_mod);
+    sp_256_mont_add_4(t1, p->x, t1, p256_mod);
     /* T2 = T1 * T2 */
     sp_256_mont_mul_4(t2, t1, t2, p256_mod, p256_mp_mod);
     /* T1 = 3T2 */
     sp_256_mont_tpl_4(t1, t2, p256_mod);
     /* Y = 2Y */
-    sp_256_mont_dbl_4(y, y, p256_mod);
+    sp_256_mont_dbl_4(y, p->y, p256_mod);
     /* Y = Y * Y */
     sp_256_mont_sqr_4(y, y, p256_mod, p256_mp_mod);
     /* T2 = Y * Y */
@@ -13438,9 +20081,9 @@
     /* T2 = T2/2 */
     sp_256_div2_4(t2, t2, p256_mod);
     /* Y = Y * X */
-    sp_256_mont_mul_4(y, y, x, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_4(y, y, p->x, p256_mod, p256_mp_mod);
     /* X = T1 * T1 */
-    sp_256_mont_mul_4(x, t1, t1, p256_mod, p256_mp_mod);
+    sp_256_mont_sqr_4(x, t1, p256_mod, p256_mp_mod);
     /* X = X - Y */
     sp_256_mont_sub_4(x, x, y, p256_mod);
     /* X = X - Y */
@@ -13451,7 +20094,100 @@
     sp_256_mont_mul_4(y, y, t1, p256_mod, p256_mp_mod);
     /* Y = Y - T2 */
     sp_256_mont_sub_4(y, y, t2, p256_mod);
-
+}
+
+/* Subtract two Montgomery form numbers (r = a - b % m).
+ *
+ * r   Result of subtration.
+ * a   Number to subtract from in Montogmery form.
+ * b   Number to subtract with in Montogmery form.
+ * m   Modulus (prime).
+ */
+static void sp_256_mont_sub_dbl_4(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    __asm__ __volatile__ (
+        "ldp	x8, x9, [%[b]]\n\t"
+        "ldp	x10, x11, [%[b],16]\n\t"
+        "adds	x8, x8, x8\n\t"
+        "ldp	x4, x5, [%[a]]\n\t"
+        "adcs	x9, x9, x9\n\t"
+        "ldp	x6, x7, [%[a],16]\n\t"
+        "adcs	x10, x10, x10\n\t"
+        "adcs	x11, x11, x11\n\t"
+        "mov	x13, 0xffffffff00000001\n\t"
+        "csetm	x14, cs\n\t"
+        "subs	x8, x8, x14\n\t"
+        "lsr	x12, x14, 32\n\t"
+        "sbcs	x9, x9, x12\n\t"
+        "and	x13, x13, x14\n\t"
+        "sbcs	x10, x10, xzr\n\t"
+        "sbc	x11, x11, x13\n\t"
+        "subs	x4, x4, x8\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "sbcs	x6, x6, x10\n\t"
+        "sbcs	x7, x7, x11\n\t"
+        "mov	x13, 0xffffffff00000001\n\t"
+        "csetm	x14, cc\n\t"
+        "adds	x4, x4, x14\n\t"
+        "lsr	x12, x14, 32\n\t"
+        "adcs	x5, x5, x12\n\t"
+        "and	x13, x13, x14\n\t"
+        "adcs	x6, x6, xzr\n\t"
+        "stp	x4, x5, [%[r],0]\n\t"
+        "adc	x7, x7, x13\n\t"
+        "stp	x6, x7, [%[r],16]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14"
+    );
+}
+
+/* Subtract two Montgomery form numbers (r = a - b % m).
+ *
+ * r   Result of subtration.
+ * a   Number to subtract from in Montogmery form.
+ * b   Number to subtract with in Montogmery form.
+ * m   Modulus (prime).
+ */
+static void sp_256_mont_dbl_sub_4(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    __asm__ __volatile__ (
+        "ldp	x4, x5, [%[a]]\n\t"
+        "ldp	x6, x7, [%[a],16]\n\t"
+        "adds	x4, x4, x4\n\t"
+        "ldp	x8, x9, [%[b]]\n\t"
+        "adcs	x5, x5, x5\n\t"
+        "ldp	x10, x11, [%[b],16]\n\t"
+        "adcs	x6, x6, x6\n\t"
+        "adcs	x7, x7, x7\n\t"
+        "mov	x13, 0xffffffff00000001\n\t"
+        "csetm	x14, cs\n\t"
+        "subs	x4, x4, x14\n\t"
+        "lsr	x12, x14, 32\n\t"
+        "sbcs	x5, x5, x12\n\t"
+        "and	x13, x13, x14\n\t"
+        "sbcs	x6, x6, xzr\n\t"
+        "sbc	x7, x7, x13\n\t"
+        "subs	x4, x4, x8\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "sbcs	x6, x6, x10\n\t"
+        "sbcs	x7, x7, x11\n\t"
+        "mov	x13, 0xffffffff00000001\n\t"
+        "csetm	x14, cc\n\t"
+        "adds	x4, x4, x14\n\t"
+        "lsr	x12, x14, 32\n\t"
+        "adcs	x5, x5, x12\n\t"
+        "and	x13, x13, x14\n\t"
+        "adcs	x6, x6, xzr\n\t"
+        "stp	x4, x5, [%[r],0]\n\t"
+        "adc	x7, x7, x13\n\t"
+        "stp	x6, x7, [%[r],16]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14"
+    );
 }
 
 /* Double the Montgomery form projective point p a number of times.
@@ -13461,67 +20197,78 @@
  * n  Number of times to double
  * t  Temporary ordinate data.
  */
-static void sp_256_proj_point_dbl_n_4(sp_point* r, sp_point* p, int n,
-        sp_digit* t)
-{
-    sp_point *rp[2];
-    sp_point tp;
+static void sp_256_proj_point_dbl_n_4(sp_point_256* p, int n, sp_digit* t)
+{
     sp_digit* w = t;
     sp_digit* a = t + 2*4;
     sp_digit* b = t + 4*4;
     sp_digit* t1 = t + 6*4;
-    sp_digit* t2 = t + 8*4;
     sp_digit* x;
     sp_digit* y;
     sp_digit* z;
-    int i;
-
-    rp[0] = r;
-    rp[1] = &tp;
-    x = rp[p->infinity]->x;
-    y = rp[p->infinity]->y;
-    z = rp[p->infinity]->z;
-    if (r != p) {
-        for (i=0; i<4; i++)
-            r->x[i] = p->x[i];
-        for (i=0; i<4; i++)
-            r->y[i] = p->y[i];
-        for (i=0; i<4; i++)
-            r->z[i] = p->z[i];
-        r->infinity = p->infinity;
-    }
+
+    x = p->x;
+    y = p->y;
+    z = p->z;
 
     /* Y = 2*Y */
     sp_256_mont_dbl_4(y, y, p256_mod);
     /* W = Z^4 */
     sp_256_mont_sqr_4(w, z, p256_mod, p256_mp_mod);
     sp_256_mont_sqr_4(w, w, p256_mod, p256_mp_mod);
-    while (n--) {
+
+#ifndef WOLFSSL_SP_SMALL
+    while (--n > 0)
+#else
+    while (--n >= 0)
+#endif
+    {
         /* A = 3*(X^2 - W) */
         sp_256_mont_sqr_4(t1, x, p256_mod, p256_mp_mod);
         sp_256_mont_sub_4(t1, t1, w, p256_mod);
         sp_256_mont_tpl_4(a, t1, p256_mod);
         /* B = X*Y^2 */
-        sp_256_mont_sqr_4(t2, y, p256_mod, p256_mp_mod);
-        sp_256_mont_mul_4(b, t2, x, p256_mod, p256_mp_mod);
+        sp_256_mont_sqr_4(t1, y, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_4(b, t1, x, p256_mod, p256_mp_mod);
         /* X = A^2 - 2B */
         sp_256_mont_sqr_4(x, a, p256_mod, p256_mp_mod);
-        sp_256_mont_dbl_4(t1, b, p256_mod);
-        sp_256_mont_sub_4(x, x, t1, p256_mod);
+        sp_256_mont_sub_dbl_4(x, x, b, p256_mod);
         /* Z = Z*Y */
         sp_256_mont_mul_4(z, z, y, p256_mod, p256_mp_mod);
         /* t2 = Y^4 */
-        sp_256_mont_sqr_4(t2, t2, p256_mod, p256_mp_mod);
-        if (n) {
+        sp_256_mont_sqr_4(t1, t1, p256_mod, p256_mp_mod);
+#ifdef WOLFSSL_SP_SMALL
+        if (n != 0)
+#endif
+        {
             /* W = W*Y^4 */
-            sp_256_mont_mul_4(w, w, t2, p256_mod, p256_mp_mod);
+            sp_256_mont_mul_4(w, w, t1, p256_mod, p256_mp_mod);
         }
         /* y = 2*A*(B - X) - Y^4 */
         sp_256_mont_sub_4(y, b, x, p256_mod);
         sp_256_mont_mul_4(y, y, a, p256_mod, p256_mp_mod);
-        sp_256_mont_dbl_4(y, y, p256_mod);
-        sp_256_mont_sub_4(y, y, t2, p256_mod);
-    }
+        sp_256_mont_dbl_sub_4(y, y, t1, p256_mod);
+    }
+#ifndef WOLFSSL_SP_SMALL
+    /* A = 3*(X^2 - W) */
+    sp_256_mont_sqr_4(t1, x, p256_mod, p256_mp_mod);
+    sp_256_mont_sub_4(t1, t1, w, p256_mod);
+    sp_256_mont_tpl_4(a, t1, p256_mod);
+    /* B = X*Y^2 */
+    sp_256_mont_sqr_4(t1, y, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_4(b, t1, x, p256_mod, p256_mp_mod);
+    /* X = A^2 - 2B */
+    sp_256_mont_sqr_4(x, a, p256_mod, p256_mp_mod);
+    sp_256_mont_sub_dbl_4(x, x, b, p256_mod);
+    /* Z = Z*Y */
+    sp_256_mont_mul_4(z, z, y, p256_mod, p256_mp_mod);
+    /* t2 = Y^4 */
+    sp_256_mont_sqr_4(t1, t1, p256_mod, p256_mp_mod);
+    /* y = 2*A*(B - X) - Y^4 */
+    sp_256_mont_sub_4(y, b, x, p256_mod);
+    sp_256_mont_mul_4(y, y, a, p256_mod, p256_mp_mod);
+    sp_256_mont_dbl_sub_4(y, y, t1, p256_mod);
+#endif
     /* Y = Y/2 */
     sp_256_div2_4(y, y, p256_mod);
 }
@@ -13541,16 +20288,15 @@
 /* Add two Montgomery form projective points.
  *
  * r  Result of addition.
- * p  Frist point to add.
+ * p  First point to add.
  * q  Second point to add.
  * t  Temporary ordinate data.
  */
-static void sp_256_proj_point_add_4(sp_point* r, sp_point* p, sp_point* q,
+static void sp_256_proj_point_add_4(sp_point_256* r, const sp_point_256* p, const sp_point_256* q,
         sp_digit* t)
 {
-    sp_point *ap[2];
-    sp_point *rp[2];
-    sp_point tp;
+    const sp_point_256* ap[2];
+    sp_point_256* rp[2];
     sp_digit* t1 = t;
     sp_digit* t2 = t + 2*4;
     sp_digit* t3 = t + 4*4;
@@ -13563,34 +20309,39 @@
 
     /* Ensure only the first point is the same as the result. */
     if (q == r) {
-        sp_point* a = p;
+        const sp_point_256* a = p;
         p = q;
         q = a;
     }
 
     /* Check double */
-    sp_256_sub_4(t1, p256_mod, q->y);
+    (void)sp_256_sub_4(t1, p256_mod, q->y);
     sp_256_norm_4(t1);
-    if (sp_256_cmp_equal_4(p->x, q->x) & sp_256_cmp_equal_4(p->z, q->z) &
-        (sp_256_cmp_equal_4(p->y, q->y) | sp_256_cmp_equal_4(p->y, t1))) {
+    if ((sp_256_cmp_equal_4(p->x, q->x) & sp_256_cmp_equal_4(p->z, q->z) &
+        (sp_256_cmp_equal_4(p->y, q->y) | sp_256_cmp_equal_4(p->y, t1))) != 0) {
         sp_256_proj_point_dbl_4(r, p, t);
     }
     else {
         rp[0] = r;
-        rp[1] = &tp;
-        XMEMSET(&tp, 0, sizeof(tp));
+
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point_256));
         x = rp[p->infinity | q->infinity]->x;
         y = rp[p->infinity | q->infinity]->y;
         z = rp[p->infinity | q->infinity]->z;
 
         ap[0] = p;
         ap[1] = q;
-        for (i=0; i<4; i++)
+        for (i=0; i<4; i++) {
             r->x[i] = ap[p->infinity]->x[i];
-        for (i=0; i<4; i++)
+        }
+        for (i=0; i<4; i++) {
             r->y[i] = ap[p->infinity]->y[i];
-        for (i=0; i<4; i++)
+        }
+        for (i=0; i<4; i++) {
             r->z[i] = ap[p->infinity]->z[i];
+        }
         r->infinity = ap[p->infinity]->infinity;
 
         /* U1 = X1*Z2^2 */
@@ -13635,7 +20386,7 @@
  * n  Number of times to double
  * t  Temporary ordinate data.
  */
-static void sp_256_proj_point_dbl_n_store_4(sp_point* r, sp_point* p,
+static void sp_256_proj_point_dbl_n_store_4(sp_point_256* r, const sp_point_256* p,
         int n, int m, sp_digit* t)
 {
     sp_digit* w = t;
@@ -13648,12 +20399,15 @@
     sp_digit* z = r[2*m].z;
     int i;
 
-    for (i=0; i<4; i++)
+    for (i=0; i<4; i++) {
         x[i] = p->x[i];
-    for (i=0; i<4; i++)
+    }
+    for (i=0; i<4; i++) {
         y[i] = p->y[i];
-    for (i=0; i<4; i++)
+    }
+    for (i=0; i<4; i++) {
         z[i] = p->z[i];
+    }
 
     /* Y = 2*Y */
     sp_256_mont_dbl_4(y, y, p256_mod);
@@ -13698,12 +20452,12 @@
  *
  * ra  Result of addition.
  * rs  Result of subtraction.
- * p   Frist point to add.
+ * p   First point to add.
  * q   Second point to add.
  * t   Temporary ordinate data.
  */
-static void sp_256_proj_point_add_sub_4(sp_point* ra, sp_point* rs,
-        sp_point* p, sp_point* q, sp_digit* t)
+static void sp_256_proj_point_add_sub_4(sp_point_256* ra, sp_point_256* rs,
+        const sp_point_256* p, const sp_point_256* q, sp_digit* t)
 {
     sp_digit* t1 = t;
     sp_digit* t2 = t + 2*4;
@@ -13773,15 +20527,15 @@
 }
 
 /* Structure used to describe recoding of scalar multiplication. */
-typedef struct ecc_recode {
+typedef struct ecc_recode_256 {
     /* Index into pre-computation table. */
     uint8_t i;
     /* Use the negative of the point. */
     uint8_t neg;
-} ecc_recode;
+} ecc_recode_256;
 
 /* The index into pre-computation table to use. */
-static uint8_t recode_index_4_6[66] = {
+static const uint8_t recode_index_4_6[66] = {
      0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
     16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
     32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
@@ -13790,7 +20544,7 @@
 };
 
 /* Whether to negate y-ordinate. */
-static uint8_t recode_neg_4_6[66] = {
+static const uint8_t recode_neg_4_6[66] = {
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
@@ -13802,9 +20556,9 @@
  * subtraction.
  *
  * k  Scalar to multiply by.
- * v  Vector of operations to peform.
- */
-static void sp_256_ecc_recode_6_4(sp_digit* k, ecc_recode* v)
+ * v  Vector of operations to perform.
+ */
+static void sp_256_ecc_recode_6_4(const sp_digit* k, ecc_recode_256* v)
 {
     int i, j;
     uint8_t y;
@@ -13843,7 +20597,7 @@
 }
 
 /* Multiply the point by the scalar and return the result.
- * If map is true then convert result to affine co-ordinates.
+ * If map is true then convert result to affine coordinates.
  *
  * r     Resulting point.
  * g     Point to multiply.
@@ -13852,30 +20606,30 @@
  * heap  Heap to use for allocation.
  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
  */
-static int sp_256_ecc_mulmod_win_add_sub_4(sp_point* r, sp_point* g,
-        sp_digit* k, int map, void* heap)
-{
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point td[33];
-    sp_point rtd, pd;
+static int sp_256_ecc_mulmod_win_add_sub_4(sp_point_256* r, const sp_point_256* g,
+        const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 td[33];
+    sp_point_256 rtd, pd;
     sp_digit tmpd[2 * 4 * 6];
 #endif
-    sp_point* t;
-    sp_point* rt;
-    sp_point* p = NULL;
+    sp_point_256* t;
+    sp_point_256* rt;
+    sp_point_256* p = NULL;
     sp_digit* tmp;
     sp_digit* negy;
     int i;
-    ecc_recode v[43];
+    ecc_recode_256 v[43];
     int err;
 
     (void)heap;
 
-    err = sp_ecc_point_new(heap, rtd, rt);
+    err = sp_256_point_new_4(heap, rtd, rt);
     if (err == MP_OKAY)
-        err = sp_ecc_point_new(heap, pd, p);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    t = (sp_point*)XMALLOC(sizeof(sp_point) * 33, heap, DYNAMIC_TYPE_ECC);
+        err = sp_256_point_new_4(heap, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 33, heap, DYNAMIC_TYPE_ECC);
     if (t == NULL)
         err = MEMORY_E;
     tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 4 * 6, heap,
@@ -13895,74 +20649,77 @@
         /* t[1] = {g->x, g->y, g->z} * norm */
         err = sp_256_mod_mul_norm_4(t[1].x, g->x, p256_mod);
     }
-    if (err == MP_OKAY)
+    if (err == MP_OKAY) {
         err = sp_256_mod_mul_norm_4(t[1].y, g->y, p256_mod);
-    if (err == MP_OKAY)
+    }
+    if (err == MP_OKAY) {
         err = sp_256_mod_mul_norm_4(t[1].z, g->z, p256_mod);
+    }
 
     if (err == MP_OKAY) {
         t[1].infinity = 0;
         /* t[2] ... t[32]  */
-    sp_256_proj_point_dbl_n_store_4(t, &t[ 1], 5, 1, tmp);
-    sp_256_proj_point_add_4(&t[ 3], &t[ 2], &t[ 1], tmp);
-    sp_256_proj_point_dbl_4(&t[ 6], &t[ 3], tmp);
-    sp_256_proj_point_add_sub_4(&t[ 7], &t[ 5], &t[ 6], &t[ 1], tmp);
-    sp_256_proj_point_dbl_4(&t[10], &t[ 5], tmp);
-    sp_256_proj_point_add_sub_4(&t[11], &t[ 9], &t[10], &t[ 1], tmp);
-    sp_256_proj_point_dbl_4(&t[12], &t[ 6], tmp);
-    sp_256_proj_point_dbl_4(&t[14], &t[ 7], tmp);
-    sp_256_proj_point_add_sub_4(&t[15], &t[13], &t[14], &t[ 1], tmp);
-    sp_256_proj_point_dbl_4(&t[18], &t[ 9], tmp);
-    sp_256_proj_point_add_sub_4(&t[19], &t[17], &t[18], &t[ 1], tmp);
-    sp_256_proj_point_dbl_4(&t[20], &t[10], tmp);
-    sp_256_proj_point_dbl_4(&t[22], &t[11], tmp);
-    sp_256_proj_point_add_sub_4(&t[23], &t[21], &t[22], &t[ 1], tmp);
-    sp_256_proj_point_dbl_4(&t[24], &t[12], tmp);
-    sp_256_proj_point_dbl_4(&t[26], &t[13], tmp);
-    sp_256_proj_point_add_sub_4(&t[27], &t[25], &t[26], &t[ 1], tmp);
-    sp_256_proj_point_dbl_4(&t[28], &t[14], tmp);
-    sp_256_proj_point_dbl_4(&t[30], &t[15], tmp);
-    sp_256_proj_point_add_sub_4(&t[31], &t[29], &t[30], &t[ 1], tmp);
+        sp_256_proj_point_dbl_n_store_4(t, &t[ 1], 5, 1, tmp);
+        sp_256_proj_point_add_4(&t[ 3], &t[ 2], &t[ 1], tmp);
+        sp_256_proj_point_dbl_4(&t[ 6], &t[ 3], tmp);
+        sp_256_proj_point_add_sub_4(&t[ 7], &t[ 5], &t[ 6], &t[ 1], tmp);
+        sp_256_proj_point_dbl_4(&t[10], &t[ 5], tmp);
+        sp_256_proj_point_add_sub_4(&t[11], &t[ 9], &t[10], &t[ 1], tmp);
+        sp_256_proj_point_dbl_4(&t[12], &t[ 6], tmp);
+        sp_256_proj_point_dbl_4(&t[14], &t[ 7], tmp);
+        sp_256_proj_point_add_sub_4(&t[15], &t[13], &t[14], &t[ 1], tmp);
+        sp_256_proj_point_dbl_4(&t[18], &t[ 9], tmp);
+        sp_256_proj_point_add_sub_4(&t[19], &t[17], &t[18], &t[ 1], tmp);
+        sp_256_proj_point_dbl_4(&t[20], &t[10], tmp);
+        sp_256_proj_point_dbl_4(&t[22], &t[11], tmp);
+        sp_256_proj_point_add_sub_4(&t[23], &t[21], &t[22], &t[ 1], tmp);
+        sp_256_proj_point_dbl_4(&t[24], &t[12], tmp);
+        sp_256_proj_point_dbl_4(&t[26], &t[13], tmp);
+        sp_256_proj_point_add_sub_4(&t[27], &t[25], &t[26], &t[ 1], tmp);
+        sp_256_proj_point_dbl_4(&t[28], &t[14], tmp);
+        sp_256_proj_point_dbl_4(&t[30], &t[15], tmp);
+        sp_256_proj_point_add_sub_4(&t[31], &t[29], &t[30], &t[ 1], tmp);
 
         negy = t[0].y;
 
         sp_256_ecc_recode_6_4(k, v);
 
         i = 42;
-        XMEMCPY(rt, &t[v[i].i], sizeof(sp_point));
+        XMEMCPY(rt, &t[v[i].i], sizeof(sp_point_256));
         for (--i; i>=0; i--) {
-            sp_256_proj_point_dbl_n_4(rt, rt, 6, tmp);
-
-            XMEMCPY(p, &t[v[i].i], sizeof(sp_point));
+            sp_256_proj_point_dbl_n_4(rt, 6, tmp);
+
+            XMEMCPY(p, &t[v[i].i], sizeof(sp_point_256));
             sp_256_sub_4(negy, p256_mod, p->y);
             sp_256_cond_copy_4(p->y, negy, (sp_digit)0 - v[i].neg);
             sp_256_proj_point_add_4(rt, rt, p, tmp);
         }
 
-        if (map)
+        if (map != 0) {
             sp_256_map_4(r, rt, tmp);
-        else
-            XMEMCPY(r, rt, sizeof(sp_point));
-    }
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_256));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     if (t != NULL)
         XFREE(t, heap, DYNAMIC_TYPE_ECC);
     if (tmp != NULL)
         XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
 #endif
-    sp_ecc_point_free(p, 0, heap);
-    sp_ecc_point_free(rt, 0, heap);
+    sp_256_point_free_4(p, 0, heap);
+    sp_256_point_free_4(rt, 0, heap);
 
     return err;
 }
 
 /* A table entry for pre-computed points. */
-typedef struct sp_table_entry {
+typedef struct sp_table_entry_256 {
     sp_digit x[4];
     sp_digit y[4];
-    byte infinity;
-} sp_table_entry;
+} sp_table_entry_256;
 
 #if defined(FP_ECC) || defined(WOLFSSL_SP_SMALL)
 #endif /* FP_ECC || WOLFSSL_SP_SMALL */
@@ -13971,16 +20728,15 @@
  * Only the first point can be the same pointer as the result point.
  *
  * r  Result of addition.
- * p  Frist point to add.
+ * p  First point to add.
  * q  Second point to add.
  * t  Temporary ordinate data.
  */
-static void sp_256_proj_point_add_qz1_4(sp_point* r, sp_point* p,
-        sp_point* q, sp_digit* t)
-{
-    sp_point *ap[2];
-    sp_point *rp[2];
-    sp_point tp;
+static void sp_256_proj_point_add_qz1_4(sp_point_256* r, const sp_point_256* p,
+        const sp_point_256* q, sp_digit* t)
+{
+    const sp_point_256* ap[2];
+    sp_point_256* rp[2];
     sp_digit* t1 = t;
     sp_digit* t2 = t + 2*4;
     sp_digit* t3 = t + 4*4;
@@ -13992,28 +20748,33 @@
     int i;
 
     /* Check double */
-    sp_256_sub_4(t1, p256_mod, q->y);
+    (void)sp_256_sub_4(t1, p256_mod, q->y);
     sp_256_norm_4(t1);
-    if (sp_256_cmp_equal_4(p->x, q->x) & sp_256_cmp_equal_4(p->z, q->z) &
-        (sp_256_cmp_equal_4(p->y, q->y) | sp_256_cmp_equal_4(p->y, t1))) {
+    if ((sp_256_cmp_equal_4(p->x, q->x) & sp_256_cmp_equal_4(p->z, q->z) &
+        (sp_256_cmp_equal_4(p->y, q->y) | sp_256_cmp_equal_4(p->y, t1))) != 0) {
         sp_256_proj_point_dbl_4(r, p, t);
     }
     else {
         rp[0] = r;
-        rp[1] = &tp;
-        XMEMSET(&tp, 0, sizeof(tp));
+
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point_256));
         x = rp[p->infinity | q->infinity]->x;
         y = rp[p->infinity | q->infinity]->y;
         z = rp[p->infinity | q->infinity]->z;
 
         ap[0] = p;
         ap[1] = q;
-        for (i=0; i<4; i++)
+        for (i=0; i<4; i++) {
             r->x[i] = ap[p->infinity]->x[i];
-        for (i=0; i<4; i++)
+        }
+        for (i=0; i<4; i++) {
             r->y[i] = ap[p->infinity]->y[i];
-        for (i=0; i<4; i++)
+        }
+        for (i=0; i<4; i++) {
             r->z[i] = ap[p->infinity]->z[i];
+        }
         r->infinity = ap[p->infinity]->infinity;
 
         /* U2 = X2*Z1^2 */
@@ -14049,9 +20810,9 @@
  * Ordinates are in Montgomery form.
  *
  * a  Point to convert.
- * t  Temprorary data.
- */
-static void sp_256_proj_to_affine_4(sp_point* a, sp_digit* t)
+ * t  Temporary data.
+ */
+static void sp_256_proj_to_affine_4(sp_point_256* a, sp_digit* t)
 {
     sp_digit* t1 = t;
     sp_digit* t2 = t + 2 * 4;
@@ -14071,35 +20832,40 @@
  *
  * a      The base point.
  * table  Place to store generated point data.
- * tmp    Temprorary data.
+ * tmp    Temporary data.
  * heap  Heap to use for allocation.
  */
-static int sp_256_gen_stripe_table_4(sp_point* a,
-        sp_table_entry* table, sp_digit* tmp, void* heap)
-{
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point td, s1d, s2d;
-#endif
-    sp_point* t;
-    sp_point* s1 = NULL;
-    sp_point* s2 = NULL;
+static int sp_256_gen_stripe_table_4(const sp_point_256* a,
+        sp_table_entry_256* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 td, s1d, s2d;
+#endif
+    sp_point_256* t;
+    sp_point_256* s1 = NULL;
+    sp_point_256* s2 = NULL;
     int i, j;
     int err;
 
     (void)heap;
 
-    err = sp_ecc_point_new(heap, td, t);
-    if (err == MP_OKAY)
-        err = sp_ecc_point_new(heap, s1d, s1);
-    if (err == MP_OKAY)
-        err = sp_ecc_point_new(heap, s2d, s2);
-
-    if (err == MP_OKAY)
+    err = sp_256_point_new_4(heap, td, t);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_4(heap, s1d, s1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_4(heap, s2d, s2);
+    }
+
+    if (err == MP_OKAY) {
         err = sp_256_mod_mul_norm_4(t->x, a->x, p256_mod);
-    if (err == MP_OKAY)
+    }
+    if (err == MP_OKAY) {
         err = sp_256_mod_mul_norm_4(t->y, a->y, p256_mod);
-    if (err == MP_OKAY)
+    }
+    if (err == MP_OKAY) {
         err = sp_256_mod_mul_norm_4(t->z, a->z, p256_mod);
+    }
     if (err == MP_OKAY) {
         t->infinity = 0;
         sp_256_proj_to_affine_4(t, tmp);
@@ -14110,19 +20876,16 @@
         s2->infinity = 0;
 
         /* table[0] = {0, 0, infinity} */
-        XMEMSET(&table[0], 0, sizeof(sp_table_entry));
-        table[0].infinity = 1;
+        XMEMSET(&table[0], 0, sizeof(sp_table_entry_256));
         /* table[1] = Affine version of 'a' in Montgomery form */
         XMEMCPY(table[1].x, t->x, sizeof(table->x));
         XMEMCPY(table[1].y, t->y, sizeof(table->y));
-        table[1].infinity = 0;
 
         for (i=1; i<8; i++) {
-            sp_256_proj_point_dbl_n_4(t, t, 32, tmp);
+            sp_256_proj_point_dbl_n_4(t, 32, tmp);
             sp_256_proj_to_affine_4(t, tmp);
             XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
             XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
-            table[1<<i].infinity = 0;
         }
 
         for (i=1; i<8; i++) {
@@ -14135,14 +20898,13 @@
                 sp_256_proj_to_affine_4(t, tmp);
                 XMEMCPY(table[j].x, t->x, sizeof(table->x));
                 XMEMCPY(table[j].y, t->y, sizeof(table->y));
-                table[j].infinity = 0;
-            }
-        }
-    }
-
-    sp_ecc_point_free(s2, 0, heap);
-    sp_ecc_point_free(s1, 0, heap);
-    sp_ecc_point_free( t, 0, heap);
+            }
+        }
+    }
+
+    sp_256_point_free_4(s2, 0, heap);
+    sp_256_point_free_4(s1, 0, heap);
+    sp_256_point_free_4( t, 0, heap);
 
     return err;
 }
@@ -14150,7 +20912,7 @@
 #endif /* FP_ECC */
 #if defined(FP_ECC) || defined(WOLFSSL_SP_SMALL)
 /* Multiply the point by the scalar and return the result.
- * If map is true then convert result to affine co-ordinates.
+ * If map is true then convert result to affine coordinates.
  *
  * r     Resulting point.
  * k     Scalar to multiply by.
@@ -14158,16 +20920,16 @@
  * heap  Heap to use for allocation.
  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
  */
-static int sp_256_ecc_mulmod_stripe_4(sp_point* r, sp_point* g,
-        sp_table_entry* table, sp_digit* k, int map, void* heap)
-{
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point rtd;
-    sp_point pd;
+static int sp_256_ecc_mulmod_stripe_4(sp_point_256* r, const sp_point_256* g,
+        const sp_table_entry_256* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 rtd;
+    sp_point_256 pd;
     sp_digit td[2 * 4 * 5];
 #endif
-    sp_point* rt;
-    sp_point* p = NULL;
+    sp_point_256* rt;
+    sp_point_256* p = NULL;
     sp_digit* t;
     int i, j;
     int y, x;
@@ -14176,14 +20938,17 @@
     (void)g;
     (void)heap;
 
-    err = sp_ecc_point_new(heap, rtd, rt);
-    if (err == MP_OKAY)
-        err = sp_ecc_point_new(heap, pd, p);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+
+    err = sp_256_point_new_4(heap, rtd, rt);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_4(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 4 * 5, heap,
                            DYNAMIC_TYPE_ECC);
-    if (t == NULL)
+    if (t == NULL) {
         err = MEMORY_E;
+    }
 #else
     t = td;
 #endif
@@ -14193,35 +20958,40 @@
         XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
 
         y = 0;
-        for (j=0,x=31; j<8; j++,x+=32)
+        for (j=0,x=31; j<8; j++,x+=32) {
             y |= ((k[x / 64] >> (x % 64)) & 1) << j;
+        }
         XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
         XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
-        rt->infinity = table[y].infinity;
+        rt->infinity = !y;
         for (i=30; i>=0; i--) {
             y = 0;
-            for (j=0,x=i; j<8; j++,x+=32)
+            for (j=0,x=i; j<8; j++,x+=32) {
                 y |= ((k[x / 64] >> (x % 64)) & 1) << j;
+            }
 
             sp_256_proj_point_dbl_4(rt, rt, t);
             XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
             XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
-            p->infinity = table[y].infinity;
+            p->infinity = !y;
             sp_256_proj_point_add_qz1_4(rt, rt, p, t);
         }
 
-        if (map)
+        if (map != 0) {
             sp_256_map_4(r, rt, t);
-        else
-            XMEMCPY(r, rt, sizeof(sp_point));
-    }
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (t != NULL)
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_256));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
         XFREE(t, heap, DYNAMIC_TYPE_ECC);
-#endif
-    sp_ecc_point_free(p, 0, heap);
-    sp_ecc_point_free(rt, 0, heap);
+    }
+#endif
+    sp_256_point_free_4(p, 0, heap);
+    sp_256_point_free_4(rt, 0, heap);
 
     return err;
 }
@@ -14232,43 +21002,43 @@
     #define FP_ENTRIES 16
 #endif
 
-typedef struct sp_cache_t {
+typedef struct sp_cache_256_t {
     sp_digit x[4];
     sp_digit y[4];
-    sp_table_entry table[256];
+    sp_table_entry_256 table[256];
     uint32_t cnt;
     int set;
-} sp_cache_t;
-
-static THREAD_LS_T sp_cache_t sp_cache[FP_ENTRIES];
-static THREAD_LS_T int sp_cache_last = -1;
-static THREAD_LS_T int sp_cache_inited = 0;
+} sp_cache_256_t;
+
+static THREAD_LS_T sp_cache_256_t sp_cache_256[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_256_last = -1;
+static THREAD_LS_T int sp_cache_256_inited = 0;
 
 #ifndef HAVE_THREAD_LS
-    static volatile int initCacheMutex = 0;
-    static wolfSSL_Mutex sp_cache_lock;
-#endif
-
-static void sp_ecc_get_cache(sp_point* g, sp_cache_t** cache)
+    static volatile int initCacheMutex_256 = 0;
+    static wolfSSL_Mutex sp_cache_256_lock;
+#endif
+
+static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache)
 {
     int i, j;
     uint32_t least;
 
-    if (sp_cache_inited == 0) {
+    if (sp_cache_256_inited == 0) {
         for (i=0; i<FP_ENTRIES; i++) {
-            sp_cache[i].set = 0;
-        }
-        sp_cache_inited = 1;
+            sp_cache_256[i].set = 0;
+        }
+        sp_cache_256_inited = 1;
     }
 
     /* Compare point with those in cache. */
     for (i=0; i<FP_ENTRIES; i++) {
-        if (!sp_cache[i].set)
+        if (!sp_cache_256[i].set)
             continue;
 
-        if (sp_256_cmp_equal_4(g->x, sp_cache[i].x) & 
-                           sp_256_cmp_equal_4(g->y, sp_cache[i].y)) {
-            sp_cache[i].cnt++;
+        if (sp_256_cmp_equal_4(g->x, sp_cache_256[i].x) &
+                           sp_256_cmp_equal_4(g->y, sp_cache_256[i].y)) {
+            sp_cache_256[i].cnt++;
             break;
         }
     }
@@ -14276,37 +21046,37 @@
     /* No match. */
     if (i == FP_ENTRIES) {
         /* Find empty entry. */
-        i = (sp_cache_last + 1) % FP_ENTRIES;
-        for (; i != sp_cache_last; i=(i+1)%FP_ENTRIES) {
-            if (!sp_cache[i].set) {
+        i = (sp_cache_256_last + 1) % FP_ENTRIES;
+        for (; i != sp_cache_256_last; i=(i+1)%FP_ENTRIES) {
+            if (!sp_cache_256[i].set) {
                 break;
             }
         }
 
         /* Evict least used. */
-        if (i == sp_cache_last) {
-            least = sp_cache[0].cnt;
+        if (i == sp_cache_256_last) {
+            least = sp_cache_256[0].cnt;
             for (j=1; j<FP_ENTRIES; j++) {
-                if (sp_cache[j].cnt < least) {
+                if (sp_cache_256[j].cnt < least) {
                     i = j;
-                    least = sp_cache[i].cnt;
+                    least = sp_cache_256[i].cnt;
                 }
             }
         }
 
-        XMEMCPY(sp_cache[i].x, g->x, sizeof(sp_cache[i].x));
-        XMEMCPY(sp_cache[i].y, g->y, sizeof(sp_cache[i].y));
-        sp_cache[i].set = 1;
-        sp_cache[i].cnt = 1;
-    }
-
-    *cache = &sp_cache[i];
-    sp_cache_last = i;
+        XMEMCPY(sp_cache_256[i].x, g->x, sizeof(sp_cache_256[i].x));
+        XMEMCPY(sp_cache_256[i].y, g->y, sizeof(sp_cache_256[i].y));
+        sp_cache_256[i].set = 1;
+        sp_cache_256[i].cnt = 1;
+    }
+
+    *cache = &sp_cache_256[i];
+    sp_cache_256_last = i;
 }
 #endif /* FP_ECC */
 
 /* Multiply the base point of P256 by the scalar and return the result.
- * If map is true then convert result to affine co-ordinates.
+ * If map is true then convert result to affine coordinates.
  *
  * r     Resulting point.
  * g     Point to multiply.
@@ -14315,32 +21085,32 @@
  * heap  Heap to use for allocation.
  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
  */
-static int sp_256_ecc_mulmod_4(sp_point* r, sp_point* g, sp_digit* k,
+static int sp_256_ecc_mulmod_4(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
         int map, void* heap)
 {
 #ifndef FP_ECC
     return sp_256_ecc_mulmod_win_add_sub_4(r, g, k, map, heap);
 #else
     sp_digit tmp[2 * 4 * 5];
-    sp_cache_t* cache;
+    sp_cache_256_t* cache;
     int err = MP_OKAY;
 
 #ifndef HAVE_THREAD_LS
-    if (initCacheMutex == 0) {
-         wc_InitMutex(&sp_cache_lock);
-         initCacheMutex = 1;
-    }
-    if (wc_LockMutex(&sp_cache_lock) != 0)
+    if (initCacheMutex_256 == 0) {
+         wc_InitMutex(&sp_cache_256_lock);
+         initCacheMutex_256 = 1;
+    }
+    if (wc_LockMutex(&sp_cache_256_lock) != 0)
        err = BAD_MUTEX_E;
 #endif /* HAVE_THREAD_LS */
 
     if (err == MP_OKAY) {
-        sp_ecc_get_cache(g, &cache);
+        sp_ecc_get_cache_256(g, &cache);
         if (cache->cnt == 2)
             sp_256_gen_stripe_table_4(g, cache->table, tmp, heap);
 
 #ifndef HAVE_THREAD_LS
-        wc_UnLockMutex(&sp_cache_lock);
+        wc_UnLockMutex(&sp_cache_256_lock);
 #endif /* HAVE_THREAD_LS */
 
         if (cache->cnt < 2) {
@@ -14357,7 +21127,7 @@
 }
 
 /* Multiply the point by the scalar and return the result.
- * If map is true then convert result to affine co-ordinates.
+ * If map is true then convert result to affine coordinates.
  *
  * km    Scalar to multiply by.
  * p     Point to multiply.
@@ -14369,21 +21139,19 @@
 int sp_ecc_mulmod_256(mp_int* km, ecc_point* gm, ecc_point* r, int map,
         void* heap)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point p;
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 p;
     sp_digit kd[4];
 #endif
-    sp_point* point;
+    sp_point_256* point;
     sp_digit* k = NULL;
     int err = MP_OKAY;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    err = sp_ecc_point_new(heap, p, point);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        k = XMALLOC(sizeof(sp_digit) * 4, heap, DYNAMIC_TYPE_ECC);
+
+    err = sp_256_point_new_4(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4, heap,
+                                                              DYNAMIC_TYPE_ECC);
         if (k == NULL)
             err = MEMORY_E;
     }
@@ -14394,1565 +21162,1306 @@
         sp_256_from_mp(k, 4, km);
         sp_256_point_from_ecc_point_4(point, gm);
 
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_256_ecc_mulmod_avx2_4(point, point, k, map, heap);
-        else
-#endif
             err = sp_256_ecc_mulmod_4(point, point, k, map, heap);
     }
-    if (err == MP_OKAY)
+    if (err == MP_OKAY) {
         err = sp_256_point_to_ecc_point_4(point, r);
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (k != NULL)
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
         XFREE(k, heap, DYNAMIC_TYPE_ECC);
-#endif
-    sp_ecc_point_free(point, 0, heap);
-
-    return err;
-}
-
-#ifdef WOLFSSL_SP_SMALL
-static sp_table_entry p256_table[256] = {
+    }
+#endif
+    sp_256_point_free_4(point, 0, heap);
+
+    return err;
+}
+
+#ifdef WOLFSSL_SP_SMALL
+static const sp_table_entry_256 p256_table[256] = {
     /* 0 */
     { { 0x00, 0x00, 0x00, 0x00 },
-      { 0x00, 0x00, 0x00, 0x00 },
-      1 },
+      { 0x00, 0x00, 0x00, 0x00 } },
     /* 1 */
-    { { 0x79e730d418a9143cl,0x75ba95fc5fedb601l,0x79fb732b77622510l,
-        0x18905f76a53755c6l },
-      { 0xddf25357ce95560al,0x8b4ab8e4ba19e45cl,0xd2e88688dd21f325l,
-        0x8571ff1825885d85l },
-      0 },
+    { { 0x79e730d418a9143cL,0x75ba95fc5fedb601L,0x79fb732b77622510L,
+        0x18905f76a53755c6L },
+      { 0xddf25357ce95560aL,0x8b4ab8e4ba19e45cL,0xd2e88688dd21f325L,
+        0x8571ff1825885d85L } },
     /* 2 */
-    { { 0x202886024147519al,0xd0981eac26b372f0l,0xa9d4a7caa785ebc8l,
-        0xd953c50ddbdf58e9l },
-      { 0x9d6361ccfd590f8fl,0x72e9626b44e6c917l,0x7fd9611022eb64cfl,
-        0x863ebb7e9eb288f3l },
-      0 },
+    { { 0x202886024147519aL,0xd0981eac26b372f0L,0xa9d4a7caa785ebc8L,
+        0xd953c50ddbdf58e9L },
+      { 0x9d6361ccfd590f8fL,0x72e9626b44e6c917L,0x7fd9611022eb64cfL,
+        0x863ebb7e9eb288f3L } },
     /* 3 */
-    { { 0x7856b6235cdb6485l,0x808f0ea22f0a2f97l,0x3e68d9544f7e300bl,
-        0x00076055b5ff80a0l },
-      { 0x7634eb9b838d2010l,0x54014fbb3243708al,0xe0e47d39842a6606l,
-        0x8308776134373ee0l },
-      0 },
+    { { 0x7856b6235cdb6485L,0x808f0ea22f0a2f97L,0x3e68d9544f7e300bL,
+        0x00076055b5ff80a0L },
+      { 0x7634eb9b838d2010L,0x54014fbb3243708aL,0xe0e47d39842a6606L,
+        0x8308776134373ee0L } },
     /* 4 */
-    { { 0x4f922fc516a0d2bbl,0x0d5cc16c1a623499l,0x9241cf3a57c62c8bl,
-        0x2f5e6961fd1b667fl },
-      { 0x5c15c70bf5a01797l,0x3d20b44d60956192l,0x04911b37071fdb52l,
-        0xf648f9168d6f0f7bl },
-      0 },
+    { { 0x4f922fc516a0d2bbL,0x0d5cc16c1a623499L,0x9241cf3a57c62c8bL,
+        0x2f5e6961fd1b667fL },
+      { 0x5c15c70bf5a01797L,0x3d20b44d60956192L,0x04911b37071fdb52L,
+        0xf648f9168d6f0f7bL } },
     /* 5 */
-    { { 0x9e566847e137bbbcl,0xe434469e8a6a0becl,0xb1c4276179d73463l,
-        0x5abe0285133d0015l },
-      { 0x92aa837cc04c7dabl,0x573d9f4c43260c07l,0x0c93156278e6cc37l,
-        0x94bb725b6b6f7383l },
-      0 },
+    { { 0x9e566847e137bbbcL,0xe434469e8a6a0becL,0xb1c4276179d73463L,
+        0x5abe0285133d0015L },
+      { 0x92aa837cc04c7dabL,0x573d9f4c43260c07L,0x0c93156278e6cc37L,
+        0x94bb725b6b6f7383L } },
     /* 6 */
-    { { 0xbbf9b48f720f141cl,0x6199b3cd2df5bc74l,0xdc3f6129411045c4l,
-        0xcdd6bbcb2f7dc4efl },
-      { 0xcca6700beaf436fdl,0x6f647f6db99326bel,0x0c0fa792014f2522l,
-        0xa361bebd4bdae5f6l },
-      0 },
+    { { 0xbbf9b48f720f141cL,0x6199b3cd2df5bc74L,0xdc3f6129411045c4L,
+        0xcdd6bbcb2f7dc4efL },
+      { 0xcca6700beaf436fdL,0x6f647f6db99326beL,0x0c0fa792014f2522L,
+        0xa361bebd4bdae5f6L } },
     /* 7 */
-    { { 0x28aa2558597c13c7l,0xc38d635f50b7c3e1l,0x07039aecf3c09d1dl,
-        0xba12ca09c4b5292cl },
-      { 0x9e408fa459f91dfdl,0x3af43b66ceea07fbl,0x1eceb0899d780b29l,
-        0x53ebb99d701fef4bl },
-      0 },
+    { { 0x28aa2558597c13c7L,0xc38d635f50b7c3e1L,0x07039aecf3c09d1dL,
+        0xba12ca09c4b5292cL },
+      { 0x9e408fa459f91dfdL,0x3af43b66ceea07fbL,0x1eceb0899d780b29L,
+        0x53ebb99d701fef4bL } },
     /* 8 */
-    { { 0x4fe7ee31b0e63d34l,0xf4600572a9e54fabl,0xc0493334d5e7b5a4l,
-        0x8589fb9206d54831l },
-      { 0xaa70f5cc6583553al,0x0879094ae25649e5l,0xcc90450710044652l,
-        0xebb0696d02541c4fl },
-      0 },
+    { { 0x4fe7ee31b0e63d34L,0xf4600572a9e54fabL,0xc0493334d5e7b5a4L,
+        0x8589fb9206d54831L },
+      { 0xaa70f5cc6583553aL,0x0879094ae25649e5L,0xcc90450710044652L,
+        0xebb0696d02541c4fL } },
     /* 9 */
-    { { 0x4616ca15ac1647c5l,0xb8127d47c4cf5799l,0xdc666aa3764dfbacl,
-        0xeb2820cbd1b27da3l },
-      { 0x9406f8d86a87e008l,0xd87dfa9d922378f3l,0x56ed2e4280ccecb2l,
-        0x1f28289b55a7da1dl },
-      0 },
+    { { 0x4616ca15ac1647c5L,0xb8127d47c4cf5799L,0xdc666aa3764dfbacL,
+        0xeb2820cbd1b27da3L },
+      { 0x9406f8d86a87e008L,0xd87dfa9d922378f3L,0x56ed2e4280ccecb2L,
+        0x1f28289b55a7da1dL } },
     /* 10 */
-    { { 0xabbaa0c03b89da99l,0xa6f2d79eb8284022l,0x27847862b81c05e8l,
-        0x337a4b5905e54d63l },
-      { 0x3c67500d21f7794al,0x207005b77d6d7f61l,0x0a5a378104cfd6e8l,
-        0x0d65e0d5f4c2fbd6l },
-      0 },
+    { { 0xabbaa0c03b89da99L,0xa6f2d79eb8284022L,0x27847862b81c05e8L,
+        0x337a4b5905e54d63L },
+      { 0x3c67500d21f7794aL,0x207005b77d6d7f61L,0x0a5a378104cfd6e8L,
+        0x0d65e0d5f4c2fbd6L } },
     /* 11 */
-    { { 0xd9d09bbeb5275d38l,0x4268a7450be0a358l,0xf0762ff4973eb265l,
-        0xc23da24252f4a232l },
-      { 0x5da1b84f0b94520cl,0x09666763b05bd78el,0x3a4dcb8694d29ea1l,
-        0x19de3b8cc790cff1l },
-      0 },
+    { { 0xd9d09bbeb5275d38L,0x4268a7450be0a358L,0xf0762ff4973eb265L,
+        0xc23da24252f4a232L },
+      { 0x5da1b84f0b94520cL,0x09666763b05bd78eL,0x3a4dcb8694d29ea1L,
+        0x19de3b8cc790cff1L } },
     /* 12 */
-    { { 0x183a716c26c5fe04l,0x3b28de0b3bba1bdbl,0x7432c586a4cb712cl,
-        0xe34dcbd491fccbfdl },
-      { 0xb408d46baaa58403l,0x9a69748682e97a53l,0x9e39012736aaa8afl,
-        0xe7641f447b4e0f7fl },
-      0 },
+    { { 0x183a716c26c5fe04L,0x3b28de0b3bba1bdbL,0x7432c586a4cb712cL,
+        0xe34dcbd491fccbfdL },
+      { 0xb408d46baaa58403L,0x9a69748682e97a53L,0x9e39012736aaa8afL,
+        0xe7641f447b4e0f7fL } },
     /* 13 */
-    { { 0x7d753941df64ba59l,0xd33f10ec0b0242fcl,0x4f06dfc6a1581859l,
-        0x4a12df57052a57bfl },
-      { 0xbfa6338f9439dbd0l,0xd3c24bd4bde53e1fl,0xfd5e4ffa21f1b314l,
-        0x6af5aa93bb5bea46l },
-      0 },
+    { { 0x7d753941df64ba59L,0xd33f10ec0b0242fcL,0x4f06dfc6a1581859L,
+        0x4a12df57052a57bfL },
+      { 0xbfa6338f9439dbd0L,0xd3c24bd4bde53e1fL,0xfd5e4ffa21f1b314L,
+        0x6af5aa93bb5bea46L } },
     /* 14 */
-    { { 0xda10b69910c91999l,0x0a24b4402a580491l,0x3e0094b4b8cc2090l,
-        0x5fe3475a66a44013l },
-      { 0xb0f8cabdf93e7b4bl,0x292b501a7c23f91al,0x42e889aecd1e6263l,
-        0xb544e308ecfea916l },
-      0 },
+    { { 0xda10b69910c91999L,0x0a24b4402a580491L,0x3e0094b4b8cc2090L,
+        0x5fe3475a66a44013L },
+      { 0xb0f8cabdf93e7b4bL,0x292b501a7c23f91aL,0x42e889aecd1e6263L,
+        0xb544e308ecfea916L } },
     /* 15 */
-    { { 0x6478c6e916ddfdcel,0x2c329166f89179e6l,0x4e8d6e764d4e67e1l,
-        0xe0b6b2bda6b0c20bl },
-      { 0x0d312df2bb7efb57l,0x1aac0dde790c4007l,0xf90336ad679bc944l,
-        0x71c023de25a63774l },
-      0 },
+    { { 0x6478c6e916ddfdceL,0x2c329166f89179e6L,0x4e8d6e764d4e67e1L,
+        0xe0b6b2bda6b0c20bL },
+      { 0x0d312df2bb7efb57L,0x1aac0dde790c4007L,0xf90336ad679bc944L,
+        0x71c023de25a63774L } },
     /* 16 */
-    { { 0x62a8c244bfe20925l,0x91c19ac38fdce867l,0x5a96a5d5dd387063l,
-        0x61d587d421d324f6l },
-      { 0xe87673a2a37173eal,0x2384800853778b65l,0x10f8441e05bab43el,
-        0xfa11fe124621efbel },
-      0 },
+    { { 0x62a8c244bfe20925L,0x91c19ac38fdce867L,0x5a96a5d5dd387063L,
+        0x61d587d421d324f6L },
+      { 0xe87673a2a37173eaL,0x2384800853778b65L,0x10f8441e05bab43eL,
+        0xfa11fe124621efbeL } },
     /* 17 */
-    { { 0x1c891f2b2cb19ffdl,0x01ba8d5bb1923c23l,0xb6d03d678ac5ca8el,
-        0x586eb04c1f13bedcl },
-      { 0x0c35c6e527e8ed09l,0x1e81a33c1819ede2l,0x278fd6c056c652fal,
-        0x19d5ac0870864f11l },
-      0 },
+    { { 0x1c891f2b2cb19ffdL,0x01ba8d5bb1923c23L,0xb6d03d678ac5ca8eL,
+        0x586eb04c1f13bedcL },
+      { 0x0c35c6e527e8ed09L,0x1e81a33c1819ede2L,0x278fd6c056c652faL,
+        0x19d5ac0870864f11L } },
     /* 18 */
-    { { 0x1e99f581309a4e1fl,0xab7de71be9270074l,0x26a5ef0befd28d20l,
-        0xe7c0073f7f9c563fl },
-      { 0x1f6d663a0ef59f76l,0x669b3b5420fcb050l,0xc08c1f7a7a6602d4l,
-        0xe08504fec65b3c0al },
-      0 },
+    { { 0x1e99f581309a4e1fL,0xab7de71be9270074L,0x26a5ef0befd28d20L,
+        0xe7c0073f7f9c563fL },
+      { 0x1f6d663a0ef59f76L,0x669b3b5420fcb050L,0xc08c1f7a7a6602d4L,
+        0xe08504fec65b3c0aL } },
     /* 19 */
-    { { 0xf098f68da031b3cal,0x6d1cab9ee6da6d66l,0x5bfd81fa94f246e8l,
-        0x78f018825b0996b4l },
-      { 0xb7eefde43a25787fl,0x8016f80d1dccac9bl,0x0cea4877b35bfc36l,
-        0x43a773b87e94747al },
-      0 },
+    { { 0xf098f68da031b3caL,0x6d1cab9ee6da6d66L,0x5bfd81fa94f246e8L,
+        0x78f018825b0996b4L },
+      { 0xb7eefde43a25787fL,0x8016f80d1dccac9bL,0x0cea4877b35bfc36L,
+        0x43a773b87e94747aL } },
     /* 20 */
-    { { 0x62577734d2b533d5l,0x673b8af6a1bdddc0l,0x577e7c9aa79ec293l,
-        0xbb6de651c3b266b1l },
-      { 0xe7e9303ab65259b3l,0xd6a0afd3d03a7480l,0xc5ac83d19b3cfc27l,
-        0x60b4619a5d18b99bl },
-      0 },
+    { { 0x62577734d2b533d5L,0x673b8af6a1bdddc0L,0x577e7c9aa79ec293L,
+        0xbb6de651c3b266b1L },
+      { 0xe7e9303ab65259b3L,0xd6a0afd3d03a7480L,0xc5ac83d19b3cfc27L,
+        0x60b4619a5d18b99bL } },
     /* 21 */
-    { { 0xbd6a38e11ae5aa1cl,0xb8b7652b49e73658l,0x0b130014ee5f87edl,
-        0x9d0f27b2aeebffcdl },
-      { 0xca9246317a730a55l,0x9c955b2fddbbc83al,0x07c1dfe0ac019a71l,
-        0x244a566d356ec48dl },
-      0 },
+    { { 0xbd6a38e11ae5aa1cL,0xb8b7652b49e73658L,0x0b130014ee5f87edL,
+        0x9d0f27b2aeebffcdL },
+      { 0xca9246317a730a55L,0x9c955b2fddbbc83aL,0x07c1dfe0ac019a71L,
+        0x244a566d356ec48dL } },
     /* 22 */
-    { { 0x6db0394aeacf1f96l,0x9f2122a9024c271cl,0x2626ac1b82cbd3b9l,
-        0x45e58c873581ef69l },
-      { 0xd3ff479da38f9dbcl,0xa8aaf146e888a040l,0x945adfb246e0bed7l,
-        0xc040e21cc1e4b7a4l },
-      0 },
+    { { 0x6db0394aeacf1f96L,0x9f2122a9024c271cL,0x2626ac1b82cbd3b9L,
+        0x45e58c873581ef69L },
+      { 0xd3ff479da38f9dbcL,0xa8aaf146e888a040L,0x945adfb246e0bed7L,
+        0xc040e21cc1e4b7a4L } },
     /* 23 */
-    { { 0x847af0006f8117b6l,0x651969ff73a35433l,0x482b35761d9475ebl,
-        0x1cdf5c97682c6ec7l },
-      { 0x7db775b411f04839l,0x7dbeacf448de1698l,0xb2921dd1b70b3219l,
-        0x046755f8a92dff3dl },
-      0 },
+    { { 0x847af0006f8117b6L,0x651969ff73a35433L,0x482b35761d9475ebL,
+        0x1cdf5c97682c6ec7L },
+      { 0x7db775b411f04839L,0x7dbeacf448de1698L,0xb2921dd1b70b3219L,
+        0x046755f8a92dff3dL } },
     /* 24 */
-    { { 0xcc8ac5d2bce8ffcdl,0x0d53c48b2fe61a82l,0xf6f161727202d6c7l,
-        0x046e5e113b83a5f3l },
-      { 0xe7b8ff64d8007f01l,0x7fb1ef125af43183l,0x045c5ea635e1a03cl,
-        0x6e0106c3303d005bl },
-      0 },
+    { { 0xcc8ac5d2bce8ffcdL,0x0d53c48b2fe61a82L,0xf6f161727202d6c7L,
+        0x046e5e113b83a5f3L },
+      { 0xe7b8ff64d8007f01L,0x7fb1ef125af43183L,0x045c5ea635e1a03cL,
+        0x6e0106c3303d005bL } },
     /* 25 */
-    { { 0x48c7358488dd73b1l,0x7670708f995ed0d9l,0x38385ea8c56a2ab7l,
-        0x442594ede901cf1fl },
-      { 0xf8faa2c912d4b65bl,0x94c2343b96c90c37l,0xd326e4a15e978d1fl,
-        0xa796fa514c2ee68el },
-      0 },
+    { { 0x48c7358488dd73b1L,0x7670708f995ed0d9L,0x38385ea8c56a2ab7L,
+        0x442594ede901cf1fL },
+      { 0xf8faa2c912d4b65bL,0x94c2343b96c90c37L,0xd326e4a15e978d1fL,
+        0xa796fa514c2ee68eL } },
     /* 26 */
-    { { 0x359fb604823addd7l,0x9e2a6183e56693b3l,0xf885b78e3cbf3c80l,
-        0xe4ad2da9c69766e9l },
-      { 0x357f7f428e048a61l,0x082d198cc092d9a0l,0xfc3a1af4c03ed8efl,
-        0xc5e94046c37b5143l },
-      0 },
+    { { 0x359fb604823addd7L,0x9e2a6183e56693b3L,0xf885b78e3cbf3c80L,
+        0xe4ad2da9c69766e9L },
+      { 0x357f7f428e048a61L,0x082d198cc092d9a0L,0xfc3a1af4c03ed8efL,
+        0xc5e94046c37b5143L } },
     /* 27 */
-    { { 0x476a538c2be75f9el,0x6fd1a9e8cb123a78l,0xd85e4df0b109c04bl,
-        0x63283dafdb464747l },
-      { 0xce728cf7baf2df15l,0xe592c4550ad9a7f4l,0xfab226ade834bcc3l,
-        0x68bd19ab1981a938l },
-      0 },
+    { { 0x476a538c2be75f9eL,0x6fd1a9e8cb123a78L,0xd85e4df0b109c04bL,
+        0x63283dafdb464747L },
+      { 0xce728cf7baf2df15L,0xe592c4550ad9a7f4L,0xfab226ade834bcc3L,
+        0x68bd19ab1981a938L } },
     /* 28 */
-    { { 0xc08ead511887d659l,0x3374d5f4b359305al,0x96986981cfe74fe3l,
-        0x495292f53c6fdfd6l },
-      { 0x4a878c9e1acec896l,0xd964b210ec5b4484l,0x6696f7e2664d60a7l,
-        0x0ec7530d26036837l },
-      0 },
+    { { 0xc08ead511887d659L,0x3374d5f4b359305aL,0x96986981cfe74fe3L,
+        0x495292f53c6fdfd6L },
+      { 0x4a878c9e1acec896L,0xd964b210ec5b4484L,0x6696f7e2664d60a7L,
+        0x0ec7530d26036837L } },
     /* 29 */
-    { { 0x2da13a05ad2687bbl,0xa1f83b6af32e21fal,0x390f5ef51dd4607bl,
-        0x0f6207a664863f0bl },
-      { 0xbd67e3bb0f138233l,0xdd66b96c272aa718l,0x8ed0040726ec88ael,
-        0xff0db07208ed6dcfl },
-      0 },
+    { { 0x2da13a05ad2687bbL,0xa1f83b6af32e21faL,0x390f5ef51dd4607bL,
+        0x0f6207a664863f0bL },
+      { 0xbd67e3bb0f138233L,0xdd66b96c272aa718L,0x8ed0040726ec88aeL,
+        0xff0db07208ed6dcfL } },
     /* 30 */
-    { { 0x749fa1014c95d553l,0xa44052fd5d680a8al,0x183b4317ff3b566fl,
-        0x313b513c88740ea3l },
-      { 0xb402e2ac08d11549l,0x071ee10bb4dee21cl,0x26b987dd47f2320el,
-        0x2d3abcf986f19f81l },
-      0 },
+    { { 0x749fa1014c95d553L,0xa44052fd5d680a8aL,0x183b4317ff3b566fL,
+        0x313b513c88740ea3L },
+      { 0xb402e2ac08d11549L,0x071ee10bb4dee21cL,0x26b987dd47f2320eL,
+        0x2d3abcf986f19f81L } },
     /* 31 */
-    { { 0x4c288501815581a2l,0x9a0a6d56632211afl,0x19ba7a0f0cab2e99l,
-        0xc036fa10ded98cdfl },
-      { 0x29ae08bac1fbd009l,0x0b68b19006d15816l,0xc2eb32779b9e0d8fl,
-        0xa6b2a2c4b6d40194l },
-      0 },
+    { { 0x4c288501815581a2L,0x9a0a6d56632211afL,0x19ba7a0f0cab2e99L,
+        0xc036fa10ded98cdfL },
+      { 0x29ae08bac1fbd009L,0x0b68b19006d15816L,0xc2eb32779b9e0d8fL,
+        0xa6b2a2c4b6d40194L } },
     /* 32 */
-    { { 0xd433e50f6d3549cfl,0x6f33696ffacd665el,0x695bfdacce11fcb4l,
-        0x810ee252af7c9860l },
-      { 0x65450fe17159bb2cl,0xf7dfbebe758b357bl,0x2b057e74d69fea72l,
-        0xd485717a92731745l },
-      0 },
+    { { 0xd433e50f6d3549cfL,0x6f33696ffacd665eL,0x695bfdacce11fcb4L,
+        0x810ee252af7c9860L },
+      { 0x65450fe17159bb2cL,0xf7dfbebe758b357bL,0x2b057e74d69fea72L,
+        0xd485717a92731745L } },
     /* 33 */
-    { { 0x11741a8af0cb5a98l,0xd3da8f931f3110bfl,0x1994e2cbab382adfl,
-        0x6a6045a72f9a604el },
-      { 0x170c0d3fa2b2411dl,0xbe0eb83e510e96e0l,0x3bcc9f738865b3ccl,
-        0xd3e45cfaf9e15790l },
-      0 },
+    { { 0x11741a8af0cb5a98L,0xd3da8f931f3110bfL,0x1994e2cbab382adfL,
+        0x6a6045a72f9a604eL },
+      { 0x170c0d3fa2b2411dL,0xbe0eb83e510e96e0L,0x3bcc9f738865b3ccL,
+        0xd3e45cfaf9e15790L } },
     /* 34 */
-    { { 0xce1f69bbe83f7669l,0x09f8ae8272877d6bl,0x9548ae543244278dl,
-        0x207755dee3c2c19cl },
-      { 0x87bd61d96fef1945l,0x18813cefb12d28c3l,0x9fbcd1d672df64aal,
-        0x48dc5ee57154b00dl },
-      0 },
+    { { 0xce1f69bbe83f7669L,0x09f8ae8272877d6bL,0x9548ae543244278dL,
+        0x207755dee3c2c19cL },
+      { 0x87bd61d96fef1945L,0x18813cefb12d28c3L,0x9fbcd1d672df64aaL,
+        0x48dc5ee57154b00dL } },
     /* 35 */
-    { { 0x123790bff7e5a199l,0xe0efb8cf989ccbb7l,0xc27a2bfe0a519c79l,
-        0xf2fb0aeddff6f445l },
-      { 0x41c09575f0b5025fl,0x550543d740fa9f22l,0x8fa3c8ad380bfbd0l,
-        0xa13e9015db28d525l },
-      0 },
+    { { 0x123790bff7e5a199L,0xe0efb8cf989ccbb7L,0xc27a2bfe0a519c79L,
+        0xf2fb0aeddff6f445L },
+      { 0x41c09575f0b5025fL,0x550543d740fa9f22L,0x8fa3c8ad380bfbd0L,
+        0xa13e9015db28d525L } },
     /* 36 */
-    { { 0xf9f7a350a2b65cbcl,0x0b04b9722a464226l,0x265ce241e23f07a1l,
-        0x2bf0d6b01497526fl },
-      { 0xd3d4dd3f4b216fb7l,0xf7d7b867fbdda26al,0xaeb7b83f6708505cl,
-        0x42a94a5a162fe89fl },
-      0 },
+    { { 0xf9f7a350a2b65cbcL,0x0b04b9722a464226L,0x265ce241e23f07a1L,
+        0x2bf0d6b01497526fL },
+      { 0xd3d4dd3f4b216fb7L,0xf7d7b867fbdda26aL,0xaeb7b83f6708505cL,
+        0x42a94a5a162fe89fL } },
     /* 37 */
-    { { 0x5846ad0beaadf191l,0x0f8a489025a268d7l,0xe8603050494dc1f6l,
-        0x2c2dd969c65ede3dl },
-      { 0x6d02171d93849c17l,0x460488ba1da250ddl,0x4810c7063c3a5485l,
-        0xf437fa1f42c56dbcl },
-      0 },
+    { { 0x5846ad0beaadf191L,0x0f8a489025a268d7L,0xe8603050494dc1f6L,
+        0x2c2dd969c65ede3dL },
+      { 0x6d02171d93849c17L,0x460488ba1da250ddL,0x4810c7063c3a5485L,
+        0xf437fa1f42c56dbcL } },
     /* 38 */
-    { { 0x6aa0d7144a0f7dabl,0x0f0497931776e9acl,0x52c0a050f5f39786l,
-        0xaaf45b3354707aa8l },
-      { 0x85e37c33c18d364al,0xd40b9b063e497165l,0xf417168115ec5444l,
-        0xcdf6310df4f272bcl },
-      0 },
+    { { 0x6aa0d7144a0f7dabL,0x0f0497931776e9acL,0x52c0a050f5f39786L,
+        0xaaf45b3354707aa8L },
+      { 0x85e37c33c18d364aL,0xd40b9b063e497165L,0xf417168115ec5444L,
+        0xcdf6310df4f272bcL } },
     /* 39 */
-    { { 0x7473c6238ea8b7efl,0x08e9351885bc2287l,0x419567722bda8e34l,
-        0xf0d008bada9e2ff2l },
-      { 0x2912671d2414d3b1l,0xb3754985b019ea76l,0x5c61b96d453bcbdbl,
-        0x5bd5c2f5ca887b8bl },
-      0 },
+    { { 0x7473c6238ea8b7efL,0x08e9351885bc2287L,0x419567722bda8e34L,
+        0xf0d008bada9e2ff2L },
+      { 0x2912671d2414d3b1L,0xb3754985b019ea76L,0x5c61b96d453bcbdbL,
+        0x5bd5c2f5ca887b8bL } },
     /* 40 */
-    { { 0xef0f469ef49a3154l,0x3e85a5956e2b2e9al,0x45aaec1eaa924a9cl,
-        0xaa12dfc8a09e4719l },
-      { 0x26f272274df69f1dl,0xe0e4c82ca2ff5e73l,0xb9d8ce73b7a9dd44l,
-        0x6c036e73e48ca901l },
-      0 },
+    { { 0xef0f469ef49a3154L,0x3e85a5956e2b2e9aL,0x45aaec1eaa924a9cL,
+        0xaa12dfc8a09e4719L },
+      { 0x26f272274df69f1dL,0xe0e4c82ca2ff5e73L,0xb9d8ce73b7a9dd44L,
+        0x6c036e73e48ca901L } },
     /* 41 */
-    { { 0x5cfae12a0f6e3138l,0x6966ef0025ad345al,0x8993c64b45672bc5l,
-        0x292ff65896afbe24l },
-      { 0xd5250d445e213402l,0xf6580e274392c9fel,0x097b397fda1c72e8l,
-        0x644e0c90311b7276l },
-      0 },
+    { { 0x5cfae12a0f6e3138L,0x6966ef0025ad345aL,0x8993c64b45672bc5L,
+        0x292ff65896afbe24L },
+      { 0xd5250d445e213402L,0xf6580e274392c9feL,0x097b397fda1c72e8L,
+        0x644e0c90311b7276L } },
     /* 42 */
-    { { 0xe1e421e1a47153f0l,0xb86c3b79920418c9l,0x93bdce87705d7672l,
-        0xf25ae793cab79a77l },
-      { 0x1f3194a36d869d0cl,0x9d55c8824986c264l,0x49fb5ea3096e945el,
-        0x39b8e65313db0a3el },
-      0 },
+    { { 0xe1e421e1a47153f0L,0xb86c3b79920418c9L,0x93bdce87705d7672L,
+        0xf25ae793cab79a77L },
+      { 0x1f3194a36d869d0cL,0x9d55c8824986c264L,0x49fb5ea3096e945eL,
+        0x39b8e65313db0a3eL } },
     /* 43 */
-    { { 0x37754200b6fd2e59l,0x35e2c0669255c98fl,0xd9dab21a0e2a5739l,
-        0x39122f2f0f19db06l },
-      { 0xcfbce1e003cad53cl,0x225b2c0fe65c17e3l,0x72baf1d29aa13877l,
-        0x8de80af8ce80ff8dl },
-      0 },
+    { { 0x37754200b6fd2e59L,0x35e2c0669255c98fL,0xd9dab21a0e2a5739L,
+        0x39122f2f0f19db06L },
+      { 0xcfbce1e003cad53cL,0x225b2c0fe65c17e3L,0x72baf1d29aa13877L,
+        0x8de80af8ce80ff8dL } },
     /* 44 */
-    { { 0xafbea8d9207bbb76l,0x921c7e7c21782758l,0xdfa2b74b1c0436b1l,
-        0x871949062e368c04l },
-      { 0xb5f928bba3993df5l,0x639d75b5f3b3d26al,0x011aa78a85b55050l,
-        0xfc315e6a5b74fde1l },
-      0 },
+    { { 0xafbea8d9207bbb76L,0x921c7e7c21782758L,0xdfa2b74b1c0436b1L,
+        0x871949062e368c04L },
+      { 0xb5f928bba3993df5L,0x639d75b5f3b3d26aL,0x011aa78a85b55050L,
+        0xfc315e6a5b74fde1L } },
     /* 45 */
-    { { 0x561fd41ae8d6ecfal,0x5f8c44f61aec7f86l,0x98452a7b4924741dl,
-        0xe6d4a7adee389088l },
-      { 0x60552ed14593c75dl,0x70a70da4dd271162l,0xd2aede937ba2c7dbl,
-        0x35dfaf9a9be2ae57l },
-      0 },
+    { { 0x561fd41ae8d6ecfaL,0x5f8c44f61aec7f86L,0x98452a7b4924741dL,
+        0xe6d4a7adee389088L },
+      { 0x60552ed14593c75dL,0x70a70da4dd271162L,0xd2aede937ba2c7dbL,
+        0x35dfaf9a9be2ae57L } },
     /* 46 */
-    { { 0x6b956fcdaa736636l,0x09f51d97ae2cab7el,0xfb10bf410f349966l,
-        0x1da5c7d71c830d2bl },
-      { 0x5c41e4833cce6825l,0x15ad118ff9573c3bl,0xa28552c7f23036b8l,
-        0x7077c0fddbf4b9d6l },
-      0 },
+    { { 0x6b956fcdaa736636L,0x09f51d97ae2cab7eL,0xfb10bf410f349966L,
+        0x1da5c7d71c830d2bL },
+      { 0x5c41e4833cce6825L,0x15ad118ff9573c3bL,0xa28552c7f23036b8L,
+        0x7077c0fddbf4b9d6L } },
     /* 47 */
-    { { 0xbf63ff8d46b9661cl,0xa1dfd36b0d2cfd71l,0x0373e140a847f8f7l,
-        0x53a8632ee50efe44l },
-      { 0x0976ff68696d8051l,0xdaec0c95c74f468al,0x62994dc35e4e26bdl,
-        0x028ca76d34e1fcc1l },
-      0 },
+    { { 0xbf63ff8d46b9661cL,0xa1dfd36b0d2cfd71L,0x0373e140a847f8f7L,
+        0x53a8632ee50efe44L },
+      { 0x0976ff68696d8051L,0xdaec0c95c74f468aL,0x62994dc35e4e26bdL,
+        0x028ca76d34e1fcc1L } },
     /* 48 */
-    { { 0xd11d47dcfc9877eel,0xc8b36210801d0002l,0xd002c11754c260b6l,
-        0x04c17cd86962f046l },
-      { 0x6d9bd094b0daddf5l,0xbea2357524ce55c0l,0x663356e672da03b5l,
-        0xf7ba4de9fed97474l },
-      0 },
+    { { 0xd11d47dcfc9877eeL,0xc8b36210801d0002L,0xd002c11754c260b6L,
+        0x04c17cd86962f046L },
+      { 0x6d9bd094b0daddf5L,0xbea2357524ce55c0L,0x663356e672da03b5L,
+        0xf7ba4de9fed97474L } },
     /* 49 */
-    { { 0xd0dbfa34ebe1263fl,0x5576373571ae7ce6l,0xd244055382a6f523l,
-        0xe31f960052131c41l },
-      { 0xd1bb9216ea6b6ec6l,0x37a1d12e73c2fc44l,0xc10e7eac89d0a294l,
-        0xaa3a6259ce34d47bl },
-      0 },
+    { { 0xd0dbfa34ebe1263fL,0x5576373571ae7ce6L,0xd244055382a6f523L,
+        0xe31f960052131c41L },
+      { 0xd1bb9216ea6b6ec6L,0x37a1d12e73c2fc44L,0xc10e7eac89d0a294L,
+        0xaa3a6259ce34d47bL } },
     /* 50 */
-    { { 0xfbcf9df536f3dcd3l,0x6ceded50d2bf7360l,0x491710fadf504f5bl,
-        0x2398dd627e79daeel },
-      { 0xcf4705a36d09569el,0xea0619bb5149f769l,0xff9c037735f6034cl,
-        0x5717f5b21c046210l },
-      0 },
+    { { 0xfbcf9df536f3dcd3L,0x6ceded50d2bf7360L,0x491710fadf504f5bL,
+        0x2398dd627e79daeeL },
+      { 0xcf4705a36d09569eL,0xea0619bb5149f769L,0xff9c037735f6034cL,
+        0x5717f5b21c046210L } },
     /* 51 */
-    { { 0x9fe229c921dd895el,0x8e51850040c28451l,0xfa13d2391d637ecdl,
-        0x660a2c560e3c28del },
-      { 0x9cca88aed67fcbd0l,0xc84724780ea9f096l,0x32b2f48172e92b4dl,
-        0x624ee54c4f522453l },
-      0 },
+    { { 0x9fe229c921dd895eL,0x8e51850040c28451L,0xfa13d2391d637ecdL,
+        0x660a2c560e3c28deL },
+      { 0x9cca88aed67fcbd0L,0xc84724780ea9f096L,0x32b2f48172e92b4dL,
+        0x624ee54c4f522453L } },
     /* 52 */
-    { { 0x09549ce4d897ecccl,0x4d49d1d93f9880aal,0x723c2423043a7c20l,
-        0x4f392afb92bdfbc0l },
-      { 0x6969f8fa7de44fd9l,0xb66cfbe457b32156l,0xdb2fa803368ebc3cl,
-        0x8a3e7977ccdb399cl },
-      0 },
+    { { 0x09549ce4d897ecccL,0x4d49d1d93f9880aaL,0x723c2423043a7c20L,
+        0x4f392afb92bdfbc0L },
+      { 0x6969f8fa7de44fd9L,0xb66cfbe457b32156L,0xdb2fa803368ebc3cL,
+        0x8a3e7977ccdb399cL } },
     /* 53 */
-    { { 0xdde1881f06c4b125l,0xae34e300f6e3ca8cl,0xef6999de5c7a13e9l,
-        0x3888d02370c24404l },
-      { 0x7628035644f91081l,0x3d9fcf615f015504l,0x1827edc8632cd36el,
-        0xa5e62e4718102336l },
-      0 },
+    { { 0xdde1881f06c4b125L,0xae34e300f6e3ca8cL,0xef6999de5c7a13e9L,
+        0x3888d02370c24404L },
+      { 0x7628035644f91081L,0x3d9fcf615f015504L,0x1827edc8632cd36eL,
+        0xa5e62e4718102336L } },
     /* 54 */
-    { { 0x1a825ee32facd6c8l,0x699c635454bcbc66l,0x0ce3edf798df9931l,
-        0x2c4768e6466a5adcl },
-      { 0xb346ff8c90a64bc9l,0x630a6020e4779f5cl,0xd949d064bc05e884l,
-        0x7b5e6441f9e652a0l },
-      0 },
+    { { 0x1a825ee32facd6c8L,0x699c635454bcbc66L,0x0ce3edf798df9931L,
+        0x2c4768e6466a5adcL },
+      { 0xb346ff8c90a64bc9L,0x630a6020e4779f5cL,0xd949d064bc05e884L,
+        0x7b5e6441f9e652a0L } },
     /* 55 */
-    { { 0x2169422c1d28444al,0xe996c5d8be136a39l,0x2387afe5fb0c7fcel,
-        0xb8af73cb0c8d744al },
-      { 0x5fde83aa338b86fdl,0xfee3f158a58a5cffl,0xc9ee8f6f20ac9433l,
-        0xa036395f7f3f0895l },
-      0 },
+    { { 0x2169422c1d28444aL,0xe996c5d8be136a39L,0x2387afe5fb0c7fceL,
+        0xb8af73cb0c8d744aL },
+      { 0x5fde83aa338b86fdL,0xfee3f158a58a5cffL,0xc9ee8f6f20ac9433L,
+        0xa036395f7f3f0895L } },
     /* 56 */
-    { { 0x8c73c6bba10f7770l,0xa6f16d81a12a0e24l,0x100df68251bc2b9fl,
-        0x4be36b01875fb533l },
-      { 0x9226086e9fb56dbbl,0x306fef8b07e7a4f8l,0xeeaccc0566d52f20l,
-        0x8cbc9a871bdc00c0l },
-      0 },
+    { { 0x8c73c6bba10f7770L,0xa6f16d81a12a0e24L,0x100df68251bc2b9fL,
+        0x4be36b01875fb533L },
+      { 0x9226086e9fb56dbbL,0x306fef8b07e7a4f8L,0xeeaccc0566d52f20L,
+        0x8cbc9a871bdc00c0L } },
     /* 57 */
-    { { 0xe131895cc0dac4abl,0xa874a440712ff112l,0x6332ae7c6a1cee57l,
-        0x44e7553e0c0835f8l },
-      { 0x6d503fff7734002dl,0x9d35cb8b0b34425cl,0x95f702760e8738b5l,
-        0x470a683a5eb8fc18l },
-      0 },
+    { { 0xe131895cc0dac4abL,0xa874a440712ff112L,0x6332ae7c6a1cee57L,
+        0x44e7553e0c0835f8L },
+      { 0x6d503fff7734002dL,0x9d35cb8b0b34425cL,0x95f702760e8738b5L,
+        0x470a683a5eb8fc18L } },
     /* 58 */
-    { { 0x81b761dc90513482l,0x0287202a01e9276al,0xcda441ee0ce73083l,
-        0x16410690c63dc6efl },
-      { 0xf5034a066d06a2edl,0xdd4d7745189b100bl,0xd914ae72ab8218c9l,
-        0xd73479fd7abcbb4fl },
-      0 },
+    { { 0x81b761dc90513482L,0x0287202a01e9276aL,0xcda441ee0ce73083L,
+        0x16410690c63dc6efL },
+      { 0xf5034a066d06a2edL,0xdd4d7745189b100bL,0xd914ae72ab8218c9L,
+        0xd73479fd7abcbb4fL } },
     /* 59 */
-    { { 0x7edefb165ad4c6e5l,0x262cf08f5b06d04dl,0x12ed5bb18575cb14l,
-        0x816469e30771666bl },
-      { 0xd7ab9d79561e291el,0xeb9daf22c1de1661l,0xf49827eb135e0513l,
-        0x0a36dd23f0dd3f9cl },
-      0 },
+    { { 0x7edefb165ad4c6e5L,0x262cf08f5b06d04dL,0x12ed5bb18575cb14L,
+        0x816469e30771666bL },
+      { 0xd7ab9d79561e291eL,0xeb9daf22c1de1661L,0xf49827eb135e0513L,
+        0x0a36dd23f0dd3f9cL } },
     /* 60 */
-    { { 0x098d32c741d5533cl,0x7c5f5a9e8684628fl,0x39a228ade349bd11l,
-        0xe331dfd6fdbab118l },
-      { 0x5100ab686bcc6ed8l,0x7160c3bdef7a260el,0x9063d9a7bce850d7l,
-        0xd3b4782a492e3389l },
-      0 },
+    { { 0x098d32c741d5533cL,0x7c5f5a9e8684628fL,0x39a228ade349bd11L,
+        0xe331dfd6fdbab118L },
+      { 0x5100ab686bcc6ed8L,0x7160c3bdef7a260eL,0x9063d9a7bce850d7L,
+        0xd3b4782a492e3389L } },
     /* 61 */
-    { { 0xa149b6e8f3821f90l,0x92edd9ed66eb7aadl,0x0bb669531a013116l,
-        0x7281275a4c86a5bdl },
-      { 0x503858f7d3ff47e5l,0x5e1616bc61016441l,0x62b0f11a7dfd9bb1l,
-        0x2c062e7ece145059l },
-      0 },
+    { { 0xa149b6e8f3821f90L,0x92edd9ed66eb7aadL,0x0bb669531a013116L,
+        0x7281275a4c86a5bdL },
+      { 0x503858f7d3ff47e5L,0x5e1616bc61016441L,0x62b0f11a7dfd9bb1L,
+        0x2c062e7ece145059L } },
     /* 62 */
-    { { 0xa76f996f0159ac2el,0x281e7736cbdb2713l,0x2ad6d28808e46047l,
-        0x282a35f92c4e7ef1l },
-      { 0x9c354b1ec0ce5cd2l,0xcf99efc91379c229l,0x992caf383e82c11el,
-        0xc71cd513554d2abdl },
-      0 },
+    { { 0xa76f996f0159ac2eL,0x281e7736cbdb2713L,0x2ad6d28808e46047L,
+        0x282a35f92c4e7ef1L },
+      { 0x9c354b1ec0ce5cd2L,0xcf99efc91379c229L,0x992caf383e82c11eL,
+        0xc71cd513554d2abdL } },
     /* 63 */
-    { { 0x4885de9c09b578f4l,0x1884e258e3affa7al,0x8f76b1b759182f1fl,
-        0xc50f6740cf47f3a3l },
-      { 0xa9c4adf3374b68eal,0xa406f32369965fe2l,0x2f86a22285a53050l,
-        0xb9ecb3a7212958dcl },
-      0 },
+    { { 0x4885de9c09b578f4L,0x1884e258e3affa7aL,0x8f76b1b759182f1fL,
+        0xc50f6740cf47f3a3L },
+      { 0xa9c4adf3374b68eaL,0xa406f32369965fe2L,0x2f86a22285a53050L,
+        0xb9ecb3a7212958dcL } },
     /* 64 */
-    { { 0x56f8410ef4f8b16al,0x97241afec47b266al,0x0a406b8e6d9c87c1l,
-        0x803f3e02cd42ab1bl },
-      { 0x7f0309a804dbec69l,0xa83b85f73bbad05fl,0xc6097273ad8e197fl,
-        0xc097440e5067adc1l },
-      0 },
+    { { 0x56f8410ef4f8b16aL,0x97241afec47b266aL,0x0a406b8e6d9c87c1L,
+        0x803f3e02cd42ab1bL },
+      { 0x7f0309a804dbec69L,0xa83b85f73bbad05fL,0xc6097273ad8e197fL,
+        0xc097440e5067adc1L } },
     /* 65 */
-    { { 0x846a56f2c379ab34l,0xa8ee068b841df8d1l,0x20314459176c68efl,
-        0xf1af32d5915f1f30l },
-      { 0x99c375315d75bd50l,0x837cffbaf72f67bcl,0x0613a41848d7723fl,
-        0x23d0f130e2d41c8bl },
-      0 },
+    { { 0x846a56f2c379ab34L,0xa8ee068b841df8d1L,0x20314459176c68efL,
+        0xf1af32d5915f1f30L },
+      { 0x99c375315d75bd50L,0x837cffbaf72f67bcL,0x0613a41848d7723fL,
+        0x23d0f130e2d41c8bL } },
     /* 66 */
-    { { 0x857ab6edf41500d9l,0x0d890ae5fcbeada8l,0x52fe864889725951l,
-        0xb0288dd6c0a3faddl },
-      { 0x85320f30650bcb08l,0x71af6313695d6e16l,0x31f520a7b989aa76l,
-        0xffd3724ff408c8d2l },
-      0 },
+    { { 0x857ab6edf41500d9L,0x0d890ae5fcbeada8L,0x52fe864889725951L,
+        0xb0288dd6c0a3faddL },
+      { 0x85320f30650bcb08L,0x71af6313695d6e16L,0x31f520a7b989aa76L,
+        0xffd3724ff408c8d2L } },
     /* 67 */
-    { { 0x53968e64b458e6cbl,0x992dad20317a5d28l,0x3814ae0b7aa75f56l,
-        0xf5590f4ad78c26dfl },
-      { 0x0fc24bd3cf0ba55al,0x0fc4724a0c778bael,0x1ce9864f683b674al,
-        0x18d6da54f6f74a20l },
-      0 },
+    { { 0x53968e64b458e6cbL,0x992dad20317a5d28L,0x3814ae0b7aa75f56L,
+        0xf5590f4ad78c26dfL },
+      { 0x0fc24bd3cf0ba55aL,0x0fc4724a0c778baeL,0x1ce9864f683b674aL,
+        0x18d6da54f6f74a20L } },
     /* 68 */
-    { { 0xed93e225d5be5a2bl,0x6fe799835934f3c6l,0x4314092622626ffcl,
-        0x50bbb4d97990216al },
-      { 0x378191c6e57ec63el,0x65422c40181dcdb2l,0x41a8099b0236e0f6l,
-        0x2b10011801fe49c3l },
-      0 },
+    { { 0xed93e225d5be5a2bL,0x6fe799835934f3c6L,0x4314092622626ffcL,
+        0x50bbb4d97990216aL },
+      { 0x378191c6e57ec63eL,0x65422c40181dcdb2L,0x41a8099b0236e0f6L,
+        0x2b10011801fe49c3L } },
     /* 69 */
-    { { 0xfc68b5c59b391593l,0xc385f5a2598270fcl,0x7144f3aad19adcbbl,
-        0xdd55899983fbae0cl },
-      { 0x93b88b8e74b82ff4l,0xd2e03c4071e734c9l,0x9a7a9eaf43c0322al,
-        0xe6e4c551149d6041l },
-      0 },
+    { { 0xfc68b5c59b391593L,0xc385f5a2598270fcL,0x7144f3aad19adcbbL,
+        0xdd55899983fbae0cL },
+      { 0x93b88b8e74b82ff4L,0xd2e03c4071e734c9L,0x9a7a9eaf43c0322aL,
+        0xe6e4c551149d6041L } },
     /* 70 */
-    { { 0x55f655bb1e9af288l,0x647e1a64f7ada931l,0x43697e4bcb2820e5l,
-        0x51e00db107ed56ffl },
-      { 0x43d169b8771c327el,0x29cdb20b4a96c2adl,0xc07d51f53deb4779l,
-        0xe22f424149829177l },
-      0 },
+    { { 0x55f655bb1e9af288L,0x647e1a64f7ada931L,0x43697e4bcb2820e5L,
+        0x51e00db107ed56ffL },
+      { 0x43d169b8771c327eL,0x29cdb20b4a96c2adL,0xc07d51f53deb4779L,
+        0xe22f424149829177L } },
     /* 71 */
-    { { 0xcd45e8f4635f1abbl,0x7edc0cb568538874l,0xc9472c1fb5a8034dl,
-        0xf709373d52dc48c9l },
-      { 0x401966bba8af30d6l,0x95bf5f4af137b69cl,0x3966162a9361c47el,
-        0xbd52d288e7275b11l },
-      0 },
+    { { 0xcd45e8f4635f1abbL,0x7edc0cb568538874L,0xc9472c1fb5a8034dL,
+        0xf709373d52dc48c9L },
+      { 0x401966bba8af30d6L,0x95bf5f4af137b69cL,0x3966162a9361c47eL,
+        0xbd52d288e7275b11L } },
     /* 72 */
-    { { 0xab155c7a9c5fa877l,0x17dad6727d3a3d48l,0x43f43f9e73d189d8l,
-        0xa0d0f8e4c8aa77a6l },
-      { 0x0bbeafd8cc94f92dl,0xd818c8be0c4ddb3al,0x22cc65f8b82eba14l,
-        0xa56c78c7946d6a00l },
-      0 },
+    { { 0xab155c7a9c5fa877L,0x17dad6727d3a3d48L,0x43f43f9e73d189d8L,
+        0xa0d0f8e4c8aa77a6L },
+      { 0x0bbeafd8cc94f92dL,0xd818c8be0c4ddb3aL,0x22cc65f8b82eba14L,
+        0xa56c78c7946d6a00L } },
     /* 73 */
-    { { 0x2962391b0dd09529l,0x803e0ea63daddfcfl,0x2c77351f5b5bf481l,
-        0xd8befdf8731a367al },
-      { 0xab919d42fc0157f4l,0xf51caed7fec8e650l,0xcdf9cb4002d48b0al,
-        0x854a68a5ce9f6478l },
-      0 },
+    { { 0x2962391b0dd09529L,0x803e0ea63daddfcfL,0x2c77351f5b5bf481L,
+        0xd8befdf8731a367aL },
+      { 0xab919d42fc0157f4L,0xf51caed7fec8e650L,0xcdf9cb4002d48b0aL,
+        0x854a68a5ce9f6478L } },
     /* 74 */
-    { { 0xdc35f67b63506ea5l,0x9286c489a4fe0d66l,0x3f101d3bfe95cd4dl,
-        0x5cacea0b98846a95l },
-      { 0xa90df60c9ceac44dl,0x3db29af4354d1c3al,0x08dd3de8ad5dbabel,
-        0xe4982d1235e4efa9l },
-      0 },
+    { { 0xdc35f67b63506ea5L,0x9286c489a4fe0d66L,0x3f101d3bfe95cd4dL,
+        0x5cacea0b98846a95L },
+      { 0xa90df60c9ceac44dL,0x3db29af4354d1c3aL,0x08dd3de8ad5dbabeL,
+        0xe4982d1235e4efa9L } },
     /* 75 */
-    { { 0x23104a22c34cd55el,0x58695bb32680d132l,0xfb345afa1fa1d943l,
-        0x8046b7f616b20499l },
-      { 0xb533581e38e7d098l,0xd7f61e8df46f0b70l,0x30dea9ea44cb78c4l,
-        0xeb17ca7b9082af55l },
-      0 },
+    { { 0x23104a22c34cd55eL,0x58695bb32680d132L,0xfb345afa1fa1d943L,
+        0x8046b7f616b20499L },
+      { 0xb533581e38e7d098L,0xd7f61e8df46f0b70L,0x30dea9ea44cb78c4L,
+        0xeb17ca7b9082af55L } },
     /* 76 */
-    { { 0x1751b59876a145b9l,0xa5cf6b0fc1bc71ecl,0xd3e03565392715bbl,
-        0x097b00bafab5e131l },
-      { 0xaa66c8e9565f69e1l,0x77e8f75ab5be5199l,0x6033ba11da4fd984l,
-        0xf95c747bafdbcc9el },
-      0 },
+    { { 0x1751b59876a145b9L,0xa5cf6b0fc1bc71ecL,0xd3e03565392715bbL,
+        0x097b00bafab5e131L },
+      { 0xaa66c8e9565f69e1L,0x77e8f75ab5be5199L,0x6033ba11da4fd984L,
+        0xf95c747bafdbcc9eL } },
     /* 77 */
-    { { 0x558f01d3bebae45el,0xa8ebe9f0c4bc6955l,0xaeb705b1dbc64fc6l,
-        0x3512601e566ed837l },
-      { 0x9336f1e1fa1161cdl,0x328ab8d54c65ef87l,0x4757eee2724f21e5l,
-        0x0ef971236068ab6bl },
-      0 },
+    { { 0x558f01d3bebae45eL,0xa8ebe9f0c4bc6955L,0xaeb705b1dbc64fc6L,
+        0x3512601e566ed837L },
+      { 0x9336f1e1fa1161cdL,0x328ab8d54c65ef87L,0x4757eee2724f21e5L,
+        0x0ef971236068ab6bL } },
     /* 78 */
-    { { 0x02598cf754ca4226l,0x5eede138f8642c8el,0x48963f74468e1790l,
-        0xfc16d9333b4fbc95l },
-      { 0xbe96fb31e7c800cal,0x138063312678adaal,0x3d6244976ff3e8b5l,
-        0x14ca4af1b95d7a17l },
-      0 },
+    { { 0x02598cf754ca4226L,0x5eede138f8642c8eL,0x48963f74468e1790L,
+        0xfc16d9333b4fbc95L },
+      { 0xbe96fb31e7c800caL,0x138063312678adaaL,0x3d6244976ff3e8b5L,
+        0x14ca4af1b95d7a17L } },
     /* 79 */
-    { { 0x7a4771babd2f81d5l,0x1a5f9d6901f7d196l,0xd898bef7cad9c907l,
-        0x4057b063f59c231dl },
-      { 0xbffd82fe89c05c0al,0xe4911c6f1dc0df85l,0x3befccaea35a16dbl,
-        0x1c3b5d64f1330b13l },
-      0 },
+    { { 0x7a4771babd2f81d5L,0x1a5f9d6901f7d196L,0xd898bef7cad9c907L,
+        0x4057b063f59c231dL },
+      { 0xbffd82fe89c05c0aL,0xe4911c6f1dc0df85L,0x3befccaea35a16dbL,
+        0x1c3b5d64f1330b13L } },
     /* 80 */
-    { { 0x5fe14bfe80ec21fel,0xf6ce116ac255be82l,0x98bc5a072f4a5d67l,
-        0xfad27148db7e63afl },
-      { 0x90c0b6ac29ab05b3l,0x37a9a83c4e251ae6l,0x0a7dc875c2aade7dl,
-        0x77387de39f0e1a84l },
-      0 },
+    { { 0x5fe14bfe80ec21feL,0xf6ce116ac255be82L,0x98bc5a072f4a5d67L,
+        0xfad27148db7e63afL },
+      { 0x90c0b6ac29ab05b3L,0x37a9a83c4e251ae6L,0x0a7dc875c2aade7dL,
+        0x77387de39f0e1a84L } },
     /* 81 */
-    { { 0x1e9ecc49a56c0dd7l,0xa5cffcd846086c74l,0x8f7a1408f505aecel,
-        0xb37b85c0bef0c47el },
-      { 0x3596b6e4cc0e6a8fl,0xfd6d4bbf6b388f23l,0xaba453fac39cef4el,
-        0x9c135ac8f9f628d5l },
-      0 },
+    { { 0x1e9ecc49a56c0dd7L,0xa5cffcd846086c74L,0x8f7a1408f505aeceL,
+        0xb37b85c0bef0c47eL },
+      { 0x3596b6e4cc0e6a8fL,0xfd6d4bbf6b388f23L,0xaba453fac39cef4eL,
+        0x9c135ac8f9f628d5L } },
     /* 82 */
-    { { 0x32aa320284e35743l,0x320d6ab185a3cdefl,0xb821b1761df19819l,
-        0x5721361fc433851fl },
-      { 0x1f0db36a71fc9168l,0x5f98ba735e5c403cl,0xf64ca87e37bcd8f5l,
-        0xdcbac3c9e6bb11bdl },
-      0 },
+    { { 0x32aa320284e35743L,0x320d6ab185a3cdefL,0xb821b1761df19819L,
+        0x5721361fc433851fL },
+      { 0x1f0db36a71fc9168L,0x5f98ba735e5c403cL,0xf64ca87e37bcd8f5L,
+        0xdcbac3c9e6bb11bdL } },
     /* 83 */
-    { { 0xf01d99684518cbe2l,0xd242fc189c9eb04el,0x727663c7e47feebfl,
-        0xb8c1c89e2d626862l },
-      { 0x51a58bddc8e1d569l,0x563809c8b7d88cd0l,0x26c27fd9f11f31ebl,
-        0x5d23bbda2f9422d4l },
-      0 },
+    { { 0xf01d99684518cbe2L,0xd242fc189c9eb04eL,0x727663c7e47feebfL,
+        0xb8c1c89e2d626862L },
+      { 0x51a58bddc8e1d569L,0x563809c8b7d88cd0L,0x26c27fd9f11f31ebL,
+        0x5d23bbda2f9422d4L } },
     /* 84 */
-    { { 0x0a1c729495c8f8bel,0x2961c4803bf362bfl,0x9e418403df63d4acl,
-        0xc109f9cb91ece900l },
-      { 0xc2d095d058945705l,0xb9083d96ddeb85c0l,0x84692b8d7a40449bl,
-        0x9bc3344f2eee1ee1l },
-      0 },
+    { { 0x0a1c729495c8f8beL,0x2961c4803bf362bfL,0x9e418403df63d4acL,
+        0xc109f9cb91ece900L },
+      { 0xc2d095d058945705L,0xb9083d96ddeb85c0L,0x84692b8d7a40449bL,
+        0x9bc3344f2eee1ee1L } },
     /* 85 */
-    { { 0x0d5ae35642913074l,0x55491b2748a542b1l,0x469ca665b310732al,
-        0x29591d525f1a4cc1l },
-      { 0xe76f5b6bb84f983fl,0xbe7eef419f5f84e1l,0x1200d49680baa189l,
-        0x6376551f18ef332cl },
-      0 },
+    { { 0x0d5ae35642913074L,0x55491b2748a542b1L,0x469ca665b310732aL,
+        0x29591d525f1a4cc1L },
+      { 0xe76f5b6bb84f983fL,0xbe7eef419f5f84e1L,0x1200d49680baa189L,
+        0x6376551f18ef332cL } },
     /* 86 */
-    { { 0xbda5f14e562976ccl,0x22bca3e60ef12c38l,0xbbfa30646cca9852l,
-        0xbdb79dc808e2987al },
-      { 0xfd2cb5c9cb06a772l,0x38f475aafe536dcel,0xc2a3e0227c2b5db8l,
-        0x8ee86001add3c14al },
-      0 },
+    { { 0xbda5f14e562976ccL,0x22bca3e60ef12c38L,0xbbfa30646cca9852L,
+        0xbdb79dc808e2987aL },
+      { 0xfd2cb5c9cb06a772L,0x38f475aafe536dceL,0xc2a3e0227c2b5db8L,
+        0x8ee86001add3c14aL } },
     /* 87 */
-    { { 0xcbe96981a4ade873l,0x7ee9aa4dc4fba48cl,0x2cee28995a054ba5l,
-        0x92e51d7a6f77aa4bl },
-      { 0x948bafa87190a34dl,0xd698f75bf6bd1ed1l,0xd00ee6e30caf1144l,
-        0x5182f86f0a56aaaal },
-      0 },
+    { { 0xcbe96981a4ade873L,0x7ee9aa4dc4fba48cL,0x2cee28995a054ba5L,
+        0x92e51d7a6f77aa4bL },
+      { 0x948bafa87190a34dL,0xd698f75bf6bd1ed1L,0xd00ee6e30caf1144L,
+        0x5182f86f0a56aaaaL } },
     /* 88 */
-    { { 0xfba6212c7a4cc99cl,0xff609b683e6d9ca1l,0x5dbb27cb5ac98c5al,
-        0x91dcab5d4073a6f2l },
-      { 0x01b6cc3d5f575a70l,0x0cb361396f8d87fal,0x165d4e8c89981736l,
-        0x17a0cedb97974f2bl },
-      0 },
+    { { 0xfba6212c7a4cc99cL,0xff609b683e6d9ca1L,0x5dbb27cb5ac98c5aL,
+        0x91dcab5d4073a6f2L },
+      { 0x01b6cc3d5f575a70L,0x0cb361396f8d87faL,0x165d4e8c89981736L,
+        0x17a0cedb97974f2bL } },
     /* 89 */
-    { { 0x38861e2a076c8d3al,0x701aad39210f924bl,0x94d0eae413a835d9l,
-        0x2e8ce36c7f4cdf41l },
-      { 0x91273dab037a862bl,0x01ba9bb760e4c8fal,0xf964538833baf2ddl,
-        0xf4ccc6cb34f668f3l },
-      0 },
+    { { 0x38861e2a076c8d3aL,0x701aad39210f924bL,0x94d0eae413a835d9L,
+        0x2e8ce36c7f4cdf41L },
+      { 0x91273dab037a862bL,0x01ba9bb760e4c8faL,0xf964538833baf2ddL,
+        0xf4ccc6cb34f668f3L } },
     /* 90 */
-    { { 0x44ef525cf1f79687l,0x7c59549592efa815l,0xe1231741a5c78d29l,
-        0xac0db4889a0df3c9l },
-      { 0x86bfc711df01747fl,0x592b9358ef17df13l,0xe5880e4f5ccb6bb5l,
-        0x95a64a6194c974a2l },
-      0 },
+    { { 0x44ef525cf1f79687L,0x7c59549592efa815L,0xe1231741a5c78d29L,
+        0xac0db4889a0df3c9L },
+      { 0x86bfc711df01747fL,0x592b9358ef17df13L,0xe5880e4f5ccb6bb5L,
+        0x95a64a6194c974a2L } },
     /* 91 */
-    { { 0x72c1efdac15a4c93l,0x40269b7382585141l,0x6a8dfb1c16cb0badl,
-        0x231e54ba29210677l },
-      { 0xa70df9178ae6d2dcl,0x4d6aa63f39112918l,0xf627726b5e5b7223l,
-        0xab0be032d8a731e1l },
-      0 },
+    { { 0x72c1efdac15a4c93L,0x40269b7382585141L,0x6a8dfb1c16cb0badL,
+        0x231e54ba29210677L },
+      { 0xa70df9178ae6d2dcL,0x4d6aa63f39112918L,0xf627726b5e5b7223L,
+        0xab0be032d8a731e1L } },
     /* 92 */
-    { { 0x097ad0e98d131f2dl,0x637f09e33b04f101l,0x1ac86196d5e9a748l,
-        0xf1bcc8802cf6a679l },
-      { 0x25c69140e8daacb4l,0x3c4e405560f65009l,0x591cc8fc477937a6l,
-        0x851694695aebb271l },
-      0 },
+    { { 0x097ad0e98d131f2dL,0x637f09e33b04f101L,0x1ac86196d5e9a748L,
+        0xf1bcc8802cf6a679L },
+      { 0x25c69140e8daacb4L,0x3c4e405560f65009L,0x591cc8fc477937a6L,
+        0x851694695aebb271L } },
     /* 93 */
-    { { 0xde35c143f1dcf593l,0x78202b29b018be3bl,0xe9cdadc29bdd9d3dl,
-        0x8f67d9d2daad55d8l },
-      { 0x841116567481ea5fl,0xe7d2dde9e34c590cl,0xffdd43f405053fa8l,
-        0xf84572b9c0728b5dl },
-      0 },
+    { { 0xde35c143f1dcf593L,0x78202b29b018be3bL,0xe9cdadc29bdd9d3dL,
+        0x8f67d9d2daad55d8L },
+      { 0x841116567481ea5fL,0xe7d2dde9e34c590cL,0xffdd43f405053fa8L,
+        0xf84572b9c0728b5dL } },
     /* 94 */
-    { { 0x5e1a7a7197af71c9l,0xa14494447a736565l,0xa1b4ae070e1d5063l,
-        0xedee2710616b2c19l },
-      { 0xb2f034f511734121l,0x1cac6e554a25e9f0l,0x8dc148f3a40c2ecfl,
-        0x9fd27e9b44ebd7f4l },
-      0 },
+    { { 0x5e1a7a7197af71c9L,0xa14494447a736565L,0xa1b4ae070e1d5063L,
+        0xedee2710616b2c19L },
+      { 0xb2f034f511734121L,0x1cac6e554a25e9f0L,0x8dc148f3a40c2ecfL,
+        0x9fd27e9b44ebd7f4L } },
     /* 95 */
-    { { 0x3cc7658af6e2cb16l,0xe3eb7d2cfe5919b6l,0x5a8c5816168d5583l,
-        0xa40c2fb6958ff387l },
-      { 0x8c9ec560fedcc158l,0x7ad804c655f23056l,0xd93967049a307e12l,
-        0x99bc9bb87dc6decfl },
-      0 },
+    { { 0x3cc7658af6e2cb16L,0xe3eb7d2cfe5919b6L,0x5a8c5816168d5583L,
+        0xa40c2fb6958ff387L },
+      { 0x8c9ec560fedcc158L,0x7ad804c655f23056L,0xd93967049a307e12L,
+        0x99bc9bb87dc6decfL } },
     /* 96 */
-    { { 0x84a9521d927dafc6l,0x52c1fb695c09cd19l,0x9d9581a0f9366ddel,
-        0x9abe210ba16d7e64l },
-      { 0x480af84a48915220l,0xfa73176a4dd816c6l,0xc7d539871681ca5al,
-        0x7881c25787f344b0l },
-      0 },
+    { { 0x84a9521d927dafc6L,0x52c1fb695c09cd19L,0x9d9581a0f9366ddeL,
+        0x9abe210ba16d7e64L },
+      { 0x480af84a48915220L,0xfa73176a4dd816c6L,0xc7d539871681ca5aL,
+        0x7881c25787f344b0L } },
     /* 97 */
-    { { 0x93399b51e0bcf3ffl,0x0d02cbc5127f74f6l,0x8fb465a2dd01d968l,
-        0x15e6e319a30e8940l },
-      { 0x646d6e0d3e0e05f4l,0xfad7bddc43588404l,0xbe61c7d1c4f850d3l,
-        0x0e55facf191172cel },
-      0 },
+    { { 0x93399b51e0bcf3ffL,0x0d02cbc5127f74f6L,0x8fb465a2dd01d968L,
+        0x15e6e319a30e8940L },
+      { 0x646d6e0d3e0e05f4L,0xfad7bddc43588404L,0xbe61c7d1c4f850d3L,
+        0x0e55facf191172ceL } },
     /* 98 */
-    { { 0x7e9d9806f8787564l,0x1a33172131e85ce6l,0x6b0158cab819e8d6l,
-        0xd73d09766fe96577l },
-      { 0x424834251eb7206el,0xa519290fc618bb42l,0x5dcbb8595e30a520l,
-        0x9250a3748f15a50bl },
-      0 },
+    { { 0x7e9d9806f8787564L,0x1a33172131e85ce6L,0x6b0158cab819e8d6L,
+        0xd73d09766fe96577L },
+      { 0x424834251eb7206eL,0xa519290fc618bb42L,0x5dcbb8595e30a520L,
+        0x9250a3748f15a50bL } },
     /* 99 */
-    { { 0xcaff08f8be577410l,0xfd408a035077a8c6l,0xf1f63289ec0a63a4l,
-        0x77414082c1cc8c0bl },
-      { 0x05a40fa6eb0991cdl,0xc1ca086649fdc296l,0x3a68a3c7b324fd40l,
-        0x8cb04f4d12eb20b9l },
-      0 },
+    { { 0xcaff08f8be577410L,0xfd408a035077a8c6L,0xf1f63289ec0a63a4L,
+        0x77414082c1cc8c0bL },
+      { 0x05a40fa6eb0991cdL,0xc1ca086649fdc296L,0x3a68a3c7b324fd40L,
+        0x8cb04f4d12eb20b9L } },
     /* 100 */
-    { { 0xb1c2d0556906171cl,0x9073e9cdb0240c3fl,0xdb8e6b4fd8906841l,
-        0xe4e429ef47123b51l },
-      { 0x0b8dd53c38ec36f4l,0xf9d2dc01ff4b6a27l,0x5d066e07879a9a48l,
-        0x37bca2ff3c6e6552l },
-      0 },
+    { { 0xb1c2d0556906171cL,0x9073e9cdb0240c3fL,0xdb8e6b4fd8906841L,
+        0xe4e429ef47123b51L },
+      { 0x0b8dd53c38ec36f4L,0xf9d2dc01ff4b6a27L,0x5d066e07879a9a48L,
+        0x37bca2ff3c6e6552L } },
     /* 101 */
-    { { 0x4cd2e3c7df562470l,0x44f272a2c0964ac9l,0x7c6d5df980c793bel,
-        0x59913edc3002b22al },
-      { 0x7a139a835750592al,0x99e01d80e783de02l,0xcf8c0375ea05d64fl,
-        0x43786e4ab013e226l },
-      0 },
+    { { 0x4cd2e3c7df562470L,0x44f272a2c0964ac9L,0x7c6d5df980c793beL,
+        0x59913edc3002b22aL },
+      { 0x7a139a835750592aL,0x99e01d80e783de02L,0xcf8c0375ea05d64fL,
+        0x43786e4ab013e226L } },
     /* 102 */
-    { { 0xff32b0ed9e56b5a6l,0x0750d9a6d9fc68f9l,0xec15e845597846a7l,
-        0x8638ca98b7e79e7al },
-      { 0x2f5ae0960afc24b2l,0x05398eaf4dace8f2l,0x3b765dd0aecba78fl,
-        0x1ecdd36a7b3aa6f0l },
-      0 },
+    { { 0xff32b0ed9e56b5a6L,0x0750d9a6d9fc68f9L,0xec15e845597846a7L,
+        0x8638ca98b7e79e7aL },
+      { 0x2f5ae0960afc24b2L,0x05398eaf4dace8f2L,0x3b765dd0aecba78fL,
+        0x1ecdd36a7b3aa6f0L } },
     /* 103 */
-    { { 0x5d3acd626c5ff2f3l,0xa2d516c02873a978l,0xad94c9fad2110d54l,
-        0xd85d0f85d459f32dl },
-      { 0x9f700b8d10b11da3l,0xd2c22c30a78318c4l,0x556988f49208decdl,
-        0xa04f19c3b4ed3c62l },
-      0 },
+    { { 0x5d3acd626c5ff2f3L,0xa2d516c02873a978L,0xad94c9fad2110d54L,
+        0xd85d0f85d459f32dL },
+      { 0x9f700b8d10b11da3L,0xd2c22c30a78318c4L,0x556988f49208decdL,
+        0xa04f19c3b4ed3c62L } },
     /* 104 */
-    { { 0x087924c8ed7f93bdl,0xcb64ac5d392f51f6l,0x7cae330a821b71afl,
-        0x92b2eeea5c0950b0l },
-      { 0x85ac4c9485b6e235l,0xab2ca4a92936c0f0l,0x80faa6b3e0508891l,
-        0x1ee782215834276cl },
-      0 },
+    { { 0x087924c8ed7f93bdL,0xcb64ac5d392f51f6L,0x7cae330a821b71afL,
+        0x92b2eeea5c0950b0L },
+      { 0x85ac4c9485b6e235L,0xab2ca4a92936c0f0L,0x80faa6b3e0508891L,
+        0x1ee782215834276cL } },
     /* 105 */
-    { { 0xa60a2e00e63e79f7l,0xf590e7b2f399d906l,0x9021054a6607c09dl,
-        0xf3f2ced857a6e150l },
-      { 0x200510f3f10d9b55l,0x9d2fcfacd8642648l,0xe5631aa7e8bd0e7cl,
-        0x0f56a4543da3e210l },
-      0 },
+    { { 0xa60a2e00e63e79f7L,0xf590e7b2f399d906L,0x9021054a6607c09dL,
+        0xf3f2ced857a6e150L },
+      { 0x200510f3f10d9b55L,0x9d2fcfacd8642648L,0xe5631aa7e8bd0e7cL,
+        0x0f56a4543da3e210L } },
     /* 106 */
-    { { 0x5b21bffa1043e0dfl,0x6c74b6cc9c007e6dl,0x1a656ec0d4a8517al,
-        0xbd8f17411969e263l },
-      { 0x8a9bbb86beb7494al,0x1567d46f45f3b838l,0xdf7a12a7a4e5a79al,
-        0x2d1a1c3530ccfa09l },
-      0 },
+    { { 0x5b21bffa1043e0dfL,0x6c74b6cc9c007e6dL,0x1a656ec0d4a8517aL,
+        0xbd8f17411969e263L },
+      { 0x8a9bbb86beb7494aL,0x1567d46f45f3b838L,0xdf7a12a7a4e5a79aL,
+        0x2d1a1c3530ccfa09L } },
     /* 107 */
-    { { 0x192e3813506508dal,0x336180c4a1d795a7l,0xcddb59497a9944b3l,
-        0xa107a65eb91fba46l },
-      { 0xe6d1d1c50f94d639l,0x8b4af3758a58b7d7l,0x1a7c5584bd37ca1cl,
-        0x183d760af87a9af2l },
-      0 },
+    { { 0x192e3813506508daL,0x336180c4a1d795a7L,0xcddb59497a9944b3L,
+        0xa107a65eb91fba46L },
+      { 0xe6d1d1c50f94d639L,0x8b4af3758a58b7d7L,0x1a7c5584bd37ca1cL,
+        0x183d760af87a9af2L } },
     /* 108 */
-    { { 0x29d697110dde59a4l,0xf1ad8d070e8bef87l,0x229b49634f2ebe78l,
-        0x1d44179dc269d754l },
-      { 0xb32dc0cf8390d30el,0x0a3b27530de8110cl,0x31af1dc52bc0339al,
-        0x771f9cc29606d262l },
-      0 },
+    { { 0x29d697110dde59a4L,0xf1ad8d070e8bef87L,0x229b49634f2ebe78L,
+        0x1d44179dc269d754L },
+      { 0xb32dc0cf8390d30eL,0x0a3b27530de8110cL,0x31af1dc52bc0339aL,
+        0x771f9cc29606d262L } },
     /* 109 */
-    { { 0x99993e7785040739l,0x44539db98026a939l,0xcf40f6f2f5f8fc26l,
-        0x64427a310362718el },
-      { 0x4f4f2d8785428aa8l,0x7b7adc3febfb49a8l,0x201b2c6df23d01acl,
-        0x49d9b7496ae90d6dl },
-      0 },
+    { { 0x99993e7785040739L,0x44539db98026a939L,0xcf40f6f2f5f8fc26L,
+        0x64427a310362718eL },
+      { 0x4f4f2d8785428aa8L,0x7b7adc3febfb49a8L,0x201b2c6df23d01acL,
+        0x49d9b7496ae90d6dL } },
     /* 110 */
-    { { 0xcc78d8bc435d1099l,0x2adbcd4e8e8d1a08l,0x02c2e2a02cb68a41l,
-        0x9037d81b3f605445l },
-      { 0x7cdbac27074c7b61l,0xfe2031ab57bfd72el,0x61ccec96596d5352l,
-        0x08c3de6a7cc0639cl },
-      0 },
+    { { 0xcc78d8bc435d1099L,0x2adbcd4e8e8d1a08L,0x02c2e2a02cb68a41L,
+        0x9037d81b3f605445L },
+      { 0x7cdbac27074c7b61L,0xfe2031ab57bfd72eL,0x61ccec96596d5352L,
+        0x08c3de6a7cc0639cL } },
     /* 111 */
-    { { 0x20fdd020f6d552abl,0x56baff9805cd81f1l,0x06fb7c3e91351291l,
-        0xc690944245796b2fl },
-      { 0x17b3ae9c41231bd1l,0x1eac6e875cc58205l,0x208837abf9d6a122l,
-        0x3fa3db02cafe3ac0l },
-      0 },
+    { { 0x20fdd020f6d552abL,0x56baff9805cd81f1L,0x06fb7c3e91351291L,
+        0xc690944245796b2fL },
+      { 0x17b3ae9c41231bd1L,0x1eac6e875cc58205L,0x208837abf9d6a122L,
+        0x3fa3db02cafe3ac0L } },
     /* 112 */
-    { { 0xd75a3e6505058880l,0x7da365ef643943f2l,0x4147861cfab24925l,
-        0xc5c4bdb0fdb808ffl },
-      { 0x73513e34b272b56bl,0xc8327e9511b9043al,0xfd8ce37df8844969l,
-        0x2d56db9446c2b6b5l },
-      0 },
+    { { 0xd75a3e6505058880L,0x7da365ef643943f2L,0x4147861cfab24925L,
+        0xc5c4bdb0fdb808ffL },
+      { 0x73513e34b272b56bL,0xc8327e9511b9043aL,0xfd8ce37df8844969L,
+        0x2d56db9446c2b6b5L } },
     /* 113 */
-    { { 0x2461782fff46ac6bl,0xd19f792607a2e425l,0xfafea3c409a48de1l,
-        0x0f56bd9de503ba42l },
-      { 0x137d4ed1345cda49l,0x821158fc816f299dl,0xe7c6a54aaeb43402l,
-        0x4003bb9d1173b5f1l },
-      0 },
+    { { 0x2461782fff46ac6bL,0xd19f792607a2e425L,0xfafea3c409a48de1L,
+        0x0f56bd9de503ba42L },
+      { 0x137d4ed1345cda49L,0x821158fc816f299dL,0xe7c6a54aaeb43402L,
+        0x4003bb9d1173b5f1L } },
     /* 114 */
-    { { 0x3b8e8189a0803387l,0xece115f539cbd404l,0x4297208dd2877f21l,
-        0x53765522a07f2f9el },
-      { 0xa4980a21a8a4182dl,0xa2bbd07a3219df79l,0x674d0a2e1a19a2d4l,
-        0x7a056f586c5d4549l },
-      0 },
+    { { 0x3b8e8189a0803387L,0xece115f539cbd404L,0x4297208dd2877f21L,
+        0x53765522a07f2f9eL },
+      { 0xa4980a21a8a4182dL,0xa2bbd07a3219df79L,0x674d0a2e1a19a2d4L,
+        0x7a056f586c5d4549L } },
     /* 115 */
-    { { 0x646b25589d8a2a47l,0x5b582948c3df2773l,0x51ec000eabf0d539l,
-        0x77d482f17a1a2675l },
-      { 0xb8a1bd9587853948l,0xa6f817bd6cfbffeel,0xab6ec05780681e47l,
-        0x4115012b2b38b0e4l },
-      0 },
+    { { 0x646b25589d8a2a47L,0x5b582948c3df2773L,0x51ec000eabf0d539L,
+        0x77d482f17a1a2675L },
+      { 0xb8a1bd9587853948L,0xa6f817bd6cfbffeeL,0xab6ec05780681e47L,
+        0x4115012b2b38b0e4L } },
     /* 116 */
-    { { 0x3c73f0f46de28cedl,0x1d5da7609b13ec47l,0x61b8ce9e6e5c6392l,
-        0xcdf04572fbea0946l },
-      { 0x1cb3c58b6c53c3b0l,0x97fe3c10447b843cl,0xfb2b8ae12cb9780el,
-        0xee703dda97383109l },
-      0 },
+    { { 0x3c73f0f46de28cedL,0x1d5da7609b13ec47L,0x61b8ce9e6e5c6392L,
+        0xcdf04572fbea0946L },
+      { 0x1cb3c58b6c53c3b0L,0x97fe3c10447b843cL,0xfb2b8ae12cb9780eL,
+        0xee703dda97383109L } },
     /* 117 */
-    { { 0x34515140ff57e43al,0xd44660d3b1b811b8l,0x2b3b5dff8f42b986l,
-        0x2a0ad89da162ce21l },
-      { 0x64e4a6946bc277bal,0xc788c954c141c276l,0x141aa64ccabf6274l,
-        0xd62d0b67ac2b4659l },
-      0 },
+    { { 0x34515140ff57e43aL,0xd44660d3b1b811b8L,0x2b3b5dff8f42b986L,
+        0x2a0ad89da162ce21L },
+      { 0x64e4a6946bc277baL,0xc788c954c141c276L,0x141aa64ccabf6274L,
+        0xd62d0b67ac2b4659L } },
     /* 118 */
-    { { 0x39c5d87b2c054ac4l,0x57005859f27df788l,0xedf7cbf3b18128d6l,
-        0xb39a23f2991c2426l },
-      { 0x95284a15f0b16ae5l,0x0c6a05b1a136f51bl,0x1d63c137f2700783l,
-        0x04ed0092c0674cc5l },
-      0 },
+    { { 0x39c5d87b2c054ac4L,0x57005859f27df788L,0xedf7cbf3b18128d6L,
+        0xb39a23f2991c2426L },
+      { 0x95284a15f0b16ae5L,0x0c6a05b1a136f51bL,0x1d63c137f2700783L,
+        0x04ed0092c0674cc5L } },
     /* 119 */
-    { { 0x1f4185d19ae90393l,0x3047b4294a3d64e6l,0xae0001a69854fc14l,
-        0xa0a91fc10177c387l },
-      { 0xff0a3f01ae2c831el,0xbb76ae822b727e16l,0x8f12c8a15a3075b4l,
-        0x084cf9889ed20c41l },
-      0 },
+    { { 0x1f4185d19ae90393L,0x3047b4294a3d64e6L,0xae0001a69854fc14L,
+        0xa0a91fc10177c387L },
+      { 0xff0a3f01ae2c831eL,0xbb76ae822b727e16L,0x8f12c8a15a3075b4L,
+        0x084cf9889ed20c41L } },
     /* 120 */
-    { { 0xd98509defca6becfl,0x2fceae807dffb328l,0x5d8a15c44778e8b9l,
-        0xd57955b273abf77el },
-      { 0x210da79e31b5d4f1l,0xaa52f04b3cfa7a1cl,0xd4d12089dc27c20bl,
-        0x8e14ea4202d141f1l },
-      0 },
+    { { 0xd98509defca6becfL,0x2fceae807dffb328L,0x5d8a15c44778e8b9L,
+        0xd57955b273abf77eL },
+      { 0x210da79e31b5d4f1L,0xaa52f04b3cfa7a1cL,0xd4d12089dc27c20bL,
+        0x8e14ea4202d141f1L } },
     /* 121 */
-    { { 0xeed50345f2897042l,0x8d05331f43402c4al,0xc8d9c194c8bdfb21l,
-        0x597e1a372aa4d158l },
-      { 0x0327ec1acf0bd68cl,0x6d4be0dcab024945l,0x5b9c8d7ac9fe3e84l,
-        0xca3f0236199b4deal },
-      0 },
+    { { 0xeed50345f2897042L,0x8d05331f43402c4aL,0xc8d9c194c8bdfb21L,
+        0x597e1a372aa4d158L },
+      { 0x0327ec1acf0bd68cL,0x6d4be0dcab024945L,0x5b9c8d7ac9fe3e84L,
+        0xca3f0236199b4deaL } },
     /* 122 */
-    { { 0x592a10b56170bd20l,0x0ea897f16d3f5de7l,0xa3363ff144b2ade2l,
-        0xbde7fd7e309c07e4l },
-      { 0x516bb6d2b8f5432cl,0x210dc1cbe043444bl,0x3db01e6ff8f95b5al,
-        0xb623ad0e0a7dd198l },
-      0 },
+    { { 0x592a10b56170bd20L,0x0ea897f16d3f5de7L,0xa3363ff144b2ade2L,
+        0xbde7fd7e309c07e4L },
+      { 0x516bb6d2b8f5432cL,0x210dc1cbe043444bL,0x3db01e6ff8f95b5aL,
+        0xb623ad0e0a7dd198L } },
     /* 123 */
-    { { 0xa75bd67560c7b65bl,0xab8c559023a4a289l,0xf8220fd0d7b26795l,
-        0xd6aa2e4658ec137bl },
-      { 0x10abc00b5138bb85l,0x8c31d121d833a95cl,0xb24ff00b1702a32el,
-        0x111662e02dcc513al },
-      0 },
+    { { 0xa75bd67560c7b65bL,0xab8c559023a4a289L,0xf8220fd0d7b26795L,
+        0xd6aa2e4658ec137bL },
+      { 0x10abc00b5138bb85L,0x8c31d121d833a95cL,0xb24ff00b1702a32eL,
+        0x111662e02dcc513aL } },
     /* 124 */
-    { { 0x78114015efb42b87l,0xbd9f5d701b6c4dffl,0x66ecccd7a7d7c129l,
-        0xdb3ee1cb94b750f8l },
-      { 0xb26f3db0f34837cfl,0xe7eed18bb9578d4fl,0x5d2cdf937c56657dl,
-        0x886a644252206a59l },
-      0 },
+    { { 0x78114015efb42b87L,0xbd9f5d701b6c4dffL,0x66ecccd7a7d7c129L,
+        0xdb3ee1cb94b750f8L },
+      { 0xb26f3db0f34837cfL,0xe7eed18bb9578d4fL,0x5d2cdf937c56657dL,
+        0x886a644252206a59L } },
     /* 125 */
-    { { 0x3c234cfb65b569eal,0x20011141f72119c1l,0x8badc85da15a619el,
-        0xa70cf4eb018a17bcl },
-      { 0x224f97ae8c4a6a65l,0x36e5cf270134378fl,0xbe3a609e4f7e0960l,
-        0xaa4772abd1747b77l },
-      0 },
+    { { 0x3c234cfb65b569eaL,0x20011141f72119c1L,0x8badc85da15a619eL,
+        0xa70cf4eb018a17bcL },
+      { 0x224f97ae8c4a6a65L,0x36e5cf270134378fL,0xbe3a609e4f7e0960L,
+        0xaa4772abd1747b77L } },
     /* 126 */
-    { { 0x676761317aa60cc0l,0xc79163610368115fl,0xded98bb4bbc1bb5al,
-        0x611a6ddc30faf974l },
-      { 0x30e78cbcc15ee47al,0x2e8962824e0d96a5l,0x36f35adf3dd9ed88l,
-        0x5cfffaf816429c88l },
-      0 },
+    { { 0x676761317aa60cc0L,0xc79163610368115fL,0xded98bb4bbc1bb5aL,
+        0x611a6ddc30faf974L },
+      { 0x30e78cbcc15ee47aL,0x2e8962824e0d96a5L,0x36f35adf3dd9ed88L,
+        0x5cfffaf816429c88L } },
     /* 127 */
-    { { 0xc0d54cff9b7a99cdl,0x7bf3b99d843c45a1l,0x038a908f62c739e1l,
-        0x6e5a6b237dc1994cl },
-      { 0xef8b454e0ba5db77l,0xb7b8807facf60d63l,0xe591c0c676608378l,
-        0x481a238d242dabccl },
-      0 },
+    { { 0xc0d54cff9b7a99cdL,0x7bf3b99d843c45a1L,0x038a908f62c739e1L,
+        0x6e5a6b237dc1994cL },
+      { 0xef8b454e0ba5db77L,0xb7b8807facf60d63L,0xe591c0c676608378L,
+        0x481a238d242dabccL } },
     /* 128 */
-    { { 0xe3417bc035d0b34al,0x440b386b8327c0a7l,0x8fb7262dac0362d1l,
-        0x2c41114ce0cdf943l },
-      { 0x2ba5cef1ad95a0b1l,0xc09b37a867d54362l,0x26d6cdd201e486c9l,
-        0x20477abf42ff9297l },
-      0 },
+    { { 0xe3417bc035d0b34aL,0x440b386b8327c0a7L,0x8fb7262dac0362d1L,
+        0x2c41114ce0cdf943L },
+      { 0x2ba5cef1ad95a0b1L,0xc09b37a867d54362L,0x26d6cdd201e486c9L,
+        0x20477abf42ff9297L } },
     /* 129 */
-    { { 0x2f75173c18d65dbfl,0x77bf940e339edad8l,0x7022d26bdcf1001cl,
-        0xac66409ac77396b6l },
-      { 0x8b0bb36fc6261cc3l,0x213f7bc9190e7e90l,0x6541cebaa45e6c10l,
-        0xce8e6975cc122f85l },
-      0 },
+    { { 0x2f75173c18d65dbfL,0x77bf940e339edad8L,0x7022d26bdcf1001cL,
+        0xac66409ac77396b6L },
+      { 0x8b0bb36fc6261cc3L,0x213f7bc9190e7e90L,0x6541cebaa45e6c10L,
+        0xce8e6975cc122f85L } },
     /* 130 */
-    { { 0x0f121b41bc0a67d2l,0x62d4760a444d248al,0x0e044f1d659b4737l,
-        0x08fde365250bb4a8l },
-      { 0xaceec3da848bf287l,0xc2a62182d3369d6el,0x3582dfdc92449482l,
-        0x2f7e2fd2565d6cd7l },
-      0 },
+    { { 0x0f121b41bc0a67d2L,0x62d4760a444d248aL,0x0e044f1d659b4737L,
+        0x08fde365250bb4a8L },
+      { 0xaceec3da848bf287L,0xc2a62182d3369d6eL,0x3582dfdc92449482L,
+        0x2f7e2fd2565d6cd7L } },
     /* 131 */
-    { { 0xae4b92dbc3770fa7l,0x095e8d5c379043f9l,0x54f34e9d17761171l,
-        0xc65be92e907702ael },
-      { 0x2758a303f6fd0a40l,0xe7d822e3bcce784bl,0x7ae4f5854f9767bfl,
-        0x4bff8e47d1193b3al },
-      0 },
+    { { 0xae4b92dbc3770fa7L,0x095e8d5c379043f9L,0x54f34e9d17761171L,
+        0xc65be92e907702aeL },
+      { 0x2758a303f6fd0a40L,0xe7d822e3bcce784bL,0x7ae4f5854f9767bfL,
+        0x4bff8e47d1193b3aL } },
     /* 132 */
-    { { 0xcd41d21f00ff1480l,0x2ab8fb7d0754db16l,0xac81d2efbbe0f3eal,
-        0x3e4e4ae65772967dl },
-      { 0x7e18f36d3c5303e6l,0x3bd9994b92262397l,0x9ed70e261324c3c0l,
-        0x5388aefd58ec6028l },
-      0 },
+    { { 0xcd41d21f00ff1480L,0x2ab8fb7d0754db16L,0xac81d2efbbe0f3eaL,
+        0x3e4e4ae65772967dL },
+      { 0x7e18f36d3c5303e6L,0x3bd9994b92262397L,0x9ed70e261324c3c0L,
+        0x5388aefd58ec6028L } },
     /* 133 */
-    { { 0xad1317eb5e5d7713l,0x09b985ee75de49dal,0x32f5bc4fc74fb261l,
-        0x5cf908d14f75be0el },
-      { 0x760435108e657b12l,0xbfd421a5b96ed9e6l,0x0e29f51f8970ccc2l,
-        0xa698ba4060f00ce2l },
-      0 },
+    { { 0xad1317eb5e5d7713L,0x09b985ee75de49daL,0x32f5bc4fc74fb261L,
+        0x5cf908d14f75be0eL },
+      { 0x760435108e657b12L,0xbfd421a5b96ed9e6L,0x0e29f51f8970ccc2L,
+        0xa698ba4060f00ce2L } },
     /* 134 */
-    { { 0x73db1686ef748fecl,0xe6e755a27e9d2cf9l,0x630b6544ce265effl,
-        0xb142ef8a7aebad8dl },
-      { 0xad31af9f17d5770al,0x66af3b672cb3412fl,0x6bd60d1bdf3359del,
-        0xd1896a9658515075l },
-      0 },
+    { { 0x73db1686ef748fecL,0xe6e755a27e9d2cf9L,0x630b6544ce265effL,
+        0xb142ef8a7aebad8dL },
+      { 0xad31af9f17d5770aL,0x66af3b672cb3412fL,0x6bd60d1bdf3359deL,
+        0xd1896a9658515075L } },
     /* 135 */
-    { { 0xec5957ab33c41c08l,0x87de94ac5468e2e1l,0x18816b73ac472f6cl,
-        0x267b0e0b7981da39l },
-      { 0x6e554e5d8e62b988l,0xd8ddc755116d21e7l,0x4610faf03d2a6f99l,
-        0xb54e287aa1119393l },
-      0 },
+    { { 0xec5957ab33c41c08L,0x87de94ac5468e2e1L,0x18816b73ac472f6cL,
+        0x267b0e0b7981da39L },
+      { 0x6e554e5d8e62b988L,0xd8ddc755116d21e7L,0x4610faf03d2a6f99L,
+        0xb54e287aa1119393L } },
     /* 136 */
-    { { 0x0a0122b5178a876bl,0x51ff96ff085104b4l,0x050b31ab14f29f76l,
-        0x84abb28b5f87d4e6l },
-      { 0xd5ed439f8270790al,0x2d6cb59d85e3f46bl,0x75f55c1b6c1e2212l,
-        0xe5436f6717655640l },
-      0 },
+    { { 0x0a0122b5178a876bL,0x51ff96ff085104b4L,0x050b31ab14f29f76L,
+        0x84abb28b5f87d4e6L },
+      { 0xd5ed439f8270790aL,0x2d6cb59d85e3f46bL,0x75f55c1b6c1e2212L,
+        0xe5436f6717655640L } },
     /* 137 */
-    { { 0x53f9025e2286e8d5l,0x353c95b4864453bel,0xd832f5bde408e3a0l,
-        0x0404f68b5b9ce99el },
-      { 0xcad33bdea781e8e5l,0x3cdf5018163c2f5bl,0x575769600119caa3l,
-        0x3a4263df0ac1c701l },
-      0 },
+    { { 0x53f9025e2286e8d5L,0x353c95b4864453beL,0xd832f5bde408e3a0L,
+        0x0404f68b5b9ce99eL },
+      { 0xcad33bdea781e8e5L,0x3cdf5018163c2f5bL,0x575769600119caa3L,
+        0x3a4263df0ac1c701L } },
     /* 138 */
-    { { 0xc2965ecc9aeb596dl,0x01ea03e7023c92b4l,0x4704b4b62e013961l,
-        0x0ca8fd3f905ea367l },
-      { 0x92523a42551b2b61l,0x1eb7a89c390fcd06l,0xe7f1d2be0392a63el,
-        0x96dca2644ddb0c33l },
-      0 },
+    { { 0xc2965ecc9aeb596dL,0x01ea03e7023c92b4L,0x4704b4b62e013961L,
+        0x0ca8fd3f905ea367L },
+      { 0x92523a42551b2b61L,0x1eb7a89c390fcd06L,0xe7f1d2be0392a63eL,
+        0x96dca2644ddb0c33L } },
     /* 139 */
-    { { 0x203bb43a387510afl,0x846feaa8a9a36a01l,0xd23a57702f950378l,
-        0x4363e2123aad59dcl },
-      { 0xca43a1c740246a47l,0xb362b8d2e55dd24dl,0xf9b086045d8faf96l,
-        0x840e115cd8bb98c4l },
-      0 },
+    { { 0x203bb43a387510afL,0x846feaa8a9a36a01L,0xd23a57702f950378L,
+        0x4363e2123aad59dcL },
+      { 0xca43a1c740246a47L,0xb362b8d2e55dd24dL,0xf9b086045d8faf96L,
+        0x840e115cd8bb98c4L } },
     /* 140 */
-    { { 0xf12205e21023e8a7l,0xc808a8cdd8dc7a0bl,0xe292a272163a5ddfl,
-        0x5e0d6abd30ded6d4l },
-      { 0x07a721c27cfc0f64l,0x42eec01d0e55ed88l,0x26a7bef91d1f9db2l,
-        0x7dea48f42945a25al },
-      0 },
+    { { 0xf12205e21023e8a7L,0xc808a8cdd8dc7a0bL,0xe292a272163a5ddfL,
+        0x5e0d6abd30ded6d4L },
+      { 0x07a721c27cfc0f64L,0x42eec01d0e55ed88L,0x26a7bef91d1f9db2L,
+        0x7dea48f42945a25aL } },
     /* 141 */
-    { { 0xabdf6f1ce5060a81l,0xe79f9c72f8f95615l,0xcfd36c5406ac268bl,
-        0xabc2a2beebfd16d1l },
-      { 0x8ac66f91d3e2eac7l,0x6f10ba63d2dd0466l,0x6790e3770282d31bl,
-        0x4ea353946c7eefc1l },
-      0 },
+    { { 0xabdf6f1ce5060a81L,0xe79f9c72f8f95615L,0xcfd36c5406ac268bL,
+        0xabc2a2beebfd16d1L },
+      { 0x8ac66f91d3e2eac7L,0x6f10ba63d2dd0466L,0x6790e3770282d31bL,
+        0x4ea353946c7eefc1L } },
     /* 142 */
-    { { 0xed8a2f8d5266309dl,0x0a51c6c081945a3el,0xcecaf45a578c5dc1l,
-        0x3a76e6891c94ffc3l },
-      { 0x9aace8a47d7b0d0fl,0x963ace968f584a5fl,0x51a30c724e697fbel,
-        0x8212a10a465e6464l },
-      0 },
+    { { 0xed8a2f8d5266309dL,0x0a51c6c081945a3eL,0xcecaf45a578c5dc1L,
+        0x3a76e6891c94ffc3L },
+      { 0x9aace8a47d7b0d0fL,0x963ace968f584a5fL,0x51a30c724e697fbeL,
+        0x8212a10a465e6464L } },
     /* 143 */
-    { { 0xef7c61c3cfab8caal,0x18eb8e840e142390l,0xcd1dff677e9733cal,
-        0xaa7cab71599cb164l },
-      { 0x02fc9273bc837bd1l,0xc06407d0c36af5d7l,0x17621292f423da49l,
-        0x40e38073fe0617c3l },
-      0 },
+    { { 0xef7c61c3cfab8caaL,0x18eb8e840e142390L,0xcd1dff677e9733caL,
+        0xaa7cab71599cb164L },
+      { 0x02fc9273bc837bd1L,0xc06407d0c36af5d7L,0x17621292f423da49L,
+        0x40e38073fe0617c3L } },
     /* 144 */
-    { { 0xf4f80824a7bf9b7cl,0x365d23203fbe30d0l,0xbfbe532097cf9ce3l,
-        0xe3604700b3055526l },
-      { 0x4dcb99116cc6c2c7l,0x72683708ba4cbee6l,0xdcded434637ad9ecl,
-        0x6542d677a3dee15fl },
-      0 },
+    { { 0xf4f80824a7bf9b7cL,0x365d23203fbe30d0L,0xbfbe532097cf9ce3L,
+        0xe3604700b3055526L },
+      { 0x4dcb99116cc6c2c7L,0x72683708ba4cbee6L,0xdcded434637ad9ecL,
+        0x6542d677a3dee15fL } },
     /* 145 */
-    { { 0x3f32b6d07b6c377al,0x6cb03847903448bel,0xd6fdd3a820da8af7l,
-        0xa6534aee09bb6f21l },
-      { 0x30a1780d1035facfl,0x35e55a339dcb47e6l,0x6ea50fe1c447f393l,
-        0xf3cb672fdc9aef22l },
-      0 },
+    { { 0x3f32b6d07b6c377aL,0x6cb03847903448beL,0xd6fdd3a820da8af7L,
+        0xa6534aee09bb6f21L },
+      { 0x30a1780d1035facfL,0x35e55a339dcb47e6L,0x6ea50fe1c447f393L,
+        0xf3cb672fdc9aef22L } },
     /* 146 */
-    { { 0xeb3719fe3b55fd83l,0xe0d7a46c875ddd10l,0x33ac9fa905cea784l,
-        0x7cafaa2eaae870e7l },
-      { 0x9b814d041d53b338l,0xe0acc0a0ef87e6c6l,0xfb93d10811672b0fl,
-        0x0aab13c1b9bd522el },
-      0 },
+    { { 0xeb3719fe3b55fd83L,0xe0d7a46c875ddd10L,0x33ac9fa905cea784L,
+        0x7cafaa2eaae870e7L },
+      { 0x9b814d041d53b338L,0xe0acc0a0ef87e6c6L,0xfb93d10811672b0fL,
+        0x0aab13c1b9bd522eL } },
     /* 147 */
-    { { 0xddcce278d2681297l,0xcb350eb1b509546al,0x2dc431737661aaf2l,
-        0x4b91a602847012e9l },
-      { 0xdcff109572f8ddcfl,0x08ebf61e9a911af4l,0x48f4360ac372430el,
-        0x49534c5372321cabl },
-      0 },
+    { { 0xddcce278d2681297L,0xcb350eb1b509546aL,0x2dc431737661aaf2L,
+        0x4b91a602847012e9L },
+      { 0xdcff109572f8ddcfL,0x08ebf61e9a911af4L,0x48f4360ac372430eL,
+        0x49534c5372321cabL } },
     /* 148 */
-    { { 0x83df7d71f07b7e9dl,0xa478efa313cd516fl,0x78ef264b6c047ee3l,
-        0xcaf46c4fd65ac5eel },
-      { 0xa04d0c7792aa8266l,0xedf45466913684bbl,0x56e65168ae4b16b0l,
-        0x14ce9e5704c6770fl },
-      0 },
+    { { 0x83df7d71f07b7e9dL,0xa478efa313cd516fL,0x78ef264b6c047ee3L,
+        0xcaf46c4fd65ac5eeL },
+      { 0xa04d0c7792aa8266L,0xedf45466913684bbL,0x56e65168ae4b16b0L,
+        0x14ce9e5704c6770fL } },
     /* 149 */
-    { { 0x99445e3e965e8f91l,0xd3aca1bacb0f2492l,0xd31cc70f90c8a0a0l,
-        0x1bb708a53e4c9a71l },
-      { 0xd5ca9e69558bdd7al,0x734a0508018a26b1l,0xb093aa714c9cf1ecl,
-        0xf9d126f2da300102l },
-      0 },
+    { { 0x99445e3e965e8f91L,0xd3aca1bacb0f2492L,0xd31cc70f90c8a0a0L,
+        0x1bb708a53e4c9a71L },
+      { 0xd5ca9e69558bdd7aL,0x734a0508018a26b1L,0xb093aa714c9cf1ecL,
+        0xf9d126f2da300102L } },
     /* 150 */
-    { { 0x749bca7aaff9563el,0xdd077afeb49914a0l,0xe27a0311bf5f1671l,
-        0x807afcb9729ecc69l },
-      { 0x7f8a9337c9b08b77l,0x86c3a785443c7e38l,0x85fafa59476fd8bal,
-        0x751adcd16568cd8cl },
-      0 },
+    { { 0x749bca7aaff9563eL,0xdd077afeb49914a0L,0xe27a0311bf5f1671L,
+        0x807afcb9729ecc69L },
+      { 0x7f8a9337c9b08b77L,0x86c3a785443c7e38L,0x85fafa59476fd8baL,
+        0x751adcd16568cd8cL } },
     /* 151 */
-    { { 0x8aea38b410715c0dl,0xd113ea718f7697f7l,0x665eab1493fbf06dl,
-        0x29ec44682537743fl },
-      { 0x3d94719cb50bebbcl,0x399ee5bfe4505422l,0x90cd5b3a8d2dedb1l,
-        0xff9370e392a4077dl },
-      0 },
+    { { 0x8aea38b410715c0dL,0xd113ea718f7697f7L,0x665eab1493fbf06dL,
+        0x29ec44682537743fL },
+      { 0x3d94719cb50bebbcL,0x399ee5bfe4505422L,0x90cd5b3a8d2dedb1L,
+        0xff9370e392a4077dL } },
     /* 152 */
-    { { 0x59a2d69bc6b75b65l,0x4188f8d5266651c5l,0x28a9f33e3de9d7d2l,
-        0x9776478ba2a9d01al },
-      { 0x8852622d929af2c7l,0x334f5d6d4e690923l,0xce6cc7e5a89a51e9l,
-        0x74a6313fac2f82fal },
-      0 },
+    { { 0x59a2d69bc6b75b65L,0x4188f8d5266651c5L,0x28a9f33e3de9d7d2L,
+        0x9776478ba2a9d01aL },
+      { 0x8852622d929af2c7L,0x334f5d6d4e690923L,0xce6cc7e5a89a51e9L,
+        0x74a6313fac2f82faL } },
     /* 153 */
-    { { 0xb2f4dfddb75f079cl,0x85b07c9518e36fbbl,0x1b6cfcf0e7cd36ddl,
-        0xab75be150ff4863dl },
-      { 0x81b367c0173fc9b7l,0xb90a7420d2594fd0l,0x15fdbf03c4091236l,
-        0x4ebeac2e0b4459f6l },
-      0 },
+    { { 0xb2f4dfddb75f079cL,0x85b07c9518e36fbbL,0x1b6cfcf0e7cd36ddL,
+        0xab75be150ff4863dL },
+      { 0x81b367c0173fc9b7L,0xb90a7420d2594fd0L,0x15fdbf03c4091236L,
+        0x4ebeac2e0b4459f6L } },
     /* 154 */
-    { { 0xeb6c5fe75c9f2c53l,0xd25220118eae9411l,0xc8887633f95ac5d8l,
-        0xdf99887b2c1baffcl },
-      { 0xbb78eed2850aaecbl,0x9d49181b01d6a272l,0x978dd511b1cdbcacl,
-        0x27b040a7779f4058l },
-      0 },
+    { { 0xeb6c5fe75c9f2c53L,0xd25220118eae9411L,0xc8887633f95ac5d8L,
+        0xdf99887b2c1baffcL },
+      { 0xbb78eed2850aaecbL,0x9d49181b01d6a272L,0x978dd511b1cdbcacL,
+        0x27b040a7779f4058L } },
     /* 155 */
-    { { 0x90405db7f73b2eb2l,0xe0df85088e1b2118l,0x501b71525962327el,
-        0xb393dd37e4cfa3f5l },
-      { 0xa1230e7b3fd75165l,0xd66344c2bcd33554l,0x6c36f1be0f7b5022l,
-        0x09588c12d0463419l },
-      0 },
+    { { 0x90405db7f73b2eb2L,0xe0df85088e1b2118L,0x501b71525962327eL,
+        0xb393dd37e4cfa3f5L },
+      { 0xa1230e7b3fd75165L,0xd66344c2bcd33554L,0x6c36f1be0f7b5022L,
+        0x09588c12d0463419L } },
     /* 156 */
-    { { 0xe086093f02601c3bl,0xfb0252f8cf5c335fl,0x955cf280894aff28l,
-        0x81c879a9db9f648bl },
-      { 0x040e687cc6f56c51l,0xfed471693f17618cl,0x44f88a419059353bl,
-        0xfa0d48f55fc11bc4l },
-      0 },
+    { { 0xe086093f02601c3bL,0xfb0252f8cf5c335fL,0x955cf280894aff28L,
+        0x81c879a9db9f648bL },
+      { 0x040e687cc6f56c51L,0xfed471693f17618cL,0x44f88a419059353bL,
+        0xfa0d48f55fc11bc4L } },
     /* 157 */
-    { { 0xbc6e1c9de1608e4dl,0x010dda113582822cl,0xf6b7ddc1157ec2d7l,
-        0x8ea0e156b6a367d6l },
-      { 0xa354e02f2383b3b4l,0x69966b943f01f53cl,0x4ff6632b2de03ca5l,
-        0x3f5ab924fa00b5acl },
-      0 },
+    { { 0xbc6e1c9de1608e4dL,0x010dda113582822cL,0xf6b7ddc1157ec2d7L,
+        0x8ea0e156b6a367d6L },
+      { 0xa354e02f2383b3b4L,0x69966b943f01f53cL,0x4ff6632b2de03ca5L,
+        0x3f5ab924fa00b5acL } },
     /* 158 */
-    { { 0x337bb0d959739efbl,0xc751b0f4e7ebec0dl,0x2da52dd6411a67d1l,
-        0x8bc768872b74256el },
-      { 0xa5be3b7282d3d253l,0xa9f679a1f58d779fl,0xa1cac168e16767bbl,
-        0xb386f19060fcf34fl },
-      0 },
+    { { 0x337bb0d959739efbL,0xc751b0f4e7ebec0dL,0x2da52dd6411a67d1L,
+        0x8bc768872b74256eL },
+      { 0xa5be3b7282d3d253L,0xa9f679a1f58d779fL,0xa1cac168e16767bbL,
+        0xb386f19060fcf34fL } },
     /* 159 */
-    { { 0x31f3c1352fedcfc2l,0x5396bf6262f8af0dl,0x9a02b4eae57288c2l,
-        0x4cb460f71b069c4dl },
-      { 0xae67b4d35b8095eal,0x92bbf8596fc07603l,0xe1475f66b614a165l,
-        0x52c0d50895ef5223l },
-      0 },
+    { { 0x31f3c1352fedcfc2L,0x5396bf6262f8af0dL,0x9a02b4eae57288c2L,
+        0x4cb460f71b069c4dL },
+      { 0xae67b4d35b8095eaL,0x92bbf8596fc07603L,0xe1475f66b614a165L,
+        0x52c0d50895ef5223L } },
     /* 160 */
-    { { 0x231c210e15339848l,0xe87a28e870778c8dl,0x9d1de6616956e170l,
-        0x4ac3c9382bb09c0bl },
-      { 0x19be05516998987dl,0x8b2376c4ae09f4d6l,0x1de0b7651a3f933dl,
-        0x380d94c7e39705f4l },
-      0 },
+    { { 0x231c210e15339848L,0xe87a28e870778c8dL,0x9d1de6616956e170L,
+        0x4ac3c9382bb09c0bL },
+      { 0x19be05516998987dL,0x8b2376c4ae09f4d6L,0x1de0b7651a3f933dL,
+        0x380d94c7e39705f4L } },
     /* 161 */
-    { { 0x01a355aa81542e75l,0x96c724a1ee01b9b7l,0x6b3a2977624d7087l,
-        0x2ce3e171de2637afl },
-      { 0xcfefeb49f5d5bc1al,0xa655607e2777e2b5l,0x4feaac2f9513756cl,
-        0x2e6cd8520b624e4dl },
-      0 },
+    { { 0x01a355aa81542e75L,0x96c724a1ee01b9b7L,0x6b3a2977624d7087L,
+        0x2ce3e171de2637afL },
+      { 0xcfefeb49f5d5bc1aL,0xa655607e2777e2b5L,0x4feaac2f9513756cL,
+        0x2e6cd8520b624e4dL } },
     /* 162 */
-    { { 0x3685954b8c31c31dl,0x68533d005bf21a0cl,0x0bd7626e75c79ec9l,
-        0xca17754742c69d54l },
-      { 0xcc6edafff6d2dbb2l,0xfd0d8cbd174a9d18l,0x875e8793aa4578e8l,
-        0xa976a7139cab2ce6l },
-      0 },
+    { { 0x3685954b8c31c31dL,0x68533d005bf21a0cL,0x0bd7626e75c79ec9L,
+        0xca17754742c69d54L },
+      { 0xcc6edafff6d2dbb2L,0xfd0d8cbd174a9d18L,0x875e8793aa4578e8L,
+        0xa976a7139cab2ce6L } },
     /* 163 */
-    { { 0x0a651f1b93fb353dl,0xd75cab8b57fcfa72l,0xaa88cfa731b15281l,
-        0x8720a7170a1f4999l },
-      { 0x8c3e8d37693e1b90l,0xd345dc0b16f6dfc3l,0x8ea8d00ab52a8742l,
-        0x9719ef29c769893cl },
-      0 },
+    { { 0x0a651f1b93fb353dL,0xd75cab8b57fcfa72L,0xaa88cfa731b15281L,
+        0x8720a7170a1f4999L },
+      { 0x8c3e8d37693e1b90L,0xd345dc0b16f6dfc3L,0x8ea8d00ab52a8742L,
+        0x9719ef29c769893cL } },
     /* 164 */
-    { { 0x820eed8d58e35909l,0x9366d8dc33ddc116l,0xd7f999d06e205026l,
-        0xa5072976e15704c1l },
-      { 0x002a37eac4e70b2el,0x84dcf6576890aa8al,0xcd71bf18645b2a5cl,
-        0x99389c9df7b77725l },
-      0 },
+    { { 0x820eed8d58e35909L,0x9366d8dc33ddc116L,0xd7f999d06e205026L,
+        0xa5072976e15704c1L },
+      { 0x002a37eac4e70b2eL,0x84dcf6576890aa8aL,0xcd71bf18645b2a5cL,
+        0x99389c9df7b77725L } },
     /* 165 */
-    { { 0x238c08f27ada7a4bl,0x3abe9d03fd389366l,0x6b672e89766f512cl,
-        0xa88806aa202c82e4l },
-      { 0x6602044ad380184el,0xa8cb78c4126a8b85l,0x79d670c0ad844f17l,
-        0x0043bffb4738dcfel },
-      0 },
+    { { 0x238c08f27ada7a4bL,0x3abe9d03fd389366L,0x6b672e89766f512cL,
+        0xa88806aa202c82e4L },
+      { 0x6602044ad380184eL,0xa8cb78c4126a8b85L,0x79d670c0ad844f17L,
+        0x0043bffb4738dcfeL } },
     /* 166 */
-    { { 0x8d59b5dc36d5192el,0xacf885d34590b2afl,0x83566d0a11601781l,
-        0x52f3ef01ba6c4866l },
-      { 0x3986732a0edcb64dl,0x0a482c238068379fl,0x16cbe5fa7040f309l,
-        0x3296bd899ef27e75l },
-      0 },
+    { { 0x8d59b5dc36d5192eL,0xacf885d34590b2afL,0x83566d0a11601781L,
+        0x52f3ef01ba6c4866L },
+      { 0x3986732a0edcb64dL,0x0a482c238068379fL,0x16cbe5fa7040f309L,
+        0x3296bd899ef27e75L } },
     /* 167 */
-    { { 0x476aba89454d81d7l,0x9eade7ef51eb9b3cl,0x619a21cd81c57986l,
-        0x3b90febfaee571e9l },
-      { 0x9393023e5496f7cbl,0x55be41d87fb51bc4l,0x03f1dd4899beb5cel,
-        0x6e88069d9f810b18l },
-      0 },
+    { { 0x476aba89454d81d7L,0x9eade7ef51eb9b3cL,0x619a21cd81c57986L,
+        0x3b90febfaee571e9L },
+      { 0x9393023e5496f7cbL,0x55be41d87fb51bc4L,0x03f1dd4899beb5ceL,
+        0x6e88069d9f810b18L } },
     /* 168 */
-    { { 0xce37ab11b43ea1dbl,0x0a7ff1a95259d292l,0x851b02218f84f186l,
-        0xa7222beadefaad13l },
-      { 0xa2ac78ec2b0a9144l,0x5a024051f2fa59c5l,0x91d1eca56147ce38l,
-        0xbe94d523bc2ac690l },
-      0 },
+    { { 0xce37ab11b43ea1dbL,0x0a7ff1a95259d292L,0x851b02218f84f186L,
+        0xa7222beadefaad13L },
+      { 0xa2ac78ec2b0a9144L,0x5a024051f2fa59c5L,0x91d1eca56147ce38L,
+        0xbe94d523bc2ac690L } },
     /* 169 */
-    { { 0x72f4945e0b226ce7l,0xb8afd747967e8b70l,0xedea46f185a6c63el,
-        0x7782defe9be8c766l },
-      { 0x760d2aa43db38626l,0x460ae78776f67ad1l,0x341b86fc54499cdbl,
-        0x03838567a2892e4bl },
-      0 },
+    { { 0x72f4945e0b226ce7L,0xb8afd747967e8b70L,0xedea46f185a6c63eL,
+        0x7782defe9be8c766L },
+      { 0x760d2aa43db38626L,0x460ae78776f67ad1L,0x341b86fc54499cdbL,
+        0x03838567a2892e4bL } },
     /* 170 */
-    { { 0x2d8daefd79ec1a0fl,0x3bbcd6fdceb39c97l,0xf5575ffc58f61a95l,
-        0xdbd986c4adf7b420l },
-      { 0x81aa881415f39eb7l,0x6ee2fcf5b98d976cl,0x5465475dcf2f717dl,
-        0x8e24d3c46860bbd0l },
-      0 },
+    { { 0x2d8daefd79ec1a0fL,0x3bbcd6fdceb39c97L,0xf5575ffc58f61a95L,
+        0xdbd986c4adf7b420L },
+      { 0x81aa881415f39eb7L,0x6ee2fcf5b98d976cL,0x5465475dcf2f717dL,
+        0x8e24d3c46860bbd0L } },
     /* 171 */
-    { { 0x749d8e549a587390l,0x12bb194f0cbec588l,0x46e07da4b25983c6l,
-        0x541a99c4407bafc8l },
-      { 0xdb241692624c8842l,0x6044c12ad86c05ffl,0xc59d14b44f7fcf62l,
-        0xc0092c49f57d35d1l },
-      0 },
+    { { 0x749d8e549a587390L,0x12bb194f0cbec588L,0x46e07da4b25983c6L,
+        0x541a99c4407bafc8L },
+      { 0xdb241692624c8842L,0x6044c12ad86c05ffL,0xc59d14b44f7fcf62L,
+        0xc0092c49f57d35d1L } },
     /* 172 */
-    { { 0xd3cc75c3df2e61efl,0x7e8841c82e1b35cal,0xc62d30d1909f29f4l,
-        0x75e406347286944dl },
-      { 0xe7d41fc5bbc237d0l,0xc9537bf0ec4f01c9l,0x91c51a16282bd534l,
-        0x5b7cb658c7848586l },
-      0 },
+    { { 0xd3cc75c3df2e61efL,0x7e8841c82e1b35caL,0xc62d30d1909f29f4L,
+        0x75e406347286944dL },
+      { 0xe7d41fc5bbc237d0L,0xc9537bf0ec4f01c9L,0x91c51a16282bd534L,
+        0x5b7cb658c7848586L } },
     /* 173 */
-    { { 0x964a70848a28ead1l,0x802dc508fd3b47f6l,0x9ae4bfd1767e5b39l,
-        0x7ae13eba8df097a1l },
-      { 0xfd216ef8eadd384el,0x0361a2d9b6b2ff06l,0x204b98784bcdb5f3l,
-        0x787d8074e2a8e3fdl },
-      0 },
+    { { 0x964a70848a28ead1L,0x802dc508fd3b47f6L,0x9ae4bfd1767e5b39L,
+        0x7ae13eba8df097a1L },
+      { 0xfd216ef8eadd384eL,0x0361a2d9b6b2ff06L,0x204b98784bcdb5f3L,
+        0x787d8074e2a8e3fdL } },
     /* 174 */
-    { { 0xc5e25d6b757fbb1cl,0xe47bddb2ca201debl,0x4a55e9a36d2233ffl,
-        0x5c2228199ef28484l },
-      { 0x773d4a8588315250l,0x21b21a2b827097c1l,0xab7c4ea1def5d33fl,
-        0xe45d37abbaf0f2b0l },
-      0 },
+    { { 0xc5e25d6b757fbb1cL,0xe47bddb2ca201debL,0x4a55e9a36d2233ffL,
+        0x5c2228199ef28484L },
+      { 0x773d4a8588315250L,0x21b21a2b827097c1L,0xab7c4ea1def5d33fL,
+        0xe45d37abbaf0f2b0L } },
     /* 175 */
-    { { 0xd2df1e3428511c8al,0xebb229c8bdca6cd3l,0x578a71a7627c39a7l,
-        0xed7bc12284dfb9d3l },
-      { 0xcf22a6df93dea561l,0x5443f18dd48f0ed1l,0xd8b861405bad23e8l,
-        0xaac97cc945ca6d27l },
-      0 },
+    { { 0xd2df1e3428511c8aL,0xebb229c8bdca6cd3L,0x578a71a7627c39a7L,
+        0xed7bc12284dfb9d3L },
+      { 0xcf22a6df93dea561L,0x5443f18dd48f0ed1L,0xd8b861405bad23e8L,
+        0xaac97cc945ca6d27L } },
     /* 176 */
-    { { 0xeb54ea74a16bd00al,0xd839e9adf5c0bcc1l,0x092bb7f11f9bfc06l,
-        0x318f97b31163dc4el },
-      { 0xecc0c5bec30d7138l,0x44e8df23abc30220l,0x2bb7972fb0223606l,
-        0xfa41faa19a84ff4dl },
-      0 },
+    { { 0xeb54ea74a16bd00aL,0xd839e9adf5c0bcc1L,0x092bb7f11f9bfc06L,
+        0x318f97b31163dc4eL },
+      { 0xecc0c5bec30d7138L,0x44e8df23abc30220L,0x2bb7972fb0223606L,
+        0xfa41faa19a84ff4dL } },
     /* 177 */
-    { { 0x4402d974a6642269l,0xc81814ce9bb783bdl,0x398d38e47941e60bl,
-        0x38bb6b2c1d26e9e2l },
-      { 0xc64e4a256a577f87l,0x8b52d253dc11fe1cl,0xff336abf62280728l,
-        0x94dd0905ce7601a5l },
-      0 },
+    { { 0x4402d974a6642269L,0xc81814ce9bb783bdL,0x398d38e47941e60bL,
+        0x38bb6b2c1d26e9e2L },
+      { 0xc64e4a256a577f87L,0x8b52d253dc11fe1cL,0xff336abf62280728L,
+        0x94dd0905ce7601a5L } },
     /* 178 */
-    { { 0x156cf7dcde93f92al,0xa01333cb89b5f315l,0x02404df9c995e750l,
-        0x92077867d25c2ae9l },
-      { 0xe2471e010bf39d44l,0x5f2c902096bb53d7l,0x4c44b7b35c9c3d8fl,
-        0x81e8428bd29beb51l },
-      0 },
+    { { 0x156cf7dcde93f92aL,0xa01333cb89b5f315L,0x02404df9c995e750L,
+        0x92077867d25c2ae9L },
+      { 0xe2471e010bf39d44L,0x5f2c902096bb53d7L,0x4c44b7b35c9c3d8fL,
+        0x81e8428bd29beb51L } },
     /* 179 */
-    { { 0x6dd9c2bac477199fl,0x8cb8eeee6b5ecdd9l,0x8af7db3fee40fd0el,
-        0x1b94ab62dbbfa4b1l },
-      { 0x44f0d8b3ce47f143l,0x51e623fc63f46163l,0xf18f270fcc599383l,
-        0x06a38e28055590eel },
-      0 },
+    { { 0x6dd9c2bac477199fL,0x8cb8eeee6b5ecdd9L,0x8af7db3fee40fd0eL,
+        0x1b94ab62dbbfa4b1L },
+      { 0x44f0d8b3ce47f143L,0x51e623fc63f46163L,0xf18f270fcc599383L,
+        0x06a38e28055590eeL } },
     /* 180 */
-    { { 0x2e5b0139b3355b49l,0x20e26560b4ebf99bl,0xc08ffa6bd269f3dcl,
-        0xa7b36c2083d9d4f8l },
-      { 0x64d15c3a1b3e8830l,0xd5fceae1a89f9c0bl,0xcfeee4a2e2d16930l,
-        0xbe54c6b4a2822a20l },
-      0 },
+    { { 0x2e5b0139b3355b49L,0x20e26560b4ebf99bL,0xc08ffa6bd269f3dcL,
+        0xa7b36c2083d9d4f8L },
+      { 0x64d15c3a1b3e8830L,0xd5fceae1a89f9c0bL,0xcfeee4a2e2d16930L,
+        0xbe54c6b4a2822a20L } },
     /* 181 */
-    { { 0xd6cdb3df8d91167cl,0x517c3f79e7a6625el,0x7105648f346ac7f4l,
-        0xbf30a5abeae022bbl },
-      { 0x8e7785be93828a68l,0x5161c3327f3ef036l,0xe11b5feb592146b2l,
-        0xd1c820de2732d13al },
-      0 },
+    { { 0xd6cdb3df8d91167cL,0x517c3f79e7a6625eL,0x7105648f346ac7f4L,
+        0xbf30a5abeae022bbL },
+      { 0x8e7785be93828a68L,0x5161c3327f3ef036L,0xe11b5feb592146b2L,
+        0xd1c820de2732d13aL } },
     /* 182 */
-    { { 0x043e13479038b363l,0x58c11f546b05e519l,0x4fe57abe6026cad1l,
-        0xb7d17bed68a18da3l },
-      { 0x44ca5891e29c2559l,0x4f7a03765bfffd84l,0x498de4af74e46948l,
-        0x3997fd5e6412cc64l },
-      0 },
+    { { 0x043e13479038b363L,0x58c11f546b05e519L,0x4fe57abe6026cad1L,
+        0xb7d17bed68a18da3L },
+      { 0x44ca5891e29c2559L,0x4f7a03765bfffd84L,0x498de4af74e46948L,
+        0x3997fd5e6412cc64L } },
     /* 183 */
-    { { 0xf20746828bd61507l,0x29e132d534a64d2al,0xffeddfb08a8a15e3l,
-        0x0eeb89293c6c13e8l },
-      { 0xe9b69a3ea7e259f8l,0xce1db7e6d13e7e67l,0x277318f6ad1fa685l,
-        0x228916f8c922b6efl },
-      0 },
+    { { 0xf20746828bd61507L,0x29e132d534a64d2aL,0xffeddfb08a8a15e3L,
+        0x0eeb89293c6c13e8L },
+      { 0xe9b69a3ea7e259f8L,0xce1db7e6d13e7e67L,0x277318f6ad1fa685L,
+        0x228916f8c922b6efL } },
     /* 184 */
-    { { 0x959ae25b0a12ab5bl,0xcc11171f957bc136l,0x8058429ed16e2b0cl,
-        0xec05ad1d6e93097el },
-      { 0x157ba5beac3f3708l,0x31baf93530b59d77l,0x47b55237118234e5l,
-        0x7d3141567ff11b37l },
-      0 },
+    { { 0x959ae25b0a12ab5bL,0xcc11171f957bc136L,0x8058429ed16e2b0cL,
+        0xec05ad1d6e93097eL },
+      { 0x157ba5beac3f3708L,0x31baf93530b59d77L,0x47b55237118234e5L,
+        0x7d3141567ff11b37L } },
     /* 185 */
-    { { 0x7bd9c05cf6dfefabl,0xbe2f2268dcb37707l,0xe53ead973a38bb95l,
-        0xe9ce66fc9bc1d7a3l },
-      { 0x75aa15766f6a02a1l,0x38c087df60e600edl,0xf8947f3468cdc1b9l,
-        0xd9650b0172280651l },
-      0 },
+    { { 0x7bd9c05cf6dfefabL,0xbe2f2268dcb37707L,0xe53ead973a38bb95L,
+        0xe9ce66fc9bc1d7a3L },
+      { 0x75aa15766f6a02a1L,0x38c087df60e600edL,0xf8947f3468cdc1b9L,
+        0xd9650b0172280651L } },
     /* 186 */
-    { { 0x504b4c4a5a057e60l,0xcbccc3be8def25e4l,0xa635320817c1ccbdl,
-        0x14d6699a804eb7a2l },
-      { 0x2c8a8415db1f411al,0x09fbaf0bf80d769cl,0xb4deef901c2f77adl,
-        0x6f4c68410d43598al },
-      0 },
+    { { 0x504b4c4a5a057e60L,0xcbccc3be8def25e4L,0xa635320817c1ccbdL,
+        0x14d6699a804eb7a2L },
+      { 0x2c8a8415db1f411aL,0x09fbaf0bf80d769cL,0xb4deef901c2f77adL,
+        0x6f4c68410d43598aL } },
     /* 187 */
-    { { 0x8726df4e96c24a96l,0x534dbc85fcbd99a3l,0x3c466ef28b2ae30al,
-        0x4c4350fd61189abbl },
-      { 0x2967f716f855b8dal,0x41a42394463c38a1l,0xc37e1413eae93343l,
-        0xa726d2425a3118b5l },
-      0 },
+    { { 0x8726df4e96c24a96L,0x534dbc85fcbd99a3L,0x3c466ef28b2ae30aL,
+        0x4c4350fd61189abbL },
+      { 0x2967f716f855b8daL,0x41a42394463c38a1L,0xc37e1413eae93343L,
+        0xa726d2425a3118b5L } },
     /* 188 */
-    { { 0xdae6b3ee948c1086l,0xf1de503dcbd3a2e1l,0x3f35ed3f03d022f3l,
-        0x13639e82cc6cf392l },
-      { 0x9ac938fbcdafaa86l,0xf45bc5fb2654a258l,0x1963b26e45051329l,
-        0xca9365e1c1a335a3l },
-      0 },
+    { { 0xdae6b3ee948c1086L,0xf1de503dcbd3a2e1L,0x3f35ed3f03d022f3L,
+        0x13639e82cc6cf392L },
+      { 0x9ac938fbcdafaa86L,0xf45bc5fb2654a258L,0x1963b26e45051329L,
+        0xca9365e1c1a335a3L } },
     /* 189 */
-    { { 0x3615ac754c3b2d20l,0x742a5417904e241bl,0xb08521c4cc9d071dl,
-        0x9ce29c34970b72a5l },
-      { 0x8cc81f736d3e0ad6l,0x8060da9ef2f8434cl,0x35ed1d1a6ce862d9l,
-        0x48c4abd7ab42af98l },
-      0 },
+    { { 0x3615ac754c3b2d20L,0x742a5417904e241bL,0xb08521c4cc9d071dL,
+        0x9ce29c34970b72a5L },
+      { 0x8cc81f736d3e0ad6L,0x8060da9ef2f8434cL,0x35ed1d1a6ce862d9L,
+        0x48c4abd7ab42af98L } },
     /* 190 */
-    { { 0xd221b0cc40c7485al,0xead455bbe5274dbfl,0x493c76989263d2e8l,
-        0x78017c32f67b33cbl },
-      { 0xb9d35769930cb5eel,0xc0d14e940c408ed2l,0xf8b7bf55272f1a4dl,
-        0x53cd0454de5c1c04l },
-      0 },
+    { { 0xd221b0cc40c7485aL,0xead455bbe5274dbfL,0x493c76989263d2e8L,
+        0x78017c32f67b33cbL },
+      { 0xb9d35769930cb5eeL,0xc0d14e940c408ed2L,0xf8b7bf55272f1a4dL,
+        0x53cd0454de5c1c04L } },
     /* 191 */
-    { { 0xbcd585fa5d28ccacl,0x5f823e56005b746el,0x7c79f0a1cd0123aal,
-        0xeea465c1d3d7fa8fl },
-      { 0x7810659f0551803bl,0x6c0b599f7ce6af70l,0x4195a77029288e70l,
-        0x1b6e42a47ae69193l },
-      0 },
+    { { 0xbcd585fa5d28ccacL,0x5f823e56005b746eL,0x7c79f0a1cd0123aaL,
+        0xeea465c1d3d7fa8fL },
+      { 0x7810659f0551803bL,0x6c0b599f7ce6af70L,0x4195a77029288e70L,
+        0x1b6e42a47ae69193L } },
     /* 192 */
-    { { 0x2e80937cf67d04c3l,0x1e312be289eeb811l,0x56b5d88792594d60l,
-        0x0224da14187fbd3dl },
-      { 0x87abb8630c5fe36fl,0x580f3c604ef51f5fl,0x964fb1bfb3b429ecl,
-        0x60838ef042bfff33l },
-      0 },
+    { { 0x2e80937cf67d04c3L,0x1e312be289eeb811L,0x56b5d88792594d60L,
+        0x0224da14187fbd3dL },
+      { 0x87abb8630c5fe36fL,0x580f3c604ef51f5fL,0x964fb1bfb3b429ecL,
+        0x60838ef042bfff33L } },
     /* 193 */
-    { { 0x432cb2f27e0bbe99l,0x7bda44f304aa39eel,0x5f497c7a9fa93903l,
-        0x636eb2022d331643l },
-      { 0xfcfd0e6193ae00aal,0x875a00fe31ae6d2fl,0xf43658a29f93901cl,
-        0x8844eeb639218bacl },
-      0 },
+    { { 0x432cb2f27e0bbe99L,0x7bda44f304aa39eeL,0x5f497c7a9fa93903L,
+        0x636eb2022d331643L },
+      { 0xfcfd0e6193ae00aaL,0x875a00fe31ae6d2fL,0xf43658a29f93901cL,
+        0x8844eeb639218bacL } },
     /* 194 */
-    { { 0x114171d26b3bae58l,0x7db3df7117e39f3el,0xcd37bc7f81a8eadal,
-        0x27ba83dc51fb789el },
-      { 0xa7df439ffbf54de5l,0x7277030bb5fe1a71l,0x42ee8e35db297a48l,
-        0xadb62d3487f3a4abl },
-      0 },
+    { { 0x114171d26b3bae58L,0x7db3df7117e39f3eL,0xcd37bc7f81a8eadaL,
+        0x27ba83dc51fb789eL },
+      { 0xa7df439ffbf54de5L,0x7277030bb5fe1a71L,0x42ee8e35db297a48L,
+        0xadb62d3487f3a4abL } },
     /* 195 */
-    { { 0x9b1168a2a175df2al,0x082aa04f618c32e9l,0xc9e4f2e7146b0916l,
-        0xb990fd7675e7c8b2l },
-      { 0x0829d96b4df37313l,0x1c205579d0b40789l,0x66c9ae4a78087711l,
-        0x81707ef94d10d18dl },
-      0 },
+    { { 0x9b1168a2a175df2aL,0x082aa04f618c32e9L,0xc9e4f2e7146b0916L,
+        0xb990fd7675e7c8b2L },
+      { 0x0829d96b4df37313L,0x1c205579d0b40789L,0x66c9ae4a78087711L,
+        0x81707ef94d10d18dL } },
     /* 196 */
-    { { 0x97d7cab203d6ff96l,0x5b851bfc0d843360l,0x268823c4d042db4bl,
-        0x3792daead5a8aa5cl },
-      { 0x52818865941afa0bl,0xf3e9e74142d83671l,0x17c825275be4e0a7l,
-        0x5abd635e94b001bal },
-      0 },
+    { { 0x97d7cab203d6ff96L,0x5b851bfc0d843360L,0x268823c4d042db4bL,
+        0x3792daead5a8aa5cL },
+      { 0x52818865941afa0bL,0xf3e9e74142d83671L,0x17c825275be4e0a7L,
+        0x5abd635e94b001baL } },
     /* 197 */
-    { { 0x727fa84e0ac4927cl,0xe3886035a7c8cf23l,0xa4bcd5ea4adca0dfl,
-        0x5995bf21846ab610l },
-      { 0xe90f860b829dfa33l,0xcaafe2ae958fc18bl,0x9b3baf4478630366l,
-        0x44c32ca2d483411el },
-      0 },
+    { { 0x727fa84e0ac4927cL,0xe3886035a7c8cf23L,0xa4bcd5ea4adca0dfL,
+        0x5995bf21846ab610L },
+      { 0xe90f860b829dfa33L,0xcaafe2ae958fc18bL,0x9b3baf4478630366L,
+        0x44c32ca2d483411eL } },
     /* 198 */
-    { { 0xa74a97f1e40ed80cl,0x5f938cb131d2ca82l,0x53f2124b7c2d6ad9l,
-        0x1f2162fb8082a54cl },
-      { 0x7e467cc5720b173el,0x40e8a666085f12f9l,0x8cebc20e4c9d65dcl,
-        0x8f1d402bc3e907c9l },
-      0 },
+    { { 0xa74a97f1e40ed80cL,0x5f938cb131d2ca82L,0x53f2124b7c2d6ad9L,
+        0x1f2162fb8082a54cL },
+      { 0x7e467cc5720b173eL,0x40e8a666085f12f9L,0x8cebc20e4c9d65dcL,
+        0x8f1d402bc3e907c9L } },
     /* 199 */
-    { { 0x4f592f9cfbc4058al,0xb15e14b6292f5670l,0xc55cfe37bc1d8c57l,
-        0xb1980f43926edbf9l },
-      { 0x98c33e0932c76b09l,0x1df5279d33b07f78l,0x6f08ead4863bb461l,
-        0x2828ad9b37448e45l },
-      0 },
+    { { 0x4f592f9cfbc4058aL,0xb15e14b6292f5670L,0xc55cfe37bc1d8c57L,
+        0xb1980f43926edbf9L },
+      { 0x98c33e0932c76b09L,0x1df5279d33b07f78L,0x6f08ead4863bb461L,
+        0x2828ad9b37448e45L } },
     /* 200 */
-    { { 0x696722c4c4cf4ac5l,0xf5ac1a3fdde64afbl,0x0551baa2e0890832l,
-        0x4973f1275a14b390l },
-      { 0xe59d8335322eac5dl,0x5e07eef50bd9b568l,0xab36720fa2588393l,
-        0x6dac8ed0db168ac7l },
-      0 },
+    { { 0x696722c4c4cf4ac5L,0xf5ac1a3fdde64afbL,0x0551baa2e0890832L,
+        0x4973f1275a14b390L },
+      { 0xe59d8335322eac5dL,0x5e07eef50bd9b568L,0xab36720fa2588393L,
+        0x6dac8ed0db168ac7L } },
     /* 201 */
-    { { 0xf7b545aeeda835efl,0x4aa113d21d10ed51l,0x035a65e013741b09l,
-        0x4b23ef5920b9de4cl },
-      { 0xe82bb6803c4c7341l,0xd457706d3f58bc37l,0x73527863a51e3ee8l,
-        0x4dd71534ddf49a4el },
-      0 },
+    { { 0xf7b545aeeda835efL,0x4aa113d21d10ed51L,0x035a65e013741b09L,
+        0x4b23ef5920b9de4cL },
+      { 0xe82bb6803c4c7341L,0xd457706d3f58bc37L,0x73527863a51e3ee8L,
+        0x4dd71534ddf49a4eL } },
     /* 202 */
-    { { 0xbf94467295476cd9l,0x648d072fe31a725bl,0x1441c8b8fc4b67e0l,
-        0xfd3170002f4a4dbbl },
-      { 0x1cb43ff48995d0e1l,0x76e695d10ef729aal,0xe0d5f97641798982l,
-        0x14fac58c9569f365l },
-      0 },
+    { { 0xbf94467295476cd9L,0x648d072fe31a725bL,0x1441c8b8fc4b67e0L,
+        0xfd3170002f4a4dbbL },
+      { 0x1cb43ff48995d0e1L,0x76e695d10ef729aaL,0xe0d5f97641798982L,
+        0x14fac58c9569f365L } },
     /* 203 */
-    { { 0xad9a0065f312ae18l,0x51958dc0fcc93fc9l,0xd9a142408a7d2846l,
-        0xed7c765136abda50l },
-      { 0x46270f1a25d4abbcl,0x9b5dd8f3f1a113eal,0xc609b0755b51952fl,
-        0xfefcb7f74d2e9f53l },
-      0 },
+    { { 0xad9a0065f312ae18L,0x51958dc0fcc93fc9L,0xd9a142408a7d2846L,
+        0xed7c765136abda50L },
+      { 0x46270f1a25d4abbcL,0x9b5dd8f3f1a113eaL,0xc609b0755b51952fL,
+        0xfefcb7f74d2e9f53L } },
     /* 204 */
-    { { 0xbd09497aba119185l,0xd54e8c30aac45ba4l,0x492479deaa521179l,
-        0x1801a57e87e0d80bl },
-      { 0x073d3f8dfcafffb0l,0x6cf33c0bae255240l,0x781d763b5b5fdfbcl,
-        0x9f8fc11e1ead1064l },
-      0 },
+    { { 0xbd09497aba119185L,0xd54e8c30aac45ba4L,0x492479deaa521179L,
+        0x1801a57e87e0d80bL },
+      { 0x073d3f8dfcafffb0L,0x6cf33c0bae255240L,0x781d763b5b5fdfbcL,
+        0x9f8fc11e1ead1064L } },
     /* 205 */
-    { { 0x1583a1715e69544cl,0x0eaf8567f04b7813l,0x1e22a8fd278a4c32l,
-        0xa9d3809d3d3a69a9l },
-      { 0x936c2c2c59a2da3bl,0x38ccbcf61895c847l,0x5e65244e63d50869l,
-        0x3006b9aee1178ef7l },
-      0 },
+    { { 0x1583a1715e69544cL,0x0eaf8567f04b7813L,0x1e22a8fd278a4c32L,
+        0xa9d3809d3d3a69a9L },
+      { 0x936c2c2c59a2da3bL,0x38ccbcf61895c847L,0x5e65244e63d50869L,
+        0x3006b9aee1178ef7L } },
     /* 206 */
-    { { 0x0bb1f2b0c9eead28l,0x7eef635d89f4dfbcl,0x074757fdb2ce8939l,
-        0x0ab85fd745f8f761l },
-      { 0xecda7c933e5b4549l,0x4be2bb5c97922f21l,0x261a1274b43b8040l,
-        0xb122d67511e942c2l },
-      0 },
+    { { 0x0bb1f2b0c9eead28L,0x7eef635d89f4dfbcL,0x074757fdb2ce8939L,
+        0x0ab85fd745f8f761L },
+      { 0xecda7c933e5b4549L,0x4be2bb5c97922f21L,0x261a1274b43b8040L,
+        0xb122d67511e942c2L } },
     /* 207 */
-    { { 0x3be607be66a5ae7al,0x01e703fa76adcbe3l,0xaf9043014eb6e5c5l,
-        0x9f599dc1097dbaecl },
-      { 0x6d75b7180ff250edl,0x8eb91574349a20dcl,0x425605a410b227a3l,
-        0x7d5528e08a294b78l },
-      0 },
+    { { 0x3be607be66a5ae7aL,0x01e703fa76adcbe3L,0xaf9043014eb6e5c5L,
+        0x9f599dc1097dbaecL },
+      { 0x6d75b7180ff250edL,0x8eb91574349a20dcL,0x425605a410b227a3L,
+        0x7d5528e08a294b78L } },
     /* 208 */
-    { { 0xf0f58f6620c26defl,0x025585ea582b2d1el,0xfbe7d79b01ce3881l,
-        0x28ccea01303f1730l },
-      { 0xd1dabcd179644ba5l,0x1fc643e806fff0b8l,0xa60a76fc66b3e17bl,
-        0xc18baf48a1d013bfl },
-      0 },
+    { { 0xf0f58f6620c26defL,0x025585ea582b2d1eL,0xfbe7d79b01ce3881L,
+        0x28ccea01303f1730L },
+      { 0xd1dabcd179644ba5L,0x1fc643e806fff0b8L,0xa60a76fc66b3e17bL,
+        0xc18baf48a1d013bfL } },
     /* 209 */
-    { { 0x34e638c85dc4216dl,0x00c01067206142acl,0xd453a17195f5064al,
-        0x9def809db7a9596bl },
-      { 0x41e8642e67ab8d2cl,0xb42404336237a2b6l,0x7d506a6d64c4218bl,
-        0x0357f8b068808ce5l },
-      0 },
+    { { 0x34e638c85dc4216dL,0x00c01067206142acL,0xd453a17195f5064aL,
+        0x9def809db7a9596bL },
+      { 0x41e8642e67ab8d2cL,0xb42404336237a2b6L,0x7d506a6d64c4218bL,
+        0x0357f8b068808ce5L } },
     /* 210 */
-    { { 0x8e9dbe644cd2cc88l,0xcc61c28df0b8f39dl,0x4a309874cd30a0c8l,
-        0xe4a01add1b489887l },
-      { 0x2ed1eeacf57cd8f9l,0x1b767d3ebd594c48l,0xa7295c717bd2f787l,
-        0x466d7d79ce10cc30l },
-      0 },
+    { { 0x8e9dbe644cd2cc88L,0xcc61c28df0b8f39dL,0x4a309874cd30a0c8L,
+        0xe4a01add1b489887L },
+      { 0x2ed1eeacf57cd8f9L,0x1b767d3ebd594c48L,0xa7295c717bd2f787L,
+        0x466d7d79ce10cc30L } },
     /* 211 */
-    { { 0x47d318929dada2c7l,0x4fa0a6c38f9aa27dl,0x90e4fd28820a59e1l,
-        0xc672a522451ead1al },
-      { 0x30607cc85d86b655l,0xf0235d3bf9ad4af1l,0x99a08680571172a6l,
-        0x5e3d64faf2a67513l },
-      0 },
+    { { 0x47d318929dada2c7L,0x4fa0a6c38f9aa27dL,0x90e4fd28820a59e1L,
+        0xc672a522451ead1aL },
+      { 0x30607cc85d86b655L,0xf0235d3bf9ad4af1L,0x99a08680571172a6L,
+        0x5e3d64faf2a67513L } },
     /* 212 */
-    { { 0xaa6410c79b3b4416l,0xcd8fcf85eab26d99l,0x5ebff74adb656a74l,
-        0x6c8a7a95eb8e42fcl },
-      { 0x10c60ba7b02a63bdl,0x6b2f23038b8f0047l,0x8c6c3738312d90b0l,
-        0x348ae422ad82ca91l },
-      0 },
+    { { 0xaa6410c79b3b4416L,0xcd8fcf85eab26d99L,0x5ebff74adb656a74L,
+        0x6c8a7a95eb8e42fcL },
+      { 0x10c60ba7b02a63bdL,0x6b2f23038b8f0047L,0x8c6c3738312d90b0L,
+        0x348ae422ad82ca91L } },
     /* 213 */
-    { { 0x7f4746635ccda2fbl,0x22accaa18e0726d2l,0x85adf782492b1f20l,
-        0xc1074de0d9ef2d2el },
-      { 0xfcf3ce44ae9a65b3l,0xfd71e4ac05d7151bl,0xd4711f50ce6a9788l,
-        0xfbadfbdbc9e54ffcl },
-      0 },
+    { { 0x7f4746635ccda2fbL,0x22accaa18e0726d2L,0x85adf782492b1f20L,
+        0xc1074de0d9ef2d2eL },
+      { 0xfcf3ce44ae9a65b3L,0xfd71e4ac05d7151bL,0xd4711f50ce6a9788L,
+        0xfbadfbdbc9e54ffcL } },
     /* 214 */
-    { { 0x1713f1cd20a99363l,0xb915658f6cf22775l,0x968175cd24d359b2l,
-        0xb7f976b483716fcdl },
-      { 0x5758e24d5d6dbf74l,0x8d23bafd71c3af36l,0x48f477600243dfe3l,
-        0xf4d41b2ecafcc805l },
-      0 },
+    { { 0x1713f1cd20a99363L,0xb915658f6cf22775L,0x968175cd24d359b2L,
+        0xb7f976b483716fcdL },
+      { 0x5758e24d5d6dbf74L,0x8d23bafd71c3af36L,0x48f477600243dfe3L,
+        0xf4d41b2ecafcc805L } },
     /* 215 */
-    { { 0x51f1cf28fdabd48dl,0xce81be3632c078a4l,0x6ace2974117146e9l,
-        0x180824eae0160f10l },
-      { 0x0387698b66e58358l,0x63568752ce6ca358l,0x82380e345e41e6c5l,
-        0x67e5f63983cf6d25l },
-      0 },
+    { { 0x51f1cf28fdabd48dL,0xce81be3632c078a4L,0x6ace2974117146e9L,
+        0x180824eae0160f10L },
+      { 0x0387698b66e58358L,0x63568752ce6ca358L,0x82380e345e41e6c5L,
+        0x67e5f63983cf6d25L } },
     /* 216 */
-    { { 0xf89ccb8dcf4899efl,0x949015f09ebb44c0l,0x546f9276b2598ec9l,
-        0x9fef789a04c11fc6l },
-      { 0x6d367ecf53d2a071l,0xb10e1a7fa4519b09l,0xca6b3fb0611e2eefl,
-        0xbc80c181a99c4e20l },
-      0 },
+    { { 0xf89ccb8dcf4899efL,0x949015f09ebb44c0L,0x546f9276b2598ec9L,
+        0x9fef789a04c11fc6L },
+      { 0x6d367ecf53d2a071L,0xb10e1a7fa4519b09L,0xca6b3fb0611e2eefL,
+        0xbc80c181a99c4e20L } },
     /* 217 */
-    { { 0x972536f8e5eb82e6l,0x1a484fc7f56cb920l,0xc78e217150b5da5el,
-        0x49270e629f8cdf10l },
-      { 0x1a39b7bbea6b50adl,0x9a0284c1a2388ffcl,0x5403eb178107197bl,
-        0xd2ee52f961372f7fl },
-      0 },
+    { { 0x972536f8e5eb82e6L,0x1a484fc7f56cb920L,0xc78e217150b5da5eL,
+        0x49270e629f8cdf10L },
+      { 0x1a39b7bbea6b50adL,0x9a0284c1a2388ffcL,0x5403eb178107197bL,
+        0xd2ee52f961372f7fL } },
     /* 218 */
-    { { 0xd37cd28588e0362al,0x442fa8a78fa5d94dl,0xaff836e5a434a526l,
-        0xdfb478bee5abb733l },
-      { 0xa91f1ce7673eede6l,0xa5390ad42b5b2f04l,0x5e66f7bf5530da2fl,
-        0xd9a140b408df473al },
-      0 },
+    { { 0xd37cd28588e0362aL,0x442fa8a78fa5d94dL,0xaff836e5a434a526L,
+        0xdfb478bee5abb733L },
+      { 0xa91f1ce7673eede6L,0xa5390ad42b5b2f04L,0x5e66f7bf5530da2fL,
+        0xd9a140b408df473aL } },
     /* 219 */
-    { { 0x0e0221b56e8ea498l,0x623478293563ee09l,0xe06b8391335d2adel,
-        0x760c058d623f4b1al },
-      { 0x0b89b58cc198aa79l,0xf74890d2f07aba7fl,0x4e204110fde2556al,
-        0x7141982d8f190409l },
-      0 },
+    { { 0x0e0221b56e8ea498L,0x623478293563ee09L,0xe06b8391335d2adeL,
+        0x760c058d623f4b1aL },
+      { 0x0b89b58cc198aa79L,0xf74890d2f07aba7fL,0x4e204110fde2556aL,
+        0x7141982d8f190409L } },
     /* 220 */
-    { { 0x6f0a0e334d4b0f45l,0xd9280b38392a94e1l,0x3af324c6b3c61d5el,
-        0x3af9d1ce89d54e47l },
-      { 0xfd8f798120930371l,0xeda2664c21c17097l,0x0e9545dcdc42309bl,
-        0xb1f815c373957dd6l },
-      0 },
+    { { 0x6f0a0e334d4b0f45L,0xd9280b38392a94e1L,0x3af324c6b3c61d5eL,
+        0x3af9d1ce89d54e47L },
+      { 0xfd8f798120930371L,0xeda2664c21c17097L,0x0e9545dcdc42309bL,
+        0xb1f815c373957dd6L } },
     /* 221 */
-    { { 0x84faa78e89fec44al,0xc8c2ae473caa4cafl,0x691c807dc1b6a624l,
-        0xa41aed141543f052l },
-      { 0x424353997d5ffe04l,0x8bacb2df625b6e20l,0x85d660be87817775l,
-        0xd6e9c1dd86fb60efl },
-      0 },
+    { { 0x84faa78e89fec44aL,0xc8c2ae473caa4cafL,0x691c807dc1b6a624L,
+        0xa41aed141543f052L },
+      { 0x424353997d5ffe04L,0x8bacb2df625b6e20L,0x85d660be87817775L,
+        0xd6e9c1dd86fb60efL } },
     /* 222 */
-    { { 0x3aa2e97ec6853264l,0x771533b7e2304a0bl,0x1b912bb7b8eae9bel,
-        0x9c9c6e10ae9bf8c2l },
-      { 0xa2309a59e030b74cl,0x4ed7494d6a631e90l,0x89f44b23a49b79f2l,
-        0x566bd59640fa61b6l },
-      0 },
+    { { 0x3aa2e97ec6853264L,0x771533b7e2304a0bL,0x1b912bb7b8eae9beL,
+        0x9c9c6e10ae9bf8c2L },
+      { 0xa2309a59e030b74cL,0x4ed7494d6a631e90L,0x89f44b23a49b79f2L,
+        0x566bd59640fa61b6L } },
     /* 223 */
-    { { 0x066c0118c18061f3l,0x190b25d37c83fc70l,0xf05fc8e027273245l,
-        0xcf2c7390f525345el },
-      { 0xa09bceb410eb30cfl,0xcfd2ebba0d77703al,0xe842c43a150ff255l,
-        0x02f517558aa20979l },
-      0 },
+    { { 0x066c0118c18061f3L,0x190b25d37c83fc70L,0xf05fc8e027273245L,
+        0xcf2c7390f525345eL },
+      { 0xa09bceb410eb30cfL,0xcfd2ebba0d77703aL,0xe842c43a150ff255L,
+        0x02f517558aa20979L } },
     /* 224 */
-    { { 0x396ef794addb7d07l,0x0b4fc74224455500l,0xfaff8eacc78aa3cel,
-        0x14e9ada5e8d4d97dl },
-      { 0xdaa480a12f7079e2l,0x45baa3cde4b0800el,0x01765e2d7838157dl,
-        0xa0ad4fab8e9d9ae8l },
-      0 },
+    { { 0x396ef794addb7d07L,0x0b4fc74224455500L,0xfaff8eacc78aa3ceL,
+        0x14e9ada5e8d4d97dL },
+      { 0xdaa480a12f7079e2L,0x45baa3cde4b0800eL,0x01765e2d7838157dL,
+        0xa0ad4fab8e9d9ae8L } },
     /* 225 */
-    { { 0x0bfb76214a653618l,0x1872813c31eaaa5fl,0x1553e73744949d5el,
-        0xbcd530b86e56ed1el },
-      { 0x169be85332e9c47bl,0xdc2776feb50059abl,0xcdba9761192bfbb4l,
-        0x909283cf6979341dl },
-      0 },
+    { { 0x0bfb76214a653618L,0x1872813c31eaaa5fL,0x1553e73744949d5eL,
+        0xbcd530b86e56ed1eL },
+      { 0x169be85332e9c47bL,0xdc2776feb50059abL,0xcdba9761192bfbb4L,
+        0x909283cf6979341dL } },
     /* 226 */
-    { { 0x67b0032476e81a13l,0x9bee1a9962171239l,0x08ed361bd32e19d6l,
-        0x35eeb7c9ace1549al },
-      { 0x1280ae5a7e4e5bdcl,0x2dcd2cd3b6ceec6el,0x52e4224c6e266bc1l,
-        0x9a8b2cf4448ae864l },
-      0 },
+    { { 0x67b0032476e81a13L,0x9bee1a9962171239L,0x08ed361bd32e19d6L,
+        0x35eeb7c9ace1549aL },
+      { 0x1280ae5a7e4e5bdcL,0x2dcd2cd3b6ceec6eL,0x52e4224c6e266bc1L,
+        0x9a8b2cf4448ae864L } },
     /* 227 */
-    { { 0xf6471bf209d03b59l,0xc90e62a3b65af2abl,0xff7ff168ebd5eec9l,
-        0x6bdb60f4d4491379l },
-      { 0xdadafebc8a55bc30l,0xc79ead1610097fe0l,0x42e197414c1e3bddl,
-        0x01ec3cfd94ba08a9l },
-      0 },
+    { { 0xf6471bf209d03b59L,0xc90e62a3b65af2abL,0xff7ff168ebd5eec9L,
+        0x6bdb60f4d4491379L },
+      { 0xdadafebc8a55bc30L,0xc79ead1610097fe0L,0x42e197414c1e3bddL,
+        0x01ec3cfd94ba08a9L } },
     /* 228 */
-    { { 0xba6277ebdc9485c2l,0x48cc9a7922fb10c7l,0x4f61d60f70a28d8al,
-        0xd1acb1c0475464f6l },
-      { 0xd26902b126f36612l,0x59c3a44ee0618d8bl,0x4df8a813308357eel,
-        0x7dcd079d405626c2l },
-      0 },
+    { { 0xba6277ebdc9485c2L,0x48cc9a7922fb10c7L,0x4f61d60f70a28d8aL,
+        0xd1acb1c0475464f6L },
+      { 0xd26902b126f36612L,0x59c3a44ee0618d8bL,0x4df8a813308357eeL,
+        0x7dcd079d405626c2L } },
     /* 229 */
-    { { 0x5ce7d4d3f05a4b48l,0xadcd295237230772l,0xd18f7971812a915al,
-        0x0bf53589377d19b8l },
-      { 0x35ecd95a6c68ea73l,0xc7f3bbca823a584dl,0x9fb674c6f473a723l,
-        0xd28be4d9e16686fcl },
-      0 },
+    { { 0x5ce7d4d3f05a4b48L,0xadcd295237230772L,0xd18f7971812a915aL,
+        0x0bf53589377d19b8L },
+      { 0x35ecd95a6c68ea73L,0xc7f3bbca823a584dL,0x9fb674c6f473a723L,
+        0xd28be4d9e16686fcL } },
     /* 230 */
-    { { 0x5d2b990638fa8e4bl,0x559f186e893fd8fcl,0x3a6de2aa436fb6fcl,
-        0xd76007aa510f88cel },
-      { 0x2d10aab6523a4988l,0xb455cf4474dd0273l,0x7f467082a3407278l,
-        0xf2b52f68b303bb01l },
-      0 },
+    { { 0x5d2b990638fa8e4bL,0x559f186e893fd8fcL,0x3a6de2aa436fb6fcL,
+        0xd76007aa510f88ceL },
+      { 0x2d10aab6523a4988L,0xb455cf4474dd0273L,0x7f467082a3407278L,
+        0xf2b52f68b303bb01L } },
     /* 231 */
-    { { 0x0d57eafa9835b4cal,0x2d2232fcbb669cbcl,0x8eeeb680c6643198l,
-        0xd8dbe98ecc5aed3al },
-      { 0xcba9be3fc5a02709l,0x30be68e5f5ba1fa8l,0xfebd43cdf10ea852l,
-        0xe01593a3ee559705l },
-      0 },
+    { { 0x0d57eafa9835b4caL,0x2d2232fcbb669cbcL,0x8eeeb680c6643198L,
+        0xd8dbe98ecc5aed3aL },
+      { 0xcba9be3fc5a02709L,0x30be68e5f5ba1fa8L,0xfebd43cdf10ea852L,
+        0xe01593a3ee559705L } },
     /* 232 */
-    { { 0xd3e5af50ea75a0a6l,0x512226ac57858033l,0x6fe6d50fd0176406l,
-        0xafec07b1aeb8ef06l },
-      { 0x7fb9956780bb0a31l,0x6f1af3cc37309aael,0x9153a15a01abf389l,
-        0xa71b93546e2dbfddl },
-      0 },
+    { { 0xd3e5af50ea75a0a6L,0x512226ac57858033L,0x6fe6d50fd0176406L,
+        0xafec07b1aeb8ef06L },
+      { 0x7fb9956780bb0a31L,0x6f1af3cc37309aaeL,0x9153a15a01abf389L,
+        0xa71b93546e2dbfddL } },
     /* 233 */
-    { { 0xbf8e12e018f593d2l,0xd1a90428a078122bl,0x150505db0ba4f2adl,
-        0x53a2005c628523d9l },
-      { 0x07c8b639e7f2b935l,0x2bff975ac182961al,0x86bceea77518ca2cl,
-        0xbf47d19b3d588e3dl },
-      0 },
+    { { 0xbf8e12e018f593d2L,0xd1a90428a078122bL,0x150505db0ba4f2adL,
+        0x53a2005c628523d9L },
+      { 0x07c8b639e7f2b935L,0x2bff975ac182961aL,0x86bceea77518ca2cL,
+        0xbf47d19b3d588e3dL } },
     /* 234 */
-    { { 0x672967a7dd7665d5l,0x4e3030572f2f4de5l,0x144005ae80d4903fl,
-        0x001c2c7f39c9a1b6l },
-      { 0x143a801469efc6d6l,0xc810bdaa7bc7a724l,0x5f65670ba78150a4l,
-        0xfdadf8e786ffb99bl },
-      0 },
+    { { 0x672967a7dd7665d5L,0x4e3030572f2f4de5L,0x144005ae80d4903fL,
+        0x001c2c7f39c9a1b6L },
+      { 0x143a801469efc6d6L,0xc810bdaa7bc7a724L,0x5f65670ba78150a4L,
+        0xfdadf8e786ffb99bL } },
     /* 235 */
-    { { 0xfd38cb88ffc00785l,0x77fa75913b48eb67l,0x0454d055bf368fbcl,
-        0x3a838e4d5aa43c94l },
-      { 0x561663293e97bb9al,0x9eb93363441d94d9l,0x515591a60adb2a83l,
-        0x3cdb8257873e1da3l },
-      0 },
+    { { 0xfd38cb88ffc00785L,0x77fa75913b48eb67L,0x0454d055bf368fbcL,
+        0x3a838e4d5aa43c94L },
+      { 0x561663293e97bb9aL,0x9eb93363441d94d9L,0x515591a60adb2a83L,
+        0x3cdb8257873e1da3L } },
     /* 236 */
-    { { 0x137140a97de77eabl,0xf7e1c50d41648109l,0x762dcad2ceb1d0dfl,
-        0x5a60cc89f1f57fbal },
-      { 0x80b3638240d45673l,0x1b82be195913c655l,0x057284b8dd64b741l,
-        0x922ff56fdbfd8fc0l },
-      0 },
+    { { 0x137140a97de77eabL,0xf7e1c50d41648109L,0x762dcad2ceb1d0dfL,
+        0x5a60cc89f1f57fbaL },
+      { 0x80b3638240d45673L,0x1b82be195913c655L,0x057284b8dd64b741L,
+        0x922ff56fdbfd8fc0L } },
     /* 237 */
-    { { 0x1b265deec9a129a1l,0xa5b1ce57cc284e04l,0x04380c46cebfbe3cl,
-        0x72919a7df6c5cd62l },
-      { 0x298f453a8fb90f9al,0xd719c00b88e4031bl,0xe32c0e77796f1856l,
-        0x5e7917803624089al },
-      0 },
+    { { 0x1b265deec9a129a1L,0xa5b1ce57cc284e04L,0x04380c46cebfbe3cL,
+        0x72919a7df6c5cd62L },
+      { 0x298f453a8fb90f9aL,0xd719c00b88e4031bL,0xe32c0e77796f1856L,
+        0x5e7917803624089aL } },
     /* 238 */
-    { { 0x5c16ec557f63cdfbl,0x8e6a3571f1cae4fdl,0xfce26bea560597cal,
-        0x4e0a5371e24c2fabl },
-      { 0x276a40d3a5765357l,0x3c89af440d73a2b4l,0xb8f370ae41d11a32l,
-        0xf5ff7818d56604eel },
-      0 },
+    { { 0x5c16ec557f63cdfbL,0x8e6a3571f1cae4fdL,0xfce26bea560597caL,
+        0x4e0a5371e24c2fabL },
+      { 0x276a40d3a5765357L,0x3c89af440d73a2b4L,0xb8f370ae41d11a32L,
+        0xf5ff7818d56604eeL } },
     /* 239 */
-    { { 0xfbf3e3fe1a09df21l,0x26d5d28ee66e8e47l,0x2096bd0a29c89015l,
-        0xe41df0e9533f5e64l },
-      { 0x305fda40b3ba9e3fl,0xf2340ceb2604d895l,0x0866e1927f0367c7l,
-        0x8edd7d6eac4f155fl },
-      0 },
+    { { 0xfbf3e3fe1a09df21L,0x26d5d28ee66e8e47L,0x2096bd0a29c89015L,
+        0xe41df0e9533f5e64L },
+      { 0x305fda40b3ba9e3fL,0xf2340ceb2604d895L,0x0866e1927f0367c7L,
+        0x8edd7d6eac4f155fL } },
     /* 240 */
-    { { 0xc9a1dc0e0bfc8ff3l,0x14efd82be936f42fl,0x67016f7ccca381efl,
-        0x1432c1caed8aee96l },
-      { 0xec68482970b23c26l,0xa64fe8730735b273l,0xe389f6e5eaef0f5al,
-        0xcaef480b5ac8d2c6l },
-      0 },
+    { { 0xc9a1dc0e0bfc8ff3L,0x14efd82be936f42fL,0x67016f7ccca381efL,
+        0x1432c1caed8aee96L },
+      { 0xec68482970b23c26L,0xa64fe8730735b273L,0xe389f6e5eaef0f5aL,
+        0xcaef480b5ac8d2c6L } },
     /* 241 */
-    { { 0x5245c97875315922l,0xd82951713063cca5l,0xf3ce60d0b64ef2cbl,
-        0xd0ba177e8efae236l },
-      { 0x53a9ae8fb1b3af60l,0x1a796ae53d2da20el,0x01d63605df9eef28l,
-        0xf31c957c1c54ae16l },
-      0 },
+    { { 0x5245c97875315922L,0xd82951713063cca5L,0xf3ce60d0b64ef2cbL,
+        0xd0ba177e8efae236L },
+      { 0x53a9ae8fb1b3af60L,0x1a796ae53d2da20eL,0x01d63605df9eef28L,
+        0xf31c957c1c54ae16L } },
     /* 242 */
-    { { 0xc0f58d5249cc4597l,0xdc5015b0bae0a028l,0xefc5fc55734a814al,
-        0x013404cb96e17c3al },
-      { 0xb29e2585c9a824bfl,0xd593185e001eaed7l,0x8d6ee68261ef68acl,
-        0x6f377c4b91933e6cl },
-      0 },
+    { { 0xc0f58d5249cc4597L,0xdc5015b0bae0a028L,0xefc5fc55734a814aL,
+        0x013404cb96e17c3aL },
+      { 0xb29e2585c9a824bfL,0xd593185e001eaed7L,0x8d6ee68261ef68acL,
+        0x6f377c4b91933e6cL } },
     /* 243 */
-    { { 0x9f93bad1a8333fd2l,0xa89302025a2a95b8l,0x211e5037eaf75acel,
-        0x6dba3e4ed2d09506l },
-      { 0xa48ef98cd04399cdl,0x1811c66ee6b73adel,0x72f60752c17ecaf3l,
-        0xf13cf3423becf4a7l },
-      0 },
+    { { 0x9f93bad1a8333fd2L,0xa89302025a2a95b8L,0x211e5037eaf75aceL,
+        0x6dba3e4ed2d09506L },
+      { 0xa48ef98cd04399cdL,0x1811c66ee6b73adeL,0x72f60752c17ecaf3L,
+        0xf13cf3423becf4a7L } },
     /* 244 */
-    { { 0xceeb9ec0a919e2ebl,0x83a9a195f62c0f68l,0xcfba3bb67aba2299l,
-        0xc83fa9a9274bbad3l },
-      { 0x0d7d1b0b62fa1ce0l,0xe58b60f53418efbfl,0xbfa8ef9e52706f04l,
-        0xb49d70f45d702683l },
-      0 },
+    { { 0xceeb9ec0a919e2ebL,0x83a9a195f62c0f68L,0xcfba3bb67aba2299L,
+        0xc83fa9a9274bbad3L },
+      { 0x0d7d1b0b62fa1ce0L,0xe58b60f53418efbfL,0xbfa8ef9e52706f04L,
+        0xb49d70f45d702683L } },
     /* 245 */
-    { { 0x914c7510fad5513bl,0x05f32eecb1751e2dl,0x6d850418d9fb9d59l,
-        0x59cfadbb0c30f1cfl },
-      { 0xe167ac2355cb7fd6l,0x249367b8820426a3l,0xeaeec58c90a78864l,
-        0x5babf362354a4b67l },
-      0 },
+    { { 0x914c7510fad5513bL,0x05f32eecb1751e2dL,0x6d850418d9fb9d59L,
+        0x59cfadbb0c30f1cfL },
+      { 0xe167ac2355cb7fd6L,0x249367b8820426a3L,0xeaeec58c90a78864L,
+        0x5babf362354a4b67L } },
     /* 246 */
-    { { 0x37c981d1ee424865l,0x8b002878f2e5577fl,0x702970f1b9e0c058l,
-        0x6188c6a79026c8f0l },
-      { 0x06f9a19bd0f244dal,0x1ecced5cfb080873l,0x35470f9b9f213637l,
-        0x993fe475df50b9d9l },
-      0 },
+    { { 0x37c981d1ee424865L,0x8b002878f2e5577fL,0x702970f1b9e0c058L,
+        0x6188c6a79026c8f0L },
+      { 0x06f9a19bd0f244daL,0x1ecced5cfb080873L,0x35470f9b9f213637L,
+        0x993fe475df50b9d9L } },
     /* 247 */
-    { { 0x68e31cdf9b2c3609l,0x84eb19c02c46d4eal,0x7ac9ec1a9a775101l,
-        0x81f764664c80616bl },
-      { 0x1d7c2a5a75fbe978l,0x6743fed3f183b356l,0x838d1f04501dd2bfl,
-        0x564a812a5fe9060dl },
-      0 },
+    { { 0x68e31cdf9b2c3609L,0x84eb19c02c46d4eaL,0x7ac9ec1a9a775101L,
+        0x81f764664c80616bL },
+      { 0x1d7c2a5a75fbe978L,0x6743fed3f183b356L,0x838d1f04501dd2bfL,
+        0x564a812a5fe9060dL } },
     /* 248 */
-    { { 0x7a5a64f4fa817d1dl,0x55f96844bea82e0fl,0xb5ff5a0fcd57f9aal,
-        0x226bf3cf00e51d6cl },
-      { 0xd6d1a9f92f2833cfl,0x20a0a35a4f4f89a8l,0x11536c498f3f7f77l,
-        0x68779f47ff257836l },
-      0 },
+    { { 0x7a5a64f4fa817d1dL,0x55f96844bea82e0fL,0xb5ff5a0fcd57f9aaL,
+        0x226bf3cf00e51d6cL },
+      { 0xd6d1a9f92f2833cfL,0x20a0a35a4f4f89a8L,0x11536c498f3f7f77L,
+        0x68779f47ff257836L } },
     /* 249 */
-    { { 0x79b0c1c173043d08l,0xa54467741fc020fal,0xd3767e289a6d26d0l,
-        0x97bcb0d1eb092e0bl },
-      { 0x2ab6eaa8f32ed3c3l,0xc8a4f151b281bc48l,0x4d1bf4f3bfa178f3l,
-        0xa872ffe80a784655l },
-      0 },
+    { { 0x79b0c1c173043d08L,0xa54467741fc020faL,0xd3767e289a6d26d0L,
+        0x97bcb0d1eb092e0bL },
+      { 0x2ab6eaa8f32ed3c3L,0xc8a4f151b281bc48L,0x4d1bf4f3bfa178f3L,
+        0xa872ffe80a784655L } },
     /* 250 */
-    { { 0xb1ab7935a32b2086l,0xe1eb710e8160f486l,0x9bd0cd913b6ae6bel,
-        0x02812bfcb732a36al },
-      { 0xa63fd7cacf605318l,0x646e5d50fdfd6d1dl,0xa1d683982102d619l,
-        0x07391cc9fe5396afl },
-      0 },
+    { { 0xb1ab7935a32b2086L,0xe1eb710e8160f486L,0x9bd0cd913b6ae6beL,
+        0x02812bfcb732a36aL },
+      { 0xa63fd7cacf605318L,0x646e5d50fdfd6d1dL,0xa1d683982102d619L,
+        0x07391cc9fe5396afL } },
     /* 251 */
-    { { 0xc50157f08b80d02bl,0x6b8333d162877f7fl,0x7aca1af878d542ael,
-        0x355d2adc7e6d2a08l },
-      { 0xb41f335a287386e1l,0xfd272a94f8e43275l,0x286ca2cde79989eal,
-        0x3dc2b1e37c2a3a79l },
-      0 },
+    { { 0xc50157f08b80d02bL,0x6b8333d162877f7fL,0x7aca1af878d542aeL,
+        0x355d2adc7e6d2a08L },
+      { 0xb41f335a287386e1L,0xfd272a94f8e43275L,0x286ca2cde79989eaL,
+        0x3dc2b1e37c2a3a79L } },
     /* 252 */
-    { { 0xd689d21c04581352l,0x0a00c825376782bel,0x203bd5909fed701fl,
-        0xc47869103ccd846bl },
-      { 0x5dba770824c768edl,0x72feea026841f657l,0x73313ed56accce0el,
-        0xccc42968d5bb4d32l },
-      0 },
+    { { 0xd689d21c04581352L,0x0a00c825376782beL,0x203bd5909fed701fL,
+        0xc47869103ccd846bL },
+      { 0x5dba770824c768edL,0x72feea026841f657L,0x73313ed56accce0eL,
+        0xccc42968d5bb4d32L } },
     /* 253 */
-    { { 0x94e50de13d7620b9l,0xd89a5c8a5992a56al,0xdc007640675487c9l,
-        0xe147eb42aa4871cfl },
-      { 0x274ab4eeacf3ae46l,0xfd4936fb50350fbel,0xdf2afe4748c840eal,
-        0x239ac047080e96e3l },
-      0 },
+    { { 0x94e50de13d7620b9L,0xd89a5c8a5992a56aL,0xdc007640675487c9L,
+        0xe147eb42aa4871cfL },
+      { 0x274ab4eeacf3ae46L,0xfd4936fb50350fbeL,0xdf2afe4748c840eaL,
+        0x239ac047080e96e3L } },
     /* 254 */
-    { { 0x481d1f352bfee8d4l,0xce80b5cffa7b0fecl,0x105c4c9e2ce9af3cl,
-        0xc55fa1a3f5f7e59dl },
-      { 0x3186f14e8257c227l,0xc5b1653f342be00bl,0x09afc998aa904fb2l,
-        0x094cd99cd4f4b699l },
-      0 },
+    { { 0x481d1f352bfee8d4L,0xce80b5cffa7b0fecL,0x105c4c9e2ce9af3cL,
+        0xc55fa1a3f5f7e59dL },
+      { 0x3186f14e8257c227L,0xc5b1653f342be00bL,0x09afc998aa904fb2L,
+        0x094cd99cd4f4b699L } },
     /* 255 */
-    { { 0x8a981c84d703bebal,0x8631d15032ceb291l,0xa445f2c9e3bd49ecl,
-        0xb90a30b642abad33l },
-      { 0xb465404fb4a5abf9l,0x004750c375db7603l,0x6f9a42ccca35d89fl,
-        0x019f8b9a1b7924f7l },
-      0 },
+    { { 0x8a981c84d703bebaL,0x8631d15032ceb291L,0xa445f2c9e3bd49ecL,
+        0xb90a30b642abad33L },
+      { 0xb465404fb4a5abf9L,0x004750c375db7603L,0x6f9a42ccca35d89fL,
+        0x019f8b9a1b7924f7L } },
 };
 
 /* Multiply the base point of P256 by the scalar and return the result.
- * If map is true then convert result to affine co-ordinates.
+ * If map is true then convert result to affine coordinates.
  *
  * r     Resulting point.
  * k     Scalar to multiply by.
@@ -15960,7 +22469,7 @@
  * heap  Heap to use for allocation.
  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
  */
-static int sp_256_ecc_mulmod_base_4(sp_point* r, sp_digit* k,
+static int sp_256_ecc_mulmod_base_4(sp_point_256* r, const sp_digit* k,
         int map, void* heap)
 {
     return sp_256_ecc_mulmod_stripe_4(r, &p256_base, p256_table,
@@ -15968,11585 +22477,42 @@
 }
 
 #else
-/* A table entry for pre-computed points. */
-typedef struct sp_table_entry_sum {
-    sp_digit x[4];
-    sp_digit y[4];
-    byte infinity;
-} sp_table_entry_sum;
-
-/* Table of pre-computed values for P256 with 3 multiples and width of 8 bits.
- */
-static sp_table_entry_sum p256_table[33][58] = {
-    {
-        /* 0 << 0 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 0 */
-        { { 0x79e730d418a9143cl,0x75ba95fc5fedb601l,0x79fb732b77622510l,
-            0x18905f76a53755c6l },
-          { 0xddf25357ce95560al,0x8b4ab8e4ba19e45cl,0xd2e88688dd21f325l,
-            0x8571ff1825885d85l },
-          0 },
-        /* 3 << 0 */
-        { { 0xffac3f904eebc127l,0xb027f84a087d81fbl,0x66ad77dd87cbbc98l,
-            0x26936a3fb6ff747el },
-          { 0xb04c5c1fc983a7ebl,0x583e47ad0861fe1al,0x788208311a2ee98el,
-            0xd5f06a29e587cc07l },
-          0 },
-        /* 4 << 0 */
-        { { 0x74b0b50d46918dccl,0x4650a6edc623c173l,0x0cdaacace8100af2l,
-            0x577362f541b0176bl },
-          { 0x2d96f24ce4cbaba6l,0x17628471fad6f447l,0x6b6c36dee5ddd22el,
-            0x84b14c394c5ab863l },
-          0 },
-        /* 5 << 0 */
-        { { 0xbe1b8aaec45c61f5l,0x90ec649a94b9537dl,0x941cb5aad076c20cl,
-            0xc9079605890523c8l },
-          { 0xeb309b4ae7ba4f10l,0x73c568efe5eb882bl,0x3540a9877e7a1f68l,
-            0x73a076bb2dd1e916l },
-          0 },
-        /* 7 << 0 */
-        { { 0x0746354ea0173b4fl,0x2bd20213d23c00f7l,0xf43eaab50c23bb08l,
-            0x13ba5119c3123e03l },
-          { 0x2847d0303f5b9d4dl,0x6742f2f25da67bddl,0xef933bdc77c94195l,
-            0xeaedd9156e240867l },
-          0 },
-        /* 9 << 0 */
-        { { 0x75c96e8f264e20e8l,0xabe6bfed59a7a841l,0x2cc09c0444c8eb00l,
-            0xe05b3080f0c4e16bl },
-          { 0x1eb7777aa45f3314l,0x56af7bedce5d45e3l,0x2b6e019a88b12f1al,
-            0x086659cdfd835f9bl },
-          0 },
-        /* 10 << 0 */
-        { { 0x2c18dbd19dc21ec8l,0x98f9868a0fcf8139l,0x737d2cd648250b49l,
-            0xcc61c94724b3428fl },
-          { 0x0c2b407880dd9e76l,0xc43a8991383fbe08l,0x5f7d2d65779be5d2l,
-            0x78719a54eb3b4ab5l },
-          0 },
-        /* 11 << 0 */
-        { { 0xea7d260a6245e404l,0x9de407956e7fdfe0l,0x1ff3a4158dac1ab5l,
-            0x3e7090f1649c9073l },
-          { 0x1a7685612b944e88l,0x250f939ee57f61c8l,0x0c0daa891ead643dl,
-            0x68930023e125b88el },
-          0 },
-        /* 13 << 0 */
-        { { 0xccc425634b2ed709l,0x0e356769856fd30dl,0xbcbcd43f559e9811l,
-            0x738477ac5395b759l },
-          { 0x35752b90c00ee17fl,0x68748390742ed2e3l,0x7cd06422bd1f5bc1l,
-            0xfbc08769c9e7b797l },
-          0 },
-        /* 15 << 0 */
-        { { 0x72bcd8b7bc60055bl,0x03cc23ee56e27e4bl,0xee337424e4819370l,
-            0xe2aa0e430ad3da09l },
-          { 0x40b8524f6383c45dl,0xd766355442a41b25l,0x64efa6de778a4797l,
-            0x2042170a7079adf4l },
-          0 },
-        /* 16 << 0 */
-        { { 0x808b0b650bc6fb80l,0x5882e0753ffe2e6bl,0xd5ef2f7c2c83f549l,
-            0x54d63c809103b723l },
-          { 0xf2f11bd652a23f9bl,0x3670c3194b0b6587l,0x55c4623bb1580e9el,
-            0x64edf7b201efe220l },
-          0 },
-        /* 17 << 0 */
-        { { 0x97091dcbd53c5c9dl,0xf17624b6ac0a177bl,0xb0f139752cfe2dffl,
-            0xc1a35c0a6c7a574el },
-          { 0x227d314693e79987l,0x0575bf30e89cb80el,0x2f4e247f0d1883bbl,
-            0xebd512263274c3d0l },
-          0 },
-        /* 19 << 0 */
-        { { 0xfea912baa5659ae8l,0x68363aba25e1a16el,0xb8842277752c41acl,
-            0xfe545c282897c3fcl },
-          { 0x2d36e9e7dc4c696bl,0x5806244afba977c5l,0x85665e9be39508c1l,
-            0xf720ee256d12597bl },
-          0 },
-        /* 21 << 0 */
-        { { 0x562e4cecc135b208l,0x74e1b2654783f47dl,0x6d2a506c5a3f3b30l,
-            0xecead9f4c16762fcl },
-          { 0xf29dd4b2e286e5b9l,0x1b0fadc083bb3c61l,0x7a75023e7fac29a4l,
-            0xc086d5f1c9477fa3l },
-          0 },
-        /* 23 << 0 */
-        { { 0xf4f876532de45068l,0x37c7a7e89e2e1f6el,0xd0825fa2a3584069l,
-            0xaf2cea7c1727bf42l },
-          { 0x0360a4fb9e4785a9l,0xe5fda49c27299f4al,0x48068e1371ac2f71l,
-            0x83d0687b9077666fl },
-          0 },
-        /* 25 << 0 */
-        { { 0xa4a319acd837879fl,0x6fc1b49eed6b67b0l,0xe395993332f1f3afl,
-            0x966742eb65432a2el },
-          { 0x4b8dc9feb4966228l,0x96cc631243f43950l,0x12068859c9b731eel,
-            0x7b948dc356f79968l },
-          0 },
-        /* 27 << 0 */
-        { { 0x042c2af497e2feb4l,0xd36a42d7aebf7313l,0x49d2c9eb084ffdd7l,
-            0x9f8aa54b2ef7c76al },
-          { 0x9200b7ba09895e70l,0x3bd0c66fddb7fb58l,0x2d97d10878eb4cbbl,
-            0x2d431068d84bde31l },
-          0 },
-        /* 28 << 0 */
-        { { 0x4b523eb7172ccd1fl,0x7323cb2830a6a892l,0x97082ec0cfe153ebl,
-            0xe97f6b6af2aadb97l },
-          { 0x1d3d393ed1a83da1l,0xa6a7f9c7804b2a68l,0x4a688b482d0cb71el,
-            0xa9b4cc5f40585278l },
-          0 },
-        /* 29 << 0 */
-        { { 0x5e5db46acb66e132l,0xf1be963a0d925880l,0x944a70270317b9e2l,
-            0xe266f95948603d48l },
-          { 0x98db66735c208899l,0x90472447a2fb18a3l,0x8a966939777c619fl,
-            0x3798142a2a3be21bl },
-          0 },
-        /* 31 << 0 */
-        { { 0xe2f73c696755ff89l,0xdd3cf7e7473017e6l,0x8ef5689d3cf7600dl,
-            0x948dc4f8b1fc87b4l },
-          { 0xd9e9fe814ea53299l,0x2d921ca298eb6028l,0xfaecedfd0c9803fcl,
-            0xf38ae8914d7b4745l },
-          0 },
-        /* 33 << 0 */
-        { { 0x871514560f664534l,0x85ceae7c4b68f103l,0xac09c4ae65578ab9l,
-            0x33ec6868f044b10cl },
-          { 0x6ac4832b3a8ec1f1l,0x5509d1285847d5efl,0xf909604f763f1574l,
-            0xb16c4303c32f63c4l },
-          0 },
-        /* 34 << 0 */
-        { { 0xb6ab20147ca23cd3l,0xcaa7a5c6a391849dl,0x5b0673a375678d94l,
-            0xc982ddd4dd303e64l },
-          { 0xfd7b000b5db6f971l,0xbba2cb1f6f876f92l,0xc77332a33c569426l,
-            0xa159100c570d74f8l },
-          0 },
-        /* 35 << 0 */
-        { { 0xfd16847fdec67ef5l,0x742ee464233e76b7l,0x0b8e4134efc2b4c8l,
-            0xca640b8642a3e521l },
-          { 0x653a01908ceb6aa9l,0x313c300c547852d5l,0x24e4ab126b237af7l,
-            0x2ba901628bb47af8l },
-          0 },
-        /* 36 << 0 */
-        { { 0x3d5e58d6a8219bb7l,0xc691d0bd1b06c57fl,0x0ae4cb10d257576el,
-            0x3569656cd54a3dc3l },
-          { 0xe5ebaebd94cda03al,0x934e82d3162bfe13l,0x450ac0bae251a0c6l,
-            0x480b9e11dd6da526l },
-          0 },
-        /* 37 << 0 */
-        { { 0x00467bc58cce08b5l,0xb636458c7f178d55l,0xc5748baea677d806l,
-            0x2763a387dfa394ebl },
-          { 0xa12b448a7d3cebb6l,0xe7adda3e6f20d850l,0xf63ebce51558462cl,
-            0x58b36143620088a8l },
-          0 },
-        /* 39 << 0 */
-        { { 0xa9d89488a059c142l,0x6f5ae714ff0b9346l,0x068f237d16fb3664l,
-            0x5853e4c4363186acl },
-          { 0xe2d87d2363c52f98l,0x2ec4a76681828876l,0x47b864fae14e7b1cl,
-            0x0c0bc0e569192408l },
-          0 },
-        /* 40 << 0 */
-        { { 0xe4d7681db82e9f3el,0x83200f0bdf25e13cl,0x8909984c66f27280l,
-            0x462d7b0075f73227l },
-          { 0xd90ba188f2651798l,0x74c6e18c36ab1c34l,0xab256ea35ef54359l,
-            0x03466612d1aa702fl },
-          0 },
-        /* 41 << 0 */
-        { { 0x624d60492ed22e91l,0x6fdfe0b56f072822l,0xeeca111539ce2271l,
-            0x98100a4fdb01614fl },
-          { 0xb6b0daa2a35c628fl,0xb6f94d2ec87e9a47l,0xc67732591d57d9cel,
-            0xf70bfeec03884a7bl },
-          0 },
-        /* 43 << 0 */
-        { { 0x4ff23ffd248a7d06l,0x80c5bfb4878873fal,0xb7d9ad9005745981l,
-            0x179c85db3db01994l },
-          { 0xba41b06261a6966cl,0x4d82d052eadce5a8l,0x9e91cd3ba5e6a318l,
-            0x47795f4f95b2dda0l },
-          0 },
-        /* 44 << 0 */
-        { { 0xecfd7c1fd55a897cl,0x009194abb29110fbl,0x5f0e2046e381d3b0l,
-            0x5f3425f6a98dd291l },
-          { 0xbfa06687730d50dal,0x0423446c4b083b7fl,0x397a247dd69d3417l,
-            0xeb629f90387ba42al },
-          0 },
-        /* 45 << 0 */
-        { { 0x1ee426ccd5cd79bfl,0x0032940b946c6e18l,0x1b1e8ae057477f58l,
-            0xe94f7d346d823278l },
-          { 0xc747cb96782ba21al,0xc5254469f72b33a5l,0x772ef6dec7f80c81l,
-            0xd73acbfe2cd9e6b5l },
-          0 },
-        /* 46 << 0 */
-        { { 0x4075b5b149ee90d9l,0x785c339aa06e9ebal,0xa1030d5babf825e0l,
-            0xcec684c3a42931dcl },
-          { 0x42ab62c9c1586e63l,0x45431d665ab43f2bl,0x57c8b2c055f7835dl,
-            0x033da338c1b7f865l },
-          0 },
-        /* 47 << 0 */
-        { { 0x283c7513caa76097l,0x0a624fa936c83906l,0x6b20afec715af2c7l,
-            0x4b969974eba78bfdl },
-          { 0x220755ccd921d60el,0x9b944e107baeca13l,0x04819d515ded93d4l,
-            0x9bbff86e6dddfd27l },
-          0 },
-        /* 48 << 0 */
-        { { 0x6b34413077adc612l,0xa7496529bbd803a0l,0x1a1baaa76d8805bdl,
-            0xc8403902470343adl },
-          { 0x39f59f66175adff1l,0x0b26d7fbb7d8c5b7l,0xa875f5ce529d75e3l,
-            0x85efc7e941325cc2l },
-          0 },
-        /* 49 << 0 */
-        { { 0x21950b421ff6acd3l,0xffe7048453dc6909l,0xff4cd0b228766127l,
-            0xabdbe6084fb7db2bl },
-          { 0x837c92285e1109e8l,0x26147d27f4645b5al,0x4d78f592f7818ed8l,
-            0xd394077ef247fa36l },
-          0 },
-        /* 51 << 0 */
-        { { 0x508cec1c3b3f64c9l,0xe20bc0ba1e5edf3fl,0xda1deb852f4318d4l,
-            0xd20ebe0d5c3fa443l },
-          { 0x370b4ea773241ea3l,0x61f1511c5e1a5f65l,0x99a5e23d82681c62l,
-            0xd731e383a2f54c2dl },
-          0 },
-        /* 52 << 0 */
-        { { 0x2692f36e83445904l,0x2e0ec469af45f9c0l,0x905a3201c67528b7l,
-            0x88f77f34d0e5e542l },
-          { 0xf67a8d295864687cl,0x23b92eae22df3562l,0x5c27014b9bbec39el,
-            0x7ef2f2269c0f0f8dl },
-          0 },
-        /* 53 << 0 */
-        { { 0x97359638546c4d8dl,0x5f9c3fc492f24679l,0x912e8beda8c8acd9l,
-            0xec3a318d306634b0l },
-          { 0x80167f41c31cb264l,0x3db82f6f522113f2l,0xb155bcd2dcafe197l,
-            0xfba1da5943465283l },
-          0 },
-        /* 55 << 0 */
-        { { 0x258bbbf9e7305683l,0x31eea5bf07ef5be6l,0x0deb0e4a46c814c1l,
-            0x5cee8449a7b730ddl },
-          { 0xeab495c5a0182bdel,0xee759f879e27a6b4l,0xc2cf6a6880e518cal,
-            0x25e8013ff14cf3f4l },
-          0 },
-        /* 57 << 0 */
-        { { 0x3ec832e77acaca28l,0x1bfeea57c7385b29l,0x068212e3fd1eaf38l,
-            0xc13298306acf8cccl },
-          { 0xb909f2db2aac9e59l,0x5748060db661782al,0xc5ab2632c79b7a01l,
-            0xda44c6c600017626l },
-          0 },
-        /* 59 << 0 */
-        { { 0x69d44ed65c46aa8el,0x2100d5d3a8d063d1l,0xcb9727eaa2d17c36l,
-            0x4c2bab1b8add53b7l },
-          { 0xa084e90c15426704l,0x778afcd3a837ebeal,0x6651f7017ce477f8l,
-            0xa062499846fb7a8bl },
-          0 },
-        /* 60 << 0 */
-        { { 0xdc1e6828ed8a6e19l,0x33fc23364189d9c7l,0x026f8fe2671c39bcl,
-            0xd40c4ccdbc6f9915l },
-          { 0xafa135bbf80e75cal,0x12c651a022adff2cl,0xc40a04bd4f51ad96l,
-            0x04820109bbe4e832l },
-          0 },
-        /* 61 << 0 */
-        { { 0x3667eb1a7f4c04ccl,0x59556621a9404f84l,0x71cdf6537eceb50al,
-            0x994a44a69b8335fal },
-          { 0xd7faf819dbeb9b69l,0x473c5680eed4350dl,0xb6658466da44bba2l,
-            0x0d1bc780872bdbf3l },
-          0 },
-        /* 63 << 0 */
-        { { 0xb8d3d9319ff91fe5l,0x039c4800f0518eedl,0x95c376329182cb26l,
-            0x0763a43482fc568dl },
-          { 0x707c04d5383e76bal,0xac98b930824e8197l,0x92bf7c8f91230de0l,
-            0x90876a0140959b70l },
-          0 },
-        /* 64 << 0 */
-        { { 0xdb6d96f305968b80l,0x380a0913089f73b9l,0x7da70b83c2c61e01l,
-            0x95fb8394569b38c7l },
-          { 0x9a3c651280edfe2fl,0x8f726bb98faeaf82l,0x8010a4a078424bf8l,
-            0x296720440e844970l },
-          0 },
-        /* 65 << 0 */
-        { { 0xdc2306ebfcdbb2b2l,0x79527db7ba66f4b9l,0xbf639ed67765765el,
-            0x01628c4706b6090al },
-          { 0x66eb62f1b957b4a1l,0x33cb7691ba659f46l,0x2c90d98cf3e055d6l,
-            0x7d096ac42f174750l },
-          0 },
-        /* 71 << 0 */
-        { { 0xf19f382e92aa7864l,0x49c7cb94fc05804bl,0xf94aa89b40750d01l,
-            0xdd421b5d4a210364l },
-          { 0x56cd001e39df3672l,0x030a119fdd4af1ecl,0x11f947e696cd0572l,
-            0x574cc7b293786791l },
-          0 },
-        /* 77 << 0 */
-        { { 0x0a2193bfc266f85cl,0x719a87be5a0ec9cel,0x9c30c6422b2f9c49l,
-            0xdb15e4963d5baeb1l },
-          { 0x83c3139be0d37321l,0x4788522b2e9fdbb2l,0x2b4f0c7877eb94eal,
-            0x854dc9d595105f9el },
-          0 },
-        /* 83 << 0 */
-        { { 0x2c9ee62dc3363a22l,0x125d4714ec67199al,0xf87abebf2ab80485l,
-            0xcf3086e87a243ca4l },
-          { 0x5c52b051c64e09ddl,0x5e9b16125625aad7l,0x0536a39db19c6126l,
-            0x97f0013247b64be5l },
-          0 },
-        /* 89 << 0 */
-        { { 0xc1ee6264a7eabe67l,0x62d51e29fd54487dl,0x3ea123446310eb5al,
-            0xbd88aca74765b805l },
-          { 0xb7b284be14fb691al,0x640388f83b9fffefl,0x7ab49dd209f98f9al,
-            0x7150f87e7211e445l },
-          0 },
-        /* 95 << 0 */
-        { { 0x263e039bb308cc40l,0x6684ad762b346fd2l,0x9a127f2bcaa12d0dl,
-            0x76a8f9fea974291fl },
-          { 0xc802049b68aa19e4l,0x65499c990c5dbba0l,0xee1b1cb5344455a1l,
-            0x3f293fda2cd6f439l },
-          0 },
-        /* 101 << 0 */
-        { { 0xb7a96e0a4ea6fdf7l,0xbbe914d3b99cd026l,0x6a610374c569a602l,
-            0xe9b1c23914da499el },
-          { 0xb5f6f0feadc19a99l,0x731251826f21687cl,0x5a8a14644be77793l,
-            0x94ce9e0adba8bfc7l },
-          0 },
-        /* 107 << 0 */
-        { { 0x2ca0ba9c3796f4c7l,0x3571e4d1592ce334l,0x28f9cdebe9f6e877l,
-            0xee206023efce1a70l },
-          { 0xb2159e08b76369dcl,0x2754e4260a7f687cl,0xe008039e02de2ff1l,
-            0xccd7e9418ea700c1l },
-          0 },
-        /* 113 << 0 */
-        { { 0xa125e6c1b7ebcb88l,0x3289e86e10ec0d40l,0xcc3a5ecb98353869l,
-            0x734e0d078a2b0d3al },
-          { 0xe0d92e9a51933360l,0xfa6bcdb1786076b9l,0xd13cca90747f19ecl,
-            0x61d8209d49f3a53dl },
-          0 },
-        /* 116 << 0 */
-        { { 0x87f9793bc9826344l,0x4b3de89bb2f5f79cl,0xc9f08a5659cb1b6el,
-            0xd8f1fc5f6a92b9aal },
-          { 0x86357f9eb412595el,0x53c30bbe65b80f16l,0xf06c2c8c70549a57l,
-            0xa9c8a4b42b9157dal },
-          0 },
-        /* 119 << 0 */
-        { { 0x87af199e6cc47305l,0x062afb7c1e314ddel,0x2be22ba0f3a49fb4l,
-            0x6ed0b988157b7f56l },
-          { 0x8162cf502d653fd9l,0x17d29c64877b7497l,0xd7e814380f67b514l,
-            0xfedf1014fe6ee703l },
-          0 },
-        /* 125 << 0 */
-        { { 0xaab54cfc93740130l,0xf72dab6d225733fal,0x04b76d2d1ed32559l,
-            0xa9fe2396bb85b9cbl },
-          { 0x128b0d24bf2219f0l,0x2292393b579f3ce2l,0x51dc5fac145ff0d5l,
-            0xb16d6af8c3febbc1l },
-          0 },
-    },
-    {
-        /* 0 << 8 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 8 */
-        { { 0x486d8ffa696946fcl,0x50fbc6d8b9cba56dl,0x7e3d423e90f35a15l,
-            0x7c3da195c0dd962cl },
-          { 0xe673fdb03cfd5d8bl,0x0704b7c2889dfca5l,0xf6ce581ff52305aal,
-            0x399d49eb914d5e53l },
-          0 },
-        /* 3 << 8 */
-        { { 0x35d6a53eed4c3717l,0x9f8240cf3d0ed2a3l,0x8c0d4d05e5543aa5l,
-            0x45d5bbfbdd33b4b4l },
-          { 0xfa04cc73137fd28el,0x862ac6efc73b3ffdl,0x403ff9f531f51ef2l,
-            0x34d5e0fcbc73f5a2l },
-          0 },
-        /* 4 << 8 */
-        { { 0x4f7081e144cc3addl,0xd5ffa1d687be82cfl,0x89890b6c0edd6472l,
-            0xada26e1a3ed17863l },
-          { 0x276f271563483caal,0xe6924cd92f6077fdl,0x05a7fe980a466e3cl,
-            0xf1c794b0b1902d1fl },
-          0 },
-        /* 5 << 8 */
-        { { 0x33b2385c08369a90l,0x2990c59b190eb4f8l,0x819a6145c68eac80l,
-            0x7a786d622ec4a014l },
-          { 0x33faadbe20ac3a8dl,0x31a217815aba2d30l,0x209d2742dba4f565l,
-            0xdb2ce9e355aa0fbbl },
-          0 },
-        /* 7 << 8 */
-        { { 0x0c4a58d474a86108l,0xf8048a8fee4c5d90l,0xe3c7c924e86d4c80l,
-            0x28c889de056a1e60l },
-          { 0x57e2662eb214a040l,0xe8c48e9837e10347l,0x8774286280ac748al,
-            0xf1c24022186b06f2l },
-          0 },
-        /* 9 << 8 */
-        { { 0xe8cbf1e5d5923359l,0xdb0cea9d539b9fb0l,0x0c5b34cf49859b98l,
-            0x5e583c56a4403cc6l },
-          { 0x11fc1a2dd48185b7l,0xc93fbc7e6e521787l,0x47e7a05805105b8bl,
-            0x7b4d4d58db8260c8l },
-          0 },
-        /* 10 << 8 */
-        { { 0xb31bd6136339c083l,0x39ff8155dfb64701l,0x7c3388d2e29604abl,
-            0x1e19084ba6b10442l },
-          { 0x17cf54c0eccd47efl,0x896933854a5dfb30l,0x69d023fb47daf9f6l,
-            0x9222840b7d91d959l },
-          0 },
-        /* 11 << 8 */
-        { { 0xc510610939842194l,0xb7e2353e49d05295l,0xfc8c1d5cefb42ee0l,
-            0xe04884eb08ce811cl },
-          { 0xf1f75d817419f40el,0x5b0ac162a995c241l,0x120921bbc4c55646l,
-            0x713520c28d33cf97l },
-          0 },
-        /* 13 << 8 */
-        { { 0x41d04ee21726931al,0x0bbbb2c83660ecfdl,0xa6ef6de524818e18l,
-            0xe421cc51e7d57887l },
-          { 0xf127d208bea87be6l,0x16a475d3b1cdd682l,0x9db1b684439b63f7l,
-            0x5359b3dbf0f113b6l },
-          0 },
-        /* 15 << 8 */
-        { { 0x3a5c752edcc18770l,0x4baf1f2f8825c3a5l,0xebd63f7421b153edl,
-            0xa2383e47b2f64723l },
-          { 0xe7bf620a2646d19al,0x56cb44ec03c83ffdl,0xaf7267c94f6be9f1l,
-            0x8b2dfd7bc06bb5e9l },
-          0 },
-        /* 16 << 8 */
-        { { 0x6772b0e5ab4b35a2l,0x1d8b6001f5eeaacfl,0x728f7ce4795b9580l,
-            0x4a20ed2a41fb81dal },
-          { 0x9f685cd44fec01e6l,0x3ed7ddcca7ff50adl,0x460fd2640c2d97fdl,
-            0x3a241426eb82f4f9l },
-          0 },
-        /* 17 << 8 */
-        { { 0xc503cd33bccd9617l,0x365dede4ba7730a3l,0x798c63555ddb0786l,
-            0xa6c3200efc9cd3bcl },
-          { 0x060ffb2ce5e35efdl,0x99a4e25b5555a1c1l,0x11d95375f70b3751l,
-            0x0a57354a160e1bf6l },
-          0 },
-        /* 19 << 8 */
-        { { 0xc033bdc719803511l,0xa9f97b3b8888c3bel,0x3d68aebc85c6d05el,
-            0xc3b88a9d193919ebl },
-          { 0x2d300748c48b0ee3l,0x7506bc7c07a746c1l,0xfc48437c6e6d57f3l,
-            0x5bd71587cfeaa91al },
-          0 },
-        /* 21 << 8 */
-        { { 0xe40736d3df61bc76l,0x13a619c03f778cdbl,0x6dd921a4c56ea28fl,
-            0x76a524332fa647b4l },
-          { 0x23591891ac5bdc5dl,0xff4a1a72bac7dc01l,0x9905e26162df8453l,
-            0x3ac045dfe63b265fl },
-          0 },
-        /* 23 << 8 */
-        { { 0x8435bd6994b03ed1l,0xd9ad1de3634cc546l,0x2cf423fc00e420cal,
-            0xeed26d80a03096ddl },
-          { 0xd7f60be7a4db09d2l,0xf47f569d960622f7l,0xe5925fd77296c729l,
-            0xeff2db2626ca2715l },
-          0 },
-        /* 25 << 8 */
-        { { 0x5dfee80f83774bddl,0x6313160285734485l,0xa1b524ae914a69a9l,
-            0xebc2ffafd4e300d7l },
-          { 0x52c93db77cfa46a5l,0x71e6161f21653b50l,0x3574fc57a4bc580al,
-            0xc09015dde1bc1253l },
-          0 },
-        /* 27 << 8 */
-        { { 0x9c38ddcceb5b76c1l,0x746f528526fc0ab4l,0x52a63a50d62c269fl,
-            0x60049c5599458621l },
-          { 0xe7f48f823c2f7c9el,0x6bd99043917d5cf3l,0xeb1317a88701f469l,
-            0xbd3fe2ed9a449fe0l },
-          0 },
-        /* 28 << 8 */
-        { { 0xe652533b3cef0d7dl,0xd94f7b182bbb4381l,0x838752be0e80f500l,
-            0x8e6e24889e9c9bfbl },
-          { 0xc975169716caca6al,0x866c49d838531ad9l,0xc917e2397151ade1l,
-            0x2d016ec16037c407l },
-          0 },
-        /* 29 << 8 */
-        { { 0x202f6a9c31c71f7bl,0x01f95aa3296ffe5cl,0x5fc0601453cec3a3l,
-            0xeb9912375f498a45l },
-          { 0xae9a935e5d91ba87l,0xc6ac62810b564a19l,0x8a8fe81c3bd44e69l,
-            0x7c8b467f9dd11d45l },
-          0 },
-        /* 31 << 8 */
-        { { 0x21d3634d39eedbbal,0x35cd2e680455a46dl,0xc8cafe65f9d7eb0cl,
-            0xbda3ce9e00cefb3el },
-          { 0xddc17a602c9cf7a4l,0x01572ee47bcb8773l,0xa92b2b018c7548dfl,
-            0x732fd309a84600e3l },
-          0 },
-        /* 33 << 8 */
-        { { 0x65cf89a2e0600afal,0xcf51482f753c5ceal,0x4f2b2d25a5c2bfc5l,
-            0x9381f57187098256l },
-          { 0x89210f676e976e4bl,0xe2cf12f489f47a7bl,0xc21a1658e8484050l,
-            0xa224dbf82f0fff01l },
-          0 },
-        /* 34 << 8 */
-        { { 0xc28961087282513dl,0x9a78c4296a3f8fb8l,0xddfa56f9a31e24b7l,
-            0xb1e14f84fb72611fl },
-          { 0x1d0f70ab45078d65l,0xb247aef3819924d8l,0x8d519f9dbb9877c1l,
-            0x495c2ece8368c7c9l },
-          0 },
-        /* 35 << 8 */
-        { { 0xca9129a0bdb69d12l,0xbe3e319978f39adfl,0xa88506df5fe49438l,
-            0x17ddb7a7aafe894cl },
-          { 0x28d1456f6d1d742fl,0xeec09651917d1268l,0xdecb1c700fd5b4c0l,
-            0x32d14f6acf2861dbl },
-          0 },
-        /* 36 << 8 */
-        { { 0x903f6e3960e913afl,0xb2b58bee98bf140dl,0x9deff025354890b8l,
-            0x155810068d2e924el },
-          { 0xb5755db493c95e5bl,0x3fac42f0dae20eb8l,0x9377c8c109b6d8e0l,
-            0xa43e2b46ab47ceffl },
-          0 },
-        /* 37 << 8 */
-        { { 0x6c3f5a51cb61e7e7l,0x264aebc80d9c73b2l,0xc404b2114a0d9288l,
-            0x5178d3cf8b3a79e9l },
-          { 0x4080be5372a420d7l,0xa39396adef026429l,0x22fbb92e8dde4728l,
-            0x19e42d8874d949fcl },
-          0 },
-        /* 39 << 8 */
-        { { 0xde352d78387f5557l,0x6770149969367413l,0x255bb8c00b0cc102l,
-            0x63cad1be1f4d262el },
-          { 0xf34f9a8a3f8f4fb6l,0x32bc13aae03a969fl,0xb29d4336218371cdl,
-            0x799d76ab285bd210l },
-          0 },
-        /* 40 << 8 */
-        { { 0x5f57b2fbfacfa459l,0x874b1498c1b5aa6bl,0xb9e89acac4db2092l,
-            0x1362bf8ddf4381dal },
-          { 0x25d76830b76328a0l,0x38188b7098572ae4l,0xb43e941429132f7dl,
-            0x7895a29f22dd42c9l },
-          0 },
-        /* 41 << 8 */
-        { { 0x85bded619e808c05l,0x6e0fc2bcc7ef83bbl,0xed70e0b499bedf77l,
-            0x300e777dc1aaffc0l },
-          { 0xe2da2359c43e6d2cl,0xacf6d60a275226e0l,0x18ca38f7f82558bdl,
-            0xd7b017d475ae2591l },
-          0 },
-        /* 43 << 8 */
-        { { 0xed299e2d7cd92ee2l,0x2c08eb37ad847153l,0x7b372aa712acfd81l,
-            0x574d27f5fabda29cl },
-          { 0xbd8247f0f2ee6ebcl,0x8bf76710d06be261l,0x26e95b4bcb186d4cl,
-            0x4fa3ac1d1ebb4a46l },
-          0 },
-        /* 44 << 8 */
-        { { 0xcbde78dd5e22cbb2l,0xf449c85b76bb4391l,0x4289f357b6a4273bl,
-            0x9fce23fd48e84a19l },
-          { 0xcfc32730939eb3b4l,0x8b3d982c16c32280l,0x5ac234bad5f1346cl,
-            0x781954b470769fc9l },
-          0 },
-        /* 45 << 8 */
-        { { 0xff0d4d30062c7dbdl,0x2c483081e6f9fcf0l,0x22f96316d67e070fl,
-            0xdd9be459c0e68c44l },
-          { 0xb9c1edffce2edd4dl,0x1a54782021fc538cl,0x93849be49979aee1l,
-            0x3f313629a590949el },
-          0 },
-        /* 46 << 8 */
-        { { 0x160b836b266be332l,0x49de38215f340575l,0x782e8f6701edce66l,
-            0x83ae008b5df1a93el },
-          { 0x85d33a263ed9ffebl,0xae2f9f961e79db97l,0xf64f209b95ae9e34l,
-            0x2b6b03455e957d49l },
-          0 },
-        /* 47 << 8 */
-        { { 0x7a24a21a331d6bdal,0xfdba302f6328f742l,0x37a36dd47744dca4l,
-            0xda2832ce6fef500fl },
-          { 0x23da304a7b49d73al,0xeede2cebc6ad834fl,0xf21a81248dec3c78l,
-            0x4bc9469b19b721e3l },
-          0 },
-        /* 48 << 8 */
-        { { 0x6faf68feaae6ee70l,0x78f4cc155602b0c9l,0x7e3321a86e94052al,
-            0x2fb3a0d6734d5d80l },
-          { 0xf3b98f3bb25a43bal,0x30bf803119ee2951l,0x7ffee43321b0612al,
-            0x12f775e42eb821d0l },
-          0 },
-        /* 49 << 8 */
-        { { 0x31cc342913e5c1d6l,0x05deaa3cee54e334l,0x21ea2b61cd5087d8l,
-            0x73a1841e70d1b8bcl },
-          { 0xd44e2b41b078bf14l,0xc295732fcea2a30el,0x30cdab42954939f7l,
-            0xc1b4e43a2dba0b7cl },
-          0 },
-        /* 51 << 8 */
-        { { 0x5f33f618b6a20132l,0xc8d73e3cfbbf3022l,0xf3b9844d47ed4320l,
-            0xab5868aa927f00cal },
-          { 0x06cb1113077f6e1cl,0x1417b43a5c94faaal,0x7666cb90cf4cd1e9l,
-            0x99e009f210900566l },
-          0 },
-        /* 52 << 8 */
-        { { 0x4fdff805f57209b5l,0x9bd65ac3f952ac8dl,0x02a3abd3c7969a6fl,
-            0x1359927ef523775fl },
-          { 0xe09b463f88d2e861l,0x661d2199623287c3l,0x821e64495a70eb7al,
-            0x0afbbb1dd67dc684l },
-          0 },
-        /* 53 << 8 */
-        { { 0x2c5a2b2d55750eb2l,0x54d756c29dc28d9fl,0x798c8d113af97f71l,
-            0x54e21ee21f6d1853l },
-          { 0x34e0c8bceffc3f8al,0xed3cc4dda96f193fl,0x86436a84fad97110l,
-            0x8530ca522c97205el },
-          0 },
-        /* 55 << 8 */
-        { { 0x9b6c8452f7236867l,0x21cf260c777b44fdl,0x659fc99dceb00c52l,
-            0xda97098e2439e8dbl },
-          { 0x647efe510ed6e14fl,0x37c8ca122a6600f3l,0x53e89b0badf6f4a7l,
-            0xd9fc8c716645618al },
-          0 },
-        /* 57 << 8 */
-        { { 0x9cecfb8eee6ebd31l,0x4603994b1ff25529l,0x707bc80af4b141c4l,
-            0x3a83d56c07524d3al },
-          { 0x7035c746613a3020l,0x7aa766b286626a1cl,0x3af656095ac76c78l,
-            0x4039c655171e47d6l },
-          0 },
-        /* 59 << 8 */
-        { { 0x79cb147f0ce33b63l,0xa1328a622d160c61l,0xf99538f3cf7eb87el,
-            0x0334d4958e2241d5l },
-          { 0x3ad97e02f3e49e48l,0xdcfcc754037c3679l,0x76078ba61a8ff67cl,
-            0x8054aa55c2a64964l },
-          0 },
-        /* 60 << 8 */
-        { { 0x5852104b87453b28l,0x073e8128b387344dl,0x300e78e4817cfc08l,
-            0x3a82ed4799362088l },
-          { 0xe222304c88de46a4l,0x666c94fd57fadf4al,0x40b2d08ea0c8e108l,
-            0x4b2955b909e050fal },
-          0 },
-        /* 61 << 8 */
-        { { 0x656078565f814881l,0x0fc3d1ce58466117l,0x0ae377d3c6c1e68al,
-            0xe3dd8d5cba566c48l },
-          { 0x9404849ec4b63be6l,0x1e22b03ba5be9c92l,0x08145122a8b03e63l,
-            0x71248243771fe153l },
-          0 },
-        /* 63 << 8 */
-        { { 0xa80a0e83b41ac541l,0xa77570ea533e5f9bl,0x416a14c0216dc452l,
-            0x2a8d728a19f7ee59l },
-          { 0x58494c8cd6552eaal,0x4d635acd60145722l,0xa8e9b127327b1cbcl,
-            0xb429a62e9f8235f0l },
-          0 },
-        /* 64 << 8 */
-        { { 0xf8d112e76e6485b3l,0x4d3e24db771c52f8l,0x48e3ee41684a2f6dl,
-            0x7161957d21d95551l },
-          { 0x19631283cdb12a6cl,0xbf3fa8822e50e164l,0xf6254b633166cc73l,
-            0x3aefa7aeaee8cc38l },
-          0 },
-        /* 65 << 8 */
-        { { 0xd52d2cb746ef1c7el,0xebd4f7c4d8fb6e07l,0x16f77a48cf6dd2b4l,
-            0x6e8f0431e77e4d51l },
-          { 0x59d94cc4e9177bf2l,0xb58a578f7a7181a1l,0xeefbc4cde8f6d330l,
-            0xa66c85560fe05490l },
-          0 },
-        /* 71 << 8 */
-        { { 0x0e6db7a35d9649dal,0x4d2f25193be3d362l,0xcd891fd5a6b137b5l,
-            0xa4b7e4ddacd377a9l },
-          { 0x20ccd6f24355f258l,0x842c08673aafb413l,0xdd55db99d6873b88l,
-            0x04d15f4fea5a2a55l },
-          0 },
-        /* 77 << 8 */
-        { { 0x679cd93dfae289c2l,0x84cadd61ff92ba1bl,0x548b5a6f2cd734aal,
-            0x1827507db8267082l },
-          { 0xa903a6010c6d5b4cl,0xde0d96befdfb952bl,0x2fc9419c6a2e24f9l,
-            0x27333e3936bb3203l },
-          0 },
-        /* 83 << 8 */
-        { { 0x3eb7f062dde4aa6al,0x40effae07f354cc0l,0xe9a14bc2a066c05el,
-            0x7817b11356afc543l },
-          { 0x5f0ed1f28bdda262l,0x001e23d2e007ec13l,0x435878a59c57de6al,
-            0x84d0e20895ac263cl },
-          0 },
-        /* 89 << 8 */
-        { { 0xedf24aec97a66678l,0xd1f93cf8ccf55671l,0x4ed2ce8a9379a49dl,
-            0x64991862c39b0ac9l },
-          { 0xc15b24e31ff67e04l,0x4ee8fc76c3c084fel,0x262012b4f64bcd46l,
-            0x3b5086732425c622l },
-          0 },
-        /* 95 << 8 */
-        { { 0xaa3e451fe65002f7l,0xf5ff2617eb46d253l,0x918d146e572afca2l,
-            0x0a9333b7e56a8553l },
-          { 0x9b7e232d94127dc0l,0xcd0687d6831014e6l,0x725ce5baf08e1c71l,
-            0x56e26f48cde0e4edl },
-          0 },
-        /* 101 << 8 */
-        { { 0xae78dde8db833460l,0xaf1736fe762cb78al,0x5cd85742eae5ac60l,
-            0x7b6c52fe955e981al },
-          { 0x9f823e8555599f97l,0xb9ce70d21a4b46b3l,0xb6076175d7d09829l,
-            0x21e77d22abf390a4l },
-          0 },
-        /* 107 << 8 */
-        { { 0xf704f09da142ad7el,0xb60ec2e1bab9f5d2l,0x4180314681e54d0dl,
-            0x0de50506309335e6l },
-          { 0x4135374e05aec64fl,0xb5d31041b556808al,0x0092eb86049033a8l,
-            0x5b7a2fa0bde0d737l },
-          0 },
-        /* 113 << 8 */
-        { { 0xc0dfa6bbefb40cfal,0x86a6fe279c5037f3l,0xf153cd37f71155f4l,
-            0xf16d6029767664f9l },
-          { 0x7441aa54c635aa57l,0x547f82e9e8186b2el,0x330b464bfbf7c7fel,
-            0xb5556770a1f6fddel },
-          0 },
-        /* 116 << 8 */
-        { { 0xa0a9c5d1e8f9edf1l,0x9814c26b6946cea3l,0xcbb47a37d8e6a08dl,
-            0x517a3d9b2cba11b1l },
-          { 0x94edc73dab43c540l,0x4fd0b82a753e552cl,0x419aab8bd14ae853l,
-            0x94955f9ca68abad8l },
-          0 },
-        /* 119 << 8 */
-        { { 0x3a162e06ed169150l,0x8c9683a6ba1194a8l,0x53fead66ccc28d04l,
-            0xdbb2a85bef09809al },
-          { 0x58e677439d3ab018l,0xff9a2046b6e56bd0l,0xf4b8215eb28061e9l,
-            0xcf16d9f7b10e358fl },
-          0 },
-        /* 125 << 8 */
-        { { 0x265ceae9a55abe39l,0x9e3783f796a98f84l,0xb799628af0757d99l,
-            0xebb5f12665472fb3l },
-          { 0xd83619f52ba517d8l,0x5672105f50382bdfl,0x32c5681c4a12ee9fl,
-            0x31e6f60d834a9fedl },
-          0 },
-    },
-    {
-        /* 0 << 16 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 16 */
-        { { 0x0f0165fce3779ee3l,0xe00e7f9dbd495d9el,0x1fa4efa220284e7al,
-            0x4564bade47ac6219l },
-          { 0x90e6312ac4708e8el,0x4f5725fba71e9adfl,0xe95f55ae3d684b9fl,
-            0x47f7ccb11e94b415l },
-          0 },
-        /* 3 << 16 */
-        { { 0xbd9b8b1dbe7a2af3l,0xec51caa94fb74a72l,0xb9937a4b63879697l,
-            0x7c9a9d20ec2687d5l },
-          { 0x1773e44f6ef5f014l,0x8abcf412e90c6900l,0x387bd0228142161el,
-            0x50393755fcb6ff2al },
-          0 },
-        /* 4 << 16 */
-        { { 0xfabf770977f7195al,0x8ec86167adeb838fl,0xea1285a8bb4f012dl,
-            0xd68835039a3eab3fl },
-          { 0xee5d24f8309004c2l,0xa96e4b7613ffe95el,0x0cdffe12bd223ea4l,
-            0x8f5c2ee5b6739a53l },
-          0 },
-        /* 5 << 16 */
-        { { 0x3d61333959145a65l,0xcd9bc368fa406337l,0x82d11be32d8a52a0l,
-            0xf6877b2797a1c590l },
-          { 0x837a819bf5cbdb25l,0x2a4fd1d8de090249l,0x622a7de774990e5fl,
-            0x840fa5a07945511bl },
-          0 },
-        /* 7 << 16 */
-        { { 0x26e08c07e3533d77l,0xd7222e6a2e341c99l,0x9d60ec3d8d2dc4edl,
-            0xbdfe0d8f7c476cf8l },
-          { 0x1fe59ab61d056605l,0xa9ea9df686a8551fl,0x8489941e47fb8d8cl,
-            0xfeb874eb4a7f1b10l },
-          0 },
-        /* 9 << 16 */
-        { { 0x9164088d977eab40l,0x51f4c5b62760b390l,0xd238238f340dd553l,
-            0x358566c3db1d31c9l },
-          { 0x3a5ad69e5068f5ffl,0xf31435fcdaff6b06l,0xae549a5bd6debff0l,
-            0x59e5f0b775e01331l },
-          0 },
-        /* 10 << 16 */
-        { { 0x2cc5226138634818l,0x501814f4b44c2e0bl,0xf7e181aa54dfdba3l,
-            0xcfd58ff0e759718cl },
-          { 0xf90cdb14d3b507a8l,0x57bd478ec50bdad8l,0x29c197e250e5f9aal,
-            0x4db6eef8e40bc855l },
-          0 },
-        /* 11 << 16 */
-        { { 0xd5d5cdd35958cd79l,0x3580a1b51d373114l,0xa36e4c91fa935726l,
-            0xa38c534def20d760l },
-          { 0x7088e40a2ff5845bl,0xe5bb40bdbd78177fl,0x4f06a7a8857f9920l,
-            0xe3cc3e50e968f05dl },
-          0 },
-        /* 13 << 16 */
-        { { 0x10595b5696a71cbal,0x944938b2fdcadeb7l,0xa282da4cfccd8471l,
-            0x98ec05f30d37bfe1l },
-          { 0xe171ce1b0698304al,0x2d69144421bdf79bl,0xd0cd3b741b21dec1l,
-            0x712ecd8b16a15f71l },
-          0 },
-        /* 15 << 16 */
-        { { 0xe89f48c85963a46el,0x658ab875a99e61c7l,0x6e296f874b8517b4l,
-            0x36c4fcdcfc1bc656l },
-          { 0xde5227a1a3906defl,0x9fe95f5762418945l,0x20c91e81fdd96cdel,
-            0x5adbe47eda4480del },
-          0 },
-        /* 16 << 16 */
-        { { 0xa7a8746a584c5e20l,0x267e4ea1b9dc7035l,0x593a15cfb9548c9bl,
-            0x5e6e21354bd012f3l },
-          { 0xdf31cc6a8c8f936el,0x8af84d04b5c241dcl,0x63990a6f345efb86l,
-            0x6fef4e61b9b962cbl },
-          0 },
-        /* 17 << 16 */
-        { { 0xaa35809ddfe6e2a0l,0xebb4d7d4356a2222l,0x7d500a6a319f33b7l,
-            0x4895a47d4ac99011l },
-          { 0x300ab40bdf3812b2l,0xd0764ec88aec8b9fl,0x86b61d95e591b2a7l,
-            0xc1b2a0b72ed74603l },
-          0 },
-        /* 19 << 16 */
-        { { 0x6001bf5d3849c680l,0xd7a1a4e4c1d3faccl,0xa0f2776418c5e351l,
-            0x0849c0736c29c623l },
-          { 0x3317e143ac751c0cl,0x9bcb1f3eda06200bl,0x40a63a75541419b5l,
-            0x8fad9c983f62c513l },
-          0 },
-        /* 21 << 16 */
-        { { 0xacff0828d03b2242l,0x5a9375c43abb7389l,0x41b1a318d0192baal,
-            0x105bd3100458e97bl },
-          { 0x71582dc7ed496315l,0x8ab2884a4d4bda18l,0xb8b638b494bc5bb8l,
-            0xb42ed1309500bb04l },
-          0 },
-        /* 23 << 16 */
-        { { 0x73e04f02ad1ed952l,0x680051cadfa5bdb7l,0xbe0bef3c0c7437b9l,
-            0x45d6f3a40e65e627l },
-          { 0x5295e060c9436a75l,0xbe84ba78d289ba9el,0x350887fd69c09364l,
-            0xf27bfd17671c64a7l },
-          0 },
-        /* 25 << 16 */
-        { { 0xc8afbdc3adf6ffc5l,0x4a4fb35876385891l,0xc7fa86424d41453fl,
-            0x19490b7672eedd06l },
-          { 0xc883e45337d22d6al,0x8e6e38e4a9009f96l,0x44e2811eb1c560c6l,
-            0x8a0021bf4439cfcfl },
-          0 },
-        /* 27 << 16 */
-        { { 0xba768f8b7615a327l,0x6c8b320d7b15bbe7l,0x5d8d5bcbaaa9ca64l,
-            0x19a2b99f3d13cdfdl },
-          { 0x858288a26f172e10l,0x2412a4da37a00f94l,0xfc67fd2edaa7f6c6l,
-            0x4aea0eadafa2a5c5l },
-          0 },
-        /* 28 << 16 */
-        { { 0x5c80ccef6cd77b30l,0x49978299ec99b6d0l,0x6bf4485eb939d335l,
-            0xc53e61ab86d7c147l },
-          { 0xdd948052fb601dddl,0x34c5eb393511dd48l,0x91f5c67600e6f61cl,
-            0x33f1b525b1e71f34l },
-          0 },
-        /* 29 << 16 */
-        { { 0xb4cb4a151d2dad36l,0x709a61631e60b60dl,0x2f18f3bd932ece4fl,
-            0x70f495a8e92368bel },
-          { 0x6e88be2bb7aeaa6fl,0x4efebd9ae1bf1d6el,0x49925e6e44e94993l,
-            0x33b7aba0ef0517dcl },
-          0 },
-        /* 31 << 16 */
-        { { 0x69ce1f207afe6c37l,0xe1148ba984f68db5l,0x32668bdc2c594a8al,
-            0x2cb60d3063ac4fb3l },
-          { 0x5e6efe1dd9e036f8l,0x917cb2a27db4739fl,0x70ea601ded4e0b5el,
-            0x5928f068ae7ac8a6l },
-          0 },
-        /* 33 << 16 */
-        { { 0x9e4ad0073f2d96abl,0x51a9697f2d058c03l,0xcd5c0a7522d1e795l,
-            0xaa1a121c2ac4f019l },
-          { 0xa837c14c3e3631f4l,0x6a997381236a5576l,0xb305e7db2753782bl,
-            0xae561b0237243afbl },
-          0 },
-        /* 34 << 16 */
-        { { 0x20176baca787897bl,0x057b8b979a9f67d9l,0xe7d5c4f761e14e09l,
-            0x8e4856901e6cd6d0l },
-          { 0x3eeffbba9b925d52l,0xe651a5383046927bl,0x02326d1fe92d4352l,
-            0xad2d6493d697369fl },
-          0 },
-        /* 35 << 16 */
-        { { 0xe9de299c548c4ca5l,0x66f64ef54be3bde3l,0xcf6d39ebf2d5ebc9l,
-            0x665ca727898953e1l },
-          { 0x521ec435e33ac1b4l,0x8418fa7534ab2b82l,0x94d6c0c4771a3a87l,
-            0x21feb6054859ee22l },
-          0 },
-        /* 36 << 16 */
-        { { 0xde7153f8eed9dd1dl,0xba09ad1152ebcb2el,0xaa41b015e1843fb6l,
-            0xf933a2abdd4ce6f0l },
-          { 0x777f834313f6b83fl,0x28df7da4db113a75l,0x6d7d1b3c72a5d143l,
-            0x6f789698966c6ddfl },
-          0 },
-        /* 37 << 16 */
-        { { 0x57d11ed7a95e704el,0x7d5ac6dc380ad582l,0xb175421d5ab6e377l,
-            0x4e383b0ba760dd4dl },
-          { 0xde07b81a352b6cb3l,0x342abe825c2e1704l,0x90988de20dd48537l,
-            0x4a7fec0544821591l },
-          0 },
-        /* 39 << 16 */
-        { { 0xb0e4d17c90a94eb7l,0x27555067aceb0176l,0x587576e15c38c4e2l,
-            0xe647d9dd445f2880l },
-          { 0x00beb2f5ca502f83l,0x4e89e638c44767c7l,0xbef361da154a5757l,
-            0x2dc632a2dc0675f2l },
-          0 },
-        /* 40 << 16 */
-        { { 0xed439a33a72ba054l,0xa3170a15ead265bal,0xcf7eb903fe99a58el,
-            0xcf6db0c633d80c26l },
-          { 0xd031255ef613e71al,0x12ccbe5718ca255cl,0xdd21d0537808c40dl,
-            0xf5488ebc3af2be6bl },
-          0 },
-        /* 41 << 16 */
-        { { 0x589a125ac10f8157l,0x3c8a15bde1353e49l,0x7d9bbd0c22ce2dd0l,
-            0xdfcd019211ac7bb1l },
-          { 0x0e1d67151193c5b1l,0xd4de115ab0e8c285l,0x0b3e94c2272c29fel,
-            0xea640843c8213581l },
-          0 },
-        /* 43 << 16 */
-        { { 0x7a01aeed6aca2231l,0x8135cf2ace80abbel,0xdc1a41b2ae5fdec9l,
-            0xde34ea4da0174364l },
-          { 0xa5104e453cf8b845l,0x4b6fd986675ba557l,0x4bc750af29c8cb4al,
-            0x8bebb266583f9391l },
-          0 },
-        /* 44 << 16 */
-        { { 0x47110d7c1be3f9c5l,0x12b9e4485eadb4ddl,0x6e8c09870b713d41l,
-            0xe1e20356733d56ael },
-          { 0xe68d6bab445ea727l,0x9ef4f6eac934a1a4l,0xe0155547f8cef1c3l,
-            0xdb5c3909159bdcbfl },
-          0 },
-        /* 45 << 16 */
-        { { 0xef0449cb32fa8a37l,0x95071f5dcd246405l,0x1c56ad776c598891l,
-            0x981781de0fa9cd42l },
-          { 0x0f93d456d29c0500l,0x43aa7bc1483f52c4l,0xd7c8736666c8abadl,
-            0x47552530ea5050efl },
-          0 },
-        /* 46 << 16 */
-        { { 0x40dd9ca9fa9b8d3dl,0xf27b7bc056da41d9l,0x87967f4b66db8845l,
-            0xf6918c9444de6bc7l },
-          { 0x4d76d51135568d4dl,0x7ab18f9a40e7fa5al,0x069a44bba5bbbdc6l,
-            0x19e6c04bb4c8f808l },
-          0 },
-        /* 47 << 16 */
-        { { 0x5fd2501108b2b6c7l,0xcce85a3ec41cad21l,0x90857daffdd70387l,
-            0x7a679062c63789f4l },
-          { 0x9c462134ef8666e2l,0xcb7dba108c8505bdl,0x7c4a7e2fc610f2e7l,
-            0x22906f65d68315f9l },
-          0 },
-        /* 48 << 16 */
-        { { 0xf2efe23d442a8ad1l,0xc3816a7d06b9c164l,0xa9df2d8bdc0aa5e5l,
-            0x191ae46f120a8e65l },
-          { 0x83667f8700611c5bl,0x83171ed7ff109948l,0x33a2ecf8ca695952l,
-            0xfa4a73eef48d1a13l },
-          0 },
-        /* 49 << 16 */
-        { { 0x41dd38c1118de9a0l,0x3485cb3be2d8f6f5l,0xd4bac751b1dcc577l,
-            0x2148d93fed12ea6bl },
-          { 0xde3504729da8cb18l,0x6046daf89eb85925l,0xddbc357b942b1044l,
-            0x248e7afe815b8b7cl },
-          0 },
-        /* 51 << 16 */
-        { { 0xd4bb77b3acb21004l,0xe9f236cf83392035l,0xa9894c5c52133743l,
-            0x4d6112749a7b054al },
-          { 0xa61675ea4ba2a553l,0x59c199681da6aa78l,0x3988c36590f474del,
-            0x73e751bbd001be43l },
-          0 },
-        /* 52 << 16 */
-        { { 0x97cacf846604007dl,0x1e92b4b22d47a9f1l,0x858ae0d6374ed165l,
-            0x4c973e6f307aefb8l },
-          { 0x6f524a238a10eb72l,0x7b4a92a9eb2849d6l,0x3678bda42fe91eddl,
-            0x56092acd7c0fc35cl },
-          0 },
-        /* 53 << 16 */
-        { { 0x93bea99b1b9b43c4l,0x2f6af6f3e145fda2l,0x862f0607278adf0dl,
-            0x647be08398456ccal },
-          { 0xce79ba1487250c28l,0x1c1c4fc8efedab42l,0x966f612af90caa8dl,
-            0xb1a2cf6e72c440f8l },
-          0 },
-        /* 55 << 16 */
-        { { 0x2fca1be45b3b7dd5l,0x453c19853c211bcal,0x313cb21969a46484l,
-            0x66082837414bd5dfl },
-          { 0xab7a97bf2ac1cdf7l,0x45cd1792676d778fl,0x42fb6c4f6a5b560al,
-            0x45747fe30b8f17e9l },
-          0 },
-        /* 57 << 16 */
-        { { 0x38b6db6235db6218l,0xa10cdfe1bb54bacal,0x56fd4a1d610f7f6bl,
-            0xc4bea78b76d183d7l },
-          { 0xc0e6ca9fbf730d26l,0x1b1e271aed6cf535l,0x6fef275faadbe375l,
-            0xfa2e8da903e489bal },
-          0 },
-        /* 59 << 16 */
-        { { 0x6f79d25c7c4626ecl,0xfe27690232d55d6cl,0x3f5c5768afa19ce3l,
-            0xa1373777f8834739l },
-          { 0x761d67a8a4ce960al,0xb34de1ea459e656al,0x8725b0f09db6f269l,
-            0x75316f250dbfe22el },
-          0 },
-        /* 60 << 16 */
-        { { 0x091d5b631a093b40l,0xb85c1c075862f24al,0xc5d74eb53e8f85bfl,
-            0xf51c7746cab22456l },
-          { 0xc25cb8d9e761da89l,0x2670ec2fc0f028b5l,0x873fd30d2db9af5cl,
-            0x3d0f1ea18262565el },
-          0 },
-        /* 61 << 16 */
-        { { 0x8f9492c261c23b3cl,0xd366baeb631688a4l,0x55e759e78093bb07l,
-            0xf6d0eaf47218f765l },
-          { 0xb8a174ff54ca583bl,0x790f10e0b23d14cel,0xfebe7333be83cbbal,
-            0xfeb6dcc5eed67536l },
-          0 },
-        /* 63 << 16 */
-        { { 0x175b3bacce027e5bl,0xe0728a99c48252c4l,0x0be25d4507a39c7cl,
-            0xcb9c2d3aba8e8c72l },
-          { 0x6185a48d1abd459al,0x27207feadff9a27bl,0xfd92e8231d34393fl,
-            0x738511534351d965l },
-          0 },
-        /* 64 << 16 */
-        { { 0xfcde7cc8f43a730fl,0xe89b6f3c33ab590el,0xc823f529ad03240bl,
-            0x82b79afe98bea5dbl },
-          { 0x568f2856962fe5del,0x0c590adb60c591f3l,0x1fc74a144a28a858l,
-            0x3b662498b3203f4cl },
-          0 },
-        /* 65 << 16 */
-        { { 0x8ede0fcdc11682eel,0x41e3faa1b2ab5664l,0x58b2a7dc26a35ff5l,
-            0x939bcd6b701b89e9l },
-          { 0x55f66fd188e0838fl,0x99d1a77b4ff1f975l,0x103abbf72e060cc5l,
-            0x91c77beb6bc4bdbbl },
-          0 },
-        /* 71 << 16 */
-        { { 0xcd048abca380cc72l,0x91cab1bbd0e13662l,0x68115b18686de4cel,
-            0x484724e63deccbf5l },
-          { 0xf164ba54f176137el,0x5189793662ab2728l,0x6afdecf9b60a5458l,
-            0xca40472d0aabafd2l },
-          0 },
-        /* 77 << 16 */
-        { { 0x7a9439183b98d725l,0x1c1763e8ece1ea3cl,0x45c44ef639840476l,
-            0x689271e69c009133l },
-          { 0xa017405f56a51fe1l,0xd54cc7253e0d0970l,0x212ad075cfe09e8bl,
-            0x999f21c37af7bf30l },
-          0 },
-        /* 83 << 16 */
-        { { 0xdc2a2af12bf95f73l,0xb88b4ca76de82cbel,0xa31a21aaecb8e84el,
-            0x86d19a601b74f5bel },
-          { 0xc68bf64406008019l,0xe52ab50e9431c694l,0x6375463d627ab11cl,
-            0xdd3eeaa03c0ef241l },
-          0 },
-        /* 89 << 16 */
-        { { 0x608d9cb323f1caf8l,0x95069450b1700741l,0xe3132bd2bc2fa7aal,
-            0xc4f363e7f64e4f06l },
-          { 0xb059c4191ca888c2l,0x1004cb1f8d17bf5dl,0x6b6ba6f934ea5711l,
-            0x071d94abd79b2c8al },
-          0 },
-        /* 95 << 16 */
-        { { 0xc7ef9b42d147a39dl,0x36dd5d770a10cd5bl,0x3bf6cc77d0eea34bl,
-            0x60c84591197479c7l },
-          { 0xf95860ac50ba50edl,0xe1c94a8dc4cdc8fal,0x780818d685e24a23l,
-            0x1950e3c0c8abbd27l },
-          0 },
-        /* 101 << 16 */
-        { { 0x9908c694ae04778el,0x2e37a6790a0d36ffl,0x212a340f52b067bdl,
-            0xec89c9fad080b914l },
-          { 0x920dc2f005ab8a23l,0xecff5c78655e8984l,0x80eedd34f66211acl,
-            0xa7a56366ef58d4d8l },
-          0 },
-        /* 107 << 16 */
-        { { 0x4f95debe2bca42f0l,0xf0346307844334d2l,0x7003a60521d600aal,
-            0x1eb98c6365c5248al },
-          { 0x6757b3822fa202cal,0x32765d399fb12f36l,0xe851b476d7b44c9al,
-            0x27cd7d1b4e0bab4cl },
-          0 },
-        /* 113 << 16 */
-        { { 0xd0c1f7c9c43ea1a3l,0x73d944f49f42907dl,0xd113f34619352c92l,
-            0x86a1ad53b149cdc1l },
-          { 0x32c34e8f848d1be4l,0xba8afda7c3d9360bl,0x17e8bc32eea8bf96l,
-            0x3174cae499c87febl },
-          0 },
-        /* 116 << 16 */
-        { { 0x4b215f016671b47el,0xb67633ca4a8dae2al,0x2915120f79fd3cdbl,
-            0xc1f8a06fb064e6del },
-          { 0xf4d5368cc1d57420l,0x6ada51a8e18de475l,0xa0f0d47cc749d4b0l,
-            0xabfa2c0074526aa5l },
-          0 },
-        /* 119 << 16 */
-        { { 0xf752f6659e5ce44fl,0x7b97ebfa189d35ecl,0x9540cbb90fc609abl,
-            0x19c1dc6999632cc8l },
-          { 0x0a957700e08ca9a8l,0xb0cd0ab7a3246a4el,0xca687cfcc8d6a544l,
-            0xb6281f0035f82a77l },
-          0 },
-        /* 125 << 16 */
-        { { 0x547027012b818036l,0xf72315f729c8f14cl,0x95f1bc15230e74bel,
-            0x2e7c492f1abe20d4l },
-          { 0xe1ea8b1cd7e78ab1l,0xc3f6ba59043585adl,0xac404ea9477ac053l,
-            0xaa6872914ec6d0e3l },
-          0 },
-    },
-    {
-        /* 0 << 24 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 24 */
-        { { 0xd9d0c8c4868af75dl,0xd7325cff45c8c7eal,0xab471996cc81ecb0l,
-            0xff5d55f3611824edl },
-          { 0xbe3145411977a0eel,0x5085c4c5722038c6l,0x2d5335bff94bb495l,
-            0x894ad8a6c8e2a082l },
-          0 },
-        /* 3 << 24 */
-        { { 0xd1e059b21994ef20l,0x2a653b69638ae318l,0x70d5eb582f699010l,
-            0x279739f709f5f84al },
-          { 0x5da4663c8b799336l,0xfdfdf14d203c37ebl,0x32d8a9dca1dbfb2dl,
-            0xab40cff077d48f9bl },
-          0 },
-        /* 4 << 24 */
-        { { 0xf2369f0b879fbbedl,0x0ff0ae86da9d1869l,0x5251d75956766f45l,
-            0x4984d8c02be8d0fcl },
-          { 0x7ecc95a6d21008f0l,0x29bd54a03a1a1c49l,0xab9828c5d26c50f3l,
-            0x32c0087c51d0d251l },
-          0 },
-        /* 5 << 24 */
-        { { 0xf61790abfbaf50a5l,0xdf55e76b684e0750l,0xec516da7f176b005l,
-            0x575553bb7a2dddc7l },
-          { 0x37c87ca3553afa73l,0x315f3ffc4d55c251l,0xe846442aaf3e5d35l,
-            0x61b911496495ff28l },
-          0 },
-        /* 7 << 24 */
-        { { 0x4bdf3a4956f90823l,0xba0f5080741d777bl,0x091d71c3f38bf760l,
-            0x9633d50f9b625b02l },
-          { 0x03ecb743b8c9de61l,0xb47512545de74720l,0x9f9defc974ce1cb2l,
-            0x774a4f6a00bd32efl },
-          0 },
-        /* 9 << 24 */
-        { { 0x327bc002b0131e5bl,0x1739e6d5cb2514d9l,0xc8cbdafe55a81543l,
-            0x5bb1a36ce1137243l },
-          { 0x205da3c517325327l,0xc35c1a36515a057el,0xf00f64c942925f9bl,
-            0xbd14633cb7d59f7al },
-          0 },
-        /* 10 << 24 */
-        { { 0xae2ad171656e8c3al,0xc0e2a4631acd0705l,0x006f6a8aa0b6055cl,
-            0xaf4513d72b65a26el },
-          { 0x3f549e14d616d5bcl,0x64ee395571253b1fl,0xe8b10bc1b8ce243al,
-            0xbcbeace5913a4e77l },
-          0 },
-        /* 11 << 24 */
-        { { 0x47c1004341f37dbdl,0x96eccae36168ecf6l,0x65bde59d1ca46aa3l,
-            0x38a7027ab8698ffal },
-          { 0xa2b89dc86dc34437l,0x5a0a118d43a4153fl,0x9e330a861ce22fd8l,
-            0x28382af6b3bbd3bcl },
-          0 },
-        /* 13 << 24 */
-        { { 0x0b2e27c0d81e0271l,0xa67a7596117a317cl,0x17f08928a6723d99l,
-            0x71a75681485310a3l },
-          { 0x90465462afb66ca9l,0x185e97ccfbbe229dl,0x6a1a606addad8fc2l,
-            0x2431f316b3c797cfl },
-          0 },
-        /* 15 << 24 */
-        { { 0x4703401193529432l,0x1f106bdd30743462l,0xabfb9964cd66d8cal,
-            0x934d9d5ae9bdadd5l },
-          { 0x5976d815908e3d22l,0x344a362f28e057bdl,0xf92cdadc5443dfb3l,
-            0x001297adf089603bl },
-          0 },
-        /* 16 << 24 */
-        { { 0x7f99824f20151427l,0x206828b692430206l,0xaa9097d7e1112357l,
-            0xacf9a2f209e414ecl },
-          { 0xdbdac9da27915356l,0x7e0734b7001efee3l,0x54fab5bbd2b288e2l,
-            0x4c630fc4f62dd09cl },
-          0 },
-        /* 17 << 24 */
-        { { 0x4a2fce605044066bl,0x904a019cfa3a47f4l,0xba81ea9c0c5c0a60l,
-            0xd7e4ea0d96c098bdl },
-          { 0xefe700419cd50a02l,0xc0c839d42d7f048cl,0xe2daf264e09b561fl,
-            0x0cbc13185034b18bl },
-          0 },
-        /* 19 << 24 */
-        { { 0x11e5f2e388323f7al,0xe07a74c2927584cdl,0x1e774b3495613d2dl,
-            0x9c9b52c52c787488l },
-          { 0x3cdd3c3ebe421f08l,0x5ff7819e223e3d5fl,0xba8739b2c1da09b9l,
-            0x6b7263164e8b491bl },
-          0 },
-        /* 21 << 24 */
-        { { 0xb5afd13ca0943befl,0xd651772957abb1ccl,0x9d5a52dc9b61b5bcl,
-            0x85cefaa6806e31cdl },
-          { 0xab84257a720a1deal,0x6a60261bced70d35l,0xc023f94db9d6da61l,
-            0x947f7eec54a0ae0el },
-          0 },
-        /* 23 << 24 */
-        { { 0xc3b787569f83b787l,0xd6d249263694ddd7l,0x58d248945d70a02el,
-            0xac16670e8c278c6al },
-          { 0x71a94d58e370b6e6l,0xe4d763840253db05l,0x99b1c98814b32cfel,
-            0x4e6bd870cc78cc95l },
-          0 },
-        /* 25 << 24 */
-        { { 0xf5f7ca79c8b63614l,0xf3bfb2158af4903cl,0x2bdb9f5496d47bd3l,
-            0xd6e715300e8a63bal },
-          { 0x67e90a497a93bec4l,0x8613478b8c1e63eel,0xe36bd9c8f2dde561l,
-            0x681486518a768689l },
-          0 },
-        /* 27 << 24 */
-        { { 0xef617a9494aa531cl,0x9ac35e2fd6f4ad87l,0xbcd2a047122468fbl,
-            0xbd7a423fef7c5ca6l },
-          { 0xab58cb52064c8040l,0x93ef4ed54a644716l,0xf7d17097c32cd48dl,
-            0xb249a173d17fcf42l },
-          0 },
-        /* 28 << 24 */
-        { { 0x66fe0fffe298cdf5l,0x3f61bea47b2e51b6l,0x7d372117bad3afa4l,
-            0x6521a09cef656e2fl },
-          { 0xb3b8c966e8a58fe7l,0x25203a115a47ebc7l,0xfe81588d5c4be573l,
-            0x6132e2f31f49a03cl },
-          0 },
-        /* 29 << 24 */
-        { { 0xbbe5c108b7a7ecc4l,0x62a5a78ebfd22e4cl,0xb7974033df188bd2l,
-            0xcf11deea4df7d1ael },
-          { 0x99cc774a53ace3eal,0xe0373a71105cc1f6l,0xd751987f133d7a20l,
-            0xab86ee04ae215871l },
-          0 },
-        /* 31 << 24 */
-        { { 0x2094f9a280cd10e6l,0x045232aa7b8a0da7l,0x969a81b69c03244el,
-            0x1293b4ca7e98d955l },
-          { 0x1631421dd68f3ab0l,0xa0106422c3738c82l,0xc5f43845f82c4ff9l,
-            0xb479acbe1aa0f58fl },
-          0 },
-        /* 33 << 24 */
-        { { 0xf1db0267f67683cfl,0xa6b13c9e44ce009dl,0x04b4eed505884a69l,
-            0xf2ff9c16d9087a0bl },
-          { 0x2c53699b3e35b4a6l,0x5020c0142369afb8l,0xf83bfe0095be37f1l,
-            0xd300d8c553b29d80l },
-          0 },
-        /* 34 << 24 */
-        { { 0x16893055811cf4bbl,0x580dd1e55aeb5027l,0xcaf47fba5ae3c71cl,
-            0xde79698129ebbb07l },
-          { 0xbed1db33d262cdd3l,0x78315e3748c7313bl,0xfc9561f02fe1368dl,
-            0xe0209698ccacacc7l },
-          0 },
-        /* 35 << 24 */
-        { { 0xd61af89a781ece24l,0xf3b90626008f41e9l,0xd715dbf7c5693191l,
-            0x8d6c05de6f299edel },
-          { 0xf18d62637ca50aacl,0x7987bf5cb0dd5fdcl,0x424136bd2cfa702bl,
-            0xaa7e237ded859db2l },
-          0 },
-        /* 36 << 24 */
-        { { 0xde7169e4e5d41796l,0x6700333e33c0a380l,0xe20b95780343a994l,
-            0xa745455e1fb3a1c3l },
-          { 0x97e0ff88ce029a7fl,0x3b3481c976e384bcl,0x028b339dddad5951l,
-            0xa1fdcdbae4b95cfcl },
-          0 },
-        /* 37 << 24 */
-        { { 0xcc9221baed20c6adl,0xf2619a51fa9c73aal,0xfc2cff847d7f55a5l,
-            0xd56c23d65f01d4dal },
-          { 0x6d20f88cb3d84d5fl,0x048825f75dcc615dl,0x73634d3f85631a6el,
-            0xa57a02e3ad7b2e2dl },
-          0 },
-        /* 39 << 24 */
-        { { 0x067a8dcf08aa81ffl,0x62948258c23f3d16l,0xb61bd04316f2fe7bl,
-            0xf250f769b6a766b1l },
-          { 0x32df97246d0b241el,0xb736e4bb714e5f88l,0x50da15022c1d40d7l,
-            0x013e0edebdd285a4l },
-          0 },
-        /* 40 << 24 */
-        { { 0x1b92c3a0181a5d8fl,0x6429531d9adb77c7l,0x629152b53af710eel,
-            0x4e3f27370bd5647el },
-          { 0xfb7c392b77553c7dl,0xa930abacefe78c87l,0xf80c8cd6a05a6991l,
-            0x751469b71be5f6f5l },
-          0 },
-        /* 41 << 24 */
-        { { 0xf89f2b0b3e2f2af0l,0x52f634099eefc39al,0x505005c679906cb6l,
-            0x820c2216b2de0b1el },
-          { 0x96f0f2831f20ad7al,0xcd33125c718ffcb0l,0xf6130ef278f0c578l,
-            0x4cda2471d0b76b95l },
-          0 },
-        /* 43 << 24 */
-        { { 0x611dd83f39485581l,0x96c47051803e1b20l,0xefacc736830f44c7l,
-            0x5588d8ce688b12bal },
-          { 0x44f4edf3eee70fadl,0x1026dfd8869539f7l,0xa4c146ee8ddb0e00l,
-            0x9f4f55816efb41c8l },
-          0 },
-        /* 44 << 24 */
-        { { 0x6036ed0236cbace7l,0x5a70e4abada837ddl,0xf06918aff10b2fefl,
-            0x08a8a9f69fd31590l },
-          { 0x6c4a1ba6916af88dl,0x4868bc1466016037l,0x06d345af164228a9l,
-            0x2c1961d19b550dd9l },
-          0 },
-        /* 45 << 24 */
-        { { 0x8b72775c6851f0acl,0x7827242bd70f5975l,0x2de91f1e34db4a6fl,
-            0x586bf3d58538f5eel },
-          { 0xf0a15aed25d9a09bl,0x43018e56f74deb46l,0xc2af1ad0f50e0e67l,
-            0x49cc9528b10cff6fl },
-          0 },
-        /* 46 << 24 */
-        { { 0x05eb146c9d55c425l,0xe2b557ccbc62261fl,0x2a716301bd077089l,
-            0x83a63c81e0527d02l },
-          { 0x055ff7f8a0d9203bl,0x05d09f0525bf5a04l,0x2e44545fb3eb0b30l,
-            0xed7c57c4d279a1adl },
-          0 },
-        /* 47 << 24 */
-        { { 0x6928f6e45e0ebdd5l,0xd7e44ddf092d233bl,0xe7148066d1b7026fl,
-            0xf645a2e53d5f25c3l },
-          { 0x6eeb25ee58ff9eb4l,0x60f1fcf737f87ebfl,0x9eaaf1e5c4679c70l,
-            0x4609fb13b7b7dc7el },
-          0 },
-        /* 48 << 24 */
-        { { 0xae915f5d5fa067d1l,0x4134b57f9668960cl,0xbd3656d6a48edaacl,
-            0xdac1e3e4fc1d7436l },
-          { 0x674ff869d81fbb26l,0x449ed3ecb26c33d4l,0x85138705d94203e8l,
-            0xccde538bbeeb6f4al },
-          0 },
-        /* 49 << 24 */
-        { { 0x27f317af2b33987fl,0xd2d3cf5d51e59588l,0x333999bd031f27c9l,
-            0x6ddfa3f22e0a3306l },
-          { 0x23e0e651990041b0l,0xf028aba1585837acl,0x1c6ad72b25226f53l,
-            0xf243c991d1fca64al },
-          0 },
-        /* 51 << 24 */
-        { { 0x72b8a13272cbae1fl,0xfe0b1c4fbfdbd64al,0x98bc7876c5e76921l,
-            0x51c726bfdb1f5af7l },
-          { 0x97e88a842c186e8bl,0x9ed99516ed8eb7b4l,0x3e54a17dafc818ebl,
-            0xfcfbf25a1e8f77d8l },
-          0 },
-        /* 52 << 24 */
-        { { 0x7780d7d68f7d5c6el,0x6725b49a454101e6l,0xceddc26586b0770cl,
-            0xc26624615666f504l },
-          { 0x16b77477ce040f75l,0x13f9113c293f8b45l,0xff0cfa07e2dcc91el,
-            0x1948d8bd41c202f5l },
-          0 },
-        /* 53 << 24 */
-        { { 0x4c6ae39a1dfbe13al,0xafb1e5c46be9c200l,0x39e728d168bb08c3l,
-            0xc794b905acc9166fl },
-          { 0x1cb0dec2d9c7c3e4l,0xc4c3053289f14d65l,0x4af80801a6a9d609l,
-            0x79d7e82de0d6ab24l },
-          0 },
-        /* 55 << 24 */
-        { { 0xb905c6af8ad4cf6el,0x785590b0f6d1be13l,0x78f402c2a0ef76bel,
-            0x739b22ea5c19a40bl },
-          { 0xd4d3262553d596b6l,0x01598eb4d571666bl,0xf8dc150b8173486al,
-            0xd8aa43af15e94f09l },
-          0 },
-        /* 57 << 24 */
-        { { 0xcfa387cd984393b5l,0x1645659e21a1bf92l,0xb4ab3966dd46c7eel,
-            0xcf8c296d89482623l },
-          { 0x72e4d01cf976b4c0l,0x44ad07e8fa0fa5ebl,0xd6c82681b486fdd2l,
-            0x2d9074f89b8845b4l },
-          0 },
-        /* 59 << 24 */
-        { { 0x96e4fc08d96862dbl,0xf9e29bb6c50c14b2l,0xfedaad64f8f9be75l,
-            0xab6b2d79ae9e1274l },
-          { 0x033e3eb58d84dec0l,0xc136904ccbd113e7l,0xb82b0aed6061f289l,
-            0x3476d9247b699e25l },
-          0 },
-        /* 60 << 24 */
-        { { 0x8fb5ceeb969231dcl,0xaed13be1686ff6cdl,0x71d7c67bdd69db87l,
-            0x49613e08fb53f33al },
-          { 0x2899729ead8e802fl,0x83bfde49d1982a1dl,0x675c45ea878239d2l,
-            0xb7bf59cd0d8240d3l },
-          0 },
-        /* 61 << 24 */
-        { { 0x853d8cd1baf53b8bl,0x9c73d04cff95fc18l,0xae8a94412d1d6aacl,
-            0xd8a15ce901500b70l },
-          { 0xaef813499aacba59l,0x2cd2ba0ac493cd8dl,0x01c37ee1f398f034l,
-            0xed72d51d0f7299fcl },
-          0 },
-        /* 63 << 24 */
-        { { 0x2c204940e7592fb1l,0xcc1bb19b49366f08l,0x31855e8a7c927935l,
-            0x16f7e9a2c590b81dl },
-          { 0xa5fbb7c1ed8df240l,0x7b5204122de2d7f5l,0x7eb1eb989a637588l,
-            0x5ef4eca89540d2e8l },
-          0 },
-        /* 64 << 24 */
-        { { 0x55d5c68da61a76fal,0x598b441dca1554dcl,0xd39923b9773b279cl,
-            0x33331d3c36bf9efcl },
-          { 0x2d4c848e298de399l,0xcfdb8e77a1a27f56l,0x94c855ea57b8ab70l,
-            0xdcdb9dae6f7879bal },
-          0 },
-        /* 65 << 24 */
-        { { 0x811e14dd9594afb8l,0xaf6c1b10d349124al,0x8488021b6528a642l,
-            0xecf6834341cf1447l },
-          { 0x7a40acb756924446l,0xd9c11bbed98ec4cfl,0x0cef00bfb2bff163l,
-            0xfaaad8015432803bl },
-          0 },
-        /* 71 << 24 */
-        { { 0x5a217d5e6b075cbel,0x7ef88d1dc89b513bl,0xb6d015da0531c93bl,
-            0x477b502a6333834al },
-          { 0x4655e48b2fb458d5l,0x93f21a7cb7674ca8l,0xa0616786502d1f3al,
-            0x82d16d17f26bb6ccl },
-          0 },
-        /* 77 << 24 */
-        { { 0x3d995aa9183c1688l,0xa125906c3766d2e8l,0x23ed7871c5f10d5bl,
-            0xdfe1e1cc6df80368l },
-          { 0x8bfcb54271eaae2cl,0xe94e6f910945a7bbl,0xd543ef90862f650al,
-            0x0dc043b803eed66bl },
-          0 },
-        /* 83 << 24 */
-        { { 0x0c6a5620060d2ccdl,0xcd8200e37a8a03a4l,0x6018d304793867e6l,
-            0xad23dd61a74d054dl },
-          { 0x5a856faeebc21eb4l,0x66be16714b5cd7dbl,0xe0d0441ec75f8c9dl,
-            0xb80ca9ecf90dbc6dl },
-          0 },
-        /* 89 << 24 */
-        { { 0xbd6902ccd24692cbl,0xbcce6bbc21920408l,0x40f120ca55dec4c5l,
-            0xd9f1f5ef5361c8b3l },
-          { 0x535d368226935dffl,0x9635447b01a9998al,0x8c4ec40d99e36d12l,
-            0xbaeef8912b793369l },
-          0 },
-        /* 95 << 24 */
-        { { 0xded3a51c1cd887ebl,0xd43225568376515cl,0xdaf3a2271ca7c097l,
-            0x089156fdecd4d90cl },
-          { 0x2b354810ca0727c9l,0xb7257c1966c19d8cl,0x5e68a379432d5072l,
-            0x75c04c2443e585c7l },
-          0 },
-        /* 101 << 24 */
-        { { 0xb5ba2a8fe5e0952fl,0x2c2d086811040b4el,0x27448bd5f818e253l,
-            0x720f677987a92c85l },
-          { 0x2c9b2367b9d035fal,0xf18ad8ce16c15ab9l,0xd65a360841bd57eel,
-            0xeb4b07c9ff6ae897l },
-          0 },
-        /* 107 << 24 */
-        { { 0xcffb6d71d38589acl,0x812372920fa509d3l,0x94db5ba6e54725e8l,
-            0x1ad2b4206cfbb825l },
-          { 0x8592c1f238cfb9f2l,0xbe8e917e0eec6a27l,0x53921bfe9d93d42fl,
-            0x1aa95e6269454a35l },
-          0 },
-        /* 113 << 24 */
-        { { 0xc25e8934d898049dl,0xeeaf4e6d3bb3d459l,0xc3ac44447d29ad10l,
-            0xccdf9fcbcef8fa04l },
-          { 0x1d995a3fb9679cb9l,0x3d6c5eab46fabc14l,0xd3849ff066385d4dl,
-            0xc0eb21bacff08be2l },
-          0 },
-        /* 116 << 24 */
-        { { 0x8213c71e90d13fd6l,0x114321149bb6b733l,0xaaf8037880ac4902l,
-            0xb24e046b555f7557l },
-          { 0x5f6ed2881db79832l,0xd493a758ac760e5dl,0xbc30a2a7a1c0f570l,
-            0xa5009807161174e3l },
-          0 },
-        /* 119 << 24 */
-        { { 0x9e9b864a6889e952l,0xee908932f352f31al,0xe421f2423166b932l,
-            0x6dd4aa3b7ddbdb35l },
-          { 0x553cc5639e8b88a4l,0x05457f171f04704dl,0x1dcc3004c9554e6bl,
-            0x3a4a3a253f1b61e7l },
-          0 },
-        /* 125 << 24 */
-        { { 0x7ac0a5e7c56e303al,0x7c7bab64037b0a19l,0x11f103fcc8d29a2bl,
-            0x7d99dc46cf0b1340l },
-          { 0x0481588ceffba92el,0x8a817356b04e77bcl,0x19edf4dbce1b708dl,
-            0xa2a1f7a6e6f9d52cl },
-          0 },
-    },
-    {
-        /* 0 << 32 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 32 */
-        { { 0x202886024147519al,0xd0981eac26b372f0l,0xa9d4a7caa785ebc8l,
-            0xd953c50ddbdf58e9l },
-          { 0x9d6361ccfd590f8fl,0x72e9626b44e6c917l,0x7fd9611022eb64cfl,
-            0x863ebb7e9eb288f3l },
-          0 },
-        /* 3 << 32 */
-        { { 0xa18f07e0e90fb21el,0x00fd2b80bba7fca1l,0x20387f2795cd67b5l,
-            0x5b89a4e7d39707f7l },
-          { 0x8f83ad3f894407cel,0xa0025b946c226132l,0xc79563c7f906c13bl,
-            0x5f548f314e7bb025l },
-          0 },
-        /* 4 << 32 */
-        { { 0x0ee6d3a7c35d8794l,0x042e65580356bae5l,0x9f59698d643322fdl,
-            0x9379ae1550a61967l },
-          { 0x64b9ae62fcc9981el,0xaed3d6316d2934c6l,0x2454b3025e4e65ebl,
-            0xab09f647f9950428l },
-          0 },
-        /* 5 << 32 */
-        { { 0xc1b3d3d331b85f09l,0x0f45354aa88ae64al,0xa8b626d32fec50fdl,
-            0x1bdcfbd4e828834fl },
-          { 0xe45a2866cd522539l,0xfa9d4732810f7ab3l,0xd8c1d6b4c905f293l,
-            0x10ac80473461b597l },
-          0 },
-        /* 7 << 32 */
-        { { 0xbbb175146fc627e2l,0xa0569bc591573a51l,0xa7016d9e358243d5l,
-            0x0dac0c56ac1d6692l },
-          { 0x993833b5da590d5fl,0xa8067803de817491l,0x65b4f2124dbf75d0l,
-            0xcc960232ccf80cfbl },
-          0 },
-        /* 9 << 32 */
-        { { 0x35d742806cf3d65bl,0x4b7c790678b28dd9l,0xc4fcdd2f95e1f85fl,
-            0xcf6fb7ba591350b6l },
-          { 0x9f8e3287edfc26afl,0xe2dd9e73c2d0ed9al,0xeab5d67f24cbb703l,
-            0x60c293999a759a5al },
-          0 },
-        /* 10 << 32 */
-        { { 0xcf8625d7708f97cdl,0xfb6c5119ea419de4l,0xe8cb234dc03f9b06l,
-            0x5a7822c335e23972l },
-          { 0x9b876319a284ff10l,0xefcc49977093fdcel,0xdddfd62a878fe39al,
-            0x44bfbe53910aa059l },
-          0 },
-        /* 11 << 32 */
-        { { 0xfb93ca3d7ca53d5fl,0x432649f004379cbfl,0xf506113acba2ff75l,
-            0x4594ae2103718b35l },
-          { 0x1aa6cee50d044627l,0xc0e0d2b7f5c94aa2l,0x0bf33d3dee4dd3f5l,
-            0xaca96e288477c97al },
-          0 },
-        /* 13 << 32 */
-        { { 0x995c068e6861a713l,0xa9ba339463de88dcl,0xab954344689a964fl,
-            0x58195aec0f5a0d6cl },
-          { 0xc5f207d5c98f8b50l,0x6600cd280c98ccf6l,0x1a680fe339c3e6c2l,
-            0xa23f3931660e87c0l },
-          0 },
-        /* 15 << 32 */
-        { { 0x43bc1b42c78440a1l,0x9a07e22632ac6c3fl,0xaf3d7ba10f4bcd15l,
-            0x3ad43c9da36814c6l },
-          { 0xca11f742a0c9c162l,0xd3e06fc6c90b96ecl,0xeace6e766bf2d03fl,
-            0x8bcd98e8f8032795l },
-          0 },
-        /* 16 << 32 */
-        { { 0xe27a6dbe305406ddl,0x8eb7dc7fdd5d1957l,0xf54a6876387d4d8fl,
-            0x9c479409c7762de4l },
-          { 0xbe4d5b5d99b30778l,0x25380c566e793682l,0x602d37f3dac740e3l,
-            0x140deabe1566e4ael },
-          0 },
-        /* 17 << 32 */
-        { { 0x7be3ddb77099ae96l,0x83d6157306e0da6al,0x31bcac5f74bf9870l,
-            0x7f7aa3b422b256f1l },
-          { 0xff84d63caa212e20l,0x7d636556decdc8b5l,0x8fed824dbf909d62l,
-            0x62d70186e5fb1445l },
-          0 },
-        /* 19 << 32 */
-        { { 0x8796989f67d8ab8al,0xa46282253700b772l,0xa353cadf05f799abl,
-            0x7a8be2741eeb06bbl },
-          { 0xf74a367e4653b134l,0x4e43449660c70340l,0xc99b6d6b72e10b18l,
-            0xcf1adf0f1ba636e1l },
-          0 },
-        /* 21 << 32 */
-        { { 0xb0260fb57c6a0958l,0xae791b9c2fc2731el,0xb339f2bf8ce6e575l,
-            0x769214a816e2639fl },
-          { 0xbaf422e1346da10el,0xc7805fdf7a56f463l,0xf47b6b766f845428l,
-            0x8f21369e38492948l },
-          0 },
-        /* 23 << 32 */
-        { { 0x2bac716a17931a90l,0x42a5e27cc8267236l,0xfd4b367c0bafeb78l,
-            0x5856e69c6173db02l },
-          { 0xfaac7358973d73c4l,0xbfbffcc36768d285l,0x05444ff2be3eb243l,
-            0x9f8d3692f3c323fel },
-          0 },
-        /* 25 << 32 */
-        { { 0xac296863221c31a9l,0x46f3a24ef1ca99a9l,0xd927648a7535a864l,
-            0xd7e3c47d5848e497l },
-          { 0xc19595b782a98ac7l,0x9a9bf627273ff554l,0xe29aa48fb62298a1l,
-            0xed3f068ee797e9e3l },
-          0 },
-        /* 27 << 32 */
-        { { 0x8d16a1660eb9227bl,0xe04c6bc58c37c74bl,0xd1be9585cc1ef78cl,
-            0xa5cfe1962e929d9bl },
-          { 0xc9b0ea21417c1cc6l,0x316352d345b79599l,0xc1502c4dc2d54af7l,
-            0xe7f4412990f83445l },
-          0 },
-        /* 28 << 32 */
-        { { 0x0f6704abd95917e8l,0x168dafaeaec6e899l,0xd2833e8cde710027l,
-            0x34ea277e68ee3c59l },
-          { 0x3689e2350054d4e5l,0x6f3a568d11013943l,0xb5ce1ff69bc2b144l,
-            0x705bfe7e72b33a59l },
-          0 },
-        /* 29 << 32 */
-        { { 0x1baa4f02c8e93284l,0xec6b93ea3c97d3e8l,0xb656c149034f8b32l,
-            0x3cab9063cd4cc69fl },
-          { 0xd8de5989d61031ccl,0xcf85329fc1b1de1dl,0xf18b78b323d8cb9al,
-            0x6dc04bc61a6b69eal },
-          0 },
-        /* 31 << 32 */
-        { { 0x79cf86314a1d4f8fl,0xda5ba331aa47394el,0x36f9c0be8ff20527l,
-            0xccdc719bbc7097f6l },
-          { 0x2304a3ba5cb052bbl,0xab80cdea392f0ab5l,0x0ac1858bf38de03bl,
-            0xd6e2119878a8f55dl },
-          0 },
-        /* 33 << 32 */
-        { { 0x6bdebc26584bc618l,0x499f0f1894591499l,0xd35ed50bf4a573dal,
-            0x5a622e73ff2792d0l },
-          { 0x8510cbce68d41a3bl,0x6610f43c94e919afl,0x4527373dc163c8a1l,
-            0x50afb46f280a8a7dl },
-          0 },
-        /* 34 << 32 */
-        { { 0x33e779cd8de7707al,0xf94bbd94438f535bl,0x61159864be144878l,
-            0xb6623235f098ce4al },
-          { 0x6813b71ba65568d8l,0x6603dd4c2f796451l,0x9a97d88c8b9ee5b2l,
-            0xaaa4593549d5926cl },
-          0 },
-        /* 35 << 32 */
-        { { 0x2e01fc75ebe75bf2l,0x8270318d6cbdd09cl,0x534e4f21d3f1a196l,
-            0x6c9eaeca9459173el },
-          { 0xda454fe0b642a1d4l,0xe45b69bfc4664c4al,0x4724bd423e078dc8l,
-            0x39ac8fe603336b81l },
-          0 },
-        /* 36 << 32 */
-        { { 0x0a2e53dd302e9485l,0x75882a19deaa9ff4l,0xe283242eac8de4ddl,
-            0x2742105cc678dba7l },
-          { 0x9f6f0a88cdb3a8a2l,0x5c9d3338f722e894l,0xf1fa3143c38c31c1l,
-            0x22137e2db18c77acl },
-          0 },
-        /* 37 << 32 */
-        { { 0xd821665e368d7835l,0x3300c012b596c6ecl,0xb60da7353557b2ddl,
-            0x6c3d9db6fb8cf9ael },
-          { 0x092d8b0b8b4b0d34l,0x900a0bf4b3d4107dl,0x75371a245e813ec3l,
-            0x91125a17f2ad56d5l },
-          0 },
-        /* 39 << 32 */
-        { { 0x5e6594e2fe0073e6l,0x908a93778be13cb7l,0xa2c3d5c8ac26617cl,
-            0xa0bab085c317c6b9l },
-          { 0x0bdc183b83664109l,0x6bbba2b468f9dcd9l,0x697a50785814be41l,
-            0x12a59b183a5e5f98l },
-          0 },
-        /* 40 << 32 */
-        { { 0xbd9802e6c30fa92bl,0x5a70d96d9a552784l,0x9085c4ea3f83169bl,
-            0xfa9423bb06908228l },
-          { 0x2ffebe12fe97a5b9l,0x85da604971b99118l,0x9cbc2f7f63178846l,
-            0xfd96bc709153218el },
-          0 },
-        /* 41 << 32 */
-        { { 0xb5a85c61bfa70ca6l,0x4edc7f2d4c1f745fl,0x05aea9aa3ded1eb5l,
-            0x750385efb82e5918l },
-          { 0xdcbc53221fdc5164l,0x32a5721f6794184el,0x5c5b2269ff09c90bl,
-            0x96d009115323ca42l },
-          0 },
-        /* 43 << 32 */
-        { { 0x12c73403f43f1440l,0xc94813eb66cc1f50l,0x04d5957b9b035151l,
-            0x76011bca4bfaafa8l },
-          { 0x56806c13574f1f0al,0x98f63a4697652a62l,0x17c63ef4a3178de9l,
-            0xf7ce961a65009a52l },
-          0 },
-        /* 44 << 32 */
-        { { 0x58f92aebe4173516l,0xdc37d99275e42d44l,0x76dcec5b4d48e1bal,
-            0x07e0608e25676448l },
-          { 0xa1877bcd1d4af36al,0x38b62b3c5a8ccf0cl,0x60522e88aeab7f75l,
-            0xbef213ed5e03547al },
-          0 },
-        /* 45 << 32 */
-        { { 0x8acd5ba4e6ed0282l,0x792328f06a04531dl,0xe95de8aa80297e50l,
-            0x79d33ce07d60e05cl },
-          { 0xcb84646dd827d602l,0xd3421521302a608cl,0x867970a4524f9751l,
-            0x05e2f7e347a75734l },
-          0 },
-        /* 46 << 32 */
-        { { 0x64e4de4a01c66263l,0xbcfe16a4d0033d4cl,0x359e23d4817de1dcl,
-            0xb01e812ec259449cl },
-          { 0x90c9ade2df53499fl,0xabbeaa27288c6862l,0x5a655db4cd1b896fl,
-            0x416f10a5a022a3d6l },
-          0 },
-        /* 47 << 32 */
-        { { 0x0d17e1ef98601fd5l,0x9a3f85e0eab76a6fl,0x0b9eaed1510b80a1l,
-            0x3282fd747ec30422l },
-          { 0xaca5815a70a4a402l,0xfad3121cf2439cb2l,0xba251af81fccabd6l,
-            0xb382843fa5c127d5l },
-          0 },
-        /* 48 << 32 */
-        { { 0x958381db1782269bl,0xae34bf792597e550l,0xbb5c60645f385153l,
-            0x6f0e96afe3088048l },
-          { 0xbf6a021577884456l,0xb3b5688c69310ea7l,0x17c9429504fad2del,
-            0xe020f0e517896d4dl },
-          0 },
-        /* 49 << 32 */
-        { { 0x442fdfe920cd1ebel,0xa8317dfa6a250d62l,0x5214576d082d5a2dl,
-            0xc1a5d31930803c33l },
-          { 0x33eee5b25e4a2cd0l,0x7df181b3b4db8011l,0x249285145b5c6b0bl,
-            0x464c1c5828bf8837l },
-          0 },
-        /* 51 << 32 */
-        { { 0x5464da65d55babd1l,0x50eaad2a0048d80fl,0x782ca3dd2b9bce90l,
-            0x41107164ab526844l },
-          { 0xad3f0602d56e0a5fl,0xc1f0248018455114l,0xe05d8dcab1527931l,
-            0x87818cf5bb1295d7l },
-          0 },
-        /* 52 << 32 */
-        { { 0x95aeb5bd483e333al,0x003af31effeaededl,0xfc5532e87efb1e4fl,
-            0xb37e0fb52dfa24a5l },
-          { 0x485d4cecdc140b08l,0xb81a0d23983bd787l,0xd19928dae8d489fdl,
-            0x3fa0312c177b9dbdl },
-          0 },
-        /* 53 << 32 */
-        { { 0xade391470c6d7e88l,0x4fd1e8cd47072c45l,0x145760fed5a65c56l,
-            0x198960c7be4887del },
-          { 0xfe7974a82640257al,0xf838a19b774febefl,0xb2aecad11b6e988el,
-            0x643f44fa448e4a8fl },
-          0 },
-        /* 55 << 32 */
-        { { 0xc35ceffdee756e71l,0x2c1364d88ea932c4l,0xbd594d8d837d2d9fl,
-            0x5b334bdac9d74d48l },
-          { 0x72dc3e03b8fac08bl,0x38f01de006fdf70fl,0x4bde74b31d298ba4l,
-            0x2598d183ad5f42a9l },
-          0 },
-        /* 57 << 32 */
-        { { 0x02c6ba15f62befa2l,0x6399ceb55c8ccee9l,0x3638bd6e08d3473el,
-            0xb8f1f13d2f8f4a9cl },
-          { 0x50d7560655827a74l,0x8d6e65f33fb4f32cl,0x40a5d21189ee621al,
-            0x6d3f9e11c4474716l },
-          0 },
-        /* 59 << 32 */
-        { { 0xcb633a4ce9b2bb8fl,0x0475703f8c529253l,0x61e007b5a8878873l,
-            0x342d77ba14504159l },
-          { 0x2925175c313578dfl,0x4e631897b6b097f1l,0xe64d138929350e41l,
-            0x2fb20608ec7adccdl },
-          0 },
-        /* 60 << 32 */
-        { { 0xa560c234d5c0f5d1l,0x74f84bf62bdef0efl,0x61ed00005cbd3d0bl,
-            0xc74262d087fb408bl },
-          { 0xad30a6496cc64128l,0x708e3a31a4a8b154l,0xaf21ce2637f82074l,
-            0x31d33b38204c9a74l },
-          0 },
-        /* 61 << 32 */
-        { { 0x8f609fe04cc2f575l,0xe44f9784b35488c4l,0x0d464bb6180fa375l,
-            0x4f44d5d2de2247b8l },
-          { 0xf538eb38141ef077l,0x781f8f6e8fa456a4l,0x67e9a46429b4f39dl,
-            0x245d21e8b704c3e9l },
-          0 },
-        /* 63 << 32 */
-        { { 0x45a94ee858ffa7cdl,0x4d38bc6818053549l,0x0b4bc65a499d79f3l,
-            0xa81e3ab09159cab7l },
-          { 0xf13716efb47898cel,0xb7ee597c2e2d9044l,0x09396b90e6158276l,
-            0x5c644dc36a533fcel },
-          0 },
-        /* 64 << 32 */
-        { { 0xcca4428dbbe5a1a9l,0x8187fd5f3126bd67l,0x0036973a48105826l,
-            0xa39b6663b8bd61a0l },
-          { 0x6d42deef2d65a808l,0x4969044f94636b19l,0xf611ee47dd5d564cl,
-            0x7b2f3a49d2873077l },
-          0 },
-        /* 65 << 32 */
-        { { 0xbe4c16c3bf429668l,0xd32f56f0ef35db3bl,0xae8355de9ea4e3f1l,
-            0x8f66c4a2a450944el },
-          { 0xafab94c8b798fbe2l,0x18c57baff7f3d5cfl,0x692d191c5cfa5c7dl,
-            0xc0c25f69a689daebl },
-          0 },
-        /* 71 << 32 */
-        { { 0x15fb3ae398340d4cl,0xa8b9233a7de82134l,0x44971a545fc0dbc6l,
-            0xb2b4f0f3a1d3f094l },
-          { 0x8d9eaba1b6242bd4l,0xd8aad777787cc557l,0xb1ab8b7870d1a2bbl,
-            0x5d20f48cead3bfe3l },
-          0 },
-        /* 77 << 32 */
-        { { 0x4dacbf09a2bf9772l,0x969a4c4357aa8457l,0xadbe673b273ebfc5l,
-            0xb85582bb927778c9l },
-          { 0x748371855c03752cl,0xc337bc6bc2f60d11l,0x2c3838e4ad456a09l,
-            0xaf479c897e381842l },
-          0 },
-        /* 83 << 32 */
-        { { 0x8530ae751b1aea77l,0xf43b923ba8310cb9l,0x9c1a60c6bf4dd6c5l,
-            0x11885b863e3aaaa5l },
-          { 0x594a8fa90f69821el,0x1eece3d66bc37998l,0x1fd718f518df32bfl,
-            0x1c00c7d461d84082l },
-          0 },
-        /* 89 << 32 */
-        { { 0xd67ee3a4c763c3cfl,0x760b128305969234l,0x1a5ff331ec17f2d1l,
-            0x25f0392a84fecfefl },
-          { 0xb1bc004a3a80d47el,0xf450bf08182fee3bl,0xf11117681e19751el,
-            0x5b4127dae28ed23fl },
-          0 },
-        /* 95 << 32 */
-        { { 0x91e00defdaf08f09l,0x7ef41724f4738a07l,0x990fbbceaf1263fcl,
-            0x779121e3e6eeb5aal },
-          { 0x3e162c7a5a3ecf52l,0x73ae568a51be5faal,0x8bea1bfa451be8a9l,
-            0x3e8cd5db90e11097l },
-          0 },
-        /* 101 << 32 */
-        { { 0x90390f7224d27159l,0x685c139efd07e5d4l,0x4e21e44a3bc234a8l,
-            0x61b50f34eeb14dacl },
-          { 0x7beb0aa087555d58l,0x781326bcc806f0d2l,0xc289537a1eb7199fl,
-            0x44a31a037b42766el },
-          0 },
-        /* 107 << 32 */
-        { { 0x7d778206edde4b40l,0x34539fa18eb92fcdl,0x5a0bdd79bf52a552l,
-            0x066d3672fdcca75el },
-          { 0xd73fa893e28b5a5bl,0xb495135876c38698l,0x44469b0114ae16cfl,
-            0xb428c763691d6618l },
-          0 },
-        /* 113 << 32 */
-        { { 0x9022db8b69196353l,0x152ebb7dd7a4afd0l,0xea36fae57fcf1765l,
-            0xa8fc00ba0decea8al },
-          { 0x1047206a0c0b0414l,0x6607d8ade076df28l,0xf343e19966b8aba1l,
-            0x7f03c1ad311e208dl },
-          0 },
-        /* 116 << 32 */
-        { { 0xe6b4c96e888f3870l,0xa21bb618fe544042l,0x7122ee88bd817699l,
-            0xcb38ecebfa66e173l },
-          { 0x6ed5b3482c9cc05fl,0x591affc84ae0fd9el,0x7cf325ac6e7aaac0l,
-            0x2397c053d05e5be0l },
-          0 },
-        /* 119 << 32 */
-        { { 0x95363f61eaa96552l,0xe03bc6b38fb15b73l,0xa5c5808f2c389053l,
-            0xcd021e6c11b2030cl },
-          { 0x349ca9bdc038e30al,0x0a3368d4165afa2cl,0x043630debbfa1cc6l,
-            0xb8c4456ba7cdbf69l },
-          0 },
-        /* 125 << 32 */
-        { { 0x63aa3315fd7d2983l,0xaf4c96afa6a04bedl,0x3a5c0b5410814a74l,
-            0x9906f5e30f9b0770l },
-          { 0x622be6523676986fl,0x09ac5bc0173e7cb5l,0x1c40e56a502c8b3cl,
-            0xabb9a0f7253ce8f6l },
-          0 },
-    },
-    {
-        /* 0 << 40 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 40 */
-        { { 0x889f6d65533ef217l,0x7158c7e4c3ca2e87l,0xfb670dfbdc2b4167l,
-            0x75910a01844c257fl },
-          { 0xf336bf07cf88577dl,0x22245250e45e2acel,0x2ed92e8d7ca23d85l,
-            0x29f8be4c2b812f58l },
-          0 },
-        /* 3 << 40 */
-        { { 0xc51e414351facc61l,0xbaf2647de68a25bcl,0x8f5271a00ff872edl,
-            0x8f32ef993d2d9659l },
-          { 0xca12488c7593cbd4l,0xed266c5d02b82fabl,0x0a2f78ad14eb3f16l,
-            0xc34049484d47afe3l },
-          0 },
-        /* 4 << 40 */
-        { { 0xa6f3d574c005979dl,0xc2072b426a40e350l,0xfca5c1568de2ecf9l,
-            0xa8c8bf5ba515344el },
-          { 0x97aee555114df14al,0xd4374a4dfdc5ec6bl,0x754cc28f2ca85418l,
-            0x71cb9e27d3c41f78l },
-          0 },
-        /* 5 << 40 */
-        { { 0x09c1670209470496l,0xa489a5edebd23815l,0xc4dde4648edd4398l,
-            0x3ca7b94a80111696l },
-          { 0x3c385d682ad636a4l,0x6702702508dc5f1el,0x0c1965deafa21943l,
-            0x18666e16610be69el },
-          0 },
-        /* 7 << 40 */
-        { { 0x45beb4ca2a604b3bl,0x56f651843a616762l,0xf52f5a70978b806el,
-            0x7aa3978711dc4480l },
-          { 0xe13fac2a0e01fabcl,0x7c6ee8a5237d99f9l,0x251384ee05211ffel,
-            0x4ff6976d1bc9d3ebl },
-          0 },
-        /* 9 << 40 */
-        { { 0xdde0492316e043a2l,0x98a452611dd3d209l,0xeaf9f61bd431ebe8l,
-            0x00919f4dbaf56abdl },
-          { 0xe42417db6d8774b1l,0x5fc5279c58e0e309l,0x64aa40613adf81eal,
-            0xef419edabc627c7fl },
-          0 },
-        /* 10 << 40 */
-        { { 0x3919759239ef620fl,0x9d47284074fa29c4l,0x4e428fa39d416d83l,
-            0xd1a7c25129f30269l },
-          { 0x46076e1cd746218fl,0xf3ad6ee8110d967el,0xfbb5f434a00ae61fl,
-            0x3cd2c01980d4c929l },
-          0 },
-        /* 11 << 40 */
-        { { 0xfa24d0537a4af00fl,0x3f938926ca294614l,0x0d700c183982182el,
-            0x801334434cc59947l },
-          { 0xf0397106ec87c925l,0x62bd59fc0ed6665cl,0xe8414348c7cca8b5l,
-            0x574c76209f9f0a30l },
-          0 },
-        /* 13 << 40 */
-        { { 0x95be42e2bb8b6a07l,0x64be74eeca23f86al,0xa73d74fd154ce470l,
-            0x1c2d2857d8dc076al },
-          { 0xb1fa1c575a887868l,0x38df8e0b3de64818l,0xd88e52f9c34e8967l,
-            0x274b4f018b4cc76cl },
-          0 },
-        /* 15 << 40 */
-        { { 0x3f5c05b4f8b7559dl,0x0be4c7acfae29200l,0xdd6d3ef756532accl,
-            0xf6c3ed87eea7a285l },
-          { 0xe463b0a8f46ec59bl,0x531d9b14ecea6c83l,0x3d6bdbafc2dc836bl,
-            0x3ee501e92ab27f0bl },
-          0 },
-        /* 16 << 40 */
-        { { 0x8df275455922ac1cl,0xa7b3ef5ca52b3f63l,0x8e77b21471de57c4l,
-            0x31682c10834c008bl },
-          { 0xc76824f04bd55d31l,0xb6d1c08617b61c71l,0x31db0903c2a5089dl,
-            0x9c092172184e5d3fl },
-          0 },
-        /* 17 << 40 */
-        { { 0x7b1a921ea6b3340bl,0x6d7c4d7d7438a53el,0x2b9ef73c5bf71d8fl,
-            0xb5f6e0182b167a7cl },
-          { 0x5ada98ab0ce536a3l,0xee0f16f9e1fea850l,0xf6424e9d74f1c0c5l,
-            0x4d00de0cd3d10b41l },
-          0 },
-        /* 19 << 40 */
-        { { 0xd542f522a6533610l,0xfdde15a734ec439al,0x696560fedc87dd0dl,
-            0x69eab421e01fd05fl },
-          { 0xca4febdc95cc5988l,0x839be396c44d92fbl,0x7bedff6daffe543bl,
-            0xd2bb97296f6da43al },
-          0 },
-        /* 21 << 40 */
-        { { 0x5bc6dea80b8d0077l,0xb2adf5d1ea9c49efl,0x7104c20eaafe8659l,
-            0x1e3604f37866ee7el },
-          { 0x0cfc7e7b3075c8c5l,0x5281d9bb639c5a2bl,0xcbdf42494bc44ee3l,
-            0x835ab066655e9209l },
-          0 },
-        /* 23 << 40 */
-        { { 0x78fbda4b90b94ffal,0x447e52eb7beb993cl,0x920011bc92620d15l,
-            0x7bad6ecf481fd396l },
-          { 0xad3bd28ba989a09el,0x20491784a3e62b78l,0xcdcd7096b07bd9efl,
-            0x9bf5bb7337d780adl },
-          0 },
-        /* 25 << 40 */
-        { { 0xbe911a71a976c8d4l,0xba0346743fdd778el,0x2359e7434cf87ea1l,
-            0x8dccf65f07ebb691l },
-          { 0x6c2c18eb09746d87l,0x6a19945fd2ecc8fal,0xc67121ff2ffa0339l,
-            0x408c95ba9bd9fc31l },
-          0 },
-        /* 27 << 40 */
-        { { 0xa317204bcaa5da39l,0xd390df7468bf53d7l,0x56de18b2dbd71c0dl,
-            0xcb4d3bee75184779l },
-          { 0x815a219499d920a5l,0x9e10fb4ecf3d3a64l,0x7fd4901dfe92e1eel,
-            0x5d86d10d3ab87b2el },
-          0 },
-        /* 28 << 40 */
-        { { 0x24f2a692840bb336l,0x7c353bdca669fa7bl,0xda20d6fcdec9c300l,
-            0x625fbe2fa13a4f17l },
-          { 0xa2b1b61adbc17328l,0x008965bfa9515621l,0x49690939c620ff46l,
-            0x182dd27d8717e91cl },
-          0 },
-        /* 29 << 40 */
-        { { 0x98e9136c878303e4l,0x2769e74fd1e65efdl,0x6154c545809da56el,
-            0x8c5d50a04301638cl },
-          { 0x10f3d2068214b763l,0x2da9a2fc44df0644l,0xca912bab588a6fcdl,
-            0xe9e82d9b227e1932l },
-          0 },
-        /* 31 << 40 */
-        { { 0xcbdc4d66d080e55bl,0xad3f11e5b8f98d6bl,0x31bea68e18a32480l,
-            0xdf1c6fd52c1bcf6el },
-          { 0xadcda7ee118a3f39l,0xbd02f857ac060d5fl,0xd2d0265d86631997l,
-            0xb866a7d33818f2d4l },
-          0 },
-        /* 33 << 40 */
-        { { 0xfbcce2d31892d98dl,0x2e34bc9507de73dcl,0x3a48d1a94891eec1l,
-            0xe64499c24d31060bl },
-          { 0xe9674b7149745520l,0xf126ccaca6594a2cl,0x33e5c1a079945342l,
-            0x02aa0629066e061fl },
-          0 },
-        /* 34 << 40 */
-        { { 0xdfd7c0ae7af3191el,0x923ec111d68c70d9l,0xb6f1380bb675f013l,
-            0x9192a224f23d45bal },
-          { 0xbe7890f9524891e3l,0x45b24c47eba996bbl,0x59331e48320447e9l,
-            0x0e4d8753ac9afad4l },
-          0 },
-        /* 35 << 40 */
-        { { 0x49e49c38c9f5a6c3l,0x3f5eea44d8ee2a65l,0x02bf3e761c74bbb4l,
-            0x50d291cdef565571l },
-          { 0xf4edc290a36dd5fal,0x3015df9556dd6b85l,0x4494926aa5549a16l,
-            0x5de6c59390399e4al },
-          0 },
-        /* 36 << 40 */
-        { { 0x29be11c6ce800998l,0x72bb1752b90360d9l,0x2c1931975a4ad590l,
-            0x2ba2f5489fc1dbc0l },
-          { 0x7fe4eebbe490ebe0l,0x12a0a4cd7fae11c0l,0x7197cf81e903ba37l,
-            0xcf7d4aa8de1c6dd8l },
-          0 },
-        /* 37 << 40 */
-        { { 0x961fa6317e249e7bl,0x5c4f707796caed50l,0x6b176e62d7e50885l,
-            0x4dd5de72f390cbecl },
-          { 0x91fa29954b2bd762l,0x80427e6395b8dadel,0xd565bf1de2c34743l,
-            0x911da39d16e6c841l },
-          0 },
-        /* 39 << 40 */
-        { { 0x48365465802ff016l,0x6d2a561f71beece6l,0xdd299ce6f9707052l,
-            0x62a32698a23407bbl },
-          { 0x1d55bdb147004afbl,0xfadec124369b1084l,0x1ce78adf291c89f7l,
-            0x9f2eaf03278bc529l },
-          0 },
-        /* 40 << 40 */
-        { { 0x92af6bf43fd5684cl,0x2b26eecf80360aa1l,0xbd960f3000546a82l,
-            0x407b3c43f59ad8fel },
-          { 0x86cae5fe249c82bal,0x9e0faec72463744cl,0x87f551e894916272l,
-            0x033f93446ceb0615l },
-          0 },
-        /* 41 << 40 */
-        { { 0x04658ad212dba0cel,0x9e600624068822f0l,0x84661f11b26d368bl,
-            0xbca867d894ebb87al },
-          { 0x79506dc42f1bad89l,0x1a8322d3ebcbe7a1l,0xb4f1e102ac197178l,
-            0x29a950b779f7198cl },
-          0 },
-        /* 43 << 40 */
-        { { 0x19a6fb0984a3d1d5l,0x6c75c3a2ba5f5307l,0x7983485bf9698447l,
-            0x689f41b88b1cdc1el },
-          { 0x18f6fbd74c1979d0l,0x3e6be9a27a0b6708l,0x06acb615f63d5a8al,
-            0x8a817c098d0f64b1l },
-          0 },
-        /* 44 << 40 */
-        { { 0x1e5eb0d18be82e84l,0x89967f0e7a582fefl,0xbcf687d5a6e921fal,
-            0xdfee4cf3d37a09bal },
-          { 0x94f06965b493c465l,0x638b9a1c7635c030l,0x7666786466f05e9fl,
-            0xccaf6808c04da725l },
-          0 },
-        /* 45 << 40 */
-        { { 0xa9b3479b1b53a173l,0xc041eda3392eddc0l,0xdb8f804755edd7eel,
-            0xaf1f7a37ab60683cl },
-          { 0x9318603a72c0accbl,0xab1bb9fe401cbf3cl,0xc40e991e88afe245l,
-            0x9298a4580d06ac35l },
-          0 },
-        /* 46 << 40 */
-        { { 0x58e127d5036c2fe7l,0x5fe5020555b93361l,0xc1373d850f74a045l,
-            0x28cd79dbe8228e4bl },
-          { 0x0ae82320c2018d9al,0xf6d0049c78f8016al,0x381b6fe2149b31fbl,
-            0x33a0e8adec3cfbcfl },
-          0 },
-        /* 47 << 40 */
-        { { 0x23a6612e9eab5da7l,0xb645fe29d94d6431l,0xe3d74594ca1210c4l,
-            0xdc1376bceeca0674l },
-          { 0xfd40dfef657f0154l,0x7952a548d52cbac5l,0x0ee189583685ad28l,
-            0xd13639409ba9ca46l },
-          0 },
-        /* 48 << 40 */
-        { { 0xca2eb690768fccfcl,0xf402d37db835b362l,0x0efac0d0e2fdfccel,
-            0xefc9cdefb638d990l },
-          { 0x2af12b72d1669a8bl,0x33c536bc5774ccbdl,0x30b21909fb34870el,
-            0xc38fa2f77df25acal },
-          0 },
-        /* 49 << 40 */
-        { { 0x1337902f1c982cd6l,0x222e08fe14ec53eal,0x6c8abd0d330ef3e5l,
-            0xeb59e01531f6fd9dl },
-          { 0xd74ae554a8532df4l,0xbc010db1ab44c83el,0xe98016561b8f9285l,
-            0x65a9612783acc546l },
-          0 },
-        /* 51 << 40 */
-        { { 0x36a8b0a76770cfb1l,0x3338d52f9bb578fcl,0x5136c785f5ed12a4l,
-            0x652d47ed87bf129el },
-          { 0x9c6c827e6067c2d0l,0x61fc2f410345533al,0x2d7fb182130cea19l,
-            0x71a0186330b3ef85l },
-          0 },
-        /* 52 << 40 */
-        { { 0x74c5f02bbf81f3f5l,0x0525a5aeaf7e4581l,0x88d2aaba433c54ael,
-            0xed9775db806a56c5l },
-          { 0xd320738ac0edb37dl,0x25fdb6ee66cc1f51l,0xac661d1710600d76l,
-            0x931ec1f3bdd1ed76l },
-          0 },
-        /* 53 << 40 */
-        { { 0xb81e239161faa569l,0xb379f759bb40eebfl,0x9f2fd1b2a2c54549l,
-            0x0a968f4b0d6ba0ael },
-          { 0xaa869e6eedfe8c75l,0x0e36b298645ab173l,0x5a76282b0bcdefd7l,
-            0x9e949331d05293f2l },
-          0 },
-        /* 55 << 40 */
-        { { 0xc1cfa9a1c59fac6el,0x2648bffcb72747cel,0x5f8a39805f2e2637l,
-            0x8bd3a8eb73e65758l },
-          { 0xd9c43f1df14381a7l,0xecc1c3b0d6a86c10l,0xffcf4fa8a4a6dc74l,
-            0x7304fa834cea0a46l },
-          0 },
-        /* 57 << 40 */
-        { { 0x4460760c34dca952l,0xeac9cf2444c70444l,0xb879297b8493c87el,
-            0x295941a54b2dccb7l },
-          { 0x1e5cecede58721cdl,0xc8b58db74ca0d12bl,0x1927965c6da1d034l,
-            0x7220b02839ed1369l },
-          0 },
-        /* 59 << 40 */
-        { { 0xc38746c83c2e34b6l,0x9f27362e38a51042l,0x26febec02067afebl,
-            0xd9c4e15544e7371fl },
-          { 0x6035f469f92930d1l,0xe6ed7c08b4431b8bl,0xa25bf5903e16410dl,
-            0x147d83368adf4c18l },
-          0 },
-        /* 60 << 40 */
-        { { 0x7f01c9ecaa80ba59l,0x3083411a68538e51l,0x970370f1e88128afl,
-            0x625cc3db91dec14bl },
-          { 0xfef9666c01ac3107l,0xb2a8d577d5057ac3l,0xb0f2629992be5df7l,
-            0xf579c8e500353924l },
-          0 },
-        /* 61 << 40 */
-        { { 0xbd9398d6ca02669fl,0x896e053bf9ad11a1l,0xe024b699a3556f9fl,
-            0x23b4b96ad53cbca3l },
-          { 0x549d2d6c89733dd6l,0x3dae193f394f3179l,0x8bf7ec1cdfeda825l,
-            0xf6a1db7a8a4844b4l },
-          0 },
-        /* 63 << 40 */
-        { { 0x3b5403d56437a027l,0xda32bbd233ed30aal,0xd2ad3baa906de0cal,
-            0x3b6df514533f736el },
-          { 0x986f1cab5df9b9c4l,0x41cd2088970d330el,0xaae7c2238c20a923l,
-            0x52760a6e1e951dc0l },
-          0 },
-        /* 64 << 40 */
-        { { 0xb8fa3d931341ed7al,0x4223272ca7b59d49l,0x3dcb194783b8c4a4l,
-            0x4e413c01ed1302e4l },
-          { 0x6d999127e17e44cel,0xee86bf7533b3adfbl,0xf6902fe625aa96cal,
-            0xb73540e4e5aae47dl },
-          0 },
-        /* 65 << 40 */
-        { { 0x55318a525e34036cl,0xc3acafaaf9884e3fl,0xe5ba15cea042ba04l,
-            0x56a1d8960ada550el },
-          { 0xa5198cae87b76764l,0xd079d1f0b6fd84fbl,0xb22b637bcbe363edl,
-            0xbe8ab7d64499deaal },
-          0 },
-        /* 71 << 40 */
-        { { 0xbe8eba5eb4925f25l,0x00f8bf582e3159d6l,0xb1aa24fa18856070l,
-            0x22ea8b74e4c30b22l },
-          { 0x512f633e55bbe4e8l,0x82ba62318678aee9l,0xea05da90fdf72b7el,
-            0x616b9bc7a4fc65eel },
-          0 },
-        /* 77 << 40 */
-        { { 0xe31ee3b3b7c221e7l,0x10353824e353fa43l,0x9d2f3df69dd2a86fl,
-            0x8a12ab9322ccffecl },
-          { 0x25c8e326d666f9e5l,0x33ea98a0598da7fbl,0x2fc1de0917f74e17l,
-            0x0d0b6c7a35efb211l },
-          0 },
-        /* 83 << 40 */
-        { { 0x22a82c6c804e6ecel,0x824a170b1d8fce9el,0x621802becee65ed0l,
-            0x4a4e9e7895ec4285l },
-          { 0x8da0988fa8940b7al,0xaff89c5b86445aa5l,0x386fdbdad689cde9l,
-            0x3aeaae7d9f5caaccl },
-          0 },
-        /* 89 << 40 */
-        { { 0xe9cb9e68a7b62f4cl,0x515cae0ec3b7092el,0xb8abec354b491f52l,
-            0x672673fd01eeabc1l },
-          { 0x65e5739f7ad6e8a1l,0xc2da8e003d91b2f9l,0xcc43229cced84319l,
-            0x0f8cbf9574ccf2d1l },
-          0 },
-        /* 95 << 40 */
-        { { 0xb03d1cfb1b2f872al,0x88aef4670872b6f7l,0xaafe55e48ea9170cl,
-            0xd5cc4875f24aa689l },
-          { 0x7e5732908458ce84l,0xef4e143d58bfc16dl,0xc58626efaa222836l,
-            0x01c60ec0ca5e0cb8l },
-          0 },
-        /* 101 << 40 */
-        { { 0x123901aa36337c09l,0x1697acadd2f5e675l,0xc0a1ddd022fe2bael,
-            0xf68ea88cff0210ddl },
-          { 0x665d11e014168709l,0x912a575f45f25321l,0x7e7ed38070c78934l,
-            0x663d692cb0a46322l },
-          0 },
-        /* 107 << 40 */
-        { { 0x912ab8bd8642cba4l,0x97fab1a3b6b50b73l,0x76666b3cb86ef354l,
-            0x16d41330fa5ecce9l },
-          { 0x77c7c138c7da404bl,0xc6508cb78c983fb0l,0xe5881733f9004984l,
-            0x76dea7794182c7abl },
-          0 },
-        /* 113 << 40 */
-        { { 0x16db18583556b765l,0x39c18c200263755al,0x7b6691f591c15201l,
-            0x4e4c17b168514ea9l },
-          { 0xacbe449e06f5f20al,0xeb9119d2541ddfb6l,0x2f6e687bf2eac86fl,
-            0xb161471ec14ac508l },
-          0 },
-        /* 116 << 40 */
-        { { 0x58846d32c4744733l,0x40517c71379f9e34l,0x2f65655f130ef6cal,
-            0x526e4488f1f3503fl },
-          { 0x8467bd177ee4a976l,0x1d9dc913921363d1l,0xd8d24c33b069e041l,
-            0x5eb5da0a2cdf7f51l },
-          0 },
-        /* 119 << 40 */
-        { { 0x81c2cc32951ab3e7l,0xc86d9a109b0c7e87l,0x0b7a18bd606ef408l,
-            0x099b5bbfe6c2251el },
-          { 0x46d627d0bfce880fl,0xbfaddcbbe1c6865al,0xa9ab6183d2bb9a00l,
-            0x23cb9a2720ad9789l },
-          0 },
-        /* 125 << 40 */
-        { { 0x1592d0630c25fbebl,0x13869ec24995a3fal,0x6413f494861d0a73l,
-            0xa3b782342f9f1b89l },
-          { 0x113689e2b6cad351l,0x53be2014a873dcc1l,0xccf405e0c6bb1be7l,
-            0x4fff7b4ca9061ca9l },
-          0 },
-    },
-    {
-        /* 0 << 48 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 48 */
-        { { 0xcc7a64880a750c0fl,0x39bacfe34e548e83l,0x3d418c760c110f05l,
-            0x3e4daa4cb1f11588l },
-          { 0x2733e7b55ffc69ffl,0x46f147bc92053127l,0x885b2434d722df94l,
-            0x6a444f65e6fc6b7cl },
-          0 },
-        /* 3 << 48 */
-        { { 0x6d0b16f4bdaedfbdl,0x23fd326086746cedl,0x8bfb1d2fff4b3e17l,
-            0xc7f2ec2d019c14c8l },
-          { 0x3e0832f245104b0dl,0x5f00dafbadea2b7el,0x29e5cf6699fbfb0fl,
-            0x264f972361827cdal },
-          0 },
-        /* 4 << 48 */
-        { { 0x97b14f7ea90567e6l,0x513257b7b6ae5cb7l,0x85454a3c9f10903dl,
-            0xd8d2c9ad69bc3724l },
-          { 0x38da93246b29cb44l,0xb540a21d77c8cbacl,0x9bbfe43501918e42l,
-            0xfffa707a56c3614el },
-          0 },
-        /* 5 << 48 */
-        { { 0x6eb1a2f3e30bc27fl,0xe5f0c05ab0836511l,0x4d741bbf4965ab0el,
-            0xfeec41ca83464bbdl },
-          { 0x1aca705f99d0b09fl,0xc5d6cc56f42da5fal,0x49964eddcc52b931l,
-            0x8ae59615c884d8d8l },
-          0 },
-        /* 7 << 48 */
-        { { 0xf634b57b39f8868al,0xe27f4fd475cc69afl,0xa47e58cbd0d5496el,
-            0x8a26793fd323e07fl },
-          { 0xc61a9b72fa30f349l,0x94c9d9c9b696d134l,0x792beca85880a6d1l,
-            0xbdcc4645af039995l },
-          0 },
-        /* 9 << 48 */
-        { { 0xce7ef8e58c796c3cl,0x9adaae84dd66e57al,0x784ae13e45227f33l,
-            0xb046c5b82a85e757l },
-          { 0xb7aa50aeec37631fl,0xbedc4fca3b300758l,0x0f82567e0ac9700bl,
-            0x1071d9d44ff5f8d2l },
-          0 },
-        /* 10 << 48 */
-        { { 0x61360ee99e240d18l,0x057cdcacb4b94466l,0xe7667cd12fe5325cl,
-            0x1fa297b521974e3bl },
-          { 0xfa4081e7db083d76l,0x31993be6f206bd15l,0x8949269b14c19f8cl,
-            0x21468d72a9d92357l },
-          0 },
-        /* 11 << 48 */
-        { { 0xd09ef6c4e51a2811l,0x39f6862bb8fb66b9l,0x64e77f8d22dfaa99l,
-            0x7b10504461b08aacl },
-          { 0x71704e4c4a7df332l,0xd09734342ffe015bl,0xab0eaf4408d3020el,
-            0x28b1909eed63b97al },
-          0 },
-        /* 13 << 48 */
-        { { 0x2f3fa882cdadcd4fl,0xa4ef68595f631995l,0xe52ca2f9e531766fl,
-            0x20af5c3057e2c1d3l },
-          { 0x1e4828f6e51e94b8l,0xf900a1751a2f5d4fl,0xe831adb3392c58a0l,
-            0x4c5a90ca1b6e5866l },
-          0 },
-        /* 15 << 48 */
-        { { 0x5f3dcba86182827cl,0xd1a448ddbd7e7252l,0x2d8f96fcf493b815l,
-            0xba0a4c263b0aa95fl },
-          { 0x88a1514063a0007fl,0x9564c25e6a9c5846l,0x5a4d7b0fdc0fcbcal,
-            0x2275daa33f8a740el },
-          0 },
-        /* 16 << 48 */
-        { { 0x83f49167ceca9754l,0x426d2cf64b7939a0l,0x2555e355723fd0bfl,
-            0xa96e6d06c4f144e2l },
-          { 0x4768a8dd87880e61l,0x15543815e508e4d5l,0x09d7e772b1b65e15l,
-            0x63439dd6ac302fa0l },
-          0 },
-        /* 17 << 48 */
-        { { 0x159591cc0461086bl,0xb695aa9495e66e51l,0x2d4c946779ded531l,
-            0xbd2482ba89c2be79l },
-          { 0x8ee2658aa20bbf19l,0xc000528a32247917l,0xd924be4affeae845l,
-            0x51312bebed992c8bl },
-          0 },
-        /* 19 << 48 */
-        { { 0x3a01b958dc752bd9l,0x2babdbc20c215d45l,0xe689d79a131641c1l,
-            0x48e8f0da80e05ed4l },
-          { 0x4b505feb77bb70c4l,0xefbd3e2bb6057ef7l,0x7583e22dce603ca5l,
-            0xfbe3b1f22c5c70c7l },
-          0 },
-        /* 21 << 48 */
-        { { 0x8ec1ecf029e5e35al,0x2f3168e58645c2b3l,0xe9297362c7f94cb2l,
-            0x4fbf1466d1c90b39l },
-          { 0x3e4f7656920bae2al,0x805d04b9f1beb172l,0x729a7208dbdbd4b4l,
-            0x1aade45687aeca53l },
-          0 },
-        /* 23 << 48 */
-        { { 0xb0ff1f541934a508l,0x19e1397604bbf31al,0xb2a8e6033717a6b4l,
-            0xd601e45d0ef12cb9l },
-          { 0x563f0af5b515e98el,0x9b129db633984f9bl,0xe34aba2fa47e4a65l,
-            0xb56f82d19e3f9d82l },
-          0 },
-        /* 25 << 48 */
-        { { 0x0203effdb1209b86l,0x21f063edb19d6cbfl,0x59f53476980f275bl,
-            0x202456d7b7ac5e80l },
-          { 0xe5a8c05f4900edc9l,0x04c08eb470f01e86l,0xf74ac2241dcd98cel,
-            0x7e77cc0ce2e830dbl },
-          0 },
-        /* 27 << 48 */
-        { { 0x74e37234a9747edel,0x4fc9fbb1361b1013l,0xe7b533733cf357efl,
-            0x6aa2dd2c991c4193l },
-          { 0x7887e4d2a770917al,0xdd1809b4c20d24cbl,0x004cd7c38e9c2d3el,
-            0xc77c5baba9970abel },
-          0 },
-        /* 28 << 48 */
-        { { 0x20ac0351d598d710l,0x272c4166cb3a4da4l,0xdb82fe1aca71de1fl,
-            0x746e79f2d8f54b0fl },
-          { 0x6e7fc7364b573e9bl,0x75d03f46fd4b5040l,0x5c1cc36d0b98d87bl,
-            0x513ba3f11f472da1l },
-          0 },
-        /* 29 << 48 */
-        { { 0x52927eaac3af237fl,0xfaa06065d7398767l,0x042e72b497c6ce0bl,
-            0xdaed0cc40a9f2361l },
-          { 0xddc2e11c2fc1bb4al,0x631da5770c1a9ef8l,0x8a4cfe44680272bfl,
-            0xc76b9f7262fb5cc3l },
-          0 },
-        /* 31 << 48 */
-        { { 0x248f814538b3aae3l,0xb5345864bc204334l,0x66d6b5bc1d127524l,
-            0xe312080d14f572d3l },
-          { 0x13ed15a716abafebl,0x6f18ce27dba967bel,0x96c9e826ef08552dl,
-            0x2c191b06be2b63e0l },
-          0 },
-        /* 33 << 48 */
-        { { 0xde4be45dc115ca51l,0xa028cafe934dabd6l,0x7e875663d1c0f8c5l,
-            0xa8e32ab063d17473l },
-          { 0x33f55bd5543199aal,0x79d2c937a2071d6el,0xa6a6758ceff16f28l,
-            0x9c5f93ef87d85201l },
-          0 },
-        /* 34 << 48 */
-        { { 0x7f2e440381e9ede3l,0x243c3894caf6df0al,0x7c605bb11c073b11l,
-            0xcd06a541ba6a4a62l },
-          { 0x2916894949d4e2e5l,0x33649d074af66880l,0xbfc0c885e9a85035l,
-            0xb4e52113fc410f4bl },
-          0 },
-        /* 35 << 48 */
-        { { 0xe86f21bc3ad4c81el,0x53b408403a37dcebl,0xaa606087383402cdl,
-            0xc248caf185452b1dl },
-          { 0x38853772576b57cdl,0xe2798e5441b7a6edl,0x7c2f1eed95ef4a33l,
-            0xccd7e776adb1873cl },
-          0 },
-        /* 36 << 48 */
-        { { 0xdca3b70678a6513bl,0x92ea4a2a9edb1943l,0x02642216db6e2dd8l,
-            0x9b45d0b49fd57894l },
-          { 0x114e70dbc69d11ael,0x1477dd194c57595fl,0xbc2208b4ec77c272l,
-            0x95c5b4d7db68f59cl },
-          0 },
-        /* 37 << 48 */
-        { { 0xd978bb791c61030al,0xa47325d2218222f3l,0x65ad4d4832e67d97l,
-            0x31e4ed632e0d162al },
-          { 0x7308ea317f76da37l,0xcfdffe87d93f35d8l,0xf4b2d60ee6f96cc4l,
-            0x8028f3bd0117c421l },
-          0 },
-        /* 39 << 48 */
-        { { 0x7df80cbb9543edb6l,0xa07a54df40b0b3bcl,0xacbd067cc1888488l,
-            0x61ad61318a00c721l },
-          { 0x67e7599ebe2e6fe6l,0x8349d568f7270e06l,0x5630aabc307bc0c7l,
-            0x97210b3f71af442fl },
-          0 },
-        /* 40 << 48 */
-        { { 0xfe541fa47ea67c77l,0x952bd2afe3ea810cl,0x791fef568d01d374l,
-            0xa3a1c6210f11336el },
-          { 0x5ad0d5a9c7ec6d79l,0xff7038af3225c342l,0x003c6689bc69601bl,
-            0x25059bc745e8747dl },
-          0 },
-        /* 41 << 48 */
-        { { 0x58bdabb7ef701b5fl,0x64f987aee00c3a96l,0x533b391e2d585679l,
-            0x30ad79d97a862e03l },
-          { 0xd941471e8177b261l,0x33f65cb856a9018el,0x985ce9f607759fc4l,
-            0x9b085f33aefdbd9el },
-          0 },
-        /* 43 << 48 */
-        { { 0xab2fa51a9c43ee15l,0x457f338263f30575l,0xce8dcd863e75a6e0l,
-            0x67a03ab86e70421al },
-          { 0xe72c37893e174230l,0x45ffff6c066f4816l,0x3a3dd84879a2d4a7l,
-            0xefa4b7e68b76c24cl },
-          0 },
-        /* 44 << 48 */
-        { { 0x9a75c80676cb2566l,0x8f76acb1b24892d9l,0x7ae7b9cc1f08fe45l,
-            0x19ef73296a4907d8l },
-          { 0x2db4ab715f228bf0l,0xf3cdea39817032d7l,0x0b1f482edcabe3c0l,
-            0x3baf76b4bb86325cl },
-          0 },
-        /* 45 << 48 */
-        { { 0xd6be8f00e39e056al,0xb58f87a6232fa3bcl,0xd5cb09dc6b18c772l,
-            0x3177256da8e7e17bl },
-          { 0x1877fd34230bf92cl,0x6f9031175a36f632l,0x526a288728e2c9d9l,
-            0xc373fc94415ec45cl },
-          0 },
-        /* 46 << 48 */
-        { { 0xd49065e010089465l,0x3bab5d298e77c596l,0x7636c3a6193dbd95l,
-            0xdef5d294b246e499l },
-          { 0xb22c58b9286b2475l,0xa0b93939cd80862bl,0x3002c83af0992388l,
-            0x6de01f9beacbe14cl },
-          0 },
-        /* 47 << 48 */
-        { { 0x70fa6e2a2bf5e373l,0x501691739271694cl,0xd6ebb98c5d2ed9f1l,
-            0x11fd0b3f225bf92dl },
-          { 0x51ffbcea1e3d5520l,0xa7c549875513ad47l,0xe9689750b431d46dl,
-            0x6e69fecbb620cb9al },
-          0 },
-        /* 48 << 48 */
-        { { 0x6aac688eadd70482l,0x708de92a7b4a4e8al,0x75b6dd73758a6eefl,
-            0xea4bf352725b3c43l },
-          { 0x10041f2c87912868l,0xb1b1be95ef09297al,0x19ae23c5a9f3860al,
-            0xc4f0f839515dcf4bl },
-          0 },
-        /* 49 << 48 */
-        { { 0xf3c22398e04b5734l,0x4fba59b275f2579dl,0xbf95182d691901b3l,
-            0x4c139534eb599496l },
-          { 0xf3f821de33b77e8bl,0x66e580743785d42fl,0xe3ba3d5abdc89c2dl,
-            0x7ee988bdd19f37b9l },
-          0 },
-        /* 51 << 48 */
-        { { 0xe9ba62ca2ee53eb0l,0x64295ae23401d7dal,0x70ed8be24e493580l,
-            0x702caa624502732fl },
-          { 0xb1f4e21278d0cedfl,0x130b114bdc97057bl,0x9c5d0bd3c38c77b5l,
-            0xd9d641e18bad68e7l },
-          0 },
-        /* 52 << 48 */
-        { { 0xc71e27bf8538a5c6l,0x195c63dd89abff17l,0xfd3152851b71e3dal,
-            0x9cbdfda7fa680fa0l },
-          { 0x9db876ca849d7eabl,0xebe2764b3c273271l,0x663357e3f208dceal,
-            0x8c5bd833565b1b70l },
-          0 },
-        /* 53 << 48 */
-        { { 0x7c2dea1d122aebd4l,0x090bee4a138c1e4dl,0x94a9ffe59e4aca6cl,
-            0x8f3212ba5d405c7fl },
-          { 0x6618185f180b5e85l,0x76298d46f455ab9fl,0x0c804076476b2d88l,
-            0x45ea9d03d5a40b39l },
-          0 },
-        /* 55 << 48 */
-        { { 0xdf325ac76a2ed772l,0x35da47ccb0da2765l,0x94ce6f460bc9b166l,
-            0xe0fc82fb5f7f3628l },
-          { 0x2b26d588c055f576l,0xb9d37c97ec2bae98l,0xffbbead856908806l,
-            0xa8c2df87437f4c84l },
-          0 },
-        /* 57 << 48 */
-        { { 0x47d11c3528430994l,0x0183df71cf13d9d3l,0x98604c89aa138fe5l,
-            0xb1432e1c32c09aa1l },
-          { 0xf19bc45d99bd5e34l,0xb198be72108e9b89l,0xee500ae9dacde648l,
-            0x5936cf98746870a9l },
-          0 },
-        /* 59 << 48 */
-        { { 0x6d8efb98ed1d5a9bl,0x2e0b08e697f778fal,0xda728454dc5e0835l,
-            0x2c28a45f8e3651c4l },
-          { 0x667fab6f7ee77088l,0xd94429c8f29a94b4l,0xd83d594d9deea5b2l,
-            0x2dc08ccbbea58080l },
-          0 },
-        /* 60 << 48 */
-        { { 0xba5514df3fd165e8l,0x499fd6a9061f8811l,0x72cd1fe0bfef9f00l,
-            0x120a4bb979ad7e8al },
-          { 0xf2ffd0955f4a5ac5l,0xcfd174f195a7a2f0l,0xd42301ba9d17baf1l,
-            0xd2fa487a77f22089l },
-          0 },
-        /* 61 << 48 */
-        { { 0xfb5f53ba20a9a01el,0x3adb174fd20d6a9cl,0x6db8bb6d80e0f64fl,
-            0x596e428df6a26f76l },
-          { 0xbab1f846e6a4e362l,0x8bdb22af9b1becbdl,0x62b48335f31352adl,
-            0xd72c26409634f727l },
-          0 },
-        /* 63 << 48 */
-        { { 0xaaa61cb22b1ec1c3l,0x3b5156722cb6f00el,0x67d1be0a8bf83f60l,
-            0x88f1627aa4b804bcl },
-          { 0xc52b11a7cdade2abl,0xa6a8b71a606a4e9dl,0x04e0e6697b900551l,
-            0x35cfa33c8d5ad0d2l },
-          0 },
-        /* 64 << 48 */
-        { { 0xb93452381d531696l,0x57201c0088cdde69l,0xdde922519a86afc7l,
-            0xe3043895bd35cea8l },
-          { 0x7608c1e18555970dl,0x8267dfa92535935el,0xd4c60a57322ea38bl,
-            0xe0bf7977804ef8b5l },
-          0 },
-        /* 65 << 48 */
-        { { 0x375ca189b60f0d5al,0xc9458cf949a78362l,0x61c1c5024262c03al,
-            0x299353db4363d5bel },
-          { 0xe3565124dac407fel,0x16ea66cd5b93c532l,0xe5c6aec2749df8e3l,
-            0x59181317ce3ee4bfl },
-          0 },
-        /* 71 << 48 */
-        { { 0xd46ea34af41c2a3cl,0x9936184916545c98l,0xd7cb800ccf2498b4l,
-            0xe71d088d9353fe87l },
-          { 0x43443cbeae2e172cl,0x77131656ca905cb3l,0x76471fd1dce63594l,
-            0x346b1d1738f5e264l },
-          0 },
-        /* 77 << 48 */
-        { { 0x22b1e639f6d0a419l,0x8bbb1fad7cea278cl,0xf07f6c01370cc86al,
-            0x661bd027d39b837fl },
-          { 0x042c7a69de606098l,0x93433b154e44eb12l,0x20f44ada88d8bfe8l,
-            0xb44f66e64ccbfab6l },
-          0 },
-        /* 83 << 48 */
-        { { 0x1cc32158583d9745l,0x9306223cad1c2201l,0x76aa8d0995748039l,
-            0x29425391707e9b59l },
-          { 0x8501c0d4487cdf9el,0xbe08e89c205c5611l,0xa950400b04ccc48bl,
-            0xb614b69b637e966bl },
-          0 },
-        /* 89 << 48 */
-        { { 0xd9c3c1238ffa5c4bl,0xc65765f7f3593988l,0x9a7e5d2728242119l,
-            0x0ad27b5097ad7620l },
-          { 0x154cc5eb413a8b23l,0xae93d8de7afa8254l,0x9ce5116cab9907b5l,
-            0x9a163d78063103b9l },
-          0 },
-        /* 95 << 48 */
-        { { 0x5c4c299291086d2al,0x42c6ca9de8e2d951l,0xe67ecf93dd353f30l,
-            0xba54557fe7167c2el },
-          { 0x04a7eb2db734c779l,0x8f345605e300711al,0x4811c1ad67b27de6l,
-            0xb7ac8e842731d5f0l },
-          0 },
-        /* 101 << 48 */
-        { { 0xee33a1d8e449ac46l,0x2500ba0aaaebfa2dl,0x8fb914ebc424eff4l,
-            0x3a36545d3989255el },
-          { 0xd24f2484761235e6l,0x2fc5d5ddd9b2c04bl,0x73660f86070ab0dbl,
-            0x2e266d0479d20c7bl },
-          0 },
-        /* 107 << 48 */
-        { { 0x143752d5316d19a3l,0x56a55e01915497b8l,0x44ba4b2609a5fd15l,
-            0xe4fc3e7fd9bee4eel },
-          { 0x6f9d8609878a9f26l,0xdf36b5bd2ede7a20l,0x8e03e712a9a3e435l,
-            0x4ced555b56546d33l },
-          0 },
-        /* 113 << 48 */
-        { { 0x89a6aaab0882717el,0x56a9736b43fa5153l,0xdb07dcc9d0e1fb1al,
-            0xe7c986d34145e227l },
-          { 0x57be66abb10dad51l,0xa47b964e4aa01ea7l,0xd851d9f36bb837cbl,
-            0x9851ab3d652e13f7l },
-          0 },
-        /* 116 << 48 */
-        { { 0x22b88a805616ee30l,0xfb09548fe7ab1083l,0x8ad6ab0d511270cdl,
-            0x61f6c57a6924d9abl },
-          { 0xa0f7bf7290aecb08l,0x849f87c90df784a4l,0x27c79c15cfaf1d03l,
-            0xbbf9f675c463facel },
-          0 },
-        /* 119 << 48 */
-        { { 0x65512fb716dd6ce1l,0xfa76ebc960d53b35l,0x31e5322e19ada3bel,
-            0x7e259b75d0ccc3cdl },
-          { 0xd36d03f0e025fd69l,0xbefab782eea9e5f3l,0x1569969dd09ce6a7l,
-            0x2df5396178c385b0l },
-          0 },
-        /* 125 << 48 */
-        { { 0x4201652fce0ccac7l,0x12f8e93df1d29d2dl,0x6c2ac9b2220f00c1l,
-            0x4ee6a685a850baa9l },
-          { 0x2c2371f163ee8829l,0xddff16488f464433l,0xeab6cd8869a2c413l,
-            0xcae34beb85e4c2a8l },
-          0 },
-    },
-    {
-        /* 0 << 56 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 56 */
-        { { 0xc7913e91991724f3l,0x5eda799c39cbd686l,0xddb595c763d4fc1el,
-            0x6b63b80bac4fed54l },
-          { 0x6ea0fc697e5fb516l,0x737708bad0f1c964l,0x9628745f11a92ca5l,
-            0x61f379589a86967al },
-          0 },
-        /* 3 << 56 */
-        { { 0x46a8c4180d738dedl,0x6f1a5bb0e0de5729l,0xf10230b98ba81675l,
-            0x32c6f30c112b33d4l },
-          { 0x7559129dd8fffb62l,0x6a281b47b459bf05l,0x77c1bd3afa3b6776l,
-            0x0709b3807829973al },
-          0 },
-        /* 4 << 56 */
-        { { 0x8c26b232a3326505l,0x38d69272ee1d41bfl,0x0459453effe32afal,
-            0xce8143ad7cb3ea87l },
-          { 0x932ec1fa7e6ab666l,0x6cd2d23022286264l,0x459a46fe6736f8edl,
-            0x50bf0d009eca85bbl },
-          0 },
-        /* 5 << 56 */
-        { { 0x0b825852877a21ecl,0x300414a70f537a94l,0x3f1cba4021a9a6a2l,
-            0x50824eee76943c00l },
-          { 0xa0dbfcecf83cba5dl,0xf953814893b4f3c0l,0x6174416248f24dd7l,
-            0x5322d64de4fb09ddl },
-          0 },
-        /* 7 << 56 */
-        { { 0xa337c447f1f0ced1l,0x800cc7939492dd2bl,0x4b93151dbea08efal,
-            0x820cf3f8de0a741el },
-          { 0xff1982dc1c0f7d13l,0xef92196084dde6cal,0x1ad7d97245f96ee3l,
-            0x319c8dbe29dea0c7l },
-          0 },
-        /* 9 << 56 */
-        { { 0x0ae1d63b0eb919b0l,0xd74ee51da74b9620l,0x395458d0a674290cl,
-            0x324c930f4620a510l },
-          { 0x2d1f4d19fbac27d4l,0x4086e8ca9bedeeacl,0x0cdd211b9b679ab8l,
-            0x5970167d7090fec4l },
-          0 },
-        /* 10 << 56 */
-        { { 0x3420f2c9faf1fc63l,0x616d333a328c8bb4l,0x7d65364c57f1fe4al,
-            0x9343e87755e5c73al },
-          { 0x5795176be970e78cl,0xa36ccebf60533627l,0xfc7c738009cdfc1bl,
-            0xb39a2afeb3fec326l },
-          0 },
-        /* 11 << 56 */
-        { { 0xb7ff1ba16224408al,0xcc856e92247cfc5el,0x01f102e7c18bc493l,
-            0x4613ab742091c727l },
-          { 0xaa25e89cc420bf2bl,0x00a5317690337ec2l,0xd2be9f437d025fc7l,
-            0x3316fb856e6fe3dcl },
-          0 },
-        /* 13 << 56 */
-        { { 0x67332cfc2064cfd1l,0x339c31deb0651934l,0x719b28d52a3bcbeal,
-            0xee74c82b9d6ae5c6l },
-          { 0x0927d05ebaf28ee6l,0x82cecf2c9d719028l,0x0b0d353eddb30289l,
-            0xfe4bb977fddb2e29l },
-          0 },
-        /* 15 << 56 */
-        { { 0xe10b2ab817a91cael,0xb89aab6508e27f63l,0x7b3074a7dba3ddf9l,
-            0x1c20ce09330c2972l },
-          { 0x6b9917b45fcf7e33l,0xe6793743945ceb42l,0x18fc22155c633d19l,
-            0xad1adb3cc7485474l },
-          0 },
-        /* 16 << 56 */
-        { { 0x646f96796424c49bl,0xf888dfe867c241c9l,0xe12d4b9324f68b49l,
-            0x9a6b62d8a571df20l },
-          { 0x81b4b26d179483cbl,0x666f96329511fae2l,0xd281b3e4d53aa51fl,
-            0x7f96a7657f3dbd16l },
-          0 },
-        /* 17 << 56 */
-        { { 0xa7f8b5bf074a30cel,0xd7f52107005a32e6l,0x6f9e090750237ed4l,
-            0x2f21da478096fa2bl },
-          { 0xf3e19cb4eec863a0l,0xd18f77fd9527620al,0x9505c81c407c1cf8l,
-            0x9998db4e1b6ec284l },
-          0 },
-        /* 19 << 56 */
-        { { 0x794e2d5984ac066cl,0xf5954a92e68c69a0l,0x28c524584fd99dccl,
-            0x60e639fcb1012517l },
-          { 0xc2e601257de79248l,0xe9ef6404f12fc6d7l,0x4c4f28082a3b5d32l,
-            0x865ad32ec768eb8al },
-          0 },
-        /* 21 << 56 */
-        { { 0x4f4ddf91b2f1ac7al,0xf99eaabb760fee27l,0x57f4008a49c228e5l,
-            0x090be4401cf713bbl },
-          { 0xac91fbe45004f022l,0xd838c2c2569e1af6l,0xd6c7d20b0f1daaa5l,
-            0xaa063ac11bbb02c0l },
-          0 },
-        /* 23 << 56 */
-        { { 0x54935fcb81d73c9el,0x6d07e9790a5e97abl,0x4dc7b30acf3a6babl,
-            0x147ab1f3170bee11l },
-          { 0x0aaf8e3d9fafdee4l,0xfab3dbcb538a8b95l,0x405df4b36ef13871l,
-            0xf1f4e9cb088d5a49l },
-          0 },
-        /* 25 << 56 */
-        { { 0x43c01b87459afccdl,0x6bd45143b7432652l,0x8473453055b5d78el,
-            0x81088fdb1554ba7dl },
-          { 0xada0a52c1e269375l,0xf9f037c42dc5ec10l,0xc066060794bfbc11l,
-            0xc0a630bbc9c40d2fl },
-          0 },
-        /* 27 << 56 */
-        { { 0x9a730ed44763eb50l,0x24a0e221c1ab0d66l,0x643b6393648748f3l,
-            0x1982daa16d3c6291l },
-          { 0x6f00a9f78bbc5549l,0x7a1783e17f36384el,0xe8346323de977f50l,
-            0x91ab688db245502al },
-          0 },
-        /* 28 << 56 */
-        { { 0x331ab6b56d0bdd66l,0x0a6ef32e64b71229l,0x1028150efe7c352fl,
-            0x27e04350ce7b39d3l },
-          { 0x2a3c8acdc1070c82l,0xfb2034d380c9feefl,0x2d729621709f3729l,
-            0x8df290bf62cb4549l },
-          0 },
-        /* 29 << 56 */
-        { { 0x02f99f33fc2e4326l,0x3b30076d5eddf032l,0xbb21f8cf0c652fb5l,
-            0x314fb49eed91cf7bl },
-          { 0xa013eca52f700750l,0x2b9e3c23712a4575l,0xe5355557af30fbb0l,
-            0x1ada35167c77e771l },
-          0 },
-        /* 31 << 56 */
-        { { 0xdc9f46fc609e4a74l,0x2a44a143ba667f91l,0xbc3d8b95b4d83436l,
-            0xa01e4bd0c7bd2958l },
-          { 0x7b18293273483c90l,0xa79c6aa1a7c7b598l,0xbf3983c6eaaac07el,
-            0x8f18181e96e0d4e6l },
-          0 },
-        /* 33 << 56 */
-        { { 0x0bfc27eeacee5043l,0xae419e732eb10f02l,0x19c028d18943fb05l,
-            0x71f01cf7ff13aa2al },
-          { 0x7790737e8887a132l,0x6751330966318410l,0x9819e8a37ddb795el,
-            0xfecb8ef5dad100b2l },
-          0 },
-        /* 34 << 56 */
-        { { 0x59f74a223021926al,0xb7c28a496f9b4c1cl,0xed1a733f912ad0abl,
-            0x42a910af01a5659cl },
-          { 0x3842c6e07bd68cabl,0x2b57fa3876d70ac8l,0x8a6707a83c53aaebl,
-            0x62c1c51065b4db18l },
-          0 },
-        /* 35 << 56 */
-        { { 0x8de2c1fbb2d09dc7l,0xc3dfed12266bd23bl,0x927d039bd5b27db6l,
-            0x2fb2f0f1103243dal },
-          { 0xf855a07b80be7399l,0xed9327ce1f9f27a8l,0xa0bd99c7729bdef7l,
-            0x2b67125e28250d88l },
-          0 },
-        /* 36 << 56 */
-        { { 0x784b26e88670ced7l,0xe3dfe41fc31bd3b4l,0x9e353a06bcc85cbcl,
-            0x302e290960178a9dl },
-          { 0x860abf11a6eac16el,0x76447000aa2b3aacl,0x46ff9d19850afdabl,
-            0x35bdd6a5fdb2d4c1l },
-          0 },
-        /* 37 << 56 */
-        { { 0xe82594b07e5c9ce9l,0x0f379e5320af346el,0x608b31e3bc65ad4al,
-            0x710c6b12267c4826l },
-          { 0x51c966f971954cf1l,0xb1cec7930d0aa215l,0x1f15598986bd23a8l,
-            0xae2ff99cf9452e86l },
-          0 },
-        /* 39 << 56 */
-        { { 0xb5a741a76b2515cfl,0x71c416019585c749l,0x78350d4fe683de97l,
-            0x31d6152463d0b5f5l },
-          { 0x7a0cc5e1fbce090bl,0xaac927edfbcb2a5bl,0xe920de4920d84c35l,
-            0x8c06a0b622b4de26l },
-          0 },
-        /* 40 << 56 */
-        { { 0xd34dd58bafe7ddf3l,0x55851fedc1e6e55bl,0xd1395616960696e7l,
-            0x940304b25f22705fl },
-          { 0x6f43f861b0a2a860l,0xcf1212820e7cc981l,0x121862120ab64a96l,
-            0x09215b9ab789383cl },
-          0 },
-        /* 41 << 56 */
-        { { 0x311eb30537387c09l,0xc5832fcef03ee760l,0x30358f5832f7ea19l,
-            0xe01d3c3491d53551l },
-          { 0x1ca5ee41da48ea80l,0x34e71e8ecf4fa4c1l,0x312abd257af1e1c7l,
-            0xe3afcdeb2153f4a5l },
-          0 },
-        /* 43 << 56 */
-        { { 0x2a17747fa6d74081l,0x60ea4c0555a26214l,0x53514bb41f88c5fel,
-            0xedd645677e83426cl },
-          { 0xd5d6cbec96460b25l,0xa12fd0ce68dc115el,0xc5bc3ed2697840eal,
-            0x969876a8a6331e31l },
-          0 },
-        /* 44 << 56 */
-        { { 0x60c36217472ff580l,0xf42297054ad41393l,0x4bd99ef0a03b8b92l,
-            0x501c7317c144f4f6l },
-          { 0x159009b318464945l,0x6d5e594c74c5c6bel,0x2d587011321a3660l,
-            0xd1e184b13898d022l },
-          0 },
-        /* 45 << 56 */
-        { { 0x5ba047524c6a7e04l,0x47fa1e2b45550b65l,0x9419daf048c0a9a5l,
-            0x663629537c243236l },
-          { 0xcd0744b15cb12a88l,0x561b6f9a2b646188l,0x599415a566c2c0c0l,
-            0xbe3f08590f83f09al },
-          0 },
-        /* 46 << 56 */
-        { { 0x9141c5beb92041b8l,0x01ae38c726477d0dl,0xca8b71f3d12c7a94l,
-            0xfab5b31f765c70dbl },
-          { 0x76ae7492487443e9l,0x8595a310990d1349l,0xf8dbeda87d460a37l,
-            0x7f7ad0821e45a38fl },
-          0 },
-        /* 47 << 56 */
-        { { 0xed1d4db61059705al,0xa3dd492ae6b9c697l,0x4b92ee3a6eb38bd5l,
-            0xbab2609d67cc0bb7l },
-          { 0x7fc4fe896e70ee82l,0xeff2c56e13e6b7e3l,0x9b18959e34d26fcal,
-            0x2517ab66889d6b45l },
-          0 },
-        /* 48 << 56 */
-        { { 0xf167b4e0bdefdd4fl,0x69958465f366e401l,0x5aa368aba73bbec0l,
-            0x121487097b240c21l },
-          { 0x378c323318969006l,0xcb4d73cee1fe53d1l,0x5f50a80e130c4361l,
-            0xd67f59517ef5212bl },
-          0 },
-        /* 49 << 56 */
-        { { 0xf145e21e9e70c72el,0xb2e52e295566d2fbl,0x44eaba4a032397f5l,
-            0x5e56937b7e31a7del },
-          { 0x68dcf517456c61e1l,0xbc2e954aa8b0a388l,0xe3552fa760a8b755l,
-            0x03442dae73ad0cdel },
-          0 },
-        /* 51 << 56 */
-        { { 0x3fcbdbce478e2135l,0x7547b5cfbda35342l,0xa97e81f18a677af6l,
-            0xc8c2bf8328817987l },
-          { 0xdf07eaaf45580985l,0xc68d1f05c93b45cbl,0x106aa2fec77b4cacl,
-            0x4c1d8afc04a7ae86l },
-          0 },
-        /* 52 << 56 */
-        { { 0xdb41c3fd9eb45ab2l,0x5b234b5bd4b22e74l,0xda253decf215958al,
-            0x67e0606ea04edfa0l },
-          { 0xabbbf070ef751b11l,0xf352f175f6f06dcel,0xdfc4b6af6839f6b4l,
-            0x53ddf9a89959848el },
-          0 },
-        /* 53 << 56 */
-        { { 0xda49c379c21520b0l,0x90864ff0dbd5d1b6l,0x2f055d235f49c7f7l,
-            0xe51e4e6aa796b2d8l },
-          { 0xc361a67f5c9dc340l,0x5ad53c37bca7c620l,0xda1d658832c756d0l,
-            0xad60d9118bb67e13l },
-          0 },
-        /* 55 << 56 */
-        { { 0xd1183316fd6f7140l,0xf9fadb5bbd8e81f7l,0x701d5e0c5a02d962l,
-            0xfdee4dbf1b601324l },
-          { 0xbed1740735d7620el,0x04e3c2c3f48c0012l,0x9ee29da73455449al,
-            0x562cdef491a836c4l },
-          0 },
-        /* 57 << 56 */
-        { { 0x147ebf01fad097a5l,0x49883ea8610e815dl,0xe44d60ba8a11de56l,
-            0xa970de6e827a7a6dl },
-          { 0x2be414245e17fc19l,0xd833c65701214057l,0x1375813b363e723fl,
-            0x6820bb88e6a52e9bl },
-          0 },
-        /* 59 << 56 */
-        { { 0xe1b6f60c08191224l,0xc4126ebbde4ec091l,0xe1dff4dc4ae38d84l,
-            0xde3f57db4f2ef985l },
-          { 0x34964337d446a1ddl,0x7bf217a0859e77f6l,0x8ff105278e1d13f5l,
-            0xa304ef0374eeae27l },
-          0 },
-        /* 60 << 56 */
-        { { 0xfc6f5e47d19dfa5al,0xdb007de37fad982bl,0x28205ad1613715f5l,
-            0x251e67297889529el },
-          { 0x727051841ae98e78l,0xf818537d271cac32l,0xc8a15b7eb7f410f5l,
-            0xc474356f81f62393l },
-          0 },
-        /* 61 << 56 */
-        { { 0x92dbdc5ac242316bl,0xabe060acdbf4aff5l,0x6e8c38fe909a8ec6l,
-            0x43e514e56116cb94l },
-          { 0x2078fa3807d784f9l,0x1161a880f4b5b357l,0x5283ce7913adea3dl,
-            0x0756c3e6cc6a910bl },
-          0 },
-        /* 63 << 56 */
-        { { 0xa573a4966d17fbc7l,0x0cd1a70a73d2b24el,0x34e2c5cab2676937l,
-            0xe7050b06bf669f21l },
-          { 0xfbe948b61ede9046l,0xa053005197662659l,0x58cbd4edf10124c5l,
-            0xde2646e4dd6c06c8l },
-          0 },
-        /* 64 << 56 */
-        { { 0x332f81088cad38c0l,0x471b7e906bd68ae2l,0x56ac3fb20d8e27a3l,
-            0xb54660db136b4b0dl },
-          { 0x123a1e11a6fd8de4l,0x44dbffeaa37799efl,0x4540b977ce6ac17cl,
-            0x495173a8af60acefl },
-          0 },
-        /* 65 << 56 */
-        { { 0xc48b1478db447d0bl,0xe1b85f5d46104fbbl,0x4ab31e7d991c60b9l,
-            0xaa674a9258a0cfd0l },
-          { 0x179fc2cd316f4297l,0x90c18642dcccbc82l,0x65d4309e56a4c163l,
-            0xf211a9c7145a33ecl },
-          0 },
-        /* 71 << 56 */
-        { { 0x9669170cdc32717fl,0x52d69b5138133e34l,0xaed24e5fb079c3b2l,
-            0xaba44a91a21ea3d2l },
-          { 0xd6814f1938d40105l,0x38289fe463462e7al,0x1793eefa3a80cbf5l,
-            0x05816a0795f29bacl },
-          0 },
-        /* 77 << 56 */
-        { { 0xdca88ad98f850641l,0x8c1152c447999b0dl,0x509f654e654aff33l,
-            0x2228550f08a12f14l },
-          { 0x60fe99dbb6a0ccdbl,0x80d6829bfc2cddccl,0x190f454dd5617aa4l,
-            0x0aea05fe36295d2dl },
-          0 },
-        /* 83 << 56 */
-        { { 0x1de06c8af9bef9a5l,0xe24d85d3fb2d3164l,0x3dbe455e8d203d3el,
-            0x439bee4735ea47a9l },
-          { 0xcc143432784893d7l,0x9b71073bd9bebd00l,0x6c106b343aa2fe88l,
-            0x9df2a42734746f7al },
-          0 },
-        /* 89 << 56 */
-        { { 0x1ad0b3725a8c2168l,0x64e52d6d143f0402l,0xd933c783e320f31fl,
-            0x1ccf90a80ff14f52l },
-          { 0xd3a3133ee1e6d0c0l,0xfd75a2d5b4acc8cal,0x62659b8e5559d171l,
-            0x5087d6e9f13ad52al },
-          0 },
-        /* 95 << 56 */
-        { { 0xb4d647a5deef31a4l,0x95bf4ab180975ea9l,0x2f92d15adf57b03el,
-            0x5ee808ab746b26d6l },
-          { 0x4341597c1082f261l,0x027795eb40c45e95l,0xcb77744b3b690c30l,
-            0xdd87c084af3f88d1l },
-          0 },
-        /* 101 << 56 */
-        { { 0x469f177572109785l,0xf365e55123f84d6cl,0x8006a9c28a046dbbl,
-            0x1b9fbe892fa09f52l },
-          { 0xac18a88016075e9el,0x4a3069bc1e3fd628l,0x20c61eaa60c61c14l,
-            0x315b59daf61f004bl },
-          0 },
-        /* 107 << 56 */
-        { { 0x0a94387f26d04857l,0x952a4ebc43d6de95l,0xb422e15cf14abdfal,
-            0x5b7a0153324ef90cl },
-          { 0x6aefa20e9826ec5bl,0x0e529886ad2fe161l,0xb710a74ec0d416e8l,
-            0x6cf4b0a5fb6c90bcl },
-          0 },
-        /* 113 << 56 */
-        { { 0x822aea4031979d3bl,0xb504eafde215a109l,0xa8761ead84bf2377l,
-            0xb55c1e55efb3d942l },
-          { 0xd01f9b0212b7f17bl,0x41b62c2a891bfbbfl,0x50800e6b08938149l,
-            0x527b50a9b0a55d82l },
-          0 },
-        /* 116 << 56 */
-        { { 0x6bc84d8d1d9ce3c4l,0x53b465072a308df0l,0x6c3da9bfca79c88al,
-            0x9636ad9c36372acfl },
-          { 0x8840e92c425ef14cl,0x863191f96af3225bl,0xd56d82d0d369b857l,
-            0x2053a2527a4c41f9l },
-          0 },
-        /* 119 << 56 */
-        { { 0x20aecd6609ca8805l,0x945d9b31dc818ee6l,0x1424647c2119b44bl,
-            0xbe934d7e5a6641f9l },
-          { 0xe91d53184559e55el,0xc2fb8e0b4dfbc3d4l,0x9e92e20676cb937fl,
-            0x0f5582e4f2932429l },
-          0 },
-        /* 125 << 56 */
-        { { 0xb5fc22a42d31809fl,0x6d582d2b0e35b7b4l,0x5fac415158c5f576l,
-            0xdff239371e4cd7c9l },
-          { 0x0f62b329ed4d1925l,0x00994a2e6010fb16l,0xb4b91076bd754837l,
-            0xfde219463345103al },
-          0 },
-    },
-    {
-        /* 0 << 64 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 64 */
-        { { 0x4f922fc516a0d2bbl,0x0d5cc16c1a623499l,0x9241cf3a57c62c8bl,
-            0x2f5e6961fd1b667fl },
-          { 0x5c15c70bf5a01797l,0x3d20b44d60956192l,0x04911b37071fdb52l,
-            0xf648f9168d6f0f7bl },
-          0 },
-        /* 3 << 64 */
-        { { 0x4090914bb5def996l,0x1cb69c83233dd1e7l,0xc1e9c1d39b3d5e76l,
-            0x1f3338edfccf6012l },
-          { 0xb1e95d0d2f5378a8l,0xacf4c2c72f00cd21l,0x6e984240eb5fe290l,
-            0xd66c038d248088ael },
-          0 },
-        /* 4 << 64 */
-        { { 0x9ad5462bb4d8bc50l,0x181c0b16a9195770l,0xebd4fe1c78412a68l,
-            0xae0341bcc0dff48cl },
-          { 0xb6bc45cf7003e866l,0xf11a6dea8a24a41bl,0x5407151ad04c24c2l,
-            0x62c9d27dda5b7b68l },
-          0 },
-        /* 5 << 64 */
-        { { 0xd4992b30614c0900l,0xda98d121bd00c24bl,0x7f534dc87ec4bfa1l,
-            0x4a5ff67437dc34bcl },
-          { 0x68c196b81d7ea1d7l,0x38cf289380a6d208l,0xfd56cd09e3cbbd6el,
-            0xec72e27e4205a5b6l },
-          0 },
-        /* 7 << 64 */
-        { { 0xe8b97932b88756ddl,0xed4e8652f17e3e61l,0xc2dd14993ee1c4a4l,
-            0xc0aaee17597f8c0el },
-          { 0x15c4edb96c168af3l,0x6563c7bfb39ae875l,0xadfadb6f20adb436l,
-            0xad55e8c99a042ac0l },
-          0 },
-        /* 9 << 64 */
-        { { 0x65c29219909523c8l,0xa62f648fa3a1c741l,0x88598d4f60c9e55al,
-            0xbce9141b0e4f347al },
-          { 0x9af97d8435f9b988l,0x0210da62320475b6l,0x3c076e229191476cl,
-            0x7520dbd944fc7834l },
-          0 },
-        /* 10 << 64 */
-        { { 0x87a7ebd1e0a1b12al,0x1e4ef88d770ba95fl,0x8c33345cdc2ae9cbl,
-            0xcecf127601cc8403l },
-          { 0x687c012e1b39b80fl,0xfd90d0ad35c33ba4l,0xa3ef5a675c9661c2l,
-            0x368fc88ee017429el },
-          0 },
-        /* 11 << 64 */
-        { { 0x664300b07850ec06l,0xac5a38b97d3a10cfl,0x9233188de34ab39dl,
-            0xe77057e45072cbb9l },
-          { 0xbcf0c042b59e78dfl,0x4cfc91e81d97de52l,0x4661a26c3ee0ca4al,
-            0x5620a4c1fb8507bcl },
-          0 },
-        /* 13 << 64 */
-        { { 0x84b9ca1504b6c5a0l,0x35216f3918f0e3a3l,0x3ec2d2bcbd986c00l,
-            0x8bf546d9d19228fel },
-          { 0xd1c655a44cd623c3l,0x366ce718502b8e5al,0x2cfc84b4eea0bfe7l,
-            0xe01d5ceecf443e8el },
-          0 },
-        /* 15 << 64 */
-        { { 0xa75feacabe063f64l,0x9b392f43bce47a09l,0xd42415091ad07acal,
-            0x4b0c591b8d26cd0fl },
-          { 0x2d42ddfd92f1169al,0x63aeb1ac4cbf2392l,0x1de9e8770691a2afl,
-            0xebe79af7d98021dal },
-          0 },
-        /* 16 << 64 */
-        { { 0x58af2010f5b343bcl,0x0f2e400af2f142fel,0x3483bfdea85f4bdfl,
-            0xf0b1d09303bfeaa9l },
-          { 0x2ea01b95c7081603l,0xe943e4c93dba1097l,0x47be92adb438f3a6l,
-            0x00bb7742e5bf6636l },
-          0 },
-        /* 17 << 64 */
-        { { 0x66917ce63b5f1cc4l,0x37ae52eace872e62l,0xbb087b722905f244l,
-            0x120770861e6af74fl },
-          { 0x4b644e491058edeal,0x827510e3b638ca1dl,0x8cf2b7046038591cl,
-            0xffc8b47afe635063l },
-          0 },
-        /* 19 << 64 */
-        { { 0x7677408d6dfafed3l,0x33a0165339661588l,0x3c9c15ec0b726fa0l,
-            0x090cfd936c9b56dal },
-          { 0xe34f4baea3c40af5l,0x3469eadbd21129f1l,0xcc51674a1e207ce8l,
-            0x1e293b24c83b1ef9l },
-          0 },
-        /* 21 << 64 */
-        { { 0x796d3a85825808bdl,0x51dc3cb73fd6e902l,0x643c768a916219d1l,
-            0x36cd7685a2ad7d32l },
-          { 0xe3db9d05b22922a4l,0x6494c87edba29660l,0xf0ac91dfbcd2ebc7l,
-            0x4deb57a045107f8dl },
-          0 },
-        /* 23 << 64 */
-        { { 0xb6c69ac82094cec3l,0x9976fb88403b770cl,0x1dea026c4859590dl,
-            0xb6acbb468562d1fdl },
-          { 0x7cd6c46144569d85l,0xc3190a3697f0891dl,0xc6f5319548d5a17dl,
-            0x7d919966d749abc8l },
-          0 },
-        /* 25 << 64 */
-        { { 0xb53b7de561906373l,0x858dbadeeb999595l,0x8cbb47b2a59e5c36l,
-            0x660318b3dcf4e842l },
-          { 0xbd161ccd12ba4b7al,0xf399daabf8c8282al,0x1587633aeeb2130dl,
-            0xa465311ada38dd7dl },
-          0 },
-        /* 27 << 64 */
-        { { 0x2dae9082be7cf3a6l,0xcc86ba92bc967274l,0xf28a2ce8aea0a8a9l,
-            0x404ca6d96ee988b3l },
-          { 0xfd7e9c5d005921b8l,0xf56297f144e79bf9l,0xa163b4600d75ddc2l,
-            0x30b23616a1f2be87l },
-          0 },
-        /* 28 << 64 */
-        { { 0x19e6125dec3f1decl,0x07b1f040911178dal,0xd93ededa904a6738l,
-            0x55187a5a0bebedcdl },
-          { 0xf7d04722eb329d41l,0xf449099ef170b391l,0xfd317a69ca99f828l,
-            0x50c3db2b34a4976dl },
-          0 },
-        /* 29 << 64 */
-        { { 0x0064d8585499fb32l,0x7b67bad977a8aeb7l,0x1d3eb9772d08eec5l,
-            0x5fc047a6cbabae1dl },
-          { 0x0577d159e54a64bbl,0x8862201bc43497e4l,0xad6b4e282ce0608dl,
-            0x8b687b7d0b167aacl },
-          0 },
-        /* 31 << 64 */
-        { { 0xe9f9669cda94951el,0x4b6af58d66b8d418l,0xfa32107417d426a4l,
-            0xc78e66a99dde6027l },
-          { 0x0516c0834a53b964l,0xfc659d38ff602330l,0x0ab55e5c58c5c897l,
-            0x985099b2838bc5dfl },
-          0 },
-        /* 33 << 64 */
-        { { 0xe7a935fa1684cb3bl,0x571650b5a7d7e69dl,0x6ba9ffa40328c168l,
-            0xac43f6bc7e46f358l },
-          { 0x54f75e567cb6a779l,0x4e4e2cc8c61320del,0xb94258bc2b8903d0l,
-            0xc7f32d57ceecabe0l },
-          0 },
-        /* 34 << 64 */
-        { { 0x34739f16cd7d9d89l,0x6daab4267ca080b5l,0x772086ff40e19f45l,
-            0x43caa56118c61b42l },
-          { 0x0ba3d4a8dbf365f1l,0xa0db435ee760ad97l,0xfd6f30d56916c59bl,
-            0xab34cb5dafe12f5dl },
-          0 },
-        /* 35 << 64 */
-        { { 0x445b86ea02a3260al,0x8c51d6428d689babl,0x183334d65588904cl,
-            0xf8a3b84d479d6422l },
-          { 0x581acfa0f0833d00l,0xc50827bc3b567d2dl,0x2c935e6daddcf73el,
-            0x2a645f7704dd19f2l },
-          0 },
-        /* 36 << 64 */
-        { { 0x78d2e8dfcb564473l,0x4349a97357d5621al,0x9d835d89218f8b24l,
-            0x01fe7bc5079b6ee2l },
-          { 0xe57f2a2b5b3b5dcel,0x5a8637b75fe55565l,0x83ff34aea41dbae7l,
-            0xfce1199c950a7a8fl },
-          0 },
-        /* 37 << 64 */
-        { { 0x0ca5d25bf8e71ce2l,0x204edc4a062685dal,0x06fe407d87678ec2l,
-            0xd16936a07defa39al },
-          { 0x3b108d84af3d16d0l,0xf2e9616d0305cad0l,0xbc9537e6f27bed97l,
-            0x71c2d699ebc9f45cl },
-          0 },
-        /* 39 << 64 */
-        { { 0x203bdd84cdcd3a85l,0x1107b901ade3ccfal,0xa7da89e95533159dl,
-            0x8d834005860e8c64l },
-          { 0x914bc0eb2a7638f7l,0xc66ce0a6620e8606l,0x11ef98c2e6c12dc0l,
-            0x25666b1d7780fc0el },
-          0 },
-        /* 40 << 64 */
-        { { 0x374f541f3e707706l,0x9a4d3638a831d0cfl,0x4ab4f4831518ca04l,
-            0x54e3ee5dfe38c318l },
-          { 0x383ae36403c8819bl,0xa9d1daa12e17864cl,0x245a97b350eeaa5bl,
-            0x5362d00999bf4e83l },
-          0 },
-        /* 41 << 64 */
-        { { 0x6667e89f4ded8a4fl,0xa59161abc36a7795l,0x1c96f6f9331ccf94l,
-            0xf2727e879a686d49l },
-          { 0x0f94894bb841295fl,0xb0fe8f744a0503d1l,0x60c581c7ef407926l,
-            0x1980c8e13edb7e1cl },
-          0 },
-        /* 43 << 64 */
-        { { 0x47948c84c5de1a41l,0xd595d14a48959688l,0x3bfca4be86ff21c9l,
-            0xb5ff59b86a4191cal },
-          { 0xced1dd1d65094c86l,0xd57b86559dc9d001l,0xbcac6fa3486e51d7l,
-            0x8e97e2637b774c1bl },
-          0 },
-        /* 44 << 64 */
-        { { 0xfc0313c29bd43980l,0x9c954b70f172db29l,0x679bdcb7f954a21al,
-            0x6b48170954e2e4fcl },
-          { 0x318af5f530baf1d0l,0x26ea8a3ccbf92060l,0xc3c69d7ccd5ae258l,
-            0xa73ba0470ead07c9l },
-          0 },
-        /* 45 << 64 */
-        { { 0xe82eb003e35dca85l,0xfd0000fa31e39180l,0xbca90f746735f378l,
-            0xe6aa783158c943edl },
-          { 0x0e94ecd5b6a438d7l,0xc02b60faf9a5f114l,0x4063568b8b1611ebl,
-            0x1398bdc1272509ecl },
-          0 },
-        /* 46 << 64 */
-        { { 0xc2ef6a01be3e92d1l,0x1bce9c27282bd5ddl,0xf7e488f3adda0568l,
-            0xd4f15fdb1af9bb8bl },
-          { 0x8c490ade4da846efl,0x76229da17f0b825el,0xc8b812082a6711c6l,
-            0x511f5e23b4c523aal },
-          0 },
-        /* 47 << 64 */
-        { { 0xbdf4e7049970f46el,0x70e220288dadbd1al,0x2b86c97fb1223d26l,
-            0x042ad22ecf62f51al },
-          { 0x72944339ba2ed2e9l,0x0ba0d10ef94fa61dl,0x3f86164194e68f15l,
-            0x1312a74acb86c545l },
-          0 },
-        /* 48 << 64 */
-        { { 0x3a63c39731815e69l,0x6df9cbd6dcdd2802l,0x4c47ed4a15b4f6afl,
-            0x62009d826ac0f978l },
-          { 0x664d80d28b898fc7l,0x72f1eeda2c17c91fl,0x9e84d3bc7aae6609l,
-            0x58c7c19528376895l },
-          0 },
-        /* 49 << 64 */
-        { { 0x640ebf5d5b8d354al,0xa5f3a8fdb396ff64l,0xd53f041d8378ed81l,
-            0x1969d61bc1234ad2l },
-          { 0x16d7acffeb68bde2l,0x63767a68f23e9368l,0x937a533c38928d95l,
-            0xee2190bbbeb0f1f2l },
-          0 },
-        /* 51 << 64 */
-        { { 0xb6860c9a73a4aafbl,0xb2f996290488870dl,0x16ef6232572d9e25l,
-            0x5b9eb1bad1383389l },
-          { 0xabf713a7ed8d77f8l,0xd2b4a2e9e2b69e64l,0xa1a22cfd6d6f17c2l,
-            0x4bfd6f992d604511l },
-          0 },
-        /* 52 << 64 */
-        { { 0xdcff7630d9294f07l,0x89b765d68dba8fd0l,0x553e55de8dbcaccdl,
-            0x9b4a009eed702bf8l },
-          { 0xf6e534dd27b8ca0dl,0xc4496b346177fd52l,0x378ce6f6c87bb7b7l,
-            0x68633d4844cc19f0l },
-          0 },
-        /* 53 << 64 */
-        { { 0xfe550021bc84c625l,0x8d7169986d45e4a3l,0xa09c6ded4c0c66b7l,
-            0xe32313aeb9e1d547l },
-          { 0x8ce775b4d1e8e0b9l,0xa899f9102654dd15l,0x7c38aa066cc8b2a9l,
-            0xe6ebb291d6ce6cc0l },
-          0 },
-        /* 55 << 64 */
-        { { 0x5963df62a6991216l,0x4c17f72246996010l,0x131dc2b840477722l,
-            0x78bf50b0d1765a75l },
-          { 0x360afd587ceaca12l,0xebc55dbb139cd470l,0x9083e27e4c05541cl,
-            0xc10057a3b873d757l },
-          0 },
-        /* 57 << 64 */
-        { { 0x440009c3deed7769l,0xde2fa58a14fd8a44l,0x509e7df35b627596l,
-            0x3d76a87cc3bb07a7l },
-          { 0x8018fee5b8ef000al,0x71ce33e9823fd4b6l,0x3a1cac37469c0bb1l,
-            0x92fe7aeaf3eec8eel },
-          0 },
-        /* 59 << 64 */
-        { { 0x37ad0eb8de64e568l,0x4ac669bca1e3e20el,0x240d0ac22ce944edl,
-            0xd532039a3c1b28fbl },
-          { 0xa2bb899a23acba6cl,0xd472af671af937e1l,0x04478f7b8851e753l,
-            0x74030eef5ea05307l },
-          0 },
-        /* 60 << 64 */
-        { { 0x3559e7b67dc17874l,0xd0caf0ef8195cc2al,0x07c067880cd24dd9l,
-            0x01a99ea002857c41l },
-          { 0xd86579e490f82f63l,0xb1e0658ae41c9237l,0x075ffafd93fd1e79l,
-            0x6e70403547f60b8fl },
-          0 },
-        /* 61 << 64 */
-        { { 0x2246ad76c1d68c31l,0x9126202b0d5c4677l,0x5f40de81638882dcl,
-            0xb131988ca3253a7fl },
-          { 0x766f1897ba9ae0a8l,0xf0e01dd41d8b5fefl,0x03e28ce3ed7b12c8l,
-            0x44b3a2be1fd20e1el },
-          0 },
-        /* 63 << 64 */
-        { { 0xd4c8e8e5f2a5f247l,0x42ffd816c2c7c979l,0x89e1485211093d1al,
-            0x98f44a4613871ebbl },
-          { 0x374849964b032e2dl,0x28a430f445995a61l,0xf2f9acbad5be16b6l,
-            0xac98a5402d8e02aal },
-          0 },
-        /* 64 << 64 */
-        { { 0x0d53f5c7a3e6fcedl,0xe8cbbdd5f45fbdebl,0xf85c01df13339a70l,
-            0x0ff71880142ceb81l },
-          { 0x4c4e8774bd70437al,0x5fb32891ba0bda6al,0x1cdbebd2f18bd26el,
-            0x2f9526f103a9d522l },
-          0 },
-        /* 65 << 64 */
-        { { 0x48334fdcc20b8d30l,0x25f887d749414fddl,0x9ccd513311a2cf0dl,
-            0x7e7799e4d08975a4l },
-          { 0xb5993a53729b951cl,0x0cf14a5a62dbc6a8l,0xb39ed36efe4d16eel,
-            0xb75f3fb681bda63al },
-          0 },
-        /* 71 << 64 */
-        { { 0xac7db8706d4f68b5l,0x819a13c7be49b3a4l,0x646ae2b1418bf1e9l,
-            0x25b53a5f69b3a5ccl },
-          { 0xd23d94d37de26578l,0x8bb581caecdd138al,0x9e053f67f857b0dal,
-            0xe679cc7a255ff474l },
-          0 },
-        /* 77 << 64 */
-        { { 0x4a4b8d990df097f9l,0x0ae1227a0b4173cal,0x0d401778adb72178l,
-            0xd29848b43f421e0cl },
-          { 0xc5eec6096eb0722dl,0x527d72877e12c028l,0xed12a9e71b5dcc0cl,
-            0x26b27344dcf4b4dal },
-          0 },
-        /* 83 << 64 */
-        { { 0x695c502565e4408al,0x2d23768fcbce94e6l,0x1505fa1e5080b88dl,
-            0x5c8fbab6855f7cc1l },
-          { 0x70d876f275fb125dl,0x456421330a252007l,0xfe99249a8ee05be1l,
-            0x0893b620f4bf5490l },
-          0 },
-        /* 89 << 64 */
-        { { 0x2a59df1ed9fe6bdfl,0x96a9c791785e057fl,0x4b0d795f86a1d751l,
-            0x196c8e0aec642886l },
-          { 0x6df67899bc0e055cl,0x4173204a63007433l,0xb5ee4efec21c9245l,
-            0x2f7d4c75c1451bael },
-          0 },
-        /* 95 << 64 */
-        { { 0x2ad7f836b1047b7fl,0x368d431a71f6bfe1l,0xfcd933b103db4667l,
-            0xfff77ed3ecb81330l },
-          { 0x3677935b44958bd4l,0xa6cfcda8a1d5a9e7l,0xb2b73bc699ff9fael,
-            0x1c2cd628f866d3c4l },
-          0 },
-        /* 101 << 64 */
-        { { 0x2756873495031ceel,0xebed373d51091c1bl,0x398fef0819aa2f27l,
-            0x2f26174e2c0a9feal },
-          { 0xedca72b6b219be3fl,0x001a8fdc80503df8l,0x9a2fadbb6b93f643l,
-            0xd48e552cd44cebc3l },
-          0 },
-        /* 107 << 64 */
-        { { 0x6c0dbb68667a7ab6l,0x00490ce757630e91l,0x04976cd57eb2f382l,
-            0x9ee486b655dda4a3l },
-          { 0x4ea5c9c9cca0d01cl,0xa6e054b639f69c6dl,0xb3b7ac992ecab239l,
-            0x80c9f6d17597512el },
-          0 },
-        /* 113 << 64 */
-        { { 0x64dfdd68b942fad9l,0xe7d8e88da5eb3d14l,0xb7281dc2382f6301l,
-            0xcfa2ee6dbfe00a7fl },
-          { 0x6e617657dc7be39fl,0x22d58dd6591c6e3al,0xd3a4003918318c13l,
-            0xcac6c830981b6b72l },
-          0 },
-        /* 116 << 64 */
-        { { 0x009690ffb4fbfaa0l,0x8bbbdab73619c6dbl,0xc6d44273728356e8l,
-            0xfd76f0d8e453ec35l },
-          { 0x775c2554aac28a29l,0x28f7af9d5c55e4f0l,0xbacf54a688e8ad4dl,
-            0x85b018e80aa76ddfl },
-          0 },
-        /* 119 << 64 */
-        { { 0x27893f7983ce88e4l,0x9556c9977785f13dl,0x83d3c38d3a35831el,
-            0x3856c829d12f0a1dl },
-          { 0xb308d84c93259c1al,0x4ef87ab4691ffd28l,0x76a18d5321a88c58l,
-            0xf13cd5d53503cb4dl },
-          0 },
-        /* 125 << 64 */
-        { { 0x669d93dba8cc0db3l,0x403cb9200dfcfcf4l,0x5def4a03e77c3979l,
-            0x2a05c9423e2e2522l },
-          { 0xd86dca52b5f48bf0l,0x174766de5828a135l,0x116290b40d3a96d0l,
-            0xe1999457aeea1193l },
-          0 },
-    },
-    {
-        /* 0 << 72 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 72 */
-        { { 0x0db2fb5ed005832al,0x5f5efd3b91042e4fl,0x8c4ffdc6ed70f8cal,
-            0xe4645d0bb52da9ccl },
-          { 0x9596f58bc9001d1fl,0x52c8f0bc4e117205l,0xfd4aa0d2e398a084l,
-            0x815bfe3a104f49del },
-          0 },
-        /* 3 << 72 */
-        { { 0x524d226ad7ab9a2dl,0x9c00090d7dfae958l,0x0ba5f5398751d8c2l,
-            0x8afcbcdd3ab8262dl },
-          { 0x57392729e99d043bl,0xef51263baebc943al,0x9feace9320862935l,
-            0x639efc03b06c817bl },
-          0 },
-        /* 4 << 72 */
-        { { 0xe839be7d341d81dcl,0xcddb688932148379l,0xda6211a1f7026eadl,
-            0xf3b2575ff4d1cc5el },
-          { 0x40cfc8f6a7a73ae6l,0x83879a5e61d5b483l,0xc5acb1ed41a50ebcl,
-            0x59a60cc83c07d8fal },
-          0 },
-        /* 5 << 72 */
-        { { 0xdec98d4ac3b81990l,0x1cb837229e0cc8fel,0xfe0b0491d2b427b9l,
-            0x0f2386ace983a66cl },
-          { 0x930c4d1eb3291213l,0xa2f82b2e59a62ae4l,0x77233853f93e89e3l,
-            0x7f8063ac11777c7fl },
-          0 },
-        /* 7 << 72 */
-        { { 0x36e607cf02ff6072l,0xa47d2ca98ad98cdcl,0xbf471d1ef5f56609l,
-            0xbcf86623f264ada0l },
-          { 0xb70c0687aa9e5cb6l,0xc98124f217401c6cl,0x8189635fd4a61435l,
-            0xd28fb8afa9d98ea6l },
-          0 },
-        /* 9 << 72 */
-        { { 0x3d4da8c3017025f3l,0xefcf628cfb9579b4l,0x5c4d00161f3716ecl,
-            0x9c27ebc46801116el },
-          { 0x5eba0ea11da1767el,0xfe15145247004c57l,0x3ace6df68c2373b7l,
-            0x75c3dffe5dbc37acl },
-          0 },
-        /* 10 << 72 */
-        { { 0xa2a147dba28a0749l,0x246c20d6ee519165l,0x5068d1b1d3810715l,
-            0xb1e7018c748160b9l },
-          { 0x03f5b1faf380ff62l,0xef7fb1ddf3cb2c1el,0xeab539a8fc91a7dal,
-            0x83ddb707f3f9b561l },
-          0 },
-        /* 11 << 72 */
-        { { 0xb57276d980101b98l,0x760883fdb82f0f66l,0x89d7de754bc3eff3l,
-            0x03b606435dc2ab40l },
-          { 0xcd6e53dfe05beeacl,0xf2f1e862bc3325cdl,0xdd0f7921774f03c3l,
-            0x97ca72214552cc1bl },
-          0 },
-        /* 13 << 72 */
-        { { 0x760cb3b5e224c5d7l,0xfa3baf8c68616919l,0x9fbca1138d142552l,
-            0x1ab18bf17669ebf5l },
-          { 0x55e6f53e9bdf25ddl,0x04cc0bf3cb6cd154l,0x595bef4995e89080l,
-            0xfe9459a8104a9ac1l },
-          0 },
-        /* 15 << 72 */
-        { { 0x694b64c5abb020e8l,0x3d18c18419c4eec7l,0x9c4673ef1c4793e5l,
-            0xc7b8aeb5056092e6l },
-          { 0x3aa1ca43f0f8c16bl,0x224ed5ecd679b2f6l,0x0d56eeaf55a205c9l,
-            0xbfe115ba4b8e028bl },
-          0 },
-        /* 16 << 72 */
-        { { 0x3e22a7b397acf4ecl,0x0426c4005ea8b640l,0x5e3295a64e969285l,
-            0x22aabc59a6a45670l },
-          { 0xb929714c5f5942bcl,0x9a6168bdfa3182edl,0x2216a665104152bal,
-            0x46908d03b6926368l },
-          0 },
-        /* 17 << 72 */
-        { { 0x9b8be0247fcba850l,0x81eb5797820a181el,0xa0f2812230a01211l,
-            0x7e9cdc3cae7b8821l },
-          { 0x202332cc72ce15e7l,0xcd3cb2bbcb8238d7l,0xe4ab63dfc6e82c43l,
-            0x58bd00283183d717l },
-          0 },
-        /* 19 << 72 */
-        { { 0x02d57b7e717ed7b5l,0xd22e5b244dbce1a2l,0x174bd7712a4cdcf5l,
-            0xa6fdb801408205bbl },
-          { 0x67b4b0695e1387e9l,0x332b19a10591a442l,0x24edd916ccacf366l,
-            0xbe34cc4534958a50l },
-          0 },
-        /* 21 << 72 */
-        { { 0xa3f46e1e3e66d391l,0xb4a732cd7d6369b2l,0x99c3b85d402c1022l,
-            0x7dccfcbe2b54932el },
-          { 0xa6ddaa7b56b1dfe2l,0x31dc78a5e34a82c9l,0x8abeb3da704f3941l,
-            0xdf11a36cca55fa98l },
-          0 },
-        /* 23 << 72 */
-        { { 0x6c01f77a16e00c1bl,0x82515490839eaaacl,0x62f3a4ef3470d334l,
-            0x5a29a6491c1dcd6cl },
-          { 0x46b6782ece997a25l,0x9978fb35d3579953l,0x98f5a9df0960e0cel,
-            0x547dc8391f527a4cl },
-          0 },
-        /* 25 << 72 */
-        { { 0x395b15835d9dc24fl,0xa4256932c73ae680l,0x0542960efaa2c8e9l,
-            0x2bb3adee71068c6al },
-          { 0xa706099b570b4554l,0x85d12bb5f4e278d6l,0xd78af6f664296843l,
-            0xc7d3b3888428c633l },
-          0 },
-        /* 27 << 72 */
-        { { 0x34d44f9343b7e597l,0xdde440a7c2530f42l,0x7270a0817856bdb9l,
-            0x86a945eb5353032fl },
-          { 0x6c2f8e9966d39810l,0x0642a31b9b8b4b6bl,0x51679e62d1509d82l,
-            0x0120001c90f8ff16l },
-          0 },
-        /* 28 << 72 */
-        { { 0x50a1c1062e36e34al,0x74e8f58ce024ed1al,0x3f0f1dfa1300d726l,
-            0x6680df267b4a2d18l },
-          { 0x12b5979d8235b3b7l,0x1d2fafcb8a611493l,0x73ebda968848ece5l,
-            0xe996c275a413e399l },
-          0 },
-        /* 29 << 72 */
-        { { 0x46b7d7c7495ff000l,0xe60ed097baed95d1l,0xaa8804ac6e38f9c0l,
-            0x92990c0645c6f9bbl },
-          { 0xcae6a439c0919851l,0x713dff151bf5e1f2l,0x5d262c302eb38cdbl,
-            0xb73d505190df31dfl },
-          0 },
-        /* 31 << 72 */
-        { { 0x921e7b1c32d9268cl,0x34db2b964276fad4l,0x0ec56d34cc44e730l,
-            0x59be3a46096545b7l },
-          { 0xe9fdbc9766cf3a6al,0x7b2f83edd04e9b53l,0x6d99b3cc8fbae3e7l,
-            0x8eb5646c7ada3a40l },
-          0 },
-        /* 33 << 72 */
-        { { 0xa69ab906fc3302bfl,0x49ae6ba7d0872e90l,0xc9e2d6d1f3a1bfc3l,
-            0x11dfe85f1a033500l },
-          { 0x45189c2998666dbdl,0xba6aab88bbfd13cel,0xcf9c8b43dbd38cd4l,
-            0xa0cb581b68009236l },
-          0 },
-        /* 34 << 72 */
-        { { 0xff18c42a16288a7al,0x6363ace430699163l,0x8546d6332a2ce353l,
-            0x5e0379ef7b6b3418l },
-          { 0x2df2bb463e941bb2l,0xae7c091888e1aacel,0x6bc0982d83f5a37al,
-            0x8521bd02676d09e0l },
-          0 },
-        /* 35 << 72 */
-        { { 0x6531dff33d361aacl,0x59b954477c8cac2el,0xcc104df6c5cb7363l,
-            0x68b571c519364acdl },
-          { 0x7521e962979c3bc0l,0xbe0544c9c4aa1f92l,0x59127fe92a31eabbl,
-            0x760ac28593d8b55bl },
-          0 },
-        /* 36 << 72 */
-        { { 0x62ed534c6115164bl,0xaebe9e4cdce84ceal,0xd81c91a1c83f64c3l,
-            0x325a8ca8ecacd09al },
-          { 0x7ea57ad968b45df1l,0xa555636fd530c5d2l,0x23aff510591cfe32l,
-            0x46ff147637bedab9l },
-          0 },
-        /* 37 << 72 */
-        { { 0xa5a7e81ecb2edb3bl,0x9b0dc5f4f8fbe238l,0xc6f258087c66dd34l,
-            0xb4a57503a3f8f38al },
-          { 0x195b433513571b5bl,0xa32840763ccbc30bl,0x64ae1ffccf99ddd5l,
-            0x0dfc8772aa844e76l },
-          0 },
-        /* 39 << 72 */
-        { { 0x8b471afbfb22341dl,0xbf448b43397afdd2l,0x4cb08409682c37edl,
-            0xc3acfae6a948f1f6l },
-          { 0xf58462549e634707l,0x50161a78bd949f52l,0xf0529e752fe73566l,
-            0xe7e3fdef6fda53e0l },
-          0 },
-        /* 40 << 72 */
-        { { 0x56dab1c8321a518cl,0xfd4439a68bce226fl,0xe0b30d194facb9fal,
-            0xb5052f307583571bl },
-          { 0x1442641012afd476l,0xd02e417203fe624al,0xfc394f65531c92e6l,
-            0x16d4bf5ad4bc0b52l },
-          0 },
-        /* 41 << 72 */
-        { { 0xa38ac25eb4ec4f0fl,0x5399c024de72b27dl,0x08318aafd81a3d65l,
-            0x1af227a70c20e5d9l },
-          { 0x6389cc9a26c54e25l,0x438298bba47dc27fl,0x75386cca1a63fa0el,
-            0xc941e84cdf7bc1b0l },
-          0 },
-        /* 43 << 72 */
-        { { 0x81cad748fdfe3faal,0x752107b453ff1988l,0x8d8bb7001a8fd829l,
-            0x69838e15ca821d8el },
-          { 0x24371ede3b9f6b34l,0x19b4bb24d91e1495l,0x90899ca1e598ded1l,
-            0xbbb78b167c14e9e3l },
-          0 },
-        /* 44 << 72 */
-        { { 0xa577e84cbef239aal,0x656d2b6f8904b4d4l,0x2f6defe6ca4007edl,
-            0xca6e517737770796l },
-          { 0x4c62fcba298b6448l,0x046849660f62e00dl,0x806c2f0390b07d82l,
-            0x730855795e8d1e60l },
-          0 },
-        /* 45 << 72 */
-        { { 0x24488802f4703b78l,0x6c9323bee9eaa1e0l,0x242990e2aa94c170l,
-            0x3292bc42a15b5886l },
-          { 0x60ccb5bc908af203l,0x8fd63583713b09bdl,0x40791ecad693fa28l,
-            0xea80abf2941af8a1l },
-          0 },
-        /* 46 << 72 */
-        { { 0xf9c0315071145fe3l,0x80a71b55d7873a7dl,0xd134244b5e10bac7l,
-            0x303f7e12ded3a4b4l },
-          { 0x58e6f17e803b7a3bl,0xcd6f64130b1ca6b4l,0x25e744ce2ce65aa2l,
-            0xf2bbc66b952efa51l },
-          0 },
-        /* 47 << 72 */
-        { { 0xc8b212e75913e1f3l,0xf018ab208d416886l,0x28249e15b617cac4l,
-            0x837fcba1693ed09al },
-          { 0x9c457e511c15a1bcl,0x9354758756c7f3f1l,0x1afd80348be18306l,
-            0xa43d56982256ab14l },
-          0 },
-        /* 48 << 72 */
-        { { 0xce06b88210395755l,0x117ce6345ec1df80l,0xfefae513eff55e96l,
-            0xcf36cba6fd7fed1el },
-          { 0x7340eca9a40ebf88l,0xe6ec1bcfb3d37e12l,0xca51b64e86bbf9ffl,
-            0x4e0dbb588b40e05el },
-          0 },
-        /* 49 << 72 */
-        { { 0xf9c063f62f2be34bl,0x9ca32fa99c20f16bl,0xe02e350d0125a01al,
-            0x62d66c54e6516c25l },
-          { 0x21b154ad5120bedbl,0xb1077f4e8d6ff9d8l,0xd01a46c300bb4941l,
-            0x9d381847d1460588l },
-          0 },
-        /* 51 << 72 */
-        { { 0xf3a9b311581cb57bl,0x65fb3fb649727d13l,0xb8496e3d35131142l,
-            0xf7642f554d0cdab9l },
-          { 0xe2f66f0e9f6d7e45l,0xbae14cedaa22fcd4l,0x1f769f0e49b2e05al,
-            0x08c4d7784ac5191el },
-          0 },
-        /* 52 << 72 */
-        { { 0x86f9108ece4aa825l,0xbe5b2f317e5a5fbfl,0x2772c1b49254bb78l,
-            0xae6cdf5f4ff8ac5cl },
-          { 0x106cd94bf6b7a12el,0xbe0915d6d1c7a1a5l,0x8bf6bc8d3b40ac5el,
-            0xbb89180423ee3acal },
-          0 },
-        /* 53 << 72 */
-        { { 0x76f15eaa618b5ea1l,0xec1ea62e6d4ad0c8l,0x301b60c8168d57fal,
-            0x454d5f771edbfb05l },
-          { 0xea888e29a936031al,0x01303d3f0174dd17l,0x8b5e06b4244254e7l,
-            0x00ebf03509724acfl },
-          0 },
-        /* 55 << 72 */
-        { { 0x66ce3ded8e66d509l,0x368e38d05a488586l,0x7b9ae220c7eedf5el,
-            0x67e9ea52bfbf9d62l },
-          { 0xe9cbf53d99b7ecb3l,0xfde3e8c0908bf072l,0x288400ab1107e21fl,
-            0x24c8856256532667l },
-          0 },
-        /* 57 << 72 */
-        { { 0x0d5f9955ca9d3ad1l,0x545feba13a1daec0l,0xd22972016cb30f23l,
-            0x9660175ccef6cf6el },
-          { 0xbf3e341a395738dcl,0x74a5efbc80f7cca4l,0xc4f9a07bbebc6a60l,
-            0x2f1e3dad4b1f915al },
-          0 },
-        /* 59 << 72 */
-        { { 0xada4423f0d5e2e34l,0x2d31f4920b372358l,0xd7f469370e2d6a8cl,
-            0xf5e7ccfe0028e4ael },
-          { 0x20fcb1f3928854b2l,0x2a8973c507271bf6l,0xe87de33e5fa88fe1l,
-            0xe9af2dce7bd3c2a6l },
-          0 },
-        /* 60 << 72 */
-        { { 0x185a19d959d097b2l,0xb1c72a3a0dea2875l,0x3b371628f9021f08l,
-            0x45f1255bfa9d6ac1l },
-          { 0x9ff36a90cfd72c0dl,0x8c7315db24fe2376l,0x9aebcde04b34d42cl,
-            0x2129ab16923025f3l },
-          0 },
-        /* 61 << 72 */
-        { { 0x341b9dd714b4cf50l,0x7c6e4634d619d00el,0x571d6e2fdf2165ael,
-            0xdedf9cd18dbe9db5l },
-          { 0x52a152777c5f3dc3l,0x7d27c97ef2901cf7l,0x5e098b54d02a85dfl,
-            0x6fce3e13088e3640l },
-          0 },
-        /* 63 << 72 */
-        { { 0xfa95be147a939904l,0xdfcf5b9bb56365ccl,0xdbb546bdd2d66922l,
-            0xf26a8b9cda03ca7fl },
-          { 0x96a8042d16821c0cl,0xe6729970e88ede60l,0xd028130d1285e303l,
-            0x1678b01688b7de75l },
-          0 },
-        /* 64 << 72 */
-        { { 0x96649933aed1d1f7l,0x566eaff350563090l,0x345057f0ad2e39cfl,
-            0x148ff65b1f832124l },
-          { 0x042e89d4cf94cf0dl,0x319bec84520c58b3l,0x2a2676265361aa0dl,
-            0xc86fa3028fbc87adl },
-          0 },
-        /* 65 << 72 */
-        { { 0x5db4884124627d04l,0xf92740766f7e3febl,0xd09eb11773496240l,
-            0xd48e51419a6b9ec9l },
-          { 0xcbb2ac97b7336e27l,0xe794fb760640bf6cl,0xc0b7f78dc7c7fa3fl,
-            0x1355d071fd2edbb9l },
-          0 },
-        /* 71 << 72 */
-        { { 0x575d9724e84e25a3l,0x068690a13d4d8708l,0x8a7b1c6c54dd62d0l,
-            0x8c45e1b37f88e231l },
-          { 0x38c665466d85afe2l,0x65231642e1d69f1bl,0xb71c53a090687ec1l,
-            0xdf8469d777fb5981l },
-          0 },
-        /* 77 << 72 */
-        { { 0xb920b503144fe6bcl,0x54b0f0593914c130l,0x63188d5a8269b650l,
-            0x8d7780962fc7064dl },
-          { 0xbf7b0eec5e50839al,0xaf8a7ddbe242cd06l,0x93df850809cecdb9l,
-            0x4db58a72410659e9l },
-          0 },
-        /* 83 << 72 */
-        { { 0x460d9b383baba3cdl,0x52386e4d2cf860b8l,0xd224fe5da3924b9al,
-            0xe4a4be7bcf14d813l },
-          { 0xb0759e82ed3774fdl,0x57c064b38d9b6c59l,0x301ab902aee183d0l,
-            0xf1c873495ba207c3l },
-          0 },
-        /* 89 << 72 */
-        { { 0xe8245b0a6dd58696l,0x0714eedb61091043l,0x7d9874459101129bl,
-            0x4a7f1f03a0b27a21l },
-          { 0x282e5cff71ee2045l,0x25c694a3da5c6b41l,0xb3d8e21f5542ca55l,
-            0x57d64170e3601af0l },
-          0 },
-        /* 95 << 72 */
-        { { 0x9c8e86c6c6c4fee6l,0x70194db5a596119bl,0xfc6271d30e06050cl,
-            0x17d94c89b15f18d2l },
-          { 0x76c9e9bd49817224l,0x42621638b989c5bcl,0x1e9c4cbeb769d70cl,
-            0x85e227c3b87f2783l },
-          0 },
-        /* 101 << 72 */
-        { { 0x146185d2117e73c5l,0xbf6214696dc38116l,0x9af9d9b5459e72cbl,
-            0x7512882fb3930b85l },
-          { 0xfe935379d36583b8l,0xb83ad35e7c7fdcdel,0x093ca0ab2658ae4bl,
-            0xc9b16d60a756681bl },
-          0 },
-        /* 107 << 72 */
-        { { 0x12c24d9195d3519bl,0x1fc6db1bdb43fd06l,0x1ae49fed25bbde51l,
-            0x27072e0b76d2827bl },
-          { 0xdcb92e05aeb8c47fl,0x601d414056145f67l,0xcb7002652a39e8f7l,
-            0x6ce9facc35620d8cl },
-          0 },
-        /* 113 << 72 */
-        { { 0x5c428a5ebd702c22l,0xcb6863291616129dl,0xe6278994eabcb9a1l,
-            0xb409a10b9327e540l },
-          { 0x6899f7cb66cf96aal,0xa9225f051c64b545l,0x00c5522ee3feec21l,
-            0x35503728e083315cl },
-          0 },
-        /* 116 << 72 */
-        { { 0x1916d88cf1600077l,0x1ac9c238e3a58b2bl,0x3080df8535f3508dl,
-            0x86cc18712744912bl },
-          { 0x56aec9d5ccd15044l,0x8dd9061a5db0ab17l,0x84d6bc4e2c84171dl,
-            0xd569c7d70989a5bdl },
-          0 },
-        /* 119 << 72 */
-        { { 0x24446b2702af35abl,0x071710478eea4565l,0xba4989db728306e6l,
-            0x2cd692a85954a558l },
-          { 0x644e02763576b32el,0x7efdb65c1f9fe65dl,0x04b2828e8796c048l,
-            0xcfd22481187b979bl },
-          0 },
-        /* 125 << 72 */
-        { { 0xa10d104084ea9701l,0x27dd0dcb415e187dl,0xf667c5e939bfe45cl,
-            0x3995e4ae55b67506l },
-          { 0xb25117d9b5a14801l,0xeee58525fe142e92l,0x100b856a6dbae9f1l,
-            0xada7057629586658l },
-          0 },
-    },
-    {
-        /* 0 << 80 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 80 */
-        { { 0xe4050f1cf1c367cal,0x9bc85a9bc90fbc7dl,0xa373c4a2e1a11032l,
-            0xb64232b7ad0393a9l },
-          { 0xf5577eb0167dad29l,0x1604f30194b78ab2l,0x0baa94afe829348bl,
-            0x77fbd8dd41654342l },
-          0 },
-        /* 3 << 80 */
-        { { 0xa2f7932c68af43eel,0x5502468e703d00bdl,0xe5dc978f2fb061f5l,
-            0xc9a1904a28c815adl },
-          { 0xd3af538d470c56a4l,0x159abc5f193d8cedl,0x2a37245f20108ef3l,
-            0xfa17081e223f7178l },
-          0 },
-        /* 4 << 80 */
-        { { 0x1fe2a9b2b4b4b67cl,0xc1d10df0e8020604l,0x9d64abfcbc8058d8l,
-            0x8943b9b2712a0fbbl },
-          { 0x90eed9143b3def04l,0x85ab3aa24ce775ffl,0x605fd4ca7bbc9040l,
-            0x8b34a564e2c75dfbl },
-          0 },
-        /* 5 << 80 */
-        { { 0x5c18acf88e2f7d90l,0xfdbf33d777be32cdl,0x0a085cd7d2eb5ee9l,
-            0x2d702cfbb3201115l },
-          { 0xb6e0ebdb85c88ce8l,0x23a3ce3c1e01d617l,0x3041618e567333acl,
-            0x9dd0fd8f157edb6bl },
-          0 },
-        /* 7 << 80 */
-        { { 0x516ff3a36fa6110cl,0x74fb1eb1fb93561fl,0x6c0c90478457522bl,
-            0xcfd321046bb8bdc6l },
-          { 0x2d6884a2cc80ad57l,0x7c27fc3586a9b637l,0x3461baedadf4e8cdl,
-            0x1d56251a617242f0l },
-          0 },
-        /* 9 << 80 */
-        { { 0x892c81a321175ec1l,0x9159a505ee018109l,0xc70130532d8be316l,
-            0x76060c21426fa2e5l },
-          { 0x074d2dfc6b6f0f22l,0x9725fc64ca01a671l,0x3f6679b92770bd8el,
-            0x8fe6604fd7c9b3fel },
-          0 },
-        /* 10 << 80 */
-        { { 0xce711154b6e00a84l,0xd9fe7e4224890e60l,0xd10bc6c34560988fl,
-            0xbdc2ef526859b004l },
-          { 0xdcf0d868d5c890eel,0x893115e6119c47dcl,0xe97966fbee714567l,
-            0x117813355c85aa53l },
-          0 },
-        /* 11 << 80 */
-        { { 0x71d530cc73204349l,0xc9df473d94a0679cl,0xc572f0014261e031l,
-            0x9786b71f22f135fel },
-          { 0xed6505fa6b64e56fl,0xe2fb48e905219c46l,0x0dbec45bedf53d71l,
-            0xd7d782f2c589f406l },
-          0 },
-        /* 13 << 80 */
-        { { 0x06513c8a446cd7f4l,0x158c423b906d52a6l,0x71503261c423866cl,
-            0x4b96f57093c148eel },
-          { 0x5daf9cc7239a8523l,0x611b597695ac4b8bl,0xde3981db724bf7f6l,
-            0x7e7d0f7867afc443l },
-          0 },
-        /* 15 << 80 */
-        { { 0x3d1ab80c8ce59954l,0x742c5a9478222ac0l,0x3ddacbf894f878ddl,
-            0xfc085117e7d54a99l },
-          { 0xfb0f1dfa21e38ec2l,0x1c7b59cb16f4ff7fl,0x988752397ea888fel,
-            0x705d270cb10dc889l },
-          0 },
-        /* 16 << 80 */
-        { { 0xe5aa692a87dec0e1l,0x010ded8df7b39d00l,0x7b1b80c854cfa0b5l,
-            0x66beb876a0f8ea28l },
-          { 0x50d7f5313476cd0el,0xa63d0e65b08d3949l,0x1a09eea953479fc6l,
-            0x82ae9891f499e742l },
-          0 },
-        /* 17 << 80 */
-        { { 0xd7c89ba1e7d1cefdl,0xcb33553a9a91e03dl,0xa01caaff59f01e54l,
-            0x4a71c141de07def7l },
-          { 0xe1616a4034d467d1l,0x6f395ab2e8ba8817l,0xf781ea64e45869abl,
-            0x8b9513bb7134f484l },
-          0 },
-        /* 19 << 80 */
-        { { 0x0b0ec9035948c135l,0xaee219539a990127l,0x9d15ba0eb185dda1l,
-            0xd87bc2fb2c7d6802l },
-          { 0x05a480307a82d7f8l,0x7b591ce4e7e11ec3l,0x14d4cc22a0e15fdbl,
-            0xf2d4213576def955l },
-          0 },
-        /* 21 << 80 */
-        { { 0xd56d69e4117a5f59l,0xcae6008a01286e97l,0x716a0a282dab13b0l,
-            0xc821da99b3a8d2d0l },
-          { 0x6898b66239c305e6l,0xe42d3394c8b61142l,0x54c1d2b253b16712l,
-            0x3cec3953a01f4be6l },
-          0 },
-        /* 23 << 80 */
-        { { 0x5bd1e3036951b85el,0x1a73f1fb164d79a4l,0x6e77abd39fb22bc3l,
-            0x8ae4c181b3d18dfdl },
-          { 0xdd4226f5a6a14ed1l,0x620e111feb4e1d92l,0xffce6e59edca4fe8l,
-            0x39f5fc053d0a717dl },
-          0 },
-        /* 25 << 80 */
-        { { 0xef8fa78cd91aff44l,0x6f3f9749bdc03be7l,0x171545f8b8596075l,
-            0xbe31a73e2af132cel },
-          { 0x5b4e174123884e1dl,0x4373357ea9fa75f0l,0x8dba2731bc06f49el,
-            0xa09aebc877fa6de8l },
-          0 },
-        /* 27 << 80 */
-        { { 0xd4974e518293e18cl,0x1e4cfc5331ec0e8fl,0x80b4258325d40b1el,
-            0x5cfb73a2a85f7588l },
-          { 0xe553efd204c0e00bl,0xdaa6750e9a48ac39l,0xf20936b00abda06al,
-            0xbfd3c7e4bf85771cl },
-          0 },
-        /* 28 << 80 */
-        { { 0x72669c3c7292495cl,0xa627e2dd82786572l,0xbdbfce5cd39c3e3dl,
-            0xba6164927feed3d6l },
-          { 0x4eb5f513e77b7318l,0x133f2e834337c2e0l,0xdea20f07f408bec6l,
-            0x848a8396e3c87655l },
-          0 },
-        /* 29 << 80 */
-        { { 0x3086643551138f2bl,0x1176d8e6108a36bal,0xd78b3b400d4d4b66l,
-            0x99ddd9bd956dbff1l },
-          { 0x91dfe72822f08e5fl,0x7fd8cfe6a081ac4el,0x8ebb278ed75285c2l,
-            0x2335fe00ef457ac0l },
-          0 },
-        /* 31 << 80 */
-        { { 0xe9d79c50f058191al,0x6749c3b05d3183f8l,0x5edc2708dbfeb1ecl,
-            0x2c18f93621275986l },
-          { 0x3a093e1f0703389fl,0xdf065e4a3ef60f44l,0x6860e4df87e7c458l,
-            0xdb22d96e8bfe4c7dl },
-          0 },
-        /* 33 << 80 */
-        { { 0xb7193811b48dad42l,0x23b9dca320ad0f0cl,0x55511ffb54efb61bl,
-            0xac8ed94626f9ce42l },
-          { 0xa42b4bc73fc4cbd9l,0x2a4670905c6f8e39l,0xb50040f87eb592del,
-            0x6633f81bdc2541f3l },
-          0 },
-        /* 34 << 80 */
-        { { 0xc104e02ed2d6d9c2l,0xa4876e870302517al,0x0263c9b2912f5005l,
-            0x902f364a3d89d268l },
-          { 0x76070565bb20a5a8l,0xa3a8977452109e98l,0x51fbffec463aa476l,
-            0xfa8519625daa1503l },
-          0 },
-        /* 35 << 80 */
-        { { 0xe449dd8f82a9a4f3l,0xa1a2f405797e6b36l,0x76913537787785e8l,
-            0x0315a3cfe064481el },
-          { 0xc02291ee83df11e2l,0x5b59a0e9bcd178f0l,0xd5e8d10ce6b4c63al,
-            0x9eee599f3fc60a82l },
-          0 },
-        /* 36 << 80 */
-        { { 0x051e589759621468l,0xb92c06327293621el,0xee17ea647762e4f2l,
-            0x412107a771abd28cl },
-          { 0xa083d87bf02d65ebl,0xbd4a3f165594395el,0x1d5694337c8882f3l,
-            0xc5eb10c55f9c63cfl },
-          0 },
-        /* 37 << 80 */
-        { { 0x4b196728c8e62c4el,0x03dbd04cb74a757cl,0xe960a65b8520f044l,
-            0x9eda0f33f7937337l },
-          { 0x06ff0b86b6dc7dfbl,0x3bd276c11fc1ac35l,0x0e67055b1b255c27l,
-            0xe43ae552eff899f8l },
-          0 },
-        /* 39 << 80 */
-        { { 0xc64c914d3b156d76l,0x784c1f61d794345dl,0xcda0c77c365d7a50l,
-            0xcc5a1e205b32dbd0l },
-          { 0x2f4e78bff90b6ac0l,0xbead62f9a2d4862dl,0xa8f67e7dcc346b53l,
-            0xa38d7ae947e59dbdl },
-          0 },
-        /* 40 << 80 */
-        { { 0x7dc1605d480aca4dl,0x08c37750ef263aabl,0xd5c6b7c93f166725l,
-            0xf99982f30ff2853bl },
-          { 0xc61b9583a8ecb64al,0x041211a91b771741l,0x50ba64154e156f97l,
-            0xb6595ea871b8954el },
-          0 },
-        /* 41 << 80 */
-        { { 0x4ae760845eb3b4eel,0xcafefdc6c62ed274l,0x4eabeacf113f790bl,
-            0x10c2cc88a5ff64c9l },
-          { 0xe7b59f8a49965d80l,0xd04884b50df07712l,0x6316ac5ba5f7bab1l,
-            0x388111d99e78a075l },
-          0 },
-        /* 43 << 80 */
-        { { 0x8d437128f24804efl,0x12a687dd7b71dd53l,0x8b8f71d96139a60el,
-            0xb047fed42a095ec7l },
-          { 0xef238041fba59ee8l,0x61b17fac64045514l,0x45b1cf4857afa184l,
-            0x8592c50a4bff5fc5l },
-          0 },
-        /* 44 << 80 */
-        { { 0x2830592394b745dcl,0x53e9ec16b09cb993l,0x59d0b57f9a134ed1l,
-            0x89d7b439c56ee0ebl },
-          { 0xc3656539991e22a2l,0xd27a89372a345043l,0x55dd5341064038eel,
-            0xc9ee3f0348cb42efl },
-          0 },
-        /* 45 << 80 */
-        { { 0x08518c631d56c1cbl,0x5650f79f31235521l,0x33fc08d648911017l,
-            0xbb8b58538a0a33c8l },
-          { 0xb54554f2f869a62al,0x67f8cf48222457e5l,0x46e13911f276cc0dl,
-            0x4b3a2ad6943b389el },
-          0 },
-        /* 46 << 80 */
-        { { 0x0e72b816b11a4c9dl,0x919b2738e9028fa4l,0xab80e1117698a5d6l,
-            0xcd7950f56cd49adal },
-          { 0x0db75c908dfb13a5l,0x2178578770f12cebl,0xfab72d5243486ff6l,
-            0x66d55d726a0673ebl },
-          0 },
-        /* 47 << 80 */
-        { { 0xe98014b922667519l,0x7fcab2b3a95da9c0l,0x9bdbccd8438d5060l,
-            0xa72fff5455a726b6l },
-          { 0x7ae032943a5e769bl,0xf7291e9b559a0734l,0x18ae4f182ce18eeel,
-            0x88e49f7328b7b4f0l },
-          0 },
-        /* 48 << 80 */
-        { { 0x90fe7a1d214aeb18l,0x1506af3c741432f7l,0xbb5565f9e591a0c4l,
-            0x10d41a77b44f1bc3l },
-          { 0xa09d65e4a84bde96l,0x42f060d8f20a6a1cl,0x652a3bfdf27f9ce7l,
-            0xb6bdb65c3b3d739fl },
-          0 },
-        /* 49 << 80 */
-        { { 0xc6a2923e60ef9d87l,0xac66cdd8c3a64f1cl,0x069292d26e0bb0ccl,
-            0x9e491414451e52a0l },
-          { 0x2e76cedf0e0d35b3l,0x311b7ae9af682b84l,0xaa1017a02f90b176l,
-            0xac0b43a794feb6e8l },
-          0 },
-        /* 51 << 80 */
-        { { 0x7ddb42f9214e82f5l,0x91c88566f67269d7l,0x1763ed8cdd0ff422l,
-            0x045dd690ad284ddfl },
-          { 0x5713bbb141e48fe7l,0xdc5bef28f8eb580fl,0x4bd0b288ed2992c2l,
-            0x436587faaf5ef2b3l },
-          0 },
-        /* 52 << 80 */
-        { { 0xbbc1a48d6e5822c4l,0x16c3135daacebd02l,0xd0c6c543b56157dfl,
-            0xae249a0ef49f44a1l },
-          { 0x1f2c23ce72c47341l,0x8f52dc2a25974313l,0x2c99bc0a958e0e6bl,
-            0xe57eab6b950cd492l },
-          0 },
-        /* 53 << 80 */
-        { { 0xea66db638934efc0l,0x7bfe479193c6f7c7l,0x78438d535ef90d99l,
-            0xe63b87c9c665736dl },
-          { 0x6de32d82db49e1bbl,0xbfa877dcd0ad1648l,0xdb2e85de1197806dl,
-            0x74e9dbd3cfee7854l },
-          0 },
-        /* 55 << 80 */
-        { { 0xd2c26e2edb6d7e0al,0x9103119a531009cdl,0xb5dc49869a8b9d54l,
-            0x4781b83bb408b427l },
-          { 0x70d98b2ccb4ba2f7l,0x112ed5d7fa8a36b8l,0x97257bc6fdde1675l,
-            0xd2a9c711db211cb7l },
-          0 },
-        /* 57 << 80 */
-        { { 0xe4aa6a06ee79fe8cl,0x06e210233dff8a54l,0x63e11ac5bf50731al,
-            0xb8b9944f544125b8l },
-          { 0xcba92c41d359aeb0l,0xd201c893249bca36l,0xfe79bd77cb501216l,
-            0x694b21488d525ba4l },
-          0 },
-        /* 59 << 80 */
-        { { 0x60c90e11ee3dde2al,0x7df08e17bb36c4a2l,0xb6c3210dcc5b3c17l,
-            0xa814180955cec91cl },
-          { 0xf4ecbc05a8193dffl,0xf43cdef8da5744fal,0x4895a6c6f12f8a2el,
-            0x44282692eb7b910al },
-          0 },
-        /* 60 << 80 */
-        { { 0x1a405e1886d6e13al,0x6a18c91827a7c67cl,0xc34877ebe127bfd7l,
-            0x3c9fab08c098e692l },
-          { 0xfe2dc65bc2066586l,0xb107603a8f68a0a9l,0x74ef0ef8127cd340l,
-            0xfe577b5b86788d87l },
-          0 },
-        /* 61 << 80 */
-        { { 0xdc7ff83c71234c81l,0xee48d9c6d868c82fl,0xb80bac5e37e4f365l,
-            0x2bfbe94efcb951c2l },
-          { 0x55829049a374d0b0l,0x2a502cada87a5fb4l,0x0742ac9d9ee840bal,
-            0x7689bf53eecd05b1l },
-          0 },
-        /* 63 << 80 */
-        { { 0x0e7f459320059c22l,0x47c273e0e49368a2l,0x5ccb960ac6946ee2l,
-            0xd8209ec48b3271b6l },
-          { 0x7fd5142cdfb9e947l,0x46a89c83ff737ab1l,0xa45f6b0282d875ecl,
-            0x19a16e0e34c296d6l },
-          0 },
-        /* 64 << 80 */
-        { { 0xeb5ddcb6ec7fae9fl,0x995f2714efb66e5al,0xdee95d8e69445d52l,
-            0x1b6c2d4609e27620l },
-          { 0x32621c318129d716l,0xb03909f10958c1aal,0x8c468ef91af4af63l,
-            0x162c429ffba5cdf6l },
-          0 },
-        /* 65 << 80 */
-        { { 0x65c93be33607927bl,0x86feaaecdae5411dl,0x4a1686c6dd2e2c3dl,
-            0xf78200068acdf51dl },
-          { 0xf82c4d0239ed3e50l,0x5ac04047b4c3a4a4l,0xbdd14d7ec34b07a7l,
-            0x9911d7027cc12db5l },
-          0 },
-        /* 71 << 80 */
-        { { 0x4ed5dbbd1751abc9l,0xaf374229a23cc54al,0x9b5fa66ea4ed3f9al,
-            0xc56dd9613d380643l },
-          { 0x7d77897144b38021l,0xdf4712d0d3584508l,0x0018e2eecd7ab168l,
-            0xc8a3a166293d29a7l },
-          0 },
-        /* 77 << 80 */
-        { { 0x34681bdb3a5a0214l,0xe188d6f1f718797el,0xaa751de7db761c5fl,
-            0x347c50324959a5cel },
-          { 0x108705fc338be49cl,0x1dc5eada95abf7a8l,0xb863808f0fc3f0b7l,
-            0x529c27c1a05c4d43l },
-          0 },
-        /* 83 << 80 */
-        { { 0xa75f90677f699f79l,0xd01cf9c866356f99l,0xf90f9b73fdfbaae7l,
-            0xe0b5f4412c304d2fl },
-          { 0x17cbfb11807f3f57l,0xe902d542af8a9eb4l,0x3335285461f89b4al,
-            0x3a51c54d3628c0ael },
-          0 },
-        /* 89 << 80 */
-        { { 0xae5fd487c704212dl,0x82dd07a565e2e32cl,0x46d4c9646c19c199l,
-            0xe7f428593778eedcl },
-          { 0x084a4e9b6dcc5ec9l,0x757e04ba2d0538b7l,0x4ec0a573a3fba4cdl,
-            0x2432a4e5c627c2fcl },
-          0 },
-        /* 95 << 80 */
-        { { 0xfde00b3094c8a424l,0x20a57d8cd224c232l,0xd6ace1a170019992l,
-            0x1a648d40697e67a3l },
-          { 0xed1fb10691338d84l,0x828004a08372bfc8l,0xb93030fefad3bfedl,
-            0x883dea23f27369ecl },
-          0 },
-        /* 101 << 80 */
-        { { 0xfbbf36a62a710d73l,0x8db834024b3cc6bbl,0xa60c47cf16d7b1fcl,
-            0xf9778fa6cd16ce8fl },
-          { 0xd77023086d14a1a6l,0x01f139cb06e8247cl,0xd89af2979770b9c1l,
-            0x94bf1ca97d9fb550l },
-          0 },
-        /* 107 << 80 */
-        { { 0xe17e2e6dc2d45f34l,0x5969d8ee26efc6cbl,0x6f175231b9219cfbl,
-            0x027f333c189f1175l },
-          { 0x5bc60fad54f6da49l,0xc52e09af8ae5c3f3l,0x6c0e3927ed07f46dl,
-            0xbfd9e598f39cf16bl },
-          0 },
-        /* 113 << 80 */
-        { { 0x9dffd95b090aefb9l,0x26db7b73637224fel,0xb78a679e92e2aa0cl,
-            0xfc7c824ffc8f895dl },
-          { 0xdc8287e8e636b3a8l,0x6b3ccc0f28b7a639l,0x38e6e2cc653de56al,
-            0x998cf6985392c3cal },
-          0 },
-        /* 116 << 80 */
-        { { 0xe68de79e57f0d6fal,0xe707b252ff9c06f7l,0x5613698a4a061697l,
-            0xd83d6453b5390352l },
-          { 0x59b007599867c708l,0xcfe24fd7b41ea7adl,0x4692abf3da5b7de6l,
-            0xd99a6f3bf0c54e8fl },
-          0 },
-        /* 119 << 80 */
-        { { 0xe8ee870dea4addc3l,0x0d1fb29559841f3el,0xdc05b5581dba2f14l,
-            0xb8bf38324e3f4600l },
-          { 0x1a909e66fd57c48al,0xb65ca4c24e2d76dfl,0x0b27755ae7c60d89l,
-            0x9fcfa75acb9003f6l },
-          0 },
-        /* 125 << 80 */
-        { { 0xbbbdf4c49e5325aal,0x6879fe11d0d1f281l,0x7a400f890633002el,
-            0xc3633c779bb79ac9l },
-          { 0x15a4cfae93ab9bc3l,0x379bbdea42594603l,0x7c61dfa257d2af3fl,
-            0x20190537b51bfb62l },
-          0 },
-    },
-    {
-        /* 0 << 88 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 88 */
-        { { 0xa80d1db6f79588c0l,0xfa52fc69b55768ccl,0x0b4df1ae7f54438al,
-            0x0cadd1a7f9b46a4fl },
-          { 0xb40ea6b31803dd6fl,0x488e4fa555eaae35l,0x9f047d55382e4e16l,
-            0xc9b5b7e02f6e0c98l },
-          0 },
-        /* 3 << 88 */
-        { { 0x4b7d0e0683a7337bl,0x1e3416d4ffecf249l,0x24840eff66a2b71fl,
-            0xd0d9a50ab37cc26dl },
-          { 0xe21981506fe28ef7l,0x3cc5ef1623324c7fl,0x220f3455769b5263l,
-            0xe2ade2f1a10bf475l },
-          0 },
-        /* 4 << 88 */
-        { { 0x9894344f3a29467al,0xde81e949c51eba6dl,0xdaea066ba5e5c2f2l,
-            0x3fc8a61408c8c7b3l },
-          { 0x7adff88f06d0de9fl,0xbbc11cf53b75ce0al,0x9fbb7accfbbc87d5l,
-            0xa1458e267badfde2l },
-          0 },
-        /* 5 << 88 */
-        { { 0x03b6c8c7dacddb7dl,0x92ed50047e1edcadl,0xa0e46c2f54080633l,
-            0xcd37663d46dec1cel },
-          { 0x396984c5f365b7ccl,0x294e3a2ae79bb95dl,0x9aa17d7727b1d3c1l,
-            0x3ffd3cfae49440f5l },
-          0 },
-        /* 7 << 88 */
-        { { 0x26679d11399f9cf3l,0x78e7a48e1e3c4394l,0x08722dea0d98daf1l,
-            0x37e7ed5880030ea3l },
-          { 0xf3731ad43c8aae72l,0x7878be95ac729695l,0x6a643affbbc28352l,
-            0xef8b801b78759b61l },
-          0 },
-        /* 9 << 88 */
-        { { 0xdcdd3709b63afe75l,0xad9d7f0b3f1af8ffl,0xdd6a8045194f4beel,
-            0x867724cc2f7d998cl },
-          { 0xd51d0aa5837751bel,0x21d6754a959a0658l,0xd2212611695f7e58l,
-            0xec4b93c2297363efl },
-          0 },
-        /* 10 << 88 */
-        { { 0x0ac1c5fab6ef26cfl,0xcd8ba0c5a39de8eel,0x11ba7537dd7796e0l,
-            0x1215933476d58d6dl },
-          { 0xf51eb76f529fda4cl,0x2fd9209ddedaa8a3l,0x555a675615efac65l,
-            0xb784c9ca7fd42fe9l },
-          0 },
-        /* 11 << 88 */
-        { { 0x8165ec11b9d1a70fl,0x01347efc384f6cael,0xe95c01a0ab7aeca9l,
-            0x459ba1c5c6c99530l },
-          { 0x38967a635cf3416bl,0x5c3761fd1e5457e2l,0x43e6077af03e9df6l,
-            0xb15d34628bd1c7f6l },
-          0 },
-        /* 13 << 88 */
-        { { 0xad87d3db35a75c49l,0xc69d800961af03c5l,0x31aef61a3a6a6c4cl,
-            0xb3292640aa10a993l },
-          { 0x959aae80aaee340fl,0xf900528e7f381a3bl,0x44ecf76e853691a3l,
-            0xa081663ce749e68el },
-          0 },
-        /* 15 << 88 */
-        { { 0x4f2782136283e34al,0x6f9fcf60fbfa315fl,0x224a2ab99b701364l,
-            0xb4b1b418f9fecadcl },
-          { 0xbf7280fe50ba1b9al,0x7e68259c33f36db9l,0x8ccb754e154c9fb0l,
-            0xf281adb1db2328f1l },
-          0 },
-        /* 16 << 88 */
-        { { 0xf92dda31be24319al,0x03f7d28be095a8e7l,0xa52fe84098782185l,
-            0x276ddafe29c24dbcl },
-          { 0x80cd54961d7a64ebl,0xe43608897f1dbe42l,0x2f81a8778438d2d5l,
-            0x7e4d52a885169036l },
-          0 },
-        /* 17 << 88 */
-        { { 0xc2a950ad2d6608bel,0xab415e2a51c3c2b6l,0xffbd2a65f5c803e7l,
-            0x3f81dc3eca908532l },
-          { 0x0ec47397c28c04f4l,0xf6c632e8153f58e8l,0xccac35f8efb4a6d8l,
-            0x22a1b677ee6d7407l },
-          0 },
-        /* 19 << 88 */
-        { { 0x276662435243c119l,0x79cb8580e707363el,0x5bf5ebf4d01682d6l,
-            0x8a980173762811e0l },
-          { 0xe2f2be1fc7547d77l,0x21a50fffb925fec6l,0x5e6cf2ef40115509l,
-            0xb69beae18faa0fc0l },
-          0 },
-        /* 21 << 88 */
-        { { 0xfa147da8cec36e75l,0xba184e5a42860484l,0xe8ec25df222fb1e6l,
-            0xce91dcb18ff8403cl },
-          { 0xf1b0e27ead7faa32l,0x097d881d42a3a205l,0xa8865dd43f8f56d4l,
-            0x624d7a451aef929dl },
-          0 },
-        /* 23 << 88 */
-        { { 0x3db0238ad01698e8l,0xbb7186dc00306082l,0x542f4377250f830el,
-            0x34b8a67dae438c50l },
-          { 0xada528a0858d8048l,0x561aa3336b57afc1l,0x8d9188e0fda35f7al,
-            0x5838d1211dcad0c5l },
-          0 },
-        /* 25 << 88 */
-        { { 0x4f97d1529f17511dl,0x8b9f012776fdb9ebl,0x53a0a72d4056e6a7l,
-            0x5ff937d64e262eeel },
-          { 0xaa64a8dc489fbe6dl,0xc19947dfea02bc69l,0x76f0bbb91492c9bel,
-            0xe53881098d89cd01l },
-          0 },
-        /* 27 << 88 */
-        { { 0x16083309456057b7l,0x2810c08040a331f6l,0x0561656c3c166929l,
-            0x16f0d8d6ed1c3999l },
-          { 0x37b6da7294697927l,0xd821c2cc23ca6c9cl,0x42ef1bdb8ca4351cl,
-            0x7ca32bad5edfa682l },
-          0 },
-        /* 28 << 88 */
-        { { 0xdc1de17d98119f10l,0x74353c5d488c36a6l,0x14aaf33a3d8e23dfl,
-            0x31e075c078baf593l },
-          { 0x0f7ca03a46d1ca3cl,0x99c5e3ac47b660c7l,0x70d0241388fe2e59l,
-            0x2e9a6be12a7ec005l },
-          0 },
-        /* 29 << 88 */
-        { { 0x4d1f087f184252b1l,0xfd3ace273f5b49c6l,0x6e874447bbb04da2l,
-            0x2347e3a1b3767ff0l },
-          { 0x990d4010f868966al,0x35320090dd658b5el,0x1105bfb974fe972al,
-            0x3961f7dc8e7ad2c6l },
-          0 },
-        /* 31 << 88 */
-        { { 0x100d8b54741e3286l,0x65d9108ef3abc7afl,0x172b450620ef8fbcl,
-            0x11bd7db2d81b8a2el },
-          { 0xf89210e1e8e41de5l,0x910613f3d98a868bl,0xbfc85241849aa909l,
-            0x68a43e21c7d3a7cal },
-          0 },
-        /* 33 << 88 */
-        { { 0x68f891479a4f8293l,0x48262328a5eb9101l,0x7eca2a178fe218b5l,
-            0xde6c22dbc733f768l },
-          { 0xde7171d108d6084dl,0xd153827a0f0f8092l,0xc7b52d8f85a9252fl,
-            0xfa29ca3a5708b31fl },
-          0 },
-        /* 34 << 88 */
-        { { 0x20518ddf9e0ad7e7l,0x33d5d079e8d28b9bl,0x1149b393d13058b0l,
-            0x708cc65586d4651dl },
-          { 0xd7fefaa694207435l,0xce882c0d96312f8fl,0x2fd5cb2059d091a7l,
-            0x4533a88a0e1ece94l },
-          0 },
-        /* 35 << 88 */
-        { { 0xceddd9b5a59c28bcl,0xaa4808f9572e2a5dl,0x38bc191999014a1el,
-            0x1aacefdaa6d85686l },
-          { 0xa59283d42a573fddl,0x84359db29c387594l,0x79994773dca3acc8l,
-            0xe4323e7654cf7653l },
-          0 },
-        /* 36 << 88 */
-        { { 0xac449695241fbd6fl,0x67c9b170081c1223l,0x16868f21b56aac6fl,
-            0x34bd8fa3f8bcb721l },
-          { 0x06b6bd33b6691c76l,0x6c924766381a7973l,0x6a12444ca54078dbl,
-            0xd02e91a96d1051ccl },
-          0 },
-        /* 37 << 88 */
-        { { 0x512f5fb35f30b344l,0xb13ade169d516885l,0x18812e9b2b468802l,
-            0xf15d730e6b28979al },
-          { 0x5015616f6889348bl,0xe0b02a0a96af0401l,0x3b02007b61204c89l,
-            0x9ece2aa7432742a4l },
-          0 },
-        /* 39 << 88 */
-        { { 0xd5f7e09c7c1cc4a1l,0x313ac04218b2d854l,0xbc4fe2a04c253b10l,
-            0x25a696a3c7080b5cl },
-          { 0x6de3cb6aef811877l,0x4d242fecd15f9644l,0xb9bfa2480ee6a136l,
-            0x8122679e9c8d181el },
-          0 },
-        /* 40 << 88 */
-        { { 0x37e5684744ddfa35l,0x9ccfc5c5dab3f747l,0x9ac1df3f1ee96cf4l,
-            0x0c0571a13b480b8fl },
-          { 0x2fbeb3d54b3a7b3cl,0x35c036695dcdbb99l,0x52a0f5dcb2415b3al,
-            0xd57759b44413ed9al },
-          0 },
-        /* 41 << 88 */
-        { { 0xc2c7daec96a8d727l,0x8a11631a17f3abf9l,0x06aba65c0ae8940al,
-            0xfca280c7873d3635l },
-          { 0x57496889ddb72b87l,0xaa9a3359320793d4l,0x11b6864d43120741l,
-            0x1877cd4e51527639l },
-          0 },
-        /* 43 << 88 */
-        { { 0x8b35ce4e6f43dfc6l,0x4114b2fe9a19f3bfl,0x8c4af8024ffa45cal,
-            0xa3ab5f869328b847l },
-          { 0x0986de3e555f30f0l,0xaae6e3eac8cb84c4l,0x2a7dcdbaa4ba01f7l,
-            0xfa32efa729f5dc6cl },
-          0 },
-        /* 44 << 88 */
-        { { 0x077379c00b33d3f8l,0x421883c67064e409l,0x2d0873d76c29c8f6l,
-            0xbfa433a3d274c0c8l },
-          { 0x56dc778f23a5891el,0xd663bf6535e2de04l,0x488fdb485db517cel,
-            0x00bba55e19b226c2l },
-          0 },
-        /* 45 << 88 */
-        { { 0x879b30ead7260d78l,0x04954ba2eac5201fl,0x3210c0e3ff2529d1l,
-            0x0743823488b470b3l },
-          { 0x8b618de48854cc0dl,0x98270d5e35b795eel,0x0e47d651aa33ca37l,
-            0x77d75fda1e87d0cfl },
-          0 },
-        /* 46 << 88 */
-        { { 0x789dbe987803fbf9l,0x940589aa17ede316l,0x032902bd85a1988cl,
-            0x43cbc0031c47f7f0l },
-          { 0xc6ff73714709148fl,0x769957122d9b8a5el,0xb4520e462597b70el,
-            0x00d19f39f67ff3b8l },
-          0 },
-        /* 47 << 88 */
-        { { 0xe2dfcef9b159f403l,0xe8e9e8d8855644afl,0x2796247163fa1068l,
-            0x400e992a968a5400l },
-          { 0xe2b9d29f56e563c1l,0xed66759c2885fabfl,0x788b6263750abdffl,
-            0x30adb00d6cbbdcacl },
-          0 },
-        /* 48 << 88 */
-        { { 0x1fe647d83d30a2c5l,0x0857f77ef78a81dcl,0x11d5a334131a4a9bl,
-            0xc0a94af929d393f5l },
-          { 0xbc3a5c0bdaa6ec1al,0xba9fe49388d2d7edl,0xbb4335b4bb614797l,
-            0x991c4d6872f83533l },
-          0 },
-        /* 49 << 88 */
-        { { 0x5548d3423fa17b28l,0x38587952823ee731l,0x8ee9b90a0a28bcd1l,
-            0xcfc029bf6676917el },
-          { 0x7e08306d2a212358l,0x66a9488dc88a66bcl,0x7a09db327d7c9e65l,
-            0x20eaf4e72cbc1790l },
-          0 },
-        /* 51 << 88 */
-        { { 0xb3095b491f2a9605l,0x7cfc4205f72691c7l,0x1544bf964d889b90l,
-            0xdc44d20ba0bbae7al },
-          { 0xee369b670b1f0b23l,0xf3ec25e818a7bdcbl,0xf614ab5df47ecf65l,
-            0x4869762f80a4a09dl },
-          0 },
-        /* 52 << 88 */
-        { { 0xedbbeee78a058fb6l,0xb9d19ddcfb09121al,0xa41bb45bd34dddcel,
-            0x2dbc80b900964bc4l },
-          { 0x4ed9137d1d6cb654l,0x1b9016db483d01c5l,0x5fc501bc6528e22el,
-            0xb2d2f8816cad646bl },
-          0 },
-        /* 53 << 88 */
-        { { 0xb57aa72a89043e56l,0x8fbca2435c5319fdl,0xe66aef43b13ce900l,
-            0x2c7c3927c3382934l },
-          { 0x434d9104a835fdf5l,0x419470b81b3b85bel,0xeaec374abeb4d448l,
-            0x26a53b51f33cda51l },
-          0 },
-        /* 55 << 88 */
-        { { 0x421f1725bb1db793l,0x20214d4f558c94a9l,0x3371233b7696092cl,
-            0x774d3fcb1902ab0el },
-          { 0x4ce223ded149aecel,0x174b260e33057bc7l,0xdf70cfa3f6effee4l,
-            0x3d8cd01f80880678l },
-          0 },
-        /* 57 << 88 */
-        { { 0x32db21862e59985cl,0x448865abaa1b39e1l,0x250ce79cd89fe98dl,
-            0x962710e763e3fb10l },
-          { 0xa8fc70561ac10e3el,0x9eed208fa3b132fbl,0xf499d638937051f5l,
-            0x27acf7ec21a9f78fl },
-          0 },
-        /* 59 << 88 */
-        { { 0x148e572a4c7b445el,0xdc10a0214dc95a4fl,0xe60e9c2e02237869l,
-            0xbfdfcb3aa393c3a4l },
-          { 0x8b799db211a64cf0l,0x1ca865ea2e16f59fl,0x865441fbd3a17e46l,
-            0x23315b9753409692l },
-          0 },
-        /* 60 << 88 */
-        { { 0x5e76fb2f286bad39l,0xbad9efe39dcad1e2l,0x60e75190edc7e904l,
-            0x6a6f063e0fecb5a5l },
-          { 0x5150ed85aed8acc3l,0xb56ccfbc6d20af6cl,0x7e0d1e982c69dbfal,
-            0xabf5628a7c7e10a9l },
-          0 },
-        /* 61 << 88 */
-        { { 0xb84af2c00df6d61fl,0x02c651c52acbaf4bl,0xfb605754afaaa0bfl,
-            0xa03f5257dff61017l },
-          { 0x9e3ffb1672762093l,0x4f9a5da0c4f40bd3l,0x37dce5220d26f8e1l,
-            0x260f736fc06a1a07l },
-          0 },
-        /* 63 << 88 */
-        { { 0xb92aba79b1077d55l,0xc52f81081a42f5f5l,0x9913f04f86e5aa99l,
-            0x6814b0b1f3c7f504l },
-          { 0xb7d61fd34d354bdal,0xf27926e39581d25el,0x97724001c2dc21adl,
-            0x835778231d5c4788l },
-          0 },
-        /* 64 << 88 */
-        { { 0x77b868cee978a1d3l,0xe3a68b337ab92d04l,0x5102979487a5b862l,
-            0x5f0606c33a61d41dl },
-          { 0x2814be276f9326f1l,0x2f521c14c6fe3c2el,0x17464d7dacdf7351l,
-            0x10f5f9d3777f7e44l },
-          0 },
-        /* 65 << 88 */
-        { { 0x53857462ff9727a2l,0xe6870e7dc68488e7l,0x276da72808c79656l,
-            0x1308eb61d86c24ebl },
-          { 0x34c43a84db0a3e56l,0x03961b5525335a59l,0xf9bc2d5805689d86l,
-            0xfa4d3c01eb29d6d6l },
-          0 },
-        /* 71 << 88 */
-        { { 0xd07dac3037d10ffal,0xb2b0a0fd8bef0a79l,0xa2e804510ec02505l,
-            0xf256c18962f55f5fl },
-          { 0x0ca3f9b10b39f4f0l,0x7bf4e1cf3bb7c8e9l,0x7a8a43f8ee11f227l,
-            0x2ad8431a3e4056ebl },
-          0 },
-        /* 77 << 88 */
-        { { 0xb8cf71ed031c1871l,0x702431806f703102l,0x9a87e1c24ec6f1b0l,
-            0xf7e6e5b4664f275dl },
-          { 0xc70a8b4e8c76b505l,0x6ba69bf2a002e9cfl,0x33ed74f7a0d8c9bfl,
-            0x17f5f4b18d9989del },
-          0 },
-        /* 83 << 88 */
-        { { 0xcd116dcb1b13a4a1l,0x591adb831c369877l,0x697be1aca6b8e80bl,
-            0xb2d4baa1b975d781l },
-          { 0xd4a9a496b16b48e7l,0x64de2d7af293997dl,0x039ae039af09a492l,
-            0x66e31a2665f3a485l },
-          0 },
-        /* 89 << 88 */
-        { { 0x110a8a42fec01a53l,0x1f5fcc1b38affab8l,0x757310ca9941a19el,
-            0x11ef95f76c29d6cbl },
-          { 0x0756bdb22dd427bal,0x8de8d44af3e16c33l,0xf9d28355e25aec52l,
-            0xeb761efc02f36465l },
-          0 },
-        /* 95 << 88 */
-        { { 0xfc83bf7454bfcd7al,0x51d861794837b6bel,0x8165b3f9801a324dl,
-            0x3a5972bc634cfd61l },
-          { 0xeecfe6d825258ed6l,0x51d968df1451ced0l,0x3010cdb8316aa0ael,
-            0xc295b8522900eaf2l },
-          0 },
-        /* 101 << 88 */
-        { { 0x5ad434a3890cc798l,0x4c17ff5e1531bce4l,0x825b5b5a5ea8e26fl,
-            0xacca9d5dd66fd7b3l },
-          { 0xb647dbde37ae6f92l,0xa5594868f3600416l,0x7b90ac53ab0c5d63l,
-            0x4b66ad7ceb43e1d0l },
-          0 },
-        /* 107 << 88 */
-        { { 0x04a211fac09ccbffl,0x9c96ad9ee873d898l,0x9eb1deb69c481f86l,
-            0xb3616ce8b2d70298l },
-          { 0x67a6fe9b9073726dl,0x5b8aa37d4c9bf744l,0xf558603ebb6aa0efl,
-            0x72767f5103d304fbl },
-          0 },
-        /* 113 << 88 */
-        { { 0x787cb8b8d6e9b7e3l,0x8bb30222e079fc68l,0x651a2ea6e3145a0bl,
-            0x0254c5da9ab18fa8l },
-          { 0x83722ffc12e1611fl,0xb0ddf1ffa7cc61bel,0x7c9c7e10ac0ac8d7l,
-            0x8241a8191da12218l },
-          0 },
-        /* 116 << 88 */
-        { { 0x70bb7719bc407e6el,0x231328efd84ceb41l,0x8bca6a1fc104bb20l,
-            0xd6f4e425280b9071l },
-          { 0xb41b95a292896a82l,0x735cf435fa34df67l,0xbc331a08d9d6d769l,
-            0x579786052682747el },
-          0 },
-        /* 119 << 88 */
-        { { 0x048ba499eb3af9a9l,0x43a8c367d50b82cel,0xedf9e2b21e0724d9l,
-            0x3098aab3d607140bl },
-          { 0xd1f18f1e5ed49eb9l,0xf9c6bb6ae0bb02a2l,0x204f96aa0cd245ddl,
-            0xdaadaf4afb011ed5l },
-          0 },
-        /* 125 << 88 */
-        { { 0xb298ce2de50404b1l,0x04dd38c45bf9b581l,0x229deabdfada51e8l,
-            0x74bd233f8788a132l },
-          { 0x951ba5ecf03e6c30l,0x9da2f5aa45bf1a41l,0x6bec7fea7e52b860l,
-            0x76e3778964b0a9ddl },
-          0 },
-    },
-    {
-        /* 0 << 96 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 96 */
-        { { 0x4fe7ee31b0e63d34l,0xf4600572a9e54fabl,0xc0493334d5e7b5a4l,
-            0x8589fb9206d54831l },
-          { 0xaa70f5cc6583553al,0x0879094ae25649e5l,0xcc90450710044652l,
-            0xebb0696d02541c4fl },
-          0 },
-        /* 3 << 96 */
-        { { 0xb99f0e0399375235l,0x7614c847b9917970l,0xfec93ce9524ec067l,
-            0xe40e7bf89b122520l },
-          { 0xb5670631ee4c4774l,0x6f03847a3b04914cl,0xc96e9429dc9dd226l,
-            0x43489b6c8c57c1f8l },
-          0 },
-        /* 4 << 96 */
-        { { 0x0e299d23fe67ba66l,0x9145076093cf2f34l,0xf45b5ea997fcf913l,
-            0x5be008438bd7dddal },
-          { 0x358c3e05d53ff04dl,0xbf7ccdc35de91ef7l,0xad684dbfb69ec1a0l,
-            0x367e7cf2801fd997l },
-          0 },
-        /* 5 << 96 */
-        { { 0x46ffd227cc2338fbl,0x89ff6fa990e26153l,0xbe570779331a0076l,
-            0x43d241c506e1f3afl },
-          { 0xfdcdb97dde9b62a3l,0x6a06e984a0ae30eal,0xc9bf16804fbddf7dl,
-            0x170471a2d36163c4l },
-          0 },
-        /* 7 << 96 */
-        { { 0x361619e455950cc3l,0xc71d665c56b66bb8l,0xea034b34afac6d84l,
-            0xa987f832e5e4c7e3l },
-          { 0xa07427727a79a6a7l,0x56e5d017e26d6c23l,0x7e50b97638167e10l,
-            0xaa6c81efe88aa84el },
-          0 },
-        /* 9 << 96 */
-        { { 0x473959d74d325bbfl,0x2a61beec8d6114b9l,0x25672a94924be2eel,
-            0xa48595dbf2c23d0cl },
-          { 0xe476848b6a221838l,0xe743e69a35c1b673l,0x2ab42499d8468503l,
-            0x62aa0054e9e90ba7l },
-          0 },
-        /* 10 << 96 */
-        { { 0x358d13f1bc482911l,0x685d1971b7fa7f26l,0x3e67a51d2be1aee4l,
-            0xe041850998d114a9l },
-          { 0x59639f604e052561l,0x32075c49155d0818l,0x2aa2343b67b64b1cl,
-            0x1b445e2967f53e6al },
-          0 },
-        /* 11 << 96 */
-        { { 0xbdfb271773a904e0l,0x7ce1e40b28888d73l,0x2e7e35f6eaa97d1bl,
-            0xd061772aa9afa097l },
-          { 0x434ac7c47a1f7c59l,0x6e21124ae79b7b9al,0x055acff3bb22ecc7l,
-            0x8bfd7ac984c858d3l },
-          0 },
-        /* 13 << 96 */
-        { { 0x2fd57df59f1f68adl,0x5ddcc6dbb06470c8l,0x801b6451a9b47307l,
-            0x6b51c8e376551bf4l },
-          { 0xef0bd1f7d44e1da9l,0x714bcb1d4d4e600cl,0xc57bb9e40c6540c7l,
-            0x71bd1ec2327cc644l },
-          0 },
-        /* 15 << 96 */
-        { { 0x9a52cf7e7f4dd81fl,0xa0132be15e69c05el,0x90dab7472a0f4d72l,
-            0xc142f911312d6706l },
-          { 0xe8d3631f8261998bl,0xf0f42fae615c1c94l,0x2f4e948caec3fa5dl,
-            0x242ae7a8a374101el },
-          0 },
-        /* 16 << 96 */
-        { { 0x0f893a5dc8de610bl,0xe8c515fb67e223cel,0x7774bfa64ead6dc5l,
-            0x89d20f95925c728fl },
-          { 0x7a1e0966098583cel,0xa2eedb9493f2a7d7l,0x1b2820974c304d4al,
-            0x0842e3dac077282dl },
-          0 },
-        /* 17 << 96 */
-        { { 0x1fa878cad088be52l,0x89c2cb07a9e1e656l,0x385bc5c3219d62dbl,
-            0xd82b676b5fda2752l },
-          { 0x2449dc9ee304eafcl,0x1e9e7991632f4ea2l,0x3036e061cdd5e0b9l,
-            0x75a6f6ff830825bcl },
-          0 },
-        /* 19 << 96 */
-        { { 0xb10fcddc449dedb4l,0x2c890042d1244acfl,0x9b3072cac7fc7017l,
-            0x1acda6859ce8063fl },
-          { 0xd243313c7f51e2f5l,0x52a3f1a4d73d9578l,0xda785b7a64f0ce6el,
-            0x2e766315442a4c2dl },
-          0 },
-        /* 21 << 96 */
-        { { 0x94f9b004151f111al,0xc7a5035b07dbc5fal,0x53958ea7609e49d7l,
-            0x0526b4d79013f4c0l },
-          { 0x66de5ebb593e2fbdl,0x6e7cf8b44c2e0c37l,0x6f72fc8b8c983e78l,
-            0x6fab9b632348f9d7l },
-          0 },
-        /* 23 << 96 */
-        { { 0xc748a3526a3d8468l,0x3fab479927e38032l,0x91ad3629fa430ce7l,
-            0xc5af0b2c71614c44l },
-          { 0xcede3fa50c211611l,0x6e6889ba02338083l,0xee0a195977f0fe32l,
-            0x01ea905d0f4bbc5al },
-          0 },
-        /* 25 << 96 */
-        { { 0x12cfb25e8193db48l,0xddb4ae633bea708cl,0xdaae102ef181f821l,
-            0x9d9d923024a089d9l },
-          { 0x71c4122da0876aeal,0x1a63ea3bbbe19c09l,0x3b898076016f8d0cl,
-            0xa5cccc5daea6b713l },
-          0 },
-        /* 27 << 96 */
-        { { 0xc3f22baf4a8e2f61l,0x77d29ede176da6a6l,0x40a55f211607da63l,
-            0x858b38561452e391l },
-          { 0x0dd3c267fe1b3c56l,0x66c04bdd7d55227al,0xfbd2fe55e6404e09l,
-            0x5981cf49ea9cfcbcl },
-          0 },
-        /* 28 << 96 */
-        { { 0xe549237f78890732l,0xc443bef953fcb4d9l,0x9884d8a6eb3480d6l,
-            0x8a35b6a13048b186l },
-          { 0xb4e4471665e9a90al,0x45bf380d653006c0l,0x8f3f820d4fe9ae3bl,
-            0x244a35a0979a3b71l },
-          0 },
-        /* 29 << 96 */
-        { { 0xae46a902aea870afl,0xa9b9fcf57cbedc99l,0x74f2ca3f79b7e793l,
-            0xadb8f2231dbeeb28l },
-          { 0x6302060e6764df85l,0x363320d257ebd554l,0xd9fd573e798d22e1l,
-            0x285f85f5ebb67dedl },
-          0 },
-        /* 31 << 96 */
-        { { 0xd86b329211caa2b5l,0x2a26258e39337bd1l,0x4dc5a9b579c8c291l,
-            0x16443d87741942e6l },
-          { 0x6bc9a2f8f811400cl,0x819c69359eeb4e0el,0xe1be7273ce0c214bl,
-            0x429afb8184b61581l },
-          0 },
-        /* 33 << 96 */
-        { { 0xb37e188756af5812l,0xd662bdb485aff83el,0xc89742d07bc63de7l,
-            0xea103f9d0279f487l },
-          { 0x4d26916a3a6cc639l,0x4eea3a3c7c743b94l,0x6a3e0dc7007376d9l,
-            0xdb6ef3cf573f904el },
-          0 },
-        /* 34 << 96 */
-        { { 0x9b1058ecb0b0fb53l,0x8955f5f75f8a9a9fl,0xf5f92e7f9f6f9e6dl,
-            0x03f5df6c50ec198bl },
-          { 0x6c8741f2b8aedbcel,0x8f4e60cfed8018f7l,0x6ca5297c9fa01f89l,
-            0x8591cf7a864995dbl },
-          0 },
-        /* 35 << 96 */
-        { { 0xa126147eb0a11b9bl,0xeedcc9e198900232l,0x15d94f8c2bead119l,
-            0x042423cfefc38691l },
-          { 0x6ce86fbe77165d91l,0xa07732126b3fd565l,0x8cdc409150b1f9c7l,
-            0x7f5ad1af064595acl },
-          0 },
-        /* 36 << 96 */
-        { { 0xed374a6658926dddl,0x138b2d49908015b8l,0x886c6579de1f7ab8l,
-            0x888b9aa0c3020b7al },
-          { 0xd3ec034e3a96e355l,0xba65b0b8f30fbe9al,0x064c8e50ff21367al,
-            0x1f508ea40b04b46el },
-          0 },
-        /* 37 << 96 */
-        { { 0x73644c158f8402a0l,0x0d9b5354f4730eb9l,0x78542af4e94cc278l,
-            0xf4dbede3e395f33al },
-          { 0x8fe8cbc590c70b00l,0x9c35bb2d7db197f6l,0x229b4973e6599746l,
-            0x0817d04e1a84b986l },
-          0 },
-        /* 39 << 96 */
-        { { 0x8ffe34e95ecd09b3l,0x6a7c3de4153b7cael,0xf02713e4a81044b7l,
-            0x85ca6158c70545c8l },
-          { 0xd3ff392845d88bffl,0x3a251a07f0bafe89l,0x61290e1287cea7f4l,
-            0xa360a17efa4808adl },
-          0 },
-        /* 40 << 96 */
-        { { 0x98561a49747c866cl,0xbbb1e5fe0518a062l,0x20ff4e8becdc3608l,
-            0x7f55cded20184027l },
-          { 0x8d73ec95f38c85f0l,0x5b589fdf8bc3b8c3l,0xbe95dd980f12b66fl,
-            0xf5bd1a090e338e01l },
-          0 },
-        /* 41 << 96 */
-        { { 0x2d1751083edf4e2bl,0x30e6e90fa29c10d0l,0xfee1eb14c9c6ccd2l,
-            0x244670c756a81453l },
-          { 0x90b33eefc5185c22l,0xd77ae4b63db82d28l,0xce5ee034f228f940l,
-            0x5d7660847bb47be5l },
-          0 },
-        /* 43 << 96 */
-        { { 0x88b7eec499b9a8c6l,0x56048d9e14e8ef0cl,0xa18f93215c89cf78l,
-            0xbd2087616d327e66l },
-          { 0x5b187225d9e53e27l,0xa57ca6c7bf4d0317l,0x187731d2e9557736l,
-            0xd4ce2f78a874982el },
-          0 },
-        /* 44 << 96 */
-        { { 0x65163ae55e915918l,0x6158d6d986f8a46bl,0x8466b538eeebf99cl,
-            0xca8761f6bca477efl },
-          { 0xaf3449c29ebbc601l,0xef3b0f41e0c3ae2fl,0xaa6c577d5de63752l,
-            0xe916660164682a51l },
-          0 },
-        /* 45 << 96 */
-        { { 0xf5b602bb29f47deal,0x42853c9659ddd679l,0x5c25be4041d7c001l,
-            0x8e069399d4a3b307l },
-          { 0x1782152e736ce467l,0x2e264109c9cb4f08l,0xf900cb11ab124698l,
-            0x1bbed1d02d6e05b1l },
-          0 },
-        /* 46 << 96 */
-        { { 0x9cc3fedc7da08b1fl,0x0f44949361d5ed38l,0xc8cbc4209b991b6bl,
-            0xee62a342891c42e1l },
-          { 0x11c496bb1a179139l,0x94ece2892eac4d8el,0x35f303a5a98d5570l,
-            0x69d4340514a31552l },
-          0 },
-        /* 47 << 96 */
-        { { 0x29d45e50892dfcbal,0x653e613e5c30cee3l,0x7b8c1ae61868a348l,
-            0x40ab51654f2c612al },
-          { 0x56e977f9891cdc8cl,0xee1ca12a34ca7cd1l,0xa4e283ee17b5ddf8l,
-            0x4e36f2fb6f536205l },
-          0 },
-        /* 48 << 96 */
-        { { 0x5a3097befc15aa1el,0x40d12548b54b0745l,0x5bad4706519a5f12l,
-            0xed03f717a439dee6l },
-          { 0x0794bb6c4a02c499l,0xf725083dcffe71d2l,0x2cad75190f3adcafl,
-            0x7f68ea1c43729310l },
-          0 },
-        /* 49 << 96 */
-        { { 0xa3834d85e89ea13fl,0x2ca00f942db803bbl,0x0f378681400ed3dal,
-            0x1028af6b54854da3l },
-          { 0x3928c2da06400c7fl,0x21119785d82aac92l,0x06618c17724e4af0l,
-            0x22b42b161470736bl },
-          0 },
-        /* 51 << 96 */
-        { { 0x7d0cfd48f7f2ac65l,0x46e1ac705f641b60l,0x0ab9566a0fcf0137l,
-            0xbd4380e0db460fb8l },
-          { 0x4550efbf6db99b55l,0x33846e669764b744l,0xacffa0cae34ca007l,
-            0xce642d6a077e646cl },
-          0 },
-        /* 52 << 96 */
-        { { 0xe747c8c7b7ffd977l,0xec104c3580761a22l,0x8395ebaf5a3ffb83l,
-            0xfb3261f4e4b63db7l },
-          { 0x53544960d883e544l,0x13520d708cc2eeb8l,0x08f6337bd3d65f99l,
-            0x83997db2781cf95bl },
-          0 },
-        /* 53 << 96 */
-        { { 0xd89112c47d8037a3l,0xcba48ad3464c2025l,0x3afea8399814a09dl,
-            0x69e52260269030b5l },
-          { 0x5b7067365c674805l,0x8c3fd33d87343f56l,0xc572c858b1c61edfl,
-            0x43d8f4ded06749cbl },
-          0 },
-        /* 55 << 96 */
-        { { 0x04da1f06b4066003l,0xf7d4e52f372749e8l,0x56cd667114b38747l,
-            0x1943a22a22eb6d9el },
-          { 0xc2c5391990714b0al,0xb6e3abb7d13cf3ael,0xfcd8d671676115cbl,
-            0x178ce1a0c06a0d3al },
-          0 },
-        /* 57 << 96 */
-        { { 0x94485b36913508f8l,0x92f87fe36de83b42l,0xedd476f0ed77e666l,
-            0xee90fbc68da2cf53l },
-          { 0x6f4afc53fc6cf3d9l,0x231bceb9f21f6ecfl,0x6504a11d494c6e9cl,
-            0xd3728f032c211461l },
-          0 },
-        /* 59 << 96 */
-        { { 0x09a9b93799562ca2l,0xb7d5c5cf6a5a5aa8l,0x52f5d7b9987b219dl,
-            0x33849f9ec38014d4l },
-          { 0x299adaf628f23880l,0x738ecc8874875588l,0x39d707adca2af665l,
-            0xc8c11f688f4c5f73l },
-          0 },
-        /* 60 << 96 */
-        { { 0x68e4f15e9afdfb3cl,0x49a561435bdfb6dfl,0xa9bc1bd45f823d97l,
-            0xbceb5970ea111c2al },
-          { 0x366b455fb269bbc4l,0x7cd85e1ee9bc5d62l,0xc743c41c4f18b086l,
-            0xa4b4099095294fb9l },
-          0 },
-        /* 61 << 96 */
-        { { 0x2ae046d66aa34757l,0x34db1addaa6d7e9dl,0x2b4b7e017ccf432bl,
-            0xfbe0bfa590d319c6l },
-          { 0xfb2981687ec7a7f2l,0x346cc46004f5132el,0x782b2e53b40aceddl,
-            0x402e1d64e3f0b8b9l },
-          0 },
-        /* 63 << 96 */
-        { { 0x2aa3b21d25a56088l,0xae6ee57543d08962l,0x669e42bff1e22297l,
-            0x7b4c635732e3a47al },
-          { 0x22b16260ea464a25l,0xad8ca59072d5cd7al,0x7c244266104eb96al,
-            0x1def95e28e7c11d2l },
-          0 },
-        /* 64 << 96 */
-        { { 0x9c7c581d26ee8382l,0xcf17dcc5359d638el,0xee8273abb728ae3dl,
-            0x1d112926f821f047l },
-          { 0x1149847750491a74l,0x687fa761fde0dfb9l,0x2c2580227ea435abl,
-            0x6b8bdb9491ce7e3fl },
-          0 },
-        /* 65 << 96 */
-        { { 0x1f04524cdc27e1f7l,0xa0c74f61572eab14l,0xdd5d0cfced272074l,
-            0x95533c1d5bfe4f65l },
-          { 0x3039d57ecce817cal,0x029967d73b822082l,0x9fca43866c4a10d3l,
-            0xf8b2a7f0bb4968ebl },
-          0 },
-        /* 71 << 96 */
-        { { 0x933cd6dcbfbf6407l,0xd08f21504be673f8l,0x0e1c4d0db1140a2el,
-            0x0502a092431b270al },
-          { 0x5d99f9508768c00al,0xda3ce5079b3ff3c7l,0x1c648b75031c11abl,
-            0x5e3de47bf2776305l },
-          0 },
-        /* 77 << 96 */
-        { { 0xe22af9274d2b9de4l,0xf3690f55a69609ecl,0x20260a6e453fbe18l,
-            0x8edcb46b42d0b085l },
-          { 0xd4ef250b7d9c7f58l,0x5e8578dfc83c3433l,0x9751d9b9e46e320al,
-            0xb02bd03cf3c58af6l },
-          0 },
-        /* 83 << 96 */
-        { { 0x0ab299ede1b4d1ccl,0x22e7301cec4d18d2l,0xf2380f2a7b86d4ffl,
-            0xca19ef9e40753713l },
-          { 0x52bb0d24678c38a1l,0xcc9d6fd499001c02l,0xa2dd6b00bc5876e4l,
-            0xfe04b402409fe2b3l },
-          0 },
-        /* 89 << 96 */
-        { { 0x7db986b1ff69f8d3l,0x648865e59d6266b9l,0x7ccfe96183f7dae5l,
-            0x0f59a8bd6828379bl },
-          { 0xad97e5ef0ac7c4e8l,0xa75914be784e9c18l,0x053e015bb18c1bb8l,
-            0x18f6cefcb347043el },
-          0 },
-        /* 95 << 96 */
-        { { 0xb4d641bdf257c38al,0xadcea4d0c1372574l,0x7f8d20be71c8f0d0l,
-            0x14a1d24c41dc6344l },
-          { 0xe446054e41f35526l,0x4664213823c952ddl,0xfbde483401f6b0acl,
-            0xc89eee66d75b6318l },
-          0 },
-        /* 101 << 96 */
-        { { 0x700242937a087392l,0xd42bd3aad5da04del,0xee64cb5b1f803414l,
-            0xd6341ecbbab52988l },
-          { 0x7ad522f343170a74l,0x5fba22536d61d9del,0x230304c1e845a6e5l,
-            0xd69feabfbc9e326bl },
-          0 },
-        /* 107 << 96 */
-        { { 0xef7e49412e8a11d7l,0x4cb8963662c8bae1l,0xecc741198aad5816l,
-            0x13490782c7af5175l },
-          { 0x10c701f73e91a604l,0xcb8c6c7124cc30c1l,0xce0d479c071eb382l,
-            0xa3dc71fb058087d4l },
-          0 },
-        /* 113 << 96 */
-        { { 0xec368492541eb6d1l,0x567735d6e09a94abl,0xb8039ec172350329l,
-            0x3bd83a8f4894ddafl },
-          { 0x740ef2a39c07063dl,0xba25e72277da7b59l,0xb09e248e3bf42e82l,
-            0x7ff36da0b017d037l },
-          0 },
-        /* 116 << 96 */
-        { { 0xca80416651b8d9a3l,0x42531bc90ffb0db1l,0x72ce4718aa82e7cel,
-            0x6e199913df574741l },
-          { 0xd5f1b13dd5d36946l,0x8255dc65f68f0194l,0xdc9df4cd8710d230l,
-            0x3453c20f138c1988l },
-          0 },
-        /* 119 << 96 */
-        { { 0x913f23b9ed08ac04l,0x18e336643590d098l,0xd3f72934e67536dcl,
-            0xf949a757ec7ecde9l },
-          { 0x37fc6583cf9cbd37l,0xcbe62cc043b1228el,0x777124948a743274l,
-            0x3ea3668c716ce6f1l },
-          0 },
-        /* 125 << 96 */
-        { { 0xc89ce010a90d375bl,0x39ac669340503fe3l,0x9036f782d33ecb0el,
-            0x5190656841fdc7d1l },
-          { 0xbefd136e917d94cdl,0x05fea2f22a511b24l,0x80e62d76f9076e0cl,
-            0x8c57635e418ba653l },
-          0 },
-    },
-    {
-        /* 0 << 104 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 104 */
-        { { 0x20d3c982cf7d62d2l,0x1f36e29d23ba8150l,0x48ae0bf092763f9el,
-            0x7a527e6b1d3a7007l },
-          { 0xb4a89097581a85e3l,0x1f1a520fdc158be5l,0xf98db37d167d726el,
-            0x8802786e1113e862l },
-          0 },
-        /* 3 << 104 */
-        { { 0xf6e894d1f4c6b6ecl,0x526b082718b3cd9bl,0x73f952a812117fbfl,
-            0x2be864b011945bf5l },
-          { 0x86f18ea542099b64l,0x2770b28a07548ce2l,0x97390f28295c1c9cl,
-            0x672e6a43cb5206c3l },
-          0 },
-        /* 4 << 104 */
-        { { 0xc37c7dd0c55c4496l,0xa6a9635725bbabd2l,0x5b7e63f2add7f363l,
-            0x9dce37822e73f1dfl },
-          { 0xe1e5a16ab2b91f71l,0xe44898235ba0163cl,0xf2759c32f6e515adl,
-            0xa5e2f1f88615eecfl },
-          0 },
-        /* 5 << 104 */
-        { { 0xcacce2c847c64367l,0x6a496b9f45af4ec0l,0x2a0836f36034042cl,
-            0x14a1f3900b6c62eal },
-          { 0xe7fa93633ef1f540l,0xd323b30a72a76d93l,0xffeec8b50feae451l,
-            0x4eafc172bd04ef87l },
-          0 },
-        /* 7 << 104 */
-        { { 0xe4435a51b3e59b89l,0x136139554133a1c9l,0x87f46973440bee59l,
-            0x714710f800c401e4l },
-          { 0xc0cf4bced6c446c9l,0xe0aa7fd66c4d5368l,0xde5d811afc68fc37l,
-            0x61febd72b7c2a057l },
-          0 },
-        /* 9 << 104 */
-        { { 0x27375fe665f837e2l,0x93f8c68bd882179fl,0x584feadc59b16187l,
-            0xe5b50be9483bc162l },
-          { 0x7ad9d6f1a2776625l,0xe9d1008004ff457bl,0x5b56d322677618a6l,
-            0x036694eae3e68673l },
-          0 },
-        /* 10 << 104 */
-        { { 0x6ca4f87e822e37bel,0x73f237b4253bda4el,0xf747f3a241190aebl,
-            0xf06fa36f804cf284l },
-          { 0x0a6bbb6efc621c12l,0x5d624b6440b80ec6l,0x4b0724257ba556f3l,
-            0x7fa0c3543e2d20a8l },
-          0 },
-        /* 11 << 104 */
-        { { 0x6feaffc51d8a4fd1l,0x59663b205f1ad208l,0xefc93cef24acb46al,
-            0x54929de05967118cl },
-          { 0x885708009acffb1cl,0x492bbf2b145639ecl,0x71f495a638f0018el,
-            0xe24365dbc2792847l },
-          0 },
-        /* 13 << 104 */
-        { { 0x4bedae86a6f29002l,0x7abedb56e034457al,0x8bf3eec6179bff2al,
-            0x9d626d57390f4e6bl },
-          { 0x653fe0e914dd6ea3l,0x7483715989bd6d08l,0x85fb05b4ebd9b03dl,
-            0x7dc3f2214a768bbcl },
-          0 },
-        /* 15 << 104 */
-        { { 0xaacc63f132b0ed8fl,0x041237242bafefd2l,0x0df9a7987e2d2a13l,
-            0x09bd13cf9c27591fl },
-          { 0xaa5f5e476e1afb50l,0xcd146a42b66eb646l,0x3f07561d1442ec3cl,
-            0x7e5471738ae8ec47l },
-          0 },
-        /* 16 << 104 */
-        { { 0x8de2b7bc453cadd6l,0x203900a7bc0bc1f8l,0xbcd86e47a6abd3afl,
-            0x911cac128502effbl },
-          { 0x2d550242ec965469l,0x0e9f769229e0017el,0x633f078f65979885l,
-            0xfb87d4494cf751efl },
-          0 },
-        /* 17 << 104 */
-        { { 0x2c3e61196c0c6cd5l,0x5e01a49a99f4aac8l,0xfa518fc92ef1565el,
-            0xf64ff8714f772366l },
-          { 0x52fcbc2b726420d0l,0x30fbf6eb76cfa9eel,0x0bd17139fa618268l,
-            0x23ed6e122087535dl },
-          0 },
-        /* 19 << 104 */
-        { { 0x76098e38bb4ccb2cl,0x44e88aeeafbad6d1l,0x5c4d286771928778l,
-            0xb1df868138534c94l },
-          { 0x67eb8f4d77ce9debl,0x2a86d0461a77c55dl,0xc327181e46a6a3e7l,
-            0x68fd611b8710e206l },
-          0 },
-        /* 21 << 104 */
-        { { 0xc093f3fc0c82bdf1l,0x21db25894f76c4a6l,0xf3dcb22ee410a7ael,
-            0x1db37114f3c22ffel },
-          { 0x9bd0a1fb58f6801dl,0x2cab103bd1b55cc8l,0x2ae1a7f5077ba4b2l,
-            0x82b46642ce5ab2b3l },
-          0 },
-        /* 23 << 104 */
-        { { 0xc8477ec52546684cl,0xe3f9387702ff02b5l,0xefb72133ae5d04cdl,
-            0x644905c339f10d02l },
-          { 0x1750c87c13d8d356l,0x0e9b8063b41e7640l,0xc7ece04f5647b05bl,
-            0x89a43da7ca9df9c4l },
-          0 },
-        /* 25 << 104 */
-        { { 0x02610ef1920eb7d9l,0x34bd2fc2e1ea1dc0l,0xcb89da255170b890l,
-            0xaaa2796461cff827l },
-          { 0xc308c9d37103ed6al,0xe82d63d5a467564al,0x94c897c4a0fa7732l,
-            0x75eb52fa64c7aa5fl },
-          0 },
-        /* 27 << 104 */
-        { { 0x52582f9cb985fcb6l,0xaaef8d9f8508a691l,0x494c2c346e505131l,
-            0x6d062362d55f30f6l },
-          { 0x70059e9122e1e32fl,0x1507c3fe9e51abb0l,0xd8aba31b2b7bda72l,
-            0x5acbc5f77b753f13l },
-          0 },
-        /* 28 << 104 */
-        { { 0x15bfb8bf5116f937l,0x7c64a586c1268943l,0x71e25cc38419a2c8l,
-            0x9fd6b0c48335f463l },
-          { 0x4bf0ba3ce8ee0e0el,0x6f6fba60298c21fal,0x57d57b39ae66bee0l,
-            0x292d513022672544l },
-          0 },
-        /* 29 << 104 */
-        { { 0x075dc81953952ff6l,0xd4d9eeda20b7384dl,0x8a81c1bfd2d6c6a5l,
-            0x319368a0db050f3bl },
-          { 0x91f476de31f1cee2l,0x1b38604500d0e17fl,0xed2081889a820384l,
-            0x8d00c411a0f1a637l },
-          0 },
-        /* 31 << 104 */
-        { { 0xb029b687a47fd8f0l,0xa531360696371a05l,0x7b84e88c5ab09140l,
-            0x87dad7c85eeb1d14l },
-          { 0xef0749b9d0edf6f3l,0x29fc7310e2ef198bl,0x01e05df5069ed399l,
-            0x121db4ecdf4e2fcal },
-          0 },
-        /* 33 << 104 */
-        { { 0xe730f3f62826bee0l,0xb9bdbe3fce332a8fl,0x1ecad11766ec00aal,
-            0x7503d835617a62d1l },
-          { 0x9f34e161b862b139l,0xde42194cf30f6a67l,0x5037a953c1e879fel,
-            0x62f321f89bda45dbl },
-          0 },
-        /* 34 << 104 */
-        { { 0xe87771d8033f2876l,0xb0186ec67d5cc3dbl,0x58e8bb803bc9bc1dl,
-            0x4d1395cc6f6ef60el },
-          { 0xa73c62d6186244a0l,0x918e5f23110a5b53l,0xed4878ca741b7eabl,
-            0x3038d71adbe03e51l },
-          0 },
-        /* 35 << 104 */
-        { { 0xcbdba27c40234d55l,0x24352b6cb3eb56c9l,0xae681b85a8e9295al,
-            0x2a6cfba1f1171664l },
-          { 0x49f045838ca40c3cl,0xe56da25c6eb0f8eal,0x8e62f86fc4341a4el,
-            0x7f68bdc64c3f947fl },
-          0 },
-        /* 36 << 104 */
-        { { 0x840204b7a93c3246l,0x21ab6069a0b9b4cdl,0xf5fa6e2bb1d64218l,
-            0x1de6ad0ef3d56191l },
-          { 0x570aaa88ff1929c7l,0xc6df4c6b640e87b5l,0xde8a74f2c65f0cccl,
-            0x8b972fd5e6f6cc01l },
-          0 },
-        /* 37 << 104 */
-        { { 0x862013c00bf22173l,0xfd004c834acd8e23l,0x50e422ca310b1649l,
-            0xe6d04de65bbe1854l },
-          { 0x651f646385761ef3l,0x3b17d38652cf85c9l,0xbdce284a5f54ecc7l,
-            0x72efcd3ec7c2106cl },
-          0 },
-        /* 39 << 104 */
-        { { 0x34324b182ff07e3el,0x29938f38f50bcb71l,0xd0e3d7b977e2bcc3l,
-            0x8e78f007c0a3292bl },
-          { 0xfa28c530005c2c00l,0x6f9c21d51faa0c5al,0x3df01abd7b9c78f3l,
-            0x0e5618c1ccaaeb7el },
-          0 },
-        /* 40 << 104 */
-        { { 0xaa6778fce7560b90l,0xb4073e61a7e824cel,0xff0d693cd642eba8l,
-            0x7ce2e57a5dccef38l },
-          { 0x89c2c7891df1ad46l,0x83a06922098346fdl,0x2d715d72da2fc177l,
-            0x7b6dd71d85b6cf1dl },
-          0 },
-        /* 41 << 104 */
-        { { 0x4601a6a492ad3889l,0xdc8e3364d9a0709fl,0x0c687f2b2c260327l,
-            0xe882af62e1a79573l },
-          { 0x0cfd00ab945d9017l,0xe6df7505d0e3c188l,0xb389a66dbde825a2l,
-            0x126d77b6bcd8e14fl },
-          0 },
-        /* 43 << 104 */
-        { { 0xc800acc7db18ec73l,0x0ebecc78d86e99efl,0x675796cdbd05bc5fl,
-            0x254498126afd7c7fl },
-          { 0x96293b695969b165l,0xd8514d83c162c8dal,0xe174f8b674a15a5cl,
-            0x880d687389a2f73cl },
-          0 },
-        /* 44 << 104 */
-        { { 0x53703a328300129fl,0x1f63766268c43bfdl,0xbcbd191300e54051l,
-            0x812fcc627bf5a8c5l },
-          { 0x3f969d5f29fb85dal,0x72f4e00a694759e8l,0x426b6e52790726b7l,
-            0x617bbc873bdbb209l },
-          0 },
-        /* 45 << 104 */
-        { { 0xf536f07cad1deb2el,0x2a13a11ea87a710el,0x0ce2ccab64f4dc96l,
-            0x16178694f5a55464l },
-          { 0x1496168da2cb3986l,0xb079a5b9d56a93a9l,0x97005e99092893d3l,
-            0x55df5ed6e8fcc6c3l },
-          0 },
-        /* 46 << 104 */
-        { { 0x511f8bb997aee317l,0x812a4096e81536a8l,0x137dfe593ac09b9bl,
-            0x0682238fba8c9a7al },
-          { 0x7072ead6aeccb4bdl,0x6a34e9aa692ba633l,0xc82eaec26fff9d33l,
-            0xfb7535121d4d2b62l },
-          0 },
-        /* 47 << 104 */
-        { { 0x821dca8bbf328b1cl,0x24596ddd5a3d6830l,0x061c4c15635b5b4cl,
-            0x0e2b3bef4fa3560al },
-          { 0xffced37498906c43l,0x10ebd174e26b3784l,0x7cd068c470039bb5l,
-            0xc47dda0f88404e59l },
-          0 },
-        /* 48 << 104 */
-        { { 0x1a0445ff1d7aadabl,0x65d38260d5f6a67cl,0x6e62fb0891cfb26fl,
-            0xef1e0fa55c7d91d6l },
-          { 0x47e7c7ba33db72cdl,0x017cbc09fa7c74b2l,0x3c931590f50a503cl,
-            0xcac54f60616baa42l },
-          0 },
-        /* 49 << 104 */
-        { { 0x7ad7d13569185235l,0x19771949fb69e030l,0xd4de9717bc45fb4fl,
-            0x5657b076167e5739l },
-          { 0x9503a71fdd27449el,0xfa2fabf73cc01347l,0xf8ecef24c83fb301l,
-            0x527012bd5a8d5078l },
-          0 },
-        /* 51 << 104 */
-        { { 0x70a550d7e6fc3a32l,0x8e5875841951fe57l,0x5e6d43eaaab9788bl,
-            0x1e406fed80599794l },
-          { 0xd8164ace9ed2557cl,0xf9648f30ff593e10l,0x53af2fd80c2ff879l,
-            0x6705993cc9409bf4l },
-          0 },
-        /* 52 << 104 */
-        { { 0x04b005b6c6458293l,0x36bb5276e8d10af7l,0xacf2dc138ee617b8l,
-            0x470d2d35b004b3d4l },
-          { 0x06790832feeb1b77l,0x2bb75c3985657f9cl,0xd70bd4edc0f60004l,
-            0xfe797ecc219b018bl },
-          0 },
-        /* 53 << 104 */
-        { { 0xeca02ebf0ef19ceel,0xac691fbe2de090a4l,0x1f3866641b374547l,
-            0xbd8018c6a12ee85fl },
-          { 0x3e851318ee63e0f1l,0x45b0c37a161987d3l,0x67fe36056eb567c4l,
-            0x07c291b563200c5bl },
-          0 },
-        /* 55 << 104 */
-        { { 0xc85535ac1a956a8al,0x7bf4d70bc0ade321l,0xaf2efc48237bc56fl,
-            0xf9bfe13e31ba97e7l },
-          { 0x2ca5fac4cf7c6c65l,0xc23b14ff03ec3e35l,0xc5109923217bcfd2l,
-            0xf02f96a1c58f32f3l },
-          0 },
-        /* 57 << 104 */
-        { { 0x3b1f715b0d0aeff4l,0xbe406d62f0d44536l,0xe413843d567bcb38l,
-            0x75b7fb43791e705al },
-          { 0x5b831d4b224f85e5l,0x3fea6659d9a35eael,0xd6f8bd097c85480bl,
-            0x2a9561a34a959267l },
-          0 },
-        /* 59 << 104 */
-        { { 0x4a96a3535a303c10l,0x9aa3ad71c37c8d7el,0x4e2d077fde52014fl,
-            0x4d8bec5df8e3964dl },
-          { 0xda88ab94e865e142l,0x52df506d10a88091l,0x9aebff0092fc38a2l,
-            0xdfc034395608b0a2l },
-          0 },
-        /* 60 << 104 */
-        { { 0xee23fa819966e7eel,0x64ec4aa805b7920dl,0x2d44462d2d90aad4l,
-            0xf44dd195df277ad5l },
-          { 0x8d6471f1bb46b6a1l,0x1e65d313fd885090l,0x33a800f513a977b4l,
-            0xaca9d7210797e1efl },
-          0 },
-        /* 61 << 104 */
-        { { 0xb1557be2a4ea787el,0x59324973019f667fl,0x262ceced5595367cl,
-            0x8a676897ec598640l },
-          { 0x2df6cebfc7f06f4fl,0xb255723138078f9al,0xad553c46524a0dd1l,
-            0xe20bb20a5a68d62al },
-          0 },
-        /* 63 << 104 */
-        { { 0x6f47e3779589e263l,0x7cb83e3d35106bb8l,0x2642d87bcc632fc2l,
-            0x4d18f34d8b77eb36l },
-          { 0x7de6bf6d19ca4d1cl,0x438e8f02f7e926aal,0xb539021250ac930al,
-            0xe34ddfc15b219a9fl },
-          0 },
-        /* 64 << 104 */
-        { { 0x98857ceb1bf4581cl,0xe635e186aca7b166l,0x278ddd22659722acl,
-            0xa0903c4c1db68007l },
-          { 0x366e458948f21402l,0x31b49c14b96abda2l,0x329c4b09e0403190l,
-            0x97197ca3d29f43fel },
-          0 },
-        /* 65 << 104 */
-        { { 0xfe4de13781479db4l,0x307331f012f08ea5l,0x7f59a64758c04c13l,
-            0x6b41189abdc9b3c9l },
-          { 0xb10f11e5a6f8c5edl,0x757fb7a3f5b0579el,0x456d0a873c90d027l,
-            0x7e8bb6bf32361796l },
-          0 },
-        /* 71 << 104 */
-        { { 0x6aa1dc6c9e689d8dl,0xaa5fa015479cdd09l,0x7eb4dbb582fc000al,
-            0x4a57b689eff4e701l },
-          { 0x7bfe8d2a8e15cd8cl,0xab109b1cc9074e1al,0x5716715fee1619a5l,
-            0xf29a51eccdcb40bcl },
-          0 },
-        /* 77 << 104 */
-        { { 0x14c76234ddf03c6el,0xdfb5d388baeb2eddl,0x4bd85da26d413d2dl,
-            0x5b0dd9be3ae38469l },
-          { 0xe4d8a9d89ab3ae61l,0xb9e37b880ee63951l,0x17f08e9b21a7f30fl,
-            0x173db1e8119af788l },
-          0 },
-        /* 83 << 104 */
-        { { 0x2352ad4a170d43f6l,0x098d74f65a0ae4b0l,0x290f5236c3a46c2al,
-            0xea9266102dd87e7fl },
-          { 0xd7ee90f6848e6911l,0xebe8f4cce0d8886fl,0xa2038320558ff6a0l,
-            0x1f716534f37c38cfl },
-          0 },
-        /* 89 << 104 */
-        { { 0x9754209439a4a159l,0xe6135412fed24278l,0xbba62254d70e2cabl,
-            0x4ac6a8ac85895130l },
-          { 0xc01614fee1a45363l,0x720ad3f8b67294f2l,0x724ea95cb420ea51l,
-            0x1f40ab2d712b856cl },
-          0 },
-        /* 95 << 104 */
-        { { 0x708e1c7975f3d30cl,0x423f1535e2172da3l,0x7a29be342a06a0b1l,
-            0x9de5c9eb32c68ba2l },
-          { 0x70217b0232d48793l,0x3cf3855bac1471cfl,0x6762d03f8321e179l,
-            0x06ee12ea236fa7cfl },
-          0 },
-        /* 101 << 104 */
-        { { 0x1718e7428779109bl,0x6188008d0aca350bl,0xbbe227e00594bc15l,
-            0x4a7b6423ddbdea35l },
-          { 0x06ad632dfa44e1bfl,0xaf9c163d1e97b409l,0x64dafec3c61f2b2fl,
-            0xc6759d905525c0c9l },
-          0 },
-        /* 107 << 104 */
-        { { 0x76d6294787517149l,0x2bda339baa77d325l,0x04b1bec067ad1fd1l,
-            0x49f63fcc0aec7c73l },
-          { 0x005cb459ec1bf494l,0x8fa99c1b1ec6f8bbl,0x70a4e6d78b59dd43l,
-            0xfd70bcb313d6594dl },
-          0 },
-        /* 113 << 104 */
-        { { 0x2987a7cb13966c11l,0x74ad0a26a783f283l,0xf011200ae54d27f0l,
-            0xbd8632963fb38396l },
-          { 0x7ec7fe8c9b86d059l,0xfa94ca76d0cd33a7l,0xf6ad741cdc646993l,
-            0x83054a427ebc34e9l },
-          0 },
-        /* 116 << 104 */
-        { { 0xadef8c5a192ef710l,0x88afbd4b3b7431f9l,0x7e1f740764250c9el,
-            0x6e31318db58bec07l },
-          { 0xfd4fc4b824f89b4el,0x65a5dd8848c36a2al,0x4f1eccfff024baa7l,
-            0x22a21cf2cba94650l },
-          0 },
-        /* 119 << 104 */
-        { { 0x7b45865478f39754l,0xcbb8b96c4564e003l,0xb492d2bf69b35752l,
-            0x4e6287e065ee5ad3l },
-          { 0x07906c14eb1ffe62l,0xf350390c681fcdf8l,0xc351386f6be3eec3l,
-            0x8480d00ee5df919dl },
-          0 },
-        /* 125 << 104 */
-        { { 0x399861ecf8a2d5aal,0xb179adeb046f78cbl,0x056a6cd88792f647l,
-            0xd3dfc91c3d411820l },
-          { 0x4ccf92d179693be1l,0x12ecd9a3f65cb250l,0x58e5d2102538b9e7l,
-            0x4e655882ff977ccal },
-          0 },
-    },
-    {
-        /* 0 << 112 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 112 */
-        { { 0x8ce9b6bfc360e25al,0xe6425195075a1a78l,0x9dc756a8481732f4l,
-            0x83c0440f5432b57al },
-          { 0xc670b3f1d720281fl,0x2205910ed135e051l,0xded14b0edb052be7l,
-            0x697b3d27c568ea39l },
-          0 },
-        /* 3 << 112 */
-        { { 0x0b89de9314092ebbl,0xf17256bd428e240cl,0xcf89a7f393d2f064l,
-            0x4f57841ee1ed3b14l },
-          { 0x4ee14405e708d855l,0x856aae7203f1c3d0l,0xc8e5424fbdd7eed5l,
-            0x3333e4ef73ab4270l },
-          0 },
-        /* 4 << 112 */
-        { { 0x3bc77adedda492f8l,0xc11a3aea78297205l,0x5e89a3e734931b4cl,
-            0x17512e2e9f5694bbl },
-          { 0x5dc349f3177bf8b6l,0x232ea4ba08c7ff3el,0x9c4f9d16f511145dl,
-            0xccf109a333b379c3l },
-          0 },
-        /* 5 << 112 */
-        { { 0xe75e7a88a1f25897l,0x7ac6961fa1b5d4d8l,0xe3e1077308f3ed5cl,
-            0x208a54ec0a892dfbl },
-          { 0xbe826e1978660710l,0x0cf70a97237df2c8l,0x418a7340ed704da5l,
-            0xa3eeb9a908ca33fdl },
-          0 },
-        /* 7 << 112 */
-        { { 0xb4323d588434a920l,0xc0af8e93622103c5l,0x667518ef938dbf9al,
-            0xa184307383a9cdf2l },
-          { 0x350a94aa5447ab80l,0xe5e5a325c75a3d61l,0x74ba507f68411a9el,
-            0x10581fc1594f70c5l },
-          0 },
-        /* 9 << 112 */
-        { { 0x5aaa98a7cb0c9c8cl,0x75105f3081c4375cl,0xceee50575ef1c90fl,
-            0xb31e065fc23a17bfl },
-          { 0x5364d275d4b6d45al,0xd363f3ad62ec8996l,0xb5d212394391c65bl,
-            0x84564765ebb41b47l },
-          0 },
-        /* 10 << 112 */
-        { { 0x20d18ecc37107c78l,0xacff3b6b570c2a66l,0x22f975d99bd0d845l,
-            0xef0a0c46ba178fa0l },
-          { 0x1a41965176b6028el,0xc49ec674248612d4l,0x5b6ac4f27338af55l,
-            0x06145e627bee5a36l },
-          0 },
-        /* 11 << 112 */
-        { { 0x33e95d07e75746b5l,0x1c1e1f6dc40c78bel,0x967833ef222ff8e2l,
-            0x4bedcf6ab49180adl },
-          { 0x6b37e9c13d7a4c8al,0x2748887c6ddfe760l,0xf7055123aa3a5bbcl,
-            0x954ff2257bbb8e74l },
-          0 },
-        /* 13 << 112 */
-        { { 0x4e23ca446d3fea55l,0xb4ae9c86f4810568l,0x47bfb91b2a62f27dl,
-            0x60deb4c9d9bac28cl },
-          { 0xa892d8947de6c34cl,0x4ee682594494587dl,0x914ee14e1a3f8a5bl,
-            0xbb113eaa28700385l },
-          0 },
-        /* 15 << 112 */
-        { { 0xef9dc899a7b56eafl,0x00c0e52c34ef7316l,0x5b1e4e24fe818a86l,
-            0x9d31e20dc538be47l },
-          { 0x22eb932d3ed68974l,0xe44bbc087c4e87c4l,0x4121086e0dde9aefl,
-            0x8e6b9cff134f4345l },
-          0 },
-        /* 16 << 112 */
-        { { 0x96892c1f711b0eb9l,0xb905f2c8780ab954l,0xace26309a20792dbl,
-            0xec8ac9b30684e126l },
-          { 0x486ad8b6b40a2447l,0x60121fc19fe3fb24l,0x5626fccf1a8e3b3fl,
-            0x4e5686226ad1f394l },
-          0 },
-        /* 17 << 112 */
-        { { 0xda7aae0d196aa5a1l,0xe0df8c771041b5fbl,0x451465d926b318b7l,
-            0xc29b6e557ab136e9l },
-          { 0x2c2ab48b71148463l,0xb5738de364454a76l,0x54ccf9a05a03abe4l,
-            0x377c02960427d58el },
-          0 },
-        /* 19 << 112 */
-        { { 0x90e4f7c92d7d1413l,0x67e2d6b59834f597l,0x4fd4f4f9a808c3e8l,
-            0xaf8237e0d5281ec1l },
-          { 0x25ab5fdc84687ceel,0xc5ded6b1a5b26c09l,0x8e4a5aecc8ea7650l,
-            0x23b73e5c14cc417fl },
-          0 },
-        /* 21 << 112 */
-        { { 0xb4293fdcf50225f9l,0xc52e175cb0e12b03l,0xf649c3bad0a8bf64l,
-            0x745a8fefeb8ae3c6l },
-          { 0x30d7e5a358321bc3l,0xb1732be70bc4df48l,0x1f217993e9ea5058l,
-            0xf7a71cde3e4fd745l },
-          0 },
-        /* 23 << 112 */
-        { { 0xa188b2502d0f39aal,0x622118bb15a85947l,0x2ebf520ffde0f4fal,
-            0xa40e9f294860e539l },
-          { 0x7b6a51eb22b57f0fl,0x849a33b97e80644al,0x50e5d16f1cf095fel,
-            0xd754b54eec55f002l },
-          0 },
-        /* 25 << 112 */
-        { { 0xcd821dfb988baf01l,0xe6331a7ddbb16647l,0x1eb8ad33094cb960l,
-            0x593cca38c91bbca5l },
-          { 0x384aac8d26567456l,0x40fa0309c04b6490l,0x97834cd6dab6c8f6l,
-            0x68a7318d3f91e55fl },
-          0 },
-        /* 27 << 112 */
-        { { 0xc7bfd486605daaa6l,0x46fd72b7bb9a6c9el,0xe4847fb1a124fb89l,
-            0x75959cbda2d8ffbcl },
-          { 0x42579f65c8a588eel,0x368c92e6b80b499dl,0xea4ef6cd999a5df1l,
-            0xaa73bb7f936fe604l },
-          0 },
-        /* 28 << 112 */
-        { { 0xf347a70d6457d188l,0x86eda86b8b7a388bl,0xb7cdff060ccd6013l,
-            0xbeb1b6c7d0053fb2l },
-          { 0x0b02238799240a9fl,0x1bbb384f776189b2l,0x8695e71e9066193al,
-            0x2eb5009706ffac7el },
-          0 },
-        /* 29 << 112 */
-        { { 0x0654a9c04a7d2caal,0x6f3fb3d1a5aaa290l,0x835db041ff476e8fl,
-            0x540b8b0bc42295e4l },
-          { 0xa5c73ac905e214f5l,0x9a74075a56a0b638l,0x2e4b1090ce9e680bl,
-            0x57a5b4796b8d9afal },
-          0 },
-        /* 31 << 112 */
-        { { 0x2a2bfa7f650006f0l,0xdfd7dad350c0fbb2l,0x92452495ccf9ad96l,
-            0x183bf494d95635f9l },
-          { 0x02d5df434a7bd989l,0x505385cca5431095l,0xdd98e67dfd43f53el,
-            0xd61e1a6c500c34a9l },
-          0 },
-        /* 33 << 112 */
-        { { 0x41d85ea1ef74c45bl,0x2cfbfa66ae328506l,0x98b078f53ada7da9l,
-            0xd985fe37ec752fbbl },
-          { 0xeece68fe5a0148b4l,0x6f9a55c72d78136dl,0x232dccc4d2b729cel,
-            0xa27e0dfd90aafbc4l },
-          0 },
-        /* 34 << 112 */
-        { { 0x9647445212b4603el,0xa876c5516b706d14l,0xdf145fcf69a9d412l,
-            0xe2ab75b72d479c34l },
-          { 0x12df9a761a23ff97l,0xc61389925d359d10l,0x6e51c7aefa835f22l,
-            0x69a79cb1c0fcc4d9l },
-          0 },
-        /* 35 << 112 */
-        { { 0xf57f350d594cc7e1l,0x3079ca633350ab79l,0x226fb6149aff594al,
-            0x35afec026d59a62bl },
-          { 0x9bee46f406ed2c6el,0x58da17357d939a57l,0x44c504028fd1797el,
-            0xd8853e7c5ccea6cal },
-          0 },
-        /* 36 << 112 */
-        { { 0x4065508da35fcd5fl,0x8965df8c495ccaebl,0x0f2da85012e1a962l,
-            0xee471b94c1cf1cc4l },
-          { 0xcef19bc80a08fb75l,0x704958f581de3591l,0x2867f8b23aef4f88l,
-            0x8d749384ea9f9a5fl },
-          0 },
-        /* 37 << 112 */
-        { { 0x1b3855378c9049f4l,0x5be948f37b92d8b6l,0xd96f725db6e2bd6bl,
-            0x37a222bc958c454dl },
-          { 0xe7c61abb8809bf61l,0x46f07fbc1346f18dl,0xfb567a7ae87c0d1cl,
-            0x84a461c87ef3d07al },
-          0 },
-        /* 39 << 112 */
-        { { 0x3ab3d5afbd76e195l,0x478dd1ad6938a810l,0x6ffab3936ee3d5cbl,
-            0xdfb693db22b361e4l },
-          { 0xf969449651dbf1a7l,0xcab4b4ef08a2e762l,0xe8c92f25d39bba9al,
-            0x850e61bcf1464d96l },
-          0 },
-        /* 40 << 112 */
-        { { 0xb7e830e3dc09508bl,0xfaf6d2cf74317655l,0x72606cebdf690355l,
-            0x48bb92b3d0c3ded6l },
-          { 0x65b754845c7cf892l,0xf6cd7ac9d5d5f01fl,0xc2c30a5996401d69l,
-            0x91268650ed921878l },
-          0 },
-        /* 41 << 112 */
-        { { 0x380bf913b78c558fl,0x43c0baebc8afdaa9l,0x377f61d554f169d3l,
-            0xf8da07e3ae5ff20bl },
-          { 0xb676c49da8a90ea8l,0x81c1ff2b83a29b21l,0x383297ac2ad8d276l,
-            0x3001122fba89f982l },
-          0 },
-        /* 43 << 112 */
-        { { 0xbbe1e6a6c93f72d6l,0xd5f75d12cad800eal,0xfa40a09fe7acf117l,
-            0x32c8cdd57581a355l },
-          { 0x742219927023c499l,0xa8afe5d738ec3901l,0x5691afcba90e83f0l,
-            0x41bcaa030b8f8eacl },
-          0 },
-        /* 44 << 112 */
-        { { 0xe38b5ff98d2668d5l,0x0715281a7ad81965l,0x1bc8fc7c03c6ce11l,
-            0xcbbee6e28b650436l },
-          { 0x06b00fe80cdb9808l,0x17d6e066fe3ed315l,0x2e9d38c64d0b5018l,
-            0xab8bfd56844dcaefl },
-          0 },
-        /* 45 << 112 */
-        { { 0x42894a59513aed8bl,0xf77f3b6d314bd07al,0xbbdecb8f8e42b582l,
-            0xf10e2fa8d2390fe6l },
-          { 0xefb9502262a2f201l,0x4d59ea5050ee32b0l,0xd87f77286da789a8l,
-            0xcf98a2cff79492c4l },
-          0 },
-        /* 46 << 112 */
-        { { 0xf9577239720943c2l,0xba044cf53990b9d0l,0x5aa8e82395f2884al,
-            0x834de6ed0278a0afl },
-          { 0xc8e1ee9a5f25bd12l,0x9259ceaa6f7ab271l,0x7e6d97a277d00b76l,
-            0x5c0c6eeaa437832al },
-          0 },
-        /* 47 << 112 */
-        { { 0x5232c20f5606b81dl,0xabd7b3750d991ee5l,0x4d2bfe358632d951l,
-            0x78f8514698ed9364l },
-          { 0x951873f0f30c3282l,0x0da8ac80a789230bl,0x3ac7789c5398967fl,
-            0xa69b8f7fbdda0fb5l },
-          0 },
-        /* 48 << 112 */
-        { { 0xe5db77176add8545l,0x1b71cb6672c49b66l,0xd856073968421d77l,
-            0x03840fe883e3afeal },
-          { 0xb391dad51ec69977l,0xae243fb9307f6726l,0xc88ac87be8ca160cl,
-            0x5174cced4ce355f4l },
-          0 },
-        /* 49 << 112 */
-        { { 0x98a35966e58ba37dl,0xfdcc8da27817335dl,0x5b75283083fbc7bfl,
-            0x68e419d4d9c96984l },
-          { 0x409a39f402a40380l,0x88940faf1fe977bcl,0xc640a94b8f8edea6l,
-            0x1e22cd17ed11547dl },
-          0 },
-        /* 51 << 112 */
-        { { 0x17ba93b1a20ef103l,0xad8591306ba6577bl,0x65c91cf66fa214a0l,
-            0xd7d49c6c27990da5l },
-          { 0xecd9ec8d20bb569dl,0xbd4b2502eeffbc33l,0x2056ca5a6bed0467l,
-            0x7916a1f75b63728cl },
-          0 },
-        /* 52 << 112 */
-        { { 0xd4f9497d53a4f566l,0x8973466497b56810l,0xf8e1da740494a621l,
-            0x82546a938d011c68l },
-          { 0x1f3acb19c61ac162l,0x52f8fa9cabad0d3el,0x15356523b4b7ea43l,
-            0x5a16ad61ae608125l },
-          0 },
-        /* 53 << 112 */
-        { { 0xb0bcb87f4faed184l,0x5f236b1d5029f45fl,0xd42c76070bc6b1fcl,
-            0xc644324e68aefce3l },
-          { 0x8e191d595c5d8446l,0xc020807713ae1979l,0xadcaee553ba59cc7l,
-            0x20ed6d6ba2cb81bal },
-          0 },
-        /* 55 << 112 */
-        { { 0x7392b41a530ccbbdl,0x87c82146ea823525l,0xa52f984c05d98d0cl,
-            0x2ae57d735ef6974cl },
-          { 0x9377f7bf3042a6ddl,0xb1a007c019647a64l,0xfaa9079a0cca9767l,
-            0x3d81a25bf68f72d5l },
-          0 },
-        /* 57 << 112 */
-        { { 0xc110d830b0f2ac95l,0x48d0995aab20e64el,0x0f3e00e17729cd9al,
-            0x2a570c20dd556946l },
-          { 0x912dbcfd4e86214dl,0x2d014ee2cf615498l,0x55e2b1e63530d76el,
-            0xc5135ae4fd0fd6d1l },
-          0 },
-        /* 59 << 112 */
-        { { 0x1854daa5061f1658l,0xc0016df1df0cd2b3l,0xc2a3f23e833d50del,
-            0x73b681d2bbbd3017l },
-          { 0x2f046dc43ac343c0l,0x9c847e7d85716421l,0xe1e13c910917eed4l,
-            0x3fc9eebd63a1b9c6l },
-          0 },
-        /* 60 << 112 */
-        { { 0x0f816a727fe02299l,0x6335ccc2294f3319l,0x3820179f4745c5bel,
-            0xe647b782922f066el },
-          { 0xc22e49de02cafb8al,0x299bc2fffcc2ecccl,0x9a8feea26e0e8282l,
-            0xa627278bfe893205l },
-          0 },
-        /* 61 << 112 */
-        { { 0xa7e197337933e47bl,0xf4ff6b132e766402l,0xa4d8be0a98440d9fl,
-            0x658f5c2f38938808l },
-          { 0x90b75677c95b3b3el,0xfa0442693137b6ffl,0x077b039b43c47c29l,
-            0xcca95dd38a6445b2l },
-          0 },
-        /* 63 << 112 */
-        { { 0x583f3703f9374ab6l,0x864f91956e564145l,0x33bc3f4822526d50l,
-            0x9f323c801262a496l },
-          { 0xaa97a7ae3f046a9al,0x70da183edf8a039al,0x5b68f71c52aa0ba6l,
-            0x9be0fe5121459c2dl },
-          0 },
-        /* 64 << 112 */
-        { { 0xc1e17eb6cbc613e5l,0x33131d55497ea61cl,0x2f69d39eaf7eded5l,
-            0x73c2f434de6af11bl },
-          { 0x4ca52493a4a375fal,0x5f06787cb833c5c2l,0x814e091f3e6e71cfl,
-            0x76451f578b746666l },
-          0 },
-        /* 65 << 112 */
-        { { 0xa700767eabd0cc76l,0xa14ae98015889273l,0x5acf2cc466ea6380l,
-            0xb942cc40d08d18b9l },
-          { 0x9b5daa763ae45782l,0x61a25e0fb72f0ce0l,0xf94c0e80435fefe3l,
-            0x73d552cf1620e1c9l },
-          0 },
-        /* 71 << 112 */
-        { { 0x57130582727185c1l,0x8f2b8ebc163897ecl,0x4a059cc7a04e4a6bl,
-            0x4b1de9fe0908a366l },
-          { 0xa4f7738688d0fef0l,0x55e3bb1d9ebfc138l,0x9022bbef005ae362l,
-            0xf5669edc8741d349l },
-          0 },
-        /* 77 << 112 */
-        { { 0xf192c0f7ede937a4l,0xd2e91d62810c1b1el,0xf2b40b64dcc39c69l,
-            0xe125fbd028f03b0el },
-          { 0x52966dd78da708f9l,0x92d400a3cc0e7f32l,0x4e35aae36b0842b8l,
-            0x0b4fe66ded3ad3cfl },
-          0 },
-        /* 83 << 112 */
-        { { 0x14b81d951f1ff6b5l,0x1d82f132ed9b03b8l,0x52f6f029b4fa4047l,
-            0xea653682601e5913l },
-          { 0x4e900375edeee046l,0xd22ed267f9428714l,0xb004fb3b1753e873l,
-            0xfef061ba245b2c09l },
-          0 },
-        /* 89 << 112 */
-        { { 0x5e2376eaf9deba2bl,0x1ed1e9e5269a18cfl,0x8dffd66dcb1cada8l,
-            0xb13239f068369c77l },
-          { 0x2fede3a67f25426fl,0xc885cf0c6f90a2a6l,0xd950162d4eeac543l,
-            0x53011aa09abc201bl },
-          0 },
-        /* 95 << 112 */
-        { { 0x7a63925d432b798al,0x92e762cfc9bd6da9l,0xf22fb9706a190382l,
-            0x19919b847b18a9b3l },
-          { 0x16793b803adfde86l,0xf9ce15ace8b1d44cl,0x4bf74144c0a140b8l,
-            0x680468616f853f6cl },
-          0 },
-        /* 101 << 112 */
-        { { 0xd4e0d8460db84ba2l,0x9a162a3a360b68bbl,0x7297f3939233146cl,
-            0xbc93c2f4ec77412dl },
-          { 0x13ddf0a7e07e1065l,0x000a8d45fb5e5131l,0xb4373078cf61d467l,
-            0xa4a1fd67bf3bb6f9l },
-          0 },
-        /* 107 << 112 */
-        { { 0x6f2473f9d7585098l,0x45a29448d4f23c1al,0x47fe40f1c22bdc25l,
-            0x4e46ed1f31347673l },
-          { 0x5e43a8624148898cl,0x4a02ededa993954el,0x83d830b52f8a1847l,
-            0x007e3156a7f6a378l },
-          0 },
-        /* 113 << 112 */
-        { { 0x01a39fe7e847ca18l,0xaf2722418fed2772l,0x3104ef891fbb1748l,
-            0x5b55331b2b9dd5ffl },
-          { 0xe7806e31cec6a787l,0x9f49ed881e9c0af2l,0xf5a66373a3905b36l,
-            0x77b5bca9efab75f3l },
-          0 },
-        /* 116 << 112 */
-        { { 0xd4d75f4bf0831932l,0x5e770ac477fe8cc9l,0x52b5e748862e72a2l,
-            0xe9a45482501d35fel },
-          { 0x8a93e7424a9ab187l,0x5a72506de88ca017l,0xe680dcb201eb2defl,
-            0xdc5aa4e6ba68209dl },
-          0 },
-        /* 119 << 112 */
-        { { 0x2defa3dc3d01a344l,0x11fd939b162e459al,0x928453b97313d720l,
-            0x08696dc053184a65l },
-          { 0xd9f8a69c721f7415l,0x304eb0e079539019l,0xc9b0ca6dbb0c6313l,
-            0xa10133eba93dc74el },
-          0 },
-        /* 125 << 112 */
-        { { 0xee0b164004393f1el,0x511547dfe1301979l,0xc00dfc3516d26d87l,
-            0x06227c8aab847494l },
-          { 0x178ca86748b2fdc7l,0xb51296f01a8ba1dcl,0xf252787731e1dd14l,
-            0x7ecb5456c0ba2a1fl },
-          0 },
-    },
-    {
-        /* 0 << 120 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 120 */
-        { { 0x3e0e5c9dd111f8ecl,0xbcc33f8db7c4e760l,0x702f9a91bd392a51l,
-            0x7da4a795c132e92dl },
-          { 0x1a0b0ae30bb1151bl,0x54febac802e32251l,0xea3a5082694e9e78l,
-            0xe58ffec1e4fe40b8l },
-          0 },
-        /* 3 << 120 */
-        { { 0x7b23c513516e19e4l,0x56e2e847c5c4d593l,0x9f727d735ce71ef6l,
-            0x5b6304a6f79a44c5l },
-          { 0x6638a7363ab7e433l,0x1adea470fe742f83l,0xe054b8545b7fc19fl,
-            0xf935381aba1d0698l },
-          0 },
-        /* 4 << 120 */
-        { { 0xb5504f9d918e4936l,0x65035ef6b2513982l,0x0553a0c26f4d9cb9l,
-            0x6cb10d56bea85509l },
-          { 0x48d957b7a242da11l,0x16a4d3dd672b7268l,0x3d7e637c8502a96bl,
-            0x27c7032b730d463bl },
-          0 },
-        /* 5 << 120 */
-        { { 0x55366b7d5846426fl,0xe7d09e89247d441dl,0x510b404d736fbf48l,
-            0x7fa003d0e784bd7dl },
-          { 0x25f7614f17fd9596l,0x49e0e0a135cb98dbl,0x2c65957b2e83a76al,
-            0x5d40da8dcddbe0f8l },
-          0 },
-        /* 7 << 120 */
-        { { 0x9fb3bba354530bb2l,0xbde3ef77cb0869eal,0x89bc90460b431163l,
-            0x4d03d7d2e4819a35l },
-          { 0x33ae4f9e43b6a782l,0x216db3079c88a686l,0x91dd88e000ffedd9l,
-            0xb280da9f12bd4840l },
-          0 },
-        /* 9 << 120 */
-        { { 0xa37f3573f37f5937l,0xeb0f6c7dd1e4fca5l,0x2965a554ac8ab0fcl,
-            0x17fbf56c274676acl },
-          { 0x2e2f6bd9acf7d720l,0x41fc8f8810224766l,0x517a14b385d53befl,
-            0xdae327a57d76a7d1l },
-          0 },
-        /* 10 << 120 */
-        { { 0x515d5c891f5f82dcl,0x9a7f67d76361079el,0xa8da81e311a35330l,
-            0xe44990c44b18be1bl },
-          { 0xc7d5ed95af103e59l,0xece8aba78dac9261l,0xbe82b0999394b8d3l,
-            0x6830f09a16adfe83l },
-          0 },
-        /* 11 << 120 */
-        { { 0x43c41ac194d7d9b1l,0x5bafdd82c82e7f17l,0xdf0614c15fda0fcal,
-            0x74b043a7a8ae37adl },
-          { 0x3ba6afa19e71734cl,0x15d5437e9c450f2el,0x4a5883fe67e242b1l,
-            0x5143bdc22c1953c2l },
-          0 },
-        /* 13 << 120 */
-        { { 0xc676d7f2b1f3390bl,0x9f7a1b8ca5b61272l,0x4ebebfc9c2e127a9l,
-            0x4602500c5dd997bfl },
-          { 0x7f09771c4711230fl,0x058eb37c020f09c1l,0xab693d4bfee5e38bl,
-            0x9289eb1f4653cbc0l },
-          0 },
-        /* 15 << 120 */
-        { { 0x54da9dc7ab952578l,0xb5423df226e84d0bl,0xa8b64eeb9b872042l,
-            0xac2057825990f6dfl },
-          { 0x4ff696eb21f4c77al,0x1a79c3e4aab273afl,0x29bc922e9436b3f1l,
-            0xff807ef8d6d9a27al },
-          0 },
-        /* 16 << 120 */
-        { { 0xc7f3a8f833f6746cl,0x21e46f65fea990cal,0x915fd5c5caddb0a9l,
-            0xbd41f01678614555l },
-          { 0x346f4434426ffb58l,0x8055943614dbc204l,0xf3dd20fe5a969b7fl,
-            0x9d59e956e899a39al },
-          0 },
-        /* 17 << 120 */
-        { { 0xe4ca688fd06f56c0l,0xa48af70ddf027972l,0x691f0f045e9a609dl,
-            0xa9dd82cdee61270el },
-          { 0x8903ca63a0ef18d3l,0x9fb7ee353d6ca3bdl,0xa7b4a09cabf47d03l,
-            0x4cdada011c67de8el },
-          0 },
-        /* 19 << 120 */
-        { { 0xac127dc1e038a675l,0x729deff38c5c6320l,0xb7df8fd4a90d2c53l,
-            0x9b74b0ec681e7cd3l },
-          { 0x5cb5a623dab407e5l,0xcdbd361576b340c6l,0xa184415a7d28392cl,
-            0xc184c1d8e96f7830l },
-          0 },
-        /* 21 << 120 */
-        { { 0x86a9303b2f7e85c3l,0x5fce462171988f9bl,0x5b935bf6c138acb5l,
-            0x30ea7d6725661212l },
-          { 0xef1eb5f4e51ab9a2l,0x0587c98aae067c78l,0xb3ce1b3c77ca9ca6l,
-            0x2a553d4d54b5f057l },
-          0 },
-        /* 23 << 120 */
-        { { 0x2c7156e10b1894a0l,0x92034001d81c68c0l,0xed225d00c8b115b5l,
-            0x237f9c2283b907f2l },
-          { 0x0ea2f32f4470e2c0l,0xb725f7c158be4e95l,0x0f1dcafab1ae5463l,
-            0x59ed51871ba2fc04l },
-          0 },
-        /* 25 << 120 */
-        { { 0xd1b0ccdec9520711l,0x55a9e4ed3c8b84bfl,0x9426bd39a1fef314l,
-            0x4f5f638e6eb93f2bl },
-          { 0xba2a1ed32bf9341bl,0xd63c13214d42d5a9l,0xd2964a89316dc7c5l,
-            0xd1759606ca511851l },
-          0 },
-        /* 27 << 120 */
-        { { 0xedf69feaf8c51187l,0x05bb67ec741e4da7l,0x47df0f3208114345l,
-            0x56facb07bb9792b1l },
-          { 0xf3e007e98f6229e4l,0x62d103f4526fba0fl,0x4f33bef7b0339d79l,
-            0x9841357bb59bfec1l },
-          0 },
-        /* 28 << 120 */
-        { { 0xae1e0b67e28ef5bal,0x2c9a4699cb18e169l,0x0ecd0e331e6bbd20l,
-            0x571b360eaf5e81d2l },
-          { 0xcd9fea58101c1d45l,0x6651788e18880452l,0xa99726351f8dd446l,
-            0x44bed022e37281d0l },
-          0 },
-        /* 29 << 120 */
-        { { 0x830e6eea60dbac1fl,0x23d8c484da06a2f7l,0x896714b050ca535bl,
-            0xdc8d3644ebd97a9bl },
-          { 0x106ef9fab12177b4l,0xf79bf464534d5d9cl,0x2537a349a6ab360bl,
-            0xc7c54253a00c744fl },
-          0 },
-        /* 31 << 120 */
-        { { 0x24d661d168754ab0l,0x801fce1d6f429a76l,0xc068a85fa58ce769l,
-            0xedc35c545d5eca2bl },
-          { 0xea31276fa3f660d1l,0xa0184ebeb8fc7167l,0x0f20f21a1d8db0ael,
-            0xd96d095f56c35e12l },
-          0 },
-        /* 33 << 120 */
-        { { 0x57d2046b59da06ebl,0x3c076d5fa49f6d74l,0x6b4c96e616f82ea0l,
-            0xaf7b0f1f90536c0bl },
-          { 0x7999f86d204a9b2dl,0x7e420264126c9f87l,0x4c967a1f262ac4e5l,
-            0xe8174a09900e79adl },
-          0 },
-        /* 34 << 120 */
-        { { 0xd51687f2cb82516bl,0x8a440cfc040e4670l,0xeafd2bcfe7738d32l,
-            0x7071e9162a1e911al },
-          { 0xbd3abd44cfea57bbl,0x9c3add16085b19e2l,0xb194c01d6baa5aa6l,
-            0x6f3d3faf92f85c64l },
-          0 },
-        /* 35 << 120 */
-        { { 0xe23e0769488a280el,0x8e55a728e63a5904l,0x01690716ab84cccfl,
-            0xfe796130b78b3c98l },
-          { 0x15cc475b9117f211l,0xbdc178761d1b9d56l,0x8df5594a3e37b9b9l,
-            0x97747e341e37e494l },
-          0 },
-        /* 36 << 120 */
-        { { 0xf2a6370ed2f896e1l,0x27100e63802987afl,0xb4db1cff4678ebc7l,
-            0x6e5f28d937b4b263l },
-          { 0xd29030009711ebc4l,0xf14dcb9ff8712484l,0x7a46ec3eea449146l,
-            0x200155e9c1c51179l },
-          0 },
-        /* 37 << 120 */
-        { { 0x8130f007f1968d55l,0x18823e7097ed9803l,0xdc9fec559402762dl,
-            0x9e0bd57e278f5abbl },
-          { 0xaa41b913c9ebf303l,0x1105ec43a76b9353l,0xf8e4ee4cf4e6c6b5l,
-            0x3a630972bd7be696l },
-          0 },
-        /* 39 << 120 */
-        { { 0x5c7da7e16356b3eel,0x951bfe458ccf9b48l,0x6f2c6e91d0555d0cl,
-            0x47d7f7b58efd38eel },
-          { 0x957256c8af6fd630l,0xa690c65bdc01774cl,0xad52b27c7c8dafdal,
-            0x81fbc16af44a145fl },
-          0 },
-        /* 40 << 120 */
-        { { 0x497c3a3481b0493al,0x2b3ab20d71bc8408l,0x0c60226aa03769d1l,
-            0x4ac89c7ad10708b0l },
-          { 0x62398ea5092f7e6al,0x7f408f54de96d526l,0x025bde6f85bf102cl,
-            0xcc2f85120a4aa72el },
-          0 },
-        /* 41 << 120 */
-        { { 0x8a65e0386884a9c3l,0xd2e6ac047bf8c794l,0xc9c5d3d3f7bcdfb9l,
-            0x0000ce42a33f2c12l },
-          { 0xea1c0a9a7dd13b2bl,0xbfd97d7f0c35c3b1l,0x0ba75cf3347fcefel,
-            0xc3c5f28f1333460dl },
-          0 },
-        /* 43 << 120 */
-        { { 0x7810ebf575baa708l,0xe7fa7a0dd7440549l,0x25b813baf0667e4al,
-            0x30a46740d15838a8l },
-          { 0x13207b1ad04b22f7l,0x09e601ffd1419699l,0xb1038fc77f687b27l,
-            0xa4547dc9a127f95bl },
-          0 },
-        /* 44 << 120 */
-        { { 0x83b2e3b3056ecd2cl,0xd17dcdaaf03dfd36l,0xee24a5f81dcef956l,
-            0xb6746cd0b7239f16l },
-          { 0xed6cb311c8458c48l,0xe8c0fc9805d27da4l,0x4610e9a0a1bf0970l,
-            0x1947f01d9906c19el },
-          0 },
-        /* 45 << 120 */
-        { { 0x8b979126217c7cd7l,0x65c57a378050067el,0x6a50c6383f34838cl,
-            0x3de617c29b7bc81fl },
-          { 0x58488d24253a0ac7l,0x3fe53ec75520ba0bl,0x9156dca763f0607el,
-            0xdd08c5705d1fe134l },
-          0 },
-        /* 46 << 120 */
-        { { 0xbfb1d9e1e33ba77fl,0x0985311ccaef6c01l,0xc8b59e9accca8948l,
-            0x1256280945416f25l },
-          { 0xc90edbc257f53218l,0xcaa08c05125d8fb5l,0x33ea3fd49a1aad3bl,
-            0x2aa8bd83d005e8bel },
-          0 },
-        /* 47 << 120 */
-        { { 0xcbd2f1a3c2b22963l,0x0f7bd29c0c8ac2b3l,0xddb932432d405bfdl,
-            0xeabd4805328413b5l },
-          { 0xcc79d31748ebb6b9l,0x09604f831f521aael,0xd3487fdf4c7d188cl,
-            0xd219c318d1552ea9l },
-          0 },
-        /* 48 << 120 */
-        { { 0xef4f115c775d6ecel,0x69d2e3bbe8c0e78dl,0xb0264ef1145cfc81l,
-            0x0a41e9fa1b69788bl },
-          { 0x0d9233be909a1f0bl,0x150a84520ae76b30l,0xea3375370632bb69l,
-            0x15f7b3cfaa25584al },
-          0 },
-        /* 49 << 120 */
-        { { 0xfc4c623e321f7b11l,0xd36c1066f9cbc693l,0x8165235835dc0c0al,
-            0xa3ce2e18c824e97el },
-          { 0x59ea7cbcc6ff405el,0xced5a94a1e56a1e2l,0x88d744c53ab64b39l,
-            0x8963d029073a36e7l },
-          0 },
-        /* 51 << 120 */
-        { { 0x97aa902cb19f3edbl,0x8e605ff9bbf2975bl,0x0536fa8ba6eb299bl,
-            0xfd96da4f7cd03ac0l },
-          { 0x29c5b5b578f9a265l,0x1f025a6d5fd0bc1bl,0x440486ee58e0f8e1l,
-            0x8f191f7d593e49e9l },
-          0 },
-        /* 52 << 120 */
-        { { 0xbddf656baea9c13fl,0x083c5d514c678b37l,0x975431b630878ed4l,
-            0x6de13d4608d9cf1cl },
-          { 0xfbb639cc02427c45l,0x6190ca0c5a6cd989l,0x35a6aa26c53f11b7l,
-            0x73f9e17dddfd86f6l },
-          0 },
-        /* 53 << 120 */
-        { { 0xd30478a317be7689l,0x6fc3f634e358f7a7l,0x4057ece515688d9fl,
-            0xb5397495d3d91eefl },
-          { 0x62fac49e2f49bde4l,0xeb4a3e1860125c73l,0x15f38be8dabdac55l,
-            0x18bf29f7d334d52al },
-          0 },
-        /* 55 << 120 */
-        { { 0xf684162b68777538l,0x3e2a770bbb3381f4l,0x1b7562c1b374577cl,
-            0x9eec22dc5cf21688l },
-          { 0xc35014b1d472be2cl,0xafe2317035f086fbl,0xb9c9c4c9a1491ce1l,
-            0x2df1e669b56792ddl },
-          0 },
-        /* 57 << 120 */
-        { { 0xcf7d36fe1830f624l,0x176c3c12ed0474bdl,0x25b802c8f82b493dl,
-            0x683c2a744c78147el },
-          { 0x0db99444f8f3e446l,0x437bcac6800a56c7l,0xb4e592264d08b25fl,
-            0xcaf1b4142e691ca7l },
-          0 },
-        /* 59 << 120 */
-        { { 0x378bd47b9d231cafl,0xde3aa2f01f4db832l,0xf609d16ab29bd7d5l,
-            0x13feab54bdfb54dfl },
-          { 0x274abbbc22fc1a12l,0x267febb47d30ef1bl,0xeffa996d80717cd8l,
-            0x065a86d1118d0812l },
-          0 },
-        /* 60 << 120 */
-        { { 0xc681a8656a3cb3afl,0x528f25a981751414l,0x6669f07cc7eac946l,
-            0x9fb3a53f3cc6cc6bl },
-          { 0x2919d92a11ae224al,0xa59141110b170a19l,0xdc16c611e2042f16l,
-            0x58ace12decd4180bl },
-          0 },
-        /* 61 << 120 */
-        { { 0x689bb1ec107bb59fl,0x8129702adad2b385l,0x10bd3baeb1630603l,
-            0xaadec5d15f23e7cfl },
-          { 0x572f234f4586f7fbl,0x13abdec95ec11b32l,0xa462a7ec6191c26al,
-            0x4a7d92a06685c8d3l },
-          0 },
-        /* 63 << 120 */
-        { { 0xdd4e2b63b16628eal,0xdf0c8fc8eefa5e86l,0xb0ec710205662720l,
-            0x3f4c6956fe81e9dal },
-          { 0x5732ad8f52e356f7l,0x045a103968a658f0l,0x9c40b0b6506ba33al,
-            0x0a426010cb54258dl },
-          0 },
-        /* 64 << 120 */
-        { { 0x09891641d4c5105fl,0x1ae80f8e6d7fbd65l,0x9d67225fbee6bdb0l,
-            0x3b433b597fc4d860l },
-          { 0x44e66db693e85638l,0xf7b59252e3e9862fl,0xdb785157665c32ecl,
-            0x702fefd7ae362f50l },
-          0 },
-        /* 65 << 120 */
-        { { 0x3902ab14c3254641l,0xa63cfd9fd8c001c8l,0x597d155c52d0af3cl,
-            0xc5a2cbc4a0dbe688l },
-          { 0xac8a841b249195aal,0xc98f01aaed14426fl,0xeb4a8ce8353905f1l,
-            0x4d6668171ecee1b7l },
-          0 },
-        /* 71 << 120 */
-        { { 0xbd66e7d9a94da8cdl,0x7bc04735801ef314l,0x90f3eba1c5cc2904l,
-            0x3c7dfed6f71bb36dl },
-          { 0x89a50c8da75e3086l,0x88b8b4746f8e3418l,0x26fe17f4a44a5dbdl,
-            0x98bf74c16a1e24fel },
-          0 },
-        /* 77 << 120 */
-        { { 0xca7b470679e0db85l,0x7f46c7716fc897fdl,0x9537e7918edfc0f3l,
-            0xa46d4b4405e91ddfl },
-          { 0x97d21061ee5575e7l,0x1f4f32da59650429l,0x2d1d6af878995129l,
-            0x41d6fc228a0e4260l },
-          0 },
-        /* 83 << 120 */
-        { { 0xb30a1a89107d2282l,0x5433d7673a5e1323l,0xb9eeab822abdfeafl,
-            0x9579cb46df3e0dbfl },
-          { 0x6fc3ff2c7e088e79l,0x94b32360d7314326l,0xd2e82b59e5ad82e4l,
-            0x7372dc4a55bc24e3l },
-          0 },
-        /* 89 << 120 */
-        { { 0x355697215f3c03cbl,0x4150adf2a146edcdl,0x16ec1a421a252e1cl,
-            0xdf4d0f94424984eal },
-          { 0x15142b5f5fabe961l,0xe6a73c29567ec13al,0xe6d370795d12070al,
-            0x437743d0206fd7c6l },
-          0 },
-        /* 95 << 120 */
-        { { 0x483b7a95d66bc594l,0xf6a7064e8a6113bbl,0x373ce20f4ed34f72l,
-            0x6aa876ab24f429b2l },
-          { 0x378d5c25412c3102l,0xe4219a97b493199cl,0x01c7cafaa0b37332l,
-            0x9305cc85f7633f7dl },
-          0 },
-        /* 101 << 120 */
-        { { 0x0259b43aaadf2273l,0x869c5bd3cf9dc1c2l,0x4f18a6e4068d6628l,
-            0xd110637fec2d4547l },
-          { 0x1ae88a791e94aaddl,0xe8b4be39de64f5f9l,0x85cbd9b24dc6b2bbl,
-            0xb65091fa1bc352b2l },
-          0 },
-        /* 107 << 120 */
-        { { 0x7c5cea5d20f6a354l,0xe936ff1582f3ed39l,0x54e7a775b779368el,
-            0x8ca8a46e3cb17c9el },
-          { 0x753ca1fa0138974dl,0x9ce311eba72902ffl,0xcb727e56973f72b6l,
-            0xde72538d91685710l },
-          0 },
-        /* 113 << 120 */
-        { { 0xf423569f1bec8f85l,0x23376da5ca844ac4l,0xce7b407a111523f4l,
-            0x736fb92dde7aa46dl },
-          { 0xd9139edcc7662640l,0x520fbf0656a85e24l,0x14e3b5857e5284b5l,
-            0xcbae4e8321d56ef3l },
-          0 },
-        /* 116 << 120 */
-        { { 0x69830a05564470a1l,0x1a1e26cf5b702e8el,0xe5fdf7d9d8fae645l,
-            0xe4774f74a9950c66l },
-          { 0x18bdda7cd1466825l,0xe6ab4ce6d115218al,0xfcb8c50064528629l,
-            0xd705f429e70deed9l },
-          0 },
-        /* 119 << 120 */
-        { { 0x3f992d7ba99df096l,0x08993b4125e78725l,0x79eaad13117c4cafl,
-            0x7230594c9fa87285l },
-          { 0xac23d7edf2673e27l,0xc9d76fb53b9eb111l,0x7a0a036a9e9db78al,
-            0x7c6ec39df9565cffl },
-          0 },
-        /* 125 << 120 */
-        { { 0x956ad1441fd4f7a1l,0x6c511ffecb7546cal,0x11becdaef5ae6ddbl,
-            0x67587741946168b2l },
-          { 0x99cd45edf54379a7l,0x687f8462e2748decl,0x2b2be1e1837bd066l,
-            0x3862659c0c45a5a9l },
-          0 },
-    },
-    {
-        /* 0 << 128 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 128 */
-        { { 0x62a8c244bfe20925l,0x91c19ac38fdce867l,0x5a96a5d5dd387063l,
-            0x61d587d421d324f6l },
-          { 0xe87673a2a37173eal,0x2384800853778b65l,0x10f8441e05bab43el,
-            0xfa11fe124621efbel },
-          0 },
-        /* 3 << 128 */
-        { { 0xc0f734a3b2335834l,0x9526205a90ef6860l,0xcb8be71704e2bb0dl,
-            0x2418871e02f383fal },
-          { 0xd71776814082c157l,0xcc914ad029c20073l,0xf186c1ebe587e728l,
-            0x6fdb3c2261bcd5fdl },
-          0 },
-        /* 4 << 128 */
-        { { 0xb4480f0441c23fa3l,0xb4712eb0c1989a2el,0x3ccbba0f93a29ca7l,
-            0x6e205c14d619428cl },
-          { 0x90db7957b3641686l,0x0432691d45ac8b4el,0x07a759acf64e0350l,
-            0x0514d89c9c972517l },
-          0 },
-        /* 5 << 128 */
-        { { 0xcc7c4c1c2cf9d7c1l,0x1320886aee95e5abl,0xbb7b9056beae170cl,
-            0xc8a5b250dbc0d662l },
-          { 0x4ed81432c11d2303l,0x7da669121f03769fl,0x3ac7a5fd84539828l,
-            0x14dada943bccdd02l },
-          0 },
-        /* 7 << 128 */
-        { { 0x51b90651cbae2f70l,0xefc4bc0593aaa8ebl,0x8ecd8689dd1df499l,
-            0x1aee99a822f367a5l },
-          { 0x95d485b9ae8274c5l,0x6c14d4457d30b39cl,0xbafea90bbcc1ef81l,
-            0x7c5f317aa459a2edl },
-          0 },
-        /* 9 << 128 */
-        { { 0x410dc6a90deeaf52l,0xb003fb024c641c15l,0x1384978c5bc504c4l,
-            0x37640487864a6a77l },
-          { 0x05991bc6222a77dal,0x62260a575e47eb11l,0xc7af6613f21b432cl,
-            0x22f3acc9ab4953e9l },
-          0 },
-        /* 10 << 128 */
-        { { 0x27c8919240be34e8l,0xc7162b3791907f35l,0x90188ec1a956702bl,
-            0xca132f7ddf93769cl },
-          { 0x3ece44f90e2025b4l,0x67aaec690c62f14cl,0xad74141822e3cc11l,
-            0xcf9b75c37ff9a50el },
-          0 },
-        /* 11 << 128 */
-        { { 0x0d0942770c24efc8l,0x0349fd04bef737a4l,0x6d1c9dd2514cdd28l,
-            0x29c135ff30da9521l },
-          { 0xea6e4508f78b0b6fl,0x176f5dd2678c143cl,0x081484184be21e65l,
-            0x27f7525ce7df38c4l },
-          0 },
-        /* 13 << 128 */
-        { { 0x9faaccf5e4652f1dl,0xbd6fdd2ad56157b2l,0xa4f4fb1f6261ec50l,
-            0x244e55ad476bcd52l },
-          { 0x881c9305047d320bl,0x1ca983d56181263fl,0x354e9a44278fb8eel,
-            0xad2dbc0f396e4964l },
-          0 },
-        /* 15 << 128 */
-        { { 0xfce0176788a2ffe4l,0xdc506a3528e169a5l,0x0ea108617af9c93al,
-            0x1ed2436103fa0e08l },
-          { 0x96eaaa92a3d694e7l,0xc0f43b4def50bc74l,0xce6aa58c64114db4l,
-            0x8218e8ea7c000fd4l },
-          0 },
-        /* 16 << 128 */
-        { { 0x6a7091c2e48fb889l,0x26882c137b8a9d06l,0xa24986631b82a0e2l,
-            0x844ed7363518152dl },
-          { 0x282f476fd86e27c7l,0xa04edaca04afefdcl,0x8b256ebc6119e34dl,
-            0x56a413e90787d78bl },
-          0 },
-        /* 17 << 128 */
-        { { 0xd1ffd160deb58b9bl,0x78492428c007273cl,0x47c908048ef06073l,
-            0x746cd0dfe48c659el },
-          { 0xbd7e8e109d47055bl,0xe070967e39711c04l,0x3d8869c99c9444f6l,
-            0x6c67ccc834ac85fcl },
-          0 },
-        /* 19 << 128 */
-        { { 0x8a42d8b087b05be1l,0xef00df8d3e4e1456l,0x148cc8e8fbfc8cd2l,
-            0x0288ae4c4878804fl },
-          { 0x44e669a73b4f6872l,0xa4a8dbd4aab53c5bl,0x843fa963c9660052l,
-            0x128e2d2571c05dd2l },
-          0 },
-        /* 21 << 128 */
-        { { 0x3ea86174a9f1b59bl,0xc747ea076a9a8845l,0x733710b5ab242123l,
-            0x6381b546d386a60cl },
-          { 0xba0e286366a44904l,0x770f618de9db556cl,0x39e567f828fb198dl,
-            0xb5f1bef040147ee8l },
-          0 },
-        /* 23 << 128 */
-        { { 0x1adee1d516391617l,0x962d9184a3315fd9l,0x91c229750c805d59l,
-            0x4575eaf2cd9a1877l },
-          { 0x83fef163451831b9l,0x829d6bdd6f09e30fl,0x9379272dcc6b4e6al,
-            0xd7a049bd95fbee4al },
-          0 },
-        /* 25 << 128 */
-        { { 0x695f70da44ae09c6l,0x79793892bb99de1dl,0xde269352f696b429l,
-            0xe37ea97f8104c825l },
-          { 0x3166cac6b0e72e63l,0xa82e633ca03ba670l,0x1106e3843e505667l,
-            0xc2994f3dffb788b6l },
-          0 },
-        /* 27 << 128 */
-        { { 0xd36a5ab37c53073bl,0xc44a9940ebdc7e35l,0x7dd86c8bf3ded136l,
-            0x9fe9879fd5a0eb14l },
-          { 0xa210726c9b99bf9cl,0x3faf4456861036afl,0x1661f1c9615d091al,
-            0x2c63f630911551bcl },
-          0 },
-        /* 28 << 128 */
-        { { 0x1554d46da670ff1dl,0x24833d88cb97a1ccl,0x8fa6ab3cded97493l,
-            0x215e037189926498l },
-          { 0x549bd592e56d74ffl,0x58a8caf543b5e1ecl,0x3c6087a323e93cb9l,
-            0x8b0549875648b83cl },
-          0 },
-        /* 29 << 128 */
-        { { 0x232974230554f94fl,0x4f445a380f3a7618l,0xb9fb40bee4abefd6l,
-            0xfbf3eaf9c15eb07cl },
-          { 0xed469c23aca0c8b3l,0xc5209f68846e3f8fl,0x33d51d13d75da468l,
-            0x9406e10a3d5c6e29l },
-          0 },
-        /* 31 << 128 */
-        { { 0xb9a44b1f5c6cad21l,0xaa9947751ee60a83l,0xc89af3858c390401l,
-            0xef1e450b8dd51056l },
-          { 0x5f5f069879ac84d1l,0x68d82982ef57b1afl,0x31f1d90f50849555l,
-            0xff9577e57d9fc8f6l },
-          0 },
-        /* 33 << 128 */
-        { { 0xaeebc5c0b430d6a1l,0x39b87a13dc3a9c04l,0xf0c445252db4a631l,
-            0xe32d95482c66fcf6l },
-          { 0x16f11bafb17849c4l,0xdd1c76615eca71f7l,0x4389ad2e32e6c944l,
-            0x727c11a5889a06bbl },
-          0 },
-        /* 34 << 128 */
-        { { 0x38dd1ac021e5781al,0x578318dbfd019ee2l,0x096b677d5f88e574l,
-            0xdbec82b216ad9f4fl },
-          { 0x348debe23260e8d9l,0x9334126064dfcda1l,0xdc5fb34cefc8faael,
-            0x5fa048beb4a6fc25l },
-          0 },
-        /* 35 << 128 */
-        { { 0xe18806fd60b3258cl,0xb7d2926b1364df47l,0xe208300fa107ce99l,
-            0x8d2f29fe7918df0el },
-          { 0x0b012d77a1244f4cl,0xf01076f4213a11cfl,0x8e623223181c559dl,
-            0x9df196ee995a281dl },
-          0 },
-        /* 36 << 128 */
-        { { 0xc431a238013ff83bl,0x7c0018b2fad69d08l,0x99aeb52a4c9589eal,
-            0x121f41ab9b1cf19fl },
-          { 0x0cfbbcbaef0f5958l,0x8deb3aeb7be8fbdcl,0x12b954081f15aa31l,
-            0x5acc09b34c0c06fdl },
-          0 },
-        /* 37 << 128 */
-        { { 0xfaa821383a721940l,0xdd70f54dd0008b83l,0x00decb507d32a52dl,
-            0x04563529cdd87deal },
-          { 0xb0e7e2a2db81643dl,0x445f4c383a6fef50l,0x5c0ef211df694ae1l,
-            0xa5a8fead923d0f1cl },
-          0 },
-        /* 39 << 128 */
-        { { 0xbc0e08b0325b2601l,0xae9e4c6105815b7al,0x07f664faf944a4a1l,
-            0x0ad19d29288f83b3l },
-          { 0x8615cd677232c458l,0x98edff6e9038e7d1l,0x082e0c4395a4dfccl,
-            0x336267afeceee00el },
-          0 },
-        /* 40 << 128 */
-        { { 0x775cbfa86d518ffbl,0xdecee1f6930f124bl,0x9a402804f5e81d0fl,
-            0x0e8225c52a0eeb2fl },
-          { 0x884a5d39fee9e867l,0x9540428ffb505454l,0xb2bf2e20107a70d1l,
-            0xd9917c3ba010b2aal },
-          0 },
-        /* 41 << 128 */
-        { { 0xc88ad4452a29bfdel,0x3072ebfa998368b7l,0xa754cbf7f5384692l,
-            0x85f7e16906b13146l },
-          { 0x42a7095f6a549fbel,0xef44edf91f7f1f42l,0xbea2989737b0c863l,
-            0x13b096d87a1e7fc3l },
-          0 },
-        /* 43 << 128 */
-        { { 0x51add77ce2a3a251l,0x840ca1384d8476adl,0x08d01d26f6096478l,
-            0x10d501a532f1662bl },
-          { 0xc8d63f811165a955l,0x587aa2e34095046al,0x759506c617af9000l,
-            0xd6201fe4a32ab8d2l },
-          0 },
-        /* 44 << 128 */
-        { { 0xa98f42fa3d843d53l,0x33777cc613ef927al,0xc440cdbecb84ca74l,
-            0x8c22f9631dc7c5ddl },
-          { 0x4bc82b70c8d94708l,0x7e0b43fcc814364fl,0x286d4e2486f59b7el,
-            0x1abc895e4d6bf4c4l },
-          0 },
-        /* 45 << 128 */
-        { { 0x7c52500cfc8c9bbdl,0x635563381534d9f7l,0xf55f38cbfd52c990l,
-            0xc585ae85058f52e7l },
-          { 0xb710a28bf9f19a01l,0x891861bdf0273ca4l,0x38a7aa2b034b0b7cl,
-            0xa2ecead52a809fb1l },
-          0 },
-        /* 46 << 128 */
-        { { 0x3df614f1ec3ca8eal,0x6bb24e9f9505bc08l,0x23ba1afbf37ace22l,
-            0x2e51b03b3463c261l },
-          { 0x59a0fca9c39e6558l,0x819f271ca342ccd9l,0x0c913d54df7ac033l,
-            0xba0f83de573257d3l },
-          0 },
-        /* 47 << 128 */
-        { { 0xdf62817ab3b32fbcl,0x616d74b0964670d4l,0xa37bc6270e26020bl,
-            0xda46d655b7d40bdal },
-          { 0x2840f155b5773f84l,0xbb633777897774b6l,0x59ff1df79a1ed3fal,
-            0xf7011ee2bac571f9l },
-          0 },
-        /* 48 << 128 */
-        { { 0x38151e274d559d96l,0x4f18c0d3b8db6c01l,0x49a3aa836f9921afl,
-            0xdbeab27b8c046029l },
-          { 0x242b9eaa7040bf3bl,0x39c479e51614b091l,0x338ede2b0e4baf5dl,
-            0x5bb192b7f0a53945l },
-          0 },
-        /* 49 << 128 */
-        { { 0xd612951861535bb0l,0xbf14364016f6a954l,0x3e0931eedde18024l,
-            0x79d791c8139441c0l },
-          { 0xba4fe7ecb67b8269l,0x7f30d848224b96c1l,0xa7e0a6abf0341068l,
-            0x78db42c37198ea2dl },
-          0 },
-        /* 51 << 128 */
-        { { 0x13354044185ce776l,0x109a6e059ff0100cl,0xafa3b61b03144cb1l,
-            0x4e4c814585265586l },
-          { 0xa8dafd33edb35364l,0x6691781bfd2606bel,0x2e06a9786182f5ccl,
-            0x588784ebe77faeecl },
-          0 },
-        /* 52 << 128 */
-        { { 0x896d572337e440d7l,0x685c5fd9ade23f68l,0xb5b1a26dc2c64918l,
-            0xb9390e30dad6580cl },
-          { 0x87911c4e7dee5b9bl,0xb90c5053deb04f6el,0x37b942a18f065aa6l,
-            0x34acdf2a1ca0928dl },
-          0 },
-        /* 53 << 128 */
-        { { 0xc773f525606f8f04l,0x75ae4a4b41b0a5bbl,0xb2aa058eaf7df93cl,
-            0xf15bea4feafed676l },
-          { 0xd2967b236a3c4fd7l,0xa698628090e30e7fl,0xf1b5166d316418bdl,
-            0x5748682e1c13cb29l },
-          0 },
-        /* 55 << 128 */
-        { { 0xe7b11babfff3605bl,0xdbce1b74cbac080fl,0xa0be39bd6535f082l,
-            0x2b6501805f826684l },
-          { 0xf90cea2400f5244fl,0xe279f2fadd244a1cl,0xd3fca77c9421c3ael,
-            0xe66bc7ee81a5210al },
-          0 },
-        /* 57 << 128 */
-        { { 0x114085dac40c6461l,0xaf78cb47f47d41b8l,0x7a9ae851755b0adbl,
-            0x8d2e8c66a0600b6dl },
-          { 0x5fb19045389758c0l,0xfa6e2cdabe7c91b2l,0x6472a432663983a2l,
-            0xc9370829e0e19363l },
-          0 },
-        /* 59 << 128 */
-        { { 0xd335856ec50bf2ffl,0x89b42295dfa708c2l,0x5dfb42241b201b4el,
-            0x6c94d6b94eecbf9cl },
-          { 0xabe5a47a7a634097l,0xf3d53b1643febecfl,0xff18619faca9846el,
-            0x80ad8629a4066177l },
-          0 },
-        /* 60 << 128 */
-        { { 0x7872e34b3390ff23l,0x968ce4abde7d18efl,0x9b4a745e627fe7b1l,
-            0x9607b0a0caff3e2al },
-          { 0x1b05818eeb40e3a5l,0x6ac62204c0fa8d7al,0xb5b9058571ed4809l,
-            0xb2432ef0f7cb65f2l },
-          0 },
-        /* 61 << 128 */
-        { { 0xc1203418f8a144b7l,0xb3413f808378f901l,0xf6badea161857095l,
-            0xcd2816c2b2e93efel },
-          { 0x6a8303ea174a0ee6l,0x98b62f29150b28b6l,0x68071bbc9c2a05b6l,
-            0xcfcf41a39f00e36el },
-          0 },
-        /* 63 << 128 */
-        { { 0xcaf564f234d6bc29l,0x9e9a6507f3c8edb0l,0x2fb889edd4e5502el,
-            0xb70d4ceb6cc9d8edl },
-          { 0x0de25356b020f740l,0xa68d9263d11fe5e6l,0xe86400679d85dd77l,
-            0xa95dfa7dec2c8c8dl },
-          0 },
-        /* 64 << 128 */
-        { { 0x715c9f973112795fl,0xe8244437984e6ee1l,0x55cb4858ecb66bcdl,
-            0x7c136735abaffbeel },
-          { 0x546615955dbec38el,0x51c0782c388ad153l,0x9ba4c53ac6e0952fl,
-            0x27e6782a1b21dfa8l },
-          0 },
-        /* 65 << 128 */
-        { { 0x3f9bc63ece59397dl,0x3f0f98a93eaa6104l,0x2f82c37c002d9271l,
-            0x6ac0495d4985353cl },
-          { 0xbde52f629191527bl,0xa3a13fce475aa640l,0x1d71ae17ce673f89l,
-            0x2b5cc61529120ec1l },
-          0 },
-        /* 71 << 128 */
-        { { 0xa0ab0f9924318c1cl,0x0cc5ca7da80ca60bl,0x24e27598abb965bal,
-            0xc4863198b44d1351l },
-          { 0x4d913783a28f04bel,0x404e78088cce8960l,0x2973b4e46286873el,
-            0x7b6e0f3219f42b50l },
-          0 },
-        /* 77 << 128 */
-        { { 0x0091a786306a6349l,0x4640ceab2098622dl,0x9928022be8182233l,
-            0xf261bee4514d0bedl },
-          { 0x70cdcc44c5f64fedl,0x4e19fec4f9eb2dfel,0xd05bdc09058b0b69l,
-            0x16f3007ed3bc6190l },
-          0 },
-        /* 83 << 128 */
-        { { 0x8f7f16957f136df1l,0x6d7547019b4f4215l,0xfb22d55eb4cc46a6l,
-            0x0b53ef53a8563034l },
-          { 0x8b105acc42bc9353l,0xe44c0a396079d59dl,0x78441fee35ee38ddl,
-            0x87ad93e43dcc0119l },
-          0 },
-        /* 89 << 128 */
-        { { 0x98a1c55358d9f73al,0xaa0843f0540e2b91l,0x701f8831d0647459l,
-            0xc4ae9d0484673005l },
-          { 0x9c37bc9f30b3ea20l,0x24cb4e2dbcbfb2b2l,0x8513e6f313cbf070l,
-            0x0c4db4334e76c79el },
-          0 },
-        /* 95 << 128 */
-        { { 0x882a2b9cbc8320b8l,0x16e9c11e3ad9e222l,0x24399ac19b23cb1dl,
-            0x334c5496799a89c7l },
-          { 0x72b6f9b8df3d774cl,0x42955bcbb11b6704l,0x3c4d6021ad2d4eafl,
-            0x5416b309afe2b671l },
-          0 },
-        /* 101 << 128 */
-        { { 0x1bbe9e662bf7c2a6l,0x22a3a10ca4acfddbl,0x2424eaab46bae581l,
-            0xebec1bbf40d6bdadl },
-          { 0xd7e3fa1a5b012aedl,0xc0f82c23f1dc6204l,0x42787c82e319477dl,
-            0xca1ae7a14cf57573l },
-          0 },
-        /* 107 << 128 */
-        { { 0x44b7d589d51bbde9l,0x15de755fd6a4cc98l,0x9b6ea8e582fb8e2el,
-            0x9d9294f04332bc22l },
-          { 0x53c6b2b7d1fa239al,0x286bf536693ca4f1l,0xc3fa754603c00f65l,
-            0xc046713af49cdb48l },
-          0 },
-        /* 113 << 128 */
-        { { 0xe356f5f11d82d5d6l,0xa0346a73d035ca0cl,0x14c76adee1884448l,
-            0xd8369bdd1c23dde9l },
-          { 0x13017862fe025eafl,0x6b5ac5e9a76be1d7l,0x52d621a94933bb6el,
-            0xb045b53baa8c1d3fl },
-          0 },
-        /* 116 << 128 */
-        { { 0x242da39e4e40466al,0xc03cb184ac322b07l,0x776b744f9aaa10bfl,
-            0xb80d9f14fe7d4beal },
-          { 0x75cd14308f9c4908l,0xa4e59ce9087b3d7al,0x3bbdce598cdca614l,
-            0x58c57113bc1a5df1l },
-          0 },
-        /* 119 << 128 */
-        { { 0x2a70af1abd79d467l,0x68dc4f23f63e2b73l,0x4345572f1f67b23dl,
-            0xc012b08f3a340718l },
-          { 0x9458585cc963dbe2l,0x21d84032223a495cl,0x0d54a4ea0dc28159l,
-            0xd9549e2c9b927dafl },
-          0 },
-        /* 125 << 128 */
-        { { 0xcd54ebd2d43c8cd2l,0x5ff4ded6a817b9f9l,0x6f59bc31245386d3l,
-            0x65b67cb0a2077821l },
-          { 0x36407956405ffa07l,0x723e0252d589f27al,0x052004b888e1239el,
-            0x8e6d188d69fdf94dl },
-          0 },
-    },
-    {
-        /* 0 << 136 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 136 */
-        { { 0xc16c236e846e364fl,0x7f33527cdea50ca0l,0xc48107750926b86dl,
-            0x6c2a36090598e70cl },
-          { 0xa6755e52f024e924l,0xe0fa07a49db4afcal,0x15c3ce7d66831790l,
-            0x5b4ef350a6cbb0d6l },
-          0 },
-        /* 3 << 136 */
-        { { 0xe2a37598a9d82abfl,0x5f188ccbe6c170f5l,0x816822005066b087l,
-            0xda22c212c7155adal },
-          { 0x151e5d3afbddb479l,0x4b606b846d715b99l,0x4a73b54bf997cb2el,
-            0x9a1bfe433ecd8b66l },
-          0 },
-        /* 4 << 136 */
-        { { 0xe13122f3dbfb894el,0xbe9b79f6ce274b18l,0x85a49de5ca58aadfl,
-            0x2495775811487351l },
-          { 0x111def61bb939099l,0x1d6a974a26d13694l,0x4474b4ced3fc253bl,
-            0x3a1485e64c5db15el },
-          0 },
-        /* 5 << 136 */
-        { { 0x5afddab61430c9abl,0x0bdd41d32238e997l,0xf0947430418042ael,
-            0x71f9addacdddc4cbl },
-          { 0x7090c016c52dd907l,0xd9bdf44d29e2047fl,0xe6f1fe801b1011a6l,
-            0xb63accbcd9acdc78l },
-          0 },
-        /* 7 << 136 */
-        { { 0x0ad7337ac0b7eff3l,0x8552225ec5e48b3cl,0xe6f78b0c73f13a5fl,
-            0x5e70062e82349cbel },
-          { 0x6b8d5048e7073969l,0x392d2a29c33cb3d2l,0xee4f727c4ecaa20fl,
-            0xa068c99e2ccde707l },
-          0 },
-        /* 9 << 136 */
-        { { 0x5b826fcb1b3ec67bl,0xece1b4b041356616l,0x7d5ce77e56a3ab4fl,
-            0xf6087f13aa212da0l },
-          { 0xe63015054db92129l,0xb8ae4c9940407d11l,0x2b6de222dfab8385l,
-            0x9b323022b7d6c3b4l },
-          0 },
-        /* 10 << 136 */
-        { { 0x057ef17a5ae6ad84l,0x9feae00b293a6ae0l,0xd18bb6c154266408l,
-            0xd3d3e1209c8e8e48l },
-          { 0xba8d4ca80e94fc8fl,0x80262ffc8a8ea0fel,0xac5b2855f71655fdl,
-            0xa348f8fae9aced89l },
-          0 },
-        /* 11 << 136 */
-        { { 0x60684b69a5660af3l,0x69aad23b9066d14bl,0x4d9f9b49fa4d020al,
-            0xafb54ec1b5cd6a4al },
-          { 0x2b25fe1832fd864dl,0xee6945062b6b64d0l,0x954a2a515001d8aal,
-            0x5e1008557082b5b3l },
-          0 },
-        /* 13 << 136 */
-        { { 0x20ecf71cbc90eb1bl,0x4234facf651c1df4l,0xc720fce9e681f678l,
-            0x680becdda7c007f4l },
-          { 0x7c08dc063181afeal,0x75c1b050a34eca91l,0x7d3479d54b9e2333l,
-            0xed16640af3951aa3l },
-          0 },
-        /* 15 << 136 */
-        { { 0x911b596264723e54l,0x34384f8c004b327cl,0x06ca5c61b85435f2l,
-            0x12e0cd25e2c1075cl },
-          { 0xa4b84cb8ac727394l,0x50bd720492b352c1l,0xe85524a49cbd0fb4l,
-            0x10b9274be7876024l },
-          0 },
-        /* 16 << 136 */
-        { { 0xef0a3fecfa181e69l,0x9ea02f8130d69a98l,0xb2e9cf8e66eab95dl,
-            0x520f2beb24720021l },
-          { 0x621c540a1df84361l,0x1203772171fa6d5dl,0x6e3c7b510ff5f6ffl,
-            0x817a069babb2bef3l },
-          0 },
-        /* 17 << 136 */
-        { { 0xb7cf93c3aace2c6al,0x017a96e658ff1bbfl,0x3b401301624a8250l,
-            0xf5ef158529266518l },
-          { 0x3c968bef7585838dl,0x8e97d023853191abl,0x175022e4f6823389l,
-            0xb6a3bfc2f6a9b4c1l },
-          0 },
-        /* 19 << 136 */
-        { { 0x515acf174591d77el,0xb393c89e3c3b25b6l,0x291e068e9c95abd7l,
-            0x256b72c046c02544l },
-          { 0x8172af03915ea92fl,0xc1b324ae4fcd0f03l,0x8abc779215108993l,
-            0xe05fe6867ab815ael },
-          0 },
-        /* 21 << 136 */
-        { { 0xca08d4095bc42740l,0xdd2c19d3e26e2e60l,0x27afdeded7c091fal,
-            0x3b943b0faf25cb22l },
-          { 0x400af8be026047e9l,0x3149b35f772b8ff9l,0x3ddb2c06f17229d9l,
-            0xcd604aeadac152fcl },
-          0 },
-        /* 23 << 136 */
-        { { 0xea2275311c0f6803l,0x9ae82d5ea394cc08l,0xc107a2cfbe32080cl,
-            0x550f35a76429f6d7l },
-          { 0x483c94dacfb70c0cl,0xf26f8e5d90190c94l,0x8574b3cf86bf2620l,
-            0xe7258e45df9f482fl },
-          0 },
-        /* 25 << 136 */
-        { { 0x8f8dc582da46f1cfl,0x61d76cf91e1e7427l,0x8aceb48b306c84aal,
-            0xecaa142f28ebff98l },
-          { 0xac5bd940401d80fel,0x0caacb8fe800cf9el,0x99068da9b3359af5l,
-            0x92fdd5795225b8c0l },
-          0 },
-        /* 27 << 136 */
-        { { 0x5a29d1c5ab56a3fbl,0x4e46ffc0a9aab4afl,0xa210472624d83080l,
-            0xb5820998007f08b6l },
-          { 0x9ce1188e4bc07b3el,0xbf6d0dbe32a19898l,0x5d5c68ea5b2350bal,
-            0xd6c794eb3aa20b45l },
-          0 },
-        /* 28 << 136 */
-        { { 0x3de605ba9ec598cfl,0x1933d3ae4d3029ael,0x6bf2fabd9b140516l,
-            0x712dfc5559a7d01cl },
-          { 0xff3eaae0d2576366l,0x36e407f948701cf8l,0xede21d89b41f4bd4l,
-            0xc5292f5c666eefa9l },
-          0 },
-        /* 29 << 136 */
-        { { 0x30045782c3ebcd77l,0xaa0cf3c73fdbe72el,0x719ec58ef8f43b39l,
-            0x9716fb9972574d3al },
-          { 0x300afc2b0d03ccd6l,0xb60016a34f3fac41l,0x8898910ea3a439f6l,
-            0xdc00a99707ca11f5l },
-          0 },
-        /* 31 << 136 */
-        { { 0x291b15ee8ed34662l,0xb780d54b2ee422a7l,0x5b9e3788fcfe4ccbl,
-            0x4554cb8cbe8b7c3al },
-          { 0xfdaccc2209a85a7fl,0x51f4a8ec555497edl,0x07dc69037da33505l,
-            0xa3bc8bfcbc1fc1dbl },
-          0 },
-        /* 33 << 136 */
-        { { 0x661638c151e25257l,0x0a6fd99c53304974l,0x29d8ae165078eec6l,
-            0xed7512ad447b73del },
-          { 0x0e21de607a4d0e9bl,0x842abd422462be01l,0x3be82afa5cddc709l,
-            0x25bb9da99b52797dl },
-          0 },
-        /* 34 << 136 */
-        { { 0x80613af28adc986al,0x4602284935776a41l,0x17d33e0f4665d03cl,
-            0xeb12eb6c0df12b50l },
-          { 0x0f0effa0ee41527fl,0x8ca2edb680531563l,0x4c354679f28c52c3l,
-            0x67f1ba5c2f6df66dl },
-          0 },
-        /* 35 << 136 */
-        { { 0x9c27207a2479fb3fl,0xef6e0f13515fb902l,0x3f7ad9e9d0d9436el,
-            0x36eb4ea5893bbcf5l },
-          { 0x5c53a2ac02b316b7l,0x10c75ee1f54f7585l,0x29e5879c3c7a4c1bl,
-            0x77da3c82f29c67d6l },
-          0 },
-        /* 36 << 136 */
-        { { 0xf2b75d21ef78a852l,0xba38cd34dd31a900l,0x72b3a68658ffe18al,
-            0x7464190cbfd95745l },
-          { 0x406e532177ed6e81l,0x1af0975bde535eabl,0x66ba22c760c54c82l,
-            0x88e3b1ceb00a2fe0l },
-          0 },
-        /* 37 << 136 */
-        { { 0xb6099b7df7e5c69bl,0x84aa1e26ba34ee2fl,0x5952600405c338bbl,
-            0xe9a134374951a539l },
-          { 0xb12276526ec196bdl,0x26a7be264b6dce36l,0x052e10a4e2a68458l,
-            0x475fc74c1f38898bl },
-          0 },
-        /* 39 << 136 */
-        { { 0x120167fc0a3eb4e1l,0xaa94bc70c0c21204l,0x313cd835e1243b75l,
-            0x3bb63fb20bfd6a4al },
-          { 0xa615dcae21ef05cfl,0x63774c2ec23c3ee5l,0x39365b1fed0dfd65l,
-            0xb610e6ff5d2a2d7dl },
-          0 },
-        /* 40 << 136 */
-        { { 0x55b7f977f0337b15l,0x3bc872a30e94973al,0x624ad983770deea0l,
-            0xcaab336413a5efdbl },
-          { 0x391dd0027a0d4247l,0x39590d5df312aed5l,0x532802c9351365acl,
-            0xdd2e824578a2e22al },
-          0 },
-        /* 41 << 136 */
-        { { 0x81b0d7be7f774fb8l,0x62f32bb3aa412425l,0xbe7afe26bbcd2162l,
-            0xa6ce167c53c7fa7dl },
-          { 0x8deca64fc5c4fc5bl,0x70e546aba6efd2fel,0xf2d8495987ff672al,
-            0x2ca551f249c3059el },
-          0 },
-        /* 43 << 136 */
-        { { 0x40b62d528eb99155l,0xe6b048947420a7e0l,0x9ebecb2bc685e58al,
-            0x3ea642d8d3c8d2cbl },
-          { 0x5340ac6ed489d0dfl,0xf3846d08c2b7588el,0x4cecd8a0611c289bl,
-            0xdddc39c50dd71421l },
-          0 },
-        /* 44 << 136 */
-        { { 0x98c6a6a52ebee687l,0xcdf65bfa56c1c731l,0x48e8132772def210l,
-            0x4ea119418083b5a5l },
-          { 0x3fdcea4fffebb525l,0x55aaea19fb50bf72l,0x5fbedc0a2a85b40cl,
-            0x0d6fd954bf44f29fl },
-          0 },
-        /* 45 << 136 */
-        { { 0x83a8302a9db4071el,0x52f104436f8ae934l,0x96de829d175b800al,
-            0x20ff5035373e97cel },
-          { 0xf58660185f65356al,0x992c15054c8cd782l,0x0b962c8eb57d727fl,
-            0xe8a9abc92bba8bc7l },
-          0 },
-        /* 46 << 136 */
-        { { 0x81a85ddd7cf2b565l,0x5e51e6afc34a0305l,0xa8d94ccefbc89faal,
-            0x2bfd97c1e68cd288l },
-          { 0x16d79c21af2958b8l,0x5e5d989defda7df8l,0x6d2f0ca6ff734c8al,
-            0xfa5b8dd32cc9bafel },
-          0 },
-        /* 47 << 136 */
-        { { 0x5787a9934e6ed688l,0x6815f3b5aab42f46l,0x7960f45b093c6c66l,
-            0xb2b9829728be10cfl },
-          { 0x1d4c7790296568cdl,0xa279a877f048e194l,0xcf7c20f4c6a58b4el,
-            0xf0c717afa1f9c00fl },
-          0 },
-        /* 48 << 136 */
-        { { 0x8a10b53189e800cal,0x50fe0c17145208fdl,0x9e43c0d3b714ba37l,
-            0x427d200e34189accl },
-          { 0x05dee24fe616e2c0l,0x9c25f4c8ee1854c1l,0x4d3222a58f342a73l,
-            0x0807804fa027c952l },
-          0 },
-        /* 49 << 136 */
-        { { 0x79730084ba196afcl,0x17d38e98054bd539l,0xc5cfff3918583239l,
-            0x4b0db5a2d9adbee6l },
-          { 0x9bc9f1e3c2a304e8l,0xbaa61de7de406fa8l,0x8e921ca9e4bec498l,
-            0xd9f4e5ae6604ab02l },
-          0 },
-        /* 51 << 136 */
-        { { 0xdf6b97b5b37f2097l,0x7576c3f9b4a5d2b9l,0x6eb697ed3588cabbl,
-            0x4d75b38622598d8fl },
-          { 0x4e6d93b522ff55e8l,0x4620ec635b8f7edal,0xd5006209f97b7749l,
-            0x9e22e3a84da8b464l },
-          0 },
-        /* 52 << 136 */
-        { { 0xbabfb7f82e8f326fl,0xed9cac225625a519l,0xf1109c1a0edae0a9l,
-            0x45f80a9858521259l },
-          { 0x37a44b075ab71f44l,0x21699eb64a21161bl,0xb523fddf56fe67eel,
-            0x9f5c3a2120b9f72el },
-          0 },
-        /* 53 << 136 */
-        { { 0x12c1131508b75673l,0xfa20121823b096d6l,0x839f01aeeacd6537l,
-            0x0e592be787df32cal },
-          { 0xfe3f65ff8b7dd0fcl,0xed09b4875c1d9a80l,0x8c09dd97b79786d8l,
-            0x74eba2806c5bc983l },
-          0 },
-        /* 55 << 136 */
-        { { 0xf917704862987b50l,0xcc84cdc6bc4ac456l,0x8bd2c922ae08fe12l,
-            0x09d5f661fc2d06c7l },
-          { 0xd10ac6dd9457d47fl,0x65aa30a23668060cl,0x33cddac6745161fcl,
-            0xf4c18b5ea51e540fl },
-          0 },
-        /* 57 << 136 */
-        { { 0x591c064ede723c1fl,0x92e5d4e601a4adael,0x3d7ee8a3145716ecl,
-            0x0ef4c62061727816l },
-          { 0x0e17c576f1bf6d6el,0x173104015ae18045l,0xdad620aae9589b75l,
-            0xb10c7e2d0eda4905l },
-          0 },
-        /* 59 << 136 */
-        { { 0xb8020f16aa08df6fl,0x03cf58ffd67054e9l,0x302e003c11fe3d1al,
-            0x9c194bc1c638a3ecl },
-          { 0x8ed3cb3adefd3f1el,0xc4115e079bf39de4l,0x8dece48bdf46fdf6l,
-            0xebd1dbcf30eafeafl },
-          0 },
-        /* 60 << 136 */
-        { { 0x058eb276fba319c5l,0xd33a91127f7fa54al,0xf060c1b4932a2dabl,
-            0xce3a224e79c7d9bfl },
-          { 0x6fb0388c0ba92823l,0x8d31738a69787881l,0x2d86eb0203cd00b7l,
-            0x4e6e44512b69911bl },
-          0 },
-        /* 61 << 136 */
-        { { 0xff2efe1cfdcca1cfl,0x08f22c69b5bb71e3l,0xc63f4a9f7023076el,
-            0x88fb2aa0ce0c490el },
-          { 0xcc7c97f91f77783cl,0x360026d942ab36b7l,0x547c34ecefd68f70l,
-            0xebe7f99efbabfdabl },
-          0 },
-        /* 63 << 136 */
-        { { 0xe7c1c1788613e87al,0xb035d65e60b82654l,0x055a82d03583a254l,
-            0x27ce1ffc9b3b22fal },
-          { 0x0cf904917ec83cd5l,0xfc6c21805604aa40l,0x1330604099357428l,
-            0x9b0982f9ad4818b7l },
-          0 },
-        /* 64 << 136 */
-        { { 0xc222653a4f0d56f3l,0x961e4047ca28b805l,0x2c03f8b04a73434bl,
-            0x4c966787ab712a19l },
-          { 0xcc196c42864fee42l,0xc1be93da5b0ece5cl,0xa87d9f22c131c159l,
-            0x2bb6d593dce45655l },
-          0 },
-        /* 65 << 136 */
-        { { 0x3a6080d9fb56bc3al,0xf1552dcad6212d7el,0x977ac5b59420f4f6l,
-            0xef914d370e3cd97fl },
-          { 0x807bd6e69c04f768l,0x743a7b552bb803f6l,0x7f5c20804215f4b0l,
-            0x41e331288fc6ce42l },
-          0 },
-        /* 71 << 136 */
-        { { 0x5a31c9ac61e6a460l,0x55102e4093e7eeddl,0x969fe0612da6adcel,
-            0xe8cddc2f3ffea1d9l },
-          { 0xaa26c6b1f0f327c5l,0x9e5b63743544f5e1l,0x5159fa1ddbaa685bl,
-            0x9892d03aa7f44b99l },
-          0 },
-        /* 77 << 136 */
-        { { 0x4dfcbf12e2c6fc1fl,0x703f2f5b7535ac29l,0x78f8617e82f7dc0fl,
-            0x54b835ff853e792dl },
-          { 0x3cc7f000df9f7353l,0x0d7ffd68db5a157al,0x2c1c33691672b21cl,
-            0x694b4904ac970ef8l },
-          0 },
-        /* 83 << 136 */
-        { { 0xd655bc42c1d2c45cl,0x572f603cbd22b05fl,0xa7fbf09388e4531al,
-            0x8d38bbd91fdde98dl },
-          { 0x16cc2aaa73b0fa01l,0x515019a25e8ffb04l,0xb075990611e792ccl,
-            0x89df06f399112c90l },
-          0 },
-        /* 89 << 136 */
-        { { 0x26d435c2481b46dal,0x73ab7e96266e9b3al,0x22d5b1db3c613c40l,
-            0x9de4021c6727e399l },
-          { 0x451ebba56051f8c9l,0xa37f6ec52c281a58l,0x3d7a28fe0e9f4cc5l,
-            0x0f45bcd655b64df7l },
-          0 },
-        /* 95 << 136 */
-        { { 0xba2a718c66616fbel,0x4b27810b3369a9acl,0x50b8391a2b426d5fl,
-            0x420c88efa626fa05l },
-          { 0xe39cef97b9c39a30l,0xcae7cde85e67e5d0l,0x3821f8319a58e521l,
-            0xbf474d1941479509l },
-          0 },
-        /* 101 << 136 */
-        { { 0x401bbab58fb15118l,0xb0376892dbf38b39l,0x10e4b9dd3a3ca42al,
-            0xa69c2693f8063ffel },
-          { 0xe10facdde07cb761l,0x96f4dde831d7759al,0xd702fdecc2cc7f9fl,
-            0x9e87e46e1ac0162cl },
-          0 },
-        /* 107 << 136 */
-        { { 0xb6cd60518479ca8fl,0xcca345e60968f6c7l,0x7b57248a64a9afe7l,
-            0x5552e3511d0d4db9l },
-          { 0x8f749b199dc68aabl,0x0fb86f06db1f7819l,0x23b300963143ac09l,
-            0x61c166d8abfbcb9bl },
-          0 },
-        /* 113 << 136 */
-        { { 0x4c96e85a43101165l,0x393a882fcf39bd19l,0xef9e1d42c2df6f33l,
-            0xe1775c990278f088l },
-          { 0xb1581929a9250d4al,0x582b0608c4168873l,0x0b3ffba3a1e68cd8l,
-            0x3f78147ef9490897l },
-          0 },
-        /* 116 << 136 */
-        { { 0x277b5177eb18ff20l,0x48002e9828f06d62l,0xece8d6c30e506d8dl,
-            0x5cde0a58cd9ff963l },
-          { 0x3b97cdb74e3baa0el,0x50560c0b631238f9l,0xe1c31b35cf79793dl,
-            0x95d12f14355e2178l },
-          0 },
-        /* 119 << 136 */
-        { { 0x0143f695bcc31b77l,0x3627aed14c49b65al,0x6e4f7a9ce441c183l,
-            0xb708c79de1bfa0a3l },
-          { 0xdbf0fc313a0726b8l,0xe04d82a8852d78bbl,0xb859001e3be5d398l,
-            0x92dcc20c8e89bd11l },
-          0 },
-        /* 125 << 136 */
-        { { 0x5f2416a3df9026b4l,0xffc01f3afcb29a1bl,0x18d02c9f1d94b20fl,
-            0xd93b0f2f81cfdef3l },
-          { 0xe6b0fd4713adf5f2l,0xcc9067b7ba06dff3l,0xb48c0cbb2256f842l,
-            0xc2ae741dfd34df2fl },
-          0 },
-    },
-    {
-        /* 0 << 144 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 144 */
-        { { 0x80531fe1c63c4962l,0x50541e89981fdb25l,0xdc1291a1fd4c2b6bl,
-            0xc0693a17a6df4fcal },
-          { 0xb2c4604e0117f203l,0x245f19630a99b8d0l,0xaedc20aac6212c44l,
-            0xb1ed4e56520f52a8l },
-          0 },
-        /* 3 << 144 */
-        { { 0x18f37a9c6bdf22dal,0xefbc432f90dc82dfl,0xc52cef8e5d703651l,
-            0x82887ba0d99881a5l },
-          { 0x7cec9ddab920ec1dl,0xd0d7e8c3ec3e8d3bl,0x445bc3954ca88747l,
-            0xedeaa2e09fd53535l },
-          0 },
-        /* 4 << 144 */
-        { { 0xa12b384ece53c2d0l,0x779d897d5e4606dal,0xa53e47b073ec12b0l,
-            0x462dbbba5756f1adl },
-          { 0x69fe09f2cafe37b6l,0x273d1ebfecce2e17l,0x8ac1d5383cf607fdl,
-            0x8035f7ff12e10c25l },
-          0 },
-        /* 5 << 144 */
-        { { 0xb7d4cc0f296c9005l,0x4b9094fa7b0aebdbl,0xe1bf10f1c00ec8d4l,
-            0xd807b1c4d667c101l },
-          { 0xa9412cdfbe713383l,0x435e063e81142ba1l,0x984c15ecaf0a6bdcl,
-            0x592c246092a3dab9l },
-          0 },
-        /* 7 << 144 */
-        { { 0x9365690016e23e9dl,0xcb220c6ba7cc41e1l,0xb36b20c369d6245cl,
-            0x2d63c348b62e9a6al },
-          { 0xa3473e19cdc0bcb5l,0x70f18b3f8f601b98l,0x8ad7a2c7cde346e4l,
-            0xae9f6ec3bd3aaa64l },
-          0 },
-        /* 9 << 144 */
-        { { 0x030223503274c7e1l,0x61ee8c934c4b6c26l,0x3c4397e3199389cel,
-            0xe0082600488757cel },
-          { 0xaac3a2df06b4dafbl,0x45af0700ddff5b6al,0x0a5974248c1d9fa0l,
-            0x1640087d391fc68bl },
-          0 },
-        /* 10 << 144 */
-        { { 0x26a43e41d07fa53dl,0x3154a78a74e35bc5l,0x7b768924e0da2f8cl,
-            0xba964a2b23613f9al },
-          { 0x5a548d35ba1d16c4l,0x2e1bfed1fb54d057l,0xff992136bc640205l,
-            0xf39cb9148156df29l },
-          0 },
-        /* 11 << 144 */
-        { { 0xf4873fcf4e5548bdl,0x8725da3f03ce57f0l,0xd82f5c95ca953258l,
-            0xac647f127cf0747el },
-          { 0xff2038b02d570bd5l,0xb0c2a767a13ae03fl,0xebaa27cde9932d16l,
-            0xa686e3fc1234e901l },
-          0 },
-        /* 13 << 144 */
-        { { 0x9f80435e63261eccl,0x6302a62e4337d6c9l,0x91916a49ca4958a0l,
-            0x554958993149d5d3l },
-          { 0x378d020b9f91de3cl,0x47b839a34dd25170l,0x2825854138b7f258l,
-            0xea5b14f7437e7decl },
-          0 },
-        /* 15 << 144 */
-        { { 0x74f08736b0018f44l,0xf4a03417b446d0f5l,0x66a4aa2fa40ca6b2l,
-            0x215679f0badb60edl },
-          { 0x3871195a323e4eefl,0x8f0940c320952b16l,0xfe8dac62879d5f7dl,
-            0x649cb623c1a6e875l },
-          0 },
-        /* 16 << 144 */
-        { { 0xecaff541338d6e43l,0x56f7dd734541d5ccl,0xb5d426de96bc88cal,
-            0x48d94f6b9ed3a2c3l },
-          { 0x6354a3bb2ef8279cl,0xd575465b0b1867f2l,0xef99b0ff95225151l,
-            0xf3e19d88f94500d8l },
-          0 },
-        /* 17 << 144 */
-        { { 0xa26a9087133ec108l,0x5dc5699f2712bdc0l,0x96903f4dd14224a9l,
-            0x3da5992429e47b80l },
-          { 0xb717712ff9dbba5al,0x9e52004b756391c9l,0xe669a11dcc9d219cl,
-            0x3b6e6b84d1d6c07dl },
-          0 },
-        /* 19 << 144 */
-        { { 0x5feec06a676feadbl,0xfc449bc59d69f322l,0x1d8d7b5e7cda8895l,
-            0x5ed54dc11a3314a7l },
-          { 0x1a11d2ae6de889c0l,0xb2a979724ced2bd9l,0x6ecf6989306a5ef6l,
-            0x1611d57b8cc8a249l },
-          0 },
-        /* 21 << 144 */
-        { { 0x2d9942ba007cbf87l,0x4e62bce6df3fc926l,0xe7eee5b0e4560affl,
-            0xe51963bb7cb009b7l },
-          { 0xaa5118cee29b37ddl,0x5cd84a4747263903l,0x3050caa6620055d8l,
-            0x7ef576a76c4b1e3dl },
-          0 },
-        /* 23 << 144 */
-        { { 0x9026a4dde6008ff1l,0x49e995ad1c8cd96cl,0x80722e73503e589bl,
-            0x05bcbce184c2bc26l },
-          { 0x255f9abbd4682c2cl,0xc42bcfc2f084d456l,0xa0eae9b0641c0767l,
-            0x1b45632d864c9a2dl },
-          0 },
-        /* 25 << 144 */
-        { { 0xcf25793b6ae024e0l,0x1b6607b484b5c4b0l,0x9579fa903f1624c8l,
-            0x37fb65be68bd57e8l },
-          { 0xd693a55efc39c203l,0x4e267ac4c87252e9l,0xb8d78bb09f899413l,
-            0xe4c014070b3b8508l },
-          0 },
-        /* 27 << 144 */
-        { { 0x662906e5bc3f3553l,0xde38d53531459684l,0x8f46a8c634f7280dl,
-            0xaaf91b873d24198el },
-          { 0xecd5ee115f9b117el,0xce00ffbe50ae8ddal,0x263a3d4e7710a9ael,
-            0x0ff3f721f26ba74fl },
-          0 },
-        /* 28 << 144 */
-        { { 0x4a8a4f47f0cefa69l,0xdc8e4cbaa4546866l,0x359ba69b23f603c1l,
-            0xdab4d601187b7ac5l },
-          { 0xa6ca4337c1ebc8d9l,0x9fa6585452b4074bl,0x1a4b4f81902fb733l,
-            0xd2bb5d7aa525deaal },
-          0 },
-        /* 29 << 144 */
-        { { 0xcc287ac2e6b3577al,0xd7528ca7f612003bl,0x8afdb6f12c1400b8l,
-            0x103a2ed346a2dd8dl },
-          { 0xc8f8c54d2ee21339l,0x8f011b92355a2d20l,0x81c6fc9f1346f2acl,
-            0xdb6042f005a6d24bl },
-          0 },
-        /* 31 << 144 */
-        { { 0xfc90e3630da4f996l,0x8ceca49daa6d6fe4l,0x1084affdbdfc619bl,
-            0x2029f672c1140b04l },
-          { 0x606ec25f136f3e5el,0x6d24149b02224c4al,0xabb0f142cfdfcf4cl,
-            0xe40d0419fab1a0edl },
-          0 },
-        /* 33 << 144 */
-        { { 0xcfdd08265cbccb84l,0x2258a16e88ad93c4l,0xb3ac365e728c5ad3l,
-            0x0bbf97808560df1fl },
-          { 0x42d08a39bad8c7b8l,0x1e3960106d3e8b91l,0xc332b39910274f58l,
-            0xe0a84dacce2ea778l },
-          0 },
-        /* 34 << 144 */
-        { { 0x113e1189ff432945l,0x4a0d2c3d04e1106cl,0xcde487744f3597b1l,
-            0x853b029174fa26eal },
-          { 0x2149e0ff02662e26l,0xb3181eaa5e6a030fl,0x086fc2159b006340l,
-            0xa1df84a694a4e0bbl },
-          0 },
-        /* 35 << 144 */
-        { { 0xc2cbd80ac99f8d3dl,0xe24b9d8f50ecf4f4l,0xf18d34728ecb126al,
-            0x83966662e1670aael },
-          { 0x1cece80fda5f594el,0x545e94ae65f391e0l,0xf3286dff93f98bb7l,
-            0xf945e6cdf5abf176l },
-          0 },
-        /* 36 << 144 */
-        { { 0x00ba5995dd95ac33l,0xa4957a40738f3bf4l,0x073539f599438a85l,
-            0xcc9c43acc2eb1411l },
-          { 0xe27501b5be2ec3d2l,0xa88d4ed057a85458l,0x870ae236755c8777l,
-            0x0933c5af89216cbal },
-          0 },
-        /* 37 << 144 */
-        { { 0xb5feea219e40e37fl,0x8c5ccb159e20fd60l,0xaeddc502ce8209a1l,
-            0xbdf873cc11e793b3l },
-          { 0xbc938103f0de8db5l,0x619fb72fb0e9d3d5l,0x800147cb588ed2adl,
-            0x260f92bb7901ced8l },
-          0 },
-        /* 39 << 144 */
-        { { 0x72dd9b089848c699l,0xc6086381185dacc1l,0x9489f11ff7d5a4c8l,
-            0xedb41d5628dee90fl },
-          { 0x1091db6b09af693cl,0xc7587551ae4b6413l,0x806aefb0768227adl,
-            0x4214b83eafb3c88el },
-          0 },
-        /* 40 << 144 */
-        { { 0xddfb02c4c753c45fl,0x18ca81b6f9c840fel,0x846fd09ab0f8a3e6l,
-            0xb1162adde7733dbcl },
-          { 0x7070ad20236e3ab6l,0xf88cdaf5b2a56326l,0x05fc8719997cbc7al,
-            0x442cd4524b665272l },
-          0 },
-        /* 41 << 144 */
-        { { 0x748819f9aa9c0ef5l,0xd7227d8ba458ad48l,0x8d67399f27aef626l,
-            0xc6241a1859bf0a4cl },
-          { 0xed9b0bfcc31cb9bbl,0x591254f896142555l,0x80e4bab461134151l,
-            0x7c5e680243efbd83l },
-          0 },
-        /* 43 << 144 */
-        { { 0x7f3f5a1706b9b7ddl,0x392132e75faeb417l,0x508ac4788fae38a2l,
-            0x2b854ead0d3499c3l },
-          { 0x26a687d8ef18bf0fl,0x62ff0c4a8ae00b61l,0x84111011f48578f2l,
-            0xa879f383cd0fcd3al },
-          0 },
-        /* 44 << 144 */
-        { { 0xeb7615aa202992f0l,0xde0562b38361d0b3l,0x789a302862027ee0l,
-            0xe3e3e9921048f899l },
-          { 0x07945c246deadab4l,0xeb06a15ec77d894el,0xb825af36bab1416bl,
-            0x99083c4df4b4e04fl },
-          0 },
-        /* 45 << 144 */
-        { { 0x4684a8f27b3ad6c3l,0x58238dbd928d9b6bl,0x31865b998da2c495l,
-            0xc1ca784fb8e7cda1l },
-          { 0xc9605dc71e081572l,0x8f560bcdef8ed104l,0x51f73981bd3feaedl,
-            0xc778aa4e4251c88dl },
-          0 },
-        /* 46 << 144 */
-        { { 0x9c0daa63aa502800l,0x73c7959a1e15b9bdl,0xd0447bcb7ab10f6cl,
-            0x05b8fbc8b8311bdel },
-          { 0xa8a74be1915d5c4el,0x38d41c1e0b7c0351l,0x5bb2d49ff52d6568l,
-            0x6c48d8eed5e43593l },
-          0 },
-        /* 47 << 144 */
-        { { 0x387b26d554159498l,0x92e92fad1ec34eb4l,0x0f88705e7a51b635l,
-            0x66bcbf4dedca735fl },
-          { 0x0a4c6112dcb896ccl,0x148e1dfe6fc72ad9l,0x3de977fd2b4c9585l,
-            0x0cd6e65f741e62cal },
-          0 },
-        /* 48 << 144 */
-        { { 0x7807f364b71698f5l,0x6ba418d29f7b605el,0xfd20b00fa03b2cbbl,
-            0x883eca37da54386fl },
-          { 0xff0be43ff3437f24l,0xe910b432a48bb33cl,0x4963a128329df765l,
-            0xac1dd556be2fe6f7l },
-          0 },
-        /* 49 << 144 */
-        { { 0x98ae40d53ce533bal,0x10342e1931fdd9c2l,0x54a255c8abf8b2bfl,
-            0x8facc41b15f6fef7l },
-          { 0x2e195565bc65b38bl,0xb9f3abaaeaea63cbl,0xede2ab9bf2b7518bl,
-            0x5e84102ce9ea3d81l },
-          0 },
-        /* 51 << 144 */
-        { { 0x162abc35113bc262l,0x8012f06829eb3fd4l,0x0e2727eb2c1ccf9cl,
-            0x89561ff44b455b20l },
-          { 0xc48db835ee3b1fd4l,0x4075ca86095bbfa7l,0x0c498d7d98745182l,
-            0x828fb93c5dfb5205l },
-          0 },
-        /* 52 << 144 */
-        { { 0xf95c7a5f0a76333bl,0x07603929cd607927l,0xabde328591028d3el,
-            0x55765e8fa032a400l },
-          { 0x3041f2cabed17cd7l,0x018a5b7b9a9e5923l,0xca4867975bb9bae3l,
-            0x741c802ecc382cb5l },
-          0 },
-        /* 53 << 144 */
-        { { 0x182a10311e5a3d8el,0xc352b8c8986c4d10l,0x7c50a172434c02ebl,
-            0x121d728c4420c41cl },
-          { 0x0f8eca2a8a51812fl,0xdb6c4a4ea5158430l,0x67944e0b8d8f4144l,
-            0x387cc2052405c77al },
-          0 },
-        /* 55 << 144 */
-        { { 0x98b36eb47e95ad76l,0x1973fa7d5f7e5ff7l,0xc4827abc6cc8a25cl,
-            0x4263a0d3ec822ae4l },
-          { 0x49f113f35217a6f4l,0xf27cc9bb81748aa6l,0x9cb81d97d822e08el,
-            0x698d2826b5c360bcl },
-          0 },
-        /* 57 << 144 */
-        { { 0x895f81514eb6d0b8l,0x32ef71df9f786536l,0x032a449430379a79l,
-            0xa8c1076218bdb83fl },
-          { 0x7a3b0b8fe53a4064l,0x0e724a54e2ce89b7l,0x565baeba7a31f6bcl,
-            0x12b9fa6387d18a7bl },
-          0 },
-        /* 59 << 144 */
-        { { 0x027231a3585bcfbdl,0x8690e977dca24269l,0x229c021afc6f1422l,
-            0xd98050d044084cabl },
-          { 0x6add95d79d4fd09al,0x12484c68c15b24ddl,0xa79a8f4facf4f551l,
-            0xf53204e27a83cbecl },
-          0 },
-        /* 60 << 144 */
-        { { 0xbc006413a906f7aal,0x9c8cd648bbeaf464l,0xaf5c7c64fb78cdf2l,
-            0xe45839eafabc2375l },
-          { 0x1eb89bd150012172l,0x9d0d76194488518cl,0xd55a7238bd534d32l,
-            0x48f35d5e95b4fe55l },
-          0 },
-        /* 61 << 144 */
-        { { 0xa6c5574f3e70a35al,0x35c11b5a8df97d97l,0x8f629f6cda85dd27l,
-            0x94dab294c218452el },
-          { 0xa2e1882e8916c731l,0xc02ce77c8929e350l,0xa7ed351fe4eff8afl,
-            0xeb76ef0654c3e1c1l },
-          0 },
-        /* 63 << 144 */
-        { { 0xc31d7cf87e3f5be5l,0x1472af0d3ce7f3a0l,0x226414f8f962e1afl,
-            0xd318e3df16f54295l },
-          { 0x9a3f6aaf41477cd3l,0x7034172f66ec6b2el,0xbea54eb537413a62l,
-            0x79f81262dc515e73l },
-          0 },
-        /* 64 << 144 */
-        { { 0x994f523a626332d5l,0x7bc388335561bb44l,0x005ed4b03d845ea2l,
-            0xd39d3ee1c2a1f08al },
-          { 0x6561fdd3e7676b0dl,0x620e35fffb706017l,0x36ce424ff264f9a8l,
-            0xc4c3419fda2681f7l },
-          0 },
-        /* 65 << 144 */
-        { { 0xb71a52b8b6bf8719l,0x0c7701f73196db36l,0xff1b936f53141cf4l,
-            0x684d8a3c1b94a31cl },
-          { 0xe555633ab52386e1l,0x9353a2af91450578l,0xc53db6fab99b14bcl,
-            0x1f2d42adcf619d36l },
-          0 },
-        /* 71 << 144 */
-        { { 0xbeb535ef3851c573l,0x3105fff585589843l,0xbe9f62a1d47aaf06l,
-            0x6bb2ee5d107e1131l },
-          { 0x82530247a4a7699fl,0x3fb475e144872afbl,0x8ad43fd73c4c49f2l,
-            0x3f7632882e045fc4l },
-          0 },
-        /* 77 << 144 */
-        { { 0x48440beb2924d7b2l,0x234163809c88fc57l,0xdc1d23d54ab08c2bl,
-            0x576400b6e70feab0l },
-          { 0x3b8afb8ba66da779l,0x7a7e3bf445468f16l,0x1976ddf3231f79dfl,
-            0xbe61c170b8531a9el },
-          0 },
-        /* 83 << 144 */
-        { { 0xf8d2dc768bf191b2l,0x3269e68813a39eb9l,0x104bb84be755eccfl,
-            0xb8d1330f2868f807l },
-          { 0x2b29c74cb06c6059l,0x3648baa1a6440a26l,0x5dfae323f1e6b2c9l,
-            0x9d0319b79330ac0al },
-          0 },
-        /* 89 << 144 */
-        { { 0x526ba3770e708bb2l,0x95c21ba327565dd9l,0x7071f46d48a0a873l,
-            0xe4b9959efed6cc74l },
-          { 0x1b16bfd1e08a5afal,0xc87fec98d1789782l,0x200186e946cfd068l,
-            0x88ea35a7280bf3ebl },
-          0 },
-        /* 95 << 144 */
-        { { 0x9e31943d42ac0e6cl,0xe61374cf1db8e40fl,0xbe27ea35a27db609l,
-            0x7c5b91d67bf192e9l },
-          { 0xc2af846defd0a24bl,0x1b2efc37669b647al,0xbfc3c38e5e58ef8al,
-            0xb6afb167e13ab5a2l },
-          0 },
-        /* 101 << 144 */
-        { { 0x08612d29b9f2aad4l,0x43c41330ad09dd17l,0xa45cb84a9f740519l,
-            0x0a9ea9a7512ec031l },
-          { 0x6e90dccaee747f35l,0xe4388bd1f0a1479bl,0x966140c4e20a9029l,
-            0x1bb1f65d7dd956abl },
-          0 },
-        /* 107 << 144 */
-        { { 0x066d206ea8f12bb3l,0xc9023b1b4325ec13l,0x1f56c72c96ead8ddl,
-            0x454050fd8003e4c2l },
-          { 0x9ca258a58917aa9dl,0xfe24b282d94593cfl,0xea66c203752741cfl,
-            0x5714268c295a895el },
-          0 },
-        /* 113 << 144 */
-        { { 0x72a9fbecc177d694l,0x38bb9387d68454d3l,0xa3d347bf590bc7d2l,
-            0xcb6e292605ccc234l },
-          { 0x588abfcf0d393c01l,0xf053dadf539e5568l,0xad7480fef2a8b157l,
-            0xff28c8bb018cac8fl },
-          0 },
-        /* 116 << 144 */
-        { { 0x12f1a00e7f5b8821l,0x0afa44e489b4b0cel,0x2dcaad8f6006338el,
-            0x79c022cdba41242bl },
-          { 0x7f6ef7e17871d350l,0x946c2a91674253adl,0xf686d137a9cbbdd9l,
-            0xa47ce2eaf7d4f9f2l },
-          0 },
-        /* 119 << 144 */
-        { { 0x1824991b205d40d6l,0x49cca1c085046a90l,0x7e23c1acd005e3c2l,
-            0x093a9ae6d102c8ffl },
-          { 0xf4791082d2f40843l,0xe456021811645483l,0x8a59c3b0fd3a6b39l,
-            0x39130e7f820de158l },
-          0 },
-        /* 125 << 144 */
-        { { 0xf7eef88d83b90783l,0xff60762af336d581l,0xf64f2d5dd801f5a0l,
-            0x672b6ee7d6b3b8b9l },
-          { 0xa2a2dceb08034d69l,0x3eca27f635638218l,0xe7065986fa17fefdl,
-            0xf1b74445f5803af1l },
-          0 },
-    },
-    {
-        /* 0 << 152 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 152 */
-        { { 0x32670d2f7189e71fl,0xc64387485ecf91e7l,0x15758e57db757a21l,
-            0x427d09f8290a9ce5l },
-          { 0x846a308f38384a7al,0xaac3acb4b0732b99l,0x9e94100917845819l,
-            0x95cba111a7ce5e03l },
-          0 },
-        /* 3 << 152 */
-        { { 0x37a01e48a105fc8el,0x769d754a289ba48cl,0xc08c6fe1d51c2180l,
-            0xb032dd33b7bd1387l },
-          { 0x953826db020b0aa6l,0x05137e800664c73cl,0xc66302c4660cf95dl,
-            0x99004e11b2cef28al },
-          0 },
-        /* 4 << 152 */
-        { { 0x214bc9a7d298c241l,0xe3b697ba56807cfdl,0xef1c78024564eadbl,
-            0xdde8cdcfb48149c5l },
-          { 0x946bf0a75a4d2604l,0x27154d7f6c1538afl,0x95cc9230de5b1fccl,
-            0xd88519e966864f82l },
-          0 },
-        /* 5 << 152 */
-        { { 0x1013e4f796ea6ca1l,0x567cdc2a1f792871l,0xadb728705c658d45l,
-            0xf7c1ff4ace600e98l },
-          { 0xa1ba86574b6cad39l,0x3d58d634ba20b428l,0xc0011cdea2e6fdfbl,
-            0xa832367a7b18960dl },
-          0 },
-        /* 7 << 152 */
-        { { 0x1ecc032af416448dl,0x4a7e8c10ec76d971l,0x854f9805b90b6eael,
-            0xfd0b15324bed0594l },
-          { 0x89f71848d98b5ca3l,0xd01fe5fcf039b3efl,0x4481332e627bda2el,
-            0xe67cecd7a5073e41l },
-          0 },
-        /* 9 << 152 */
-        { { 0x2ab0bce94595a859l,0x4d8c2da082084ee7l,0x21ff8be5acca3d3cl,
-            0xd8b805337827f633l },
-          { 0xf74e8c026becabbfl,0x9fae4dbefede4828l,0xd3885a5b3cc46bcfl,
-            0x2d535e2b6e6ad144l },
-          0 },
-        /* 10 << 152 */
-        { { 0x63d3444507d9e240l,0x6fbadf4338cff7e6l,0x8717624a959c9461l,
-            0xd7d951c411fb775bl },
-          { 0x4049161af6fc3a2bl,0x0dfa2547a1a8e98dl,0xeca780d439c2139cl,
-            0xd8c2d8cbd73ea8efl },
-          0 },
-        /* 11 << 152 */
-        { { 0x3aa1974f07605b28l,0x4f3d82a71e296255l,0xbbe5ea03b4e23f16l,
-            0x8f5c6c6b4e654193l },
-          { 0x27181182d3e8ab01l,0xc68bb231f3ba6bc2l,0x90a244d820af1fd7l,
-            0x605abc055b713f4fl },
-          0 },
-        /* 13 << 152 */
-        { { 0xca5fe19bd221991al,0x271ff066f05f400el,0x9d46ec4c9cf09896l,
-            0xdcaa8dfdec4febc3l },
-          { 0xaa3995a0adf19d04l,0xc98634239da573a6l,0x378058b2f2465b2bl,
-            0x20d389f9b4c31612l },
-          0 },
-        /* 15 << 152 */
-        { { 0xd7d199c7b7631c9dl,0x1322c2b8bb123942l,0xe662b68fbe8b6848l,
-            0xc970faf2cde99b14l },
-          { 0x61b27134b06655e5l,0xadcef8f781365d89l,0x917b5ab521b851aal,
-            0x4f4472121cf694a7l },
-          0 },
-        /* 16 << 152 */
-        { { 0x488f1185ca8d9d1al,0xadf2c77dd987ded2l,0x5f3039f060c46124l,
-            0xe5d70b7571e095f4l },
-          { 0x82d586506260e70fl,0x39d75ea7f750d105l,0x8cf3d0b175bac364l,
-            0xf3a7564d21d01329l },
-          0 },
-        /* 17 << 152 */
-        { { 0x241e3907fe44e547l,0x42d464c36b992187l,0xeaa8fa989ba72f28l,
-            0x965a8b8f6afbb81fl },
-          { 0x69356a7a8b375ea5l,0x22501ec741bdcc83l,0xf80f4e1445fb180cl,
-            0xc0b12e95f5e1b822l },
-          0 },
-        /* 19 << 152 */
-        { { 0x977234e05483dc02l,0x0167430c13d8dcb2l,0xa9971278049912edl,
-            0xab044b18ca40fa39l },
-          { 0xac9587449ff3896cl,0x75bb32eb860d1240l,0xf807071f6b958654l,
-            0x67d2d3dc7121b4b6l },
-          0 },
-        /* 21 << 152 */
-        { { 0x3b61e67722f9f017l,0x9c593eb1a8541696l,0xbeba950050eda653l,
-            0x07b5a48f5e673f6al },
-          { 0x748dca0013257aa3l,0x6bbddf9a7372e942l,0xc012f4badde83977l,
-            0x6e59b327392ddb53l },
-          0 },
-        /* 23 << 152 */
-        { { 0xb2f3fff641356603l,0x50e63537545f042bl,0x55e5149770eb530dl,
-            0x5a7383c310860c3bl },
-          { 0x7be30382ea669a09l,0xfdf735d289cc1c7fl,0x6e51ed844e0607cfl,
-            0xdab566df4893795el },
-          0 },
-        /* 25 << 152 */
-        { { 0x20e3be0f8920690dl,0x98db80eaac279c05l,0x4cd5c60a44b8a4f8l,
-            0xeda7e91c7b0335f4l },
-          { 0x45c1302a41ee5713l,0x1f6455fe588508d0l,0x82cb7311163d2fc3l,
-            0xe866b90322f10b71l },
-          0 },
-        /* 27 << 152 */
-        { { 0xc217a2e259b4041el,0x85b96ce274526cbfl,0xcbfc4f5473f12687l,
-            0x097caa5fd40225e7l },
-          { 0x0871ad406e91293fl,0x5f2ea207033b98ecl,0x0b3b8fac1f27d37al,
-            0x7d72dd4c7f03876cl },
-          0 },
-        /* 28 << 152 */
-        { { 0xb51a40a51e6a75c1l,0x24327c760ea7d817l,0x0663018207774597l,
-            0xd6fdbec397fa7164l },
-          { 0x20c99dfb13c90f48l,0xd6ac5273686ef263l,0xc6a50bdcfef64eebl,
-            0xcd87b28186fdfc32l },
-          0 },
-        /* 29 << 152 */
-        { { 0x2f0c49ac95861439l,0xcdcb051b2e36e38al,0x459474080ae20c0cl,
-            0x374baad2dddf0aabl },
-          { 0x291abc85d5d104a4l,0x0758001958a0657cl,0xd0f428e1a905ea13l,
-            0x12599ddcf7241dbfl },
-          0 },
-        /* 31 << 152 */
-        { { 0x16222ce81bc3c403l,0xbacc1508fc13ca02l,0xfa98db4d920ee8e9l,
-            0xe5fc39c4df12a359l },
-          { 0x4e8c9b90188733e8l,0x04283dd81394936cl,0x93b3db51cd130432l,
-            0x33bfe3163c93ce31l },
-          0 },
-        /* 33 << 152 */
-        { { 0xb48591e9840b1724l,0x1009559f5885ec6fl,0x45ee51121b077620l,
-            0x848f9800f1f4cc8al },
-          { 0x6ec1e0f74e97bceal,0x953bc23a98e80642l,0x9f0d1e8194ce7181l,
-            0xeb3e6b9700eec596l },
-          0 },
-        /* 34 << 152 */
-        { { 0x6d34b39bff7514dal,0x29ffe49825be3634l,0x63e56598f28c8b82l,
-            0x78b99133aab41bcel },
-          { 0x11febd5a52563180l,0xa3be94c5c356a8c0l,0x5e9b422e0d61f864l,
-            0x2bf4ca1278fd259el },
-          0 },
-        /* 35 << 152 */
-        { { 0x8f60e40266914514l,0x6d9e280fef178167l,0x2ff7aec9e2949a48l,
-            0x422389ce72d37511l },
-          { 0xe9b156f3307ac1d2l,0x1cb581a78518e79fl,0x56d43f302185cf82l,
-            0x8d46c5aade59562cl },
-          0 },
-        /* 36 << 152 */
-        { { 0x50fc0711745edc11l,0x9dd9ad7d3dc87558l,0xce6931fbb49d1e64l,
-            0x6c77a0a2c98bd0f9l },
-          { 0x62b9a6296baf7cb1l,0xcf065f91ccf72d22l,0x7203cce979639071l,
-            0x09ae4885f9cb732fl },
-          0 },
-        /* 37 << 152 */
-        { { 0xd007d682e4b35428l,0x80c162315bcdc0d6l,0xe55a86bd36fce9b2l,
-            0x16772edb969a87cfl },
-          { 0xff323a2d3f370c94l,0x8d3c8028bf3c1afcl,0x4e1591e73b0c3fafl,
-            0xfbd6475cb981ce83l },
-          0 },
-        /* 39 << 152 */
-        { { 0xcf414ae3315b2471l,0xf54abf8033168de6l,0x6883efc5df5cdb24l,
-            0x3eca788c8efe81acl },
-          { 0xdb58c6c778eeccadl,0x3c77939082fecfb7l,0x5736cdd9c9b513f3l,
-            0xab7e6ea57b02aaf2l },
-          0 },
-        /* 40 << 152 */
-        { { 0x5e7c3becee8314f3l,0x1c068aeddbea298fl,0x08d381f17c80acecl,
-            0x03b56be8e330495bl },
-          { 0xaeffb8f29222882dl,0x95ff38f6c4af8bf7l,0x50e32d351fc57d8cl,
-            0x6635be5217b444f0l },
-          0 },
-        /* 41 << 152 */
-        { { 0x2cec7ba64805d895l,0x4c8399870ac78e7cl,0x031ad6c7f79416c5l,
-            0x1b2f2621f1838d2fl },
-          { 0x60835eac91447f90l,0x59147af1f9bab5d9l,0x7a3005d6f393f175l,
-            0x8cf3c468c4120ba2l },
-          0 },
-        /* 43 << 152 */
-        { { 0xeccffc7d8a2c1f08l,0x308916d37e384bd4l,0x6b8c2ff55e366384l,
-            0xf4b2850d03e4747cl },
-          { 0xe839c569e96c1488l,0xa46ff7f956c9cb10l,0xd968c74c362fd172l,
-            0x2aa7fe4cad6bb601l },
-          0 },
-        /* 44 << 152 */
-        { { 0x04d15276a5177900l,0x4e1dbb47f6858752l,0x5b475622c615796cl,
-            0xa6fa0387691867bfl },
-          { 0xed7f5d562844c6d0l,0xc633cf9b03a2477dl,0xf6be5c402d3721d6l,
-            0xaf312eb7e9fd68e6l },
-          0 },
-        /* 45 << 152 */
-        { { 0xf3b8164eec04c847l,0xa305ca93fe65816cl,0xa65f9963c7e2ce52l,
-            0xc448005198882cfcl },
-          { 0x46a998df05c165bbl,0xc38f4edf9dfe1e98l,0xb96ec43f8739f77al,
-            0x10a23af9313b40bfl },
-          0 },
-        /* 46 << 152 */
-        { { 0xe476c3e3ee668e0cl,0xcec6a984478197c2l,0xc9fa1d68897147c1l,
-            0x4e6aec0ea6465793l },
-          { 0xedca9db76b219c3bl,0xa2cd57942e508d3bl,0x38b384663936e02al,
-            0x0b8d3b4ca54ce90fl },
-          0 },
-        /* 47 << 152 */
-        { { 0x66e06537af08e0fcl,0x70fe0f2a907f1a93l,0x8c25245285ec1647l,
-            0x0b8b2964d5560eddl },
-          { 0xda45a326f3ef8e14l,0xf3adf9a6abc3494bl,0xbbdd93c11eda0d92l,
-            0x1b5e12c609912773l },
-          0 },
-        /* 48 << 152 */
-        { { 0x242792d2e7417ce1l,0xff42bc71970ee7f5l,0x1ff4dc6d5c67a41el,
-            0x77709b7b20882a58l },
-          { 0x3554731dbe217f2cl,0x2af2a8cd5bb72177l,0x58eee769591dd059l,
-            0xbb2930c94bba6477l },
-          0 },
-        /* 49 << 152 */
-        { { 0x5d9d507551d01848l,0x53dadb405b600d1el,0x7ba5b4dc5cb0a9a3l,
-            0xdb85b04c6795e547l },
-          { 0x480e7443f0354843l,0xc7efe6e813012322l,0x479b674a2aeee1e6l,
-            0xf5481f19704f4ea3l },
-          0 },
-        /* 51 << 152 */
-        { { 0x76a38d6978c7816el,0xe020c87df84ec554l,0x99af2f78f9818010l,
-            0x31cf103d988136eal },
-          { 0x6b095a114816a5aal,0x5a4cd2a4eff0a4afl,0x543041a5892e5e04l,
-            0x460f94c30aab9ee1l },
-          0 },
-        /* 52 << 152 */
-        { { 0x863ee0477d930cfcl,0x4c262ad1396fd1f4l,0xf4765bc8039af7e1l,
-            0x2519834b5ba104f6l },
-          { 0x7cd61b4cd105f961l,0xa5415da5d63bca54l,0x778280a088a1f17cl,
-            0xc49689492329512cl },
-          0 },
-        /* 53 << 152 */
-        { { 0x282d92b48cd3948al,0x95d219dfe168205bl,0xf6111a6f87bf3abcl,
-            0x910f8ce655fee9f2l },
-          { 0xb6c806f74f71ac89l,0xd0cc300fb7235f73l,0xfe37ccb47d0d45bbl,
-            0x5b2445f6952f0eaal },
-          0 },
-        /* 55 << 152 */
-        { { 0x03870be447141962l,0x8b79033f4a2b3f7fl,0xb6983b5ed2e5e274l,
-            0x2a2f8018501ed99cl },
-          { 0x07a92eb9feb49656l,0x063f0a9e482e2972l,0x413be27a57435832l,
-            0x56363c5f6f9d3de1l },
-          0 },
-        /* 57 << 152 */
-        { { 0xd247153163b50214l,0x32b435eeb2b897del,0xc49f0b01b05df4del,
-            0x97b6aa40b7df9b91l },
-          { 0x58ff34ec8ec39d78l,0xab0889005e0114a3l,0x6872b4de4822b7b8l,
-            0x7614c0d0ab239073l },
-          0 },
-        /* 59 << 152 */
-        { { 0x81891d378aa5d80al,0xf48ca24292e45f2cl,0xba711b6c0d04904cl,
-            0x5992cda349f16ed6l },
-          { 0x18b9a739790593eel,0x8b98e84dc4ba16d1l,0xac55701cb7b81615l,
-            0xadb4533b15822291l },
-          0 },
-        /* 60 << 152 */
-        { { 0x6210db7181236c97l,0x74f7685b3ee0781fl,0x4df7da7ba3e41372l,
-            0x2aae38b1b1a1553el },
-          { 0x1688e222f6dd9d1bl,0x576954485b8b6487l,0x478d21274b2edeaal,
-            0xb2818fa51e85956al },
-          0 },
-        /* 61 << 152 */
-        { { 0xc0677533f255ba8el,0x2bdae2a1efa2aabel,0xf7aebbd4b086c8a6l,
-            0x148455d992cb1147l },
-          { 0xa084e8d715402565l,0x33f111a8fa41bf23l,0x4bc990d627ac189bl,
-            0x48dbe6569d505f76l },
-          0 },
-        /* 63 << 152 */
-        { { 0x59df7fab596766f3l,0x4cadcbfe604f26e4l,0x0cf199338a6af592l,
-            0x3af1ace287b826c1l },
-          { 0xf09a5b38ee60684el,0xa04cbeda4ed7c711l,0xdb28c42eb1731040l,
-            0x75fcc0ec2e6e6523l },
-          0 },
-        /* 64 << 152 */
-        { { 0x1e6adddaf176f2c0l,0x01ca4604e2572658l,0x0a404ded85342ffbl,
-            0x8cf60f96441838d6l },
-          { 0x9bbc691cc9071c4al,0xfd58874434442803l,0x97101c85809c0d81l,
-            0xa7fb754c8c456f7fl },
-          0 },
-        /* 65 << 152 */
-        { { 0x4374020072196f30l,0x59ed0dc0dcd6c935l,0x17d4ed8e5034161bl,
-            0x8abe3e13009e7170l },
-          { 0xe51c41c96c791456l,0xc671807704d72bb6l,0xd4309cf56bba424al,
-            0x6122b951d0ca4ceal },
-          0 },
-        /* 71 << 152 */
-        { { 0xdfdb2e9c4278982bl,0xf3a282b32d6a2a61l,0x5611650cd2f2b03cl,
-            0xa62c177f43f7f83al },
-          { 0x372310ab4c593d32l,0x2bb6903a2b570f9cl,0x2930da3df43af904l,
-            0x2bbd04aa2c8a5a7dl },
-          0 },
-        /* 77 << 152 */
-        { { 0x10c324c007e536del,0xc456836d377be1b4l,0x9a627d75d785af3fl,
-            0xde74559118b58b31l },
-          { 0xeac83ea60c47239al,0x35da24abbc02f670l,0x2d4abde0c3af6e63l,
-            0xac53acba5a7ebf1bl },
-          0 },
-        /* 83 << 152 */
-        { { 0x2b03ec2efd9a9f3el,0xc967cd2b9d898a09l,0xb24bcba8039dc4f6l,
-            0x0ea1d297061ada1el },
-          { 0x3a7a25fbc134b8bcl,0x846282d6f61cd312l,0xfa1de0d2e0d778d9l,
-            0xf75fad4ef09be264l },
-          0 },
-        /* 89 << 152 */
-        { { 0x7d35695bcf74afb3l,0x34d43d9f15bb36fbl,0x15f0b43960b45fbel,
-            0xb15db8d84f38ec06l },
-          { 0x93ce7d50f7da1406l,0x2db97edd9f076aaal,0x27ebb9aa354429dcl,
-            0xf97eb5c446ace469l },
-          0 },
-        /* 95 << 152 */
-        { { 0x758fa2312dcf498fl,0xaa8c14d15cf3853al,0x416f5dab097d786al,
-            0xceec00ef38f242a0l },
-          { 0x2f8b10b9d8b75ef2l,0xee64912b2281be6al,0xa883481aa382a51el,
-            0x9442300f61b16b8al },
-          0 },
-        /* 101 << 152 */
-        { { 0x80e7fbc4f4b171e1l,0xdd2246f5661564a4l,0xcf08d73cd00d4e54l,
-            0xf725f5389fca9a30l },
-          { 0xd9607358af20debel,0xa97c81e16f7d1cf2l,0x72794ae70dedfb2al,
-            0xc328cb93159ff29dl },
-          0 },
-        /* 107 << 152 */
-        { { 0xaf9491d6252f6d59l,0x6744d7518feda60dl,0xa485f8aa34c5c048l,
-            0x2ed794b4b50ea53bl },
-          { 0x0da82650db26c289l,0xed3ab4c50904af55l,0x425eda1176544463l,
-            0x917be5f48939b29bl },
-          0 },
-        /* 113 << 152 */
-        { { 0xa2e72d0f8e208e5dl,0x5a5e4344234a5fedl,0x6dcc56535005bee8l,
-            0x09d0c254854e2e04l },
-          { 0xade4bcdba82f0789l,0x5a3e3cd4ec460a91l,0x6b1a867be76695b2l,
-            0xd1eb9df0a28b9331l },
-          0 },
-        /* 116 << 152 */
-        { { 0x3f5cf5f678e62ddcl,0x2267c45407fd752bl,0x5e361b6b5e437bbel,
-            0x95c595018354e075l },
-          { 0xec725f85f2b254d9l,0x844b617d2cb52b4el,0xed8554f5cf425fb5l,
-            0xab67703e2af9f312l },
-          0 },
-        /* 119 << 152 */
-        { { 0x8dcc920005fb96bbl,0x29d2442470f84705l,0x540bb6e63f09628fl,
-            0x07f8b4de2a9c2359l },
-          { 0xb8e002d1957e41dcl,0x9a0fe82b9e683a3fl,0x996b1a5250e633fdl,
-            0x748a11e500c669cal },
-          0 },
-        /* 125 << 152 */
-        { { 0x0593a788581dfd6el,0x99f1164f64e1b329l,0x1142c44b1defddbbl,
-            0xbc95c9c7660b9036l },
-          { 0xf24b5a47079179ccl,0x6175b52c21f7033bl,0x8b5d84183bc2eec0l,
-            0xc1332c8272d12670l },
-          0 },
-    },
-    {
-        /* 0 << 160 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 160 */
-        { { 0xd433e50f6d3549cfl,0x6f33696ffacd665el,0x695bfdacce11fcb4l,
-            0x810ee252af7c9860l },
-          { 0x65450fe17159bb2cl,0xf7dfbebe758b357bl,0x2b057e74d69fea72l,
-            0xd485717a92731745l },
-          0 },
-        /* 3 << 160 */
-        { { 0x6c8d0aa9b898fd52l,0x2fb38a57be9af1a7l,0xe1f2b9a93b4f03f8l,
-            0x2b1aad44c3f0cc6fl },
-          { 0x58b5332e7cf2c084l,0x1c57d96f0367d26dl,0x2297eabdfa6e4a8dl,
-            0x65a947ee4a0e2b6al },
-          0 },
-        /* 4 << 160 */
-        { { 0xaaafafb0285b9491l,0x01a0be881e4c705el,0xff1d4f5d2ad9caabl,
-            0x6e349a4ac37a233fl },
-          { 0xcf1c12464a1c6a16l,0xd99e6b6629383260l,0xea3d43665f6d5471l,
-            0x36974d04ff8cc89bl },
-          0 },
-        /* 5 << 160 */
-        { { 0xf535b616fdd5b854l,0x592549c85728719fl,0xe231468606921cadl,
-            0x98c8ce34311b1ef8l },
-          { 0x28b937e7e9090b36l,0x67fc3ab90bf7bbb7l,0x12337097a9d87974l,
-            0x3e5adca1f970e3fel },
-          0 },
-        /* 7 << 160 */
-        { { 0xcdcc68a7b3f85ff0l,0xacd21cdd1a888044l,0xb6719b2e05dbe894l,
-            0xfae1d3d88b8260d4l },
-          { 0xedfedece8a1c5d92l,0xbca01a94dc52077el,0xc085549c16dd13edl,
-            0xdc5c3bae495ebaadl },
-          0 },
-        /* 9 << 160 */
-        { { 0xcc17063fbe7b643al,0x7872e1c846085760l,0x86b0fffbb4214c9el,
-            0xb18bbc0e72bf3638l },
-          { 0x8b17de0c722591c9l,0x1edeab1948c29e0cl,0x9fbfd98ef4304f20l,
-            0x2d1dbb6b9c77ffb6l },
-          0 },
-        /* 10 << 160 */
-        { { 0xf53f2c658ead09f7l,0x1335e1d59780d14dl,0x69cc20e0cd1b66bcl,
-            0x9b670a37bbe0bfc8l },
-          { 0xce53dc8128efbeedl,0x0c74e77c8326a6e5l,0x3604e0d2b88e9a63l,
-            0xbab38fca13dc2248l },
-          0 },
-        /* 11 << 160 */
-        { { 0x255616d3c7141771l,0xa86691ab2f226b66l,0xda19fea4b3ca63a9l,
-            0xfc05dc42ae672f2bl },
-          { 0xa9c6e786718ba28fl,0x07b7995b9c66b984l,0x0f434f551b3702f2l,
-            0xd6f6212fda84eeffl },
-          0 },
-        /* 13 << 160 */
-        { { 0x4b0e7987b5b41d78l,0xea7df9074bf0c4f8l,0xb4d03560fab80ecdl,
-            0x6cf306f6fb1db7e5l },
-          { 0x0d59fb5689fd4773l,0xab254f4000f9be33l,0x18a09a9277352da4l,
-            0xf81862f5641ea3efl },
-          0 },
-        /* 15 << 160 */
-        { { 0xb59b01579f759d01l,0xa2923d2f7eae4fdel,0x18327757690ba8c0l,
-            0x4bf7e38b44f51443l },
-          { 0xb6812563b413fc26l,0xedb7d36379e53b36l,0x4fa585c4c389f66dl,
-            0x8e1adc3154bd3416l },
-          0 },
-        /* 16 << 160 */
-        { { 0xd3b3a13f1402b9d0l,0x573441c32c7bc863l,0x4b301ec4578c3e6el,
-            0xc26fc9c40adaf57el },
-          { 0x96e71bfd7493cea3l,0xd05d4b3f1af81456l,0xdaca2a8a6a8c608fl,
-            0x53ef07f60725b276l },
-          0 },
-        /* 17 << 160 */
-        { { 0x971e9eedd5098497l,0x97692be63077d8a7l,0xb57e02ad79625a8al,
-            0x5e3d20f6a688ecd5l },
-          { 0xa4431a28188f964dl,0xd4eb23bd5a11c1dbl,0xfcda853eadc7446fl,
-            0x9e2e98b593c94046l },
-          0 },
-        /* 19 << 160 */
-        { { 0x4a649b66eddaa4f1l,0x35a04f185e690c50l,0x1639bdcff908bc53l,
-            0xce6d525c121726e8l },
-          { 0x70f34948902b402cl,0x3a40c6950e290579l,0x7b0ed90f469a0085l,
-            0xecb979c60189c501l },
-          0 },
-        /* 21 << 160 */
-        { { 0x847e2bde5cee8d07l,0x1bed198cd3340037l,0x439ffb3ce41586e3l,
-            0x594980f1856f15b0l },
-          { 0x22c3b86c6e9307c6l,0xf8b3ee08876382dbl,0x850c628e628f3f30l,
-            0x22ec0acb51ee3659l },
-          0 },
-        /* 23 << 160 */
-        { { 0xa4052591efcef5a0l,0x82692a47106d55afl,0xdac3ea88e6ead453l,
-            0xaa1368fcf3dfd875l },
-          { 0x87bc688aa0c539eal,0x905e206040b1de3el,0x072240b8f1d52452l,
-            0x3ebf0644d57b6580l },
-          0 },
-        /* 25 << 160 */
-        { { 0x12109bcc07a0b2f8l,0x336f87d2ca23f14cl,0xb39ae282452a2ea2l,
-            0x8e085f5bab59a500l },
-          { 0xf7daeb69b63f015cl,0x44c555bcacb47b38l,0x96190454b623910al,
-            0x4b666e2255b41b70l },
-          0 },
-        /* 27 << 160 */
-        { { 0xf146914eb53419fdl,0xd2109b07493e88bfl,0x30bf9cbccc54bcd5l,
-            0xcf9ea59750e34a1fl },
-          { 0x70ade8a59588591dl,0xf668be676b41c269l,0x3497c58f78df2e6bl,
-            0x0fad05cc71042b56l },
-          0 },
-        /* 28 << 160 */
-        { { 0x27f536e049ce89e7l,0x18908539cc890cb5l,0x308909abd83c2aa1l,
-            0xecd3142b1ab73bd3l },
-          { 0x6a85bf59b3f5ab84l,0x3c320a68f2bea4c6l,0xad8dc5386da4541fl,
-            0xeaf34eb0b7c41186l },
-          0 },
-        /* 29 << 160 */
-        { { 0x709da836093aa5f6l,0x567a9becb4644edel,0xae02a46044466b0cl,
-            0xc80b237a407f1b3bl },
-          { 0x451df45ab4168a98l,0xdc9b40ef24a3f7c9l,0x23593ef32671341dl,
-            0x40f4533190b90faal },
-          0 },
-        /* 31 << 160 */
-        { { 0x7f97768e922f36e3l,0x936943f8491034a2l,0x72f6c17f21483753l,
-            0x5489fa0cb2918619l },
-          { 0x55b31aa59cc21a46l,0xde4cc71a8e54ab14l,0x942cb8be9eaff8b0l,
-            0xe38f6116d1755231l },
-          0 },
-        /* 33 << 160 */
-        { { 0xf0c0606a395b39abl,0x0efcbc699b5166a5l,0x85995e6895453d85l,
-            0xadc9a2920806ee5cl },
-          { 0xc3662e804928fe09l,0x2a2ddcc6969c87e7l,0xa02d7947111d319dl,
-            0xde23bcf12d20f66dl },
-          0 },
-        /* 34 << 160 */
-        { { 0xc47cb3395f6d4a09l,0x6b4f355cee52b826l,0x3d100f5df51b930al,
-            0xf4512fac9f668f69l },
-          { 0x546781d5206c4c74l,0xd021d4d4cb4d2e48l,0x494a54c2ca085c2dl,
-            0xf1dbaca4520850a8l },
-          0 },
-        /* 35 << 160 */
-        { { 0xb2d15b14a911cc2bl,0xab2dfaf7643e28eal,0xfccc9ed1f52c4c2dl,
-            0xfb4b1d4a09d8faa3l },
-          { 0x6fd72a9b7f5ce767l,0x0233c856a287e2b5l,0xd42135e05775ebb9l,
-            0xb3c9dada7376568bl },
-          0 },
-        /* 36 << 160 */
-        { { 0x63c79326490a1acal,0xcb64dd9c41526b02l,0xbb772591a2979258l,
-            0x3f58297048d97846l },
-          { 0xd66b70d17c213ba7l,0xc28febb5e8a0ced4l,0x6b911831c10338c1l,
-            0x0d54e389bf0126f3l },
-          0 },
-        /* 37 << 160 */
-        { { 0x5952996b5306af1bl,0x99f444f4354b67bel,0x6f670181633a2928l,
-            0x289023f0e9bdc4a6l },
-          { 0xcbed12148f7455a2l,0x501ace2f659a4858l,0x83ee678d5f8e1784l,
-            0x95c984587335c5bdl },
-          0 },
-        /* 39 << 160 */
-        { { 0x2e25a1f3e0233000l,0xed0028cd44fe8ba9l,0x447501a6021d43b3l,
-            0x4ec203906b4dffccl },
-          { 0x50642f9ad0169740l,0x9360003373cc58adl,0x825f1a82fe9cf9acl,
-            0x456194c653242bd6l },
-          0 },
-        /* 40 << 160 */
-        { { 0x40242efeb483689bl,0x2575d3f6513ac262l,0xf30037c80ca6db72l,
-            0xc9fcce8298864be2l },
-          { 0x84a112ff0149362dl,0x95e575821c4ae971l,0x1fa4b1a8945cf86cl,
-            0x4525a7340b024a2fl },
-          0 },
-        /* 41 << 160 */
-        { { 0x83205e8f5db5e2b1l,0x94e7a2621e311c12l,0xe1cac7333e37068fl,
-            0xe3f43f6d39965acfl },
-          { 0xd28db9e854d905bal,0x686f372a101f2162l,0x409cfe5d3d1b46d4l,
-            0x17648f1cbd0bb63al },
-          0 },
-        /* 43 << 160 */
-        { { 0xef83315b821f4ee4l,0xb90766998ba78b4dl,0xee6a15880fce5260l,
-            0x828f4a72d754affbl },
-          { 0x4650ec7daaae54d2l,0x3174301f1057efe9l,0x174e0683eb7704cel,
-            0xb7e6aeb357eb0b14l },
-          0 },
-        /* 44 << 160 */
-        { { 0xcaead1c2c905d85fl,0xe9d7f7900733ae57l,0x24c9a65cf07cdd94l,
-            0x7389359ca4b55931l },
-          { 0xf58709b7367e45f7l,0x1f203067cb7e7adcl,0x82444bffc7b72818l,
-            0x07303b35baac8033l },
-          0 },
-        /* 45 << 160 */
-        { { 0xd59528fb38a0dc96l,0x8179dc9088d0e857l,0x55e9ba039ed4b1afl,
-            0x8a2c0dc787b74cacl },
-          { 0xe8ca91aeef1c0006l,0x67f59ab2de0e15d4l,0xba0cddf86e6634d2l,
-            0x352803657b7ba591l },
-          0 },
-        /* 46 << 160 */
-        { { 0x1e1ee4e4d13b7ea1l,0xe6489b24e0e74180l,0xa5f2c6107e70ef70l,
-            0xa1655412bdd10894l },
-          { 0x555ebefb7af4194el,0x533c1c3c8e89bd9cl,0x735b9b5789895856l,
-            0x15fb3cd2567f5c15l },
-          0 },
-        /* 47 << 160 */
-        { { 0xef07bfedfb0986c7l,0xde138afe47c1659al,0x8b79c159a555e907l,
-            0x21d572f1125518bbl },
-          { 0x2005999ad320410cl,0x4167dc469484414bl,0x0cd965c34c6aaefdl,
-            0x2a1abc9a0e1d5e9dl },
-          0 },
-        /* 48 << 160 */
-        { { 0x057fed45526f09fdl,0xe8a4f10c8128240al,0x9332efc4ff2bfd8dl,
-            0x214e77a0bd35aa31l },
-          { 0x32896d7314faa40el,0x767867ec01e5f186l,0xc9adf8f117a1813el,
-            0xcb6cda7854741795l },
-          0 },
-        /* 49 << 160 */
-        { { 0xadfaf39b888dedf1l,0x4f8b178aab1750b9l,0x26418617ffe6b0eal,
-            0x01d1be82af04a59fl },
-          { 0x41584147e652db64l,0xf7775ac5727f9ea7l,0x58052a20e72ad8bbl,
-            0x5badf0dc6021160el },
-          0 },
-        /* 51 << 160 */
-        { { 0x8490ea99183de59dl,0xc95f72146f5c6f8cl,0x89b55d15df00c334l,
-            0x84386ad8a0ec36f7l },
-          { 0x24dadaefe4dc1ed1l,0xc606ba4c1e717227l,0x7e4756c0bbfa62eal,
-            0x3916cf14afc29cf3l },
-          0 },
-        /* 52 << 160 */
-        { { 0xb7b4d00101dae185l,0x45434e0b9b7a94bcl,0xf54339affbd8cb0bl,
-            0xdcc4569ee98ef49el },
-          { 0x7789318a09a51299l,0x81b4d206b2b025d8l,0xf64aa418fae85792l,
-            0x3e50258facd7baf7l },
-          0 },
-        /* 53 << 160 */
-        { { 0x4152c508492d91f3l,0x59d6cf9c678f9db4l,0xb0a8c966404608d1l,
-            0xdced55d0e3fed558l },
-          { 0x0914a3cb33a76188l,0x79df212423d35d46l,0x2322507fca13b364l,
-            0x0aed41d60078ab93l },
-          0 },
-        /* 55 << 160 */
-        { { 0x7acdaa7f6b2ebfc2l,0xb5ab1a9a80d9f67fl,0x53ba8173ff8aa8b0l,
-            0x9cd85cf874ca56a6l },
-          { 0xabac57f49c4fad81l,0x2325bb8521078995l,0xbac5e3a1b928a054l,
-            0x7219047a2394cc2al },
-          0 },
-        /* 57 << 160 */
-        { { 0xa33410d2aa75fd37l,0x821093affc0f1192l,0xe45e85ed155e39a9l,
-            0xd0e87cd12de67188l },
-          { 0xdeca97d965d43d87l,0x8c73826f9d2c99ecl,0x1bfe111e33237ddbl,
-            0xda32e865587bfb28l },
-          0 },
-        /* 59 << 160 */
-        { { 0xde456d92c89e9e4el,0xe45688a98e47f3cdl,0x3deacfca3bacbde0l,
-            0xdf9b32efc9683a70l },
-          { 0x749bc007e1691106l,0x788a05342a5154d7l,0x1a06baecf7c7b70dl,
-            0xb5b608eeae6ffc4cl },
-          0 },
-        /* 60 << 160 */
-        { { 0x4cd296df5579bea4l,0x10e35ac85ceedaf1l,0x04c4c5fde3bcc5b1l,
-            0x95f9ee8a89412cf9l },
-          { 0x2c9459ee82b6eb0fl,0x2e84576595c2aaddl,0x774a84aed327fcfel,
-            0xd8c937220368d476l },
-          0 },
-        /* 61 << 160 */
-        { { 0x39ebf947ccd25abbl,0x74e7a868cb49ebael,0x576ea108332e6147l,
-            0xcf3ba166150c1e5dl },
-          { 0xb5411fc3515c0e93l,0x51b15761f15c8a34l,0x362a4a3a0d213f38l,
-            0xf6f63c2e24e93aeal },
-          0 },
-        /* 63 << 160 */
-        { { 0x0cb3a2dcb78528d5l,0xa1888c18d585bb41l,0x210cca40de402a6el,
-            0x10c6339d9ed7c381l },
-          { 0xcd3558d561fe2a0cl,0xc97db05dad5140b1l,0x3366b028b21f8d11l,
-            0x878b09033e38be13l },
-          0 },
-        /* 64 << 160 */
-        { { 0x211cde10296c36efl,0x7ee8967282c4da77l,0xb617d270a57836dal,
-            0xf0cd9c319cb7560bl },
-          { 0x01fdcbf7e455fe90l,0x3fb53cbb7e7334f3l,0x781e2ea44e7de4ecl,
-            0x8adab3ad0b384fd0l },
-          0 },
-        /* 65 << 160 */
-        { { 0x081e505aa353ba05l,0x244ab34a288b86b1l,0x1155f06214e3a829l,
-            0x383300daf2118a6bl },
-          { 0xe8fc17cef27032b9l,0xed7f05c9c7bd2389l,0x78f70d14202f8a88l,
-            0x8a8310c0647b3f20l },
-          0 },
-        /* 71 << 160 */
-        { { 0xc80786e1a3633369l,0x496d55de9073f5b9l,0x10deeb6a89ae93cel,
-            0x6a2dd5c8b12e00c6l },
-          { 0xc25cd2f90c68e26dl,0x29d7ad8b53f0bb64l,0x2dd0d027d7fc9b00l,
-            0xad21e1f7ca9c4d5dl },
-          0 },
-        /* 77 << 160 */
-        { { 0xd45cb932d83465f3l,0x95830c0faf22fdbdl,0x41d830e007cd2a0al,
-            0x4a08500e3616e716l },
-          { 0x5931fc9f277755a5l,0x7d11680731006764l,0xa409a0ad1b3999aal,
-            0xec70368c9939d566l },
-          0 },
-        /* 83 << 160 */
-        { { 0x3905cb59f2030370l,0x7e9bdee56dcc8fd7l,0xb1b7b04e9806e06fl,
-            0xfbdadce22c73eb57l },
-          { 0xfb1ab2e98d5b2eb3l,0x58fbf2df7699338bl,0x81b1c54a63b5a032l,
-            0xefd1a1896a5d7ff4l },
-          0 },
-        /* 89 << 160 */
-        { { 0x0265189da1f769eal,0x22fa0bbbfdb5a502l,0xf69f0d1b21027534l,
-            0x64302b81f6066b99l },
-          { 0xdef85fc98a717e80l,0xe066166386879a3bl,0xe5489b347f95b22cl,
-            0x106dca9aa054a563l },
-          0 },
-        /* 95 << 160 */
-        { { 0xd624b4f4b4be9a77l,0x21a11ed77d50acb1l,0x707181f43d406e11l,
-            0x3f324d203ef158bcl },
-          { 0xb29a2a34aa8cc8del,0x482f4a15315db969l,0x42ce4fc7d9af272el,
-            0x784665b1f8f4cdc4l },
-          0 },
-        /* 101 << 160 */
-        { { 0x66ff7f73ab43a863l,0xa90be2cba77fd07el,0x84843997f76e5288l,
-            0x288c197f3cee129bl },
-          { 0x39acc080c0a060a6l,0x4c8e574bd24e27cal,0x1dd6170ffcd3d5e9l,
-            0x9736bb51f75e5150l },
-          0 },
-        /* 107 << 160 */
-        { { 0x2133810e6ba75716l,0x4debf728712886a8l,0x351e46a1f527d1f3l,
-            0x29709ae8e9591564l },
-          { 0x696163d3a3dc1780l,0xd5b7825ae02aadf3l,0x23579d7cd565ae68l,
-            0x105380124fa42cecl },
-          0 },
-        /* 113 << 160 */
-        { { 0x04eb554d13ffa704l,0x7441a62f2ed33d20l,0xaa926fa0b5b81324l,
-            0xb981bcb829836f61l },
-          { 0x313a78d4cc9a7a15l,0xff1242d11b3921d2l,0xc0053fd36a209d4dl,
-            0x95ac85caf7e92ca9l },
-          0 },
-        /* 116 << 160 */
-        { { 0x6d2a483d6f73c51el,0xa4cb2412ea0dc2ddl,0x50663c411eb917ffl,
-            0x3d3a74cfeade299el },
-          { 0x29b3990f4a7a9202l,0xa9bccf59a7b15c3dl,0x66a3ccdca5df9208l,
-            0x48027c1443f2f929l },
-          0 },
-        /* 119 << 160 */
-        { { 0xdf8a6f9673c3f6fbl,0xe4b1f0d98cc03220l,0x5ddacd618350480cl,
-            0x485c4fababdfb016l },
-          { 0xdc840628b4d424b7l,0x07d3a99c215b2359l,0xad3dc5af56dff52el,
-            0x5a3a6754973b6825l },
-          0 },
-        /* 125 << 160 */
-        { { 0xcfe231b83539a06dl,0xb36d1f72f46770ddl,0x126049747bb900d6l,
-            0x8d0990973fc31661l },
-          { 0x03b2749c920bc39el,0xf933d510b0486e23l,0x09cc958f0e9b0bb5l,
-            0x0b254dd1aa1e23abl },
-          0 },
-    },
-    {
-        /* 0 << 168 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 168 */
-        { { 0x263a2cfb9db3b381l,0x9c3a2deed4df0a4bl,0x728d06e97d04e61fl,
-            0x8b1adfbc42449325l },
-          { 0x6ec1d9397e053a1bl,0xee2be5c766daf707l,0x80ba1e14810ac7abl,
-            0xdd2ae778f530f174l },
-          0 },
-        /* 3 << 168 */
-        { { 0xadbaeb79b6828f36l,0x9d7a025801bd5b9el,0xeda01e0d1e844b0cl,
-            0x4b625175887edfc9l },
-          { 0x14109fdd9669b621l,0x88a2ca56f6f87b98l,0xfe2eb788170df6bcl,
-            0x0cea06f4ffa473f9l },
-          0 },
-        /* 4 << 168 */
-        { { 0x43ed81b5c4e83d33l,0xd9f358795efd488bl,0x164a620f9deb4d0fl,
-            0xc6927bdbac6a7394l },
-          { 0x45c28df79f9e0f03l,0x2868661efcd7e1a9l,0x7cf4e8d0ffa348f1l,
-            0x6bd4c284398538e0l },
-          0 },
-        /* 5 << 168 */
-        { { 0x2618a091289a8619l,0xef796e606671b173l,0x664e46e59090c632l,
-            0xa38062d41e66f8fbl },
-          { 0x6c744a200573274el,0xd07b67e4a9271394l,0x391223b26bdc0e20l,
-            0xbe2d93f1eb0a05a7l },
-          0 },
-        /* 7 << 168 */
-        { { 0x7efa14b84444896bl,0x64974d2ff94027fbl,0xefdcd0e8de84487dl,
-            0x8c45b2602b48989bl },
-          { 0xa8fcbbc2d8463487l,0xd1b2b3f73fbc476cl,0x21d005b7c8f443c0l,
-            0x518f2e6740c0139cl },
-          0 },
-        /* 9 << 168 */
-        { { 0xae51dca2a91f6791l,0x2abe41909baa9efcl,0xd9d2e2f4559c7ac1l,
-            0xe82f4b51fc9f773al },
-          { 0xa77130274073e81cl,0xc0276facfbb596fcl,0x1d819fc9a684f70cl,
-            0x29b47fddc9f7b1e0l },
-          0 },
-        /* 10 << 168 */
-        { { 0x358de103459b1940l,0xec881c595b013e93l,0x51574c9349532ad3l,
-            0x2db1d445b37b46del },
-          { 0xc6445b87df239fd8l,0xc718af75151d24eel,0xaea1c4a4f43c6259l,
-            0x40c0e5d770be02f7l },
-          0 },
-        /* 11 << 168 */
-        { { 0x6a4590f4721b33f2l,0x2124f1fbfedf04eal,0xf8e53cde9745efe7l,
-            0xe7e1043265f046d9l },
-          { 0xc3fca28ee4d0c7e6l,0x847e339a87253b1bl,0x9b5953483743e643l,
-            0xcb6a0a0b4fd12fc5l },
-          0 },
-        /* 13 << 168 */
-        { { 0xec1214eda714181dl,0x609ac13b6067b341l,0xff4b4c97a545df1fl,
-            0xa124050134d2076bl },
-          { 0x6efa0c231409ca97l,0x254cc1a820638c43l,0xd4e363afdcfb46cdl,
-            0x62c2adc303942a27l },
-          0 },
-        /* 15 << 168 */
-        { { 0x27b6a8ab3fd40e09l,0xe455842e77313ea9l,0x8b51d1e21f55988bl,
-            0x5716dd73062bbbfcl },
-          { 0x633c11e54e8bf3del,0x9a0e77b61b85be3bl,0x565107290911cca6l,
-            0x27e76495efa6590fl },
-          0 },
-        /* 16 << 168 */
-        { { 0xe4ac8b33070d3aabl,0x2643672b9a2cd5e5l,0x52eff79b1cfc9173l,
-            0x665ca49b90a7c13fl },
-          { 0x5a8dda59b3efb998l,0x8a5b922d052f1341l,0xae9ebbab3cf9a530l,
-            0x35986e7bf56da4d7l },
-          0 },
-        /* 17 << 168 */
-        { { 0x3a636b5cff3513ccl,0xbb0cf8ba3198f7ddl,0xb8d4052241f16f86l,
-            0x760575d8de13a7bfl },
-          { 0x36f74e169f7aa181l,0x163a3ecff509ed1cl,0x6aead61f3c40a491l,
-            0x158c95fcdfe8fcaal },
-          0 },
-        /* 19 << 168 */
-        { { 0x6b47accdd9eee96cl,0x0ca277fbe58cec37l,0x113fe413e702c42al,
-            0xdd1764eec47cbe51l },
-          { 0x041e7cde7b3ed739l,0x50cb74595ce9e1c0l,0x355685132925b212l,
-            0x7cff95c4001b081cl },
-          0 },
-        /* 21 << 168 */
-        { { 0x726f0973da50c991l,0x48afcd5b822d6ee2l,0xe5fc718b20fd7771l,
-            0xb9e8e77dfd0807a1l },
-          { 0x7f5e0f4499a7703dl,0x6972930e618e36f3l,0x2b7c77b823807bbel,
-            0xe5b82405cb27ff50l },
-          0 },
-        /* 23 << 168 */
-        { { 0x98cb1ae9255c0980l,0x4bd863812b4a739fl,0x5a5c31e11e4a45a1l,
-            0x1e5d55fe9cb0db2fl },
-          { 0x74661b068ff5cc29l,0x026b389f0eb8a4f4l,0x536b21a458848c24l,
-            0x2e5bf8ec81dc72b0l },
-          0 },
-        /* 25 << 168 */
-        { { 0x9f0af483d309cbe6l,0x5b020d8ae0bced4fl,0x606e986db38023e3l,
-            0xad8f2c9d1abc6933l },
-          { 0x19292e1de7400e93l,0xfe3e18a952be5e4dl,0xe8e9771d2e0680bfl,
-            0x8c5bec98c54db063l },
-          0 },
-        /* 27 << 168 */
-        { { 0x4c23f62a2c160dcdl,0x34e6c5e38f90eaefl,0x35865519a9a65d5al,
-            0x07c48aae8fd38a3dl },
-          { 0xb7e7aeda50068527l,0x2c09ef231c90936al,0x31ecfeb6e879324cl,
-            0xa0871f6bfb0ec938l },
-          0 },
-        /* 28 << 168 */
-        { { 0xb1f0fb68d84d835dl,0xc90caf39861dc1e6l,0x12e5b0467594f8d7l,
-            0x26897ae265012b92l },
-          { 0xbcf68a08a4d6755dl,0x403ee41c0991fbdal,0x733e343e3bbf17e8l,
-            0xd2c7980d679b3d65l },
-          0 },
-        /* 29 << 168 */
-        { { 0x33056232d2e11305l,0x966be492f3c07a6fl,0x6a8878ffbb15509dl,
-            0xff2211010a9b59a4l },
-          { 0x6c9f564aabe30129l,0xc6f2c940336e64cfl,0x0fe752628b0c8022l,
-            0xbe0267e96ae8db87l },
-          0 },
-        /* 31 << 168 */
-        { { 0x9d031369a5e829e5l,0xcbb4c6fc1607aa41l,0x75ac59a6241d84c1l,
-            0xc043f2bf8829e0eel },
-          { 0x82a38f758ea5e185l,0x8bda40b9d87cbd9fl,0x9e65e75e2d8fc601l,
-            0x3d515f74a35690b3l },
-          0 },
-        /* 33 << 168 */
-        { { 0xf6b5b2d0bc8fa5bcl,0x8a5ead67500c277bl,0x214625e6dfa08a5dl,
-            0x51fdfedc959cf047l },
-          { 0x6bc9430b289fca32l,0xe36ff0cf9d9bdc3fl,0x2fe187cb58ea0edel,
-            0xed66af205a900b3fl },
-          0 },
-        /* 34 << 168 */
-        { { 0x00e0968b5fa9f4d6l,0x2d4066ce37a362e7l,0xa99a9748bd07e772l,
-            0x710989c006a4f1d0l },
-          { 0xd5dedf35ce40cbd8l,0xab55c5f01743293dl,0x766f11448aa24e2cl,
-            0x94d874f8605fbcb4l },
-          0 },
-        /* 35 << 168 */
-        { { 0xa365f0e8a518001bl,0xee605eb69d04ef0fl,0x5a3915cdba8d4d25l,
-            0x44c0e1b8b5113472l },
-          { 0xcbb024e88b6740dcl,0x89087a53ee1d4f0cl,0xa88fa05c1fc4e372l,
-            0x8bf395cbaf8b3af2l },
-          0 },
-        /* 36 << 168 */
-        { { 0x1e71c9a1deb8568bl,0xa35daea080fb3d32l,0xe8b6f2662cf8fb81l,
-            0x6d51afe89490696al },
-          { 0x81beac6e51803a19l,0xe3d24b7f86219080l,0x727cfd9ddf6f463cl,
-            0x8c6865ca72284ee8l },
-          0 },
-        /* 37 << 168 */
-        { { 0x32c88b7db743f4efl,0x3793909be7d11dcel,0xd398f9222ff2ebe8l,
-            0x2c70ca44e5e49796l },
-          { 0xdf4d9929cb1131b1l,0x7826f29825888e79l,0x4d3a112cf1d8740al,
-            0x00384cb6270afa8bl },
-          0 },
-        /* 39 << 168 */
-        { { 0xbe7e990ff0d796a0l,0x5fc62478df0e8b02l,0x8aae8bf4030c00adl,
-            0x3d2db93b9004ba0fl },
-          { 0xe48c8a79d85d5ddcl,0xe907caa76bb07f34l,0x58db343aa39eaed5l,
-            0x0ea6e007adaf5724l },
-          0 },
-        /* 40 << 168 */
-        { { 0xe00df169d23233f3l,0x3e32279677cb637fl,0x1f897c0e1da0cf6cl,
-            0xa651f5d831d6bbddl },
-          { 0xdd61af191a230c76l,0xbd527272cdaa5e4al,0xca753636d0abcd7el,
-            0x78bdd37c370bd8dcl },
-          0 },
-        /* 41 << 168 */
-        { { 0xc23916c217cd93fel,0x65b97a4ddadce6e2l,0xe04ed4eb174e42f8l,
-            0x1491ccaabb21480al },
-          { 0x145a828023196332l,0x3c3862d7587b479al,0x9f4a88a301dcd0edl,
-            0x4da2b7ef3ea12f1fl },
-          0 },
-        /* 43 << 168 */
-        { { 0x71965cbfc3dd9b4dl,0xce23edbffc068a87l,0xb78d4725745b029bl,
-            0x74610713cefdd9bdl },
-          { 0x7116f75f1266bf52l,0x0204672218e49bb6l,0xdf43df9f3d6f19e3l,
-            0xef1bc7d0e685cb2fl },
-          0 },
-        /* 44 << 168 */
-        { { 0xcddb27c17078c432l,0xe1961b9cb77fedb7l,0x1edc2f5cc2290570l,
-            0x2c3fefca19cbd886l },
-          { 0xcf880a36c2af389al,0x96c610fdbda71ceal,0xf03977a932aa8463l,
-            0x8eb7763f8586d90al },
-          0 },
-        /* 45 << 168 */
-        { { 0x3f3424542a296e77l,0xc871868342837a35l,0x7dc710906a09c731l,
-            0x54778ffb51b816dbl },
-          { 0x6b33bfecaf06defdl,0xfe3c105f8592b70bl,0xf937fda461da6114l,
-            0x3c13e6514c266ad7l },
-          0 },
-        /* 46 << 168 */
-        { { 0xe363a829855938e8l,0x2eeb5d9e9de54b72l,0xbeb93b0e20ccfab9l,
-            0x3dffbb5f25e61a25l },
-          { 0x7f655e431acc093dl,0x0cb6cc3d3964ce61l,0x6ab283a1e5e9b460l,
-            0x55d787c5a1c7e72dl },
-          0 },
-        /* 47 << 168 */
-        { { 0x4d2efd47deadbf02l,0x11e80219ac459068l,0x810c762671f311f0l,
-            0xfa17ef8d4ab6ef53l },
-          { 0xaf47fd2593e43bffl,0x5cb5ff3f0be40632l,0x546871068ee61da3l,
-            0x7764196eb08afd0fl },
-          0 },
-        /* 48 << 168 */
-        { { 0x831ab3edf0290a8fl,0xcae81966cb47c387l,0xaad7dece184efb4fl,
-            0xdcfc53b34749110el },
-          { 0x6698f23c4cb632f9l,0xc42a1ad6b91f8067l,0xb116a81d6284180al,
-            0xebedf5f8e901326fl },
-          0 },
-        /* 49 << 168 */
-        { { 0xf2274c9f97e3e044l,0x4201852011d09fc9l,0x56a65f17d18e6e23l,
-            0x2ea61e2a352b683cl },
-          { 0x27d291bc575eaa94l,0x9e7bc721b8ff522dl,0x5f7268bfa7f04d6fl,
-            0x5868c73faba41748l },
-          0 },
-        /* 51 << 168 */
-        { { 0x1c52e63596e78cc4l,0x5385c8b20c06b4a8l,0xd84ddfdbb0e87d03l,
-            0xc49dfb66934bafadl },
-          { 0x7071e17059f70772l,0x3a073a843a1db56bl,0x034949033b8af190l,
-            0x7d882de3d32920f0l },
-          0 },
-        /* 52 << 168 */
-        { { 0x91633f0ab2cf8940l,0x72b0b1786f948f51l,0x2d28dc30782653c8l,
-            0x88829849db903a05l },
-          { 0xb8095d0c6a19d2bbl,0x4b9e7f0c86f782cbl,0x7af739882d907064l,
-            0xd12be0fe8b32643cl },
-          0 },
-        /* 53 << 168 */
-        { { 0x358ed23d0e165dc3l,0x3d47ce624e2378cel,0x7e2bb0b9feb8a087l,
-            0x3246e8aee29e10b9l },
-          { 0x459f4ec703ce2b4dl,0xe9b4ca1bbbc077cfl,0x2613b4f20e9940c1l,
-            0xfc598bb9047d1eb1l },
-          0 },
-        /* 55 << 168 */
-        { { 0x52fb0c9d7fc63668l,0x6886c9dd0c039cdel,0x602bd59955b22351l,
-            0xb00cab02360c7c13l },
-          { 0x8cb616bc81b69442l,0x41486700b55c3ceel,0x71093281f49ba278l,
-            0xad956d9c64a50710l },
-          0 },
-        /* 57 << 168 */
-        { { 0xbaca6591d4b66947l,0xb452ce9804460a8cl,0x6830d24643768f55l,
-            0xf4197ed87dff12dfl },
-          { 0x6521b472400dd0f7l,0x59f5ca8f4b1e7093l,0x6feff11b080338ael,
-            0x0ada31f6a29ca3c6l },
-          0 },
-        /* 59 << 168 */
-        { { 0x04e5dfe0d809c7bdl,0xd7b2580c8f1050abl,0x6d91ad78d8a4176fl,
-            0x0af556ee4e2e897cl },
-          { 0x162a8b73921de0acl,0x52ac9c227ea78400l,0xee2a4eeaefce2174l,
-            0xbe61844e6d637f79l },
-          0 },
-        /* 60 << 168 */
-        { { 0x0491f1bc789a283bl,0x72d3ac3d880836f4l,0xaa1c5ea388e5402dl,
-            0x1b192421d5cc473dl },
-          { 0x5c0b99989dc84cacl,0xb0a8482d9c6e75b8l,0x639961d03a191ce2l,
-            0xda3bc8656d837930l },
-          0 },
-        /* 61 << 168 */
-        { { 0xca990653056e6f8fl,0x84861c4164d133a7l,0x8b403276746abe40l,
-            0xb7b4d51aebf8e303l },
-          { 0x05b43211220a255dl,0xc997152c02419e6el,0x76ff47b6630c2feal,
-            0x50518677281fdadel },
-          0 },
-        /* 63 << 168 */
-        { { 0x6d2d99b7ea7b979bl,0xcd78cd74e6fb3bcdl,0x11e45a9e86cffbfel,
-            0x78a61cf4637024f6l },
-          { 0xd06bc8723d502295l,0xf1376854458cb288l,0xb9db26a1342f8586l,
-            0xf33effcf4beee09el },
-          0 },
-        /* 64 << 168 */
-        { { 0xd7e0c4cdb30cfb3al,0x6d09b8c16c9db4c8l,0x40ba1a4207c8d9dfl,
-            0x6fd495f71c52c66dl },
-          { 0xfb0e169f275264dal,0x80c2b746e57d8362l,0xedd987f749ad7222l,
-            0xfdc229af4398ec7bl },
-          0 },
-        /* 65 << 168 */
-        { { 0xfe81af4609418a51l,0xdbb60b836f18e3a5l,0x5e7a86ea4566ec9cl,
-            0xb76ff40f25093925l },
-          { 0x5fe6662c429c5554l,0xfc9ec35384e478cfl,0x73dbb5f3e8cfa761l,
-            0x031e506592f82709l },
-          0 },
-        /* 71 << 168 */
-        { { 0x108c736abd49f2e0l,0xe230f2417487dcc8l,0x073fc4f8f74d939cl,
-            0x98532487e9745bbel },
-          { 0x5208eb981714b10bl,0xec35d0510458725dl,0x35dbb60bf203f4b6l,
-            0x064299b27781ab38l },
-          0 },
-        /* 77 << 168 */
-        { { 0x43cc7bbc02d26929l,0xeb00a683162d9607l,0x2af152b8ed9fa224l,
-            0xf24e8bee12257f0cl },
-          { 0xdf065dd5d004b1cbl,0x6aa20bcf9f9908c6l,0x8e5e86b6941c593dl,
-            0x0e0034b398969717l },
-          0 },
-        /* 83 << 168 */
-        { { 0x5be62e155c43b8fcl,0xd9e0adfc3c445636l,0xc5141df0e0d78f48l,
-            0xd134bbed2c277716l },
-          { 0x79033a84598fe069l,0x6c704367b081614cl,0x55c45d66bf5bf772l,
-            0xf08744c57a444730l },
-          0 },
-        /* 89 << 168 */
-        { { 0x866752091422b528l,0xdb297411c3e028eel,0x1f5575b040e1c3ccl,
-            0x85367b84d333b04fl },
-          { 0x57864c86e9804aa9l,0xf13fa8e3439156dfl,0xa3b337e0464e0aecl,
-            0x0018dfd7f2ae382bl },
-          0 },
-        /* 95 << 168 */
-        { { 0xe93cece9cea132fcl,0x985542d8f74e867al,0x2a3d18a5cc8fcf87l,
-            0xa0561055479d0039l },
-          { 0x3513c7eaac4b3f9dl,0xc095967256477606l,0xa63960f330df8ad6l,
-            0x59ca8d53cc9ddcb3l },
-          0 },
-        /* 101 << 168 */
-        { { 0x6d8e942b2f208191l,0xd49a6d9453fe5457l,0x2b55e391003010bal,
-            0x3dd1fd9fdf4605ebl },
-          { 0xdc006a3358682886l,0x60a5e86c1bd9ac88l,0xc4bd320ed0cab8f2l,
-            0x7281e7cb7751855bl },
-          0 },
-        /* 107 << 168 */
-        { { 0x7d564222e1881e7al,0x59061a89db0673c2l,0x1f9d607213f27313l,
-            0x5b3b29368ff3aeb7l },
-          { 0x6cf2304ccf969f43l,0x8eff4a25e7f69ae5l,0xbaeb6411d17da4ffl,
-            0x666af0af9eea17ecl },
-          0 },
-        /* 113 << 168 */
-        { { 0x6c0b811697f4cd0bl,0xcd7825d40e4ea852l,0x80158fb0677fef3dl,
-            0x5bb1a3aaa10ee693l },
-          { 0xc5df66678066fc9bl,0x3200dc11f404d4a6l,0x58868950a8686d8el,
-            0xbdaaffb53770fabal },
-          0 },
-        /* 116 << 168 */
-        { { 0xba6a9f84660326f5l,0x61c1e44161bc3e88l,0xfbf992a0bde85cf8l,
-            0xe704dd1e6f8c8f5fl },
-          { 0x231caa0ab1d7d486l,0xd10616d8891cd571l,0x2ddada75c008833cl,
-            0x44337d6dad514c94l },
-          0 },
-        /* 119 << 168 */
-        { { 0xd48678b8f6933cf0l,0x7b4d623e0b739471l,0x4ad620287b216238l,
-            0xb4d4918959c4fabel },
-          { 0x8c2a1bdc296d42d5l,0x9235d0ec2fd3eb96l,0xfe271972f81c135bl,
-            0x82b5181741471e16l },
-          0 },
-        /* 125 << 168 */
-        { { 0xe9aa8ce4051f8e81l,0x14484af67cd1391fl,0x53a361dcafb1656el,
-            0x6ad8ba02f4d9d0cbl },
-          { 0xfb4385466c50a722l,0x2f1c5bbc7edb37f4l,0x8dc90ccb16e4b795l,
-            0xbcb32e1508127094l },
-          0 },
-    },
-    {
-        /* 0 << 176 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 176 */
-        { { 0xb81d783e979f3925l,0x1efd130aaf4c89a7l,0x525c2144fd1bf7fal,
-            0x4b2969041b265a9el },
-          { 0xed8e9634b9db65b6l,0x35c82e3203599d8al,0xdaa7a54f403563f3l,
-            0x9df088ad022c38abl },
-          0 },
-        /* 3 << 176 */
-        { { 0x9e93ba24f111661el,0xedced484b105eb04l,0x96dc9ba1f424b578l,
-            0xbf8f66b7e83e9069l },
-          { 0x872d4df4d7ed8216l,0xbf07f3778e2cbecfl,0x4281d89998e73754l,
-            0xfec85fbb8aab8708l },
-          0 },
-        /* 4 << 176 */
-        { { 0x13b5bf22765fa7d0l,0x59805bf01d6a5370l,0x67a5e29d4280db98l,
-            0x4f53916f776b1ce3l },
-          { 0x714ff61f33ddf626l,0x4206238ea085d103l,0x1c50d4b7e5809ee3l,
-            0x999f450d85f8eb1dl },
-          0 },
-        /* 5 << 176 */
-        { { 0x82eebe731a3a93bcl,0x42bbf465a21adc1al,0xc10b6fa4ef030efdl,
-            0x247aa4c787b097bbl },
-          { 0x8b8dc632f60c77dal,0x6ffbc26ac223523el,0xa4f6ff11344579cfl,
-            0x5825653c980250f6l },
-          0 },
-        /* 7 << 176 */
-        { { 0xeda6c595d314e7bcl,0x2ee7464b467899edl,0x1cef423c0a1ed5d3l,
-            0x217e76ea69cc7613l },
-          { 0x27ccce1fe7cda917l,0x12d8016b8a893f16l,0xbcd6de849fc74f6bl,
-            0xfa5817e2f3144e61l },
-          0 },
-        /* 9 << 176 */
-        { { 0xc0b48d4e49ccd6d7l,0xff8fb02c88bd5580l,0xc75235e907d473b2l,
-            0x4fab1ac5a2188af3l },
-          { 0x030fa3bc97576ec0l,0xe8c946e80b7e7d2fl,0x40a5c9cc70305600l,
-            0x6d8260a9c8b013b4l },
-          0 },
-        /* 10 << 176 */
-        { { 0xe6c51073615cd9e4l,0x498ec047f1243c06l,0x3e5a8809b17b3d8cl,
-            0x5cd99e610cc565f1l },
-          { 0x81e312df7851dafel,0xf156f5baa79061e2l,0x80d62b71880c590el,
-            0xbec9746f0a39faa1l },
-          0 },
-        /* 11 << 176 */
-        { { 0x2b09d2c3cfdcf7ddl,0x41a9fce3723fcab4l,0x73d905f707f57ca3l,
-            0x080f9fb1ac8e1555l },
-          { 0x7c088e849ba7a531l,0x07d35586ed9a147fl,0x602846abaf48c336l,
-            0x7320fd320ccf0e79l },
-          0 },
-        /* 13 << 176 */
-        { { 0x92eb40907f8f875dl,0x9c9d754e56c26bbfl,0x158cea618110bbe7l,
-            0x62a6b802745f91eal },
-          { 0xa79c41aac6e7394bl,0x445b6a83ad57ef10l,0x0c5277eb6ea6f40cl,
-            0x319fe96b88633365l },
-          0 },
-        /* 15 << 176 */
-        { { 0x77f84203d39b8c34l,0xed8b1be63125eddbl,0x5bbf2441f6e39dc5l,
-            0xb00f6ee66a5d678al },
-          { 0xba456ecf57d0ea99l,0xdcae0f5817e06c43l,0x01643de40f5b4baal,
-            0x2c324341d161b9bel },
-          0 },
-        /* 16 << 176 */
-        { { 0x949c9976e1337c26l,0x6faadebdd73d68e5l,0x9e158614f1b768d9l,
-            0x22dfa5579cc4f069l },
-          { 0xccd6da17be93c6d6l,0x24866c61a504f5b9l,0x2121353c8d694da1l,
-            0x1c6ca5800140b8c6l },
-          0 },
-        /* 17 << 176 */
-        { { 0x4e77c5575b45afb4l,0xe9ded649efb8912dl,0x7ec9bbf542f6e557l,
-            0x2570dfff62671f00l },
-          { 0x2b3bfb7888e084bdl,0xa024b238f37fe5b4l,0x44e7dc0495649aeel,
-            0x498ca2555e7ec1d8l },
-          0 },
-        /* 19 << 176 */
-        { { 0x2e44d22526a1fc90l,0x0d6d10d24d70705dl,0xd94b6b10d70c45f4l,
-            0x0f201022b216c079l },
-          { 0xcec966c5658fde41l,0xa8d2bc7d7e27601dl,0xbfcce3e1ff230be7l,
-            0x3394ff6b0033ffb5l },
-          0 },
-        /* 21 << 176 */
-        { { 0x05d99be8b9c20cdal,0x89f7aad5d5cd0c98l,0x7ef936fe5bb94183l,
-            0x92ca0753b05cd7f2l },
-          { 0x9d65db1174a1e035l,0x02628cc813eaea92l,0xf2d9e24249e4fbf2l,
-            0x94fdfd9be384f8b7l },
-          0 },
-        /* 23 << 176 */
-        { { 0x29882d7c98379d44l,0xd000bdfb509edc8al,0xc6f95979e66fe464l,
-            0x504a6115fa61bde0l },
-          { 0x56b3b871effea31al,0x2d3de26df0c21a54l,0x21dbff31834753bfl,
-            0xe67ecf4969269d86l },
-          0 },
-        /* 25 << 176 */
-        { { 0xed29a56da16d4b34l,0x7fba9d09dca21c4fl,0x66d7ac006d8de486l,
-            0x6006198773a2a5e1l },
-          { 0x8b400f869da28ff0l,0x3133f70843c4599cl,0x9911c9b8ee28cb0dl,
-            0xcd7e28748e0af61dl },
-          0 },
-        /* 27 << 176 */
-        { { 0x6a7bb6a93b5bdb83l,0x08da65c0a4a72318l,0xc58d22aa63eb065fl,
-            0x1717596c1b15d685l },
-          { 0x112df0d0b266d88bl,0xf688ae975941945al,0x487386e37c292cacl,
-            0x42f3b50d57d6985cl },
-          0 },
-        /* 28 << 176 */
-        { { 0x69e3be0427596893l,0xb6bb02a645bf452bl,0x0875c11af4c698c8l,
-            0x6652b5c7bece3794l },
-          { 0x7b3755fd4f5c0499l,0x6ea16558b5532b38l,0xd1c69889a2e96ef7l,
-            0x9c773c3a61ed8f48l },
-          0 },
-        /* 29 << 176 */
-        { { 0x5a304ada8545d185l,0x82ae44ea738bb8cbl,0x628a35e3df87e10el,
-            0xd3624f3da15b9fe3l },
-          { 0xcc44209b14be4254l,0x7d0efcbcbdbc2ea5l,0x1f60336204c37bbel,
-            0x21f363f556a5852cl },
-          0 },
-        /* 31 << 176 */
-        { { 0x81262e4225346689l,0x716da290b07c7004l,0x35f911eab7950ee3l,
-            0x6fd72969261d21b5l },
-          { 0x5238980308b640d3l,0x5b0026ee887f12a1l,0x20e21660742e9311l,
-            0x0ef6d5415ff77ff7l },
-          0 },
-        /* 33 << 176 */
-        { { 0x64aa0874925dd0b0l,0x5ffd503851c474c6l,0x4478c72c8ebd4157l,
-            0xb98694cb8c8375e2l },
-          { 0xeda4edeecd8e208cl,0xf98a053d2c0670a6l,0x564bd3057f346b9dl,
-            0xafbbf3e94c318fddl },
-          0 },
-        /* 34 << 176 */
-        { { 0x8a03410aa96c4685l,0xef1b6b16a978a31bl,0x44738a3b629df6cfl,
-            0xa1dc65da807713e9l },
-          { 0x569cc7884c373442l,0x1f30a2464965fb52l,0x56822f1677ff5e2el,
-            0x63f18812e303748bl },
-          0 },
-        /* 35 << 176 */
-        { { 0x2abdc403dd0983ecl,0xec0c08c7f365c6f5l,0xe555083fbdb66b8bl,
-            0x593685bc4e8973ffl },
-          { 0x737df3f920e9c705l,0x00c7bcc309c31a5al,0x5f1d23e2efdcb34dl,
-            0x79d9b382470f7949l },
-          0 },
-        /* 36 << 176 */
-        { { 0x44a315645fd2eb1dl,0x4e7397263fdd1356l,0x9b96735463200efel,
-            0xcb70402e520bbb6al },
-          { 0xcbc90d7e693d2642l,0x6fb00064bc9b4002l,0x95f2eab3d96f7150l,
-            0xb1619e3fe035f47al },
-          0 },
-        /* 37 << 176 */
-        { { 0xd22d6073d1561bb7l,0x40666e4ba9928683l,0x90654dab8ab3f9b1l,
-            0x7625c507b8773421l },
-          { 0x288f28220ca88cd2l,0xbb88114ed8d005c1l,0xbeec2b0af603a11bl,
-            0x8fdda60325f7949el },
-          0 },
-        /* 39 << 176 */
-        { { 0x6503632d6ee4f1d0l,0xd5449747ea394840l,0xd696167a8abe13a1l,
-            0xc080f76e609ebaa9l },
-          { 0x181acf0c10aa70d6l,0x70614461291e5e50l,0x7ade8e84b9f0c0a3l,
-            0xef1de9f2cb11b41el },
-          0 },
-        /* 40 << 176 */
-        { { 0x2d5c3c848e592413l,0x727022961832ba2cl,0x22979b51596c6321l,
-            0x738f31cb5a04db64l },
-          { 0x0bdaa6ca98f84ee5l,0x4e9e827c15e21eeel,0x4c59dbcc3ea632e0l,
-            0xed3404db5bc6f027l },
-          0 },
-        /* 41 << 176 */
-        { { 0x2841f05cfbaf8b26l,0xac9830db5b243770l,0xde3ab1707787f324l,
-            0x1ee12efe079209bcl },
-          { 0x2d3fd62d5bcf6e3cl,0x8a680655d60b0582l,0xdafc5061bc2b64a1l,
-            0xe0d91e7526a88788l },
-          0 },
-        /* 43 << 176 */
-        { { 0x2d49c685426b1b1el,0x6c2149caeabb02f7l,0xa4697d7fde11984fl,
-            0xa0e32fb3ed3c8707l },
-          { 0xb783e825f4ca12dal,0xb2666e2448770a50l,0x82d47f478660e923l,
-            0x6e36cd71fb4a984fl },
-          0 },
-        /* 44 << 176 */
-        { { 0x3295a8ea43c66b92l,0x99387af6ac5d19d4l,0x545f9b1b8e9d2090l,
-            0x138b1c4c2660f530l },
-          { 0xbfb05fd2ff872627l,0xb6614b0f4c3bc45cl,0x13defece62ca0fb0l,
-            0x82ddae134fededd8l },
-          0 },
-        /* 45 << 176 */
-        { { 0x5a34499b871c4cbbl,0x3ab0e69a2eb6084bl,0xa8d0160025ef7755l,
-            0x5db8f611d9e70f5dl },
-          { 0x63f9eb9a7afa95d7l,0x328b97f9706d7964l,0x8bcf9a0f4b71dfcal,
-            0x53d4c3042a5c7934l },
-          0 },
-        /* 46 << 176 */
-        { { 0x0c87dd3a8768d9aal,0x201ce5a082f6a55fl,0xa3de6f3049ca4602l,
-            0x36f421422aeb5f17l },
-          { 0x5c9962399817b77al,0x2584a10ae8d165acl,0x80f683d0c726f4aal,
-            0x524307502dcdfa48l },
-          0 },
-        /* 47 << 176 */
-        { { 0x0c04399f94683df2l,0x0978e9d4e954838dl,0x01faa5e8cf4a7a7bl,
-            0x92f6e6a90dae61cfl },
-          { 0x0c0f1293373dc957l,0x8320178fd8cc6b67l,0x4af977ed4b6444f2l,
-            0xd8c9a401ad8e5f84l },
-          0 },
-        /* 48 << 176 */
-        { { 0xbd5660ed9aed9f40l,0x70ca6ad1532a8c99l,0xc4978bfb95c371eal,
-            0xe5464d0d7003109dl },
-          { 0x1af32fdfd9e535efl,0xabf57ea798c9185bl,0xed7a741712b42488l,
-            0x8e0296a7e97286fal },
-          0 },
-        /* 49 << 176 */
-        { { 0x79ee35ac16fca804l,0x8f16e6165f59782el,0x8fbef1011737694el,
-            0xb34b7625462be08bl },
-          { 0x7e63e1b016e75c91l,0xb6a18edd2d23728dl,0xcf761a1e7f299ab6l,
-            0x796dcdebf16c770el },
-          0 },
-        /* 51 << 176 */
-        { { 0x47354f22308ee4afl,0x96959a538ecd6f4bl,0xf60b5f104055cbd2l,
-            0x04b1c9599bd86095l },
-          { 0x26accd8486008564l,0x46b2fe0478f31ea7l,0x5500dbf72dd76f23l,
-            0x36bcdf584c496c6fl },
-          0 },
-        /* 52 << 176 */
-        { { 0x8836cd431527d7cel,0x1f236623187a50eal,0x6470c0ae847221f0l,
-            0xc61f86b47e449110l },
-          { 0x7cc9cc20fa9fcec1l,0xa394903019134349l,0xafe5a08ff53ab467l,
-            0x9caba02301ed2919l },
-          0 },
-        /* 53 << 176 */
-        { { 0xffecbdce406abf1el,0x0ef4bcd73ae340d4l,0x7e37bae0e19d5613l,
-            0xe191669be4c6e97al },
-          { 0x9fafe59797292db7l,0xab7ef3713172d716l,0x9f0fff330ce3b533l,
-            0xca94ff8f932dd8cfl },
-          0 },
-        /* 55 << 176 */
-        { { 0x659c8b5d78aea69el,0xdde7ab46476a8fb9l,0x26bfe303bd01b5e6l,
-            0xf3dfb08a726a937cl },
-          { 0xe7a591fa0a263670l,0xe872c3f8f97434a0l,0x4881a82e2e0f2c21l,
-            0x17624e48788ef958l },
-          0 },
-        /* 57 << 176 */
-        { { 0xd526d66da7222e5bl,0xd33bb78efeb00e25l,0x9a7d670b932c8d08l,
-            0xea31e5273cee093fl },
-          { 0x55cc091bd04b7a43l,0x12b08d6dd01a123dl,0x1d98a6467fb0e7bal,
-            0xdabb09483535fd0dl },
-          0 },
-        /* 59 << 176 */
-        { { 0x2862314d08b69b19l,0x9cf302e191effcfal,0x43bdc8462ead917al,
-            0x21b238bbf94b3d8fl },
-          { 0xa3736160e2f465d3l,0x4d7fb6818541e255l,0x46fa089a23551edcl,
-            0xf7c41d17c1fefa8cl },
-          0 },
-        /* 60 << 176 */
-        { { 0x8ed0807fed113000l,0x8e1672d04c691484l,0x33a13ab31ee86ca0l,
-            0x9df0d9573bcaee4fl },
-          { 0x0cf0c638ef0dfb71l,0x1e0fe22ac2c9510al,0x43f506716fcc6a21l,
-            0xccb58404cec03a94l },
-          0 },
-        /* 61 << 176 */
-        { { 0x59547e37fd0936c1l,0x81e0517df45140b1l,0xcc6ccd89ed49e3fcl,
-            0xc2fa23eff3b897del },
-          { 0x149511ef2050c80al,0xf66bea6b3140b833l,0xbbe1401e2786d723l,
-            0x0aeb549c887509bcl },
-          0 },
-        /* 63 << 176 */
-        { { 0xf938e85060f5867al,0x806e1fff72429adcl,0x5ff7962a45f43b52l,
-            0xd8375ab6b2bbb403l },
-          { 0x00d5819b21b287fcl,0x15c7190ebae37d58l,0x075ce5ce05fcfb07l,
-            0x76368d06dbc003cbl },
-          0 },
-        /* 64 << 176 */
-        { { 0x01079383171b445fl,0x9bcf21e38131ad4cl,0x8cdfe205c93987e8l,
-            0xe63f4152c92e8c8fl },
-          { 0x729462a930add43dl,0x62ebb143c980f05al,0x4f3954e53b06e968l,
-            0xfe1d75ad242cf6b1l },
-          0 },
-        /* 65 << 176 */
-        { { 0x1cf508197630655el,0x9b4685c408d417f5l,0x6ea942619b049259l,
-            0x31c29b54fe73b755l },
-          { 0x3d2872a1f1f2af17l,0xbcd1139956bcbc4bl,0x4d14f59890d7a85cl,
-            0xd2c46040dbcbe998l },
-          0 },
-        /* 71 << 176 */
-        { { 0x3c8a06ca9792c42al,0x92535628602460ddl,0xa95e13f2ddd4c676l,
-            0xe823841d3b20d463l },
-          { 0x0248605bbfad6051l,0x82985dd61af51233l,0x3d243a5cdef7d742l,
-            0x0a88ce55ff6aa911l },
-          0 },
-        /* 77 << 176 */
-        { { 0xcf5b5962449aec98l,0x40322a6531a41389l,0xcd15606fd72c0527l,
-            0xfe91eac7b90d65a0l },
-          { 0xcd32415487636360l,0x82f2c7bdfc653a6fl,0xd04d138ae315ce7cl,
-            0x40ebfd5e78118dbcl },
-          0 },
-        /* 83 << 176 */
-        { { 0x0f9ea6ae4144660fl,0x02345c6513279b25l,0x139497b65c7671cbl,
-            0x7259f14b2ebed1d5l },
-          { 0xa1e5d98ce9b29988l,0xaed0efcd8df73ac8l,0x88339f073b81a77cl,
-            0x28f2bbca7109c8a6l },
-          0 },
-        /* 89 << 176 */
-        { { 0xa264f99d811472ddl,0x0e7eae0afc07a80cl,0x77f264d4a683cdc6l,
-            0x0512df49d053c668l },
-          { 0x2b4dfbade61dea15l,0x83de61acfd74890al,0xd2552bab32d41182l,
-            0x1fb9411435924e6al },
-          0 },
-        /* 95 << 176 */
-        { { 0x85efe53ade23c988l,0x89d41dbbf897f91bl,0x1357f91e7873fa8dl,
-            0x7a6ec2e3718d911cl },
-          { 0xf9e4f92e8f209a01l,0x4ffb96a70fdd67f3l,0x4c81a787f83dde1cl,
-            0x0d68fce15e163b60l },
-          0 },
-        /* 101 << 176 */
-        { { 0xbc79b4b26ab6da9dl,0xb4be5c278bb005f1l,0x63624530cd3b280bl,
-            0x543142f04e880026l },
-          { 0xbf7fb14cad90ddbfl,0xfe456e8a3966732dl,0x85499fb987ce35e9l,
-            0x8af09e6b24f1305dl },
-          0 },
-        /* 107 << 176 */
-        { { 0x5fc563ec16dc2b4bl,0xfe5631b25d0e535fl,0xbf4c489f9a93e36cl,
-            0x56badff1da2a07c4l },
-          { 0x72ac6b77fb7c5595l,0x4b25b9428e6645d9l,0xeeae127251f0657el,
-            0x30779ca51abeb76bl },
-          0 },
-        /* 113 << 176 */
-        { { 0x3d602ef5d909f43dl,0x2b2951a6bb347c79l,0x44903bfaa0d88896l,
-            0xd4ab20e8684c104fl },
-          { 0x55f70b4dd9b7e626l,0x084b3ee646a5f9ecl,0x1799cbe3da4ae81al,
-            0xc7cfac937fd6b80fl },
-          0 },
-        /* 116 << 176 */
-        { { 0x45647911ca20c525l,0x78f83186004706abl,0x5596377d97510538l,
-            0x047863defe041f8cl },
-          { 0xaea784896ec82367l,0x9d4eac2601eee8fcl,0xb32728f19b57d9dbl,
-            0x60a158f5313c0f65l },
-          0 },
-        /* 119 << 176 */
-        { { 0xf78caf129754377bl,0xa7fce16b6966f0c4l,0xfea937555a54a2b7l,
-            0x52d7f79b7cdfe951l },
-          { 0x3e14b92e94b1dac0l,0x363f2e5af168b73bl,0xcc0e9dcb6436a8c2l,
-            0x2dbece4bb52cbd27l },
-          0 },
-        /* 125 << 176 */
-        { { 0x7e7907ed8df38ffel,0xa68ec827e24e8a24l,0x5093a97e5f168732l,
-            0xa9ffea2f39ebb6dbl },
-          { 0x89e02c12284276d4l,0xc1179e3b3f9502d6l,0x01becb51d8f69eb6l,
-            0x86eee2935eb1c73cl },
-          0 },
-    },
-    {
-        /* 0 << 184 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 184 */
-        { { 0xf3b7963f4c830320l,0x842c7aa0903203e3l,0xaf22ca0ae7327afbl,
-            0x38e13092967609b6l },
-          { 0x73b8fb62757558f1l,0x3cc3e831f7eca8c1l,0xe4174474f6331627l,
-            0xa77989cac3c40234l },
-          0 },
-        /* 3 << 184 */
-        { { 0xb32cb8b0b796d219l,0xc3e95f4f34741dd9l,0x8721212568edf6f5l,
-            0x7a03aee4a2b9cb8el },
-          { 0x0cd3c376f53a89aal,0x0d8af9b1948a28dcl,0xcf86a3f4902ab04fl,
-            0x8aacb62a7f42002dl },
-          0 },
-        /* 4 << 184 */
-        { { 0xfd8e139f8f5fcda8l,0xf3e558c4bdee5bfdl,0xd76cbaf4e33f9f77l,
-            0x3a4c97a471771969l },
-          { 0xda27e84bf6dce6a7l,0xff373d9613e6c2d1l,0xf115193cd759a6e9l,
-            0x3f9b702563d2262cl },
-          0 },
-        /* 5 << 184 */
-        { { 0x9cb0ae6c252bd479l,0x05e0f88a12b5848fl,0x78f6d2b2a5c97663l,
-            0x6f6e149bc162225cl },
-          { 0xe602235cde601a89l,0xd17bbe98f373be1fl,0xcaf49a5ba8471827l,
-            0x7e1a0a8518aaa116l },
-          0 },
-        /* 7 << 184 */
-        { { 0x8b1e572235e6fc06l,0x3477728f0b3e13d5l,0x150c294daa8a7372l,
-            0xc0291d433bfa528al },
-          { 0xc6c8bc67cec5a196l,0xdeeb31e45c2e8a7cl,0xba93e244fb6e1c51l,
-            0xb9f8b71b2e28e156l },
-          0 },
-        /* 9 << 184 */
-        { { 0x343ac0a3ee9523f0l,0xbb75eab2975ea978l,0x1bccf332107387f4l,
-            0x790f92599ab0062el },
-          { 0xf1a363ad1e4f6a5fl,0x06e08b8462519a50l,0x609151877265f1eel,
-            0x6a80ca3493ae985el },
-          0 },
-        /* 10 << 184 */
-        { { 0xa3f4f521e447f2c4l,0x81b8da7a604291f0l,0xd680bc467d5926del,
-            0x84f21fd534a1202fl },
-          { 0x1d1e31814e9df3d8l,0x1ca4861a39ab8d34l,0x809ddeec5b19aa4al,
-            0x59f72f7e4d329366l },
-          0 },
-        /* 11 << 184 */
-        { { 0x2dfb9e08be0f4492l,0x3ff0da03e9d5e517l,0x03dbe9a1f79466a8l,
-            0x0b87bcd015ea9932l },
-          { 0xeb64fc83ab1f58abl,0x6d9598da817edc8al,0x699cff661d3b67e5l,
-            0x645c0f2992635853l },
-          0 },
-        /* 13 << 184 */
-        { { 0xd50e57c7d7fe71f3l,0x15342190bc97ce38l,0x51bda2de4df07b63l,
-            0xba12aeae200eb87dl },
-          { 0xabe135d2a9b4f8f6l,0x04619d65fad6d99cl,0x4a6683a77994937cl,
-            0x7a778c8b6f94f09al },
-          0 },
-        /* 15 << 184 */
-        { { 0x8dd1fb83425c6559l,0x7fc00ee60af06fdal,0xe98c922533d956dfl,
-            0x0f1ef3354fbdc8a2l },
-          { 0x2abb5145b79b8ea2l,0x40fd2945bdbff288l,0x6a814ac4d7185db7l,
-            0xc4329d6fc084609al },
-          0 },
-        /* 16 << 184 */
-        { { 0x511053e453544774l,0x834d0ecc3adba2bcl,0x4215d7f7bae371f5l,
-            0xfcfd57bf6c8663bcl },
-          { 0xded2383dd6901b1dl,0x3b49fbb4b5587dc3l,0xfd44a08d07625f62l,
-            0x3ee4d65b9de9b762l },
-          0 },
-        /* 17 << 184 */
-        { { 0x55ef9d3dcc26e8b0l,0xf869c827729b707al,0xdbbf450d8c47e00cl,
-            0x73d546ea60972ed7l },
-          { 0x9563e11f0dcd6821l,0xe48e1af57d80de7fl,0xbe7139b49057838dl,
-            0xf3f0ad4d7e5ca535l },
-          0 },
-        /* 19 << 184 */
-        { { 0xac66d1d49f8f8cc2l,0x43fe5c154ef18941l,0xbae77b6ddc30fcbfl,
-            0xdb95ea7d945723b7l },
-          { 0x43298e2bda8097e2l,0x8004167baf22ea9bl,0x9cf5974196a83d57l,
-            0xb35c9aba3cf67d5el },
-          0 },
-        /* 21 << 184 */
-        { { 0x0569a48df766f793l,0x6b4c7b16706b3442l,0xcc97754416ff41e0l,
-            0x800c56e31fee2e86l },
-          { 0xce0c3d0fcdf93450l,0x6ec3703582f35916l,0x902520d5bbc11e68l,
-            0x7e2b988505078223l },
-          0 },
-        /* 23 << 184 */
-        { { 0xb30d1769101da00bl,0xb26872d5113cfdb6l,0x7b0491da44e48db5l,
-            0x810e73bb2013f8c9l },
-          { 0xc86e579a570f0b59l,0xf34107e37a918f34l,0x49286d00277473f1l,
-            0x74423f5abc85905dl },
-          0 },
-        /* 25 << 184 */
-        { { 0x90d7417879de6b48l,0xe762caf0d14fa75bl,0xa309dcf3bd91ec5dl,
-            0x7aafe1ddf526d04fl },
-          { 0x76911342d39e36ffl,0xe28994d2fabb34b8l,0xac23a92c863110cbl,
-            0x9f0f69673aabd166l },
-          0 },
-        /* 27 << 184 */
-        { { 0x7436bdf47e333f98l,0x879cf31f2455af64l,0x07933a9cf6cfde92l,
-            0xfcac38a5b6e3203fl },
-          { 0xa39b6a8098e5a6e0l,0x1d600b5da4837528l,0x54718de7c32d412bl,
-            0x02870f46317937ccl },
-          0 },
-        /* 28 << 184 */
-        { { 0x1f13756db1761ec8l,0xe53c8b98a4b97e55l,0xb2aee3f84096cc28l,
-            0x48c361a0920f1a8dl },
-          { 0xa98b672d8c31190al,0x7bc1e7d1001855d4l,0x242cfb07bf3f4b2al,
-            0x9bf44a3f32a28bc4l },
-          0 },
-        /* 29 << 184 */
-        { { 0x96d4b271e36eeccdl,0x2d8c01b859237e23l,0x24f7a6eb8adf2653l,
-            0xc08ac4ab41183d80l },
-          { 0xc35e5bb7036367c3l,0xd8c97cbc0ba59f61l,0x296b1f4c5aafe986l,
-            0xa519c7a17d179c37l },
-          0 },
-        /* 31 << 184 */
-        { { 0x4043490790ae5f49l,0x8ac8f73649556b81l,0xb57a89b0f4e77a16l,
-            0xe1a1565d071020eal },
-          { 0x4a27f34d3dda8450l,0x65af18b9bc395814l,0xaf21939f9ff49991l,
-            0x47e00639b4af7691l },
-          0 },
-        /* 33 << 184 */
-        { { 0x4b3e263246b1f9b2l,0x6457d838efde99d3l,0x77d5142325e56171l,
-            0xb45de3df7d54996cl },
-          { 0x1ee2dd3194098d98l,0x986896141f3ebdc5l,0x2704a107997efb47l,
-            0x96b502eecb11e520l },
-          0 },
-        /* 34 << 184 */
-        { { 0x58c8039ec19f866el,0xc84c053e386c2644l,0xb3708ab049435704l,
-            0x1b70c3c86fc47b24l },
-          { 0x235582a27f095649l,0x0d344b66673c9a9el,0x777c9e71e2b00efdl,
-            0x91691d6e5b877856l },
-          0 },
-        /* 35 << 184 */
-        { { 0x11c663c49cd31e22l,0x46ae0bd95fb943d7l,0x6e36bca6a392fc01l,
-            0x4f8cc3a77948716fl },
-          { 0x10ae9d6b3aa4bbb0l,0xcc9b6cb5d8001a86l,0x012c8e3aa0a4ceedl,
-            0xe462971e52274942l },
-          0 },
-        /* 36 << 184 */
-        { { 0x9982e2ac42e176a5l,0x324eba46e2782b64l,0x3d8caaafe18350f5l,
-            0xf3d82af2f5d674cal },
-          { 0xc2090fed56600d1el,0x4548e0ef5950de07l,0xb2f0023f765a4febl,
-            0xb303103339f16790l },
-          0 },
-        /* 37 << 184 */
-        { { 0xb94095dc7bdacf7al,0x0e73db39509b310al,0x76e99a6b41b5f772l,
-            0xef40e9c596f3dbd7l },
-          { 0xd0d644f980f2179el,0xe0db831d5a89807el,0xa0188493c2a2d6c6l,
-            0xf2d9a85e5ba9faa9l },
-          0 },
-        /* 39 << 184 */
-        { { 0x598b7876cdd95b93l,0x5f7cc827336966e8l,0x01887109e797f102l,
-            0x665671c446c7c296l },
-          { 0xb314793c6e019c72l,0x5a6c81580e0329acl,0x4faf2f1b44281b98l,
-            0x825884072e1fc97el },
-          0 },
-        /* 40 << 184 */
-        { { 0xa692781d61a3c8b3l,0x08bc385432876d0el,0xbecf05fb28027b03l,
-            0x636c687da4b1e12fl },
-          { 0x00e3003d07217c58l,0x613ba9375e01b2a3l,0xa58c8405881de16el,
-            0xc653c43014f8f48bl },
-          0 },
-        /* 41 << 184 */
-        { { 0x68e53c7c89c0c7c2l,0xf2e680b23c423272l,0xacd47fae60f50133l,
-            0x4c484c6534f05605l },
-          { 0x663bdcf9ebffbb7dl,0xb49cff3be42421c6l,0x0549f7b13f53f261l,
-            0xc516aeda7c374766l },
-          0 },
-        /* 43 << 184 */
-        { { 0xa515fe0f76a0ec26l,0xf727c0797b0b8b21l,0xaeed4c671993651el,
-            0x1465a7f828ac7c87l },
-          { 0x776bd5131f0ef90bl,0x57515d2cd9773e61l,0x235455e95564c50bl,
-            0xf44daef80bf06a24l },
-          0 },
-        /* 44 << 184 */
-        { { 0xbc1c6897d6a0d0f9l,0xd8e0ea0e3b0d7f55l,0xb35baa92b85b7aadl,
-            0x2becd1b7674e48f4l },
-          { 0xe2d7f78d6d7a9ac2l,0xf5074262f99c95d0l,0x4852470a89f611e9l,
-            0xf7aa911992869decl },
-          0 },
-        /* 45 << 184 */
-        { { 0x0bd1755b0ac4840fl,0x0f4c6c2aa22eef10l,0x3f72fe2d78d16dd9l,
-            0xb2d49200ff7096a4l },
-          { 0xa5dead555ffca031l,0x1d013c320b65f4cfl,0x67e498582a23f441l,
-            0x55bae166d02412c0l },
-          0 },
-        /* 46 << 184 */
-        { { 0x546dd4545739a62al,0x353dc1422a30b836l,0x1462449d99cbd704l,
-            0xda02d0772da69411l },
-          { 0xcb115fe565b1a1adl,0x395235f501230a22l,0x8ae630eed164d970l,
-            0x60b679f0074e3a7el },
-          0 },
-        /* 47 << 184 */
-        { { 0x2e64695245d231e1l,0xc96663ac00d8a0fbl,0xc1fbaa0cd07e1f41l,
-            0x4b31484488758781l },
-          { 0xd6971a835183e72el,0xd1d01f174cbe99b7l,0xe90b438c5a2f7512l,
-            0xf858fa452957c620l },
-          0 },
-        /* 48 << 184 */
-        { { 0xed7f2e774e6daae2l,0x7b3ae0e39e0a19bcl,0xd3293f8a91ae677el,
-            0xd363b0cb45c8611fl },
-          { 0xbe1d1ccf309ae93bl,0xa3f80be73920cae1l,0xaaacba74498edf01l,
-            0x1e6d2a4ab2f5ac90l },
-          0 },
-        /* 49 << 184 */
-        { { 0xb5c5bb67b972a778l,0xc2423a4a190f9b5al,0x4e693cf365247948l,
-            0xc37d129ea94a65a3l },
-          { 0xbea4736b6e9cd47bl,0xf3d1bd212338f524l,0xa2a0278e067a45dal,
-            0xc86d631b5b5dce9bl },
-          0 },
-        /* 51 << 184 */
-        { { 0xc2d75f46116952cel,0xd2b66269b75e40dal,0x024f670f921c4111l,
-            0x37ffd854c91fd490l },
-          { 0x6be44d0385b2f613l,0x040cd7d9ba11c4f9l,0x04c1cb762c0efb1fl,
-            0xd905ff4f505e4698l },
-          0 },
-        /* 52 << 184 */
-        { { 0x60c5f03f233550f1l,0xd4d09411925afd2el,0xa95b65c3d258e5a6l,
-            0x1a19cfb59f902c6al },
-          { 0xb486013af5ad5c68l,0xa2506776979638f3l,0x1232b4d0a38e0b28l,
-            0xa64784b8d36a7b4fl },
-          0 },
-        /* 53 << 184 */
-        { { 0x22c75830a13dcb47l,0xd6e81258efd7a08fl,0x6db703b6e4fc49b8l,
-            0x8a5ac636f01817e9l },
-          { 0x8d27b6e1b3f24514l,0x40edc3bc708c51d7l,0x9a1eec7765bb086dl,
-            0x812ccb42b10800f8l },
-          0 },
-        /* 55 << 184 */
-        { { 0x1a39c6acd4338453l,0x3d93822954b1295dl,0x7bf0bf45e0d81165l,
-            0x83d58ca5972804d2l },
-          { 0x105d3ddb00524b94l,0x65d516e7920378ecl,0x1d28f5f1aea33926l,
-            0xa0b354313901c906l },
-          0 },
-        /* 57 << 184 */
-        { { 0x000442a1e4f354del,0x165b44d9d1d112f5l,0x67fd9ced0d05c0a9l,
-            0xd6ce074360bd5d60l },
-          { 0x9ac80c931522af2al,0x8232d522fa07d449l,0x287b5534c3fdb652l,
-            0x9f0548b3abd2ab98l },
-          0 },
-        /* 59 << 184 */
-        { { 0xde8d7086b9aea1d4l,0x692180d98a7dc3fcl,0xd64ffb53bad3e6f3l,
-            0x84628acf36ce3f91l },
-          { 0xf76e470b6d498ac5l,0xa16945547abad602l,0x5b8fd6a5a255c1f6l,
-            0xffe24e4a8576ae2al },
-          0 },
-        /* 60 << 184 */
-        { { 0x5655179de7d70e03l,0x3e780c5c72a84570l,0xc102b4cb1d50029cl,
-            0x3e71bdd5f075e839l },
-          { 0x6460f4f0b498b822l,0x2682e06c6d4b8da5l,0x4eae53c996a740d4l,
-            0xc19d8bef6389702cl },
-          0 },
-        /* 61 << 184 */
-        { { 0x711be2081025fe1dl,0x2e562c89f0bc6a99l,0xcfd2be3a28bf4150l,
-            0x33037b4a38e5bc91l },
-          { 0x10c6da9df52fea02l,0x511f62444f0ea410l,0x19d37ca81a294c3fl,
-            0x7e40f444618e6fd3l },
-          0 },
-        /* 63 << 184 */
-        { { 0x4095f5ddbedb8734l,0x9c16027c4432f51al,0xced8179d873d0f11l,
-            0x70c2bc9f6ebe6e61l },
-          { 0x5c31035d616cf2f4l,0xf92e0fbd00a4af3dl,0xe6048a03511893c4l,
-            0x639a804b52e2f462l },
-          0 },
-        /* 64 << 184 */
-        { { 0x8735728dc2c6ff70l,0x79d6122fc5dc2235l,0x23f5d00319e277f9l,
-            0x7ee84e25dded8cc7l },
-          { 0x91a8afb063cd880al,0x3f3ea7c63574af60l,0x0cfcdc8402de7f42l,
-            0x62d0792fb31aa152l },
-          0 },
-        /* 65 << 184 */
-        { { 0x0f4bcefd9da373e4l,0x7278f44d119271a3l,0xb2dff94449e111c0l,
-            0xb0a3abf8e5d2b2d4l },
-          { 0x01baabb48ea80631l,0x27517ed3da305f85l,0x0a1ca6fc3f56aa86l,
-            0x183d9c7694c22839l },
-          0 },
-        /* 71 << 184 */
-        { { 0xe9a0dfbf22e238d7l,0x8690dfd97e8d8d31l,0xb3cb2a0d4006c59cl,
-            0xe4d297caa1850d74l },
-          { 0x066f10517842d14cl,0x68dd32737d43602bl,0x1f9f5cf931345f39l,
-            0x44f18c2b10593890l },
-          0 },
-        /* 77 << 184 */
-        { { 0x8d8c0233a7c3f60bl,0xfb59fe2d2bcbbd4cl,0xfa311680dc3e5b44l,
-            0xb3cba9f3fbea5eedl },
-          { 0xcb353b2f61e0e690l,0x06edf0c1b6e0efe0l,0xa29578cb1d0c02a2l,
-            0xaeb2d677937fec07l },
-          0 },
-        /* 83 << 184 */
-        { { 0xa19a81c5cdd0cac9l,0x5c10b942ec9cf85bl,0x0843ef4639e8c298l,
-            0xcfd45d0e6c043258l },
-          { 0x1011bcb9fb7e4b58l,0xae6362a544402bbdl,0x9ecc8c68ec15d751l,
-            0xbc05998869d1a00bl },
-          0 },
-        /* 89 << 184 */
-        { { 0xe9a43619460147e3l,0x881a6af423067448l,0x94f93ae6cee17a6bl,
-            0x469e692f10782558l },
-          { 0x01e244a1289bdb32l,0x240645779dddf970l,0x664cbd92d8f521ecl,
-            0xadaf8ffb600222d0l },
-          0 },
-        /* 95 << 184 */
-        { { 0x68314c740dbec437l,0x2095e1295ec75e2cl,0x8e88a3ddf0e6c606l,
-            0x40ac647d1230f6b2l },
-          { 0x09d124aaa2e6b991l,0xa22f9e2bcc81037cl,0xc842b64d15c3a1c2l,
-            0x4d822becce808c65l },
-          0 },
-        /* 101 << 184 */
-        { { 0xb02204d06ffb396bl,0x82eb6ecc881bead6l,0xf58432cebd6896c8l,
-            0xc243468da38f4b9dl },
-          { 0x8486402df8e628bdl,0x5dd338a1a4df2401l,0x748a41ab0daac953l,
-            0xaa121d13e51e6235l },
-          0 },
-        /* 107 << 184 */
-        { { 0x6daa0a4e50abc6aal,0x99fcc5bdeafb7cf2l,0xc705f64c4b8dbd2al,
-            0x7deff836e7b51e90l },
-          { 0xd92f42b859a8180fl,0x3bb298f8618d24acl,0x2433aa7357a56438l,
-            0xcf29895b48a6a238l },
-          0 },
-        /* 113 << 184 */
-        { { 0x74079dc59ed25aafl,0x7988245c023d5143l,0x7edfc6a6feb79c24l,
-            0x7ed03c50a6baa70fl },
-          { 0x71d3413596a753b4l,0x59efbafcef976246l,0xed050260a4a6947fl,
-            0xabbc1f8066254247l },
-          0 },
-        /* 116 << 184 */
-        { { 0x1f804e00caa4646fl,0x8643dc8870944924l,0xa37f1ca273f86de9l,
-            0xa3199f9228889898l },
-          { 0xc273ba580c1e4adfl,0x0f0d38af65bc82f0l,0xd8b28ab5f8a6cd3bl,
-            0xeea6e08575894d8el },
-          0 },
-        /* 119 << 184 */
-        { { 0x398f39132c1620f7l,0x9046d2dea921f3a3l,0x40a25a2785b50bb0l,
-            0xb9adeca0d32e95f3l },
-          { 0xa4199b1bdede5cbfl,0x9068aee084f5410bl,0x6665e4f5730f0397l,
-            0x2e9ba18c8ae20659l },
-          0 },
-        /* 125 << 184 */
-        { { 0xd76e9b2351835897l,0x72a0e000012deda6l,0x5bf08922bfec23e4l,
-            0x8c2fcf1385cf2b7bl },
-          { 0x6c42f935c63332c6l,0x8736c58395eccce9l,0x2d2abbb10721afc8l,
-            0x1f7a76cc42d4e029l },
-          0 },
-    },
-    {
-        /* 0 << 192 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 192 */
-        { { 0x56f8410ef4f8b16al,0x97241afec47b266al,0x0a406b8e6d9c87c1l,
-            0x803f3e02cd42ab1bl },
-          { 0x7f0309a804dbec69l,0xa83b85f73bbad05fl,0xc6097273ad8e197fl,
-            0xc097440e5067adc1l },
-          0 },
-        /* 3 << 192 */
-        { { 0x266344a43794f8dcl,0xdcca923a483c5c36l,0x2d6b6bbf3f9d10a0l,
-            0xb320c5ca81d9bdf3l },
-          { 0x620e28ff47b50a95l,0x933e3b01cef03371l,0xf081bf8599100153l,
-            0x183be9a0c3a8c8d6l },
-          0 },
-        /* 4 << 192 */
-        { { 0xb6c185c341dca566l,0x7de7fedad8622aa3l,0x99e84d92901b6dfbl,
-            0x30a02b0e7c4ad288l },
-          { 0xc7c81daa2fd3cf36l,0xd1319547df89e59fl,0xb2be8184cd496733l,
-            0xd5f449eb93d3412bl },
-          0 },
-        /* 5 << 192 */
-        { { 0x25470fabe085116bl,0x04a4337587285310l,0x4e39187ee2bfd52fl,
-            0x36166b447d9ebc74l },
-          { 0x92ad433cfd4b322cl,0x726aa817ba79ab51l,0xf96eacd8c1db15ebl,
-            0xfaf71e910476be63l },
-          0 },
-        /* 7 << 192 */
-        { { 0x72cfd2e949dee168l,0x1ae052233e2af239l,0x009e75be1d94066al,
-            0x6cca31c738abf413l },
-          { 0xb50bd61d9bc49908l,0x4a9b4a8cf5e2bc1el,0xeb6cc5f7946f83acl,
-            0x27da93fcebffab28l },
-          0 },
-        /* 9 << 192 */
-        { { 0x3ce519ef76257c51l,0x6f5818d318d477e7l,0xab022e037963edc0l,
-            0xf0403a898bd1f5f3l },
-          { 0xe43b8da0496033cal,0x0994e10ea1cfdd72l,0xb1ec6d20ba73c0e2l,
-            0x0329c9ecb6bcfad1l },
-          0 },
-        /* 10 << 192 */
-        { { 0xf1ff42a12c84bd9dl,0x751f3ec4390c674al,0x27bb36f701e5e0cal,
-            0x65dfff515caf6692l },
-          { 0x5df579c4cd7bbd3fl,0xef8fb29785591205l,0x1ded7203e47ac732l,
-            0xa93dc45ccd1c331al },
-          0 },
-        /* 11 << 192 */
-        { { 0xbdec338e3318d2d4l,0x733dd7bbbe8de963l,0x61bcc3baa2c47ebdl,
-            0xa821ad1935efcbdel },
-          { 0x91ac668c024cdd5cl,0x7ba558e4c1cdfa49l,0x491d4ce0908fb4dal,
-            0x7ba869f9f685bde8l },
-          0 },
-        /* 13 << 192 */
-        { { 0xed1b5ec279f464bal,0x2d65e42c47d72e26l,0x8198e5749e67f926l,
-            0x4106673834747e44l },
-          { 0x4637acc1e37e5447l,0x02cbc9ecf3e15822l,0x58a8e98e805aa83cl,
-            0x73facd6e5595e800l },
-          0 },
-        /* 15 << 192 */
-        { { 0x468ff80338330507l,0x06f34ddf4037a53el,0x70cd1a408d6993a4l,
-            0xf85a159743e5c022l },
-          { 0x396fc9c2c125a67dl,0x03b7bebf1064bfcbl,0x7c444592a9806dcbl,
-            0x1b02614b4487cd54l },
-          0 },
-        /* 16 << 192 */
-        { { 0x8303604f692ac542l,0xf079ffe1227b91d3l,0x19f63e6315aaf9bdl,
-            0xf99ee565f1f344fbl },
-          { 0x8a1d661fd6219199l,0x8c883bc6d48ce41cl,0x1065118f3c74d904l,
-            0x713889ee0faf8b1bl },
-          0 },
-        /* 17 << 192 */
-        { { 0xb47b60f70de21bb6l,0x64acae4fdcd836cal,0x3375ea6dc744ce63l,
-            0xb764265fb047955bl },
-          { 0xc68a5d4c9841c2c3l,0x60e98fd7cf454f60l,0xc701fbe2756aea0cl,
-            0x09c8885eaab21c79l },
-          0 },
-        /* 19 << 192 */
-        { { 0x45bb810869d2d46cl,0xe47c8b3968c8365al,0xf3b87663267551bdl,
-            0x1590768f5b67547al },
-          { 0x371c1db2fb2ed3ffl,0xe316691917a59440l,0x03c0d178df242c14l,
-            0x40c93fceed862ac1l },
-          0 },
-        /* 21 << 192 */
-        { { 0x1286da692bc982d6l,0x5f6d80f27bdae7e3l,0x3d9c5647a6f064fbl,
-            0xfdc8e6a1d74c1540l },
-          { 0x97da48c6d68b135al,0xc2097979d66dbfffl,0x0296adb9ea20531dl,
-            0xa333730d4ab2c8f0l },
-          0 },
-        /* 23 << 192 */
-        { { 0x0eb3565429847fedl,0xfdc142860a673dd0l,0x721b36278b62dd0bl,
-            0x105a293e711a5771l },
-          { 0xdf001cce7f761927l,0xf7b681b011d04c7dl,0x16dff792a3ac1996l,
-            0x580c120b0fc4ae30l },
-          0 },
-        /* 25 << 192 */
-        { { 0x31ea3d4f7ee8d0bcl,0x3832f22a0f42c3dcl,0xc661061a1a87a2f4l,
-            0x0978c9f64b45576bl },
-          { 0xb7abac3c6dfb5fd2l,0x27f36a00b7e01b90l,0x68f733cde9429e36l,
-            0x953a4681dcbfe8cbl },
-          0 },
-        /* 27 << 192 */
-        { { 0xbfb7c41067fe1eafl,0xa2073c6a6929a785l,0x6f2536f4a75fdb79l,
-            0x859ad26d809bca69l },
-          { 0x06f2c0693b197e7bl,0x656ad9f48ec0a573l,0xe7c7901f9a4d0262l,
-            0xbec29443b938602bl },
-          0 },
-        /* 28 << 192 */
-        { { 0xd00397fc0f0073a4l,0x5b668fa46f8d675fl,0x14374ac91522108cl,
-            0x92efa7d10283e42el },
-          { 0x673e6df90b6d024al,0x05f914d457581f26l,0xf5c8516267df8c12l,
-            0x1197f1b4e06c2462l },
-          0 },
-        /* 29 << 192 */
-        { { 0x6e2d1cb3dd9c90c1l,0x28f82d5a7990579el,0x90e189cd06226195l,
-            0xbd2939df19b0dc74l },
-          { 0x18b18505c0917177l,0xeed5470d3117d9c4l,0x39ef92eb6c893ca0l,
-            0x4533ef8244a41940l },
-          0 },
-        /* 31 << 192 */
-        { { 0xcaee9dec34943ddal,0x8e50e98e8b4b6782l,0x24358ea591ea3a1fl,
-            0x71c4c827a9e1c194l },
-          { 0xa38baa5d09bb7a94l,0xfb4ab4c057b58f9cl,0x4a01065e24e0ee19l,
-            0xb9cf805107b877bfl },
-          0 },
-        /* 33 << 192 */
-        { { 0xd38c1ce0a2980d5el,0x8b84cca4541face7l,0x93298136dbd8d05dl,
-            0x582708d03f85c85al },
-          { 0x6545eec7282960e4l,0x92e184aebaadec07l,0x05452564fd27a20fl,
-            0x79d4668abddce6ebl },
-          0 },
-        /* 34 << 192 */
-        { { 0xf5cc5cccf5191707l,0xe800328bd5d01f67l,0x0572012ebd9b1599l,
-            0xf5be11a6863d0125l },
-          { 0x4da7ca876ea441e0l,0x47dbf83b321b134al,0x5cbadcdac1acfb4al,
-            0x19ac798a734f8e25l },
-          0 },
-        /* 35 << 192 */
-        { { 0xe312623a7002114fl,0xb888b637e047686bl,0x23b2c270cbac91bdl,
-            0xb50b31884dbfe02dl },
-          { 0x8335ce43de97eef6l,0x6a4e65502bac193al,0xf2b35aac3101f720l,
-            0x5b2c88d5379a2015l },
-          0 },
-        /* 36 << 192 */
-        { { 0xf445e77131547128l,0x22761665e27811cal,0x9b944e91a37c6681l,
-            0xc0aa06a536899860l },
-          { 0x8c2b5816cfcd557el,0xf2734a19945aa357l,0x536ca07ca55a0049l,
-            0x8328fdccc636d967l },
-          0 },
-        /* 37 << 192 */
-        { { 0x52b513616aca06bdl,0x8d19b893cdf16560l,0x06b28179c3b438cdl,
-            0xde1ef747cd1819e4l },
-          { 0xbc6cc43b5f557985l,0xa277e11f61e0142al,0x58890f1e429cc392l,
-            0x28d17dbfe5fc8f5el },
-          0 },
-        /* 39 << 192 */
-        { { 0x556df61a29a8f7cbl,0x5cf554dfd14ab27al,0x243f933ba755b886l,
-            0xa4d0b06ff2d4ce87l },
-          { 0xa745eb8d2c0f1d39l,0xc228747aea3047a5l,0xced774c41d2cecc0l,
-            0x54a55c3a774fb01al },
-          0 },
-        /* 40 << 192 */
-        { { 0xa691398a4a9eb3f0l,0x56c1dbff3b99a48fl,0x9a87e1b91b4b5b32l,
-            0xad6396145378b5fel },
-          { 0x437a243ec26b5302l,0x0275878c3ccb4c10l,0x0e81e4a21de07015l,
-            0x0c6265c9850df3c0l },
-          0 },
-        /* 41 << 192 */
-        { { 0x182c3f0e6be95db0l,0x8c5ab38cae065c62l,0xcce8294ebe23abacl,
-            0xed5b65c47d0add6dl },
-          { 0xbce57d78cc9494cal,0x76f75c717f435877l,0xb3084b2eb06560a9l,
-            0x67216bc850b55981l },
-          0 },
-        /* 43 << 192 */
-        { { 0x49c9fd92557de68bl,0x357aa44fc3151b7al,0xd36286d11e4aebd0l,
-            0x84562cd736a51203l },
-          { 0x42a57e7c3cacc002l,0x794a47751b1e25a3l,0x2c2ab68cac0d4356l,
-            0xececb6addb31afdcl },
-          0 },
-        /* 44 << 192 */
-        { { 0x47a5f010b4c21bfel,0x45c5610f0ac3dc20l,0x20e689fcea3bf4dcl,
-            0xf244ea49fb5f46e4l },
-          { 0xd918e59e8ca38e45l,0x7d6c601d96189a6fl,0x1a40f03854138471l,
-            0xfe867d7308a9d034l },
-          0 },
-        /* 45 << 192 */
-        { { 0x3b49e489100c0410l,0x8831d3992adc2b29l,0xb6726cd1247a8116l,
-            0x83a71a59d1d56d8el },
-          { 0x82ade2fe5cd333e9l,0x3b087ef83ea11f1al,0x17b96ca66ce879cel,
-            0xc2f74a971871dc43l },
-          0 },
-        /* 46 << 192 */
-        { { 0xa11a1e3680b576cel,0xf91278bbce2683e8l,0xc3bab95fbae8bc5bl,
-            0x642ca26397351715l },
-          { 0x5ffc14726fecbbc1l,0x2465e996a23f36d4l,0x06fc53bf5187d428l,
-            0x54b4014351fbce91l },
-          0 },
-        /* 47 << 192 */
-        { { 0x081ca6f0eafc7b2cl,0x1ba047a38c48703fl,0xe84865046663accfl,
-            0xde1f97568d43689cl },
-          { 0xf5373e1d5bc19f75l,0x4e48c493d64b0a54l,0x0c43f4e25807dbf6l,
-            0x73bef15167778c36l },
-          0 },
-        /* 48 << 192 */
-        { { 0xca6c0937b1b76ba6l,0x1a2eab854d2026dcl,0xb1715e1519d9ae0al,
-            0xf1ad9199bac4a026l },
-          { 0x35b3dfb807ea7b0el,0xedf5496f3ed9eb89l,0x8932e5ff2d6d08abl,
-            0xf314874e25bd2731l },
-          0 },
-        /* 49 << 192 */
-        { { 0x9d5322e89e9bba53l,0xdd7c9ceb989ff350l,0xd76147eadab0d7b3l,
-            0x8e45b1c6d7a9a9a1l },
-          { 0x8f896a91d4f10c10l,0x999a73c54068de06l,0x84a9d0839cf0a779l,
-            0x4d7cc7689f608ab2l },
-          0 },
-        /* 51 << 192 */
-        { { 0x1833ccddaee93c82l,0x6a05ef7b9f35f20fl,0xc538dac9ae413bc2l,
-            0x1e74f4658b4784bdl },
-          { 0xccb2bc4a49ffd544l,0x9b88183d2b17ae88l,0x96037a136e43824fl,
-            0xbbb61441480bf3dfl },
-          0 },
-        /* 52 << 192 */
-        { { 0x13319d20e090ad42l,0x4ff3186e12cbb719l,0xf38e504913fc0a46l,
-            0x83185a1254e60378l },
-          { 0x08c4057797ea8935l,0x7b2212a946b614f9l,0xedcdfa520634cfb3l,
-            0xdbc60eed9e7d5726l },
-          0 },
-        /* 53 << 192 */
-        { { 0x9b0785c6c7e1070fl,0xec112f53cbf561e5l,0xc93511e37fab3464l,
-            0x9e6dc4da9de8e0c2l },
-          { 0x7733c425e206b4eel,0xb8b254ef50cedf29l,0xfaee4bbbd50ad285l,
-            0x216e76d58c4eb6cfl },
-          0 },
-        /* 55 << 192 */
-        { { 0x9d6a28641d51f254l,0x26c5062a0c2822c3l,0xd74ebba8334bf4eel,
-            0x6e5446eb0b8f7305l },
-          { 0x5988ae8eb629beccl,0x71e576d0a1de7d1dl,0x15e39592a8873970l,
-            0x2b1f9a9342ecc74el },
-          0 },
-        /* 57 << 192 */
-        { { 0xcbdb70727c519bf9l,0x112986bbcaaf48e6l,0x64d4c6d1a13baf3cl,
-            0x85ccf6f7a065e77el },
-          { 0x183be337749beaedl,0xb3703096cba6c9b1l,0x1edf81f0e42b8afel,
-            0xf04ed594ccb73ad7l },
-          0 },
-        /* 59 << 192 */
-        { { 0xfa954ebc38491e9fl,0xf75a5808d32f0b03l,0x196d4a828083b9d3l,
-            0x92d5a0be5e8dc9fel },
-          { 0x4a507ae9aea628bal,0xeea5861e11a02fb5l,0xa033b84fd23ec8f7l,
-            0x1a68c36ec60f11d5l },
-          0 },
-        /* 60 << 192 */
-        { { 0x3dfb55bdab920ef2l,0xe0090971e6244484l,0xdc39fd08f7c6e1a3l,
-            0x1ca765356ee79e72l },
-          { 0x472c8985287d590cl,0x67635e35ad6daeb4l,0x06ec4e7980f9fee3l,
-            0x0aceb39921dc5fdbl },
-          0 },
-        /* 61 << 192 */
-        { { 0xdb2478fd9410a756l,0xd106aefe3a53a1e6l,0x1f4c940d14286333l,
-            0x6a98659d04950958l },
-          { 0x3232a1c6a6bbe060l,0x19ad132ca5e7ca9bl,0x3c9c13ef800fae29l,
-            0x9b0d9068b8660f49l },
-          0 },
-        /* 63 << 192 */
-        { { 0x1e7f043795c53027l,0x5221e5c0da9a3806l,0xf297d8e379d9385fl,
-            0x4d69e95f78ba697el },
-          { 0xdda936cee76d13c1l,0xd9a5790a485b12f5l,0xeab84add51efbfd0l,
-            0xc9a3ee9ca9f44aa4l },
-          0 },
-        /* 64 << 192 */
-        { { 0xefb26a753f73f449l,0x1d1c94f88d44fc79l,0x49f0fbc53bc0dc4dl,
-            0xb747ea0b3698a0d0l },
-          { 0x5218c3fe228d291el,0x35b804b543c129d6l,0xfac859b8d1acc516l,
-            0x6c10697d95d6e668l },
-          0 },
-        /* 65 << 192 */
-        { { 0x8c12e87a15454db4l,0xbc1fc546908e8fbcl,0xc35d83c7e4cf1636l,
-            0xcb2f5ac820641524l },
-          { 0x2400aae2e644ecd0l,0x9b01e2d14be37119l,0x6cffd52831b54857l,
-            0xb3fd5d864b5cbf81l },
-          0 },
-        /* 71 << 192 */
-        { { 0x2e999a4739709fb9l,0x4cb4bbdb62c2b30fl,0x4c7259ac09de0c92l,
-            0x73c1e34f8c59a0ffl },
-          { 0x0a9e5f2e48cb0a12l,0x5e07449fcf499bb0l,0x0527a8b4b02c4a54l,
-            0x7381287159da01e4l },
-          0 },
-        /* 77 << 192 */
-        { { 0xe0b876ca0548ff87l,0x74b5a9b25e03bae3l,0xd5564cc5dd0642d2l,
-            0x29ed211b668c4977l },
-          { 0xf29d3b7aa7422b11l,0x17f2d3586d29b8bal,0x2e35cdda2bb887del,
-            0x650f148078e4444bl },
-          0 },
-        /* 83 << 192 */
-        { { 0x8c75532fb47435ebl,0x2234e2c5a113f905l,0x27b75fea31508ae9l,
-            0x09733e40d489ad0bl },
-          { 0x73b38464a1b06da1l,0x0aed522dc5b7ccf2l,0xcc04783e78d7e5afl,
-            0xa81c8a8ff23eaab7l },
-          0 },
-        /* 89 << 192 */
-        { { 0x6bb5eca73c149ffal,0x4593d851c536487al,0x3675daaad85eb9edl,
-            0xbf65d0f9b8a58ffbl },
-          { 0x1dc6ddddc22e83eel,0xb673397ee10d3c17l,0x6bdc20600ca62c93l,
-            0x260389c30b821f6dl },
-          0 },
-        /* 95 << 192 */
-        { { 0x45f5cf07b417be10l,0x0acb1a44e5d561d8l,0x54b7baeafb1dfbe9l,
-            0x0e6e66219044672el },
-          { 0xa9b6db6d9a793601l,0xd70eadb8a4a0ba4al,0xaedace846098b89el,
-            0x970f2c23ac39d40fl },
-          0 },
-        /* 101 << 192 */
-        { { 0x9dff8d289c7eaaa8l,0x38bcd076db0cc361l,0x25760147cdea9db8l,
-            0x44c89dd40163f343l },
-          { 0x18815d7544db8365l,0xa186d57b37f3e4b3l,0xa71de7806e84a7fal,
-            0xf1c08989e56646b3l },
-          0 },
-        /* 107 << 192 */
-        { { 0xad73e1448fb56a43l,0x078c14fb715543c9l,0xa57770fd64b92d54l,
-            0xf0420a9277e9b919l },
-          { 0xc660d0cb588ccc1dl,0x069baa1471415c2el,0x747438dc32982740l,
-            0x4782ce08767381eel },
-          0 },
-        /* 113 << 192 */
-        { { 0xc2a1ee5fdb3b6b5dl,0x08ce544820e1339fl,0x3cb954b77073955fl,
-            0xb9ed2ee7f32d0832l },
-          { 0xc0a998b1b4aac98el,0x4912273dbca4bac7l,0xac0f5014c3f92c4al,
-            0xbf3dc27f9e916e78l },
-          0 },
-        /* 116 << 192 */
-        { { 0x222c7bae28833944l,0xbb78a867f5e3cf67l,0x590cbd96faf6cfd6l,
-            0x1c50aecb3b0d842el },
-          { 0x8f2c5df1dbade9a5l,0x60923fb7e3840cecl,0xe8f2db6b03a67512l,
-            0x90af187be0d7c628l },
-          0 },
-        /* 119 << 192 */
-        { { 0xb4162b615fee3ccbl,0xe9786e7d7327e651l,0x6c85bd938812d9c1l,
-            0xfe4905083dc9e838l },
-          { 0xe66f25178a6765dfl,0x72fd294edeee184cl,0x07608bd27b6ec227l,
-            0x9df7b664dfdaa5e6l },
-          0 },
-        /* 125 << 192 */
-        { { 0x4aea16602d53a155l,0x7285069a32ab07fdl,0xf6f3000d8b6fcd19l,
-            0x010b1f246e98953fl },
-          { 0xe180bc559f9aa221l,0x7717ee383cba4534l,0x5997f3aa36cbda06l,
-            0x54c6090064a04b05l },
-          0 },
-    },
-    {
-        /* 0 << 200 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 200 */
-        { { 0x25914f7881fdad90l,0xcf638f560d2cf6abl,0xb90bc03fcc054de5l,
-            0x932811a718b06350l },
-          { 0x2f00b3309bbd11ffl,0x76108a6fb4044974l,0x801bb9e0a851d266l,
-            0x0dd099bebf8990c1l },
-          0 },
-        /* 3 << 200 */
-        { { 0xebd6a6777b0ac93dl,0xa6e37b0d78f5e0d7l,0x2516c09676f5492bl,
-            0x1e4bf8889ac05f3al },
-          { 0xcdb42ce04df0ba2bl,0x935d5cfd5062341bl,0x8a30333382acac20l,
-            0x429438c45198b00el },
-          0 },
-        /* 4 << 200 */
-        { { 0xfb2838be67e573e0l,0x05891db94084c44bl,0x9131137396c1c2c5l,
-            0x6aebfa3fd958444bl },
-          { 0xac9cdce9e56e55c1l,0x7148ced32caa46d0l,0x2e10c7efb61fe8ebl,
-            0x9fd835daff97cf4dl },
-          0 },
-        /* 5 << 200 */
-        { { 0x6c626f56c1770616l,0x5351909e09da9a2dl,0xe58e6825a3730e45l,
-            0x9d8c8bc003ef0a79l },
-          { 0x543f78b6056becfdl,0x33f13253a090b36dl,0x82ad4997794432f9l,
-            0x1386493c4721f502l },
-          0 },
-        /* 7 << 200 */
-        { { 0xe566f400b008733al,0xcba0697d512e1f57l,0x9537c2b240509cd0l,
-            0x5f989c6957353d8cl },
-          { 0x7dbec9724c3c2b2fl,0x90e02fa8ff031fa8l,0xf4d15c53cfd5d11fl,
-            0xb3404fae48314dfcl },
-          0 },
-        /* 9 << 200 */
-        { { 0xf02cc3a9f327a07fl,0xefb27a9b4490937dl,0x81451e96b1b3afa5l,
-            0x67e24de891883be4l },
-          { 0x1ad65d4770869e54l,0xd36291a464a3856al,0x070a1abf7132e880l,
-            0x9511d0a30e28dfdfl },
-          0 },
-        /* 10 << 200 */
-        { { 0xfdeed650f8d1cac4l,0xeb99194b6d16bda5l,0xb53b19f71cabbe46l,
-            0x5f45af5039b9276cl },
-          { 0xd0784c6126ee9d77l,0xf7a1558b0c02ca5dl,0xb61d6c59f032e720l,
-            0xae3ffb95470cf3f7l },
-          0 },
-        /* 11 << 200 */
-        { { 0x9b185facc72a4be5l,0xf66de2364d848089l,0xba14d07c717afea9l,
-            0x25bfbfc02d551c1cl },
-          { 0x2cef0ecd4cdf3d88l,0x8cee2aa3647f73c4l,0xc10a7d3d722d67f7l,
-            0x090037a294564a21l },
-          0 },
-        /* 13 << 200 */
-        { { 0x6ac07bb84f3815c4l,0xddb9f6241aa9017el,0x31e30228ca85720al,
-            0xe59d63f57cb75838l },
-          { 0x69e18e777baad2d0l,0x2cfdb784d42f5d73l,0x025dd53df5774983l,
-            0x2f80e7cee042cd52l },
-          0 },
-        /* 15 << 200 */
-        { { 0x43f18d7f4d6ee4abl,0xd3ac8cde9570c3dcl,0x527e49070b8c9b2al,
-            0x716709a7c5a4c0f1l },
-          { 0x930852b0916a26b1l,0x3cc17fcf4e071177l,0x34f5e3d459694868l,
-            0xee0341aba28f655dl },
-          0 },
-        /* 16 << 200 */
-        { { 0xf431f462060b5f61l,0xa56f46b47bd057c2l,0x348dca6c47e1bf65l,
-            0x9a38783e41bcf1ffl },
-          { 0x7a5d33a9da710718l,0x5a7799872e0aeaf6l,0xca87314d2d29d187l,
-            0xfa0edc3ec687d733l },
-          0 },
-        /* 17 << 200 */
-        { { 0x4b764317aa365220l,0x7a24affe68cc0355l,0x76732ed0ceb3df5el,
-            0x2ce1332aae096ed0l },
-          { 0x89ce70a7b8adac9dl,0xfdddcf05b3fc85c8l,0xbd7b29c6f2ee8bfel,
-            0xa1effcb9457d50f3l },
-          0 },
-        /* 19 << 200 */
-        { { 0x6053972dac953207l,0xc2ca9a8408ad12f6l,0x9ed6cd386ba36190l,
-            0xa5b50a48539d18a4l },
-          { 0xd9491347dbf18c2al,0x2cdce4662e9697cfl,0x4e97db5ca9e31819l,
-            0x0fb02e2d4c044b74l },
-          0 },
-        /* 21 << 200 */
-        { { 0x66a4dd414aa5e9ddl,0x6ec7576e64f6aeb9l,0x3f08ce06c7e980b5l,
-            0x52fe9fd6c1a2aa7el },
-          { 0xfe46e6d95074326al,0xd570ed734c126c1dl,0x86c7ec257217d55al,
-            0x3cb434057c3de2b2l },
-          0 },
-        /* 23 << 200 */
-        { { 0x48e0295dcc9e79bfl,0x2419485693eb403dl,0x9386fb7709dd8194l,
-            0xb6e89bb101a242f6l },
-          { 0xc7994f3924d308d7l,0xf0fbc392de673d88l,0x43eed52ea11abb62l,
-            0xc900f9d0c83e7fbel },
-          0 },
-        /* 25 << 200 */
-        { { 0x214a10dca8152891l,0xe6787b4c64f1abb2l,0x276333d9fa1a10edl,
-            0xc0e1c88e47dbccbcl },
-          { 0x8a3c37c4849dd12el,0x2144a8c8d86e109fl,0xbb6891f7286c140cl,
-            0xb0b8c5e29cce5e6fl },
-          0 },
-        /* 27 << 200 */
-        { { 0x3f9e0e3499753288l,0x6b26f1ebe559d93al,0x647fe21d9841faf1l,
-            0x48a4b6efa786ea02l },
-          { 0x6e09cd22665a882dl,0x95390d81b63ccda6l,0x5b014db4b026a44al,
-            0x5b96efb22ad30ff1l },
-          0 },
-        /* 28 << 200 */
-        { { 0x64c50c8b4a3b99e9l,0x2489a675d0a26f4fl,0xe2aacaeed85bc6fdl,
-            0x556882038a6019bal },
-          { 0x7ceb9da645cfac07l,0xe1ad3d25652dbd09l,0x086adf348d3b5d2bl,
-            0xf9256d8aec3654a0l },
-          0 },
-        /* 29 << 200 */
-        { { 0x571c246bf009a690l,0x8fe54231ccd90d3al,0x8adde6adfe173b79l,
-            0x75d9a392b05a5e3bl },
-          { 0x607f47b0d1bb3a84l,0xe4e3b472058e691al,0xfc0f793bf3d956e3l,
-            0x6a6730b605de54dal },
-          0 },
-        /* 31 << 200 */
-        { { 0x4daf7f540d80aaa1l,0xc571d04c229c4574l,0x469e2da5fffca53dl,
-            0x9fffe29513ff7f59l },
-          { 0x2075da5a33a254f7l,0x769f33acd35e575dl,0x7b940d2c3d35001al,
-            0x2d606b57e34c95b7l },
-          0 },
-        /* 33 << 200 */
-        { { 0xc7e4f8b899365f86l,0x8f6f959faae69527l,0x749ffedffdfaeeeal,
-            0x2b91f0221b54c2a0l },
-          { 0xe75c2352addbdf83l,0xe7329922fff2694cl,0xbb65ae06badadeacl,
-            0x16cbb9d1f56be3b5l },
-          0 },
-        /* 34 << 200 */
-        { { 0xb100a4c67a07bd70l,0x222fee7634787efel,0xa4dafc14f1e79d1bl,
-            0x0d3a82dad18b8be4l },
-          { 0xe0181445fc06922fl,0x0873d99b714a90b6l,0xdf43082fa5087a0el,
-            0x195e49367399e0dbl },
-          0 },
-        /* 35 << 200 */
-        { { 0x7e83545aae6fcc9cl,0x1a24fce819e15ce2l,0x4a3465c536d8c6a8l,
-            0xd1e5f24109436ae0l },
-          { 0xed334bfc6be463d5l,0xc46a600b934fbdcfl,0xbd2fd65b920321ffl,
-            0x953fa91767fa154el },
-          0 },
-        /* 36 << 200 */
-        { { 0x5dca4995f93ddad1l,0x061efcabf72470c2l,0xad78d54d5e7e0741l,
-            0xa91f4e839c4e0ab4l },
-          { 0xdd4403af5c75aa0dl,0x4308c8ee13c69113l,0x3a3b66f51ebc36adl,
-            0xc07cc3f0f4bf777al },
-          0 },
-        /* 37 << 200 */
-        { { 0x3fd1963e37a86b32l,0x22e236d60bd0880el,0xb87467cf89f0fa5cl,
-            0x85b9c6c0310e0265l },
-          { 0x82979a96783459ael,0xd19b0919bd529ed3l,0xa21f771808434f94l,
-            0x3dd130a9195369c6l },
-          0 },
-        /* 39 << 200 */
-        { { 0xc61e62767915d157l,0xc48244279e07fb0el,0x8980c1cc8420ea49l,
-            0x10d82e4a588d4e2bl },
-          { 0xdddecd52b17eff2dl,0xe44c7b2ded8492a4l,0x96ca89ebb9bea6afl,
-            0x724166fe1b03ed03l },
-          0 },
-        /* 40 << 200 */
-        { { 0xfc87975f8fb54738l,0x3516078827c3ead3l,0x834116d2b74a085al,
-            0x53c99a73a62fe996l },
-          { 0x87585be05b81c51bl,0x925bafa8be0852b7l,0x76a4fafda84d19a7l,
-            0x39a45982585206d4l },
-          0 },
-        /* 41 << 200 */
-        { { 0x8bbc484ed551f3e1l,0x6e058a90b7eb06d2l,0xfaccd9a0e5cd281al,
-            0xe7661b78d5b44900l },
-          { 0x03afe115725fde22l,0xbe929230c7229fd1l,0x5cd0d16a0000035el,
-            0x1f6a9df0c8f5a910l },
-          0 },
-        /* 43 << 200 */
-        { { 0xe54bbcfd535dfc82l,0x89be0b89a9012196l,0xa67831ee71011beal,
-            0x2ea7a8292db43878l },
-          { 0xff7c144378ffe871l,0xa67dc3d4c63f65eal,0xbbfc7fc2a1527419l,
-            0x6440380bf6c36b8fl },
-          0 },
-        /* 44 << 200 */
-        { { 0x71ab9f69d812d7e6l,0x2847c5516e142126l,0x9e27755bb31e7753l,
-            0xb89533e2943b8c7fl },
-          { 0xbe7f0c6e14fa7dc6l,0x782a06388cee1f7al,0x7069292938e13a6bl,
-            0x1e1221f0c63f4d28l },
-          0 },
-        /* 45 << 200 */
-        { { 0x9030aa9a63a431f4l,0x0fa7b5d45039a318l,0x6a0cf40af083687dl,
-            0x46689cec659fa752l },
-          { 0x8259727a456fa97el,0x4f618a355b08d7fcl,0x2c44217b72028d15l,
-            0x8083b09935111e32l },
-          0 },
-        /* 46 << 200 */
-        { { 0xaa5976523b5b29f1l,0xb07f10ab37432a54l,0x16e3e2236e36556fl,
-            0xf1c7c9bd47cd4586l },
-          { 0xa4eef99d3f87216dl,0x4e54d3c52e1eaa79l,0x534c5901d2540d91l,
-            0x718df7c9b6f0fcfcl },
-          0 },
-        /* 47 << 200 */
-        { { 0x99497f8a2eb0ee3bl,0x87e550c1caeb3a20l,0xd23e053dfb91627cl,
-            0xb971c043873124e6l },
-          { 0x3581ab853b16e467l,0x24541c926145187bl,0x4423ec5c010c2527l,
-            0x775f13029fa82a68l },
-          0 },
-        /* 48 << 200 */
-        { { 0x499b6ab65eb03c0el,0xf19b795472bc3fdel,0xa86b5b9c6e3a80d2l,
-            0xe43775086d42819fl },
-          { 0xc1663650bb3ee8a3l,0x75eb14fcb132075fl,0xa8ccc9067ad834f6l,
-            0xea6a2474e6e92ffdl },
-          0 },
-        /* 49 << 200 */
-        { { 0xbaebdd8a0c40aec4l,0x5eccafb563e8cfd0l,0x1c204c0eb5159938l,
-            0x607109d34b996aa9l },
-          { 0x024c6c4b9cef59fel,0xbc846e216ed4b6f1l,0xf6a50ff3ff652c0al,
-            0x368af2c72d95220cl },
-          0 },
-        /* 51 << 200 */
-        { { 0xec9c2e35cbd3ccafl,0xb9eeff3ddcda8f30l,0x82012e191062d02el,
-            0xed964cc94efc6b6el },
-          { 0x8853ea0a6bf54c22l,0xea40fcc0f3cbe264l,0x21f9c01ddecf114el,
-            0x05e754c63da71e59l },
-          0 },
-        /* 52 << 200 */
-        { { 0xe6a26d38046dfc72l,0x70409579c2175175l,0x2a575ac5d44e0c1dl,
-            0xb35395e01479ab5al },
-          { 0x1550a5d4f7bfbd8el,0x01daeb680778807bl,0xe0aa940321294dbal,
-            0x84bcdc8c5b5a93b7l },
-          0 },
-        /* 53 << 200 */
-        { { 0x876cc4d2520f04abl,0x6e320f5da85ff6a8l,0x7c504720ce17bc80l,
-            0xe7907079a62089f9l },
-          { 0xa45c4ac7bca45feel,0xd8f3facd5bd54b0cl,0xc0b036277b3e4a24l,
-            0xaabe96dfe4cd4b57l },
-          0 },
-        /* 55 << 200 */
-        { { 0xdc85a54773862ce4l,0x169051a3cc6f5d85l,0x8e3d3be0355f4df7l,
-            0xa139d6fac72bac76l },
-          { 0xddc95d0dfeb0a6f0l,0xd53f70e545cd6955l,0x18eede5e47e54112l,
-            0x4a135dc9cbc6a52el },
-          0 },
-        /* 57 << 200 */
-        { { 0x705a08ba90a58fb4l,0x10eef880fb3f8a64l,0x4ced9ba2f8e585ffl,
-            0xb4f0f955fc6ebef5l },
-          { 0x152c1a338d8b739el,0xb2be701db495bee5l,0xd27141a8d3540a74l,
-            0x20c8a00247f9e9d7l },
-          0 },
-        /* 59 << 200 */
-        { { 0x6d5ae921f5adcb3fl,0xaed1047003a3b610l,0x7c75e36f22256df9l,
-            0xe664b36fb97dae99l },
-          { 0x138b5eca91e746ael,0xb3e01ef5648674a7l,0xa3f256da9e375c74l,
-            0xa00e82bc6a82d6f3l },
-          0 },
-        /* 60 << 200 */
-        { { 0xe7a01eae6e28b4a8l,0xb3bf8224782166c9l,0x0b7ba2a06a244510l,
-            0x9751a69c2abbb4dbl },
-          { 0xb611adc1b3f9fcbcl,0x1d08eb3b436c4675l,0x1c71e98a20f96a64l,
-            0x33d9b58c7ffd3f08l },
-          0 },
-        /* 61 << 200 */
-        { { 0x7c7b03c1affa2d6cl,0x5f189bb9aec6e624l,0xe77a1eedadeff5e7l,
-            0xfc58b90f4280b467l },
-          { 0x561e5d579b71cb4el,0x8ed767aa36d6a17el,0x38d8671e8aa9e188l,
-            0x7bc68f07a95350c0l },
-          0 },
-        /* 63 << 200 */
-        { { 0xe0cd38cf98c01384l,0xc6741123a4226d9fl,0xdd1d42dbf877a0b8l,
-            0xc5986ef0110b3cbal },
-          { 0xeba949f809c8cebel,0x96b47bc4bd39f1dcl,0xbad140b6e07a2a3cl,
-            0x2a8d80999ac5ca8al },
-          0 },
-        /* 64 << 200 */
-        { { 0x39d934abd3c095f1l,0x04b261bee4b76d71l,0x1d2e6970e73e6984l,
-            0x879fb23b5e5fcb11l },
-          { 0x11506c72dfd75490l,0x3a97d08561bcf1c1l,0x43201d82bf5e7007l,
-            0x7f0ac52f798232a7l },
-          0 },
-        /* 65 << 200 */
-        { { 0x8cf27618590ca850l,0x58134f6f44bb94f2l,0x0a147562b78b4eecl,
-            0x2e5986e39f1ed647l },
-          { 0x9becf893348393b0l,0xaea21b92c31c2a86l,0x3d69859e5ff1b9a6l,
-            0x6fcd19f4cd805691l },
-          0 },
-        /* 71 << 200 */
-        { { 0x81619bd4841f43c3l,0x3a3325538e5c61f0l,0x2b68921eda862151l,
-            0x97f5c8a741a491f8l },
-          { 0x8b452094d3b9afa0l,0x93b2b7b4f2124dbcl,0x53285e7d26e0e26dl,
-            0x3f003fc5c8a24edel },
-          0 },
-        /* 77 << 200 */
-        { { 0x4cdabb586c025824l,0x5935ad1586bfcd7dl,0x8ce2c3101b7c5533l,
-            0x761c9fe96cae8808l },
-          { 0x8a0723f5d9e66d70l,0xb640b323dcced11dl,0x5768528051ae548cl,
-            0x83576f75d53f3f2cl },
-          0 },
-        /* 83 << 200 */
-        { { 0xc715edc47b532ec3l,0x159765e6c4a6e14bl,0x4a74f15228cd2d45l,
-            0xbfd309edae8c753bl },
-          { 0xf56bb5315d6d5245l,0x2c89c21833b30a55l,0xe436141acd4ed5fal,
-            0x7eb7a5c707868ee6l },
-          0 },
-        /* 89 << 200 */
-        { { 0x9a3ad3ffb0c7c48cl,0x25e8d977738e3638l,0xbb6c6c9d1c024074l,
-            0xeda1ac0f8cfdf416l },
-          { 0x93059ba538de49e2l,0xdb199cfc1b9ce741l,0x49b05e9446f3b494l,
-            0x717cafc606480902l },
-          0 },
-        /* 95 << 200 */
-        { { 0x8d27421052885708l,0x9d2297fd74e5b9b5l,0xe7cb6a68dc4d7318l,
-            0x0b60b0d276357b31l },
-          { 0x57301994532c2095l,0xfbae2ba203373452l,0xe8020b20ba700583l,
-            0x1ca7772c2988919cl },
-          0 },
-        /* 101 << 200 */
-        { { 0x723296eb918f3eecl,0x358c9ff0b79901c6l,0x64a1934c8d5e814cl,
-            0x7e5a9afced165177l },
-          { 0xd783840168733e7al,0xfcf3c0b6f61ede6dl,0x94ec0bf08434e804l,
-            0xa5a70153c192c1cdl },
-          0 },
-        /* 107 << 200 */
-        { { 0x03cdf976c23e49d4l,0x51e5cfa5a2ae72d5l,0x7716faa3100f7a51l,
-            0xc53153a2c14dc015l },
-          { 0xe7c69b052b47ec18l,0xff4756907ea93b01l,0x55fde3c540a2f205l,
-            0x0263d0b12f85aed6l },
-          0 },
-        /* 113 << 200 */
-        { { 0x668c56619686fe30l,0x382a8ccd8f73a476l,0xda012cbfb40a85e7l,
-            0x55ea1e72e9e88b91l },
-          { 0x8312556088cc5afcl,0x44ae54cbc45b19c7l,0xc91fffa8f86a02cdl,
-            0xc79f573752d7e89bl },
-          0 },
-        /* 116 << 200 */
-        { { 0x652b50523e357579l,0x08ce7d3a2afe5746l,0x9dc1cca6f71a12efl,
-            0x80a221c24f6c4196l },
-          { 0xdde40eff0f49f508l,0x7995bb46913b0dc3l,0x4adbdeb385e44f6el,
-            0x6816bb3ab222e4bbl },
-          0 },
-        /* 119 << 200 */
-        { { 0xce1ee518579a1a4dl,0x5d86e8912bc3870al,0x230878d18da907c4l,
-            0xc648392777ae7ea8l },
-          { 0x64319653016c0ad7l,0x7cbfa0b0b71f20dal,0xbf087dc3395ed4d8l,
-            0x59512add307d218dl },
-          0 },
-        /* 125 << 200 */
-        { { 0x7378a969d8ae335el,0x11c69965506d3a42l,0x212539769949468al,
-            0x570cf87e64995050l },
-          { 0xf300ad2e30b94e22l,0xbc159cf8f36dad32l,0xdff3b3767ca8aa6al,
-            0xa5de93b5627fb9e7l },
-          0 },
-    },
-    {
-        /* 0 << 208 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 208 */
-        { { 0x75d9bc15adf7cccfl,0x81a3e5d6dfa1e1b0l,0x8c39e444249bc17el,
-            0xf37dccb28ea7fd43l },
-          { 0xda654873907fba12l,0x35daa6da4a372904l,0x0564cfc66283a6c5l,
-            0xd09fa4f64a9395bfl },
-          0 },
-        /* 3 << 208 */
-        { { 0xc51aa29e5cfe5c48l,0x82c020ae815ee096l,0x7848ad827549a68al,
-            0x7933d48960471355l },
-          { 0x04998d2e67c51e57l,0x0f64020ad9944afcl,0x7a299fe1a7fadac6l,
-            0x40c73ff45aefe92cl },
-          0 },
-        /* 4 << 208 */
-        { { 0xe5f649be9d8e68fdl,0xdb0f05331b044320l,0xf6fde9b3e0c33398l,
-            0x92f4209b66c8cfael },
-          { 0xe9d1afcc1a739d4bl,0x09aea75fa28ab8del,0x14375fb5eac6f1d0l,
-            0x6420b560708f7aa5l },
-          0 },
-        /* 5 << 208 */
-        { { 0xbf44ffc75488771al,0xcb76e3f17f2f2191l,0x4197bde394f86a42l,
-            0x45c25bb970641d9al },
-          { 0xd8a29e31f88ce6dcl,0xbe2becfd4bb7ac7dl,0x13094214b5670cc7l,
-            0xe90a8fd560af8433l },
-          0 },
-        /* 7 << 208 */
-        { { 0x0ecf9b8b4ebd3f02l,0xa47acd9d86b770eal,0x93b84a6a2da213cel,
-            0xd760871b53e7c8cfl },
-          { 0x7a5f58e536e530d7l,0x7abc52a51912ad51l,0x7ad43db02ea0252al,
-            0x498b00ecc176b742l },
-          0 },
-        /* 9 << 208 */
-        { { 0x9ff713ef888ae17fl,0x6007f68fb34b7bebl,0x5d2b18983b653d64l,
-            0xcbf73e91d3ca4b1bl },
-          { 0x4b050ad56cdfb3a1l,0x41bd3ec3d1f833a4l,0x78d7e2ee719d7bf5l,
-            0xea4604672a27412el },
-          0 },
-        /* 10 << 208 */
-        { { 0x7dad6d1b42cd7900l,0xb6e6b439e058f8a4l,0x8836f1e662aa3bbcl,
-            0xd45bf2c811142b0al },
-          { 0xae324bac3c045ed1l,0x372be24d270a8333l,0xeeda7a3a6b7c73b6l,
-            0xf6675402db49562al },
-          0 },
-        /* 11 << 208 */
-        { { 0xc312ba68441e760dl,0x84d0d061a50e512el,0xfe764f4e4bbdd849l,
-            0xa924adcf9dadd5c0l },
-          { 0x08685961debfe976l,0xd3d846c529fba601l,0x43bf8227dc3f4040l,
-            0x05e767b8a49e9ff5l },
-          0 },
-        /* 13 << 208 */
-        { { 0xc4689c309953e453l,0x5e355a2e1712dca5l,0x1ff83c81f1cd96f7l,
-            0xb06b89fb44cf56dbl },
-          { 0x1827705365f16e0dl,0x6403b91de5618672l,0xba3f9475be384bc6l,
-            0x7f691cbe303ce5f3l },
-          0 },
-        /* 15 << 208 */
-        { { 0x4589ba03210f4045l,0xd5e7366301e8012al,0x1c26052d74462ffal,
-            0xe78f600c4f989519l },
-          { 0xc63ca0c97cee0b2fl,0xbe588573af760b5fl,0x05906fc4593773cdl,
-            0xd5970fb0e322d5afl },
-          0 },
-        /* 16 << 208 */
-        { { 0x103c46e60ebcf726l,0x4482b8316231470el,0x6f6dfaca487c2109l,
-            0x2e0ace9762e666efl },
-          { 0x3246a9d31f8d1f42l,0x1b1e83f1574944d2l,0x13dfa63aa57f334bl,
-            0x0cf8daed9f025d81l },
-          0 },
-        /* 17 << 208 */
-        { { 0xf67c098aae0690aal,0x1a4656422b7bc62bl,0xaffc6b917220dea2l,
-            0xd97ac543d2552deel },
-          { 0x1f84514a7e816b8el,0xe9887e81a8f38552l,0x2e6358e6847ad46bl,
-            0x1f67871e6bc9895el },
-          0 },
-        /* 19 << 208 */
-        { { 0x2462b6e0d47f43fal,0x71db3610d8a245e5l,0x0c26b0e734208974l,
-            0x0cd6d49d2029bd2el },
-          { 0xf207c9f6091922b8l,0x0c476c5c7f0fbf66l,0x6de7efb2295d6da8l,
-            0xea054ee10ced6cfel },
-          0 },
-        /* 21 << 208 */
-        { { 0xd21496e3e9bd795cl,0xf293f617c6a557del,0x9d041b7239a45642l,
-            0xe8353dab4ac87f80l },
-          { 0x21e9f35620d8d019l,0x1f4adca9d2fb2668l,0xe5f68227dfecd64al,
-            0x10d71b79d7f09ec0l },
-          0 },
-        /* 23 << 208 */
-        { { 0xca3f068999f87118l,0x99a933911b2417f0l,0xa383481a3d1f70e5l,
-            0x7a31a6c833b14414l },
-          { 0x9d60f4368b2a9931l,0xd4c97ded80588534l,0x7cb29e82ab6a8bdal,
-            0x3799bdad97b4c45al },
-          0 },
-        /* 25 << 208 */
-        { { 0x51da0ff629011af3l,0xcbb03c809a4f0855l,0xea3536725555b10bl,
-            0x4bf94e025c7da97el },
-          { 0x384352f5ff713300l,0xb2c2b675192d41e6l,0x4ff66861625ca046l,
-            0xf0f5e472013dddc4l },
-          0 },
-        /* 27 << 208 */
-        { { 0x38c44cdc59987914l,0xad7f2829757fb853l,0x9aabf1c8688e3342l,
-            0xbe0f1e4ef534c850l },
-          { 0x732cac652ec24ecal,0x9328b657933bb5e4l,0xe2747ff60bb31033l,
-            0xdbaab72cfcdc36acl },
-          0 },
-        /* 28 << 208 */
-        { { 0x0e5e3049a639fc6bl,0xe75c35d986003625l,0x0cf35bd85dcc1646l,
-            0x8bcaced26c26273al },
-          { 0xe22ecf1db5536742l,0x013dd8971a9e068bl,0x17f411cb8a7909c5l,
-            0x5757ac98861dd506l },
-          0 },
-        /* 29 << 208 */
-        { { 0xaf410d5aac66a3e8l,0x39fcbffb2031f658l,0xd29e58c947ce11fbl,
-            0x7f0b874965f73e49l },
-          { 0xedc30f4b27fea6c6l,0xe03b9103d2baa340l,0xa7bb3f17ae680612l,
-            0xe06656a8197af6f0l },
-          0 },
-        /* 31 << 208 */
-        { { 0x84562095bff86165l,0x994194e916bc7589l,0xb1320c7ec14c6710l,
-            0x508a8d7f766e978fl },
-          { 0xd04adc9ec7e1f6fel,0x7bafaff68398cecfl,0x906df2fccef3b934l,
-            0xc65afe18f3008c38l },
-          0 },
-        /* 33 << 208 */
-        { { 0x477ffeeeab983130l,0x5426363a96e83d55l,0xcf0370a15204af42l,
-            0x99834414b5a6ea8fl },
-          { 0xf475ba711ab4ee8al,0x8486da5d0102d8f2l,0x55082e713839c821l,
-            0xa57e58395b65defal },
-          0 },
-        /* 34 << 208 */
-        { { 0x34b2185bbbb33a76l,0x189038b7d48158c2l,0xfa32eb90e9e90217l,
-            0x79271771730e74dfl },
-          { 0x315ed8c2a5d01ffdl,0x9799dae723e6a95el,0x40070aa016f5715al,
-            0x40e6c0ca5ea51f8cl },
-          0 },
-        /* 35 << 208 */
-        { { 0x099c0570d8132163l,0xcd5508a3023dbbf3l,0x18162ff526bfe6a6l,
-            0xf39e071144bbb455l },
-          { 0x49664996eaa3cf96l,0x1c6442d5e2649be9l,0x6199f740c01d269dl,
-            0x4be605ee37542c11l },
-          0 },
-        /* 36 << 208 */
-        { { 0xc7313e9cf36658f0l,0xc433ef1c71f8057el,0x853262461b6a835al,
-            0xc8f053987c86394cl },
-          { 0xff398cdfe983c4a1l,0xbf5e816203b7b931l,0x93193c46b7b9045bl,
-            0x1e4ebf5da4a6e46bl },
-          0 },
-        /* 37 << 208 */
-        { { 0xd032fbfd0dbf82b4l,0x707181f668e58969l,0xef434381e7be2d5el,
-            0x290669176f2c64ddl },
-          { 0xf66cffc3772769abl,0x68d8a76a17aad01cl,0xdd3991c590f6e078l,
-            0xdb74db06ea4ac7dcl },
-          0 },
-        /* 39 << 208 */
-        { { 0x9f34a7c11c78be71l,0x7bf2f2d149ca6987l,0xb528a514dcd34afcl,
-            0x4dddb3f1183a68b1l },
-          { 0x54d2626660b83883l,0x9073e4e0e0cd8dadl,0xbd2b837d9eb818b2l,
-            0x5fa5f9086ae2e32dl },
-          0 },
-        /* 40 << 208 */
-        { { 0xf9942a6043a24fe7l,0x29c1191effb3492bl,0x9f662449902fde05l,
-            0xc792a7ac6713c32dl },
-          { 0x2fd88ad8b737982cl,0x7e3a0319a21e60e3l,0x09b0de447383591al,
-            0x6df141ee8310a456l },
-          0 },
-        /* 41 << 208 */
-        { { 0xcd02ba1e0df98a64l,0x301b6bfa03f5676el,0x41e1a8d4a2fe4090l,
-            0x489c1cbf47f0e1dcl },
-          { 0x4171a98c20760847l,0xdcb21cee77af4796l,0x5fb0f0c9d0b7e981l,
-            0x4c2791dff33b9f8dl },
-          0 },
-        /* 43 << 208 */
-        { { 0x95d7ec0c50420a50l,0x5794665c2a6756d5l,0x73558c6e9101e7f5l,
-            0xa3fa0f8c1642af0el },
-          { 0xa11b309b4ee43551l,0x3939de30cb8fc712l,0x9710f2320fde8921l,
-            0x2a4db2d5cae8b41cl },
-          0 },
-        /* 44 << 208 */
-        { { 0xaec1a039e6d6f471l,0x14b2ba0f1198d12el,0xebc1a1603aeee5acl,
-            0x401f4836e0b964cel },
-          { 0x2ee437964fd03f66l,0x3fdb4e49dd8f3f12l,0x6ef267f629380f18l,
-            0x3e8e96708da64d16l },
-          0 },
-        /* 45 << 208 */
-        { { 0xdf6cdac0bc4c78adl,0xbe9e32182e97376el,0xa37f9d8b1a139274l,
-            0x7640c3982807128el },
-          { 0xe9735166c05b5f85l,0xbccd3675100e5716l,0x51376a293e5c9682l,
-            0x95efe088848f6aeal },
-          0 },
-        /* 46 << 208 */
-        { { 0xfac2d7dd23d14105l,0xdda17149a9136f52l,0xb9f3a9c672d1a99bl,
-            0x2fcf532a142c3b20l },
-          { 0xc2731f1e61190c1bl,0x26dbe810a76509e4l,0xc96cc431908bb92fl,
-            0x5661a84d80e3e694l },
-          0 },
-        /* 47 << 208 */
-        { { 0x5194d144150ba121l,0x8de57c48b6b11561l,0x803228da96c156d9l,
-            0x2112e4250a8f6376l },
-          { 0x15436294643449ffl,0xfc3880add4118cd0l,0x16ed90731e3f7413l,
-            0xa400699901d38d6dl },
-          0 },
-        /* 48 << 208 */
-        { { 0xbc19180c207674f1l,0x112e09a733ae8fdbl,0x996675546aaeb71el,
-            0x79432af1e101b1c7l },
-          { 0xd5eb558fde2ddec6l,0x81392d1f5357753fl,0xa7a76b973ae1158al,
-            0x416fbbff4a899991l },
-          0 },
-        /* 49 << 208 */
-        { { 0xf84c9147c52d7384l,0x86391accec01efa6l,0xffd68616f9c6f3f4l,
-            0xc7536461b17c2de6l },
-          { 0xa81f4ba10121abdfl,0xa068a2e26f6eae27l,0xe0ee90350eb159f0l,
-            0x4c48f761fd8c4b9cl },
-          0 },
-        /* 51 << 208 */
-        { { 0x4b6d71e87790000cl,0xced195744ce9293el,0xc25626a3747585e8l,
-            0xb8307d22d7044270l },
-          { 0xf08e7ef6117c24cbl,0xae6403162f660d04l,0xbc3ffdcff224a2fdl,
-            0x1ebc0328d0586c7el },
-          0 },
-        /* 52 << 208 */
-        { { 0x9e65fdfd0d4a9dcfl,0x7bc29e48944ddf12l,0xbc1a92d93c856866l,
-            0x273c69056e98dfe2l },
-          { 0x69fce418cdfaa6b8l,0x606bd8235061c69fl,0x42d495a06af75e27l,
-            0x8ed3d5056d873a1fl },
-          0 },
-        /* 53 << 208 */
-        { { 0x46b160e5a6022278l,0x86b1d50cc30a51fcl,0xe898ac0e684b81b7l,
-            0x04d591e277b93597l },
-          { 0xd20cac347626e18al,0xb49c941f0a968733l,0x054e6e7e21631627l,
-            0xd6d33db9d4c716b1l },
-          0 },
-        /* 55 << 208 */
-        { { 0xaa79ab4bf91e9b75l,0x7df3235bd34d961dl,0x9f3954e6534a40e1l,
-            0x80f88d2c790b4456l },
-          { 0x98f7711b21e9fb2al,0x0a04c318877d27e6l,0x499b7c2412338848l,
-            0x0b1dbe9ccd5e7ec3l },
-          0 },
-        /* 57 << 208 */
-        { { 0xb430ff44e04715ffl,0x671358d565d076d0l,0x3946d38f22c3aa06l,
-            0x80919ea363b2d627l },
-          { 0x14ffa219e8790922l,0xfe1d895ae8d89c48l,0x717e9e51748e806el,
-            0xb91e1ddf550d711dl },
-          0 },
-        /* 59 << 208 */
-        { { 0x8aac26225f540127l,0x57cd5d7cba25f742l,0x87006a6b1df7a0fcl,
-            0x88e9ab863ecbf26cl },
-          { 0xe1b8155f9143b314l,0xc00196130b679bddl,0x819e7b61a1871d07l,
-            0xc36e7892cc2c9cc9l },
-          0 },
-        /* 60 << 208 */
-        { { 0x4b03c55b8e33787fl,0xef42f975a6384673l,0xff7304f75051b9f0l,
-            0x18aca1dc741c87c2l },
-          { 0x56f120a72d4bfe80l,0xfd823b3d053e732cl,0x11bccfe47537ca16l,
-            0xdf6c9c741b5a996bl },
-          0 },
-        /* 61 << 208 */
-        { { 0x65729b05301ee370l,0x3ed09a2a24c2824cl,0x781ef66a33481977l,
-            0xf2ccdeec193506d0l },
-          { 0x92b4f70d703422d6l,0x7f004a43f80a1b99l,0x47db23607a856445l,
-            0x783a8dd1ce5b0622l },
-          0 },
-        /* 63 << 208 */
-        { { 0x7febefd34e9aac5al,0x601c89e2bdd6173el,0x79b08930c257431el,
-            0x915d601d399ee099l },
-          { 0xfa48347eca02acd2l,0xc33249baeeb7ccedl,0xd76e408755704722l,
-            0xd3709c600dcf4878l },
-          0 },
-        /* 64 << 208 */
-        { { 0xee7332c7904fc3fal,0x14a23f45c7e3636al,0xc38659c3f091d9aal,
-            0x4a995e5db12d8540l },
-          { 0x20a53becf3a5598al,0x56534b17b1eaa995l,0x9ed3dca4bf04e03cl,
-            0x716c563ad8d56268l },
-          0 },
-        /* 65 << 208 */
-        { { 0x963353201580f3adl,0x6c495304b0cd50d4l,0xd035cdc7555ff981l,
-            0xe65cd063c6b6bdfbl },
-          { 0x7deb3cbb437e749cl,0xa9de9f3db5dc24a1l,0xe2e76a2b35c29ffal,
-            0x4d35e261323ba650l },
-          0 },
-        /* 71 << 208 */
-        { { 0x52c46fc8c89e2766l,0x7330b02bb945e5f2l,0xc77ef75c2673ebbcl,
-            0x1740e72657c33783l },
-          { 0xf0312d29623565fbl,0xff9f707af0ca1ed9l,0xb98609ca5ea51a4al,
-            0xde86b9a87b5cc91fl },
-          0 },
-        /* 77 << 208 */
-        { { 0x0dece4badca158b7l,0x5e39baf6a3e9f837l,0xcf14e6dc4d57b640l,
-            0x0548aaa4b67bcbe7l },
-          { 0xb6cf5b393c90e434l,0xf8b3c5645006f3abl,0xa74e92859bf04bd9l,
-            0xf59a3a6bf99c8977l },
-          0 },
-        /* 83 << 208 */
-        { { 0x652ca66ac5b072d5l,0x2102b55993ad4928l,0x1b5f192d88210f9bl,
-            0xb18710144c6ad7e5l },
-          { 0x3979fde3bc0abf13l,0xb5cb4c7dac3fd631l,0x4aedffa6c200ec7bl,
-            0x8aed81ceaddf3610l },
-          0 },
-        /* 89 << 208 */
-        { { 0x72b48105abeefbael,0x0e9e6e41827bb22bl,0xf45ada151e52a848l,
-            0xb8e94579534867a2l },
-          { 0x3a08773b7adb0fdcl,0xe7133a28b83316dfl,0xc8b7b08c5bb41470l,
-            0x28719eb4aaf140c7l },
-          0 },
-        /* 95 << 208 */
-        { { 0x398996cd430007cel,0x20d8c0e07642d616l,0x81566639a7eb2397l,
-            0x74aa0b692e133732l },
-          { 0x326745907ba80aa7l,0x56a491c39bd69d64l,0xc8c8b040e54dcce0l,
-            0x3f991872d571d037l },
-          0 },
-        /* 101 << 208 */
-        { { 0x70e681fa4fb595c9l,0xf0635d6386b4d97bl,0xfc029284c1347081l,
-            0x5a4e9cbe4fee0303l },
-          { 0xd43da8609c31094fl,0x0412cfed6515b4aal,0x10fc06da8d53be86l,
-            0x4b7b380b4bccc94dl },
-          0 },
-        /* 107 << 208 */
-        { { 0x560d57408e7d6738l,0xa82268a8937f12a2l,0x87787b2d3d95b463l,
-            0xb36539b2030e23bfl },
-          { 0x60d16b8fd61e761dl,0x96ba2949fe8efccdl,0x8c170eda667fa7ebl,
-            0xc880d74cf800d7c3l },
-          0 },
-        /* 113 << 208 */
-        { { 0x7c05d6c1efcbfea0l,0xae7ba3291a2f6dd8l,0x521598ed5bd42ecfl,
-            0x58e07842ef0ab40cl },
-          { 0xae65105f66c752a5l,0x4910fba45f99d499l,0xbfdaf5fce9e44357l,
-            0x6aaf4053796ee5b6l },
-          0 },
-        /* 116 << 208 */
-        { { 0xf58fecb16f640f62l,0xe274b92b39f51946l,0x7f4dfc046288af44l,
-            0x0a91f32aeac329e5l },
-          { 0x43ad274bd6aaba31l,0x719a16400f6884f9l,0x685d29f6daf91e20l,
-            0x5ec1cc3327e49d52l },
-          0 },
-        /* 119 << 208 */
-        { { 0x615ac02527ba93edl,0x0d43915d3556ef47l,0x8c739fd1cb0cda89l,
-            0xa2318169625f7a16l },
-          { 0x17d486113e0479cel,0x814beb6038ee541el,0x09c9807fb98ef355l,
-            0x4ad3668752d07af6l },
-          0 },
-        /* 125 << 208 */
-        { { 0x5c1f42e444f3f568l,0xd743b7c078fb409bl,0xe09edccb6224362cl,
-            0x7f13d140c5fe872cl },
-          { 0x85e8cb88f403c0ebl,0x918a231b688d20a0l,0xc65b7ab9f246c73fl,
-            0xda743fbf76dbd6adl },
-          0 },
-    },
-    {
-        /* 0 << 216 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 216 */
-        { { 0xa0158eeae457a477l,0xd19857dbee6ddc05l,0xb326522418c41671l,
-            0x3ffdfc7e3c2c0d58l },
-          { 0x3a3a525426ee7cdal,0x341b0869df02c3a8l,0xa023bf42723bbfc8l,
-            0x3d15002a14452691l },
-          0 },
-        /* 3 << 216 */
-        { { 0xf3cae7e9262a3539l,0x78a49d1d6670d59el,0x37de0f63c1c5e1b9l,
-            0x3072c30c69cb7c1cl },
-          { 0x1d278a5277c850e6l,0x84f15f8f1f6a3de6l,0x46a8bb45592ca7adl,
-            0x1912e3eee4d424b8l },
-          0 },
-        /* 4 << 216 */
-        { { 0x6ba7a92079e5fb67l,0xe1331feb70aa725el,0x5080ccf57df5d837l,
-            0xe4cae01d7ff72e21l },
-          { 0xd9243ee60412a77dl,0x06ff7cacdf449025l,0xbe75f7cd23ef5a31l,
-            0xbc9578220ddef7a8l },
-          0 },
-        /* 5 << 216 */
-        { { 0xdc988086365e668bl,0xada8dcdaaabda5fbl,0xbc146b4c255f1fbel,
-            0x9cfcde29cf34cfc3l },
-          { 0xacbb453e7e85d1e4l,0x9ca09679f92358b5l,0x15fc2d96240823ffl,
-            0x8d65adf70c11d11el },
-          0 },
-        /* 7 << 216 */
-        { { 0x775557f10296f4fdl,0x1dca76a3ea51b436l,0xf3e98f60fb950805l,
-            0x31ff32ea831cf7f1l },
-          { 0x643e7bf18d2c714bl,0x64b5c3392e9d2acal,0xa9fd9ccc6adc2d23l,
-            0xfc2397eccc721b9bl },
-          0 },
-        /* 9 << 216 */
-        { { 0xf031182db48ec57dl,0x515d32f804b233b9l,0x06bbb1d4093aad26l,
-            0x88a142fe0d83d1ecl },
-          { 0x3b95c099245c73f8l,0xb126d4af52edcd32l,0xf8022c1e8fcb52e6l,
-            0x5a51ac4c0106d339l },
-          0 },
-        /* 10 << 216 */
-        { { 0xc589e1ce44ace150l,0xe0f8d3d94381e97cl,0x59e99b1162c5a4b8l,
-            0x90d262f7fd0ec9f9l },
-          { 0xfbc854c9283e13c9l,0x2d04fde7aedc7085l,0x057d776547dcbecbl,
-            0x8dbdf5919a76fa5fl },
-          0 },
-        /* 11 << 216 */
-        { { 0xb7f70a1a7c64a054l,0x0dc1c0df9db43e79l,0x6d0a4ae251fe63d6l,
-            0xe0d5e3327f0c8abfl },
-          { 0xff5500362b7ecee8l,0x3ea0e6f75d055008l,0x30deb62ff24ac84fl,
-            0x936969fd5d7116b7l },
-          0 },
-        /* 13 << 216 */
-        { { 0x02da76122617cf7fl,0xd6e25d4eeee35260l,0xb2fa5b0afd3533e9l,
-            0xe76bb7b0b9126f88l },
-          { 0x692e6a9988856866l,0x3fdf394f49db65cal,0x2529699122d8d606l,
-            0xe815bfbf3dd7c4cfl },
-          0 },
-        /* 15 << 216 */
-        { { 0x69c984ed4d844e7fl,0xd354b2174a2e8a82l,0x25bd4addfb2c4136l,
-            0xf72df4de144b26e1l },
-          { 0xd0aa9db0e6101afdl,0x4445efaae49bd1b8l,0x5dc54eee331593b2l,
-            0xfa35e3b9094bf10bl },
-          0 },
-        /* 16 << 216 */
-        { { 0xdb567d6ac42bd6d2l,0x6df86468bb1f96ael,0x0efe5b1a4843b28el,
-            0x961bbb056379b240l },
-          { 0xb6caf5f070a6a26bl,0x70686c0d328e6e39l,0x80da06cf895fc8d3l,
-            0x804d8810b363fdc9l },
-          0 },
-        /* 17 << 216 */
-        { { 0x660a0f893ea089c3l,0xa25823aac9009b09l,0xb2262d7ba681f5e5l,
-            0x4fc30c8c3413863al },
-          { 0x691544b7c32059f7l,0xf65cf276b21c6134l,0xe3a96b2a5104dabal,
-            0xbb08d109a43ee42fl },
-          0 },
-        /* 19 << 216 */
-        { { 0x85a52d69f9916861l,0x595469a4da4fa813l,0x1dd7786e3338502fl,
-            0x34b8ef2853963ac5l },
-          { 0xc0f019f81a891b25l,0xb619970c4f4bd775l,0x8c2a5af3be19f681l,
-            0x9463db0498ec1728l },
-          0 },
-        /* 21 << 216 */
-        { { 0xeb62c27801f39eabl,0x27de39340ab3a4aal,0xfbd17520a982ca8dl,
-            0x58817ec2e4bdc6edl },
-          { 0x312d78de31c6ac13l,0x9483bf7609202ea6l,0xf64ab8b622c6d8e1l,
-            0xdddf589ce580de74l },
-          0 },
-        /* 23 << 216 */
-        { { 0xe0fa3336ee98a92al,0x7d80eeef66a4d745l,0xb612531bba0119d3l,
-            0x86e770c1b351fe15l },
-          { 0xafbad6f882d5a397l,0x1e5f1cb80dbf0110l,0x25138ac09f79063dl,
-            0x089ed22f2746a156l },
-          0 },
-        /* 25 << 216 */
-        { { 0x198d1b5d7d8b8ddel,0xf32c11078dab37fbl,0xf15fcb6d42b93874l,
-            0x91ddb74f41f94f84l },
-          { 0x6a64540a271524b2l,0x950a0c12758b5a64l,0xf9f237933dce9580l,
-            0xc8edd0ab2cf8ce32l },
-          0 },
-        /* 27 << 216 */
-        { { 0xefc6357eae1046b7l,0xe6704929612932e4l,0xa20305d4b1355b17l,
-            0x88a9136a58b4a156l },
-          { 0xbc379985b4d275ecl,0x718b91316eaf338bl,0x61229a7ad152a509l,
-            0x1109f7c445157ae9l },
-          0 },
-        /* 28 << 216 */
-        { { 0xcf197ca7fb8088fal,0x014272474ddc96c5l,0xa2d2550a30777176l,
-            0x534698984d0cf71dl },
-          { 0x6ce937b83a2aaac6l,0xe9f91dc35af38d9bl,0x2598ad83c8bf2899l,
-            0x8e706ac9b5536c16l },
-          0 },
-        /* 29 << 216 */
-        { { 0x2bde42140df85c2cl,0x4fb839f4058a7a63l,0x7c10572a47f51231l,
-            0x878826231989824el },
-          { 0xa8293d2016e1564al,0xcb11c0f818c04576l,0x83b91e7d9740c631l,
-            0xbdcb23d0cbffcea0l },
-          0 },
-        /* 31 << 216 */
-        { { 0x64bdfd2a9094bfc8l,0x8558acc60fc54d1el,0x3992848faf27721el,
-            0x7a8fcbdaa14cd009l },
-          { 0x6de6120900a4b9c2l,0xbd192b1b20cf8f28l,0x2356b90168d9be83l,
-            0xce1e7a944a49a48al },
-          0 },
-        /* 33 << 216 */
-        { { 0x7630103b6ac189b9l,0x15d35edc6f1f5549l,0x9051799d31cb58edl,
-            0xb4f32694a7a8579el },
-          { 0x6f037435f2abe306l,0xf0595696410fb2f7l,0x2a0d347a5cc98f59l,
-            0x9c19a9a87e3bbd69l },
-          0 },
-        /* 34 << 216 */
-        { { 0x87f8df7c0e58d493l,0xb1ae5ed058b73f12l,0xc368f784dea0c34dl,
-            0x9bd0a120859a91a0l },
-          { 0xb00d88b7cc863c68l,0x3a1cc11e3d1f4d65l,0xea38e0e70aa85593l,
-            0x37f13e987dc4aee8l },
-          0 },
-        /* 35 << 216 */
-        { { 0x91dbe00e49430cd2l,0xcc67c0b17aa8ef6bl,0x769985b8a273f1a5l,
-            0x358371dc360e5dafl },
-          { 0xbf9b9127d6d8b5e8l,0x748ae12cb45588c1l,0x9c609eb556076c58l,
-            0xf287489109733e89l },
-          0 },
-        /* 36 << 216 */
-        { { 0x10d38667bc947badl,0x738e07ce2a36ee2el,0xc93470cdc577fcacl,
-            0xdee1b6162782470dl },
-          { 0x36a25e672e793d12l,0xd6aa6caee0f186dal,0x474d0fd980e07af7l,
-            0xf7cdc47dba8a5cd4l },
-          0 },
-        /* 37 << 216 */
-        { { 0xceb6aa80f8a08fddl,0xd98fc56f46fead7bl,0xe26bd3f8b07b3f1fl,
-            0x3547e9b99d361c3el },
-          { 0x1a89f802e94b8eccl,0x2210a590c0a40ef2l,0xe7e5b965afc01bf2l,
-            0xca3d57fe234b936bl },
-          0 },
-        /* 39 << 216 */
-        { { 0x9230a70db9f9e8cdl,0xa63cebfcb81ba2ecl,0x8482ca87a8f664d6l,
-            0xa8ae78e00b137064l },
-          { 0xb787bd558384c687l,0xfde1d1bdb29ae830l,0xc4a9b2e39f0b7535l,
-            0x7e6c9a15efde2d01l },
-          0 },
-        /* 40 << 216 */
-        { { 0x7d2e5c054f7269b1l,0xfcf30777e287c385l,0x10edc84ff2a46f21l,
-            0x354417574f43fa36l },
-          { 0xf1327899fd703431l,0xa438d7a616dd587al,0x65c34c57e9c8352dl,
-            0xa728edab5cc5a24el },
-          0 },
-        /* 41 << 216 */
-        { { 0xcd6e6db872896d4fl,0x324afa99896c4640l,0x37d18c3d33a292bdl,
-            0x98dba3b44143421fl },
-          { 0x2406f3c949c61b84l,0x402d974754899588l,0xc73b7fd634a485e5l,
-            0x75c9bae08587f0c3l },
-          0 },
-        /* 43 << 216 */
-        { { 0x6c32fa8cb0b4a04dl,0xeb58d0d875fda587l,0x61d8a157c4b86563l,
-            0x92191bf01006b8afl },
-          { 0xd04d3eff32d3478bl,0x3cc52eab2a684fc8l,0xb19a0f1625de54ccl,
-            0x5c5295973620db2dl },
-          0 },
-        /* 44 << 216 */
-        { { 0xa97b51265c3427b0l,0x6401405cd282c9bdl,0x3629f8d7222c5c45l,
-            0xb1c02c16e8d50aedl },
-          { 0xbea2ed75d9635bc9l,0x226790c76e24552fl,0x3c33f2a365f1d066l,
-            0x2a43463e6dfccc2el },
-          0 },
-        /* 45 << 216 */
-        { { 0x09b2e0d3b8da1e01l,0xa3a1a8fee9c0eb04l,0x59af5afe8bf653bal,
-            0xba979f8bd0a54836l },
-          { 0xa0d8194b51ee6ffbl,0x451c29e2f4b0586cl,0x7eb5fddb7471ee3dl,
-            0x84b627d4bcb3afd8l },
-          0 },
-        /* 46 << 216 */
-        { { 0x8cc3453adb483761l,0xe7cc608565d5672bl,0x277ed6cbde3efc87l,
-            0x19f2f36869234eafl },
-          { 0x9aaf43175c0b800bl,0x1f1e7c898b6da6e2l,0x6cfb4715b94ec75el,
-            0xd590dd5f453118c2l },
-          0 },
-        /* 47 << 216 */
-        { { 0xa70e9b0afb54e812l,0x092a0d7d8d86819bl,0x5421ff042e669090l,
-            0x8af770c6b133c952l },
-          { 0xc8e8dd596c8b1426l,0x1c92eb0e9523b483l,0x5a7c88f2cf3d40edl,
-            0x4cc0c04bf5dd98f8l },
-          0 },
-        /* 48 << 216 */
-        { { 0x14e49da11f17a34cl,0x5420ab39235a1456l,0xb76372412f50363bl,
-            0x7b15d623c3fabb6el },
-          { 0xa0ef40b1e274e49cl,0x5cf5074496b1860al,0xd6583fbf66afe5a4l,
-            0x44240510f47e3e9al },
-          0 },
-        /* 49 << 216 */
-        { { 0xb3939a8ffd617288l,0x3d37e5c2d68c2636l,0x4a595fac9d666c0el,
-            0xfebcad9edb3a4978l },
-          { 0x6d284a49c125016fl,0x05a7b9c80ee246a2l,0xe8b351739436c6e9l,
-            0xffb89032d4be40b7l },
-          0 },
-        /* 51 << 216 */
-        { { 0xba1387a5436ebf33l,0xc351a400e8d05267l,0x18645dde4259dbe8l,
-            0x5fc32895c10fd676l },
-          { 0x1ef7a944807f040el,0x9486b5c625738e5fl,0xc9e56cf4a7e3e96cl,
-            0x34c7dc87a20be832l },
-          0 },
-        /* 52 << 216 */
-        { { 0xe10d49996fe8393fl,0x0f809a3fe91f3a32l,0x61096d1c802f63c8l,
-            0x289e146257750d3dl },
-          { 0xed06167e9889feeal,0xd5c9c0e2e0993909l,0x46fca0d856508ac6l,
-            0x918260474f1b8e83l },
-          0 },
-        /* 53 << 216 */
-        { { 0x1d5f2ad7a9bf79cbl,0x228fb24fca9c2f98l,0x5f7c3883701c4b71l,
-            0x18cf76c4ec42d686l },
-          { 0x3680d2e94dcdec8dl,0x6d58e87ba0d60cb6l,0x72fbf086a0e513cfl,
-            0xb922d3c5346ed99al },
-          0 },
-        /* 55 << 216 */
-        { { 0x1678d658c2b9b874l,0x0e0b2c47f6360d4dl,0x01a45c02a0c9b9acl,
-            0x05e82e9d0da69afbl },
-          { 0x50be4001f28b8018l,0x503d967b667d8241l,0x6cd816534981da04l,
-            0x9b18c3117f09c35fl },
-          0 },
-        /* 57 << 216 */
-        { { 0xdfdfd5b409d22331l,0xf445126817f0c6a2l,0xe51d1aa8a5cde27bl,
-            0xb61a12a37aaf9513l },
-          { 0xe43a241d3b3ea114l,0x5c62b624366ae28dl,0x085a530db5f237eal,
-            0x7c4ed375651205afl },
-          0 },
-        /* 59 << 216 */
-        { { 0xf9de879dce842decl,0xe505320a94cedb89l,0xee55dae7f05ad888l,
-            0x44ffbfa7f028b4efl },
-          { 0xa3c1b32e63b2cd31l,0x201a058910c5ab29l,0x20f930afcd4085d6l,
-            0xda79ed169f6ff24bl },
-          0 },
-        /* 60 << 216 */
-        { { 0x7e8cfbcf704e23c6l,0xc71b7d2228aaa65bl,0xa041b2bd245e3c83l,
-            0x69b98834d21854ffl },
-          { 0x89d227a3963bfeecl,0x99947aaade7da7cbl,0x1d9ee9dbee68a9b1l,
-            0x0a08f003698ec368l },
-          0 },
-        /* 61 << 216 */
-        { { 0x04c64f33b0959be5l,0x182332ba396a7fe2l,0x4c5401e302e15b97l,
-            0x92880f9877db104bl },
-          { 0x0bf0b9cc21726a33l,0x780264741acc7b6dl,0x9721f621a26f08e3l,
-            0xe3935b434197fed1l },
-          0 },
-        /* 63 << 216 */
-        { { 0x0bffae503652be69l,0x395a9c6afb3fd5d8l,0x17f66adaa4fadfbfl,
-            0x1ee92a35f9268f8cl },
-          { 0x40ded34d6827781al,0xcd36224e34e63dccl,0xec90cf571cd1ef7al,
-            0xf6067d578f72a3bfl },
-          0 },
-        /* 64 << 216 */
-        { { 0x142b55021a93507al,0xb4cd11878d3c06cfl,0xdf70e76a91ec3f40l,
-            0x484e81ad4e7553c2l },
-          { 0x830f87b5272e9d6el,0xea1c93e5c6ff514al,0x67cc2adcc4192a8el,
-            0xc77e27e242f4535al },
-          0 },
-        /* 65 << 216 */
-        { { 0x537388d299e2f9d2l,0x15ead88612cd6d08l,0x33dfe3a769082d86l,
-            0x0ef25f4266d79d40l },
-          { 0x8035b4e546ba5cf1l,0x4e48f53711eec591l,0x40b56cda122a7aael,
-            0x78e270211dbb79a7l },
-          0 },
-        /* 71 << 216 */
-        { { 0x520b655355b4a5b1l,0xeee835cafb4f5fdel,0xb2ae86e59a823d7fl,
-            0x24325f4fc084497fl },
-          { 0x542bed4e6f0eefa4l,0x2909233b141792fdl,0x74bfc3bfc847a946l,
-            0x8ec1d009e212cb44l },
-          0 },
-        /* 77 << 216 */
-        { { 0xc2082b6d5cedd516l,0xaf148eadeafa3a10l,0x104cd5855ad63aa6l,
-            0xe3fdbf8c78c11e1el },
-          { 0x78651c493c25c24el,0x8064c4f37b7cce0el,0xa55441d4a6d8a928l,
-            0x4525c40eb0db3adcl },
-          0 },
-        /* 83 << 216 */
-        { { 0x5f69e49cfde6001el,0xc61e753aee59b47el,0xd0d4559971b0db5bl,
-            0x7f76f7b45ad4acc3l },
-          { 0xb0318a9c39830897l,0x2b15da22feef3822l,0x34049400acfb0753l,
-            0x16f4fb51a5114ed4l },
-          0 },
-        /* 89 << 216 */
-        { { 0x0b5c76928defbf10l,0xb9f1795cb79cdb6el,0xba17e7759a90317cl,
-            0x3cb69cf950cf514bl },
-          { 0x076cc4c1e5b892ffl,0x75724e8fb548b73cl,0x2ebcdb33248ff2e6l,
-            0x1f12967be109b08fl },
-          0 },
-        /* 95 << 216 */
-        { { 0x3f514c63461b7bb3l,0x3bdca5aa70afbad7l,0x368ce251eab3e38bl,
-            0xdc0fb3300d101049l },
-          { 0x7ce09abdff5013eel,0x926dd7dd7d10729dl,0xe6fe47ab6f486197l,
-            0xd23964eaa6eb6903l },
-          0 },
-        /* 101 << 216 */
-        { { 0x537ceb74eca30797l,0xf171bba557b0f338l,0x220a31fee831f1f8l,
-            0xabbc2c7c5ae6bbbcl },
-          { 0xaf7609f27eadfb60l,0x22cff1d58f28b51bl,0x63c3d76d6d1863bdl,
-            0x3a6a2fb489e8a4c8l },
-          0 },
-        /* 107 << 216 */
-        { { 0x9e74f8beb26e38f0l,0xc4c73fc4ea8bd55bl,0x086f688e1429e1fcl,
-            0x91438ff40f78159fl },
-          { 0x3571ae5f20810acbl,0x305edafe7451eb00l,0x8443c96d5704385cl,
-            0xc03b234e542605b5l },
-          0 },
-        /* 113 << 216 */
-        { { 0x2e5ff4fed85567c2l,0x136f49c7e4abd0c6l,0x5a68730cfb8a62d1l,
-            0x101ebfd030bcb848l },
-          { 0x634b0618fee950bbl,0xfa748d21c8aa65bal,0xc1d67c3e699f5560l,
-            0x6fb0546cb22889d2l },
-          0 },
-        /* 116 << 216 */
-        { { 0xa9784ebd9c95f0f9l,0x5ed9deb224640771l,0x31244af7035561c4l,
-            0x87332f3a7ee857del },
-          { 0x09e16e9e2b9e0d88l,0x52d910f456a06049l,0x507ed477a9592f48l,
-            0x85cb917b2365d678l },
-          0 },
-        /* 119 << 216 */
-        { { 0x6108f2b458a9d40dl,0xb036034838e15a52l,0xcc5610a3fd5625d6l,
-            0x79825dd083b0418el },
-          { 0xf83a95fc6324b6e5l,0x2463114deedfc4ebl,0x58b177e32250707fl,
-            0x778dcd454af8d942l },
-          0 },
-        /* 125 << 216 */
-        { { 0x1ecf2670eb816bf8l,0xa2d6e73aaa6d59c6l,0xf9a11434156852ebl,
-            0x9bc9bb70f6f82c83l },
-          { 0xd23a018d9c874836l,0xd26bf8bc6db5a8b5l,0x1d648846bec0c624l,
-            0x39f15d97ef90302fl },
-          0 },
-    },
-    {
-        /* 0 << 224 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 224 */
-        { { 0xe3417bc035d0b34al,0x440b386b8327c0a7l,0x8fb7262dac0362d1l,
-            0x2c41114ce0cdf943l },
-          { 0x2ba5cef1ad95a0b1l,0xc09b37a867d54362l,0x26d6cdd201e486c9l,
-            0x20477abf42ff9297l },
-          0 },
-        /* 3 << 224 */
-        { { 0x126f35b51e706ad9l,0xb99cebb4c3a9ebdfl,0xa75389afbf608d90l,
-            0x76113c4fc6c89858l },
-          { 0x80de8eb097e2b5aal,0x7e1022cc63b91304l,0x3bdab6056ccc066cl,
-            0x33cbb144b2edf900l },
-          0 },
-        /* 4 << 224 */
-        { { 0xc41764717af715d2l,0xe2f7f594d0134a96l,0x2c1873efa41ec956l,
-            0xe4e7b4f677821304l },
-          { 0xe5c8ff9788d5374al,0x2b915e6380823d5bl,0xea6bc755b2ee8fe2l,
-            0x6657624ce7112651l },
-          0 },
-        /* 5 << 224 */
-        { { 0x157af101dace5acal,0xc4fdbcf211a6a267l,0xdaddf340c49c8609l,
-            0x97e49f52e9604a65l },
-          { 0x9be8e790937e2ad5l,0x846e2508326e17f1l,0x3f38007a0bbbc0dcl,
-            0xcf03603fb11e16d6l },
-          0 },
-        /* 7 << 224 */
-        { { 0x5ed0c007f8ae7c38l,0x6db07a5c3d740192l,0xbe5e9c2a5fe36db3l,
-            0xd5b9d57a76e95046l },
-          { 0x54ac32e78eba20f2l,0xef11ca8f71b9a352l,0x305e373eff98a658l,
-            0xffe5a100823eb667l },
-          0 },
-        /* 9 << 224 */
-        { { 0x5c8ed8d5da64309dl,0x61a6de5691b30704l,0xd6b52f6a2f9b5808l,
-            0x0eee419498c958a7l },
-          { 0xcddd9aab771e4caal,0x83965dfd78bc21bel,0x02affce3b3b504f5l,
-            0x30847a21561c8291l },
-          0 },
-        /* 10 << 224 */
-        { { 0xd2eb2cf152bfda05l,0xe0e4c4e96197b98cl,0x1d35076cf8a1726fl,
-            0x6c06085b2db11e3dl },
-          { 0x15c0c4d74463ba14l,0x9d292f830030238cl,0x1311ee8b3727536dl,
-            0xfeea86efbeaedc1el },
-          0 },
-        /* 11 << 224 */
-        { { 0xb9d18cd366131e2el,0xf31d974f80fe2682l,0xb6e49e0fe4160289l,
-            0x7c48ec0b08e92799l },
-          { 0x818111d8d1989aa7l,0xb34fa0aaebf926f9l,0xdb5fe2f5a245474al,
-            0xf80a6ebb3c7ca756l },
-          0 },
-        /* 13 << 224 */
-        { { 0x8ea610593de9abe3l,0x404348819cdc03bel,0x9b261245cfedce8cl,
-            0x78c318b4cf5234a1l },
-          { 0x510bcf16fde24c99l,0x2a77cb75a2c2ff5dl,0x9c895c2b27960fb4l,
-            0xd30ce975b0eda42bl },
-          0 },
-        /* 15 << 224 */
-        { { 0x09521177ff57d051l,0x2ff38037fb6a1961l,0xfc0aba74a3d76ad4l,
-            0x7c76480325a7ec17l },
-          { 0x7532d75f48879bc8l,0xea7eacc058ce6bc1l,0xc82176b48e896c16l,
-            0x9a30e0b22c750fedl },
-          0 },
-        /* 16 << 224 */
-        { { 0xc37e2c2e421d3aa4l,0xf926407ce84fa840l,0x18abc03d1454e41cl,
-            0x26605ecd3f7af644l },
-          { 0x242341a6d6a5eabfl,0x1edb84f4216b668el,0xd836edb804010102l,
-            0x5b337ce7945e1d8cl },
-          0 },
-        /* 17 << 224 */
-        { { 0xd2075c77c055dc14l,0x2a0ffa2581d89cdfl,0x8ce815ea6ffdcbafl,
-            0xa3428878fb648867l },
-          { 0x277699cf884655fbl,0xfa5b5bd6364d3e41l,0x01f680c6441e1cb7l,
-            0x3fd61e66b70a7d67l },
-          0 },
-        /* 19 << 224 */
-        { { 0xfd5bb657b1fa70fbl,0xfa07f50fd8073a00l,0xf72e3aa7bca02500l,
-            0xf68f895d9975740dl },
-          { 0x301120605cae2a6al,0x01bd721802874842l,0x3d4238917ce47bd3l,
-            0xa66663c1789544f6l },
-          0 },
-        /* 21 << 224 */
-        { { 0xb4b9a39b36194d40l,0xe857a7c577612601l,0xf4209dd24ecf2f58l,
-            0x82b9e66d5a033487l },
-          { 0xc1e36934e4e8b9ddl,0xd2372c9da42377d7l,0x51dc94c70e3ae43bl,
-            0x4c57761e04474f6fl },
-          0 },
-        /* 23 << 224 */
-        { { 0xa39114e24415503bl,0xc08ff7c64cbb17e9l,0x1eff674dd7dec966l,
-            0x6d4690af53376f63l },
-          { 0xff6fe32eea74237bl,0xc436d17ecd57508el,0x15aa28e1edcc40fel,
-            0x0d769c04581bbb44l },
-          0 },
-        /* 25 << 224 */
-        { { 0xfe51d0296ae55043l,0x8931e98f44a87de1l,0xe57f1cc609e4fee2l,
-            0x0d063b674e072d92l },
-          { 0x70a998b9ed0e4316l,0xe74a736b306aca46l,0xecf0fbf24fda97c7l,
-            0xa40f65cb3e178d93l },
-          0 },
-        /* 27 << 224 */
-        { { 0x8667e981c27253c9l,0x05a6aefb92b36a45l,0xa62c4b369cb7bb46l,
-            0x8394f37511f7027bl },
-          { 0x747bc79c5f109d0fl,0xcad88a765b8cc60al,0x80c5a66b58f09e68l,
-            0xe753d451f6127eacl },
-          0 },
-        /* 28 << 224 */
-        { { 0xc44b74a15b0ec6f5l,0x47989fe45289b2b8l,0x745f848458d6fc73l,
-            0xec362a6ff61c70abl },
-          { 0x070c98a7b3a8ad41l,0x73a20fc07b63db51l,0xed2c2173f44c35f4l,
-            0x8a56149d9acc9dcal },
-          0 },
-        /* 29 << 224 */
-        { { 0x98f178819ac6e0f4l,0x360fdeafa413b5edl,0x0625b8f4a300b0fdl,
-            0xf1f4d76a5b3222d3l },
-          { 0x9d6f5109587f76b8l,0x8b4ee08d2317fdb5l,0x88089bb78c68b095l,
-            0x95570e9a5808d9b9l },
-          0 },
-        /* 31 << 224 */
-        { { 0x2e1284943fb42622l,0x3b2700ac500907d5l,0xf370fb091a95ec63l,
-            0xf8f30be231b6dfbdl },
-          { 0xf2b2f8d269e55f15l,0x1fead851cc1323e9l,0xfa366010d9e5eef6l,
-            0x64d487b0e316107el },
-          0 },
-        /* 33 << 224 */
-        { { 0xc9a9513929607745l,0x0ca07420a26f2b28l,0xcb2790e74bc6f9ddl,
-            0x345bbb58adcaffc0l },
-          { 0xc65ea38cbe0f27a2l,0x67c24d7c641fcb56l,0x2c25f0a7a9e2c757l,
-            0x93f5cdb016f16c49l },
-          0 },
-        /* 34 << 224 */
-        { { 0x2ca5a9d7c5ee30a1l,0xd1593635b909b729l,0x804ce9f3dadeff48l,
-            0xec464751b07c30c3l },
-          { 0x89d65ff39e49af6al,0xf2d6238a6f3d01bcl,0x1095561e0bced843l,
-            0x51789e12c8a13fd8l },
-          0 },
-        /* 35 << 224 */
-        { { 0xd633f929763231dfl,0x46df9f7de7cbddefl,0x01c889c0cb265da8l,
-            0xfce1ad10af4336d2l },
-          { 0x8d110df6fc6a0a7el,0xdd431b986da425dcl,0xcdc4aeab1834aabel,
-            0x84deb1248439b7fcl },
-          0 },
-        /* 36 << 224 */
-        { { 0x8796f1693c2a5998l,0x9b9247b47947190dl,0x55b9d9a511597014l,
-            0x7e9dd70d7b1566eel },
-          { 0x94ad78f7cbcd5e64l,0x0359ac179bd4c032l,0x3b11baaf7cc222ael,
-            0xa6a6e284ba78e812l },
-          0 },
-        /* 37 << 224 */
-        { { 0x8392053f24cea1a0l,0xc97bce4a33621491l,0x7eb1db3435399ee9l,
-            0x473f78efece81ad1l },
-          { 0x41d72fe0f63d3d0dl,0xe620b880afab62fcl,0x92096bc993158383l,
-            0x41a213578f896f6cl },
-          0 },
-        /* 39 << 224 */
-        { { 0x6fb4d4e42bad4d5fl,0xfa4c3590fef0059bl,0x6a10218af5122294l,
-            0x9a78a81aa85751d1l },
-          { 0x04f20579a98e84e7l,0xfe1242c04997e5b5l,0xe77a273bca21e1e4l,
-            0xfcc8b1ef9411939dl },
-          0 },
-        /* 40 << 224 */
-        { { 0xe20ea30292d0487al,0x1442dbec294b91fel,0x1f7a4afebb6b0e8fl,
-            0x1700ef746889c318l },
-          { 0xf5bbffc370f1fc62l,0x3b31d4b669c79ccal,0xe8bc2aaba7f6340dl,
-            0xb0b08ab4a725e10al },
-          0 },
-        /* 41 << 224 */
-        { { 0x44f05701ae340050l,0xba4b30161cf0c569l,0x5aa29f83fbe19a51l,
-            0x1b9ed428b71d752el },
-          { 0x1666e54eeb4819f5l,0x616cdfed9e18b75bl,0x112ed5be3ee27b0bl,
-            0xfbf2831944c7de4dl },
-          0 },
-        /* 43 << 224 */
-        { { 0x722eb104e2b4e075l,0x49987295437c4926l,0xb1e4c0e446a9b82dl,
-            0xd0cb319757a006f5l },
-          { 0xf3de0f7dd7808c56l,0xb5c54d8f51f89772l,0x500a114aadbd31aal,
-            0x9afaaaa6295f6cabl },
-          0 },
-        /* 44 << 224 */
-        { { 0x94705e2104cf667al,0xfc2a811b9d3935d7l,0x560b02806d09267cl,
-            0xf19ed119f780e53bl },
-          { 0xf0227c09067b6269l,0x967b85335caef599l,0x155b924368efeebcl,
-            0xcd6d34f5c497bae6l },
-          0 },
-        /* 45 << 224 */
-        { { 0x1dd8d5d36cceb370l,0x2aeac579a78d7bf9l,0x5d65017d70b67a62l,
-            0x70c8e44f17c53f67l },
-          { 0xd1fc095086a34d09l,0xe0fca256e7134907l,0xe24fa29c80fdd315l,
-            0x2c4acd03d87499adl },
-          0 },
-        /* 46 << 224 */
-        { { 0xbaaf75173b5a9ba6l,0xb9cbe1f612e51a51l,0xd88edae35e154897l,
-            0xe4309c3c77b66ca0l },
-          { 0xf5555805f67f3746l,0x85fc37baa36401ffl,0xdf86e2cad9499a53l,
-            0x6270b2a3ecbc955bl },
-          0 },
-        /* 47 << 224 */
-        { { 0xafae64f5974ad33bl,0x04d85977fe7b2df1l,0x2a3db3ff4ab03f73l,
-            0x0b87878a8702740al },
-          { 0x6d263f015a061732l,0xc25430cea32a1901l,0xf7ebab3ddb155018l,
-            0x3a86f69363a9b78el },
-          0 },
-        /* 48 << 224 */
-        { { 0x349ae368da9f3804l,0x470f07fea164349cl,0xd52f4cc98562baa5l,
-            0xc74a9e862b290df3l },
-          { 0xd3a1aa3543471a24l,0x239446beb8194511l,0xbec2dd0081dcd44dl,
-            0xca3d7f0fc42ac82dl },
-          0 },
-        /* 49 << 224 */
-        { { 0x1f3db085fdaf4520l,0xbb6d3e804549daf2l,0xf5969d8a19ad5c42l,
-            0x7052b13ddbfd1511l },
-          { 0x11890d1b682b9060l,0xa71d3883ac34452cl,0xa438055b783805b4l,
-            0x432412774725b23el },
-          0 },
-        /* 51 << 224 */
-        { { 0x40b08f7443b30ca8l,0xe10b5bbad9934583l,0xe8a546d6b51110adl,
-            0x1dd50e6628e0b6c5l },
-          { 0x292e9d54cff2b821l,0x3882555d47281760l,0x134838f83724d6e3l,
-            0xf2c679e022ddcda1l },
-          0 },
-        /* 52 << 224 */
-        { { 0x40ee88156d2a5768l,0x7f227bd21c1e7e2dl,0x487ba134d04ff443l,
-            0x76e2ff3dc614e54bl },
-          { 0x36b88d6fa3177ec7l,0xbf731d512328fff5l,0x758caea249ba158el,
-            0x5ab8ff4c02938188l },
-          0 },
-        /* 53 << 224 */
-        { { 0x33e1605635edc56dl,0x5a69d3497e940d79l,0x6c4fd00103866dcbl,
-            0x20a38f574893cdefl },
-          { 0xfbf3e790fac3a15bl,0x6ed7ea2e7a4f8e6bl,0xa663eb4fbc3aca86l,
-            0x22061ea5080d53f7l },
-          0 },
-        /* 55 << 224 */
-        { { 0x635a8e5ec3a0ee43l,0x70aaebca679898ffl,0x9ee9f5475dc63d56l,
-            0xce987966ffb34d00l },
-          { 0xf9f86b195e26310al,0x9e435484382a8ca8l,0x253bcb81c2352fe4l,
-            0xa4eac8b04474b571l },
-          0 },
-        /* 57 << 224 */
-        { { 0x2617f91c93aa96b8l,0x0fc8716b7fca2e13l,0xa7106f5e95328723l,
-            0xd1c9c40b262e6522l },
-          { 0xb9bafe8642b7c094l,0x1873439d1543c021l,0xe1baa5de5cbefd5dl,
-            0xa363fc5e521e8affl },
-          0 },
-        /* 59 << 224 */
-        { { 0xbc00fc2f2f8ba2c7l,0x0966eb2f7c67aa28l,0x13f7b5165a786972l,
-            0x3bfb75578a2fbba0l },
-          { 0x131c4f235a2b9620l,0xbff3ed276faf46bel,0x9b4473d17e172323l,
-            0x421e8878339f6246l },
-          0 },
-        /* 60 << 224 */
-        { { 0x0fa8587a25a41632l,0xc0814124a35b6c93l,0x2b18a9f559ebb8dbl,
-            0x264e335776edb29cl },
-          { 0xaf245ccdc87c51e2l,0x16b3015b501e6214l,0xbb31c5600a3882cel,
-            0x6961bb94fec11e04l },
-          0 },
-        /* 61 << 224 */
-        { { 0x3b825b8deff7a3a0l,0xbec33738b1df7326l,0x68ad747c99604a1fl,
-            0xd154c9349a3bd499l },
-          { 0xac33506f1cc7a906l,0x73bb53926c560e8fl,0x6428fcbe263e3944l,
-            0xc11828d51c387434l },
-          0 },
-        /* 63 << 224 */
-        { { 0x659b17c8d8ceb147l,0x9b649eeeb70a5554l,0x6b7fa0b5ac6bc634l,
-            0xd99fe2c71d6e732fl },
-          { 0x30e6e7628d3abba2l,0x18fee6e7a797b799l,0x5c9d360dc696464dl,
-            0xe3baeb4827bfde12l },
-          0 },
-        /* 64 << 224 */
-        { { 0x2bf5db47f23206d5l,0x2f6d34201d260152l,0x17b876533f8ff89al,
-            0x5157c30c378fa458l },
-          { 0x7517c5c52d4fb936l,0xef22f7ace6518cdcl,0xdeb483e6bf847a64l,
-            0xf508455892e0fa89l },
-          0 },
-        /* 65 << 224 */
-        { { 0xf77bb113a74ed3bel,0x89e4eb8f074f2637l,0x7fbfa84df7ce2aebl,
-            0xe7c6ecd5baaefe4cl },
-          { 0x176bba7df6319542l,0x70098120f6080799l,0x2e2118339054d9aal,
-            0x1be4c6a78295a912l },
-          0 },
-        /* 71 << 224 */
-        { { 0x6bb4d8c35df1455fl,0xb839f08f0384b033l,0x718868af11f95d50l,
-            0xae256a92e07a8801l },
-          { 0xa5bafaf24d71a273l,0x18ff04ea2a30e68fl,0x364c193287ba727el,
-            0x4bb8cf99befcaf73l },
-          0 },
-        /* 77 << 224 */
-        { { 0xc79f5b1f4e9fb3d7l,0x52854970a51cccddl,0xa4e27e97f00054a3l,
-            0x26a79792240e1232l },
-          { 0xb15579fecb5ff465l,0x6ef54c3bd1722a84l,0xee211bfa5239a4d8l,
-            0x36c7db27270b7059l },
-          0 },
-        /* 83 << 224 */
-        { { 0x5e7da0a9f9858cd3l,0x67459de5b633de49l,0x2db0d54b2e73892el,
-            0x37f50877adae399al },
-          { 0x83c28b83b65e6179l,0xae5a915ca39faf17l,0x6ab8f3fbe841b53cl,
-            0x7c30997b0df7d004l },
-          0 },
-        /* 89 << 224 */
-        { { 0x87904ca7b3b862bdl,0x7593db93cf9ea671l,0x8a2670f8739aa783l,
-            0x3921d779f5154ca6l },
-          { 0xe81ca56468f65ebbl,0x0c600603bc4e64d4l,0xdf170049cb83b2d1l,
-            0x373893b863487064l },
-          0 },
-        /* 95 << 224 */
-        { { 0x7c3c52b9c0c4e88el,0x0f0484d06f0c2446l,0xeb876827000fe87bl,
-            0xa749b3136d20f94al },
-          { 0x0876dae9d55abda6l,0xe6e4367620726911l,0xf85e8a8c4a2676b4l,
-            0x4e8c97f1b4a890ebl },
-          0 },
-        /* 101 << 224 */
-        { { 0xa992f482a3c0a4f4l,0xe1536f3f7a8d961al,0x26fc79ae000752b0l,
-            0xdbfb706b76ad8508l },
-          { 0x2642b2ed6f4cf9e4l,0xa013db54557fa7e2l,0x2ef711821d326116l,
-            0x8dc3f5bcbafc83ecl },
-          0 },
-        /* 107 << 224 */
-        { { 0x9671258578e5a201l,0xc71aca1de9125569l,0x360c45c0e2231379l,
-            0x2d71783512e82369l },
-          { 0x392432d3d84b2153l,0x502fd3f6d6939ffel,0x33c440ae6e766cacl,
-            0x99f1fbee28062416l },
-          0 },
-        /* 113 << 224 */
-        { { 0xe51ad841861604cbl,0x1ec9c54f630283a7l,0xcc42cad582a39473l,
-            0xa2eb053709929c4al },
-          { 0xe374459767f655a3l,0x9f54c2451d7f2674l,0xd85e9163fbc8aba5l,
-            0x12fd0b55866bc892l },
-          0 },
-        /* 116 << 224 */
-        { { 0x4f2c3063d7bd4661l,0xe533798d57a974ccl,0x44860d503ea02d85l,
-            0xf2a7f4e5acaa0521l },
-          { 0x05593061abb108f0l,0x56d1056044528309l,0x1f674df9c88b6d1el,
-            0x19fdc4cbd8744c4dl },
-          0 },
-        /* 119 << 224 */
-        { { 0xfd1488ec00f2f1d5l,0x24fcc67b44a825ddl,0xc7bfae2ea925a0f4l,
-            0x5e03249cad59cf48l },
-          { 0x1dc5a8e11af4844cl,0x89b2fbc58a598c20l,0xb0f56afff2078121l,
-            0x8194012d4878bb0dl },
-          0 },
-        /* 125 << 224 */
-        { { 0xc1cbe9d3a5ae1031l,0x38da74435706b987l,0x01844b55b353f188l,
-            0x390c59ca87a807c5l },
-          { 0x55ac7b1fb13b780cl,0x060970bff375c1cbl,0x8dd1f378c7ab4e5cl,
-            0xcca782e5cf726645l },
-          0 },
-    },
-    {
-        /* 0 << 232 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 232 */
-        { { 0x91213462f23f2d92l,0x6cab71bd60b94078l,0x6bdd0a63176cde20l,
-            0x54c9b20cee4d54bcl },
-          { 0x3cd2d8aa9f2ac02fl,0x03f8e617206eedb0l,0xc7f68e1693086434l,
-            0x831469c592dd3db9l },
-          0 },
-        /* 3 << 232 */
-        { { 0x4a9090cde36d0757l,0xf722d7b1d9a29382l,0xfb7fb04c04b48ddfl,
-            0x628ad2a7ebe16f43l },
-          { 0xcd3fbfb520226040l,0x6c34ecb15104b6c4l,0x30c0754ec903c188l,
-            0xec336b082d23cab0l },
-          0 },
-        /* 4 << 232 */
-        { { 0x9f51439e558df019l,0x230da4baac712b27l,0x518919e355185a24l,
-            0x4dcefcdd84b78f50l },
-          { 0xa7d90fb2a47d4c5al,0x55ac9abfb30e009el,0xfd2fc35974eed273l,
-            0xb72d824cdbea8fafl },
-          0 },
-        /* 5 << 232 */
-        { { 0xd213f923cbb13d1bl,0x98799f425bfb9bfel,0x1ae8ddc9701144a9l,
-            0x0b8b3bb64c5595eel },
-          { 0x0ea9ef2e3ecebb21l,0x17cb6c4b3671f9a7l,0x47ef464f726f1d1fl,
-            0x171b94846943a276l },
-          0 },
-        /* 7 << 232 */
-        { { 0xc9941109a607419dl,0xfaa71e62bb6bca80l,0x34158c1307c431f3l,
-            0x594abebc992bc47al },
-          { 0x6dfea691eb78399fl,0x48aafb353f42cba4l,0xedcd65af077c04f0l,
-            0x1a29a366e884491al },
-          0 },
-        /* 9 << 232 */
-        { { 0x7bf6a5c1f7ea25aal,0xd165e6bffbb07d5fl,0xe353936189e78671l,
-            0xa3fcac892bac4219l },
-          { 0xdfab6fd4f0baa8abl,0x5a4adac1e2c1c2e5l,0x6cd75e3140d85849l,
-            0xce263fea19b39181l },
-          0 },
-        /* 10 << 232 */
-        { { 0xb8d804a3315980cdl,0x693bc492fa3bebf7l,0x3578aeee2253c504l,
-            0x158de498cd2474a2l },
-          { 0x1331f5c7cfda8368l,0xd2d7bbb378d7177el,0xdf61133af3c1e46el,
-            0x5836ce7dd30e7be8l },
-          0 },
-        /* 11 << 232 */
-        { { 0xe042ece59a29a5c5l,0xb19b3c073b6c8402l,0xc97667c719d92684l,
-            0xb5624622ebc66372l },
-          { 0x0cb96e653c04fa02l,0x83a7176c8eaa39aal,0x2033561deaa1633fl,
-            0x45a9d0864533df73l },
-          0 },
-        /* 13 << 232 */
-        { { 0xa29ae9df5ece6e7cl,0x0603ac8f0facfb55l,0xcfe85b7adda233a5l,
-            0xe618919fbd75f0b8l },
-          { 0xf555a3d299bf1603l,0x1f43afc9f184255al,0xdcdaf341319a3e02l,
-            0xd3b117ef03903a39l },
-          0 },
-        /* 15 << 232 */
-        { { 0xb6b82fa74d82f4c2l,0x90725a606804efb3l,0xbc82ec46adc3425el,
-            0xb7b805812787843el },
-          { 0xdf46d91cdd1fc74cl,0xdc1c62cbe783a6c4l,0x59d1b9f31a04cbbal,
-            0xd87f6f7295e40764l },
-          0 },
-        /* 16 << 232 */
-        { { 0x196860411e84e0e5l,0xa5db84d3aea34c93l,0xf9d5bb197073a732l,
-            0xb8d2fe566bcfd7c0l },
-          { 0x45775f36f3eb82fal,0x8cb20cccfdff8b58l,0x1659b65f8374c110l,
-            0xb8b4a422330c789al },
-          0 },
-        /* 17 << 232 */
-        { { 0xa6312c9e8977d99bl,0xbe94433183f531e7l,0x8232c0c218d3b1d4l,
-            0x617aae8be1247b73l },
-          { 0x40153fc4282aec3bl,0xc6063d2ff7b8f823l,0x68f10e583304f94cl,
-            0x31efae74ee676346l },
-          0 },
-        /* 19 << 232 */
-        { { 0xd98bf2a43734e520l,0x5e3abbe3209bdcbal,0x77c76553bc945b35l,
-            0x5331c093c6ef14aal },
-          { 0x518ffe2976b60c80l,0x2285593b7ace16f8l,0xab1f64ccbe2b9784l,
-            0xe8f2c0d9ab2421b6l },
-          0 },
-        /* 21 << 232 */
-        { { 0x481dae5fd5ecfefcl,0x07084fd8c2bff8fcl,0x8040a01aea324596l,
-            0x4c646980d4de4036l },
-          { 0x9eb8ab4ed65abfc3l,0xe01cb91f13541ec7l,0x8f029adbfd695012l,
-            0x9ae284833c7569ecl },
-          0 },
-        /* 23 << 232 */
-        { { 0xc83605f6f10ff927l,0xd387145123739fc6l,0x6d163450cac1c2ccl,
-            0x6b521296a2ec1ac5l },
-          { 0x0606c4f96e3cb4a5l,0xe47d3f41778abff7l,0x425a8d5ebe8e3a45l,
-            0x53ea9e97a6102160l },
-          0 },
-        /* 25 << 232 */
-        { { 0x6b72fab526bc2797l,0x13670d1699f16771l,0x001700521e3e48d1l,
-            0x978fe401b7adf678l },
-          { 0x55ecfb92d41c5dd4l,0x5ff8e247c7b27da5l,0xe7518272013fb606l,
-            0x5768d7e52f547a3cl },
-          0 },
-        /* 27 << 232 */
-        { { 0x0e966e64c73b2383l,0x49eb3447d17d8762l,0xde1078218da05dabl,
-            0x443d8baa016b7236l },
-          { 0x163b63a5ea7610d6l,0xe47e4185ce1ca979l,0xae648b6580baa132l,
-            0xebf53de20e0d5b64l },
-          0 },
-        /* 28 << 232 */
-        { { 0x6ba535da9a85788bl,0xd21f03aebd0626d4l,0x099f8c47e873dc64l,
-            0xcda8564d018ec97el },
-          { 0x3e8d7a5cde92c68cl,0x78e035a173323cc4l,0x3ef26275f880ff7cl,
-            0xa4ee3dff273eedaal },
-          0 },
-        /* 29 << 232 */
-        { { 0x8bbaec49571d92acl,0x569e85fe4692517fl,0x8333b014a14ea4afl,
-            0x32f2a62f12e5c5adl },
-          { 0x98c2ce3a06d89b85l,0xb90741aa2ff77a08l,0x2530defc01f795a2l,
-            0xd6e5ba0b84b3c199l },
-          0 },
-        /* 31 << 232 */
-        { { 0x3d1b24cb28c682c6l,0x27f252288612575bl,0xb587c779e8e66e98l,
-            0x7b0c03e9405eb1fel },
-          { 0xfdf0d03015b548e7l,0xa8be76e038b36af7l,0x4cdab04a4f310c40l,
-            0x6287223ef47ecaecl },
-          0 },
-        /* 33 << 232 */
-        { { 0x0a4c6f3670ad54aal,0xc24cfd0d2a543909l,0xe1b0bc5b745c1a97l,
-            0xb8431cfd68f0ddbfl },
-          { 0x326357989ed8cb06l,0xa00a80ff759d2b7dl,0x81f335c190570e02l,
-            0xbfccd89849c4e4d9l },
-          0 },
-        /* 34 << 232 */
-        { { 0x4dcb646bfd16d8c4l,0x76a6b640e38ba57bl,0xd92de1f79d8ae7e2l,
-            0x126f48f13f77f23bl },
-          { 0xb7b53ca977e8abc2l,0x3faa17112c0787ffl,0xf8f9308c8e5762f8l,
-            0x600a8a7f6b83aea8l },
-          0 },
-        /* 35 << 232 */
-        { { 0xa2aed4a799aa03c0l,0x1f93b93da18b79c5l,0x7b4550b7314192c3l,
-            0x9da00676272bb08el },
-          { 0xe42f0d7e23e072edl,0x7ce76494888b5783l,0x4c7900203680b63bl,
-            0x6040c83f662a8718l },
-          0 },
-        /* 36 << 232 */
-        { { 0xba9e5c88a56d73edl,0x6c24f7712ca054d3l,0x4a37c235083beae1l,
-            0x04a883b26483e9fdl },
-          { 0x0c63f3aee27c2c5dl,0x0e1da88dae4671f1l,0xa577e8e25995e1dbl,
-            0xbfc4b1b16ed6066al },
-          0 },
-        /* 37 << 232 */
-        { { 0x8b398541f53d9e63l,0x4ab045bb019395cbl,0x69a1b90371dd70c7l,
-            0xdedf284b38aaa431l },
-          { 0xb45e245aaed3efe7l,0x49460905079f2facl,0xde4dee470845bd78l,
-            0x0540524039d02ec3l },
-          0 },
-        /* 39 << 232 */
-        { { 0x300cf051675cc986l,0x758afea99324219fl,0xf524c3fad5a93b5fl,
-            0xb73385abc3864a8al },
-          { 0xbde19289f6be9050l,0xbb9018558205a3f3l,0x99a9d14d229f6b89l,
-            0x4c3a802f4336e68fl },
-          0 },
-        /* 40 << 232 */
-        { { 0xdd4a12d8e12b31f8l,0x577e29bc177736e6l,0x2353722ba88935e8l,
-            0xca1d3729015f286dl },
-          { 0x86c7b6a239a3e035l,0x6e5250bfd3b03a9fl,0x79d98930fd0d536el,
-            0x8c4cbbabfa0c3832l },
-          0 },
-        /* 41 << 232 */
-        { { 0x92ecff374f8e6163l,0x171cc8830f35faeal,0xc5434242bcd36142l,
-            0x707049adb28b63bbl },
-          { 0xa1f4d1dbf6443da9l,0x002bb062dabc108bl,0x17287f171a272b08l,
-            0x2a3aac8c884cf6bbl },
-          0 },
-        /* 43 << 232 */
-        { { 0x55524645651c0a5al,0x14624a9703cf0d12l,0xca9315a8f884a9e2l,
-            0x9840c6e2df7c9d59l },
-          { 0xd96bd10a7438e8d5l,0x12be73d2b2f887del,0x5e47445dca2493efl,
-            0x85aef555e9fff03el },
-          0 },
-        /* 44 << 232 */
-        { { 0x169b38c9a43b2339l,0x884308d91732bfabl,0xe4b593a28ff202ddl,
-            0xaf51d11f1e65376cl },
-          { 0x6ec648de741525ffl,0xf93cbd369ff4c628l,0xc76df9efb1129c79l,
-            0x31a5f2e2b7a67294l },
-          0 },
-        /* 45 << 232 */
-        { { 0x0661bc02801d0e38l,0x4a37dc0e71fc46b7l,0x0b224cfc80c3e311l,
-            0x2dd3d2779646a957l },
-          { 0xfa45aa18ef524012l,0x5d2a2d0916185a09l,0x34d5c630b5313dcel,
-            0xd9581ed151e4cf84l },
-          0 },
-        /* 46 << 232 */
-        { { 0x5845aa4a8ebd2af8l,0x141404ecd3df43ccl,0xff3fc7681ffd48d9l,
-            0x8a096e72e0cefb65l },
-          { 0xc9c81cfdffc3a5cdl,0x7550aa3029b27cf9l,0x34dca72b65fa0380l,
-            0xe8c5f6059ddd032bl },
-          0 },
-        /* 47 << 232 */
-        { { 0xe53da8a46bfbadb3l,0x4a9dfa55afaeeb5el,0x076245ea6644b1d4l,
-            0xc19be4012307bbcbl },
-          { 0x097774c19d77318bl,0xacc8a1519cfd51c4l,0x736ef6b3ecaa7b08l,
-            0x107479132d643a80l },
-          0 },
-        /* 48 << 232 */
-        { { 0x2d500910cab91f1el,0xbedd9e444d1cd216l,0xd634b74fedd02252l,
-            0xbd60f8e11258617al },
-          { 0xd8c7537b9e05614al,0xfd26c766e7af5fc5l,0x0660b581582bd926l,
-            0x87019244acf07fc8l },
-          0 },
-        /* 49 << 232 */
-        { { 0xd4889fdf6220ae8el,0x745d67ec1abf1549l,0x957b2e3d2fb89c36l,
-            0x9768c90edc62ada9l },
-          { 0x90332fd748e6c46el,0x5aa5a4e54e90ef0dl,0x58838fd3ddcc8571l,
-            0xd12f6c6f9a721126l },
-          0 },
-        /* 51 << 232 */
-        { { 0x2f0fd0b2cec757bal,0x46a7a9c63032cd1dl,0x9af3a600547d7a77l,
-            0x828e16eca43da1bal },
-          { 0x0b303a66092a8d92l,0x78ba0389c23d08bal,0x52aed08d4616bd29l,
-            0x4c0ff1210539c9fal },
-          0 },
-        /* 52 << 232 */
-        { { 0x2c3b7322badcfe8el,0x6e0616fac5e25a04l,0x0a3c12753da6e4a2l,
-            0xe46c957e077bca01l },
-          { 0xb46ca4e3da4be64bl,0xa59bda668e75ee78l,0x41835184a4de98f2l,
-            0x6efb1f924ed6a568l },
-          0 },
-        /* 53 << 232 */
-        { { 0xbb8cdc094af1dd72l,0x93c0aa38a2460633l,0xf66f5d238a7ebc93l,
-            0x43ecda843e8e37a6l },
-          { 0x399da8265fd5139el,0x8b39930fd446f38el,0x114414135d2b68efl,
-            0x8be163b8d1637c38l },
-          0 },
-        /* 55 << 232 */
-        { { 0x488e2a35b70ddbd3l,0xb4aa5f718da50077l,0xb38b74b1d8752bbdl,
-            0x7007f328416106a3l },
-          { 0xe6a62e4fcec4ea68l,0x9fdfb79741ef920bl,0x1a19d7dfe3c337a6l,
-            0x08f643558be0f586l },
-          0 },
-        /* 57 << 232 */
-        { { 0x91a5d8ff60343a1fl,0x921e442173ef8cdfl,0x4358f27b975138cdl,
-            0x36fd8577a4992b08l },
-          { 0xc07c8ca1f8d044c6l,0xcf42903687747b6bl,0x0932ffb0867c8632l,
-            0x7e565213250e5a89l },
-          0 },
-        /* 59 << 232 */
-        { { 0xae7c3b9b06255feal,0x2eb1d9a78a6fe229l,0xf81548e77601e6f8l,
-            0x777394eb7bd96d6cl },
-          { 0x54734187000a3509l,0xaeec146492d43c04l,0xc9b7f0d7c428b4acl,
-            0x9d4bcedccd7f7018l },
-          0 },
-        /* 60 << 232 */
-        { { 0x4741b9b311370605l,0x47fa72f75d09b355l,0x391a71ac7a144c6al,
-            0x0808c0f498b6e3cal },
-          { 0x7eaed9ef7fe53900l,0xf157a2a5e5a830bal,0xd13ec09127974afcl,
-            0x78d710a70b87997dl },
-          0 },
-        /* 61 << 232 */
-        { { 0xcbb96ecb4e263f81l,0x093e0d1509084351l,0x7af3232629220a81l,
-            0xd721b415c60f36dcl },
-          { 0xe3340a87fe9387a1l,0x6088bf482ff2b126l,0xd31028f1d2bc982cl,
-            0x9794e106630d52cbl },
-          0 },
-        /* 63 << 232 */
-        { { 0x1dac76780b11e972l,0x46e814c62698dafel,0x553f7370c37640d6l,
-            0xdcf588cc51cede93l },
-          { 0x4d6b56d3c3f6215bl,0x07edc6621b8f8f03l,0xdfef9d60b9a5dfbcl,
-            0x377edf4d10af7a5bl },
-          0 },
-        /* 64 << 232 */
-        { { 0x8928e99aeeaf8c49l,0xee7aa73d6e24d728l,0x4c5007c2e72b156cl,
-            0x5fcf57c5ed408a1dl },
-          { 0x9f719e39b6057604l,0x7d343c01c2868bbfl,0x2cca254b7e103e2dl,
-            0xe6eb38a9f131bea2l },
-          0 },
-        /* 65 << 232 */
-        { { 0x26ae28bede7a4b7el,0xd2f07569d2664163l,0x798690d4ff69266al,
-            0x77093d356ef3695dl },
-          { 0xaca9903d567dd3dfl,0x259c59a3a274c67bl,0x9f34bc0bfc1198b0l,
-            0x51a7726290b1521cl },
-          0 },
-        /* 71 << 232 */
-        { { 0xa20644bc80ca5391l,0xf9cdb4f7e5b36ea3l,0xe7936c0641426e22l,
-            0x39bc23033eef8a52l },
-          { 0x31253f43e5d8f896l,0xb0e5a588dc3df499l,0x1d03519a2d7e66d5l,
-            0x923de91f6d7da5e3l },
-          0 },
-        /* 77 << 232 */
-        { { 0x17a833ffedf861e4l,0x0ee3d0af4ebec965l,0xd0fac1c1ea66870el,
-            0x325756d0ae810cf4l },
-          { 0x4ed78d2c78e9a415l,0x6cc65685192046e4l,0x03e4243d8498a91el,
-            0x56a02dd25ab97794l },
-          0 },
-        /* 83 << 232 */
-        { { 0xc2fd373748e2b156l,0x259e9a98139645bel,0xe90106fb9877b4f1l,
-            0x49e5bac5889ce002l },
-          { 0x936a7dd18cf14e0bl,0x70bf6d304e3a8a01l,0x99d3e8bfeb748b62l,
-            0xa52a27c99b31c55cl },
-          0 },
-        /* 89 << 232 */
-        { { 0x9db1d41d300637d5l,0xe38744397c2dd836l,0x36179baf0d04ceb3l,
-            0xe9ccd17b251b3f2dl },
-          { 0xd8228073442b6d1dl,0x59a038363eed2971l,0xb443732046979f5cl,
-            0x54ad4113ae63937cl },
-          0 },
-        /* 95 << 232 */
-        { { 0x092c34e6d9246e9fl,0xb4b3b63d3eeb18a7l,0x8b3778beed9d1383l,
-            0xe4cb7be9d70d5d80l },
-          { 0xcff12e9b3d059203l,0x277af117ba86699fl,0x9bd4e8e363603585l,
-            0x0750b0f28e89c8d5l },
-          0 },
-        /* 101 << 232 */
-        { { 0x38b77e5958f7187bl,0x31c7068de0cb618el,0xa0f8e0d6c11ebe62l,
-            0x07adc8010473d7ebl },
-          { 0x36161a2c5c3e9510l,0xb2ec90d64ad04815l,0x01e2dd1f917d8166l,
-            0x549bcbdd6aa0f794l },
-          0 },
-        /* 107 << 232 */
-        { { 0x4ab27c3a8e4e45e5l,0xf6bd9d82f2bb99e7l,0xcab48c735e9da59fl,
-            0xdeb09eb2b9727353l },
-          { 0xc4a7954bafb8fa3el,0x34af2a49abf6803dl,0xc1ee1416d63e13bbl,
-            0xd49bf42d7a949193l },
-          0 },
-        /* 113 << 232 */
-        { { 0x504823ea9c9c07c6l,0x9dbec902bee2288cl,0x018d7875f0ceb6bbl,
-            0x678b997304f7022cl },
-          { 0x74d658238c5fb369l,0x7d4e1f114ca89ee8l,0x148316399905abc0l,
-            0xc107324e2c4deff4l },
-          0 },
-        /* 116 << 232 */
-        { { 0x1bc4fa8bdadc4404l,0x0edb9534daa12ee3l,0x084481b6a5f7289cl,
-            0x7f42461d9d8fb3d2l },
-          { 0xf93f1d3212293c70l,0xc14706596bb73ea3l,0xf80834afde339cadl,
-            0x99dcfc0081f22953l },
-          0 },
-        /* 119 << 232 */
-        { { 0x497e544f9fca737el,0x7f6342210e91e1afl,0x638e500c78d7b20bl,
-            0xb1ffed3f7ebaa947l },
-          { 0x751aa54871086f83l,0x8100bb703cf97848l,0xc32f91ace19ad68fl,
-            0x7dffb6851fb9157el },
-          0 },
-        /* 125 << 232 */
-        { { 0x5108589778e25060l,0x33e3cb7316cfe6cbl,0x0884cb8d410c0822l,
-            0xaa806ecc0be3fc94l },
-          { 0x9f9121f5f692353el,0xb9ab0310f8ee3349l,0x390032ce2561973el,
-            0xc07b6c6c8856b766l },
-          0 },
-    },
-    {
-        /* 0 << 240 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 240 */
-        { { 0x1083e2ea1f095615l,0x0a28ad7714e68c33l,0x6bfc02523d8818bel,
-            0xb585113af35850cdl },
-          { 0x7d935f0b30df8aa1l,0xaddda07c4ab7e3acl,0x92c34299552f00cbl,
-            0xc33ed1de2909df6cl },
-          0 },
-        /* 3 << 240 */
-        { { 0xabe7905a83cdd60el,0x50602fb5a1170184l,0x689886cdb023642al,
-            0xd568d090a6e1fb00l },
-          { 0x5b1922c70259217fl,0x93831cd9c43141e4l,0xdfca35870c95f86el,
-            0xdec2057a568ae828l },
-          0 },
-        /* 4 << 240 */
-        { { 0x568f8925913cc16dl,0x18bc5b6de1a26f5al,0xdfa413bef5f499ael,
-            0xf8835decc3f0ae84l },
-          { 0xb6e60bd865a40ab0l,0x65596439194b377el,0xbcd8562592084a69l,
-            0x5ce433b94f23ede0l },
-          0 },
-        /* 5 << 240 */
-        { { 0x860d523d42e06189l,0xbf0779414e3aff13l,0x0b616dcac1b20650l,
-            0xe66dd6d12131300dl },
-          { 0xd4a0fd67ff99abdel,0xc9903550c7aac50dl,0x022ecf8b7c46b2d7l,
-            0x3333b1e83abf92afl },
-          0 },
-        /* 7 << 240 */
-        { { 0xefecdef7be42a582l,0xd3fc608065046be6l,0xc9af13c809e8dba9l,
-            0x1e6c9847641491ffl },
-          { 0x3b574925d30c31f7l,0xb7eb72baac2a2122l,0x776a0dacef0859e7l,
-            0x06fec31421900942l },
-          0 },
-        /* 9 << 240 */
-        { { 0x7ec62fbbf4737f21l,0xd8dba5ab6209f5acl,0x24b5d7a9a5f9adbel,
-            0x707d28f7a61dc768l },
-          { 0x7711460bcaa999eal,0xba7b174d1c92e4ccl,0x3c4bab6618d4bf2dl,
-            0xb8f0c980eb8bd279l },
-          0 },
-        /* 10 << 240 */
-        { { 0x9d658932790691bfl,0xed61058906b736ael,0x712c2f04c0d63b6el,
-            0x5cf06fd5c63d488fl },
-          { 0x97363facd9588e41l,0x1f9bf7622b93257el,0xa9d1ffc4667acacel,
-            0x1cf4a1aa0a061ecfl },
-          0 },
-        /* 11 << 240 */
-        { { 0x28d675b2c0519a23l,0x9ebf94fe4f6952e3l,0xf28bb767a2294a8al,
-            0x85512b4dfe0af3f5l },
-          { 0x18958ba899b16a0dl,0x95c2430cba7548a7l,0xb30d1b10a16be615l,
-            0xe3ebbb9785bfb74cl },
-          0 },
-        /* 13 << 240 */
-        { { 0x81eeb865d2fdca23l,0x5a15ee08cc8ef895l,0x768fa10a01905614l,
-            0xeff5b8ef880ee19bl },
-          { 0xf0c0cabbcb1c8a0el,0x2e1ee9cdb8c838f9l,0x0587d8b88a4a14c0l,
-            0xf6f278962ff698e5l },
-          0 },
-        /* 15 << 240 */
-        { { 0x9c4b646e9e2fce99l,0x68a210811e80857fl,0x06d54e443643b52al,
-            0xde8d6d630d8eb843l },
-          { 0x7032156342146a0al,0x8ba826f25eaa3622l,0x227a58bd86138787l,
-            0x43b6c03c10281d37l },
-          0 },
-        /* 16 << 240 */
-        { { 0x02b37a952f41deffl,0x0e44a59ae63b89b7l,0x673257dc143ff951l,
-            0x19c02205d752baf4l },
-          { 0x46c23069c4b7d692l,0x2e6392c3fd1502acl,0x6057b1a21b220846l,
-            0xe51ff9460c1b5b63l },
-          0 },
-        /* 17 << 240 */
-        { { 0x7aca2632f02fc0f0l,0xb92b337dc7f01c86l,0x624bc4bf5afbdc7dl,
-            0x812b07bc4de21a5el },
-          { 0x29d137240b2090ccl,0x0403c5095a1b2132l,0x1dca34d50e35e015l,
-            0xf085ed7d3bbbb66fl },
-          0 },
-        /* 19 << 240 */
-        { { 0xc27b98f9f781e865l,0x51e1f692994e1345l,0x0807d516e19361eel,
-            0x13885ceffb998aefl },
-          { 0xd223d5e92f0f8a17l,0x48672010e8d20280l,0x6f02fd60237eac98l,
-            0xcc51bfad9ada7ee7l },
-          0 },
-        /* 21 << 240 */
-        { { 0x2756bcdd1e09701dl,0x94e31db990d45c80l,0xb9e856a98566e584l,
-            0x4f87d9deab10e3f3l },
-          { 0x166ecb373ded9cb2l,0xfd14c7073f653d3el,0x105d049b92aec425l,
-            0x7f657e4909a42e11l },
-          0 },
-        /* 23 << 240 */
-        { { 0xea6490076a159594l,0x3e424d6b1f97ce52l,0xac6df30a185e8ccbl,
-            0xad56ec80517747bfl },
-          { 0xf0935ccf4391fe93l,0x866b260f03811d40l,0x792047b99f7b9abel,
-            0xb1600bc88ee42d84l },
-          0 },
-        /* 25 << 240 */
-        { { 0x2d97b3db7768a85fl,0x2b78f6334287e038l,0x86c947676f892bb1l,
-            0x920bfb1ac0a9c200l },
-          { 0x4292f6ec332041b2l,0xa30bb937c9989d54l,0x39f941ebc6d5879el,
-            0x76a450fcdfdbb187l },
-          0 },
-        /* 27 << 240 */
-        { { 0x31256089ee430db6l,0xaece9bd8f6836f56l,0x484cfc4bfb85a046l,
-            0xee1e3e2c1599b2b9l },
-          { 0x7e3c38903d122eafl,0xaa940ce0c770556cl,0x4802d6631b08fae8l,
-            0xb08a85807f69f8bal },
-          0 },
-        /* 28 << 240 */
-        { { 0x70ed0a0405411eael,0x60deb08f16494c66l,0x8cf20fc6133797bbl,
-            0x3e30f4f50c6bc310l },
-          { 0x1a677c29749c46c7l,0xfe1d93f4f11e981cl,0x937303d82e3e688bl,
-            0x01aef5a7a6aa9e85l },
-          0 },
-        /* 29 << 240 */
-        { { 0x4902f495b959b920l,0x13b0fdbdfca2d885l,0x41cbd9e7b6a2f0fal,
-            0xf9bdf11056430b87l },
-          { 0xd705a223954d19b9l,0x74d0fc5c972a4fdel,0xcbcbfed6912977eal,
-            0x870611fdcc59a5afl },
-          0 },
-        /* 31 << 240 */
-        { { 0xf4f19bd04089236al,0x3b206c12313d0e0bl,0x73e70df303feaeb2l,
-            0x09dba0eb9bd1efe0l },
-          { 0x4c7fd532fc4e5305l,0xd792ffede93d787al,0xc72dc4e2e4245010l,
-            0xe7e0d47d0466bbbdl },
-          0 },
-        /* 33 << 240 */
-        { { 0x549c861983e4f8bbl,0xf70133fbd8e06829l,0xc962b8e28c64e849l,
-            0xad87f5b1901e4c25l },
-          { 0xd005bde568a1cab5l,0x6a591acf0d2a95bal,0x728f14ce30ebcae4l,
-            0x303cec99a3459b0fl },
-          0 },
-        /* 34 << 240 */
-        { { 0x62e62f258350e6bcl,0x5a5ea94d96adba1fl,0x36c2a2844a23c7b3l,
-            0x32f50a72992f5c8bl },
-          { 0x55d685204136c6afl,0x1aafd32992794f20l,0x69f5d820b59aa9bfl,
-            0x218966a8570e209al },
-          0 },
-        /* 35 << 240 */
-        { { 0xf3204feb2f9a31fcl,0x77f33a360429f463l,0xfb9f3a5a59a1d6a7l,
-            0x4445a2e93b1a78e0l },
-          { 0xc77a9b6fd58e32d3l,0xa44e23c8302e6390l,0x7d8e00b4c0f7bcb0l,
-            0xd2e2237b0ffa46f4l },
-          0 },
-        /* 36 << 240 */
-        { { 0xb3046cb13c8ea6d3l,0xf0151b5efce2f445l,0xa968e60b55e5715el,
-            0x39e52662587dce61l },
-          { 0xfde176e0b7de2862l,0x298d83e68e8db497l,0x1042136773641bfbl,
-            0xd72ac78d36e0bb0dl },
-          0 },
-        /* 37 << 240 */
-        { { 0x2cabb94fff6b8340l,0xf425a35a21771acbl,0x564fec3d12c4a758l,
-            0x57a61af39ba8f281l },
-          { 0x5807e78c97e9a71dl,0x991d9be75b8314e6l,0x1cd90b16ec4133b9l,
-            0xff043efa0f1ac621l },
-          0 },
-        /* 39 << 240 */
-        { { 0xea6e5527d7e58321l,0xfb95c13c04056ff1l,0x9447361f2fc4e732l,
-            0x63cbc655786d0154l },
-          { 0x302c0d668610fb71l,0xbf692d6920d06613l,0x8465b74b4be8355al,
-            0xcc883c95c31356b7l },
-          0 },
-        /* 40 << 240 */
-        { { 0x4ab6e919b33eabcal,0xb58f0998a1acacbfl,0xa747e5782ddbc28fl,
-            0xf9dd04ca59866cbcl },
-          { 0x084c062ff7a0073fl,0x6d22acdfb577fc38l,0x0870ee08eacd907cl,
-            0x710b4b266c9fcf95l },
-          0 },
-        /* 41 << 240 */
-        { { 0xa99546faf1c835a7l,0x1514a5a30d59f933l,0x1f6ad0f81bedd730l,
-            0x24de76287b528aaal },
-          { 0x4d9e7845c02fff87l,0xba74f8a942c79e67l,0x5bf5015f476e285bl,
-            0x0b1a5d8b1b93b364l },
-          0 },
-        /* 43 << 240 */
-        { { 0x8c7c0d7ff839819fl,0xc82b819827a95965l,0xce7294d377270519l,
-            0xfb508d6cad47aff7l },
-          { 0xf6de15431035076al,0x697d60ac5dd465c6l,0x88d771b8a76dcd26l,
-            0x8c7ce11ab10c9c44l },
-          0 },
-        /* 44 << 240 */
-        { { 0x215ea44a08216060l,0xccfa18a187996cf6l,0xccfb2483f7eccdd2l,
-            0x07aa601ad453c66al },
-          { 0xd43cf263cffee9e2l,0x230bc099718f69bfl,0xc43de21300c193e8l,
-            0x94cf251799c8746fl },
-          0 },
-        /* 45 << 240 */
-        { { 0x4785d7f87d1320c5l,0x84bed8c3d0771dcbl,0xff28044d22254edbl,
-            0x2e5992a445f71504l },
-          { 0xcb92695b72bbf5cdl,0x9bcbde35c42422e5l,0x856594fd1d07ed86l,
-            0x3aaf0b717716b4ffl },
-          0 },
-        /* 46 << 240 */
-        { { 0x3edf24f9eebed405l,0x9e3141360eccb503l,0xf7704c25b85c2bc2l,
-            0x4cb7c1de9a3247eel },
-          { 0x798ac8f2f0b507c5l,0x6e6217206851bbf1l,0xc0b89398c0d9ed16l,
-            0xf7d5d2a09f20728fl },
-          0 },
-        /* 47 << 240 */
-        { { 0x7358a94a19f0ededl,0x5e08c4c3e32ccfbbl,0x84a8eeeb0089f071l,
-            0xdaf0514c41fc436el },
-          { 0x30fe216f310309afl,0xe72f77bd564e6fc9l,0xe7ef3bddfdc59fd5l,
-            0xd199b1c9a8e1169cl },
-          0 },
-        /* 48 << 240 */
-        { { 0xb9dc857c5b0f7bd4l,0x6990c2c9108ea1cdl,0x84730b83b984c7a9l,
-            0x552723d2eab18a78l },
-          { 0x9752c2e2919ba0f9l,0x075a3bd94bf40890l,0x71e52a04a6d98212l,
-            0x3fb6607a9f18a4c8l },
-          0 },
-        /* 49 << 240 */
-        { { 0xa0305d01e8c3214dl,0x025b3cae8d51cea3l,0xeeaf7ab239923274l,
-            0x51179407c876b72cl },
-          { 0xcf0241c7d4549a68l,0xffae7f4c793dab3dl,0xdfb5917b4bdf2280l,
-            0xcf25c870a652e391l },
-          0 },
-        /* 51 << 240 */
-        { { 0xb1345466b922e1c8l,0xae42f46ab5bf8a34l,0x1e1ab6053310e604l,
-            0x64093cd9b4d7a658l },
-          { 0x5d3b385ab3d9242cl,0x2225b99ae56f8ec7l,0x19a8cbfc9a916e11l,
-            0x11c5df831f957c03l },
-          0 },
-        /* 52 << 240 */
-        { { 0x09f1d04af381147bl,0x7be13628b26b345fl,0xd8371966d1c60b78l,
-            0xf1743c2c5d91808fl },
-          { 0x8a2966acafc71cc3l,0x0ba9702efdfc24c3l,0x60c80158e6fbb539l,
-            0x58eaee49812c32f4l },
-          0 },
-        /* 53 << 240 */
-        { { 0x31af7f5ee89d0b84l,0xa776dada6caa110bl,0xd67b7891df6d54ddl,
-            0x831613cab82b8a5cl },
-          { 0x7a4eb86ef020af6dl,0x2914fd11bd795a7bl,0xc038a273fcb54a17l,
-            0x6b2dc8e18219cc75l },
-          0 },
-        /* 55 << 240 */
-        { { 0x031fc875464ba9b5l,0xe268cf45bd812dd3l,0x443f57defbfb664al,
-            0xfd1a38544e28c2fal },
-          { 0xb8799782cb96515bl,0xa12d3e3f1138c95dl,0x0cc5ee117748ee57l,
-            0x6ab167cf955a7dfcl },
-          0 },
-        /* 57 << 240 */
-        { { 0x0d54aaca4dc1c74fl,0x74af1807bf2e0d61l,0x151254f87aebe0f1l,
-            0x4072f38bf6376095l },
-          { 0x31ebe17a26646abfl,0xdc8cb6b40ecc1282l,0x4f6326bbbc095a66l,
-            0x37dad65a0363636dl },
-          0 },
-        /* 59 << 240 */
-        { { 0xc851860a70f8c15al,0xb2d4555488368381l,0xbfd46e197019c7b6l,
-            0xa1a9b12f6bb6f33bl },
-          { 0xecfd5fe6f170c82bl,0x6d58bb52d601afc3l,0xb8b3de15fe6eb102l,
-            0xad07336886a47964l },
-          0 },
-        /* 60 << 240 */
-        { { 0x89f514c91911840fl,0xc9fa6b504cc106bcl,0x70a97f0dfe55b4f1l,
-            0xada6306be5888609l },
-          { 0xa9437881c6dc8d15l,0x0fc0f5368411f3dfl,0xd26162087a913dd2l,
-            0x4fe1c7c4e92848cdl },
-          0 },
-        /* 61 << 240 */
-        { { 0xaa18eb262e07383dl,0xb948c35c34e90f3dl,0x95e97f81d3653565l,
-            0x4a821a2687b5b75dl },
-          { 0x87b4d81c892db882l,0xa69e65d689f3bfadl,0xe475f532eb371cacl,
-            0xd8cc23fa17194d5dl },
-          0 },
-        /* 63 << 240 */
-        { { 0x3fc0052ad789d484l,0xe8c67aac29324323l,0x133fd07cf54c43d3l,
-            0xd4a0848fb91d4faal },
-          { 0xf683ce065ea5098fl,0xe84348f9887c8a76l,0x38f8c2cf79b224b6l,
-            0x327e4c534a818cb1l },
-          0 },
-        /* 64 << 240 */
-        { { 0xb6d92a7f3e5f9f11l,0x9afe153ad6cb3b8el,0x4d1a6dd7ddf800bdl,
-            0xf6c13cc0caf17e19l },
-          { 0x15f6c58e325fc3eel,0x71095400a31dc3b2l,0x168e7c07afa3d3e7l,
-            0x3f8417a194c7ae2dl },
-          0 },
-        /* 65 << 240 */
-        { { 0x0c9e9237d5f812bcl,0xdae5b7e9595f02e5l,0x5ec1dece42b1e9a8l,
-            0x506a6ef8e527a685l },
-          { 0xe3049290236af251l,0x6322dd1bf81970acl,0x1459d39c516d5e61l,
-            0x672f502d9455b694l },
-          0 },
-        /* 71 << 240 */
-        { { 0xf83788e06b228af2l,0xaafc823911f596fal,0x6d47fa592f0fcb13l,
-            0x0b7af65f1c99c5d4l },
-          { 0xbc4c185dca961e6fl,0xec02b09f158481a4l,0x4bbfd9f31423fdd4l,
-            0x0ff44a53b619644bl },
-          0 },
-        /* 77 << 240 */
-        { { 0x23e255a3ea3f59d8l,0x1f4a47a8261ac30bl,0x346bf409c8faf0b3l,
-            0xd13e73fbc03a226bl },
-          { 0x670ddc792fe8a79bl,0x335fa172f1aac412l,0xe2347de1a5ceff20l,
-            0x66e02c73381130f2l },
-          0 },
-        /* 83 << 240 */
-        { { 0xa6b874c51db717cdl,0x027d318ab00f160bl,0x578f89f49be791afl,
-            0x659ef2f01f3b5e9bl },
-          { 0xa0c593033835d84cl,0xb71e261fdb6f9a60l,0x65837c7f44b7813fl,
-            0xea776163ea4bcc96l },
-          0 },
-        /* 89 << 240 */
-        { { 0x208234118df3f15fl,0xe0514d4694f341acl,0xdc66282d6486d704l,
-            0xd5fb354ad2548389l },
-          { 0xf3e98d72df273295l,0x27ded7fa50cd09fcl,0x4f486af3c5c1c169l,
-            0xe51044150aa41ba3l },
-          0 },
-        /* 95 << 240 */
-        { { 0x66b14d296fce0aecl,0x35fe5e60c8915ceal,0x06a023b736c5da39l,
-            0x0977c9f0404e932fl },
-          { 0x1dd6f95db54866del,0xe5ec79359387430cl,0x98dee57b5ef42e67l,
-            0x1707f01912ed3ad0l },
-          0 },
-        /* 101 << 240 */
-        { { 0xeb3abdedeec82495l,0x587a696e764a41c7l,0x13fdcce2add1a6a3l,
-            0x299a0d43286b2162l },
-          { 0x2c4e71e18131f1b4l,0x48f0e806ada3d04fl,0x91d2de80c57491b2l,
-            0x1b1266236cc355cbl },
-          0 },
-        /* 107 << 240 */
-        { { 0xdc28afe5a6d44444l,0xb5ad8d3cfe0b947bl,0x50c6126c96ce9fb9l,
-            0x5384a998d1fc7d39l },
-          { 0xa43ff8898788f51cl,0x30359593a6bc7b87l,0x3e1691dccc0d019al,
-            0xda0ef5ad7943abcdl },
-          0 },
-        /* 113 << 240 */
-        { { 0x5bc58b6f020b5cd7l,0x9098e202e103ff4el,0xc1f1a3d9f6fce7c7l,
-            0xf9dc32a856090ccel },
-          { 0x4c7d2520a9cc3b09l,0x98d47b5dd8c4dfcel,0xdcee788297e689b4l,
-            0xe5eec71815f982b9l },
-          0 },
-        /* 116 << 240 */
-        { { 0xff154bb8a1e1538cl,0xb9883276f7dcfae9l,0x1ac0a4d2c1c8cba4l,
-            0x511a54cc76e6b284l },
-          { 0xe2da436f00011f6dl,0x4d357a190f43a8adl,0xf36899c95458655bl,
-            0xe5f75c768d613ed9l },
-          0 },
-        /* 119 << 240 */
-        { { 0x15b4af1d93f12ef8l,0x3f4c5868fd032f88l,0x39f67a08f27d86bdl,
-            0x2f551820da32db6bl },
-          { 0x72fe295ac2c16214l,0x39927c381a2cf9afl,0x8dda23d6b1dc1ae7l,
-            0x1209ff3ed32071d4l },
-          0 },
-        /* 125 << 240 */
-        { { 0x861fdceb9a3c6c6fl,0x76d7a01386778453l,0xbf8d147cd5e422cbl,
-            0xd16f532e51772d19l },
-          { 0x72025ee2570d02cdl,0xe8e7737be80c7664l,0x81b7d56c334a8d8fl,
-            0x42477a0ff1b79308l },
-          0 },
-    },
-    {
-        /* 0 << 248 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 248 */
-        { { 0xf306a3c8ee3c76cbl,0x3cf11623d32a1f6el,0xe6d5ab646863e956l,
-            0x3b8a4cbe5c005c26l },
-          { 0xdcd529a59ce6bb27l,0xc4afaa5204d4b16fl,0xb0624a267923798dl,
-            0x85e56df66b307fabl },
-          0 },
-        /* 3 << 248 */
-        { { 0x896895959884aaf7l,0xb1959be307b348a6l,0x96250e573c147c87l,
-            0xae0efb3add0c61f8l },
-          { 0xed00745eca8c325el,0x3c911696ecff3f70l,0x73acbc65319ad41dl,
-            0x7b01a020f0b1c7efl },
-          0 },
-        /* 4 << 248 */
-        { { 0x9910ba6b23a5d896l,0x1fe19e357fe4364el,0x6e1da8c39a33c677l,
-            0x15b4488b29fd9fd0l },
-          { 0x1f4392541a1f22bfl,0x920a8a70ab8163e8l,0x3fd1b24907e5658el,
-            0xf2c4f79cb6ec839bl },
-          0 },
-        /* 5 << 248 */
-        { { 0x262143b5224c08dcl,0x2bbb09b481b50c91l,0xc16ed709aca8c84fl,
-            0xa6210d9db2850ca8l },
-          { 0x6d8df67a09cb54d6l,0x91eef6e0500919a4l,0x90f613810f132857l,
-            0x9acede47f8d5028bl },
-          0 },
-        /* 7 << 248 */
-        { { 0x45e21446de673629l,0x57f7aa1e703c2d21l,0xa0e99b7f98c868c7l,
-            0x4e42f66d8b641676l },
-          { 0x602884dc91077896l,0xa0d690cfc2c9885bl,0xfeb4da333b9a5187l,
-            0x5f789598153c87eel },
-          0 },
-        /* 9 << 248 */
-        { { 0xb19b1c4fca66eca8l,0xf04a20b55663de54l,0x42a29a33c223b617l,
-            0x86c68d0d44827e11l },
-          { 0x71f90ddeadba1206l,0xeeffb4167a6ceeeal,0x9e302fbac543e8afl,
-            0xcf07f7471aa77b96l },
-          0 },
-        /* 10 << 248 */
-        { { 0xcf57fca29849e95bl,0x96e9793ed510053cl,0x89fa443d07d3e75el,
-            0xfe2bc235e52800a0l },
-          { 0x1c208b8c0ac7e740l,0xb5852a49e7222263l,0x217e4005e541e592l,
-            0xee52747dc960b0e1l },
-          0 },
-        /* 11 << 248 */
-        { { 0x5fd7cafb475952afl,0x23a6d71954a43337l,0xa83a7523b1617941l,
-            0x0b7f35d412b37dd4l },
-          { 0x81ec51292ae27eafl,0x7ca92fb3318169dfl,0xc01bfd6078d0875al,
-            0xcc6074e3c99c436el },
-          0 },
-        /* 13 << 248 */
-        { { 0x4ca6bdebf57912b8l,0x9a17577e98507b5al,0x8ed4ab7759e51dfcl,
-            0x103b7b2a470f5a36l },
-          { 0x0c8545ac12553321l,0xab5861a760482817l,0xf4b5f602b9b856cfl,
-            0x609955787adf2e5fl },
-          0 },
-        /* 15 << 248 */
-        { { 0x60ce25b1ee5cb44fl,0xddcc7d182c2d7598l,0x1765a1b301847b5cl,
-            0xf5d9c3635d0d23b7l },
-          { 0x42ff1ba7928b65d0l,0x587ac69d6148e043l,0x3099be0dd320390bl,
-            0xa7b88dfc4278329fl },
-          0 },
-        /* 16 << 248 */
-        { { 0x80802dc91ec34f9el,0xd8772d3533810603l,0x3f06d66c530cb4f3l,
-            0x7be5ed0dc475c129l },
-          { 0xcb9e3c1931e82b10l,0xc63d2857c9ff6b4cl,0xb92118c692a1b45el,
-            0x0aec44147285bbcal },
-          0 },
-        /* 17 << 248 */
-        { { 0x7685bb9e0ba4e0b7l,0x330a7ebc5e58c29bl,0xbc1d9173e8a3797al,
-            0x7c506a16ea60f86cl },
-          { 0x9defb9248c099445l,0xcf1ddcc0256df210l,0x4844ce293d07e990l,
-            0x92318e37e2628503l },
-          0 },
-        /* 19 << 248 */
-        { { 0x61acd597fdf968d7l,0x7321a8b26598c381l,0xcb86a2809f448a0cl,
-            0x38534a01855df66al },
-          { 0xc119ec141e29037fl,0xe23c20ad0b42ba67l,0xefb1c4e033fb4f22l,
-            0xf088358f445a5032l },
-          0 },
-        /* 21 << 248 */
-        { { 0x2d73f5d1b8475744l,0xcc297e0a9d399b06l,0xa8c61d4038d3df06l,
-            0xacc6e8651a2d27a0l },
-          { 0x63dd6f6230153bf2l,0x6b23ad7bd73b83b7l,0x25382bf767ff7dcdl,
-            0x7e268c8fcf7ce2d1l },
-          0 },
-        /* 23 << 248 */
-        { { 0x4b9161c3cb2ebef1l,0x6009716b669ed801l,0x97c65219aacefe44l,
-            0xde13597d71aae4b5l },
-          { 0x3a077a816141d651l,0xe1b4e80129f876eal,0x729aed6d5c00c96cl,
-            0x0c6f404374cc645el },
-          0 },
-        /* 25 << 248 */
-        { { 0x22c51812df5a66e1l,0x1c8069c9ae7dedeal,0xcff9d86f0eea5180l,
-            0x676dbd6f44235ddal },
-          { 0xa53f01383db1ad42l,0xd079e571bcf19029l,0x1e37b9ecfab0cf82l,
-            0x93ae35ed4844e9c4l },
-          0 },
-        /* 27 << 248 */
-        { { 0xdaee55a543756358l,0x0ace18d41b2d3f89l,0x3391fa36824dd7d4l,
-            0x7b9963d1770e5f3fl },
-          { 0xc1fb9a78c94f724dl,0x94ff86fe76c4da6bl,0xb5d928c64170609bl,
-            0xc9372becfb015a9fl },
-          0 },
-        /* 28 << 248 */
-        { { 0x9c34b650e16e05e9l,0x965a774094e74640l,0xa3fd22fbcea3f029l,
-            0x1eb6a9688f95277cl },
-          { 0x2520a63d7bad84f6l,0xad917201f58f2feel,0xea92c1669b840d48l,
-            0x12109c4aacef5cbdl },
-          0 },
-        /* 29 << 248 */
-        { { 0xd85850d0d407a252l,0x6fa3b14de63909d4l,0x2ff9f6593e0fba69l,
-            0x7f9fd2a2d1b2cd0bl },
-          { 0x611233d745ad896al,0xfe4211648df850f9l,0x7808832399e32983l,
-            0x4b040859dee6741dl },
-          0 },
-        /* 31 << 248 */
-        { { 0x7dd2afd456e1ed5cl,0xd48429ec41ba4992l,0x97a02188968bab27l,
-            0x09ecf813e63c4168l },
-          { 0xf4ac65e77288b10cl,0x10630ab2afac7410l,0x4e3e59c3bb049e56l,
-            0x25972fff40fea0b1l },
-          0 },
-        /* 33 << 248 */
-        { { 0xfd8363da98365c18l,0x8aa57b1a8d47bf91l,0x423dce57695f4dd6l,
-            0xfccf54d4cc17f034l },
-          { 0x8fdba27c3610ea51l,0xcc0a06d654306b06l,0xb97a121c389b9dfdl,
-            0x7dbb90eb1ed0ca42l },
-          0 },
-        /* 34 << 248 */
-        { { 0xd32d7cec0094e84cl,0x862ae25e2ece8f72l,0x8644ef1cdfceb8abl,
-            0x68a9969c8e225628l },
-          { 0xdf209e27b3117876l,0x308a6e1882ba242bl,0xcbd09a659bf0cdb6l,
-            0x79f2826cc85b9705l },
-          0 },
-        /* 35 << 248 */
-        { { 0x3b36b6bf8f011496l,0xea6acc1a9bcf6ef8l,0x6db132263b101f12l,
-            0x4fc4e35e3b7585c3l },
-          { 0x641de27556eb64c6l,0x9b2834d3f3b08519l,0xebb76a2ba1f44b40l,
-            0x1b545ccd3cd31677l },
-          0 },
-        /* 36 << 248 */
-        { { 0xab293027aad991c1l,0x598d0bf8849be4b7l,0x8c94a21ab972da90l,
-            0xada4cfdd7ecfa840l },
-          { 0x93d4b9c0fbcec63al,0x7ca617a203219a34l,0x900424eb6a652a55l,
-            0xaf9346e9eb8562e0l },
-          0 },
-        /* 37 << 248 */
-        { { 0x9681a73d2d8bc904l,0x8b5f9b317b1553bel,0xfb03b874f6bc852fl,
-            0x8e658fb8cbbec8b0l },
-          { 0x9b2ff17bb9e9f9d1l,0xf46e9bf3e8679854l,0x7fbb1323618ed3aal,
-            0x064a1c5d714ebc3dl },
-          0 },
-        /* 39 << 248 */
-        { { 0xac0bdfc39f0e69dcl,0x71957386ae12f132l,0xa263ef2e6aa90b5bl,
-            0xa94b152390d42976l },
-          { 0xfb2d17741bcdbf7bl,0xba77b77c3a04f72fl,0xa6818ed8ec3e25a1l,
-            0x2e0e01743733e251l },
-          0 },
-        /* 40 << 248 */
-        { { 0xc3e04d7902381461l,0xb1643ab5911bc478l,0xc92becfa390b3ef2l,
-            0x54476778acd2f1b6l },
-          { 0x8daa0c4d66bf3aafl,0x2bc1287b2c21c65al,0xee182910b5a13ac3l,
-            0xbb04730090b0790al },
-          0 },
-        /* 41 << 248 */
-        { { 0x8bdd6f35a8540489l,0x788c03e5ee390d4el,0x203323c18f653017l,
-            0x39953308c4bc0094l },
-          { 0x6ee0857118308d0bl,0x70e9f90b450b0002l,0x191662aa8139f145l,
-            0xd7c5415b62d71124l },
-          0 },
-        /* 43 << 248 */
-        { { 0x41b37d72b927231cl,0xca17b5429e4de13al,0x7bc03469cded2ce3l,
-            0x961b0ecb4f4560f9l },
-          { 0x7c5bd41b43d31fa1l,0x3ed047f643f44dc3l,0x5b02083efe1a4d14l,
-            0xcc2c66ac18b330bcl },
-          0 },
-        /* 44 << 248 */
-        { { 0x83766947d17d4e0bl,0xc5772beefdc3a47bl,0x765a50db1a6fd0ffl,
-            0x17f904ba45b0995el },
-          { 0xcee643832883487el,0xf56db7f3c270aaedl,0x6738d94f46cb1fd9l,
-            0xc8fa426a142fd4d5l },
-          0 },
-        /* 45 << 248 */
-        { { 0xc85bef5b5a78efcel,0xaf380c6b0580e41el,0x6c093256a43b8d9bl,
-            0xed9d07bbea670933l },
-          { 0xfdb9a295f1682c6el,0x4cc29a63532b6bb7l,0x21a918f9f8e42dd0l,
-            0x9ac935ce0edacca0l },
-          0 },
-        /* 46 << 248 */
-        { { 0xbfe48a8ff43daf9dl,0xd7799b31b313c052l,0x46d480d77119c60el,
-            0x5090d91f0b80bcb9l },
-          { 0xc94c4c1e873bd7bfl,0x16e69b4f9915aa0al,0x769be02bb1d5928cl,
-            0x3fdaf62162e1d85al },
-          0 },
-        /* 47 << 248 */
-        { { 0x03497a57371c1b5cl,0x11e4c0b3552ab6abl,0xf857061f0a169ee7l,
-            0xc21c6c43e6d1bc66l },
-          { 0x706283a82832be7al,0xd35b143299aba62cl,0x7f4da83de9aef62dl,
-            0x2b7e5fc8723fa4e5l },
-          0 },
-        /* 48 << 248 */
-        { { 0xae485bb72b724759l,0x945353e1b2d4c63al,0x82159d07de7d6f2cl,
-            0x389caef34ec5b109l },
-          { 0x4a8ebb53db65ef14l,0x2dc2cb7edd99de43l,0x816fa3ed83f2405fl,
-            0x73429bb9c14208a3l },
-          0 },
-        /* 49 << 248 */
-        { { 0xc086e737eb4cfa54l,0x9400e1ad3c44aad9l,0x210bba94336959b4l,
-            0x08621a809106f0cal },
-          { 0x2ae66096c510ee9cl,0x2ba21617fc76a895l,0xc0707f8b0c186f1el,
-            0x1fe170a3ed0bfe25l },
-          0 },
-        /* 51 << 248 */
-        { { 0x3780fe2084759c5cl,0x716ec626b7050aa7l,0x6a43fb8b84b63bd1l,
-            0xb01098a039bc449fl },
-          { 0x96b3ff8ebb7daa4dl,0x2d146882654a7f01l,0x2500f701dcae6143l,
-            0xc13d51d01626fd3bl },
-          0 },
-        /* 52 << 248 */
-        { { 0x08ed8febd56daf06l,0x8d98277b4a837f69l,0x9947c636a9b6e05al,
-            0x58c8a77ac0d58abdl },
-          { 0xf45496a45f121e4fl,0x16cd67c71076d3d3l,0xecbd1958e3fb0c5dl,
-            0xfbe185ec38e1eb47l },
-          0 },
-        /* 53 << 248 */
-        { { 0x65b067eb740216e3l,0x1e19a71479db8760l,0x8d30dca18878de5al,
-            0x627d03e8aa47c005l },
-          { 0x096d58c0d2536c96l,0x232e6a4d69b12c2al,0x850eb8c0e7044bcel,
-            0xd9cf923bef2ee9a1l },
-          0 },
-        /* 55 << 248 */
-        { { 0x8b301094c8eaee90l,0x9a96950b8330928fl,0x472ba105faccc3bal,
-            0x00f8620e9153172al },
-          { 0x019b8164303fcdf5l,0x614d5c3c41fb4c73l,0x632d98f2c5992f89l,
-            0xfbeb29d790e2dea5l },
-          0 },
-        /* 57 << 248 */
-        { { 0xefd48b577f91d6e0l,0x8575605595bcf5d4l,0x7677b4a7bb9d891bl,
-            0xdc9931e9685912c9l },
-          { 0x69bca306f31a07c8l,0x3dd729534962a7f0l,0xdcea49cc9d366c2al,
-            0xce664ba7dc79a57dl },
-          0 },
-        /* 59 << 248 */
-        { { 0x7842d547013ec3b5l,0xa2785ceb433cf990l,0x9d667e5f700ab14al,
-            0x4b46f362a0f46d55l },
-          { 0x152c0e80cc7a3487l,0x7f3a88cef86f5e68l,0x6f950a73f1b2a75fl,
-            0x9be5b1aa51d24f3bl },
-          0 },
-        /* 60 << 248 */
-        { { 0xaea68626dc4ad4f4l,0x5dc516824ddbc0b6l,0xa76697bd602e9065l,
-            0xbeeb3ea58c37888el },
-          { 0x1ec4a2f214569113l,0xe48b820ca35f4484l,0x9fb560949ae44df2l,
-            0x6ca1346292cc09fdl },
-          0 },
-        /* 61 << 248 */
-        { { 0x887e0b87bcdc3a36l,0x6b0d617d503dee65l,0x96bda1f6cebcb893l,
-            0xdc0dd17341e20b3el },
-          { 0x812fbacfa6657c11l,0x32492fcbc94a6f4bl,0x854a0bcb6a772123l,
-            0x1ed573f65d463f31l },
-          0 },
-        /* 63 << 248 */
-        { { 0x22c7ef7bd022cc4dl,0xeec383d61e63b4bcl,0x52e0aaa06502b46fl,
-            0x9224187ded5e41bfl },
-          { 0x3a01f53dd26faf1cl,0x9bc4ee2e4e591d10l,0x10b7a98eea7e4c88l,
-            0xe521c150e2c1beccl },
-          0 },
-        /* 64 << 248 */
-        { { 0xb618d590b01e6e27l,0x047e2ccde180b2dcl,0xd1b299b504aea4a9l,
-            0x412c9e1e9fa403a4l },
-          { 0x88d28a3679407552l,0x49c50136f332b8e3l,0x3a1b6fcce668de19l,
-            0x178851bc75122b97l },
-          0 },
-        /* 65 << 248 */
-        { { 0x26f9b9322ed53a71l,0x0bac7348c72ef2e0l,0x7e96001da5c6faf1l,
-            0x5d43f76dea00eb2dl },
-          { 0x1327370f44f1c478l,0x1c83a9ac6bb964c8l,0xa3a9769f76ffbd25l,
-            0xdf045fb6b04f1bddl },
-          0 },
-        /* 71 << 248 */
-        { { 0x4283898d556b975el,0x6e2301ffe3880361l,0xc6d3b2bbe9198077l,
-            0xc4799578d21cac02l },
-          { 0x11448ff8f784eb7cl,0xb775973fbb81898dl,0x4e51f061519c76b9l,
-            0xaba1f3ef3cad0393l },
-          0 },
-        /* 77 << 248 */
-        { { 0x59d60c1c9b339830l,0x5af60a44ac32746dl,0x5ac006bc9dea8d80l,
-            0x4a2a56d97f2b1180l },
-          { 0x2032845a46946fc4l,0xe25b911226a3b503l,0xfed89db9a28827d3l,
-            0xdd2d7e90c6b74593l },
-          0 },
-        /* 83 << 248 */
-        { { 0x9b047a26cda38ecfl,0x6889284f5f6cb442l,0x4d128bcb14753820l,
-            0x8f9937c160eedd78l },
-          { 0xe333bad751ab9127l,0xd31b01c67ace3b19l,0x0732de39d7c0b4bel,
-            0xad04fa4c649e2b9bl },
-          0 },
-        /* 89 << 248 */
-        { { 0x02e042689d1495bal,0x95dca5a85591b5f8l,0xb10488d856f46c71l,
-            0x97829baf3590000al },
-          { 0xaeda5cb378c9e78al,0x3615873a7ba1c71cl,0x7c9f9f4d4333aa12l,
-            0x893fab42cea8e6d3l },
-          0 },
-        /* 95 << 248 */
-        { { 0x9eb09fff69aaa09fl,0xf36678a926731322l,0x8be61ee1cafcabafl,
-            0x77a172f558ddb763l },
-          { 0x7e09dfc66471130el,0x7f8909791039771el,0x0e44071d37800b9bl,
-            0x09123d27fe762d10l },
-          0 },
-        /* 101 << 248 */
-        { { 0xffd455a7a1b7fdd6l,0xb6162cb4dabdffael,0xf859519ec89c0e56l,
-            0x07406c1b421f2846l },
-          { 0x42db24ed9e96ddbbl,0x03bcae092dc5da85l,0x75099cd217aa7493l,
-            0x8cd1aa4266b8740dl },
-          0 },
-        /* 107 << 248 */
-        { { 0xe94333d5dde7fec3l,0x894fd673745a9be3l,0xaf3d97c725683748l,
-            0xeaa469a2c9ec165fl },
-          { 0xc9a18decdc7abd3bl,0xf059008082717b02l,0x9816374a4fdf4300l,
-            0x449d3eb74fb5a6cel },
-          0 },
-        /* 113 << 248 */
-        { { 0x7fc983ebd28001a6l,0xeabf5276dae74b6bl,0x50adb67d742ed0a5l,
-            0x1d2ad363650e1446l },
-          { 0x5a564253d122f5d0l,0x7e5aefc7e30471del,0xdc64cbb3e5dc2f2cl,
-            0xe645b9fa9437be4el },
-          0 },
-        /* 116 << 248 */
-        { { 0x0f58cec54e27d357l,0x08dcf2b70004539el,0xb1ead64104f96709l,
-            0x350fed185a914c72l },
-          { 0x44f43523c5147854l,0x45f8b46f46d04ac7l,0x62c306869a449d51l,
-            0xaacc0f0d9e66d9a3l },
-          0 },
-        /* 119 << 248 */
-        { { 0x94cb62e5bdd61b63l,0xe6ce5b5104a0ec57l,0x0461cb95f0bda8a4l,
-            0xca2d6220cbadfe8fl },
-          { 0x6c19bdf03c1ad65el,0x774a49bae04239d5l,0xf78cb7404a2fd59dl,
-            0xaebf90ed66a09130l },
-          0 },
-        /* 125 << 248 */
-        { { 0x10e4074857cc8d54l,0x29985831918e3cf9l,0x3d87def9f2e344eel,
-            0x8899992c68977860l },
-          { 0xbdc8d73b210f3c50l,0x98aa042fa9857f46l,0x76a34daf6c71357fl,
-            0x086289d3200bcb6dl },
-          0 },
-    },
-    {
-        /* 0 << 256 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 256 */
-        { { 0xb4e370af3aeac968l,0xe4f7fee9c4b63266l,0xb4acd4c2e3ac5664l,
-            0xf8910bd2ceb38cbfl },
-          { 0x1c3ae50cc9c0726el,0x15309569d97b40bfl,0x70884b7ffd5a5a1bl,
-            0x3890896aef8314cdl },
-          0 },
-        /* 3 << 256 */
-        { { 0x996884f5903fa271l,0xe6da0fd2b9da921el,0xa6f2f2695db01e54l,
-            0x1ee3e9bd6876214el },
-          { 0xa26e181ce27a9497l,0x36d254e48e215e04l,0x42f32a6c252cabcal,
-            0x9948148780b57614l },
-          0 },
-        /* 4 << 256 */
-        { { 0xab41b43a43228d83l,0x24ae1c304ad63f99l,0x8e525f1a46a51229l,
-            0x14af860fcd26d2b4l },
-          { 0xd6baef613f714aa1l,0xf51865adeb78795el,0xd3e21fcee6a9d694l,
-            0x82ceb1dd8a37b527l },
-          0 },
-        /* 5 << 256 */
-        { { 0x4a665bfd2f9fd51al,0x7f2f1fe2481b97f7l,0xcad05d69ad36ce50l,
-            0x314fc2a4844f4dedl },
-          { 0xd5593d8cb55fc5c6l,0xe3510ce8bfb1e23dl,0xf9b7be6937453ccel,
-            0xd3541b7969fae631l },
-          0 },
-        /* 7 << 256 */
-        { { 0x711b8a4176a9f05dl,0x06ca4e4b9011d488l,0x543bc62ba248a65el,
-            0x017535ffc9290894l },
-          { 0x840b84ce406851d7l,0xafa3acdf90e960b4l,0xac3394af7128fd34l,
-            0x54eb4d5b2ac0f92cl },
-          0 },
-        /* 9 << 256 */
-        { { 0x3549a0f14df48fecl,0x6ae7b1eec239f83al,0x001dcf253eb90ff3l,
-            0x02ff0f02581e90edl },
-          { 0x72921d8ca103dcefl,0x2c513c3c5876293el,0xc07064ca6b68875el,
-            0x7198d44653b9537cl },
-          0 },
-        /* 10 << 256 */
-        { { 0x58349b77685e089bl,0x1c678441219b7b8cl,0xba8da91f61e2e20dl,
-            0xf9c50b8c309fd4e6l },
-          { 0x99b0164996d0ef64l,0xac334ded60cdb63al,0x6b9ada19fb0bce4fl,
-            0x39dc9375c7896377l },
-          0 },
-        /* 11 << 256 */
-        { { 0x068dda8b7e1bc126l,0x77c7c58176243a21l,0xcc8ba55c875f9dael,
-            0xdde7afe2ce469f95l },
-          { 0xde2a15f5e9523b85l,0x447512c6d85674ael,0x5691f89e12c6c20cl,
-            0xd64ef40e0fae4513l },
-          0 },
-        /* 13 << 256 */
-        { { 0x10db2041c4d9eb40l,0x420eccb724f03f8al,0x64470fd17d29080el,
-            0xf66c5b4416e52414l },
-          { 0xa32cc70e4ca94031l,0xa67931592c8401bal,0x34f2dc29abfcc58dl,
-            0x6f340f9a07325d7dl },
-          0 },
-        /* 15 << 256 */
-        { { 0xf55d446b060a52bbl,0x2f33cb9f02939f24l,0x0f27a01bc8953718l,
-            0x362882917fcd3932l },
-          { 0x7485613488ed4436l,0xcfe69e27195f089el,0xd6ab040a8ff10bd8l,
-            0x9741c5472e4a1623l },
-          0 },
-        /* 16 << 256 */
-        { { 0xc52d8d8b6d55d6a4l,0xc4130fb3be58e8f9l,0x5f55c345e1275627l,
-            0xb881a03c117042d3l },
-          { 0x00a970a53238d301l,0x40d7cf2412a2c4f1l,0xace4a2f5d770ea74l,
-            0x36a2e587e96940b2l },
-          0 },
-        /* 17 << 256 */
-        { { 0x84793d9fef12d4c8l,0x04b89b152d8a163cl,0x0fdb566fb4a87740l,
-            0xf7e6e5cf9e595680l },
-          { 0xbcb973e41c5cd74el,0xafcb439fe4ed49d8l,0xd5c0820aebbae8eel,
-            0x23483d836f56e2a2l },
-          0 },
-        /* 19 << 256 */
-        { { 0x91f9b8be5e8ad115l,0xf1fd6a2e225db496l,0xf362d2cf4a444085l,
-            0x033d9201eea043ebl },
-          { 0x1e50c0989951a150l,0x4814fca5cfcf1f94l,0xaf3e8ef41bf82de5l,
-            0xba0e2991038cff53l },
-          0 },
-        /* 21 << 256 */
-        { { 0x904a41ae5fc373fal,0x235556d61a6a3fc4l,0xe44eb3ea36eeb570l,
-            0xa4e1b34a26ba5ca6l },
-          { 0x210e7c9131180257l,0x2c28669622158b0cl,0xc78b69c783ddd341l,
-            0xfc05941b294e1750l },
-          0 },
-        /* 23 << 256 */
-        { { 0x70666f51fc167dedl,0x47e9e289fe75b8d1l,0x8a5f59739605a03el,
-            0x19876a58dd579094l },
-          { 0x69a5c8cca964e426l,0xed74a652ccf20306l,0x5c93ae3cf06d31d5l,
-            0x51922fa2127a8a12l },
-          0 },
-        /* 25 << 256 */
-        { { 0xa18e26f99e3d509el,0xbc296dd2c10814fal,0x5dadd6eeaa24e147l,
-            0xdba2121a8340f12el },
-          { 0xd348e7f3e245ca21l,0x1e45a42978e3eb5bl,0x252bf89c169677bbl,
-            0xfb33a2564021ac55l },
-          0 },
-        /* 27 << 256 */
-        { { 0x30dc46586e7d72b8l,0x38df46fb0d81c3d6l,0x901bab6e10e84162l,
-            0x25d7303ff7932801l },
-          { 0xe781d5f37500be42l,0x9a7104c3380ff208l,0xfa801181652121a1l,
-            0xef89f4f18d3bed43l },
-          0 },
-        /* 28 << 256 */
-        { { 0xbe4ae5683594917al,0xef7c1c47a04bf81el,0xa1dc3612046d91a0l,
-            0x3eee37affb11b338l },
-          { 0x7e90278fd03d8f51l,0x3045a6da4fa183c6l,0xb39e573391cd16a9l,
-            0xc748a504e54e9411l },
-          0 },
-        /* 29 << 256 */
-        { { 0x07804331a1c6ec56l,0x25358e795b347123l,0x1ab9b39acf9432a4l,
-            0x9628501d0a7881cel },
-          { 0x749d58988a46d98el,0x01ea43346a17c321l,0xe2b197f9b1f9160fl,
-            0x2052c7c07815f2a2l },
-          0 },
-        /* 31 << 256 */
-        { { 0xaa691bfbc57a1a6dl,0x06cae127d737d525l,0x5be04b2f963c7c98l,
-            0x936b1f5bfc00bc4al },
-          { 0x3fed4ac77eda6a34l,0xba6ca7aa2500a438l,0x1e979fa6786c2a75l,
-            0xa3db26bec13f37d4l },
-          0 },
-        /* 33 << 256 */
-        { { 0x20afae333d7006d1l,0xdcbca6fbbda467d1l,0x2714b3827df4006cl,
-            0x9abc0510c8e94549l },
-          { 0x5b30a6d464c14915l,0xba91d0c35752b44fl,0x7ad9b19bbb389f1fl,
-            0xe4c7aa04ef7c6e13l },
-          0 },
-        /* 34 << 256 */
-        { { 0x1e24a3f23d12e2b6l,0xf99df403febd6db3l,0x61e580a6b0c8e12fl,
-            0x819341b7c2bfe085l },
-          { 0xd53002d640828921l,0x31e1eb65cea010efl,0xc48d0cfe85b3279fl,
-            0xb90de69089f35fa5l },
-          0 },
-        /* 35 << 256 */
-        { { 0xa3f6fd3c88ed748fl,0x6d72613af48127b9l,0xe85ed703d1e6f7e5l,
-            0xbb563db449636f40l },
-          { 0x23bae3c9708497bal,0x89dbff163aa65cf4l,0x70861847e6c0850al,
-            0x5ef19d5d48b2e90cl },
-          0 },
-        /* 36 << 256 */
-        { { 0xab6a1e13107f7bacl,0x83a8bc57972091f5l,0x3c65b454f6dcba41l,
-            0xd7606ff96abc431dl },
-          { 0xa3af9c189bd09971l,0x6ddd3bbf276bad63l,0xd2aba9beab4f0816l,
-            0x8f13063c151581edl },
-          0 },
-        /* 37 << 256 */
-        { { 0xf9c02364f5761b15l,0x3cfa250afd478139l,0x67d51e7416e26191l,
-            0x0281bbf65eda396cl },
-          { 0xbd38d4d70d1f4510l,0x2032a930edff593el,0x0ab74a0cf2ea4ad7l,
-            0xb95aa9c3302498d6l },
-          0 },
-        /* 39 << 256 */
-        { { 0x2995495dd7da3c7cl,0x28d579d0a0bb703el,0xabec6afec8288837l,
-            0x93c34dfd05ab989bl },
-          { 0xcc94f05dde5ea3dfl,0xc3e3d4ef90f436e6l,0x32b3dee1cf59dc4el,
-            0x5eab01635447d9d9l },
-          0 },
-        /* 40 << 256 */
-        { { 0xd31c5e8e2c23464el,0x5bcc382f50cfbde7l,0x6cee3d8da93c3d9bl,
-            0xbee2948909ee62acl },
-          { 0x4848d59c10742b84l,0x2486796fe35e9c84l,0x1a1d9570cd8f391al,
-            0x839aa0913eedb743l },
-          0 },
-        /* 41 << 256 */
-        { { 0xae02a7ce0f83f369l,0x3b67c56097994835l,0x715def441ae4bbeal,
-            0x11e764ee59f6b9eel },
-          { 0x70c775051c962c3al,0x42811507d937a258l,0x06dbdceed03e6e86l,
-            0x39a3a7ed48cae79el },
-          0 },
-        /* 43 << 256 */
-        { { 0xa32e729fb220eef8l,0x12d876baf37ac5d7l,0x9376ab45105a7f34l,
-            0xb422331a4deb7275l },
-          { 0x6ea07fb7686dea5el,0xba67ed3e1d8e32c9l,0x5ae52632bbc6bb9cl,
-            0xdca55b86d1397575l },
-          0 },
-        /* 44 << 256 */
-        { { 0xd9183f74378200b1l,0xe5ea1645762f5605l,0x78b42e2f7bd6290fl,
-            0xa0bdfccc07fa0899l },
-          { 0x2f92ea52dacda629l,0x810b4e6c48de27e2l,0x013d8587d9d1250dl,
-            0xc153d519dd5141d5l },
-          0 },
-        /* 45 << 256 */
-        { { 0x8f1f6cb5b8f1d719l,0xa9abc27b04e15a4el,0xc0d944a92ad42296l,
-            0x69ecc877f3d2b0e5l },
-          { 0xec60dbea16a5581al,0x2a0ead5fb85130d6l,0x7b3d2ebb6fddac23l,
-            0x06213269ac448663l },
-          0 },
-        /* 46 << 256 */
-        { { 0xe1074008ac11e180l,0xdff3339c14b8f830l,0x136e22be636504f3l,
-            0xb07ae98aa09c5c4cl },
-          { 0x9b0a0517192168e9l,0x39e09fac86ad0865l,0x24f90705adb08d41l,
-            0x9c699cc759d3be24l },
-          0 },
-        /* 47 << 256 */
-        { { 0xd9e16551907e36b0l,0x57f24b6caf91cb5al,0xbdb7dfdb062edae4l,
-            0x99e3bffe4b85f424l },
-          { 0x250774f4b2961ba7l,0xe7c0f2386d993c51l,0xcd0aae29f559b4bdl,
-            0x3b12893a09a6859bl },
-          0 },
-        /* 48 << 256 */
-        { { 0xac177eb985ae12c3l,0x8e6cb5cc6cf76537l,0x134abb19f265f9e3l,
-            0xc37309b71ba3f55dl },
-          { 0x570833b4392d564bl,0xaa273a27d8c22f00l,0x9ba6b6276006773al,
-            0x2156c94f0a16c092l },
-          0 },
-        /* 49 << 256 */
-        { { 0x2be0436b408e1258l,0xb179a2e34f47f121l,0x140b948fa42d3cfcl,
-            0x96649c6700d2b4e6l },
-          { 0x2bf934c7d08a4b34l,0x371c770136b472ddl,0x36297876e06adc73l,
-            0x59e0d8251c3e6558l },
-          0 },
-        /* 51 << 256 */
-        { { 0x9368cfd304a8bc81l,0x145249d4c49e58c7l,0x8c7ac1891392be01l,
-            0x58cbcb5fbc7b0903l },
-          { 0x502218a1a0377b0al,0x5c17eb8afb625836l,0x845c09ef349f4d26l,
-            0x15fdeb2554ddce85l },
-          0 },
-        /* 52 << 256 */
-        { { 0xf773535a64e8344dl,0xb8486a33d0dbabe6l,0x43c2df99b578862dl,
-            0xcead29a11a39820el },
-          { 0x3e5466fe63134d63l,0xc37ea88fdf43a104l,0x3b34ac34bbaacb5al,
-            0x8281c240bc20be5al },
-          0 },
-        /* 53 << 256 */
-        { { 0x55113d5e0f8dec77l,0xdfe59f251d7e1543l,0x3b2837e0a63a849al,
-            0xdfbdb8b67a5691afl },
-          { 0x8dd6faf0bd4cf444l,0x28b2bdfaab128b6cl,0x44af3ee24b1098ebl,
-            0xbbf328ebe50b2d02l },
-          0 },
-        /* 55 << 256 */
-        { { 0xf231b1f4e4e6151al,0x6ac7130413258c6al,0x6f9cb1c1a09b9f86l,
-            0xbfc9291ee52ed880l },
-          { 0x2a7d8230bea258a2l,0xd52a0da6baf386acl,0x5166764b3af00b7el,
-            0x84792b043c985be2l },
-          0 },
-        /* 57 << 256 */
-        { { 0x914ca588a906d9e4l,0xb4e4e86abc27a876l,0x97e6ed27724324f2l,
-            0xda7e9aa5c0b87d2cl },
-          { 0xafccbe6b33a56f84l,0x69e8fd4ac892d90al,0xb47512910bb5457fl,
-            0xad65e4d05cb136fal },
-          0 },
-        /* 59 << 256 */
-        { { 0xb09974d2fd679a1bl,0x17abc2a54578faf0l,0xe7da92828c830388l,
-            0x7e455d8b0edf6146l },
-          { 0xdff3b2f0c324bdb6l,0xe7a1718769f4a4f9l,0xfb4e0b3129c500a4l,
-            0x1ed50799a09c5a07l },
-          0 },
-        /* 60 << 256 */
-        { { 0x6b669496c679d9f9l,0x3b741f36e78f0830l,0xf99d4857eb3f9e53l,
-            0x41be594276f7d4ael },
-          { 0x75f44d57c09a112bl,0xa5139fd68475eeb7l,0xa4560cd5c6bc9df6l,
-            0x8ce2c4cf50845434l },
-          0 },
-        /* 61 << 256 */
-        { { 0x96b515c32b3cb0a6l,0x65836de3930d5344l,0xfb032d5b00e6d403l,
-            0x2648301843c93bd1l },
-          { 0xfc4525dd4b572363l,0x12b7923e7b28ab5cl,0xf376b633e22ac5e6l,
-            0xd6ff6582e30b4707l },
-          0 },
-        /* 63 << 256 */
-        { { 0x8bdce75c83b09e07l,0x64228b19227717c4l,0xeae8f8a2dc6a1f02l,
-            0x1081031be72f3b6dl },
-          { 0xba0f876072c3f736l,0xde38a0c5246a28adl,0x0b116fe08596c412l,
-            0xb9e37be3fa135d11l },
-          0 },
-        /* 64 << 256 */
-        { { 0x09800dc1b48d4168l,0xa740b282bfee87a2l,0x80c6b75dc94a547al,
-            0x8cb622f0099c1985l },
-          { 0xe6c789631467e05dl,0x027b658822fd3064l,0xe14735e2c2fdb68cl,
-            0xfd2869947d853158l },
-          0 },
-        /* 65 << 256 */
-        { { 0x301916a5bbd7caf1l,0xef563fda4e2076c2l,0xccbc56088467f279l,
-            0xd7de3088b8d0f1bfl },
-          { 0x3d9adcce8586910dl,0x3fa3b8b9d775e0e9l,0x4b7a4a1d88136503l,
-            0xc748656de4994fcel },
-          0 },
-        /* 71 << 256 */
-        { { 0x18cc605c2d9f8646l,0x3764f1c29e441b64l,0xb0ea7f7fc4b64ee3l,
-            0xb5c22d0c042f8678l },
-          { 0x3761f7f89b3057fdl,0xc85b8de64a207ce4l,0x11da715bc5c04cf7l,
-            0x0cb1fa77c8e99c1fl },
-          0 },
-        /* 77 << 256 */
-        { { 0x35f9cfc8045dab4el,0x08a65c6771a7d720l,0xf076767b8eef1351l,
-            0x5351dbff8638fbe5l },
-          { 0x5aead6f7772ad54cl,0x5f6b441fafe93e69l,0xb7b83d1aeeb876b5l,
-            0xbe1ba4a7cdc094d9l },
-          0 },
-        /* 83 << 256 */
-        { { 0x005d8f04ec0377bal,0x036b8e1ace58f05dl,0xdd6ffc6f1b28cf58l,
-            0xc3d95a58e206189fl },
-          { 0xcb2873c1f52e8b8cl,0xcffdb18d80142af1l,0x7cf88eb64c77ed78l,
-            0xb3a3141981ef2c12l },
-          0 },
-        /* 89 << 256 */
-        { { 0xbb17e6f957c175b1l,0xf33abc63260a6f6dl,0x9435f2de620ddd6bl,
-            0x90bdde59ff3e99eal },
-          { 0x3d7875e0567b520fl,0xdd6954aa813b4978l,0x1af3dc24de7b631cl,
-            0x82ddcd08934d3c97l },
-          0 },
-        /* 95 << 256 */
-        { { 0x7a9d60affc5ce598l,0xc6f507597c37abfdl,0xaa1b32f3a79355d0l,
-            0xac581b94d7e4fcf3l },
-          { 0x2669cefd139f6466l,0x560a98bb26f97570l,0x32e1c1db2837b908l,
-            0x7823d7922d252781l },
-          0 },
-        /* 101 << 256 */
-        { { 0xea018b4cdedf9af0l,0x4b64c0a380c1d2f9l,0x527a0b1c36992c44l,
-            0x72a2408142b7adffl },
-          { 0x0023d10f97a502eel,0xc0f9ed067b401ac4l,0xabd1bd03d6d3b516l,
-            0xc320e3e478c5d0bel },
-          0 },
-        /* 107 << 256 */
-        { { 0x9f5d2a6a37dd009cl,0x88c0f42ac2c3cbacl,0x3155636977552a1el,
-            0xe78ec89d02f8098fl },
-          { 0x276c2ad71b6eeff9l,0xf4c49a28f7f91856l,0x698a2368dc795124l,
-            0x5502810de92a6c0fl },
-          0 },
-        /* 113 << 256 */
-        { { 0x82a5042e9f5e5192l,0x64da65fac0965a88l,0xf4c80dd56668399el,
-            0x635323757e33c233l },
-          { 0x5e5339b1a0048616l,0x4a17b1931c91741fl,0x65fdc7c213dcf3d0l,
-            0x230181426d10c410l },
-          0 },
-        /* 116 << 256 */
-        { { 0x090a04220f46c635l,0xc7eac842a04de3f5l,0x45b69d4c8990d4b2l,
-            0x032aeb50b8e0cdc6l },
-          { 0x02ce332a4ee3f307l,0x3c80c1545043980fl,0xc774838bcbd5287cl,
-            0x052661074a37d0ael },
-          0 },
-        /* 119 << 256 */
-        { { 0xc401b9c0f4d70fbfl,0xf82bbfde98ee47fel,0x94965118c84d91afl,
-            0xdd9a67c4d3b6ad1dl },
-          { 0x85c9cf1eb66a3ad4l,0x05580a0fbf5f514cl,0xf3ef0fd00218536el,
-            0x1dc2cf2bd14a7ca9l },
-          0 },
-        /* 125 << 256 */
-        { { 0x18c83e337c1e24d4l,0x30911165563657c6l,0xf9be1af679e53083l,
-            0x9b058059637753cel },
-          { 0x6a37fa24e54522b9l,0xc11d38b426dbf4c4l,0xbc6738655ebd4d9al,
-            0x2b40e9427fd4e2ecl },
-          0 },
-    },
-};
-
-/* Structure used to describe recoding of scalar multiplication. */
-typedef struct ecc_recode_sum {
-    /* Index into pre-computation table. */
-    uint8_t i;
-    /* Multiplier to add point into. */
-    uint8_t mul;
-    /* Use the negative of the point. */
-    uint8_t neg;
-} ecc_recode_sum;
-
 /* The index into pre-computation table to use. */
-static uint8_t recode_index_4_8[258] = {
-     0,  1,  1,  1,  3,  4,  2,  5,  3,  2,  4,  8,  3,  9,  5,  4,
-    11, 12,  6, 13,  7,  5,  8, 15, 55, 16,  9,  6, 18, 19,  7, 20,
-    11,  8, 12, 23, 24, 25, 13,  9, 27, 28, 14, 29, 30, 10, 15, 33,
-    11, 35, 16, 12, 37, 38, 17, 39, 18, 13, 19, 41, 42, 43, 20, 14,
-    45, 46, 21, 44, 22, 15, 23, 47, 24, 43, 25, 16, 42, 48, 26, 41,
-    27, 17, 28, 49, 18, 40, 29, 19, 30, 50, 31, 39, 32, 20, 33, 51,
-    34, 38, 35, 21, 37, 52, 22, 36, 37, 23, 38, 53, 24, 35, 39, 25,
-    34, 54, 40, 33, 55, 26, 32, 56, 27, 31, 43, 28, 30, 57, 44, 29,
-    45, 29, 44, 57, 30, 28, 43, 31, 27, 56, 32, 26, 55, 33, 40, 54,
-    34, 25, 39, 35, 24, 53, 38, 23, 37, 36, 22, 52, 37, 21, 35, 38,
-    34, 51, 33, 20, 32, 39, 31, 50, 30, 19, 29, 40, 18, 49, 28, 17,
-    27, 41, 26, 48, 42, 16, 25, 43, 24, 47, 23, 15, 22, 44, 21, 46,
-    45, 14, 20, 43, 42, 41, 19, 13, 18, 39, 17, 38, 37, 12, 16, 35,
-    11, 33, 15, 10, 30, 29, 14, 28, 27,  9, 13, 25, 24, 23, 12,  8,
-    11, 20,  7, 19, 18,  6,  9, 16, 55, 15,  8,  5,  7, 13,  6, 12,
-    11,  4,  5,  9,  3,  8,  4,  2,  3,  5,  2,  4,  3,  1,  1,  1,
-     0,  1,
-};
-
-/* Multiple to add point into. */
-static uint8_t recode_mul_4_8[258] = {
-     0,  1,  2,  3,  1,  1,  2,  1,  2,  3,  2,  1,  3,  1,  2,  3,
-     1,  1,  2,  1,  2,  3,  2,  1,  2,  1,  2,  3,  1,  1,  3,  1,
-     2,  3,  2,  1,  1,  1,  2,  3,  1,  1,  2,  1,  1,  3,  2,  1,
-     3,  1,  2,  3,  1,  1,  2,  1,  2,  3,  2,  1,  1,  1,  2,  3,
-     1,  1,  2,  3,  2,  3,  2,  1,  2,  3,  2,  3,  3,  1,  2,  3,
-     2,  3,  2,  1,  3,  3,  2,  3,  2,  1,  2,  3,  2,  3,  2,  1,
-     2,  3,  2,  3,  3,  1,  3,  3,  2,  3,  2,  1,  3,  3,  2,  3,
-     3,  1,  2,  3,  1,  3,  3,  1,  3,  3,  2,  3,  3,  1,  2,  3,
-     2,  3,  2,  1,  3,  3,  2,  3,  3,  1,  3,  3,  1,  3,  2,  1,
-     3,  3,  2,  3,  3,  1,  2,  3,  2,  3,  3,  1,  3,  3,  2,  3,
-     2,  1,  2,  3,  2,  3,  2,  1,  2,  3,  2,  3,  3,  1,  2,  3,
-     2,  3,  2,  1,  3,  3,  2,  3,  2,  1,  2,  3,  2,  3,  2,  1,
-     1,  3,  2,  1,  1,  1,  2,  3,  2,  1,  2,  1,  1,  3,  2,  1,
-     3,  1,  2,  3,  1,  1,  2,  1,  1,  3,  2,  1,  1,  1,  2,  3,
-     2,  1,  3,  1,  1,  3,  2,  1,  2,  1,  2,  3,  2,  1,  2,  1,
-     1,  3,  2,  1,  3,  1,  2,  3,  2,  1,  2,  1,  1,  3,  2,  1,
+static const uint8_t recode_index_4_7[130] = {
+     0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+    32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+    48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+    64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49,
+    48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33,
+    32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
+    16, 15, 14, 13, 12, 11, 10,  9,  8,  7,  6,  5,  4,  3,  2,  1,
      0,  1,
 };
 
 /* Whether to negate y-ordinate. */
-static uint8_t recode_neg_4_8[258] = {
+static const uint8_t recode_neg_4_7[130] = {
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  1,  0,  0,  0,  0,  0,  1,  0,  0,  1,  0,  0,  1,
-     0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,
-     0,  1,  0,  0,  1,  0,  0,  1,  0,  0,  0,  0,  0,  1,  0,  0,
-     1,  0,  0,  1,  0,  0,  1,  0,  0,  1,  0,  0,  1,  0,  0,  1,
-     0,  0,  1,  1,  0,  1,  1,  0,  1,  1,  0,  1,  1,  0,  1,  1,
-     0,  1,  1,  0,  1,  1,  1,  1,  1,  0,  1,  1,  0,  1,  1,  0,
-     1,  1,  1,  1,  1,  0,  1,  1,  1,  1,  1,  0,  1,  1,  1,  1,
-     1,  0,  1,  1,  0,  1,  1,  0,  1,  1,  1,  1,  1,  0,  1,  1,
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
      1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
-     1,  1,  1,  1,  1,  1,  1,  1,  0,  1,  1,  1,  1,  1,  1,  1,
+     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
      1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
      0,  0,
 };
 
-/* Recode the scalar for multiplication using pre-computed values, multipliers
- * and subtraction.
+/* Recode the scalar for multiplication using pre-computed values and
+ * subtraction.
  *
  * k  Scalar to multiply by.
- * v  Vector of operations to peform.
- */
-static void sp_256_ecc_recode_sum_8_4(sp_digit* k, ecc_recode_sum* v)
+ * v  Vector of operations to perform.
+ */
+static void sp_256_ecc_recode_7_4(const sp_digit* k, ecc_recode_256* v)
 {
     int i, j;
-    uint16_t y;
+    uint8_t y;
     int carry = 0;
     int o;
     sp_digit n;
@@ -27554,36 +22520,11989 @@
     j = 0;
     n = k[j];
     o = 0;
-    for (i=0; i<33; i++) {
+    for (i=0; i<37; i++) {
         y = n;
-        if (o + 8 < 64) {
-            y &= 0xff;
-            n >>= 8;
-            o += 8;
-        }
-        else if (o + 8 == 64) {
-            n >>= 8;
+        if (o + 7 < 64) {
+            y &= 0x7f;
+            n >>= 7;
+            o += 7;
+        }
+        else if (o + 7 == 64) {
+            n >>= 7;
             if (++j < 4)
                 n = k[j];
             o = 0;
         }
         else if (++j < 4) {
             n = k[j];
-            y |= (n << (64 - o)) & 0xff;
-            o -= 56;
+            y |= (n << (64 - o)) & 0x7f;
+            o -= 57;
             n >>= o;
         }
 
         y += carry;
-        v[i].i = recode_index_4_8[y];
-        v[i].mul = recode_mul_4_8[y];
-        v[i].neg = recode_neg_4_8[y];
-        carry = (y >> 8) + v[i].neg;
-    }
-}
-
-/* Multiply the base point of P256 by the scalar and return the result.
- * If map is true then convert result to affine co-ordinates.
+        v[i].i = recode_index_4_7[y];
+        v[i].neg = recode_neg_4_7[y];
+        carry = (y >> 7) + v[i].neg;
+    }
+}
+
+static const sp_table_entry_256 p256_table[2405] = {
+    /* 0 << 0 */
+    { { 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 << 0 */
+    { { 0x79e730d418a9143cL,0x75ba95fc5fedb601L,0x79fb732b77622510L,
+        0x18905f76a53755c6L },
+      { 0xddf25357ce95560aL,0x8b4ab8e4ba19e45cL,0xd2e88688dd21f325L,
+        0x8571ff1825885d85L } },
+    /* 2 << 0 */
+    { { 0x850046d410ddd64dL,0xaa6ae3c1a433827dL,0x732205038d1490d9L,
+        0xf6bb32e43dcf3a3bL },
+      { 0x2f3648d361bee1a5L,0x152cd7cbeb236ff8L,0x19a8fb0e92042dbeL,
+        0x78c577510a5b8a3bL } },
+    /* 3 << 0 */
+    { { 0xffac3f904eebc127L,0xb027f84a087d81fbL,0x66ad77dd87cbbc98L,
+        0x26936a3fb6ff747eL },
+      { 0xb04c5c1fc983a7ebL,0x583e47ad0861fe1aL,0x788208311a2ee98eL,
+        0xd5f06a29e587cc07L } },
+    /* 4 << 0 */
+    { { 0x74b0b50d46918dccL,0x4650a6edc623c173L,0x0cdaacace8100af2L,
+        0x577362f541b0176bL },
+      { 0x2d96f24ce4cbaba6L,0x17628471fad6f447L,0x6b6c36dee5ddd22eL,
+        0x84b14c394c5ab863L } },
+    /* 5 << 0 */
+    { { 0xbe1b8aaec45c61f5L,0x90ec649a94b9537dL,0x941cb5aad076c20cL,
+        0xc9079605890523c8L },
+      { 0xeb309b4ae7ba4f10L,0x73c568efe5eb882bL,0x3540a9877e7a1f68L,
+        0x73a076bb2dd1e916L } },
+    /* 6 << 0 */
+    { { 0x403947373e77664aL,0x55ae744f346cee3eL,0xd50a961a5b17a3adL,
+        0x13074b5954213673L },
+      { 0x93d36220d377e44bL,0x299c2b53adff14b5L,0xf424d44cef639f11L,
+        0xa4c9916d4a07f75fL } },
+    /* 7 << 0 */
+    { { 0x0746354ea0173b4fL,0x2bd20213d23c00f7L,0xf43eaab50c23bb08L,
+        0x13ba5119c3123e03L },
+      { 0x2847d0303f5b9d4dL,0x6742f2f25da67bddL,0xef933bdc77c94195L,
+        0xeaedd9156e240867L } },
+    /* 8 << 0 */
+    { { 0x27f14cd19499a78fL,0x462ab5c56f9b3455L,0x8f90f02af02cfc6bL,
+        0xb763891eb265230dL },
+      { 0xf59da3a9532d4977L,0x21e3327dcf9eba15L,0x123c7b84be60bbf0L,
+        0x56ec12f27706df76L } },
+    /* 9 << 0 */
+    { { 0x75c96e8f264e20e8L,0xabe6bfed59a7a841L,0x2cc09c0444c8eb00L,
+        0xe05b3080f0c4e16bL },
+      { 0x1eb7777aa45f3314L,0x56af7bedce5d45e3L,0x2b6e019a88b12f1aL,
+        0x086659cdfd835f9bL } },
+    /* 10 << 0 */
+    { { 0x2c18dbd19dc21ec8L,0x98f9868a0fcf8139L,0x737d2cd648250b49L,
+        0xcc61c94724b3428fL },
+      { 0x0c2b407880dd9e76L,0xc43a8991383fbe08L,0x5f7d2d65779be5d2L,
+        0x78719a54eb3b4ab5L } },
+    /* 11 << 0 */
+    { { 0xea7d260a6245e404L,0x9de407956e7fdfe0L,0x1ff3a4158dac1ab5L,
+        0x3e7090f1649c9073L },
+      { 0x1a7685612b944e88L,0x250f939ee57f61c8L,0x0c0daa891ead643dL,
+        0x68930023e125b88eL } },
+    /* 12 << 0 */
+    { { 0x04b71aa7d2697768L,0xabdedef5ca345a33L,0x2409d29dee37385eL,
+        0x4ee1df77cb83e156L },
+      { 0x0cac12d91cbb5b43L,0x170ed2f6ca895637L,0x28228cfa8ade6d66L,
+        0x7ff57c9553238acaL } },
+    /* 13 << 0 */
+    { { 0xccc425634b2ed709L,0x0e356769856fd30dL,0xbcbcd43f559e9811L,
+        0x738477ac5395b759L },
+      { 0x35752b90c00ee17fL,0x68748390742ed2e3L,0x7cd06422bd1f5bc1L,
+        0xfbc08769c9e7b797L } },
+    /* 14 << 0 */
+    { { 0xa242a35bb0cf664aL,0x126e48f77f9707e3L,0x1717bf54c6832660L,
+        0xfaae7332fd12c72eL },
+      { 0x27b52db7995d586bL,0xbe29569e832237c2L,0xe8e4193e2a65e7dbL,
+        0x152706dc2eaa1bbbL } },
+    /* 15 << 0 */
+    { { 0x72bcd8b7bc60055bL,0x03cc23ee56e27e4bL,0xee337424e4819370L,
+        0xe2aa0e430ad3da09L },
+      { 0x40b8524f6383c45dL,0xd766355442a41b25L,0x64efa6de778a4797L,
+        0x2042170a7079adf4L } },
+    /* 16 << 0 */
+    { { 0x808b0b650bc6fb80L,0x5882e0753ffe2e6bL,0xd5ef2f7c2c83f549L,
+        0x54d63c809103b723L },
+      { 0xf2f11bd652a23f9bL,0x3670c3194b0b6587L,0x55c4623bb1580e9eL,
+        0x64edf7b201efe220L } },
+    /* 17 << 0 */
+    { { 0x97091dcbd53c5c9dL,0xf17624b6ac0a177bL,0xb0f139752cfe2dffL,
+        0xc1a35c0a6c7a574eL },
+      { 0x227d314693e79987L,0x0575bf30e89cb80eL,0x2f4e247f0d1883bbL,
+        0xebd512263274c3d0L } },
+    /* 18 << 0 */
+    { { 0x5f3e51c856ada97aL,0x4afc964d8f8b403eL,0xa6f247ab412e2979L,
+        0x675abd1b6f80ebdaL },
+      { 0x66a2bd725e485a1dL,0x4b2a5caf8f4f0b3cL,0x2626927f1b847bbaL,
+        0x6c6fc7d90502394dL } },
+    /* 19 << 0 */
+    { { 0xfea912baa5659ae8L,0x68363aba25e1a16eL,0xb8842277752c41acL,
+        0xfe545c282897c3fcL },
+      { 0x2d36e9e7dc4c696bL,0x5806244afba977c5L,0x85665e9be39508c1L,
+        0xf720ee256d12597bL } },
+    /* 20 << 0 */
+    { { 0x8a979129d2337a31L,0x5916868f0f862bdcL,0x048099d95dd283baL,
+        0xe2d1eeb6fe5bfb4eL },
+      { 0x82ef1c417884005dL,0xa2d4ec17ffffcbaeL,0x9161c53f8aa95e66L,
+        0x5ee104e1c5fee0d0L } },
+    /* 21 << 0 */
+    { { 0x562e4cecc135b208L,0x74e1b2654783f47dL,0x6d2a506c5a3f3b30L,
+        0xecead9f4c16762fcL },
+      { 0xf29dd4b2e286e5b9L,0x1b0fadc083bb3c61L,0x7a75023e7fac29a4L,
+        0xc086d5f1c9477fa3L } },
+    /* 22 << 0 */
+    { { 0x0fc611352f6f3076L,0xc99ffa23e3912a9aL,0x6a0b0685d2f8ba3dL,
+        0xfdc777e8e93358a4L },
+      { 0x94a787bb35415f04L,0x640c2d6a4d23fea4L,0x9de917da153a35b5L,
+        0x793e8d075d5cd074L } },
+    /* 23 << 0 */
+    { { 0xf4f876532de45068L,0x37c7a7e89e2e1f6eL,0xd0825fa2a3584069L,
+        0xaf2cea7c1727bf42L },
+      { 0x0360a4fb9e4785a9L,0xe5fda49c27299f4aL,0x48068e1371ac2f71L,
+        0x83d0687b9077666fL } },
+    /* 24 << 0 */
+    { { 0x6d3883b215d02819L,0x6d0d755040dd9a35L,0x61d7cbf91d2b469fL,
+        0xf97b232f2efc3115L },
+      { 0xa551d750b24bcbc7L,0x11ea494988a1e356L,0x7669f03193cb7501L,
+        0x595dc55eca737b8aL } },
+    /* 25 << 0 */
+    { { 0xa4a319acd837879fL,0x6fc1b49eed6b67b0L,0xe395993332f1f3afL,
+        0x966742eb65432a2eL },
+      { 0x4b8dc9feb4966228L,0x96cc631243f43950L,0x12068859c9b731eeL,
+        0x7b948dc356f79968L } },
+    /* 26 << 0 */
+    { { 0x61e4ad32ed1f8008L,0xe6c9267ad8b17538L,0x1ac7c5eb857ff6fbL,
+        0x994baaa855f2fb10L },
+      { 0x84cf14e11d248018L,0x5a39898b628ac508L,0x14fde97b5fa944f5L,
+        0xed178030d12e5ac7L } },
+    /* 27 << 0 */
+    { { 0x042c2af497e2feb4L,0xd36a42d7aebf7313L,0x49d2c9eb084ffdd7L,
+        0x9f8aa54b2ef7c76aL },
+      { 0x9200b7ba09895e70L,0x3bd0c66fddb7fb58L,0x2d97d10878eb4cbbL,
+        0x2d431068d84bde31L } },
+    /* 28 << 0 */
+    { { 0x4b523eb7172ccd1fL,0x7323cb2830a6a892L,0x97082ec0cfe153ebL,
+        0xe97f6b6af2aadb97L },
+      { 0x1d3d393ed1a83da1L,0xa6a7f9c7804b2a68L,0x4a688b482d0cb71eL,
+        0xa9b4cc5f40585278L } },
+    /* 29 << 0 */
+    { { 0x5e5db46acb66e132L,0xf1be963a0d925880L,0x944a70270317b9e2L,
+        0xe266f95948603d48L },
+      { 0x98db66735c208899L,0x90472447a2fb18a3L,0x8a966939777c619fL,
+        0x3798142a2a3be21bL } },
+    /* 30 << 0 */
+    { { 0xb4241cb13298b343L,0xa3a14e49b44f65a1L,0xc5f4d6cd3ac77acdL,
+        0xd0288cb552b6fc3cL },
+      { 0xd5cc8c2f1c040abcL,0xb675511e06bf9b4aL,0xd667da379b3aa441L,
+        0x460d45ce51601f72L } },
+    /* 31 << 0 */
+    { { 0xe2f73c696755ff89L,0xdd3cf7e7473017e6L,0x8ef5689d3cf7600dL,
+        0x948dc4f8b1fc87b4L },
+      { 0xd9e9fe814ea53299L,0x2d921ca298eb6028L,0xfaecedfd0c9803fcL,
+        0xf38ae8914d7b4745L } },
+    /* 32 << 0 */
+    { { 0xd8c5fccfc5e3a3d8L,0xbefd904c4079dfbfL,0xbc6d6a58fead0197L,
+        0x39227077695532a4L },
+      { 0x09e23e6ddbef42f5L,0x7e449b64480a9908L,0x7b969c1aad9a2e40L,
+        0x6231d7929591c2a4L } },
+    /* 33 << 0 */
+    { { 0x871514560f664534L,0x85ceae7c4b68f103L,0xac09c4ae65578ab9L,
+        0x33ec6868f044b10cL },
+      { 0x6ac4832b3a8ec1f1L,0x5509d1285847d5efL,0xf909604f763f1574L,
+        0xb16c4303c32f63c4L } },
+    /* 34 << 0 */
+    { { 0xb6ab20147ca23cd3L,0xcaa7a5c6a391849dL,0x5b0673a375678d94L,
+        0xc982ddd4dd303e64L },
+      { 0xfd7b000b5db6f971L,0xbba2cb1f6f876f92L,0xc77332a33c569426L,
+        0xa159100c570d74f8L } },
+    /* 35 << 0 */
+    { { 0xfd16847fdec67ef5L,0x742ee464233e76b7L,0x0b8e4134efc2b4c8L,
+        0xca640b8642a3e521L },
+      { 0x653a01908ceb6aa9L,0x313c300c547852d5L,0x24e4ab126b237af7L,
+        0x2ba901628bb47af8L } },
+    /* 36 << 0 */
+    { { 0x3d5e58d6a8219bb7L,0xc691d0bd1b06c57fL,0x0ae4cb10d257576eL,
+        0x3569656cd54a3dc3L },
+      { 0xe5ebaebd94cda03aL,0x934e82d3162bfe13L,0x450ac0bae251a0c6L,
+        0x480b9e11dd6da526L } },
+    /* 37 << 0 */
+    { { 0x00467bc58cce08b5L,0xb636458c7f178d55L,0xc5748baea677d806L,
+        0x2763a387dfa394ebL },
+      { 0xa12b448a7d3cebb6L,0xe7adda3e6f20d850L,0xf63ebce51558462cL,
+        0x58b36143620088a8L } },
+    /* 38 << 0 */
+    { { 0x8a2cc3ca4d63c0eeL,0x512331170fe948ceL,0x7463fd85222ef33bL,
+        0xadf0c7dc7c603d6cL },
+      { 0x0ec32d3bfe7765e5L,0xccaab359bf380409L,0xbdaa84d68e59319cL,
+        0xd9a4c2809c80c34dL } },
+    /* 39 << 0 */
+    { { 0xa9d89488a059c142L,0x6f5ae714ff0b9346L,0x068f237d16fb3664L,
+        0x5853e4c4363186acL },
+      { 0xe2d87d2363c52f98L,0x2ec4a76681828876L,0x47b864fae14e7b1cL,
+        0x0c0bc0e569192408L } },
+    /* 40 << 0 */
+    { { 0xe4d7681db82e9f3eL,0x83200f0bdf25e13cL,0x8909984c66f27280L,
+        0x462d7b0075f73227L },
+      { 0xd90ba188f2651798L,0x74c6e18c36ab1c34L,0xab256ea35ef54359L,
+        0x03466612d1aa702fL } },
+    /* 41 << 0 */
+    { { 0x624d60492ed22e91L,0x6fdfe0b56f072822L,0xeeca111539ce2271L,
+        0x98100a4fdb01614fL },
+      { 0xb6b0daa2a35c628fL,0xb6f94d2ec87e9a47L,0xc67732591d57d9ceL,
+        0xf70bfeec03884a7bL } },
+    /* 42 << 0 */
+    { { 0x5fb35ccfed2bad01L,0xa155cbe31da6a5c7L,0xc2e2594c30a92f8fL,
+        0x649c89ce5bfafe43L },
+      { 0xd158667de9ff257aL,0x9b359611f32c50aeL,0x4b00b20b906014cfL,
+        0xf3a8cfe389bc7d3dL } },
+    /* 43 << 0 */
+    { { 0x4ff23ffd248a7d06L,0x80c5bfb4878873faL,0xb7d9ad9005745981L,
+        0x179c85db3db01994L },
+      { 0xba41b06261a6966cL,0x4d82d052eadce5a8L,0x9e91cd3ba5e6a318L,
+        0x47795f4f95b2dda0L } },
+    /* 44 << 0 */
+    { { 0xecfd7c1fd55a897cL,0x009194abb29110fbL,0x5f0e2046e381d3b0L,
+        0x5f3425f6a98dd291L },
+      { 0xbfa06687730d50daL,0x0423446c4b083b7fL,0x397a247dd69d3417L,
+        0xeb629f90387ba42aL } },
+    /* 45 << 0 */
+    { { 0x1ee426ccd5cd79bfL,0x0032940b946c6e18L,0x1b1e8ae057477f58L,
+        0xe94f7d346d823278L },
+      { 0xc747cb96782ba21aL,0xc5254469f72b33a5L,0x772ef6dec7f80c81L,
+        0xd73acbfe2cd9e6b5L } },
+    /* 46 << 0 */
+    { { 0x4075b5b149ee90d9L,0x785c339aa06e9ebaL,0xa1030d5babf825e0L,
+        0xcec684c3a42931dcL },
+      { 0x42ab62c9c1586e63L,0x45431d665ab43f2bL,0x57c8b2c055f7835dL,
+        0x033da338c1b7f865L } },
+    /* 47 << 0 */
+    { { 0x283c7513caa76097L,0x0a624fa936c83906L,0x6b20afec715af2c7L,
+        0x4b969974eba78bfdL },
+      { 0x220755ccd921d60eL,0x9b944e107baeca13L,0x04819d515ded93d4L,
+        0x9bbff86e6dddfd27L } },
+    /* 48 << 0 */
+    { { 0x6b34413077adc612L,0xa7496529bbd803a0L,0x1a1baaa76d8805bdL,
+        0xc8403902470343adL },
+      { 0x39f59f66175adff1L,0x0b26d7fbb7d8c5b7L,0xa875f5ce529d75e3L,
+        0x85efc7e941325cc2L } },
+    /* 49 << 0 */
+    { { 0x21950b421ff6acd3L,0xffe7048453dc6909L,0xff4cd0b228766127L,
+        0xabdbe6084fb7db2bL },
+      { 0x837c92285e1109e8L,0x26147d27f4645b5aL,0x4d78f592f7818ed8L,
+        0xd394077ef247fa36L } },
+    /* 50 << 0 */
+    { { 0x0fb9c2d0488c171aL,0xa78bfbaa13685278L,0xedfbe268d5b1fa6aL,
+        0x0dceb8db2b7eaba7L },
+      { 0xbf9e80899ae2b710L,0xefde7ae6a4449c96L,0x43b7716bcc143a46L,
+        0xd7d34194c3628c13L } },
+    /* 51 << 0 */
+    { { 0x508cec1c3b3f64c9L,0xe20bc0ba1e5edf3fL,0xda1deb852f4318d4L,
+        0xd20ebe0d5c3fa443L },
+      { 0x370b4ea773241ea3L,0x61f1511c5e1a5f65L,0x99a5e23d82681c62L,
+        0xd731e383a2f54c2dL } },
+    /* 52 << 0 */
+    { { 0x2692f36e83445904L,0x2e0ec469af45f9c0L,0x905a3201c67528b7L,
+        0x88f77f34d0e5e542L },
+      { 0xf67a8d295864687cL,0x23b92eae22df3562L,0x5c27014b9bbec39eL,
+        0x7ef2f2269c0f0f8dL } },
+    /* 53 << 0 */
+    { { 0x97359638546c4d8dL,0x5f9c3fc492f24679L,0x912e8beda8c8acd9L,
+        0xec3a318d306634b0L },
+      { 0x80167f41c31cb264L,0x3db82f6f522113f2L,0xb155bcd2dcafe197L,
+        0xfba1da5943465283L } },
+    /* 54 << 0 */
+    { { 0xa0425b8eb212cf53L,0x4f2e512ef8557c5fL,0xc1286ff925c4d56cL,
+        0xbb8a0feaee26c851L },
+      { 0xc28f70d2e7d6107eL,0x7ee0c444e76265aaL,0x3df277a41d1936b1L,
+        0x1a556e3fea9595ebL } },
+    /* 55 << 0 */
+    { { 0x258bbbf9e7305683L,0x31eea5bf07ef5be6L,0x0deb0e4a46c814c1L,
+        0x5cee8449a7b730ddL },
+      { 0xeab495c5a0182bdeL,0xee759f879e27a6b4L,0xc2cf6a6880e518caL,
+        0x25e8013ff14cf3f4L } },
+    /* 56 << 0 */
+    { { 0x8fc441407e8d7a14L,0xbb1ff3ca9556f36aL,0x6a84438514600044L,
+        0xba3f0c4a7451ae63L },
+      { 0xdfcac25b1f9af32aL,0x01e0db86b1f2214bL,0x4e9a5bc2a4b596acL,
+        0x83927681026c2c08L } },
+    /* 57 << 0 */
+    { { 0x3ec832e77acaca28L,0x1bfeea57c7385b29L,0x068212e3fd1eaf38L,
+        0xc13298306acf8cccL },
+      { 0xb909f2db2aac9e59L,0x5748060db661782aL,0xc5ab2632c79b7a01L,
+        0xda44c6c600017626L } },
+    /* 58 << 0 */
+    { { 0xf26c00e8a7ea82f0L,0x99cac80de4299aafL,0xd66fe3b67ed78be1L,
+        0x305f725f648d02cdL },
+      { 0x33ed1bc4623fb21bL,0xfa70533e7a6319adL,0x17ab562dbe5ffb3eL,
+        0x0637499456674741L } },
+    /* 59 << 0 */
+    { { 0x69d44ed65c46aa8eL,0x2100d5d3a8d063d1L,0xcb9727eaa2d17c36L,
+        0x4c2bab1b8add53b7L },
+      { 0xa084e90c15426704L,0x778afcd3a837ebeaL,0x6651f7017ce477f8L,
+        0xa062499846fb7a8bL } },
+    /* 60 << 0 */
+    { { 0xdc1e6828ed8a6e19L,0x33fc23364189d9c7L,0x026f8fe2671c39bcL,
+        0xd40c4ccdbc6f9915L },
+      { 0xafa135bbf80e75caL,0x12c651a022adff2cL,0xc40a04bd4f51ad96L,
+        0x04820109bbe4e832L } },
+    /* 61 << 0 */
+    { { 0x3667eb1a7f4c04ccL,0x59556621a9404f84L,0x71cdf6537eceb50aL,
+        0x994a44a69b8335faL },
+      { 0xd7faf819dbeb9b69L,0x473c5680eed4350dL,0xb6658466da44bba2L,
+        0x0d1bc780872bdbf3L } },
+    /* 62 << 0 */
+    { { 0xe535f175a1962f91L,0x6ed7e061ed58f5a7L,0x177aa4c02089a233L,
+        0x0dbcb03ae539b413L },
+      { 0xe3dc424ebb32e38eL,0x6472e5ef6806701eL,0xdd47ff98814be9eeL,
+        0x6b60cfff35ace009L } },
+    /* 63 << 0 */
+    { { 0xb8d3d9319ff91fe5L,0x039c4800f0518eedL,0x95c376329182cb26L,
+        0x0763a43482fc568dL },
+      { 0x707c04d5383e76baL,0xac98b930824e8197L,0x92bf7c8f91230de0L,
+        0x90876a0140959b70L } },
+    /* 64 << 0 */
+    { { 0xdb6d96f305968b80L,0x380a0913089f73b9L,0x7da70b83c2c61e01L,
+        0x95fb8394569b38c7L },
+      { 0x9a3c651280edfe2fL,0x8f726bb98faeaf82L,0x8010a4a078424bf8L,
+        0x296720440e844970L } },
+    /* 0 << 7 */
+    { { 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 << 7 */
+    { { 0x63c5cb817a2ad62aL,0x7ef2b6b9ac62ff54L,0x3749bba4b3ad9db5L,
+        0xad311f2c46d5a617L },
+      { 0xb77a8087c2ff3b6dL,0xb46feaf3367834ffL,0xf8aa266d75d6b138L,
+        0xfa38d320ec008188L } },
+    /* 2 << 7 */
+    { { 0x486d8ffa696946fcL,0x50fbc6d8b9cba56dL,0x7e3d423e90f35a15L,
+        0x7c3da195c0dd962cL },
+      { 0xe673fdb03cfd5d8bL,0x0704b7c2889dfca5L,0xf6ce581ff52305aaL,
+        0x399d49eb914d5e53L } },
+    /* 3 << 7 */
+    { { 0x380a496d6ec293cdL,0x733dbda78e7051f5L,0x037e388db849140aL,
+        0xee4b32b05946dbf6L },
+      { 0xb1c4fda9cae368d1L,0x5001a7b0fdb0b2f3L,0x6df593742e3ac46eL,
+        0x4af675f239b3e656L } },
+    /* 4 << 7 */
+    { { 0x44e3811039949296L,0x5b63827b361db1b5L,0x3e5323ed206eaff5L,
+        0x942370d2c21f4290L },
+      { 0xf2caaf2ee0d985a1L,0x192cc64b7239846dL,0x7c0b8f47ae6312f8L,
+        0x7dc61f9196620108L } },
+    /* 5 << 7 */
+    { { 0xb830fb5bc2da7de9L,0xd0e643df0ff8d3beL,0x31ee77ba188a9641L,
+        0x4e8aa3aabcf6d502L },
+      { 0xf9fb65329a49110fL,0xd18317f62dd6b220L,0x7e3ced4152c3ea5aL,
+        0x0d296a147d579c4aL } },
+    /* 6 << 7 */
+    { { 0x35d6a53eed4c3717L,0x9f8240cf3d0ed2a3L,0x8c0d4d05e5543aa5L,
+        0x45d5bbfbdd33b4b4L },
+      { 0xfa04cc73137fd28eL,0x862ac6efc73b3ffdL,0x403ff9f531f51ef2L,
+        0x34d5e0fcbc73f5a2L } },
+    /* 7 << 7 */
+    { { 0xf252682008913f4fL,0xea20ed61eac93d95L,0x51ed38b46ca6b26cL,
+        0x8662dcbcea4327b0L },
+      { 0x6daf295c725d2aaaL,0xbad2752f8e52dcdaL,0x2210e7210b17daccL,
+        0xa37f7912d51e8232L } },
+    /* 8 << 7 */
+    { { 0x4f7081e144cc3addL,0xd5ffa1d687be82cfL,0x89890b6c0edd6472L,
+        0xada26e1a3ed17863L },
+      { 0x276f271563483caaL,0xe6924cd92f6077fdL,0x05a7fe980a466e3cL,
+        0xf1c794b0b1902d1fL } },
+    /* 9 << 7 */
+    { { 0xe521368882a8042cL,0xd931cfafcd278298L,0x069a0ae0f597a740L,
+        0x0adbb3f3eb59107cL },
+      { 0x983e951e5eaa8eb8L,0xe663a8b511b48e78L,0x1631cc0d8a03f2c5L,
+        0x7577c11e11e271e2L } },
+    /* 10 << 7 */
+    { { 0x33b2385c08369a90L,0x2990c59b190eb4f8L,0x819a6145c68eac80L,
+        0x7a786d622ec4a014L },
+      { 0x33faadbe20ac3a8dL,0x31a217815aba2d30L,0x209d2742dba4f565L,
+        0xdb2ce9e355aa0fbbL } },
+    /* 11 << 7 */
+    { { 0x8cef334b168984dfL,0xe81dce1733879638L,0xf6e6949c263720f0L,
+        0x5c56feaff593cbecL },
+      { 0x8bff5601fde58c84L,0x74e241172eccb314L,0xbcf01b614c9a8a78L,
+        0xa233e35e544c9868L } },
+    /* 12 << 7 */
+    { { 0xb3156bf38bd7aff1L,0x1b5ee4cb1d81b146L,0x7ba1ac41d628a915L,
+        0x8f3a8f9cfd89699eL },
+      { 0x7329b9c9a0748be7L,0x1d391c95a92e621fL,0xe51e6b214d10a837L,
+        0xd255f53a4947b435L } },
+    /* 13 << 7 */
+    { { 0x07669e04f1788ee3L,0xc14f27afa86938a2L,0x8b47a334e93a01c0L,
+        0xff627438d9366808L },
+      { 0x7a0985d8ca2a5965L,0x3d9a5542d6e9b9b3L,0xc23eb80b4cf972e8L,
+        0x5c1c33bb4fdf72fdL } },
+    /* 14 << 7 */
+    { { 0x0c4a58d474a86108L,0xf8048a8fee4c5d90L,0xe3c7c924e86d4c80L,
+        0x28c889de056a1e60L },
+      { 0x57e2662eb214a040L,0xe8c48e9837e10347L,0x8774286280ac748aL,
+        0xf1c24022186b06f2L } },
+    /* 15 << 7 */
+    { { 0xac2dd4c35f74040aL,0x409aeb71fceac957L,0x4fbad78255c4ec23L,
+        0xb359ed618a7b76ecL },
+      { 0x12744926ed6f4a60L,0xe21e8d7f4b912de3L,0xe2575a59fc705a59L,
+        0x72f1d4deed2dbc0eL } },
+    /* 16 << 7 */
+    { { 0x3d2b24b9eb7926b8L,0xbff88cb3cdbe5509L,0xd0f399afe4dd640bL,
+        0x3c5fe1302f76ed45L },
+      { 0x6f3562f43764fb3dL,0x7b5af3183151b62dL,0xd5bd0bc7d79ce5f3L,
+        0xfdaf6b20ec66890fL } },
+    /* 17 << 7 */
+    { { 0x735c67ec6063540cL,0x50b259c2e5f9cb8fL,0xb8734f9a3f99c6abL,
+        0xf8cc13d5a3a7bc85L },
+      { 0x80c1b305c5217659L,0xfe5364d44ec12a54L,0xbd87045e681345feL,
+        0x7f8efeb1582f897fL } },
+    /* 18 << 7 */
+    { { 0xe8cbf1e5d5923359L,0xdb0cea9d539b9fb0L,0x0c5b34cf49859b98L,
+        0x5e583c56a4403cc6L },
+      { 0x11fc1a2dd48185b7L,0xc93fbc7e6e521787L,0x47e7a05805105b8bL,
+        0x7b4d4d58db8260c8L } },
+    /* 19 << 7 */
+    { { 0xe33930b046eb842aL,0x8e844a9a7bdae56dL,0x34ef3a9e13f7fdfcL,
+        0xb3768f82636ca176L },
+      { 0x2821f4e04e09e61cL,0x414dc3a1a0c7cddcL,0xd537943754945fcdL,
+        0x151b6eefb3555ff1L } },
+    /* 20 << 7 */
+    { { 0xb31bd6136339c083L,0x39ff8155dfb64701L,0x7c3388d2e29604abL,
+        0x1e19084ba6b10442L },
+      { 0x17cf54c0eccd47efL,0x896933854a5dfb30L,0x69d023fb47daf9f6L,
+        0x9222840b7d91d959L } },
+    /* 21 << 7 */
+    { { 0x439108f5803bac62L,0x0b7dd91d379bd45fL,0xd651e827ca63c581L,
+        0x5c5d75f6509c104fL },
+      { 0x7d5fc7381f2dc308L,0x20faa7bfd98454beL,0x95374beea517b031L,
+        0xf036b9b1642692acL } },
+    /* 22 << 7 */
+    { { 0xc510610939842194L,0xb7e2353e49d05295L,0xfc8c1d5cefb42ee0L,
+        0xe04884eb08ce811cL },
+      { 0xf1f75d817419f40eL,0x5b0ac162a995c241L,0x120921bbc4c55646L,
+        0x713520c28d33cf97L } },
+    /* 23 << 7 */
+    { { 0xb4a65a5ce98c5100L,0x6cec871d2ddd0f5aL,0x251f0b7f9ba2e78bL,
+        0x224a8434ce3a2a5fL },
+      { 0x26827f6125f5c46fL,0x6a22bedc48545ec0L,0x25ae5fa0b1bb5cdcL,
+        0xd693682ffcb9b98fL } },
+    /* 24 << 7 */
+    { { 0x32027fe891e5d7d3L,0xf14b7d1773a07678L,0xf88497b3c0dfdd61L,
+        0xf7c2eec02a8c4f48L },
+      { 0xaa5573f43756e621L,0xc013a2401825b948L,0x1c03b34563878572L,
+        0xa0472bea653a4184L } },
+    /* 25 << 7 */
+    { { 0xf4222e270ac69a80L,0x34096d25f51e54f6L,0x00a648cb8fffa591L,
+        0x4e87acdc69b6527fL },
+      { 0x0575e037e285ccb4L,0x188089e450ddcf52L,0xaa96c9a8870ff719L,
+        0x74a56cd81fc7e369L } },
+    /* 26 << 7 */
+    { { 0x41d04ee21726931aL,0x0bbbb2c83660ecfdL,0xa6ef6de524818e18L,
+        0xe421cc51e7d57887L },
+      { 0xf127d208bea87be6L,0x16a475d3b1cdd682L,0x9db1b684439b63f7L,
+        0x5359b3dbf0f113b6L } },
+    /* 27 << 7 */
+    { { 0xdfccf1de8bf06e31L,0x1fdf8f44dd383901L,0x10775cad5017e7d2L,
+        0xdfc3a59758d11eefL },
+      { 0x6ec9c8a0b1ecff10L,0xee6ed6cc28400549L,0xb5ad7bae1b4f8d73L,
+        0x61b4f11de00aaab9L } },
+    /* 28 << 7 */
+    { { 0x7b32d69bd4eff2d7L,0x88ae67714288b60fL,0x159461b437a1e723L,
+        0x1f3d4789570aae8cL },
+      { 0x869118c07f9871daL,0x35fbda78f635e278L,0x738f3641e1541dacL,
+        0x6794b13ac0dae45fL } },
+    /* 29 << 7 */
+    { { 0x065064ac09cc0917L,0x27c53729c68540fdL,0x0d2d4c8eef227671L,
+        0xd23a9f80a1785a04L },
+      { 0x98c5952852650359L,0xfa09ad0174a1acadL,0x082d5a290b55bf5cL,
+        0xa40f1c67419b8084L } },
+    /* 30 << 7 */
+    { { 0x3a5c752edcc18770L,0x4baf1f2f8825c3a5L,0xebd63f7421b153edL,
+        0xa2383e47b2f64723L },
+      { 0xe7bf620a2646d19aL,0x56cb44ec03c83ffdL,0xaf7267c94f6be9f1L,
+        0x8b2dfd7bc06bb5e9L } },
+    /* 31 << 7 */
+    { { 0xb87072f2a672c5c7L,0xeacb11c80d53c5e2L,0x22dac29dff435932L,
+        0x37bdb99d4408693cL },
+      { 0xf6e62fb62899c20fL,0x3535d512447ece24L,0xfbdc6b88ff577ce3L,
+        0x726693bd190575f2L } },
+    /* 32 << 7 */
+    { { 0x6772b0e5ab4b35a2L,0x1d8b6001f5eeaacfL,0x728f7ce4795b9580L,
+        0x4a20ed2a41fb81daL },
+      { 0x9f685cd44fec01e6L,0x3ed7ddcca7ff50adL,0x460fd2640c2d97fdL,
+        0x3a241426eb82f4f9L } },
+    /* 33 << 7 */
+    { { 0x17d1df2c6a8ea820L,0xb2b50d3bf22cc254L,0x03856cbab7291426L,
+        0x87fd26ae04f5ee39L },
+      { 0x9cb696cc02bee4baL,0x5312180406820fd6L,0xa5dfc2690212e985L,
+        0x666f7ffa160f9a09L } },
+    /* 34 << 7 */
+    { { 0xc503cd33bccd9617L,0x365dede4ba7730a3L,0x798c63555ddb0786L,
+        0xa6c3200efc9cd3bcL },
+      { 0x060ffb2ce5e35efdL,0x99a4e25b5555a1c1L,0x11d95375f70b3751L,
+        0x0a57354a160e1bf6L } },
+    /* 35 << 7 */
+    { { 0xecb3ae4bf8e4b065L,0x07a834c42e53022bL,0x1cd300b38692ed96L,
+        0x16a6f79261ee14ecL },
+      { 0x8f1063c66a8649edL,0xfbcdfcfe869f3e14L,0x2cfb97c100a7b3ecL,
+        0xcea49b3c7130c2f1L } },
+    /* 36 << 7 */
+    { { 0x462d044fe9d96488L,0x4b53d52e8182a0c1L,0x84b6ddd30391e9e9L,
+        0x80ab7b48b1741a09L },
+      { 0xec0e15d427d3317fL,0x8dfc1ddb1a64671eL,0x93cc5d5fd49c5b92L,
+        0xc995d53d3674a331L } },
+    /* 37 << 7 */
+    { { 0x302e41ec090090aeL,0x2278a0ccedb06830L,0x1d025932fbc99690L,
+        0x0c32fbd2b80d68daL },
+      { 0xd79146daf341a6c1L,0xae0ba1391bef68a0L,0xc6b8a5638d774b3aL,
+        0x1cf307bd880ba4d7L } },
+    /* 38 << 7 */
+    { { 0xc033bdc719803511L,0xa9f97b3b8888c3beL,0x3d68aebc85c6d05eL,
+        0xc3b88a9d193919ebL },
+      { 0x2d300748c48b0ee3L,0x7506bc7c07a746c1L,0xfc48437c6e6d57f3L,
+        0x5bd71587cfeaa91aL } },
+    /* 39 << 7 */
+    { { 0xa4ed0408c1bc5225L,0xd0b946db2719226dL,0x109ecd62758d2d43L,
+        0x75c8485a2751759bL },
+      { 0xb0b75f499ce4177aL,0x4fa61a1e79c10c3dL,0xc062d300a167fcd7L,
+        0x4df3874c750f0fa8L } },
+    /* 40 << 7 */
+    { { 0x29ae2cf983dfedc9L,0xf84371348d87631aL,0xaf5717117429c8d2L,
+        0x18d15867146d9272L },
+      { 0x83053ecf69769bb7L,0xc55eb856c479ab82L,0x5ef7791c21b0f4b2L,
+        0xaa5956ba3d491525L } },
+    /* 41 << 7 */
+    { { 0x407a96c29fe20ebaL,0xf27168bbe52a5ad3L,0x43b60ab3bf1d9d89L,
+        0xe45c51ef710e727aL },
+      { 0xdfca5276099b4221L,0x8dc6407c2557a159L,0x0ead833591035895L,
+        0x0a9db9579c55dc32L } },
+    /* 42 << 7 */
+    { { 0xe40736d3df61bc76L,0x13a619c03f778cdbL,0x6dd921a4c56ea28fL,
+        0x76a524332fa647b4L },
+      { 0x23591891ac5bdc5dL,0xff4a1a72bac7dc01L,0x9905e26162df8453L,
+        0x3ac045dfe63b265fL } },
+    /* 43 << 7 */
+    { { 0x8a3f341bad53dba7L,0x8ec269cc837b625aL,0xd71a27823ae31189L,
+        0x8fb4f9a355e96120L },
+      { 0x804af823ff9875cfL,0x23224f575d442a9bL,0x1c4d3b9eecc62679L,
+        0x91da22fba0e7ddb1L } },
+    /* 44 << 7 */
+    { { 0xa370324d6c04a661L,0x9710d3b65e376d17L,0xed8c98f03044e357L,
+        0xc364ebbe6422701cL },
+      { 0x347f5d517733d61cL,0xd55644b9cea826c3L,0x80c6e0ad55a25548L,
+        0x0aa7641d844220a7L } },
+    /* 45 << 7 */
+    { { 0x1438ec8131810660L,0x9dfa6507de4b4043L,0x10b515d8cc3e0273L,
+        0x1b6066dd28d8cfb2L },
+      { 0xd3b045919c9efebdL,0x425d4bdfa21c1ff4L,0x5fe5af19d57607d3L,
+        0xbbf773f754481084L } },
+    /* 46 << 7 */
+    { { 0x8435bd6994b03ed1L,0xd9ad1de3634cc546L,0x2cf423fc00e420caL,
+        0xeed26d80a03096ddL },
+      { 0xd7f60be7a4db09d2L,0xf47f569d960622f7L,0xe5925fd77296c729L,
+        0xeff2db2626ca2715L } },
+    /* 47 << 7 */
+    { { 0xa6fcd014b913e759L,0x53da47868ff4de93L,0x14616d79c32068e1L,
+        0xb187d664ccdf352eL },
+      { 0xf7afb6501dc90b59L,0x8170e9437daa1b26L,0xc8e3bdd8700c0a84L,
+        0x6e8d345f6482bdfaL } },
+    /* 48 << 7 */
+    { { 0x84cfbfa1c5c5ea50L,0xd3baf14c67960681L,0x263984030dd50942L,
+        0xe4b7839c4716a663L },
+      { 0xd5f1f794e7de6dc0L,0x5cd0f4d4622aa7ceL,0x5295f3f159acfeecL,
+        0x8d933552953e0607L } },
+    /* 49 << 7 */
+    { { 0xc7db8ec5776c5722L,0xdc467e622b5f290cL,0xd4297e704ff425a9L,
+        0x4be924c10cf7bb72L },
+      { 0x0d5dc5aea1892131L,0x8bf8a8e3a705c992L,0x73a0b0647a305ac5L,
+        0x00c9ca4e9a8c77a8L } },
+    /* 50 << 7 */
+    { { 0x5dfee80f83774bddL,0x6313160285734485L,0xa1b524ae914a69a9L,
+        0xebc2ffafd4e300d7L },
+      { 0x52c93db77cfa46a5L,0x71e6161f21653b50L,0x3574fc57a4bc580aL,
+        0xc09015dde1bc1253L } },
+    /* 51 << 7 */
+    { { 0x4b7b47b2d174d7aaL,0x4072d8e8f3a15d04L,0xeeb7d47fd6fa07edL,
+        0x6f2b9ff9edbdafb1L },
+      { 0x18c516153760fe8aL,0x7a96e6bff06c6c13L,0x4d7a04100ea2d071L,
+        0xa1914e9b0be2a5ceL } },
+    /* 52 << 7 */
+    { { 0x5726e357d8a3c5cfL,0x1197ecc32abb2b13L,0x6c0d7f7f31ae88ddL,
+        0x15b20d1afdbb3efeL },
+      { 0xcd06aa2670584039L,0x2277c969a7dc9747L,0xbca695877855d815L,
+        0x899ea2385188b32aL } },
+    /* 53 << 7 */
+    { { 0x37d9228b760c1c9dL,0xc7efbb119b5c18daL,0x7f0d1bc819f6dbc5L,
+        0x4875384b07e6905bL },
+      { 0xc7c50baa3ba8cd86L,0xb0ce40fbc2905de0L,0x708406737a231952L,
+        0xa912a262cf43de26L } },
+    /* 54 << 7 */
+    { { 0x9c38ddcceb5b76c1L,0x746f528526fc0ab4L,0x52a63a50d62c269fL,
+        0x60049c5599458621L },
+      { 0xe7f48f823c2f7c9eL,0x6bd99043917d5cf3L,0xeb1317a88701f469L,
+        0xbd3fe2ed9a449fe0L } },
+    /* 55 << 7 */
+    { { 0x421e79ca12ef3d36L,0x9ee3c36c3e7ea5deL,0xe48198b5cdff36f7L,
+        0xaff4f967c6b82228L },
+      { 0x15e19dd0c47adb7eL,0x45699b23032e7dfaL,0x40680c8b1fae026aL,
+        0x5a347a48550dbf4dL } },
+    /* 56 << 7 */
+    { { 0xe652533b3cef0d7dL,0xd94f7b182bbb4381L,0x838752be0e80f500L,
+        0x8e6e24889e9c9bfbL },
+      { 0xc975169716caca6aL,0x866c49d838531ad9L,0xc917e2397151ade1L,
+        0x2d016ec16037c407L } },
+    /* 57 << 7 */
+    { { 0xa407ccc900eac3f9L,0x835f6280e2ed4748L,0xcc54c3471cc98e0dL,
+        0x0e969937dcb572ebL },
+      { 0x1b16c8e88f30c9cbL,0xa606ae75373c4661L,0x47aa689b35502cabL,
+        0xf89014ae4d9bb64fL } },
+    /* 58 << 7 */
+    { { 0x202f6a9c31c71f7bL,0x01f95aa3296ffe5cL,0x5fc0601453cec3a3L,
+        0xeb9912375f498a45L },
+      { 0xae9a935e5d91ba87L,0xc6ac62810b564a19L,0x8a8fe81c3bd44e69L,
+        0x7c8b467f9dd11d45L } },
+    /* 59 << 7 */
+    { { 0xf772251fea5b8e69L,0xaeecb3bdc5b75fbcL,0x1aca3331887ff0e5L,
+        0xbe5d49ff19f0a131L },
+      { 0x582c13aae5c8646fL,0xdbaa12e820e19980L,0x8f40f31af7abbd94L,
+        0x1f13f5a81dfc7663L } },
+    /* 60 << 7 */
+    { { 0x5d81f1eeaceb4fc0L,0x362560025e6f0f42L,0x4b67d6d7751370c8L,
+        0x2608b69803e80589L },
+      { 0xcfc0d2fc05268301L,0xa6943d3940309212L,0x192a90c21fd0e1c2L,
+        0xb209f11337f1dc76L } },
+    /* 61 << 7 */
+    { { 0xefcc5e0697bf1298L,0xcbdb6730219d639eL,0xd009c116b81e8c6fL,
+        0xa3ffdde31a7ce2e5L },
+      { 0xc53fbaaaa914d3baL,0x836d500f88df85eeL,0xd98dc71b66ee0751L,
+        0x5a3d7005714516fdL } },
+    /* 62 << 7 */
+    { { 0x21d3634d39eedbbaL,0x35cd2e680455a46dL,0xc8cafe65f9d7eb0cL,
+        0xbda3ce9e00cefb3eL },
+      { 0xddc17a602c9cf7a4L,0x01572ee47bcb8773L,0xa92b2b018c7548dfL,
+        0x732fd309a84600e3L } },
+    /* 63 << 7 */
+    { { 0xe22109c716543a40L,0x9acafd36fede3c6cL,0xfb2068526824e614L,
+        0x2a4544a9da25dca0L },
+      { 0x2598526291d60b06L,0x281b7be928753545L,0xec667b1a90f13b27L,
+        0x33a83aff940e2eb4L } },
+    /* 64 << 7 */
+    { { 0x80009862d5d721d5L,0x0c3357a35bd3a182L,0x27f3a83b7aa2cda4L,
+        0xb58ae74ef6f83085L },
+      { 0x2a911a812e6dad6bL,0xde286051f43d6c5bL,0x4bdccc41f996c4d8L,
+        0xe7312ec00ae1e24eL } },
+    /* 0 << 14 */
+    { { 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 << 14 */
+    { { 0xf8d112e76e6485b3L,0x4d3e24db771c52f8L,0x48e3ee41684a2f6dL,
+        0x7161957d21d95551L },
+      { 0x19631283cdb12a6cL,0xbf3fa8822e50e164L,0xf6254b633166cc73L,
+        0x3aefa7aeaee8cc38L } },
+    /* 2 << 14 */
+    { { 0x79b0fe623b36f9fdL,0x26543b23fde19fc0L,0x136e64a0958482efL,
+        0x23f637719b095825L },
+      { 0x14cfd596b6a1142eL,0x5ea6aac6335aac0bL,0x86a0e8bdf3081dd5L,
+        0x5fb89d79003dc12aL } },
+    /* 3 << 14 */
+    { { 0xf615c33af72e34d4L,0x0bd9ea40110eec35L,0x1c12bc5bc1dea34eL,
+        0x686584c949ae4699L },
+      { 0x13ad95d38c97b942L,0x4609561a4e5c7562L,0x9e94a4aef2737f89L,
+        0xf57594c6371c78b6L } },
+    /* 4 << 14 */
+    { { 0x0f0165fce3779ee3L,0xe00e7f9dbd495d9eL,0x1fa4efa220284e7aL,
+        0x4564bade47ac6219L },
+      { 0x90e6312ac4708e8eL,0x4f5725fba71e9adfL,0xe95f55ae3d684b9fL,
+        0x47f7ccb11e94b415L } },
+    /* 5 << 14 */
+    { { 0x7322851b8d946581L,0xf0d13133bdf4a012L,0xa3510f696584dae0L,
+        0x03a7c1713c9f6c6dL },
+      { 0x5be97f38e475381aL,0xca1ba42285823334L,0xf83cc5c70be17ddaL,
+        0x158b14940b918c0fL } },
+    /* 6 << 14 */
+    { { 0xda3a77e5522e6b69L,0x69c908c3bbcd6c18L,0x1f1b9e48d924fd56L,
+        0x37c64e36aa4bb3f7L },
+      { 0x5a4fdbdfee478d7dL,0xba75c8bc0193f7a0L,0x84bc1e8456cd16dfL,
+        0x1fb08f0846fad151L } },
+    /* 7 << 14 */
+    { { 0x8a7cabf9842e9f30L,0xa331d4bf5eab83afL,0xd272cfba017f2a6aL,
+        0x27560abc83aba0e3L },
+      { 0x94b833870e3a6b75L,0x25c6aea26b9f50f5L,0x803d691db5fdf6d0L,
+        0x03b77509e6333514L } },
+    /* 8 << 14 */
+    { { 0x3617890361a341c1L,0x3604dc600cfd6142L,0x022295eb8533316cL,
+        0x3dbde4ac44af2922L },
+      { 0x898afc5d1c7eef69L,0x58896805d14f4fa1L,0x05002160203c21caL,
+        0x6f0d1f3040ef730bL } },
+    /* 9 << 14 */
+    { { 0x8e8c44d4196224f8L,0x75a4ab95374d079dL,0x79085ecc7d48f123L,
+        0x56f04d311bf65ad8L },
+      { 0xe220bf1cbda602b2L,0x73ee1742f9612c69L,0x76008fc8084fd06bL,
+        0x4000ef9ff11380d1L } },
+    /* 10 << 14 */
+    { { 0x48201b4b12cfe297L,0x3eee129c292f74e5L,0xe1fe114ec9e874e8L,
+        0x899b055c92c5fc41L },
+      { 0x4e477a643a39c8cfL,0x82f09efe78963cc9L,0x6fd3fd8fd333f863L,
+        0x85132b2adc949c63L } },
+    /* 11 << 14 */
+    { { 0x7e06a3ab516eb17bL,0x73bec06fd2c7372bL,0xe4f74f55ba896da6L,
+        0xbb4afef88e9eb40fL },
+      { 0x2d75bec8e61d66b0L,0x02bda4b4ef29300bL,0x8bbaa8de026baa5aL,
+        0xff54befda07f4440L } },
+    /* 12 << 14 */
+    { { 0xbd9b8b1dbe7a2af3L,0xec51caa94fb74a72L,0xb9937a4b63879697L,
+        0x7c9a9d20ec2687d5L },
+      { 0x1773e44f6ef5f014L,0x8abcf412e90c6900L,0x387bd0228142161eL,
+        0x50393755fcb6ff2aL } },
+    /* 13 << 14 */
+    { { 0x9813fd56ed6def63L,0x53cf64827d53106cL,0x991a35bd431f7ac1L,
+        0xf1e274dd63e65fafL },
+      { 0xf63ffa3c44cc7880L,0x411a426b7c256981L,0xb698b9fd93a420e0L,
+        0x89fdddc0ae53f8feL } },
+    /* 14 << 14 */
+    { { 0x766e072232398baaL,0x205fee425cfca031L,0xa49f53417a029cf2L,
+        0xa88c68b84023890dL },
+      { 0xbc2750417337aaa8L,0x9ed364ad0eb384f4L,0xe0816f8529aba92fL,
+        0x2e9e194104e38a88L } },
+    /* 15 << 14 */
+    { { 0x57eef44a3dafd2d5L,0x35d1fae597ed98d8L,0x50628c092307f9b1L,
+        0x09d84aaed6cba5c6L },
+      { 0x67071bc788aaa691L,0x2dea57a9afe6cb03L,0xdfe11bb43d78ac01L,
+        0x7286418c7fd7aa51L } },
+    /* 16 << 14 */
+    { { 0xfabf770977f7195aL,0x8ec86167adeb838fL,0xea1285a8bb4f012dL,
+        0xd68835039a3eab3fL },
+      { 0xee5d24f8309004c2L,0xa96e4b7613ffe95eL,0x0cdffe12bd223ea4L,
+        0x8f5c2ee5b6739a53L } },
+    /* 17 << 14 */
+    { { 0x5cb4aaa5dd968198L,0xfa131c5272413a6cL,0x53d46a909536d903L,
+        0xb270f0d348606d8eL },
+      { 0x518c7564a053a3bcL,0x088254b71a86caefL,0xb3ba8cb40ab5efd0L,
+        0x5c59900e4605945dL } },
+    /* 18 << 14 */
+    { { 0xecace1dda1887395L,0x40960f36932a65deL,0x9611ff5c3aa95529L,
+        0xc58215b07c1e5a36L },
+      { 0xd48c9b58f0e1a524L,0xb406856bf590dfb8L,0xc7605e049cd95662L,
+        0x0dd036eea33ecf82L } },
+    /* 19 << 14 */
+    { { 0xa50171acc33156b3L,0xf09d24ea4a80172eL,0x4e1f72c676dc8eefL,
+        0xe60caadc5e3d44eeL },
+      { 0x006ef8a6979b1d8fL,0x60908a1c97788d26L,0x6e08f95b266feec0L,
+        0x618427c222e8c94eL } },
+    /* 20 << 14 */
+    { { 0x3d61333959145a65L,0xcd9bc368fa406337L,0x82d11be32d8a52a0L,
+        0xf6877b2797a1c590L },
+      { 0x837a819bf5cbdb25L,0x2a4fd1d8de090249L,0x622a7de774990e5fL,
+        0x840fa5a07945511bL } },
+    /* 21 << 14 */
+    { { 0x30b974be6558842dL,0x70df8c6417f3d0a6L,0x7c8035207542e46dL,
+        0x7251fe7fe4ecc823L },
+      { 0xe59134cb5e9aac9aL,0x11bb0934f0045d71L,0x53e5d9b5dbcb1d4eL,
+        0x8d97a90592defc91L } },
+    /* 22 << 14 */
+    { { 0xfe2893277946d3f9L,0xe132bd2407472273L,0xeeeb510c1eb6ae86L,
+        0x777708c5f0595067L },
+      { 0x18e2c8cd1297029eL,0x2c61095cbbf9305eL,0xe466c2586b85d6d9L,
+        0x8ac06c36da1ea530L } },
+    /* 23 << 14 */
+    { { 0xa365dc39a1304668L,0xe4a9c88507f89606L,0x65a4898facc7228dL,
+        0x3e2347ff84ca8303L },
+      { 0xa5f6fb77ea7d23a3L,0x2fac257d672a71cdL,0x6908bef87e6a44d3L,
+        0x8ff87566891d3d7aL } },
+    /* 24 << 14 */
+    { { 0xe58e90b36b0cf82eL,0x6438d2462615b5e7L,0x07b1f8fc669c145aL,
+        0xb0d8b2da36f1e1cbL },
+      { 0x54d5dadbd9184c4dL,0x3dbb18d5f93d9976L,0x0a3e0f56d1147d47L,
+        0x2afa8c8da0a48609L } },
+    /* 25 << 14 */
+    { { 0x275353e8bc36742cL,0x898f427eeea0ed90L,0x26f4947e3e477b00L,
+        0x8ad8848a308741e3L },
+      { 0x6c703c38d74a2a46L,0x5e3e05a99ba17ba2L,0xc1fa6f664ab9a9e4L,
+        0x474a2d9a3841d6ecL } },
+    /* 26 << 14 */
+    { { 0x871239ad653ae326L,0x14bcf72aa74cbb43L,0x8737650e20d4c083L,
+        0x3df86536110ed4afL },
+      { 0xd2d86fe7b53ca555L,0x688cb00dabd5d538L,0xcf81bda31ad38468L,
+        0x7ccfe3ccf01167b6L } },
+    /* 27 << 14 */
+    { { 0xcf4f47e06c4c1fe6L,0x557e1f1a298bbb79L,0xf93b974f30d45a14L,
+        0x174a1d2d0baf97c4L },
+      { 0x7a003b30c51fbf53L,0xd8940991ee68b225L,0x5b0aa7b71c0f4173L,
+        0x975797c9a20a7153L } },
+    /* 28 << 14 */
+    { { 0x26e08c07e3533d77L,0xd7222e6a2e341c99L,0x9d60ec3d8d2dc4edL,
+        0xbdfe0d8f7c476cf8L },
+      { 0x1fe59ab61d056605L,0xa9ea9df686a8551fL,0x8489941e47fb8d8cL,
+        0xfeb874eb4a7f1b10L } },
+    /* 29 << 14 */
+    { { 0xfe5fea867ee0d98fL,0x201ad34bdbf61864L,0x45d8fe4737c031d4L,
+        0xd5f49fae795f0822L },
+      { 0xdb0fb291c7f4a40cL,0x2e69d9c1730ddd92L,0x754e105449d76987L,
+        0x8a24911d7662db87L } },
+    /* 30 << 14 */
+    { { 0x61fc181060a71676L,0xe852d1a8f66a8ad1L,0x172bbd656417231eL,
+        0x0d6de7bd3babb11fL },
+      { 0x6fde6f88c8e347f8L,0x1c5875479bd99cc3L,0x78e54ed034076950L,
+        0x97f0f334796e83baL } },
+    /* 31 << 14 */
+    { { 0xe4dbe1ce4924867aL,0xbd5f51b060b84917L,0x375300403cb09a79L,
+        0xdb3fe0f8ff1743d8L },
+      { 0xed7894d8556fa9dbL,0xfa26216923412fbfL,0x563be0dbba7b9291L,
+        0x6ca8b8c00c9fb234L } },
+    /* 32 << 14 */
+    { { 0xed406aa9bd763802L,0xc21486a065303da1L,0x61ae291ec7e62ec4L,
+        0x622a0492df99333eL },
+      { 0x7fd80c9dbb7a8ee0L,0xdc2ed3bc6c01aedbL,0x35c35a1208be74ecL,
+        0xd540cb1a469f671fL } },
+    /* 33 << 14 */
+    { { 0xd16ced4ecf84f6c7L,0x8561fb9c2d090f43L,0x7e693d796f239db4L,
+        0xa736f92877bd0d94L },
+      { 0x07b4d9292c1950eeL,0xda17754356dc11b3L,0xa5dfbbaa7a6a878eL,
+        0x1c70cb294decb08aL } },
+    /* 34 << 14 */
+    { { 0xfba28c8b6f0f7c50L,0xa8eba2b8854dcc6dL,0x5ff8e89a36b78642L,
+        0x070c1c8ef6873adfL },
+      { 0xbbd3c3716484d2e4L,0xfb78318f0d414129L,0x2621a39c6ad93b0bL,
+        0x979d74c2a9e917f7L } },
+    /* 35 << 14 */
+    { { 0xfc19564761fb0428L,0x4d78954abee624d4L,0xb94896e0b8ae86fdL,
+        0x6667ac0cc91c8b13L },
+      { 0x9f18051243bcf832L,0xfbadf8b7a0010137L,0xc69b4089b3ba8aa7L,
+        0xfac4bacde687ce85L } },
+    /* 36 << 14 */
+    { { 0x9164088d977eab40L,0x51f4c5b62760b390L,0xd238238f340dd553L,
+        0x358566c3db1d31c9L },
+      { 0x3a5ad69e5068f5ffL,0xf31435fcdaff6b06L,0xae549a5bd6debff0L,
+        0x59e5f0b775e01331L } },
+    /* 37 << 14 */
+    { { 0x5d492fb898559acfL,0x96018c2e4db79b50L,0x55f4a48f609f66aaL,
+        0x1943b3af4900a14fL },
+      { 0xc22496df15a40d39L,0xb2a446844c20f7c5L,0x76a35afa3b98404cL,
+        0xbec75725ff5d1b77L } },
+    /* 38 << 14 */
+    { { 0xb67aa163bea06444L,0x27e95bb2f724b6f2L,0x3c20e3e9d238c8abL,
+        0x1213754eddd6ae17L },
+      { 0x8c431020716e0f74L,0x6679c82effc095c2L,0x2eb3adf4d0ac2932L,
+        0x2cc970d301bb7a76L } },
+    /* 39 << 14 */
+    { { 0x70c71f2f740f0e66L,0x545c616b2b6b23ccL,0x4528cfcbb40a8bd7L,
+        0xff8396332ab27722L },
+      { 0x049127d9025ac99aL,0xd314d4a02b63e33bL,0xc8c310e728d84519L,
+        0x0fcb8983b3bc84baL } },
+    /* 40 << 14 */
+    { { 0x2cc5226138634818L,0x501814f4b44c2e0bL,0xf7e181aa54dfdba3L,
+        0xcfd58ff0e759718cL },
+      { 0xf90cdb14d3b507a8L,0x57bd478ec50bdad8L,0x29c197e250e5f9aaL,
+        0x4db6eef8e40bc855L } },
+    /* 41 << 14 */
+    { { 0x2cc8f21ad1fc0654L,0xc71cc96381269d73L,0xecfbb204077f49f9L,
+        0xdde92571ca56b793L },
+      { 0x9abed6a3f97ad8f7L,0xe6c19d3f924de3bdL,0x8dce92f4a140a800L,
+        0x85f44d1e1337af07L } },
+    /* 42 << 14 */
+    { { 0x5953c08b09d64c52L,0xa1b5e49ff5df9749L,0x336a8fb852735f7dL,
+        0xb332b6db9add676bL },
+      { 0x558b88a0b4511aa4L,0x09788752dbd5cc55L,0x16b43b9cd8cd52bdL,
+        0x7f0bc5a0c2a2696bL } },
+    /* 43 << 14 */
+    { { 0x146e12d4c11f61efL,0x9ce107543a83e79eL,0x08ec73d96cbfca15L,
+        0x09ff29ad5b49653fL },
+      { 0xe31b72bde7da946eL,0xebf9eb3bee80a4f2L,0xd1aabd0817598ce4L,
+        0x18b5fef453f37e80L } },
+    /* 44 << 14 */
+    { { 0xd5d5cdd35958cd79L,0x3580a1b51d373114L,0xa36e4c91fa935726L,
+        0xa38c534def20d760L },
+      { 0x7088e40a2ff5845bL,0xe5bb40bdbd78177fL,0x4f06a7a8857f9920L,
+        0xe3cc3e50e968f05dL } },
+    /* 45 << 14 */
+    { { 0x1d68b7fee5682d26L,0x5206f76faec7f87cL,0x41110530041951abL,
+        0x58ec52c1d4b5a71aL },
+      { 0xf3488f990f75cf9aL,0xf411951fba82d0d5L,0x27ee75be618895abL,
+        0xeae060d46d8aab14L } },
+    /* 46 << 14 */
+    { { 0x9ae1df737fb54dc2L,0x1f3e391b25963649L,0x242ec32afe055081L,
+        0x5bd450ef8491c9bdL },
+      { 0x367efc67981eb389L,0xed7e19283a0550d5L,0x362e776bab3ce75cL,
+        0xe890e3081f24c523L } },
+    /* 47 << 14 */
+    { { 0xb961b682feccef76L,0x8b8e11f58bba6d92L,0x8f2ccc4c2b2375c4L,
+        0x0d7f7a52e2f86cfaL },
+      { 0xfd94d30a9efe5633L,0x2d8d246b5451f934L,0x2234c6e3244e6a00L,
+        0xde2b5b0dddec8c50L } },
+    /* 48 << 14 */
+    { { 0x2ce53c5abf776f5bL,0x6f72407160357b05L,0xb259371771bf3f7aL,
+        0x87d2501c440c4a9fL },
+      { 0x440552e187b05340L,0xb7bf7cc821624c32L,0x4155a6ce22facddbL,
+        0x5a4228cb889837efL } },
+    /* 49 << 14 */
+    { { 0xef87d6d6fd4fd671L,0xa233687ec2daa10eL,0x7562224403c0eb96L,
+        0x7632d1848bf19be6L },
+      { 0x05d0f8e940735ff4L,0x3a3e6e13c00931f1L,0x31ccde6adafe3f18L,
+        0xf381366acfe51207L } },
+    /* 50 << 14 */
+    { { 0x24c222a960167d92L,0x62f9d6f87529f18cL,0x412397c00353b114L,
+        0x334d89dcef808043L },
+      { 0xd9ec63ba2a4383ceL,0xcec8e9375cf92ba0L,0xfb8b4288c8be74c0L,
+        0x67d6912f105d4391L } },
+    /* 51 << 14 */
+    { { 0x7b996c461b913149L,0x36aae2ef3a4e02daL,0xb68aa003972de594L,
+        0x284ec70d4ec6d545L },
+      { 0xf3d2b2d061391d54L,0x69c5d5d6fe114e92L,0xbe0f00b5b4482dffL,
+        0xe1596fa5f5bf33c5L } },
+    /* 52 << 14 */
+    { { 0x10595b5696a71cbaL,0x944938b2fdcadeb7L,0xa282da4cfccd8471L,
+        0x98ec05f30d37bfe1L },
+      { 0xe171ce1b0698304aL,0x2d69144421bdf79bL,0xd0cd3b741b21dec1L,
+        0x712ecd8b16a15f71L } },
+    /* 53 << 14 */
+    { { 0x8d4c00a700fd56e1L,0x02ec9692f9527c18L,0x21c449374a3e42e1L,
+        0x9176fbab1392ae0aL },
+      { 0x8726f1ba44b7b618L,0xb4d7aae9f1de491cL,0xf91df7b907b582c0L,
+        0x7e116c30ef60aa3aL } },
+    /* 54 << 14 */
+    { { 0x99270f81466265d7L,0xb15b6fe24df7adf0L,0xfe33b2d3f9738f7fL,
+        0x48553ab9d6d70f95L },
+      { 0x2cc72ac8c21e94dbL,0x795ac38dbdc0bbeeL,0x0a1be4492e40478fL,
+        0x81bd3394052bde55L } },
+    /* 55 << 14 */
+    { { 0x63c8dbe956b3c4f2L,0x017a99cf904177ccL,0x947bbddb4d010fc1L,
+        0xacf9b00bbb2c9b21L },
+      { 0x2970bc8d47173611L,0x1a4cbe08ac7d756fL,0x06d9f4aa67d541a2L,
+        0xa3e8b68959c2cf44L } },
+    /* 56 << 14 */
+    { { 0xaad066da4d88f1ddL,0xc604f1657ad35deaL,0x7edc07204478ca67L,
+        0xa10dfae0ba02ce06L },
+      { 0xeceb1c76af36f4e4L,0x994b2292af3f8f48L,0xbf9ed77b77c8a68cL,
+        0x74f544ea51744c9dL } },
+    /* 57 << 14 */
+    { { 0x82d05bb98113a757L,0x4ef2d2b48a9885e4L,0x1e332be51aa7865fL,
+        0x22b76b18290d1a52L },
+      { 0x308a231044351683L,0x9d861896a3f22840L,0x5959ddcd841ed947L,
+        0x0def0c94154b73bfL } },
+    /* 58 << 14 */
+    { { 0xf01054174c7c15e0L,0x539bfb023a277c32L,0xe699268ef9dccf5fL,
+        0x9f5796a50247a3bdL },
+      { 0x8b839de84f157269L,0xc825c1e57a30196bL,0x6ef0aabcdc8a5a91L,
+        0xf4a8ce6c498b7fe6L } },
+    /* 59 << 14 */
+    { { 0x1cce35a770cbac78L,0x83488e9bf6b23958L,0x0341a070d76cb011L,
+        0xda6c9d06ae1b2658L },
+      { 0xb701fb30dd648c52L,0x994ca02c52fb9fd1L,0x069331176f563086L,
+        0x3d2b810017856babL } },
+    /* 60 << 14 */
+    { { 0xe89f48c85963a46eL,0x658ab875a99e61c7L,0x6e296f874b8517b4L,
+        0x36c4fcdcfc1bc656L },
+      { 0xde5227a1a3906defL,0x9fe95f5762418945L,0x20c91e81fdd96cdeL,
+        0x5adbe47eda4480deL } },
+    /* 61 << 14 */
+    { { 0xa009370f396de2b6L,0x98583d4bf0ecc7bdL,0xf44f6b57e51d0672L,
+        0x03d6b078556b1984L },
+      { 0x27dbdd93b0b64912L,0x9b3a343415687b09L,0x0dba646151ec20a9L,
+        0xec93db7fff28187cL } },
+    /* 62 << 14 */
+    { { 0x00ff8c2466e48bddL,0x2514f2f911ccd78eL,0xeba11f4fe1250603L,
+        0x8a22cd41243fa156L },
+      { 0xa4e58df4b283e4c6L,0x78c298598b39783fL,0x5235aee2a5259809L,
+        0xc16284b50e0227ddL } },
+    /* 63 << 14 */
+    { { 0xa5f579161338830dL,0x6d4b8a6bd2123fcaL,0x236ea68af9c546f8L,
+        0xc1d36873fa608d36L },
+      { 0xcd76e4958d436d13L,0xd4d9c2218fb080afL,0x665c1728e8ad3fb5L,
+        0xcf1ebe4db3d572e0L } },
+    /* 64 << 14 */
+    { { 0xa7a8746a584c5e20L,0x267e4ea1b9dc7035L,0x593a15cfb9548c9bL,
+        0x5e6e21354bd012f3L },
+      { 0xdf31cc6a8c8f936eL,0x8af84d04b5c241dcL,0x63990a6f345efb86L,
+        0x6fef4e61b9b962cbL } },
+    /* 0 << 21 */
+    { { 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 << 21 */
+    { { 0xf6368f0925722608L,0x131260db131cf5c6L,0x40eb353bfab4f7acL,
+        0x85c7888037eee829L },
+      { 0x4c1581ffc3bdf24eL,0x5bff75cbf5c3c5a8L,0x35e8c83fa14e6f40L,
+        0xb81d1c0f0295e0caL } },
+    /* 2 << 21 */
+    { { 0xfcde7cc8f43a730fL,0xe89b6f3c33ab590eL,0xc823f529ad03240bL,
+        0x82b79afe98bea5dbL },
+      { 0x568f2856962fe5deL,0x0c590adb60c591f3L,0x1fc74a144a28a858L,
+        0x3b662498b3203f4cL } },
+    /* 3 << 21 */
+    { { 0x91e3cf0d6c39765aL,0xa2db3acdac3cca0bL,0x288f2f08cb953b50L,
+        0x2414582ccf43cf1aL },
+      { 0x8dec8bbc60eee9a8L,0x54c79f02729aa042L,0xd81cd5ec6532f5d5L,
+        0xa672303acf82e15fL } },
+    /* 4 << 21 */
+    { { 0x376aafa8719c0563L,0xcd8ad2dcbc5fc79fL,0x303fdb9fcb750cd3L,
+        0x14ff052f4418b08eL },
+      { 0xf75084cf3e2d6520L,0x7ebdf0f8144ed509L,0xf43bf0f2d3f25b98L,
+        0x86ad71cfa354d837L } },
+    /* 5 << 21 */
+    { { 0xb827fe9226f43572L,0xdfd3ab5b5d824758L,0x315dd23a539094c1L,
+        0x85c0e37a66623d68L },
+      { 0x575c79727be19ae0L,0x616a3396df0d36b5L,0xa1ebb3c826b1ff7eL,
+        0x635b9485140ad453L } },
+    /* 6 << 21 */
+    { { 0x92bf3cdada430c0bL,0x4702850e3a96dac6L,0xc91cf0a515ac326aL,
+        0x95de4f49ab8c25e4L },
+      { 0xb01bad09e265c17cL,0x24e45464087b3881L,0xd43e583ce1fac5caL,
+        0xe17cb3186ead97a6L } },
+    /* 7 << 21 */
+    { { 0x6cc3924374dcec46L,0x33cfc02d54c2b73fL,0x82917844f26cd99cL,
+        0x8819dd95d1773f89L },
+      { 0x09572aa60871f427L,0x8e0cf365f6f01c34L,0x7fa52988bff1f5afL,
+        0x4eb357eae75e8e50L } },
+    /* 8 << 21 */
+    { { 0xd9d0c8c4868af75dL,0xd7325cff45c8c7eaL,0xab471996cc81ecb0L,
+        0xff5d55f3611824edL },
+      { 0xbe3145411977a0eeL,0x5085c4c5722038c6L,0x2d5335bff94bb495L,
+        0x894ad8a6c8e2a082L } },
+    /* 9 << 21 */
+    { { 0x5c3e2341ada35438L,0xf4a9fc89049b8c4eL,0xbeeb355a9f17cf34L,
+        0x3f311e0e6c91fe10L },
+      { 0xc2d2003892ab9891L,0x257bdcc13e8ce9a9L,0x1b2d978988c53beeL,
+        0x927ce89acdba143aL } },
+    /* 10 << 21 */
+    { { 0xb0a32cca523db280L,0x5c889f8a50d43783L,0x503e04b34897d16fL,
+        0x8cdb6e7808f5f2e8L },
+      { 0x6ab91cf0179c8e74L,0xd8874e5248211d60L,0xf948d4d5ea851200L,
+        0x4076d41ee6f9840aL } },
+    /* 11 << 21 */
+    { { 0xc20e263c47b517eaL,0x79a448fd30685e5eL,0xe55f6f78f90631a0L,
+        0x88a790b1a79e6346L },
+      { 0x62160c7d80969fe8L,0x54f92fd441491bb9L,0xa6645c235c957526L,
+        0xf44cc5aebea3ce7bL } },
+    /* 12 << 21 */
+    { { 0xf76283278b1e68b7L,0xc731ad7a303f29d3L,0xfe5a9ca957d03ecbL,
+        0x96c0d50c41bc97a7L },
+      { 0xc4669fe79b4f7f24L,0xfdd781d83d9967efL,0x7892c7c35d2c208dL,
+        0x8bf64f7cae545cb3L } },
+    /* 13 << 21 */
+    { { 0xc01f862c467be912L,0xf4c85ee9c73d30ccL,0x1fa6f4be6ab83ec7L,
+        0xa07a3c1c4e3e3cf9L },
+      { 0x87f8ef450c00beb3L,0x30e2c2b3000d4c3eL,0x1aa00b94fe08bf5bL,
+        0x32c133aa9224ef52L } },
+    /* 14 << 21 */
+    { { 0x38df16bb32e5685dL,0x68a9e06958e6f544L,0x495aaff7cdc5ebc6L,
+        0xf894a645378b135fL },
+      { 0xf316350a09e27ecfL,0xeced201e58f7179dL,0x2eec273ce97861baL,
+        0x47ec2caed693be2eL } },
+    /* 15 << 21 */
+    { { 0xfa4c97c4f68367ceL,0xe4f47d0bbe5a5755L,0x17de815db298a979L,
+        0xd7eca659c177dc7dL },
+      { 0x20fdbb7149ded0a3L,0x4cb2aad4fb34d3c5L,0x2cf31d2860858a33L,
+        0x3b6873efa24aa40fL } },
+    /* 16 << 21 */
+    { { 0x540234b22c11bb37L,0x2d0366dded4c74a3L,0xf9a968daeec5f25dL,
+        0x3660106867b63142L },
+      { 0x07cd6d2c68d7b6d4L,0xa8f74f090c842942L,0xe27514047768b1eeL,
+        0x4b5f7e89fe62aee4L } },
+    /* 17 << 21 */
+    { { 0xc6a7717789070d26L,0xa1f28e4edd1c8bc7L,0xea5f4f06469e1f17L,
+        0x78fc242afbdb78e0L },
+      { 0xc9c7c5928b0588f1L,0xb6b7a0fd1535921eL,0xcc5bdb91bde5ae35L,
+        0xb42c485e12ff1864L } },
+    /* 18 << 21 */
+    { { 0xa1113e13dbab98aaL,0xde9d469ba17b1024L,0x23f48b37c0462d3aL,
+        0x3752e5377c5c078dL },
+      { 0xe3a86add15544eb9L,0xf013aea780fba279L,0x8b5bb76cf22001b5L,
+        0xe617ba14f02891abL } },
+    /* 19 << 21 */
+    { { 0xd39182a6936219d3L,0x5ce1f194ae51cb19L,0xc78f8598bf07a74cL,
+        0x6d7158f222cbf1bcL },
+      { 0x3b846b21e300ce18L,0x35fba6302d11275dL,0x5fe25c36a0239b9bL,
+        0xd8beb35ddf05d940L } },
+    /* 20 << 21 */
+    { { 0x4db02bb01f7e320dL,0x0641c3646da320eaL,0x6d95fa5d821389a3L,
+        0x926997488fcd8e3dL },
+      { 0x316fef17ceb6c143L,0x67fcb841d933762bL,0xbb837e35118b17f8L,
+        0x4b92552f9fd24821L } },
+    /* 21 << 21 */
+    { { 0xae6bc70e46aca793L,0x1cf0b0e4e579311bL,0x8dc631be5802f716L,
+        0x099bdc6fbddbee4dL },
+      { 0xcc352bb20caf8b05L,0xf74d505a72d63df2L,0xb9876d4b91c4f408L,
+        0x1ce184739e229b2dL } },
+    /* 22 << 21 */
+    { { 0x4950759783abdb4aL,0x850fbcb6dee84b18L,0x6325236e609e67dcL,
+        0x04d831d99336c6d8L },
+      { 0x8deaae3bfa12d45dL,0xe425f8ce4746e246L,0x8004c17524f5f31eL,
+        0xaca16d8fad62c3b7L } },
+    /* 23 << 21 */
+    { { 0x0dc15a6a9152f934L,0xf1235e5ded0e12c1L,0xc33c06ecda477dacL,
+        0x76be8732b2ea0006L },
+      { 0xcf3f78310c0cd313L,0x3c524553a614260dL,0x31a756f8cab22d15L,
+        0x03ee10d177827a20L } },
+    /* 24 << 21 */
+    { { 0xd1e059b21994ef20L,0x2a653b69638ae318L,0x70d5eb582f699010L,
+        0x279739f709f5f84aL },
+      { 0x5da4663c8b799336L,0xfdfdf14d203c37ebL,0x32d8a9dca1dbfb2dL,
+        0xab40cff077d48f9bL } },
+    /* 25 << 21 */
+    { { 0xc018b383d20b42d5L,0xf9a810ef9f78845fL,0x40af3753bdba9df0L,
+        0xb90bdcfc131dfdf9L },
+      { 0x18720591f01ab782L,0xc823f2116af12a88L,0xa51b80f30dc14401L,
+        0xde248f77fb2dfbe3L } },
+    /* 26 << 21 */
+    { { 0xef5a44e50cafe751L,0x73997c9cd4dcd221L,0x32fd86d1de854024L,
+        0xd5b53adca09b84bbL },
+      { 0x008d7a11dcedd8d1L,0x406bd1c874b32c84L,0x5d4472ff05dde8b1L,
+        0x2e25f2cdfce2b32fL } },
+    /* 27 << 21 */
+    { { 0xbec0dd5e29dfc254L,0x4455fcf62b98b267L,0x0b4d43a5c72df2adL,
+        0xea70e6be48a75397L },
+      { 0x2aad61695820f3bfL,0xf410d2dd9e37f68fL,0x70fb7dba7be5ac83L,
+        0x636bb64536ec3eecL } },
+    /* 28 << 21 */
+    { { 0x27104ea39754e21cL,0xbc87a3e68d63c373L,0x483351d74109db9aL,
+        0x0fa724e360134da7L },
+      { 0x9ff44c29b0720b16L,0x2dd0cf1306aceeadL,0x5942758ce26929a6L,
+        0x96c5db92b766a92bL } },
+    /* 29 << 21 */
+    { { 0xcec7d4c05f18395eL,0xd3f227441f80d032L,0x7a68b37acb86075bL,
+        0x074764ddafef92dbL },
+      { 0xded1e9507bc7f389L,0xc580c850b9756460L,0xaeeec2a47da48157L,
+        0x3f0b4e7f82c587b3L } },
+    /* 30 << 21 */
+    { { 0x231c6de8a9f19c53L,0x5717bd736974e34eL,0xd9e1d216f1508fa9L,
+        0x9f112361dadaa124L },
+      { 0x80145e31823b7348L,0x4dd8f0d5ac634069L,0xe3d82fc72297c258L,
+        0x276fcfee9cee7431L } },
+    /* 31 << 21 */
+    { { 0x8eb61b5e2bc0aea9L,0x4f668fd5de329431L,0x03a32ab138e4b87eL,
+        0xe137451773d0ef0bL },
+      { 0x1a46f7e6853ac983L,0xc3bdf42e68e78a57L,0xacf207852ea96dd1L,
+        0xa10649b9f1638460L } },
+    /* 32 << 21 */
+    { { 0xf2369f0b879fbbedL,0x0ff0ae86da9d1869L,0x5251d75956766f45L,
+        0x4984d8c02be8d0fcL },
+      { 0x7ecc95a6d21008f0L,0x29bd54a03a1a1c49L,0xab9828c5d26c50f3L,
+        0x32c0087c51d0d251L } },
+    /* 33 << 21 */
+    { { 0x9bac3ce60c1cdb26L,0xcd94d947557ca205L,0x1b1bd5989db1fdcdL,
+        0x0eda0108a3d8b149L },
+      { 0x9506661056152fccL,0xc2f037e6e7192b33L,0xdeffb41ac92e05a4L,
+        0x1105f6c2c2f6c62eL } },
+    /* 34 << 21 */
+    { { 0x68e735008733913cL,0xcce861633f3adc40L,0xf407a94238a278e9L,
+        0xd13c1b9d2ab21292L },
+      { 0x93ed7ec71c74cf5cL,0x8887dc48f1a4c1b4L,0x3830ff304b3a11f1L,
+        0x358c5a3c58937cb6L } },
+    /* 35 << 21 */
+    { { 0x027dc40489022829L,0x40e939773b798f79L,0x90ad333738be6eadL,
+        0x9c23f6bcf34c0a5dL },
+      { 0xd1711a35fbffd8bbL,0x60fcfb491949d3ddL,0x09c8ef4b7825d93aL,
+        0x24233cffa0a8c968L } },
+    /* 36 << 21 */
+    { { 0x67ade46ce6d982afL,0xebb6bf3ee7544d7cL,0xd6b9ba763d8bd087L,
+        0x46fe382d4dc61280L },
+      { 0xbd39a7e8b5bdbd75L,0xab381331b8f228feL,0x0709a77cce1c4300L,
+        0x6a247e56f337ceacL } },
+    /* 37 << 21 */
+    { { 0x8f34f21b636288beL,0x9dfdca74c8a7c305L,0x6decfd1bea919e04L,
+        0xcdf2688d8e1991f8L },
+      { 0xe607df44d0f8a67eL,0xd985df4b0b58d010L,0x57f834c50c24f8f4L,
+        0xe976ef56a0bf01aeL } },
+    /* 38 << 21 */
+    { { 0x536395aca1c32373L,0x351027aa734c0a13L,0xd2f1b5d65e6bd5bcL,
+        0x2b539e24223debedL },
+      { 0xd4994cec0eaa1d71L,0x2a83381d661dcf65L,0x5f1aed2f7b54c740L,
+        0x0bea3fa5d6dda5eeL } },
+    /* 39 << 21 */
+    { { 0x9d4fb68436cc6134L,0x8eb9bbf3c0a443ddL,0xfc500e2e383b7d2aL,
+        0x7aad621c5b775257L },
+      { 0x69284d740a8f7cc0L,0xe820c2ce07562d65L,0xbf9531b9499758eeL,
+        0x73e95ca56ee0cc2dL } },
+    /* 40 << 21 */
+    { { 0xf61790abfbaf50a5L,0xdf55e76b684e0750L,0xec516da7f176b005L,
+        0x575553bb7a2dddc7L },
+      { 0x37c87ca3553afa73L,0x315f3ffc4d55c251L,0xe846442aaf3e5d35L,
+        0x61b911496495ff28L } },
+    /* 41 << 21 */
+    { { 0x23cc95d3fa326dc3L,0x1df4da1f18fc2ceaL,0x24bf9adcd0a37d59L,
+        0xb6710053320d6e1eL },
+      { 0x96f9667e618344d1L,0xcc7ce042a06445afL,0xa02d8514d68dbc3aL,
+        0x4ea109e4280b5a5bL } },
+    /* 42 << 21 */
+    { { 0x5741a7acb40961bfL,0x4ada59376aa56bfaL,0x7feb914502b765d1L,
+        0x561e97bee6ad1582L },
+      { 0xbbc4a5b6da3982f5L,0x0c2659edb546f468L,0xb8e7e6aa59612d20L,
+        0xd83dfe20ac19e8e0L } },
+    /* 43 << 21 */
+    { { 0x8530c45fb835398cL,0x6106a8bfb38a41c2L,0x21e8f9a635f5dcdbL,
+        0x39707137cae498edL },
+      { 0x70c23834d8249f00L,0x9f14b58fab2537a0L,0xd043c3655f61c0c2L,
+        0xdc5926d609a194a7L } },
+    /* 44 << 21 */
+    { { 0xddec03398e77738aL,0xd07a63effba46426L,0x2e58e79cee7f6e86L,
+        0xe59b0459ff32d241L },
+      { 0xc5ec84e520fa0338L,0x97939ac8eaff5aceL,0x0310a4e3b4a38313L,
+        0x9115fba28f9d9885L } },
+    /* 45 << 21 */
+    { { 0x8dd710c25fadf8c3L,0x66be38a2ce19c0e2L,0xd42a279c4cfe5022L,
+        0x597bb5300e24e1b8L },
+      { 0x3cde86b7c153ca7fL,0xa8d30fb3707d63bdL,0xac905f92bd60d21eL,
+        0x98e7ffb67b9a54abL } },
+    /* 46 << 21 */
+    { { 0xd7147df8e9726a30L,0xb5e216ffafce3533L,0xb550b7992ff1ec40L,
+        0x6b613b87a1e953fdL },
+      { 0x87b88dba792d5610L,0x2ee1270aa190fbe1L,0x02f4e2dc2ef581daL,
+        0x016530e4eff82a95L } },
+    /* 47 << 21 */
+    { { 0xcbb93dfd8fd6ee89L,0x16d3d98646848fffL,0x600eff241da47adfL,
+        0x1b9754a00ad47a71L },
+      { 0x8f9266df70c33b98L,0xaadc87aedf34186eL,0x0d2ce8e14ad24132L,
+        0x8a47cbfc19946ebaL } },
+    /* 48 << 21 */
+    { { 0x47feeb6662b5f3afL,0xcefab5610abb3734L,0x449de60e19f35cb1L,
+        0x39f8db14157f0eb9L },
+      { 0xffaecc5b3c61bfd6L,0xa5a4d41d41216703L,0x7f8fabed224e1cc2L,
+        0x0d5a8186871ad953L } },
+    /* 49 << 21 */
+    { { 0xf10774f7d22da9a9L,0x45b8a678cc8a9b0dL,0xd9c2e722bdc32cffL,
+        0xbf71b5f5337202a5L },
+      { 0x95c57f2f69fc4db9L,0xb6dad34c765d01e1L,0x7e0bd13fcb904635L,
+        0x61751253763a588cL } },
+    /* 50 << 21 */
+    { { 0xd85c299781af2c2dL,0xc0f7d9c481b9d7daL,0x838a34ae08533e8dL,
+        0x15c4cb08311d8311L },
+      { 0x97f832858e121e14L,0xeea7dc1e85000a5fL,0x0c6059b65d256274L,
+        0xec9beaceb95075c0L } },
+    /* 51 << 21 */
+    { { 0x173daad71df97828L,0xbf851cb5a8937877L,0xb083c59401646f3cL,
+        0x3bad30cf50c6d352L },
+      { 0xfeb2b202496bbceaL,0x3cf9fd4f18a1e8baL,0xd26de7ff1c066029L,
+        0x39c81e9e4e9ed4f8L } },
+    /* 52 << 21 */
+    { { 0xd8be0cb97b390d35L,0x01df2bbd964aab27L,0x3e8c1a65c3ef64f8L,
+        0x567291d1716ed1ddL },
+      { 0x95499c6c5f5406d3L,0x71fdda395ba8e23fL,0xcfeb320ed5096eceL,
+        0xbe7ba92bca66dd16L } },
+    /* 53 << 21 */
+    { { 0x4608d36bc6fb5a7dL,0xe3eea15a6d2dd0e0L,0x75b0a3eb8f97a36aL,
+        0xf59814cc1c83de1eL },
+      { 0x56c9c5b01c33c23fL,0xa96c1da46faa4136L,0x46bf2074de316551L,
+        0x3b866e7b1f756c8fL } },
+    /* 54 << 21 */
+    { { 0x727727d81495ed6bL,0xb2394243b682dce7L,0x8ab8454e758610f3L,
+        0xc243ce84857d72a4L },
+      { 0x7b320d71dbbf370fL,0xff9afa3778e0f7caL,0x0119d1e0ea7b523fL,
+        0xb997f8cb058c7d42L } },
+    /* 55 << 21 */
+    { { 0x285bcd2a37bbb184L,0x51dcec49a45d1fa6L,0x6ade3b64e29634cbL,
+        0x080c94a726b86ef1L },
+      { 0xba583db12283fbe3L,0x902bddc85a9315edL,0x07c1ccb386964becL,
+        0x78f4eacfb6258301L } },
+    /* 56 << 21 */
+    { { 0x4bdf3a4956f90823L,0xba0f5080741d777bL,0x091d71c3f38bf760L,
+        0x9633d50f9b625b02L },
+      { 0x03ecb743b8c9de61L,0xb47512545de74720L,0x9f9defc974ce1cb2L,
+        0x774a4f6a00bd32efL } },
+    /* 57 << 21 */
+    { { 0xaca385f773848f22L,0x53dad716f3f8558eL,0xab7b34b093c471f9L,
+        0xf530e06919644bc7L },
+      { 0x3d9fb1ffdd59d31aL,0x4382e0df08daa795L,0x165c6f4bd5cc88d7L,
+        0xeaa392d54a18c900L } },
+    /* 58 << 21 */
+    { { 0x94203c67648024eeL,0x188763f28c2fabcdL,0xa80f87acbbaec835L,
+        0x632c96e0f29d8d54L },
+      { 0x29b0a60e4c00a95eL,0x2ef17f40e011e9faL,0xf6c0e1d115b77223L,
+        0xaaec2c6214b04e32L } },
+    /* 59 << 21 */
+    { { 0xd35688d83d84e58cL,0x2af5094c958571dbL,0x4fff7e19760682a6L,
+        0x4cb27077e39a407cL },
+      { 0x0f59c5474ff0e321L,0x169f34a61b34c8ffL,0x2bff109652bc1ba7L,
+        0xa25423b783583544L } },
+    /* 60 << 21 */
+    { { 0x5d55d5d50ac8b782L,0xff6622ec2db3c892L,0x48fce7416b8bb642L,
+        0x31d6998c69d7e3dcL },
+      { 0xdbaf8004cadcaed0L,0x801b0142d81d053cL,0x94b189fc59630ec6L,
+        0x120e9934af762c8eL } },
+    /* 61 << 21 */
+    { { 0x53a29aa4fdc6a404L,0x19d8e01ea1909948L,0x3cfcabf1d7e89681L,
+        0x3321a50d4e132d37L },
+      { 0xd0496863e9a86111L,0x8c0cde6106a3bc65L,0xaf866c49fc9f8eefL,
+        0x2066350eff7f5141L } },
+    /* 62 << 21 */
+    { { 0x4f8a4689e56ddfbdL,0xea1b0c07fe32983aL,0x2b317462873cb8cbL,
+        0x658deddc2d93229fL },
+      { 0x65efaf4d0f64ef58L,0xfe43287d730cc7a8L,0xaebc0c723d047d70L,
+        0x92efa539d92d26c9L } },
+    /* 63 << 21 */
+    { { 0x06e7845794b56526L,0x415cb80f0961002dL,0x89e5c56576dcb10fL,
+        0x8bbb6982ff9259feL },
+      { 0x4fe8795b9abc2668L,0xb5d4f5341e678fb1L,0x6601f3be7b7da2b9L,
+        0x98da59e2a13d6805L } },
+    /* 64 << 21 */
+    { { 0x190d8ea601799a52L,0xa20cec41b86d2952L,0x3062ffb27fff2a7cL,
+        0x741b32e579f19d37L },
+      { 0xf80d81814eb57d47L,0x7a2d0ed416aef06bL,0x09735fb01cecb588L,
+        0x1641caaac6061f5bL } },
+    /* 0 << 28 */
+    { { 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 << 28 */
+    { { 0x7f99824f20151427L,0x206828b692430206L,0xaa9097d7e1112357L,
+        0xacf9a2f209e414ecL },
+      { 0xdbdac9da27915356L,0x7e0734b7001efee3L,0x54fab5bbd2b288e2L,
+        0x4c630fc4f62dd09cL } },
+    /* 2 << 28 */
+    { { 0x8537107a1ac2703bL,0xb49258d86bc857b5L,0x57df14debcdaccd1L,
+        0x24ab68d7c4ae8529L },
+      { 0x7ed8b5d4734e59d0L,0x5f8740c8c495cc80L,0x84aedd5a291db9b3L,
+        0x80b360f84fb995beL } },
+    /* 3 << 28 */
+    { { 0xae915f5d5fa067d1L,0x4134b57f9668960cL,0xbd3656d6a48edaacL,
+        0xdac1e3e4fc1d7436L },
+      { 0x674ff869d81fbb26L,0x449ed3ecb26c33d4L,0x85138705d94203e8L,
+        0xccde538bbeeb6f4aL } },
+    /* 4 << 28 */
+    { { 0x55d5c68da61a76faL,0x598b441dca1554dcL,0xd39923b9773b279cL,
+        0x33331d3c36bf9efcL },
+      { 0x2d4c848e298de399L,0xcfdb8e77a1a27f56L,0x94c855ea57b8ab70L,
+        0xdcdb9dae6f7879baL } },
+    /* 5 << 28 */
+    { { 0x7bdff8c2019f2a59L,0xb3ce5bb3cb4fbc74L,0xea907f688a9173ddL,
+        0x6cd3d0d395a75439L },
+      { 0x92ecc4d6efed021cL,0x09a9f9b06a77339aL,0x87ca6b157188c64aL,
+        0x10c2996844899158L } },
+    /* 6 << 28 */
+    { { 0x5859a229ed6e82efL,0x16f338e365ebaf4eL,0x0cd313875ead67aeL,
+        0x1c73d22854ef0bb4L },
+      { 0x4cb5513174a5c8c7L,0x01cd29707f69ad6aL,0xa04d00dde966f87eL,
+        0xd96fe4470b7b0321L } },
+    /* 7 << 28 */
+    { { 0x342ac06e88fbd381L,0x02cd4a845c35a493L,0xe8fa89de54f1bbcdL,
+        0x341d63672575ed4cL },
+      { 0xebe357fbd238202bL,0x600b4d1aa984ead9L,0xc35c9f4452436ea0L,
+        0x96fe0a39a370751bL } },
+    /* 8 << 28 */
+    { { 0x4c4f07367f636a38L,0x9f943fb70e76d5cbL,0xb03510baa8b68b8bL,
+        0xc246780a9ed07a1fL },
+      { 0x3c0514156d549fc2L,0xc2953f31607781caL,0x955e2c69d8d95413L,
+        0xb300fadc7bd282e3L } },
+    /* 9 << 28 */
+    { { 0x81fe7b5087e9189fL,0xdb17375cf42dda27L,0x22f7d896cf0a5904L,
+        0xa0e57c5aebe348e6L },
+      { 0xa61011d3f40e3c80L,0xb11893218db705c5L,0x4ed9309e50fedec3L,
+        0xdcf14a104d6d5c1dL } },
+    /* 10 << 28 */
+    { { 0x056c265b55691342L,0xe8e0850491049dc7L,0x131329f5c9bae20aL,
+        0x96c8b3e8d9dccdb4L },
+      { 0x8c5ff838fb4ee6b4L,0xfc5a9aeb41e8ccf0L,0x7417b764fae050c6L,
+        0x0953c3d700452080L } },
+    /* 11 << 28 */
+    { { 0x2137268238dfe7e8L,0xea417e152bb79d4bL,0x59641f1c76e7cf2dL,
+        0x271e3059ea0bcfccL },
+      { 0x624c7dfd7253ecbdL,0x2f552e254fca6186L,0xcbf84ecd4d866e9cL,
+        0x73967709f68d4610L } },
+    /* 12 << 28 */
+    { { 0xa14b1163c27901b4L,0xfd9236e0899b8bf3L,0x42b091eccbc6da0aL,
+        0xbb1dac6f5ad1d297L },
+      { 0x80e61d53a91cf76eL,0x4110a412d31f1ee7L,0x2d87c3ba13efcf77L,
+        0x1f374bb4df450d76L } },
+    /* 13 << 28 */
+    { { 0x5e78e2f20d188dabL,0xe3968ed0f4b885efL,0x46c0568e7314570fL,
+        0x3161633801170521L },
+      { 0x18e1e7e24f0c8afeL,0x4caa75ffdeea78daL,0x82db67f27c5d8a51L,
+        0x36a44d866f505370L } },
+    /* 14 << 28 */
+    { { 0xd72c5bda0333974fL,0x5db516ae27a70146L,0x34705281210ef921L,
+        0xbff17a8f0c9c38e5L },
+      { 0x78f4814e12476da1L,0xc1e1661333c16980L,0x9e5b386f424d4bcaL,
+        0x4c274e87c85740deL } },
+    /* 15 << 28 */
+    { { 0xb6a9b88d6c2f5226L,0x14d1b944550d7ca8L,0x580c85fc1fc41709L,
+        0xc1da368b54c6d519L },
+      { 0x2b0785ced5113cf7L,0x0670f6335a34708fL,0x46e2376715cc3f88L,
+        0x1b480cfa50c72c8fL } },
+    /* 16 << 28 */
+    { { 0x202886024147519aL,0xd0981eac26b372f0L,0xa9d4a7caa785ebc8L,
+        0xd953c50ddbdf58e9L },
+      { 0x9d6361ccfd590f8fL,0x72e9626b44e6c917L,0x7fd9611022eb64cfL,
+        0x863ebb7e9eb288f3L } },
+    /* 17 << 28 */
+    { { 0x6e6ab7616aca8ee7L,0x97d10b39d7b40358L,0x1687d3771e5feb0dL,
+        0xc83e50e48265a27aL },
+      { 0x8f75a9fec954b313L,0xcc2e8f47310d1f61L,0xf5ba81c56557d0e0L,
+        0x25f9680c3eaf6207L } },
+    /* 18 << 28 */
+    { { 0xf95c66094354080bL,0x5225bfa57bf2fe1cL,0xc5c004e25c7d98faL,
+        0x3561bf1c019aaf60L },
+      { 0x5e6f9f17ba151474L,0xdec2f934b04f6ecaL,0x64e368a1269acb1eL,
+        0x1332d9e40cdda493L } },
+    /* 19 << 28 */
+    { { 0x60d6cf69df23de05L,0x66d17da2009339a0L,0x9fcac9850a693923L,
+        0xbcf057fced7c6a6dL },
+      { 0xc3c5c8c5f0b5662cL,0x25318dd8dcba4f24L,0x60e8cb75082b69ffL,
+        0x7c23b3ee1e728c01L } },
+    /* 20 << 28 */
+    { { 0x15e10a0a097e4403L,0xcb3d0a8619854665L,0x88d8e211d67d4826L,
+        0xb39af66e0b9d2839L },
+      { 0xa5f94588bd475ca8L,0xe06b7966c077b80bL,0xfedb1485da27c26cL,
+        0xd290d33afe0fd5e0L } },
+    /* 21 << 28 */
+    { { 0xa40bcc47f34fb0faL,0xb4760cc81fb1ab09L,0x8fca0993a273bfe3L,
+        0x13e4fe07f70b213cL },
+      { 0x3bcdb992fdb05163L,0x8c484b110c2b19b6L,0x1acb815faaf2e3e2L,
+        0xc6905935b89ff1b4L } },
+    /* 22 << 28 */
+    { { 0xb2ad6f9d586e74e1L,0x488883ad67b80484L,0x758aa2c7369c3ddbL,
+        0x8ab74e699f9afd31L },
+      { 0x10fc2d285e21beb1L,0x3484518a318c42f9L,0x377427dc53cf40c3L,
+        0x9de0781a391bc1d9L } },
+    /* 23 << 28 */
+    { { 0x8faee858693807e1L,0xa38653274e81ccc7L,0x02c30ff26f835b84L,
+        0xb604437b0d3d38d4L },
+      { 0xb3fc8a985ca1823dL,0xb82f7ec903be0324L,0xee36d761cf684a33L,
+        0x5a01df0e9f29bf7dL } },
+    /* 24 << 28 */
+    { { 0x686202f31306583dL,0x05b10da0437c622eL,0xbf9aaa0f076a7bc8L,
+        0x25e94efb8f8f4e43L },
+      { 0x8a35c9b7fa3dc26dL,0xe0e5fb9396ff03c5L,0xa77e3843ebc394ceL,
+        0xcede65958361de60L } },
+    /* 25 << 28 */
+    { { 0xd27c22f6a1993545L,0xab01cc3624d671baL,0x63fa2877a169c28eL,
+        0x925ef9042eb08376L },
+      { 0x3b2fa3cf53aa0b32L,0xb27beb5b71c49d7aL,0xb60e1834d105e27fL,
+        0xd60897884f68570dL } },
+    /* 26 << 28 */
+    { { 0x23094ce0d6fbc2acL,0x738037a1815ff551L,0xda73b1bb6bef119cL,
+        0xdcf6c430eef506baL },
+      { 0x00e4fe7be3ef104aL,0xebdd9a2c0a065628L,0x853a81c38792043eL,
+        0x22ad6eceb3b59108L } },
+    /* 27 << 28 */
+    { { 0x9fb813c039cd297dL,0x8ec7e16e05bda5d9L,0x2834797c0d104b96L,
+        0xcc11a2e77c511510L },
+      { 0x96ca5a5396ee6380L,0x054c8655cea38742L,0xb5946852d54dfa7dL,
+        0x97c422e71f4ab207L } },
+    /* 28 << 28 */
+    { { 0xbf9075090c22b540L,0x2cde42aab7c267d4L,0xba18f9ed5ab0d693L,
+        0x3ba62aa66e4660d9L },
+      { 0xb24bf97bab9ea96aL,0x5d039642e3b60e32L,0x4e6a45067c4d9bd5L,
+        0x666c5b9e7ed4a6a4L } },
+    /* 29 << 28 */
+    { { 0xfa3fdcd98edbd7ccL,0x4660bb87c6ccd753L,0x9ae9082021e6b64fL,
+        0x8a56a713b36bfb3fL },
+      { 0xabfce0965726d47fL,0x9eed01b20b1a9a7fL,0x30e9cad44eb74a37L,
+        0x7b2524cc53e9666dL } },
+    /* 30 << 28 */
+    { { 0x6a29683b8f4b002fL,0xc2200d7a41f4fc20L,0xcf3af47a3a338accL,
+        0x6539a4fbe7128975L },
+      { 0xcec31c14c33c7fcfL,0x7eb6799bc7be322bL,0x119ef4e96646f623L,
+        0x7b7a26a554d7299bL } },
+    /* 31 << 28 */
+    { { 0xcb37f08d403f46f2L,0x94b8fc431a0ec0c7L,0xbb8514e3c332142fL,
+        0xf3ed2c33e80d2a7aL },
+      { 0x8d2080afb639126cL,0xf7b6be60e3553adeL,0x3950aa9f1c7e2b09L,
+        0x847ff9586410f02bL } },
+    /* 32 << 28 */
+    { { 0x877b7cf5678a31b0L,0xd50301ae3998b620L,0x734257c5c00fb396L,
+        0xf9fb18a004e672a6L },
+      { 0xff8bd8ebe8758851L,0x1e64e4c65d99ba44L,0x4b8eaedf7dfd93b7L,
+        0xba2f2a9804e76b8cL } },
+    /* 33 << 28 */
+    { { 0x7d790cbae8053433L,0xc8e725a03d2c9585L,0x58c5c476cdd8f5edL,
+        0xd106b952efa9fe1dL },
+      { 0x3c5c775b0eff13a9L,0x242442bae057b930L,0xe9f458d4c9b70cbdL,
+        0x69b71448a3cdb89aL } },
+    /* 34 << 28 */
+    { { 0x41ee46f60e2ed742L,0x573f104540067493L,0xb1e154ff9d54c304L,
+        0x2ad0436a8d3a7502L },
+      { 0xee4aaa2d431a8121L,0xcd38b3ab886f11edL,0x57d49ea6034a0eb7L,
+        0xd2b773bdf7e85e58L } },
+    /* 35 << 28 */
+    { { 0x4a559ac49b5c1f14L,0xc444be1a3e54df2bL,0x13aad704eda41891L,
+        0xcd927bec5eb5c788L },
+      { 0xeb3c8516e48c8a34L,0x1b7ac8124b546669L,0x1815f896594df8ecL,
+        0x87c6a79c79227865L } },
+    /* 36 << 28 */
+    { { 0xae02a2f09b56ddbdL,0x1339b5ac8a2f1cf3L,0xf2b569c7839dff0dL,
+        0xb0b9e864fee9a43dL },
+      { 0x4ff8ca4177bb064eL,0x145a2812fd249f63L,0x3ab7beacf86f689aL,
+        0x9bafec2701d35f5eL } },
+    /* 37 << 28 */
+    { { 0x28054c654265aa91L,0xa4b18304035efe42L,0x6887b0e69639dec7L,
+        0xf4b8f6ad3d52aea5L },
+      { 0xfb9293cc971a8a13L,0x3f159e5d4c934d07L,0x2c50e9b109acbc29L,
+        0x08eb65e67154d129L } },
+    /* 38 << 28 */
+    { { 0x4feff58930b75c3eL,0x0bb82fe294491c93L,0xd8ac377a89af62bbL,
+        0xd7b514909685e49fL },
+      { 0xabca9a7b04497f19L,0x1b35ed0a1a7ad13fL,0x6b601e213ec86ed6L,
+        0xda91fcb9ce0c76f1L } },
+    /* 39 << 28 */
+    { { 0x9e28507bd7ab27e1L,0x7c19a55563945b7bL,0x6b43f0a1aafc9827L,
+        0x443b4fbd3aa55b91L },
+      { 0x962b2e656962c88fL,0x139da8d4ce0db0caL,0xb93f05dd1b8d6c4fL,
+        0x779cdff7180b9824L } },
+    /* 40 << 28 */
+    { { 0xbba23fddae57c7b7L,0x345342f21b932522L,0xfd9c80fe556d4aa3L,
+        0xa03907ba6525bb61L },
+      { 0x38b010e1ff218933L,0xc066b654aa52117bL,0x8e14192094f2e6eaL,
+        0x66a27dca0d32f2b2L } },
+    /* 41 << 28 */
+    { { 0x69c7f993048b3717L,0xbf5a989ab178ae1cL,0x49fa9058564f1d6bL,
+        0x27ec6e15d31fde4eL },
+      { 0x4cce03737276e7fcL,0x64086d7989d6bf02L,0x5a72f0464ccdd979L,
+        0x909c356647775631L } },
+    /* 42 << 28 */
+    { { 0x1c07bc6b75dd7125L,0xb4c6bc9787a0428dL,0x507ece52fdeb6b9dL,
+        0xfca56512b2c95432L },
+      { 0x15d97181d0e8bd06L,0x384dd317c6bb46eaL,0x5441ea203952b624L,
+        0xbcf70dee4e7dc2fbL } },
+    /* 43 << 28 */
+    { { 0x372b016e6628e8c3L,0x07a0d667b60a7522L,0xcf05751b0a344ee2L,
+        0x0ec09a48118bdeecL },
+      { 0x6e4b3d4ed83dce46L,0x43a6316d99d2fc6eL,0xa99d898956cf044cL,
+        0x7c7f4454ae3e5fb7L } },
+    /* 44 << 28 */
+    { { 0xb2e6b121fbabbe92L,0x281850fbe1330076L,0x093581ec97890015L,
+        0x69b1dded75ff77f5L },
+      { 0x7cf0b18fab105105L,0x953ced31a89ccfefL,0x3151f85feb914009L,
+        0x3c9f1b8788ed48adL } },
+    /* 45 << 28 */
+    { { 0xc9aba1a14a7eadcbL,0x928e7501522e71cfL,0xeaede7273a2e4f83L,
+        0x467e10d11ce3bbd3L },
+      { 0xf3442ac3b955dcf0L,0xba96307dd3d5e527L,0xf763a10efd77f474L,
+        0x5d744bd06a6e1ff0L } },
+    /* 46 << 28 */
+    { { 0xd287282aa777899eL,0xe20eda8fd03f3cdeL,0x6a7e75bb50b07d31L,
+        0x0b7e2a946f379de4L },
+      { 0x31cb64ad19f593cfL,0x7b1a9e4f1e76ef1dL,0xe18c9c9db62d609cL,
+        0x439bad6de779a650L } },
+    /* 47 << 28 */
+    { { 0x219d9066e032f144L,0x1db632b8e8b2ec6aL,0xff0d0fd4fda12f78L,
+        0x56fb4c2d2a25d265L },
+      { 0x5f4e2ee1255a03f1L,0x61cd6af2e96af176L,0xe0317ba8d068bc97L,
+        0x927d6bab264b988eL } },
+    /* 48 << 28 */
+    { { 0xa18f07e0e90fb21eL,0x00fd2b80bba7fca1L,0x20387f2795cd67b5L,
+        0x5b89a4e7d39707f7L },
+      { 0x8f83ad3f894407ceL,0xa0025b946c226132L,0xc79563c7f906c13bL,
+        0x5f548f314e7bb025L } },
+    /* 49 << 28 */
+    { { 0x2b4c6b8feac6d113L,0xa67e3f9c0e813c76L,0x3982717c3fe1f4b9L,
+        0x5886581926d8050eL },
+      { 0x99f3640cf7f06f20L,0xdc6102162a66ebc2L,0x52f2c175767a1e08L,
+        0x05660e1a5999871bL } },
+    /* 50 << 28 */
+    { { 0x6b0f17626d3c4693L,0xf0e7d62737ed7beaL,0xc51758c7b75b226dL,
+        0x40a886281f91613bL },
+      { 0x889dbaa7bbb38ce0L,0xe0404b65bddcad81L,0xfebccd3a8bc9671fL,
+        0xfbf9a357ee1f5375L } },
+    /* 51 << 28 */
+    { { 0x5dc169b028f33398L,0xb07ec11d72e90f65L,0xae7f3b4afaab1eb1L,
+        0xd970195e5f17538aL },
+      { 0x52b05cbe0181e640L,0xf5debd622643313dL,0x761481545df31f82L,
+        0x23e03b333a9e13c5L } },
+    /* 52 << 28 */
+    { { 0xff7589494fde0c1fL,0xbf8a1abee5b6ec20L,0x702278fb87e1db6cL,
+        0xc447ad7a35ed658fL },
+      { 0x48d4aa3803d0ccf2L,0x80acb338819a7c03L,0x9bc7c89e6e17ceccL,
+        0x46736b8b03be1d82L } },
+    /* 53 << 28 */
+    { { 0xd65d7b60c0432f96L,0xddebe7a3deb5442fL,0x79a253077dff69a2L,
+        0x37a56d9402cf3122L },
+      { 0x8bab8aedf2350d0aL,0x13c3f276037b0d9aL,0xc664957c44c65caeL,
+        0x88b44089c2e71a88L } },
+    /* 54 << 28 */
+    { { 0xdb88e5a35cb02664L,0x5d4c0bf18686c72eL,0xea3d9b62a682d53eL,
+        0x9b605ef40b2ad431L },
+      { 0x71bac202c69645d0L,0xa115f03a6a1b66e7L,0xfe2c563a158f4dc4L,
+        0xf715b3a04d12a78cL } },
+    /* 55 << 28 */
+    { { 0x8f7f0a48d413213aL,0x2035806dc04becdbL,0xecd34a995d8587f5L,
+        0x4d8c30799f6d3a71L },
+      { 0x1b2a2a678d95a8f6L,0xc58c9d7df2110d0dL,0xdeee81d5cf8fba3fL,
+        0xa42be3c00c7cdf68L } },
+    /* 56 << 28 */
+    { { 0x2126f742d43b5eaaL,0x054a0766dfa59b85L,0x9d0d5e36126bfd45L,
+        0xa1f8fbd7384f8a8fL },
+      { 0x317680f5d563fcccL,0x48ca5055f280a928L,0xe00b81b227b578cfL,
+        0x10aad9182994a514L } },
+    /* 57 << 28 */
+    { { 0xd9e07b62b7bdc953L,0x9f0f6ff25bc086ddL,0x09d1ccff655eee77L,
+        0x45475f795bef7df1L },
+      { 0x3faa28fa86f702ccL,0x92e609050f021f07L,0xe9e629687f8fa8c6L,
+        0xbd71419af036ea2cL } },
+    /* 58 << 28 */
+    { { 0x171ee1cc6028da9aL,0x5352fe1ac251f573L,0xf8ff236e3fa997f4L,
+        0xd831b6c9a5749d5fL },
+      { 0x7c872e1de350e2c2L,0xc56240d91e0ce403L,0xf9deb0776974f5cbL,
+        0x7d50ba87961c3728L } },
+    /* 59 << 28 */
+    { { 0xd6f894265a3a2518L,0xcf817799c6303d43L,0x510a0471619e5696L,
+        0xab049ff63a5e307bL },
+      { 0xe4cdf9b0feb13ec7L,0xd5e971179d8ff90cL,0xf6f64d069afa96afL,
+        0x00d0bf5e9d2012a2L } },
+    /* 60 << 28 */
+    { { 0xe63f301f358bcdc0L,0x07689e990a9d47f8L,0x1f689e2f4f43d43aL,
+        0x4d542a1690920904L },
+      { 0xaea293d59ca0a707L,0xd061fe458ac68065L,0x1033bf1b0090008cL,
+        0x29749558c08a6db6L } },
+    /* 61 << 28 */
+    { { 0x74b5fc59c1d5d034L,0xf712e9f667e215e0L,0xfd520cbd860200e6L,
+        0x0229acb43ea22588L },
+      { 0x9cd1e14cfff0c82eL,0x87684b6259c69e73L,0xda85e61c96ccb989L,
+        0x2d5dbb02a3d06493L } },
+    /* 62 << 28 */
+    { { 0xf22ad33ae86b173cL,0xe8e41ea5a79ff0e3L,0x01d2d725dd0d0c10L,
+        0x31f39088032d28f9L },
+      { 0x7b3f71e17829839eL,0x0cf691b44502ae58L,0xef658dbdbefc6115L,
+        0xa5cd6ee5b3ab5314L } },
+    /* 63 << 28 */
+    { { 0x206c8d7b5f1d2347L,0x794645ba4cc2253aL,0xd517d8ff58389e08L,
+        0x4fa20dee9f847288L },
+      { 0xeba072d8d797770aL,0x7360c91dbf429e26L,0x7200a3b380af8279L,
+        0x6a1c915082dadce3L } },
+    /* 64 << 28 */
+    { { 0x0ee6d3a7c35d8794L,0x042e65580356bae5L,0x9f59698d643322fdL,
+        0x9379ae1550a61967L },
+      { 0x64b9ae62fcc9981eL,0xaed3d6316d2934c6L,0x2454b3025e4e65ebL,
+        0xab09f647f9950428L } },
+    /* 0 << 35 */
+    { { 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 << 35 */
+    { { 0xb2083a1222248accL,0x1f6ec0ef3264e366L,0x5659b7045afdee28L,
+        0x7a823a40e6430bb5L },
+      { 0x24592a04e1900a79L,0xcde09d4ac9ee6576L,0x52b6463f4b5ea54aL,
+        0x1efe9ed3d3ca65a7L } },
+    /* 2 << 35 */
+    { { 0xe27a6dbe305406ddL,0x8eb7dc7fdd5d1957L,0xf54a6876387d4d8fL,
+        0x9c479409c7762de4L },
+      { 0xbe4d5b5d99b30778L,0x25380c566e793682L,0x602d37f3dac740e3L,
+        0x140deabe1566e4aeL } },
+    /* 3 << 35 */
+    { { 0x4481d067afd32acfL,0xd8f0fccae1f71ccfL,0xd208dd0cb596f2daL,
+        0xd049d7309aad93f9L },
+      { 0xc79f263d42ab580eL,0x09411bb123f707b4L,0x8cfde1ff835e0edaL,
+        0x7270749090f03402L } },
+    /* 4 << 35 */
+    { { 0xeaee6126c49a861eL,0x024f3b65e14f0d06L,0x51a3f1e8c69bfc17L,
+        0xc3c3a8e9a7686381L },
+      { 0x3400752cb103d4c8L,0x02bc46139218b36bL,0xc67f75eb7651504aL,
+        0xd6848b56d02aebfaL } },
+    /* 5 << 35 */
+    { { 0xbd9802e6c30fa92bL,0x5a70d96d9a552784L,0x9085c4ea3f83169bL,
+        0xfa9423bb06908228L },
+      { 0x2ffebe12fe97a5b9L,0x85da604971b99118L,0x9cbc2f7f63178846L,
+        0xfd96bc709153218eL } },
+    /* 6 << 35 */
+    { { 0x958381db1782269bL,0xae34bf792597e550L,0xbb5c60645f385153L,
+        0x6f0e96afe3088048L },
+      { 0xbf6a021577884456L,0xb3b5688c69310ea7L,0x17c9429504fad2deL,
+        0xe020f0e517896d4dL } },
+    /* 7 << 35 */
+    { { 0x730ba0ab0976505fL,0x567f6813095e2ec5L,0x470620106331ab71L,
+        0x72cfa97741d22b9fL },
+      { 0x33e55ead8a2373daL,0xa8d0d5f47ba45a68L,0xba1d8f9c03029d15L,
+        0x8f34f1ccfc55b9f3L } },
+    /* 8 << 35 */
+    { { 0xcca4428dbbe5a1a9L,0x8187fd5f3126bd67L,0x0036973a48105826L,
+        0xa39b6663b8bd61a0L },
+      { 0x6d42deef2d65a808L,0x4969044f94636b19L,0xf611ee47dd5d564cL,
+        0x7b2f3a49d2873077L } },
+    /* 9 << 35 */
+    { { 0x94157d45300eb294L,0x2b2a656e169c1494L,0xc000dd76d3a47aa9L,
+        0xa2864e4fa6243ea4L },
+      { 0x82716c47db89842eL,0x12dfd7d761479fb7L,0x3b9a2c56e0b2f6dcL,
+        0x46be862ad7f85d67L } },
+    /* 10 << 35 */
+    { { 0x03b0d8dd0f82b214L,0x460c34f9f103cbc6L,0xf32e5c0318d79e19L,
+        0x8b8888baa84117f8L },
+      { 0x8f3c37dcc0722677L,0x10d21be91c1c0f27L,0xd47c8468e0f7a0c6L,
+        0x9bf02213adecc0e0L } },
+    /* 11 << 35 */
+    { { 0x0baa7d1242b48b99L,0x1bcb665d48424096L,0x8b847cd6ebfb5cfbL,
+        0x87c2ae569ad4d10dL },
+      { 0xf1cbb1220de36726L,0xe7043c683fdfbd21L,0x4bd0826a4e79d460L,
+        0x11f5e5984bd1a2cbL } },
+    /* 12 << 35 */
+    { { 0x97554160b7fe7b6eL,0x7d16189a400a3fb2L,0xd73e9beae328ca1eL,
+        0x0dd04b97e793d8ccL },
+      { 0xa9c83c9b506db8ccL,0x5cd47aaecf38814cL,0x26fc430db64b45e6L,
+        0x079b5499d818ea84L } },
+    /* 13 << 35 */
+    { { 0xebb01102c1c24a3bL,0xca24e5681c161c1aL,0x103eea6936f00a4aL,
+        0x9ad76ee876176c7bL },
+      { 0x97451fc2538e0ff7L,0x94f898096604b3b0L,0x6311436e3249cfd7L,
+        0x27b4a7bd41224f69L } },
+    /* 14 << 35 */
+    { { 0x03b5d21ae0ac2941L,0x279b0254c2d31937L,0x3307c052cac992d0L,
+        0x6aa7cb92efa8b1f3L },
+      { 0x5a1825800d37c7a5L,0x13380c37342d5422L,0x92ac2d66d5d2ef92L,
+        0x035a70c9030c63c6L } },
+    /* 15 << 35 */
+    { { 0xc16025dd4ce4f152L,0x1f419a71f9df7c06L,0x6d5b221491e4bb14L,
+        0xfc43c6cc839fb4ceL },
+      { 0x49f06591925d6b2dL,0x4b37d9d362186598L,0x8c54a971d01b1629L,
+        0xe1a9c29f51d50e05L } },
+    /* 16 << 35 */
+    { { 0x5109b78571ba1861L,0x48b22d5cd0c8f93dL,0xe8fa84a78633bb93L,
+        0x53fba6ba5aebbd08L },
+      { 0x7ff27df3e5eea7d8L,0x521c879668ca7158L,0xb9d5133bce6f1a05L,
+        0x2d50cd53fd0ebee4L } },
+    /* 17 << 35 */
+    { { 0xc82115d6c5a3ef16L,0x993eff9dba079221L,0xe4da2c5e4b5da81cL,
+        0x9a89dbdb8033fd85L },
+      { 0x60819ebf2b892891L,0x53902b215d14a4d5L,0x6ac35051d7fda421L,
+        0xcc6ab88561c83284L } },
+    /* 18 << 35 */
+    { { 0x14eba133f74cff17L,0x240aaa03ecb813f2L,0xcfbb65406f665beeL,
+        0x084b1fe4a425ad73L },
+      { 0x009d5d16d081f6a6L,0x35304fe8eef82c90L,0xf20346d5aa9eaa22L,
+        0x0ada9f07ac1c91e3L } },
+    /* 19 << 35 */
+    { { 0xa6e21678968a6144L,0x54c1f77c07b31a1eL,0xd6bb787e5781fbe1L,
+        0x61bd2ee0e31f1c4aL },
+      { 0xf25aa1e9781105fcL,0x9cf2971f7b2f8e80L,0x26d15412cdff919bL,
+        0x01db4ebe34bc896eL } },
+    /* 20 << 35 */
+    { { 0x7d9b3e23b40df1cfL,0x5933737394e971b4L,0xbf57bd14669cf921L,
+        0x865daedf0c1a1064L },
+      { 0x3eb70bd383279125L,0xbc3d5b9f34ecdaabL,0x91e3ed7e5f755cafL,
+        0x49699f54d41e6f02L } },
+    /* 21 << 35 */
+    { { 0x185770e1d4a7a15bL,0x08f3587aeaac87e7L,0x352018db473133eaL,
+        0x674ce71904fd30fcL },
+      { 0x7b8d9835088b3e0eL,0x7a0356a95d0d47a1L,0x9d9e76596474a3c4L,
+        0x61ea48a7ff66966cL } },
+    /* 22 << 35 */
+    { { 0x304177580f3e4834L,0xfdbb21c217a9afcbL,0x756fa17f2f9a67b3L,
+        0x2a6b2421a245c1a8L },
+      { 0x64be27944af02291L,0xade465c62a5804feL,0x8dffbd39a6f08fd7L,
+        0xc4efa84caa14403bL } },
+    /* 23 << 35 */
+    { { 0xa1b91b2a442b0f5cL,0xb748e317cf997736L,0x8d1b62bfcee90e16L,
+        0x907ae2710b2078c0L },
+      { 0xdf31534b0c9bcdddL,0x043fb05439adce83L,0x99031043d826846aL,
+        0x61a9c0d6b144f393L } },
+    /* 24 << 35 */
+    { { 0xdab4804647718427L,0xdf17ff9b6e830f8bL,0x408d7ee8e49a1347L,
+        0x6ac71e2391c1d4aeL },
+      { 0xc8cbb9fd1defd73cL,0x19840657bbbbfec5L,0x39db1cb59e7ef8eaL,
+        0x78aa829664105f30L } },
+    /* 25 << 35 */
+    { { 0xa3d9b7f0a3738c29L,0x0a2f235abc3250a3L,0x55e506f6445e4cafL,
+        0x0974f73d33475f7aL },
+      { 0xd37dbba35ba2f5a8L,0x542c6e636af40066L,0x26d99b53c5d73e2cL,
+        0x06060d7d6c3ca33eL } },
+    /* 26 << 35 */
+    { { 0xcdbef1c2065fef4aL,0x77e60f7dfd5b92e3L,0xd7c549f026708350L,
+        0x201b3ad034f121bfL },
+      { 0x5fcac2a10334fc14L,0x8a9a9e09344552f6L,0x7dd8a1d397653082L,
+        0x5fc0738f79d4f289L } },
+    /* 27 << 35 */
+    { { 0x787d244d17d2d8c3L,0xeffc634570830684L,0x5ddb96dde4f73ae5L,
+        0x8efb14b1172549a5L },
+      { 0x6eb73eee2245ae7aL,0xbca4061eea11f13eL,0xb577421d30b01f5dL,
+        0xaa688b24782e152cL } },
+    /* 28 << 35 */
+    { { 0x67608e71bd3502baL,0x4ef41f24b4de75a0L,0xb08dde5efd6125e5L,
+        0xde484825a409543fL },
+      { 0x1f198d9865cc2295L,0x428a37716e0edfa2L,0x4f9697a2adf35fc7L,
+        0x01a43c79f7cac3c7L } },
+    /* 29 << 35 */
+    { { 0xb05d70590fd3659aL,0x8927f30cbb7f2d9aL,0x4023d1ac8cf984d3L,
+        0x32125ed302897a45L },
+      { 0xfb572dad3d414205L,0x73000ef2e3fa82a9L,0x4c0868e9f10a5581L,
+        0x5b61fc676b0b3ca5L } },
+    /* 30 << 35 */
+    { { 0xc1258d5b7cae440cL,0x21c08b41402b7531L,0xf61a8955de932321L,
+        0x3568faf82d1408afL },
+      { 0x71b15e999ecf965bL,0xf14ed248e917276fL,0xc6f4caa1820cf9e2L,
+        0x681b20b218d83c7eL } },
+    /* 31 << 35 */
+    { { 0x6cde738dc6c01120L,0x71db0813ae70e0dbL,0x95fc064474afe18cL,
+        0x34619053129e2be7L },
+      { 0x80615ceadb2a3b15L,0x0a49a19edb4c7073L,0x0e1b84c88fd2d367L,
+        0xd74bf462033fb8aaL } },
+    /* 32 << 35 */
+    { { 0x889f6d65533ef217L,0x7158c7e4c3ca2e87L,0xfb670dfbdc2b4167L,
+        0x75910a01844c257fL },
+      { 0xf336bf07cf88577dL,0x22245250e45e2aceL,0x2ed92e8d7ca23d85L,
+        0x29f8be4c2b812f58L } },
+    /* 33 << 35 */
+    { { 0xdd9ebaa7076fe12bL,0x3f2400cbae1537f9L,0x1aa9352817bdfb46L,
+        0xc0f9843067883b41L },
+      { 0x5590ede10170911dL,0x7562f5bb34d4b17fL,0xe1fa1df21826b8d2L,
+        0xb40b796a6bd80d59L } },
+    /* 34 << 35 */
+    { { 0xd65bf1973467ba92L,0x8c9b46dbf70954b0L,0x97c8a0f30e78f15dL,
+        0xa8f3a69a85a4c961L },
+      { 0x4242660f61e4ce9bL,0xbf06aab36ea6790cL,0xc6706f8eec986416L,
+        0x9e56dec19a9fc225L } },
+    /* 35 << 35 */
+    { { 0x527c46f49a9898d9L,0xd799e77b5633cdefL,0x24eacc167d9e4297L,
+        0xabb61cea6b1cb734L },
+      { 0xbee2e8a7f778443cL,0x3bb42bf129de2fe6L,0xcbed86a13003bb6fL,
+        0xd3918e6cd781cdf6L } },
+    /* 36 << 35 */
+    { { 0x4bee32719a5103f1L,0x5243efc6f50eac06L,0xb8e122cb6adcc119L,
+        0x1b7faa84c0b80a08L },
+      { 0x32c3d1bd6dfcd08cL,0x129dec4e0be427deL,0x98ab679c1d263c83L,
+        0xafc83cb7cef64effL } },
+    /* 37 << 35 */
+    { { 0x85eb60882fa6be76L,0x892585fb1328cbfeL,0xc154d3edcf618ddaL,
+        0xc44f601b3abaf26eL },
+      { 0x7bf57d0b2be1fdfdL,0xa833bd2d21137feeL,0x9353af362db591a8L,
+        0xc76f26dc5562a056L } },
+    /* 38 << 35 */
+    { { 0x1d87e47d3fdf5a51L,0x7afb5f9355c9cab0L,0x91bbf58f89e0586eL,
+        0x7c72c0180d843709L },
+      { 0xa9a5aafb99b5c3dcL,0xa48a0f1d3844aeb0L,0x7178b7ddb667e482L,
+        0x453985e96e23a59aL } },
+    /* 39 << 35 */
+    { { 0x4a54c86001b25dd8L,0x0dd37f48fb897c8aL,0x5f8aa6100ea90cd9L,
+        0xc8892c6816d5830dL },
+      { 0xeb4befc0ef514ca5L,0x478eb679e72c9ee6L,0x9bca20dadbc40d5fL,
+        0xf015de21dde4f64aL } },
+    /* 40 << 35 */
+    { { 0xaa6a4de0eaf4b8a5L,0x68cfd9ca4bc60e32L,0x668a4b017fd15e70L,
+        0xd9f0694af27dc09dL },
+      { 0xf6c3cad5ba708bcdL,0x5cd2ba695bb95c2aL,0xaa28c1d333c0a58fL,
+        0x23e274e3abc77870L } },
+    /* 41 << 35 */
+    { { 0x44c3692ddfd20a4aL,0x091c5fd381a66653L,0x6c0bb69109a0757dL,
+        0x9072e8b9667343eaL },
+      { 0x31d40eb080848becL,0x95bd480a79fd36ccL,0x01a77c6165ed43f5L,
+        0xafccd1272e0d40bfL } },
+    /* 42 << 35 */
+    { { 0xeccfc82d1cc1884bL,0xc85ac2015d4753b4L,0xc7a6caac658e099fL,
+        0xcf46369e04b27390L },
+      { 0xe2e7d049506467eaL,0x481b63a237cdecccL,0x4029abd8ed80143aL,
+        0x28bfe3c7bcb00b88L } },
+    /* 43 << 35 */
+    { { 0x3bec10090643d84aL,0x885f3668abd11041L,0xdb02432cf83a34d6L,
+        0x32f7b360719ceebeL },
+      { 0xf06c7837dad1fe7aL,0x60a157a95441a0b0L,0x704970e9e2d47550L,
+        0xcd2bd553271b9020L } },
+    /* 44 << 35 */
+    { { 0xff57f82f33e24a0bL,0x9cbee23ff2565079L,0x16353427eb5f5825L,
+        0x276feec4e948d662L },
+      { 0xd1b62bc6da10032bL,0x718351ddf0e72a53L,0x934520762420e7baL,
+        0x96368fff3a00118dL } },
+    /* 45 << 35 */
+    { { 0x00ce2d26150a49e4L,0x0c28b6363f04706bL,0xbad65a4658b196d0L,
+        0x6c8455fcec9f8b7cL },
+      { 0xe90c895f2d71867eL,0x5c0be31bedf9f38cL,0x2a37a15ed8f6ec04L,
+        0x239639e78cd85251L } },
+    /* 46 << 35 */
+    { { 0xd89753159c7c4c6bL,0x603aa3c0d7409af7L,0xb8d53d0c007132fbL,
+        0x68d12af7a6849238L },
+      { 0xbe0607e7bf5d9279L,0x9aa50055aada74ceL,0xe81079cbba7e8ccbL,
+        0x610c71d1a5f4ff5eL } },
+    /* 47 << 35 */
+    { { 0x9e2ee1a75aa07093L,0xca84004ba75da47cL,0x074d39513de75401L,
+        0xf938f756bb311592L },
+      { 0x9619761800a43421L,0x39a2536207bc78c8L,0x278f710a0a171276L,
+        0xb28446ea8d1a8f08L } },
+    /* 48 << 35 */
+    { { 0x184781bfe3b6a661L,0x7751cb1de6d279f7L,0xf8ff95d6c59eb662L,
+        0x186d90b758d3dea7L },
+      { 0x0e4bb6c1dfb4f754L,0x5c5cf56b2b2801dcL,0xc561e4521f54564dL,
+        0xb4fb8c60f0dd7f13L } },
+    /* 49 << 35 */
+    { { 0xf884963033ff98c7L,0x9619fffacf17769cL,0xf8090bf61bfdd80aL,
+        0x14d9a149422cfe63L },
+      { 0xb354c3606f6df9eaL,0xdbcf770d218f17eaL,0x207db7c879eb3480L,
+        0x213dbda8559b6a26L } },
+    /* 50 << 35 */
+    { { 0xac4c200b29fc81b3L,0xebc3e09f171d87c1L,0x917995301481aa9eL,
+        0x051b92e192e114faL },
+      { 0xdf8f92e9ecb5537fL,0x44b1b2cc290c7483L,0xa711455a2adeb016L,
+        0x964b685681a10c2cL } },
+    /* 51 << 35 */
+    { { 0x4f159d99cec03623L,0x05532225ef3271eaL,0xb231bea3c5ee4849L,
+        0x57a54f507094f103L },
+      { 0x3e2d421d9598b352L,0xe865a49c67412ab4L,0xd2998a251cc3a912L,
+        0x5d0928080c74d65dL } },
+    /* 52 << 35 */
+    { { 0x73f459084088567aL,0xeb6b280e1f214a61L,0x8c9adc34caf0c13dL,
+        0x39d12938f561fb80L },
+      { 0xb2dc3a5ebc6edfb4L,0x7485b1b1fe4d210eL,0x062e0400e186ae72L,
+        0x91e32d5c6eeb3b88L } },
+    /* 53 << 35 */
+    { { 0x6df574d74be59224L,0xebc88ccc716d55f3L,0x26c2e6d0cad6ed33L,
+        0xc6e21e7d0d3e8b10L },
+      { 0x2cc5840e5bcc36bbL,0x9292445e7da74f69L,0x8be8d3214e5193a8L,
+        0x3ec236298df06413L } },
+    /* 54 << 35 */
+    { { 0xc7e9ae85b134defaL,0x6073b1d01bb2d475L,0xb9ad615e2863c00dL,
+        0x9e29493d525f4ac4L },
+      { 0xc32b1dea4e9acf4fL,0x3e1f01c8a50db88dL,0xb05d70ea04da916cL,
+        0x714b0d0ad865803eL } },
+    /* 55 << 35 */
+    { { 0x4bd493fc9920cb5eL,0x5b44b1f792c7a3acL,0xa2a77293bcec9235L,
+        0x5ee06e87cd378553L },
+      { 0xceff8173da621607L,0x2bb03e4c99f5d290L,0x2945106aa6f734acL,
+        0xb5056604d25c4732L } },
+    /* 56 << 35 */
+    { { 0x5945920ce079afeeL,0x686e17a06789831fL,0x5966bee8b74a5ae5L,
+        0x38a673a21e258d46L },
+      { 0xbd1cc1f283141c95L,0x3b2ecf4f0e96e486L,0xcd3aa89674e5fc78L,
+        0x415ec10c2482fa7aL } },
+    /* 57 << 35 */
+    { { 0x1523441980503380L,0x513d917ad314b392L,0xb0b52f4e63caecaeL,
+        0x07bf22ad2dc7780bL },
+      { 0xe761e8a1e4306839L,0x1b3be9625dd7feaaL,0x4fe728de74c778f1L,
+        0xf1fa0bda5e0070f6L } },
+    /* 58 << 35 */
+    { { 0x85205a316ec3f510L,0x2c7e4a14d2980475L,0xde3c19c06f30ebfdL,
+        0xdb1c1f38d4b7e644L },
+      { 0xfe291a755dce364aL,0xb7b22a3c058f5be3L,0x2cd2c30237fea38cL,
+        0x2930967a2e17be17L } },
+    /* 59 << 35 */
+    { { 0x87f009de0c061c65L,0xcb014aacedc6ed44L,0x49bd1cb43bafb1ebL,
+        0x81bd8b5c282d3688L },
+      { 0x1cdab87ef01a17afL,0x21f37ac4e710063bL,0x5a6c567642fc8193L,
+        0xf4753e7056a6015cL } },
+    /* 60 << 35 */
+    { { 0x020f795ea15b0a44L,0x8f37c8d78958a958L,0x63b7e89ba4b675b5L,
+        0xb4fb0c0c0fc31aeaL },
+      { 0xed95e639a7ff1f2eL,0x9880f5a3619614fbL,0xdeb6ff02947151abL,
+        0x5bc5118ca868dcdbL } },
+    /* 61 << 35 */
+    { { 0xd8da20554c20cea5L,0xcac2776e14c4d69aL,0xcccb22c1622d599bL,
+        0xa4ddb65368a9bb50L },
+      { 0x2c4ff1511b4941b4L,0xe1ff19b46efba588L,0x35034363c48345e0L,
+        0x45542e3d1e29dfc4L } },
+    /* 62 << 35 */
+    { { 0xf197cb91349f7aedL,0x3b2b5a008fca8420L,0x7c175ee823aaf6d8L,
+        0x54dcf42135af32b6L },
+      { 0x0ba1430727d6561eL,0x879d5ee4d175b1e2L,0xc7c4367399807db5L,
+        0x77a544559cd55bcdL } },
+    /* 63 << 35 */
+    { { 0xe6c2ff130105c072L,0x18f7a99f8dda7da4L,0x4c3018200e2d35c1L,
+        0x06a53ca0d9cc6c82L },
+      { 0xaa21cc1ef1aa1d9eL,0x324143344a75b1e8L,0x2a6d13280ebe9fdcL,
+        0x16bd173f98a4755aL } },
+    /* 64 << 35 */
+    { { 0xfbb9b2452133ffd9L,0x39a8b2f1830f1a20L,0x484bc97dd5a1f52aL,
+        0xd6aebf56a40eddf8L },
+      { 0x32257acb76ccdac6L,0xaf4d36ec1586ff27L,0x8eaa8863f8de7dd1L,
+        0x0045d5cf88647c16L } },
+    /* 0 << 42 */
+    { { 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 << 42 */
+    { { 0xa6f3d574c005979dL,0xc2072b426a40e350L,0xfca5c1568de2ecf9L,
+        0xa8c8bf5ba515344eL },
+      { 0x97aee555114df14aL,0xd4374a4dfdc5ec6bL,0x754cc28f2ca85418L,
+        0x71cb9e27d3c41f78L } },
+    /* 2 << 42 */
+    { { 0x8910507903605c39L,0xf0843d9ea142c96cL,0xf374493416923684L,
+        0x732caa2ffa0a2893L },
+      { 0xb2e8c27061160170L,0xc32788cc437fbaa3L,0x39cd818ea6eda3acL,
+        0xe2e942399e2b2e07L } },
+    /* 3 << 42 */
+    { { 0x6967d39b0260e52aL,0xd42585cc90653325L,0x0d9bd60521ca7954L,
+        0x4fa2087781ed57b3L },
+      { 0x60c1eff8e34a0bbeL,0x56b0040c84f6ef64L,0x28be2b24b1af8483L,
+        0xb2278163f5531614L } },
+    /* 4 << 42 */
+    { { 0x8df275455922ac1cL,0xa7b3ef5ca52b3f63L,0x8e77b21471de57c4L,
+        0x31682c10834c008bL },
+      { 0xc76824f04bd55d31L,0xb6d1c08617b61c71L,0x31db0903c2a5089dL,
+        0x9c092172184e5d3fL } },
+    /* 5 << 42 */
+    { { 0xdd7ced5bc00cc638L,0x1a2015eb61278fc2L,0x2e8e52886a37f8d6L,
+        0xc457786fe79933adL },
+      { 0xb3fe4cce2c51211aL,0xad9b10b224c20498L,0x90d87a4fd28db5e5L,
+        0x698cd1053aca2fc3L } },
+    /* 6 << 42 */
+    { { 0x4f112d07e91b536dL,0xceb982f29eba09d6L,0x3c157b2c197c396fL,
+        0xe23c2d417b66eb24L },
+      { 0x480c57d93f330d37L,0xb3a4c8a179108debL,0x702388decb199ce5L,
+        0x0b019211b944a8d4L } },
+    /* 7 << 42 */
+    { { 0x24f2a692840bb336L,0x7c353bdca669fa7bL,0xda20d6fcdec9c300L,
+        0x625fbe2fa13a4f17L },
+      { 0xa2b1b61adbc17328L,0x008965bfa9515621L,0x49690939c620ff46L,
+        0x182dd27d8717e91cL } },
+    /* 8 << 42 */
+    { { 0x5ace5035ea6c3997L,0x54259aaac2610befL,0xef18bb3f3c80dd39L,
+        0x6910b95b5fc3fa39L },
+      { 0xfce2f51043e09aeeL,0xced56c9fa7675665L,0x10e265acd872db61L,
+        0x6982812eae9fce69L } },
+    /* 9 << 42 */
+    { { 0x29be11c6ce800998L,0x72bb1752b90360d9L,0x2c1931975a4ad590L,
+        0x2ba2f5489fc1dbc0L },
+      { 0x7fe4eebbe490ebe0L,0x12a0a4cd7fae11c0L,0x7197cf81e903ba37L,
+        0xcf7d4aa8de1c6dd8L } },
+    /* 10 << 42 */
+    { { 0x92af6bf43fd5684cL,0x2b26eecf80360aa1L,0xbd960f3000546a82L,
+        0x407b3c43f59ad8feL },
+      { 0x86cae5fe249c82baL,0x9e0faec72463744cL,0x87f551e894916272L,
+        0x033f93446ceb0615L } },
+    /* 11 << 42 */
+    { { 0x1e5eb0d18be82e84L,0x89967f0e7a582fefL,0xbcf687d5a6e921faL,
+        0xdfee4cf3d37a09baL },
+      { 0x94f06965b493c465L,0x638b9a1c7635c030L,0x7666786466f05e9fL,
+        0xccaf6808c04da725L } },
+    /* 12 << 42 */
+    { { 0xca2eb690768fccfcL,0xf402d37db835b362L,0x0efac0d0e2fdfcceL,
+        0xefc9cdefb638d990L },
+      { 0x2af12b72d1669a8bL,0x33c536bc5774ccbdL,0x30b21909fb34870eL,
+        0xc38fa2f77df25acaL } },
+    /* 13 << 42 */
+    { { 0x74c5f02bbf81f3f5L,0x0525a5aeaf7e4581L,0x88d2aaba433c54aeL,
+        0xed9775db806a56c5L },
+      { 0xd320738ac0edb37dL,0x25fdb6ee66cc1f51L,0xac661d1710600d76L,
+        0x931ec1f3bdd1ed76L } },
+    /* 14 << 42 */
+    { { 0x65c11d6219ee43f1L,0x5cd57c3e60829d97L,0xd26c91a3984be6e8L,
+        0xf08d93098b0c53bdL },
+      { 0x94bc9e5bc016e4eaL,0xd391683911d43d2bL,0x886c5ad773701155L,
+        0xe037762620b00715L } },
+    /* 15 << 42 */
+    { { 0x7f01c9ecaa80ba59L,0x3083411a68538e51L,0x970370f1e88128afL,
+        0x625cc3db91dec14bL },
+      { 0xfef9666c01ac3107L,0xb2a8d577d5057ac3L,0xb0f2629992be5df7L,
+        0xf579c8e500353924L } },
+    /* 16 << 42 */
+    { { 0xb8fa3d931341ed7aL,0x4223272ca7b59d49L,0x3dcb194783b8c4a4L,
+        0x4e413c01ed1302e4L },
+      { 0x6d999127e17e44ceL,0xee86bf7533b3adfbL,0xf6902fe625aa96caL,
+        0xb73540e4e5aae47dL } },
+    /* 17 << 42 */
+    { { 0x32801d7b1b4a158cL,0xe571c99e27e2a369L,0x40cb76c010d9f197L,
+        0xc308c2893167c0aeL },
+      { 0xa6ef9dd3eb7958f2L,0xa7226dfc300879b1L,0x6cd0b3627edf0636L,
+        0x4efbce6c7bc37eedL } },
+    /* 18 << 42 */
+    { { 0x75f92a058d699021L,0x586d4c79772566e3L,0x378ca5f1761ad23aL,
+        0x650d86fc1465a8acL },
+      { 0x7a4ed457842ba251L,0x6b65e3e642234933L,0xaf1543b731aad657L,
+        0xa4cefe98cbfec369L } },
+    /* 19 << 42 */
+    { { 0xb587da909f47befbL,0x6562e9fb41312d13L,0xa691ea59eff1cefeL,
+        0xcc30477a05fc4cf6L },
+      { 0xa16324610b0ffd3dL,0xa1f16f3b5b355956L,0x5b148d534224ec24L,
+        0xdc834e7bf977012aL } },
+    /* 20 << 42 */
+    { { 0x7bfc5e75b2c69dbcL,0x3aa77a2903c3da6cL,0xde0df03cca910271L,
+        0xcbd5ca4a7806dc55L },
+      { 0xe1ca58076db476cbL,0xfde15d625f37a31eL,0xf49af520f41af416L,
+        0x96c5c5b17d342db5L } },
+    /* 21 << 42 */
+    { { 0x155c43b7eb4ceb9bL,0x2e9930104e77371aL,0x1d2987da675d43afL,
+        0xef2bc1c08599fd72L },
+      { 0x96894b7b9342f6b2L,0x201eadf27c8e71f0L,0xf3479d9f4a1f3efcL,
+        0xe0f8a742702a9704L } },
+    /* 22 << 42 */
+    { { 0xeafd44b6b3eba40cL,0xf9739f29c1c1e0d0L,0x0091471a619d505eL,
+        0xc15f9c969d7c263eL },
+      { 0x5be4728583afbe33L,0xa3b6d6af04f1e092L,0xe76526b9751a9d11L,
+        0x2ec5b26d9a4ae4d2L } },
+    /* 23 << 42 */
+    { { 0xeb66f4d902f6fb8dL,0x4063c56196912164L,0xeb7050c180ef3000L,
+        0x288d1c33eaa5b3f0L },
+      { 0xe87c68d607806fd8L,0xb2f7f9d54bbbf50fL,0x25972f3aac8d6627L,
+        0xf854777410e8c13bL } },
+    /* 24 << 42 */
+    { { 0xcc50ef6c872b4a60L,0xab2a34a44613521bL,0x39c5c190983e15d1L,
+        0x61dde5df59905512L },
+      { 0xe417f6219f2275f3L,0x0750c8b6451d894bL,0x75b04ab978b0bdaaL,
+        0x3bfd9fd4458589bdL } },
+    /* 25 << 42 */
+    { { 0xf1013e30ee9120b6L,0x2b51af9323a4743eL,0xea96ffae48d14d9eL,
+        0x71dc0dbe698a1d32L },
+      { 0x914962d20180cca4L,0x1ae60677c3568963L,0x8cf227b1437bc444L,
+        0xc650c83bc9962c7aL } },
+    /* 26 << 42 */
+    { { 0x23c2c7ddfe7ccfc4L,0xf925c89d1b929d48L,0x4460f74b06783c33L,
+        0xac2c8d49a590475aL },
+      { 0xfb40b407b807bba0L,0x9d1e362d69ff8f3aL,0xa33e9681cbef64a4L,
+        0x67ece5fa332fb4b2L } },
+    /* 27 << 42 */
+    { { 0x6900a99b739f10e3L,0xc3341ca9ff525925L,0xee18a626a9e2d041L,
+        0xa5a8368529580dddL },
+      { 0xf3470c819d7de3cdL,0xedf025862062cf9cL,0xf43522fac010edb0L,
+        0x3031413513a4b1aeL } },
+    /* 28 << 42 */
+    { { 0xc792e02adb22b94bL,0x993d8ae9a1eaa45bL,0x8aad6cd3cd1e1c63L,
+        0x89529ca7c5ce688aL },
+      { 0x2ccee3aae572a253L,0xe02b643802a21efbL,0xa7091b6ec9430358L,
+        0x06d1b1fa9d7db504L } },
+    /* 29 << 42 */
+    { { 0x58846d32c4744733L,0x40517c71379f9e34L,0x2f65655f130ef6caL,
+        0x526e4488f1f3503fL },
+      { 0x8467bd177ee4a976L,0x1d9dc913921363d1L,0xd8d24c33b069e041L,
+        0x5eb5da0a2cdf7f51L } },
+    /* 30 << 42 */
+    { { 0x1c0f3cb1197b994fL,0x3c95a6c52843eae9L,0x7766ffc9a6097ea5L,
+        0x7bea4093d723b867L },
+      { 0xb48e1f734db378f9L,0x70025b00e37b77acL,0x943dc8e7af24ad46L,
+        0xb98a15ac16d00a85L } },
+    /* 31 << 42 */
+    { { 0x3adc38ba2743b004L,0xb1c7f4f7334415eeL,0xea43df8f1e62d05aL,
+        0x326189059d76a3b6L },
+      { 0x2fbd0bb5a23a0f46L,0x5bc971db6a01918cL,0x7801d94ab4743f94L,
+        0xb94df65e676ae22bL } },
+    /* 32 << 42 */
+    { { 0xaafcbfabaf95894cL,0x7b9bdc07276b2241L,0xeaf983625bdda48bL,
+        0x5977faf2a3fcb4dfL },
+      { 0xbed042ef052c4b5bL,0x9fe87f71067591f0L,0xc89c73ca22f24ec7L,
+        0x7d37fa9ee64a9f1bL } },
+    /* 33 << 42 */
+    { { 0x2710841a15562627L,0x2c01a613c243b034L,0x1d135c562bc68609L,
+        0xc2ca17158b03f1f6L },
+      { 0xc9966c2d3eb81d82L,0xc02abf4a8f6df13eL,0x77b34bd78f72b43bL,
+        0xaff6218f360c82b0L } },
+    /* 34 << 42 */
+    { { 0x0aa5726c8d55b9d2L,0xdc0adbe999e9bffbL,0x9097549cefb9e72aL,
+        0x167557129dfb3111L },
+      { 0xdd8bf984f26847f9L,0xbcb8e387dfb30cb7L,0xc1fd32a75171ef9cL,
+        0x977f3fc7389b363fL } },
+    /* 35 << 42 */
+    { { 0x116eaf2bf4babda0L,0xfeab68bdf7113c8eL,0xd1e3f064b7def526L,
+        0x1ac30885e0b3fa02L },
+      { 0x1c5a6e7b40142d9dL,0x839b560330921c0bL,0x48f301fa36a116a3L,
+        0x380e1107cfd9ee6dL } },
+    /* 36 << 42 */
+    { { 0x7945ead858854be1L,0x4111c12ecbd4d49dL,0xece3b1ec3a29c2efL,
+        0x6356d4048d3616f5L },
+      { 0x9f0d6a8f594d320eL,0x0989316df651ccd2L,0x6c32117a0f8fdde4L,
+        0x9abe5cc5a26a9bbcL } },
+    /* 37 << 42 */
+    { { 0xcff560fb9723f671L,0x21b2a12d7f3d593cL,0xe4cb18da24ba0696L,
+        0x186e2220c3543384L },
+      { 0x722f64e088312c29L,0x94282a9917dc7752L,0x62467bbf5a85ee89L,
+        0xf435c650f10076a0L } },
+    /* 38 << 42 */
+    { { 0xc9ff153943b3a50bL,0x7132130c1a53efbcL,0x31bfe063f7b0c5b7L,
+        0xb0179a7d4ea994ccL },
+      { 0x12d064b3c85f455bL,0x472593288f6e0062L,0xf64e590bb875d6d9L,
+        0x22dd6225ad92bcc7L } },
+    /* 39 << 42 */
+    { { 0xb658038eb9c3bd6dL,0x00cdb0d6fbba27c8L,0x0c6813371062c45dL,
+        0xd8515b8c2d33407dL },
+      { 0xcb8f699e8cbb5ecfL,0x8c4347f8c608d7d8L,0x2c11850abb3e00dbL,
+        0x20a8dafdecb49d19L } },
+    /* 40 << 42 */
+    { { 0xbd78148045ee2f40L,0x75e354af416b60cfL,0xde0b58a18d49a8c4L,
+        0xe40e94e2fa359536L },
+      { 0xbd4fa59f62accd76L,0x05cf466a8c762837L,0xb5abda99448c277bL,
+        0x5a9e01bf48b13740L } },
+    /* 41 << 42 */
+    { { 0x9d457798326aad8dL,0xbdef4954c396f7e7L,0x6fb274a2c253e292L,
+        0x2800bf0a1cfe53e7L },
+      { 0x22426d3144438fd4L,0xef2339235e259f9aL,0x4188503c03f66264L,
+        0x9e5e7f137f9fdfabL } },
+    /* 42 << 42 */
+    { { 0x565eb76c5fcc1abaL,0xea63254859b5bff8L,0x5587c087aab6d3faL,
+        0x92b639ea6ce39c1bL },
+      { 0x0706e782953b135cL,0x7308912e425268efL,0x599e92c7090e7469L,
+        0x83b90f529bc35e75L } },
+    /* 43 << 42 */
+    { { 0x4750b3d0244975b3L,0xf3a4435811965d72L,0x179c67749c8dc751L,
+        0xff18cdfed23d9ff0L },
+      { 0xc40138332028e247L,0x96e280e2f3bfbc79L,0xf60417bdd0880a84L,
+        0x263c9f3d2a568151L } },
+    /* 44 << 42 */
+    { { 0x36be15b32d2ce811L,0x846dc0c2f8291d21L,0x5cfa0ecb789fcfdbL,
+        0x45a0beedd7535b9aL },
+      { 0xec8e9f0796d69af1L,0x31a7c5b8599ab6dcL,0xd36d45eff9e2e09fL,
+        0x3cf49ef1dcee954bL } },
+    /* 45 << 42 */
+    { { 0x6be34cf3086cff9bL,0x88dbd49139a3360fL,0x1e96b8cc0dbfbd1dL,
+        0xc1e5f7bfcb7e2552L },
+      { 0x0547b21428819d98L,0xc770dd9c7aea9dcbL,0xaef0d4c7041d68c8L,
+        0xcc2b981813cb9ba8L } },
+    /* 46 << 42 */
+    { { 0x7fc7bc76fe86c607L,0x6b7b9337502a9a95L,0x1948dc27d14dab63L,
+        0x249dd198dae047beL },
+      { 0xe8356584a981a202L,0x3531dd183a893387L,0x1be11f90c85c7209L,
+        0x93d2fe1ee2a52b5aL } },
+    /* 47 << 42 */
+    { { 0x8225bfe2ec6d6b97L,0x9cf6d6f4bd0aa5deL,0x911459cb54779f5fL,
+        0x5649cddb86aeb1f3L },
+      { 0x321335793f26ce5aL,0xc289a102550f431eL,0x559dcfda73b84c6fL,
+        0x84973819ee3ac4d7L } },
+    /* 48 << 42 */
+    { { 0xb51e55e6f2606a82L,0xe25f706190f2fb57L,0xacef6c2ab1a4e37cL,
+        0x864e359d5dcf2706L },
+      { 0x479e6b187ce57316L,0x2cab25003a96b23dL,0xed4898628ef16df7L,
+        0x2056538cef3758b5L } },
+    /* 49 << 42 */
+    { { 0xa7df865ef15d3101L,0x80c5533a61b553d7L,0x366e19974ed14294L,
+        0x6620741fb3c0bcd6L },
+      { 0x21d1d9c4edc45418L,0x005b859ec1cc4a9dL,0xdf01f630a1c462f0L,
+        0x15d06cf3f26820c7L } },
+    /* 50 << 42 */
+    { { 0x9f7f24ee3484be47L,0x2ff33e964a0c902fL,0x00bdf4575a0bc453L,
+        0x2378dfaf1aa238dbL },
+      { 0x272420ec856720f2L,0x2ad9d95b96797291L,0xd1242cc6768a1558L,
+        0x2e287f8b5cc86aa8L } },
+    /* 51 << 42 */
+    { { 0x796873d0990cecaaL,0xade55f81675d4080L,0x2645eea321f0cd84L,
+        0x7a1efa0fb4e17d02L },
+      { 0xf6858420037cc061L,0x682e05f0d5d43e12L,0x59c3699427218710L,
+        0x85cbba4d3f7cd2fcL } },
+    /* 52 << 42 */
+    { { 0x726f97297a3cd22aL,0x9f8cd5dc4a628397L,0x17b93ab9c23165edL,
+        0xff5f5dbf122823d4L },
+      { 0xc1e4e4b5654a446dL,0xd1a9496f677257baL,0x6387ba94de766a56L,
+        0x23608bc8521ec74aL } },
+    /* 53 << 42 */
+    { { 0x16a522d76688c4d4L,0x9d6b428207373abdL,0xa62f07acb42efaa3L,
+        0xf73e00f7e3b90180L },
+      { 0x36175fec49421c3eL,0xc4e44f9b3dcf2678L,0x76df436b7220f09fL,
+        0x172755fb3aa8b6cfL } },
+    /* 54 << 42 */
+    { { 0xbab89d57446139ccL,0x0a0a6e025fe0208fL,0xcdbb63e211e5d399L,
+        0x33ecaa12a8977f0bL },
+      { 0x59598b21f7c42664L,0xb3e91b32ab65d08aL,0x035822eef4502526L,
+        0x1dcf0176720a82a9L } },
+    /* 55 << 42 */
+    { { 0x50f8598f3d589e02L,0xdf0478ffb1d63d2cL,0x8b8068bd1571cd07L,
+        0x30c3aa4fd79670cdL },
+      { 0x25e8fd4b941ade7fL,0x3d1debdc32790011L,0x65b6dcbd3a3f9ff0L,
+        0x282736a4793de69cL } },
+    /* 56 << 42 */
+    { { 0xef69a0c3d41d3bd3L,0xb533b8c907a26bdeL,0xe2801d97db2edf9fL,
+        0xdc4a8269e1877af0L },
+      { 0x6c1c58513d590dbeL,0x84632f6bee4e9357L,0xd36d36b779b33374L,
+        0xb46833e39bbca2e6L } },
+    /* 57 << 42 */
+    { { 0x37893913f7fc0586L,0x385315f766bf4719L,0x72c56293b31855dcL,
+        0xd1416d4e849061feL },
+      { 0xbeb3ab7851047213L,0x447f6e61f040c996L,0xd06d310d638b1d0cL,
+        0xe28a413fbad1522eL } },
+    /* 58 << 42 */
+    { { 0x685a76cb82003f86L,0x610d07f70bcdbca3L,0x6ff660219ca4c455L,
+        0x7df39b87cea10eecL },
+      { 0xb9255f96e22db218L,0x8cc6d9eb08a34c44L,0xcd4ffb86859f9276L,
+        0x8fa15eb250d07335L } },
+    /* 59 << 42 */
+    { { 0xdf553845cf2c24b5L,0x89f66a9f52f9c3baL,0x8f22b5b9e4a7ceb3L,
+        0xaffef8090e134686L },
+      { 0x3e53e1c68eb8fac2L,0x93c1e4eb28aec98eL,0xb6b91ec532a43bcbL,
+        0x2dbfa947b2d74a51L } },
+    /* 60 << 42 */
+    { { 0xe065d190ca84bad7L,0xfb13919fad58e65cL,0x3c41718bf1cb6e31L,
+        0x688969f006d05c3fL },
+      { 0xd4f94ce721264d45L,0xfdfb65e97367532bL,0x5b1be8b10945a39dL,
+        0x229f789c2b8baf3bL } },
+    /* 61 << 42 */
+    { { 0xd8f41f3e6f49f15dL,0x678ce828907f0792L,0xc69ace82fca6e867L,
+        0x106451aed01dcc89L },
+      { 0x1bb4f7f019fc32d2L,0x64633dfcb00c52d2L,0x8f13549aad9ea445L,
+        0x99a3bf50fb323705L } },
+    /* 62 << 42 */
+    { { 0x0c9625a2534d4dbcL,0x45b8f1d1c2a2fea3L,0x76ec21a1a530fc1aL,
+        0x4bac9c2a9e5bd734L },
+      { 0x5996d76a7b4e3587L,0x0045cdee1182d9e3L,0x1aee24b91207f13dL,
+        0x66452e9797345a41L } },
+    /* 63 << 42 */
+    { { 0x16e5b0549f950cd0L,0x9cc72fb1d7fdd075L,0x6edd61e766249663L,
+        0xde4caa4df043cccbL },
+      { 0x11b1f57a55c7ac17L,0x779cbd441a85e24dL,0x78030f86e46081e7L,
+        0xfd4a60328e20f643L } },
+    /* 64 << 42 */
+    { { 0xcc7a64880a750c0fL,0x39bacfe34e548e83L,0x3d418c760c110f05L,
+        0x3e4daa4cb1f11588L },
+      { 0x2733e7b55ffc69ffL,0x46f147bc92053127L,0x885b2434d722df94L,
+        0x6a444f65e6fc6b7cL } },
+    /* 0 << 49 */
+    { { 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 << 49 */
+    { { 0x7a1a465ac3f16ea8L,0x115a461db2f1d11cL,0x4767dd956c68a172L,
+        0x3392f2ebd13a4698L },
+      { 0xc7a99ccde526cdc7L,0x8e537fdc22292b81L,0x76d8cf69a6d39198L,
+        0xffc5ff432446852dL } },
+    /* 2 << 49 */
+    { { 0x97b14f7ea90567e6L,0x513257b7b6ae5cb7L,0x85454a3c9f10903dL,
+        0xd8d2c9ad69bc3724L },
+      { 0x38da93246b29cb44L,0xb540a21d77c8cbacL,0x9bbfe43501918e42L,
+        0xfffa707a56c3614eL } },
+    /* 3 << 49 */
+    { { 0x0ce4e3f1d4e353b7L,0x062d8a14ef46b0a0L,0x6408d5ab574b73fdL,
+        0xbc41d1c9d3273ffdL },
+      { 0x3538e1e76be77800L,0x71fe8b37c5655031L,0x1cd916216b9b331aL,
+        0xad825d0bbb388f73L } },
+    /* 4 << 49 */
+    { { 0x56c2e05b1cb76219L,0x0ec0bf9171567e7eL,0xe7076f8661c4c910L,
+        0xd67b085bbabc04d9L },
+      { 0x9fb904595e93a96aL,0x7526c1eafbdc249aL,0x0d44d367ecdd0bb7L,
+        0x953999179dc0d695L } },
+    /* 5 << 49 */
+    { { 0x61360ee99e240d18L,0x057cdcacb4b94466L,0xe7667cd12fe5325cL,
+        0x1fa297b521974e3bL },
+      { 0xfa4081e7db083d76L,0x31993be6f206bd15L,0x8949269b14c19f8cL,
+        0x21468d72a9d92357L } },
+    /* 6 << 49 */
+    { { 0x2ccbc583a4c506ecL,0x957ed188d1acfe97L,0x8baed83312f1aea2L,
+        0xef2a6cb48325362dL },
+      { 0x130dde428e195c43L,0xc842025a0e6050c6L,0x2da972a708686a5dL,
+        0xb52999a1e508b4a8L } },
+    /* 7 << 49 */
+    { { 0xd9f090b910a5a8bdL,0xca91d249096864daL,0x8e6a93be3f67dbc1L,
+        0xacae6fbaf5f4764cL },
+      { 0x1563c6e0d21411a0L,0x28fa787fda0a4ad8L,0xd524491c908c8030L,
+        0x1257ba0e4c795f07L } },
+    /* 8 << 49 */
+    { { 0x83f49167ceca9754L,0x426d2cf64b7939a0L,0x2555e355723fd0bfL,
+        0xa96e6d06c4f144e2L },
+      { 0x4768a8dd87880e61L,0x15543815e508e4d5L,0x09d7e772b1b65e15L,
+        0x63439dd6ac302fa0L } },
+    /* 9 << 49 */
+    { { 0xb93f802fc14e35c2L,0x71735b7c4341333cL,0x03a2510416d4f362L,
+        0x3f4d069bbf433c8eL },
+      { 0x0d83ae01f78f5a7cL,0x50a8ffbe7c4eed07L,0xc74f890676e10f83L,
+        0x7d0809669ddaf8e1L } },
+    /* 10 << 49 */
+    { { 0xb11df8e1698e04ccL,0x877be203169005c8L,0x32749e8c4f3c6179L,
+        0x2dbc9d0a7853fc05L },
+      { 0x187d4f939454d937L,0xe682ce9db4800e1bL,0xa9129ad8165e68e8L,
+        0x0fe29735be7f785bL } },
+    /* 11 << 49 */
+    { { 0x5303f40c5b9e02b7L,0xa37c969235ee04e8L,0x5f46cc2034d6632bL,
+        0x55ef72b296ac545bL },
+      { 0xabec5c1f7b91b062L,0x0a79e1c7bb33e821L,0xbb04b4283a9f4117L,
+        0x0de1f28ffd2a475aL } },
+    /* 12 << 49 */
+    { { 0x31019ccf3a4434b4L,0xa34581111a7954dcL,0xa9dac80de34972a7L,
+        0xb043d05474f6b8ddL },
+      { 0x021c319e11137b1aL,0x00a754ceed5cc03fL,0x0aa2c794cbea5ad4L,
+        0x093e67f470c015b6L } },
+    /* 13 << 49 */
+    { { 0x72cdfee9c97e3f6bL,0xc10bcab4b6da7461L,0x3b02d2fcb59806b9L,
+        0x85185e89a1de6f47L },
+      { 0x39e6931f0eb6c4d4L,0x4d4440bdd4fa5b04L,0x5418786e34be7eb8L,
+        0x6380e5219d7259bcL } },
+    /* 14 << 49 */
+    { { 0x20ac0351d598d710L,0x272c4166cb3a4da4L,0xdb82fe1aca71de1fL,
+        0x746e79f2d8f54b0fL },
+      { 0x6e7fc7364b573e9bL,0x75d03f46fd4b5040L,0x5c1cc36d0b98d87bL,
+        0x513ba3f11f472da1L } },
+    /* 15 << 49 */
+    { { 0x79d0af26abb177ddL,0xf82ab5687891d564L,0x2b6768a972232173L,
+        0xefbb3bb08c1f6619L },
+      { 0xb29c11dba6d18358L,0x519e2797b0916d3aL,0xd4dc18f09188e290L,
+        0x648e86e398b0ca7fL } },
+    /* 16 << 49 */
+    { { 0x859d3145983c38b5L,0xb14f176c637abc8bL,0x2793fb9dcaff7be6L,
+        0xebe5a55f35a66a5aL },
+      { 0x7cec1dcd9f87dc59L,0x7c595cd3fbdbf560L,0x5b543b2226eb3257L,
+        0x69080646c4c935fdL } },
+    /* 17 << 49 */
+    { { 0x7f2e440381e9ede3L,0x243c3894caf6df0aL,0x7c605bb11c073b11L,
+        0xcd06a541ba6a4a62L },
+      { 0x2916894949d4e2e5L,0x33649d074af66880L,0xbfc0c885e9a85035L,
+        0xb4e52113fc410f4bL } },
+    /* 18 << 49 */
+    { { 0xdca3b70678a6513bL,0x92ea4a2a9edb1943L,0x02642216db6e2dd8L,
+        0x9b45d0b49fd57894L },
+      { 0x114e70dbc69d11aeL,0x1477dd194c57595fL,0xbc2208b4ec77c272L,
+        0x95c5b4d7db68f59cL } },
+    /* 19 << 49 */
+    { { 0xb8c4fc6342e532b7L,0x386ba4229ae35290L,0xfb5dda42d201ecbcL,
+        0x2353dc8ba0e38fd6L },
+      { 0x9a0b85ea68f7e978L,0x96ec56822ad6d11fL,0x5e279d6ce5f6886dL,
+        0xd3fe03cd3cb1914dL } },
+    /* 20 << 49 */
+    { { 0xfe541fa47ea67c77L,0x952bd2afe3ea810cL,0x791fef568d01d374L,
+        0xa3a1c6210f11336eL },
+      { 0x5ad0d5a9c7ec6d79L,0xff7038af3225c342L,0x003c6689bc69601bL,
+        0x25059bc745e8747dL } },
+    /* 21 << 49 */
+    { { 0xfa4965b2f2086fbfL,0xf6840ea686916078L,0xd7ac762070081d6cL,
+        0xe600da31b5328645L },
+      { 0x01916f63529b8a80L,0xe80e48582d7d6f3eL,0x29eb0fe8d664ca7cL,
+        0xf017637be7b43b0cL } },
+    /* 22 << 49 */
+    { { 0x9a75c80676cb2566L,0x8f76acb1b24892d9L,0x7ae7b9cc1f08fe45L,
+        0x19ef73296a4907d8L },
+      { 0x2db4ab715f228bf0L,0xf3cdea39817032d7L,0x0b1f482edcabe3c0L,
+        0x3baf76b4bb86325cL } },
+    /* 23 << 49 */
+    { { 0xd49065e010089465L,0x3bab5d298e77c596L,0x7636c3a6193dbd95L,
+        0xdef5d294b246e499L },
+      { 0xb22c58b9286b2475L,0xa0b93939cd80862bL,0x3002c83af0992388L,
+        0x6de01f9beacbe14cL } },
+    /* 24 << 49 */
+    { { 0x6aac688eadd70482L,0x708de92a7b4a4e8aL,0x75b6dd73758a6eefL,
+        0xea4bf352725b3c43L },
+      { 0x10041f2c87912868L,0xb1b1be95ef09297aL,0x19ae23c5a9f3860aL,
+        0xc4f0f839515dcf4bL } },
+    /* 25 << 49 */
+    { { 0x3c7ecca397f6306aL,0x744c44ae68a3a4b0L,0x69cd13a0b3a1d8a2L,
+        0x7cad0a1e5256b578L },
+      { 0xea653fcd33791d9eL,0x9cc2a05d74b2e05fL,0x73b391dcfd7affa2L,
+        0xddb7091eb6b05442L } },
+    /* 26 << 49 */
+    { { 0xc71e27bf8538a5c6L,0x195c63dd89abff17L,0xfd3152851b71e3daL,
+        0x9cbdfda7fa680fa0L },
+      { 0x9db876ca849d7eabL,0xebe2764b3c273271L,0x663357e3f208dceaL,
+        0x8c5bd833565b1b70L } },
+    /* 27 << 49 */
+    { { 0xccc3b4f59837fc0dL,0x9b641ba8a79cf00fL,0x7428243ddfdf3990L,
+        0x83a594c4020786b1L },
+      { 0xb712451a526c4502L,0x9d39438e6adb3f93L,0xfdb261e3e9ff0ccdL,
+        0x80344e3ce07af4c3L } },
+    /* 28 << 49 */
+    { { 0x75900d7c2fa4f126L,0x08a3b8655c99a232L,0x2478b6bfdb25e0c3L,
+        0x482cc2c271db2edfL },
+      { 0x37df7e645f321bb8L,0x8a93821b9a8005b4L,0x3fa2f10ccc8c1958L,
+        0x0d3322182c269d0aL } },
+    /* 29 << 49 */
+    { { 0x20ab8119e246b0e6L,0xb39781e4d349fd17L,0xd293231eb31aa100L,
+        0x4b779c97bb032168L },
+      { 0x4b3f19e1c8470500L,0x45b7efe90c4c869dL,0xdb84f38aa1a6bbccL,
+        0x3b59cb15b2fddbc1L } },
+    /* 30 << 49 */
+    { { 0xba5514df3fd165e8L,0x499fd6a9061f8811L,0x72cd1fe0bfef9f00L,
+        0x120a4bb979ad7e8aL },
+      { 0xf2ffd0955f4a5ac5L,0xcfd174f195a7a2f0L,0xd42301ba9d17baf1L,
+        0xd2fa487a77f22089L } },
+    /* 31 << 49 */
+    { { 0x9cb09efeb1dc77e1L,0xe956693921c99682L,0x8c5469016c6067bbL,
+        0xfd37857461c24456L },
+      { 0x2b6a6cbe81796b33L,0x62d550f658e87f8bL,0x1b763e1c7f1b01b4L,
+        0x4b93cfea1b1b5e12L } },
+    /* 32 << 49 */
+    { { 0xb93452381d531696L,0x57201c0088cdde69L,0xdde922519a86afc7L,
+        0xe3043895bd35cea8L },
+      { 0x7608c1e18555970dL,0x8267dfa92535935eL,0xd4c60a57322ea38bL,
+        0xe0bf7977804ef8b5L } },
+    /* 33 << 49 */
+    { { 0x1a0dab28c06fece4L,0xd405991e94e7b49dL,0xc542b6d2706dab28L,
+        0xcb228da3a91618fbL },
+      { 0x224e4164107d1ceaL,0xeb9fdab3d0f5d8f1L,0xc02ba3860d6e41cdL,
+        0x676a72c59b1f7146L } },
+    /* 34 << 49 */
+    { { 0xffd6dd984d6cb00bL,0xcef9c5cade2e8d7cL,0xa1bbf5d7641c7936L,
+        0x1b95b230ee8f772eL },
+      { 0xf765a92ee8ac25b1L,0xceb04cfc3a18b7c6L,0x27944cef0acc8966L,
+        0xcbb3c957434c1004L } },
+    /* 35 << 49 */
+    { { 0x9c9971a1a43ff93cL,0x5bc2db17a1e358a9L,0x45b4862ea8d9bc82L,
+        0x70ebfbfb2201e052L },
+      { 0xafdf64c792871591L,0xea5bcae6b42d0219L,0xde536c552ad8f03cL,
+        0xcd6c3f4da76aa33cL } },
+    /* 36 << 49 */
+    { { 0xbeb5f6230bca6de3L,0xdd20dd99b1e706fdL,0x90b3ff9dac9059d4L,
+        0x2d7b29027ccccc4eL },
+      { 0x8a090a59ce98840fL,0xa5d947e08410680aL,0x49ae346a923379a5L,
+        0x7dbc84f9b28a3156L } },
+    /* 37 << 49 */
+    { { 0xfd40d91654a1aff2L,0xabf318ba3a78fb9bL,0x50152ed83029f95eL,
+        0x9fc1dd77c58ad7faL },
+      { 0x5fa5791513595c17L,0xb95046688f62b3a9L,0x907b5b24ff3055b0L,
+        0x2e995e359a84f125L } },
+    /* 38 << 49 */
+    { { 0x87dacf697e9bbcfbL,0x95d0c1d6e86d96e3L,0x65726e3c2d95a75cL,
+        0x2c3c9001acd27f21L },
+      { 0x1deab5616c973f57L,0x108b7e2ca5221643L,0x5fee9859c4ef79d4L,
+        0xbd62b88a40d4b8c6L } },
+    /* 39 << 49 */
+    { { 0xb4dd29c4197c75d6L,0x266a6df2b7076febL,0x9512d0ea4bf2df11L,
+        0x1320c24f6b0cc9ecL },
+      { 0x6bb1e0e101a59596L,0x8317c5bbeff9aaacL,0x65bb405e385aa6c9L,
+        0x613439c18f07988fL } },
+    /* 40 << 49 */
+    { { 0xd730049f16a66e91L,0xe97f2820fa1b0e0dL,0x4131e003304c28eaL,
+        0x820ab732526bac62L },
+      { 0xb2ac9ef928714423L,0x54ecfffaadb10cb2L,0x8781476ef886a4ccL,
+        0x4b2c87b5db2f8d49L } },
+    /* 41 << 49 */
+    { { 0xe857cd200a44295dL,0x707d7d2158c6b044L,0xae8521f9f596757cL,
+        0x87448f0367b2b714L },
+      { 0x13a9bc455ebcd58dL,0x79bcced99122d3c1L,0x3c6442479e076642L,
+        0x0cf227782df4767dL } },
+    /* 42 << 49 */
+    { { 0x5e61aee471d444b6L,0x211236bfc5084a1dL,0x7e15bc9a4fd3eaf6L,
+        0x68df2c34ab622bf5L },
+      { 0x9e674f0f59bf4f36L,0xf883669bd7f34d73L,0xc48ac1b831497b1dL,
+        0x323b925d5106703bL } },
+    /* 43 << 49 */
+    { { 0x22156f4274082008L,0xeffc521ac8482bcbL,0x5c6831bf12173479L,
+        0xcaa2528fc4739490L },
+      { 0x84d2102a8f1b3c4dL,0xcf64dfc12d9bec0dL,0x433febad78a546efL,
+        0x1f621ec37b73cef1L } },
+    /* 44 << 49 */
+    { { 0x6aecd62737338615L,0x162082ab01d8edf6L,0x833a811919e86b66L,
+        0x6023a251d299b5dbL },
+      { 0xf5bb0c3abbf04b89L,0x6735eb69ae749a44L,0xd0e058c54713de3bL,
+        0xfdf2593e2c3d4ccdL } },
+    /* 45 << 49 */
+    { { 0x1b8f414efdd23667L,0xdd52aacafa2015eeL,0x3e31b517bd9625ffL,
+        0x5ec9322d8db5918cL },
+      { 0xbc73ac85a96f5294L,0x82aa5bf361a0666aL,0x49755810bf08ac42L,
+        0xd21cdfd5891cedfcL } },
+    /* 46 << 49 */
+    { { 0x918cb57b67f8be10L,0x365d1a7c56ffa726L,0x2435c5046532de93L,
+        0xc0fc5e102674cd02L },
+      { 0x6e51fcf89cbbb142L,0x1d436e5aafc50692L,0x766bffff3fbcae22L,
+        0x3148c2fdfd55d3b8L } },
+    /* 47 << 49 */
+    { { 0x52c7fdc9233222faL,0x89ff1092e419fb6bL,0x3cd6db9925254977L,
+        0x2e85a1611cf12ca7L },
+      { 0xadd2547cdc810bc9L,0xea3f458f9d257c22L,0x642c1fbe27d6b19bL,
+        0xed07e6b5140481a6L } },
+    /* 48 << 49 */
+    { { 0x6ada1d4286d2e0f8L,0xe59201220e8a9fd5L,0x02c936af708c1b49L,
+        0x60f30fee2b4bfaffL },
+      { 0x6637ad06858e6a61L,0xce4c77673fd374d0L,0x39d54b2d7188defbL,
+        0xa8c9d250f56a6b66L } },
+    /* 49 << 49 */
+    { { 0x58fc0f5eb24fe1dcL,0x9eaf9dee6b73f24cL,0xa90d588b33650705L,
+        0xde5b62c5af2ec729L },
+      { 0x5c72cfaed3c2b36eL,0x868c19d5034435daL,0x88605f93e17ee145L,
+        0xaa60c4ee77a5d5b1L } },
+    /* 50 << 49 */
+    { { 0xbcf5bfd23b60c472L,0xaf4ef13ceb1d3049L,0x373f44fce13895c9L,
+        0xf29b382f0cbc9822L },
+      { 0x1bfcb85373efaef6L,0xcf56ac9ca8c96f40L,0xd7adf1097a191e24L,
+        0x98035f44bf8a8dc2L } },
+    /* 51 << 49 */
+    { { 0xf40a71b91e750c84L,0xc57f7b0c5dc6c469L,0x49a0e79c6fbc19c1L,
+        0x6b0f5889a48ebdb8L },
+      { 0x5d3fd084a07c4e9fL,0xc3830111ab27de14L,0x0e4929fe33e08dccL,
+        0xf4a5ad2440bb73a3L } },
+    /* 52 << 49 */
+    { { 0xde86c2bf490f97caL,0x288f09c667a1ce18L,0x364bb8861844478dL,
+        0x7840fa42ceedb040L },
+      { 0x1269fdd25a631b37L,0x94761f1ea47c8b7dL,0xfc0c2e17481c6266L,
+        0x85e16ea23daa5fa7L } },
+    /* 53 << 49 */
+    { { 0xccd8603392491048L,0x0c2f6963f4d402d7L,0x6336f7dfdf6a865cL,
+        0x0a2a463cb5c02a87L },
+      { 0xb0e29be7bf2f12eeL,0xf0a2200266bad988L,0x27f87e039123c1d7L,
+        0x21669c55328a8c98L } },
+    /* 54 << 49 */
+    { { 0x186b980392f14529L,0xd3d056cc63954df3L,0x2f03fd58175a46f6L,
+        0x63e34ebe11558558L },
+      { 0xe13fedee5b80cfa5L,0xe872a120d401dbd1L,0x52657616e8a9d667L,
+        0xbc8da4b6e08d6693L } },
+    /* 55 << 49 */
+    { { 0x370fb9bb1b703e75L,0x6773b186d4338363L,0x18dad378ecef7bffL,
+        0xaac787ed995677daL },
+      { 0x4801ea8b0437164bL,0xf430ad2073fe795eL,0xb164154d8ee5eb73L,
+        0x0884ecd8108f7c0eL } },
+    /* 56 << 49 */
+    { { 0x0e6ec0965f520698L,0x640631fe44f7b8d9L,0x92fd34fca35a68b9L,
+        0x9c5a4b664d40cf4eL },
+      { 0x949454bf80b6783dL,0x80e701fe3a320a10L,0x8d1a564a1a0a39b2L,
+        0x1436d53d320587dbL } },
+    /* 57 << 49 */
+    { { 0xf5096e6d6556c362L,0xbc23a3c0e2455d7eL,0x3a7aee54807230f9L,
+        0x9ba1cfa622ae82fdL },
+      { 0x833a057a99c5d706L,0x8be85f4b842315c9L,0xd083179a66a72f12L,
+        0x2fc77d5dcdcc73cdL } },
+    /* 58 << 49 */
+    { { 0x22b88a805616ee30L,0xfb09548fe7ab1083L,0x8ad6ab0d511270cdL,
+        0x61f6c57a6924d9abL },
+      { 0xa0f7bf7290aecb08L,0x849f87c90df784a4L,0x27c79c15cfaf1d03L,
+        0xbbf9f675c463faceL } },
+    /* 59 << 49 */
+    { { 0x91502c65765ba543L,0x18ce3cac42ea60ddL,0xe5cee6ac6e43ecb3L,
+        0x63e4e91068f2aeebL },
+      { 0x26234fa3c85932eeL,0x96883e8b4c90c44dL,0x29b9e738a18a50f6L,
+        0xbfc62b2a3f0420dfL } },
+    /* 60 << 49 */
+    { { 0xd22a7d906d3e1fa9L,0x17115618fe05b8a3L,0x2a0c9926bb2b9c01L,
+        0xc739fcc6e07e76a2L },
+      { 0x540e9157165e439aL,0x06353a626a9063d8L,0x84d9559461e927a3L,
+        0x013b9b26e2e0be7fL } },
+    /* 61 << 49 */
+    { { 0x4feaec3b973497f1L,0x15c0f94e093ebc2dL,0x6af5f22733af0583L,
+        0x0c2af206c61f3340L },
+      { 0xd25dbdf14457397cL,0x2e8ed017cabcbae0L,0xe3010938c2815306L,
+        0xbaa99337e8c6cd68L } },
+    /* 62 << 49 */
+    { { 0x085131823b0ec7deL,0x1e1b822b58df05dfL,0x5c14842fa5c3b683L,
+        0x98fe977e3eba34ceL },
+      { 0xfd2316c20d5e8873L,0xe48d839abd0d427dL,0x495b2218623fc961L,
+        0x24ee56e7b46fba5eL } },
+    /* 63 << 49 */
+    { { 0x9184a55b91e4de58L,0xa7488ca5dfdea288L,0xa723862ea8dcc943L,
+        0x92d762b2849dc0fcL },
+      { 0x3c444a12091ff4a9L,0x581113fa0cada274L,0xb9de0a4530d8eae2L,
+        0x5e0fcd85df6b41eaL } },
+    /* 64 << 49 */
+    { { 0x6233ea68c094dbb5L,0xb77d062ed968d410L,0x3e719bbc58b3002dL,
+        0x68e7dd3d3dc49d58L },
+      { 0x8d825740013a5e58L,0x213117473c9e3c1bL,0x0cb0a2a77c99b6abL,
+        0x5c48a3b3c2f888f2L } },
+    /* 0 << 56 */
+    { { 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 << 56 */
+    { { 0xc7913e91991724f3L,0x5eda799c39cbd686L,0xddb595c763d4fc1eL,
+        0x6b63b80bac4fed54L },
+      { 0x6ea0fc697e5fb516L,0x737708bad0f1c964L,0x9628745f11a92ca5L,
+        0x61f379589a86967aL } },
+    /* 2 << 56 */
+    { { 0x9af39b2caa665072L,0x78322fa4efd324efL,0x3d153394c327bd31L,
+        0x81d5f2713129dab0L },
+      { 0xc72e0c42f48027f5L,0xaa40cdbc8536e717L,0xf45a657a2d369d0fL,
+        0xb03bbfc4ea7f74e6L } },
+    /* 3 << 56 */
+    { { 0x46a8c4180d738dedL,0x6f1a5bb0e0de5729L,0xf10230b98ba81675L,
+        0x32c6f30c112b33d4L },
+      { 0x7559129dd8fffb62L,0x6a281b47b459bf05L,0x77c1bd3afa3b6776L,
+        0x0709b3807829973aL } },
+    /* 4 << 56 */
+    { { 0x8c26b232a3326505L,0x38d69272ee1d41bfL,0x0459453effe32afaL,
+        0xce8143ad7cb3ea87L },
+      { 0x932ec1fa7e6ab666L,0x6cd2d23022286264L,0x459a46fe6736f8edL,
+        0x50bf0d009eca85bbL } },
+    /* 5 << 56 */
+    { { 0x0b825852877a21ecL,0x300414a70f537a94L,0x3f1cba4021a9a6a2L,
+        0x50824eee76943c00L },
+      { 0xa0dbfcecf83cba5dL,0xf953814893b4f3c0L,0x6174416248f24dd7L,
+        0x5322d64de4fb09ddL } },
+    /* 6 << 56 */
+    { { 0x574473843d9325f3L,0xa9bef2d0f371cb84L,0x77d2188ba61e36c5L,
+        0xbbd6a7d7c602df72L },
+      { 0xba3aa9028f61bc0bL,0xf49085ed6ed0b6a1L,0x8bc625d6ae6e8298L,
+        0x832b0b1da2e9c01dL } },
+    /* 7 << 56 */
+    { { 0xa337c447f1f0ced1L,0x800cc7939492dd2bL,0x4b93151dbea08efaL,
+        0x820cf3f8de0a741eL },
+      { 0xff1982dc1c0f7d13L,0xef92196084dde6caL,0x1ad7d97245f96ee3L,
+        0x319c8dbe29dea0c7L } },
+    /* 8 << 56 */
+    { { 0xd3ea38717b82b99bL,0x75922d4d470eb624L,0x8f66ec543b95d466L,
+        0x66e673ccbee1e346L },
+      { 0x6afe67c4b5f2b89aL,0x3de9c1e6290e5cd3L,0x8c278bb6310a2adaL,
+        0x420fa3840bdb323bL } },
+    /* 9 << 56 */
+    { { 0x0ae1d63b0eb919b0L,0xd74ee51da74b9620L,0x395458d0a674290cL,
+        0x324c930f4620a510L },
+      { 0x2d1f4d19fbac27d4L,0x4086e8ca9bedeeacL,0x0cdd211b9b679ab8L,
+        0x5970167d7090fec4L } },
+    /* 10 << 56 */
+    { { 0x3420f2c9faf1fc63L,0x616d333a328c8bb4L,0x7d65364c57f1fe4aL,
+        0x9343e87755e5c73aL },
+      { 0x5795176be970e78cL,0xa36ccebf60533627L,0xfc7c738009cdfc1bL,
+        0xb39a2afeb3fec326L } },
+    /* 11 << 56 */
+    { { 0xb7ff1ba16224408aL,0xcc856e92247cfc5eL,0x01f102e7c18bc493L,
+        0x4613ab742091c727L },
+      { 0xaa25e89cc420bf2bL,0x00a5317690337ec2L,0xd2be9f437d025fc7L,
+        0x3316fb856e6fe3dcL } },
+    /* 12 << 56 */
+    { { 0x27520af59ac50814L,0xfdf95e789a8e4223L,0xb7e7df2a56bec5a0L,
+        0xf7022f7ddf159e5dL },
+      { 0x93eeeab1cac1fe8fL,0x8040188c37451168L,0x7ee8aa8ad967dce6L,
+        0xfa0e79e73abc9299L } },
+    /* 13 << 56 */
+    { { 0x67332cfc2064cfd1L,0x339c31deb0651934L,0x719b28d52a3bcbeaL,
+        0xee74c82b9d6ae5c6L },
+      { 0x0927d05ebaf28ee6L,0x82cecf2c9d719028L,0x0b0d353eddb30289L,
+        0xfe4bb977fddb2e29L } },
+    /* 14 << 56 */
+    { { 0xbb5bb990640bfd9eL,0xd226e27782f62108L,0x4bf0098502ffdd56L,
+        0x7756758a2ca1b1b5L },
+      { 0xc32b62a35285fe91L,0xedbc546a8c9cd140L,0x1e47a013af5cb008L,
+        0xbca7e720073ce8f2L } },
+    /* 15 << 56 */
+    { { 0xe10b2ab817a91caeL,0xb89aab6508e27f63L,0x7b3074a7dba3ddf9L,
+        0x1c20ce09330c2972L },
+      { 0x6b9917b45fcf7e33L,0xe6793743945ceb42L,0x18fc22155c633d19L,
+        0xad1adb3cc7485474L } },
+    /* 16 << 56 */
+    { { 0x646f96796424c49bL,0xf888dfe867c241c9L,0xe12d4b9324f68b49L,
+        0x9a6b62d8a571df20L },
+      { 0x81b4b26d179483cbL,0x666f96329511fae2L,0xd281b3e4d53aa51fL,
+        0x7f96a7657f3dbd16L } },
+    /* 17 << 56 */
+    { { 0xa7f8b5bf074a30ceL,0xd7f52107005a32e6L,0x6f9e090750237ed4L,
+        0x2f21da478096fa2bL },
+      { 0xf3e19cb4eec863a0L,0xd18f77fd9527620aL,0x9505c81c407c1cf8L,
+        0x9998db4e1b6ec284L } },
+    /* 18 << 56 */
+    { { 0x7e3389e5c247d44dL,0x125071413f4f3d80L,0xd4ba01104a78a6c7L,
+        0x312874a0767720beL },
+      { 0xded059a675944370L,0xd6123d903b2c0bddL,0xa56b717b51c108e3L,
+        0x9bb7940e070623e9L } },
+    /* 19 << 56 */
+    { { 0x794e2d5984ac066cL,0xf5954a92e68c69a0L,0x28c524584fd99dccL,
+        0x60e639fcb1012517L },
+      { 0xc2e601257de79248L,0xe9ef6404f12fc6d7L,0x4c4f28082a3b5d32L,
+        0x865ad32ec768eb8aL } },
+    /* 20 << 56 */
+    { { 0xac02331b13fb70b6L,0x037b44c195599b27L,0x1a860fc460bd082cL,
+        0xa2e25745c980cd01L },
+      { 0xee3387a81da0263eL,0x931bfb952d10f3d6L,0x5b687270a1f24a32L,
+        0xf140e65dca494b86L } },
+    /* 21 << 56 */
+    { { 0x4f4ddf91b2f1ac7aL,0xf99eaabb760fee27L,0x57f4008a49c228e5L,
+        0x090be4401cf713bbL },
+      { 0xac91fbe45004f022L,0xd838c2c2569e1af6L,0xd6c7d20b0f1daaa5L,
+        0xaa063ac11bbb02c0L } },
+    /* 22 << 56 */
+    { { 0x0938a42259558a78L,0x5343c6698435da2fL,0x96f67b18034410dcL,
+        0x7cc1e42484510804L },
+      { 0x86a1543f16dfbb7dL,0x921fa9425b5bd592L,0x9dcccb6eb33dd03cL,
+        0x8581ddd9b843f51eL } },
+    /* 23 << 56 */
+    { { 0x54935fcb81d73c9eL,0x6d07e9790a5e97abL,0x4dc7b30acf3a6babL,
+        0x147ab1f3170bee11L },
+      { 0x0aaf8e3d9fafdee4L,0xfab3dbcb538a8b95L,0x405df4b36ef13871L,
+        0xf1f4e9cb088d5a49L } },
+    /* 24 << 56 */
+    { { 0x9bcd24d366b33f1dL,0x3b97b8205ce445c0L,0xe2926549ba93ff61L,
+        0xd9c341ce4dafe616L },
+      { 0xfb30a76e16efb6f3L,0xdf24b8ca605b953cL,0x8bd52afec2fffb9fL,
+        0xbbac5ff7e19d0b96L } },
+    /* 25 << 56 */
+    { { 0x43c01b87459afccdL,0x6bd45143b7432652L,0x8473453055b5d78eL,
+        0x81088fdb1554ba7dL },
+      { 0xada0a52c1e269375L,0xf9f037c42dc5ec10L,0xc066060794bfbc11L,
+        0xc0a630bbc9c40d2fL } },
+    /* 26 << 56 */
+    { { 0x5efc797eab64c31eL,0xffdb1dab74507144L,0xf61242871ca6790cL,
+        0xe9609d81e69bf1bfL },
+      { 0xdb89859500d24fc9L,0x9c750333e51fb417L,0x51830a91fef7bbdeL,
+        0x0ce67dc8945f585cL } },
+    /* 27 << 56 */
+    { { 0x9a730ed44763eb50L,0x24a0e221c1ab0d66L,0x643b6393648748f3L,
+        0x1982daa16d3c6291L },
+      { 0x6f00a9f78bbc5549L,0x7a1783e17f36384eL,0xe8346323de977f50L,
+        0x91ab688db245502aL } },
+    /* 28 << 56 */
+    { { 0x331ab6b56d0bdd66L,0x0a6ef32e64b71229L,0x1028150efe7c352fL,
+        0x27e04350ce7b39d3L },
+      { 0x2a3c8acdc1070c82L,0xfb2034d380c9feefL,0x2d729621709f3729L,
+        0x8df290bf62cb4549L } },
+    /* 29 << 56 */
+    { { 0x02f99f33fc2e4326L,0x3b30076d5eddf032L,0xbb21f8cf0c652fb5L,
+        0x314fb49eed91cf7bL },
+      { 0xa013eca52f700750L,0x2b9e3c23712a4575L,0xe5355557af30fbb0L,
+        0x1ada35167c77e771L } },
+    /* 30 << 56 */
+    { { 0x45f6ecb27b135670L,0xe85d19df7cfc202eL,0x0f1b50c758d1be9fL,
+        0x5ebf2c0aead2e344L },
+      { 0x1531fe4eabc199c9L,0xc703259256bab0aeL,0x16ab2e486c1fec54L,
+        0x0f87fda804280188L } },
+    /* 31 << 56 */
+    { { 0xdc9f46fc609e4a74L,0x2a44a143ba667f91L,0xbc3d8b95b4d83436L,
+        0xa01e4bd0c7bd2958L },
+      { 0x7b18293273483c90L,0xa79c6aa1a7c7b598L,0xbf3983c6eaaac07eL,
+        0x8f18181e96e0d4e6L } },
+    /* 32 << 56 */
+    { { 0x8553d37c051af62bL,0xe9a998eb0bf94496L,0xe0844f9fb0d59aa1L,
+        0x983fd558e6afb813L },
+      { 0x9670c0ca65d69804L,0x732b22de6ea5ff2dL,0xd7640ba95fd8623bL,
+        0x9f619163a6351782L } },
+    /* 33 << 56 */
+    { { 0x0bfc27eeacee5043L,0xae419e732eb10f02L,0x19c028d18943fb05L,
+        0x71f01cf7ff13aa2aL },
+      { 0x7790737e8887a132L,0x6751330966318410L,0x9819e8a37ddb795eL,
+        0xfecb8ef5dad100b2L } },
+    /* 34 << 56 */
+    { { 0x59f74a223021926aL,0xb7c28a496f9b4c1cL,0xed1a733f912ad0abL,
+        0x42a910af01a5659cL },
+      { 0x3842c6e07bd68cabL,0x2b57fa3876d70ac8L,0x8a6707a83c53aaebL,
+        0x62c1c51065b4db18L } },
+    /* 35 << 56 */
+    { { 0x8de2c1fbb2d09dc7L,0xc3dfed12266bd23bL,0x927d039bd5b27db6L,
+        0x2fb2f0f1103243daL },
+      { 0xf855a07b80be7399L,0xed9327ce1f9f27a8L,0xa0bd99c7729bdef7L,
+        0x2b67125e28250d88L } },
+    /* 36 << 56 */
+    { { 0x784b26e88670ced7L,0xe3dfe41fc31bd3b4L,0x9e353a06bcc85cbcL,
+        0x302e290960178a9dL },
+      { 0x860abf11a6eac16eL,0x76447000aa2b3aacL,0x46ff9d19850afdabL,
+        0x35bdd6a5fdb2d4c1L } },
+    /* 37 << 56 */
+    { { 0xe82594b07e5c9ce9L,0x0f379e5320af346eL,0x608b31e3bc65ad4aL,
+        0x710c6b12267c4826L },
+      { 0x51c966f971954cf1L,0xb1cec7930d0aa215L,0x1f15598986bd23a8L,
+        0xae2ff99cf9452e86L } },
+    /* 38 << 56 */
+    { { 0xd8dd953c340ceaa2L,0x263552752e2e9333L,0x15d4e5f98586f06dL,
+        0xd6bf94a8f7cab546L },
+      { 0x33c59a0ab76a9af0L,0x52740ab3ba095af7L,0xc444de8a24389ca0L,
+        0xcc6f9863706da0cbL } },
+    /* 39 << 56 */
+    { { 0xb5a741a76b2515cfL,0x71c416019585c749L,0x78350d4fe683de97L,
+        0x31d6152463d0b5f5L },
+      { 0x7a0cc5e1fbce090bL,0xaac927edfbcb2a5bL,0xe920de4920d84c35L,
+        0x8c06a0b622b4de26L } },
+    /* 40 << 56 */
+    { { 0xd34dd58bafe7ddf3L,0x55851fedc1e6e55bL,0xd1395616960696e7L,
+        0x940304b25f22705fL },
+      { 0x6f43f861b0a2a860L,0xcf1212820e7cc981L,0x121862120ab64a96L,
+        0x09215b9ab789383cL } },
+    /* 41 << 56 */
+    { { 0x311eb30537387c09L,0xc5832fcef03ee760L,0x30358f5832f7ea19L,
+        0xe01d3c3491d53551L },
+      { 0x1ca5ee41da48ea80L,0x34e71e8ecf4fa4c1L,0x312abd257af1e1c7L,
+        0xe3afcdeb2153f4a5L } },
+    /* 42 << 56 */
+    { { 0x9d5c84d700235e9aL,0x0308d3f48c4c836fL,0xc0a66b0489332de5L,
+        0x610dd39989e566efL },
+      { 0xf8eea460d1ac1635L,0x84cbb3fb20a2c0dfL,0x40afb488e74a48c5L,
+        0x29738198d326b150L } },
+    /* 43 << 56 */
+    { { 0x2a17747fa6d74081L,0x60ea4c0555a26214L,0x53514bb41f88c5feL,
+        0xedd645677e83426cL },
+      { 0xd5d6cbec96460b25L,0xa12fd0ce68dc115eL,0xc5bc3ed2697840eaL,
+        0x969876a8a6331e31L } },
+    /* 44 << 56 */
+    { { 0x60c36217472ff580L,0xf42297054ad41393L,0x4bd99ef0a03b8b92L,
+        0x501c7317c144f4f6L },
+      { 0x159009b318464945L,0x6d5e594c74c5c6beL,0x2d587011321a3660L,
+        0xd1e184b13898d022L } },
+    /* 45 << 56 */
+    { { 0x5ba047524c6a7e04L,0x47fa1e2b45550b65L,0x9419daf048c0a9a5L,
+        0x663629537c243236L },
+      { 0xcd0744b15cb12a88L,0x561b6f9a2b646188L,0x599415a566c2c0c0L,
+        0xbe3f08590f83f09aL } },
+    /* 46 << 56 */
+    { { 0x9141c5beb92041b8L,0x01ae38c726477d0dL,0xca8b71f3d12c7a94L,
+        0xfab5b31f765c70dbL },
+      { 0x76ae7492487443e9L,0x8595a310990d1349L,0xf8dbeda87d460a37L,
+        0x7f7ad0821e45a38fL } },
+    /* 47 << 56 */
+    { { 0xed1d4db61059705aL,0xa3dd492ae6b9c697L,0x4b92ee3a6eb38bd5L,
+        0xbab2609d67cc0bb7L },
+      { 0x7fc4fe896e70ee82L,0xeff2c56e13e6b7e3L,0x9b18959e34d26fcaL,
+        0x2517ab66889d6b45L } },
+    /* 48 << 56 */
+    { { 0xf167b4e0bdefdd4fL,0x69958465f366e401L,0x5aa368aba73bbec0L,
+        0x121487097b240c21L },
+      { 0x378c323318969006L,0xcb4d73cee1fe53d1L,0x5f50a80e130c4361L,
+        0xd67f59517ef5212bL } },
+    /* 49 << 56 */
+    { { 0xf145e21e9e70c72eL,0xb2e52e295566d2fbL,0x44eaba4a032397f5L,
+        0x5e56937b7e31a7deL },
+      { 0x68dcf517456c61e1L,0xbc2e954aa8b0a388L,0xe3552fa760a8b755L,
+        0x03442dae73ad0cdeL } },
+    /* 50 << 56 */
+    { { 0x37ffe747ceb26210L,0x983545e8787baef9L,0x8b8c853586a3de31L,
+        0xc621dbcbfacd46dbL },
+      { 0x82e442e959266fbbL,0xa3514c37339d471cL,0x3a11b77162cdad96L,
+        0xf0cb3b3cecf9bdf0L } },
+    /* 51 << 56 */
+    { { 0x3fcbdbce478e2135L,0x7547b5cfbda35342L,0xa97e81f18a677af6L,
+        0xc8c2bf8328817987L },
+      { 0xdf07eaaf45580985L,0xc68d1f05c93b45cbL,0x106aa2fec77b4cacL,
+        0x4c1d8afc04a7ae86L } },
+    /* 52 << 56 */
+    { { 0xdb41c3fd9eb45ab2L,0x5b234b5bd4b22e74L,0xda253decf215958aL,
+        0x67e0606ea04edfa0L },
+      { 0xabbbf070ef751b11L,0xf352f175f6f06dceL,0xdfc4b6af6839f6b4L,
+        0x53ddf9a89959848eL } },
+    /* 53 << 56 */
+    { { 0xda49c379c21520b0L,0x90864ff0dbd5d1b6L,0x2f055d235f49c7f7L,
+        0xe51e4e6aa796b2d8L },
+      { 0xc361a67f5c9dc340L,0x5ad53c37bca7c620L,0xda1d658832c756d0L,
+        0xad60d9118bb67e13L } },
+    /* 54 << 56 */
+    { { 0xd6c47bdf0eeec8c6L,0x4a27fec1078a1821L,0x081f7415c3099524L,
+        0x8effdf0b82cd8060L },
+      { 0xdb70ec1c65842df8L,0x8821b358d319a901L,0x72ee56eede42b529L,
+        0x5bb39592236e4286L } },
+    /* 55 << 56 */
+    { { 0xd1183316fd6f7140L,0xf9fadb5bbd8e81f7L,0x701d5e0c5a02d962L,
+        0xfdee4dbf1b601324L },
+      { 0xbed1740735d7620eL,0x04e3c2c3f48c0012L,0x9ee29da73455449aL,
+        0x562cdef491a836c4L } },
+    /* 56 << 56 */
+    { { 0x8f682a5f47701097L,0x617125d8ff88d0c2L,0x948fda2457bb86ddL,
+        0x348abb8f289f7286L },
+      { 0xeb10eab599d94bbdL,0xd51ba28e4684d160L,0xabe0e51c30c8f41aL,
+        0x66588b4513254f4aL } },
+    /* 57 << 56 */
+    { { 0x147ebf01fad097a5L,0x49883ea8610e815dL,0xe44d60ba8a11de56L,
+        0xa970de6e827a7a6dL },
+      { 0x2be414245e17fc19L,0xd833c65701214057L,0x1375813b363e723fL,
+        0x6820bb88e6a52e9bL } },
+    /* 58 << 56 */
+    { { 0x7e7f6970d875d56aL,0xd6a0a9ac51fbf6bfL,0x54ba8790a3083c12L,
+        0xebaeb23d6ae7eb64L },
+      { 0xa8685c3ab99a907aL,0xf1e74550026bf40bL,0x7b73a027c802cd9eL,
+        0x9a8a927c4fef4635L } },
+    /* 59 << 56 */
+    { { 0xe1b6f60c08191224L,0xc4126ebbde4ec091L,0xe1dff4dc4ae38d84L,
+        0xde3f57db4f2ef985L },
+      { 0x34964337d446a1ddL,0x7bf217a0859e77f6L,0x8ff105278e1d13f5L,
+        0xa304ef0374eeae27L } },
+    /* 60 << 56 */
+    { { 0xfc6f5e47d19dfa5aL,0xdb007de37fad982bL,0x28205ad1613715f5L,
+        0x251e67297889529eL },
+      { 0x727051841ae98e78L,0xf818537d271cac32L,0xc8a15b7eb7f410f5L,
+        0xc474356f81f62393L } },
+    /* 61 << 56 */
+    { { 0x92dbdc5ac242316bL,0xabe060acdbf4aff5L,0x6e8c38fe909a8ec6L,
+        0x43e514e56116cb94L },
+      { 0x2078fa3807d784f9L,0x1161a880f4b5b357L,0x5283ce7913adea3dL,
+        0x0756c3e6cc6a910bL } },
+    /* 62 << 56 */
+    { { 0x60bcfe01aaa79697L,0x04a73b2956391db1L,0xdd8dad47189b45a0L,
+        0xbfac0dd048d5b8d9L },
+      { 0x34ab3af57d3d2ec2L,0x6fa2fc2d207bd3afL,0x9ff4009266550dedL,
+        0x719b3e871fd5b913L } },
+    /* 63 << 56 */
+    { { 0xa573a4966d17fbc7L,0x0cd1a70a73d2b24eL,0x34e2c5cab2676937L,
+        0xe7050b06bf669f21L },
+      { 0xfbe948b61ede9046L,0xa053005197662659L,0x58cbd4edf10124c5L,
+        0xde2646e4dd6c06c8L } },
+    /* 64 << 56 */
+    { { 0x332f81088cad38c0L,0x471b7e906bd68ae2L,0x56ac3fb20d8e27a3L,
+        0xb54660db136b4b0dL },
+      { 0x123a1e11a6fd8de4L,0x44dbffeaa37799efL,0x4540b977ce6ac17cL,
+        0x495173a8af60acefL } },
+    /* 0 << 63 */
+    { { 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 << 63 */
+    { { 0x9ebb284d391c2a82L,0xbcdd4863158308e8L,0x006f16ec83f1edcaL,
+        0xa13e2c37695dc6c8L },
+      { 0x2ab756f04a057a87L,0xa8765500a6b48f98L,0x4252face68651c44L,
+        0xa52b540be1765e02L } },
+    /* 2 << 63 */
+    { { 0x4f922fc516a0d2bbL,0x0d5cc16c1a623499L,0x9241cf3a57c62c8bL,
+        0x2f5e6961fd1b667fL },
+      { 0x5c15c70bf5a01797L,0x3d20b44d60956192L,0x04911b37071fdb52L,
+        0xf648f9168d6f0f7bL } },
+    /* 3 << 63 */
+    { { 0x6dc1acafe60b7cf7L,0x25860a5084a9d869L,0x56fc6f09e7ba8ac4L,
+        0x828c5bd06148d29eL },
+      { 0xac6b435edc55ae5fL,0xa527f56cc0117411L,0x94d5045efd24342cL,
+        0x2c4c0a3570b67c0dL } },
+    /* 4 << 63 */
+    { { 0x027cc8b8fac61d9aL,0x7d25e062e3c6fe8aL,0xe08805bfe5bff503L,
+        0x13271e6c6ff632f7L },
+      { 0x55dca6c0232f76a5L,0x8957c32d701ef426L,0xee728bcba10a5178L,
+        0x5ea60411b62c5173L } },
+    /* 5 << 63 */
+    { { 0xfc4e964ed0b8892bL,0x9ea176839301bb74L,0x6265c5aefcc48626L,
+        0xe60cf82ebb3e9102L },
+      { 0x57adf797d4df5531L,0x235b59a18deeefe2L,0x60adcf583f306eb1L,
+        0x105c27533d09492dL } },
+    /* 6 << 63 */
+    { { 0x4090914bb5def996L,0x1cb69c83233dd1e7L,0xc1e9c1d39b3d5e76L,
+        0x1f3338edfccf6012L },
+      { 0xb1e95d0d2f5378a8L,0xacf4c2c72f00cd21L,0x6e984240eb5fe290L,
+        0xd66c038d248088aeL } },
+    /* 7 << 63 */
+    { { 0x804d264af94d70cfL,0xbdb802ef7314bf7eL,0x8fb54de24333ed02L,
+        0x740461e0285635d9L },
+      { 0x4113b2c8365e9383L,0xea762c833fdef652L,0x4eec6e2e47b956c1L,
+        0xa3d814be65620fa4L } },
+    /* 8 << 63 */
+    { { 0x9ad5462bb4d8bc50L,0x181c0b16a9195770L,0xebd4fe1c78412a68L,
+        0xae0341bcc0dff48cL },
+      { 0xb6bc45cf7003e866L,0xf11a6dea8a24a41bL,0x5407151ad04c24c2L,
+        0x62c9d27dda5b7b68L } },
+    /* 9 << 63 */
+    { { 0x2e96423588cceff6L,0x8594c54f8b07ed69L,0x1578e73cc84d0d0dL,
+        0x7b4e1055ff532868L },
+      { 0xa348c0d5b5ec995aL,0xbf4b9d5514289a54L,0x9ba155a658fbd777L,
+        0x186ed7a81a84491dL } },
+    /* 10 << 63 */
+    { { 0xd4992b30614c0900L,0xda98d121bd00c24bL,0x7f534dc87ec4bfa1L,
+        0x4a5ff67437dc34bcL },
+      { 0x68c196b81d7ea1d7L,0x38cf289380a6d208L,0xfd56cd09e3cbbd6eL,
+        0xec72e27e4205a5b6L } },
+    /* 11 << 63 */
+    { { 0x15ea68f5a44f77f7L,0x7aa5f9fdb43c52bcL,0x86ff676f94f0e609L,
+        0xa4cde9632e2d432bL },
+      { 0x8cafa0c0eee470afL,0x84137d0e8a3f5ec8L,0xebb40411faa31231L,
+        0xa239c13f6f7f7ccfL } },
+    /* 12 << 63 */
+    { { 0x32865719a8afd30bL,0x867983288a826dceL,0xdf04e891c4a8fbe0L,
+        0xbb6b6e1bebf56ad3L },
+      { 0x0a695b11471f1ff0L,0xd76c3389be15baf0L,0x018edb95be96c43eL,
+        0xf2beaaf490794158L } },
+    /* 13 << 63 */
+    { { 0x152db09ec3076a27L,0x5e82908ee416545dL,0xa2c41272356d6f2eL,
+        0xdc9c964231fd74e1L },
+      { 0x66ceb88d519bf615L,0xe29ecd7605a2274eL,0x3a0473c4bf5e2fa0L,
+        0x6b6eb67164284e67L } },
+    /* 14 << 63 */
+    { { 0xe8b97932b88756ddL,0xed4e8652f17e3e61L,0xc2dd14993ee1c4a4L,
+        0xc0aaee17597f8c0eL },
+      { 0x15c4edb96c168af3L,0x6563c7bfb39ae875L,0xadfadb6f20adb436L,
+        0xad55e8c99a042ac0L } },
+    /* 15 << 63 */
+    { { 0x975a1ed8b76da1f5L,0x10dfa466a58acb94L,0x8dd7f7e3ac060282L,
+        0x6813e66a572a051eL },
+      { 0xb4ccae1e350cb901L,0xb653d65650cb7822L,0x42484710dfab3b87L,
+        0xcd7ee5379b670fd0L } },
+    /* 16 << 63 */
+    { { 0x0a50b12e523b8bf6L,0x8009eb5b8f910c1bL,0xf535af824a167588L,
+        0x0f835f9cfb2a2abdL },
+      { 0xf59b29312afceb62L,0xc797df2a169d383fL,0xeb3f5fb066ac02b0L,
+        0x029d4c6fdaa2d0caL } },
+    /* 17 << 63 */
+    { { 0xd4059bc1afab4bc5L,0x833f5c6f56783247L,0xb53466308d2d3605L,
+        0x83387891d34d8433L },
+      { 0xd973b30fadd9419aL,0xbcca1099afe3fce8L,0x081783150809aac6L,
+        0x01b7f21a540f0f11L } },
+    /* 18 << 63 */
+    { { 0x65c29219909523c8L,0xa62f648fa3a1c741L,0x88598d4f60c9e55aL,
+        0xbce9141b0e4f347aL },
+      { 0x9af97d8435f9b988L,0x0210da62320475b6L,0x3c076e229191476cL,
+        0x7520dbd944fc7834L } },
+    /* 19 << 63 */
+    { { 0x6a6b2cfec1ab1bbdL,0xef8a65bedc650938L,0x72855540805d7bc4L,
+        0xda389396ed11fdfdL },
+      { 0xa9d5bd3674660876L,0x11d67c54b45dff35L,0x6af7d148a4f5da94L,
+        0xbb8d4c3fc0bbeb31L } },
+    /* 20 << 63 */
+    { { 0x87a7ebd1e0a1b12aL,0x1e4ef88d770ba95fL,0x8c33345cdc2ae9cbL,
+        0xcecf127601cc8403L },
+      { 0x687c012e1b39b80fL,0xfd90d0ad35c33ba4L,0xa3ef5a675c9661c2L,
+        0x368fc88ee017429eL } },
+    /* 21 << 63 */
+    { { 0xd30c6761196a2fa2L,0x931b9817bd5b312eL,0xba01000c72f54a31L,
+        0xa203d2c866eaa541L },
+      { 0xf2abdee098939db3L,0xe37d6c2c3e606c02L,0xf2921574521ff643L,
+        0x2781b3c4d7e2fca3L } },
+    /* 22 << 63 */
+    { { 0x664300b07850ec06L,0xac5a38b97d3a10cfL,0x9233188de34ab39dL,
+        0xe77057e45072cbb9L },
+      { 0xbcf0c042b59e78dfL,0x4cfc91e81d97de52L,0x4661a26c3ee0ca4aL,
+        0x5620a4c1fb8507bcL } },
+    /* 23 << 63 */
+    { { 0x4b44d4aa049f842cL,0xceabc5d51540e82bL,0x306710fd15c6f156L,
+        0xbe5ae52b63db1d72L },
+      { 0x06f1e7e6334957f1L,0x57e388f031144a70L,0xfb69bb2fdf96447bL,
+        0x0f78ebd373e38a12L } },
+    /* 24 << 63 */
+    { { 0xb82226052b7ce542L,0xe6d4ce997472bde1L,0x53e16ebe09d2f4daL,
+        0x180ff42e53b92b2eL },
+      { 0xc59bcc022c34a1c6L,0x3803d6f9422c46c2L,0x18aff74f5c14a8a2L,
+        0x55aebf8010a08b28L } },
+    /* 25 << 63 */
+    { { 0x66097d587135593fL,0x32e6eff72be570cdL,0x584e6a102a8c860dL,
+        0xcd185890a2eb4163L },
+      { 0x7ceae99d6d97e134L,0xd42c6b70dd8447ceL,0x59ddbb4ab8c50273L,
+        0x03c612df3cf34e1eL } },
+    /* 26 << 63 */
+    { { 0x84b9ca1504b6c5a0L,0x35216f3918f0e3a3L,0x3ec2d2bcbd986c00L,
+        0x8bf546d9d19228feL },
+      { 0xd1c655a44cd623c3L,0x366ce718502b8e5aL,0x2cfc84b4eea0bfe7L,
+        0xe01d5ceecf443e8eL } },
+    /* 27 << 63 */
+    { { 0x8ec045d9036520f8L,0xdfb3c3d192d40e98L,0x0bac4ccecc559a04L,
+        0x35eccae5240ea6b1L },
+      { 0x180b32dbf8a5a0acL,0x547972a5eb699700L,0xa3765801ca26bca0L,
+        0x57e09d0ea647f25aL } },
+    /* 28 << 63 */
+    { { 0xb956970e2fdd23ccL,0xb80288bc5682e971L,0xe6e6d91e9ae86ebcL,
+        0x0564c83f8c9f1939L },
+      { 0x551932a239560368L,0xe893752b049c28e2L,0x0b03cee5a6a158c3L,
+        0xe12d656b04964263L } },
+    /* 29 << 63 */
+    { { 0x4b47554e63e3bc1dL,0xc719b6a245044ff7L,0x4f24d30ae48daa07L,
+        0xa3f37556c8c1edc3L },
+      { 0x9a47bf760700d360L,0xbb1a1824822ae4e2L,0x22e275a389f1fb4cL,
+        0x72b1aa239968c5f5L } },
+    /* 30 << 63 */
+    { { 0xa75feacabe063f64L,0x9b392f43bce47a09L,0xd42415091ad07acaL,
+        0x4b0c591b8d26cd0fL },
+      { 0x2d42ddfd92f1169aL,0x63aeb1ac4cbf2392L,0x1de9e8770691a2afL,
+        0xebe79af7d98021daL } },
+    /* 31 << 63 */
+    { { 0xcfdf2a4e40e50acfL,0xf0a98ad7af01d665L,0xefb640bf1831be1fL,
+        0x6fe8bd2f80e9ada0L },
+      { 0x94c103a16cafbc91L,0x170f87598308e08cL,0x5de2d2ab9780ff4fL,
+        0x666466bc45b201f2L } },
+    /* 32 << 63 */
+    { { 0x58af2010f5b343bcL,0x0f2e400af2f142feL,0x3483bfdea85f4bdfL,
+        0xf0b1d09303bfeaa9L },
+      { 0x2ea01b95c7081603L,0xe943e4c93dba1097L,0x47be92adb438f3a6L,
+        0x00bb7742e5bf6636L } },
+    /* 33 << 63 */
+    { { 0x136b7083824297b4L,0x9d0e55805584455fL,0xab48cedcf1c7d69eL,
+        0x53a9e4812a256e76L },
+      { 0x0402b0e065eb2413L,0xdadbbb848fc407a7L,0xa65cd5a48d7f5492L,
+        0x21d4429374bae294L } },
+    /* 34 << 63 */
+    { { 0x66917ce63b5f1cc4L,0x37ae52eace872e62L,0xbb087b722905f244L,
+        0x120770861e6af74fL },
+      { 0x4b644e491058edeaL,0x827510e3b638ca1dL,0x8cf2b7046038591cL,
+        0xffc8b47afe635063L } },
+    /* 35 << 63 */
+    { { 0x3ae220e61b4d5e63L,0xbd8647429d961b4bL,0x610c107e9bd16bedL,
+        0x4270352a1127147bL },
+      { 0x7d17ffe664cfc50eL,0x50dee01a1e36cb42L,0x068a762235dc5f9aL,
+        0x9a08d536df53f62cL } },
+    /* 36 << 63 */
+    { { 0x4ed714576be5f7deL,0xd93006f8c2263c9eL,0xe073694ccacacb36L,
+        0x2ff7a5b43ae118abL },
+      { 0x3cce53f1cd871236L,0xf156a39dc2aa6d52L,0x9cc5f271b198d76dL,
+        0xbc615b6f81383d39L } },
+    /* 37 << 63 */
+    { { 0xa54538e8de3eee6bL,0x58c77538ab910d91L,0x31e5bdbc58d278bdL,
+        0x3cde4adfb963acaeL },
+      { 0xb1881fd25302169cL,0x8ca60fa0a989ed8bL,0xa1999458ff96a0eeL,
+        0xc1141f03ac6c283dL } },
+    /* 38 << 63 */
+    { { 0x7677408d6dfafed3L,0x33a0165339661588L,0x3c9c15ec0b726fa0L,
+        0x090cfd936c9b56daL },
+      { 0xe34f4baea3c40af5L,0x3469eadbd21129f1L,0xcc51674a1e207ce8L,
+        0x1e293b24c83b1ef9L } },
+    /* 39 << 63 */
+    { { 0x17173d131e6c0bb4L,0x1900469590776d35L,0xe7980e346de6f922L,
+        0x873554cbf4dd9a22L },
+      { 0x0316c627cbf18a51L,0x4d93651b3032c081L,0x207f27713946834dL,
+        0x2c08d7b430cdbf80L } },
+    /* 40 << 63 */
+    { { 0x137a4fb486df2a61L,0xa1ed9c07ecf7b4a2L,0xb2e460e27bd042ffL,
+        0xb7f5e2fa5f62f5ecL },
+      { 0x7aa6ec6bcc2423b7L,0x75ce0a7fba63eea7L,0x67a45fb1f250a6e1L,
+        0x93bc919ce53cdc9fL } },
+    /* 41 << 63 */
+    { { 0x9271f56f871942dfL,0x2372ff6f7859ad66L,0x5f4c2b9633cb1a78L,
+        0xe3e291015838aa83L },
+      { 0xa7ed1611e4e8110cL,0x2a2d70d5330198ceL,0xbdf132e86720efe0L,
+        0xe61a896266a471bfL } },
+    /* 42 << 63 */
+    { { 0x796d3a85825808bdL,0x51dc3cb73fd6e902L,0x643c768a916219d1L,
+        0x36cd7685a2ad7d32L },
+      { 0xe3db9d05b22922a4L,0x6494c87edba29660L,0xf0ac91dfbcd2ebc7L,
+        0x4deb57a045107f8dL } },
+    /* 43 << 63 */
+    { { 0x42271f59c3d12a73L,0x5f71687ca5c2c51dL,0xcb1f50c605797bcbL,
+        0x29ed0ed9d6d34eb0L },
+      { 0xe5fe5b474683c2ebL,0x4956eeb597447c46L,0x5b163a4371207167L,
+        0x93fa2fed0248c5efL } },
+    /* 44 << 63 */
+    { { 0x67930af231f63950L,0xa77797c114caa2c9L,0x526e80ee27ac7e62L,
+        0xe1e6e62658b28aecL },
+      { 0x636178b0b3c9fef0L,0xaf7752e06d5f90beL,0x94ecaf18eece51cfL,
+        0x2864d0edca806e1fL } },
+    /* 45 << 63 */
+    { { 0x6de2e38397c69134L,0x5a42c316eb291293L,0xc77792196a60bae0L,
+        0xa24de3466b7599d1L },
+      { 0x49d374aab75d4941L,0x989005862d501ff0L,0x9f16d40eeb7974cfL,
+        0x1033860bcdd8c115L } },
+    /* 46 << 63 */
+    { { 0xb6c69ac82094cec3L,0x9976fb88403b770cL,0x1dea026c4859590dL,
+        0xb6acbb468562d1fdL },
+      { 0x7cd6c46144569d85L,0xc3190a3697f0891dL,0xc6f5319548d5a17dL,
+        0x7d919966d749abc8L } },
+    /* 47 << 63 */
+    { { 0x65104837dd1c8a20L,0x7e5410c82f683419L,0x958c3ca8be94022eL,
+        0x605c31976145dac2L },
+      { 0x3fc0750101683d54L,0x1d7127c5595b1234L,0x10b8f87c9481277fL,
+        0x677db2a8e65a1adbL } },
+    /* 48 << 63 */
+    { { 0xec2fccaaddce3345L,0x2a6811b7012a4350L,0x96760ff1ac598bdcL,
+        0x054d652ad1bf4128L },
+      { 0x0a1151d492a21005L,0xad7f397133110fdfL,0x8c95928c1960100fL,
+        0x6c91c8257bf03362L } },
+    /* 49 << 63 */
+    { { 0xc8c8b2a2ce309f06L,0xfdb27b59ca27204bL,0xd223eaa50848e32eL,
+        0xb93e4b2ee7bfaf1eL },
+      { 0xc5308ae644aa3dedL,0x317a666ac015d573L,0xc888ce231a979707L,
+        0xf141c1e60d5c4958L } },
+    /* 50 << 63 */
+    { { 0xb53b7de561906373L,0x858dbadeeb999595L,0x8cbb47b2a59e5c36L,
+        0x660318b3dcf4e842L },
+      { 0xbd161ccd12ba4b7aL,0xf399daabf8c8282aL,0x1587633aeeb2130dL,
+        0xa465311ada38dd7dL } },
+    /* 51 << 63 */
+    { { 0x5f75eec864d3779bL,0x3c5d0476ad64c171L,0x874103712a914428L,
+        0x8096a89190e2fc29L },
+      { 0xd3d2ae9d23b3ebc2L,0x90bdd6dba580cfd6L,0x52dbb7f3c5b01f6cL,
+        0xe68eded4e102a2dcL } },
+    /* 52 << 63 */
+    { { 0x17785b7799eb6df0L,0x26c3cc517386b779L,0x345ed9886417a48eL,
+        0xe990b4e407d6ef31L },
+      { 0x0f456b7e2586abbaL,0x239ca6a559c96e9aL,0xe327459ce2eb4206L,
+        0x3a4c3313a002b90aL } },
+    /* 53 << 63 */
+    { { 0x2a114806f6a3f6fbL,0xad5cad2f85c251ddL,0x92c1f613f5a784d3L,
+        0xec7bfacf349766d5L },
+      { 0x04b3cd333e23cb3bL,0x3979fe84c5a64b2dL,0x192e27207e589106L,
+        0xa60c43d1a15b527fL } },
+    /* 54 << 63 */
+    { { 0x2dae9082be7cf3a6L,0xcc86ba92bc967274L,0xf28a2ce8aea0a8a9L,
+        0x404ca6d96ee988b3L },
+      { 0xfd7e9c5d005921b8L,0xf56297f144e79bf9L,0xa163b4600d75ddc2L,
+        0x30b23616a1f2be87L } },
+    /* 55 << 63 */
+    { { 0x4b070d21bfe50e2bL,0x7ef8cfd0e1bfede1L,0xadba00112aac4ae0L,
+        0x2a3e7d01b9ebd033L },
+      { 0x995277ece38d9d1cL,0xb500249e9c5d2de3L,0x8912b820f13ca8c9L,
+        0xc8798114877793afL } },
+    /* 56 << 63 */
+    { { 0x19e6125dec3f1decL,0x07b1f040911178daL,0xd93ededa904a6738L,
+        0x55187a5a0bebedcdL },
+      { 0xf7d04722eb329d41L,0xf449099ef170b391L,0xfd317a69ca99f828L,
+        0x50c3db2b34a4976dL } },
+    /* 57 << 63 */
+    { { 0xe9ba77843757b392L,0x326caefdaa3ca05aL,0x78e5293bf1e593d4L,
+        0x7842a9370d98fd13L },
+      { 0xe694bf965f96b10dL,0x373a9df606a8cd05L,0x997d1e51e8f0c7fcL,
+        0x1d01979063fd972eL } },
+    /* 58 << 63 */
+    { { 0x0064d8585499fb32L,0x7b67bad977a8aeb7L,0x1d3eb9772d08eec5L,
+        0x5fc047a6cbabae1dL },
+      { 0x0577d159e54a64bbL,0x8862201bc43497e4L,0xad6b4e282ce0608dL,
+        0x8b687b7d0b167aacL } },
+    /* 59 << 63 */
+    { { 0x6ed4d3678b2ecfa9L,0x24dfe62da90c3c38L,0xa1862e103fe5c42bL,
+        0x1ca73dcad5732a9fL },
+      { 0x35f038b776bb87adL,0x674976abf242b81fL,0x4f2bde7eb0fd90cdL,
+        0x6efc172ea7fdf092L } },
+    /* 60 << 63 */
+    { { 0x3806b69b92222f1fL,0x5a2459ca6cf7ae70L,0x6789f69ca85217eeL,
+        0x5f232b5ee3dc85acL },
+      { 0x660e3ec548e9e516L,0x124b4e473197eb31L,0x10a0cb13aafcca23L,
+        0x7bd63ba48213224fL } },
+    /* 61 << 63 */
+    { { 0xaffad7cc290a7f4fL,0x6b409c9e0286b461L,0x58ab809fffa407afL,
+        0xc3122eedc68ac073L },
+      { 0x17bf9e504ef24d7eL,0x5d9297943e2a5811L,0x519bc86702902e01L,
+        0x76bba5da39c8a851L } },
+    /* 62 << 63 */
+    { { 0xe9f9669cda94951eL,0x4b6af58d66b8d418L,0xfa32107417d426a4L,
+        0xc78e66a99dde6027L },
+      { 0x0516c0834a53b964L,0xfc659d38ff602330L,0x0ab55e5c58c5c897L,
+        0x985099b2838bc5dfL } },
+    /* 63 << 63 */
+    { { 0x061d9efcc52fc238L,0x712b27286ac1da3fL,0xfb6581499283fe08L,
+        0x4954ac94b8aaa2f7L },
+      { 0x85c0ada47fb2e74fL,0xee8ba98eb89926b0L,0xe4f9d37d23d1af5bL,
+        0x14ccdbf9ba9b015eL } },
+    /* 64 << 63 */
+    { { 0xb674481b7bfe7178L,0x4e1debae65405868L,0x061b2821c48c867dL,
+        0x69c15b35513b30eaL },
+      { 0x3b4a166636871088L,0xe5e29f5d1220b1ffL,0x4b82bb35233d9f4dL,
+        0x4e07633318cdc675L } },
+    /* 0 << 70 */
+    { { 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 << 70 */
+    { { 0x0d53f5c7a3e6fcedL,0xe8cbbdd5f45fbdebL,0xf85c01df13339a70L,
+        0x0ff71880142ceb81L },
+      { 0x4c4e8774bd70437aL,0x5fb32891ba0bda6aL,0x1cdbebd2f18bd26eL,
+        0x2f9526f103a9d522L } },
+    /* 2 << 70 */
+    { { 0x40ce305192c4d684L,0x8b04d7257612efcdL,0xb9dcda366f9cae20L,
+        0x0edc4d24f058856cL },
+      { 0x64f2e6bf85427900L,0x3de81295dc09dfeaL,0xd41b4487379bf26cL,
+        0x50b62c6d6df135a9L } },
+    /* 3 << 70 */
+    { { 0xd4f8e3b4c72dfe67L,0xc416b0f690e19fdfL,0x18b9098d4c13bd35L,
+        0xac11118a15b8cb9eL },
+      { 0xf598a318f0062841L,0xbfe0602f89f356f4L,0x7ae3637e30177a0cL,
+        0x3409774761136537L } },
+    /* 4 << 70 */
+    { { 0x0db2fb5ed005832aL,0x5f5efd3b91042e4fL,0x8c4ffdc6ed70f8caL,
+        0xe4645d0bb52da9ccL },
+      { 0x9596f58bc9001d1fL,0x52c8f0bc4e117205L,0xfd4aa0d2e398a084L,
+        0x815bfe3a104f49deL } },
+    /* 5 << 70 */
+    { { 0x97e5443f23885e5fL,0xf72f8f99e8433aabL,0xbd00b154e4d4e604L,
+        0xd0b35e6ae5e173ffL },
+      { 0x57b2a0489164722dL,0x3e3c665b88761ec8L,0x6bdd13973da83832L,
+        0x3c8b1a1e73dafe3bL } },
+    /* 6 << 70 */
+    { { 0x4497ace654317cacL,0xbe600ab9521771b3L,0xb42e409eb0dfe8b8L,
+        0x386a67d73942310fL },
+      { 0x25548d8d4431cc28L,0xa7cff142985dc524L,0x4d60f5a193c4be32L,
+        0x83ebd5c8d071c6e1L } },
+    /* 7 << 70 */
+    { { 0xba3a80a7b1fd2b0bL,0x9b3ad3965bec33e8L,0xb3868d6179743fb3L,
+        0xcfd169fcfdb462faL },
+      { 0xd3b499d79ce0a6afL,0x55dc1cf1e42d3ff8L,0x04fb9e6cc6c3e1b2L,
+        0x47e6961d6f69a474L } },
+    /* 8 << 70 */
+    { { 0x54eb3acce548b37bL,0xb38e754284d40549L,0x8c3daa517b341b4fL,
+        0x2f6928ec690bf7faL },
+      { 0x0496b32386ce6c41L,0x01be1c5510adadcdL,0xc04e67e74bb5faf9L,
+        0x3cbaf678e15c9985L } },
+    /* 9 << 70 */
+    { { 0x8cd1214550ca4247L,0xba1aa47ae7dd30aaL,0x2f81ddf1e58fee24L,
+        0x03452936eec9b0e8L },
+      { 0x8bdc3b81243aea96L,0x9a2919af15c3d0e5L,0x9ea640ec10948361L,
+        0x5ac86d5b6e0bcccfL } },
+    /* 10 << 70 */
+    { { 0xf892d918c36cf440L,0xaed3e837c939719cL,0xb07b08d2c0218b64L,
+        0x6f1bcbbace9790ddL },
+      { 0x4a84d6ed60919b8eL,0xd89007918ac1f9ebL,0xf84941aa0dd5daefL,
+        0xb22fe40a67fd62c5L } },
+    /* 11 << 70 */
+    { { 0x97e15ba2157f2db3L,0xbda2fc8f8e28ca9cL,0x5d050da437b9f454L,
+        0x3d57eb572379d72eL },
+      { 0xe9b5eba2fb5ee997L,0x01648ca2e11538caL,0x32bb76f6f6327974L,
+        0x338f14b8ff3f4bb7L } },
+    /* 12 << 70 */
+    { { 0x524d226ad7ab9a2dL,0x9c00090d7dfae958L,0x0ba5f5398751d8c2L,
+        0x8afcbcdd3ab8262dL },
+      { 0x57392729e99d043bL,0xef51263baebc943aL,0x9feace9320862935L,
+        0x639efc03b06c817bL } },
+    /* 13 << 70 */
+    { { 0x1fe054b366b4be7aL,0x3f25a9de84a37a1eL,0xf39ef1ad78d75cd9L,
+        0xd7b58f495062c1b5L },
+      { 0x6f74f9a9ff563436L,0xf718ff29e8af51e7L,0x5234d31315e97fecL,
+        0xb6a8e2b1292f1c0aL } },
+    /* 14 << 70 */
+    { { 0xa7f53aa8327720c1L,0x956ca322ba092cc8L,0x8f03d64a28746c4dL,
+        0x51fe178266d0d392L },
+      { 0xd19b34db3c832c80L,0x60dccc5c6da2e3b4L,0x245dd62e0a104cccL,
+        0xa7ab1de1620b21fdL } },
+    /* 15 << 70 */
+    { { 0xb293ae0b3893d123L,0xf7b75783b15ee71cL,0x5aa3c61442a9468bL,
+        0xd686123cdb15d744L },
+      { 0x8c616891a7ab4116L,0x6fcd72c8a4e6a459L,0xac21911077e5fad7L,
+        0xfb6a20e7704fa46bL } },
+    /* 16 << 70 */
+    { { 0xe839be7d341d81dcL,0xcddb688932148379L,0xda6211a1f7026eadL,
+        0xf3b2575ff4d1cc5eL },
+      { 0x40cfc8f6a7a73ae6L,0x83879a5e61d5b483L,0xc5acb1ed41a50ebcL,
+        0x59a60cc83c07d8faL } },
+    /* 17 << 70 */
+    { { 0x1b73bdceb1876262L,0x2b0d79f012af4ee9L,0x8bcf3b0bd46e1d07L,
+        0x17d6af9de45d152fL },
+      { 0x735204616d736451L,0x43cbbd9756b0bf5aL,0xb0833a5bd5999b9dL,
+        0x702614f0eb72e398L } },
+    /* 18 << 70 */
+    { { 0x0aadf01a59c3e9f8L,0x40200e77ce6b3d16L,0xda22bdd3deddafadL,
+        0x76dedaf4310d72e1L },
+      { 0x49ef807c4bc2e88fL,0x6ba81291146dd5a5L,0xa1a4077a7d8d59e9L,
+        0x87b6a2e7802db349L } },
+    /* 19 << 70 */
+    { { 0xd56799971b4e598eL,0xf499ef1f06fe4b1dL,0x3978d3aefcb267c5L,
+        0xb582b557235786d0L },
+      { 0x32b3b2ca1715cb07L,0x4c3de6a28480241dL,0x63b5ffedcb571ecdL,
+        0xeaf53900ed2fe9a9L } },
+    /* 20 << 70 */
+    { { 0xdec98d4ac3b81990L,0x1cb837229e0cc8feL,0xfe0b0491d2b427b9L,
+        0x0f2386ace983a66cL },
+      { 0x930c4d1eb3291213L,0xa2f82b2e59a62ae4L,0x77233853f93e89e3L,
+        0x7f8063ac11777c7fL } },
+    /* 21 << 70 */
+    { { 0xff0eb56759ad2877L,0x6f4546429865c754L,0xe6fe701a236e9a84L,
+        0xc586ef1606e40fc3L },
+      { 0x3f62b6e024bafad9L,0xc8b42bd264da906aL,0xc98e1eb4da3276a0L,
+        0x30d0e5fc06cbf852L } },
+    /* 22 << 70 */
+    { { 0x1b6b2ae1e8b4dfd4L,0xd754d5c78301cbacL,0x66097629112a39acL,
+        0xf86b599993ba4ab9L },
+      { 0x26c9dea799f9d581L,0x0473b1a8c2fafeaaL,0x1469af553b2505a5L,
+        0x227d16d7d6a43323L } },
+    /* 23 << 70 */
+    { { 0x3316f73cad3d97f9L,0x52bf3bb51f137455L,0x953eafeb09954e7cL,
+        0xa721dfeddd732411L },
+      { 0xb4929821141d4579L,0x3411321caa3bd435L,0xafb355aa17fa6015L,
+        0xb4e7ef4a18e42f0eL } },
+    /* 24 << 70 */
+    { { 0x604ac97c59371000L,0xe1c48c707f759c18L,0x3f62ecc5a5db6b65L,
+        0x0a78b17338a21495L },
+      { 0x6be1819dbcc8ad94L,0x70dc04f6d89c3400L,0x462557b4a6b4840aL,
+        0x544c6ade60bd21c0L } },
+    /* 25 << 70 */
+    { { 0x6a00f24e907a544bL,0xa7520dcb313da210L,0xfe939b7511e4994bL,
+        0x918b6ba6bc275d70L },
+      { 0xd3e5e0fc644be892L,0x707a9816fdaf6c42L,0x60145567f15c13feL,
+        0x4818ebaae130a54aL } },
+    /* 26 << 70 */
+    { { 0x28aad3ad58d2f767L,0xdc5267fdd7e7c773L,0x4919cc88c3afcc98L,
+        0xaa2e6ab02db8cd4bL },
+      { 0xd46fec04d0c63eaaL,0xa1cb92c519ffa832L,0x678dd178e43a631fL,
+        0xfb5ae1cd3dc788b3L } },
+    /* 27 << 70 */
+    { { 0x68b4fb906e77de04L,0x7992bcf0f06dbb97L,0x896e6a13c417c01dL,
+        0x8d96332cb956be01L },
+      { 0x902fc93a413aa2b9L,0x99a4d915fc98c8a5L,0x52c29407565f1137L,
+        0x4072690f21e4f281L } },
+    /* 28 << 70 */
+    { { 0x36e607cf02ff6072L,0xa47d2ca98ad98cdcL,0xbf471d1ef5f56609L,
+        0xbcf86623f264ada0L },
+      { 0xb70c0687aa9e5cb6L,0xc98124f217401c6cL,0x8189635fd4a61435L,
+        0xd28fb8afa9d98ea6L } },
+    /* 29 << 70 */
+    { { 0xb9a67c2a40c251f8L,0x88cd5d87a2da44beL,0x437deb96e09b5423L,
+        0x150467db64287dc1L },
+      { 0xe161debbcdabb839L,0xa79e9742f1839a3eL,0xbb8dd3c2652d202bL,
+        0x7b3e67f7e9f97d96L } },
+    /* 30 << 70 */
+    { { 0x5aa5d78fb1cb6ac9L,0xffa13e8eca1d0d45L,0x369295dd2ba5bf95L,
+        0xd68bd1f839aff05eL },
+      { 0xaf0d86f926d783f2L,0x543a59b3fc3aafc1L,0x3fcf81d27b7da97cL,
+        0xc990a056d25dee46L } },
+    /* 31 << 70 */
+    { { 0x3e6775b8519cce2cL,0xfc9af71fae13d863L,0x774a4a6f47c1605cL,
+        0x46ba42452fd205e8L },
+      { 0xa06feea4d3fd524dL,0x1e7246416de1acc2L,0xf53816f1334e2b42L,
+        0x49e5918e922f0024L } },
+    /* 32 << 70 */
+    { { 0x439530b665c7322dL,0xcf12cc01b3c1b3fbL,0xc70b01860172f685L,
+        0xb915ee221b58391dL },
+      { 0x9afdf03ba317db24L,0x87dec65917b8ffc4L,0x7f46597be4d3d050L,
+        0x80a1c1ed006500e7L } },
+    /* 33 << 70 */
+    { { 0x84902a9678bf030eL,0xfb5e9c9a50560148L,0x6dae0a9263362426L,
+        0xdcaeecf4a9e30c40L },
+      { 0xc0d887bb518d0c6bL,0x99181152cb985b9dL,0xad186898ef7bc381L,
+        0x18168ffb9ee46201L } },
+    /* 34 << 70 */
+    { { 0x9a04cdaa2502753cL,0xbb279e2651407c41L,0xeacb03aaf23564e5L,
+        0x1833658271e61016L },
+      { 0x8684b8c4eb809877L,0xb336e18dea0e672eL,0xefb601f034ee5867L,
+        0x2733edbe1341cfd1L } },
+    /* 35 << 70 */
+    { { 0xb15e809a26025c3cL,0xe6e981a69350df88L,0x923762378502fd8eL,
+        0x4791f2160c12be9bL },
+      { 0xb725678925f02425L,0xec8631947a974443L,0x7c0ce882fb41cc52L,
+        0xc266ff7ef25c07f2L } },
+    /* 36 << 70 */
+    { { 0x3d4da8c3017025f3L,0xefcf628cfb9579b4L,0x5c4d00161f3716ecL,
+        0x9c27ebc46801116eL },
+      { 0x5eba0ea11da1767eL,0xfe15145247004c57L,0x3ace6df68c2373b7L,
+        0x75c3dffe5dbc37acL } },
+    /* 37 << 70 */
+    { { 0x3dc32a73ddc925fcL,0xb679c8412f65ee0bL,0x715a3295451cbfebL,
+        0xd9889768f76e9a29L },
+      { 0xec20ce7fb28ad247L,0xe99146c400894d79L,0x71457d7c9f5e3ea7L,
+        0x097b266238030031L } },
+    /* 38 << 70 */
+    { { 0xdb7f6ae6cf9f82a8L,0x319decb9438f473aL,0xa63ab386283856c3L,
+        0x13e3172fb06a361bL },
+      { 0x2959f8dc7d5a006cL,0x2dbc27c675fba752L,0xc1227ab287c22c9eL,
+        0x06f61f7571a268b2L } },
+    /* 39 << 70 */
+    { { 0x1b6bb97104779ce2L,0xaca838120aadcb1dL,0x297ae0bcaeaab2d5L,
+        0xa5c14ee75bfb9f13L },
+      { 0xaa00c583f17a62c7L,0x39eb962c173759f6L,0x1eeba1d486c9a88fL,
+        0x0ab6c37adf016c5eL } },
+    /* 40 << 70 */
+    { { 0xa2a147dba28a0749L,0x246c20d6ee519165L,0x5068d1b1d3810715L,
+        0xb1e7018c748160b9L },
+      { 0x03f5b1faf380ff62L,0xef7fb1ddf3cb2c1eL,0xeab539a8fc91a7daL,
+        0x83ddb707f3f9b561L } },
+    /* 41 << 70 */
+    { { 0xc550e211fe7df7a4L,0xa7cd07f2063f6f40L,0xb0de36352976879cL,
+        0xb5f83f85e55741daL },
+      { 0x4ea9d25ef3d8ac3dL,0x6fe2066f62819f02L,0x4ab2b9c2cef4a564L,
+        0x1e155d965ffa2de3L } },
+    /* 42 << 70 */
+    { { 0x0eb0a19bc3a72d00L,0x4037665b8513c31bL,0x2fb2b6bf04c64637L,
+        0x45c34d6e08cdc639L },
+      { 0x56f1e10ff01fd796L,0x4dfb8101fe3667b8L,0xe0eda2539021d0c0L,
+        0x7a94e9ff8a06c6abL } },
+    /* 43 << 70 */
+    { { 0x2d3bb0d9bb9aa882L,0xea20e4e5ec05fd10L,0xed7eeb5f1a1ca64eL,
+        0x2fa6b43cc6327cbdL },
+      { 0xb577e3cf3aa91121L,0x8c6bd5ea3a34079bL,0xd7e5ba3960e02fc0L,
+        0xf16dd2c390141bf8L } },
+    /* 44 << 70 */
+    { { 0xb57276d980101b98L,0x760883fdb82f0f66L,0x89d7de754bc3eff3L,
+        0x03b606435dc2ab40L },
+      { 0xcd6e53dfe05beeacL,0xf2f1e862bc3325cdL,0xdd0f7921774f03c3L,
+        0x97ca72214552cc1bL } },
+    /* 45 << 70 */
+    { { 0x5a0d6afe1cd19f72L,0xa20915dcf183fbebL,0x9fda4b40832c403cL,
+        0x32738eddbe425442L },
+      { 0x469a1df6b5eccf1aL,0x4b5aff4228bbe1f0L,0x31359d7f570dfc93L,
+        0xa18be235f0088628L } },
+    /* 46 << 70 */
+    { { 0xa5b30fbab00ed3a9L,0x34c6137473cdf8beL,0x2c5c5f46abc56797L,
+        0x5cecf93db82a8ae2L },
+      { 0x7d3dbe41a968fbf0L,0xd23d45831a5c7f3dL,0xf28f69a0c087a9c7L,
+        0xc2d75471474471caL } },
+    /* 47 << 70 */
+    { { 0x36ec9f4a4eb732ecL,0x6c943bbdb1ca6bedL,0xd64535e1f2457892L,
+        0x8b84a8eaf7e2ac06L },
+      { 0xe0936cd32499dd5fL,0x12053d7e0ed04e57L,0x4bdd0076e4305d9dL,
+        0x34a527b91f67f0a2L } },
+    /* 48 << 70 */
+    { { 0xe79a4af09cec46eaL,0xb15347a1658b9bc7L,0x6bd2796f35af2f75L,
+        0xac9579904051c435L },
+      { 0x2669dda3c33a655dL,0x5d503c2e88514aa3L,0xdfa113373753dd41L,
+        0x3f0546730b754f78L } },
+    /* 49 << 70 */
+    { { 0xbf185677496125bdL,0xfb0023c83775006cL,0xfa0f072f3a037899L,
+        0x4222b6eb0e4aea57L },
+      { 0x3dde5e767866d25aL,0xb6eb04f84837aa6fL,0x5315591a2cf1cdb8L,
+        0x6dfb4f412d4e683cL } },
+    /* 50 << 70 */
+    { { 0x7e923ea448ee1f3aL,0x9604d9f705a2afd5L,0xbe1d4a3340ea4948L,
+        0x5b45f1f4b44cbd2fL },
+      { 0x5faf83764acc757eL,0xa7cf9ab863d68ff7L,0x8ad62f69df0e404bL,
+        0xd65f33c212bdafdfL } },
+    /* 51 << 70 */
+    { { 0xc365de15a377b14eL,0x6bf5463b8e39f60cL,0x62030d2d2ce68148L,
+        0xd95867efe6f843a8L },
+      { 0xd39a0244ef5ab017L,0x0bd2d8c14ab55d12L,0xc9503db341639169L,
+        0x2d4e25b0f7660c8aL } },
+    /* 52 << 70 */
+    { { 0x760cb3b5e224c5d7L,0xfa3baf8c68616919L,0x9fbca1138d142552L,
+        0x1ab18bf17669ebf5L },
+      { 0x55e6f53e9bdf25ddL,0x04cc0bf3cb6cd154L,0x595bef4995e89080L,
+        0xfe9459a8104a9ac1L } },
+    /* 53 << 70 */
+    { { 0xad2d89cacce9bb32L,0xddea65e1f7de8285L,0x62ed8c35b351bd4bL,
+        0x4150ff360c0e19a7L },
+      { 0x86e3c801345f4e47L,0x3bf21f71203a266cL,0x7ae110d4855b1f13L,
+        0x5d6aaf6a07262517L } },
+    /* 54 << 70 */
+    { { 0x1e0f12e1813d28f1L,0x6000e11d7ad7a523L,0xc7d8deefc744a17bL,
+        0x1e990b4814c05a00L },
+      { 0x68fddaee93e976d5L,0x696241d146610d63L,0xb204e7c3893dda88L,
+        0x8bccfa656a3a6946L } },
+    /* 55 << 70 */
+    { { 0xb59425b4c5cd1411L,0x701b4042ff3658b1L,0xe3e56bca4784cf93L,
+        0x27de5f158fe68d60L },
+      { 0x4ab9cfcef8d53f19L,0xddb10311a40a730dL,0x6fa73cd14eee0a8aL,
+        0xfd5487485249719dL } },
+    /* 56 << 70 */
+    { { 0x49d66316a8123ef0L,0x73c32db4e7f95438L,0x2e2ed2090d9e7854L,
+        0xf98a93299d9f0507L },
+      { 0xc5d33cf60c6aa20aL,0x9a32ba1475279bb2L,0x7e3202cb774a7307L,
+        0x64ed4bc4e8c42dbdL } },
+    /* 57 << 70 */
+    { { 0xc20f1a06d4caed0dL,0xb8021407171d22b3L,0xd426ca04d13268d7L,
+        0x9237700725f4d126L },
+      { 0x4204cbc371f21a85L,0x18461b7af82369baL,0xc0c07d313fc858f9L,
+        0x5deb5a50e2bab569L } },
+    /* 58 << 70 */
+    { { 0xd5959d46d5eea89eL,0xfdff842408437f4bL,0xf21071e43cfe254fL,
+        0x7241769695468321L },
+      { 0x5d8288b9102cae3eL,0x2d143e3df1965dffL,0x00c9a376a078d847L,
+        0x6fc0da3126028731L } },
+    /* 59 << 70 */
+    { { 0xa2baeadfe45083a2L,0x66bc72185e5b4bcdL,0x2c826442d04b8e7fL,
+        0xc19f54516c4b586bL },
+      { 0x60182c495b7eeed5L,0xd9954ecd7aa9dfa1L,0xa403a8ecc73884adL,
+        0x7fb17de29bb39041L } },
+    /* 60 << 70 */
+    { { 0x694b64c5abb020e8L,0x3d18c18419c4eec7L,0x9c4673ef1c4793e5L,
+        0xc7b8aeb5056092e6L },
+      { 0x3aa1ca43f0f8c16bL,0x224ed5ecd679b2f6L,0x0d56eeaf55a205c9L,
+        0xbfe115ba4b8e028bL } },
+    /* 61 << 70 */
+    { { 0x97e608493927f4feL,0xf91fbf94759aa7c5L,0x985af7696be90a51L,
+        0xc1277b7878ccb823L },
+      { 0x395b656ee7a75952L,0x00df7de0928da5f5L,0x09c231754ca4454fL,
+        0x4ec971f47aa2d3c1L } },
+    /* 62 << 70 */
+    { { 0x45c3c507e75d9cccL,0x63b7be8a3dc90306L,0x37e09c665db44bdcL,
+        0x50d60da16841c6a2L },
+      { 0x6f9b65ee08df1b12L,0x387348797ff089dfL,0x9c331a663fe8013dL,
+        0x017f5de95f42fcc8L } },
+    /* 63 << 70 */
+    { { 0x43077866e8e57567L,0xc9f781cef9fcdb18L,0x38131dda9b12e174L,
+        0x25d84aa38a03752aL },
+      { 0x45e09e094d0c0ce2L,0x1564008b92bebba5L,0xf7e8ad31a87284c7L,
+        0xb7c4b46c97e7bbaaL } },
+    /* 64 << 70 */
+    { { 0x3e22a7b397acf4ecL,0x0426c4005ea8b640L,0x5e3295a64e969285L,
+        0x22aabc59a6a45670L },
+      { 0xb929714c5f5942bcL,0x9a6168bdfa3182edL,0x2216a665104152baL,
+        0x46908d03b6926368L } },
+    /* 0 << 77 */
+    { { 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 << 77 */
+    { { 0xa9f5d8745a1251fbL,0x967747a8c72725c7L,0x195c33e531ffe89eL,
+        0x609d210fe964935eL },
+      { 0xcafd6ca82fe12227L,0xaf9b5b960426469dL,0x2e9ee04c5693183cL,
+        0x1084a333c8146fefL } },
+    /* 2 << 77 */
+    { { 0x96649933aed1d1f7L,0x566eaff350563090L,0x345057f0ad2e39cfL,
+        0x148ff65b1f832124L },
+      { 0x042e89d4cf94cf0dL,0x319bec84520c58b3L,0x2a2676265361aa0dL,
+        0xc86fa3028fbc87adL } },
+    /* 3 << 77 */
+    { { 0xfc83d2ab5c8b06d5L,0xb1a785a2fe4eac46L,0xb99315bc846f7779L,
+        0xcf31d816ef9ea505L },
+      { 0x2391fe6a15d7dc85L,0x2f132b04b4016b33L,0x29547fe3181cb4c7L,
+        0xdb66d8a6650155a1L } },
+    /* 4 << 77 */
+    { { 0x6b66d7e1adc1696fL,0x98ebe5930acd72d0L,0x65f24550cc1b7435L,
+        0xce231393b4b9a5ecL },
+      { 0x234a22d4db067df9L,0x98dda095caff9b00L,0x1bbc75a06100c9c1L,
+        0x1560a9c8939cf695L } },
+    /* 5 << 77 */
+    { { 0xcf006d3e99e0925fL,0x2dd74a966322375aL,0xc58b446ab56af5baL,
+        0x50292683e0b9b4f1L },
+      { 0xe2c34cb41aeaffa3L,0x8b17203f9b9587c1L,0x6d559207ead1350cL,
+        0x2b66a215fb7f9604L } },
+    /* 6 << 77 */
+    { { 0x0850325efe51bf74L,0x9c4f579e5e460094L,0x5c87b92a76da2f25L,
+        0x889de4e06febef33L },
+      { 0x6900ec06646083ceL,0xbe2a0335bfe12773L,0xadd1da35c5344110L,
+        0x757568b7b802cd20L } },
+    /* 7 << 77 */
+    { { 0x7555977900f7e6c8L,0x38e8b94f0facd2f0L,0xfea1f3af03fde375L,
+        0x5e11a1d875881dfcL },
+      { 0xb3a6b02ec1e2f2efL,0x193d2bbbc605a6c5L,0x325ffeee339a0b2dL,
+        0x27b6a7249e0c8846L } },
+    /* 8 << 77 */
+    { { 0xe4050f1cf1c367caL,0x9bc85a9bc90fbc7dL,0xa373c4a2e1a11032L,
+        0xb64232b7ad0393a9L },
+      { 0xf5577eb0167dad29L,0x1604f30194b78ab2L,0x0baa94afe829348bL,
+        0x77fbd8dd41654342L } },
+    /* 9 << 77 */
+    { { 0xdab50ea5b964e39aL,0xd4c29e3cd0d3c76eL,0x80dae67c56d11964L,
+        0x7307a8bfe5ffcc2fL },
+      { 0x65bbc1aa91708c3bL,0xa151e62c28bf0eebL,0x6cb533816fa34db7L,
+        0x5139e05ca29403a8L } },
+    /* 10 << 77 */
+    { { 0x6ff651b494a7cd2eL,0x5671ffd10699336cL,0x6f5fd2cc979a896aL,
+        0x11e893a8d8148cefL },
+      { 0x988906a165cf7b10L,0x81b67178c50d8485L,0x7c0deb358a35b3deL,
+        0x423ac855c1d29799L } },
+    /* 11 << 77 */
+    { { 0xaf580d87dac50b74L,0x28b2b89f5869734cL,0x99a3b936874e28fbL,
+        0xbb2c919025f3f73aL },
+      { 0x199f691884a9d5b7L,0x7ebe23257e770374L,0xf442e1070738efe2L,
+        0xcf9f3f56cf9082d2L } },
+    /* 12 << 77 */
+    { { 0x719f69e109618708L,0xcc9e8364c183f9b1L,0xec203a95366a21afL,
+        0x6aec5d6d068b141fL },
+      { 0xee2df78a994f04e9L,0xb39ccae8271245b0L,0xb875a4a997e43f4fL,
+        0x507dfe11db2cea98L } },
+    /* 13 << 77 */
+    { { 0x4fbf81cb489b03e9L,0xdb86ec5b6ec414faL,0xfad444f9f51b3ae5L,
+        0xca7d33d61914e3feL },
+      { 0xa9c32f5c0ae6c4d0L,0xa9ca1d1e73969568L,0x98043c311aa7467eL,
+        0xe832e75ce21b5ac6L } },
+    /* 14 << 77 */
+    { { 0x314b7aea5232123dL,0x08307c8c65ae86dbL,0x06e7165caa4668edL,
+        0xb170458bb4d3ec39L },
+      { 0x4d2e3ec6c19bb986L,0xc5f34846ae0304edL,0x917695a06c9f9722L,
+        0x6c7f73174cab1c0aL } },
+    /* 15 << 77 */
+    { { 0x6295940e9d6d2e8bL,0xd318b8c1549f7c97L,0x2245320497713885L,
+        0x468d834ba8a440feL },
+      { 0xd81fe5b2bfba796eL,0x152364db6d71f116L,0xbb8c7c59b5b66e53L,
+        0x0b12c61b2641a192L } },
+    /* 16 << 77 */
+    { { 0x31f14802fcf0a7fdL,0x42fd07895488b01eL,0x71d78d6d9952b498L,
+        0x8eb572d907ac5201L },
+      { 0xe0a2a44c4d194a88L,0xd2b63fd9ba017e66L,0x78efc6c8f888aefcL,
+        0xb76f6bda4a881a11L } },
+    /* 17 << 77 */
+    { { 0x187f314bb46c2397L,0x004cf5665ded2819L,0xa9ea570438764d34L,
+        0xbba4521778084709L },
+      { 0x064745711171121eL,0xad7b7eb1e7c9b671L,0xdacfbc40730f7507L,
+        0x178cd8c6c7ad7bd1L } },
+    /* 18 << 77 */
+    { { 0xbf0be101b2a67238L,0x3556d367af9c14f2L,0x104b7831a5662075L,
+        0x58ca59bb79d9e60aL },
+      { 0x4bc45392a569a73bL,0x517a52e85698f6c9L,0x85643da5aeadd755L,
+        0x1aed0cd52a581b84L } },
+    /* 19 << 77 */
+    { { 0xb9b4ff8480af1372L,0x244c3113f1ba5d1fL,0x2a5dacbef5f98d31L,
+        0x2c3323e84375bc2aL },
+      { 0x17a3ab4a5594b1ddL,0xa1928bfbceb4797eL,0xe83af245e4886a19L,
+        0x8979d54672b5a74aL } },
+    /* 20 << 77 */
+    { { 0xa0f726bc19f9e967L,0xd9d03152e8fbbf4eL,0xcfd6f51db7707d40L,
+        0x633084d963f6e6e0L },
+      { 0xedcd9cdc55667eafL,0x73b7f92b2e44d56fL,0xfb2e39b64e962b14L,
+        0x7d408f6ef671fcbfL } },
+    /* 21 << 77 */
+    { { 0xcc634ddc164a89bbL,0x74a42bb23ef3bd05L,0x1280dbb2428decbbL,
+        0x6103f6bb402c8596L },
+      { 0xfa2bf581355a5752L,0x562f96a800946674L,0x4e4ca16d6da0223bL,
+        0xfe47819f28d3aa25L } },
+    /* 22 << 77 */
+    { { 0x9eea3075f8dfcf8aL,0xa284f0aa95669825L,0xb3fca250867d3fd8L,
+        0x20757b5f269d691eL },
+      { 0xf2c2402093b8a5deL,0xd3f93359ebc06da6L,0x1178293eb2739c33L,
+        0xd2a3e770bcd686e5L } },
+    /* 23 << 77 */
+    { { 0xa76f49f4cd941534L,0x0d37406be3c71c0eL,0x172d93973b97f7e3L,
+        0xec17e239bd7fd0deL },
+      { 0xe32905516f496ba2L,0x6a69317236ad50e7L,0xc4e539a283e7eff5L,
+        0x752737e718e1b4cfL } },
+    /* 24 << 77 */
+    { { 0xa2f7932c68af43eeL,0x5502468e703d00bdL,0xe5dc978f2fb061f5L,
+        0xc9a1904a28c815adL },
+      { 0xd3af538d470c56a4L,0x159abc5f193d8cedL,0x2a37245f20108ef3L,
+        0xfa17081e223f7178L } },
+    /* 25 << 77 */
+    { { 0x27b0fb2b10c8c0f5L,0x2102c3ea40650547L,0x594564df8ac3bfa7L,
+        0x98102033509dad96L },
+      { 0x6989643ff1d18a13L,0x35eebd91d7fc5af0L,0x078d096afaeaafd8L,
+        0xb7a89341def3de98L } },
+    /* 26 << 77 */
+    { { 0x2a206e8decf2a73aL,0x066a63978e551994L,0x3a6a088ab98d53a2L,
+        0x0ce7c67c2d1124aaL },
+      { 0x48cec671759a113cL,0xe3b373d34f6f67faL,0x5455d479fd36727bL,
+        0xe5a428eea13c0d81L } },
+    /* 27 << 77 */
+    { { 0xb853dbc81c86682bL,0xb78d2727b8d02b2aL,0xaaf69bed8ebc329aL,
+        0xdb6b40b3293b2148L },
+      { 0xe42ea77db8c4961fL,0xb1a12f7c20e5e0abL,0xa0ec527479e8b05eL,
+        0x68027391fab60a80L } },
+    /* 28 << 77 */
+    { { 0x6bfeea5f16b1bd5eL,0xf957e4204de30ad3L,0xcbaf664e6a353b9eL,
+        0x5c87331226d14febL },
+      { 0x4e87f98cb65f57cbL,0xdb60a6215e0cdd41L,0x67c16865a6881440L,
+        0x1093ef1a46ab52aaL } },
+    /* 29 << 77 */
+    { { 0xc095afb53f4ece64L,0x6a6bb02e7604551aL,0x55d44b4e0b26b8cdL,
+        0xe5f9a999f971268aL },
+      { 0xc08ec42511a7de84L,0x83568095fda469ddL,0x737bfba16c6c90a2L,
+        0x1cb9c4a0be229831L } },
+    /* 30 << 77 */
+    { { 0x93bccbbabb2eec64L,0xa0c23b64da03adbeL,0x5f7aa00ae0e86ac4L,
+        0x470b941efc1401e6L },
+      { 0x5ad8d6799df43574L,0x4ccfb8a90f65d810L,0x1bce80e3aa7fbd81L,
+        0x273291ad9508d20aL } },
+    /* 31 << 77 */
+    { { 0xf5c4b46b42a92806L,0x810684eca86ab44aL,0x4591640bca0bc9f8L,
+        0xb5efcdfc5c4b6054L },
+      { 0x16fc89076e9edd12L,0xe29d0b50d4d792f9L,0xa45fd01c9b03116dL,
+        0x85035235c81765a4L } },
+    /* 32 << 77 */
+    { { 0x1fe2a9b2b4b4b67cL,0xc1d10df0e8020604L,0x9d64abfcbc8058d8L,
+        0x8943b9b2712a0fbbL },
+      { 0x90eed9143b3def04L,0x85ab3aa24ce775ffL,0x605fd4ca7bbc9040L,
+        0x8b34a564e2c75dfbL } },
+    /* 33 << 77 */
+    { { 0x41ffc94a10358560L,0x2d8a50729e5c28aaL,0xe915a0fc4cc7eb15L,
+        0xe9efab058f6d0f5dL },
+      { 0xdbab47a9d19e9b91L,0x8cfed7450276154cL,0x154357ae2cfede0dL,
+        0x520630df19f5a4efL } },
+    /* 34 << 77 */
+    { { 0x25759f7ce382360fL,0xb6db05c988bf5857L,0x2917d61d6c58d46cL,
+        0x14f8e491fd20cb7aL },
+      { 0xb68a727a11c20340L,0x0386f86faf7ccbb6L,0x5c8bc6ccfee09a20L,
+        0x7d76ff4abb7eea35L } },
+    /* 35 << 77 */
+    { { 0xa7bdebe7db15be7aL,0x67a08054d89f0302L,0x56bf0ea9c1193364L,
+        0xc824446762837ebeL },
+      { 0x32bd8e8b20d841b8L,0x127a0548dbb8a54fL,0x83dd4ca663b20236L,
+        0x87714718203491faL } },
+    /* 36 << 77 */
+    { { 0x4dabcaaaaa8a5288L,0x91cc0c8aaf23a1c9L,0x34c72c6a3f220e0cL,
+        0xbcc20bdf1232144aL },
+      { 0x6e2f42daa20ede1bL,0xc441f00c74a00515L,0xbf46a5b6734b8c4bL,
+        0x574095037b56c9a4L } },
+    /* 37 << 77 */
+    { { 0x9f735261e4585d45L,0x9231faed6734e642L,0x1158a176be70ee6cL,
+        0x35f1068d7c3501bfL },
+      { 0x6beef900a2d26115L,0x649406f2ef0afee3L,0x3f43a60abc2420a1L,
+        0x509002a7d5aee4acL } },
+    /* 38 << 77 */
+    { { 0xb46836a53ff3571bL,0x24f98b78837927c1L,0x6254256a4533c716L,
+        0xf27abb0bd07ee196L },
+      { 0xd7cf64fc5c6d5bfdL,0x6915c751f0cd7a77L,0xd9f590128798f534L,
+        0x772b0da8f81d8b5fL } },
+    /* 39 << 77 */
+    { { 0x1244260c2e03fa69L,0x36cf0e3a3be1a374L,0x6e7c1633ef06b960L,
+        0xa71a4c55671f90f6L },
+      { 0x7a94125133c673dbL,0xc0bea51073e8c131L,0x61a8a699d4f6c734L,
+        0x25e78c88341ed001L } },
+    /* 40 << 77 */
+    { { 0x5c18acf88e2f7d90L,0xfdbf33d777be32cdL,0x0a085cd7d2eb5ee9L,
+        0x2d702cfbb3201115L },
+      { 0xb6e0ebdb85c88ce8L,0x23a3ce3c1e01d617L,0x3041618e567333acL,
+        0x9dd0fd8f157edb6bL } },
+    /* 41 << 77 */
+    { { 0x27f74702b57872b8L,0x2ef26b4f657d5fe1L,0x95426f0a57cf3d40L,
+        0x847e2ad165a6067aL },
+      { 0xd474d9a009996a74L,0x16a56acd2a26115cL,0x02a615c3d16f4d43L,
+        0xcc3fc965aadb85b7L } },
+    /* 42 << 77 */
+    { { 0x386bda73ce07d1b0L,0xd82910c258ad4178L,0x124f82cfcd2617f4L,
+        0xcc2f5e8def691770L },
+      { 0x82702550b8c30cccL,0x7b856aea1a8e575aL,0xbb822fefb1ab9459L,
+        0x085928bcec24e38eL } },
+    /* 43 << 77 */
+    { { 0x5d0402ecba8f4b4dL,0xc07cd4ba00b4d58bL,0x5d8dffd529227e7aL,
+        0x61d44d0c31bf386fL },
+      { 0xe486dc2b135e6f4dL,0x680962ebe79410efL,0xa61bd343f10088b5L,
+        0x6aa76076e2e28686L } },
+    /* 44 << 77 */
+    { { 0x80463d118fb98871L,0xcb26f5c3bbc76affL,0xd4ab8eddfbe03614L,
+        0xc8eb579bc0cf2deeL },
+      { 0xcc004c15c93bae41L,0x46fbae5d3aeca3b2L,0x671235cf0f1e9ab1L,
+        0xadfba9349ec285c1L } },
+    /* 45 << 77 */
+    { { 0x88ded013f216c980L,0xc8ac4fb8f79e0bc1L,0xa29b89c6fb97a237L,
+        0xb697b7809922d8e7L },
+      { 0x3142c639ddb945b5L,0x447b06c7e094c3a9L,0xcdcb364272266c90L,
+        0x633aad08a9385046L } },
+    /* 46 << 77 */
+    { { 0xa36c936bb57c6477L,0x871f8b64e94dbcc6L,0x28d0fb62a591a67bL,
+        0x9d40e081c1d926f5L },
+      { 0x3111eaf6f2d84b5aL,0x228993f9a565b644L,0x0ccbf5922c83188bL,
+        0xf87b30ab3df3e197L } },
+    /* 47 << 77 */
+    { { 0xb8658b317642bca8L,0x1a032d7f52800f17L,0x051dcae579bf9445L,
+        0xeba6b8ee54a2e253L },
+      { 0x5c8b9cadd4485692L,0x84bda40e8986e9beL,0xd16d16a42f0db448L,
+        0x8ec80050a14d4188L } },
+    /* 48 << 77 */
+    { { 0xb2b2610798fa7aaaL,0x41209ee4f073aa4eL,0xf1570359f2d6b19bL,
+        0xcbe6868cfc577cafL },
+      { 0x186c4bdc32c04dd3L,0xa6c35faecfeee397L,0xb4a1b312f086c0cfL,
+        0xe0a5ccc6d9461fe2L } },
+    /* 49 << 77 */
+    { { 0xc32278aa1536189fL,0x1126c55fba6df571L,0x0f71a602b194560eL,
+        0x8b2d7405324bd6e1L },
+      { 0x8481939e3738be71L,0xb5090b1a1a4d97a9L,0x116c65a3f05ba915L,
+        0x21863ad3aae448aaL } },
+    /* 50 << 77 */
+    { { 0xd24e2679a7aae5d3L,0x7076013d0de5c1c4L,0x2d50f8babb05b629L,
+        0x73c1abe26e66efbbL },
+      { 0xefd4b422f2488af7L,0xe4105d02663ba575L,0x7eb60a8b53a69457L,
+        0x62210008c945973bL } },
+    /* 51 << 77 */
+    { { 0xfb25547877a50ec6L,0xbf0392f70a37a72cL,0xa0a7a19c4be18e7aL,
+        0x90d8ea1625b1e0afL },
+      { 0x7582a293ef953f57L,0x90a64d05bdc5465aL,0xca79c497e2510717L,
+        0x560dbb7c18cb641fL } },
+    /* 52 << 77 */
+    { { 0x1d8e32864b66abfbL,0xd26f52e559030900L,0x1ee3f6435584941aL,
+        0x6d3b3730569f5958L },
+      { 0x9ff2a62f4789dba5L,0x91fcb81572b5c9b7L,0xf446cb7d6c8f9a0eL,
+        0x48f625c139b7ecb5L } },
+    /* 53 << 77 */
+    { { 0xbabae8011c6219b8L,0xe7a562d928ac2f23L,0xe1b4873226e20588L,
+        0x06ee1cad775af051L },
+      { 0xda29ae43faff79f7L,0xc141a412652ee9e0L,0x1e127f6f195f4bd0L,
+        0x29c6ab4f072f34f8L } },
+    /* 54 << 77 */
+    { { 0x7b7c147730448112L,0x82b51af1e4a38656L,0x2bf2028a2f315010L,
+        0xc9a4a01f6ea88cd4L },
+      { 0xf63e95d8257e5818L,0xdd8efa10b4519b16L,0xed8973e00da910bfL,
+        0xed49d0775c0fe4a9L } },
+    /* 55 << 77 */
+    { { 0xac3aac5eb7caee1eL,0x1033898da7f4da57L,0x42145c0e5c6669b9L,
+        0x42daa688c1aa2aa0L },
+      { 0x629cc15c1a1d885aL,0x25572ec0f4b76817L,0x8312e4359c8f8f28L,
+        0x8107f8cd81965490L } },
+    /* 56 << 77 */
+    { { 0x516ff3a36fa6110cL,0x74fb1eb1fb93561fL,0x6c0c90478457522bL,
+        0xcfd321046bb8bdc6L },
+      { 0x2d6884a2cc80ad57L,0x7c27fc3586a9b637L,0x3461baedadf4e8cdL,
+        0x1d56251a617242f0L } },
+    /* 57 << 77 */
+    { { 0x0b80d209c955bef4L,0xdf02cad206adb047L,0xf0d7cb915ec74feeL,
+        0xd25033751111ba44L },
+      { 0x9671755edf53cb36L,0x54dcb6123368551bL,0x66d69aacc8a025a4L,
+        0x6be946c6e77ef445L } },
+    /* 58 << 77 */
+    { { 0x719946d1a995e094L,0x65e848f6e51e04d8L,0xe62f33006a1e3113L,
+        0x1541c7c1501de503L },
+      { 0x4daac9faf4acfadeL,0x0e58589744cd0b71L,0x544fd8690a51cd77L,
+        0x60fc20ed0031016dL } },
+    /* 59 << 77 */
+    { { 0x58b404eca4276867L,0x46f6c3cc34f34993L,0x477ca007c636e5bdL,
+        0x8018f5e57c458b47L },
+      { 0xa1202270e47b668fL,0xcef48ccdee14f203L,0x23f98bae62ff9b4dL,
+        0x55acc035c589edddL } },
+    /* 60 << 77 */
+    { { 0x3fe712af64db4444L,0x19e9d634becdd480L,0xe08bc047a930978aL,
+        0x2dbf24eca1280733L },
+      { 0x3c0ae38c2cd706b2L,0x5b012a5b359017b9L,0x3943c38c72e0f5aeL,
+        0x786167ea57176fa3L } },
+    /* 61 << 77 */
+    { { 0xe5f9897d594881dcL,0x6b5efad8cfb820c1L,0xb2179093d55018deL,
+        0x39ad7d320bac56ceL },
+      { 0xb55122e02cfc0e81L,0x117c4661f6d89daaL,0x362d01e1cb64fa09L,
+        0x6a309b4e3e9c4dddL } },
+    /* 62 << 77 */
+    { { 0xfa979fb7abea49b1L,0xb4b1d27d10e2c6c5L,0xbd61c2c423afde7aL,
+        0xeb6614f89786d358L },
+      { 0x4a5d816b7f6f7459L,0xe431a44f09360e7bL,0x8c27a032c309914cL,
+        0xcea5d68acaede3d8L } },
+    /* 63 << 77 */
+    { { 0x3668f6653a0a3f95L,0x893694167ceba27bL,0x89981fade4728fe9L,
+        0x7102c8a08a093562L },
+      { 0xbb80310e235d21c8L,0x505e55d1befb7f7bL,0xa0a9081112958a67L,
+        0xd67e106a4d851fefL } },
+    /* 64 << 77 */
+    { { 0xb84011a9431dd80eL,0xeb7c7cca73306cd9L,0x20fadd29d1b3b730L,
+        0x83858b5bfe37b3d3L },
+      { 0xbf4cd193b6251d5cL,0x1cca1fd31352d952L,0xc66157a490fbc051L,
+        0x7990a63889b98636L } },
+    /* 0 << 84 */
+    { { 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 << 84 */
+    { { 0xe5aa692a87dec0e1L,0x010ded8df7b39d00L,0x7b1b80c854cfa0b5L,
+        0x66beb876a0f8ea28L },
+      { 0x50d7f5313476cd0eL,0xa63d0e65b08d3949L,0x1a09eea953479fc6L,
+        0x82ae9891f499e742L } },
+    /* 2 << 84 */
+    { { 0xab58b9105ca7d866L,0x582967e23adb3b34L,0x89ae4447cceac0bcL,
+        0x919c667c7bf56af5L },
+      { 0x9aec17b160f5dcd7L,0xec697b9fddcaadbcL,0x0b98f341463467f5L,
+        0xb187f1f7a967132fL } },
+    /* 3 << 84 */
+    { { 0x90fe7a1d214aeb18L,0x1506af3c741432f7L,0xbb5565f9e591a0c4L,
+        0x10d41a77b44f1bc3L },
+      { 0xa09d65e4a84bde96L,0x42f060d8f20a6a1cL,0x652a3bfdf27f9ce7L,
+        0xb6bdb65c3b3d739fL } },
+    /* 4 << 84 */
+    { { 0xeb5ddcb6ec7fae9fL,0x995f2714efb66e5aL,0xdee95d8e69445d52L,
+        0x1b6c2d4609e27620L },
+      { 0x32621c318129d716L,0xb03909f10958c1aaL,0x8c468ef91af4af63L,
+        0x162c429ffba5cdf6L } },
+    /* 5 << 84 */
+    { { 0x2f682343753b9371L,0x29cab45a5f1f9cd7L,0x571623abb245db96L,
+        0xc507db093fd79999L },
+      { 0x4e2ef652af036c32L,0x86f0cc7805018e5cL,0xc10a73d4ab8be350L,
+        0x6519b3977e826327L } },
+    /* 6 << 84 */
+    { { 0xe8cb5eef9c053df7L,0x8de25b37b300ea6fL,0xdb03fa92c849cffbL,
+        0x242e43a7e84169bbL },
+      { 0xe4fa51f4dd6f958eL,0x6925a77ff4445a8dL,0xe6e72a50e90d8949L,
+        0xc66648e32b1f6390L } },
+    /* 7 << 84 */
+    { { 0xb2ab1957173e460cL,0x1bbbce7530704590L,0xc0a90dbddb1c7162L,
+        0x505e399e15cdd65dL },
+      { 0x68434dcb57797ab7L,0x60ad35ba6a2ca8e8L,0x4bfdb1e0de3336c1L,
+        0xbbef99ebd8b39015L } },
+    /* 8 << 84 */
+    { { 0x6c3b96f31711ebecL,0x2da40f1fce98fdc4L,0xb99774d357b4411fL,
+        0x87c8bdf415b65bb6L },
+      { 0xda3a89e3c2eef12dL,0xde95bb9b3c7471f3L,0x600f225bd812c594L,
+        0x54907c5d2b75a56bL } },
+    /* 9 << 84 */
+    { { 0xa93cc5f08db60e35L,0x743e3cd6fa833319L,0x7dad5c41f81683c9L,
+        0x70c1e7d99c34107eL },
+      { 0x0edc4a39a6be0907L,0x36d4703586d0b7d3L,0x8c76da03272bfa60L,
+        0x0b4a07ea0f08a414L } },
+    /* 10 << 84 */
+    { { 0x699e4d2945c1dd53L,0xcadc5898231debb5L,0xdf49fcc7a77f00e0L,
+        0x93057bbfa73e5a0eL },
+      { 0x2f8b7ecd027a4cd1L,0x114734b3c614011aL,0xe7a01db767677c68L,
+        0x89d9be5e7e273f4fL } },
+    /* 11 << 84 */
+    { { 0xd225cb2e089808efL,0xf1f7a27dd59e4107L,0x53afc7618211b9c9L,
+        0x0361bc67e6819159L },
+      { 0x2a865d0b7f071426L,0x6a3c1810e7072567L,0x3e3bca1e0d6bcabdL,
+        0xa1b02bc1408591bcL } },
+    /* 12 << 84 */
+    { { 0xe0deee5931fba239L,0xf47424d398bd91d1L,0x0f8886f4071a3c1dL,
+        0x3f7d41e8a819233bL },
+      { 0x708623c2cf6eb998L,0x86bb49af609a287fL,0x942bb24963c90762L,
+        0x0ef6eea555a9654bL } },
+    /* 13 << 84 */
+    { { 0x5f6d2d7236f5defeL,0xfa9922dc56f99176L,0x6c8c5ecef78ce0c7L,
+        0x7b44589dbe09b55eL },
+      { 0xe11b3bca9ea83770L,0xd7fa2c7f2ab71547L,0x2a3dd6fa2a1ddcc0L,
+        0x09acb4305a7b7707L } },
+    /* 14 << 84 */
+    { { 0x4add4a2e649d4e57L,0xcd53a2b01917526eL,0xc526233020b44ac4L,
+        0x4028746abaa2c31dL },
+      { 0x5131839064291d4cL,0xbf48f151ee5ad909L,0xcce57f597b185681L,
+        0x7c3ac1b04854d442L } },
+    /* 15 << 84 */
+    { { 0x65587dc3c093c171L,0xae7acb2424f42b65L,0x5a338adb955996cbL,
+        0xc8e656756051f91bL },
+      { 0x66711fba28b8d0b1L,0x15d74137b6c10a90L,0x70cdd7eb3a232a80L,
+        0xc9e2f07f6191ed24L } },
+    /* 16 << 84 */
+    { { 0xa80d1db6f79588c0L,0xfa52fc69b55768ccL,0x0b4df1ae7f54438aL,
+        0x0cadd1a7f9b46a4fL },
+      { 0xb40ea6b31803dd6fL,0x488e4fa555eaae35L,0x9f047d55382e4e16L,
+        0xc9b5b7e02f6e0c98L } },
+    /* 17 << 84 */
+    { { 0x6b1bd2d395762649L,0xa9604ee7c7aea3f6L,0x3646ff276dc6f896L,
+        0x9bf0e7f52860bad1L },
+      { 0x2d92c8217cb44b92L,0xa2f5ce63aea9c182L,0xd0a2afb19154a5fdL,
+        0x482e474c95801da6L } },
+    /* 18 << 84 */
+    { { 0xc19972d0b611c24bL,0x1d468e6560a8f351L,0xeb7580697bcf6421L,
+        0xec9dd0ee88fbc491L },
+      { 0x5b59d2bf956c2e32L,0x73dc6864dcddf94eL,0xfd5e2321bcee7665L,
+        0xa7b4f8ef5e9a06c4L } },
+    /* 19 << 84 */
+    { { 0xfba918dd7280f855L,0xbbaac2608baec688L,0xa3b3f00f33400f42L,
+        0x3d2dba2966f2e6e4L },
+      { 0xb6f71a9498509375L,0x8f33031fcea423ccL,0x009b8dd04807e6fbL,
+        0x5163cfe55cdb954cL } },
+    /* 20 << 84 */
+    { { 0x03cc8f17cf41c6e8L,0xf1f03c2a037b925cL,0xc39c19cc66d2427cL,
+        0x823d24ba7b6c18e4L },
+      { 0x32ef9013901f0b4fL,0x684360f1f8941c2eL,0x0ebaff522c28092eL,
+        0x7891e4e3256c932fL } },
+    /* 21 << 84 */
+    { { 0x51264319ac445e3dL,0x553432e78ea74381L,0xe6eeaa6967e9c50aL,
+        0x27ced28462e628c7L },
+      { 0x3f96d3757a4afa57L,0xde0a14c3e484c150L,0x364a24eb38bd9923L,
+        0x1df18da0e5177422L } },
+    /* 22 << 84 */
+    { { 0x174e8f82d8d38a9bL,0x2e97c600e7de1391L,0xc5709850a1c175ddL,
+        0x969041a032ae5035L },
+      { 0xcbfd533b76a2086bL,0xd6bba71bd7c2e8feL,0xb2d58ee6099dfb67L,
+        0x3a8b342d064a85d9L } },
+    /* 23 << 84 */
+    { { 0x3bc07649522f9be3L,0x690c075bdf1f49a8L,0x80e1aee83854ec42L,
+        0x2a7dbf4417689dc7L },
+      { 0xc004fc0e3faf4078L,0xb2f02e9edf11862cL,0xf10a5e0fa0a1b7b3L,
+        0x30aca6238936ec80L } },
+    /* 24 << 84 */
+    { { 0xf83cbf0502f40d9aL,0x4681c4682c318a4dL,0x985756180e9c2674L,
+        0xbe79d0461847092eL },
+      { 0xaf1e480a78bd01e0L,0x6dd359e472a51db9L,0x62ce3821e3afbab6L,
+        0xc5cee5b617733199L } },
+    /* 25 << 84 */
+    { { 0xe08b30d46ffd9fbbL,0x6e5bc69936c610b7L,0xf343cff29ce262cfL,
+        0xca2e4e3568b914c1L },
+      { 0x011d64c016de36c5L,0xe0b10fdd42e2b829L,0x789429816685aaf8L,
+        0xe7511708230ede97L } },
+    /* 26 << 84 */
+    { { 0x671ed8fc3b922bf8L,0xe4d8c0a04c29b133L,0x87eb12393b6e99c4L,
+        0xaff3974c8793bebaL },
+      { 0x037494052c18df9bL,0xc5c3a29391007139L,0x6a77234fe37a0b95L,
+        0x02c29a21b661c96bL } },
+    /* 27 << 84 */
+    { { 0xc3aaf1d6141ecf61L,0x9195509e3bb22f53L,0x2959740422d51357L,
+        0x1b083822537bed60L },
+      { 0xcd7d6e35e07289f0L,0x1f94c48c6dd86effL,0xc8bb1f82eb0f9cfaL,
+        0x9ee0b7e61b2eb97dL } },
+    /* 28 << 84 */
+    { { 0x5a52fe2e34d74e31L,0xa352c3103bf79ab6L,0x97ff6c5aabfeeb8fL,
+        0xbfbe8feff5c97305L },
+      { 0xd6081ce6a7904608L,0x1f812f3ac4fca249L,0x9b24bc9ab9e5e200L,
+        0x91022c6738012ee8L } },
+    /* 29 << 84 */
+    { { 0xe83d9c5d30a713a1L,0x4876e3f084ef0f93L,0xc9777029c1fbf928L,
+        0xef7a6bb3bce7d2a4L },
+      { 0xb8067228dfa2a659L,0xd5cd3398d877a48fL,0xbea4fd8f025d0f3fL,
+        0xd67d2e352eae7c2bL } },
+    /* 30 << 84 */
+    { { 0x184de7d7cc5f4394L,0xb5551b5c4536e142L,0x2e89b212d34aa60aL,
+        0x14a96feaf50051d5L },
+      { 0x4e21ef740d12bb0bL,0xc522f02060b9677eL,0x8b12e4672df7731dL,
+        0x39f803827b326d31L } },
+    /* 31 << 84 */
+    { { 0xdfb8630c39024a94L,0xaacb96a897319452L,0xd68a3961eda3867cL,
+        0x0c58e2b077c4ffcaL },
+      { 0x3d545d634da919faL,0xef79b69af15e2289L,0x54bc3d3d808bab10L,
+        0xc8ab300745f82c37L } },
+    /* 32 << 84 */
+    { { 0xc12738b67c4a658aL,0xb3c4763940e72182L,0x3b77be468798e44fL,
+        0xdc047df217a7f85fL },
+      { 0x2439d4c55e59d92dL,0xcedca475e8e64d8dL,0xa724cd0d87ca9b16L,
+        0x35e4fd59a5540dfeL } },
+    /* 33 << 84 */
+    { { 0xf8c1ff18e4bcf6b1L,0x856d6285295018faL,0x433f665c3263c949L,
+        0xa6a76dd6a1f21409L },
+      { 0x17d32334cc7b4f79L,0xa1d0312206720e4aL,0xadb6661d81d9bed5L,
+        0xf0d6fb0211db15d1L } },
+    /* 34 << 84 */
+    { { 0x7fd11ad51fb747d2L,0xab50f9593033762bL,0x2a7e711bfbefaf5aL,
+        0xc73932783fef2bbfL },
+      { 0xe29fa2440df6f9beL,0x9092757b71efd215L,0xee60e3114f3d6fd9L,
+        0x338542d40acfb78bL } },
+    /* 35 << 84 */
+    { { 0x44a23f0838961a0fL,0x1426eade986987caL,0x36e6ee2e4a863cc6L,
+        0x48059420628b8b79L },
+      { 0x30303ad87396e1deL,0x5c8bdc4838c5aad1L,0x3e40e11f5c8f5066L,
+        0xabd6e7688d246bbdL } },
+    /* 36 << 84 */
+    { { 0x68aa40bb23330a01L,0xd23f5ee4c34eafa0L,0x3bbee3155de02c21L,
+        0x18dd4397d1d8dd06L },
+      { 0x3ba1939a122d7b44L,0xe6d3b40aa33870d6L,0x8e620f701c4fe3f8L,
+        0xf6bba1a5d3a50cbfL } },
+    /* 37 << 84 */
+    { { 0x4a78bde5cfc0aee0L,0x847edc46c08c50bdL,0xbaa2439cad63c9b2L,
+        0xceb4a72810fc2acbL },
+      { 0xa419e40e26da033dL,0x6cc3889d03e02683L,0x1cd28559fdccf725L,
+        0x0fd7e0f18d13d208L } },
+    /* 38 << 84 */
+    { { 0x01b9733b1f0df9d4L,0x8cc2c5f3a2b5e4f3L,0x43053bfa3a304fd4L,
+        0x8e87665c0a9f1aa7L },
+      { 0x087f29ecd73dc965L,0x15ace4553e9023dbL,0x2370e3092bce28b4L,
+        0xf9723442b6b1e84aL } },
+    /* 39 << 84 */
+    { { 0xbeee662eb72d9f26L,0xb19396def0e47109L,0x85b1fa73e13289d0L,
+        0x436cf77e54e58e32L },
+      { 0x0ec833b3e990ef77L,0x7373e3ed1b11fc25L,0xbe0eda870fc332ceL,
+        0xced049708d7ea856L } },
+    /* 40 << 84 */
+    { { 0xf85ff7857e977ca0L,0xb66ee8dadfdd5d2bL,0xf5e37950905af461L,
+        0x587b9090966d487cL },
+      { 0x6a198a1b32ba0127L,0xa7720e07141615acL,0xa23f3499996ef2f2L,
+        0xef5f64b4470bcb3dL } },
+    /* 41 << 84 */
+    { { 0xa526a96292b8c559L,0x0c14aac069740a0fL,0x0d41a9e3a6bdc0a5L,
+        0x97d521069c48aef4L },
+      { 0xcf16bd303e7c253bL,0xcc834b1a47fdedc1L,0x7362c6e5373aab2eL,
+        0x264ed85ec5f590ffL } },
+    /* 42 << 84 */
+    { { 0x7a46d9c066d41870L,0xa50c20b14787ba09L,0x185e7e51e3d44635L,
+        0xb3b3e08031e2d8dcL },
+      { 0xbed1e558a179e9d9L,0x2daa3f7974a76781L,0x4372baf23a40864fL,
+        0x46900c544fe75cb5L } },
+    /* 43 << 84 */
+    { { 0xb95f171ef76765d0L,0x4ad726d295c87502L,0x2ec769da4d7c99bdL,
+        0x5e2ddd19c36cdfa8L },
+      { 0xc22117fca93e6deaL,0xe8a2583b93771123L,0xbe2f6089fa08a3a2L,
+        0x4809d5ed8f0e1112L } },
+    /* 44 << 84 */
+    { { 0x3b414aa3da7a095eL,0x9049acf126f5aaddL,0x78d46a4d6be8b84aL,
+        0xd66b1963b732b9b3L },
+      { 0x5c2ac2a0de6e9555L,0xcf52d098b5bd8770L,0x15a15fa60fd28921L,
+        0x56ccb81e8b27536dL } },
+    /* 45 << 84 */
+    { { 0x0f0d8ab89f4ccbb8L,0xed5f44d2db221729L,0x4314198800bed10cL,
+        0xc94348a41d735b8bL },
+      { 0x79f3e9c429ef8479L,0x4c13a4e3614c693fL,0x32c9af568e143a14L,
+        0xbc517799e29ac5c4L } },
+    /* 46 << 84 */
+    { { 0x05e179922774856fL,0x6e52fb056c1bf55fL,0xaeda4225e4f19e16L,
+        0x70f4728aaf5ccb26L },
+      { 0x5d2118d1b2947f22L,0xc827ea16281d6fb9L,0x8412328d8cf0eabdL,
+        0x45ee9fb203ef9dcfL } },
+    /* 47 << 84 */
+    { { 0x8e700421bb937d63L,0xdf8ff2d5cc4b37a6L,0xa4c0d5b25ced7b68L,
+        0x6537c1efc7308f59L },
+      { 0x25ce6a263b37f8e8L,0x170e9a9bdeebc6ceL,0xdd0379528728d72cL,
+        0x445b0e55850154bcL } },
+    /* 48 << 84 */
+    { { 0x4b7d0e0683a7337bL,0x1e3416d4ffecf249L,0x24840eff66a2b71fL,
+        0xd0d9a50ab37cc26dL },
+      { 0xe21981506fe28ef7L,0x3cc5ef1623324c7fL,0x220f3455769b5263L,
+        0xe2ade2f1a10bf475L } },
+    /* 49 << 84 */
+    { { 0x28cd20fa458d3671L,0x1549722c2dc4847bL,0x6dd01e55591941e3L,
+        0x0e6fbcea27128ccbL },
+      { 0xae1a1e6b3bef0262L,0xfa8c472c8f54e103L,0x7539c0a872c052ecL,
+        0xd7b273695a3490e9L } },
+    /* 50 << 84 */
+    { { 0x143fe1f171684349L,0x36b4722e32e19b97L,0xdc05922790980affL,
+        0x175c9c889e13d674L },
+      { 0xa7de5b226e6bfdb1L,0x5ea5b7b2bedb4b46L,0xd5570191d34a6e44L,
+        0xfcf60d2ea24ff7e6L } },
+    /* 51 << 84 */
+    { { 0x614a392d677819e1L,0x7be74c7eaa5a29e8L,0xab50fece63c85f3fL,
+        0xaca2e2a946cab337L },
+      { 0x7f700388122a6fe3L,0xdb69f703882a04a8L,0x9a77935dcf7aed57L,
+        0xdf16207c8d91c86fL } },
+    /* 52 << 84 */
+    { { 0x2fca49ab63ed9998L,0xa3125c44a77ddf96L,0x05dd8a8624344072L,
+        0xa023dda2fec3fb56L },
+      { 0x421b41fc0c743032L,0x4f2120c15e438639L,0xfb7cae51c83c1b07L,
+        0xb2370caacac2171aL } },
+    /* 53 << 84 */
+    { { 0x2eb2d9626cc820fbL,0x59feee5cb85a44bfL,0x94620fca5b6598f0L,
+        0x6b922cae7e314051L },
+      { 0xff8745ad106bed4eL,0x546e71f5dfa1e9abL,0x935c1e481ec29487L,
+        0x9509216c4d936530L } },
+    /* 54 << 84 */
+    { { 0xc7ca306785c9a2dbL,0xd6ae51526be8606fL,0x09dbcae6e14c651dL,
+        0xc9536e239bc32f96L },
+      { 0xa90535a934521b03L,0xf39c526c878756ffL,0x383172ec8aedf03cL,
+        0x20a8075eefe0c034L } },
+    /* 55 << 84 */
+    { { 0xf22f9c6264026422L,0x8dd1078024b9d076L,0x944c742a3bef2950L,
+        0x55b9502e88a2b00bL },
+      { 0xa59e14b486a09817L,0xa39dd3ac47bb4071L,0x55137f663be0592fL,
+        0x07fcafd4c9e63f5bL } },
+    /* 56 << 84 */
+    { { 0x963652ee346eb226L,0x7dfab085ec2facb7L,0x273bf2b8691add26L,
+        0x30d74540f2b46c44L },
+      { 0x05e8e73ef2c2d065L,0xff9b8a00d42eeac9L,0x2fcbd20597209d22L,
+        0xeb740ffade14ea2cL } },
+    /* 57 << 84 */
+    { { 0xc71ff913a8aef518L,0x7bfc74bbfff4cfa2L,0x1716680cb6b36048L,
+        0x121b2cce9ef79af1L },
+      { 0xbff3c836a01eb3d3L,0x50eb1c6a5f79077bL,0xa48c32d6a004bbcfL,
+        0x47a593167d64f61dL } },
+    /* 58 << 84 */
+    { { 0x6068147f93102016L,0x12c5f65494d12576L,0xefb071a7c9bc6b91L,
+        0x7c2da0c56e23ea95L },
+      { 0xf4fd45b6d4a1dd5dL,0x3e7ad9b69122b13cL,0x342ca118e6f57a48L,
+        0x1c2e94a706f8288fL } },
+    /* 59 << 84 */
+    { { 0x99e68f075a97d231L,0x7c80de974d838758L,0xbce0f5d005872727L,
+        0xbe5d95c219c4d016L },
+      { 0x921d5cb19c2492eeL,0x42192dc1404d6fb3L,0x4c84dcd132f988d3L,
+        0xde26d61fa17b8e85L } },
+    /* 60 << 84 */
+    { { 0xc466dcb6137c7408L,0x9a38d7b636a266daL,0x7ef5cb0683bebf1bL,
+        0xe5cdcbbf0fd014e3L },
+      { 0x30aa376df65965a0L,0x60fe88c2ebb3e95eL,0x33fd0b6166ee6f20L,
+        0x8827dcdb3f41f0a0L } },
+    /* 61 << 84 */
+    { { 0xbf8a9d240c56c690L,0x40265dadddb7641dL,0x522b05bf3a6b662bL,
+        0x466d1dfeb1478c9bL },
+      { 0xaa6169621484469bL,0x0db6054902df8f9fL,0xc37bca023cb8bf51L,
+        0x5effe34621371ce8L } },
+    /* 62 << 84 */
+    { { 0xe8f65264ff112c32L,0x8a9c736d7b971fb2L,0xa4f194707b75080dL,
+        0xfc3f2c5a8839c59bL },
+      { 0x1d6c777e5aeb49c2L,0xf3db034dda1addfeL,0xd76fee5a5535affcL,
+        0x0853ac70b92251fdL } },
+    /* 63 << 84 */
+    { { 0x37e3d5948b2a29d5L,0x28f1f4574de00ddbL,0x8083c1b5f42c328bL,
+        0xd8ef1d8fe493c73bL },
+      { 0x96fb626041dc61bdL,0xf74e8a9d27ee2f8aL,0x7c605a802c946a5dL,
+        0xeed48d653839ccfdL } },
+    /* 64 << 84 */
+    { { 0x9894344f3a29467aL,0xde81e949c51eba6dL,0xdaea066ba5e5c2f2L,
+        0x3fc8a61408c8c7b3L },
+      { 0x7adff88f06d0de9fL,0xbbc11cf53b75ce0aL,0x9fbb7accfbbc87d5L,
+        0xa1458e267badfde2L } },
+    /* 0 << 91 */
+    { { 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 << 91 */
+    { { 0x1cb43668e039c256L,0x5f26fb8b7c17fd5dL,0xeee426af79aa062bL,
+        0x072002d0d78fbf04L },
+      { 0x4c9ca237e84fb7e3L,0xb401d8a10c82133dL,0xaaa525926d7e4181L,
+        0xe943083373dbb152L } },
+    /* 2 << 91 */
+    { { 0xf92dda31be24319aL,0x03f7d28be095a8e7L,0xa52fe84098782185L,
+        0x276ddafe29c24dbcL },
+      { 0x80cd54961d7a64ebL,0xe43608897f1dbe42L,0x2f81a8778438d2d5L,
+        0x7e4d52a885169036L } },
+    /* 3 << 91 */
+    { { 0x19e3d5b11d59715dL,0xc7eaa762d788983eL,0xe5a730b0abf1f248L,
+        0xfbab8084fae3fd83L },
+      { 0x65e50d2153765b2fL,0xbdd4e083fa127f3dL,0x9cf3c074397b1b10L,
+        0x59f8090cb1b59fd3L } },
+    /* 4 << 91 */
+    { { 0x7b15fd9d615faa8fL,0x8fa1eb40968554edL,0x7bb4447e7aa44882L,
+        0x2bb2d0d1029fff32L },
+      { 0x075e2a646caa6d2fL,0x8eb879de22e7351bL,0xbcd5624e9a506c62L,
+        0x218eaef0a87e24dcL } },
+    /* 5 << 91 */
+    { { 0x37e5684744ddfa35L,0x9ccfc5c5dab3f747L,0x9ac1df3f1ee96cf4L,
+        0x0c0571a13b480b8fL },
+      { 0x2fbeb3d54b3a7b3cL,0x35c036695dcdbb99L,0x52a0f5dcb2415b3aL,
+        0xd57759b44413ed9aL } },
+    /* 6 << 91 */
+    { { 0x1fe647d83d30a2c5L,0x0857f77ef78a81dcL,0x11d5a334131a4a9bL,
+        0xc0a94af929d393f5L },
+      { 0xbc3a5c0bdaa6ec1aL,0xba9fe49388d2d7edL,0xbb4335b4bb614797L,
+        0x991c4d6872f83533L } },
+    /* 7 << 91 */
+    { { 0x53258c28d2f01cb3L,0x93d6eaa3d75db0b1L,0x419a2b0de87d0db4L,
+        0xa1e48f03d8fe8493L },
+      { 0xf747faf6c508b23aL,0xf137571a35d53549L,0x9f5e58e2fcf9b838L,
+        0xc7186ceea7fd3cf5L } },
+    /* 8 << 91 */
+    { { 0x77b868cee978a1d3L,0xe3a68b337ab92d04L,0x5102979487a5b862L,
+        0x5f0606c33a61d41dL },
+      { 0x2814be276f9326f1L,0x2f521c14c6fe3c2eL,0x17464d7dacdf7351L,
+        0x10f5f9d3777f7e44L } },
+    /* 9 << 91 */
+    { { 0xce8e616b269fb37dL,0xaaf738047de62de5L,0xaba111754fdd4153L,
+        0x515759ba3770b49bL },
+      { 0x8b09ebf8aa423a61L,0x592245a1cd41fb92L,0x1cba8ec19b4c8936L,
+        0xa87e91e3af36710eL } },
+    /* 10 << 91 */
+    { { 0x1fd84ce43d34a2e3L,0xee3759ceb43b5d61L,0x895bc78c619186c7L,
+        0xf19c3809cbb9725aL },
+      { 0xc0be21aade744b1fL,0xa7d222b060f8056bL,0x74be6157b23efe11L,
+        0x6fab2b4f0cd68253L } },
+    /* 11 << 91 */
+    { { 0xad33ea5f4bf1d725L,0x9c1d8ee24f6c950fL,0x544ee78aa377af06L,
+        0x54f489bb94a113e1L },
+      { 0x8f11d634992fb7e8L,0x0169a7aaa2a44347L,0x1d49d4af95020e00L,
+        0x95945722e08e120bL } },
+    /* 12 << 91 */
+    { { 0xb6e33878a4d32282L,0xe36e029d48020ae7L,0xe05847fb37a9b750L,
+        0xf876812cb29e3819L },
+      { 0x84ad138ed23a17f0L,0x6d7b4480f0b3950eL,0xdfa8aef42fd67ae0L,
+        0x8d3eea2452333af6L } },
+    /* 13 << 91 */
+    { { 0x0d052075b15d5accL,0xc6d9c79fbd815bc4L,0x8dcafd88dfa36cf2L,
+        0x908ccbe238aa9070L },
+      { 0x638722c4ba35afceL,0x5a3da8b0fd6abf0bL,0x2dce252cc9c335c1L,
+        0x84e7f0de65aa799bL } },
+    /* 14 << 91 */
+    { { 0x2101a522b99a72cbL,0x06de6e6787618016L,0x5ff8c7cde6f3653eL,
+        0x0a821ab5c7a6754aL },
+      { 0x7e3fa52b7cb0b5a2L,0xa7fb121cc9048790L,0x1a72502006ce053aL,
+        0xb490a31f04e929b0L } },
+    /* 15 << 91 */
+    { { 0xe17be47d62dd61adL,0x781a961c6be01371L,0x1063bfd3dae3cbbaL,
+        0x356474067f73c9baL },
+      { 0xf50e957b2736a129L,0xa6313702ed13f256L,0x9436ee653a19fcc5L,
+        0xcf2bdb29e7a4c8b6L } },
+    /* 16 << 91 */
+    { { 0xb06b1244c5f95cd8L,0xda8c8af0f4ab95f4L,0x1bae59c2b9e5836dL,
+        0x07d51e7e3acffffcL },
+      { 0x01e15e6ac2ccbcdaL,0x3bc1923f8528c3e0L,0x43324577a49fead4L,
+        0x61a1b8842aa7a711L } },
+    /* 17 << 91 */
+    { { 0xf9a86e08700230efL,0x0af585a1bd19adf8L,0x7645f361f55ad8f2L,
+        0x6e67622346c3614cL },
+      { 0x23cb257c4e774d3fL,0x82a38513ac102d1bL,0x9bcddd887b126aa5L,
+        0xe716998beefd3ee4L } },
+    /* 18 << 91 */
+    { { 0x4239d571fb167583L,0xdd011c78d16c8f8aL,0x271c289569a27519L,
+        0x9ce0a3b7d2d64b6aL },
+      { 0x8c977289d5ec6738L,0xa3b49f9a8840ef6bL,0x808c14c99a453419L,
+        0x5c00295b0cf0a2d5L } },
+    /* 19 << 91 */
+    { { 0x524414fb1d4bcc76L,0xb07691d2459a88f1L,0x77f43263f70d110fL,
+        0x64ada5e0b7abf9f3L },
+      { 0xafd0f94e5b544cf5L,0xb4a13a15fd2713feL,0xb99b7d6e250c74f4L,
+        0x097f2f7320324e45L } },
+    /* 20 << 91 */
+    { { 0x994b37d8affa8208L,0xc3c31b0bdc29aafcL,0x3da746517a3a607fL,
+        0xd8e1b8c1fe6955d6L },
+      { 0x716e1815c8418682L,0x541d487f7dc91d97L,0x48a04669c6996982L,
+        0xf39cab1583a6502eL } },
+    /* 21 << 91 */
+    { { 0x025801a0e68db055L,0xf3569758ba3338d5L,0xb0c8c0aaee2afa84L,
+        0x4f6985d3fb6562d1L },
+      { 0x351f1f15132ed17aL,0x510ed0b4c04365feL,0xa3f98138e5b1f066L,
+        0xbc9d95d632df03dcL } },
+    /* 22 << 91 */
+    { { 0xa83ccf6e19abd09eL,0x0b4097c14ff17edbL,0x58a5c478d64a06ceL,
+        0x2ddcc3fd544a58fdL },
+      { 0xd449503d9e8153b8L,0x3324fd027774179bL,0xaf5d47c8dbd9120cL,
+        0xeb86016234fa94dbL } },
+    /* 23 << 91 */
+    { { 0x5817bdd1972f07f4L,0xe5579e2ed27bbcebL,0x86847a1f5f11e5a6L,
+        0xb39ed2557c3cf048L },
+      { 0xe1076417a2f62e55L,0x6b9ab38f1bcf82a2L,0x4bb7c3197aeb29f9L,
+        0xf6d17da317227a46L } },
+    /* 24 << 91 */
+    { { 0xab53ddbd0f968c00L,0xa03da7ec000c880bL,0x7b2396246a9ad24dL,
+        0x612c040101ec60d0L },
+      { 0x70d10493109f5df1L,0xfbda403080af7550L,0x30b93f95c6b9a9b3L,
+        0x0c74ec71007d9418L } },
+    /* 25 << 91 */
+    { { 0x941755646edb951fL,0x5f4a9d787f22c282L,0xb7870895b38d1196L,
+        0xbc593df3a228ce7cL },
+      { 0xc78c5bd46af3641aL,0x7802200b3d9b3dccL,0x0dc73f328be33304L,
+        0x847ed87d61ffb79aL } },
+    /* 26 << 91 */
+    { { 0xf85c974e6d671192L,0x1e14100ade16f60fL,0x45cb0d5a95c38797L,
+        0x18923bba9b022da4L },
+      { 0xef2be899bbe7e86eL,0x4a1510ee216067bfL,0xd98c815484d5ce3eL,
+        0x1af777f0f92a2b90L } },
+    /* 27 << 91 */
+    { { 0x9fbcb4004ef65724L,0x3e04a4c93c0ca6feL,0xfb3e2cb555002994L,
+        0x1f3a93c55363ecabL },
+      { 0x1fe00efe3923555bL,0x744bedd91e1751eaL,0x3fb2db596ab69357L,
+        0x8dbd7365f5e6618bL } },
+    /* 28 << 91 */
+    { { 0x99d53099df1ea40eL,0xb3f24a0b57d61e64L,0xd088a198596eb812L,
+        0x22c8361b5762940bL },
+      { 0x66f01f97f9c0d95cL,0x884611728e43cdaeL,0x11599a7fb72b15c3L,
+        0x135a7536420d95ccL } },
+    /* 29 << 91 */
+    { { 0x2dcdf0f75f7ae2f6L,0x15fc6e1dd7fa6da2L,0x81ca829ad1d441b6L,
+        0x84c10cf804a106b6L },
+      { 0xa9b26c95a73fbbd0L,0x7f24e0cb4d8f6ee8L,0x48b459371e25a043L,
+        0xf8a74fca036f3dfeL } },
+    /* 30 << 91 */
+    { { 0x1ed46585c9f84296L,0x7fbaa8fb3bc278b0L,0xa8e96cd46c4fcbd0L,
+        0x940a120273b60a5fL },
+      { 0x34aae12055a4aec8L,0x550e9a74dbd742f0L,0x794456d7228c68abL,
+        0x492f8868a4e25ec6L } },
+    /* 31 << 91 */
+    { { 0x682915adb2d8f398L,0xf13b51cc5b84c953L,0xcda90ab85bb917d6L,
+        0x4b6155604ea3dee1L },
+      { 0x578b4e850a52c1c8L,0xeab1a69520b75fc4L,0x60c14f3caa0bb3c6L,
+        0x220f448ab8216094L } },
+    /* 32 << 91 */
+    { { 0x4fe7ee31b0e63d34L,0xf4600572a9e54fabL,0xc0493334d5e7b5a4L,
+        0x8589fb9206d54831L },
+      { 0xaa70f5cc6583553aL,0x0879094ae25649e5L,0xcc90450710044652L,
+        0xebb0696d02541c4fL } },
+    /* 33 << 91 */
+    { { 0x5a171fdeb9718710L,0x38f1bed8f374a9f5L,0xc8c582e1ba39bdc1L,
+        0xfc457b0a908cc0ceL },
+      { 0x9a187fd4883841e2L,0x8ec25b3938725381L,0x2553ed0596f84395L,
+        0x095c76616f6c6897L } },
+    /* 34 << 91 */
+    { { 0x917ac85c4bdc5610L,0xb2885fe4179eb301L,0x5fc655478b78bdccL,
+        0x4a9fc893e59e4699L },
+      { 0xbb7ff0cd3ce299afL,0x195be9b3adf38b20L,0x6a929c87d38ddb8fL,
+        0x55fcc99cb21a51b9L } },
+    /* 35 << 91 */
+    { { 0x2b695b4c721a4593L,0xed1e9a15768eaac2L,0xfb63d71c7489f914L,
+        0xf98ba31c78118910L },
+      { 0x802913739b128eb4L,0x7801214ed448af4aL,0xdbd2e22b55418dd3L,
+        0xeffb3c0dd3998242L } },
+    /* 36 << 91 */
+    { { 0xdfa6077cc7bf3827L,0xf2165bcb47f8238fL,0xfe37cf688564d554L,
+        0xe5f825c40a81fb98L },
+      { 0x43cc4f67ffed4d6fL,0xbc609578b50a34b0L,0x8aa8fcf95041faf1L,
+        0x5659f053651773b6L } },
+    /* 37 << 91 */
+    { { 0xe87582c36044d63bL,0xa60894090cdb0ca0L,0x8c993e0fbfb2bcf6L,
+        0xfc64a71945985cfcL },
+      { 0x15c4da8083dbedbaL,0x804ae1122be67df7L,0xda4c9658a23defdeL,
+        0x12002ddd5156e0d3L } },
+    /* 38 << 91 */
+    { { 0xe68eae895dd21b96L,0x8b99f28bcf44624dL,0x0ae008081ec8897aL,
+        0xdd0a93036712f76eL },
+      { 0x962375224e233de4L,0x192445b12b36a8a5L,0xabf9ff74023993d9L,
+        0x21f37bf42aad4a8fL } },
+    /* 39 << 91 */
+    { { 0x340a4349f8bd2bbdL,0x1d902cd94868195dL,0x3d27bbf1e5fdb6f1L,
+        0x7a5ab088124f9f1cL },
+      { 0xc466ab06f7a09e03L,0x2f8a197731f2c123L,0xda355dc7041b6657L,
+        0xcb840d128ece2a7cL } },
+    /* 40 << 91 */
+    { { 0xb600ad9f7db32675L,0x78fea13307a06f1bL,0x5d032269b31f6094L,
+        0x07753ef583ec37aaL },
+      { 0x03485aed9c0bea78L,0x41bb3989bc3f4524L,0x09403761697f726dL,
+        0x6109beb3df394820L } },
+    /* 41 << 91 */
+    { { 0x804111ea3b6d1145L,0xb6271ea9a8582654L,0x619615e624e66562L,
+        0xa2554945d7b6ad9cL },
+      { 0xd9c4985e99bfe35fL,0x9770ccc07b51cdf6L,0x7c32701392881832L,
+        0x8777d45f286b26d1L } },
+    /* 42 << 91 */
+    { { 0x9bbeda22d847999dL,0x03aa33b6c3525d32L,0x4b7b96d428a959a1L,
+        0xbb3786e531e5d234L },
+      { 0xaeb5d3ce6961f247L,0x20aa85af02f93d3fL,0x9cd1ad3dd7a7ae4fL,
+        0xbf6688f0781adaa8L } },
+    /* 43 << 91 */
+    { { 0xb1b40e867469ceadL,0x1904c524309fca48L,0x9b7312af4b54bbc7L,
+        0xbe24bf8f593affa2L },
+      { 0xbe5e0790bd98764bL,0xa0f45f17a26e299eL,0x4af0d2c26b8fe4c7L,
+        0xef170db18ae8a3e6L } },
+    /* 44 << 91 */
+    { { 0x0e8d61a029e0ccc1L,0xcd53e87e60ad36caL,0x328c6623c8173822L,
+        0x7ee1767da496be55L },
+      { 0x89f13259648945afL,0x9e45a5fd25c8009cL,0xaf2febd91f61ab8cL,
+        0x43f6bc868a275385L } },
+    /* 45 << 91 */
+    { { 0x87792348f2142e79L,0x17d89259c6e6238aL,0x7536d2f64a839d9bL,
+        0x1f428fce76a1fbdcL },
+      { 0x1c1096010db06dfeL,0xbfc16bc150a3a3ccL,0xf9cbd9ec9b30f41bL,
+        0x5b5da0d600138cceL } },
+    /* 46 << 91 */
+    { { 0xec1d0a4856ef96a7L,0xb47eb848982bf842L,0x66deae32ec3f700dL,
+        0x4e43c42caa1181e0L },
+      { 0xa1d72a31d1a4aa2aL,0x440d4668c004f3ceL,0x0d6a2d3b45fe8a7aL,
+        0x820e52e2fb128365L } },
+    /* 47 << 91 */
+    { { 0x29ac5fcf25e51b09L,0x180cd2bf2023d159L,0xa9892171a1ebf90eL,
+        0xf97c4c877c132181L },
+      { 0x9f1dc724c03dbb7eL,0xae043765018cbbe4L,0xfb0b2a360767d153L,
+        0xa8e2f4d6249cbaebL } },
+    /* 48 << 91 */
+    { { 0x172a5247d95ea168L,0x1758fada2970764aL,0xac803a511d978169L,
+        0x299cfe2ede77e01bL },
+      { 0x652a1e17b0a98927L,0x2e26e1d120014495L,0x7ae0af9f7175b56aL,
+        0xc2e22a80d64b9f95L } },
+    /* 49 << 91 */
+    { { 0x4d0ff9fbd90a060aL,0x496a27dbbaf38085L,0x32305401da776bcfL,
+        0xb8cdcef6725f209eL },
+      { 0x61ba0f37436a0bbaL,0x263fa10876860049L,0x92beb98eda3542cfL,
+        0xa2d4d14ad5849538L } },
+    /* 50 << 91 */
+    { { 0x989b9d6812e9a1bcL,0x61d9075c5f6e3268L,0x352c6aa999ace638L,
+        0xde4e4a55920f43ffL },
+      { 0xe5e4144ad673c017L,0x667417ae6f6e05eaL,0x613416aedcd1bd56L,
+        0x5eb3620186693711L } },
+    /* 51 << 91 */
+    { { 0x2d7bc5043a1aa914L,0x175a129976dc5975L,0xe900e0f23fc8125cL,
+        0x569ef68c11198875L },
+      { 0x9012db6363a113b4L,0xe3bd3f5698835766L,0xa5c94a5276412deaL,
+        0xad9e2a09aa735e5cL } },
+    /* 52 << 91 */
+    { { 0x405a984c508b65e9L,0xbde4a1d16df1a0d1L,0x1a9433a1dfba80daL,
+        0xe9192ff99440ad2eL },
+      { 0x9f6496965099fe92L,0x25ddb65c0b27a54aL,0x178279ddc590da61L,
+        0x5479a999fbde681aL } },
+    /* 53 << 91 */
+    { { 0xd0e84e05013fe162L,0xbe11dc92632d471bL,0xdf0b0c45fc0e089fL,
+        0x04fb15b04c144025L },
+      { 0xa61d5fc213c99927L,0xa033e9e03de2eb35L,0xf8185d5cb8dacbb4L,
+        0x9a88e2658644549dL } },
+    /* 54 << 91 */
+    { { 0xf717af6254671ff6L,0x4bd4241b5fa58603L,0x06fba40be67773c0L,
+        0xc1d933d26a2847e9L },
+      { 0xf4f5acf3689e2c70L,0x92aab0e746bafd31L,0x798d76aa3473f6e5L,
+        0xcc6641db93141934L } },
+    /* 55 << 91 */
+    { { 0xcae27757d31e535eL,0x04cc43b687c2ee11L,0x8d1f96752e029ffaL,
+        0xc2150672e4cc7a2cL },
+      { 0x3b03c1e08d68b013L,0xa9d6816fedf298f3L,0x1bfbb529a2804464L,
+        0x95a52fae5db22125L } },
+    /* 56 << 91 */
+    { { 0x55b321600e1cb64eL,0x004828f67e7fc9feL,0x13394b821bb0fb93L,
+        0xb6293a2d35f1a920L },
+      { 0xde35ef21d145d2d9L,0xbe6225b3bb8fa603L,0x00fc8f6b32cf252dL,
+        0xa28e52e6117cf8c2L } },
+    /* 57 << 91 */
+    { { 0x9d1dc89b4c371e6dL,0xcebe067536ef0f28L,0x5de05d09a4292f81L,
+        0xa8303593353e3083L },
+      { 0xa1715b0a7e37a9bbL,0x8c56f61e2b8faec3L,0x5250743133c9b102L,
+        0x0130cefca44431f0L } },
+    /* 58 << 91 */
+    { { 0x56039fa0bd865cfbL,0x4b03e578bc5f1dd7L,0x40edf2e4babe7224L,
+        0xc752496d3a1988f6L },
+      { 0xd1572d3b564beb6bL,0x0db1d11039a1c608L,0x568d193416f60126L,
+        0x05ae9668f354af33L } },
+    /* 59 << 91 */
+    { { 0x19de6d37c92544f2L,0xcc084353a35837d5L,0xcbb6869c1a514eceL,
+        0xb633e7282e1d1066L },
+      { 0xf15dd69f936c581cL,0x96e7b8ce7439c4f9L,0x5e676f482e448a5bL,
+        0xb2ca7d5bfd916bbbL } },
+    /* 60 << 91 */
+    { { 0xd55a2541f5024025L,0x47bc5769e4c2d937L,0x7d31b92a0362189fL,
+        0x83f3086eef7816f9L },
+      { 0xf9f46d94b587579aL,0xec2d22d830e76c5fL,0x27d57461b000ffcfL,
+        0xbb7e65f9364ffc2cL } },
+    /* 61 << 91 */
+    { { 0x7c7c94776652a220L,0x61618f89d696c981L,0x5021701d89effff3L,
+        0xf2c8ff8e7c314163L },
+      { 0x2da413ad8efb4d3eL,0x937b5adfce176d95L,0x22867d342a67d51cL,
+        0x262b9b1018eb3ac9L } },
+    /* 62 << 91 */
+    { { 0x4e314fe4c43ff28bL,0x764766276a664e7aL,0x3e90e40bb7a565c2L,
+        0x8588993ac1acf831L },
+      { 0xd7b501d68f938829L,0x996627ee3edd7d4cL,0x37d44a6290cd34c7L,
+        0xa8327499f3833e8dL } },
+    /* 63 << 91 */
+    { { 0x2e18917d4bf50353L,0x85dd726b556765fbL,0x54fe65d693d5ab66L,
+        0x3ddbaced915c25feL },
+      { 0xa799d9a412f22e85L,0xe2a248676d06f6bcL,0xf4f1ee5643ca1637L,
+        0xfda2828b61ece30aL } },
+    /* 64 << 91 */
+    { { 0x758c1a3ea2dee7a6L,0xdcde2f3c734b2284L,0xaba445d24eaba6adL,
+        0x35aaf66876cee0a7L },
+      { 0x7e0b04a9e5aa049aL,0xe74083ad91103e84L,0xbeb183ce40afecc3L,
+        0x6b89de9fea043f7aL } },
+    /* 0 << 98 */
+    { { 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 << 98 */
+    { { 0x0e299d23fe67ba66L,0x9145076093cf2f34L,0xf45b5ea997fcf913L,
+        0x5be008438bd7dddaL },
+      { 0x358c3e05d53ff04dL,0xbf7ccdc35de91ef7L,0xad684dbfb69ec1a0L,
+        0x367e7cf2801fd997L } },
+    /* 2 << 98 */
+    { { 0x0ca1f3b7b0dc8595L,0x27de46089f1d9f2eL,0x1af3bf39badd82a7L,
+        0x79356a7965862448L },
+      { 0xc0602345f5f9a052L,0x1a8b0f89139a42f9L,0xb53eee42844d40fcL,
+        0x93b0bfe54e5b6368L } },
+    /* 3 << 98 */
+    { { 0x5434dd02c024789cL,0x90dca9ea41b57bfcL,0x8aa898e2243398dfL,
+        0xf607c834894a94bbL },
+      { 0xbb07be97c2c99b76L,0x6576ba6718c29302L,0x3d79efcce703a88cL,
+        0xf259ced7b6a0d106L } },
+    /* 4 << 98 */
+    { { 0x0f893a5dc8de610bL,0xe8c515fb67e223ceL,0x7774bfa64ead6dc5L,
+        0x89d20f95925c728fL },
+      { 0x7a1e0966098583ceL,0xa2eedb9493f2a7d7L,0x1b2820974c304d4aL,
+        0x0842e3dac077282dL } },
+    /* 5 << 98 */
+    { { 0xe4d972a33b9e2d7bL,0x7cc60b27c48218ffL,0x8fc7083884149d91L,
+        0x5c04346f2f461eccL },
+      { 0xebe9fdf2614650a9L,0x5e35b537c1f666acL,0x645613d188babc83L,
+        0x88cace3ac5e1c93eL } },
+    /* 6 << 98 */
+    { { 0x209ca3753de92e23L,0xccb03cc85fbbb6e3L,0xccb90f03d7b1487eL,
+        0xfa9c2a38c710941fL },
+      { 0x756c38236724ceedL,0x3a902258192d0323L,0xb150e519ea5e038eL,
+        0xdcba2865c7427591L } },
+    /* 7 << 98 */
+    { { 0xe549237f78890732L,0xc443bef953fcb4d9L,0x9884d8a6eb3480d6L,
+        0x8a35b6a13048b186L },
+      { 0xb4e4471665e9a90aL,0x45bf380d653006c0L,0x8f3f820d4fe9ae3bL,
+        0x244a35a0979a3b71L } },
+    /* 8 << 98 */
+    { { 0xa1010e9d74cd06ffL,0x9c17c7dfaca3eeacL,0x74c86cd38063aa2bL,
+        0x8595c4b3734614ffL },
+      { 0xa3de00ca990f62ccL,0xd9bed213ca0c3be5L,0x7886078adf8ce9f5L,
+        0xddb27ce35cd44444L } },
+    /* 9 << 98 */
+    { { 0xed374a6658926dddL,0x138b2d49908015b8L,0x886c6579de1f7ab8L,
+        0x888b9aa0c3020b7aL },
+      { 0xd3ec034e3a96e355L,0xba65b0b8f30fbe9aL,0x064c8e50ff21367aL,
+        0x1f508ea40b04b46eL } },
+    /* 10 << 98 */
+    { { 0x98561a49747c866cL,0xbbb1e5fe0518a062L,0x20ff4e8becdc3608L,
+        0x7f55cded20184027L },
+      { 0x8d73ec95f38c85f0L,0x5b589fdf8bc3b8c3L,0xbe95dd980f12b66fL,
+        0xf5bd1a090e338e01L } },
+    /* 11 << 98 */
+    { { 0x65163ae55e915918L,0x6158d6d986f8a46bL,0x8466b538eeebf99cL,
+        0xca8761f6bca477efL },
+      { 0xaf3449c29ebbc601L,0xef3b0f41e0c3ae2fL,0xaa6c577d5de63752L,
+        0xe916660164682a51L } },
+    /* 12 << 98 */
+    { { 0x5a3097befc15aa1eL,0x40d12548b54b0745L,0x5bad4706519a5f12L,
+        0xed03f717a439dee6L },
+      { 0x0794bb6c4a02c499L,0xf725083dcffe71d2L,0x2cad75190f3adcafL,
+        0x7f68ea1c43729310L } },
+    /* 13 << 98 */
+    { { 0xe747c8c7b7ffd977L,0xec104c3580761a22L,0x8395ebaf5a3ffb83L,
+        0xfb3261f4e4b63db7L },
+      { 0x53544960d883e544L,0x13520d708cc2eeb8L,0x08f6337bd3d65f99L,
+        0x83997db2781cf95bL } },
+    /* 14 << 98 */
+    { { 0xce6ff1060dbd2c01L,0x4f8eea6b1f9ce934L,0x546f7c4b0e993921L,
+        0x6236a3245e753fc7L },
+      { 0x65a41f84a16022e9L,0x0c18d87843d1dbb2L,0x73c556402d4cef9cL,
+        0xa042810870444c74L } },
+    /* 15 << 98 */
+    { { 0x68e4f15e9afdfb3cL,0x49a561435bdfb6dfL,0xa9bc1bd45f823d97L,
+        0xbceb5970ea111c2aL },
+      { 0x366b455fb269bbc4L,0x7cd85e1ee9bc5d62L,0xc743c41c4f18b086L,
+        0xa4b4099095294fb9L } },
+    /* 16 << 98 */
+    { { 0x9c7c581d26ee8382L,0xcf17dcc5359d638eL,0xee8273abb728ae3dL,
+        0x1d112926f821f047L },
+      { 0x1149847750491a74L,0x687fa761fde0dfb9L,0x2c2580227ea435abL,
+        0x6b8bdb9491ce7e3fL } },
+    /* 17 << 98 */
+    { { 0x4c5b5dc93bf834aaL,0x043718194f6c7e4bL,0xc284e00a3736bcadL,
+        0x0d88111821ae8f8dL },
+      { 0xf9cf0f82f48c8e33L,0xa11fd075a1bf40dbL,0xdceab0dedc2733e5L,
+        0xc560a8b58e986bd7L } },
+    /* 18 << 98 */
+    { { 0x48dd1fe23929d097L,0x3885b29092f188f1L,0x0f2ae613da6fcdacL,
+        0x9054303eb662a46cL },
+      { 0xb6871e440738042aL,0x98e6a977bdaf6449L,0xd8bc0650d1c9df1bL,
+        0xef3d645136e098f9L } },
+    /* 19 << 98 */
+    { { 0x03fbae82b6d72d28L,0x77ca9db1f5d84080L,0x8a112cffa58efc1cL,
+        0x518d761cc564cb4aL },
+      { 0x69b5740ef0d1b5ceL,0x717039cce9eb1785L,0x3fe29f9022f53382L,
+        0x8e54ba566bc7c95cL } },
+    /* 20 << 98 */
+    { { 0x9c806d8af7f91d0fL,0x3b61b0f1a82a5728L,0x4640032d94d76754L,
+        0x273eb5de47d834c6L },
+      { 0x2988abf77b4e4d53L,0xb7ce66bfde401777L,0x9fba6b32715071b3L,
+        0x82413c24ad3a1a98L } },
+    /* 21 << 98 */
+    { { 0x5b7fc8c4e0e8ad93L,0xb5679aee5fab868dL,0xb1f9d2fa2b3946f3L,
+        0x458897dc5685b50aL },
+      { 0x1e98c93089d0caf3L,0x39564c5f78642e92L,0x1b77729a0dbdaf18L,
+        0xf9170722579e82e6L } },
+    /* 22 << 98 */
+    { { 0x680c0317e4515fa5L,0xf85cff84fb0c790fL,0xc7a82aab6d2e0765L,
+        0x7446bca935c82b32L },
+      { 0x5de607aa6d63184fL,0x7c1a46a8262803a6L,0xd218313daebe8035L,
+        0x92113ffdc73c51f8L } },
+    /* 23 << 98 */
+    { { 0x4b38e08312e7e46cL,0x69d0a37a56126bd5L,0xfb3f324b73c07e04L,
+        0xa0c22f678fda7267L },
+      { 0x8f2c00514d2c7d8fL,0xbc45ced3cbe2cae5L,0xe1c6cf07a8f0f277L,
+        0xbc3923121eb99a98L } },
+    /* 24 << 98 */
+    { { 0x75537b7e3cc8ac85L,0x8d725f57dd02753bL,0xfd05ff64b737df2fL,
+        0x55fe8712f6d2531dL },
+      { 0x57ce04a96ab6b01cL,0x69a02a897cd93724L,0x4f82ac35cf86699bL,
+        0x8242d3ad9cb4b232L } },
+    /* 25 << 98 */
+    { { 0x713d0f65d62105e5L,0xbb222bfa2d29be61L,0xf2f9a79e6cfbef09L,
+        0xfc24d8d3d5d6782fL },
+      { 0x5db77085d4129967L,0xdb81c3ccdc3c2a43L,0x9d655fc005d8d9a3L,
+        0x3f5d057a54298026L } },
+    /* 26 << 98 */
+    { { 0x1157f56d88c54694L,0xb26baba59b09573eL,0x2cab03b022adffd1L,
+        0x60a412c8dd69f383L },
+      { 0xed76e98b54b25039L,0xd4ee67d3687e714dL,0x877396487b00b594L,
+        0xce419775c9ef709bL } },
+    /* 27 << 98 */
+    { { 0x40f76f851c203a40L,0x30d352d6eafd8f91L,0xaf196d3d95578dd2L,
+        0xea4bb3d777cc3f3dL },
+      { 0x42a5bd03b98e782bL,0xac958c400624920dL,0xb838134cfc56fcc8L,
+        0x86ec4ccf89572e5eL } },
+    /* 28 << 98 */
+    { { 0x69c435269be47be0L,0x323b7dd8cb28fea1L,0xfa5538ba3a6c67e5L,
+        0xef921d701d378e46L },
+      { 0xf92961fc3c4b880eL,0x3f6f914e98940a67L,0xa990eb0afef0ff39L,
+        0xa6c2920ff0eeff9cL } },
+    /* 29 << 98 */
+    { { 0xca80416651b8d9a3L,0x42531bc90ffb0db1L,0x72ce4718aa82e7ceL,
+        0x6e199913df574741L },
+      { 0xd5f1b13dd5d36946L,0x8255dc65f68f0194L,0xdc9df4cd8710d230L,
+        0x3453c20f138c1988L } },
+    /* 30 << 98 */
+    { { 0x9af98dc089a6ef01L,0x4dbcc3f09857df85L,0x348056015c1ad924L,
+        0x40448da5d0493046L },
+      { 0xf629926d4ee343e2L,0x6343f1bd90e8a301L,0xefc9349140815b3fL,
+        0xf882a423de8f66fbL } },
+    /* 31 << 98 */
+    { { 0x3a12d5f4e7db9f57L,0x7dfba38a3c384c27L,0x7a904bfd6fc660b1L,
+        0xeb6c5db32773b21cL },
+      { 0xc350ee661cdfe049L,0x9baac0ce44540f29L,0xbc57b6aba5ec6aadL,
+        0x167ce8c30a7c1baaL } },
+    /* 32 << 98 */
+    { { 0xb23a03a553fb2b56L,0x6ce141e74e057f78L,0x796525c389e490d9L,
+        0x0bc95725a31a7e75L },
+      { 0x1ec567911220fd06L,0x716e3a3c408b0bd6L,0x31cd6bf7e8ebeba9L,
+        0xa7326ca6bee6b670L } },
+    /* 33 << 98 */
+    { { 0x3d9f851ccd090c43L,0x561e8f13f12c3988L,0x50490b6a904b7be4L,
+        0x61690ce10410737bL },
+      { 0x299e9a370f009052L,0x258758f0f026092eL,0x9fa255f3fdfcdc0fL,
+        0xdbc9fb1fc0e1bcd2L } },
+    /* 34 << 98 */
+    { { 0x35f9dd6e24651840L,0xdca45a84a5c59abcL,0x103d396fecca4938L,
+        0x4532da0ab97b3f29L },
+      { 0xc4135ea51999a6bfL,0x3aa9505a5e6bf2eeL,0xf77cef063f5be093L,
+        0x97d1a0f8a943152eL } },
+    /* 35 << 98 */
+    { { 0x2cb0ebba2e1c21ddL,0xf41b29fc2c6797c4L,0xc6e17321b300101fL,
+        0x4422b0e9d0d79a89L },
+      { 0x49e4901c92f1bfc4L,0x06ab1f8fe1e10ed9L,0x84d35577db2926b8L,
+        0xca349d39356e8ec2L } },
+    /* 36 << 98 */
+    { { 0x70b63d32343bf1a9L,0x8fd3bd2837d1a6b1L,0x0454879c316865b4L,
+        0xee959ff6c458efa2L },
+      { 0x0461dcf89706dc3fL,0x737db0e2164e4b2eL,0x092626802f8843c8L,
+        0x54498bbc7745e6f6L } },
+    /* 37 << 98 */
+    { { 0x359473faa29e24afL,0xfcc3c45470aa87a1L,0xfd2c4bf500573aceL,
+        0xb65b514e28dd1965L },
+      { 0xe46ae7cf2193e393L,0x60e9a4e1f5444d97L,0xe7594e9600ff38edL,
+        0x43d84d2f0a0e0f02L } },
+    /* 38 << 98 */
+    { { 0x8b6db141ee398a21L,0xb88a56aee3bcc5beL,0x0a1aa52f373460eaL,
+        0x20da1a56160bb19bL },
+      { 0xfb54999d65bf0384L,0x71a14d245d5a180eL,0xbc44db7b21737b04L,
+        0xd84fcb1801dd8e92L } },
+    /* 39 << 98 */
+    { { 0x80de937bfa44b479L,0x535054995c98fd4fL,0x1edb12ab28f08727L,
+        0x4c58b582a5f3ef53L },
+      { 0xbfb236d88327f246L,0xc3a3bfaa4d7df320L,0xecd96c59b96024f2L,
+        0xfc293a537f4e0433L } },
+    /* 40 << 98 */
+    { { 0x5341352b5acf6e10L,0xc50343fdafe652c3L,0x4af3792d18577a7fL,
+        0xe1a4c617af16823dL },
+      { 0x9b26d0cd33425d0aL,0x306399ed9b7bc47fL,0x2a792f33706bb20bL,
+        0x3121961498111055L } },
+    /* 41 << 98 */
+    { { 0x864ec06487f5d28bL,0x11392d91962277fdL,0xb5aa7942bb6aed5fL,
+        0x080094dc47e799d9L },
+      { 0x4afa588c208ba19bL,0xd3e7570f8512f284L,0xcbae64e602f5799aL,
+        0xdeebe7ef514b9492L } },
+    /* 42 << 98 */
+    { { 0x30300f98e5c298ffL,0x17f561be3678361fL,0xf52ff31298cb9a16L,
+        0x6233c3bc5562d490L },
+      { 0x7bfa15a192e3a2cbL,0x961bcfd1e6365119L,0x3bdd29bf2c8c53b1L,
+        0x739704df822844baL } },
+    /* 43 << 98 */
+    { { 0x7dacfb587e7b754bL,0x23360791a806c9b9L,0xe7eb88c923504452L,
+        0x2983e996852c1783L },
+      { 0xdd4ae529958d881dL,0x026bae03262c7b3cL,0x3a6f9193960b52d1L,
+        0xd0980f9092696cfbL } },
+    /* 44 << 98 */
+    { { 0x4c1f428cd5f30851L,0x94dfed272a4f6630L,0x4df53772fc5d48a4L,
+        0xdd2d5a2f933260ceL },
+      { 0x574115bdd44cc7a5L,0x4ba6b20dbd12533aL,0x30e93cb8243057c9L,
+        0x794c486a14de320eL } },
+    /* 45 << 98 */
+    { { 0xe925d4cef21496e4L,0xf951d198ec696331L,0x9810e2de3e8d812fL,
+        0xd0a47259389294abL },
+      { 0x513ba2b50e3bab66L,0x462caff5abad306fL,0xe2dc6d59af04c49eL,
+        0x1aeb8750e0b84b0bL } },
+    /* 46 << 98 */
+    { { 0xc034f12f2f7d0ca2L,0x6d2e8128e06acf2fL,0x801f4f8321facc2fL,
+        0xa1170c03f40ef607L },
+      { 0xfe0a1d4f7805a99cL,0xbde56a36cc26aba5L,0x5b1629d035531f40L,
+        0xac212c2b9afa6108L } },
+    /* 47 << 98 */
+    { { 0x30a06bf315697be5L,0x6f0545dc2c63c7c1L,0x5d8cb8427ccdadafL,
+        0xd52e379bac7015bbL },
+      { 0xc4f56147f462c23eL,0xd44a429846bc24b0L,0xbc73d23ae2856d4fL,
+        0x61cedd8c0832bcdfL } },
+    /* 48 << 98 */
+    { { 0x6095355699f241d7L,0xee4adbd7001a349dL,0x0b35bf6aaa89e491L,
+        0x7f0076f4136f7546L },
+      { 0xd19a18ba9264da3dL,0x6eb2d2cd62a7a28bL,0xcdba941f8761c971L,
+        0x1550518ba3be4a5dL } },
+    /* 49 << 98 */
+    { { 0xd0e8e2f057d0b70cL,0xeea8612ecd133ba3L,0x814670f044416aecL,
+        0x424db6c330775061L },
+      { 0xd96039d116213fd1L,0xc61e7fa518a3478fL,0xa805bdcccb0c5021L,
+        0xbdd6f3a80cc616ddL } },
+    /* 50 << 98 */
+    { { 0x060096675d97f7e2L,0x31db0fc1af0bf4b6L,0x23680ed45491627aL,
+        0xb99a3c667d741fb1L },
+      { 0xe9bb5f5536b1ff92L,0x29738577512b388dL,0xdb8a2ce750fcf263L,
+        0x385346d46c4f7b47L } },
+    /* 51 << 98 */
+    { { 0xbe86c5ef31631f9eL,0xbf91da2103a57a29L,0xc3b1f7967b23f821L,
+        0x0f7d00d2770db354L },
+      { 0x8ffc6c3bd8fe79daL,0xcc5e8c40d525c996L,0x4640991dcfff632aL,
+        0x64d97e8c67112528L } },
+    /* 52 << 98 */
+    { { 0xc232d97302f1cd1eL,0xce87eacb1dd212a4L,0x6e4c8c73e69802f7L,
+        0x12ef02901fffddbdL },
+      { 0x941ec74e1bcea6e2L,0xd0b540243cb92cbbL,0x809fb9d47e8f9d05L,
+        0x3bf16159f2992aaeL } },
+    /* 53 << 98 */
+    { { 0xad40f279f8a7a838L,0x11aea63105615660L,0xbf52e6f1a01f6fa1L,
+        0xef0469953dc2aec9L },
+      { 0x785dbec9d8080711L,0xe1aec60a9fdedf76L,0xece797b5fa21c126L,
+        0xc66e898f05e52732L } },
+    /* 54 << 98 */
+    { { 0x39bb69c408811fdbL,0x8bfe1ef82fc7f082L,0xc8e7a393174f4138L,
+        0xfba8ad1dd58d1f98L },
+      { 0xbc21d0cebfd2fd5bL,0x0b839a826ee60d61L,0xaacf7658afd22253L,
+        0xb526bed8aae396b3L } },
+    /* 55 << 98 */
+    { { 0xccc1bbc238564464L,0x9e3ff9478c45bc73L,0xcde9bca358188a78L,
+        0x138b8ee0d73bf8f7L },
+      { 0x5c7e234c4123c489L,0x66e69368fa643297L,0x0629eeee39a15fa3L,
+        0x95fab881a9e2a927L } },
+    /* 56 << 98 */
+    { { 0xb2497007eafbb1e1L,0xd75c9ce6e75b7a93L,0x3558352defb68d78L,
+        0xa2f26699223f6396L },
+      { 0xeb911ecfe469b17aL,0x62545779e72d3ec2L,0x8ea47de782cb113fL,
+        0xebe4b0864e1fa98dL } },
+    /* 57 << 98 */
+    { { 0xec2d5ed78cdfedb1L,0xa535c077fe211a74L,0x9678109b11d244c5L,
+        0xf17c8bfbbe299a76L },
+      { 0xb651412efb11fbc4L,0xea0b548294ab3f65L,0xd8dffd950cf78243L,
+        0x2e719e57ce0361d4L } },
+    /* 58 << 98 */
+    { { 0x9007f085304ddc5bL,0x095e8c6d4daba2eaL,0x5a33cdb43f9d28a9L,
+        0x85b95cd8e2283003L },
+      { 0xbcd6c819b9744733L,0x29c5f538fc7f5783L,0x6c49b2fad59038e4L,
+        0x68349cc13bbe1018L } },
+    /* 59 << 98 */
+    { { 0xcc490c1d21830ee5L,0x36f9c4eee9bfa297L,0x58fd729448de1a94L,
+        0xaadb13a84e8f2cdcL },
+      { 0x515eaaa081313dbaL,0xc76bb468c2152dd8L,0x357f8d75a653dbf8L,
+        0xe4d8c4d1b14ac143L } },
+    /* 60 << 98 */
+    { { 0xbdb8e675b055cb40L,0x898f8e7b977b5167L,0xecc65651b82fb863L,
+        0x565448146d88f01fL },
+      { 0xb0928e95263a75a9L,0xcfb6836f1a22fcdaL,0x651d14db3f3bd37cL,
+        0x1d3837fbb6ad4664L } },
+    /* 61 << 98 */
+    { { 0x7c5fb538ff4f94abL,0x7243c7126d7fb8f2L,0xef13d60ca85c5287L,
+        0x18cfb7c74bb8dd1bL },
+      { 0x82f9bfe672908219L,0x35c4592b9d5144abL,0x52734f379cf4b42fL,
+        0x6bac55e78c60ddc4L } },
+    /* 62 << 98 */
+    { { 0xb5cd811e94dea0f6L,0x259ecae4e18cc1a3L,0x6a0e836e15e660f8L,
+        0x6c639ea60e02bff2L },
+      { 0x8721b8cb7e1026fdL,0x9e73b50b63261942L,0xb8c7097477f01da3L,
+        0x1839e6a68268f57fL } },
+    /* 63 << 98 */
+    { { 0x571b94155150b805L,0x1892389ef92c7097L,0x8d69c18e4a084b95L,
+        0x7014c512be5b495cL },
+      { 0x4780db361b07523cL,0x2f6219ce2c1c64faL,0xc38b81b0602c105aL,
+        0xab4f4f205dc8e360L } },
+    /* 64 << 98 */
+    { { 0x20d3c982cf7d62d2L,0x1f36e29d23ba8150L,0x48ae0bf092763f9eL,
+        0x7a527e6b1d3a7007L },
+      { 0xb4a89097581a85e3L,0x1f1a520fdc158be5L,0xf98db37d167d726eL,
+        0x8802786e1113e862L } },
+    /* 0 << 105 */
+    { { 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 << 105 */
+    { { 0xefb2149e36f09ab0L,0x03f163ca4a10bb5bL,0xd029704506e20998L,
+        0x56f0af001b5a3babL },
+      { 0x7af4cfec70880e0dL,0x7332a66fbe3d913fL,0x32e6c84a7eceb4bdL,
+        0xedc4a79a9c228f55L } },
+    /* 2 << 105 */
+    { { 0xc37c7dd0c55c4496L,0xa6a9635725bbabd2L,0x5b7e63f2add7f363L,
+        0x9dce37822e73f1dfL },
+      { 0xe1e5a16ab2b91f71L,0xe44898235ba0163cL,0xf2759c32f6e515adL,
+        0xa5e2f1f88615eecfL } },
+    /* 3 << 105 */
+    { { 0x74519be7abded551L,0x03d358b8c8b74410L,0x4d00b10b0e10d9a9L,
+        0x6392b0b128da52b7L },
+      { 0x6744a2980b75c904L,0xc305b0aea8f7f96cL,0x042e421d182cf932L,
+        0xf6fc5d509e4636caL } },
+    /* 4 << 105 */
+    { { 0x795847c9d64cc78cL,0x6c50621b9b6cb27bL,0x07099bf8df8022abL,
+        0x48f862ebc04eda1dL },
+      { 0xd12732ede1603c16L,0x19a80e0f5c9a9450L,0xe2257f54b429b4fcL,
+        0x66d3b2c645460515L } },
+    /* 5 << 105 */
+    { { 0x6ca4f87e822e37beL,0x73f237b4253bda4eL,0xf747f3a241190aebL,
+        0xf06fa36f804cf284L },
+      { 0x0a6bbb6efc621c12L,0x5d624b6440b80ec6L,0x4b0724257ba556f3L,
+        0x7fa0c3543e2d20a8L } },
+    /* 6 << 105 */
+    { { 0xe921fa31e3229d41L,0xa929c65294531bd4L,0x84156027a6d38209L,
+        0xf3d69f736bdb97bdL },
+      { 0x8906d19a16833631L,0x68a34c2e03d51be3L,0xcb59583b0e511cd8L,
+        0x99ce6bfdfdc132a8L } },
+    /* 7 << 105 */
+    { { 0x3facdaaaffcdb463L,0x658bbc1a34a38b08L,0x12a801f8f1a9078dL,
+        0x1567bcf96ab855deL },
+      { 0xe08498e03572359bL,0xcf0353e58659e68bL,0xbb86e9c87d23807cL,
+        0xbc08728d2198e8a2L } },
+    /* 8 << 105 */
+    { { 0x8de2b7bc453cadd6L,0x203900a7bc0bc1f8L,0xbcd86e47a6abd3afL,
+        0x911cac128502effbL },
+      { 0x2d550242ec965469L,0x0e9f769229e0017eL,0x633f078f65979885L,
+        0xfb87d4494cf751efL } },
+    /* 9 << 105 */
+    { { 0xe1790e4bfc25419aL,0x364672034bff3cfdL,0xc8db638625b6e83fL,
+        0x6cc69f236cad6fd2L },
+      { 0x0219e45a6bc68bb9L,0xe43d79b6297f7334L,0x7d445368465dc97cL,
+        0x4b9eea322a0b949aL } },
+    /* 10 << 105 */
+    { { 0x1b96c6ba6102d021L,0xeaafac782f4461eaL,0xd4b85c41c49f19a8L,
+        0x275c28e4cf538875L },
+      { 0x35451a9ddd2e54e0L,0x6991adb50605618bL,0x5b8b4bcd7b36cd24L,
+        0x372a4f8c56f37216L } },
+    /* 11 << 105 */
+    { { 0xc890bd73a6a5da60L,0x6f083da0dc4c9ff0L,0xf4e14d94f0536e57L,
+        0xf9ee1edaaaec8243L },
+      { 0x571241ec8bdcf8e7L,0xa5db82710b041e26L,0x9a0b9a99e3fff040L,
+        0xcaaf21dd7c271202L } },
+    /* 12 << 105 */
+    { { 0xb4e2b2e14f0dd2e8L,0xe77e7c4f0a377ac7L,0x69202c3f0d7a2198L,
+        0xf759b7ff28200eb8L },
+      { 0xc87526eddcfe314eL,0xeb84c52453d5cf99L,0xb1b52ace515138b6L,
+        0x5aa7ff8c23fca3f4L } },
+    /* 13 << 105 */
+    { { 0xff0b13c3b9791a26L,0x960022dacdd58b16L,0xdbd55c9257aad2deL,
+        0x3baaaaa3f30fe619L },
+      { 0x9a4b23460d881efdL,0x506416c046325e2aL,0x91381e76035c18d4L,
+        0xb3bb68bef27817b0L } },
+    /* 14 << 105 */
+    { { 0x15bfb8bf5116f937L,0x7c64a586c1268943L,0x71e25cc38419a2c8L,
+        0x9fd6b0c48335f463L },
+      { 0x4bf0ba3ce8ee0e0eL,0x6f6fba60298c21faL,0x57d57b39ae66bee0L,
+        0x292d513022672544L } },
+    /* 15 << 105 */
+    { { 0xf451105dbab093b3L,0x012f59b902839986L,0x8a9158023474a89cL,
+        0x048c919c2de03e97L },
+      { 0xc476a2b591071cd5L,0x791ed89a034970a5L,0x89bd9042e1b7994bL,
+        0x8eaf5179a1057ffdL } },
+    /* 16 << 105 */
+    { { 0x6066e2a2d551ee10L,0x87a8f1d8727e09a6L,0x00d08bab2c01148dL,
+        0x6da8e4f1424f33feL },
+      { 0x466d17f0cf9a4e71L,0xff5020103bf5cb19L,0xdccf97d8d062ecc0L,
+        0x80c0d9af81d80ac4L } },
+    /* 17 << 105 */
+    { { 0xe87771d8033f2876L,0xb0186ec67d5cc3dbL,0x58e8bb803bc9bc1dL,
+        0x4d1395cc6f6ef60eL },
+      { 0xa73c62d6186244a0L,0x918e5f23110a5b53L,0xed4878ca741b7eabL,
+        0x3038d71adbe03e51L } },
+    /* 18 << 105 */
+    { { 0x840204b7a93c3246L,0x21ab6069a0b9b4cdL,0xf5fa6e2bb1d64218L,
+        0x1de6ad0ef3d56191L },
+      { 0x570aaa88ff1929c7L,0xc6df4c6b640e87b5L,0xde8a74f2c65f0cccL,
+        0x8b972fd5e6f6cc01L } },
+    /* 19 << 105 */
+    { { 0x3fff36b60b846531L,0xba7e45e610a5e475L,0x84a1d10e4145b6c5L,
+        0xf1f7f91a5e046d9dL },
+      { 0x0317a69244de90d7L,0x951a1d4af199c15eL,0x91f78046c9d73debL,
+        0x74c82828fab8224fL } },
+    /* 20 << 105 */
+    { { 0xaa6778fce7560b90L,0xb4073e61a7e824ceL,0xff0d693cd642eba8L,
+        0x7ce2e57a5dccef38L },
+      { 0x89c2c7891df1ad46L,0x83a06922098346fdL,0x2d715d72da2fc177L,
+        0x7b6dd71d85b6cf1dL } },
+    /* 21 << 105 */
+    { { 0xc60a6d0a73fa9cb0L,0xedd3992e328bf5a9L,0xc380ddd0832c8c82L,
+        0xd182d410a2a0bf50L },
+      { 0x7d9d7438d9a528dbL,0xe8b1a0e9caf53994L,0xddd6e5fe0e19987cL,
+        0xacb8df03190b059dL } },
+    /* 22 << 105 */
+    { { 0x53703a328300129fL,0x1f63766268c43bfdL,0xbcbd191300e54051L,
+        0x812fcc627bf5a8c5L },
+      { 0x3f969d5f29fb85daL,0x72f4e00a694759e8L,0x426b6e52790726b7L,
+        0x617bbc873bdbb209L } },
+    /* 23 << 105 */
+    { { 0x511f8bb997aee317L,0x812a4096e81536a8L,0x137dfe593ac09b9bL,
+        0x0682238fba8c9a7aL },
+      { 0x7072ead6aeccb4bdL,0x6a34e9aa692ba633L,0xc82eaec26fff9d33L,
+        0xfb7535121d4d2b62L } },
+    /* 24 << 105 */
+    { { 0x1a0445ff1d7aadabL,0x65d38260d5f6a67cL,0x6e62fb0891cfb26fL,
+        0xef1e0fa55c7d91d6L },
+      { 0x47e7c7ba33db72cdL,0x017cbc09fa7c74b2L,0x3c931590f50a503cL,
+        0xcac54f60616baa42L } },
+    /* 25 << 105 */
+    { { 0x9b6cd380b2369f0fL,0x97d3a70d23c76151L,0x5f9dd6fc9862a9c6L,
+        0x044c4ab212312f51L },
+      { 0x035ea0fd834a2ddcL,0x49e6b862cc7b826dL,0xb03d688362fce490L,
+        0x62f2497ab37e36e9L } },
+    /* 26 << 105 */
+    { { 0x04b005b6c6458293L,0x36bb5276e8d10af7L,0xacf2dc138ee617b8L,
+        0x470d2d35b004b3d4L },
+      { 0x06790832feeb1b77L,0x2bb75c3985657f9cL,0xd70bd4edc0f60004L,
+        0xfe797ecc219b018bL } },
+    /* 27 << 105 */
+    { { 0x9b5bec2a753aebccL,0xdaf9f3dcc939eca5L,0xd6bc6833d095ad09L,
+        0x98abdd51daa4d2fcL },
+      { 0xd9840a318d168be5L,0xcf7c10e02325a23cL,0xa5c02aa07e6ecfafL,
+        0x2462e7e6b5bfdf18L } },
+    /* 28 << 105 */
+    { { 0xab2d8a8ba0cc3f12L,0x68dd485dbc672a29L,0x72039752596f2cd3L,
+        0x5d3eea67a0cf3d8dL },
+      { 0x810a1a81e6602671L,0x8f144a4014026c0cL,0xbc753a6d76b50f85L,
+        0xc4dc21e8645cd4a4L } },
+    /* 29 << 105 */
+    { { 0xc5262dea521d0378L,0x802b8e0e05011c6fL,0x1ba19cbb0b4c19eaL,
+        0x21db64b5ebf0aaecL },
+      { 0x1f394ee970342f9dL,0x93a10aee1bc44a14L,0xa7eed31b3efd0baaL,
+        0x6e7c824e1d154e65L } },
+    /* 30 << 105 */
+    { { 0xee23fa819966e7eeL,0x64ec4aa805b7920dL,0x2d44462d2d90aad4L,
+        0xf44dd195df277ad5L },
+      { 0x8d6471f1bb46b6a1L,0x1e65d313fd885090L,0x33a800f513a977b4L,
+        0xaca9d7210797e1efL } },
+    /* 31 << 105 */
+    { { 0x9a5a85a0fcff6a17L,0x9970a3f31eca7ceeL,0xbb9f0d6bc9504be3L,
+        0xe0c504beadd24ee2L },
+      { 0x7e09d95677fcc2f4L,0xef1a522765bb5fc4L,0x145d4fb18b9286aaL,
+        0x66fd0c5d6649028bL } },
+    /* 32 << 105 */
+    { { 0x98857ceb1bf4581cL,0xe635e186aca7b166L,0x278ddd22659722acL,
+        0xa0903c4c1db68007L },
+      { 0x366e458948f21402L,0x31b49c14b96abda2L,0x329c4b09e0403190L,
+        0x97197ca3d29f43feL } },
+    /* 33 << 105 */
+    { { 0x8073dd1e274983d8L,0xda1a3bde55717c8fL,0xfd3d4da20361f9d1L,
+        0x1332d0814c7de1ceL },
+      { 0x9b7ef7a3aa6d0e10L,0x17db2e73f54f1c4aL,0xaf3dffae4cd35567L,
+        0xaaa2f406e56f4e71L } },
+    /* 34 << 105 */
+    { { 0x8966759e7ace3fc7L,0x9594eacf45a8d8c6L,0x8de3bd8b91834e0eL,
+        0xafe4ca53548c0421L },
+      { 0xfdd7e856e6ee81c6L,0x8f671beb6b891a3aL,0xf7a58f2bfae63829L,
+        0x9ab186fb9c11ac9fL } },
+    /* 35 << 105 */
+    { { 0x8d6eb36910b5be76L,0x046b7739fb040bcdL,0xccb4529fcb73de88L,
+        0x1df0fefccf26be03L },
+      { 0xad7757a6bcfcd027L,0xa8786c75bb3165caL,0xe9db1e347e99a4d9L,
+        0x99ee86dfb06c504bL } },
+    /* 36 << 105 */
+    { { 0x5b7c2dddc15c9f0aL,0xdf87a7344295989eL,0x59ece47c03d08fdaL,
+        0xb074d3ddad5fc702L },
+      { 0x2040790351a03776L,0x2bb1f77b2a608007L,0x25c58f4fe1153185L,
+        0xe6df62f6766e6447L } },
+    /* 37 << 105 */
+    { { 0xefb3d1beed51275aL,0x5de47dc72f0f483fL,0x7932d98e97c2bedfL,
+        0xd5c119270219f8a1L },
+      { 0x9d751200a73a294eL,0x5f88434a9dc20172L,0xd28d9fd3a26f506aL,
+        0xa890cd319d1dcd48L } },
+    /* 38 << 105 */
+    { { 0x0aebaec170f4d3b4L,0xfd1a13690ffc8d00L,0xb9d9c24057d57838L,
+        0x45929d2668bac361L },
+      { 0x5a2cd06025b15ca6L,0x4b3c83e16e474446L,0x1aac7578ee1e5134L,
+        0xa418f5d6c91e2f41L } },
+    /* 39 << 105 */
+    { { 0x6936fc8a213ed68bL,0x860ae7ed510a5224L,0x63660335def09b53L,
+        0x641b2897cd79c98dL },
+      { 0x29bd38e101110f35L,0x79c26f42648b1937L,0x64dae5199d9164f4L,
+        0xd85a23100265c273L } },
+    /* 40 << 105 */
+    { { 0x7173dd5d4b07e2b1L,0xd144c4cb8d9ea221L,0xe8b04ea41105ab14L,
+        0x92dda542fe80d8f1L },
+      { 0xe9982fa8cf03dce6L,0x8b5ea9651a22cffcL,0xf7f4ea7f3fad88c4L,
+        0x62db773e6a5ba95cL } },
+    /* 41 << 105 */
+    { { 0xd20f02fb93f24567L,0xfd46c69a315257caL,0x0ac74cc78bcab987L,
+        0x46f31c015ceca2f5L },
+      { 0x40aedb59888b219eL,0xe50ecc37e1fccd02L,0x1bcd9dad911f816cL,
+        0x583cc1ec8db9b00cL } },
+    /* 42 << 105 */
+    { { 0xf3cd2e66a483bf11L,0xfa08a6f5b1b2c169L,0xf375e2454be9fa28L,
+        0x99a7ffec5b6d011fL },
+      { 0x6a3ebddbc4ae62daL,0x6cea00ae374aef5dL,0xab5fb98d9d4d05bcL,
+        0x7cba1423d560f252L } },
+    /* 43 << 105 */
+    { { 0x49b2cc21208490deL,0x1ca66ec3bcfb2879L,0x7f1166b71b6fb16fL,
+        0xfff63e0865fe5db3L },
+      { 0xb8345abe8b2610beL,0xb732ed8039de3df4L,0x0e24ed50211c32b4L,
+        0xd10d8a69848ff27dL } },
+    /* 44 << 105 */
+    { { 0xc1074398ed4de248L,0xd7cedace10488927L,0xa4aa6bf885673e13L,
+        0xb46bae916daf30afL },
+      { 0x07088472fcef7ad8L,0x61151608d4b35e97L,0xbcfe8f26dde29986L,
+        0xeb84c4c7d5a34c79L } },
+    /* 45 << 105 */
+    { { 0xc1eec55c164e1214L,0x891be86da147bb03L,0x9fab4d100ba96835L,
+        0xbf01e9b8a5c1ae9fL },
+      { 0x6b4de139b186ebc0L,0xd5c74c2685b91bcaL,0x5086a99cc2d93854L,
+        0xeed62a7ba7a9dfbcL } },
+    /* 46 << 105 */
+    { { 0x8778ed6f76b7618aL,0xbff750a503b66062L,0x4cb7be22b65186dbL,
+        0x369dfbf0cc3a6d13L },
+      { 0xc7dab26c7191a321L,0x9edac3f940ed718eL,0xbc142b36d0cfd183L,
+        0xc8af82f67c991693L } },
+    /* 47 << 105 */
+    { { 0xb3d1e4d897ce0b2aL,0xe6d7c87fc3a55cdfL,0x35846b9568b81afeL,
+        0x018d12afd3c239d8L },
+      { 0x2b2c620801206e15L,0xe0e42453a3b882c6L,0x854470a3a50162d5L,
+        0x081574787017a62aL } },
+    /* 48 << 105 */
+    { { 0x18bd3fb4820357c7L,0x992039ae6f1458adL,0x9a1df3c525b44aa1L,
+        0x2d780357ed3d5281L },
+      { 0x58cf7e4dc77ad4d4L,0xd49a7998f9df4fc4L,0x4465a8b51d71205eL,
+        0xa0ee0ea6649254aaL } },
+    /* 49 << 105 */
+    { { 0x4b5eeecfab7bd771L,0x6c87307335c262b9L,0xdc5bd6483c9d61e7L,
+        0x233d6d54321460d2L },
+      { 0xd20c5626fc195bccL,0x2544595804d78b63L,0xe03fcb3d17ec8ef3L,
+        0x54b690d146b8f781L } },
+    /* 50 << 105 */
+    { { 0x82fa2c8a21230646L,0xf51aabb9084f418cL,0xff4fbec11a30ba43L,
+        0x6a5acf73743c9df7L },
+      { 0x1da2b357d635b4d5L,0xc3de68ddecd5c1daL,0xa689080bd61af0ddL,
+        0xdea5938ad665bf99L } },
+    /* 51 << 105 */
+    { { 0x0231d71afe637294L,0x01968aa6a5a81cd8L,0x11252d50048e63b5L,
+        0xc446bc526ca007e9L },
+      { 0xef8c50a696d6134bL,0x9361fbf59e09a05cL,0xf17f85a6dca3291aL,
+        0xb178d548ff251a21L } },
+    /* 52 << 105 */
+    { { 0x87f6374ba4df3915L,0x566ce1bf2fd5d608L,0x425cba4d7de35102L,
+        0x6b745f8f58c5d5e2L },
+      { 0x88402af663122edfL,0x3190f9ed3b989a89L,0x4ad3d387ebba3156L,
+        0xef385ad9c7c469a5L } },
+    /* 53 << 105 */
+    { { 0xb08281de3f642c29L,0x20be0888910ffb88L,0xf353dd4ad5292546L,
+        0x3f1627de8377a262L },
+      { 0xa5faa013eefcd638L,0x8f3bf62674cc77c3L,0x32618f65a348f55eL,
+        0x5787c0dc9fefeb9eL } },
+    /* 54 << 105 */
+    { { 0xf1673aa2d9a23e44L,0x88dfa9934e10690dL,0x1ced1b362bf91108L,
+        0x9193ceca3af48649L },
+      { 0xfb34327d2d738fc5L,0x6697b037975fee6cL,0x2f485da0c04079a5L,
+        0x2cdf57352feaa1acL } },
+    /* 55 << 105 */
+    { { 0x76944420bd55659eL,0x7973e32b4376090cL,0x86bb4fe1163b591aL,
+        0x10441aedc196f0caL },
+      { 0x3b431f4a045ad915L,0x6c11b437a4afacb1L,0x30b0c7db71fdbbd8L,
+        0xb642931feda65acdL } },
+    /* 56 << 105 */
+    { { 0x4baae6e89c92b235L,0xa73bbd0e6b3993a1L,0xd06d60ec693dd031L,
+        0x03cab91b7156881cL },
+      { 0xd615862f1db3574bL,0x485b018564bb061aL,0x27434988a0181e06L,
+        0x2cd61ad4c1c0c757L } },
+    /* 57 << 105 */
+    { { 0x3effed5a2ff9f403L,0x8dc98d8b62239029L,0x2206021e1f17b70dL,
+        0xafbec0cabf510015L },
+      { 0x9fed716480130dfaL,0x306dc2b58a02dcf5L,0x48f06620feb10fc0L,
+        0x78d1e1d55a57cf51L } },
+    /* 58 << 105 */
+    { { 0xadef8c5a192ef710L,0x88afbd4b3b7431f9L,0x7e1f740764250c9eL,
+        0x6e31318db58bec07L },
+      { 0xfd4fc4b824f89b4eL,0x65a5dd8848c36a2aL,0x4f1eccfff024baa7L,
+        0x22a21cf2cba94650L } },
+    /* 59 << 105 */
+    { { 0x95d29dee42a554f7L,0x828983a5002ec4baL,0x8112a1f78badb73dL,
+        0x79ea8897a27c1839L },
+      { 0x8969a5a7d065fd83L,0xf49af791b262a0bcL,0xfcdea8b6af2b5127L,
+        0x10e913e1564c2dbcL } },
+    /* 60 << 105 */
+    { { 0x51239d14bc21ef51L,0xe51c3ceb4ce57292L,0x795ff06847bbcc3bL,
+        0x86b46e1ebd7e11e6L },
+      { 0x0ea6ba2380041ef4L,0xd72fe5056262342eL,0x8abc6dfd31d294d4L,
+        0xbbe017a21278c2c9L } },
+    /* 61 << 105 */
+    { { 0xb1fcfa09b389328aL,0x322fbc62d01771b5L,0x04c0d06360b045bfL,
+        0xdb652edc10e52d01L },
+      { 0x50ef932c03ec6627L,0xde1b3b2dc1ee50e3L,0x5ab7bdc5dc37a90dL,
+        0xfea6721331e33a96L } },
+    /* 62 << 105 */
+    { { 0x6482b5cb4f2999aaL,0x38476cc6b8cbf0ddL,0x93ebfacb173405bbL,
+        0x15cdafe7e52369ecL },
+      { 0xd42d5ba4d935b7dbL,0x648b60041c99a4cdL,0x785101bda3b5545bL,
+        0x4bf2c38a9dd67fafL } },
+    /* 63 << 105 */
+    { { 0xb1aadc634442449cL,0xe0e9921a33ad4fb8L,0x5c552313aa686d82L,
+        0xdee635fa465d866cL },
+      { 0xbc3c224a18ee6e8aL,0xeed748a6ed42e02fL,0xe70f930ad474cd08L,
+        0x774ea6ecfff24adfL } },
+    /* 64 << 105 */
+    { { 0x03e2de1cf3480d4aL,0xf0d8edc7bc8acf1aL,0xf23e330368295a9cL,
+        0xfadd5f68c546a97dL },
+      { 0x895597ad96f8acb1L,0xbddd49d5671bdae2L,0x16fcd52821dd43f4L,
+        0xa5a454126619141aL } },
+    /* 0 << 112 */
+    { { 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 << 112 */
+    { { 0x8ce9b6bfc360e25aL,0xe6425195075a1a78L,0x9dc756a8481732f4L,
+        0x83c0440f5432b57aL },
+      { 0xc670b3f1d720281fL,0x2205910ed135e051L,0xded14b0edb052be7L,
+        0x697b3d27c568ea39L } },
+    /* 2 << 112 */
+    { { 0x2e599b9afb3ff9edL,0x28c2e0ab17f6515cL,0x1cbee4fd474da449L,
+        0x071279a44f364452L },
+      { 0x97abff6601fbe855L,0x3ee394e85fda51c4L,0x190385f667597c0bL,
+        0x6e9fccc6a27ee34bL } },
+    /* 3 << 112 */
+    { { 0x0b89de9314092ebbL,0xf17256bd428e240cL,0xcf89a7f393d2f064L,
+        0x4f57841ee1ed3b14L },
+      { 0x4ee14405e708d855L,0x856aae7203f1c3d0L,0xc8e5424fbdd7eed5L,
+        0x3333e4ef73ab4270L } },
+    /* 4 << 112 */
+    { { 0x3bc77adedda492f8L,0xc11a3aea78297205L,0x5e89a3e734931b4cL,
+        0x17512e2e9f5694bbL },
+      { 0x5dc349f3177bf8b6L,0x232ea4ba08c7ff3eL,0x9c4f9d16f511145dL,
+        0xccf109a333b379c3L } },
+    /* 5 << 112 */
+    { { 0xe75e7a88a1f25897L,0x7ac6961fa1b5d4d8L,0xe3e1077308f3ed5cL,
+        0x208a54ec0a892dfbL },
+      { 0xbe826e1978660710L,0x0cf70a97237df2c8L,0x418a7340ed704da5L,
+        0xa3eeb9a908ca33fdL } },
+    /* 6 << 112 */
+    { { 0x49d96233169bca96L,0x04d286d42da6aafbL,0xc09606eca0c2fa94L,
+        0x8869d0d523ff0fb3L },
+      { 0xa99937e5d0150d65L,0xa92e2503240c14c9L,0x656bf945108e2d49L,
+        0x152a733aa2f59e2bL } },
+    /* 7 << 112 */
+    { { 0xb4323d588434a920L,0xc0af8e93622103c5L,0x667518ef938dbf9aL,
+        0xa184307383a9cdf2L },
+      { 0x350a94aa5447ab80L,0xe5e5a325c75a3d61L,0x74ba507f68411a9eL,
+        0x10581fc1594f70c5L } },
+    /* 8 << 112 */
+    { { 0x60e2857080eb24a9L,0x7bedfb4d488e0cfdL,0x721ebbd7c259cdb8L,
+        0x0b0da855bc6390a9L },
+      { 0x2b4d04dbde314c70L,0xcdbf1fbc6c32e846L,0x33833eabb162fc9eL,
+        0x9939b48bb0dd3ab7L } },
+    /* 9 << 112 */
+    { { 0x5aaa98a7cb0c9c8cL,0x75105f3081c4375cL,0xceee50575ef1c90fL,
+        0xb31e065fc23a17bfL },
+      { 0x5364d275d4b6d45aL,0xd363f3ad62ec8996L,0xb5d212394391c65bL,
+        0x84564765ebb41b47L } },
+    /* 10 << 112 */
+    { { 0x20d18ecc37107c78L,0xacff3b6b570c2a66L,0x22f975d99bd0d845L,
+        0xef0a0c46ba178fa0L },
+      { 0x1a41965176b6028eL,0xc49ec674248612d4L,0x5b6ac4f27338af55L,
+        0x06145e627bee5a36L } },
+    /* 11 << 112 */
+    { { 0x33e95d07e75746b5L,0x1c1e1f6dc40c78beL,0x967833ef222ff8e2L,
+        0x4bedcf6ab49180adL },
+      { 0x6b37e9c13d7a4c8aL,0x2748887c6ddfe760L,0xf7055123aa3a5bbcL,
+        0x954ff2257bbb8e74L } },
+    /* 12 << 112 */
+    { { 0xc42b8ab197c3dfb9L,0x55a549b0cf168154L,0xad6748e7c1b50692L,
+        0x2775780f6fc5cbcbL },
+      { 0x4eab80b8e1c9d7c8L,0x8c69dae13fdbcd56L,0x47e6b4fb9969eaceL,
+        0x002f1085a705cb5aL } },
+    /* 13 << 112 */
+    { { 0x4e23ca446d3fea55L,0xb4ae9c86f4810568L,0x47bfb91b2a62f27dL,
+        0x60deb4c9d9bac28cL },
+      { 0xa892d8947de6c34cL,0x4ee682594494587dL,0x914ee14e1a3f8a5bL,
+        0xbb113eaa28700385L } },
+    /* 14 << 112 */
+    { { 0x81ca03b92115b4c9L,0x7c163d388908cad1L,0xc912a118aa18179aL,
+        0xe09ed750886e3081L },
+      { 0xa676e3fa26f516caL,0x753cacf78e732f91L,0x51592aea833da8b4L,
+        0xc626f42f4cbea8aaL } },
+    /* 15 << 112 */
+    { { 0xef9dc899a7b56eafL,0x00c0e52c34ef7316L,0x5b1e4e24fe818a86L,
+        0x9d31e20dc538be47L },
+      { 0x22eb932d3ed68974L,0xe44bbc087c4e87c4L,0x4121086e0dde9aefL,
+        0x8e6b9cff134f4345L } },
+    /* 16 << 112 */
+    { { 0x96892c1f711b0eb9L,0xb905f2c8780ab954L,0xace26309a20792dbL,
+        0xec8ac9b30684e126L },
+      { 0x486ad8b6b40a2447L,0x60121fc19fe3fb24L,0x5626fccf1a8e3b3fL,
+        0x4e5686226ad1f394L } },
+    /* 17 << 112 */
+    { { 0xda7aae0d196aa5a1L,0xe0df8c771041b5fbL,0x451465d926b318b7L,
+        0xc29b6e557ab136e9L },
+      { 0x2c2ab48b71148463L,0xb5738de364454a76L,0x54ccf9a05a03abe4L,
+        0x377c02960427d58eL } },
+    /* 18 << 112 */
+    { { 0x73f5f0b92bb39c1fL,0x14373f2ce608d8c5L,0xdcbfd31400fbb805L,
+        0xdf18fb2083afdcfbL },
+      { 0x81a57f4242b3523fL,0xe958532d87f650fbL,0xaa8dc8b68b0a7d7cL,
+        0x1b75dfb7150166beL } },
+    /* 19 << 112 */
+    { { 0x90e4f7c92d7d1413L,0x67e2d6b59834f597L,0x4fd4f4f9a808c3e8L,
+        0xaf8237e0d5281ec1L },
+      { 0x25ab5fdc84687ceeL,0xc5ded6b1a5b26c09L,0x8e4a5aecc8ea7650L,
+        0x23b73e5c14cc417fL } },
+    /* 20 << 112 */
+    { { 0x2bfb43183037bf52L,0xb61e6db578c725d7L,0x8efd4060bbb3e5d7L,
+        0x2e014701dbac488eL },
+      { 0xac75cf9a360aa449L,0xb70cfd0579634d08L,0xa591536dfffb15efL,
+        0xb2c37582d07c106cL } },
+    /* 21 << 112 */
+    { { 0xb4293fdcf50225f9L,0xc52e175cb0e12b03L,0xf649c3bad0a8bf64L,
+        0x745a8fefeb8ae3c6L },
+      { 0x30d7e5a358321bc3L,0xb1732be70bc4df48L,0x1f217993e9ea5058L,
+        0xf7a71cde3e4fd745L } },
+    /* 22 << 112 */
+    { { 0x86cc533e894c5bbbL,0x6915c7d969d83082L,0xa6aa2d055815c244L,
+        0xaeeee59249b22ce5L },
+      { 0x89e39d1378135486L,0x3a275c1f16b76f2fL,0xdb6bcc1be036e8f5L,
+        0x4df69b215e4709f5L } },
+    /* 23 << 112 */
+    { { 0xa188b2502d0f39aaL,0x622118bb15a85947L,0x2ebf520ffde0f4faL,
+        0xa40e9f294860e539L },
+      { 0x7b6a51eb22b57f0fL,0x849a33b97e80644aL,0x50e5d16f1cf095feL,
+        0xd754b54eec55f002L } },
+    /* 24 << 112 */
+    { { 0x5cfbbb22236f4a98L,0x0b0c59e9066800bbL,0x4ac69a8f5a9a7774L,
+        0x2b33f804d6bec948L },
+      { 0xb372929532e6c466L,0x68956d0f4e599c73L,0xa47a249f155c31ccL,
+        0x24d80f0de1ce284eL } },
+    /* 25 << 112 */
+    { { 0xcd821dfb988baf01L,0xe6331a7ddbb16647L,0x1eb8ad33094cb960L,
+        0x593cca38c91bbca5L },
+      { 0x384aac8d26567456L,0x40fa0309c04b6490L,0x97834cd6dab6c8f6L,
+        0x68a7318d3f91e55fL } },
+    /* 26 << 112 */
+    { { 0xa00fd04efc4d3157L,0xb56f8ab22bf3bdeaL,0x014f56484fa57172L,
+        0x948c5860450abdb3L },
+      { 0x342b5df00ebd4f08L,0x3e5168cd0e82938eL,0x7aedc1ceb0df5dd0L,
+        0x6bbbc6d9e5732516L } },
+    /* 27 << 112 */
+    { { 0xc7bfd486605daaa6L,0x46fd72b7bb9a6c9eL,0xe4847fb1a124fb89L,
+        0x75959cbda2d8ffbcL },
+      { 0x42579f65c8a588eeL,0x368c92e6b80b499dL,0xea4ef6cd999a5df1L,
+        0xaa73bb7f936fe604L } },
+    /* 28 << 112 */
+    { { 0xf347a70d6457d188L,0x86eda86b8b7a388bL,0xb7cdff060ccd6013L,
+        0xbeb1b6c7d0053fb2L },
+      { 0x0b02238799240a9fL,0x1bbb384f776189b2L,0x8695e71e9066193aL,
+        0x2eb5009706ffac7eL } },
+    /* 29 << 112 */
+    { { 0x0654a9c04a7d2caaL,0x6f3fb3d1a5aaa290L,0x835db041ff476e8fL,
+        0x540b8b0bc42295e4L },
+      { 0xa5c73ac905e214f5L,0x9a74075a56a0b638L,0x2e4b1090ce9e680bL,
+        0x57a5b4796b8d9afaL } },
+    /* 30 << 112 */
+    { { 0x0dca48e726bfe65cL,0x097e391c7290c307L,0x683c462e6669e72eL,
+        0xf505be1e062559acL },
+      { 0x5fbe3ea1e3a3035aL,0x6431ebf69cd50da8L,0xfd169d5c1f6407f2L,
+        0x8d838a9560fce6b8L } },
+    /* 31 << 112 */
+    { { 0x2a2bfa7f650006f0L,0xdfd7dad350c0fbb2L,0x92452495ccf9ad96L,
+        0x183bf494d95635f9L },
+      { 0x02d5df434a7bd989L,0x505385cca5431095L,0xdd98e67dfd43f53eL,
+        0xd61e1a6c500c34a9L } },
+    /* 32 << 112 */
+    { { 0x5a4b46c64a8a3d62L,0x8469c4d0247743d2L,0x2bb3a13d88f7e433L,
+        0x62b23a1001be5849L },
+      { 0xe83596b4a63d1a4cL,0x454e7fea7d183f3eL,0x643fce6117afb01cL,
+        0x4e65e5e61c4c3638L } },
+    /* 33 << 112 */
+    { { 0x41d85ea1ef74c45bL,0x2cfbfa66ae328506L,0x98b078f53ada7da9L,
+        0xd985fe37ec752fbbL },
+      { 0xeece68fe5a0148b4L,0x6f9a55c72d78136dL,0x232dccc4d2b729ceL,
+        0xa27e0dfd90aafbc4L } },
+    /* 34 << 112 */
+    { { 0x9647445212b4603eL,0xa876c5516b706d14L,0xdf145fcf69a9d412L,
+        0xe2ab75b72d479c34L },
+      { 0x12df9a761a23ff97L,0xc61389925d359d10L,0x6e51c7aefa835f22L,
+        0x69a79cb1c0fcc4d9L } },
+    /* 35 << 112 */
+    { { 0xf57f350d594cc7e1L,0x3079ca633350ab79L,0x226fb6149aff594aL,
+        0x35afec026d59a62bL },
+      { 0x9bee46f406ed2c6eL,0x58da17357d939a57L,0x44c504028fd1797eL,
+        0xd8853e7c5ccea6caL } },
+    /* 36 << 112 */
+    { { 0x4065508da35fcd5fL,0x8965df8c495ccaebL,0x0f2da85012e1a962L,
+        0xee471b94c1cf1cc4L },
+      { 0xcef19bc80a08fb75L,0x704958f581de3591L,0x2867f8b23aef4f88L,
+        0x8d749384ea9f9a5fL } },
+    /* 37 << 112 */
+    { { 0x1b3855378c9049f4L,0x5be948f37b92d8b6L,0xd96f725db6e2bd6bL,
+        0x37a222bc958c454dL },
+      { 0xe7c61abb8809bf61L,0x46f07fbc1346f18dL,0xfb567a7ae87c0d1cL,
+        0x84a461c87ef3d07aL } },
+    /* 38 << 112 */
+    { { 0x0a5adce6d9278d98L,0x24d948139dfc73e1L,0x4f3528b6054321c3L,
+        0x2e03fdde692ea706L },
+      { 0x10e6061947b533c0L,0x1a8bc73f2ca3c055L,0xae58d4b21bb62b8fL,
+        0xb2045a73584a24e3L } },
+    /* 39 << 112 */
+    { { 0x3ab3d5afbd76e195L,0x478dd1ad6938a810L,0x6ffab3936ee3d5cbL,
+        0xdfb693db22b361e4L },
+      { 0xf969449651dbf1a7L,0xcab4b4ef08a2e762L,0xe8c92f25d39bba9aL,
+        0x850e61bcf1464d96L } },
+    /* 40 << 112 */
+    { { 0xb7e830e3dc09508bL,0xfaf6d2cf74317655L,0x72606cebdf690355L,
+        0x48bb92b3d0c3ded6L },
+      { 0x65b754845c7cf892L,0xf6cd7ac9d5d5f01fL,0xc2c30a5996401d69L,
+        0x91268650ed921878L } },
+    /* 41 << 112 */
+    { { 0x380bf913b78c558fL,0x43c0baebc8afdaa9L,0x377f61d554f169d3L,
+        0xf8da07e3ae5ff20bL },
+      { 0xb676c49da8a90ea8L,0x81c1ff2b83a29b21L,0x383297ac2ad8d276L,
+        0x3001122fba89f982L } },
+    /* 42 << 112 */
+    { { 0xe1d794be6718e448L,0x246c14827c3e6e13L,0x56646ef85d26b5efL,
+        0x80f5091e88069cddL },
+      { 0xc5992e2f724bdd38L,0x02e915b48471e8c7L,0x96ff320a0d0ff2a9L,
+        0xbf8864874384d1a0L } },
+    /* 43 << 112 */
+    { { 0xbbe1e6a6c93f72d6L,0xd5f75d12cad800eaL,0xfa40a09fe7acf117L,
+        0x32c8cdd57581a355L },
+      { 0x742219927023c499L,0xa8afe5d738ec3901L,0x5691afcba90e83f0L,
+        0x41bcaa030b8f8eacL } },
+    /* 44 << 112 */
+    { { 0xe38b5ff98d2668d5L,0x0715281a7ad81965L,0x1bc8fc7c03c6ce11L,
+        0xcbbee6e28b650436L },
+      { 0x06b00fe80cdb9808L,0x17d6e066fe3ed315L,0x2e9d38c64d0b5018L,
+        0xab8bfd56844dcaefL } },
+    /* 45 << 112 */
+    { { 0x42894a59513aed8bL,0xf77f3b6d314bd07aL,0xbbdecb8f8e42b582L,
+        0xf10e2fa8d2390fe6L },
+      { 0xefb9502262a2f201L,0x4d59ea5050ee32b0L,0xd87f77286da789a8L,
+        0xcf98a2cff79492c4L } },
+    /* 46 << 112 */
+    { { 0xf9577239720943c2L,0xba044cf53990b9d0L,0x5aa8e82395f2884aL,
+        0x834de6ed0278a0afL },
+      { 0xc8e1ee9a5f25bd12L,0x9259ceaa6f7ab271L,0x7e6d97a277d00b76L,
+        0x5c0c6eeaa437832aL } },
+    /* 47 << 112 */
+    { { 0x5232c20f5606b81dL,0xabd7b3750d991ee5L,0x4d2bfe358632d951L,
+        0x78f8514698ed9364L },
+      { 0x951873f0f30c3282L,0x0da8ac80a789230bL,0x3ac7789c5398967fL,
+        0xa69b8f7fbdda0fb5L } },
+    /* 48 << 112 */
+    { { 0xe5db77176add8545L,0x1b71cb6672c49b66L,0xd856073968421d77L,
+        0x03840fe883e3afeaL },
+      { 0xb391dad51ec69977L,0xae243fb9307f6726L,0xc88ac87be8ca160cL,
+        0x5174cced4ce355f4L } },
+    /* 49 << 112 */
+    { { 0x98a35966e58ba37dL,0xfdcc8da27817335dL,0x5b75283083fbc7bfL,
+        0x68e419d4d9c96984L },
+      { 0x409a39f402a40380L,0x88940faf1fe977bcL,0xc640a94b8f8edea6L,
+        0x1e22cd17ed11547dL } },
+    /* 50 << 112 */
+    { { 0xe28568ce59ffc3e2L,0x60aa1b55c1dee4e7L,0xc67497c8837cb363L,
+        0x06fb438a105a2bf2L },
+      { 0x30357ec4500d8e20L,0x1ad9095d0670db10L,0x7f589a05c73b7cfdL,
+        0xf544607d880d6d28L } },
+    /* 51 << 112 */
+    { { 0x17ba93b1a20ef103L,0xad8591306ba6577bL,0x65c91cf66fa214a0L,
+        0xd7d49c6c27990da5L },
+      { 0xecd9ec8d20bb569dL,0xbd4b2502eeffbc33L,0x2056ca5a6bed0467L,
+        0x7916a1f75b63728cL } },
+    /* 52 << 112 */
+    { { 0xd4f9497d53a4f566L,0x8973466497b56810L,0xf8e1da740494a621L,
+        0x82546a938d011c68L },
+      { 0x1f3acb19c61ac162L,0x52f8fa9cabad0d3eL,0x15356523b4b7ea43L,
+        0x5a16ad61ae608125L } },
+    /* 53 << 112 */
+    { { 0xb0bcb87f4faed184L,0x5f236b1d5029f45fL,0xd42c76070bc6b1fcL,
+        0xc644324e68aefce3L },
+      { 0x8e191d595c5d8446L,0xc020807713ae1979L,0xadcaee553ba59cc7L,
+        0x20ed6d6ba2cb81baL } },
+    /* 54 << 112 */
+    { { 0x0952ba19b6efcffcL,0x60f12d6897c0b87cL,0x4ee2c7c49caa30bcL,
+        0x767238b797fbff4eL },
+      { 0xebc73921501b5d92L,0x3279e3dfc2a37737L,0x9fc12bc86d197543L,
+        0xfa94dc6f0a40db4eL } },
+    /* 55 << 112 */
+    { { 0x7392b41a530ccbbdL,0x87c82146ea823525L,0xa52f984c05d98d0cL,
+        0x2ae57d735ef6974cL },
+      { 0x9377f7bf3042a6ddL,0xb1a007c019647a64L,0xfaa9079a0cca9767L,
+        0x3d81a25bf68f72d5L } },
+    /* 56 << 112 */
+    { { 0x752067f8ff81578eL,0x786221509045447dL,0xc0c22fcf0505aa6fL,
+        0x1030f0a66bed1c77L },
+      { 0x31f29f151f0bd739L,0x2d7989c7e6debe85L,0x5c070e728e677e98L,
+        0x0a817bd306e81fd5L } },
+    /* 57 << 112 */
+    { { 0xc110d830b0f2ac95L,0x48d0995aab20e64eL,0x0f3e00e17729cd9aL,
+        0x2a570c20dd556946L },
+      { 0x912dbcfd4e86214dL,0x2d014ee2cf615498L,0x55e2b1e63530d76eL,
+        0xc5135ae4fd0fd6d1L } },
+    /* 58 << 112 */
+    { { 0x0066273ad4f3049fL,0xbb8e9893e7087477L,0x2dba1ddb14c6e5fdL,
+        0xdba3788651f57e6cL },
+      { 0x5aaee0a65a72f2cfL,0x1208bfbf7bea5642L,0xf5c6aa3b67872c37L,
+        0xd726e08343f93224L } },
+    /* 59 << 112 */
+    { { 0x1854daa5061f1658L,0xc0016df1df0cd2b3L,0xc2a3f23e833d50deL,
+        0x73b681d2bbbd3017L },
+      { 0x2f046dc43ac343c0L,0x9c847e7d85716421L,0xe1e13c910917eed4L,
+        0x3fc9eebd63a1b9c6L } },
+    /* 60 << 112 */
+    { { 0x0f816a727fe02299L,0x6335ccc2294f3319L,0x3820179f4745c5beL,
+        0xe647b782922f066eL },
+      { 0xc22e49de02cafb8aL,0x299bc2fffcc2ecccL,0x9a8feea26e0e8282L,
+        0xa627278bfe893205L } },
+    /* 61 << 112 */
+    { { 0xa7e197337933e47bL,0xf4ff6b132e766402L,0xa4d8be0a98440d9fL,
+        0x658f5c2f38938808L },
+      { 0x90b75677c95b3b3eL,0xfa0442693137b6ffL,0x077b039b43c47c29L,
+        0xcca95dd38a6445b2L } },
+    /* 62 << 112 */
+    { { 0x0b498ba42333fc4cL,0x274f8e68f736a1b1L,0x6ca348fd5f1d4b2eL,
+        0x24d3be78a8f10199L },
+      { 0x8535f858ca14f530L,0xa6e7f1635b982e51L,0x847c851236e1bf62L,
+        0xf6a7c58e03448418L } },
+    /* 63 << 112 */
+    { { 0x583f3703f9374ab6L,0x864f91956e564145L,0x33bc3f4822526d50L,
+        0x9f323c801262a496L },
+      { 0xaa97a7ae3f046a9aL,0x70da183edf8a039aL,0x5b68f71c52aa0ba6L,
+        0x9be0fe5121459c2dL } },
+    /* 64 << 112 */
+    { { 0xc1e17eb6cbc613e5L,0x33131d55497ea61cL,0x2f69d39eaf7eded5L,
+        0x73c2f434de6af11bL },
+      { 0x4ca52493a4a375faL,0x5f06787cb833c5c2L,0x814e091f3e6e71cfL,
+        0x76451f578b746666L } },
+    /* 0 << 119 */
+    { { 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 << 119 */
+    { { 0x80f9bdef694db7e0L,0xedca8787b9fcddc6L,0x51981c3403b8dce1L,
+        0x4274dcf170e10ba1L },
+      { 0xf72743b86def6d1aL,0xd25b1670ebdb1866L,0xc4491e8c050c6f58L,
+        0x2be2b2ab87fbd7f5L } },
+    /* 2 << 119 */
+    { { 0x3e0e5c9dd111f8ecL,0xbcc33f8db7c4e760L,0x702f9a91bd392a51L,
+        0x7da4a795c132e92dL },
+      { 0x1a0b0ae30bb1151bL,0x54febac802e32251L,0xea3a5082694e9e78L,
+        0xe58ffec1e4fe40b8L } },
+    /* 3 << 119 */
+    { { 0xf85592fcd1e0cf9eL,0xdea75f0dc0e7b2e8L,0xc04215cfc135584eL,
+        0x174fc7272f57092aL },
+      { 0xe7277877eb930beaL,0x504caccb5eb02a5aL,0xf9fe08f7f5241b9bL,
+        0xe7fb62f48d5ca954L } },
+    /* 4 << 119 */
+    { { 0xfbb8349d29c4120bL,0x9f94391fc0d0d915L,0xc4074fa75410ba51L,
+        0xa66adbf6150a5911L },
+      { 0xc164543c34bfca38L,0xe0f27560b9e1ccfcL,0x99da0f53e820219cL,
+        0xe8234498c6b4997aL } },
+    /* 5 << 119 */
+    { { 0xcfb88b769d4c5423L,0x9e56eb10b0521c49L,0x418e0b5ebe8700a1L,
+        0x00cbaad6f93cb58aL },
+      { 0xe923fbded92a5e67L,0xca4979ac1f347f11L,0x89162d856bc0585bL,
+        0xdd6254afac3c70e3L } },
+    /* 6 << 119 */
+    { { 0x7b23c513516e19e4L,0x56e2e847c5c4d593L,0x9f727d735ce71ef6L,
+        0x5b6304a6f79a44c5L },
+      { 0x6638a7363ab7e433L,0x1adea470fe742f83L,0xe054b8545b7fc19fL,
+        0xf935381aba1d0698L } },
+    /* 7 << 119 */
+    { { 0x546eab2d799e9a74L,0x96239e0ea949f729L,0xca274c6b7090055aL,
+        0x835142c39020c9b0L },
+      { 0xa405667aa2e8807fL,0x29f2c0851aa3d39eL,0xcc555d6442fc72f5L,
+        0xe856e0e7fbeacb3cL } },
+    /* 8 << 119 */
+    { { 0xb5504f9d918e4936L,0x65035ef6b2513982L,0x0553a0c26f4d9cb9L,
+        0x6cb10d56bea85509L },
+      { 0x48d957b7a242da11L,0x16a4d3dd672b7268L,0x3d7e637c8502a96bL,
+        0x27c7032b730d463bL } },
+    /* 9 << 119 */
+    { { 0xbdc02b18e4136a14L,0xbacf969d678e32bfL,0xc98d89a3dd9c3c03L,
+        0x7b92420a23becc4fL },
+      { 0xd4b41f78c64d565cL,0x9f969d0010f28295L,0xec7f7f76b13d051aL,
+        0x08945e1ea92da585L } },
+    /* 10 << 119 */
+    { { 0x55366b7d5846426fL,0xe7d09e89247d441dL,0x510b404d736fbf48L,
+        0x7fa003d0e784bd7dL },
+      { 0x25f7614f17fd9596L,0x49e0e0a135cb98dbL,0x2c65957b2e83a76aL,
+        0x5d40da8dcddbe0f8L } },
+    /* 11 << 119 */
+    { { 0xf2b8c405050bad24L,0x8918426dc2aa4823L,0x2aeab3dda38365a7L,
+        0x720317177c91b690L },
+      { 0x8b00d69960a94120L,0x478a255de99eaeecL,0xbf656a5f6f60aafdL,
+        0xdfd7cb755dee77b3L } },
+    /* 12 << 119 */
+    { { 0x37f68bb4a595939dL,0x0355647928740217L,0x8e740e7c84ad7612L,
+        0xd89bc8439044695fL },
+      { 0xf7f3da5d85a9184dL,0x562563bb9fc0b074L,0x06d2e6aaf88a888eL,
+        0x612d8643161fbe7cL } },
+    /* 13 << 119 */
+    { { 0x465edba7f64085e7L,0xb230f30429aa8511L,0x53388426cda2d188L,
+        0x908857354b666649L },
+      { 0x6f02ff9a652f54f6L,0x65c822945fae2bf0L,0x7816ade062f5eee3L,
+        0xdcdbdf43fcc56d70L } },
+    /* 14 << 119 */
+    { { 0x9fb3bba354530bb2L,0xbde3ef77cb0869eaL,0x89bc90460b431163L,
+        0x4d03d7d2e4819a35L },
+      { 0x33ae4f9e43b6a782L,0x216db3079c88a686L,0x91dd88e000ffedd9L,
+        0xb280da9f12bd4840L } },
+    /* 15 << 119 */
+    { { 0x32a7cb8a1635e741L,0xfe14008a78be02a7L,0x3fafb3341b7ae030L,
+        0x7fd508e75add0ce9L },
+      { 0x72c83219d607ad51L,0x0f229c0a8d40964aL,0x1be2c3361c878da2L,
+        0xe0c96742eab2ab86L } },
+    /* 16 << 119 */
+    { { 0x458f86913e538cd7L,0xa7001f6c8e08ad53L,0x52b8c6e6bf5d15ffL,
+        0x548234a4011215ddL },
+      { 0xff5a9d2d3d5b4045L,0xb0ffeeb64a904190L,0x55a3aca448607f8bL,
+        0x8cbd665c30a0672aL } },
+    /* 17 << 119 */
+    { { 0x87f834e042583068L,0x02da2aebf3f6e683L,0x6b763e5d05c12248L,
+        0x7230378f65a8aefcL },
+      { 0x93bd80b571e8e5caL,0x53ab041cb3b62524L,0x1b8605136c9c552eL,
+        0xe84d402cd5524e66L } },
+    /* 18 << 119 */
+    { { 0xa37f3573f37f5937L,0xeb0f6c7dd1e4fca5L,0x2965a554ac8ab0fcL,
+        0x17fbf56c274676acL },
+      { 0x2e2f6bd9acf7d720L,0x41fc8f8810224766L,0x517a14b385d53befL,
+        0xdae327a57d76a7d1L } },
+    /* 19 << 119 */
+    { { 0x6ad0a065c4818267L,0x33aa189b37c1bbc1L,0x64970b5227392a92L,
+        0x21699a1c2d1535eaL },
+      { 0xcd20779cc2d7a7fdL,0xe318605999c83cf2L,0x9b69440b72c0b8c7L,
+        0xa81497d77b9e0e4dL } },
+    /* 20 << 119 */
+    { { 0x515d5c891f5f82dcL,0x9a7f67d76361079eL,0xa8da81e311a35330L,
+        0xe44990c44b18be1bL },
+      { 0xc7d5ed95af103e59L,0xece8aba78dac9261L,0xbe82b0999394b8d3L,
+        0x6830f09a16adfe83L } },
+    /* 21 << 119 */
+    { { 0x250a29b488172d01L,0x8b20bd65caff9e02L,0xb8a7661ee8a6329aL,
+        0x4520304dd3fce920L },
+      { 0xae45da1f2b47f7efL,0xe07f52885bffc540L,0xf79970093464f874L,
+        0x2244c2cda6fa1f38L } },
+    /* 22 << 119 */
+    { { 0x43c41ac194d7d9b1L,0x5bafdd82c82e7f17L,0xdf0614c15fda0fcaL,
+        0x74b043a7a8ae37adL },
+      { 0x3ba6afa19e71734cL,0x15d5437e9c450f2eL,0x4a5883fe67e242b1L,
+        0x5143bdc22c1953c2L } },
+    /* 23 << 119 */
+    { { 0x542b8b53fc5e8920L,0x363bf9a89a9cee08L,0x02375f10c3486e08L,
+        0x2037543b8c5e70d2L },
+      { 0x7109bccc625640b4L,0xcbc1051e8bc62c3bL,0xf8455fed803f26eaL,
+        0x6badceabeb372424L } },
+    /* 24 << 119 */
+    { { 0xa2a9ce7c6b53f5f9L,0x642465951b176d99L,0xb1298d36b95c081bL,
+        0x53505bb81d9a9ee6L },
+      { 0x3f6f9e61f2ba70b0L,0xd07e16c98afad453L,0x9f1694bbe7eb4a6aL,
+        0xdfebced93cb0bc8eL } },
+    /* 25 << 119 */
+    { { 0x92d3dcdc53868c8bL,0x174311a2386107a6L,0x4109e07c689b4e64L,
+        0x30e4587f2df3dcb6L },
+      { 0x841aea310811b3b2L,0x6144d41d0cce43eaL,0x464c45812a9a7803L,
+        0xd03d371f3e158930L } },
+    /* 26 << 119 */
+    { { 0xc676d7f2b1f3390bL,0x9f7a1b8ca5b61272L,0x4ebebfc9c2e127a9L,
+        0x4602500c5dd997bfL },
+      { 0x7f09771c4711230fL,0x058eb37c020f09c1L,0xab693d4bfee5e38bL,
+        0x9289eb1f4653cbc0L } },
+    /* 27 << 119 */
+    { { 0xbecf46abd51b9cf5L,0xd2aa9c029f0121afL,0x36aaf7d2e90dc274L,
+        0x909e4ea048b95a3cL },
+      { 0xe6b704966f32dbdbL,0x672188a08b030b3eL,0xeeffe5b3cfb617e2L,
+        0x87e947de7c82709eL } },
+    /* 28 << 119 */
+    { { 0xa44d2b391770f5a7L,0xe4d4d7910e44eb82L,0x42e69d1e3f69712aL,
+        0xbf11c4d6ac6a820eL },
+      { 0xb5e7f3e542c4224cL,0xd6b4e81c449d941cL,0x5d72bd165450e878L,
+        0x6a61e28aee25ac54L } },
+    /* 29 << 119 */
+    { { 0x33272094e6f1cd95L,0x7512f30d0d18673fL,0x32f7a4ca5afc1464L,
+        0x2f0956566bbb977bL },
+      { 0x586f47caa8226200L,0x02c868ad1ac07369L,0x4ef2b845c613acbeL,
+        0x43d7563e0386054cL } },
+    /* 30 << 119 */
+    { { 0x54da9dc7ab952578L,0xb5423df226e84d0bL,0xa8b64eeb9b872042L,
+        0xac2057825990f6dfL },
+      { 0x4ff696eb21f4c77aL,0x1a79c3e4aab273afL,0x29bc922e9436b3f1L,
+        0xff807ef8d6d9a27aL } },
+    /* 31 << 119 */
+    { { 0x82acea3d778f22a0L,0xfb10b2e85b5e7469L,0xc0b169802818ee7dL,
+        0x011afff4c91c1a2fL },
+      { 0x95a6d126ad124418L,0x31c081a5e72e295fL,0x36bb283af2f4db75L,
+        0xd115540f7acef462L } },
+    /* 32 << 119 */
+    { { 0xc7f3a8f833f6746cL,0x21e46f65fea990caL,0x915fd5c5caddb0a9L,
+        0xbd41f01678614555L },
+      { 0x346f4434426ffb58L,0x8055943614dbc204L,0xf3dd20fe5a969b7fL,
+        0x9d59e956e899a39aL } },
+    /* 33 << 119 */
+    { { 0xf1b0971c8ad4cf4bL,0x034488602ffb8fb8L,0xf071ac3c65340ba4L,
+        0x408d0596b27fd758L },
+      { 0xe7c78ea498c364b0L,0xa4aac4a5051e8ab5L,0xb9e1d560485d9002L,
+        0x9acd518a88844455L } },
+    /* 34 << 119 */
+    { { 0xe4ca688fd06f56c0L,0xa48af70ddf027972L,0x691f0f045e9a609dL,
+        0xa9dd82cdee61270eL },
+      { 0x8903ca63a0ef18d3L,0x9fb7ee353d6ca3bdL,0xa7b4a09cabf47d03L,
+        0x4cdada011c67de8eL } },
+    /* 35 << 119 */
+    { { 0x520037499355a244L,0xe77fd2b64f2151a9L,0x695d6cf666b4efcbL,
+        0xc5a0cacfda2cfe25L },
+      { 0x104efe5cef811865L,0xf52813e89ea5cc3dL,0x855683dc40b58dbcL,
+        0x0338ecde175fcb11L } },
+    /* 36 << 119 */
+    { { 0xf9a0563774921592L,0xb4f1261db9bb9d31L,0x551429b74e9c5459L,
+        0xbe182e6f6ea71f53L },
+      { 0xd3a3b07cdfc50573L,0x9ba1afda62be8d44L,0x9bcfd2cb52ab65d3L,
+        0xdf11d547a9571802L } },
+    /* 37 << 119 */
+    { { 0x099403ee02a2404aL,0x497406f421088a71L,0x994794095004ae71L,
+        0xbdb42078a812c362L },
+      { 0x2b72a30fd8828442L,0x283add27fcb5ed1cL,0xf7c0e20066a40015L,
+        0x3e3be64108b295efL } },
+    /* 38 << 119 */
+    { { 0xac127dc1e038a675L,0x729deff38c5c6320L,0xb7df8fd4a90d2c53L,
+        0x9b74b0ec681e7cd3L },
+      { 0x5cb5a623dab407e5L,0xcdbd361576b340c6L,0xa184415a7d28392cL,
+        0xc184c1d8e96f7830L } },
+    /* 39 << 119 */
+    { { 0xc3204f1981d3a80fL,0xfde0c841c8e02432L,0x78203b3e8149e0c1L,
+        0x5904bdbb08053a73L },
+      { 0x30fc1dd1101b6805L,0x43c223bc49aa6d49L,0x9ed671417a174087L,
+        0x311469a0d5997008L } },
+    /* 40 << 119 */
+    { { 0xb189b6845e43fc61L,0xf3282375e0d3ab57L,0x4fa34b67b1181da8L,
+        0x621ed0b299ee52b8L },
+      { 0x9b178de1ad990676L,0xd51de67b56d54065L,0x2a2c27c47538c201L,
+        0x33856ec838a40f5cL } },
+    /* 41 << 119 */
+    { { 0x2522fc15be6cdcdeL,0x1e603f339f0c6f89L,0x7994edc3103e30a6L,
+        0x033a00db220c853eL },
+      { 0xd3cfa409f7bb7fd7L,0x70f8781e462d18f6L,0xbbd82980687fe295L,
+        0x6eef4c32595669f3L } },
+    /* 42 << 119 */
+    { { 0x86a9303b2f7e85c3L,0x5fce462171988f9bL,0x5b935bf6c138acb5L,
+        0x30ea7d6725661212L },
+      { 0xef1eb5f4e51ab9a2L,0x0587c98aae067c78L,0xb3ce1b3c77ca9ca6L,
+        0x2a553d4d54b5f057L } },
+    /* 43 << 119 */
+    { { 0xc78982364da29ec2L,0xdbdd5d13b9c57316L,0xc57d6e6b2cd80d47L,
+        0x80b460cffe9e7391L },
+      { 0x98648cabf963c31eL,0x67f9f633cc4d32fdL,0x0af42a9dfdf7c687L,
+        0x55f292a30b015ea7L } },
+    /* 44 << 119 */
+    { { 0x89e468b2cd21ab3dL,0xe504f022c393d392L,0xab21e1d4a5013af9L,
+        0xe3283f78c2c28acbL },
+      { 0xf38b35f6226bf99fL,0xe83542740e291e69L,0x61673a15b20c162dL,
+        0xc101dc75b04fbdbeL } },
+    /* 45 << 119 */
+    { { 0x8323b4c2255bd617L,0x6c9696936c2a9154L,0xc6e6586062679387L,
+        0x8e01db0cb8c88e23L },
+      { 0x33c42873893a5559L,0x7630f04b47a3e149L,0xb5d80805ddcf35f8L,
+        0x582ca08077dfe732L } },
+    /* 46 << 119 */
+    { { 0x2c7156e10b1894a0L,0x92034001d81c68c0L,0xed225d00c8b115b5L,
+        0x237f9c2283b907f2L },
+      { 0x0ea2f32f4470e2c0L,0xb725f7c158be4e95L,0x0f1dcafab1ae5463L,
+        0x59ed51871ba2fc04L } },
+    /* 47 << 119 */
+    { { 0xf6e0f316d0115d4dL,0x5180b12fd3691599L,0x157e32c9527f0a41L,
+        0x7b0b081da8e0ecc0L },
+      { 0x6dbaaa8abf4f0dd0L,0x99b289c74d252696L,0x79b7755edbf864feL,
+        0x6974e2b176cad3abL } },
+    /* 48 << 119 */
+    { { 0x35dbbee206ddd657L,0xe7cbdd112ff3a96dL,0x88381968076be758L,
+        0x2d737e7208c91f5dL },
+      { 0x5f83ab6286ec3776L,0x98aa649d945fa7a1L,0xf477ec3772ef0933L,
+        0x66f52b1e098c17b1L } },
+    /* 49 << 119 */
+    { { 0x9eec58fbd803738bL,0x91aaade7e4e86aa4L,0x6b1ae617a5b51492L,
+        0x63272121bbc45974L },
+      { 0x7e0e28f0862c5129L,0x0a8f79a93321a4a0L,0xe26d16645041c88fL,
+        0x0571b80553233e3aL } },
+    /* 50 << 119 */
+    { { 0xd1b0ccdec9520711L,0x55a9e4ed3c8b84bfL,0x9426bd39a1fef314L,
+        0x4f5f638e6eb93f2bL },
+      { 0xba2a1ed32bf9341bL,0xd63c13214d42d5a9L,0xd2964a89316dc7c5L,
+        0xd1759606ca511851L } },
+    /* 51 << 119 */
+    { { 0xd8a9201ff9e6ed35L,0xb7b5ee456736925aL,0x0a83fbbc99581af7L,
+        0x3076bc4064eeb051L },
+      { 0x5511c98c02dec312L,0x270de898238dcb78L,0x2cf4cf9c539c08c9L,
+        0xa70cb65e38d3b06eL } },
+    /* 52 << 119 */
+    { { 0xb12ec10ecfe57bbdL,0x82c7b65635a0c2b5L,0xddc7d5cd161c67bdL,
+        0xe32e8985ae3a32ccL },
+      { 0x7aba9444d11a5529L,0xe964ed022427fa1aL,0x1528392d24a1770aL,
+        0xa152ce2c12c72fcdL } },
+    /* 53 << 119 */
+    { { 0x714553a48ec07649L,0x18b4c290459dd453L,0xea32b7147b64b110L,
+        0xb871bfa52e6f07a2L },
+      { 0xb67112e59e2e3c9bL,0xfbf250e544aa90f6L,0xf77aedb8bd539006L,
+        0x3b0cdf9ad172a66fL } },
+    /* 54 << 119 */
+    { { 0xedf69feaf8c51187L,0x05bb67ec741e4da7L,0x47df0f3208114345L,
+        0x56facb07bb9792b1L },
+      { 0xf3e007e98f6229e4L,0x62d103f4526fba0fL,0x4f33bef7b0339d79L,
+        0x9841357bb59bfec1L } },
+    /* 55 << 119 */
+    { { 0xfa8dbb59c34e6705L,0xc3c7180b7fdaa84cL,0xf95872fca4108537L,
+        0x8750cc3b932a3e5aL },
+      { 0xb61cc69db7275d7dL,0xffa0168b2e59b2e9L,0xca032abc6ecbb493L,
+        0x1d86dbd32c9082d8L } },
+    /* 56 << 119 */
+    { { 0xae1e0b67e28ef5baL,0x2c9a4699cb18e169L,0x0ecd0e331e6bbd20L,
+        0x571b360eaf5e81d2L },
+      { 0xcd9fea58101c1d45L,0x6651788e18880452L,0xa99726351f8dd446L,
+        0x44bed022e37281d0L } },
+    /* 57 << 119 */
+    { { 0x094b2b2d33da525dL,0xf193678e13144fd8L,0xb8ab5ba4f4c1061dL,
+        0x4343b5fadccbe0f4L },
+      { 0xa870237163812713L,0x47bf6d2df7611d93L,0x46729b8cbd21e1d7L,
+        0x7484d4e0d629e77dL } },
+    /* 58 << 119 */
+    { { 0x830e6eea60dbac1fL,0x23d8c484da06a2f7L,0x896714b050ca535bL,
+        0xdc8d3644ebd97a9bL },
+      { 0x106ef9fab12177b4L,0xf79bf464534d5d9cL,0x2537a349a6ab360bL,
+        0xc7c54253a00c744fL } },
+    /* 59 << 119 */
+    { { 0xb3c7a047e5911a76L,0x61ffa5c8647f1ee7L,0x15aed36f8f56ab42L,
+        0x6a0d41b0a3ff9ac9L },
+      { 0x68f469f5cc30d357L,0xbe9adf816b72be96L,0x1cd926fe903ad461L,
+        0x7e89e38fcaca441bL } },
+    /* 60 << 119 */
+    { { 0xf0f82de5facf69d4L,0x363b7e764775344cL,0x6894f312b2e36d04L,
+        0x3c6cb4fe11d1c9a5L },
+      { 0x85d9c3394008e1f2L,0x5e9a85ea249f326cL,0xdc35c60a678c5e06L,
+        0xc08b944f9f86fba9L } },
+    /* 61 << 119 */
+    { { 0xde40c02c89f71f0fL,0xad8f3e31ff3da3c0L,0x3ea5096b42125dedL,
+        0x13879cbfa7379183L },
+      { 0x6f4714a56b306a0bL,0x359c2ea667646c5eL,0xfacf894307726368L,
+        0x07a5893565ff431eL } },
+    /* 62 << 119 */
+    { { 0x24d661d168754ab0L,0x801fce1d6f429a76L,0xc068a85fa58ce769L,
+        0xedc35c545d5eca2bL },
+      { 0xea31276fa3f660d1L,0xa0184ebeb8fc7167L,0x0f20f21a1d8db0aeL,
+        0xd96d095f56c35e12L } },
+    /* 63 << 119 */
+    { { 0xedf402b5f8c2a25bL,0x1bb772b9059204b6L,0x50cbeae219b4e34cL,
+        0x93109d803fa0845aL },
+      { 0x54f7ccf78ef59fb5L,0x3b438fe288070963L,0x9e28c65931f3ba9bL,
+        0x9cc31b46ead9da92L } },
+    /* 64 << 119 */
+    { { 0x3c2f0ba9b733aa5fL,0xdece47cbf05af235L,0xf8e3f715a2ac82a5L,
+        0xc97ba6412203f18aL },
+      { 0xc3af550409c11060L,0x56ea2c0546af512dL,0xfac28daff3f28146L,
+        0x87fab43a959ef494L } },
+    /* 0 << 126 */
+    { { 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 << 126 */
+    { { 0x09891641d4c5105fL,0x1ae80f8e6d7fbd65L,0x9d67225fbee6bdb0L,
+        0x3b433b597fc4d860L },
+      { 0x44e66db693e85638L,0xf7b59252e3e9862fL,0xdb785157665c32ecL,
+        0x702fefd7ae362f50L } },
+    /* 2 << 126 */
+    { { 0x3754475d0fefb0c3L,0xd48fb56b46d7c35dL,0xa070b633363798a4L,
+        0xae89f3d28fdb98e6L },
+      { 0x970b89c86363d14cL,0x8981752167abd27dL,0x9bf7d47444d5a021L,
+        0xb3083bafcac72aeeL } },
+    /* 3 << 126 */
+    { { 0x389741debe949a44L,0x638e9388546a4fa5L,0x3fe6419ca0047bdcL,
+        0x7047f648aaea57caL },
+      { 0x54e48a9041fbab17L,0xda8e0b28576bdba2L,0xe807eebcc72afddcL,
+        0x07d3336df42577bfL } },
+    /* 4 << 126 */
+    { { 0x62a8c244bfe20925L,0x91c19ac38fdce867L,0x5a96a5d5dd387063L,
+        0x61d587d421d324f6L },
+      { 0xe87673a2a37173eaL,0x2384800853778b65L,0x10f8441e05bab43eL,
+        0xfa11fe124621efbeL } },
+    /* 5 << 126 */
+    { { 0x047b772e81685d7bL,0x23f27d81bf34a976L,0xc27608e2915f48efL,
+        0x3b0b43faa521d5c3L },
+      { 0x7613fb2663ca7284L,0x7f5729b41d4db837L,0x87b14898583b526bL,
+        0x00b732a6bbadd3d1L } },
+    /* 6 << 126 */
+    { { 0x8e02f4262048e396L,0x436b50b6383d9de4L,0xf78d3481471e85adL,
+        0x8b01ea6ad005c8d6L },
+      { 0xd3c7afee97015c07L,0x46cdf1a94e3ba2aeL,0x7a42e50183d3a1d2L,
+        0xd54b5268b541dff4L } },
+    /* 7 << 126 */
+    { { 0x3f24cf304e23e9bcL,0x4387f816126e3624L,0x26a46a033b0b6d61L,
+        0xaf1bc8458b2d777cL },
+      { 0x25c401ba527de79cL,0x0e1346d44261bbb6L,0x4b96c44b287b4bc7L,
+        0x658493c75254562fL } },
+    /* 8 << 126 */
+    { { 0x23f949feb8a24a20L,0x17ebfed1f52ca53fL,0x9b691bbebcfb4853L,
+        0x5617ff6b6278a05dL },
+      { 0x241b34c5e3c99ebdL,0xfc64242e1784156aL,0x4206482f695d67dfL,
+        0xb967ce0eee27c011L } },
+    /* 9 << 126 */
+    { { 0x65db375121c80b5dL,0x2e7a563ca31ecca0L,0xe56ffc4e5238a07eL,
+        0x3d6c296632ced854L },
+      { 0xe99d7d1aaf70b885L,0xafc3bad92d686459L,0x9c78bf460cc8ba5bL,
+        0x5a43951918955aa3L } },
+    /* 10 << 126 */
+    { { 0xf8b517a85fe4e314L,0xe60234d0fcb8906fL,0xffe542acf2061b23L,
+        0x287e191f6b4cb59cL },
+      { 0x21857ddc09d877d8L,0x1c23478c14678941L,0xbbf0c056b6e05ea4L,
+        0x82da4b53b01594feL } },
+    /* 11 << 126 */
+    { { 0xf7526791fadb8608L,0x049e832d7b74cdf6L,0xa43581ccc2b90a34L,
+        0x73639eb89360b10cL },
+      { 0x4fba331fe1e4a71bL,0x6ffd6b938072f919L,0x6e53271c65679032L,
+        0x67206444f14272ceL } },
+    /* 12 << 126 */
+    { { 0xc0f734a3b2335834L,0x9526205a90ef6860L,0xcb8be71704e2bb0dL,
+        0x2418871e02f383faL },
+      { 0xd71776814082c157L,0xcc914ad029c20073L,0xf186c1ebe587e728L,
+        0x6fdb3c2261bcd5fdL } },
+    /* 13 << 126 */
+    { { 0x30d014a6f2f9f8e9L,0x963ece234fec49d2L,0x862025c59605a8d9L,
+        0x3987444519f8929aL },
+      { 0x01b6ff6512bf476aL,0x598a64d809cf7d91L,0xd7ec774993be56caL,
+        0x10899785cbb33615L } },
+    /* 14 << 126 */
+    { { 0xb8a092fd02eee3adL,0xa86b3d3530145270L,0x323d98c68512b675L,
+        0x4b8bc78562ebb40fL },
+      { 0x7d301f54413f9cdeL,0xa5e4fb4f2bab5664L,0x1d2b252d1cbfec23L,
+        0xfcd576bbe177120dL } },
+    /* 15 << 126 */
+    { { 0x04427d3e83731a34L,0x2bb9028eed836e8eL,0xb36acff8b612ca7cL,
+        0xb88fe5efd3d9c73aL },
+      { 0xbe2a6bc6edea4eb3L,0x43b93133488eec77L,0xf41ff566b17106e1L,
+        0x469e9172654efa32L } },
+    /* 16 << 126 */
+    { { 0xb4480f0441c23fa3L,0xb4712eb0c1989a2eL,0x3ccbba0f93a29ca7L,
+        0x6e205c14d619428cL },
+      { 0x90db7957b3641686L,0x0432691d45ac8b4eL,0x07a759acf64e0350L,
+        0x0514d89c9c972517L } },
+    /* 17 << 126 */
+    { { 0x1701147fa8e67fc3L,0x9e2e0b8bab2085beL,0xd5651824ac284e57L,
+        0x890d432574893664L },
+      { 0x8a7c5e6ec55e68a3L,0xbf12e90b4339c85aL,0x31846b85f922b655L,
+        0x9a54ce4d0bf4d700L } },
+    /* 18 << 126 */
+    { { 0xd7f4e83af1a14295L,0x916f955cb285d4f9L,0xe57bb0e099ffdabaL,
+        0x28a43034eab0d152L },
+      { 0x0a36ffa2b8a9cef8L,0x5517407eb9ec051aL,0x9c796096ea68e672L,
+        0x853db5fbfb3c77fbL } },
+    /* 19 << 126 */
+    { { 0x21474ba9e864a51aL,0x6c2676996e8a1b8bL,0x7c82362694120a28L,
+        0xe61e9a488383a5dbL },
+      { 0x7dd750039f84216dL,0xab020d07ad43cd85L,0x9437ae48da12c659L,
+        0x6449c2ebe65452adL } },
+    /* 20 << 126 */
+    { { 0xcc7c4c1c2cf9d7c1L,0x1320886aee95e5abL,0xbb7b9056beae170cL,
+        0xc8a5b250dbc0d662L },
+      { 0x4ed81432c11d2303L,0x7da669121f03769fL,0x3ac7a5fd84539828L,
+        0x14dada943bccdd02L } },
+    /* 21 << 126 */
+    { { 0x8b84c3217ef6b0d1L,0x52a9477a7c933f22L,0x5ef6728afd440b82L,
+        0x5c3bd8596ce4bd5eL },
+      { 0x918b80f5f22c2d3eL,0x368d5040b7bb6cc5L,0xb66142a12695a11cL,
+        0x60ac583aeb19ea70L } },
+    /* 22 << 126 */
+    { { 0x317cbb980eab2437L,0x8cc08c555e2654c8L,0xfe2d6520e6d8307fL,
+        0xe9f147f357428993L },
+      { 0x5f9c7d14d2fd6cf1L,0xa3ecd0642d4fcbb0L,0xad83fef08e7341f7L,
+        0x643f23a03a63115cL } },
+    /* 23 << 126 */
+    { { 0xd38a78abe65ab743L,0xbf7c75b135edc89cL,0x3dd8752e530df568L,
+        0xf85c4a76e308c682L },
+      { 0x4c9955b2e68acf37L,0xa544df3dab32af85L,0x4b8ec3f5a25cf493L,
+        0x4d8f27641a622febL } },
+    /* 24 << 126 */
+    { { 0x7bb4f7aaf0dcbc49L,0x7de551f970bbb45bL,0xcfd0f3e49f2ca2e5L,
+        0xece587091f5c76efL },
+      { 0x32920edd167d79aeL,0x039df8a2fa7d7ec1L,0xf46206c0bb30af91L,
+        0x1ff5e2f522676b59L } },
+    /* 25 << 126 */
+    { { 0x11f4a0396ea51d66L,0x506c1445807d7a26L,0x60da5705755a9b24L,
+        0x8fc8cc321f1a319eL },
+      { 0x83642d4d9433d67dL,0x7fa5cb8f6a7dd296L,0x576591db9b7bde07L,
+        0x13173d25419716fbL } },
+    /* 26 << 126 */
+    { { 0xea30599dd5b340ffL,0xfc6b5297b0fe76c5L,0x1c6968c8ab8f5adcL,
+        0xf723c7f5901c928dL },
+      { 0x4203c3219773d402L,0xdf7c6aa31b51dd47L,0x3d49e37a552be23cL,
+        0x57febee80b5a6e87L } },
+    /* 27 << 126 */
+    { { 0xc5ecbee47bd8e739L,0x79d44994ae63bf75L,0x168bd00f38fb8923L,
+        0x75d48ee4d0533130L },
+      { 0x554f77aadb5cdf33L,0x3396e8963c696769L,0x2fdddbf2d3fd674eL,
+        0xbbb8f6ee99d0e3e5L } },
+    /* 28 << 126 */
+    { { 0x51b90651cbae2f70L,0xefc4bc0593aaa8ebL,0x8ecd8689dd1df499L,
+        0x1aee99a822f367a5L },
+      { 0x95d485b9ae8274c5L,0x6c14d4457d30b39cL,0xbafea90bbcc1ef81L,
+        0x7c5f317aa459a2edL } },
+    /* 29 << 126 */
+    { { 0x012110754ef44227L,0xa17bed6edc20f496L,0x0cdfe424819853cdL,
+        0x13793298f71e2ce7L },
+      { 0x3c1f3078dbbe307bL,0x6dd1c20e76ee9936L,0x23ee4b57423caa20L,
+        0x4ac3793b8efb840eL } },
+    /* 30 << 126 */
+    { { 0x934438ebed1f8ca0L,0x3e5466584ebb25a2L,0xc415af0ec069896fL,
+        0xc13eddb09a5aa43dL },
+      { 0x7a04204fd49eb8f6L,0xd0d5bdfcd74f1670L,0x3697e28656fc0558L,
+        0x1020737101cebadeL } },
+    /* 31 << 126 */
+    { { 0x5f87e6900647a82bL,0x908e0ed48f40054fL,0xa9f633d479853803L,
+        0x8ed13c9a4a28b252L },
+      { 0x3e2ef6761f460f64L,0x53930b9b36d06336L,0x347073ac8fc4979bL,
+        0x84380e0e5ecd5597L } },
+    /* 32 << 126 */
+    { { 0xe3b22c6bc4fe3c39L,0xba4a81536c7bebdfL,0xf23ab6b725693459L,
+        0x53bc377014922b11L },
+      { 0x4645c8ab5afc60dbL,0xaa02235520b9f2a3L,0x52a2954cce0fc507L,
+        0x8c2731bb7ce1c2e7L } },
+    /* 33 << 126 */
+    { { 0xf39608ab18a0339dL,0xac7a658d3735436cL,0xb22c2b07cd992b4fL,
+        0x4e83daecf40dcfd4L },
+      { 0x8a34c7be2f39ea3eL,0xef0c005fb0a56d2eL,0x62731f6a6edd8038L,
+        0x5721d7404e3cb075L } },
+    /* 34 << 126 */
+    { { 0x1ea41511fbeeee1bL,0xd1ef5e73ef1d0c05L,0x42feefd173c07d35L,
+        0xe530a00a8a329493L },
+      { 0x5d55b7fef15ebfb0L,0x549de03cd322491aL,0xf7b5f602745b3237L,
+        0x3632a3a21ab6e2b6L } },
+    /* 35 << 126 */
+    { { 0x0d3bba890ef59f78L,0x0dfc6443c9e52b9aL,0x1dc7969972631447L,
+        0xef033917b3be20b1L },
+      { 0x0c92735db1383948L,0xc1fc29a2c0dd7d7dL,0x6485b697403ed068L,
+        0x13bfaab3aac93bdcL } },
+    /* 36 << 126 */
+    { { 0x410dc6a90deeaf52L,0xb003fb024c641c15L,0x1384978c5bc504c4L,
+        0x37640487864a6a77L },
+      { 0x05991bc6222a77daL,0x62260a575e47eb11L,0xc7af6613f21b432cL,
+        0x22f3acc9ab4953e9L } },
+    /* 37 << 126 */
+    { { 0x529349228e41d155L,0x4d0245683ac059efL,0xb02017554d884411L,
+        0xce8055cfa59a178fL },
+      { 0xcd77d1aff6204549L,0xa0a00a3ec7066759L,0x471071ef0272c229L,
+        0x009bcf6bd3c4b6b0L } },
+    /* 38 << 126 */
+    { { 0x2a2638a822305177L,0xd51d59df41645bbfL,0xa81142fdc0a7a3c0L,
+        0xa17eca6d4c7063eeL },
+      { 0x0bb887ed60d9dcecL,0xd6d28e5120ad2455L,0xebed6308a67102baL,
+        0x042c31148bffa408L } },
+    /* 39 << 126 */
+    { { 0xfd099ac58aa68e30L,0x7a6a3d7c1483513eL,0xffcc6b75ba2d8f0cL,
+        0x54dacf961e78b954L },
+      { 0xf645696fa4a9af89L,0x3a41194006ac98ecL,0x41b8b3f622a67a20L,
+        0x2d0b1e0f99dec626L } },
+    /* 40 << 126 */
+    { { 0x27c8919240be34e8L,0xc7162b3791907f35L,0x90188ec1a956702bL,
+        0xca132f7ddf93769cL },
+      { 0x3ece44f90e2025b4L,0x67aaec690c62f14cL,0xad74141822e3cc11L,
+        0xcf9b75c37ff9a50eL } },
+    /* 41 << 126 */
+    { { 0x02fa2b164d348272L,0xbd99d61a9959d56dL,0xbc4f19db18762916L,
+        0xcc7cce5049c1ac80L },
+      { 0x4d59ebaad846bd83L,0x8775a9dca9202849L,0x07ec4ae16e1f4ca9L,
+        0x27eb5875ba893f11L } },
+    /* 42 << 126 */
+    { { 0x00284d51662cc565L,0x82353a6b0db4138dL,0xd9c7aaaaaa32a594L,
+        0xf5528b5ea5669c47L },
+      { 0xf32202312f23c5ffL,0xe3e8147a6affa3a1L,0xfb423d5c202ddda0L,
+        0x3d6414ac6b871bd4L } },
+    /* 43 << 126 */
+    { { 0x586f82e1a51a168aL,0xb712c67148ae5448L,0x9a2e4bd176233eb8L,
+        0x0188223a78811ca9L },
+      { 0x553c5e21f7c18de1L,0x7682e451b27bb286L,0x3ed036b30e51e929L,
+        0xf487211bec9cb34fL } },
+    /* 44 << 126 */
+    { { 0x0d0942770c24efc8L,0x0349fd04bef737a4L,0x6d1c9dd2514cdd28L,
+        0x29c135ff30da9521L },
+      { 0xea6e4508f78b0b6fL,0x176f5dd2678c143cL,0x081484184be21e65L,
+        0x27f7525ce7df38c4L } },
+    /* 45 << 126 */
+    { { 0x1fb70e09748ab1a4L,0x9cba50a05efe4433L,0x7846c7a615f75af2L,
+        0x2a7c2c575ee73ea8L },
+      { 0x42e566a43f0a449aL,0x45474c3bad90fc3dL,0x7447be3d8b61d057L,
+        0x3e9d1cf13a4ec092L } },
+    /* 46 << 126 */
+    { { 0x1603e453f380a6e6L,0x0b86e4319b1437c2L,0x7a4173f2ef29610aL,
+        0x8fa729a7f03d57f7L },
+      { 0x3e186f6e6c9c217eL,0xbe1d307991919524L,0x92a62a70153d4fb1L,
+        0x32ed3e34d68c2f71L } },
+    /* 47 << 126 */
+    { { 0xd785027f9eb1a8b7L,0xbc37eb77c5b22fe8L,0x466b34f0b9d6a191L,
+        0x008a89af9a05f816L },
+      { 0x19b028fb7d42c10aL,0x7fe8c92f49b3f6b8L,0x58907cc0a5a0ade3L,
+        0xb3154f51559d1a7cL } },
+    /* 48 << 126 */
+    { { 0x5066efb6d9790ed6L,0xa77a0cbca6aa793bL,0x1a915f3c223e042eL,
+        0x1c5def0469c5874bL },
+      { 0x0e83007873b6c1daL,0x55cf85d2fcd8557aL,0x0f7c7c760460f3b1L,
+        0x87052acb46e58063L } },
+    /* 49 << 126 */
+    { { 0x09212b80907eae66L,0x3cb068e04d721c89L,0xa87941aedd45ac1cL,
+        0xde8d5c0d0daa0dbbL },
+      { 0xda421fdce3502e6eL,0xc89442014d89a084L,0x7307ba5ef0c24bfbL,
+        0xda212beb20bde0efL } },
+    /* 50 << 126 */
+    { { 0xea2da24bf82ce682L,0x058d381607f71fe4L,0x35a024625ffad8deL,
+        0xcd7b05dcaadcefabL },
+      { 0xd442f8ed1d9f54ecL,0x8be3d618b2d3b5caL,0xe2220ed0e06b2ce2L,
+        0x82699a5f1b0da4c0L } },
+    /* 51 << 126 */
+    { { 0x3ff106f571c0c3a7L,0x8f580f5a0d34180cL,0x4ebb120e22d7d375L,
+        0x5e5782cce9513675L },
+      { 0x2275580c99c82a70L,0xe8359fbf15ea8c4cL,0x53b48db87b415e70L,
+        0xaacf2240100c6014L } },
+    /* 52 << 126 */
+    { { 0x9faaccf5e4652f1dL,0xbd6fdd2ad56157b2L,0xa4f4fb1f6261ec50L,
+        0x244e55ad476bcd52L },
+      { 0x881c9305047d320bL,0x1ca983d56181263fL,0x354e9a44278fb8eeL,
+        0xad2dbc0f396e4964L } },
+    /* 53 << 126 */
+    { { 0x723f3aa29268b3deL,0x0d1ca29ae6e0609aL,0x794866aa6cf44252L,
+        0x0b59f3e301af87edL },
+      { 0xe234e5ff7f4a6c51L,0xa8768fd261dc2f7eL,0xdafc73320a94d81fL,
+        0xd7f8428206938ce1L } },
+    /* 54 << 126 */
+    { { 0xae0b3c0e0546063eL,0x7fbadcb25d61abc6L,0xd5d7a2c9369ac400L,
+        0xa5978d09ae67d10cL },
+      { 0x290f211e4f85eaacL,0xe61e2ad1facac681L,0xae125225388384cdL,
+        0xa7fb68e9ccfde30fL } },
+    /* 55 << 126 */
+    { { 0x7a59b9363daed4c2L,0x80a9aa402606f789L,0xb40c1ea5f6a6d90aL,
+        0x948364d3514d5885L },
+      { 0x062ebc6070985182L,0xa6db5b0e33310895L,0x64a12175e329c2f5L,
+        0xc5f25bd290ea237eL } },
+    /* 56 << 126 */
+    { { 0x7915c5242d0a4c23L,0xeb5d26e46bb3cc52L,0x369a9116c09e2c92L,
+        0x0c527f92cf182cf8L },
+      { 0x9e5919382aede0acL,0xb29222086cc34939L,0x3c9d896299a34361L,
+        0x3c81836dc1905fe6L } },
+    /* 57 << 126 */
+    { { 0x4bfeb57fa001ec5aL,0xe993f5bba0dc5dbaL,0x47884109724a1380L,
+        0x8a0369ab32fe9a04L },
+      { 0xea068d608c927db8L,0xbf5f37cf94655741L,0x47d402a204b6c7eaL,
+        0x4551c2956af259cbL } },
+    /* 58 << 126 */
+    { { 0x698b71e7ed77ee8bL,0xbddf7bd0f309d5c7L,0x6201c22c34e780caL,
+        0xab04f7d84c295ef4L },
+      { 0x1c9472944313a8ceL,0xe532e4ac92ca4cfeL,0x89738f80d0a7a97aL,
+        0xec088c88a580fd5bL } },
+    /* 59 << 126 */
+    { { 0x612b1ecc42ce9e51L,0x8f9840fdb25fdd2aL,0x3cda78c001e7f839L,
+        0x546b3d3aece05480L },
+      { 0x271719a980d30916L,0x45497107584c20c4L,0xaf8f94785bc78608L,
+        0x28c7d484277e2a4cL } },
+    /* 60 << 126 */
+    { { 0xfce0176788a2ffe4L,0xdc506a3528e169a5L,0x0ea108617af9c93aL,
+        0x1ed2436103fa0e08L },
+      { 0x96eaaa92a3d694e7L,0xc0f43b4def50bc74L,0xce6aa58c64114db4L,
+        0x8218e8ea7c000fd4L } },
+    /* 61 << 126 */
+    { { 0xac815dfb185f8844L,0xcd7e90cb1557abfbL,0x23d16655afbfecdfL,
+        0x80f3271f085cac4aL },
+      { 0x7fc39aa7d0e62f47L,0x88d519d1460a48e5L,0x59559ac4d28f101eL,
+        0x7981d9e9ca9ae816L } },
+    /* 62 << 126 */
+    { { 0x5c38652c9ac38203L,0x86eaf87f57657fe5L,0x568fc472e21f5416L,
+        0x2afff39ce7e597b5L },
+      { 0x3adbbb07256d4eabL,0x225986928285ab89L,0x35f8112a041caefeL,
+        0x95df02e3a5064c8bL } },
+    /* 63 << 126 */
+    { { 0x4d63356ec7004bf3L,0x230a08f4db83c7deL,0xca27b2708709a7b7L,
+        0x0d1c4cc4cb9abd2dL },
+      { 0x8a0bc66e7550fee8L,0x369cd4c79cf7247eL,0x75562e8492b5b7e7L,
+        0x8fed0da05802af7bL } },
+    /* 64 << 126 */
+    { { 0x6a7091c2e48fb889L,0x26882c137b8a9d06L,0xa24986631b82a0e2L,
+        0x844ed7363518152dL },
+      { 0x282f476fd86e27c7L,0xa04edaca04afefdcL,0x8b256ebc6119e34dL,
+        0x56a413e90787d78bL } },
+    /* 0 << 133 */
+    { { 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 << 133 */
+    { { 0x82ee061d5a74be50L,0xe41781c4dea16ff5L,0xe0b0c81e99bfc8a2L,
+        0x624f4d690b547e2dL },
+      { 0x3a83545dbdcc9ae4L,0x2573dbb6409b1e8eL,0x482960c4a6c93539L,
+        0xf01059ad5ae18798L } },
+    /* 2 << 133 */
+    { { 0x715c9f973112795fL,0xe8244437984e6ee1L,0x55cb4858ecb66bcdL,
+        0x7c136735abaffbeeL },
+      { 0x546615955dbec38eL,0x51c0782c388ad153L,0x9ba4c53ac6e0952fL,
+        0x27e6782a1b21dfa8L } },
+    /* 3 << 133 */
+    { { 0x682f903d4ed2dbc2L,0x0eba59c87c3b2d83L,0x8e9dc84d9c7e9335L,
+        0x5f9b21b00eb226d7L },
+      { 0xe33bd394af267baeL,0xaa86cc25be2e15aeL,0x4f0bf67d6a8ec500L,
+        0x5846aa44f9630658L } },
+    /* 4 << 133 */
+    { { 0xfeb09740e2c2bf15L,0x627a2205a9e99704L,0xec8d73d0c2fbc565L,
+        0x223eed8fc20c8de8L },
+      { 0x1ee32583a8363b49L,0x1a0b6cb9c9c2b0a6L,0x49f7c3d290dbc85cL,
+        0xa8dfbb971ef4c1acL } },
+    /* 5 << 133 */
+    { { 0xafb34d4c65c7c2abL,0x1d4610e7e2c5ea84L,0x893f6d1b973c4ab5L,
+        0xa3cdd7e9945ba5c4L },
+      { 0x60514983064417eeL,0x1459b23cad6bdf2bL,0x23b2c3415cf726c3L,
+        0x3a82963532d6354aL } },
+    /* 6 << 133 */
+    { { 0x294f901fab192c18L,0xec5fcbfe7030164fL,0xe2e2fcb7e2246ba6L,
+        0x1e7c88b3221a1a0cL },
+      { 0x72c7dd93c92d88c5L,0x41c2148e1106fb59L,0x547dd4f5a0f60f14L,
+        0xed9b52b263960f31L } },
+    /* 7 << 133 */
+    { { 0x6c8349ebb0a5b358L,0xb154c5c29e7e2ed6L,0xcad5eccfeda462dbL,
+        0xf2d6dbe42de66b69L },
+      { 0x426aedf38665e5b2L,0x488a85137b7f5723L,0x15cc43b38bcbb386L,
+        0x27ad0af3d791d879L } },
+    /* 8 << 133 */
+    { { 0xc16c236e846e364fL,0x7f33527cdea50ca0L,0xc48107750926b86dL,
+        0x6c2a36090598e70cL },
+      { 0xa6755e52f024e924L,0xe0fa07a49db4afcaL,0x15c3ce7d66831790L,
+        0x5b4ef350a6cbb0d6L } },
+    /* 9 << 133 */
+    { { 0x2c4aafc4b6205969L,0x42563f02f6c7854fL,0x016aced51d983b48L,
+        0xfeb356d899949755L },
+      { 0x8c2a2c81d1a39bd7L,0x8f44340fe6934ae9L,0x148cf91c447904daL,
+        0x7340185f0f51a926L } },
+    /* 10 << 133 */
+    { { 0x2f8f00fb7409ab46L,0x057e78e680e289b2L,0x03e5022ca888e5d1L,
+        0x3c87111a9dede4e2L },
+      { 0x5b9b0e1c7809460bL,0xe751c85271c9abc7L,0x8b944e28c7cc1dc9L,
+        0x4f201ffa1d3cfa08L } },
+    /* 11 << 133 */
+    { { 0x02fc905c3e6721ceL,0xd52d70dad0b3674cL,0x5dc2e5ca18810da4L,
+        0xa984b2735c69dd99L },
+      { 0x63b9252784de5ca4L,0x2f1c9872c852dec4L,0x18b03593c2e3de09L,
+        0x19d70b019813dc2fL } },
+    /* 12 << 133 */
+    { { 0x42806b2da6dc1d29L,0xd3030009f871e144L,0xa1feb333aaf49276L,
+        0xb5583b9ec70bc04bL },
+      { 0x1db0be7895695f20L,0xfc84181189d012b5L,0x6409f27205f61643L,
+        0x40d34174d5883128L } },
+    /* 13 << 133 */
+    { { 0xd79196f567419833L,0x6059e252863b7b08L,0x84da18171c56700cL,
+        0x5758ee56b28d3ec4L },
+      { 0x7da2771d013b0ea6L,0xfddf524b54c5e9b9L,0x7df4faf824305d80L,
+        0x58f5c1bf3a97763fL } },
+    /* 14 << 133 */
+    { { 0xa5af37f17c696042L,0xd4cba22c4a2538deL,0x211cb9959ea42600L,
+        0xcd105f417b069889L },
+      { 0xb1e1cf19ddb81e74L,0x472f2d895157b8caL,0x086fb008ee9db885L,
+        0x365cd5700f26d131L } },
+    /* 15 << 133 */
+    { { 0x284b02bba2be7053L,0xdcbbf7c67ab9a6d6L,0x4425559c20f7a530L,
+        0x961f2dfa188767c8L },
+      { 0xe2fd943570dc80c4L,0x104d6b63f0784120L,0x7f592bc153567122L,
+        0xf6bc1246f688ad77L } },
+    /* 16 << 133 */
+    { { 0x05214c050f15dde9L,0xa47a76a80d5f2b82L,0xbb254d3062e82b62L,
+        0x11a05fe03ec955eeL },
+      { 0x7eaff46e9d529b36L,0x55ab13018f9e3df6L,0xc463e37199317698L,
+        0xfd251438ccda47adL } },
+    /* 17 << 133 */
+    { { 0xca9c354723d695eaL,0x48ce626e16e589b5L,0x6b5b64c7b187d086L,
+        0xd02e1794b2207948L },
+      { 0x8b58e98f7198111dL,0x90ca6305dcf9c3ccL,0x5691fe72f34089b0L,
+        0x60941af1fc7c80ffL } },
+    /* 18 << 133 */
+    { { 0xa09bc0a222eb51e5L,0xc0bb7244aa9cf09aL,0x36a8077f80159f06L,
+        0x8b5c989edddc560eL },
+      { 0x19d2f316512e1f43L,0x02eac554ad08ff62L,0x012ab84c07d20b4eL,
+        0x37d1e115d6d4e4e1L } },
+    /* 19 << 133 */
+    { { 0xb6443e1aab7b19a8L,0xf08d067edef8cd45L,0x63adf3e9685e03daL,
+        0xcf15a10e4792b916L },
+      { 0xf44bcce5b738a425L,0xebe131d59636b2fdL,0x940688417850d605L,
+        0x09684eaab40d749dL } },
+    /* 20 << 133 */
+    { { 0x8c3c669c72ba075bL,0x89f78b55ba469015L,0x5706aade3e9f8ba8L,
+        0x6d8bd565b32d7ed7L },
+      { 0x25f4e63b805f08d6L,0x7f48200dc3bcc1b5L,0x4e801968b025d847L,
+        0x74afac0487cbe0a8L } },
+    /* 21 << 133 */
+    { { 0x43ed2c2b7e63d690L,0xefb6bbf00223cdb8L,0x4fec3cae2884d3feL,
+        0x065ecce6d75e25a4L },
+      { 0x6c2294ce69f79071L,0x0d9a8e5f044b8666L,0x5009f23817b69d8fL,
+        0x3c29f8fec5dfdaf7L } },
+    /* 22 << 133 */
+    { { 0x9067528febae68c4L,0x5b38563230c5ba21L,0x540df1191fdd1aecL,
+        0xcf37825bcfba4c78L },
+      { 0x77eff980beb11454L,0x40a1a99160c1b066L,0xe8018980f889a1c7L,
+        0xb9c52ae976c24be0L } },
+    /* 23 << 133 */
+    { { 0x05fbbcce45650ef4L,0xae000f108aa29ac7L,0x884b71724f04c470L,
+        0x7cd4fde219bb5c25L },
+      { 0x6477b22ae8840869L,0xa88688595fbd0686L,0xf23cc02e1116dfbaL,
+        0x76cd563fd87d7776L } },
+    /* 24 << 133 */
+    { { 0xe2a37598a9d82abfL,0x5f188ccbe6c170f5L,0x816822005066b087L,
+        0xda22c212c7155adaL },
+      { 0x151e5d3afbddb479L,0x4b606b846d715b99L,0x4a73b54bf997cb2eL,
+        0x9a1bfe433ecd8b66L } },
+    /* 25 << 133 */
+    { { 0x1c3128092a67d48aL,0xcd6a671e031fa9e2L,0xbec3312a0e43a34aL,
+        0x1d93563955ef47d3L },
+      { 0x5ea024898fea73eaL,0x8247b364a035afb2L,0xb58300a65265b54cL,
+        0x3286662f722c7148L } },
+    /* 26 << 133 */
+    { { 0xb77fd76bb4ec4c20L,0xf0a12fa70f3fe3fdL,0xf845bbf541d8c7e8L,
+        0xe4d969ca5ec10aa8L },
+      { 0x4c0053b743e232a3L,0xdc7a3fac37f8a45aL,0x3c4261c520d81c8fL,
+        0xfd4b3453b00eab00L } },
+    /* 27 << 133 */
+    { { 0x76d48f86d36e3062L,0x626c5277a143ff02L,0x538174deaf76f42eL,
+        0x2267aa866407ceacL },
+      { 0xfad7635172e572d5L,0xab861af7ba7330ebL,0xa0a1c8c7418d8657L,
+        0x988821cb20289a52L } },
+    /* 28 << 133 */
+    { { 0x79732522cccc18adL,0xaadf3f8df1a6e027L,0xf7382c9317c2354dL,
+        0x5ce1680cd818b689L },
+      { 0x359ebbfcd9ecbee9L,0x4330689c1cae62acL,0xb55ce5b4c51ac38aL,
+        0x7921dfeafe238ee8L } },
+    /* 29 << 133 */
+    { { 0x3972bef8271d1ca5L,0x3e423bc7e8aabd18L,0x57b09f3f44a3e5e3L,
+        0x5da886ae7b444d66L },
+      { 0x68206634a9964375L,0x356a2fa3699cd0ffL,0xaf0faa24dba515e9L,
+        0x536e1f5cb321d79aL } },
+    /* 30 << 133 */
+    { { 0xd3b9913a5c04e4eaL,0xd549dcfed6f11513L,0xee227bf579fd1d94L,
+        0x9f35afeeb43f2c67L },
+      { 0xd2638d24f1314f53L,0x62baf948cabcd822L,0x5542de294ef48db0L,
+        0xb3eb6a04fc5f6bb2L } },
+    /* 31 << 133 */
+    { { 0x23c110ae1208e16aL,0x1a4d15b5f8363e24L,0x30716844164be00bL,
+        0xa8e24824f6f4690dL },
+      { 0x548773a290b170cfL,0xa1bef33142f191f4L,0x70f418d09247aa97L,
+        0xea06028e48be9147L } },
+    /* 32 << 133 */
+    { { 0xe13122f3dbfb894eL,0xbe9b79f6ce274b18L,0x85a49de5ca58aadfL,
+        0x2495775811487351L },
+      { 0x111def61bb939099L,0x1d6a974a26d13694L,0x4474b4ced3fc253bL,
+        0x3a1485e64c5db15eL } },
+    /* 33 << 133 */
+    { { 0xe79667b4147c15b4L,0xe34f553b7bc61301L,0x032b80f817094381L,
+        0x55d8bafd723eaa21L },
+      { 0x5a987995f1c0e74eL,0x5a9b292eebba289cL,0x413cd4b2eb4c8251L,
+        0x98b5d243d162db0aL } },
+    /* 34 << 133 */
+    { { 0xbb47bf6668342520L,0x08d68949baa862d1L,0x11f349c7e906abcdL,
+        0x454ce985ed7bf00eL },
+      { 0xacab5c9eb55b803bL,0xb03468ea31e3c16dL,0x5c24213dd273bf12L,
+        0x211538eb71587887L } },
+    /* 35 << 133 */
+    { { 0x198e4a2f731dea2dL,0xd5856cf274ed7b2aL,0x86a632eb13a664feL,
+        0x932cd909bda41291L },
+      { 0x850e95d4c0c4ddc0L,0xc0f422f8347fc2c9L,0xe68cbec486076bcbL,
+        0xf9e7c0c0cd6cd286L } },
+    /* 36 << 133 */
+    { { 0x65994ddb0f5f27caL,0xe85461fba80d59ffL,0xff05481a66601023L,
+        0xc665427afc9ebbfbL },
+      { 0xb0571a697587fd52L,0x935289f88d49efceL,0x61becc60ea420688L,
+        0xb22639d913a786afL } },
+    /* 37 << 133 */
+    { { 0x1a8e6220361ecf90L,0x001f23e025506463L,0xe4ae9b5d0a5c2b79L,
+        0xebc9cdadd8149db5L },
+      { 0xb33164a1934aa728L,0x750eb00eae9b60f3L,0x5a91615b9b9cfbfdL,
+        0x97015cbfef45f7f6L } },
+    /* 38 << 133 */
+    { { 0xb462c4a5bf5151dfL,0x21adcc41b07118f2L,0xd60c545b043fa42cL,
+        0xfc21aa54e96be1abL },
+      { 0xe84bc32f4e51ea80L,0x3dae45f0259b5d8dL,0xbb73c7ebc38f1b5eL,
+        0xe405a74ae8ae617dL } },
+    /* 39 << 133 */
+    { { 0xbb1ae9c69f1c56bdL,0x8c176b9849f196a4L,0xc448f3116875092bL,
+        0xb5afe3de9f976033L },
+      { 0xa8dafd49145813e5L,0x687fc4d9e2b34226L,0xf2dfc92d4c7ff57fL,
+        0x004e3fc1401f1b46L } },
+    /* 40 << 133 */
+    { { 0x5afddab61430c9abL,0x0bdd41d32238e997L,0xf0947430418042aeL,
+        0x71f9addacdddc4cbL },
+      { 0x7090c016c52dd907L,0xd9bdf44d29e2047fL,0xe6f1fe801b1011a6L,
+        0xb63accbcd9acdc78L } },
+    /* 41 << 133 */
+    { { 0xcfc7e2351272a95bL,0x0c667717a6276ac8L,0x3c0d3709e2d7eef7L,
+        0x5add2b069a685b3eL },
+      { 0x363ad32d14ea5d65L,0xf8e01f068d7dd506L,0xc9ea221375b4aac6L,
+        0xed2a2bf90d353466L } },
+    /* 42 << 133 */
+    { { 0x439d79b5e9d3a7c3L,0x8e0ee5a681b7f34bL,0xcf3dacf51dc4ba75L,
+        0x1d3d1773eb3310c7L },
+      { 0xa8e671127747ae83L,0x31f43160197d6b40L,0x0521cceecd961400L,
+        0x67246f11f6535768L } },
+    /* 43 << 133 */
+    { { 0x702fcc5aef0c3133L,0x247cc45d7e16693bL,0xfd484e49c729b749L,
+        0x522cef7db218320fL },
+      { 0xe56ef40559ab93b3L,0x225fba119f181071L,0x33bd659515330ed0L,
+        0xc4be69d51ddb32f7L } },
+    /* 44 << 133 */
+    { { 0x264c76680448087cL,0xac30903f71432daeL,0x3851b26600f9bf47L,
+        0x400ed3116cdd6d03L },
+      { 0x045e79fef8fd2424L,0xfdfd974afa6da98bL,0x45c9f6410c1e673aL,
+        0x76f2e7335b2c5168L } },
+    /* 45 << 133 */
+    { { 0x1adaebb52a601753L,0xb286514cc57c2d49L,0xd87696701e0bfd24L,
+        0x950c547e04478922L },
+      { 0xd1d41969e5d32bfeL,0x30bc1472750d6c3eL,0x8f3679fee0e27f3aL,
+        0x8f64a7dca4a6ee0cL } },
+    /* 46 << 133 */
+    { { 0x2fe59937633dfb1fL,0xea82c395977f2547L,0xcbdfdf1a661ea646L,
+        0xc7ccc591b9085451L },
+      { 0x8217796281761e13L,0xda57596f9196885cL,0xbc17e84928ffbd70L,
+        0x1e6e0a412671d36fL } },
+    /* 47 << 133 */
+    { { 0x61ae872c4152fcf5L,0x441c87b09e77e754L,0xd0799dd5a34dff09L,
+        0x766b4e4488a6b171L },
+      { 0xdc06a51211f1c792L,0xea02ae934be35c3eL,0xe5ca4d6de90c469eL,
+        0x4df4368e56e4ff5cL } },
+    /* 48 << 133 */
+    { { 0x7817acab4baef62eL,0x9f5a2202a85b91e8L,0x9666ebe66ce57610L,
+        0x32ad31f3f73bfe03L },
+      { 0x628330a425bcf4d6L,0xea950593515056e6L,0x59811c89e1332156L,
+        0xc89cf1fe8c11b2d7L } },
+    /* 49 << 133 */
+    { { 0x75b6391304e60cc0L,0xce811e8d4625d375L,0x030e43fc2d26e562L,
+        0xfbb30b4b608d36a0L },
+      { 0x634ff82c48528118L,0x7c6fe085cd285911L,0x7f2830c099358f28L,
+        0x2e60a95e665e6c09L } },
+    /* 50 << 133 */
+    { { 0x08407d3d9b785dbfL,0x530889aba759bce7L,0xf228e0e652f61239L,
+        0x2b6d14616879be3cL },
+      { 0xe6902c0451a7bbf7L,0x30ad99f076f24a64L,0x66d9317a98bc6da0L,
+        0xf4f877f3cb596ac0L } },
+    /* 51 << 133 */
+    { { 0xb05ff62d4c44f119L,0x4555f536e9b77416L,0xc7c0d0598caed63bL,
+        0x0cd2b7cec358b2a9L },
+      { 0x3f33287b46945fa3L,0xf8785b20d67c8791L,0xc54a7a619637bd08L,
+        0x54d4598c18be79d7L } },
+    /* 52 << 133 */
+    { { 0x889e5acbc46d7ce1L,0x9a515bb78b085877L,0xfac1a03d0b7a5050L,
+        0x7d3e738af2926035L },
+      { 0x861cc2ce2a6cb0ebL,0x6f2e29558f7adc79L,0x61c4d45133016376L,
+        0xd9fd2c805ad59090L } },
+    /* 53 << 133 */
+    { { 0xe5a83738b2b836a1L,0x855b41a07c0d6622L,0x186fe3177cc19af1L,
+        0x6465c1fffdd99acbL },
+      { 0x46e5c23f6974b99eL,0x75a7cf8ba2717cbeL,0x4d2ebc3f062be658L,
+        0x094b44475f209c98L } },
+    /* 54 << 133 */
+    { { 0x4af285edb940cb5aL,0x6706d7927cc82f10L,0xc8c8776c030526faL,
+        0xfa8e6f76a0da9140L },
+      { 0x77ea9d34591ee4f0L,0x5f46e33740274166L,0x1bdf98bbea671457L,
+        0xd7c08b46862a1fe2L } },
+    /* 55 << 133 */
+    { { 0x46cc303c1c08ad63L,0x995434404c845e7bL,0x1b8fbdb548f36bf7L,
+        0x5b82c3928c8273a7L },
+      { 0x08f712c4928435d5L,0x071cf0f179330380L,0xc74c2d24a8da054aL,
+        0xcb0e720143c46b5cL } },
+    /* 56 << 133 */
+    { { 0x0ad7337ac0b7eff3L,0x8552225ec5e48b3cL,0xe6f78b0c73f13a5fL,
+        0x5e70062e82349cbeL },
+      { 0x6b8d5048e7073969L,0x392d2a29c33cb3d2L,0xee4f727c4ecaa20fL,
+        0xa068c99e2ccde707L } },
+    /* 57 << 133 */
+    { { 0xfcd5651fb87a2913L,0xea3e3c153cc252f0L,0x777d92df3b6cd3e4L,
+        0x7a414143c5a732e7L },
+      { 0xa895951aa71ff493L,0xfe980c92bbd37cf6L,0x45bd5e64decfeeffL,
+        0x910dc2a9a44c43e9L } },
+    /* 58 << 133 */
+    { { 0xcb403f26cca9f54dL,0x928bbdfb9303f6dbL,0x3c37951ea9eee67cL,
+        0x3bd61a52f79961c3L },
+      { 0x09a238e6395c9a79L,0x6940ca2d61eb352dL,0x7d1e5c5ec1875631L,
+        0x1e19742c1e1b20d1L } },
+    /* 59 << 133 */
+    { { 0x4633d90823fc2e6eL,0xa76e29a908959149L,0x61069d9c84ed7da5L,
+        0x0baa11cf5dbcad51L },
+      { 0xd01eec64961849daL,0x93b75f1faf3d8c28L,0x57bc4f9f1ca2ee44L,
+        0x5a26322d00e00558L } },
+    /* 60 << 133 */
+    { { 0x1888d65861a023efL,0x1d72aab4b9e5246eL,0xa9a26348e5563ec0L,
+        0xa0971963c3439a43L },
+      { 0x567dd54badb9b5b7L,0x73fac1a1c45a524bL,0x8fe97ef7fe38e608L,
+        0x608748d23f384f48L } },
+    /* 61 << 133 */
+    { { 0xb0571794c486094fL,0x869254a38bf3a8d6L,0x148a8dd1310b0e25L,
+        0x99ab9f3f9aa3f7d8L },
+      { 0x0927c68a6706c02eL,0x22b5e76c69790e6cL,0x6c3252606c71376cL,
+        0x53a5769009ef6657L } },
+    /* 62 << 133 */
+    { { 0x8d63f852edffcf3aL,0xb4d2ed043c0a6f55L,0xdb3aa8de12519b9eL,
+        0x5d38e9c41e0a569aL },
+      { 0x871528bf303747e2L,0xa208e77cf5b5c18dL,0x9d129c88ca6bf923L,
+        0xbcbf197fbf02839fL } },
+    /* 63 << 133 */
+    { { 0x9b9bf03027323194L,0x3b055a8b339ca59dL,0xb46b23120f669520L,
+        0x19789f1f497e5f24L },
+      { 0x9c499468aaf01801L,0x72ee11908b69d59cL,0x8bd39595acf4c079L,
+        0x3ee11ece8e0cd048L } },
+    /* 64 << 133 */
+    { { 0xebde86ec1ed66f18L,0x225d906bd61fce43L,0x5cab07d6e8bed74dL,
+        0x16e4617f27855ab7L },
+      { 0x6568aaddb2fbc3ddL,0xedb5484f8aeddf5bL,0x878f20e86dcf2fadL,
+        0x3516497c615f5699L } },
+    /* 0 << 140 */
+    { { 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 << 140 */
+    { { 0xef0a3fecfa181e69L,0x9ea02f8130d69a98L,0xb2e9cf8e66eab95dL,
+        0x520f2beb24720021L },
+      { 0x621c540a1df84361L,0x1203772171fa6d5dL,0x6e3c7b510ff5f6ffL,
+        0x817a069babb2bef3L } },
+    /* 2 << 140 */
+    { { 0x83572fb6b294cda6L,0x6ce9bf75b9039f34L,0x20e012f0095cbb21L,
+        0xa0aecc1bd063f0daL },
+      { 0x57c21c3af02909e5L,0xc7d59ecf48ce9cdcL,0x2732b8448ae336f8L,
+        0x056e37233f4f85f4L } },
+    /* 3 << 140 */
+    { { 0x8a10b53189e800caL,0x50fe0c17145208fdL,0x9e43c0d3b714ba37L,
+        0x427d200e34189accL },
+      { 0x05dee24fe616e2c0L,0x9c25f4c8ee1854c1L,0x4d3222a58f342a73L,
+        0x0807804fa027c952L } },
+    /* 4 << 140 */
+    { { 0xc222653a4f0d56f3L,0x961e4047ca28b805L,0x2c03f8b04a73434bL,
+        0x4c966787ab712a19L },
+      { 0xcc196c42864fee42L,0xc1be93da5b0ece5cL,0xa87d9f22c131c159L,
+        0x2bb6d593dce45655L } },
+    /* 5 << 140 */
+    { { 0x22c49ec9b809b7ceL,0x8a41486be2c72c2cL,0x813b9420fea0bf36L,
+        0xb3d36ee9a66dac69L },
+      { 0x6fddc08a328cc987L,0x0a3bcd2c3a326461L,0x7103c49dd810dbbaL,
+        0xf9d81a284b78a4c4L } },
+    /* 6 << 140 */
+    { { 0x3de865ade4d55941L,0xdedafa5e30384087L,0x6f414abb4ef18b9bL,
+        0x9ee9ea42faee5268L },
+      { 0x260faa1637a55a4aL,0xeb19a514015f93b9L,0x51d7ebd29e9c3598L,
+        0x523fc56d1932178eL } },
+    /* 7 << 140 */
+    { { 0x501d070cb98fe684L,0xd60fbe9a124a1458L,0xa45761c892bc6b3fL,
+        0xf5384858fe6f27cbL },
+      { 0x4b0271f7b59e763bL,0x3d4606a95b5a8e5eL,0x1eda5d9b05a48292L,
+        0xda7731d0e6fec446L } },
+    /* 8 << 140 */
+    { { 0xa3e3369390d45871L,0xe976404006166d8dL,0xb5c3368289a90403L,
+        0x4bd1798372f1d637L },
+      { 0xa616679ed5d2c53aL,0x5ec4bcd8fdcf3b87L,0xae6d7613b66a694eL,
+        0x7460fc76e3fc27e5L } },
+    /* 9 << 140 */
+    { { 0x70469b8295caabeeL,0xde024ca5889501e3L,0x6bdadc06076ed265L,
+        0x0cb1236b5a0ef8b2L },
+      { 0x4065ddbf0972ebf9L,0xf1dd387522aca432L,0xa88b97cf744aff76L,
+        0xd1359afdfe8e3d24L } },
+    /* 10 << 140 */
+    { { 0x52a3ba2b91502cf3L,0x2c3832a8084db75dL,0x04a12dddde30b1c9L,
+        0x7802eabce31fd60cL },
+      { 0x33707327a37fddabL,0x65d6f2abfaafa973L,0x3525c5b811e6f91aL,
+        0x76aeb0c95f46530bL } },
+    /* 11 << 140 */
+    { { 0xe8815ff62f93a675L,0xa6ec968405f48679L,0x6dcbb556358ae884L,
+        0x0af61472e19e3873L },
+      { 0x72334372a5f696beL,0xc65e57ea6f22fb70L,0x268da30c946cea90L,
+        0x136a8a8765681b2aL } },
+    /* 12 << 140 */
+    { { 0xad5e81dc0f9f44d4L,0xf09a69602c46585aL,0xd1649164c447d1b1L,
+        0x3b4b36c8879dc8b1L },
+      { 0x20d4177b3b6b234cL,0x096a25051730d9d0L,0x0611b9b8ef80531dL,
+        0xba904b3b64bb495dL } },
+    /* 13 << 140 */
+    { { 0x1192d9d493a3147aL,0x9f30a5dc9a565545L,0x90b1f9cb6ef07212L,
+        0x299585460d87fc13L },
+      { 0xd3323effc17db9baL,0xcb18548ccb1644a8L,0x18a306d44f49ffbcL,
+        0x28d658f14c2e8684L } },
+    /* 14 << 140 */
+    { { 0x44ba60cda99f8c71L,0x67b7abdb4bf742ffL,0x66310f9c914b3f99L,
+        0xae430a32f412c161L },
+      { 0x1e6776d388ace52fL,0x4bc0fa2452d7067dL,0x03c286aa8f07cd1bL,
+        0x4cb8f38ca985b2c1L } },
+    /* 15 << 140 */
+    { { 0x83ccbe808c3bff36L,0x005a0bd25263e575L,0x460d7dda259bdcd1L,
+        0x4a1c5642fa5cab6bL },
+      { 0x2b7bdbb99fe4fc88L,0x09418e28cc97bbb5L,0xd8274fb4a12321aeL,
+        0xb137007d5c87b64eL } },
+    /* 16 << 140 */
+    { { 0x80531fe1c63c4962L,0x50541e89981fdb25L,0xdc1291a1fd4c2b6bL,
+        0xc0693a17a6df4fcaL },
+      { 0xb2c4604e0117f203L,0x245f19630a99b8d0L,0xaedc20aac6212c44L,
+        0xb1ed4e56520f52a8L } },
+    /* 17 << 140 */
+    { { 0xfe48f575f8547be3L,0x0a7033cda9e45f98L,0x4b45d3a918c50100L,
+        0xb2a6cd6aa61d41daL },
+      { 0x60bbb4f557933c6bL,0xa7538ebd2b0d7ffcL,0x9ea3ab8d8cd626b6L,
+        0x8273a4843601625aL } },
+    /* 18 << 140 */
+    { { 0x888598450168e508L,0x8cbc9bb299a94abdL,0x713ac792fab0a671L,
+        0xa3995b196c9ebffcL },
+      { 0xe711668e1239e152L,0x56892558bbb8dff4L,0x8bfc7dabdbf17963L,
+        0x5b59fe5ab3de1253L } },
+    /* 19 << 140 */
+    { { 0x7e3320eb34a9f7aeL,0xe5e8cf72d751efe4L,0x7ea003bcd9be2f37L,
+        0xc0f551a0b6c08ef7L },
+      { 0x56606268038f6725L,0x1dd38e356d92d3b6L,0x07dfce7cc3cbd686L,
+        0x4e549e04651c5da8L } },
+    /* 20 << 140 */
+    { { 0x4058f93b08b19340L,0xc2fae6f4cac6d89dL,0x4bad8a8c8f159cc7L,
+        0x0ddba4b3cb0b601cL },
+      { 0xda4fc7b51dd95f8cL,0x1d163cd7cea5c255L,0x30707d06274a8c4cL,
+        0x79d9e0082802e9ceL } },
+    /* 21 << 140 */
+    { { 0x02a29ebfe6ddd505L,0x37064e74b50bed1aL,0x3f6bae65a7327d57L,
+        0x3846f5f1f83920bcL },
+      { 0x87c3749160df1b9bL,0x4cfb28952d1da29fL,0x10a478ca4ed1743cL,
+        0x390c60303edd47c6L } },
+    /* 22 << 140 */
+    { { 0x8f3e53128c0a78deL,0xccd02bda1e85df70L,0xd6c75c03a61b6582L,
+        0x0762921cfc0eebd1L },
+      { 0xd34d0823d85010c0L,0xd73aaacb0044cf1fL,0xfb4159bba3b5e78aL,
+        0x2287c7f7e5826f3fL } },
+    /* 23 << 140 */
+    { { 0x4aeaf742580b1a01L,0xf080415d60423b79L,0xe12622cda7dea144L,
+        0x49ea499659d62472L },
+      { 0xb42991ef571f3913L,0x0610f214f5b25a8aL,0x47adc58530b79e8fL,
+        0xf90e3df607a065a2L } },
+    /* 24 << 140 */
+    { { 0x5d0a5deb43e2e034L,0x53fb5a34444024aaL,0xa8628c686b0c9f7fL,
+        0x9c69c29cac563656L },
+      { 0x5a231febbace47b6L,0xbdce02899ea5a2ecL,0x05da1fac9463853eL,
+        0x96812c52509e78aaL } },
+    /* 25 << 140 */
+    { { 0xd3fb577157151692L,0xeb2721f8d98e1c44L,0xc050608732399be1L,
+        0xda5a5511d979d8b8L },
+      { 0x737ed55dc6f56780L,0xe20d30040dc7a7f4L,0x02ce7301f5941a03L,
+        0x91ef5215ed30f83aL } },
+    /* 26 << 140 */
+    { { 0x28727fc14092d85fL,0x72d223c65c49e41aL,0xa7cf30a2ba6a4d81L,
+        0x7c086209b030d87dL },
+      { 0x04844c7dfc588b09L,0x728cd4995874bbb0L,0xcc1281eee84c0495L,
+        0x0769b5baec31958fL } },
+    /* 27 << 140 */
+    { { 0x665c228bf99c2471L,0xf2d8a11b191eb110L,0x4594f494d36d7024L,
+        0x482ded8bcdcb25a1L },
+      { 0xc958a9d8dadd4885L,0x7004477ef1d2b547L,0x0a45f6ef2a0af550L,
+        0x4fc739d62f8d6351L } },
+    /* 28 << 140 */
+    { { 0x75cdaf27786f08a9L,0x8700bb2642c2737fL,0x855a71411c4e2670L,
+        0x810188c115076fefL },
+      { 0xc251d0c9abcd3297L,0xae4c8967f48108ebL,0xbd146de718ceed30L,
+        0xf9d4f07ac986bcedL } },
+    /* 29 << 140 */
+    { { 0x5ad98ed583fa1e08L,0x7780d33ebeabd1fbL,0xe330513c903b1196L,
+        0xba11de9ea47bc8c4L },
+      { 0x684334da02c2d064L,0x7ecf360da48de23bL,0x57a1b4740a9089d8L,
+        0xf28fa439ff36734cL } },
+    /* 30 << 140 */
+    { { 0xf2a482cbea4570b3L,0xee65d68ba5ebcee9L,0x988d0036b9694cd5L,
+        0x53edd0e937885d32L },
+      { 0xe37e3307beb9bc6dL,0xe9abb9079f5c6768L,0x4396ccd551f2160fL,
+        0x2500888c47336da6L } },
+    /* 31 << 140 */
+    { { 0x383f9ed9926fce43L,0x809dd1c704da2930L,0x30f6f5968a4cb227L,
+        0x0d700c7f73a56b38L },
+      { 0x1825ea33ab64a065L,0xaab9b7351338df80L,0x1516100d9b63f57fL,
+        0x2574395a27a6a634L } },
+    /* 32 << 140 */
+    { { 0xb5560fb6700a1acdL,0xe823fd73fd999681L,0xda915d1f6cb4e1baL,
+        0x0d0301186ebe00a3L },
+      { 0x744fb0c989fca8cdL,0x970d01dbf9da0e0bL,0x0ad8c5647931d76fL,
+        0xb15737bff659b96aL } },
+    /* 33 << 140 */
+    { { 0xdc9933e8a8b484e7L,0xb2fdbdf97a26dec7L,0x2349e9a49f1f0136L,
+        0x7860368e70fddddbL },
+      { 0xd93d2c1cf9ad3e18L,0x6d6c5f17689f4e79L,0x7a544d91b24ff1b6L,
+        0x3e12a5ebfe16cd8cL } },
+    /* 34 << 140 */
+    { { 0x543574e9a56b872fL,0xa1ad550cfcf68ea2L,0x689e37d23f560ef7L,
+        0x8c54b9cac9d47a8bL },
+      { 0x46d40a4a088ac342L,0xec450c7c1576c6d0L,0xb589e31c1f9689e9L,
+        0xdacf2602b8781718L } },
+    /* 35 << 140 */
+    { { 0xa89237c6c8cb6b42L,0x1326fc93b96ef381L,0x55d56c6db5f07825L,
+        0xacba2eea7449e22dL },
+      { 0x74e0887a633c3000L,0xcb6cd172d7cbcf71L,0x309e81dec36cf1beL,
+        0x07a18a6d60ae399bL } },
+    /* 36 << 140 */
+    { { 0xb36c26799edce57eL,0x52b892f4df001d41L,0xd884ae5d16a1f2c6L,
+        0x9b329424efcc370aL },
+      { 0x3120daf2bd2e21dfL,0x55298d2d02470a99L,0x0b78af6ca05db32eL,
+        0x5c76a331601f5636L } },
+    /* 37 << 140 */
+    { { 0xaae861fff8a4f29cL,0x70dc9240d68f8d49L,0x960e649f81b1321cL,
+        0x3d2c801b8792e4ceL },
+      { 0xf479f77242521876L,0x0bed93bc416c79b1L,0xa67fbc05263e5bc9L,
+        0x01e8e630521db049L } },
+    /* 38 << 140 */
+    { { 0x76f26738c6f3431eL,0xe609cb02e3267541L,0xb10cff2d818c877cL,
+        0x1f0e75ce786a13cbL },
+      { 0xf4fdca641158544dL,0x5d777e896cb71ed0L,0x3c233737a9aa4755L,
+        0x7b453192e527ab40L } },
+    /* 39 << 140 */
+    { { 0xdb59f68839f05ffeL,0x8f4f4be06d82574eL,0xcce3450cee292d1bL,
+        0xaa448a1261ccd086L },
+      { 0xabce91b3f7914967L,0x4537f09b1908a5edL,0xa812421ef51042e7L,
+        0xfaf5cebcec0b3a34L } },
+    /* 40 << 140 */
+    { { 0x730ffd874ca6b39aL,0x70fb72ed02efd342L,0xeb4735f9d75c8edbL,
+        0xc11f2157c278aa51L },
+      { 0xc459f635bf3bfebfL,0x3a1ff0b46bd9601fL,0xc9d12823c420cb73L,
+        0x3e9af3e23c2915a3L } },
+    /* 41 << 140 */
+    { { 0xe0c82c72b41c3440L,0x175239e5e3039a5fL,0xe1084b8a558795a3L,
+        0x328d0a1dd01e5c60L },
+      { 0x0a495f2ed3788a04L,0x25d8ff1666c11a9fL,0xf5155f059ed692d6L,
+        0x954fa1074f425fe4L } },
+    /* 42 << 140 */
+    { { 0xd16aabf2e98aaa99L,0x90cd8ba096b0f88aL,0x957f4782c154026aL,
+        0x54ee073452af56d2L },
+      { 0xbcf89e5445b4147aL,0x3d102f219a52816cL,0x6808517e39b62e77L,
+        0x92e2542169169ad8L } },
+    /* 43 << 140 */
+    { { 0xd721d871bb608558L,0x60e4ebaef6d4ff9bL,0x0ba1081941f2763eL,
+        0xca2e45be51ee3247L },
+      { 0x66d172ec2bfd7a5fL,0x528a8f2f74d0b12dL,0xe17f1e38dabe70dcL,
+        0x1d5d73169f93983cL } },
+    /* 44 << 140 */
+    { { 0x51b2184adf423e31L,0xcb417291aedb1a10L,0x2054ca93625bcab9L,
+        0x54396860a98998f0L },
+      { 0x4e53f6c4a54ae57eL,0x0ffeb590ee648e9dL,0xfbbdaadc6afaf6bcL,
+        0xf88ae796aa3bfb8aL } },
+    /* 45 << 140 */
+    { { 0x209f1d44d2359ed9L,0xac68dd03f3544ce2L,0xf378da47fd51e569L,
+        0xe1abd8602cc80097L },
+      { 0x23ca18d9343b6e3aL,0x480797e8b40a1baeL,0xd1f0c717533f3e67L,
+        0x4489697006e6cdfcL } },
+    /* 46 << 140 */
+    { { 0x8ca2105552a82e8dL,0xb2caf78578460cdcL,0x4c1b7b62e9037178L,
+        0xefc09d2cdb514b58L },
+      { 0x5f2df9ee9113be5cL,0x2fbda78fb3f9271cL,0xe09a81af8f83fc54L,
+        0x06b138668afb5141L } },
+    /* 47 << 140 */
+    { { 0x38f6480f43e3865dL,0x72dd77a81ddf47d9L,0xf2a8e9714c205ff7L,
+        0x46d449d89d088ad8L },
+      { 0x926619ea185d706fL,0xe47e02ebc7dd7f62L,0xe7f120a78cbc2031L,
+        0xc18bef00998d4ac9L } },
+    /* 48 << 140 */
+    { { 0x18f37a9c6bdf22daL,0xefbc432f90dc82dfL,0xc52cef8e5d703651L,
+        0x82887ba0d99881a5L },
+      { 0x7cec9ddab920ec1dL,0xd0d7e8c3ec3e8d3bL,0x445bc3954ca88747L,
+        0xedeaa2e09fd53535L } },
+    /* 49 << 140 */
+    { { 0x461b1d936cc87475L,0xd92a52e26d2383bdL,0xfabccb59d7903546L,
+        0x6111a7613d14b112L },
+      { 0x0ae584feb3d5f612L,0x5ea69b8d60e828ecL,0x6c07898554087030L,
+        0x649cab04ac4821feL } },
+    /* 50 << 140 */
+    { { 0x25ecedcf8bdce214L,0xb5622f7286af7361L,0x0e1227aa7038b9e2L,
+        0xd0efb273ac20fa77L },
+      { 0x817ff88b79df975bL,0x856bf2861999503eL,0xb4d5351f5038ec46L,
+        0x740a52c5fc42af6eL } },
+    /* 51 << 140 */
+    { { 0x2e38bb152cbb1a3fL,0xc3eb99fe17a83429L,0xca4fcbf1dd66bb74L,
+        0x880784d6cde5e8fcL },
+      { 0xddc84c1cb4e7a0beL,0x8780510dbd15a72fL,0x44bcf1af81ec30e1L,
+        0x141e50a80a61073eL } },
+    /* 52 << 140 */
+    { { 0x0d95571847be87aeL,0x68a61417f76a4372L,0xf57e7e87c607c3d3L,
+        0x043afaf85252f332L },
+      { 0xcc14e1211552a4d2L,0xb6dee692bb4d4ab4L,0xb6ab74c8a03816a4L,
+        0x84001ae46f394a29L } },
+    /* 53 << 140 */
+    { { 0x5bed8344d795fb45L,0x57326e7db79f55a5L,0xc9533ce04accdffcL,
+        0x53473caf3993fa04L },
+      { 0x7906eb93a13df4c8L,0xa73e51f697cbe46fL,0xd1ab3ae10ae4ccf8L,
+        0x256145088a5b3dbcL } },
+    /* 54 << 140 */
+    { { 0x61eff96211a71b27L,0xdf71412b6bb7fa39L,0xb31ba6b82bd7f3efL,
+        0xb0b9c41569180d29L },
+      { 0xeec14552014cdde5L,0x702c624b227b4bbbL,0x2b15e8c2d3e988f3L,
+        0xee3bcc6da4f7fd04L } },
+    /* 55 << 140 */
+    { { 0x9d00822a42ac6c85L,0x2db0cea61df9f2b7L,0xd7cad2ab42de1e58L,
+        0x346ed5262d6fbb61L },
+      { 0xb39629951a2faf09L,0x2fa8a5807c25612eL,0x30ae04da7cf56490L,
+        0x756629080eea3961L } },
+    /* 56 << 140 */
+    { { 0x3609f5c53d080847L,0xcb081d395241d4f6L,0xb4fb381077961a63L,
+        0xc20c59842abb66fcL },
+      { 0x3d40aa7cf902f245L,0x9cb127364e536b1eL,0x5eda24da99b3134fL,
+        0xafbd9c695cd011afL } },
+    /* 57 << 140 */
+    { { 0x9a16e30ac7088c7dL,0x5ab657103207389fL,0x1b09547fe7407a53L,
+        0x2322f9d74fdc6eabL },
+      { 0xc0f2f22d7430de4dL,0x19382696e68ca9a9L,0x17f1eff1918e5868L,
+        0xe3b5b635586f4204L } },
+    /* 58 << 140 */
+    { { 0x146ef9803fbc4341L,0x359f2c805b5eed4eL,0x9f35744e7482e41dL,
+        0x9a9ac3ecf3b224c2L },
+      { 0x9161a6fe91fc50aeL,0x89ccc66bc613fa7cL,0x89268b14c732f15aL,
+        0x7cd6f4e2b467ed03L } },
+    /* 59 << 140 */
+    { { 0xfbf79869ce56b40eL,0xf93e094cc02dde98L,0xefe0c3a8edee2cd7L,
+        0x90f3ffc0b268fd42L },
+      { 0x81a7fd5608241aedL,0x95ab7ad800b1afe8L,0x401270563e310d52L,
+        0xd3ffdeb109d9fc43L } },
+    /* 60 << 140 */
+    { { 0xc8f85c91d11a8594L,0x2e74d25831cf6db8L,0x829c7ca302b5dfd0L,
+        0xe389cfbe69143c86L },
+      { 0xd01b6405941768d8L,0x4510399503bf825dL,0xcc4ee16656cd17e2L,
+        0xbea3c283ba037e79L } },
+    /* 61 << 140 */
+    { { 0x4e1ac06ed9a47520L,0xfbfe18aaaf852404L,0x5615f8e28087648aL,
+        0x7301e47eb9d150d9L },
+      { 0x79f9f9ddb299b977L,0x76697a7ba5b78314L,0x10d674687d7c90e7L,
+        0x7afffe03937210b5L } },
+    /* 62 << 140 */
+    { { 0x5aef3e4b28c22ceeL,0xefb0ecd809fd55aeL,0x4cea71320d2a5d6aL,
+        0x9cfb5fa101db6357L },
+      { 0x395e0b57f36e1ac5L,0x008fa9ad36cafb7dL,0x8f6cdf705308c4dbL,
+        0x51527a3795ed2477L } },
+    /* 63 << 140 */
+    { { 0xba0dee305bd21311L,0x6ed41b22909c90d7L,0xc5f6b7587c8696d3L,
+        0x0db8eaa83ce83a80L },
+      { 0xd297fe37b24b4b6fL,0xfe58afe8522d1f0dL,0x973587368c98dbd9L,
+        0x6bc226ca9454a527L } },
+    /* 64 << 140 */
+    { { 0xa12b384ece53c2d0L,0x779d897d5e4606daL,0xa53e47b073ec12b0L,
+        0x462dbbba5756f1adL },
+      { 0x69fe09f2cafe37b6L,0x273d1ebfecce2e17L,0x8ac1d5383cf607fdL,
+        0x8035f7ff12e10c25L } },
+    /* 0 << 147 */
+    { { 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 << 147 */
+    { { 0x854d34c77e6c5520L,0xc27df9efdcb9ea58L,0x405f2369d686666dL,
+        0x29d1febf0417aa85L },
+      { 0x9846819e93470afeL,0x3e6a9669e2a27f9eL,0x24d008a2e31e6504L,
+        0xdba7cecf9cb7680aL } },
+    /* 2 << 147 */
+    { { 0xecaff541338d6e43L,0x56f7dd734541d5ccL,0xb5d426de96bc88caL,
+        0x48d94f6b9ed3a2c3L },
+      { 0x6354a3bb2ef8279cL,0xd575465b0b1867f2L,0xef99b0ff95225151L,
+        0xf3e19d88f94500d8L } },
+    /* 3 << 147 */
+    { { 0x92a83268e32dd620L,0x913ec99f627849a2L,0xedd8fdfa2c378882L,
+        0xaf96f33eee6f8cfeL },
+      { 0xc06737e5dc3fa8a5L,0x236bb531b0b03a1dL,0x33e59f2989f037b0L,
+        0x13f9b5a7d9a12a53L } },
+    /* 4 << 147 */
+    { { 0x0d0df6ce51efb310L,0xcb5b2eb4958df5beL,0xd6459e2936158e59L,
+        0x82aae2b91466e336L },
+      { 0xfb658a39411aa636L,0x7152ecc5d4c0a933L,0xf10c758a49f026b7L,
+        0xf4837f97cb09311fL } },
+    /* 5 << 147 */
+    { { 0xddfb02c4c753c45fL,0x18ca81b6f9c840feL,0x846fd09ab0f8a3e6L,
+        0xb1162adde7733dbcL },
+      { 0x7070ad20236e3ab6L,0xf88cdaf5b2a56326L,0x05fc8719997cbc7aL,
+        0x442cd4524b665272L } },
+    /* 6 << 147 */
+    { { 0x7807f364b71698f5L,0x6ba418d29f7b605eL,0xfd20b00fa03b2cbbL,
+        0x883eca37da54386fL },
+      { 0xff0be43ff3437f24L,0xe910b432a48bb33cL,0x4963a128329df765L,
+        0xac1dd556be2fe6f7L } },
+    /* 7 << 147 */
+    { { 0x557610f924a0a3fcL,0x38e17bf4e881c3f9L,0x6ba84fafed0dac99L,
+        0xd4a222c359eeb918L },
+      { 0xc79c1dbe13f542b6L,0x1fc65e0de425d457L,0xeffb754f1debb779L,
+        0x638d8fd09e08af60L } },
+    /* 8 << 147 */
+    { { 0x994f523a626332d5L,0x7bc388335561bb44L,0x005ed4b03d845ea2L,
+        0xd39d3ee1c2a1f08aL },
+      { 0x6561fdd3e7676b0dL,0x620e35fffb706017L,0x36ce424ff264f9a8L,
+        0xc4c3419fda2681f7L } },
+    /* 9 << 147 */
+    { { 0xfb6afd2f69beb6e8L,0x3a50b9936d700d03L,0xc840b2ad0c83a14fL,
+        0x573207be54085befL },
+      { 0x5af882e309fe7e5bL,0x957678a43b40a7e1L,0x172d4bdd543056e2L,
+        0x9c1b26b40df13c0aL } },
+    /* 10 << 147 */
+    { { 0x1c30861cf405ff06L,0xebac86bd486e828bL,0xe791a971636933fcL,
+        0x50e7c2be7aeee947L },
+      { 0xc3d4a095fa90d767L,0xae60eb7be670ab7bL,0x17633a64397b056dL,
+        0x93a21f33105012aaL } },
+    /* 11 << 147 */
+    { { 0x663c370babb88643L,0x91df36d722e21599L,0x183ba8358b761671L,
+        0x381eea1d728f3bf1L },
+      { 0xb9b2f1ba39966e6cL,0x7c464a28e7295492L,0x0fd5f70a09b26b7fL,
+        0xa9aba1f9fbe009dfL } },
+    /* 12 << 147 */
+    { { 0x857c1f22369b87adL,0x3c00e5d932fca556L,0x1ad74cab90b06466L,
+        0xa7112386550faaf2L },
+      { 0x7435e1986d9bd5f5L,0x2dcc7e3859c3463fL,0xdc7df748ca7bd4b2L,
+        0x13cd4c089dec2f31L } },
+    /* 13 << 147 */
+    { { 0x0d3b5df8e3237710L,0x0dadb26ecbd2f7b0L,0x9f5966abe4aa082bL,
+        0x666ec8de350e966eL },
+      { 0x1bfd1ed5ee524216L,0xcd93c59b41dab0b6L,0x658a8435d186d6baL,
+        0x1b7d34d2159d1195L } },
+    /* 14 << 147 */
+    { { 0x5936e46022caf46bL,0x6a45dd8f9a96fe4fL,0xf7925434b98f474eL,
+        0x414104120053ef15L },
+      { 0x71cf8d1241de97bfL,0xb8547b61bd80bef4L,0xb47d3970c4db0037L,
+        0xf1bcd328fef20dffL } },
+    /* 15 << 147 */
+    { { 0x31a92e0910caad67L,0x1f5919605531a1e1L,0x3bb852e05f4fc840L,
+        0x63e297ca93a72c6cL },
+      { 0x3c2b0b2e49abad67L,0x6ec405fced3db0d9L,0xdc14a5307fef1d40L,
+        0xccd19846280896fcL } },
+    /* 16 << 147 */
+    { { 0x00f831769bb81648L,0xd69eb485653120d0L,0xd17d75f44ccabc62L,
+        0x34a07f82b749fcb1L },
+      { 0x2c3af787bbfb5554L,0xb06ed4d062e283f8L,0x5722889fa19213a0L,
+        0x162b085edcf3c7b4L } },
+    /* 17 << 147 */
+    { { 0xbcaecb31e0dd3ecaL,0xc6237fbce52f13a5L,0xcc2b6b0327bac297L,
+        0x2ae1cac5b917f54aL },
+      { 0x474807d47845ae4fL,0xfec7dd92ce5972e0L,0xc3bd25411d7915bbL,
+        0x66f85dc4d94907caL } },
+    /* 18 << 147 */
+    { { 0xd981b888bdbcf0caL,0xd75f5da6df279e9fL,0x128bbf247054e934L,
+        0x3c6ff6e581db134bL },
+      { 0x795b7cf4047d26e4L,0xf370f7b85049ec37L,0xc6712d4dced945afL,
+        0xdf30b5ec095642bcL } },
+    /* 19 << 147 */
+    { { 0x9b034c624896246eL,0x5652c016ee90bbd1L,0xeb38636f87fedb73L,
+        0x5e32f8470135a613L },
+      { 0x0703b312cf933c83L,0xd05bb76e1a7f47e6L,0x825e4f0c949c2415L,
+        0x569e56227250d6f8L } },
+    /* 20 << 147 */
+    { { 0xbbe9eb3a6568013eL,0x8dbd203f22f243fcL,0x9dbd7694b342734aL,
+        0x8f6d12f846afa984L },
+      { 0xb98610a2c9eade29L,0xbab4f32347dd0f18L,0x5779737b671c0d46L,
+        0x10b6a7c6d3e0a42aL } },
+    /* 21 << 147 */
+    { { 0xfb19ddf33035b41cL,0xd336343f99c45895L,0x61fe493854c857e5L,
+        0xc4d506beae4e57d5L },
+      { 0x3cd8c8cbbbc33f75L,0x7281f08a9262c77dL,0x083f4ea6f11a2823L,
+        0x8895041e9fba2e33L } },
+    /* 22 << 147 */
+    { { 0xfcdfea499c438edfL,0x7678dcc391edba44L,0xf07b3b87e2ba50f0L,
+        0xc13888ef43948c1bL },
+      { 0xc2135ad41140af42L,0x8e5104f3926ed1a7L,0xf24430cb88f6695fL,
+        0x0ce0637b6d73c120L } },
+    /* 23 << 147 */
+    { { 0xb2db01e6fe631e8fL,0x1c5563d7d7bdd24bL,0x8daea3ba369ad44fL,
+        0x000c81b68187a9f9L },
+      { 0x5f48a951aae1fd9aL,0xe35626c78d5aed8aL,0x209527630498c622L,
+        0x76d17634773aa504L } },
+    /* 24 << 147 */
+    { { 0x36d90ddaeb300f7aL,0x9dcf7dfcedb5e801L,0x645cb26874d5244cL,
+        0xa127ee79348e3aa2L },
+      { 0x488acc53575f1dbbL,0x95037e8580e6161eL,0x57e59283292650d0L,
+        0xabe67d9914938216L } },
+    /* 25 << 147 */
+    { { 0x3c7f944b3f8e1065L,0xed908cb6330e8924L,0x08ee8fd56f530136L,
+        0x2227b7d5d7ffc169L },
+      { 0x4f55c893b5cd6dd5L,0x82225e11a62796e8L,0x5c6cead1cb18e12cL,
+        0x4381ae0c84f5a51aL } },
+    /* 26 << 147 */
+    { { 0x345913d37fafa4c8L,0x3d9180820491aac0L,0x9347871f3e69264cL,
+        0xbea9dd3cb4f4f0cdL },
+      { 0xbda5d0673eadd3e7L,0x0033c1b80573bcd8L,0x255893795da2486cL,
+        0xcb89ee5b86abbee7L } },
+    /* 27 << 147 */
+    { { 0x8fe0a8f322532e5dL,0xb6410ff0727dfc4cL,0x619b9d58226726dbL,
+        0x5ec256697a2b2dc7L },
+      { 0xaf4d2e064c3beb01L,0x852123d07acea556L,0x0e9470faf783487aL,
+        0x75a7ea045664b3ebL } },
+    /* 28 << 147 */
+    { { 0x4ad78f356798e4baL,0x9214e6e5c7d0e091L,0xc420b488b1290403L,
+        0x64049e0afc295749L },
+      { 0x03ef5af13ae9841fL,0xdbe4ca19b0b662a6L,0x46845c5ffa453458L,
+        0xf8dabf1910b66722L } },
+    /* 29 << 147 */
+    { { 0xb650f0aacce2793bL,0x71db851ec5ec47c1L,0x3eb78f3e3b234fa9L,
+        0xb0c60f35fc0106ceL },
+      { 0x05427121774eadbdL,0x25367fafce323863L,0x7541b5c9cd086976L,
+        0x4ff069e2dc507ad1L } },
+    /* 30 << 147 */
+    { { 0x741452568776e667L,0x6e76142cb23c6bb5L,0xdbf307121b3a8a87L,
+        0x60e7363e98450836L },
+      { 0x5741450eb7366d80L,0xe4ee14ca4837dbdfL,0xa765eb9b69d4316fL,
+        0x04548dca8ef43825L } },
+    /* 31 << 147 */
+    { { 0x9c9f4e4c5ae888ebL,0x733abb5156e9ac99L,0xdaad3c20ba6ac029L,
+        0x9b8dd3d32ba3e38eL },
+      { 0xa9bb4c920bc5d11aL,0xf20127a79c5f88a3L,0x4f52b06e161d3cb8L,
+        0x26c1ff096afaf0a6L } },
+    /* 32 << 147 */
+    { { 0x32670d2f7189e71fL,0xc64387485ecf91e7L,0x15758e57db757a21L,
+        0x427d09f8290a9ce5L },
+      { 0x846a308f38384a7aL,0xaac3acb4b0732b99L,0x9e94100917845819L,
+        0x95cba111a7ce5e03L } },
+    /* 33 << 147 */
+    { { 0x6f3d4f7fb00009c4L,0xb8396c278ff28b5fL,0xb1a9ae431c97975dL,
+        0x9d7ba8afe5d9fed5L },
+      { 0x338cf09f34f485b6L,0xbc0ddacc64122516L,0xa450da1205d471feL,
+        0x4c3a6250628dd8c9L } },
+    /* 34 << 147 */
+    { { 0x69c7d103d1295837L,0xa2893e503807eb2fL,0xd6e1e1debdb41491L,
+        0xc630745b5e138235L },
+      { 0xc892109e48661ae1L,0x8d17e7ebea2b2674L,0x00ec0f87c328d6b5L,
+        0x6d858645f079ff9eL } },
+    /* 35 << 147 */
+    { { 0x6cdf243e19115eadL,0x1ce1393e4bac4fcfL,0x2c960ed09c29f25bL,
+        0x59be4d8e9d388a05L },
+      { 0x0d46e06cd0def72bL,0xb923db5de0342748L,0xf7d3aacd936d4a3dL,
+        0x558519cc0b0b099eL } },
+    /* 36 << 147 */
+    { { 0x3ea8ebf8827097efL,0x259353dbd054f55dL,0x84c89abc6d2ed089L,
+        0x5c548b698e096a7cL },
+      { 0xd587f616994b995dL,0x4d1531f6a5845601L,0x792ab31e451fd9f0L,
+        0xc8b57bb265adf6caL } },
+    /* 37 << 147 */
+    { { 0x68440fcb1cd5ad73L,0xb9c860e66144da4fL,0x2ab286aa8462beb8L,
+        0xcc6b8fffef46797fL },
+      { 0xac820da420c8a471L,0x69ae05a177ff7fafL,0xb9163f39bfb5da77L,
+        0xbd03e5902c73ab7aL } },
+    /* 38 << 147 */
+    { { 0x7e862b5eb2940d9eL,0x3c663d864b9af564L,0xd8309031bde3033dL,
+        0x298231b2d42c5bc6L },
+      { 0x42090d2c552ad093L,0xa4799d1cff854695L,0x0a88b5d6d31f0d00L,
+        0xf8b40825a2f26b46L } },
+    /* 39 << 147 */
+    { { 0xec29b1edf1bd7218L,0xd491c53b4b24c86eL,0xd2fe588f3395ea65L,
+        0x6f3764f74456ef15L },
+      { 0xdb43116dcdc34800L,0xcdbcd456c1e33955L,0xefdb554074ab286bL,
+        0x948c7a51d18c5d7cL } },
+    /* 40 << 147 */
+    { { 0xeb81aa377378058eL,0x41c746a104411154L,0xa10c73bcfb828ac7L,
+        0x6439be919d972b29L },
+      { 0x4bf3b4b043a2fbadL,0x39e6dadf82b5e840L,0x4f7164086397bd4cL,
+        0x0f7de5687f1eeccbL } },
+    /* 41 << 147 */
+    { { 0x5865c5a1d2ffbfc1L,0xf74211fa4ccb6451L,0x66368a88c0b32558L,
+        0x5b539dc29ad7812eL },
+      { 0x579483d02f3af6f6L,0x5213207899934eceL,0x50b9650fdcc9e983L,
+        0xca989ec9aee42b8aL } },
+    /* 42 << 147 */
+    { { 0x6a44c829d6f62f99L,0x8f06a3094c2a7c0cL,0x4ea2b3a098a0cb0aL,
+        0x5c547b70beee8364L },
+      { 0x461d40e1682afe11L,0x9e0fc77a7b41c0a8L,0x79e4aefde20d5d36L,
+        0x2916e52032dd9f63L } },
+    /* 43 << 147 */
+    { { 0xf59e52e83f883fafL,0x396f96392b868d35L,0xc902a9df4ca19881L,
+        0x0fc96822db2401a6L },
+      { 0x4123758766f1c68dL,0x10fc6de3fb476c0dL,0xf8b6b579841f5d90L,
+        0x2ba8446cfa24f44aL } },
+    /* 44 << 147 */
+    { { 0xa237b920ef4a9975L,0x60bb60042330435fL,0xd6f4ab5acfb7e7b5L,
+        0xb2ac509783435391L },
+      { 0xf036ee2fb0d1ea67L,0xae779a6a74c56230L,0x59bff8c8ab838ae6L,
+        0xcd83ca999b38e6f0L } },
+    /* 45 << 147 */
+    { { 0xbb27bef5e33deed3L,0xe6356f6f001892a8L,0xbf3be6cc7adfbd3eL,
+        0xaecbc81c33d1ac9dL },
+      { 0xe4feb909e6e861dcL,0x90a247a453f5f801L,0x01c50acb27346e57L,
+        0xce29242e461acc1bL } },
+    /* 46 << 147 */
+    { { 0x04dd214a2f998a91L,0x271ee9b1d4baf27bL,0x7e3027d1e8c26722L,
+        0x21d1645c1820dce5L },
+      { 0x086f242c7501779cL,0xf0061407fa0e8009L,0xf23ce47760187129L,
+        0x05bbdedb0fde9bd0L } },
+    /* 47 << 147 */
+    { { 0x682f483225d98473L,0xf207fe855c658427L,0xb6fdd7ba4166ffa1L,
+        0x0c3140569eed799dL },
+      { 0x0db8048f4107e28fL,0x74ed387141216840L,0x74489f8f56a3c06eL,
+        0x1e1c005b12777134L } },
+    /* 48 << 147 */
+    { { 0xdb332a73f37ec3c3L,0xc65259bddd59eba0L,0x2291709cdb4d3257L,
+        0x9a793b25bd389390L },
+      { 0xf39fe34be43756f0L,0x2f76bdce9afb56c9L,0x9f37867a61208b27L,
+        0xea1d4307089972c3L } },
+    /* 49 << 147 */
+    { { 0x8c5953308bdf623aL,0x5f5accda8441fb7dL,0xfafa941832ddfd95L,
+        0x6ad40c5a0fde9be7L },
+      { 0x43faba89aeca8709L,0xc64a7cf12c248a9dL,0x1662025272637a76L,
+        0xaee1c79122b8d1bbL } },
+    /* 50 << 147 */
+    { { 0xf0f798fd21a843b2L,0x56e4ed4d8d005cb1L,0x355f77801f0d8abeL,
+        0x197b04cf34522326L },
+      { 0x41f9b31ffd42c13fL,0x5ef7feb2b40f933dL,0x27326f425d60bad4L,
+        0x027ecdb28c92cf89L } },
+    /* 51 << 147 */
+    { { 0x04aae4d14e3352feL,0x08414d2f73591b90L,0x5ed6124eb7da7d60L,
+        0xb985b9314d13d4ecL },
+      { 0xa592d3ab96bf36f9L,0x012dbed5bbdf51dfL,0xa57963c0df6c177dL,
+        0x010ec86987ca29cfL } },
+    /* 52 << 147 */
+    { { 0xba1700f6bf926dffL,0x7c9fdbd1f4bf6bc2L,0xdc18dc8f64da11f5L,
+        0xa6074b7ad938ae75L },
+      { 0x14270066e84f44a4L,0x99998d38d27b954eL,0xc1be8ab2b4f38e9aL,
+        0x8bb55bbf15c01016L } },
+    /* 53 << 147 */
+    { { 0xf73472b40ea2ab30L,0xd365a340f73d68ddL,0xc01a716819c2e1ebL,
+        0x32f49e3734061719L },
+      { 0xb73c57f101d8b4d6L,0x03c8423c26b47700L,0x321d0bc8a4d8826aL,
+        0x6004213c4bc0e638L } },
+    /* 54 << 147 */
+    { { 0xf78c64a1c1c06681L,0x16e0a16fef018e50L,0x31cbdf91db42b2b3L,
+        0xf8f4ffcee0d36f58L },
+      { 0xcdcc71cd4cc5e3e0L,0xd55c7cfaa129e3e0L,0xccdb6ba00fb2cbf1L,
+        0x6aba0005c4bce3cbL } },
+    /* 55 << 147 */
+    { { 0x501cdb30d232cfc4L,0x9ddcf12ed58a3cefL,0x02d2cf9c87e09149L,
+        0xdc5d7ec72c976257L },
+      { 0x6447986e0b50d7ddL,0x88fdbaf7807f112aL,0x58c9822ab00ae9f6L,
+        0x6abfb9506d3d27e0L } },
+    /* 56 << 147 */
+    { { 0xd0a744878a429f4fL,0x0649712bdb516609L,0xb826ba57e769b5dfL,
+        0x82335df21fc7aaf2L },
+      { 0x2389f0675c93d995L,0x59ac367a68677be6L,0xa77985ff21d9951bL,
+        0x038956fb85011cceL } },
+    /* 57 << 147 */
+    { { 0x608e48cbbb734e37L,0xc08c0bf22be5b26fL,0x17bbdd3bf9b1a0d9L,
+        0xeac7d89810483319L },
+      { 0xc95c4bafbc1a6deaL,0xfdd0e2bf172aafdbL,0x40373cbc8235c41aL,
+        0x14303f21fb6f41d5L } },
+    /* 58 << 147 */
+    { { 0xba0636210408f237L,0xcad3b09aecd2d1edL,0x4667855a52abb6a2L,
+        0xba9157dcaa8b417bL },
+      { 0xfe7f35074f013efbL,0x1b112c4baa38c4a2L,0xa1406a609ba64345L,
+        0xe53cba336993c80bL } },
+    /* 59 << 147 */
+    { { 0x45466063ded40d23L,0x3d5f1f4d54908e25L,0x9ebefe62403c3c31L,
+        0x274ea0b50672a624L },
+      { 0xff818d99451d1b71L,0x80e826438f79cf79L,0xa165df1373ce37f5L,
+        0xa744ef4ffe3a21fdL } },
+    /* 60 << 147 */
+    { { 0x73f1e7f5cf551396L,0xc616898e868c676bL,0x671c28c78c442c36L,
+        0xcfe5e5585e0a317dL },
+      { 0x1242d8187051f476L,0x56fad2a614f03442L,0x262068bc0a44d0f6L,
+        0xdfa2cd6ece6edf4eL } },
+    /* 61 << 147 */
+    { { 0x0f43813ad15d1517L,0x61214cb2377d44f5L,0xd399aa29c639b35fL,
+        0x42136d7154c51c19L },
+      { 0x9774711b08417221L,0x0a5546b352545a57L,0x80624c411150582dL,
+        0x9ec5c418fbc555bcL } },
+    /* 62 << 147 */
+    { { 0x2c87dcad771849f1L,0xb0c932c501d7bf6fL,0x6aa5cd3e89116eb2L,
+        0xd378c25a51ca7bd3L },
+      { 0xc612a0da9e6e3e31L,0x0417a54db68ad5d0L,0x00451e4a22c6edb8L,
+        0x9fbfe019b42827ceL } },
+    /* 63 << 147 */
+    { { 0x2fa92505ba9384a2L,0x21b8596e64ad69c1L,0x8f4fcc49983b35a6L,
+        0xde09376072754672L },
+      { 0x2f14ccc8f7bffe6dL,0x27566bff5d94263dL,0xb5b4e9c62df3ec30L,
+        0x94f1d7d53e6ea6baL } },
+    /* 64 << 147 */
+    { { 0x97b7851aaaca5e9bL,0x518aa52156713b97L,0x3357e8c7150a61f6L,
+        0x7842e7e2ec2c2b69L },
+      { 0x8dffaf656868a548L,0xd963bd82e068fc81L,0x64da5c8b65917733L,
+        0x927090ff7b247328L } },
+    /* 0 << 154 */
+    { { 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 << 154 */
+    { { 0x214bc9a7d298c241L,0xe3b697ba56807cfdL,0xef1c78024564eadbL,
+        0xdde8cdcfb48149c5L },
+      { 0x946bf0a75a4d2604L,0x27154d7f6c1538afL,0x95cc9230de5b1fccL,
+        0xd88519e966864f82L } },
+    /* 2 << 154 */
+    { { 0xb828dd1a7cb1282cL,0xa08d7626be46973aL,0x6baf8d40e708d6b2L,
+        0x72571fa14daeb3f3L },
+      { 0x85b1732ff22dfd98L,0x87ab01a70087108dL,0xaaaafea85988207aL,
+        0xccc832f869f00755L } },
+    /* 3 << 154 */
+    { { 0x964d950e36ff3bf0L,0x8ad20f6ff0b34638L,0x4d9177b3b5d7585fL,
+        0xcf839760ef3f019fL },
+      { 0x582fc5b38288c545L,0x2f8e4e9b13116bd1L,0xf91e1b2f332120efL,
+        0xcf5687242a17dd23L } },
+    /* 4 << 154 */
+    { { 0x488f1185ca8d9d1aL,0xadf2c77dd987ded2L,0x5f3039f060c46124L,
+        0xe5d70b7571e095f4L },
+      { 0x82d586506260e70fL,0x39d75ea7f750d105L,0x8cf3d0b175bac364L,
+        0xf3a7564d21d01329L } },
+    /* 5 << 154 */
+    { { 0x182f04cd2f52d2a7L,0x4fde149ae2df565aL,0xb80c5eeca79fb2f7L,
+        0xab491d7b22ddc897L },
+      { 0x99d76c18c6312c7fL,0xca0d5f3d6aa41a57L,0x71207325d15363a0L,
+        0xe82aa265beb252c2L } },
+    /* 6 << 154 */
+    { { 0x94ab4700ec3128c2L,0x6c76d8628e383f49L,0xdc36b150c03024ebL,
+        0xfb43947753daac69L },
+      { 0xfc68764a8dc79623L,0x5b86995db440fbb2L,0xd66879bfccc5ee0dL,
+        0x0522894295aa8bd3L } },
+    /* 7 << 154 */
+    { { 0xb51a40a51e6a75c1L,0x24327c760ea7d817L,0x0663018207774597L,
+        0xd6fdbec397fa7164L },
+      { 0x20c99dfb13c90f48L,0xd6ac5273686ef263L,0xc6a50bdcfef64eebL,
+        0xcd87b28186fdfc32L } },
+    /* 8 << 154 */
+    { { 0xb24aa43e3fcd3efcL,0xdd26c034b8088e9aL,0xa5ef4dc9bd3d46eaL,
+        0xa2f99d588a4c6a6fL },
+      { 0xddabd3552f1da46cL,0x72c3f8ce1afacdd1L,0xd90c4eee92d40578L,
+        0xd28bb41fca623b94L } },
+    /* 9 << 154 */
+    { { 0x50fc0711745edc11L,0x9dd9ad7d3dc87558L,0xce6931fbb49d1e64L,
+        0x6c77a0a2c98bd0f9L },
+      { 0x62b9a6296baf7cb1L,0xcf065f91ccf72d22L,0x7203cce979639071L,
+        0x09ae4885f9cb732fL } },
+    /* 10 << 154 */
+    { { 0x5e7c3becee8314f3L,0x1c068aeddbea298fL,0x08d381f17c80acecL,
+        0x03b56be8e330495bL },
+      { 0xaeffb8f29222882dL,0x95ff38f6c4af8bf7L,0x50e32d351fc57d8cL,
+        0x6635be5217b444f0L } },
+    /* 11 << 154 */
+    { { 0x04d15276a5177900L,0x4e1dbb47f6858752L,0x5b475622c615796cL,
+        0xa6fa0387691867bfL },
+      { 0xed7f5d562844c6d0L,0xc633cf9b03a2477dL,0xf6be5c402d3721d6L,
+        0xaf312eb7e9fd68e6L } },
+    /* 12 << 154 */
+    { { 0x242792d2e7417ce1L,0xff42bc71970ee7f5L,0x1ff4dc6d5c67a41eL,
+        0x77709b7b20882a58L },
+      { 0x3554731dbe217f2cL,0x2af2a8cd5bb72177L,0x58eee769591dd059L,
+        0xbb2930c94bba6477L } },
+    /* 13 << 154 */
+    { { 0x863ee0477d930cfcL,0x4c262ad1396fd1f4L,0xf4765bc8039af7e1L,
+        0x2519834b5ba104f6L },
+      { 0x7cd61b4cd105f961L,0xa5415da5d63bca54L,0x778280a088a1f17cL,
+        0xc49689492329512cL } },
+    /* 14 << 154 */
+    { { 0x174a9126cecdaa7aL,0xfc8c7e0e0b13247bL,0x29c110d23484c1c4L,
+        0xf8eb8757831dfc3bL },
+      { 0x022f0212c0067452L,0x3f6f69ee7b9b926cL,0x09032da0ef42daf4L,
+        0x79f00ade83f80de4L } },
+    /* 15 << 154 */
+    { { 0x6210db7181236c97L,0x74f7685b3ee0781fL,0x4df7da7ba3e41372L,
+        0x2aae38b1b1a1553eL },
+      { 0x1688e222f6dd9d1bL,0x576954485b8b6487L,0x478d21274b2edeaaL,
+        0xb2818fa51e85956aL } },
+    /* 16 << 154 */
+    { { 0x1e6adddaf176f2c0L,0x01ca4604e2572658L,0x0a404ded85342ffbL,
+        0x8cf60f96441838d6L },
+      { 0x9bbc691cc9071c4aL,0xfd58874434442803L,0x97101c85809c0d81L,
+        0xa7fb754c8c456f7fL } },
+    /* 17 << 154 */
+    { { 0xc95f3c5cd51805e1L,0xab4ccd39b299dca8L,0x3e03d20b47eaf500L,
+        0xfa3165c1d7b80893L },
+      { 0x005e8b54e160e552L,0xdc4972ba9019d11fL,0x21a6972e0c9a4a7aL,
+        0xa52c258f37840fd7L } },
+    /* 18 << 154 */
+    { { 0xf8559ff4c1e99d81L,0x08e1a7d6a3c617c0L,0xb398fd43248c6ba7L,
+        0x6ffedd91d1283794L },
+      { 0x8a6a59d2d629d208L,0xa9d141d53490530eL,0x42f6fc1838505989L,
+        0x09bf250d479d94eeL } },
+    /* 19 << 154 */
+    { { 0x223ad3b1b3822790L,0x6c5926c093b8971cL,0x609efc7e75f7fa62L,
+        0x45d66a6d1ec2d989L },
+      { 0x4422d663987d2792L,0x4a73caad3eb31d2bL,0xf06c2ac1a32cb9e6L,
+        0xd9445c5f91aeba84L } },
+    /* 20 << 154 */
+    { { 0x6af7a1d5af71013fL,0xe68216e50bedc946L,0xf4cba30bd27370a0L,
+        0x7981afbf870421ccL },
+      { 0x02496a679449f0e1L,0x86cfc4be0a47edaeL,0x3073c936b1feca22L,
+        0xf569461203f8f8fbL } },
+    /* 21 << 154 */
+    { { 0xd063b723901515eaL,0x4c6c77a5749cf038L,0x6361e360ab9e5059L,
+        0x596cf171a76a37c0L },
+      { 0x800f53fa6530ae7aL,0x0f5e631e0792a7a6L,0x5cc29c24efdb81c9L,
+        0xa269e8683f9c40baL } },
+    /* 22 << 154 */
+    { { 0xec14f9e12cb7191eL,0x78ea1bd8e5b08ea6L,0x3c65aa9b46332bb9L,
+        0x84cc22b3bf80ce25L },
+      { 0x0098e9e9d49d5bf1L,0xcd4ec1c619087da4L,0x3c9d07c5aef6e357L,
+        0x839a02689f8f64b8L } },
+    /* 23 << 154 */
+    { { 0xc5e9eb62c6d8607fL,0x759689f56aa995e4L,0x70464669bbb48317L,
+        0x921474bfe402417dL },
+      { 0xcabe135b2a354c8cL,0xd51e52d2812fa4b5L,0xec74109653311fe8L,
+        0x4f774535b864514bL } },
+    /* 24 << 154 */
+    { { 0xbcadd6715bde48f8L,0xc97038732189bc7dL,0x5d45299ec709ee8aL,
+        0xd1287ee2845aaff8L },
+      { 0x7d1f8874db1dbf1fL,0xea46588b990c88d6L,0x60ba649a84368313L,
+        0xd5fdcbce60d543aeL } },
+    /* 25 << 154 */
+    { { 0x90b46d43810d5ab0L,0x6739d8f904d7e5ccL,0x021c1a580d337c33L,
+        0x00a6116268e67c40L },
+      { 0x95ef413b379f0a1fL,0xfe126605e9e2ab95L,0x67578b852f5f199cL,
+        0xf5c003292cb84913L } },
+    /* 26 << 154 */
+    { { 0xf795643037577dd8L,0x83b82af429c5fe88L,0x9c1bea26cdbdc132L,
+        0x589fa0869c04339eL },
+      { 0x033e9538b13799dfL,0x85fa8b21d295d034L,0xdf17f73fbd9ddccaL,
+        0xf32bd122ddb66334L } },
+    /* 27 << 154 */
+    { { 0x55ef88a7858b044cL,0x1f0d69c25aa9e397L,0x55fd9cc340d85559L,
+        0xc774df727785ddb2L },
+      { 0x5dcce9f6d3bd2e1cL,0xeb30da20a85dfed0L,0x5ed7f5bbd3ed09c4L,
+        0x7d42a35c82a9c1bdL } },
+    /* 28 << 154 */
+    { { 0xcf3de9959890272dL,0x75f3432a3e713a10L,0x5e13479fe28227b8L,
+        0xb8561ea9fefacdc8L },
+      { 0xa6a297a08332aafdL,0x9b0d8bb573809b62L,0xd2fa1cfd0c63036fL,
+        0x7a16eb55bd64bda8L } },
+    /* 29 << 154 */
+    { { 0x3f5cf5f678e62ddcL,0x2267c45407fd752bL,0x5e361b6b5e437bbeL,
+        0x95c595018354e075L },
+      { 0xec725f85f2b254d9L,0x844b617d2cb52b4eL,0xed8554f5cf425fb5L,
+        0xab67703e2af9f312L } },
+    /* 30 << 154 */
+    { { 0x4cc34ec13cf48283L,0xb09daa259c8a705eL,0xd1e9d0d05b7d4f84L,
+        0x4df6ef64db38929dL },
+      { 0xe16b0763aa21ba46L,0xc6b1d178a293f8fbL,0x0ff5b602d520aabfL,
+        0x94d671bdc339397aL } },
+    /* 31 << 154 */
+    { { 0x7c7d98cf4f5792faL,0x7c5e0d6711215261L,0x9b19a631a7c5a6d4L,
+        0xc8511a627a45274dL },
+      { 0x0c16621ca5a60d99L,0xf7fbab88cf5e48cbL,0xab1e6ca2f7ddee08L,
+        0x83bd08cee7867f3cL } },
+    /* 32 << 154 */
+    { { 0xf7e48e8a2ac13e27L,0x4494f6df4eb1a9f5L,0xedbf84eb981f0a62L,
+        0x49badc32536438f0L },
+      { 0x50bea541004f7571L,0xbac67d10df1c94eeL,0x253d73a1b727bc31L,
+        0xb3d01cf230686e28L } },
+    /* 33 << 154 */
+    { { 0x51b77b1b55fd0b8bL,0xa099d183feec3173L,0x202b1fb7670e72b7L,
+        0xadc88b33a8e1635fL },
+      { 0x34e8216af989d905L,0xc2e68d2029b58d01L,0x11f81c926fe55a93L,
+        0x15f1462a8f296f40L } },
+    /* 34 << 154 */
+    { { 0x1915d375ea3d62f2L,0xa17765a301c8977dL,0x7559710ae47b26f6L,
+        0xe0bd29c8535077a5L },
+      { 0x615f976d08d84858L,0x370dfe8569ced5c1L,0xbbc7503ca734fa56L,
+        0xfbb9f1ec91ac4574L } },
+    /* 35 << 154 */
+    { { 0x95d7ec53060dd7efL,0xeef2dacd6e657979L,0x54511af3e2a08235L,
+        0x1e324aa41f4aea3dL },
+      { 0x550e7e71e6e67671L,0xbccd5190bf52faf7L,0xf880d316223cc62aL,
+        0x0d402c7e2b32eb5dL } },
+    /* 36 << 154 */
+    { { 0xa40bc039306a5a3bL,0x4e0a41fd96783a1bL,0xa1e8d39a0253cdd4L,
+        0x6480be26c7388638L },
+      { 0xee365e1d2285f382L,0x188d8d8fec0b5c36L,0x34ef1a481f0f4d82L,
+        0x1a8f43e1a487d29aL } },
+    /* 37 << 154 */
+    { { 0x8168226d77aefb3aL,0xf69a751e1e72c253L,0x8e04359ae9594df1L,
+        0x475ffd7dd14c0467L },
+      { 0xb5a2c2b13844e95cL,0x85caf647dd12ef94L,0x1ecd2a9ff1063d00L,
+        0x1dd2e22923843311L } },
+    /* 38 << 154 */
+    { { 0x38f0e09d73d17244L,0x3ede77468fc653f1L,0xae4459f5dc20e21cL,
+        0x00db2ffa6a8599eaL },
+      { 0x11682c3930cfd905L,0x4934d074a5c112a6L,0xbdf063c5568bfe95L,
+        0x779a440a016c441aL } },
+    /* 39 << 154 */
+    { { 0x0c23f21897d6fbdcL,0xd3a5cd87e0776aacL,0xcee37f72d712e8dbL,
+        0xfb28c70d26f74e8dL },
+      { 0xffe0c728b61301a0L,0xa6282168d3724354L,0x7ff4cb00768ffedcL,
+        0xc51b308803b02de9L } },
+    /* 40 << 154 */
+    { { 0xa5a8147c3902dda5L,0x35d2f706fe6973b4L,0x5ac2efcfc257457eL,
+        0x933f48d48700611bL },
+      { 0xc365af884912beb2L,0x7f5a4de6162edf94L,0xc646ba7c0c32f34bL,
+        0x632c6af3b2091074L } },
+    /* 41 << 154 */
+    { { 0x58d4f2e3753e43a9L,0x70e1d21724d4e23fL,0xb24bf729afede6a6L,
+        0x7f4a94d8710c8b60L },
+      { 0xaad90a968d4faa6aL,0xd9ed0b32b066b690L,0x52fcd37b78b6dbfdL,
+        0x0b64615e8bd2b431L } },
+    /* 42 << 154 */
+    { { 0x228e2048cfb9fad5L,0xbeaa386d240b76bdL,0x2d6681c890dad7bcL,
+        0x3e553fc306d38f5eL },
+      { 0xf27cdb9b9d5f9750L,0x3e85c52ad28c5b0eL,0x190795af5247c39bL,
+        0x547831ebbddd6828L } },
+    /* 43 << 154 */
+    { { 0xf327a2274a82f424L,0x36919c787e47f89dL,0xe478391943c7392cL,
+        0xf101b9aa2316fefeL },
+      { 0xbcdc9e9c1c5009d2L,0xfb55ea139cd18345L,0xf5b5e231a3ce77c7L,
+        0xde6b4527d2f2cb3dL } },
+    /* 44 << 154 */
+    { { 0x10f6a3339bb26f5fL,0x1e85db8e044d85b6L,0xc3697a0894197e54L,
+        0x65e18cc0a7cb4ea8L },
+      { 0xa38c4f50a471fe6eL,0xf031747a2f13439cL,0x53c4a6bac007318bL,
+        0xa8da3ee51deccb3dL } },
+    /* 45 << 154 */
+    { { 0x0555b31c558216b1L,0x90c7810c2f79e6c2L,0x9b669f4dfe8eed3cL,
+        0x70398ec8e0fac126L },
+      { 0xa96a449ef701b235L,0x0ceecdb3eb94f395L,0x285fc368d0cb7431L,
+        0x0d37bb5216a18c64L } },
+    /* 46 << 154 */
+    { { 0x05110d38b880d2ddL,0xa60f177b65930d57L,0x7da34a67f36235f5L,
+        0x47f5e17c183816b9L },
+      { 0xc7664b57db394af4L,0x39ba215d7036f789L,0x46d2ca0e2f27b472L,
+        0xc42647eef73a84b7L } },
+    /* 47 << 154 */
+    { { 0x44bc754564488f1dL,0xaa922708f4cf85d5L,0x721a01d553e4df63L,
+        0x649c0c515db46cedL },
+      { 0x6bf0d64e3cffcb6cL,0xe3bf93fe50f71d96L,0x75044558bcc194a0L,
+        0x16ae33726afdc554L } },
+    /* 48 << 154 */
+    { { 0xbfc01adf5ca48f3fL,0x64352f06e22a9b84L,0xcee54da1c1099e4aL,
+        0xbbda54e8fa1b89c0L },
+      { 0x166a3df56f6e55fbL,0x1ca44a2420176f88L,0x936afd88dfb7b5ffL,
+        0xe34c24378611d4a0L } },
+    /* 49 << 154 */
+    { { 0x7effbb7586142103L,0x6704ba1b1f34fc4dL,0x7c2a468f10c1b122L,
+        0x36b3a6108c6aace9L },
+      { 0xabfcc0a775a0d050L,0x066f91973ce33e32L,0xce905ef429fe09beL,
+        0x89ee25baa8376351L } },
+    /* 50 << 154 */
+    { { 0x2a3ede22fd29dc76L,0x7fd32ed936f17260L,0x0cadcf68284b4126L,
+        0x63422f08a7951fc8L },
+      { 0x562b24f40807e199L,0xfe9ce5d122ad4490L,0xc2f51b100db2b1b4L,
+        0xeb3613ffe4541d0dL } },
+    /* 51 << 154 */
+    { { 0xbd2c4a052680813bL,0x527aa55d561b08d6L,0xa9f8a40ea7205558L,
+        0xe3eea56f243d0becL },
+      { 0x7b853817a0ff58b3L,0xb67d3f651a69e627L,0x0b76bbb9a869b5d6L,
+        0xa3afeb82546723edL } },
+    /* 52 << 154 */
+    { { 0x5f24416d3e554892L,0x8413b53d430e2a45L,0x99c56aee9032a2a0L,
+        0x09432bf6eec367b1L },
+      { 0x552850c6daf0ecc1L,0x49ebce555bc92048L,0xdfb66ba654811307L,
+        0x1b84f7976f298597L } },
+    /* 53 << 154 */
+    { { 0x795904818d1d7a0dL,0xd9fabe033a6fa556L,0xa40f9c59ba9e5d35L,
+        0xcb1771c1f6247577L },
+      { 0x542a47cae9a6312bL,0xa34b3560552dd8c5L,0xfdf94de00d794716L,
+        0xd46124a99c623094L } },
+    /* 54 << 154 */
+    { { 0x56b7435d68afe8b4L,0x27f205406c0d8ea1L,0x12b77e1473186898L,
+        0xdbc3dd467479490fL },
+      { 0x951a9842c03b0c05L,0x8b1b3bb37921bc96L,0xa573b3462b202e0aL,
+        0x77e4665d47254d56L } },
+    /* 55 << 154 */
+    { { 0x08b70dfcd23e3984L,0xab86e8bcebd14236L,0xaa3e07f857114ba7L,
+        0x5ac71689ab0ef4f2L },
+      { 0x88fca3840139d9afL,0x72733f8876644af0L,0xf122f72a65d74f4aL,
+        0x13931577a5626c7aL } },
+    /* 56 << 154 */
+    { { 0xd5b5d9eb70f8d5a4L,0x375adde7d7bbb228L,0x31e88b860c1c0b32L,
+        0xd1f568c4173edbaaL },
+      { 0x1592fc835459df02L,0x2beac0fb0fcd9a7eL,0xb0a6fdb81b473b0aL,
+        0xe3224c6f0fe8fc48L } },
+    /* 57 << 154 */
+    { { 0x680bd00ee87edf5bL,0x30385f0220e77cf5L,0xe9ab98c04d42d1b2L,
+        0x72d191d2d3816d77L },
+      { 0x1564daca0917d9e5L,0x394eab591f8fed7fL,0xa209aa8d7fbb3896L,
+        0x5564f3b9be6ac98eL } },
+    /* 58 << 154 */
+    { { 0xead21d05d73654efL,0x68d1a9c413d78d74L,0x61e017086d4973a0L,
+        0x83da350046e6d32aL },
+      { 0x6a3dfca468ae0118L,0xa1b9a4c9d02da069L,0x0b2ff9c7ebab8302L,
+        0x98af07c3944ba436L } },
+    /* 59 << 154 */
+    { { 0x85997326995f0f9fL,0x467fade071b58bc6L,0x47e4495abd625a2bL,
+        0xfdd2d01d33c3b8cdL },
+      { 0x2c38ae28c693f9faL,0x48622329348f7999L,0x97bf738e2161f583L,
+        0x15ee2fa7565e8cc9L } },
+    /* 60 << 154 */
+    { { 0xa1a5c8455777e189L,0xcc10bee0456f2829L,0x8ad95c56da762bd5L,
+        0x152e2214e9d91da8L },
+      { 0x975b0e727cb23c74L,0xfd5d7670a90c66dfL,0xb5b5b8ad225ffc53L,
+        0xab6dff73faded2aeL } },
+    /* 61 << 154 */
+    { { 0xebd567816f4cbe9dL,0x0ed8b2496a574bd7L,0x41c246fe81a881faL,
+        0x91564805c3db9c70L },
+      { 0xd7c12b085b862809L,0x1facd1f155858d7bL,0x7693747caf09e92aL,
+        0x3b69dcba189a425fL } },
+    /* 62 << 154 */
+    { { 0x0be28e9f967365efL,0x57300eb2e801f5c9L,0x93b8ac6ad583352fL,
+        0xa2cf1f89cd05b2b7L },
+      { 0x7c0c9b744dcc40ccL,0xfee38c45ada523fbL,0xb49a4dec1099cc4dL,
+        0x325c377f69f069c6L } },
+    /* 63 << 154 */
+    { { 0xe12458ce476cc9ffL,0x580e0b6cc6d4cb63L,0xd561c8b79072289bL,
+        0x0377f264a619e6daL },
+      { 0x2668536288e591a5L,0xa453a7bd7523ca2bL,0x8a9536d2c1df4533L,
+        0xc8e50f2fbe972f79L } },
+    /* 64 << 154 */
+    { { 0xd433e50f6d3549cfL,0x6f33696ffacd665eL,0x695bfdacce11fcb4L,
+        0x810ee252af7c9860L },
+      { 0x65450fe17159bb2cL,0xf7dfbebe758b357bL,0x2b057e74d69fea72L,
+        0xd485717a92731745L } },
+    /* 0 << 161 */
+    { { 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 << 161 */
+    { { 0x896c42e8ee36860cL,0xdaf04dfd4113c22dL,0x1adbb7b744104213L,
+        0xe5fd5fa11fd394eaL },
+      { 0x68235d941a4e0551L,0x6772cfbe18d10151L,0x276071e309984523L,
+        0xe4e879de5a56ba98L } },
+    /* 2 << 161 */
+    { { 0xaaafafb0285b9491L,0x01a0be881e4c705eL,0xff1d4f5d2ad9caabL,
+        0x6e349a4ac37a233fL },
+      { 0xcf1c12464a1c6a16L,0xd99e6b6629383260L,0xea3d43665f6d5471L,
+        0x36974d04ff8cc89bL } },
+    /* 3 << 161 */
+    { { 0xc26c49a1cfe89d80L,0xb42c026dda9c8371L,0xca6c013adad066d2L,
+        0xfb8f722856a4f3eeL },
+      { 0x08b579ecd850935bL,0x34c1a74cd631e1b3L,0xcb5fe596ac198534L,
+        0x39ff21f6e1f24f25L } },
+    /* 4 << 161 */
+    { { 0x27f29e148f929057L,0x7a64ae06c0c853dfL,0x256cd18358e9c5ceL,
+        0x9d9cce82ded092a5L },
+      { 0xcc6e59796e93b7c7L,0xe1e4709231bb9e27L,0xb70b3083aa9e29a0L,
+        0xbf181a753785e644L } },
+    /* 5 << 161 */
+    { { 0xf53f2c658ead09f7L,0x1335e1d59780d14dL,0x69cc20e0cd1b66bcL,
+        0x9b670a37bbe0bfc8L },
+      { 0xce53dc8128efbeedL,0x0c74e77c8326a6e5L,0x3604e0d2b88e9a63L,
+        0xbab38fca13dc2248L } },
+    /* 6 << 161 */
+    { { 0x8ed6e8c85c0a3f1eL,0xbcad24927c87c37fL,0xfdfb62bb9ee3b78dL,
+        0xeba8e477cbceba46L },
+      { 0x37d38cb0eeaede4bL,0x0bc498e87976deb6L,0xb2944c046b6147fbL,
+        0x8b123f35f71f9609L } },
+    /* 7 << 161 */
+    { { 0xa155dcc7de79dc24L,0xf1168a32558f69cdL,0xbac215950d1850dfL,
+        0x15c8295bb204c848L },
+      { 0xf661aa367d8184ffL,0xc396228e30447bdbL,0x11cd5143bde4a59eL,
+        0xe3a26e3b6beab5e6L } },
+    /* 8 << 161 */
+    { { 0xd3b3a13f1402b9d0L,0x573441c32c7bc863L,0x4b301ec4578c3e6eL,
+        0xc26fc9c40adaf57eL },
+      { 0x96e71bfd7493cea3L,0xd05d4b3f1af81456L,0xdaca2a8a6a8c608fL,
+        0x53ef07f60725b276L } },
+    /* 9 << 161 */
+    { { 0x07a5fbd27824fc56L,0x3467521813289077L,0x5bf69fd5e0c48349L,
+        0xa613ddd3b6aa7875L },
+      { 0x7f78c19c5450d866L,0x46f4409c8f84a481L,0x9f1d192890fce239L,
+        0x016c4168b2ce44b9L } },
+    /* 10 << 161 */
+    { { 0xbae023f0c7435978L,0xb152c88820e30e19L,0x9c241645e3fa6fafL,
+        0x735d95c184823e60L },
+      { 0x0319757303955317L,0x0b4b02a9f03b4995L,0x076bf55970274600L,
+        0x32c5cc53aaf57508L } },
+    /* 11 << 161 */
+    { { 0xe8af6d1f60624129L,0xb7bc5d649a5e2b5eL,0x3814b0485f082d72L,
+        0x76f267f2ce19677aL },
+      { 0x626c630fb36eed93L,0x55230cd73bf56803L,0x78837949ce2736a0L,
+        0x0d792d60aa6c55f1L } },
+    /* 12 << 161 */
+    { { 0x0318dbfdd5c7c5d2L,0xb38f8da7072b342dL,0x3569bddc7b8de38aL,
+        0xf25b5887a1c94842L },
+      { 0xb2d5b2842946ad60L,0x854f29ade9d1707eL,0xaa5159dc2c6a4509L,
+        0x899f94c057189837L } },
+    /* 13 << 161 */
+    { { 0xcf6adc51f4a55b03L,0x261762de35e3b2d5L,0x4cc4301204827b51L,
+        0xcd22a113c6021442L },
+      { 0xce2fd61a247c9569L,0x59a50973d152becaL,0x6c835a1163a716d4L,
+        0xc26455ed187dedcfL } },
+    /* 14 << 161 */
+    { { 0x27f536e049ce89e7L,0x18908539cc890cb5L,0x308909abd83c2aa1L,
+        0xecd3142b1ab73bd3L },
+      { 0x6a85bf59b3f5ab84L,0x3c320a68f2bea4c6L,0xad8dc5386da4541fL,
+        0xeaf34eb0b7c41186L } },
+    /* 15 << 161 */
+    { { 0x1c780129977c97c4L,0x5ff9beebc57eb9faL,0xa24d0524c822c478L,
+        0xfd8eec2a461cd415L },
+      { 0xfbde194ef027458cL,0xb4ff53191d1be115L,0x63f874d94866d6f4L,
+        0x35c75015b21ad0c9L } },
+    /* 16 << 161 */
+    { { 0xa6b5c9d646ac49d2L,0x42c77c0b83137aa9L,0x24d000fc68225a38L,
+        0x0f63cfc82fe1e907L },
+      { 0x22d1b01bc6441f95L,0x7d38f719ec8e448fL,0x9b33fa5f787fb1baL,
+        0x94dcfda1190158dfL } },
+    /* 17 << 161 */
+    { { 0xc47cb3395f6d4a09L,0x6b4f355cee52b826L,0x3d100f5df51b930aL,
+        0xf4512fac9f668f69L },
+      { 0x546781d5206c4c74L,0xd021d4d4cb4d2e48L,0x494a54c2ca085c2dL,
+        0xf1dbaca4520850a8L } },
+    /* 18 << 161 */
+    { { 0x63c79326490a1acaL,0xcb64dd9c41526b02L,0xbb772591a2979258L,
+        0x3f58297048d97846L },
+      { 0xd66b70d17c213ba7L,0xc28febb5e8a0ced4L,0x6b911831c10338c1L,
+        0x0d54e389bf0126f3L } },
+    /* 19 << 161 */
+    { { 0x7048d4604af206eeL,0x786c88f677e97cb9L,0xd4375ae1ac64802eL,
+        0x469bcfe1d53ec11cL },
+      { 0xfc9b340d47062230L,0xe743bb57c5b4a3acL,0xfe00b4aa59ef45acL,
+        0x29a4ef2359edf188L } },
+    /* 20 << 161 */
+    { { 0x40242efeb483689bL,0x2575d3f6513ac262L,0xf30037c80ca6db72L,
+        0xc9fcce8298864be2L },
+      { 0x84a112ff0149362dL,0x95e575821c4ae971L,0x1fa4b1a8945cf86cL,
+        0x4525a7340b024a2fL } },
+    /* 21 << 161 */
+    { { 0xe76c8b628f338360L,0x483ff59328edf32bL,0x67e8e90a298b1aecL,
+        0x9caab338736d9a21L },
+      { 0x5c09d2fd66892709L,0x2496b4dcb55a1d41L,0x93f5fb1ae24a4394L,
+        0x08c750496fa8f6c1L } },
+    /* 22 << 161 */
+    { { 0xcaead1c2c905d85fL,0xe9d7f7900733ae57L,0x24c9a65cf07cdd94L,
+        0x7389359ca4b55931L },
+      { 0xf58709b7367e45f7L,0x1f203067cb7e7adcL,0x82444bffc7b72818L,
+        0x07303b35baac8033L } },
+    /* 23 << 161 */
+    { { 0x1e1ee4e4d13b7ea1L,0xe6489b24e0e74180L,0xa5f2c6107e70ef70L,
+        0xa1655412bdd10894L },
+      { 0x555ebefb7af4194eL,0x533c1c3c8e89bd9cL,0x735b9b5789895856L,
+        0x15fb3cd2567f5c15L } },
+    /* 24 << 161 */
+    { { 0x057fed45526f09fdL,0xe8a4f10c8128240aL,0x9332efc4ff2bfd8dL,
+        0x214e77a0bd35aa31L },
+      { 0x32896d7314faa40eL,0x767867ec01e5f186L,0xc9adf8f117a1813eL,
+        0xcb6cda7854741795L } },
+    /* 25 << 161 */
+    { { 0xb7521b6d349d51aaL,0xf56b5a9ee3c7b8e9L,0xc6f1e5c932a096dfL,
+        0x083667c4a3635024L },
+      { 0x365ea13518087f2fL,0xf1b8eaacd136e45dL,0xc8a0e48473aec989L,
+        0xd75a324b142c9259L } },
+    /* 26 << 161 */
+    { { 0xb7b4d00101dae185L,0x45434e0b9b7a94bcL,0xf54339affbd8cb0bL,
+        0xdcc4569ee98ef49eL },
+      { 0x7789318a09a51299L,0x81b4d206b2b025d8L,0xf64aa418fae85792L,
+        0x3e50258facd7baf7L } },
+    /* 27 << 161 */
+    { { 0xdce84cdb2996864bL,0xa2e670891f485fa4L,0xb28b2bb6534c6a5aL,
+        0x31a7ec6bc94b9d39L },
+      { 0x1d217766d6bc20daL,0x4acdb5ec86761190L,0x6872632873701063L,
+        0x4d24ee7c2128c29bL } },
+    /* 28 << 161 */
+    { { 0xc072ebd3a19fd868L,0x612e481cdb8ddd3bL,0xb4e1d7541a64d852L,
+        0x00ef95acc4c6c4abL },
+      { 0x1536d2edaa0a6c46L,0x6129408643774790L,0x54af25e8343fda10L,
+        0x9ff9d98dfd25d6f2L } },
+    /* 29 << 161 */
+    { { 0x0746af7c468b8835L,0x977a31cb730ecea7L,0xa5096b80c2cf4a81L,
+        0xaa9868336458c37aL },
+      { 0x6af29bf3a6bd9d34L,0x6a62fe9b33c5d854L,0x50e6c304b7133b5eL,
+        0x04b601597d6e6848L } },
+    /* 30 << 161 */
+    { { 0x4cd296df5579bea4L,0x10e35ac85ceedaf1L,0x04c4c5fde3bcc5b1L,
+        0x95f9ee8a89412cf9L },
+      { 0x2c9459ee82b6eb0fL,0x2e84576595c2aaddL,0x774a84aed327fcfeL,
+        0xd8c937220368d476L } },
+    /* 31 << 161 */
+    { { 0x0dbd5748f83e8a3bL,0xa579aa968d2495f3L,0x535996a0ae496e9bL,
+        0x07afbfe9b7f9bcc2L },
+      { 0x3ac1dc6d5b7bd293L,0x3b592cff7022323dL,0xba0deb989c0a3e76L,
+        0x18e78e9f4b197acbL } },
+    /* 32 << 161 */
+    { { 0x211cde10296c36efL,0x7ee8967282c4da77L,0xb617d270a57836daL,
+        0xf0cd9c319cb7560bL },
+      { 0x01fdcbf7e455fe90L,0x3fb53cbb7e7334f3L,0x781e2ea44e7de4ecL,
+        0x8adab3ad0b384fd0L } },
+    /* 33 << 161 */
+    { { 0x129eee2f53d64829L,0x7a471e17a261492bL,0xe4f9adb9e4cb4a2cL,
+        0x3d359f6f97ba2c2dL },
+      { 0x346c67860aacd697L,0x92b444c375c2f8a8L,0xc79fa117d85df44eL,
+        0x56782372398ddf31L } },
+    /* 34 << 161 */
+    { { 0x60e690f2bbbab3b8L,0x4851f8ae8b04816bL,0xc72046ab9c92e4d2L,
+        0x518c74a17cf3136bL },
+      { 0xff4eb50af9877d4cL,0x14578d90a919cabbL,0x8218f8c4ac5eb2b6L,
+        0xa3ccc547542016e4L } },
+    /* 35 << 161 */
+    { { 0x025bf48e327f8349L,0xf3e97346f43cb641L,0xdc2bafdf500f1085L,
+        0x571678762f063055L },
+      { 0x5bd914b9411925a6L,0x7c078d48a1123de5L,0xee6bf835182b165dL,
+        0xb11b5e5bba519727L } },
+    /* 36 << 161 */
+    { { 0xe33ea76c1eea7b85L,0x2352b46192d4f85eL,0xf101d334afe115bbL,
+        0xfabc1294889175a3L },
+      { 0x7f6bcdc05233f925L,0xe0a802dbe77fec55L,0xbdb47b758069b659L,
+        0x1c5e12def98fbd74L } },
+    /* 37 << 161 */
+    { { 0x869c58c64b8457eeL,0xa5360f694f7ea9f7L,0xe576c09ff460b38fL,
+        0x6b70d54822b7fb36L },
+      { 0x3fd237f13bfae315L,0x33797852cbdff369L,0x97df25f525b516f9L,
+        0x46f388f2ba38ad2dL } },
+    /* 38 << 161 */
+    { { 0x656c465889d8ddbbL,0x8830b26e70f38ee8L,0x4320fd5cde1212b0L,
+        0xc34f30cfe4a2edb2L },
+      { 0xabb131a356ab64b8L,0x7f77f0ccd99c5d26L,0x66856a37bf981d94L,
+        0x19e76d09738bd76eL } },
+    /* 39 << 161 */
+    { { 0xe76c8ac396238f39L,0xc0a482bea830b366L,0xb7b8eaff0b4eb499L,
+        0x8ecd83bc4bfb4865L },
+      { 0x971b2cb7a2f3776fL,0xb42176a4f4b88adfL,0xb9617df5be1fa446L,
+        0x8b32d508cd031bd2L } },
+    /* 40 << 161 */
+    { { 0x1c6bd47d53b618c0L,0xc424f46c6a227923L,0x7303ffdedd92d964L,
+        0xe971287871b5abf2L },
+      { 0x8f48a632f815561dL,0x85f48ff5d3c055d1L,0x222a14277525684fL,
+        0xd0d841a067360cc3L } },
+    /* 41 << 161 */
+    { { 0x4245a9260b9267c6L,0xc78913f1cf07f863L,0xaa844c8e4d0d9e24L,
+        0xa42ad5223d5f9017L },
+      { 0xbd371749a2c989d5L,0x928292dfe1f5e78eL,0x493b383e0a1ea6daL,
+        0x5136fd8d13aee529L } },
+    /* 42 << 161 */
+    { { 0x860c44b1f2c34a99L,0x3b00aca4bf5855acL,0xabf6aaa0faaf37beL,
+        0x65f436822a53ec08L },
+      { 0x1d9a5801a11b12e1L,0x78a7ab2ce20ed475L,0x0de1067e9a41e0d5L,
+        0x30473f5f305023eaL } },
+    /* 43 << 161 */
+    { { 0xdd3ae09d169c7d97L,0x5cd5baa4cfaef9cdL,0x5cd7440b65a44803L,
+        0xdc13966a47f364deL },
+      { 0x077b2be82b8357c1L,0x0cb1b4c5e9d57c2aL,0x7a4ceb3205ff363eL,
+        0xf310fa4dca35a9efL } },
+    /* 44 << 161 */
+    { { 0xdbb7b352f97f68c6L,0x0c773b500b02cf58L,0xea2e48213c1f96d9L,
+        0xffb357b0eee01815L },
+      { 0xb9c924cde0f28039L,0x0b36c95a46a3fbe4L,0x1faaaea45e46db6cL,
+        0xcae575c31928aaffL } },
+    /* 45 << 161 */
+    { { 0x7f671302a70dab86L,0xfcbd12a971c58cfcL,0xcbef9acfbee0cb92L,
+        0x573da0b9f8c1b583L },
+      { 0x4752fcfe0d41d550L,0xe7eec0e32155cffeL,0x0fc39fcb545ae248L,
+        0x522cb8d18065f44eL } },
+    /* 46 << 161 */
+    { { 0x263c962a70cbb96cL,0xe034362abcd124a9L,0xf120db283c2ae58dL,
+        0xb9a38d49fef6d507L },
+      { 0xb1fd2a821ff140fdL,0xbd162f3020aee7e0L,0x4e17a5d4cb251949L,
+        0x2aebcb834f7e1c3dL } },
+    /* 47 << 161 */
+    { { 0x608eb25f937b0527L,0xf42e1e47eb7d9997L,0xeba699c4b8a53a29L,
+        0x1f921c71e091b536L },
+      { 0xcce29e7b5b26bbd5L,0x7a8ef5ed3b61a680L,0xe5ef8043ba1f1c7eL,
+        0x16ea821718158ddaL } },
+    /* 48 << 161 */
+    { { 0x01778a2b599ff0f9L,0x68a923d78104fc6bL,0x5bfa44dfda694ff3L,
+        0x4f7199dbf7667f12L },
+      { 0xc06d8ff6e46f2a79L,0x08b5deade9f8131dL,0x02519a59abb4ce7cL,
+        0xc4f710bcb42aec3eL } },
+    /* 49 << 161 */
+    { { 0x3d77b05778bde41aL,0x6474bf80b4186b5aL,0x048b3f6788c65741L,
+        0xc64519de03c7c154L },
+      { 0xdf0738460edfcc4fL,0x319aa73748f1aa6bL,0x8b9f8a02ca909f77L,
+        0x902581397580bfefL } },
+    /* 50 << 161 */
+    { { 0xd8bfd3cac0c22719L,0xc60209e4c9ca151eL,0x7a744ab5d9a1a69cL,
+        0x6de5048b14937f8fL },
+      { 0x171938d8e115ac04L,0x7df709401c6b16d2L,0xa6aeb6637f8e94e7L,
+        0xc130388e2a2cf094L } },
+    /* 51 << 161 */
+    { { 0x1850be8477f54e6eL,0x9f258a7265d60fe5L,0xff7ff0c06c9146d6L,
+        0x039aaf90e63a830bL },
+      { 0x38f27a739460342fL,0x4703148c3f795f8aL,0x1bb5467b9681a97eL,
+        0x00931ba5ecaeb594L } },
+    /* 52 << 161 */
+    { { 0xcdb6719d786f337cL,0xd9c01cd2e704397dL,0x0f4a3f20555c2fefL,
+        0x004525097c0af223L },
+      { 0x54a5804784db8e76L,0x3bacf1aa93c8aa06L,0x11ca957cf7919422L,
+        0x5064105378cdaa40L } },
+    /* 53 << 161 */
+    { { 0x7a3038749f7144aeL,0x170c963f43d4acfdL,0x5e14814958ddd3efL,
+        0xa7bde5829e72dba8L },
+      { 0x0769da8b6fa68750L,0xfa64e532572e0249L,0xfcaadf9d2619ad31L,
+        0x87882daaa7b349cdL } },
+    /* 54 << 161 */
+    { { 0x9f6eb7316c67a775L,0xcb10471aefc5d0b1L,0xb433750ce1b806b2L,
+        0x19c5714d57b1ae7eL },
+      { 0xc0dc8b7bed03fd3fL,0xdd03344f31bc194eL,0xa66c52a78c6320b5L,
+        0x8bc82ce3d0b6fd93L } },
+    /* 55 << 161 */
+    { { 0xf8e13501b35f1341L,0xe53156dd25a43e42L,0xd3adf27e4daeb85cL,
+        0xb81d8379bbeddeb5L },
+      { 0x1b0b546e2e435867L,0x9020eb94eba5dd60L,0x37d911618210cb9dL,
+        0x4c596b315c91f1cfL } },
+    /* 56 << 161 */
+    { { 0xb228a90f0e0b040dL,0xbaf02d8245ff897fL,0x2aac79e600fa6122L,
+        0x248288178e36f557L },
+      { 0xb9521d31113ec356L,0x9e48861e15eff1f8L,0x2aa1d412e0d41715L,
+        0x71f8620353f131b8L } },
+    /* 57 << 161 */
+    { { 0xf60da8da3fd19408L,0x4aa716dc278d9d99L,0x394531f7a8c51c90L,
+        0xb560b0e8f59db51cL },
+      { 0xa28fc992fa34bdadL,0xf024fa149cd4f8bdL,0x5cf530f723a9d0d3L,
+        0x615ca193e28c9b56L } },
+    /* 58 << 161 */
+    { { 0x6d2a483d6f73c51eL,0xa4cb2412ea0dc2ddL,0x50663c411eb917ffL,
+        0x3d3a74cfeade299eL },
+      { 0x29b3990f4a7a9202L,0xa9bccf59a7b15c3dL,0x66a3ccdca5df9208L,
+        0x48027c1443f2f929L } },
+    /* 59 << 161 */
+    { { 0xd385377c40b557f0L,0xe001c366cd684660L,0x1b18ed6be2183a27L,
+        0x879738d863210329L },
+      { 0xa687c74bbda94882L,0xd1bbcc48a684b299L,0xaf6f1112863b3724L,
+        0x6943d1b42c8ce9f8L } },
+    /* 60 << 161 */
+    { { 0xe044a3bb098cafb4L,0x27ed231060d48cafL,0x542b56753a31b84dL,
+        0xcbf3dd50fcddbed7L },
+      { 0x25031f1641b1d830L,0xa7ec851dcb0c1e27L,0xac1c8fe0b5ae75dbL,
+        0xb24c755708c52120L } },
+    /* 61 << 161 */
+    { { 0x57f811dc1d4636c3L,0xf8436526681a9939L,0x1f6bc6d99c81adb3L,
+        0x840f8ac35b7d80d4L },
+      { 0x731a9811f4387f1aL,0x7c501cd3b5156880L,0xa5ca4a07dfe68867L,
+        0xf123d8f05fcea120L } },
+    /* 62 << 161 */
+    { { 0x1fbb0e71d607039eL,0x2b70e215cd3a4546L,0x32d2f01d53324091L,
+        0xb796ff08180ab19bL },
+      { 0x32d87a863c57c4aaL,0x2aed9cafb7c49a27L,0x9fb35eac31630d98L,
+        0x338e8cdf5c3e20a3L } },
+    /* 63 << 161 */
+    { { 0x80f1618266cde8dbL,0x4e1599802d72fd36L,0xd7b8f13b9b6e5072L,
+        0xf52139073b7b5dc1L },
+      { 0x4d431f1d8ce4396eL,0x37a1a680a7ed2142L,0xbf375696d01aaf6bL,
+        0xaa1c0c54e63aab66L } },
+    /* 64 << 161 */
+    { { 0x3014368b4ed80940L,0x67e6d0567a6fceddL,0x7c208c49ca97579fL,
+        0xfe3d7a81a23597f6L },
+      { 0x5e2032027e096ae2L,0xb1f3e1e724b39366L,0x26da26f32fdcdffcL,
+        0x79422f1d6097be83L } },
+    /* 0 << 168 */
+    { { 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 << 168 */
+    { { 0x263a2cfb9db3b381L,0x9c3a2deed4df0a4bL,0x728d06e97d04e61fL,
+        0x8b1adfbc42449325L },
+      { 0x6ec1d9397e053a1bL,0xee2be5c766daf707L,0x80ba1e14810ac7abL,
+        0xdd2ae778f530f174L } },
+    /* 2 << 168 */
+    { { 0x0435d97a205b9d8bL,0x6eb8f064056756d4L,0xd5e88a8bb6f8210eL,
+        0x070ef12dec9fd9eaL },
+      { 0x4d8495053bcc876aL,0x12a75338a7404ce3L,0xd22b49e1b8a1db5eL,
+        0xec1f205114bfa5adL } },
+    /* 3 << 168 */
+    { { 0xadbaeb79b6828f36L,0x9d7a025801bd5b9eL,0xeda01e0d1e844b0cL,
+        0x4b625175887edfc9L },
+      { 0x14109fdd9669b621L,0x88a2ca56f6f87b98L,0xfe2eb788170df6bcL,
+        0x0cea06f4ffa473f9L } },
+    /* 4 << 168 */
+    { { 0x43ed81b5c4e83d33L,0xd9f358795efd488bL,0x164a620f9deb4d0fL,
+        0xc6927bdbac6a7394L },
+      { 0x45c28df79f9e0f03L,0x2868661efcd7e1a9L,0x7cf4e8d0ffa348f1L,
+        0x6bd4c284398538e0L } },
+    /* 5 << 168 */
+    { { 0x2618a091289a8619L,0xef796e606671b173L,0x664e46e59090c632L,
+        0xa38062d41e66f8fbL },
+      { 0x6c744a200573274eL,0xd07b67e4a9271394L,0x391223b26bdc0e20L,
+        0xbe2d93f1eb0a05a7L } },
+    /* 6 << 168 */
+    { { 0xf23e2e533f36d141L,0xe84bb3d44dfca442L,0xb804a48d6b7c023aL,
+        0x1e16a8fa76431c3bL },
+      { 0x1b5452adddd472e0L,0x7d405ee70d1ee127L,0x50fc6f1dffa27599L,
+        0x351ac53cbf391b35L } },
+    /* 7 << 168 */
+    { { 0x7efa14b84444896bL,0x64974d2ff94027fbL,0xefdcd0e8de84487dL,
+        0x8c45b2602b48989bL },
+      { 0xa8fcbbc2d8463487L,0xd1b2b3f73fbc476cL,0x21d005b7c8f443c0L,
+        0x518f2e6740c0139cL } },
+    /* 8 << 168 */
+    { { 0x56036e8c06d75fc1L,0x2dcf7bb73249a89fL,0x81dd1d3de245e7ddL,
+        0xf578dc4bebd6e2a7L },
+      { 0x4c028903df2ce7a0L,0xaee362889c39afacL,0xdc847c31146404abL,
+        0x6304c0d8a4e97818L } },
+    /* 9 << 168 */
+    { { 0xae51dca2a91f6791L,0x2abe41909baa9efcL,0xd9d2e2f4559c7ac1L,
+        0xe82f4b51fc9f773aL },
+      { 0xa77130274073e81cL,0xc0276facfbb596fcL,0x1d819fc9a684f70cL,
+        0x29b47fddc9f7b1e0L } },
+    /* 10 << 168 */
+    { { 0x358de103459b1940L,0xec881c595b013e93L,0x51574c9349532ad3L,
+        0x2db1d445b37b46deL },
+      { 0xc6445b87df239fd8L,0xc718af75151d24eeL,0xaea1c4a4f43c6259L,
+        0x40c0e5d770be02f7L } },
+    /* 11 << 168 */
+    { { 0x6a4590f4721b33f2L,0x2124f1fbfedf04eaL,0xf8e53cde9745efe7L,
+        0xe7e1043265f046d9L },
+      { 0xc3fca28ee4d0c7e6L,0x847e339a87253b1bL,0x9b5953483743e643L,
+        0xcb6a0a0b4fd12fc5L } },
+    /* 12 << 168 */
+    { { 0xfb6836c327d02dccL,0x5ad009827a68bcc2L,0x1b24b44c005e912dL,
+        0xcc83d20f811fdcfeL },
+      { 0x36527ec1666fba0cL,0x6994819714754635L,0xfcdcb1a8556da9c2L,
+        0xa593426781a732b2L } },
+    /* 13 << 168 */
+    { { 0xec1214eda714181dL,0x609ac13b6067b341L,0xff4b4c97a545df1fL,
+        0xa124050134d2076bL },
+      { 0x6efa0c231409ca97L,0x254cc1a820638c43L,0xd4e363afdcfb46cdL,
+        0x62c2adc303942a27L } },
+    /* 14 << 168 */
+    { { 0xc67b9df056e46483L,0xa55abb2063736356L,0xab93c098c551bc52L,
+        0x382b49f9b15fe64bL },
+      { 0x9ec221ad4dff8d47L,0x79caf615437df4d6L,0x5f13dc64bb456509L,
+        0xe4c589d9191f0714L } },
+    /* 15 << 168 */
+    { { 0x27b6a8ab3fd40e09L,0xe455842e77313ea9L,0x8b51d1e21f55988bL,
+        0x5716dd73062bbbfcL },
+      { 0x633c11e54e8bf3deL,0x9a0e77b61b85be3bL,0x565107290911cca6L,
+        0x27e76495efa6590fL } },
+    /* 16 << 168 */
+    { { 0xe4ac8b33070d3aabL,0x2643672b9a2cd5e5L,0x52eff79b1cfc9173L,
+        0x665ca49b90a7c13fL },
+      { 0x5a8dda59b3efb998L,0x8a5b922d052f1341L,0xae9ebbab3cf9a530L,
+        0x35986e7bf56da4d7L } },
+    /* 17 << 168 */
+    { { 0x3a636b5cff3513ccL,0xbb0cf8ba3198f7ddL,0xb8d4052241f16f86L,
+        0x760575d8de13a7bfL },
+      { 0x36f74e169f7aa181L,0x163a3ecff509ed1cL,0x6aead61f3c40a491L,
+        0x158c95fcdfe8fcaaL } },
+    /* 18 << 168 */
+    { { 0xa3991b6e13cda46fL,0x79482415342faed0L,0xf3ba5bde666b5970L,
+        0x1d52e6bcb26ab6ddL },
+      { 0x768ba1e78608dd3dL,0x4930db2aea076586L,0xd9575714e7dc1afaL,
+        0x1fc7bf7df7c58817L } },
+    /* 19 << 168 */
+    { { 0x6b47accdd9eee96cL,0x0ca277fbe58cec37L,0x113fe413e702c42aL,
+        0xdd1764eec47cbe51L },
+      { 0x041e7cde7b3ed739L,0x50cb74595ce9e1c0L,0x355685132925b212L,
+        0x7cff95c4001b081cL } },
+    /* 20 << 168 */
+    { { 0x63ee4cbd8088b454L,0xdb7f32f79a9e0c8aL,0xb377d4186b2447cbL,
+        0xe3e982aad370219bL },
+      { 0x06ccc1e4c2a2a593L,0x72c368650773f24fL,0xa13b4da795859423L,
+        0x8bbf1d3375040c8fL } },
+    /* 21 << 168 */
+    { { 0x726f0973da50c991L,0x48afcd5b822d6ee2L,0xe5fc718b20fd7771L,
+        0xb9e8e77dfd0807a1L },
+      { 0x7f5e0f4499a7703dL,0x6972930e618e36f3L,0x2b7c77b823807bbeL,
+        0xe5b82405cb27ff50L } },
+    /* 22 << 168 */
+    { { 0xba8b8be3bd379062L,0xd64b7a1d2dce4a92L,0x040a73c5b2952e37L,
+        0x0a9e252ed438aecaL },
+      { 0xdd43956bc39d3bcbL,0x1a31ca00b32b2d63L,0xd67133b85c417a18L,
+        0xd08e47902ef442c8L } },
+    /* 23 << 168 */
+    { { 0x98cb1ae9255c0980L,0x4bd863812b4a739fL,0x5a5c31e11e4a45a1L,
+        0x1e5d55fe9cb0db2fL },
+      { 0x74661b068ff5cc29L,0x026b389f0eb8a4f4L,0x536b21a458848c24L,
+        0x2e5bf8ec81dc72b0L } },
+    /* 24 << 168 */
+    { { 0x03c187d0ad886aacL,0x5c16878ab771b645L,0xb07dfc6fc74045abL,
+        0x2c6360bf7800caedL },
+      { 0x24295bb5b9c972a3L,0xc9e6f88e7c9a6dbaL,0x90ffbf2492a79aa6L,
+        0xde29d50a41c26ac2L } },
+    /* 25 << 168 */
+    { { 0x9f0af483d309cbe6L,0x5b020d8ae0bced4fL,0x606e986db38023e3L,
+        0xad8f2c9d1abc6933L },
+      { 0x19292e1de7400e93L,0xfe3e18a952be5e4dL,0xe8e9771d2e0680bfL,
+        0x8c5bec98c54db063L } },
+    /* 26 << 168 */
+    { { 0x2af9662a74a55d1fL,0xe3fbf28f046f66d8L,0xa3a72ab4d4dc4794L,
+        0x09779f455c7c2dd8L },
+      { 0xd893bdafc3d19d8dL,0xd5a7509457d6a6dfL,0x8cf8fef9952e6255L,
+        0x3da67cfbda9a8affL } },
+    /* 27 << 168 */
+    { { 0x4c23f62a2c160dcdL,0x34e6c5e38f90eaefL,0x35865519a9a65d5aL,
+        0x07c48aae8fd38a3dL },
+      { 0xb7e7aeda50068527L,0x2c09ef231c90936aL,0x31ecfeb6e879324cL,
+        0xa0871f6bfb0ec938L } },
+    /* 28 << 168 */
+    { { 0xb1f0fb68d84d835dL,0xc90caf39861dc1e6L,0x12e5b0467594f8d7L,
+        0x26897ae265012b92L },
+      { 0xbcf68a08a4d6755dL,0x403ee41c0991fbdaL,0x733e343e3bbf17e8L,
+        0xd2c7980d679b3d65L } },
+    /* 29 << 168 */
+    { { 0x33056232d2e11305L,0x966be492f3c07a6fL,0x6a8878ffbb15509dL,
+        0xff2211010a9b59a4L },
+      { 0x6c9f564aabe30129L,0xc6f2c940336e64cfL,0x0fe752628b0c8022L,
+        0xbe0267e96ae8db87L } },
+    /* 30 << 168 */
+    { { 0x22e192f193bc042bL,0xf085b534b237c458L,0xa0d192bd832c4168L,
+        0x7a76e9e3bdf6271dL },
+      { 0x52a882fab88911b5L,0xc85345e4b4db0eb5L,0xa3be02a681a7c3ffL,
+        0x51889c8cf0ec0469L } },
+    /* 31 << 168 */
+    { { 0x9d031369a5e829e5L,0xcbb4c6fc1607aa41L,0x75ac59a6241d84c1L,
+        0xc043f2bf8829e0eeL },
+      { 0x82a38f758ea5e185L,0x8bda40b9d87cbd9fL,0x9e65e75e2d8fc601L,
+        0x3d515f74a35690b3L } },
+    /* 32 << 168 */
+    { { 0x534acf4fda79e5acL,0x68b83b3a8630215fL,0x5c748b2ed085756eL,
+        0xb0317258e5d37cb2L },
+      { 0x6735841ac5ccc2c4L,0x7d7dc96b3d9d5069L,0xa147e410fd1754bdL,
+        0x65296e94d399ddd5L } },
+    /* 33 << 168 */
+    { { 0xf6b5b2d0bc8fa5bcL,0x8a5ead67500c277bL,0x214625e6dfa08a5dL,
+        0x51fdfedc959cf047L },
+      { 0x6bc9430b289fca32L,0xe36ff0cf9d9bdc3fL,0x2fe187cb58ea0edeL,
+        0xed66af205a900b3fL } },
+    /* 34 << 168 */
+    { { 0x00e0968b5fa9f4d6L,0x2d4066ce37a362e7L,0xa99a9748bd07e772L,
+        0x710989c006a4f1d0L },
+      { 0xd5dedf35ce40cbd8L,0xab55c5f01743293dL,0x766f11448aa24e2cL,
+        0x94d874f8605fbcb4L } },
+    /* 35 << 168 */
+    { { 0xa365f0e8a518001bL,0xee605eb69d04ef0fL,0x5a3915cdba8d4d25L,
+        0x44c0e1b8b5113472L },
+      { 0xcbb024e88b6740dcL,0x89087a53ee1d4f0cL,0xa88fa05c1fc4e372L,
+        0x8bf395cbaf8b3af2L } },
+    /* 36 << 168 */
+    { { 0x1e71c9a1deb8568bL,0xa35daea080fb3d32L,0xe8b6f2662cf8fb81L,
+        0x6d51afe89490696aL },
+      { 0x81beac6e51803a19L,0xe3d24b7f86219080L,0x727cfd9ddf6f463cL,
+        0x8c6865ca72284ee8L } },
+    /* 37 << 168 */
+    { { 0x32c88b7db743f4efL,0x3793909be7d11dceL,0xd398f9222ff2ebe8L,
+        0x2c70ca44e5e49796L },
+      { 0xdf4d9929cb1131b1L,0x7826f29825888e79L,0x4d3a112cf1d8740aL,
+        0x00384cb6270afa8bL } },
+    /* 38 << 168 */
+    { { 0xcb64125b3ab48095L,0x3451c25662d05106L,0xd73d577da4955845L,
+        0x39570c16bf9f4433L },
+      { 0xd7dfaad3adecf263L,0xf1c3d8d1dc76e102L,0x5e774a5854c6a836L,
+        0xdad4b6723e92d47bL } },
+    /* 39 << 168 */
+    { { 0xbe7e990ff0d796a0L,0x5fc62478df0e8b02L,0x8aae8bf4030c00adL,
+        0x3d2db93b9004ba0fL },
+      { 0xe48c8a79d85d5ddcL,0xe907caa76bb07f34L,0x58db343aa39eaed5L,
+        0x0ea6e007adaf5724L } },
+    /* 40 << 168 */
+    { { 0xe00df169d23233f3L,0x3e32279677cb637fL,0x1f897c0e1da0cf6cL,
+        0xa651f5d831d6bbddL },
+      { 0xdd61af191a230c76L,0xbd527272cdaa5e4aL,0xca753636d0abcd7eL,
+        0x78bdd37c370bd8dcL } },
+    /* 41 << 168 */
+    { { 0xc23916c217cd93feL,0x65b97a4ddadce6e2L,0xe04ed4eb174e42f8L,
+        0x1491ccaabb21480aL },
+      { 0x145a828023196332L,0x3c3862d7587b479aL,0x9f4a88a301dcd0edL,
+        0x4da2b7ef3ea12f1fL } },
+    /* 42 << 168 */
+    { { 0xf8e7ae33b126e48eL,0x404a0b32f494e237L,0x9beac474c55acadbL,
+        0x4ee5cf3bcbec9fd9L },
+      { 0x336b33b97df3c8c3L,0xbd905fe3b76808fdL,0x8f436981aa45c16aL,
+        0x255c5bfa3dd27b62L } },
+    /* 43 << 168 */
+    { { 0x71965cbfc3dd9b4dL,0xce23edbffc068a87L,0xb78d4725745b029bL,
+        0x74610713cefdd9bdL },
+      { 0x7116f75f1266bf52L,0x0204672218e49bb6L,0xdf43df9f3d6f19e3L,
+        0xef1bc7d0e685cb2fL } },
+    /* 44 << 168 */
+    { { 0xcddb27c17078c432L,0xe1961b9cb77fedb7L,0x1edc2f5cc2290570L,
+        0x2c3fefca19cbd886L },
+      { 0xcf880a36c2af389aL,0x96c610fdbda71ceaL,0xf03977a932aa8463L,
+        0x8eb7763f8586d90aL } },
+    /* 45 << 168 */
+    { { 0x3f3424542a296e77L,0xc871868342837a35L,0x7dc710906a09c731L,
+        0x54778ffb51b816dbL },
+      { 0x6b33bfecaf06defdL,0xfe3c105f8592b70bL,0xf937fda461da6114L,
+        0x3c13e6514c266ad7L } },
+    /* 46 << 168 */
+    { { 0xe363a829855938e8L,0x2eeb5d9e9de54b72L,0xbeb93b0e20ccfab9L,
+        0x3dffbb5f25e61a25L },
+      { 0x7f655e431acc093dL,0x0cb6cc3d3964ce61L,0x6ab283a1e5e9b460L,
+        0x55d787c5a1c7e72dL } },
+    /* 47 << 168 */
+    { { 0x4d2efd47deadbf02L,0x11e80219ac459068L,0x810c762671f311f0L,
+        0xfa17ef8d4ab6ef53L },
+      { 0xaf47fd2593e43bffL,0x5cb5ff3f0be40632L,0x546871068ee61da3L,
+        0x7764196eb08afd0fL } },
+    /* 48 << 168 */
+    { { 0x831ab3edf0290a8fL,0xcae81966cb47c387L,0xaad7dece184efb4fL,
+        0xdcfc53b34749110eL },
+      { 0x6698f23c4cb632f9L,0xc42a1ad6b91f8067L,0xb116a81d6284180aL,
+        0xebedf5f8e901326fL } },
+    /* 49 << 168 */
+    { { 0xf2274c9f97e3e044L,0x4201852011d09fc9L,0x56a65f17d18e6e23L,
+        0x2ea61e2a352b683cL },
+      { 0x27d291bc575eaa94L,0x9e7bc721b8ff522dL,0x5f7268bfa7f04d6fL,
+        0x5868c73faba41748L } },
+    /* 50 << 168 */
+    { { 0x9f85c2db7be0eeadL,0x511e7842ff719135L,0x5a06b1e9c5ea90d7L,
+        0x0c19e28326fab631L },
+      { 0x8af8f0cfe9206c55L,0x89389cb43553c06aL,0x39dbed97f65f8004L,
+        0x0621b037c508991dL } },
+    /* 51 << 168 */
+    { { 0x1c52e63596e78cc4L,0x5385c8b20c06b4a8L,0xd84ddfdbb0e87d03L,
+        0xc49dfb66934bafadL },
+      { 0x7071e17059f70772L,0x3a073a843a1db56bL,0x034949033b8af190L,
+        0x7d882de3d32920f0L } },
+    /* 52 << 168 */
+    { { 0x91633f0ab2cf8940L,0x72b0b1786f948f51L,0x2d28dc30782653c8L,
+        0x88829849db903a05L },
+      { 0xb8095d0c6a19d2bbL,0x4b9e7f0c86f782cbL,0x7af739882d907064L,
+        0xd12be0fe8b32643cL } },
+    /* 53 << 168 */
+    { { 0x358ed23d0e165dc3L,0x3d47ce624e2378ceL,0x7e2bb0b9feb8a087L,
+        0x3246e8aee29e10b9L },
+      { 0x459f4ec703ce2b4dL,0xe9b4ca1bbbc077cfL,0x2613b4f20e9940c1L,
+        0xfc598bb9047d1eb1L } },
+    /* 54 << 168 */
+    { { 0x9744c62b45036099L,0xa9dee742167c65d8L,0x0c511525dabe1943L,
+        0xda11055493c6c624L },
+      { 0xae00a52c651a3be2L,0xcda5111d884449a6L,0x063c06f4ff33bed1L,
+        0x73baaf9a0d3d76b4L } },
+    /* 55 << 168 */
+    { { 0x52fb0c9d7fc63668L,0x6886c9dd0c039cdeL,0x602bd59955b22351L,
+        0xb00cab02360c7c13L },
+      { 0x8cb616bc81b69442L,0x41486700b55c3ceeL,0x71093281f49ba278L,
+        0xad956d9c64a50710L } },
+    /* 56 << 168 */
+    { { 0x9561f28b638a7e81L,0x54155cdf5980ddc3L,0xb2db4a96d26f247aL,
+        0x9d774e4e4787d100L },
+      { 0x1a9e6e2e078637d2L,0x1c363e2d5e0ae06aL,0x7493483ee9cfa354L,
+        0x76843cb37f74b98dL } },
+    /* 57 << 168 */
+    { { 0xbaca6591d4b66947L,0xb452ce9804460a8cL,0x6830d24643768f55L,
+        0xf4197ed87dff12dfL },
+      { 0x6521b472400dd0f7L,0x59f5ca8f4b1e7093L,0x6feff11b080338aeL,
+        0x0ada31f6a29ca3c6L } },
+    /* 58 << 168 */
+    { { 0x24794eb694a2c215L,0xd83a43ab05a57ab4L,0x264a543a2a6f89feL,
+        0x2c2a3868dd5ec7c2L },
+      { 0xd33739408439d9b2L,0x715ea6720acd1f11L,0x42c1d235e7e6cc19L,
+        0x81ce6e96b990585cL } },
+    /* 59 << 168 */
+    { { 0x04e5dfe0d809c7bdL,0xd7b2580c8f1050abL,0x6d91ad78d8a4176fL,
+        0x0af556ee4e2e897cL },
+      { 0x162a8b73921de0acL,0x52ac9c227ea78400L,0xee2a4eeaefce2174L,
+        0xbe61844e6d637f79L } },
+    /* 60 << 168 */
+    { { 0x0491f1bc789a283bL,0x72d3ac3d880836f4L,0xaa1c5ea388e5402dL,
+        0x1b192421d5cc473dL },
+      { 0x5c0b99989dc84cacL,0xb0a8482d9c6e75b8L,0x639961d03a191ce2L,
+        0xda3bc8656d837930L } },
+    /* 61 << 168 */
+    { { 0xca990653056e6f8fL,0x84861c4164d133a7L,0x8b403276746abe40L,
+        0xb7b4d51aebf8e303L },
+      { 0x05b43211220a255dL,0xc997152c02419e6eL,0x76ff47b6630c2feaL,
+        0x50518677281fdadeL } },
+    /* 62 << 168 */
+    { { 0x3283b8bacf902b0bL,0x8d4b4eb537db303bL,0xcc89f42d755011bcL,
+        0xb43d74bbdd09d19bL },
+      { 0x65746bc98adba350L,0x364eaf8cb51c1927L,0x13c7659610ad72ecL,
+        0x30045121f8d40c20L } },
+    /* 63 << 168 */
+    { { 0x6d2d99b7ea7b979bL,0xcd78cd74e6fb3bcdL,0x11e45a9e86cffbfeL,
+        0x78a61cf4637024f6L },
+      { 0xd06bc8723d502295L,0xf1376854458cb288L,0xb9db26a1342f8586L,
+        0xf33effcf4beee09eL } },
+    /* 64 << 168 */
+    { { 0xd7e0c4cdb30cfb3aL,0x6d09b8c16c9db4c8L,0x40ba1a4207c8d9dfL,
+        0x6fd495f71c52c66dL },
+      { 0xfb0e169f275264daL,0x80c2b746e57d8362L,0xedd987f749ad7222L,
+        0xfdc229af4398ec7bL } },
+    /* 0 << 175 */
+    { { 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 << 175 */
+    { { 0xb0d1ed8452666a58L,0x4bcb6e00e6a9c3c2L,0x3c57411c26906408L,
+        0xcfc2075513556400L },
+      { 0xa08b1c505294dba3L,0xa30ba2868b7dd31eL,0xd70ba90e991eca74L,
+        0x094e142ce762c2b9L } },
+    /* 2 << 175 */
+    { { 0xb81d783e979f3925L,0x1efd130aaf4c89a7L,0x525c2144fd1bf7faL,
+        0x4b2969041b265a9eL },
+      { 0xed8e9634b9db65b6L,0x35c82e3203599d8aL,0xdaa7a54f403563f3L,
+        0x9df088ad022c38abL } },
+    /* 3 << 175 */
+    { { 0xe5cfb066bb3fd30aL,0x429169daeff0354eL,0x809cf8523524e36cL,
+        0x136f4fb30155be1dL },
+      { 0x4826af011fbba712L,0x6ef0f0b4506ba1a1L,0xd9928b3177aea73eL,
+        0xe2bf6af25eaa244eL } },
+    /* 4 << 175 */
+    { { 0x8d084f124237b64bL,0x688ebe99e3ecfd07L,0x57b8a70cf6845dd8L,
+        0x808fc59c5da4a325L },
+      { 0xa9032b2ba3585862L,0xb66825d5edf29386L,0xb5a5a8db431ec29bL,
+        0xbb143a983a1e8dc8L } },
+    /* 5 << 175 */
+    { { 0x35ee94ce12ae381bL,0x3a7f176c86ccda90L,0xc63a657e4606eacaL,
+        0x9ae5a38043cd04dfL },
+      { 0x9bec8d15ed251b46L,0x1f5d6d30caca5e64L,0x347b3b359ff20f07L,
+        0x4d65f034f7e4b286L } },
+    /* 6 << 175 */
+    { { 0x9e93ba24f111661eL,0xedced484b105eb04L,0x96dc9ba1f424b578L,
+        0xbf8f66b7e83e9069L },
+      { 0x872d4df4d7ed8216L,0xbf07f3778e2cbecfL,0x4281d89998e73754L,
+        0xfec85fbb8aab8708L } },
+    /* 7 << 175 */
+    { { 0x9a3c0deea5ba5b0bL,0xe6a116ce42d05299L,0xae9775fee9b02d42L,
+        0x72b05200a1545cb6L },
+      { 0xbc506f7d31a3b4eaL,0xe58930788bbd9b32L,0xc8bc5f37e4b12a97L,
+        0x6b000c064a73b671L } },
+    /* 8 << 175 */
+    { { 0x13b5bf22765fa7d0L,0x59805bf01d6a5370L,0x67a5e29d4280db98L,
+        0x4f53916f776b1ce3L },
+      { 0x714ff61f33ddf626L,0x4206238ea085d103L,0x1c50d4b7e5809ee3L,
+        0x999f450d85f8eb1dL } },
+    /* 9 << 175 */
+    { { 0x658a6051e4c79e9bL,0x1394cb73c66a9feaL,0x27f31ed5c6be7b23L,
+        0xf4c88f365aa6f8feL },
+      { 0x0fb0721f4aaa499eL,0x68b3a7d5e3fb2a6bL,0xa788097d3a92851dL,
+        0x060e7f8ae96f4913L } },
+    /* 10 << 175 */
+    { { 0x82eebe731a3a93bcL,0x42bbf465a21adc1aL,0xc10b6fa4ef030efdL,
+        0x247aa4c787b097bbL },
+      { 0x8b8dc632f60c77daL,0x6ffbc26ac223523eL,0xa4f6ff11344579cfL,
+        0x5825653c980250f6L } },
+    /* 11 << 175 */
+    { { 0xb2dd097ebc1aa2b9L,0x0788939337a0333aL,0x1cf55e7137a0db38L,
+        0x2648487f792c1613L },
+      { 0xdad013363fcef261L,0x6239c81d0eabf129L,0x8ee761de9d276be2L,
+        0x406a7a341eda6ad3L } },
+    /* 12 << 175 */
+    { { 0x4bf367ba4a493b31L,0x54f20a529bf7f026L,0xb696e0629795914bL,
+        0xcddab96d8bf236acL },
+      { 0x4ff2c70aed25ea13L,0xfa1d09eb81cbbbe7L,0x88fc8c87468544c5L,
+        0x847a670d696b3317L } },
+    /* 13 << 175 */
+    { { 0xf133421e64bcb626L,0xaea638c826dee0b5L,0xd6e7680bb310346cL,
+        0xe06f4097d5d4ced3L },
+      { 0x099614527512a30bL,0xf3d867fde589a59aL,0x2e73254f52d0c180L,
+        0x9063d8a3333c74acL } },
+    /* 14 << 175 */
+    { { 0xeda6c595d314e7bcL,0x2ee7464b467899edL,0x1cef423c0a1ed5d3L,
+        0x217e76ea69cc7613L },
+      { 0x27ccce1fe7cda917L,0x12d8016b8a893f16L,0xbcd6de849fc74f6bL,
+        0xfa5817e2f3144e61L } },
+    /* 15 << 175 */
+    { { 0x1f3541640821ee4cL,0x1583eab40bc61992L,0x7490caf61d72879fL,
+        0x998ad9f3f76ae7b2L },
+      { 0x1e181950a41157f7L,0xa9d7e1e6e8da3a7eL,0x963784eb8426b95fL,
+        0x0ee4ed6e542e2a10L } },
+    /* 16 << 175 */
+    { { 0xb79d4cc5ac751e7bL,0x93f96472fd4211bdL,0x8c72d3d2c8de4fc6L,
+        0x7b69cbf5df44f064L },
+      { 0x3da90ca2f4bf94e1L,0x1a5325f8f12894e2L,0x0a437f6c7917d60bL,
+        0x9be7048696c9cb5dL } },
+    /* 17 << 175 */
+    { { 0xb4d880bfe1dc5c05L,0xd738addaeebeeb57L,0x6f0119d3df0fe6a3L,
+        0x5c686e5566eaaf5aL },
+      { 0x9cb10b50dfd0b7ecL,0xbdd0264b6a497c21L,0xfc0935148c546c96L,
+        0x58a947fa79dbf42aL } },
+    /* 18 << 175 */
+    { { 0xc0b48d4e49ccd6d7L,0xff8fb02c88bd5580L,0xc75235e907d473b2L,
+        0x4fab1ac5a2188af3L },
+      { 0x030fa3bc97576ec0L,0xe8c946e80b7e7d2fL,0x40a5c9cc70305600L,
+        0x6d8260a9c8b013b4L } },
+    /* 19 << 175 */
+    { { 0x0368304f70bba85cL,0xad090da1a4a0d311L,0x7170e8702415eec1L,
+        0xbfba35fe8461ea47L },
+      { 0x6279019ac1e91938L,0xa47638f31afc415fL,0x36c65cbbbcba0e0fL,
+        0x02160efb034e2c48L } },
+    /* 20 << 175 */
+    { { 0xe6c51073615cd9e4L,0x498ec047f1243c06L,0x3e5a8809b17b3d8cL,
+        0x5cd99e610cc565f1L },
+      { 0x81e312df7851dafeL,0xf156f5baa79061e2L,0x80d62b71880c590eL,
+        0xbec9746f0a39faa1L } },
+    /* 21 << 175 */
+    { { 0x1d98a9c1c8ed1f7aL,0x09e43bb5a81d5ff2L,0xd5f00f680da0794aL,
+        0x412050d9661aa836L },
+      { 0xa89f7c4e90747e40L,0x6dc05ebbb62a3686L,0xdf4de847308e3353L,
+        0x53868fbb9fb53bb9L } },
+    /* 22 << 175 */
+    { { 0x2b09d2c3cfdcf7ddL,0x41a9fce3723fcab4L,0x73d905f707f57ca3L,
+        0x080f9fb1ac8e1555L },
+      { 0x7c088e849ba7a531L,0x07d35586ed9a147fL,0x602846abaf48c336L,
+        0x7320fd320ccf0e79L } },
+    /* 23 << 175 */
+    { { 0xaa780798b18bd1ffL,0x52c2e300afdd2905L,0xf27ea3d6434267cdL,
+        0x8b96d16d15605b5fL },
+      { 0x7bb310494b45706bL,0xe7f58b8e743d25f8L,0xe9b5e45b87f30076L,
+        0xd19448d65d053d5aL } },
+    /* 24 << 175 */
+    { { 0x1ecc8cb9d3210a04L,0x6bc7d463dafb5269L,0x3e59b10a67c3489fL,
+        0x1769788c65641e1bL },
+      { 0x8a53b82dbd6cb838L,0x7066d6e6236d5f22L,0x03aa1c616908536eL,
+        0xc971da0d66ae9809L } },
+    /* 25 << 175 */
+    { { 0x01b3a86bc49a2facL,0x3b8420c03092e77aL,0x020573007d6fb556L,
+        0x6941b2a1bff40a87L },
+      { 0x140b63080658ff2aL,0x878043633424ab36L,0x0253bd515751e299L,
+        0xc75bcd76449c3e3aL } },
+    /* 26 << 175 */
+    { { 0x92eb40907f8f875dL,0x9c9d754e56c26bbfL,0x158cea618110bbe7L,
+        0x62a6b802745f91eaL },
+      { 0xa79c41aac6e7394bL,0x445b6a83ad57ef10L,0x0c5277eb6ea6f40cL,
+        0x319fe96b88633365L } },
+    /* 27 << 175 */
+    { { 0x0b0fc61f385f63cbL,0x41250c8422bdd127L,0x67d153f109e942c2L,
+        0x60920d08c021ad5dL },
+      { 0x229f5746724d81a5L,0xb7ffb8925bba3299L,0x518c51a1de413032L,
+        0x2a9bfe773c2fd94cL } },
+    /* 28 << 175 */
+    { { 0xcbcde2393191f4fdL,0x43093e16d3d6ada1L,0x184579f358769606L,
+        0x2c94a8b3d236625cL },
+      { 0x6922b9c05c437d8eL,0x3d4ae423d8d9f3c8L,0xf72c31c12e7090a2L,
+        0x4ac3f5f3d76a55bdL } },
+    /* 29 << 175 */
+    { { 0x342508fc6b6af991L,0x0d5271001b5cebbdL,0xb84740d0dd440dd7L,
+        0x748ef841780162fdL },
+      { 0xa8dbfe0edfc6fafbL,0xeadfdf05f7300f27L,0x7d06555ffeba4ec9L,
+        0x12c56f839e25fa97L } },
+    /* 30 << 175 */
+    { { 0x77f84203d39b8c34L,0xed8b1be63125eddbL,0x5bbf2441f6e39dc5L,
+        0xb00f6ee66a5d678aL },
+      { 0xba456ecf57d0ea99L,0xdcae0f5817e06c43L,0x01643de40f5b4baaL,
+        0x2c324341d161b9beL } },
+    /* 31 << 175 */
+    { { 0x80177f55e126d468L,0xed325f1f76748e09L,0x6116004acfa9bdc2L,
+        0x2d8607e63a9fb468L },
+      { 0x0e573e276009d660L,0x3a525d2e8d10c5a1L,0xd26cb45c3b9009a0L,
+        0xb6b0cdc0de9d7448L } },
+    /* 32 << 175 */
+    { { 0x949c9976e1337c26L,0x6faadebdd73d68e5L,0x9e158614f1b768d9L,
+        0x22dfa5579cc4f069L },
+      { 0xccd6da17be93c6d6L,0x24866c61a504f5b9L,0x2121353c8d694da1L,
+        0x1c6ca5800140b8c6L } },
+    /* 33 << 175 */
+    { { 0xc245ad8ce964021eL,0xb83bffba032b82b3L,0xfaa220c647ef9898L,
+        0x7e8d3ac6982c948aL },
+      { 0x1faa2091bc2d124aL,0xbd54c3dd05b15ff4L,0x386bf3abc87c6fb7L,
+        0xfb2b0563fdeb6f66L } },
+    /* 34 << 175 */
+    { { 0x4e77c5575b45afb4L,0xe9ded649efb8912dL,0x7ec9bbf542f6e557L,
+        0x2570dfff62671f00L },
+      { 0x2b3bfb7888e084bdL,0xa024b238f37fe5b4L,0x44e7dc0495649aeeL,
+        0x498ca2555e7ec1d8L } },
+    /* 35 << 175 */
+    { { 0x3bc766eaaaa07e86L,0x0db6facbf3608586L,0xbadd2549bdc259c8L,
+        0x95af3c6e041c649fL },
+      { 0xb36a928c02e30afbL,0x9b5356ad008a88b8L,0x4b67a5f1cf1d9e9dL,
+        0xc6542e47a5d8d8ceL } },
+    /* 36 << 175 */
+    { { 0x73061fe87adfb6ccL,0xcc826fd398678141L,0x00e758b13c80515aL,
+        0x6afe324741485083L },
+      { 0x0fcb08b9b6ae8a75L,0xb8cf388d4acf51e1L,0x344a55606961b9d6L,
+        0x1a6778b86a97fd0cL } },
+    /* 37 << 175 */
+    { { 0xd840fdc1ecc4c7e3L,0xde9fe47d16db68ccL,0xe95f89dea3e216aaL,
+        0x84f1a6a49594a8beL },
+      { 0x7ddc7d725a7b162bL,0xc5cfda19adc817a3L,0x80a5d35078b58d46L,
+        0x93365b1382978f19L } },
+    /* 38 << 175 */
+    { { 0x2e44d22526a1fc90L,0x0d6d10d24d70705dL,0xd94b6b10d70c45f4L,
+        0x0f201022b216c079L },
+      { 0xcec966c5658fde41L,0xa8d2bc7d7e27601dL,0xbfcce3e1ff230be7L,
+        0x3394ff6b0033ffb5L } },
+    /* 39 << 175 */
+    { { 0xd890c5098132c9afL,0xaac4b0eb361e7868L,0x5194ded3e82d15aaL,
+        0x4550bd2e23ae6b7dL },
+      { 0x3fda318eea5399d4L,0xd989bffa91638b80L,0x5ea124d0a14aa12dL,
+        0x1fb1b8993667b944L } },
+    /* 40 << 175 */
+    { { 0x95ec796944c44d6aL,0x91df144a57e86137L,0x915fd62073adac44L,
+        0x8f01732d59a83801L },
+      { 0xec579d253aa0a633L,0x06de5e7cc9d6d59cL,0xc132f958b1ef8010L,
+        0x29476f96e65c1a02L } },
+    /* 41 << 175 */
+    { { 0x336a77c0d34c3565L,0xef1105b21b9f1e9eL,0x63e6d08bf9e08002L,
+        0x9aff2f21c613809eL },
+      { 0xb5754f853a80e75dL,0xde71853e6bbda681L,0x86f041df8197fd7aL,
+        0x8b332e08127817faL } },
+    /* 42 << 175 */
+    { { 0x05d99be8b9c20cdaL,0x89f7aad5d5cd0c98L,0x7ef936fe5bb94183L,
+        0x92ca0753b05cd7f2L },
+      { 0x9d65db1174a1e035L,0x02628cc813eaea92L,0xf2d9e24249e4fbf2L,
+        0x94fdfd9be384f8b7L } },
+    /* 43 << 175 */
+    { { 0x65f5605463428c6bL,0x2f7205b290b409a5L,0xf778bb78ff45ae11L,
+        0xa13045bec5ee53b2L },
+      { 0xe00a14ff03ef77feL,0x689cd59fffef8befL,0x3578f0ed1e9ade22L,
+        0xe99f3ec06268b6a8L } },
+    /* 44 << 175 */
+    { { 0xa2057d91ea1b3c3eL,0x2d1a7053b8823a4aL,0xabbb336a2cca451eL,
+        0xcd2466e32218bb5dL },
+      { 0x3ac1f42fc8cb762dL,0x7e312aae7690211fL,0xebb9bd7345d07450L,
+        0x207c4b8246c2213fL } },
+    /* 45 << 175 */
+    { { 0x99d425c1375913ecL,0x94e45e9667908220L,0xc08f3087cd67dbf6L,
+        0xa5670fbec0887056L },
+      { 0x6717b64a66f5b8fcL,0xd5a56aea786fec28L,0xa8c3f55fc0ff4952L,
+        0xa77fefae457ac49bL } },
+    /* 46 << 175 */
+    { { 0x29882d7c98379d44L,0xd000bdfb509edc8aL,0xc6f95979e66fe464L,
+        0x504a6115fa61bde0L },
+      { 0x56b3b871effea31aL,0x2d3de26df0c21a54L,0x21dbff31834753bfL,
+        0xe67ecf4969269d86L } },
+    /* 47 << 175 */
+    { { 0x7a176952151fe690L,0x035158047f2adb5fL,0xee794b15d1b62a8dL,
+        0xf004ceecaae454e6L },
+      { 0x0897ea7cf0386facL,0x3b62ff12d1fca751L,0x154181df1b7a04ecL,
+        0x2008e04afb5847ecL } },
+    /* 48 << 175 */
+    { { 0xd147148e41dbd772L,0x2b419f7322942654L,0x669f30d3e9c544f7L,
+        0x52a2c223c8540149L },
+      { 0x5da9ee14634dfb02L,0x5f074ff0f47869f3L,0x74ee878da3933accL,
+        0xe65106514fe35ed1L } },
+    /* 49 << 175 */
+    { { 0xb3eb9482f1012e7aL,0x51013cc0a8a566aeL,0xdd5e924347c00d3bL,
+        0x7fde089d946bb0e5L },
+      { 0x030754fec731b4b3L,0x12a136a499fda062L,0x7c1064b85a1a35bcL,
+        0xbf1f5763446c84efL } },
+    /* 50 << 175 */
+    { { 0xed29a56da16d4b34L,0x7fba9d09dca21c4fL,0x66d7ac006d8de486L,
+        0x6006198773a2a5e1L },
+      { 0x8b400f869da28ff0L,0x3133f70843c4599cL,0x9911c9b8ee28cb0dL,
+        0xcd7e28748e0af61dL } },
+    /* 51 << 175 */
+    { { 0x5a85f0f272ed91fcL,0x85214f319cd4a373L,0x881fe5be1925253cL,
+        0xd8dc98e091e8bc76L },
+      { 0x7120affe585cc3a2L,0x724952ed735bf97aL,0x5581e7dc3eb34581L,
+        0x5cbff4f2e52ee57dL } },
+    /* 52 << 175 */
+    { { 0x8d320a0e87d8cc7bL,0x9beaa7f3f1d280d0L,0x7a0b95719beec704L,
+        0x9126332e5b7f0057L },
+      { 0x01fbc1b48ed3bd6dL,0x35bb2c12d945eb24L,0x6404694e9a8ae255L,
+        0xb6092eec8d6abfb3L } },
+    /* 53 << 175 */
+    { { 0x4d76143fcc058865L,0x7b0a5af26e249922L,0x8aef94406a50d353L,
+        0xe11e4bcc64f0e07aL },
+      { 0x4472993aa14a90faL,0x7706e20cba0c51d4L,0xf403292f1532672dL,
+        0x52573bfa21829382L } },
+    /* 54 << 175 */
+    { { 0x6a7bb6a93b5bdb83L,0x08da65c0a4a72318L,0xc58d22aa63eb065fL,
+        0x1717596c1b15d685L },
+      { 0x112df0d0b266d88bL,0xf688ae975941945aL,0x487386e37c292cacL,
+        0x42f3b50d57d6985cL } },
+    /* 55 << 175 */
+    { { 0x6da4f9986a90fc34L,0xc8f257d365ca8a8dL,0xc2feabca6951f762L,
+        0xe1bc81d074c323acL },
+      { 0x1bc68f67251a2a12L,0x10d86587be8a70dcL,0xd648af7ff0f84d2eL,
+        0xf0aa9ebc6a43ac92L } },
+    /* 56 << 175 */
+    { { 0x69e3be0427596893L,0xb6bb02a645bf452bL,0x0875c11af4c698c8L,
+        0x6652b5c7bece3794L },
+      { 0x7b3755fd4f5c0499L,0x6ea16558b5532b38L,0xd1c69889a2e96ef7L,
+        0x9c773c3a61ed8f48L } },
+    /* 57 << 175 */
+    { { 0x2b653a409b323abcL,0xe26605e1f0e1d791L,0x45d410644a87157aL,
+        0x8f9a78b7cbbce616L },
+      { 0xcf1e44aac407edddL,0x81ddd1d8a35b964fL,0x473e339efd083999L,
+        0x6c94bdde8e796802L } },
+    /* 58 << 175 */
+    { { 0x5a304ada8545d185L,0x82ae44ea738bb8cbL,0x628a35e3df87e10eL,
+        0xd3624f3da15b9fe3L },
+      { 0xcc44209b14be4254L,0x7d0efcbcbdbc2ea5L,0x1f60336204c37bbeL,
+        0x21f363f556a5852cL } },
+    /* 59 << 175 */
+    { { 0xa1503d1ca8501550L,0x2251e0e1d8ab10bbL,0xde129c966961c51cL,
+        0x1f7246a481910f68L },
+      { 0x2eb744ee5f2591f2L,0x3c47d33f5e627157L,0x4d6d62c922f3bd68L,
+        0x6120a64bcb8df856L } },
+    /* 60 << 175 */
+    { { 0x3a9ac6c07b5d07dfL,0xa92b95587ef39783L,0xe128a134ab3a9b4fL,
+        0x41c18807b1252f05L },
+      { 0xfc7ed08980ba9b1cL,0xac8dc6dec532a9ddL,0xbf829cef55246809L,
+        0x101b784f5b4ee80fL } },
+    /* 61 << 175 */
+    { { 0xc09945bbb6f11603L,0x57b09dbe41d2801eL,0xfba5202fa97534a8L,
+        0x7fd8ae5fc17b9614L },
+      { 0xa50ba66678308435L,0x9572f77cd3868c4dL,0x0cef7bfd2dd7aab0L,
+        0xe7958e082c7c79ffL } },
+    /* 62 << 175 */
+    { { 0x81262e4225346689L,0x716da290b07c7004L,0x35f911eab7950ee3L,
+        0x6fd72969261d21b5L },
+      { 0x5238980308b640d3L,0x5b0026ee887f12a1L,0x20e21660742e9311L,
+        0x0ef6d5415ff77ff7L } },
+    /* 63 << 175 */
+    { { 0x969127f0f9c41135L,0xf21d60c968a64993L,0x656e5d0ce541875cL,
+        0xf1e0f84ea1d3c233L },
+      { 0x9bcca35906002d60L,0xbe2da60c06191552L,0x5da8bbae61181ec3L,
+        0x9f04b82365806f19L } },
+    /* 64 << 175 */
+    { { 0xf1604a7dd4b79bb8L,0xaee806fb52c878c8L,0x34144f118d47b8e8L,
+        0x72edf52b949f9054L },
+      { 0xebfca84e2127015aL,0x9051d0c09cb7cef3L,0x86e8fe58296deec8L,
+        0x33b2818841010d74L } },
+    /* 0 << 182 */
+    { { 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 << 182 */
+    { { 0x01079383171b445fL,0x9bcf21e38131ad4cL,0x8cdfe205c93987e8L,
+        0xe63f4152c92e8c8fL },
+      { 0x729462a930add43dL,0x62ebb143c980f05aL,0x4f3954e53b06e968L,
+        0xfe1d75ad242cf6b1L } },
+    /* 2 << 182 */
+    { { 0x5f95c6c7af8685c8L,0xd4c1c8ce2f8f01aaL,0xc44bbe322574692aL,
+        0xb8003478d4a4a068L },
+      { 0x7c8fc6e52eca3cdbL,0xea1db16bec04d399L,0xb05bc82e8f2bc5cfL,
+        0x763d517ff44793d2L } },
+    /* 3 << 182 */
+    { { 0x4451c1b808bd98d0L,0x644b1cd46575f240L,0x6907eb337375d270L,
+        0x56c8bebdfa2286bdL },
+      { 0xc713d2acc4632b46L,0x17da427aafd60242L,0x313065b7c95c7546L,
+        0xf8239898bf17a3deL } },
+    /* 4 << 182 */
+    { { 0xf3b7963f4c830320L,0x842c7aa0903203e3L,0xaf22ca0ae7327afbL,
+        0x38e13092967609b6L },
+      { 0x73b8fb62757558f1L,0x3cc3e831f7eca8c1L,0xe4174474f6331627L,
+        0xa77989cac3c40234L } },
+    /* 5 << 182 */
+    { { 0xe5fd17a144a081e0L,0xd797fb7db70e296aL,0x2b472b30481f719cL,
+        0x0e632a98fe6f8c52L },
+      { 0x89ccd116c5f0c284L,0xf51088af2d987c62L,0x2a2bccda4c2de6cfL,
+        0x810f9efef679f0f9L } },
+    /* 6 << 182 */
+    { { 0xb0f394b97ffe4b3eL,0x0b691d21e5fa5d21L,0xb0bd77479dfbbc75L,
+        0xd2830fdafaf78b00L },
+      { 0xf78c249c52434f57L,0x4b1f754598096dabL,0x73bf6f948ff8c0b3L,
+        0x34aef03d454e134cL } },
+    /* 7 << 182 */
+    { { 0xf8d151f4b7ac7ec5L,0xd6ceb95ae50da7d5L,0xa1b492b0dc3a0eb8L,
+        0x75157b69b3dd2863L },
+      { 0xe2c4c74ec5413d62L,0xbe329ff7bc5fc4c7L,0x835a2aea60fa9ddaL,
+        0xf117f5ad7445cb87L } },
+    /* 8 << 182 */
+    { { 0xae8317f4b0166f7aL,0xfbd3e3f7ceec74e6L,0xfdb516ace0874bfdL,
+        0x3d846019c681f3a3L },
+      { 0x0b12ee5c7c1620b0L,0xba68b4dd2b63c501L,0xac03cd326668c51eL,
+        0x2a6279f74e0bcb5bL } },
+    /* 9 << 182 */
+    { { 0x17bd69b06ae85c10L,0x729469791dfdd3a6L,0xd9a032682c078becL,
+        0x41c6a658bfd68a52L },
+      { 0xcdea10240e023900L,0xbaeec121b10d144dL,0x5a600e74058ab8dcL,
+        0x1333af21bb89ccddL } },
+    /* 10 << 182 */
+    { { 0xdf25eae03aaba1f1L,0x2cada16e3b7144cfL,0x657ee27d71ab98bcL,
+        0x99088b4c7a6fc96eL },
+      { 0x05d5c0a03549dbd4L,0x42cbdf8ff158c3acL,0x3fb6b3b087edd685L,
+        0x22071cf686f064d0L } },
+    /* 11 << 182 */
+    { { 0xd2d6721fff2811e5L,0xdb81b703fe7fae8cL,0x3cfb74efd3f1f7bbL,
+        0x0cdbcd7616cdeb5dL },
+      { 0x4f39642a566a808cL,0x02b74454340064d6L,0xfabbadca0528fa6fL,
+        0xe4c3074cd3fc0bb6L } },
+    /* 12 << 182 */
+    { { 0xb32cb8b0b796d219L,0xc3e95f4f34741dd9L,0x8721212568edf6f5L,
+        0x7a03aee4a2b9cb8eL },
+      { 0x0cd3c376f53a89aaL,0x0d8af9b1948a28dcL,0xcf86a3f4902ab04fL,
+        0x8aacb62a7f42002dL } },
+    /* 13 << 182 */
+    { { 0x106985ebf62ffd52L,0xe670b54e5797bf10L,0x4b405209c5e30aefL,
+        0x12c97a204365b5e9L },
+      { 0x104646ce1fe32093L,0x13cb4ff63907a8c9L,0x8b9f30d1d46e726bL,
+        0xe1985e21aba0f499L } },
+    /* 14 << 182 */
+    { { 0xc573dea910a230cdL,0x24f46a93cd30f947L,0xf2623fcfabe2010aL,
+        0x3f278cb273f00e4fL },
+      { 0xed55c67d50b920ebL,0xf1cb9a2d8e760571L,0x7c50d1090895b709L,
+        0x4207cf07190d4369L } },
+    /* 15 << 182 */
+    { { 0x3b027e81c4127fe1L,0xa9f8b9ad3ae9c566L,0x5ab10851acbfbba5L,
+        0xa747d648569556f5L },
+      { 0xcc172b5c2ba97bf7L,0x15e0f77dbcfa3324L,0xa345b7977686279dL,
+        0x5a723480e38003d3L } },
+    /* 16 << 182 */
+    { { 0xfd8e139f8f5fcda8L,0xf3e558c4bdee5bfdL,0xd76cbaf4e33f9f77L,
+        0x3a4c97a471771969L },
+      { 0xda27e84bf6dce6a7L,0xff373d9613e6c2d1L,0xf115193cd759a6e9L,
+        0x3f9b702563d2262cL } },
+    /* 17 << 182 */
+    { { 0xd9764a31317cd062L,0x30779d8e199f8332L,0xd807410616b11b0bL,
+        0x7917ab9f78aeaed8L },
+      { 0xb67a9cbe28fb1d8eL,0x2e313563136eda33L,0x010b7069a371a86cL,
+        0x44d90fa26744e6b7L } },
+    /* 18 << 182 */
+    { { 0x68190867d6b3e243L,0x9fe6cd9d59048c48L,0xb900b02895731538L,
+        0xa012062f32cae04fL },
+      { 0x8107c8bc9399d082L,0x47e8c54a41df12e2L,0x14ba5117b6ef3f73L,
+        0x22260bea81362f0bL } },
+    /* 19 << 182 */
+    { { 0x90ea261e1a18cc20L,0x2192999f2321d636L,0xef64d314e311b6a0L,
+        0xd7401e4c3b54a1f5L },
+      { 0x190199836fbca2baL,0x46ad32938fbffc4bL,0xa142d3f63786bf40L,
+        0xeb5cbc26b67039fcL } },
+    /* 20 << 182 */
+    { { 0x9cb0ae6c252bd479L,0x05e0f88a12b5848fL,0x78f6d2b2a5c97663L,
+        0x6f6e149bc162225cL },
+      { 0xe602235cde601a89L,0xd17bbe98f373be1fL,0xcaf49a5ba8471827L,
+        0x7e1a0a8518aaa116L } },
+    /* 21 << 182 */
+    { { 0x6c833196270580c3L,0x1e233839f1c98a14L,0x67b2f7b4ae34e0a5L,
+        0x47ac8745d8ce7289L },
+      { 0x2b74779a100dd467L,0x274a43374ee50d09L,0x603dcf1383608bc9L,
+        0xcd9da6c3c89e8388L } },
+    /* 22 << 182 */
+    { { 0x2660199f355116acL,0xcc38bb59b6d18eedL,0x3075f31f2f4bc071L,
+        0x9774457f265dc57eL },
+      { 0x06a6a9c8c6db88bbL,0x6429d07f4ec98e04L,0x8d05e57b05ecaa8bL,
+        0x20f140b17872ea7bL } },
+    /* 23 << 182 */
+    { { 0xdf8c0f09ca494693L,0x48d3a020f252e909L,0x4c5c29af57b14b12L,
+        0x7e6fa37dbf47ad1cL },
+      { 0x66e7b50649a0c938L,0xb72c0d486be5f41fL,0x6a6242b8b2359412L,
+        0xcd35c7748e859480L } },
+    /* 24 << 182 */
+    { { 0x12536fea87baa627L,0x58c1fec1f72aa680L,0x6c29b637601e5dc9L,
+        0x9e3c3c1cde9e01b9L },
+      { 0xefc8127b2bcfe0b0L,0x351071022a12f50dL,0x6ccd6cb14879b397L,
+        0xf792f804f8a82f21L } },
+    /* 25 << 182 */
+    { { 0x509d4804a9b46402L,0xedddf85dc10f0850L,0x928410dc4b6208aaL,
+        0xf6229c46391012dcL },
+      { 0xc5a7c41e7727b9b6L,0x289e4e4baa444842L,0x049ba1d9e9a947eaL,
+        0x44f9e47f83c8debcL } },
+    /* 26 << 182 */
+    { { 0xfa77a1fe611f8b8eL,0xfd2e416af518f427L,0xc5fffa70114ebac3L,
+        0xfe57c4e95d89697bL },
+      { 0xfdd053acb1aaf613L,0x31df210fea585a45L,0x318cc10e24985034L,
+        0x1a38efd15f1d6130L } },
+    /* 27 << 182 */
+    { { 0xbf86f2370b1e9e21L,0xb258514d1dbe88aaL,0x1e38a58890c1baf9L,
+        0x2936a01ebdb9b692L },
+      { 0xd576de986dd5b20cL,0xb586bf7170f98ecfL,0xcccf0f12c42d2fd7L,
+        0x8717e61cfb35bd7bL } },
+    /* 28 << 182 */
+    { { 0x8b1e572235e6fc06L,0x3477728f0b3e13d5L,0x150c294daa8a7372L,
+        0xc0291d433bfa528aL },
+      { 0xc6c8bc67cec5a196L,0xdeeb31e45c2e8a7cL,0xba93e244fb6e1c51L,
+        0xb9f8b71b2e28e156L } },
+    /* 29 << 182 */
+    { { 0xce65a287968a2ab9L,0xe3c5ce6946bbcb1fL,0xf8c835b9e7ae3f30L,
+        0x16bbee26ff72b82bL },
+      { 0x665e2017fd42cd22L,0x1e139970f8b1d2a0L,0x125cda2979204932L,
+        0x7aee94a549c3bee5L } },
+    /* 30 << 182 */
+    { { 0x68c7016089821a66L,0xf7c376788f981669L,0xd90829fc48cc3645L,
+        0x346af049d70addfcL },
+      { 0x2057b232370bf29cL,0xf90c73ce42e650eeL,0xe03386eaa126ab90L,
+        0x0e266e7e975a087bL } },
+    /* 31 << 182 */
+    { { 0x80578eb90fca65d9L,0x7e2989ea16af45b8L,0x7438212dcac75a4eL,
+        0x38c7ca394fef36b8L },
+      { 0x8650c494d402676aL,0x26ab5a66f72c7c48L,0x4e6cb426ce3a464eL,
+        0xf8f998962b72f841L } },
+    /* 32 << 182 */
+    { { 0x8c3184911a335cc8L,0x563459ba6a5913e4L,0x1b920d61c7b32919L,
+        0x805ab8b6a02425adL },
+      { 0x2ac512da8d006086L,0x6ca4846abcf5c0fdL,0xafea51d8ac2138d7L,
+        0xcb647545344cd443L } },
+    /* 33 << 182 */
+    { { 0x0429ee8fbd7d9040L,0xee66a2de819b9c96L,0x54f9ec25dea7d744L,
+        0x2ffea642671721bbL },
+      { 0x4f19dbd1114344eaL,0x04304536fd0dbc8bL,0x014b50aa29ec7f91L,
+        0xb5fc22febb06014dL } },
+    /* 34 << 182 */
+    { { 0x60d963a91ee682e0L,0xdf48abc0fe85c727L,0x0cadba132e707c2dL,
+        0xde608d3aa645aeffL },
+      { 0x05f1c28bedafd883L,0x3c362edebd94de1fL,0x8dd0629d13593e41L,
+        0x0a5e736f766d6eafL } },
+    /* 35 << 182 */
+    { { 0xbfa92311f68cf9d1L,0xa4f9ef87c1797556L,0x10d75a1f5601c209L,
+        0x651c374c09b07361L },
+      { 0x49950b5888b5ceadL,0x0ef000586fa9dbaaL,0xf51ddc264e15f33aL,
+        0x1f8b5ca62ef46140L } },
+    /* 36 << 182 */
+    { { 0x343ac0a3ee9523f0L,0xbb75eab2975ea978L,0x1bccf332107387f4L,
+        0x790f92599ab0062eL },
+      { 0xf1a363ad1e4f6a5fL,0x06e08b8462519a50L,0x609151877265f1eeL,
+        0x6a80ca3493ae985eL } },
+    /* 37 << 182 */
+    { { 0x81b29768aaba4864L,0xb13cabf28d52a7d6L,0xb5c363488ead03f1L,
+        0xc932ad9581c7c1c0L },
+      { 0x5452708ecae1e27bL,0x9dac42691b0df648L,0x233e3f0cdfcdb8bcL,
+        0xe6ceccdfec540174L } },
+    /* 38 << 182 */
+    { { 0xbd0d845e95081181L,0xcc8a7920699355d5L,0x111c0f6dc3b375a8L,
+        0xfd95bc6bfd51e0dcL },
+      { 0x4a106a266888523aL,0x4d142bd6cb01a06dL,0x79bfd289adb9b397L,
+        0x0bdbfb94e9863914L } },
+    /* 39 << 182 */
+    { { 0x29d8a2291660f6a6L,0x7f6abcd6551c042dL,0x13039deb0ac3ffe8L,
+        0xa01be628ec8523fbL },
+      { 0x6ea341030ca1c328L,0xc74114bdb903928eL,0x8aa4ff4e9e9144b0L,
+        0x7064091f7f9a4b17L } },
+    /* 40 << 182 */
+    { { 0xa3f4f521e447f2c4L,0x81b8da7a604291f0L,0xd680bc467d5926deL,
+        0x84f21fd534a1202fL },
+      { 0x1d1e31814e9df3d8L,0x1ca4861a39ab8d34L,0x809ddeec5b19aa4aL,
+        0x59f72f7e4d329366L } },
+    /* 41 << 182 */
+    { { 0xa2f93f41386d5087L,0x40bf739cdd67d64fL,0xb449420566702158L,
+        0xc33c65be73b1e178L },
+      { 0xcdcd657c38ca6153L,0x97f4519adc791976L,0xcc7c7f29cd6e1f39L,
+        0x38de9cfb7e3c3932L } },
+    /* 42 << 182 */
+    { { 0xe448eba37b793f85L,0xe9f8dbf9f067e914L,0xc0390266f114ae87L,
+        0x39ed75a7cd6a8e2aL },
+      { 0xadb148487ffba390L,0x67f8cb8b6af9bc09L,0x322c38489c7476dbL,
+        0xa320fecf52a538d6L } },
+    /* 43 << 182 */
+    { { 0xe0493002b2aced2bL,0xdfba1809616bd430L,0x531c4644c331be70L,
+        0xbc04d32e90d2e450L },
+      { 0x1805a0d10f9f142dL,0x2c44a0c547ee5a23L,0x31875a433989b4e3L,
+        0x6b1949fd0c063481L } },
+    /* 44 << 182 */
+    { { 0x2dfb9e08be0f4492L,0x3ff0da03e9d5e517L,0x03dbe9a1f79466a8L,
+        0x0b87bcd015ea9932L },
+      { 0xeb64fc83ab1f58abL,0x6d9598da817edc8aL,0x699cff661d3b67e5L,
+        0x645c0f2992635853L } },
+    /* 45 << 182 */
+    { { 0x253cdd82eabaf21cL,0x82b9602a2241659eL,0x2cae07ec2d9f7091L,
+        0xbe4c720c8b48cd9bL },
+      { 0x6ce5bc036f08d6c9L,0x36e8a997af10bf40L,0x83422d213e10ff12L,
+        0x7b26d3ebbcc12494L } },
+    /* 46 << 182 */
+    { { 0xb240d2d0c9469ad6L,0xc4a11b4d30afa05bL,0x4b604acedd6ba286L,
+        0x184866003ee2864cL },
+      { 0x5869d6ba8d9ce5beL,0x0d8f68c5ff4bfb0dL,0xb69f210b5700cf73L,
+        0x61f6653a6d37c135L } },
+    /* 47 << 182 */
+    { { 0xff3d432b5aff5a48L,0x0d81c4b972ba3a69L,0xee879ae9fa1899efL,
+        0xbac7e2a02d6acafdL },
+      { 0xd6d93f6c1c664399L,0x4c288de15bcb135dL,0x83031dab9dab7cbfL,
+        0xfe23feb03abbf5f0L } },
+    /* 48 << 182 */
+    { { 0x9f1b2466cdedca85L,0x140bb7101a09538cL,0xac8ae8515e11115dL,
+        0x0d63ff676f03f59eL },
+      { 0x755e55517d234afbL,0x61c2db4e7e208fc1L,0xaa9859cef28a4b5dL,
+        0xbdd6d4fc34af030fL } },
+    /* 49 << 182 */
+    { { 0xd1c4a26d3be01cb1L,0x9ba14ffc243aa07cL,0xf95cd3a9b2503502L,
+        0xe379bc067d2a93abL },
+      { 0x3efc18e9d4ca8d68L,0x083558ec80bb412aL,0xd903b9409645a968L,
+        0xa499f0b69ba6054fL } },
+    /* 50 << 182 */
+    { { 0x208b573cb8349abeL,0x3baab3e530b4fc1cL,0x87e978bacb524990L,
+        0x3524194eccdf0e80L },
+      { 0x627117257d4bcc42L,0xe90a3d9bb90109baL,0x3b1bdd571323e1e0L,
+        0xb78e9bd55eae1599L } },
+    /* 51 << 182 */
+    { { 0x0794b7469e03d278L,0x80178605d70e6297L,0x171792f899c97855L,
+        0x11b393eef5a86b5cL },
+      { 0x48ef6582d8884f27L,0xbd44737abf19ba5fL,0x8698de4ca42062c6L,
+        0x8975eb8061ce9c54L } },
+    /* 52 << 182 */
+    { { 0xd50e57c7d7fe71f3L,0x15342190bc97ce38L,0x51bda2de4df07b63L,
+        0xba12aeae200eb87dL },
+      { 0xabe135d2a9b4f8f6L,0x04619d65fad6d99cL,0x4a6683a77994937cL,
+        0x7a778c8b6f94f09aL } },
+    /* 53 << 182 */
+    { { 0x8c50862320a71b89L,0x241a2aed1c229165L,0x352be595aaf83a99L,
+        0x9fbfee7f1562bac8L },
+      { 0xeaf658b95c4017e3L,0x1dc7f9e015120b86L,0xd84f13dd4c034d6fL,
+        0x283dd737eaea3038L } },
+    /* 54 << 182 */
+    { { 0x197f2609cd85d6a2L,0x6ebbc345fae60177L,0xb80f031b4e12fedeL,
+        0xde55d0c207a2186bL },
+      { 0x1fb3e37f24dcdd5aL,0x8d602da57ed191fbL,0x108fb05676023e0dL,
+        0x70178c71459c20c0L } },
+    /* 55 << 182 */
+    { { 0xfad5a3863fe54cf0L,0xa4a3ec4f02bbb475L,0x1aa5ec20919d94d7L,
+        0x5d3b63b5a81e4ab3L },
+      { 0x7fa733d85ad3d2afL,0xfbc586ddd1ac7a37L,0x282925de40779614L,
+        0xfe0ffffbe74a242aL } },
+    /* 56 << 182 */
+    { { 0x3f39e67f906151e5L,0xcea27f5f55e10649L,0xdca1d4e1c17cf7b7L,
+        0x0c326d122fe2362dL },
+      { 0x05f7ac337dd35df3L,0x0c3b7639c396dbdfL,0x0912f5ac03b7db1cL,
+        0x9dea4b705c9ed4a9L } },
+    /* 57 << 182 */
+    { { 0x475e6e53aae3f639L,0xfaba0e7cfc278bacL,0x16f9e2219490375fL,
+        0xaebf9746a5a7ed0aL },
+      { 0x45f9af3ff41ad5d6L,0x03c4623cb2e99224L,0x82c5bb5cb3cf56aaL,
+        0x6431181934567ed3L } },
+    /* 58 << 182 */
+    { { 0xec57f2118be489acL,0x2821895db9a1104bL,0x610dc8756064e007L,
+        0x8e526f3f5b20d0feL },
+      { 0x6e71ca775b645aeeL,0x3d1dcb9f800e10ffL,0x36b51162189cf6deL,
+        0x2c5a3e306bb17353L } },
+    /* 59 << 182 */
+    { { 0xc186cd3e2a6c6fbfL,0xa74516fa4bf97906L,0x5b4b8f4b279d6901L,
+        0x0c4e57b42b573743L },
+      { 0x75fdb229b6e386b6L,0xb46793fd99deac27L,0xeeec47eacf712629L,
+        0xe965f3c4cbc3b2ddL } },
+    /* 60 << 182 */
+    { { 0x8dd1fb83425c6559L,0x7fc00ee60af06fdaL,0xe98c922533d956dfL,
+        0x0f1ef3354fbdc8a2L },
+      { 0x2abb5145b79b8ea2L,0x40fd2945bdbff288L,0x6a814ac4d7185db7L,
+        0xc4329d6fc084609aL } },
+    /* 61 << 182 */
+    { { 0xc9ba7b52ed1be45dL,0x891dd20de4cd2c74L,0x5a4d4a7f824139b1L,
+        0x66c17716b873c710L },
+      { 0x5e5bc1412843c4e0L,0xd5ac4817b97eb5bfL,0xc0f8af54450c95c7L,
+        0xc91b3fa0318406c5L } },
+    /* 62 << 182 */
+    { { 0x360c340aab9d97f8L,0xfb57bd0790a2d611L,0x4339ae3ca6a6f7e5L,
+        0x9c1fcd2a2feb8a10L },
+      { 0x972bcca9c7ea7432L,0x1b0b924c308076f6L,0x80b2814a2a5b4ca5L,
+        0x2f78f55b61ef3b29L } },
+    /* 63 << 182 */
+    { { 0xf838744ac18a414fL,0xc611eaae903d0a86L,0x94dabc162a453f55L,
+        0xe6f2e3da14efb279L },
+      { 0x5b7a60179320dc3cL,0x692e382f8df6b5a4L,0x3f5e15e02d40fa90L,
+        0xc87883ae643dd318L } },
+    /* 64 << 182 */
+    { { 0x511053e453544774L,0x834d0ecc3adba2bcL,0x4215d7f7bae371f5L,
+        0xfcfd57bf6c8663bcL },
+      { 0xded2383dd6901b1dL,0x3b49fbb4b5587dc3L,0xfd44a08d07625f62L,
+        0x3ee4d65b9de9b762L } },
+    /* 0 << 189 */
+    { { 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 << 189 */
+    { { 0x64e5137d0d63d1faL,0x658fc05202a9d89fL,0x4889487450436309L,
+        0xe9ae30f8d598da61L },
+      { 0x2ed710d1818baf91L,0xe27e9e068b6a0c20L,0x1e28dcfb1c1a6b44L,
+        0x883acb64d6ac57dcL } },
+    /* 2 << 189 */
+    { { 0x8735728dc2c6ff70L,0x79d6122fc5dc2235L,0x23f5d00319e277f9L,
+        0x7ee84e25dded8cc7L },
+      { 0x91a8afb063cd880aL,0x3f3ea7c63574af60L,0x0cfcdc8402de7f42L,
+        0x62d0792fb31aa152L } },
+    /* 3 << 189 */
+    { { 0x8e1b4e438a5807ceL,0xad283893e4109a7eL,0xc30cc9cbafd59ddaL,
+        0xf65f36c63d8d8093L },
+      { 0xdf31469ea60d32b2L,0xee93df4b3e8191c8L,0x9c1017c5355bdeb5L,
+        0xd26231858616aa28L } },
+    /* 4 << 189 */
+    { { 0xb02c83f9dec31a21L,0x988c8b236ad9d573L,0x53e983aea57be365L,
+        0xe968734d646f834eL },
+      { 0x9137ea8f5da6309bL,0x10f3a624c1f1ce16L,0x782a9ea2ca440921L,
+        0xdf94739e5b46f1b5L } },
+    /* 5 << 189 */
+    { { 0x9f9be006cce85c9bL,0x360e70d6a4c7c2d3L,0x2cd5beeaaefa1e60L,
+        0x64cf63c08c3d2b6dL },
+      { 0xfb107fa3e1cf6f90L,0xb7e937c6d5e044e6L,0x74e8ca78ce34db9fL,
+        0x4f8b36c13e210bd0L } },
+    /* 6 << 189 */
+    { { 0x1df165a434a35ea8L,0x3418e0f74d4412f6L,0x5af1f8af518836c3L,
+        0x42ceef4d130e1965L },
+      { 0x5560ca0b543a1957L,0xc33761e5886cb123L,0x66624b1ffe98ed30L,
+        0xf772f4bf1090997dL } },
+    /* 7 << 189 */
+    { { 0xf4e540bb4885d410L,0x7287f8109ba5f8d7L,0x22d0d865de98dfb1L,
+        0x49ff51a1bcfbb8a3L },
+      { 0xb6b6fa536bc3012eL,0x3d31fd72170d541dL,0x8018724f4b0f4966L,
+        0x79e7399f87dbde07L } },
+    /* 8 << 189 */
+    { { 0x56f8410ef4f8b16aL,0x97241afec47b266aL,0x0a406b8e6d9c87c1L,
+        0x803f3e02cd42ab1bL },
+      { 0x7f0309a804dbec69L,0xa83b85f73bbad05fL,0xc6097273ad8e197fL,
+        0xc097440e5067adc1L } },
+    /* 9 << 189 */
+    { { 0x730eafb63524ff16L,0xd7f9b51e823fc6ceL,0x27bd0d32443e4ac0L,
+        0x40c59ad94d66f217L },
+      { 0x6c33136f17c387a4L,0x5043b8d5eb86804dL,0x74970312675a73c9L,
+        0x838fdb31f16669b6L } },
+    /* 10 << 189 */
+    { { 0xc507b6dd418e7dddL,0x39888d93472f19d6L,0x7eae26be0c27eb4dL,
+        0x17b53ed3fbabb884L },
+      { 0xfc27021b2b01ae4fL,0x88462e87cf488682L,0xbee096ec215e2d87L,
+        0xeb2fea9ad242e29bL } },
+    /* 11 << 189 */
+    { { 0x5d985b5fb821fc28L,0x89d2e197dc1e2ad2L,0x55b566b89030ba62L,
+        0xe3fd41b54f41b1c6L },
+      { 0xb738ac2eb9a96d61L,0x7f8567ca369443f4L,0x8698622df803a440L,
+        0x2b5862368fe2f4dcL } },
+    /* 12 << 189 */
+    { { 0xbbcc00c756b95bceL,0x5ec03906616da680L,0x79162ee672214252L,
+        0x43132b6386a892d2L },
+      { 0x4bdd3ff22f3263bfL,0xd5b3733c9cd0a142L,0x592eaa8244415ccbL,
+        0x663e89248d5474eaL } },
+    /* 13 << 189 */
+    { { 0x8058a25e5236344eL,0x82e8df9dbda76ee6L,0xdcf6efd811cc3d22L,
+        0x00089cda3b4ab529L },
+      { 0x91d3a071bd38a3dbL,0x4ea97fc0ef72b925L,0x0c9fc15bea3edf75L,
+        0x5a6297cda4348ed3L } },
+    /* 14 << 189 */
+    { { 0x0d38ab35ce7c42d4L,0x9fd493ef82feab10L,0x46056b6d82111b45L,
+        0xda11dae173efc5c3L },
+      { 0xdc7402785545a7fbL,0xbdb2601c40d507e6L,0x121dfeeb7066fa58L,
+        0x214369a839ae8c2aL } },
+    /* 15 << 189 */
+    { { 0x195709cb06e0956cL,0x4c9d254f010cd34bL,0xf51e13f70471a532L,
+        0xe19d67911e73054dL },
+      { 0xf702a628db5c7be3L,0xc7141218b24dde05L,0xdc18233cf29b2e2eL,
+        0x3a6bd1e885342dbaL } },
+    /* 16 << 189 */
+    { { 0x3f747fa0b311898cL,0xe2a272e4cd0eac65L,0x4bba5851f914d0bcL,
+        0x7a1a9660c4a43ee3L },
+      { 0xe5a367cea1c8cde9L,0x9d958ba97271abe3L,0xf3ff7eb63d1615cdL,
+        0xa2280dcef5ae20b0L } },
+    /* 17 << 189 */
+    { { 0x56dba5c1cf640147L,0xea5a2e3d5e83d118L,0x04cd6b6dda24c511L,
+        0x1c0f4671e854d214L },
+      { 0x91a6b7a969565381L,0xdc966240decf1f5bL,0x1b22d21cfcf5d009L,
+        0x2a05f6419021dbd5L } },
+    /* 18 << 189 */
+    { { 0x8c0ed566d4312483L,0x5179a95d643e216fL,0xcc185fec17044493L,
+        0xb306333954991a21L },
+      { 0xd801ecdb0081a726L,0x0149b0c64fa89bbbL,0xafe9065a4391b6b9L,
+        0xedc92786d633f3a3L } },
+    /* 19 << 189 */
+    { { 0xe408c24aae6a8e13L,0x85833fde9f3897abL,0x43800e7ed81a0715L,
+        0xde08e346b44ffc5fL },
+      { 0x7094184ccdeff2e0L,0x49f9387b165eaed1L,0x635d6129777c468aL,
+        0x8c0dcfd1538c2dd8L } },
+    /* 20 << 189 */
+    { { 0xd6d9d9e37a6a308bL,0x623758304c2767d3L,0x874a8bc6f38cbeb6L,
+        0xd94d3f1accb6fd9eL },
+      { 0x92a9735bba21f248L,0x272ad0e56cd1efb0L,0x7437b69c05b03284L,
+        0xe7f047026948c225L } },
+    /* 21 << 189 */
+    { { 0x8a56c04acba2ececL,0x0c181270e3a73e41L,0x6cb34e9d03e93725L,
+        0xf77c8713496521a9L },
+      { 0x94569183fa7f9f90L,0xf2e7aa4c8c9707adL,0xced2c9ba26c1c9a3L,
+        0x9109fe9640197507L } },
+    /* 22 << 189 */
+    { { 0x9ae868a9e9adfe1cL,0x3984403d314e39bbL,0xb5875720f2fe378fL,
+        0x33f901e0ba44a628L },
+      { 0xea1125fe3652438cL,0xae9ec4e69dd1f20bL,0x1e740d9ebebf7fbdL,
+        0x6dbd3ddc42dbe79cL } },
+    /* 23 << 189 */
+    { { 0x62082aecedd36776L,0xf612c478e9859039L,0xa493b201032f7065L,
+        0xebd4d8f24ff9b211L },
+      { 0x3f23a0aaaac4cb32L,0xea3aadb715ed4005L,0xacf17ea4afa27e63L,
+        0x56125c1ac11fd66cL } },
+    /* 24 << 189 */
+    { { 0x266344a43794f8dcL,0xdcca923a483c5c36L,0x2d6b6bbf3f9d10a0L,
+        0xb320c5ca81d9bdf3L },
+      { 0x620e28ff47b50a95L,0x933e3b01cef03371L,0xf081bf8599100153L,
+        0x183be9a0c3a8c8d6L } },
+    /* 25 << 189 */
+    { { 0x4e3ddc5ad6bbe24dL,0xc6c7463053843795L,0x78193dd765ec2d4cL,
+        0xb8df26cccd3c89b2L },
+      { 0x98dbe3995a483f8dL,0x72d8a9577dd3313aL,0x65087294ab0bd375L,
+        0xfcd892487c259d16L } },
+    /* 26 << 189 */
+    { { 0x8a9443d77613aa81L,0x8010080085fe6584L,0x70fc4dbc7fb10288L,
+        0xf58280d3e86beee8L },
+      { 0x14fdd82f7c978c38L,0xdf1204c10de44d7bL,0xa08a1c844160252fL,
+        0x591554cac17646a5L } },
+    /* 27 << 189 */
+    { { 0x214a37d6a05bd525L,0x48d5f09b07957b3cL,0x0247cdcbd7109bc9L,
+        0x40f9e4bb30599ce7L },
+      { 0xc325fa03f46ad2ecL,0x00f766cfc3e3f9eeL,0xab556668d43a4577L,
+        0x68d30a613ee03b93L } },
+    /* 28 << 189 */
+    { { 0x7ddc81ea77b46a08L,0xcf5a6477c7480699L,0x43a8cb346633f683L,
+        0x1b867e6b92363c60L },
+      { 0x439211141f60558eL,0xcdbcdd632f41450eL,0x7fc04601cc630e8bL,
+        0xea7c66d597038b43L } },
+    /* 29 << 189 */
+    { { 0x7259b8a504e99fd8L,0x98a8dd124785549aL,0x0e459a7c840552e1L,
+        0xcdfcf4d04bb0909eL },
+      { 0x34a86db253758da7L,0xe643bb83eac997e1L,0x96400bd7530c5b7eL,
+        0x9f97af87b41c8b52L } },
+    /* 30 << 189 */
+    { { 0x34fc8820fbeee3f9L,0x93e5349049091afdL,0x764b9be59a31f35cL,
+        0x71f3786457e3d924L },
+      { 0x02fb34e0943aa75eL,0xa18c9c58ab8ff6e4L,0x080f31b133cf0d19L,
+        0x5c9682db083518a7L } },
+    /* 31 << 189 */
+    { { 0x873d4ca6b709c3deL,0x64a842623575b8f0L,0x6275da1f020154bbL,
+        0x97678caad17cf1abL },
+      { 0x8779795f951a95c3L,0xdd35b16350fccc08L,0x3270962733d8f031L,
+        0x3c5ab10a498dd85cL } },
+    /* 32 << 189 */
+    { { 0xb6c185c341dca566L,0x7de7fedad8622aa3L,0x99e84d92901b6dfbL,
+        0x30a02b0e7c4ad288L },
+      { 0xc7c81daa2fd3cf36L,0xd1319547df89e59fL,0xb2be8184cd496733L,
+        0xd5f449eb93d3412bL } },
+    /* 33 << 189 */
+    { { 0x7ea41b1b25fe531dL,0xf97974326a1d5646L,0x86067f722bde501aL,
+        0xf91481c00c85e89cL },
+      { 0xca8ee465f8b05bc6L,0x1844e1cf02e83cdaL,0xca82114ab4dbe33bL,
+        0x0f9f87694eabfde2L } },
+    /* 34 << 189 */
+    { { 0x4936b1c038b27fe2L,0x63b6359baba402dfL,0x40c0ea2f656bdbabL,
+        0x9c992a896580c39cL },
+      { 0x600e8f152a60aed1L,0xeb089ca4e0bf49dfL,0x9c233d7d2d42d99aL,
+        0x648d3f954c6bc2faL } },
+    /* 35 << 189 */
+    { { 0xdcc383a8e1add3f3L,0xf42c0c6a4f64a348L,0x2abd176f0030dbdbL,
+        0x4de501a37d6c215eL },
+      { 0x4a107c1f4b9a64bcL,0xa77f0ad32496cd59L,0xfb78ac627688dffbL,
+        0x7025a2ca67937d8eL } },
+    /* 36 << 189 */
+    { { 0xfde8b2d1d1a8f4e7L,0xf5b3da477354927cL,0xe48606a3d9205735L,
+        0xac477cc6e177b917L },
+      { 0xfb1f73d2a883239aL,0xe12572f6cc8b8357L,0x9d355e9cfb1f4f86L,
+        0x89b795f8d9f3ec6eL } },
+    /* 37 << 189 */
+    { { 0x27be56f1b54398dcL,0x1890efd73fedeed5L,0x62f77f1f9c6d0140L,
+        0x7ef0e314596f0ee4L },
+      { 0x50ca6631cc61dab3L,0x4a39801df4866e4fL,0x66c8d032ae363b39L,
+        0x22c591e52ead66aaL } },
+    /* 38 << 189 */
+    { { 0x954ba308de02a53eL,0x2a6c060fd389f357L,0xe6cfcde8fbf40b66L,
+        0x8e02fc56c6340ce1L },
+      { 0xe495779573adb4baL,0x7b86122ca7b03805L,0x63f835120c8e6fa6L,
+        0x83660ea0057d7804L } },
+    /* 39 << 189 */
+    { { 0xbad7910521ba473cL,0xb6c50beeded5389dL,0xee2caf4daa7c9bc0L,
+        0xd97b8de48c4e98a7L },
+      { 0xa9f63e70ab3bbddbL,0x3898aabf2597815aL,0x7659af89ac15b3d9L,
+        0xedf7725b703ce784L } },
+    /* 40 << 189 */
+    { { 0x25470fabe085116bL,0x04a4337587285310L,0x4e39187ee2bfd52fL,
+        0x36166b447d9ebc74L },
+      { 0x92ad433cfd4b322cL,0x726aa817ba79ab51L,0xf96eacd8c1db15ebL,
+        0xfaf71e910476be63L } },
+    /* 41 << 189 */
+    { { 0xdd69a640641fad98L,0xb799591829622559L,0x03c6daa5de4199dcL,
+        0x92cadc97ad545eb4L },
+      { 0x1028238b256534e4L,0x73e80ce68595409aL,0x690d4c66d05dc59bL,
+        0xc95f7b8f981dee80L } },
+    /* 42 << 189 */
+    { { 0xf4337014d856ac25L,0x441bd9ddac524dcaL,0x640b3d855f0499f5L,
+        0x39cf84a9d5fda182L },
+      { 0x04e7b055b2aa95a0L,0x29e33f0a0ddf1860L,0x082e74b5423f6b43L,
+        0x217edeb90aaa2b0fL } },
+    /* 43 << 189 */
+    { { 0x58b83f3583cbea55L,0xc485ee4dbc185d70L,0x833ff03b1e5f6992L,
+        0xb5b9b9cccf0c0dd5L },
+      { 0x7caaee8e4e9e8a50L,0x462e907b6269dafdL,0x6ed5cee9fbe791c6L,
+        0x68ca3259ed430790L } },
+    /* 44 << 189 */
+    { { 0x2b72bdf213b5ba88L,0x60294c8a35ef0ac4L,0x9c3230ed19b99b08L,
+        0x560fff176c2589aaL },
+      { 0x552b8487d6770374L,0xa373202d9a56f685L,0xd3e7f90745f175d9L,
+        0x3c2f315fd080d810L } },
+    /* 45 << 189 */
+    { { 0x1130e9dd7b9520e8L,0xc078f9e20af037b5L,0x38cd2ec71e9c104cL,
+        0x0f684368c472fe92L },
+      { 0xd3f1b5ed6247e7efL,0xb32d33a9396dfe21L,0x46f59cf44a9aa2c2L,
+        0x69cd5168ff0f7e41L } },
+    /* 46 << 189 */
+    { { 0x3f59da0f4b3234daL,0xcf0b0235b4579ebeL,0x6d1cbb256d2476c7L,
+        0x4f0837e69dc30f08L },
+      { 0x9a4075bb906f6e98L,0x253bb434c761e7d1L,0xde2e645f6e73af10L,
+        0xb89a40600c5f131cL } },
+    /* 47 << 189 */
+    { { 0xd12840c5b8cc037fL,0x3d093a5b7405bb47L,0x6202c253206348b8L,
+        0xbf5d57fcc55a3ca7L },
+      { 0x89f6c90c8c3bef48L,0x23ac76235a0a960aL,0xdfbd3d6b552b42abL,
+        0x3ef22458132061f6L } },
+    /* 48 << 189 */
+    { { 0xd74e9bdac97e6516L,0x88779360c230f49eL,0xa6ec1de31e74ea49L,
+        0x581dcee53fb645a2L },
+      { 0xbaef23918f483f14L,0x6d2dddfcd137d13bL,0x54cde50ed2743a42L,
+        0x89a34fc5e4d97e67L } },
+    /* 49 << 189 */
+    { { 0x13f1f5b312e08ce5L,0xa80540b8a7f0b2caL,0x854bcf7701982805L,
+        0xb8653ffd233bea04L },
+      { 0x8e7b878702b0b4c9L,0x2675261f9acb170aL,0x061a9d90930c14e5L,
+        0xb59b30e0def0abeaL } },
+    /* 50 << 189 */
+    { { 0x1dc19ea60200ec7dL,0xb6f4a3f90bce132bL,0xb8d5de90f13e27e0L,
+        0xbaee5ef01fade16fL },
+      { 0x6f406aaae4c6cf38L,0xab4cfe06d1369815L,0x0dcffe87efd550c6L,
+        0x9d4f59c775ff7d39L } },
+    /* 51 << 189 */
+    { { 0xb02553b151deb6adL,0x812399a4b1877749L,0xce90f71fca6006e1L,
+        0xc32363a6b02b6e77L },
+      { 0x02284fbedc36c64dL,0x86c81e31a7e1ae61L,0x2576c7e5b909d94aL,
+        0x8b6f7d02818b2bb0L } },
+    /* 52 << 189 */
+    { { 0xeca3ed0756faa38aL,0xa3790e6c9305bb54L,0xd784eeda7bc73061L,
+        0xbd56d3696dd50614L },
+      { 0xd6575949229a8aa9L,0xdcca8f474595ec28L,0x814305c106ab4fe6L,
+        0xc8c3976824f43f16L } },
+    /* 53 << 189 */
+    { { 0xe2a45f36523f2b36L,0x995c6493920d93bbL,0xf8afdab790f1632bL,
+        0x79ebbecd1c295954L },
+      { 0xc7bb3ddb79592f48L,0x67216a7b5f88e998L,0xd91f098bbc01193eL,
+        0xf7d928a5b1db83fcL } },
+    /* 54 << 189 */
+    { { 0x55e38417e991f600L,0x2a91113e2981a934L,0xcbc9d64806b13bdeL,
+        0xb011b6ac0755ff44L },
+      { 0x6f4cb518045ec613L,0x522d2d31c2f5930aL,0x5acae1af382e65deL,
+        0x5764306727bc966fL } },
+    /* 55 << 189 */
+    { { 0x5e12705d1c7193f0L,0xf0f32f473be8858eL,0x785c3d7d96c6dfc7L,
+        0xd75b4a20bf31795dL },
+      { 0x91acf17b342659d4L,0xe596ea3444f0378fL,0x4515708fce52129dL,
+        0x17387e1e79f2f585L } },
+    /* 56 << 189 */
+    { { 0x72cfd2e949dee168L,0x1ae052233e2af239L,0x009e75be1d94066aL,
+        0x6cca31c738abf413L },
+      { 0xb50bd61d9bc49908L,0x4a9b4a8cf5e2bc1eL,0xeb6cc5f7946f83acL,
+        0x27da93fcebffab28L } },
+    /* 57 << 189 */
+    { { 0xea314c964821c8c5L,0x8de49deda83c15f4L,0x7a64cf207af33004L,
+        0x45f1bfebc9627e10L },
+      { 0x878b062654b9df60L,0x5e4fdc3ca95c0b33L,0xe54a37cac2035d8eL,
+        0x9087cda980f20b8cL } },
+    /* 58 << 189 */
+    { { 0x36f61c238319ade4L,0x766f287ade8cfdf8L,0x48821948346f3705L,
+        0x49a7b85316e4f4a2L },
+      { 0xb9b3f8a75cedadfdL,0x8f5628158db2a815L,0xc0b7d55401f68f95L,
+        0x12971e27688a208eL } },
+    /* 59 << 189 */
+    { { 0xc9f8b696d0ff34fcL,0x20824de21222718cL,0x7213cf9f0c95284dL,
+        0xe2ad741bdc158240L },
+      { 0x0ee3a6df54043ccfL,0x16ff479bd84412b3L,0xf6c74ee0dfc98af0L,
+        0xa78a169f52fcd2fbL } },
+    /* 60 << 189 */
+    { { 0xd8ae874699c930e9L,0x1d33e85849e117a5L,0x7581fcb46624759fL,
+        0xde50644f5bedc01dL },
+      { 0xbeec5d00caf3155eL,0x672d66acbc73e75fL,0x86b9d8c6270b01dbL,
+        0xd249ef8350f55b79L } },
+    /* 61 << 189 */
+    { { 0x6131d6d473978fe3L,0xcc4e4542754b00a1L,0x4e05df0557dfcfe9L,
+        0x94b29cdd51ef6bf0L },
+      { 0xe4530cff9bc7edf2L,0x8ac236fdd3da65f3L,0x0faf7d5fc8eb0b48L,
+        0x4d2de14c660eb039L } },
+    /* 62 << 189 */
+    { { 0xc006bba760430e54L,0x10a2d0d6da3289abL,0x9c037a5dd7979c59L,
+        0x04d1f3d3a116d944L },
+      { 0x9ff224738a0983cdL,0x28e25b38c883cabbL,0xe968dba547a58995L,
+        0x2c80b505774eebdfL } },
+    /* 63 << 189 */
+    { { 0xee763b714a953bebL,0x502e223f1642e7f6L,0x6fe4b64161d5e722L,
+        0x9d37c5b0dbef5316L },
+      { 0x0115ed70f8330bc7L,0x139850e675a72789L,0x27d7faecffceccc2L,
+        0x3016a8604fd9f7f6L } },
+    /* 64 << 189 */
+    { { 0xc492ec644cd8f64cL,0x58a2d790279d7b51L,0x0ced1fc51fc75256L,
+        0x3e658aed8f433017L },
+      { 0x0b61942e05da59ebL,0xba3d60a30ddc3722L,0x7c311cd1742e7f87L,
+        0x6473ffeef6b01b6eL } },
+    /* 0 << 196 */
+    { { 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 << 196 */
+    { { 0x8303604f692ac542L,0xf079ffe1227b91d3L,0x19f63e6315aaf9bdL,
+        0xf99ee565f1f344fbL },
+      { 0x8a1d661fd6219199L,0x8c883bc6d48ce41cL,0x1065118f3c74d904L,
+        0x713889ee0faf8b1bL } },
+    /* 2 << 196 */
+    { { 0x972b3f8f81a1b3beL,0x4f3ce145ce2764a0L,0xe2d0f1cc28c4f5f7L,
+        0xdeee0c0dc7f3985bL },
+      { 0x7df4adc0d39e25c3L,0x40619820c467a080L,0x440ebc9361cf5a58L,
+        0x527729a6422ad600L } },
+    /* 3 << 196 */
+    { { 0xca6c0937b1b76ba6L,0x1a2eab854d2026dcL,0xb1715e1519d9ae0aL,
+        0xf1ad9199bac4a026L },
+      { 0x35b3dfb807ea7b0eL,0xedf5496f3ed9eb89L,0x8932e5ff2d6d08abL,
+        0xf314874e25bd2731L } },
+    /* 4 << 196 */
+    { { 0xefb26a753f73f449L,0x1d1c94f88d44fc79L,0x49f0fbc53bc0dc4dL,
+        0xb747ea0b3698a0d0L },
+      { 0x5218c3fe228d291eL,0x35b804b543c129d6L,0xfac859b8d1acc516L,
+        0x6c10697d95d6e668L } },
+    /* 5 << 196 */
+    { { 0xc38e438f0876fd4eL,0x45f0c30783d2f383L,0x203cc2ecb10934cbL,
+        0x6a8f24392c9d46eeL },
+      { 0xf16b431b65ccde7bL,0x41e2cd1827e76a6fL,0xb9c8cf8f4e3484d7L,
+        0x64426efd8315244aL } },
+    /* 6 << 196 */
+    { { 0x1c0a8e44fc94dea3L,0x34c8cdbfdad6a0b0L,0x919c384004113cefL,
+        0xfd32fba415490ffaL },
+      { 0x58d190f6795dcfb7L,0xfef01b0383588bafL,0x9e6d1d63ca1fc1c0L,
+        0x53173f96f0a41ac9L } },
+    /* 7 << 196 */
+    { { 0x2b1d402aba16f73bL,0x2fb310148cf9b9fcL,0x2d51e60e446ef7bfL,
+        0xc731021bb91e1745L },
+      { 0x9d3b47244fee99d4L,0x4bca48b6fac5c1eaL,0x70f5f514bbea9af7L,
+        0x751f55a5974c283aL } },
+    /* 8 << 196 */
+    { { 0x6e30251acb452fdbL,0x31ee696550f30650L,0xb0b3e508933548d9L,
+        0xb8949a4ff4b0ef5bL },
+      { 0x208b83263c88f3bdL,0xab147c30db1d9989L,0xed6515fd44d4df03L,
+        0x17a12f75e72eb0c5L } },
+    /* 9 << 196 */
+    { { 0x3b59796d36cf69dbL,0x1219eee956670c18L,0xfe3341f77a070d8eL,
+        0x9b70130ba327f90cL },
+      { 0x36a324620ae18e0eL,0x2021a62346c0a638L,0x251b5817c62eb0d4L,
+        0x87bfbcdf4c762293L } },
+    /* 10 << 196 */
+    { { 0xf78ab505cdd61d64L,0x8c7a53fcc8c18857L,0xa653ce6f16147515L,
+        0x9c923aa5ea7d52d5L },
+      { 0xc24709cb5c18871fL,0x7d53bec873b3cc74L,0x59264afffdd1d4c4L,
+        0x5555917e240da582L } },
+    /* 11 << 196 */
+    { { 0xcae8bbda548f5a0eL,0x1910eaba3bbfbbe1L,0xae5796857677afc3L,
+        0x49ea61f173ff0b5cL },
+      { 0x786554784f7c3922L,0x95d337cd20c68eefL,0x68f1e1e5df779ab9L,
+        0x14b491b0b5cf69a8L } },
+    /* 12 << 196 */
+    { { 0x7a6cbbe028e3fe89L,0xe7e1fee4c5aac0ebL,0x7f47eda5697e5140L,
+        0x4f450137b454921fL },
+      { 0xdb625f8495cd8185L,0x74be0ba1cdb2e583L,0xaee4fd7cdd5e6de4L,
+        0x4251437de8101739L } },
+    /* 13 << 196 */
+    { { 0x686d72a0ac620366L,0x4be3fb9cb6d59344L,0x6e8b44e7a1eb75b9L,
+        0x84e39da391a5c10cL },
+      { 0x37cc1490b38f0409L,0x029519432c2ade82L,0x9b6887831190a2d8L,
+        0x25627d14231182baL } },
+    /* 14 << 196 */
+    { { 0x6eb550aa658a6d87L,0x1405aaa7cf9c7325L,0xd147142e5c8748c9L,
+        0x7f637e4f53ede0e0L },
+      { 0xf8ca277614ffad2cL,0xe58fb1bdbafb6791L,0x17158c23bf8f93fcL,
+        0x7f15b3730a4a4655L } },
+    /* 15 << 196 */
+    { { 0x39d4add2d842ca72L,0xa71e43913ed96305L,0x5bb09cbe6700be14L,
+        0x68d69d54d8befcf6L },
+      { 0xa45f536737183bcfL,0x7152b7bb3370dff7L,0xcf887baabf12525bL,
+        0xe7ac7bddd6d1e3cdL } },
+    /* 16 << 196 */
+    { { 0x25914f7881fdad90L,0xcf638f560d2cf6abL,0xb90bc03fcc054de5L,
+        0x932811a718b06350L },
+      { 0x2f00b3309bbd11ffL,0x76108a6fb4044974L,0x801bb9e0a851d266L,
+        0x0dd099bebf8990c1L } },
+    /* 17 << 196 */
+    { { 0x58c5aaaaabe32986L,0x0fe9dd2a50d59c27L,0x84951ff48d307305L,
+        0x6c23f82986529b78L },
+      { 0x50bb22180b136a79L,0x7e2174de77a20996L,0x6f00a4b9c0bb4da6L,
+        0x89a25a17efdde8daL } },
+    /* 18 << 196 */
+    { { 0xf728a27ec11ee01dL,0xf900553ae5f10dfbL,0x189a83c802ec893cL,
+        0x3ca5bdc123f66d77L },
+      { 0x9878153797eada9fL,0x59c50ab310256230L,0x346042d9323c69b3L,
+        0x1b715a6d2c460449L } },
+    /* 19 << 196 */
+    { { 0xa41dd4766ae06e0bL,0xcdd7888e9d42e25fL,0x0f395f7456b25a20L,
+        0xeadfe0ae8700e27eL },
+      { 0xb09d52a969950093L,0x3525d9cb327f8d40L,0xb8235a9467df886aL,
+        0x77e4b0dd035faec2L } },
+    /* 20 << 196 */
+    { { 0x115eb20a517d7061L,0x77fe34336c2df683L,0x6870ddc7cdc6fc67L,
+        0xb16105880b87de83L },
+      { 0x343584cad9c4ddbeL,0xb3164f1c3d754be2L,0x0731ed3ac1e6c894L,
+        0x26327dec4f6b904cL } },
+    /* 21 << 196 */
+    { { 0x9d49c6de97b5cd32L,0x40835daeb5eceecdL,0xc66350edd9ded7feL,
+        0x8aeebb5c7a678804L },
+      { 0x51d42fb75b8ee9ecL,0xd7a17bdd8e3ca118L,0x40d7511a2ef4400eL,
+        0xc48990ac875a66f4L } },
+    /* 22 << 196 */
+    { { 0x8de07d2a2199e347L,0xbee755562a39e051L,0x56918786916e51dcL,
+        0xeb1913134a2d89ecL },
+      { 0x6679610d37d341edL,0x434fbb4156d51c2bL,0xe54b7ee7d7492dbaL,
+        0xaa33a79a59021493L } },
+    /* 23 << 196 */
+    { { 0x49fc5054e4bd6d3dL,0x09540f045ab551d0L,0x8acc90854942d3a6L,
+        0x231af02f2d28323bL },
+      { 0x93458cac0992c163L,0x1fef8e71888e3bb4L,0x27578da5be8c268cL,
+        0xcc8be792e805ec00L } },
+    /* 24 << 196 */
+    { { 0x29267baec61c3855L,0xebff429d58c1fd3bL,0x22d886c08c0b93b8L,
+        0xca5e00b22ddb8953L },
+      { 0xcf330117c3fed8b7L,0xd49ac6fa819c01f6L,0x6ddaa6bd3c0fbd54L,
+        0x917430688049a2cfL } },
+    /* 25 << 196 */
+    { { 0xd67f981eaff2ef81L,0xc3654d352818ae80L,0x81d050441b2aa892L,
+        0x2db067bf3d099328L },
+      { 0xe7c79e86703dcc97L,0xe66f9b37e133e215L,0xcdf119a6e39a7a5cL,
+        0x47c60de3876f1b61L } },
+    /* 26 << 196 */
+    { { 0x6e405939d860f1b2L,0x3e9a1dbcf5ed4d4aL,0x3f23619ec9b6bcbdL,
+        0x5ee790cf734e4497L },
+      { 0xf0a834b15bdaf9bbL,0x02cedda74ca295f0L,0x4619aa2bcb8e378cL,
+        0xe5613244cc987ea4L } },
+    /* 27 << 196 */
+    { { 0x0bc022cc76b23a50L,0x4a2793ad0a6c21ceL,0x3832878089cac3f5L,
+        0x29176f1bcba26d56L },
+      { 0x062961874f6f59ebL,0x86e9bca98bdc658eL,0x2ca9c4d357e30402L,
+        0x5438b216516a09bbL } },
+    /* 28 << 196 */
+    { { 0x0a6a063c7672765aL,0x37a3ce640547b9bfL,0x42c099c898b1a633L,
+        0xb5ab800d05ee6961L },
+      { 0xf1963f5911a5acd6L,0xbaee615746201063L,0x36d9a649a596210aL,
+        0xaed043631ba7138cL } },
+    /* 29 << 196 */
+    { { 0xcf817d1ca4a82b76L,0x5586960ef3806be9L,0x7ab67c8909dc6bb5L,
+        0x52ace7a0114fe7ebL },
+      { 0xcd987618cbbc9b70L,0x4f06fd5a604ca5e1L,0x90af14ca6dbde133L,
+        0x1afe4322948a3264L } },
+    /* 30 << 196 */
+    { { 0xa70d2ca6c44b2c6cL,0xab7267990ef87dfeL,0x310f64dc2e696377L,
+        0x49b42e684c8126a0L },
+      { 0x0ea444c3cea0b176L,0x53a8ddf7cb269182L,0xf3e674ebbbba9dcbL,
+        0x0d2878a8d8669d33L } },
+    /* 31 << 196 */
+    { { 0x04b935d5d019b6a3L,0xbb5cf88e406f1e46L,0xa1912d165b57c111L,
+        0x9803fc2119ebfd78L },
+      { 0x4f231c9ec07764a9L,0xd93286eeb75bd055L,0x83a9457d8ee6c9deL,
+        0x046959156087ec90L } },
+    /* 32 << 196 */
+    { { 0x14c6dd8a58d6cd46L,0x9cb633b58e6634d2L,0xc1305047f81bc328L,
+        0x12ede0e226a177e5L },
+      { 0x332cca62065a6f4fL,0xc3a47ecd67be487bL,0x741eb1870f47ed1cL,
+        0x99e66e58e7598b14L } },
+    /* 33 << 196 */
+    { { 0x6f0544ca63d0ff12L,0xe5efc784b610a05fL,0xf72917b17cad7b47L,
+        0x3ff6ea20f2cac0c0L },
+      { 0xcc23791bf21db8b7L,0x7dac70b1d7d93565L,0x682cda1d694bdaadL,
+        0xeb88bb8c1023516dL } },
+    /* 34 << 196 */
+    { { 0xc4c634b4dfdbeb1bL,0x22f5ca72b4ee4deaL,0x1045a368e6524821L,
+        0xed9e8a3f052b18b2L },
+      { 0x9b7f2cb1b961f49aL,0x7fee2ec17b009670L,0x350d875422507a6dL,
+        0x561bd7114db55f1dL } },
+    /* 35 << 196 */
+    { { 0x4c189ccc320bbcafL,0x568434cfdf1de48cL,0x6af1b00e0fa8f128L,
+        0xf0ba9d028907583cL },
+      { 0x735a400432ff9f60L,0x3dd8e4b6c25dcf33L,0xf2230f1642c74cefL,
+        0xd8117623013fa8adL } },
+    /* 36 << 196 */
+    { { 0x36822876f51fe76eL,0x8a6811cc11d62589L,0xc3fc7e6546225718L,
+        0xb7df2c9fc82fdbcdL },
+      { 0x3b1d4e52dd7b205bL,0xb695947847a2e414L,0x05e4d793efa91148L,
+        0xb47ed446fd2e9675L } },
+    /* 37 << 196 */
+    { { 0x1a7098b904c9d9bfL,0x661e28811b793048L,0xb1a16966b01ee461L,
+        0xbc5213082954746fL },
+      { 0xc909a0fc2477de50L,0xd80bb41c7dbd51efL,0xa85be7ec53294905L,
+        0x6d465b1883958f97L } },
+    /* 38 << 196 */
+    { { 0x16f6f330fb6840fdL,0xfaaeb2143401e6c8L,0xaf83d30fccb5b4f8L,
+        0x22885739266dec4bL },
+      { 0x51b4367c7bc467dfL,0x926562e3d842d27aL,0xdfcb66140fea14a6L,
+        0xeb394daef2734cd9L } },
+    /* 39 << 196 */
+    { { 0x3eeae5d211c0be98L,0xb1e6ed11814e8165L,0x191086bce52bce1cL,
+        0x14b74cc6a75a04daL },
+      { 0x63cf11868c060985L,0x071047de2dbd7f7cL,0x4e433b8bce0942caL,
+        0xecbac447d8fec61dL } },
+    /* 40 << 196 */
+    { { 0x8f0ed0e2ebf3232fL,0xfff80f9ec52a2eddL,0xad9ab43375b55fdbL,
+        0x73ca7820e42e0c11L },
+      { 0x6dace0a0e6251b46L,0x89bc6b5c4c0d932dL,0x3438cd77095da19aL,
+        0x2f24a9398d48bdfbL } },
+    /* 41 << 196 */
+    { { 0x99b47e46766561b7L,0x736600e60ed0322aL,0x06a47cb1638e1865L,
+        0x927c1c2dcb136000L },
+      { 0x295423370cc5df69L,0x99b37c0209d649a9L,0xc5f0043c6aefdb27L,
+        0x6cdd99871be95c27L } },
+    /* 42 << 196 */
+    { { 0x69850931390420d2L,0x299c40ac0983efa4L,0x3a05e778af39aeadL,
+        0x8427440843a45193L },
+      { 0x6bcd0fb991a711a0L,0x461592c89f52ab17L,0xb49302b4da3c6ed6L,
+        0xc51fddc7330d7067L } },
+    /* 43 << 196 */
+    { { 0x94babeb6da50d531L,0x521b840da6a7b9daL,0x5305151e404bdc89L,
+        0x1bcde201d0d07449L },
+      { 0xf427a78b3b76a59aL,0xf84841ce07791a1bL,0xebd314bebf91ed1cL,
+        0x8e61d34cbf172943L } },
+    /* 44 << 196 */
+    { { 0x1d5dc4515541b892L,0xb186ee41fc9d9e54L,0x9d9f345ed5bf610dL,
+        0x3e7ba65df6acca9fL },
+      { 0x9dda787aa8369486L,0x09f9dab78eb5ba53L,0x5afb2033d6481bc3L,
+        0x76f4ce30afa62104L } },
+    /* 45 << 196 */
+    { { 0xa8fa00cff4f066b5L,0x89ab5143461dafc2L,0x44339ed7a3389998L,
+        0x2ff862f1bc214903L },
+      { 0x2c88f985b05556e3L,0xcd96058e3467081eL,0x7d6a4176edc637eaL,
+        0xe1743d0936a5acdcL } },
+    /* 46 << 196 */
+    { { 0x66fd72e27eb37726L,0xf7fa264e1481a037L,0x9fbd3bde45f4aa79L,
+        0xed1e0147767c3e22L },
+      { 0x7621f97982e7abe2L,0x19eedc7245f633f8L,0xe69b155e6137bf3aL,
+        0xa0ad13ce414ee94eL } },
+    /* 47 << 196 */
+    { { 0x93e3d5241c0e651aL,0xab1a6e2a02ce227eL,0xe7af17974ab27ecaL,
+        0x245446debd444f39L },
+      { 0x59e22a2156c07613L,0x43deafcef4275498L,0x10834ccb67fd0946L,
+        0xa75841e547406edfL } },
+    /* 48 << 196 */
+    { { 0xebd6a6777b0ac93dL,0xa6e37b0d78f5e0d7L,0x2516c09676f5492bL,
+        0x1e4bf8889ac05f3aL },
+      { 0xcdb42ce04df0ba2bL,0x935d5cfd5062341bL,0x8a30333382acac20L,
+        0x429438c45198b00eL } },
+    /* 49 << 196 */
+    { { 0x1d083bc9049d33faL,0x58b82dda946f67ffL,0xac3e2db867a1d6a3L,
+        0x62e6bead1798aac8L },
+      { 0xfc85980fde46c58cL,0xa7f6937969c8d7beL,0x23557927837b35ecL,
+        0x06a933d8e0790c0cL } },
+    /* 50 << 196 */
+    { { 0x827c0e9b077ff55dL,0x53977798bb26e680L,0x595308741d9cb54fL,
+        0xcca3f4494aac53efL },
+      { 0x11dc5c87a07eda0fL,0xc138bccffd6400c8L,0x549680d313e5da72L,
+        0xc93eed824540617eL } },
+    /* 51 << 196 */
+    { { 0xfd3db1574d0b75c0L,0x9716eb426386075bL,0x0639605c817b2c16L,
+        0x09915109f1e4f201L },
+      { 0x35c9a9285cca6c3bL,0xb25f7d1a3505c900L,0xeb9f7d20630480c4L,
+        0xc3c7b8c62a1a501cL } },
+    /* 52 << 196 */
+    { { 0x3f99183c5a1f8e24L,0xfdb118fa9dd255f0L,0xb9b18b90c27f62a6L,
+        0xe8f732f7396ec191L },
+      { 0x524a2d910be786abL,0x5d32adef0ac5a0f5L,0x9b53d4d69725f694L,
+        0x032a76c60510ba89L } },
+    /* 53 << 196 */
+    { { 0x840391a3ebeb1544L,0x44b7b88c3ed73ac3L,0xd24bae7a256cb8b3L,
+        0x7ceb151ae394cb12L },
+      { 0xbd6b66d05bc1e6a8L,0xec70cecb090f07bfL,0x270644ed7d937589L,
+        0xee9e1a3d5f1dccfeL } },
+    /* 54 << 196 */
+    { { 0xb0d40a84745b98d2L,0xda429a212556ed40L,0xf676eced85148cb9L,
+        0x5a22d40cded18936L },
+      { 0x3bc4b9e570e8a4ceL,0xbfd1445b9eae0379L,0xf23f2c0c1a0bd47eL,
+        0xa9c0bb31e1845531L } },
+    /* 55 << 196 */
+    { { 0x9ddc4d600a4c3f6bL,0xbdfaad792c15ef44L,0xce55a2367f484accL,
+        0x08653ca7055b1f15L },
+      { 0x2efa8724538873a3L,0x09299e5dace1c7e7L,0x07afab66ade332baL,
+        0x9be1fdf692dd71b7L } },
+    /* 56 << 196 */
+    { { 0xa49b5d595758b11cL,0x0b852893c8654f40L,0xb63ef6f452379447L,
+        0xd4957d29105e690cL },
+      { 0x7d484363646559b0L,0xf4a8273c49788a8eL,0xee406cb834ce54a9L,
+        0x1e1c260ff86fda9bL } },
+    /* 57 << 196 */
+    { { 0xe150e228cf6a4a81L,0x1fa3b6a31b488772L,0x1e6ff110c5a9c15bL,
+        0xc6133b918ad6aa47L },
+      { 0x8ac5d55c9dffa978L,0xba1d1c1d5f3965f2L,0xf969f4e07732b52fL,
+        0xfceecdb5a5172a07L } },
+    /* 58 << 196 */
+    { { 0xb0120a5f10f2b8f5L,0xc83a6cdf5c4c2f63L,0x4d47a491f8f9c213L,
+        0xd9e1cce5d3f1bbd5L },
+      { 0x0d91bc7caba7e372L,0xfcdc74c8dfd1a2dbL,0x05efa800374618e5L,
+        0x1121696915a7925eL } },
+    /* 59 << 196 */
+    { { 0xd4c89823f6021c5dL,0x880d5e84eff14423L,0x6523bc5a6dcd1396L,
+        0xd1acfdfc113c978bL },
+      { 0xb0c164e8bbb66840L,0xf7f4301e72b58459L,0xc29ad4a6a638e8ecL,
+        0xf5ab896146b78699L } },
+    /* 60 << 196 */
+    { { 0x9dbd79740e954750L,0x0121de8864f9d2c6L,0x2e597b42d985232eL,
+        0x55b6c3c553451777L },
+      { 0xbb53e547519cb9fbL,0xf134019f8428600dL,0x5a473176e081791aL,
+        0x2f3e226335fb0c08L } },
+    /* 61 << 196 */
+    { { 0xb28c301773d273b0L,0xccd210767721ef9aL,0x054cc292b650dc39L,
+        0x662246de6188045eL },
+      { 0x904b52fa6b83c0d1L,0xa72df26797e9cd46L,0x886b43cd899725e4L,
+        0x2b651688d849ff22L } },
+    /* 62 << 196 */
+    { { 0x60479b7902f34533L,0x5e354c140c77c148L,0xb4bb7581a8537c78L,
+        0x188043d7efe1495fL },
+      { 0x9ba12f428c1d5026L,0x2e0c8a2693d4aaabL,0xbdba7b8baa57c450L,
+        0x140c9ad69bbdafefL } },
+    /* 63 << 196 */
+    { { 0x2067aa4225ac0f18L,0xf7b1295b04d1fbf3L,0x14829111a4b04824L,
+        0x2ce3f19233bd5e91L },
+      { 0x9c7a1d558f2e1b72L,0xfe932286302aa243L,0x497ca7b4d4be9554L,
+        0xb8e821b8e0547a6eL } },
+    /* 64 << 196 */
+    { { 0xfb2838be67e573e0L,0x05891db94084c44bL,0x9131137396c1c2c5L,
+        0x6aebfa3fd958444bL },
+      { 0xac9cdce9e56e55c1L,0x7148ced32caa46d0L,0x2e10c7efb61fe8ebL,
+        0x9fd835daff97cf4dL } },
+    /* 0 << 203 */
+    { { 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 << 203 */
+    { { 0xa36da109081e9387L,0xfb9780d78c935828L,0xd5940332e540b015L,
+        0xc9d7b51be0f466faL },
+      { 0xfaadcd41d6d9f671L,0xba6c1e28b1a2ac17L,0x066a7833ed201e5fL,
+        0x19d99719f90f462bL } },
+    /* 2 << 203 */
+    { { 0xf431f462060b5f61L,0xa56f46b47bd057c2L,0x348dca6c47e1bf65L,
+        0x9a38783e41bcf1ffL },
+      { 0x7a5d33a9da710718L,0x5a7799872e0aeaf6L,0xca87314d2d29d187L,
+        0xfa0edc3ec687d733L } },
+    /* 3 << 203 */
+    { { 0x9df336216a31e09bL,0xde89e44dc1350e35L,0x292148714ca0cf52L,
+        0xdf3796720b88a538L },
+      { 0xc92a510a2591d61bL,0x79aa87d7585b447bL,0xf67db604e5287f77L,
+        0x1697c8bf5efe7a80L } },
+    /* 4 << 203 */
+    { { 0x1c894849cb198ac7L,0xa884a93d0f264665L,0x2da964ef9b200678L,
+        0x3c351b87009834e6L },
+      { 0xafb2ef9fe2c4b44bL,0x580f6c473326790cL,0xb84805210b02264aL,
+        0x8ba6f9e242a194e2L } },
+    /* 5 << 203 */
+    { { 0xfc87975f8fb54738L,0x3516078827c3ead3L,0x834116d2b74a085aL,
+        0x53c99a73a62fe996L },
+      { 0x87585be05b81c51bL,0x925bafa8be0852b7L,0x76a4fafda84d19a7L,
+        0x39a45982585206d4L } },
+    /* 6 << 203 */
+    { { 0x499b6ab65eb03c0eL,0xf19b795472bc3fdeL,0xa86b5b9c6e3a80d2L,
+        0xe43775086d42819fL },
+      { 0xc1663650bb3ee8a3L,0x75eb14fcb132075fL,0xa8ccc9067ad834f6L,
+        0xea6a2474e6e92ffdL } },
+    /* 7 << 203 */
+    { { 0x9d72fd950f8d6758L,0xcb84e101408c07ddL,0xb9114bfda5e23221L,
+        0x358b5fe2e94e742cL },
+      { 0x1c0577ec95f40e75L,0xf01554513d73f3d6L,0x9d55cd67bd1b9b66L,
+        0x63e86e78af8d63c7L } },
+    /* 8 << 203 */
+    { { 0x39d934abd3c095f1L,0x04b261bee4b76d71L,0x1d2e6970e73e6984L,
+        0x879fb23b5e5fcb11L },
+      { 0x11506c72dfd75490L,0x3a97d08561bcf1c1L,0x43201d82bf5e7007L,
+        0x7f0ac52f798232a7L } },
+    /* 9 << 203 */
+    { { 0x2715cbc46eb564d4L,0x8d6c752c9e570e29L,0xf80247c89ef5fd5dL,
+        0xc3c66b46d53eb514L },
+      { 0x9666b4010f87de56L,0xce62c06fc6c603b5L,0xae7b4c607e4fc942L,
+        0x38ac0b77663a9c19L } },
+    /* 10 << 203 */
+    { { 0xcb4d20ee4b049136L,0x8b63bf12356a4613L,0x1221aef670e08128L,
+        0xe62d8c514acb6b16L },
+      { 0x71f64a67379e7896L,0xb25237a2cafd7fa5L,0xf077bd983841ba6aL,
+        0xc4ac02443cd16e7eL } },
+    /* 11 << 203 */
+    { { 0x548ba86921fea4caL,0xd36d0817f3dfdac1L,0x09d8d71ff4685fafL,
+        0x8eff66bec52c459aL },
+      { 0x182faee70b57235eL,0xee3c39b10106712bL,0x5107331fc0fcdcb0L,
+        0x669fb9dca51054baL } },
+    /* 12 << 203 */
+    { { 0xb25101fb319d7682L,0xb02931290a982feeL,0x51c1c9b90261b344L,
+        0x0e008c5bbfd371faL },
+      { 0xd866dd1c0278ca33L,0x666f76a6e5aa53b1L,0xe5cfb7796013a2cfL,
+        0x1d3a1aada3521836L } },
+    /* 13 << 203 */
+    { { 0xcedd253173faa485L,0xc8ee6c4fc0a76878L,0xddbccfc92a11667dL,
+        0x1a418ea91c2f695aL },
+      { 0xdb11bd9251f73971L,0x3e4b3c82da2ed89fL,0x9a44f3f4e73e0319L,
+        0xd1e3de0f303431afL } },
+    /* 14 << 203 */
+    { { 0x3c5604ff50f75f9cL,0x1d8eddf37e752b22L,0x0ef074dd3c9a1118L,
+        0xd0ffc172ccb86d7bL },
+      { 0xabd1ece3037d90f2L,0xe3f307d66055856cL,0x422f93287e4c6dafL,
+        0x902aac66334879a0L } },
+    /* 15 << 203 */
+    { { 0xb6a1e7bf94cdfadeL,0x6c97e1ed7fc6d634L,0x662ad24da2fb63f8L,
+        0xf81be1b9a5928405L },
+      { 0x86d765e4d14b4206L,0xbecc2e0e8fa0db65L,0xa28838e0b17fc76cL,
+        0xe49a602ae37cf24eL } },
+    /* 16 << 203 */
+    { { 0x76b4131a567193ecL,0xaf3c305ae5f6e70bL,0x9587bd39031eebddL,
+        0x5709def871bbe831L },
+      { 0x570599830eb2b669L,0x4d80ce1b875b7029L,0x838a7da80364ac16L,
+        0x2f431d23be1c83abL } },
+    /* 17 << 203 */
+    { { 0xe56812a6f9294dd3L,0xb448d01f9b4b0d77L,0xf3ae606104e8305cL,
+        0x2bead64594d8c63eL },
+      { 0x0a85434d84fd8b07L,0x537b983ff7a9dee5L,0xedcc5f18ef55bd85L,
+        0x2041af6221c6cf8bL } },
+    /* 18 << 203 */
+    { { 0x8e52874cb940c71eL,0x211935a9db5f4b3aL,0x94350492301b1dc3L,
+        0x33d2646d29958620L },
+      { 0x16b0d64bef911404L,0x9d1f25ea9a3c5ef4L,0x20f200eb4a352c78L,
+        0x43929f2c4bd0b428L } },
+    /* 19 << 203 */
+    { { 0xa5656667c7196e29L,0x7992c2f09391be48L,0xaaa97cbd9ee0cd6eL,
+        0x51b0310c3dc8c9bfL },
+      { 0x237f8acfdd9f22cbL,0xbb1d81a1b585d584L,0x8d5d85f58c416388L,
+        0x0d6e5a5a42fe474fL } },
+    /* 20 << 203 */
+    { { 0xe781276638235d4eL,0x1c62bd67496e3298L,0x8378660c3f175bc8L,
+        0x4d04e18917afdd4dL },
+      { 0x32a8160185a8068cL,0xdb58e4e192b29a85L,0xe8a65b86c70d8a3bL,
+        0x5f0e6f4e98a0403bL } },
+    /* 21 << 203 */
+    { { 0x0812968469ed2370L,0x34dc30bd0871ee26L,0x3a5ce9487c9c5b05L,
+        0x7d487b8043a90c87L },
+      { 0x4089ba37dd0e7179L,0x45f80191b4041811L,0x1c3e105898747ba5L,
+        0x98c4e13a6e1ae592L } },
+    /* 22 << 203 */
+    { { 0xd44636e6e82c9f9eL,0x711db87cc33a1043L,0x6f431263aa8aec05L,
+        0x43ff120d2744a4aaL },
+      { 0xd3bd892fae77779bL,0xf0fe0cc98cdc9f82L,0xca5f7fe6f1c5b1bcL,
+        0xcc63a68244929a72L } },
+    /* 23 << 203 */
+    { { 0xc7eaba0c09dbe19aL,0x2f3585ad6b5c73c2L,0x8ab8924b0ae50c30L,
+        0x17fcd27a638b30baL },
+      { 0xaf414d3410b3d5a5L,0x09c107d22a9accf1L,0x15dac49f946a6242L,
+        0xaec3df2ad707d642L } },
+    /* 24 << 203 */
+    { { 0x2c2492b73f894ae0L,0xf59df3e5b75f18ceL,0x7cb740d28f53cad0L,
+        0x3eb585fbc4f01294L },
+      { 0x17da0c8632c7f717L,0xeb8c795baf943f4cL,0x4ee23fb5f67c51d2L,
+        0xef18757568889949L } },
+    /* 25 << 203 */
+    { { 0xa6b4bdb20389168bL,0xc4ecd258ea577d03L,0x3a63782b55743082L,
+        0x6f678f4cc72f08cdL },
+      { 0x553511cf65e58dd8L,0xd53b4e3ed402c0cdL,0x37de3e29a037c14cL,
+        0x86b6c516c05712aaL } },
+    /* 26 << 203 */
+    { { 0x2834da3eb38dff6fL,0xbe012c52ea636be8L,0x292d238c61dd37f8L,
+        0x0e54523f8f8142dbL },
+      { 0xe31eb436036a05d8L,0x83e3cdff1e93c0ffL,0x3fd2fe0f50821ddfL,
+        0xc8e19b0dff9eb33bL } },
+    /* 27 << 203 */
+    { { 0xc8cc943fb569a5feL,0xad0090d4d4342d75L,0x82090b4bcaeca000L,
+        0xca39687f1bd410ebL },
+      { 0xe7bb0df765959d77L,0x39d782189c964999L,0xd87f62e8b2415451L,
+        0xe5efb774bed76108L } },
+    /* 28 << 203 */
+    { { 0x3ea011a4e822f0d0L,0xbc647ad15a8704f8L,0xbb315b3550c6820fL,
+        0x863dec3db7e76becL },
+      { 0x01ff5d3af017bfc7L,0x20054439976b8229L,0x067fca370bbd0d3bL,
+        0xf63dde647f5e3d0fL } },
+    /* 29 << 203 */
+    { { 0x22dbefb32a4c94e9L,0xafbff0fe96f8278aL,0x80aea0b13503793dL,
+        0xb22380295f06cd29L },
+      { 0x65703e578ec3fecaL,0x06c38314393e7053L,0xa0b751eb7c6734c4L,
+        0xd2e8a435c59f0f1eL } },
+    /* 30 << 203 */
+    { { 0x147d90525e9ca895L,0x2f4dd31e972072dfL,0xa16fda8ee6c6755cL,
+        0xc66826ffcf196558L },
+      { 0x1f1a76a30cf43895L,0xa9d604e083c3097bL,0xe190830966390e0eL,
+        0xa50bf753b3c85effL } },
+    /* 31 << 203 */
+    { { 0x0696bddef6a70251L,0x548b801b3c6ab16aL,0x37fcf704a4d08762L,
+        0x090b3defdff76c4eL },
+      { 0x87e8cb8969cb9158L,0x44a90744995ece43L,0xf85395f40ad9fbf5L,
+        0x49b0f6c54fb0c82dL } },
+    /* 32 << 203 */
+    { { 0x75d9bc15adf7cccfL,0x81a3e5d6dfa1e1b0L,0x8c39e444249bc17eL,
+        0xf37dccb28ea7fd43L },
+      { 0xda654873907fba12L,0x35daa6da4a372904L,0x0564cfc66283a6c5L,
+        0xd09fa4f64a9395bfL } },
+    /* 33 << 203 */
+    { { 0x688e9ec9aeb19a36L,0xd913f1cec7bfbfb4L,0x797b9a3c61c2faa6L,
+        0x2f979bec6a0a9c12L },
+      { 0xb5969d0f359679ecL,0xebcf523d079b0460L,0xfd6b000810fab870L,
+        0x3f2edcda9373a39cL } },
+    /* 34 << 203 */
+    { { 0x0d64f9a76f568431L,0xf848c27c02f8898cL,0xf418ade1260b5bd5L,
+        0xc1f3e3236973dee8L },
+      { 0x46e9319c26c185ddL,0x6d85b7d8546f0ac4L,0x427965f2247f9d57L,
+        0xb519b636b0035f48L } },
+    /* 35 << 203 */
+    { { 0x6b6163a9ab87d59cL,0xff9f58c339caaa11L,0x4ac39cde3177387bL,
+        0x5f6557c2873e77f9L },
+      { 0x6750400636a83041L,0x9b1c96ca75ef196cL,0xf34283deb08c7940L,
+        0x7ea096441128c316L } },
+    /* 36 << 203 */
+    { { 0xb510b3b56aa39dffL,0x59b43da29f8e4d8cL,0xa8ce31fd9e4c4b9fL,
+        0x0e20be26c1303c01L },
+      { 0x18187182e8ee47c9L,0xd9687cdb7db98101L,0x7a520e4da1e14ff6L,
+        0x429808ba8836d572L } },
+    /* 37 << 203 */
+    { { 0xa37ca60d4944b663L,0xf901f7a9a3f91ae5L,0xe4e3e76e9e36e3b1L,
+        0x9aa219cf29d93250L },
+      { 0x347fe275056a2512L,0xa4d643d9de65d95cL,0x9669d396699fc3edL,
+        0xb598dee2cf8c6bbeL } },
+    /* 38 << 203 */
+    { { 0x682ac1e5dda9e5c6L,0x4e0d3c72caa9fc95L,0x17faaade772bea44L,
+        0x5ef8428cab0009c8L },
+      { 0xcc4ce47a460ff016L,0xda6d12bf725281cbL,0x44c678480223aad2L,
+        0x6e342afa36256e28L } },
+    /* 39 << 203 */
+    { { 0x1400bb0b93a37c04L,0x62b1bc9bdd10bd96L,0x7251adeb0dac46b7L,
+        0x7d33b92e7be4ef51L },
+      { 0x28b2a94be61fa29aL,0x4b2be13f06422233L,0x36d6d062330d8d37L,
+        0x5ef80e1eb28ca005L } },
+    /* 40 << 203 */
+    { { 0x174d46996d16768eL,0x9fc4ff6a628bf217L,0x77705a94154e490dL,
+        0x9d96dd288d2d997aL },
+      { 0x77e2d9d8ce5d72c4L,0x9d06c5a4c11c714fL,0x02aa513679e4a03eL,
+        0x1386b3c2030ff28bL } },
+    /* 41 << 203 */
+    { { 0xfe82e8a6fb283f61L,0x7df203e5f3abc3fbL,0xeec7c3513a4d3622L,
+        0xf7d17dbfdf762761L },
+      { 0xc3956e44522055f0L,0xde3012db8fa748dbL,0xca9fcb63bf1dcc14L,
+        0xa56d9dcfbe4e2f3aL } },
+    /* 42 << 203 */
+    { { 0xb86186b68bcec9c2L,0x7cf24df9680b9f06L,0xc46b45eac0d29281L,
+        0xfff42bc507b10e12L },
+      { 0x12263c404d289427L,0x3d5f1899b4848ec4L,0x11f97010d040800cL,
+        0xb4c5f529300feb20L } },
+    /* 43 << 203 */
+    { { 0xcc543f8fde94fdcbL,0xe96af739c7c2f05eL,0xaa5e0036882692e1L,
+        0x09c75b68950d4ae9L },
+      { 0x62f63df2b5932a7aL,0x2658252ede0979adL,0x2a19343fb5e69631L,
+        0x718c7501525b666bL } },
+    /* 44 << 203 */
+    { { 0x26a42d69ea40dc3aL,0xdc84ad22aecc018fL,0x25c36c7b3270f04aL,
+        0x46ba6d4750fa72edL },
+      { 0x6c37d1c593e58a8eL,0xa2394731120c088cL,0xc3be4263cb6e86daL,
+        0x2c417d367126d038L } },
+    /* 45 << 203 */
+    { { 0x5b70f9c58b6f8efaL,0x671a2faa37718536L,0xd3ced3c6b539c92bL,
+        0xe56f1bd9a31203c2L },
+      { 0x8b096ec49ff3c8ebL,0x2deae43243491ceaL,0x2465c6eb17943794L,
+        0x5d267e6620586843L } },
+    /* 46 << 203 */
+    { { 0x9d3d116db07159d0L,0xae07a67fc1896210L,0x8fc84d87bb961579L,
+        0x30009e491c1f8dd6L },
+      { 0x8a8caf22e3132819L,0xcffa197cf23ab4ffL,0x58103a44205dd687L,
+        0x57b796c30ded67a2L } },
+    /* 47 << 203 */
+    { { 0x0b9c3a6ca1779ad7L,0xa33cfe2e357c09c5L,0x2ea293153db4a57eL,
+        0x919596958ebeb52eL },
+      { 0x118db9a6e546c879L,0x8e996df46295c8d6L,0xdd99048455ec806bL,
+        0x24f291ca165c1035L } },
+    /* 48 << 203 */
+    { { 0xcca523bb440e2229L,0x324673a273ef4d04L,0xaf3adf343e11ec39L,
+        0x6136d7f1dc5968d3L },
+      { 0x7a7b2899b053a927L,0x3eaa2661ae067ecdL,0x8549b9c802779cd9L,
+        0x061d7940c53385eaL } },
+    /* 49 << 203 */
+    { { 0x3e0ba883f06d18bdL,0x4ba6de53b2700843L,0xb966b668591a9e4dL,
+        0x93f675677f4fa0edL },
+      { 0x5a02711b4347237bL,0xbc041e2fe794608eL,0x55af10f570f73d8cL,
+        0xd2d4d4f7bb7564f7L } },
+    /* 50 << 203 */
+    { { 0xd7d27a89b3e93ce7L,0xf7b5a8755d3a2c1bL,0xb29e68a0255b218aL,
+        0xb533837e8af76754L },
+      { 0xd1b05a73579fab2eL,0xb41055a1ecd74385L,0xb2369274445e9115L,
+        0x2972a7c4f520274eL } },
+    /* 51 << 203 */
+    { { 0x6c08334ef678e68aL,0x4e4160f099b057edL,0x3cfe11b852ccb69aL,
+        0x2fd1823a21c8f772L },
+      { 0xdf7f072f3298f055L,0x8c0566f9fec74a6eL,0xe549e0195bb4d041L,
+        0x7c3930ba9208d850L } },
+    /* 52 << 203 */
+    { { 0xe07141fcaaa2902bL,0x539ad799e4f69ad3L,0xa6453f94813f9ffdL,
+        0xc58d3c48375bc2f7L },
+      { 0xb3326fad5dc64e96L,0x3aafcaa9b240e354L,0x1d1b0903aca1e7a9L,
+        0x4ceb97671211b8a0L } },
+    /* 53 << 203 */
+    { { 0xeca83e49e32a858eL,0x4c32892eae907badL,0xd5b42ab62eb9b494L,
+        0x7fde3ee21eabae1bL },
+      { 0x13b5ab09caf54957L,0xbfb028bee5f5d5d5L,0x928a06502003e2c0L,
+        0x90793aac67476843L } },
+    /* 54 << 203 */
+    { { 0x5e942e79c81710a0L,0x557e4a3627ccadd4L,0x72a2bc564bcf6d0cL,
+        0x09ee5f4326d7b80cL },
+      { 0x6b70dbe9d4292f19L,0x56f74c2663f16b18L,0xc23db0f735fbb42aL,
+        0xb606bdf66ae10040L } },
+    /* 55 << 203 */
+    { { 0x1eb15d4d044573acL,0x7dc3cf86556b0ba4L,0x97af9a33c60df6f7L,
+        0x0b1ef85ca716ce8cL },
+      { 0x2922f884c96958beL,0x7c32fa9435690963L,0x2d7f667ceaa00061L,
+        0xeaaf7c173547365cL } },
+    /* 56 << 203 */
+    { { 0x1eb4de4687032d58L,0xc54f3d835e2c79e0L,0x07818df45d04ef23L,
+        0x55faa9c8673d41b4L },
+      { 0xced64f6f89b95355L,0x4860d2eab7415c84L,0x5fdb9bd2050ebad3L,
+        0xdb53e0cc6685a5bfL } },
+    /* 57 << 203 */
+    { { 0xb830c0319feb6593L,0xdd87f3106accff17L,0x2303ebab9f555c10L,
+        0x94603695287e7065L },
+      { 0xf88311c32e83358cL,0x508dd9b4eefb0178L,0x7ca237062dba8652L,
+        0x62aac5a30047abe5L } },
+    /* 58 << 203 */
+    { { 0x9a61d2a08b1ea7b3L,0xd495ab63ae8b1485L,0x38740f8487052f99L,
+        0x178ebe5bb2974eeaL },
+      { 0x030bbcca5b36d17fL,0xb5e4cce3aaf86eeaL,0xb51a022068f8e9e0L,
+        0xa434879609eb3e75L } },
+    /* 59 << 203 */
+    { { 0xbe592309eef1a752L,0x5d7162d76f2aa1edL,0xaebfb5ed0f007dd2L,
+        0x255e14b2c89edd22L },
+      { 0xba85e0720303b697L,0xc5d17e25f05720ffL,0x02b58d6e5128ebb6L,
+        0x2c80242dd754e113L } },
+    /* 60 << 203 */
+    { { 0x919fca5fabfae1caL,0x937afaac1a21459bL,0x9e0ca91c1f66a4d2L,
+        0x194cc7f323ec1331L },
+      { 0xad25143a8aa11690L,0xbe40ad8d09b59e08L,0x37d60d9be750860aL,
+        0x6c53b008c6bf434cL } },
+    /* 61 << 203 */
+    { { 0xb572415d1356eb80L,0xb8bf9da39578ded8L,0x22658e365e8fb38bL,
+        0x9b70ce225af8cb22L },
+      { 0x7c00018a829a8180L,0x84329f93b81ed295L,0x7c343ea25f3cea83L,
+        0x38f8655f67586536L } },
+    /* 62 << 203 */
+    { { 0xa661a0d01d3ec517L,0x98744652512321aeL,0x084ca591eca92598L,
+        0xa9bb9dc91dcb3febL },
+      { 0x14c5435578b4c240L,0x5ed62a3b610cafdcL,0x07512f371b38846bL,
+        0x571bb70ab0e38161L } },
+    /* 63 << 203 */
+    { { 0xb556b95b2da705d2L,0x3ef8ada6b1a08f98L,0x85302ca7ddecfbe5L,
+        0x0e530573943105cdL },
+      { 0x60554d5521a9255dL,0x63a32fa1f2f3802aL,0x35c8c5b0cd477875L,
+        0x97f458ea6ad42da1L } },
+    /* 64 << 203 */
+    { { 0x832d7080eb6b242dL,0xd30bd0233b71e246L,0x7027991bbe31139dL,
+        0x68797e91462e4e53L },
+      { 0x423fe20a6b4e185aL,0x82f2c67e42d9b707L,0x25c817684cf7811bL,
+        0xbd53005e045bb95dL } },
+    /* 0 << 210 */
+    { { 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 << 210 */
+    { { 0xe5f649be9d8e68fdL,0xdb0f05331b044320L,0xf6fde9b3e0c33398L,
+        0x92f4209b66c8cfaeL },
+      { 0xe9d1afcc1a739d4bL,0x09aea75fa28ab8deL,0x14375fb5eac6f1d0L,
+        0x6420b560708f7aa5L } },
+    /* 2 << 210 */
+    { { 0x9eae499c6254dc41L,0x7e2939247a837e7eL,0x74aec08c090524a7L,
+        0xf82b92198d6f55f2L },
+      { 0x493c962e1402cec5L,0x9f17ca17fa2f30e7L,0xbcd783e8e9b879cbL,
+        0xea3d8c145a6f145fL } },
+    /* 3 << 210 */
+    { { 0xdede15e75e0dee6eL,0x74f24872dc628aa2L,0xd3e9c4fe7861bb93L,
+        0x56d4822a6187b2e0L },
+      { 0xb66417cfc59826f9L,0xca2609692408169eL,0xedf69d06c79ef885L,
+        0x00031f8adc7d138fL } },
+    /* 4 << 210 */
+    { { 0x103c46e60ebcf726L,0x4482b8316231470eL,0x6f6dfaca487c2109L,
+        0x2e0ace9762e666efL },
+      { 0x3246a9d31f8d1f42L,0x1b1e83f1574944d2L,0x13dfa63aa57f334bL,
+        0x0cf8daed9f025d81L } },
+    /* 5 << 210 */
+    { { 0x30d78ea800ee11c1L,0xeb053cd4b5e3dd75L,0x9b65b13ed58c43c5L,
+        0xc3ad49bdbd151663L },
+      { 0x99fd8e41b6427990L,0x12cf15bd707eae1eL,0x29ad4f1b1aabb71eL,
+        0x5143e74d07545d0eL } },
+    /* 6 << 210 */
+    { { 0x30266336c88bdee1L,0x25f293065876767cL,0x9c078571c6731996L,
+        0xc88690b2ed552951L },
+      { 0x274f2c2d852705b4L,0xb0bf8d444e09552dL,0x7628beeb986575d1L,
+        0x407be2387f864651L } },
+    /* 7 << 210 */
+    { { 0x0e5e3049a639fc6bL,0xe75c35d986003625L,0x0cf35bd85dcc1646L,
+        0x8bcaced26c26273aL },
+      { 0xe22ecf1db5536742L,0x013dd8971a9e068bL,0x17f411cb8a7909c5L,
+        0x5757ac98861dd506L } },
+    /* 8 << 210 */
+    { { 0x85de1f0d1e935abbL,0xdefd10b4154de37aL,0xb8d9e392369cebb5L,
+        0x54d5ef9b761324beL },
+      { 0x4d6341ba74f17e26L,0xc0a0e3c878c1dde4L,0xa6d7758187d918fdL,
+        0x6687601502ca3a13L } },
+    /* 9 << 210 */
+    { { 0xc7313e9cf36658f0L,0xc433ef1c71f8057eL,0x853262461b6a835aL,
+        0xc8f053987c86394cL },
+      { 0xff398cdfe983c4a1L,0xbf5e816203b7b931L,0x93193c46b7b9045bL,
+        0x1e4ebf5da4a6e46bL } },
+    /* 10 << 210 */
+    { { 0xf9942a6043a24fe7L,0x29c1191effb3492bL,0x9f662449902fde05L,
+        0xc792a7ac6713c32dL },
+      { 0x2fd88ad8b737982cL,0x7e3a0319a21e60e3L,0x09b0de447383591aL,
+        0x6df141ee8310a456L } },
+    /* 11 << 210 */
+    { { 0xaec1a039e6d6f471L,0x14b2ba0f1198d12eL,0xebc1a1603aeee5acL,
+        0x401f4836e0b964ceL },
+      { 0x2ee437964fd03f66L,0x3fdb4e49dd8f3f12L,0x6ef267f629380f18L,
+        0x3e8e96708da64d16L } },
+    /* 12 << 210 */
+    { { 0xbc19180c207674f1L,0x112e09a733ae8fdbL,0x996675546aaeb71eL,
+        0x79432af1e101b1c7L },
+      { 0xd5eb558fde2ddec6L,0x81392d1f5357753fL,0xa7a76b973ae1158aL,
+        0x416fbbff4a899991L } },
+    /* 13 << 210 */
+    { { 0x9e65fdfd0d4a9dcfL,0x7bc29e48944ddf12L,0xbc1a92d93c856866L,
+        0x273c69056e98dfe2L },
+      { 0x69fce418cdfaa6b8L,0x606bd8235061c69fL,0x42d495a06af75e27L,
+        0x8ed3d5056d873a1fL } },
+    /* 14 << 210 */
+    { { 0xaf5528416ab25b6aL,0xc6c0ffc72b1a4523L,0xab18827b21c99e03L,
+        0x060e86489034691bL },
+      { 0x5207f90f93c7f398L,0x9f4a96cb82f8d10bL,0xdd71cd793ad0f9e3L,
+        0x84f435d2fc3a54f5L } },
+    /* 15 << 210 */
+    { { 0x4b03c55b8e33787fL,0xef42f975a6384673L,0xff7304f75051b9f0L,
+        0x18aca1dc741c87c2L },
+      { 0x56f120a72d4bfe80L,0xfd823b3d053e732cL,0x11bccfe47537ca16L,
+        0xdf6c9c741b5a996bL } },
+    /* 16 << 210 */
+    { { 0xee7332c7904fc3faL,0x14a23f45c7e3636aL,0xc38659c3f091d9aaL,
+        0x4a995e5db12d8540L },
+      { 0x20a53becf3a5598aL,0x56534b17b1eaa995L,0x9ed3dca4bf04e03cL,
+        0x716c563ad8d56268L } },
+    /* 17 << 210 */
+    { { 0x27ba77a41d6178e7L,0xe4c80c4068a1ff8eL,0x750110990a13f63dL,
+        0x7bf33521a61d46f3L },
+      { 0x0aff218e10b365bbL,0x810218040fd7ea75L,0x05a3fd8aa4b3a925L,
+        0xb829e75f9b3db4e6L } },
+    /* 18 << 210 */
+    { { 0x6bdc75a54d53e5fbL,0x04a5dc02d52717e3L,0x86af502fe9a42ec2L,
+        0x8867e8fb2630e382L },
+      { 0xbf845c6ebec9889bL,0x54f491f2cb47c98dL,0xa3091fba790c2a12L,
+        0xd7f6fd78c20f708bL } },
+    /* 19 << 210 */
+    { { 0xa569ac30acde5e17L,0xd0f996d06852b4d7L,0xe51d4bb54609ae54L,
+        0x3fa37d170daed061L },
+      { 0x62a8868434b8fb41L,0x99a2acbd9efb64f1L,0xb75c1a5e6448e1f2L,
+        0xfa99951a42b5a069L } },
+    /* 20 << 210 */
+    { { 0x6d956e892f3b26e7L,0xf4709860da875247L,0x3ad151792482dda3L,
+        0xd64110e3017d82f0L },
+      { 0x14928d2cfad414e4L,0x2b155f582ed02b24L,0x481a141bcb821bf1L,
+        0x12e3c7704f81f5daL } },
+    /* 21 << 210 */
+    { { 0xe49c5de59fff8381L,0x110532325bbec894L,0xa0d051cc454d88c4L,
+        0x4f6db89c1f8e531bL },
+      { 0x34fe3fd6ca563a44L,0x7f5c221558da8ab9L,0x8445016d9474f0a1L,
+        0x17d34d61cb7d8a0aL } },
+    /* 22 << 210 */
+    { { 0x8e9d39101c474019L,0xcaff2629d52ceefbL,0xf9cf3e32c1622c2bL,
+        0xd4b95e3ce9071a05L },
+      { 0xfbbca61f1594438cL,0x1eb6e6a604aadedfL,0x853027f468e14940L,
+        0x221d322adfabda9cL } },
+    /* 23 << 210 */
+    { { 0xed8ea9f6b7cb179aL,0xdc7b764db7934dccL,0xfcb139405e09180dL,
+        0x6629a6bfb47dc2ddL },
+      { 0xbfc55e4e9f5a915eL,0xb1db9d376204441eL,0xf82d68cf930c5f53L,
+        0x17d3a142cbb605b1L } },
+    /* 24 << 210 */
+    { { 0xdd5944ea308780f2L,0xdc8de7613845f5e4L,0x6beaba7d7624d7a3L,
+        0x1e709afd304df11eL },
+      { 0x9536437602170456L,0xbf204b3ac8f94b64L,0x4e53af7c5680ca68L,
+        0x0526074ae0c67574L } },
+    /* 25 << 210 */
+    { { 0x95d8cef8ecd92af6L,0xe6b9fa7a6cd1745aL,0x3d546d3da325c3e4L,
+        0x1f57691d9ae93aaeL },
+      { 0xe891f3fe9d2e1a33L,0xd430093fac063d35L,0xeda59b125513a327L,
+        0xdc2134f35536f18fL } },
+    /* 26 << 210 */
+    { { 0xaa51fe2c5c210286L,0x3f68aaee1cab658cL,0x5a23a00bf9357292L,
+        0x9a626f397efdabedL },
+      { 0xfe2b3bf3199d78e3L,0xb7a2af7771bbc345L,0x3d19827a1e59802cL,
+        0x823bbc15b487a51cL } },
+    /* 27 << 210 */
+    { { 0x856139f299d0a422L,0x9ac3df65f456c6fbL,0xaddf65c6701f8bd6L,
+        0x149f321e3758df87L },
+      { 0xb1ecf714721b7ebaL,0xe17df09831a3312aL,0xdb2fd6ecd5c4d581L,
+        0xfd02996f8fcea1b3L } },
+    /* 28 << 210 */
+    { { 0xe29fa63e7882f14fL,0xc9f6dc3507c6cadcL,0x46f22d6fb882bed0L,
+        0x1a45755bd118e52cL },
+      { 0x9f2c7c277c4608cfL,0x7ccbdf32568012c2L,0xfcb0aedd61729b0eL,
+        0x7ca2ca9ef7d75dbfL } },
+    /* 29 << 210 */
+    { { 0xf58fecb16f640f62L,0xe274b92b39f51946L,0x7f4dfc046288af44L,
+        0x0a91f32aeac329e5L },
+      { 0x43ad274bd6aaba31L,0x719a16400f6884f9L,0x685d29f6daf91e20L,
+        0x5ec1cc3327e49d52L } },
+    /* 30 << 210 */
+    { { 0x38f4de963b54a059L,0x0e0015e5efbcfdb3L,0x177d23d94dbb8da6L,
+        0x98724aa297a617adL },
+      { 0x30f0885bfdb6558eL,0xf9f7a28ac7899a96L,0xd2ae8ac8872dc112L,
+        0xfa0642ca73c3c459L } },
+    /* 31 << 210 */
+    { { 0x15296981e7dfc8d6L,0x67cd44501fb5b94aL,0x0ec71cf10eddfd37L,
+        0xc7e5eeb39a8eddc7L },
+      { 0x02ac8e3d81d95028L,0x0088f17270b0e35dL,0xec041fabe1881fe3L,
+        0x62cf71b8d99e7faaL } },
+    /* 32 << 210 */
+    { { 0x5043dea7e0f222c2L,0x309d42ac72e65142L,0x94fe9ddd9216cd30L,
+        0xd6539c7d0f87feecL },
+      { 0x03c5a57c432ac7d7L,0x72692cf0327fda10L,0xec28c85f280698deL,
+        0x2331fb467ec283b1L } },
+    /* 33 << 210 */
+    { { 0xd34bfa322867e633L,0x78709a820a9cc815L,0xb7fe6964875e2fa5L,
+        0x25cc064f9e98bfb5L },
+      { 0x9eb0151c493a65c5L,0x5fb5d94153182464L,0x69e6f130f04618e2L,
+        0xa8ecec22f89c8ab6L } },
+    /* 34 << 210 */
+    { { 0xcd6ac88bb96209bdL,0x65fa8cdbb3e1c9e0L,0xa47d22f54a8d8eacL,
+        0x83895cdf8d33f963L },
+      { 0xa8adca59b56cd3d1L,0x10c8350bdaf38232L,0x2b161fb3a5080a9fL,
+        0xbe7f5c643af65b3aL } },
+    /* 35 << 210 */
+    { { 0x2c75403997403a11L,0x94626cf7121b96afL,0x431de7c46a983ec2L,
+        0x3780dd3a52cc3df7L },
+      { 0xe28a0e462baf8e3bL,0xabe68aad51d299aeL,0x603eb8f9647a2408L,
+        0x14c61ed65c750981L } },
+    /* 36 << 210 */
+    { { 0x88b34414c53352e7L,0x5a34889c1337d46eL,0x612c1560f95f2bc8L,
+        0x8a3f8441d4807a3aL },
+      { 0x680d9e975224da68L,0x60cd6e88c3eb00e9L,0x3875a98e9a6bc375L,
+        0xdc80f9244fd554c2L } },
+    /* 37 << 210 */
+    { { 0x6c4b34156ac77407L,0xa1e5ea8f25420681L,0x541bfa144607a458L,
+        0x5dbc7e7a96d7fbf9L },
+      { 0x646a851b31590a47L,0x039e85ba15ee6df8L,0xd19fa231d7b43fc0L,
+        0x84bc8be8299a0e04L } },
+    /* 38 << 210 */
+    { { 0x2b9d2936f20df03aL,0x240543828608d472L,0x76b6ba049149202aL,
+        0xb21c38313670e7b7L },
+      { 0xddd93059d6fdee10L,0x9da47ad378488e71L,0x99cc1dfda0fcfb25L,
+        0x42abde1064696954L } },
+    /* 39 << 210 */
+    { { 0x14cc15fc17eab9feL,0xd6e863e4d3e70972L,0x29a7765c6432112cL,
+        0x886600015b0774d8L },
+      { 0x3729175a2c088eaeL,0x13afbcae8230b8d4L,0x44768151915f4379L,
+        0xf086431ad8d22812L } },
+    /* 40 << 210 */
+    { { 0x37461955c298b974L,0x905fb5f0f8711e04L,0x787abf3afe969d18L,
+        0x392167c26f6a494eL },
+      { 0xfc7a0d2d28c511daL,0xf127c7dcb66a262dL,0xf9c4bb95fd63fdf0L,
+        0x900165893913ef46L } },
+    /* 41 << 210 */
+    { { 0x74d2a73c11aa600dL,0x2f5379bd9fb5ab52L,0xe49e53a47fb70068L,
+        0x68dd39e5404aa9a7L },
+      { 0xb9b0cf572ecaa9c3L,0xba0e103be824826bL,0x60c2198b4631a3c4L,
+        0xc5ff84abfa8966a2L } },
+    /* 42 << 210 */
+    { { 0x2d6ebe22ac95aff8L,0x1c9bb6dbb5a46d09L,0x419062da53ee4f8dL,
+        0x7b9042d0bb97efefL },
+      { 0x0f87f080830cf6bdL,0x4861d19a6ec8a6c6L,0xd3a0daa1202f01aaL,
+        0xb0111674f25afbd5L } },
+    /* 43 << 210 */
+    { { 0x6d00d6cf1afb20d9L,0x1369500040671bc5L,0x913ab0dc2485ea9bL,
+        0x1f2bed069eef61acL },
+      { 0x850c82176d799e20L,0x93415f373271c2deL,0x5afb06e96c4f5910L,
+        0x688a52dfc4e9e421L } },
+    /* 44 << 210 */
+    { { 0x30495ba3e2a9a6dbL,0x4601303d58f9268bL,0xbe3b0dad7eb0f04fL,
+        0x4ea472504456936dL },
+      { 0x8caf8798d33fd3e7L,0x1ccd8a89eb433708L,0x9effe3e887fd50adL,
+        0xbe240a566b29c4dfL } },
+    /* 45 << 210 */
+    { { 0xec4ffd98ca0e7ebdL,0xf586783ae748616eL,0xa5b00d8fc77baa99L,
+        0x0acada29b4f34c9cL },
+      { 0x36dad67d0fe723acL,0x1d8e53a539c36c1eL,0xe4dd342d1f4bea41L,
+        0x64fd5e35ebc9e4e0L } },
+    /* 46 << 210 */
+    { { 0x96f01f9057908805L,0xb5b9ea3d5ed480ddL,0x366c5dc23efd2dd0L,
+        0xed2fe3056e9dfa27L },
+      { 0x4575e8926e9197e2L,0x11719c09ab502a5dL,0x264c7bece81f213fL,
+        0x741b924155f5c457L } },
+    /* 47 << 210 */
+    { { 0x78ac7b6849a5f4f4L,0xf91d70a29fc45b7dL,0x39b05544b0f5f355L,
+        0x11f06bceeef930d9L },
+      { 0xdb84d25d038d05e1L,0x04838ee5bacc1d51L,0x9da3ce869e8ee00bL,
+        0xc3412057c36eda1fL } },
+    /* 48 << 210 */
+    { { 0xae80b91364d9c2f4L,0x7468bac3a010a8ffL,0xdfd2003737359d41L,
+        0x1a0f5ab815efeaccL },
+      { 0x7c25ad2f659d0ce0L,0x4011bcbb6785cff1L,0x128b99127e2192c7L,
+        0xa549d8e113ccb0e8L } },
+    /* 49 << 210 */
+    { { 0x805588d8c85438b1L,0x5680332dbc25cb27L,0xdcd1bc961a4bfdf4L,
+        0x779ff428706f6566L },
+      { 0x8bbee998f059987aL,0xf6ce8cf2cc686de7L,0xf8ad3c4a953cfdb2L,
+        0xd1d426d92205da36L } },
+    /* 50 << 210 */
+    { { 0xb3c0f13fc781a241L,0x3e89360ed75362a8L,0xccd05863c8a91184L,
+        0x9bd0c9b7efa8a7f4L },
+      { 0x97ee4d538a912a4bL,0xde5e15f8bcf518fdL,0x6a055bf8c467e1e0L,
+        0x10be4b4b1587e256L } },
+    /* 51 << 210 */
+    { { 0xd90c14f2668621c9L,0xd5518f51ab9c92c1L,0x8e6a0100d6d47b3cL,
+        0xcbe980dd66716175L },
+      { 0x500d3f10ddd83683L,0x3b6cb35d99cac73cL,0x53730c8b6083d550L,
+        0xcf159767df0a1987L } },
+    /* 52 << 210 */
+    { { 0x84bfcf5343ad73b3L,0x1b528c204f035a94L,0x4294edf733eeac69L,
+        0xb6283e83817f3240L },
+      { 0xc3fdc9590a5f25b1L,0xefaf8aa55844ee22L,0xde269ba5dbdde4deL,
+        0xe3347160c56133bfL } },
+    /* 53 << 210 */
+    { { 0xc11842198d9ea9f8L,0x090de5dbf3fc1ab5L,0x404c37b10bf22cdaL,
+        0x7de20ec8f5618894L },
+      { 0x754c588eecdaecabL,0x6ca4b0ed88342743L,0x76f08bddf4a938ecL,
+        0xd182de8991493ccbL } },
+    /* 54 << 210 */
+    { { 0xd652c53ec8a4186aL,0xb3e878db946d8e33L,0x088453c05f37663cL,
+        0x5cd9daaab407748bL },
+      { 0xa1f5197f586d5e72L,0x47500be8c443ca59L,0x78ef35b2e2652424L,
+        0x09c5d26f6dd7767dL } },
+    /* 55 << 210 */
+    { { 0x7175a79aa74d3f7bL,0x0428fd8dcf5ea459L,0x511cb97ca5d1746dL,
+        0x36363939e71d1278L },
+      { 0xcf2df95510350bf4L,0xb381743960aae782L,0xa748c0e43e688809L,
+        0x98021fbfd7a5a006L } },
+    /* 56 << 210 */
+    { { 0x9076a70c0e367a98L,0xbea1bc150f62b7c2L,0x2645a68c30fe0343L,
+        0xacaffa78699dc14fL },
+      { 0xf4469964457bf9c4L,0x0db6407b0d2ead83L,0x68d56cadb2c6f3ebL,
+        0x3b512e73f376356cL } },
+    /* 57 << 210 */
+    { { 0xe43b0e1ffce10408L,0x89ddc0035a5e257dL,0xb0ae0d120362e5b3L,
+        0x07f983c7b0519161L },
+      { 0xc2e94d155d5231e7L,0xcff22aed0b4f9513L,0xb02588dd6ad0b0b5L,
+        0xb967d1ac11d0dcd5L } },
+    /* 58 << 210 */
+    { { 0x8dac6bc6cf777b6cL,0x0062bdbd4c6d1959L,0x53da71b50ef5cc85L,
+        0x07012c7d4006f14fL },
+      { 0x4617f962ac47800dL,0x53365f2bc102ed75L,0xb422efcb4ab8c9d3L,
+        0x195cb26b34af31c9L } },
+    /* 59 << 210 */
+    { { 0x3a926e2905f2c4ceL,0xbd2bdecb9856966cL,0x5d16ab3a85527015L,
+        0x9f81609e4486c231L },
+      { 0xd8b96b2cda350002L,0xbd054690fa1b7d36L,0xdc90ebf5e71d79bcL,
+        0xf241b6f908964e4eL } },
+    /* 60 << 210 */
+    { { 0x7c8386432fe3cd4cL,0xe0f33acbb4bc633cL,0xb4a9ecec3d139f1fL,
+        0x05ce69cddc4a1f49L },
+      { 0xa19d1b16f5f98aafL,0x45bb71d66f23e0efL,0x33789fcd46cdfdd3L,
+        0x9b8e2978cee040caL } },
+    /* 61 << 210 */
+    { { 0x9c69b246ae0a6828L,0xba533d247078d5aaL,0x7a2e42c07bb4fbdbL,
+        0xcfb4879a7035385cL },
+      { 0x8c3dd30b3281705bL,0x7e361c6c404fe081L,0x7b21649c3f604edfL,
+        0x5dbf6a3fe52ffe47L } },
+    /* 62 << 210 */
+    { { 0xc41b7c234b54d9bfL,0x1374e6813511c3d9L,0x1863bf16c1b2b758L,
+        0x90e785071e9e6a96L },
+      { 0xab4bf98d5d86f174L,0xd74e0bd385e96fe4L,0x8afde39fcac5d344L,
+        0x90946dbcbd91b847L } },
+    /* 63 << 210 */
+    { { 0xf5b42358fe1a838cL,0x05aae6c5620ac9d8L,0x8e193bd8a1ce5a0bL,
+        0x8f7105714dabfd72L },
+      { 0x8d8fdd48182caaacL,0x8c4aeefa040745cfL,0x73c6c30af3b93e6dL,
+        0x991241f316f42011L } },
+    /* 64 << 210 */
+    { { 0xa0158eeae457a477L,0xd19857dbee6ddc05L,0xb326522418c41671L,
+        0x3ffdfc7e3c2c0d58L },
+      { 0x3a3a525426ee7cdaL,0x341b0869df02c3a8L,0xa023bf42723bbfc8L,
+        0x3d15002a14452691L } },
+    /* 0 << 217 */
+    { { 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 << 217 */
+    { { 0x5ef7324c85edfa30L,0x2597655487d4f3daL,0x352f5bc0dcb50c86L,
+        0x8f6927b04832a96cL },
+      { 0xd08ee1ba55f2f94cL,0x6a996f99344b45faL,0xe133cb8da8aa455dL,
+        0x5d0721ec758dc1f7L } },
+    /* 2 << 217 */
+    { { 0x6ba7a92079e5fb67L,0xe1331feb70aa725eL,0x5080ccf57df5d837L,
+        0xe4cae01d7ff72e21L },
+      { 0xd9243ee60412a77dL,0x06ff7cacdf449025L,0xbe75f7cd23ef5a31L,
+        0xbc9578220ddef7a8L } },
+    /* 3 << 217 */
+    { { 0x8cf7230cb0ce1c55L,0x5b534d050bbfb607L,0xee1ef1130e16363bL,
+        0x27e0aa7ab4999e82L },
+      { 0xce1dac2d79362c41L,0x67920c9091bb6cb0L,0x1e648d632223df24L,
+        0x0f7d9eefe32e8f28L } },
+    /* 4 << 217 */
+    { { 0x6943f39afa833834L,0x22951722a6328562L,0x81d63dd54170fc10L,
+        0x9f5fa58faecc2e6dL },
+      { 0xb66c8725e77d9a3bL,0x11235cea6384ebe0L,0x06a8c1185845e24aL,
+        0x0137b286ebd093b1L } },
+    /* 5 << 217 */
+    { { 0xc589e1ce44ace150L,0xe0f8d3d94381e97cL,0x59e99b1162c5a4b8L,
+        0x90d262f7fd0ec9f9L },
+      { 0xfbc854c9283e13c9L,0x2d04fde7aedc7085L,0x057d776547dcbecbL,
+        0x8dbdf5919a76fa5fL } },
+    /* 6 << 217 */
+    { { 0xd01506950de1e578L,0x2e1463e7e9f72bc6L,0xffa684411b39eca5L,
+        0x673c85307c037f2fL },
+      { 0xd0d6a600747f91daL,0xb08d43e1c9cb78e9L,0x0fc0c64427b5cef5L,
+        0x5c1d160aa60a2fd6L } },
+    /* 7 << 217 */
+    { { 0xf98cae5328c8e13bL,0x375f10c4b2eddcd1L,0xd4eb8b7f5cce06adL,
+        0xb4669f4580a2e1efL },
+      { 0xd593f9d05bbd8699L,0x5528a4c9e7976d13L,0x3923e0951c7e28d3L,
+        0xb92937903f6bb577L } },
+    /* 8 << 217 */
+    { { 0xdb567d6ac42bd6d2L,0x6df86468bb1f96aeL,0x0efe5b1a4843b28eL,
+        0x961bbb056379b240L },
+      { 0xb6caf5f070a6a26bL,0x70686c0d328e6e39L,0x80da06cf895fc8d3L,
+        0x804d8810b363fdc9L } },
+    /* 9 << 217 */
+    { { 0xbe22877b207f1670L,0x9b0dd1884e615291L,0x625ae8dc97a3c2bfL,
+        0x08584ef7439b86e8L },
+      { 0xde7190a5dcd898ffL,0x26286c402058ee3dL,0x3db0b2175f87b1c1L,
+        0xcc334771102a6db5L } },
+    /* 10 << 217 */
+    { { 0xd99de9542f770fb1L,0x97c1c6204cd7535eL,0xd3b6c4483f09cefcL,
+        0xd725af155a63b4f8L },
+      { 0x0c95d24fc01e20ecL,0xdfd374949ae7121fL,0x7d6ddb72ec77b7ecL,
+        0xfe079d3b0353a4aeL } },
+    /* 11 << 217 */
+    { { 0x3066e70a2e6ac8d2L,0x9c6b5a43106e5c05L,0x52d3c6f5ede59b8cL,
+        0x30d6a5c3fccec9aeL },
+      { 0xedec7c224fc0a9efL,0x190ff08395c16cedL,0xbe12ec8f94de0fdeL,
+        0x0d131ab8852d3433L } },
+    /* 12 << 217 */
+    { { 0x42ace07e85701291L,0x94793ed9194061a8L,0x30e83ed6d7f4a485L,
+        0x9eec7269f9eeff4dL },
+      { 0x90acba590c9d8005L,0x5feca4581e79b9d1L,0x8fbe54271d506a1eL,
+        0xa32b2c8e2439cfa7L } },
+    /* 13 << 217 */
+    { { 0x1671c17373dd0b4eL,0x37a2821444a054c6L,0x81760a1b4e8b53f1L,
+        0xa6c04224f9f93b9eL },
+      { 0x18784b34cf671e3cL,0x81bbecd2cda9b994L,0x38831979b2ab3848L,
+        0xef54feb7f2e03c2dL } },
+    /* 14 << 217 */
+    { { 0xcf197ca7fb8088faL,0x014272474ddc96c5L,0xa2d2550a30777176L,
+        0x534698984d0cf71dL },
+      { 0x6ce937b83a2aaac6L,0xe9f91dc35af38d9bL,0x2598ad83c8bf2899L,
+        0x8e706ac9b5536c16L } },
+    /* 15 << 217 */
+    { { 0x40dc7495f688dc98L,0x26490cd7124c4afcL,0xe651ec841f18775cL,
+        0x393ea6c3b4fdaf4aL },
+      { 0x1e1f33437f338e0dL,0x39fb832b6053e7b5L,0x46e702da619e14d5L,
+        0x859cacd1cdeef6e0L } },
+    /* 16 << 217 */
+    { { 0x63b99ce74462007dL,0xb8ab48a54cb5f5b7L,0x9ec673d2f55edde7L,
+        0xd1567f748cfaefdaL },
+      { 0x46381b6b0887bcecL,0x694497cee178f3c2L,0x5e6525e31e6266cbL,
+        0x5931de26697d6413L } },
+    /* 17 << 217 */
+    { { 0x87f8df7c0e58d493L,0xb1ae5ed058b73f12L,0xc368f784dea0c34dL,
+        0x9bd0a120859a91a0L },
+      { 0xb00d88b7cc863c68L,0x3a1cc11e3d1f4d65L,0xea38e0e70aa85593L,
+        0x37f13e987dc4aee8L } },
+    /* 18 << 217 */
+    { { 0x10d38667bc947badL,0x738e07ce2a36ee2eL,0xc93470cdc577fcacL,
+        0xdee1b6162782470dL },
+      { 0x36a25e672e793d12L,0xd6aa6caee0f186daL,0x474d0fd980e07af7L,
+        0xf7cdc47dba8a5cd4L } },
+    /* 19 << 217 */
+    { { 0x28af6d9dab15247fL,0x7c789c10493a537fL,0x7ac9b11023a334e7L,
+        0x0236ac0912c9c277L },
+      { 0xa7e5bd251d7a5144L,0x098b9c2af13ec4ecL,0x3639dacad3f0abcaL,
+        0x642da81aa23960f9L } },
+    /* 20 << 217 */
+    { { 0x7d2e5c054f7269b1L,0xfcf30777e287c385L,0x10edc84ff2a46f21L,
+        0x354417574f43fa36L },
+      { 0xf1327899fd703431L,0xa438d7a616dd587aL,0x65c34c57e9c8352dL,
+        0xa728edab5cc5a24eL } },
+    /* 21 << 217 */
+    { { 0xaed78abc42531689L,0x0a51a0e8010963efL,0x5776fa0ad717d9b3L,
+        0xf356c2397dd3428bL },
+      { 0x29903fff8d3a3dacL,0x409597fa3d94491fL,0x4cd7a5ffbf4a56a4L,
+        0xe50964748adab462L } },
+    /* 22 << 217 */
+    { { 0xa97b51265c3427b0L,0x6401405cd282c9bdL,0x3629f8d7222c5c45L,
+        0xb1c02c16e8d50aedL },
+      { 0xbea2ed75d9635bc9L,0x226790c76e24552fL,0x3c33f2a365f1d066L,
+        0x2a43463e6dfccc2eL } },
+    /* 23 << 217 */
+    { { 0x8cc3453adb483761L,0xe7cc608565d5672bL,0x277ed6cbde3efc87L,
+        0x19f2f36869234eafL },
+      { 0x9aaf43175c0b800bL,0x1f1e7c898b6da6e2L,0x6cfb4715b94ec75eL,
+        0xd590dd5f453118c2L } },
+    /* 24 << 217 */
+    { { 0x14e49da11f17a34cL,0x5420ab39235a1456L,0xb76372412f50363bL,
+        0x7b15d623c3fabb6eL },
+      { 0xa0ef40b1e274e49cL,0x5cf5074496b1860aL,0xd6583fbf66afe5a4L,
+        0x44240510f47e3e9aL } },
+    /* 25 << 217 */
+    { { 0x9925434311b2d595L,0xf1367499eec8df57L,0x3cb12c613e73dd05L,
+        0xd248c0337dac102aL },
+      { 0xcf154f13a77739f5L,0xbf4288cb23d2af42L,0xaa64c9b632e4a1cfL,
+        0xee8c07a8c8a208f3L } },
+    /* 26 << 217 */
+    { { 0xe10d49996fe8393fL,0x0f809a3fe91f3a32L,0x61096d1c802f63c8L,
+        0x289e146257750d3dL },
+      { 0xed06167e9889feeaL,0xd5c9c0e2e0993909L,0x46fca0d856508ac6L,
+        0x918260474f1b8e83L } },
+    /* 27 << 217 */
+    { { 0x4f2c877a9a4a2751L,0x71bd0072cae6feadL,0x38df8dcc06aa1941L,
+        0x5a074b4c63beeaa8L },
+      { 0xd6d65934c1cec8edL,0xa6ecb49eaabc03bdL,0xaade91c2de8a8415L,
+        0xcfb0efdf691136e0L } },
+    /* 28 << 217 */
+    { { 0x11af45ee23ab3495L,0xa132df880b77463dL,0x8923c15c815d06f4L,
+        0xc3ceb3f50d61a436L },
+      { 0xaf52291de88fb1daL,0xea0579741da12179L,0xb0d7218cd2fef720L,
+        0x6c0899c98e1d8845L } },
+    /* 29 << 217 */
+    { { 0x98157504752ddad7L,0xd60bd74fa1a68a97L,0x7047a3a9f658fb99L,
+        0x1f5d86d65f8511e4L },
+      { 0xb8a4bc424b5a6d88L,0x69eb2c331abefa7dL,0x95bf39e813c9c510L,
+        0xf571960ad48aab43L } },
+    /* 30 << 217 */
+    { { 0x7e8cfbcf704e23c6L,0xc71b7d2228aaa65bL,0xa041b2bd245e3c83L,
+        0x69b98834d21854ffL },
+      { 0x89d227a3963bfeecL,0x99947aaade7da7cbL,0x1d9ee9dbee68a9b1L,
+        0x0a08f003698ec368L } },
+    /* 31 << 217 */
+    { { 0xe9ea409478ef2487L,0xc8d2d41502cfec26L,0xc52f9a6eb7dcf328L,
+        0x0ed489e385b6a937L },
+      { 0x9b94986bbef3366eL,0x0de59c70edddddb8L,0xffdb748ceadddbe2L,
+        0x9b9784bb8266ea40L } },
+    /* 32 << 217 */
+    { { 0x142b55021a93507aL,0xb4cd11878d3c06cfL,0xdf70e76a91ec3f40L,
+        0x484e81ad4e7553c2L },
+      { 0x830f87b5272e9d6eL,0xea1c93e5c6ff514aL,0x67cc2adcc4192a8eL,
+        0xc77e27e242f4535aL } },
+    /* 33 << 217 */
+    { { 0x9cdbab36d2b713c5L,0x86274ea0cf7b0cd3L,0x784680f309af826bL,
+        0xbfcc837a0c72dea3L },
+      { 0xa8bdfe9dd6529b73L,0x708aa22863a88002L,0x6c7a9a54c91d45b9L,
+        0xdf1a38bbfd004f56L } },
+    /* 34 << 217 */
+    { { 0x2e8c9a26b8bad853L,0x2d52cea33723eae7L,0x054d6d8156ca2830L,
+        0xa3317d149a8dc411L },
+      { 0xa08662fefd4ddedaL,0xed2a153ab55d792bL,0x7035c16abfc6e944L,
+        0xb6bc583400171cf3L } },
+    /* 35 << 217 */
+    { { 0xe27152b383d102b6L,0xfe695a470646b848L,0xa5bb09d8916e6d37L,
+        0xb4269d640d17015eL },
+      { 0x8d8156a10a1d2285L,0xfeef6c5146d26d72L,0x9dac57c84c5434a7L,
+        0x0282e5be59d39e31L } },
+    /* 36 << 217 */
+    { { 0xedfff181721c486dL,0x301baf10bc58824eL,0x8136a6aa00570031L,
+        0x55aaf78c1cddde68L },
+      { 0x2682937159c63952L,0x3a3bd2748bc25bafL,0xecdf8657b7e52dc3L,
+        0x2dd8c087fd78e6c8L } },
+    /* 37 << 217 */
+    { { 0x20553274f5531461L,0x8b4a12815d95499bL,0xe2c8763a1a80f9d2L,
+        0xd1dbe32b4ddec758L },
+      { 0xaf12210d30c34169L,0xba74a95378baa533L,0x3d133c6ea438f254L,
+        0xa431531a201bef5bL } },
+    /* 38 << 217 */
+    { { 0x15295e22f669d7ecL,0xca374f64357fb515L,0x8a8406ffeaa3fdb3L,
+        0x106ae448df3f2da8L },
+      { 0x8f9b0a9033c8e9a1L,0x234645e271ad5885L,0x3d0832241c0aed14L,
+        0xf10a7d3e7a942d46L } },
+    /* 39 << 217 */
+    { { 0x7c11deee40d5c9beL,0xb2bae7ffba84ed98L,0x93e97139aad58dddL,
+        0x3d8727963f6d1fa3L },
+      { 0x483aca818569ff13L,0x8b89a5fb9a600f72L,0x4cbc27c3c06f2b86L,
+        0x2213071363ad9c0bL } },
+    /* 40 << 217 */
+    { { 0xb5358b1e48ac2840L,0x18311294ecba9477L,0xda58f990a6946b43L,
+        0x3098baf99ab41819L },
+      { 0x66c4c1584198da52L,0xab4fc17c146bfd1bL,0x2f0a4c3cbf36a908L,
+        0x2ae9e34b58cf7838L } },
+    /* 41 << 217 */
+    { { 0xf411529e3fa11b1fL,0x21e43677974af2b4L,0x7c20958ec230793bL,
+        0x710ea88516e840f3L },
+      { 0xfc0b21fcc5dc67cfL,0x08d5164788405718L,0xd955c21fcfe49eb7L,
+        0x9722a5d556dd4a1fL } },
+    /* 42 << 217 */
+    { { 0xc9ef50e2c861baa5L,0xc0c21a5d9505ac3eL,0xaf6b9a338b7c063fL,
+        0xc63703392f4779c1L },
+      { 0x22df99c7638167c3L,0xfe6ffe76795db30cL,0x2b822d33a4854989L,
+        0xfef031dd30563aa5L } },
+    /* 43 << 217 */
+    { { 0x16b09f82d57c667fL,0xc70312cecc0b76f1L,0xbf04a9e6c9118aecL,
+        0x82fcb4193409d133L },
+      { 0x1a8ab385ab45d44dL,0xfba07222617b83a3L,0xb05f50dd58e81b52L,
+        0x1d8db55321ce5affL } },
+    /* 44 << 217 */
+    { { 0x3097b8d4e344a873L,0x7d8d116dfe36d53eL,0x6db22f587875e750L,
+        0x2dc5e37343e144eaL },
+      { 0xc05f32e6e799eb95L,0xe9e5f4df6899e6ecL,0xbdc3bd681fab23d5L,
+        0xb72b8ab773af60e6L } },
+    /* 45 << 217 */
+    { { 0x8db27ae02cecc84aL,0x600016d87bdb871cL,0x42a44b13d7c46f58L,
+        0xb8919727c3a77d39L },
+      { 0xcfc6bbbddafd6088L,0x1a7401466bd20d39L,0x8c747abd98c41072L,
+        0x4c91e765bdf68ea1L } },
+    /* 46 << 217 */
+    { { 0x7c95e5ca08819a78L,0xcf48b729c9587921L,0x091c7c5fdebbcc7dL,
+        0x6f287404f0e05149L },
+      { 0xf83b5ac226cd44ecL,0x88ae32a6cfea250eL,0x6ac5047a1d06ebc5L,
+        0xc7e550b4d434f781L } },
+    /* 47 << 217 */
+    { { 0x61ab1cf25c727bd2L,0x2e4badb11cf915b0L,0x1b4dadecf69d3920L,
+        0xe61b1ca6f14c1dfeL },
+      { 0x90b479ccbd6bd51fL,0x8024e4018045ec30L,0xcab29ca325ef0e62L,
+        0x4f2e941649e4ebc0L } },
+    /* 48 << 217 */
+    { { 0x45eb40ec0ccced58L,0x25cd4b9c0da44f98L,0x43e06458871812c6L,
+        0x99f80d5516cef651L },
+      { 0x571340c9ce6dc153L,0x138d5117d8665521L,0xacdb45bc4e07014dL,
+        0x2f34bb3884b60b91L } },
+    /* 49 << 217 */
+    { { 0xf44a4fd22ae8921eL,0xb039288e892ba1e2L,0x9da50174b1c180b2L,
+        0x6b70ab661693dc87L },
+      { 0x7e9babc9e7057481L,0x4581ddef9c80dc41L,0x0c890da951294682L,
+        0x0b5629d33f4736e5L } },
+    /* 50 << 217 */
+    { { 0x2340c79eb06f5b41L,0xa42e84ce4e243469L,0xf9a20135045a71a9L,
+        0xefbfb415d27b6fb6L },
+      { 0x25ebea239d33cd6fL,0x9caedb88aa6c0af8L,0x53dc7e9ad9ce6f96L,
+        0x3897f9fd51e0b15aL } },
+    /* 51 << 217 */
+    { { 0xf51cb1f88e5d788eL,0x1aec7ba8e1d490eeL,0x265991e0cc58cb3cL,
+        0x9f306e8c9fc3ad31L },
+      { 0x5fed006e5040a0acL,0xca9d5043fb476f2eL,0xa19c06e8beea7a23L,
+        0xd28658010edabb63L } },
+    /* 52 << 217 */
+    { { 0xdb92293f6967469aL,0x2894d8398d8a8ed8L,0x87c9e406bbc77122L,
+        0x8671c6f12ea3a26aL },
+      { 0xe42df8d6d7de9853L,0x2e3ce346b1f2bcc7L,0xda601dfc899d50cfL,
+        0xbfc913defb1b598fL } },
+    /* 53 << 217 */
+    { { 0x81c4909fe61f7908L,0x192e304f9bbc7b29L,0xc3ed8738c104b338L,
+        0xedbe9e47783f5d61L },
+      { 0x0c06e9be2db30660L,0xda3e613fc0eb7d8eL,0xd8fa3e97322e096eL,
+        0xfebd91e8d336e247L } },
+    /* 54 << 217 */
+    { { 0x8f13ccc4df655a49L,0xa9e00dfc5eb20210L,0x84631d0fc656b6eaL,
+        0x93a058cdd8c0d947L },
+      { 0x6846904a67bd3448L,0x4a3d4e1af394fd5cL,0xc102c1a5db225f52L,
+        0xe3455bbafc4f5e9aL } },
+    /* 55 << 217 */
+    { { 0x6b36985b4b9ad1ceL,0xa98185365bb7f793L,0x6c25e1d048b1a416L,
+        0x1381dd533c81bee7L },
+      { 0xd2a30d617a4a7620L,0xc841292639b8944cL,0x3c1c6fbe7a97c33aL,
+        0x941e541d938664e7L } },
+    /* 56 << 217 */
+    { { 0x417499e84a34f239L,0x15fdb83cb90402d5L,0xb75f46bf433aa832L,
+        0xb61e15af63215db1L },
+      { 0xaabe59d4a127f89aL,0x5d541e0c07e816daL,0xaaba0659a618b692L,
+        0x5532773317266026L } },
+    /* 57 << 217 */
+    { { 0xaf53a0fc95f57552L,0x329476506cacb0c9L,0x253ff58dc821be01L,
+        0xb0309531a06f1146L },
+      { 0x59bbbdf505c2e54dL,0x158f27ad26e8dd22L,0xcc5b7ffb397e1e53L,
+        0xae03f65b7fc1e50dL } },
+    /* 58 << 217 */
+    { { 0xa9784ebd9c95f0f9L,0x5ed9deb224640771L,0x31244af7035561c4L,
+        0x87332f3a7ee857deL },
+      { 0x09e16e9e2b9e0d88L,0x52d910f456a06049L,0x507ed477a9592f48L,
+        0x85cb917b2365d678L } },
+    /* 59 << 217 */
+    { { 0xf8511c934c8998d1L,0x2186a3f1730ea58fL,0x50189626b2029db0L,
+        0x9137a6d902ceb75aL },
+      { 0x2fe17f37748bc82cL,0x87c2e93180469f8cL,0x850f71cdbf891aa2L,
+        0x0ca1b89b75ec3d8dL } },
+    /* 60 << 217 */
+    { { 0x516c43aa5e1cd3cdL,0x893978089a887c28L,0x0059c699ddea1f9fL,
+        0x7737d6fa8e6868f7L },
+      { 0x6d93746a60f1524bL,0x36985e55ba052aa7L,0x41b1d322ed923ea5L,
+        0x3429759f25852a11L } },
+    /* 61 << 217 */
+    { { 0xbeca6ec3092e9f41L,0x3a238c6662256bbdL,0xd82958ea70ad487dL,
+        0x4ac8aaf965610d93L },
+      { 0x3fa101b15e4ccab0L,0x9bf430f29de14bfbL,0xa10f5cc66531899dL,
+        0x590005fbea8ce17dL } },
+    /* 62 << 217 */
+    { { 0xc437912f24544cb6L,0x9987b71ad79ac2e3L,0x13e3d9ddc058a212L,
+        0x00075aacd2de9606L },
+      { 0x80ab508b6cac8369L,0x87842be7f54f6c89L,0xa7ad663d6bc532a4L,
+        0x67813de778a91bc8L } },
+    /* 63 << 217 */
+    { { 0x5dcb61cec3427239L,0x5f3c7cf0c56934d9L,0xc079e0fbe3191591L,
+        0xe40896bdb01aada7L },
+      { 0x8d4667910492d25fL,0x8aeb30c9e7408276L,0xe94374959287aaccL,
+        0x23d4708d79fe03d4L } },
+    /* 64 << 217 */
+    { { 0x8cda9cf2d0c05199L,0x502fbc22fae78454L,0xc0bda9dff572a182L,
+        0x5f9b71b86158b372L },
+      { 0xe0f33a592b82dd07L,0x763027359523032eL,0x7fe1a721c4505a32L,
+        0x7b6e3e82f796409fL } },
+    /* 0 << 224 */
+    { { 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 << 224 */
+    { { 0xe3417bc035d0b34aL,0x440b386b8327c0a7L,0x8fb7262dac0362d1L,
+        0x2c41114ce0cdf943L },
+      { 0x2ba5cef1ad95a0b1L,0xc09b37a867d54362L,0x26d6cdd201e486c9L,
+        0x20477abf42ff9297L } },
+    /* 2 << 224 */
+    { { 0xa004dcb3292a9287L,0xddc15cf677b092c7L,0x083a8464806c0605L,
+        0x4a68df703db997b0L },
+      { 0x9c134e4505bf7dd0L,0xa4e63d398ccf7f8cL,0xa6e6517f41b5f8afL,
+        0xaa8b9342ad7bc1ccL } },
+    /* 3 << 224 */
+    { { 0x126f35b51e706ad9L,0xb99cebb4c3a9ebdfL,0xa75389afbf608d90L,
+        0x76113c4fc6c89858L },
+      { 0x80de8eb097e2b5aaL,0x7e1022cc63b91304L,0x3bdab6056ccc066cL,
+        0x33cbb144b2edf900L } },
+    /* 4 << 224 */
+    { { 0xc41764717af715d2L,0xe2f7f594d0134a96L,0x2c1873efa41ec956L,
+        0xe4e7b4f677821304L },
+      { 0xe5c8ff9788d5374aL,0x2b915e6380823d5bL,0xea6bc755b2ee8fe2L,
+        0x6657624ce7112651L } },
+    /* 5 << 224 */
+    { { 0x157af101dace5acaL,0xc4fdbcf211a6a267L,0xdaddf340c49c8609L,
+        0x97e49f52e9604a65L },
+      { 0x9be8e790937e2ad5L,0x846e2508326e17f1L,0x3f38007a0bbbc0dcL,
+        0xcf03603fb11e16d6L } },
+    /* 6 << 224 */
+    { { 0xd6f800e07442f1d5L,0x475607d166e0e3abL,0x82807f16b7c64047L,
+        0x8858e1e3a749883dL },
+      { 0x5859120b8231ee10L,0x1b80e7eb638a1eceL,0xcb72525ac6aa73a4L,
+        0xa7cdea3d844423acL } },
+    /* 7 << 224 */
+    { { 0x5ed0c007f8ae7c38L,0x6db07a5c3d740192L,0xbe5e9c2a5fe36db3L,
+        0xd5b9d57a76e95046L },
+      { 0x54ac32e78eba20f2L,0xef11ca8f71b9a352L,0x305e373eff98a658L,
+        0xffe5a100823eb667L } },
+    /* 8 << 224 */
+    { { 0x57477b11e51732d2L,0xdfd6eb282538fc0eL,0x5c43b0cc3b39eec5L,
+        0x6af12778cb36cc57L },
+      { 0x70b0852d06c425aeL,0x6df92f8c5c221b9bL,0x6c8d4f9ece826d9cL,
+        0xf59aba7bb49359c3L } },
+    /* 9 << 224 */
+    { { 0x5c8ed8d5da64309dL,0x61a6de5691b30704L,0xd6b52f6a2f9b5808L,
+        0x0eee419498c958a7L },
+      { 0xcddd9aab771e4caaL,0x83965dfd78bc21beL,0x02affce3b3b504f5L,
+        0x30847a21561c8291L } },
+    /* 10 << 224 */
+    { { 0xd2eb2cf152bfda05L,0xe0e4c4e96197b98cL,0x1d35076cf8a1726fL,
+        0x6c06085b2db11e3dL },
+      { 0x15c0c4d74463ba14L,0x9d292f830030238cL,0x1311ee8b3727536dL,
+        0xfeea86efbeaedc1eL } },
+    /* 11 << 224 */
+    { { 0xb9d18cd366131e2eL,0xf31d974f80fe2682L,0xb6e49e0fe4160289L,
+        0x7c48ec0b08e92799L },
+      { 0x818111d8d1989aa7L,0xb34fa0aaebf926f9L,0xdb5fe2f5a245474aL,
+        0xf80a6ebb3c7ca756L } },
+    /* 12 << 224 */
+    { { 0xa7f96054afa05dd8L,0x26dfcf21fcaf119eL,0xe20ef2e30564bb59L,
+        0xef4dca5061cb02b8L },
+      { 0xcda7838a65d30672L,0x8b08d534fd657e86L,0x4c5b439546d595c8L,
+        0x39b58725425cb836L } },
+    /* 13 << 224 */
+    { { 0x8ea610593de9abe3L,0x404348819cdc03beL,0x9b261245cfedce8cL,
+        0x78c318b4cf5234a1L },
+      { 0x510bcf16fde24c99L,0x2a77cb75a2c2ff5dL,0x9c895c2b27960fb4L,
+        0xd30ce975b0eda42bL } },
+    /* 14 << 224 */
+    { { 0xfda853931a62cc26L,0x23c69b9650c0e052L,0xa227df15bfc633f3L,
+        0x2ac788481bae7d48L },
+      { 0x487878f9187d073dL,0x6c2be919967f807dL,0x765861d8336e6d8fL,
+        0x88b8974cce528a43L } },
+    /* 15 << 224 */
+    { { 0x09521177ff57d051L,0x2ff38037fb6a1961L,0xfc0aba74a3d76ad4L,
+        0x7c76480325a7ec17L },
+      { 0x7532d75f48879bc8L,0xea7eacc058ce6bc1L,0xc82176b48e896c16L,
+        0x9a30e0b22c750fedL } },
+    /* 16 << 224 */
+    { { 0xc37e2c2e421d3aa4L,0xf926407ce84fa840L,0x18abc03d1454e41cL,
+        0x26605ecd3f7af644L },
+      { 0x242341a6d6a5eabfL,0x1edb84f4216b668eL,0xd836edb804010102L,
+        0x5b337ce7945e1d8cL } },
+    /* 17 << 224 */
+    { { 0xd2075c77c055dc14L,0x2a0ffa2581d89cdfL,0x8ce815ea6ffdcbafL,
+        0xa3428878fb648867L },
+      { 0x277699cf884655fbL,0xfa5b5bd6364d3e41L,0x01f680c6441e1cb7L,
+        0x3fd61e66b70a7d67L } },
+    /* 18 << 224 */
+    { { 0x666ba2dccc78cf66L,0xb30181746fdbff77L,0x8d4dd0db168d4668L,
+        0x259455d01dab3a2aL },
+      { 0xf58564c5cde3acecL,0x7714192513adb276L,0x527d725d8a303f65L,
+        0x55deb6c9e6f38f7bL } },
+    /* 19 << 224 */
+    { { 0xfd5bb657b1fa70fbL,0xfa07f50fd8073a00L,0xf72e3aa7bca02500L,
+        0xf68f895d9975740dL },
+      { 0x301120605cae2a6aL,0x01bd721802874842L,0x3d4238917ce47bd3L,
+        0xa66663c1789544f6L } },
+    /* 20 << 224 */
+    { { 0x864d05d73272d838L,0xe22924f9fa6295c5L,0x8189593f6c2fda32L,
+        0x330d7189b184b544L },
+      { 0x79efa62cbde1f714L,0x35771c94e5cb1a63L,0x2f4826b8641c8332L,
+        0x00a894fbc8cee854L } },
+    /* 21 << 224 */
+    { { 0xb4b9a39b36194d40L,0xe857a7c577612601L,0xf4209dd24ecf2f58L,
+        0x82b9e66d5a033487L },
+      { 0xc1e36934e4e8b9ddL,0xd2372c9da42377d7L,0x51dc94c70e3ae43bL,
+        0x4c57761e04474f6fL } },
+    /* 22 << 224 */
+    { { 0xdcdacd0a1058a318L,0x369cf3f578053a9aL,0xc6c3de5031c68de2L,
+        0x4653a5763c4b6d9fL },
+      { 0x1688dd5aaa4e5c97L,0x5be80aa1b7ab3c74L,0x70cefe7cbc65c283L,
+        0x57f95f1306867091L } },
+    /* 23 << 224 */
+    { { 0xa39114e24415503bL,0xc08ff7c64cbb17e9L,0x1eff674dd7dec966L,
+        0x6d4690af53376f63L },
+      { 0xff6fe32eea74237bL,0xc436d17ecd57508eL,0x15aa28e1edcc40feL,
+        0x0d769c04581bbb44L } },
+    /* 24 << 224 */
+    { { 0xc240b6de34eaacdaL,0xd9e116e82ba0f1deL,0xcbe45ec779438e55L,
+        0x91787c9d96f752d7L },
+      { 0x897f532bf129ac2fL,0xd307b7c85a36e22cL,0x91940675749fb8f3L,
+        0xd14f95d0157fdb28L } },
+    /* 25 << 224 */
+    { { 0xfe51d0296ae55043L,0x8931e98f44a87de1L,0xe57f1cc609e4fee2L,
+        0x0d063b674e072d92L },
+      { 0x70a998b9ed0e4316L,0xe74a736b306aca46L,0xecf0fbf24fda97c7L,
+        0xa40f65cb3e178d93L } },
+    /* 26 << 224 */
+    { { 0x1625360416df4285L,0xb0c9babbd0c56ae2L,0x73032b19cfc5cfc3L,
+        0xe497e5c309752056L },
+      { 0x12096bb4164bda96L,0x1ee42419a0b74da1L,0x8fc36243403826baL,
+        0x0c8f0069dc09e660L } },
+    /* 27 << 224 */
+    { { 0x8667e981c27253c9L,0x05a6aefb92b36a45L,0xa62c4b369cb7bb46L,
+        0x8394f37511f7027bL },
+      { 0x747bc79c5f109d0fL,0xcad88a765b8cc60aL,0x80c5a66b58f09e68L,
+        0xe753d451f6127eacL } },
+    /* 28 << 224 */
+    { { 0xc44b74a15b0ec6f5L,0x47989fe45289b2b8L,0x745f848458d6fc73L,
+        0xec362a6ff61c70abL },
+      { 0x070c98a7b3a8ad41L,0x73a20fc07b63db51L,0xed2c2173f44c35f4L,
+        0x8a56149d9acc9dcaL } },
+    /* 29 << 224 */
+    { { 0x98f178819ac6e0f4L,0x360fdeafa413b5edL,0x0625b8f4a300b0fdL,
+        0xf1f4d76a5b3222d3L },
+      { 0x9d6f5109587f76b8L,0x8b4ee08d2317fdb5L,0x88089bb78c68b095L,
+        0x95570e9a5808d9b9L } },
+    /* 30 << 224 */
+    { { 0xa395c36f35d33ae7L,0x200ea12350bb5a94L,0x20c789bd0bafe84bL,
+        0x243ef52d0919276aL },
+      { 0x3934c577e23ae233L,0xb93807afa460d1ecL,0xb72a53b1f8fa76a4L,
+        0xd8914cb0c3ca4491L } },
+    /* 31 << 224 */
+    { { 0x2e1284943fb42622L,0x3b2700ac500907d5L,0xf370fb091a95ec63L,
+        0xf8f30be231b6dfbdL },
+      { 0xf2b2f8d269e55f15L,0x1fead851cc1323e9L,0xfa366010d9e5eef6L,
+        0x64d487b0e316107eL } },
+    /* 32 << 224 */
+    { { 0x4c076b86d23ddc82L,0x03fd344c7e0143f0L,0xa95362ff317af2c5L,
+        0x0add3db7e18b7a4fL },
+      { 0x9c673e3f8260e01bL,0xfbeb49e554a1cc91L,0x91351bf292f2e433L,
+        0xc755e7ec851141ebL } },
+    /* 33 << 224 */
+    { { 0xc9a9513929607745L,0x0ca07420a26f2b28L,0xcb2790e74bc6f9ddL,
+        0x345bbb58adcaffc0L },
+      { 0xc65ea38cbe0f27a2L,0x67c24d7c641fcb56L,0x2c25f0a7a9e2c757L,
+        0x93f5cdb016f16c49L } },
+    /* 34 << 224 */
+    { { 0x2ca5a9d7c5ee30a1L,0xd1593635b909b729L,0x804ce9f3dadeff48L,
+        0xec464751b07c30c3L },
+      { 0x89d65ff39e49af6aL,0xf2d6238a6f3d01bcL,0x1095561e0bced843L,
+        0x51789e12c8a13fd8L } },
+    /* 35 << 224 */
+    { { 0xd633f929763231dfL,0x46df9f7de7cbddefL,0x01c889c0cb265da8L,
+        0xfce1ad10af4336d2L },
+      { 0x8d110df6fc6a0a7eL,0xdd431b986da425dcL,0xcdc4aeab1834aabeL,
+        0x84deb1248439b7fcL } },
+    /* 36 << 224 */
+    { { 0x8796f1693c2a5998L,0x9b9247b47947190dL,0x55b9d9a511597014L,
+        0x7e9dd70d7b1566eeL },
+      { 0x94ad78f7cbcd5e64L,0x0359ac179bd4c032L,0x3b11baaf7cc222aeL,
+        0xa6a6e284ba78e812L } },
+    /* 37 << 224 */
+    { { 0x8392053f24cea1a0L,0xc97bce4a33621491L,0x7eb1db3435399ee9L,
+        0x473f78efece81ad1L },
+      { 0x41d72fe0f63d3d0dL,0xe620b880afab62fcL,0x92096bc993158383L,
+        0x41a213578f896f6cL } },
+    /* 38 << 224 */
+    { { 0x1b5ee2fac7dcfcabL,0x650acfde9546e007L,0xc081b749b1b02e07L,
+        0xda9e41a0f9eca03dL },
+      { 0x013ba727175a54abL,0xca0cd190ea5d8d10L,0x85ea52c095fd96a9L,
+        0x2c591b9fbc5c3940L } },
+    /* 39 << 224 */
+    { { 0x6fb4d4e42bad4d5fL,0xfa4c3590fef0059bL,0x6a10218af5122294L,
+        0x9a78a81aa85751d1L },
+      { 0x04f20579a98e84e7L,0xfe1242c04997e5b5L,0xe77a273bca21e1e4L,
+        0xfcc8b1ef9411939dL } },
+    /* 40 << 224 */
+    { { 0xe20ea30292d0487aL,0x1442dbec294b91feL,0x1f7a4afebb6b0e8fL,
+        0x1700ef746889c318L },
+      { 0xf5bbffc370f1fc62L,0x3b31d4b669c79ccaL,0xe8bc2aaba7f6340dL,
+        0xb0b08ab4a725e10aL } },
+    /* 41 << 224 */
+    { { 0x44f05701ae340050L,0xba4b30161cf0c569L,0x5aa29f83fbe19a51L,
+        0x1b9ed428b71d752eL },
+      { 0x1666e54eeb4819f5L,0x616cdfed9e18b75bL,0x112ed5be3ee27b0bL,
+        0xfbf2831944c7de4dL } },
+    /* 42 << 224 */
+    { { 0xd685ec85e0e60d84L,0x68037e301db7ee78L,0x5b65bdcd003c4d6eL,
+        0x33e7363a93e29a6aL },
+      { 0x995b3a6108d0756cL,0xd727f85c2faf134bL,0xfac6edf71d337823L,
+        0x99b9aa500439b8b4L } },
+    /* 43 << 224 */
+    { { 0x722eb104e2b4e075L,0x49987295437c4926L,0xb1e4c0e446a9b82dL,
+        0xd0cb319757a006f5L },
+      { 0xf3de0f7dd7808c56L,0xb5c54d8f51f89772L,0x500a114aadbd31aaL,
+        0x9afaaaa6295f6cabL } },
+    /* 44 << 224 */
+    { { 0x94705e2104cf667aL,0xfc2a811b9d3935d7L,0x560b02806d09267cL,
+        0xf19ed119f780e53bL },
+      { 0xf0227c09067b6269L,0x967b85335caef599L,0x155b924368efeebcL,
+        0xcd6d34f5c497bae6L } },
+    /* 45 << 224 */
+    { { 0x1dd8d5d36cceb370L,0x2aeac579a78d7bf9L,0x5d65017d70b67a62L,
+        0x70c8e44f17c53f67L },
+      { 0xd1fc095086a34d09L,0xe0fca256e7134907L,0xe24fa29c80fdd315L,
+        0x2c4acd03d87499adL } },
+    /* 46 << 224 */
+    { { 0xbaaf75173b5a9ba6L,0xb9cbe1f612e51a51L,0xd88edae35e154897L,
+        0xe4309c3c77b66ca0L },
+      { 0xf5555805f67f3746L,0x85fc37baa36401ffL,0xdf86e2cad9499a53L,
+        0x6270b2a3ecbc955bL } },
+    /* 47 << 224 */
+    { { 0xafae64f5974ad33bL,0x04d85977fe7b2df1L,0x2a3db3ff4ab03f73L,
+        0x0b87878a8702740aL },
+      { 0x6d263f015a061732L,0xc25430cea32a1901L,0xf7ebab3ddb155018L,
+        0x3a86f69363a9b78eL } },
+    /* 48 << 224 */
+    { { 0x349ae368da9f3804L,0x470f07fea164349cL,0xd52f4cc98562baa5L,
+        0xc74a9e862b290df3L },
+      { 0xd3a1aa3543471a24L,0x239446beb8194511L,0xbec2dd0081dcd44dL,
+        0xca3d7f0fc42ac82dL } },
+    /* 49 << 224 */
+    { { 0x1f3db085fdaf4520L,0xbb6d3e804549daf2L,0xf5969d8a19ad5c42L,
+        0x7052b13ddbfd1511L },
+      { 0x11890d1b682b9060L,0xa71d3883ac34452cL,0xa438055b783805b4L,
+        0x432412774725b23eL } },
+    /* 50 << 224 */
+    { { 0xf20cf96e4901bbedL,0x6419c710f432a2bbL,0x57a0fbb9dfa9cd7dL,
+        0x589111e400daa249L },
+      { 0x19809a337b60554eL,0xea5f8887ede283a4L,0x2d713802503bfd35L,
+        0x151bb0af585d2a53L } },
+    /* 51 << 224 */
+    { { 0x40b08f7443b30ca8L,0xe10b5bbad9934583L,0xe8a546d6b51110adL,
+        0x1dd50e6628e0b6c5L },
+      { 0x292e9d54cff2b821L,0x3882555d47281760L,0x134838f83724d6e3L,
+        0xf2c679e022ddcda1L } },
+    /* 52 << 224 */
+    { { 0x40ee88156d2a5768L,0x7f227bd21c1e7e2dL,0x487ba134d04ff443L,
+        0x76e2ff3dc614e54bL },
+      { 0x36b88d6fa3177ec7L,0xbf731d512328fff5L,0x758caea249ba158eL,
+        0x5ab8ff4c02938188L } },
+    /* 53 << 224 */
+    { { 0x33e1605635edc56dL,0x5a69d3497e940d79L,0x6c4fd00103866dcbL,
+        0x20a38f574893cdefL },
+      { 0xfbf3e790fac3a15bL,0x6ed7ea2e7a4f8e6bL,0xa663eb4fbc3aca86L,
+        0x22061ea5080d53f7L } },
+    /* 54 << 224 */
+    { { 0x2480dfe6f546783fL,0xd38bc6da5a0a641eL,0xfb093cd12ede8965L,
+        0x89654db4acb455cfL },
+      { 0x413cbf9a26e1adeeL,0x291f3764373294d4L,0x00797257648083feL,
+        0x25f504d3208cc341L } },
+    /* 55 << 224 */
+    { { 0x635a8e5ec3a0ee43L,0x70aaebca679898ffL,0x9ee9f5475dc63d56L,
+        0xce987966ffb34d00L },
+      { 0xf9f86b195e26310aL,0x9e435484382a8ca8L,0x253bcb81c2352fe4L,
+        0xa4eac8b04474b571L } },
+    /* 56 << 224 */
+    { { 0xc1b97512c1ad8cf8L,0x193b4e9e99e0b697L,0x939d271601e85df0L,
+        0x4fb265b3cd44eafdL },
+      { 0x321e7dcde51e1ae2L,0x8e3a8ca6e3d8b096L,0x8de46cb052604998L,
+        0x91099ad839072aa7L } },
+    /* 57 << 224 */
+    { { 0x2617f91c93aa96b8L,0x0fc8716b7fca2e13L,0xa7106f5e95328723L,
+        0xd1c9c40b262e6522L },
+      { 0xb9bafe8642b7c094L,0x1873439d1543c021L,0xe1baa5de5cbefd5dL,
+        0xa363fc5e521e8affL } },
+    /* 58 << 224 */
+    { { 0xefe6320df862eaacL,0x14419c6322c647dcL,0x0e06707c4e46d428L,
+        0xcb6c834f4a178f8fL },
+      { 0x0f993a45d30f917cL,0xd4c4b0499879afeeL,0xb6142a1e70500063L,
+        0x7c9b41c3a5d9d605L } },
+    /* 59 << 224 */
+    { { 0xbc00fc2f2f8ba2c7L,0x0966eb2f7c67aa28L,0x13f7b5165a786972L,
+        0x3bfb75578a2fbba0L },
+      { 0x131c4f235a2b9620L,0xbff3ed276faf46beL,0x9b4473d17e172323L,
+        0x421e8878339f6246L } },
+    /* 60 << 224 */
+    { { 0x0fa8587a25a41632L,0xc0814124a35b6c93L,0x2b18a9f559ebb8dbL,
+        0x264e335776edb29cL },
+      { 0xaf245ccdc87c51e2L,0x16b3015b501e6214L,0xbb31c5600a3882ceL,
+        0x6961bb94fec11e04L } },
+    /* 61 << 224 */
+    { { 0x3b825b8deff7a3a0L,0xbec33738b1df7326L,0x68ad747c99604a1fL,
+        0xd154c9349a3bd499L },
+      { 0xac33506f1cc7a906L,0x73bb53926c560e8fL,0x6428fcbe263e3944L,
+        0xc11828d51c387434L } },
+    /* 62 << 224 */
+    { { 0x3cd04be13e4b12ffL,0xc3aad9f92d88667cL,0xc52ddcf8248120cfL,
+        0x985a892e2a389532L },
+      { 0xfbb4b21b3bb85fa0L,0xf95375e08dfc6269L,0xfb4fb06c7ee2aceaL,
+        0x6785426e309c4d1fL } },
+    /* 63 << 224 */
+    { { 0x659b17c8d8ceb147L,0x9b649eeeb70a5554L,0x6b7fa0b5ac6bc634L,
+        0xd99fe2c71d6e732fL },
+      { 0x30e6e7628d3abba2L,0x18fee6e7a797b799L,0x5c9d360dc696464dL,
+        0xe3baeb4827bfde12L } },
+    /* 64 << 224 */
+    { { 0x2bf5db47f23206d5L,0x2f6d34201d260152L,0x17b876533f8ff89aL,
+        0x5157c30c378fa458L },
+      { 0x7517c5c52d4fb936L,0xef22f7ace6518cdcL,0xdeb483e6bf847a64L,
+        0xf508455892e0fa89L } },
+    /* 0 << 231 */
+    { { 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 << 231 */
+    { { 0xab9659d8df7304d4L,0xb71bcf1bff210e8eL,0xa9a2438bd73fbd60L,
+        0x4595cd1f5d11b4deL },
+      { 0x9c0d329a4835859dL,0x4a0f0d2d7dbb6e56L,0xc6038e5edf928a4eL,
+        0xc94296218f5ad154L } },
+    /* 2 << 231 */
+    { { 0x91213462f23f2d92L,0x6cab71bd60b94078L,0x6bdd0a63176cde20L,
+        0x54c9b20cee4d54bcL },
+      { 0x3cd2d8aa9f2ac02fL,0x03f8e617206eedb0L,0xc7f68e1693086434L,
+        0x831469c592dd3db9L } },
+    /* 3 << 231 */
+    { { 0x8521df248f981354L,0x587e23ec3588a259L,0xcbedf281d7a0992cL,
+        0x06930a5538961407L },
+      { 0x09320debbe5bbe21L,0xa7ffa5b52491817fL,0xe6c8b4d909065160L,
+        0xac4f3992fff6d2a9L } },
+    /* 4 << 231 */
+    { { 0x7aa7a1583ae9c1bdL,0xe0af6d98e37ce240L,0xe54342d928ab38b4L,
+        0xe8b750070a1c98caL },
+      { 0xefce86afe02358f2L,0x31b8b856ea921228L,0x052a19120a1c67fcL,
+        0xb4069ea4e3aead59L } },
+    /* 5 << 231 */
+    { { 0x3232d6e27fa03cb3L,0xdb938e5b0fdd7d88L,0x04c1d2cd2ccbfc5dL,
+        0xd2f45c12af3a580fL },
+      { 0x592620b57883e614L,0x5fd27e68be7c5f26L,0x139e45a91567e1e3L,
+        0x2cc71d2d44d8aaafL } },
+    /* 6 << 231 */
+    { { 0x4a9090cde36d0757L,0xf722d7b1d9a29382L,0xfb7fb04c04b48ddfL,
+        0x628ad2a7ebe16f43L },
+      { 0xcd3fbfb520226040L,0x6c34ecb15104b6c4L,0x30c0754ec903c188L,
+        0xec336b082d23cab0L } },
+    /* 7 << 231 */
+    { { 0x473d62a21e206ee5L,0xf1e274808c49a633L,0x87ab956ce9f6b2c3L,
+        0x61830b4862b606eaL },
+      { 0x67cd6846e78e815fL,0xfe40139f4c02082aL,0x52bbbfcb952ec365L,
+        0x74c116426b9836abL } },
+    /* 8 << 231 */
+    { { 0x9f51439e558df019L,0x230da4baac712b27L,0x518919e355185a24L,
+        0x4dcefcdd84b78f50L },
+      { 0xa7d90fb2a47d4c5aL,0x55ac9abfb30e009eL,0xfd2fc35974eed273L,
+        0xb72d824cdbea8fafL } },
+    /* 9 << 231 */
+    { { 0xce721a744513e2caL,0x0b41861238240b2cL,0x05199968d5baa450L,
+        0xeb1757ed2b0e8c25L },
+      { 0x6ebc3e283dfac6d5L,0xb2431e2e48a237f5L,0x2acb5e2352f61499L,
+        0x5558a2a7e06c936bL } },
+    /* 10 << 231 */
+    { { 0xd213f923cbb13d1bL,0x98799f425bfb9bfeL,0x1ae8ddc9701144a9L,
+        0x0b8b3bb64c5595eeL },
+      { 0x0ea9ef2e3ecebb21L,0x17cb6c4b3671f9a7L,0x47ef464f726f1d1fL,
+        0x171b94846943a276L } },
+    /* 11 << 231 */
+    { { 0x51a4ae2d7ef0329cL,0x0850922291c4402aL,0x64a61d35afd45bbcL,
+        0x38f096fe3035a851L },
+      { 0xc7468b74a1dec027L,0xe8cf10e74fc7dcbaL,0xea35ff40f4a06353L,
+        0x0b4c0dfa8b77dd66L } },
+    /* 12 << 231 */
+    { { 0x779b8552de7e5c19L,0xfab28609c1c0256cL,0x64f58eeeabd4743dL,
+        0x4e8ef8387b6cc93bL },
+      { 0xee650d264cb1bf3dL,0x4c1f9d0973dedf61L,0xaef7c9d7bfb70cedL,
+        0x1ec0507e1641de1eL } },
+    /* 13 << 231 */
+    { { 0xcd7e5cc7cde45079L,0xde173c9a516ac9e4L,0x517a8494c170315cL,
+        0x438fd90591d8e8fbL },
+      { 0x5145c506c7d9630bL,0x6457a87bf47d4d75L,0xd31646bf0d9a80e8L,
+        0x453add2bcef3aabeL } },
+    /* 14 << 231 */
+    { { 0xc9941109a607419dL,0xfaa71e62bb6bca80L,0x34158c1307c431f3L,
+        0x594abebc992bc47aL },
+      { 0x6dfea691eb78399fL,0x48aafb353f42cba4L,0xedcd65af077c04f0L,
+        0x1a29a366e884491aL } },
+    /* 15 << 231 */
+    { { 0x023a40e51c21f2bfL,0xf99a513ca5057aeeL,0xa3fe7e25bcab072eL,
+        0x8568d2e140e32bcfL },
+      { 0x904594ebd3f69d9fL,0x181a973307affab1L,0xe4d68d76b6e330f4L,
+        0x87a6dafbc75a7fc1L } },
+    /* 16 << 231 */
+    { { 0x549db2b5ef7d9289L,0x2480d4a8197f015aL,0x61d5590bc40493b6L,
+        0x3a55b52e6f780331L },
+      { 0x40eb8115309eadb0L,0xdea7de5a92e5c625L,0x64d631f0cc6a3d5aL,
+        0x9d5e9d7c93e8dd61L } },
+    /* 17 << 231 */
+    { { 0xf297bef5206d3ffcL,0x23d5e0337d808bd4L,0x4a4f6912d24cf5baL,
+        0xe4d8163b09cdaa8aL },
+      { 0x0e0de9efd3082e8eL,0x4fe1246c0192f360L,0x1f9001504b8eee0aL,
+        0x5219da81f1da391bL } },
+    /* 18 << 231 */
+    { { 0x7bf6a5c1f7ea25aaL,0xd165e6bffbb07d5fL,0xe353936189e78671L,
+        0xa3fcac892bac4219L },
+      { 0xdfab6fd4f0baa8abL,0x5a4adac1e2c1c2e5L,0x6cd75e3140d85849L,
+        0xce263fea19b39181L } },
+    /* 19 << 231 */
+    { { 0xcb6803d307032c72L,0x7f40d5ce790968c8L,0xa6de86bddce978f0L,
+        0x25547c4f368f751cL },
+      { 0xb1e685fd65fb2a9eL,0xce69336f1eb9179cL,0xb15d1c2712504442L,
+        0xb7df465cb911a06bL } },
+    /* 20 << 231 */
+    { { 0xb8d804a3315980cdL,0x693bc492fa3bebf7L,0x3578aeee2253c504L,
+        0x158de498cd2474a2L },
+      { 0x1331f5c7cfda8368L,0xd2d7bbb378d7177eL,0xdf61133af3c1e46eL,
+        0x5836ce7dd30e7be8L } },
+    /* 21 << 231 */
+    { { 0x83084f1994f834cbL,0xd35653d4429ed782L,0xa542f16f59e58243L,
+        0xc2b52f650470a22dL },
+      { 0xe3b6221b18f23d96L,0xcb05abac3f5252b4L,0xca00938b87d61402L,
+        0x2f186cdd411933e4L } },
+    /* 22 << 231 */
+    { { 0xe042ece59a29a5c5L,0xb19b3c073b6c8402L,0xc97667c719d92684L,
+        0xb5624622ebc66372L },
+      { 0x0cb96e653c04fa02L,0x83a7176c8eaa39aaL,0x2033561deaa1633fL,
+        0x45a9d0864533df73L } },
+    /* 23 << 231 */
+    { { 0xe0542c1d3dc090bcL,0x82c996efaa59c167L,0xe3f735e80ee7fc4dL,
+        0x7b1793937c35db79L },
+      { 0xb6419e25f8c5dbfdL,0x4d9d7a1e1f327b04L,0x979f6f9b298dfca8L,
+        0xc7c5dff18de9366aL } },
+    /* 24 << 231 */
+    { { 0x1b7a588d04c82bddL,0x68005534f8319dfdL,0xde8a55b5d8eb9580L,
+        0x5ea886da8d5bca81L },
+      { 0xe8530a01252a0b4dL,0x1bffb4fe35eaa0a1L,0x2ad828b1d8e99563L,
+        0x7de96ef595f9cd87L } },
+    /* 25 << 231 */
+    { { 0x4abb2d0cd77d970cL,0x03cfb933d33ef9cbL,0xb0547c018b211fe9L,
+        0x2fe64809a56ed1c6L },
+      { 0xcb7d5624c2ac98ccL,0x2a1372c01a393e33L,0xc8d1ec1c29660521L,
+        0xf3d31b04b37ac3e9L } },
+    /* 26 << 231 */
+    { { 0xa29ae9df5ece6e7cL,0x0603ac8f0facfb55L,0xcfe85b7adda233a5L,
+        0xe618919fbd75f0b8L },
+      { 0xf555a3d299bf1603L,0x1f43afc9f184255aL,0xdcdaf341319a3e02L,
+        0xd3b117ef03903a39L } },
+    /* 27 << 231 */
+    { { 0xe095da1365d1d131L,0x86f16367c37ad03eL,0x5f37389e462cd8ddL,
+        0xc103fa04d67a60e6L },
+      { 0x57c34344f4b478f0L,0xce91edd8e117c98dL,0x001777b0231fc12eL,
+        0x11ae47f2b207bccbL } },
+    /* 28 << 231 */
+    { { 0xd983cf8d20f8a242L,0x7aff5b1df22e1ad8L,0x68fd11d07fc4feb3L,
+        0x5d53ae90b0f1c3e1L },
+      { 0x50fb7905ec041803L,0x85e3c97714404888L,0x0e67faedac628d8fL,
+        0x2e8651506668532cL } },
+    /* 29 << 231 */
+    { { 0x15acaaa46a67a6b0L,0xf4cdee25b25cec41L,0x49ee565ae4c6701eL,
+        0x2a04ca66fc7d63d8L },
+      { 0xeb105018ef0543fbL,0xf709a4f5d1b0d81dL,0x5b906ee62915d333L,
+        0xf4a8741296f1f0abL } },
+    /* 30 << 231 */
+    { { 0xb6b82fa74d82f4c2L,0x90725a606804efb3L,0xbc82ec46adc3425eL,
+        0xb7b805812787843eL },
+      { 0xdf46d91cdd1fc74cL,0xdc1c62cbe783a6c4L,0x59d1b9f31a04cbbaL,
+        0xd87f6f7295e40764L } },
+    /* 31 << 231 */
+    { { 0x02b4cfc1317f4a76L,0x8d2703eb91036bceL,0x98206cc6a5e72a56L,
+        0x57be9ed1cf53fb0fL },
+      { 0x09374571ef0b17acL,0x74b2655ed9181b38L,0xc8f80ea889935d0eL,
+        0xc0d9e94291529936L } },
+    /* 32 << 231 */
+    { { 0x196860411e84e0e5L,0xa5db84d3aea34c93L,0xf9d5bb197073a732L,
+        0xb8d2fe566bcfd7c0L },
+      { 0x45775f36f3eb82faL,0x8cb20cccfdff8b58L,0x1659b65f8374c110L,
+        0xb8b4a422330c789aL } },
+    /* 33 << 231 */
+    { { 0x75e3c3ea6fe8208bL,0xbd74b9e4286e78feL,0x0be2e81bd7d93a1aL,
+        0x7ed06e27dd0a5aaeL },
+      { 0x721f5a586be8b800L,0x428299d1d846db28L,0x95cb8e6b5be88ed3L,
+        0xc3186b231c034e11L } },
+    /* 34 << 231 */
+    { { 0xa6312c9e8977d99bL,0xbe94433183f531e7L,0x8232c0c218d3b1d4L,
+        0x617aae8be1247b73L },
+      { 0x40153fc4282aec3bL,0xc6063d2ff7b8f823L,0x68f10e583304f94cL,
+        0x31efae74ee676346L } },
+    /* 35 << 231 */
+    { { 0xbadb6c6d40a9b97cL,0x14702c634f666256L,0xdeb954f15184b2e3L,
+        0x5184a52694b6ca40L },
+      { 0xfff05337003c32eaL,0x5aa374dd205974c7L,0x9a7638544b0dd71aL,
+        0x459cd27fdeb947ecL } },
+    /* 36 << 231 */
+    { { 0xa6e28161459c2b92L,0x2f020fa875ee8ef5L,0xb132ec2d30b06310L,
+        0xc3e15899bc6a4530L },
+      { 0xdc5f53feaa3f451aL,0x3a3c7f23c2d9acacL,0x2ec2f8926b27e58bL,
+        0x68466ee7d742799fL } },
+    /* 37 << 231 */
+    { { 0x98324dd41fa26613L,0xa2dc6dabbdc29d63L,0xf9675faad712d657L,
+        0x813994be21fd8d15L },
+      { 0x5ccbb722fd4f7553L,0x5135ff8bf3a36b20L,0x44be28af69559df5L,
+        0x40b65bed9d41bf30L } },
+    /* 38 << 231 */
+    { { 0xd98bf2a43734e520L,0x5e3abbe3209bdcbaL,0x77c76553bc945b35L,
+        0x5331c093c6ef14aaL },
+      { 0x518ffe2976b60c80L,0x2285593b7ace16f8L,0xab1f64ccbe2b9784L,
+        0xe8f2c0d9ab2421b6L } },
+    /* 39 << 231 */
+    { { 0x617d7174c1df065cL,0xafeeb5ab5f6578faL,0x16ff1329263b54a8L,
+        0x45c55808c990dce3L },
+      { 0x42eab6c0ecc8c177L,0x799ea9b55982ecaaL,0xf65da244b607ef8eL,
+        0x8ab226ce32a3fc2cL } },
+    /* 40 << 231 */
+    { { 0x745741e57ea973dcL,0x5c00ca7020888f2eL,0x7cdce3cf45fd9cf1L,
+        0x8a741ef15507f872L },
+      { 0x47c51c2f196b4cecL,0x70d08e43c97ea618L,0x930da15c15b18a2bL,
+        0x33b6c6782f610514L } },
+    /* 41 << 231 */
+    { { 0xc662e4f807ac9794L,0x1eccf050ba06cb79L,0x1ff08623e7d954e5L,
+        0x6ef2c5fb24cf71c3L },
+      { 0xb2c063d267978453L,0xa0cf37961d654af8L,0x7cb242ea7ebdaa37L,
+        0x206e0b10b86747e0L } },
+    /* 42 << 231 */
+    { { 0x481dae5fd5ecfefcL,0x07084fd8c2bff8fcL,0x8040a01aea324596L,
+        0x4c646980d4de4036L },
+      { 0x9eb8ab4ed65abfc3L,0xe01cb91f13541ec7L,0x8f029adbfd695012L,
+        0x9ae284833c7569ecL } },
+    /* 43 << 231 */
+    { { 0xa5614c9ea66d80a1L,0x680a3e4475f5f911L,0x0c07b14dceba4fc1L,
+        0x891c285ba13071c1L },
+      { 0xcac67ceb799ece3cL,0x29b910a941e07e27L,0x66bdb409f2e43123L,
+        0x06f8b1377ac9ecbeL } },
+    /* 44 << 231 */
+    { { 0x5981fafd38547090L,0x19ab8b9f85e3415dL,0xfc28c194c7e31b27L,
+        0x843be0aa6fbcbb42L },
+      { 0xf3b1ed43a6db836cL,0x2a1330e401a45c05L,0x4f19f3c595c1a377L,
+        0xa85f39d044b5ee33L } },
+    /* 45 << 231 */
+    { { 0x3da18e6d4ae52834L,0x5a403b397423dcb0L,0xbb555e0af2374aefL,
+        0x2ad599c41e8ca111L },
+      { 0x1b3a2fb9014b3bf8L,0x73092684f66d5007L,0x079f1426c4340102L,
+        0x1827cf818fddf4deL } },
+    /* 46 << 231 */
+    { { 0xc83605f6f10ff927L,0xd387145123739fc6L,0x6d163450cac1c2ccL,
+        0x6b521296a2ec1ac5L },
+      { 0x0606c4f96e3cb4a5L,0xe47d3f41778abff7L,0x425a8d5ebe8e3a45L,
+        0x53ea9e97a6102160L } },
+    /* 47 << 231 */
+    { { 0x477a106e39cbb688L,0x532401d2f3386d32L,0x8e564f64b1b9b421L,
+        0xca9b838881dad33fL },
+      { 0xb1422b4e2093913eL,0x533d2f9269bc8112L,0x3fa017beebe7b2c7L,
+        0xb2767c4acaf197c6L } },
+    /* 48 << 231 */
+    { { 0xc925ff87aedbae9fL,0x7daf0eb936880a54L,0x9284ddf59c4d0e71L,
+        0x1581cf93316f8cf5L },
+      { 0x3eeca8873ac1f452L,0xb417fce9fb6aeffeL,0xa5918046eefb8dc3L,
+        0x73d318ac02209400L } },
+    /* 49 << 231 */
+    { { 0xe800400f728693e5L,0xe87d814b339927edL,0x93e94d3b57ea9910L,
+        0xff8a35b62245fb69L },
+      { 0x043853d77f200d34L,0x470f1e680f653ce1L,0x81ac05bd59a06379L,
+        0xa14052c203930c29L } },
+    /* 50 << 231 */
+    { { 0x6b72fab526bc2797L,0x13670d1699f16771L,0x001700521e3e48d1L,
+        0x978fe401b7adf678L },
+      { 0x55ecfb92d41c5dd4L,0x5ff8e247c7b27da5L,0xe7518272013fb606L,
+        0x5768d7e52f547a3cL } },
+    /* 51 << 231 */
+    { { 0xbb24eaa360017a5fL,0x6b18e6e49c64ce9bL,0xc225c655103dde07L,
+        0xfc3672ae7592f7eaL },
+      { 0x9606ad77d06283a1L,0x542fc650e4d59d99L,0xabb57c492a40e7c2L,
+        0xac948f13a8db9f55L } },
+    /* 52 << 231 */
+    { { 0x6d4c9682b04465c3L,0xe3d062fa6468bd15L,0xa51729ac5f318d7eL,
+        0x1fc87df69eb6fc95L },
+      { 0x63d146a80591f652L,0xa861b8f7589621aaL,0x59f5f15ace31348cL,
+        0x8f663391440da6daL } },
+    /* 53 << 231 */
+    { { 0xcfa778acb591ffa3L,0x027ca9c54cdfebceL,0xbe8e05a5444ea6b3L,
+        0x8aab4e69a78d8254L },
+      { 0x2437f04fb474d6b8L,0x6597ffd4045b3855L,0xbb0aea4eca47ecaaL,
+        0x568aae8385c7ebfcL } },
+    /* 54 << 231 */
+    { { 0x0e966e64c73b2383L,0x49eb3447d17d8762L,0xde1078218da05dabL,
+        0x443d8baa016b7236L },
+      { 0x163b63a5ea7610d6L,0xe47e4185ce1ca979L,0xae648b6580baa132L,
+        0xebf53de20e0d5b64L } },
+    /* 55 << 231 */
+    { { 0x8d3bfcb4d3c8c1caL,0x0d914ef35d04b309L,0x55ef64153de7d395L,
+        0xbde1666f26b850e8L },
+      { 0xdbe1ca6ed449ab19L,0x8902b322e89a2672L,0xb1674b7edacb7a53L,
+        0x8e9faf6ef52523ffL } },
+    /* 56 << 231 */
+    { { 0x6ba535da9a85788bL,0xd21f03aebd0626d4L,0x099f8c47e873dc64L,
+        0xcda8564d018ec97eL },
+      { 0x3e8d7a5cde92c68cL,0x78e035a173323cc4L,0x3ef26275f880ff7cL,
+        0xa4ee3dff273eedaaL } },
+    /* 57 << 231 */
+    { { 0x58823507af4e18f8L,0x967ec9b50672f328L,0x9ded19d9559d3186L,
+        0x5e2ab3de6cdce39cL },
+      { 0xabad6e4d11c226dfL,0xf9783f4387723014L,0x9a49a0cf1a885719L,
+        0xfc0c1a5a90da9dbfL } },
+    /* 58 << 231 */
+    { { 0x8bbaec49571d92acL,0x569e85fe4692517fL,0x8333b014a14ea4afL,
+        0x32f2a62f12e5c5adL },
+      { 0x98c2ce3a06d89b85L,0xb90741aa2ff77a08L,0x2530defc01f795a2L,
+        0xd6e5ba0b84b3c199L } },
+    /* 59 << 231 */
+    { { 0x7d8e845112e4c936L,0xae419f7dbd0be17bL,0xa583fc8c22262bc9L,
+        0x6b842ac791bfe2bdL },
+      { 0x33cef4e9440d6827L,0x5f69f4deef81fb14L,0xf16cf6f6234fbb92L,
+        0x76ae3fc3d9e7e158L } },
+    /* 60 << 231 */
+    { { 0x4e89f6c2e9740b33L,0x677bc85d4962d6a1L,0x6c6d8a7f68d10d15L,
+        0x5f9a72240257b1cdL },
+      { 0x7096b9164ad85961L,0x5f8c47f7e657ab4aL,0xde57d7d0f7461d7eL,
+        0x7eb6094d80ce5ee2L } },
+    /* 61 << 231 */
+    { { 0x0b1e1dfd34190547L,0x8a394f43f05dd150L,0x0a9eb24d97df44e6L,
+        0x78ca06bf87675719L },
+      { 0x6f0b34626ffeec22L,0x9d91bcea36cdd8fbL,0xac83363ca105be47L,
+        0x81ba76c1069710e3L } },
+    /* 62 << 231 */
+    { { 0x3d1b24cb28c682c6L,0x27f252288612575bL,0xb587c779e8e66e98L,
+        0x7b0c03e9405eb1feL },
+      { 0xfdf0d03015b548e7L,0xa8be76e038b36af7L,0x4cdab04a4f310c40L,
+        0x6287223ef47ecaecL } },
+    /* 63 << 231 */
+    { { 0x678e60558b399320L,0x61fe3fa6c01e4646L,0xc482866b03261a5eL,
+        0xdfcf45b85c2f244aL },
+      { 0x8fab9a512f684b43L,0xf796c654c7220a66L,0x1d90707ef5afa58fL,
+        0x2c421d974fdbe0deL } },
+    /* 64 << 231 */
+    { { 0xc4f4cda3af2ebc2fL,0xa0af843dcb4efe24L,0x53b857c19ccd10b1L,
+        0xddc9d1eb914d3e04L },
+      { 0x7bdec8bb62771debL,0x829277aa91c5aa81L,0x7af18dd6832391aeL,
+        0x1740f316c71a84caL } },
+    /* 0 << 238 */
+    { { 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 << 238 */
+    { { 0x8928e99aeeaf8c49L,0xee7aa73d6e24d728L,0x4c5007c2e72b156cL,
+        0x5fcf57c5ed408a1dL },
+      { 0x9f719e39b6057604L,0x7d343c01c2868bbfL,0x2cca254b7e103e2dL,
+        0xe6eb38a9f131bea2L } },
+    /* 2 << 238 */
+    { { 0xb33e624f8be762b4L,0x2a9ee4d1058e3413L,0x968e636967d805faL,
+        0x9848949b7db8bfd7L },
+      { 0x5308d7e5d23a8417L,0x892f3b1df3e29da5L,0xc95c139e3dee471fL,
+        0x8631594dd757e089L } },
+    /* 3 << 238 */
+    { { 0xe0c82a3cde918dccL,0x2e7b599426fdcf4bL,0x82c5024932cb1b2dL,
+        0xea613a9d7657ae07L },
+      { 0xc2eb5f6cf1fdc9f7L,0xb6eae8b8879fe682L,0x253dfee0591cbc7fL,
+        0x000da7133e1290e6L } },
+    /* 4 << 238 */
+    { { 0x1083e2ea1f095615L,0x0a28ad7714e68c33L,0x6bfc02523d8818beL,
+        0xb585113af35850cdL },
+      { 0x7d935f0b30df8aa1L,0xaddda07c4ab7e3acL,0x92c34299552f00cbL,
+        0xc33ed1de2909df6cL } },
+    /* 5 << 238 */
+    { { 0x22c2195d80e87766L,0x9e99e6d89ddf4ac0L,0x09642e4e65e74934L,
+        0x2610ffa2ff1ff241L },
+      { 0x4d1d47d4751c8159L,0x697b4985af3a9363L,0x0318ca4687477c33L,
+        0xa90cb5659441eff3L } },
+    /* 6 << 238 */
+    { { 0x58bb384836f024cbL,0x85be1f7736016168L,0x6c59587cdc7e07f1L,
+        0x191be071af1d8f02L },
+      { 0xbf169fa5cca5e55cL,0x3864ba3cf7d04eacL,0x915e367f8d7d05dbL,
+        0xb48a876da6549e5dL } },
+    /* 7 << 238 */
+    { { 0xef89c656580e40a2L,0xf194ed8c728068bcL,0x74528045a47990c9L,
+        0xf53fc7d75e1a4649L },
+      { 0xbec5ae9b78593e7dL,0x2cac4ee341db65d7L,0xa8c1eb2404a3d39bL,
+        0x53b7d63403f8f3efL } },
+    /* 8 << 238 */
+    { { 0x2dc40d483e07113cL,0x6e4a5d397d8b63aeL,0x5582a94b79684c2bL,
+        0x932b33d4622da26cL },
+      { 0xf534f6510dbbf08dL,0x211d07c964c23a52L,0x0eeece0fee5bdc9bL,
+        0xdf178168f7015558L } },
+    /* 9 << 238 */
+    { { 0xd42946350a712229L,0x93cbe44809273f8cL,0x00b095ef8f13bc83L,
+        0xbb7419728798978cL },
+      { 0x9d7309a256dbe6e7L,0xe578ec565a5d39ecL,0x3961151b851f9a31L,
+        0x2da7715de5709eb4L } },
+    /* 10 << 238 */
+    { { 0x867f301753dfabf0L,0x728d2078b8e39259L,0x5c75a0cd815d9958L,
+        0xf84867a616603be1L },
+      { 0xc865b13d70e35b1cL,0x0241446819b03e2cL,0xe46041daac1f3121L,
+        0x7c9017ad6f028a7cL } },
+    /* 11 << 238 */
+    { { 0xabc96de90a482873L,0x4265d6b1b77e54d4L,0x68c38e79a57d88e7L,
+        0xd461d7669ce82de3L },
+      { 0x817a9ec564a7e489L,0xcc5675cda0def5f2L,0x9a00e785985d494eL,
+        0xc626833f1b03514aL } },
+    /* 12 << 238 */
+    { { 0xabe7905a83cdd60eL,0x50602fb5a1170184L,0x689886cdb023642aL,
+        0xd568d090a6e1fb00L },
+      { 0x5b1922c70259217fL,0x93831cd9c43141e4L,0xdfca35870c95f86eL,
+        0xdec2057a568ae828L } },
+    /* 13 << 238 */
+    { { 0xc44ea599f98a759aL,0x55a0a7a2f7c23c1dL,0xd5ffb6e694c4f687L,
+        0x3563cce212848478L },
+      { 0x812b3517e7b1fbe1L,0x8a7dc9794f7338e0L,0x211ecee952d048dbL,
+        0x2eea4056c86ea3b8L } },
+    /* 14 << 238 */
+    { { 0xd8cb68a7ba772b34L,0xe16ed3415f4e2541L,0x9b32f6a60fec14dbL,
+        0xeee376f7391698beL },
+      { 0xe9a7aa1783674c02L,0x65832f975843022aL,0x29f3a8da5ba4990fL,
+        0x79a59c3afb8e3216L } },
+    /* 15 << 238 */
+    { { 0x9cdc4d2ebd19bb16L,0xc6c7cfd0b3262d86L,0xd4ce14d0969c0b47L,
+        0x1fa352b713e56128L },
+      { 0x383d55b8973db6d3L,0x71836850e8e5b7bfL,0xc7714596e6bb571fL,
+        0x259df31f2d5b2dd2L } },
+    /* 16 << 238 */
+    { { 0x568f8925913cc16dL,0x18bc5b6de1a26f5aL,0xdfa413bef5f499aeL,
+        0xf8835decc3f0ae84L },
+      { 0xb6e60bd865a40ab0L,0x65596439194b377eL,0xbcd8562592084a69L,
+        0x5ce433b94f23ede0L } },
+    /* 17 << 238 */
+    { { 0xe8e8f04f6ad65143L,0x11511827d6e14af6L,0x3d390a108295c0c7L,
+        0x71e29ee4621eba16L },
+      { 0xa588fc0963717b46L,0x02be02fee06ad4a2L,0x931558c604c22b22L,
+        0xbb4d4bd612f3c849L } },
+    /* 18 << 238 */
+    { { 0x54a4f49620efd662L,0x92ba6d20c5952d14L,0x2db8ea1ecc9784c2L,
+        0x81cc10ca4b353644L },
+      { 0x40b570ad4b4d7f6cL,0x5c9f1d9684a1dcd2L,0x01379f813147e797L,
+        0xe5c6097b2bd499f5L } },
+    /* 19 << 238 */
+    { { 0x40dcafa6328e5e20L,0xf7b5244a54815550L,0xb9a4f11847bfc978L,
+        0x0ea0e79fd25825b1L },
+      { 0xa50f96eb646c7ecfL,0xeb811493446dea9dL,0x2af04677dfabcf69L,
+        0xbe3a068fc713f6e8L } },
+    /* 20 << 238 */
+    { { 0x860d523d42e06189L,0xbf0779414e3aff13L,0x0b616dcac1b20650L,
+        0xe66dd6d12131300dL },
+      { 0xd4a0fd67ff99abdeL,0xc9903550c7aac50dL,0x022ecf8b7c46b2d7L,
+        0x3333b1e83abf92afL } },
+    /* 21 << 238 */
+    { { 0x11cc113c6c491c14L,0x0597668880dd3f88L,0xf5b4d9e729d932edL,
+        0xe982aad8a2c38b6dL },
+      { 0x6f9253478be0dcf0L,0x700080ae65ca53f2L,0xd8131156443ca77fL,
+        0xe92d6942ec51f984L } },
+    /* 22 << 238 */
+    { { 0xd2a08af885dfe9aeL,0xd825d9a54d2a86caL,0x2c53988d39dff020L,
+        0xf38b135a430cdc40L },
+      { 0x0c918ae062a7150bL,0xf31fd8de0c340e9bL,0xafa0e7ae4dbbf02eL,
+        0x5847fb2a5eba6239L } },
+    /* 23 << 238 */
+    { { 0x6b1647dcdccbac8bL,0xb642aa7806f485c8L,0x873f37657038ecdfL,
+        0x2ce5e865fa49d3feL },
+      { 0xea223788c98c4400L,0x8104a8cdf1fa5279L,0xbcf7cc7a06becfd7L,
+        0x49424316c8f974aeL } },
+    /* 24 << 238 */
+    { { 0xc0da65e784d6365dL,0xbcb7443f8f759fb8L,0x35c712b17ae81930L,
+        0x80428dff4c6e08abL },
+      { 0xf19dafefa4faf843L,0xced8538dffa9855fL,0x20ac409cbe3ac7ceL,
+        0x358c1fb6882da71eL } },
+    /* 25 << 238 */
+    { { 0xafa9c0e5fd349961L,0x2b2cfa518421c2fcL,0x2a80db17f3a28d38L,
+        0xa8aba5395d138e7eL },
+      { 0x52012d1d6e96eb8dL,0x65d8dea0cbaf9622L,0x57735447b264f56cL,
+        0xbeebef3f1b6c8da2L } },
+    /* 26 << 238 */
+    { { 0xfc346d98ce785254L,0xd50e8d72bb64a161L,0xc03567c749794addL,
+        0x15a76065752c7ef6L },
+      { 0x59f3a222961f23d6L,0x378e443873ecc0b0L,0xc74be4345a82fde4L,
+        0xae509af2d8b9cf34L } },
+    /* 27 << 238 */
+    { { 0x4a61ee46577f44a1L,0xe09b748cb611deebL,0xc0481b2cf5f7b884L,
+        0x3562667861acfa6bL },
+      { 0x37f4c518bf8d21e6L,0x22d96531b205a76dL,0x37fb85e1954073c0L,
+        0xbceafe4f65b3a567L } },
+    /* 28 << 238 */
+    { { 0xefecdef7be42a582L,0xd3fc608065046be6L,0xc9af13c809e8dba9L,
+        0x1e6c9847641491ffL },
+      { 0x3b574925d30c31f7L,0xb7eb72baac2a2122L,0x776a0dacef0859e7L,
+        0x06fec31421900942L } },
+    /* 29 << 238 */
+    { { 0x2464bc10f8c22049L,0x9bfbcce7875ebf69L,0xd7a88e2a4336326bL,
+        0xda05261c5bc2acfaL },
+      { 0xc29f5bdceba7efc8L,0x471237ca25dbbf2eL,0xa72773f22975f127L,
+        0xdc744e8e04d0b326L } },
+    /* 30 << 238 */
+    { { 0x38a7ed16a56edb73L,0x64357e372c007e70L,0xa167d15b5080b400L,
+        0x07b4116423de4be1L },
+      { 0xb2d91e3274c89883L,0x3c1628212882e7edL,0xad6b36ba7503e482L,
+        0x48434e8e0ea34331L } },
+    /* 31 << 238 */
+    { { 0x79f4f24f2c7ae0b9L,0xc46fbf811939b44aL,0x76fefae856595eb1L,
+        0x417b66abcd5f29c7L },
+      { 0x5f2332b2c5ceec20L,0xd69661ffe1a1cae2L,0x5ede7e529b0286e6L,
+        0x9d062529e276b993L } },
+    /* 32 << 238 */
+    { { 0x324794b07e50122bL,0xdd744f8b4af07ca5L,0x30a12f08d63fc97bL,
+        0x39650f1a76626d9dL },
+      { 0x101b47f71fa38477L,0x3d815f19d4dc124fL,0x1569ae95b26eb58aL,
+        0xc3cde18895fb1887L } },
+    /* 33 << 238 */
+    { { 0x54e9f37bf9539a48L,0xb0100e067408c1a5L,0x821d9811ea580cbbL,
+        0x8af52d3586e50c56L },
+      { 0xdfbd9d47dbbf698bL,0x2961a1ea03dc1c73L,0x203d38f8e76a5df8L,
+        0x08a53a686def707aL } },
+    /* 34 << 238 */
+    { { 0x26eefb481bee45d4L,0xb3cee3463c688036L,0x463c5315c42f2469L,
+        0x19d84d2e81378162L },
+      { 0x22d7c3c51c4d349fL,0x65965844163d59c5L,0xcf198c56b8abceaeL,
+        0x6fb1fb1b628559d5L } },
+    /* 35 << 238 */
+    { { 0x8bbffd0607bf8fe3L,0x46259c583467734bL,0xd8953cea35f7f0d3L,
+        0x1f0bece2d65b0ff1L },
+      { 0xf7d5b4b3f3c72914L,0x29e8ea953cb53389L,0x4a365626836b6d46L,
+        0xe849f910ea174fdeL } },
+    /* 36 << 238 */
+    { { 0x7ec62fbbf4737f21L,0xd8dba5ab6209f5acL,0x24b5d7a9a5f9adbeL,
+        0x707d28f7a61dc768L },
+      { 0x7711460bcaa999eaL,0xba7b174d1c92e4ccL,0x3c4bab6618d4bf2dL,
+        0xb8f0c980eb8bd279L } },
+    /* 37 << 238 */
+    { { 0x024bea9a324b4737L,0xfba9e42332a83bcaL,0x6e635643a232dcedL,
+        0x996193672571c8baL },
+      { 0xe8c9f35754b7032bL,0xf936b3ba2442d54aL,0x2263f0f08290c65aL,
+        0x48989780ee2c7fdbL } },
+    /* 38 << 238 */
+    { { 0xadc5d55a13d4f95eL,0x737cff85ad9b8500L,0x271c557b8a73f43dL,
+        0xbed617a4e18bc476L },
+      { 0x662454017dfd8ab2L,0xae7b89ae3a2870aaL,0x1b555f5323a7e545L,
+        0x6791e247be057e4cL } },
+    /* 39 << 238 */
+    { { 0x860136ad324fa34dL,0xea1114474cbeae28L,0x023a4270bedd3299L,
+        0x3d5c3a7fc1c35c34L },
+      { 0xb0f6db678d0412d2L,0xd92625e2fcdc6b9aL,0x92ae5ccc4e28a982L,
+        0xea251c3647a3ce7eL } },
+    /* 40 << 238 */
+    { { 0x9d658932790691bfL,0xed61058906b736aeL,0x712c2f04c0d63b6eL,
+        0x5cf06fd5c63d488fL },
+      { 0x97363facd9588e41L,0x1f9bf7622b93257eL,0xa9d1ffc4667acaceL,
+        0x1cf4a1aa0a061ecfL } },
+    /* 41 << 238 */
+    { { 0x40e48a49dc1818d0L,0x0643ff39a3621ab0L,0x5768640ce39ef639L,
+        0x1fc099ea04d86854L },
+      { 0x9130b9c3eccd28fdL,0xd743cbd27eec54abL,0x052b146fe5b475b6L,
+        0x058d9a82900a7d1fL } },
+    /* 42 << 238 */
+    { { 0x65e0229291262b72L,0x96f924f9bb0edf03L,0x5cfa59c8fe206842L,
+        0xf60370045eafa720L },
+      { 0x5f30699e18d7dd96L,0x381e8782cbab2495L,0x91669b46dd8be949L,
+        0xb40606f526aae8efL } },
+    /* 43 << 238 */
+    { { 0x2812b839fc6751a4L,0x16196214fba800efL,0x4398d5ca4c1a2875L,
+        0x720c00ee653d8349L },
+      { 0xc2699eb0d820007cL,0x880ee660a39b5825L,0x70694694471f6984L,
+        0xf7d16ea8e3dda99aL } },
+    /* 44 << 238 */
+    { { 0x28d675b2c0519a23L,0x9ebf94fe4f6952e3L,0xf28bb767a2294a8aL,
+        0x85512b4dfe0af3f5L },
+      { 0x18958ba899b16a0dL,0x95c2430cba7548a7L,0xb30d1b10a16be615L,
+        0xe3ebbb9785bfb74cL } },
+    /* 45 << 238 */
+    { { 0xa3273cfe18549fdbL,0xf6e200bf4fcdb792L,0x54a76e1883aba56cL,
+        0x73ec66f689ef6aa2L },
+      { 0x8d17add7d1b9a305L,0xa959c5b9b7ae1b9dL,0x886435226bcc094aL,
+        0xcc5616c4d7d429b9L } },
+    /* 46 << 238 */
+    { { 0xa6dada01e6a33f7cL,0xc6217a079d4e70adL,0xd619a81809c15b7cL,
+        0xea06b3290e80c854L },
+      { 0x174811cea5f5e7b9L,0x66dfc310787c65f4L,0x4ea7bd693316ab54L,
+        0xc12c4acb1dcc0f70L } },
+    /* 47 << 238 */
+    { { 0xe4308d1a1e407dd9L,0xe8a3587c91afa997L,0xea296c12ab77b7a5L,
+        0xb5ad49e4673c0d52L },
+      { 0x40f9b2b27006085aL,0xa88ff34087bf6ec2L,0x978603b14e3066a6L,
+        0xb3f99fc2b5e486e2L } },
+    /* 48 << 238 */
+    { { 0x07b53f5eb2e63645L,0xbe57e54784c84232L,0xd779c2167214d5cfL,
+        0x617969cd029a3acaL },
+      { 0xd17668cd8a7017a0L,0x77b4d19abe9b7ee8L,0x58fd0e939c161776L,
+        0xa8c4f4efd5968a72L } },
+    /* 49 << 238 */
+    { { 0x296071cc67b3de77L,0xae3c0b8e634f7905L,0x67e440c28a7100c9L,
+        0xbb8c3c1beb4b9b42L },
+      { 0x6d71e8eac51b3583L,0x7591f5af9525e642L,0xf73a2f7b13f509f3L,
+        0x618487aa5619ac9bL } },
+    /* 50 << 238 */
+    { { 0x3a72e5f79d61718aL,0x00413bcc7592d28cL,0x7d9b11d3963c35cfL,
+        0x77623bcfb90a46edL },
+      { 0xdeef273bdcdd2a50L,0x4a741f9b0601846eL,0x33b89e510ec6e929L,
+        0xcb02319f8b7f22cdL } },
+    /* 51 << 238 */
+    { { 0xbbe1500d084bae24L,0x2f0ae8d7343d2693L,0xacffb5f27cdef811L,
+        0xaa0c030a263fb94fL },
+      { 0x6eef0d61a0f442deL,0xf92e181727b139d3L,0x1ae6deb70ad8bc28L,
+        0xa89e38dcc0514130L } },
+    /* 52 << 238 */
+    { { 0x81eeb865d2fdca23L,0x5a15ee08cc8ef895L,0x768fa10a01905614L,
+        0xeff5b8ef880ee19bL },
+      { 0xf0c0cabbcb1c8a0eL,0x2e1ee9cdb8c838f9L,0x0587d8b88a4a14c0L,
+        0xf6f278962ff698e5L } },
+    /* 53 << 238 */
+    { { 0xed38ef1c89ee6256L,0xf44ee1fe6b353b45L,0x9115c0c770e903b3L,
+        0xc78ec0a1818f31dfL },
+      { 0x6c003324b7dccbc6L,0xd96dd1f3163bbc25L,0x33aa82dd5cedd805L,
+        0x123aae4f7f7eb2f1L } },
+    /* 54 << 238 */
+    { { 0x1723fcf5a26262cdL,0x1f7f4d5d0060ebd5L,0xf19c5c01b2eaa3afL,
+        0x2ccb9b149790accfL },
+      { 0x1f9c1cad52324aa6L,0x632005267247df54L,0x5732fe42bac96f82L,
+        0x52fe771f01a1c384L } },
+    /* 55 << 238 */
+    { { 0x546ca13db1001684L,0xb56b4eeea1709f75L,0x266545a9d5db8672L,
+        0xed971c901e8f3cfbL },
+      { 0x4e7d8691e3a07b29L,0x7570d9ece4b696b9L,0xdc5fa0677bc7e9aeL,
+        0x68b44cafc82c4844L } },
+    /* 56 << 238 */
+    { { 0x519d34b3bf44da80L,0x283834f95ab32e66L,0x6e6087976278a000L,
+        0x1e62960e627312f6L },
+      { 0x9b87b27be6901c55L,0x80e7853824fdbc1fL,0xbbbc09512facc27dL,
+        0x06394239ac143b5aL } },
+    /* 57 << 238 */
+    { { 0x35bb4a40376c1944L,0x7cb6269463da1511L,0xafd29161b7148a3bL,
+        0xa6f9d9ed4e2ea2eeL },
+      { 0x15dc2ca2880dd212L,0x903c3813a61139a9L,0x2aa7b46d6c0f8785L,
+        0x36ce2871901c60ffL } },
+    /* 58 << 238 */
+    { { 0xc683b028e10d9c12L,0x7573baa2032f33d3L,0x87a9b1f667a31b58L,
+        0xfd3ed11af4ffae12L },
+      { 0x83dcaa9a0cb2748eL,0x8239f0185d6fdf16L,0xba67b49c72753941L,
+        0x2beec455c321cb36L } },
+    /* 59 << 238 */
+    { { 0x880156063f8b84ceL,0x764170838d38c86fL,0x054f1ca7598953ddL,
+        0xc939e1104e8e7429L },
+      { 0x9b1ac2b35a914f2fL,0x39e35ed3e74b8f9cL,0xd0debdb2781b2fb0L,
+        0x1585638f2d997ba2L } },
+    /* 60 << 238 */
+    { { 0x9c4b646e9e2fce99L,0x68a210811e80857fL,0x06d54e443643b52aL,
+        0xde8d6d630d8eb843L },
+      { 0x7032156342146a0aL,0x8ba826f25eaa3622L,0x227a58bd86138787L,
+        0x43b6c03c10281d37L } },
+    /* 61 << 238 */
+    { { 0x6326afbbb54dde39L,0x744e5e8adb6f2d5fL,0x48b2a99acff158e1L,
+        0xa93c8fa0ef87918fL },
+      { 0x2182f956de058c5cL,0x216235d2936f9e7aL,0xace0c0dbd2e31e67L,
+        0xc96449bff23ac3e7L } },
+    /* 62 << 238 */
+    { { 0x7e9a2874170693bdL,0xa28e14fda45e6335L,0x5757f6b356427344L,
+        0x822e4556acf8edf9L },
+      { 0x2b7a6ee2e6a285cdL,0x5866f211a9df3af0L,0x40dde2ddf845b844L,
+        0x986c3726110e5e49L } },
+    /* 63 << 238 */
+    { { 0x73680c2af7172277L,0x57b94f0f0cccb244L,0xbdff72672d438ca7L,
+        0xbad1ce11cf4663fdL },
+      { 0x9813ed9dd8f71caeL,0xf43272a6961fdaa6L,0xbeff0119bd6d1637L,
+        0xfebc4f9130361978L } },
+    /* 64 << 238 */
+    { { 0x02b37a952f41deffL,0x0e44a59ae63b89b7L,0x673257dc143ff951L,
+        0x19c02205d752baf4L },
+      { 0x46c23069c4b7d692L,0x2e6392c3fd1502acL,0x6057b1a21b220846L,
+        0xe51ff9460c1b5b63L } },
+    /* 0 << 245 */
+    { { 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 << 245 */
+    { { 0x6e85cb51566c5c43L,0xcff9c9193597f046L,0x9354e90c4994d94aL,
+        0xe0a393322147927dL },
+      { 0x8427fac10dc1eb2bL,0x88cfd8c22ff319faL,0xe2d4e68401965274L,
+        0xfa2e067d67aaa746L } },
+    /* 2 << 245 */
+    { { 0xb6d92a7f3e5f9f11L,0x9afe153ad6cb3b8eL,0x4d1a6dd7ddf800bdL,
+        0xf6c13cc0caf17e19L },
+      { 0x15f6c58e325fc3eeL,0x71095400a31dc3b2L,0x168e7c07afa3d3e7L,
+        0x3f8417a194c7ae2dL } },
+    /* 3 << 245 */
+    { { 0xec234772813b230dL,0x634d0f5f17344427L,0x11548ab1d77fc56aL,
+        0x7fab1750ce06af77L },
+      { 0xb62c10a74f7c4f83L,0xa7d2edc4220a67d9L,0x1c404170921209a0L,
+        0x0b9815a0face59f0L } },
+    /* 4 << 245 */
+    { { 0x2842589b319540c3L,0x18490f59a283d6f8L,0xa2731f84daae9fcbL,
+        0x3db6d960c3683ba0L },
+      { 0xc85c63bb14611069L,0xb19436af0788bf05L,0x905459df347460d2L,
+        0x73f6e094e11a7db1L } },
+    /* 5 << 245 */
+    { { 0xdc7f938eb6357f37L,0xc5d00f792bd8aa62L,0xc878dcb92ca979fcL,
+        0x37e83ed9eb023a99L },
+      { 0x6b23e2731560bf3dL,0x1086e4591d0fae61L,0x782483169a9414bdL,
+        0x1b956bc0f0ea9ea1L } },
+    /* 6 << 245 */
+    { { 0x7b85bb91c31b9c38L,0x0c5aa90b48ef57b5L,0xdedeb169af3bab6fL,
+        0xe610ad732d373685L },
+      { 0xf13870df02ba8e15L,0x0337edb68ca7f771L,0xe4acf747b62c036cL,
+        0xd921d576b6b94e81L } },
+    /* 7 << 245 */
+    { { 0xdbc864392c422f7aL,0xfb635362ed348898L,0x83084668c45bfcd1L,
+        0xc357c9e32b315e11L },
+      { 0xb173b5405b2e5b8cL,0x7e946931e102b9a4L,0x17c890eb7b0fb199L,
+        0xec225a83d61b662bL } },
+    /* 8 << 245 */
+    { { 0xf306a3c8ee3c76cbL,0x3cf11623d32a1f6eL,0xe6d5ab646863e956L,
+        0x3b8a4cbe5c005c26L },
+      { 0xdcd529a59ce6bb27L,0xc4afaa5204d4b16fL,0xb0624a267923798dL,
+        0x85e56df66b307fabL } },
+    /* 9 << 245 */
+    { { 0x0281893c2bf29698L,0x91fc19a4d7ce7603L,0x75a5dca3ad9a558fL,
+        0x40ceb3fa4d50bf77L },
+      { 0x1baf6060bc9ba369L,0x927e1037597888c2L,0xd936bf1986a34c07L,
+        0xd4cf10c1c34ae980L } },
+    /* 10 << 245 */
+    { { 0x3a3e5334859dd614L,0x9c475b5b18d0c8eeL,0x63080d1f07cd51d5L,
+        0xc9c0d0a6b88b4326L },
+      { 0x1ac98691c234296fL,0x2a0a83a494887fb6L,0x565114270cea9cf2L,
+        0x5230a6e8a24802f5L } },
+    /* 11 << 245 */
+    { { 0xf7a2bf0f72e3d5c1L,0x377174464f21439eL,0xfedcbf259ce30334L,
+        0xe0030a787ce202f9L },
+      { 0x6f2d9ebf1202e9caL,0xe79dde6c75e6e591L,0xf52072aff1dac4f8L,
+        0x6c8d087ebb9b404dL } },
+    /* 12 << 245 */
+    { { 0xad0fc73dbce913afL,0x909e587b458a07cbL,0x1300da84d4f00c8aL,
+        0x425cd048b54466acL },
+      { 0xb59cb9be90e9d8bfL,0x991616db3e431b0eL,0xd3aa117a531aecffL,
+        0x91af92d359f4dc3bL } },
+    /* 13 << 245 */
+    { { 0x9b1ec292e93fda29L,0x76bb6c17e97d91bcL,0x7509d95faface1e6L,
+        0x3653fe47be855ae3L },
+      { 0x73180b280f680e75L,0x75eefd1beeb6c26cL,0xa4cdf29fb66d4236L,
+        0x2d70a9976b5821d8L } },
+    /* 14 << 245 */
+    { { 0x7a3ee20720445c36L,0x71d1ac8259877174L,0x0fc539f7949f73e9L,
+        0xd05cf3d7982e3081L },
+      { 0x8758e20b7b1c7129L,0xffadcc20569e61f2L,0xb05d3a2f59544c2dL,
+        0xbe16f5c19fff5e53L } },
+    /* 15 << 245 */
+    { { 0x73cf65b8aad58135L,0x622c2119037aa5beL,0x79373b3f646fd6a0L,
+        0x0e029db50d3978cfL },
+      { 0x8bdfc43794fba037L,0xaefbd687620797a6L,0x3fa5382bbd30d38eL,
+        0x7627cfbf585d7464L } },
+    /* 16 << 245 */
+    { { 0xb2330fef4e4ca463L,0xbcef72873566cc63L,0xd161d2cacf780900L,
+        0x135dc5395b54827dL },
+      { 0x638f052e27bf1bc6L,0x10a224f007dfa06cL,0xe973586d6d3321daL,
+        0x8b0c573826152c8fL } },
+    /* 17 << 245 */
+    { { 0x07ef4f2a34606074L,0x80fe7fe8a0f7047aL,0x3d1a8152e1a0e306L,
+        0x32cf43d888da5222L },
+      { 0xbf89a95f5f02ffe6L,0x3d9eb9a4806ad3eaL,0x012c17bb79c8e55eL,
+        0xfdcd1a7499c81dacL } },
+    /* 18 << 245 */
+    { { 0x7043178bb9556098L,0x4090a1df801c3886L,0x759800ff9b67b912L,
+        0x3e5c0304232620c8L },
+      { 0x4b9d3c4b70dceecaL,0xbb2d3c15181f648eL,0xf981d8376e33345cL,
+        0xb626289b0cf2297aL } },
+    /* 19 << 245 */
+    { { 0x766ac6598baebdcfL,0x1a28ae0975df01e5L,0xb71283da375876d8L,
+        0x4865a96d607b9800L },
+      { 0x25dd1bcd237936b2L,0x332f4f4b60417494L,0xd0923d68370a2147L,
+        0x497f5dfbdc842203L } },
+    /* 20 << 245 */
+    { { 0x9dc74cbd32be5e0fL,0x7475bcb717a01375L,0x438477c950d872b1L,
+        0xcec67879ffe1d63dL },
+      { 0x9b006014d8578c70L,0xc9ad99a878bb6b8bL,0x6799008e11fb3806L,
+        0xcfe81435cd44cab3L } },
+    /* 21 << 245 */
+    { { 0xa2ee15822f4fb344L,0xb8823450483fa6ebL,0x622d323d652c7749L,
+        0xd8474a98beb0a15bL },
+      { 0xe43c154d5d1c00d0L,0x7fd581d90e3e7aacL,0x2b44c6192525ddf8L,
+        0x67a033ebb8ae9739L } },
+    /* 22 << 245 */
+    { { 0x113ffec19ef2d2e4L,0x1bf6767ed5a0ea7fL,0x57fff75e03714c0aL,
+        0xa23c422e0a23e9eeL },
+      { 0xdd5f6b2d540f83afL,0xc2c2c27e55ea46a7L,0xeb6b4246672a1208L,
+        0xd13599f7ae634f7aL } },
+    /* 23 << 245 */
+    { { 0xcf914b5cd7b32c6eL,0x61a5a640eaf61814L,0x8dc3df8b208a1bbbL,
+        0xef627fd6b6d79aa5L },
+      { 0x44232ffcc4c86bc8L,0xe6f9231b061539feL,0x1d04f25a958b9533L,
+        0x180cf93449e8c885L } },
+    /* 24 << 245 */
+    { { 0x896895959884aaf7L,0xb1959be307b348a6L,0x96250e573c147c87L,
+        0xae0efb3add0c61f8L },
+      { 0xed00745eca8c325eL,0x3c911696ecff3f70L,0x73acbc65319ad41dL,
+        0x7b01a020f0b1c7efL } },
+    /* 25 << 245 */
+    { { 0xea32b29363a1483fL,0x89eabe717a248f96L,0x9c6231d3343157e5L,
+        0x93a375e5df3c546dL },
+      { 0xe76e93436a2afe69L,0xc4f89100e166c88eL,0x248efd0d4f872093L,
+        0xae0eb3ea8fe0ea61L } },
+    /* 26 << 245 */
+    { { 0xaf89790d9d79046eL,0x4d650f2d6cee0976L,0xa3935d9a43071ecaL,
+        0x66fcd2c9283b0bfeL },
+      { 0x0e665eb5696605f1L,0xe77e5d07a54cd38dL,0x90ee050a43d950cfL,
+        0x86ddebdad32e69b5L } },
+    /* 27 << 245 */
+    { { 0x6ad94a3dfddf7415L,0xf7fa13093f6e8d5aL,0xc4831d1de9957f75L,
+        0x7de28501d5817447L },
+      { 0x6f1d70789e2aeb6bL,0xba2b9ff4f67a53c2L,0x36963767df9defc3L,
+        0x479deed30d38022cL } },
+    /* 28 << 245 */
+    { { 0xd2edb89b3a8631e8L,0x8de855de7a213746L,0xb2056cb7b00c5f11L,
+        0xdeaefbd02c9b85e4L },
+      { 0x03f39a8dd150892dL,0x37b84686218b7985L,0x36296dd8b7375f1aL,
+        0x472cd4b1b78e898eL } },
+    /* 29 << 245 */
+    { { 0x15dff651e9f05de9L,0xd40450692ce98ba9L,0x8466a7ae9b38024cL,
+        0xb910e700e5a6b5efL },
+      { 0xae1c56eab3aa8f0dL,0xbab2a5077eee74a6L,0x0dca11e24b4c4620L,
+        0xfd896e2e4c47d1f4L } },
+    /* 30 << 245 */
+    { { 0xeb45ae53308fbd93L,0x46cd5a2e02c36fdaL,0x6a3d4e90baa48385L,
+        0xdd55e62e9dbe9960L },
+      { 0xa1406aa02a81ede7L,0x6860dd14f9274ea7L,0xcfdcb0c280414f86L,
+        0xff410b1022f94327L } },
+    /* 31 << 245 */
+    { { 0x5a33cc3849ad467bL,0xefb48b6c0a7335f1L,0x14fb54a4b153a360L,
+        0x604aa9d2b52469ccL },
+      { 0x5e9dc486754e48e9L,0x693cb45537471e8eL,0xfb2fd7cd8d3b37b6L,
+        0x63345e16cf09ff07L } },
+    /* 32 << 245 */
+    { { 0x9910ba6b23a5d896L,0x1fe19e357fe4364eL,0x6e1da8c39a33c677L,
+        0x15b4488b29fd9fd0L },
+      { 0x1f4392541a1f22bfL,0x920a8a70ab8163e8L,0x3fd1b24907e5658eL,
+        0xf2c4f79cb6ec839bL } },
+    /* 33 << 245 */
+    { { 0x1abbc3d04aa38d1bL,0x3b0db35cb5d9510eL,0x1754ac783e60dec0L,
+        0x53272fd7ea099b33L },
+      { 0x5fb0494f07a8e107L,0x4a89e1376a8191faL,0xa113b7f63c4ad544L,
+        0x88a2e9096cb9897bL } },
+    /* 34 << 245 */
+    { { 0x17d55de3b44a3f84L,0xacb2f34417c6c690L,0x3208816810232390L,
+        0xf2e8a61f6c733bf7L },
+      { 0xa774aab69c2d7652L,0xfb5307e3ed95c5bcL,0xa05c73c24981f110L,
+        0x1baae31ca39458c9L } },
+    /* 35 << 245 */
+    { { 0x1def185bcbea62e7L,0xe8ac9eaeeaf63059L,0x098a8cfd9921851cL,
+        0xd959c3f13abe2f5bL },
+      { 0xa4f1952520e40ae5L,0x320789e307a24aa1L,0x259e69277392b2bcL,
+        0x58f6c6671918668bL } },
+    /* 36 << 245 */
+    { { 0xce1db2bbc55d2d8bL,0x41d58bb7f4f6ca56L,0x7650b6808f877614L,
+        0x905e16baf4c349edL },
+      { 0xed415140f661acacL,0x3b8784f0cb2270afL,0x3bc280ac8a402cbaL,
+        0xd53f71460937921aL } },
+    /* 37 << 245 */
+    { { 0xc03c8ee5e5681e83L,0x62126105f6ac9e4aL,0x9503a53f936b1a38L,
+        0x3d45e2d4782fecbdL },
+      { 0x69a5c43976e8ae98L,0xb53b2eebbfb4b00eL,0xf167471272386c89L,
+        0x30ca34a24268bce4L } },
+    /* 38 << 245 */
+    { { 0x7f1ed86c78341730L,0x8ef5beb8b525e248L,0xbbc489fdb74fbf38L,
+        0x38a92a0e91a0b382L },
+      { 0x7a77ba3f22433ccfL,0xde8362d6a29f05a9L,0x7f6a30ea61189afcL,
+        0x693b550559ef114fL } },
+    /* 39 << 245 */
+    { { 0x50266bc0cd1797a1L,0xea17b47ef4b7af2dL,0xd6c4025c3df9483eL,
+        0x8cbb9d9fa37b18c9L },
+      { 0x91cbfd9c4d8424cfL,0xdb7048f1ab1c3506L,0x9eaf641f028206a3L,
+        0xf986f3f925bdf6ceL } },
+    /* 40 << 245 */
+    { { 0x262143b5224c08dcL,0x2bbb09b481b50c91L,0xc16ed709aca8c84fL,
+        0xa6210d9db2850ca8L },
+      { 0x6d8df67a09cb54d6L,0x91eef6e0500919a4L,0x90f613810f132857L,
+        0x9acede47f8d5028bL } },
+    /* 41 << 245 */
+    { { 0x844d1b7190b771c3L,0x563b71e4ba6426beL,0x2efa2e83bdb802ffL,
+        0x3410cbabab5b4a41L },
+      { 0x555b2d2630da84ddL,0xd0711ae9ee1cc29aL,0xcf3e8c602f547792L,
+        0x03d7d5dedc678b35L } },
+    /* 42 << 245 */
+    { { 0x071a2fa8ced806b8L,0x222e6134697f1478L,0xdc16fd5dabfcdbbfL,
+        0x44912ebf121b53b8L },
+      { 0xac9436742496c27cL,0x8ea3176c1ffc26b0L,0xb6e224ac13debf2cL,
+        0x524cc235f372a832L } },
+    /* 43 << 245 */
+    { { 0xd706e1d89f6f1b18L,0x2552f00544cce35bL,0x8c8326c2a88e31fcL,
+        0xb5468b2cf9552047L },
+      { 0xce683e883ff90f2bL,0x77947bdf2f0a5423L,0xd0a1b28bed56e328L,
+        0xaee35253c20134acL } },
+    /* 44 << 245 */
+    { { 0x7e98367d3567962fL,0x379ed61f8188bffbL,0x73bba348faf130a1L,
+        0x6c1f75e1904ed734L },
+      { 0x189566423b4a79fcL,0xf20bc83d54ef4493L,0x836d425d9111eca1L,
+        0xe5b5c318009a8dcfL } },
+    /* 45 << 245 */
+    { { 0x3360b25d13221bc5L,0x707baad26b3eeaf7L,0xd7279ed8743a95a1L,
+        0x7450a875969e809fL },
+      { 0x32b6bd53e5d0338fL,0x1e77f7af2b883bbcL,0x90da12cc1063ecd0L,
+        0xe2697b58c315be47L } },
+    /* 46 << 245 */
+    { { 0x2771a5bdda85d534L,0x53e78c1fff980eeaL,0xadf1cf84900385e7L,
+        0x7d3b14f6c9387b62L },
+      { 0x170e74b0cb8f2bd2L,0x2d50b486827fa993L,0xcdbe8c9af6f32babL,
+        0x55e906b0c3b93ab8L } },
+    /* 47 << 245 */
+    { { 0x747f22fc8fe280d1L,0xcd8e0de5b2e114abL,0x5ab7dbebe10b68b0L,
+        0x9dc63a9ca480d4b2L },
+      { 0x78d4bc3b4be1495fL,0x25eb3db89359122dL,0x3f8ac05b0809cbdcL,
+        0xbf4187bbd37c702fL } },
+    /* 48 << 245 */
+    { { 0x84cea0691416a6a5L,0x8f860c7943ef881cL,0x41311f8a38038a5dL,
+        0xe78c2ec0fc612067L },
+      { 0x494d2e815ad73581L,0xb4cc9e0059604097L,0xff558aecf3612cbaL,
+        0x35beef7a9e36c39eL } },
+    /* 49 << 245 */
+    { { 0x1845c7cfdbcf41b9L,0x5703662aaea997c0L,0x8b925afee402f6d8L,
+        0xd0a1b1ae4dd72162L },
+      { 0x9f47b37503c41c4bL,0xa023829b0391d042L,0x5f5045c3503b8b0aL,
+        0x123c268898c010e5L } },
+    /* 50 << 245 */
+    { { 0x324ec0cc36ba06eeL,0xface31153dd2cc0cL,0xb364f3bef333e91fL,
+        0xef8aff7328e832b0L },
+      { 0x1e9bad042d05841bL,0x42f0e3df356a21e2L,0xa3270bcb4add627eL,
+        0xb09a8158d322e711L } },
+    /* 51 << 245 */
+    { { 0x86e326a10fee104aL,0xad7788f83703f65dL,0x7e76543047bc4833L,
+        0x6cee582b2b9b893aL },
+      { 0x9cd2a167e8f55a7bL,0xefbee3c6d9e4190dL,0x33ee7185d40c2e9dL,
+        0x844cc9c5a380b548L } },
+    /* 52 << 245 */
+    { { 0x323f8ecd66926e04L,0x0001e38f8110c1baL,0x8dbcac12fc6a7f07L,
+        0xd65e1d580cec0827L },
+      { 0xd2cd4141be76ca2dL,0x7895cf5ce892f33aL,0x956d230d367139d2L,
+        0xa91abd3ed012c4c1L } },
+    /* 53 << 245 */
+    { { 0x34fa488387eb36bfL,0xc5f07102914b8fb4L,0x90f0e579adb9c95fL,
+        0xfe6ea8cb28888195L },
+      { 0x7b9b5065edfa9284L,0x6c510bd22b8c8d65L,0xd7b8ebefcbe8aafdL,
+        0xedb3af9896b1da07L } },
+    /* 54 << 245 */
+    { { 0x28ff779d6295d426L,0x0c4f6ac73fa3ad7bL,0xec44d0548b8e2604L,
+        0x9b32a66d8b0050e1L },
+      { 0x1f943366f0476ce2L,0x7554d953a602c7b4L,0xbe35aca6524f2809L,
+        0xb6881229fd4edbeaL } },
+    /* 55 << 245 */
+    { { 0xe8cd0c8f508efb63L,0x9eb5b5c86abcefc7L,0xf5621f5fb441ab4fL,
+        0x79e6c046b76a2b22L },
+      { 0x74a4792ce37a1f69L,0xcbd252cb03542b60L,0x785f65d5b3c20bd3L,
+        0x8dea61434fabc60cL } },
+    /* 56 << 245 */
+    { { 0x45e21446de673629L,0x57f7aa1e703c2d21L,0xa0e99b7f98c868c7L,
+        0x4e42f66d8b641676L },
+      { 0x602884dc91077896L,0xa0d690cfc2c9885bL,0xfeb4da333b9a5187L,
+        0x5f789598153c87eeL } },
+    /* 57 << 245 */
+    { { 0x2192dd4752b16dbaL,0xdeefc0e63524c1b1L,0x465ea76ee4383693L,
+        0x79401711361b8d98L },
+      { 0xa5f9ace9f21a15cbL,0x73d26163efee9aebL,0xcca844b3e677016cL,
+        0x6c122b0757eaee06L } },
+    /* 58 << 245 */
+    { { 0xb782dce715f09690L,0x508b9b122dfc0fc9L,0x9015ab4b65d89fc6L,
+        0x5e79dab7d6d5bb0fL },
+      { 0x64f021f06c775aa2L,0xdf09d8cc37c7eca1L,0x9a761367ef2fa506L,
+        0xed4ca4765b81eec6L } },
+    /* 59 << 245 */
+    { { 0x262ede3610bbb8b5L,0x0737ce830641ada3L,0x4c94288ae9831cccL,
+        0x487fc1ce8065e635L },
+      { 0xb13d7ab3b8bb3659L,0xdea5df3e855e4120L,0xb9a1857385eb0244L,
+        0x1a1b8ea3a7cfe0a3L } },
+    /* 60 << 245 */
+    { { 0x3b83711967b0867cL,0x8d5e0d089d364520L,0x52dccc1ed930f0e3L,
+        0xefbbcec7bf20bbafL },
+      { 0x99cffcab0263ad10L,0xd8199e6dfcd18f8aL,0x64e2773fe9f10617L,
+        0x0079e8e108704848L } },
+    /* 61 << 245 */
+    { { 0x1169989f8a342283L,0x8097799ca83012e6L,0xece966cb8a6a9001L,
+        0x93b3afef072ac7fcL },
+      { 0xe6893a2a2db3d5baL,0x263dc46289bf4fdcL,0x8852dfc9e0396673L,
+        0x7ac708953af362b6L } },
+    /* 62 << 245 */
+    { { 0xbb9cce4d5c2f342bL,0xbf80907ab52d7aaeL,0x97f3d3cd2161bcd0L,
+        0xb25b08340962744dL },
+      { 0xc5b18ea56c3a1ddaL,0xfe4ec7eb06c92317L,0xb787b890ad1c4afeL,
+        0xdccd9a920ede801aL } },
+    /* 63 << 245 */
+    { { 0x9ac6dddadb58da1fL,0x22bbc12fb8cae6eeL,0xc6f8bced815c4a43L,
+        0x8105a92cf96480c7L },
+      { 0x0dc3dbf37a859d51L,0xe3ec7ce63041196bL,0xd9f64b250d1067c9L,
+        0xf23213213d1f8dd8L } },
+    /* 64 << 245 */
+    { { 0x8b5c619c76497ee8L,0x5d2b0ac6c717370eL,0x98204cb64fcf68e1L,
+        0x0bdec21162bc6792L },
+      { 0x6973ccefa63b1011L,0xf9e3fa97e0de1ac5L,0x5efb693e3d0e0c8bL,
+        0x037248e9d2d4fcb4L } },
+    /* 0 << 252 */
+    { { 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 << 252 */
+    { { 0x80802dc91ec34f9eL,0xd8772d3533810603L,0x3f06d66c530cb4f3L,
+        0x7be5ed0dc475c129L },
+      { 0xcb9e3c1931e82b10L,0xc63d2857c9ff6b4cL,0xb92118c692a1b45eL,
+        0x0aec44147285bbcaL } },
+    /* 2 << 252 */
+    { { 0xfc189ae71e29a3efL,0xcbe906f04c93302eL,0xd0107914ceaae10eL,
+        0xb7a23f34b68e19f8L },
+      { 0xe9d875c2efd2119dL,0x03198c6efcadc9c8L,0x65591bf64da17113L,
+        0x3cf0bbf83d443038L } },
+    /* 3 << 252 */
+    { { 0xae485bb72b724759L,0x945353e1b2d4c63aL,0x82159d07de7d6f2cL,
+        0x389caef34ec5b109L },
+      { 0x4a8ebb53db65ef14L,0x2dc2cb7edd99de43L,0x816fa3ed83f2405fL,
+        0x73429bb9c14208a3L } },
+    /* 4 << 252 */
+    { { 0xb618d590b01e6e27L,0x047e2ccde180b2dcL,0xd1b299b504aea4a9L,
+        0x412c9e1e9fa403a4L },
+      { 0x88d28a3679407552L,0x49c50136f332b8e3L,0x3a1b6fcce668de19L,
+        0x178851bc75122b97L } },
+    /* 5 << 252 */
+    { { 0xb1e13752fb85fa4cL,0xd61257ce383c8ce9L,0xd43da670d2f74daeL,
+        0xa35aa23fbf846bbbL },
+      { 0x5e74235d4421fc83L,0xf6df8ee0c363473bL,0x34d7f52a3c4aa158L,
+        0x50d05aab9bc6d22eL } },
+    /* 6 << 252 */
+    { { 0x8c56e735a64785f4L,0xbc56637b5f29cd07L,0x53b2bb803ee35067L,
+        0x50235a0fdc919270L },
+      { 0x191ab6d8f2c4aa65L,0xc34758318396023bL,0x80400ba5f0f805baL,
+        0x8881065b5ec0f80fL } },
+    /* 7 << 252 */
+    { { 0xc370e522cc1b5e83L,0xde2d4ad1860b8bfbL,0xad364df067b256dfL,
+        0x8f12502ee0138997L },
+      { 0x503fa0dc7783920aL,0xe80014adc0bc866aL,0x3f89b744d3064ba6L,
+        0x03511dcdcba5dba5L } },
+    /* 8 << 252 */
+    { { 0x197dd46d95a7b1a2L,0x9c4e7ad63c6341fbL,0x426eca29484c2eceL,
+        0x9211e489de7f4f8aL },
+      { 0x14997f6ec78ef1f4L,0x2b2c091006574586L,0x17286a6e1c3eede8L,
+        0x25f92e470f60e018L } },
+    /* 9 << 252 */
+    { { 0x805c564631890a36L,0x703ef60057feea5bL,0x389f747caf3c3030L,
+        0xe0e5daeb54dd3739L },
+      { 0xfe24a4c3c9c9f155L,0x7e4bf176b5393962L,0x37183de2af20bf29L,
+        0x4a1bd7b5f95a8c3bL } },
+    /* 10 << 252 */
+    { { 0xa83b969946191d3dL,0x281fc8dd7b87f257L,0xb18e2c1354107588L,
+        0x6372def79b2bafe8L },
+      { 0xdaf4bb480d8972caL,0x3f2dd4b756167a3fL,0x1eace32d84310cf4L,
+        0xe3bcefafe42700aaL } },
+    /* 11 << 252 */
+    { { 0x5fe5691ed785e73dL,0xa5db5ab62ea60467L,0x02e23d41dfc6514aL,
+        0x35e8048ee03c3665L },
+      { 0x3f8b118f1adaa0f8L,0x28ec3b4584ce1a5aL,0xe8cacc6e2c6646b8L,
+        0x1343d185dbd0e40fL } },
+    /* 12 << 252 */
+    { { 0xe5d7f844caaa358cL,0x1a1db7e49924182aL,0xd64cd42d9c875d9aL,
+        0xb37b515f042eeec8L },
+      { 0x4d4dd4097b165fbeL,0xfc322ed9e206eff3L,0x7dee410259b7e17eL,
+        0x55a481c08236ca00L } },
+    /* 13 << 252 */
+    { { 0x8c885312c23fc975L,0x1571580605d6297bL,0xa078868ef78edd39L,
+        0x956b31e003c45e52L },
+      { 0x470275d5ff7b33a6L,0xc8d5dc3a0c7e673fL,0x419227b47e2f2598L,
+        0x8b37b6344c14a975L } },
+    /* 14 << 252 */
+    { { 0xd0667ed68b11888cL,0x5e0e8c3e803e25dcL,0x34e5d0dcb987a24aL,
+        0x9f40ac3bae920323L },
+      { 0x5463de9534e0f63aL,0xa128bf926b6328f9L,0x491ccd7cda64f1b7L,
+        0x7ef1ec27c47bde35L } },
+    /* 15 << 252 */
+    { { 0xa857240fa36a2737L,0x35dc136663621bc1L,0x7a3a6453d4fb6897L,
+        0x80f1a439c929319dL },
+      { 0xfc18274bf8cb0ba0L,0xb0b537668078c5ebL,0xfb0d49241e01d0efL,
+        0x50d7c67d372ab09cL } },
+    /* 16 << 252 */
+    { { 0xb4e370af3aeac968L,0xe4f7fee9c4b63266L,0xb4acd4c2e3ac5664L,
+        0xf8910bd2ceb38cbfL },
+      { 0x1c3ae50cc9c0726eL,0x15309569d97b40bfL,0x70884b7ffd5a5a1bL,
+        0x3890896aef8314cdL } },
+    /* 17 << 252 */
+    { { 0x58e1515ca5618c93L,0xe665432b77d942d1L,0xb32181bfb6f767a8L,
+        0x753794e83a604110L },
+      { 0x09afeb7ce8c0dbccL,0x31e02613598673a3L,0x5d98e5577d46db00L,
+        0xfc21fb8c9d985b28L } },
+    /* 18 << 252 */
+    { { 0xc9040116b0843e0bL,0x53b1b3a869b04531L,0xdd1649f085d7d830L,
+        0xbb3bcc87cb7427e8L },
+      { 0x77261100c93dce83L,0x7e79da61a1922a2aL,0x587a2b02f3149ce8L,
+        0x147e1384de92ec83L } },
+    /* 19 << 252 */
+    { { 0x484c83d3af077f30L,0xea78f8440658b53aL,0x912076c2027aec53L,
+        0xf34714e393c8177dL },
+      { 0x37ef5d15c2376c84L,0x8315b6593d1aa783L,0x3a75c484ef852a90L,
+        0x0ba0c58a16086bd4L } },
+    /* 20 << 252 */
+    { { 0x29688d7a529a6d48L,0x9c7f250dc2f19203L,0x123042fb682e2df9L,
+        0x2b7587e7ad8121bcL },
+      { 0x30fc0233e0182a65L,0xb82ecf87e3e1128aL,0x7168286193fb098fL,
+        0x043e21ae85e9e6a7L } },
+    /* 21 << 252 */
+    { { 0xab5b49d666c834eaL,0x3be43e1847414287L,0xf40fb859219a2a47L,
+        0x0e6559e9cc58df3cL },
+      { 0xfe1dfe8e0c6615b4L,0x14abc8fd56459d70L,0x7be0fa8e05de0386L,
+        0x8e63ef68e9035c7cL } },
+    /* 22 << 252 */
+    { { 0x116401b453b31e91L,0x0cba7ad44436b4d8L,0x9151f9a0107afd66L,
+        0xafaca8d01f0ee4c4L },
+      { 0x75fe5c1d9ee9761cL,0x3497a16bf0c0588fL,0x3ee2bebd0304804cL,
+        0xa8fb9a60c2c990b9L } },
+    /* 23 << 252 */
+    { { 0xd14d32fe39251114L,0x36bf25bccac73366L,0xc9562c66dba7495cL,
+        0x324d301b46ad348bL },
+      { 0x9f46620cd670407eL,0x0ea8d4f1e3733a01L,0xd396d532b0c324e0L,
+        0x5b211a0e03c317cdL } },
+    /* 24 << 252 */
+    { { 0x090d7d205ffe7b37L,0x3b7f3efb1747d2daL,0xa2cb525fb54fc519L,
+        0x6e220932f66a971eL },
+      { 0xddc160dfb486d440L,0x7fcfec463fe13465L,0x83da7e4e76e4c151L,
+        0xd6fa48a1d8d302b5L } },
+    /* 25 << 252 */
+    { { 0xc6304f265872cd88L,0x806c1d3c278b90a1L,0x3553e725caf0bc1cL,
+        0xff59e603bb9d8d5cL },
+      { 0xa4550f327a0b85ddL,0xdec5720a93ecc217L,0x0b88b74169d62213L,
+        0x7212f2455b365955L } },
+    /* 26 << 252 */
+    { { 0x20764111b5cae787L,0x13cb7f581dfd3124L,0x2dca77da1175aefbL,
+        0xeb75466bffaae775L },
+      { 0x74d76f3bdb6cff32L,0x7440f37a61fcda9aL,0x1bb3ac92b525028bL,
+        0x20fbf8f7a1975f29L } },
+    /* 27 << 252 */
+    { { 0x982692e1df83097fL,0x28738f6c554b0800L,0xdc703717a2ce2f2fL,
+        0x7913b93c40814194L },
+      { 0x049245931fe89636L,0x7b98443ff78834a6L,0x11c6ab015114a5a1L,
+        0x60deb383ffba5f4cL } },
+    /* 28 << 252 */
+    { { 0x4caa54c601a982e6L,0x1dd35e113491cd26L,0x973c315f7cbd6b05L,
+        0xcab0077552494724L },
+      { 0x04659b1f6565e15aL,0xbf30f5298c8fb026L,0xfc21641ba8a0de37L,
+        0xe9c7a366fa5e5114L } },
+    /* 29 << 252 */
+    { { 0xdb849ca552f03ad8L,0xc7e8dbe9024e35c0L,0xa1a2bbaccfc3c789L,
+        0xbf733e7d9c26f262L },
+      { 0x882ffbf5b8444823L,0xb7224e886bf8483bL,0x53023b8b65bef640L,
+        0xaabfec91d4d5f8cdL } },
+    /* 30 << 252 */
+    { { 0xa40e1510079ea1bdL,0x1ad9addcd05d5d26L,0xdb3f2eab13e68d4fL,
+        0x1cff1ae2640f803fL },
+      { 0xe0e7b749d4cee117L,0x8e9f275b4036d909L,0xce34e31d8f4d4c38L,
+        0x22b37f69d75130fcL } },
+    /* 31 << 252 */
+    { { 0x83e0f1fdb4014604L,0xa8ce991989415078L,0x82375b7541792efeL,
+        0x4f59bf5c97d4515bL },
+      { 0xac4f324f923a277dL,0xd9bc9b7d650f3406L,0xc6fa87d18a39bc51L,
+        0x825885305ccc108fL } },
+    /* 32 << 252 */
+    { { 0x5ced3c9f82e4c634L,0x8efb83143a4464f8L,0xe706381b7a1dca25L,
+        0x6cd15a3c5a2a412bL },
+      { 0x9347a8fdbfcd8fb5L,0x31db2eef6e54cd22L,0xc4aeb11ef8d8932fL,
+        0x11e7c1ed344411afL } },
+    /* 33 << 252 */
+    { { 0x2653050cdc9a151eL,0x9edbfc083bb0a859L,0x926c81c7fd5691e7L,
+        0x9c1b23426f39019aL },
+      { 0x64a81c8b7f8474b9L,0x90657c0701761819L,0x390b333155e0375aL,
+        0xc676c626b6ebc47dL } },
+    /* 34 << 252 */
+    { { 0x51623247b7d6dee8L,0x0948d92779659313L,0x99700161e9ab35edL,
+        0x06cc32b48ddde408L },
+      { 0x6f2fd664061ef338L,0x1606fa02c202e9edL,0x55388bc1929ba99bL,
+        0xc4428c5e1e81df69L } },
+    /* 35 << 252 */
+    { { 0xce2028aef91b0b2aL,0xce870a23f03dfd3fL,0x66ec2c870affe8edL,
+        0xb205fb46284d0c00L },
+      { 0xbf5dffe744cefa48L,0xb6fc37a8a19876d7L,0xbecfa84c08b72863L,
+        0xd7205ff52576374fL } },
+    /* 36 << 252 */
+    { { 0x80330d328887de41L,0x5de0df0c869ea534L,0x13f427533c56ea17L,
+        0xeb1f6069452b1a78L },
+      { 0x50474396e30ea15cL,0x575816a1c1494125L,0xbe1ce55bfe6bb38fL,
+        0xb901a94896ae30f7L } },
+    /* 37 << 252 */
+    { { 0xe5af0f08d8fc3548L,0x5010b5d0d73bfd08L,0x993d288053fe655aL,
+        0x99f2630b1c1309fdL },
+      { 0xd8677bafb4e3b76fL,0x14e51ddcb840784bL,0x326c750cbf0092ceL,
+        0xc83d306bf528320fL } },
+    /* 38 << 252 */
+    { { 0xc445671577d4715cL,0xd30019f96b703235L,0x207ccb2ed669e986L,
+        0x57c824aff6dbfc28L },
+      { 0xf0eb532fd8f92a23L,0x4a557fd49bb98fd2L,0xa57acea7c1e6199aL,
+        0x0c6638208b94b1edL } },
+    /* 39 << 252 */
+    { { 0x9b42be8ff83a9266L,0xc7741c970101bd45L,0x95770c1107bd9cebL,
+        0x1f50250a8b2e0744L },
+      { 0xf762eec81477b654L,0xc65b900e15efe59aL,0x88c961489546a897L,
+        0x7e8025b3c30b4d7cL } },
+    /* 40 << 252 */
+    { { 0xae4065ef12045cf9L,0x6fcb2caf9ccce8bdL,0x1fa0ba4ef2cf6525L,
+        0xf683125dcb72c312L },
+      { 0xa01da4eae312410eL,0x67e286776cd8e830L,0xabd9575298fb3f07L,
+        0x05f11e11eef649a5L } },
+    /* 41 << 252 */
+    { { 0xba47faef9d3472c2L,0x3adff697c77d1345L,0x4761fa04dd15afeeL,
+        0x64f1f61ab9e69462L },
+      { 0xfa691fab9bfb9093L,0x3df8ae8fa1133dfeL,0xcd5f896758cc710dL,
+        0xfbb88d5016c7fe79L } },
+    /* 42 << 252 */
+    { { 0x8e011b4ce88c50d1L,0x7532e807a8771c4fL,0x64c78a48e2278ee4L,
+        0x0b283e833845072aL },
+      { 0x98a6f29149e69274L,0xb96e96681868b21cL,0x38f0adc2b1a8908eL,
+        0x90afcff71feb829dL } },
+    /* 43 << 252 */
+    { { 0x9915a383210b0856L,0xa5a80602def04889L,0x800e9af97c64d509L,
+        0x81382d0bb8996f6fL },
+      { 0x490eba5381927e27L,0x46c63b324af50182L,0x784c5fd9d3ad62ceL,
+        0xe4fa1870f8ae8736L } },
+    /* 44 << 252 */
+    { { 0x4ec9d0bcd7466b25L,0x84ddbe1adb235c65L,0x5e2645ee163c1688L,
+        0x570bd00e00eba747L },
+      { 0xfa51b629128bfa0fL,0x92fce1bd6c1d3b68L,0x3e7361dcb66778b1L,
+        0x9c7d249d5561d2bbL } },
+    /* 45 << 252 */
+    { { 0xa40b28bf0bbc6229L,0x1c83c05edfd91497L,0x5f9f5154f083df05L,
+        0xbac38b3ceee66c9dL },
+      { 0xf71db7e3ec0dfcfdL,0xf2ecda8e8b0a8416L,0x52fddd867812aa66L,
+        0x2896ef104e6f4272L } },
+    /* 46 << 252 */
+    { { 0xff27186a0fe9a745L,0x08249fcd49ca70dbL,0x7425a2e6441cac49L,
+        0xf4a0885aece5ff57L },
+      { 0x6e2cb7317d7ead58L,0xf96cf7d61898d104L,0xafe67c9d4f2c9a89L,
+        0x89895a501c7bf5bcL } },
+    /* 47 << 252 */
+    { { 0xdc7cb8e5573cecfaL,0x66497eaed15f03e6L,0x6bc0de693f084420L,
+        0x323b9b36acd532b0L },
+      { 0xcfed390a0115a3c1L,0x9414c40b2d65ca0eL,0x641406bd2f530c78L,
+        0x29369a44833438f2L } },
+    /* 48 << 252 */
+    { { 0x996884f5903fa271L,0xe6da0fd2b9da921eL,0xa6f2f2695db01e54L,
+        0x1ee3e9bd6876214eL },
+      { 0xa26e181ce27a9497L,0x36d254e48e215e04L,0x42f32a6c252cabcaL,
+        0x9948148780b57614L } },
+    /* 49 << 252 */
+    { { 0x4c4dfe6940d9cae1L,0x0586958011a10f09L,0xca287b573491b64bL,
+        0x77862d5d3fd4a53bL },
+      { 0xbf94856e50349126L,0x2be30bd171c5268fL,0x10393f19cbb650a6L,
+        0x639531fe778cf9fdL } },
+    /* 50 << 252 */
+    { { 0x02556a11b2935359L,0xda38aa96af8c126eL,0x47dbe6c20960167fL,
+        0x37bbabb6501901cdL },
+      { 0xb6e979e02c947778L,0xd69a51757a1a1dc6L,0xc3ed50959d9faf0cL,
+        0x4dd9c0961d5fa5f0L } },
+    /* 51 << 252 */
+    { { 0xa0c4304d64f16ea8L,0x8b1cac167e718623L,0x0b5765467c67f03eL,
+        0x559cf5adcbd88c01L },
+      { 0x074877bb0e2af19aL,0x1f717ec1a1228c92L,0x70bcb800326e8920L,
+        0xec6e2c5c4f312804L } },
+    /* 52 << 252 */
+    { { 0x426aea7d3fca4752L,0xf12c09492211f62aL,0x24beecd87be7b6b5L,
+        0xb77eaf4c36d7a27dL },
+      { 0x154c2781fda78fd3L,0x848a83b0264eeabeL,0x81287ef04ffe2bc4L,
+        0x7b6d88c6b6b6fc2aL } },
+    /* 53 << 252 */
+    { { 0x805fb947ce417d99L,0x4b93dcc38b916cc4L,0x72e65bb321273323L,
+        0xbcc1badd6ea9886eL },
+      { 0x0e2230114bc5ee85L,0xa561be74c18ee1e4L,0x762fd2d4a6bcf1f1L,
+        0x50e6a5a495231489L } },
+    /* 54 << 252 */
+    { { 0xca96001fa00b500bL,0x5c098cfc5d7dcdf5L,0xa64e2d2e8c446a85L,
+        0xbae9bcf1971f3c62L },
+      { 0x4ec226838435a2c5L,0x8ceaed6c4bad4643L,0xe9f8fb47ccccf4e3L,
+        0xbd4f3fa41ce3b21eL } },
+    /* 55 << 252 */
+    { { 0xd79fb110a3db3292L,0xe28a37dab536c66aL,0x279ce87b8e49e6a9L,
+        0x70ccfe8dfdcec8e3L },
+      { 0x2193e4e03ba464b2L,0x0f39d60eaca9a398L,0x7d7932aff82c12abL,
+        0xd8ff50ed91e7e0f7L } },
+    /* 56 << 252 */
+    { { 0xea961058fa28a7e0L,0xc726cf250bf5ec74L,0xe74d55c8db229666L,
+        0x0bd9abbfa57f5799L },
+      { 0x7479ef074dfc47b3L,0xd9c65fc30c52f91dL,0x8e0283fe36a8bde2L,
+        0xa32a8b5e7d4b7280L } },
+    /* 57 << 252 */
+    { { 0x6a677c6112e83233L,0x0fbb3512dcc9bf28L,0x562e8ea50d780f61L,
+        0x0db8b22b1dc4e89cL },
+      { 0x0a6fd1fb89be0144L,0x8c77d246ca57113bL,0x4639075dff09c91cL,
+        0x5b47b17f5060824cL } },
+    /* 58 << 252 */
+    { { 0x58aea2b016287b52L,0xa1343520d0cd8eb0L,0x6148b4d0c5d58573L,
+        0xdd2b6170291c68aeL },
+      { 0xa61b39291da3b3b7L,0x5f946d7908c4ac10L,0x4105d4a57217d583L,
+        0x5061da3d25e6de5eL } },
+    /* 59 << 252 */
+    { { 0x3113940dec1b4991L,0xf12195e136f485aeL,0xa7507fb2731a2ee0L,
+        0x95057a8e6e9e196eL },
+      { 0xa3c2c9112e130136L,0x97dfbb3633c60d15L,0xcaf3c581b300ee2bL,
+        0x77f25d90f4bac8b8L } },
+    /* 60 << 252 */
+    { { 0xdb1c4f986d840cd6L,0x471d62c0e634288cL,0x8ec2f85ecec8a161L,
+        0x41f37cbcfa6f4ae2L },
+      { 0x6793a20f4b709985L,0x7a7bd33befa8985bL,0x2c6a3fbd938e6446L,
+        0x190426192a8d47c1L } },
+    /* 61 << 252 */
+    { { 0x16848667cc36975fL,0x02acf1689d5f1dfbL,0x62d41ad4613baa94L,
+        0xb56fbb929f684670L },
+      { 0xce610d0de9e40569L,0x7b99c65f35489fefL,0x0c88ad1b3df18b97L,
+        0x81b7d9be5d0e9edbL } },
+    /* 62 << 252 */
+    { { 0xd85218c0c716cc0aL,0xf4b5ff9085691c49L,0xa4fd666bce356ac6L,
+        0x17c728954b327a7aL },
+      { 0xf93d5085da6be7deL,0xff71530e3301d34eL,0x4cd96442d8f448e8L,
+        0x9283d3312ed18ffaL } },
+    /* 63 << 252 */
+    { { 0x4d33dd992a849870L,0xa716964b41576335L,0xff5e3a9b179be0e5L,
+        0x5b9d6b1b83b13632L },
+      { 0x3b8bd7d4a52f313bL,0xc9dd95a0637a4660L,0x300359620b3e218fL,
+        0xce1481a3c7b28a3cL } },
+    /* 64 << 252 */
+    { { 0xab41b43a43228d83L,0x24ae1c304ad63f99L,0x8e525f1a46a51229L,
+        0x14af860fcd26d2b4L },
+      { 0xd6baef613f714aa1L,0xf51865adeb78795eL,0xd3e21fcee6a9d694L,
+        0x82ceb1dd8a37b527L } },
+};
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
  *
  * r     Resulting point.
  * k     Scalar to multiply by.
@@ -27591,93 +34510,97 @@
  * heap  Heap to use for allocation.
  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
  */
-static int sp_256_ecc_mulmod_base_4(sp_point* r, sp_digit* k, int map,
-        void* heap)
-{
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point td[4];
-    sp_point pd;
+static int sp_256_ecc_mulmod_add_only_4(sp_point_256* r, const sp_point_256* g,
+        const sp_table_entry_256* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 rtd;
+    sp_point_256 pd;
     sp_digit tmpd[2 * 4 * 5];
 #endif
-    sp_point* t;
-    sp_point* p;
+    sp_point_256* rt;
+    sp_point_256* p = NULL;
     sp_digit* tmp;
     sp_digit* negy;
     int i;
-    ecc_recode_sum v[33];
+    ecc_recode_256 v[37];
     int err;
 
+    (void)g;
     (void)heap;
 
-    err = sp_ecc_point_new(heap, pd, p);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    t = (sp_point*)XMALLOC(sizeof(sp_point) * 4, heap, DYNAMIC_TYPE_ECC);
-    if (t == NULL)
-        err = MEMORY_E;
+    err = sp_256_point_new_4(heap, rtd, rt);
+    if (err == MP_OKAY)
+        err = sp_256_point_new_4(heap, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 4 * 5, heap,
                              DYNAMIC_TYPE_ECC);
     if (tmp == NULL)
         err = MEMORY_E;
 #else
-    t = td;
     tmp = tmpd;
 #endif
     negy = tmp;
 
     if (err == MP_OKAY) {
-        sp_256_ecc_recode_sum_8_4(k, v);
+        sp_256_ecc_recode_7_4(k, v);
 
         XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
-        XMEMSET(t, 0, sizeof(sp_point) * 4);
-        for (i=0; i<4; i++) {
-            XMEMCPY(t[i].z, p256_norm_mod, sizeof(p256_norm_mod));
-            t[i].infinity = 1;
-        }
-
-        i = 32;
-        XMEMCPY(t[v[i].mul].x, p256_table[i][v[i].i].x, sizeof(p256_table[i]->x));
-        XMEMCPY(t[v[i].mul].y, p256_table[i][v[i].i].y, sizeof(p256_table[i]->y));
-        t[v[i].mul].infinity = p256_table[i][v[i].i].infinity;
+        XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
+
+        i = 36;
+        XMEMCPY(rt->x, table[i * 65 + v[i].i].x, sizeof(table->x));
+        XMEMCPY(rt->y, table[i * 65 + v[i].i].y, sizeof(table->y));
+        rt->infinity = !v[i].i;
         for (--i; i>=0; i--) {
-            XMEMCPY(p->x, p256_table[i][v[i].i].x, sizeof(p256_table[i]->x));
-            XMEMCPY(p->y, p256_table[i][v[i].i].y, sizeof(p256_table[i]->y));
-            p->infinity = p256_table[i][v[i].i].infinity;
+            XMEMCPY(p->x, table[i * 65 + v[i].i].x, sizeof(table->x));
+            XMEMCPY(p->y, table[i * 65 + v[i].i].y, sizeof(table->y));
+            p->infinity = !v[i].i;
             sp_256_sub_4(negy, p256_mod, p->y);
-            sp_256_cond_copy_4(p->y, negy, (sp_digit)0 - v[i].neg);
-            sp_256_proj_point_add_qz1_4(&t[v[i].mul], &t[v[i].mul], p, tmp);
-        }
-        sp_256_proj_point_add_4(&t[2], &t[2], &t[3], tmp);
-        sp_256_proj_point_add_4(&t[1], &t[1], &t[3], tmp);
-        sp_256_proj_point_dbl_4(&t[2], &t[2], tmp);
-        sp_256_proj_point_add_4(&t[1], &t[1], &t[2], tmp);
-
-        if (map)
-            sp_256_map_4(r, &t[1], tmp);
-        else
-            XMEMCPY(r, &t[1], sizeof(sp_point));
-    }
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (t != NULL) {
-        XMEMSET(t, 0, sizeof(sp_point) * 4);
-        XFREE(t, heap, DYNAMIC_TYPE_ECC);
-    }
+            sp_256_cond_copy_4(p->y, negy, 0 - v[i].neg);
+            sp_256_proj_point_add_qz1_4(rt, rt, p, tmp);
+        }
+        if (map != 0) {
+            sp_256_map_4(r, rt, tmp);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_256));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     if (tmp != NULL) {
         XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 4 * 5);
         XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
     }
 #else
-    ForceZero(tmpd, sizeof(tmpd));
-    ForceZero(td, sizeof(td));
-#endif
-    sp_ecc_point_free(p, 0, heap);
+    ForceZero(tmp, sizeof(sp_digit) * 2 * 4 * 5);
+#endif
+    sp_256_point_free_4(p, 0, heap);
+    sp_256_point_free_4(rt, 0, heap);
 
     return MP_OKAY;
 }
 
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_base_4(sp_point_256* r, const sp_digit* k,
+        int map, void* heap)
+{
+    return sp_256_ecc_mulmod_add_only_4(r, NULL, p256_table,
+                                      k, map, heap);
+}
+
 #endif /* WOLFSSL_SP_SMALL */
 /* Multiply the base point of P256 by the scalar and return the result.
- * If map is true then convert result to affine co-ordinates.
+ * If map is true then convert result to affine coordinates.
  *
  * km    Scalar to multiply by.
  * r     Resulting point.
@@ -27687,23 +34610,22 @@
  */
 int sp_ecc_mulmod_base_256(mp_int* km, ecc_point* r, int map, void* heap)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point p;
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 p;
     sp_digit kd[4];
 #endif
-    sp_point* point;
+    sp_point_256* point;
     sp_digit* k = NULL;
     int err = MP_OKAY;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    err = sp_ecc_point_new(heap, p, point);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        k = XMALLOC(sizeof(sp_digit) * 4, heap, DYNAMIC_TYPE_ECC);
-        if (k == NULL)
+
+    err = sp_256_point_new_4(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
             err = MEMORY_E;
+        }
     }
 #else
     k = kd;
@@ -27711,26 +34633,24 @@
     if (err == MP_OKAY) {
         sp_256_from_mp(k, 4, km);
 
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_256_ecc_mulmod_base_avx2_4(point, k, map, heap);
-        else
-#endif
             err = sp_256_ecc_mulmod_base_4(point, k, map, heap);
     }
-    if (err == MP_OKAY)
+    if (err == MP_OKAY) {
         err = sp_256_point_to_ecc_point_4(point, r);
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (k != NULL)
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
         XFREE(k, heap, DYNAMIC_TYPE_ECC);
-#endif
-    sp_ecc_point_free(point, 0, heap);
-
-    return err;
-}
-
-#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN)
+    }
+#endif
+    sp_256_point_free_4(point, 0, heap);
+
+    return err;
+}
+
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+                                                        defined(HAVE_ECC_VERIFY)
 /* Returns 1 if the number of zero.
  * Implementation is constant time.
  *
@@ -27742,22 +34662,22 @@
     return (a[0] | a[1] | a[2] | a[3]) == 0;
 }
 
-#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN */
+#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
 /* Add 1 to a. (a = a + 1)
  *
- * r  A single precision integer.
  * a  A single precision integer.
  */
 static void sp_256_add_one_4(sp_digit* a)
 {
     __asm__ __volatile__ (
-        "ldp	x1, x2,  [%[a], 0]\n\t"
-        "ldp	x3, x4,  [%[a], 16]\n\t"
+        "ldp	x1, x2, [%[a], 0]\n\t"
         "adds	x1, x1, #1\n\t"
+        "ldr	x3, [%[a], 16]\n\t"
         "adcs	x2, x2, xzr\n\t"
+        "ldr	x4, [%[a], 24]\n\t"
         "adcs	x3, x3, xzr\n\t"
+        "stp	x1, x2, [%[a], 0]\n\t"
         "adcs	x4, x4, xzr\n\t"
-        "stp	x1, x2, [%[a], 0]\n\t"
         "stp	x3, x4, [%[a], 16]\n\t"
         :
         : [a] "r" (a)
@@ -27765,33 +34685,49 @@
     );
 }
 
-/* Read big endian unsigned byte aray into r.
- *
- * r  A single precision integer.
+/* Read big endian unsigned byte array into r.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
  * a  Byte array.
  * n  Number of bytes in array to read.
  */
-static void sp_256_from_bin(sp_digit* r, int max, const byte* a, int n)
-{
-    int i, j = 0, s = 0;
-
-    r[0] = 0;
-    for (i = n-1; i >= 0; i--) {
-        r[j] |= ((sp_digit)a[i]) << s;
-        if (s >= 56) {
-            r[j] &= 0xffffffffffffffffl;
-            s = 64 - s;
-            if (j + 1 >= max)
-                break;
-            r[++j] = a[i] >> s;
-            s = 8 - s;
-        }
-        else
-            s += 8;
-    }
-
-    for (j++; j < max; j++)
+static void sp_256_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j;
+    byte* d;
+
+    for (i = n - 1,j = 0; i >= 7; i -= 8) {
+        r[j]  = ((sp_digit)a[i - 0] <<  0) |
+                ((sp_digit)a[i - 1] <<  8) |
+                ((sp_digit)a[i - 2] << 16) |
+                ((sp_digit)a[i - 3] << 24) |
+                ((sp_digit)a[i - 4] << 32) |
+                ((sp_digit)a[i - 5] << 40) |
+                ((sp_digit)a[i - 6] << 48) |
+                ((sp_digit)a[i - 7] << 56);
+        j++;
+    }
+
+    if (i >= 0) {
         r[j] = 0;
+
+        d = (byte*)r;
+        switch (i) {
+            case 6: d[n - 1 - 6] = a[6]; //fallthrough
+            case 5: d[n - 1 - 5] = a[5]; //fallthrough
+            case 4: d[n - 1 - 4] = a[4]; //fallthrough
+            case 3: d[n - 1 - 3] = a[3]; //fallthrough
+            case 2: d[n - 1 - 2] = a[2]; //fallthrough
+            case 1: d[n - 1 - 1] = a[1]; //fallthrough
+            case 0: d[n - 1 - 0] = a[0]; //fallthrough
+        }
+        j++;
+    }
+
+    for (; j < size; j++) {
+        r[j] = 0;
+    }
 }
 
 /* Generates a scalar that is in the range 1..order-1.
@@ -27809,7 +34745,7 @@
     do {
         err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf));
         if (err == 0) {
-            sp_256_from_bin(k, 4, buf, sizeof(buf));
+            sp_256_from_bin(k, 4, buf, (int)sizeof(buf));
             if (sp_256_cmp_4(k, p256_order2) < 0) {
                 sp_256_add_one_4(k);
                 break;
@@ -27832,87 +34768,80 @@
  */
 int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point p;
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 p;
     sp_digit kd[4];
 #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
-    sp_point inf;
-#endif
-#endif
-    sp_point* point;
+    sp_point_256 inf;
+#endif
+#endif
+    sp_point_256* point;
     sp_digit* k = NULL;
 #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
-    sp_point* infinity;
+    sp_point_256* infinity;
 #endif
     int err;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
 
     (void)heap;
 
-    err = sp_ecc_point_new(heap, p, point);
+    err = sp_256_point_new_4(heap, p, point);
 #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
-    if (err == MP_OKAY)
-        err = sp_ecc_point_new(heap, inf, infinity);
-#endif
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        k = XMALLOC(sizeof(sp_digit) * 4, heap, DYNAMIC_TYPE_ECC);
-        if (k == NULL)
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_4(heap, inf, infinity);
+    }
+#endif
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
             err = MEMORY_E;
+        }
     }
 #else
     k = kd;
 #endif
 
-    if (err == MP_OKAY)
+    if (err == MP_OKAY) {
         err = sp_256_ecc_gen_k_4(rng, k);
-    if (err == MP_OKAY) {
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_256_ecc_mulmod_base_avx2_4(point, k, 1, NULL);
-        else
-#endif
+    }
+    if (err == MP_OKAY) {
             err = sp_256_ecc_mulmod_base_4(point, k, 1, NULL);
     }
 
 #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
     if (err == MP_OKAY) {
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
-            err = sp_256_ecc_mulmod_avx2_4(infinity, point, p256_order, 1,
-                                                                          NULL);
-        }
-        else
-#endif
             err = sp_256_ecc_mulmod_4(infinity, point, p256_order, 1, NULL);
     }
     if (err == MP_OKAY) {
-        if (!sp_256_iszero_4(point->x) || !sp_256_iszero_4(point->y))
+        if ((sp_256_iszero_4(point->x) == 0) || (sp_256_iszero_4(point->y) == 0)) {
             err = ECC_INF_E;
-    }
-#endif
-
-    if (err == MP_OKAY)
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(k, priv);
-    if (err == MP_OKAY)
+    }
+    if (err == MP_OKAY) {
         err = sp_256_point_to_ecc_point_4(point, pub);
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (k != NULL)
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
         XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
 #endif
 #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
-    sp_ecc_point_free(infinity, 1, heap);
-#endif
-    sp_ecc_point_free(point, 1, heap);
+    sp_256_point_free_4(infinity, 1, heap);
+#endif
+    sp_256_point_free_4(point, 1, heap);
 
     return err;
 }
 
 #ifdef HAVE_ECC_DHE
-/* Write r as big endian to byte aray.
+/* Write r as big endian to byte array.
  * Fixed length number of bytes written: 32
  *
  * r  A single precision integer.
@@ -27920,25 +34849,17 @@
  */
 static void sp_256_to_bin(sp_digit* r, byte* a)
 {
-    int i, j, s = 0, b;
-
-    j = 256 / 8 - 1;
-    a[j] = 0;
-    for (i=0; i<4 && j>=0; i++) {
-        b = 0;
-        a[j--] |= r[i] << s; b += 8 - s;
-        if (j < 0)
-            break;
-        while (b < 64) {
-            a[j--] = r[i] >> b; b += 8;
-            if (j < 0)
-                break;
-        }
-        s = 8 - (b - 64);
-        if (j >= 0)
-            a[j] = 0;
-        if (s != 0)
-            j++;
+    int i, j;
+
+    for (i = 3, j = 0; i >= 0; i--) {
+        a[j++] = r[i] >> 56;
+        a[j++] = r[i] >> 48;
+        a[j++] = r[i] >> 40;
+        a[j++] = r[i] >> 32;
+        a[j++] = r[i] >> 24;
+        a[j++] = r[i] >> 16;
+        a[j++] = r[i] >> 8;
+        a[j++] = r[i] >> 0;
     }
 }
 
@@ -27957,25 +34878,25 @@
 int sp_ecc_secret_gen_256(mp_int* priv, ecc_point* pub, byte* out,
                           word32* outLen, void* heap)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point p;
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 p;
     sp_digit kd[4];
 #endif
-    sp_point* point = NULL;
+    sp_point_256* point = NULL;
     sp_digit* k = NULL;
     int err = MP_OKAY;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    if (*outLen < 32)
+
+    if (*outLen < 32U) {
         err = BUFFER_E;
-
-    if (err == MP_OKAY)
-        err = sp_ecc_point_new(heap, p, point);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        k = XMALLOC(sizeof(sp_digit) * 4, heap, DYNAMIC_TYPE_ECC);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_4(heap, p, point);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4, heap,
+                                                              DYNAMIC_TYPE_ECC);
         if (k == NULL)
             err = MEMORY_E;
     }
@@ -27986,11 +34907,6 @@
     if (err == MP_OKAY) {
         sp_256_from_mp(k, 4, priv);
         sp_256_point_from_ecc_point_4(point, pub);
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_256_ecc_mulmod_avx2_4(point, point, k, 1, heap);
-        else
-#endif
             err = sp_256_ecc_mulmod_4(point, point, k, 1, heap);
     }
     if (err == MP_OKAY) {
@@ -27998,11 +34914,12 @@
         *outLen = 32;
     }
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (k != NULL)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
         XFREE(k, heap, DYNAMIC_TYPE_ECC);
-#endif
-    sp_ecc_point_free(point, 0, heap);
+    }
+#endif
+    sp_256_point_free_4(point, 0, heap);
 
     return err;
 }
@@ -28018,26 +34935,24 @@
 static sp_digit sp_256_add_4(sp_digit* r, const sp_digit* a,
         const sp_digit* b)
 {
-    sp_digit c = 0;
-
     __asm__ __volatile__ (
         "ldp	x3, x4, [%[a], 0]\n\t"
-        "ldp	x5, x6, [%[a], 16]\n\t"
         "ldp	x7, x8, [%[b], 0]\n\t"
-        "ldp	x9, x10, [%[b], 16]\n\t"
         "adds	x3, x3, x7\n\t"
-        "adcs	x4, x4, x8\n\t"
-        "adcs	x5, x5, x9\n\t"
-        "adcs	x6, x6, x10\n\t"
+        "ldp	x5, x6, [%[a], 16]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 16]\n\t"
+        "adcs	x5, x5, x9\n\t"
         "stp	x3, x4, [%[r], 0]\n\t"
+        "adcs	x6, x6, x10\n\t"
         "stp	x5, x6, [%[r], 16]\n\t"
-        "cset	%[c], cs\n\t"
-        : [c] "+r" (c)
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        "cset	%[r], cs\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
         : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10"
     );
 
-    return c;
+    return (sp_digit)r;
 }
 
 #endif
@@ -28105,119 +35020,113 @@
     sp_digit tmp[4];
 
     __asm__ __volatile__ (
-        "ldp	x8, x9, [%[a], 0]\n\t"
-        "ldp	x10, x11, [%[a], 16]\n\t"
-        "ldp	x12, x13, [%[b], 0]\n\t"
-        "ldp	x14, x15, [%[b], 16]\n\t"
+        "ldp       x16, x17, [%[a], 0]\n\t"
+        "ldp       x21, x22, [%[b], 0]\n\t"
         "#  A[0] * B[0]\n\t"
-        "mul	x3, x8, x12\n\t"
-        "umulh	x4, x8, x12\n\t"
-        "str	x3, [%[tmp]]\n\t"
+        "mul       x8, x16, x21\n\t"
+        "ldr       x19, [%[a], 16]\n\t"
+        "umulh     x9, x16, x21\n\t"
+        "ldr       x23, [%[b], 16]\n\t"
         "#  A[0] * B[1]\n\t"
-        "mul	x6, x8, x13\n\t"
-        "umulh	x7, x8, x13\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adc	x5, xzr, x7\n\t"
+        "mul       x4, x16, x22\n\t"
+        "ldr       x20, [%[a], 24]\n\t"
+        "umulh     x5, x16, x22\n\t"
+        "ldr       x24, [%[b], 24]\n\t"
+        "adds  x9, x9, x4\n\t"
         "#  A[1] * B[0]\n\t"
-        "mul	x6, x9, x12\n\t"
-        "umulh	x7, x9, x12\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, xzr, xzr\n\t"
-        "str	x4, [%[tmp], 8]\n\t"
+        "mul       x4, x17, x21\n\t"
+        "adc   x10, xzr, x5\n\t"
+        "umulh     x5, x17, x21\n\t"
+        "adds  x9, x9, x4\n\t"
         "#  A[0] * B[2]\n\t"
-        "mul	x6, x8, x14\n\t"
-        "umulh	x7, x8, x14\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, xzr, xzr\n\t"
+        "mul       x4, x16, x23\n\t"
+        "adcs   x10, x10, x5\n\t"
+        "umulh     x5, x16, x23\n\t"
+        "adc     x11, xzr, xzr\n\t"
+        "adds  x10, x10, x4\n\t"
         "#  A[1] * B[1]\n\t"
-        "mul	x6, x9, x13\n\t"
-        "umulh	x7, x9, x13\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "mul       x4, x17, x22\n\t"
+        "adc   x11, x11, x5\n\t"
+        "umulh     x5, x17, x22\n\t"
+        "adds  x10, x10, x4\n\t"
         "#  A[2] * B[0]\n\t"
-        "mul	x6, x10, x12\n\t"
-        "umulh	x7, x10, x12\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
-        "str	x5, [%[tmp], 16]\n\t"
+        "mul       x4, x19, x21\n\t"
+        "adcs   x11, x11, x5\n\t"
+        "umulh     x5, x19, x21\n\t"
+        "adc     x12, xzr, xzr\n\t"
+        "adds  x10, x10, x4\n\t"
         "#  A[0] * B[3]\n\t"
-        "mul	x6, x8, x15\n\t"
-        "umulh	x7, x8, x15\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, xzr, xzr\n\t"
+        "mul       x4, x16, x24\n\t"
+        "adcs   x11, x11, x5\n\t"
+        "umulh     x5, x16, x24\n\t"
+        "adc     x12, x12, xzr\n\t"
+        "adds  x11, x11, x4\n\t"
         "#  A[1] * B[2]\n\t"
-        "mul	x6, x9, x14\n\t"
-        "umulh	x7, x9, x14\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "mul       x4, x17, x23\n\t"
+        "adcs   x12, x12, x5\n\t"
+        "umulh     x5, x17, x23\n\t"
+        "adc     x13, xzr, xzr\n\t"
+        "adds  x11, x11, x4\n\t"
         "#  A[2] * B[1]\n\t"
-        "mul	x6, x10, x13\n\t"
-        "umulh	x7, x10, x13\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
+        "mul       x4, x19, x22\n\t"
+        "adcs   x12, x12, x5\n\t"
+        "umulh     x5, x19, x22\n\t"
+        "adc     x13, x13, xzr\n\t"
+        "adds  x11, x11, x4\n\t"
         "#  A[3] * B[0]\n\t"
-        "mul	x6, x11, x12\n\t"
-        "umulh	x7, x11, x12\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adcs	x4, x4, x7\n\t"
-        "adc	x5, x5, xzr\n\t"
-        "str	x3, [%[tmp], 24]\n\t"
+        "mul       x4, x20, x21\n\t"
+        "adcs   x12, x12, x5\n\t"
+        "umulh     x5, x20, x21\n\t"
+        "adc     x13, x13, xzr\n\t"
+        "adds  x11, x11, x4\n\t"
         "#  A[1] * B[3]\n\t"
-        "mul	x6, x9, x15\n\t"
-        "umulh	x7, x9, x15\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, xzr, xzr\n\t"
+        "mul       x4, x17, x24\n\t"
+        "adcs   x12, x12, x5\n\t"
+        "umulh     x5, x17, x24\n\t"
+        "adc     x13, x13, xzr\n\t"
+        "adds  x12, x12, x4\n\t"
         "#  A[2] * B[2]\n\t"
-        "mul	x6, x10, x14\n\t"
-        "umulh	x7, x10, x14\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
+        "mul       x4, x19, x23\n\t"
+        "adcs   x13, x13, x5\n\t"
+        "umulh     x5, x19, x23\n\t"
+        "adc     x14, xzr, xzr\n\t"
+        "adds  x12, x12, x4\n\t"
         "#  A[3] * B[1]\n\t"
-        "mul	x6, x11, x13\n\t"
-        "umulh	x7, x11, x13\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc	x3, x3, xzr\n\t"
-        "str	x4, [%[r], 32]\n\t"
+        "mul       x4, x20, x22\n\t"
+        "adcs   x13, x13, x5\n\t"
+        "umulh     x5, x20, x22\n\t"
+        "adc     x14, x14, xzr\n\t"
+        "adds  x12, x12, x4\n\t"
         "#  A[2] * B[3]\n\t"
-        "mul	x6, x10, x15\n\t"
-        "umulh	x7, x10, x15\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, xzr, xzr\n\t"
+        "mul       x4, x19, x24\n\t"
+        "adcs   x13, x13, x5\n\t"
+        "umulh     x5, x19, x24\n\t"
+        "adc     x14, x14, xzr\n\t"
+        "adds  x13, x13, x4\n\t"
         "#  A[3] * B[2]\n\t"
-        "mul	x6, x11, x14\n\t"
-        "umulh	x7, x11, x14\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc	x4, x4, xzr\n\t"
-        "str	x5, [%[r], 40]\n\t"
+        "mul       x4, x20, x23\n\t"
+        "adcs   x14, x14, x5\n\t"
+        "umulh     x5, x20, x23\n\t"
+        "adc     x15, xzr, xzr\n\t"
+        "adds  x13, x13, x4\n\t"
         "#  A[3] * B[3]\n\t"
-        "mul	x6, x11, x15\n\t"
-        "umulh	x7, x11, x15\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adc	x4, x4, x7\n\t"
-        "stp	x3, x4, [%[r], 48]\n\t"
+        "mul       x4, x20, x24\n\t"
+        "adcs   x14, x14, x5\n\t"
+        "umulh     x5, x20, x24\n\t"
+        "adc     x15, x15, xzr\n\t"
+        "adds  x14, x14, x4\n\t"
+        "adc   x15, x15, x5\n\t"
+        "stp	x8, x9, [%[r], 0]\n\t"
+        "stp	x10, x11, [%[r], 16]\n\t"
+        "stp	x12, x13, [%[r], 32]\n\t"
+        "stp	x14, x15, [%[r], 48]\n\t"
         :
         : [r] "r" (r), [a] "r" (a), [b] "r" (b), [tmp] "r" (tmp)
-        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15"
-    );
-
-    XMEMCPY(r, tmp, sizeof(tmp));
-}
-
-#endif /* WOLFSSL_SP_SMALL */
-#ifdef HAVE_INTEL_AVX2
-#endif /* HAVE_INTEL_AVX2 */
+        : "memory", "x4", "x5", "x6", "x7", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15"
+    );
+}
+
+#endif /* WOLFSSL_SP_SMALL */
 #endif
 #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
 /* Sub b from a into a. (a -= b)
@@ -28227,26 +35136,24 @@
  */
 static sp_digit sp_256_sub_in_place_4(sp_digit* a, const sp_digit* b)
 {
-    sp_digit c = 0;
-
     __asm__ __volatile__ (
         "ldp	x2, x3, [%[a], 0]\n\t"
-        "ldp	x4, x5, [%[a], 16]\n\t"
         "ldp	x6, x7, [%[b], 0]\n\t"
-        "ldp	x8, x9, [%[b], 16]\n\t"
         "subs	x2, x2, x6\n\t"
-        "sbcs	x3, x3, x7\n\t"
-        "sbcs	x4, x4, x8\n\t"
-        "sbcs	x5, x5, x9\n\t"
+        "ldp	x4, x5, [%[a], 16]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x8, x9, [%[b], 16]\n\t"
+        "sbcs	x4, x4, x8\n\t"
         "stp	x2, x3, [%[a], 0]\n\t"
+        "sbcs	x5, x5, x9\n\t"
         "stp	x4, x5, [%[a], 16]\n\t"
-        "csetm	%[c], cc\n\t"
-        : [c] "+r" (c)
-        : [a] "r" (a), [b] "r" (b)
+        "csetm	%[a], cc\n\t"
+        : [a] "+r" (a)
+        : [b] "r" (b)
         : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
     );
 
-    return c;
+    return (sp_digit)a;
 }
 
 /* Mul a by digit b into r. (r = a * b)
@@ -28256,44 +35163,35 @@
  * b  A single precision digit.
  */
 static void sp_256_mul_d_4(sp_digit* r, const sp_digit* a,
-        const sp_digit b)
+        sp_digit b)
 {
     __asm__ __volatile__ (
         "# A[0] * B\n\t"
-        "ldr	x8, [%[a]]\n\t"
-        "mul	x3, %[b], x8\n\t"
-        "umulh	x4, %[b], x8\n\t"
-        "mov	x5, 0\n\t"
-        "str	x3, [%[r]]\n\t"
+        "ldp	x2, x3, [%[a]]\n\t"
+        "ldp	x4, x5, [%[a], 16]\n\t"
+        "umulh	x7, %[b], x2\n\t"
+        "mul	x2, %[b], x2\n\t"
         "# A[1] * B\n\t"
-        "ldr		x8, [%[a], 8]\n\t"
-        "mov		x3, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x4, x4, x6\n\t"
-        "adcs	x5, x5, x7\n\t"
-        "adc		x3, xzr, xzr\n\t"
-        "str		x4, [%[r], 8]\n\t"
+        "mul	x8, %[b], x3\n\t"
+        "umulh	x9, %[b], x3\n\t"
+        "adds	x3, x7, x8\n\t"
         "# A[2] * B\n\t"
-        "ldr		x8, [%[a], 16]\n\t"
-        "mov		x4, 0\n\t"
-        "mul		x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x5, x5, x6\n\t"
-        "adcs	x3, x3, x7\n\t"
-        "adc		x4, xzr, xzr\n\t"
-        "str		x5, [%[r], 16]\n\t"
+        "mul	x8, %[b], x4\n\t"
+        "adc	x7, xzr, x9\n\t"
+        "umulh	x9, %[b], x4\n\t"
+        "adds	x4, x7, x8\n\t"
         "# A[3] * B\n\t"
-        "ldr	x8, [%[a], 24]\n\t"
-        "mul	x6, %[b], x8\n\t"
-        "umulh	x7, %[b], x8\n\t"
-        "adds	x3, x3, x6\n\t"
-        "adc	x4, x4, x7\n\t"
-        "str	x3, [%[r], 24]\n\t"
-        "str	x4, [%[r], 32]\n\t"
+        "mul	x8, %[b], x5\n\t"
+        "adc	x7, xzr, x9\n\t"
+        "umulh	x9, %[b], x5\n\t"
+        "adds	x5, x7, x8\n\t"
+        "str	x2, [%[r]]\n\t"
+        "adc	x6, xzr, x9\n\t"
+        "stp	x3, x4, [%[r], 8]\n\t"
+        "stp	x5, x6, [%[r], 24]\n\t"
         :
         : [r] "r" (r), [a] "r" (a), [b] "r" (b)
-        : "memory", "x3", "x4", "x5", "x6", "x7", "x8"
+        : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
     );
 }
 
@@ -28327,8 +35225,8 @@
         "subs	%[d0], %[d0], x4\n\t"
         "sbc	%[d1], %[d1], x3\n\t"
 
-        "lsl	x3, %[d1], 32\n\t"
-        "orr	x3, x3, %[d0], lsr 32\n\t"
+        "lsr	x3, %[d0], 32\n\t"
+        "orr	x3, x3, %[d1], lsl 32\n\t"
 
         "udiv	x3, x3, x5\n\t"
         "add	x6, x6, x3\n\t"
@@ -28337,21 +35235,16 @@
         "subs	%[d0], %[d0], x4\n\t"
         "sbc	%[d1], %[d1], x3\n\t"
 
-        "lsl	x3, %[d1], 32\n\t"
-        "orr	x3, x3, %[d0], lsr 32\n\t"
+        "lsr	x3, %[d0], 32\n\t"
+        "orr	x3, x3, %[d1], lsl 32\n\t"
 
         "udiv	x3, x3, x5\n\t"
         "add	x6, x6, x3\n\t"
         "mul	x4, %[div], x3\n\t"
-        "umulh	x3, %[div], x3\n\t"
-        "subs	%[d0], %[d0], x4\n\t"
-        "sbc	%[d1], %[d1], x3\n\t"
+        "sub	%[d0], %[d0], x4\n\t"
 
         "udiv	x3, %[d0], %[div]\n\t"
-        "add	x6, x6, x3\n\t"
-        "mul	x3, %[div], x3\n\t"
-        "sub	%[d0], %[d0], x3\n\t"
-        "mov	%[r], x6\n\t"
+        "add	%[r], x6, x3\n\t"
 
         : [r] "=r" (r)
         : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
@@ -28367,13 +35260,14 @@
  * a  A single precision integer.
  * m  Mask to AND against each digit.
  */
-static void sp_256_mask_4(sp_digit* r, sp_digit* a, sp_digit m)
-{
-#ifdef WOLFSSL_SP_SMALL
-    int i;
-
-    for (i=0; i<4; i++)
+static void sp_256_mask_4(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<4; i++) {
         r[i] = a[i] & m;
+    }
 #else
     r[0] = a[0] & m;
     r[1] = a[1] & m;
@@ -28391,7 +35285,7 @@
  * r  Remainder from the division.
  * returns MP_OKAY indicating success.
  */
-static WC_INLINE int sp_256_div_4(sp_digit* a, sp_digit* d, sp_digit* m,
+static WC_INLINE int sp_256_div_4(const sp_digit* a, const sp_digit* d, sp_digit* m,
         sp_digit* r)
 {
     sp_digit t1[8], t2[5];
@@ -28415,7 +35309,7 @@
     }
 
     r1 = sp_256_cmp_4(t1, d) >= 0;
-    sp_256_cond_sub_4(r, t1, t2, (sp_digit)0 - r1);
+    sp_256_cond_sub_4(r, t1, d, (sp_digit)0 - r1);
 
     return MP_OKAY;
 }
@@ -28427,80 +35321,13 @@
  * m  A single precision number that is the modulus to reduce with.
  * returns MP_OKAY indicating success.
  */
-static WC_INLINE int sp_256_mod_4(sp_digit* r, sp_digit* a, sp_digit* m)
+static WC_INLINE int sp_256_mod_4(sp_digit* r, const sp_digit* a, const sp_digit* m)
 {
     return sp_256_div_4(a, m, NULL, r);
 }
 
 #endif
 #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
-#ifdef WOLFSSL_SP_SMALL
-/* Square a and put result in r. (r = a * a)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- */
-static void sp_256_sqr_4(sp_digit* r, const sp_digit* a)
-{
-    sp_digit tmp[8];
-
-    __asm__ __volatile__ (
-        "mov	x6, 0\n\t"
-        "mov	x7, 0\n\t"
-        "mov	x8, 0\n\t"
-        "mov	x5, 0\n\t"
-        "\n1:\n\t"
-        "subs	x3, x5, 24\n\t"
-        "csel	x3, xzr, x3, cc\n\t"
-        "sub	x4, x5, x3\n\t"
-        "\n2:\n\t"
-        "cmp	x4, x3\n\t"
-        "b.eq	4f\n\t"
-        "ldr	x10, [%[a], x3]\n\t"
-        "ldr	x11, [%[a], x4]\n\t"
-        "mul	x9, x10, x11\n\t"
-        "umulh	x10, x10, x11\n\t"
-        "adds	x6, x6, x9\n\t"
-        "adcs	x7, x7, x10\n\t"
-        "adc	x8, x8, xzr\n\t"
-        "adds	x6, x6, x9\n\t"
-        "adcs	x7, x7, x10\n\t"
-        "adc	x8, x8, xzr\n\t"
-        "b.al	5f\n\t"
-        "\n4:\n\t"
-        "ldr	x10, [%[a], x3]\n\t"
-        "mul	x9, x10, x10\n\t"
-        "umulh	x10, x10, x10\n\t"
-        "adds	x6, x6, x9\n\t"
-        "adcs	x7, x7, x10\n\t"
-        "adc	x8, x8, xzr\n\t"
-        "\n5:\n\t"
-        "add	x3, x3, #8\n\t"
-        "sub	x4, x4, #8\n\t"
-        "cmp	x3, 32\n\t"
-        "b.eq	3f\n\t"
-        "cmp	x3, x4\n\t"
-        "b.gt	3f\n\t"
-        "cmp	x3, x5\n\t"
-        "b.le	2b\n\t"
-        "\n3:\n\t"
-        "str	x6, [%[r], x5]\n\t"
-        "mov	x6, x7\n\t"
-        "mov	x7, x8\n\t"
-        "mov	x8, #0\n\t"
-        "add	x5, x5, #8\n\t"
-        "cmp	x5, 48\n\t"
-        "b.le	1b\n\t"
-        "str	x6, [%[r], x5]\n\t"
-        :
-        : [r] "r" (tmp), [a] "r" (a)
-        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11"
-    );
-
-    XMEMCPY(r, tmp, sizeof(tmp));
-}
-
-#else
 /* Square a and put result in r. (r = a * a)
  *
  * r  A single precision integer.
@@ -28508,112 +35335,86 @@
  */
 static void sp_256_sqr_4(sp_digit* r, const sp_digit* a)
 {
-    sp_digit tmp[4];
-
-    __asm__ __volatile__ (
-        "ldp	x10, x11, [%[a], 0]\n\t"
-        "ldp	x12, x13, [%[a], 16]\n\t"
-        "#  A[0] * A[0]\n\t"
-        "mul	x2, x10, x10\n\t"
-        "umulh	x3, x10, x10\n\t"
-        "str	x2, [%[tmp]]\n\t"
-        "mov	x4, 0\n\t"
+    __asm__ __volatile__ (
+        "ldp       x16, x17, [%[a], 0]\n\t"
         "#  A[0] * A[1]\n\t"
-        "mul	x8, x10, x11\n\t"
-        "umulh	x9, x10, x11\n\t"
-        "adds	x3, x3, x8\n\t"
-        "adcs	x4, x4, x9\n\t"
-        "adc	x2, xzr, xzr\n\t"
-        "adds	x3, x3, x8\n\t"
-        "adcs	x4, x4, x9\n\t"
-        "adc	x2, x2, xzr\n\t"
-        "str	x3, [%[tmp], 8]\n\t"
+        "mul	x9, x16, x17\n\t"
+        "ldr       x19, [%[a], 16]\n\t"
+        "umulh	x10, x16, x17\n\t"
+        "ldr       x20, [%[a], 24]\n\t"
         "#  A[0] * A[2]\n\t"
-        "mul	x8, x10, x12\n\t"
-        "umulh	x9, x10, x12\n\t"
-        "adds	x4, x4, x8\n\t"
-        "adcs	x2, x2, x9\n\t"
-        "adc	x3, xzr, xzr\n\t"
-        "adds	x4, x4, x8\n\t"
-        "adcs	x2, x2, x9\n\t"
-        "adc	x3, x3, xzr\n\t"
-        "#  A[1] * A[1]\n\t"
-        "mul	x8, x11, x11\n\t"
-        "umulh	x9, x11, x11\n\t"
-        "adds	x4, x4, x8\n\t"
-        "adcs	x2, x2, x9\n\t"
-        "adc	x3, x3, xzr\n\t"
-        "str	x4, [%[tmp], 16]\n\t"
+        "mul	x4, x16, x19\n\t"
+        "umulh	x5, x16, x19\n\t"
+        "adds	x10, x10, x4\n\t"
         "#  A[0] * A[3]\n\t"
-        "mul	x8, x10, x13\n\t"
-        "umulh	x9, x10, x13\n\t"
-        "adds	x2, x2, x8\n\t"
-        "adcs	x3, x3, x9\n\t"
-        "adc	x4, xzr, xzr\n\t"
-        "adds	x2, x2, x8\n\t"
-        "adcs	x3, x3, x9\n\t"
-        "adc	x4, x4, xzr\n\t"
+        "mul	x4, x16, x20\n\t"
+        "adc	x11, xzr, x5\n\t"
+        "umulh	x5, x16, x20\n\t"
+        "adds	x11, x11, x4\n\t"
         "#  A[1] * A[2]\n\t"
-        "mul	x8, x11, x12\n\t"
-        "umulh	x9, x11, x12\n\t"
-        "adds	x2, x2, x8\n\t"
-        "adcs	x3, x3, x9\n\t"
-        "adc	x4, x4, xzr\n\t"
-        "adds	x2, x2, x8\n\t"
-        "adcs	x3, x3, x9\n\t"
-        "adc	x4, x4, xzr\n\t"
-        "str	x2, [%[tmp], 24]\n\t"
+        "mul	x4, x17, x19\n\t"
+        "adc	x12, xzr, x5\n\t"
+        "umulh	x5, x17, x19\n\t"
+        "adds	x11, x11, x4\n\t"
         "#  A[1] * A[3]\n\t"
-        "mul	x8, x11, x13\n\t"
-        "umulh	x9, x11, x13\n\t"
-        "adds	x3, x3, x8\n\t"
-        "adcs	x4, x4, x9\n\t"
-        "adc	x2, xzr, xzr\n\t"
-        "adds	x3, x3, x8\n\t"
-        "adcs	x4, x4, x9\n\t"
-        "adc	x2, x2, xzr\n\t"
+        "mul	x4, x17, x20\n\t"
+        "adcs	x12, x12, x5\n\t"
+        "umulh	x5, x17, x20\n\t"
+        "adc	x13, xzr, xzr\n\t"
+        "adds	x12, x12, x4\n\t"
+        "#  A[2] * A[3]\n\t"
+        "mul	x4, x19, x20\n\t"
+        "adc	x13, x13, x5\n\t"
+        "umulh	x5, x19, x20\n\t"
+        "adds	x13, x13, x4\n\t"
+        "adc	x14, xzr, x5\n\t"
+        "# Double\n\t"
+        "adds	x9, x9, x9\n\t"
+        "adcs	x10, x10, x10\n\t"
+        "adcs	x11, x11, x11\n\t"
+        "adcs	x12, x12, x12\n\t"
+        "adcs	x13, x13, x13\n\t"
+        "#  A[0] * A[0]\n\t"
+        "mul	x8, x16, x16\n\t"
+        "adcs	x14, x14, x14\n\t"
+        "umulh	x3, x16, x16\n\t"
+        "cset	x15, cs\n\t"
+        "#  A[1] * A[1]\n\t"
+        "mul	x4, x17, x17\n\t"
+        "adds	x9, x9, x3\n\t"
+        "umulh	x5, x17, x17\n\t"
+        "adcs	x10, x10, x4\n\t"
         "#  A[2] * A[2]\n\t"
-        "mul	x8, x12, x12\n\t"
-        "umulh	x9, x12, x12\n\t"
-        "adds	x3, x3, x8\n\t"
-        "adcs	x4, x4, x9\n\t"
-        "adc	x2, x2, xzr\n\t"
-        "str	x3, [%[r], 32]\n\t"
-        "#  A[2] * A[3]\n\t"
-        "mul	x8, x12, x13\n\t"
-        "umulh	x9, x12, x13\n\t"
-        "adds	x4, x4, x8\n\t"
-        "adcs	x2, x2, x9\n\t"
-        "adc	x3, xzr, xzr\n\t"
-        "adds	x4, x4, x8\n\t"
-        "adcs	x2, x2, x9\n\t"
-        "adc	x3, x3, xzr\n\t"
-        "str	x4, [%[r], 40]\n\t"
+        "mul	x6, x19, x19\n\t"
+        "adcs	x11, x11, x5\n\t"
+        "umulh	x7, x19, x19\n\t"
+        "adcs	x12, x12, x6\n\t"
         "#  A[3] * A[3]\n\t"
-        "mul	x8, x13, x13\n\t"
-        "umulh	x9, x13, x13\n\t"
-        "adds	x2, x2, x8\n\t"
-        "adc	x3, x3, x9\n\t"
-        "stp	x2, x3, [%[r], 48]\n\t"
-        :
-        : [r] "r" (r), [a] "r" (a), [tmp] "r" (tmp)
-        : "memory", "x2", "x3", "x4", "x8", "x9", "x10", "x5", "x6", "x7", "x10", "x11", "x12", "x13"
-    );
-
-    XMEMCPY(r, tmp, sizeof(tmp));
-}
-
-#endif /* WOLFSSL_SP_SMALL */
+        "mul	x16, x20, x20\n\t"
+        "adcs	x13, x13, x7\n\t"
+        "umulh	x17, x20, x20\n\t"
+        "adcs	x14, x14, x16\n\t"
+        "adc	x15, x15, x17\n\t"
+        "stp	x8, x9, [%[r], 0]\n\t"
+        "stp	x10, x11, [%[r], 16]\n\t"
+        "stp	x12, x13, [%[r], 32]\n\t"
+        "stp	x14, x15, [%[r], 48]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20"
+    );
+}
+
 #ifdef WOLFSSL_SP_SMALL
 /* Order-2 for the P256 curve. */
-static const uint64_t p256_order_2[4] = {
-    0xf3b9cac2fc63254f,0xbce6faada7179e84,0xffffffffffffffff,
-    0xffffffff00000000
+static const uint64_t p256_order_minus_2[4] = {
+    0xf3b9cac2fc63254fU,0xbce6faada7179e84U,0xffffffffffffffffU,
+    0xffffffff00000000U
 };
 #else
 /* The low half of the order-2 of the P256 curve. */
 static const uint64_t p256_order_low[2] = {
-    0xf3b9cac2fc63254f,0xbce6faada7179e84
+    0xf3b9cac2fc63254fU,0xbce6faada7179e84U
 };
 #endif /* WOLFSSL_SP_SMALL */
 
@@ -28623,10 +35424,10 @@
  * a  First operand of the multiplication.
  * b  Second operand of the multiplication.
  */
-static void sp_256_mont_mul_order_4(sp_digit* r, sp_digit* a, sp_digit* b)
+static void sp_256_mont_mul_order_4(sp_digit* r, const sp_digit* a, const sp_digit* b)
 {
     sp_256_mul_4(r, a, b);
-    sp_256_mont_reduce_4(r, p256_order, p256_mp_order);
+    sp_256_mont_reduce_order_4(r, p256_order, p256_mp_order);
 }
 
 /* Square number mod the order of P256 curve. (r = a * a mod order)
@@ -28634,10 +35435,10 @@
  * r  Result of the squaring.
  * a  Number to square.
  */
-static void sp_256_mont_sqr_order_4(sp_digit* r, sp_digit* a)
+static void sp_256_mont_sqr_order_4(sp_digit* r, const sp_digit* a)
 {
     sp_256_sqr_4(r, a);
-    sp_256_mont_reduce_4(r, p256_order, p256_mp_order);
+    sp_256_mont_reduce_order_4(r, p256_order, p256_mp_order);
 }
 
 #ifndef WOLFSSL_SP_SMALL
@@ -28647,13 +35448,14 @@
  * r  Result of the squaring.
  * a  Number to square.
  */
-static void sp_256_mont_sqr_n_order_4(sp_digit* r, sp_digit* a, int n)
+static void sp_256_mont_sqr_n_order_4(sp_digit* r, const sp_digit* a, int n)
 {
     int i;
 
     sp_256_mont_sqr_order_4(r, a);
-    for (i=1; i<n; i++)
+    for (i=1; i<n; i++) {
         sp_256_mont_sqr_order_4(r, r);
+    }
 }
 #endif /* !WOLFSSL_SP_SMALL */
 
@@ -28664,7 +35466,7 @@
  * a   Number to invert.
  * td  Temporary data.
  */
-static void sp_256_mont_inv_order_4(sp_digit* r, sp_digit* a,
+static void sp_256_mont_inv_order_4(sp_digit* r, const sp_digit* a,
         sp_digit* td)
 {
 #ifdef WOLFSSL_SP_SMALL
@@ -28674,10 +35476,11 @@
     XMEMCPY(t, a, sizeof(sp_digit) * 4);
     for (i=254; i>=0; i--) {
         sp_256_mont_sqr_order_4(t, t);
-        if (p256_order_2[i / 64] & ((sp_digit)1 << (i % 64)))
+        if ((p256_order_minus_2[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
             sp_256_mont_mul_order_4(t, t, a);
-    }
-    XMEMCPY(r, t, sizeof(sp_digit) * 4);
+        }
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 4U);
 #else
     sp_digit* t = td;
     sp_digit* t2 = td + 2 * 4;
@@ -28715,8 +35518,9 @@
     /* t2= a^ffffffff00000000ffffffffffffffffbce6 */
     for (i=127; i>=112; i--) {
         sp_256_mont_sqr_order_4(t2, t2);
-        if (p256_order_low[i / 64] & ((sp_digit)1 << (i % 64)))
+        if (((sp_digit)p256_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
             sp_256_mont_mul_order_4(t2, t2, a);
+        }
     }
     /* t2= a^ffffffff00000000ffffffffffffffffbce6f */
     sp_256_mont_sqr_n_order_4(t2, t2, 4);
@@ -28724,8 +35528,9 @@
     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84 */
     for (i=107; i>=64; i--) {
         sp_256_mont_sqr_order_4(t2, t2);
-        if (p256_order_low[i / 64] & ((sp_digit)1 << (i % 64)))
+        if (((sp_digit)p256_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
             sp_256_mont_mul_order_4(t2, t2, a);
+        }
     }
     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f */
     sp_256_mont_sqr_n_order_4(t2, t2, 4);
@@ -28733,8 +35538,9 @@
     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2 */
     for (i=59; i>=32; i--) {
         sp_256_mont_sqr_order_4(t2, t2);
-        if (p256_order_low[i / 64] & ((sp_digit)1 << (i % 64)))
+        if (((sp_digit)p256_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
             sp_256_mont_mul_order_4(t2, t2, a);
+        }
     }
     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2f */
     sp_256_mont_sqr_n_order_4(t2, t2, 4);
@@ -28742,8 +35548,9 @@
     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254 */
     for (i=27; i>=0; i--) {
         sp_256_mont_sqr_order_4(t2, t2);
-        if (p256_order_low[i / 64] & ((sp_digit)1 << (i % 64)))
+        if (((sp_digit)p256_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
             sp_256_mont_mul_order_4(t2, t2, a);
+        }
     }
     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632540 */
     sp_256_mont_sqr_n_order_4(t2, t2, 4);
@@ -28752,143 +35559,6 @@
 #endif /* WOLFSSL_SP_SMALL */
 }
 
-#ifdef HAVE_INTEL_AVX2
-/* Multiply two number mod the order of P256 curve. (r = a * b mod order)
- *
- * r  Result of the multiplication.
- * a  First operand of the multiplication.
- * b  Second operand of the multiplication.
- */
-static void sp_256_mont_mul_order_avx2_4(sp_digit* r, sp_digit* a, sp_digit* b)
-{
-    sp_256_mul_avx2_4(r, a, b);
-    sp_256_mont_reduce_avx2_4(r, p256_order, p256_mp_order);
-}
-
-/* Square number mod the order of P256 curve. (r = a * a mod order)
- *
- * r  Result of the squaring.
- * a  Number to square.
- */
-static void sp_256_mont_sqr_order_avx2_4(sp_digit* r, sp_digit* a)
-{
-    sp_256_sqr_avx2_4(r, a);
-    sp_256_mont_reduce_avx2_4(r, p256_order, p256_mp_order);
-}
-
-#ifndef WOLFSSL_SP_SMALL
-/* Square number mod the order of P256 curve a number of times.
- * (r = a ^ n mod order)
- *
- * r  Result of the squaring.
- * a  Number to square.
- */
-static void sp_256_mont_sqr_n_order_avx2_4(sp_digit* r, sp_digit* a, int n)
-{
-    int i;
-
-    sp_256_mont_sqr_order_avx2_4(r, a);
-    for (i=1; i<n; i++)
-        sp_256_mont_sqr_order_avx2_4(r, r);
-}
-#endif /* !WOLFSSL_SP_SMALL */
-
-/* Invert the number, in Montgomery form, modulo the order of the P256 curve.
- * (r = 1 / a mod order)
- *
- * r   Inverse result.
- * a   Number to invert.
- * td  Temporary data.
- */
-static void sp_256_mont_inv_order_avx2_4(sp_digit* r, sp_digit* a,
-        sp_digit* td)
-{
-#ifdef WOLFSSL_SP_SMALL
-    sp_digit* t = td;
-    int i;
-
-    XMEMCPY(t, a, sizeof(sp_digit) * 4);
-    for (i=254; i>=0; i--) {
-        sp_256_mont_sqr_order_avx2_4(t, t);
-        if (p256_order_2[i / 64] & ((sp_digit)1 << (i % 64)))
-            sp_256_mont_mul_order_avx2_4(t, t, a);
-    }
-    XMEMCPY(r, t, sizeof(sp_digit) * 4);
-#else
-    sp_digit* t = td;
-    sp_digit* t2 = td + 2 * 4;
-    sp_digit* t3 = td + 4 * 4;
-    int i;
-
-    /* t = a^2 */
-    sp_256_mont_sqr_order_avx2_4(t, a);
-    /* t = a^3 = t * a */
-    sp_256_mont_mul_order_avx2_4(t, t, a);
-    /* t2= a^c = t ^ 2 ^ 2 */
-    sp_256_mont_sqr_n_order_avx2_4(t2, t, 2);
-    /* t3= a^f = t2 * t */
-    sp_256_mont_mul_order_avx2_4(t3, t2, t);
-    /* t2= a^f0 = t3 ^ 2 ^ 4 */
-    sp_256_mont_sqr_n_order_avx2_4(t2, t3, 4);
-    /* t = a^ff = t2 * t3 */
-    sp_256_mont_mul_order_avx2_4(t, t2, t3);
-    /* t3= a^ff00 = t ^ 2 ^ 8 */
-    sp_256_mont_sqr_n_order_avx2_4(t2, t, 8);
-    /* t = a^ffff = t2 * t */
-    sp_256_mont_mul_order_avx2_4(t, t2, t);
-    /* t2= a^ffff0000 = t ^ 2 ^ 16 */
-    sp_256_mont_sqr_n_order_avx2_4(t2, t, 16);
-    /* t = a^ffffffff = t2 * t */
-    sp_256_mont_mul_order_avx2_4(t, t2, t);
-    /* t2= a^ffffffff0000000000000000 = t ^ 2 ^ 64  */
-    sp_256_mont_sqr_n_order_avx2_4(t2, t, 64);
-    /* t2= a^ffffffff00000000ffffffff = t2 * t */
-    sp_256_mont_mul_order_avx2_4(t2, t2, t);
-    /* t2= a^ffffffff00000000ffffffff00000000 = t2 ^ 2 ^ 32  */
-    sp_256_mont_sqr_n_order_avx2_4(t2, t2, 32);
-    /* t2= a^ffffffff00000000ffffffffffffffff = t2 * t */
-    sp_256_mont_mul_order_avx2_4(t2, t2, t);
-    /* t2= a^ffffffff00000000ffffffffffffffffbce6 */
-    for (i=127; i>=112; i--) {
-        sp_256_mont_sqr_order_avx2_4(t2, t2);
-        if (p256_order_low[i / 64] & ((sp_digit)1 << (i % 64)))
-            sp_256_mont_mul_order_avx2_4(t2, t2, a);
-    }
-    /* t2= a^ffffffff00000000ffffffffffffffffbce6f */
-    sp_256_mont_sqr_n_order_avx2_4(t2, t2, 4);
-    sp_256_mont_mul_order_avx2_4(t2, t2, t3);
-    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84 */
-    for (i=107; i>=64; i--) {
-        sp_256_mont_sqr_order_avx2_4(t2, t2);
-        if (p256_order_low[i / 64] & ((sp_digit)1 << (i % 64)))
-            sp_256_mont_mul_order_avx2_4(t2, t2, a);
-    }
-    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f */
-    sp_256_mont_sqr_n_order_avx2_4(t2, t2, 4);
-    sp_256_mont_mul_order_avx2_4(t2, t2, t3);
-    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2 */
-    for (i=59; i>=32; i--) {
-        sp_256_mont_sqr_order_avx2_4(t2, t2);
-        if (p256_order_low[i / 64] & ((sp_digit)1 << (i % 64)))
-            sp_256_mont_mul_order_avx2_4(t2, t2, a);
-    }
-    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2f */
-    sp_256_mont_sqr_n_order_avx2_4(t2, t2, 4);
-    sp_256_mont_mul_order_avx2_4(t2, t2, t3);
-    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254 */
-    for (i=27; i>=0; i--) {
-        sp_256_mont_sqr_order_avx2_4(t2, t2);
-        if (p256_order_low[i / 64] & ((sp_digit)1 << (i % 64)))
-            sp_256_mont_mul_order_avx2_4(t2, t2, a);
-    }
-    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632540 */
-    sp_256_mont_sqr_n_order_avx2_4(t2, t2, 4);
-    /* r = a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f */
-    sp_256_mont_mul_order_avx2_4(r, t2, t3);
-#endif /* WOLFSSL_SP_SMALL */
-}
-
-#endif /* HAVE_INTEL_AVX2 */
 #endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
 #ifdef HAVE_ECC_SIGN
 #ifndef SP_ECC_MAX_SIG_GEN
@@ -28912,114 +35582,102 @@
  * MP_OKAY on success.
  */
 int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
-                    mp_int* rm, mp_int* sm, void* heap)
-{
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    sp_digit* d;
+                    mp_int* rm, mp_int* sm, mp_int* km, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
 #else
     sp_digit ed[2*4];
     sp_digit xd[2*4];
     sp_digit kd[2*4];
     sp_digit rd[2*4];
     sp_digit td[3 * 2*4];
-    sp_point p;
+    sp_point_256 p;
 #endif
     sp_digit* e = NULL;
     sp_digit* x = NULL;
     sp_digit* k = NULL;
     sp_digit* r = NULL;
     sp_digit* tmp = NULL;
-    sp_point* point = NULL;
+    sp_point_256* point = NULL;
     sp_digit carry;
-    sp_digit* s;
-    sp_digit* kInv;
+    sp_digit* s = NULL;
+    sp_digit* kInv = NULL;
     int err = MP_OKAY;
     int64_t c;
     int i;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
 
     (void)heap;
 
-    err = sp_ecc_point_new(heap, p, point);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        d = XMALLOC(sizeof(sp_digit) * 7 * 2 * 4, heap, DYNAMIC_TYPE_ECC);
-        if (d != NULL) {
-            e = d + 0 * 4;
-            x = d + 2 * 4;
-            k = d + 4 * 4;
-            r = d + 6 * 4;
-            tmp = d + 8 * 4;
-        }
-        else
+    err = sp_256_point_new_4(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 4, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
             err = MEMORY_E;
-    }
-#else
-    e = ed;
-    x = xd;
-    k = kd;
-    r = rd;
-    tmp = td;
-#endif
-    s = e;
-    kInv = k;
-
-    if (err == MP_OKAY) {
-        if (hashLen > 32)
-            hashLen = 32;
-
-        sp_256_from_bin(e, 4, hash, hashLen);
-        sp_256_from_mp(x, 4, priv);
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        e = d + 0 * 4;
+        x = d + 2 * 4;
+        k = d + 4 * 4;
+        r = d + 6 * 4;
+        tmp = d + 8 * 4;
+#else
+        e = ed;
+        x = xd;
+        k = kd;
+        r = rd;
+        tmp = td;
+#endif
+        s = e;
+        kInv = k;
+
+        if (hashLen > 32U) {
+            hashLen = 32U;
+        }
+
+        sp_256_from_bin(e, 4, hash, (int)hashLen);
     }
 
     for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
+        sp_256_from_mp(x, 4, priv);
+
         /* New random point. */
-        err = sp_256_ecc_gen_k_4(rng, k);
+        if (km == NULL || mp_iszero(km)) {
+            err = sp_256_ecc_gen_k_4(rng, k);
+        }
+        else {
+            sp_256_from_mp(k, 4, km);
+            mp_zero(km);
+        }
         if (err == MP_OKAY) {
-#ifdef HAVE_INTEL_AVX2
-            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-                err = sp_256_ecc_mulmod_base_avx2_4(point, k, 1, heap);
-            else
-#endif
                 err = sp_256_ecc_mulmod_base_4(point, k, 1, NULL);
         }
 
         if (err == MP_OKAY) {
             /* r = point->x mod order */
-            XMEMCPY(r, point->x, sizeof(sp_digit) * 4);
+            XMEMCPY(r, point->x, sizeof(sp_digit) * 4U);
             sp_256_norm_4(r);
             c = sp_256_cmp_4(r, p256_order);
-            sp_256_cond_sub_4(r, r, p256_order, 0 - (c >= 0));
+            sp_256_cond_sub_4(r, r, p256_order, 0L - (sp_digit)(c >= 0));
             sp_256_norm_4(r);
 
             /* Conv k to Montgomery form (mod order) */
-#ifdef HAVE_INTEL_AVX2
-            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-                sp_256_mul_avx2_4(k, k, p256_norm_order);
-            else
-#endif
                 sp_256_mul_4(k, k, p256_norm_order);
             err = sp_256_mod_4(k, k, p256_order);
         }
         if (err == MP_OKAY) {
             sp_256_norm_4(k);
             /* kInv = 1/k mod order */
-#ifdef HAVE_INTEL_AVX2
-            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-                sp_256_mont_inv_order_avx2_4(kInv, k, tmp);
-            else
-#endif
                 sp_256_mont_inv_order_4(kInv, k, tmp);
             sp_256_norm_4(kInv);
 
             /* s = r * x + e */
-#ifdef HAVE_INTEL_AVX2
-            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-                sp_256_mul_avx2_4(x, x, r);
-            else
-#endif
                 sp_256_mul_4(x, x, r);
             err = sp_256_mod_4(x, x, p256_order);
         }
@@ -29029,46 +35687,45 @@
             sp_256_cond_sub_4(s, s, p256_order, 0 - carry);
             sp_256_norm_4(s);
             c = sp_256_cmp_4(s, p256_order);
-            sp_256_cond_sub_4(s, s, p256_order, 0 - (c >= 0));
+            sp_256_cond_sub_4(s, s, p256_order, 0L - (sp_digit)(c >= 0));
             sp_256_norm_4(s);
 
             /* s = s * k^-1 mod order */
-#ifdef HAVE_INTEL_AVX2
-            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-                sp_256_mont_mul_order_avx2_4(s, s, kInv);
-            else
-#endif
                 sp_256_mont_mul_order_4(s, s, kInv);
             sp_256_norm_4(s);
 
             /* Check that signature is usable. */
-            if (!sp_256_iszero_4(s))
+            if (sp_256_iszero_4(s) == 0) {
                 break;
-        }
-    }
-
-    if (i == 0)
+            }
+        }
+    }
+
+    if (i == 0) {
         err = RNG_FAILURE_E;
-
-    if (err == MP_OKAY)
+    }
+
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(r, rm);
-    if (err == MP_OKAY)
+    }
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(s, sm);
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     if (d != NULL) {
         XMEMSET(d, 0, sizeof(sp_digit) * 8 * 4);
         XFREE(d, heap, DYNAMIC_TYPE_ECC);
     }
 #else
-    XMEMSET(e, 0, sizeof(sp_digit) * 2 * 4);
-    XMEMSET(x, 0, sizeof(sp_digit) * 2 * 4);
-    XMEMSET(k, 0, sizeof(sp_digit) * 2 * 4);
-    XMEMSET(r, 0, sizeof(sp_digit) * 2 * 4);
-    XMEMSET(r, 0, sizeof(sp_digit) * 2 * 4);
-    XMEMSET(tmp, 0, sizeof(sp_digit) * 3 * 2*4);
-#endif
-    sp_ecc_point_free(point, 1, heap);
+    XMEMSET(e, 0, sizeof(sp_digit) * 2U * 4U);
+    XMEMSET(x, 0, sizeof(sp_digit) * 2U * 4U);
+    XMEMSET(k, 0, sizeof(sp_digit) * 2U * 4U);
+    XMEMSET(r, 0, sizeof(sp_digit) * 2U * 4U);
+    XMEMSET(r, 0, sizeof(sp_digit) * 2U * 4U);
+    XMEMSET(tmp, 0, sizeof(sp_digit) * 3U * 2U * 4U);
+#endif
+    sp_256_point_free_4(point, 1, heap);
 
     return err;
 }
@@ -29098,109 +35755,100 @@
 int sp_ecc_verify_256(const byte* hash, word32 hashLen, mp_int* pX,
     mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap)
 {
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit* d = NULL;
 #else
     sp_digit u1d[2*4];
     sp_digit u2d[2*4];
     sp_digit sd[2*4];
     sp_digit tmpd[2*4 * 5];
-    sp_point p1d;
-    sp_point p2d;
-#endif
-    sp_digit* u1;
-    sp_digit* u2;
-    sp_digit* s;
-    sp_digit* tmp;
-    sp_point* p1;
-    sp_point* p2 = NULL;
+    sp_point_256 p1d;
+    sp_point_256 p2d;
+#endif
+    sp_digit* u1 = NULL;
+    sp_digit* u2 = NULL;
+    sp_digit* s = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_256* p1;
+    sp_point_256* p2 = NULL;
     sp_digit carry;
     int64_t c;
     int err;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    err = sp_ecc_point_new(heap, p1d, p1);
-    if (err == MP_OKAY)
-        err = sp_ecc_point_new(heap, p2d, p2);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        d = XMALLOC(sizeof(sp_digit) * 16 * 4, heap, DYNAMIC_TYPE_ECC);
-        if (d != NULL) {
-            u1  = d + 0 * 4;
-            u2  = d + 2 * 4;
-            s   = d + 4 * 4;
-            tmp = d + 6 * 4;
-        }
-        else
+
+    err = sp_256_point_new_4(heap, p1d, p1);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_4(heap, p2d, p2);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 4, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
             err = MEMORY_E;
-    }
-#else
-    u1 = u1d;
-    u2 = u2d;
-    s  = sd;
-    tmp = tmpd;
-#endif
-
-    if (err == MP_OKAY) {
-        if (hashLen > 32)
-            hashLen = 32;
-
-        sp_256_from_bin(u1, 4, hash, hashLen);
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        u1  = d + 0 * 4;
+        u2  = d + 2 * 4;
+        s   = d + 4 * 4;
+        tmp = d + 6 * 4;
+#else
+        u1 = u1d;
+        u2 = u2d;
+        s  = sd;
+        tmp = tmpd;
+#endif
+
+        if (hashLen > 32U) {
+            hashLen = 32U;
+        }
+
+        sp_256_from_bin(u1, 4, hash, (int)hashLen);
         sp_256_from_mp(u2, 4, r);
         sp_256_from_mp(s, 4, sm);
         sp_256_from_mp(p2->x, 4, pX);
         sp_256_from_mp(p2->y, 4, pY);
         sp_256_from_mp(p2->z, 4, pZ);
 
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            sp_256_mul_avx2_4(s, s, p256_norm_order);
-        else
-#endif
+        {
             sp_256_mul_4(s, s, p256_norm_order);
+        }
         err = sp_256_mod_4(s, s, p256_order);
     }
     if (err == MP_OKAY) {
         sp_256_norm_4(s);
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
-            sp_256_mont_inv_order_avx2_4(s, s, tmp);
-            sp_256_mont_mul_order_avx2_4(u1, u1, s);
-            sp_256_mont_mul_order_avx2_4(u2, u2, s);
-        }
-        else
-#endif
         {
             sp_256_mont_inv_order_4(s, s, tmp);
             sp_256_mont_mul_order_4(u1, u1, s);
             sp_256_mont_mul_order_4(u2, u2, s);
         }
 
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_256_ecc_mulmod_base_avx2_4(p1, u1, 0, heap);
-        else
-#endif
             err = sp_256_ecc_mulmod_base_4(p1, u1, 0, heap);
     }
     if (err == MP_OKAY) {
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_256_ecc_mulmod_avx2_4(p2, p2, u2, 0, heap);
-        else
-#endif
             err = sp_256_ecc_mulmod_4(p2, p2, u2, 0, heap);
     }
 
     if (err == MP_OKAY) {
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            sp_256_proj_point_add_avx2_4(p1, p1, p2, tmp);
-        else
-#endif
+        {
             sp_256_proj_point_add_4(p1, p1, p2, tmp);
+            if (sp_256_iszero_4(p1->z)) {
+                if (sp_256_iszero_4(p1->x) && sp_256_iszero_4(p1->y)) {
+                    sp_256_proj_point_dbl_4(p1, p2, tmp);
+                }
+                else {
+                    /* Y ordinate is not used from here - don't set. */
+                    p1->x[0] = 0;
+                    p1->x[1] = 0;
+                    p1->x[2] = 0;
+                    p1->x[3] = 0;
+                    XMEMCPY(p1->z, p256_norm_mod, sizeof(p256_norm_mod));
+                }
+            }
+        }
 
         /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
         /* Reload r and convert to Montgomery form. */
@@ -29212,13 +35860,13 @@
         /* u1 = r.z'.z' mod prime */
         sp_256_mont_sqr_4(p1->z, p1->z, p256_mod, p256_mp_mod);
         sp_256_mont_mul_4(u1, u2, p1->z, p256_mod, p256_mp_mod);
-        *res = sp_256_cmp_4(p1->x, u1) == 0;
+        *res = (int)(sp_256_cmp_4(p1->x, u1) == 0);
         if (*res == 0) {
             /* Reload r and add order. */
             sp_256_from_mp(u2, 4, r);
             carry = sp_256_add_4(u2, u2, p256_order);
             /* Carry means result is greater than mod and is not valid. */
-            if (!carry) {
+            if (carry == 0) {
                 sp_256_norm_4(u2);
 
                 /* Compare with mod and if greater or equal then not valid. */
@@ -29230,19 +35878,19 @@
                         /* u1 = (r + 1*order).z'.z' mod prime */
                         sp_256_mont_mul_4(u1, u2, p1->z, p256_mod,
                                                                   p256_mp_mod);
-                        *res = sp_256_cmp_4(p1->x, u2) == 0;
+                        *res = (int)(sp_256_cmp_4(p1->x, u1) == 0);
                     }
                 }
             }
         }
     }
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     if (d != NULL)
         XFREE(d, heap, DYNAMIC_TYPE_ECC);
 #endif
-    sp_ecc_point_free(p1, 0, heap);
-    sp_ecc_point_free(p2, 0, heap);
+    sp_256_point_free_4(p1, 0, heap);
+    sp_256_point_free_4(p2, 0, heap);
 
     return err;
 }
@@ -29256,9 +35904,9 @@
  * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
  * not on the curve and MP_OKAY otherwise.
  */
-static int sp_256_ecc_is_point_4(sp_point* point, void* heap)
-{
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+static int sp_256_ecc_is_point_4(sp_point_256* point, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit* d = NULL;
 #else
     sp_digit t1d[2*4];
@@ -29268,42 +35916,46 @@
     sp_digit* t2;
     int err = MP_OKAY;
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    d = XMALLOC(sizeof(sp_digit) * 4 * 4, heap, DYNAMIC_TYPE_ECC);
-    if (d != NULL) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 4, heap, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
         t1 = d + 0 * 4;
         t2 = d + 2 * 4;
-    }
-    else
-        err = MEMORY_E;
-#else
-    (void)heap;
-
-    t1 = t1d;
-    t2 = t2d;
-#endif
-
-    if (err == MP_OKAY) {
+#else
+        (void)heap;
+
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
         sp_256_sqr_4(t1, point->y);
-        sp_256_mod_4(t1, t1, p256_mod);
+        (void)sp_256_mod_4(t1, t1, p256_mod);
         sp_256_sqr_4(t2, point->x);
-        sp_256_mod_4(t2, t2, p256_mod);
+        (void)sp_256_mod_4(t2, t2, p256_mod);
         sp_256_mul_4(t2, t2, point->x);
-        sp_256_mod_4(t2, t2, p256_mod);
-	sp_256_sub_4(t2, p256_mod, t2);
+        (void)sp_256_mod_4(t2, t2, p256_mod);
+        (void)sp_256_sub_4(t2, p256_mod, t2);
         sp_256_mont_add_4(t1, t1, t2, p256_mod);
 
         sp_256_mont_add_4(t1, t1, point->x, p256_mod);
         sp_256_mont_add_4(t1, t1, point->x, p256_mod);
         sp_256_mont_add_4(t1, t1, point->x, p256_mod);
 
-        if (sp_256_cmp_4(t1, p256_b) != 0)
+        if (sp_256_cmp_4(t1, p256_b) != 0) {
             err = MP_VAL;
-    }
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (d != NULL)
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
         XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
 #endif
 
     return err;
@@ -29318,23 +35970,23 @@
  */
 int sp_ecc_is_point_256(mp_int* pX, mp_int* pY)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point pubd;
-#endif
-    sp_point* pub;
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 pubd;
+#endif
+    sp_point_256* pub;
     byte one[1] = { 1 };
     int err;
 
-    err = sp_ecc_point_new(NULL, pubd, pub);
+    err = sp_256_point_new_4(NULL, pubd, pub);
     if (err == MP_OKAY) {
         sp_256_from_mp(pub->x, 4, pX);
         sp_256_from_mp(pub->y, 4, pY);
-        sp_256_from_bin(pub->z, 4, one, sizeof(one));
+        sp_256_from_bin(pub->z, 4, one, (int)sizeof(one));
 
         err = sp_256_ecc_is_point_4(pub, NULL);
     }
 
-    sp_ecc_point_free(pub, 0, NULL);
+    sp_256_point_free_4(pub, 0, NULL);
 
     return err;
 }
@@ -29352,50 +36004,54 @@
  */
 int sp_ecc_check_key_256(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit privd[4];
-    sp_point pubd;
-    sp_point pd;
+    sp_point_256 pubd;
+    sp_point_256 pd;
 #endif
     sp_digit* priv = NULL;
-    sp_point* pub;
-    sp_point* p = NULL;
+    sp_point_256* pub;
+    sp_point_256* p = NULL;
     byte one[1] = { 1 };
     int err;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    err = sp_ecc_point_new(heap, pubd, pub);
-    if (err == MP_OKAY)
-        err = sp_ecc_point_new(heap, pd, p);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        priv = XMALLOC(sizeof(sp_digit) * 4, heap, DYNAMIC_TYPE_ECC);
-        if (priv == NULL)
+
+    err = sp_256_point_new_4(heap, pubd, pub);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_4(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (priv == NULL) {
             err = MEMORY_E;
-    }
-#else
-    priv = privd;
-#endif
-
-    if (err == MP_OKAY) {
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+        priv = privd;
+#endif
+
         sp_256_from_mp(pub->x, 4, pX);
         sp_256_from_mp(pub->y, 4, pY);
-        sp_256_from_bin(pub->z, 4, one, sizeof(one));
+        sp_256_from_bin(pub->z, 4, one, (int)sizeof(one));
         sp_256_from_mp(priv, 4, privm);
 
         /* Check point at infinitiy. */
-        if (sp_256_iszero_4(pub->x) &&
-            sp_256_iszero_4(pub->y))
+        if ((sp_256_iszero_4(pub->x) != 0) &&
+            (sp_256_iszero_4(pub->y) != 0)) {
             err = ECC_INF_E;
+        }
     }
 
     if (err == MP_OKAY) {
         /* Check range of X and Y */
         if (sp_256_cmp_4(pub->x, p256_mod) >= 0 ||
-            sp_256_cmp_4(pub->y, p256_mod) >= 0)
+            sp_256_cmp_4(pub->y, p256_mod) >= 0) {
             err = ECC_OUT_OF_RANGE_E;
+        }
     }
 
     if (err == MP_OKAY) {
@@ -29405,28 +36061,18 @@
 
     if (err == MP_OKAY) {
         /* Point * order = infinity */
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_256_ecc_mulmod_avx2_4(p, pub, p256_order, 1, heap);
-        else
-#endif
             err = sp_256_ecc_mulmod_4(p, pub, p256_order, 1, heap);
     }
     if (err == MP_OKAY) {
         /* Check result is infinity */
-        if (!sp_256_iszero_4(p->x) ||
-            !sp_256_iszero_4(p->y)) {
+        if ((sp_256_iszero_4(p->x) == 0) ||
+            (sp_256_iszero_4(p->y) == 0)) {
             err = ECC_INF_E;
         }
     }
 
     if (err == MP_OKAY) {
         /* Base * private = point */
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_256_ecc_mulmod_base_avx2_4(p, priv, 1, heap);
-        else
-#endif
             err = sp_256_ecc_mulmod_base_4(p, priv, 1, heap);
     }
     if (err == MP_OKAY) {
@@ -29437,12 +36083,13 @@
         }
     }
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (priv != NULL)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (priv != NULL) {
         XFREE(priv, heap, DYNAMIC_TYPE_ECC);
-#endif
-    sp_ecc_point_free(p, 0, heap);
-    sp_ecc_point_free(pub, 0, heap);
+    }
+#endif
+    sp_256_point_free_4(p, 0, heap);
+    sp_256_point_free_4(pub, 0, heap);
 
     return err;
 }
@@ -29466,27 +36113,27 @@
                               mp_int* qX, mp_int* qY, mp_int* qZ,
                               mp_int* rX, mp_int* rY, mp_int* rZ)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit tmpd[2 * 4 * 5];
-    sp_point pd;
-    sp_point qd;
+    sp_point_256 pd;
+    sp_point_256 qd;
 #endif
     sp_digit* tmp;
-    sp_point* p;
-    sp_point* q = NULL;
+    sp_point_256* p;
+    sp_point_256* q = NULL;
     int err;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    err = sp_ecc_point_new(NULL, pd, p);
-    if (err == MP_OKAY)
-        err = sp_ecc_point_new(NULL, qd, q);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        tmp = XMALLOC(sizeof(sp_digit) * 2 * 4 * 5, NULL, DYNAMIC_TYPE_ECC);
-        if (tmp == NULL)
+
+    err = sp_256_point_new_4(NULL, pd, p);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_4(NULL, qd, q);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 4 * 5, NULL,
+                                                              DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
             err = MEMORY_E;
+        }
     }
 #else
     tmp = tmpd;
@@ -29500,27 +36147,26 @@
         sp_256_from_mp(q->y, 4, qY);
         sp_256_from_mp(q->z, 4, qZ);
 
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            sp_256_proj_point_add_avx2_4(p, p, q, tmp);
-        else
-#endif
             sp_256_proj_point_add_4(p, p, q, tmp);
     }
 
-    if (err == MP_OKAY)
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(p->x, rX);
-    if (err == MP_OKAY)
+    }
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(p->y, rY);
-    if (err == MP_OKAY)
+    }
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(p->z, rZ);
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (tmp != NULL)
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
         XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
-#endif
-    sp_ecc_point_free(q, 0, NULL);
-    sp_ecc_point_free(p, 0, NULL);
+    }
+#endif
+    sp_256_point_free_4(q, 0, NULL);
+    sp_256_point_free_4(p, 0, NULL);
 
     return err;
 }
@@ -29539,23 +36185,22 @@
 int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
                               mp_int* rX, mp_int* rY, mp_int* rZ)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit tmpd[2 * 4 * 2];
-    sp_point pd;
+    sp_point_256 pd;
 #endif
     sp_digit* tmp;
-    sp_point* p;
+    sp_point_256* p;
     int err;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    err = sp_ecc_point_new(NULL, pd, p);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        tmp = XMALLOC(sizeof(sp_digit) * 2 * 4 * 2, NULL, DYNAMIC_TYPE_ECC);
-        if (tmp == NULL)
+
+    err = sp_256_point_new_4(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 4 * 2, NULL,
+                                                              DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
             err = MEMORY_E;
+        }
     }
 #else
     tmp = tmpd;
@@ -29566,26 +36211,25 @@
         sp_256_from_mp(p->y, 4, pY);
         sp_256_from_mp(p->z, 4, pZ);
 
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            sp_256_proj_point_dbl_avx2_4(p, p, tmp);
-        else
-#endif
             sp_256_proj_point_dbl_4(p, p, tmp);
     }
 
-    if (err == MP_OKAY)
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(p->x, rX);
-    if (err == MP_OKAY)
+    }
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(p->y, rY);
-    if (err == MP_OKAY)
+    }
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(p->z, rZ);
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (tmp != NULL)
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
         XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
-#endif
-    sp_ecc_point_free(p, 0, NULL);
+    }
+#endif
+    sp_256_point_free_4(p, 0, NULL);
 
     return err;
 }
@@ -29600,20 +36244,22 @@
  */
 int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit tmpd[2 * 4 * 4];
-    sp_point pd;
+    sp_point_256 pd;
 #endif
     sp_digit* tmp;
-    sp_point* p;
+    sp_point_256* p;
     int err;
 
-    err = sp_ecc_point_new(NULL, pd, p);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        tmp = XMALLOC(sizeof(sp_digit) * 2 * 4 * 4, NULL, DYNAMIC_TYPE_ECC);
-        if (tmp == NULL)
+    err = sp_256_point_new_4(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 4 * 4, NULL,
+                                                              DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
             err = MEMORY_E;
+        }
     }
 #else
     tmp = tmpd;
@@ -29626,18 +36272,22 @@
         sp_256_map_4(p, p, tmp);
     }
 
-    if (err == MP_OKAY)
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(p->x, pX);
-    if (err == MP_OKAY)
+    }
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(p->y, pY);
-    if (err == MP_OKAY)
+    }
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(p->z, pZ);
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (tmp != NULL)
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
         XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
-#endif
-    sp_ecc_point_free(p, 0, NULL);
+    }
+#endif
+    sp_256_point_free_4(p, 0, NULL);
 
     return err;
 }
@@ -29650,7 +36300,7 @@
  */
 static int sp_256_mont_sqrt_4(sp_digit* y)
 {
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit* d;
 #else
     sp_digit t1d[2 * 4];
@@ -29659,58 +36309,23 @@
     sp_digit* t1;
     sp_digit* t2;
     int err = MP_OKAY;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    d = XMALLOC(sizeof(sp_digit) * 4 * 4, NULL, DYNAMIC_TYPE_ECC);
-    if (d != NULL) {
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 4, NULL, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
         t1 = d + 0 * 4;
         t2 = d + 2 * 4;
-    }
-    else
-        err = MEMORY_E;
-#else
-    t1 = t1d;
-    t2 = t2d;
-#endif
-
-    if (err == MP_OKAY) {
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
-            /* t2 = y ^ 0x2 */
-            sp_256_mont_sqr_avx2_4(t2, y, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0x3 */
-            sp_256_mont_mul_avx2_4(t1, t2, y, p256_mod, p256_mp_mod);
-            /* t2 = y ^ 0xc */
-            sp_256_mont_sqr_n_avx2_4(t2, t1, 2, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0xf */
-            sp_256_mont_mul_avx2_4(t1, t1, t2, p256_mod, p256_mp_mod);
-            /* t2 = y ^ 0xf0 */
-            sp_256_mont_sqr_n_avx2_4(t2, t1, 4, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0xff */
-            sp_256_mont_mul_avx2_4(t1, t1, t2, p256_mod, p256_mp_mod);
-            /* t2 = y ^ 0xff00 */
-            sp_256_mont_sqr_n_avx2_4(t2, t1, 8, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0xffff */
-            sp_256_mont_mul_avx2_4(t1, t1, t2, p256_mod, p256_mp_mod);
-            /* t2 = y ^ 0xffff0000 */
-            sp_256_mont_sqr_n_avx2_4(t2, t1, 16, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0xffffffff */
-            sp_256_mont_mul_avx2_4(t1, t1, t2, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0xffffffff00000000 */
-            sp_256_mont_sqr_n_avx2_4(t1, t1, 32, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0xffffffff00000001 */
-            sp_256_mont_mul_avx2_4(t1, t1, y, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0xffffffff00000001000000000000000000000000 */
-            sp_256_mont_sqr_n_avx2_4(t1, t1, 96, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0xffffffff00000001000000000000000000000001 */
-            sp_256_mont_mul_avx2_4(t1, t1, y, p256_mod, p256_mp_mod);
-            sp_256_mont_sqr_n_avx2_4(y, t1, 94, p256_mod, p256_mp_mod);
-        }
-        else
-#endif
+#else
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
         {
             /* t2 = y ^ 0x2 */
             sp_256_mont_sqr_4(t2, y, p256_mod, p256_mp_mod);
@@ -29744,13 +36359,15 @@
         }
     }
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (d != NULL)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
         XFREE(d, NULL, DYNAMIC_TYPE_ECC);
-#endif
-
-    return err;
-}
+    }
+#endif
+
+    return err;
+}
+
 
 /* Uncompress the point given the X ordinate.
  *
@@ -29761,47 +36378,37 @@
  */
 int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym)
 {
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit* d;
 #else
     sp_digit xd[2 * 4];
     sp_digit yd[2 * 4];
 #endif
-    sp_digit* x;
-    sp_digit* y;
-    int err = MP_OKAY;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    d = XMALLOC(sizeof(sp_digit) * 4 * 4, NULL, DYNAMIC_TYPE_ECC);
-    if (d != NULL) {
+    sp_digit* x = NULL;
+    sp_digit* y = NULL;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 4, NULL, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
         x = d + 0 * 4;
         y = d + 2 * 4;
-    }
-    else
-        err = MEMORY_E;
-#else
-    x = xd;
-    y = yd;
-#endif
-
-    if (err == MP_OKAY) {
+#else
+        x = xd;
+        y = yd;
+#endif
+
         sp_256_from_mp(x, 4, xm);
-
         err = sp_256_mod_mul_norm_4(x, x, p256_mod);
     }
-
     if (err == MP_OKAY) {
         /* y = x^3 */
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
-            sp_256_mont_sqr_avx2_4(y, x, p256_mod, p256_mp_mod);
-            sp_256_mont_mul_avx2_4(y, y, x, p256_mod, p256_mp_mod);
-        }
-        else
-#endif
         {
             sp_256_mont_sqr_4(y, x, p256_mod, p256_mp_mod);
             sp_256_mont_mul_4(y, y, x, p256_mod, p256_mp_mod);
@@ -29819,23 +36426,5657 @@
         err = sp_256_mont_sqrt_4(y);
     }
     if (err == MP_OKAY) {
-        XMEMSET(y + 4, 0, 4 * sizeof(sp_digit));
+        XMEMSET(y + 4, 0, 4U * sizeof(sp_digit));
         sp_256_mont_reduce_4(y, p256_mod, p256_mp_mod);
-        if (((y[0] ^ odd) & 1) != 0)
+        if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) {
             sp_256_mont_sub_4(y, p256_mod, y, p256_mod);
+        }
 
         err = sp_256_to_mp(y, ym);
     }
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (d != NULL)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
         XFREE(d, NULL, DYNAMIC_TYPE_ECC);
-#endif
-
-    return err;
-}
-#endif
-#endif /* WOLFSSL_SP_NO_256 */
+    }
+#endif
+
+    return err;
+}
+#endif
+#endif /* !WOLFSSL_SP_NO_256 */
+#ifdef WOLFSSL_SP_384
+
+/* Point structure to use. */
+typedef struct sp_point_384 {
+    sp_digit x[2 * 6];
+    sp_digit y[2 * 6];
+    sp_digit z[2 * 6];
+    int infinity;
+} sp_point_384;
+
+/* The modulus (prime) of the curve P384. */
+static const sp_digit p384_mod[6] = {
+    0x00000000ffffffffL,0xffffffff00000000L,0xfffffffffffffffeL,
+    0xffffffffffffffffL,0xffffffffffffffffL,0xffffffffffffffffL
+};
+/* The Montogmery normalizer for modulus of the curve P384. */
+static const sp_digit p384_norm_mod[6] = {
+    0xffffffff00000001L,0x00000000ffffffffL,0x0000000000000001L,
+    0x0000000000000000L,0x0000000000000000L,0x0000000000000000L
+};
+/* The Montogmery multiplier for modulus of the curve P384. */
+static sp_digit p384_mp_mod = 0x0000000100000001;
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+                                            defined(HAVE_ECC_VERIFY)
+/* The order of the curve P384. */
+static const sp_digit p384_order[6] = {
+    0xecec196accc52973L,0x581a0db248b0a77aL,0xc7634d81f4372ddfL,
+    0xffffffffffffffffL,0xffffffffffffffffL,0xffffffffffffffffL
+};
+#endif
+/* The order of the curve P384 minus 2. */
+static const sp_digit p384_order2[6] = {
+    0xecec196accc52971L,0x581a0db248b0a77aL,0xc7634d81f4372ddfL,
+    0xffffffffffffffffL,0xffffffffffffffffL,0xffffffffffffffffL
+};
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montogmery normalizer for order of the curve P384. */
+static const sp_digit p384_norm_order[6] = {
+    0x1313e695333ad68dL,0xa7e5f24db74f5885L,0x389cb27e0bc8d220L,
+    0x0000000000000000L,0x0000000000000000L,0x0000000000000000L
+};
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montogmery multiplier for order of the curve P384. */
+static sp_digit p384_mp_order = 0x6ed46089e88fdc45l;
+#endif
+/* The base point of curve P384. */
+static const sp_point_384 p384_base = {
+    /* X ordinate */
+    {
+        0x3a545e3872760ab7L,0x5502f25dbf55296cL,0x59f741e082542a38L,
+        0x6e1d3b628ba79b98L,0x8eb1c71ef320ad74L,0xaa87ca22be8b0537L,
+        0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* Y ordinate */
+    {
+        0x7a431d7c90ea0e5fL,0x0a60b1ce1d7e819dL,0xe9da3113b5f0b8c0L,
+        0xf8f41dbd289a147cL,0x5d9e98bf9292dc29L,0x3617de4a96262c6fL,
+        0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* Z ordinate */
+    {
+        0x0000000000000001L,0x0000000000000000L,0x0000000000000000L,
+        0x0000000000000000L,0x0000000000000000L,0x0000000000000000L,
+        0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* infinity */
+    0
+};
+#if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY)
+static const sp_digit p384_b[6] = {
+    0x2a85c8edd3ec2aefL,0xc656398d8a2ed19dL,0x0314088f5013875aL,
+    0x181d9c6efe814112L,0x988e056be3f82d19L,0xb3312fa7e23ee7e4L
+};
+#endif
+
+static int sp_384_point_new_ex_6(void* heap, sp_point_384* sp, sp_point_384** p)
+{
+    int ret = MP_OKAY;
+    (void)heap;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    (void)sp;
+    *p = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, DYNAMIC_TYPE_ECC);
+#else
+    *p = sp;
+#endif
+    if (*p == NULL) {
+        ret = MEMORY_E;
+    }
+    return ret;
+}
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* Allocate memory for point and return error. */
+#define sp_384_point_new_6(heap, sp, p) sp_384_point_new_ex_6((heap), NULL, &(p))
+#else
+/* Set pointer to data and return no error. */
+#define sp_384_point_new_6(heap, sp, p) sp_384_point_new_ex_6((heap), &(sp), &(p))
+#endif
+
+
+static void sp_384_point_free_6(sp_point_384* p, int clear, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* If valid pointer then clear point data if requested and free data. */
+    if (p != NULL) {
+        if (clear != 0) {
+            XMEMSET(p, 0, sizeof(*p));
+        }
+        XFREE(p, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+/* Clear point data if requested. */
+    if (clear != 0) {
+        XMEMSET(p, 0, sizeof(*p));
+    }
+#endif
+    (void)heap;
+}
+
+/* Multiply a number by Montogmery normalizer mod modulus (prime).
+ *
+ * r  The resulting Montgomery form number.
+ * a  The number to convert.
+ * m  The modulus (prime).
+ * returns MEMORY_E when memory allocation fails and MP_OKAY otherwise.
+ */
+static int sp_384_mod_mul_norm_6(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    int64_t* td;
+#else
+    int64_t td[12];
+    int64_t a32d[12];
+#endif
+    int64_t* t;
+    int64_t* a32;
+    int64_t o;
+    int err = MP_OKAY;
+
+    (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    td = (int64_t*)XMALLOC(sizeof(int64_t) * 2 * 12, NULL, DYNAMIC_TYPE_ECC);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t = td;
+        a32 = td + 12;
+#else
+        t = td;
+        a32 = a32d;
+#endif
+
+        a32[0] = a[0] & 0xffffffff;
+        a32[1] = a[0] >> 32;
+        a32[2] = a[1] & 0xffffffff;
+        a32[3] = a[1] >> 32;
+        a32[4] = a[2] & 0xffffffff;
+        a32[5] = a[2] >> 32;
+        a32[6] = a[3] & 0xffffffff;
+        a32[7] = a[3] >> 32;
+        a32[8] = a[4] & 0xffffffff;
+        a32[9] = a[4] >> 32;
+        a32[10] = a[5] & 0xffffffff;
+        a32[11] = a[5] >> 32;
+
+        /*  1  0  0  0  0  0  0  0  1  1  0 -1 */
+        t[0] = 0 + a32[0] + a32[8] + a32[9] - a32[11];
+        /* -1  1  0  0  0  0  0  0 -1  0  1  1 */
+        t[1] = 0 - a32[0] + a32[1] - a32[8] + a32[10] + a32[11];
+        /*  0 -1  1  0  0  0  0  0  0 -1  0  1 */
+        t[2] = 0 - a32[1] + a32[2] - a32[9] + a32[11];
+        /*  1  0 -1  1  0  0  0  0  1  1 -1 -1 */
+        t[3] = 0 + a32[0] - a32[2] + a32[3] + a32[8] + a32[9] - a32[10] - a32[11];
+        /*  1  1  0 -1  1  0  0  0  1  2  1 -2 */
+        t[4] = 0 + a32[0] + a32[1] - a32[3] + a32[4] + a32[8] + 2 * a32[9] + a32[10] -  2 * a32[11];
+        /*  0  1  1  0 -1  1  0  0  0  1  2  1 */
+        t[5] = 0 + a32[1] + a32[2] - a32[4] + a32[5] + a32[9] + 2 * a32[10] + a32[11];
+        /*  0  0  1  1  0 -1  1  0  0  0  1  2 */
+        t[6] = 0 + a32[2] + a32[3] - a32[5] + a32[6] + a32[10] + 2 * a32[11];
+        /*  0  0  0  1  1  0 -1  1  0  0  0  1 */
+        t[7] = 0 + a32[3] + a32[4] - a32[6] + a32[7] + a32[11];
+        /*  0  0  0  0  1  1  0 -1  1  0  0  0 */
+        t[8] = 0 + a32[4] + a32[5] - a32[7] + a32[8];
+        /*  0  0  0  0  0  1  1  0 -1  1  0  0 */
+        t[9] = 0 + a32[5] + a32[6] - a32[8] + a32[9];
+        /*  0  0  0  0  0  0  1  1  0 -1  1  0 */
+        t[10] = 0 + a32[6] + a32[7] - a32[9] + a32[10];
+        /*  0  0  0  0  0  0  0  1  1  0 -1  1 */
+        t[11] = 0 + a32[7] + a32[8] - a32[10] + a32[11];
+
+        t[1] += t[0] >> 32; t[0] &= 0xffffffff;
+        t[2] += t[1] >> 32; t[1] &= 0xffffffff;
+        t[3] += t[2] >> 32; t[2] &= 0xffffffff;
+        t[4] += t[3] >> 32; t[3] &= 0xffffffff;
+        t[5] += t[4] >> 32; t[4] &= 0xffffffff;
+        t[6] += t[5] >> 32; t[5] &= 0xffffffff;
+        t[7] += t[6] >> 32; t[6] &= 0xffffffff;
+        t[8] += t[7] >> 32; t[7] &= 0xffffffff;
+        t[9] += t[8] >> 32; t[8] &= 0xffffffff;
+        t[10] += t[9] >> 32; t[9] &= 0xffffffff;
+        t[11] += t[10] >> 32; t[10] &= 0xffffffff;
+        o     = t[11] >> 32; t[11] &= 0xffffffff;
+        t[0] += o;
+        t[1] -= o;
+        t[3] += o;
+        t[4] += o;
+        t[1] += t[0] >> 32; t[0] &= 0xffffffff;
+        t[2] += t[1] >> 32; t[1] &= 0xffffffff;
+        t[3] += t[2] >> 32; t[2] &= 0xffffffff;
+        t[4] += t[3] >> 32; t[3] &= 0xffffffff;
+        t[5] += t[4] >> 32; t[4] &= 0xffffffff;
+        t[6] += t[5] >> 32; t[5] &= 0xffffffff;
+        t[7] += t[6] >> 32; t[6] &= 0xffffffff;
+        t[8] += t[7] >> 32; t[7] &= 0xffffffff;
+        t[9] += t[8] >> 32; t[8] &= 0xffffffff;
+        t[10] += t[9] >> 32; t[9] &= 0xffffffff;
+        t[11] += t[10] >> 32; t[10] &= 0xffffffff;
+
+        r[0] = (t[1] << 32) | t[0];
+        r[1] = (t[3] << 32) | t[2];
+        r[2] = (t[5] << 32) | t[4];
+        r[3] = (t[7] << 32) | t[6];
+        r[4] = (t[9] << 32) | t[8];
+        r[5] = (t[11] << 32) | t[10];
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (td != NULL)
+        XFREE(td, NULL, DYNAMIC_TYPE_ECC);
+#endif
+
+    return err;
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
+ * a  A multi-precision integer.
+ */
+static void sp_384_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 64
+    int j;
+
+    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+    for (j = a->used; j < size; j++) {
+        r[j] = 0;
+    }
+#elif DIGIT_BIT > 64
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
+        r[j] &= 0xffffffffffffffffl;
+        s = 64U - s;
+        if (j + 1 >= size) {
+            break;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 64U) <= (word32)DIGIT_BIT) {
+            s += 64U;
+            r[j] &= 0xffffffffffffffffl;
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#else
+    int i, j = 0, s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i]) << s;
+        if (s + DIGIT_BIT >= 64) {
+            r[j] &= 0xffffffffffffffffl;
+            if (j + 1 >= size) {
+                break;
+            }
+            s = 64 - s;
+            if (s == DIGIT_BIT) {
+                r[++j] = 0;
+                s = 0;
+            }
+            else {
+                r[++j] = a->dp[i] >> s;
+                s = DIGIT_BIT - s;
+            }
+        }
+        else {
+            s += DIGIT_BIT;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#endif
+}
+
+/* Convert a point of type ecc_point to type sp_point_384.
+ *
+ * p   Point of type sp_point_384 (result).
+ * pm  Point of type ecc_point.
+ */
+static void sp_384_point_from_ecc_point_6(sp_point_384* p, const ecc_point* pm)
+{
+    XMEMSET(p->x, 0, sizeof(p->x));
+    XMEMSET(p->y, 0, sizeof(p->y));
+    XMEMSET(p->z, 0, sizeof(p->z));
+    sp_384_from_mp(p->x, 6, pm->x);
+    sp_384_from_mp(p->y, 6, pm->y);
+    sp_384_from_mp(p->z, 6, pm->z);
+    p->infinity = 0;
+}
+
+/* Convert an array of sp_digit to an mp_int.
+ *
+ * a  A single precision integer.
+ * r  A multi-precision integer.
+ */
+static int sp_384_to_mp(const sp_digit* a, mp_int* r)
+{
+    int err;
+
+    err = mp_grow(r, (384 + DIGIT_BIT - 1) / DIGIT_BIT);
+    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 64
+        XMEMCPY(r->dp, a, sizeof(sp_digit) * 6);
+        r->used = 6;
+        mp_clamp(r);
+#elif DIGIT_BIT < 64
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 6; i++) {
+            r->dp[j] |= (mp_digit)(a[i] << s);
+            r->dp[j] &= (1L << DIGIT_BIT) - 1;
+            s = DIGIT_BIT - s;
+            r->dp[++j] = (mp_digit)(a[i] >> s);
+            while (s + DIGIT_BIT <= 64) {
+                s += DIGIT_BIT;
+                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+                if (s == SP_WORD_SIZE) {
+                    r->dp[j] = 0;
+                }
+                else {
+                    r->dp[j] = (mp_digit)(a[i] >> s);
+                }
+            }
+            s = 64 - s;
+        }
+        r->used = (384 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#else
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 6; i++) {
+            r->dp[j] |= ((mp_digit)a[i]) << s;
+            if (s + 64 >= DIGIT_BIT) {
+    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+                r->dp[j] &= (1L << DIGIT_BIT) - 1;
+    #endif
+                s = DIGIT_BIT - s;
+                r->dp[++j] = a[i] >> s;
+                s = 64 - s;
+            }
+            else {
+                s += 64;
+            }
+        }
+        r->used = (384 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#endif
+    }
+
+    return err;
+}
+
+/* Convert a point of type sp_point_384 to type ecc_point.
+ *
+ * p   Point of type sp_point_384.
+ * pm  Point of type ecc_point (result).
+ * returns MEMORY_E when allocation of memory in ecc_point fails otherwise
+ * MP_OKAY.
+ */
+static int sp_384_point_to_ecc_point_6(const sp_point_384* p, ecc_point* pm)
+{
+    int err;
+
+    err = sp_384_to_mp(p->x, pm->x);
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->y, pm->y);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->z, pm->z);
+    }
+
+    return err;
+}
+
+/* Conditionally copy a into r using the mask m.
+ * m is -1 to copy and 0 when not.
+ *
+ * r  A single precision number to copy over.
+ * a  A single precision number to copy.
+ * m  Mask value to apply.
+ */
+static void sp_384_cond_copy_6(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+    __asm__ __volatile__ (
+        "ldp	x3, x4, [%[r], 0]\n\t"
+        "ldp	x5, x6, [%[r], 16]\n\t"
+        "ldp	x7, x8, [%[r], 32]\n\t"
+        "ldp	x9, x10, [%[a], 0]\n\t"
+        "ldp	x11, x12, [%[a], 16]\n\t"
+        "ldp	x13, x14, [%[a], 32]\n\t"
+        "eor	x9, x9, x3\n\t"
+        "eor	x10, x10, x4\n\t"
+        "eor	x11, x11, x5\n\t"
+        "eor	x12, x12, x6\n\t"
+        "eor	x13, x13, x7\n\t"
+        "eor	x14, x14, x8\n\t"
+        "and	x9, x9, %[m]\n\t"
+        "and	x10, x10, %[m]\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "and	x13, x13, %[m]\n\t"
+        "and	x14, x14, %[m]\n\t"
+        "eor	x3, x3, x9\n\t"
+        "eor	x4, x4, x10\n\t"
+        "eor	x5, x5, x11\n\t"
+        "eor	x6, x6, x12\n\t"
+        "eor	x7, x7, x13\n\t"
+        "eor	x8, x8, x14\n\t"
+        "stp	x3, x4, [%[r], 0]\n\t"
+        "stp	x5, x6, [%[r], 16]\n\t"
+        "stp	x7, x8, [%[r], 32]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [m] "r" (m)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14"
+    );
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+static void sp_384_mul_6(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+    sp_digit tmp[12];
+
+    __asm__ __volatile__ (
+        "mov	x5, 0\n\t"
+        "mov	x6, 0\n\t"
+        "mov	x7, 0\n\t"
+        "mov	x8, 0\n\t"
+        "\n1:\n\t"
+        "subs	x3, x5, 40\n\t"
+        "csel	x3, xzr, x3, cc\n\t"
+        "sub	x4, x5, x3\n\t"
+        "\n2:\n\t"
+        "ldr	x10, [%[a], x3]\n\t"
+        "ldr	x11, [%[b], x4]\n\t"
+        "mul	x9, x10, x11\n\t"
+        "umulh	x10, x10, x11\n\t"
+        "adds	x6, x6, x9\n\t"
+        "adcs	x7, x7, x10\n\t"
+        "adc	x8, x8, xzr\n\t"
+        "add	x3, x3, #8\n\t"
+        "sub	x4, x4, #8\n\t"
+        "cmp	x3, 48\n\t"
+        "b.eq	3f\n\t"
+        "cmp	x3, x5\n\t"
+        "b.le	2b\n\t"
+        "\n3:\n\t"
+        "str	x6, [%[r], x5]\n\t"
+        "mov	x6, x7\n\t"
+        "mov	x7, x8\n\t"
+        "mov	x8, #0\n\t"
+        "add	x5, x5, #8\n\t"
+        "cmp	x5, 80\n\t"
+        "b.le	1b\n\t"
+        "str	x6, [%[r], x5]\n\t"
+        :
+        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11"
+    );
+
+    XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+#else
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+static void sp_384_mul_6(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+    sp_digit tmp[6];
+
+    __asm__ __volatile__ (
+        "ldp	x9, x10, [%[a], 0]\n\t"
+        "ldp	x11, x12, [%[a], 16]\n\t"
+        "ldp	x13, x14, [%[a], 32]\n\t"
+        "ldp	x15, x16, [%[b], 0]\n\t"
+        "ldp	x17, x19, [%[b], 16]\n\t"
+        "ldp	x20, x21, [%[b], 32]\n\t"
+        "#  A[0] * B[0]\n\t"
+        "mul	x4, x9, x15\n\t"
+        "umulh	x5, x9, x15\n\t"
+        "str	x4, [%[tmp]]\n\t"
+        "#  A[0] * B[1]\n\t"
+        "mul	x7, x9, x16\n\t"
+        "umulh	x8, x9, x16\n\t"
+        "adds	x5, x5, x7\n\t"
+        "#  A[1] * B[0]\n\t"
+        "mul	x7, x10, x15\n\t"
+        "adc	x6, xzr, x8\n\t"
+        "umulh	x8, x10, x15\n\t"
+        "adds	x5, x5, x7\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "str	x5, [%[tmp], 8]\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "#  A[0] * B[2]\n\t"
+        "mul	x7, x9, x17\n\t"
+        "umulh	x8, x9, x17\n\t"
+        "adds	x6, x6, x7\n\t"
+        "#  A[1] * B[1]\n\t"
+        "mul	x7, x10, x16\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "umulh	x8, x10, x16\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x6, x6, x7\n\t"
+        "#  A[2] * B[0]\n\t"
+        "mul	x7, x11, x15\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "umulh	x8, x11, x15\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "adds	x6, x6, x7\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "str	x6, [%[tmp], 16]\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "#  A[0] * B[3]\n\t"
+        "mul	x7, x9, x19\n\t"
+        "umulh	x8, x9, x19\n\t"
+        "adds	x4, x4, x7\n\t"
+        "#  A[1] * B[2]\n\t"
+        "mul	x7, x10, x17\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "umulh	x8, x10, x17\n\t"
+        "adc	x6, xzr, xzr\n\t"
+        "adds	x4, x4, x7\n\t"
+        "#  A[2] * B[1]\n\t"
+        "mul	x7, x11, x16\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "umulh	x8, x11, x16\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "adds	x4, x4, x7\n\t"
+        "#  A[3] * B[0]\n\t"
+        "mul	x7, x12, x15\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "umulh	x8, x12, x15\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "adds	x4, x4, x7\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "str	x4, [%[tmp], 24]\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "#  A[0] * B[4]\n\t"
+        "mul	x7, x9, x20\n\t"
+        "umulh	x8, x9, x20\n\t"
+        "adds	x5, x5, x7\n\t"
+        "#  A[1] * B[3]\n\t"
+        "mul	x7, x10, x19\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "umulh	x8, x10, x19\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x5, x5, x7\n\t"
+        "#  A[2] * B[2]\n\t"
+        "mul	x7, x11, x17\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "umulh	x8, x11, x17\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "adds	x5, x5, x7\n\t"
+        "#  A[3] * B[1]\n\t"
+        "mul	x7, x12, x16\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "umulh	x8, x12, x16\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "adds	x5, x5, x7\n\t"
+        "#  A[4] * B[0]\n\t"
+        "mul	x7, x13, x15\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "umulh	x8, x13, x15\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "adds	x5, x5, x7\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "str	x5, [%[tmp], 32]\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "#  A[0] * B[5]\n\t"
+        "mul	x7, x9, x21\n\t"
+        "umulh	x8, x9, x21\n\t"
+        "adds	x6, x6, x7\n\t"
+        "#  A[1] * B[4]\n\t"
+        "mul	x7, x10, x20\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "umulh	x8, x10, x20\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x6, x6, x7\n\t"
+        "#  A[2] * B[3]\n\t"
+        "mul	x7, x11, x19\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "umulh	x8, x11, x19\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "adds	x6, x6, x7\n\t"
+        "#  A[3] * B[2]\n\t"
+        "mul	x7, x12, x17\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "umulh	x8, x12, x17\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "adds	x6, x6, x7\n\t"
+        "#  A[4] * B[1]\n\t"
+        "mul	x7, x13, x16\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "umulh	x8, x13, x16\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "adds	x6, x6, x7\n\t"
+        "#  A[5] * B[0]\n\t"
+        "mul	x7, x14, x15\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "umulh	x8, x14, x15\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "adds	x6, x6, x7\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "str	x6, [%[tmp], 40]\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "#  A[1] * B[5]\n\t"
+        "mul	x7, x10, x21\n\t"
+        "umulh	x8, x10, x21\n\t"
+        "adds	x4, x4, x7\n\t"
+        "#  A[2] * B[4]\n\t"
+        "mul	x7, x11, x20\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "umulh	x8, x11, x20\n\t"
+        "adc	x6, xzr, xzr\n\t"
+        "adds	x4, x4, x7\n\t"
+        "#  A[3] * B[3]\n\t"
+        "mul	x7, x12, x19\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "umulh	x8, x12, x19\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "adds	x4, x4, x7\n\t"
+        "#  A[4] * B[2]\n\t"
+        "mul	x7, x13, x17\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "umulh	x8, x13, x17\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "adds	x4, x4, x7\n\t"
+        "#  A[5] * B[1]\n\t"
+        "mul	x7, x14, x16\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "umulh	x8, x14, x16\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "adds	x4, x4, x7\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "str	x4, [%[r], 48]\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "#  A[2] * B[5]\n\t"
+        "mul	x7, x11, x21\n\t"
+        "umulh	x8, x11, x21\n\t"
+        "adds	x5, x5, x7\n\t"
+        "#  A[3] * B[4]\n\t"
+        "mul	x7, x12, x20\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "umulh	x8, x12, x20\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x5, x5, x7\n\t"
+        "#  A[4] * B[3]\n\t"
+        "mul	x7, x13, x19\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "umulh	x8, x13, x19\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "adds	x5, x5, x7\n\t"
+        "#  A[5] * B[2]\n\t"
+        "mul	x7, x14, x17\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "umulh	x8, x14, x17\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "adds	x5, x5, x7\n\t"
+        "adcs	x6, x6, x8\n\t"
+        "str	x5, [%[r], 56]\n\t"
+        "adc	x4, x4, xzr\n\t"
+        "#  A[3] * B[5]\n\t"
+        "mul	x7, x12, x21\n\t"
+        "umulh	x8, x12, x21\n\t"
+        "adds	x6, x6, x7\n\t"
+        "#  A[4] * B[4]\n\t"
+        "mul	x7, x13, x20\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "umulh	x8, x13, x20\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x6, x6, x7\n\t"
+        "#  A[5] * B[3]\n\t"
+        "mul	x7, x14, x19\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "umulh	x8, x14, x19\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "adds	x6, x6, x7\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "str	x6, [%[r], 64]\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "#  A[4] * B[5]\n\t"
+        "mul	x7, x13, x21\n\t"
+        "umulh	x8, x13, x21\n\t"
+        "adds	x4, x4, x7\n\t"
+        "#  A[5] * B[4]\n\t"
+        "mul	x7, x14, x20\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "umulh	x8, x14, x20\n\t"
+        "adc	x6, xzr, xzr\n\t"
+        "adds	x4, x4, x7\n\t"
+        "adcs	x5, x5, x8\n\t"
+        "str	x4, [%[r], 72]\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "#  A[5] * B[5]\n\t"
+        "mul	x7, x14, x21\n\t"
+        "umulh	x8, x14, x21\n\t"
+        "adds	x5, x5, x7\n\t"
+        "adc	x6, x6, x8\n\t"
+        "stp	x5, x6, [%[r], 80]\n\t"
+        "ldp	x9, x10, [%[tmp], 0]\n\t"
+        "ldp	x11, x12, [%[tmp], 16]\n\t"
+        "ldp	x13, x14, [%[tmp], 32]\n\t"
+        "stp	x9, x10, [%[r], 0]\n\t"
+        "stp	x11, x12, [%[r], 16]\n\t"
+        "stp	x13, x14, [%[r], 32]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [tmp] "r" (tmp)
+        : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21"
+    );
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * r  A single precision number representing condition subtract result.
+ * a  A single precision number to subtract from.
+ * b  A single precision number to subtract.
+ * m  Mask value to apply.
+ */
+static sp_digit sp_384_cond_sub_6(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        sp_digit m)
+{
+    __asm__ __volatile__ (
+
+        "ldp	x5, x7, [%[b], 0]\n\t"
+        "ldp	x11, x12, [%[b], 16]\n\t"
+        "ldp	x4, x6, [%[a], 0]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 16]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "subs	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "sbcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 0]\n\t"
+        "sbcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 16]\n\t"
+        "ldp	x5, x7, [%[b], 32]\n\t"
+        "ldp	x4, x6, [%[a], 32]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "sbcs	x4, x4, x5\n\t"
+        "sbcs	x6, x6, x7\n\t"
+        "stp	x4, x6, [%[r], 32]\n\t"
+        "csetm	%[r], cc\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12"
+    );
+
+    return (sp_digit)r;
+}
+
+#define sp_384_mont_reduce_order_6    sp_384_mont_reduce_6
+
+/* Reduce the number back to 384 bits using Montgomery reduction.
+ *
+ * a   A single precision number to reduce in place.
+ * m   The single precision number representing the modulus.
+ * mp  The digit representing the negative inverse of m mod 2^n.
+ */
+SP_NOINLINE static void sp_384_mont_reduce_6(sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_digit ca = 0;
+
+    __asm__ __volatile__ (
+        "ldp	x14, x15, [%[m], 0]\n\t"
+        "ldp	x16, x17, [%[m], 16]\n\t"
+        "ldp	x19, x20, [%[m], 32]\n\t"
+        "# i = 6\n\t"
+        "mov	x4, 6\n\t"
+        "ldp	x12, x13, [%[a], 0]\n\t"
+        "\n1:\n\t"
+        "# mu = a[i] * mp\n\t"
+        "mul	x9, %[mp], x12\n\t"
+        "# a[i+0] += m[0] * mu\n\t"
+        "mul	x7, x14, x9\n\t"
+        "umulh	x8, x14, x9\n\t"
+        "adds	x12, x12, x7\n\t"
+        "# a[i+1] += m[1] * mu\n\t"
+        "mul	x7, x15, x9\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "umulh	x8, x15, x9\n\t"
+        "adds	x12, x13, x7\n\t"
+        "# a[i+2] += m[2] * mu\n\t"
+        "ldr	x13, [%[a], 16]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "mul	x7, x16, x9\n\t"
+        "adds	x12, x12, x6\n\t"
+        "umulh	x8, x16, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "adds	x13, x13, x7\n\t"
+        "# a[i+3] += m[3] * mu\n\t"
+        "ldr	x10, [%[a], 24]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "mul	x7, x17, x9\n\t"
+        "adds	x13, x13, x5\n\t"
+        "umulh	x8, x17, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "adds	x10, x10, x7\n\t"
+        "# a[i+4] += m[4] * mu\n\t"
+        "ldr	x11, [%[a], 32]\n\t"
+        "adc	x5, x8, xzr\n\t"
+        "adds	x10, x10, x6\n\t"
+        "mul	x7, x19, x9\n\t"
+        "adc	x5, x5, xzr\n\t"
+        "umulh	x8, x19, x9\n\t"
+        "str	x10, [%[a], 24]\n\t"
+        "adds	x11, x11, x7\n\t"
+        "# a[i+5] += m[5] * mu\n\t"
+        "ldr	x10, [%[a], 40]\n\t"
+        "adc	x6, x8, xzr\n\t"
+        "adds	x11, x11, x5\n\t"
+        "mul	x7, x20, x9\n\t"
+        "adc	x6, x6, xzr\n\t"
+        "umulh	x8, x20, x9\n\t"
+        "adds	x6, x6, x7\n\t"
+        "adcs	x8, x8, %[ca]\n\t"
+        "str	x11, [%[a], 32]\n\t"
+        "cset  %[ca], cs\n\t"
+        "adds	x10, x10, x6\n\t"
+        "ldr	x11, [%[a], 48]\n\t"
+        "str	x10, [%[a], 40]\n\t"
+        "adcs	x11, x11, x8\n\t"
+        "str	x11, [%[a], 48]\n\t"
+        "adc	%[ca], %[ca], xzr\n\t"
+        "subs	x4, x4, 1\n\t"
+        "add	%[a], %[a], 8\n\t"
+        "bne	1b\n\t"
+        "stp	x12, x13, [%[a], 0]\n\t"
+        : [ca] "+r" (ca), [a] "+r" (a)
+        : [m] "r" (m), [mp] "r" (mp)
+        : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20"
+    );
+
+    sp_384_cond_sub_6(a - 6, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montogmery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r   Result of multiplication.
+ * a   First number to multiply in Montogmery form.
+ * b   Second number to multiply in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_384_mont_mul_6(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_384_mul_6(r, a, b);
+    sp_384_mont_reduce_6(r, m, mp);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+static void sp_384_sqr_6(sp_digit* r, const sp_digit* a)
+{
+    sp_digit tmp[12];
+
+    __asm__ __volatile__ (
+        "mov	x6, 0\n\t"
+        "mov	x7, 0\n\t"
+        "mov	x8, 0\n\t"
+        "mov	x5, 0\n\t"
+        "\n1:\n\t"
+        "subs	x3, x5, 40\n\t"
+        "csel	x3, xzr, x3, cc\n\t"
+        "sub	x4, x5, x3\n\t"
+        "\n2:\n\t"
+        "cmp	x4, x3\n\t"
+        "b.eq	4f\n\t"
+        "ldr	x10, [%[a], x3]\n\t"
+        "ldr	x11, [%[a], x4]\n\t"
+        "mul	x9, x10, x11\n\t"
+        "umulh	x10, x10, x11\n\t"
+        "adds	x6, x6, x9\n\t"
+        "adcs	x7, x7, x10\n\t"
+        "adc	x8, x8, xzr\n\t"
+        "adds	x6, x6, x9\n\t"
+        "adcs	x7, x7, x10\n\t"
+        "adc	x8, x8, xzr\n\t"
+        "b.al	5f\n\t"
+        "\n4:\n\t"
+        "ldr	x10, [%[a], x3]\n\t"
+        "mul	x9, x10, x10\n\t"
+        "umulh	x10, x10, x10\n\t"
+        "adds	x6, x6, x9\n\t"
+        "adcs	x7, x7, x10\n\t"
+        "adc	x8, x8, xzr\n\t"
+        "\n5:\n\t"
+        "add	x3, x3, #8\n\t"
+        "sub	x4, x4, #8\n\t"
+        "cmp	x3, 48\n\t"
+        "b.eq	3f\n\t"
+        "cmp	x3, x4\n\t"
+        "b.gt	3f\n\t"
+        "cmp	x3, x5\n\t"
+        "b.le	2b\n\t"
+        "\n3:\n\t"
+        "str	x6, [%[r], x5]\n\t"
+        "mov	x6, x7\n\t"
+        "mov	x7, x8\n\t"
+        "mov	x8, #0\n\t"
+        "add	x5, x5, #8\n\t"
+        "cmp	x5, 80\n\t"
+        "b.le	1b\n\t"
+        "str	x6, [%[r], x5]\n\t"
+        :
+        : [r] "r" (tmp), [a] "r" (a)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11"
+    );
+
+    XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+#else
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+static void sp_384_sqr_6(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "ldp       x17, x19, [%[a], 0]\n\t"
+        "ldp       x20, x21, [%[a], 16]\n\t"
+        "ldp       x22, x23, [%[a], 32]\n\t"
+        "#  A[0] * A[1]\n\t"
+        "mul	x3, x17, x19\n\t"
+        "umulh	x7, x17, x19\n\t"
+        "#  A[0] * A[2]\n\t"
+        "mul	x4, x17, x20\n\t"
+        "umulh	x5, x17, x20\n\t"
+        "adds	x7, x7, x4\n\t"
+        "#  A[0] * A[3]\n\t"
+        "mul	x4, x17, x21\n\t"
+        "adc	x8, xzr, x5\n\t"
+        "umulh	x5, x17, x21\n\t"
+        "adds	x8, x8, x4\n\t"
+        "#  A[1] * A[2]\n\t"
+        "mul	x4, x19, x20\n\t"
+        "adc	x9, xzr, x5\n\t"
+        "umulh	x5, x19, x20\n\t"
+        "adds	x8, x8, x4\n\t"
+        "#  A[0] * A[4]\n\t"
+        "mul	x4, x17, x22\n\t"
+        "adcs	x9, x9, x5\n\t"
+        "umulh	x5, x17, x22\n\t"
+        "adc	x10, xzr, xzr\n\t"
+        "adds	x9, x9, x4\n\t"
+        "#  A[1] * A[3]\n\t"
+        "mul	x4, x19, x21\n\t"
+        "adc	x10, x10, x5\n\t"
+        "umulh	x5, x19, x21\n\t"
+        "adds	x9, x9, x4\n\t"
+        "#  A[0] * A[5]\n\t"
+        "mul	x4, x17, x23\n\t"
+        "adcs	x10, x10, x5\n\t"
+        "umulh	x5, x17, x23\n\t"
+        "adc	x11, xzr, xzr\n\t"
+        "adds	x10, x10, x4\n\t"
+        "#  A[1] * A[4]\n\t"
+        "mul	x4, x19, x22\n\t"
+        "adc	x11, x11, x5\n\t"
+        "umulh	x5, x19, x22\n\t"
+        "adds	x10, x10, x4\n\t"
+        "#  A[2] * A[3]\n\t"
+        "mul	x4, x20, x21\n\t"
+        "adcs	x11, x11, x5\n\t"
+        "umulh	x5, x20, x21\n\t"
+        "adc	x12, xzr, xzr\n\t"
+        "adds	x10, x10, x4\n\t"
+        "#  A[1] * A[5]\n\t"
+        "mul	x4, x19, x23\n\t"
+        "adcs	x11, x11, x5\n\t"
+        "umulh	x5, x19, x23\n\t"
+        "adc	x12, x12, xzr\n\t"
+        "adds	x11, x11, x4\n\t"
+        "#  A[2] * A[4]\n\t"
+        "mul	x4, x20, x22\n\t"
+        "adcs	x12, x12, x5\n\t"
+        "umulh	x5, x20, x22\n\t"
+        "adc	x13, xzr, xzr\n\t"
+        "adds	x11, x11, x4\n\t"
+        "#  A[2] * A[5]\n\t"
+        "mul	x4, x20, x23\n\t"
+        "adcs	x12, x12, x5\n\t"
+        "umulh	x5, x20, x23\n\t"
+        "adc	x13, x13, xzr\n\t"
+        "adds	x12, x12, x4\n\t"
+        "#  A[3] * A[4]\n\t"
+        "mul	x4, x21, x22\n\t"
+        "adcs	x13, x13, x5\n\t"
+        "umulh	x5, x21, x22\n\t"
+        "adc	x14, xzr, xzr\n\t"
+        "adds	x12, x12, x4\n\t"
+        "#  A[3] * A[5]\n\t"
+        "mul	x4, x21, x23\n\t"
+        "adcs	x13, x13, x5\n\t"
+        "umulh	x5, x21, x23\n\t"
+        "adc	x14, x14, xzr\n\t"
+        "adds	x13, x13, x4\n\t"
+        "#  A[4] * A[5]\n\t"
+        "mul	x4, x22, x23\n\t"
+        "adcs	x14, x14, x5\n\t"
+        "umulh	x5, x22, x23\n\t"
+        "adc	x15, xzr, xzr\n\t"
+        "adds	x14, x14, x4\n\t"
+        "adc	x15, x15, x5\n\t"
+        "# Double\n\t"
+        "adds	x3, x3, x3\n\t"
+        "adcs	x7, x7, x7\n\t"
+        "adcs	x8, x8, x8\n\t"
+        "adcs	x9, x9, x9\n\t"
+        "adcs	x10, x10, x10\n\t"
+        "adcs	x11, x11, x11\n\t"
+        "adcs	x12, x12, x12\n\t"
+        "adcs	x13, x13, x13\n\t"
+        "adcs	x14, x14, x14\n\t"
+        "#  A[0] * A[0]\n\t"
+        "mul	x2, x17, x17\n\t"
+        "adcs	x15, x15, x15\n\t"
+        "umulh	x4, x17, x17\n\t"
+        "cset  x16, cs\n\t"
+        "#  A[1] * A[1]\n\t"
+        "mul	x5, x19, x19\n\t"
+        "adds	x3, x3, x4\n\t"
+        "umulh	x6, x19, x19\n\t"
+        "adcs	x7, x7, x5\n\t"
+        "#  A[2] * A[2]\n\t"
+        "mul	x4, x20, x20\n\t"
+        "adcs	x8, x8, x6\n\t"
+        "umulh	x5, x20, x20\n\t"
+        "adcs	x9, x9, x4\n\t"
+        "#  A[3] * A[3]\n\t"
+        "mul	x6, x21, x21\n\t"
+        "adcs	x10, x10, x5\n\t"
+        "umulh	x4, x21, x21\n\t"
+        "adcs	x11, x11, x6\n\t"
+        "#  A[4] * A[4]\n\t"
+        "mul	x5, x22, x22\n\t"
+        "adcs	x12, x12, x4\n\t"
+        "umulh	x6, x22, x22\n\t"
+        "adcs	x13, x13, x5\n\t"
+        "#  A[5] * A[5]\n\t"
+        "mul	x4, x23, x23\n\t"
+        "adcs	x14, x14, x6\n\t"
+        "umulh	x5, x23, x23\n\t"
+        "adcs	x15, x15, x4\n\t"
+        "stp	x2, x3, [%[r], 0]\n\t"
+        "adc	x16, x16, x5\n\t"
+        "stp	x7, x8, [%[r], 16]\n\t"
+        "stp	x9, x10, [%[r], 32]\n\t"
+        "stp	x11, x12, [%[r], 48]\n\t"
+        "stp	x13, x14, [%[r], 64]\n\t"
+        "stp	x15, x16, [%[r], 80]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "x4", "x5", "x6", "x2", "x3", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23"
+    );
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_384_mont_sqr_6(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_384_sqr_6(r, a);
+    sp_384_mont_reduce_6(r, m, mp);
+}
+
+#if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY)
+/* Square the Montgomery form number a number of times. (r = a ^ n mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montogmery form.
+ * n   Number of times to square.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_384_mont_sqr_n_6(sp_digit* r, const sp_digit* a, int n,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_384_mont_sqr_6(r, a, m, mp);
+    for (; n > 1; n--) {
+        sp_384_mont_sqr_6(r, r, m, mp);
+    }
+}
+
+#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */
+#ifdef WOLFSSL_SP_SMALL
+/* Mod-2 for the P384 curve. */
+static const uint64_t p384_mod_minus_2[6] = {
+    0x00000000fffffffdU,0xffffffff00000000U,0xfffffffffffffffeU,
+    0xffffffffffffffffU,0xffffffffffffffffU,0xffffffffffffffffU
+};
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the modulus (prime) of the
+ * P384 curve. (r = 1 / a mod m)
+ *
+ * r   Inverse result.
+ * a   Number to invert.
+ * td  Temporary data.
+ */
+static void sp_384_mont_inv_6(sp_digit* r, const sp_digit* a, sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    XMEMCPY(t, a, sizeof(sp_digit) * 6);
+    for (i=382; i>=0; i--) {
+        sp_384_mont_sqr_6(t, t, p384_mod, p384_mp_mod);
+        if (p384_mod_minus_2[i / 64] & ((sp_digit)1 << (i % 64)))
+            sp_384_mont_mul_6(t, t, a, p384_mod, p384_mp_mod);
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 6);
+#else
+    sp_digit* t1 = td;
+    sp_digit* t2 = td + 2 * 6;
+    sp_digit* t3 = td + 4 * 6;
+    sp_digit* t4 = td + 6 * 6;
+    sp_digit* t5 = td + 8 * 6;
+
+    /* 0x2 */
+    sp_384_mont_sqr_6(t1, a, p384_mod, p384_mp_mod);
+    /* 0x3 */
+    sp_384_mont_mul_6(t5, t1, a, p384_mod, p384_mp_mod);
+    /* 0xc */
+    sp_384_mont_sqr_n_6(t1, t5, 2, p384_mod, p384_mp_mod);
+    /* 0xf */
+    sp_384_mont_mul_6(t2, t5, t1, p384_mod, p384_mp_mod);
+    /* 0x1e */
+    sp_384_mont_sqr_6(t1, t2, p384_mod, p384_mp_mod);
+    /* 0x1f */
+    sp_384_mont_mul_6(t4, t1, a, p384_mod, p384_mp_mod);
+    /* 0x3e0 */
+    sp_384_mont_sqr_n_6(t1, t4, 5, p384_mod, p384_mp_mod);
+    /* 0x3ff */
+    sp_384_mont_mul_6(t2, t4, t1, p384_mod, p384_mp_mod);
+    /* 0x7fe0 */
+    sp_384_mont_sqr_n_6(t1, t2, 5, p384_mod, p384_mp_mod);
+    /* 0x7fff */
+    sp_384_mont_mul_6(t4, t4, t1, p384_mod, p384_mp_mod);
+    /* 0x3fff8000 */
+    sp_384_mont_sqr_n_6(t1, t4, 15, p384_mod, p384_mp_mod);
+    /* 0x3fffffff */
+    sp_384_mont_mul_6(t2, t4, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffc */
+    sp_384_mont_sqr_n_6(t3, t2, 2, p384_mod, p384_mp_mod);
+    /* 0xfffffffd */
+    sp_384_mont_mul_6(r, t3, a, p384_mod, p384_mp_mod);
+    /* 0xffffffff */
+    sp_384_mont_mul_6(t3, t5, t3, p384_mod, p384_mp_mod);
+    /* 0xfffffffc0000000 */
+    sp_384_mont_sqr_n_6(t1, t2, 30, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffff */
+    sp_384_mont_mul_6(t2, t2, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffff000000000000000 */
+    sp_384_mont_sqr_n_6(t1, t2, 60, p384_mod, p384_mp_mod);
+    /* 0xffffffffffffffffffffffffffffff */
+    sp_384_mont_mul_6(t2, t2, t1, p384_mod, p384_mp_mod);
+    /* 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */
+    sp_384_mont_sqr_n_6(t1, t2, 120, p384_mod, p384_mp_mod);
+    /* 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+    sp_384_mont_mul_6(t2, t2, t1, p384_mod, p384_mp_mod);
+    /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */
+    sp_384_mont_sqr_n_6(t1, t2, 15, p384_mod, p384_mp_mod);
+    /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+    sp_384_mont_mul_6(t2, t4, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe00000000 */
+    sp_384_mont_sqr_n_6(t1, t2, 33, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff */
+    sp_384_mont_mul_6(t2, t3, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff000000000000000000000000 */
+    sp_384_mont_sqr_n_6(t1, t2, 96, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000fffffffd */
+    sp_384_mont_mul_6(r, r, t1, p384_mod, p384_mp_mod);
+
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Compare a with b in constant time.
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+static int64_t sp_384_cmp_6(const sp_digit* a, const sp_digit* b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    __asm__ __volatile__ (
+        "mov	x2, -1\n\t"
+        "mov	x3, 1\n\t"
+        "mov	x4, -1\n\t"
+        "mov	x5, 40\n\t"
+        "1:\n\t"
+        "ldr	x6, [%[a], x5]\n\t"
+        "ldr	x7, [%[b], x5]\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x7, x7, x4\n\t"
+        "subs	x6, x6, x7\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "subs	x5, x5, #8\n\t"
+        "b.cs	1b\n\t"
+        "eor	%[a], x2, x4\n\t"
+        : [a] "+r" (a)
+        : [b] "r" (b)
+        : "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16"
+    );
+#else
+    __asm__ __volatile__ (
+        "mov	x2, -1\n\t"
+        "mov	x3, 1\n\t"
+        "mov	x4, -1\n\t"
+        "ldp	x5, x6, [%[a], 0]\n\t"
+        "ldp	x7, x8, [%[a], 16]\n\t"
+        "ldp	x9, x10, [%[a], 32]\n\t"
+        "ldp	x11, x12, [%[b], 0]\n\t"
+        "ldp	x13, x14, [%[b], 16]\n\t"
+        "ldp	x15, x16, [%[b], 32]\n\t"
+        "and	x10, x10, x4\n\t"
+        "and	x16, x16, x4\n\t"
+        "subs	x10, x10, x16\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x9, x9, x4\n\t"
+        "and	x15, x15, x4\n\t"
+        "subs	x9, x9, x15\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x8, x8, x4\n\t"
+        "and	x14, x14, x4\n\t"
+        "subs	x8, x8, x14\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x7, x7, x4\n\t"
+        "and	x13, x13, x4\n\t"
+        "subs	x7, x7, x13\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x6, x6, x4\n\t"
+        "and	x12, x12, x4\n\t"
+        "subs	x6, x6, x12\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "and	x5, x5, x4\n\t"
+        "and	x11, x11, x4\n\t"
+        "subs	x5, x5, x11\n\t"
+        "csel	x2, x4, x2, lo\n\t"
+        "csel	x4, x4, xzr, eq\n\t"
+        "csel	x2, x3, x2, hi\n\t"
+        "eor	%[a], x2, x4\n\t"
+        : [a] "+r" (a)
+        : [b] "r" (b)
+        : "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16"
+    );
+#endif
+
+    return (int64_t)a;
+}
+
+/* Normalize the values in each word to 64.
+ *
+ * a  Array of sp_digit to normalize.
+ */
+#define sp_384_norm_6(a)
+
+/* Map the Montgomery form projective coordinate point to an affine point.
+ *
+ * r  Resulting affine coordinate point.
+ * p  Montgomery form projective coordinate point.
+ * t  Temporary ordinate data.
+ */
+static void sp_384_map_6(sp_point_384* r, const sp_point_384* p, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*6;
+    int64_t n;
+
+    sp_384_mont_inv_6(t1, p->z, t + 2*6);
+
+    sp_384_mont_sqr_6(t2, t1, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_6(t1, t2, t1, p384_mod, p384_mp_mod);
+
+    /* x /= z^2 */
+    sp_384_mont_mul_6(r->x, p->x, t2, p384_mod, p384_mp_mod);
+    XMEMSET(r->x + 6, 0, sizeof(r->x) / 2U);
+    sp_384_mont_reduce_6(r->x, p384_mod, p384_mp_mod);
+    /* Reduce x to less than modulus */
+    n = sp_384_cmp_6(r->x, p384_mod);
+    sp_384_cond_sub_6(r->x, r->x, p384_mod, 0 - ((n >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_384_norm_6(r->x);
+
+    /* y /= z^3 */
+    sp_384_mont_mul_6(r->y, p->y, t1, p384_mod, p384_mp_mod);
+    XMEMSET(r->y + 6, 0, sizeof(r->y) / 2U);
+    sp_384_mont_reduce_6(r->y, p384_mod, p384_mp_mod);
+    /* Reduce y to less than modulus */
+    n = sp_384_cmp_6(r->y, p384_mod);
+    sp_384_cond_sub_6(r->y, r->y, p384_mod, 0 - ((n >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_384_norm_6(r->y);
+
+    XMEMSET(r->z, 0, sizeof(r->z));
+    r->z[0] = 1;
+
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+static sp_digit sp_384_add_6(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    __asm__ __volatile__ (
+        "ldp	x3, x4, [%[a], 0]\n\t"
+        "ldp	x7, x8, [%[b], 0]\n\t"
+        "adds	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 16]\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 16]\n\t"
+        "adcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 0]\n\t"
+        "adcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 16]\n\t"
+        "ldr		x3, [%[a], 32]\n\t"
+        "ldr		x4, [%[a], 40]\n\t"
+        "ldr		x7, [%[b], 32]\n\t"
+        "ldr		x8, [%[b], 40]\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "adcs	x4, x4, x8\n\t"
+        "str		x3, [%[r], 32]\n\t"
+        "str		x4, [%[r], 40]\n\t"
+        "cset	%[r], cs\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10"
+    );
+
+    return (sp_digit)r;
+}
+
+/* Add two Montgomery form numbers (r = a + b % m).
+ *
+ * r   Result of addition.
+ * a   First number to add in Montogmery form.
+ * b   Second number to add in Montogmery form.
+ * m   Modulus (prime).
+ */
+static void sp_384_mont_add_6(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    sp_digit o;
+
+    o = sp_384_add_6(r, a, b);
+    sp_384_cond_sub_6(r, r, m, 0 - o);
+}
+
+/* Double a Montgomery form number (r = a + a % m).
+ *
+ * r   Result of doubling.
+ * a   Number to double in Montogmery form.
+ * m   Modulus (prime).
+ */
+static void sp_384_mont_dbl_6(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    sp_digit o;
+
+    o = sp_384_add_6(r, a, a);
+    sp_384_cond_sub_6(r, r, m, 0 - o);
+}
+
+/* Triple a Montgomery form number (r = a + a + a % m).
+ *
+ * r   Result of Tripling.
+ * a   Number to triple in Montogmery form.
+ * m   Modulus (prime).
+ */
+static void sp_384_mont_tpl_6(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    sp_digit o;
+
+    o = sp_384_add_6(r, a, a);
+    sp_384_cond_sub_6(r, r, m, 0 - o);
+    o = sp_384_add_6(r, r, a);
+    sp_384_cond_sub_6(r, r, m, 0 - o);
+}
+
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+static sp_digit sp_384_sub_6(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    __asm__ __volatile__ (
+        "ldp	x3, x4, [%[a], 0]\n\t"
+        "ldp	x7, x8, [%[b], 0]\n\t"
+        "subs	x3, x3, x7\n\t"
+        "ldp	x5, x6, [%[a], 16]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "ldp	x9, x10, [%[b], 16]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x3, x4, [%[r], 0]\n\t"
+        "sbcs	x6, x6, x10\n\t"
+        "stp	x5, x6, [%[r], 16]\n\t"
+        "ldr		x3, [%[a], 32]\n\t"
+        "ldr		x4, [%[a], 40]\n\t"
+        "ldr		x7, [%[b], 32]\n\t"
+        "ldr		x8, [%[b], 40]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "str		x3, [%[r], 32]\n\t"
+        "str		x4, [%[r], 40]\n\t"
+        "csetm	%[r], cc\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10"
+    );
+
+    return (sp_digit)r;
+}
+
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r  A single precision number representing conditional add result.
+ * a  A single precision number to add with.
+ * b  A single precision number to add.
+ * m  Mask value to apply.
+ */
+static sp_digit sp_384_cond_add_6(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	x8, #0\n\t"
+        "1:\n\t"
+        "adds	%[c], %[c], #-1\n\t"
+        "ldr	x4, [%[a], x8]\n\t"
+        "ldr	x5, [%[b], x8]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "adcs	x4, x4, x5\n\t"
+        "cset	%[c], cs\n\t"
+        "str	x4, [%[r], x8]\n\t"
+        "add	x8, x8, #8\n\t"
+        "cmp	x8, 48\n\t"
+        "b.lt	1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12"
+    );
+
+    return c;
+#else
+    __asm__ __volatile__ (
+
+        "ldp	x5, x7, [%[b], 0]\n\t"
+        "ldp	x11, x12, [%[b], 16]\n\t"
+        "ldp	x4, x6, [%[a], 0]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "ldp	x9, x10, [%[a], 16]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "adds	x4, x4, x5\n\t"
+        "and	x11, x11, %[m]\n\t"
+        "adcs	x6, x6, x7\n\t"
+        "and	x12, x12, %[m]\n\t"
+        "adcs	x9, x9, x11\n\t"
+        "stp	x4, x6, [%[r], 0]\n\t"
+        "adcs	x10, x10, x12\n\t"
+        "stp	x9, x10, [%[r], 16]\n\t"
+        "ldp	x5, x7, [%[b], 32]\n\t"
+        "ldp	x4, x6, [%[a], 32]\n\t"
+        "and	x5, x5, %[m]\n\t"
+        "and	x7, x7, %[m]\n\t"
+        "adcs	x4, x4, x5\n\t"
+        "adcs	x6, x6, x7\n\t"
+        "stp	x4, x6, [%[r], 32]\n\t"
+        "cset	%[r], cs\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12"
+    );
+
+    return (sp_digit)r;
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Subtract two Montgomery form numbers (r = a - b % m).
+ *
+ * r   Result of subtration.
+ * a   Number to subtract from in Montogmery form.
+ * b   Number to subtract with in Montogmery form.
+ * m   Modulus (prime).
+ */
+static void sp_384_mont_sub_6(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    sp_digit o;
+
+    o = sp_384_sub_6(r, a, b);
+    sp_384_cond_add_6(r, r, m, o);
+}
+
+static void sp_384_rshift1_6(sp_digit* r, sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "ldp	x2, x3, [%[a]]\n\t"
+        "ldp	x4, x5, [%[a], 16]\n\t"
+        "ldp	x6, x7, [%[a], 32]\n\t"
+        "lsr	x11, x6, 1\n\t"
+        "lsr	x10, x5, 1\n\t"
+        "lsr	x9, x4, 1\n\t"
+        "lsr	x8, x3, 1\n\t"
+        "lsr	x2, x2, 1\n\t"
+        "orr	x2, x2, x3, lsl 63\n\t"
+        "orr	x3, x8, x4, lsl 63\n\t"
+        "orr	x4, x9, x5, lsl 63\n\t"
+        "orr	x5, x10, x6, lsl 63\n\t"
+        "orr	x6, x11, x7, lsl 63\n\t"
+        "lsr	x7, x7, 1\n\t"
+        "stp	x2, x3, [%[r]]\n\t"
+        "stp	x4, x5, [%[r], 16]\n\t"
+        "stp	x6, x7, [%[r], 32]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11"
+    );
+}
+
+/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
+ *
+ * r  Result of division by 2.
+ * a  Number to divide.
+ * m  Modulus (prime).
+ */
+static void sp_384_div2_6(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    sp_digit o;
+
+    o = sp_384_cond_add_6(r, a, m, 0 - (a[0] & 1));
+    sp_384_rshift1_6(r, r);
+    r[5] |= o << 63;
+}
+
+/* Double the Montgomery form projective point p.
+ *
+ * r  Result of doubling point.
+ * p  Point to double.
+ * t  Temporary ordinate data.
+ */
+static void sp_384_proj_point_dbl_6(sp_point_384* r, const sp_point_384* p, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*6;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+
+    x = r->x;
+    y = r->y;
+    z = r->z;
+    /* Put infinity into result. */
+    if (r != p) {
+        r->infinity = p->infinity;
+    }
+
+    /* T1 = Z * Z */
+    sp_384_mont_sqr_6(t1, p->z, p384_mod, p384_mp_mod);
+    /* Z = Y * Z */
+    sp_384_mont_mul_6(z, p->y, p->z, p384_mod, p384_mp_mod);
+    /* Z = 2Z */
+    sp_384_mont_dbl_6(z, z, p384_mod);
+    /* T2 = X - T1 */
+    sp_384_mont_sub_6(t2, p->x, t1, p384_mod);
+    /* T1 = X + T1 */
+    sp_384_mont_add_6(t1, p->x, t1, p384_mod);
+    /* T2 = T1 * T2 */
+    sp_384_mont_mul_6(t2, t1, t2, p384_mod, p384_mp_mod);
+    /* T1 = 3T2 */
+    sp_384_mont_tpl_6(t1, t2, p384_mod);
+    /* Y = 2Y */
+    sp_384_mont_dbl_6(y, p->y, p384_mod);
+    /* Y = Y * Y */
+    sp_384_mont_sqr_6(y, y, p384_mod, p384_mp_mod);
+    /* T2 = Y * Y */
+    sp_384_mont_sqr_6(t2, y, p384_mod, p384_mp_mod);
+    /* T2 = T2/2 */
+    sp_384_div2_6(t2, t2, p384_mod);
+    /* Y = Y * X */
+    sp_384_mont_mul_6(y, y, p->x, p384_mod, p384_mp_mod);
+    /* X = T1 * T1 */
+    sp_384_mont_sqr_6(x, t1, p384_mod, p384_mp_mod);
+    /* X = X - Y */
+    sp_384_mont_sub_6(x, x, y, p384_mod);
+    /* X = X - Y */
+    sp_384_mont_sub_6(x, x, y, p384_mod);
+    /* Y = Y - X */
+    sp_384_mont_sub_6(y, y, x, p384_mod);
+    /* Y = Y * T1 */
+    sp_384_mont_mul_6(y, y, t1, p384_mod, p384_mp_mod);
+    /* Y = Y - T2 */
+    sp_384_mont_sub_6(y, y, t2, p384_mod);
+}
+
+/* Double the Montgomery form projective point p a number of times.
+ *
+ * r  Result of repeated doubling of point.
+ * p  Point to double.
+ * n  Number of times to double
+ * t  Temporary ordinate data.
+ */
+static void sp_384_proj_point_dbl_n_6(sp_point_384* p, int n, sp_digit* t)
+{
+    sp_digit* w = t;
+    sp_digit* a = t + 2*6;
+    sp_digit* b = t + 4*6;
+    sp_digit* t1 = t + 6*6;
+    sp_digit* t2 = t + 8*6;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+
+    x = p->x;
+    y = p->y;
+    z = p->z;
+
+    /* Y = 2*Y */
+    sp_384_mont_dbl_6(y, y, p384_mod);
+    /* W = Z^4 */
+    sp_384_mont_sqr_6(w, z, p384_mod, p384_mp_mod);
+    sp_384_mont_sqr_6(w, w, p384_mod, p384_mp_mod);
+
+#ifndef WOLFSSL_SP_SMALL
+    while (--n > 0)
+#else
+    while (--n >= 0)
+#endif
+    {
+        /* A = 3*(X^2 - W) */
+        sp_384_mont_sqr_6(t1, x, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_6(t1, t1, w, p384_mod);
+        sp_384_mont_tpl_6(a, t1, p384_mod);
+        /* B = X*Y^2 */
+        sp_384_mont_sqr_6(t1, y, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_6(b, t1, x, p384_mod, p384_mp_mod);
+        /* X = A^2 - 2B */
+        sp_384_mont_sqr_6(x, a, p384_mod, p384_mp_mod);
+        sp_384_mont_dbl_6(t2, b, p384_mod);
+        sp_384_mont_sub_6(x, x, t2, p384_mod);
+        /* Z = Z*Y */
+        sp_384_mont_mul_6(z, z, y, p384_mod, p384_mp_mod);
+        /* t2 = Y^4 */
+        sp_384_mont_sqr_6(t1, t1, p384_mod, p384_mp_mod);
+#ifdef WOLFSSL_SP_SMALL
+        if (n != 0)
+#endif
+        {
+            /* W = W*Y^4 */
+            sp_384_mont_mul_6(w, w, t1, p384_mod, p384_mp_mod);
+        }
+        /* y = 2*A*(B - X) - Y^4 */
+        sp_384_mont_sub_6(y, b, x, p384_mod);
+        sp_384_mont_mul_6(y, y, a, p384_mod, p384_mp_mod);
+        sp_384_mont_dbl_6(y, y, p384_mod);
+        sp_384_mont_sub_6(y, y, t1, p384_mod);
+    }
+#ifndef WOLFSSL_SP_SMALL
+    /* A = 3*(X^2 - W) */
+    sp_384_mont_sqr_6(t1, x, p384_mod, p384_mp_mod);
+    sp_384_mont_sub_6(t1, t1, w, p384_mod);
+    sp_384_mont_tpl_6(a, t1, p384_mod);
+    /* B = X*Y^2 */
+    sp_384_mont_sqr_6(t1, y, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_6(b, t1, x, p384_mod, p384_mp_mod);
+    /* X = A^2 - 2B */
+    sp_384_mont_sqr_6(x, a, p384_mod, p384_mp_mod);
+    sp_384_mont_dbl_6(t2, b, p384_mod);
+    sp_384_mont_sub_6(x, x, t2, p384_mod);
+    /* Z = Z*Y */
+    sp_384_mont_mul_6(z, z, y, p384_mod, p384_mp_mod);
+    /* t2 = Y^4 */
+    sp_384_mont_sqr_6(t1, t1, p384_mod, p384_mp_mod);
+    /* y = 2*A*(B - X) - Y^4 */
+    sp_384_mont_sub_6(y, b, x, p384_mod);
+    sp_384_mont_mul_6(y, y, a, p384_mod, p384_mp_mod);
+    sp_384_mont_dbl_6(y, y, p384_mod);
+    sp_384_mont_sub_6(y, y, t1, p384_mod);
+#endif
+    /* Y = Y/2 */
+    sp_384_div2_6(y, y, p384_mod);
+}
+
+/* Compare two numbers to determine if they are equal.
+ * Constant time implementation.
+ *
+ * a  First number to compare.
+ * b  Second number to compare.
+ * returns 1 when equal and 0 otherwise.
+ */
+static int sp_384_cmp_equal_6(const sp_digit* a, const sp_digit* b)
+{
+    return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3]) |
+            (a[4] ^ b[4]) | (a[5] ^ b[5])) == 0;
+}
+
+/* Add two Montgomery form projective points.
+ *
+ * r  Result of addition.
+ * p  First point to add.
+ * q  Second point to add.
+ * t  Temporary ordinate data.
+ */
+static void sp_384_proj_point_add_6(sp_point_384* r, const sp_point_384* p, const sp_point_384* q,
+        sp_digit* t)
+{
+    const sp_point_384* ap[2];
+    sp_point_384* rp[2];
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*6;
+    sp_digit* t3 = t + 4*6;
+    sp_digit* t4 = t + 6*6;
+    sp_digit* t5 = t + 8*6;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+    int i;
+
+    /* Ensure only the first point is the same as the result. */
+    if (q == r) {
+        const sp_point_384* a = p;
+        p = q;
+        q = a;
+    }
+
+    /* Check double */
+    (void)sp_384_sub_6(t1, p384_mod, q->y);
+    sp_384_norm_6(t1);
+    if ((sp_384_cmp_equal_6(p->x, q->x) & sp_384_cmp_equal_6(p->z, q->z) &
+        (sp_384_cmp_equal_6(p->y, q->y) | sp_384_cmp_equal_6(p->y, t1))) != 0) {
+        sp_384_proj_point_dbl_6(r, p, t);
+    }
+    else {
+        rp[0] = r;
+
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point_384));
+        x = rp[p->infinity | q->infinity]->x;
+        y = rp[p->infinity | q->infinity]->y;
+        z = rp[p->infinity | q->infinity]->z;
+
+        ap[0] = p;
+        ap[1] = q;
+        for (i=0; i<6; i++) {
+            r->x[i] = ap[p->infinity]->x[i];
+        }
+        for (i=0; i<6; i++) {
+            r->y[i] = ap[p->infinity]->y[i];
+        }
+        for (i=0; i<6; i++) {
+            r->z[i] = ap[p->infinity]->z[i];
+        }
+        r->infinity = ap[p->infinity]->infinity;
+
+        /* U1 = X1*Z2^2 */
+        sp_384_mont_sqr_6(t1, q->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_6(t3, t1, q->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_6(t1, t1, x, p384_mod, p384_mp_mod);
+        /* U2 = X2*Z1^2 */
+        sp_384_mont_sqr_6(t2, z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_6(t4, t2, z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_6(t2, t2, q->x, p384_mod, p384_mp_mod);
+        /* S1 = Y1*Z2^3 */
+        sp_384_mont_mul_6(t3, t3, y, p384_mod, p384_mp_mod);
+        /* S2 = Y2*Z1^3 */
+        sp_384_mont_mul_6(t4, t4, q->y, p384_mod, p384_mp_mod);
+        /* H = U2 - U1 */
+        sp_384_mont_sub_6(t2, t2, t1, p384_mod);
+        /* R = S2 - S1 */
+        sp_384_mont_sub_6(t4, t4, t3, p384_mod);
+        /* Z3 = H*Z1*Z2 */
+        sp_384_mont_mul_6(z, z, q->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_6(z, z, t2, p384_mod, p384_mp_mod);
+        /* X3 = R^2 - H^3 - 2*U1*H^2 */
+        sp_384_mont_sqr_6(x, t4, p384_mod, p384_mp_mod);
+        sp_384_mont_sqr_6(t5, t2, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_6(y, t1, t5, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_6(t5, t5, t2, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_6(x, x, t5, p384_mod);
+        sp_384_mont_dbl_6(t1, y, p384_mod);
+        sp_384_mont_sub_6(x, x, t1, p384_mod);
+        /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
+        sp_384_mont_sub_6(y, y, x, p384_mod);
+        sp_384_mont_mul_6(y, y, t4, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_6(t5, t5, t3, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_6(y, y, t5, p384_mod);
+    }
+}
+
+/* Double the Montgomery form projective point p a number of times.
+ *
+ * r  Result of repeated doubling of point.
+ * p  Point to double.
+ * n  Number of times to double
+ * t  Temporary ordinate data.
+ */
+static void sp_384_proj_point_dbl_n_store_6(sp_point_384* r, const sp_point_384* p,
+        int n, int m, sp_digit* t)
+{
+    sp_digit* w = t;
+    sp_digit* a = t + 2*6;
+    sp_digit* b = t + 4*6;
+    sp_digit* t1 = t + 6*6;
+    sp_digit* t2 = t + 8*6;
+    sp_digit* x = r[2*m].x;
+    sp_digit* y = r[(1<<n)*m].y;
+    sp_digit* z = r[2*m].z;
+    int i;
+
+    for (i=0; i<6; i++) {
+        x[i] = p->x[i];
+    }
+    for (i=0; i<6; i++) {
+        y[i] = p->y[i];
+    }
+    for (i=0; i<6; i++) {
+        z[i] = p->z[i];
+    }
+
+    /* Y = 2*Y */
+    sp_384_mont_dbl_6(y, y, p384_mod);
+    /* W = Z^4 */
+    sp_384_mont_sqr_6(w, z, p384_mod, p384_mp_mod);
+    sp_384_mont_sqr_6(w, w, p384_mod, p384_mp_mod);
+    for (i=1; i<=n; i++) {
+        /* A = 3*(X^2 - W) */
+        sp_384_mont_sqr_6(t1, x, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_6(t1, t1, w, p384_mod);
+        sp_384_mont_tpl_6(a, t1, p384_mod);
+        /* B = X*Y^2 */
+        sp_384_mont_sqr_6(t2, y, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_6(b, t2, x, p384_mod, p384_mp_mod);
+        x = r[(1<<i)*m].x;
+        /* X = A^2 - 2B */
+        sp_384_mont_sqr_6(x, a, p384_mod, p384_mp_mod);
+        sp_384_mont_dbl_6(t1, b, p384_mod);
+        sp_384_mont_sub_6(x, x, t1, p384_mod);
+        /* Z = Z*Y */
+        sp_384_mont_mul_6(r[(1<<i)*m].z, z, y, p384_mod, p384_mp_mod);
+        z = r[(1<<i)*m].z;
+        /* t2 = Y^4 */
+        sp_384_mont_sqr_6(t2, t2, p384_mod, p384_mp_mod);
+        if (i != n) {
+            /* W = W*Y^4 */
+            sp_384_mont_mul_6(w, w, t2, p384_mod, p384_mp_mod);
+        }
+        /* y = 2*A*(B - X) - Y^4 */
+        sp_384_mont_sub_6(y, b, x, p384_mod);
+        sp_384_mont_mul_6(y, y, a, p384_mod, p384_mp_mod);
+        sp_384_mont_dbl_6(y, y, p384_mod);
+        sp_384_mont_sub_6(y, y, t2, p384_mod);
+
+        /* Y = Y/2 */
+        sp_384_div2_6(r[(1<<i)*m].y, y, p384_mod);
+        r[(1<<i)*m].infinity = 0;
+    }
+}
+
+/* Add two Montgomery form projective points.
+ *
+ * ra  Result of addition.
+ * rs  Result of subtraction.
+ * p   First point to add.
+ * q   Second point to add.
+ * t   Temporary ordinate data.
+ */
+static void sp_384_proj_point_add_sub_6(sp_point_384* ra, sp_point_384* rs,
+        const sp_point_384* p, const sp_point_384* q, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*6;
+    sp_digit* t3 = t + 4*6;
+    sp_digit* t4 = t + 6*6;
+    sp_digit* t5 = t + 8*6;
+    sp_digit* t6 = t + 10*6;
+    sp_digit* x = ra->x;
+    sp_digit* y = ra->y;
+    sp_digit* z = ra->z;
+    sp_digit* xs = rs->x;
+    sp_digit* ys = rs->y;
+    sp_digit* zs = rs->z;
+
+
+    XMEMCPY(x, p->x, sizeof(p->x) / 2);
+    XMEMCPY(y, p->y, sizeof(p->y) / 2);
+    XMEMCPY(z, p->z, sizeof(p->z) / 2);
+    ra->infinity = 0;
+    rs->infinity = 0;
+
+    /* U1 = X1*Z2^2 */
+    sp_384_mont_sqr_6(t1, q->z, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_6(t3, t1, q->z, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_6(t1, t1, x, p384_mod, p384_mp_mod);
+    /* U2 = X2*Z1^2 */
+    sp_384_mont_sqr_6(t2, z, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_6(t4, t2, z, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_6(t2, t2, q->x, p384_mod, p384_mp_mod);
+    /* S1 = Y1*Z2^3 */
+    sp_384_mont_mul_6(t3, t3, y, p384_mod, p384_mp_mod);
+    /* S2 = Y2*Z1^3 */
+    sp_384_mont_mul_6(t4, t4, q->y, p384_mod, p384_mp_mod);
+    /* H = U2 - U1 */
+    sp_384_mont_sub_6(t2, t2, t1, p384_mod);
+    /* RS = S2 + S1 */
+    sp_384_mont_add_6(t6, t4, t3, p384_mod);
+    /* R = S2 - S1 */
+    sp_384_mont_sub_6(t4, t4, t3, p384_mod);
+    /* Z3 = H*Z1*Z2 */
+    /* ZS = H*Z1*Z2 */
+    sp_384_mont_mul_6(z, z, q->z, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_6(z, z, t2, p384_mod, p384_mp_mod);
+    XMEMCPY(zs, z, sizeof(p->z)/2);
+    /* X3 = R^2 - H^3 - 2*U1*H^2 */
+    /* XS = RS^2 - H^3 - 2*U1*H^2 */
+    sp_384_mont_sqr_6(x, t4, p384_mod, p384_mp_mod);
+    sp_384_mont_sqr_6(xs, t6, p384_mod, p384_mp_mod);
+    sp_384_mont_sqr_6(t5, t2, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_6(y, t1, t5, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_6(t5, t5, t2, p384_mod, p384_mp_mod);
+    sp_384_mont_sub_6(x, x, t5, p384_mod);
+    sp_384_mont_sub_6(xs, xs, t5, p384_mod);
+    sp_384_mont_dbl_6(t1, y, p384_mod);
+    sp_384_mont_sub_6(x, x, t1, p384_mod);
+    sp_384_mont_sub_6(xs, xs, t1, p384_mod);
+    /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
+    /* YS = -RS*(U1*H^2 - XS) - S1*H^3 */
+    sp_384_mont_sub_6(ys, y, xs, p384_mod);
+    sp_384_mont_sub_6(y, y, x, p384_mod);
+    sp_384_mont_mul_6(y, y, t4, p384_mod, p384_mp_mod);
+    sp_384_sub_6(t6, p384_mod, t6);
+    sp_384_mont_mul_6(ys, ys, t6, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_6(t5, t5, t3, p384_mod, p384_mp_mod);
+    sp_384_mont_sub_6(y, y, t5, p384_mod);
+    sp_384_mont_sub_6(ys, ys, t5, p384_mod);
+}
+
+/* Structure used to describe recoding of scalar multiplication. */
+typedef struct ecc_recode_384 {
+    /* Index into pre-computation table. */
+    uint8_t i;
+    /* Use the negative of the point. */
+    uint8_t neg;
+} ecc_recode_384;
+
+/* The index into pre-computation table to use. */
+static const uint8_t recode_index_6_6[66] = {
+     0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+    32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
+    16, 15, 14, 13, 12, 11, 10,  9,  8,  7,  6,  5,  4,  3,  2,  1,
+     0,  1,
+};
+
+/* Whether to negate y-ordinate. */
+static const uint8_t recode_neg_6_6[66] = {
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+     0,  0,
+};
+
+/* Recode the scalar for multiplication using pre-computed values and
+ * subtraction.
+ *
+ * k  Scalar to multiply by.
+ * v  Vector of operations to perform.
+ */
+static void sp_384_ecc_recode_6_6(const sp_digit* k, ecc_recode_384* v)
+{
+    int i, j;
+    uint8_t y;
+    int carry = 0;
+    int o;
+    sp_digit n;
+
+    j = 0;
+    n = k[j];
+    o = 0;
+    for (i=0; i<65; i++) {
+        y = n;
+        if (o + 6 < 64) {
+            y &= 0x3f;
+            n >>= 6;
+            o += 6;
+        }
+        else if (o + 6 == 64) {
+            n >>= 6;
+            if (++j < 6)
+                n = k[j];
+            o = 0;
+        }
+        else if (++j < 6) {
+            n = k[j];
+            y |= (n << (64 - o)) & 0x3f;
+            o -= 58;
+            n >>= o;
+        }
+
+        y += carry;
+        v[i].i = recode_index_6_6[y];
+        v[i].neg = recode_neg_6_6[y];
+        carry = (y >> 6) + v[i].neg;
+    }
+}
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_win_add_sub_6(sp_point_384* r, const sp_point_384* g,
+        const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 td[33];
+    sp_point_384 rtd, pd;
+    sp_digit tmpd[2 * 6 * 6];
+#endif
+    sp_point_384* t;
+    sp_point_384* rt;
+    sp_point_384* p = NULL;
+    sp_digit* tmp;
+    sp_digit* negy;
+    int i;
+    ecc_recode_384 v[65];
+    int err;
+
+    (void)heap;
+
+    err = sp_384_point_new_6(heap, rtd, rt);
+    if (err == MP_OKAY)
+        err = sp_384_point_new_6(heap, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 33, heap, DYNAMIC_TYPE_ECC);
+    if (t == NULL)
+        err = MEMORY_E;
+    tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 6 * 6, heap,
+                             DYNAMIC_TYPE_ECC);
+    if (tmp == NULL)
+        err = MEMORY_E;
+#else
+    t = td;
+    tmp = tmpd;
+#endif
+
+
+    if (err == MP_OKAY) {
+        /* t[0] = {0, 0, 1} * norm */
+        XMEMSET(&t[0], 0, sizeof(t[0]));
+        t[0].infinity = 1;
+        /* t[1] = {g->x, g->y, g->z} * norm */
+        err = sp_384_mod_mul_norm_6(t[1].x, g->x, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_6(t[1].y, g->y, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_6(t[1].z, g->z, p384_mod);
+    }
+
+    if (err == MP_OKAY) {
+        t[1].infinity = 0;
+        /* t[2] ... t[32]  */
+        sp_384_proj_point_dbl_n_store_6(t, &t[ 1], 5, 1, tmp);
+        sp_384_proj_point_add_6(&t[ 3], &t[ 2], &t[ 1], tmp);
+        sp_384_proj_point_dbl_6(&t[ 6], &t[ 3], tmp);
+        sp_384_proj_point_add_sub_6(&t[ 7], &t[ 5], &t[ 6], &t[ 1], tmp);
+        sp_384_proj_point_dbl_6(&t[10], &t[ 5], tmp);
+        sp_384_proj_point_add_sub_6(&t[11], &t[ 9], &t[10], &t[ 1], tmp);
+        sp_384_proj_point_dbl_6(&t[12], &t[ 6], tmp);
+        sp_384_proj_point_dbl_6(&t[14], &t[ 7], tmp);
+        sp_384_proj_point_add_sub_6(&t[15], &t[13], &t[14], &t[ 1], tmp);
+        sp_384_proj_point_dbl_6(&t[18], &t[ 9], tmp);
+        sp_384_proj_point_add_sub_6(&t[19], &t[17], &t[18], &t[ 1], tmp);
+        sp_384_proj_point_dbl_6(&t[20], &t[10], tmp);
+        sp_384_proj_point_dbl_6(&t[22], &t[11], tmp);
+        sp_384_proj_point_add_sub_6(&t[23], &t[21], &t[22], &t[ 1], tmp);
+        sp_384_proj_point_dbl_6(&t[24], &t[12], tmp);
+        sp_384_proj_point_dbl_6(&t[26], &t[13], tmp);
+        sp_384_proj_point_add_sub_6(&t[27], &t[25], &t[26], &t[ 1], tmp);
+        sp_384_proj_point_dbl_6(&t[28], &t[14], tmp);
+        sp_384_proj_point_dbl_6(&t[30], &t[15], tmp);
+        sp_384_proj_point_add_sub_6(&t[31], &t[29], &t[30], &t[ 1], tmp);
+
+        negy = t[0].y;
+
+        sp_384_ecc_recode_6_6(k, v);
+
+        i = 64;
+        XMEMCPY(rt, &t[v[i].i], sizeof(sp_point_384));
+        for (--i; i>=0; i--) {
+            sp_384_proj_point_dbl_n_6(rt, 6, tmp);
+
+            XMEMCPY(p, &t[v[i].i], sizeof(sp_point_384));
+            sp_384_sub_6(negy, p384_mod, p->y);
+            sp_384_cond_copy_6(p->y, negy, (sp_digit)0 - v[i].neg);
+            sp_384_proj_point_add_6(rt, rt, p, tmp);
+        }
+
+        if (map != 0) {
+            sp_384_map_6(r, rt, tmp);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_384));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL)
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    if (tmp != NULL)
+        XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+#endif
+    sp_384_point_free_6(p, 0, heap);
+    sp_384_point_free_6(rt, 0, heap);
+
+    return err;
+}
+
+/* A table entry for pre-computed points. */
+typedef struct sp_table_entry_384 {
+    sp_digit x[6];
+    sp_digit y[6];
+} sp_table_entry_384;
+
+#ifdef FP_ECC
+#endif /* FP_ECC */
+/* Add two Montgomery form projective points. The second point has a q value of
+ * one.
+ * Only the first point can be the same pointer as the result point.
+ *
+ * r  Result of addition.
+ * p  First point to add.
+ * q  Second point to add.
+ * t  Temporary ordinate data.
+ */
+static void sp_384_proj_point_add_qz1_6(sp_point_384* r, const sp_point_384* p,
+        const sp_point_384* q, sp_digit* t)
+{
+    const sp_point_384* ap[2];
+    sp_point_384* rp[2];
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*6;
+    sp_digit* t3 = t + 4*6;
+    sp_digit* t4 = t + 6*6;
+    sp_digit* t5 = t + 8*6;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+    int i;
+
+    /* Check double */
+    (void)sp_384_sub_6(t1, p384_mod, q->y);
+    sp_384_norm_6(t1);
+    if ((sp_384_cmp_equal_6(p->x, q->x) & sp_384_cmp_equal_6(p->z, q->z) &
+        (sp_384_cmp_equal_6(p->y, q->y) | sp_384_cmp_equal_6(p->y, t1))) != 0) {
+        sp_384_proj_point_dbl_6(r, p, t);
+    }
+    else {
+        rp[0] = r;
+
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point_384));
+        x = rp[p->infinity | q->infinity]->x;
+        y = rp[p->infinity | q->infinity]->y;
+        z = rp[p->infinity | q->infinity]->z;
+
+        ap[0] = p;
+        ap[1] = q;
+        for (i=0; i<6; i++) {
+            r->x[i] = ap[p->infinity]->x[i];
+        }
+        for (i=0; i<6; i++) {
+            r->y[i] = ap[p->infinity]->y[i];
+        }
+        for (i=0; i<6; i++) {
+            r->z[i] = ap[p->infinity]->z[i];
+        }
+        r->infinity = ap[p->infinity]->infinity;
+
+        /* U2 = X2*Z1^2 */
+        sp_384_mont_sqr_6(t2, z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_6(t4, t2, z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_6(t2, t2, q->x, p384_mod, p384_mp_mod);
+        /* S2 = Y2*Z1^3 */
+        sp_384_mont_mul_6(t4, t4, q->y, p384_mod, p384_mp_mod);
+        /* H = U2 - X1 */
+        sp_384_mont_sub_6(t2, t2, x, p384_mod);
+        /* R = S2 - Y1 */
+        sp_384_mont_sub_6(t4, t4, y, p384_mod);
+        /* Z3 = H*Z1 */
+        sp_384_mont_mul_6(z, z, t2, p384_mod, p384_mp_mod);
+        /* X3 = R^2 - H^3 - 2*X1*H^2 */
+        sp_384_mont_sqr_6(t1, t4, p384_mod, p384_mp_mod);
+        sp_384_mont_sqr_6(t5, t2, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_6(t3, x, t5, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_6(t5, t5, t2, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_6(x, t1, t5, p384_mod);
+        sp_384_mont_dbl_6(t1, t3, p384_mod);
+        sp_384_mont_sub_6(x, x, t1, p384_mod);
+        /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */
+        sp_384_mont_sub_6(t3, t3, x, p384_mod);
+        sp_384_mont_mul_6(t3, t3, t4, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_6(t5, t5, y, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_6(y, t3, t5, p384_mod);
+    }
+}
+
+#ifdef FP_ECC
+/* Convert the projective point to affine.
+ * Ordinates are in Montgomery form.
+ *
+ * a  Point to convert.
+ * t  Temporary data.
+ */
+static void sp_384_proj_to_affine_6(sp_point_384* a, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2 * 6;
+    sp_digit* tmp = t + 4 * 6;
+
+    sp_384_mont_inv_6(t1, a->z, tmp);
+
+    sp_384_mont_sqr_6(t2, t1, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_6(t1, t2, t1, p384_mod, p384_mp_mod);
+
+    sp_384_mont_mul_6(a->x, a->x, t2, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_6(a->y, a->y, t1, p384_mod, p384_mp_mod);
+    XMEMCPY(a->z, p384_norm_mod, sizeof(p384_norm_mod));
+}
+
+/* Generate the pre-computed table of points for the base point.
+ *
+ * a      The base point.
+ * table  Place to store generated point data.
+ * tmp    Temporary data.
+ * heap  Heap to use for allocation.
+ */
+static int sp_384_gen_stripe_table_6(const sp_point_384* a,
+        sp_table_entry_384* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 td, s1d, s2d;
+#endif
+    sp_point_384* t;
+    sp_point_384* s1 = NULL;
+    sp_point_384* s2 = NULL;
+    int i, j;
+    int err;
+
+    (void)heap;
+
+    err = sp_384_point_new_6(heap, td, t);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_6(heap, s1d, s1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_6(heap, s2d, s2);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_6(t->x, a->x, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_6(t->y, a->y, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_6(t->z, a->z, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        t->infinity = 0;
+        sp_384_proj_to_affine_6(t, tmp);
+
+        XMEMCPY(s1->z, p384_norm_mod, sizeof(p384_norm_mod));
+        s1->infinity = 0;
+        XMEMCPY(s2->z, p384_norm_mod, sizeof(p384_norm_mod));
+        s2->infinity = 0;
+
+        /* table[0] = {0, 0, infinity} */
+        XMEMSET(&table[0], 0, sizeof(sp_table_entry_384));
+        /* table[1] = Affine version of 'a' in Montgomery form */
+        XMEMCPY(table[1].x, t->x, sizeof(table->x));
+        XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+        for (i=1; i<8; i++) {
+            sp_384_proj_point_dbl_n_6(t, 48, tmp);
+            sp_384_proj_to_affine_6(t, tmp);
+            XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+            XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+        }
+
+        for (i=1; i<8; i++) {
+            XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+            XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+            for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+                XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+                XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+                sp_384_proj_point_add_qz1_6(t, s1, s2, tmp);
+                sp_384_proj_to_affine_6(t, tmp);
+                XMEMCPY(table[j].x, t->x, sizeof(table->x));
+                XMEMCPY(table[j].y, t->y, sizeof(table->y));
+            }
+        }
+    }
+
+    sp_384_point_free_6(s2, 0, heap);
+    sp_384_point_free_6(s1, 0, heap);
+    sp_384_point_free_6( t, 0, heap);
+
+    return err;
+}
+
+#endif /* FP_ECC */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_stripe_6(sp_point_384* r, const sp_point_384* g,
+        const sp_table_entry_384* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 rtd;
+    sp_point_384 pd;
+    sp_digit td[2 * 6 * 6];
+#endif
+    sp_point_384* rt;
+    sp_point_384* p = NULL;
+    sp_digit* t;
+    int i, j;
+    int y, x;
+    int err;
+
+    (void)g;
+    (void)heap;
+
+
+    err = sp_384_point_new_6(heap, rtd, rt);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_6(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 6 * 6, heap,
+                           DYNAMIC_TYPE_ECC);
+    if (t == NULL) {
+        err = MEMORY_E;
+    }
+#else
+    t = td;
+#endif
+
+    if (err == MP_OKAY) {
+        XMEMCPY(p->z, p384_norm_mod, sizeof(p384_norm_mod));
+        XMEMCPY(rt->z, p384_norm_mod, sizeof(p384_norm_mod));
+
+        y = 0;
+        for (j=0,x=47; j<8; j++,x+=48) {
+            y |= ((k[x / 64] >> (x % 64)) & 1) << j;
+        }
+        XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+        XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+        rt->infinity = !y;
+        for (i=46; i>=0; i--) {
+            y = 0;
+            for (j=0,x=i; j<8; j++,x+=48) {
+                y |= ((k[x / 64] >> (x % 64)) & 1) << j;
+            }
+
+            sp_384_proj_point_dbl_6(rt, rt, t);
+            XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+            XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+            p->infinity = !y;
+            sp_384_proj_point_add_qz1_6(rt, rt, p, t);
+        }
+
+        if (map != 0) {
+            sp_384_map_6(r, rt, t);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_384));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_6(p, 0, heap);
+    sp_384_point_free_6(rt, 0, heap);
+
+    return err;
+}
+
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+    #define FP_ENTRIES 16
+#endif
+
+typedef struct sp_cache_384_t {
+    sp_digit x[6];
+    sp_digit y[6];
+    sp_table_entry_384 table[256];
+    uint32_t cnt;
+    int set;
+} sp_cache_384_t;
+
+static THREAD_LS_T sp_cache_384_t sp_cache_384[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_384_last = -1;
+static THREAD_LS_T int sp_cache_384_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+    static volatile int initCacheMutex_384 = 0;
+    static wolfSSL_Mutex sp_cache_384_lock;
+#endif
+
+static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache)
+{
+    int i, j;
+    uint32_t least;
+
+    if (sp_cache_384_inited == 0) {
+        for (i=0; i<FP_ENTRIES; i++) {
+            sp_cache_384[i].set = 0;
+        }
+        sp_cache_384_inited = 1;
+    }
+
+    /* Compare point with those in cache. */
+    for (i=0; i<FP_ENTRIES; i++) {
+        if (!sp_cache_384[i].set)
+            continue;
+
+        if (sp_384_cmp_equal_6(g->x, sp_cache_384[i].x) &
+                           sp_384_cmp_equal_6(g->y, sp_cache_384[i].y)) {
+            sp_cache_384[i].cnt++;
+            break;
+        }
+    }
+
+    /* No match. */
+    if (i == FP_ENTRIES) {
+        /* Find empty entry. */
+        i = (sp_cache_384_last + 1) % FP_ENTRIES;
+        for (; i != sp_cache_384_last; i=(i+1)%FP_ENTRIES) {
+            if (!sp_cache_384[i].set) {
+                break;
+            }
+        }
+
+        /* Evict least used. */
+        if (i == sp_cache_384_last) {
+            least = sp_cache_384[0].cnt;
+            for (j=1; j<FP_ENTRIES; j++) {
+                if (sp_cache_384[j].cnt < least) {
+                    i = j;
+                    least = sp_cache_384[i].cnt;
+                }
+            }
+        }
+
+        XMEMCPY(sp_cache_384[i].x, g->x, sizeof(sp_cache_384[i].x));
+        XMEMCPY(sp_cache_384[i].y, g->y, sizeof(sp_cache_384[i].y));
+        sp_cache_384[i].set = 1;
+        sp_cache_384[i].cnt = 1;
+    }
+
+    *cache = &sp_cache_384[i];
+    sp_cache_384_last = i;
+}
+#endif /* FP_ECC */
+
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_6(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
+        int map, void* heap)
+{
+#ifndef FP_ECC
+    return sp_384_ecc_mulmod_win_add_sub_6(r, g, k, map, heap);
+#else
+    sp_digit tmp[2 * 6 * 7];
+    sp_cache_384_t* cache;
+    int err = MP_OKAY;
+
+#ifndef HAVE_THREAD_LS
+    if (initCacheMutex_384 == 0) {
+         wc_InitMutex(&sp_cache_384_lock);
+         initCacheMutex_384 = 1;
+    }
+    if (wc_LockMutex(&sp_cache_384_lock) != 0)
+       err = BAD_MUTEX_E;
+#endif /* HAVE_THREAD_LS */
+
+    if (err == MP_OKAY) {
+        sp_ecc_get_cache_384(g, &cache);
+        if (cache->cnt == 2)
+            sp_384_gen_stripe_table_6(g, cache->table, tmp, heap);
+
+#ifndef HAVE_THREAD_LS
+        wc_UnLockMutex(&sp_cache_384_lock);
+#endif /* HAVE_THREAD_LS */
+
+        if (cache->cnt < 2) {
+            err = sp_384_ecc_mulmod_win_add_sub_6(r, g, k, map, heap);
+        }
+        else {
+            err = sp_384_ecc_mulmod_stripe_6(r, g, cache->table, k,
+                    map, heap);
+        }
+    }
+
+    return err;
+#endif
+}
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * km    Scalar to multiply by.
+ * p     Point to multiply.
+ * r     Resulting point.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_384(mp_int* km, ecc_point* gm, ecc_point* r, int map,
+        void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[6];
+#endif
+    sp_point_384* point;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    err = sp_384_point_new_6(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 6, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL)
+            err = MEMORY_E;
+    }
+#else
+    k = kd;
+#endif
+    if (err == MP_OKAY) {
+        sp_384_from_mp(k, 6, km);
+        sp_384_point_from_ecc_point_6(point, gm);
+
+            err = sp_384_ecc_mulmod_6(point, point, k, map, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_to_ecc_point_6(point, r);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_6(point, 0, heap);
+
+    return err;
+}
+
+static const sp_table_entry_384 p384_table[256] = {
+    /* 0 */
+    { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 */
+    { { 0x3dd0756649c0b528L,0x20e378e2a0d6ce38L,0x879c3afc541b4d6eL,
+        0x6454868459a30effL,0x812ff723614ede2bL,0x4d3aadc2299e1513L },
+      { 0x23043dad4b03a4feL,0xa1bfa8bf7bb4a9acL,0x8bade7562e83b050L,
+        0xc6c3521968f4ffd9L,0xdd8002263969a840L,0x2b78abc25a15c5e9L } },
+    /* 2 */
+    { { 0x298647532b0c535bL,0x90dd695370506296L,0x038cd6b4216ab9acL,
+        0x3df9b7b7be12d76aL,0x13f4d9785f347bdbL,0x222c5c9c13e94489L },
+      { 0x5f8e796f2680dc64L,0x120e7cb758352417L,0x254b5d8ad10740b8L,
+        0xc38b8efb5337dee6L,0xf688c2e194f02247L,0x7b5c75f36c25bc4cL } },
+    /* 3 */
+    { { 0xe26a3cc39edffea5L,0x35bbfd1c37d7e9fcL,0xf0e7700d9bde3ef6L,
+        0x0380eb471a538f5aL,0x2e9da8bb05bf9eb3L,0xdbb93c731a460c3eL },
+      { 0x37dba260f526b605L,0x95d4978efd785537L,0x24ed793aed72a04aL,
+        0x2694837776005b1aL,0x99f557b99e681f82L,0xae5f9557d64954efL } },
+    /* 4 */
+    { { 0x24480c57f26feef9L,0xc31a26943a0e1240L,0x735002c3273e2bc7L,
+        0x8c42e9c53ef1ed4cL,0x028babf67f4948e8L,0x6a502f438a978632L },
+      { 0xf5f13a46b74536feL,0x1d218babd8a9f0ebL,0x30f36bcc37232768L,
+        0xc5317b31576e8c18L,0xef1d57a69bbcb766L,0x917c4930b3e3d4dcL } },
+    /* 5 */
+    { { 0x11426e2ee349ddd0L,0x9f117ef99b2fc250L,0xff36b480ec0174a6L,
+        0x4f4bde7618458466L,0x2f2edb6d05806049L,0x8adc75d119dfca92L },
+      { 0xa619d097b7d5a7ceL,0x874275e5a34411e9L,0x5403e0470da4b4efL,
+        0x2ebaafd977901d8fL,0x5e63ebcea747170fL,0x12a369447f9d8036L } },
+    /* 6 */
+    { { 0x28f9c07a4fc52870L,0xce0b37481a53a961L,0xd550fa180e1828d9L,
+        0xa24abaf76adb225aL,0xd11ed0a56e58a348L,0xf3d811e6948acb62L },
+      { 0x8618dd774c61ed22L,0x0bb747f980b47c9dL,0x22bf796fde6b8559L,
+        0xfdfd1c6d680a21e9L,0xc0db15772af2c9ddL,0xa09379e6c1e90f3dL } },
+    /* 7 */
+    { { 0x386c66efe085c629L,0x5fc2a461095bc89aL,0x1353d631203f4b41L,
+        0x7ca1972b7e4bd8f5L,0xb077380aa7df8ce9L,0xd8a90389ee7e4ea3L },
+      { 0x1bc74dc7e7b14461L,0xdc2cb0140c9c4f78L,0x52b4b3a684ef0a10L,
+        0xbde6ea5d20327fe2L,0xb71ec435660f9615L,0xeede5a04b8ad8173L } },
+    /* 8 */
+    { { 0x5584cbb3893b9a2dL,0x820c660b00850c5dL,0x4126d8267df2d43dL,
+        0xdd5bbbf00109e801L,0x85b92ee338172f1cL,0x609d4f93f31430d9L },
+      { 0x1e059a07eadaf9d6L,0x70e6536c0f125fb0L,0xd6220751560f20e7L,
+        0xa59489ae7aaf3a9aL,0x7b70e2f664bae14eL,0x0dd0370176d08249L } },
+    /* 9 */
+    { { 0x4cc13be88510521fL,0x87315ba9f724cc17L,0xb49d83bb353dc263L,
+        0x8b677efe0c279257L,0x510a1c1cc93c9537L,0x33e30cd8a4702c99L },
+      { 0xf0ffc89d2208353fL,0x0170fa8dced42b2bL,0x090851ed26e2a5f5L,
+        0x81276455ecb52c96L,0x0646c4e17fe1adf4L,0x513f047eb0868eabL } },
+    /* 10 */
+    { { 0xc07611f4df5bdf53L,0x45d331a758b11a6dL,0x58965daf1c4ee394L,
+        0xba8bebe75a5878d1L,0xaecc0a1882dd3025L,0xcf2a3899a923eb8bL },
+      { 0xf98c9281d24fd048L,0x841bfb598bbb025dL,0xb8ddf8cec9ab9d53L,
+        0x538a4cb67fef044eL,0x092ac21f23236662L,0xa919d3850b66f065L } },
+    /* 11 */
+    { { 0x3db03b4085d480d8L,0x8cd9f4791b287a7dL,0x8f24dc754a8f3baeL,
+        0x482eb8003db41892L,0x38bf9eb39c56e0f5L,0x8b9773209a91dc6fL },
+      { 0xa31b05b27209cfc2L,0x4c49bf8505b2db70L,0x56462498d619527bL,
+        0x3fe510391fac51baL,0xfb04f55eab4b8342L,0xc07c10dc04c6eabfL } },
+    /* 12 */
+    { { 0xad22fe4cdb32f048L,0x5f23bf91475ed6dfL,0xa50ce0c0aa66b6cbL,
+        0xdf627a89f03405c0L,0x3674837df95e2d6aL,0x081c95b6ba42e64eL },
+      { 0xeba3e036e71d6cebL,0xb45bcccf6c6b0271L,0x67b47e630684701dL,
+        0x60f8f942e712523fL,0x824234725cd47adcL,0x83027d7987649cbbL } },
+    /* 13 */
+    { { 0xb3929ea63615b0b8L,0xb41441fda54dac41L,0x8995d556b5b6a368L,
+        0xa80d4529167ef05eL,0xf6bcb4a16d25a27fL,0x210d6a4c7bd55b68L },
+      { 0xf3804abb25351130L,0x1d2df699903e37ebL,0x5f201efc084c25c8L,
+        0x31a28c87a1c68e91L,0x81dad253563f62a5L,0x5dd6de70d6c415d4L } },
+    /* 14 */
+    { { 0x29f470fd846612ceL,0x986f3eecda18d997L,0x6b84c1612f34af86L,
+        0x5ef0a40846ddaf8bL,0x14405a00e49e795fL,0x5f491b16aa2f7a37L },
+      { 0xc7f07ae4db41b38dL,0xef7d119e18fbfcaaL,0x3a18e07614443b19L,
+        0x4356841a79a19926L,0x91f4a91ce2226fbeL,0xdc77248c3cc88721L } },
+    /* 15 */
+    { { 0xd570ff1ae4b1ec9dL,0x21d23e0ee7eef706L,0x3cde40f4ca19e086L,
+        0x7d6523c4cd4bb270L,0x16c1f06cbf13aa6cL,0x5aa7245ad14c4b60L },
+      { 0x37f8146744b74de8L,0x839e7a17620a934eL,0xf74d14e8de8b1aa1L,
+        0x8789fa51f30d75e2L,0x09b24052c81c261eL,0x654e267833c565eeL } },
+    /* 16 */
+    { { 0x378205de2f9fbe67L,0xc4afcb837f728e44L,0xdbcec06c682e00f1L,
+        0xf2a145c3114d5423L,0xa01d98747a52463eL,0xfc0935b17d717b0aL },
+      { 0x9653bc4fd4d01f95L,0x9aa83ea89560ad34L,0xf77943dcaf8e3f3fL,
+        0x70774a10e86fe16eL,0x6b62e6f1bf9ffdcfL,0x8a72f39e588745c9L } },
+    /* 17 */
+    { { 0x73ade4da2341c342L,0xdd326e54ea704422L,0x336c7d983741cef3L,
+        0x1eafa00d59e61549L,0xcd3ed892bd9a3efdL,0x03faf26cc5c6c7e4L },
+      { 0x087e2fcf3045f8acL,0x14a65532174f1e73L,0x2cf84f28fe0af9a7L,
+        0xddfd7a842cdc935bL,0x4c0f117b6929c895L,0x356572d64c8bcfccL } },
+    /* 18 */
+    { { 0x7ecbac017d8c1bbaL,0x6058f9c390b0f3d5L,0xaee116e3f6197d0fL,
+        0xc4dd70684033b128L,0xf084dba6c209b983L,0x97c7c2cf831dbc4aL },
+      { 0x2f4e61ddf96010e8L,0xd97e4e20529faa17L,0x4ee6666069d37f20L,
+        0xccc139ed3d366d72L,0x690b6ee213488e0fL,0x7cad1dc5f3a6d533L } },
+    /* 19 */
+    { { 0x660a9a81da57a41fL,0xe74a0412ec0039b6L,0x42343c6b5e1dad15L,
+        0x284f3ff546681d4cL,0xb51087f163749e89L,0x070f23cc6f9f2f13L },
+      { 0x542211da5d186e14L,0x84748f37fddb0dffL,0x41a3aab4db1f4180L,
+        0x25ed667ba6402d0eL,0x2f2924a902f58355L,0x5844ee7cfa44a689L } },
+    /* 20 */
+    { { 0xfab086073f3b236fL,0x19e9d41d81e221daL,0xf3f6571e3927b428L,
+        0x4348a9337550f1f6L,0x7167b996a85e62f0L,0x62d437597f5452bfL },
+      { 0xd85feb9ef2955926L,0x440a561f6df78353L,0x389668ec9ca36b59L,
+        0x052bf1a1a22da016L,0xbdfbff72f6093254L,0x94e50f28e22209f3L } },
+    /* 21 */
+    { { 0x90b2e5b33062e8afL,0xa8572375e8a3d369L,0x3fe1b00b201db7b1L,
+        0xe926def0ee651aa2L,0x6542c9beb9b10ad7L,0x098e309ba2fcbe74L },
+      { 0x779deeb3fff1d63fL,0x23d0e80a20bfd374L,0x8452bb3b8768f797L,
+        0xcf75bb4d1f952856L,0x8fe6b40029ea3faaL,0x12bd3e4081373a53L } },
+    /* 22 */
+    { { 0xc023780d104cbba5L,0x6207e747fa35dd4cL,0x35c239281ca9b6a3L,
+        0x4ff19be897987b10L,0xb8476bbf8022eee8L,0xaa0a4a14d3bbe74dL },
+      { 0x20f94331187d4543L,0x3215387079f6e066L,0x83b0f74eac7e82e1L,
+        0xa7748ba2828f06abL,0xc5f0298ac26ef35fL,0x0f0c50708e9a7dbdL } },
+    /* 23 */
+    { { 0x0c5c244cdef029ddL,0x3dabc687850661b8L,0x9992b865fe11d981L,
+        0xe9801b8f6274dbadL,0xe54e6319098da242L,0x9929a91a91a53d08L },
+      { 0x37bffd7235285887L,0xbc759425f1418102L,0x9280cc35fd2e6e20L,
+        0x735c600cfbc42ee5L,0xb7ad28648837619aL,0xa3627231a778c57bL } },
+    /* 24 */
+    { { 0xae799b5c91361ed8L,0x47d71b756c63366cL,0x54cdd5211b265a6aL,
+        0xe0215a5998d77b74L,0x4424d9b7bab29db0L,0x8b0ffacc7fd9e536L },
+      { 0x46d85d1237b5d9efL,0x5b106d62bfa91747L,0xed0479f85f99ba2dL,
+        0x0e6f39231d104de4L,0x83a84c8425e8983fL,0xa9507e0af8105a70L } },
+    /* 25 */
+    { { 0xf6c68a6e14cf381cL,0xaf9d27bdc22e31ccL,0x23568d4daa8a5ccbL,
+        0xe431eec0e338e4d2L,0xf1a828fe8f52ad1fL,0xdb6a0579e86acd80L },
+      { 0x2885672e4507832aL,0x73fc275f887e5289L,0x65f8027805610d08L,
+        0x8d9b4554075ff5b0L,0x3a8e8fb109f712b5L,0x39f0ac862ebe9cf2L } },
+    /* 26 */
+    { { 0xd8fabf784c52edf5L,0xdcd737e5a589ae53L,0x94918bf0d791ab17L,
+        0xb5fbd956bcff06c9L,0xf6d3032edca46d45L,0x2cdff7e141a3e486L },
+      { 0x6674b3ba61f47ec8L,0x8a882163eef84608L,0xa257c7054c687f90L,
+        0xe30cb2edf6cdf227L,0x2c4c64ca7f6ea846L,0x186fa17ccc6bcd3cL } },
+    /* 27 */
+    { { 0x48a3f5361dfcb91eL,0x83595e13646d358aL,0xbd15827b91128798L,
+        0x3ce612b82187757aL,0x873150a161bd7372L,0xf4684530b662f568L },
+      { 0x8833950b401896f6L,0xe11cb89a77f3e090L,0xb2f12cac48e7f4a5L,
+        0x313dd769f606677eL,0xfdcf08b316579f93L,0x6429cec946b8f22bL } },
+    /* 28 */
+    { { 0x4984dd54bb75f9a4L,0x4aef06b929d3b570L,0xb5f84ca23d6e4c1eL,
+        0x24c61c11b083ef35L,0xce4a7392392ca9ffL,0x865d65176730a800L },
+      { 0xca3dfe76722b4a2bL,0x12c04bf97b083e0eL,0x803ce5b51b86b8a5L,
+        0x3fc7632d6a7e3e0cL,0xc89970c2c81adbe4L,0x3cbcd3ad120e16b1L } },
+    /* 29 */
+    { { 0xfbfb4cc7ec30ce93L,0x10ed6c7db72720a2L,0xec675bf747b55500L,
+        0x90725903333ff7c3L,0xc7c3973e5075bfc0L,0xb049ecb007acf31bL },
+      { 0xb4076eaf4f58839cL,0x101896daa2b05e4fL,0x3f6033b0ab40c66eL,
+        0x19ee9eebc8d864baL,0xeb6cf15547bf6d2aL,0x8e5a9663f826477dL } },
+    /* 30 */
+    { { 0x69e62fddf7fbd5e1L,0x38ecfe5476912b1dL,0x845a3d56d1da3bfbL,
+        0x0494950e1c86f0d4L,0x83cadbf93bc36ce8L,0x41fce5724fccc8d1L },
+      { 0x05f939c28332c144L,0xb17f248b0871e46eL,0x3d8534e266e8aff6L,
+        0x1d06f1dc3b85c629L,0xdb06a32ea3131b73L,0xf295184d8b3f64e5L } },
+    /* 31 */
+    { { 0xd9653ff736ddc103L,0x25f43e3795ef606fL,0x09e301fcfe06dce8L,
+        0x85af234130b6eebfL,0x79b12b530ff56b20L,0x9b4fb499fe9a3c6bL },
+      { 0x0154f89251d27ac2L,0xd33167e356ca5389L,0x7828ec1fafc065a6L,
+        0x0959a2587f746c9bL,0xb18f1be30c44f837L,0xa7946117c4132fdbL } },
+    /* 32 */
+    { { 0xc0426b775e3c647bL,0xbfcbd9398cf05348L,0x31d312e3172c0d3dL,
+        0x5f49fde6ee754737L,0x895530f06da7ee61L,0xcf281b0ae8b3a5fbL },
+      { 0xfd14973541b8a543L,0x41a625a73080dd30L,0xe2baae07653908cfL,
+        0xc3d01436ba02a278L,0xa0d0222e7b21b8f8L,0xfdc270e9d7ec1297L } },
+    /* 33 */
+    { { 0x00873c0cbc7f41d6L,0xd976113e1b7ad641L,0x2a536ff4238443fbL,
+        0x030d00e241e62e45L,0x532e98675f545fc6L,0xcd0331088e91208cL },
+      { 0xd1a04c999797612cL,0xd4393e02eea674e2L,0xd56fa69ee19742a1L,
+        0xdd2ab48085f0590eL,0xa5cefc5248a2243dL,0x48cc67b654383f41L } },
+    /* 34 */
+    { { 0x4e50430efc14ab48L,0x195b7f4f26706a74L,0x2fe8a228cc881ff6L,
+        0xb1b968e2d945013dL,0x936aa5794b92162bL,0x4fb766b7364e754aL },
+      { 0x13f93bca31e1ff7fL,0x696eb5cace4f2691L,0xff754bf8a2b09e02L,
+        0x58f13c9ce58e3ff8L,0xb757346f1678c0b0L,0xd54200dba86692b3L } },
+    /* 35 */
+    { { 0x9a030bbd6dda1265L,0xf7b4f3fce89718ddL,0xa6a4931f936065b8L,
+        0xbce72d875f72241cL,0x6cbb51cb65775857L,0xc71618154e993675L },
+      { 0xe81a0f792ee32189L,0xef2fab26277dc0b2L,0x9e64f6feb71f469fL,
+        0xb448ce33dfdaf859L,0x3f5c1c4cbe6b5df1L,0xfb8dfb001de45f7bL } },
+    /* 36 */
+    { { 0xc7345fa74d5bb921L,0x5c7e04be4d2b667eL,0x47ed3a80282d7a3eL,
+        0x5c2777f87e47b2a4L,0x89b3b10008488e2eL,0x9aad77c2b2eb5b45L },
+      { 0xd681bca7daac34aeL,0x2452e4e526afb326L,0x0c88792441a1ee14L,
+        0x743b04d4c2407adeL,0xcb5e999bfc17a2acL,0x4dca2f824a701a06L } },
+    /* 37 */
+    { { 0x68e31ca61127bc1aL,0xa3edd59b17ead3beL,0x67b6b645e25f5a15L,
+        0x76221794a420e15eL,0x794fd83b4b1e872eL,0x7cab3f03b2dece1bL },
+      { 0x7119bf15ca9b3586L,0xa55459244d250bd7L,0x173633eacc6bcf24L,
+        0x9bd308c2b1b6f884L,0x3bae06f5447d38c3L,0x54dcc135f341fe1cL } },
+    /* 38 */
+    { { 0x56d3598d943caf0dL,0xce044ea9225ff133L,0x9edf6a7c563fadeaL,
+        0x632eb94473e8dc27L,0x814b467e3190dcabL,0x2d4f4f316dbb1e31L },
+      { 0x8d69811ca143b7caL,0x4ec1ac32de7cf950L,0x223ab5fd37b5fe82L,
+        0xe82616e49390f1d9L,0xabff4b2075804610L,0x11b9be15875b08f0L } },
+    /* 39 */
+    { { 0x4ae31a3d3bbe682cL,0xbc7c5d2674eef2ddL,0x92afd10a3c47dd40L,
+        0xec7e0a3bc14ab9e1L,0x6a6c3dd1b2e495e4L,0x085ee5e9309bcd85L },
+      { 0xf381a9088c2e67fdL,0x32083a80e261eaf2L,0x0fcd6a4996deee15L,
+        0xe3b8fb035e524c79L,0x8dc360d91d5b08b9L,0x3a06e2c87f26719fL } },
+    /* 40 */
+    { { 0x5cd9f5a87237cac0L,0x93f0b59d43586794L,0x4384a764e94f6c4eL,
+        0x8304ed2bb62782d3L,0x0b8db8b3cde06015L,0x4336dd535dbe190fL },
+      { 0x5744355392ab473aL,0x031c7275be5ed046L,0x3e78678c21909aa4L,
+        0x4ab7e04f99202ddbL,0x2648d2066977e635L,0xd427d184093198beL } },
+    /* 41 */
+    { { 0x822848f50f9b5a31L,0xbb003468baadb62aL,0x233a04723357559cL,
+        0x49ef688079aee843L,0xa89867a0aeb9e1e3L,0xc151931b1f6f9a55L },
+      { 0xd264eb0bad74251eL,0x37b9b2634abf295eL,0xb600921b04960d10L,
+        0x0de53dbc4da77dc0L,0x01d9bab3d2b18697L,0xad54ec7af7156ddfL } },
+    /* 42 */
+    { { 0x8e74dc3579efdc58L,0x456bd3694ff68ddbL,0x724e74ccd32096a5L,
+        0xe41cff42386783d0L,0xa04c7f217c70d8a4L,0x41199d2fe61a19a2L },
+      { 0xd389a3e029c05dd2L,0x535f2a6be7e3fda9L,0x26ecf72d7c2b4df8L,
+        0x678275f4fe745294L,0x6319c9cc9d23f519L,0x1e05a02d88048fc4L } },
+    /* 43 */
+    { { 0x75cc8e2ed4d5ffe8L,0xf8bb4896dbea17f2L,0x35059790cee3cb4aL,
+        0x4c06ee85a47c6165L,0xf98fff2592935d2fL,0x34c4a57232ffd7c7L },
+      { 0xc4b14806ea0376a2L,0x2ea5e7504f115e02L,0x532d76e21e55d7c0L,
+        0x68dc9411f31044daL,0x9272e46571b77993L,0xadaa38bb93a8cfd5L } },
+    /* 44 */
+    { { 0x4bf0c7127d4ed72aL,0xda0e9264ba1f79a3L,0x48c0258bf4c39ea4L,
+        0xa5394ed82a715138L,0x4af511cebf06c660L,0xfcebceefec5c37cdL },
+      { 0xf23b75aa779ae8c1L,0xdeff59ccad1e606eL,0xf3f526fd22755c82L,
+        0x64c5ab44bb32cefdL,0xa96e11a2915bdefdL,0xab19746a1143813eL } },
+    /* 45 */
+    { { 0x43c78585ec837d7dL,0xca5b6fbcb8ee0ba4L,0x34e924d9d5dbb5eeL,
+        0x3f4fa104bb4f1ca5L,0x15458b72398640f7L,0x4231faa9d7f407eaL },
+      { 0x53e0661ef96e6896L,0x554e4c69d03b0f9dL,0xd4fcb07b9c7858d1L,
+        0x7e95279352cb04faL,0x5f5f15748974e7f7L,0x2e3fa5586b6d57c8L } },
+    /* 46 */
+    { { 0x42cd48036a9951a8L,0xa8b15b8842792ad0L,0x18e8bcf9abb29a73L,
+        0xbfd9a092409933e8L,0x760a3594efb88dc4L,0x1441886340724458L },
+      { 0x162a56ee99caedc7L,0x8fb12ecd91d101c9L,0xea671967393202daL,
+        0x1aac8c4aa4ccd796L,0x7db050361cf185a8L,0x0c9f86cd8cfd095aL } },
+    /* 47 */
+    { { 0x9a72814710b2a556L,0x767ca964327b70b2L,0x04ed9e125e3799b7L,
+        0x6781d2dc22a3eb2aL,0x5bd116eb0d9450acL,0xeccac1fca7ebe08aL },
+      { 0xde68444fdc2d6e94L,0x3621f42935ecf21bL,0x14e2d54329e03a2cL,
+        0x53e42cd57d3e7f0aL,0xbba26c0973ed00b9L,0x00297c39c57d2272L } },
+    /* 48 */
+    { { 0x3aaaab10b8243a7dL,0x6eeef93e8fa58c5bL,0xf866fca39ae7f764L,
+        0x64105a2661ab04d3L,0xa3578d8a03945d66L,0xb08cd3e4791b848cL },
+      { 0x45edc5f8756d2411L,0xd4a790d9a755128cL,0xc2cf096349e5f6a0L,
+        0xc66d267df649beaaL,0x3ce6d9688467039eL,0x50046c6b42f7816fL } },
+    /* 49 */
+    { { 0x92ae160266425043L,0x1ff66afdf08db890L,0x386f5a7f8f162ce5L,
+        0x18d2dea0fcf5598fL,0x78372b3a1a8ca18eL,0xdf0d20eb8cd0e6f7L },
+      { 0x7edd5e1d75bb4045L,0x252a47ceb96d94b7L,0xbdb293582c626776L,
+        0x853c394340dd1031L,0x9dc9becf7d5f47fdL,0x27c2302fbae4044aL } },
+    /* 50 */
+    { { 0x2d1d208a8f2d49ceL,0x0d91aa02162df0a2L,0x9c5cce8709a07f65L,
+        0xdf07238b84339012L,0x5028e2c8419442cdL,0x2dcbd35872062abaL },
+      { 0xb5fbc3cbe4680967L,0x2a7bc6459f92d72cL,0x806c76e1116c369dL,
+        0x5c50677a3177e8d8L,0x753739eb4569df57L,0x2d481ef636c3f40bL } },
+    /* 51 */
+    { { 0x1a2d39fdfea1103eL,0xeaae559295f81b17L,0xdbd0aa18f59b264aL,
+        0x90c39c1acb592ee0L,0xdf62f80d9750cca3L,0xda4d8283df97cc6cL },
+      { 0x0a6dd3461e201067L,0x1531f85969fb1f6bL,0x4895e5521d60121fL,
+        0x0b21aab04c041c91L,0x9d896c46bcc1ccf8L,0xd24da3b33141bde7L } },
+    /* 52 */
+    { { 0x575a053753b0a354L,0x392ff2f40c6ddcd8L,0x0b8e8cff56157b94L,
+        0x073e57bd3b1b80d1L,0x2a75e0f03fedee15L,0x752380e4aa8e6f19L },
+      { 0x1f4e227c6558ffe9L,0x3a34861819ec5415L,0xab382d5ef7997085L,
+        0x5e6deaffddc46ac2L,0xe5144078fc8d094cL,0xf674fe51f60e37c6L } },
+    /* 53 */
+    { { 0x6fb87ae5af63408fL,0xa39c36a9cd75a737L,0x7833313fcf4c618dL,
+        0xfbcd4482f034c88dL,0x4469a76139b35288L,0x77a711c566b5d9c9L },
+      { 0x4a695dc7944f8d65L,0xe6da5f65161aaba8L,0x8654e9c324601669L,
+        0xbc8b93f528ae7491L,0x5f1d1e838f5580d8L,0x8ccf9a1acea32cc8L } },
+    /* 54 */
+    { { 0x28ab110c7196fee2L,0x75799d63874c8945L,0xa262934829aedaddL,
+        0x9714cc7b2be88ff4L,0xf71293cfd58d60d6L,0xda6b6cb332a564e9L },
+      { 0xf43fddb13dd821c2L,0xf2f2785f90dd323dL,0x91246419048489f8L,
+        0x61660f26d24c6749L,0x961d9e8cc803c15cL,0x631c6158faadc4c9L } },
+    /* 55 */
+    { { 0xacf2ebe0fd752366L,0xb93c340e139be88bL,0x98f664850f20179eL,
+        0x14820254ff1da785L,0x5278e2764f85c16eL,0xa246ee457aab1913L },
+      { 0x43861eb453763b33L,0xc49f03fc45c0bc0dL,0xafff16bcad6b1ea1L,
+        0xce33908b6fd49c99L,0x5c51e9bff7fde8c3L,0x076a7a39ff142c5eL } },
+    /* 56 */
+    { { 0x04639dfe9e338d10L,0x8ee6996ff42b411bL,0x960461d1a875cef2L,
+        0x1057b6d695b4d0baL,0x27639252a906e0bcL,0x2c19f09ae1c20f8aL },
+      { 0x5b8fc3f0eef4c43dL,0xe2e1b1a807a84aa9L,0x5f455528835d2bdbL,
+        0x0f4aee4d207132ddL,0xe9f8338c3907f675L,0x7a874dc90e0531f0L } },
+    /* 57 */
+    { { 0x84b22d4597c27050L,0xbd0b8df759e70bf8L,0xb4d6740579738b9bL,
+        0x47f4d5f5cd917c4fL,0x9099c4ce13ce6e33L,0x942bfd39521d0f8bL },
+      { 0x5028f0f6a43b566dL,0xaf6e866921bff7deL,0x83f6f856c44232cdL,
+        0x65680579f915069aL,0xd12095a2ecfecb85L,0xcf7f06aedb01ba16L } },
+    /* 58 */
+    { { 0x0f56e3c48ef96c80L,0xd521f2b33ddb609cL,0x2be941027dc1450dL,
+        0x2d21a07102a91fe2L,0x2e6f74fa1efa37deL,0x9a9a90b8156c28a1L },
+      { 0xc54ea9ea9dc7dfcbL,0xc74e66fc2c2c1d62L,0x9f23f96749d3e067L,
+        0x1c7c3a4654dd38adL,0xc70058845946cee3L,0x8985636845cc045dL } },
+    /* 59 */
+    { { 0x29da7cd4fce73946L,0x8f697db523168563L,0x8e235e9ccba92ec6L,
+        0x55d4655f9f91d3eaL,0xf3689f23aa50a6cdL,0xdcf21c2621e6a1a0L },
+      { 0xcffbc82e61b818bfL,0xc74a2f96da47a243L,0x234e980a8bc1a0cfL,
+        0xf35fd6b57929cb6dL,0x81468e12efe17d6cL,0xddea6ae558b2dafbL } },
+    /* 60 */
+    { { 0x294de8877e787b2eL,0x258acc1f39a9310dL,0x92d9714aac14265dL,
+        0x18b5591c708b48a0L,0x27cc6bb0e1abbf71L,0xc0581fa3568307b9L },
+      { 0x9e0f58a3f24d4d58L,0xfebe9bb8e0ce2327L,0x91fd6a419d1be702L,
+        0x9a7d8a45facac993L,0xabc0a08c9e50d66dL,0x02c342f706498201L } },
+    /* 61 */
+    { { 0xccd71407157bdbc2L,0x72fa89c6ad0e1605L,0xb1d3da2bb92a015fL,
+        0x8ad9e7cda0a3fe56L,0x160edcbd24f06737L,0x79d4db3361275be6L },
+      { 0xd3d31fd95f3497c4L,0x8cafeaee04192fb0L,0xe13ca74513a50af3L,
+        0x188261678c85aae5L,0xce06cea89eb556ffL,0x2eef1995bdb549f3L } },
+    /* 62 */
+    { { 0x8ed7d3eb50596edcL,0xaa359362905243a2L,0xa212c2c2a4b6d02bL,
+        0x611fd727c4fbec68L,0x8a0b8ff7b84f733dL,0xd85a6b905f0daf0eL },
+      { 0x60e899f5d4091cf7L,0x4fef2b672eff2768L,0xc1f195cb10c33964L,
+        0x8275d36993626a8fL,0xc77904f40d6c840aL,0x88d8b7fd7a868acdL } },
+    /* 63 */
+    { { 0x85f237237bd98425L,0xd4463992c70b154eL,0xcbb00ee296687a2eL,
+        0x905fdbf7c83214fdL,0x2019d29313593684L,0x0428c393ef51218eL },
+      { 0x40c7623f981e909aL,0x925133857be192daL,0x48fe480f4010907eL,
+        0xdd7a187c3120b459L,0xc9d7702da1fd8f3cL,0x66e4753be358efc5L } },
+    /* 64 */
+    { { 0x070d34e116973cf4L,0x20aee08b7e4f34f7L,0x269af9b95eb8ad29L,
+        0xdde0a036a6a45ddaL,0xa18b528e63df41e0L,0x03cc71b2a260df2aL },
+      { 0x24a6770aa06b1dd7L,0x5bfa9c119d2675d3L,0x73c1e2a196844432L,
+        0x3660558d131a6cf0L,0xb0289c832ee79454L,0xa6aefb01c6d8ddcdL } },
+    /* 65 */
+    { { 0xba1464b401ab5245L,0x9b8d0b6dc48d93ffL,0x939867dc93ad272cL,
+        0xbebe085eae9fdc77L,0x73ae5103894ea8bdL,0x740fc89a39ac22e1L },
+      { 0x5e28b0a328e23b23L,0x2352722ee13104d0L,0xf4667a18b0a2640dL,
+        0xac74a72e49bb37c3L,0x79f734f0e81e183aL,0xbffe5b6c3fd9c0ebL } },
+    /* 66 */
+    { { 0xb1a358f5c6a2123fL,0x927b2d95fe28df6dL,0x89702753f199d2f9L,
+        0x0a73754c1a3f82dcL,0x063d029d777affe1L,0x5439817edae6d34dL },
+      { 0xf7979eef6b8b83c4L,0x615cb2149d945682L,0x8f0e4facc5e57eaeL,
+        0x042b89b8113047ddL,0x888356dc93f36508L,0xbf008d185fd1f32fL } },
+    /* 67 */
+    { { 0x8012aa244e8068dbL,0xc72cc641a5729a47L,0x3c33df2c43f0691dL,
+        0xfa0573471d92145fL,0xaefc0f2fb97f7946L,0x813d75cb2f8121bfL },
+      { 0x05613c724383bba6L,0xa924ce70a4224b3fL,0xe59cecbe5f2179a6L,
+        0x78e2e8aa79f62b61L,0x3ac2cc3b53ad8079L,0x55518d71d8f4fa96L } },
+    /* 68 */
+    { { 0x03cf292200623f3bL,0x095c71115f29ebffL,0x42d7224780aa6823L,
+        0x044c7ba17458c0b0L,0xca62f7ef0959ec20L,0x40ae2ab7f8ca929fL },
+      { 0xb8c5377aa927b102L,0x398a86a0dc031771L,0x04908f9dc216a406L,
+        0xb423a73a918d3300L,0x634b0ff1e0b94739L,0xe29de7252d69f697L } },
+    /* 69 */
+    { { 0x744d14008435af04L,0x5f255b1dfec192daL,0x1f17dc12336dc542L,
+        0x5c90c2a7636a68a8L,0x960c9eb77704ca1eL,0x9de8cf1e6fb3d65aL },
+      { 0xc60fee0d511d3d06L,0x466e2313f9eb52c7L,0x743c0f5f206b0914L,
+        0x42f55bac2191aa4dL,0xcefc7c8fffebdbc2L,0xd4fa6081e6e8ed1cL } },
+    /* 70 */
+    { { 0xb5e405d3b0ab9645L,0xaeec7f98d5f1f711L,0x8ad42311585c2a6eL,
+        0x045acb9e512c6944L,0xae106c4ea90db1c6L,0xb89f33d5898e6563L },
+      { 0x43b07cd97fed2ce4L,0xf9934e17dd815b20L,0x6778d4d50a81a349L,
+        0x9e616ade52918061L,0xfa06db06d7e67112L,0x1da23cf188488091L } },
+    /* 71 */
+    { { 0x821c46b342f2c4b5L,0x931513ef66059e47L,0x7030ae4366f50cd1L,
+        0x43b536c943e7b127L,0x006258cf5fca5360L,0xe4e3ee796b557abfL },
+      { 0xbb6b390024c8b22fL,0x2eb5e2c1fcbf1054L,0x937b18c9567492afL,
+        0xf09432e4acf53957L,0x585f5a9d1dbf3a56L,0xf86751fdbe0887cfL } },
+    /* 72 */
+    { { 0x157399cb9d10e0b2L,0x1c0d595660dc51b7L,0x1d496b8a1f583090L,
+        0x6658bc2688590484L,0x88c08ab703213f28L,0x8d2e0f737ae58de4L },
+      { 0x9b79bc95486cfee6L,0x036a26c7e9e5bc57L,0x1ad03601cd8ae97aL,
+        0x06907f87ff3a0494L,0x078f4bbf2c7eb584L,0xe3731bf57e8d0a5aL } },
+    /* 73 */
+    { { 0x72f2282be1cd0abeL,0xd4f9015e87efefa2L,0x9d1898066c3834bdL,
+        0x9c8cdcc1b8a29cedL,0x0601b9f4fee82ebcL,0x371052bc7206a756L },
+      { 0x76fa109246f32562L,0xdaad534c17351bb4L,0xc3d64c37b3636bb5L,
+        0x038a8c5145d54e00L,0x301e618032c09e7cL,0x9764eae795735151L } },
+    /* 74 */
+    { { 0x8791b19fcbd5256aL,0x4007e0f26ca13a3bL,0x03b794604cf06904L,
+        0xb18a9c22b6c17589L,0xa1cb7d7d81d45908L,0x6e13fa9d21bb68f1L },
+      { 0x47183c62a71e6e16L,0x5cf0ef8ee18749edL,0x2c9c7f9b2e5ed409L,
+        0x042eeacce6e117e1L,0xb86d481613fb5a7fL,0xea1cf0edc9e5feb1L } },
+    /* 75 */
+    { { 0x6e6573c9cea4cc9bL,0x5417961dafcec8f3L,0x804bf02aa438b6f6L,
+        0xb894b03cdcd4ea88L,0xd0f807e93799571fL,0x3466a7f5862156e8L },
+      { 0x51e59acd56515664L,0x55b0f93ca3c5eb0bL,0x84a06b026a4279dbL,
+        0x5c850579c5fae08eL,0xcf07b8dba663a1a2L,0x49a36bbcf46ffc8dL } },
+    /* 76 */
+    { { 0xe47f5acc46d93106L,0x65b7ade0aa897c9cL,0x37cf4c9412d7e4beL,
+        0xa2ae9b80d4b2caa9L,0x5e7ce09ce60357a3L,0x29f77667c8ecd5f9L },
+      { 0xdf6868f5a8a0b1c5L,0x240858cf62978ad8L,0x0f7ac101dc0002a1L,
+        0x1d28a9d7ffe9aa05L,0x744984d65b962c97L,0xa8a7c00b3d28c8b2L } },
+    /* 77 */
+    { { 0x7c58a852ae11a338L,0xa78613f1d1af96e7L,0x7e9767d25355cc73L,
+        0x6ba37009792a2de6L,0x7d60f618124386b2L,0xab09b53111157674L },
+      { 0x95a0484198eb9dd0L,0xe6c17acc15070328L,0xafc6da45489c6e49L,
+        0xab45a60abb211530L,0xc58d65927d7ea933L,0xa3ef3c65095642c6L } },
+    /* 78 */
+    { { 0x89d420e9df010879L,0x9d25255d39576179L,0x9cdefd50e39513b6L,
+        0xe4efe45bd5d1c313L,0xc0149de73f7af771L,0x55a6b4f4340ab06bL },
+      { 0xf1325251ebeaf771L,0x2ab44128878d4288L,0xfcd5832e18e05afeL,
+        0xef52a348cc1fb62bL,0x2bd08274c1c4792aL,0x345c5846877c6dc7L } },
+    /* 79 */
+    { { 0xde15ceb0bea65e90L,0x0987f72b2416d99cL,0x44db578dfd863decL,
+        0xf617b74bac6a3578L,0x9e62bd7adb48e999L,0x877cae61eab1a1beL },
+      { 0x23adddaa3a358610L,0x2fc4d6d1325e2b07L,0x897198f51585754eL,
+        0xf741852cb392b584L,0x9927804cb55f7de1L,0xe9e6c4ed1aa8efaeL } },
+    /* 80 */
+    { { 0x867db63998683186L,0xfb5cf424ddcc4ea9L,0xcc9a7ffed4f0e7bdL,
+        0x7c57f71c7a779f7eL,0x90774079d6b25ef2L,0x90eae903b4081680L },
+      { 0xdf2aae5e0ee1fcebL,0x3ff1da24e86c1a1fL,0x80f587d6ca193edfL,
+        0xa5695523dc9b9d6aL,0x7b84090085920303L,0x1efa4dfcba6dbdefL } },
+    /* 81 */
+    { { 0xfbd838f9e0540015L,0x2c323946c39077dcL,0x8b1fb9e6ad619124L,
+        0x9612440c0ca62ea8L,0x9ad9b52c2dbe00ffL,0xf52abaa1ae197643L },
+      { 0xd0e898942cac32adL,0xdfb79e4262a98f91L,0x65452ecf276f55cbL,
+        0xdb1ac0d27ad23e12L,0xf68c5f6ade4986f0L,0x389ac37b82ce327dL } },
+    /* 82 */
+    { { 0x511188b4f8e60f5bL,0x7fe6701548aa2adaL,0xdb333cb8381abca2L,
+        0xb15e6d9ddaf3fc97L,0x4b24f6eb36aabc03L,0xc59789df72a748b4L },
+      { 0x26fcb8a529cf5279L,0x7a3c6bfc01ad9a6cL,0x866cf88d4b8bac9bL,
+        0xf4c899899c80d041L,0xf0a0424170add148L,0x5a02f47945d81a41L } },
+    /* 83 */
+    { { 0xfa5c877cc1c90202L,0xd099d440f8ac7570L,0x428a5b1bd17881f7L,
+        0x61e267db5b2501d7L,0xf889bf04f2e4465bL,0x4da3ae0876aa4cb8L },
+      { 0x3ef0fe26e3e66861L,0x5e7729533318b86dL,0xc3c35fbc747396dfL,
+        0x5115a29c439ffd37L,0xbfc4bd97b2d70374L,0x088630ea56246b9dL } },
+    /* 84 */
+    { { 0xcd96866db8a9e8c9L,0xa11963b85bb8091eL,0xc7f90d53045b3cd2L,
+        0x755a72b580f36504L,0x46f8b39921d3751cL,0x4bffdc9153c193deL },
+      { 0xcd15c049b89554e7L,0x353c6754f7a26be6L,0x79602370bd41d970L,
+        0xde16470b12b176c0L,0x56ba117540c8809dL,0xe2db35c3e435fb1eL } },
+    /* 85 */
+    { { 0xd71e4aab6328e33fL,0x5486782baf8136d1L,0x07a4995f86d57231L,
+        0xf1f0a5bd1651a968L,0xa5dc5b2476803b6dL,0x5c587cbc42dda935L },
+      { 0x2b6cdb32bae8b4c0L,0x66d1598bb1331138L,0x4a23b2d25d7e9614L,
+        0x93e402a674a8c05dL,0x45ac94e6da7ce82eL,0xeb9f8281e463d465L } },
+    /* 86 */
+    { { 0x34e0f9d1fecf5b9bL,0xa115b12bf206966aL,0x5591cf3b1eaa0534L,
+        0x5f0293cbfb1558f9L,0x1c8507a41bc703a5L,0x92e6b81c862c1f81L },
+      { 0xcc9ebc66cdaf24e3L,0x68917ecd72fcfc70L,0x6dc9a9308157ba48L,
+        0x5d425c08b06ab2b2L,0x362f8ce736e929c4L,0x09f6f57c62e89324L } },
+    /* 87 */
+    { { 0x1c7d6b78d29375fbL,0xfabd851ee35d1157L,0xf6f62dcd4243ea47L,
+        0x1dd924608fe30b0fL,0x08166dfaffc6e709L,0xc6c4c6930881e6a7L },
+      { 0x20368f87d6a53fb0L,0x38718e9f9eb4d1f9L,0x03f08acdafd7e790L,
+        0x0835eb4472fe2a1cL,0x7e05090388076e5dL,0x538f765ea638e731L } },
+    /* 88 */
+    { { 0x0e0249d9c2663b4bL,0xe700ab5b47cd38ddL,0xb192559d2c46559fL,
+        0x8f9f74a84bcde66dL,0xad1615233e2aced5L,0xc155c0473dd03a5bL },
+      { 0x346a87993be454ebL,0x66ee94db83b7dccdL,0x1f6d8378ab9d2abeL,
+        0x4a396dd27733f355L,0x419bd40af53553c2L,0xd0ead98d731dd943L } },
+    /* 89 */
+    { { 0x908e0b0eec142408L,0x98943cb94114b310L,0x03dbf7d81742b1d7L,
+        0xd270df6b693412f4L,0xc50654948f69e20cL,0xa76a90c3697e43a1L },
+      { 0xe0fa33844624825aL,0x82e48c0b8acc34c2L,0x7b24bd14e9a14f2bL,
+        0x4f5dd5e24db30803L,0x0c77a9e7932da0a3L,0x20db90f274c653dcL } },
+    /* 90 */
+    { { 0x261179b70e6c5fd9L,0xf8bec1236c982eeaL,0x47683338d4957b7eL,
+        0xcc47e6640a72f66aL,0xbd54bf6a1bad9350L,0xdfbf4c6af454e95aL },
+      { 0x3f7a7afa6907f4faL,0x7311fae0865ca735L,0x24737ab82a496adaL,
+        0x13e425f115feb79bL,0xe9e97c50a1b93c21L,0xb26b6eac4ddd3eb5L } },
+    /* 91 */
+    { { 0x81cab9f52a2e5f2bL,0xf93caf29bf385ac4L,0xf4bf35c3c909963aL,
+        0x081e730074c9143cL,0x3ea57fa8c281b4c5L,0xe497905c9b340741L },
+      { 0xf556dd8a55ab3cfbL,0xd444b96b518db6adL,0x34f5425a5ef4b955L,
+        0xdda7a3acecd26aa3L,0xb57da11bda655e97L,0x02da3effc2024c70L } },
+    /* 92 */
+    { { 0xe24b00366481d0d9L,0x3740dbe5818fdfe2L,0xc1fc1f45190fda00L,
+        0x329c92803cf27fdeL,0x7435cb536934f43eL,0x2b505a5d7884e8feL },
+      { 0x6cfcc6a6711adcc9L,0xf034325c531e21e1L,0xa2f4a9679b2a8a99L,
+        0x9d5f38423c21bdffL,0xb25c781131b57d66L,0xdb5344d80b8093b9L } },
+    /* 93 */
+    { { 0x0d72e667ae50a2f5L,0x9b7f8d8ae4a861d1L,0xa129f70f330df1cbL,
+        0xe90aa5d7e04fefc3L,0xff561ecbe72c3ae1L,0x0d8fb428cdb955faL },
+      { 0xd2235f73d7663784L,0xc05baec67e2c456aL,0xe5c292e42adbfcccL,
+        0x4fd17988efb110d5L,0x27e57734d19d49f3L,0x188ac4ce84f679feL } },
+    /* 94 */
+    { { 0x7ee344cfa796c53eL,0xbbf6074d0868009bL,0x1f1594f7474a1295L,
+        0x66776edcac11632dL,0x1862278b04e2fa5aL,0x52665cf2c854a89aL },
+      { 0x7e3764648104ab58L,0x167759137204fd6dL,0x86ca06a544ea1199L,
+        0xaa3f765b1c9240ddL,0x5f8501a924746149L,0x7b982e30dcd251d7L } },
+    /* 95 */
+    { { 0xe44e9efcc15f3060L,0x5ad62f2ea87ebbe6L,0x36499d41c79500d4L,
+        0xa66d6dc0336fa9d1L,0xf8afc4955afd3b1fL,0x1d8ccb24e5c9822bL },
+      { 0x4031422b79d7584bL,0xc54a0580ea3f20ddL,0x3f837c8f958468c5L,
+        0x3d82f110fbea7735L,0x679a87787dffe2fcL,0x48eba63b20704803L } },
+    /* 96 */
+    { { 0x89b10d41df46e2f6L,0x13ab57f819514367L,0x067372b91d469c87L,
+        0x0c195afa4f6c5798L,0xea43a12a272c9acfL,0x9dadd8cb678abdacL },
+      { 0xcce56c6be182579aL,0x86febadb2d26c2d8L,0x1c668ee12a44745cL,
+        0x580acd8698dc047aL,0x5a2b79cc51b9ec2dL,0x007da6084054f6a0L } },
+    /* 97 */
+    { { 0x9e3ca35217b00dd0L,0x046779cb0e81a7a6L,0xb999fef3d482d871L,
+        0xe6f38134d9233fbcL,0x112c3001f48cd0e0L,0x934e75763c6c66aeL },
+      { 0xb44d4fc3d73234dcL,0xfcae2062864eafc1L,0x843afe2526bef21aL,
+        0x61355107f3b75fdfL,0x8367a5aa794c2e6bL,0x3d2629b18548a372L } },
+    /* 98 */
+    { { 0x6230618f437cfaf8L,0x5b8742cb2032c299L,0x949f72472293643aL,
+        0xb8040f1a09464f79L,0x049462d24f254143L,0xabd6b522366c7e76L },
+      { 0x119b392bd5338f55L,0x1a80a9ce01495a0cL,0xf3118ca7f8d7537eL,
+        0xb715adc26bf4b762L,0x24506165a8482b6cL,0xd958d7c696a7c84dL } },
+    /* 99 */
+    { { 0x9ad8aa87bdc21f31L,0xadb3cab48063e58cL,0xefd86283b07dd7b8L,
+        0xc7b9b7621be7c6b4L,0x2ef58741015582deL,0xc970c52e299addf3L },
+      { 0x78f02e2a22f24d66L,0xefec1d1074cc100aL,0xaf2a6a3909316e1aL,
+        0xce7c22055849dd49L,0x9c1fe75c96bffc4cL,0xcad98fd27ba06ec0L } },
+    /* 100 */
+    { { 0xed76e2d0b648b73eL,0xa9f92ce51cfd285eL,0xa8c86c062ed13de1L,
+        0x1d3a574ea5191a93L,0x385cdf8b1ad1b8bfL,0xbbecc28a47d2cfe3L },
+      { 0x98d326c069cec548L,0x4f5bc1ddf240a0b2L,0x241a706229057236L,
+        0x0fc6e9c5c68294a4L,0x4d04838ba319f17aL,0x8b612cf19ffc1c6fL } },
+    /* 101 */
+    { { 0x9bb0b5014c3830ebL,0x3d08f83c8ee0d0c5L,0xa4a6264279ba9389L,
+        0x5d5d40449cbc2914L,0xae9eb83e074c46f0L,0x63bb758f74ead7d6L },
+      { 0x1c40d2eac6bb29e0L,0x95aa2d874b02f41eL,0x9298917553cb199aL,
+        0xdd91bafe51584f6dL,0x3715efb931a1aaecL,0xc1b6ae5b46780f9eL } },
+    /* 102 */
+    { { 0xcded3e4b42772f41L,0x3a700d5d3bcb79d1L,0x4430d50e80feee60L,
+        0x444ef1fcf5e5d4bbL,0xc660194fe6e358ffL,0xe68a2f326a91b43cL },
+      { 0x5842775c977fe4d2L,0x78fdef5c7e2a41ebL,0x5f3bec02ff8df00eL,
+        0xf4b840cd5852525dL,0x0870483a4e6988bdL,0x39499e39cc64b837L } },
+    /* 103 */
+    { { 0xfc05de80b08df5feL,0x0c12957c63ba0362L,0xea379414d5cf1428L,
+        0xc559132a54ef6216L,0x33d5f12fb9e65cf8L,0x09c602781695d663L },
+      { 0x3ac1ced461f7a2fbL,0xdd838444d4f5eeb8L,0x82a38c6c8318fcadL,
+        0x315be2e5e9f1a864L,0x317b5771442daf47L,0x81b5904a95aa5f9eL } },
+    /* 104 */
+    { { 0x6b6b1c508b21d232L,0x87f3dbc08c2cba75L,0xa7e74b46ae9f0fafL,
+        0x036a0985bb7b8079L,0x4f185b908d974a25L,0x5aa7cef0d9af5ec9L },
+      { 0xe0566a7057dcfffcL,0x6ea311dab8453225L,0x72ea1a8d23368aa9L,
+        0xed9b208348cd552dL,0xb987967cc80ea435L,0xad735c756c104173L } },
+    /* 105 */
+    { { 0xaea85ab3cee76ef4L,0x44997444af1d2b93L,0x0851929beacb923fL,
+        0xb080b59051e3bc0cL,0xc4ee1d8659be68a2L,0xf00de21964b26cdaL },
+      { 0x8d7fb5c0f2e90d4dL,0x00e219a777d9ec64L,0xc4e6febd5d1c491cL,
+        0x080e37541a8f4585L,0x4a9b86c848d2af9cL,0x2ed70db6b6679851L } },
+    /* 106 */
+    { { 0xaee44116586f25cbL,0xf7b6861fa0fcf70fL,0x55d2cd2018a350e8L,
+        0x861bf3e592dc286fL,0x9ab18ffa6226aba7L,0xd15827bea9857b03L },
+      { 0x26c1f54792e6acefL,0x422c63c8ac1fbac3L,0xa2d8760dfcbfd71dL,
+        0x35f6a539b2511224L,0xbaa88fa1048d1a21L,0x49f1abe9ebf999dbL } },
+    /* 107 */
+    { { 0x16f9f4f4f7492b73L,0xcf28ec1ecb392b1aL,0x45b130d469ca6ffcL,
+        0x28ba8d40b72efa58L,0xace987c75ca066f5L,0x3e3992464ad022ebL },
+      { 0x63a2d84e752555bbL,0xaaa93b4a9c2ae394L,0xcd80424ec89539caL,
+        0x6d6b5a6daa119a99L,0xbd50334c379f2629L,0x899e925eef3cc7d3L } },
+    /* 108 */
+    { { 0xb7ff3651bf825dc4L,0x0f741cc440b9c462L,0x771ff5a95cc4fb5bL,
+        0xcb9e9c9b47fd56feL,0xbdf053db5626c0d3L,0xa97ce675f7e14098L },
+      { 0x68afe5a36c934f5eL,0x6cd5e148ccefc46fL,0xc7758570d7a88586L,
+        0x49978f5edd558d40L,0xa1d5088a64ae00c1L,0x58f2a720f1d65bb2L } },
+    /* 109 */
+    { { 0x66fdda4a3e4daedbL,0x38318c1265d1b052L,0x28d910a24c4bbf5cL,
+        0x762fe5c478a9cd14L,0x08e5ebaad2cc0aeeL,0xd2cdf257ca0c654cL },
+      { 0x48f7c58b08b717d2L,0x3807184a386cd07aL,0x3240f626ae7d0112L,
+        0x03e9361bc43917b0L,0xf261a87620aea018L,0x53f556a47e1e6372L } },
+    /* 110 */
+    { { 0xc84cee562f512a90L,0x24b3c0041b0ea9f1L,0x0ee15d2de26cc1eaL,
+        0xd848762cf0c9ef7dL,0x1026e9c5d5341435L,0x8f5b73dcfdb16b31L },
+      { 0x1f69bef2d2c75d95L,0x8d33d581be064ddaL,0x8c024c1257ed35e6L,
+        0xf8d435f9c309c281L,0xfd295061d6960193L,0x66618d78e9e49541L } },
+    /* 111 */
+    { { 0x571cfd458ce382deL,0x175806eede900ddeL,0x6184996534aba3b5L,
+        0xe899778ade7aec95L,0xe8f00f6eff4aa97fL,0xae971cb5010b0c6dL },
+      { 0x1827eebc3af788f1L,0xd46229ffe413fe2dL,0x8a15455b4741c9b4L,
+        0x5f02e690f8e424ebL,0x40a1202edae87712L,0x49b3bda264944f6dL } },
+    /* 112 */
+    { { 0xd63c6067035b2d69L,0xb507150d6bed91b0L,0x1f35f82f7afb39b2L,
+        0xb9bd9c0116012b66L,0x00d97960ed0a5f50L,0xed7054512716f7c9L },
+      { 0x1576eff4127abdb4L,0x6850d698f01e701cL,0x9fa7d7493fc87e2fL,
+        0x0b6bcc6fb0ce3e48L,0xf4fbe1f5f7d8c1c0L,0xcf75230e02719cc6L } },
+    /* 113 */
+    { { 0x6761d6c2722d94edL,0xd1ec3f213718820eL,0x65a40b7025d0e7c6L,
+        0xd67f830ebaf3cf31L,0x633b3807b93ea430L,0x17faa0ea0bc96c69L },
+      { 0xe6bf3482df866b98L,0x205c1ee9a9db52d4L,0x51ef9bbdff9ab869L,
+        0x3863dad175eeb985L,0xef216c3bd3cf442aL,0x3fb228e3f9c8e321L } },
+    /* 114 */
+    { { 0x94f9b70c0760ac07L,0xf3c9ccae9d79bf4dL,0x73cea084c5ffc83dL,
+        0xef50f943dc49c38eL,0xf467a2aebc9e7330L,0x5ee534b644ea7fbaL },
+      { 0x20cb627203609e7fL,0x0984435562fdc9f0L,0xaf5c8e580f1457f7L,
+        0xd1f50a6cb4b25941L,0x77cb247c2ec82395L,0xa5f3e1e5da3dca33L } },
+    /* 115 */
+    { { 0x023489d67d85fa94L,0x0ba405372db9ce47L,0x0fdf7a1faed7aad1L,
+        0xa57b0d739a4ccb40L,0x48fcec995b18967cL,0xf30b5b6eb7274d24L },
+      { 0x7ccb4773c81c5338L,0xb85639e6a3ed6bd0L,0x7d9df95f1d56eadaL,
+        0xe256d57f0a1607adL,0x6da7ffdc957574d6L,0x65f8404601c7a8c4L } },
+    /* 116 */
+    { { 0x8d45d0cbcba1e7f1L,0xef0a08c002b55f64L,0x771ca31b17e19892L,
+        0xe1843ecb4885907eL,0x67797ebc364ce16aL,0x816d2b2d8df4b338L },
+      { 0xe870b0e539aa8671L,0x9f0db3e4c102b5f5L,0x342966591720c697L,
+        0x0ad4c89e613c0d2aL,0x1af900b2418ddd61L,0xe087ca72d336e20eL } },
+    /* 117 */
+    { { 0x222831ffaba10079L,0x0dc5f87b6d64fff2L,0x445479073e8cb330L,
+        0xe815aaa2702a33fbL,0x338d6b2e5fba3215L,0x0f7535cb79f549c8L },
+      { 0x471ecd972ee95923L,0x1e868b37c6d1c09fL,0x2bc7b8ecc666ef4eL,
+        0xf5416589808a4bfcL,0xf23e9ee23fbc4d2eL,0x4357236c2d75125bL } },
+    /* 118 */
+    { { 0xfe176d95ba9cdb1bL,0x45a1ca012f82791eL,0x97654af24de4cca2L,
+        0xbdbf9d0e5cc4bcb9L,0xf6a7df50ad97ac0aL,0xc52112b061359fd6L },
+      { 0x696d9ce34f05eae3L,0x903adc02e943ac2bL,0xa90753470848be17L,
+        0x1e20f1702a3973e5L,0xe1aacc1c6feb67e9L,0x2ca0ac32e16bc6b9L } },
+    /* 119 */
+    { { 0xffea12e4ef871eb5L,0x94c2f25da8bf0a7aL,0x4d1e4c2a78134eaaL,
+        0x11ed16fb0360fb10L,0x4029b6db85fc11beL,0x5e9f7ab7f4d390faL },
+      { 0x5076d72f30646612L,0xa0afed1ddda1d0d8L,0x2902225785a1d103L,
+        0xcb499e174e276bcdL,0x16d1da7151246c3dL,0xc72d56d3589a0443L } },
+    /* 120 */
+    { { 0xdf5ffc74dae5bb45L,0x99068c4a261bd6dcL,0xdc0afa7aaa98ec7bL,
+        0xedd2ee00f121e96dL,0x163cc7be1414045cL,0xb0b1bbce335af50eL },
+      { 0xd440d78501a06293L,0xcdebab7c6552e644L,0x48cb8dbc8c757e46L,
+        0x81f9cf783cabe3cbL,0xddd02611b123f59aL,0x3dc7b88eeeb3784dL } },
+    /* 121 */
+    { { 0xe1b8d398c4741456L,0xa9dfa9026032a121L,0x1cbfc86d1263245bL,
+        0xf411c7625244718cL,0x96521d5405b0fc54L,0x1afab46edbaa4985L },
+      { 0xa75902ba8674b4adL,0x486b43ad5ad87d12L,0x72b1c73636e0d099L,
+        0x39890e07bb6cd6d6L,0x8128999c59bace4eL,0xd8da430b7b535e33L } },
+    /* 122 */
+    { { 0x39f65642c6b75791L,0x050947a621806bfbL,0x0ca3e3701362ef84L,
+        0x9bc60aed8c3d2391L,0x9b488671732e1ddcL,0x12d10d9ea98ee077L },
+      { 0xb6f2822d3651b7dcL,0x6345a5ba80abd138L,0x62033262472d3c84L,
+        0xd54a1d40acc57527L,0x6ea46b3a424447cbL,0x5bc410572fb1a496L } },
+    /* 123 */
+    { { 0xe70c57a3a751cd0eL,0x190d8419eba3c7d6L,0xb1c3bee79d47d55aL,
+        0xda941266f912c6d8L,0x12e9aacc407a6ad6L,0xd6ce5f116e838911L },
+      { 0x063ca97b70e1f2ceL,0xa3e47c728213d434L,0xa016e24184df810aL,
+        0x688ad7b0dfd881a4L,0xa37d99fca89bf0adL,0xd8e3f339a23c2d23L } },
+    /* 124 */
+    { { 0xbdf53163750bed6fL,0x808abc3283e68b0aL,0x85a366275bb08a33L,
+        0xf72a3a0f6b0e4abeL,0xf7716d19faf0c6adL,0x22dcc0205379b25fL },
+      { 0x7400bf8df9a56e11L,0x6cb8bad756a47f21L,0x7c97176f7a6eb644L,
+        0xe8fd84f7d1f5b646L,0x98320a9444ddb054L,0x07071ba31dde86f5L } },
+    /* 125 */
+    { { 0x6fdfa0e598f8fcb9L,0x89cec8e094d0d70cL,0xa0899397106d20a8L,
+        0x915bfb9aba8acc9cL,0x1370c94b5507e01cL,0x83246a608a821ffbL },
+      { 0xa8273a9fbe3c378fL,0x7e54478935a25be9L,0x6cfa49724dd929d7L,
+        0x987fed9d365bd878L,0x4982ac945c29a7aeL,0x4589a5d75ddd7ec5L } },
+    /* 126 */
+    { { 0x9fabb174a95540a9L,0x7cfb886f0162c5b0L,0x17be766bea3dee18L,
+        0xff7da41fe88e624cL,0xad0b71eb8b919c38L,0x86a522e0f31ff9a9L },
+      { 0xbc8e6f72868bc259L,0x6130c6383ccef9e4L,0x09f1f4549a466555L,
+        0x8e6c0f0919b2bfb4L,0x945c46c90ca7bb22L,0xacd871684dafb67bL } },
+    /* 127 */
+    { { 0x090c72ca10c53841L,0xc20ae01b55a4fcedL,0x03f7ebd5e10234adL,
+        0xb3f42a6a85892064L,0xbdbc30c0b4a14722L,0x971bc4378ca124ccL },
+      { 0x6f79f46d517ff2ffL,0x6a9c96e2ecba947bL,0x5e79f2f462925122L,
+        0x30a96bb16a4e91f1L,0x1147c9232d4c72daL,0x65bc311f5811e4dfL } },
+    /* 128 */
+    { { 0x87c7dd7d139b3239L,0x8b57824e4d833baeL,0xbcbc48789fff0015L,
+        0x8ffcef8b909eaf1aL,0x9905f4eef1443a78L,0x020dd4a2e15cbfedL },
+      { 0xca2969eca306d695L,0xdf940cadb93caf60L,0x67f7fab787ea6e39L,
+        0x0d0ee10ff98c4fe5L,0xc646879ac19cb91eL,0x4b4ea50c7d1d7ab4L } },
+    /* 129 */
+    { { 0x19e409457a0db57eL,0xe6017cad9a8c9702L,0xdbf739e51be5cff9L,
+        0x3646b3cda7a938a2L,0x0451108568350dfcL,0xad3bd6f356e098b5L },
+      { 0x935ebabfee2e3e3eL,0xfbd01702473926cbL,0x7c735b029e9fb5aaL,
+        0xc52a1b852e3feff0L,0x9199abd3046b405aL,0xe306fcec39039971L } },
+    /* 130 */
+    { { 0xd6d9aec823e4712cL,0x7ca8376cc3c198eeL,0xe6d8318731bebd8aL,
+        0xed57aff3d88bfef3L,0x72a645eecf44edc7L,0xd4e63d0b5cbb1517L },
+      { 0x98ce7a1cceee0ecfL,0x8f0126335383ee8eL,0x3b879078a6b455e8L,
+        0xcbcd3d96c7658c06L,0x721d6fe70783336aL,0xf21a72635a677136L } },
+    /* 131 */
+    { { 0x19d8b3cd9586ba11L,0xd9e0aeb28a5c0480L,0xe4261dbf2230ef5cL,
+        0x095a9dee02e6bf09L,0x8963723c80dc7784L,0x5c97dbaf145157b1L },
+      { 0x97e744344bc4503eL,0x0fb1cb3185a6b370L,0x3e8df2becd205d4bL,
+        0x497dd1bcf8f765daL,0x92ef95c76c988a1aL,0x3f924baa64dc4cfaL } },
+    /* 132 */
+    { { 0x6bf1b8dd7268b448L,0xd4c28ba1efd79b94L,0x2fa1f8c8e4e3551fL,
+        0x769e3ad45c9187a9L,0x28843b4d40326c0dL,0xfefc809450d5d669L },
+      { 0x30c85bfd90339366L,0x4eeb56f15ccf6c3aL,0x0e72b14928ccd1dcL,
+        0x73ee85b5f2ce978eL,0xcdeb2bf33165bb23L,0x8106c9234e410abfL } },
+    /* 133 */
+    { { 0xc8df01617d02f4eeL,0x8a78154718e21225L,0x4ea895eb6acf9e40L,
+        0x8b000cb56e5a633dL,0xf31d86d57e981ffbL,0xf5c8029c4475bc32L },
+      { 0x764561ce1b568973L,0x2f809b81a62996ecL,0x9e513d64da085408L,
+        0xc27d815de61ce309L,0x0da6ff99272999e0L,0xbd284779fead73f7L } },
+    /* 134 */
+    { { 0x6033c2f99b1cdf2bL,0x2a99cf06bc5fa151L,0x7d27d25912177b3bL,
+        0xb1f15273c4485483L,0x5fd57d81102e2297L,0x3d43e017c7f6acb7L },
+      { 0x41a8bb0b3a70eb28L,0x67de2d8e3e80b06bL,0x09245a4170c28de5L,
+        0xad7dbcb1a7b26023L,0x70b08a352cbc6c1eL,0xb504fb669b33041fL } },
+    /* 135 */
+    { { 0xa8e85ab5f97a27c2L,0x6ac5ec8bc10a011bL,0x55745533ffbcf161L,
+        0x01780e8565790a60L,0xe451bf8599ee75b0L,0x8907a63b39c29881L },
+      { 0x76d46738260189edL,0x284a443647bd35cbL,0xd74e8c4020cab61eL,
+        0x6264bf8c416cf20aL,0xfa5a6c955fd820ceL,0xfa7154d0f24bb5fcL } },
+    /* 136 */
+    { { 0x18482cec9b3f5034L,0x962d445acd9e68fdL,0x266fb1d695746f23L,
+        0xc66ade5a58c94a4bL,0xdbbda826ed68a5b6L,0x05664a4d7ab0d6aeL },
+      { 0xbcd4fe51025e32fcL,0x61a5aebfa96df252L,0xd88a07e231592a31L,
+        0x5d9d94de98905517L,0x96bb40105fd440e7L,0x1b0c47a2e807db4cL } },
+    /* 137 */
+    { { 0x5c2a6ac808223878L,0xba08c269e65a5558L,0xd22b1b9b9bbc27fdL,
+        0x919171bf72b9607dL,0x9ab455f9e588dc58L,0x6d54916e23662d93L },
+      { 0x8da8e9383b1de0c1L,0xa84d186a804f278fL,0xbf4988ccd3461695L,
+        0xf5eae3bee10eb0cbL,0x1ff8b68fbf2a66edL,0xa68daf67c305b570L } },
+    /* 138 */
+    { { 0xc1004cff44b2e045L,0x91b5e1364b1c05d4L,0x53ae409088a48a07L,
+        0x73fb2995ea11bb1aL,0x320485703d93a4eaL,0xcce45de83bfc8a5fL },
+      { 0xaff4a97ec2b3106eL,0x9069c630b6848b4fL,0xeda837a6ed76241cL,
+        0x8a0daf136cc3f6cfL,0x199d049d3da018a8L,0xf867c6b1d9093ba3L } },
+    /* 139 */
+    { { 0xe4d42a5656527296L,0xae26c73dce71178dL,0x70a0adac6c251664L,
+        0x813483ae5dc0ae1dL,0x7574eacddaab2dafL,0xc56b52dcc2d55f4fL },
+      { 0x872bc16795f32923L,0x4be175815bdd2a89L,0x9b57f1e7a7699f00L,
+        0x5fcd9c723ac2de02L,0x83af3ba192377739L,0xa64d4e2bfc50b97fL } },
+    /* 140 */
+    { { 0x2172dae20e552b40L,0x62f49725d34d52e8L,0x7930ee4007958f98L,
+        0x56da2a90751fdd74L,0xf1192834f53e48c3L,0x34d2ac268e53c343L },
+      { 0x1073c21813111286L,0x201dac14da9d9827L,0xec2c29dbee95d378L,
+        0x9316f1191f3ee0b1L,0x7890c9f0544ce71cL,0xd77138af27612127L } },
+    /* 141 */
+    { { 0x78045e6d3b4ad1cdL,0xcd86b94e4aa49bc1L,0x57e51f1dfd677a16L,
+        0xd9290935fa613697L,0x7a3f959334f4d893L,0x8c9c248b5d5fcf9bL },
+      { 0x9f23a4826f70d4e9L,0x1727345463190ae9L,0x4bdd7c135b081a48L,
+        0x1e2de38928d65271L,0x0bbaaa25e5841d1fL,0xc4c18a79746772e5L } },
+    /* 142 */
+    { { 0x10ee2681593375acL,0x4f3288be7dd5e113L,0x9a97b2fb240f3538L,
+        0xfa11089f1de6b1e2L,0x516da5621351bc58L,0x573b61192dfa85b5L },
+      { 0x89e966836cba7df5L,0xf299be158c28ab40L,0xe91c9348ad43fcbfL,
+        0xe9bbc7cc9a1cefb3L,0xc8add876738b2775L,0x6e3b1f2e775eaa01L } },
+    /* 143 */
+    { { 0x0365a888b677788bL,0x634ae8c43fd6173cL,0x304987619e498dbeL,
+        0x08c43e6dc8f779abL,0x068ae3844c09aca9L,0x2380c70b2018d170L },
+      { 0xcf77fbc3a297c5ecL,0xdacbc853ca457948L,0x3690de04336bec7eL,
+        0x26bbac6414eec461L,0xd1c23c7e1f713abfL,0xf08bbfcde6fd569eL } },
+    /* 144 */
+    { { 0x5f8163f484770ee3L,0x0e0c7f94744a1706L,0x9c8f05f7e1b2d46dL,
+        0x417eafe7d01fd99aL,0x2ba15df511440e5bL,0xdc5c552a91a6fbcfL },
+      { 0x86271d74a270f721L,0x32c0a075a004485bL,0x9d1a87e38defa075L,
+        0xb590a7acbf0d20feL,0x430c41c28feda1f5L,0x454d287958f6ec24L } },
+    /* 145 */
+    { { 0x52b7a6357c525435L,0x3d9ef57f37c4bdbcL,0x2bb93e9edffcc475L,
+        0xf7b8ba987710f3beL,0x42ee86da21b727deL,0x55ac3f192e490d01L },
+      { 0x487e3a6ec0c1c390L,0x036fb345446cde7bL,0x089eb276496ae951L,
+        0xedfed4d971ed1234L,0x661b0dd5900f0b46L,0x11bd6f1b8582f0d3L } },
+    /* 146 */
+    { { 0x5cf9350f076bc9d1L,0x15d903becf3cd2c3L,0x21cfc8c225af031cL,
+        0xe0ad32488b1cc657L,0xdd9fb96370014e87L,0xf0f3a5a1297f1658L },
+      { 0xbb908fbaf1f703aaL,0x2f9cc4202f6760baL,0x00ceec6666a38b51L,
+        0x4deda33005d645daL,0xb9cf5c72f7de3394L,0xaeef65021ad4c906L } },
+    /* 147 */
+    { { 0x0583c8b17a19045dL,0xae7c3102d052824cL,0x2a234979ff6cfa58L,
+        0xfe9dffc962c733c0L,0x3a7fa2509c0c4b09L,0x516437bb4fe21805L },
+      { 0x9454e3d5c2a23ddbL,0x0726d887289c104eL,0x8977d9184fd15243L,
+        0xc559e73f6d7790baL,0x8fd3e87d465af85fL,0xa2615c745feee46bL } },
+    /* 148 */
+    { { 0xc8d607a84335167dL,0x8b42d804e0f5c887L,0x5f9f13df398d11f9L,
+        0x5aaa508720740c67L,0x83da9a6aa3d9234bL,0xbd3a5c4e2a54bad1L },
+      { 0xdd13914c2db0f658L,0x29dcb66e5a3f373aL,0xbfd62df55245a72bL,
+        0x19d1802391e40847L,0xd9df74dbb136b1aeL,0x72a06b6b3f93bc5bL } },
+    /* 149 */
+    { { 0x6da19ec3ad19d96fL,0xb342daa4fb2a4099L,0x0e61633a662271eaL,
+        0x3bcece81ce8c054bL,0x7cc8e0618bd62dc6L,0xae189e19ee578d8bL },
+      { 0x73e7a25ddced1eedL,0xc1257f0a7875d3abL,0x2cb2d5a21cfef026L,
+        0xd98ef39bb1fdf61cL,0xcd8e6f6924e83e6cL,0xd71e7076c7b7088bL } },
+    /* 150 */
+    { { 0x339368309d4245bfL,0x22d962172ac2953bL,0xb3bf5a8256c3c3cdL,
+        0x50c9be910d0699e8L,0xec0944638f366459L,0x6c056dba513b7c35L },
+      { 0x687a6a83045ab0e3L,0x8d40b57f445c9295L,0x0f345048a16f5954L,
+        0x64b5c6393d8f0a87L,0x106353a29f71c5e2L,0xdd58b475874f0dd4L } },
+    /* 151 */
+    { { 0x67ec084f62230c72L,0xf14f6cca481385e3L,0xf58bb4074cda7774L,
+        0xe15011b1aa2dbb6bL,0xd488369d0c035ab1L,0xef83c24a8245f2fdL },
+      { 0xfb57328f9fdc2538L,0x79808293191fe46aL,0xe28f5c4432ede548L,
+        0x1b3cda99ea1a022cL,0x39e639b73df2ec7fL,0x77b6272b760e9a18L } },
+    /* 152 */
+    { { 0x2b1d51bda65d56d5L,0x3a9b71f97ea696e0L,0x95250ecc9904f4c4L,
+        0x8bc4d6ebe75774b7L,0x0e343f8aeaeeb9aaL,0xc473c1d1930e04cbL },
+      { 0x282321b1064cd8aeL,0xf4b4371e5562221cL,0xc1cc81ecd1bf1221L,
+        0xa52a07a9e2c8082fL,0x350d8e59ba64a958L,0x29e4f3de6fb32c9aL } },
+    /* 153 */
+    { { 0x0aa9d56cba89aaa5L,0xf0208ac0c4c6059eL,0x7400d9c6bd6ddca4L,
+        0xb384e475f2c2f74aL,0x4c1061fcb1562dd3L,0x3924e2482e153b8dL },
+      { 0xf38b8d98849808abL,0x29bf3260a491aa36L,0x85159ada88220edeL,
+        0x8b47915bbe5bc422L,0xa934d72ed7300967L,0xc4f303982e515d0dL } },
+    /* 154 */
+    { { 0xe3e9ee421b1de38bL,0xa124e25a42636760L,0x90bf73c090165b1aL,
+        0x21802a34146434c5L,0x54aa83f22e1fa109L,0x1d4bd03ced9c51e9L },
+      { 0xc2d96a38798751e6L,0xed27235f8c3507f5L,0xb5fb80e2c8c24f88L,
+        0xf873eefad37f4f78L,0x7229fd74f224ba96L,0x9dcd91999edd7149L } },
+    /* 155 */
+    { { 0xee9f81a64e94f22aL,0xe5609892f71ec341L,0x6c818ddda998284eL,
+        0x9fd472953b54b098L,0x47a6ac030e8a7cc9L,0xde684e5eb207a382L },
+      { 0x4bdd1ecd2b6b956bL,0x09084414f01b3583L,0xe2f80b3255233b14L,
+        0x5a0fec54ef5ebc5eL,0x74cf25e6bf8b29a2L,0x1c757fa07f29e014L } },
+    /* 156 */
+    { { 0x1bcb5c4aeb0fdfe4L,0xd7c649b3f0899367L,0xaef68e3f05bc083bL,
+        0x57a06e46a78aa607L,0xa2136ecc21223a44L,0x89bd648452f5a50bL },
+      { 0x724411b94455f15aL,0x23dfa97008a9c0fdL,0x7b0da4d16db63befL,
+        0x6f8a7ec1fb162443L,0xc1ac9ceee98284fbL,0x085a582b33566022L } },
+    /* 157 */
+    { { 0x15cb61f9ec1f138aL,0x11c9a230668f0c28L,0xac829729df93f38fL,
+        0xcef256984048848dL,0x3f686da02bba8fbfL,0xed5fea78111c619aL },
+      { 0x9b4f73bcd6d1c833L,0x5095160686e7bf80L,0xa2a73508042b1d51L,
+        0x9ef6ea495fb89ec2L,0xf1008ce95ef8b892L,0x78a7e6849ae8568bL } },
+    /* 158 */
+    { { 0x3fe83a7c10470cd8L,0x92734682f86df000L,0xb5dac06bda9409b5L,
+        0x1e7a966094939c5fL,0xdec6c1505cc116dcL,0x1a52b40866bac8ccL },
+      { 0x5303a3656e864045L,0x45eae72a9139efc1L,0x83bec6466f31d54fL,
+        0x2fb4a86f6e958a6dL,0x6760718e4ff44030L,0x008117e3e91ae0dfL } },
+    /* 159 */
+    { { 0x5d5833ba384310a2L,0xbdfb4edc1fd6c9fcL,0xb9a4f102849c4fb8L,
+        0xe5fb239a581c1e1fL,0xba44b2e7d0a9746dL,0x78f7b7683bd942b9L },
+      { 0x076c8ca1c87607aeL,0x82b23c2ed5caaa7eL,0x6a581f392763e461L,
+        0xca8a5e4a3886df11L,0xc87e90cf264e7f22L,0x04f74870215cfcfcL } },
+    /* 160 */
+    { { 0x5285d116141d161cL,0x67cd2e0e93c4ed17L,0x12c62a647c36187eL,
+        0xf5329539ed2584caL,0xc4c777c442fbbd69L,0x107de7761bdfc50aL },
+      { 0x9976dcc5e96beebdL,0xbe2aff95a865a151L,0x0e0a9da19d8872afL,
+        0x5e357a3da63c17ccL,0xd31fdfd8e15cc67cL,0xc44bbefd7970c6d8L } },
+    /* 161 */
+    { { 0x703f83e24c0c62f1L,0x9b1e28ee4e195572L,0x6a82858bfe26ccedL,
+        0xd381c84bc43638faL,0x94f72867a5ba43d8L,0x3b4a783d10b82743L },
+      { 0xee1ad7b57576451eL,0xc3d0b59714b6b5c8L,0x3dc30954fcacc1b8L,
+        0x55df110e472c9d7bL,0x97c86ed702f8a328L,0xd043341388dc098fL } },
+    /* 162 */
+    { { 0x1a60d1522ca8f2feL,0x61640948491bd41fL,0x6dae29a558dfe035L,
+        0x9a615bea278e4863L,0xbbdb44779ad7c8e5L,0x1c7066302ceac2fcL },
+      { 0x5e2b54c699699b4bL,0xb509ca6d239e17e8L,0x728165feea063a82L,
+        0x6b5e609db6a22e02L,0x12813905b26ee1dfL,0x07b9f722439491faL } },
+    /* 163 */
+    { { 0x1592ec1448ff4e49L,0x3e4e9f176d644129L,0x7acf82881156acc0L,
+        0x5aa34ba8bb092b0bL,0xcd0f90227d38393dL,0x416724ddea4f8187L },
+      { 0x3c4e641cc0139e73L,0xe0fe46cf91e4d87dL,0xedb3c792cab61f8aL,
+        0x4cb46de4d3868753L,0xe449c21d20f1098aL,0x5e5fd059f5b8ea6eL } },
+    /* 164 */
+    { { 0x7fcadd4675856031L,0x89c7a4cdeaf2fbd0L,0x1af523ce7a87c480L,
+        0xe5fc109561d9ae90L,0x3fb5864fbcdb95f5L,0xbeb5188ebb5b2c7dL },
+      { 0x3d1563c33ae65825L,0x116854c40e57d641L,0x11f73d341942ebd3L,
+        0x24dc5904c06955b3L,0x8a0d4c83995a0a62L,0xfb26b86d5d577b7dL } },
+    /* 165 */
+    { { 0xc53108e7c686ae17L,0x9090d739d1c1da56L,0x4583b0139aec50aeL,
+        0xdd9a088ba49a6ab2L,0x28192eeaf382f850L,0xcc8df756f5fe910eL },
+      { 0x877823a39cab7630L,0x64984a9afb8e7fc1L,0x5448ef9c364bfc16L,
+        0xbbb4f871c44e2a9aL,0x901a41ab435c95e9L,0xc6c23e5faaa50a06L } },
+    /* 166 */
+    { { 0xb78016c19034d8ddL,0x856bb44b0b13e79bL,0x85c6409ab3241a05L,
+        0x8d2fe19a2d78ed21L,0xdcc7c26d726eddf2L,0x3ccaff5f25104f04L },
+      { 0x397d7edc6b21f843L,0xda88e4dde975de4cL,0x5273d3964f5ab69eL,
+        0x537680e39aae6cc0L,0xf749cce53e6f9461L,0x021ddbd9957bffd3L } },
+    /* 167 */
+    { { 0x7b64585f777233cfL,0xfe6771f60942a6f0L,0x636aba7adfe6eef0L,
+        0x63bbeb5686038029L,0xacee5842de8fcf36L,0x48d9aa99d4a20524L },
+      { 0xcff7a74c0da5e57aL,0xc232593ce549d6c9L,0x68504bccf0f2287bL,
+        0x6d7d098dbc8360b5L,0xeac5f1495b402f41L,0x61936f11b87d1bf1L } },
+    /* 168 */
+    { { 0xaa9da167b8153a9dL,0xa49fe3ac9e83ecf0L,0x14c18f8e1b661384L,
+        0x61c24dab38434de1L,0x3d973c3a283dae96L,0xc99baa0182754fc9L },
+      { 0x477d198f4c26b1e3L,0x12e8e186a7516202L,0x386e52f6362addfaL,
+        0x31e8f695c3962853L,0xdec2af136aaedb60L,0xfcfdb4c629cf74acL } },
+    /* 169 */
+    { { 0x6b3ee958cca40298L,0xc3878153f2f5d195L,0x0c565630ed2eae5bL,
+        0xd089b37e3a697cf2L,0xc2ed2ac7ad5029eaL,0x7e5cdfad0f0dda6aL },
+      { 0xf98426dfd9b86202L,0xed1960b14335e054L,0x1fdb02463f14639eL,
+        0x17f709c30db6c670L,0xbfc687ae773421e1L,0x13fefc4a26c1a8acL } },
+    /* 170 */
+    { { 0xe361a1987ffa0a5fL,0xf4b26102c63fe109L,0x264acbc56c74e111L,
+        0x4af445fa77abebafL,0x448c4fdd24cddb75L,0x0b13157d44506eeaL },
+      { 0x22a6b15972e9993dL,0x2c3c57e485e5ecbeL,0xa673560bfd83e1a1L,
+        0x6be23f82c3b8c83bL,0x40b13a9640bbe38eL,0x66eea033ad17399bL } },
+    /* 171 */
+    { { 0x49fc6e95b4c6c693L,0xefc735de36af7d38L,0xe053343d35fe42fcL,
+        0xf0aa427c6a9ab7c3L,0xc79f04364a0fcb24L,0x1628724393ebbc50L },
+      { 0x5c3d6bd016927e1eL,0x40158ed2673b984cL,0xa7f86fc84cd48b9aL,
+        0x1643eda660ea282dL,0x45b393eae2a1beedL,0x664c839e19571a94L } },
+    /* 172 */
+    { { 0x5774575027eeaf94L,0x2875c925ea99e1e7L,0xc127e7ba5086adeaL,
+        0x765252a086fe424fL,0x1143cc6c2b6c0281L,0xc9bb2989d671312dL },
+      { 0x880c337c51acb0a5L,0xa3710915d3c60f78L,0x496113c09262b6edL,
+        0x5d25d9f89ce48182L,0x53b6ad72b3813586L,0x0ea3bebc4c0e159cL } },
+    /* 173 */
+    { { 0xcaba450ac5e49beaL,0x684e54157c05da59L,0xa2e9cab9de7ac36cL,
+        0x4ca79b5f2e6f957bL,0xef7b024709b817b1L,0xeb3049907d89df0fL },
+      { 0x508f730746fe5096L,0x695810e82e04eaafL,0x88ef1bd93512f76cL,
+        0x776613513ebca06bL,0xf7d4863accf158b7L,0xb2a81e4494ee57daL } },
+    /* 174 */
+    { { 0xff288e5b6d53e6baL,0xa90de1a914484ea2L,0x2fadb60ced33c8ecL,
+        0x579d6ef328b66a40L,0x4f2dd6ddec24372dL,0xe9e33fc91d66ec7dL },
+      { 0x110899d2039eab6eL,0xa31a667a3e97bb5eL,0x6200166dcfdce68eL,
+        0xbe83ebae5137d54bL,0x085f7d874800acdfL,0xcf4ab1330c6f8c86L } },
+    /* 175 */
+    { { 0x03f65845931e08fbL,0x6438551e1506e2c0L,0x5791f0dc9c36961fL,
+        0x68107b29e3dcc916L,0x83242374f495d2caL,0xd8cfb6636ee5895bL },
+      { 0x525e0f16a0349b1bL,0x33cd2c6c4a0fab86L,0x46c12ee82af8dda9L,
+        0x7cc424ba71e97ad3L,0x69766ddf37621eb0L,0x95565f56a5f0d390L } },
+    /* 176 */
+    { { 0xe0e7bbf21a0f5e94L,0xf771e1151d82d327L,0x10033e3dceb111faL,
+        0xd269744dd3426638L,0xbdf2d9da00d01ef6L,0x1cb80c71a049ceafL },
+      { 0x17f183289e21c677L,0x6452af0519c8f98bL,0x35b9c5f780b67997L,
+        0x5c2e1cbe40f8f3d4L,0x43f9165666d667caL,0x9faaa059cf9d6e79L } },
+    /* 177 */
+    { { 0x8ad246180a078fe6L,0xf6cc73e6464fd1ddL,0x4d2ce34dc3e37448L,
+        0x624950c5e3271b5fL,0x62910f5eefc5af72L,0x8b585bf8aa132bc6L },
+      { 0x11723985a839327fL,0x34e2d27d4aac252fL,0x402f59ef6296cc4eL,
+        0x00ae055c47053de9L,0xfc22a97228b4f09bL,0xa9e86264fa0c180eL } },
+    /* 178 */
+    { { 0x0b7b6224bc310eccL,0x8a1a74f167fa14edL,0x87dd09607214395cL,
+        0xdf1b3d09f5c91128L,0x39ff23c686b264a8L,0xdc2d49d03e58d4c5L },
+      { 0x2152b7d3a9d6f501L,0xf4c32e24c04094f7L,0xc6366596d938990fL,
+        0x084d078f94fb207fL,0xfd99f1d7328594cbL,0x36defa64cb2d96b3L } },
+    /* 179 */
+    { { 0x4619b78113ed7cbeL,0x95e500159784bd0eL,0x2a32251c2c7705feL,
+        0xa376af995f0dd083L,0x55425c6c0361a45bL,0x812d2cef1f291e7bL },
+      { 0xccf581a05fd94972L,0x26e20e39e56dc383L,0x0093685d63dbfbf0L,
+        0x1fc164cc36b8c575L,0xb9c5ab81390ef5e7L,0x40086beb26908c66L } },
+    /* 180 */
+    { { 0xe5e54f7937e3c115L,0x69b8ee8cc1445a8aL,0x79aedff2b7659709L,
+        0xe288e1631b46fbe6L,0xdb4844f0d18d7bb7L,0xe0ea23d048aa6424L },
+      { 0x714c0e4ef3d80a73L,0x87a0aa9e3bd64f98L,0x8844b8a82ec63080L,
+        0xe0ac9c30255d81a3L,0x86151237455397fcL,0x0b9794642f820155L } },
+    /* 181 */
+    { { 0x127a255a4ae03080L,0x232306b4580a89fbL,0x04e8cd6a6416f539L,
+        0xaeb70dee13b02a0eL,0xa3038cf84c09684aL,0xa710ec3c28e433eeL },
+      { 0x77a72567681b1f7dL,0x86fbce952fc28170L,0xd3408683f5735ac8L,
+        0x3a324e2a6bd68e93L,0x7ec74353c027d155L,0xab60354cd4427177L } },
+    /* 182 */
+    { { 0x32a5342aef4c209dL,0x2ba7527408d62704L,0x4bb4af6fc825d5feL,
+        0x1c3919ced28e7ff1L,0x1dfc2fdcde0340f6L,0xc6580baf29f33ba9L },
+      { 0xae121e7541d442cbL,0x4c7727fd3a4724e4L,0xe556d6a4524f3474L,
+        0x87e13cc7785642a2L,0x182efbb1a17845fdL,0xdcec0cf14e144857L } },
+    /* 183 */
+    { { 0x1cb89541e9539819L,0xc8cb3b4f9d94dbf1L,0x1d353f63417da578L,
+        0xb7a697fb8053a09eL,0x8d841731c35d8b78L,0x85748d6fb656a7a9L },
+      { 0x1fd03947c1859c5dL,0x6ce965c1535d22a2L,0x1966a13e0ca3aadcL,
+        0x9802e41d4fb14effL,0xa9048cbb76dd3fcdL,0x89b182b5e9455bbaL } },
+    /* 184 */
+    { { 0xd777ad6a43360710L,0x841287ef55e9936bL,0xbaf5c67004a21b24L,
+        0xf2c0725f35ad86f1L,0x338fa650c707e72eL,0x2bf8ed2ed8883e52L },
+      { 0xb0212cf4b56e0d6aL,0x50537e126843290cL,0xd8b184a198b3dc6fL,
+        0xd2be9a350210b722L,0x407406db559781eeL,0x5a78d5910bc18534L } },
+    /* 185 */
+    { { 0x4d57aa2ad748b02cL,0xbe5b3451a12b3b95L,0xadca7a4564711258L,
+        0x597e091a322153dbL,0xf327100632eb1eabL,0xbd9adcba2873f301L },
+      { 0xd1dc79d138543f7fL,0x00022092921b1fefL,0x86db3ef51e5df8edL,
+        0x888cae049e6b944aL,0x71bd29ec791a32b4L,0xd3516206a6d1c13eL } },
+    /* 186 */
+    { { 0x2ef6b95255924f43L,0xd2f401ae4f9de8d5L,0xfc73e8d7adc68042L,
+        0x627ea70c0d9d1bb4L,0xc3bb3e3ebbf35679L,0x7e8a254ad882dee4L },
+      { 0x08906f50b5924407L,0xf14a0e61a1ad444aL,0xaa0efa2165f3738eL,
+        0xd60c7dd6ae71f161L,0x9e8390faf175894dL,0xd115cd20149f4c00L } },
+    /* 187 */
+    { { 0x2f2e2c1da52abf77L,0xc2a0dca554232568L,0xed423ea254966dccL,
+        0xe48c93c7cd0dd039L,0x1e54a225176405c7L,0x1efb5b1670d58f2eL },
+      { 0xa751f9d994fb1471L,0xfdb31e1f67d2941dL,0xa6c74eb253733698L,
+        0xd3155d1189a0f64aL,0x4414cfe4a4b8d2b6L,0x8d5a4be8f7a8e9e3L } },
+    /* 188 */
+    { { 0x5c96b4d452669e98L,0x4547f9228fd42a03L,0xcf5c1319d285174eL,
+        0x805cd1ae064bffa0L,0x50e8bc4f246d27e7L,0xf89ef98fd5781e11L },
+      { 0xb4ff95f6dee0b63fL,0xad850047222663a4L,0x026918604d23ce9cL,
+        0x3e5309ce50019f59L,0x27e6f72269a508aeL,0xe9376652267ba52cL } },
+    /* 189 */
+    { { 0xa04d289cc0368708L,0xc458872f5e306e1dL,0x76fa23de33112feaL,
+        0x718e39746efde42eL,0xf0c98cdc1d206091L,0x5fa3ca6214a71987L },
+      { 0xeee8188bdcaa9f2aL,0x312cc732589a860dL,0xf9808dd6c63aeb1fL,
+        0x70fd43db4ea62b53L,0x2c2bfe34890b6e97L,0x105f863cfa426aa6L } },
+    /* 190 */
+    { { 0x0b29795db38059adL,0x5686b77e90647ea0L,0xeff0470edb473a3eL,
+        0x278d2340f9b6d1e2L,0xebbff95bbd594ec7L,0xf4b72334d3a7f23dL },
+      { 0x2a285980a5a83f0bL,0x0786c41a9716a8b3L,0x138901bd22511812L,
+        0xd1b55221e2fede6eL,0x0806e264df4eb590L,0x6c4c897e762e462eL } },
+    /* 191 */
+    { { 0xd10b905fb4b41d9dL,0x826ca4664523a65bL,0x535bbd13b699fa37L,
+        0x5b9933d773bc8f90L,0x9332d61fcd2118adL,0x158c693ed4a65fd0L },
+      { 0x4ddfb2a8e6806e63L,0xe31ed3ecb5de651bL,0xf9460e51819bc69aL,
+        0x6229c0d62c76b1f8L,0xbb78f231901970a3L,0x31f3820f9cee72b8L } },
+    /* 192 */
+    { { 0xe931caf2c09e1c72L,0x0715f29812990cf4L,0x33aad81d943262d8L,
+        0x5d292b7a73048d3fL,0xb152aaa4dc7415f6L,0xc3d10fd90fd19587L },
+      { 0xf76b35c575ddadd0L,0x9f5f4a511e7b694cL,0x2f1ab7ebc0663025L,
+        0x01c9cc87920260b0L,0xc4b1f61a05d39da6L,0x6dcd76c4eb4a9c4eL } },
+    /* 193 */
+    { { 0x0ba0916ffdc83f01L,0x354c8b449553e4f9L,0xa6cc511affc5e622L,
+        0xb954726ae95be787L,0xcb04811575b41a62L,0xfa2ae6cdebfde989L },
+      { 0x6376bbc70f24659aL,0x13a999fd4c289c43L,0xc7134184ec9abd8bL,
+        0x28c02bf6a789ab04L,0xff841ebcd3e526ecL,0x442b191e640893a8L } },
+    /* 194 */
+    { { 0x4cac6c62fa2b6e20L,0x97f29e9bf6d69861L,0x228ab1dbbc96d12dL,
+        0x6eb913275e8e108dL,0xd4b3d4d140771245L,0x61b20623ca8a803aL },
+      { 0x2c2f3b41a6a560b1L,0x879e1d403859fcf4L,0x7cdb5145024dbfc3L,
+        0x55d08f153bfa5315L,0x2f57d773aa93823aL,0xa97f259cc6a2c9a2L } },
+    /* 195 */
+    { { 0xc306317be58edbbbL,0x25ade51c79dfdf13L,0x6b5beaf116d83dd6L,
+        0xe8038a441dd8f925L,0x7f00143cb2a87b6bL,0xa885d00df5b438deL },
+      { 0xe9f76790cf9e48bdL,0xf0bdf9f0a5162768L,0x0436709fad7b57cbL,
+        0x7e151c12f7c15db7L,0x3514f0225d90ee3bL,0x2e84e8032c361a8dL } },
+    /* 196 */
+    { { 0x2277607d563ec8d8L,0xa661811fe3934cb7L,0x3ca72e7af58fd5deL,
+        0x7989da0462294c6aL,0x88b3708bf6bbefe9L,0x0d524cf753ed7c82L },
+      { 0x69f699ca2f30c073L,0xf0fa264b9dc1dcf3L,0x44ca456805f0aaf6L,
+        0x0f5b23c7d19b9bafL,0x39193f41eabd1107L,0x9e3e10ad2a7c9b83L } },
+    /* 197 */
+    { { 0xa90824f0d4ae972fL,0x43eef02bc6e846e7L,0x7e46061229d2160aL,
+        0x29a178acfe604e91L,0x23056f044eb184b2L,0x4fcad55feb54cdf4L },
+      { 0xa0ff96f3ae728d15L,0x8a2680c6c6a00331L,0x5f84cae07ee52556L,
+        0x5e462c3ac5a65dadL,0x5d2b81dfe2d23f4fL,0x6e47301bc5b1eb07L } },
+    /* 198 */
+    { { 0x77411d68af8219b9L,0xcb883ce651b1907aL,0x25c87e57101383b5L,
+        0x9c7d9859982f970dL,0xaa6abca5118305d2L,0x725fed2f9013a5dbL },
+      { 0x487cdbafababd109L,0xc0f8cf5687586528L,0xa02591e68ad58254L,
+        0xc071b1d1debbd526L,0x927dfe8b961e7e31L,0x55f895f99263dfe1L } },
+    /* 199 */
+    { { 0xf899b00db175645bL,0x51f3a627b65b4b92L,0xa2f3ac8db67399efL,
+        0xe717867fe400bc20L,0x42cc90201967b952L,0x3d5967513ecd1de1L },
+      { 0xd41ebcdedb979775L,0x99ba61bc6a2e7e88L,0x039149a5321504f2L,
+        0xe7dc231427ba2fadL,0x9f556308b57d8368L,0x2b6d16c957da80a7L } },
+    /* 200 */
+    { { 0x84af5e76279ad982L,0x9bb4c92d9c8b81a6L,0xd79ad44e0e698e67L,
+        0xe8be9048265fc167L,0xf135f7e60c3a4cccL,0xa0a10d38b8863a33L },
+      { 0xe197247cd386efd9L,0x0eefd3f9b52346c2L,0xc22415f978607bc8L,
+        0xa2a8f862508674ceL,0xa72ad09ec8c9d607L,0xcd9f0ede50fa764fL } },
+    /* 201 */
+    { { 0x063391c7d1a46d4dL,0x2df51c119eb01693L,0xc5849800849e83deL,
+        0x48fd09aa8ad08382L,0xa405d873aa742736L,0xee49e61ee1f9600cL },
+      { 0xd76676be48c76f73L,0xd9c100f601274b2aL,0x110bb67c83f8718dL,
+        0xec85a42002fc0d73L,0xc0449e1e744656adL,0x28ce737637d9939bL } },
+    /* 202 */
+    { { 0x97e9af7244544ac7L,0xf2c658d5ba010426L,0x732dec39fb3adfbdL,
+        0xd12faf91a2df0b07L,0x8ac267252171e208L,0xf820cdc85b24fa54L },
+      { 0x307a6eea94f4cf77L,0x18c783d2944a33c6L,0x4b939d4c0b741ac5L,
+        0x1d7acd153ffbb6e4L,0x06a248587a255e44L,0x14fbc494ce336d50L } },
+    /* 203 */
+    { { 0x9b920c0c51584e3cL,0xc7733c59f7e54027L,0xe24ce13988422bbeL,
+        0x11ada812523bd6abL,0xde068800b88e6defL,0x7b872671fe8c582dL },
+      { 0x4e746f287de53510L,0x492f8b99f7971968L,0x1ec80bc77d928ac2L,
+        0xb3913e48432eb1b5L,0xad08486632028f6eL,0x122bb8358fc2f38bL } },
+    /* 204 */
+    { { 0x0a9f3b1e3b0b29c3L,0x837b64324fa44151L,0xb9905c9217b28ea7L,
+        0xf39bc93798451750L,0xcd383c24ce8b6da1L,0x299f57db010620b2L },
+      { 0x7b6ac39658afdce3L,0xa15206b33d05ef47L,0xa0ae37e2b9bb02ffL,
+        0x107760ab9db3964cL,0xe29de9a067954beaL,0x446a1ad8431c3f82L } },
+    /* 205 */
+    { { 0xc6fecea05c6b8195L,0xd744a7c5f49e71b9L,0xa8e96acc177a7ae7L,
+        0x1a05746c358773a7L,0xa416214637567369L,0xaa0217f787d1c971L },
+      { 0x61e9d15877fd3226L,0x0f6f2304e4f600beL,0xa9c4cebc7a6dff07L,
+        0xd15afa0109f12a24L,0x2bbadb228c863ee9L,0xa28290e4e5eb8c78L } },
+    /* 206 */
+    { { 0x55b87fa03e9de330L,0x12b26066195c145bL,0xe08536e0a920bef0L,
+        0x7bff6f2c4d195adcL,0x7f319e9d945f4187L,0xf9848863f892ce47L },
+      { 0xd0efc1d34fe37657L,0x3c58de825cf0e45aL,0x626ad21a8b0ccbbeL,
+        0xd2a31208af952fc5L,0x81791995eb437357L,0x5f19d30f98e95d4fL } },
+    /* 207 */
+    { { 0x72e83d9a0e6865bbL,0x22f5af3bf63456a6L,0x409e9c73463c8d9eL,
+        0x40e9e578dfe6970eL,0x876b6efa711b91caL,0x895512cf942625a3L },
+      { 0x84c8eda8cb4e462bL,0x84c0154a4412e7c8L,0x04325db1ceb7b71fL,
+        0x1537dde366f70877L,0xf3a093991992b9acL,0xa7316606d498ae77L } },
+    /* 208 */
+    { { 0x13990d2fcad260f5L,0x76c3be29eec0e8c0L,0x7dc5bee00f7bd7d5L,
+        0x9be167d2efebda4bL,0xcce3dde69122b87eL,0x75a28b0982b5415cL },
+      { 0xf6810bcde84607a6L,0xc6d581286f4dbf0dL,0xfead577d1b4dafebL,
+        0x9bc440b2066b28ebL,0x53f1da978b17e84bL,0x0459504bcda9a575L } },
+    /* 209 */
+    { { 0x13e39a02329e5836L,0x2c9e7d51f717269dL,0xc5ac58d6f26c963bL,
+        0x3b0c6c4379967bf5L,0x60bbea3f55908d9dL,0xd84811e7f07c9ad1L },
+      { 0xfe7609a75bd20e4aL,0xe4325dd20a70baa8L,0x3711f370b3600386L,
+        0x97f9562fd0924302L,0x040dc0c34acc4436L,0xfd6d725cde79cdd4L } },
+    /* 210 */
+    { { 0xb3efd0e3cf13eafbL,0x21009cbb5aa0ae5fL,0xe480c55379022279L,
+        0x755cf334b2fc9a6dL,0x8564a5bf07096ae7L,0xddd649d0bd238139L },
+      { 0xd0de10b18a045041L,0x6e05b413c957d572L,0x5c5ff8064e0fb25cL,
+        0xd933179b641162fbL,0x42d48485e57439f9L,0x70c5bd0a8a8d72aaL } },
+    /* 211 */
+    { { 0xa767173897bdf646L,0xaa1485b4ab329f7cL,0xce3e11d6f8f25fdfL,
+        0x76a3fc7ec6221824L,0x045f281ff3924740L,0x24557d4e96d13a9aL },
+      { 0x875c804bdd4c27cdL,0x11c5f0f40f5c7feaL,0xac8c880bdc55ff7eL,
+        0x2acddec51103f101L,0x38341a21f99faa89L,0xc7b67a2cce9d6b57L } },
+    /* 212 */
+    { { 0x9a0d724f8e357586L,0x1d7f4ff5df648da0L,0x9c3e6c9bfdee62a5L,
+        0x0499cef00389b372L,0xe904050d98eab879L,0xe8eef1b66c051617L },
+      { 0xebf5bfebc37e3ca9L,0x7c5e946da4e0b91dL,0x790973142c4bea28L,
+        0x81f6c109ee67b2b7L,0xaf237d9bdafc5edeL,0xd2e602012abb04c7L } },
+    /* 213 */
+    { { 0x6156060c8a4f57bfL,0xf9758696ff11182aL,0x8336773c6296ef00L,
+        0x9c054bceff666899L,0xd6a11611719cd11cL,0x9824a641dbe1acfaL },
+      { 0x0b7b7a5fba89fd01L,0xf8d3b809889f79d8L,0xc5e1ea08f578285cL,
+        0x7ac74536ae6d8288L,0x5d37a2007521ef5fL,0x5ecc4184b260a25dL } },
+    /* 214 */
+    { { 0xddcebb19a708c8d3L,0xe63ed04fc63f81ecL,0xd045f5a011873f95L,
+        0x3b5ad54479f276d5L,0x81272a3d425ae5b3L,0x8bfeb50110ce1605L },
+      { 0x4233809c888228bfL,0x4bd82acfb2aff7dfL,0x9c68f1800cbd4a7fL,
+        0xfcd771246b44323dL,0x60c0fcf6891db957L,0xcfbb4d8904da8f7fL } },
+    /* 215 */
+    { { 0x9a6a5df93b26139aL,0x3e076a83b2cc7eb8L,0x47a8e82d5a964bcdL,
+        0x8a4e2a39b9278d6bL,0x93506c98e4443549L,0x06497a8ff1e0d566L },
+      { 0x3dee8d992b1efa05L,0x2da63ca845393e33L,0xa4af7277cf0579adL,
+        0xaf4b46393236d8eaL,0x6ccad95b32b617f5L,0xce76d8b8b88bb124L } },
+    /* 216 */
+    { { 0x63d2537a083843dcL,0x89eb35141e4153b4L,0x5175ebc4ea9afc94L,
+        0x7a6525808ed1aed7L,0x67295611d85e8297L,0x8dd2d68bb584b73dL },
+      { 0x237139e60133c3a4L,0x9de838ab4bd278eaL,0xe829b072c062fcd9L,
+        0x70730d4f63ba8706L,0x6080483fd3cd05ecL,0x872ab5b80c85f84dL } },
+    /* 217 */
+    { { 0xfc0776d3999d4d49L,0xa3eb59deec3f45e7L,0xbc990e440dae1fc1L,
+        0x33596b1ea15371ffL,0xd447dcb29bc7ab25L,0xcd5b63e935979582L },
+      { 0xae3366fa77d1ff11L,0x59f28f05edee6903L,0x6f43fed1a4433bf2L,
+        0x15409c9bdf9ce00eL,0x21b5cdedaca9c5dcL,0xf9f3359582d7bdb4L } },
+    /* 218 */
+    { { 0x959443789422c792L,0x239ea923c958b8bfL,0x4b61a247df076541L,
+        0x4d29ce85bb9fc544L,0x9a692a670b424559L,0x6e0ca5a00e486900L },
+      { 0x6b79a78285b3beceL,0x41f35e39c61f9892L,0xff82099aae747f82L,
+        0x58c8ae3fd0ca59d6L,0x4ac930e299406b5fL,0x2ce04eb99df24243L } },
+    /* 219 */
+    { { 0x4366b9941ac37b82L,0xff0c728d25b04d83L,0x1f55136119c47b7cL,
+        0xdbf2d5edbeff13e7L,0xf78efd51e12a683dL,0x82cd85b9989cf9c4L },
+      { 0xe23c6db6e0cb5d37L,0x818aeebd72ee1a15L,0x8212aafd28771b14L,
+        0x7bc221d91def817dL,0xdac403a29445c51fL,0x711b051712c3746bL } },
+    /* 220 */
+    { { 0x0ed9ed485ea99eccL,0xf799500db8cab5e1L,0xa8ec87dcb570cbdcL,
+        0x52cfb2c2d35dfaecL,0x8d31fae26e4d80a4L,0xe6a37dc9dcdeabe5L },
+      { 0x5d365a341deca452L,0x09a5f8a50d68b44eL,0x59238ea5a60744b1L,
+        0xf2fedc0dbb4249e9L,0xe395c74ea909b2e3L,0xe156d1a539388250L } },
+    /* 221 */
+    { { 0xd796b3d047181ae9L,0xbaf44ba844197808L,0xe693309434cf3facL,
+        0x41aa6adec3bd5c46L,0x4fda75d8eed947c6L,0xacd9d4129ea5a525L },
+      { 0x65cc55a3d430301bL,0x3c9a5bcf7b52ea49L,0x22d319cf159507f0L,
+        0x2ee0b9b5de74a8ddL,0x20c26a1e877ac2b6L,0x387d73da92e7c314L } },
+    /* 222 */
+    { { 0x13c4833e8cd3fdacL,0x76fcd473332e5b8eL,0xff671b4be2fe1fd3L,
+        0x4d734e8b5d98d8ecL,0xb1ead3c6514bbc11L,0xd14ca8587b390494L },
+      { 0x95a443af5d2d37e9L,0x73c6ea7300464622L,0xa44aeb4b15755044L,
+        0xba3f8575fab58feeL,0x9779dbc9dc680a6fL,0xe1ee5f5a7b37ddfcL } },
+    /* 223 */
+    { { 0xcd0b464812d29f46L,0x93295b0b0ed53137L,0xbfe2609480bef6c9L,
+        0xa656578854248b00L,0x69c43fca80e7f9c4L,0x2190837bbe141ea1L },
+      { 0x875e159aa1b26cfbL,0x90ca9f877affe852L,0x15e6550d92ca598eL,
+        0xe3e0945d1938ad11L,0xef7636bb366ef937L,0xb6034d0bb39869e5L } },
+    /* 224 */
+    { { 0x4d255e3026d8356eL,0xf83666edd314626fL,0x421ddf61d0c8ed64L,
+        0x96e473c526677b61L,0xdad4af7e9e9b18b3L,0xfceffd4aa9393f75L },
+      { 0x843138a111c731d5L,0x05bcb3a1b2f141d9L,0x20e1fa95617b7671L,
+        0xbefce81288ccec7bL,0x582073dc90f1b568L,0xf572261a1f055cb7L } },
+    /* 225 */
+    { { 0xf314827736973088L,0xc008e70886a9f980L,0x1b795947e046c261L,
+        0xdf1e6a7dca76bca0L,0xabafd88671acddf0L,0xff7054d91364d8f4L },
+      { 0x2cf63547e2260594L,0x468a5372d73b277eL,0xc7419e24ef9bd35eL,
+        0x2b4a1c2024043cc3L,0xa28f047a890b39cdL,0xdca2cea146f9a2e3L } },
+    /* 226 */
+    { { 0xab78873653277538L,0xa734e225cf697738L,0x66ee1d1e6b22e2c1L,
+        0x2c615389ebe1d212L,0xf36cad4002bb0766L,0x120885c33e64f207L },
+      { 0x59e77d5690fbfec2L,0xf9e781aad7a574aeL,0x801410b05d045e53L,
+        0xd3b5f0aaa91b5f0eL,0xb3d1df007fbb3521L,0x11c4b33ec72bee9aL } },
+    /* 227 */
+    { { 0xd32b983283c3a7f3L,0x8083abcf88d8a354L,0xdeb1640450f4ec5aL,
+        0x18d747f0641e2907L,0x4e8978aef1bbf03eL,0x932447dc88a0cd89L },
+      { 0x561e0febcf3d5897L,0xfc3a682f13600e6dL,0xc78b9d73d16a6b73L,
+        0xe713feded29bf580L,0x0a22522308d69e5cL,0x3a924a571ff7fda4L } },
+    /* 228 */
+    { { 0xfb64554cb4093beeL,0xa6d65a25a58c6ec0L,0x4126994d43d0ed37L,
+        0xa5689a5155152d44L,0xb8e5ea8c284caa8dL,0x33f05d4fd1f25538L },
+      { 0xe0fdfe091b615d6eL,0x2ded7e8f705507daL,0xdd5631e517bbcc80L,
+        0x4f87453e267fd11fL,0xc6da723fff89d62dL,0x55cbcae2e3cda21dL } },
+    /* 229 */
+    { { 0x336bc94e6b4e84f3L,0x728630314ef72c35L,0x6d85fdeeeeb57f99L,
+        0x7f4e3272a42ece1bL,0x7f86cbb536f0320aL,0xf09b6a2b923331e6L },
+      { 0x21d3ecf156778435L,0x2977ba998323b2d2L,0x6a1b57fb1704bc0fL,
+        0xd777cf8b389f048aL,0x9ce2174fac6b42cdL,0x404e2bff09e6c55aL } },
+    /* 230 */
+    { { 0x9b9b135e204c5ddbL,0x9dbfe0443eff550eL,0x35eab4bfec3be0f6L,
+        0x8b4c3f0d0a43e56fL,0x4c1c66730e73f9b3L,0x92ed38bd2c78c905L },
+      { 0xc7003f6aa386e27cL,0xb9c4f46faced8507L,0xea024ec859df5464L,
+        0x4af96152429572eaL,0x279cd5e2e1fc1194L,0xaa376a03281e358cL } },
+    /* 231 */
+    { { 0x078592233cdbc95cL,0xaae1aa6aef2e337aL,0xc040108d472a8544L,
+        0x80c853e68d037b7dL,0xd221315c8c7eee24L,0x195d38568ee47752L },
+      { 0xd4b1ba03dacd7fbeL,0x4b5ac61ed3e0c52bL,0x68d3c0526aab7b52L,
+        0xf0d7248c660e3feaL,0xafdb3f893145efb4L,0xa73fd9a38f40936dL } },
+    /* 232 */
+    { { 0x891b9ef3bb1b17ceL,0x14023667c6127f31L,0x12b2e58d305521fdL,
+        0x3a47e449e3508088L,0xe49fc84bff751507L,0x4023f7225310d16eL },
+      { 0xa608e5edb73399faL,0xf12632d8d532aa3eL,0x13a2758e845e8415L,
+        0xae4b6f851fc2d861L,0x3879f5b1339d02f2L,0x446d22a680d99ebdL } },
+    /* 233 */
+    { { 0x0f5023024be164f1L,0x8d09d2d688b81920L,0x514056f1984aceffL,
+        0xa5c4ddf075e9e80dL,0x38cb47e6df496a93L,0x899e1d6b38df6bf7L },
+      { 0x69e87e88b59eb2a6L,0x280d9d639b47f38bL,0x599411ea3654e955L,
+        0xcf8dd4fd969aa581L,0xff5c2baf530742a7L,0xa43915361a373085L } },
+    /* 234 */
+    { { 0x6ace72a3a8a4bdd2L,0xc656cdd1b68ef702L,0xd4a33e7e90c4dad8L,
+        0x4aece08a9d951c50L,0xea8005ae085d68e6L,0xfdd7a7d76f7502b8L },
+      { 0xce6fb0a698d6fa45L,0x228f86721104eb8cL,0xd23d8787da09d7dcL,
+        0x5521428b2ae93065L,0x95faba3dea56c366L,0xedbe50390a88aca5L } },
+    /* 235 */
+    { { 0xd64da0adbfb26c82L,0xe5d70b3c952c2f9cL,0xf5e8f365f7e77f68L,
+        0x7234e00208f2d695L,0xfaf900eed12e7be6L,0x27dc69344acf734eL },
+      { 0x80e4ff5ec260a46aL,0x7da5ebce2dc31c28L,0x485c5d73ca69f552L,
+        0xcdfb6b2969cc84c2L,0x031c5afeed6d4ecaL,0xc7bbf4c822247637L } },
+    /* 236 */
+    { { 0x9d5b72c749fe01b2L,0x34785186793a91b8L,0xa3ba3c54cf460438L,
+        0x73e8e43d3ab21b6fL,0x50cde8e0be57b8abL,0x6488b3a7dd204264L },
+      { 0xa9e398b3dddc4582L,0x1698c1a95bec46feL,0x7f1446ef156d3843L,
+        0x3fd25dd8770329a2L,0x05b1221a2c710668L,0x65b2dc2aa72ee6cfL } },
+    /* 237 */
+    { { 0x21a885f7cd021d63L,0x3f344b15fea61f08L,0xad5ba6ddc5cf73e6L,
+        0x154d0d8f227a8b23L,0x9b74373cdc559311L,0x4feab71598620fa1L },
+      { 0x5098938e7d9ec924L,0x84d54a5e6d47e550L,0x1a2d1bdc1b617506L,
+        0x99fe1782615868a4L,0x171da7803005a924L,0xa70bf5ed7d8f79b6L } },
+    /* 238 */
+    { { 0x0bc1250dfe2216c5L,0x2c37e2507601b351L,0xb6300175d6f06b7eL,
+        0x4dde8ca18bfeb9b7L,0x4f210432b82f843dL,0x8d70e2f9b1ac0afdL },
+      { 0x25c73b78aae91abbL,0x0230dca3863028f2L,0x8b923ecfe5cf30b7L,
+        0xed754ec25506f265L,0x8e41b88c729a5e39L,0xee67cec2babf889bL } },
+    /* 239 */
+    { { 0xe183acf51be46c65L,0x9789538fe7565d7aL,0x87873391d9627b4eL,
+        0xbf4ac4c19f1d9187L,0x5db99f634691f5c8L,0xa68df80374a1fb98L },
+      { 0x3c448ed1bf92b5faL,0xa098c8413e0bdc32L,0x8e74cd5579bf016cL,
+        0x5df0d09c115e244dL,0x9418ad013410b66eL,0x8b6124cb17a02130L } },
+    /* 240 */
+    { { 0x425ec3afc26e3392L,0xc07f8470a1722e00L,0xdcc28190e2356b43L,
+        0x4ed97dffb1ef59a6L,0xc22b3ad1c63028c1L,0x070723c268c18988L },
+      { 0x70da302f4cf49e7dL,0xc5e87c933f12a522L,0x74acdd1d18594148L,
+        0xad5f73abca74124cL,0xe72e4a3ed69fd478L,0x615938687b117cc3L } },
+    /* 241 */
+    { { 0x7b7b9577a9aa0486L,0x6e41fb35a063d557L,0xb017d5c7da9047d7L,
+        0x8c74828068a87ba9L,0xab45fa5cdf08ad93L,0xcd9fb2174c288a28L },
+      { 0x595446425747843dL,0x34d64c6ca56111e3L,0x12e47ea14bfce8d5L,
+        0x17740e056169267fL,0x5c49438eeed03fb5L,0x9da30add4fc3f513L } },
+    /* 242 */
+    { { 0xc4e85282ccfa5200L,0x2707608f6a19b13dL,0xdcb9a53df5726e2fL,
+        0x612407c9e9427de5L,0x3e5a17e1d54d582aL,0xb99877de655ae118L },
+      { 0x6f0e972b015254deL,0x92a56db1f0a6f7c5L,0xd297e4e1a656f8b2L,
+        0x99fe0052ad981983L,0xd3652d2f07cfed84L,0xc784352e843c1738L } },
+    /* 243 */
+    { { 0x6ee90af07e9b2d8aL,0xac8d701857cf1964L,0xf6ed903171f28efcL,
+        0x7f70d5a96812b20eL,0x27b557f4f1c61eeeL,0xf1c9bd57c6263758L },
+      { 0x5cf7d0142a1a6194L,0xdd614e0b1890ab84L,0x3ef9de100e93c2a6L,
+        0xf98cf575e0cd91c5L,0x504ec0c614befc32L,0xd0513a666279d68cL } },
+    /* 244 */
+    { { 0xa8eadbada859fb6aL,0xcf8346e7db283666L,0x7b35e61a3e22e355L,
+        0x293ece2c99639c6bL,0xfa0162e256f241c8L,0xd2e6c7b9bf7a1ddaL },
+      { 0xd0de625340075e63L,0x2405aa61f9ec8286L,0x2237830a8fe45494L,
+        0x4fd01ac7364e9c8cL,0x4d9c3d21904ba750L,0xd589be14af1b520bL } },
+    /* 245 */
+    { { 0x13576a4f4662e53bL,0x35ec2f51f9077676L,0x66297d1397c0af97L,
+        0xed3201fe9e598b58L,0x49bc752a5e70f604L,0xb54af535bb12d951L },
+      { 0x36ea4c2b212c1c76L,0x18f5bbc7eb250dfdL,0xa0d466cc9a0a1a46L,
+        0x52564da4dac2d917L,0x206559f48e95fab5L,0x7487c1909ca67a33L } },
+    /* 246 */
+    { { 0x75abfe37dde98e9cL,0x99b90b262a411199L,0x1b410996dcdb1f7cL,
+        0xab346f118b3b5675L,0x04852193f1f8ae1eL,0x1ec4d2276b8b98c1L },
+      { 0xba3bc92645452baaL,0x387d1858acc4a572L,0x9478eff6e51f171eL,
+        0xf357077d931e1c00L,0xffee77cde54c8ca8L,0xfb4892ff551dc9a4L } },
+    /* 247 */
+    { { 0x5b1bdad02db8dff8L,0xd462f4fd5a2285a2L,0x1d6aad8eda00b461L,
+        0x43fbefcf41306d1bL,0x428e86f36a13fe19L,0xc8b2f11817f89404L },
+      { 0x762528aaf0d51afbL,0xa3e2fea4549b1d06L,0x86fad8f2ea3ddf66L,
+        0x0d9ccc4b4fbdd206L,0xcde97d4cc189ff5aL,0xc36793d6199f19a6L } },
+    /* 248 */
+    { { 0xea38909b51b85197L,0xffb17dd0b4c92895L,0x0eb0878b1ddb3f3fL,
+        0xb05d28ffc57cf0f2L,0xd8bde2e71abd57e2L,0x7f2be28dc40c1b20L },
+      { 0x6554dca2299a2d48L,0x5130ba2e8377982dL,0x8863205f1071971aL,
+        0x15ee62827cf2825dL,0xd4b6c57f03748f2bL,0xa9e3f4da430385a0L } },
+    /* 249 */
+    { { 0x33eb7cec83fbc9c6L,0x24a311c74541777eL,0xc81377f74f0767fcL,
+        0x12adae364ab702daL,0xb7fcb6db2a779696L,0x4a6fb28401cea6adL },
+      { 0x5e8b1d2acdfc73deL,0xd0efae8d1b02fd32L,0x3f99c190d81d8519L,
+        0x3c18f7fafc808971L,0x41f713e751b7ae7bL,0x0a4b3435f07fc3f8L } },
+    /* 250 */
+    { { 0x7dda3c4c019b7d2eL,0x631c8d1ad4dc4b89L,0x5489cd6e1cdb313cL,
+        0xd44aed104c07bb06L,0x8f97e13a75f000d1L,0x0e9ee64fdda5df4dL },
+      { 0xeaa99f3b3e346910L,0x622f6921fa294ad7L,0x22aaa20d0d0b2fe9L,
+        0x4fed2f991e5881baL,0x9af3b2d6c1571802L,0x919e67a8dc7ee17cL } },
+    /* 251 */
+    { { 0xc724fe4c76250533L,0x8a2080e57d817ef8L,0xa2afb0f4172c9751L,
+        0x9b10cdeb17c0702eL,0xbf3975e3c9b7e3e9L,0x206117df1cd0cdc5L },
+      { 0xfb049e61be05ebd5L,0xeb0bb55c16c782c0L,0x13a331b8ab7fed09L,
+        0xf6c58b1d632863f0L,0x6264ef6e4d3b6195L,0x92c51b639a53f116L } },
+    /* 252 */
+    { { 0xa57c7bc8288b364dL,0x4a562e087b41e5c4L,0x699d21c6698a9a11L,
+        0xa4ed9581f3f849b9L,0xa223eef39eb726baL,0x13159c23cc2884f9L },
+      { 0x73931e583a3f4963L,0x965003890ada6a81L,0x3ee8a1c65ab2950bL,
+        0xeedf4949775fab52L,0x63d652e14f2671b6L,0xfed4491c3c4e2f55L } },
+    /* 253 */
+    { { 0x335eadc3f4eb453eL,0x5ff74b63cadd1a5bL,0x6933d0d75d84a91aL,
+        0x9ca3eeb9b49ba337L,0x1f6faccec04c15b8L,0x4ef19326dc09a7e4L },
+      { 0x53d2d3243dca3233L,0x0ee40590a2259d4bL,0x18c22edb5546f002L,
+        0x9242980109ea6b71L,0xaada0addb0e91e61L,0x5fe53ef499963c50L } },
+    /* 254 */
+    { { 0x372dd06b90c28c65L,0x1765242c119ce47dL,0xc041fb806b22fc82L,
+        0x667edf07b0a7ccc1L,0xc79599e71261beceL,0xbc69d9ba19cff22aL },
+      { 0x009d77cd13c06819L,0x635a66aee282b79dL,0x4edac4a6225b1be8L,
+        0x57d4f4e4524008f9L,0xee299ac5b056af84L,0xcc38444c3a0bc386L } },
+    /* 255 */
+    { { 0x490643b1cd4c2356L,0x740a4851750547beL,0x643eaf29d4944c04L,
+        0xba572479299a98a0L,0x48b29f16ee05fdf9L,0x33fb4f61089b2d7bL },
+      { 0x86704902a950f955L,0x97e1034dfedc3ddfL,0x211320b605fbb6a2L,
+        0x23d7b93f432299bbL,0x1fe1a0578590e4a3L,0x8e1d0586f58c0ce6L } },
+};
+
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_base_6(sp_point_384* r, const sp_digit* k,
+        int map, void* heap)
+{
+    return sp_384_ecc_mulmod_stripe_6(r, &p384_base, p384_table,
+                                      k, map, heap);
+}
+
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * km    Scalar to multiply by.
+ * r     Resulting point.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_base_384(mp_int* km, ecc_point* r, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[6];
+#endif
+    sp_point_384* point;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    err = sp_384_point_new_6(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 6, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+    if (err == MP_OKAY) {
+        sp_384_from_mp(k, 6, km);
+
+            err = sp_384_ecc_mulmod_base_6(point, k, map, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_to_ecc_point_6(point, r);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_6(point, 0, heap);
+
+    return err;
+}
+
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+                                                        defined(HAVE_ECC_VERIFY)
+/* Returns 1 if the number of zero.
+ * Implementation is constant time.
+ *
+ * a  Number to check.
+ * returns 1 if the number is zero and 0 otherwise.
+ */
+static int sp_384_iszero_6(const sp_digit* a)
+{
+    return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5]) == 0;
+}
+
+#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+/* Add 1 to a. (a = a + 1)
+ *
+ * a  A single precision integer.
+ */
+static void sp_384_add_one_6(sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "ldp	x1, x2, [%[a], 0]\n\t"
+        "adds	x1, x1, #1\n\t"
+        "ldr	x3, [%[a], 16]\n\t"
+        "adcs	x2, x2, xzr\n\t"
+        "ldr	x4, [%[a], 24]\n\t"
+        "adcs	x3, x3, xzr\n\t"
+        "stp	x1, x2, [%[a], 0]\n\t"
+        "adcs	x4, x4, xzr\n\t"
+        "stp	x3, x4, [%[a], 16]\n\t"
+        "ldp	x1, x2, [%[a], 32]\n\t"
+        "adcs	x1, x1, xzr\n\t"
+        "adcs	x2, x2, xzr\n\t"
+        "stp	x1, x2, [%[a], 32]\n\t"
+        :
+        : [a] "r" (a)
+        : "memory", "x1", "x2", "x3", "x4"
+    );
+}
+
+/* Read big endian unsigned byte array into r.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
+ * a  Byte array.
+ * n  Number of bytes in array to read.
+ */
+static void sp_384_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j;
+    byte* d;
+
+    for (i = n - 1,j = 0; i >= 7; i -= 8) {
+        r[j]  = ((sp_digit)a[i - 0] <<  0) |
+                ((sp_digit)a[i - 1] <<  8) |
+                ((sp_digit)a[i - 2] << 16) |
+                ((sp_digit)a[i - 3] << 24) |
+                ((sp_digit)a[i - 4] << 32) |
+                ((sp_digit)a[i - 5] << 40) |
+                ((sp_digit)a[i - 6] << 48) |
+                ((sp_digit)a[i - 7] << 56);
+        j++;
+    }
+
+    if (i >= 0) {
+        r[j] = 0;
+
+        d = (byte*)r;
+        switch (i) {
+            case 6: d[n - 1 - 6] = a[6]; //fallthrough
+            case 5: d[n - 1 - 5] = a[5]; //fallthrough
+            case 4: d[n - 1 - 4] = a[4]; //fallthrough
+            case 3: d[n - 1 - 3] = a[3]; //fallthrough
+            case 2: d[n - 1 - 2] = a[2]; //fallthrough
+            case 1: d[n - 1 - 1] = a[1]; //fallthrough
+            case 0: d[n - 1 - 0] = a[0]; //fallthrough
+        }
+        j++;
+    }
+
+    for (; j < size; j++) {
+        r[j] = 0;
+    }
+}
+
+/* Generates a scalar that is in the range 1..order-1.
+ *
+ * rng  Random number generator.
+ * k    Scalar value.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+static int sp_384_ecc_gen_k_6(WC_RNG* rng, sp_digit* k)
+{
+    int err;
+    byte buf[48];
+
+    do {
+        err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf));
+        if (err == 0) {
+            sp_384_from_bin(k, 6, buf, (int)sizeof(buf));
+            if (sp_384_cmp_6(k, p384_order2) < 0) {
+                sp_384_add_one_6(k);
+                break;
+            }
+        }
+    }
+    while (err == 0);
+
+    return err;
+}
+
+/* Makes a random EC key pair.
+ *
+ * rng   Random number generator.
+ * priv  Generated private value.
+ * pub   Generated public point.
+ * heap  Heap to use for allocation.
+ * returns ECC_INF_E when the point does not have the correct order, RNG
+ * failures, MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_make_key_384(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[6];
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_384 inf;
+#endif
+#endif
+    sp_point_384* point;
+    sp_digit* k = NULL;
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_384* infinity;
+#endif
+    int err;
+
+    (void)heap;
+
+    err = sp_384_point_new_6(heap, p, point);
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_6(heap, inf, infinity);
+    }
+#endif
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 6, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+
+    if (err == MP_OKAY) {
+        err = sp_384_ecc_gen_k_6(rng, k);
+    }
+    if (err == MP_OKAY) {
+            err = sp_384_ecc_mulmod_base_6(point, k, 1, NULL);
+    }
+
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    if (err == MP_OKAY) {
+            err = sp_384_ecc_mulmod_6(infinity, point, p384_order, 1, NULL);
+    }
+    if (err == MP_OKAY) {
+        if ((sp_384_iszero_6(point->x) == 0) || (sp_384_iszero_6(point->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(k, priv);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_to_ecc_point_6(point, pub);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_384_point_free_6(infinity, 1, heap);
+#endif
+    sp_384_point_free_6(point, 1, heap);
+
+    return err;
+}
+
+#ifdef HAVE_ECC_DHE
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 48
+ *
+ * r  A single precision integer.
+ * a  Byte array.
+ */
+static void sp_384_to_bin(sp_digit* r, byte* a)
+{
+    int i, j;
+
+    for (i = 5, j = 0; i >= 0; i--) {
+        a[j++] = r[i] >> 56;
+        a[j++] = r[i] >> 48;
+        a[j++] = r[i] >> 40;
+        a[j++] = r[i] >> 32;
+        a[j++] = r[i] >> 24;
+        a[j++] = r[i] >> 16;
+        a[j++] = r[i] >> 8;
+        a[j++] = r[i] >> 0;
+    }
+}
+
+/* Multiply the point by the scalar and serialize the X ordinate.
+ * The number is 0 padded to maximum size on output.
+ *
+ * priv    Scalar to multiply the point by.
+ * pub     Point to multiply.
+ * out     Buffer to hold X ordinate.
+ * outLen  On entry, size of the buffer in bytes.
+ *         On exit, length of data in buffer in bytes.
+ * heap    Heap to use for allocation.
+ * returns BUFFER_E if the buffer is to small for output size,
+ * MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_secret_gen_384(mp_int* priv, ecc_point* pub, byte* out,
+                          word32* outLen, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[6];
+#endif
+    sp_point_384* point = NULL;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    if (*outLen < 48U) {
+        err = BUFFER_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_6(heap, p, point);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 6, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL)
+            err = MEMORY_E;
+    }
+#else
+    k = kd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_384_from_mp(k, 6, priv);
+        sp_384_point_from_ecc_point_6(point, pub);
+            err = sp_384_ecc_mulmod_6(point, point, k, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        sp_384_to_bin(point->x, out);
+        *outLen = 48;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_6(point, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_DHE */
+
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* Sub b from a into a. (a -= b)
+ *
+ * a  A single precision integer and result.
+ * b  A single precision integer.
+ */
+static sp_digit sp_384_sub_in_place_6(sp_digit* a, const sp_digit* b)
+{
+    __asm__ __volatile__ (
+        "ldp	x2, x3, [%[a], 0]\n\t"
+        "ldp	x6, x7, [%[b], 0]\n\t"
+        "subs	x2, x2, x6\n\t"
+        "ldp	x4, x5, [%[a], 16]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x8, x9, [%[b], 16]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "stp	x2, x3, [%[a], 0]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x4, x5, [%[a], 16]\n\t"
+        "ldr		x2, [%[a], 32]\n\t"
+        "ldr		x3, [%[a], 40]\n\t"
+        "ldr		x6, [%[b], 32]\n\t"
+        "ldr		x7, [%[b], 40]\n\t"
+        "sbcs	x2, x2, x6\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "str		x2, [%[a], 32]\n\t"
+        "str		x3, [%[a], 40]\n\t"
+        "csetm	%[a], cc\n\t"
+        : [a] "+r" (a)
+        : [b] "r" (b)
+        : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
+    );
+
+    return (sp_digit)a;
+}
+
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision digit.
+ */
+static void sp_384_mul_d_6(sp_digit* r, const sp_digit* a,
+        sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    __asm__ __volatile__ (
+        "# A[0] * B\n\t"
+        "ldr	x8, [%[a]]\n\t"
+        "mul	x5, %[b], x8\n\t"
+        "umulh	x3, %[b], x8\n\t"
+        "mov	x4, 0\n\t"
+        "str	x5, [%[r]]\n\t"
+        "mov	x5, 0\n\t"
+        "mov	x9, #8\n\t"
+        "1:\n\t"
+        "ldr	x8, [%[a], x9]\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adds	x3, x3, x6\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "str	x3, [%[r], x9]\n\t"
+        "mov	x3, x4\n\t"
+        "mov	x4, x5\n\t"
+        "mov	x5, #0\n\t"
+        "add	x9, x9, #8\n\t"
+        "cmp	x9, 48\n\t"
+        "b.lt	1b\n\t"
+        "str	x3, [%[r], 48]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
+    );
+#else
+    __asm__ __volatile__ (
+        "# A[0] * B\n\t"
+        "ldp	x8, x9, [%[a]]\n\t"
+        "mul	x3, %[b], x8\n\t"
+        "umulh	x4, %[b], x8\n\t"
+        "mov	x5, 0\n\t"
+        "# A[1] * B\n\t"
+        "str	x3, [%[r]]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[2] * B\n\t"
+        "ldp	x8, x9, [%[a], 16]\n\t"
+        "str	x4, [%[r], 8]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "# A[3] * B\n\t"
+        "str	x5, [%[r], 16]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
+        "# A[4] * B\n\t"
+        "ldp	x8, x9, [%[a], 32]\n\t"
+        "str	x3, [%[r], 24]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[5] * B\n\t"
+        "str	x4, [%[r], 32]\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "adc	x3, x3, x7\n\t"
+        "stp	x5, x3, [%[r], 40]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
+    );
+#endif
+}
+
+/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
+ *
+ * d1   The high order half of the number to divide.
+ * d0   The low order half of the number to divide.
+ * div  The dividend.
+ * returns the result of the division.
+ */
+static sp_digit div_384_word_6(sp_digit d1, sp_digit d0, sp_digit div)
+{
+    sp_digit r;
+
+    __asm__ __volatile__ (
+        "lsr	x5, %[div], 32\n\t"
+        "add	x5, x5, 1\n\t"
+
+        "udiv	x3, %[d1], x5\n\t"
+        "lsl	x6, x3, 32\n\t"
+        "mul	x4, %[div], x6\n\t"
+        "umulh	x3, %[div], x6\n\t"
+        "subs	%[d0], %[d0], x4\n\t"
+        "sbc	%[d1], %[d1], x3\n\t"
+
+        "udiv	x3, %[d1], x5\n\t"
+        "lsl	x3, x3, 32\n\t"
+        "add	x6, x6, x3\n\t"
+        "mul	x4, %[div], x3\n\t"
+        "umulh	x3, %[div], x3\n\t"
+        "subs	%[d0], %[d0], x4\n\t"
+        "sbc	%[d1], %[d1], x3\n\t"
+
+        "lsr	x3, %[d0], 32\n\t"
+        "orr	x3, x3, %[d1], lsl 32\n\t"
+
+        "udiv	x3, x3, x5\n\t"
+        "add	x6, x6, x3\n\t"
+        "mul	x4, %[div], x3\n\t"
+        "umulh	x3, %[div], x3\n\t"
+        "subs	%[d0], %[d0], x4\n\t"
+        "sbc	%[d1], %[d1], x3\n\t"
+
+        "lsr	x3, %[d0], 32\n\t"
+        "orr	x3, x3, %[d1], lsl 32\n\t"
+
+        "udiv	x3, x3, x5\n\t"
+        "add	x6, x6, x3\n\t"
+        "mul	x4, %[div], x3\n\t"
+        "sub	%[d0], %[d0], x4\n\t"
+
+        "udiv	x3, %[d0], %[div]\n\t"
+        "add	%[r], x6, x3\n\t"
+
+        : [r] "=r" (r)
+        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+        : "x3", "x4", "x5", "x6"
+    );
+
+    return r;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * m  Mask to AND against each digit.
+ */
+static void sp_384_mask_6(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<6; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    r[0] = a[0] & m;
+    r[1] = a[1] & m;
+    r[2] = a[2] & m;
+    r[3] = a[3] & m;
+    r[4] = a[4] & m;
+    r[5] = a[5] & m;
+#endif
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Nmber to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_384_div_6(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[12], t2[7];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    div = d[5];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 6);
+    for (i=5; i>=0; i--) {
+        r1 = div_384_word_6(t1[6 + i], t1[6 + i - 1], div);
+
+        sp_384_mul_d_6(t2, d, r1);
+        t1[6 + i] += sp_384_sub_in_place_6(&t1[i], t2);
+        t1[6 + i] -= t2[6];
+        sp_384_mask_6(t2, d, t1[6 + i]);
+        t1[6 + i] += sp_384_add_6(&t1[i], &t1[i], t2);
+        sp_384_mask_6(t2, d, t1[6 + i]);
+        t1[6 + i] += sp_384_add_6(&t1[i], &t1[i], t2);
+    }
+
+    r1 = sp_384_cmp_6(t1, d) >= 0;
+    sp_384_cond_sub_6(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_384_mod_6(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_384_div_6(a, m, NULL, r);
+}
+
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
+/* Order-2 for the P384 curve. */
+static const uint64_t p384_order_minus_2[6] = {
+    0xecec196accc52971U,0x581a0db248b0a77aU,0xc7634d81f4372ddfU,
+    0xffffffffffffffffU,0xffffffffffffffffU,0xffffffffffffffffU
+};
+#else
+/* The low half of the order-2 of the P384 curve. */
+static const uint64_t p384_order_low[3] = {
+    0xecec196accc52971U,0x581a0db248b0a77aU,0xc7634d81f4372ddfU
+    
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Multiply two number mod the order of P384 curve. (r = a * b mod order)
+ *
+ * r  Result of the multiplication.
+ * a  First operand of the multiplication.
+ * b  Second operand of the multiplication.
+ */
+static void sp_384_mont_mul_order_6(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+    sp_384_mul_6(r, a, b);
+    sp_384_mont_reduce_order_6(r, p384_order, p384_mp_order);
+}
+
+/* Square number mod the order of P384 curve. (r = a * a mod order)
+ *
+ * r  Result of the squaring.
+ * a  Number to square.
+ */
+static void sp_384_mont_sqr_order_6(sp_digit* r, const sp_digit* a)
+{
+    sp_384_sqr_6(r, a);
+    sp_384_mont_reduce_order_6(r, p384_order, p384_mp_order);
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Square number mod the order of P384 curve a number of times.
+ * (r = a ^ n mod order)
+ *
+ * r  Result of the squaring.
+ * a  Number to square.
+ */
+static void sp_384_mont_sqr_n_order_6(sp_digit* r, const sp_digit* a, int n)
+{
+    int i;
+
+    sp_384_mont_sqr_order_6(r, a);
+    for (i=1; i<n; i++) {
+        sp_384_mont_sqr_order_6(r, r);
+    }
+}
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the order of the P384 curve.
+ * (r = 1 / a mod order)
+ *
+ * r   Inverse result.
+ * a   Number to invert.
+ * td  Temporary data.
+ */
+static void sp_384_mont_inv_order_6(sp_digit* r, const sp_digit* a,
+        sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    XMEMCPY(t, a, sizeof(sp_digit) * 6);
+    for (i=382; i>=0; i--) {
+        sp_384_mont_sqr_order_6(t, t);
+        if ((p384_order_minus_2[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
+            sp_384_mont_mul_order_6(t, t, a);
+        }
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 6U);
+#else
+    sp_digit* t = td;
+    sp_digit* t2 = td + 2 * 6;
+    sp_digit* t3 = td + 4 * 6;
+    int i;
+
+    /* t = a^2 */
+    sp_384_mont_sqr_order_6(t, a);
+    /* t = a^3 = t * a */
+    sp_384_mont_mul_order_6(t, t, a);
+    /* t2= a^c = t ^ 2 ^ 2 */
+    sp_384_mont_sqr_n_order_6(t2, t, 2);
+    /* t = a^f = t2 * t */
+    sp_384_mont_mul_order_6(t, t2, t);
+    /* t2= a^f0 = t ^ 2 ^ 4 */
+    sp_384_mont_sqr_n_order_6(t2, t, 4);
+    /* t = a^ff = t2 * t */
+    sp_384_mont_mul_order_6(t, t2, t);
+    /* t2= a^ff00 = t ^ 2 ^ 8 */
+    sp_384_mont_sqr_n_order_6(t2, t, 8);
+    /* t3= a^ffff = t2 * t */
+    sp_384_mont_mul_order_6(t3, t2, t);
+    /* t2= a^ffff0000 = t3 ^ 2 ^ 16 */
+    sp_384_mont_sqr_n_order_6(t2, t3, 16);
+    /* t = a^ffffffff = t2 * t3 */
+    sp_384_mont_mul_order_6(t, t2, t3);
+    /* t2= a^ffffffff0000 = t ^ 2 ^ 16  */
+    sp_384_mont_sqr_n_order_6(t2, t, 16);
+    /* t = a^ffffffffffff = t2 * t3 */
+    sp_384_mont_mul_order_6(t, t2, t3);
+    /* t2= a^ffffffffffff000000000000 = t ^ 2 ^ 48  */
+    sp_384_mont_sqr_n_order_6(t2, t, 48);
+    /* t= a^fffffffffffffffffffffffff = t2 * t */
+    sp_384_mont_mul_order_6(t, t2, t);
+    /* t2= a^ffffffffffffffffffffffff000000000000000000000000 */
+    sp_384_mont_sqr_n_order_6(t2, t, 96);
+    /* t2= a^ffffffffffffffffffffffffffffffffffffffffffffffff = t2 * t */
+    sp_384_mont_mul_order_6(t2, t2, t);
+    for (i=191; i>=1; i--) {
+        sp_384_mont_sqr_order_6(t2, t2);
+        if (((sp_digit)p384_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
+            sp_384_mont_mul_order_6(t2, t2, a);
+        }
+    }
+    sp_384_mont_sqr_order_6(t2, t2);
+    sp_384_mont_mul_order_6(r, t2, a);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+#ifdef HAVE_ECC_SIGN
+#ifndef SP_ECC_MAX_SIG_GEN
+#define SP_ECC_MAX_SIG_GEN  64
+#endif
+
+/* Sign the hash using the private key.
+ *   e = [hash, 384 bits] from binary
+ *   r = (k.G)->x mod order
+ *   s = (r * x + e) / k mod order
+ * The hash is truncated to the first 384 bits.
+ *
+ * hash     Hash to sign.
+ * hashLen  Length of the hash data.
+ * rng      Random number generator.
+ * priv     Private part of key - scalar.
+ * rm       First part of result as an mp_int.
+ * sm       Sirst part of result as an mp_int.
+ * heap     Heap to use for allocation.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
+                    mp_int* rm, mp_int* sm, mp_int* km, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit ed[2*6];
+    sp_digit xd[2*6];
+    sp_digit kd[2*6];
+    sp_digit rd[2*6];
+    sp_digit td[3 * 2*6];
+    sp_point_384 p;
+#endif
+    sp_digit* e = NULL;
+    sp_digit* x = NULL;
+    sp_digit* k = NULL;
+    sp_digit* r = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_384* point = NULL;
+    sp_digit carry;
+    sp_digit* s = NULL;
+    sp_digit* kInv = NULL;
+    int err = MP_OKAY;
+    int64_t c;
+    int i;
+
+    (void)heap;
+
+    err = sp_384_point_new_6(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 6, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        e = d + 0 * 6;
+        x = d + 2 * 6;
+        k = d + 4 * 6;
+        r = d + 6 * 6;
+        tmp = d + 8 * 6;
+#else
+        e = ed;
+        x = xd;
+        k = kd;
+        r = rd;
+        tmp = td;
+#endif
+        s = e;
+        kInv = k;
+
+        if (hashLen > 48U) {
+            hashLen = 48U;
+        }
+
+        sp_384_from_bin(e, 6, hash, (int)hashLen);
+    }
+
+    for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
+        sp_384_from_mp(x, 6, priv);
+
+        /* New random point. */
+        if (km == NULL || mp_iszero(km)) {
+            err = sp_384_ecc_gen_k_6(rng, k);
+        }
+        else {
+            sp_384_from_mp(k, 6, km);
+            mp_zero(km);
+        }
+        if (err == MP_OKAY) {
+                err = sp_384_ecc_mulmod_base_6(point, k, 1, NULL);
+        }
+
+        if (err == MP_OKAY) {
+            /* r = point->x mod order */
+            XMEMCPY(r, point->x, sizeof(sp_digit) * 6U);
+            sp_384_norm_6(r);
+            c = sp_384_cmp_6(r, p384_order);
+            sp_384_cond_sub_6(r, r, p384_order, 0L - (sp_digit)(c >= 0));
+            sp_384_norm_6(r);
+
+            /* Conv k to Montgomery form (mod order) */
+                sp_384_mul_6(k, k, p384_norm_order);
+            err = sp_384_mod_6(k, k, p384_order);
+        }
+        if (err == MP_OKAY) {
+            sp_384_norm_6(k);
+            /* kInv = 1/k mod order */
+                sp_384_mont_inv_order_6(kInv, k, tmp);
+            sp_384_norm_6(kInv);
+
+            /* s = r * x + e */
+                sp_384_mul_6(x, x, r);
+            err = sp_384_mod_6(x, x, p384_order);
+        }
+        if (err == MP_OKAY) {
+            sp_384_norm_6(x);
+            carry = sp_384_add_6(s, e, x);
+            sp_384_cond_sub_6(s, s, p384_order, 0 - carry);
+            sp_384_norm_6(s);
+            c = sp_384_cmp_6(s, p384_order);
+            sp_384_cond_sub_6(s, s, p384_order, 0L - (sp_digit)(c >= 0));
+            sp_384_norm_6(s);
+
+            /* s = s * k^-1 mod order */
+                sp_384_mont_mul_order_6(s, s, kInv);
+            sp_384_norm_6(s);
+
+            /* Check that signature is usable. */
+            if (sp_384_iszero_6(s) == 0) {
+                break;
+            }
+        }
+    }
+
+    if (i == 0) {
+        err = RNG_FAILURE_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(r, rm);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(s, sm);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XMEMSET(d, 0, sizeof(sp_digit) * 8 * 6);
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    XMEMSET(e, 0, sizeof(sp_digit) * 2U * 6U);
+    XMEMSET(x, 0, sizeof(sp_digit) * 2U * 6U);
+    XMEMSET(k, 0, sizeof(sp_digit) * 2U * 6U);
+    XMEMSET(r, 0, sizeof(sp_digit) * 2U * 6U);
+    XMEMSET(r, 0, sizeof(sp_digit) * 2U * 6U);
+    XMEMSET(tmp, 0, sizeof(sp_digit) * 3U * 2U * 6U);
+#endif
+    sp_384_point_free_6(point, 1, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_SIGN */
+
+#ifdef HAVE_ECC_VERIFY
+/* Verify the signature values with the hash and public key.
+ *   e = Truncate(hash, 384)
+ *   u1 = e/s mod order
+ *   u2 = r/s mod order
+ *   r == (u1.G + u2.Q)->x mod order
+ * Optimization: Leave point in projective form.
+ *   (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
+ *   (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
+ * The hash is truncated to the first 384 bits.
+ *
+ * hash     Hash to sign.
+ * hashLen  Length of the hash data.
+ * rng      Random number generator.
+ * priv     Private part of key - scalar.
+ * rm       First part of result as an mp_int.
+ * sm       Sirst part of result as an mp_int.
+ * heap     Heap to use for allocation.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+int sp_ecc_verify_384(const byte* hash, word32 hashLen, mp_int* pX,
+    mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit u1d[2*6];
+    sp_digit u2d[2*6];
+    sp_digit sd[2*6];
+    sp_digit tmpd[2*6 * 5];
+    sp_point_384 p1d;
+    sp_point_384 p2d;
+#endif
+    sp_digit* u1 = NULL;
+    sp_digit* u2 = NULL;
+    sp_digit* s = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_384* p1;
+    sp_point_384* p2 = NULL;
+    sp_digit carry;
+    int64_t c;
+    int err;
+
+    err = sp_384_point_new_6(heap, p1d, p1);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_6(heap, p2d, p2);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 6, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        u1  = d + 0 * 6;
+        u2  = d + 2 * 6;
+        s   = d + 4 * 6;
+        tmp = d + 6 * 6;
+#else
+        u1 = u1d;
+        u2 = u2d;
+        s  = sd;
+        tmp = tmpd;
+#endif
+
+        if (hashLen > 48U) {
+            hashLen = 48U;
+        }
+
+        sp_384_from_bin(u1, 6, hash, (int)hashLen);
+        sp_384_from_mp(u2, 6, r);
+        sp_384_from_mp(s, 6, sm);
+        sp_384_from_mp(p2->x, 6, pX);
+        sp_384_from_mp(p2->y, 6, pY);
+        sp_384_from_mp(p2->z, 6, pZ);
+
+        {
+            sp_384_mul_6(s, s, p384_norm_order);
+        }
+        err = sp_384_mod_6(s, s, p384_order);
+    }
+    if (err == MP_OKAY) {
+        sp_384_norm_6(s);
+        {
+            sp_384_mont_inv_order_6(s, s, tmp);
+            sp_384_mont_mul_order_6(u1, u1, s);
+            sp_384_mont_mul_order_6(u2, u2, s);
+        }
+
+            err = sp_384_ecc_mulmod_base_6(p1, u1, 0, heap);
+    }
+    if (err == MP_OKAY) {
+            err = sp_384_ecc_mulmod_6(p2, p2, u2, 0, heap);
+    }
+
+    if (err == MP_OKAY) {
+        {
+            sp_384_proj_point_add_6(p1, p1, p2, tmp);
+            if (sp_384_iszero_6(p1->z)) {
+                if (sp_384_iszero_6(p1->x) && sp_384_iszero_6(p1->y)) {
+                    sp_384_proj_point_dbl_6(p1, p2, tmp);
+                }
+                else {
+                    /* Y ordinate is not used from here - don't set. */
+                    p1->x[0] = 0;
+                    p1->x[1] = 0;
+                    p1->x[2] = 0;
+                    p1->x[3] = 0;
+                    p1->x[4] = 0;
+                    p1->x[5] = 0;
+                    XMEMCPY(p1->z, p384_norm_mod, sizeof(p384_norm_mod));
+                }
+            }
+        }
+
+        /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
+        /* Reload r and convert to Montgomery form. */
+        sp_384_from_mp(u2, 6, r);
+        err = sp_384_mod_mul_norm_6(u2, u2, p384_mod);
+    }
+
+    if (err == MP_OKAY) {
+        /* u1 = r.z'.z' mod prime */
+        sp_384_mont_sqr_6(p1->z, p1->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_6(u1, u2, p1->z, p384_mod, p384_mp_mod);
+        *res = (int)(sp_384_cmp_6(p1->x, u1) == 0);
+        if (*res == 0) {
+            /* Reload r and add order. */
+            sp_384_from_mp(u2, 6, r);
+            carry = sp_384_add_6(u2, u2, p384_order);
+            /* Carry means result is greater than mod and is not valid. */
+            if (carry == 0) {
+                sp_384_norm_6(u2);
+
+                /* Compare with mod and if greater or equal then not valid. */
+                c = sp_384_cmp_6(u2, p384_mod);
+                if (c < 0) {
+                    /* Convert to Montogomery form */
+                    err = sp_384_mod_mul_norm_6(u2, u2, p384_mod);
+                    if (err == MP_OKAY) {
+                        /* u1 = (r + 1*order).z'.z' mod prime */
+                        sp_384_mont_mul_6(u1, u2, p1->z, p384_mod,
+                                                                  p384_mp_mod);
+                        *res = (int)(sp_384_cmp_6(p1->x, u1) == 0);
+                    }
+                }
+            }
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL)
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+#endif
+    sp_384_point_free_6(p1, 0, heap);
+    sp_384_point_free_6(p2, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_VERIFY */
+
+#ifdef HAVE_ECC_CHECK_KEY
+/* Check that the x and y oridinates are a valid point on the curve.
+ *
+ * point  EC point.
+ * heap   Heap to use if dynamically allocating.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+static int sp_384_ecc_is_point_6(sp_point_384* point, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit t1d[2*6];
+    sp_digit t2d[2*6];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 6 * 4, heap, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = d + 0 * 6;
+        t2 = d + 2 * 6;
+#else
+        (void)heap;
+
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
+        sp_384_sqr_6(t1, point->y);
+        (void)sp_384_mod_6(t1, t1, p384_mod);
+        sp_384_sqr_6(t2, point->x);
+        (void)sp_384_mod_6(t2, t2, p384_mod);
+        sp_384_mul_6(t2, t2, point->x);
+        (void)sp_384_mod_6(t2, t2, p384_mod);
+        (void)sp_384_sub_6(t2, p384_mod, t2);
+        sp_384_mont_add_6(t1, t1, t2, p384_mod);
+
+        sp_384_mont_add_6(t1, t1, point->x, p384_mod);
+        sp_384_mont_add_6(t1, t1, point->x, p384_mod);
+        sp_384_mont_add_6(t1, t1, point->x, p384_mod);
+
+        if (sp_384_cmp_6(t1, p384_b) != 0) {
+            err = MP_VAL;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+
+/* Check that the x and y oridinates are a valid point on the curve.
+ *
+ * pX  X ordinate of EC point.
+ * pY  Y ordinate of EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+int sp_ecc_is_point_384(mp_int* pX, mp_int* pY)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 pubd;
+#endif
+    sp_point_384* pub;
+    byte one[1] = { 1 };
+    int err;
+
+    err = sp_384_point_new_6(NULL, pubd, pub);
+    if (err == MP_OKAY) {
+        sp_384_from_mp(pub->x, 6, pX);
+        sp_384_from_mp(pub->y, 6, pY);
+        sp_384_from_bin(pub->z, 6, one, (int)sizeof(one));
+
+        err = sp_384_ecc_is_point_6(pub, NULL);
+    }
+
+    sp_384_point_free_6(pub, 0, NULL);
+
+    return err;
+}
+
+/* Check that the private scalar generates the EC point (px, py), the point is
+ * on the curve and the point has the correct order.
+ *
+ * pX     X ordinate of EC point.
+ * pY     Y ordinate of EC point.
+ * privm  Private scalar that generates EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve, ECC_INF_E if the point does not have the correct order,
+ * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and
+ * MP_OKAY otherwise.
+ */
+int sp_ecc_check_key_384(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit privd[6];
+    sp_point_384 pubd;
+    sp_point_384 pd;
+#endif
+    sp_digit* priv = NULL;
+    sp_point_384* pub;
+    sp_point_384* p = NULL;
+    byte one[1] = { 1 };
+    int err;
+
+    err = sp_384_point_new_6(heap, pubd, pub);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_6(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 6, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (priv == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+        priv = privd;
+#endif
+
+        sp_384_from_mp(pub->x, 6, pX);
+        sp_384_from_mp(pub->y, 6, pY);
+        sp_384_from_bin(pub->z, 6, one, (int)sizeof(one));
+        sp_384_from_mp(priv, 6, privm);
+
+        /* Check point at infinitiy. */
+        if ((sp_384_iszero_6(pub->x) != 0) &&
+            (sp_384_iszero_6(pub->y) != 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check range of X and Y */
+        if (sp_384_cmp_6(pub->x, p384_mod) >= 0 ||
+            sp_384_cmp_6(pub->y, p384_mod) >= 0) {
+            err = ECC_OUT_OF_RANGE_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check point is on curve */
+        err = sp_384_ecc_is_point_6(pub, heap);
+    }
+
+    if (err == MP_OKAY) {
+        /* Point * order = infinity */
+            err = sp_384_ecc_mulmod_6(p, pub, p384_order, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is infinity */
+        if ((sp_384_iszero_6(p->x) == 0) ||
+            (sp_384_iszero_6(p->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Base * private = point */
+            err = sp_384_ecc_mulmod_base_6(p, priv, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is public key */
+        if (sp_384_cmp_6(p->x, pub->x) != 0 ||
+            sp_384_cmp_6(p->y, pub->y) != 0) {
+            err = ECC_PRIV_KEY_E;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (priv != NULL) {
+        XFREE(priv, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_6(p, 0, heap);
+    sp_384_point_free_6(pub, 0, heap);
+
+    return err;
+}
+#endif
+#ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
+/* Add two projective EC points together.
+ * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ)
+ *
+ * pX   First EC point's X ordinate.
+ * pY   First EC point's Y ordinate.
+ * pZ   First EC point's Z ordinate.
+ * qX   Second EC point's X ordinate.
+ * qY   Second EC point's Y ordinate.
+ * qZ   Second EC point's Z ordinate.
+ * rX   Resultant EC point's X ordinate.
+ * rY   Resultant EC point's Y ordinate.
+ * rZ   Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_add_point_384(mp_int* pX, mp_int* pY, mp_int* pZ,
+                              mp_int* qX, mp_int* qY, mp_int* qZ,
+                              mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit tmpd[2 * 6 * 5];
+    sp_point_384 pd;
+    sp_point_384 qd;
+#endif
+    sp_digit* tmp;
+    sp_point_384* p;
+    sp_point_384* q = NULL;
+    int err;
+
+    err = sp_384_point_new_6(NULL, pd, p);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_6(NULL, qd, q);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 6 * 5, NULL,
+                                                              DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_384_from_mp(p->x, 6, pX);
+        sp_384_from_mp(p->y, 6, pY);
+        sp_384_from_mp(p->z, 6, pZ);
+        sp_384_from_mp(q->x, 6, qX);
+        sp_384_from_mp(q->y, 6, qY);
+        sp_384_from_mp(q->z, 6, qZ);
+
+            sp_384_proj_point_add_6(p, p, q, tmp);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->x, rX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->y, rY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->z, rZ);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_6(q, 0, NULL);
+    sp_384_point_free_6(p, 0, NULL);
+
+    return err;
+}
+
+/* Double a projective EC point.
+ * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ)
+ *
+ * pX   EC point's X ordinate.
+ * pY   EC point's Y ordinate.
+ * pZ   EC point's Z ordinate.
+ * rX   Resultant EC point's X ordinate.
+ * rY   Resultant EC point's Y ordinate.
+ * rZ   Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_dbl_point_384(mp_int* pX, mp_int* pY, mp_int* pZ,
+                              mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit tmpd[2 * 6 * 2];
+    sp_point_384 pd;
+#endif
+    sp_digit* tmp;
+    sp_point_384* p;
+    int err;
+
+    err = sp_384_point_new_6(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 6 * 2, NULL,
+                                                              DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_384_from_mp(p->x, 6, pX);
+        sp_384_from_mp(p->y, 6, pY);
+        sp_384_from_mp(p->z, 6, pZ);
+
+            sp_384_proj_point_dbl_6(p, p, tmp);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->x, rX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->y, rY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->z, rZ);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_6(p, 0, NULL);
+
+    return err;
+}
+
+/* Map a projective EC point to affine in place.
+ * pZ will be one.
+ *
+ * pX   EC point's X ordinate.
+ * pY   EC point's Y ordinate.
+ * pZ   EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_map_384(mp_int* pX, mp_int* pY, mp_int* pZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit tmpd[2 * 6 * 6];
+    sp_point_384 pd;
+#endif
+    sp_digit* tmp;
+    sp_point_384* p;
+    int err;
+
+    err = sp_384_point_new_6(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 6 * 6, NULL,
+                                                              DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+    if (err == MP_OKAY) {
+        sp_384_from_mp(p->x, 6, pX);
+        sp_384_from_mp(p->y, 6, pY);
+        sp_384_from_mp(p->z, 6, pZ);
+
+        sp_384_map_6(p, p, tmp);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->x, pX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->y, pY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->z, pZ);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_6(p, 0, NULL);
+
+    return err;
+}
+#endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */
+#ifdef HAVE_COMP_KEY
+/* Find the square root of a number mod the prime of the curve.
+ *
+ * y  The number to operate on and the result.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+static int sp_384_mont_sqrt_6(sp_digit* y)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d;
+#else
+    sp_digit t1d[2 * 6];
+    sp_digit t2d[2 * 6];
+    sp_digit t3d[2 * 6];
+    sp_digit t4d[2 * 6];
+    sp_digit t5d[2 * 6];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    sp_digit* t3;
+    sp_digit* t4;
+    sp_digit* t5;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5 * 2 * 6, NULL, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = d + 0 * 6;
+        t2 = d + 2 * 6;
+        t3 = d + 4 * 6;
+        t4 = d + 6 * 6;
+        t5 = d + 8 * 6;
+#else
+        t1 = t1d;
+        t2 = t2d;
+        t3 = t3d;
+        t4 = t4d;
+        t5 = t5d;
+#endif
+
+        {
+            /* t2 = y ^ 0x2 */
+            sp_384_mont_sqr_6(t2, y, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x3 */
+            sp_384_mont_mul_6(t1, t2, y, p384_mod, p384_mp_mod);
+            /* t5 = y ^ 0xc */
+            sp_384_mont_sqr_n_6(t5, t1, 2, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xf */
+            sp_384_mont_mul_6(t1, t1, t5, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x1e */
+            sp_384_mont_sqr_6(t2, t1, p384_mod, p384_mp_mod);
+            /* t3 = y ^ 0x1f */
+            sp_384_mont_mul_6(t3, t2, y, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3e0 */
+            sp_384_mont_sqr_n_6(t2, t3, 5, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x3ff */
+            sp_384_mont_mul_6(t1, t3, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x7fe0 */
+            sp_384_mont_sqr_n_6(t2, t1, 5, p384_mod, p384_mp_mod);
+            /* t3 = y ^ 0x7fff */
+            sp_384_mont_mul_6(t3, t3, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3fff800 */
+            sp_384_mont_sqr_n_6(t2, t3, 15, p384_mod, p384_mp_mod);
+            /* t4 = y ^ 0x3ffffff */
+            sp_384_mont_mul_6(t4, t3, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0xffffffc000000 */
+            sp_384_mont_sqr_n_6(t2, t4, 30, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xfffffffffffff */
+            sp_384_mont_mul_6(t1, t4, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0xfffffffffffffff000000000000000 */
+            sp_384_mont_sqr_n_6(t2, t1, 60, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xffffffffffffffffffffffffffffff */
+            sp_384_mont_mul_6(t1, t1, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */
+            sp_384_mont_sqr_n_6(t2, t1, 120, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+            sp_384_mont_mul_6(t1, t1, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */
+            sp_384_mont_sqr_n_6(t2, t1, 15, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+            sp_384_mont_mul_6(t1, t3, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff80000000 */
+            sp_384_mont_sqr_n_6(t2, t1, 31, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff */
+            sp_384_mont_mul_6(t1, t4, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff0 */
+            sp_384_mont_sqr_n_6(t2, t1, 4, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc */
+            sp_384_mont_mul_6(t1, t5, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000 */
+            sp_384_mont_sqr_n_6(t2, t1, 62, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000001 */
+            sp_384_mont_mul_6(t1, y, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc00000000000000040000000 */
+            sp_384_mont_sqr_n_6(y, t1, 30, p384_mod, p384_mp_mod);
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+
+
+/* Uncompress the point given the X ordinate.
+ *
+ * xm    X ordinate.
+ * odd   Whether the Y ordinate is odd.
+ * ym    Calculated Y ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_uncompress_384(mp_int* xm, int odd, mp_int* ym)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d;
+#else
+    sp_digit xd[2 * 6];
+    sp_digit yd[2 * 6];
+#endif
+    sp_digit* x = NULL;
+    sp_digit* y = NULL;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 6, NULL, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        x = d + 0 * 6;
+        y = d + 2 * 6;
+#else
+        x = xd;
+        y = yd;
+#endif
+
+        sp_384_from_mp(x, 6, xm);
+        err = sp_384_mod_mul_norm_6(x, x, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        /* y = x^3 */
+        {
+            sp_384_mont_sqr_6(y, x, p384_mod, p384_mp_mod);
+            sp_384_mont_mul_6(y, y, x, p384_mod, p384_mp_mod);
+        }
+        /* y = x^3 - 3x */
+        sp_384_mont_sub_6(y, y, x, p384_mod);
+        sp_384_mont_sub_6(y, y, x, p384_mod);
+        sp_384_mont_sub_6(y, y, x, p384_mod);
+        /* y = x^3 - 3x + b */
+        err = sp_384_mod_mul_norm_6(x, p384_b, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        sp_384_mont_add_6(y, y, x, p384_mod);
+        /* y = sqrt(x^3 - 3x + b) */
+        err = sp_384_mont_sqrt_6(y);
+    }
+    if (err == MP_OKAY) {
+        XMEMSET(y + 6, 0, 6U * sizeof(sp_digit));
+        sp_384_mont_reduce_6(y, p384_mod, p384_mp_mod);
+        if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) {
+            sp_384_mont_sub_6(y, p384_mod, y, p384_mod);
+        }
+
+        err = sp_384_to_mp(y, ym);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+#endif
+#endif /* WOLFSSL_SP_384 */
 #endif /* WOLFSSL_HAVE_SP_ECC */
 #endif /* WOLFSSL_SP_ARM64_ASM */
 #endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH || WOLFSSL_HAVE_SP_ECC */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wolfcrypt/src/sp_armthumb.c	Thu Jun 04 23:57:22 2020 +0000
@@ -0,0 +1,27864 @@
+/* sp.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/* Implementation by Sean Parkinson. */
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/cpuid.h>
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) || \
+                                    defined(WOLFSSL_HAVE_SP_ECC)
+
+#ifdef RSA_LOW_MEM
+#ifndef WOLFSSL_SP_SMALL
+#define WOLFSSL_SP_SMALL
+#endif
+#endif
+
+#include <wolfssl/wolfcrypt/sp.h>
+
+#ifdef WOLFSSL_SP_ARM_THUMB_ASM
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+#ifndef WOLFSSL_SP_NO_2048
+/* Read big endian unsigned byte array into r.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
+ * a  Byte array.
+ * n  Number of bytes in array to read.
+ */
+static void sp_2048_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = n-1; i >= 0; i--) {
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 24U) {
+            r[j] &= 0xffffffff;
+            s = 32U - s;
+            if (j + 1 >= size) {
+                break;
+            }
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
+ * a  A multi-precision integer.
+ */
+static void sp_2048_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 32
+    int j;
+
+    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+    for (j = a->used; j < size; j++) {
+        r[j] = 0;
+    }
+#elif DIGIT_BIT > 32
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
+        r[j] &= 0xffffffff;
+        s = 32U - s;
+        if (j + 1 >= size) {
+            break;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 32U) <= (word32)DIGIT_BIT) {
+            s += 32U;
+            r[j] &= 0xffffffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#else
+    int i, j = 0, s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i]) << s;
+        if (s + DIGIT_BIT >= 32) {
+            r[j] &= 0xffffffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            s = 32 - s;
+            if (s == DIGIT_BIT) {
+                r[++j] = 0;
+                s = 0;
+            }
+            else {
+                r[++j] = a->dp[i] >> s;
+                s = DIGIT_BIT - s;
+            }
+        }
+        else {
+            s += DIGIT_BIT;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#endif
+}
+
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 256
+ *
+ * r  A single precision integer.
+ * a  Byte array.
+ */
+static void sp_2048_to_bin(sp_digit* r, byte* a)
+{
+    int i, j, s = 0, b;
+
+    j = 2048 / 8 - 1;
+    a[j] = 0;
+    for (i=0; i<64 && j>=0; i++) {
+        b = 0;
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
+            break;
+        }
+        while (b < 32) {
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
+                break;
+            }
+        }
+        s = 8 - (b - 32);
+        if (j >= 0) {
+            a[j] = 0;
+        }
+        if (s != 0) {
+            j++;
+        }
+    }
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_mul_8(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit tmp[8 * 2];
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r8, r3\n\t"
+        "mov	r11, %[r]\n\t"
+        "mov	r9, %[a]\n\t"
+        "mov	r10, %[b]\n\t"
+        "mov	r6, #32\n\t"
+        "add	r6, r9\n\t"
+        "mov	r12, r6\n\t"
+        "\n1:\n\t"
+        "mov	%[r], #0\n\t"
+        "mov	r5, #0\n\t"
+        "mov	r6, #28\n\t"
+        "mov	%[a], r8\n\t"
+        "sub	%[a], r6\n\t"
+        "sbc	r6, r6\n\t"
+        "mvn	r6, r6\n\t"
+        "and	%[a], r6\n\t"
+        "mov	%[b], r8\n\t"
+        "sub	%[b], %[a]\n\t"
+        "add	%[a], r9\n\t"
+        "add	%[b], r10\n\t"
+        "\n2:\n\t"
+        "# Multiply Start\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r7, [%[b]]\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r3, r7\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r7, [%[b]]\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r7, [%[b]]\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r7, [%[b]]\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "# Multiply Done\n\t"
+        "add	%[a], #4\n\t"
+        "sub	%[b], #4\n\t"
+        "cmp	%[a], r12\n\t"
+        "beq	3f\n\t"
+        "mov	r6, r8\n\t"
+        "add	r6, r9\n\t"
+        "cmp	%[a], r6\n\t"
+        "ble	2b\n\t"
+        "\n3:\n\t"
+        "mov	%[r], r11\n\t"
+        "mov	r7, r8\n\t"
+        "str	r3, [%[r], r7]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "add	r7, #4\n\t"
+        "mov	r8, r7\n\t"
+        "mov	r6, #56\n\t"
+        "cmp	r7, r6\n\t"
+        "ble	1b\n\t"
+        "str	r3, [%[r], r7]\n\t"
+        "mov	%[a], r9\n\t"
+        "mov	%[b], r10\n\t"
+        :
+        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
+    );
+
+    XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r5, #0\n\t"
+        "mov	r8, r3\n\t"
+        "mov	r11, %[r]\n\t"
+        "mov	r6, #64\n\t"
+        "neg	r6, r6\n\t"
+        "add	sp, r6\n\t"
+        "mov	r10, sp\n\t"
+        "mov	r9, %[a]\n\t"
+        "\n1:\n\t"
+        "mov	%[r], #0\n\t"
+        "mov	r6, #28\n\t"
+        "mov	%[a], r8\n\t"
+        "sub	%[a], r6\n\t"
+        "sbc	r6, r6\n\t"
+        "mvn	r6, r6\n\t"
+        "and	%[a], r6\n\t"
+        "mov	r2, r8\n\t"
+        "sub	r2, %[a]\n\t"
+        "add	%[a], r9\n\t"
+        "add	r2, r9\n\t"
+        "\n2:\n\t"
+        "cmp	r2, %[a]\n\t"
+        "beq	4f\n\t"
+        "# Multiply * 2: Start\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r7, [r2]\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r3, r7\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "add	r3, r7\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r7, [r2]\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r7, [r2]\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r7, [r2]\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "# Multiply * 2: Done\n\t"
+        "bal	5f\n\t"
+        "\n4:\n\t"
+        "# Square: Start\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "mul	r6, r6\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "mul	r7, r7\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #15\n\t"
+        "lsl	r6, r6, #17\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "# Square: Done\n\t"
+        "\n5:\n\t"
+        "add	%[a], #4\n\t"
+        "sub	r2, #4\n\t"
+        "mov	r6, #32\n\t"
+        "add	r6, r9\n\t"
+        "cmp	%[a], r6\n\t"
+        "beq	3f\n\t"
+        "cmp	%[a], r2\n\t"
+        "bgt	3f\n\t"
+        "mov	r7, r8\n\t"
+        "add	r7, r9\n\t"
+        "cmp	%[a], r7\n\t"
+        "ble	2b\n\t"
+        "\n3:\n\t"
+        "mov	%[r], r10\n\t"
+        "mov	r7, r8\n\t"
+        "str	r3, [%[r], r7]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "mov	r5, #0\n\t"
+        "add	r7, #4\n\t"
+        "mov	r8, r7\n\t"
+        "mov	r6, #56\n\t"
+        "cmp	r7, r6\n\t"
+        "ble	1b\n\t"
+        "mov	%[a], r9\n\t"
+        "str	r3, [%[r], r7]\n\t"
+        "mov	%[r], r11\n\t"
+        "mov	%[a], r10\n\t"
+        "mov	r3, #60\n\t"
+        "\n4:\n\t"
+        "ldr	r6, [%[a], r3]\n\t"
+        "str	r6, [%[r], r3]\n\t"
+        "sub	r3, #4\n\t"
+        "bge	4b\n\t"
+        "mov	r6, #64\n\t"
+        "add	sp, r6\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
+    );
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_2048_add_8(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldr	r4, [%[a], #0]\n\t"
+        "ldr	r5, [%[b], #0]\n\t"
+        "add	r4, r5\n\t"
+        "str	r4, [%[r], #0]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b], #4]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #4]\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "ldr	r5, [%[b], #8]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "ldr	r5, [%[b], #12]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #12]\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "ldr	r5, [%[b], #16]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "ldr	r5, [%[b], #20]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #20]\n\t"
+        "ldr	r4, [%[a], #24]\n\t"
+        "ldr	r5, [%[b], #24]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #24]\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "ldr	r5, [%[b], #28]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #28]\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c]\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5"
+    );
+
+    return c;
+}
+
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_2048_sub_in_place_16(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldr	r3, [%[a], #0]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b], #0]\n\t"
+        "ldr	r6, [%[b], #4]\n\t"
+        "sub	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #0]\n\t"
+        "str	r4, [%[a], #4]\n\t"
+        "ldr	r3, [%[a], #8]\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "ldr	r5, [%[b], #8]\n\t"
+        "ldr	r6, [%[b], #12]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #8]\n\t"
+        "str	r4, [%[a], #12]\n\t"
+        "ldr	r3, [%[a], #16]\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "ldr	r5, [%[b], #16]\n\t"
+        "ldr	r6, [%[b], #20]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #16]\n\t"
+        "str	r4, [%[a], #20]\n\t"
+        "ldr	r3, [%[a], #24]\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "ldr	r5, [%[b], #24]\n\t"
+        "ldr	r6, [%[b], #28]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #24]\n\t"
+        "str	r4, [%[a], #28]\n\t"
+        "ldr	r3, [%[a], #32]\n\t"
+        "ldr	r4, [%[a], #36]\n\t"
+        "ldr	r5, [%[b], #32]\n\t"
+        "ldr	r6, [%[b], #36]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #32]\n\t"
+        "str	r4, [%[a], #36]\n\t"
+        "ldr	r3, [%[a], #40]\n\t"
+        "ldr	r4, [%[a], #44]\n\t"
+        "ldr	r5, [%[b], #40]\n\t"
+        "ldr	r6, [%[b], #44]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #40]\n\t"
+        "str	r4, [%[a], #44]\n\t"
+        "ldr	r3, [%[a], #48]\n\t"
+        "ldr	r4, [%[a], #52]\n\t"
+        "ldr	r5, [%[b], #48]\n\t"
+        "ldr	r6, [%[b], #52]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #48]\n\t"
+        "str	r4, [%[a], #52]\n\t"
+        "ldr	r3, [%[a], #56]\n\t"
+        "ldr	r4, [%[a], #60]\n\t"
+        "ldr	r5, [%[b], #56]\n\t"
+        "ldr	r6, [%[b], #60]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #56]\n\t"
+        "str	r4, [%[a], #60]\n\t"
+        "sbc	%[c], %[c]\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6"
+    );
+
+    return c;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_2048_add_16(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldr	r4, [%[a], #0]\n\t"
+        "ldr	r5, [%[b], #0]\n\t"
+        "add	r4, r5\n\t"
+        "str	r4, [%[r], #0]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b], #4]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #4]\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "ldr	r5, [%[b], #8]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "ldr	r5, [%[b], #12]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #12]\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "ldr	r5, [%[b], #16]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "ldr	r5, [%[b], #20]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #20]\n\t"
+        "ldr	r4, [%[a], #24]\n\t"
+        "ldr	r5, [%[b], #24]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #24]\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "ldr	r5, [%[b], #28]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #28]\n\t"
+        "ldr	r4, [%[a], #32]\n\t"
+        "ldr	r5, [%[b], #32]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #32]\n\t"
+        "ldr	r4, [%[a], #36]\n\t"
+        "ldr	r5, [%[b], #36]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #36]\n\t"
+        "ldr	r4, [%[a], #40]\n\t"
+        "ldr	r5, [%[b], #40]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #40]\n\t"
+        "ldr	r4, [%[a], #44]\n\t"
+        "ldr	r5, [%[b], #44]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #44]\n\t"
+        "ldr	r4, [%[a], #48]\n\t"
+        "ldr	r5, [%[b], #48]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #48]\n\t"
+        "ldr	r4, [%[a], #52]\n\t"
+        "ldr	r5, [%[b], #52]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #52]\n\t"
+        "ldr	r4, [%[a], #56]\n\t"
+        "ldr	r5, [%[b], #56]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #56]\n\t"
+        "ldr	r4, [%[a], #60]\n\t"
+        "ldr	r5, [%[b], #60]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #60]\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c]\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5"
+    );
+
+    return c;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * m  Mask to AND against each digit.
+ */
+static void sp_2048_mask_8(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<8; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    r[0] = a[0] & m;
+    r[1] = a[1] & m;
+    r[2] = a[2] & m;
+    r[3] = a[3] & m;
+    r[4] = a[4] & m;
+    r[5] = a[5] & m;
+    r[6] = a[6] & m;
+    r[7] = a[7] & m;
+#endif
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_mul_16(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit* z0 = r;
+    sp_digit z1[16];
+    sp_digit a1[8];
+    sp_digit b1[8];
+    sp_digit z2[16];
+    sp_digit u, ca, cb;
+
+    ca = sp_2048_add_8(a1, a, &a[8]);
+    cb = sp_2048_add_8(b1, b, &b[8]);
+    u  = ca & cb;
+    sp_2048_mul_8(z1, a1, b1);
+    sp_2048_mul_8(z2, &a[8], &b[8]);
+    sp_2048_mul_8(z0, a, b);
+    sp_2048_mask_8(r + 16, a1, 0 - cb);
+    sp_2048_mask_8(b1, b1, 0 - ca);
+    u += sp_2048_add_8(r + 16, r + 16, b1);
+    u += sp_2048_sub_in_place_16(z1, z2);
+    u += sp_2048_sub_in_place_16(z1, z0);
+    u += sp_2048_add_16(r + 8, r + 8, z1);
+    r[24] = u;
+    XMEMSET(r + 24 + 1, 0, sizeof(sp_digit) * (8 - 1));
+    (void)sp_2048_add_16(r + 16, r + 16, z2);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_sqr_16(sp_digit* r, const sp_digit* a)
+{
+    sp_digit* z0 = r;
+    sp_digit z2[16];
+    sp_digit z1[16];
+    sp_digit a1[8];
+    sp_digit u;
+
+    u = sp_2048_add_8(a1, a, &a[8]);
+    sp_2048_sqr_8(z1, a1);
+    sp_2048_sqr_8(z2, &a[8]);
+    sp_2048_sqr_8(z0, a);
+    sp_2048_mask_8(r + 16, a1, 0 - u);
+    u += sp_2048_add_8(r + 16, r + 16, r + 16);
+    u += sp_2048_sub_in_place_16(z1, z2);
+    u += sp_2048_sub_in_place_16(z1, z0);
+    u += sp_2048_add_16(r + 8, r + 8, z1);
+    r[24] = u;
+    XMEMSET(r + 24 + 1, 0, sizeof(sp_digit) * (8 - 1));
+    (void)sp_2048_add_16(r + 16, r + 16, z2);
+}
+
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_2048_sub_in_place_32(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldr	r3, [%[a], #0]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b], #0]\n\t"
+        "ldr	r6, [%[b], #4]\n\t"
+        "sub	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #0]\n\t"
+        "str	r4, [%[a], #4]\n\t"
+        "ldr	r3, [%[a], #8]\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "ldr	r5, [%[b], #8]\n\t"
+        "ldr	r6, [%[b], #12]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #8]\n\t"
+        "str	r4, [%[a], #12]\n\t"
+        "ldr	r3, [%[a], #16]\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "ldr	r5, [%[b], #16]\n\t"
+        "ldr	r6, [%[b], #20]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #16]\n\t"
+        "str	r4, [%[a], #20]\n\t"
+        "ldr	r3, [%[a], #24]\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "ldr	r5, [%[b], #24]\n\t"
+        "ldr	r6, [%[b], #28]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #24]\n\t"
+        "str	r4, [%[a], #28]\n\t"
+        "ldr	r3, [%[a], #32]\n\t"
+        "ldr	r4, [%[a], #36]\n\t"
+        "ldr	r5, [%[b], #32]\n\t"
+        "ldr	r6, [%[b], #36]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #32]\n\t"
+        "str	r4, [%[a], #36]\n\t"
+        "ldr	r3, [%[a], #40]\n\t"
+        "ldr	r4, [%[a], #44]\n\t"
+        "ldr	r5, [%[b], #40]\n\t"
+        "ldr	r6, [%[b], #44]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #40]\n\t"
+        "str	r4, [%[a], #44]\n\t"
+        "ldr	r3, [%[a], #48]\n\t"
+        "ldr	r4, [%[a], #52]\n\t"
+        "ldr	r5, [%[b], #48]\n\t"
+        "ldr	r6, [%[b], #52]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #48]\n\t"
+        "str	r4, [%[a], #52]\n\t"
+        "ldr	r3, [%[a], #56]\n\t"
+        "ldr	r4, [%[a], #60]\n\t"
+        "ldr	r5, [%[b], #56]\n\t"
+        "ldr	r6, [%[b], #60]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #56]\n\t"
+        "str	r4, [%[a], #60]\n\t"
+        "ldr	r3, [%[a], #64]\n\t"
+        "ldr	r4, [%[a], #68]\n\t"
+        "ldr	r5, [%[b], #64]\n\t"
+        "ldr	r6, [%[b], #68]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #64]\n\t"
+        "str	r4, [%[a], #68]\n\t"
+        "ldr	r3, [%[a], #72]\n\t"
+        "ldr	r4, [%[a], #76]\n\t"
+        "ldr	r5, [%[b], #72]\n\t"
+        "ldr	r6, [%[b], #76]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #72]\n\t"
+        "str	r4, [%[a], #76]\n\t"
+        "ldr	r3, [%[a], #80]\n\t"
+        "ldr	r4, [%[a], #84]\n\t"
+        "ldr	r5, [%[b], #80]\n\t"
+        "ldr	r6, [%[b], #84]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #80]\n\t"
+        "str	r4, [%[a], #84]\n\t"
+        "ldr	r3, [%[a], #88]\n\t"
+        "ldr	r4, [%[a], #92]\n\t"
+        "ldr	r5, [%[b], #88]\n\t"
+        "ldr	r6, [%[b], #92]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #88]\n\t"
+        "str	r4, [%[a], #92]\n\t"
+        "ldr	r3, [%[a], #96]\n\t"
+        "ldr	r4, [%[a], #100]\n\t"
+        "ldr	r5, [%[b], #96]\n\t"
+        "ldr	r6, [%[b], #100]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #96]\n\t"
+        "str	r4, [%[a], #100]\n\t"
+        "ldr	r3, [%[a], #104]\n\t"
+        "ldr	r4, [%[a], #108]\n\t"
+        "ldr	r5, [%[b], #104]\n\t"
+        "ldr	r6, [%[b], #108]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #104]\n\t"
+        "str	r4, [%[a], #108]\n\t"
+        "ldr	r3, [%[a], #112]\n\t"
+        "ldr	r4, [%[a], #116]\n\t"
+        "ldr	r5, [%[b], #112]\n\t"
+        "ldr	r6, [%[b], #116]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #112]\n\t"
+        "str	r4, [%[a], #116]\n\t"
+        "ldr	r3, [%[a], #120]\n\t"
+        "ldr	r4, [%[a], #124]\n\t"
+        "ldr	r5, [%[b], #120]\n\t"
+        "ldr	r6, [%[b], #124]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #120]\n\t"
+        "str	r4, [%[a], #124]\n\t"
+        "sbc	%[c], %[c]\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6"
+    );
+
+    return c;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldr	r4, [%[a], #0]\n\t"
+        "ldr	r5, [%[b], #0]\n\t"
+        "add	r4, r5\n\t"
+        "str	r4, [%[r], #0]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b], #4]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #4]\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "ldr	r5, [%[b], #8]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "ldr	r5, [%[b], #12]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #12]\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "ldr	r5, [%[b], #16]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "ldr	r5, [%[b], #20]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #20]\n\t"
+        "ldr	r4, [%[a], #24]\n\t"
+        "ldr	r5, [%[b], #24]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #24]\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "ldr	r5, [%[b], #28]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #28]\n\t"
+        "ldr	r4, [%[a], #32]\n\t"
+        "ldr	r5, [%[b], #32]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #32]\n\t"
+        "ldr	r4, [%[a], #36]\n\t"
+        "ldr	r5, [%[b], #36]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #36]\n\t"
+        "ldr	r4, [%[a], #40]\n\t"
+        "ldr	r5, [%[b], #40]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #40]\n\t"
+        "ldr	r4, [%[a], #44]\n\t"
+        "ldr	r5, [%[b], #44]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #44]\n\t"
+        "ldr	r4, [%[a], #48]\n\t"
+        "ldr	r5, [%[b], #48]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #48]\n\t"
+        "ldr	r4, [%[a], #52]\n\t"
+        "ldr	r5, [%[b], #52]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #52]\n\t"
+        "ldr	r4, [%[a], #56]\n\t"
+        "ldr	r5, [%[b], #56]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #56]\n\t"
+        "ldr	r4, [%[a], #60]\n\t"
+        "ldr	r5, [%[b], #60]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #60]\n\t"
+        "ldr	r4, [%[a], #64]\n\t"
+        "ldr	r5, [%[b], #64]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #64]\n\t"
+        "ldr	r4, [%[a], #68]\n\t"
+        "ldr	r5, [%[b], #68]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #68]\n\t"
+        "ldr	r4, [%[a], #72]\n\t"
+        "ldr	r5, [%[b], #72]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #72]\n\t"
+        "ldr	r4, [%[a], #76]\n\t"
+        "ldr	r5, [%[b], #76]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #76]\n\t"
+        "ldr	r4, [%[a], #80]\n\t"
+        "ldr	r5, [%[b], #80]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #80]\n\t"
+        "ldr	r4, [%[a], #84]\n\t"
+        "ldr	r5, [%[b], #84]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #84]\n\t"
+        "ldr	r4, [%[a], #88]\n\t"
+        "ldr	r5, [%[b], #88]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #88]\n\t"
+        "ldr	r4, [%[a], #92]\n\t"
+        "ldr	r5, [%[b], #92]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #92]\n\t"
+        "ldr	r4, [%[a], #96]\n\t"
+        "ldr	r5, [%[b], #96]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #96]\n\t"
+        "ldr	r4, [%[a], #100]\n\t"
+        "ldr	r5, [%[b], #100]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #100]\n\t"
+        "ldr	r4, [%[a], #104]\n\t"
+        "ldr	r5, [%[b], #104]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #104]\n\t"
+        "ldr	r4, [%[a], #108]\n\t"
+        "ldr	r5, [%[b], #108]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #108]\n\t"
+        "ldr	r4, [%[a], #112]\n\t"
+        "ldr	r5, [%[b], #112]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #112]\n\t"
+        "ldr	r4, [%[a], #116]\n\t"
+        "ldr	r5, [%[b], #116]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #116]\n\t"
+        "ldr	r4, [%[a], #120]\n\t"
+        "ldr	r5, [%[b], #120]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #120]\n\t"
+        "ldr	r4, [%[a], #124]\n\t"
+        "ldr	r5, [%[b], #124]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #124]\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c]\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5"
+    );
+
+    return c;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * m  Mask to AND against each digit.
+ */
+static void sp_2048_mask_16(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<16; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    int i;
+
+    for (i = 0; i < 16; i += 8) {
+        r[i+0] = a[i+0] & m;
+        r[i+1] = a[i+1] & m;
+        r[i+2] = a[i+2] & m;
+        r[i+3] = a[i+3] & m;
+        r[i+4] = a[i+4] & m;
+        r[i+5] = a[i+5] & m;
+        r[i+6] = a[i+6] & m;
+        r[i+7] = a[i+7] & m;
+    }
+#endif
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit* z0 = r;
+    sp_digit z1[32];
+    sp_digit a1[16];
+    sp_digit b1[16];
+    sp_digit z2[32];
+    sp_digit u, ca, cb;
+
+    ca = sp_2048_add_16(a1, a, &a[16]);
+    cb = sp_2048_add_16(b1, b, &b[16]);
+    u  = ca & cb;
+    sp_2048_mul_16(z1, a1, b1);
+    sp_2048_mul_16(z2, &a[16], &b[16]);
+    sp_2048_mul_16(z0, a, b);
+    sp_2048_mask_16(r + 32, a1, 0 - cb);
+    sp_2048_mask_16(b1, b1, 0 - ca);
+    u += sp_2048_add_16(r + 32, r + 32, b1);
+    u += sp_2048_sub_in_place_32(z1, z2);
+    u += sp_2048_sub_in_place_32(z1, z0);
+    u += sp_2048_add_32(r + 16, r + 16, z1);
+    r[48] = u;
+    XMEMSET(r + 48 + 1, 0, sizeof(sp_digit) * (16 - 1));
+    (void)sp_2048_add_32(r + 32, r + 32, z2);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a)
+{
+    sp_digit* z0 = r;
+    sp_digit z2[32];
+    sp_digit z1[32];
+    sp_digit a1[16];
+    sp_digit u;
+
+    u = sp_2048_add_16(a1, a, &a[16]);
+    sp_2048_sqr_16(z1, a1);
+    sp_2048_sqr_16(z2, &a[16]);
+    sp_2048_sqr_16(z0, a);
+    sp_2048_mask_16(r + 32, a1, 0 - u);
+    u += sp_2048_add_16(r + 32, r + 32, r + 32);
+    u += sp_2048_sub_in_place_32(z1, z2);
+    u += sp_2048_sub_in_place_32(z1, z0);
+    u += sp_2048_add_32(r + 16, r + 16, z1);
+    r[48] = u;
+    XMEMSET(r + 48 + 1, 0, sizeof(sp_digit) * (16 - 1));
+    (void)sp_2048_add_32(r + 32, r + 32, z2);
+}
+
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_2048_sub_in_place_64(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldr	r3, [%[a], #0]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b], #0]\n\t"
+        "ldr	r6, [%[b], #4]\n\t"
+        "sub	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #0]\n\t"
+        "str	r4, [%[a], #4]\n\t"
+        "ldr	r3, [%[a], #8]\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "ldr	r5, [%[b], #8]\n\t"
+        "ldr	r6, [%[b], #12]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #8]\n\t"
+        "str	r4, [%[a], #12]\n\t"
+        "ldr	r3, [%[a], #16]\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "ldr	r5, [%[b], #16]\n\t"
+        "ldr	r6, [%[b], #20]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #16]\n\t"
+        "str	r4, [%[a], #20]\n\t"
+        "ldr	r3, [%[a], #24]\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "ldr	r5, [%[b], #24]\n\t"
+        "ldr	r6, [%[b], #28]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #24]\n\t"
+        "str	r4, [%[a], #28]\n\t"
+        "ldr	r3, [%[a], #32]\n\t"
+        "ldr	r4, [%[a], #36]\n\t"
+        "ldr	r5, [%[b], #32]\n\t"
+        "ldr	r6, [%[b], #36]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #32]\n\t"
+        "str	r4, [%[a], #36]\n\t"
+        "ldr	r3, [%[a], #40]\n\t"
+        "ldr	r4, [%[a], #44]\n\t"
+        "ldr	r5, [%[b], #40]\n\t"
+        "ldr	r6, [%[b], #44]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #40]\n\t"
+        "str	r4, [%[a], #44]\n\t"
+        "ldr	r3, [%[a], #48]\n\t"
+        "ldr	r4, [%[a], #52]\n\t"
+        "ldr	r5, [%[b], #48]\n\t"
+        "ldr	r6, [%[b], #52]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #48]\n\t"
+        "str	r4, [%[a], #52]\n\t"
+        "ldr	r3, [%[a], #56]\n\t"
+        "ldr	r4, [%[a], #60]\n\t"
+        "ldr	r5, [%[b], #56]\n\t"
+        "ldr	r6, [%[b], #60]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #56]\n\t"
+        "str	r4, [%[a], #60]\n\t"
+        "ldr	r3, [%[a], #64]\n\t"
+        "ldr	r4, [%[a], #68]\n\t"
+        "ldr	r5, [%[b], #64]\n\t"
+        "ldr	r6, [%[b], #68]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #64]\n\t"
+        "str	r4, [%[a], #68]\n\t"
+        "ldr	r3, [%[a], #72]\n\t"
+        "ldr	r4, [%[a], #76]\n\t"
+        "ldr	r5, [%[b], #72]\n\t"
+        "ldr	r6, [%[b], #76]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #72]\n\t"
+        "str	r4, [%[a], #76]\n\t"
+        "ldr	r3, [%[a], #80]\n\t"
+        "ldr	r4, [%[a], #84]\n\t"
+        "ldr	r5, [%[b], #80]\n\t"
+        "ldr	r6, [%[b], #84]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #80]\n\t"
+        "str	r4, [%[a], #84]\n\t"
+        "ldr	r3, [%[a], #88]\n\t"
+        "ldr	r4, [%[a], #92]\n\t"
+        "ldr	r5, [%[b], #88]\n\t"
+        "ldr	r6, [%[b], #92]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #88]\n\t"
+        "str	r4, [%[a], #92]\n\t"
+        "ldr	r3, [%[a], #96]\n\t"
+        "ldr	r4, [%[a], #100]\n\t"
+        "ldr	r5, [%[b], #96]\n\t"
+        "ldr	r6, [%[b], #100]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #96]\n\t"
+        "str	r4, [%[a], #100]\n\t"
+        "ldr	r3, [%[a], #104]\n\t"
+        "ldr	r4, [%[a], #108]\n\t"
+        "ldr	r5, [%[b], #104]\n\t"
+        "ldr	r6, [%[b], #108]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #104]\n\t"
+        "str	r4, [%[a], #108]\n\t"
+        "ldr	r3, [%[a], #112]\n\t"
+        "ldr	r4, [%[a], #116]\n\t"
+        "ldr	r5, [%[b], #112]\n\t"
+        "ldr	r6, [%[b], #116]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #112]\n\t"
+        "str	r4, [%[a], #116]\n\t"
+        "ldr	r3, [%[a], #120]\n\t"
+        "ldr	r4, [%[a], #124]\n\t"
+        "ldr	r5, [%[b], #120]\n\t"
+        "ldr	r6, [%[b], #124]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #120]\n\t"
+        "str	r4, [%[a], #124]\n\t"
+        "sbc	%[c], %[c]\n\t"
+        "add	%[a], #0x80\n\t"
+        "add	%[b], #0x80\n\t"
+        "mov	r5, #0\n\t"
+        "sub	r5, %[c]\n\t"
+        "ldr	r3, [%[a], #0]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b], #0]\n\t"
+        "ldr	r6, [%[b], #4]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #0]\n\t"
+        "str	r4, [%[a], #4]\n\t"
+        "ldr	r3, [%[a], #8]\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "ldr	r5, [%[b], #8]\n\t"
+        "ldr	r6, [%[b], #12]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #8]\n\t"
+        "str	r4, [%[a], #12]\n\t"
+        "ldr	r3, [%[a], #16]\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "ldr	r5, [%[b], #16]\n\t"
+        "ldr	r6, [%[b], #20]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #16]\n\t"
+        "str	r4, [%[a], #20]\n\t"
+        "ldr	r3, [%[a], #24]\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "ldr	r5, [%[b], #24]\n\t"
+        "ldr	r6, [%[b], #28]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #24]\n\t"
+        "str	r4, [%[a], #28]\n\t"
+        "ldr	r3, [%[a], #32]\n\t"
+        "ldr	r4, [%[a], #36]\n\t"
+        "ldr	r5, [%[b], #32]\n\t"
+        "ldr	r6, [%[b], #36]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #32]\n\t"
+        "str	r4, [%[a], #36]\n\t"
+        "ldr	r3, [%[a], #40]\n\t"
+        "ldr	r4, [%[a], #44]\n\t"
+        "ldr	r5, [%[b], #40]\n\t"
+        "ldr	r6, [%[b], #44]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #40]\n\t"
+        "str	r4, [%[a], #44]\n\t"
+        "ldr	r3, [%[a], #48]\n\t"
+        "ldr	r4, [%[a], #52]\n\t"
+        "ldr	r5, [%[b], #48]\n\t"
+        "ldr	r6, [%[b], #52]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #48]\n\t"
+        "str	r4, [%[a], #52]\n\t"
+        "ldr	r3, [%[a], #56]\n\t"
+        "ldr	r4, [%[a], #60]\n\t"
+        "ldr	r5, [%[b], #56]\n\t"
+        "ldr	r6, [%[b], #60]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #56]\n\t"
+        "str	r4, [%[a], #60]\n\t"
+        "ldr	r3, [%[a], #64]\n\t"
+        "ldr	r4, [%[a], #68]\n\t"
+        "ldr	r5, [%[b], #64]\n\t"
+        "ldr	r6, [%[b], #68]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #64]\n\t"
+        "str	r4, [%[a], #68]\n\t"
+        "ldr	r3, [%[a], #72]\n\t"
+        "ldr	r4, [%[a], #76]\n\t"
+        "ldr	r5, [%[b], #72]\n\t"
+        "ldr	r6, [%[b], #76]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #72]\n\t"
+        "str	r4, [%[a], #76]\n\t"
+        "ldr	r3, [%[a], #80]\n\t"
+        "ldr	r4, [%[a], #84]\n\t"
+        "ldr	r5, [%[b], #80]\n\t"
+        "ldr	r6, [%[b], #84]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #80]\n\t"
+        "str	r4, [%[a], #84]\n\t"
+        "ldr	r3, [%[a], #88]\n\t"
+        "ldr	r4, [%[a], #92]\n\t"
+        "ldr	r5, [%[b], #88]\n\t"
+        "ldr	r6, [%[b], #92]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #88]\n\t"
+        "str	r4, [%[a], #92]\n\t"
+        "ldr	r3, [%[a], #96]\n\t"
+        "ldr	r4, [%[a], #100]\n\t"
+        "ldr	r5, [%[b], #96]\n\t"
+        "ldr	r6, [%[b], #100]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #96]\n\t"
+        "str	r4, [%[a], #100]\n\t"
+        "ldr	r3, [%[a], #104]\n\t"
+        "ldr	r4, [%[a], #108]\n\t"
+        "ldr	r5, [%[b], #104]\n\t"
+        "ldr	r6, [%[b], #108]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #104]\n\t"
+        "str	r4, [%[a], #108]\n\t"
+        "ldr	r3, [%[a], #112]\n\t"
+        "ldr	r4, [%[a], #116]\n\t"
+        "ldr	r5, [%[b], #112]\n\t"
+        "ldr	r6, [%[b], #116]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #112]\n\t"
+        "str	r4, [%[a], #116]\n\t"
+        "ldr	r3, [%[a], #120]\n\t"
+        "ldr	r4, [%[a], #124]\n\t"
+        "ldr	r5, [%[b], #120]\n\t"
+        "ldr	r6, [%[b], #124]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #120]\n\t"
+        "str	r4, [%[a], #124]\n\t"
+        "sbc	%[c], %[c]\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6"
+    );
+
+    return c;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r7, #0\n\t"
+        "mvn	r7, r7\n\t"
+        "ldr	r4, [%[a], #0]\n\t"
+        "ldr	r5, [%[b], #0]\n\t"
+        "add	r4, r5\n\t"
+        "str	r4, [%[r], #0]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b], #4]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #4]\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "ldr	r5, [%[b], #8]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "ldr	r5, [%[b], #12]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #12]\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "ldr	r5, [%[b], #16]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "ldr	r5, [%[b], #20]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #20]\n\t"
+        "ldr	r4, [%[a], #24]\n\t"
+        "ldr	r5, [%[b], #24]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #24]\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "ldr	r5, [%[b], #28]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #28]\n\t"
+        "ldr	r4, [%[a], #32]\n\t"
+        "ldr	r5, [%[b], #32]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #32]\n\t"
+        "ldr	r4, [%[a], #36]\n\t"
+        "ldr	r5, [%[b], #36]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #36]\n\t"
+        "ldr	r4, [%[a], #40]\n\t"
+        "ldr	r5, [%[b], #40]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #40]\n\t"
+        "ldr	r4, [%[a], #44]\n\t"
+        "ldr	r5, [%[b], #44]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #44]\n\t"
+        "ldr	r4, [%[a], #48]\n\t"
+        "ldr	r5, [%[b], #48]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #48]\n\t"
+        "ldr	r4, [%[a], #52]\n\t"
+        "ldr	r5, [%[b], #52]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #52]\n\t"
+        "ldr	r4, [%[a], #56]\n\t"
+        "ldr	r5, [%[b], #56]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #56]\n\t"
+        "ldr	r4, [%[a], #60]\n\t"
+        "ldr	r5, [%[b], #60]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #60]\n\t"
+        "ldr	r4, [%[a], #64]\n\t"
+        "ldr	r5, [%[b], #64]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #64]\n\t"
+        "ldr	r4, [%[a], #68]\n\t"
+        "ldr	r5, [%[b], #68]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #68]\n\t"
+        "ldr	r4, [%[a], #72]\n\t"
+        "ldr	r5, [%[b], #72]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #72]\n\t"
+        "ldr	r4, [%[a], #76]\n\t"
+        "ldr	r5, [%[b], #76]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #76]\n\t"
+        "ldr	r4, [%[a], #80]\n\t"
+        "ldr	r5, [%[b], #80]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #80]\n\t"
+        "ldr	r4, [%[a], #84]\n\t"
+        "ldr	r5, [%[b], #84]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #84]\n\t"
+        "ldr	r4, [%[a], #88]\n\t"
+        "ldr	r5, [%[b], #88]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #88]\n\t"
+        "ldr	r4, [%[a], #92]\n\t"
+        "ldr	r5, [%[b], #92]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #92]\n\t"
+        "ldr	r4, [%[a], #96]\n\t"
+        "ldr	r5, [%[b], #96]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #96]\n\t"
+        "ldr	r4, [%[a], #100]\n\t"
+        "ldr	r5, [%[b], #100]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #100]\n\t"
+        "ldr	r4, [%[a], #104]\n\t"
+        "ldr	r5, [%[b], #104]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #104]\n\t"
+        "ldr	r4, [%[a], #108]\n\t"
+        "ldr	r5, [%[b], #108]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #108]\n\t"
+        "ldr	r4, [%[a], #112]\n\t"
+        "ldr	r5, [%[b], #112]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #112]\n\t"
+        "ldr	r4, [%[a], #116]\n\t"
+        "ldr	r5, [%[b], #116]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #116]\n\t"
+        "ldr	r4, [%[a], #120]\n\t"
+        "ldr	r5, [%[b], #120]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #120]\n\t"
+        "ldr	r4, [%[a], #124]\n\t"
+        "ldr	r5, [%[b], #124]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #124]\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c]\n\t"
+        "add	%[a], #0x80\n\t"
+        "add	%[b], #0x80\n\t"
+        "add	%[r], #0x80\n\t"
+        "add	%[c], r7\n\t"
+        "ldr	r4, [%[a], #0]\n\t"
+        "ldr	r5, [%[b], #0]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #0]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b], #4]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #4]\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "ldr	r5, [%[b], #8]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "ldr	r5, [%[b], #12]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #12]\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "ldr	r5, [%[b], #16]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "ldr	r5, [%[b], #20]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #20]\n\t"
+        "ldr	r4, [%[a], #24]\n\t"
+        "ldr	r5, [%[b], #24]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #24]\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "ldr	r5, [%[b], #28]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #28]\n\t"
+        "ldr	r4, [%[a], #32]\n\t"
+        "ldr	r5, [%[b], #32]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #32]\n\t"
+        "ldr	r4, [%[a], #36]\n\t"
+        "ldr	r5, [%[b], #36]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #36]\n\t"
+        "ldr	r4, [%[a], #40]\n\t"
+        "ldr	r5, [%[b], #40]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #40]\n\t"
+        "ldr	r4, [%[a], #44]\n\t"
+        "ldr	r5, [%[b], #44]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #44]\n\t"
+        "ldr	r4, [%[a], #48]\n\t"
+        "ldr	r5, [%[b], #48]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #48]\n\t"
+        "ldr	r4, [%[a], #52]\n\t"
+        "ldr	r5, [%[b], #52]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #52]\n\t"
+        "ldr	r4, [%[a], #56]\n\t"
+        "ldr	r5, [%[b], #56]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #56]\n\t"
+        "ldr	r4, [%[a], #60]\n\t"
+        "ldr	r5, [%[b], #60]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #60]\n\t"
+        "ldr	r4, [%[a], #64]\n\t"
+        "ldr	r5, [%[b], #64]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #64]\n\t"
+        "ldr	r4, [%[a], #68]\n\t"
+        "ldr	r5, [%[b], #68]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #68]\n\t"
+        "ldr	r4, [%[a], #72]\n\t"
+        "ldr	r5, [%[b], #72]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #72]\n\t"
+        "ldr	r4, [%[a], #76]\n\t"
+        "ldr	r5, [%[b], #76]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #76]\n\t"
+        "ldr	r4, [%[a], #80]\n\t"
+        "ldr	r5, [%[b], #80]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #80]\n\t"
+        "ldr	r4, [%[a], #84]\n\t"
+        "ldr	r5, [%[b], #84]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #84]\n\t"
+        "ldr	r4, [%[a], #88]\n\t"
+        "ldr	r5, [%[b], #88]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #88]\n\t"
+        "ldr	r4, [%[a], #92]\n\t"
+        "ldr	r5, [%[b], #92]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #92]\n\t"
+        "ldr	r4, [%[a], #96]\n\t"
+        "ldr	r5, [%[b], #96]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #96]\n\t"
+        "ldr	r4, [%[a], #100]\n\t"
+        "ldr	r5, [%[b], #100]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #100]\n\t"
+        "ldr	r4, [%[a], #104]\n\t"
+        "ldr	r5, [%[b], #104]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #104]\n\t"
+        "ldr	r4, [%[a], #108]\n\t"
+        "ldr	r5, [%[b], #108]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #108]\n\t"
+        "ldr	r4, [%[a], #112]\n\t"
+        "ldr	r5, [%[b], #112]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #112]\n\t"
+        "ldr	r4, [%[a], #116]\n\t"
+        "ldr	r5, [%[b], #116]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #116]\n\t"
+        "ldr	r4, [%[a], #120]\n\t"
+        "ldr	r5, [%[b], #120]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #120]\n\t"
+        "ldr	r4, [%[a], #124]\n\t"
+        "ldr	r5, [%[b], #124]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #124]\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c]\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r7"
+    );
+
+    return c;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * m  Mask to AND against each digit.
+ */
+static void sp_2048_mask_32(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<32; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    int i;
+
+    for (i = 0; i < 32; i += 8) {
+        r[i+0] = a[i+0] & m;
+        r[i+1] = a[i+1] & m;
+        r[i+2] = a[i+2] & m;
+        r[i+3] = a[i+3] & m;
+        r[i+4] = a[i+4] & m;
+        r[i+5] = a[i+5] & m;
+        r[i+6] = a[i+6] & m;
+        r[i+7] = a[i+7] & m;
+    }
+#endif
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit* z0 = r;
+    sp_digit z1[64];
+    sp_digit a1[32];
+    sp_digit b1[32];
+    sp_digit z2[64];
+    sp_digit u, ca, cb;
+
+    ca = sp_2048_add_32(a1, a, &a[32]);
+    cb = sp_2048_add_32(b1, b, &b[32]);
+    u  = ca & cb;
+    sp_2048_mul_32(z1, a1, b1);
+    sp_2048_mul_32(z2, &a[32], &b[32]);
+    sp_2048_mul_32(z0, a, b);
+    sp_2048_mask_32(r + 64, a1, 0 - cb);
+    sp_2048_mask_32(b1, b1, 0 - ca);
+    u += sp_2048_add_32(r + 64, r + 64, b1);
+    u += sp_2048_sub_in_place_64(z1, z2);
+    u += sp_2048_sub_in_place_64(z1, z0);
+    u += sp_2048_add_64(r + 32, r + 32, z1);
+    r[96] = u;
+    XMEMSET(r + 96 + 1, 0, sizeof(sp_digit) * (32 - 1));
+    (void)sp_2048_add_64(r + 64, r + 64, z2);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a)
+{
+    sp_digit* z0 = r;
+    sp_digit z2[64];
+    sp_digit z1[64];
+    sp_digit a1[32];
+    sp_digit u;
+
+    u = sp_2048_add_32(a1, a, &a[32]);
+    sp_2048_sqr_32(z1, a1);
+    sp_2048_sqr_32(z2, &a[32]);
+    sp_2048_sqr_32(z0, a);
+    sp_2048_mask_32(r + 64, a1, 0 - u);
+    u += sp_2048_add_32(r + 64, r + 64, r + 64);
+    u += sp_2048_sub_in_place_64(z1, z2);
+    u += sp_2048_sub_in_place_64(z1, z0);
+    u += sp_2048_add_64(r + 32, r + 32, z1);
+    r[96] = u;
+    XMEMSET(r + 96 + 1, 0, sizeof(sp_digit) * (32 - 1));
+    (void)sp_2048_add_64(r + 64, r + 64, z2);
+}
+
+#endif /* !WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r6, %[a]\n\t"
+        "mov	r7, #0\n\t"
+        "mov	r4, #1\n\t"
+        "lsl	r4, #8\n\t"
+        "sub	r7, #1\n\t"
+        "add	r6, r4\n\t"
+        "\n1:\n\t"
+        "add	%[c], r7\n\t"
+        "ldr	r4, [%[a]]\n\t"
+        "ldr	r5, [%[b]]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r]]\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c]\n\t"
+        "add	%[a], #4\n\t"
+        "add	%[b], #4\n\t"
+        "add	%[r], #4\n\t"
+        "cmp	%[a], r6\n\t"
+        "bne	1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r7"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into a. (a -= b)
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_2048_sub_in_place_64(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+    __asm__ __volatile__ (
+        "mov	r7, %[a]\n\t"
+        "mov	r5, #1\n\t"
+        "lsl	r5, #8\n\t"
+        "add	r7, r5\n\t"
+        "\n1:\n\t"
+        "mov	r5, #0\n\t"
+        "sub	r5, %[c]\n\t"
+        "ldr	r3, [%[a]]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b]]\n\t"
+        "ldr	r6, [%[b], #4]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a]]\n\t"
+        "str	r4, [%[a], #4]\n\t"
+        "sbc	%[c], %[c]\n\t"
+        "add	%[a], #8\n\t"
+        "add	%[b], #8\n\t"
+        "cmp	%[a], r7\n\t"
+        "bne	1b\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6", "r7"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit tmp[64 * 2];
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r8, r3\n\t"
+        "mov	r11, %[r]\n\t"
+        "mov	r9, %[a]\n\t"
+        "mov	r10, %[b]\n\t"
+        "mov	r6, #1\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, r9\n\t"
+        "mov	r12, r6\n\t"
+        "\n1:\n\t"
+        "mov	%[r], #0\n\t"
+        "mov	r5, #0\n\t"
+        "mov	r6, #252\n\t"
+        "mov	%[a], r8\n\t"
+        "sub	%[a], r6\n\t"
+        "sbc	r6, r6\n\t"
+        "mvn	r6, r6\n\t"
+        "and	%[a], r6\n\t"
+        "mov	%[b], r8\n\t"
+        "sub	%[b], %[a]\n\t"
+        "add	%[a], r9\n\t"
+        "add	%[b], r10\n\t"
+        "\n2:\n\t"
+        "# Multiply Start\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r7, [%[b]]\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r3, r7\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r7, [%[b]]\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r7, [%[b]]\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r7, [%[b]]\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "# Multiply Done\n\t"
+        "add	%[a], #4\n\t"
+        "sub	%[b], #4\n\t"
+        "cmp	%[a], r12\n\t"
+        "beq	3f\n\t"
+        "mov	r6, r8\n\t"
+        "add	r6, r9\n\t"
+        "cmp	%[a], r6\n\t"
+        "ble	2b\n\t"
+        "\n3:\n\t"
+        "mov	%[r], r11\n\t"
+        "mov	r7, r8\n\t"
+        "str	r3, [%[r], r7]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "add	r7, #4\n\t"
+        "mov	r8, r7\n\t"
+        "mov	r6, #1\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, #248\n\t"
+        "cmp	r7, r6\n\t"
+        "ble	1b\n\t"
+        "str	r3, [%[r], r7]\n\t"
+        "mov	%[a], r9\n\t"
+        "mov	%[b], r10\n\t"
+        :
+        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
+    );
+
+    XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r5, #0\n\t"
+        "mov	r8, r3\n\t"
+        "mov	r11, %[r]\n\t"
+        "mov	r6, #2\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "neg	r6, r6\n\t"
+        "add	sp, r6\n\t"
+        "mov	r10, sp\n\t"
+        "mov	r9, %[a]\n\t"
+        "\n1:\n\t"
+        "mov	%[r], #0\n\t"
+        "mov	r6, #252\n\t"
+        "mov	%[a], r8\n\t"
+        "sub	%[a], r6\n\t"
+        "sbc	r6, r6\n\t"
+        "mvn	r6, r6\n\t"
+        "and	%[a], r6\n\t"
+        "mov	r2, r8\n\t"
+        "sub	r2, %[a]\n\t"
+        "add	%[a], r9\n\t"
+        "add	r2, r9\n\t"
+        "\n2:\n\t"
+        "cmp	r2, %[a]\n\t"
+        "beq	4f\n\t"
+        "# Multiply * 2: Start\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r7, [r2]\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r3, r7\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "add	r3, r7\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r7, [r2]\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r7, [r2]\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r7, [r2]\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "# Multiply * 2: Done\n\t"
+        "bal	5f\n\t"
+        "\n4:\n\t"
+        "# Square: Start\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "mul	r6, r6\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "mul	r7, r7\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #15\n\t"
+        "lsl	r6, r6, #17\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "# Square: Done\n\t"
+        "\n5:\n\t"
+        "add	%[a], #4\n\t"
+        "sub	r2, #4\n\t"
+        "mov	r6, #1\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, r9\n\t"
+        "cmp	%[a], r6\n\t"
+        "beq	3f\n\t"
+        "cmp	%[a], r2\n\t"
+        "bgt	3f\n\t"
+        "mov	r7, r8\n\t"
+        "add	r7, r9\n\t"
+        "cmp	%[a], r7\n\t"
+        "ble	2b\n\t"
+        "\n3:\n\t"
+        "mov	%[r], r10\n\t"
+        "mov	r7, r8\n\t"
+        "str	r3, [%[r], r7]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "mov	r5, #0\n\t"
+        "add	r7, #4\n\t"
+        "mov	r8, r7\n\t"
+        "mov	r6, #1\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, #248\n\t"
+        "cmp	r7, r6\n\t"
+        "ble	1b\n\t"
+        "mov	%[a], r9\n\t"
+        "str	r3, [%[r], r7]\n\t"
+        "mov	%[r], r11\n\t"
+        "mov	%[a], r10\n\t"
+        "mov	r3, #1\n\t"
+        "lsl	r3, r3, #8\n\t"
+        "add	r3, #252\n\t"
+        "\n4:\n\t"
+        "ldr	r6, [%[a], r3]\n\t"
+        "str	r6, [%[r], r3]\n\t"
+        "sub	r3, #4\n\t"
+        "bge	4b\n\t"
+        "mov	r6, #2\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	sp, r6\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
+    );
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+#ifdef WOLFSSL_SP_SMALL
+/* AND m into each word of a and store in r.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * m  Mask to AND against each digit.
+ */
+static void sp_2048_mask_32(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+    int i;
+
+    for (i=0; i<32; i++) {
+        r[i] = a[i] & m;
+    }
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r6, %[a]\n\t"
+        "mov	r7, #0\n\t"
+        "add	r6, #128\n\t"
+        "sub	r7, #1\n\t"
+        "\n1:\n\t"
+        "add	%[c], r7\n\t"
+        "ldr	r4, [%[a]]\n\t"
+        "ldr	r5, [%[b]]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r]]\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c]\n\t"
+        "add	%[a], #4\n\t"
+        "add	%[b], #4\n\t"
+        "add	%[r], #4\n\t"
+        "cmp	%[a], r6\n\t"
+        "bne	1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r7"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into a. (a -= b)
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_2048_sub_in_place_32(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+    __asm__ __volatile__ (
+        "mov	r7, %[a]\n\t"
+        "add	r7, #128\n\t"
+        "\n1:\n\t"
+        "mov	r5, #0\n\t"
+        "sub	r5, %[c]\n\t"
+        "ldr	r3, [%[a]]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b]]\n\t"
+        "ldr	r6, [%[b], #4]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a]]\n\t"
+        "str	r4, [%[a], #4]\n\t"
+        "sbc	%[c], %[c]\n\t"
+        "add	%[a], #8\n\t"
+        "add	%[b], #8\n\t"
+        "cmp	%[a], r7\n\t"
+        "bne	1b\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6", "r7"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit tmp[32 * 2];
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r8, r3\n\t"
+        "mov	r11, %[r]\n\t"
+        "mov	r9, %[a]\n\t"
+        "mov	r10, %[b]\n\t"
+        "mov	r6, #128\n\t"
+        "add	r6, r9\n\t"
+        "mov	r12, r6\n\t"
+        "\n1:\n\t"
+        "mov	%[r], #0\n\t"
+        "mov	r5, #0\n\t"
+        "mov	r6, #124\n\t"
+        "mov	%[a], r8\n\t"
+        "sub	%[a], r6\n\t"
+        "sbc	r6, r6\n\t"
+        "mvn	r6, r6\n\t"
+        "and	%[a], r6\n\t"
+        "mov	%[b], r8\n\t"
+        "sub	%[b], %[a]\n\t"
+        "add	%[a], r9\n\t"
+        "add	%[b], r10\n\t"
+        "\n2:\n\t"
+        "# Multiply Start\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r7, [%[b]]\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r3, r7\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r7, [%[b]]\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r7, [%[b]]\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r7, [%[b]]\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "# Multiply Done\n\t"
+        "add	%[a], #4\n\t"
+        "sub	%[b], #4\n\t"
+        "cmp	%[a], r12\n\t"
+        "beq	3f\n\t"
+        "mov	r6, r8\n\t"
+        "add	r6, r9\n\t"
+        "cmp	%[a], r6\n\t"
+        "ble	2b\n\t"
+        "\n3:\n\t"
+        "mov	%[r], r11\n\t"
+        "mov	r7, r8\n\t"
+        "str	r3, [%[r], r7]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "add	r7, #4\n\t"
+        "mov	r8, r7\n\t"
+        "mov	r6, #248\n\t"
+        "cmp	r7, r6\n\t"
+        "ble	1b\n\t"
+        "str	r3, [%[r], r7]\n\t"
+        "mov	%[a], r9\n\t"
+        "mov	%[b], r10\n\t"
+        :
+        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
+    );
+
+    XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r5, #0\n\t"
+        "mov	r8, r3\n\t"
+        "mov	r11, %[r]\n\t"
+        "mov	r6, #1\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "neg	r6, r6\n\t"
+        "add	sp, r6\n\t"
+        "mov	r10, sp\n\t"
+        "mov	r9, %[a]\n\t"
+        "\n1:\n\t"
+        "mov	%[r], #0\n\t"
+        "mov	r6, #124\n\t"
+        "mov	%[a], r8\n\t"
+        "sub	%[a], r6\n\t"
+        "sbc	r6, r6\n\t"
+        "mvn	r6, r6\n\t"
+        "and	%[a], r6\n\t"
+        "mov	r2, r8\n\t"
+        "sub	r2, %[a]\n\t"
+        "add	%[a], r9\n\t"
+        "add	r2, r9\n\t"
+        "\n2:\n\t"
+        "cmp	r2, %[a]\n\t"
+        "beq	4f\n\t"
+        "# Multiply * 2: Start\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r7, [r2]\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r3, r7\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "add	r3, r7\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r7, [r2]\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r7, [r2]\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r7, [r2]\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "# Multiply * 2: Done\n\t"
+        "bal	5f\n\t"
+        "\n4:\n\t"
+        "# Square: Start\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "mul	r6, r6\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "mul	r7, r7\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #15\n\t"
+        "lsl	r6, r6, #17\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "# Square: Done\n\t"
+        "\n5:\n\t"
+        "add	%[a], #4\n\t"
+        "sub	r2, #4\n\t"
+        "mov	r6, #128\n\t"
+        "add	r6, r9\n\t"
+        "cmp	%[a], r6\n\t"
+        "beq	3f\n\t"
+        "cmp	%[a], r2\n\t"
+        "bgt	3f\n\t"
+        "mov	r7, r8\n\t"
+        "add	r7, r9\n\t"
+        "cmp	%[a], r7\n\t"
+        "ble	2b\n\t"
+        "\n3:\n\t"
+        "mov	%[r], r10\n\t"
+        "mov	r7, r8\n\t"
+        "str	r3, [%[r], r7]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "mov	r5, #0\n\t"
+        "add	r7, #4\n\t"
+        "mov	r8, r7\n\t"
+        "mov	r6, #248\n\t"
+        "cmp	r7, r6\n\t"
+        "ble	1b\n\t"
+        "mov	%[a], r9\n\t"
+        "str	r3, [%[r], r7]\n\t"
+        "mov	%[r], r11\n\t"
+        "mov	%[a], r10\n\t"
+        "mov	r3, #252\n\t"
+        "\n4:\n\t"
+        "ldr	r6, [%[a], r3]\n\t"
+        "str	r6, [%[r], r3]\n\t"
+        "sub	r3, #4\n\t"
+        "bge	4b\n\t"
+        "mov	r6, #1\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	sp, r6\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
+    );
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+/* Caclulate the bottom digit of -1/a mod 2^n.
+ *
+ * a    A single precision number.
+ * rho  Bottom word of inverse.
+ */
+static void sp_2048_mont_setup(const sp_digit* a, sp_digit* rho)
+{
+    sp_digit x, b;
+
+    b = a[0];
+    x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
+
+    /* rho = -1/m mod b */
+    *rho = -x;
+}
+
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision digit.
+ */
+SP_NOINLINE static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a,
+        sp_digit b)
+{
+    __asm__ __volatile__ (
+        "mov	r6, #1\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, %[a]\n\t"
+        "mov	r8, %[r]\n\t"
+        "mov	r9, r6\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "1:\n\t"
+        "mov	%[r], #0\n\t"
+        "mov	r5, #0\n\t"
+        "# A[] * B\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsl	r7, %[b], #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r3, r7\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "lsr	r7, %[b], #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, %[b], #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "lsl	r7, %[b], #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "# A[] * B - Done\n\t"
+        "mov	%[r], r8\n\t"
+        "str	r3, [%[r]]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "add	%[r], #4\n\t"
+        "add	%[a], #4\n\t"
+        "mov	r8, %[r]\n\t"
+        "cmp	%[a], r9\n\t"
+        "blt	1b\n\t"
+        "str	r3, [%[r]]\n\t"
+        : [r] "+r" (r), [a] "+r" (a)
+        : [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
+    );
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 2048 bits, just need to subtract.
+ *
+ * r  A single precision number.
+ * m  A single precision number.
+ */
+static void sp_2048_mont_norm_32(sp_digit* r, const sp_digit* m)
+{
+    XMEMSET(r, 0, sizeof(sp_digit) * 32);
+
+    /* r = 2^n mod m */
+    sp_2048_sub_in_place_32(r, m);
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * r  A single precision number representing condition subtract result.
+ * a  A single precision number to subtract from.
+ * b  A single precision number to subtract.
+ * m  Mask value to apply.
+ */
+SP_NOINLINE static sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, sp_digit m)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r5, #128\n\t"
+        "mov	r8, r5\n\t"
+        "mov	r7, #0\n\t"
+        "1:\n\t"
+        "ldr	r6, [%[b], r7]\n\t"
+        "and	r6, %[m]\n\t"
+        "mov	r5, #0\n\t"
+        "sub	r5, %[c]\n\t"
+        "ldr	r5, [%[a], r7]\n\t"
+        "sbc	r5, r6\n\t"
+        "sbc	%[c], %[c]\n\t"
+        "str	r5, [%[r], r7]\n\t"
+        "add	r7, #4\n\t"
+        "cmp	r7, r8\n\t"
+        "blt	1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r5", "r6", "r7", "r8"
+    );
+
+    return c;
+}
+
+/* Reduce the number back to 2048 bits using Montgomery reduction.
+ *
+ * a   A single precision number to reduce in place.
+ * m   The single precision number representing the modulus.
+ * mp  The digit representing the negative inverse of m mod 2^n.
+ */
+SP_NOINLINE static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_digit ca = 0;
+
+    __asm__ __volatile__ (
+        "mov	r8, %[mp]\n\t"
+        "mov	r12, %[ca]\n\t"
+        "mov	r14, %[m]\n\t"
+        "mov	r9, %[a]\n\t"
+        "mov	r4, #0\n\t"
+        "# i = 0\n\t"
+        "mov	r11, r4\n\t"
+        "\n1:\n\t"
+        "mov	r5, #0\n\t"
+        "mov	%[ca], #0\n\t"
+        "# mu = a[i] * mp\n\t"
+        "mov	%[mp], r8\n\t"
+        "ldr	%[a], [%[a]]\n\t"
+        "mul	%[mp], %[a]\n\t"
+        "mov	%[m], r14\n\t"
+        "mov	r10, r9\n\t"
+        "\n2:\n\t"
+        "# a[i+j] += m[j] * mu\n\t"
+        "mov	%[a], r10\n\t"
+        "ldr	%[a], [%[a]]\n\t"
+        "mov	%[ca], #0\n\t"
+        "mov	r4, r5\n\t"
+        "mov	r5, #0\n\t"
+        "# Multiply m[j] and mu - Start\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsl	r6, %[mp], #16\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	%[a], r7\n\t"
+        "adc	r5, %[ca]\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	%[a], r6\n\t"
+        "adc	r5, r7\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsr	r6, %[mp], #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r5, r7\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	%[a], r6\n\t"
+        "adc	r5, r7\n\t"
+        "# Multiply m[j] and mu - Done\n\t"
+        "add	r4, %[a]\n\t"
+        "adc	r5, %[ca]\n\t"
+        "mov	%[a], r10\n\t"
+        "str	r4, [%[a]]\n\t"
+        "mov	r6, #4\n\t"
+        "add	%[m], #4\n\t"
+        "add	r10, r6\n\t"
+        "mov	r4, #124\n\t"
+        "add	r4, r9\n\t"
+        "cmp	r10, r4\n\t"
+        "blt	2b\n\t"
+        "# a[i+31] += m[31] * mu\n\t"
+        "mov	%[ca], #0\n\t"
+        "mov	r4, r12\n\t"
+        "mov	%[a], #0\n\t"
+        "# Multiply m[31] and mu - Start\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsl	r6, %[mp], #16\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r5, r7\n\t"
+        "adc	r4, %[ca]\n\t"
+        "adc	%[a], %[ca]\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r5, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	%[a], %[ca]\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsr	r6, %[mp], #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r4, r7\n\t"
+        "adc	%[a], %[ca]\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r5, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	%[a], %[ca]\n\t"
+        "# Multiply m[31] and mu - Done\n\t"
+        "mov	%[ca], %[a]\n\t"
+        "mov	%[a], r10\n\t"
+        "ldr	r7, [%[a], #4]\n\t"
+        "ldr	%[a], [%[a]]\n\t"
+        "mov	r6, #0\n\t"
+        "add	r5, %[a]\n\t"
+        "adc	r7, r4\n\t"
+        "adc	%[ca], r6\n\t"
+        "mov	%[a], r10\n\t"
+        "str	r5, [%[a]]\n\t"
+        "str	r7, [%[a], #4]\n\t"
+        "# i += 1\n\t"
+        "mov	r6, #4\n\t"
+        "add	r9, r6\n\t"
+        "add	r11, r6\n\t"
+        "mov	r12, %[ca]\n\t"
+        "mov	%[a], r9\n\t"
+        "mov	r4, #128\n\t"
+        "cmp	r11, r4\n\t"
+        "blt	1b\n\t"
+        "mov	%[m], r14\n\t"
+        : [ca] "+r" (ca), [a] "+r" (a)
+        : [m] "r" (m), [mp] "r" (mp)
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14"
+    );
+
+    sp_2048_cond_sub_32(a - 32, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montogmery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r   Result of multiplication.
+ * a   First number to multiply in Montogmery form.
+ * b   Second number to multiply in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_2048_mont_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_2048_mul_32(r, a, b);
+    sp_2048_mont_reduce_32(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_2048_mont_sqr_32(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_2048_sqr_32(r, a);
+    sp_2048_mont_reduce_32(r, m, mp);
+}
+
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision digit.
+ */
+SP_NOINLINE static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a,
+        sp_digit b)
+{
+    __asm__ __volatile__ (
+        "mov	r6, #128\n\t"
+        "add	r6, %[a]\n\t"
+        "mov	r8, %[r]\n\t"
+        "mov	r9, r6\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "1:\n\t"
+        "mov	%[r], #0\n\t"
+        "mov	r5, #0\n\t"
+        "# A[] * B\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsl	r7, %[b], #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r3, r7\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "lsr	r7, %[b], #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, %[b], #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "lsl	r7, %[b], #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "# A[] * B - Done\n\t"
+        "mov	%[r], r8\n\t"
+        "str	r3, [%[r]]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "add	%[r], #4\n\t"
+        "add	%[a], #4\n\t"
+        "mov	r8, %[r]\n\t"
+        "cmp	%[a], r9\n\t"
+        "blt	1b\n\t"
+        "str	r3, [%[r]]\n\t"
+        : [r] "+r" (r), [a] "+r" (a)
+        : [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
+    );
+}
+
+/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
+ *
+ * d1   The high order half of the number to divide.
+ * d0   The low order half of the number to divide.
+ * div  The dividend.
+ * returns the result of the division.
+ *
+ * Note that this is an approximate div. It may give an answer 1 larger.
+ */
+SP_NOINLINE static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0,
+        sp_digit div)
+{
+    sp_digit r = 0;
+
+    __asm__ __volatile__ (
+        "lsr	r5, %[div], #1\n\t"
+        "add	r5, #1\n\t"
+        "mov	r8, %[d0]\n\t"
+        "mov	r9, %[d1]\n\t"
+        "# Do top 32\n\t"
+        "mov	r6, r5\n\t"
+        "sub	r6, %[d1]\n\t"
+        "sbc	r6, r6\n\t"
+        "add	%[r], %[r]\n\t"
+        "sub	%[r], r6\n\t"
+        "and	r6, r5\n\t"
+        "sub	%[d1], r6\n\t"
+        "# Next 30 bits\n\t"
+        "mov	r4, #29\n\t"
+        "1:\n\t"
+        "lsl	%[d0], %[d0], #1\n\t"
+        "adc	%[d1], %[d1]\n\t"
+        "mov	r6, r5\n\t"
+        "sub	r6, %[d1]\n\t"
+        "sbc	r6, r6\n\t"
+        "add	%[r], %[r]\n\t"
+        "sub	%[r], r6\n\t"
+        "and	r6, r5\n\t"
+        "sub	%[d1], r6\n\t"
+        "sub	r4, #1\n\t"
+        "bpl	1b\n\t"
+        "mov	r7, #0\n\t"
+        "add	%[r], %[r]\n\t"
+        "add	%[r], #1\n\t"
+        "# r * div - Start\n\t"
+        "lsl	%[d1], %[r], #16\n\t"
+        "lsl	r4, %[div], #16\n\t"
+        "lsr	%[d1], %[d1], #16\n\t"
+        "lsr	r4, r4, #16\n\t"
+        "mul	r4, %[d1]\n\t"
+        "lsr	r6, %[div], #16\n\t"
+        "mul	%[d1], r6\n\t"
+        "lsr	r5, %[d1], #16\n\t"
+        "lsl	%[d1], %[d1], #16\n\t"
+        "add	r4, %[d1]\n\t"
+        "adc	r5, r7\n\t"
+        "lsr	%[d1], %[r], #16\n\t"
+        "mul	r6, %[d1]\n\t"
+        "add	r5, r6\n\t"
+        "lsl	r6, %[div], #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "mul	%[d1], r6\n\t"
+        "lsr	r6, %[d1], #16\n\t"
+        "lsl	%[d1], %[d1], #16\n\t"
+        "add	r4, %[d1]\n\t"
+        "adc	r5, r6\n\t"
+        "# r * div - Done\n\t"
+        "mov	%[d1], r8\n\t"
+        "sub	%[d1], r4\n\t"
+        "mov	r4, %[d1]\n\t"
+        "mov	%[d1], r9\n\t"
+        "sbc	%[d1], r5\n\t"
+        "mov	r5, %[d1]\n\t"
+        "add	%[r], r5\n\t"
+        "# r * div - Start\n\t"
+        "lsl	%[d1], %[r], #16\n\t"
+        "lsl	r4, %[div], #16\n\t"
+        "lsr	%[d1], %[d1], #16\n\t"
+        "lsr	r4, r4, #16\n\t"
+        "mul	r4, %[d1]\n\t"
+        "lsr	r6, %[div], #16\n\t"
+        "mul	%[d1], r6\n\t"
+        "lsr	r5, %[d1], #16\n\t"
+        "lsl	%[d1], %[d1], #16\n\t"
+        "add	r4, %[d1]\n\t"
+        "adc	r5, r7\n\t"
+        "lsr	%[d1], %[r], #16\n\t"
+        "mul	r6, %[d1]\n\t"
+        "add	r5, r6\n\t"
+        "lsl	r6, %[div], #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "mul	%[d1], r6\n\t"
+        "lsr	r6, %[d1], #16\n\t"
+        "lsl	%[d1], %[d1], #16\n\t"
+        "add	r4, %[d1]\n\t"
+        "adc	r5, r6\n\t"
+        "# r * div - Done\n\t"
+        "mov	%[d1], r8\n\t"
+        "mov	r6, r9\n\t"
+        "sub	r4, %[d1], r4\n\t"
+        "sbc	r6, r5\n\t"
+        "mov	r5, r6\n\t"
+        "add	%[r], r5\n\t"
+        "# r * div - Start\n\t"
+        "lsl	%[d1], %[r], #16\n\t"
+        "lsl	r4, %[div], #16\n\t"
+        "lsr	%[d1], %[d1], #16\n\t"
+        "lsr	r4, r4, #16\n\t"
+        "mul	r4, %[d1]\n\t"
+        "lsr	r6, %[div], #16\n\t"
+        "mul	%[d1], r6\n\t"
+        "lsr	r5, %[d1], #16\n\t"
+        "lsl	%[d1], %[d1], #16\n\t"
+        "add	r4, %[d1]\n\t"
+        "adc	r5, r7\n\t"
+        "lsr	%[d1], %[r], #16\n\t"
+        "mul	r6, %[d1]\n\t"
+        "add	r5, r6\n\t"
+        "lsl	r6, %[div], #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "mul	%[d1], r6\n\t"
+        "lsr	r6, %[d1], #16\n\t"
+        "lsl	%[d1], %[d1], #16\n\t"
+        "add	r4, %[d1]\n\t"
+        "adc	r5, r6\n\t"
+        "# r * div - Done\n\t"
+        "mov	%[d1], r8\n\t"
+        "mov	r6, r9\n\t"
+        "sub	r4, %[d1], r4\n\t"
+        "sbc	r6, r5\n\t"
+        "mov	r5, r6\n\t"
+        "add	%[r], r5\n\t"
+        "mov	r6, %[div]\n\t"
+        "sub	r6, r4\n\t"
+        "sbc	r6, r6\n\t"
+        "sub	%[r], r6\n\t"
+        : [r] "+r" (r)
+        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+        : "r4", "r5", "r7", "r6", "r8", "r9"
+    );
+    return r;
+}
+
+/* Compare a with b in constant time.
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+SP_NOINLINE static int32_t sp_2048_cmp_32(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+
+
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mvn	r3, r3\n\t"
+        "mov	r6, #124\n\t"
+        "1:\n\t"
+        "ldr	r7, [%[a], r6]\n\t"
+        "ldr	r5, [%[b], r6]\n\t"
+        "and	r7, r3\n\t"
+        "and	r5, r3\n\t"
+        "mov	r4, r7\n\t"
+        "sub	r7, r5\n\t"
+        "sbc	r7, r7\n\t"
+        "add	%[r], r7\n\t"
+        "mvn	r7, r7\n\t"
+        "and	r3, r7\n\t"
+        "sub	r5, r4\n\t"
+        "sbc	r7, r7\n\t"
+        "sub	%[r], r7\n\t"
+        "mvn	r7, r7\n\t"
+        "and	r3, r7\n\t"
+        "sub	r6, #4\n\t"
+        "cmp	r6, #0\n\t"
+        "bge	1b\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
+        : "r3", "r4", "r5", "r6", "r7"
+    );
+
+    return r;
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Nmber to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_2048_div_32(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[64], t2[33];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    div = d[31];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 32);
+    for (i=31; i>=0; i--) {
+        r1 = div_2048_word_32(t1[32 + i], t1[32 + i - 1], div);
+
+        sp_2048_mul_d_32(t2, d, r1);
+        t1[32 + i] += sp_2048_sub_in_place_32(&t1[i], t2);
+        t1[32 + i] -= t2[32];
+        sp_2048_mask_32(t2, d, t1[32 + i]);
+        t1[32 + i] += sp_2048_add_32(&t1[i], &t1[i], t2);
+        sp_2048_mask_32(t2, d, t1[32 + i]);
+        t1[32 + i] += sp_2048_add_32(&t1[i], &t1[i], t2);
+    }
+
+    r1 = sp_2048_cmp_32(t1, d) >= 0;
+    sp_2048_cond_sub_32(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_2048_mod_32(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_2048_div_32(a, m, NULL, r);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * r     A single precision number that is the result of the operation.
+ * a     A single precision number being exponentiated.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[16][64];
+#else
+    sp_digit* t[16];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 64, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<16; i++) {
+            t[i] = td + i * 64;
+        }
+#endif
+        norm = t[0];
+
+        sp_2048_mont_setup(m, &mp);
+        sp_2048_mont_norm_32(norm, m);
+
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 32U);
+        if (reduceA != 0) {
+            err = sp_2048_mod_32(t[1] + 32, a, m);
+            if (err == MP_OKAY) {
+                err = sp_2048_mod_32(t[1], t[1], m);
+            }
+        }
+        else {
+            XMEMCPY(t[1] + 32, a, sizeof(sp_digit) * 32);
+            err = sp_2048_mod_32(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_mont_sqr_32(t[ 2], t[ 1], m, mp);
+        sp_2048_mont_mul_32(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_2048_mont_sqr_32(t[ 4], t[ 2], m, mp);
+        sp_2048_mont_mul_32(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_2048_mont_sqr_32(t[ 6], t[ 3], m, mp);
+        sp_2048_mont_mul_32(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_2048_mont_sqr_32(t[ 8], t[ 4], m, mp);
+        sp_2048_mont_mul_32(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_2048_mont_sqr_32(t[10], t[ 5], m, mp);
+        sp_2048_mont_mul_32(t[11], t[ 6], t[ 5], m, mp);
+        sp_2048_mont_sqr_32(t[12], t[ 6], m, mp);
+        sp_2048_mont_mul_32(t[13], t[ 7], t[ 6], m, mp);
+        sp_2048_mont_sqr_32(t[14], t[ 7], m, mp);
+        sp_2048_mont_mul_32(t[15], t[ 8], t[ 7], m, mp);
+
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 4;
+        if (c == 32) {
+            c = 28;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 32);
+        for (; i>=0 || c>=4; ) {
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 28;
+                n <<= 4;
+                c = 28;
+            }
+            else if (c < 4) {
+                y = n >> 28;
+                n = e[i--];
+                c = 4 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                y = (n >> 28) & 0xf;
+                n <<= 4;
+                c -= 4;
+            }
+
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+
+            sp_2048_mont_mul_32(r, r, t[y], m, mp);
+        }
+
+        XMEMSET(&r[32], 0, sizeof(sp_digit) * 32U);
+        sp_2048_mont_reduce_32(r, m, mp);
+
+        mask = 0 - (sp_2048_cmp_32(r, m) >= 0);
+        sp_2048_cond_sub_32(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#else
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * r     A single precision number that is the result of the operation.
+ * a     A single precision number being exponentiated.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[32][64];
+#else
+    sp_digit* t[32];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 64, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<32; i++) {
+            t[i] = td + i * 64;
+        }
+#endif
+        norm = t[0];
+
+        sp_2048_mont_setup(m, &mp);
+        sp_2048_mont_norm_32(norm, m);
+
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 32U);
+        if (reduceA != 0) {
+            err = sp_2048_mod_32(t[1] + 32, a, m);
+            if (err == MP_OKAY) {
+                err = sp_2048_mod_32(t[1], t[1], m);
+            }
+        }
+        else {
+            XMEMCPY(t[1] + 32, a, sizeof(sp_digit) * 32);
+            err = sp_2048_mod_32(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_mont_sqr_32(t[ 2], t[ 1], m, mp);
+        sp_2048_mont_mul_32(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_2048_mont_sqr_32(t[ 4], t[ 2], m, mp);
+        sp_2048_mont_mul_32(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_2048_mont_sqr_32(t[ 6], t[ 3], m, mp);
+        sp_2048_mont_mul_32(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_2048_mont_sqr_32(t[ 8], t[ 4], m, mp);
+        sp_2048_mont_mul_32(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_2048_mont_sqr_32(t[10], t[ 5], m, mp);
+        sp_2048_mont_mul_32(t[11], t[ 6], t[ 5], m, mp);
+        sp_2048_mont_sqr_32(t[12], t[ 6], m, mp);
+        sp_2048_mont_mul_32(t[13], t[ 7], t[ 6], m, mp);
+        sp_2048_mont_sqr_32(t[14], t[ 7], m, mp);
+        sp_2048_mont_mul_32(t[15], t[ 8], t[ 7], m, mp);
+        sp_2048_mont_sqr_32(t[16], t[ 8], m, mp);
+        sp_2048_mont_mul_32(t[17], t[ 9], t[ 8], m, mp);
+        sp_2048_mont_sqr_32(t[18], t[ 9], m, mp);
+        sp_2048_mont_mul_32(t[19], t[10], t[ 9], m, mp);
+        sp_2048_mont_sqr_32(t[20], t[10], m, mp);
+        sp_2048_mont_mul_32(t[21], t[11], t[10], m, mp);
+        sp_2048_mont_sqr_32(t[22], t[11], m, mp);
+        sp_2048_mont_mul_32(t[23], t[12], t[11], m, mp);
+        sp_2048_mont_sqr_32(t[24], t[12], m, mp);
+        sp_2048_mont_mul_32(t[25], t[13], t[12], m, mp);
+        sp_2048_mont_sqr_32(t[26], t[13], m, mp);
+        sp_2048_mont_mul_32(t[27], t[14], t[13], m, mp);
+        sp_2048_mont_sqr_32(t[28], t[14], m, mp);
+        sp_2048_mont_mul_32(t[29], t[15], t[14], m, mp);
+        sp_2048_mont_sqr_32(t[30], t[15], m, mp);
+        sp_2048_mont_mul_32(t[31], t[16], t[15], m, mp);
+
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 5;
+        if (c == 32) {
+            c = 27;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 32);
+        for (; i>=0 || c>=5; ) {
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 27;
+                n <<= 5;
+                c = 27;
+            }
+            else if (c < 5) {
+                y = n >> 27;
+                n = e[i--];
+                c = 5 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                y = (n >> 27) & 0x1f;
+                n <<= 5;
+                c -= 5;
+            }
+
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+
+            sp_2048_mont_mul_32(r, r, t[y], m, mp);
+        }
+
+        XMEMSET(&r[32], 0, sizeof(sp_digit) * 32U);
+        sp_2048_mont_reduce_32(r, m, mp);
+
+        mask = 0 - (sp_2048_cmp_32(r, m) >= 0);
+        sp_2048_cond_sub_32(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#endif /* WOLFSSL_SP_SMALL */
+
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 2048 bits, just need to subtract.
+ *
+ * r  A single precision number.
+ * m  A single precision number.
+ */
+static void sp_2048_mont_norm_64(sp_digit* r, const sp_digit* m)
+{
+    XMEMSET(r, 0, sizeof(sp_digit) * 64);
+
+    /* r = 2^n mod m */
+    sp_2048_sub_in_place_64(r, m);
+}
+
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * r  A single precision number representing condition subtract result.
+ * a  A single precision number to subtract from.
+ * b  A single precision number to subtract.
+ * m  Mask value to apply.
+ */
+SP_NOINLINE static sp_digit sp_2048_cond_sub_64(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, sp_digit m)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r5, #1\n\t"
+        "lsl	r5, r5, #8\n\t"
+        "mov	r8, r5\n\t"
+        "mov	r7, #0\n\t"
+        "1:\n\t"
+        "ldr	r6, [%[b], r7]\n\t"
+        "and	r6, %[m]\n\t"
+        "mov	r5, #0\n\t"
+        "sub	r5, %[c]\n\t"
+        "ldr	r5, [%[a], r7]\n\t"
+        "sbc	r5, r6\n\t"
+        "sbc	%[c], %[c]\n\t"
+        "str	r5, [%[r], r7]\n\t"
+        "add	r7, #4\n\t"
+        "cmp	r7, r8\n\t"
+        "blt	1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r5", "r6", "r7", "r8"
+    );
+
+    return c;
+}
+
+/* Reduce the number back to 2048 bits using Montgomery reduction.
+ *
+ * a   A single precision number to reduce in place.
+ * m   The single precision number representing the modulus.
+ * mp  The digit representing the negative inverse of m mod 2^n.
+ */
+SP_NOINLINE static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_digit ca = 0;
+
+    __asm__ __volatile__ (
+        "mov	r8, %[mp]\n\t"
+        "mov	r12, %[ca]\n\t"
+        "mov	r14, %[m]\n\t"
+        "mov	r9, %[a]\n\t"
+        "mov	r4, #0\n\t"
+        "# i = 0\n\t"
+        "mov	r11, r4\n\t"
+        "\n1:\n\t"
+        "mov	r5, #0\n\t"
+        "mov	%[ca], #0\n\t"
+        "# mu = a[i] * mp\n\t"
+        "mov	%[mp], r8\n\t"
+        "ldr	%[a], [%[a]]\n\t"
+        "mul	%[mp], %[a]\n\t"
+        "mov	%[m], r14\n\t"
+        "mov	r10, r9\n\t"
+        "\n2:\n\t"
+        "# a[i+j] += m[j] * mu\n\t"
+        "mov	%[a], r10\n\t"
+        "ldr	%[a], [%[a]]\n\t"
+        "mov	%[ca], #0\n\t"
+        "mov	r4, r5\n\t"
+        "mov	r5, #0\n\t"
+        "# Multiply m[j] and mu - Start\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsl	r6, %[mp], #16\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	%[a], r7\n\t"
+        "adc	r5, %[ca]\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	%[a], r6\n\t"
+        "adc	r5, r7\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsr	r6, %[mp], #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r5, r7\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	%[a], r6\n\t"
+        "adc	r5, r7\n\t"
+        "# Multiply m[j] and mu - Done\n\t"
+        "add	r4, %[a]\n\t"
+        "adc	r5, %[ca]\n\t"
+        "mov	%[a], r10\n\t"
+        "str	r4, [%[a]]\n\t"
+        "mov	r6, #4\n\t"
+        "add	%[m], #4\n\t"
+        "add	r10, r6\n\t"
+        "mov	r4, #252\n\t"
+        "add	r4, r9\n\t"
+        "cmp	r10, r4\n\t"
+        "blt	2b\n\t"
+        "# a[i+63] += m[63] * mu\n\t"
+        "mov	%[ca], #0\n\t"
+        "mov	r4, r12\n\t"
+        "mov	%[a], #0\n\t"
+        "# Multiply m[63] and mu - Start\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsl	r6, %[mp], #16\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r5, r7\n\t"
+        "adc	r4, %[ca]\n\t"
+        "adc	%[a], %[ca]\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r5, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	%[a], %[ca]\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsr	r6, %[mp], #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r4, r7\n\t"
+        "adc	%[a], %[ca]\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r5, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	%[a], %[ca]\n\t"
+        "# Multiply m[63] and mu - Done\n\t"
+        "mov	%[ca], %[a]\n\t"
+        "mov	%[a], r10\n\t"
+        "ldr	r7, [%[a], #4]\n\t"
+        "ldr	%[a], [%[a]]\n\t"
+        "mov	r6, #0\n\t"
+        "add	r5, %[a]\n\t"
+        "adc	r7, r4\n\t"
+        "adc	%[ca], r6\n\t"
+        "mov	%[a], r10\n\t"
+        "str	r5, [%[a]]\n\t"
+        "str	r7, [%[a], #4]\n\t"
+        "# i += 1\n\t"
+        "mov	r6, #4\n\t"
+        "add	r9, r6\n\t"
+        "add	r11, r6\n\t"
+        "mov	r12, %[ca]\n\t"
+        "mov	%[a], r9\n\t"
+        "mov	r4, #1\n\t"
+        "lsl	r4, r4, #8\n\t"
+        "cmp	r11, r4\n\t"
+        "blt	1b\n\t"
+        "mov	%[m], r14\n\t"
+        : [ca] "+r" (ca), [a] "+r" (a)
+        : [m] "r" (m), [mp] "r" (mp)
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14"
+    );
+
+    sp_2048_cond_sub_64(a - 64, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montogmery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r   Result of multiplication.
+ * a   First number to multiply in Montogmery form.
+ * b   Second number to multiply in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_2048_mont_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_2048_mul_64(r, a, b);
+    sp_2048_mont_reduce_64(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_2048_mont_sqr_64(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_2048_sqr_64(r, a);
+    sp_2048_mont_reduce_64(r, m, mp);
+}
+
+/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
+ *
+ * d1   The high order half of the number to divide.
+ * d0   The low order half of the number to divide.
+ * div  The dividend.
+ * returns the result of the division.
+ *
+ * Note that this is an approximate div. It may give an answer 1 larger.
+ */
+SP_NOINLINE static sp_digit div_2048_word_64(sp_digit d1, sp_digit d0,
+        sp_digit div)
+{
+    sp_digit r = 0;
+
+    __asm__ __volatile__ (
+        "lsr	r5, %[div], #1\n\t"
+        "add	r5, #1\n\t"
+        "mov	r8, %[d0]\n\t"
+        "mov	r9, %[d1]\n\t"
+        "# Do top 32\n\t"
+        "mov	r6, r5\n\t"
+        "sub	r6, %[d1]\n\t"
+        "sbc	r6, r6\n\t"
+        "add	%[r], %[r]\n\t"
+        "sub	%[r], r6\n\t"
+        "and	r6, r5\n\t"
+        "sub	%[d1], r6\n\t"
+        "# Next 30 bits\n\t"
+        "mov	r4, #29\n\t"
+        "1:\n\t"
+        "lsl	%[d0], %[d0], #1\n\t"
+        "adc	%[d1], %[d1]\n\t"
+        "mov	r6, r5\n\t"
+        "sub	r6, %[d1]\n\t"
+        "sbc	r6, r6\n\t"
+        "add	%[r], %[r]\n\t"
+        "sub	%[r], r6\n\t"
+        "and	r6, r5\n\t"
+        "sub	%[d1], r6\n\t"
+        "sub	r4, #1\n\t"
+        "bpl	1b\n\t"
+        "mov	r7, #0\n\t"
+        "add	%[r], %[r]\n\t"
+        "add	%[r], #1\n\t"
+        "# r * div - Start\n\t"
+        "lsl	%[d1], %[r], #16\n\t"
+        "lsl	r4, %[div], #16\n\t"
+        "lsr	%[d1], %[d1], #16\n\t"
+        "lsr	r4, r4, #16\n\t"
+        "mul	r4, %[d1]\n\t"
+        "lsr	r6, %[div], #16\n\t"
+        "mul	%[d1], r6\n\t"
+        "lsr	r5, %[d1], #16\n\t"
+        "lsl	%[d1], %[d1], #16\n\t"
+        "add	r4, %[d1]\n\t"
+        "adc	r5, r7\n\t"
+        "lsr	%[d1], %[r], #16\n\t"
+        "mul	r6, %[d1]\n\t"
+        "add	r5, r6\n\t"
+        "lsl	r6, %[div], #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "mul	%[d1], r6\n\t"
+        "lsr	r6, %[d1], #16\n\t"
+        "lsl	%[d1], %[d1], #16\n\t"
+        "add	r4, %[d1]\n\t"
+        "adc	r5, r6\n\t"
+        "# r * div - Done\n\t"
+        "mov	%[d1], r8\n\t"
+        "sub	%[d1], r4\n\t"
+        "mov	r4, %[d1]\n\t"
+        "mov	%[d1], r9\n\t"
+        "sbc	%[d1], r5\n\t"
+        "mov	r5, %[d1]\n\t"
+        "add	%[r], r5\n\t"
+        "# r * div - Start\n\t"
+        "lsl	%[d1], %[r], #16\n\t"
+        "lsl	r4, %[div], #16\n\t"
+        "lsr	%[d1], %[d1], #16\n\t"
+        "lsr	r4, r4, #16\n\t"
+        "mul	r4, %[d1]\n\t"
+        "lsr	r6, %[div], #16\n\t"
+        "mul	%[d1], r6\n\t"
+        "lsr	r5, %[d1], #16\n\t"
+        "lsl	%[d1], %[d1], #16\n\t"
+        "add	r4, %[d1]\n\t"
+        "adc	r5, r7\n\t"
+        "lsr	%[d1], %[r], #16\n\t"
+        "mul	r6, %[d1]\n\t"
+        "add	r5, r6\n\t"
+        "lsl	r6, %[div], #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "mul	%[d1], r6\n\t"
+        "lsr	r6, %[d1], #16\n\t"
+        "lsl	%[d1], %[d1], #16\n\t"
+        "add	r4, %[d1]\n\t"
+        "adc	r5, r6\n\t"
+        "# r * div - Done\n\t"
+        "mov	%[d1], r8\n\t"
+        "mov	r6, r9\n\t"
+        "sub	r4, %[d1], r4\n\t"
+        "sbc	r6, r5\n\t"
+        "mov	r5, r6\n\t"
+        "add	%[r], r5\n\t"
+        "# r * div - Start\n\t"
+        "lsl	%[d1], %[r], #16\n\t"
+        "lsl	r4, %[div], #16\n\t"
+        "lsr	%[d1], %[d1], #16\n\t"
+        "lsr	r4, r4, #16\n\t"
+        "mul	r4, %[d1]\n\t"
+        "lsr	r6, %[div], #16\n\t"
+        "mul	%[d1], r6\n\t"
+        "lsr	r5, %[d1], #16\n\t"
+        "lsl	%[d1], %[d1], #16\n\t"
+        "add	r4, %[d1]\n\t"
+        "adc	r5, r7\n\t"
+        "lsr	%[d1], %[r], #16\n\t"
+        "mul	r6, %[d1]\n\t"
+        "add	r5, r6\n\t"
+        "lsl	r6, %[div], #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "mul	%[d1], r6\n\t"
+        "lsr	r6, %[d1], #16\n\t"
+        "lsl	%[d1], %[d1], #16\n\t"
+        "add	r4, %[d1]\n\t"
+        "adc	r5, r6\n\t"
+        "# r * div - Done\n\t"
+        "mov	%[d1], r8\n\t"
+        "mov	r6, r9\n\t"
+        "sub	r4, %[d1], r4\n\t"
+        "sbc	r6, r5\n\t"
+        "mov	r5, r6\n\t"
+        "add	%[r], r5\n\t"
+        "mov	r6, %[div]\n\t"
+        "sub	r6, r4\n\t"
+        "sbc	r6, r6\n\t"
+        "sub	%[r], r6\n\t"
+        : [r] "+r" (r)
+        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+        : "r4", "r5", "r7", "r6", "r8", "r9"
+    );
+    return r;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * m  Mask to AND against each digit.
+ */
+static void sp_2048_mask_64(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<64; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    int i;
+
+    for (i = 0; i < 64; i += 8) {
+        r[i+0] = a[i+0] & m;
+        r[i+1] = a[i+1] & m;
+        r[i+2] = a[i+2] & m;
+        r[i+3] = a[i+3] & m;
+        r[i+4] = a[i+4] & m;
+        r[i+5] = a[i+5] & m;
+        r[i+6] = a[i+6] & m;
+        r[i+7] = a[i+7] & m;
+    }
+#endif
+}
+
+/* Compare a with b in constant time.
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+SP_NOINLINE static int32_t sp_2048_cmp_64(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+
+
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mvn	r3, r3\n\t"
+        "mov	r6, #252\n\t"
+        "1:\n\t"
+        "ldr	r7, [%[a], r6]\n\t"
+        "ldr	r5, [%[b], r6]\n\t"
+        "and	r7, r3\n\t"
+        "and	r5, r3\n\t"
+        "mov	r4, r7\n\t"
+        "sub	r7, r5\n\t"
+        "sbc	r7, r7\n\t"
+        "add	%[r], r7\n\t"
+        "mvn	r7, r7\n\t"
+        "and	r3, r7\n\t"
+        "sub	r5, r4\n\t"
+        "sbc	r7, r7\n\t"
+        "sub	%[r], r7\n\t"
+        "mvn	r7, r7\n\t"
+        "and	r3, r7\n\t"
+        "sub	r6, #4\n\t"
+        "cmp	r6, #0\n\t"
+        "bge	1b\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
+        : "r3", "r4", "r5", "r6", "r7"
+    );
+
+    return r;
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Nmber to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_2048_div_64(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[128], t2[65];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    div = d[63];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 64);
+    for (i=63; i>=0; i--) {
+        r1 = div_2048_word_64(t1[64 + i], t1[64 + i - 1], div);
+
+        sp_2048_mul_d_64(t2, d, r1);
+        t1[64 + i] += sp_2048_sub_in_place_64(&t1[i], t2);
+        t1[64 + i] -= t2[64];
+        sp_2048_mask_64(t2, d, t1[64 + i]);
+        t1[64 + i] += sp_2048_add_64(&t1[i], &t1[i], t2);
+        sp_2048_mask_64(t2, d, t1[64 + i]);
+        t1[64 + i] += sp_2048_add_64(&t1[i], &t1[i], t2);
+    }
+
+    r1 = sp_2048_cmp_64(t1, d) >= 0;
+    sp_2048_cond_sub_64(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_2048_mod_64(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_2048_div_64(a, m, NULL, r);
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Nmber to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_2048_div_64_cond(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[128], t2[65];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    div = d[63];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 64);
+    for (i=63; i>=0; i--) {
+        r1 = div_2048_word_64(t1[64 + i], t1[64 + i - 1], div);
+
+        sp_2048_mul_d_64(t2, d, r1);
+        t1[64 + i] += sp_2048_sub_in_place_64(&t1[i], t2);
+        t1[64 + i] -= t2[64];
+        if (t1[64 + i] != 0) {
+            t1[64 + i] += sp_2048_add_64(&t1[i], &t1[i], d);
+            if (t1[64 + i] != 0)
+                t1[64 + i] += sp_2048_add_64(&t1[i], &t1[i], d);
+        }
+    }
+
+    r1 = sp_2048_cmp_64(t1, d) >= 0;
+    sp_2048_cond_sub_64(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_2048_mod_64_cond(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_2048_div_64_cond(a, m, NULL, r);
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
+                                                     defined(WOLFSSL_HAVE_SP_DH)
+#ifdef WOLFSSL_SP_SMALL
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * r     A single precision number that is the result of the operation.
+ * a     A single precision number being exponentiated.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_2048_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[16][128];
+#else
+    sp_digit* t[16];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 128, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<16; i++) {
+            t[i] = td + i * 128;
+        }
+#endif
+        norm = t[0];
+
+        sp_2048_mont_setup(m, &mp);
+        sp_2048_mont_norm_64(norm, m);
+
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 64U);
+        if (reduceA != 0) {
+            err = sp_2048_mod_64(t[1] + 64, a, m);
+            if (err == MP_OKAY) {
+                err = sp_2048_mod_64(t[1], t[1], m);
+            }
+        }
+        else {
+            XMEMCPY(t[1] + 64, a, sizeof(sp_digit) * 64);
+            err = sp_2048_mod_64(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_mont_sqr_64(t[ 2], t[ 1], m, mp);
+        sp_2048_mont_mul_64(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_2048_mont_sqr_64(t[ 4], t[ 2], m, mp);
+        sp_2048_mont_mul_64(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_2048_mont_sqr_64(t[ 6], t[ 3], m, mp);
+        sp_2048_mont_mul_64(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_2048_mont_sqr_64(t[ 8], t[ 4], m, mp);
+        sp_2048_mont_mul_64(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_2048_mont_sqr_64(t[10], t[ 5], m, mp);
+        sp_2048_mont_mul_64(t[11], t[ 6], t[ 5], m, mp);
+        sp_2048_mont_sqr_64(t[12], t[ 6], m, mp);
+        sp_2048_mont_mul_64(t[13], t[ 7], t[ 6], m, mp);
+        sp_2048_mont_sqr_64(t[14], t[ 7], m, mp);
+        sp_2048_mont_mul_64(t[15], t[ 8], t[ 7], m, mp);
+
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 4;
+        if (c == 32) {
+            c = 28;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 64);
+        for (; i>=0 || c>=4; ) {
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 28;
+                n <<= 4;
+                c = 28;
+            }
+            else if (c < 4) {
+                y = n >> 28;
+                n = e[i--];
+                c = 4 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                y = (n >> 28) & 0xf;
+                n <<= 4;
+                c -= 4;
+            }
+
+            sp_2048_mont_sqr_64(r, r, m, mp);
+            sp_2048_mont_sqr_64(r, r, m, mp);
+            sp_2048_mont_sqr_64(r, r, m, mp);
+            sp_2048_mont_sqr_64(r, r, m, mp);
+
+            sp_2048_mont_mul_64(r, r, t[y], m, mp);
+        }
+
+        XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U);
+        sp_2048_mont_reduce_64(r, m, mp);
+
+        mask = 0 - (sp_2048_cmp_64(r, m) >= 0);
+        sp_2048_cond_sub_64(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#else
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * r     A single precision number that is the result of the operation.
+ * a     A single precision number being exponentiated.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_2048_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[32][128];
+#else
+    sp_digit* t[32];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 128, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<32; i++) {
+            t[i] = td + i * 128;
+        }
+#endif
+        norm = t[0];
+
+        sp_2048_mont_setup(m, &mp);
+        sp_2048_mont_norm_64(norm, m);
+
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 64U);
+        if (reduceA != 0) {
+            err = sp_2048_mod_64(t[1] + 64, a, m);
+            if (err == MP_OKAY) {
+                err = sp_2048_mod_64(t[1], t[1], m);
+            }
+        }
+        else {
+            XMEMCPY(t[1] + 64, a, sizeof(sp_digit) * 64);
+            err = sp_2048_mod_64(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_mont_sqr_64(t[ 2], t[ 1], m, mp);
+        sp_2048_mont_mul_64(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_2048_mont_sqr_64(t[ 4], t[ 2], m, mp);
+        sp_2048_mont_mul_64(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_2048_mont_sqr_64(t[ 6], t[ 3], m, mp);
+        sp_2048_mont_mul_64(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_2048_mont_sqr_64(t[ 8], t[ 4], m, mp);
+        sp_2048_mont_mul_64(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_2048_mont_sqr_64(t[10], t[ 5], m, mp);
+        sp_2048_mont_mul_64(t[11], t[ 6], t[ 5], m, mp);
+        sp_2048_mont_sqr_64(t[12], t[ 6], m, mp);
+        sp_2048_mont_mul_64(t[13], t[ 7], t[ 6], m, mp);
+        sp_2048_mont_sqr_64(t[14], t[ 7], m, mp);
+        sp_2048_mont_mul_64(t[15], t[ 8], t[ 7], m, mp);
+        sp_2048_mont_sqr_64(t[16], t[ 8], m, mp);
+        sp_2048_mont_mul_64(t[17], t[ 9], t[ 8], m, mp);
+        sp_2048_mont_sqr_64(t[18], t[ 9], m, mp);
+        sp_2048_mont_mul_64(t[19], t[10], t[ 9], m, mp);
+        sp_2048_mont_sqr_64(t[20], t[10], m, mp);
+        sp_2048_mont_mul_64(t[21], t[11], t[10], m, mp);
+        sp_2048_mont_sqr_64(t[22], t[11], m, mp);
+        sp_2048_mont_mul_64(t[23], t[12], t[11], m, mp);
+        sp_2048_mont_sqr_64(t[24], t[12], m, mp);
+        sp_2048_mont_mul_64(t[25], t[13], t[12], m, mp);
+        sp_2048_mont_sqr_64(t[26], t[13], m, mp);
+        sp_2048_mont_mul_64(t[27], t[14], t[13], m, mp);
+        sp_2048_mont_sqr_64(t[28], t[14], m, mp);
+        sp_2048_mont_mul_64(t[29], t[15], t[14], m, mp);
+        sp_2048_mont_sqr_64(t[30], t[15], m, mp);
+        sp_2048_mont_mul_64(t[31], t[16], t[15], m, mp);
+
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 5;
+        if (c == 32) {
+            c = 27;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 64);
+        for (; i>=0 || c>=5; ) {
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 27;
+                n <<= 5;
+                c = 27;
+            }
+            else if (c < 5) {
+                y = n >> 27;
+                n = e[i--];
+                c = 5 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                y = (n >> 27) & 0x1f;
+                n <<= 5;
+                c -= 5;
+            }
+
+            sp_2048_mont_sqr_64(r, r, m, mp);
+            sp_2048_mont_sqr_64(r, r, m, mp);
+            sp_2048_mont_sqr_64(r, r, m, mp);
+            sp_2048_mont_sqr_64(r, r, m, mp);
+            sp_2048_mont_sqr_64(r, r, m, mp);
+
+            sp_2048_mont_mul_64(r, r, t[y], m, mp);
+        }
+
+        XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U);
+        sp_2048_mont_reduce_64(r, m, mp);
+
+        mask = 0 - (sp_2048_cmp_64(r, m) >= 0);
+        sp_2048_cond_sub_64(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */
+
+#ifdef WOLFSSL_HAVE_SP_RSA
+/* RSA public key operation.
+ *
+ * in      Array of bytes representing the number to exponentiate, base.
+ * inLen   Number of bytes in base.
+ * em      Public exponent.
+ * mm      Modulus.
+ * out     Buffer to hold big-endian bytes of exponentiation result.
+ *         Must be at least 256 bytes long.
+ * outLen  Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPublic_2048(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
+    byte* out, word32* outLen)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit a[128], m[64], r[128];
+#else
+    sp_digit* d = NULL;
+    sp_digit* a;
+    sp_digit* m;
+    sp_digit* r;
+#endif
+    sp_digit *ah;
+    sp_digit e[1];
+    int err = MP_OKAY;
+
+    if (*outLen < 256)
+        err = MP_TO_E;
+    if (err == MP_OKAY && (mp_count_bits(em) > 32 || inLen > 256 ||
+                                                     mp_count_bits(mm) != 2048))
+        err = MP_READ_E;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 5, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (d == NULL)
+            err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        a = d;
+        r = a + 64 * 2;
+        m = r + 64 * 2;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        ah = a + 64;
+
+        sp_2048_from_bin(ah, 64, in, inLen);
+#if DIGIT_BIT >= 32
+        e[0] = em->dp[0];
+#else
+        e[0] = em->dp[0];
+        if (em->used > 1) {
+            e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
+        }
+#endif
+        if (e[0] == 0) {
+            err = MP_EXPTMOD_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        sp_2048_from_mp(m, 64, mm);
+
+        if (e[0] == 0x3) {
+            if (err == MP_OKAY) {
+                sp_2048_sqr_64(r, ah);
+                err = sp_2048_mod_64_cond(r, r, m);
+            }
+            if (err == MP_OKAY) {
+                sp_2048_mul_64(r, ah, r);
+                err = sp_2048_mod_64_cond(r, r, m);
+            }
+        }
+        else {
+            int i;
+            sp_digit mp;
+
+            sp_2048_mont_setup(m, &mp);
+
+            /* Convert to Montgomery form. */
+            XMEMSET(a, 0, sizeof(sp_digit) * 64);
+            err = sp_2048_mod_64_cond(a, a, m);
+
+            if (err == MP_OKAY) {
+                for (i = 31; i >= 0; i--) {
+                    if (e[0] >> i) {
+                        break;
+                    }
+                }
+
+                XMEMCPY(r, a, sizeof(sp_digit) * 64);
+                for (i--; i>=0; i--) {
+                    sp_2048_mont_sqr_64(r, r, m, mp);
+                    if (((e[0] >> i) & 1) == 1) {
+                        sp_2048_mont_mul_64(r, r, a, m, mp);
+                    }
+                }
+                XMEMSET(&r[64], 0, sizeof(sp_digit) * 64);
+                sp_2048_mont_reduce_64(r, m, mp);
+
+                for (i = 63; i > 0; i--) {
+                    if (r[i] != m[i]) {
+                        break;
+                    }
+                }
+                if (r[i] >= m[i]) {
+                    sp_2048_sub_in_place_64(r, m);
+                }
+            }
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_to_bin(r, out);
+        *outLen = 256;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+#endif
+
+    return err;
+}
+
+#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
+    sp_digit* a;
+    sp_digit* d = NULL;
+    sp_digit* m;
+    sp_digit* r;
+    int err = MP_OKAY;
+
+    (void)pm;
+    (void)qm;
+    (void)dpm;
+    (void)dqm;
+    (void)qim;
+
+    if (*outLen < 256U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(dm) > 2048) {
+           err = MP_READ_E;
+        }
+        if (inLen > 256) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 2048) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 4, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        a = d + 64;
+        m = a + 128;
+        r = a;
+
+        sp_2048_from_bin(a, 64, in, inLen);
+        sp_2048_from_mp(d, 64, dm);
+        sp_2048_from_mp(m, 64, mm);
+        err = sp_2048_mod_exp_64(r, a, d, 2048, m, 0);
+    }
+    if (err == MP_OKAY) {
+        sp_2048_to_bin(r, out);
+        *outLen = 256;
+    }
+
+    if (d != NULL) {
+        XMEMSET(d, 0, sizeof(sp_digit) * 64);
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+
+    return err;
+#else
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r  A single precision number representing conditional add result.
+ * a  A single precision number to add with.
+ * b  A single precision number to add.
+ * m  Mask value to apply.
+ */
+SP_NOINLINE static sp_digit sp_2048_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        sp_digit m)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r5, #128\n\t"
+        "mov	r8, r5\n\t"
+        "mov	r7, #0\n\t"
+        "1:\n\t"
+        "ldr	r6, [%[b], r7]\n\t"
+        "and	r6, %[m]\n\t"
+        "mov	r5, #0\n\t"
+        "sub	r5, #1\n\t"
+        "add	r5, %[c]\n\t"
+        "ldr	r5, [%[a], r7]\n\t"
+        "adc	r5, r6\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c]\n\t"
+        "str	r5, [%[r], r7]\n\t"
+        "add	r7, #4\n\t"
+        "cmp	r7, r8\n\t"
+        "blt	1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r5", "r6", "r7", "r8"
+    );
+
+    return c;
+}
+
+/* RSA private key operation.
+ *
+ * in      Array of bytes representing the number to exponentiate, base.
+ * inLen   Number of bytes in base.
+ * dm      Private exponent.
+ * pm      First prime.
+ * qm      Second prime.
+ * dpm     First prime's CRT exponent.
+ * dqm     Second prime's CRT exponent.
+ * qim     Inverse of second prime mod p.
+ * mm      Modulus.
+ * out     Buffer to hold big-endian bytes of exponentiation result.
+ *         Must be at least 256 bytes long.
+ * outLen  Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm,
+    mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
+    byte* out, word32* outLen)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit a[64 * 2];
+    sp_digit p[32], q[32], dp[32];
+    sp_digit tmpa[64], tmpb[64];
+#else
+    sp_digit* t = NULL;
+    sp_digit* a;
+    sp_digit* p;
+    sp_digit* q;
+    sp_digit* dp;
+    sp_digit* tmpa;
+    sp_digit* tmpb;
+#endif
+    sp_digit* r;
+    sp_digit* qi;
+    sp_digit* dq;
+    sp_digit c;
+    int err = MP_OKAY;
+
+    (void)dm;
+    (void)mm;
+
+    if (*outLen < 256)
+        err = MP_TO_E;
+    if (err == MP_OKAY && (inLen > 256 || mp_count_bits(mm) != 2048))
+        err = MP_READ_E;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 11, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (t == NULL)
+            err = MEMORY_E;
+    }
+    if (err == MP_OKAY) {
+        a = t;
+        p = a + 64 * 2;
+        q = p + 32;
+        qi = dq = dp = q + 32;
+        tmpa = qi + 32;
+        tmpb = tmpa + 64;
+
+        r = t + 64;
+    }
+#else
+#endif
+
+    if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+        r = a;
+        qi = dq = dp;
+#endif
+        sp_2048_from_bin(a, 64, in, inLen);
+        sp_2048_from_mp(p, 32, pm);
+        sp_2048_from_mp(q, 32, qm);
+        sp_2048_from_mp(dp, 32, dpm);
+
+        err = sp_2048_mod_exp_32(tmpa, a, dp, 1024, p, 1);
+    }
+    if (err == MP_OKAY) {
+        sp_2048_from_mp(dq, 32, dqm);
+        err = sp_2048_mod_exp_32(tmpb, a, dq, 1024, q, 1);
+    }
+
+    if (err == MP_OKAY) {
+        c = sp_2048_sub_in_place_32(tmpa, tmpb);
+        c += sp_2048_cond_add_32(tmpa, tmpa, p, c);
+        sp_2048_cond_add_32(tmpa, tmpa, p, c);
+
+        sp_2048_from_mp(qi, 32, qim);
+        sp_2048_mul_32(tmpa, tmpa, qi);
+        err = sp_2048_mod_32(tmpa, tmpa, p);
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_mul_32(tmpa, q, tmpa);
+        XMEMSET(&tmpb[32], 0, sizeof(sp_digit) * 32);
+        sp_2048_add_64(r, tmpb, tmpa);
+
+        sp_2048_to_bin(r, out);
+        *outLen = 256;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        XMEMSET(t, 0, sizeof(sp_digit) * 32 * 11);
+        XFREE(t, NULL, DYNAMIC_TYPE_RSA);
+    }
+#else
+    XMEMSET(tmpa, 0, sizeof(tmpa));
+    XMEMSET(tmpb, 0, sizeof(tmpb));
+    XMEMSET(p,    0, sizeof(p));
+    XMEMSET(q,    0, sizeof(q));
+    XMEMSET(dp,   0, sizeof(dp));
+#endif
+
+    return err;
+}
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
+#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
+#endif /* WOLFSSL_HAVE_SP_RSA */
+#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
+                                              !defined(WOLFSSL_RSA_PUBLIC_ONLY))
+/* Convert an array of sp_digit to an mp_int.
+ *
+ * a  A single precision integer.
+ * r  A multi-precision integer.
+ */
+static int sp_2048_to_mp(const sp_digit* a, mp_int* r)
+{
+    int err;
+
+    err = mp_grow(r, (2048 + DIGIT_BIT - 1) / DIGIT_BIT);
+    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 32
+        XMEMCPY(r->dp, a, sizeof(sp_digit) * 64);
+        r->used = 64;
+        mp_clamp(r);
+#elif DIGIT_BIT < 32
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 64; i++) {
+            r->dp[j] |= (mp_digit)(a[i] << s);
+            r->dp[j] &= (1L << DIGIT_BIT) - 1;
+            s = DIGIT_BIT - s;
+            r->dp[++j] = (mp_digit)(a[i] >> s);
+            while (s + DIGIT_BIT <= 32) {
+                s += DIGIT_BIT;
+                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+                if (s == SP_WORD_SIZE) {
+                    r->dp[j] = 0;
+                }
+                else {
+                    r->dp[j] = (mp_digit)(a[i] >> s);
+                }
+            }
+            s = 32 - s;
+        }
+        r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#else
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 64; i++) {
+            r->dp[j] |= ((mp_digit)a[i]) << s;
+            if (s + 32 >= DIGIT_BIT) {
+    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+                r->dp[j] &= (1L << DIGIT_BIT) - 1;
+    #endif
+                s = DIGIT_BIT - s;
+                r->dp[++j] = a[i] >> s;
+                s = 32 - s;
+            }
+            else {
+                s += 32;
+            }
+        }
+        r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#endif
+    }
+
+    return err;
+}
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base  Base. MP integer.
+ * exp   Exponent. MP integer.
+ * mod   Modulus. MP integer.
+ * res   Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+    int err = MP_OKAY;
+    sp_digit b[128], e[64], m[64];
+    sp_digit* r = b;
+    int expBits = mp_count_bits(exp);
+
+    if (mp_count_bits(base) > 2048) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expBits > 2048) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 2048) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_from_mp(b, 64, base);
+        sp_2048_from_mp(e, 64, exp);
+        sp_2048_from_mp(m, 64, mod);
+
+        err = sp_2048_mod_exp_64(r, b, e, expBits, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_2048_to_mp(r, res);
+    }
+
+    XMEMSET(e, 0, sizeof(e));
+
+    return err;
+}
+
+#ifdef WOLFSSL_HAVE_SP_DH
+
+#ifdef HAVE_FFDHE_2048
+static void sp_2048_lshift_64(sp_digit* r, sp_digit* a, byte n)
+{
+    __asm__ __volatile__ (
+        "mov	r6, #31\n\t"
+        "sub	r6, r6, %[n]\n\t"
+        "add	%[a], %[a], #192\n\t"
+        "add	%[r], %[r], #192\n\t"
+        "ldr	r3, [%[a], #60]\n\t"
+        "lsr	r4, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r4, r4, r6\n\t"
+        "ldr	r2, [%[a], #56]\n\t"
+        "str	r4, [%[r], #64]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #52]\n\t"
+        "str	r3, [%[r], #60]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #48]\n\t"
+        "str	r2, [%[r], #56]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #44]\n\t"
+        "str	r4, [%[r], #52]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #40]\n\t"
+        "str	r3, [%[r], #48]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #36]\n\t"
+        "str	r2, [%[r], #44]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #32]\n\t"
+        "str	r4, [%[r], #40]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "str	r3, [%[r], #36]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #24]\n\t"
+        "str	r2, [%[r], #32]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #20]\n\t"
+        "str	r4, [%[r], #28]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "str	r3, [%[r], #24]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #12]\n\t"
+        "str	r2, [%[r], #20]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #8]\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "str	r3, [%[r], #12]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #0]\n\t"
+        "str	r2, [%[r], #8]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "sub	%[a], %[a], #64\n\t"
+        "sub	%[r], %[r], #64\n\t"
+        "ldr	r2, [%[a], #60]\n\t"
+        "str	r4, [%[r], #68]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #56]\n\t"
+        "str	r3, [%[r], #64]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #52]\n\t"
+        "str	r2, [%[r], #60]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #48]\n\t"
+        "str	r4, [%[r], #56]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #44]\n\t"
+        "str	r3, [%[r], #52]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #40]\n\t"
+        "str	r2, [%[r], #48]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #36]\n\t"
+        "str	r4, [%[r], #44]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #32]\n\t"
+        "str	r3, [%[r], #40]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #28]\n\t"
+        "str	r2, [%[r], #36]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #24]\n\t"
+        "str	r4, [%[r], #32]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "str	r3, [%[r], #28]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #16]\n\t"
+        "str	r2, [%[r], #24]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #12]\n\t"
+        "str	r4, [%[r], #20]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "str	r3, [%[r], #16]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #4]\n\t"
+        "str	r2, [%[r], #12]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #0]\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "sub	%[a], %[a], #64\n\t"
+        "sub	%[r], %[r], #64\n\t"
+        "ldr	r4, [%[a], #60]\n\t"
+        "str	r3, [%[r], #68]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #56]\n\t"
+        "str	r2, [%[r], #64]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #52]\n\t"
+        "str	r4, [%[r], #60]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #48]\n\t"
+        "str	r3, [%[r], #56]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #44]\n\t"
+        "str	r2, [%[r], #52]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #40]\n\t"
+        "str	r4, [%[r], #48]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #36]\n\t"
+        "str	r3, [%[r], #44]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #32]\n\t"
+        "str	r2, [%[r], #40]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #28]\n\t"
+        "str	r4, [%[r], #36]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #24]\n\t"
+        "str	r3, [%[r], #32]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #20]\n\t"
+        "str	r2, [%[r], #28]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #16]\n\t"
+        "str	r4, [%[r], #24]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "str	r3, [%[r], #20]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #8]\n\t"
+        "str	r2, [%[r], #16]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #4]\n\t"
+        "str	r4, [%[r], #12]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #0]\n\t"
+        "str	r3, [%[r], #8]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "sub	%[a], %[a], #64\n\t"
+        "sub	%[r], %[r], #64\n\t"
+        "ldr	r3, [%[a], #60]\n\t"
+        "str	r2, [%[r], #68]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #56]\n\t"
+        "str	r4, [%[r], #64]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #52]\n\t"
+        "str	r3, [%[r], #60]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #48]\n\t"
+        "str	r2, [%[r], #56]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #44]\n\t"
+        "str	r4, [%[r], #52]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #40]\n\t"
+        "str	r3, [%[r], #48]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #36]\n\t"
+        "str	r2, [%[r], #44]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #32]\n\t"
+        "str	r4, [%[r], #40]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "str	r3, [%[r], #36]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #24]\n\t"
+        "str	r2, [%[r], #32]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #20]\n\t"
+        "str	r4, [%[r], #28]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "str	r3, [%[r], #24]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #12]\n\t"
+        "str	r2, [%[r], #20]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #8]\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "str	r3, [%[r], #12]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #0]\n\t"
+        "str	r2, [%[r], #8]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "str	r3, [%[r]]\n\t"
+        "str	r4, [%[r], #4]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [n] "r" (n)
+        : "memory", "r2", "r3", "r4", "r5", "r6"
+    );
+}
+
+/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
+ *
+ * r     A single precision number that is the result of the operation.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_2048_mod_exp_2_64(sp_digit* r, const sp_digit* e, int bits,
+        const sp_digit* m)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit nd[128];
+    sp_digit td[65];
+#else
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit* tmp;
+    sp_digit mp = 1;
+    sp_digit n, o;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 193, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        norm = td;
+        tmp  = td + 128;
+#else
+        norm = nd;
+        tmp  = td;
+#endif
+
+        sp_2048_mont_setup(m, &mp);
+        sp_2048_mont_norm_64(norm, m);
+
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 5;
+        if (c == 32) {
+            c = 27;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
+        sp_2048_lshift_64(r, norm, y);
+        for (; i>=0 || c>=5; ) {
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 27;
+                n <<= 5;
+                c = 27;
+            }
+            else if (c < 5) {
+                y = n >> 27;
+                n = e[i--];
+                c = 5 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                y = (n >> 27) & 0x1f;
+                n <<= 5;
+                c -= 5;
+            }
+
+            sp_2048_mont_sqr_64(r, r, m, mp);
+            sp_2048_mont_sqr_64(r, r, m, mp);
+            sp_2048_mont_sqr_64(r, r, m, mp);
+            sp_2048_mont_sqr_64(r, r, m, mp);
+            sp_2048_mont_sqr_64(r, r, m, mp);
+
+            sp_2048_lshift_64(r, r, y);
+            sp_2048_mul_d_64(tmp, norm, r[64]);
+            r[64] = 0;
+            o = sp_2048_add_64(r, r, tmp);
+            sp_2048_cond_sub_64(r, r, m, (sp_digit)0 - o);
+        }
+
+        XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U);
+        sp_2048_mont_reduce_64(r, m, mp);
+
+        mask = 0 - (sp_2048_cmp_64(r, m) >= 0);
+        sp_2048_cond_sub_64(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#endif /* HAVE_FFDHE_2048 */
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base     Base.
+ * exp      Array of bytes that is the exponent.
+ * expLen   Length of data, in bytes, in exponent.
+ * mod      Modulus.
+ * out      Buffer to hold big-endian bytes of exponentiation result.
+ *          Must be at least 256 bytes long.
+ * outLen   Length, in bytes, of exponentiation result.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_DhExp_2048(mp_int* base, const byte* exp, word32 expLen,
+    mp_int* mod, byte* out, word32* outLen)
+{
+    int err = MP_OKAY;
+    sp_digit b[128], e[64], m[64];
+    sp_digit* r = b;
+    word32 i;
+
+    if (mp_count_bits(base) > 2048) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expLen > 256) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 2048) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_from_mp(b, 64, base);
+        sp_2048_from_bin(e, 64, exp, expLen);
+        sp_2048_from_mp(m, 64, mod);
+
+    #ifdef HAVE_FFDHE_2048
+        if (base->used == 1 && base->dp[0] == 2 && m[63] == (sp_digit)-1)
+            err = sp_2048_mod_exp_2_64(r, e, expLen * 8, m);
+        else
+    #endif
+            err = sp_2048_mod_exp_64(r, b, e, expLen * 8, m, 0);
+
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_to_bin(r, out);
+        *outLen = 256;
+        for (i=0; i<256 && out[i] == 0; i++) {
+        }
+        *outLen -= i;
+        XMEMMOVE(out, out + i, *outLen);
+
+    }
+
+    XMEMSET(e, 0, sizeof(e));
+
+    return err;
+}
+#endif /* WOLFSSL_HAVE_SP_DH */
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base  Base. MP integer.
+ * exp   Exponent. MP integer.
+ * mod   Modulus. MP integer.
+ * res   Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_1024(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+    int err = MP_OKAY;
+    sp_digit b[64], e[32], m[32];
+    sp_digit* r = b;
+    int expBits = mp_count_bits(exp);
+
+    if (mp_count_bits(base) > 1024) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expBits > 1024) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 1024) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_from_mp(b, 32, base);
+        sp_2048_from_mp(e, 32, exp);
+        sp_2048_from_mp(m, 32, mod);
+
+        err = sp_2048_mod_exp_32(r, b, e, expBits, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        XMEMSET(r + 32, 0, sizeof(*r) * 32U);
+        err = sp_2048_to_mp(r, res);
+        res->used = mod->used;
+        mp_clamp(res);
+    }
+
+    XMEMSET(e, 0, sizeof(e));
+
+    return err;
+}
+
+#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
+
+#endif /* !WOLFSSL_SP_NO_2048 */
+
+#ifndef WOLFSSL_SP_NO_3072
+/* Read big endian unsigned byte array into r.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
+ * a  Byte array.
+ * n  Number of bytes in array to read.
+ */
+static void sp_3072_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = n-1; i >= 0; i--) {
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 24U) {
+            r[j] &= 0xffffffff;
+            s = 32U - s;
+            if (j + 1 >= size) {
+                break;
+            }
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
+ * a  A multi-precision integer.
+ */
+static void sp_3072_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 32
+    int j;
+
+    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+    for (j = a->used; j < size; j++) {
+        r[j] = 0;
+    }
+#elif DIGIT_BIT > 32
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
+        r[j] &= 0xffffffff;
+        s = 32U - s;
+        if (j + 1 >= size) {
+            break;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 32U) <= (word32)DIGIT_BIT) {
+            s += 32U;
+            r[j] &= 0xffffffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#else
+    int i, j = 0, s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i]) << s;
+        if (s + DIGIT_BIT >= 32) {
+            r[j] &= 0xffffffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            s = 32 - s;
+            if (s == DIGIT_BIT) {
+                r[++j] = 0;
+                s = 0;
+            }
+            else {
+                r[++j] = a->dp[i] >> s;
+                s = DIGIT_BIT - s;
+            }
+        }
+        else {
+            s += DIGIT_BIT;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#endif
+}
+
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 384
+ *
+ * r  A single precision integer.
+ * a  Byte array.
+ */
+static void sp_3072_to_bin(sp_digit* r, byte* a)
+{
+    int i, j, s = 0, b;
+
+    j = 3072 / 8 - 1;
+    a[j] = 0;
+    for (i=0; i<96 && j>=0; i++) {
+        b = 0;
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
+            break;
+        }
+        while (b < 32) {
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
+                break;
+            }
+        }
+        s = 8 - (b - 32);
+        if (j >= 0) {
+            a[j] = 0;
+        }
+        if (s != 0) {
+            j++;
+        }
+    }
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_mul_12(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit tmp[12 * 2];
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r8, r3\n\t"
+        "mov	r11, %[r]\n\t"
+        "mov	r9, %[a]\n\t"
+        "mov	r10, %[b]\n\t"
+        "mov	r6, #48\n\t"
+        "add	r6, r9\n\t"
+        "mov	r12, r6\n\t"
+        "\n1:\n\t"
+        "mov	%[r], #0\n\t"
+        "mov	r5, #0\n\t"
+        "mov	r6, #44\n\t"
+        "mov	%[a], r8\n\t"
+        "sub	%[a], r6\n\t"
+        "sbc	r6, r6\n\t"
+        "mvn	r6, r6\n\t"
+        "and	%[a], r6\n\t"
+        "mov	%[b], r8\n\t"
+        "sub	%[b], %[a]\n\t"
+        "add	%[a], r9\n\t"
+        "add	%[b], r10\n\t"
+        "\n2:\n\t"
+        "# Multiply Start\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r7, [%[b]]\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r3, r7\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r7, [%[b]]\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r7, [%[b]]\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r7, [%[b]]\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "# Multiply Done\n\t"
+        "add	%[a], #4\n\t"
+        "sub	%[b], #4\n\t"
+        "cmp	%[a], r12\n\t"
+        "beq	3f\n\t"
+        "mov	r6, r8\n\t"
+        "add	r6, r9\n\t"
+        "cmp	%[a], r6\n\t"
+        "ble	2b\n\t"
+        "\n3:\n\t"
+        "mov	%[r], r11\n\t"
+        "mov	r7, r8\n\t"
+        "str	r3, [%[r], r7]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "add	r7, #4\n\t"
+        "mov	r8, r7\n\t"
+        "mov	r6, #88\n\t"
+        "cmp	r7, r6\n\t"
+        "ble	1b\n\t"
+        "str	r3, [%[r], r7]\n\t"
+        "mov	%[a], r9\n\t"
+        "mov	%[b], r10\n\t"
+        :
+        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
+    );
+
+    XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r5, #0\n\t"
+        "mov	r8, r3\n\t"
+        "mov	r11, %[r]\n\t"
+        "mov	r6, #96\n\t"
+        "neg	r6, r6\n\t"
+        "add	sp, r6\n\t"
+        "mov	r10, sp\n\t"
+        "mov	r9, %[a]\n\t"
+        "\n1:\n\t"
+        "mov	%[r], #0\n\t"
+        "mov	r6, #44\n\t"
+        "mov	%[a], r8\n\t"
+        "sub	%[a], r6\n\t"
+        "sbc	r6, r6\n\t"
+        "mvn	r6, r6\n\t"
+        "and	%[a], r6\n\t"
+        "mov	r2, r8\n\t"
+        "sub	r2, %[a]\n\t"
+        "add	%[a], r9\n\t"
+        "add	r2, r9\n\t"
+        "\n2:\n\t"
+        "cmp	r2, %[a]\n\t"
+        "beq	4f\n\t"
+        "# Multiply * 2: Start\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r7, [r2]\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r3, r7\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "add	r3, r7\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r7, [r2]\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r7, [r2]\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r7, [r2]\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "# Multiply * 2: Done\n\t"
+        "bal	5f\n\t"
+        "\n4:\n\t"
+        "# Square: Start\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "mul	r6, r6\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "mul	r7, r7\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #15\n\t"
+        "lsl	r6, r6, #17\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "# Square: Done\n\t"
+        "\n5:\n\t"
+        "add	%[a], #4\n\t"
+        "sub	r2, #4\n\t"
+        "mov	r6, #48\n\t"
+        "add	r6, r9\n\t"
+        "cmp	%[a], r6\n\t"
+        "beq	3f\n\t"
+        "cmp	%[a], r2\n\t"
+        "bgt	3f\n\t"
+        "mov	r7, r8\n\t"
+        "add	r7, r9\n\t"
+        "cmp	%[a], r7\n\t"
+        "ble	2b\n\t"
+        "\n3:\n\t"
+        "mov	%[r], r10\n\t"
+        "mov	r7, r8\n\t"
+        "str	r3, [%[r], r7]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "mov	r5, #0\n\t"
+        "add	r7, #4\n\t"
+        "mov	r8, r7\n\t"
+        "mov	r6, #88\n\t"
+        "cmp	r7, r6\n\t"
+        "ble	1b\n\t"
+        "mov	%[a], r9\n\t"
+        "str	r3, [%[r], r7]\n\t"
+        "mov	%[r], r11\n\t"
+        "mov	%[a], r10\n\t"
+        "mov	r3, #92\n\t"
+        "\n4:\n\t"
+        "ldr	r6, [%[a], r3]\n\t"
+        "str	r6, [%[r], r3]\n\t"
+        "sub	r3, #4\n\t"
+        "bge	4b\n\t"
+        "mov	r6, #96\n\t"
+        "add	sp, r6\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
+    );
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_3072_add_12(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldr	r4, [%[a], #0]\n\t"
+        "ldr	r5, [%[b], #0]\n\t"
+        "add	r4, r5\n\t"
+        "str	r4, [%[r], #0]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b], #4]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #4]\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "ldr	r5, [%[b], #8]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "ldr	r5, [%[b], #12]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #12]\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "ldr	r5, [%[b], #16]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "ldr	r5, [%[b], #20]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #20]\n\t"
+        "ldr	r4, [%[a], #24]\n\t"
+        "ldr	r5, [%[b], #24]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #24]\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "ldr	r5, [%[b], #28]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #28]\n\t"
+        "ldr	r4, [%[a], #32]\n\t"
+        "ldr	r5, [%[b], #32]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #32]\n\t"
+        "ldr	r4, [%[a], #36]\n\t"
+        "ldr	r5, [%[b], #36]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #36]\n\t"
+        "ldr	r4, [%[a], #40]\n\t"
+        "ldr	r5, [%[b], #40]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #40]\n\t"
+        "ldr	r4, [%[a], #44]\n\t"
+        "ldr	r5, [%[b], #44]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #44]\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c]\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5"
+    );
+
+    return c;
+}
+
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_3072_sub_in_place_24(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldr	r3, [%[a], #0]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b], #0]\n\t"
+        "ldr	r6, [%[b], #4]\n\t"
+        "sub	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #0]\n\t"
+        "str	r4, [%[a], #4]\n\t"
+        "ldr	r3, [%[a], #8]\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "ldr	r5, [%[b], #8]\n\t"
+        "ldr	r6, [%[b], #12]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #8]\n\t"
+        "str	r4, [%[a], #12]\n\t"
+        "ldr	r3, [%[a], #16]\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "ldr	r5, [%[b], #16]\n\t"
+        "ldr	r6, [%[b], #20]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #16]\n\t"
+        "str	r4, [%[a], #20]\n\t"
+        "ldr	r3, [%[a], #24]\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "ldr	r5, [%[b], #24]\n\t"
+        "ldr	r6, [%[b], #28]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #24]\n\t"
+        "str	r4, [%[a], #28]\n\t"
+        "ldr	r3, [%[a], #32]\n\t"
+        "ldr	r4, [%[a], #36]\n\t"
+        "ldr	r5, [%[b], #32]\n\t"
+        "ldr	r6, [%[b], #36]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #32]\n\t"
+        "str	r4, [%[a], #36]\n\t"
+        "ldr	r3, [%[a], #40]\n\t"
+        "ldr	r4, [%[a], #44]\n\t"
+        "ldr	r5, [%[b], #40]\n\t"
+        "ldr	r6, [%[b], #44]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #40]\n\t"
+        "str	r4, [%[a], #44]\n\t"
+        "ldr	r3, [%[a], #48]\n\t"
+        "ldr	r4, [%[a], #52]\n\t"
+        "ldr	r5, [%[b], #48]\n\t"
+        "ldr	r6, [%[b], #52]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #48]\n\t"
+        "str	r4, [%[a], #52]\n\t"
+        "ldr	r3, [%[a], #56]\n\t"
+        "ldr	r4, [%[a], #60]\n\t"
+        "ldr	r5, [%[b], #56]\n\t"
+        "ldr	r6, [%[b], #60]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #56]\n\t"
+        "str	r4, [%[a], #60]\n\t"
+        "ldr	r3, [%[a], #64]\n\t"
+        "ldr	r4, [%[a], #68]\n\t"
+        "ldr	r5, [%[b], #64]\n\t"
+        "ldr	r6, [%[b], #68]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #64]\n\t"
+        "str	r4, [%[a], #68]\n\t"
+        "ldr	r3, [%[a], #72]\n\t"
+        "ldr	r4, [%[a], #76]\n\t"
+        "ldr	r5, [%[b], #72]\n\t"
+        "ldr	r6, [%[b], #76]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #72]\n\t"
+        "str	r4, [%[a], #76]\n\t"
+        "ldr	r3, [%[a], #80]\n\t"
+        "ldr	r4, [%[a], #84]\n\t"
+        "ldr	r5, [%[b], #80]\n\t"
+        "ldr	r6, [%[b], #84]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #80]\n\t"
+        "str	r4, [%[a], #84]\n\t"
+        "ldr	r3, [%[a], #88]\n\t"
+        "ldr	r4, [%[a], #92]\n\t"
+        "ldr	r5, [%[b], #88]\n\t"
+        "ldr	r6, [%[b], #92]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #88]\n\t"
+        "str	r4, [%[a], #92]\n\t"
+        "sbc	%[c], %[c]\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6"
+    );
+
+    return c;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_3072_add_24(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldr	r4, [%[a], #0]\n\t"
+        "ldr	r5, [%[b], #0]\n\t"
+        "add	r4, r5\n\t"
+        "str	r4, [%[r], #0]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b], #4]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #4]\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "ldr	r5, [%[b], #8]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "ldr	r5, [%[b], #12]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #12]\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "ldr	r5, [%[b], #16]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "ldr	r5, [%[b], #20]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #20]\n\t"
+        "ldr	r4, [%[a], #24]\n\t"
+        "ldr	r5, [%[b], #24]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #24]\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "ldr	r5, [%[b], #28]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #28]\n\t"
+        "ldr	r4, [%[a], #32]\n\t"
+        "ldr	r5, [%[b], #32]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #32]\n\t"
+        "ldr	r4, [%[a], #36]\n\t"
+        "ldr	r5, [%[b], #36]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #36]\n\t"
+        "ldr	r4, [%[a], #40]\n\t"
+        "ldr	r5, [%[b], #40]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #40]\n\t"
+        "ldr	r4, [%[a], #44]\n\t"
+        "ldr	r5, [%[b], #44]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #44]\n\t"
+        "ldr	r4, [%[a], #48]\n\t"
+        "ldr	r5, [%[b], #48]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #48]\n\t"
+        "ldr	r4, [%[a], #52]\n\t"
+        "ldr	r5, [%[b], #52]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #52]\n\t"
+        "ldr	r4, [%[a], #56]\n\t"
+        "ldr	r5, [%[b], #56]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #56]\n\t"
+        "ldr	r4, [%[a], #60]\n\t"
+        "ldr	r5, [%[b], #60]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #60]\n\t"
+        "ldr	r4, [%[a], #64]\n\t"
+        "ldr	r5, [%[b], #64]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #64]\n\t"
+        "ldr	r4, [%[a], #68]\n\t"
+        "ldr	r5, [%[b], #68]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #68]\n\t"
+        "ldr	r4, [%[a], #72]\n\t"
+        "ldr	r5, [%[b], #72]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #72]\n\t"
+        "ldr	r4, [%[a], #76]\n\t"
+        "ldr	r5, [%[b], #76]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #76]\n\t"
+        "ldr	r4, [%[a], #80]\n\t"
+        "ldr	r5, [%[b], #80]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #80]\n\t"
+        "ldr	r4, [%[a], #84]\n\t"
+        "ldr	r5, [%[b], #84]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #84]\n\t"
+        "ldr	r4, [%[a], #88]\n\t"
+        "ldr	r5, [%[b], #88]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #88]\n\t"
+        "ldr	r4, [%[a], #92]\n\t"
+        "ldr	r5, [%[b], #92]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #92]\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c]\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5"
+    );
+
+    return c;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * m  Mask to AND against each digit.
+ */
+static void sp_3072_mask_12(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<12; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    r[0] = a[0] & m;
+    r[1] = a[1] & m;
+    r[2] = a[2] & m;
+    r[3] = a[3] & m;
+    r[4] = a[4] & m;
+    r[5] = a[5] & m;
+    r[6] = a[6] & m;
+    r[7] = a[7] & m;
+    r[8] = a[8] & m;
+    r[9] = a[9] & m;
+    r[10] = a[10] & m;
+    r[11] = a[11] & m;
+#endif
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_mul_24(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit* z0 = r;
+    sp_digit z1[24];
+    sp_digit a1[12];
+    sp_digit b1[12];
+    sp_digit z2[24];
+    sp_digit u, ca, cb;
+
+    ca = sp_3072_add_12(a1, a, &a[12]);
+    cb = sp_3072_add_12(b1, b, &b[12]);
+    u  = ca & cb;
+    sp_3072_mul_12(z1, a1, b1);
+    sp_3072_mul_12(z2, &a[12], &b[12]);
+    sp_3072_mul_12(z0, a, b);
+    sp_3072_mask_12(r + 24, a1, 0 - cb);
+    sp_3072_mask_12(b1, b1, 0 - ca);
+    u += sp_3072_add_12(r + 24, r + 24, b1);
+    u += sp_3072_sub_in_place_24(z1, z2);
+    u += sp_3072_sub_in_place_24(z1, z0);
+    u += sp_3072_add_24(r + 12, r + 12, z1);
+    r[36] = u;
+    XMEMSET(r + 36 + 1, 0, sizeof(sp_digit) * (12 - 1));
+    (void)sp_3072_add_24(r + 24, r + 24, z2);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_sqr_24(sp_digit* r, const sp_digit* a)
+{
+    sp_digit* z0 = r;
+    sp_digit z2[24];
+    sp_digit z1[24];
+    sp_digit a1[12];
+    sp_digit u;
+
+    u = sp_3072_add_12(a1, a, &a[12]);
+    sp_3072_sqr_12(z1, a1);
+    sp_3072_sqr_12(z2, &a[12]);
+    sp_3072_sqr_12(z0, a);
+    sp_3072_mask_12(r + 24, a1, 0 - u);
+    u += sp_3072_add_12(r + 24, r + 24, r + 24);
+    u += sp_3072_sub_in_place_24(z1, z2);
+    u += sp_3072_sub_in_place_24(z1, z0);
+    u += sp_3072_add_24(r + 12, r + 12, z1);
+    r[36] = u;
+    XMEMSET(r + 36 + 1, 0, sizeof(sp_digit) * (12 - 1));
+    (void)sp_3072_add_24(r + 24, r + 24, z2);
+}
+
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_3072_sub_in_place_48(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldr	r3, [%[a], #0]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b], #0]\n\t"
+        "ldr	r6, [%[b], #4]\n\t"
+        "sub	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #0]\n\t"
+        "str	r4, [%[a], #4]\n\t"
+        "ldr	r3, [%[a], #8]\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "ldr	r5, [%[b], #8]\n\t"
+        "ldr	r6, [%[b], #12]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #8]\n\t"
+        "str	r4, [%[a], #12]\n\t"
+        "ldr	r3, [%[a], #16]\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "ldr	r5, [%[b], #16]\n\t"
+        "ldr	r6, [%[b], #20]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #16]\n\t"
+        "str	r4, [%[a], #20]\n\t"
+        "ldr	r3, [%[a], #24]\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "ldr	r5, [%[b], #24]\n\t"
+        "ldr	r6, [%[b], #28]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #24]\n\t"
+        "str	r4, [%[a], #28]\n\t"
+        "ldr	r3, [%[a], #32]\n\t"
+        "ldr	r4, [%[a], #36]\n\t"
+        "ldr	r5, [%[b], #32]\n\t"
+        "ldr	r6, [%[b], #36]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #32]\n\t"
+        "str	r4, [%[a], #36]\n\t"
+        "ldr	r3, [%[a], #40]\n\t"
+        "ldr	r4, [%[a], #44]\n\t"
+        "ldr	r5, [%[b], #40]\n\t"
+        "ldr	r6, [%[b], #44]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #40]\n\t"
+        "str	r4, [%[a], #44]\n\t"
+        "ldr	r3, [%[a], #48]\n\t"
+        "ldr	r4, [%[a], #52]\n\t"
+        "ldr	r5, [%[b], #48]\n\t"
+        "ldr	r6, [%[b], #52]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #48]\n\t"
+        "str	r4, [%[a], #52]\n\t"
+        "ldr	r3, [%[a], #56]\n\t"
+        "ldr	r4, [%[a], #60]\n\t"
+        "ldr	r5, [%[b], #56]\n\t"
+        "ldr	r6, [%[b], #60]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #56]\n\t"
+        "str	r4, [%[a], #60]\n\t"
+        "ldr	r3, [%[a], #64]\n\t"
+        "ldr	r4, [%[a], #68]\n\t"
+        "ldr	r5, [%[b], #64]\n\t"
+        "ldr	r6, [%[b], #68]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #64]\n\t"
+        "str	r4, [%[a], #68]\n\t"
+        "ldr	r3, [%[a], #72]\n\t"
+        "ldr	r4, [%[a], #76]\n\t"
+        "ldr	r5, [%[b], #72]\n\t"
+        "ldr	r6, [%[b], #76]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #72]\n\t"
+        "str	r4, [%[a], #76]\n\t"
+        "ldr	r3, [%[a], #80]\n\t"
+        "ldr	r4, [%[a], #84]\n\t"
+        "ldr	r5, [%[b], #80]\n\t"
+        "ldr	r6, [%[b], #84]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #80]\n\t"
+        "str	r4, [%[a], #84]\n\t"
+        "ldr	r3, [%[a], #88]\n\t"
+        "ldr	r4, [%[a], #92]\n\t"
+        "ldr	r5, [%[b], #88]\n\t"
+        "ldr	r6, [%[b], #92]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #88]\n\t"
+        "str	r4, [%[a], #92]\n\t"
+        "ldr	r3, [%[a], #96]\n\t"
+        "ldr	r4, [%[a], #100]\n\t"
+        "ldr	r5, [%[b], #96]\n\t"
+        "ldr	r6, [%[b], #100]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #96]\n\t"
+        "str	r4, [%[a], #100]\n\t"
+        "ldr	r3, [%[a], #104]\n\t"
+        "ldr	r4, [%[a], #108]\n\t"
+        "ldr	r5, [%[b], #104]\n\t"
+        "ldr	r6, [%[b], #108]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #104]\n\t"
+        "str	r4, [%[a], #108]\n\t"
+        "ldr	r3, [%[a], #112]\n\t"
+        "ldr	r4, [%[a], #116]\n\t"
+        "ldr	r5, [%[b], #112]\n\t"
+        "ldr	r6, [%[b], #116]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #112]\n\t"
+        "str	r4, [%[a], #116]\n\t"
+        "ldr	r3, [%[a], #120]\n\t"
+        "ldr	r4, [%[a], #124]\n\t"
+        "ldr	r5, [%[b], #120]\n\t"
+        "ldr	r6, [%[b], #124]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #120]\n\t"
+        "str	r4, [%[a], #124]\n\t"
+        "sbc	%[c], %[c]\n\t"
+        "add	%[a], #0x80\n\t"
+        "add	%[b], #0x80\n\t"
+        "mov	r5, #0\n\t"
+        "sub	r5, %[c]\n\t"
+        "ldr	r3, [%[a], #0]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b], #0]\n\t"
+        "ldr	r6, [%[b], #4]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #0]\n\t"
+        "str	r4, [%[a], #4]\n\t"
+        "ldr	r3, [%[a], #8]\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "ldr	r5, [%[b], #8]\n\t"
+        "ldr	r6, [%[b], #12]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #8]\n\t"
+        "str	r4, [%[a], #12]\n\t"
+        "ldr	r3, [%[a], #16]\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "ldr	r5, [%[b], #16]\n\t"
+        "ldr	r6, [%[b], #20]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #16]\n\t"
+        "str	r4, [%[a], #20]\n\t"
+        "ldr	r3, [%[a], #24]\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "ldr	r5, [%[b], #24]\n\t"
+        "ldr	r6, [%[b], #28]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #24]\n\t"
+        "str	r4, [%[a], #28]\n\t"
+        "ldr	r3, [%[a], #32]\n\t"
+        "ldr	r4, [%[a], #36]\n\t"
+        "ldr	r5, [%[b], #32]\n\t"
+        "ldr	r6, [%[b], #36]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #32]\n\t"
+        "str	r4, [%[a], #36]\n\t"
+        "ldr	r3, [%[a], #40]\n\t"
+        "ldr	r4, [%[a], #44]\n\t"
+        "ldr	r5, [%[b], #40]\n\t"
+        "ldr	r6, [%[b], #44]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #40]\n\t"
+        "str	r4, [%[a], #44]\n\t"
+        "ldr	r3, [%[a], #48]\n\t"
+        "ldr	r4, [%[a], #52]\n\t"
+        "ldr	r5, [%[b], #48]\n\t"
+        "ldr	r6, [%[b], #52]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #48]\n\t"
+        "str	r4, [%[a], #52]\n\t"
+        "ldr	r3, [%[a], #56]\n\t"
+        "ldr	r4, [%[a], #60]\n\t"
+        "ldr	r5, [%[b], #56]\n\t"
+        "ldr	r6, [%[b], #60]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #56]\n\t"
+        "str	r4, [%[a], #60]\n\t"
+        "sbc	%[c], %[c]\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6"
+    );
+
+    return c;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r7, #0\n\t"
+        "mvn	r7, r7\n\t"
+        "ldr	r4, [%[a], #0]\n\t"
+        "ldr	r5, [%[b], #0]\n\t"
+        "add	r4, r5\n\t"
+        "str	r4, [%[r], #0]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b], #4]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #4]\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "ldr	r5, [%[b], #8]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "ldr	r5, [%[b], #12]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #12]\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "ldr	r5, [%[b], #16]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "ldr	r5, [%[b], #20]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #20]\n\t"
+        "ldr	r4, [%[a], #24]\n\t"
+        "ldr	r5, [%[b], #24]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #24]\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "ldr	r5, [%[b], #28]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #28]\n\t"
+        "ldr	r4, [%[a], #32]\n\t"
+        "ldr	r5, [%[b], #32]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #32]\n\t"
+        "ldr	r4, [%[a], #36]\n\t"
+        "ldr	r5, [%[b], #36]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #36]\n\t"
+        "ldr	r4, [%[a], #40]\n\t"
+        "ldr	r5, [%[b], #40]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #40]\n\t"
+        "ldr	r4, [%[a], #44]\n\t"
+        "ldr	r5, [%[b], #44]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #44]\n\t"
+        "ldr	r4, [%[a], #48]\n\t"
+        "ldr	r5, [%[b], #48]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #48]\n\t"
+        "ldr	r4, [%[a], #52]\n\t"
+        "ldr	r5, [%[b], #52]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #52]\n\t"
+        "ldr	r4, [%[a], #56]\n\t"
+        "ldr	r5, [%[b], #56]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #56]\n\t"
+        "ldr	r4, [%[a], #60]\n\t"
+        "ldr	r5, [%[b], #60]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #60]\n\t"
+        "ldr	r4, [%[a], #64]\n\t"
+        "ldr	r5, [%[b], #64]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #64]\n\t"
+        "ldr	r4, [%[a], #68]\n\t"
+        "ldr	r5, [%[b], #68]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #68]\n\t"
+        "ldr	r4, [%[a], #72]\n\t"
+        "ldr	r5, [%[b], #72]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #72]\n\t"
+        "ldr	r4, [%[a], #76]\n\t"
+        "ldr	r5, [%[b], #76]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #76]\n\t"
+        "ldr	r4, [%[a], #80]\n\t"
+        "ldr	r5, [%[b], #80]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #80]\n\t"
+        "ldr	r4, [%[a], #84]\n\t"
+        "ldr	r5, [%[b], #84]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #84]\n\t"
+        "ldr	r4, [%[a], #88]\n\t"
+        "ldr	r5, [%[b], #88]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #88]\n\t"
+        "ldr	r4, [%[a], #92]\n\t"
+        "ldr	r5, [%[b], #92]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #92]\n\t"
+        "ldr	r4, [%[a], #96]\n\t"
+        "ldr	r5, [%[b], #96]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #96]\n\t"
+        "ldr	r4, [%[a], #100]\n\t"
+        "ldr	r5, [%[b], #100]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #100]\n\t"
+        "ldr	r4, [%[a], #104]\n\t"
+        "ldr	r5, [%[b], #104]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #104]\n\t"
+        "ldr	r4, [%[a], #108]\n\t"
+        "ldr	r5, [%[b], #108]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #108]\n\t"
+        "ldr	r4, [%[a], #112]\n\t"
+        "ldr	r5, [%[b], #112]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #112]\n\t"
+        "ldr	r4, [%[a], #116]\n\t"
+        "ldr	r5, [%[b], #116]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #116]\n\t"
+        "ldr	r4, [%[a], #120]\n\t"
+        "ldr	r5, [%[b], #120]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #120]\n\t"
+        "ldr	r4, [%[a], #124]\n\t"
+        "ldr	r5, [%[b], #124]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #124]\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c]\n\t"
+        "add	%[a], #0x80\n\t"
+        "add	%[b], #0x80\n\t"
+        "add	%[r], #0x80\n\t"
+        "add	%[c], r7\n\t"
+        "ldr	r4, [%[a], #0]\n\t"
+        "ldr	r5, [%[b], #0]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #0]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b], #4]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #4]\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "ldr	r5, [%[b], #8]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "ldr	r5, [%[b], #12]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #12]\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "ldr	r5, [%[b], #16]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "ldr	r5, [%[b], #20]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #20]\n\t"
+        "ldr	r4, [%[a], #24]\n\t"
+        "ldr	r5, [%[b], #24]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #24]\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "ldr	r5, [%[b], #28]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #28]\n\t"
+        "ldr	r4, [%[a], #32]\n\t"
+        "ldr	r5, [%[b], #32]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #32]\n\t"
+        "ldr	r4, [%[a], #36]\n\t"
+        "ldr	r5, [%[b], #36]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #36]\n\t"
+        "ldr	r4, [%[a], #40]\n\t"
+        "ldr	r5, [%[b], #40]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #40]\n\t"
+        "ldr	r4, [%[a], #44]\n\t"
+        "ldr	r5, [%[b], #44]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #44]\n\t"
+        "ldr	r4, [%[a], #48]\n\t"
+        "ldr	r5, [%[b], #48]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #48]\n\t"
+        "ldr	r4, [%[a], #52]\n\t"
+        "ldr	r5, [%[b], #52]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #52]\n\t"
+        "ldr	r4, [%[a], #56]\n\t"
+        "ldr	r5, [%[b], #56]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #56]\n\t"
+        "ldr	r4, [%[a], #60]\n\t"
+        "ldr	r5, [%[b], #60]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #60]\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c]\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r7"
+    );
+
+    return c;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * m  Mask to AND against each digit.
+ */
+static void sp_3072_mask_24(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<24; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    int i;
+
+    for (i = 0; i < 24; i += 8) {
+        r[i+0] = a[i+0] & m;
+        r[i+1] = a[i+1] & m;
+        r[i+2] = a[i+2] & m;
+        r[i+3] = a[i+3] & m;
+        r[i+4] = a[i+4] & m;
+        r[i+5] = a[i+5] & m;
+        r[i+6] = a[i+6] & m;
+        r[i+7] = a[i+7] & m;
+    }
+#endif
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit* z0 = r;
+    sp_digit z1[48];
+    sp_digit a1[24];
+    sp_digit b1[24];
+    sp_digit z2[48];
+    sp_digit u, ca, cb;
+
+    ca = sp_3072_add_24(a1, a, &a[24]);
+    cb = sp_3072_add_24(b1, b, &b[24]);
+    u  = ca & cb;
+    sp_3072_mul_24(z1, a1, b1);
+    sp_3072_mul_24(z2, &a[24], &b[24]);
+    sp_3072_mul_24(z0, a, b);
+    sp_3072_mask_24(r + 48, a1, 0 - cb);
+    sp_3072_mask_24(b1, b1, 0 - ca);
+    u += sp_3072_add_24(r + 48, r + 48, b1);
+    u += sp_3072_sub_in_place_48(z1, z2);
+    u += sp_3072_sub_in_place_48(z1, z0);
+    u += sp_3072_add_48(r + 24, r + 24, z1);
+    r[72] = u;
+    XMEMSET(r + 72 + 1, 0, sizeof(sp_digit) * (24 - 1));
+    (void)sp_3072_add_48(r + 48, r + 48, z2);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a)
+{
+    sp_digit* z0 = r;
+    sp_digit z2[48];
+    sp_digit z1[48];
+    sp_digit a1[24];
+    sp_digit u;
+
+    u = sp_3072_add_24(a1, a, &a[24]);
+    sp_3072_sqr_24(z1, a1);
+    sp_3072_sqr_24(z2, &a[24]);
+    sp_3072_sqr_24(z0, a);
+    sp_3072_mask_24(r + 48, a1, 0 - u);
+    u += sp_3072_add_24(r + 48, r + 48, r + 48);
+    u += sp_3072_sub_in_place_48(z1, z2);
+    u += sp_3072_sub_in_place_48(z1, z0);
+    u += sp_3072_add_48(r + 24, r + 24, z1);
+    r[72] = u;
+    XMEMSET(r + 72 + 1, 0, sizeof(sp_digit) * (24 - 1));
+    (void)sp_3072_add_48(r + 48, r + 48, z2);
+}
+
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_3072_sub_in_place_96(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldr	r3, [%[a], #0]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b], #0]\n\t"
+        "ldr	r6, [%[b], #4]\n\t"
+        "sub	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #0]\n\t"
+        "str	r4, [%[a], #4]\n\t"
+        "ldr	r3, [%[a], #8]\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "ldr	r5, [%[b], #8]\n\t"
+        "ldr	r6, [%[b], #12]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #8]\n\t"
+        "str	r4, [%[a], #12]\n\t"
+        "ldr	r3, [%[a], #16]\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "ldr	r5, [%[b], #16]\n\t"
+        "ldr	r6, [%[b], #20]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #16]\n\t"
+        "str	r4, [%[a], #20]\n\t"
+        "ldr	r3, [%[a], #24]\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "ldr	r5, [%[b], #24]\n\t"
+        "ldr	r6, [%[b], #28]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #24]\n\t"
+        "str	r4, [%[a], #28]\n\t"
+        "ldr	r3, [%[a], #32]\n\t"
+        "ldr	r4, [%[a], #36]\n\t"
+        "ldr	r5, [%[b], #32]\n\t"
+        "ldr	r6, [%[b], #36]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #32]\n\t"
+        "str	r4, [%[a], #36]\n\t"
+        "ldr	r3, [%[a], #40]\n\t"
+        "ldr	r4, [%[a], #44]\n\t"
+        "ldr	r5, [%[b], #40]\n\t"
+        "ldr	r6, [%[b], #44]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #40]\n\t"
+        "str	r4, [%[a], #44]\n\t"
+        "ldr	r3, [%[a], #48]\n\t"
+        "ldr	r4, [%[a], #52]\n\t"
+        "ldr	r5, [%[b], #48]\n\t"
+        "ldr	r6, [%[b], #52]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #48]\n\t"
+        "str	r4, [%[a], #52]\n\t"
+        "ldr	r3, [%[a], #56]\n\t"
+        "ldr	r4, [%[a], #60]\n\t"
+        "ldr	r5, [%[b], #56]\n\t"
+        "ldr	r6, [%[b], #60]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #56]\n\t"
+        "str	r4, [%[a], #60]\n\t"
+        "ldr	r3, [%[a], #64]\n\t"
+        "ldr	r4, [%[a], #68]\n\t"
+        "ldr	r5, [%[b], #64]\n\t"
+        "ldr	r6, [%[b], #68]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #64]\n\t"
+        "str	r4, [%[a], #68]\n\t"
+        "ldr	r3, [%[a], #72]\n\t"
+        "ldr	r4, [%[a], #76]\n\t"
+        "ldr	r5, [%[b], #72]\n\t"
+        "ldr	r6, [%[b], #76]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #72]\n\t"
+        "str	r4, [%[a], #76]\n\t"
+        "ldr	r3, [%[a], #80]\n\t"
+        "ldr	r4, [%[a], #84]\n\t"
+        "ldr	r5, [%[b], #80]\n\t"
+        "ldr	r6, [%[b], #84]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #80]\n\t"
+        "str	r4, [%[a], #84]\n\t"
+        "ldr	r3, [%[a], #88]\n\t"
+        "ldr	r4, [%[a], #92]\n\t"
+        "ldr	r5, [%[b], #88]\n\t"
+        "ldr	r6, [%[b], #92]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #88]\n\t"
+        "str	r4, [%[a], #92]\n\t"
+        "ldr	r3, [%[a], #96]\n\t"
+        "ldr	r4, [%[a], #100]\n\t"
+        "ldr	r5, [%[b], #96]\n\t"
+        "ldr	r6, [%[b], #100]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #96]\n\t"
+        "str	r4, [%[a], #100]\n\t"
+        "ldr	r3, [%[a], #104]\n\t"
+        "ldr	r4, [%[a], #108]\n\t"
+        "ldr	r5, [%[b], #104]\n\t"
+        "ldr	r6, [%[b], #108]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #104]\n\t"
+        "str	r4, [%[a], #108]\n\t"
+        "ldr	r3, [%[a], #112]\n\t"
+        "ldr	r4, [%[a], #116]\n\t"
+        "ldr	r5, [%[b], #112]\n\t"
+        "ldr	r6, [%[b], #116]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #112]\n\t"
+        "str	r4, [%[a], #116]\n\t"
+        "ldr	r3, [%[a], #120]\n\t"
+        "ldr	r4, [%[a], #124]\n\t"
+        "ldr	r5, [%[b], #120]\n\t"
+        "ldr	r6, [%[b], #124]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #120]\n\t"
+        "str	r4, [%[a], #124]\n\t"
+        "sbc	%[c], %[c]\n\t"
+        "add	%[a], #0x80\n\t"
+        "add	%[b], #0x80\n\t"
+        "mov	r5, #0\n\t"
+        "sub	r5, %[c]\n\t"
+        "ldr	r3, [%[a], #0]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b], #0]\n\t"
+        "ldr	r6, [%[b], #4]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #0]\n\t"
+        "str	r4, [%[a], #4]\n\t"
+        "ldr	r3, [%[a], #8]\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "ldr	r5, [%[b], #8]\n\t"
+        "ldr	r6, [%[b], #12]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #8]\n\t"
+        "str	r4, [%[a], #12]\n\t"
+        "ldr	r3, [%[a], #16]\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "ldr	r5, [%[b], #16]\n\t"
+        "ldr	r6, [%[b], #20]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #16]\n\t"
+        "str	r4, [%[a], #20]\n\t"
+        "ldr	r3, [%[a], #24]\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "ldr	r5, [%[b], #24]\n\t"
+        "ldr	r6, [%[b], #28]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #24]\n\t"
+        "str	r4, [%[a], #28]\n\t"
+        "ldr	r3, [%[a], #32]\n\t"
+        "ldr	r4, [%[a], #36]\n\t"
+        "ldr	r5, [%[b], #32]\n\t"
+        "ldr	r6, [%[b], #36]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #32]\n\t"
+        "str	r4, [%[a], #36]\n\t"
+        "ldr	r3, [%[a], #40]\n\t"
+        "ldr	r4, [%[a], #44]\n\t"
+        "ldr	r5, [%[b], #40]\n\t"
+        "ldr	r6, [%[b], #44]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #40]\n\t"
+        "str	r4, [%[a], #44]\n\t"
+        "ldr	r3, [%[a], #48]\n\t"
+        "ldr	r4, [%[a], #52]\n\t"
+        "ldr	r5, [%[b], #48]\n\t"
+        "ldr	r6, [%[b], #52]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #48]\n\t"
+        "str	r4, [%[a], #52]\n\t"
+        "ldr	r3, [%[a], #56]\n\t"
+        "ldr	r4, [%[a], #60]\n\t"
+        "ldr	r5, [%[b], #56]\n\t"
+        "ldr	r6, [%[b], #60]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #56]\n\t"
+        "str	r4, [%[a], #60]\n\t"
+        "ldr	r3, [%[a], #64]\n\t"
+        "ldr	r4, [%[a], #68]\n\t"
+        "ldr	r5, [%[b], #64]\n\t"
+        "ldr	r6, [%[b], #68]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #64]\n\t"
+        "str	r4, [%[a], #68]\n\t"
+        "ldr	r3, [%[a], #72]\n\t"
+        "ldr	r4, [%[a], #76]\n\t"
+        "ldr	r5, [%[b], #72]\n\t"
+        "ldr	r6, [%[b], #76]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #72]\n\t"
+        "str	r4, [%[a], #76]\n\t"
+        "ldr	r3, [%[a], #80]\n\t"
+        "ldr	r4, [%[a], #84]\n\t"
+        "ldr	r5, [%[b], #80]\n\t"
+        "ldr	r6, [%[b], #84]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #80]\n\t"
+        "str	r4, [%[a], #84]\n\t"
+        "ldr	r3, [%[a], #88]\n\t"
+        "ldr	r4, [%[a], #92]\n\t"
+        "ldr	r5, [%[b], #88]\n\t"
+        "ldr	r6, [%[b], #92]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #88]\n\t"
+        "str	r4, [%[a], #92]\n\t"
+        "ldr	r3, [%[a], #96]\n\t"
+        "ldr	r4, [%[a], #100]\n\t"
+        "ldr	r5, [%[b], #96]\n\t"
+        "ldr	r6, [%[b], #100]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #96]\n\t"
+        "str	r4, [%[a], #100]\n\t"
+        "ldr	r3, [%[a], #104]\n\t"
+        "ldr	r4, [%[a], #108]\n\t"
+        "ldr	r5, [%[b], #104]\n\t"
+        "ldr	r6, [%[b], #108]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #104]\n\t"
+        "str	r4, [%[a], #108]\n\t"
+        "ldr	r3, [%[a], #112]\n\t"
+        "ldr	r4, [%[a], #116]\n\t"
+        "ldr	r5, [%[b], #112]\n\t"
+        "ldr	r6, [%[b], #116]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #112]\n\t"
+        "str	r4, [%[a], #116]\n\t"
+        "ldr	r3, [%[a], #120]\n\t"
+        "ldr	r4, [%[a], #124]\n\t"
+        "ldr	r5, [%[b], #120]\n\t"
+        "ldr	r6, [%[b], #124]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #120]\n\t"
+        "str	r4, [%[a], #124]\n\t"
+        "sbc	%[c], %[c]\n\t"
+        "add	%[a], #0x80\n\t"
+        "add	%[b], #0x80\n\t"
+        "mov	r5, #0\n\t"
+        "sub	r5, %[c]\n\t"
+        "ldr	r3, [%[a], #0]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b], #0]\n\t"
+        "ldr	r6, [%[b], #4]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #0]\n\t"
+        "str	r4, [%[a], #4]\n\t"
+        "ldr	r3, [%[a], #8]\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "ldr	r5, [%[b], #8]\n\t"
+        "ldr	r6, [%[b], #12]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #8]\n\t"
+        "str	r4, [%[a], #12]\n\t"
+        "ldr	r3, [%[a], #16]\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "ldr	r5, [%[b], #16]\n\t"
+        "ldr	r6, [%[b], #20]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #16]\n\t"
+        "str	r4, [%[a], #20]\n\t"
+        "ldr	r3, [%[a], #24]\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "ldr	r5, [%[b], #24]\n\t"
+        "ldr	r6, [%[b], #28]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #24]\n\t"
+        "str	r4, [%[a], #28]\n\t"
+        "ldr	r3, [%[a], #32]\n\t"
+        "ldr	r4, [%[a], #36]\n\t"
+        "ldr	r5, [%[b], #32]\n\t"
+        "ldr	r6, [%[b], #36]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #32]\n\t"
+        "str	r4, [%[a], #36]\n\t"
+        "ldr	r3, [%[a], #40]\n\t"
+        "ldr	r4, [%[a], #44]\n\t"
+        "ldr	r5, [%[b], #40]\n\t"
+        "ldr	r6, [%[b], #44]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #40]\n\t"
+        "str	r4, [%[a], #44]\n\t"
+        "ldr	r3, [%[a], #48]\n\t"
+        "ldr	r4, [%[a], #52]\n\t"
+        "ldr	r5, [%[b], #48]\n\t"
+        "ldr	r6, [%[b], #52]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #48]\n\t"
+        "str	r4, [%[a], #52]\n\t"
+        "ldr	r3, [%[a], #56]\n\t"
+        "ldr	r4, [%[a], #60]\n\t"
+        "ldr	r5, [%[b], #56]\n\t"
+        "ldr	r6, [%[b], #60]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #56]\n\t"
+        "str	r4, [%[a], #60]\n\t"
+        "ldr	r3, [%[a], #64]\n\t"
+        "ldr	r4, [%[a], #68]\n\t"
+        "ldr	r5, [%[b], #64]\n\t"
+        "ldr	r6, [%[b], #68]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #64]\n\t"
+        "str	r4, [%[a], #68]\n\t"
+        "ldr	r3, [%[a], #72]\n\t"
+        "ldr	r4, [%[a], #76]\n\t"
+        "ldr	r5, [%[b], #72]\n\t"
+        "ldr	r6, [%[b], #76]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #72]\n\t"
+        "str	r4, [%[a], #76]\n\t"
+        "ldr	r3, [%[a], #80]\n\t"
+        "ldr	r4, [%[a], #84]\n\t"
+        "ldr	r5, [%[b], #80]\n\t"
+        "ldr	r6, [%[b], #84]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #80]\n\t"
+        "str	r4, [%[a], #84]\n\t"
+        "ldr	r3, [%[a], #88]\n\t"
+        "ldr	r4, [%[a], #92]\n\t"
+        "ldr	r5, [%[b], #88]\n\t"
+        "ldr	r6, [%[b], #92]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #88]\n\t"
+        "str	r4, [%[a], #92]\n\t"
+        "ldr	r3, [%[a], #96]\n\t"
+        "ldr	r4, [%[a], #100]\n\t"
+        "ldr	r5, [%[b], #96]\n\t"
+        "ldr	r6, [%[b], #100]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #96]\n\t"
+        "str	r4, [%[a], #100]\n\t"
+        "ldr	r3, [%[a], #104]\n\t"
+        "ldr	r4, [%[a], #108]\n\t"
+        "ldr	r5, [%[b], #104]\n\t"
+        "ldr	r6, [%[b], #108]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #104]\n\t"
+        "str	r4, [%[a], #108]\n\t"
+        "ldr	r3, [%[a], #112]\n\t"
+        "ldr	r4, [%[a], #116]\n\t"
+        "ldr	r5, [%[b], #112]\n\t"
+        "ldr	r6, [%[b], #116]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #112]\n\t"
+        "str	r4, [%[a], #116]\n\t"
+        "ldr	r3, [%[a], #120]\n\t"
+        "ldr	r4, [%[a], #124]\n\t"
+        "ldr	r5, [%[b], #120]\n\t"
+        "ldr	r6, [%[b], #124]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #120]\n\t"
+        "str	r4, [%[a], #124]\n\t"
+        "sbc	%[c], %[c]\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6"
+    );
+
+    return c;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r7, #0\n\t"
+        "mvn	r7, r7\n\t"
+        "ldr	r4, [%[a], #0]\n\t"
+        "ldr	r5, [%[b], #0]\n\t"
+        "add	r4, r5\n\t"
+        "str	r4, [%[r], #0]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b], #4]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #4]\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "ldr	r5, [%[b], #8]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "ldr	r5, [%[b], #12]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #12]\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "ldr	r5, [%[b], #16]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "ldr	r5, [%[b], #20]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #20]\n\t"
+        "ldr	r4, [%[a], #24]\n\t"
+        "ldr	r5, [%[b], #24]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #24]\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "ldr	r5, [%[b], #28]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #28]\n\t"
+        "ldr	r4, [%[a], #32]\n\t"
+        "ldr	r5, [%[b], #32]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #32]\n\t"
+        "ldr	r4, [%[a], #36]\n\t"
+        "ldr	r5, [%[b], #36]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #36]\n\t"
+        "ldr	r4, [%[a], #40]\n\t"
+        "ldr	r5, [%[b], #40]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #40]\n\t"
+        "ldr	r4, [%[a], #44]\n\t"
+        "ldr	r5, [%[b], #44]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #44]\n\t"
+        "ldr	r4, [%[a], #48]\n\t"
+        "ldr	r5, [%[b], #48]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #48]\n\t"
+        "ldr	r4, [%[a], #52]\n\t"
+        "ldr	r5, [%[b], #52]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #52]\n\t"
+        "ldr	r4, [%[a], #56]\n\t"
+        "ldr	r5, [%[b], #56]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #56]\n\t"
+        "ldr	r4, [%[a], #60]\n\t"
+        "ldr	r5, [%[b], #60]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #60]\n\t"
+        "ldr	r4, [%[a], #64]\n\t"
+        "ldr	r5, [%[b], #64]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #64]\n\t"
+        "ldr	r4, [%[a], #68]\n\t"
+        "ldr	r5, [%[b], #68]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #68]\n\t"
+        "ldr	r4, [%[a], #72]\n\t"
+        "ldr	r5, [%[b], #72]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #72]\n\t"
+        "ldr	r4, [%[a], #76]\n\t"
+        "ldr	r5, [%[b], #76]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #76]\n\t"
+        "ldr	r4, [%[a], #80]\n\t"
+        "ldr	r5, [%[b], #80]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #80]\n\t"
+        "ldr	r4, [%[a], #84]\n\t"
+        "ldr	r5, [%[b], #84]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #84]\n\t"
+        "ldr	r4, [%[a], #88]\n\t"
+        "ldr	r5, [%[b], #88]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #88]\n\t"
+        "ldr	r4, [%[a], #92]\n\t"
+        "ldr	r5, [%[b], #92]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #92]\n\t"
+        "ldr	r4, [%[a], #96]\n\t"
+        "ldr	r5, [%[b], #96]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #96]\n\t"
+        "ldr	r4, [%[a], #100]\n\t"
+        "ldr	r5, [%[b], #100]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #100]\n\t"
+        "ldr	r4, [%[a], #104]\n\t"
+        "ldr	r5, [%[b], #104]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #104]\n\t"
+        "ldr	r4, [%[a], #108]\n\t"
+        "ldr	r5, [%[b], #108]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #108]\n\t"
+        "ldr	r4, [%[a], #112]\n\t"
+        "ldr	r5, [%[b], #112]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #112]\n\t"
+        "ldr	r4, [%[a], #116]\n\t"
+        "ldr	r5, [%[b], #116]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #116]\n\t"
+        "ldr	r4, [%[a], #120]\n\t"
+        "ldr	r5, [%[b], #120]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #120]\n\t"
+        "ldr	r4, [%[a], #124]\n\t"
+        "ldr	r5, [%[b], #124]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #124]\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c]\n\t"
+        "add	%[a], #0x80\n\t"
+        "add	%[b], #0x80\n\t"
+        "add	%[r], #0x80\n\t"
+        "add	%[c], r7\n\t"
+        "ldr	r4, [%[a], #0]\n\t"
+        "ldr	r5, [%[b], #0]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #0]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b], #4]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #4]\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "ldr	r5, [%[b], #8]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "ldr	r5, [%[b], #12]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #12]\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "ldr	r5, [%[b], #16]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "ldr	r5, [%[b], #20]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #20]\n\t"
+        "ldr	r4, [%[a], #24]\n\t"
+        "ldr	r5, [%[b], #24]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #24]\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "ldr	r5, [%[b], #28]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #28]\n\t"
+        "ldr	r4, [%[a], #32]\n\t"
+        "ldr	r5, [%[b], #32]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #32]\n\t"
+        "ldr	r4, [%[a], #36]\n\t"
+        "ldr	r5, [%[b], #36]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #36]\n\t"
+        "ldr	r4, [%[a], #40]\n\t"
+        "ldr	r5, [%[b], #40]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #40]\n\t"
+        "ldr	r4, [%[a], #44]\n\t"
+        "ldr	r5, [%[b], #44]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #44]\n\t"
+        "ldr	r4, [%[a], #48]\n\t"
+        "ldr	r5, [%[b], #48]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #48]\n\t"
+        "ldr	r4, [%[a], #52]\n\t"
+        "ldr	r5, [%[b], #52]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #52]\n\t"
+        "ldr	r4, [%[a], #56]\n\t"
+        "ldr	r5, [%[b], #56]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #56]\n\t"
+        "ldr	r4, [%[a], #60]\n\t"
+        "ldr	r5, [%[b], #60]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #60]\n\t"
+        "ldr	r4, [%[a], #64]\n\t"
+        "ldr	r5, [%[b], #64]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #64]\n\t"
+        "ldr	r4, [%[a], #68]\n\t"
+        "ldr	r5, [%[b], #68]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #68]\n\t"
+        "ldr	r4, [%[a], #72]\n\t"
+        "ldr	r5, [%[b], #72]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #72]\n\t"
+        "ldr	r4, [%[a], #76]\n\t"
+        "ldr	r5, [%[b], #76]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #76]\n\t"
+        "ldr	r4, [%[a], #80]\n\t"
+        "ldr	r5, [%[b], #80]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #80]\n\t"
+        "ldr	r4, [%[a], #84]\n\t"
+        "ldr	r5, [%[b], #84]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #84]\n\t"
+        "ldr	r4, [%[a], #88]\n\t"
+        "ldr	r5, [%[b], #88]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #88]\n\t"
+        "ldr	r4, [%[a], #92]\n\t"
+        "ldr	r5, [%[b], #92]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #92]\n\t"
+        "ldr	r4, [%[a], #96]\n\t"
+        "ldr	r5, [%[b], #96]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #96]\n\t"
+        "ldr	r4, [%[a], #100]\n\t"
+        "ldr	r5, [%[b], #100]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #100]\n\t"
+        "ldr	r4, [%[a], #104]\n\t"
+        "ldr	r5, [%[b], #104]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #104]\n\t"
+        "ldr	r4, [%[a], #108]\n\t"
+        "ldr	r5, [%[b], #108]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #108]\n\t"
+        "ldr	r4, [%[a], #112]\n\t"
+        "ldr	r5, [%[b], #112]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #112]\n\t"
+        "ldr	r4, [%[a], #116]\n\t"
+        "ldr	r5, [%[b], #116]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #116]\n\t"
+        "ldr	r4, [%[a], #120]\n\t"
+        "ldr	r5, [%[b], #120]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #120]\n\t"
+        "ldr	r4, [%[a], #124]\n\t"
+        "ldr	r5, [%[b], #124]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #124]\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c]\n\t"
+        "add	%[a], #0x80\n\t"
+        "add	%[b], #0x80\n\t"
+        "add	%[r], #0x80\n\t"
+        "add	%[c], r7\n\t"
+        "ldr	r4, [%[a], #0]\n\t"
+        "ldr	r5, [%[b], #0]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #0]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b], #4]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #4]\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "ldr	r5, [%[b], #8]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "ldr	r5, [%[b], #12]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #12]\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "ldr	r5, [%[b], #16]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "ldr	r5, [%[b], #20]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #20]\n\t"
+        "ldr	r4, [%[a], #24]\n\t"
+        "ldr	r5, [%[b], #24]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #24]\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "ldr	r5, [%[b], #28]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #28]\n\t"
+        "ldr	r4, [%[a], #32]\n\t"
+        "ldr	r5, [%[b], #32]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #32]\n\t"
+        "ldr	r4, [%[a], #36]\n\t"
+        "ldr	r5, [%[b], #36]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #36]\n\t"
+        "ldr	r4, [%[a], #40]\n\t"
+        "ldr	r5, [%[b], #40]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #40]\n\t"
+        "ldr	r4, [%[a], #44]\n\t"
+        "ldr	r5, [%[b], #44]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #44]\n\t"
+        "ldr	r4, [%[a], #48]\n\t"
+        "ldr	r5, [%[b], #48]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #48]\n\t"
+        "ldr	r4, [%[a], #52]\n\t"
+        "ldr	r5, [%[b], #52]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #52]\n\t"
+        "ldr	r4, [%[a], #56]\n\t"
+        "ldr	r5, [%[b], #56]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #56]\n\t"
+        "ldr	r4, [%[a], #60]\n\t"
+        "ldr	r5, [%[b], #60]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #60]\n\t"
+        "ldr	r4, [%[a], #64]\n\t"
+        "ldr	r5, [%[b], #64]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #64]\n\t"
+        "ldr	r4, [%[a], #68]\n\t"
+        "ldr	r5, [%[b], #68]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #68]\n\t"
+        "ldr	r4, [%[a], #72]\n\t"
+        "ldr	r5, [%[b], #72]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #72]\n\t"
+        "ldr	r4, [%[a], #76]\n\t"
+        "ldr	r5, [%[b], #76]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #76]\n\t"
+        "ldr	r4, [%[a], #80]\n\t"
+        "ldr	r5, [%[b], #80]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #80]\n\t"
+        "ldr	r4, [%[a], #84]\n\t"
+        "ldr	r5, [%[b], #84]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #84]\n\t"
+        "ldr	r4, [%[a], #88]\n\t"
+        "ldr	r5, [%[b], #88]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #88]\n\t"
+        "ldr	r4, [%[a], #92]\n\t"
+        "ldr	r5, [%[b], #92]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #92]\n\t"
+        "ldr	r4, [%[a], #96]\n\t"
+        "ldr	r5, [%[b], #96]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #96]\n\t"
+        "ldr	r4, [%[a], #100]\n\t"
+        "ldr	r5, [%[b], #100]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #100]\n\t"
+        "ldr	r4, [%[a], #104]\n\t"
+        "ldr	r5, [%[b], #104]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #104]\n\t"
+        "ldr	r4, [%[a], #108]\n\t"
+        "ldr	r5, [%[b], #108]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #108]\n\t"
+        "ldr	r4, [%[a], #112]\n\t"
+        "ldr	r5, [%[b], #112]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #112]\n\t"
+        "ldr	r4, [%[a], #116]\n\t"
+        "ldr	r5, [%[b], #116]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #116]\n\t"
+        "ldr	r4, [%[a], #120]\n\t"
+        "ldr	r5, [%[b], #120]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #120]\n\t"
+        "ldr	r4, [%[a], #124]\n\t"
+        "ldr	r5, [%[b], #124]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #124]\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c]\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r7"
+    );
+
+    return c;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * m  Mask to AND against each digit.
+ */
+static void sp_3072_mask_48(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<48; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    int i;
+
+    for (i = 0; i < 48; i += 8) {
+        r[i+0] = a[i+0] & m;
+        r[i+1] = a[i+1] & m;
+        r[i+2] = a[i+2] & m;
+        r[i+3] = a[i+3] & m;
+        r[i+4] = a[i+4] & m;
+        r[i+5] = a[i+5] & m;
+        r[i+6] = a[i+6] & m;
+        r[i+7] = a[i+7] & m;
+    }
+#endif
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit* z0 = r;
+    sp_digit z1[96];
+    sp_digit a1[48];
+    sp_digit b1[48];
+    sp_digit z2[96];
+    sp_digit u, ca, cb;
+
+    ca = sp_3072_add_48(a1, a, &a[48]);
+    cb = sp_3072_add_48(b1, b, &b[48]);
+    u  = ca & cb;
+    sp_3072_mul_48(z1, a1, b1);
+    sp_3072_mul_48(z2, &a[48], &b[48]);
+    sp_3072_mul_48(z0, a, b);
+    sp_3072_mask_48(r + 96, a1, 0 - cb);
+    sp_3072_mask_48(b1, b1, 0 - ca);
+    u += sp_3072_add_48(r + 96, r + 96, b1);
+    u += sp_3072_sub_in_place_96(z1, z2);
+    u += sp_3072_sub_in_place_96(z1, z0);
+    u += sp_3072_add_96(r + 48, r + 48, z1);
+    r[144] = u;
+    XMEMSET(r + 144 + 1, 0, sizeof(sp_digit) * (48 - 1));
+    (void)sp_3072_add_96(r + 96, r + 96, z2);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a)
+{
+    sp_digit* z0 = r;
+    sp_digit z2[96];
+    sp_digit z1[96];
+    sp_digit a1[48];
+    sp_digit u;
+
+    u = sp_3072_add_48(a1, a, &a[48]);
+    sp_3072_sqr_48(z1, a1);
+    sp_3072_sqr_48(z2, &a[48]);
+    sp_3072_sqr_48(z0, a);
+    sp_3072_mask_48(r + 96, a1, 0 - u);
+    u += sp_3072_add_48(r + 96, r + 96, r + 96);
+    u += sp_3072_sub_in_place_96(z1, z2);
+    u += sp_3072_sub_in_place_96(z1, z0);
+    u += sp_3072_add_96(r + 48, r + 48, z1);
+    r[144] = u;
+    XMEMSET(r + 144 + 1, 0, sizeof(sp_digit) * (48 - 1));
+    (void)sp_3072_add_96(r + 96, r + 96, z2);
+}
+
+#endif /* !WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r6, %[a]\n\t"
+        "mov	r7, #0\n\t"
+        "mov	r4, #1\n\t"
+        "lsl	r4, #8\n\t"
+        "add	r4, #128\n\t"
+        "sub	r7, #1\n\t"
+        "add	r6, r4\n\t"
+        "\n1:\n\t"
+        "add	%[c], r7\n\t"
+        "ldr	r4, [%[a]]\n\t"
+        "ldr	r5, [%[b]]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r]]\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c]\n\t"
+        "add	%[a], #4\n\t"
+        "add	%[b], #4\n\t"
+        "add	%[r], #4\n\t"
+        "cmp	%[a], r6\n\t"
+        "bne	1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r7"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into a. (a -= b)
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_3072_sub_in_place_96(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+    __asm__ __volatile__ (
+        "mov	r7, %[a]\n\t"
+        "mov	r5, #1\n\t"
+        "lsl	r5, #8\n\t"
+        "add	r5, #128\n\t"
+        "add	r7, r5\n\t"
+        "\n1:\n\t"
+        "mov	r5, #0\n\t"
+        "sub	r5, %[c]\n\t"
+        "ldr	r3, [%[a]]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b]]\n\t"
+        "ldr	r6, [%[b], #4]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a]]\n\t"
+        "str	r4, [%[a], #4]\n\t"
+        "sbc	%[c], %[c]\n\t"
+        "add	%[a], #8\n\t"
+        "add	%[b], #8\n\t"
+        "cmp	%[a], r7\n\t"
+        "bne	1b\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6", "r7"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit tmp[96 * 2];
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r8, r3\n\t"
+        "mov	r11, %[r]\n\t"
+        "mov	r9, %[a]\n\t"
+        "mov	r10, %[b]\n\t"
+        "mov	r6, #1\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, #128\n\t"
+        "add	r6, r9\n\t"
+        "mov	r12, r6\n\t"
+        "\n1:\n\t"
+        "mov	%[r], #0\n\t"
+        "mov	r5, #0\n\t"
+        "mov	r6, #1\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, #124\n\t"
+        "mov	%[a], r8\n\t"
+        "sub	%[a], r6\n\t"
+        "sbc	r6, r6\n\t"
+        "mvn	r6, r6\n\t"
+        "and	%[a], r6\n\t"
+        "mov	%[b], r8\n\t"
+        "sub	%[b], %[a]\n\t"
+        "add	%[a], r9\n\t"
+        "add	%[b], r10\n\t"
+        "\n2:\n\t"
+        "# Multiply Start\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r7, [%[b]]\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r3, r7\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r7, [%[b]]\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r7, [%[b]]\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r7, [%[b]]\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "# Multiply Done\n\t"
+        "add	%[a], #4\n\t"
+        "sub	%[b], #4\n\t"
+        "cmp	%[a], r12\n\t"
+        "beq	3f\n\t"
+        "mov	r6, r8\n\t"
+        "add	r6, r9\n\t"
+        "cmp	%[a], r6\n\t"
+        "ble	2b\n\t"
+        "\n3:\n\t"
+        "mov	%[r], r11\n\t"
+        "mov	r7, r8\n\t"
+        "str	r3, [%[r], r7]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "add	r7, #4\n\t"
+        "mov	r8, r7\n\t"
+        "mov	r6, #2\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, #248\n\t"
+        "cmp	r7, r6\n\t"
+        "ble	1b\n\t"
+        "str	r3, [%[r], r7]\n\t"
+        "mov	%[a], r9\n\t"
+        "mov	%[b], r10\n\t"
+        :
+        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
+    );
+
+    XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r5, #0\n\t"
+        "mov	r8, r3\n\t"
+        "mov	r11, %[r]\n\t"
+        "mov	r6, #3\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "neg	r6, r6\n\t"
+        "add	sp, r6\n\t"
+        "mov	r10, sp\n\t"
+        "mov	r9, %[a]\n\t"
+        "\n1:\n\t"
+        "mov	%[r], #0\n\t"
+        "mov	r6, #1\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, #124\n\t"
+        "mov	%[a], r8\n\t"
+        "sub	%[a], r6\n\t"
+        "sbc	r6, r6\n\t"
+        "mvn	r6, r6\n\t"
+        "and	%[a], r6\n\t"
+        "mov	r2, r8\n\t"
+        "sub	r2, %[a]\n\t"
+        "add	%[a], r9\n\t"
+        "add	r2, r9\n\t"
+        "\n2:\n\t"
+        "cmp	r2, %[a]\n\t"
+        "beq	4f\n\t"
+        "# Multiply * 2: Start\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r7, [r2]\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r3, r7\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "add	r3, r7\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r7, [r2]\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r7, [r2]\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r7, [r2]\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "# Multiply * 2: Done\n\t"
+        "bal	5f\n\t"
+        "\n4:\n\t"
+        "# Square: Start\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "mul	r6, r6\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "mul	r7, r7\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #15\n\t"
+        "lsl	r6, r6, #17\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "# Square: Done\n\t"
+        "\n5:\n\t"
+        "add	%[a], #4\n\t"
+        "sub	r2, #4\n\t"
+        "mov	r6, #1\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, #128\n\t"
+        "add	r6, r9\n\t"
+        "cmp	%[a], r6\n\t"
+        "beq	3f\n\t"
+        "cmp	%[a], r2\n\t"
+        "bgt	3f\n\t"
+        "mov	r7, r8\n\t"
+        "add	r7, r9\n\t"
+        "cmp	%[a], r7\n\t"
+        "ble	2b\n\t"
+        "\n3:\n\t"
+        "mov	%[r], r10\n\t"
+        "mov	r7, r8\n\t"
+        "str	r3, [%[r], r7]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "mov	r5, #0\n\t"
+        "add	r7, #4\n\t"
+        "mov	r8, r7\n\t"
+        "mov	r6, #2\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, #248\n\t"
+        "cmp	r7, r6\n\t"
+        "ble	1b\n\t"
+        "mov	%[a], r9\n\t"
+        "str	r3, [%[r], r7]\n\t"
+        "mov	%[r], r11\n\t"
+        "mov	%[a], r10\n\t"
+        "mov	r3, #2\n\t"
+        "lsl	r3, r3, #8\n\t"
+        "add	r3, #252\n\t"
+        "\n4:\n\t"
+        "ldr	r6, [%[a], r3]\n\t"
+        "str	r6, [%[r], r3]\n\t"
+        "sub	r3, #4\n\t"
+        "bge	4b\n\t"
+        "mov	r6, #3\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	sp, r6\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
+    );
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+#ifdef WOLFSSL_SP_SMALL
+/* AND m into each word of a and store in r.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * m  Mask to AND against each digit.
+ */
+static void sp_3072_mask_48(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+    int i;
+
+    for (i=0; i<48; i++) {
+        r[i] = a[i] & m;
+    }
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r6, %[a]\n\t"
+        "mov	r7, #0\n\t"
+        "add	r6, #192\n\t"
+        "sub	r7, #1\n\t"
+        "\n1:\n\t"
+        "add	%[c], r7\n\t"
+        "ldr	r4, [%[a]]\n\t"
+        "ldr	r5, [%[b]]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r]]\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c]\n\t"
+        "add	%[a], #4\n\t"
+        "add	%[b], #4\n\t"
+        "add	%[r], #4\n\t"
+        "cmp	%[a], r6\n\t"
+        "bne	1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r7"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into a. (a -= b)
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_3072_sub_in_place_48(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+    __asm__ __volatile__ (
+        "mov	r7, %[a]\n\t"
+        "add	r7, #192\n\t"
+        "\n1:\n\t"
+        "mov	r5, #0\n\t"
+        "sub	r5, %[c]\n\t"
+        "ldr	r3, [%[a]]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b]]\n\t"
+        "ldr	r6, [%[b], #4]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a]]\n\t"
+        "str	r4, [%[a], #4]\n\t"
+        "sbc	%[c], %[c]\n\t"
+        "add	%[a], #8\n\t"
+        "add	%[b], #8\n\t"
+        "cmp	%[a], r7\n\t"
+        "bne	1b\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6", "r7"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit tmp[48 * 2];
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r8, r3\n\t"
+        "mov	r11, %[r]\n\t"
+        "mov	r9, %[a]\n\t"
+        "mov	r10, %[b]\n\t"
+        "mov	r6, #192\n\t"
+        "add	r6, r9\n\t"
+        "mov	r12, r6\n\t"
+        "\n1:\n\t"
+        "mov	%[r], #0\n\t"
+        "mov	r5, #0\n\t"
+        "mov	r6, #188\n\t"
+        "mov	%[a], r8\n\t"
+        "sub	%[a], r6\n\t"
+        "sbc	r6, r6\n\t"
+        "mvn	r6, r6\n\t"
+        "and	%[a], r6\n\t"
+        "mov	%[b], r8\n\t"
+        "sub	%[b], %[a]\n\t"
+        "add	%[a], r9\n\t"
+        "add	%[b], r10\n\t"
+        "\n2:\n\t"
+        "# Multiply Start\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r7, [%[b]]\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r3, r7\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r7, [%[b]]\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r7, [%[b]]\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r7, [%[b]]\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "# Multiply Done\n\t"
+        "add	%[a], #4\n\t"
+        "sub	%[b], #4\n\t"
+        "cmp	%[a], r12\n\t"
+        "beq	3f\n\t"
+        "mov	r6, r8\n\t"
+        "add	r6, r9\n\t"
+        "cmp	%[a], r6\n\t"
+        "ble	2b\n\t"
+        "\n3:\n\t"
+        "mov	%[r], r11\n\t"
+        "mov	r7, r8\n\t"
+        "str	r3, [%[r], r7]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "add	r7, #4\n\t"
+        "mov	r8, r7\n\t"
+        "mov	r6, #1\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, #120\n\t"
+        "cmp	r7, r6\n\t"
+        "ble	1b\n\t"
+        "str	r3, [%[r], r7]\n\t"
+        "mov	%[a], r9\n\t"
+        "mov	%[b], r10\n\t"
+        :
+        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
+    );
+
+    XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r5, #0\n\t"
+        "mov	r8, r3\n\t"
+        "mov	r11, %[r]\n\t"
+        "mov	r6, #1\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, #128\n\t"
+        "neg	r6, r6\n\t"
+        "add	sp, r6\n\t"
+        "mov	r10, sp\n\t"
+        "mov	r9, %[a]\n\t"
+        "\n1:\n\t"
+        "mov	%[r], #0\n\t"
+        "mov	r6, #188\n\t"
+        "mov	%[a], r8\n\t"
+        "sub	%[a], r6\n\t"
+        "sbc	r6, r6\n\t"
+        "mvn	r6, r6\n\t"
+        "and	%[a], r6\n\t"
+        "mov	r2, r8\n\t"
+        "sub	r2, %[a]\n\t"
+        "add	%[a], r9\n\t"
+        "add	r2, r9\n\t"
+        "\n2:\n\t"
+        "cmp	r2, %[a]\n\t"
+        "beq	4f\n\t"
+        "# Multiply * 2: Start\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r7, [r2]\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r3, r7\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "add	r3, r7\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r7, [r2]\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r7, [r2]\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r7, [r2]\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "# Multiply * 2: Done\n\t"
+        "bal	5f\n\t"
+        "\n4:\n\t"
+        "# Square: Start\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "mul	r6, r6\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "mul	r7, r7\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #15\n\t"
+        "lsl	r6, r6, #17\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "# Square: Done\n\t"
+        "\n5:\n\t"
+        "add	%[a], #4\n\t"
+        "sub	r2, #4\n\t"
+        "mov	r6, #192\n\t"
+        "add	r6, r9\n\t"
+        "cmp	%[a], r6\n\t"
+        "beq	3f\n\t"
+        "cmp	%[a], r2\n\t"
+        "bgt	3f\n\t"
+        "mov	r7, r8\n\t"
+        "add	r7, r9\n\t"
+        "cmp	%[a], r7\n\t"
+        "ble	2b\n\t"
+        "\n3:\n\t"
+        "mov	%[r], r10\n\t"
+        "mov	r7, r8\n\t"
+        "str	r3, [%[r], r7]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "mov	r5, #0\n\t"
+        "add	r7, #4\n\t"
+        "mov	r8, r7\n\t"
+        "mov	r6, #1\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, #120\n\t"
+        "cmp	r7, r6\n\t"
+        "ble	1b\n\t"
+        "mov	%[a], r9\n\t"
+        "str	r3, [%[r], r7]\n\t"
+        "mov	%[r], r11\n\t"
+        "mov	%[a], r10\n\t"
+        "mov	r3, #1\n\t"
+        "lsl	r3, r3, #8\n\t"
+        "add	r3, #124\n\t"
+        "\n4:\n\t"
+        "ldr	r6, [%[a], r3]\n\t"
+        "str	r6, [%[r], r3]\n\t"
+        "sub	r3, #4\n\t"
+        "bge	4b\n\t"
+        "mov	r6, #1\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, #128\n\t"
+        "add	sp, r6\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
+    );
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+/* Caclulate the bottom digit of -1/a mod 2^n.
+ *
+ * a    A single precision number.
+ * rho  Bottom word of inverse.
+ */
+static void sp_3072_mont_setup(const sp_digit* a, sp_digit* rho)
+{
+    sp_digit x, b;
+
+    b = a[0];
+    x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
+
+    /* rho = -1/m mod b */
+    *rho = -x;
+}
+
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision digit.
+ */
+SP_NOINLINE static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a,
+        sp_digit b)
+{
+    __asm__ __volatile__ (
+        "mov	r6, #1\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, #128\n\t"
+        "add	r6, %[a]\n\t"
+        "mov	r8, %[r]\n\t"
+        "mov	r9, r6\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "1:\n\t"
+        "mov	%[r], #0\n\t"
+        "mov	r5, #0\n\t"
+        "# A[] * B\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsl	r7, %[b], #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r3, r7\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "lsr	r7, %[b], #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, %[b], #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "lsl	r7, %[b], #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "# A[] * B - Done\n\t"
+        "mov	%[r], r8\n\t"
+        "str	r3, [%[r]]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "add	%[r], #4\n\t"
+        "add	%[a], #4\n\t"
+        "mov	r8, %[r]\n\t"
+        "cmp	%[a], r9\n\t"
+        "blt	1b\n\t"
+        "str	r3, [%[r]]\n\t"
+        : [r] "+r" (r), [a] "+r" (a)
+        : [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
+    );
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 3072 bits, just need to subtract.
+ *
+ * r  A single precision number.
+ * m  A single precision number.
+ */
+static void sp_3072_mont_norm_48(sp_digit* r, const sp_digit* m)
+{
+    XMEMSET(r, 0, sizeof(sp_digit) * 48);
+
+    /* r = 2^n mod m */
+    sp_3072_sub_in_place_48(r, m);
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * r  A single precision number representing condition subtract result.
+ * a  A single precision number to subtract from.
+ * b  A single precision number to subtract.
+ * m  Mask value to apply.
+ */
+SP_NOINLINE static sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, sp_digit m)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r5, #192\n\t"
+        "mov	r8, r5\n\t"
+        "mov	r7, #0\n\t"
+        "1:\n\t"
+        "ldr	r6, [%[b], r7]\n\t"
+        "and	r6, %[m]\n\t"
+        "mov	r5, #0\n\t"
+        "sub	r5, %[c]\n\t"
+        "ldr	r5, [%[a], r7]\n\t"
+        "sbc	r5, r6\n\t"
+        "sbc	%[c], %[c]\n\t"
+        "str	r5, [%[r], r7]\n\t"
+        "add	r7, #4\n\t"
+        "cmp	r7, r8\n\t"
+        "blt	1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r5", "r6", "r7", "r8"
+    );
+
+    return c;
+}
+
+/* Reduce the number back to 3072 bits using Montgomery reduction.
+ *
+ * a   A single precision number to reduce in place.
+ * m   The single precision number representing the modulus.
+ * mp  The digit representing the negative inverse of m mod 2^n.
+ */
+SP_NOINLINE static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_digit ca = 0;
+
+    __asm__ __volatile__ (
+        "mov	r8, %[mp]\n\t"
+        "mov	r12, %[ca]\n\t"
+        "mov	r14, %[m]\n\t"
+        "mov	r9, %[a]\n\t"
+        "mov	r4, #0\n\t"
+        "# i = 0\n\t"
+        "mov	r11, r4\n\t"
+        "\n1:\n\t"
+        "mov	r5, #0\n\t"
+        "mov	%[ca], #0\n\t"
+        "# mu = a[i] * mp\n\t"
+        "mov	%[mp], r8\n\t"
+        "ldr	%[a], [%[a]]\n\t"
+        "mul	%[mp], %[a]\n\t"
+        "mov	%[m], r14\n\t"
+        "mov	r10, r9\n\t"
+        "\n2:\n\t"
+        "# a[i+j] += m[j] * mu\n\t"
+        "mov	%[a], r10\n\t"
+        "ldr	%[a], [%[a]]\n\t"
+        "mov	%[ca], #0\n\t"
+        "mov	r4, r5\n\t"
+        "mov	r5, #0\n\t"
+        "# Multiply m[j] and mu - Start\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsl	r6, %[mp], #16\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	%[a], r7\n\t"
+        "adc	r5, %[ca]\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	%[a], r6\n\t"
+        "adc	r5, r7\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsr	r6, %[mp], #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r5, r7\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	%[a], r6\n\t"
+        "adc	r5, r7\n\t"
+        "# Multiply m[j] and mu - Done\n\t"
+        "add	r4, %[a]\n\t"
+        "adc	r5, %[ca]\n\t"
+        "mov	%[a], r10\n\t"
+        "str	r4, [%[a]]\n\t"
+        "mov	r6, #4\n\t"
+        "add	%[m], #4\n\t"
+        "add	r10, r6\n\t"
+        "mov	r4, #188\n\t"
+        "add	r4, r9\n\t"
+        "cmp	r10, r4\n\t"
+        "blt	2b\n\t"
+        "# a[i+47] += m[47] * mu\n\t"
+        "mov	%[ca], #0\n\t"
+        "mov	r4, r12\n\t"
+        "mov	%[a], #0\n\t"
+        "# Multiply m[47] and mu - Start\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsl	r6, %[mp], #16\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r5, r7\n\t"
+        "adc	r4, %[ca]\n\t"
+        "adc	%[a], %[ca]\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r5, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	%[a], %[ca]\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsr	r6, %[mp], #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r4, r7\n\t"
+        "adc	%[a], %[ca]\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r5, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	%[a], %[ca]\n\t"
+        "# Multiply m[47] and mu - Done\n\t"
+        "mov	%[ca], %[a]\n\t"
+        "mov	%[a], r10\n\t"
+        "ldr	r7, [%[a], #4]\n\t"
+        "ldr	%[a], [%[a]]\n\t"
+        "mov	r6, #0\n\t"
+        "add	r5, %[a]\n\t"
+        "adc	r7, r4\n\t"
+        "adc	%[ca], r6\n\t"
+        "mov	%[a], r10\n\t"
+        "str	r5, [%[a]]\n\t"
+        "str	r7, [%[a], #4]\n\t"
+        "# i += 1\n\t"
+        "mov	r6, #4\n\t"
+        "add	r9, r6\n\t"
+        "add	r11, r6\n\t"
+        "mov	r12, %[ca]\n\t"
+        "mov	%[a], r9\n\t"
+        "mov	r4, #192\n\t"
+        "cmp	r11, r4\n\t"
+        "blt	1b\n\t"
+        "mov	%[m], r14\n\t"
+        : [ca] "+r" (ca), [a] "+r" (a)
+        : [m] "r" (m), [mp] "r" (mp)
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14"
+    );
+
+    sp_3072_cond_sub_48(a - 48, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montogmery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r   Result of multiplication.
+ * a   First number to multiply in Montogmery form.
+ * b   Second number to multiply in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_3072_mont_mul_48(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_3072_mul_48(r, a, b);
+    sp_3072_mont_reduce_48(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_3072_mont_sqr_48(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_3072_sqr_48(r, a);
+    sp_3072_mont_reduce_48(r, m, mp);
+}
+
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision digit.
+ */
+SP_NOINLINE static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a,
+        sp_digit b)
+{
+    __asm__ __volatile__ (
+        "mov	r6, #192\n\t"
+        "add	r6, %[a]\n\t"
+        "mov	r8, %[r]\n\t"
+        "mov	r9, r6\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "1:\n\t"
+        "mov	%[r], #0\n\t"
+        "mov	r5, #0\n\t"
+        "# A[] * B\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsl	r7, %[b], #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r3, r7\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "lsr	r7, %[b], #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, %[b], #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "lsl	r7, %[b], #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "# A[] * B - Done\n\t"
+        "mov	%[r], r8\n\t"
+        "str	r3, [%[r]]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "add	%[r], #4\n\t"
+        "add	%[a], #4\n\t"
+        "mov	r8, %[r]\n\t"
+        "cmp	%[a], r9\n\t"
+        "blt	1b\n\t"
+        "str	r3, [%[r]]\n\t"
+        : [r] "+r" (r), [a] "+r" (a)
+        : [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
+    );
+}
+
+/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
+ *
+ * d1   The high order half of the number to divide.
+ * d0   The low order half of the number to divide.
+ * div  The dividend.
+ * returns the result of the division.
+ *
+ * Note that this is an approximate div. It may give an answer 1 larger.
+ */
+SP_NOINLINE static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0,
+        sp_digit div)
+{
+    sp_digit r = 0;
+
+    __asm__ __volatile__ (
+        "lsr	r5, %[div], #1\n\t"
+        "add	r5, #1\n\t"
+        "mov	r8, %[d0]\n\t"
+        "mov	r9, %[d1]\n\t"
+        "# Do top 32\n\t"
+        "mov	r6, r5\n\t"
+        "sub	r6, %[d1]\n\t"
+        "sbc	r6, r6\n\t"
+        "add	%[r], %[r]\n\t"
+        "sub	%[r], r6\n\t"
+        "and	r6, r5\n\t"
+        "sub	%[d1], r6\n\t"
+        "# Next 30 bits\n\t"
+        "mov	r4, #29\n\t"
+        "1:\n\t"
+        "lsl	%[d0], %[d0], #1\n\t"
+        "adc	%[d1], %[d1]\n\t"
+        "mov	r6, r5\n\t"
+        "sub	r6, %[d1]\n\t"
+        "sbc	r6, r6\n\t"
+        "add	%[r], %[r]\n\t"
+        "sub	%[r], r6\n\t"
+        "and	r6, r5\n\t"
+        "sub	%[d1], r6\n\t"
+        "sub	r4, #1\n\t"
+        "bpl	1b\n\t"
+        "mov	r7, #0\n\t"
+        "add	%[r], %[r]\n\t"
+        "add	%[r], #1\n\t"
+        "# r * div - Start\n\t"
+        "lsl	%[d1], %[r], #16\n\t"
+        "lsl	r4, %[div], #16\n\t"
+        "lsr	%[d1], %[d1], #16\n\t"
+        "lsr	r4, r4, #16\n\t"
+        "mul	r4, %[d1]\n\t"
+        "lsr	r6, %[div], #16\n\t"
+        "mul	%[d1], r6\n\t"
+        "lsr	r5, %[d1], #16\n\t"
+        "lsl	%[d1], %[d1], #16\n\t"
+        "add	r4, %[d1]\n\t"
+        "adc	r5, r7\n\t"
+        "lsr	%[d1], %[r], #16\n\t"
+        "mul	r6, %[d1]\n\t"
+        "add	r5, r6\n\t"
+        "lsl	r6, %[div], #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "mul	%[d1], r6\n\t"
+        "lsr	r6, %[d1], #16\n\t"
+        "lsl	%[d1], %[d1], #16\n\t"
+        "add	r4, %[d1]\n\t"
+        "adc	r5, r6\n\t"
+        "# r * div - Done\n\t"
+        "mov	%[d1], r8\n\t"
+        "sub	%[d1], r4\n\t"
+        "mov	r4, %[d1]\n\t"
+        "mov	%[d1], r9\n\t"
+        "sbc	%[d1], r5\n\t"
+        "mov	r5, %[d1]\n\t"
+        "add	%[r], r5\n\t"
+        "# r * div - Start\n\t"
+        "lsl	%[d1], %[r], #16\n\t"
+        "lsl	r4, %[div], #16\n\t"
+        "lsr	%[d1], %[d1], #16\n\t"
+        "lsr	r4, r4, #16\n\t"
+        "mul	r4, %[d1]\n\t"
+        "lsr	r6, %[div], #16\n\t"
+        "mul	%[d1], r6\n\t"
+        "lsr	r5, %[d1], #16\n\t"
+        "lsl	%[d1], %[d1], #16\n\t"
+        "add	r4, %[d1]\n\t"
+        "adc	r5, r7\n\t"
+        "lsr	%[d1], %[r], #16\n\t"
+        "mul	r6, %[d1]\n\t"
+        "add	r5, r6\n\t"
+        "lsl	r6, %[div], #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "mul	%[d1], r6\n\t"
+        "lsr	r6, %[d1], #16\n\t"
+        "lsl	%[d1], %[d1], #16\n\t"
+        "add	r4, %[d1]\n\t"
+        "adc	r5, r6\n\t"
+        "# r * div - Done\n\t"
+        "mov	%[d1], r8\n\t"
+        "mov	r6, r9\n\t"
+        "sub	r4, %[d1], r4\n\t"
+        "sbc	r6, r5\n\t"
+        "mov	r5, r6\n\t"
+        "add	%[r], r5\n\t"
+        "# r * div - Start\n\t"
+        "lsl	%[d1], %[r], #16\n\t"
+        "lsl	r4, %[div], #16\n\t"
+        "lsr	%[d1], %[d1], #16\n\t"
+        "lsr	r4, r4, #16\n\t"
+        "mul	r4, %[d1]\n\t"
+        "lsr	r6, %[div], #16\n\t"
+        "mul	%[d1], r6\n\t"
+        "lsr	r5, %[d1], #16\n\t"
+        "lsl	%[d1], %[d1], #16\n\t"
+        "add	r4, %[d1]\n\t"
+        "adc	r5, r7\n\t"
+        "lsr	%[d1], %[r], #16\n\t"
+        "mul	r6, %[d1]\n\t"
+        "add	r5, r6\n\t"
+        "lsl	r6, %[div], #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "mul	%[d1], r6\n\t"
+        "lsr	r6, %[d1], #16\n\t"
+        "lsl	%[d1], %[d1], #16\n\t"
+        "add	r4, %[d1]\n\t"
+        "adc	r5, r6\n\t"
+        "# r * div - Done\n\t"
+        "mov	%[d1], r8\n\t"
+        "mov	r6, r9\n\t"
+        "sub	r4, %[d1], r4\n\t"
+        "sbc	r6, r5\n\t"
+        "mov	r5, r6\n\t"
+        "add	%[r], r5\n\t"
+        "mov	r6, %[div]\n\t"
+        "sub	r6, r4\n\t"
+        "sbc	r6, r6\n\t"
+        "sub	%[r], r6\n\t"
+        : [r] "+r" (r)
+        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+        : "r4", "r5", "r7", "r6", "r8", "r9"
+    );
+    return r;
+}
+
+/* Compare a with b in constant time.
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+SP_NOINLINE static int32_t sp_3072_cmp_48(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+
+
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mvn	r3, r3\n\t"
+        "mov	r6, #188\n\t"
+        "1:\n\t"
+        "ldr	r7, [%[a], r6]\n\t"
+        "ldr	r5, [%[b], r6]\n\t"
+        "and	r7, r3\n\t"
+        "and	r5, r3\n\t"
+        "mov	r4, r7\n\t"
+        "sub	r7, r5\n\t"
+        "sbc	r7, r7\n\t"
+        "add	%[r], r7\n\t"
+        "mvn	r7, r7\n\t"
+        "and	r3, r7\n\t"
+        "sub	r5, r4\n\t"
+        "sbc	r7, r7\n\t"
+        "sub	%[r], r7\n\t"
+        "mvn	r7, r7\n\t"
+        "and	r3, r7\n\t"
+        "sub	r6, #4\n\t"
+        "cmp	r6, #0\n\t"
+        "bge	1b\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
+        : "r3", "r4", "r5", "r6", "r7"
+    );
+
+    return r;
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Nmber to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_div_48(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[96], t2[49];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    div = d[47];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 48);
+    for (i=47; i>=0; i--) {
+        r1 = div_3072_word_48(t1[48 + i], t1[48 + i - 1], div);
+
+        sp_3072_mul_d_48(t2, d, r1);
+        t1[48 + i] += sp_3072_sub_in_place_48(&t1[i], t2);
+        t1[48 + i] -= t2[48];
+        sp_3072_mask_48(t2, d, t1[48 + i]);
+        t1[48 + i] += sp_3072_add_48(&t1[i], &t1[i], t2);
+        sp_3072_mask_48(t2, d, t1[48 + i]);
+        t1[48 + i] += sp_3072_add_48(&t1[i], &t1[i], t2);
+    }
+
+    r1 = sp_3072_cmp_48(t1, d) >= 0;
+    sp_3072_cond_sub_48(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_mod_48(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_3072_div_48(a, m, NULL, r);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * r     A single precision number that is the result of the operation.
+ * a     A single precision number being exponentiated.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[16][96];
+#else
+    sp_digit* t[16];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 96, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<16; i++) {
+            t[i] = td + i * 96;
+        }
+#endif
+        norm = t[0];
+
+        sp_3072_mont_setup(m, &mp);
+        sp_3072_mont_norm_48(norm, m);
+
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 48U);
+        if (reduceA != 0) {
+            err = sp_3072_mod_48(t[1] + 48, a, m);
+            if (err == MP_OKAY) {
+                err = sp_3072_mod_48(t[1], t[1], m);
+            }
+        }
+        else {
+            XMEMCPY(t[1] + 48, a, sizeof(sp_digit) * 48);
+            err = sp_3072_mod_48(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_3072_mont_sqr_48(t[ 2], t[ 1], m, mp);
+        sp_3072_mont_mul_48(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_3072_mont_sqr_48(t[ 4], t[ 2], m, mp);
+        sp_3072_mont_mul_48(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_3072_mont_sqr_48(t[ 6], t[ 3], m, mp);
+        sp_3072_mont_mul_48(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_3072_mont_sqr_48(t[ 8], t[ 4], m, mp);
+        sp_3072_mont_mul_48(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_3072_mont_sqr_48(t[10], t[ 5], m, mp);
+        sp_3072_mont_mul_48(t[11], t[ 6], t[ 5], m, mp);
+        sp_3072_mont_sqr_48(t[12], t[ 6], m, mp);
+        sp_3072_mont_mul_48(t[13], t[ 7], t[ 6], m, mp);
+        sp_3072_mont_sqr_48(t[14], t[ 7], m, mp);
+        sp_3072_mont_mul_48(t[15], t[ 8], t[ 7], m, mp);
+
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 4;
+        if (c == 32) {
+            c = 28;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 48);
+        for (; i>=0 || c>=4; ) {
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 28;
+                n <<= 4;
+                c = 28;
+            }
+            else if (c < 4) {
+                y = n >> 28;
+                n = e[i--];
+                c = 4 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                y = (n >> 28) & 0xf;
+                n <<= 4;
+                c -= 4;
+            }
+
+            sp_3072_mont_sqr_48(r, r, m, mp);
+            sp_3072_mont_sqr_48(r, r, m, mp);
+            sp_3072_mont_sqr_48(r, r, m, mp);
+            sp_3072_mont_sqr_48(r, r, m, mp);
+
+            sp_3072_mont_mul_48(r, r, t[y], m, mp);
+        }
+
+        XMEMSET(&r[48], 0, sizeof(sp_digit) * 48U);
+        sp_3072_mont_reduce_48(r, m, mp);
+
+        mask = 0 - (sp_3072_cmp_48(r, m) >= 0);
+        sp_3072_cond_sub_48(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#else
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * r     A single precision number that is the result of the operation.
+ * a     A single precision number being exponentiated.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[32][96];
+#else
+    sp_digit* t[32];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 96, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<32; i++) {
+            t[i] = td + i * 96;
+        }
+#endif
+        norm = t[0];
+
+        sp_3072_mont_setup(m, &mp);
+        sp_3072_mont_norm_48(norm, m);
+
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 48U);
+        if (reduceA != 0) {
+            err = sp_3072_mod_48(t[1] + 48, a, m);
+            if (err == MP_OKAY) {
+                err = sp_3072_mod_48(t[1], t[1], m);
+            }
+        }
+        else {
+            XMEMCPY(t[1] + 48, a, sizeof(sp_digit) * 48);
+            err = sp_3072_mod_48(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_3072_mont_sqr_48(t[ 2], t[ 1], m, mp);
+        sp_3072_mont_mul_48(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_3072_mont_sqr_48(t[ 4], t[ 2], m, mp);
+        sp_3072_mont_mul_48(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_3072_mont_sqr_48(t[ 6], t[ 3], m, mp);
+        sp_3072_mont_mul_48(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_3072_mont_sqr_48(t[ 8], t[ 4], m, mp);
+        sp_3072_mont_mul_48(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_3072_mont_sqr_48(t[10], t[ 5], m, mp);
+        sp_3072_mont_mul_48(t[11], t[ 6], t[ 5], m, mp);
+        sp_3072_mont_sqr_48(t[12], t[ 6], m, mp);
+        sp_3072_mont_mul_48(t[13], t[ 7], t[ 6], m, mp);
+        sp_3072_mont_sqr_48(t[14], t[ 7], m, mp);
+        sp_3072_mont_mul_48(t[15], t[ 8], t[ 7], m, mp);
+        sp_3072_mont_sqr_48(t[16], t[ 8], m, mp);
+        sp_3072_mont_mul_48(t[17], t[ 9], t[ 8], m, mp);
+        sp_3072_mont_sqr_48(t[18], t[ 9], m, mp);
+        sp_3072_mont_mul_48(t[19], t[10], t[ 9], m, mp);
+        sp_3072_mont_sqr_48(t[20], t[10], m, mp);
+        sp_3072_mont_mul_48(t[21], t[11], t[10], m, mp);
+        sp_3072_mont_sqr_48(t[22], t[11], m, mp);
+        sp_3072_mont_mul_48(t[23], t[12], t[11], m, mp);
+        sp_3072_mont_sqr_48(t[24], t[12], m, mp);
+        sp_3072_mont_mul_48(t[25], t[13], t[12], m, mp);
+        sp_3072_mont_sqr_48(t[26], t[13], m, mp);
+        sp_3072_mont_mul_48(t[27], t[14], t[13], m, mp);
+        sp_3072_mont_sqr_48(t[28], t[14], m, mp);
+        sp_3072_mont_mul_48(t[29], t[15], t[14], m, mp);
+        sp_3072_mont_sqr_48(t[30], t[15], m, mp);
+        sp_3072_mont_mul_48(t[31], t[16], t[15], m, mp);
+
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 5;
+        if (c == 32) {
+            c = 27;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 48);
+        for (; i>=0 || c>=5; ) {
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 27;
+                n <<= 5;
+                c = 27;
+            }
+            else if (c < 5) {
+                y = n >> 27;
+                n = e[i--];
+                c = 5 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                y = (n >> 27) & 0x1f;
+                n <<= 5;
+                c -= 5;
+            }
+
+            sp_3072_mont_sqr_48(r, r, m, mp);
+            sp_3072_mont_sqr_48(r, r, m, mp);
+            sp_3072_mont_sqr_48(r, r, m, mp);
+            sp_3072_mont_sqr_48(r, r, m, mp);
+            sp_3072_mont_sqr_48(r, r, m, mp);
+
+            sp_3072_mont_mul_48(r, r, t[y], m, mp);
+        }
+
+        XMEMSET(&r[48], 0, sizeof(sp_digit) * 48U);
+        sp_3072_mont_reduce_48(r, m, mp);
+
+        mask = 0 - (sp_3072_cmp_48(r, m) >= 0);
+        sp_3072_cond_sub_48(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#endif /* WOLFSSL_SP_SMALL */
+
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 3072 bits, just need to subtract.
+ *
+ * r  A single precision number.
+ * m  A single precision number.
+ */
+static void sp_3072_mont_norm_96(sp_digit* r, const sp_digit* m)
+{
+    XMEMSET(r, 0, sizeof(sp_digit) * 96);
+
+    /* r = 2^n mod m */
+    sp_3072_sub_in_place_96(r, m);
+}
+
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * r  A single precision number representing condition subtract result.
+ * a  A single precision number to subtract from.
+ * b  A single precision number to subtract.
+ * m  Mask value to apply.
+ */
+SP_NOINLINE static sp_digit sp_3072_cond_sub_96(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, sp_digit m)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r5, #1\n\t"
+        "lsl	r5, r5, #8\n\t"
+        "add	r5, #128\n\t"
+        "mov	r8, r5\n\t"
+        "mov	r7, #0\n\t"
+        "1:\n\t"
+        "ldr	r6, [%[b], r7]\n\t"
+        "and	r6, %[m]\n\t"
+        "mov	r5, #0\n\t"
+        "sub	r5, %[c]\n\t"
+        "ldr	r5, [%[a], r7]\n\t"
+        "sbc	r5, r6\n\t"
+        "sbc	%[c], %[c]\n\t"
+        "str	r5, [%[r], r7]\n\t"
+        "add	r7, #4\n\t"
+        "cmp	r7, r8\n\t"
+        "blt	1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r5", "r6", "r7", "r8"
+    );
+
+    return c;
+}
+
+/* Reduce the number back to 3072 bits using Montgomery reduction.
+ *
+ * a   A single precision number to reduce in place.
+ * m   The single precision number representing the modulus.
+ * mp  The digit representing the negative inverse of m mod 2^n.
+ */
+SP_NOINLINE static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_digit ca = 0;
+
+    __asm__ __volatile__ (
+        "mov	r8, %[mp]\n\t"
+        "mov	r12, %[ca]\n\t"
+        "mov	r14, %[m]\n\t"
+        "mov	r9, %[a]\n\t"
+        "mov	r4, #0\n\t"
+        "# i = 0\n\t"
+        "mov	r11, r4\n\t"
+        "\n1:\n\t"
+        "mov	r5, #0\n\t"
+        "mov	%[ca], #0\n\t"
+        "# mu = a[i] * mp\n\t"
+        "mov	%[mp], r8\n\t"
+        "ldr	%[a], [%[a]]\n\t"
+        "mul	%[mp], %[a]\n\t"
+        "mov	%[m], r14\n\t"
+        "mov	r10, r9\n\t"
+        "\n2:\n\t"
+        "# a[i+j] += m[j] * mu\n\t"
+        "mov	%[a], r10\n\t"
+        "ldr	%[a], [%[a]]\n\t"
+        "mov	%[ca], #0\n\t"
+        "mov	r4, r5\n\t"
+        "mov	r5, #0\n\t"
+        "# Multiply m[j] and mu - Start\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsl	r6, %[mp], #16\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	%[a], r7\n\t"
+        "adc	r5, %[ca]\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	%[a], r6\n\t"
+        "adc	r5, r7\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsr	r6, %[mp], #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r5, r7\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	%[a], r6\n\t"
+        "adc	r5, r7\n\t"
+        "# Multiply m[j] and mu - Done\n\t"
+        "add	r4, %[a]\n\t"
+        "adc	r5, %[ca]\n\t"
+        "mov	%[a], r10\n\t"
+        "str	r4, [%[a]]\n\t"
+        "mov	r6, #4\n\t"
+        "add	%[m], #4\n\t"
+        "add	r10, r6\n\t"
+        "mov	r4, #1\n\t"
+        "lsl	r4, r4, #8\n\t"
+        "add	r4, #124\n\t"
+        "add	r4, r9\n\t"
+        "cmp	r10, r4\n\t"
+        "blt	2b\n\t"
+        "# a[i+95] += m[95] * mu\n\t"
+        "mov	%[ca], #0\n\t"
+        "mov	r4, r12\n\t"
+        "mov	%[a], #0\n\t"
+        "# Multiply m[95] and mu - Start\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsl	r6, %[mp], #16\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r5, r7\n\t"
+        "adc	r4, %[ca]\n\t"
+        "adc	%[a], %[ca]\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r5, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	%[a], %[ca]\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsr	r6, %[mp], #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r4, r7\n\t"
+        "adc	%[a], %[ca]\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r5, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	%[a], %[ca]\n\t"
+        "# Multiply m[95] and mu - Done\n\t"
+        "mov	%[ca], %[a]\n\t"
+        "mov	%[a], r10\n\t"
+        "ldr	r7, [%[a], #4]\n\t"
+        "ldr	%[a], [%[a]]\n\t"
+        "mov	r6, #0\n\t"
+        "add	r5, %[a]\n\t"
+        "adc	r7, r4\n\t"
+        "adc	%[ca], r6\n\t"
+        "mov	%[a], r10\n\t"
+        "str	r5, [%[a]]\n\t"
+        "str	r7, [%[a], #4]\n\t"
+        "# i += 1\n\t"
+        "mov	r6, #4\n\t"
+        "add	r9, r6\n\t"
+        "add	r11, r6\n\t"
+        "mov	r12, %[ca]\n\t"
+        "mov	%[a], r9\n\t"
+        "mov	r4, #1\n\t"
+        "lsl	r4, r4, #8\n\t"
+        "add	r4, #128\n\t"
+        "cmp	r11, r4\n\t"
+        "blt	1b\n\t"
+        "mov	%[m], r14\n\t"
+        : [ca] "+r" (ca), [a] "+r" (a)
+        : [m] "r" (m), [mp] "r" (mp)
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14"
+    );
+
+    sp_3072_cond_sub_96(a - 96, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montogmery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r   Result of multiplication.
+ * a   First number to multiply in Montogmery form.
+ * b   Second number to multiply in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_3072_mont_mul_96(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_3072_mul_96(r, a, b);
+    sp_3072_mont_reduce_96(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_3072_mont_sqr_96(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_3072_sqr_96(r, a);
+    sp_3072_mont_reduce_96(r, m, mp);
+}
+
+/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
+ *
+ * d1   The high order half of the number to divide.
+ * d0   The low order half of the number to divide.
+ * div  The dividend.
+ * returns the result of the division.
+ *
+ * Note that this is an approximate div. It may give an answer 1 larger.
+ */
+SP_NOINLINE static sp_digit div_3072_word_96(sp_digit d1, sp_digit d0,
+        sp_digit div)
+{
+    sp_digit r = 0;
+
+    __asm__ __volatile__ (
+        "lsr	r5, %[div], #1\n\t"
+        "add	r5, #1\n\t"
+        "mov	r8, %[d0]\n\t"
+        "mov	r9, %[d1]\n\t"
+        "# Do top 32\n\t"
+        "mov	r6, r5\n\t"
+        "sub	r6, %[d1]\n\t"
+        "sbc	r6, r6\n\t"
+        "add	%[r], %[r]\n\t"
+        "sub	%[r], r6\n\t"
+        "and	r6, r5\n\t"
+        "sub	%[d1], r6\n\t"
+        "# Next 30 bits\n\t"
+        "mov	r4, #29\n\t"
+        "1:\n\t"
+        "lsl	%[d0], %[d0], #1\n\t"
+        "adc	%[d1], %[d1]\n\t"
+        "mov	r6, r5\n\t"
+        "sub	r6, %[d1]\n\t"
+        "sbc	r6, r6\n\t"
+        "add	%[r], %[r]\n\t"
+        "sub	%[r], r6\n\t"
+        "and	r6, r5\n\t"
+        "sub	%[d1], r6\n\t"
+        "sub	r4, #1\n\t"
+        "bpl	1b\n\t"
+        "mov	r7, #0\n\t"
+        "add	%[r], %[r]\n\t"
+        "add	%[r], #1\n\t"
+        "# r * div - Start\n\t"
+        "lsl	%[d1], %[r], #16\n\t"
+        "lsl	r4, %[div], #16\n\t"
+        "lsr	%[d1], %[d1], #16\n\t"
+        "lsr	r4, r4, #16\n\t"
+        "mul	r4, %[d1]\n\t"
+        "lsr	r6, %[div], #16\n\t"
+        "mul	%[d1], r6\n\t"
+        "lsr	r5, %[d1], #16\n\t"
+        "lsl	%[d1], %[d1], #16\n\t"
+        "add	r4, %[d1]\n\t"
+        "adc	r5, r7\n\t"
+        "lsr	%[d1], %[r], #16\n\t"
+        "mul	r6, %[d1]\n\t"
+        "add	r5, r6\n\t"
+        "lsl	r6, %[div], #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "mul	%[d1], r6\n\t"
+        "lsr	r6, %[d1], #16\n\t"
+        "lsl	%[d1], %[d1], #16\n\t"
+        "add	r4, %[d1]\n\t"
+        "adc	r5, r6\n\t"
+        "# r * div - Done\n\t"
+        "mov	%[d1], r8\n\t"
+        "sub	%[d1], r4\n\t"
+        "mov	r4, %[d1]\n\t"
+        "mov	%[d1], r9\n\t"
+        "sbc	%[d1], r5\n\t"
+        "mov	r5, %[d1]\n\t"
+        "add	%[r], r5\n\t"
+        "# r * div - Start\n\t"
+        "lsl	%[d1], %[r], #16\n\t"
+        "lsl	r4, %[div], #16\n\t"
+        "lsr	%[d1], %[d1], #16\n\t"
+        "lsr	r4, r4, #16\n\t"
+        "mul	r4, %[d1]\n\t"
+        "lsr	r6, %[div], #16\n\t"
+        "mul	%[d1], r6\n\t"
+        "lsr	r5, %[d1], #16\n\t"
+        "lsl	%[d1], %[d1], #16\n\t"
+        "add	r4, %[d1]\n\t"
+        "adc	r5, r7\n\t"
+        "lsr	%[d1], %[r], #16\n\t"
+        "mul	r6, %[d1]\n\t"
+        "add	r5, r6\n\t"
+        "lsl	r6, %[div], #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "mul	%[d1], r6\n\t"
+        "lsr	r6, %[d1], #16\n\t"
+        "lsl	%[d1], %[d1], #16\n\t"
+        "add	r4, %[d1]\n\t"
+        "adc	r5, r6\n\t"
+        "# r * div - Done\n\t"
+        "mov	%[d1], r8\n\t"
+        "mov	r6, r9\n\t"
+        "sub	r4, %[d1], r4\n\t"
+        "sbc	r6, r5\n\t"
+        "mov	r5, r6\n\t"
+        "add	%[r], r5\n\t"
+        "# r * div - Start\n\t"
+        "lsl	%[d1], %[r], #16\n\t"
+        "lsl	r4, %[div], #16\n\t"
+        "lsr	%[d1], %[d1], #16\n\t"
+        "lsr	r4, r4, #16\n\t"
+        "mul	r4, %[d1]\n\t"
+        "lsr	r6, %[div], #16\n\t"
+        "mul	%[d1], r6\n\t"
+        "lsr	r5, %[d1], #16\n\t"
+        "lsl	%[d1], %[d1], #16\n\t"
+        "add	r4, %[d1]\n\t"
+        "adc	r5, r7\n\t"
+        "lsr	%[d1], %[r], #16\n\t"
+        "mul	r6, %[d1]\n\t"
+        "add	r5, r6\n\t"
+        "lsl	r6, %[div], #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "mul	%[d1], r6\n\t"
+        "lsr	r6, %[d1], #16\n\t"
+        "lsl	%[d1], %[d1], #16\n\t"
+        "add	r4, %[d1]\n\t"
+        "adc	r5, r6\n\t"
+        "# r * div - Done\n\t"
+        "mov	%[d1], r8\n\t"
+        "mov	r6, r9\n\t"
+        "sub	r4, %[d1], r4\n\t"
+        "sbc	r6, r5\n\t"
+        "mov	r5, r6\n\t"
+        "add	%[r], r5\n\t"
+        "mov	r6, %[div]\n\t"
+        "sub	r6, r4\n\t"
+        "sbc	r6, r6\n\t"
+        "sub	%[r], r6\n\t"
+        : [r] "+r" (r)
+        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+        : "r4", "r5", "r7", "r6", "r8", "r9"
+    );
+    return r;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * m  Mask to AND against each digit.
+ */
+static void sp_3072_mask_96(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<96; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    int i;
+
+    for (i = 0; i < 96; i += 8) {
+        r[i+0] = a[i+0] & m;
+        r[i+1] = a[i+1] & m;
+        r[i+2] = a[i+2] & m;
+        r[i+3] = a[i+3] & m;
+        r[i+4] = a[i+4] & m;
+        r[i+5] = a[i+5] & m;
+        r[i+6] = a[i+6] & m;
+        r[i+7] = a[i+7] & m;
+    }
+#endif
+}
+
+/* Compare a with b in constant time.
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+SP_NOINLINE static int32_t sp_3072_cmp_96(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+
+
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mvn	r3, r3\n\t"
+        "mov	r6, #1\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, #124\n\t"
+        "1:\n\t"
+        "ldr	r7, [%[a], r6]\n\t"
+        "ldr	r5, [%[b], r6]\n\t"
+        "and	r7, r3\n\t"
+        "and	r5, r3\n\t"
+        "mov	r4, r7\n\t"
+        "sub	r7, r5\n\t"
+        "sbc	r7, r7\n\t"
+        "add	%[r], r7\n\t"
+        "mvn	r7, r7\n\t"
+        "and	r3, r7\n\t"
+        "sub	r5, r4\n\t"
+        "sbc	r7, r7\n\t"
+        "sub	%[r], r7\n\t"
+        "mvn	r7, r7\n\t"
+        "and	r3, r7\n\t"
+        "sub	r6, #4\n\t"
+        "cmp	r6, #0\n\t"
+        "bge	1b\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
+        : "r3", "r4", "r5", "r6", "r7"
+    );
+
+    return r;
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Nmber to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_div_96(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[192], t2[97];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    div = d[95];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 96);
+    for (i=95; i>=0; i--) {
+        r1 = div_3072_word_96(t1[96 + i], t1[96 + i - 1], div);
+
+        sp_3072_mul_d_96(t2, d, r1);
+        t1[96 + i] += sp_3072_sub_in_place_96(&t1[i], t2);
+        t1[96 + i] -= t2[96];
+        sp_3072_mask_96(t2, d, t1[96 + i]);
+        t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], t2);
+        sp_3072_mask_96(t2, d, t1[96 + i]);
+        t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], t2);
+    }
+
+    r1 = sp_3072_cmp_96(t1, d) >= 0;
+    sp_3072_cond_sub_96(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_mod_96(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_3072_div_96(a, m, NULL, r);
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Nmber to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_div_96_cond(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[192], t2[97];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    div = d[95];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 96);
+    for (i=95; i>=0; i--) {
+        r1 = div_3072_word_96(t1[96 + i], t1[96 + i - 1], div);
+
+        sp_3072_mul_d_96(t2, d, r1);
+        t1[96 + i] += sp_3072_sub_in_place_96(&t1[i], t2);
+        t1[96 + i] -= t2[96];
+        if (t1[96 + i] != 0) {
+            t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], d);
+            if (t1[96 + i] != 0)
+                t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], d);
+        }
+    }
+
+    r1 = sp_3072_cmp_96(t1, d) >= 0;
+    sp_3072_cond_sub_96(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_mod_96_cond(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_3072_div_96_cond(a, m, NULL, r);
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
+                                                     defined(WOLFSSL_HAVE_SP_DH)
+#ifdef WOLFSSL_SP_SMALL
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * r     A single precision number that is the result of the operation.
+ * a     A single precision number being exponentiated.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_3072_mod_exp_96(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[16][192];
+#else
+    sp_digit* t[16];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 192, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<16; i++) {
+            t[i] = td + i * 192;
+        }
+#endif
+        norm = t[0];
+
+        sp_3072_mont_setup(m, &mp);
+        sp_3072_mont_norm_96(norm, m);
+
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 96U);
+        if (reduceA != 0) {
+            err = sp_3072_mod_96(t[1] + 96, a, m);
+            if (err == MP_OKAY) {
+                err = sp_3072_mod_96(t[1], t[1], m);
+            }
+        }
+        else {
+            XMEMCPY(t[1] + 96, a, sizeof(sp_digit) * 96);
+            err = sp_3072_mod_96(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_3072_mont_sqr_96(t[ 2], t[ 1], m, mp);
+        sp_3072_mont_mul_96(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_3072_mont_sqr_96(t[ 4], t[ 2], m, mp);
+        sp_3072_mont_mul_96(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_3072_mont_sqr_96(t[ 6], t[ 3], m, mp);
+        sp_3072_mont_mul_96(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_3072_mont_sqr_96(t[ 8], t[ 4], m, mp);
+        sp_3072_mont_mul_96(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_3072_mont_sqr_96(t[10], t[ 5], m, mp);
+        sp_3072_mont_mul_96(t[11], t[ 6], t[ 5], m, mp);
+        sp_3072_mont_sqr_96(t[12], t[ 6], m, mp);
+        sp_3072_mont_mul_96(t[13], t[ 7], t[ 6], m, mp);
+        sp_3072_mont_sqr_96(t[14], t[ 7], m, mp);
+        sp_3072_mont_mul_96(t[15], t[ 8], t[ 7], m, mp);
+
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 4;
+        if (c == 32) {
+            c = 28;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 96);
+        for (; i>=0 || c>=4; ) {
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 28;
+                n <<= 4;
+                c = 28;
+            }
+            else if (c < 4) {
+                y = n >> 28;
+                n = e[i--];
+                c = 4 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                y = (n >> 28) & 0xf;
+                n <<= 4;
+                c -= 4;
+            }
+
+            sp_3072_mont_sqr_96(r, r, m, mp);
+            sp_3072_mont_sqr_96(r, r, m, mp);
+            sp_3072_mont_sqr_96(r, r, m, mp);
+            sp_3072_mont_sqr_96(r, r, m, mp);
+
+            sp_3072_mont_mul_96(r, r, t[y], m, mp);
+        }
+
+        XMEMSET(&r[96], 0, sizeof(sp_digit) * 96U);
+        sp_3072_mont_reduce_96(r, m, mp);
+
+        mask = 0 - (sp_3072_cmp_96(r, m) >= 0);
+        sp_3072_cond_sub_96(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#else
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * r     A single precision number that is the result of the operation.
+ * a     A single precision number being exponentiated.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_3072_mod_exp_96(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[32][192];
+#else
+    sp_digit* t[32];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 192, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<32; i++) {
+            t[i] = td + i * 192;
+        }
+#endif
+        norm = t[0];
+
+        sp_3072_mont_setup(m, &mp);
+        sp_3072_mont_norm_96(norm, m);
+
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 96U);
+        if (reduceA != 0) {
+            err = sp_3072_mod_96(t[1] + 96, a, m);
+            if (err == MP_OKAY) {
+                err = sp_3072_mod_96(t[1], t[1], m);
+            }
+        }
+        else {
+            XMEMCPY(t[1] + 96, a, sizeof(sp_digit) * 96);
+            err = sp_3072_mod_96(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_3072_mont_sqr_96(t[ 2], t[ 1], m, mp);
+        sp_3072_mont_mul_96(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_3072_mont_sqr_96(t[ 4], t[ 2], m, mp);
+        sp_3072_mont_mul_96(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_3072_mont_sqr_96(t[ 6], t[ 3], m, mp);
+        sp_3072_mont_mul_96(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_3072_mont_sqr_96(t[ 8], t[ 4], m, mp);
+        sp_3072_mont_mul_96(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_3072_mont_sqr_96(t[10], t[ 5], m, mp);
+        sp_3072_mont_mul_96(t[11], t[ 6], t[ 5], m, mp);
+        sp_3072_mont_sqr_96(t[12], t[ 6], m, mp);
+        sp_3072_mont_mul_96(t[13], t[ 7], t[ 6], m, mp);
+        sp_3072_mont_sqr_96(t[14], t[ 7], m, mp);
+        sp_3072_mont_mul_96(t[15], t[ 8], t[ 7], m, mp);
+        sp_3072_mont_sqr_96(t[16], t[ 8], m, mp);
+        sp_3072_mont_mul_96(t[17], t[ 9], t[ 8], m, mp);
+        sp_3072_mont_sqr_96(t[18], t[ 9], m, mp);
+        sp_3072_mont_mul_96(t[19], t[10], t[ 9], m, mp);
+        sp_3072_mont_sqr_96(t[20], t[10], m, mp);
+        sp_3072_mont_mul_96(t[21], t[11], t[10], m, mp);
+        sp_3072_mont_sqr_96(t[22], t[11], m, mp);
+        sp_3072_mont_mul_96(t[23], t[12], t[11], m, mp);
+        sp_3072_mont_sqr_96(t[24], t[12], m, mp);
+        sp_3072_mont_mul_96(t[25], t[13], t[12], m, mp);
+        sp_3072_mont_sqr_96(t[26], t[13], m, mp);
+        sp_3072_mont_mul_96(t[27], t[14], t[13], m, mp);
+        sp_3072_mont_sqr_96(t[28], t[14], m, mp);
+        sp_3072_mont_mul_96(t[29], t[15], t[14], m, mp);
+        sp_3072_mont_sqr_96(t[30], t[15], m, mp);
+        sp_3072_mont_mul_96(t[31], t[16], t[15], m, mp);
+
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 5;
+        if (c == 32) {
+            c = 27;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 96);
+        for (; i>=0 || c>=5; ) {
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 27;
+                n <<= 5;
+                c = 27;
+            }
+            else if (c < 5) {
+                y = n >> 27;
+                n = e[i--];
+                c = 5 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                y = (n >> 27) & 0x1f;
+                n <<= 5;
+                c -= 5;
+            }
+
+            sp_3072_mont_sqr_96(r, r, m, mp);
+            sp_3072_mont_sqr_96(r, r, m, mp);
+            sp_3072_mont_sqr_96(r, r, m, mp);
+            sp_3072_mont_sqr_96(r, r, m, mp);
+            sp_3072_mont_sqr_96(r, r, m, mp);
+
+            sp_3072_mont_mul_96(r, r, t[y], m, mp);
+        }
+
+        XMEMSET(&r[96], 0, sizeof(sp_digit) * 96U);
+        sp_3072_mont_reduce_96(r, m, mp);
+
+        mask = 0 - (sp_3072_cmp_96(r, m) >= 0);
+        sp_3072_cond_sub_96(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */
+
+#ifdef WOLFSSL_HAVE_SP_RSA
+/* RSA public key operation.
+ *
+ * in      Array of bytes representing the number to exponentiate, base.
+ * inLen   Number of bytes in base.
+ * em      Public exponent.
+ * mm      Modulus.
+ * out     Buffer to hold big-endian bytes of exponentiation result.
+ *         Must be at least 384 bytes long.
+ * outLen  Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
+    byte* out, word32* outLen)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit a[192], m[96], r[192];
+#else
+    sp_digit* d = NULL;
+    sp_digit* a;
+    sp_digit* m;
+    sp_digit* r;
+#endif
+    sp_digit *ah;
+    sp_digit e[1];
+    int err = MP_OKAY;
+
+    if (*outLen < 384)
+        err = MP_TO_E;
+    if (err == MP_OKAY && (mp_count_bits(em) > 32 || inLen > 384 ||
+                                                     mp_count_bits(mm) != 3072))
+        err = MP_READ_E;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 96 * 5, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (d == NULL)
+            err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        a = d;
+        r = a + 96 * 2;
+        m = r + 96 * 2;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        ah = a + 96;
+
+        sp_3072_from_bin(ah, 96, in, inLen);
+#if DIGIT_BIT >= 32
+        e[0] = em->dp[0];
+#else
+        e[0] = em->dp[0];
+        if (em->used > 1) {
+            e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
+        }
+#endif
+        if (e[0] == 0) {
+            err = MP_EXPTMOD_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        sp_3072_from_mp(m, 96, mm);
+
+        if (e[0] == 0x3) {
+            if (err == MP_OKAY) {
+                sp_3072_sqr_96(r, ah);
+                err = sp_3072_mod_96_cond(r, r, m);
+            }
+            if (err == MP_OKAY) {
+                sp_3072_mul_96(r, ah, r);
+                err = sp_3072_mod_96_cond(r, r, m);
+            }
+        }
+        else {
+            int i;
+            sp_digit mp;
+
+            sp_3072_mont_setup(m, &mp);
+
+            /* Convert to Montgomery form. */
+            XMEMSET(a, 0, sizeof(sp_digit) * 96);
+            err = sp_3072_mod_96_cond(a, a, m);
+
+            if (err == MP_OKAY) {
+                for (i = 31; i >= 0; i--) {
+                    if (e[0] >> i) {
+                        break;
+                    }
+                }
+
+                XMEMCPY(r, a, sizeof(sp_digit) * 96);
+                for (i--; i>=0; i--) {
+                    sp_3072_mont_sqr_96(r, r, m, mp);
+                    if (((e[0] >> i) & 1) == 1) {
+                        sp_3072_mont_mul_96(r, r, a, m, mp);
+                    }
+                }
+                XMEMSET(&r[96], 0, sizeof(sp_digit) * 96);
+                sp_3072_mont_reduce_96(r, m, mp);
+
+                for (i = 95; i > 0; i--) {
+                    if (r[i] != m[i]) {
+                        break;
+                    }
+                }
+                if (r[i] >= m[i]) {
+                    sp_3072_sub_in_place_96(r, m);
+                }
+            }
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_3072_to_bin(r, out);
+        *outLen = 384;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+#endif
+
+    return err;
+}
+
+#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
+    sp_digit* a;
+    sp_digit* d = NULL;
+    sp_digit* m;
+    sp_digit* r;
+    int err = MP_OKAY;
+
+    (void)pm;
+    (void)qm;
+    (void)dpm;
+    (void)dqm;
+    (void)qim;
+
+    if (*outLen < 384U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(dm) > 3072) {
+           err = MP_READ_E;
+        }
+        if (inLen > 384) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 3072) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 96 * 4, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        a = d + 96;
+        m = a + 192;
+        r = a;
+
+        sp_3072_from_bin(a, 96, in, inLen);
+        sp_3072_from_mp(d, 96, dm);
+        sp_3072_from_mp(m, 96, mm);
+        err = sp_3072_mod_exp_96(r, a, d, 3072, m, 0);
+    }
+    if (err == MP_OKAY) {
+        sp_3072_to_bin(r, out);
+        *outLen = 384;
+    }
+
+    if (d != NULL) {
+        XMEMSET(d, 0, sizeof(sp_digit) * 96);
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+
+    return err;
+#else
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r  A single precision number representing conditional add result.
+ * a  A single precision number to add with.
+ * b  A single precision number to add.
+ * m  Mask value to apply.
+ */
+SP_NOINLINE static sp_digit sp_3072_cond_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        sp_digit m)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r5, #192\n\t"
+        "mov	r8, r5\n\t"
+        "mov	r7, #0\n\t"
+        "1:\n\t"
+        "ldr	r6, [%[b], r7]\n\t"
+        "and	r6, %[m]\n\t"
+        "mov	r5, #0\n\t"
+        "sub	r5, #1\n\t"
+        "add	r5, %[c]\n\t"
+        "ldr	r5, [%[a], r7]\n\t"
+        "adc	r5, r6\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c]\n\t"
+        "str	r5, [%[r], r7]\n\t"
+        "add	r7, #4\n\t"
+        "cmp	r7, r8\n\t"
+        "blt	1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r5", "r6", "r7", "r8"
+    );
+
+    return c;
+}
+
+/* RSA private key operation.
+ *
+ * in      Array of bytes representing the number to exponentiate, base.
+ * inLen   Number of bytes in base.
+ * dm      Private exponent.
+ * pm      First prime.
+ * qm      Second prime.
+ * dpm     First prime's CRT exponent.
+ * dqm     Second prime's CRT exponent.
+ * qim     Inverse of second prime mod p.
+ * mm      Modulus.
+ * out     Buffer to hold big-endian bytes of exponentiation result.
+ *         Must be at least 384 bytes long.
+ * outLen  Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm,
+    mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
+    byte* out, word32* outLen)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit a[96 * 2];
+    sp_digit p[48], q[48], dp[48];
+    sp_digit tmpa[96], tmpb[96];
+#else
+    sp_digit* t = NULL;
+    sp_digit* a;
+    sp_digit* p;
+    sp_digit* q;
+    sp_digit* dp;
+    sp_digit* tmpa;
+    sp_digit* tmpb;
+#endif
+    sp_digit* r;
+    sp_digit* qi;
+    sp_digit* dq;
+    sp_digit c;
+    int err = MP_OKAY;
+
+    (void)dm;
+    (void)mm;
+
+    if (*outLen < 384)
+        err = MP_TO_E;
+    if (err == MP_OKAY && (inLen > 384 || mp_count_bits(mm) != 3072))
+        err = MP_READ_E;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 48 * 11, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (t == NULL)
+            err = MEMORY_E;
+    }
+    if (err == MP_OKAY) {
+        a = t;
+        p = a + 96 * 2;
+        q = p + 48;
+        qi = dq = dp = q + 48;
+        tmpa = qi + 48;
+        tmpb = tmpa + 96;
+
+        r = t + 96;
+    }
+#else
+#endif
+
+    if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+        r = a;
+        qi = dq = dp;
+#endif
+        sp_3072_from_bin(a, 96, in, inLen);
+        sp_3072_from_mp(p, 48, pm);
+        sp_3072_from_mp(q, 48, qm);
+        sp_3072_from_mp(dp, 48, dpm);
+
+        err = sp_3072_mod_exp_48(tmpa, a, dp, 1536, p, 1);
+    }
+    if (err == MP_OKAY) {
+        sp_3072_from_mp(dq, 48, dqm);
+        err = sp_3072_mod_exp_48(tmpb, a, dq, 1536, q, 1);
+    }
+
+    if (err == MP_OKAY) {
+        c = sp_3072_sub_in_place_48(tmpa, tmpb);
+        c += sp_3072_cond_add_48(tmpa, tmpa, p, c);
+        sp_3072_cond_add_48(tmpa, tmpa, p, c);
+
+        sp_3072_from_mp(qi, 48, qim);
+        sp_3072_mul_48(tmpa, tmpa, qi);
+        err = sp_3072_mod_48(tmpa, tmpa, p);
+    }
+
+    if (err == MP_OKAY) {
+        sp_3072_mul_48(tmpa, q, tmpa);
+        XMEMSET(&tmpb[48], 0, sizeof(sp_digit) * 48);
+        sp_3072_add_96(r, tmpb, tmpa);
+
+        sp_3072_to_bin(r, out);
+        *outLen = 384;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        XMEMSET(t, 0, sizeof(sp_digit) * 48 * 11);
+        XFREE(t, NULL, DYNAMIC_TYPE_RSA);
+    }
+#else
+    XMEMSET(tmpa, 0, sizeof(tmpa));
+    XMEMSET(tmpb, 0, sizeof(tmpb));
+    XMEMSET(p,    0, sizeof(p));
+    XMEMSET(q,    0, sizeof(q));
+    XMEMSET(dp,   0, sizeof(dp));
+#endif
+
+    return err;
+}
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
+#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
+#endif /* WOLFSSL_HAVE_SP_RSA */
+#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
+                                              !defined(WOLFSSL_RSA_PUBLIC_ONLY))
+/* Convert an array of sp_digit to an mp_int.
+ *
+ * a  A single precision integer.
+ * r  A multi-precision integer.
+ */
+static int sp_3072_to_mp(const sp_digit* a, mp_int* r)
+{
+    int err;
+
+    err = mp_grow(r, (3072 + DIGIT_BIT - 1) / DIGIT_BIT);
+    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 32
+        XMEMCPY(r->dp, a, sizeof(sp_digit) * 96);
+        r->used = 96;
+        mp_clamp(r);
+#elif DIGIT_BIT < 32
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 96; i++) {
+            r->dp[j] |= (mp_digit)(a[i] << s);
+            r->dp[j] &= (1L << DIGIT_BIT) - 1;
+            s = DIGIT_BIT - s;
+            r->dp[++j] = (mp_digit)(a[i] >> s);
+            while (s + DIGIT_BIT <= 32) {
+                s += DIGIT_BIT;
+                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+                if (s == SP_WORD_SIZE) {
+                    r->dp[j] = 0;
+                }
+                else {
+                    r->dp[j] = (mp_digit)(a[i] >> s);
+                }
+            }
+            s = 32 - s;
+        }
+        r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#else
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 96; i++) {
+            r->dp[j] |= ((mp_digit)a[i]) << s;
+            if (s + 32 >= DIGIT_BIT) {
+    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+                r->dp[j] &= (1L << DIGIT_BIT) - 1;
+    #endif
+                s = DIGIT_BIT - s;
+                r->dp[++j] = a[i] >> s;
+                s = 32 - s;
+            }
+            else {
+                s += 32;
+            }
+        }
+        r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#endif
+    }
+
+    return err;
+}
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base  Base. MP integer.
+ * exp   Exponent. MP integer.
+ * mod   Modulus. MP integer.
+ * res   Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+    int err = MP_OKAY;
+    sp_digit b[192], e[96], m[96];
+    sp_digit* r = b;
+    int expBits = mp_count_bits(exp);
+
+    if (mp_count_bits(base) > 3072) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expBits > 3072) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 3072) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_3072_from_mp(b, 96, base);
+        sp_3072_from_mp(e, 96, exp);
+        sp_3072_from_mp(m, 96, mod);
+
+        err = sp_3072_mod_exp_96(r, b, e, expBits, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_3072_to_mp(r, res);
+    }
+
+    XMEMSET(e, 0, sizeof(e));
+
+    return err;
+}
+
+#ifdef WOLFSSL_HAVE_SP_DH
+
+#ifdef HAVE_FFDHE_3072
+static void sp_3072_lshift_96(sp_digit* r, sp_digit* a, byte n)
+{
+    __asm__ __volatile__ (
+        "mov	r6, #31\n\t"
+        "sub	r6, r6, %[n]\n\t"
+        "add	%[a], %[a], #255\n\t"
+        "add	%[r], %[r], #255\n\t"
+        "add	%[a], %[a], #65\n\t"
+        "add	%[r], %[r], #65\n\t"
+        "ldr	r3, [%[a], #60]\n\t"
+        "lsr	r4, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r4, r4, r6\n\t"
+        "ldr	r2, [%[a], #56]\n\t"
+        "str	r4, [%[r], #64]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #52]\n\t"
+        "str	r3, [%[r], #60]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #48]\n\t"
+        "str	r2, [%[r], #56]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #44]\n\t"
+        "str	r4, [%[r], #52]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #40]\n\t"
+        "str	r3, [%[r], #48]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #36]\n\t"
+        "str	r2, [%[r], #44]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #32]\n\t"
+        "str	r4, [%[r], #40]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "str	r3, [%[r], #36]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #24]\n\t"
+        "str	r2, [%[r], #32]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #20]\n\t"
+        "str	r4, [%[r], #28]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "str	r3, [%[r], #24]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #12]\n\t"
+        "str	r2, [%[r], #20]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #8]\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "str	r3, [%[r], #12]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #0]\n\t"
+        "str	r2, [%[r], #8]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "sub	%[a], %[a], #64\n\t"
+        "sub	%[r], %[r], #64\n\t"
+        "ldr	r2, [%[a], #60]\n\t"
+        "str	r4, [%[r], #68]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #56]\n\t"
+        "str	r3, [%[r], #64]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #52]\n\t"
+        "str	r2, [%[r], #60]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #48]\n\t"
+        "str	r4, [%[r], #56]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #44]\n\t"
+        "str	r3, [%[r], #52]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #40]\n\t"
+        "str	r2, [%[r], #48]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #36]\n\t"
+        "str	r4, [%[r], #44]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #32]\n\t"
+        "str	r3, [%[r], #40]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #28]\n\t"
+        "str	r2, [%[r], #36]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #24]\n\t"
+        "str	r4, [%[r], #32]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "str	r3, [%[r], #28]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #16]\n\t"
+        "str	r2, [%[r], #24]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #12]\n\t"
+        "str	r4, [%[r], #20]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "str	r3, [%[r], #16]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #4]\n\t"
+        "str	r2, [%[r], #12]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #0]\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "sub	%[a], %[a], #64\n\t"
+        "sub	%[r], %[r], #64\n\t"
+        "ldr	r4, [%[a], #60]\n\t"
+        "str	r3, [%[r], #68]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #56]\n\t"
+        "str	r2, [%[r], #64]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #52]\n\t"
+        "str	r4, [%[r], #60]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #48]\n\t"
+        "str	r3, [%[r], #56]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #44]\n\t"
+        "str	r2, [%[r], #52]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #40]\n\t"
+        "str	r4, [%[r], #48]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #36]\n\t"
+        "str	r3, [%[r], #44]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #32]\n\t"
+        "str	r2, [%[r], #40]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #28]\n\t"
+        "str	r4, [%[r], #36]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #24]\n\t"
+        "str	r3, [%[r], #32]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #20]\n\t"
+        "str	r2, [%[r], #28]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #16]\n\t"
+        "str	r4, [%[r], #24]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "str	r3, [%[r], #20]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #8]\n\t"
+        "str	r2, [%[r], #16]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #4]\n\t"
+        "str	r4, [%[r], #12]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #0]\n\t"
+        "str	r3, [%[r], #8]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "sub	%[a], %[a], #64\n\t"
+        "sub	%[r], %[r], #64\n\t"
+        "ldr	r3, [%[a], #60]\n\t"
+        "str	r2, [%[r], #68]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #56]\n\t"
+        "str	r4, [%[r], #64]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #52]\n\t"
+        "str	r3, [%[r], #60]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #48]\n\t"
+        "str	r2, [%[r], #56]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #44]\n\t"
+        "str	r4, [%[r], #52]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #40]\n\t"
+        "str	r3, [%[r], #48]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #36]\n\t"
+        "str	r2, [%[r], #44]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #32]\n\t"
+        "str	r4, [%[r], #40]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "str	r3, [%[r], #36]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #24]\n\t"
+        "str	r2, [%[r], #32]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #20]\n\t"
+        "str	r4, [%[r], #28]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "str	r3, [%[r], #24]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #12]\n\t"
+        "str	r2, [%[r], #20]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #8]\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "str	r3, [%[r], #12]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #0]\n\t"
+        "str	r2, [%[r], #8]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "sub	%[a], %[a], #64\n\t"
+        "sub	%[r], %[r], #64\n\t"
+        "ldr	r2, [%[a], #60]\n\t"
+        "str	r4, [%[r], #68]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #56]\n\t"
+        "str	r3, [%[r], #64]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #52]\n\t"
+        "str	r2, [%[r], #60]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #48]\n\t"
+        "str	r4, [%[r], #56]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #44]\n\t"
+        "str	r3, [%[r], #52]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #40]\n\t"
+        "str	r2, [%[r], #48]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #36]\n\t"
+        "str	r4, [%[r], #44]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #32]\n\t"
+        "str	r3, [%[r], #40]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #28]\n\t"
+        "str	r2, [%[r], #36]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #24]\n\t"
+        "str	r4, [%[r], #32]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "str	r3, [%[r], #28]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #16]\n\t"
+        "str	r2, [%[r], #24]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #12]\n\t"
+        "str	r4, [%[r], #20]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "str	r3, [%[r], #16]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #4]\n\t"
+        "str	r2, [%[r], #12]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #0]\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "sub	%[a], %[a], #64\n\t"
+        "sub	%[r], %[r], #64\n\t"
+        "ldr	r4, [%[a], #60]\n\t"
+        "str	r3, [%[r], #68]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #56]\n\t"
+        "str	r2, [%[r], #64]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #52]\n\t"
+        "str	r4, [%[r], #60]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #48]\n\t"
+        "str	r3, [%[r], #56]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #44]\n\t"
+        "str	r2, [%[r], #52]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #40]\n\t"
+        "str	r4, [%[r], #48]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #36]\n\t"
+        "str	r3, [%[r], #44]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #32]\n\t"
+        "str	r2, [%[r], #40]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #28]\n\t"
+        "str	r4, [%[r], #36]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #24]\n\t"
+        "str	r3, [%[r], #32]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #20]\n\t"
+        "str	r2, [%[r], #28]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #16]\n\t"
+        "str	r4, [%[r], #24]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "str	r3, [%[r], #20]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #8]\n\t"
+        "str	r2, [%[r], #16]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #4]\n\t"
+        "str	r4, [%[r], #12]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #0]\n\t"
+        "str	r3, [%[r], #8]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "str	r4, [%[r]]\n\t"
+        "str	r2, [%[r], #4]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [n] "r" (n)
+        : "memory", "r2", "r3", "r4", "r5", "r6"
+    );
+}
+
+/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
+ *
+ * r     A single precision number that is the result of the operation.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_3072_mod_exp_2_96(sp_digit* r, const sp_digit* e, int bits,
+        const sp_digit* m)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit nd[192];
+    sp_digit td[97];
+#else
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit* tmp;
+    sp_digit mp = 1;
+    sp_digit n, o;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 289, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        norm = td;
+        tmp  = td + 192;
+#else
+        norm = nd;
+        tmp  = td;
+#endif
+
+        sp_3072_mont_setup(m, &mp);
+        sp_3072_mont_norm_96(norm, m);
+
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 5;
+        if (c == 32) {
+            c = 27;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
+        sp_3072_lshift_96(r, norm, y);
+        for (; i>=0 || c>=5; ) {
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 27;
+                n <<= 5;
+                c = 27;
+            }
+            else if (c < 5) {
+                y = n >> 27;
+                n = e[i--];
+                c = 5 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                y = (n >> 27) & 0x1f;
+                n <<= 5;
+                c -= 5;
+            }
+
+            sp_3072_mont_sqr_96(r, r, m, mp);
+            sp_3072_mont_sqr_96(r, r, m, mp);
+            sp_3072_mont_sqr_96(r, r, m, mp);
+            sp_3072_mont_sqr_96(r, r, m, mp);
+            sp_3072_mont_sqr_96(r, r, m, mp);
+
+            sp_3072_lshift_96(r, r, y);
+            sp_3072_mul_d_96(tmp, norm, r[96]);
+            r[96] = 0;
+            o = sp_3072_add_96(r, r, tmp);
+            sp_3072_cond_sub_96(r, r, m, (sp_digit)0 - o);
+        }
+
+        XMEMSET(&r[96], 0, sizeof(sp_digit) * 96U);
+        sp_3072_mont_reduce_96(r, m, mp);
+
+        mask = 0 - (sp_3072_cmp_96(r, m) >= 0);
+        sp_3072_cond_sub_96(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#endif /* HAVE_FFDHE_3072 */
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base     Base.
+ * exp      Array of bytes that is the exponent.
+ * expLen   Length of data, in bytes, in exponent.
+ * mod      Modulus.
+ * out      Buffer to hold big-endian bytes of exponentiation result.
+ *          Must be at least 384 bytes long.
+ * outLen   Length, in bytes, of exponentiation result.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen,
+    mp_int* mod, byte* out, word32* outLen)
+{
+    int err = MP_OKAY;
+    sp_digit b[192], e[96], m[96];
+    sp_digit* r = b;
+    word32 i;
+
+    if (mp_count_bits(base) > 3072) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expLen > 384) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 3072) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_3072_from_mp(b, 96, base);
+        sp_3072_from_bin(e, 96, exp, expLen);
+        sp_3072_from_mp(m, 96, mod);
+
+    #ifdef HAVE_FFDHE_3072
+        if (base->used == 1 && base->dp[0] == 2 && m[95] == (sp_digit)-1)
+            err = sp_3072_mod_exp_2_96(r, e, expLen * 8, m);
+        else
+    #endif
+            err = sp_3072_mod_exp_96(r, b, e, expLen * 8, m, 0);
+
+    }
+
+    if (err == MP_OKAY) {
+        sp_3072_to_bin(r, out);
+        *outLen = 384;
+        for (i=0; i<384 && out[i] == 0; i++) {
+        }
+        *outLen -= i;
+        XMEMMOVE(out, out + i, *outLen);
+
+    }
+
+    XMEMSET(e, 0, sizeof(e));
+
+    return err;
+}
+#endif /* WOLFSSL_HAVE_SP_DH */
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base  Base. MP integer.
+ * exp   Exponent. MP integer.
+ * mod   Modulus. MP integer.
+ * res   Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+    int err = MP_OKAY;
+    sp_digit b[96], e[48], m[48];
+    sp_digit* r = b;
+    int expBits = mp_count_bits(exp);
+
+    if (mp_count_bits(base) > 1536) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expBits > 1536) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 1536) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_3072_from_mp(b, 48, base);
+        sp_3072_from_mp(e, 48, exp);
+        sp_3072_from_mp(m, 48, mod);
+
+        err = sp_3072_mod_exp_48(r, b, e, expBits, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        XMEMSET(r + 48, 0, sizeof(*r) * 48U);
+        err = sp_3072_to_mp(r, res);
+        res->used = mod->used;
+        mp_clamp(res);
+    }
+
+    XMEMSET(e, 0, sizeof(e));
+
+    return err;
+}
+
+#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
+
+#endif /* !WOLFSSL_SP_NO_3072 */
+
+#ifdef WOLFSSL_SP_4096
+/* Read big endian unsigned byte array into r.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
+ * a  Byte array.
+ * n  Number of bytes in array to read.
+ */
+static void sp_4096_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = n-1; i >= 0; i--) {
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 24U) {
+            r[j] &= 0xffffffff;
+            s = 32U - s;
+            if (j + 1 >= size) {
+                break;
+            }
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
+ * a  A multi-precision integer.
+ */
+static void sp_4096_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 32
+    int j;
+
+    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+    for (j = a->used; j < size; j++) {
+        r[j] = 0;
+    }
+#elif DIGIT_BIT > 32
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
+        r[j] &= 0xffffffff;
+        s = 32U - s;
+        if (j + 1 >= size) {
+            break;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 32U) <= (word32)DIGIT_BIT) {
+            s += 32U;
+            r[j] &= 0xffffffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#else
+    int i, j = 0, s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i]) << s;
+        if (s + DIGIT_BIT >= 32) {
+            r[j] &= 0xffffffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            s = 32 - s;
+            if (s == DIGIT_BIT) {
+                r[++j] = 0;
+                s = 0;
+            }
+            else {
+                r[++j] = a->dp[i] >> s;
+                s = DIGIT_BIT - s;
+            }
+        }
+        else {
+            s += DIGIT_BIT;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#endif
+}
+
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 512
+ *
+ * r  A single precision integer.
+ * a  Byte array.
+ */
+static void sp_4096_to_bin(sp_digit* r, byte* a)
+{
+    int i, j, s = 0, b;
+
+    j = 4096 / 8 - 1;
+    a[j] = 0;
+    for (i=0; i<128 && j>=0; i++) {
+        b = 0;
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
+            break;
+        }
+        while (b < 32) {
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
+                break;
+            }
+        }
+        s = 8 - (b - 32);
+        if (j >= 0) {
+            a[j] = 0;
+        }
+        if (s != 0) {
+            j++;
+        }
+    }
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_4096_add_64(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r7, #0\n\t"
+        "mvn	r7, r7\n\t"
+        "ldr	r4, [%[a], #0]\n\t"
+        "ldr	r5, [%[b], #0]\n\t"
+        "add	r4, r5\n\t"
+        "str	r4, [%[r], #0]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b], #4]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #4]\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "ldr	r5, [%[b], #8]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "ldr	r5, [%[b], #12]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #12]\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "ldr	r5, [%[b], #16]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "ldr	r5, [%[b], #20]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #20]\n\t"
+        "ldr	r4, [%[a], #24]\n\t"
+        "ldr	r5, [%[b], #24]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #24]\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "ldr	r5, [%[b], #28]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #28]\n\t"
+        "ldr	r4, [%[a], #32]\n\t"
+        "ldr	r5, [%[b], #32]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #32]\n\t"
+        "ldr	r4, [%[a], #36]\n\t"
+        "ldr	r5, [%[b], #36]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #36]\n\t"
+        "ldr	r4, [%[a], #40]\n\t"
+        "ldr	r5, [%[b], #40]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #40]\n\t"
+        "ldr	r4, [%[a], #44]\n\t"
+        "ldr	r5, [%[b], #44]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #44]\n\t"
+        "ldr	r4, [%[a], #48]\n\t"
+        "ldr	r5, [%[b], #48]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #48]\n\t"
+        "ldr	r4, [%[a], #52]\n\t"
+        "ldr	r5, [%[b], #52]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #52]\n\t"
+        "ldr	r4, [%[a], #56]\n\t"
+        "ldr	r5, [%[b], #56]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #56]\n\t"
+        "ldr	r4, [%[a], #60]\n\t"
+        "ldr	r5, [%[b], #60]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #60]\n\t"
+        "ldr	r4, [%[a], #64]\n\t"
+        "ldr	r5, [%[b], #64]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #64]\n\t"
+        "ldr	r4, [%[a], #68]\n\t"
+        "ldr	r5, [%[b], #68]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #68]\n\t"
+        "ldr	r4, [%[a], #72]\n\t"
+        "ldr	r5, [%[b], #72]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #72]\n\t"
+        "ldr	r4, [%[a], #76]\n\t"
+        "ldr	r5, [%[b], #76]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #76]\n\t"
+        "ldr	r4, [%[a], #80]\n\t"
+        "ldr	r5, [%[b], #80]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #80]\n\t"
+        "ldr	r4, [%[a], #84]\n\t"
+        "ldr	r5, [%[b], #84]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #84]\n\t"
+        "ldr	r4, [%[a], #88]\n\t"
+        "ldr	r5, [%[b], #88]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #88]\n\t"
+        "ldr	r4, [%[a], #92]\n\t"
+        "ldr	r5, [%[b], #92]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #92]\n\t"
+        "ldr	r4, [%[a], #96]\n\t"
+        "ldr	r5, [%[b], #96]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #96]\n\t"
+        "ldr	r4, [%[a], #100]\n\t"
+        "ldr	r5, [%[b], #100]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #100]\n\t"
+        "ldr	r4, [%[a], #104]\n\t"
+        "ldr	r5, [%[b], #104]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #104]\n\t"
+        "ldr	r4, [%[a], #108]\n\t"
+        "ldr	r5, [%[b], #108]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #108]\n\t"
+        "ldr	r4, [%[a], #112]\n\t"
+        "ldr	r5, [%[b], #112]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #112]\n\t"
+        "ldr	r4, [%[a], #116]\n\t"
+        "ldr	r5, [%[b], #116]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #116]\n\t"
+        "ldr	r4, [%[a], #120]\n\t"
+        "ldr	r5, [%[b], #120]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #120]\n\t"
+        "ldr	r4, [%[a], #124]\n\t"
+        "ldr	r5, [%[b], #124]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #124]\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c]\n\t"
+        "add	%[a], #0x80\n\t"
+        "add	%[b], #0x80\n\t"
+        "add	%[r], #0x80\n\t"
+        "add	%[c], r7\n\t"
+        "ldr	r4, [%[a], #0]\n\t"
+        "ldr	r5, [%[b], #0]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #0]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b], #4]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #4]\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "ldr	r5, [%[b], #8]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "ldr	r5, [%[b], #12]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #12]\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "ldr	r5, [%[b], #16]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "ldr	r5, [%[b], #20]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #20]\n\t"
+        "ldr	r4, [%[a], #24]\n\t"
+        "ldr	r5, [%[b], #24]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #24]\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "ldr	r5, [%[b], #28]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #28]\n\t"
+        "ldr	r4, [%[a], #32]\n\t"
+        "ldr	r5, [%[b], #32]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #32]\n\t"
+        "ldr	r4, [%[a], #36]\n\t"
+        "ldr	r5, [%[b], #36]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #36]\n\t"
+        "ldr	r4, [%[a], #40]\n\t"
+        "ldr	r5, [%[b], #40]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #40]\n\t"
+        "ldr	r4, [%[a], #44]\n\t"
+        "ldr	r5, [%[b], #44]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #44]\n\t"
+        "ldr	r4, [%[a], #48]\n\t"
+        "ldr	r5, [%[b], #48]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #48]\n\t"
+        "ldr	r4, [%[a], #52]\n\t"
+        "ldr	r5, [%[b], #52]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #52]\n\t"
+        "ldr	r4, [%[a], #56]\n\t"
+        "ldr	r5, [%[b], #56]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #56]\n\t"
+        "ldr	r4, [%[a], #60]\n\t"
+        "ldr	r5, [%[b], #60]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #60]\n\t"
+        "ldr	r4, [%[a], #64]\n\t"
+        "ldr	r5, [%[b], #64]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #64]\n\t"
+        "ldr	r4, [%[a], #68]\n\t"
+        "ldr	r5, [%[b], #68]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #68]\n\t"
+        "ldr	r4, [%[a], #72]\n\t"
+        "ldr	r5, [%[b], #72]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #72]\n\t"
+        "ldr	r4, [%[a], #76]\n\t"
+        "ldr	r5, [%[b], #76]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #76]\n\t"
+        "ldr	r4, [%[a], #80]\n\t"
+        "ldr	r5, [%[b], #80]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #80]\n\t"
+        "ldr	r4, [%[a], #84]\n\t"
+        "ldr	r5, [%[b], #84]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #84]\n\t"
+        "ldr	r4, [%[a], #88]\n\t"
+        "ldr	r5, [%[b], #88]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #88]\n\t"
+        "ldr	r4, [%[a], #92]\n\t"
+        "ldr	r5, [%[b], #92]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #92]\n\t"
+        "ldr	r4, [%[a], #96]\n\t"
+        "ldr	r5, [%[b], #96]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #96]\n\t"
+        "ldr	r4, [%[a], #100]\n\t"
+        "ldr	r5, [%[b], #100]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #100]\n\t"
+        "ldr	r4, [%[a], #104]\n\t"
+        "ldr	r5, [%[b], #104]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #104]\n\t"
+        "ldr	r4, [%[a], #108]\n\t"
+        "ldr	r5, [%[b], #108]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #108]\n\t"
+        "ldr	r4, [%[a], #112]\n\t"
+        "ldr	r5, [%[b], #112]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #112]\n\t"
+        "ldr	r4, [%[a], #116]\n\t"
+        "ldr	r5, [%[b], #116]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #116]\n\t"
+        "ldr	r4, [%[a], #120]\n\t"
+        "ldr	r5, [%[b], #120]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #120]\n\t"
+        "ldr	r4, [%[a], #124]\n\t"
+        "ldr	r5, [%[b], #124]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #124]\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c]\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r7"
+    );
+
+    return c;
+}
+
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_4096_sub_in_place_128(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldr	r3, [%[a], #0]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b], #0]\n\t"
+        "ldr	r6, [%[b], #4]\n\t"
+        "sub	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #0]\n\t"
+        "str	r4, [%[a], #4]\n\t"
+        "ldr	r3, [%[a], #8]\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "ldr	r5, [%[b], #8]\n\t"
+        "ldr	r6, [%[b], #12]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #8]\n\t"
+        "str	r4, [%[a], #12]\n\t"
+        "ldr	r3, [%[a], #16]\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "ldr	r5, [%[b], #16]\n\t"
+        "ldr	r6, [%[b], #20]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #16]\n\t"
+        "str	r4, [%[a], #20]\n\t"
+        "ldr	r3, [%[a], #24]\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "ldr	r5, [%[b], #24]\n\t"
+        "ldr	r6, [%[b], #28]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #24]\n\t"
+        "str	r4, [%[a], #28]\n\t"
+        "ldr	r3, [%[a], #32]\n\t"
+        "ldr	r4, [%[a], #36]\n\t"
+        "ldr	r5, [%[b], #32]\n\t"
+        "ldr	r6, [%[b], #36]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #32]\n\t"
+        "str	r4, [%[a], #36]\n\t"
+        "ldr	r3, [%[a], #40]\n\t"
+        "ldr	r4, [%[a], #44]\n\t"
+        "ldr	r5, [%[b], #40]\n\t"
+        "ldr	r6, [%[b], #44]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #40]\n\t"
+        "str	r4, [%[a], #44]\n\t"
+        "ldr	r3, [%[a], #48]\n\t"
+        "ldr	r4, [%[a], #52]\n\t"
+        "ldr	r5, [%[b], #48]\n\t"
+        "ldr	r6, [%[b], #52]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #48]\n\t"
+        "str	r4, [%[a], #52]\n\t"
+        "ldr	r3, [%[a], #56]\n\t"
+        "ldr	r4, [%[a], #60]\n\t"
+        "ldr	r5, [%[b], #56]\n\t"
+        "ldr	r6, [%[b], #60]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #56]\n\t"
+        "str	r4, [%[a], #60]\n\t"
+        "ldr	r3, [%[a], #64]\n\t"
+        "ldr	r4, [%[a], #68]\n\t"
+        "ldr	r5, [%[b], #64]\n\t"
+        "ldr	r6, [%[b], #68]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #64]\n\t"
+        "str	r4, [%[a], #68]\n\t"
+        "ldr	r3, [%[a], #72]\n\t"
+        "ldr	r4, [%[a], #76]\n\t"
+        "ldr	r5, [%[b], #72]\n\t"
+        "ldr	r6, [%[b], #76]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #72]\n\t"
+        "str	r4, [%[a], #76]\n\t"
+        "ldr	r3, [%[a], #80]\n\t"
+        "ldr	r4, [%[a], #84]\n\t"
+        "ldr	r5, [%[b], #80]\n\t"
+        "ldr	r6, [%[b], #84]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #80]\n\t"
+        "str	r4, [%[a], #84]\n\t"
+        "ldr	r3, [%[a], #88]\n\t"
+        "ldr	r4, [%[a], #92]\n\t"
+        "ldr	r5, [%[b], #88]\n\t"
+        "ldr	r6, [%[b], #92]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #88]\n\t"
+        "str	r4, [%[a], #92]\n\t"
+        "ldr	r3, [%[a], #96]\n\t"
+        "ldr	r4, [%[a], #100]\n\t"
+        "ldr	r5, [%[b], #96]\n\t"
+        "ldr	r6, [%[b], #100]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #96]\n\t"
+        "str	r4, [%[a], #100]\n\t"
+        "ldr	r3, [%[a], #104]\n\t"
+        "ldr	r4, [%[a], #108]\n\t"
+        "ldr	r5, [%[b], #104]\n\t"
+        "ldr	r6, [%[b], #108]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #104]\n\t"
+        "str	r4, [%[a], #108]\n\t"
+        "ldr	r3, [%[a], #112]\n\t"
+        "ldr	r4, [%[a], #116]\n\t"
+        "ldr	r5, [%[b], #112]\n\t"
+        "ldr	r6, [%[b], #116]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #112]\n\t"
+        "str	r4, [%[a], #116]\n\t"
+        "ldr	r3, [%[a], #120]\n\t"
+        "ldr	r4, [%[a], #124]\n\t"
+        "ldr	r5, [%[b], #120]\n\t"
+        "ldr	r6, [%[b], #124]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #120]\n\t"
+        "str	r4, [%[a], #124]\n\t"
+        "sbc	%[c], %[c]\n\t"
+        "add	%[a], #0x80\n\t"
+        "add	%[b], #0x80\n\t"
+        "mov	r5, #0\n\t"
+        "sub	r5, %[c]\n\t"
+        "ldr	r3, [%[a], #0]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b], #0]\n\t"
+        "ldr	r6, [%[b], #4]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #0]\n\t"
+        "str	r4, [%[a], #4]\n\t"
+        "ldr	r3, [%[a], #8]\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "ldr	r5, [%[b], #8]\n\t"
+        "ldr	r6, [%[b], #12]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #8]\n\t"
+        "str	r4, [%[a], #12]\n\t"
+        "ldr	r3, [%[a], #16]\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "ldr	r5, [%[b], #16]\n\t"
+        "ldr	r6, [%[b], #20]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #16]\n\t"
+        "str	r4, [%[a], #20]\n\t"
+        "ldr	r3, [%[a], #24]\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "ldr	r5, [%[b], #24]\n\t"
+        "ldr	r6, [%[b], #28]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #24]\n\t"
+        "str	r4, [%[a], #28]\n\t"
+        "ldr	r3, [%[a], #32]\n\t"
+        "ldr	r4, [%[a], #36]\n\t"
+        "ldr	r5, [%[b], #32]\n\t"
+        "ldr	r6, [%[b], #36]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #32]\n\t"
+        "str	r4, [%[a], #36]\n\t"
+        "ldr	r3, [%[a], #40]\n\t"
+        "ldr	r4, [%[a], #44]\n\t"
+        "ldr	r5, [%[b], #40]\n\t"
+        "ldr	r6, [%[b], #44]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #40]\n\t"
+        "str	r4, [%[a], #44]\n\t"
+        "ldr	r3, [%[a], #48]\n\t"
+        "ldr	r4, [%[a], #52]\n\t"
+        "ldr	r5, [%[b], #48]\n\t"
+        "ldr	r6, [%[b], #52]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #48]\n\t"
+        "str	r4, [%[a], #52]\n\t"
+        "ldr	r3, [%[a], #56]\n\t"
+        "ldr	r4, [%[a], #60]\n\t"
+        "ldr	r5, [%[b], #56]\n\t"
+        "ldr	r6, [%[b], #60]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #56]\n\t"
+        "str	r4, [%[a], #60]\n\t"
+        "ldr	r3, [%[a], #64]\n\t"
+        "ldr	r4, [%[a], #68]\n\t"
+        "ldr	r5, [%[b], #64]\n\t"
+        "ldr	r6, [%[b], #68]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #64]\n\t"
+        "str	r4, [%[a], #68]\n\t"
+        "ldr	r3, [%[a], #72]\n\t"
+        "ldr	r4, [%[a], #76]\n\t"
+        "ldr	r5, [%[b], #72]\n\t"
+        "ldr	r6, [%[b], #76]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #72]\n\t"
+        "str	r4, [%[a], #76]\n\t"
+        "ldr	r3, [%[a], #80]\n\t"
+        "ldr	r4, [%[a], #84]\n\t"
+        "ldr	r5, [%[b], #80]\n\t"
+        "ldr	r6, [%[b], #84]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #80]\n\t"
+        "str	r4, [%[a], #84]\n\t"
+        "ldr	r3, [%[a], #88]\n\t"
+        "ldr	r4, [%[a], #92]\n\t"
+        "ldr	r5, [%[b], #88]\n\t"
+        "ldr	r6, [%[b], #92]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #88]\n\t"
+        "str	r4, [%[a], #92]\n\t"
+        "ldr	r3, [%[a], #96]\n\t"
+        "ldr	r4, [%[a], #100]\n\t"
+        "ldr	r5, [%[b], #96]\n\t"
+        "ldr	r6, [%[b], #100]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #96]\n\t"
+        "str	r4, [%[a], #100]\n\t"
+        "ldr	r3, [%[a], #104]\n\t"
+        "ldr	r4, [%[a], #108]\n\t"
+        "ldr	r5, [%[b], #104]\n\t"
+        "ldr	r6, [%[b], #108]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #104]\n\t"
+        "str	r4, [%[a], #108]\n\t"
+        "ldr	r3, [%[a], #112]\n\t"
+        "ldr	r4, [%[a], #116]\n\t"
+        "ldr	r5, [%[b], #112]\n\t"
+        "ldr	r6, [%[b], #116]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #112]\n\t"
+        "str	r4, [%[a], #116]\n\t"
+        "ldr	r3, [%[a], #120]\n\t"
+        "ldr	r4, [%[a], #124]\n\t"
+        "ldr	r5, [%[b], #120]\n\t"
+        "ldr	r6, [%[b], #124]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #120]\n\t"
+        "str	r4, [%[a], #124]\n\t"
+        "sbc	%[c], %[c]\n\t"
+        "add	%[a], #0x80\n\t"
+        "add	%[b], #0x80\n\t"
+        "mov	r5, #0\n\t"
+        "sub	r5, %[c]\n\t"
+        "ldr	r3, [%[a], #0]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b], #0]\n\t"
+        "ldr	r6, [%[b], #4]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #0]\n\t"
+        "str	r4, [%[a], #4]\n\t"
+        "ldr	r3, [%[a], #8]\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "ldr	r5, [%[b], #8]\n\t"
+        "ldr	r6, [%[b], #12]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #8]\n\t"
+        "str	r4, [%[a], #12]\n\t"
+        "ldr	r3, [%[a], #16]\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "ldr	r5, [%[b], #16]\n\t"
+        "ldr	r6, [%[b], #20]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #16]\n\t"
+        "str	r4, [%[a], #20]\n\t"
+        "ldr	r3, [%[a], #24]\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "ldr	r5, [%[b], #24]\n\t"
+        "ldr	r6, [%[b], #28]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #24]\n\t"
+        "str	r4, [%[a], #28]\n\t"
+        "ldr	r3, [%[a], #32]\n\t"
+        "ldr	r4, [%[a], #36]\n\t"
+        "ldr	r5, [%[b], #32]\n\t"
+        "ldr	r6, [%[b], #36]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #32]\n\t"
+        "str	r4, [%[a], #36]\n\t"
+        "ldr	r3, [%[a], #40]\n\t"
+        "ldr	r4, [%[a], #44]\n\t"
+        "ldr	r5, [%[b], #40]\n\t"
+        "ldr	r6, [%[b], #44]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #40]\n\t"
+        "str	r4, [%[a], #44]\n\t"
+        "ldr	r3, [%[a], #48]\n\t"
+        "ldr	r4, [%[a], #52]\n\t"
+        "ldr	r5, [%[b], #48]\n\t"
+        "ldr	r6, [%[b], #52]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #48]\n\t"
+        "str	r4, [%[a], #52]\n\t"
+        "ldr	r3, [%[a], #56]\n\t"
+        "ldr	r4, [%[a], #60]\n\t"
+        "ldr	r5, [%[b], #56]\n\t"
+        "ldr	r6, [%[b], #60]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #56]\n\t"
+        "str	r4, [%[a], #60]\n\t"
+        "ldr	r3, [%[a], #64]\n\t"
+        "ldr	r4, [%[a], #68]\n\t"
+        "ldr	r5, [%[b], #64]\n\t"
+        "ldr	r6, [%[b], #68]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #64]\n\t"
+        "str	r4, [%[a], #68]\n\t"
+        "ldr	r3, [%[a], #72]\n\t"
+        "ldr	r4, [%[a], #76]\n\t"
+        "ldr	r5, [%[b], #72]\n\t"
+        "ldr	r6, [%[b], #76]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #72]\n\t"
+        "str	r4, [%[a], #76]\n\t"
+        "ldr	r3, [%[a], #80]\n\t"
+        "ldr	r4, [%[a], #84]\n\t"
+        "ldr	r5, [%[b], #80]\n\t"
+        "ldr	r6, [%[b], #84]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #80]\n\t"
+        "str	r4, [%[a], #84]\n\t"
+        "ldr	r3, [%[a], #88]\n\t"
+        "ldr	r4, [%[a], #92]\n\t"
+        "ldr	r5, [%[b], #88]\n\t"
+        "ldr	r6, [%[b], #92]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #88]\n\t"
+        "str	r4, [%[a], #92]\n\t"
+        "ldr	r3, [%[a], #96]\n\t"
+        "ldr	r4, [%[a], #100]\n\t"
+        "ldr	r5, [%[b], #96]\n\t"
+        "ldr	r6, [%[b], #100]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #96]\n\t"
+        "str	r4, [%[a], #100]\n\t"
+        "ldr	r3, [%[a], #104]\n\t"
+        "ldr	r4, [%[a], #108]\n\t"
+        "ldr	r5, [%[b], #104]\n\t"
+        "ldr	r6, [%[b], #108]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #104]\n\t"
+        "str	r4, [%[a], #108]\n\t"
+        "ldr	r3, [%[a], #112]\n\t"
+        "ldr	r4, [%[a], #116]\n\t"
+        "ldr	r5, [%[b], #112]\n\t"
+        "ldr	r6, [%[b], #116]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #112]\n\t"
+        "str	r4, [%[a], #116]\n\t"
+        "ldr	r3, [%[a], #120]\n\t"
+        "ldr	r4, [%[a], #124]\n\t"
+        "ldr	r5, [%[b], #120]\n\t"
+        "ldr	r6, [%[b], #124]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #120]\n\t"
+        "str	r4, [%[a], #124]\n\t"
+        "sbc	%[c], %[c]\n\t"
+        "add	%[a], #0x80\n\t"
+        "add	%[b], #0x80\n\t"
+        "mov	r5, #0\n\t"
+        "sub	r5, %[c]\n\t"
+        "ldr	r3, [%[a], #0]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b], #0]\n\t"
+        "ldr	r6, [%[b], #4]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #0]\n\t"
+        "str	r4, [%[a], #4]\n\t"
+        "ldr	r3, [%[a], #8]\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "ldr	r5, [%[b], #8]\n\t"
+        "ldr	r6, [%[b], #12]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #8]\n\t"
+        "str	r4, [%[a], #12]\n\t"
+        "ldr	r3, [%[a], #16]\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "ldr	r5, [%[b], #16]\n\t"
+        "ldr	r6, [%[b], #20]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #16]\n\t"
+        "str	r4, [%[a], #20]\n\t"
+        "ldr	r3, [%[a], #24]\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "ldr	r5, [%[b], #24]\n\t"
+        "ldr	r6, [%[b], #28]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #24]\n\t"
+        "str	r4, [%[a], #28]\n\t"
+        "ldr	r3, [%[a], #32]\n\t"
+        "ldr	r4, [%[a], #36]\n\t"
+        "ldr	r5, [%[b], #32]\n\t"
+        "ldr	r6, [%[b], #36]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #32]\n\t"
+        "str	r4, [%[a], #36]\n\t"
+        "ldr	r3, [%[a], #40]\n\t"
+        "ldr	r4, [%[a], #44]\n\t"
+        "ldr	r5, [%[b], #40]\n\t"
+        "ldr	r6, [%[b], #44]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #40]\n\t"
+        "str	r4, [%[a], #44]\n\t"
+        "ldr	r3, [%[a], #48]\n\t"
+        "ldr	r4, [%[a], #52]\n\t"
+        "ldr	r5, [%[b], #48]\n\t"
+        "ldr	r6, [%[b], #52]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #48]\n\t"
+        "str	r4, [%[a], #52]\n\t"
+        "ldr	r3, [%[a], #56]\n\t"
+        "ldr	r4, [%[a], #60]\n\t"
+        "ldr	r5, [%[b], #56]\n\t"
+        "ldr	r6, [%[b], #60]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #56]\n\t"
+        "str	r4, [%[a], #60]\n\t"
+        "ldr	r3, [%[a], #64]\n\t"
+        "ldr	r4, [%[a], #68]\n\t"
+        "ldr	r5, [%[b], #64]\n\t"
+        "ldr	r6, [%[b], #68]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #64]\n\t"
+        "str	r4, [%[a], #68]\n\t"
+        "ldr	r3, [%[a], #72]\n\t"
+        "ldr	r4, [%[a], #76]\n\t"
+        "ldr	r5, [%[b], #72]\n\t"
+        "ldr	r6, [%[b], #76]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #72]\n\t"
+        "str	r4, [%[a], #76]\n\t"
+        "ldr	r3, [%[a], #80]\n\t"
+        "ldr	r4, [%[a], #84]\n\t"
+        "ldr	r5, [%[b], #80]\n\t"
+        "ldr	r6, [%[b], #84]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #80]\n\t"
+        "str	r4, [%[a], #84]\n\t"
+        "ldr	r3, [%[a], #88]\n\t"
+        "ldr	r4, [%[a], #92]\n\t"
+        "ldr	r5, [%[b], #88]\n\t"
+        "ldr	r6, [%[b], #92]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #88]\n\t"
+        "str	r4, [%[a], #92]\n\t"
+        "ldr	r3, [%[a], #96]\n\t"
+        "ldr	r4, [%[a], #100]\n\t"
+        "ldr	r5, [%[b], #96]\n\t"
+        "ldr	r6, [%[b], #100]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #96]\n\t"
+        "str	r4, [%[a], #100]\n\t"
+        "ldr	r3, [%[a], #104]\n\t"
+        "ldr	r4, [%[a], #108]\n\t"
+        "ldr	r5, [%[b], #104]\n\t"
+        "ldr	r6, [%[b], #108]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #104]\n\t"
+        "str	r4, [%[a], #108]\n\t"
+        "ldr	r3, [%[a], #112]\n\t"
+        "ldr	r4, [%[a], #116]\n\t"
+        "ldr	r5, [%[b], #112]\n\t"
+        "ldr	r6, [%[b], #116]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #112]\n\t"
+        "str	r4, [%[a], #116]\n\t"
+        "ldr	r3, [%[a], #120]\n\t"
+        "ldr	r4, [%[a], #124]\n\t"
+        "ldr	r5, [%[b], #120]\n\t"
+        "ldr	r6, [%[b], #124]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #120]\n\t"
+        "str	r4, [%[a], #124]\n\t"
+        "sbc	%[c], %[c]\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6"
+    );
+
+    return c;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r7, #0\n\t"
+        "mvn	r7, r7\n\t"
+        "ldr	r4, [%[a], #0]\n\t"
+        "ldr	r5, [%[b], #0]\n\t"
+        "add	r4, r5\n\t"
+        "str	r4, [%[r], #0]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b], #4]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #4]\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "ldr	r5, [%[b], #8]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "ldr	r5, [%[b], #12]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #12]\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "ldr	r5, [%[b], #16]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "ldr	r5, [%[b], #20]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #20]\n\t"
+        "ldr	r4, [%[a], #24]\n\t"
+        "ldr	r5, [%[b], #24]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #24]\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "ldr	r5, [%[b], #28]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #28]\n\t"
+        "ldr	r4, [%[a], #32]\n\t"
+        "ldr	r5, [%[b], #32]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #32]\n\t"
+        "ldr	r4, [%[a], #36]\n\t"
+        "ldr	r5, [%[b], #36]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #36]\n\t"
+        "ldr	r4, [%[a], #40]\n\t"
+        "ldr	r5, [%[b], #40]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #40]\n\t"
+        "ldr	r4, [%[a], #44]\n\t"
+        "ldr	r5, [%[b], #44]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #44]\n\t"
+        "ldr	r4, [%[a], #48]\n\t"
+        "ldr	r5, [%[b], #48]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #48]\n\t"
+        "ldr	r4, [%[a], #52]\n\t"
+        "ldr	r5, [%[b], #52]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #52]\n\t"
+        "ldr	r4, [%[a], #56]\n\t"
+        "ldr	r5, [%[b], #56]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #56]\n\t"
+        "ldr	r4, [%[a], #60]\n\t"
+        "ldr	r5, [%[b], #60]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #60]\n\t"
+        "ldr	r4, [%[a], #64]\n\t"
+        "ldr	r5, [%[b], #64]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #64]\n\t"
+        "ldr	r4, [%[a], #68]\n\t"
+        "ldr	r5, [%[b], #68]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #68]\n\t"
+        "ldr	r4, [%[a], #72]\n\t"
+        "ldr	r5, [%[b], #72]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #72]\n\t"
+        "ldr	r4, [%[a], #76]\n\t"
+        "ldr	r5, [%[b], #76]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #76]\n\t"
+        "ldr	r4, [%[a], #80]\n\t"
+        "ldr	r5, [%[b], #80]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #80]\n\t"
+        "ldr	r4, [%[a], #84]\n\t"
+        "ldr	r5, [%[b], #84]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #84]\n\t"
+        "ldr	r4, [%[a], #88]\n\t"
+        "ldr	r5, [%[b], #88]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #88]\n\t"
+        "ldr	r4, [%[a], #92]\n\t"
+        "ldr	r5, [%[b], #92]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #92]\n\t"
+        "ldr	r4, [%[a], #96]\n\t"
+        "ldr	r5, [%[b], #96]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #96]\n\t"
+        "ldr	r4, [%[a], #100]\n\t"
+        "ldr	r5, [%[b], #100]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #100]\n\t"
+        "ldr	r4, [%[a], #104]\n\t"
+        "ldr	r5, [%[b], #104]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #104]\n\t"
+        "ldr	r4, [%[a], #108]\n\t"
+        "ldr	r5, [%[b], #108]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #108]\n\t"
+        "ldr	r4, [%[a], #112]\n\t"
+        "ldr	r5, [%[b], #112]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #112]\n\t"
+        "ldr	r4, [%[a], #116]\n\t"
+        "ldr	r5, [%[b], #116]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #116]\n\t"
+        "ldr	r4, [%[a], #120]\n\t"
+        "ldr	r5, [%[b], #120]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #120]\n\t"
+        "ldr	r4, [%[a], #124]\n\t"
+        "ldr	r5, [%[b], #124]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #124]\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c]\n\t"
+        "add	%[a], #0x80\n\t"
+        "add	%[b], #0x80\n\t"
+        "add	%[r], #0x80\n\t"
+        "add	%[c], r7\n\t"
+        "ldr	r4, [%[a], #0]\n\t"
+        "ldr	r5, [%[b], #0]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #0]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b], #4]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #4]\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "ldr	r5, [%[b], #8]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "ldr	r5, [%[b], #12]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #12]\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "ldr	r5, [%[b], #16]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "ldr	r5, [%[b], #20]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #20]\n\t"
+        "ldr	r4, [%[a], #24]\n\t"
+        "ldr	r5, [%[b], #24]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #24]\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "ldr	r5, [%[b], #28]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #28]\n\t"
+        "ldr	r4, [%[a], #32]\n\t"
+        "ldr	r5, [%[b], #32]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #32]\n\t"
+        "ldr	r4, [%[a], #36]\n\t"
+        "ldr	r5, [%[b], #36]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #36]\n\t"
+        "ldr	r4, [%[a], #40]\n\t"
+        "ldr	r5, [%[b], #40]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #40]\n\t"
+        "ldr	r4, [%[a], #44]\n\t"
+        "ldr	r5, [%[b], #44]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #44]\n\t"
+        "ldr	r4, [%[a], #48]\n\t"
+        "ldr	r5, [%[b], #48]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #48]\n\t"
+        "ldr	r4, [%[a], #52]\n\t"
+        "ldr	r5, [%[b], #52]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #52]\n\t"
+        "ldr	r4, [%[a], #56]\n\t"
+        "ldr	r5, [%[b], #56]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #56]\n\t"
+        "ldr	r4, [%[a], #60]\n\t"
+        "ldr	r5, [%[b], #60]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #60]\n\t"
+        "ldr	r4, [%[a], #64]\n\t"
+        "ldr	r5, [%[b], #64]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #64]\n\t"
+        "ldr	r4, [%[a], #68]\n\t"
+        "ldr	r5, [%[b], #68]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #68]\n\t"
+        "ldr	r4, [%[a], #72]\n\t"
+        "ldr	r5, [%[b], #72]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #72]\n\t"
+        "ldr	r4, [%[a], #76]\n\t"
+        "ldr	r5, [%[b], #76]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #76]\n\t"
+        "ldr	r4, [%[a], #80]\n\t"
+        "ldr	r5, [%[b], #80]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #80]\n\t"
+        "ldr	r4, [%[a], #84]\n\t"
+        "ldr	r5, [%[b], #84]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #84]\n\t"
+        "ldr	r4, [%[a], #88]\n\t"
+        "ldr	r5, [%[b], #88]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #88]\n\t"
+        "ldr	r4, [%[a], #92]\n\t"
+        "ldr	r5, [%[b], #92]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #92]\n\t"
+        "ldr	r4, [%[a], #96]\n\t"
+        "ldr	r5, [%[b], #96]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #96]\n\t"
+        "ldr	r4, [%[a], #100]\n\t"
+        "ldr	r5, [%[b], #100]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #100]\n\t"
+        "ldr	r4, [%[a], #104]\n\t"
+        "ldr	r5, [%[b], #104]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #104]\n\t"
+        "ldr	r4, [%[a], #108]\n\t"
+        "ldr	r5, [%[b], #108]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #108]\n\t"
+        "ldr	r4, [%[a], #112]\n\t"
+        "ldr	r5, [%[b], #112]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #112]\n\t"
+        "ldr	r4, [%[a], #116]\n\t"
+        "ldr	r5, [%[b], #116]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #116]\n\t"
+        "ldr	r4, [%[a], #120]\n\t"
+        "ldr	r5, [%[b], #120]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #120]\n\t"
+        "ldr	r4, [%[a], #124]\n\t"
+        "ldr	r5, [%[b], #124]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #124]\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c]\n\t"
+        "add	%[a], #0x80\n\t"
+        "add	%[b], #0x80\n\t"
+        "add	%[r], #0x80\n\t"
+        "add	%[c], r7\n\t"
+        "ldr	r4, [%[a], #0]\n\t"
+        "ldr	r5, [%[b], #0]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #0]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b], #4]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #4]\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "ldr	r5, [%[b], #8]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "ldr	r5, [%[b], #12]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #12]\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "ldr	r5, [%[b], #16]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "ldr	r5, [%[b], #20]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #20]\n\t"
+        "ldr	r4, [%[a], #24]\n\t"
+        "ldr	r5, [%[b], #24]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #24]\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "ldr	r5, [%[b], #28]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #28]\n\t"
+        "ldr	r4, [%[a], #32]\n\t"
+        "ldr	r5, [%[b], #32]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #32]\n\t"
+        "ldr	r4, [%[a], #36]\n\t"
+        "ldr	r5, [%[b], #36]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #36]\n\t"
+        "ldr	r4, [%[a], #40]\n\t"
+        "ldr	r5, [%[b], #40]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #40]\n\t"
+        "ldr	r4, [%[a], #44]\n\t"
+        "ldr	r5, [%[b], #44]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #44]\n\t"
+        "ldr	r4, [%[a], #48]\n\t"
+        "ldr	r5, [%[b], #48]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #48]\n\t"
+        "ldr	r4, [%[a], #52]\n\t"
+        "ldr	r5, [%[b], #52]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #52]\n\t"
+        "ldr	r4, [%[a], #56]\n\t"
+        "ldr	r5, [%[b], #56]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #56]\n\t"
+        "ldr	r4, [%[a], #60]\n\t"
+        "ldr	r5, [%[b], #60]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #60]\n\t"
+        "ldr	r4, [%[a], #64]\n\t"
+        "ldr	r5, [%[b], #64]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #64]\n\t"
+        "ldr	r4, [%[a], #68]\n\t"
+        "ldr	r5, [%[b], #68]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #68]\n\t"
+        "ldr	r4, [%[a], #72]\n\t"
+        "ldr	r5, [%[b], #72]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #72]\n\t"
+        "ldr	r4, [%[a], #76]\n\t"
+        "ldr	r5, [%[b], #76]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #76]\n\t"
+        "ldr	r4, [%[a], #80]\n\t"
+        "ldr	r5, [%[b], #80]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #80]\n\t"
+        "ldr	r4, [%[a], #84]\n\t"
+        "ldr	r5, [%[b], #84]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #84]\n\t"
+        "ldr	r4, [%[a], #88]\n\t"
+        "ldr	r5, [%[b], #88]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #88]\n\t"
+        "ldr	r4, [%[a], #92]\n\t"
+        "ldr	r5, [%[b], #92]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #92]\n\t"
+        "ldr	r4, [%[a], #96]\n\t"
+        "ldr	r5, [%[b], #96]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #96]\n\t"
+        "ldr	r4, [%[a], #100]\n\t"
+        "ldr	r5, [%[b], #100]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #100]\n\t"
+        "ldr	r4, [%[a], #104]\n\t"
+        "ldr	r5, [%[b], #104]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #104]\n\t"
+        "ldr	r4, [%[a], #108]\n\t"
+        "ldr	r5, [%[b], #108]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #108]\n\t"
+        "ldr	r4, [%[a], #112]\n\t"
+        "ldr	r5, [%[b], #112]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #112]\n\t"
+        "ldr	r4, [%[a], #116]\n\t"
+        "ldr	r5, [%[b], #116]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #116]\n\t"
+        "ldr	r4, [%[a], #120]\n\t"
+        "ldr	r5, [%[b], #120]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #120]\n\t"
+        "ldr	r4, [%[a], #124]\n\t"
+        "ldr	r5, [%[b], #124]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #124]\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c]\n\t"
+        "add	%[a], #0x80\n\t"
+        "add	%[b], #0x80\n\t"
+        "add	%[r], #0x80\n\t"
+        "add	%[c], r7\n\t"
+        "ldr	r4, [%[a], #0]\n\t"
+        "ldr	r5, [%[b], #0]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #0]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b], #4]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #4]\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "ldr	r5, [%[b], #8]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "ldr	r5, [%[b], #12]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #12]\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "ldr	r5, [%[b], #16]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "ldr	r5, [%[b], #20]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #20]\n\t"
+        "ldr	r4, [%[a], #24]\n\t"
+        "ldr	r5, [%[b], #24]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #24]\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "ldr	r5, [%[b], #28]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #28]\n\t"
+        "ldr	r4, [%[a], #32]\n\t"
+        "ldr	r5, [%[b], #32]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #32]\n\t"
+        "ldr	r4, [%[a], #36]\n\t"
+        "ldr	r5, [%[b], #36]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #36]\n\t"
+        "ldr	r4, [%[a], #40]\n\t"
+        "ldr	r5, [%[b], #40]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #40]\n\t"
+        "ldr	r4, [%[a], #44]\n\t"
+        "ldr	r5, [%[b], #44]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #44]\n\t"
+        "ldr	r4, [%[a], #48]\n\t"
+        "ldr	r5, [%[b], #48]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #48]\n\t"
+        "ldr	r4, [%[a], #52]\n\t"
+        "ldr	r5, [%[b], #52]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #52]\n\t"
+        "ldr	r4, [%[a], #56]\n\t"
+        "ldr	r5, [%[b], #56]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #56]\n\t"
+        "ldr	r4, [%[a], #60]\n\t"
+        "ldr	r5, [%[b], #60]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #60]\n\t"
+        "ldr	r4, [%[a], #64]\n\t"
+        "ldr	r5, [%[b], #64]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #64]\n\t"
+        "ldr	r4, [%[a], #68]\n\t"
+        "ldr	r5, [%[b], #68]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #68]\n\t"
+        "ldr	r4, [%[a], #72]\n\t"
+        "ldr	r5, [%[b], #72]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #72]\n\t"
+        "ldr	r4, [%[a], #76]\n\t"
+        "ldr	r5, [%[b], #76]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #76]\n\t"
+        "ldr	r4, [%[a], #80]\n\t"
+        "ldr	r5, [%[b], #80]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #80]\n\t"
+        "ldr	r4, [%[a], #84]\n\t"
+        "ldr	r5, [%[b], #84]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #84]\n\t"
+        "ldr	r4, [%[a], #88]\n\t"
+        "ldr	r5, [%[b], #88]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #88]\n\t"
+        "ldr	r4, [%[a], #92]\n\t"
+        "ldr	r5, [%[b], #92]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #92]\n\t"
+        "ldr	r4, [%[a], #96]\n\t"
+        "ldr	r5, [%[b], #96]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #96]\n\t"
+        "ldr	r4, [%[a], #100]\n\t"
+        "ldr	r5, [%[b], #100]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #100]\n\t"
+        "ldr	r4, [%[a], #104]\n\t"
+        "ldr	r5, [%[b], #104]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #104]\n\t"
+        "ldr	r4, [%[a], #108]\n\t"
+        "ldr	r5, [%[b], #108]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #108]\n\t"
+        "ldr	r4, [%[a], #112]\n\t"
+        "ldr	r5, [%[b], #112]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #112]\n\t"
+        "ldr	r4, [%[a], #116]\n\t"
+        "ldr	r5, [%[b], #116]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #116]\n\t"
+        "ldr	r4, [%[a], #120]\n\t"
+        "ldr	r5, [%[b], #120]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #120]\n\t"
+        "ldr	r4, [%[a], #124]\n\t"
+        "ldr	r5, [%[b], #124]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #124]\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c]\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r7"
+    );
+
+    return c;
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_mul_64(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit tmp[64 * 2];
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r8, r3\n\t"
+        "mov	r11, %[r]\n\t"
+        "mov	r9, %[a]\n\t"
+        "mov	r10, %[b]\n\t"
+        "mov	r6, #1\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, r9\n\t"
+        "mov	r12, r6\n\t"
+        "\n1:\n\t"
+        "mov	%[r], #0\n\t"
+        "mov	r5, #0\n\t"
+        "mov	r6, #252\n\t"
+        "mov	%[a], r8\n\t"
+        "sub	%[a], r6\n\t"
+        "sbc	r6, r6\n\t"
+        "mvn	r6, r6\n\t"
+        "and	%[a], r6\n\t"
+        "mov	%[b], r8\n\t"
+        "sub	%[b], %[a]\n\t"
+        "add	%[a], r9\n\t"
+        "add	%[b], r10\n\t"
+        "\n2:\n\t"
+        "# Multiply Start\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r7, [%[b]]\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r3, r7\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r7, [%[b]]\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r7, [%[b]]\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r7, [%[b]]\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "# Multiply Done\n\t"
+        "add	%[a], #4\n\t"
+        "sub	%[b], #4\n\t"
+        "cmp	%[a], r12\n\t"
+        "beq	3f\n\t"
+        "mov	r6, r8\n\t"
+        "add	r6, r9\n\t"
+        "cmp	%[a], r6\n\t"
+        "ble	2b\n\t"
+        "\n3:\n\t"
+        "mov	%[r], r11\n\t"
+        "mov	r7, r8\n\t"
+        "str	r3, [%[r], r7]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "add	r7, #4\n\t"
+        "mov	r8, r7\n\t"
+        "mov	r6, #1\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, #248\n\t"
+        "cmp	r7, r6\n\t"
+        "ble	1b\n\t"
+        "str	r3, [%[r], r7]\n\t"
+        "mov	%[a], r9\n\t"
+        "mov	%[b], r10\n\t"
+        :
+        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
+    );
+
+    XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * m  Mask to AND against each digit.
+ */
+static void sp_4096_mask_64(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<64; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    int i;
+
+    for (i = 0; i < 64; i += 8) {
+        r[i+0] = a[i+0] & m;
+        r[i+1] = a[i+1] & m;
+        r[i+2] = a[i+2] & m;
+        r[i+3] = a[i+3] & m;
+        r[i+4] = a[i+4] & m;
+        r[i+5] = a[i+5] & m;
+        r[i+6] = a[i+6] & m;
+        r[i+7] = a[i+7] & m;
+    }
+#endif
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_mul_128(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit* z0 = r;
+    sp_digit z1[128];
+    sp_digit a1[64];
+    sp_digit b1[64];
+    sp_digit z2[128];
+    sp_digit u, ca, cb;
+
+    ca = sp_2048_add_64(a1, a, &a[64]);
+    cb = sp_2048_add_64(b1, b, &b[64]);
+    u  = ca & cb;
+    sp_2048_mul_64(z1, a1, b1);
+    sp_2048_mul_64(z2, &a[64], &b[64]);
+    sp_2048_mul_64(z0, a, b);
+    sp_2048_mask_64(r + 128, a1, 0 - cb);
+    sp_2048_mask_64(b1, b1, 0 - ca);
+    u += sp_2048_add_64(r + 128, r + 128, b1);
+    u += sp_4096_sub_in_place_128(z1, z2);
+    u += sp_4096_sub_in_place_128(z1, z0);
+    u += sp_4096_add_128(r + 64, r + 64, z1);
+    r[192] = u;
+    XMEMSET(r + 192 + 1, 0, sizeof(sp_digit) * (64 - 1));
+    (void)sp_4096_add_128(r + 128, r + 128, z2);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_sqr_64(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r5, #0\n\t"
+        "mov	r8, r3\n\t"
+        "mov	r11, %[r]\n\t"
+        "mov	r6, #2\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "neg	r6, r6\n\t"
+        "add	sp, r6\n\t"
+        "mov	r10, sp\n\t"
+        "mov	r9, %[a]\n\t"
+        "\n1:\n\t"
+        "mov	%[r], #0\n\t"
+        "mov	r6, #252\n\t"
+        "mov	%[a], r8\n\t"
+        "sub	%[a], r6\n\t"
+        "sbc	r6, r6\n\t"
+        "mvn	r6, r6\n\t"
+        "and	%[a], r6\n\t"
+        "mov	r2, r8\n\t"
+        "sub	r2, %[a]\n\t"
+        "add	%[a], r9\n\t"
+        "add	r2, r9\n\t"
+        "\n2:\n\t"
+        "cmp	r2, %[a]\n\t"
+        "beq	4f\n\t"
+        "# Multiply * 2: Start\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r7, [r2]\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r3, r7\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "add	r3, r7\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r7, [r2]\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r7, [r2]\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r7, [r2]\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "# Multiply * 2: Done\n\t"
+        "bal	5f\n\t"
+        "\n4:\n\t"
+        "# Square: Start\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "mul	r6, r6\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "mul	r7, r7\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #15\n\t"
+        "lsl	r6, r6, #17\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "# Square: Done\n\t"
+        "\n5:\n\t"
+        "add	%[a], #4\n\t"
+        "sub	r2, #4\n\t"
+        "mov	r6, #1\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, r9\n\t"
+        "cmp	%[a], r6\n\t"
+        "beq	3f\n\t"
+        "cmp	%[a], r2\n\t"
+        "bgt	3f\n\t"
+        "mov	r7, r8\n\t"
+        "add	r7, r9\n\t"
+        "cmp	%[a], r7\n\t"
+        "ble	2b\n\t"
+        "\n3:\n\t"
+        "mov	%[r], r10\n\t"
+        "mov	r7, r8\n\t"
+        "str	r3, [%[r], r7]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "mov	r5, #0\n\t"
+        "add	r7, #4\n\t"
+        "mov	r8, r7\n\t"
+        "mov	r6, #1\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, #248\n\t"
+        "cmp	r7, r6\n\t"
+        "ble	1b\n\t"
+        "mov	%[a], r9\n\t"
+        "str	r3, [%[r], r7]\n\t"
+        "mov	%[r], r11\n\t"
+        "mov	%[a], r10\n\t"
+        "mov	r3, #1\n\t"
+        "lsl	r3, r3, #8\n\t"
+        "add	r3, #252\n\t"
+        "\n4:\n\t"
+        "ldr	r6, [%[a], r3]\n\t"
+        "str	r6, [%[r], r3]\n\t"
+        "sub	r3, #4\n\t"
+        "bge	4b\n\t"
+        "mov	r6, #2\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	sp, r6\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
+    );
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a)
+{
+    sp_digit* z0 = r;
+    sp_digit z2[128];
+    sp_digit z1[128];
+    sp_digit a1[64];
+    sp_digit u;
+
+    u = sp_2048_add_64(a1, a, &a[64]);
+    sp_2048_sqr_64(z1, a1);
+    sp_2048_sqr_64(z2, &a[64]);
+    sp_2048_sqr_64(z0, a);
+    sp_2048_mask_64(r + 128, a1, 0 - u);
+    u += sp_2048_add_64(r + 128, r + 128, r + 128);
+    u += sp_4096_sub_in_place_128(z1, z2);
+    u += sp_4096_sub_in_place_128(z1, z0);
+    u += sp_4096_add_128(r + 64, r + 64, z1);
+    r[192] = u;
+    XMEMSET(r + 192 + 1, 0, sizeof(sp_digit) * (64 - 1));
+    (void)sp_4096_add_128(r + 128, r + 128, z2);
+}
+
+#endif /* !WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r6, %[a]\n\t"
+        "mov	r7, #0\n\t"
+        "mov	r4, #2\n\t"
+        "lsl	r4, #8\n\t"
+        "sub	r7, #1\n\t"
+        "add	r6, r4\n\t"
+        "\n1:\n\t"
+        "add	%[c], r7\n\t"
+        "ldr	r4, [%[a]]\n\t"
+        "ldr	r5, [%[b]]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r]]\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c]\n\t"
+        "add	%[a], #4\n\t"
+        "add	%[b], #4\n\t"
+        "add	%[r], #4\n\t"
+        "cmp	%[a], r6\n\t"
+        "bne	1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r7"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into a. (a -= b)
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_4096_sub_in_place_128(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+    __asm__ __volatile__ (
+        "mov	r7, %[a]\n\t"
+        "mov	r5, #2\n\t"
+        "lsl	r5, #8\n\t"
+        "add	r7, r5\n\t"
+        "\n1:\n\t"
+        "mov	r5, #0\n\t"
+        "sub	r5, %[c]\n\t"
+        "ldr	r3, [%[a]]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b]]\n\t"
+        "ldr	r6, [%[b], #4]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a]]\n\t"
+        "str	r4, [%[a], #4]\n\t"
+        "sbc	%[c], %[c]\n\t"
+        "add	%[a], #8\n\t"
+        "add	%[b], #8\n\t"
+        "cmp	%[a], r7\n\t"
+        "bne	1b\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6", "r7"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_mul_128(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit tmp[128 * 2];
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r8, r3\n\t"
+        "mov	r11, %[r]\n\t"
+        "mov	r9, %[a]\n\t"
+        "mov	r10, %[b]\n\t"
+        "mov	r6, #2\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, r9\n\t"
+        "mov	r12, r6\n\t"
+        "\n1:\n\t"
+        "mov	%[r], #0\n\t"
+        "mov	r5, #0\n\t"
+        "mov	r6, #1\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, #252\n\t"
+        "mov	%[a], r8\n\t"
+        "sub	%[a], r6\n\t"
+        "sbc	r6, r6\n\t"
+        "mvn	r6, r6\n\t"
+        "and	%[a], r6\n\t"
+        "mov	%[b], r8\n\t"
+        "sub	%[b], %[a]\n\t"
+        "add	%[a], r9\n\t"
+        "add	%[b], r10\n\t"
+        "\n2:\n\t"
+        "# Multiply Start\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r7, [%[b]]\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r3, r7\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r7, [%[b]]\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r7, [%[b]]\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r7, [%[b]]\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "# Multiply Done\n\t"
+        "add	%[a], #4\n\t"
+        "sub	%[b], #4\n\t"
+        "cmp	%[a], r12\n\t"
+        "beq	3f\n\t"
+        "mov	r6, r8\n\t"
+        "add	r6, r9\n\t"
+        "cmp	%[a], r6\n\t"
+        "ble	2b\n\t"
+        "\n3:\n\t"
+        "mov	%[r], r11\n\t"
+        "mov	r7, r8\n\t"
+        "str	r3, [%[r], r7]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "add	r7, #4\n\t"
+        "mov	r8, r7\n\t"
+        "mov	r6, #3\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, #248\n\t"
+        "cmp	r7, r6\n\t"
+        "ble	1b\n\t"
+        "str	r3, [%[r], r7]\n\t"
+        "mov	%[a], r9\n\t"
+        "mov	%[b], r10\n\t"
+        :
+        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
+    );
+
+    XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r5, #0\n\t"
+        "mov	r8, r3\n\t"
+        "mov	r11, %[r]\n\t"
+        "mov	r6, #4\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "neg	r6, r6\n\t"
+        "add	sp, r6\n\t"
+        "mov	r10, sp\n\t"
+        "mov	r9, %[a]\n\t"
+        "\n1:\n\t"
+        "mov	%[r], #0\n\t"
+        "mov	r6, #1\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, #252\n\t"
+        "mov	%[a], r8\n\t"
+        "sub	%[a], r6\n\t"
+        "sbc	r6, r6\n\t"
+        "mvn	r6, r6\n\t"
+        "and	%[a], r6\n\t"
+        "mov	r2, r8\n\t"
+        "sub	r2, %[a]\n\t"
+        "add	%[a], r9\n\t"
+        "add	r2, r9\n\t"
+        "\n2:\n\t"
+        "cmp	r2, %[a]\n\t"
+        "beq	4f\n\t"
+        "# Multiply * 2: Start\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r7, [r2]\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r3, r7\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "add	r3, r7\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r7, [r2]\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r7, [r2]\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r7, [r2]\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "# Multiply * 2: Done\n\t"
+        "bal	5f\n\t"
+        "\n4:\n\t"
+        "# Square: Start\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "mul	r6, r6\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "mul	r7, r7\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #15\n\t"
+        "lsl	r6, r6, #17\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "# Square: Done\n\t"
+        "\n5:\n\t"
+        "add	%[a], #4\n\t"
+        "sub	r2, #4\n\t"
+        "mov	r6, #2\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, r9\n\t"
+        "cmp	%[a], r6\n\t"
+        "beq	3f\n\t"
+        "cmp	%[a], r2\n\t"
+        "bgt	3f\n\t"
+        "mov	r7, r8\n\t"
+        "add	r7, r9\n\t"
+        "cmp	%[a], r7\n\t"
+        "ble	2b\n\t"
+        "\n3:\n\t"
+        "mov	%[r], r10\n\t"
+        "mov	r7, r8\n\t"
+        "str	r3, [%[r], r7]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "mov	r5, #0\n\t"
+        "add	r7, #4\n\t"
+        "mov	r8, r7\n\t"
+        "mov	r6, #3\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, #248\n\t"
+        "cmp	r7, r6\n\t"
+        "ble	1b\n\t"
+        "mov	%[a], r9\n\t"
+        "str	r3, [%[r], r7]\n\t"
+        "mov	%[r], r11\n\t"
+        "mov	%[a], r10\n\t"
+        "mov	r3, #3\n\t"
+        "lsl	r3, r3, #8\n\t"
+        "add	r3, #252\n\t"
+        "\n4:\n\t"
+        "ldr	r6, [%[a], r3]\n\t"
+        "str	r6, [%[r], r3]\n\t"
+        "sub	r3, #4\n\t"
+        "bge	4b\n\t"
+        "mov	r6, #4\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	sp, r6\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
+    );
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Caclulate the bottom digit of -1/a mod 2^n.
+ *
+ * a    A single precision number.
+ * rho  Bottom word of inverse.
+ */
+static void sp_4096_mont_setup(const sp_digit* a, sp_digit* rho)
+{
+    sp_digit x, b;
+
+    b = a[0];
+    x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
+
+    /* rho = -1/m mod b */
+    *rho = -x;
+}
+
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision digit.
+ */
+SP_NOINLINE static void sp_4096_mul_d_128(sp_digit* r, const sp_digit* a,
+        sp_digit b)
+{
+    __asm__ __volatile__ (
+        "mov	r6, #2\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, %[a]\n\t"
+        "mov	r8, %[r]\n\t"
+        "mov	r9, r6\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "1:\n\t"
+        "mov	%[r], #0\n\t"
+        "mov	r5, #0\n\t"
+        "# A[] * B\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsl	r7, %[b], #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r3, r7\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "lsr	r7, %[b], #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, %[b], #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "lsl	r7, %[b], #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "# A[] * B - Done\n\t"
+        "mov	%[r], r8\n\t"
+        "str	r3, [%[r]]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "add	%[r], #4\n\t"
+        "add	%[a], #4\n\t"
+        "mov	r8, %[r]\n\t"
+        "cmp	%[a], r9\n\t"
+        "blt	1b\n\t"
+        "str	r3, [%[r]]\n\t"
+        : [r] "+r" (r), [a] "+r" (a)
+        : [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
+    );
+}
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 4096 bits, just need to subtract.
+ *
+ * r  A single precision number.
+ * m  A single precision number.
+ */
+static void sp_4096_mont_norm_128(sp_digit* r, const sp_digit* m)
+{
+    XMEMSET(r, 0, sizeof(sp_digit) * 128);
+
+    /* r = 2^n mod m */
+    sp_4096_sub_in_place_128(r, m);
+}
+
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * r  A single precision number representing condition subtract result.
+ * a  A single precision number to subtract from.
+ * b  A single precision number to subtract.
+ * m  Mask value to apply.
+ */
+SP_NOINLINE static sp_digit sp_4096_cond_sub_128(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, sp_digit m)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r5, #2\n\t"
+        "lsl	r5, r5, #8\n\t"
+        "mov	r8, r5\n\t"
+        "mov	r7, #0\n\t"
+        "1:\n\t"
+        "ldr	r6, [%[b], r7]\n\t"
+        "and	r6, %[m]\n\t"
+        "mov	r5, #0\n\t"
+        "sub	r5, %[c]\n\t"
+        "ldr	r5, [%[a], r7]\n\t"
+        "sbc	r5, r6\n\t"
+        "sbc	%[c], %[c]\n\t"
+        "str	r5, [%[r], r7]\n\t"
+        "add	r7, #4\n\t"
+        "cmp	r7, r8\n\t"
+        "blt	1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r5", "r6", "r7", "r8"
+    );
+
+    return c;
+}
+
+/* Reduce the number back to 4096 bits using Montgomery reduction.
+ *
+ * a   A single precision number to reduce in place.
+ * m   The single precision number representing the modulus.
+ * mp  The digit representing the negative inverse of m mod 2^n.
+ */
+SP_NOINLINE static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_digit ca = 0;
+
+    __asm__ __volatile__ (
+        "mov	r8, %[mp]\n\t"
+        "mov	r12, %[ca]\n\t"
+        "mov	r14, %[m]\n\t"
+        "mov	r9, %[a]\n\t"
+        "mov	r4, #0\n\t"
+        "# i = 0\n\t"
+        "mov	r11, r4\n\t"
+        "\n1:\n\t"
+        "mov	r5, #0\n\t"
+        "mov	%[ca], #0\n\t"
+        "# mu = a[i] * mp\n\t"
+        "mov	%[mp], r8\n\t"
+        "ldr	%[a], [%[a]]\n\t"
+        "mul	%[mp], %[a]\n\t"
+        "mov	%[m], r14\n\t"
+        "mov	r10, r9\n\t"
+        "\n2:\n\t"
+        "# a[i+j] += m[j] * mu\n\t"
+        "mov	%[a], r10\n\t"
+        "ldr	%[a], [%[a]]\n\t"
+        "mov	%[ca], #0\n\t"
+        "mov	r4, r5\n\t"
+        "mov	r5, #0\n\t"
+        "# Multiply m[j] and mu - Start\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsl	r6, %[mp], #16\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	%[a], r7\n\t"
+        "adc	r5, %[ca]\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	%[a], r6\n\t"
+        "adc	r5, r7\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsr	r6, %[mp], #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r5, r7\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	%[a], r6\n\t"
+        "adc	r5, r7\n\t"
+        "# Multiply m[j] and mu - Done\n\t"
+        "add	r4, %[a]\n\t"
+        "adc	r5, %[ca]\n\t"
+        "mov	%[a], r10\n\t"
+        "str	r4, [%[a]]\n\t"
+        "mov	r6, #4\n\t"
+        "add	%[m], #4\n\t"
+        "add	r10, r6\n\t"
+        "mov	r4, #1\n\t"
+        "lsl	r4, r4, #8\n\t"
+        "add	r4, #252\n\t"
+        "add	r4, r9\n\t"
+        "cmp	r10, r4\n\t"
+        "blt	2b\n\t"
+        "# a[i+127] += m[127] * mu\n\t"
+        "mov	%[ca], #0\n\t"
+        "mov	r4, r12\n\t"
+        "mov	%[a], #0\n\t"
+        "# Multiply m[127] and mu - Start\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsl	r6, %[mp], #16\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r5, r7\n\t"
+        "adc	r4, %[ca]\n\t"
+        "adc	%[a], %[ca]\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r5, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	%[a], %[ca]\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsr	r6, %[mp], #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r4, r7\n\t"
+        "adc	%[a], %[ca]\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r5, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	%[a], %[ca]\n\t"
+        "# Multiply m[127] and mu - Done\n\t"
+        "mov	%[ca], %[a]\n\t"
+        "mov	%[a], r10\n\t"
+        "ldr	r7, [%[a], #4]\n\t"
+        "ldr	%[a], [%[a]]\n\t"
+        "mov	r6, #0\n\t"
+        "add	r5, %[a]\n\t"
+        "adc	r7, r4\n\t"
+        "adc	%[ca], r6\n\t"
+        "mov	%[a], r10\n\t"
+        "str	r5, [%[a]]\n\t"
+        "str	r7, [%[a], #4]\n\t"
+        "# i += 1\n\t"
+        "mov	r6, #4\n\t"
+        "add	r9, r6\n\t"
+        "add	r11, r6\n\t"
+        "mov	r12, %[ca]\n\t"
+        "mov	%[a], r9\n\t"
+        "mov	r4, #2\n\t"
+        "lsl	r4, r4, #8\n\t"
+        "cmp	r11, r4\n\t"
+        "blt	1b\n\t"
+        "mov	%[m], r14\n\t"
+        : [ca] "+r" (ca), [a] "+r" (a)
+        : [m] "r" (m), [mp] "r" (mp)
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14"
+    );
+
+    sp_4096_cond_sub_128(a - 128, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montogmery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r   Result of multiplication.
+ * a   First number to multiply in Montogmery form.
+ * b   Second number to multiply in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_4096_mont_mul_128(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_4096_mul_128(r, a, b);
+    sp_4096_mont_reduce_128(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_4096_mont_sqr_128(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_4096_sqr_128(r, a);
+    sp_4096_mont_reduce_128(r, m, mp);
+}
+
+/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
+ *
+ * d1   The high order half of the number to divide.
+ * d0   The low order half of the number to divide.
+ * div  The dividend.
+ * returns the result of the division.
+ *
+ * Note that this is an approximate div. It may give an answer 1 larger.
+ */
+SP_NOINLINE static sp_digit div_4096_word_128(sp_digit d1, sp_digit d0,
+        sp_digit div)
+{
+    sp_digit r = 0;
+
+    __asm__ __volatile__ (
+        "lsr	r5, %[div], #1\n\t"
+        "add	r5, #1\n\t"
+        "mov	r8, %[d0]\n\t"
+        "mov	r9, %[d1]\n\t"
+        "# Do top 32\n\t"
+        "mov	r6, r5\n\t"
+        "sub	r6, %[d1]\n\t"
+        "sbc	r6, r6\n\t"
+        "add	%[r], %[r]\n\t"
+        "sub	%[r], r6\n\t"
+        "and	r6, r5\n\t"
+        "sub	%[d1], r6\n\t"
+        "# Next 30 bits\n\t"
+        "mov	r4, #29\n\t"
+        "1:\n\t"
+        "lsl	%[d0], %[d0], #1\n\t"
+        "adc	%[d1], %[d1]\n\t"
+        "mov	r6, r5\n\t"
+        "sub	r6, %[d1]\n\t"
+        "sbc	r6, r6\n\t"
+        "add	%[r], %[r]\n\t"
+        "sub	%[r], r6\n\t"
+        "and	r6, r5\n\t"
+        "sub	%[d1], r6\n\t"
+        "sub	r4, #1\n\t"
+        "bpl	1b\n\t"
+        "mov	r7, #0\n\t"
+        "add	%[r], %[r]\n\t"
+        "add	%[r], #1\n\t"
+        "# r * div - Start\n\t"
+        "lsl	%[d1], %[r], #16\n\t"
+        "lsl	r4, %[div], #16\n\t"
+        "lsr	%[d1], %[d1], #16\n\t"
+        "lsr	r4, r4, #16\n\t"
+        "mul	r4, %[d1]\n\t"
+        "lsr	r6, %[div], #16\n\t"
+        "mul	%[d1], r6\n\t"
+        "lsr	r5, %[d1], #16\n\t"
+        "lsl	%[d1], %[d1], #16\n\t"
+        "add	r4, %[d1]\n\t"
+        "adc	r5, r7\n\t"
+        "lsr	%[d1], %[r], #16\n\t"
+        "mul	r6, %[d1]\n\t"
+        "add	r5, r6\n\t"
+        "lsl	r6, %[div], #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "mul	%[d1], r6\n\t"
+        "lsr	r6, %[d1], #16\n\t"
+        "lsl	%[d1], %[d1], #16\n\t"
+        "add	r4, %[d1]\n\t"
+        "adc	r5, r6\n\t"
+        "# r * div - Done\n\t"
+        "mov	%[d1], r8\n\t"
+        "sub	%[d1], r4\n\t"
+        "mov	r4, %[d1]\n\t"
+        "mov	%[d1], r9\n\t"
+        "sbc	%[d1], r5\n\t"
+        "mov	r5, %[d1]\n\t"
+        "add	%[r], r5\n\t"
+        "# r * div - Start\n\t"
+        "lsl	%[d1], %[r], #16\n\t"
+        "lsl	r4, %[div], #16\n\t"
+        "lsr	%[d1], %[d1], #16\n\t"
+        "lsr	r4, r4, #16\n\t"
+        "mul	r4, %[d1]\n\t"
+        "lsr	r6, %[div], #16\n\t"
+        "mul	%[d1], r6\n\t"
+        "lsr	r5, %[d1], #16\n\t"
+        "lsl	%[d1], %[d1], #16\n\t"
+        "add	r4, %[d1]\n\t"
+        "adc	r5, r7\n\t"
+        "lsr	%[d1], %[r], #16\n\t"
+        "mul	r6, %[d1]\n\t"
+        "add	r5, r6\n\t"
+        "lsl	r6, %[div], #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "mul	%[d1], r6\n\t"
+        "lsr	r6, %[d1], #16\n\t"
+        "lsl	%[d1], %[d1], #16\n\t"
+        "add	r4, %[d1]\n\t"
+        "adc	r5, r6\n\t"
+        "# r * div - Done\n\t"
+        "mov	%[d1], r8\n\t"
+        "mov	r6, r9\n\t"
+        "sub	r4, %[d1], r4\n\t"
+        "sbc	r6, r5\n\t"
+        "mov	r5, r6\n\t"
+        "add	%[r], r5\n\t"
+        "# r * div - Start\n\t"
+        "lsl	%[d1], %[r], #16\n\t"
+        "lsl	r4, %[div], #16\n\t"
+        "lsr	%[d1], %[d1], #16\n\t"
+        "lsr	r4, r4, #16\n\t"
+        "mul	r4, %[d1]\n\t"
+        "lsr	r6, %[div], #16\n\t"
+        "mul	%[d1], r6\n\t"
+        "lsr	r5, %[d1], #16\n\t"
+        "lsl	%[d1], %[d1], #16\n\t"
+        "add	r4, %[d1]\n\t"
+        "adc	r5, r7\n\t"
+        "lsr	%[d1], %[r], #16\n\t"
+        "mul	r6, %[d1]\n\t"
+        "add	r5, r6\n\t"
+        "lsl	r6, %[div], #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "mul	%[d1], r6\n\t"
+        "lsr	r6, %[d1], #16\n\t"
+        "lsl	%[d1], %[d1], #16\n\t"
+        "add	r4, %[d1]\n\t"
+        "adc	r5, r6\n\t"
+        "# r * div - Done\n\t"
+        "mov	%[d1], r8\n\t"
+        "mov	r6, r9\n\t"
+        "sub	r4, %[d1], r4\n\t"
+        "sbc	r6, r5\n\t"
+        "mov	r5, r6\n\t"
+        "add	%[r], r5\n\t"
+        "mov	r6, %[div]\n\t"
+        "sub	r6, r4\n\t"
+        "sbc	r6, r6\n\t"
+        "sub	%[r], r6\n\t"
+        : [r] "+r" (r)
+        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+        : "r4", "r5", "r7", "r6", "r8", "r9"
+    );
+    return r;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * m  Mask to AND against each digit.
+ */
+static void sp_4096_mask_128(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<128; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    int i;
+
+    for (i = 0; i < 128; i += 8) {
+        r[i+0] = a[i+0] & m;
+        r[i+1] = a[i+1] & m;
+        r[i+2] = a[i+2] & m;
+        r[i+3] = a[i+3] & m;
+        r[i+4] = a[i+4] & m;
+        r[i+5] = a[i+5] & m;
+        r[i+6] = a[i+6] & m;
+        r[i+7] = a[i+7] & m;
+    }
+#endif
+}
+
+/* Compare a with b in constant time.
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+SP_NOINLINE static int32_t sp_4096_cmp_128(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+
+
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mvn	r3, r3\n\t"
+        "mov	r6, #1\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, #252\n\t"
+        "1:\n\t"
+        "ldr	r7, [%[a], r6]\n\t"
+        "ldr	r5, [%[b], r6]\n\t"
+        "and	r7, r3\n\t"
+        "and	r5, r3\n\t"
+        "mov	r4, r7\n\t"
+        "sub	r7, r5\n\t"
+        "sbc	r7, r7\n\t"
+        "add	%[r], r7\n\t"
+        "mvn	r7, r7\n\t"
+        "and	r3, r7\n\t"
+        "sub	r5, r4\n\t"
+        "sbc	r7, r7\n\t"
+        "sub	%[r], r7\n\t"
+        "mvn	r7, r7\n\t"
+        "and	r3, r7\n\t"
+        "sub	r6, #4\n\t"
+        "cmp	r6, #0\n\t"
+        "bge	1b\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
+        : "r3", "r4", "r5", "r6", "r7"
+    );
+
+    return r;
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Nmber to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_4096_div_128(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[256], t2[129];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    div = d[127];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 128);
+    for (i=127; i>=0; i--) {
+        r1 = div_4096_word_128(t1[128 + i], t1[128 + i - 1], div);
+
+        sp_4096_mul_d_128(t2, d, r1);
+        t1[128 + i] += sp_4096_sub_in_place_128(&t1[i], t2);
+        t1[128 + i] -= t2[128];
+        sp_4096_mask_128(t2, d, t1[128 + i]);
+        t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], t2);
+        sp_4096_mask_128(t2, d, t1[128 + i]);
+        t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], t2);
+    }
+
+    r1 = sp_4096_cmp_128(t1, d) >= 0;
+    sp_4096_cond_sub_128(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_4096_mod_128(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_4096_div_128(a, m, NULL, r);
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Nmber to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_4096_div_128_cond(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[256], t2[129];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    div = d[127];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 128);
+    for (i=127; i>=0; i--) {
+        r1 = div_4096_word_128(t1[128 + i], t1[128 + i - 1], div);
+
+        sp_4096_mul_d_128(t2, d, r1);
+        t1[128 + i] += sp_4096_sub_in_place_128(&t1[i], t2);
+        t1[128 + i] -= t2[128];
+        if (t1[128 + i] != 0) {
+            t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], d);
+            if (t1[128 + i] != 0)
+                t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], d);
+        }
+    }
+
+    r1 = sp_4096_cmp_128(t1, d) >= 0;
+    sp_4096_cond_sub_128(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_4096_mod_128_cond(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_4096_div_128_cond(a, m, NULL, r);
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
+                                                     defined(WOLFSSL_HAVE_SP_DH)
+#ifdef WOLFSSL_SP_SMALL
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * r     A single precision number that is the result of the operation.
+ * a     A single precision number being exponentiated.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_4096_mod_exp_128(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[16][256];
+#else
+    sp_digit* t[16];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 256, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<16; i++) {
+            t[i] = td + i * 256;
+        }
+#endif
+        norm = t[0];
+
+        sp_4096_mont_setup(m, &mp);
+        sp_4096_mont_norm_128(norm, m);
+
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 128U);
+        if (reduceA != 0) {
+            err = sp_4096_mod_128(t[1] + 128, a, m);
+            if (err == MP_OKAY) {
+                err = sp_4096_mod_128(t[1], t[1], m);
+            }
+        }
+        else {
+            XMEMCPY(t[1] + 128, a, sizeof(sp_digit) * 128);
+            err = sp_4096_mod_128(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_mont_sqr_128(t[ 2], t[ 1], m, mp);
+        sp_4096_mont_mul_128(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_4096_mont_sqr_128(t[ 4], t[ 2], m, mp);
+        sp_4096_mont_mul_128(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_4096_mont_sqr_128(t[ 6], t[ 3], m, mp);
+        sp_4096_mont_mul_128(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_4096_mont_sqr_128(t[ 8], t[ 4], m, mp);
+        sp_4096_mont_mul_128(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_4096_mont_sqr_128(t[10], t[ 5], m, mp);
+        sp_4096_mont_mul_128(t[11], t[ 6], t[ 5], m, mp);
+        sp_4096_mont_sqr_128(t[12], t[ 6], m, mp);
+        sp_4096_mont_mul_128(t[13], t[ 7], t[ 6], m, mp);
+        sp_4096_mont_sqr_128(t[14], t[ 7], m, mp);
+        sp_4096_mont_mul_128(t[15], t[ 8], t[ 7], m, mp);
+
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 4;
+        if (c == 32) {
+            c = 28;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 128);
+        for (; i>=0 || c>=4; ) {
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 28;
+                n <<= 4;
+                c = 28;
+            }
+            else if (c < 4) {
+                y = n >> 28;
+                n = e[i--];
+                c = 4 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                y = (n >> 28) & 0xf;
+                n <<= 4;
+                c -= 4;
+            }
+
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+
+            sp_4096_mont_mul_128(r, r, t[y], m, mp);
+        }
+
+        XMEMSET(&r[128], 0, sizeof(sp_digit) * 128U);
+        sp_4096_mont_reduce_128(r, m, mp);
+
+        mask = 0 - (sp_4096_cmp_128(r, m) >= 0);
+        sp_4096_cond_sub_128(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#else
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * r     A single precision number that is the result of the operation.
+ * a     A single precision number being exponentiated.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_4096_mod_exp_128(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[32][256];
+#else
+    sp_digit* t[32];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 256, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<32; i++) {
+            t[i] = td + i * 256;
+        }
+#endif
+        norm = t[0];
+
+        sp_4096_mont_setup(m, &mp);
+        sp_4096_mont_norm_128(norm, m);
+
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 128U);
+        if (reduceA != 0) {
+            err = sp_4096_mod_128(t[1] + 128, a, m);
+            if (err == MP_OKAY) {
+                err = sp_4096_mod_128(t[1], t[1], m);
+            }
+        }
+        else {
+            XMEMCPY(t[1] + 128, a, sizeof(sp_digit) * 128);
+            err = sp_4096_mod_128(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_mont_sqr_128(t[ 2], t[ 1], m, mp);
+        sp_4096_mont_mul_128(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_4096_mont_sqr_128(t[ 4], t[ 2], m, mp);
+        sp_4096_mont_mul_128(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_4096_mont_sqr_128(t[ 6], t[ 3], m, mp);
+        sp_4096_mont_mul_128(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_4096_mont_sqr_128(t[ 8], t[ 4], m, mp);
+        sp_4096_mont_mul_128(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_4096_mont_sqr_128(t[10], t[ 5], m, mp);
+        sp_4096_mont_mul_128(t[11], t[ 6], t[ 5], m, mp);
+        sp_4096_mont_sqr_128(t[12], t[ 6], m, mp);
+        sp_4096_mont_mul_128(t[13], t[ 7], t[ 6], m, mp);
+        sp_4096_mont_sqr_128(t[14], t[ 7], m, mp);
+        sp_4096_mont_mul_128(t[15], t[ 8], t[ 7], m, mp);
+        sp_4096_mont_sqr_128(t[16], t[ 8], m, mp);
+        sp_4096_mont_mul_128(t[17], t[ 9], t[ 8], m, mp);
+        sp_4096_mont_sqr_128(t[18], t[ 9], m, mp);
+        sp_4096_mont_mul_128(t[19], t[10], t[ 9], m, mp);
+        sp_4096_mont_sqr_128(t[20], t[10], m, mp);
+        sp_4096_mont_mul_128(t[21], t[11], t[10], m, mp);
+        sp_4096_mont_sqr_128(t[22], t[11], m, mp);
+        sp_4096_mont_mul_128(t[23], t[12], t[11], m, mp);
+        sp_4096_mont_sqr_128(t[24], t[12], m, mp);
+        sp_4096_mont_mul_128(t[25], t[13], t[12], m, mp);
+        sp_4096_mont_sqr_128(t[26], t[13], m, mp);
+        sp_4096_mont_mul_128(t[27], t[14], t[13], m, mp);
+        sp_4096_mont_sqr_128(t[28], t[14], m, mp);
+        sp_4096_mont_mul_128(t[29], t[15], t[14], m, mp);
+        sp_4096_mont_sqr_128(t[30], t[15], m, mp);
+        sp_4096_mont_mul_128(t[31], t[16], t[15], m, mp);
+
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 5;
+        if (c == 32) {
+            c = 27;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 128);
+        for (; i>=0 || c>=5; ) {
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 27;
+                n <<= 5;
+                c = 27;
+            }
+            else if (c < 5) {
+                y = n >> 27;
+                n = e[i--];
+                c = 5 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                y = (n >> 27) & 0x1f;
+                n <<= 5;
+                c -= 5;
+            }
+
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+
+            sp_4096_mont_mul_128(r, r, t[y], m, mp);
+        }
+
+        XMEMSET(&r[128], 0, sizeof(sp_digit) * 128U);
+        sp_4096_mont_reduce_128(r, m, mp);
+
+        mask = 0 - (sp_4096_cmp_128(r, m) >= 0);
+        sp_4096_cond_sub_128(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */
+
+#ifdef WOLFSSL_HAVE_SP_RSA
+/* RSA public key operation.
+ *
+ * in      Array of bytes representing the number to exponentiate, base.
+ * inLen   Number of bytes in base.
+ * em      Public exponent.
+ * mm      Modulus.
+ * out     Buffer to hold big-endian bytes of exponentiation result.
+ *         Must be at least 512 bytes long.
+ * outLen  Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPublic_4096(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
+    byte* out, word32* outLen)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit a[256], m[128], r[256];
+#else
+    sp_digit* d = NULL;
+    sp_digit* a;
+    sp_digit* m;
+    sp_digit* r;
+#endif
+    sp_digit *ah;
+    sp_digit e[1];
+    int err = MP_OKAY;
+
+    if (*outLen < 512)
+        err = MP_TO_E;
+    if (err == MP_OKAY && (mp_count_bits(em) > 32 || inLen > 512 ||
+                                                     mp_count_bits(mm) != 4096))
+        err = MP_READ_E;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 128 * 5, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (d == NULL)
+            err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        a = d;
+        r = a + 128 * 2;
+        m = r + 128 * 2;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        ah = a + 128;
+
+        sp_4096_from_bin(ah, 128, in, inLen);
+#if DIGIT_BIT >= 32
+        e[0] = em->dp[0];
+#else
+        e[0] = em->dp[0];
+        if (em->used > 1) {
+            e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
+        }
+#endif
+        if (e[0] == 0) {
+            err = MP_EXPTMOD_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        sp_4096_from_mp(m, 128, mm);
+
+        if (e[0] == 0x3) {
+            if (err == MP_OKAY) {
+                sp_4096_sqr_128(r, ah);
+                err = sp_4096_mod_128_cond(r, r, m);
+            }
+            if (err == MP_OKAY) {
+                sp_4096_mul_128(r, ah, r);
+                err = sp_4096_mod_128_cond(r, r, m);
+            }
+        }
+        else {
+            int i;
+            sp_digit mp;
+
+            sp_4096_mont_setup(m, &mp);
+
+            /* Convert to Montgomery form. */
+            XMEMSET(a, 0, sizeof(sp_digit) * 128);
+            err = sp_4096_mod_128_cond(a, a, m);
+
+            if (err == MP_OKAY) {
+                for (i = 31; i >= 0; i--) {
+                    if (e[0] >> i) {
+                        break;
+                    }
+                }
+
+                XMEMCPY(r, a, sizeof(sp_digit) * 128);
+                for (i--; i>=0; i--) {
+                    sp_4096_mont_sqr_128(r, r, m, mp);
+                    if (((e[0] >> i) & 1) == 1) {
+                        sp_4096_mont_mul_128(r, r, a, m, mp);
+                    }
+                }
+                XMEMSET(&r[128], 0, sizeof(sp_digit) * 128);
+                sp_4096_mont_reduce_128(r, m, mp);
+
+                for (i = 127; i > 0; i--) {
+                    if (r[i] != m[i]) {
+                        break;
+                    }
+                }
+                if (r[i] >= m[i]) {
+                    sp_4096_sub_in_place_128(r, m);
+                }
+            }
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+#endif
+
+    return err;
+}
+
+#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
+    sp_digit* a;
+    sp_digit* d = NULL;
+    sp_digit* m;
+    sp_digit* r;
+    int err = MP_OKAY;
+
+    (void)pm;
+    (void)qm;
+    (void)dpm;
+    (void)dqm;
+    (void)qim;
+
+    if (*outLen < 512U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(dm) > 4096) {
+           err = MP_READ_E;
+        }
+        if (inLen > 512) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 128 * 4, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        a = d + 128;
+        m = a + 256;
+        r = a;
+
+        sp_4096_from_bin(a, 128, in, inLen);
+        sp_4096_from_mp(d, 128, dm);
+        sp_4096_from_mp(m, 128, mm);
+        err = sp_4096_mod_exp_128(r, a, d, 4096, m, 0);
+    }
+    if (err == MP_OKAY) {
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+    }
+
+    if (d != NULL) {
+        XMEMSET(d, 0, sizeof(sp_digit) * 128);
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+
+    return err;
+#else
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r  A single precision number representing conditional add result.
+ * a  A single precision number to add with.
+ * b  A single precision number to add.
+ * m  Mask value to apply.
+ */
+SP_NOINLINE static sp_digit sp_4096_cond_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        sp_digit m)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r5, #1\n\t"
+        "lsl	r5, r5, #8\n\t"
+        "mov	r8, r5\n\t"
+        "mov	r7, #0\n\t"
+        "1:\n\t"
+        "ldr	r6, [%[b], r7]\n\t"
+        "and	r6, %[m]\n\t"
+        "mov	r5, #0\n\t"
+        "sub	r5, #1\n\t"
+        "add	r5, %[c]\n\t"
+        "ldr	r5, [%[a], r7]\n\t"
+        "adc	r5, r6\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c]\n\t"
+        "str	r5, [%[r], r7]\n\t"
+        "add	r7, #4\n\t"
+        "cmp	r7, r8\n\t"
+        "blt	1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r5", "r6", "r7", "r8"
+    );
+
+    return c;
+}
+
+/* RSA private key operation.
+ *
+ * in      Array of bytes representing the number to exponentiate, base.
+ * inLen   Number of bytes in base.
+ * dm      Private exponent.
+ * pm      First prime.
+ * qm      Second prime.
+ * dpm     First prime's CRT exponent.
+ * dqm     Second prime's CRT exponent.
+ * qim     Inverse of second prime mod p.
+ * mm      Modulus.
+ * out     Buffer to hold big-endian bytes of exponentiation result.
+ *         Must be at least 512 bytes long.
+ * outLen  Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm,
+    mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
+    byte* out, word32* outLen)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit a[128 * 2];
+    sp_digit p[64], q[64], dp[64];
+    sp_digit tmpa[128], tmpb[128];
+#else
+    sp_digit* t = NULL;
+    sp_digit* a;
+    sp_digit* p;
+    sp_digit* q;
+    sp_digit* dp;
+    sp_digit* tmpa;
+    sp_digit* tmpb;
+#endif
+    sp_digit* r;
+    sp_digit* qi;
+    sp_digit* dq;
+    sp_digit c;
+    int err = MP_OKAY;
+
+    (void)dm;
+    (void)mm;
+
+    if (*outLen < 512)
+        err = MP_TO_E;
+    if (err == MP_OKAY && (inLen > 512 || mp_count_bits(mm) != 4096))
+        err = MP_READ_E;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 11, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (t == NULL)
+            err = MEMORY_E;
+    }
+    if (err == MP_OKAY) {
+        a = t;
+        p = a + 128 * 2;
+        q = p + 64;
+        qi = dq = dp = q + 64;
+        tmpa = qi + 64;
+        tmpb = tmpa + 128;
+
+        r = t + 128;
+    }
+#else
+#endif
+
+    if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+        r = a;
+        qi = dq = dp;
+#endif
+        sp_4096_from_bin(a, 128, in, inLen);
+        sp_4096_from_mp(p, 64, pm);
+        sp_4096_from_mp(q, 64, qm);
+        sp_4096_from_mp(dp, 64, dpm);
+
+        err = sp_2048_mod_exp_64(tmpa, a, dp, 2048, p, 1);
+    }
+    if (err == MP_OKAY) {
+        sp_4096_from_mp(dq, 64, dqm);
+        err = sp_2048_mod_exp_64(tmpb, a, dq, 2048, q, 1);
+    }
+
+    if (err == MP_OKAY) {
+        c = sp_2048_sub_in_place_64(tmpa, tmpb);
+        c += sp_4096_cond_add_64(tmpa, tmpa, p, c);
+        sp_4096_cond_add_64(tmpa, tmpa, p, c);
+
+        sp_2048_from_mp(qi, 64, qim);
+        sp_2048_mul_64(tmpa, tmpa, qi);
+        err = sp_2048_mod_64(tmpa, tmpa, p);
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_mul_64(tmpa, q, tmpa);
+        XMEMSET(&tmpb[64], 0, sizeof(sp_digit) * 64);
+        sp_4096_add_128(r, tmpb, tmpa);
+
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        XMEMSET(t, 0, sizeof(sp_digit) * 64 * 11);
+        XFREE(t, NULL, DYNAMIC_TYPE_RSA);
+    }
+#else
+    XMEMSET(tmpa, 0, sizeof(tmpa));
+    XMEMSET(tmpb, 0, sizeof(tmpb));
+    XMEMSET(p,    0, sizeof(p));
+    XMEMSET(q,    0, sizeof(q));
+    XMEMSET(dp,   0, sizeof(dp));
+#endif
+
+    return err;
+}
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
+#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
+#endif /* WOLFSSL_HAVE_SP_RSA */
+#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
+                                              !defined(WOLFSSL_RSA_PUBLIC_ONLY))
+/* Convert an array of sp_digit to an mp_int.
+ *
+ * a  A single precision integer.
+ * r  A multi-precision integer.
+ */
+static int sp_4096_to_mp(const sp_digit* a, mp_int* r)
+{
+    int err;
+
+    err = mp_grow(r, (4096 + DIGIT_BIT - 1) / DIGIT_BIT);
+    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 32
+        XMEMCPY(r->dp, a, sizeof(sp_digit) * 128);
+        r->used = 128;
+        mp_clamp(r);
+#elif DIGIT_BIT < 32
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 128; i++) {
+            r->dp[j] |= (mp_digit)(a[i] << s);
+            r->dp[j] &= (1L << DIGIT_BIT) - 1;
+            s = DIGIT_BIT - s;
+            r->dp[++j] = (mp_digit)(a[i] >> s);
+            while (s + DIGIT_BIT <= 32) {
+                s += DIGIT_BIT;
+                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+                if (s == SP_WORD_SIZE) {
+                    r->dp[j] = 0;
+                }
+                else {
+                    r->dp[j] = (mp_digit)(a[i] >> s);
+                }
+            }
+            s = 32 - s;
+        }
+        r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#else
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 128; i++) {
+            r->dp[j] |= ((mp_digit)a[i]) << s;
+            if (s + 32 >= DIGIT_BIT) {
+    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+                r->dp[j] &= (1L << DIGIT_BIT) - 1;
+    #endif
+                s = DIGIT_BIT - s;
+                r->dp[++j] = a[i] >> s;
+                s = 32 - s;
+            }
+            else {
+                s += 32;
+            }
+        }
+        r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#endif
+    }
+
+    return err;
+}
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base  Base. MP integer.
+ * exp   Exponent. MP integer.
+ * mod   Modulus. MP integer.
+ * res   Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_4096(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+    int err = MP_OKAY;
+    sp_digit b[256], e[128], m[128];
+    sp_digit* r = b;
+    int expBits = mp_count_bits(exp);
+
+    if (mp_count_bits(base) > 4096) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expBits > 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_from_mp(b, 128, base);
+        sp_4096_from_mp(e, 128, exp);
+        sp_4096_from_mp(m, 128, mod);
+
+        err = sp_4096_mod_exp_128(r, b, e, expBits, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_4096_to_mp(r, res);
+    }
+
+    XMEMSET(e, 0, sizeof(e));
+
+    return err;
+}
+
+#ifdef WOLFSSL_HAVE_SP_DH
+
+#ifdef HAVE_FFDHE_4096
+static void sp_4096_lshift_128(sp_digit* r, sp_digit* a, byte n)
+{
+    __asm__ __volatile__ (
+        "mov	r6, #31\n\t"
+        "sub	r6, r6, %[n]\n\t"
+        "add	%[a], %[a], #255\n\t"
+        "add	%[r], %[r], #255\n\t"
+        "add	%[a], %[a], #193\n\t"
+        "add	%[r], %[r], #193\n\t"
+        "ldr	r3, [%[a], #60]\n\t"
+        "lsr	r4, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r4, r4, r6\n\t"
+        "ldr	r2, [%[a], #56]\n\t"
+        "str	r4, [%[r], #64]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #52]\n\t"
+        "str	r3, [%[r], #60]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #48]\n\t"
+        "str	r2, [%[r], #56]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #44]\n\t"
+        "str	r4, [%[r], #52]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #40]\n\t"
+        "str	r3, [%[r], #48]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #36]\n\t"
+        "str	r2, [%[r], #44]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #32]\n\t"
+        "str	r4, [%[r], #40]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "str	r3, [%[r], #36]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #24]\n\t"
+        "str	r2, [%[r], #32]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #20]\n\t"
+        "str	r4, [%[r], #28]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "str	r3, [%[r], #24]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #12]\n\t"
+        "str	r2, [%[r], #20]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #8]\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "str	r3, [%[r], #12]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #0]\n\t"
+        "str	r2, [%[r], #8]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "sub	%[a], %[a], #64\n\t"
+        "sub	%[r], %[r], #64\n\t"
+        "ldr	r2, [%[a], #60]\n\t"
+        "str	r4, [%[r], #68]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #56]\n\t"
+        "str	r3, [%[r], #64]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #52]\n\t"
+        "str	r2, [%[r], #60]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #48]\n\t"
+        "str	r4, [%[r], #56]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #44]\n\t"
+        "str	r3, [%[r], #52]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #40]\n\t"
+        "str	r2, [%[r], #48]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #36]\n\t"
+        "str	r4, [%[r], #44]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #32]\n\t"
+        "str	r3, [%[r], #40]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #28]\n\t"
+        "str	r2, [%[r], #36]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #24]\n\t"
+        "str	r4, [%[r], #32]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "str	r3, [%[r], #28]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #16]\n\t"
+        "str	r2, [%[r], #24]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #12]\n\t"
+        "str	r4, [%[r], #20]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "str	r3, [%[r], #16]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #4]\n\t"
+        "str	r2, [%[r], #12]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #0]\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "sub	%[a], %[a], #64\n\t"
+        "sub	%[r], %[r], #64\n\t"
+        "ldr	r4, [%[a], #60]\n\t"
+        "str	r3, [%[r], #68]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #56]\n\t"
+        "str	r2, [%[r], #64]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #52]\n\t"
+        "str	r4, [%[r], #60]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #48]\n\t"
+        "str	r3, [%[r], #56]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #44]\n\t"
+        "str	r2, [%[r], #52]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #40]\n\t"
+        "str	r4, [%[r], #48]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #36]\n\t"
+        "str	r3, [%[r], #44]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #32]\n\t"
+        "str	r2, [%[r], #40]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #28]\n\t"
+        "str	r4, [%[r], #36]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #24]\n\t"
+        "str	r3, [%[r], #32]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #20]\n\t"
+        "str	r2, [%[r], #28]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #16]\n\t"
+        "str	r4, [%[r], #24]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "str	r3, [%[r], #20]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #8]\n\t"
+        "str	r2, [%[r], #16]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #4]\n\t"
+        "str	r4, [%[r], #12]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #0]\n\t"
+        "str	r3, [%[r], #8]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "sub	%[a], %[a], #64\n\t"
+        "sub	%[r], %[r], #64\n\t"
+        "ldr	r3, [%[a], #60]\n\t"
+        "str	r2, [%[r], #68]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #56]\n\t"
+        "str	r4, [%[r], #64]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #52]\n\t"
+        "str	r3, [%[r], #60]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #48]\n\t"
+        "str	r2, [%[r], #56]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #44]\n\t"
+        "str	r4, [%[r], #52]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #40]\n\t"
+        "str	r3, [%[r], #48]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #36]\n\t"
+        "str	r2, [%[r], #44]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #32]\n\t"
+        "str	r4, [%[r], #40]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "str	r3, [%[r], #36]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #24]\n\t"
+        "str	r2, [%[r], #32]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #20]\n\t"
+        "str	r4, [%[r], #28]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "str	r3, [%[r], #24]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #12]\n\t"
+        "str	r2, [%[r], #20]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #8]\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "str	r3, [%[r], #12]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #0]\n\t"
+        "str	r2, [%[r], #8]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "sub	%[a], %[a], #64\n\t"
+        "sub	%[r], %[r], #64\n\t"
+        "ldr	r2, [%[a], #60]\n\t"
+        "str	r4, [%[r], #68]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #56]\n\t"
+        "str	r3, [%[r], #64]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #52]\n\t"
+        "str	r2, [%[r], #60]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #48]\n\t"
+        "str	r4, [%[r], #56]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #44]\n\t"
+        "str	r3, [%[r], #52]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #40]\n\t"
+        "str	r2, [%[r], #48]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #36]\n\t"
+        "str	r4, [%[r], #44]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #32]\n\t"
+        "str	r3, [%[r], #40]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #28]\n\t"
+        "str	r2, [%[r], #36]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #24]\n\t"
+        "str	r4, [%[r], #32]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "str	r3, [%[r], #28]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #16]\n\t"
+        "str	r2, [%[r], #24]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #12]\n\t"
+        "str	r4, [%[r], #20]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "str	r3, [%[r], #16]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #4]\n\t"
+        "str	r2, [%[r], #12]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #0]\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "sub	%[a], %[a], #64\n\t"
+        "sub	%[r], %[r], #64\n\t"
+        "ldr	r4, [%[a], #60]\n\t"
+        "str	r3, [%[r], #68]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #56]\n\t"
+        "str	r2, [%[r], #64]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #52]\n\t"
+        "str	r4, [%[r], #60]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #48]\n\t"
+        "str	r3, [%[r], #56]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #44]\n\t"
+        "str	r2, [%[r], #52]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #40]\n\t"
+        "str	r4, [%[r], #48]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #36]\n\t"
+        "str	r3, [%[r], #44]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #32]\n\t"
+        "str	r2, [%[r], #40]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #28]\n\t"
+        "str	r4, [%[r], #36]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #24]\n\t"
+        "str	r3, [%[r], #32]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #20]\n\t"
+        "str	r2, [%[r], #28]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #16]\n\t"
+        "str	r4, [%[r], #24]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "str	r3, [%[r], #20]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #8]\n\t"
+        "str	r2, [%[r], #16]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #4]\n\t"
+        "str	r4, [%[r], #12]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #0]\n\t"
+        "str	r3, [%[r], #8]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "sub	%[a], %[a], #64\n\t"
+        "sub	%[r], %[r], #64\n\t"
+        "ldr	r3, [%[a], #60]\n\t"
+        "str	r2, [%[r], #68]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #56]\n\t"
+        "str	r4, [%[r], #64]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #52]\n\t"
+        "str	r3, [%[r], #60]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #48]\n\t"
+        "str	r2, [%[r], #56]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #44]\n\t"
+        "str	r4, [%[r], #52]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #40]\n\t"
+        "str	r3, [%[r], #48]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #36]\n\t"
+        "str	r2, [%[r], #44]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #32]\n\t"
+        "str	r4, [%[r], #40]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "str	r3, [%[r], #36]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #24]\n\t"
+        "str	r2, [%[r], #32]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #20]\n\t"
+        "str	r4, [%[r], #28]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "str	r3, [%[r], #24]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #12]\n\t"
+        "str	r2, [%[r], #20]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #8]\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "str	r3, [%[r], #12]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #0]\n\t"
+        "str	r2, [%[r], #8]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "sub	%[a], %[a], #64\n\t"
+        "sub	%[r], %[r], #64\n\t"
+        "ldr	r2, [%[a], #60]\n\t"
+        "str	r4, [%[r], #68]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #56]\n\t"
+        "str	r3, [%[r], #64]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #52]\n\t"
+        "str	r2, [%[r], #60]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #48]\n\t"
+        "str	r4, [%[r], #56]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #44]\n\t"
+        "str	r3, [%[r], #52]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #40]\n\t"
+        "str	r2, [%[r], #48]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #36]\n\t"
+        "str	r4, [%[r], #44]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #32]\n\t"
+        "str	r3, [%[r], #40]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #28]\n\t"
+        "str	r2, [%[r], #36]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #24]\n\t"
+        "str	r4, [%[r], #32]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "str	r3, [%[r], #28]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #16]\n\t"
+        "str	r2, [%[r], #24]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #12]\n\t"
+        "str	r4, [%[r], #20]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "str	r3, [%[r], #16]\n\t"
+        "lsr	r5, r4, #1\n\t"
+        "lsl	r4, r4, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r3, [%[a], #4]\n\t"
+        "str	r2, [%[r], #12]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r2, [%[a], #0]\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "lsr	r5, r2, #1\n\t"
+        "lsl	r2, r2, %[n]\n\t"
+        "lsr	r5, r5, r6\n\t"
+        "orr	r3, r3, r5\n\t"
+        "str	r2, [%[r]]\n\t"
+        "str	r3, [%[r], #4]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [n] "r" (n)
+        : "memory", "r2", "r3", "r4", "r5", "r6"
+    );
+}
+
+/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
+ *
+ * r     A single precision number that is the result of the operation.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_4096_mod_exp_2_128(sp_digit* r, const sp_digit* e, int bits,
+        const sp_digit* m)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit nd[256];
+    sp_digit td[129];
+#else
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit* tmp;
+    sp_digit mp = 1;
+    sp_digit n, o;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 385, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        norm = td;
+        tmp  = td + 256;
+#else
+        norm = nd;
+        tmp  = td;
+#endif
+
+        sp_4096_mont_setup(m, &mp);
+        sp_4096_mont_norm_128(norm, m);
+
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 5;
+        if (c == 32) {
+            c = 27;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
+        sp_4096_lshift_128(r, norm, y);
+        for (; i>=0 || c>=5; ) {
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 27;
+                n <<= 5;
+                c = 27;
+            }
+            else if (c < 5) {
+                y = n >> 27;
+                n = e[i--];
+                c = 5 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                y = (n >> 27) & 0x1f;
+                n <<= 5;
+                c -= 5;
+            }
+
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+
+            sp_4096_lshift_128(r, r, y);
+            sp_4096_mul_d_128(tmp, norm, r[128]);
+            r[128] = 0;
+            o = sp_4096_add_128(r, r, tmp);
+            sp_4096_cond_sub_128(r, r, m, (sp_digit)0 - o);
+        }
+
+        XMEMSET(&r[128], 0, sizeof(sp_digit) * 128U);
+        sp_4096_mont_reduce_128(r, m, mp);
+
+        mask = 0 - (sp_4096_cmp_128(r, m) >= 0);
+        sp_4096_cond_sub_128(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#endif /* HAVE_FFDHE_4096 */
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base     Base.
+ * exp      Array of bytes that is the exponent.
+ * expLen   Length of data, in bytes, in exponent.
+ * mod      Modulus.
+ * out      Buffer to hold big-endian bytes of exponentiation result.
+ *          Must be at least 512 bytes long.
+ * outLen   Length, in bytes, of exponentiation result.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_DhExp_4096(mp_int* base, const byte* exp, word32 expLen,
+    mp_int* mod, byte* out, word32* outLen)
+{
+    int err = MP_OKAY;
+    sp_digit b[256], e[128], m[128];
+    sp_digit* r = b;
+    word32 i;
+
+    if (mp_count_bits(base) > 4096) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expLen > 512) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_from_mp(b, 128, base);
+        sp_4096_from_bin(e, 128, exp, expLen);
+        sp_4096_from_mp(m, 128, mod);
+
+    #ifdef HAVE_FFDHE_4096
+        if (base->used == 1 && base->dp[0] == 2 && m[127] == (sp_digit)-1)
+            err = sp_4096_mod_exp_2_128(r, e, expLen * 8, m);
+        else
+    #endif
+            err = sp_4096_mod_exp_128(r, b, e, expLen * 8, m, 0);
+
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+        for (i=0; i<512 && out[i] == 0; i++) {
+        }
+        *outLen -= i;
+        XMEMMOVE(out, out + i, *outLen);
+
+    }
+
+    XMEMSET(e, 0, sizeof(e));
+
+    return err;
+}
+#endif /* WOLFSSL_HAVE_SP_DH */
+
+#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
+
+#endif /* WOLFSSL_SP_4096 */
+
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
+#ifdef WOLFSSL_HAVE_SP_ECC
+#ifndef WOLFSSL_SP_NO_256
+
+/* Point structure to use. */
+typedef struct sp_point_256 {
+    sp_digit x[2 * 8];
+    sp_digit y[2 * 8];
+    sp_digit z[2 * 8];
+    int infinity;
+} sp_point_256;
+
+/* The modulus (prime) of the curve P256. */
+static const sp_digit p256_mod[8] = {
+    0xffffffff,0xffffffff,0xffffffff,0x00000000,0x00000000,0x00000000,
+    0x00000001,0xffffffff
+};
+/* The Montogmery normalizer for modulus of the curve P256. */
+static const sp_digit p256_norm_mod[8] = {
+    0x00000001,0x00000000,0x00000000,0xffffffff,0xffffffff,0xffffffff,
+    0xfffffffe,0x00000000
+};
+/* The Montogmery multiplier for modulus of the curve P256. */
+static const sp_digit p256_mp_mod = 0x00000001;
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+                                            defined(HAVE_ECC_VERIFY)
+/* The order of the curve P256. */
+static const sp_digit p256_order[8] = {
+    0xfc632551,0xf3b9cac2,0xa7179e84,0xbce6faad,0xffffffff,0xffffffff,
+    0x00000000,0xffffffff
+};
+#endif
+/* The order of the curve P256 minus 2. */
+static const sp_digit p256_order2[8] = {
+    0xfc63254f,0xf3b9cac2,0xa7179e84,0xbce6faad,0xffffffff,0xffffffff,
+    0x00000000,0xffffffff
+};
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montogmery normalizer for order of the curve P256. */
+static const sp_digit p256_norm_order[8] = {
+    0x039cdaaf,0x0c46353d,0x58e8617b,0x43190552,0x00000000,0x00000000,
+    0xffffffff,0x00000000
+};
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montogmery multiplier for order of the curve P256. */
+static const sp_digit p256_mp_order = 0xee00bc4f;
+#endif
+/* The base point of curve P256. */
+static const sp_point_256 p256_base = {
+    /* X ordinate */
+    {
+        0xd898c296,0xf4a13945,0x2deb33a0,0x77037d81,0x63a440f2,0xf8bce6e5,
+        0xe12c4247,0x6b17d1f2,
+        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* Y ordinate */
+    {
+        0x37bf51f5,0xcbb64068,0x6b315ece,0x2bce3357,0x7c0f9e16,0x8ee7eb4a,
+        0xfe1a7f9b,0x4fe342e2,
+        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* Z ordinate */
+    {
+        0x00000001,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
+        0x00000000,0x00000000,
+        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* infinity */
+    0
+};
+#if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY)
+static const sp_digit p256_b[8] = {
+    0x27d2604b,0x3bce3c3e,0xcc53b0f6,0x651d06b0,0x769886bc,0xb3ebbd55,
+    0xaa3a93e7,0x5ac635d8
+};
+#endif
+
+static int sp_256_point_new_ex_8(void* heap, sp_point_256* sp, sp_point_256** p)
+{
+    int ret = MP_OKAY;
+    (void)heap;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    (void)sp;
+    *p = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, DYNAMIC_TYPE_ECC);
+#else
+    *p = sp;
+#endif
+    if (*p == NULL) {
+        ret = MEMORY_E;
+    }
+    return ret;
+}
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* Allocate memory for point and return error. */
+#define sp_256_point_new_8(heap, sp, p) sp_256_point_new_ex_8((heap), NULL, &(p))
+#else
+/* Set pointer to data and return no error. */
+#define sp_256_point_new_8(heap, sp, p) sp_256_point_new_ex_8((heap), &(sp), &(p))
+#endif
+
+
+static void sp_256_point_free_8(sp_point_256* p, int clear, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* If valid pointer then clear point data if requested and free data. */
+    if (p != NULL) {
+        if (clear != 0) {
+            XMEMSET(p, 0, sizeof(*p));
+        }
+        XFREE(p, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+/* Clear point data if requested. */
+    if (clear != 0) {
+        XMEMSET(p, 0, sizeof(*p));
+    }
+#endif
+    (void)heap;
+}
+
+/* Multiply a number by Montogmery normalizer mod modulus (prime).
+ *
+ * r  The resulting Montgomery form number.
+ * a  The number to convert.
+ * m  The modulus (prime).
+ */
+static int sp_256_mod_mul_norm_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    int64_t t[8];
+    int64_t a64[8];
+    int64_t o;
+
+    (void)m;
+
+    a64[0] = a[0];
+    a64[1] = a[1];
+    a64[2] = a[2];
+    a64[3] = a[3];
+    a64[4] = a[4];
+    a64[5] = a[5];
+    a64[6] = a[6];
+    a64[7] = a[7];
+
+    /*  1  1  0 -1 -1 -1 -1  0 */
+    t[0] = 0 + a64[0] + a64[1] - a64[3] - a64[4] - a64[5] - a64[6];
+    /*  0  1  1  0 -1 -1 -1 -1 */
+    t[1] = 0 + a64[1] + a64[2] - a64[4] - a64[5] - a64[6] - a64[7];
+    /*  0  0  1  1  0 -1 -1 -1 */
+    t[2] = 0 + a64[2] + a64[3] - a64[5] - a64[6] - a64[7];
+    /* -1 -1  0  2  2  1  0 -1 */
+    t[3] = 0 - a64[0] - a64[1] + 2 * a64[3] + 2 * a64[4] + a64[5] - a64[7];
+    /*  0 -1 -1  0  2  2  1  0 */
+    t[4] = 0 - a64[1] - a64[2] + 2 * a64[4] + 2 * a64[5] + a64[6];
+    /*  0  0 -1 -1  0  2  2  1 */
+    t[5] = 0 - a64[2] - a64[3] + 2 * a64[5] + 2 * a64[6] + a64[7];
+    /* -1 -1  0  0  0  1  3  2 */
+    t[6] = 0 - a64[0] - a64[1] + a64[5] + 3 * a64[6] + 2 * a64[7];
+    /*  1  0 -1 -1 -1 -1  0  3 */
+    t[7] = 0 + a64[0] - a64[2] - a64[3] - a64[4] - a64[5] + 3 * a64[7];
+
+    t[1] += t[0] >> 32; t[0] &= 0xffffffff;
+    t[2] += t[1] >> 32; t[1] &= 0xffffffff;
+    t[3] += t[2] >> 32; t[2] &= 0xffffffff;
+    t[4] += t[3] >> 32; t[3] &= 0xffffffff;
+    t[5] += t[4] >> 32; t[4] &= 0xffffffff;
+    t[6] += t[5] >> 32; t[5] &= 0xffffffff;
+    t[7] += t[6] >> 32; t[6] &= 0xffffffff;
+    o     = t[7] >> 32; t[7] &= 0xffffffff;
+    t[0] += o;
+    t[3] -= o;
+    t[6] -= o;
+    t[7] += o;
+    t[1] += t[0] >> 32; t[0] &= 0xffffffff;
+    t[2] += t[1] >> 32; t[1] &= 0xffffffff;
+    t[3] += t[2] >> 32; t[2] &= 0xffffffff;
+    t[4] += t[3] >> 32; t[3] &= 0xffffffff;
+    t[5] += t[4] >> 32; t[4] &= 0xffffffff;
+    t[6] += t[5] >> 32; t[5] &= 0xffffffff;
+    t[7] += t[6] >> 32; t[6] &= 0xffffffff;
+    r[0] = t[0];
+    r[1] = t[1];
+    r[2] = t[2];
+    r[3] = t[3];
+    r[4] = t[4];
+    r[5] = t[5];
+    r[6] = t[6];
+    r[7] = t[7];
+
+    return MP_OKAY;
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
+ * a  A multi-precision integer.
+ */
+static void sp_256_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 32
+    int j;
+
+    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+    for (j = a->used; j < size; j++) {
+        r[j] = 0;
+    }
+#elif DIGIT_BIT > 32
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
+        r[j] &= 0xffffffff;
+        s = 32U - s;
+        if (j + 1 >= size) {
+            break;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 32U) <= (word32)DIGIT_BIT) {
+            s += 32U;
+            r[j] &= 0xffffffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#else
+    int i, j = 0, s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i]) << s;
+        if (s + DIGIT_BIT >= 32) {
+            r[j] &= 0xffffffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            s = 32 - s;
+            if (s == DIGIT_BIT) {
+                r[++j] = 0;
+                s = 0;
+            }
+            else {
+                r[++j] = a->dp[i] >> s;
+                s = DIGIT_BIT - s;
+            }
+        }
+        else {
+            s += DIGIT_BIT;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#endif
+}
+
+/* Convert a point of type ecc_point to type sp_point_256.
+ *
+ * p   Point of type sp_point_256 (result).
+ * pm  Point of type ecc_point.
+ */
+static void sp_256_point_from_ecc_point_8(sp_point_256* p, const ecc_point* pm)
+{
+    XMEMSET(p->x, 0, sizeof(p->x));
+    XMEMSET(p->y, 0, sizeof(p->y));
+    XMEMSET(p->z, 0, sizeof(p->z));
+    sp_256_from_mp(p->x, 8, pm->x);
+    sp_256_from_mp(p->y, 8, pm->y);
+    sp_256_from_mp(p->z, 8, pm->z);
+    p->infinity = 0;
+}
+
+/* Convert an array of sp_digit to an mp_int.
+ *
+ * a  A single precision integer.
+ * r  A multi-precision integer.
+ */
+static int sp_256_to_mp(const sp_digit* a, mp_int* r)
+{
+    int err;
+
+    err = mp_grow(r, (256 + DIGIT_BIT - 1) / DIGIT_BIT);
+    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 32
+        XMEMCPY(r->dp, a, sizeof(sp_digit) * 8);
+        r->used = 8;
+        mp_clamp(r);
+#elif DIGIT_BIT < 32
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 8; i++) {
+            r->dp[j] |= (mp_digit)(a[i] << s);
+            r->dp[j] &= (1L << DIGIT_BIT) - 1;
+            s = DIGIT_BIT - s;
+            r->dp[++j] = (mp_digit)(a[i] >> s);
+            while (s + DIGIT_BIT <= 32) {
+                s += DIGIT_BIT;
+                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+                if (s == SP_WORD_SIZE) {
+                    r->dp[j] = 0;
+                }
+                else {
+                    r->dp[j] = (mp_digit)(a[i] >> s);
+                }
+            }
+            s = 32 - s;
+        }
+        r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#else
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 8; i++) {
+            r->dp[j] |= ((mp_digit)a[i]) << s;
+            if (s + 32 >= DIGIT_BIT) {
+    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+                r->dp[j] &= (1L << DIGIT_BIT) - 1;
+    #endif
+                s = DIGIT_BIT - s;
+                r->dp[++j] = a[i] >> s;
+                s = 32 - s;
+            }
+            else {
+                s += 32;
+            }
+        }
+        r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#endif
+    }
+
+    return err;
+}
+
+/* Convert a point of type sp_point_256 to type ecc_point.
+ *
+ * p   Point of type sp_point_256.
+ * pm  Point of type ecc_point (result).
+ * returns MEMORY_E when allocation of memory in ecc_point fails otherwise
+ * MP_OKAY.
+ */
+static int sp_256_point_to_ecc_point_8(const sp_point_256* p, ecc_point* pm)
+{
+    int err;
+
+    err = sp_256_to_mp(p->x, pm->x);
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->y, pm->y);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->z, pm->z);
+    }
+
+    return err;
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_256_mul_8(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit tmp[8 * 2];
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r8, r3\n\t"
+        "mov	r11, %[r]\n\t"
+        "mov	r9, %[a]\n\t"
+        "mov	r10, %[b]\n\t"
+        "mov	r6, #32\n\t"
+        "add	r6, r9\n\t"
+        "mov	r12, r6\n\t"
+        "\n1:\n\t"
+        "mov	%[r], #0\n\t"
+        "mov	r5, #0\n\t"
+        "mov	r6, #28\n\t"
+        "mov	%[a], r8\n\t"
+        "sub	%[a], r6\n\t"
+        "sbc	r6, r6\n\t"
+        "mvn	r6, r6\n\t"
+        "and	%[a], r6\n\t"
+        "mov	%[b], r8\n\t"
+        "sub	%[b], %[a]\n\t"
+        "add	%[a], r9\n\t"
+        "add	%[b], r10\n\t"
+        "\n2:\n\t"
+        "# Multiply Start\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r7, [%[b]]\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r3, r7\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r7, [%[b]]\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r7, [%[b]]\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r7, [%[b]]\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "# Multiply Done\n\t"
+        "add	%[a], #4\n\t"
+        "sub	%[b], #4\n\t"
+        "cmp	%[a], r12\n\t"
+        "beq	3f\n\t"
+        "mov	r6, r8\n\t"
+        "add	r6, r9\n\t"
+        "cmp	%[a], r6\n\t"
+        "ble	2b\n\t"
+        "\n3:\n\t"
+        "mov	%[r], r11\n\t"
+        "mov	r7, r8\n\t"
+        "str	r3, [%[r], r7]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "add	r7, #4\n\t"
+        "mov	r8, r7\n\t"
+        "mov	r6, #56\n\t"
+        "cmp	r7, r6\n\t"
+        "ble	1b\n\t"
+        "str	r3, [%[r], r7]\n\t"
+        "mov	%[a], r9\n\t"
+        "mov	%[b], r10\n\t"
+        :
+        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
+    );
+
+    XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * r  A single precision number representing condition subtract result.
+ * a  A single precision number to subtract from.
+ * b  A single precision number to subtract.
+ * m  Mask value to apply.
+ */
+SP_NOINLINE static sp_digit sp_256_cond_sub_8(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, sp_digit m)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r5, #32\n\t"
+        "mov	r8, r5\n\t"
+        "mov	r7, #0\n\t"
+        "1:\n\t"
+        "ldr	r6, [%[b], r7]\n\t"
+        "and	r6, %[m]\n\t"
+        "mov	r5, #0\n\t"
+        "sub	r5, %[c]\n\t"
+        "ldr	r5, [%[a], r7]\n\t"
+        "sbc	r5, r6\n\t"
+        "sbc	%[c], %[c]\n\t"
+        "str	r5, [%[r], r7]\n\t"
+        "add	r7, #4\n\t"
+        "cmp	r7, r8\n\t"
+        "blt	1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r5", "r6", "r7", "r8"
+    );
+
+    return c;
+}
+
+/* Reduce the number back to 256 bits using Montgomery reduction.
+ *
+ * a   A single precision number to reduce in place.
+ * m   The single precision number representing the modulus.
+ * mp  The digit representing the negative inverse of m mod 2^n.
+ */
+SP_NOINLINE static void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    (void)mp;
+    (void)m;
+
+    __asm__ __volatile__ (
+        "mov	r2, #0\n\t"
+        "mov	r1, #0\n\t"
+        "# i = 0\n\t"
+        "mov	r8, r2\n\t"
+        "\n1:\n\t"
+        "mov	r4, #0\n\t"
+        "# mu = a[i] * 1 (mp) = a[i]\n\t"
+        "ldr	r3, [%[a]]\n\t"
+        "# a[i+0] += -1 * mu\n\t"
+        "mov	r5, r3\n\t"
+        "str	r4, [%[a], #0]\n\t"
+        "# a[i+1] += -1 * mu\n\t"
+        "ldr	r6, [%[a], #4]\n\t"
+        "mov	r4, r3\n\t"
+        "sub	r5, r3\n\t"
+        "sbc	r4, r2\n\t"
+        "add	r5, r6\n\t"
+        "adc	r4, r2\n\t"
+        "str	r5, [%[a], #4]\n\t"
+        "# a[i+2] += -1 * mu\n\t"
+        "ldr	r6, [%[a], #8]\n\t"
+        "mov	r5, r3\n\t"
+        "sub	r4, r3\n\t"
+        "sbc	r5, r2\n\t"
+        "add	r4, r6\n\t"
+        "adc	r5, r2\n\t"
+        "str	r4, [%[a], #8]\n\t"
+        "# a[i+3] += 0 * mu\n\t"
+        "ldr	r6, [%[a], #12]\n\t"
+        "mov	r4, #0\n\t"
+        "add	r5, r6\n\t"
+        "adc	r4, r2\n\t"
+        "str	r5, [%[a], #12]\n\t"
+        "# a[i+4] += 0 * mu\n\t"
+        "ldr	r6, [%[a], #16]\n\t"
+        "mov	r5, #0\n\t"
+        "add	r4, r6\n\t"
+        "adc	r5, r2\n\t"
+        "str	r4, [%[a], #16]\n\t"
+        "# a[i+5] += 0 * mu\n\t"
+        "ldr	r6, [%[a], #20]\n\t"
+        "mov	r4, #0\n\t"
+        "add	r5, r6\n\t"
+        "adc	r4, r2\n\t"
+        "str	r5, [%[a], #20]\n\t"
+        "# a[i+6] += 1 * mu\n\t"
+        "ldr	r6, [%[a], #24]\n\t"
+        "mov	r5, #0\n\t"
+        "add	r4, r3\n\t"
+        "adc	r5, r2\n\t"
+        "add	r4, r6\n\t"
+        "adc	r5, r2\n\t"
+        "str	r4, [%[a], #24]\n\t"
+        "# a[i+7] += -1 * mu\n\t"
+        "ldr	r6, [%[a], #28]\n\t"
+        "ldr	r7, [%[a], #32]\n\t"
+        "add	r4, r1, r3\n\t"
+        "mov	r1, #0\n\t"
+        "adc	r1, r2\n\t"
+        "sub	r5, r3\n\t"
+        "sbc	r4, r2\n\t"
+        "sbc	r1, r2\n\t"
+        "add	r5, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r1, r2\n\t"
+        "str	r5, [%[a],  #28]\n\t"
+        "str	r4, [%[a], #32]\n\t"
+        "# i += 1\n\t"
+        "mov	r6, #4\n\t"
+        "add	r8, r6\n\t"
+        "add	%[a], #4\n\t"
+        "mov	r6, #32\n\t"
+        "cmp	r8, r6\n\t"
+        "blt	1b\n\t"
+        "sub	%[a], #32\n\t"
+        "mov	r3, r1\n\t"
+        "sub	r1, #1\n\t"
+        "mvn	r1, r1\n\t"
+        "ldr	r5, [%[a],#32]\n\t"
+        "ldr	r4, [%[a],#36]\n\t"
+        "ldr	r6, [%[a],#40]\n\t"
+        "ldr	r7, [%[a],#44]\n\t"
+        "sub	r5, r1\n\t"
+        "sbc	r4, r1\n\t"
+        "sbc	r6, r1\n\t"
+        "sbc	r7, r2\n\t"
+        "str	r5, [%[a],#0]\n\t"
+        "str	r4, [%[a],#4]\n\t"
+        "str	r6, [%[a],#8]\n\t"
+        "str	r7, [%[a],#12]\n\t"
+        "ldr	r5, [%[a],#48]\n\t"
+        "ldr	r4, [%[a],#52]\n\t"
+        "ldr	r6, [%[a],#56]\n\t"
+        "ldr	r7, [%[a],#60]\n\t"
+        "sbc	r5, r2\n\t"
+        "sbc	r4, r2\n\t"
+        "sbc	r6, r3\n\t"
+        "sbc	r7, r1\n\t"
+        "str	r5, [%[a],#16]\n\t"
+        "str	r4, [%[a],#20]\n\t"
+        "str	r6, [%[a],#24]\n\t"
+        "str	r7, [%[a],#28]\n\t"
+        : [a] "+r" (a)
+        :
+        : "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8"
+    );
+
+
+    (void)m;
+    (void)mp;
+}
+
+/* Reduce the number back to 256 bits using Montgomery reduction.
+ *
+ * a   A single precision number to reduce in place.
+ * m   The single precision number representing the modulus.
+ * mp  The digit representing the negative inverse of m mod 2^n.
+ */
+SP_NOINLINE static void sp_256_mont_reduce_order_8(sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_digit ca = 0;
+
+    __asm__ __volatile__ (
+        "mov	r8, %[mp]\n\t"
+        "mov	r12, %[ca]\n\t"
+        "mov	r14, %[m]\n\t"
+        "mov	r9, %[a]\n\t"
+        "mov	r4, #0\n\t"
+        "# i = 0\n\t"
+        "mov	r11, r4\n\t"
+        "\n1:\n\t"
+        "mov	r5, #0\n\t"
+        "mov	%[ca], #0\n\t"
+        "# mu = a[i] * mp\n\t"
+        "mov	%[mp], r8\n\t"
+        "ldr	%[a], [%[a]]\n\t"
+        "mul	%[mp], %[a]\n\t"
+        "mov	%[m], r14\n\t"
+        "mov	r10, r9\n\t"
+        "\n2:\n\t"
+        "# a[i+j] += m[j] * mu\n\t"
+        "mov	%[a], r10\n\t"
+        "ldr	%[a], [%[a]]\n\t"
+        "mov	%[ca], #0\n\t"
+        "mov	r4, r5\n\t"
+        "mov	r5, #0\n\t"
+        "# Multiply m[j] and mu - Start\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsl	r6, %[mp], #16\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	%[a], r7\n\t"
+        "adc	r5, %[ca]\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	%[a], r6\n\t"
+        "adc	r5, r7\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsr	r6, %[mp], #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r5, r7\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	%[a], r6\n\t"
+        "adc	r5, r7\n\t"
+        "# Multiply m[j] and mu - Done\n\t"
+        "add	r4, %[a]\n\t"
+        "adc	r5, %[ca]\n\t"
+        "mov	%[a], r10\n\t"
+        "str	r4, [%[a]]\n\t"
+        "mov	r6, #4\n\t"
+        "add	%[m], #4\n\t"
+        "add	r10, r6\n\t"
+        "mov	r4, #28\n\t"
+        "add	r4, r9\n\t"
+        "cmp	r10, r4\n\t"
+        "blt	2b\n\t"
+        "# a[i+7] += m[7] * mu\n\t"
+        "mov	%[ca], #0\n\t"
+        "mov	r4, r12\n\t"
+        "mov	%[a], #0\n\t"
+        "# Multiply m[7] and mu - Start\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsl	r6, %[mp], #16\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r5, r7\n\t"
+        "adc	r4, %[ca]\n\t"
+        "adc	%[a], %[ca]\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r5, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	%[a], %[ca]\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsr	r6, %[mp], #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r4, r7\n\t"
+        "adc	%[a], %[ca]\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r5, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	%[a], %[ca]\n\t"
+        "# Multiply m[7] and mu - Done\n\t"
+        "mov	%[ca], %[a]\n\t"
+        "mov	%[a], r10\n\t"
+        "ldr	r7, [%[a], #4]\n\t"
+        "ldr	%[a], [%[a]]\n\t"
+        "mov	r6, #0\n\t"
+        "add	r5, %[a]\n\t"
+        "adc	r7, r4\n\t"
+        "adc	%[ca], r6\n\t"
+        "mov	%[a], r10\n\t"
+        "str	r5, [%[a]]\n\t"
+        "str	r7, [%[a], #4]\n\t"
+        "# i += 1\n\t"
+        "mov	r6, #4\n\t"
+        "add	r9, r6\n\t"
+        "add	r11, r6\n\t"
+        "mov	r12, %[ca]\n\t"
+        "mov	%[a], r9\n\t"
+        "mov	r4, #32\n\t"
+        "cmp	r11, r4\n\t"
+        "blt	1b\n\t"
+        "mov	%[m], r14\n\t"
+        : [ca] "+r" (ca), [a] "+r" (a)
+        : [m] "r" (m), [mp] "r" (mp)
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14"
+    );
+
+    sp_256_cond_sub_8(a - 8, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montogmery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r   Result of multiplication.
+ * a   First number to multiply in Montogmery form.
+ * b   Second number to multiply in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_256_mont_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_256_mul_8(r, a, b);
+    sp_256_mont_reduce_8(r, m, mp);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_256_sqr_8(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r5, #0\n\t"
+        "mov	r8, r3\n\t"
+        "mov	r11, %[r]\n\t"
+        "mov	r6, #64\n\t"
+        "neg	r6, r6\n\t"
+        "add	sp, r6\n\t"
+        "mov	r10, sp\n\t"
+        "mov	r9, %[a]\n\t"
+        "\n1:\n\t"
+        "mov	%[r], #0\n\t"
+        "mov	r6, #28\n\t"
+        "mov	%[a], r8\n\t"
+        "sub	%[a], r6\n\t"
+        "sbc	r6, r6\n\t"
+        "mvn	r6, r6\n\t"
+        "and	%[a], r6\n\t"
+        "mov	r2, r8\n\t"
+        "sub	r2, %[a]\n\t"
+        "add	%[a], r9\n\t"
+        "add	r2, r9\n\t"
+        "\n2:\n\t"
+        "cmp	r2, %[a]\n\t"
+        "beq	4f\n\t"
+        "# Multiply * 2: Start\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r7, [r2]\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r3, r7\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "add	r3, r7\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r7, [r2]\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r7, [r2]\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r7, [r2]\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "# Multiply * 2: Done\n\t"
+        "bal	5f\n\t"
+        "\n4:\n\t"
+        "# Square: Start\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "mul	r6, r6\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "mul	r7, r7\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #15\n\t"
+        "lsl	r6, r6, #17\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "# Square: Done\n\t"
+        "\n5:\n\t"
+        "add	%[a], #4\n\t"
+        "sub	r2, #4\n\t"
+        "mov	r6, #32\n\t"
+        "add	r6, r9\n\t"
+        "cmp	%[a], r6\n\t"
+        "beq	3f\n\t"
+        "cmp	%[a], r2\n\t"
+        "bgt	3f\n\t"
+        "mov	r7, r8\n\t"
+        "add	r7, r9\n\t"
+        "cmp	%[a], r7\n\t"
+        "ble	2b\n\t"
+        "\n3:\n\t"
+        "mov	%[r], r10\n\t"
+        "mov	r7, r8\n\t"
+        "str	r3, [%[r], r7]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "mov	r5, #0\n\t"
+        "add	r7, #4\n\t"
+        "mov	r8, r7\n\t"
+        "mov	r6, #56\n\t"
+        "cmp	r7, r6\n\t"
+        "ble	1b\n\t"
+        "mov	%[a], r9\n\t"
+        "str	r3, [%[r], r7]\n\t"
+        "mov	%[r], r11\n\t"
+        "mov	%[a], r10\n\t"
+        "mov	r3, #60\n\t"
+        "\n4:\n\t"
+        "ldr	r6, [%[a], r3]\n\t"
+        "str	r6, [%[r], r3]\n\t"
+        "sub	r3, #4\n\t"
+        "bge	4b\n\t"
+        "mov	r6, #64\n\t"
+        "add	sp, r6\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
+    );
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_256_sqr_8(r, a);
+    sp_256_mont_reduce_8(r, m, mp);
+}
+
+#if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY)
+/* Square the Montgomery form number a number of times. (r = a ^ n mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montogmery form.
+ * n   Number of times to square.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_256_mont_sqr_n_8(sp_digit* r, const sp_digit* a, int n,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_256_mont_sqr_8(r, a, m, mp);
+    for (; n > 1; n--) {
+        sp_256_mont_sqr_8(r, r, m, mp);
+    }
+}
+
+#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */
+#ifdef WOLFSSL_SP_SMALL
+/* Mod-2 for the P256 curve. */
+static const uint32_t p256_mod_minus_2[8] = {
+    0xfffffffdU,0xffffffffU,0xffffffffU,0x00000000U,0x00000000U,0x00000000U,
+    0x00000001U,0xffffffffU
+};
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the modulus (prime) of the
+ * P256 curve. (r = 1 / a mod m)
+ *
+ * r   Inverse result.
+ * a   Number to invert.
+ * td  Temporary data.
+ */
+static void sp_256_mont_inv_8(sp_digit* r, const sp_digit* a, sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    XMEMCPY(t, a, sizeof(sp_digit) * 8);
+    for (i=254; i>=0; i--) {
+        sp_256_mont_sqr_8(t, t, p256_mod, p256_mp_mod);
+        if (p256_mod_minus_2[i / 32] & ((sp_digit)1 << (i % 32)))
+            sp_256_mont_mul_8(t, t, a, p256_mod, p256_mp_mod);
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 8);
+#else
+    sp_digit* t1 = td;
+    sp_digit* t2 = td + 2 * 8;
+    sp_digit* t3 = td + 4 * 8;
+    /* 0x2 */
+    sp_256_mont_sqr_8(t1, a, p256_mod, p256_mp_mod);
+    /* 0x3 */
+    sp_256_mont_mul_8(t2, t1, a, p256_mod, p256_mp_mod);
+    /* 0xc */
+    sp_256_mont_sqr_n_8(t1, t2, 2, p256_mod, p256_mp_mod);
+    /* 0xd */
+    sp_256_mont_mul_8(t3, t1, a, p256_mod, p256_mp_mod);
+    /* 0xf */
+    sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xf0 */
+    sp_256_mont_sqr_n_8(t1, t2, 4, p256_mod, p256_mp_mod);
+    /* 0xfd */
+    sp_256_mont_mul_8(t3, t3, t1, p256_mod, p256_mp_mod);
+    /* 0xff */
+    sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xff00 */
+    sp_256_mont_sqr_n_8(t1, t2, 8, p256_mod, p256_mp_mod);
+    /* 0xfffd */
+    sp_256_mont_mul_8(t3, t3, t1, p256_mod, p256_mp_mod);
+    /* 0xffff */
+    sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xffff0000 */
+    sp_256_mont_sqr_n_8(t1, t2, 16, p256_mod, p256_mp_mod);
+    /* 0xfffffffd */
+    sp_256_mont_mul_8(t3, t3, t1, p256_mod, p256_mp_mod);
+    /* 0xffffffff */
+    sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000000 */
+    sp_256_mont_sqr_n_8(t1, t2, 32, p256_mod, p256_mp_mod);
+    /* 0xffffffffffffffff */
+    sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001 */
+    sp_256_mont_mul_8(r, t1, a, p256_mod, p256_mp_mod);
+    /* 0xffffffff000000010000000000000000000000000000000000000000 */
+    sp_256_mont_sqr_n_8(r, r, 160, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001000000000000000000000000ffffffffffffffff */
+    sp_256_mont_mul_8(r, r, t2, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001000000000000000000000000ffffffffffffffff00000000 */
+    sp_256_mont_sqr_n_8(r, r, 32, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001000000000000000000000000fffffffffffffffffffffffd */
+    sp_256_mont_mul_8(r, r, t3, p256_mod, p256_mp_mod);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Compare a with b in constant time.
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+SP_NOINLINE static int32_t sp_256_cmp_8(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+
+
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mvn	r3, r3\n\t"
+        "mov	r6, #28\n\t"
+        "1:\n\t"
+        "ldr	r7, [%[a], r6]\n\t"
+        "ldr	r5, [%[b], r6]\n\t"
+        "and	r7, r3\n\t"
+        "and	r5, r3\n\t"
+        "mov	r4, r7\n\t"
+        "sub	r7, r5\n\t"
+        "sbc	r7, r7\n\t"
+        "add	%[r], r7\n\t"
+        "mvn	r7, r7\n\t"
+        "and	r3, r7\n\t"
+        "sub	r5, r4\n\t"
+        "sbc	r7, r7\n\t"
+        "sub	%[r], r7\n\t"
+        "mvn	r7, r7\n\t"
+        "and	r3, r7\n\t"
+        "sub	r6, #4\n\t"
+        "cmp	r6, #0\n\t"
+        "bge	1b\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
+        : "r3", "r4", "r5", "r6", "r7"
+    );
+
+    return r;
+}
+
+/* Normalize the values in each word to 32.
+ *
+ * a  Array of sp_digit to normalize.
+ */
+#define sp_256_norm_8(a)
+
+/* Map the Montgomery form projective coordinate point to an affine point.
+ *
+ * r  Resulting affine coordinate point.
+ * p  Montgomery form projective coordinate point.
+ * t  Temporary ordinate data.
+ */
+static void sp_256_map_8(sp_point_256* r, const sp_point_256* p, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*8;
+    int32_t n;
+
+    sp_256_mont_inv_8(t1, p->z, t + 2*8);
+
+    sp_256_mont_sqr_8(t2, t1, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_8(t1, t2, t1, p256_mod, p256_mp_mod);
+
+    /* x /= z^2 */
+    sp_256_mont_mul_8(r->x, p->x, t2, p256_mod, p256_mp_mod);
+    XMEMSET(r->x + 8, 0, sizeof(r->x) / 2U);
+    sp_256_mont_reduce_8(r->x, p256_mod, p256_mp_mod);
+    /* Reduce x to less than modulus */
+    n = sp_256_cmp_8(r->x, p256_mod);
+    sp_256_cond_sub_8(r->x, r->x, p256_mod, 0 - ((n >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_8(r->x);
+
+    /* y /= z^3 */
+    sp_256_mont_mul_8(r->y, p->y, t1, p256_mod, p256_mp_mod);
+    XMEMSET(r->y + 8, 0, sizeof(r->y) / 2U);
+    sp_256_mont_reduce_8(r->y, p256_mod, p256_mp_mod);
+    /* Reduce y to less than modulus */
+    n = sp_256_cmp_8(r->y, p256_mod);
+    sp_256_cond_sub_8(r->y, r->y, p256_mod, 0 - ((n >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_8(r->y);
+
+    XMEMSET(r->z, 0, sizeof(r->z));
+    r->z[0] = 1;
+
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r6, %[a]\n\t"
+        "mov	r7, #0\n\t"
+        "add	r6, #32\n\t"
+        "sub	r7, #1\n\t"
+        "\n1:\n\t"
+        "add	%[c], r7\n\t"
+        "ldr	r4, [%[a]]\n\t"
+        "ldr	r5, [%[b]]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r]]\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c]\n\t"
+        "add	%[a], #4\n\t"
+        "add	%[b], #4\n\t"
+        "add	%[r], #4\n\t"
+        "cmp	%[a], r6\n\t"
+        "bne	1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r7"
+    );
+
+    return c;
+}
+
+#else
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldr	r4, [%[a], #0]\n\t"
+        "ldr	r5, [%[b], #0]\n\t"
+        "add	r4, r5\n\t"
+        "str	r4, [%[r], #0]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b], #4]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #4]\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "ldr	r5, [%[b], #8]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "ldr	r5, [%[b], #12]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #12]\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "ldr	r5, [%[b], #16]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "ldr	r5, [%[b], #20]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #20]\n\t"
+        "ldr	r4, [%[a], #24]\n\t"
+        "ldr	r5, [%[b], #24]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #24]\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "ldr	r5, [%[b], #28]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #28]\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c]\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Add two Montgomery form numbers (r = a + b % m).
+ *
+ * r   Result of addition.
+ * a   First number to add in Montogmery form.
+ * b   Second number to add in Montogmery form.
+ * m   Modulus (prime).
+ */
+SP_NOINLINE static void sp_256_mont_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    (void)m;
+
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "ldr	r4, [%[a],#0]\n\t"
+        "ldr	r5, [%[a],#4]\n\t"
+        "ldr	r6, [%[b],#0]\n\t"
+        "ldr	r7, [%[b],#4]\n\t"
+        "add	r4, r6\n\t"
+        "adc	r5, r7\n\t"
+        "str	r4, [%[r],#0]\n\t"
+        "str	r5, [%[r],#4]\n\t"
+        "ldr	r4, [%[a],#8]\n\t"
+        "ldr	r5, [%[a],#12]\n\t"
+        "ldr	r6, [%[b],#8]\n\t"
+        "ldr	r7, [%[b],#12]\n\t"
+        "adc	r4, r6\n\t"
+        "adc	r5, r7\n\t"
+        "str	r4, [%[r],#8]\n\t"
+        "str	r5, [%[r],#12]\n\t"
+        "ldr	r4, [%[a],#16]\n\t"
+        "ldr	r5, [%[a],#20]\n\t"
+        "ldr	r6, [%[b],#16]\n\t"
+        "ldr	r7, [%[b],#20]\n\t"
+        "adc	r4, r6\n\t"
+        "adc	r5, r7\n\t"
+        "mov	r8, r4\n\t"
+        "mov	r9, r5\n\t"
+        "ldr	r4, [%[a],#24]\n\t"
+        "ldr	r5, [%[a],#28]\n\t"
+        "ldr	r6, [%[b],#24]\n\t"
+        "ldr	r7, [%[b],#28]\n\t"
+        "adc	r4, r6\n\t"
+        "adc	r5, r7\n\t"
+        "mov	r10, r4\n\t"
+        "mov	r11, r5\n\t"
+        "adc	r3, r3\n\t"
+        "mov	r6, r3\n\t"
+        "sub	r3, #1\n\t"
+        "mvn	r3, r3\n\t"
+        "mov	r7, #0\n\t"
+        "ldr	r4, [%[r],#0]\n\t"
+        "ldr	r5, [%[r],#4]\n\t"
+        "sub	r4, r3\n\t"
+        "sbc	r5, r3\n\t"
+        "str	r4, [%[r],#0]\n\t"
+        "str	r5, [%[r],#4]\n\t"
+        "ldr	r4, [%[r],#8]\n\t"
+        "ldr	r5, [%[r],#12]\n\t"
+        "sbc	r4, r3\n\t"
+        "sbc	r5, r7\n\t"
+        "str	r4, [%[r],#8]\n\t"
+        "str	r5, [%[r],#12]\n\t"
+        "mov	r4, r8\n\t"
+        "mov	r5, r9\n\t"
+        "sbc	r4, r7\n\t"
+        "sbc	r5, r7\n\t"
+        "str	r4, [%[r],#16]\n\t"
+        "str	r5, [%[r],#20]\n\t"
+        "mov	r4, r10\n\t"
+        "mov	r5, r11\n\t"
+        "sbc	r4, r6\n\t"
+        "sbc	r5, r3\n\t"
+        "str	r4, [%[r],#24]\n\t"
+        "str	r5, [%[r],#28]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
+    );
+}
+
+/* Double a Montgomery form number (r = a + a % m).
+ *
+ * r   Result of doubling.
+ * a   Number to double in Montogmery form.
+ * m   Modulus (prime).
+ */
+SP_NOINLINE static void sp_256_mont_dbl_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    (void)m;
+
+    __asm__ __volatile__ (
+        "ldr	r4, [%[a],#0]\n\t"
+        "ldr	r5, [%[a],#4]\n\t"
+        "ldr	r6, [%[a],#8]\n\t"
+        "ldr	r7, [%[a],#12]\n\t"
+        "add	r4, r4\n\t"
+        "adc	r5, r5\n\t"
+        "adc	r6, r6\n\t"
+        "adc	r7, r7\n\t"
+        "str	r4, [%[r],#0]\n\t"
+        "str	r5, [%[r],#4]\n\t"
+        "str	r6, [%[r],#8]\n\t"
+        "str	r7, [%[r],#12]\n\t"
+        "ldr	r4, [%[a],#16]\n\t"
+        "ldr	r5, [%[a],#20]\n\t"
+        "ldr	r6, [%[a],#24]\n\t"
+        "ldr	r7, [%[a],#28]\n\t"
+        "adc	r4, r4\n\t"
+        "adc	r5, r5\n\t"
+        "adc	r6, r6\n\t"
+        "adc	r7, r7\n\t"
+        "mov	r8, r4\n\t"
+        "mov	r9, r5\n\t"
+        "mov	r10, r6\n\t"
+        "mov	r11, r7\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "adc	r3, r3\n\t"
+        "mov	r2, r3\n\t"
+        "sub	r3, #1\n\t"
+        "mvn	r3, r3\n\t"
+        "ldr	r4, [%[r],#0]\n\t"
+        "ldr	r5, [%[r],#4]\n\t"
+        "ldr	r6, [%[r],#8]\n\t"
+        "sub	r4, r3\n\t"
+        "sbc	r5, r3\n\t"
+        "sbc	r6, r3\n\t"
+        "str	r4, [%[r],#0]\n\t"
+        "str	r5, [%[r],#4]\n\t"
+        "str	r6, [%[r],#8]\n\t"
+        "ldr	r4, [%[r],#12]\n\t"
+        "mov	r5, r8\n\t"
+        "mov	r6, r9\n\t"
+        "sbc	r4, r7\n\t"
+        "sbc	r5, r7\n\t"
+        "sbc	r6, r7\n\t"
+        "str	r4, [%[r],#12]\n\t"
+        "str	r5, [%[r],#16]\n\t"
+        "str	r6, [%[r],#20]\n\t"
+        "mov	r4, r10\n\t"
+        "mov	r5, r11\n\t"
+        "sbc	r4, r2\n\t"
+        "sbc	r5, r3\n\t"
+        "str	r4, [%[r],#24]\n\t"
+        "str	r5, [%[r],#28]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r3", "r2", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
+    );
+}
+
+/* Triple a Montgomery form number (r = a + a + a % m).
+ *
+ * r   Result of Tripling.
+ * a   Number to triple in Montogmery form.
+ * m   Modulus (prime).
+ */
+SP_NOINLINE static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    (void)m;
+
+    __asm__ __volatile__ (
+        "ldr   r6, [%[a],#0]\n\t"
+        "ldr   r7, [%[a],#4]\n\t"
+        "ldr   r4, [%[a],#8]\n\t"
+        "ldr   r5, [%[a],#12]\n\t"
+        "add   r6, r6\n\t"
+        "adc   r7, r7\n\t"
+        "adc   r4, r4\n\t"
+        "adc   r5, r5\n\t"
+        "mov   r8, r4\n\t"
+        "mov   r9, r5\n\t"
+        "ldr   r2, [%[a],#16]\n\t"
+        "ldr   r3, [%[a],#20]\n\t"
+        "ldr   r4, [%[a],#24]\n\t"
+        "ldr   r5, [%[a],#28]\n\t"
+        "adc   r2, r2\n\t"
+        "adc   r3, r3\n\t"
+        "adc   r4, r4\n\t"
+        "adc   r5, r5\n\t"
+        "mov   r10, r2\n\t"
+        "mov   r11, r3\n\t"
+        "mov   r12, r4\n\t"
+        "mov   r14, r5\n\t"
+        "mov   r3, #0\n\t"
+        "mov   r5, #0\n\t"
+        "adc   r3, r3\n\t"
+        "mov   r4, r3\n\t"
+        "sub   r3, #1\n\t"
+        "mvn   r3, r3\n\t"
+        "sub   r6, r3\n\t"
+        "sbc   r7, r3\n\t"
+        "mov   r2, r8\n\t"
+        "sbc   r2, r3\n\t"
+        "mov   r8, r2\n\t"
+        "mov   r2, r9\n\t"
+        "sbc   r2, r5\n\t"
+        "mov   r9, r2\n\t"
+        "mov   r2, r10\n\t"
+        "sbc   r2, r5\n\t"
+        "mov   r10, r2\n\t"
+        "mov   r2, r11\n\t"
+        "sbc   r2, r5\n\t"
+        "mov   r11, r2\n\t"
+        "mov   r2, r12\n\t"
+        "sbc   r2, r4\n\t"
+        "mov   r12, r2\n\t"
+        "mov   r2, r14\n\t"
+        "sbc   r2, r3\n\t"
+        "mov   r14, r2\n\t"
+        "ldr	r2, [%[a],#0]\n\t"
+        "ldr	r3, [%[a],#4]\n\t"
+        "add	r6, r2\n\t"
+        "adc	r7, r3\n\t"
+        "ldr	r2, [%[a],#8]\n\t"
+        "ldr	r3, [%[a],#12]\n\t"
+        "mov	r4, r8\n\t"
+        "mov	r5, r9\n\t"
+        "adc	r2, r4\n\t"
+        "adc	r3, r5\n\t"
+        "mov   r8, r2\n\t"
+        "mov   r9, r3\n\t"
+        "ldr	r2, [%[a],#16]\n\t"
+        "ldr	r3, [%[a],#20]\n\t"
+        "mov	r4, r10\n\t"
+        "mov	r5, r11\n\t"
+        "adc	r2, r4\n\t"
+        "adc	r3, r5\n\t"
+        "mov	r10, r2\n\t"
+        "mov	r11, r3\n\t"
+        "ldr	r2, [%[a],#24]\n\t"
+        "ldr	r3, [%[a],#28]\n\t"
+        "mov	r4, r12\n\t"
+        "mov	r5, r14\n\t"
+        "adc	r2, r4\n\t"
+        "adc	r3, r5\n\t"
+        "mov	r12, r2\n\t"
+        "mov	r14, r3\n\t"
+        "mov   r3, #0\n\t"
+        "mov	r5, #0\n\t"
+        "adc	r3, r3\n\t"
+        "mov	r4, r3\n\t"
+        "sub	r3, #1\n\t"
+        "mvn	r3, r3\n\t"
+        "sub	r6, r3\n\t"
+        "str	r6, [%[r],#0]\n\t"
+        "sbc	r7, r3\n\t"
+        "str	r7, [%[r],#4]\n\t"
+        "mov   r2, r8\n\t"
+        "sbc   r2, r3\n\t"
+        "str	r2, [%[r],#8]\n\t"
+        "mov   r2, r9\n\t"
+        "sbc   r2, r5\n\t"
+        "str	r2, [%[r],#12]\n\t"
+        "mov   r2, r10\n\t"
+        "sbc   r2, r5\n\t"
+        "str	r2, [%[r],#16]\n\t"
+        "mov   r2, r11\n\t"
+        "sbc   r2, r5\n\t"
+        "str	r2, [%[r],#20]\n\t"
+        "mov   r2, r12\n\t"
+        "sbc   r2, r4\n\t"
+        "str	r2, [%[r],#24]\n\t"
+        "mov   r2, r14\n\t"
+        "sbc   r2, r3\n\t"
+        "str	r2, [%[r],#28]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14"
+    );
+}
+
+/* Subtract two Montgomery form numbers (r = a - b % m).
+ *
+ * r   Result of subtration.
+ * a   Number to subtract from in Montogmery form.
+ * b   Number to subtract with in Montogmery form.
+ * m   Modulus (prime).
+ */
+SP_NOINLINE static void sp_256_mont_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    (void)m;
+
+    __asm__ __volatile__ (
+        "ldr	r4, [%[a],#0]\n\t"
+        "ldr	r5, [%[a],#4]\n\t"
+        "ldr	r6, [%[b],#0]\n\t"
+        "ldr	r7, [%[b],#4]\n\t"
+        "sub	r4, r6\n\t"
+        "sbc	r5, r7\n\t"
+        "str	r4, [%[r],#0]\n\t"
+        "str	r5, [%[r],#4]\n\t"
+        "ldr	r4, [%[a],#8]\n\t"
+        "ldr	r5, [%[a],#12]\n\t"
+        "ldr	r6, [%[b],#8]\n\t"
+        "ldr	r7, [%[b],#12]\n\t"
+        "sbc	r4, r6\n\t"
+        "sbc	r5, r7\n\t"
+        "str	r4, [%[r],#8]\n\t"
+        "str	r5, [%[r],#12]\n\t"
+        "ldr	r4, [%[a],#16]\n\t"
+        "ldr	r5, [%[a],#20]\n\t"
+        "ldr	r6, [%[b],#16]\n\t"
+        "ldr	r7, [%[b],#20]\n\t"
+        "sbc	r4, r6\n\t"
+        "sbc	r5, r7\n\t"
+        "mov	r8, r4\n\t"
+        "mov	r9, r5\n\t"
+        "ldr	r4, [%[a],#24]\n\t"
+        "ldr	r5, [%[a],#28]\n\t"
+        "ldr	r6, [%[b],#24]\n\t"
+        "ldr	r7, [%[b],#28]\n\t"
+        "sbc	r4, r6\n\t"
+        "sbc	r5, r7\n\t"
+        "mov	r10, r4\n\t"
+        "mov	r11, r5\n\t"
+        "sbc   r3, r3\n\t"
+        "lsr   r7, r3, #31\n\t"
+        "mov   r6, #0\n\t"
+        "ldr	r4, [%[r],#0]\n\t"
+        "ldr	r5, [%[r],#4]\n\t"
+        "add	r4, r3\n\t"
+        "adc	r5, r3\n\t"
+        "str	r4, [%[r],#0]\n\t"
+        "str	r5, [%[r],#4]\n\t"
+        "ldr	r4, [%[r],#8]\n\t"
+        "ldr	r5, [%[r],#12]\n\t"
+        "adc	r4, r3\n\t"
+        "adc	r5, r6\n\t"
+        "str	r4, [%[r],#8]\n\t"
+        "str	r5, [%[r],#12]\n\t"
+        "mov	r4, r8\n\t"
+        "mov	r5, r9\n\t"
+        "adc	r4, r6\n\t"
+        "adc	r5, r6\n\t"
+        "str	r4, [%[r],#16]\n\t"
+        "str	r5, [%[r],#20]\n\t"
+        "mov	r4, r10\n\t"
+        "mov	r5, r11\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, r3\n\t"
+        "str	r4, [%[r],#24]\n\t"
+        "str	r5, [%[r],#28]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
+    );
+}
+
+/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
+ *
+ * r  Result of division by 2.
+ * a  Number to divide.
+ * m  Modulus (prime).
+ */
+SP_NOINLINE static void sp_256_div2_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    __asm__ __volatile__ (
+        "ldr	r7, [%[a], #0]\n\t"
+        "lsl	r7, r7, #31\n\t"
+        "lsr	r7, r7, #31\n\t"
+        "mov	r5, #0\n\t"
+        "sub	r5, r7\n\t"
+        "mov	r7, #0\n\t"
+        "lsl	r6, r5, #31\n\t"
+        "lsr	r6, r6, #31\n\t"
+        "ldr	r3, [%[a], #0]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "add	r3, r5\n\t"
+        "adc	r4, r5\n\t"
+        "str	r3, [%[r], #0]\n\t"
+        "str	r4, [%[r], #4]\n\t"
+        "ldr	r3, [%[a], #8]\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "adc	r3, r5\n\t"
+        "adc	r4, r7\n\t"
+        "str	r3, [%[r], #8]\n\t"
+        "str	r4, [%[r], #12]\n\t"
+        "ldr	r3, [%[a], #16]\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "adc	r3, r7\n\t"
+        "adc	r4, r7\n\t"
+        "str	r3, [%[r], #16]\n\t"
+        "str	r4, [%[r], #20]\n\t"
+        "ldr	r3, [%[a], #24]\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "adc	r3, r6\n\t"
+        "adc	r4, r5\n\t"
+        "adc	r7, r7\n\t"
+        "lsl	r7, r7, #31\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, #31\n\t"
+        "lsr	r6, r4, #1\n\t"
+        "lsl	r4, r4, #31\n\t"
+        "orr	r5, r4\n\t"
+        "orr	r6, r7\n\t"
+        "mov	r7, r3\n\t"
+        "str	r5, [%[r], #24]\n\t"
+        "str	r6, [%[r], #28]\n\t"
+        "ldr	r3, [%[a], #16]\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, #31\n\t"
+        "lsr	r6, r4, #1\n\t"
+        "lsl	r4, r4, #31\n\t"
+        "orr	r5, r4\n\t"
+        "orr	r6, r7\n\t"
+        "mov	r7, r3\n\t"
+        "str	r5, [%[r], #16]\n\t"
+        "str	r6, [%[r], #20]\n\t"
+        "ldr	r3, [%[a], #8]\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, #31\n\t"
+        "lsr	r6, r4, #1\n\t"
+        "lsl	r4, r4, #31\n\t"
+        "orr	r5, r4\n\t"
+        "orr	r6, r7\n\t"
+        "mov	r7, r3\n\t"
+        "str	r5, [%[r], #8]\n\t"
+        "str	r6, [%[r], #12]\n\t"
+        "ldr	r3, [%[r], #0]\n\t"
+        "ldr	r4, [%[r], #4]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsr	r6, r4, #1\n\t"
+        "lsl	r4, r4, #31\n\t"
+        "orr	r5, r4\n\t"
+        "orr	r6, r7\n\t"
+        "str	r5, [%[r], #0]\n\t"
+        "str	r6, [%[r], #4]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [m] "r" (m)
+        : "memory", "r3", "r4", "r5", "r6", "r7"
+    );
+}
+
+/* Double the Montgomery form projective point p.
+ *
+ * r  Result of doubling point.
+ * p  Point to double.
+ * t  Temporary ordinate data.
+ */
+static void sp_256_proj_point_dbl_8(sp_point_256* r, const sp_point_256* p, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*8;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+
+    x = r->x;
+    y = r->y;
+    z = r->z;
+    /* Put infinity into result. */
+    if (r != p) {
+        r->infinity = p->infinity;
+    }
+
+    /* T1 = Z * Z */
+    sp_256_mont_sqr_8(t1, p->z, p256_mod, p256_mp_mod);
+    /* Z = Y * Z */
+    sp_256_mont_mul_8(z, p->y, p->z, p256_mod, p256_mp_mod);
+    /* Z = 2Z */
+    sp_256_mont_dbl_8(z, z, p256_mod);
+    /* T2 = X - T1 */
+    sp_256_mont_sub_8(t2, p->x, t1, p256_mod);
+    /* T1 = X + T1 */
+    sp_256_mont_add_8(t1, p->x, t1, p256_mod);
+    /* T2 = T1 * T2 */
+    sp_256_mont_mul_8(t2, t1, t2, p256_mod, p256_mp_mod);
+    /* T1 = 3T2 */
+    sp_256_mont_tpl_8(t1, t2, p256_mod);
+    /* Y = 2Y */
+    sp_256_mont_dbl_8(y, p->y, p256_mod);
+    /* Y = Y * Y */
+    sp_256_mont_sqr_8(y, y, p256_mod, p256_mp_mod);
+    /* T2 = Y * Y */
+    sp_256_mont_sqr_8(t2, y, p256_mod, p256_mp_mod);
+    /* T2 = T2/2 */
+    sp_256_div2_8(t2, t2, p256_mod);
+    /* Y = Y * X */
+    sp_256_mont_mul_8(y, y, p->x, p256_mod, p256_mp_mod);
+    /* X = T1 * T1 */
+    sp_256_mont_sqr_8(x, t1, p256_mod, p256_mp_mod);
+    /* X = X - Y */
+    sp_256_mont_sub_8(x, x, y, p256_mod);
+    /* X = X - Y */
+    sp_256_mont_sub_8(x, x, y, p256_mod);
+    /* Y = Y - X */
+    sp_256_mont_sub_8(y, y, x, p256_mod);
+    /* Y = Y * T1 */
+    sp_256_mont_mul_8(y, y, t1, p256_mod, p256_mp_mod);
+    /* Y = Y - T2 */
+    sp_256_mont_sub_8(y, y, t2, p256_mod);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r6, %[a]\n\t"
+        "add	r6, #32\n\t"
+        "\n1:\n\t"
+        "mov	r5, #0\n\t"
+        "sub	r5, %[c]\n\t"
+        "ldr	r4, [%[a]]\n\t"
+        "ldr	r5, [%[b]]\n\t"
+        "sbc	r4, r5\n\t"
+        "str	r4, [%[r]]\n\t"
+        "sbc	%[c], %[c]\n\t"
+        "add	%[a], #4\n\t"
+        "add	%[b], #4\n\t"
+        "add	%[r], #4\n\t"
+        "cmp	%[a], r6\n\t"
+        "bne	1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6"
+    );
+
+    return c;
+}
+
+#else
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldr	r4, [%[a], #0]\n\t"
+        "ldr	r5, [%[a], #4]\n\t"
+        "ldr	r6, [%[b], #0]\n\t"
+        "ldr	r7, [%[b], #4]\n\t"
+        "sub	r4, r6\n\t"
+        "sbc	r5, r7\n\t"
+        "str	r4, [%[r], #0]\n\t"
+        "str	r5, [%[r], #4]\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "ldr	r5, [%[a], #12]\n\t"
+        "ldr	r6, [%[b], #8]\n\t"
+        "ldr	r7, [%[b], #12]\n\t"
+        "sbc	r4, r6\n\t"
+        "sbc	r5, r7\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "str	r5, [%[r], #12]\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "ldr	r5, [%[a], #20]\n\t"
+        "ldr	r6, [%[b], #16]\n\t"
+        "ldr	r7, [%[b], #20]\n\t"
+        "sbc	r4, r6\n\t"
+        "sbc	r5, r7\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "str	r5, [%[r], #20]\n\t"
+        "ldr	r4, [%[a], #24]\n\t"
+        "ldr	r5, [%[a], #28]\n\t"
+        "ldr	r6, [%[b], #24]\n\t"
+        "ldr	r7, [%[b], #28]\n\t"
+        "sbc	r4, r6\n\t"
+        "sbc	r5, r7\n\t"
+        "str	r4, [%[r], #24]\n\t"
+        "str	r5, [%[r], #28]\n\t"
+        "sbc	%[c], %[c]\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r7"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Compare two numbers to determine if they are equal.
+ * Constant time implementation.
+ *
+ * a  First number to compare.
+ * b  Second number to compare.
+ * returns 1 when equal and 0 otherwise.
+ */
+static int sp_256_cmp_equal_8(const sp_digit* a, const sp_digit* b)
+{
+    return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3]) |
+            (a[4] ^ b[4]) | (a[5] ^ b[5]) | (a[6] ^ b[6]) | (a[7] ^ b[7])) == 0;
+}
+
+/* Add two Montgomery form projective points.
+ *
+ * r  Result of addition.
+ * p  First point to add.
+ * q  Second point to add.
+ * t  Temporary ordinate data.
+ */
+static void sp_256_proj_point_add_8(sp_point_256* r, const sp_point_256* p, const sp_point_256* q,
+        sp_digit* t)
+{
+    const sp_point_256* ap[2];
+    sp_point_256* rp[2];
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*8;
+    sp_digit* t3 = t + 4*8;
+    sp_digit* t4 = t + 6*8;
+    sp_digit* t5 = t + 8*8;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+    int i;
+
+    /* Ensure only the first point is the same as the result. */
+    if (q == r) {
+        const sp_point_256* a = p;
+        p = q;
+        q = a;
+    }
+
+    /* Check double */
+    (void)sp_256_sub_8(t1, p256_mod, q->y);
+    sp_256_norm_8(t1);
+    if ((sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) &
+        (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, t1))) != 0) {
+        sp_256_proj_point_dbl_8(r, p, t);
+    }
+    else {
+        rp[0] = r;
+
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point_256));
+        x = rp[p->infinity | q->infinity]->x;
+        y = rp[p->infinity | q->infinity]->y;
+        z = rp[p->infinity | q->infinity]->z;
+
+        ap[0] = p;
+        ap[1] = q;
+        for (i=0; i<8; i++) {
+            r->x[i] = ap[p->infinity]->x[i];
+        }
+        for (i=0; i<8; i++) {
+            r->y[i] = ap[p->infinity]->y[i];
+        }
+        for (i=0; i<8; i++) {
+            r->z[i] = ap[p->infinity]->z[i];
+        }
+        r->infinity = ap[p->infinity]->infinity;
+
+        /* U1 = X1*Z2^2 */
+        sp_256_mont_sqr_8(t1, q->z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(t3, t1, q->z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(t1, t1, x, p256_mod, p256_mp_mod);
+        /* U2 = X2*Z1^2 */
+        sp_256_mont_sqr_8(t2, z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(t4, t2, z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(t2, t2, q->x, p256_mod, p256_mp_mod);
+        /* S1 = Y1*Z2^3 */
+        sp_256_mont_mul_8(t3, t3, y, p256_mod, p256_mp_mod);
+        /* S2 = Y2*Z1^3 */
+        sp_256_mont_mul_8(t4, t4, q->y, p256_mod, p256_mp_mod);
+        /* H = U2 - U1 */
+        sp_256_mont_sub_8(t2, t2, t1, p256_mod);
+        /* R = S2 - S1 */
+        sp_256_mont_sub_8(t4, t4, t3, p256_mod);
+        /* Z3 = H*Z1*Z2 */
+        sp_256_mont_mul_8(z, z, q->z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(z, z, t2, p256_mod, p256_mp_mod);
+        /* X3 = R^2 - H^3 - 2*U1*H^2 */
+        sp_256_mont_sqr_8(x, t4, p256_mod, p256_mp_mod);
+        sp_256_mont_sqr_8(t5, t2, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(y, t1, t5, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(t5, t5, t2, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_8(x, x, t5, p256_mod);
+        sp_256_mont_dbl_8(t1, y, p256_mod);
+        sp_256_mont_sub_8(x, x, t1, p256_mod);
+        /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
+        sp_256_mont_sub_8(y, y, x, p256_mod);
+        sp_256_mont_mul_8(y, y, t4, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(t5, t5, t3, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_8(y, y, t5, p256_mod);
+    }
+}
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_fast_8(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
+        int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 td[16];
+    sp_point_256 rtd;
+    sp_digit tmpd[2 * 8 * 5];
+#endif
+    sp_point_256* t;
+    sp_point_256* rt;
+    sp_digit* tmp;
+    sp_digit n;
+    int i;
+    int c, y;
+    int err;
+
+    (void)heap;
+
+    err = sp_256_point_new_8(heap, rtd, rt);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 16, heap, DYNAMIC_TYPE_ECC);
+    if (t == NULL)
+        err = MEMORY_E;
+    tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, heap,
+                             DYNAMIC_TYPE_ECC);
+    if (tmp == NULL)
+        err = MEMORY_E;
+#else
+    t = td;
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        /* t[0] = {0, 0, 1} * norm */
+        XMEMSET(&t[0], 0, sizeof(t[0]));
+        t[0].infinity = 1;
+        /* t[1] = {g->x, g->y, g->z} * norm */
+        (void)sp_256_mod_mul_norm_8(t[1].x, g->x, p256_mod);
+        (void)sp_256_mod_mul_norm_8(t[1].y, g->y, p256_mod);
+        (void)sp_256_mod_mul_norm_8(t[1].z, g->z, p256_mod);
+        t[1].infinity = 0;
+        sp_256_proj_point_dbl_8(&t[ 2], &t[ 1], tmp);
+        t[ 2].infinity = 0;
+        sp_256_proj_point_add_8(&t[ 3], &t[ 2], &t[ 1], tmp);
+        t[ 3].infinity = 0;
+        sp_256_proj_point_dbl_8(&t[ 4], &t[ 2], tmp);
+        t[ 4].infinity = 0;
+        sp_256_proj_point_add_8(&t[ 5], &t[ 3], &t[ 2], tmp);
+        t[ 5].infinity = 0;
+        sp_256_proj_point_dbl_8(&t[ 6], &t[ 3], tmp);
+        t[ 6].infinity = 0;
+        sp_256_proj_point_add_8(&t[ 7], &t[ 4], &t[ 3], tmp);
+        t[ 7].infinity = 0;
+        sp_256_proj_point_dbl_8(&t[ 8], &t[ 4], tmp);
+        t[ 8].infinity = 0;
+        sp_256_proj_point_add_8(&t[ 9], &t[ 5], &t[ 4], tmp);
+        t[ 9].infinity = 0;
+        sp_256_proj_point_dbl_8(&t[10], &t[ 5], tmp);
+        t[10].infinity = 0;
+        sp_256_proj_point_add_8(&t[11], &t[ 6], &t[ 5], tmp);
+        t[11].infinity = 0;
+        sp_256_proj_point_dbl_8(&t[12], &t[ 6], tmp);
+        t[12].infinity = 0;
+        sp_256_proj_point_add_8(&t[13], &t[ 7], &t[ 6], tmp);
+        t[13].infinity = 0;
+        sp_256_proj_point_dbl_8(&t[14], &t[ 7], tmp);
+        t[14].infinity = 0;
+        sp_256_proj_point_add_8(&t[15], &t[ 8], &t[ 7], tmp);
+        t[15].infinity = 0;
+
+        i = 6;
+        n = k[i+1] << 0;
+        c = 28;
+        y = n >> 28;
+        XMEMCPY(rt, &t[y], sizeof(sp_point_256));
+        n <<= 4;
+        for (; i>=0 || c>=4; ) {
+            if (c < 4) {
+                n |= k[i--];
+                c += 32;
+            }
+            y = (n >> 28) & 0xf;
+            n <<= 4;
+            c -= 4;
+
+            sp_256_proj_point_dbl_8(rt, rt, tmp);
+            sp_256_proj_point_dbl_8(rt, rt, tmp);
+            sp_256_proj_point_dbl_8(rt, rt, tmp);
+            sp_256_proj_point_dbl_8(rt, rt, tmp);
+
+            sp_256_proj_point_add_8(rt, rt, &t[y], tmp);
+        }
+
+        if (map != 0) {
+            sp_256_map_8(r, rt, tmp);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_256));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 8 * 5);
+        XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+    }
+    if (t != NULL) {
+        XMEMSET(t, 0, sizeof(sp_point_256) * 16);
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    ForceZero(tmpd, sizeof(tmpd));
+    ForceZero(td, sizeof(td));
+#endif
+    sp_256_point_free_8(rt, 1, heap);
+
+    return err;
+}
+
+/* A table entry for pre-computed points. */
+typedef struct sp_table_entry_256 {
+    sp_digit x[8];
+    sp_digit y[8];
+} sp_table_entry_256;
+
+#ifdef FP_ECC
+/* Double the Montgomery form projective point p a number of times.
+ *
+ * r  Result of repeated doubling of point.
+ * p  Point to double.
+ * n  Number of times to double
+ * t  Temporary ordinate data.
+ */
+static void sp_256_proj_point_dbl_n_8(sp_point_256* p, int n, sp_digit* t)
+{
+    sp_digit* w = t;
+    sp_digit* a = t + 2*8;
+    sp_digit* b = t + 4*8;
+    sp_digit* t1 = t + 6*8;
+    sp_digit* t2 = t + 8*8;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+
+    x = p->x;
+    y = p->y;
+    z = p->z;
+
+    /* Y = 2*Y */
+    sp_256_mont_dbl_8(y, y, p256_mod);
+    /* W = Z^4 */
+    sp_256_mont_sqr_8(w, z, p256_mod, p256_mp_mod);
+    sp_256_mont_sqr_8(w, w, p256_mod, p256_mp_mod);
+
+#ifndef WOLFSSL_SP_SMALL
+    while (--n > 0)
+#else
+    while (--n >= 0)
+#endif
+    {
+        /* A = 3*(X^2 - W) */
+        sp_256_mont_sqr_8(t1, x, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_8(t1, t1, w, p256_mod);
+        sp_256_mont_tpl_8(a, t1, p256_mod);
+        /* B = X*Y^2 */
+        sp_256_mont_sqr_8(t1, y, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(b, t1, x, p256_mod, p256_mp_mod);
+        /* X = A^2 - 2B */
+        sp_256_mont_sqr_8(x, a, p256_mod, p256_mp_mod);
+        sp_256_mont_dbl_8(t2, b, p256_mod);
+        sp_256_mont_sub_8(x, x, t2, p256_mod);
+        /* Z = Z*Y */
+        sp_256_mont_mul_8(z, z, y, p256_mod, p256_mp_mod);
+        /* t2 = Y^4 */
+        sp_256_mont_sqr_8(t1, t1, p256_mod, p256_mp_mod);
+#ifdef WOLFSSL_SP_SMALL
+        if (n != 0)
+#endif
+        {
+            /* W = W*Y^4 */
+            sp_256_mont_mul_8(w, w, t1, p256_mod, p256_mp_mod);
+        }
+        /* y = 2*A*(B - X) - Y^4 */
+        sp_256_mont_sub_8(y, b, x, p256_mod);
+        sp_256_mont_mul_8(y, y, a, p256_mod, p256_mp_mod);
+        sp_256_mont_dbl_8(y, y, p256_mod);
+        sp_256_mont_sub_8(y, y, t1, p256_mod);
+    }
+#ifndef WOLFSSL_SP_SMALL
+    /* A = 3*(X^2 - W) */
+    sp_256_mont_sqr_8(t1, x, p256_mod, p256_mp_mod);
+    sp_256_mont_sub_8(t1, t1, w, p256_mod);
+    sp_256_mont_tpl_8(a, t1, p256_mod);
+    /* B = X*Y^2 */
+    sp_256_mont_sqr_8(t1, y, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_8(b, t1, x, p256_mod, p256_mp_mod);
+    /* X = A^2 - 2B */
+    sp_256_mont_sqr_8(x, a, p256_mod, p256_mp_mod);
+    sp_256_mont_dbl_8(t2, b, p256_mod);
+    sp_256_mont_sub_8(x, x, t2, p256_mod);
+    /* Z = Z*Y */
+    sp_256_mont_mul_8(z, z, y, p256_mod, p256_mp_mod);
+    /* t2 = Y^4 */
+    sp_256_mont_sqr_8(t1, t1, p256_mod, p256_mp_mod);
+    /* y = 2*A*(B - X) - Y^4 */
+    sp_256_mont_sub_8(y, b, x, p256_mod);
+    sp_256_mont_mul_8(y, y, a, p256_mod, p256_mp_mod);
+    sp_256_mont_dbl_8(y, y, p256_mod);
+    sp_256_mont_sub_8(y, y, t1, p256_mod);
+#endif
+    /* Y = Y/2 */
+    sp_256_div2_8(y, y, p256_mod);
+}
+
+#endif /* FP_ECC */
+/* Add two Montgomery form projective points. The second point has a q value of
+ * one.
+ * Only the first point can be the same pointer as the result point.
+ *
+ * r  Result of addition.
+ * p  First point to add.
+ * q  Second point to add.
+ * t  Temporary ordinate data.
+ */
+static void sp_256_proj_point_add_qz1_8(sp_point_256* r, const sp_point_256* p,
+        const sp_point_256* q, sp_digit* t)
+{
+    const sp_point_256* ap[2];
+    sp_point_256* rp[2];
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*8;
+    sp_digit* t3 = t + 4*8;
+    sp_digit* t4 = t + 6*8;
+    sp_digit* t5 = t + 8*8;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+    int i;
+
+    /* Check double */
+    (void)sp_256_sub_8(t1, p256_mod, q->y);
+    sp_256_norm_8(t1);
+    if ((sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) &
+        (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, t1))) != 0) {
+        sp_256_proj_point_dbl_8(r, p, t);
+    }
+    else {
+        rp[0] = r;
+
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point_256));
+        x = rp[p->infinity | q->infinity]->x;
+        y = rp[p->infinity | q->infinity]->y;
+        z = rp[p->infinity | q->infinity]->z;
+
+        ap[0] = p;
+        ap[1] = q;
+        for (i=0; i<8; i++) {
+            r->x[i] = ap[p->infinity]->x[i];
+        }
+        for (i=0; i<8; i++) {
+            r->y[i] = ap[p->infinity]->y[i];
+        }
+        for (i=0; i<8; i++) {
+            r->z[i] = ap[p->infinity]->z[i];
+        }
+        r->infinity = ap[p->infinity]->infinity;
+
+        /* U2 = X2*Z1^2 */
+        sp_256_mont_sqr_8(t2, z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(t4, t2, z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(t2, t2, q->x, p256_mod, p256_mp_mod);
+        /* S2 = Y2*Z1^3 */
+        sp_256_mont_mul_8(t4, t4, q->y, p256_mod, p256_mp_mod);
+        /* H = U2 - X1 */
+        sp_256_mont_sub_8(t2, t2, x, p256_mod);
+        /* R = S2 - Y1 */
+        sp_256_mont_sub_8(t4, t4, y, p256_mod);
+        /* Z3 = H*Z1 */
+        sp_256_mont_mul_8(z, z, t2, p256_mod, p256_mp_mod);
+        /* X3 = R^2 - H^3 - 2*X1*H^2 */
+        sp_256_mont_sqr_8(t1, t4, p256_mod, p256_mp_mod);
+        sp_256_mont_sqr_8(t5, t2, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(t3, x, t5, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(t5, t5, t2, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_8(x, t1, t5, p256_mod);
+        sp_256_mont_dbl_8(t1, t3, p256_mod);
+        sp_256_mont_sub_8(x, x, t1, p256_mod);
+        /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */
+        sp_256_mont_sub_8(t3, t3, x, p256_mod);
+        sp_256_mont_mul_8(t3, t3, t4, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(t5, t5, y, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_8(y, t3, t5, p256_mod);
+    }
+}
+
+#ifdef WOLFSSL_SP_SMALL
+#ifdef FP_ECC
+/* Convert the projective point to affine.
+ * Ordinates are in Montgomery form.
+ *
+ * a  Point to convert.
+ * t  Temporary data.
+ */
+static void sp_256_proj_to_affine_8(sp_point_256* a, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2 * 8;
+    sp_digit* tmp = t + 4 * 8;
+
+    sp_256_mont_inv_8(t1, a->z, tmp);
+
+    sp_256_mont_sqr_8(t2, t1, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_8(t1, t2, t1, p256_mod, p256_mp_mod);
+
+    sp_256_mont_mul_8(a->x, a->x, t2, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_8(a->y, a->y, t1, p256_mod, p256_mp_mod);
+    XMEMCPY(a->z, p256_norm_mod, sizeof(p256_norm_mod));
+}
+
+/* Generate the pre-computed table of points for the base point.
+ *
+ * a      The base point.
+ * table  Place to store generated point data.
+ * tmp    Temporary data.
+ * heap  Heap to use for allocation.
+ */
+static int sp_256_gen_stripe_table_8(const sp_point_256* a,
+        sp_table_entry_256* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 td, s1d, s2d;
+#endif
+    sp_point_256* t;
+    sp_point_256* s1 = NULL;
+    sp_point_256* s2 = NULL;
+    int i, j;
+    int err;
+
+    (void)heap;
+
+    err = sp_256_point_new_8(heap, td, t);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, s1d, s1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, s2d, s2);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_8(t->x, a->x, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_8(t->y, a->y, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_8(t->z, a->z, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        t->infinity = 0;
+        sp_256_proj_to_affine_8(t, tmp);
+
+        XMEMCPY(s1->z, p256_norm_mod, sizeof(p256_norm_mod));
+        s1->infinity = 0;
+        XMEMCPY(s2->z, p256_norm_mod, sizeof(p256_norm_mod));
+        s2->infinity = 0;
+
+        /* table[0] = {0, 0, infinity} */
+        XMEMSET(&table[0], 0, sizeof(sp_table_entry_256));
+        /* table[1] = Affine version of 'a' in Montgomery form */
+        XMEMCPY(table[1].x, t->x, sizeof(table->x));
+        XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+        for (i=1; i<4; i++) {
+            sp_256_proj_point_dbl_n_8(t, 64, tmp);
+            sp_256_proj_to_affine_8(t, tmp);
+            XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+            XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+        }
+
+        for (i=1; i<4; i++) {
+            XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+            XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+            for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+                XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+                XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+                sp_256_proj_point_add_qz1_8(t, s1, s2, tmp);
+                sp_256_proj_to_affine_8(t, tmp);
+                XMEMCPY(table[j].x, t->x, sizeof(table->x));
+                XMEMCPY(table[j].y, t->y, sizeof(table->y));
+            }
+        }
+    }
+
+    sp_256_point_free_8(s2, 0, heap);
+    sp_256_point_free_8(s1, 0, heap);
+    sp_256_point_free_8( t, 0, heap);
+
+    return err;
+}
+
+#endif /* FP_ECC */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_stripe_8(sp_point_256* r, const sp_point_256* g,
+        const sp_table_entry_256* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 rtd;
+    sp_point_256 pd;
+    sp_digit td[2 * 8 * 5];
+#endif
+    sp_point_256* rt;
+    sp_point_256* p = NULL;
+    sp_digit* t;
+    int i, j;
+    int y, x;
+    int err;
+
+    (void)g;
+    (void)heap;
+
+
+    err = sp_256_point_new_8(heap, rtd, rt);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, heap,
+                           DYNAMIC_TYPE_ECC);
+    if (t == NULL) {
+        err = MEMORY_E;
+    }
+#else
+    t = td;
+#endif
+
+    if (err == MP_OKAY) {
+        XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
+        XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
+
+        y = 0;
+        for (j=0,x=63; j<4; j++,x+=64) {
+            y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+        }
+        XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+        XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+        rt->infinity = !y;
+        for (i=62; i>=0; i--) {
+            y = 0;
+            for (j=0,x=i; j<4; j++,x+=64) {
+                y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+            }
+
+            sp_256_proj_point_dbl_8(rt, rt, t);
+            XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+            XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+            p->infinity = !y;
+            sp_256_proj_point_add_qz1_8(rt, rt, p, t);
+        }
+
+        if (map != 0) {
+            sp_256_map_8(r, rt, t);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_256));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_8(p, 0, heap);
+    sp_256_point_free_8(rt, 0, heap);
+
+    return err;
+}
+
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+    #define FP_ENTRIES 16
+#endif
+
+typedef struct sp_cache_256_t {
+    sp_digit x[8];
+    sp_digit y[8];
+    sp_table_entry_256 table[16];
+    uint32_t cnt;
+    int set;
+} sp_cache_256_t;
+
+static THREAD_LS_T sp_cache_256_t sp_cache_256[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_256_last = -1;
+static THREAD_LS_T int sp_cache_256_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+    static volatile int initCacheMutex_256 = 0;
+    static wolfSSL_Mutex sp_cache_256_lock;
+#endif
+
+static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache)
+{
+    int i, j;
+    uint32_t least;
+
+    if (sp_cache_256_inited == 0) {
+        for (i=0; i<FP_ENTRIES; i++) {
+            sp_cache_256[i].set = 0;
+        }
+        sp_cache_256_inited = 1;
+    }
+
+    /* Compare point with those in cache. */
+    for (i=0; i<FP_ENTRIES; i++) {
+        if (!sp_cache_256[i].set)
+            continue;
+
+        if (sp_256_cmp_equal_8(g->x, sp_cache_256[i].x) &
+                           sp_256_cmp_equal_8(g->y, sp_cache_256[i].y)) {
+            sp_cache_256[i].cnt++;
+            break;
+        }
+    }
+
+    /* No match. */
+    if (i == FP_ENTRIES) {
+        /* Find empty entry. */
+        i = (sp_cache_256_last + 1) % FP_ENTRIES;
+        for (; i != sp_cache_256_last; i=(i+1)%FP_ENTRIES) {
+            if (!sp_cache_256[i].set) {
+                break;
+            }
+        }
+
+        /* Evict least used. */
+        if (i == sp_cache_256_last) {
+            least = sp_cache_256[0].cnt;
+            for (j=1; j<FP_ENTRIES; j++) {
+                if (sp_cache_256[j].cnt < least) {
+                    i = j;
+                    least = sp_cache_256[i].cnt;
+                }
+            }
+        }
+
+        XMEMCPY(sp_cache_256[i].x, g->x, sizeof(sp_cache_256[i].x));
+        XMEMCPY(sp_cache_256[i].y, g->y, sizeof(sp_cache_256[i].y));
+        sp_cache_256[i].set = 1;
+        sp_cache_256[i].cnt = 1;
+    }
+
+    *cache = &sp_cache_256[i];
+    sp_cache_256_last = i;
+}
+#endif /* FP_ECC */
+
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_8(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
+        int map, void* heap)
+{
+#ifndef FP_ECC
+    return sp_256_ecc_mulmod_fast_8(r, g, k, map, heap);
+#else
+    sp_digit tmp[2 * 8 * 5];
+    sp_cache_256_t* cache;
+    int err = MP_OKAY;
+
+#ifndef HAVE_THREAD_LS
+    if (initCacheMutex_256 == 0) {
+         wc_InitMutex(&sp_cache_256_lock);
+         initCacheMutex_256 = 1;
+    }
+    if (wc_LockMutex(&sp_cache_256_lock) != 0)
+       err = BAD_MUTEX_E;
+#endif /* HAVE_THREAD_LS */
+
+    if (err == MP_OKAY) {
+        sp_ecc_get_cache_256(g, &cache);
+        if (cache->cnt == 2)
+            sp_256_gen_stripe_table_8(g, cache->table, tmp, heap);
+
+#ifndef HAVE_THREAD_LS
+        wc_UnLockMutex(&sp_cache_256_lock);
+#endif /* HAVE_THREAD_LS */
+
+        if (cache->cnt < 2) {
+            err = sp_256_ecc_mulmod_fast_8(r, g, k, map, heap);
+        }
+        else {
+            err = sp_256_ecc_mulmod_stripe_8(r, g, cache->table, k,
+                    map, heap);
+        }
+    }
+
+    return err;
+#endif
+}
+
+#else
+#ifdef FP_ECC
+/* Generate the pre-computed table of points for the base point.
+ *
+ * a      The base point.
+ * table  Place to store generated point data.
+ * tmp    Temporary data.
+ * heap  Heap to use for allocation.
+ */
+static int sp_256_gen_stripe_table_8(const sp_point_256* a,
+        sp_table_entry_256* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 td, s1d, s2d;
+#endif
+    sp_point_256* t;
+    sp_point_256* s1 = NULL;
+    sp_point_256* s2 = NULL;
+    int i, j;
+    int err;
+
+    (void)heap;
+
+    err = sp_256_point_new_8(heap, td, t);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, s1d, s1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, s2d, s2);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_8(t->x, a->x, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_8(t->y, a->y, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_8(t->z, a->z, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        t->infinity = 0;
+        sp_256_proj_to_affine_8(t, tmp);
+
+        XMEMCPY(s1->z, p256_norm_mod, sizeof(p256_norm_mod));
+        s1->infinity = 0;
+        XMEMCPY(s2->z, p256_norm_mod, sizeof(p256_norm_mod));
+        s2->infinity = 0;
+
+        /* table[0] = {0, 0, infinity} */
+        XMEMSET(&table[0], 0, sizeof(sp_table_entry_256));
+        /* table[1] = Affine version of 'a' in Montgomery form */
+        XMEMCPY(table[1].x, t->x, sizeof(table->x));
+        XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+        for (i=1; i<8; i++) {
+            sp_256_proj_point_dbl_n_8(t, 32, tmp);
+            sp_256_proj_to_affine_8(t, tmp);
+            XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+            XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+        }
+
+        for (i=1; i<8; i++) {
+            XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+            XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+            for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+                XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+                XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+                sp_256_proj_point_add_qz1_8(t, s1, s2, tmp);
+                sp_256_proj_to_affine_8(t, tmp);
+                XMEMCPY(table[j].x, t->x, sizeof(table->x));
+                XMEMCPY(table[j].y, t->y, sizeof(table->y));
+            }
+        }
+    }
+
+    sp_256_point_free_8(s2, 0, heap);
+    sp_256_point_free_8(s1, 0, heap);
+    sp_256_point_free_8( t, 0, heap);
+
+    return err;
+}
+
+#endif /* FP_ECC */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_stripe_8(sp_point_256* r, const sp_point_256* g,
+        const sp_table_entry_256* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 rtd;
+    sp_point_256 pd;
+    sp_digit td[2 * 8 * 5];
+#endif
+    sp_point_256* rt;
+    sp_point_256* p = NULL;
+    sp_digit* t;
+    int i, j;
+    int y, x;
+    int err;
+
+    (void)g;
+    (void)heap;
+
+
+    err = sp_256_point_new_8(heap, rtd, rt);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, heap,
+                           DYNAMIC_TYPE_ECC);
+    if (t == NULL) {
+        err = MEMORY_E;
+    }
+#else
+    t = td;
+#endif
+
+    if (err == MP_OKAY) {
+        XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
+        XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
+
+        y = 0;
+        for (j=0,x=31; j<8; j++,x+=32) {
+            y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+        }
+        XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+        XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+        rt->infinity = !y;
+        for (i=30; i>=0; i--) {
+            y = 0;
+            for (j=0,x=i; j<8; j++,x+=32) {
+                y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+            }
+
+            sp_256_proj_point_dbl_8(rt, rt, t);
+            XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+            XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+            p->infinity = !y;
+            sp_256_proj_point_add_qz1_8(rt, rt, p, t);
+        }
+
+        if (map != 0) {
+            sp_256_map_8(r, rt, t);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_256));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_8(p, 0, heap);
+    sp_256_point_free_8(rt, 0, heap);
+
+    return err;
+}
+
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+    #define FP_ENTRIES 16
+#endif
+
+typedef struct sp_cache_256_t {
+    sp_digit x[8];
+    sp_digit y[8];
+    sp_table_entry_256 table[256];
+    uint32_t cnt;
+    int set;
+} sp_cache_256_t;
+
+static THREAD_LS_T sp_cache_256_t sp_cache_256[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_256_last = -1;
+static THREAD_LS_T int sp_cache_256_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+    static volatile int initCacheMutex_256 = 0;
+    static wolfSSL_Mutex sp_cache_256_lock;
+#endif
+
+static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache)
+{
+    int i, j;
+    uint32_t least;
+
+    if (sp_cache_256_inited == 0) {
+        for (i=0; i<FP_ENTRIES; i++) {
+            sp_cache_256[i].set = 0;
+        }
+        sp_cache_256_inited = 1;
+    }
+
+    /* Compare point with those in cache. */
+    for (i=0; i<FP_ENTRIES; i++) {
+        if (!sp_cache_256[i].set)
+            continue;
+
+        if (sp_256_cmp_equal_8(g->x, sp_cache_256[i].x) &
+                           sp_256_cmp_equal_8(g->y, sp_cache_256[i].y)) {
+            sp_cache_256[i].cnt++;
+            break;
+        }
+    }
+
+    /* No match. */
+    if (i == FP_ENTRIES) {
+        /* Find empty entry. */
+        i = (sp_cache_256_last + 1) % FP_ENTRIES;
+        for (; i != sp_cache_256_last; i=(i+1)%FP_ENTRIES) {
+            if (!sp_cache_256[i].set) {
+                break;
+            }
+        }
+
+        /* Evict least used. */
+        if (i == sp_cache_256_last) {
+            least = sp_cache_256[0].cnt;
+            for (j=1; j<FP_ENTRIES; j++) {
+                if (sp_cache_256[j].cnt < least) {
+                    i = j;
+                    least = sp_cache_256[i].cnt;
+                }
+            }
+        }
+
+        XMEMCPY(sp_cache_256[i].x, g->x, sizeof(sp_cache_256[i].x));
+        XMEMCPY(sp_cache_256[i].y, g->y, sizeof(sp_cache_256[i].y));
+        sp_cache_256[i].set = 1;
+        sp_cache_256[i].cnt = 1;
+    }
+
+    *cache = &sp_cache_256[i];
+    sp_cache_256_last = i;
+}
+#endif /* FP_ECC */
+
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_8(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
+        int map, void* heap)
+{
+#ifndef FP_ECC
+    return sp_256_ecc_mulmod_fast_8(r, g, k, map, heap);
+#else
+    sp_digit tmp[2 * 8 * 5];
+    sp_cache_256_t* cache;
+    int err = MP_OKAY;
+
+#ifndef HAVE_THREAD_LS
+    if (initCacheMutex_256 == 0) {
+         wc_InitMutex(&sp_cache_256_lock);
+         initCacheMutex_256 = 1;
+    }
+    if (wc_LockMutex(&sp_cache_256_lock) != 0)
+       err = BAD_MUTEX_E;
+#endif /* HAVE_THREAD_LS */
+
+    if (err == MP_OKAY) {
+        sp_ecc_get_cache_256(g, &cache);
+        if (cache->cnt == 2)
+            sp_256_gen_stripe_table_8(g, cache->table, tmp, heap);
+
+#ifndef HAVE_THREAD_LS
+        wc_UnLockMutex(&sp_cache_256_lock);
+#endif /* HAVE_THREAD_LS */
+
+        if (cache->cnt < 2) {
+            err = sp_256_ecc_mulmod_fast_8(r, g, k, map, heap);
+        }
+        else {
+            err = sp_256_ecc_mulmod_stripe_8(r, g, cache->table, k,
+                    map, heap);
+        }
+    }
+
+    return err;
+#endif
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * km    Scalar to multiply by.
+ * p     Point to multiply.
+ * r     Resulting point.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_256(mp_int* km, ecc_point* gm, ecc_point* r, int map,
+        void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 p;
+    sp_digit kd[8];
+#endif
+    sp_point_256* point;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    err = sp_256_point_new_8(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL)
+            err = MEMORY_E;
+    }
+#else
+    k = kd;
+#endif
+    if (err == MP_OKAY) {
+        sp_256_from_mp(k, 8, km);
+        sp_256_point_from_ecc_point_8(point, gm);
+
+            err = sp_256_ecc_mulmod_8(point, point, k, map, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_point_to_ecc_point_8(point, r);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_8(point, 0, heap);
+
+    return err;
+}
+
+#ifdef WOLFSSL_SP_SMALL
+static const sp_table_entry_256 p256_table[16] = {
+    /* 0 */
+    { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 */
+    { { 0x18a9143c,0x79e730d4,0x5fedb601,0x75ba95fc,0x77622510,0x79fb732b,
+        0xa53755c6,0x18905f76 },
+      { 0xce95560a,0xddf25357,0xba19e45c,0x8b4ab8e4,0xdd21f325,0xd2e88688,
+        0x25885d85,0x8571ff18 } },
+    /* 2 */
+    { { 0x16a0d2bb,0x4f922fc5,0x1a623499,0x0d5cc16c,0x57c62c8b,0x9241cf3a,
+        0xfd1b667f,0x2f5e6961 },
+      { 0xf5a01797,0x5c15c70b,0x60956192,0x3d20b44d,0x071fdb52,0x04911b37,
+        0x8d6f0f7b,0xf648f916 } },
+    /* 3 */
+    { { 0xe137bbbc,0x9e566847,0x8a6a0bec,0xe434469e,0x79d73463,0xb1c42761,
+        0x133d0015,0x5abe0285 },
+      { 0xc04c7dab,0x92aa837c,0x43260c07,0x573d9f4c,0x78e6cc37,0x0c931562,
+        0x6b6f7383,0x94bb725b } },
+    /* 4 */
+    { { 0xbfe20925,0x62a8c244,0x8fdce867,0x91c19ac3,0xdd387063,0x5a96a5d5,
+        0x21d324f6,0x61d587d4 },
+      { 0xa37173ea,0xe87673a2,0x53778b65,0x23848008,0x05bab43e,0x10f8441e,
+        0x4621efbe,0xfa11fe12 } },
+    /* 5 */
+    { { 0x2cb19ffd,0x1c891f2b,0xb1923c23,0x01ba8d5b,0x8ac5ca8e,0xb6d03d67,
+        0x1f13bedc,0x586eb04c },
+      { 0x27e8ed09,0x0c35c6e5,0x1819ede2,0x1e81a33c,0x56c652fa,0x278fd6c0,
+        0x70864f11,0x19d5ac08 } },
+    /* 6 */
+    { { 0xd2b533d5,0x62577734,0xa1bdddc0,0x673b8af6,0xa79ec293,0x577e7c9a,
+        0xc3b266b1,0xbb6de651 },
+      { 0xb65259b3,0xe7e9303a,0xd03a7480,0xd6a0afd3,0x9b3cfc27,0xc5ac83d1,
+        0x5d18b99b,0x60b4619a } },
+    /* 7 */
+    { { 0x1ae5aa1c,0xbd6a38e1,0x49e73658,0xb8b7652b,0xee5f87ed,0x0b130014,
+        0xaeebffcd,0x9d0f27b2 },
+      { 0x7a730a55,0xca924631,0xddbbc83a,0x9c955b2f,0xac019a71,0x07c1dfe0,
+        0x356ec48d,0x244a566d } },
+    /* 8 */
+    { { 0xf4f8b16a,0x56f8410e,0xc47b266a,0x97241afe,0x6d9c87c1,0x0a406b8e,
+        0xcd42ab1b,0x803f3e02 },
+      { 0x04dbec69,0x7f0309a8,0x3bbad05f,0xa83b85f7,0xad8e197f,0xc6097273,
+        0x5067adc1,0xc097440e } },
+    /* 9 */
+    { { 0xc379ab34,0x846a56f2,0x841df8d1,0xa8ee068b,0x176c68ef,0x20314459,
+        0x915f1f30,0xf1af32d5 },
+      { 0x5d75bd50,0x99c37531,0xf72f67bc,0x837cffba,0x48d7723f,0x0613a418,
+        0xe2d41c8b,0x23d0f130 } },
+    /* 10 */
+    { { 0xd5be5a2b,0xed93e225,0x5934f3c6,0x6fe79983,0x22626ffc,0x43140926,
+        0x7990216a,0x50bbb4d9 },
+      { 0xe57ec63e,0x378191c6,0x181dcdb2,0x65422c40,0x0236e0f6,0x41a8099b,
+        0x01fe49c3,0x2b100118 } },
+    /* 11 */
+    { { 0x9b391593,0xfc68b5c5,0x598270fc,0xc385f5a2,0xd19adcbb,0x7144f3aa,
+        0x83fbae0c,0xdd558999 },
+      { 0x74b82ff4,0x93b88b8e,0x71e734c9,0xd2e03c40,0x43c0322a,0x9a7a9eaf,
+        0x149d6041,0xe6e4c551 } },
+    /* 12 */
+    { { 0x80ec21fe,0x5fe14bfe,0xc255be82,0xf6ce116a,0x2f4a5d67,0x98bc5a07,
+        0xdb7e63af,0xfad27148 },
+      { 0x29ab05b3,0x90c0b6ac,0x4e251ae6,0x37a9a83c,0xc2aade7d,0x0a7dc875,
+        0x9f0e1a84,0x77387de3 } },
+    /* 13 */
+    { { 0xa56c0dd7,0x1e9ecc49,0x46086c74,0xa5cffcd8,0xf505aece,0x8f7a1408,
+        0xbef0c47e,0xb37b85c0 },
+      { 0xcc0e6a8f,0x3596b6e4,0x6b388f23,0xfd6d4bbf,0xc39cef4e,0xaba453fa,
+        0xf9f628d5,0x9c135ac8 } },
+    /* 14 */
+    { { 0x95c8f8be,0x0a1c7294,0x3bf362bf,0x2961c480,0xdf63d4ac,0x9e418403,
+        0x91ece900,0xc109f9cb },
+      { 0x58945705,0xc2d095d0,0xddeb85c0,0xb9083d96,0x7a40449b,0x84692b8d,
+        0x2eee1ee1,0x9bc3344f } },
+    /* 15 */
+    { { 0x42913074,0x0d5ae356,0x48a542b1,0x55491b27,0xb310732a,0x469ca665,
+        0x5f1a4cc1,0x29591d52 },
+      { 0xb84f983f,0xe76f5b6b,0x9f5f84e1,0xbe7eef41,0x80baa189,0x1200d496,
+        0x18ef332c,0x6376551f } },
+};
+
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_base_8(sp_point_256* r, const sp_digit* k,
+        int map, void* heap)
+{
+    return sp_256_ecc_mulmod_stripe_8(r, &p256_base, p256_table,
+                                      k, map, heap);
+}
+
+#else
+static const sp_table_entry_256 p256_table[256] = {
+    /* 0 */
+    { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 */
+    { { 0x18a9143c,0x79e730d4,0x5fedb601,0x75ba95fc,0x77622510,0x79fb732b,
+        0xa53755c6,0x18905f76 },
+      { 0xce95560a,0xddf25357,0xba19e45c,0x8b4ab8e4,0xdd21f325,0xd2e88688,
+        0x25885d85,0x8571ff18 } },
+    /* 2 */
+    { { 0x4147519a,0x20288602,0x26b372f0,0xd0981eac,0xa785ebc8,0xa9d4a7ca,
+        0xdbdf58e9,0xd953c50d },
+      { 0xfd590f8f,0x9d6361cc,0x44e6c917,0x72e9626b,0x22eb64cf,0x7fd96110,
+        0x9eb288f3,0x863ebb7e } },
+    /* 3 */
+    { { 0x5cdb6485,0x7856b623,0x2f0a2f97,0x808f0ea2,0x4f7e300b,0x3e68d954,
+        0xb5ff80a0,0x00076055 },
+      { 0x838d2010,0x7634eb9b,0x3243708a,0x54014fbb,0x842a6606,0xe0e47d39,
+        0x34373ee0,0x83087761 } },
+    /* 4 */
+    { { 0x16a0d2bb,0x4f922fc5,0x1a623499,0x0d5cc16c,0x57c62c8b,0x9241cf3a,
+        0xfd1b667f,0x2f5e6961 },
+      { 0xf5a01797,0x5c15c70b,0x60956192,0x3d20b44d,0x071fdb52,0x04911b37,
+        0x8d6f0f7b,0xf648f916 } },
+    /* 5 */
+    { { 0xe137bbbc,0x9e566847,0x8a6a0bec,0xe434469e,0x79d73463,0xb1c42761,
+        0x133d0015,0x5abe0285 },
+      { 0xc04c7dab,0x92aa837c,0x43260c07,0x573d9f4c,0x78e6cc37,0x0c931562,
+        0x6b6f7383,0x94bb725b } },
+    /* 6 */
+    { { 0x720f141c,0xbbf9b48f,0x2df5bc74,0x6199b3cd,0x411045c4,0xdc3f6129,
+        0x2f7dc4ef,0xcdd6bbcb },
+      { 0xeaf436fd,0xcca6700b,0xb99326be,0x6f647f6d,0x014f2522,0x0c0fa792,
+        0x4bdae5f6,0xa361bebd } },
+    /* 7 */
+    { { 0x597c13c7,0x28aa2558,0x50b7c3e1,0xc38d635f,0xf3c09d1d,0x07039aec,
+        0xc4b5292c,0xba12ca09 },
+      { 0x59f91dfd,0x9e408fa4,0xceea07fb,0x3af43b66,0x9d780b29,0x1eceb089,
+        0x701fef4b,0x53ebb99d } },
+    /* 8 */
+    { { 0xb0e63d34,0x4fe7ee31,0xa9e54fab,0xf4600572,0xd5e7b5a4,0xc0493334,
+        0x06d54831,0x8589fb92 },
+      { 0x6583553a,0xaa70f5cc,0xe25649e5,0x0879094a,0x10044652,0xcc904507,
+        0x02541c4f,0xebb0696d } },
+    /* 9 */
+    { { 0xac1647c5,0x4616ca15,0xc4cf5799,0xb8127d47,0x764dfbac,0xdc666aa3,
+        0xd1b27da3,0xeb2820cb },
+      { 0x6a87e008,0x9406f8d8,0x922378f3,0xd87dfa9d,0x80ccecb2,0x56ed2e42,
+        0x55a7da1d,0x1f28289b } },
+    /* 10 */
+    { { 0x3b89da99,0xabbaa0c0,0xb8284022,0xa6f2d79e,0xb81c05e8,0x27847862,
+        0x05e54d63,0x337a4b59 },
+      { 0x21f7794a,0x3c67500d,0x7d6d7f61,0x207005b7,0x04cfd6e8,0x0a5a3781,
+        0xf4c2fbd6,0x0d65e0d5 } },
+    /* 11 */
+    { { 0xb5275d38,0xd9d09bbe,0x0be0a358,0x4268a745,0x973eb265,0xf0762ff4,
+        0x52f4a232,0xc23da242 },
+      { 0x0b94520c,0x5da1b84f,0xb05bd78e,0x09666763,0x94d29ea1,0x3a4dcb86,
+        0xc790cff1,0x19de3b8c } },
+    /* 12 */
+    { { 0x26c5fe04,0x183a716c,0x3bba1bdb,0x3b28de0b,0xa4cb712c,0x7432c586,
+        0x91fccbfd,0xe34dcbd4 },
+      { 0xaaa58403,0xb408d46b,0x82e97a53,0x9a697486,0x36aaa8af,0x9e390127,
+        0x7b4e0f7f,0xe7641f44 } },
+    /* 13 */
+    { { 0xdf64ba59,0x7d753941,0x0b0242fc,0xd33f10ec,0xa1581859,0x4f06dfc6,
+        0x052a57bf,0x4a12df57 },
+      { 0x9439dbd0,0xbfa6338f,0xbde53e1f,0xd3c24bd4,0x21f1b314,0xfd5e4ffa,
+        0xbb5bea46,0x6af5aa93 } },
+    /* 14 */
+    { { 0x10c91999,0xda10b699,0x2a580491,0x0a24b440,0xb8cc2090,0x3e0094b4,
+        0x66a44013,0x5fe3475a },
+      { 0xf93e7b4b,0xb0f8cabd,0x7c23f91a,0x292b501a,0xcd1e6263,0x42e889ae,
+        0xecfea916,0xb544e308 } },
+    /* 15 */
+    { { 0x16ddfdce,0x6478c6e9,0xf89179e6,0x2c329166,0x4d4e67e1,0x4e8d6e76,
+        0xa6b0c20b,0xe0b6b2bd },
+      { 0xbb7efb57,0x0d312df2,0x790c4007,0x1aac0dde,0x679bc944,0xf90336ad,
+        0x25a63774,0x71c023de } },
+    /* 16 */
+    { { 0xbfe20925,0x62a8c244,0x8fdce867,0x91c19ac3,0xdd387063,0x5a96a5d5,
+        0x21d324f6,0x61d587d4 },
+      { 0xa37173ea,0xe87673a2,0x53778b65,0x23848008,0x05bab43e,0x10f8441e,
+        0x4621efbe,0xfa11fe12 } },
+    /* 17 */
+    { { 0x2cb19ffd,0x1c891f2b,0xb1923c23,0x01ba8d5b,0x8ac5ca8e,0xb6d03d67,
+        0x1f13bedc,0x586eb04c },
+      { 0x27e8ed09,0x0c35c6e5,0x1819ede2,0x1e81a33c,0x56c652fa,0x278fd6c0,
+        0x70864f11,0x19d5ac08 } },
+    /* 18 */
+    { { 0x309a4e1f,0x1e99f581,0xe9270074,0xab7de71b,0xefd28d20,0x26a5ef0b,
+        0x7f9c563f,0xe7c0073f },
+      { 0x0ef59f76,0x1f6d663a,0x20fcb050,0x669b3b54,0x7a6602d4,0xc08c1f7a,
+        0xc65b3c0a,0xe08504fe } },
+    /* 19 */
+    { { 0xa031b3ca,0xf098f68d,0xe6da6d66,0x6d1cab9e,0x94f246e8,0x5bfd81fa,
+        0x5b0996b4,0x78f01882 },
+      { 0x3a25787f,0xb7eefde4,0x1dccac9b,0x8016f80d,0xb35bfc36,0x0cea4877,
+        0x7e94747a,0x43a773b8 } },
+    /* 20 */
+    { { 0xd2b533d5,0x62577734,0xa1bdddc0,0x673b8af6,0xa79ec293,0x577e7c9a,
+        0xc3b266b1,0xbb6de651 },
+      { 0xb65259b3,0xe7e9303a,0xd03a7480,0xd6a0afd3,0x9b3cfc27,0xc5ac83d1,
+        0x5d18b99b,0x60b4619a } },
+    /* 21 */
+    { { 0x1ae5aa1c,0xbd6a38e1,0x49e73658,0xb8b7652b,0xee5f87ed,0x0b130014,
+        0xaeebffcd,0x9d0f27b2 },
+      { 0x7a730a55,0xca924631,0xddbbc83a,0x9c955b2f,0xac019a71,0x07c1dfe0,
+        0x356ec48d,0x244a566d } },
+    /* 22 */
+    { { 0xeacf1f96,0x6db0394a,0x024c271c,0x9f2122a9,0x82cbd3b9,0x2626ac1b,
+        0x3581ef69,0x45e58c87 },
+      { 0xa38f9dbc,0xd3ff479d,0xe888a040,0xa8aaf146,0x46e0bed7,0x945adfb2,
+        0xc1e4b7a4,0xc040e21c } },
+    /* 23 */
+    { { 0x6f8117b6,0x847af000,0x73a35433,0x651969ff,0x1d9475eb,0x482b3576,
+        0x682c6ec7,0x1cdf5c97 },
+      { 0x11f04839,0x7db775b4,0x48de1698,0x7dbeacf4,0xb70b3219,0xb2921dd1,
+        0xa92dff3d,0x046755f8 } },
+    /* 24 */
+    { { 0xbce8ffcd,0xcc8ac5d2,0x2fe61a82,0x0d53c48b,0x7202d6c7,0xf6f16172,
+        0x3b83a5f3,0x046e5e11 },
+      { 0xd8007f01,0xe7b8ff64,0x5af43183,0x7fb1ef12,0x35e1a03c,0x045c5ea6,
+        0x303d005b,0x6e0106c3 } },
+    /* 25 */
+    { { 0x88dd73b1,0x48c73584,0x995ed0d9,0x7670708f,0xc56a2ab7,0x38385ea8,
+        0xe901cf1f,0x442594ed },
+      { 0x12d4b65b,0xf8faa2c9,0x96c90c37,0x94c2343b,0x5e978d1f,0xd326e4a1,
+        0x4c2ee68e,0xa796fa51 } },
+    /* 26 */
+    { { 0x823addd7,0x359fb604,0xe56693b3,0x9e2a6183,0x3cbf3c80,0xf885b78e,
+        0xc69766e9,0xe4ad2da9 },
+      { 0x8e048a61,0x357f7f42,0xc092d9a0,0x082d198c,0xc03ed8ef,0xfc3a1af4,
+        0xc37b5143,0xc5e94046 } },
+    /* 27 */
+    { { 0x2be75f9e,0x476a538c,0xcb123a78,0x6fd1a9e8,0xb109c04b,0xd85e4df0,
+        0xdb464747,0x63283daf },
+      { 0xbaf2df15,0xce728cf7,0x0ad9a7f4,0xe592c455,0xe834bcc3,0xfab226ad,
+        0x1981a938,0x68bd19ab } },
+    /* 28 */
+    { { 0x1887d659,0xc08ead51,0xb359305a,0x3374d5f4,0xcfe74fe3,0x96986981,
+        0x3c6fdfd6,0x495292f5 },
+      { 0x1acec896,0x4a878c9e,0xec5b4484,0xd964b210,0x664d60a7,0x6696f7e2,
+        0x26036837,0x0ec7530d } },
+    /* 29 */
+    { { 0xad2687bb,0x2da13a05,0xf32e21fa,0xa1f83b6a,0x1dd4607b,0x390f5ef5,
+        0x64863f0b,0x0f6207a6 },
+      { 0x0f138233,0xbd67e3bb,0x272aa718,0xdd66b96c,0x26ec88ae,0x8ed00407,
+        0x08ed6dcf,0xff0db072 } },
+    /* 30 */
+    { { 0x4c95d553,0x749fa101,0x5d680a8a,0xa44052fd,0xff3b566f,0x183b4317,
+        0x88740ea3,0x313b513c },
+      { 0x08d11549,0xb402e2ac,0xb4dee21c,0x071ee10b,0x47f2320e,0x26b987dd,
+        0x86f19f81,0x2d3abcf9 } },
+    /* 31 */
+    { { 0x815581a2,0x4c288501,0x632211af,0x9a0a6d56,0x0cab2e99,0x19ba7a0f,
+        0xded98cdf,0xc036fa10 },
+      { 0xc1fbd009,0x29ae08ba,0x06d15816,0x0b68b190,0x9b9e0d8f,0xc2eb3277,
+        0xb6d40194,0xa6b2a2c4 } },
+    /* 32 */
+    { { 0x6d3549cf,0xd433e50f,0xfacd665e,0x6f33696f,0xce11fcb4,0x695bfdac,
+        0xaf7c9860,0x810ee252 },
+      { 0x7159bb2c,0x65450fe1,0x758b357b,0xf7dfbebe,0xd69fea72,0x2b057e74,
+        0x92731745,0xd485717a } },
+    /* 33 */
+    { { 0xf0cb5a98,0x11741a8a,0x1f3110bf,0xd3da8f93,0xab382adf,0x1994e2cb,
+        0x2f9a604e,0x6a6045a7 },
+      { 0xa2b2411d,0x170c0d3f,0x510e96e0,0xbe0eb83e,0x8865b3cc,0x3bcc9f73,
+        0xf9e15790,0xd3e45cfa } },
+    /* 34 */
+    { { 0xe83f7669,0xce1f69bb,0x72877d6b,0x09f8ae82,0x3244278d,0x9548ae54,
+        0xe3c2c19c,0x207755de },
+      { 0x6fef1945,0x87bd61d9,0xb12d28c3,0x18813cef,0x72df64aa,0x9fbcd1d6,
+        0x7154b00d,0x48dc5ee5 } },
+    /* 35 */
+    { { 0xf7e5a199,0x123790bf,0x989ccbb7,0xe0efb8cf,0x0a519c79,0xc27a2bfe,
+        0xdff6f445,0xf2fb0aed },
+      { 0xf0b5025f,0x41c09575,0x40fa9f22,0x550543d7,0x380bfbd0,0x8fa3c8ad,
+        0xdb28d525,0xa13e9015 } },
+    /* 36 */
+    { { 0xa2b65cbc,0xf9f7a350,0x2a464226,0x0b04b972,0xe23f07a1,0x265ce241,
+        0x1497526f,0x2bf0d6b0 },
+      { 0x4b216fb7,0xd3d4dd3f,0xfbdda26a,0xf7d7b867,0x6708505c,0xaeb7b83f,
+        0x162fe89f,0x42a94a5a } },
+    /* 37 */
+    { { 0xeaadf191,0x5846ad0b,0x25a268d7,0x0f8a4890,0x494dc1f6,0xe8603050,
+        0xc65ede3d,0x2c2dd969 },
+      { 0x93849c17,0x6d02171d,0x1da250dd,0x460488ba,0x3c3a5485,0x4810c706,
+        0x42c56dbc,0xf437fa1f } },
+    /* 38 */
+    { { 0x4a0f7dab,0x6aa0d714,0x1776e9ac,0x0f049793,0xf5f39786,0x52c0a050,
+        0x54707aa8,0xaaf45b33 },
+      { 0xc18d364a,0x85e37c33,0x3e497165,0xd40b9b06,0x15ec5444,0xf4171681,
+        0xf4f272bc,0xcdf6310d } },
+    /* 39 */
+    { { 0x8ea8b7ef,0x7473c623,0x85bc2287,0x08e93518,0x2bda8e34,0x41956772,
+        0xda9e2ff2,0xf0d008ba },
+      { 0x2414d3b1,0x2912671d,0xb019ea76,0xb3754985,0x453bcbdb,0x5c61b96d,
+        0xca887b8b,0x5bd5c2f5 } },
+    /* 40 */
+    { { 0xf49a3154,0xef0f469e,0x6e2b2e9a,0x3e85a595,0xaa924a9c,0x45aaec1e,
+        0xa09e4719,0xaa12dfc8 },
+      { 0x4df69f1d,0x26f27227,0xa2ff5e73,0xe0e4c82c,0xb7a9dd44,0xb9d8ce73,
+        0xe48ca901,0x6c036e73 } },
+    /* 41 */
+    { { 0x0f6e3138,0x5cfae12a,0x25ad345a,0x6966ef00,0x45672bc5,0x8993c64b,
+        0x96afbe24,0x292ff658 },
+      { 0x5e213402,0xd5250d44,0x4392c9fe,0xf6580e27,0xda1c72e8,0x097b397f,
+        0x311b7276,0x644e0c90 } },
+    /* 42 */
+    { { 0xa47153f0,0xe1e421e1,0x920418c9,0xb86c3b79,0x705d7672,0x93bdce87,
+        0xcab79a77,0xf25ae793 },
+      { 0x6d869d0c,0x1f3194a3,0x4986c264,0x9d55c882,0x096e945e,0x49fb5ea3,
+        0x13db0a3e,0x39b8e653 } },
+    /* 43 */
+    { { 0xb6fd2e59,0x37754200,0x9255c98f,0x35e2c066,0x0e2a5739,0xd9dab21a,
+        0x0f19db06,0x39122f2f },
+      { 0x03cad53c,0xcfbce1e0,0xe65c17e3,0x225b2c0f,0x9aa13877,0x72baf1d2,
+        0xce80ff8d,0x8de80af8 } },
+    /* 44 */
+    { { 0x207bbb76,0xafbea8d9,0x21782758,0x921c7e7c,0x1c0436b1,0xdfa2b74b,
+        0x2e368c04,0x87194906 },
+      { 0xa3993df5,0xb5f928bb,0xf3b3d26a,0x639d75b5,0x85b55050,0x011aa78a,
+        0x5b74fde1,0xfc315e6a } },
+    /* 45 */
+    { { 0xe8d6ecfa,0x561fd41a,0x1aec7f86,0x5f8c44f6,0x4924741d,0x98452a7b,
+        0xee389088,0xe6d4a7ad },
+      { 0x4593c75d,0x60552ed1,0xdd271162,0x70a70da4,0x7ba2c7db,0xd2aede93,
+        0x9be2ae57,0x35dfaf9a } },
+    /* 46 */
+    { { 0xaa736636,0x6b956fcd,0xae2cab7e,0x09f51d97,0x0f349966,0xfb10bf41,
+        0x1c830d2b,0x1da5c7d7 },
+      { 0x3cce6825,0x5c41e483,0xf9573c3b,0x15ad118f,0xf23036b8,0xa28552c7,
+        0xdbf4b9d6,0x7077c0fd } },
+    /* 47 */
+    { { 0x46b9661c,0xbf63ff8d,0x0d2cfd71,0xa1dfd36b,0xa847f8f7,0x0373e140,
+        0xe50efe44,0x53a8632e },
+      { 0x696d8051,0x0976ff68,0xc74f468a,0xdaec0c95,0x5e4e26bd,0x62994dc3,
+        0x34e1fcc1,0x028ca76d } },
+    /* 48 */
+    { { 0xfc9877ee,0xd11d47dc,0x801d0002,0xc8b36210,0x54c260b6,0xd002c117,
+        0x6962f046,0x04c17cd8 },
+      { 0xb0daddf5,0x6d9bd094,0x24ce55c0,0xbea23575,0x72da03b5,0x663356e6,
+        0xfed97474,0xf7ba4de9 } },
+    /* 49 */
+    { { 0xebe1263f,0xd0dbfa34,0x71ae7ce6,0x55763735,0x82a6f523,0xd2440553,
+        0x52131c41,0xe31f9600 },
+      { 0xea6b6ec6,0xd1bb9216,0x73c2fc44,0x37a1d12e,0x89d0a294,0xc10e7eac,
+        0xce34d47b,0xaa3a6259 } },
+    /* 50 */
+    { { 0x36f3dcd3,0xfbcf9df5,0xd2bf7360,0x6ceded50,0xdf504f5b,0x491710fa,
+        0x7e79daee,0x2398dd62 },
+      { 0x6d09569e,0xcf4705a3,0x5149f769,0xea0619bb,0x35f6034c,0xff9c0377,
+        0x1c046210,0x5717f5b2 } },
+    /* 51 */
+    { { 0x21dd895e,0x9fe229c9,0x40c28451,0x8e518500,0x1d637ecd,0xfa13d239,
+        0x0e3c28de,0x660a2c56 },
+      { 0xd67fcbd0,0x9cca88ae,0x0ea9f096,0xc8472478,0x72e92b4d,0x32b2f481,
+        0x4f522453,0x624ee54c } },
+    /* 52 */
+    { { 0xd897eccc,0x09549ce4,0x3f9880aa,0x4d49d1d9,0x043a7c20,0x723c2423,
+        0x92bdfbc0,0x4f392afb },
+      { 0x7de44fd9,0x6969f8fa,0x57b32156,0xb66cfbe4,0x368ebc3c,0xdb2fa803,
+        0xccdb399c,0x8a3e7977 } },
+    /* 53 */
+    { { 0x06c4b125,0xdde1881f,0xf6e3ca8c,0xae34e300,0x5c7a13e9,0xef6999de,
+        0x70c24404,0x3888d023 },
+      { 0x44f91081,0x76280356,0x5f015504,0x3d9fcf61,0x632cd36e,0x1827edc8,
+        0x18102336,0xa5e62e47 } },
+    /* 54 */
+    { { 0x2facd6c8,0x1a825ee3,0x54bcbc66,0x699c6354,0x98df9931,0x0ce3edf7,
+        0x466a5adc,0x2c4768e6 },
+      { 0x90a64bc9,0xb346ff8c,0xe4779f5c,0x630a6020,0xbc05e884,0xd949d064,
+        0xf9e652a0,0x7b5e6441 } },
+    /* 55 */
+    { { 0x1d28444a,0x2169422c,0xbe136a39,0xe996c5d8,0xfb0c7fce,0x2387afe5,
+        0x0c8d744a,0xb8af73cb },
+      { 0x338b86fd,0x5fde83aa,0xa58a5cff,0xfee3f158,0x20ac9433,0xc9ee8f6f,
+        0x7f3f0895,0xa036395f } },
+    /* 56 */
+    { { 0xa10f7770,0x8c73c6bb,0xa12a0e24,0xa6f16d81,0x51bc2b9f,0x100df682,
+        0x875fb533,0x4be36b01 },
+      { 0x9fb56dbb,0x9226086e,0x07e7a4f8,0x306fef8b,0x66d52f20,0xeeaccc05,
+        0x1bdc00c0,0x8cbc9a87 } },
+    /* 57 */
+    { { 0xc0dac4ab,0xe131895c,0x712ff112,0xa874a440,0x6a1cee57,0x6332ae7c,
+        0x0c0835f8,0x44e7553e },
+      { 0x7734002d,0x6d503fff,0x0b34425c,0x9d35cb8b,0x0e8738b5,0x95f70276,
+        0x5eb8fc18,0x470a683a } },
+    /* 58 */
+    { { 0x90513482,0x81b761dc,0x01e9276a,0x0287202a,0x0ce73083,0xcda441ee,
+        0xc63dc6ef,0x16410690 },
+      { 0x6d06a2ed,0xf5034a06,0x189b100b,0xdd4d7745,0xab8218c9,0xd914ae72,
+        0x7abcbb4f,0xd73479fd } },
+    /* 59 */
+    { { 0x5ad4c6e5,0x7edefb16,0x5b06d04d,0x262cf08f,0x8575cb14,0x12ed5bb1,
+        0x0771666b,0x816469e3 },
+      { 0x561e291e,0xd7ab9d79,0xc1de1661,0xeb9daf22,0x135e0513,0xf49827eb,
+        0xf0dd3f9c,0x0a36dd23 } },
+    /* 60 */
+    { { 0x41d5533c,0x098d32c7,0x8684628f,0x7c5f5a9e,0xe349bd11,0x39a228ad,
+        0xfdbab118,0xe331dfd6 },
+      { 0x6bcc6ed8,0x5100ab68,0xef7a260e,0x7160c3bd,0xbce850d7,0x9063d9a7,
+        0x492e3389,0xd3b4782a } },
+    /* 61 */
+    { { 0xf3821f90,0xa149b6e8,0x66eb7aad,0x92edd9ed,0x1a013116,0x0bb66953,
+        0x4c86a5bd,0x7281275a },
+      { 0xd3ff47e5,0x503858f7,0x61016441,0x5e1616bc,0x7dfd9bb1,0x62b0f11a,
+        0xce145059,0x2c062e7e } },
+    /* 62 */
+    { { 0x0159ac2e,0xa76f996f,0xcbdb2713,0x281e7736,0x08e46047,0x2ad6d288,
+        0x2c4e7ef1,0x282a35f9 },
+      { 0xc0ce5cd2,0x9c354b1e,0x1379c229,0xcf99efc9,0x3e82c11e,0x992caf38,
+        0x554d2abd,0xc71cd513 } },
+    /* 63 */
+    { { 0x09b578f4,0x4885de9c,0xe3affa7a,0x1884e258,0x59182f1f,0x8f76b1b7,
+        0xcf47f3a3,0xc50f6740 },
+      { 0x374b68ea,0xa9c4adf3,0x69965fe2,0xa406f323,0x85a53050,0x2f86a222,
+        0x212958dc,0xb9ecb3a7 } },
+    /* 64 */
+    { { 0xf4f8b16a,0x56f8410e,0xc47b266a,0x97241afe,0x6d9c87c1,0x0a406b8e,
+        0xcd42ab1b,0x803f3e02 },
+      { 0x04dbec69,0x7f0309a8,0x3bbad05f,0xa83b85f7,0xad8e197f,0xc6097273,
+        0x5067adc1,0xc097440e } },
+    /* 65 */
+    { { 0xc379ab34,0x846a56f2,0x841df8d1,0xa8ee068b,0x176c68ef,0x20314459,
+        0x915f1f30,0xf1af32d5 },
+      { 0x5d75bd50,0x99c37531,0xf72f67bc,0x837cffba,0x48d7723f,0x0613a418,
+        0xe2d41c8b,0x23d0f130 } },
+    /* 66 */
+    { { 0xf41500d9,0x857ab6ed,0xfcbeada8,0x0d890ae5,0x89725951,0x52fe8648,
+        0xc0a3fadd,0xb0288dd6 },
+      { 0x650bcb08,0x85320f30,0x695d6e16,0x71af6313,0xb989aa76,0x31f520a7,
+        0xf408c8d2,0xffd3724f } },
+    /* 67 */
+    { { 0xb458e6cb,0x53968e64,0x317a5d28,0x992dad20,0x7aa75f56,0x3814ae0b,
+        0xd78c26df,0xf5590f4a },
+      { 0xcf0ba55a,0x0fc24bd3,0x0c778bae,0x0fc4724a,0x683b674a,0x1ce9864f,
+        0xf6f74a20,0x18d6da54 } },
+    /* 68 */
+    { { 0xd5be5a2b,0xed93e225,0x5934f3c6,0x6fe79983,0x22626ffc,0x43140926,
+        0x7990216a,0x50bbb4d9 },
+      { 0xe57ec63e,0x378191c6,0x181dcdb2,0x65422c40,0x0236e0f6,0x41a8099b,
+        0x01fe49c3,0x2b100118 } },
+    /* 69 */
+    { { 0x9b391593,0xfc68b5c5,0x598270fc,0xc385f5a2,0xd19adcbb,0x7144f3aa,
+        0x83fbae0c,0xdd558999 },
+      { 0x74b82ff4,0x93b88b8e,0x71e734c9,0xd2e03c40,0x43c0322a,0x9a7a9eaf,
+        0x149d6041,0xe6e4c551 } },
+    /* 70 */
+    { { 0x1e9af288,0x55f655bb,0xf7ada931,0x647e1a64,0xcb2820e5,0x43697e4b,
+        0x07ed56ff,0x51e00db1 },
+      { 0x771c327e,0x43d169b8,0x4a96c2ad,0x29cdb20b,0x3deb4779,0xc07d51f5,
+        0x49829177,0xe22f4241 } },
+    /* 71 */
+    { { 0x635f1abb,0xcd45e8f4,0x68538874,0x7edc0cb5,0xb5a8034d,0xc9472c1f,
+        0x52dc48c9,0xf709373d },
+      { 0xa8af30d6,0x401966bb,0xf137b69c,0x95bf5f4a,0x9361c47e,0x3966162a,
+        0xe7275b11,0xbd52d288 } },
+    /* 72 */
+    { { 0x9c5fa877,0xab155c7a,0x7d3a3d48,0x17dad672,0x73d189d8,0x43f43f9e,
+        0xc8aa77a6,0xa0d0f8e4 },
+      { 0xcc94f92d,0x0bbeafd8,0x0c4ddb3a,0xd818c8be,0xb82eba14,0x22cc65f8,
+        0x946d6a00,0xa56c78c7 } },
+    /* 73 */
+    { { 0x0dd09529,0x2962391b,0x3daddfcf,0x803e0ea6,0x5b5bf481,0x2c77351f,
+        0x731a367a,0xd8befdf8 },
+      { 0xfc0157f4,0xab919d42,0xfec8e650,0xf51caed7,0x02d48b0a,0xcdf9cb40,
+        0xce9f6478,0x854a68a5 } },
+    /* 74 */
+    { { 0x63506ea5,0xdc35f67b,0xa4fe0d66,0x9286c489,0xfe95cd4d,0x3f101d3b,
+        0x98846a95,0x5cacea0b },
+      { 0x9ceac44d,0xa90df60c,0x354d1c3a,0x3db29af4,0xad5dbabe,0x08dd3de8,
+        0x35e4efa9,0xe4982d12 } },
+    /* 75 */
+    { { 0xc34cd55e,0x23104a22,0x2680d132,0x58695bb3,0x1fa1d943,0xfb345afa,
+        0x16b20499,0x8046b7f6 },
+      { 0x38e7d098,0xb533581e,0xf46f0b70,0xd7f61e8d,0x44cb78c4,0x30dea9ea,
+        0x9082af55,0xeb17ca7b } },
+    /* 76 */
+    { { 0x76a145b9,0x1751b598,0xc1bc71ec,0xa5cf6b0f,0x392715bb,0xd3e03565,
+        0xfab5e131,0x097b00ba },
+      { 0x565f69e1,0xaa66c8e9,0xb5be5199,0x77e8f75a,0xda4fd984,0x6033ba11,
+        0xafdbcc9e,0xf95c747b } },
+    /* 77 */
+    { { 0xbebae45e,0x558f01d3,0xc4bc6955,0xa8ebe9f0,0xdbc64fc6,0xaeb705b1,
+        0x566ed837,0x3512601e },
+      { 0xfa1161cd,0x9336f1e1,0x4c65ef87,0x328ab8d5,0x724f21e5,0x4757eee2,
+        0x6068ab6b,0x0ef97123 } },
+    /* 78 */
+    { { 0x54ca4226,0x02598cf7,0xf8642c8e,0x5eede138,0x468e1790,0x48963f74,
+        0x3b4fbc95,0xfc16d933 },
+      { 0xe7c800ca,0xbe96fb31,0x2678adaa,0x13806331,0x6ff3e8b5,0x3d624497,
+        0xb95d7a17,0x14ca4af1 } },
+    /* 79 */
+    { { 0xbd2f81d5,0x7a4771ba,0x01f7d196,0x1a5f9d69,0xcad9c907,0xd898bef7,
+        0xf59c231d,0x4057b063 },
+      { 0x89c05c0a,0xbffd82fe,0x1dc0df85,0xe4911c6f,0xa35a16db,0x3befccae,
+        0xf1330b13,0x1c3b5d64 } },
+    /* 80 */
+    { { 0x80ec21fe,0x5fe14bfe,0xc255be82,0xf6ce116a,0x2f4a5d67,0x98bc5a07,
+        0xdb7e63af,0xfad27148 },
+      { 0x29ab05b3,0x90c0b6ac,0x4e251ae6,0x37a9a83c,0xc2aade7d,0x0a7dc875,
+        0x9f0e1a84,0x77387de3 } },
+    /* 81 */
+    { { 0xa56c0dd7,0x1e9ecc49,0x46086c74,0xa5cffcd8,0xf505aece,0x8f7a1408,
+        0xbef0c47e,0xb37b85c0 },
+      { 0xcc0e6a8f,0x3596b6e4,0x6b388f23,0xfd6d4bbf,0xc39cef4e,0xaba453fa,
+        0xf9f628d5,0x9c135ac8 } },
+    /* 82 */
+    { { 0x84e35743,0x32aa3202,0x85a3cdef,0x320d6ab1,0x1df19819,0xb821b176,
+        0xc433851f,0x5721361f },
+      { 0x71fc9168,0x1f0db36a,0x5e5c403c,0x5f98ba73,0x37bcd8f5,0xf64ca87e,
+        0xe6bb11bd,0xdcbac3c9 } },
+    /* 83 */
+    { { 0x4518cbe2,0xf01d9968,0x9c9eb04e,0xd242fc18,0xe47feebf,0x727663c7,
+        0x2d626862,0xb8c1c89e },
+      { 0xc8e1d569,0x51a58bdd,0xb7d88cd0,0x563809c8,0xf11f31eb,0x26c27fd9,
+        0x2f9422d4,0x5d23bbda } },
+    /* 84 */
+    { { 0x95c8f8be,0x0a1c7294,0x3bf362bf,0x2961c480,0xdf63d4ac,0x9e418403,
+        0x91ece900,0xc109f9cb },
+      { 0x58945705,0xc2d095d0,0xddeb85c0,0xb9083d96,0x7a40449b,0x84692b8d,
+        0x2eee1ee1,0x9bc3344f } },
+    /* 85 */
+    { { 0x42913074,0x0d5ae356,0x48a542b1,0x55491b27,0xb310732a,0x469ca665,
+        0x5f1a4cc1,0x29591d52 },
+      { 0xb84f983f,0xe76f5b6b,0x9f5f84e1,0xbe7eef41,0x80baa189,0x1200d496,
+        0x18ef332c,0x6376551f } },
+    /* 86 */
+    { { 0x562976cc,0xbda5f14e,0x0ef12c38,0x22bca3e6,0x6cca9852,0xbbfa3064,
+        0x08e2987a,0xbdb79dc8 },
+      { 0xcb06a772,0xfd2cb5c9,0xfe536dce,0x38f475aa,0x7c2b5db8,0xc2a3e022,
+        0xadd3c14a,0x8ee86001 } },
+    /* 87 */
+    { { 0xa4ade873,0xcbe96981,0xc4fba48c,0x7ee9aa4d,0x5a054ba5,0x2cee2899,
+        0x6f77aa4b,0x92e51d7a },
+      { 0x7190a34d,0x948bafa8,0xf6bd1ed1,0xd698f75b,0x0caf1144,0xd00ee6e3,
+        0x0a56aaaa,0x5182f86f } },
+    /* 88 */
+    { { 0x7a4cc99c,0xfba6212c,0x3e6d9ca1,0xff609b68,0x5ac98c5a,0x5dbb27cb,
+        0x4073a6f2,0x91dcab5d },
+      { 0x5f575a70,0x01b6cc3d,0x6f8d87fa,0x0cb36139,0x89981736,0x165d4e8c,
+        0x97974f2b,0x17a0cedb } },
+    /* 89 */
+    { { 0x076c8d3a,0x38861e2a,0x210f924b,0x701aad39,0x13a835d9,0x94d0eae4,
+        0x7f4cdf41,0x2e8ce36c },
+      { 0x037a862b,0x91273dab,0x60e4c8fa,0x01ba9bb7,0x33baf2dd,0xf9645388,
+        0x34f668f3,0xf4ccc6cb } },
+    /* 90 */
+    { { 0xf1f79687,0x44ef525c,0x92efa815,0x7c595495,0xa5c78d29,0xe1231741,
+        0x9a0df3c9,0xac0db488 },
+      { 0xdf01747f,0x86bfc711,0xef17df13,0x592b9358,0x5ccb6bb5,0xe5880e4f,
+        0x94c974a2,0x95a64a61 } },
+    /* 91 */
+    { { 0xc15a4c93,0x72c1efda,0x82585141,0x40269b73,0x16cb0bad,0x6a8dfb1c,
+        0x29210677,0x231e54ba },
+      { 0x8ae6d2dc,0xa70df917,0x39112918,0x4d6aa63f,0x5e5b7223,0xf627726b,
+        0xd8a731e1,0xab0be032 } },
+    /* 92 */
+    { { 0x8d131f2d,0x097ad0e9,0x3b04f101,0x637f09e3,0xd5e9a748,0x1ac86196,
+        0x2cf6a679,0xf1bcc880 },
+      { 0xe8daacb4,0x25c69140,0x60f65009,0x3c4e4055,0x477937a6,0x591cc8fc,
+        0x5aebb271,0x85169469 } },
+    /* 93 */
+    { { 0xf1dcf593,0xde35c143,0xb018be3b,0x78202b29,0x9bdd9d3d,0xe9cdadc2,
+        0xdaad55d8,0x8f67d9d2 },
+      { 0x7481ea5f,0x84111656,0xe34c590c,0xe7d2dde9,0x05053fa8,0xffdd43f4,
+        0xc0728b5d,0xf84572b9 } },
+    /* 94 */
+    { { 0x97af71c9,0x5e1a7a71,0x7a736565,0xa1449444,0x0e1d5063,0xa1b4ae07,
+        0x616b2c19,0xedee2710 },
+      { 0x11734121,0xb2f034f5,0x4a25e9f0,0x1cac6e55,0xa40c2ecf,0x8dc148f3,
+        0x44ebd7f4,0x9fd27e9b } },
+    /* 95 */
+    { { 0xf6e2cb16,0x3cc7658a,0xfe5919b6,0xe3eb7d2c,0x168d5583,0x5a8c5816,
+        0x958ff387,0xa40c2fb6 },
+      { 0xfedcc158,0x8c9ec560,0x55f23056,0x7ad804c6,0x9a307e12,0xd9396704,
+        0x7dc6decf,0x99bc9bb8 } },
+    /* 96 */
+    { { 0x927dafc6,0x84a9521d,0x5c09cd19,0x52c1fb69,0xf9366dde,0x9d9581a0,
+        0xa16d7e64,0x9abe210b },
+      { 0x48915220,0x480af84a,0x4dd816c6,0xfa73176a,0x1681ca5a,0xc7d53987,
+        0x87f344b0,0x7881c257 } },
+    /* 97 */
+    { { 0xe0bcf3ff,0x93399b51,0x127f74f6,0x0d02cbc5,0xdd01d968,0x8fb465a2,
+        0xa30e8940,0x15e6e319 },
+      { 0x3e0e05f4,0x646d6e0d,0x43588404,0xfad7bddc,0xc4f850d3,0xbe61c7d1,
+        0x191172ce,0x0e55facf } },
+    /* 98 */
+    { { 0xf8787564,0x7e9d9806,0x31e85ce6,0x1a331721,0xb819e8d6,0x6b0158ca,
+        0x6fe96577,0xd73d0976 },
+      { 0x1eb7206e,0x42483425,0xc618bb42,0xa519290f,0x5e30a520,0x5dcbb859,
+        0x8f15a50b,0x9250a374 } },
+    /* 99 */
+    { { 0xbe577410,0xcaff08f8,0x5077a8c6,0xfd408a03,0xec0a63a4,0xf1f63289,
+        0xc1cc8c0b,0x77414082 },
+      { 0xeb0991cd,0x05a40fa6,0x49fdc296,0xc1ca0866,0xb324fd40,0x3a68a3c7,
+        0x12eb20b9,0x8cb04f4d } },
+    /* 100 */
+    { { 0x6906171c,0xb1c2d055,0xb0240c3f,0x9073e9cd,0xd8906841,0xdb8e6b4f,
+        0x47123b51,0xe4e429ef },
+      { 0x38ec36f4,0x0b8dd53c,0xff4b6a27,0xf9d2dc01,0x879a9a48,0x5d066e07,
+        0x3c6e6552,0x37bca2ff } },
+    /* 101 */
+    { { 0xdf562470,0x4cd2e3c7,0xc0964ac9,0x44f272a2,0x80c793be,0x7c6d5df9,
+        0x3002b22a,0x59913edc },
+      { 0x5750592a,0x7a139a83,0xe783de02,0x99e01d80,0xea05d64f,0xcf8c0375,
+        0xb013e226,0x43786e4a } },
+    /* 102 */
+    { { 0x9e56b5a6,0xff32b0ed,0xd9fc68f9,0x0750d9a6,0x597846a7,0xec15e845,
+        0xb7e79e7a,0x8638ca98 },
+      { 0x0afc24b2,0x2f5ae096,0x4dace8f2,0x05398eaf,0xaecba78f,0x3b765dd0,
+        0x7b3aa6f0,0x1ecdd36a } },
+    /* 103 */
+    { { 0x6c5ff2f3,0x5d3acd62,0x2873a978,0xa2d516c0,0xd2110d54,0xad94c9fa,
+        0xd459f32d,0xd85d0f85 },
+      { 0x10b11da3,0x9f700b8d,0xa78318c4,0xd2c22c30,0x9208decd,0x556988f4,
+        0xb4ed3c62,0xa04f19c3 } },
+    /* 104 */
+    { { 0xed7f93bd,0x087924c8,0x392f51f6,0xcb64ac5d,0x821b71af,0x7cae330a,
+        0x5c0950b0,0x92b2eeea },
+      { 0x85b6e235,0x85ac4c94,0x2936c0f0,0xab2ca4a9,0xe0508891,0x80faa6b3,
+        0x5834276c,0x1ee78221 } },
+    /* 105 */
+    { { 0xe63e79f7,0xa60a2e00,0xf399d906,0xf590e7b2,0x6607c09d,0x9021054a,
+        0x57a6e150,0xf3f2ced8 },
+      { 0xf10d9b55,0x200510f3,0xd8642648,0x9d2fcfac,0xe8bd0e7c,0xe5631aa7,
+        0x3da3e210,0x0f56a454 } },
+    /* 106 */
+    { { 0x1043e0df,0x5b21bffa,0x9c007e6d,0x6c74b6cc,0xd4a8517a,0x1a656ec0,
+        0x1969e263,0xbd8f1741 },
+      { 0xbeb7494a,0x8a9bbb86,0x45f3b838,0x1567d46f,0xa4e5a79a,0xdf7a12a7,
+        0x30ccfa09,0x2d1a1c35 } },
+    /* 107 */
+    { { 0x506508da,0x192e3813,0xa1d795a7,0x336180c4,0x7a9944b3,0xcddb5949,
+        0xb91fba46,0xa107a65e },
+      { 0x0f94d639,0xe6d1d1c5,0x8a58b7d7,0x8b4af375,0xbd37ca1c,0x1a7c5584,
+        0xf87a9af2,0x183d760a } },
+    /* 108 */
+    { { 0x0dde59a4,0x29d69711,0x0e8bef87,0xf1ad8d07,0x4f2ebe78,0x229b4963,
+        0xc269d754,0x1d44179d },
+      { 0x8390d30e,0xb32dc0cf,0x0de8110c,0x0a3b2753,0x2bc0339a,0x31af1dc5,
+        0x9606d262,0x771f9cc2 } },
+    /* 109 */
+    { { 0x85040739,0x99993e77,0x8026a939,0x44539db9,0xf5f8fc26,0xcf40f6f2,
+        0x0362718e,0x64427a31 },
+      { 0x85428aa8,0x4f4f2d87,0xebfb49a8,0x7b7adc3f,0xf23d01ac,0x201b2c6d,
+        0x6ae90d6d,0x49d9b749 } },
+    /* 110 */
+    { { 0x435d1099,0xcc78d8bc,0x8e8d1a08,0x2adbcd4e,0x2cb68a41,0x02c2e2a0,
+        0x3f605445,0x9037d81b },
+      { 0x074c7b61,0x7cdbac27,0x57bfd72e,0xfe2031ab,0x596d5352,0x61ccec96,
+        0x7cc0639c,0x08c3de6a } },
+    /* 111 */
+    { { 0xf6d552ab,0x20fdd020,0x05cd81f1,0x56baff98,0x91351291,0x06fb7c3e,
+        0x45796b2f,0xc6909442 },
+      { 0x41231bd1,0x17b3ae9c,0x5cc58205,0x1eac6e87,0xf9d6a122,0x208837ab,
+        0xcafe3ac0,0x3fa3db02 } },
+    /* 112 */
+    { { 0x05058880,0xd75a3e65,0x643943f2,0x7da365ef,0xfab24925,0x4147861c,
+        0xfdb808ff,0xc5c4bdb0 },
+      { 0xb272b56b,0x73513e34,0x11b9043a,0xc8327e95,0xf8844969,0xfd8ce37d,
+        0x46c2b6b5,0x2d56db94 } },
+    /* 113 */
+    { { 0xff46ac6b,0x2461782f,0x07a2e425,0xd19f7926,0x09a48de1,0xfafea3c4,
+        0xe503ba42,0x0f56bd9d },
+      { 0x345cda49,0x137d4ed1,0x816f299d,0x821158fc,0xaeb43402,0xe7c6a54a,
+        0x1173b5f1,0x4003bb9d } },
+    /* 114 */
+    { { 0xa0803387,0x3b8e8189,0x39cbd404,0xece115f5,0xd2877f21,0x4297208d,
+        0xa07f2f9e,0x53765522 },
+      { 0xa8a4182d,0xa4980a21,0x3219df79,0xa2bbd07a,0x1a19a2d4,0x674d0a2e,
+        0x6c5d4549,0x7a056f58 } },
+    /* 115 */
+    { { 0x9d8a2a47,0x646b2558,0xc3df2773,0x5b582948,0xabf0d539,0x51ec000e,
+        0x7a1a2675,0x77d482f1 },
+      { 0x87853948,0xb8a1bd95,0x6cfbffee,0xa6f817bd,0x80681e47,0xab6ec057,
+        0x2b38b0e4,0x4115012b } },
+    /* 116 */
+    { { 0x6de28ced,0x3c73f0f4,0x9b13ec47,0x1d5da760,0x6e5c6392,0x61b8ce9e,
+        0xfbea0946,0xcdf04572 },
+      { 0x6c53c3b0,0x1cb3c58b,0x447b843c,0x97fe3c10,0x2cb9780e,0xfb2b8ae1,
+        0x97383109,0xee703dda } },
+    /* 117 */
+    { { 0xff57e43a,0x34515140,0xb1b811b8,0xd44660d3,0x8f42b986,0x2b3b5dff,
+        0xa162ce21,0x2a0ad89d },
+      { 0x6bc277ba,0x64e4a694,0xc141c276,0xc788c954,0xcabf6274,0x141aa64c,
+        0xac2b4659,0xd62d0b67 } },
+    /* 118 */
+    { { 0x2c054ac4,0x39c5d87b,0xf27df788,0x57005859,0xb18128d6,0xedf7cbf3,
+        0x991c2426,0xb39a23f2 },
+      { 0xf0b16ae5,0x95284a15,0xa136f51b,0x0c6a05b1,0xf2700783,0x1d63c137,
+        0xc0674cc5,0x04ed0092 } },
+    /* 119 */
+    { { 0x9ae90393,0x1f4185d1,0x4a3d64e6,0x3047b429,0x9854fc14,0xae0001a6,
+        0x0177c387,0xa0a91fc1 },
+      { 0xae2c831e,0xff0a3f01,0x2b727e16,0xbb76ae82,0x5a3075b4,0x8f12c8a1,
+        0x9ed20c41,0x084cf988 } },
+    /* 120 */
+    { { 0xfca6becf,0xd98509de,0x7dffb328,0x2fceae80,0x4778e8b9,0x5d8a15c4,
+        0x73abf77e,0xd57955b2 },
+      { 0x31b5d4f1,0x210da79e,0x3cfa7a1c,0xaa52f04b,0xdc27c20b,0xd4d12089,
+        0x02d141f1,0x8e14ea42 } },
+    /* 121 */
+    { { 0xf2897042,0xeed50345,0x43402c4a,0x8d05331f,0xc8bdfb21,0xc8d9c194,
+        0x2aa4d158,0x597e1a37 },
+      { 0xcf0bd68c,0x0327ec1a,0xab024945,0x6d4be0dc,0xc9fe3e84,0x5b9c8d7a,
+        0x199b4dea,0xca3f0236 } },
+    /* 122 */
+    { { 0x6170bd20,0x592a10b5,0x6d3f5de7,0x0ea897f1,0x44b2ade2,0xa3363ff1,
+        0x309c07e4,0xbde7fd7e },
+      { 0xb8f5432c,0x516bb6d2,0xe043444b,0x210dc1cb,0xf8f95b5a,0x3db01e6f,
+        0x0a7dd198,0xb623ad0e } },
+    /* 123 */
+    { { 0x60c7b65b,0xa75bd675,0x23a4a289,0xab8c5590,0xd7b26795,0xf8220fd0,
+        0x58ec137b,0xd6aa2e46 },
+      { 0x5138bb85,0x10abc00b,0xd833a95c,0x8c31d121,0x1702a32e,0xb24ff00b,
+        0x2dcc513a,0x111662e0 } },
+    /* 124 */
+    { { 0xefb42b87,0x78114015,0x1b6c4dff,0xbd9f5d70,0xa7d7c129,0x66ecccd7,
+        0x94b750f8,0xdb3ee1cb },
+      { 0xf34837cf,0xb26f3db0,0xb9578d4f,0xe7eed18b,0x7c56657d,0x5d2cdf93,
+        0x52206a59,0x886a6442 } },
+    /* 125 */
+    { { 0x65b569ea,0x3c234cfb,0xf72119c1,0x20011141,0xa15a619e,0x8badc85d,
+        0x018a17bc,0xa70cf4eb },
+      { 0x8c4a6a65,0x224f97ae,0x0134378f,0x36e5cf27,0x4f7e0960,0xbe3a609e,
+        0xd1747b77,0xaa4772ab } },
+    /* 126 */
+    { { 0x7aa60cc0,0x67676131,0x0368115f,0xc7916361,0xbbc1bb5a,0xded98bb4,
+        0x30faf974,0x611a6ddc },
+      { 0xc15ee47a,0x30e78cbc,0x4e0d96a5,0x2e896282,0x3dd9ed88,0x36f35adf,
+        0x16429c88,0x5cfffaf8 } },
+    /* 127 */
+    { { 0x9b7a99cd,0xc0d54cff,0x843c45a1,0x7bf3b99d,0x62c739e1,0x038a908f,
+        0x7dc1994c,0x6e5a6b23 },
+      { 0x0ba5db77,0xef8b454e,0xacf60d63,0xb7b8807f,0x76608378,0xe591c0c6,
+        0x242dabcc,0x481a238d } },
+    /* 128 */
+    { { 0x35d0b34a,0xe3417bc0,0x8327c0a7,0x440b386b,0xac0362d1,0x8fb7262d,
+        0xe0cdf943,0x2c41114c },
+      { 0xad95a0b1,0x2ba5cef1,0x67d54362,0xc09b37a8,0x01e486c9,0x26d6cdd2,
+        0x42ff9297,0x20477abf } },
+    /* 129 */
+    { { 0x18d65dbf,0x2f75173c,0x339edad8,0x77bf940e,0xdcf1001c,0x7022d26b,
+        0xc77396b6,0xac66409a },
+      { 0xc6261cc3,0x8b0bb36f,0x190e7e90,0x213f7bc9,0xa45e6c10,0x6541ceba,
+        0xcc122f85,0xce8e6975 } },
+    /* 130 */
+    { { 0xbc0a67d2,0x0f121b41,0x444d248a,0x62d4760a,0x659b4737,0x0e044f1d,
+        0x250bb4a8,0x08fde365 },
+      { 0x848bf287,0xaceec3da,0xd3369d6e,0xc2a62182,0x92449482,0x3582dfdc,
+        0x565d6cd7,0x2f7e2fd2 } },
+    /* 131 */
+    { { 0xc3770fa7,0xae4b92db,0x379043f9,0x095e8d5c,0x17761171,0x54f34e9d,
+        0x907702ae,0xc65be92e },
+      { 0xf6fd0a40,0x2758a303,0xbcce784b,0xe7d822e3,0x4f9767bf,0x7ae4f585,
+        0xd1193b3a,0x4bff8e47 } },
+    /* 132 */
+    { { 0x00ff1480,0xcd41d21f,0x0754db16,0x2ab8fb7d,0xbbe0f3ea,0xac81d2ef,
+        0x5772967d,0x3e4e4ae6 },
+      { 0x3c5303e6,0x7e18f36d,0x92262397,0x3bd9994b,0x1324c3c0,0x9ed70e26,
+        0x58ec6028,0x5388aefd } },
+    /* 133 */
+    { { 0x5e5d7713,0xad1317eb,0x75de49da,0x09b985ee,0xc74fb261,0x32f5bc4f,
+        0x4f75be0e,0x5cf908d1 },
+      { 0x8e657b12,0x76043510,0xb96ed9e6,0xbfd421a5,0x8970ccc2,0x0e29f51f,
+        0x60f00ce2,0xa698ba40 } },
+    /* 134 */
+    { { 0xef748fec,0x73db1686,0x7e9d2cf9,0xe6e755a2,0xce265eff,0x630b6544,
+        0x7aebad8d,0xb142ef8a },
+      { 0x17d5770a,0xad31af9f,0x2cb3412f,0x66af3b67,0xdf3359de,0x6bd60d1b,
+        0x58515075,0xd1896a96 } },
+    /* 135 */
+    { { 0x33c41c08,0xec5957ab,0x5468e2e1,0x87de94ac,0xac472f6c,0x18816b73,
+        0x7981da39,0x267b0e0b },
+      { 0x8e62b988,0x6e554e5d,0x116d21e7,0xd8ddc755,0x3d2a6f99,0x4610faf0,
+        0xa1119393,0xb54e287a } },
+    /* 136 */
+    { { 0x178a876b,0x0a0122b5,0x085104b4,0x51ff96ff,0x14f29f76,0x050b31ab,
+        0x5f87d4e6,0x84abb28b },
+      { 0x8270790a,0xd5ed439f,0x85e3f46b,0x2d6cb59d,0x6c1e2212,0x75f55c1b,
+        0x17655640,0xe5436f67 } },
+    /* 137 */
+    { { 0x2286e8d5,0x53f9025e,0x864453be,0x353c95b4,0xe408e3a0,0xd832f5bd,
+        0x5b9ce99e,0x0404f68b },
+      { 0xa781e8e5,0xcad33bde,0x163c2f5b,0x3cdf5018,0x0119caa3,0x57576960,
+        0x0ac1c701,0x3a4263df } },
+    /* 138 */
+    { { 0x9aeb596d,0xc2965ecc,0x023c92b4,0x01ea03e7,0x2e013961,0x4704b4b6,
+        0x905ea367,0x0ca8fd3f },
+      { 0x551b2b61,0x92523a42,0x390fcd06,0x1eb7a89c,0x0392a63e,0xe7f1d2be,
+        0x4ddb0c33,0x96dca264 } },
+    /* 139 */
+    { { 0x387510af,0x203bb43a,0xa9a36a01,0x846feaa8,0x2f950378,0xd23a5770,
+        0x3aad59dc,0x4363e212 },
+      { 0x40246a47,0xca43a1c7,0xe55dd24d,0xb362b8d2,0x5d8faf96,0xf9b08604,
+        0xd8bb98c4,0x840e115c } },
+    /* 140 */
+    { { 0x1023e8a7,0xf12205e2,0xd8dc7a0b,0xc808a8cd,0x163a5ddf,0xe292a272,
+        0x30ded6d4,0x5e0d6abd },
+      { 0x7cfc0f64,0x07a721c2,0x0e55ed88,0x42eec01d,0x1d1f9db2,0x26a7bef9,
+        0x2945a25a,0x7dea48f4 } },
+    /* 141 */
+    { { 0xe5060a81,0xabdf6f1c,0xf8f95615,0xe79f9c72,0x06ac268b,0xcfd36c54,
+        0xebfd16d1,0xabc2a2be },
+      { 0xd3e2eac7,0x8ac66f91,0xd2dd0466,0x6f10ba63,0x0282d31b,0x6790e377,
+        0x6c7eefc1,0x4ea35394 } },
+    /* 142 */
+    { { 0x5266309d,0xed8a2f8d,0x81945a3e,0x0a51c6c0,0x578c5dc1,0xcecaf45a,
+        0x1c94ffc3,0x3a76e689 },
+      { 0x7d7b0d0f,0x9aace8a4,0x8f584a5f,0x963ace96,0x4e697fbe,0x51a30c72,
+        0x465e6464,0x8212a10a } },
+    /* 143 */
+    { { 0xcfab8caa,0xef7c61c3,0x0e142390,0x18eb8e84,0x7e9733ca,0xcd1dff67,
+        0x599cb164,0xaa7cab71 },
+      { 0xbc837bd1,0x02fc9273,0xc36af5d7,0xc06407d0,0xf423da49,0x17621292,
+        0xfe0617c3,0x40e38073 } },
+    /* 144 */
+    { { 0xa7bf9b7c,0xf4f80824,0x3fbe30d0,0x365d2320,0x97cf9ce3,0xbfbe5320,
+        0xb3055526,0xe3604700 },
+      { 0x6cc6c2c7,0x4dcb9911,0xba4cbee6,0x72683708,0x637ad9ec,0xdcded434,
+        0xa3dee15f,0x6542d677 } },
+    /* 145 */
+    { { 0x7b6c377a,0x3f32b6d0,0x903448be,0x6cb03847,0x20da8af7,0xd6fdd3a8,
+        0x09bb6f21,0xa6534aee },
+      { 0x1035facf,0x30a1780d,0x9dcb47e6,0x35e55a33,0xc447f393,0x6ea50fe1,
+        0xdc9aef22,0xf3cb672f } },
+    /* 146 */
+    { { 0x3b55fd83,0xeb3719fe,0x875ddd10,0xe0d7a46c,0x05cea784,0x33ac9fa9,
+        0xaae870e7,0x7cafaa2e },
+      { 0x1d53b338,0x9b814d04,0xef87e6c6,0xe0acc0a0,0x11672b0f,0xfb93d108,
+        0xb9bd522e,0x0aab13c1 } },
+    /* 147 */
+    { { 0xd2681297,0xddcce278,0xb509546a,0xcb350eb1,0x7661aaf2,0x2dc43173,
+        0x847012e9,0x4b91a602 },
+      { 0x72f8ddcf,0xdcff1095,0x9a911af4,0x08ebf61e,0xc372430e,0x48f4360a,
+        0x72321cab,0x49534c53 } },
+    /* 148 */
+    { { 0xf07b7e9d,0x83df7d71,0x13cd516f,0xa478efa3,0x6c047ee3,0x78ef264b,
+        0xd65ac5ee,0xcaf46c4f },
+      { 0x92aa8266,0xa04d0c77,0x913684bb,0xedf45466,0xae4b16b0,0x56e65168,
+        0x04c6770f,0x14ce9e57 } },
+    /* 149 */
+    { { 0x965e8f91,0x99445e3e,0xcb0f2492,0xd3aca1ba,0x90c8a0a0,0xd31cc70f,
+        0x3e4c9a71,0x1bb708a5 },
+      { 0x558bdd7a,0xd5ca9e69,0x018a26b1,0x734a0508,0x4c9cf1ec,0xb093aa71,
+        0xda300102,0xf9d126f2 } },
+    /* 150 */
+    { { 0xaff9563e,0x749bca7a,0xb49914a0,0xdd077afe,0xbf5f1671,0xe27a0311,
+        0x729ecc69,0x807afcb9 },
+      { 0xc9b08b77,0x7f8a9337,0x443c7e38,0x86c3a785,0x476fd8ba,0x85fafa59,
+        0x6568cd8c,0x751adcd1 } },
+    /* 151 */
+    { { 0x10715c0d,0x8aea38b4,0x8f7697f7,0xd113ea71,0x93fbf06d,0x665eab14,
+        0x2537743f,0x29ec4468 },
+      { 0xb50bebbc,0x3d94719c,0xe4505422,0x399ee5bf,0x8d2dedb1,0x90cd5b3a,
+        0x92a4077d,0xff9370e3 } },
+    /* 152 */
+    { { 0xc6b75b65,0x59a2d69b,0x266651c5,0x4188f8d5,0x3de9d7d2,0x28a9f33e,
+        0xa2a9d01a,0x9776478b },
+      { 0x929af2c7,0x8852622d,0x4e690923,0x334f5d6d,0xa89a51e9,0xce6cc7e5,
+        0xac2f82fa,0x74a6313f } },
+    /* 153 */
+    { { 0xb75f079c,0xb2f4dfdd,0x18e36fbb,0x85b07c95,0xe7cd36dd,0x1b6cfcf0,
+        0x0ff4863d,0xab75be15 },
+      { 0x173fc9b7,0x81b367c0,0xd2594fd0,0xb90a7420,0xc4091236,0x15fdbf03,
+        0x0b4459f6,0x4ebeac2e } },
+    /* 154 */
+    { { 0x5c9f2c53,0xeb6c5fe7,0x8eae9411,0xd2522011,0xf95ac5d8,0xc8887633,
+        0x2c1baffc,0xdf99887b },
+      { 0x850aaecb,0xbb78eed2,0x01d6a272,0x9d49181b,0xb1cdbcac,0x978dd511,
+        0x779f4058,0x27b040a7 } },
+    /* 155 */
+    { { 0xf73b2eb2,0x90405db7,0x8e1b2118,0xe0df8508,0x5962327e,0x501b7152,
+        0xe4cfa3f5,0xb393dd37 },
+      { 0x3fd75165,0xa1230e7b,0xbcd33554,0xd66344c2,0x0f7b5022,0x6c36f1be,
+        0xd0463419,0x09588c12 } },
+    /* 156 */
+    { { 0x02601c3b,0xe086093f,0xcf5c335f,0xfb0252f8,0x894aff28,0x955cf280,
+        0xdb9f648b,0x81c879a9 },
+      { 0xc6f56c51,0x040e687c,0x3f17618c,0xfed47169,0x9059353b,0x44f88a41,
+        0x5fc11bc4,0xfa0d48f5 } },
+    /* 157 */
+    { { 0xe1608e4d,0xbc6e1c9d,0x3582822c,0x010dda11,0x157ec2d7,0xf6b7ddc1,
+        0xb6a367d6,0x8ea0e156 },
+      { 0x2383b3b4,0xa354e02f,0x3f01f53c,0x69966b94,0x2de03ca5,0x4ff6632b,
+        0xfa00b5ac,0x3f5ab924 } },
+    /* 158 */
+    { { 0x59739efb,0x337bb0d9,0xe7ebec0d,0xc751b0f4,0x411a67d1,0x2da52dd6,
+        0x2b74256e,0x8bc76887 },
+      { 0x82d3d253,0xa5be3b72,0xf58d779f,0xa9f679a1,0xe16767bb,0xa1cac168,
+        0x60fcf34f,0xb386f190 } },
+    /* 159 */
+    { { 0x2fedcfc2,0x31f3c135,0x62f8af0d,0x5396bf62,0xe57288c2,0x9a02b4ea,
+        0x1b069c4d,0x4cb460f7 },
+      { 0x5b8095ea,0xae67b4d3,0x6fc07603,0x92bbf859,0xb614a165,0xe1475f66,
+        0x95ef5223,0x52c0d508 } },
+    /* 160 */
+    { { 0x15339848,0x231c210e,0x70778c8d,0xe87a28e8,0x6956e170,0x9d1de661,
+        0x2bb09c0b,0x4ac3c938 },
+      { 0x6998987d,0x19be0551,0xae09f4d6,0x8b2376c4,0x1a3f933d,0x1de0b765,
+        0xe39705f4,0x380d94c7 } },
+    /* 161 */
+    { { 0x81542e75,0x01a355aa,0xee01b9b7,0x96c724a1,0x624d7087,0x6b3a2977,
+        0xde2637af,0x2ce3e171 },
+      { 0xf5d5bc1a,0xcfefeb49,0x2777e2b5,0xa655607e,0x9513756c,0x4feaac2f,
+        0x0b624e4d,0x2e6cd852 } },
+    /* 162 */
+    { { 0x8c31c31d,0x3685954b,0x5bf21a0c,0x68533d00,0x75c79ec9,0x0bd7626e,
+        0x42c69d54,0xca177547 },
+      { 0xf6d2dbb2,0xcc6edaff,0x174a9d18,0xfd0d8cbd,0xaa4578e8,0x875e8793,
+        0x9cab2ce6,0xa976a713 } },
+    /* 163 */
+    { { 0x93fb353d,0x0a651f1b,0x57fcfa72,0xd75cab8b,0x31b15281,0xaa88cfa7,
+        0x0a1f4999,0x8720a717 },
+      { 0x693e1b90,0x8c3e8d37,0x16f6dfc3,0xd345dc0b,0xb52a8742,0x8ea8d00a,
+        0xc769893c,0x9719ef29 } },
+    /* 164 */
+    { { 0x58e35909,0x820eed8d,0x33ddc116,0x9366d8dc,0x6e205026,0xd7f999d0,
+        0xe15704c1,0xa5072976 },
+      { 0xc4e70b2e,0x002a37ea,0x6890aa8a,0x84dcf657,0x645b2a5c,0xcd71bf18,
+        0xf7b77725,0x99389c9d } },
+    /* 165 */
+    { { 0x7ada7a4b,0x238c08f2,0xfd389366,0x3abe9d03,0x766f512c,0x6b672e89,
+        0x202c82e4,0xa88806aa },
+      { 0xd380184e,0x6602044a,0x126a8b85,0xa8cb78c4,0xad844f17,0x79d670c0,
+        0x4738dcfe,0x0043bffb } },
+    /* 166 */
+    { { 0x36d5192e,0x8d59b5dc,0x4590b2af,0xacf885d3,0x11601781,0x83566d0a,
+        0xba6c4866,0x52f3ef01 },
+      { 0x0edcb64d,0x3986732a,0x8068379f,0x0a482c23,0x7040f309,0x16cbe5fa,
+        0x9ef27e75,0x3296bd89 } },
+    /* 167 */
+    { { 0x454d81d7,0x476aba89,0x51eb9b3c,0x9eade7ef,0x81c57986,0x619a21cd,
+        0xaee571e9,0x3b90febf },
+      { 0x5496f7cb,0x9393023e,0x7fb51bc4,0x55be41d8,0x99beb5ce,0x03f1dd48,
+        0x9f810b18,0x6e88069d } },
+    /* 168 */
+    { { 0xb43ea1db,0xce37ab11,0x5259d292,0x0a7ff1a9,0x8f84f186,0x851b0221,
+        0xdefaad13,0xa7222bea },
+      { 0x2b0a9144,0xa2ac78ec,0xf2fa59c5,0x5a024051,0x6147ce38,0x91d1eca5,
+        0xbc2ac690,0xbe94d523 } },
+    /* 169 */
+    { { 0x0b226ce7,0x72f4945e,0x967e8b70,0xb8afd747,0x85a6c63e,0xedea46f1,
+        0x9be8c766,0x7782defe },
+      { 0x3db38626,0x760d2aa4,0x76f67ad1,0x460ae787,0x54499cdb,0x341b86fc,
+        0xa2892e4b,0x03838567 } },
+    /* 170 */
+    { { 0x79ec1a0f,0x2d8daefd,0xceb39c97,0x3bbcd6fd,0x58f61a95,0xf5575ffc,
+        0xadf7b420,0xdbd986c4 },
+      { 0x15f39eb7,0x81aa8814,0xb98d976c,0x6ee2fcf5,0xcf2f717d,0x5465475d,
+        0x6860bbd0,0x8e24d3c4 } },
+    /* 171 */
+    { { 0x9a587390,0x749d8e54,0x0cbec588,0x12bb194f,0xb25983c6,0x46e07da4,
+        0x407bafc8,0x541a99c4 },
+      { 0x624c8842,0xdb241692,0xd86c05ff,0x6044c12a,0x4f7fcf62,0xc59d14b4,
+        0xf57d35d1,0xc0092c49 } },
+    /* 172 */
+    { { 0xdf2e61ef,0xd3cc75c3,0x2e1b35ca,0x7e8841c8,0x909f29f4,0xc62d30d1,
+        0x7286944d,0x75e40634 },
+      { 0xbbc237d0,0xe7d41fc5,0xec4f01c9,0xc9537bf0,0x282bd534,0x91c51a16,
+        0xc7848586,0x5b7cb658 } },
+    /* 173 */
+    { { 0x8a28ead1,0x964a7084,0xfd3b47f6,0x802dc508,0x767e5b39,0x9ae4bfd1,
+        0x8df097a1,0x7ae13eba },
+      { 0xeadd384e,0xfd216ef8,0xb6b2ff06,0x0361a2d9,0x4bcdb5f3,0x204b9878,
+        0xe2a8e3fd,0x787d8074 } },
+    /* 174 */
+    { { 0x757fbb1c,0xc5e25d6b,0xca201deb,0xe47bddb2,0x6d2233ff,0x4a55e9a3,
+        0x9ef28484,0x5c222819 },
+      { 0x88315250,0x773d4a85,0x827097c1,0x21b21a2b,0xdef5d33f,0xab7c4ea1,
+        0xbaf0f2b0,0xe45d37ab } },
+    /* 175 */
+    { { 0x28511c8a,0xd2df1e34,0xbdca6cd3,0xebb229c8,0x627c39a7,0x578a71a7,
+        0x84dfb9d3,0xed7bc122 },
+      { 0x93dea561,0xcf22a6df,0xd48f0ed1,0x5443f18d,0x5bad23e8,0xd8b86140,
+        0x45ca6d27,0xaac97cc9 } },
+    /* 176 */
+    { { 0xa16bd00a,0xeb54ea74,0xf5c0bcc1,0xd839e9ad,0x1f9bfc06,0x092bb7f1,
+        0x1163dc4e,0x318f97b3 },
+      { 0xc30d7138,0xecc0c5be,0xabc30220,0x44e8df23,0xb0223606,0x2bb7972f,
+        0x9a84ff4d,0xfa41faa1 } },
+    /* 177 */
+    { { 0xa6642269,0x4402d974,0x9bb783bd,0xc81814ce,0x7941e60b,0x398d38e4,
+        0x1d26e9e2,0x38bb6b2c },
+      { 0x6a577f87,0xc64e4a25,0xdc11fe1c,0x8b52d253,0x62280728,0xff336abf,
+        0xce7601a5,0x94dd0905 } },
+    /* 178 */
+    { { 0xde93f92a,0x156cf7dc,0x89b5f315,0xa01333cb,0xc995e750,0x02404df9,
+        0xd25c2ae9,0x92077867 },
+      { 0x0bf39d44,0xe2471e01,0x96bb53d7,0x5f2c9020,0x5c9c3d8f,0x4c44b7b3,
+        0xd29beb51,0x81e8428b } },
+    /* 179 */
+    { { 0xc477199f,0x6dd9c2ba,0x6b5ecdd9,0x8cb8eeee,0xee40fd0e,0x8af7db3f,
+        0xdbbfa4b1,0x1b94ab62 },
+      { 0xce47f143,0x44f0d8b3,0x63f46163,0x51e623fc,0xcc599383,0xf18f270f,
+        0x055590ee,0x06a38e28 } },
+    /* 180 */
+    { { 0xb3355b49,0x2e5b0139,0xb4ebf99b,0x20e26560,0xd269f3dc,0xc08ffa6b,
+        0x83d9d4f8,0xa7b36c20 },
+      { 0x1b3e8830,0x64d15c3a,0xa89f9c0b,0xd5fceae1,0xe2d16930,0xcfeee4a2,
+        0xa2822a20,0xbe54c6b4 } },
+    /* 181 */
+    { { 0x8d91167c,0xd6cdb3df,0xe7a6625e,0x517c3f79,0x346ac7f4,0x7105648f,
+        0xeae022bb,0xbf30a5ab },
+      { 0x93828a68,0x8e7785be,0x7f3ef036,0x5161c332,0x592146b2,0xe11b5feb,
+        0x2732d13a,0xd1c820de } },
+    /* 182 */
+    { { 0x9038b363,0x043e1347,0x6b05e519,0x58c11f54,0x6026cad1,0x4fe57abe,
+        0x68a18da3,0xb7d17bed },
+      { 0xe29c2559,0x44ca5891,0x5bfffd84,0x4f7a0376,0x74e46948,0x498de4af,
+        0x6412cc64,0x3997fd5e } },
+    /* 183 */
+    { { 0x8bd61507,0xf2074682,0x34a64d2a,0x29e132d5,0x8a8a15e3,0xffeddfb0,
+        0x3c6c13e8,0x0eeb8929 },
+      { 0xa7e259f8,0xe9b69a3e,0xd13e7e67,0xce1db7e6,0xad1fa685,0x277318f6,
+        0xc922b6ef,0x228916f8 } },
+    /* 184 */
+    { { 0x0a12ab5b,0x959ae25b,0x957bc136,0xcc11171f,0xd16e2b0c,0x8058429e,
+        0x6e93097e,0xec05ad1d },
+      { 0xac3f3708,0x157ba5be,0x30b59d77,0x31baf935,0x118234e5,0x47b55237,
+        0x7ff11b37,0x7d314156 } },
+    /* 185 */
+    { { 0xf6dfefab,0x7bd9c05c,0xdcb37707,0xbe2f2268,0x3a38bb95,0xe53ead97,
+        0x9bc1d7a3,0xe9ce66fc },
+      { 0x6f6a02a1,0x75aa1576,0x60e600ed,0x38c087df,0x68cdc1b9,0xf8947f34,
+        0x72280651,0xd9650b01 } },
+    /* 186 */
+    { { 0x5a057e60,0x504b4c4a,0x8def25e4,0xcbccc3be,0x17c1ccbd,0xa6353208,
+        0x804eb7a2,0x14d6699a },
+      { 0xdb1f411a,0x2c8a8415,0xf80d769c,0x09fbaf0b,0x1c2f77ad,0xb4deef90,
+        0x0d43598a,0x6f4c6841 } },
+    /* 187 */
+    { { 0x96c24a96,0x8726df4e,0xfcbd99a3,0x534dbc85,0x8b2ae30a,0x3c466ef2,
+        0x61189abb,0x4c4350fd },
+      { 0xf855b8da,0x2967f716,0x463c38a1,0x41a42394,0xeae93343,0xc37e1413,
+        0x5a3118b5,0xa726d242 } },
+    /* 188 */
+    { { 0x948c1086,0xdae6b3ee,0xcbd3a2e1,0xf1de503d,0x03d022f3,0x3f35ed3f,
+        0xcc6cf392,0x13639e82 },
+      { 0xcdafaa86,0x9ac938fb,0x2654a258,0xf45bc5fb,0x45051329,0x1963b26e,
+        0xc1a335a3,0xca9365e1 } },
+    /* 189 */
+    { { 0x4c3b2d20,0x3615ac75,0x904e241b,0x742a5417,0xcc9d071d,0xb08521c4,
+        0x970b72a5,0x9ce29c34 },
+      { 0x6d3e0ad6,0x8cc81f73,0xf2f8434c,0x8060da9e,0x6ce862d9,0x35ed1d1a,
+        0xab42af98,0x48c4abd7 } },
+    /* 190 */
+    { { 0x40c7485a,0xd221b0cc,0xe5274dbf,0xead455bb,0x9263d2e8,0x493c7698,
+        0xf67b33cb,0x78017c32 },
+      { 0x930cb5ee,0xb9d35769,0x0c408ed2,0xc0d14e94,0x272f1a4d,0xf8b7bf55,
+        0xde5c1c04,0x53cd0454 } },
+    /* 191 */
+    { { 0x5d28ccac,0xbcd585fa,0x005b746e,0x5f823e56,0xcd0123aa,0x7c79f0a1,
+        0xd3d7fa8f,0xeea465c1 },
+      { 0x0551803b,0x7810659f,0x7ce6af70,0x6c0b599f,0x29288e70,0x4195a770,
+        0x7ae69193,0x1b6e42a4 } },
+    /* 192 */
+    { { 0xf67d04c3,0x2e80937c,0x89eeb811,0x1e312be2,0x92594d60,0x56b5d887,
+        0x187fbd3d,0x0224da14 },
+      { 0x0c5fe36f,0x87abb863,0x4ef51f5f,0x580f3c60,0xb3b429ec,0x964fb1bf,
+        0x42bfff33,0x60838ef0 } },
+    /* 193 */
+    { { 0x7e0bbe99,0x432cb2f2,0x04aa39ee,0x7bda44f3,0x9fa93903,0x5f497c7a,
+        0x2d331643,0x636eb202 },
+      { 0x93ae00aa,0xfcfd0e61,0x31ae6d2f,0x875a00fe,0x9f93901c,0xf43658a2,
+        0x39218bac,0x8844eeb6 } },
+    /* 194 */
+    { { 0x6b3bae58,0x114171d2,0x17e39f3e,0x7db3df71,0x81a8eada,0xcd37bc7f,
+        0x51fb789e,0x27ba83dc },
+      { 0xfbf54de5,0xa7df439f,0xb5fe1a71,0x7277030b,0xdb297a48,0x42ee8e35,
+        0x87f3a4ab,0xadb62d34 } },
+    /* 195 */
+    { { 0xa175df2a,0x9b1168a2,0x618c32e9,0x082aa04f,0x146b0916,0xc9e4f2e7,
+        0x75e7c8b2,0xb990fd76 },
+      { 0x4df37313,0x0829d96b,0xd0b40789,0x1c205579,0x78087711,0x66c9ae4a,
+        0x4d10d18d,0x81707ef9 } },
+    /* 196 */
+    { { 0x03d6ff96,0x97d7cab2,0x0d843360,0x5b851bfc,0xd042db4b,0x268823c4,
+        0xd5a8aa5c,0x3792daea },
+      { 0x941afa0b,0x52818865,0x42d83671,0xf3e9e741,0x5be4e0a7,0x17c82527,
+        0x94b001ba,0x5abd635e } },
+    /* 197 */
+    { { 0x0ac4927c,0x727fa84e,0xa7c8cf23,0xe3886035,0x4adca0df,0xa4bcd5ea,
+        0x846ab610,0x5995bf21 },
+      { 0x829dfa33,0xe90f860b,0x958fc18b,0xcaafe2ae,0x78630366,0x9b3baf44,
+        0xd483411e,0x44c32ca2 } },
+    /* 198 */
+    { { 0xe40ed80c,0xa74a97f1,0x31d2ca82,0x5f938cb1,0x7c2d6ad9,0x53f2124b,
+        0x8082a54c,0x1f2162fb },
+      { 0x720b173e,0x7e467cc5,0x085f12f9,0x40e8a666,0x4c9d65dc,0x8cebc20e,
+        0xc3e907c9,0x8f1d402b } },
+    /* 199 */
+    { { 0xfbc4058a,0x4f592f9c,0x292f5670,0xb15e14b6,0xbc1d8c57,0xc55cfe37,
+        0x926edbf9,0xb1980f43 },
+      { 0x32c76b09,0x98c33e09,0x33b07f78,0x1df5279d,0x863bb461,0x6f08ead4,
+        0x37448e45,0x2828ad9b } },
+    /* 200 */
+    { { 0xc4cf4ac5,0x696722c4,0xdde64afb,0xf5ac1a3f,0xe0890832,0x0551baa2,
+        0x5a14b390,0x4973f127 },
+      { 0x322eac5d,0xe59d8335,0x0bd9b568,0x5e07eef5,0xa2588393,0xab36720f,
+        0xdb168ac7,0x6dac8ed0 } },
+    /* 201 */
+    { { 0xeda835ef,0xf7b545ae,0x1d10ed51,0x4aa113d2,0x13741b09,0x035a65e0,
+        0x20b9de4c,0x4b23ef59 },
+      { 0x3c4c7341,0xe82bb680,0x3f58bc37,0xd457706d,0xa51e3ee8,0x73527863,
+        0xddf49a4e,0x4dd71534 } },
+    /* 202 */
+    { { 0x95476cd9,0xbf944672,0xe31a725b,0x648d072f,0xfc4b67e0,0x1441c8b8,
+        0x2f4a4dbb,0xfd317000 },
+      { 0x8995d0e1,0x1cb43ff4,0x0ef729aa,0x76e695d1,0x41798982,0xe0d5f976,
+        0x9569f365,0x14fac58c } },
+    /* 203 */
+    { { 0xf312ae18,0xad9a0065,0xfcc93fc9,0x51958dc0,0x8a7d2846,0xd9a14240,
+        0x36abda50,0xed7c7651 },
+      { 0x25d4abbc,0x46270f1a,0xf1a113ea,0x9b5dd8f3,0x5b51952f,0xc609b075,
+        0x4d2e9f53,0xfefcb7f7 } },
+    /* 204 */
+    { { 0xba119185,0xbd09497a,0xaac45ba4,0xd54e8c30,0xaa521179,0x492479de,
+        0x87e0d80b,0x1801a57e },
+      { 0xfcafffb0,0x073d3f8d,0xae255240,0x6cf33c0b,0x5b5fdfbc,0x781d763b,
+        0x1ead1064,0x9f8fc11e } },
+    /* 205 */
+    { { 0x5e69544c,0x1583a171,0xf04b7813,0x0eaf8567,0x278a4c32,0x1e22a8fd,
+        0x3d3a69a9,0xa9d3809d },
+      { 0x59a2da3b,0x936c2c2c,0x1895c847,0x38ccbcf6,0x63d50869,0x5e65244e,
+        0xe1178ef7,0x3006b9ae } },
+    /* 206 */
+    { { 0xc9eead28,0x0bb1f2b0,0x89f4dfbc,0x7eef635d,0xb2ce8939,0x074757fd,
+        0x45f8f761,0x0ab85fd7 },
+      { 0x3e5b4549,0xecda7c93,0x97922f21,0x4be2bb5c,0xb43b8040,0x261a1274,
+        0x11e942c2,0xb122d675 } },
+    /* 207 */
+    { { 0x66a5ae7a,0x3be607be,0x76adcbe3,0x01e703fa,0x4eb6e5c5,0xaf904301,
+        0x097dbaec,0x9f599dc1 },
+      { 0x0ff250ed,0x6d75b718,0x349a20dc,0x8eb91574,0x10b227a3,0x425605a4,
+        0x8a294b78,0x7d5528e0 } },
+    /* 208 */
+    { { 0x20c26def,0xf0f58f66,0x582b2d1e,0x025585ea,0x01ce3881,0xfbe7d79b,
+        0x303f1730,0x28ccea01 },
+      { 0x79644ba5,0xd1dabcd1,0x06fff0b8,0x1fc643e8,0x66b3e17b,0xa60a76fc,
+        0xa1d013bf,0xc18baf48 } },
+    /* 209 */
+    { { 0x5dc4216d,0x34e638c8,0x206142ac,0x00c01067,0x95f5064a,0xd453a171,
+        0xb7a9596b,0x9def809d },
+      { 0x67ab8d2c,0x41e8642e,0x6237a2b6,0xb4240433,0x64c4218b,0x7d506a6d,
+        0x68808ce5,0x0357f8b0 } },
+    /* 210 */
+    { { 0x4cd2cc88,0x8e9dbe64,0xf0b8f39d,0xcc61c28d,0xcd30a0c8,0x4a309874,
+        0x1b489887,0xe4a01add },
+      { 0xf57cd8f9,0x2ed1eeac,0xbd594c48,0x1b767d3e,0x7bd2f787,0xa7295c71,
+        0xce10cc30,0x466d7d79 } },
+    /* 211 */
+    { { 0x9dada2c7,0x47d31892,0x8f9aa27d,0x4fa0a6c3,0x820a59e1,0x90e4fd28,
+        0x451ead1a,0xc672a522 },
+      { 0x5d86b655,0x30607cc8,0xf9ad4af1,0xf0235d3b,0x571172a6,0x99a08680,
+        0xf2a67513,0x5e3d64fa } },
+    /* 212 */
+    { { 0x9b3b4416,0xaa6410c7,0xeab26d99,0xcd8fcf85,0xdb656a74,0x5ebff74a,
+        0xeb8e42fc,0x6c8a7a95 },
+      { 0xb02a63bd,0x10c60ba7,0x8b8f0047,0x6b2f2303,0x312d90b0,0x8c6c3738,
+        0xad82ca91,0x348ae422 } },
+    /* 213 */
+    { { 0x5ccda2fb,0x7f474663,0x8e0726d2,0x22accaa1,0x492b1f20,0x85adf782,
+        0xd9ef2d2e,0xc1074de0 },
+      { 0xae9a65b3,0xfcf3ce44,0x05d7151b,0xfd71e4ac,0xce6a9788,0xd4711f50,
+        0xc9e54ffc,0xfbadfbdb } },
+    /* 214 */
+    { { 0x20a99363,0x1713f1cd,0x6cf22775,0xb915658f,0x24d359b2,0x968175cd,
+        0x83716fcd,0xb7f976b4 },
+      { 0x5d6dbf74,0x5758e24d,0x71c3af36,0x8d23bafd,0x0243dfe3,0x48f47760,
+        0xcafcc805,0xf4d41b2e } },
+    /* 215 */
+    { { 0xfdabd48d,0x51f1cf28,0x32c078a4,0xce81be36,0x117146e9,0x6ace2974,
+        0xe0160f10,0x180824ea },
+      { 0x66e58358,0x0387698b,0xce6ca358,0x63568752,0x5e41e6c5,0x82380e34,
+        0x83cf6d25,0x67e5f639 } },
+    /* 216 */
+    { { 0xcf4899ef,0xf89ccb8d,0x9ebb44c0,0x949015f0,0xb2598ec9,0x546f9276,
+        0x04c11fc6,0x9fef789a },
+      { 0x53d2a071,0x6d367ecf,0xa4519b09,0xb10e1a7f,0x611e2eef,0xca6b3fb0,
+        0xa99c4e20,0xbc80c181 } },
+    /* 217 */
+    { { 0xe5eb82e6,0x972536f8,0xf56cb920,0x1a484fc7,0x50b5da5e,0xc78e2171,
+        0x9f8cdf10,0x49270e62 },
+      { 0xea6b50ad,0x1a39b7bb,0xa2388ffc,0x9a0284c1,0x8107197b,0x5403eb17,
+        0x61372f7f,0xd2ee52f9 } },
+    /* 218 */
+    { { 0x88e0362a,0xd37cd285,0x8fa5d94d,0x442fa8a7,0xa434a526,0xaff836e5,
+        0xe5abb733,0xdfb478be },
+      { 0x673eede6,0xa91f1ce7,0x2b5b2f04,0xa5390ad4,0x5530da2f,0x5e66f7bf,
+        0x08df473a,0xd9a140b4 } },
+    /* 219 */
+    { { 0x6e8ea498,0x0e0221b5,0x3563ee09,0x62347829,0x335d2ade,0xe06b8391,
+        0x623f4b1a,0x760c058d },
+      { 0xc198aa79,0x0b89b58c,0xf07aba7f,0xf74890d2,0xfde2556a,0x4e204110,
+        0x8f190409,0x7141982d } },
+    /* 220 */
+    { { 0x4d4b0f45,0x6f0a0e33,0x392a94e1,0xd9280b38,0xb3c61d5e,0x3af324c6,
+        0x89d54e47,0x3af9d1ce },
+      { 0x20930371,0xfd8f7981,0x21c17097,0xeda2664c,0xdc42309b,0x0e9545dc,
+        0x73957dd6,0xb1f815c3 } },
+    /* 221 */
+    { { 0x89fec44a,0x84faa78e,0x3caa4caf,0xc8c2ae47,0xc1b6a624,0x691c807d,
+        0x1543f052,0xa41aed14 },
+      { 0x7d5ffe04,0x42435399,0x625b6e20,0x8bacb2df,0x87817775,0x85d660be,
+        0x86fb60ef,0xd6e9c1dd } },
+    /* 222 */
+    { { 0xc6853264,0x3aa2e97e,0xe2304a0b,0x771533b7,0xb8eae9be,0x1b912bb7,
+        0xae9bf8c2,0x9c9c6e10 },
+      { 0xe030b74c,0xa2309a59,0x6a631e90,0x4ed7494d,0xa49b79f2,0x89f44b23,
+        0x40fa61b6,0x566bd596 } },
+    /* 223 */
+    { { 0xc18061f3,0x066c0118,0x7c83fc70,0x190b25d3,0x27273245,0xf05fc8e0,
+        0xf525345e,0xcf2c7390 },
+      { 0x10eb30cf,0xa09bceb4,0x0d77703a,0xcfd2ebba,0x150ff255,0xe842c43a,
+        0x8aa20979,0x02f51755 } },
+    /* 224 */
+    { { 0xaddb7d07,0x396ef794,0x24455500,0x0b4fc742,0xc78aa3ce,0xfaff8eac,
+        0xe8d4d97d,0x14e9ada5 },
+      { 0x2f7079e2,0xdaa480a1,0xe4b0800e,0x45baa3cd,0x7838157d,0x01765e2d,
+        0x8e9d9ae8,0xa0ad4fab } },
+    /* 225 */
+    { { 0x4a653618,0x0bfb7621,0x31eaaa5f,0x1872813c,0x44949d5e,0x1553e737,
+        0x6e56ed1e,0xbcd530b8 },
+      { 0x32e9c47b,0x169be853,0xb50059ab,0xdc2776fe,0x192bfbb4,0xcdba9761,
+        0x6979341d,0x909283cf } },
+    /* 226 */
+    { { 0x76e81a13,0x67b00324,0x62171239,0x9bee1a99,0xd32e19d6,0x08ed361b,
+        0xace1549a,0x35eeb7c9 },
+      { 0x7e4e5bdc,0x1280ae5a,0xb6ceec6e,0x2dcd2cd3,0x6e266bc1,0x52e4224c,
+        0x448ae864,0x9a8b2cf4 } },
+    /* 227 */
+    { { 0x09d03b59,0xf6471bf2,0xb65af2ab,0xc90e62a3,0xebd5eec9,0xff7ff168,
+        0xd4491379,0x6bdb60f4 },
+      { 0x8a55bc30,0xdadafebc,0x10097fe0,0xc79ead16,0x4c1e3bdd,0x42e19741,
+        0x94ba08a9,0x01ec3cfd } },
+    /* 228 */
+    { { 0xdc9485c2,0xba6277eb,0x22fb10c7,0x48cc9a79,0x70a28d8a,0x4f61d60f,
+        0x475464f6,0xd1acb1c0 },
+      { 0x26f36612,0xd26902b1,0xe0618d8b,0x59c3a44e,0x308357ee,0x4df8a813,
+        0x405626c2,0x7dcd079d } },
+    /* 229 */
+    { { 0xf05a4b48,0x5ce7d4d3,0x37230772,0xadcd2952,0x812a915a,0xd18f7971,
+        0x377d19b8,0x0bf53589 },
+      { 0x6c68ea73,0x35ecd95a,0x823a584d,0xc7f3bbca,0xf473a723,0x9fb674c6,
+        0xe16686fc,0xd28be4d9 } },
+    /* 230 */
+    { { 0x38fa8e4b,0x5d2b9906,0x893fd8fc,0x559f186e,0x436fb6fc,0x3a6de2aa,
+        0x510f88ce,0xd76007aa },
+      { 0x523a4988,0x2d10aab6,0x74dd0273,0xb455cf44,0xa3407278,0x7f467082,
+        0xb303bb01,0xf2b52f68 } },
+    /* 231 */
+    { { 0x9835b4ca,0x0d57eafa,0xbb669cbc,0x2d2232fc,0xc6643198,0x8eeeb680,
+        0xcc5aed3a,0xd8dbe98e },
+      { 0xc5a02709,0xcba9be3f,0xf5ba1fa8,0x30be68e5,0xf10ea852,0xfebd43cd,
+        0xee559705,0xe01593a3 } },
+    /* 232 */
+    { { 0xea75a0a6,0xd3e5af50,0x57858033,0x512226ac,0xd0176406,0x6fe6d50f,
+        0xaeb8ef06,0xafec07b1 },
+      { 0x80bb0a31,0x7fb99567,0x37309aae,0x6f1af3cc,0x01abf389,0x9153a15a,
+        0x6e2dbfdd,0xa71b9354 } },
+    /* 233 */
+    { { 0x18f593d2,0xbf8e12e0,0xa078122b,0xd1a90428,0x0ba4f2ad,0x150505db,
+        0x628523d9,0x53a2005c },
+      { 0xe7f2b935,0x07c8b639,0xc182961a,0x2bff975a,0x7518ca2c,0x86bceea7,
+        0x3d588e3d,0xbf47d19b } },
+    /* 234 */
+    { { 0xdd7665d5,0x672967a7,0x2f2f4de5,0x4e303057,0x80d4903f,0x144005ae,
+        0x39c9a1b6,0x001c2c7f },
+      { 0x69efc6d6,0x143a8014,0x7bc7a724,0xc810bdaa,0xa78150a4,0x5f65670b,
+        0x86ffb99b,0xfdadf8e7 } },
+    /* 235 */
+    { { 0xffc00785,0xfd38cb88,0x3b48eb67,0x77fa7591,0xbf368fbc,0x0454d055,
+        0x5aa43c94,0x3a838e4d },
+      { 0x3e97bb9a,0x56166329,0x441d94d9,0x9eb93363,0x0adb2a83,0x515591a6,
+        0x873e1da3,0x3cdb8257 } },
+    /* 236 */
+    { { 0x7de77eab,0x137140a9,0x41648109,0xf7e1c50d,0xceb1d0df,0x762dcad2,
+        0xf1f57fba,0x5a60cc89 },
+      { 0x40d45673,0x80b36382,0x5913c655,0x1b82be19,0xdd64b741,0x057284b8,
+        0xdbfd8fc0,0x922ff56f } },
+    /* 237 */
+    { { 0xc9a129a1,0x1b265dee,0xcc284e04,0xa5b1ce57,0xcebfbe3c,0x04380c46,
+        0xf6c5cd62,0x72919a7d },
+      { 0x8fb90f9a,0x298f453a,0x88e4031b,0xd719c00b,0x796f1856,0xe32c0e77,
+        0x3624089a,0x5e791780 } },
+    /* 238 */
+    { { 0x7f63cdfb,0x5c16ec55,0xf1cae4fd,0x8e6a3571,0x560597ca,0xfce26bea,
+        0xe24c2fab,0x4e0a5371 },
+      { 0xa5765357,0x276a40d3,0x0d73a2b4,0x3c89af44,0x41d11a32,0xb8f370ae,
+        0xd56604ee,0xf5ff7818 } },
+    /* 239 */
+    { { 0x1a09df21,0xfbf3e3fe,0xe66e8e47,0x26d5d28e,0x29c89015,0x2096bd0a,
+        0x533f5e64,0xe41df0e9 },
+      { 0xb3ba9e3f,0x305fda40,0x2604d895,0xf2340ceb,0x7f0367c7,0x0866e192,
+        0xac4f155f,0x8edd7d6e } },
+    /* 240 */
+    { { 0x0bfc8ff3,0xc9a1dc0e,0xe936f42f,0x14efd82b,0xcca381ef,0x67016f7c,
+        0xed8aee96,0x1432c1ca },
+      { 0x70b23c26,0xec684829,0x0735b273,0xa64fe873,0xeaef0f5a,0xe389f6e5,
+        0x5ac8d2c6,0xcaef480b } },
+    /* 241 */
+    { { 0x75315922,0x5245c978,0x3063cca5,0xd8295171,0xb64ef2cb,0xf3ce60d0,
+        0x8efae236,0xd0ba177e },
+      { 0xb1b3af60,0x53a9ae8f,0x3d2da20e,0x1a796ae5,0xdf9eef28,0x01d63605,
+        0x1c54ae16,0xf31c957c } },
+    /* 242 */
+    { { 0x49cc4597,0xc0f58d52,0xbae0a028,0xdc5015b0,0x734a814a,0xefc5fc55,
+        0x96e17c3a,0x013404cb },
+      { 0xc9a824bf,0xb29e2585,0x001eaed7,0xd593185e,0x61ef68ac,0x8d6ee682,
+        0x91933e6c,0x6f377c4b } },
+    /* 243 */
+    { { 0xa8333fd2,0x9f93bad1,0x5a2a95b8,0xa8930202,0xeaf75ace,0x211e5037,
+        0xd2d09506,0x6dba3e4e },
+      { 0xd04399cd,0xa48ef98c,0xe6b73ade,0x1811c66e,0xc17ecaf3,0x72f60752,
+        0x3becf4a7,0xf13cf342 } },
+    /* 244 */
+    { { 0xa919e2eb,0xceeb9ec0,0xf62c0f68,0x83a9a195,0x7aba2299,0xcfba3bb6,
+        0x274bbad3,0xc83fa9a9 },
+      { 0x62fa1ce0,0x0d7d1b0b,0x3418efbf,0xe58b60f5,0x52706f04,0xbfa8ef9e,
+        0x5d702683,0xb49d70f4 } },
+    /* 245 */
+    { { 0xfad5513b,0x914c7510,0xb1751e2d,0x05f32eec,0xd9fb9d59,0x6d850418,
+        0x0c30f1cf,0x59cfadbb },
+      { 0x55cb7fd6,0xe167ac23,0x820426a3,0x249367b8,0x90a78864,0xeaeec58c,
+        0x354a4b67,0x5babf362 } },
+    /* 246 */
+    { { 0xee424865,0x37c981d1,0xf2e5577f,0x8b002878,0xb9e0c058,0x702970f1,
+        0x9026c8f0,0x6188c6a7 },
+      { 0xd0f244da,0x06f9a19b,0xfb080873,0x1ecced5c,0x9f213637,0x35470f9b,
+        0xdf50b9d9,0x993fe475 } },
+    /* 247 */
+    { { 0x9b2c3609,0x68e31cdf,0x2c46d4ea,0x84eb19c0,0x9a775101,0x7ac9ec1a,
+        0x4c80616b,0x81f76466 },
+      { 0x75fbe978,0x1d7c2a5a,0xf183b356,0x6743fed3,0x501dd2bf,0x838d1f04,
+        0x5fe9060d,0x564a812a } },
+    /* 248 */
+    { { 0xfa817d1d,0x7a5a64f4,0xbea82e0f,0x55f96844,0xcd57f9aa,0xb5ff5a0f,
+        0x00e51d6c,0x226bf3cf },
+      { 0x2f2833cf,0xd6d1a9f9,0x4f4f89a8,0x20a0a35a,0x8f3f7f77,0x11536c49,
+        0xff257836,0x68779f47 } },
+    /* 249 */
+    { { 0x73043d08,0x79b0c1c1,0x1fc020fa,0xa5446774,0x9a6d26d0,0xd3767e28,
+        0xeb092e0b,0x97bcb0d1 },
+      { 0xf32ed3c3,0x2ab6eaa8,0xb281bc48,0xc8a4f151,0xbfa178f3,0x4d1bf4f3,
+        0x0a784655,0xa872ffe8 } },
+    /* 250 */
+    { { 0xa32b2086,0xb1ab7935,0x8160f486,0xe1eb710e,0x3b6ae6be,0x9bd0cd91,
+        0xb732a36a,0x02812bfc },
+      { 0xcf605318,0xa63fd7ca,0xfdfd6d1d,0x646e5d50,0x2102d619,0xa1d68398,
+        0xfe5396af,0x07391cc9 } },
+    /* 251 */
+    { { 0x8b80d02b,0xc50157f0,0x62877f7f,0x6b8333d1,0x78d542ae,0x7aca1af8,
+        0x7e6d2a08,0x355d2adc },
+      { 0x287386e1,0xb41f335a,0xf8e43275,0xfd272a94,0xe79989ea,0x286ca2cd,
+        0x7c2a3a79,0x3dc2b1e3 } },
+    /* 252 */
+    { { 0x04581352,0xd689d21c,0x376782be,0x0a00c825,0x9fed701f,0x203bd590,
+        0x3ccd846b,0xc4786910 },
+      { 0x24c768ed,0x5dba7708,0x6841f657,0x72feea02,0x6accce0e,0x73313ed5,
+        0xd5bb4d32,0xccc42968 } },
+    /* 253 */
+    { { 0x3d7620b9,0x94e50de1,0x5992a56a,0xd89a5c8a,0x675487c9,0xdc007640,
+        0xaa4871cf,0xe147eb42 },
+      { 0xacf3ae46,0x274ab4ee,0x50350fbe,0xfd4936fb,0x48c840ea,0xdf2afe47,
+        0x080e96e3,0x239ac047 } },
+    /* 254 */
+    { { 0x2bfee8d4,0x481d1f35,0xfa7b0fec,0xce80b5cf,0x2ce9af3c,0x105c4c9e,
+        0xf5f7e59d,0xc55fa1a3 },
+      { 0x8257c227,0x3186f14e,0x342be00b,0xc5b1653f,0xaa904fb2,0x09afc998,
+        0xd4f4b699,0x094cd99c } },
+    /* 255 */
+    { { 0xd703beba,0x8a981c84,0x32ceb291,0x8631d150,0xe3bd49ec,0xa445f2c9,
+        0x42abad33,0xb90a30b6 },
+      { 0xb4a5abf9,0xb465404f,0x75db7603,0x004750c3,0xca35d89f,0x6f9a42cc,
+        0x1b7924f7,0x019f8b9a } },
+};
+
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_base_8(sp_point_256* r, const sp_digit* k,
+        int map, void* heap)
+{
+    return sp_256_ecc_mulmod_stripe_8(r, &p256_base, p256_table,
+                                      k, map, heap);
+}
+
+#endif
+
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * km    Scalar to multiply by.
+ * r     Resulting point.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_base_256(mp_int* km, ecc_point* r, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 p;
+    sp_digit kd[8];
+#endif
+    sp_point_256* point;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    err = sp_256_point_new_8(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+    if (err == MP_OKAY) {
+        sp_256_from_mp(k, 8, km);
+
+            err = sp_256_ecc_mulmod_base_8(point, k, map, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_point_to_ecc_point_8(point, r);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_8(point, 0, heap);
+
+    return err;
+}
+
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+                                                        defined(HAVE_ECC_VERIFY)
+/* Returns 1 if the number of zero.
+ * Implementation is constant time.
+ *
+ * a  Number to check.
+ * returns 1 if the number is zero and 0 otherwise.
+ */
+static int sp_256_iszero_8(const sp_digit* a)
+{
+    return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7]) == 0;
+}
+
+#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+/* Add 1 to a. (a = a + 1)
+ *
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_256_add_one_8(sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "mov	r2, #1\n\t"
+        "ldr	r1, [%[a], #0]\n\t"
+        "add	r1, r2\n\t"
+        "mov	r2, #0\n\t"
+        "str	r1, [%[a], #0]\n\t"
+        "ldr	r1, [%[a], #4]\n\t"
+        "adc	r1, r2\n\t"
+        "str	r1, [%[a], #4]\n\t"
+        "ldr	r1, [%[a], #8]\n\t"
+        "adc	r1, r2\n\t"
+        "str	r1, [%[a], #8]\n\t"
+        "ldr	r1, [%[a], #12]\n\t"
+        "adc	r1, r2\n\t"
+        "str	r1, [%[a], #12]\n\t"
+        "ldr	r1, [%[a], #16]\n\t"
+        "adc	r1, r2\n\t"
+        "str	r1, [%[a], #16]\n\t"
+        "ldr	r1, [%[a], #20]\n\t"
+        "adc	r1, r2\n\t"
+        "str	r1, [%[a], #20]\n\t"
+        "ldr	r1, [%[a], #24]\n\t"
+        "adc	r1, r2\n\t"
+        "str	r1, [%[a], #24]\n\t"
+        "ldr	r1, [%[a], #28]\n\t"
+        "adc	r1, r2\n\t"
+        "str	r1, [%[a], #28]\n\t"
+        :
+        : [a] "r" (a)
+        : "memory", "r1", "r2"
+    );
+}
+
+/* Read big endian unsigned byte array into r.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
+ * a  Byte array.
+ * n  Number of bytes in array to read.
+ */
+static void sp_256_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = n-1; i >= 0; i--) {
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 24U) {
+            r[j] &= 0xffffffff;
+            s = 32U - s;
+            if (j + 1 >= size) {
+                break;
+            }
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+}
+
+/* Generates a scalar that is in the range 1..order-1.
+ *
+ * rng  Random number generator.
+ * k    Scalar value.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+static int sp_256_ecc_gen_k_8(WC_RNG* rng, sp_digit* k)
+{
+    int err;
+    byte buf[32];
+
+    do {
+        err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf));
+        if (err == 0) {
+            sp_256_from_bin(k, 8, buf, (int)sizeof(buf));
+            if (sp_256_cmp_8(k, p256_order2) < 0) {
+                sp_256_add_one_8(k);
+                break;
+            }
+        }
+    }
+    while (err == 0);
+
+    return err;
+}
+
+/* Makes a random EC key pair.
+ *
+ * rng   Random number generator.
+ * priv  Generated private value.
+ * pub   Generated public point.
+ * heap  Heap to use for allocation.
+ * returns ECC_INF_E when the point does not have the correct order, RNG
+ * failures, MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 p;
+    sp_digit kd[8];
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_256 inf;
+#endif
+#endif
+    sp_point_256* point;
+    sp_digit* k = NULL;
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_256* infinity;
+#endif
+    int err;
+
+    (void)heap;
+
+    err = sp_256_point_new_8(heap, p, point);
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, inf, infinity);
+    }
+#endif
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+
+    if (err == MP_OKAY) {
+        err = sp_256_ecc_gen_k_8(rng, k);
+    }
+    if (err == MP_OKAY) {
+            err = sp_256_ecc_mulmod_base_8(point, k, 1, NULL);
+    }
+
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    if (err == MP_OKAY) {
+            err = sp_256_ecc_mulmod_8(infinity, point, p256_order, 1, NULL);
+    }
+    if (err == MP_OKAY) {
+        if ((sp_256_iszero_8(point->x) == 0) || (sp_256_iszero_8(point->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(k, priv);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_point_to_ecc_point_8(point, pub);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_256_point_free_8(infinity, 1, heap);
+#endif
+    sp_256_point_free_8(point, 1, heap);
+
+    return err;
+}
+
+#ifdef HAVE_ECC_DHE
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 32
+ *
+ * r  A single precision integer.
+ * a  Byte array.
+ */
+static void sp_256_to_bin(sp_digit* r, byte* a)
+{
+    int i, j, s = 0, b;
+
+    j = 256 / 8 - 1;
+    a[j] = 0;
+    for (i=0; i<8 && j>=0; i++) {
+        b = 0;
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
+            break;
+        }
+        while (b < 32) {
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
+                break;
+            }
+        }
+        s = 8 - (b - 32);
+        if (j >= 0) {
+            a[j] = 0;
+        }
+        if (s != 0) {
+            j++;
+        }
+    }
+}
+
+/* Multiply the point by the scalar and serialize the X ordinate.
+ * The number is 0 padded to maximum size on output.
+ *
+ * priv    Scalar to multiply the point by.
+ * pub     Point to multiply.
+ * out     Buffer to hold X ordinate.
+ * outLen  On entry, size of the buffer in bytes.
+ *         On exit, length of data in buffer in bytes.
+ * heap    Heap to use for allocation.
+ * returns BUFFER_E if the buffer is to small for output size,
+ * MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_secret_gen_256(mp_int* priv, ecc_point* pub, byte* out,
+                          word32* outLen, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 p;
+    sp_digit kd[8];
+#endif
+    sp_point_256* point = NULL;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    if (*outLen < 32U) {
+        err = BUFFER_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, p, point);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL)
+            err = MEMORY_E;
+    }
+#else
+    k = kd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_256_from_mp(k, 8, priv);
+        sp_256_point_from_ecc_point_8(point, pub);
+            err = sp_256_ecc_mulmod_8(point, point, k, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        sp_256_to_bin(point->x, out);
+        *outLen = 32;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_8(point, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_DHE */
+
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into a. (a -= b)
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_256_sub_in_place_8(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+    __asm__ __volatile__ (
+        "mov	r7, %[a]\n\t"
+        "add	r7, #32\n\t"
+        "\n1:\n\t"
+        "mov	r5, #0\n\t"
+        "sub	r5, %[c]\n\t"
+        "ldr	r3, [%[a]]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b]]\n\t"
+        "ldr	r6, [%[b], #4]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a]]\n\t"
+        "str	r4, [%[a], #4]\n\t"
+        "sbc	%[c], %[c]\n\t"
+        "add	%[a], #8\n\t"
+        "add	%[b], #8\n\t"
+        "cmp	%[a], r7\n\t"
+        "bne	1b\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6", "r7"
+    );
+
+    return c;
+}
+
+#else
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_256_sub_in_place_8(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldr	r3, [%[a], #0]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b], #0]\n\t"
+        "ldr	r6, [%[b], #4]\n\t"
+        "sub	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #0]\n\t"
+        "str	r4, [%[a], #4]\n\t"
+        "ldr	r3, [%[a], #8]\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "ldr	r5, [%[b], #8]\n\t"
+        "ldr	r6, [%[b], #12]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #8]\n\t"
+        "str	r4, [%[a], #12]\n\t"
+        "ldr	r3, [%[a], #16]\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "ldr	r5, [%[b], #16]\n\t"
+        "ldr	r6, [%[b], #20]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #16]\n\t"
+        "str	r4, [%[a], #20]\n\t"
+        "ldr	r3, [%[a], #24]\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "ldr	r5, [%[b], #24]\n\t"
+        "ldr	r6, [%[b], #28]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #24]\n\t"
+        "str	r4, [%[a], #28]\n\t"
+        "sbc	%[c], %[c]\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision digit.
+ */
+SP_NOINLINE static void sp_256_mul_d_8(sp_digit* r, const sp_digit* a,
+        sp_digit b)
+{
+    __asm__ __volatile__ (
+        "mov	r6, #32\n\t"
+        "add	r6, %[a]\n\t"
+        "mov	r8, %[r]\n\t"
+        "mov	r9, r6\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "1:\n\t"
+        "mov	%[r], #0\n\t"
+        "mov	r5, #0\n\t"
+        "# A[] * B\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsl	r7, %[b], #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r3, r7\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "lsr	r7, %[b], #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, %[b], #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "lsl	r7, %[b], #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "# A[] * B - Done\n\t"
+        "mov	%[r], r8\n\t"
+        "str	r3, [%[r]]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "add	%[r], #4\n\t"
+        "add	%[a], #4\n\t"
+        "mov	r8, %[r]\n\t"
+        "cmp	%[a], r9\n\t"
+        "blt	1b\n\t"
+        "str	r3, [%[r]]\n\t"
+        : [r] "+r" (r), [a] "+r" (a)
+        : [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
+    );
+}
+
+/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
+ *
+ * d1   The high order half of the number to divide.
+ * d0   The low order half of the number to divide.
+ * div  The dividend.
+ * returns the result of the division.
+ *
+ * Note that this is an approximate div. It may give an answer 1 larger.
+ */
+SP_NOINLINE static sp_digit div_256_word_8(sp_digit d1, sp_digit d0,
+        sp_digit div)
+{
+    sp_digit r = 0;
+
+    __asm__ __volatile__ (
+        "lsr	r5, %[div], #1\n\t"
+        "add	r5, #1\n\t"
+        "mov	r8, %[d0]\n\t"
+        "mov	r9, %[d1]\n\t"
+        "# Do top 32\n\t"
+        "mov	r6, r5\n\t"
+        "sub	r6, %[d1]\n\t"
+        "sbc	r6, r6\n\t"
+        "add	%[r], %[r]\n\t"
+        "sub	%[r], r6\n\t"
+        "and	r6, r5\n\t"
+        "sub	%[d1], r6\n\t"
+        "# Next 30 bits\n\t"
+        "mov	r4, #29\n\t"
+        "1:\n\t"
+        "lsl	%[d0], %[d0], #1\n\t"
+        "adc	%[d1], %[d1]\n\t"
+        "mov	r6, r5\n\t"
+        "sub	r6, %[d1]\n\t"
+        "sbc	r6, r6\n\t"
+        "add	%[r], %[r]\n\t"
+        "sub	%[r], r6\n\t"
+        "and	r6, r5\n\t"
+        "sub	%[d1], r6\n\t"
+        "sub	r4, #1\n\t"
+        "bpl	1b\n\t"
+        "mov	r7, #0\n\t"
+        "add	%[r], %[r]\n\t"
+        "add	%[r], #1\n\t"
+        "# r * div - Start\n\t"
+        "lsl	%[d1], %[r], #16\n\t"
+        "lsl	r4, %[div], #16\n\t"
+        "lsr	%[d1], %[d1], #16\n\t"
+        "lsr	r4, r4, #16\n\t"
+        "mul	r4, %[d1]\n\t"
+        "lsr	r6, %[div], #16\n\t"
+        "mul	%[d1], r6\n\t"
+        "lsr	r5, %[d1], #16\n\t"
+        "lsl	%[d1], %[d1], #16\n\t"
+        "add	r4, %[d1]\n\t"
+        "adc	r5, r7\n\t"
+        "lsr	%[d1], %[r], #16\n\t"
+        "mul	r6, %[d1]\n\t"
+        "add	r5, r6\n\t"
+        "lsl	r6, %[div], #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "mul	%[d1], r6\n\t"
+        "lsr	r6, %[d1], #16\n\t"
+        "lsl	%[d1], %[d1], #16\n\t"
+        "add	r4, %[d1]\n\t"
+        "adc	r5, r6\n\t"
+        "# r * div - Done\n\t"
+        "mov	%[d1], r8\n\t"
+        "sub	%[d1], r4\n\t"
+        "mov	r4, %[d1]\n\t"
+        "mov	%[d1], r9\n\t"
+        "sbc	%[d1], r5\n\t"
+        "mov	r5, %[d1]\n\t"
+        "add	%[r], r5\n\t"
+        "# r * div - Start\n\t"
+        "lsl	%[d1], %[r], #16\n\t"
+        "lsl	r4, %[div], #16\n\t"
+        "lsr	%[d1], %[d1], #16\n\t"
+        "lsr	r4, r4, #16\n\t"
+        "mul	r4, %[d1]\n\t"
+        "lsr	r6, %[div], #16\n\t"
+        "mul	%[d1], r6\n\t"
+        "lsr	r5, %[d1], #16\n\t"
+        "lsl	%[d1], %[d1], #16\n\t"
+        "add	r4, %[d1]\n\t"
+        "adc	r5, r7\n\t"
+        "lsr	%[d1], %[r], #16\n\t"
+        "mul	r6, %[d1]\n\t"
+        "add	r5, r6\n\t"
+        "lsl	r6, %[div], #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "mul	%[d1], r6\n\t"
+        "lsr	r6, %[d1], #16\n\t"
+        "lsl	%[d1], %[d1], #16\n\t"
+        "add	r4, %[d1]\n\t"
+        "adc	r5, r6\n\t"
+        "# r * div - Done\n\t"
+        "mov	%[d1], r8\n\t"
+        "mov	r6, r9\n\t"
+        "sub	r4, %[d1], r4\n\t"
+        "sbc	r6, r5\n\t"
+        "mov	r5, r6\n\t"
+        "add	%[r], r5\n\t"
+        "# r * div - Start\n\t"
+        "lsl	%[d1], %[r], #16\n\t"
+        "lsl	r4, %[div], #16\n\t"
+        "lsr	%[d1], %[d1], #16\n\t"
+        "lsr	r4, r4, #16\n\t"
+        "mul	r4, %[d1]\n\t"
+        "lsr	r6, %[div], #16\n\t"
+        "mul	%[d1], r6\n\t"
+        "lsr	r5, %[d1], #16\n\t"
+        "lsl	%[d1], %[d1], #16\n\t"
+        "add	r4, %[d1]\n\t"
+        "adc	r5, r7\n\t"
+        "lsr	%[d1], %[r], #16\n\t"
+        "mul	r6, %[d1]\n\t"
+        "add	r5, r6\n\t"
+        "lsl	r6, %[div], #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "mul	%[d1], r6\n\t"
+        "lsr	r6, %[d1], #16\n\t"
+        "lsl	%[d1], %[d1], #16\n\t"
+        "add	r4, %[d1]\n\t"
+        "adc	r5, r6\n\t"
+        "# r * div - Done\n\t"
+        "mov	%[d1], r8\n\t"
+        "mov	r6, r9\n\t"
+        "sub	r4, %[d1], r4\n\t"
+        "sbc	r6, r5\n\t"
+        "mov	r5, r6\n\t"
+        "add	%[r], r5\n\t"
+        "mov	r6, %[div]\n\t"
+        "sub	r6, r4\n\t"
+        "sbc	r6, r6\n\t"
+        "sub	%[r], r6\n\t"
+        : [r] "+r" (r)
+        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+        : "r4", "r5", "r7", "r6", "r8", "r9"
+    );
+    return r;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * m  Mask to AND against each digit.
+ */
+static void sp_256_mask_8(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<8; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    r[0] = a[0] & m;
+    r[1] = a[1] & m;
+    r[2] = a[2] & m;
+    r[3] = a[3] & m;
+    r[4] = a[4] & m;
+    r[5] = a[5] & m;
+    r[6] = a[6] & m;
+    r[7] = a[7] & m;
+#endif
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Nmber to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_256_div_8(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[16], t2[9];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    div = d[7];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 8);
+    for (i=7; i>=0; i--) {
+        r1 = div_256_word_8(t1[8 + i], t1[8 + i - 1], div);
+
+        sp_256_mul_d_8(t2, d, r1);
+        t1[8 + i] += sp_256_sub_in_place_8(&t1[i], t2);
+        t1[8 + i] -= t2[8];
+        sp_256_mask_8(t2, d, t1[8 + i]);
+        t1[8 + i] += sp_256_add_8(&t1[i], &t1[i], t2);
+        sp_256_mask_8(t2, d, t1[8 + i]);
+        t1[8 + i] += sp_256_add_8(&t1[i], &t1[i], t2);
+    }
+
+    r1 = sp_256_cmp_8(t1, d) >= 0;
+    sp_256_cond_sub_8(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_256_mod_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_256_div_8(a, m, NULL, r);
+}
+
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
+/* Order-2 for the P256 curve. */
+static const uint32_t p256_order_minus_2[8] = {
+    0xfc63254fU,0xf3b9cac2U,0xa7179e84U,0xbce6faadU,0xffffffffU,0xffffffffU,
+    0x00000000U,0xffffffffU
+};
+#else
+/* The low half of the order-2 of the P256 curve. */
+static const uint32_t p256_order_low[4] = {
+    0xfc63254fU,0xf3b9cac2U,0xa7179e84U,0xbce6faadU
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Multiply two number mod the order of P256 curve. (r = a * b mod order)
+ *
+ * r  Result of the multiplication.
+ * a  First operand of the multiplication.
+ * b  Second operand of the multiplication.
+ */
+static void sp_256_mont_mul_order_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+    sp_256_mul_8(r, a, b);
+    sp_256_mont_reduce_order_8(r, p256_order, p256_mp_order);
+}
+
+/* Square number mod the order of P256 curve. (r = a * a mod order)
+ *
+ * r  Result of the squaring.
+ * a  Number to square.
+ */
+static void sp_256_mont_sqr_order_8(sp_digit* r, const sp_digit* a)
+{
+    sp_256_sqr_8(r, a);
+    sp_256_mont_reduce_order_8(r, p256_order, p256_mp_order);
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Square number mod the order of P256 curve a number of times.
+ * (r = a ^ n mod order)
+ *
+ * r  Result of the squaring.
+ * a  Number to square.
+ */
+static void sp_256_mont_sqr_n_order_8(sp_digit* r, const sp_digit* a, int n)
+{
+    int i;
+
+    sp_256_mont_sqr_order_8(r, a);
+    for (i=1; i<n; i++) {
+        sp_256_mont_sqr_order_8(r, r);
+    }
+}
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the order of the P256 curve.
+ * (r = 1 / a mod order)
+ *
+ * r   Inverse result.
+ * a   Number to invert.
+ * td  Temporary data.
+ */
+static void sp_256_mont_inv_order_8(sp_digit* r, const sp_digit* a,
+        sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    XMEMCPY(t, a, sizeof(sp_digit) * 8);
+    for (i=254; i>=0; i--) {
+        sp_256_mont_sqr_order_8(t, t);
+        if ((p256_order_minus_2[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_256_mont_mul_order_8(t, t, a);
+        }
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 8U);
+#else
+    sp_digit* t = td;
+    sp_digit* t2 = td + 2 * 8;
+    sp_digit* t3 = td + 4 * 8;
+    int i;
+
+    /* t = a^2 */
+    sp_256_mont_sqr_order_8(t, a);
+    /* t = a^3 = t * a */
+    sp_256_mont_mul_order_8(t, t, a);
+    /* t2= a^c = t ^ 2 ^ 2 */
+    sp_256_mont_sqr_n_order_8(t2, t, 2);
+    /* t3= a^f = t2 * t */
+    sp_256_mont_mul_order_8(t3, t2, t);
+    /* t2= a^f0 = t3 ^ 2 ^ 4 */
+    sp_256_mont_sqr_n_order_8(t2, t3, 4);
+    /* t = a^ff = t2 * t3 */
+    sp_256_mont_mul_order_8(t, t2, t3);
+    /* t3= a^ff00 = t ^ 2 ^ 8 */
+    sp_256_mont_sqr_n_order_8(t2, t, 8);
+    /* t = a^ffff = t2 * t */
+    sp_256_mont_mul_order_8(t, t2, t);
+    /* t2= a^ffff0000 = t ^ 2 ^ 16 */
+    sp_256_mont_sqr_n_order_8(t2, t, 16);
+    /* t = a^ffffffff = t2 * t */
+    sp_256_mont_mul_order_8(t, t2, t);
+    /* t2= a^ffffffff0000000000000000 = t ^ 2 ^ 64  */
+    sp_256_mont_sqr_n_order_8(t2, t, 64);
+    /* t2= a^ffffffff00000000ffffffff = t2 * t */
+    sp_256_mont_mul_order_8(t2, t2, t);
+    /* t2= a^ffffffff00000000ffffffff00000000 = t2 ^ 2 ^ 32  */
+    sp_256_mont_sqr_n_order_8(t2, t2, 32);
+    /* t2= a^ffffffff00000000ffffffffffffffff = t2 * t */
+    sp_256_mont_mul_order_8(t2, t2, t);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6 */
+    for (i=127; i>=112; i--) {
+        sp_256_mont_sqr_order_8(t2, t2);
+        if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_256_mont_mul_order_8(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6f */
+    sp_256_mont_sqr_n_order_8(t2, t2, 4);
+    sp_256_mont_mul_order_8(t2, t2, t3);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84 */
+    for (i=107; i>=64; i--) {
+        sp_256_mont_sqr_order_8(t2, t2);
+        if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_256_mont_mul_order_8(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f */
+    sp_256_mont_sqr_n_order_8(t2, t2, 4);
+    sp_256_mont_mul_order_8(t2, t2, t3);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2 */
+    for (i=59; i>=32; i--) {
+        sp_256_mont_sqr_order_8(t2, t2);
+        if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_256_mont_mul_order_8(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2f */
+    sp_256_mont_sqr_n_order_8(t2, t2, 4);
+    sp_256_mont_mul_order_8(t2, t2, t3);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254 */
+    for (i=27; i>=0; i--) {
+        sp_256_mont_sqr_order_8(t2, t2);
+        if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_256_mont_mul_order_8(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632540 */
+    sp_256_mont_sqr_n_order_8(t2, t2, 4);
+    /* r = a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f */
+    sp_256_mont_mul_order_8(r, t2, t3);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+#ifdef HAVE_ECC_SIGN
+#ifndef SP_ECC_MAX_SIG_GEN
+#define SP_ECC_MAX_SIG_GEN  64
+#endif
+
+/* Sign the hash using the private key.
+ *   e = [hash, 256 bits] from binary
+ *   r = (k.G)->x mod order
+ *   s = (r * x + e) / k mod order
+ * The hash is truncated to the first 256 bits.
+ *
+ * hash     Hash to sign.
+ * hashLen  Length of the hash data.
+ * rng      Random number generator.
+ * priv     Private part of key - scalar.
+ * rm       First part of result as an mp_int.
+ * sm       Sirst part of result as an mp_int.
+ * heap     Heap to use for allocation.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
+                    mp_int* rm, mp_int* sm, mp_int* km, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit ed[2*8];
+    sp_digit xd[2*8];
+    sp_digit kd[2*8];
+    sp_digit rd[2*8];
+    sp_digit td[3 * 2*8];
+    sp_point_256 p;
+#endif
+    sp_digit* e = NULL;
+    sp_digit* x = NULL;
+    sp_digit* k = NULL;
+    sp_digit* r = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_256* point = NULL;
+    sp_digit carry;
+    sp_digit* s = NULL;
+    sp_digit* kInv = NULL;
+    int err = MP_OKAY;
+    int32_t c;
+    int i;
+
+    (void)heap;
+
+    err = sp_256_point_new_8(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 8, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        e = d + 0 * 8;
+        x = d + 2 * 8;
+        k = d + 4 * 8;
+        r = d + 6 * 8;
+        tmp = d + 8 * 8;
+#else
+        e = ed;
+        x = xd;
+        k = kd;
+        r = rd;
+        tmp = td;
+#endif
+        s = e;
+        kInv = k;
+
+        if (hashLen > 32U) {
+            hashLen = 32U;
+        }
+
+        sp_256_from_bin(e, 8, hash, (int)hashLen);
+    }
+
+    for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
+        sp_256_from_mp(x, 8, priv);
+
+        /* New random point. */
+        if (km == NULL || mp_iszero(km)) {
+            err = sp_256_ecc_gen_k_8(rng, k);
+        }
+        else {
+            sp_256_from_mp(k, 8, km);
+            mp_zero(km);
+        }
+        if (err == MP_OKAY) {
+                err = sp_256_ecc_mulmod_base_8(point, k, 1, NULL);
+        }
+
+        if (err == MP_OKAY) {
+            /* r = point->x mod order */
+            XMEMCPY(r, point->x, sizeof(sp_digit) * 8U);
+            sp_256_norm_8(r);
+            c = sp_256_cmp_8(r, p256_order);
+            sp_256_cond_sub_8(r, r, p256_order, 0L - (sp_digit)(c >= 0));
+            sp_256_norm_8(r);
+
+            /* Conv k to Montgomery form (mod order) */
+                sp_256_mul_8(k, k, p256_norm_order);
+            err = sp_256_mod_8(k, k, p256_order);
+        }
+        if (err == MP_OKAY) {
+            sp_256_norm_8(k);
+            /* kInv = 1/k mod order */
+                sp_256_mont_inv_order_8(kInv, k, tmp);
+            sp_256_norm_8(kInv);
+
+            /* s = r * x + e */
+                sp_256_mul_8(x, x, r);
+            err = sp_256_mod_8(x, x, p256_order);
+        }
+        if (err == MP_OKAY) {
+            sp_256_norm_8(x);
+            carry = sp_256_add_8(s, e, x);
+            sp_256_cond_sub_8(s, s, p256_order, 0 - carry);
+            sp_256_norm_8(s);
+            c = sp_256_cmp_8(s, p256_order);
+            sp_256_cond_sub_8(s, s, p256_order, 0L - (sp_digit)(c >= 0));
+            sp_256_norm_8(s);
+
+            /* s = s * k^-1 mod order */
+                sp_256_mont_mul_order_8(s, s, kInv);
+            sp_256_norm_8(s);
+
+            /* Check that signature is usable. */
+            if (sp_256_iszero_8(s) == 0) {
+                break;
+            }
+        }
+    }
+
+    if (i == 0) {
+        err = RNG_FAILURE_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(r, rm);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(s, sm);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XMEMSET(d, 0, sizeof(sp_digit) * 8 * 8);
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    XMEMSET(e, 0, sizeof(sp_digit) * 2U * 8U);
+    XMEMSET(x, 0, sizeof(sp_digit) * 2U * 8U);
+    XMEMSET(k, 0, sizeof(sp_digit) * 2U * 8U);
+    XMEMSET(r, 0, sizeof(sp_digit) * 2U * 8U);
+    XMEMSET(r, 0, sizeof(sp_digit) * 2U * 8U);
+    XMEMSET(tmp, 0, sizeof(sp_digit) * 3U * 2U * 8U);
+#endif
+    sp_256_point_free_8(point, 1, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_SIGN */
+
+#ifdef HAVE_ECC_VERIFY
+/* Verify the signature values with the hash and public key.
+ *   e = Truncate(hash, 256)
+ *   u1 = e/s mod order
+ *   u2 = r/s mod order
+ *   r == (u1.G + u2.Q)->x mod order
+ * Optimization: Leave point in projective form.
+ *   (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
+ *   (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
+ * The hash is truncated to the first 256 bits.
+ *
+ * hash     Hash to sign.
+ * hashLen  Length of the hash data.
+ * rng      Random number generator.
+ * priv     Private part of key - scalar.
+ * rm       First part of result as an mp_int.
+ * sm       Sirst part of result as an mp_int.
+ * heap     Heap to use for allocation.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+int sp_ecc_verify_256(const byte* hash, word32 hashLen, mp_int* pX,
+    mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit u1d[2*8];
+    sp_digit u2d[2*8];
+    sp_digit sd[2*8];
+    sp_digit tmpd[2*8 * 5];
+    sp_point_256 p1d;
+    sp_point_256 p2d;
+#endif
+    sp_digit* u1 = NULL;
+    sp_digit* u2 = NULL;
+    sp_digit* s = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_256* p1;
+    sp_point_256* p2 = NULL;
+    sp_digit carry;
+    int32_t c;
+    int err;
+
+    err = sp_256_point_new_8(heap, p1d, p1);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, p2d, p2);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 8, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        u1  = d + 0 * 8;
+        u2  = d + 2 * 8;
+        s   = d + 4 * 8;
+        tmp = d + 6 * 8;
+#else
+        u1 = u1d;
+        u2 = u2d;
+        s  = sd;
+        tmp = tmpd;
+#endif
+
+        if (hashLen > 32U) {
+            hashLen = 32U;
+        }
+
+        sp_256_from_bin(u1, 8, hash, (int)hashLen);
+        sp_256_from_mp(u2, 8, r);
+        sp_256_from_mp(s, 8, sm);
+        sp_256_from_mp(p2->x, 8, pX);
+        sp_256_from_mp(p2->y, 8, pY);
+        sp_256_from_mp(p2->z, 8, pZ);
+
+        {
+            sp_256_mul_8(s, s, p256_norm_order);
+        }
+        err = sp_256_mod_8(s, s, p256_order);
+    }
+    if (err == MP_OKAY) {
+        sp_256_norm_8(s);
+        {
+            sp_256_mont_inv_order_8(s, s, tmp);
+            sp_256_mont_mul_order_8(u1, u1, s);
+            sp_256_mont_mul_order_8(u2, u2, s);
+        }
+
+            err = sp_256_ecc_mulmod_base_8(p1, u1, 0, heap);
+    }
+    if (err == MP_OKAY) {
+            err = sp_256_ecc_mulmod_8(p2, p2, u2, 0, heap);
+    }
+
+    if (err == MP_OKAY) {
+        {
+            sp_256_proj_point_add_8(p1, p1, p2, tmp);
+            if (sp_256_iszero_8(p1->z)) {
+                if (sp_256_iszero_8(p1->x) && sp_256_iszero_8(p1->y)) {
+                    sp_256_proj_point_dbl_8(p1, p2, tmp);
+                }
+                else {
+                    /* Y ordinate is not used from here - don't set. */
+                    p1->x[0] = 0;
+                    p1->x[1] = 0;
+                    p1->x[2] = 0;
+                    p1->x[3] = 0;
+                    p1->x[4] = 0;
+                    p1->x[5] = 0;
+                    p1->x[6] = 0;
+                    p1->x[7] = 0;
+                    XMEMCPY(p1->z, p256_norm_mod, sizeof(p256_norm_mod));
+                }
+            }
+        }
+
+        /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
+        /* Reload r and convert to Montgomery form. */
+        sp_256_from_mp(u2, 8, r);
+        err = sp_256_mod_mul_norm_8(u2, u2, p256_mod);
+    }
+
+    if (err == MP_OKAY) {
+        /* u1 = r.z'.z' mod prime */
+        sp_256_mont_sqr_8(p1->z, p1->z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(u1, u2, p1->z, p256_mod, p256_mp_mod);
+        *res = (int)(sp_256_cmp_8(p1->x, u1) == 0);
+        if (*res == 0) {
+            /* Reload r and add order. */
+            sp_256_from_mp(u2, 8, r);
+            carry = sp_256_add_8(u2, u2, p256_order);
+            /* Carry means result is greater than mod and is not valid. */
+            if (carry == 0) {
+                sp_256_norm_8(u2);
+
+                /* Compare with mod and if greater or equal then not valid. */
+                c = sp_256_cmp_8(u2, p256_mod);
+                if (c < 0) {
+                    /* Convert to Montogomery form */
+                    err = sp_256_mod_mul_norm_8(u2, u2, p256_mod);
+                    if (err == MP_OKAY) {
+                        /* u1 = (r + 1*order).z'.z' mod prime */
+                        sp_256_mont_mul_8(u1, u2, p1->z, p256_mod,
+                                                                  p256_mp_mod);
+                        *res = (int)(sp_256_cmp_8(p1->x, u1) == 0);
+                    }
+                }
+            }
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL)
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+#endif
+    sp_256_point_free_8(p1, 0, heap);
+    sp_256_point_free_8(p2, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_VERIFY */
+
+#ifdef HAVE_ECC_CHECK_KEY
+/* Check that the x and y oridinates are a valid point on the curve.
+ *
+ * point  EC point.
+ * heap   Heap to use if dynamically allocating.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+static int sp_256_ecc_is_point_8(sp_point_256* point, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit t1d[2*8];
+    sp_digit t2d[2*8];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8 * 4, heap, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = d + 0 * 8;
+        t2 = d + 2 * 8;
+#else
+        (void)heap;
+
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
+        sp_256_sqr_8(t1, point->y);
+        (void)sp_256_mod_8(t1, t1, p256_mod);
+        sp_256_sqr_8(t2, point->x);
+        (void)sp_256_mod_8(t2, t2, p256_mod);
+        sp_256_mul_8(t2, t2, point->x);
+        (void)sp_256_mod_8(t2, t2, p256_mod);
+        (void)sp_256_sub_8(t2, p256_mod, t2);
+        sp_256_mont_add_8(t1, t1, t2, p256_mod);
+
+        sp_256_mont_add_8(t1, t1, point->x, p256_mod);
+        sp_256_mont_add_8(t1, t1, point->x, p256_mod);
+        sp_256_mont_add_8(t1, t1, point->x, p256_mod);
+
+        if (sp_256_cmp_8(t1, p256_b) != 0) {
+            err = MP_VAL;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+
+/* Check that the x and y oridinates are a valid point on the curve.
+ *
+ * pX  X ordinate of EC point.
+ * pY  Y ordinate of EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+int sp_ecc_is_point_256(mp_int* pX, mp_int* pY)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 pubd;
+#endif
+    sp_point_256* pub;
+    byte one[1] = { 1 };
+    int err;
+
+    err = sp_256_point_new_8(NULL, pubd, pub);
+    if (err == MP_OKAY) {
+        sp_256_from_mp(pub->x, 8, pX);
+        sp_256_from_mp(pub->y, 8, pY);
+        sp_256_from_bin(pub->z, 8, one, (int)sizeof(one));
+
+        err = sp_256_ecc_is_point_8(pub, NULL);
+    }
+
+    sp_256_point_free_8(pub, 0, NULL);
+
+    return err;
+}
+
+/* Check that the private scalar generates the EC point (px, py), the point is
+ * on the curve and the point has the correct order.
+ *
+ * pX     X ordinate of EC point.
+ * pY     Y ordinate of EC point.
+ * privm  Private scalar that generates EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve, ECC_INF_E if the point does not have the correct order,
+ * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and
+ * MP_OKAY otherwise.
+ */
+int sp_ecc_check_key_256(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit privd[8];
+    sp_point_256 pubd;
+    sp_point_256 pd;
+#endif
+    sp_digit* priv = NULL;
+    sp_point_256* pub;
+    sp_point_256* p = NULL;
+    byte one[1] = { 1 };
+    int err;
+
+    err = sp_256_point_new_8(heap, pubd, pub);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (priv == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+        priv = privd;
+#endif
+
+        sp_256_from_mp(pub->x, 8, pX);
+        sp_256_from_mp(pub->y, 8, pY);
+        sp_256_from_bin(pub->z, 8, one, (int)sizeof(one));
+        sp_256_from_mp(priv, 8, privm);
+
+        /* Check point at infinitiy. */
+        if ((sp_256_iszero_8(pub->x) != 0) &&
+            (sp_256_iszero_8(pub->y) != 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check range of X and Y */
+        if (sp_256_cmp_8(pub->x, p256_mod) >= 0 ||
+            sp_256_cmp_8(pub->y, p256_mod) >= 0) {
+            err = ECC_OUT_OF_RANGE_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check point is on curve */
+        err = sp_256_ecc_is_point_8(pub, heap);
+    }
+
+    if (err == MP_OKAY) {
+        /* Point * order = infinity */
+            err = sp_256_ecc_mulmod_8(p, pub, p256_order, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is infinity */
+        if ((sp_256_iszero_8(p->x) == 0) ||
+            (sp_256_iszero_8(p->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Base * private = point */
+            err = sp_256_ecc_mulmod_base_8(p, priv, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is public key */
+        if (sp_256_cmp_8(p->x, pub->x) != 0 ||
+            sp_256_cmp_8(p->y, pub->y) != 0) {
+            err = ECC_PRIV_KEY_E;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (priv != NULL) {
+        XFREE(priv, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_8(p, 0, heap);
+    sp_256_point_free_8(pub, 0, heap);
+
+    return err;
+}
+#endif
+#ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
+/* Add two projective EC points together.
+ * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ)
+ *
+ * pX   First EC point's X ordinate.
+ * pY   First EC point's Y ordinate.
+ * pZ   First EC point's Z ordinate.
+ * qX   Second EC point's X ordinate.
+ * qY   Second EC point's Y ordinate.
+ * qZ   Second EC point's Z ordinate.
+ * rX   Resultant EC point's X ordinate.
+ * rY   Resultant EC point's Y ordinate.
+ * rZ   Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
+                              mp_int* qX, mp_int* qY, mp_int* qZ,
+                              mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit tmpd[2 * 8 * 5];
+    sp_point_256 pd;
+    sp_point_256 qd;
+#endif
+    sp_digit* tmp;
+    sp_point_256* p;
+    sp_point_256* q = NULL;
+    int err;
+
+    err = sp_256_point_new_8(NULL, pd, p);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(NULL, qd, q);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, NULL,
+                                                              DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_256_from_mp(p->x, 8, pX);
+        sp_256_from_mp(p->y, 8, pY);
+        sp_256_from_mp(p->z, 8, pZ);
+        sp_256_from_mp(q->x, 8, qX);
+        sp_256_from_mp(q->y, 8, qY);
+        sp_256_from_mp(q->z, 8, qZ);
+
+            sp_256_proj_point_add_8(p, p, q, tmp);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->x, rX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->y, rY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->z, rZ);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_8(q, 0, NULL);
+    sp_256_point_free_8(p, 0, NULL);
+
+    return err;
+}
+
+/* Double a projective EC point.
+ * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ)
+ *
+ * pX   EC point's X ordinate.
+ * pY   EC point's Y ordinate.
+ * pZ   EC point's Z ordinate.
+ * rX   Resultant EC point's X ordinate.
+ * rY   Resultant EC point's Y ordinate.
+ * rZ   Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
+                              mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit tmpd[2 * 8 * 2];
+    sp_point_256 pd;
+#endif
+    sp_digit* tmp;
+    sp_point_256* p;
+    int err;
+
+    err = sp_256_point_new_8(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 2, NULL,
+                                                              DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_256_from_mp(p->x, 8, pX);
+        sp_256_from_mp(p->y, 8, pY);
+        sp_256_from_mp(p->z, 8, pZ);
+
+            sp_256_proj_point_dbl_8(p, p, tmp);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->x, rX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->y, rY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->z, rZ);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_8(p, 0, NULL);
+
+    return err;
+}
+
+/* Map a projective EC point to affine in place.
+ * pZ will be one.
+ *
+ * pX   EC point's X ordinate.
+ * pY   EC point's Y ordinate.
+ * pZ   EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit tmpd[2 * 8 * 4];
+    sp_point_256 pd;
+#endif
+    sp_digit* tmp;
+    sp_point_256* p;
+    int err;
+
+    err = sp_256_point_new_8(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 4, NULL,
+                                                              DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+    if (err == MP_OKAY) {
+        sp_256_from_mp(p->x, 8, pX);
+        sp_256_from_mp(p->y, 8, pY);
+        sp_256_from_mp(p->z, 8, pZ);
+
+        sp_256_map_8(p, p, tmp);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->x, pX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->y, pY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->z, pZ);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_8(p, 0, NULL);
+
+    return err;
+}
+#endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */
+#ifdef HAVE_COMP_KEY
+/* Find the square root of a number mod the prime of the curve.
+ *
+ * y  The number to operate on and the result.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+static int sp_256_mont_sqrt_8(sp_digit* y)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d;
+#else
+    sp_digit t1d[2 * 8];
+    sp_digit t2d[2 * 8];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 8, NULL, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = d + 0 * 8;
+        t2 = d + 2 * 8;
+#else
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
+        {
+            /* t2 = y ^ 0x2 */
+            sp_256_mont_sqr_8(t2, y, p256_mod, p256_mp_mod);
+            /* t1 = y ^ 0x3 */
+            sp_256_mont_mul_8(t1, t2, y, p256_mod, p256_mp_mod);
+            /* t2 = y ^ 0xc */
+            sp_256_mont_sqr_n_8(t2, t1, 2, p256_mod, p256_mp_mod);
+            /* t1 = y ^ 0xf */
+            sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod);
+            /* t2 = y ^ 0xf0 */
+            sp_256_mont_sqr_n_8(t2, t1, 4, p256_mod, p256_mp_mod);
+            /* t1 = y ^ 0xff */
+            sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod);
+            /* t2 = y ^ 0xff00 */
+            sp_256_mont_sqr_n_8(t2, t1, 8, p256_mod, p256_mp_mod);
+            /* t1 = y ^ 0xffff */
+            sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod);
+            /* t2 = y ^ 0xffff0000 */
+            sp_256_mont_sqr_n_8(t2, t1, 16, p256_mod, p256_mp_mod);
+            /* t1 = y ^ 0xffffffff */
+            sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod);
+            /* t1 = y ^ 0xffffffff00000000 */
+            sp_256_mont_sqr_n_8(t1, t1, 32, p256_mod, p256_mp_mod);
+            /* t1 = y ^ 0xffffffff00000001 */
+            sp_256_mont_mul_8(t1, t1, y, p256_mod, p256_mp_mod);
+            /* t1 = y ^ 0xffffffff00000001000000000000000000000000 */
+            sp_256_mont_sqr_n_8(t1, t1, 96, p256_mod, p256_mp_mod);
+            /* t1 = y ^ 0xffffffff00000001000000000000000000000001 */
+            sp_256_mont_mul_8(t1, t1, y, p256_mod, p256_mp_mod);
+            sp_256_mont_sqr_n_8(y, t1, 94, p256_mod, p256_mp_mod);
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+
+
+/* Uncompress the point given the X ordinate.
+ *
+ * xm    X ordinate.
+ * odd   Whether the Y ordinate is odd.
+ * ym    Calculated Y ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d;
+#else
+    sp_digit xd[2 * 8];
+    sp_digit yd[2 * 8];
+#endif
+    sp_digit* x = NULL;
+    sp_digit* y = NULL;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 8, NULL, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        x = d + 0 * 8;
+        y = d + 2 * 8;
+#else
+        x = xd;
+        y = yd;
+#endif
+
+        sp_256_from_mp(x, 8, xm);
+        err = sp_256_mod_mul_norm_8(x, x, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        /* y = x^3 */
+        {
+            sp_256_mont_sqr_8(y, x, p256_mod, p256_mp_mod);
+            sp_256_mont_mul_8(y, y, x, p256_mod, p256_mp_mod);
+        }
+        /* y = x^3 - 3x */
+        sp_256_mont_sub_8(y, y, x, p256_mod);
+        sp_256_mont_sub_8(y, y, x, p256_mod);
+        sp_256_mont_sub_8(y, y, x, p256_mod);
+        /* y = x^3 - 3x + b */
+        err = sp_256_mod_mul_norm_8(x, p256_b, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        sp_256_mont_add_8(y, y, x, p256_mod);
+        /* y = sqrt(x^3 - 3x + b) */
+        err = sp_256_mont_sqrt_8(y);
+    }
+    if (err == MP_OKAY) {
+        XMEMSET(y + 8, 0, 8U * sizeof(sp_digit));
+        sp_256_mont_reduce_8(y, p256_mod, p256_mp_mod);
+        if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) {
+            sp_256_mont_sub_8(y, p256_mod, y, p256_mod);
+        }
+
+        err = sp_256_to_mp(y, ym);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+#endif
+#endif /* !WOLFSSL_SP_NO_256 */
+#ifdef WOLFSSL_SP_384
+
+/* Point structure to use. */
+typedef struct sp_point_384 {
+    sp_digit x[2 * 12];
+    sp_digit y[2 * 12];
+    sp_digit z[2 * 12];
+    int infinity;
+} sp_point_384;
+
+/* The modulus (prime) of the curve P384. */
+static const sp_digit p384_mod[12] = {
+    0xffffffff,0x00000000,0x00000000,0xffffffff,0xfffffffe,0xffffffff,
+    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff
+};
+/* The Montogmery normalizer for modulus of the curve P384. */
+static const sp_digit p384_norm_mod[12] = {
+    0x00000001,0xffffffff,0xffffffff,0x00000000,0x00000001,0x00000000,
+    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000
+};
+/* The Montogmery multiplier for modulus of the curve P384. */
+static sp_digit p384_mp_mod = 0x00000001;
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+                                            defined(HAVE_ECC_VERIFY)
+/* The order of the curve P384. */
+static const sp_digit p384_order[12] = {
+    0xccc52973,0xecec196a,0x48b0a77a,0x581a0db2,0xf4372ddf,0xc7634d81,
+    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff
+};
+#endif
+/* The order of the curve P384 minus 2. */
+static const sp_digit p384_order2[12] = {
+    0xccc52971,0xecec196a,0x48b0a77a,0x581a0db2,0xf4372ddf,0xc7634d81,
+    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff
+};
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montogmery normalizer for order of the curve P384. */
+static const sp_digit p384_norm_order[12] = {
+    0x333ad68d,0x1313e695,0xb74f5885,0xa7e5f24d,0x0bc8d220,0x389cb27e,
+    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000
+};
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montogmery multiplier for order of the curve P384. */
+static sp_digit p384_mp_order = 0xe88fdc45;
+#endif
+/* The base point of curve P384. */
+static const sp_point_384 p384_base = {
+    /* X ordinate */
+    {
+        0x72760ab7,0x3a545e38,0xbf55296c,0x5502f25d,0x82542a38,0x59f741e0,
+        0x8ba79b98,0x6e1d3b62,0xf320ad74,0x8eb1c71e,0xbe8b0537,0xaa87ca22,
+        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* Y ordinate */
+    {
+        0x90ea0e5f,0x7a431d7c,0x1d7e819d,0x0a60b1ce,0xb5f0b8c0,0xe9da3113,
+        0x289a147c,0xf8f41dbd,0x9292dc29,0x5d9e98bf,0x96262c6f,0x3617de4a,
+        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* Z ordinate */
+    {
+        0x00000001,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
+        0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
+        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* infinity */
+    0
+};
+#if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY)
+static const sp_digit p384_b[12] = {
+    0xd3ec2aef,0x2a85c8ed,0x8a2ed19d,0xc656398d,0x5013875a,0x0314088f,
+    0xfe814112,0x181d9c6e,0xe3f82d19,0x988e056b,0xe23ee7e4,0xb3312fa7
+};
+#endif
+
+static int sp_384_point_new_ex_12(void* heap, sp_point_384* sp, sp_point_384** p)
+{
+    int ret = MP_OKAY;
+    (void)heap;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    (void)sp;
+    *p = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, DYNAMIC_TYPE_ECC);
+#else
+    *p = sp;
+#endif
+    if (*p == NULL) {
+        ret = MEMORY_E;
+    }
+    return ret;
+}
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* Allocate memory for point and return error. */
+#define sp_384_point_new_12(heap, sp, p) sp_384_point_new_ex_12((heap), NULL, &(p))
+#else
+/* Set pointer to data and return no error. */
+#define sp_384_point_new_12(heap, sp, p) sp_384_point_new_ex_12((heap), &(sp), &(p))
+#endif
+
+
+static void sp_384_point_free_12(sp_point_384* p, int clear, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* If valid pointer then clear point data if requested and free data. */
+    if (p != NULL) {
+        if (clear != 0) {
+            XMEMSET(p, 0, sizeof(*p));
+        }
+        XFREE(p, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+/* Clear point data if requested. */
+    if (clear != 0) {
+        XMEMSET(p, 0, sizeof(*p));
+    }
+#endif
+    (void)heap;
+}
+
+/* Multiply a number by Montogmery normalizer mod modulus (prime).
+ *
+ * r  The resulting Montgomery form number.
+ * a  The number to convert.
+ * m  The modulus (prime).
+ * returns MEMORY_E when memory allocation fails and MP_OKAY otherwise.
+ */
+static int sp_384_mod_mul_norm_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    int64_t* t;
+#else
+    int64_t t[12];
+#endif
+    int64_t o;
+    int err = MP_OKAY;
+
+    (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (int64_t*)XMALLOC(sizeof(int64_t) * 12, NULL, DYNAMIC_TYPE_ECC);
+    if (t == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        /*  1  0  0  0  0  0  0  0  1  1  0 -1 */
+        t[0] = 0 + (uint64_t)a[0] + (uint64_t)a[8] + (uint64_t)a[9] - (uint64_t)a[11];
+        /* -1  1  0  0  0  0  0  0 -1  0  1  1 */
+        t[1] = 0 - (uint64_t)a[0] + (uint64_t)a[1] - (uint64_t)a[8] + (uint64_t)a[10] + (uint64_t)a[11];
+        /*  0 -1  1  0  0  0  0  0  0 -1  0  1 */
+        t[2] = 0 - (uint64_t)a[1] + (uint64_t)a[2] - (uint64_t)a[9] + (uint64_t)a[11];
+        /*  1  0 -1  1  0  0  0  0  1  1 -1 -1 */
+        t[3] = 0 + (uint64_t)a[0] - (uint64_t)a[2] + (uint64_t)a[3] + (uint64_t)a[8] + (uint64_t)a[9] - (uint64_t)a[10] - (uint64_t)a[11];
+        /*  1  1  0 -1  1  0  0  0  1  2  1 -2 */
+        t[4] = 0 + (uint64_t)a[0] + (uint64_t)a[1] - (uint64_t)a[3] + (uint64_t)a[4] + (uint64_t)a[8] + 2 * (uint64_t)a[9] + (uint64_t)a[10] -  2 * (uint64_t)a[11];
+        /*  0  1  1  0 -1  1  0  0  0  1  2  1 */
+        t[5] = 0 + (uint64_t)a[1] + (uint64_t)a[2] - (uint64_t)a[4] + (uint64_t)a[5] + (uint64_t)a[9] + 2 * (uint64_t)a[10] + (uint64_t)a[11];
+        /*  0  0  1  1  0 -1  1  0  0  0  1  2 */
+        t[6] = 0 + (uint64_t)a[2] + (uint64_t)a[3] - (uint64_t)a[5] + (uint64_t)a[6] + (uint64_t)a[10] + 2 * (uint64_t)a[11];
+        /*  0  0  0  1  1  0 -1  1  0  0  0  1 */
+        t[7] = 0 + (uint64_t)a[3] + (uint64_t)a[4] - (uint64_t)a[6] + (uint64_t)a[7] + (uint64_t)a[11];
+        /*  0  0  0  0  1  1  0 -1  1  0  0  0 */
+        t[8] = 0 + (uint64_t)a[4] + (uint64_t)a[5] - (uint64_t)a[7] + (uint64_t)a[8];
+        /*  0  0  0  0  0  1  1  0 -1  1  0  0 */
+        t[9] = 0 + (uint64_t)a[5] + (uint64_t)a[6] - (uint64_t)a[8] + (uint64_t)a[9];
+        /*  0  0  0  0  0  0  1  1  0 -1  1  0 */
+        t[10] = 0 + (uint64_t)a[6] + (uint64_t)a[7] - (uint64_t)a[9] + (uint64_t)a[10];
+        /*  0  0  0  0  0  0  0  1  1  0 -1  1 */
+        t[11] = 0 + (uint64_t)a[7] + (uint64_t)a[8] - (uint64_t)a[10] + (uint64_t)a[11];
+
+        t[1] += t[0] >> 32; t[0] &= 0xffffffff;
+        t[2] += t[1] >> 32; t[1] &= 0xffffffff;
+        t[3] += t[2] >> 32; t[2] &= 0xffffffff;
+        t[4] += t[3] >> 32; t[3] &= 0xffffffff;
+        t[5] += t[4] >> 32; t[4] &= 0xffffffff;
+        t[6] += t[5] >> 32; t[5] &= 0xffffffff;
+        t[7] += t[6] >> 32; t[6] &= 0xffffffff;
+        t[8] += t[7] >> 32; t[7] &= 0xffffffff;
+        t[9] += t[8] >> 32; t[8] &= 0xffffffff;
+        t[10] += t[9] >> 32; t[9] &= 0xffffffff;
+        t[11] += t[10] >> 32; t[10] &= 0xffffffff;
+        o     = t[11] >> 32; t[11] &= 0xffffffff;
+        t[0] += o;
+        t[1] -= o;
+        t[3] += o;
+        t[4] += o;
+        t[1] += t[0] >> 32; t[0] &= 0xffffffff;
+        t[2] += t[1] >> 32; t[1] &= 0xffffffff;
+        t[3] += t[2] >> 32; t[2] &= 0xffffffff;
+        t[4] += t[3] >> 32; t[3] &= 0xffffffff;
+        t[5] += t[4] >> 32; t[4] &= 0xffffffff;
+        t[6] += t[5] >> 32; t[5] &= 0xffffffff;
+        t[7] += t[6] >> 32; t[6] &= 0xffffffff;
+        t[8] += t[7] >> 32; t[7] &= 0xffffffff;
+        t[9] += t[8] >> 32; t[8] &= 0xffffffff;
+        t[10] += t[9] >> 32; t[9] &= 0xffffffff;
+        t[11] += t[10] >> 32; t[10] &= 0xffffffff;
+
+        r[0] = t[0];
+        r[1] = t[1];
+        r[2] = t[2];
+        r[3] = t[3];
+        r[4] = t[4];
+        r[5] = t[5];
+        r[6] = t[6];
+        r[7] = t[7];
+        r[8] = t[8];
+        r[9] = t[9];
+        r[10] = t[10];
+        r[11] = t[11];
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL)
+        XFREE(t, NULL, DYNAMIC_TYPE_ECC);
+#endif
+
+    return err;
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
+ * a  A multi-precision integer.
+ */
+static void sp_384_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 32
+    int j;
+
+    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+    for (j = a->used; j < size; j++) {
+        r[j] = 0;
+    }
+#elif DIGIT_BIT > 32
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
+        r[j] &= 0xffffffff;
+        s = 32U - s;
+        if (j + 1 >= size) {
+            break;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 32U) <= (word32)DIGIT_BIT) {
+            s += 32U;
+            r[j] &= 0xffffffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#else
+    int i, j = 0, s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i]) << s;
+        if (s + DIGIT_BIT >= 32) {
+            r[j] &= 0xffffffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            s = 32 - s;
+            if (s == DIGIT_BIT) {
+                r[++j] = 0;
+                s = 0;
+            }
+            else {
+                r[++j] = a->dp[i] >> s;
+                s = DIGIT_BIT - s;
+            }
+        }
+        else {
+            s += DIGIT_BIT;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#endif
+}
+
+/* Convert a point of type ecc_point to type sp_point_384.
+ *
+ * p   Point of type sp_point_384 (result).
+ * pm  Point of type ecc_point.
+ */
+static void sp_384_point_from_ecc_point_12(sp_point_384* p, const ecc_point* pm)
+{
+    XMEMSET(p->x, 0, sizeof(p->x));
+    XMEMSET(p->y, 0, sizeof(p->y));
+    XMEMSET(p->z, 0, sizeof(p->z));
+    sp_384_from_mp(p->x, 12, pm->x);
+    sp_384_from_mp(p->y, 12, pm->y);
+    sp_384_from_mp(p->z, 12, pm->z);
+    p->infinity = 0;
+}
+
+/* Convert an array of sp_digit to an mp_int.
+ *
+ * a  A single precision integer.
+ * r  A multi-precision integer.
+ */
+static int sp_384_to_mp(const sp_digit* a, mp_int* r)
+{
+    int err;
+
+    err = mp_grow(r, (384 + DIGIT_BIT - 1) / DIGIT_BIT);
+    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 32
+        XMEMCPY(r->dp, a, sizeof(sp_digit) * 12);
+        r->used = 12;
+        mp_clamp(r);
+#elif DIGIT_BIT < 32
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 12; i++) {
+            r->dp[j] |= (mp_digit)(a[i] << s);
+            r->dp[j] &= (1L << DIGIT_BIT) - 1;
+            s = DIGIT_BIT - s;
+            r->dp[++j] = (mp_digit)(a[i] >> s);
+            while (s + DIGIT_BIT <= 32) {
+                s += DIGIT_BIT;
+                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+                if (s == SP_WORD_SIZE) {
+                    r->dp[j] = 0;
+                }
+                else {
+                    r->dp[j] = (mp_digit)(a[i] >> s);
+                }
+            }
+            s = 32 - s;
+        }
+        r->used = (384 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#else
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 12; i++) {
+            r->dp[j] |= ((mp_digit)a[i]) << s;
+            if (s + 32 >= DIGIT_BIT) {
+    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+                r->dp[j] &= (1L << DIGIT_BIT) - 1;
+    #endif
+                s = DIGIT_BIT - s;
+                r->dp[++j] = a[i] >> s;
+                s = 32 - s;
+            }
+            else {
+                s += 32;
+            }
+        }
+        r->used = (384 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#endif
+    }
+
+    return err;
+}
+
+/* Convert a point of type sp_point_384 to type ecc_point.
+ *
+ * p   Point of type sp_point_384.
+ * pm  Point of type ecc_point (result).
+ * returns MEMORY_E when allocation of memory in ecc_point fails otherwise
+ * MP_OKAY.
+ */
+static int sp_384_point_to_ecc_point_12(const sp_point_384* p, ecc_point* pm)
+{
+    int err;
+
+    err = sp_384_to_mp(p->x, pm->x);
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->y, pm->y);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->z, pm->z);
+    }
+
+    return err;
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_384_mul_12(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit tmp[12 * 2];
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r8, r3\n\t"
+        "mov	r11, %[r]\n\t"
+        "mov	r9, %[a]\n\t"
+        "mov	r10, %[b]\n\t"
+        "mov	r6, #48\n\t"
+        "add	r6, r9\n\t"
+        "mov	r12, r6\n\t"
+        "\n1:\n\t"
+        "mov	%[r], #0\n\t"
+        "mov	r5, #0\n\t"
+        "mov	r6, #44\n\t"
+        "mov	%[a], r8\n\t"
+        "sub	%[a], r6\n\t"
+        "sbc	r6, r6\n\t"
+        "mvn	r6, r6\n\t"
+        "and	%[a], r6\n\t"
+        "mov	%[b], r8\n\t"
+        "sub	%[b], %[a]\n\t"
+        "add	%[a], r9\n\t"
+        "add	%[b], r10\n\t"
+        "\n2:\n\t"
+        "# Multiply Start\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r7, [%[b]]\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r3, r7\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r7, [%[b]]\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r7, [%[b]]\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r7, [%[b]]\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "# Multiply Done\n\t"
+        "add	%[a], #4\n\t"
+        "sub	%[b], #4\n\t"
+        "cmp	%[a], r12\n\t"
+        "beq	3f\n\t"
+        "mov	r6, r8\n\t"
+        "add	r6, r9\n\t"
+        "cmp	%[a], r6\n\t"
+        "ble	2b\n\t"
+        "\n3:\n\t"
+        "mov	%[r], r11\n\t"
+        "mov	r7, r8\n\t"
+        "str	r3, [%[r], r7]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "add	r7, #4\n\t"
+        "mov	r8, r7\n\t"
+        "mov	r6, #88\n\t"
+        "cmp	r7, r6\n\t"
+        "ble	1b\n\t"
+        "str	r3, [%[r], r7]\n\t"
+        "mov	%[a], r9\n\t"
+        "mov	%[b], r10\n\t"
+        :
+        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
+    );
+
+    XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * r  A single precision number representing condition subtract result.
+ * a  A single precision number to subtract from.
+ * b  A single precision number to subtract.
+ * m  Mask value to apply.
+ */
+SP_NOINLINE static sp_digit sp_384_cond_sub_12(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, sp_digit m)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r5, #48\n\t"
+        "mov	r8, r5\n\t"
+        "mov	r7, #0\n\t"
+        "1:\n\t"
+        "ldr	r6, [%[b], r7]\n\t"
+        "and	r6, %[m]\n\t"
+        "mov	r5, #0\n\t"
+        "sub	r5, %[c]\n\t"
+        "ldr	r5, [%[a], r7]\n\t"
+        "sbc	r5, r6\n\t"
+        "sbc	%[c], %[c]\n\t"
+        "str	r5, [%[r], r7]\n\t"
+        "add	r7, #4\n\t"
+        "cmp	r7, r8\n\t"
+        "blt	1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r5", "r6", "r7", "r8"
+    );
+
+    return c;
+}
+
+#define sp_384_mont_reduce_order_12   sp_384_mont_reduce_12
+
+/* Reduce the number back to 384 bits using Montgomery reduction.
+ *
+ * a   A single precision number to reduce in place.
+ * m   The single precision number representing the modulus.
+ * mp  The digit representing the negative inverse of m mod 2^n.
+ */
+SP_NOINLINE static void sp_384_mont_reduce_12(sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_digit ca = 0;
+
+    __asm__ __volatile__ (
+        "mov	r8, %[mp]\n\t"
+        "mov	r12, %[ca]\n\t"
+        "mov	r14, %[m]\n\t"
+        "mov	r9, %[a]\n\t"
+        "mov	r4, #0\n\t"
+        "# i = 0\n\t"
+        "mov	r11, r4\n\t"
+        "\n1:\n\t"
+        "mov	r5, #0\n\t"
+        "mov	%[ca], #0\n\t"
+        "# mu = a[i] * mp\n\t"
+        "mov	%[mp], r8\n\t"
+        "ldr	%[a], [%[a]]\n\t"
+        "mul	%[mp], %[a]\n\t"
+        "mov	%[m], r14\n\t"
+        "mov	r10, r9\n\t"
+        "\n2:\n\t"
+        "# a[i+j] += m[j] * mu\n\t"
+        "mov	%[a], r10\n\t"
+        "ldr	%[a], [%[a]]\n\t"
+        "mov	%[ca], #0\n\t"
+        "mov	r4, r5\n\t"
+        "mov	r5, #0\n\t"
+        "# Multiply m[j] and mu - Start\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsl	r6, %[mp], #16\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	%[a], r7\n\t"
+        "adc	r5, %[ca]\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	%[a], r6\n\t"
+        "adc	r5, r7\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsr	r6, %[mp], #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r5, r7\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	%[a], r6\n\t"
+        "adc	r5, r7\n\t"
+        "# Multiply m[j] and mu - Done\n\t"
+        "add	r4, %[a]\n\t"
+        "adc	r5, %[ca]\n\t"
+        "mov	%[a], r10\n\t"
+        "str	r4, [%[a]]\n\t"
+        "mov	r6, #4\n\t"
+        "add	%[m], #4\n\t"
+        "add	r10, r6\n\t"
+        "mov	r4, #44\n\t"
+        "add	r4, r9\n\t"
+        "cmp	r10, r4\n\t"
+        "blt	2b\n\t"
+        "# a[i+11] += m[11] * mu\n\t"
+        "mov	%[ca], #0\n\t"
+        "mov	r4, r12\n\t"
+        "mov	%[a], #0\n\t"
+        "# Multiply m[11] and mu - Start\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsl	r6, %[mp], #16\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r5, r7\n\t"
+        "adc	r4, %[ca]\n\t"
+        "adc	%[a], %[ca]\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r5, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	%[a], %[ca]\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsr	r6, %[mp], #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r4, r7\n\t"
+        "adc	%[a], %[ca]\n\t"
+        "ldr	r7, [%[m]]\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r5, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	%[a], %[ca]\n\t"
+        "# Multiply m[11] and mu - Done\n\t"
+        "mov	%[ca], %[a]\n\t"
+        "mov	%[a], r10\n\t"
+        "ldr	r7, [%[a], #4]\n\t"
+        "ldr	%[a], [%[a]]\n\t"
+        "mov	r6, #0\n\t"
+        "add	r5, %[a]\n\t"
+        "adc	r7, r4\n\t"
+        "adc	%[ca], r6\n\t"
+        "mov	%[a], r10\n\t"
+        "str	r5, [%[a]]\n\t"
+        "str	r7, [%[a], #4]\n\t"
+        "# i += 1\n\t"
+        "mov	r6, #4\n\t"
+        "add	r9, r6\n\t"
+        "add	r11, r6\n\t"
+        "mov	r12, %[ca]\n\t"
+        "mov	%[a], r9\n\t"
+        "mov	r4, #48\n\t"
+        "cmp	r11, r4\n\t"
+        "blt	1b\n\t"
+        "mov	%[m], r14\n\t"
+        : [ca] "+r" (ca), [a] "+r" (a)
+        : [m] "r" (m), [mp] "r" (mp)
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14"
+    );
+
+    sp_384_cond_sub_12(a - 12, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montogmery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r   Result of multiplication.
+ * a   First number to multiply in Montogmery form.
+ * b   Second number to multiply in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_384_mont_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_384_mul_12(r, a, b);
+    sp_384_mont_reduce_12(r, m, mp);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_384_sqr_12(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r5, #0\n\t"
+        "mov	r8, r3\n\t"
+        "mov	r11, %[r]\n\t"
+        "mov	r6, #96\n\t"
+        "neg	r6, r6\n\t"
+        "add	sp, r6\n\t"
+        "mov	r10, sp\n\t"
+        "mov	r9, %[a]\n\t"
+        "\n1:\n\t"
+        "mov	%[r], #0\n\t"
+        "mov	r6, #44\n\t"
+        "mov	%[a], r8\n\t"
+        "sub	%[a], r6\n\t"
+        "sbc	r6, r6\n\t"
+        "mvn	r6, r6\n\t"
+        "and	%[a], r6\n\t"
+        "mov	r2, r8\n\t"
+        "sub	r2, %[a]\n\t"
+        "add	%[a], r9\n\t"
+        "add	r2, r9\n\t"
+        "\n2:\n\t"
+        "cmp	r2, %[a]\n\t"
+        "beq	4f\n\t"
+        "# Multiply * 2: Start\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r7, [r2]\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r3, r7\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "add	r3, r7\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r7, [r2]\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r7, [r2]\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r7, [r2]\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "# Multiply * 2: Done\n\t"
+        "bal	5f\n\t"
+        "\n4:\n\t"
+        "# Square: Start\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "mul	r6, r6\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "mul	r7, r7\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #15\n\t"
+        "lsl	r6, r6, #17\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "# Square: Done\n\t"
+        "\n5:\n\t"
+        "add	%[a], #4\n\t"
+        "sub	r2, #4\n\t"
+        "mov	r6, #48\n\t"
+        "add	r6, r9\n\t"
+        "cmp	%[a], r6\n\t"
+        "beq	3f\n\t"
+        "cmp	%[a], r2\n\t"
+        "bgt	3f\n\t"
+        "mov	r7, r8\n\t"
+        "add	r7, r9\n\t"
+        "cmp	%[a], r7\n\t"
+        "ble	2b\n\t"
+        "\n3:\n\t"
+        "mov	%[r], r10\n\t"
+        "mov	r7, r8\n\t"
+        "str	r3, [%[r], r7]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "mov	r5, #0\n\t"
+        "add	r7, #4\n\t"
+        "mov	r8, r7\n\t"
+        "mov	r6, #88\n\t"
+        "cmp	r7, r6\n\t"
+        "ble	1b\n\t"
+        "mov	%[a], r9\n\t"
+        "str	r3, [%[r], r7]\n\t"
+        "mov	%[r], r11\n\t"
+        "mov	%[a], r10\n\t"
+        "mov	r3, #92\n\t"
+        "\n4:\n\t"
+        "ldr	r6, [%[a], r3]\n\t"
+        "str	r6, [%[r], r3]\n\t"
+        "sub	r3, #4\n\t"
+        "bge	4b\n\t"
+        "mov	r6, #96\n\t"
+        "add	sp, r6\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
+    );
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_384_mont_sqr_12(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_384_sqr_12(r, a);
+    sp_384_mont_reduce_12(r, m, mp);
+}
+
+#if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY)
+/* Square the Montgomery form number a number of times. (r = a ^ n mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montogmery form.
+ * n   Number of times to square.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_384_mont_sqr_n_12(sp_digit* r, const sp_digit* a, int n,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_384_mont_sqr_12(r, a, m, mp);
+    for (; n > 1; n--) {
+        sp_384_mont_sqr_12(r, r, m, mp);
+    }
+}
+
+#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */
+#ifdef WOLFSSL_SP_SMALL
+/* Mod-2 for the P384 curve. */
+static const uint32_t p384_mod_minus_2[12] = {
+    0xfffffffdU,0x00000000U,0x00000000U,0xffffffffU,0xfffffffeU,0xffffffffU,
+    0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU
+};
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the modulus (prime) of the
+ * P384 curve. (r = 1 / a mod m)
+ *
+ * r   Inverse result.
+ * a   Number to invert.
+ * td  Temporary data.
+ */
+static void sp_384_mont_inv_12(sp_digit* r, const sp_digit* a, sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    XMEMCPY(t, a, sizeof(sp_digit) * 12);
+    for (i=382; i>=0; i--) {
+        sp_384_mont_sqr_12(t, t, p384_mod, p384_mp_mod);
+        if (p384_mod_minus_2[i / 32] & ((sp_digit)1 << (i % 32)))
+            sp_384_mont_mul_12(t, t, a, p384_mod, p384_mp_mod);
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 12);
+#else
+    sp_digit* t1 = td;
+    sp_digit* t2 = td + 2 * 12;
+    sp_digit* t3 = td + 4 * 12;
+    sp_digit* t4 = td + 6 * 12;
+    sp_digit* t5 = td + 8 * 12;
+
+    /* 0x2 */
+    sp_384_mont_sqr_12(t1, a, p384_mod, p384_mp_mod);
+    /* 0x3 */
+    sp_384_mont_mul_12(t5, t1, a, p384_mod, p384_mp_mod);
+    /* 0xc */
+    sp_384_mont_sqr_n_12(t1, t5, 2, p384_mod, p384_mp_mod);
+    /* 0xf */
+    sp_384_mont_mul_12(t2, t5, t1, p384_mod, p384_mp_mod);
+    /* 0x1e */
+    sp_384_mont_sqr_12(t1, t2, p384_mod, p384_mp_mod);
+    /* 0x1f */
+    sp_384_mont_mul_12(t4, t1, a, p384_mod, p384_mp_mod);
+    /* 0x3e0 */
+    sp_384_mont_sqr_n_12(t1, t4, 5, p384_mod, p384_mp_mod);
+    /* 0x3ff */
+    sp_384_mont_mul_12(t2, t4, t1, p384_mod, p384_mp_mod);
+    /* 0x7fe0 */
+    sp_384_mont_sqr_n_12(t1, t2, 5, p384_mod, p384_mp_mod);
+    /* 0x7fff */
+    sp_384_mont_mul_12(t4, t4, t1, p384_mod, p384_mp_mod);
+    /* 0x3fff8000 */
+    sp_384_mont_sqr_n_12(t1, t4, 15, p384_mod, p384_mp_mod);
+    /* 0x3fffffff */
+    sp_384_mont_mul_12(t2, t4, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffc */
+    sp_384_mont_sqr_n_12(t3, t2, 2, p384_mod, p384_mp_mod);
+    /* 0xfffffffd */
+    sp_384_mont_mul_12(r, t3, a, p384_mod, p384_mp_mod);
+    /* 0xffffffff */
+    sp_384_mont_mul_12(t3, t5, t3, p384_mod, p384_mp_mod);
+    /* 0xfffffffc0000000 */
+    sp_384_mont_sqr_n_12(t1, t2, 30, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffff */
+    sp_384_mont_mul_12(t2, t2, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffff000000000000000 */
+    sp_384_mont_sqr_n_12(t1, t2, 60, p384_mod, p384_mp_mod);
+    /* 0xffffffffffffffffffffffffffffff */
+    sp_384_mont_mul_12(t2, t2, t1, p384_mod, p384_mp_mod);
+    /* 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */
+    sp_384_mont_sqr_n_12(t1, t2, 120, p384_mod, p384_mp_mod);
+    /* 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+    sp_384_mont_mul_12(t2, t2, t1, p384_mod, p384_mp_mod);
+    /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */
+    sp_384_mont_sqr_n_12(t1, t2, 15, p384_mod, p384_mp_mod);
+    /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+    sp_384_mont_mul_12(t2, t4, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe00000000 */
+    sp_384_mont_sqr_n_12(t1, t2, 33, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff */
+    sp_384_mont_mul_12(t2, t3, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff000000000000000000000000 */
+    sp_384_mont_sqr_n_12(t1, t2, 96, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000fffffffd */
+    sp_384_mont_mul_12(r, r, t1, p384_mod, p384_mp_mod);
+
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Compare a with b in constant time.
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+SP_NOINLINE static int32_t sp_384_cmp_12(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+
+
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mvn	r3, r3\n\t"
+        "mov	r6, #44\n\t"
+        "1:\n\t"
+        "ldr	r7, [%[a], r6]\n\t"
+        "ldr	r5, [%[b], r6]\n\t"
+        "and	r7, r3\n\t"
+        "and	r5, r3\n\t"
+        "mov	r4, r7\n\t"
+        "sub	r7, r5\n\t"
+        "sbc	r7, r7\n\t"
+        "add	%[r], r7\n\t"
+        "mvn	r7, r7\n\t"
+        "and	r3, r7\n\t"
+        "sub	r5, r4\n\t"
+        "sbc	r7, r7\n\t"
+        "sub	%[r], r7\n\t"
+        "mvn	r7, r7\n\t"
+        "and	r3, r7\n\t"
+        "sub	r6, #4\n\t"
+        "cmp	r6, #0\n\t"
+        "bge	1b\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
+        : "r3", "r4", "r5", "r6", "r7"
+    );
+
+    return r;
+}
+
+/* Normalize the values in each word to 32.
+ *
+ * a  Array of sp_digit to normalize.
+ */
+#define sp_384_norm_12(a)
+
+/* Map the Montgomery form projective coordinate point to an affine point.
+ *
+ * r  Resulting affine coordinate point.
+ * p  Montgomery form projective coordinate point.
+ * t  Temporary ordinate data.
+ */
+static void sp_384_map_12(sp_point_384* r, const sp_point_384* p, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*12;
+    int32_t n;
+
+    sp_384_mont_inv_12(t1, p->z, t + 2*12);
+
+    sp_384_mont_sqr_12(t2, t1, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_12(t1, t2, t1, p384_mod, p384_mp_mod);
+
+    /* x /= z^2 */
+    sp_384_mont_mul_12(r->x, p->x, t2, p384_mod, p384_mp_mod);
+    XMEMSET(r->x + 12, 0, sizeof(r->x) / 2U);
+    sp_384_mont_reduce_12(r->x, p384_mod, p384_mp_mod);
+    /* Reduce x to less than modulus */
+    n = sp_384_cmp_12(r->x, p384_mod);
+    sp_384_cond_sub_12(r->x, r->x, p384_mod, 0 - ((n >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_384_norm_12(r->x);
+
+    /* y /= z^3 */
+    sp_384_mont_mul_12(r->y, p->y, t1, p384_mod, p384_mp_mod);
+    XMEMSET(r->y + 12, 0, sizeof(r->y) / 2U);
+    sp_384_mont_reduce_12(r->y, p384_mod, p384_mp_mod);
+    /* Reduce y to less than modulus */
+    n = sp_384_cmp_12(r->y, p384_mod);
+    sp_384_cond_sub_12(r->y, r->y, p384_mod, 0 - ((n >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_384_norm_12(r->y);
+
+    XMEMSET(r->z, 0, sizeof(r->z));
+    r->z[0] = 1;
+
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r6, %[a]\n\t"
+        "mov	r7, #0\n\t"
+        "add	r6, #48\n\t"
+        "sub	r7, #1\n\t"
+        "\n1:\n\t"
+        "add	%[c], r7\n\t"
+        "ldr	r4, [%[a]]\n\t"
+        "ldr	r5, [%[b]]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r]]\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c]\n\t"
+        "add	%[a], #4\n\t"
+        "add	%[b], #4\n\t"
+        "add	%[r], #4\n\t"
+        "cmp	%[a], r6\n\t"
+        "bne	1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r7"
+    );
+
+    return c;
+}
+
+#else
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldr	r4, [%[a], #0]\n\t"
+        "ldr	r5, [%[b], #0]\n\t"
+        "add	r4, r5\n\t"
+        "str	r4, [%[r], #0]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b], #4]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #4]\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "ldr	r5, [%[b], #8]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "ldr	r5, [%[b], #12]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #12]\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "ldr	r5, [%[b], #16]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "ldr	r5, [%[b], #20]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #20]\n\t"
+        "ldr	r4, [%[a], #24]\n\t"
+        "ldr	r5, [%[b], #24]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #24]\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "ldr	r5, [%[b], #28]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #28]\n\t"
+        "ldr	r4, [%[a], #32]\n\t"
+        "ldr	r5, [%[b], #32]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #32]\n\t"
+        "ldr	r4, [%[a], #36]\n\t"
+        "ldr	r5, [%[b], #36]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #36]\n\t"
+        "ldr	r4, [%[a], #40]\n\t"
+        "ldr	r5, [%[b], #40]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #40]\n\t"
+        "ldr	r4, [%[a], #44]\n\t"
+        "ldr	r5, [%[b], #44]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #44]\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c]\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Add two Montgomery form numbers (r = a + b % m).
+ *
+ * r   Result of addition.
+ * a   First number to add in Montogmery form.
+ * b   Second number to add in Montogmery form.
+ * m   Modulus (prime).
+ */
+SP_NOINLINE static void sp_384_mont_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    sp_digit o;
+
+    o = sp_384_add_12(r, a, b);
+    sp_384_cond_sub_12(r, r, m, 0 - o);
+}
+
+/* Double a Montgomery form number (r = a + a % m).
+ *
+ * r   Result of doubling.
+ * a   Number to double in Montogmery form.
+ * m   Modulus (prime).
+ */
+SP_NOINLINE static void sp_384_mont_dbl_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    sp_digit o;
+
+    o = sp_384_add_12(r, a, a);
+    sp_384_cond_sub_12(r, r, m, 0 - o);
+}
+
+/* Triple a Montgomery form number (r = a + a + a % m).
+ *
+ * r   Result of Tripling.
+ * a   Number to triple in Montogmery form.
+ * m   Modulus (prime).
+ */
+SP_NOINLINE static void sp_384_mont_tpl_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    sp_digit o;
+
+    o = sp_384_add_12(r, a, a);
+    sp_384_cond_sub_12(r, r, m, 0 - o);
+    o = sp_384_add_12(r, r, a);
+    sp_384_cond_sub_12(r, r, m, 0 - o);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r6, %[a]\n\t"
+        "add	r6, #48\n\t"
+        "\n1:\n\t"
+        "mov	r5, #0\n\t"
+        "sub	r5, %[c]\n\t"
+        "ldr	r4, [%[a]]\n\t"
+        "ldr	r5, [%[b]]\n\t"
+        "sbc	r4, r5\n\t"
+        "str	r4, [%[r]]\n\t"
+        "sbc	%[c], %[c]\n\t"
+        "add	%[a], #4\n\t"
+        "add	%[b], #4\n\t"
+        "add	%[r], #4\n\t"
+        "cmp	%[a], r6\n\t"
+        "bne	1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6"
+    );
+
+    return c;
+}
+
+#else
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldr	r4, [%[a], #0]\n\t"
+        "ldr	r5, [%[a], #4]\n\t"
+        "ldr	r6, [%[b], #0]\n\t"
+        "ldr	r7, [%[b], #4]\n\t"
+        "sub	r4, r6\n\t"
+        "sbc	r5, r7\n\t"
+        "str	r4, [%[r], #0]\n\t"
+        "str	r5, [%[r], #4]\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "ldr	r5, [%[a], #12]\n\t"
+        "ldr	r6, [%[b], #8]\n\t"
+        "ldr	r7, [%[b], #12]\n\t"
+        "sbc	r4, r6\n\t"
+        "sbc	r5, r7\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "str	r5, [%[r], #12]\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "ldr	r5, [%[a], #20]\n\t"
+        "ldr	r6, [%[b], #16]\n\t"
+        "ldr	r7, [%[b], #20]\n\t"
+        "sbc	r4, r6\n\t"
+        "sbc	r5, r7\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "str	r5, [%[r], #20]\n\t"
+        "ldr	r4, [%[a], #24]\n\t"
+        "ldr	r5, [%[a], #28]\n\t"
+        "ldr	r6, [%[b], #24]\n\t"
+        "ldr	r7, [%[b], #28]\n\t"
+        "sbc	r4, r6\n\t"
+        "sbc	r5, r7\n\t"
+        "str	r4, [%[r], #24]\n\t"
+        "str	r5, [%[r], #28]\n\t"
+        "ldr	r4, [%[a], #32]\n\t"
+        "ldr	r5, [%[a], #36]\n\t"
+        "ldr	r6, [%[b], #32]\n\t"
+        "ldr	r7, [%[b], #36]\n\t"
+        "sbc	r4, r6\n\t"
+        "sbc	r5, r7\n\t"
+        "str	r4, [%[r], #32]\n\t"
+        "str	r5, [%[r], #36]\n\t"
+        "ldr	r4, [%[a], #40]\n\t"
+        "ldr	r5, [%[a], #44]\n\t"
+        "ldr	r6, [%[b], #40]\n\t"
+        "ldr	r7, [%[b], #44]\n\t"
+        "sbc	r4, r6\n\t"
+        "sbc	r5, r7\n\t"
+        "str	r4, [%[r], #40]\n\t"
+        "str	r5, [%[r], #44]\n\t"
+        "sbc	%[c], %[c]\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r7"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r  A single precision number representing conditional add result.
+ * a  A single precision number to add with.
+ * b  A single precision number to add.
+ * m  Mask value to apply.
+ */
+SP_NOINLINE static sp_digit sp_384_cond_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        sp_digit m)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r5, #48\n\t"
+        "mov	r8, r5\n\t"
+        "mov	r7, #0\n\t"
+        "1:\n\t"
+        "ldr	r6, [%[b], r7]\n\t"
+        "and	r6, %[m]\n\t"
+        "mov	r5, #0\n\t"
+        "sub	r5, #1\n\t"
+        "add	r5, %[c]\n\t"
+        "ldr	r5, [%[a], r7]\n\t"
+        "adc	r5, r6\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c]\n\t"
+        "str	r5, [%[r], r7]\n\t"
+        "add	r7, #4\n\t"
+        "cmp	r7, r8\n\t"
+        "blt	1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r5", "r6", "r7", "r8"
+    );
+
+    return c;
+}
+
+/* Subtract two Montgomery form numbers (r = a - b % m).
+ *
+ * r   Result of subtration.
+ * a   Number to subtract from in Montogmery form.
+ * b   Number to subtract with in Montogmery form.
+ * m   Modulus (prime).
+ */
+SP_NOINLINE static void sp_384_mont_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    sp_digit o;
+
+    o = sp_384_sub_12(r, a, b);
+    sp_384_cond_add_12(r, r, m, o);
+}
+
+static void sp_384_rshift1_12(sp_digit* r, sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "ldr	r2, [%[a]]\n\t"
+        "ldr	r3, [%[a], #4]\n\t"
+        "lsr	r2, r2, #1\n\t"
+        "lsl	r5, r3, #31\n\t"
+        "lsr	r3, r3, #1\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "str	r2, [%[r], #0]\n\t"
+        "lsl	r5, r4, #31\n\t"
+        "lsr	r4, r4, #1\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r2, [%[a], #12]\n\t"
+        "str	r3, [%[r], #4]\n\t"
+        "lsl	r5, r2, #31\n\t"
+        "lsr	r2, r2, #1\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r3, [%[a], #16]\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "lsl	r5, r3, #31\n\t"
+        "lsr	r3, r3, #1\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "str	r2, [%[r], #12]\n\t"
+        "lsl	r5, r4, #31\n\t"
+        "lsr	r4, r4, #1\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r2, [%[a], #24]\n\t"
+        "str	r3, [%[r], #16]\n\t"
+        "lsl	r5, r2, #31\n\t"
+        "lsr	r2, r2, #1\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r3, [%[a], #28]\n\t"
+        "str	r4, [%[r], #20]\n\t"
+        "lsl	r5, r3, #31\n\t"
+        "lsr	r3, r3, #1\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r4, [%[a], #32]\n\t"
+        "str	r2, [%[r], #24]\n\t"
+        "lsl	r5, r4, #31\n\t"
+        "lsr	r4, r4, #1\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r2, [%[a], #36]\n\t"
+        "str	r3, [%[r], #28]\n\t"
+        "lsl	r5, r2, #31\n\t"
+        "lsr	r2, r2, #1\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r3, [%[a], #40]\n\t"
+        "str	r4, [%[r], #32]\n\t"
+        "lsl	r5, r3, #31\n\t"
+        "lsr	r3, r3, #1\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r4, [%[a], #44]\n\t"
+        "str	r2, [%[r], #36]\n\t"
+        "lsl	r5, r4, #31\n\t"
+        "lsr	r4, r4, #1\n\t"
+        "orr	r3, r3, r5\n\t"
+        "str	r3, [%[r], #40]\n\t"
+        "str	r4, [%[r], #44]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r2", "r3", "r4", "r5"
+    );
+}
+
+/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
+ *
+ * r  Result of division by 2.
+ * a  Number to divide.
+ * m  Modulus (prime).
+ */
+SP_NOINLINE static void sp_384_div2_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    sp_digit o;
+
+    o = sp_384_cond_add_12(r, a, m, 0 - (a[0] & 1));
+    sp_384_rshift1_12(r, r);
+    r[11] |= o << 31;
+}
+
+/* Double the Montgomery form projective point p.
+ *
+ * r  Result of doubling point.
+ * p  Point to double.
+ * t  Temporary ordinate data.
+ */
+static void sp_384_proj_point_dbl_12(sp_point_384* r, const sp_point_384* p, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*12;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+
+    x = r->x;
+    y = r->y;
+    z = r->z;
+    /* Put infinity into result. */
+    if (r != p) {
+        r->infinity = p->infinity;
+    }
+
+    /* T1 = Z * Z */
+    sp_384_mont_sqr_12(t1, p->z, p384_mod, p384_mp_mod);
+    /* Z = Y * Z */
+    sp_384_mont_mul_12(z, p->y, p->z, p384_mod, p384_mp_mod);
+    /* Z = 2Z */
+    sp_384_mont_dbl_12(z, z, p384_mod);
+    /* T2 = X - T1 */
+    sp_384_mont_sub_12(t2, p->x, t1, p384_mod);
+    /* T1 = X + T1 */
+    sp_384_mont_add_12(t1, p->x, t1, p384_mod);
+    /* T2 = T1 * T2 */
+    sp_384_mont_mul_12(t2, t1, t2, p384_mod, p384_mp_mod);
+    /* T1 = 3T2 */
+    sp_384_mont_tpl_12(t1, t2, p384_mod);
+    /* Y = 2Y */
+    sp_384_mont_dbl_12(y, p->y, p384_mod);
+    /* Y = Y * Y */
+    sp_384_mont_sqr_12(y, y, p384_mod, p384_mp_mod);
+    /* T2 = Y * Y */
+    sp_384_mont_sqr_12(t2, y, p384_mod, p384_mp_mod);
+    /* T2 = T2/2 */
+    sp_384_div2_12(t2, t2, p384_mod);
+    /* Y = Y * X */
+    sp_384_mont_mul_12(y, y, p->x, p384_mod, p384_mp_mod);
+    /* X = T1 * T1 */
+    sp_384_mont_sqr_12(x, t1, p384_mod, p384_mp_mod);
+    /* X = X - Y */
+    sp_384_mont_sub_12(x, x, y, p384_mod);
+    /* X = X - Y */
+    sp_384_mont_sub_12(x, x, y, p384_mod);
+    /* Y = Y - X */
+    sp_384_mont_sub_12(y, y, x, p384_mod);
+    /* Y = Y * T1 */
+    sp_384_mont_mul_12(y, y, t1, p384_mod, p384_mp_mod);
+    /* Y = Y - T2 */
+    sp_384_mont_sub_12(y, y, t2, p384_mod);
+}
+
+/* Compare two numbers to determine if they are equal.
+ * Constant time implementation.
+ *
+ * a  First number to compare.
+ * b  Second number to compare.
+ * returns 1 when equal and 0 otherwise.
+ */
+static int sp_384_cmp_equal_12(const sp_digit* a, const sp_digit* b)
+{
+    return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3]) |
+            (a[4] ^ b[4]) | (a[5] ^ b[5]) | (a[6] ^ b[6]) | (a[7] ^ b[7]) |
+            (a[8] ^ b[8]) | (a[9] ^ b[9]) | (a[10] ^ b[10]) | (a[11] ^ b[11])) == 0;
+}
+
+/* Add two Montgomery form projective points.
+ *
+ * r  Result of addition.
+ * p  First point to add.
+ * q  Second point to add.
+ * t  Temporary ordinate data.
+ */
+static void sp_384_proj_point_add_12(sp_point_384* r, const sp_point_384* p, const sp_point_384* q,
+        sp_digit* t)
+{
+    const sp_point_384* ap[2];
+    sp_point_384* rp[2];
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*12;
+    sp_digit* t3 = t + 4*12;
+    sp_digit* t4 = t + 6*12;
+    sp_digit* t5 = t + 8*12;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+    int i;
+
+    /* Ensure only the first point is the same as the result. */
+    if (q == r) {
+        const sp_point_384* a = p;
+        p = q;
+        q = a;
+    }
+
+    /* Check double */
+    (void)sp_384_sub_12(t1, p384_mod, q->y);
+    sp_384_norm_12(t1);
+    if ((sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) &
+        (sp_384_cmp_equal_12(p->y, q->y) | sp_384_cmp_equal_12(p->y, t1))) != 0) {
+        sp_384_proj_point_dbl_12(r, p, t);
+    }
+    else {
+        rp[0] = r;
+
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point_384));
+        x = rp[p->infinity | q->infinity]->x;
+        y = rp[p->infinity | q->infinity]->y;
+        z = rp[p->infinity | q->infinity]->z;
+
+        ap[0] = p;
+        ap[1] = q;
+        for (i=0; i<12; i++) {
+            r->x[i] = ap[p->infinity]->x[i];
+        }
+        for (i=0; i<12; i++) {
+            r->y[i] = ap[p->infinity]->y[i];
+        }
+        for (i=0; i<12; i++) {
+            r->z[i] = ap[p->infinity]->z[i];
+        }
+        r->infinity = ap[p->infinity]->infinity;
+
+        /* U1 = X1*Z2^2 */
+        sp_384_mont_sqr_12(t1, q->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t3, t1, q->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t1, t1, x, p384_mod, p384_mp_mod);
+        /* U2 = X2*Z1^2 */
+        sp_384_mont_sqr_12(t2, z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t4, t2, z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t2, t2, q->x, p384_mod, p384_mp_mod);
+        /* S1 = Y1*Z2^3 */
+        sp_384_mont_mul_12(t3, t3, y, p384_mod, p384_mp_mod);
+        /* S2 = Y2*Z1^3 */
+        sp_384_mont_mul_12(t4, t4, q->y, p384_mod, p384_mp_mod);
+        /* H = U2 - U1 */
+        sp_384_mont_sub_12(t2, t2, t1, p384_mod);
+        /* R = S2 - S1 */
+        sp_384_mont_sub_12(t4, t4, t3, p384_mod);
+        /* Z3 = H*Z1*Z2 */
+        sp_384_mont_mul_12(z, z, q->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(z, z, t2, p384_mod, p384_mp_mod);
+        /* X3 = R^2 - H^3 - 2*U1*H^2 */
+        sp_384_mont_sqr_12(x, t4, p384_mod, p384_mp_mod);
+        sp_384_mont_sqr_12(t5, t2, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(y, t1, t5, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t5, t5, t2, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_12(x, x, t5, p384_mod);
+        sp_384_mont_dbl_12(t1, y, p384_mod);
+        sp_384_mont_sub_12(x, x, t1, p384_mod);
+        /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
+        sp_384_mont_sub_12(y, y, x, p384_mod);
+        sp_384_mont_mul_12(y, y, t4, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t5, t5, t3, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_12(y, y, t5, p384_mod);
+    }
+}
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_fast_12(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
+        int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 td[16];
+    sp_point_384 rtd;
+    sp_digit tmpd[2 * 12 * 6];
+#endif
+    sp_point_384* t;
+    sp_point_384* rt;
+    sp_digit* tmp;
+    sp_digit n;
+    int i;
+    int c, y;
+    int err;
+
+    (void)heap;
+
+    err = sp_384_point_new_12(heap, rtd, rt);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 16, heap, DYNAMIC_TYPE_ECC);
+    if (t == NULL)
+        err = MEMORY_E;
+    tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, heap,
+                             DYNAMIC_TYPE_ECC);
+    if (tmp == NULL)
+        err = MEMORY_E;
+#else
+    t = td;
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        /* t[0] = {0, 0, 1} * norm */
+        XMEMSET(&t[0], 0, sizeof(t[0]));
+        t[0].infinity = 1;
+        /* t[1] = {g->x, g->y, g->z} * norm */
+        (void)sp_384_mod_mul_norm_12(t[1].x, g->x, p384_mod);
+        (void)sp_384_mod_mul_norm_12(t[1].y, g->y, p384_mod);
+        (void)sp_384_mod_mul_norm_12(t[1].z, g->z, p384_mod);
+        t[1].infinity = 0;
+        sp_384_proj_point_dbl_12(&t[ 2], &t[ 1], tmp);
+        t[ 2].infinity = 0;
+        sp_384_proj_point_add_12(&t[ 3], &t[ 2], &t[ 1], tmp);
+        t[ 3].infinity = 0;
+        sp_384_proj_point_dbl_12(&t[ 4], &t[ 2], tmp);
+        t[ 4].infinity = 0;
+        sp_384_proj_point_add_12(&t[ 5], &t[ 3], &t[ 2], tmp);
+        t[ 5].infinity = 0;
+        sp_384_proj_point_dbl_12(&t[ 6], &t[ 3], tmp);
+        t[ 6].infinity = 0;
+        sp_384_proj_point_add_12(&t[ 7], &t[ 4], &t[ 3], tmp);
+        t[ 7].infinity = 0;
+        sp_384_proj_point_dbl_12(&t[ 8], &t[ 4], tmp);
+        t[ 8].infinity = 0;
+        sp_384_proj_point_add_12(&t[ 9], &t[ 5], &t[ 4], tmp);
+        t[ 9].infinity = 0;
+        sp_384_proj_point_dbl_12(&t[10], &t[ 5], tmp);
+        t[10].infinity = 0;
+        sp_384_proj_point_add_12(&t[11], &t[ 6], &t[ 5], tmp);
+        t[11].infinity = 0;
+        sp_384_proj_point_dbl_12(&t[12], &t[ 6], tmp);
+        t[12].infinity = 0;
+        sp_384_proj_point_add_12(&t[13], &t[ 7], &t[ 6], tmp);
+        t[13].infinity = 0;
+        sp_384_proj_point_dbl_12(&t[14], &t[ 7], tmp);
+        t[14].infinity = 0;
+        sp_384_proj_point_add_12(&t[15], &t[ 8], &t[ 7], tmp);
+        t[15].infinity = 0;
+
+        i = 10;
+        n = k[i+1] << 0;
+        c = 28;
+        y = n >> 28;
+        XMEMCPY(rt, &t[y], sizeof(sp_point_384));
+        n <<= 4;
+        for (; i>=0 || c>=4; ) {
+            if (c < 4) {
+                n |= k[i--];
+                c += 32;
+            }
+            y = (n >> 28) & 0xf;
+            n <<= 4;
+            c -= 4;
+
+            sp_384_proj_point_dbl_12(rt, rt, tmp);
+            sp_384_proj_point_dbl_12(rt, rt, tmp);
+            sp_384_proj_point_dbl_12(rt, rt, tmp);
+            sp_384_proj_point_dbl_12(rt, rt, tmp);
+
+            sp_384_proj_point_add_12(rt, rt, &t[y], tmp);
+        }
+
+        if (map != 0) {
+            sp_384_map_12(r, rt, tmp);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_384));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 12 * 6);
+        XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+    }
+    if (t != NULL) {
+        XMEMSET(t, 0, sizeof(sp_point_384) * 16);
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    ForceZero(tmpd, sizeof(tmpd));
+    ForceZero(td, sizeof(td));
+#endif
+    sp_384_point_free_12(rt, 1, heap);
+
+    return err;
+}
+
+/* A table entry for pre-computed points. */
+typedef struct sp_table_entry_384 {
+    sp_digit x[12];
+    sp_digit y[12];
+} sp_table_entry_384;
+
+#ifdef FP_ECC
+/* Double the Montgomery form projective point p a number of times.
+ *
+ * r  Result of repeated doubling of point.
+ * p  Point to double.
+ * n  Number of times to double
+ * t  Temporary ordinate data.
+ */
+static void sp_384_proj_point_dbl_n_12(sp_point_384* p, int n, sp_digit* t)
+{
+    sp_digit* w = t;
+    sp_digit* a = t + 2*12;
+    sp_digit* b = t + 4*12;
+    sp_digit* t1 = t + 6*12;
+    sp_digit* t2 = t + 8*12;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+
+    x = p->x;
+    y = p->y;
+    z = p->z;
+
+    /* Y = 2*Y */
+    sp_384_mont_dbl_12(y, y, p384_mod);
+    /* W = Z^4 */
+    sp_384_mont_sqr_12(w, z, p384_mod, p384_mp_mod);
+    sp_384_mont_sqr_12(w, w, p384_mod, p384_mp_mod);
+
+#ifndef WOLFSSL_SP_SMALL
+    while (--n > 0)
+#else
+    while (--n >= 0)
+#endif
+    {
+        /* A = 3*(X^2 - W) */
+        sp_384_mont_sqr_12(t1, x, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_12(t1, t1, w, p384_mod);
+        sp_384_mont_tpl_12(a, t1, p384_mod);
+        /* B = X*Y^2 */
+        sp_384_mont_sqr_12(t1, y, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(b, t1, x, p384_mod, p384_mp_mod);
+        /* X = A^2 - 2B */
+        sp_384_mont_sqr_12(x, a, p384_mod, p384_mp_mod);
+        sp_384_mont_dbl_12(t2, b, p384_mod);
+        sp_384_mont_sub_12(x, x, t2, p384_mod);
+        /* Z = Z*Y */
+        sp_384_mont_mul_12(z, z, y, p384_mod, p384_mp_mod);
+        /* t2 = Y^4 */
+        sp_384_mont_sqr_12(t1, t1, p384_mod, p384_mp_mod);
+#ifdef WOLFSSL_SP_SMALL
+        if (n != 0)
+#endif
+        {
+            /* W = W*Y^4 */
+            sp_384_mont_mul_12(w, w, t1, p384_mod, p384_mp_mod);
+        }
+        /* y = 2*A*(B - X) - Y^4 */
+        sp_384_mont_sub_12(y, b, x, p384_mod);
+        sp_384_mont_mul_12(y, y, a, p384_mod, p384_mp_mod);
+        sp_384_mont_dbl_12(y, y, p384_mod);
+        sp_384_mont_sub_12(y, y, t1, p384_mod);
+    }
+#ifndef WOLFSSL_SP_SMALL
+    /* A = 3*(X^2 - W) */
+    sp_384_mont_sqr_12(t1, x, p384_mod, p384_mp_mod);
+    sp_384_mont_sub_12(t1, t1, w, p384_mod);
+    sp_384_mont_tpl_12(a, t1, p384_mod);
+    /* B = X*Y^2 */
+    sp_384_mont_sqr_12(t1, y, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_12(b, t1, x, p384_mod, p384_mp_mod);
+    /* X = A^2 - 2B */
+    sp_384_mont_sqr_12(x, a, p384_mod, p384_mp_mod);
+    sp_384_mont_dbl_12(t2, b, p384_mod);
+    sp_384_mont_sub_12(x, x, t2, p384_mod);
+    /* Z = Z*Y */
+    sp_384_mont_mul_12(z, z, y, p384_mod, p384_mp_mod);
+    /* t2 = Y^4 */
+    sp_384_mont_sqr_12(t1, t1, p384_mod, p384_mp_mod);
+    /* y = 2*A*(B - X) - Y^4 */
+    sp_384_mont_sub_12(y, b, x, p384_mod);
+    sp_384_mont_mul_12(y, y, a, p384_mod, p384_mp_mod);
+    sp_384_mont_dbl_12(y, y, p384_mod);
+    sp_384_mont_sub_12(y, y, t1, p384_mod);
+#endif
+    /* Y = Y/2 */
+    sp_384_div2_12(y, y, p384_mod);
+}
+
+#endif /* FP_ECC */
+/* Add two Montgomery form projective points. The second point has a q value of
+ * one.
+ * Only the first point can be the same pointer as the result point.
+ *
+ * r  Result of addition.
+ * p  First point to add.
+ * q  Second point to add.
+ * t  Temporary ordinate data.
+ */
+static void sp_384_proj_point_add_qz1_12(sp_point_384* r, const sp_point_384* p,
+        const sp_point_384* q, sp_digit* t)
+{
+    const sp_point_384* ap[2];
+    sp_point_384* rp[2];
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*12;
+    sp_digit* t3 = t + 4*12;
+    sp_digit* t4 = t + 6*12;
+    sp_digit* t5 = t + 8*12;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+    int i;
+
+    /* Check double */
+    (void)sp_384_sub_12(t1, p384_mod, q->y);
+    sp_384_norm_12(t1);
+    if ((sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) &
+        (sp_384_cmp_equal_12(p->y, q->y) | sp_384_cmp_equal_12(p->y, t1))) != 0) {
+        sp_384_proj_point_dbl_12(r, p, t);
+    }
+    else {
+        rp[0] = r;
+
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point_384));
+        x = rp[p->infinity | q->infinity]->x;
+        y = rp[p->infinity | q->infinity]->y;
+        z = rp[p->infinity | q->infinity]->z;
+
+        ap[0] = p;
+        ap[1] = q;
+        for (i=0; i<12; i++) {
+            r->x[i] = ap[p->infinity]->x[i];
+        }
+        for (i=0; i<12; i++) {
+            r->y[i] = ap[p->infinity]->y[i];
+        }
+        for (i=0; i<12; i++) {
+            r->z[i] = ap[p->infinity]->z[i];
+        }
+        r->infinity = ap[p->infinity]->infinity;
+
+        /* U2 = X2*Z1^2 */
+        sp_384_mont_sqr_12(t2, z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t4, t2, z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t2, t2, q->x, p384_mod, p384_mp_mod);
+        /* S2 = Y2*Z1^3 */
+        sp_384_mont_mul_12(t4, t4, q->y, p384_mod, p384_mp_mod);
+        /* H = U2 - X1 */
+        sp_384_mont_sub_12(t2, t2, x, p384_mod);
+        /* R = S2 - Y1 */
+        sp_384_mont_sub_12(t4, t4, y, p384_mod);
+        /* Z3 = H*Z1 */
+        sp_384_mont_mul_12(z, z, t2, p384_mod, p384_mp_mod);
+        /* X3 = R^2 - H^3 - 2*X1*H^2 */
+        sp_384_mont_sqr_12(t1, t4, p384_mod, p384_mp_mod);
+        sp_384_mont_sqr_12(t5, t2, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t3, x, t5, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t5, t5, t2, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_12(x, t1, t5, p384_mod);
+        sp_384_mont_dbl_12(t1, t3, p384_mod);
+        sp_384_mont_sub_12(x, x, t1, p384_mod);
+        /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */
+        sp_384_mont_sub_12(t3, t3, x, p384_mod);
+        sp_384_mont_mul_12(t3, t3, t4, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t5, t5, y, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_12(y, t3, t5, p384_mod);
+    }
+}
+
+#ifdef WOLFSSL_SP_SMALL
+#ifdef FP_ECC
+/* Convert the projective point to affine.
+ * Ordinates are in Montgomery form.
+ *
+ * a  Point to convert.
+ * t  Temporary data.
+ */
+static void sp_384_proj_to_affine_12(sp_point_384* a, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2 * 12;
+    sp_digit* tmp = t + 4 * 12;
+
+    sp_384_mont_inv_12(t1, a->z, tmp);
+
+    sp_384_mont_sqr_12(t2, t1, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_12(t1, t2, t1, p384_mod, p384_mp_mod);
+
+    sp_384_mont_mul_12(a->x, a->x, t2, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_12(a->y, a->y, t1, p384_mod, p384_mp_mod);
+    XMEMCPY(a->z, p384_norm_mod, sizeof(p384_norm_mod));
+}
+
+/* Generate the pre-computed table of points for the base point.
+ *
+ * a      The base point.
+ * table  Place to store generated point data.
+ * tmp    Temporary data.
+ * heap  Heap to use for allocation.
+ */
+static int sp_384_gen_stripe_table_12(const sp_point_384* a,
+        sp_table_entry_384* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 td, s1d, s2d;
+#endif
+    sp_point_384* t;
+    sp_point_384* s1 = NULL;
+    sp_point_384* s2 = NULL;
+    int i, j;
+    int err;
+
+    (void)heap;
+
+    err = sp_384_point_new_12(heap, td, t);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, s1d, s1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, s2d, s2);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_12(t->x, a->x, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_12(t->y, a->y, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_12(t->z, a->z, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        t->infinity = 0;
+        sp_384_proj_to_affine_12(t, tmp);
+
+        XMEMCPY(s1->z, p384_norm_mod, sizeof(p384_norm_mod));
+        s1->infinity = 0;
+        XMEMCPY(s2->z, p384_norm_mod, sizeof(p384_norm_mod));
+        s2->infinity = 0;
+
+        /* table[0] = {0, 0, infinity} */
+        XMEMSET(&table[0], 0, sizeof(sp_table_entry_384));
+        /* table[1] = Affine version of 'a' in Montgomery form */
+        XMEMCPY(table[1].x, t->x, sizeof(table->x));
+        XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+        for (i=1; i<4; i++) {
+            sp_384_proj_point_dbl_n_12(t, 96, tmp);
+            sp_384_proj_to_affine_12(t, tmp);
+            XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+            XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+        }
+
+        for (i=1; i<4; i++) {
+            XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+            XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+            for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+                XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+                XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+                sp_384_proj_point_add_qz1_12(t, s1, s2, tmp);
+                sp_384_proj_to_affine_12(t, tmp);
+                XMEMCPY(table[j].x, t->x, sizeof(table->x));
+                XMEMCPY(table[j].y, t->y, sizeof(table->y));
+            }
+        }
+    }
+
+    sp_384_point_free_12(s2, 0, heap);
+    sp_384_point_free_12(s1, 0, heap);
+    sp_384_point_free_12( t, 0, heap);
+
+    return err;
+}
+
+#endif /* FP_ECC */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_stripe_12(sp_point_384* r, const sp_point_384* g,
+        const sp_table_entry_384* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 rtd;
+    sp_point_384 pd;
+    sp_digit td[2 * 12 * 6];
+#endif
+    sp_point_384* rt;
+    sp_point_384* p = NULL;
+    sp_digit* t;
+    int i, j;
+    int y, x;
+    int err;
+
+    (void)g;
+    (void)heap;
+
+
+    err = sp_384_point_new_12(heap, rtd, rt);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, heap,
+                           DYNAMIC_TYPE_ECC);
+    if (t == NULL) {
+        err = MEMORY_E;
+    }
+#else
+    t = td;
+#endif
+
+    if (err == MP_OKAY) {
+        XMEMCPY(p->z, p384_norm_mod, sizeof(p384_norm_mod));
+        XMEMCPY(rt->z, p384_norm_mod, sizeof(p384_norm_mod));
+
+        y = 0;
+        for (j=0,x=95; j<4; j++,x+=96) {
+            y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+        }
+        XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+        XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+        rt->infinity = !y;
+        for (i=94; i>=0; i--) {
+            y = 0;
+            for (j=0,x=i; j<4; j++,x+=96) {
+                y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+            }
+
+            sp_384_proj_point_dbl_12(rt, rt, t);
+            XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+            XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+            p->infinity = !y;
+            sp_384_proj_point_add_qz1_12(rt, rt, p, t);
+        }
+
+        if (map != 0) {
+            sp_384_map_12(r, rt, t);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_384));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(p, 0, heap);
+    sp_384_point_free_12(rt, 0, heap);
+
+    return err;
+}
+
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+    #define FP_ENTRIES 16
+#endif
+
+typedef struct sp_cache_384_t {
+    sp_digit x[12];
+    sp_digit y[12];
+    sp_table_entry_384 table[16];
+    uint32_t cnt;
+    int set;
+} sp_cache_384_t;
+
+static THREAD_LS_T sp_cache_384_t sp_cache_384[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_384_last = -1;
+static THREAD_LS_T int sp_cache_384_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+    static volatile int initCacheMutex_384 = 0;
+    static wolfSSL_Mutex sp_cache_384_lock;
+#endif
+
+static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache)
+{
+    int i, j;
+    uint32_t least;
+
+    if (sp_cache_384_inited == 0) {
+        for (i=0; i<FP_ENTRIES; i++) {
+            sp_cache_384[i].set = 0;
+        }
+        sp_cache_384_inited = 1;
+    }
+
+    /* Compare point with those in cache. */
+    for (i=0; i<FP_ENTRIES; i++) {
+        if (!sp_cache_384[i].set)
+            continue;
+
+        if (sp_384_cmp_equal_12(g->x, sp_cache_384[i].x) &
+                           sp_384_cmp_equal_12(g->y, sp_cache_384[i].y)) {
+            sp_cache_384[i].cnt++;
+            break;
+        }
+    }
+
+    /* No match. */
+    if (i == FP_ENTRIES) {
+        /* Find empty entry. */
+        i = (sp_cache_384_last + 1) % FP_ENTRIES;
+        for (; i != sp_cache_384_last; i=(i+1)%FP_ENTRIES) {
+            if (!sp_cache_384[i].set) {
+                break;
+            }
+        }
+
+        /* Evict least used. */
+        if (i == sp_cache_384_last) {
+            least = sp_cache_384[0].cnt;
+            for (j=1; j<FP_ENTRIES; j++) {
+                if (sp_cache_384[j].cnt < least) {
+                    i = j;
+                    least = sp_cache_384[i].cnt;
+                }
+            }
+        }
+
+        XMEMCPY(sp_cache_384[i].x, g->x, sizeof(sp_cache_384[i].x));
+        XMEMCPY(sp_cache_384[i].y, g->y, sizeof(sp_cache_384[i].y));
+        sp_cache_384[i].set = 1;
+        sp_cache_384[i].cnt = 1;
+    }
+
+    *cache = &sp_cache_384[i];
+    sp_cache_384_last = i;
+}
+#endif /* FP_ECC */
+
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_12(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
+        int map, void* heap)
+{
+#ifndef FP_ECC
+    return sp_384_ecc_mulmod_fast_12(r, g, k, map, heap);
+#else
+    sp_digit tmp[2 * 12 * 7];
+    sp_cache_384_t* cache;
+    int err = MP_OKAY;
+
+#ifndef HAVE_THREAD_LS
+    if (initCacheMutex_384 == 0) {
+         wc_InitMutex(&sp_cache_384_lock);
+         initCacheMutex_384 = 1;
+    }
+    if (wc_LockMutex(&sp_cache_384_lock) != 0)
+       err = BAD_MUTEX_E;
+#endif /* HAVE_THREAD_LS */
+
+    if (err == MP_OKAY) {
+        sp_ecc_get_cache_384(g, &cache);
+        if (cache->cnt == 2)
+            sp_384_gen_stripe_table_12(g, cache->table, tmp, heap);
+
+#ifndef HAVE_THREAD_LS
+        wc_UnLockMutex(&sp_cache_384_lock);
+#endif /* HAVE_THREAD_LS */
+
+        if (cache->cnt < 2) {
+            err = sp_384_ecc_mulmod_fast_12(r, g, k, map, heap);
+        }
+        else {
+            err = sp_384_ecc_mulmod_stripe_12(r, g, cache->table, k,
+                    map, heap);
+        }
+    }
+
+    return err;
+#endif
+}
+
+#else
+#ifdef FP_ECC
+/* Generate the pre-computed table of points for the base point.
+ *
+ * a      The base point.
+ * table  Place to store generated point data.
+ * tmp    Temporary data.
+ * heap  Heap to use for allocation.
+ */
+static int sp_384_gen_stripe_table_12(const sp_point_384* a,
+        sp_table_entry_384* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 td, s1d, s2d;
+#endif
+    sp_point_384* t;
+    sp_point_384* s1 = NULL;
+    sp_point_384* s2 = NULL;
+    int i, j;
+    int err;
+
+    (void)heap;
+
+    err = sp_384_point_new_12(heap, td, t);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, s1d, s1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, s2d, s2);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_12(t->x, a->x, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_12(t->y, a->y, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_12(t->z, a->z, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        t->infinity = 0;
+        sp_384_proj_to_affine_12(t, tmp);
+
+        XMEMCPY(s1->z, p384_norm_mod, sizeof(p384_norm_mod));
+        s1->infinity = 0;
+        XMEMCPY(s2->z, p384_norm_mod, sizeof(p384_norm_mod));
+        s2->infinity = 0;
+
+        /* table[0] = {0, 0, infinity} */
+        XMEMSET(&table[0], 0, sizeof(sp_table_entry_384));
+        /* table[1] = Affine version of 'a' in Montgomery form */
+        XMEMCPY(table[1].x, t->x, sizeof(table->x));
+        XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+        for (i=1; i<8; i++) {
+            sp_384_proj_point_dbl_n_12(t, 48, tmp);
+            sp_384_proj_to_affine_12(t, tmp);
+            XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+            XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+        }
+
+        for (i=1; i<8; i++) {
+            XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+            XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+            for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+                XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+                XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+                sp_384_proj_point_add_qz1_12(t, s1, s2, tmp);
+                sp_384_proj_to_affine_12(t, tmp);
+                XMEMCPY(table[j].x, t->x, sizeof(table->x));
+                XMEMCPY(table[j].y, t->y, sizeof(table->y));
+            }
+        }
+    }
+
+    sp_384_point_free_12(s2, 0, heap);
+    sp_384_point_free_12(s1, 0, heap);
+    sp_384_point_free_12( t, 0, heap);
+
+    return err;
+}
+
+#endif /* FP_ECC */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_stripe_12(sp_point_384* r, const sp_point_384* g,
+        const sp_table_entry_384* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 rtd;
+    sp_point_384 pd;
+    sp_digit td[2 * 12 * 6];
+#endif
+    sp_point_384* rt;
+    sp_point_384* p = NULL;
+    sp_digit* t;
+    int i, j;
+    int y, x;
+    int err;
+
+    (void)g;
+    (void)heap;
+
+
+    err = sp_384_point_new_12(heap, rtd, rt);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, heap,
+                           DYNAMIC_TYPE_ECC);
+    if (t == NULL) {
+        err = MEMORY_E;
+    }
+#else
+    t = td;
+#endif
+
+    if (err == MP_OKAY) {
+        XMEMCPY(p->z, p384_norm_mod, sizeof(p384_norm_mod));
+        XMEMCPY(rt->z, p384_norm_mod, sizeof(p384_norm_mod));
+
+        y = 0;
+        for (j=0,x=47; j<8; j++,x+=48) {
+            y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+        }
+        XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+        XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+        rt->infinity = !y;
+        for (i=46; i>=0; i--) {
+            y = 0;
+            for (j=0,x=i; j<8; j++,x+=48) {
+                y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+            }
+
+            sp_384_proj_point_dbl_12(rt, rt, t);
+            XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+            XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+            p->infinity = !y;
+            sp_384_proj_point_add_qz1_12(rt, rt, p, t);
+        }
+
+        if (map != 0) {
+            sp_384_map_12(r, rt, t);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_384));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(p, 0, heap);
+    sp_384_point_free_12(rt, 0, heap);
+
+    return err;
+}
+
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+    #define FP_ENTRIES 16
+#endif
+
+typedef struct sp_cache_384_t {
+    sp_digit x[12];
+    sp_digit y[12];
+    sp_table_entry_384 table[256];
+    uint32_t cnt;
+    int set;
+} sp_cache_384_t;
+
+static THREAD_LS_T sp_cache_384_t sp_cache_384[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_384_last = -1;
+static THREAD_LS_T int sp_cache_384_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+    static volatile int initCacheMutex_384 = 0;
+    static wolfSSL_Mutex sp_cache_384_lock;
+#endif
+
+static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache)
+{
+    int i, j;
+    uint32_t least;
+
+    if (sp_cache_384_inited == 0) {
+        for (i=0; i<FP_ENTRIES; i++) {
+            sp_cache_384[i].set = 0;
+        }
+        sp_cache_384_inited = 1;
+    }
+
+    /* Compare point with those in cache. */
+    for (i=0; i<FP_ENTRIES; i++) {
+        if (!sp_cache_384[i].set)
+            continue;
+
+        if (sp_384_cmp_equal_12(g->x, sp_cache_384[i].x) &
+                           sp_384_cmp_equal_12(g->y, sp_cache_384[i].y)) {
+            sp_cache_384[i].cnt++;
+            break;
+        }
+    }
+
+    /* No match. */
+    if (i == FP_ENTRIES) {
+        /* Find empty entry. */
+        i = (sp_cache_384_last + 1) % FP_ENTRIES;
+        for (; i != sp_cache_384_last; i=(i+1)%FP_ENTRIES) {
+            if (!sp_cache_384[i].set) {
+                break;
+            }
+        }
+
+        /* Evict least used. */
+        if (i == sp_cache_384_last) {
+            least = sp_cache_384[0].cnt;
+            for (j=1; j<FP_ENTRIES; j++) {
+                if (sp_cache_384[j].cnt < least) {
+                    i = j;
+                    least = sp_cache_384[i].cnt;
+                }
+            }
+        }
+
+        XMEMCPY(sp_cache_384[i].x, g->x, sizeof(sp_cache_384[i].x));
+        XMEMCPY(sp_cache_384[i].y, g->y, sizeof(sp_cache_384[i].y));
+        sp_cache_384[i].set = 1;
+        sp_cache_384[i].cnt = 1;
+    }
+
+    *cache = &sp_cache_384[i];
+    sp_cache_384_last = i;
+}
+#endif /* FP_ECC */
+
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_12(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
+        int map, void* heap)
+{
+#ifndef FP_ECC
+    return sp_384_ecc_mulmod_fast_12(r, g, k, map, heap);
+#else
+    sp_digit tmp[2 * 12 * 7];
+    sp_cache_384_t* cache;
+    int err = MP_OKAY;
+
+#ifndef HAVE_THREAD_LS
+    if (initCacheMutex_384 == 0) {
+         wc_InitMutex(&sp_cache_384_lock);
+         initCacheMutex_384 = 1;
+    }
+    if (wc_LockMutex(&sp_cache_384_lock) != 0)
+       err = BAD_MUTEX_E;
+#endif /* HAVE_THREAD_LS */
+
+    if (err == MP_OKAY) {
+        sp_ecc_get_cache_384(g, &cache);
+        if (cache->cnt == 2)
+            sp_384_gen_stripe_table_12(g, cache->table, tmp, heap);
+
+#ifndef HAVE_THREAD_LS
+        wc_UnLockMutex(&sp_cache_384_lock);
+#endif /* HAVE_THREAD_LS */
+
+        if (cache->cnt < 2) {
+            err = sp_384_ecc_mulmod_fast_12(r, g, k, map, heap);
+        }
+        else {
+            err = sp_384_ecc_mulmod_stripe_12(r, g, cache->table, k,
+                    map, heap);
+        }
+    }
+
+    return err;
+#endif
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * km    Scalar to multiply by.
+ * p     Point to multiply.
+ * r     Resulting point.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_384(mp_int* km, ecc_point* gm, ecc_point* r, int map,
+        void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[12];
+#endif
+    sp_point_384* point;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    err = sp_384_point_new_12(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL)
+            err = MEMORY_E;
+    }
+#else
+    k = kd;
+#endif
+    if (err == MP_OKAY) {
+        sp_384_from_mp(k, 12, km);
+        sp_384_point_from_ecc_point_12(point, gm);
+
+            err = sp_384_ecc_mulmod_12(point, point, k, map, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_to_ecc_point_12(point, r);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(point, 0, heap);
+
+    return err;
+}
+
+#ifdef WOLFSSL_SP_SMALL
+static const sp_table_entry_384 p384_table[16] = {
+    /* 0 */
+    { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 */
+    { { 0x49c0b528,0x3dd07566,0xa0d6ce38,0x20e378e2,0x541b4d6e,0x879c3afc,
+        0x59a30eff,0x64548684,0x614ede2b,0x812ff723,0x299e1513,0x4d3aadc2 },
+      { 0x4b03a4fe,0x23043dad,0x7bb4a9ac,0xa1bfa8bf,0x2e83b050,0x8bade756,
+        0x68f4ffd9,0xc6c35219,0x3969a840,0xdd800226,0x5a15c5e9,0x2b78abc2 } },
+    /* 2 */
+    { { 0xf26feef9,0x24480c57,0x3a0e1240,0xc31a2694,0x273e2bc7,0x735002c3,
+        0x3ef1ed4c,0x8c42e9c5,0x7f4948e8,0x028babf6,0x8a978632,0x6a502f43 },
+      { 0xb74536fe,0xf5f13a46,0xd8a9f0eb,0x1d218bab,0x37232768,0x30f36bcc,
+        0x576e8c18,0xc5317b31,0x9bbcb766,0xef1d57a6,0xb3e3d4dc,0x917c4930 } },
+    /* 3 */
+    { { 0xe349ddd0,0x11426e2e,0x9b2fc250,0x9f117ef9,0xec0174a6,0xff36b480,
+        0x18458466,0x4f4bde76,0x05806049,0x2f2edb6d,0x19dfca92,0x8adc75d1 },
+      { 0xb7d5a7ce,0xa619d097,0xa34411e9,0x874275e5,0x0da4b4ef,0x5403e047,
+        0x77901d8f,0x2ebaafd9,0xa747170f,0x5e63ebce,0x7f9d8036,0x12a36944 } },
+    /* 4 */
+    { { 0x2f9fbe67,0x378205de,0x7f728e44,0xc4afcb83,0x682e00f1,0xdbcec06c,
+        0x114d5423,0xf2a145c3,0x7a52463e,0xa01d9874,0x7d717b0a,0xfc0935b1 },
+      { 0xd4d01f95,0x9653bc4f,0x9560ad34,0x9aa83ea8,0xaf8e3f3f,0xf77943dc,
+        0xe86fe16e,0x70774a10,0xbf9ffdcf,0x6b62e6f1,0x588745c9,0x8a72f39e } },
+    /* 5 */
+    { { 0x2341c342,0x73ade4da,0xea704422,0xdd326e54,0x3741cef3,0x336c7d98,
+        0x59e61549,0x1eafa00d,0xbd9a3efd,0xcd3ed892,0xc5c6c7e4,0x03faf26c },
+      { 0x3045f8ac,0x087e2fcf,0x174f1e73,0x14a65532,0xfe0af9a7,0x2cf84f28,
+        0x2cdc935b,0xddfd7a84,0x6929c895,0x4c0f117b,0x4c8bcfcc,0x356572d6 } },
+    /* 6 */
+    { { 0x3f3b236f,0xfab08607,0x81e221da,0x19e9d41d,0x3927b428,0xf3f6571e,
+        0x7550f1f6,0x4348a933,0xa85e62f0,0x7167b996,0x7f5452bf,0x62d43759 },
+      { 0xf2955926,0xd85feb9e,0x6df78353,0x440a561f,0x9ca36b59,0x389668ec,
+        0xa22da016,0x052bf1a1,0xf6093254,0xbdfbff72,0xe22209f3,0x94e50f28 } },
+    /* 7 */
+    { { 0x3062e8af,0x90b2e5b3,0xe8a3d369,0xa8572375,0x201db7b1,0x3fe1b00b,
+        0xee651aa2,0xe926def0,0xb9b10ad7,0x6542c9be,0xa2fcbe74,0x098e309b },
+      { 0xfff1d63f,0x779deeb3,0x20bfd374,0x23d0e80a,0x8768f797,0x8452bb3b,
+        0x1f952856,0xcf75bb4d,0x29ea3faa,0x8fe6b400,0x81373a53,0x12bd3e40 } },
+    /* 8 */
+    { { 0x16973cf4,0x070d34e1,0x7e4f34f7,0x20aee08b,0x5eb8ad29,0x269af9b9,
+        0xa6a45dda,0xdde0a036,0x63df41e0,0xa18b528e,0xa260df2a,0x03cc71b2 },
+      { 0xa06b1dd7,0x24a6770a,0x9d2675d3,0x5bfa9c11,0x96844432,0x73c1e2a1,
+        0x131a6cf0,0x3660558d,0x2ee79454,0xb0289c83,0xc6d8ddcd,0xa6aefb01 } },
+    /* 9 */
+    { { 0x01ab5245,0xba1464b4,0xc48d93ff,0x9b8d0b6d,0x93ad272c,0x939867dc,
+        0xae9fdc77,0xbebe085e,0x894ea8bd,0x73ae5103,0x39ac22e1,0x740fc89a },
+      { 0x28e23b23,0x5e28b0a3,0xe13104d0,0x2352722e,0xb0a2640d,0xf4667a18,
+        0x49bb37c3,0xac74a72e,0xe81e183a,0x79f734f0,0x3fd9c0eb,0xbffe5b6c } },
+    /* 10 */
+    { { 0x00623f3b,0x03cf2922,0x5f29ebff,0x095c7111,0x80aa6823,0x42d72247,
+        0x7458c0b0,0x044c7ba1,0x0959ec20,0xca62f7ef,0xf8ca929f,0x40ae2ab7 },
+      { 0xa927b102,0xb8c5377a,0xdc031771,0x398a86a0,0xc216a406,0x04908f9d,
+        0x918d3300,0xb423a73a,0xe0b94739,0x634b0ff1,0x2d69f697,0xe29de725 } },
+    /* 11 */
+    { { 0x8435af04,0x744d1400,0xfec192da,0x5f255b1d,0x336dc542,0x1f17dc12,
+        0x636a68a8,0x5c90c2a7,0x7704ca1e,0x960c9eb7,0x6fb3d65a,0x9de8cf1e },
+      { 0x511d3d06,0xc60fee0d,0xf9eb52c7,0x466e2313,0x206b0914,0x743c0f5f,
+        0x2191aa4d,0x42f55bac,0xffebdbc2,0xcefc7c8f,0xe6e8ed1c,0xd4fa6081 } },
+    /* 12 */
+    { { 0x98683186,0x867db639,0xddcc4ea9,0xfb5cf424,0xd4f0e7bd,0xcc9a7ffe,
+        0x7a779f7e,0x7c57f71c,0xd6b25ef2,0x90774079,0xb4081680,0x90eae903 },
+      { 0x0ee1fceb,0xdf2aae5e,0xe86c1a1f,0x3ff1da24,0xca193edf,0x80f587d6,
+        0xdc9b9d6a,0xa5695523,0x85920303,0x7b840900,0xba6dbdef,0x1efa4dfc } },
+    /* 13 */
+    { { 0xe0540015,0xfbd838f9,0xc39077dc,0x2c323946,0xad619124,0x8b1fb9e6,
+        0x0ca62ea8,0x9612440c,0x2dbe00ff,0x9ad9b52c,0xae197643,0xf52abaa1 },
+      { 0x2cac32ad,0xd0e89894,0x62a98f91,0xdfb79e42,0x276f55cb,0x65452ecf,
+        0x7ad23e12,0xdb1ac0d2,0xde4986f0,0xf68c5f6a,0x82ce327d,0x389ac37b } },
+    /* 14 */
+    { { 0xb8a9e8c9,0xcd96866d,0x5bb8091e,0xa11963b8,0x045b3cd2,0xc7f90d53,
+        0x80f36504,0x755a72b5,0x21d3751c,0x46f8b399,0x53c193de,0x4bffdc91 },
+      { 0xb89554e7,0xcd15c049,0xf7a26be6,0x353c6754,0xbd41d970,0x79602370,
+        0x12b176c0,0xde16470b,0x40c8809d,0x56ba1175,0xe435fb1e,0xe2db35c3 } },
+    /* 15 */
+    { { 0x6328e33f,0xd71e4aab,0xaf8136d1,0x5486782b,0x86d57231,0x07a4995f,
+        0x1651a968,0xf1f0a5bd,0x76803b6d,0xa5dc5b24,0x42dda935,0x5c587cbc },
+      { 0xbae8b4c0,0x2b6cdb32,0xb1331138,0x66d1598b,0x5d7e9614,0x4a23b2d2,
+        0x74a8c05d,0x93e402a6,0xda7ce82e,0x45ac94e6,0xe463d465,0xeb9f8281 } },
+};
+
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_base_12(sp_point_384* r, const sp_digit* k,
+        int map, void* heap)
+{
+    return sp_384_ecc_mulmod_stripe_12(r, &p384_base, p384_table,
+                                      k, map, heap);
+}
+
+#else
+static const sp_table_entry_384 p384_table[256] = {
+    /* 0 */
+    { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 */
+    { { 0x49c0b528,0x3dd07566,0xa0d6ce38,0x20e378e2,0x541b4d6e,0x879c3afc,
+        0x59a30eff,0x64548684,0x614ede2b,0x812ff723,0x299e1513,0x4d3aadc2 },
+      { 0x4b03a4fe,0x23043dad,0x7bb4a9ac,0xa1bfa8bf,0x2e83b050,0x8bade756,
+        0x68f4ffd9,0xc6c35219,0x3969a840,0xdd800226,0x5a15c5e9,0x2b78abc2 } },
+    /* 2 */
+    { { 0x2b0c535b,0x29864753,0x70506296,0x90dd6953,0x216ab9ac,0x038cd6b4,
+        0xbe12d76a,0x3df9b7b7,0x5f347bdb,0x13f4d978,0x13e94489,0x222c5c9c },
+      { 0x2680dc64,0x5f8e796f,0x58352417,0x120e7cb7,0xd10740b8,0x254b5d8a,
+        0x5337dee6,0xc38b8efb,0x94f02247,0xf688c2e1,0x6c25bc4c,0x7b5c75f3 } },
+    /* 3 */
+    { { 0x9edffea5,0xe26a3cc3,0x37d7e9fc,0x35bbfd1c,0x9bde3ef6,0xf0e7700d,
+        0x1a538f5a,0x0380eb47,0x05bf9eb3,0x2e9da8bb,0x1a460c3e,0xdbb93c73 },
+      { 0xf526b605,0x37dba260,0xfd785537,0x95d4978e,0xed72a04a,0x24ed793a,
+        0x76005b1a,0x26948377,0x9e681f82,0x99f557b9,0xd64954ef,0xae5f9557 } },
+    /* 4 */
+    { { 0xf26feef9,0x24480c57,0x3a0e1240,0xc31a2694,0x273e2bc7,0x735002c3,
+        0x3ef1ed4c,0x8c42e9c5,0x7f4948e8,0x028babf6,0x8a978632,0x6a502f43 },
+      { 0xb74536fe,0xf5f13a46,0xd8a9f0eb,0x1d218bab,0x37232768,0x30f36bcc,
+        0x576e8c18,0xc5317b31,0x9bbcb766,0xef1d57a6,0xb3e3d4dc,0x917c4930 } },
+    /* 5 */
+    { { 0xe349ddd0,0x11426e2e,0x9b2fc250,0x9f117ef9,0xec0174a6,0xff36b480,
+        0x18458466,0x4f4bde76,0x05806049,0x2f2edb6d,0x19dfca92,0x8adc75d1 },
+      { 0xb7d5a7ce,0xa619d097,0xa34411e9,0x874275e5,0x0da4b4ef,0x5403e047,
+        0x77901d8f,0x2ebaafd9,0xa747170f,0x5e63ebce,0x7f9d8036,0x12a36944 } },
+    /* 6 */
+    { { 0x4fc52870,0x28f9c07a,0x1a53a961,0xce0b3748,0x0e1828d9,0xd550fa18,
+        0x6adb225a,0xa24abaf7,0x6e58a348,0xd11ed0a5,0x948acb62,0xf3d811e6 },
+      { 0x4c61ed22,0x8618dd77,0x80b47c9d,0x0bb747f9,0xde6b8559,0x22bf796f,
+        0x680a21e9,0xfdfd1c6d,0x2af2c9dd,0xc0db1577,0xc1e90f3d,0xa09379e6 } },
+    /* 7 */
+    { { 0xe085c629,0x386c66ef,0x095bc89a,0x5fc2a461,0x203f4b41,0x1353d631,
+        0x7e4bd8f5,0x7ca1972b,0xa7df8ce9,0xb077380a,0xee7e4ea3,0xd8a90389 },
+      { 0xe7b14461,0x1bc74dc7,0x0c9c4f78,0xdc2cb014,0x84ef0a10,0x52b4b3a6,
+        0x20327fe2,0xbde6ea5d,0x660f9615,0xb71ec435,0xb8ad8173,0xeede5a04 } },
+    /* 8 */
+    { { 0x893b9a2d,0x5584cbb3,0x00850c5d,0x820c660b,0x7df2d43d,0x4126d826,
+        0x0109e801,0xdd5bbbf0,0x38172f1c,0x85b92ee3,0xf31430d9,0x609d4f93 },
+      { 0xeadaf9d6,0x1e059a07,0x0f125fb0,0x70e6536c,0x560f20e7,0xd6220751,
+        0x7aaf3a9a,0xa59489ae,0x64bae14e,0x7b70e2f6,0x76d08249,0x0dd03701 } },
+    /* 9 */
+    { { 0x8510521f,0x4cc13be8,0xf724cc17,0x87315ba9,0x353dc263,0xb49d83bb,
+        0x0c279257,0x8b677efe,0xc93c9537,0x510a1c1c,0xa4702c99,0x33e30cd8 },
+      { 0x2208353f,0xf0ffc89d,0xced42b2b,0x0170fa8d,0x26e2a5f5,0x090851ed,
+        0xecb52c96,0x81276455,0x7fe1adf4,0x0646c4e1,0xb0868eab,0x513f047e } },
+    /* 10 */
+    { { 0xdf5bdf53,0xc07611f4,0x58b11a6d,0x45d331a7,0x1c4ee394,0x58965daf,
+        0x5a5878d1,0xba8bebe7,0x82dd3025,0xaecc0a18,0xa923eb8b,0xcf2a3899 },
+      { 0xd24fd048,0xf98c9281,0x8bbb025d,0x841bfb59,0xc9ab9d53,0xb8ddf8ce,
+        0x7fef044e,0x538a4cb6,0x23236662,0x092ac21f,0x0b66f065,0xa919d385 } },
+    /* 11 */
+    { { 0x85d480d8,0x3db03b40,0x1b287a7d,0x8cd9f479,0x4a8f3bae,0x8f24dc75,
+        0x3db41892,0x482eb800,0x9c56e0f5,0x38bf9eb3,0x9a91dc6f,0x8b977320 },
+      { 0x7209cfc2,0xa31b05b2,0x05b2db70,0x4c49bf85,0xd619527b,0x56462498,
+        0x1fac51ba,0x3fe51039,0xab4b8342,0xfb04f55e,0x04c6eabf,0xc07c10dc } },
+    /* 12 */
+    { { 0xdb32f048,0xad22fe4c,0x475ed6df,0x5f23bf91,0xaa66b6cb,0xa50ce0c0,
+        0xf03405c0,0xdf627a89,0xf95e2d6a,0x3674837d,0xba42e64e,0x081c95b6 },
+      { 0xe71d6ceb,0xeba3e036,0x6c6b0271,0xb45bcccf,0x0684701d,0x67b47e63,
+        0xe712523f,0x60f8f942,0x5cd47adc,0x82423472,0x87649cbb,0x83027d79 } },
+    /* 13 */
+    { { 0x3615b0b8,0xb3929ea6,0xa54dac41,0xb41441fd,0xb5b6a368,0x8995d556,
+        0x167ef05e,0xa80d4529,0x6d25a27f,0xf6bcb4a1,0x7bd55b68,0x210d6a4c },
+      { 0x25351130,0xf3804abb,0x903e37eb,0x1d2df699,0x084c25c8,0x5f201efc,
+        0xa1c68e91,0x31a28c87,0x563f62a5,0x81dad253,0xd6c415d4,0x5dd6de70 } },
+    /* 14 */
+    { { 0x846612ce,0x29f470fd,0xda18d997,0x986f3eec,0x2f34af86,0x6b84c161,
+        0x46ddaf8b,0x5ef0a408,0xe49e795f,0x14405a00,0xaa2f7a37,0x5f491b16 },
+      { 0xdb41b38d,0xc7f07ae4,0x18fbfcaa,0xef7d119e,0x14443b19,0x3a18e076,
+        0x79a19926,0x4356841a,0xe2226fbe,0x91f4a91c,0x3cc88721,0xdc77248c } },
+    /* 15 */
+    { { 0xe4b1ec9d,0xd570ff1a,0xe7eef706,0x21d23e0e,0xca19e086,0x3cde40f4,
+        0xcd4bb270,0x7d6523c4,0xbf13aa6c,0x16c1f06c,0xd14c4b60,0x5aa7245a },
+      { 0x44b74de8,0x37f81467,0x620a934e,0x839e7a17,0xde8b1aa1,0xf74d14e8,
+        0xf30d75e2,0x8789fa51,0xc81c261e,0x09b24052,0x33c565ee,0x654e2678 } },
+    /* 16 */
+    { { 0x2f9fbe67,0x378205de,0x7f728e44,0xc4afcb83,0x682e00f1,0xdbcec06c,
+        0x114d5423,0xf2a145c3,0x7a52463e,0xa01d9874,0x7d717b0a,0xfc0935b1 },
+      { 0xd4d01f95,0x9653bc4f,0x9560ad34,0x9aa83ea8,0xaf8e3f3f,0xf77943dc,
+        0xe86fe16e,0x70774a10,0xbf9ffdcf,0x6b62e6f1,0x588745c9,0x8a72f39e } },
+    /* 17 */
+    { { 0x2341c342,0x73ade4da,0xea704422,0xdd326e54,0x3741cef3,0x336c7d98,
+        0x59e61549,0x1eafa00d,0xbd9a3efd,0xcd3ed892,0xc5c6c7e4,0x03faf26c },
+      { 0x3045f8ac,0x087e2fcf,0x174f1e73,0x14a65532,0xfe0af9a7,0x2cf84f28,
+        0x2cdc935b,0xddfd7a84,0x6929c895,0x4c0f117b,0x4c8bcfcc,0x356572d6 } },
+    /* 18 */
+    { { 0x7d8c1bba,0x7ecbac01,0x90b0f3d5,0x6058f9c3,0xf6197d0f,0xaee116e3,
+        0x4033b128,0xc4dd7068,0xc209b983,0xf084dba6,0x831dbc4a,0x97c7c2cf },
+      { 0xf96010e8,0x2f4e61dd,0x529faa17,0xd97e4e20,0x69d37f20,0x4ee66660,
+        0x3d366d72,0xccc139ed,0x13488e0f,0x690b6ee2,0xf3a6d533,0x7cad1dc5 } },
+    /* 19 */
+    { { 0xda57a41f,0x660a9a81,0xec0039b6,0xe74a0412,0x5e1dad15,0x42343c6b,
+        0x46681d4c,0x284f3ff5,0x63749e89,0xb51087f1,0x6f9f2f13,0x070f23cc },
+      { 0x5d186e14,0x542211da,0xfddb0dff,0x84748f37,0xdb1f4180,0x41a3aab4,
+        0xa6402d0e,0x25ed667b,0x02f58355,0x2f2924a9,0xfa44a689,0x5844ee7c } },
+    /* 20 */
+    { { 0x3f3b236f,0xfab08607,0x81e221da,0x19e9d41d,0x3927b428,0xf3f6571e,
+        0x7550f1f6,0x4348a933,0xa85e62f0,0x7167b996,0x7f5452bf,0x62d43759 },
+      { 0xf2955926,0xd85feb9e,0x6df78353,0x440a561f,0x9ca36b59,0x389668ec,
+        0xa22da016,0x052bf1a1,0xf6093254,0xbdfbff72,0xe22209f3,0x94e50f28 } },
+    /* 21 */
+    { { 0x3062e8af,0x90b2e5b3,0xe8a3d369,0xa8572375,0x201db7b1,0x3fe1b00b,
+        0xee651aa2,0xe926def0,0xb9b10ad7,0x6542c9be,0xa2fcbe74,0x098e309b },
+      { 0xfff1d63f,0x779deeb3,0x20bfd374,0x23d0e80a,0x8768f797,0x8452bb3b,
+        0x1f952856,0xcf75bb4d,0x29ea3faa,0x8fe6b400,0x81373a53,0x12bd3e40 } },
+    /* 22 */
+    { { 0x104cbba5,0xc023780d,0xfa35dd4c,0x6207e747,0x1ca9b6a3,0x35c23928,
+        0x97987b10,0x4ff19be8,0x8022eee8,0xb8476bbf,0xd3bbe74d,0xaa0a4a14 },
+      { 0x187d4543,0x20f94331,0x79f6e066,0x32153870,0xac7e82e1,0x83b0f74e,
+        0x828f06ab,0xa7748ba2,0xc26ef35f,0xc5f0298a,0x8e9a7dbd,0x0f0c5070 } },
+    /* 23 */
+    { { 0xdef029dd,0x0c5c244c,0x850661b8,0x3dabc687,0xfe11d981,0x9992b865,
+        0x6274dbad,0xe9801b8f,0x098da242,0xe54e6319,0x91a53d08,0x9929a91a },
+      { 0x35285887,0x37bffd72,0xf1418102,0xbc759425,0xfd2e6e20,0x9280cc35,
+        0xfbc42ee5,0x735c600c,0x8837619a,0xb7ad2864,0xa778c57b,0xa3627231 } },
+    /* 24 */
+    { { 0x91361ed8,0xae799b5c,0x6c63366c,0x47d71b75,0x1b265a6a,0x54cdd521,
+        0x98d77b74,0xe0215a59,0xbab29db0,0x4424d9b7,0x7fd9e536,0x8b0ffacc },
+      { 0x37b5d9ef,0x46d85d12,0xbfa91747,0x5b106d62,0x5f99ba2d,0xed0479f8,
+        0x1d104de4,0x0e6f3923,0x25e8983f,0x83a84c84,0xf8105a70,0xa9507e0a } },
+    /* 25 */
+    { { 0x14cf381c,0xf6c68a6e,0xc22e31cc,0xaf9d27bd,0xaa8a5ccb,0x23568d4d,
+        0xe338e4d2,0xe431eec0,0x8f52ad1f,0xf1a828fe,0xe86acd80,0xdb6a0579 },
+      { 0x4507832a,0x2885672e,0x887e5289,0x73fc275f,0x05610d08,0x65f80278,
+        0x075ff5b0,0x8d9b4554,0x09f712b5,0x3a8e8fb1,0x2ebe9cf2,0x39f0ac86 } },
+    /* 26 */
+    { { 0x4c52edf5,0xd8fabf78,0xa589ae53,0xdcd737e5,0xd791ab17,0x94918bf0,
+        0xbcff06c9,0xb5fbd956,0xdca46d45,0xf6d3032e,0x41a3e486,0x2cdff7e1 },
+      { 0x61f47ec8,0x6674b3ba,0xeef84608,0x8a882163,0x4c687f90,0xa257c705,
+        0xf6cdf227,0xe30cb2ed,0x7f6ea846,0x2c4c64ca,0xcc6bcd3c,0x186fa17c } },
+    /* 27 */
+    { { 0x1dfcb91e,0x48a3f536,0x646d358a,0x83595e13,0x91128798,0xbd15827b,
+        0x2187757a,0x3ce612b8,0x61bd7372,0x873150a1,0xb662f568,0xf4684530 },
+      { 0x401896f6,0x8833950b,0x77f3e090,0xe11cb89a,0x48e7f4a5,0xb2f12cac,
+        0xf606677e,0x313dd769,0x16579f93,0xfdcf08b3,0x46b8f22b,0x6429cec9 } },
+    /* 28 */
+    { { 0xbb75f9a4,0x4984dd54,0x29d3b570,0x4aef06b9,0x3d6e4c1e,0xb5f84ca2,
+        0xb083ef35,0x24c61c11,0x392ca9ff,0xce4a7392,0x6730a800,0x865d6517 },
+      { 0x722b4a2b,0xca3dfe76,0x7b083e0e,0x12c04bf9,0x1b86b8a5,0x803ce5b5,
+        0x6a7e3e0c,0x3fc7632d,0xc81adbe4,0xc89970c2,0x120e16b1,0x3cbcd3ad } },
+    /* 29 */
+    { { 0xec30ce93,0xfbfb4cc7,0xb72720a2,0x10ed6c7d,0x47b55500,0xec675bf7,
+        0x333ff7c3,0x90725903,0x5075bfc0,0xc7c3973e,0x07acf31b,0xb049ecb0 },
+      { 0x4f58839c,0xb4076eaf,0xa2b05e4f,0x101896da,0xab40c66e,0x3f6033b0,
+        0xc8d864ba,0x19ee9eeb,0x47bf6d2a,0xeb6cf155,0xf826477d,0x8e5a9663 } },
+    /* 30 */
+    { { 0xf7fbd5e1,0x69e62fdd,0x76912b1d,0x38ecfe54,0xd1da3bfb,0x845a3d56,
+        0x1c86f0d4,0x0494950e,0x3bc36ce8,0x83cadbf9,0x4fccc8d1,0x41fce572 },
+      { 0x8332c144,0x05f939c2,0x0871e46e,0xb17f248b,0x66e8aff6,0x3d8534e2,
+        0x3b85c629,0x1d06f1dc,0xa3131b73,0xdb06a32e,0x8b3f64e5,0xf295184d } },
+    /* 31 */
+    { { 0x36ddc103,0xd9653ff7,0x95ef606f,0x25f43e37,0xfe06dce8,0x09e301fc,
+        0x30b6eebf,0x85af2341,0x0ff56b20,0x79b12b53,0xfe9a3c6b,0x9b4fb499 },
+      { 0x51d27ac2,0x0154f892,0x56ca5389,0xd33167e3,0xafc065a6,0x7828ec1f,
+        0x7f746c9b,0x0959a258,0x0c44f837,0xb18f1be3,0xc4132fdb,0xa7946117 } },
+    /* 32 */
+    { { 0x5e3c647b,0xc0426b77,0x8cf05348,0xbfcbd939,0x172c0d3d,0x31d312e3,
+        0xee754737,0x5f49fde6,0x6da7ee61,0x895530f0,0xe8b3a5fb,0xcf281b0a },
+      { 0x41b8a543,0xfd149735,0x3080dd30,0x41a625a7,0x653908cf,0xe2baae07,
+        0xba02a278,0xc3d01436,0x7b21b8f8,0xa0d0222e,0xd7ec1297,0xfdc270e9 } },
+    /* 33 */
+    { { 0xbc7f41d6,0x00873c0c,0x1b7ad641,0xd976113e,0x238443fb,0x2a536ff4,
+        0x41e62e45,0x030d00e2,0x5f545fc6,0x532e9867,0x8e91208c,0xcd033108 },
+      { 0x9797612c,0xd1a04c99,0xeea674e2,0xd4393e02,0xe19742a1,0xd56fa69e,
+        0x85f0590e,0xdd2ab480,0x48a2243d,0xa5cefc52,0x54383f41,0x48cc67b6 } },
+    /* 34 */
+    { { 0xfc14ab48,0x4e50430e,0x26706a74,0x195b7f4f,0xcc881ff6,0x2fe8a228,
+        0xd945013d,0xb1b968e2,0x4b92162b,0x936aa579,0x364e754a,0x4fb766b7 },
+      { 0x31e1ff7f,0x13f93bca,0xce4f2691,0x696eb5ca,0xa2b09e02,0xff754bf8,
+        0xe58e3ff8,0x58f13c9c,0x1678c0b0,0xb757346f,0xa86692b3,0xd54200db } },
+    /* 35 */
+    { { 0x6dda1265,0x9a030bbd,0xe89718dd,0xf7b4f3fc,0x936065b8,0xa6a4931f,
+        0x5f72241c,0xbce72d87,0x65775857,0x6cbb51cb,0x4e993675,0xc7161815 },
+      { 0x2ee32189,0xe81a0f79,0x277dc0b2,0xef2fab26,0xb71f469f,0x9e64f6fe,
+        0xdfdaf859,0xb448ce33,0xbe6b5df1,0x3f5c1c4c,0x1de45f7b,0xfb8dfb00 } },
+    /* 36 */
+    { { 0x4d5bb921,0xc7345fa7,0x4d2b667e,0x5c7e04be,0x282d7a3e,0x47ed3a80,
+        0x7e47b2a4,0x5c2777f8,0x08488e2e,0x89b3b100,0xb2eb5b45,0x9aad77c2 },
+      { 0xdaac34ae,0xd681bca7,0x26afb326,0x2452e4e5,0x41a1ee14,0x0c887924,
+        0xc2407ade,0x743b04d4,0xfc17a2ac,0xcb5e999b,0x4a701a06,0x4dca2f82 } },
+    /* 37 */
+    { { 0x1127bc1a,0x68e31ca6,0x17ead3be,0xa3edd59b,0xe25f5a15,0x67b6b645,
+        0xa420e15e,0x76221794,0x4b1e872e,0x794fd83b,0xb2dece1b,0x7cab3f03 },
+      { 0xca9b3586,0x7119bf15,0x4d250bd7,0xa5545924,0xcc6bcf24,0x173633ea,
+        0xb1b6f884,0x9bd308c2,0x447d38c3,0x3bae06f5,0xf341fe1c,0x54dcc135 } },
+    /* 38 */
+    { { 0x943caf0d,0x56d3598d,0x225ff133,0xce044ea9,0x563fadea,0x9edf6a7c,
+        0x73e8dc27,0x632eb944,0x3190dcab,0x814b467e,0x6dbb1e31,0x2d4f4f31 },
+      { 0xa143b7ca,0x8d69811c,0xde7cf950,0x4ec1ac32,0x37b5fe82,0x223ab5fd,
+        0x9390f1d9,0xe82616e4,0x75804610,0xabff4b20,0x875b08f0,0x11b9be15 } },
+    /* 39 */
+    { { 0x3bbe682c,0x4ae31a3d,0x74eef2dd,0xbc7c5d26,0x3c47dd40,0x92afd10a,
+        0xc14ab9e1,0xec7e0a3b,0xb2e495e4,0x6a6c3dd1,0x309bcd85,0x085ee5e9 },
+      { 0x8c2e67fd,0xf381a908,0xe261eaf2,0x32083a80,0x96deee15,0x0fcd6a49,
+        0x5e524c79,0xe3b8fb03,0x1d5b08b9,0x8dc360d9,0x7f26719f,0x3a06e2c8 } },
+    /* 40 */
+    { { 0x7237cac0,0x5cd9f5a8,0x43586794,0x93f0b59d,0xe94f6c4e,0x4384a764,
+        0xb62782d3,0x8304ed2b,0xcde06015,0x0b8db8b3,0x5dbe190f,0x4336dd53 },
+      { 0x92ab473a,0x57443553,0xbe5ed046,0x031c7275,0x21909aa4,0x3e78678c,
+        0x99202ddb,0x4ab7e04f,0x6977e635,0x2648d206,0x093198be,0xd427d184 } },
+    /* 41 */
+    { { 0x0f9b5a31,0x822848f5,0xbaadb62a,0xbb003468,0x3357559c,0x233a0472,
+        0x79aee843,0x49ef6880,0xaeb9e1e3,0xa89867a0,0x1f6f9a55,0xc151931b },
+      { 0xad74251e,0xd264eb0b,0x4abf295e,0x37b9b263,0x04960d10,0xb600921b,
+        0x4da77dc0,0x0de53dbc,0xd2b18697,0x01d9bab3,0xf7156ddf,0xad54ec7a } },
+    /* 42 */
+    { { 0x79efdc58,0x8e74dc35,0x4ff68ddb,0x456bd369,0xd32096a5,0x724e74cc,
+        0x386783d0,0xe41cff42,0x7c70d8a4,0xa04c7f21,0xe61a19a2,0x41199d2f },
+      { 0x29c05dd2,0xd389a3e0,0xe7e3fda9,0x535f2a6b,0x7c2b4df8,0x26ecf72d,
+        0xfe745294,0x678275f4,0x9d23f519,0x6319c9cc,0x88048fc4,0x1e05a02d } },
+    /* 43 */
+    { { 0xd4d5ffe8,0x75cc8e2e,0xdbea17f2,0xf8bb4896,0xcee3cb4a,0x35059790,
+        0xa47c6165,0x4c06ee85,0x92935d2f,0xf98fff25,0x32ffd7c7,0x34c4a572 },
+      { 0xea0376a2,0xc4b14806,0x4f115e02,0x2ea5e750,0x1e55d7c0,0x532d76e2,
+        0xf31044da,0x68dc9411,0x71b77993,0x9272e465,0x93a8cfd5,0xadaa38bb } },
+    /* 44 */
+    { { 0x7d4ed72a,0x4bf0c712,0xba1f79a3,0xda0e9264,0xf4c39ea4,0x48c0258b,
+        0x2a715138,0xa5394ed8,0xbf06c660,0x4af511ce,0xec5c37cd,0xfcebceef },
+      { 0x779ae8c1,0xf23b75aa,0xad1e606e,0xdeff59cc,0x22755c82,0xf3f526fd,
+        0xbb32cefd,0x64c5ab44,0x915bdefd,0xa96e11a2,0x1143813e,0xab19746a } },
+    /* 45 */
+    { { 0xec837d7d,0x43c78585,0xb8ee0ba4,0xca5b6fbc,0xd5dbb5ee,0x34e924d9,
+        0xbb4f1ca5,0x3f4fa104,0x398640f7,0x15458b72,0xd7f407ea,0x4231faa9 },
+      { 0xf96e6896,0x53e0661e,0xd03b0f9d,0x554e4c69,0x9c7858d1,0xd4fcb07b,
+        0x52cb04fa,0x7e952793,0x8974e7f7,0x5f5f1574,0x6b6d57c8,0x2e3fa558 } },
+    /* 46 */
+    { { 0x6a9951a8,0x42cd4803,0x42792ad0,0xa8b15b88,0xabb29a73,0x18e8bcf9,
+        0x409933e8,0xbfd9a092,0xefb88dc4,0x760a3594,0x40724458,0x14418863 },
+      { 0x99caedc7,0x162a56ee,0x91d101c9,0x8fb12ecd,0x393202da,0xea671967,
+        0xa4ccd796,0x1aac8c4a,0x1cf185a8,0x7db05036,0x8cfd095a,0x0c9f86cd } },
+    /* 47 */
+    { { 0x10b2a556,0x9a728147,0x327b70b2,0x767ca964,0x5e3799b7,0x04ed9e12,
+        0x22a3eb2a,0x6781d2dc,0x0d9450ac,0x5bd116eb,0xa7ebe08a,0xeccac1fc },
+      { 0xdc2d6e94,0xde68444f,0x35ecf21b,0x3621f429,0x29e03a2c,0x14e2d543,
+        0x7d3e7f0a,0x53e42cd5,0x73ed00b9,0xbba26c09,0xc57d2272,0x00297c39 } },
+    /* 48 */
+    { { 0xb8243a7d,0x3aaaab10,0x8fa58c5b,0x6eeef93e,0x9ae7f764,0xf866fca3,
+        0x61ab04d3,0x64105a26,0x03945d66,0xa3578d8a,0x791b848c,0xb08cd3e4 },
+      { 0x756d2411,0x45edc5f8,0xa755128c,0xd4a790d9,0x49e5f6a0,0xc2cf0963,
+        0xf649beaa,0xc66d267d,0x8467039e,0x3ce6d968,0x42f7816f,0x50046c6b } },
+    /* 49 */
+    { { 0x66425043,0x92ae1602,0xf08db890,0x1ff66afd,0x8f162ce5,0x386f5a7f,
+        0xfcf5598f,0x18d2dea0,0x1a8ca18e,0x78372b3a,0x8cd0e6f7,0xdf0d20eb },
+      { 0x75bb4045,0x7edd5e1d,0xb96d94b7,0x252a47ce,0x2c626776,0xbdb29358,
+        0x40dd1031,0x853c3943,0x7d5f47fd,0x9dc9becf,0xbae4044a,0x27c2302f } },
+    /* 50 */
+    { { 0x8f2d49ce,0x2d1d208a,0x162df0a2,0x0d91aa02,0x09a07f65,0x9c5cce87,
+        0x84339012,0xdf07238b,0x419442cd,0x5028e2c8,0x72062aba,0x2dcbd358 },
+      { 0xe4680967,0xb5fbc3cb,0x9f92d72c,0x2a7bc645,0x116c369d,0x806c76e1,
+        0x3177e8d8,0x5c50677a,0x4569df57,0x753739eb,0x36c3f40b,0x2d481ef6 } },
+    /* 51 */
+    { { 0xfea1103e,0x1a2d39fd,0x95f81b17,0xeaae5592,0xf59b264a,0xdbd0aa18,
+        0xcb592ee0,0x90c39c1a,0x9750cca3,0xdf62f80d,0xdf97cc6c,0xda4d8283 },
+      { 0x1e201067,0x0a6dd346,0x69fb1f6b,0x1531f859,0x1d60121f,0x4895e552,
+        0x4c041c91,0x0b21aab0,0xbcc1ccf8,0x9d896c46,0x3141bde7,0xd24da3b3 } },
+    /* 52 */
+    { { 0x53b0a354,0x575a0537,0x0c6ddcd8,0x392ff2f4,0x56157b94,0x0b8e8cff,
+        0x3b1b80d1,0x073e57bd,0x3fedee15,0x2a75e0f0,0xaa8e6f19,0x752380e4 },
+      { 0x6558ffe9,0x1f4e227c,0x19ec5415,0x3a348618,0xf7997085,0xab382d5e,
+        0xddc46ac2,0x5e6deaff,0xfc8d094c,0xe5144078,0xf60e37c6,0xf674fe51 } },
+    /* 53 */
+    { { 0xaf63408f,0x6fb87ae5,0xcd75a737,0xa39c36a9,0xcf4c618d,0x7833313f,
+        0xf034c88d,0xfbcd4482,0x39b35288,0x4469a761,0x66b5d9c9,0x77a711c5 },
+      { 0x944f8d65,0x4a695dc7,0x161aaba8,0xe6da5f65,0x24601669,0x8654e9c3,
+        0x28ae7491,0xbc8b93f5,0x8f5580d8,0x5f1d1e83,0xcea32cc8,0x8ccf9a1a } },
+    /* 54 */
+    { { 0x7196fee2,0x28ab110c,0x874c8945,0x75799d63,0x29aedadd,0xa2629348,
+        0x2be88ff4,0x9714cc7b,0xd58d60d6,0xf71293cf,0x32a564e9,0xda6b6cb3 },
+      { 0x3dd821c2,0xf43fddb1,0x90dd323d,0xf2f2785f,0x048489f8,0x91246419,
+        0xd24c6749,0x61660f26,0xc803c15c,0x961d9e8c,0xfaadc4c9,0x631c6158 } },
+    /* 55 */
+    { { 0xfd752366,0xacf2ebe0,0x139be88b,0xb93c340e,0x0f20179e,0x98f66485,
+        0xff1da785,0x14820254,0x4f85c16e,0x5278e276,0x7aab1913,0xa246ee45 },
+      { 0x53763b33,0x43861eb4,0x45c0bc0d,0xc49f03fc,0xad6b1ea1,0xafff16bc,
+        0x6fd49c99,0xce33908b,0xf7fde8c3,0x5c51e9bf,0xff142c5e,0x076a7a39 } },
+    /* 56 */
+    { { 0x9e338d10,0x04639dfe,0xf42b411b,0x8ee6996f,0xa875cef2,0x960461d1,
+        0x95b4d0ba,0x1057b6d6,0xa906e0bc,0x27639252,0xe1c20f8a,0x2c19f09a },
+      { 0xeef4c43d,0x5b8fc3f0,0x07a84aa9,0xe2e1b1a8,0x835d2bdb,0x5f455528,
+        0x207132dd,0x0f4aee4d,0x3907f675,0xe9f8338c,0x0e0531f0,0x7a874dc9 } },
+    /* 57 */
+    { { 0x97c27050,0x84b22d45,0x59e70bf8,0xbd0b8df7,0x79738b9b,0xb4d67405,
+        0xcd917c4f,0x47f4d5f5,0x13ce6e33,0x9099c4ce,0x521d0f8b,0x942bfd39 },
+      { 0xa43b566d,0x5028f0f6,0x21bff7de,0xaf6e8669,0xc44232cd,0x83f6f856,
+        0xf915069a,0x65680579,0xecfecb85,0xd12095a2,0xdb01ba16,0xcf7f06ae } },
+    /* 58 */
+    { { 0x8ef96c80,0x0f56e3c4,0x3ddb609c,0xd521f2b3,0x7dc1450d,0x2be94102,
+        0x02a91fe2,0x2d21a071,0x1efa37de,0x2e6f74fa,0x156c28a1,0x9a9a90b8 },
+      { 0x9dc7dfcb,0xc54ea9ea,0x2c2c1d62,0xc74e66fc,0x49d3e067,0x9f23f967,
+        0x54dd38ad,0x1c7c3a46,0x5946cee3,0xc7005884,0x45cc045d,0x89856368 } },
+    /* 59 */
+    { { 0xfce73946,0x29da7cd4,0x23168563,0x8f697db5,0xcba92ec6,0x8e235e9c,
+        0x9f91d3ea,0x55d4655f,0xaa50a6cd,0xf3689f23,0x21e6a1a0,0xdcf21c26 },
+      { 0x61b818bf,0xcffbc82e,0xda47a243,0xc74a2f96,0x8bc1a0cf,0x234e980a,
+        0x7929cb6d,0xf35fd6b5,0xefe17d6c,0x81468e12,0x58b2dafb,0xddea6ae5 } },
+    /* 60 */
+    { { 0x7e787b2e,0x294de887,0x39a9310d,0x258acc1f,0xac14265d,0x92d9714a,
+        0x708b48a0,0x18b5591c,0xe1abbf71,0x27cc6bb0,0x568307b9,0xc0581fa3 },
+      { 0xf24d4d58,0x9e0f58a3,0xe0ce2327,0xfebe9bb8,0x9d1be702,0x91fd6a41,
+        0xfacac993,0x9a7d8a45,0x9e50d66d,0xabc0a08c,0x06498201,0x02c342f7 } },
+    /* 61 */
+    { { 0x157bdbc2,0xccd71407,0xad0e1605,0x72fa89c6,0xb92a015f,0xb1d3da2b,
+        0xa0a3fe56,0x8ad9e7cd,0x24f06737,0x160edcbd,0x61275be6,0x79d4db33 },
+      { 0x5f3497c4,0xd3d31fd9,0x04192fb0,0x8cafeaee,0x13a50af3,0xe13ca745,
+        0x8c85aae5,0x18826167,0x9eb556ff,0xce06cea8,0xbdb549f3,0x2eef1995 } },
+    /* 62 */
+    { { 0x50596edc,0x8ed7d3eb,0x905243a2,0xaa359362,0xa4b6d02b,0xa212c2c2,
+        0xc4fbec68,0x611fd727,0xb84f733d,0x8a0b8ff7,0x5f0daf0e,0xd85a6b90 },
+      { 0xd4091cf7,0x60e899f5,0x2eff2768,0x4fef2b67,0x10c33964,0xc1f195cb,
+        0x93626a8f,0x8275d369,0x0d6c840a,0xc77904f4,0x7a868acd,0x88d8b7fd } },
+    /* 63 */
+    { { 0x7bd98425,0x85f23723,0xc70b154e,0xd4463992,0x96687a2e,0xcbb00ee2,
+        0xc83214fd,0x905fdbf7,0x13593684,0x2019d293,0xef51218e,0x0428c393 },
+      { 0x981e909a,0x40c7623f,0x7be192da,0x92513385,0x4010907e,0x48fe480f,
+        0x3120b459,0xdd7a187c,0xa1fd8f3c,0xc9d7702d,0xe358efc5,0x66e4753b } },
+    /* 64 */
+    { { 0x16973cf4,0x070d34e1,0x7e4f34f7,0x20aee08b,0x5eb8ad29,0x269af9b9,
+        0xa6a45dda,0xdde0a036,0x63df41e0,0xa18b528e,0xa260df2a,0x03cc71b2 },
+      { 0xa06b1dd7,0x24a6770a,0x9d2675d3,0x5bfa9c11,0x96844432,0x73c1e2a1,
+        0x131a6cf0,0x3660558d,0x2ee79454,0xb0289c83,0xc6d8ddcd,0xa6aefb01 } },
+    /* 65 */
+    { { 0x01ab5245,0xba1464b4,0xc48d93ff,0x9b8d0b6d,0x93ad272c,0x939867dc,
+        0xae9fdc77,0xbebe085e,0x894ea8bd,0x73ae5103,0x39ac22e1,0x740fc89a },
+      { 0x28e23b23,0x5e28b0a3,0xe13104d0,0x2352722e,0xb0a2640d,0xf4667a18,
+        0x49bb37c3,0xac74a72e,0xe81e183a,0x79f734f0,0x3fd9c0eb,0xbffe5b6c } },
+    /* 66 */
+    { { 0xc6a2123f,0xb1a358f5,0xfe28df6d,0x927b2d95,0xf199d2f9,0x89702753,
+        0x1a3f82dc,0x0a73754c,0x777affe1,0x063d029d,0xdae6d34d,0x5439817e },
+      { 0x6b8b83c4,0xf7979eef,0x9d945682,0x615cb214,0xc5e57eae,0x8f0e4fac,
+        0x113047dd,0x042b89b8,0x93f36508,0x888356dc,0x5fd1f32f,0xbf008d18 } },
+    /* 67 */
+    { { 0x4e8068db,0x8012aa24,0xa5729a47,0xc72cc641,0x43f0691d,0x3c33df2c,
+        0x1d92145f,0xfa057347,0xb97f7946,0xaefc0f2f,0x2f8121bf,0x813d75cb },
+      { 0x4383bba6,0x05613c72,0xa4224b3f,0xa924ce70,0x5f2179a6,0xe59cecbe,
+        0x79f62b61,0x78e2e8aa,0x53ad8079,0x3ac2cc3b,0xd8f4fa96,0x55518d71 } },
+    /* 68 */
+    { { 0x00623f3b,0x03cf2922,0x5f29ebff,0x095c7111,0x80aa6823,0x42d72247,
+        0x7458c0b0,0x044c7ba1,0x0959ec20,0xca62f7ef,0xf8ca929f,0x40ae2ab7 },
+      { 0xa927b102,0xb8c5377a,0xdc031771,0x398a86a0,0xc216a406,0x04908f9d,
+        0x918d3300,0xb423a73a,0xe0b94739,0x634b0ff1,0x2d69f697,0xe29de725 } },
+    /* 69 */
+    { { 0x8435af04,0x744d1400,0xfec192da,0x5f255b1d,0x336dc542,0x1f17dc12,
+        0x636a68a8,0x5c90c2a7,0x7704ca1e,0x960c9eb7,0x6fb3d65a,0x9de8cf1e },
+      { 0x511d3d06,0xc60fee0d,0xf9eb52c7,0x466e2313,0x206b0914,0x743c0f5f,
+        0x2191aa4d,0x42f55bac,0xffebdbc2,0xcefc7c8f,0xe6e8ed1c,0xd4fa6081 } },
+    /* 70 */
+    { { 0xb0ab9645,0xb5e405d3,0xd5f1f711,0xaeec7f98,0x585c2a6e,0x8ad42311,
+        0x512c6944,0x045acb9e,0xa90db1c6,0xae106c4e,0x898e6563,0xb89f33d5 },
+      { 0x7fed2ce4,0x43b07cd9,0xdd815b20,0xf9934e17,0x0a81a349,0x6778d4d5,
+        0x52918061,0x9e616ade,0xd7e67112,0xfa06db06,0x88488091,0x1da23cf1 } },
+    /* 71 */
+    { { 0x42f2c4b5,0x821c46b3,0x66059e47,0x931513ef,0x66f50cd1,0x7030ae43,
+        0x43e7b127,0x43b536c9,0x5fca5360,0x006258cf,0x6b557abf,0xe4e3ee79 },
+      { 0x24c8b22f,0xbb6b3900,0xfcbf1054,0x2eb5e2c1,0x567492af,0x937b18c9,
+        0xacf53957,0xf09432e4,0x1dbf3a56,0x585f5a9d,0xbe0887cf,0xf86751fd } },
+    /* 72 */
+    { { 0x9d10e0b2,0x157399cb,0x60dc51b7,0x1c0d5956,0x1f583090,0x1d496b8a,
+        0x88590484,0x6658bc26,0x03213f28,0x88c08ab7,0x7ae58de4,0x8d2e0f73 },
+      { 0x486cfee6,0x9b79bc95,0xe9e5bc57,0x036a26c7,0xcd8ae97a,0x1ad03601,
+        0xff3a0494,0x06907f87,0x2c7eb584,0x078f4bbf,0x7e8d0a5a,0xe3731bf5 } },
+    /* 73 */
+    { { 0xe1cd0abe,0x72f2282b,0x87efefa2,0xd4f9015e,0x6c3834bd,0x9d189806,
+        0xb8a29ced,0x9c8cdcc1,0xfee82ebc,0x0601b9f4,0x7206a756,0x371052bc },
+      { 0x46f32562,0x76fa1092,0x17351bb4,0xdaad534c,0xb3636bb5,0xc3d64c37,
+        0x45d54e00,0x038a8c51,0x32c09e7c,0x301e6180,0x95735151,0x9764eae7 } },
+    /* 74 */
+    { { 0xcbd5256a,0x8791b19f,0x6ca13a3b,0x4007e0f2,0x4cf06904,0x03b79460,
+        0xb6c17589,0xb18a9c22,0x81d45908,0xa1cb7d7d,0x21bb68f1,0x6e13fa9d },
+      { 0xa71e6e16,0x47183c62,0xe18749ed,0x5cf0ef8e,0x2e5ed409,0x2c9c7f9b,
+        0xe6e117e1,0x042eeacc,0x13fb5a7f,0xb86d4816,0xc9e5feb1,0xea1cf0ed } },
+    /* 75 */
+    { { 0xcea4cc9b,0x6e6573c9,0xafcec8f3,0x5417961d,0xa438b6f6,0x804bf02a,
+        0xdcd4ea88,0xb894b03c,0x3799571f,0xd0f807e9,0x862156e8,0x3466a7f5 },
+      { 0x56515664,0x51e59acd,0xa3c5eb0b,0x55b0f93c,0x6a4279db,0x84a06b02,
+        0xc5fae08e,0x5c850579,0xa663a1a2,0xcf07b8db,0xf46ffc8d,0x49a36bbc } },
+    /* 76 */
+    { { 0x46d93106,0xe47f5acc,0xaa897c9c,0x65b7ade0,0x12d7e4be,0x37cf4c94,
+        0xd4b2caa9,0xa2ae9b80,0xe60357a3,0x5e7ce09c,0xc8ecd5f9,0x29f77667 },
+      { 0xa8a0b1c5,0xdf6868f5,0x62978ad8,0x240858cf,0xdc0002a1,0x0f7ac101,
+        0xffe9aa05,0x1d28a9d7,0x5b962c97,0x744984d6,0x3d28c8b2,0xa8a7c00b } },
+    /* 77 */
+    { { 0xae11a338,0x7c58a852,0xd1af96e7,0xa78613f1,0x5355cc73,0x7e9767d2,
+        0x792a2de6,0x6ba37009,0x124386b2,0x7d60f618,0x11157674,0xab09b531 },
+      { 0x98eb9dd0,0x95a04841,0x15070328,0xe6c17acc,0x489c6e49,0xafc6da45,
+        0xbb211530,0xab45a60a,0x7d7ea933,0xc58d6592,0x095642c6,0xa3ef3c65 } },
+    /* 78 */
+    { { 0xdf010879,0x89d420e9,0x39576179,0x9d25255d,0xe39513b6,0x9cdefd50,
+        0xd5d1c313,0xe4efe45b,0x3f7af771,0xc0149de7,0x340ab06b,0x55a6b4f4 },
+      { 0xebeaf771,0xf1325251,0x878d4288,0x2ab44128,0x18e05afe,0xfcd5832e,
+        0xcc1fb62b,0xef52a348,0xc1c4792a,0x2bd08274,0x877c6dc7,0x345c5846 } },
+    /* 79 */
+    { { 0xbea65e90,0xde15ceb0,0x2416d99c,0x0987f72b,0xfd863dec,0x44db578d,
+        0xac6a3578,0xf617b74b,0xdb48e999,0x9e62bd7a,0xeab1a1be,0x877cae61 },
+      { 0x3a358610,0x23adddaa,0x325e2b07,0x2fc4d6d1,0x1585754e,0x897198f5,
+        0xb392b584,0xf741852c,0xb55f7de1,0x9927804c,0x1aa8efae,0xe9e6c4ed } },
+    /* 80 */
+    { { 0x98683186,0x867db639,0xddcc4ea9,0xfb5cf424,0xd4f0e7bd,0xcc9a7ffe,
+        0x7a779f7e,0x7c57f71c,0xd6b25ef2,0x90774079,0xb4081680,0x90eae903 },
+      { 0x0ee1fceb,0xdf2aae5e,0xe86c1a1f,0x3ff1da24,0xca193edf,0x80f587d6,
+        0xdc9b9d6a,0xa5695523,0x85920303,0x7b840900,0xba6dbdef,0x1efa4dfc } },
+    /* 81 */
+    { { 0xe0540015,0xfbd838f9,0xc39077dc,0x2c323946,0xad619124,0x8b1fb9e6,
+        0x0ca62ea8,0x9612440c,0x2dbe00ff,0x9ad9b52c,0xae197643,0xf52abaa1 },
+      { 0x2cac32ad,0xd0e89894,0x62a98f91,0xdfb79e42,0x276f55cb,0x65452ecf,
+        0x7ad23e12,0xdb1ac0d2,0xde4986f0,0xf68c5f6a,0x82ce327d,0x389ac37b } },
+    /* 82 */
+    { { 0xf8e60f5b,0x511188b4,0x48aa2ada,0x7fe67015,0x381abca2,0xdb333cb8,
+        0xdaf3fc97,0xb15e6d9d,0x36aabc03,0x4b24f6eb,0x72a748b4,0xc59789df },
+      { 0x29cf5279,0x26fcb8a5,0x01ad9a6c,0x7a3c6bfc,0x4b8bac9b,0x866cf88d,
+        0x9c80d041,0xf4c89989,0x70add148,0xf0a04241,0x45d81a41,0x5a02f479 } },
+    /* 83 */
+    { { 0xc1c90202,0xfa5c877c,0xf8ac7570,0xd099d440,0xd17881f7,0x428a5b1b,
+        0x5b2501d7,0x61e267db,0xf2e4465b,0xf889bf04,0x76aa4cb8,0x4da3ae08 },
+      { 0xe3e66861,0x3ef0fe26,0x3318b86d,0x5e772953,0x747396df,0xc3c35fbc,
+        0x439ffd37,0x5115a29c,0xb2d70374,0xbfc4bd97,0x56246b9d,0x088630ea } },
+    /* 84 */
+    { { 0xb8a9e8c9,0xcd96866d,0x5bb8091e,0xa11963b8,0x045b3cd2,0xc7f90d53,
+        0x80f36504,0x755a72b5,0x21d3751c,0x46f8b399,0x53c193de,0x4bffdc91 },
+      { 0xb89554e7,0xcd15c049,0xf7a26be6,0x353c6754,0xbd41d970,0x79602370,
+        0x12b176c0,0xde16470b,0x40c8809d,0x56ba1175,0xe435fb1e,0xe2db35c3 } },
+    /* 85 */
+    { { 0x6328e33f,0xd71e4aab,0xaf8136d1,0x5486782b,0x86d57231,0x07a4995f,
+        0x1651a968,0xf1f0a5bd,0x76803b6d,0xa5dc5b24,0x42dda935,0x5c587cbc },
+      { 0xbae8b4c0,0x2b6cdb32,0xb1331138,0x66d1598b,0x5d7e9614,0x4a23b2d2,
+        0x74a8c05d,0x93e402a6,0xda7ce82e,0x45ac94e6,0xe463d465,0xeb9f8281 } },
+    /* 86 */
+    { { 0xfecf5b9b,0x34e0f9d1,0xf206966a,0xa115b12b,0x1eaa0534,0x5591cf3b,
+        0xfb1558f9,0x5f0293cb,0x1bc703a5,0x1c8507a4,0x862c1f81,0x92e6b81c },
+      { 0xcdaf24e3,0xcc9ebc66,0x72fcfc70,0x68917ecd,0x8157ba48,0x6dc9a930,
+        0xb06ab2b2,0x5d425c08,0x36e929c4,0x362f8ce7,0x62e89324,0x09f6f57c } },
+    /* 87 */
+    { { 0xd29375fb,0x1c7d6b78,0xe35d1157,0xfabd851e,0x4243ea47,0xf6f62dcd,
+        0x8fe30b0f,0x1dd92460,0xffc6e709,0x08166dfa,0x0881e6a7,0xc6c4c693 },
+      { 0xd6a53fb0,0x20368f87,0x9eb4d1f9,0x38718e9f,0xafd7e790,0x03f08acd,
+        0x72fe2a1c,0x0835eb44,0x88076e5d,0x7e050903,0xa638e731,0x538f765e } },
+    /* 88 */
+    { { 0xc2663b4b,0x0e0249d9,0x47cd38dd,0xe700ab5b,0x2c46559f,0xb192559d,
+        0x4bcde66d,0x8f9f74a8,0x3e2aced5,0xad161523,0x3dd03a5b,0xc155c047 },
+      { 0x3be454eb,0x346a8799,0x83b7dccd,0x66ee94db,0xab9d2abe,0x1f6d8378,
+        0x7733f355,0x4a396dd2,0xf53553c2,0x419bd40a,0x731dd943,0xd0ead98d } },
+    /* 89 */
+    { { 0xec142408,0x908e0b0e,0x4114b310,0x98943cb9,0x1742b1d7,0x03dbf7d8,
+        0x693412f4,0xd270df6b,0x8f69e20c,0xc5065494,0x697e43a1,0xa76a90c3 },
+      { 0x4624825a,0xe0fa3384,0x8acc34c2,0x82e48c0b,0xe9a14f2b,0x7b24bd14,
+        0x4db30803,0x4f5dd5e2,0x932da0a3,0x0c77a9e7,0x74c653dc,0x20db90f2 } },
+    /* 90 */
+    { { 0x0e6c5fd9,0x261179b7,0x6c982eea,0xf8bec123,0xd4957b7e,0x47683338,
+        0x0a72f66a,0xcc47e664,0x1bad9350,0xbd54bf6a,0xf454e95a,0xdfbf4c6a },
+      { 0x6907f4fa,0x3f7a7afa,0x865ca735,0x7311fae0,0x2a496ada,0x24737ab8,
+        0x15feb79b,0x13e425f1,0xa1b93c21,0xe9e97c50,0x4ddd3eb5,0xb26b6eac } },
+    /* 91 */
+    { { 0x2a2e5f2b,0x81cab9f5,0xbf385ac4,0xf93caf29,0xc909963a,0xf4bf35c3,
+        0x74c9143c,0x081e7300,0xc281b4c5,0x3ea57fa8,0x9b340741,0xe497905c },
+      { 0x55ab3cfb,0xf556dd8a,0x518db6ad,0xd444b96b,0x5ef4b955,0x34f5425a,
+        0xecd26aa3,0xdda7a3ac,0xda655e97,0xb57da11b,0xc2024c70,0x02da3eff } },
+    /* 92 */
+    { { 0x6481d0d9,0xe24b0036,0x818fdfe2,0x3740dbe5,0x190fda00,0xc1fc1f45,
+        0x3cf27fde,0x329c9280,0x6934f43e,0x7435cb53,0x7884e8fe,0x2b505a5d },
+      { 0x711adcc9,0x6cfcc6a6,0x531e21e1,0xf034325c,0x9b2a8a99,0xa2f4a967,
+        0x3c21bdff,0x9d5f3842,0x31b57d66,0xb25c7811,0x0b8093b9,0xdb5344d8 } },
+    /* 93 */
+    { { 0xae50a2f5,0x0d72e667,0xe4a861d1,0x9b7f8d8a,0x330df1cb,0xa129f70f,
+        0xe04fefc3,0xe90aa5d7,0xe72c3ae1,0xff561ecb,0xcdb955fa,0x0d8fb428 },
+      { 0xd7663784,0xd2235f73,0x7e2c456a,0xc05baec6,0x2adbfccc,0xe5c292e4,
+        0xefb110d5,0x4fd17988,0xd19d49f3,0x27e57734,0x84f679fe,0x188ac4ce } },
+    /* 94 */
+    { { 0xa796c53e,0x7ee344cf,0x0868009b,0xbbf6074d,0x474a1295,0x1f1594f7,
+        0xac11632d,0x66776edc,0x04e2fa5a,0x1862278b,0xc854a89a,0x52665cf2 },
+      { 0x8104ab58,0x7e376464,0x7204fd6d,0x16775913,0x44ea1199,0x86ca06a5,
+        0x1c9240dd,0xaa3f765b,0x24746149,0x5f8501a9,0xdcd251d7,0x7b982e30 } },
+    /* 95 */
+    { { 0xc15f3060,0xe44e9efc,0xa87ebbe6,0x5ad62f2e,0xc79500d4,0x36499d41,
+        0x336fa9d1,0xa66d6dc0,0x5afd3b1f,0xf8afc495,0xe5c9822b,0x1d8ccb24 },
+      { 0x79d7584b,0x4031422b,0xea3f20dd,0xc54a0580,0x958468c5,0x3f837c8f,
+        0xfbea7735,0x3d82f110,0x7dffe2fc,0x679a8778,0x20704803,0x48eba63b } },
+    /* 96 */
+    { { 0xdf46e2f6,0x89b10d41,0x19514367,0x13ab57f8,0x1d469c87,0x067372b9,
+        0x4f6c5798,0x0c195afa,0x272c9acf,0xea43a12a,0x678abdac,0x9dadd8cb },
+      { 0xe182579a,0xcce56c6b,0x2d26c2d8,0x86febadb,0x2a44745c,0x1c668ee1,
+        0x98dc047a,0x580acd86,0x51b9ec2d,0x5a2b79cc,0x4054f6a0,0x007da608 } },
+    /* 97 */
+    { { 0x17b00dd0,0x9e3ca352,0x0e81a7a6,0x046779cb,0xd482d871,0xb999fef3,
+        0xd9233fbc,0xe6f38134,0xf48cd0e0,0x112c3001,0x3c6c66ae,0x934e7576 },
+      { 0xd73234dc,0xb44d4fc3,0x864eafc1,0xfcae2062,0x26bef21a,0x843afe25,
+        0xf3b75fdf,0x61355107,0x794c2e6b,0x8367a5aa,0x8548a372,0x3d2629b1 } },
+    /* 98 */
+    { { 0x437cfaf8,0x6230618f,0x2032c299,0x5b8742cb,0x2293643a,0x949f7247,
+        0x09464f79,0xb8040f1a,0x4f254143,0x049462d2,0x366c7e76,0xabd6b522 },
+      { 0xd5338f55,0x119b392b,0x01495a0c,0x1a80a9ce,0xf8d7537e,0xf3118ca7,
+        0x6bf4b762,0xb715adc2,0xa8482b6c,0x24506165,0x96a7c84d,0xd958d7c6 } },
+    /* 99 */
+    { { 0xbdc21f31,0x9ad8aa87,0x8063e58c,0xadb3cab4,0xb07dd7b8,0xefd86283,
+        0x1be7c6b4,0xc7b9b762,0x015582de,0x2ef58741,0x299addf3,0xc970c52e },
+      { 0x22f24d66,0x78f02e2a,0x74cc100a,0xefec1d10,0x09316e1a,0xaf2a6a39,
+        0x5849dd49,0xce7c2205,0x96bffc4c,0x9c1fe75c,0x7ba06ec0,0xcad98fd2 } },
+    /* 100 */
+    { { 0xb648b73e,0xed76e2d0,0x1cfd285e,0xa9f92ce5,0x2ed13de1,0xa8c86c06,
+        0xa5191a93,0x1d3a574e,0x1ad1b8bf,0x385cdf8b,0x47d2cfe3,0xbbecc28a },
+      { 0x69cec548,0x98d326c0,0xf240a0b2,0x4f5bc1dd,0x29057236,0x241a7062,
+        0xc68294a4,0x0fc6e9c5,0xa319f17a,0x4d04838b,0x9ffc1c6f,0x8b612cf1 } },
+    /* 101 */
+    { { 0x4c3830eb,0x9bb0b501,0x8ee0d0c5,0x3d08f83c,0x79ba9389,0xa4a62642,
+        0x9cbc2914,0x5d5d4044,0x074c46f0,0xae9eb83e,0x74ead7d6,0x63bb758f },
+      { 0xc6bb29e0,0x1c40d2ea,0x4b02f41e,0x95aa2d87,0x53cb199a,0x92989175,
+        0x51584f6d,0xdd91bafe,0x31a1aaec,0x3715efb9,0x46780f9e,0xc1b6ae5b } },
+    /* 102 */
+    { { 0x42772f41,0xcded3e4b,0x3bcb79d1,0x3a700d5d,0x80feee60,0x4430d50e,
+        0xf5e5d4bb,0x444ef1fc,0xe6e358ff,0xc660194f,0x6a91b43c,0xe68a2f32 },
+      { 0x977fe4d2,0x5842775c,0x7e2a41eb,0x78fdef5c,0xff8df00e,0x5f3bec02,
+        0x5852525d,0xf4b840cd,0x4e6988bd,0x0870483a,0xcc64b837,0x39499e39 } },
+    /* 103 */
+    { { 0xb08df5fe,0xfc05de80,0x63ba0362,0x0c12957c,0xd5cf1428,0xea379414,
+        0x54ef6216,0xc559132a,0xb9e65cf8,0x33d5f12f,0x1695d663,0x09c60278 },
+      { 0x61f7a2fb,0x3ac1ced4,0xd4f5eeb8,0xdd838444,0x8318fcad,0x82a38c6c,
+        0xe9f1a864,0x315be2e5,0x442daf47,0x317b5771,0x95aa5f9e,0x81b5904a } },
+    /* 104 */
+    { { 0x8b21d232,0x6b6b1c50,0x8c2cba75,0x87f3dbc0,0xae9f0faf,0xa7e74b46,
+        0xbb7b8079,0x036a0985,0x8d974a25,0x4f185b90,0xd9af5ec9,0x5aa7cef0 },
+      { 0x57dcfffc,0xe0566a70,0xb8453225,0x6ea311da,0x23368aa9,0x72ea1a8d,
+        0x48cd552d,0xed9b2083,0xc80ea435,0xb987967c,0x6c104173,0xad735c75 } },
+    /* 105 */
+    { { 0xcee76ef4,0xaea85ab3,0xaf1d2b93,0x44997444,0xeacb923f,0x0851929b,
+        0x51e3bc0c,0xb080b590,0x59be68a2,0xc4ee1d86,0x64b26cda,0xf00de219 },
+      { 0xf2e90d4d,0x8d7fb5c0,0x77d9ec64,0x00e219a7,0x5d1c491c,0xc4e6febd,
+        0x1a8f4585,0x080e3754,0x48d2af9c,0x4a9b86c8,0xb6679851,0x2ed70db6 } },
+    /* 106 */
+    { { 0x586f25cb,0xaee44116,0xa0fcf70f,0xf7b6861f,0x18a350e8,0x55d2cd20,
+        0x92dc286f,0x861bf3e5,0x6226aba7,0x9ab18ffa,0xa9857b03,0xd15827be },
+      { 0x92e6acef,0x26c1f547,0xac1fbac3,0x422c63c8,0xfcbfd71d,0xa2d8760d,
+        0xb2511224,0x35f6a539,0x048d1a21,0xbaa88fa1,0xebf999db,0x49f1abe9 } },
+    /* 107 */
+    { { 0xf7492b73,0x16f9f4f4,0xcb392b1a,0xcf28ec1e,0x69ca6ffc,0x45b130d4,
+        0xb72efa58,0x28ba8d40,0x5ca066f5,0xace987c7,0x4ad022eb,0x3e399246 },
+      { 0x752555bb,0x63a2d84e,0x9c2ae394,0xaaa93b4a,0xc89539ca,0xcd80424e,
+        0xaa119a99,0x6d6b5a6d,0x379f2629,0xbd50334c,0xef3cc7d3,0x899e925e } },
+    /* 108 */
+    { { 0xbf825dc4,0xb7ff3651,0x40b9c462,0x0f741cc4,0x5cc4fb5b,0x771ff5a9,
+        0x47fd56fe,0xcb9e9c9b,0x5626c0d3,0xbdf053db,0xf7e14098,0xa97ce675 },
+      { 0x6c934f5e,0x68afe5a3,0xccefc46f,0x6cd5e148,0xd7a88586,0xc7758570,
+        0xdd558d40,0x49978f5e,0x64ae00c1,0xa1d5088a,0xf1d65bb2,0x58f2a720 } },
+    /* 109 */
+    { { 0x3e4daedb,0x66fdda4a,0x65d1b052,0x38318c12,0x4c4bbf5c,0x28d910a2,
+        0x78a9cd14,0x762fe5c4,0xd2cc0aee,0x08e5ebaa,0xca0c654c,0xd2cdf257 },
+      { 0x08b717d2,0x48f7c58b,0x386cd07a,0x3807184a,0xae7d0112,0x3240f626,
+        0xc43917b0,0x03e9361b,0x20aea018,0xf261a876,0x7e1e6372,0x53f556a4 } },
+    /* 110 */
+    { { 0x2f512a90,0xc84cee56,0x1b0ea9f1,0x24b3c004,0xe26cc1ea,0x0ee15d2d,
+        0xf0c9ef7d,0xd848762c,0xd5341435,0x1026e9c5,0xfdb16b31,0x8f5b73dc },
+      { 0xd2c75d95,0x1f69bef2,0xbe064dda,0x8d33d581,0x57ed35e6,0x8c024c12,
+        0xc309c281,0xf8d435f9,0xd6960193,0xfd295061,0xe9e49541,0x66618d78 } },
+    /* 111 */
+    { { 0x8ce382de,0x571cfd45,0xde900dde,0x175806ee,0x34aba3b5,0x61849965,
+        0xde7aec95,0xe899778a,0xff4aa97f,0xe8f00f6e,0x010b0c6d,0xae971cb5 },
+      { 0x3af788f1,0x1827eebc,0xe413fe2d,0xd46229ff,0x4741c9b4,0x8a15455b,
+        0xf8e424eb,0x5f02e690,0xdae87712,0x40a1202e,0x64944f6d,0x49b3bda2 } },
+    /* 112 */
+    { { 0x035b2d69,0xd63c6067,0x6bed91b0,0xb507150d,0x7afb39b2,0x1f35f82f,
+        0x16012b66,0xb9bd9c01,0xed0a5f50,0x00d97960,0x2716f7c9,0xed705451 },
+      { 0x127abdb4,0x1576eff4,0xf01e701c,0x6850d698,0x3fc87e2f,0x9fa7d749,
+        0xb0ce3e48,0x0b6bcc6f,0xf7d8c1c0,0xf4fbe1f5,0x02719cc6,0xcf75230e } },
+    /* 113 */
+    { { 0x722d94ed,0x6761d6c2,0x3718820e,0xd1ec3f21,0x25d0e7c6,0x65a40b70,
+        0xbaf3cf31,0xd67f830e,0xb93ea430,0x633b3807,0x0bc96c69,0x17faa0ea },
+      { 0xdf866b98,0xe6bf3482,0xa9db52d4,0x205c1ee9,0xff9ab869,0x51ef9bbd,
+        0x75eeb985,0x3863dad1,0xd3cf442a,0xef216c3b,0xf9c8e321,0x3fb228e3 } },
+    /* 114 */
+    { { 0x0760ac07,0x94f9b70c,0x9d79bf4d,0xf3c9ccae,0xc5ffc83d,0x73cea084,
+        0xdc49c38e,0xef50f943,0xbc9e7330,0xf467a2ae,0x44ea7fba,0x5ee534b6 },
+      { 0x03609e7f,0x20cb6272,0x62fdc9f0,0x09844355,0x0f1457f7,0xaf5c8e58,
+        0xb4b25941,0xd1f50a6c,0x2ec82395,0x77cb247c,0xda3dca33,0xa5f3e1e5 } },
+    /* 115 */
+    { { 0x7d85fa94,0x023489d6,0x2db9ce47,0x0ba40537,0xaed7aad1,0x0fdf7a1f,
+        0x9a4ccb40,0xa57b0d73,0x5b18967c,0x48fcec99,0xb7274d24,0xf30b5b6e },
+      { 0xc81c5338,0x7ccb4773,0xa3ed6bd0,0xb85639e6,0x1d56eada,0x7d9df95f,
+        0x0a1607ad,0xe256d57f,0x957574d6,0x6da7ffdc,0x01c7a8c4,0x65f84046 } },
+    /* 116 */
+    { { 0xcba1e7f1,0x8d45d0cb,0x02b55f64,0xef0a08c0,0x17e19892,0x771ca31b,
+        0x4885907e,0xe1843ecb,0x364ce16a,0x67797ebc,0x8df4b338,0x816d2b2d },
+      { 0x39aa8671,0xe870b0e5,0xc102b5f5,0x9f0db3e4,0x1720c697,0x34296659,
+        0x613c0d2a,0x0ad4c89e,0x418ddd61,0x1af900b2,0xd336e20e,0xe087ca72 } },
+    /* 117 */
+    { { 0xaba10079,0x222831ff,0x6d64fff2,0x0dc5f87b,0x3e8cb330,0x44547907,
+        0x702a33fb,0xe815aaa2,0x5fba3215,0x338d6b2e,0x79f549c8,0x0f7535cb },
+      { 0x2ee95923,0x471ecd97,0xc6d1c09f,0x1e868b37,0xc666ef4e,0x2bc7b8ec,
+        0x808a4bfc,0xf5416589,0x3fbc4d2e,0xf23e9ee2,0x2d75125b,0x4357236c } },
+    /* 118 */
+    { { 0xba9cdb1b,0xfe176d95,0x2f82791e,0x45a1ca01,0x4de4cca2,0x97654af2,
+        0x5cc4bcb9,0xbdbf9d0e,0xad97ac0a,0xf6a7df50,0x61359fd6,0xc52112b0 },
+      { 0x4f05eae3,0x696d9ce3,0xe943ac2b,0x903adc02,0x0848be17,0xa9075347,
+        0x2a3973e5,0x1e20f170,0x6feb67e9,0xe1aacc1c,0xe16bc6b9,0x2ca0ac32 } },
+    /* 119 */
+    { { 0xef871eb5,0xffea12e4,0xa8bf0a7a,0x94c2f25d,0x78134eaa,0x4d1e4c2a,
+        0x0360fb10,0x11ed16fb,0x85fc11be,0x4029b6db,0xf4d390fa,0x5e9f7ab7 },
+      { 0x30646612,0x5076d72f,0xdda1d0d8,0xa0afed1d,0x85a1d103,0x29022257,
+        0x4e276bcd,0xcb499e17,0x51246c3d,0x16d1da71,0x589a0443,0xc72d56d3 } },
+    /* 120 */
+    { { 0xdae5bb45,0xdf5ffc74,0x261bd6dc,0x99068c4a,0xaa98ec7b,0xdc0afa7a,
+        0xf121e96d,0xedd2ee00,0x1414045c,0x163cc7be,0x335af50e,0xb0b1bbce },
+      { 0x01a06293,0xd440d785,0x6552e644,0xcdebab7c,0x8c757e46,0x48cb8dbc,
+        0x3cabe3cb,0x81f9cf78,0xb123f59a,0xddd02611,0xeeb3784d,0x3dc7b88e } },
+    /* 121 */
+    { { 0xc4741456,0xe1b8d398,0x6032a121,0xa9dfa902,0x1263245b,0x1cbfc86d,
+        0x5244718c,0xf411c762,0x05b0fc54,0x96521d54,0xdbaa4985,0x1afab46e },
+      { 0x8674b4ad,0xa75902ba,0x5ad87d12,0x486b43ad,0x36e0d099,0x72b1c736,
+        0xbb6cd6d6,0x39890e07,0x59bace4e,0x8128999c,0x7b535e33,0xd8da430b } },
+    /* 122 */
+    { { 0xc6b75791,0x39f65642,0x21806bfb,0x050947a6,0x1362ef84,0x0ca3e370,
+        0x8c3d2391,0x9bc60aed,0x732e1ddc,0x9b488671,0xa98ee077,0x12d10d9e },
+      { 0x3651b7dc,0xb6f2822d,0x80abd138,0x6345a5ba,0x472d3c84,0x62033262,
+        0xacc57527,0xd54a1d40,0x424447cb,0x6ea46b3a,0x2fb1a496,0x5bc41057 } },
+    /* 123 */
+    { { 0xa751cd0e,0xe70c57a3,0xeba3c7d6,0x190d8419,0x9d47d55a,0xb1c3bee7,
+        0xf912c6d8,0xda941266,0x407a6ad6,0x12e9aacc,0x6e838911,0xd6ce5f11 },
+      { 0x70e1f2ce,0x063ca97b,0x8213d434,0xa3e47c72,0x84df810a,0xa016e241,
+        0xdfd881a4,0x688ad7b0,0xa89bf0ad,0xa37d99fc,0xa23c2d23,0xd8e3f339 } },
+    /* 124 */
+    { { 0x750bed6f,0xbdf53163,0x83e68b0a,0x808abc32,0x5bb08a33,0x85a36627,
+        0x6b0e4abe,0xf72a3a0f,0xfaf0c6ad,0xf7716d19,0x5379b25f,0x22dcc020 },
+      { 0xf9a56e11,0x7400bf8d,0x56a47f21,0x6cb8bad7,0x7a6eb644,0x7c97176f,
+        0xd1f5b646,0xe8fd84f7,0x44ddb054,0x98320a94,0x1dde86f5,0x07071ba3 } },
+    /* 125 */
+    { { 0x98f8fcb9,0x6fdfa0e5,0x94d0d70c,0x89cec8e0,0x106d20a8,0xa0899397,
+        0xba8acc9c,0x915bfb9a,0x5507e01c,0x1370c94b,0x8a821ffb,0x83246a60 },
+      { 0xbe3c378f,0xa8273a9f,0x35a25be9,0x7e544789,0x4dd929d7,0x6cfa4972,
+        0x365bd878,0x987fed9d,0x5c29a7ae,0x4982ac94,0x5ddd7ec5,0x4589a5d7 } },
+    /* 126 */
+    { { 0xa95540a9,0x9fabb174,0x0162c5b0,0x7cfb886f,0xea3dee18,0x17be766b,
+        0xe88e624c,0xff7da41f,0x8b919c38,0xad0b71eb,0xf31ff9a9,0x86a522e0 },
+      { 0x868bc259,0xbc8e6f72,0x3ccef9e4,0x6130c638,0x9a466555,0x09f1f454,
+        0x19b2bfb4,0x8e6c0f09,0x0ca7bb22,0x945c46c9,0x4dafb67b,0xacd87168 } },
+    /* 127 */
+    { { 0x10c53841,0x090c72ca,0x55a4fced,0xc20ae01b,0xe10234ad,0x03f7ebd5,
+        0x85892064,0xb3f42a6a,0xb4a14722,0xbdbc30c0,0x8ca124cc,0x971bc437 },
+      { 0x517ff2ff,0x6f79f46d,0xecba947b,0x6a9c96e2,0x62925122,0x5e79f2f4,
+        0x6a4e91f1,0x30a96bb1,0x2d4c72da,0x1147c923,0x5811e4df,0x65bc311f } },
+    /* 128 */
+    { { 0x139b3239,0x87c7dd7d,0x4d833bae,0x8b57824e,0x9fff0015,0xbcbc4878,
+        0x909eaf1a,0x8ffcef8b,0xf1443a78,0x9905f4ee,0xe15cbfed,0x020dd4a2 },
+      { 0xa306d695,0xca2969ec,0xb93caf60,0xdf940cad,0x87ea6e39,0x67f7fab7,
+        0xf98c4fe5,0x0d0ee10f,0xc19cb91e,0xc646879a,0x7d1d7ab4,0x4b4ea50c } },
+    /* 129 */
+    { { 0x7a0db57e,0x19e40945,0x9a8c9702,0xe6017cad,0x1be5cff9,0xdbf739e5,
+        0xa7a938a2,0x3646b3cd,0x68350dfc,0x04511085,0x56e098b5,0xad3bd6f3 },
+      { 0xee2e3e3e,0x935ebabf,0x473926cb,0xfbd01702,0x9e9fb5aa,0x7c735b02,
+        0x2e3feff0,0xc52a1b85,0x046b405a,0x9199abd3,0x39039971,0xe306fcec } },
+    /* 130 */
+    { { 0x23e4712c,0xd6d9aec8,0xc3c198ee,0x7ca8376c,0x31bebd8a,0xe6d83187,
+        0xd88bfef3,0xed57aff3,0xcf44edc7,0x72a645ee,0x5cbb1517,0xd4e63d0b },
+      { 0xceee0ecf,0x98ce7a1c,0x5383ee8e,0x8f012633,0xa6b455e8,0x3b879078,
+        0xc7658c06,0xcbcd3d96,0x0783336a,0x721d6fe7,0x5a677136,0xf21a7263 } },
+    /* 131 */
+    { { 0x9586ba11,0x19d8b3cd,0x8a5c0480,0xd9e0aeb2,0x2230ef5c,0xe4261dbf,
+        0x02e6bf09,0x095a9dee,0x80dc7784,0x8963723c,0x145157b1,0x5c97dbaf },
+      { 0x4bc4503e,0x97e74434,0x85a6b370,0x0fb1cb31,0xcd205d4b,0x3e8df2be,
+        0xf8f765da,0x497dd1bc,0x6c988a1a,0x92ef95c7,0x64dc4cfa,0x3f924baa } },
+    /* 132 */
+    { { 0x7268b448,0x6bf1b8dd,0xefd79b94,0xd4c28ba1,0xe4e3551f,0x2fa1f8c8,
+        0x5c9187a9,0x769e3ad4,0x40326c0d,0x28843b4d,0x50d5d669,0xfefc8094 },
+      { 0x90339366,0x30c85bfd,0x5ccf6c3a,0x4eeb56f1,0x28ccd1dc,0x0e72b149,
+        0xf2ce978e,0x73ee85b5,0x3165bb23,0xcdeb2bf3,0x4e410abf,0x8106c923 } },
+    /* 133 */
+    { { 0x7d02f4ee,0xc8df0161,0x18e21225,0x8a781547,0x6acf9e40,0x4ea895eb,
+        0x6e5a633d,0x8b000cb5,0x7e981ffb,0xf31d86d5,0x4475bc32,0xf5c8029c },
+      { 0x1b568973,0x764561ce,0xa62996ec,0x2f809b81,0xda085408,0x9e513d64,
+        0xe61ce309,0xc27d815d,0x272999e0,0x0da6ff99,0xfead73f7,0xbd284779 } },
+    /* 134 */
+    { { 0x9b1cdf2b,0x6033c2f9,0xbc5fa151,0x2a99cf06,0x12177b3b,0x7d27d259,
+        0xc4485483,0xb1f15273,0x102e2297,0x5fd57d81,0xc7f6acb7,0x3d43e017 },
+      { 0x3a70eb28,0x41a8bb0b,0x3e80b06b,0x67de2d8e,0x70c28de5,0x09245a41,
+        0xa7b26023,0xad7dbcb1,0x2cbc6c1e,0x70b08a35,0x9b33041f,0xb504fb66 } },
+    /* 135 */
+    { { 0xf97a27c2,0xa8e85ab5,0xc10a011b,0x6ac5ec8b,0xffbcf161,0x55745533,
+        0x65790a60,0x01780e85,0x99ee75b0,0xe451bf85,0x39c29881,0x8907a63b },
+      { 0x260189ed,0x76d46738,0x47bd35cb,0x284a4436,0x20cab61e,0xd74e8c40,
+        0x416cf20a,0x6264bf8c,0x5fd820ce,0xfa5a6c95,0xf24bb5fc,0xfa7154d0 } },
+    /* 136 */
+    { { 0x9b3f5034,0x18482cec,0xcd9e68fd,0x962d445a,0x95746f23,0x266fb1d6,
+        0x58c94a4b,0xc66ade5a,0xed68a5b6,0xdbbda826,0x7ab0d6ae,0x05664a4d },
+      { 0x025e32fc,0xbcd4fe51,0xa96df252,0x61a5aebf,0x31592a31,0xd88a07e2,
+        0x98905517,0x5d9d94de,0x5fd440e7,0x96bb4010,0xe807db4c,0x1b0c47a2 } },
+    /* 137 */
+    { { 0x08223878,0x5c2a6ac8,0xe65a5558,0xba08c269,0x9bbc27fd,0xd22b1b9b,
+        0x72b9607d,0x919171bf,0xe588dc58,0x9ab455f9,0x23662d93,0x6d54916e },
+      { 0x3b1de0c1,0x8da8e938,0x804f278f,0xa84d186a,0xd3461695,0xbf4988cc,
+        0xe10eb0cb,0xf5eae3be,0xbf2a66ed,0x1ff8b68f,0xc305b570,0xa68daf67 } },
+    /* 138 */
+    { { 0x44b2e045,0xc1004cff,0x4b1c05d4,0x91b5e136,0x88a48a07,0x53ae4090,
+        0xea11bb1a,0x73fb2995,0x3d93a4ea,0x32048570,0x3bfc8a5f,0xcce45de8 },
+      { 0xc2b3106e,0xaff4a97e,0xb6848b4f,0x9069c630,0xed76241c,0xeda837a6,
+        0x6cc3f6cf,0x8a0daf13,0x3da018a8,0x199d049d,0xd9093ba3,0xf867c6b1 } },
+    /* 139 */
+    { { 0x56527296,0xe4d42a56,0xce71178d,0xae26c73d,0x6c251664,0x70a0adac,
+        0x5dc0ae1d,0x813483ae,0xdaab2daf,0x7574eacd,0xc2d55f4f,0xc56b52dc },
+      { 0x95f32923,0x872bc167,0x5bdd2a89,0x4be17581,0xa7699f00,0x9b57f1e7,
+        0x3ac2de02,0x5fcd9c72,0x92377739,0x83af3ba1,0xfc50b97f,0xa64d4e2b } },
+    /* 140 */
+    { { 0x0e552b40,0x2172dae2,0xd34d52e8,0x62f49725,0x07958f98,0x7930ee40,
+        0x751fdd74,0x56da2a90,0xf53e48c3,0xf1192834,0x8e53c343,0x34d2ac26 },
+      { 0x13111286,0x1073c218,0xda9d9827,0x201dac14,0xee95d378,0xec2c29db,
+        0x1f3ee0b1,0x9316f119,0x544ce71c,0x7890c9f0,0x27612127,0xd77138af } },
+    /* 141 */
+    { { 0x3b4ad1cd,0x78045e6d,0x4aa49bc1,0xcd86b94e,0xfd677a16,0x57e51f1d,
+        0xfa613697,0xd9290935,0x34f4d893,0x7a3f9593,0x5d5fcf9b,0x8c9c248b },
+      { 0x6f70d4e9,0x9f23a482,0x63190ae9,0x17273454,0x5b081a48,0x4bdd7c13,
+        0x28d65271,0x1e2de389,0xe5841d1f,0x0bbaaa25,0x746772e5,0xc4c18a79 } },
+    /* 142 */
+    { { 0x593375ac,0x10ee2681,0x7dd5e113,0x4f3288be,0x240f3538,0x9a97b2fb,
+        0x1de6b1e2,0xfa11089f,0x1351bc58,0x516da562,0x2dfa85b5,0x573b6119 },
+      { 0x6cba7df5,0x89e96683,0x8c28ab40,0xf299be15,0xad43fcbf,0xe91c9348,
+        0x9a1cefb3,0xe9bbc7cc,0x738b2775,0xc8add876,0x775eaa01,0x6e3b1f2e } },
+    /* 143 */
+    { { 0xb677788b,0x0365a888,0x3fd6173c,0x634ae8c4,0x9e498dbe,0x30498761,
+        0xc8f779ab,0x08c43e6d,0x4c09aca9,0x068ae384,0x2018d170,0x2380c70b },
+      { 0xa297c5ec,0xcf77fbc3,0xca457948,0xdacbc853,0x336bec7e,0x3690de04,
+        0x14eec461,0x26bbac64,0x1f713abf,0xd1c23c7e,0xe6fd569e,0xf08bbfcd } },
+    /* 144 */
+    { { 0x84770ee3,0x5f8163f4,0x744a1706,0x0e0c7f94,0xe1b2d46d,0x9c8f05f7,
+        0xd01fd99a,0x417eafe7,0x11440e5b,0x2ba15df5,0x91a6fbcf,0xdc5c552a },
+      { 0xa270f721,0x86271d74,0xa004485b,0x32c0a075,0x8defa075,0x9d1a87e3,
+        0xbf0d20fe,0xb590a7ac,0x8feda1f5,0x430c41c2,0x58f6ec24,0x454d2879 } },
+    /* 145 */
+    { { 0x7c525435,0x52b7a635,0x37c4bdbc,0x3d9ef57f,0xdffcc475,0x2bb93e9e,
+        0x7710f3be,0xf7b8ba98,0x21b727de,0x42ee86da,0x2e490d01,0x55ac3f19 },
+      { 0xc0c1c390,0x487e3a6e,0x446cde7b,0x036fb345,0x496ae951,0x089eb276,
+        0x71ed1234,0xedfed4d9,0x900f0b46,0x661b0dd5,0x8582f0d3,0x11bd6f1b } },
+    /* 146 */
+    { { 0x076bc9d1,0x5cf9350f,0xcf3cd2c3,0x15d903be,0x25af031c,0x21cfc8c2,
+        0x8b1cc657,0xe0ad3248,0x70014e87,0xdd9fb963,0x297f1658,0xf0f3a5a1 },
+      { 0xf1f703aa,0xbb908fba,0x2f6760ba,0x2f9cc420,0x66a38b51,0x00ceec66,
+        0x05d645da,0x4deda330,0xf7de3394,0xb9cf5c72,0x1ad4c906,0xaeef6502 } },
+    /* 147 */
+    { { 0x7a19045d,0x0583c8b1,0xd052824c,0xae7c3102,0xff6cfa58,0x2a234979,
+        0x62c733c0,0xfe9dffc9,0x9c0c4b09,0x3a7fa250,0x4fe21805,0x516437bb },
+      { 0xc2a23ddb,0x9454e3d5,0x289c104e,0x0726d887,0x4fd15243,0x8977d918,
+        0x6d7790ba,0xc559e73f,0x465af85f,0x8fd3e87d,0x5feee46b,0xa2615c74 } },
+    /* 148 */
+    { { 0x4335167d,0xc8d607a8,0xe0f5c887,0x8b42d804,0x398d11f9,0x5f9f13df,
+        0x20740c67,0x5aaa5087,0xa3d9234b,0x83da9a6a,0x2a54bad1,0xbd3a5c4e },
+      { 0x2db0f658,0xdd13914c,0x5a3f373a,0x29dcb66e,0x5245a72b,0xbfd62df5,
+        0x91e40847,0x19d18023,0xb136b1ae,0xd9df74db,0x3f93bc5b,0x72a06b6b } },
+    /* 149 */
+    { { 0xad19d96f,0x6da19ec3,0xfb2a4099,0xb342daa4,0x662271ea,0x0e61633a,
+        0xce8c054b,0x3bcece81,0x8bd62dc6,0x7cc8e061,0xee578d8b,0xae189e19 },
+      { 0xdced1eed,0x73e7a25d,0x7875d3ab,0xc1257f0a,0x1cfef026,0x2cb2d5a2,
+        0xb1fdf61c,0xd98ef39b,0x24e83e6c,0xcd8e6f69,0xc7b7088b,0xd71e7076 } },
+    /* 150 */
+    { { 0x9d4245bf,0x33936830,0x2ac2953b,0x22d96217,0x56c3c3cd,0xb3bf5a82,
+        0x0d0699e8,0x50c9be91,0x8f366459,0xec094463,0x513b7c35,0x6c056dba },
+      { 0x045ab0e3,0x687a6a83,0x445c9295,0x8d40b57f,0xa16f5954,0x0f345048,
+        0x3d8f0a87,0x64b5c639,0x9f71c5e2,0x106353a2,0x874f0dd4,0xdd58b475 } },
+    /* 151 */
+    { { 0x62230c72,0x67ec084f,0x481385e3,0xf14f6cca,0x4cda7774,0xf58bb407,
+        0xaa2dbb6b,0xe15011b1,0x0c035ab1,0xd488369d,0x8245f2fd,0xef83c24a },
+      { 0x9fdc2538,0xfb57328f,0x191fe46a,0x79808293,0x32ede548,0xe28f5c44,
+        0xea1a022c,0x1b3cda99,0x3df2ec7f,0x39e639b7,0x760e9a18,0x77b6272b } },
+    /* 152 */
+    { { 0xa65d56d5,0x2b1d51bd,0x7ea696e0,0x3a9b71f9,0x9904f4c4,0x95250ecc,
+        0xe75774b7,0x8bc4d6eb,0xeaeeb9aa,0x0e343f8a,0x930e04cb,0xc473c1d1 },
+      { 0x064cd8ae,0x282321b1,0x5562221c,0xf4b4371e,0xd1bf1221,0xc1cc81ec,
+        0xe2c8082f,0xa52a07a9,0xba64a958,0x350d8e59,0x6fb32c9a,0x29e4f3de } },
+    /* 153 */
+    { { 0xba89aaa5,0x0aa9d56c,0xc4c6059e,0xf0208ac0,0xbd6ddca4,0x7400d9c6,
+        0xf2c2f74a,0xb384e475,0xb1562dd3,0x4c1061fc,0x2e153b8d,0x3924e248 },
+      { 0x849808ab,0xf38b8d98,0xa491aa36,0x29bf3260,0x88220ede,0x85159ada,
+        0xbe5bc422,0x8b47915b,0xd7300967,0xa934d72e,0x2e515d0d,0xc4f30398 } },
+    /* 154 */
+    { { 0x1b1de38b,0xe3e9ee42,0x42636760,0xa124e25a,0x90165b1a,0x90bf73c0,
+        0x146434c5,0x21802a34,0x2e1fa109,0x54aa83f2,0xed9c51e9,0x1d4bd03c },
+      { 0x798751e6,0xc2d96a38,0x8c3507f5,0xed27235f,0xc8c24f88,0xb5fb80e2,
+        0xd37f4f78,0xf873eefa,0xf224ba96,0x7229fd74,0x9edd7149,0x9dcd9199 } },
+    /* 155 */
+    { { 0x4e94f22a,0xee9f81a6,0xf71ec341,0xe5609892,0xa998284e,0x6c818ddd,
+        0x3b54b098,0x9fd47295,0x0e8a7cc9,0x47a6ac03,0xb207a382,0xde684e5e },
+      { 0x2b6b956b,0x4bdd1ecd,0xf01b3583,0x09084414,0x55233b14,0xe2f80b32,
+        0xef5ebc5e,0x5a0fec54,0xbf8b29a2,0x74cf25e6,0x7f29e014,0x1c757fa0 } },
+    /* 156 */
+    { { 0xeb0fdfe4,0x1bcb5c4a,0xf0899367,0xd7c649b3,0x05bc083b,0xaef68e3f,
+        0xa78aa607,0x57a06e46,0x21223a44,0xa2136ecc,0x52f5a50b,0x89bd6484 },
+      { 0x4455f15a,0x724411b9,0x08a9c0fd,0x23dfa970,0x6db63bef,0x7b0da4d1,
+        0xfb162443,0x6f8a7ec1,0xe98284fb,0xc1ac9cee,0x33566022,0x085a582b } },
+    /* 157 */
+    { { 0xec1f138a,0x15cb61f9,0x668f0c28,0x11c9a230,0xdf93f38f,0xac829729,
+        0x4048848d,0xcef25698,0x2bba8fbf,0x3f686da0,0x111c619a,0xed5fea78 },
+      { 0xd6d1c833,0x9b4f73bc,0x86e7bf80,0x50951606,0x042b1d51,0xa2a73508,
+        0x5fb89ec2,0x9ef6ea49,0x5ef8b892,0xf1008ce9,0x9ae8568b,0x78a7e684 } },
+    /* 158 */
+    { { 0x10470cd8,0x3fe83a7c,0xf86df000,0x92734682,0xda9409b5,0xb5dac06b,
+        0x94939c5f,0x1e7a9660,0x5cc116dc,0xdec6c150,0x66bac8cc,0x1a52b408 },
+      { 0x6e864045,0x5303a365,0x9139efc1,0x45eae72a,0x6f31d54f,0x83bec646,
+        0x6e958a6d,0x2fb4a86f,0x4ff44030,0x6760718e,0xe91ae0df,0x008117e3 } },
+    /* 159 */
+    { { 0x384310a2,0x5d5833ba,0x1fd6c9fc,0xbdfb4edc,0x849c4fb8,0xb9a4f102,
+        0x581c1e1f,0xe5fb239a,0xd0a9746d,0xba44b2e7,0x3bd942b9,0x78f7b768 },
+      { 0xc87607ae,0x076c8ca1,0xd5caaa7e,0x82b23c2e,0x2763e461,0x6a581f39,
+        0x3886df11,0xca8a5e4a,0x264e7f22,0xc87e90cf,0x215cfcfc,0x04f74870 } },
+    /* 160 */
+    { { 0x141d161c,0x5285d116,0x93c4ed17,0x67cd2e0e,0x7c36187e,0x12c62a64,
+        0xed2584ca,0xf5329539,0x42fbbd69,0xc4c777c4,0x1bdfc50a,0x107de776 },
+      { 0xe96beebd,0x9976dcc5,0xa865a151,0xbe2aff95,0x9d8872af,0x0e0a9da1,
+        0xa63c17cc,0x5e357a3d,0xe15cc67c,0xd31fdfd8,0x7970c6d8,0xc44bbefd } },
+    /* 161 */
+    { { 0x4c0c62f1,0x703f83e2,0x4e195572,0x9b1e28ee,0xfe26cced,0x6a82858b,
+        0xc43638fa,0xd381c84b,0xa5ba43d8,0x94f72867,0x10b82743,0x3b4a783d },
+      { 0x7576451e,0xee1ad7b5,0x14b6b5c8,0xc3d0b597,0xfcacc1b8,0x3dc30954,
+        0x472c9d7b,0x55df110e,0x02f8a328,0x97c86ed7,0x88dc098f,0xd0433413 } },
+    /* 162 */
+    { { 0x2ca8f2fe,0x1a60d152,0x491bd41f,0x61640948,0x58dfe035,0x6dae29a5,
+        0x278e4863,0x9a615bea,0x9ad7c8e5,0xbbdb4477,0x2ceac2fc,0x1c706630 },
+      { 0x99699b4b,0x5e2b54c6,0x239e17e8,0xb509ca6d,0xea063a82,0x728165fe,
+        0xb6a22e02,0x6b5e609d,0xb26ee1df,0x12813905,0x439491fa,0x07b9f722 } },
+    /* 163 */
+    { { 0x48ff4e49,0x1592ec14,0x6d644129,0x3e4e9f17,0x1156acc0,0x7acf8288,
+        0xbb092b0b,0x5aa34ba8,0x7d38393d,0xcd0f9022,0xea4f8187,0x416724dd },
+      { 0xc0139e73,0x3c4e641c,0x91e4d87d,0xe0fe46cf,0xcab61f8a,0xedb3c792,
+        0xd3868753,0x4cb46de4,0x20f1098a,0xe449c21d,0xf5b8ea6e,0x5e5fd059 } },
+    /* 164 */
+    { { 0x75856031,0x7fcadd46,0xeaf2fbd0,0x89c7a4cd,0x7a87c480,0x1af523ce,
+        0x61d9ae90,0xe5fc1095,0xbcdb95f5,0x3fb5864f,0xbb5b2c7d,0xbeb5188e },
+      { 0x3ae65825,0x3d1563c3,0x0e57d641,0x116854c4,0x1942ebd3,0x11f73d34,
+        0xc06955b3,0x24dc5904,0x995a0a62,0x8a0d4c83,0x5d577b7d,0xfb26b86d } },
+    /* 165 */
+    { { 0xc686ae17,0xc53108e7,0xd1c1da56,0x9090d739,0x9aec50ae,0x4583b013,
+        0xa49a6ab2,0xdd9a088b,0xf382f850,0x28192eea,0xf5fe910e,0xcc8df756 },
+      { 0x9cab7630,0x877823a3,0xfb8e7fc1,0x64984a9a,0x364bfc16,0x5448ef9c,
+        0xc44e2a9a,0xbbb4f871,0x435c95e9,0x901a41ab,0xaaa50a06,0xc6c23e5f } },
+    /* 166 */
+    { { 0x9034d8dd,0xb78016c1,0x0b13e79b,0x856bb44b,0xb3241a05,0x85c6409a,
+        0x2d78ed21,0x8d2fe19a,0x726eddf2,0xdcc7c26d,0x25104f04,0x3ccaff5f },
+      { 0x6b21f843,0x397d7edc,0xe975de4c,0xda88e4dd,0x4f5ab69e,0x5273d396,
+        0x9aae6cc0,0x537680e3,0x3e6f9461,0xf749cce5,0x957bffd3,0x021ddbd9 } },
+    /* 167 */
+    { { 0x777233cf,0x7b64585f,0x0942a6f0,0xfe6771f6,0xdfe6eef0,0x636aba7a,
+        0x86038029,0x63bbeb56,0xde8fcf36,0xacee5842,0xd4a20524,0x48d9aa99 },
+      { 0x0da5e57a,0xcff7a74c,0xe549d6c9,0xc232593c,0xf0f2287b,0x68504bcc,
+        0xbc8360b5,0x6d7d098d,0x5b402f41,0xeac5f149,0xb87d1bf1,0x61936f11 } },
+    /* 168 */
+    { { 0xb8153a9d,0xaa9da167,0x9e83ecf0,0xa49fe3ac,0x1b661384,0x14c18f8e,
+        0x38434de1,0x61c24dab,0x283dae96,0x3d973c3a,0x82754fc9,0xc99baa01 },
+      { 0x4c26b1e3,0x477d198f,0xa7516202,0x12e8e186,0x362addfa,0x386e52f6,
+        0xc3962853,0x31e8f695,0x6aaedb60,0xdec2af13,0x29cf74ac,0xfcfdb4c6 } },
+    /* 169 */
+    { { 0xcca40298,0x6b3ee958,0xf2f5d195,0xc3878153,0xed2eae5b,0x0c565630,
+        0x3a697cf2,0xd089b37e,0xad5029ea,0xc2ed2ac7,0x0f0dda6a,0x7e5cdfad },
+      { 0xd9b86202,0xf98426df,0x4335e054,0xed1960b1,0x3f14639e,0x1fdb0246,
+        0x0db6c670,0x17f709c3,0x773421e1,0xbfc687ae,0x26c1a8ac,0x13fefc4a } },
+    /* 170 */
+    { { 0x7ffa0a5f,0xe361a198,0xc63fe109,0xf4b26102,0x6c74e111,0x264acbc5,
+        0x77abebaf,0x4af445fa,0x24cddb75,0x448c4fdd,0x44506eea,0x0b13157d },
+      { 0x72e9993d,0x22a6b159,0x85e5ecbe,0x2c3c57e4,0xfd83e1a1,0xa673560b,
+        0xc3b8c83b,0x6be23f82,0x40bbe38e,0x40b13a96,0xad17399b,0x66eea033 } },
+    /* 171 */
+    { { 0xb4c6c693,0x49fc6e95,0x36af7d38,0xefc735de,0x35fe42fc,0xe053343d,
+        0x6a9ab7c3,0xf0aa427c,0x4a0fcb24,0xc79f0436,0x93ebbc50,0x16287243 },
+      { 0x16927e1e,0x5c3d6bd0,0x673b984c,0x40158ed2,0x4cd48b9a,0xa7f86fc8,
+        0x60ea282d,0x1643eda6,0xe2a1beed,0x45b393ea,0x19571a94,0x664c839e } },
+    /* 172 */
+    { { 0x27eeaf94,0x57745750,0xea99e1e7,0x2875c925,0x5086adea,0xc127e7ba,
+        0x86fe424f,0x765252a0,0x2b6c0281,0x1143cc6c,0xd671312d,0xc9bb2989 },
+      { 0x51acb0a5,0x880c337c,0xd3c60f78,0xa3710915,0x9262b6ed,0x496113c0,
+        0x9ce48182,0x5d25d9f8,0xb3813586,0x53b6ad72,0x4c0e159c,0x0ea3bebc } },
+    /* 173 */
+    { { 0xc5e49bea,0xcaba450a,0x7c05da59,0x684e5415,0xde7ac36c,0xa2e9cab9,
+        0x2e6f957b,0x4ca79b5f,0x09b817b1,0xef7b0247,0x7d89df0f,0xeb304990 },
+      { 0x46fe5096,0x508f7307,0x2e04eaaf,0x695810e8,0x3512f76c,0x88ef1bd9,
+        0x3ebca06b,0x77661351,0xccf158b7,0xf7d4863a,0x94ee57da,0xb2a81e44 } },
+    /* 174 */
+    { { 0x6d53e6ba,0xff288e5b,0x14484ea2,0xa90de1a9,0xed33c8ec,0x2fadb60c,
+        0x28b66a40,0x579d6ef3,0xec24372d,0x4f2dd6dd,0x1d66ec7d,0xe9e33fc9 },
+      { 0x039eab6e,0x110899d2,0x3e97bb5e,0xa31a667a,0xcfdce68e,0x6200166d,
+        0x5137d54b,0xbe83ebae,0x4800acdf,0x085f7d87,0x0c6f8c86,0xcf4ab133 } },
+    /* 175 */
+    { { 0x931e08fb,0x03f65845,0x1506e2c0,0x6438551e,0x9c36961f,0x5791f0dc,
+        0xe3dcc916,0x68107b29,0xf495d2ca,0x83242374,0x6ee5895b,0xd8cfb663 },
+      { 0xa0349b1b,0x525e0f16,0x4a0fab86,0x33cd2c6c,0x2af8dda9,0x46c12ee8,
+        0x71e97ad3,0x7cc424ba,0x37621eb0,0x69766ddf,0xa5f0d390,0x95565f56 } },
+    /* 176 */
+    { { 0x1a0f5e94,0xe0e7bbf2,0x1d82d327,0xf771e115,0xceb111fa,0x10033e3d,
+        0xd3426638,0xd269744d,0x00d01ef6,0xbdf2d9da,0xa049ceaf,0x1cb80c71 },
+      { 0x9e21c677,0x17f18328,0x19c8f98b,0x6452af05,0x80b67997,0x35b9c5f7,
+        0x40f8f3d4,0x5c2e1cbe,0x66d667ca,0x43f91656,0xcf9d6e79,0x9faaa059 } },
+    /* 177 */
+    { { 0x0a078fe6,0x8ad24618,0x464fd1dd,0xf6cc73e6,0xc3e37448,0x4d2ce34d,
+        0xe3271b5f,0x624950c5,0xefc5af72,0x62910f5e,0xaa132bc6,0x8b585bf8 },
+      { 0xa839327f,0x11723985,0x4aac252f,0x34e2d27d,0x6296cc4e,0x402f59ef,
+        0x47053de9,0x00ae055c,0x28b4f09b,0xfc22a972,0xfa0c180e,0xa9e86264 } },
+    /* 178 */
+    { { 0xbc310ecc,0x0b7b6224,0x67fa14ed,0x8a1a74f1,0x7214395c,0x87dd0960,
+        0xf5c91128,0xdf1b3d09,0x86b264a8,0x39ff23c6,0x3e58d4c5,0xdc2d49d0 },
+      { 0xa9d6f501,0x2152b7d3,0xc04094f7,0xf4c32e24,0xd938990f,0xc6366596,
+        0x94fb207f,0x084d078f,0x328594cb,0xfd99f1d7,0xcb2d96b3,0x36defa64 } },
+    /* 179 */
+    { { 0x13ed7cbe,0x4619b781,0x9784bd0e,0x95e50015,0x2c7705fe,0x2a32251c,
+        0x5f0dd083,0xa376af99,0x0361a45b,0x55425c6c,0x1f291e7b,0x812d2cef },
+      { 0x5fd94972,0xccf581a0,0xe56dc383,0x26e20e39,0x63dbfbf0,0x0093685d,
+        0x36b8c575,0x1fc164cc,0x390ef5e7,0xb9c5ab81,0x26908c66,0x40086beb } },
+    /* 180 */
+    { { 0x37e3c115,0xe5e54f79,0xc1445a8a,0x69b8ee8c,0xb7659709,0x79aedff2,
+        0x1b46fbe6,0xe288e163,0xd18d7bb7,0xdb4844f0,0x48aa6424,0xe0ea23d0 },
+      { 0xf3d80a73,0x714c0e4e,0x3bd64f98,0x87a0aa9e,0x2ec63080,0x8844b8a8,
+        0x255d81a3,0xe0ac9c30,0x455397fc,0x86151237,0x2f820155,0x0b979464 } },
+    /* 181 */
+    { { 0x4ae03080,0x127a255a,0x580a89fb,0x232306b4,0x6416f539,0x04e8cd6a,
+        0x13b02a0e,0xaeb70dee,0x4c09684a,0xa3038cf8,0x28e433ee,0xa710ec3c },
+      { 0x681b1f7d,0x77a72567,0x2fc28170,0x86fbce95,0xf5735ac8,0xd3408683,
+        0x6bd68e93,0x3a324e2a,0xc027d155,0x7ec74353,0xd4427177,0xab60354c } },
+    /* 182 */
+    { { 0xef4c209d,0x32a5342a,0x08d62704,0x2ba75274,0xc825d5fe,0x4bb4af6f,
+        0xd28e7ff1,0x1c3919ce,0xde0340f6,0x1dfc2fdc,0x29f33ba9,0xc6580baf },
+      { 0x41d442cb,0xae121e75,0x3a4724e4,0x4c7727fd,0x524f3474,0xe556d6a4,
+        0x785642a2,0x87e13cc7,0xa17845fd,0x182efbb1,0x4e144857,0xdcec0cf1 } },
+    /* 183 */
+    { { 0xe9539819,0x1cb89541,0x9d94dbf1,0xc8cb3b4f,0x417da578,0x1d353f63,
+        0x8053a09e,0xb7a697fb,0xc35d8b78,0x8d841731,0xb656a7a9,0x85748d6f },
+      { 0xc1859c5d,0x1fd03947,0x535d22a2,0x6ce965c1,0x0ca3aadc,0x1966a13e,
+        0x4fb14eff,0x9802e41d,0x76dd3fcd,0xa9048cbb,0xe9455bba,0x89b182b5 } },
+    /* 184 */
+    { { 0x43360710,0xd777ad6a,0x55e9936b,0x841287ef,0x04a21b24,0xbaf5c670,
+        0x35ad86f1,0xf2c0725f,0xc707e72e,0x338fa650,0xd8883e52,0x2bf8ed2e },
+      { 0xb56e0d6a,0xb0212cf4,0x6843290c,0x50537e12,0x98b3dc6f,0xd8b184a1,
+        0x0210b722,0xd2be9a35,0x559781ee,0x407406db,0x0bc18534,0x5a78d591 } },
+    /* 185 */
+    { { 0xd748b02c,0x4d57aa2a,0xa12b3b95,0xbe5b3451,0x64711258,0xadca7a45,
+        0x322153db,0x597e091a,0x32eb1eab,0xf3271006,0x2873f301,0xbd9adcba },
+      { 0x38543f7f,0xd1dc79d1,0x921b1fef,0x00022092,0x1e5df8ed,0x86db3ef5,
+        0x9e6b944a,0x888cae04,0x791a32b4,0x71bd29ec,0xa6d1c13e,0xd3516206 } },
+    /* 186 */
+    { { 0x55924f43,0x2ef6b952,0x4f9de8d5,0xd2f401ae,0xadc68042,0xfc73e8d7,
+        0x0d9d1bb4,0x627ea70c,0xbbf35679,0xc3bb3e3e,0xd882dee4,0x7e8a254a },
+      { 0xb5924407,0x08906f50,0xa1ad444a,0xf14a0e61,0x65f3738e,0xaa0efa21,
+        0xae71f161,0xd60c7dd6,0xf175894d,0x9e8390fa,0x149f4c00,0xd115cd20 } },
+    /* 187 */
+    { { 0xa52abf77,0x2f2e2c1d,0x54232568,0xc2a0dca5,0x54966dcc,0xed423ea2,
+        0xcd0dd039,0xe48c93c7,0x176405c7,0x1e54a225,0x70d58f2e,0x1efb5b16 },
+      { 0x94fb1471,0xa751f9d9,0x67d2941d,0xfdb31e1f,0x53733698,0xa6c74eb2,
+        0x89a0f64a,0xd3155d11,0xa4b8d2b6,0x4414cfe4,0xf7a8e9e3,0x8d5a4be8 } },
+    /* 188 */
+    { { 0x52669e98,0x5c96b4d4,0x8fd42a03,0x4547f922,0xd285174e,0xcf5c1319,
+        0x064bffa0,0x805cd1ae,0x246d27e7,0x50e8bc4f,0xd5781e11,0xf89ef98f },
+      { 0xdee0b63f,0xb4ff95f6,0x222663a4,0xad850047,0x4d23ce9c,0x02691860,
+        0x50019f59,0x3e5309ce,0x69a508ae,0x27e6f722,0x267ba52c,0xe9376652 } },
+    /* 189 */
+    { { 0xc0368708,0xa04d289c,0x5e306e1d,0xc458872f,0x33112fea,0x76fa23de,
+        0x6efde42e,0x718e3974,0x1d206091,0xf0c98cdc,0x14a71987,0x5fa3ca62 },
+      { 0xdcaa9f2a,0xeee8188b,0x589a860d,0x312cc732,0xc63aeb1f,0xf9808dd6,
+        0x4ea62b53,0x70fd43db,0x890b6e97,0x2c2bfe34,0xfa426aa6,0x105f863c } },
+    /* 190 */
+    { { 0xb38059ad,0x0b29795d,0x90647ea0,0x5686b77e,0xdb473a3e,0xeff0470e,
+        0xf9b6d1e2,0x278d2340,0xbd594ec7,0xebbff95b,0xd3a7f23d,0xf4b72334 },
+      { 0xa5a83f0b,0x2a285980,0x9716a8b3,0x0786c41a,0x22511812,0x138901bd,
+        0xe2fede6e,0xd1b55221,0xdf4eb590,0x0806e264,0x762e462e,0x6c4c897e } },
+    /* 191 */
+    { { 0xb4b41d9d,0xd10b905f,0x4523a65b,0x826ca466,0xb699fa37,0x535bbd13,
+        0x73bc8f90,0x5b9933d7,0xcd2118ad,0x9332d61f,0xd4a65fd0,0x158c693e },
+      { 0xe6806e63,0x4ddfb2a8,0xb5de651b,0xe31ed3ec,0x819bc69a,0xf9460e51,
+        0x2c76b1f8,0x6229c0d6,0x901970a3,0xbb78f231,0x9cee72b8,0x31f3820f } },
+    /* 192 */
+    { { 0xc09e1c72,0xe931caf2,0x12990cf4,0x0715f298,0x943262d8,0x33aad81d,
+        0x73048d3f,0x5d292b7a,0xdc7415f6,0xb152aaa4,0x0fd19587,0xc3d10fd9 },
+      { 0x75ddadd0,0xf76b35c5,0x1e7b694c,0x9f5f4a51,0xc0663025,0x2f1ab7eb,
+        0x920260b0,0x01c9cc87,0x05d39da6,0xc4b1f61a,0xeb4a9c4e,0x6dcd76c4 } },
+    /* 193 */
+    { { 0xfdc83f01,0x0ba0916f,0x9553e4f9,0x354c8b44,0xffc5e622,0xa6cc511a,
+        0xe95be787,0xb954726a,0x75b41a62,0xcb048115,0xebfde989,0xfa2ae6cd },
+      { 0x0f24659a,0x6376bbc7,0x4c289c43,0x13a999fd,0xec9abd8b,0xc7134184,
+        0xa789ab04,0x28c02bf6,0xd3e526ec,0xff841ebc,0x640893a8,0x442b191e } },
+    /* 194 */
+    { { 0xfa2b6e20,0x4cac6c62,0xf6d69861,0x97f29e9b,0xbc96d12d,0x228ab1db,
+        0x5e8e108d,0x6eb91327,0x40771245,0xd4b3d4d1,0xca8a803a,0x61b20623 },
+      { 0xa6a560b1,0x2c2f3b41,0x3859fcf4,0x879e1d40,0x024dbfc3,0x7cdb5145,
+        0x3bfa5315,0x55d08f15,0xaa93823a,0x2f57d773,0xc6a2c9a2,0xa97f259c } },
+    /* 195 */
+    { { 0xe58edbbb,0xc306317b,0x79dfdf13,0x25ade51c,0x16d83dd6,0x6b5beaf1,
+        0x1dd8f925,0xe8038a44,0xb2a87b6b,0x7f00143c,0xf5b438de,0xa885d00d },
+      { 0xcf9e48bd,0xe9f76790,0xa5162768,0xf0bdf9f0,0xad7b57cb,0x0436709f,
+        0xf7c15db7,0x7e151c12,0x5d90ee3b,0x3514f022,0x2c361a8d,0x2e84e803 } },
+    /* 196 */
+    { { 0x563ec8d8,0x2277607d,0xe3934cb7,0xa661811f,0xf58fd5de,0x3ca72e7a,
+        0x62294c6a,0x7989da04,0xf6bbefe9,0x88b3708b,0x53ed7c82,0x0d524cf7 },
+      { 0x2f30c073,0x69f699ca,0x9dc1dcf3,0xf0fa264b,0x05f0aaf6,0x44ca4568,
+        0xd19b9baf,0x0f5b23c7,0xeabd1107,0x39193f41,0x2a7c9b83,0x9e3e10ad } },
+    /* 197 */
+    { { 0xd4ae972f,0xa90824f0,0xc6e846e7,0x43eef02b,0x29d2160a,0x7e460612,
+        0xfe604e91,0x29a178ac,0x4eb184b2,0x23056f04,0xeb54cdf4,0x4fcad55f },
+      { 0xae728d15,0xa0ff96f3,0xc6a00331,0x8a2680c6,0x7ee52556,0x5f84cae0,
+        0xc5a65dad,0x5e462c3a,0xe2d23f4f,0x5d2b81df,0xc5b1eb07,0x6e47301b } },
+    /* 198 */
+    { { 0xaf8219b9,0x77411d68,0x51b1907a,0xcb883ce6,0x101383b5,0x25c87e57,
+        0x982f970d,0x9c7d9859,0x118305d2,0xaa6abca5,0x9013a5db,0x725fed2f },
+      { 0xababd109,0x487cdbaf,0x87586528,0xc0f8cf56,0x8ad58254,0xa02591e6,
+        0xdebbd526,0xc071b1d1,0x961e7e31,0x927dfe8b,0x9263dfe1,0x55f895f9 } },
+    /* 199 */
+    { { 0xb175645b,0xf899b00d,0xb65b4b92,0x51f3a627,0xb67399ef,0xa2f3ac8d,
+        0xe400bc20,0xe717867f,0x1967b952,0x42cc9020,0x3ecd1de1,0x3d596751 },
+      { 0xdb979775,0xd41ebcde,0x6a2e7e88,0x99ba61bc,0x321504f2,0x039149a5,
+        0x27ba2fad,0xe7dc2314,0xb57d8368,0x9f556308,0x57da80a7,0x2b6d16c9 } },
+    /* 200 */
+    { { 0x279ad982,0x84af5e76,0x9c8b81a6,0x9bb4c92d,0x0e698e67,0xd79ad44e,
+        0x265fc167,0xe8be9048,0x0c3a4ccc,0xf135f7e6,0xb8863a33,0xa0a10d38 },
+      { 0xd386efd9,0xe197247c,0xb52346c2,0x0eefd3f9,0x78607bc8,0xc22415f9,
+        0x508674ce,0xa2a8f862,0xc8c9d607,0xa72ad09e,0x50fa764f,0xcd9f0ede } },
+    /* 201 */
+    { { 0xd1a46d4d,0x063391c7,0x9eb01693,0x2df51c11,0x849e83de,0xc5849800,
+        0x8ad08382,0x48fd09aa,0xaa742736,0xa405d873,0xe1f9600c,0xee49e61e },
+      { 0x48c76f73,0xd76676be,0x01274b2a,0xd9c100f6,0x83f8718d,0x110bb67c,
+        0x02fc0d73,0xec85a420,0x744656ad,0xc0449e1e,0x37d9939b,0x28ce7376 } },
+    /* 202 */
+    { { 0x44544ac7,0x97e9af72,0xba010426,0xf2c658d5,0xfb3adfbd,0x732dec39,
+        0xa2df0b07,0xd12faf91,0x2171e208,0x8ac26725,0x5b24fa54,0xf820cdc8 },
+      { 0x94f4cf77,0x307a6eea,0x944a33c6,0x18c783d2,0x0b741ac5,0x4b939d4c,
+        0x3ffbb6e4,0x1d7acd15,0x7a255e44,0x06a24858,0xce336d50,0x14fbc494 } },
+    /* 203 */
+    { { 0x51584e3c,0x9b920c0c,0xf7e54027,0xc7733c59,0x88422bbe,0xe24ce139,
+        0x523bd6ab,0x11ada812,0xb88e6def,0xde068800,0xfe8c582d,0x7b872671 },
+      { 0x7de53510,0x4e746f28,0xf7971968,0x492f8b99,0x7d928ac2,0x1ec80bc7,
+        0x432eb1b5,0xb3913e48,0x32028f6e,0xad084866,0x8fc2f38b,0x122bb835 } },
+    /* 204 */
+    { { 0x3b0b29c3,0x0a9f3b1e,0x4fa44151,0x837b6432,0x17b28ea7,0xb9905c92,
+        0x98451750,0xf39bc937,0xce8b6da1,0xcd383c24,0x010620b2,0x299f57db },
+      { 0x58afdce3,0x7b6ac396,0x3d05ef47,0xa15206b3,0xb9bb02ff,0xa0ae37e2,
+        0x9db3964c,0x107760ab,0x67954bea,0xe29de9a0,0x431c3f82,0x446a1ad8 } },
+    /* 205 */
+    { { 0x5c6b8195,0xc6fecea0,0xf49e71b9,0xd744a7c5,0x177a7ae7,0xa8e96acc,
+        0x358773a7,0x1a05746c,0x37567369,0xa4162146,0x87d1c971,0xaa0217f7 },
+      { 0x77fd3226,0x61e9d158,0xe4f600be,0x0f6f2304,0x7a6dff07,0xa9c4cebc,
+        0x09f12a24,0xd15afa01,0x8c863ee9,0x2bbadb22,0xe5eb8c78,0xa28290e4 } },
+    /* 206 */
+    { { 0x3e9de330,0x55b87fa0,0x195c145b,0x12b26066,0xa920bef0,0xe08536e0,
+        0x4d195adc,0x7bff6f2c,0x945f4187,0x7f319e9d,0xf892ce47,0xf9848863 },
+      { 0x4fe37657,0xd0efc1d3,0x5cf0e45a,0x3c58de82,0x8b0ccbbe,0x626ad21a,
+        0xaf952fc5,0xd2a31208,0xeb437357,0x81791995,0x98e95d4f,0x5f19d30f } },
+    /* 207 */
+    { { 0x0e6865bb,0x72e83d9a,0xf63456a6,0x22f5af3b,0x463c8d9e,0x409e9c73,
+        0xdfe6970e,0x40e9e578,0x711b91ca,0x876b6efa,0x942625a3,0x895512cf },
+      { 0xcb4e462b,0x84c8eda8,0x4412e7c8,0x84c0154a,0xceb7b71f,0x04325db1,
+        0x66f70877,0x1537dde3,0x1992b9ac,0xf3a09399,0xd498ae77,0xa7316606 } },
+    /* 208 */
+    { { 0xcad260f5,0x13990d2f,0xeec0e8c0,0x76c3be29,0x0f7bd7d5,0x7dc5bee0,
+        0xefebda4b,0x9be167d2,0x9122b87e,0xcce3dde6,0x82b5415c,0x75a28b09 },
+      { 0xe84607a6,0xf6810bcd,0x6f4dbf0d,0xc6d58128,0x1b4dafeb,0xfead577d,
+        0x066b28eb,0x9bc440b2,0x8b17e84b,0x53f1da97,0xcda9a575,0x0459504b } },
+    /* 209 */
+    { { 0x329e5836,0x13e39a02,0xf717269d,0x2c9e7d51,0xf26c963b,0xc5ac58d6,
+        0x79967bf5,0x3b0c6c43,0x55908d9d,0x60bbea3f,0xf07c9ad1,0xd84811e7 },
+      { 0x5bd20e4a,0xfe7609a7,0x0a70baa8,0xe4325dd2,0xb3600386,0x3711f370,
+        0xd0924302,0x97f9562f,0x4acc4436,0x040dc0c3,0xde79cdd4,0xfd6d725c } },
+    /* 210 */
+    { { 0xcf13eafb,0xb3efd0e3,0x5aa0ae5f,0x21009cbb,0x79022279,0xe480c553,
+        0xb2fc9a6d,0x755cf334,0x07096ae7,0x8564a5bf,0xbd238139,0xddd649d0 },
+      { 0x8a045041,0xd0de10b1,0xc957d572,0x6e05b413,0x4e0fb25c,0x5c5ff806,
+        0x641162fb,0xd933179b,0xe57439f9,0x42d48485,0x8a8d72aa,0x70c5bd0a } },
+    /* 211 */
+    { { 0x97bdf646,0xa7671738,0xab329f7c,0xaa1485b4,0xf8f25fdf,0xce3e11d6,
+        0xc6221824,0x76a3fc7e,0xf3924740,0x045f281f,0x96d13a9a,0x24557d4e },
+      { 0xdd4c27cd,0x875c804b,0x0f5c7fea,0x11c5f0f4,0xdc55ff7e,0xac8c880b,
+        0x1103f101,0x2acddec5,0xf99faa89,0x38341a21,0xce9d6b57,0xc7b67a2c } },
+    /* 212 */
+    { { 0x8e357586,0x9a0d724f,0xdf648da0,0x1d7f4ff5,0xfdee62a5,0x9c3e6c9b,
+        0x0389b372,0x0499cef0,0x98eab879,0xe904050d,0x6c051617,0xe8eef1b6 },
+      { 0xc37e3ca9,0xebf5bfeb,0xa4e0b91d,0x7c5e946d,0x2c4bea28,0x79097314,
+        0xee67b2b7,0x81f6c109,0xdafc5ede,0xaf237d9b,0x2abb04c7,0xd2e60201 } },
+    /* 213 */
+    { { 0x8a4f57bf,0x6156060c,0xff11182a,0xf9758696,0x6296ef00,0x8336773c,
+        0xff666899,0x9c054bce,0x719cd11c,0xd6a11611,0xdbe1acfa,0x9824a641 },
+      { 0xba89fd01,0x0b7b7a5f,0x889f79d8,0xf8d3b809,0xf578285c,0xc5e1ea08,
+        0xae6d8288,0x7ac74536,0x7521ef5f,0x5d37a200,0xb260a25d,0x5ecc4184 } },
+    /* 214 */
+    { { 0xa708c8d3,0xddcebb19,0xc63f81ec,0xe63ed04f,0x11873f95,0xd045f5a0,
+        0x79f276d5,0x3b5ad544,0x425ae5b3,0x81272a3d,0x10ce1605,0x8bfeb501 },
+      { 0x888228bf,0x4233809c,0xb2aff7df,0x4bd82acf,0x0cbd4a7f,0x9c68f180,
+        0x6b44323d,0xfcd77124,0x891db957,0x60c0fcf6,0x04da8f7f,0xcfbb4d89 } },
+    /* 215 */
+    { { 0x3b26139a,0x9a6a5df9,0xb2cc7eb8,0x3e076a83,0x5a964bcd,0x47a8e82d,
+        0xb9278d6b,0x8a4e2a39,0xe4443549,0x93506c98,0xf1e0d566,0x06497a8f },
+      { 0x2b1efa05,0x3dee8d99,0x45393e33,0x2da63ca8,0xcf0579ad,0xa4af7277,
+        0x3236d8ea,0xaf4b4639,0x32b617f5,0x6ccad95b,0xb88bb124,0xce76d8b8 } },
+    /* 216 */
+    { { 0x083843dc,0x63d2537a,0x1e4153b4,0x89eb3514,0xea9afc94,0x5175ebc4,
+        0x8ed1aed7,0x7a652580,0xd85e8297,0x67295611,0xb584b73d,0x8dd2d68b },
+      { 0x0133c3a4,0x237139e6,0x4bd278ea,0x9de838ab,0xc062fcd9,0xe829b072,
+        0x63ba8706,0x70730d4f,0xd3cd05ec,0x6080483f,0x0c85f84d,0x872ab5b8 } },
+    /* 217 */
+    { { 0x999d4d49,0xfc0776d3,0xec3f45e7,0xa3eb59de,0x0dae1fc1,0xbc990e44,
+        0xa15371ff,0x33596b1e,0x9bc7ab25,0xd447dcb2,0x35979582,0xcd5b63e9 },
+      { 0x77d1ff11,0xae3366fa,0xedee6903,0x59f28f05,0xa4433bf2,0x6f43fed1,
+        0xdf9ce00e,0x15409c9b,0xaca9c5dc,0x21b5cded,0x82d7bdb4,0xf9f33595 } },
+    /* 218 */
+    { { 0x9422c792,0x95944378,0xc958b8bf,0x239ea923,0xdf076541,0x4b61a247,
+        0xbb9fc544,0x4d29ce85,0x0b424559,0x9a692a67,0x0e486900,0x6e0ca5a0 },
+      { 0x85b3bece,0x6b79a782,0xc61f9892,0x41f35e39,0xae747f82,0xff82099a,
+        0xd0ca59d6,0x58c8ae3f,0x99406b5f,0x4ac930e2,0x9df24243,0x2ce04eb9 } },
+    /* 219 */
+    { { 0x1ac37b82,0x4366b994,0x25b04d83,0xff0c728d,0x19c47b7c,0x1f551361,
+        0xbeff13e7,0xdbf2d5ed,0xe12a683d,0xf78efd51,0x989cf9c4,0x82cd85b9 },
+      { 0xe0cb5d37,0xe23c6db6,0x72ee1a15,0x818aeebd,0x28771b14,0x8212aafd,
+        0x1def817d,0x7bc221d9,0x9445c51f,0xdac403a2,0x12c3746b,0x711b0517 } },
+    /* 220 */
+    { { 0x5ea99ecc,0x0ed9ed48,0xb8cab5e1,0xf799500d,0xb570cbdc,0xa8ec87dc,
+        0xd35dfaec,0x52cfb2c2,0x6e4d80a4,0x8d31fae2,0xdcdeabe5,0xe6a37dc9 },
+      { 0x1deca452,0x5d365a34,0x0d68b44e,0x09a5f8a5,0xa60744b1,0x59238ea5,
+        0xbb4249e9,0xf2fedc0d,0xa909b2e3,0xe395c74e,0x39388250,0xe156d1a5 } },
+    /* 221 */
+    { { 0x47181ae9,0xd796b3d0,0x44197808,0xbaf44ba8,0x34cf3fac,0xe6933094,
+        0xc3bd5c46,0x41aa6ade,0xeed947c6,0x4fda75d8,0x9ea5a525,0xacd9d412 },
+      { 0xd430301b,0x65cc55a3,0x7b52ea49,0x3c9a5bcf,0x159507f0,0x22d319cf,
+        0xde74a8dd,0x2ee0b9b5,0x877ac2b6,0x20c26a1e,0x92e7c314,0x387d73da } },
+    /* 222 */
+    { { 0x8cd3fdac,0x13c4833e,0x332e5b8e,0x76fcd473,0xe2fe1fd3,0xff671b4b,
+        0x5d98d8ec,0x4d734e8b,0x514bbc11,0xb1ead3c6,0x7b390494,0xd14ca858 },
+      { 0x5d2d37e9,0x95a443af,0x00464622,0x73c6ea73,0x15755044,0xa44aeb4b,
+        0xfab58fee,0xba3f8575,0xdc680a6f,0x9779dbc9,0x7b37ddfc,0xe1ee5f5a } },
+    /* 223 */
+    { { 0x12d29f46,0xcd0b4648,0x0ed53137,0x93295b0b,0x80bef6c9,0xbfe26094,
+        0x54248b00,0xa6565788,0x80e7f9c4,0x69c43fca,0xbe141ea1,0x2190837b },
+      { 0xa1b26cfb,0x875e159a,0x7affe852,0x90ca9f87,0x92ca598e,0x15e6550d,
+        0x1938ad11,0xe3e0945d,0x366ef937,0xef7636bb,0xb39869e5,0xb6034d0b } },
+    /* 224 */
+    { { 0x26d8356e,0x4d255e30,0xd314626f,0xf83666ed,0xd0c8ed64,0x421ddf61,
+        0x26677b61,0x96e473c5,0x9e9b18b3,0xdad4af7e,0xa9393f75,0xfceffd4a },
+      { 0x11c731d5,0x843138a1,0xb2f141d9,0x05bcb3a1,0x617b7671,0x20e1fa95,
+        0x88ccec7b,0xbefce812,0x90f1b568,0x582073dc,0x1f055cb7,0xf572261a } },
+    /* 225 */
+    { { 0x36973088,0xf3148277,0x86a9f980,0xc008e708,0xe046c261,0x1b795947,
+        0xca76bca0,0xdf1e6a7d,0x71acddf0,0xabafd886,0x1364d8f4,0xff7054d9 },
+      { 0xe2260594,0x2cf63547,0xd73b277e,0x468a5372,0xef9bd35e,0xc7419e24,
+        0x24043cc3,0x2b4a1c20,0x890b39cd,0xa28f047a,0x46f9a2e3,0xdca2cea1 } },
+    /* 226 */
+    { { 0x53277538,0xab788736,0xcf697738,0xa734e225,0x6b22e2c1,0x66ee1d1e,
+        0xebe1d212,0x2c615389,0x02bb0766,0xf36cad40,0x3e64f207,0x120885c3 },
+      { 0x90fbfec2,0x59e77d56,0xd7a574ae,0xf9e781aa,0x5d045e53,0x801410b0,
+        0xa91b5f0e,0xd3b5f0aa,0x7fbb3521,0xb3d1df00,0xc72bee9a,0x11c4b33e } },
+    /* 227 */
+    { { 0x83c3a7f3,0xd32b9832,0x88d8a354,0x8083abcf,0x50f4ec5a,0xdeb16404,
+        0x641e2907,0x18d747f0,0xf1bbf03e,0x4e8978ae,0x88a0cd89,0x932447dc },
+      { 0xcf3d5897,0x561e0feb,0x13600e6d,0xfc3a682f,0xd16a6b73,0xc78b9d73,
+        0xd29bf580,0xe713fede,0x08d69e5c,0x0a225223,0x1ff7fda4,0x3a924a57 } },
+    /* 228 */
+    { { 0xb4093bee,0xfb64554c,0xa58c6ec0,0xa6d65a25,0x43d0ed37,0x4126994d,
+        0x55152d44,0xa5689a51,0x284caa8d,0xb8e5ea8c,0xd1f25538,0x33f05d4f },
+      { 0x1b615d6e,0xe0fdfe09,0x705507da,0x2ded7e8f,0x17bbcc80,0xdd5631e5,
+        0x267fd11f,0x4f87453e,0xff89d62d,0xc6da723f,0xe3cda21d,0x55cbcae2 } },
+    /* 229 */
+    { { 0x6b4e84f3,0x336bc94e,0x4ef72c35,0x72863031,0xeeb57f99,0x6d85fdee,
+        0xa42ece1b,0x7f4e3272,0x36f0320a,0x7f86cbb5,0x923331e6,0xf09b6a2b },
+      { 0x56778435,0x21d3ecf1,0x8323b2d2,0x2977ba99,0x1704bc0f,0x6a1b57fb,
+        0x389f048a,0xd777cf8b,0xac6b42cd,0x9ce2174f,0x09e6c55a,0x404e2bff } },
+    /* 230 */
+    { { 0x204c5ddb,0x9b9b135e,0x3eff550e,0x9dbfe044,0xec3be0f6,0x35eab4bf,
+        0x0a43e56f,0x8b4c3f0d,0x0e73f9b3,0x4c1c6673,0x2c78c905,0x92ed38bd },
+      { 0xa386e27c,0xc7003f6a,0xaced8507,0xb9c4f46f,0x59df5464,0xea024ec8,
+        0x429572ea,0x4af96152,0xe1fc1194,0x279cd5e2,0x281e358c,0xaa376a03 } },
+    /* 231 */
+    { { 0x3cdbc95c,0x07859223,0xef2e337a,0xaae1aa6a,0x472a8544,0xc040108d,
+        0x8d037b7d,0x80c853e6,0x8c7eee24,0xd221315c,0x8ee47752,0x195d3856 },
+      { 0xdacd7fbe,0xd4b1ba03,0xd3e0c52b,0x4b5ac61e,0x6aab7b52,0x68d3c052,
+        0x660e3fea,0xf0d7248c,0x3145efb4,0xafdb3f89,0x8f40936d,0xa73fd9a3 } },
+    /* 232 */
+    { { 0xbb1b17ce,0x891b9ef3,0xc6127f31,0x14023667,0x305521fd,0x12b2e58d,
+        0xe3508088,0x3a47e449,0xff751507,0xe49fc84b,0x5310d16e,0x4023f722 },
+      { 0xb73399fa,0xa608e5ed,0xd532aa3e,0xf12632d8,0x845e8415,0x13a2758e,
+        0x1fc2d861,0xae4b6f85,0x339d02f2,0x3879f5b1,0x80d99ebd,0x446d22a6 } },
+    /* 233 */
+    { { 0x4be164f1,0x0f502302,0x88b81920,0x8d09d2d6,0x984aceff,0x514056f1,
+        0x75e9e80d,0xa5c4ddf0,0xdf496a93,0x38cb47e6,0x38df6bf7,0x899e1d6b },
+      { 0xb59eb2a6,0x69e87e88,0x9b47f38b,0x280d9d63,0x3654e955,0x599411ea,
+        0x969aa581,0xcf8dd4fd,0x530742a7,0xff5c2baf,0x1a373085,0xa4391536 } },
+    /* 234 */
+    { { 0xa8a4bdd2,0x6ace72a3,0xb68ef702,0xc656cdd1,0x90c4dad8,0xd4a33e7e,
+        0x9d951c50,0x4aece08a,0x085d68e6,0xea8005ae,0x6f7502b8,0xfdd7a7d7 },
+      { 0x98d6fa45,0xce6fb0a6,0x1104eb8c,0x228f8672,0xda09d7dc,0xd23d8787,
+        0x2ae93065,0x5521428b,0xea56c366,0x95faba3d,0x0a88aca5,0xedbe5039 } },
+    /* 235 */
+    { { 0xbfb26c82,0xd64da0ad,0x952c2f9c,0xe5d70b3c,0xf7e77f68,0xf5e8f365,
+        0x08f2d695,0x7234e002,0xd12e7be6,0xfaf900ee,0x4acf734e,0x27dc6934 },
+      { 0xc260a46a,0x80e4ff5e,0x2dc31c28,0x7da5ebce,0xca69f552,0x485c5d73,
+        0x69cc84c2,0xcdfb6b29,0xed6d4eca,0x031c5afe,0x22247637,0xc7bbf4c8 } },
+    /* 236 */
+    { { 0x49fe01b2,0x9d5b72c7,0x793a91b8,0x34785186,0xcf460438,0xa3ba3c54,
+        0x3ab21b6f,0x73e8e43d,0xbe57b8ab,0x50cde8e0,0xdd204264,0x6488b3a7 },
+      { 0xdddc4582,0xa9e398b3,0x5bec46fe,0x1698c1a9,0x156d3843,0x7f1446ef,
+        0x770329a2,0x3fd25dd8,0x2c710668,0x05b1221a,0xa72ee6cf,0x65b2dc2a } },
+    /* 237 */
+    { { 0xcd021d63,0x21a885f7,0xfea61f08,0x3f344b15,0xc5cf73e6,0xad5ba6dd,
+        0x227a8b23,0x154d0d8f,0xdc559311,0x9b74373c,0x98620fa1,0x4feab715 },
+      { 0x7d9ec924,0x5098938e,0x6d47e550,0x84d54a5e,0x1b617506,0x1a2d1bdc,
+        0x615868a4,0x99fe1782,0x3005a924,0x171da780,0x7d8f79b6,0xa70bf5ed } },
+    /* 238 */
+    { { 0xfe2216c5,0x0bc1250d,0x7601b351,0x2c37e250,0xd6f06b7e,0xb6300175,
+        0x8bfeb9b7,0x4dde8ca1,0xb82f843d,0x4f210432,0xb1ac0afd,0x8d70e2f9 },
+      { 0xaae91abb,0x25c73b78,0x863028f2,0x0230dca3,0xe5cf30b7,0x8b923ecf,
+        0x5506f265,0xed754ec2,0x729a5e39,0x8e41b88c,0xbabf889b,0xee67cec2 } },
+    /* 239 */
+    { { 0x1be46c65,0xe183acf5,0xe7565d7a,0x9789538f,0xd9627b4e,0x87873391,
+        0x9f1d9187,0xbf4ac4c1,0x4691f5c8,0x5db99f63,0x74a1fb98,0xa68df803 },
+      { 0xbf92b5fa,0x3c448ed1,0x3e0bdc32,0xa098c841,0x79bf016c,0x8e74cd55,
+        0x115e244d,0x5df0d09c,0x3410b66e,0x9418ad01,0x17a02130,0x8b6124cb } },
+    /* 240 */
+    { { 0xc26e3392,0x425ec3af,0xa1722e00,0xc07f8470,0xe2356b43,0xdcc28190,
+        0xb1ef59a6,0x4ed97dff,0xc63028c1,0xc22b3ad1,0x68c18988,0x070723c2 },
+      { 0x4cf49e7d,0x70da302f,0x3f12a522,0xc5e87c93,0x18594148,0x74acdd1d,
+        0xca74124c,0xad5f73ab,0xd69fd478,0xe72e4a3e,0x7b117cc3,0x61593868 } },
+    /* 241 */
+    { { 0xa9aa0486,0x7b7b9577,0xa063d557,0x6e41fb35,0xda9047d7,0xb017d5c7,
+        0x68a87ba9,0x8c748280,0xdf08ad93,0xab45fa5c,0x4c288a28,0xcd9fb217 },
+      { 0x5747843d,0x59544642,0xa56111e3,0x34d64c6c,0x4bfce8d5,0x12e47ea1,
+        0x6169267f,0x17740e05,0xeed03fb5,0x5c49438e,0x4fc3f513,0x9da30add } },
+    /* 242 */
+    { { 0xccfa5200,0xc4e85282,0x6a19b13d,0x2707608f,0xf5726e2f,0xdcb9a53d,
+        0xe9427de5,0x612407c9,0xd54d582a,0x3e5a17e1,0x655ae118,0xb99877de },
+      { 0x015254de,0x6f0e972b,0xf0a6f7c5,0x92a56db1,0xa656f8b2,0xd297e4e1,
+        0xad981983,0x99fe0052,0x07cfed84,0xd3652d2f,0x843c1738,0xc784352e } },
+    /* 243 */
+    { { 0x7e9b2d8a,0x6ee90af0,0x57cf1964,0xac8d7018,0x71f28efc,0xf6ed9031,
+        0x6812b20e,0x7f70d5a9,0xf1c61eee,0x27b557f4,0xc6263758,0xf1c9bd57 },
+      { 0x2a1a6194,0x5cf7d014,0x1890ab84,0xdd614e0b,0x0e93c2a6,0x3ef9de10,
+        0xe0cd91c5,0xf98cf575,0x14befc32,0x504ec0c6,0x6279d68c,0xd0513a66 } },
+    /* 244 */
+    { { 0xa859fb6a,0xa8eadbad,0xdb283666,0xcf8346e7,0x3e22e355,0x7b35e61a,
+        0x99639c6b,0x293ece2c,0x56f241c8,0xfa0162e2,0xbf7a1dda,0xd2e6c7b9 },
+      { 0x40075e63,0xd0de6253,0xf9ec8286,0x2405aa61,0x8fe45494,0x2237830a,
+        0x364e9c8c,0x4fd01ac7,0x904ba750,0x4d9c3d21,0xaf1b520b,0xd589be14 } },
+    /* 245 */
+    { { 0x4662e53b,0x13576a4f,0xf9077676,0x35ec2f51,0x97c0af97,0x66297d13,
+        0x9e598b58,0xed3201fe,0x5e70f604,0x49bc752a,0xbb12d951,0xb54af535 },
+      { 0x212c1c76,0x36ea4c2b,0xeb250dfd,0x18f5bbc7,0x9a0a1a46,0xa0d466cc,
+        0xdac2d917,0x52564da4,0x8e95fab5,0x206559f4,0x9ca67a33,0x7487c190 } },
+    /* 246 */
+    { { 0xdde98e9c,0x75abfe37,0x2a411199,0x99b90b26,0xdcdb1f7c,0x1b410996,
+        0x8b3b5675,0xab346f11,0xf1f8ae1e,0x04852193,0x6b8b98c1,0x1ec4d227 },
+      { 0x45452baa,0xba3bc926,0xacc4a572,0x387d1858,0xe51f171e,0x9478eff6,
+        0x931e1c00,0xf357077d,0xe54c8ca8,0xffee77cd,0x551dc9a4,0xfb4892ff } },
+    /* 247 */
+    { { 0x2db8dff8,0x5b1bdad0,0x5a2285a2,0xd462f4fd,0xda00b461,0x1d6aad8e,
+        0x41306d1b,0x43fbefcf,0x6a13fe19,0x428e86f3,0x17f89404,0xc8b2f118 },
+      { 0xf0d51afb,0x762528aa,0x549b1d06,0xa3e2fea4,0xea3ddf66,0x86fad8f2,
+        0x4fbdd206,0x0d9ccc4b,0xc189ff5a,0xcde97d4c,0x199f19a6,0xc36793d6 } },
+    /* 248 */
+    { { 0x51b85197,0xea38909b,0xb4c92895,0xffb17dd0,0x1ddb3f3f,0x0eb0878b,
+        0xc57cf0f2,0xb05d28ff,0x1abd57e2,0xd8bde2e7,0xc40c1b20,0x7f2be28d },
+      { 0x299a2d48,0x6554dca2,0x8377982d,0x5130ba2e,0x1071971a,0x8863205f,
+        0x7cf2825d,0x15ee6282,0x03748f2b,0xd4b6c57f,0x430385a0,0xa9e3f4da } },
+    /* 249 */
+    { { 0x83fbc9c6,0x33eb7cec,0x4541777e,0x24a311c7,0x4f0767fc,0xc81377f7,
+        0x4ab702da,0x12adae36,0x2a779696,0xb7fcb6db,0x01cea6ad,0x4a6fb284 },
+      { 0xcdfc73de,0x5e8b1d2a,0x1b02fd32,0xd0efae8d,0xd81d8519,0x3f99c190,
+        0xfc808971,0x3c18f7fa,0x51b7ae7b,0x41f713e7,0xf07fc3f8,0x0a4b3435 } },
+    /* 250 */
+    { { 0x019b7d2e,0x7dda3c4c,0xd4dc4b89,0x631c8d1a,0x1cdb313c,0x5489cd6e,
+        0x4c07bb06,0xd44aed10,0x75f000d1,0x8f97e13a,0xdda5df4d,0x0e9ee64f },
+      { 0x3e346910,0xeaa99f3b,0xfa294ad7,0x622f6921,0x0d0b2fe9,0x22aaa20d,
+        0x1e5881ba,0x4fed2f99,0xc1571802,0x9af3b2d6,0xdc7ee17c,0x919e67a8 } },
+    /* 251 */
+    { { 0x76250533,0xc724fe4c,0x7d817ef8,0x8a2080e5,0x172c9751,0xa2afb0f4,
+        0x17c0702e,0x9b10cdeb,0xc9b7e3e9,0xbf3975e3,0x1cd0cdc5,0x206117df },
+      { 0xbe05ebd5,0xfb049e61,0x16c782c0,0xeb0bb55c,0xab7fed09,0x13a331b8,
+        0x632863f0,0xf6c58b1d,0x4d3b6195,0x6264ef6e,0x9a53f116,0x92c51b63 } },
+    /* 252 */
+    { { 0x288b364d,0xa57c7bc8,0x7b41e5c4,0x4a562e08,0x698a9a11,0x699d21c6,
+        0xf3f849b9,0xa4ed9581,0x9eb726ba,0xa223eef3,0xcc2884f9,0x13159c23 },
+      { 0x3a3f4963,0x73931e58,0x0ada6a81,0x96500389,0x5ab2950b,0x3ee8a1c6,
+        0x775fab52,0xeedf4949,0x4f2671b6,0x63d652e1,0x3c4e2f55,0xfed4491c } },
+    /* 253 */
+    { { 0xf4eb453e,0x335eadc3,0xcadd1a5b,0x5ff74b63,0x5d84a91a,0x6933d0d7,
+        0xb49ba337,0x9ca3eeb9,0xc04c15b8,0x1f6facce,0xdc09a7e4,0x4ef19326 },
+      { 0x3dca3233,0x53d2d324,0xa2259d4b,0x0ee40590,0x5546f002,0x18c22edb,
+        0x09ea6b71,0x92429801,0xb0e91e61,0xaada0add,0x99963c50,0x5fe53ef4 } },
+    /* 254 */
+    { { 0x90c28c65,0x372dd06b,0x119ce47d,0x1765242c,0x6b22fc82,0xc041fb80,
+        0xb0a7ccc1,0x667edf07,0x1261bece,0xc79599e7,0x19cff22a,0xbc69d9ba },
+      { 0x13c06819,0x009d77cd,0xe282b79d,0x635a66ae,0x225b1be8,0x4edac4a6,
+        0x524008f9,0x57d4f4e4,0xb056af84,0xee299ac5,0x3a0bc386,0xcc38444c } },
+    /* 255 */
+    { { 0xcd4c2356,0x490643b1,0x750547be,0x740a4851,0xd4944c04,0x643eaf29,
+        0x299a98a0,0xba572479,0xee05fdf9,0x48b29f16,0x089b2d7b,0x33fb4f61 },
+      { 0xa950f955,0x86704902,0xfedc3ddf,0x97e1034d,0x05fbb6a2,0x211320b6,
+        0x432299bb,0x23d7b93f,0x8590e4a3,0x1fe1a057,0xf58c0ce6,0x8e1d0586 } },
+};
+
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_base_12(sp_point_384* r, const sp_digit* k,
+        int map, void* heap)
+{
+    return sp_384_ecc_mulmod_stripe_12(r, &p384_base, p384_table,
+                                      k, map, heap);
+}
+
+#endif
+
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * km    Scalar to multiply by.
+ * r     Resulting point.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_base_384(mp_int* km, ecc_point* r, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[12];
+#endif
+    sp_point_384* point;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    err = sp_384_point_new_12(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+    if (err == MP_OKAY) {
+        sp_384_from_mp(k, 12, km);
+
+            err = sp_384_ecc_mulmod_base_12(point, k, map, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_to_ecc_point_12(point, r);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(point, 0, heap);
+
+    return err;
+}
+
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+                                                        defined(HAVE_ECC_VERIFY)
+/* Returns 1 if the number of zero.
+ * Implementation is constant time.
+ *
+ * a  Number to check.
+ * returns 1 if the number is zero and 0 otherwise.
+ */
+static int sp_384_iszero_12(const sp_digit* a)
+{
+    return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] |
+            a[8] | a[9] | a[10] | a[11]) == 0;
+}
+
+#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+/* Add 1 to a. (a = a + 1)
+ *
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_384_add_one_12(sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "mov	r2, #1\n\t"
+        "ldr	r1, [%[a], #0]\n\t"
+        "add	r1, r2\n\t"
+        "mov	r2, #0\n\t"
+        "str	r1, [%[a], #0]\n\t"
+        "ldr	r1, [%[a], #4]\n\t"
+        "adc	r1, r2\n\t"
+        "str	r1, [%[a], #4]\n\t"
+        "ldr	r1, [%[a], #8]\n\t"
+        "adc	r1, r2\n\t"
+        "str	r1, [%[a], #8]\n\t"
+        "ldr	r1, [%[a], #12]\n\t"
+        "adc	r1, r2\n\t"
+        "str	r1, [%[a], #12]\n\t"
+        "ldr	r1, [%[a], #16]\n\t"
+        "adc	r1, r2\n\t"
+        "str	r1, [%[a], #16]\n\t"
+        "ldr	r1, [%[a], #20]\n\t"
+        "adc	r1, r2\n\t"
+        "str	r1, [%[a], #20]\n\t"
+        "ldr	r1, [%[a], #24]\n\t"
+        "adc	r1, r2\n\t"
+        "str	r1, [%[a], #24]\n\t"
+        "ldr	r1, [%[a], #28]\n\t"
+        "adc	r1, r2\n\t"
+        "str	r1, [%[a], #28]\n\t"
+        "ldr	r1, [%[a], #32]\n\t"
+        "adc	r1, r2\n\t"
+        "str	r1, [%[a], #32]\n\t"
+        "ldr	r1, [%[a], #36]\n\t"
+        "adc	r1, r2\n\t"
+        "str	r1, [%[a], #36]\n\t"
+        "ldr	r1, [%[a], #40]\n\t"
+        "adc	r1, r2\n\t"
+        "str	r1, [%[a], #40]\n\t"
+        "ldr	r1, [%[a], #44]\n\t"
+        "adc	r1, r2\n\t"
+        "str	r1, [%[a], #44]\n\t"
+        :
+        : [a] "r" (a)
+        : "memory", "r1", "r2"
+    );
+}
+
+/* Read big endian unsigned byte array into r.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
+ * a  Byte array.
+ * n  Number of bytes in array to read.
+ */
+static void sp_384_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = n-1; i >= 0; i--) {
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 24U) {
+            r[j] &= 0xffffffff;
+            s = 32U - s;
+            if (j + 1 >= size) {
+                break;
+            }
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+}
+
+/* Generates a scalar that is in the range 1..order-1.
+ *
+ * rng  Random number generator.
+ * k    Scalar value.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+static int sp_384_ecc_gen_k_12(WC_RNG* rng, sp_digit* k)
+{
+    int err;
+    byte buf[48];
+
+    do {
+        err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf));
+        if (err == 0) {
+            sp_384_from_bin(k, 12, buf, (int)sizeof(buf));
+            if (sp_384_cmp_12(k, p384_order2) < 0) {
+                sp_384_add_one_12(k);
+                break;
+            }
+        }
+    }
+    while (err == 0);
+
+    return err;
+}
+
+/* Makes a random EC key pair.
+ *
+ * rng   Random number generator.
+ * priv  Generated private value.
+ * pub   Generated public point.
+ * heap  Heap to use for allocation.
+ * returns ECC_INF_E when the point does not have the correct order, RNG
+ * failures, MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_make_key_384(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[12];
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_384 inf;
+#endif
+#endif
+    sp_point_384* point;
+    sp_digit* k = NULL;
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_384* infinity;
+#endif
+    int err;
+
+    (void)heap;
+
+    err = sp_384_point_new_12(heap, p, point);
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, inf, infinity);
+    }
+#endif
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+
+    if (err == MP_OKAY) {
+        err = sp_384_ecc_gen_k_12(rng, k);
+    }
+    if (err == MP_OKAY) {
+            err = sp_384_ecc_mulmod_base_12(point, k, 1, NULL);
+    }
+
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    if (err == MP_OKAY) {
+            err = sp_384_ecc_mulmod_12(infinity, point, p384_order, 1, NULL);
+    }
+    if (err == MP_OKAY) {
+        if ((sp_384_iszero_12(point->x) == 0) || (sp_384_iszero_12(point->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(k, priv);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_to_ecc_point_12(point, pub);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_384_point_free_12(infinity, 1, heap);
+#endif
+    sp_384_point_free_12(point, 1, heap);
+
+    return err;
+}
+
+#ifdef HAVE_ECC_DHE
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 48
+ *
+ * r  A single precision integer.
+ * a  Byte array.
+ */
+static void sp_384_to_bin(sp_digit* r, byte* a)
+{
+    int i, j, s = 0, b;
+
+    j = 384 / 8 - 1;
+    a[j] = 0;
+    for (i=0; i<12 && j>=0; i++) {
+        b = 0;
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
+            break;
+        }
+        while (b < 32) {
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
+                break;
+            }
+        }
+        s = 8 - (b - 32);
+        if (j >= 0) {
+            a[j] = 0;
+        }
+        if (s != 0) {
+            j++;
+        }
+    }
+}
+
+/* Multiply the point by the scalar and serialize the X ordinate.
+ * The number is 0 padded to maximum size on output.
+ *
+ * priv    Scalar to multiply the point by.
+ * pub     Point to multiply.
+ * out     Buffer to hold X ordinate.
+ * outLen  On entry, size of the buffer in bytes.
+ *         On exit, length of data in buffer in bytes.
+ * heap    Heap to use for allocation.
+ * returns BUFFER_E if the buffer is to small for output size,
+ * MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_secret_gen_384(mp_int* priv, ecc_point* pub, byte* out,
+                          word32* outLen, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[12];
+#endif
+    sp_point_384* point = NULL;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    if (*outLen < 48U) {
+        err = BUFFER_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, p, point);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL)
+            err = MEMORY_E;
+    }
+#else
+    k = kd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_384_from_mp(k, 12, priv);
+        sp_384_point_from_ecc_point_12(point, pub);
+            err = sp_384_ecc_mulmod_12(point, point, k, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        sp_384_to_bin(point->x, out);
+        *outLen = 48;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(point, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_DHE */
+
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into a. (a -= b)
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_384_sub_in_place_12(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+    __asm__ __volatile__ (
+        "mov	r7, %[a]\n\t"
+        "add	r7, #48\n\t"
+        "\n1:\n\t"
+        "mov	r5, #0\n\t"
+        "sub	r5, %[c]\n\t"
+        "ldr	r3, [%[a]]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b]]\n\t"
+        "ldr	r6, [%[b], #4]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a]]\n\t"
+        "str	r4, [%[a], #4]\n\t"
+        "sbc	%[c], %[c]\n\t"
+        "add	%[a], #8\n\t"
+        "add	%[b], #8\n\t"
+        "cmp	%[a], r7\n\t"
+        "bne	1b\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6", "r7"
+    );
+
+    return c;
+}
+
+#else
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_384_sub_in_place_12(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldr	r3, [%[a], #0]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b], #0]\n\t"
+        "ldr	r6, [%[b], #4]\n\t"
+        "sub	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #0]\n\t"
+        "str	r4, [%[a], #4]\n\t"
+        "ldr	r3, [%[a], #8]\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "ldr	r5, [%[b], #8]\n\t"
+        "ldr	r6, [%[b], #12]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #8]\n\t"
+        "str	r4, [%[a], #12]\n\t"
+        "ldr	r3, [%[a], #16]\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "ldr	r5, [%[b], #16]\n\t"
+        "ldr	r6, [%[b], #20]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #16]\n\t"
+        "str	r4, [%[a], #20]\n\t"
+        "ldr	r3, [%[a], #24]\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "ldr	r5, [%[b], #24]\n\t"
+        "ldr	r6, [%[b], #28]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #24]\n\t"
+        "str	r4, [%[a], #28]\n\t"
+        "ldr	r3, [%[a], #32]\n\t"
+        "ldr	r4, [%[a], #36]\n\t"
+        "ldr	r5, [%[b], #32]\n\t"
+        "ldr	r6, [%[b], #36]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #32]\n\t"
+        "str	r4, [%[a], #36]\n\t"
+        "ldr	r3, [%[a], #40]\n\t"
+        "ldr	r4, [%[a], #44]\n\t"
+        "ldr	r5, [%[b], #40]\n\t"
+        "ldr	r6, [%[b], #44]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #40]\n\t"
+        "str	r4, [%[a], #44]\n\t"
+        "sbc	%[c], %[c]\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision digit.
+ */
+SP_NOINLINE static void sp_384_mul_d_12(sp_digit* r, const sp_digit* a,
+        sp_digit b)
+{
+    __asm__ __volatile__ (
+        "mov	r6, #48\n\t"
+        "add	r6, %[a]\n\t"
+        "mov	r8, %[r]\n\t"
+        "mov	r9, r6\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "1:\n\t"
+        "mov	%[r], #0\n\t"
+        "mov	r5, #0\n\t"
+        "# A[] * B\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsl	r7, %[b], #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r3, r7\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "lsr	r7, %[b], #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, %[b], #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "lsl	r7, %[b], #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "# A[] * B - Done\n\t"
+        "mov	%[r], r8\n\t"
+        "str	r3, [%[r]]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "add	%[r], #4\n\t"
+        "add	%[a], #4\n\t"
+        "mov	r8, %[r]\n\t"
+        "cmp	%[a], r9\n\t"
+        "blt	1b\n\t"
+        "str	r3, [%[r]]\n\t"
+        : [r] "+r" (r), [a] "+r" (a)
+        : [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
+    );
+}
+
+/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
+ *
+ * d1   The high order half of the number to divide.
+ * d0   The low order half of the number to divide.
+ * div  The dividend.
+ * returns the result of the division.
+ *
+ * Note that this is an approximate div. It may give an answer 1 larger.
+ */
+SP_NOINLINE static sp_digit div_384_word_12(sp_digit d1, sp_digit d0,
+        sp_digit div)
+{
+    sp_digit r = 0;
+
+    __asm__ __volatile__ (
+        "lsr	r5, %[div], #1\n\t"
+        "add	r5, #1\n\t"
+        "mov	r8, %[d0]\n\t"
+        "mov	r9, %[d1]\n\t"
+        "# Do top 32\n\t"
+        "mov	r6, r5\n\t"
+        "sub	r6, %[d1]\n\t"
+        "sbc	r6, r6\n\t"
+        "add	%[r], %[r]\n\t"
+        "sub	%[r], r6\n\t"
+        "and	r6, r5\n\t"
+        "sub	%[d1], r6\n\t"
+        "# Next 30 bits\n\t"
+        "mov	r4, #29\n\t"
+        "1:\n\t"
+        "lsl	%[d0], %[d0], #1\n\t"
+        "adc	%[d1], %[d1]\n\t"
+        "mov	r6, r5\n\t"
+        "sub	r6, %[d1]\n\t"
+        "sbc	r6, r6\n\t"
+        "add	%[r], %[r]\n\t"
+        "sub	%[r], r6\n\t"
+        "and	r6, r5\n\t"
+        "sub	%[d1], r6\n\t"
+        "sub	r4, #1\n\t"
+        "bpl	1b\n\t"
+        "mov	r7, #0\n\t"
+        "add	%[r], %[r]\n\t"
+        "add	%[r], #1\n\t"
+        "# r * div - Start\n\t"
+        "lsl	%[d1], %[r], #16\n\t"
+        "lsl	r4, %[div], #16\n\t"
+        "lsr	%[d1], %[d1], #16\n\t"
+        "lsr	r4, r4, #16\n\t"
+        "mul	r4, %[d1]\n\t"
+        "lsr	r6, %[div], #16\n\t"
+        "mul	%[d1], r6\n\t"
+        "lsr	r5, %[d1], #16\n\t"
+        "lsl	%[d1], %[d1], #16\n\t"
+        "add	r4, %[d1]\n\t"
+        "adc	r5, r7\n\t"
+        "lsr	%[d1], %[r], #16\n\t"
+        "mul	r6, %[d1]\n\t"
+        "add	r5, r6\n\t"
+        "lsl	r6, %[div], #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "mul	%[d1], r6\n\t"
+        "lsr	r6, %[d1], #16\n\t"
+        "lsl	%[d1], %[d1], #16\n\t"
+        "add	r4, %[d1]\n\t"
+        "adc	r5, r6\n\t"
+        "# r * div - Done\n\t"
+        "mov	%[d1], r8\n\t"
+        "sub	%[d1], r4\n\t"
+        "mov	r4, %[d1]\n\t"
+        "mov	%[d1], r9\n\t"
+        "sbc	%[d1], r5\n\t"
+        "mov	r5, %[d1]\n\t"
+        "add	%[r], r5\n\t"
+        "# r * div - Start\n\t"
+        "lsl	%[d1], %[r], #16\n\t"
+        "lsl	r4, %[div], #16\n\t"
+        "lsr	%[d1], %[d1], #16\n\t"
+        "lsr	r4, r4, #16\n\t"
+        "mul	r4, %[d1]\n\t"
+        "lsr	r6, %[div], #16\n\t"
+        "mul	%[d1], r6\n\t"
+        "lsr	r5, %[d1], #16\n\t"
+        "lsl	%[d1], %[d1], #16\n\t"
+        "add	r4, %[d1]\n\t"
+        "adc	r5, r7\n\t"
+        "lsr	%[d1], %[r], #16\n\t"
+        "mul	r6, %[d1]\n\t"
+        "add	r5, r6\n\t"
+        "lsl	r6, %[div], #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "mul	%[d1], r6\n\t"
+        "lsr	r6, %[d1], #16\n\t"
+        "lsl	%[d1], %[d1], #16\n\t"
+        "add	r4, %[d1]\n\t"
+        "adc	r5, r6\n\t"
+        "# r * div - Done\n\t"
+        "mov	%[d1], r8\n\t"
+        "mov	r6, r9\n\t"
+        "sub	r4, %[d1], r4\n\t"
+        "sbc	r6, r5\n\t"
+        "mov	r5, r6\n\t"
+        "add	%[r], r5\n\t"
+        "# r * div - Start\n\t"
+        "lsl	%[d1], %[r], #16\n\t"
+        "lsl	r4, %[div], #16\n\t"
+        "lsr	%[d1], %[d1], #16\n\t"
+        "lsr	r4, r4, #16\n\t"
+        "mul	r4, %[d1]\n\t"
+        "lsr	r6, %[div], #16\n\t"
+        "mul	%[d1], r6\n\t"
+        "lsr	r5, %[d1], #16\n\t"
+        "lsl	%[d1], %[d1], #16\n\t"
+        "add	r4, %[d1]\n\t"
+        "adc	r5, r7\n\t"
+        "lsr	%[d1], %[r], #16\n\t"
+        "mul	r6, %[d1]\n\t"
+        "add	r5, r6\n\t"
+        "lsl	r6, %[div], #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "mul	%[d1], r6\n\t"
+        "lsr	r6, %[d1], #16\n\t"
+        "lsl	%[d1], %[d1], #16\n\t"
+        "add	r4, %[d1]\n\t"
+        "adc	r5, r6\n\t"
+        "# r * div - Done\n\t"
+        "mov	%[d1], r8\n\t"
+        "mov	r6, r9\n\t"
+        "sub	r4, %[d1], r4\n\t"
+        "sbc	r6, r5\n\t"
+        "mov	r5, r6\n\t"
+        "add	%[r], r5\n\t"
+        "mov	r6, %[div]\n\t"
+        "sub	r6, r4\n\t"
+        "sbc	r6, r6\n\t"
+        "sub	%[r], r6\n\t"
+        : [r] "+r" (r)
+        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+        : "r4", "r5", "r7", "r6", "r8", "r9"
+    );
+    return r;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * m  Mask to AND against each digit.
+ */
+static void sp_384_mask_12(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<12; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    r[0] = a[0] & m;
+    r[1] = a[1] & m;
+    r[2] = a[2] & m;
+    r[3] = a[3] & m;
+    r[4] = a[4] & m;
+    r[5] = a[5] & m;
+    r[6] = a[6] & m;
+    r[7] = a[7] & m;
+    r[8] = a[8] & m;
+    r[9] = a[9] & m;
+    r[10] = a[10] & m;
+    r[11] = a[11] & m;
+#endif
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Nmber to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_384_div_12(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[24], t2[13];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    div = d[11];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 12);
+    for (i=11; i>=0; i--) {
+        r1 = div_384_word_12(t1[12 + i], t1[12 + i - 1], div);
+
+        sp_384_mul_d_12(t2, d, r1);
+        t1[12 + i] += sp_384_sub_in_place_12(&t1[i], t2);
+        t1[12 + i] -= t2[12];
+        sp_384_mask_12(t2, d, t1[12 + i]);
+        t1[12 + i] += sp_384_add_12(&t1[i], &t1[i], t2);
+        sp_384_mask_12(t2, d, t1[12 + i]);
+        t1[12 + i] += sp_384_add_12(&t1[i], &t1[i], t2);
+    }
+
+    r1 = sp_384_cmp_12(t1, d) >= 0;
+    sp_384_cond_sub_12(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_384_mod_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_384_div_12(a, m, NULL, r);
+}
+
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
+/* Order-2 for the P384 curve. */
+static const uint32_t p384_order_minus_2[12] = {
+    0xccc52971U,0xecec196aU,0x48b0a77aU,0x581a0db2U,0xf4372ddfU,0xc7634d81U,
+    0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU
+};
+#else
+/* The low half of the order-2 of the P384 curve. */
+static const uint32_t p384_order_low[6] = {
+    0xccc52971U,0xecec196aU,0x48b0a77aU,0x581a0db2U,0xf4372ddfU,0xc7634d81U
+    
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Multiply two number mod the order of P384 curve. (r = a * b mod order)
+ *
+ * r  Result of the multiplication.
+ * a  First operand of the multiplication.
+ * b  Second operand of the multiplication.
+ */
+static void sp_384_mont_mul_order_12(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+    sp_384_mul_12(r, a, b);
+    sp_384_mont_reduce_order_12(r, p384_order, p384_mp_order);
+}
+
+/* Square number mod the order of P384 curve. (r = a * a mod order)
+ *
+ * r  Result of the squaring.
+ * a  Number to square.
+ */
+static void sp_384_mont_sqr_order_12(sp_digit* r, const sp_digit* a)
+{
+    sp_384_sqr_12(r, a);
+    sp_384_mont_reduce_order_12(r, p384_order, p384_mp_order);
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Square number mod the order of P384 curve a number of times.
+ * (r = a ^ n mod order)
+ *
+ * r  Result of the squaring.
+ * a  Number to square.
+ */
+static void sp_384_mont_sqr_n_order_12(sp_digit* r, const sp_digit* a, int n)
+{
+    int i;
+
+    sp_384_mont_sqr_order_12(r, a);
+    for (i=1; i<n; i++) {
+        sp_384_mont_sqr_order_12(r, r);
+    }
+}
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the order of the P384 curve.
+ * (r = 1 / a mod order)
+ *
+ * r   Inverse result.
+ * a   Number to invert.
+ * td  Temporary data.
+ */
+static void sp_384_mont_inv_order_12(sp_digit* r, const sp_digit* a,
+        sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    XMEMCPY(t, a, sizeof(sp_digit) * 12);
+    for (i=382; i>=0; i--) {
+        sp_384_mont_sqr_order_12(t, t);
+        if ((p384_order_minus_2[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_384_mont_mul_order_12(t, t, a);
+        }
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 12U);
+#else
+    sp_digit* t = td;
+    sp_digit* t2 = td + 2 * 12;
+    sp_digit* t3 = td + 4 * 12;
+    int i;
+
+    /* t = a^2 */
+    sp_384_mont_sqr_order_12(t, a);
+    /* t = a^3 = t * a */
+    sp_384_mont_mul_order_12(t, t, a);
+    /* t2= a^c = t ^ 2 ^ 2 */
+    sp_384_mont_sqr_n_order_12(t2, t, 2);
+    /* t = a^f = t2 * t */
+    sp_384_mont_mul_order_12(t, t2, t);
+    /* t2= a^f0 = t ^ 2 ^ 4 */
+    sp_384_mont_sqr_n_order_12(t2, t, 4);
+    /* t = a^ff = t2 * t */
+    sp_384_mont_mul_order_12(t, t2, t);
+    /* t2= a^ff00 = t ^ 2 ^ 8 */
+    sp_384_mont_sqr_n_order_12(t2, t, 8);
+    /* t3= a^ffff = t2 * t */
+    sp_384_mont_mul_order_12(t3, t2, t);
+    /* t2= a^ffff0000 = t3 ^ 2 ^ 16 */
+    sp_384_mont_sqr_n_order_12(t2, t3, 16);
+    /* t = a^ffffffff = t2 * t3 */
+    sp_384_mont_mul_order_12(t, t2, t3);
+    /* t2= a^ffffffff0000 = t ^ 2 ^ 16  */
+    sp_384_mont_sqr_n_order_12(t2, t, 16);
+    /* t = a^ffffffffffff = t2 * t3 */
+    sp_384_mont_mul_order_12(t, t2, t3);
+    /* t2= a^ffffffffffff000000000000 = t ^ 2 ^ 48  */
+    sp_384_mont_sqr_n_order_12(t2, t, 48);
+    /* t= a^fffffffffffffffffffffffff = t2 * t */
+    sp_384_mont_mul_order_12(t, t2, t);
+    /* t2= a^ffffffffffffffffffffffff000000000000000000000000 */
+    sp_384_mont_sqr_n_order_12(t2, t, 96);
+    /* t2= a^ffffffffffffffffffffffffffffffffffffffffffffffff = t2 * t */
+    sp_384_mont_mul_order_12(t2, t2, t);
+    for (i=191; i>=1; i--) {
+        sp_384_mont_sqr_order_12(t2, t2);
+        if (((sp_digit)p384_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_384_mont_mul_order_12(t2, t2, a);
+        }
+    }
+    sp_384_mont_sqr_order_12(t2, t2);
+    sp_384_mont_mul_order_12(r, t2, a);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+#ifdef HAVE_ECC_SIGN
+#ifndef SP_ECC_MAX_SIG_GEN
+#define SP_ECC_MAX_SIG_GEN  64
+#endif
+
+/* Sign the hash using the private key.
+ *   e = [hash, 384 bits] from binary
+ *   r = (k.G)->x mod order
+ *   s = (r * x + e) / k mod order
+ * The hash is truncated to the first 384 bits.
+ *
+ * hash     Hash to sign.
+ * hashLen  Length of the hash data.
+ * rng      Random number generator.
+ * priv     Private part of key - scalar.
+ * rm       First part of result as an mp_int.
+ * sm       Sirst part of result as an mp_int.
+ * heap     Heap to use for allocation.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
+                    mp_int* rm, mp_int* sm, mp_int* km, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit ed[2*12];
+    sp_digit xd[2*12];
+    sp_digit kd[2*12];
+    sp_digit rd[2*12];
+    sp_digit td[3 * 2*12];
+    sp_point_384 p;
+#endif
+    sp_digit* e = NULL;
+    sp_digit* x = NULL;
+    sp_digit* k = NULL;
+    sp_digit* r = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_384* point = NULL;
+    sp_digit carry;
+    sp_digit* s = NULL;
+    sp_digit* kInv = NULL;
+    int err = MP_OKAY;
+    int32_t c;
+    int i;
+
+    (void)heap;
+
+    err = sp_384_point_new_12(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 12, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        e = d + 0 * 12;
+        x = d + 2 * 12;
+        k = d + 4 * 12;
+        r = d + 6 * 12;
+        tmp = d + 8 * 12;
+#else
+        e = ed;
+        x = xd;
+        k = kd;
+        r = rd;
+        tmp = td;
+#endif
+        s = e;
+        kInv = k;
+
+        if (hashLen > 48U) {
+            hashLen = 48U;
+        }
+
+        sp_384_from_bin(e, 12, hash, (int)hashLen);
+    }
+
+    for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
+        sp_384_from_mp(x, 12, priv);
+
+        /* New random point. */
+        if (km == NULL || mp_iszero(km)) {
+            err = sp_384_ecc_gen_k_12(rng, k);
+        }
+        else {
+            sp_384_from_mp(k, 12, km);
+            mp_zero(km);
+        }
+        if (err == MP_OKAY) {
+                err = sp_384_ecc_mulmod_base_12(point, k, 1, NULL);
+        }
+
+        if (err == MP_OKAY) {
+            /* r = point->x mod order */
+            XMEMCPY(r, point->x, sizeof(sp_digit) * 12U);
+            sp_384_norm_12(r);
+            c = sp_384_cmp_12(r, p384_order);
+            sp_384_cond_sub_12(r, r, p384_order, 0L - (sp_digit)(c >= 0));
+            sp_384_norm_12(r);
+
+            /* Conv k to Montgomery form (mod order) */
+                sp_384_mul_12(k, k, p384_norm_order);
+            err = sp_384_mod_12(k, k, p384_order);
+        }
+        if (err == MP_OKAY) {
+            sp_384_norm_12(k);
+            /* kInv = 1/k mod order */
+                sp_384_mont_inv_order_12(kInv, k, tmp);
+            sp_384_norm_12(kInv);
+
+            /* s = r * x + e */
+                sp_384_mul_12(x, x, r);
+            err = sp_384_mod_12(x, x, p384_order);
+        }
+        if (err == MP_OKAY) {
+            sp_384_norm_12(x);
+            carry = sp_384_add_12(s, e, x);
+            sp_384_cond_sub_12(s, s, p384_order, 0 - carry);
+            sp_384_norm_12(s);
+            c = sp_384_cmp_12(s, p384_order);
+            sp_384_cond_sub_12(s, s, p384_order, 0L - (sp_digit)(c >= 0));
+            sp_384_norm_12(s);
+
+            /* s = s * k^-1 mod order */
+                sp_384_mont_mul_order_12(s, s, kInv);
+            sp_384_norm_12(s);
+
+            /* Check that signature is usable. */
+            if (sp_384_iszero_12(s) == 0) {
+                break;
+            }
+        }
+    }
+
+    if (i == 0) {
+        err = RNG_FAILURE_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(r, rm);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(s, sm);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XMEMSET(d, 0, sizeof(sp_digit) * 8 * 12);
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    XMEMSET(e, 0, sizeof(sp_digit) * 2U * 12U);
+    XMEMSET(x, 0, sizeof(sp_digit) * 2U * 12U);
+    XMEMSET(k, 0, sizeof(sp_digit) * 2U * 12U);
+    XMEMSET(r, 0, sizeof(sp_digit) * 2U * 12U);
+    XMEMSET(r, 0, sizeof(sp_digit) * 2U * 12U);
+    XMEMSET(tmp, 0, sizeof(sp_digit) * 3U * 2U * 12U);
+#endif
+    sp_384_point_free_12(point, 1, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_SIGN */
+
+#ifdef HAVE_ECC_VERIFY
+/* Verify the signature values with the hash and public key.
+ *   e = Truncate(hash, 384)
+ *   u1 = e/s mod order
+ *   u2 = r/s mod order
+ *   r == (u1.G + u2.Q)->x mod order
+ * Optimization: Leave point in projective form.
+ *   (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
+ *   (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
+ * The hash is truncated to the first 384 bits.
+ *
+ * hash     Hash to sign.
+ * hashLen  Length of the hash data.
+ * rng      Random number generator.
+ * priv     Private part of key - scalar.
+ * rm       First part of result as an mp_int.
+ * sm       Sirst part of result as an mp_int.
+ * heap     Heap to use for allocation.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+int sp_ecc_verify_384(const byte* hash, word32 hashLen, mp_int* pX,
+    mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit u1d[2*12];
+    sp_digit u2d[2*12];
+    sp_digit sd[2*12];
+    sp_digit tmpd[2*12 * 5];
+    sp_point_384 p1d;
+    sp_point_384 p2d;
+#endif
+    sp_digit* u1 = NULL;
+    sp_digit* u2 = NULL;
+    sp_digit* s = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_384* p1;
+    sp_point_384* p2 = NULL;
+    sp_digit carry;
+    int32_t c;
+    int err;
+
+    err = sp_384_point_new_12(heap, p1d, p1);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, p2d, p2);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 12, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        u1  = d + 0 * 12;
+        u2  = d + 2 * 12;
+        s   = d + 4 * 12;
+        tmp = d + 6 * 12;
+#else
+        u1 = u1d;
+        u2 = u2d;
+        s  = sd;
+        tmp = tmpd;
+#endif
+
+        if (hashLen > 48U) {
+            hashLen = 48U;
+        }
+
+        sp_384_from_bin(u1, 12, hash, (int)hashLen);
+        sp_384_from_mp(u2, 12, r);
+        sp_384_from_mp(s, 12, sm);
+        sp_384_from_mp(p2->x, 12, pX);
+        sp_384_from_mp(p2->y, 12, pY);
+        sp_384_from_mp(p2->z, 12, pZ);
+
+        {
+            sp_384_mul_12(s, s, p384_norm_order);
+        }
+        err = sp_384_mod_12(s, s, p384_order);
+    }
+    if (err == MP_OKAY) {
+        sp_384_norm_12(s);
+        {
+            sp_384_mont_inv_order_12(s, s, tmp);
+            sp_384_mont_mul_order_12(u1, u1, s);
+            sp_384_mont_mul_order_12(u2, u2, s);
+        }
+
+            err = sp_384_ecc_mulmod_base_12(p1, u1, 0, heap);
+    }
+    if (err == MP_OKAY) {
+            err = sp_384_ecc_mulmod_12(p2, p2, u2, 0, heap);
+    }
+
+    if (err == MP_OKAY) {
+        {
+            sp_384_proj_point_add_12(p1, p1, p2, tmp);
+            if (sp_384_iszero_12(p1->z)) {
+                if (sp_384_iszero_12(p1->x) && sp_384_iszero_12(p1->y)) {
+                    sp_384_proj_point_dbl_12(p1, p2, tmp);
+                }
+                else {
+                    /* Y ordinate is not used from here - don't set. */
+                    p1->x[0] = 0;
+                    p1->x[1] = 0;
+                    p1->x[2] = 0;
+                    p1->x[3] = 0;
+                    p1->x[4] = 0;
+                    p1->x[5] = 0;
+                    p1->x[6] = 0;
+                    p1->x[7] = 0;
+                    p1->x[8] = 0;
+                    p1->x[9] = 0;
+                    p1->x[10] = 0;
+                    p1->x[11] = 0;
+                    XMEMCPY(p1->z, p384_norm_mod, sizeof(p384_norm_mod));
+                }
+            }
+        }
+
+        /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
+        /* Reload r and convert to Montgomery form. */
+        sp_384_from_mp(u2, 12, r);
+        err = sp_384_mod_mul_norm_12(u2, u2, p384_mod);
+    }
+
+    if (err == MP_OKAY) {
+        /* u1 = r.z'.z' mod prime */
+        sp_384_mont_sqr_12(p1->z, p1->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(u1, u2, p1->z, p384_mod, p384_mp_mod);
+        *res = (int)(sp_384_cmp_12(p1->x, u1) == 0);
+        if (*res == 0) {
+            /* Reload r and add order. */
+            sp_384_from_mp(u2, 12, r);
+            carry = sp_384_add_12(u2, u2, p384_order);
+            /* Carry means result is greater than mod and is not valid. */
+            if (carry == 0) {
+                sp_384_norm_12(u2);
+
+                /* Compare with mod and if greater or equal then not valid. */
+                c = sp_384_cmp_12(u2, p384_mod);
+                if (c < 0) {
+                    /* Convert to Montogomery form */
+                    err = sp_384_mod_mul_norm_12(u2, u2, p384_mod);
+                    if (err == MP_OKAY) {
+                        /* u1 = (r + 1*order).z'.z' mod prime */
+                        sp_384_mont_mul_12(u1, u2, p1->z, p384_mod,
+                                                                  p384_mp_mod);
+                        *res = (int)(sp_384_cmp_12(p1->x, u1) == 0);
+                    }
+                }
+            }
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL)
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+#endif
+    sp_384_point_free_12(p1, 0, heap);
+    sp_384_point_free_12(p2, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_VERIFY */
+
+#ifdef HAVE_ECC_CHECK_KEY
+/* Check that the x and y oridinates are a valid point on the curve.
+ *
+ * point  EC point.
+ * heap   Heap to use if dynamically allocating.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+static int sp_384_ecc_is_point_12(sp_point_384* point, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit t1d[2*12];
+    sp_digit t2d[2*12];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12 * 4, heap, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = d + 0 * 12;
+        t2 = d + 2 * 12;
+#else
+        (void)heap;
+
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
+        sp_384_sqr_12(t1, point->y);
+        (void)sp_384_mod_12(t1, t1, p384_mod);
+        sp_384_sqr_12(t2, point->x);
+        (void)sp_384_mod_12(t2, t2, p384_mod);
+        sp_384_mul_12(t2, t2, point->x);
+        (void)sp_384_mod_12(t2, t2, p384_mod);
+        (void)sp_384_sub_12(t2, p384_mod, t2);
+        sp_384_mont_add_12(t1, t1, t2, p384_mod);
+
+        sp_384_mont_add_12(t1, t1, point->x, p384_mod);
+        sp_384_mont_add_12(t1, t1, point->x, p384_mod);
+        sp_384_mont_add_12(t1, t1, point->x, p384_mod);
+
+        if (sp_384_cmp_12(t1, p384_b) != 0) {
+            err = MP_VAL;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+
+/* Check that the x and y oridinates are a valid point on the curve.
+ *
+ * pX  X ordinate of EC point.
+ * pY  Y ordinate of EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+int sp_ecc_is_point_384(mp_int* pX, mp_int* pY)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 pubd;
+#endif
+    sp_point_384* pub;
+    byte one[1] = { 1 };
+    int err;
+
+    err = sp_384_point_new_12(NULL, pubd, pub);
+    if (err == MP_OKAY) {
+        sp_384_from_mp(pub->x, 12, pX);
+        sp_384_from_mp(pub->y, 12, pY);
+        sp_384_from_bin(pub->z, 12, one, (int)sizeof(one));
+
+        err = sp_384_ecc_is_point_12(pub, NULL);
+    }
+
+    sp_384_point_free_12(pub, 0, NULL);
+
+    return err;
+}
+
+/* Check that the private scalar generates the EC point (px, py), the point is
+ * on the curve and the point has the correct order.
+ *
+ * pX     X ordinate of EC point.
+ * pY     Y ordinate of EC point.
+ * privm  Private scalar that generates EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve, ECC_INF_E if the point does not have the correct order,
+ * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and
+ * MP_OKAY otherwise.
+ */
+int sp_ecc_check_key_384(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit privd[12];
+    sp_point_384 pubd;
+    sp_point_384 pd;
+#endif
+    sp_digit* priv = NULL;
+    sp_point_384* pub;
+    sp_point_384* p = NULL;
+    byte one[1] = { 1 };
+    int err;
+
+    err = sp_384_point_new_12(heap, pubd, pub);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (priv == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+        priv = privd;
+#endif
+
+        sp_384_from_mp(pub->x, 12, pX);
+        sp_384_from_mp(pub->y, 12, pY);
+        sp_384_from_bin(pub->z, 12, one, (int)sizeof(one));
+        sp_384_from_mp(priv, 12, privm);
+
+        /* Check point at infinitiy. */
+        if ((sp_384_iszero_12(pub->x) != 0) &&
+            (sp_384_iszero_12(pub->y) != 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check range of X and Y */
+        if (sp_384_cmp_12(pub->x, p384_mod) >= 0 ||
+            sp_384_cmp_12(pub->y, p384_mod) >= 0) {
+            err = ECC_OUT_OF_RANGE_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check point is on curve */
+        err = sp_384_ecc_is_point_12(pub, heap);
+    }
+
+    if (err == MP_OKAY) {
+        /* Point * order = infinity */
+            err = sp_384_ecc_mulmod_12(p, pub, p384_order, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is infinity */
+        if ((sp_384_iszero_12(p->x) == 0) ||
+            (sp_384_iszero_12(p->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Base * private = point */
+            err = sp_384_ecc_mulmod_base_12(p, priv, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is public key */
+        if (sp_384_cmp_12(p->x, pub->x) != 0 ||
+            sp_384_cmp_12(p->y, pub->y) != 0) {
+            err = ECC_PRIV_KEY_E;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (priv != NULL) {
+        XFREE(priv, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(p, 0, heap);
+    sp_384_point_free_12(pub, 0, heap);
+
+    return err;
+}
+#endif
+#ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
+/* Add two projective EC points together.
+ * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ)
+ *
+ * pX   First EC point's X ordinate.
+ * pY   First EC point's Y ordinate.
+ * pZ   First EC point's Z ordinate.
+ * qX   Second EC point's X ordinate.
+ * qY   Second EC point's Y ordinate.
+ * qZ   Second EC point's Z ordinate.
+ * rX   Resultant EC point's X ordinate.
+ * rY   Resultant EC point's Y ordinate.
+ * rZ   Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_add_point_384(mp_int* pX, mp_int* pY, mp_int* pZ,
+                              mp_int* qX, mp_int* qY, mp_int* qZ,
+                              mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit tmpd[2 * 12 * 5];
+    sp_point_384 pd;
+    sp_point_384 qd;
+#endif
+    sp_digit* tmp;
+    sp_point_384* p;
+    sp_point_384* q = NULL;
+    int err;
+
+    err = sp_384_point_new_12(NULL, pd, p);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(NULL, qd, q);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 5, NULL,
+                                                              DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_384_from_mp(p->x, 12, pX);
+        sp_384_from_mp(p->y, 12, pY);
+        sp_384_from_mp(p->z, 12, pZ);
+        sp_384_from_mp(q->x, 12, qX);
+        sp_384_from_mp(q->y, 12, qY);
+        sp_384_from_mp(q->z, 12, qZ);
+
+            sp_384_proj_point_add_12(p, p, q, tmp);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->x, rX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->y, rY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->z, rZ);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(q, 0, NULL);
+    sp_384_point_free_12(p, 0, NULL);
+
+    return err;
+}
+
+/* Double a projective EC point.
+ * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ)
+ *
+ * pX   EC point's X ordinate.
+ * pY   EC point's Y ordinate.
+ * pZ   EC point's Z ordinate.
+ * rX   Resultant EC point's X ordinate.
+ * rY   Resultant EC point's Y ordinate.
+ * rZ   Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_dbl_point_384(mp_int* pX, mp_int* pY, mp_int* pZ,
+                              mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit tmpd[2 * 12 * 2];
+    sp_point_384 pd;
+#endif
+    sp_digit* tmp;
+    sp_point_384* p;
+    int err;
+
+    err = sp_384_point_new_12(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 2, NULL,
+                                                              DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_384_from_mp(p->x, 12, pX);
+        sp_384_from_mp(p->y, 12, pY);
+        sp_384_from_mp(p->z, 12, pZ);
+
+            sp_384_proj_point_dbl_12(p, p, tmp);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->x, rX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->y, rY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->z, rZ);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(p, 0, NULL);
+
+    return err;
+}
+
+/* Map a projective EC point to affine in place.
+ * pZ will be one.
+ *
+ * pX   EC point's X ordinate.
+ * pY   EC point's Y ordinate.
+ * pZ   EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_map_384(mp_int* pX, mp_int* pY, mp_int* pZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit tmpd[2 * 12 * 6];
+    sp_point_384 pd;
+#endif
+    sp_digit* tmp;
+    sp_point_384* p;
+    int err;
+
+    err = sp_384_point_new_12(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, NULL,
+                                                              DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+    if (err == MP_OKAY) {
+        sp_384_from_mp(p->x, 12, pX);
+        sp_384_from_mp(p->y, 12, pY);
+        sp_384_from_mp(p->z, 12, pZ);
+
+        sp_384_map_12(p, p, tmp);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->x, pX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->y, pY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->z, pZ);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(p, 0, NULL);
+
+    return err;
+}
+#endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */
+#ifdef HAVE_COMP_KEY
+/* Find the square root of a number mod the prime of the curve.
+ *
+ * y  The number to operate on and the result.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+static int sp_384_mont_sqrt_12(sp_digit* y)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d;
+#else
+    sp_digit t1d[2 * 12];
+    sp_digit t2d[2 * 12];
+    sp_digit t3d[2 * 12];
+    sp_digit t4d[2 * 12];
+    sp_digit t5d[2 * 12];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    sp_digit* t3;
+    sp_digit* t4;
+    sp_digit* t5;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5 * 2 * 12, NULL, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = d + 0 * 12;
+        t2 = d + 2 * 12;
+        t3 = d + 4 * 12;
+        t4 = d + 6 * 12;
+        t5 = d + 8 * 12;
+#else
+        t1 = t1d;
+        t2 = t2d;
+        t3 = t3d;
+        t4 = t4d;
+        t5 = t5d;
+#endif
+
+        {
+            /* t2 = y ^ 0x2 */
+            sp_384_mont_sqr_12(t2, y, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x3 */
+            sp_384_mont_mul_12(t1, t2, y, p384_mod, p384_mp_mod);
+            /* t5 = y ^ 0xc */
+            sp_384_mont_sqr_n_12(t5, t1, 2, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xf */
+            sp_384_mont_mul_12(t1, t1, t5, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x1e */
+            sp_384_mont_sqr_12(t2, t1, p384_mod, p384_mp_mod);
+            /* t3 = y ^ 0x1f */
+            sp_384_mont_mul_12(t3, t2, y, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3e0 */
+            sp_384_mont_sqr_n_12(t2, t3, 5, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x3ff */
+            sp_384_mont_mul_12(t1, t3, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x7fe0 */
+            sp_384_mont_sqr_n_12(t2, t1, 5, p384_mod, p384_mp_mod);
+            /* t3 = y ^ 0x7fff */
+            sp_384_mont_mul_12(t3, t3, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3fff800 */
+            sp_384_mont_sqr_n_12(t2, t3, 15, p384_mod, p384_mp_mod);
+            /* t4 = y ^ 0x3ffffff */
+            sp_384_mont_mul_12(t4, t3, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0xffffffc000000 */
+            sp_384_mont_sqr_n_12(t2, t4, 30, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xfffffffffffff */
+            sp_384_mont_mul_12(t1, t4, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0xfffffffffffffff000000000000000 */
+            sp_384_mont_sqr_n_12(t2, t1, 60, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xffffffffffffffffffffffffffffff */
+            sp_384_mont_mul_12(t1, t1, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */
+            sp_384_mont_sqr_n_12(t2, t1, 120, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+            sp_384_mont_mul_12(t1, t1, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */
+            sp_384_mont_sqr_n_12(t2, t1, 15, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+            sp_384_mont_mul_12(t1, t3, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff80000000 */
+            sp_384_mont_sqr_n_12(t2, t1, 31, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff */
+            sp_384_mont_mul_12(t1, t4, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff0 */
+            sp_384_mont_sqr_n_12(t2, t1, 4, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc */
+            sp_384_mont_mul_12(t1, t5, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000 */
+            sp_384_mont_sqr_n_12(t2, t1, 62, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000001 */
+            sp_384_mont_mul_12(t1, y, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc00000000000000040000000 */
+            sp_384_mont_sqr_n_12(y, t1, 30, p384_mod, p384_mp_mod);
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+
+
+/* Uncompress the point given the X ordinate.
+ *
+ * xm    X ordinate.
+ * odd   Whether the Y ordinate is odd.
+ * ym    Calculated Y ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_uncompress_384(mp_int* xm, int odd, mp_int* ym)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d;
+#else
+    sp_digit xd[2 * 12];
+    sp_digit yd[2 * 12];
+#endif
+    sp_digit* x = NULL;
+    sp_digit* y = NULL;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 12, NULL, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        x = d + 0 * 12;
+        y = d + 2 * 12;
+#else
+        x = xd;
+        y = yd;
+#endif
+
+        sp_384_from_mp(x, 12, xm);
+        err = sp_384_mod_mul_norm_12(x, x, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        /* y = x^3 */
+        {
+            sp_384_mont_sqr_12(y, x, p384_mod, p384_mp_mod);
+            sp_384_mont_mul_12(y, y, x, p384_mod, p384_mp_mod);
+        }
+        /* y = x^3 - 3x */
+        sp_384_mont_sub_12(y, y, x, p384_mod);
+        sp_384_mont_sub_12(y, y, x, p384_mod);
+        sp_384_mont_sub_12(y, y, x, p384_mod);
+        /* y = x^3 - 3x + b */
+        err = sp_384_mod_mul_norm_12(x, p384_b, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        sp_384_mont_add_12(y, y, x, p384_mod);
+        /* y = sqrt(x^3 - 3x + b) */
+        err = sp_384_mont_sqrt_12(y);
+    }
+    if (err == MP_OKAY) {
+        XMEMSET(y + 12, 0, 12U * sizeof(sp_digit));
+        sp_384_mont_reduce_12(y, p384_mod, p384_mp_mod);
+        if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) {
+            sp_384_mont_sub_12(y, p384_mod, y, p384_mod);
+        }
+
+        err = sp_384_to_mp(y, ym);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+#endif
+#endif /* WOLFSSL_SP_384 */
+#endif /* WOLFSSL_HAVE_SP_ECC */
+#endif /* WOLFSSL_SP_ARM_THUMB_ASM */
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH || WOLFSSL_HAVE_SP_ECC */
+
--- a/wolfcrypt/src/sp_c32.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/sp_c32.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* sp.c
  *
- * Copyright (C) 2006-2018 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -39,7 +39,9 @@
                                     defined(WOLFSSL_HAVE_SP_ECC)
 
 #ifdef RSA_LOW_MEM
+#ifndef SP_RSA_PRIVATE_EXP_D
 #define SP_RSA_PRIVATE_EXP_D
+#endif
 
 #ifndef WOLFSSL_SP_SMALL
 #define WOLFSSL_SP_SMALL
@@ -50,92 +52,108 @@
 
 #ifndef WOLFSSL_SP_ASM
 #if SP_WORD_SIZE == 32
-#if defined(WOLFSSL_SP_CACHE_RESISTANT) || defined(WOLFSSL_SP_SMALL)
+#if (defined(WOLFSSL_SP_CACHE_RESISTANT) || defined(WOLFSSL_SP_SMALL)) &&              (defined(WOLFSSL_HAVE_SP_ECC) || !defined(WOLFSSL_RSA_PUBLIC_ONLY))
 /* Mask for address to obfuscate which of the two address will be used. */
 static const size_t addr_mask[2] = { 0, (size_t)-1 };
 #endif
 
 #if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
 #ifndef WOLFSSL_SP_NO_2048
-/* Read big endian unsigned byte aray into r.
- *
- * r  A single precision integer.
+/* Read big endian unsigned byte array into r.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
  * a  Byte array.
  * n  Number of bytes in array to read.
  */
-static void sp_2048_from_bin(sp_digit* r, int max, const byte* a, int n)
-{
-    int i, j = 0, s = 0;
+static void sp_2048_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;
 
     r[0] = 0;
     for (i = n-1; i >= 0; i--) {
-        r[j] |= ((sp_digit)a[i]) << s;
-        if (s >= 15) {
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 15U) {
             r[j] &= 0x7fffff;
-            s = 23 - s;
-            if (j + 1 >= max)
-                break;
-            r[++j] = a[i] >> s;
-            s = 8 - s;
-        }
-        else
-            s += 8;
-    }
-
-    for (j++; j < max; j++)
+            s = 23U - s;
+            if (j + 1 >= size) {
+                break;
+            }
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    for (j++; j < size; j++) {
         r[j] = 0;
+    }
 }
 
 /* Convert an mp_int to an array of sp_digit.
  *
  * r  A single precision integer.
+ * size  Maximum number of bytes to convert
  * a  A multi-precision integer.
  */
-static void sp_2048_from_mp(sp_digit* r, int max, mp_int* a)
+static void sp_2048_from_mp(sp_digit* r, int size, const mp_int* a)
 {
 #if DIGIT_BIT == 23
     int j;
 
     XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
 
-    for (j = a->used; j < max; j++)
+    for (j = a->used; j < size; j++) {
         r[j] = 0;
+    }
 #elif DIGIT_BIT > 23
-    int i, j = 0, s = 0;
+    int i, j = 0;
+    word32 s = 0;
 
     r[0] = 0;
-    for (i = 0; i < a->used && j < max; i++) {
-        r[j] |= a->dp[i] << s;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
         r[j] &= 0x7fffff;
-        s = 23 - s;
-        if (j + 1 >= max)
+        s = 23U - s;
+        if (j + 1 >= size) {
             break;
-        r[++j] = a->dp[i] >> s;
-        while (s + 23 <= DIGIT_BIT) {
-            s += 23;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 23U) <= (word32)DIGIT_BIT) {
+            s += 23U;
             r[j] &= 0x7fffff;
-            if (j + 1 >= max)
-                break;
-            if (s < DIGIT_BIT)
-                r[++j] = a->dp[i] >> s;
-            else
-                r[++j] = 0;
-        }
-        s = DIGIT_BIT - s;
-    }
-
-    for (j++; j < max; j++)
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
         r[j] = 0;
+    }
 #else
     int i, j = 0, s = 0;
 
     r[0] = 0;
-    for (i = 0; i < a->used && j < max; i++) {
+    for (i = 0; i < a->used && j < size; i++) {
         r[j] |= ((sp_digit)a->dp[i]) << s;
         if (s + DIGIT_BIT >= 23) {
             r[j] &= 0x7fffff;
-            if (j + 1 >= max)
-                break;
+            if (j + 1 >= size) {
+                break;
+            }
             s = 23 - s;
             if (s == DIGIT_BIT) {
                 r[++j] = 0;
@@ -146,16 +164,18 @@
                 s = DIGIT_BIT - s;
             }
         }
-        else
+        else {
             s += DIGIT_BIT;
-    }
-
-    for (j++; j < max; j++)
+        }
+    }
+
+    for (j++; j < size; j++) {
         r[j] = 0;
-#endif
-}
-
-/* Write r as big endian to byte aray.
+    }
+#endif
+}
+
+/* Write r as big endian to byte array.
  * Fixed length number of bytes written: 256
  *
  * r  A single precision integer.
@@ -173,19 +193,26 @@
     a[j] = 0;
     for (i=0; i<90 && j>=0; i++) {
         b = 0;
-        a[j--] |= r[i] << s; b += 8 - s;
-        if (j < 0)
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
             break;
+        }
         while (b < 23) {
-            a[j--] = r[i] >> b; b += 8;
-            if (j < 0)
-                break;
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
+                break;
+            }
         }
         s = 8 - (b - 23);
-        if (j >= 0)
+        if (j >= 0) {
             a[j] = 0;
-        if (s != 0)
+        }
+        if (s != 0) {
             j++;
+        }
     }
 }
 
@@ -731,30 +758,30 @@
     sp_digit b0[15];
     sp_digit b1[15];
     sp_digit b2[15];
-    sp_2048_add_15(a0, a, &a[15]);
-    sp_2048_add_15(b0, b, &b[15]);
-    sp_2048_add_15(a1, &a[15], &a[30]);
-    sp_2048_add_15(b1, &b[15], &b[30]);
-    sp_2048_add_15(a2, a0, &a[30]);
-    sp_2048_add_15(b2, b0, &b[30]);
+    (void)sp_2048_add_15(a0, a, &a[15]);
+    (void)sp_2048_add_15(b0, b, &b[15]);
+    (void)sp_2048_add_15(a1, &a[15], &a[30]);
+    (void)sp_2048_add_15(b1, &b[15], &b[30]);
+    (void)sp_2048_add_15(a2, a0, &a[30]);
+    (void)sp_2048_add_15(b2, b0, &b[30]);
     sp_2048_mul_15(p0, a, b);
     sp_2048_mul_15(p2, &a[15], &b[15]);
     sp_2048_mul_15(p4, &a[30], &b[30]);
     sp_2048_mul_15(p1, a0, b0);
     sp_2048_mul_15(p3, a1, b1);
     sp_2048_mul_15(p5, a2, b2);
-    XMEMSET(r, 0, sizeof(*r)*2*45);
-    sp_2048_sub_30(t0, p3, p2);
-    sp_2048_sub_30(t1, p1, p2);
-    sp_2048_sub_30(t2, p5, t0);
-    sp_2048_sub_30(t2, t2, t1);
-    sp_2048_sub_30(t0, t0, p4);
-    sp_2048_sub_30(t1, t1, p0);
-    sp_2048_add_30(r, r, p0);
-    sp_2048_add_30(&r[15], &r[15], t1);
-    sp_2048_add_30(&r[30], &r[30], t2);
-    sp_2048_add_30(&r[45], &r[45], t0);
-    sp_2048_add_30(&r[60], &r[60], p4);
+    XMEMSET(r, 0, sizeof(*r)*2U*45U);
+    (void)sp_2048_sub_30(t0, p3, p2);
+    (void)sp_2048_sub_30(t1, p1, p2);
+    (void)sp_2048_sub_30(t2, p5, t0);
+    (void)sp_2048_sub_30(t2, t2, t1);
+    (void)sp_2048_sub_30(t0, t0, p4);
+    (void)sp_2048_sub_30(t1, t1, p0);
+    (void)sp_2048_add_30(r, r, p0);
+    (void)sp_2048_add_30(&r[15], &r[15], t1);
+    (void)sp_2048_add_30(&r[30], &r[30], t2);
+    (void)sp_2048_add_30(&r[45], &r[45], t0);
+    (void)sp_2048_add_30(&r[60], &r[60], p4);
 }
 
 /* Square a into r. (r = a * a)
@@ -776,27 +803,27 @@
     sp_digit a0[15];
     sp_digit a1[15];
     sp_digit a2[15];
-    sp_2048_add_15(a0, a, &a[15]);
-    sp_2048_add_15(a1, &a[15], &a[30]);
-    sp_2048_add_15(a2, a0, &a[30]);
+    (void)sp_2048_add_15(a0, a, &a[15]);
+    (void)sp_2048_add_15(a1, &a[15], &a[30]);
+    (void)sp_2048_add_15(a2, a0, &a[30]);
     sp_2048_sqr_15(p0, a);
     sp_2048_sqr_15(p2, &a[15]);
     sp_2048_sqr_15(p4, &a[30]);
     sp_2048_sqr_15(p1, a0);
     sp_2048_sqr_15(p3, a1);
     sp_2048_sqr_15(p5, a2);
-    XMEMSET(r, 0, sizeof(*r)*2*45);
-    sp_2048_sub_30(t0, p3, p2);
-    sp_2048_sub_30(t1, p1, p2);
-    sp_2048_sub_30(t2, p5, t0);
-    sp_2048_sub_30(t2, t2, t1);
-    sp_2048_sub_30(t0, t0, p4);
-    sp_2048_sub_30(t1, t1, p0);
-    sp_2048_add_30(r, r, p0);
-    sp_2048_add_30(&r[15], &r[15], t1);
-    sp_2048_add_30(&r[30], &r[30], t2);
-    sp_2048_add_30(&r[45], &r[45], t0);
-    sp_2048_add_30(&r[60], &r[60], p4);
+    XMEMSET(r, 0, sizeof(*r)*2U*45U);
+    (void)sp_2048_sub_30(t0, p3, p2);
+    (void)sp_2048_sub_30(t1, p1, p2);
+    (void)sp_2048_sub_30(t2, p5, t0);
+    (void)sp_2048_sub_30(t2, t2, t1);
+    (void)sp_2048_sub_30(t0, t0, p4);
+    (void)sp_2048_sub_30(t1, t1, p0);
+    (void)sp_2048_add_30(r, r, p0);
+    (void)sp_2048_add_30(&r[15], &r[15], t1);
+    (void)sp_2048_add_30(&r[30], &r[30], t2);
+    (void)sp_2048_add_30(&r[45], &r[45], t0);
+    (void)sp_2048_add_30(&r[60], &r[60], p4);
 }
 
 /* Add b to a into r. (r = a + b)
@@ -897,14 +924,14 @@
     sp_digit* a1 = z1;
     sp_digit b1[45];
     sp_digit* z2 = r + 90;
-    sp_2048_add_45(a1, a, &a[45]);
-    sp_2048_add_45(b1, b, &b[45]);
+    (void)sp_2048_add_45(a1, a, &a[45]);
+    (void)sp_2048_add_45(b1, b, &b[45]);
     sp_2048_mul_45(z2, &a[45], &b[45]);
     sp_2048_mul_45(z0, a, b);
     sp_2048_mul_45(z1, a1, b1);
-    sp_2048_sub_90(z1, z1, z2);
-    sp_2048_sub_90(z1, z1, z0);
-    sp_2048_add_90(r + 45, r + 45, z1);
+    (void)sp_2048_sub_90(z1, z1, z2);
+    (void)sp_2048_sub_90(z1, z1, z0);
+    (void)sp_2048_add_90(r + 45, r + 45, z1);
 }
 
 /* Square a and put result in r. (r = a * a)
@@ -918,16 +945,16 @@
     sp_digit z1[90];
     sp_digit* a1 = z1;
     sp_digit* z2 = r + 90;
-    sp_2048_add_45(a1, a, &a[45]);
+    (void)sp_2048_add_45(a1, a, &a[45]);
     sp_2048_sqr_45(z2, &a[45]);
     sp_2048_sqr_45(z0, a);
     sp_2048_sqr_45(z1, a1);
-    sp_2048_sub_90(z1, z1, z2);
-    sp_2048_sub_90(z1, z1, z0);
-    sp_2048_add_90(r + 45, r + 45, z1);
-}
-
-#endif /* WOLFSSL_SP_SMALL */
+    (void)sp_2048_sub_90(z1, z1, z2);
+    (void)sp_2048_sub_90(z1, z1, z0);
+    (void)sp_2048_add_90(r + 45, r + 45, z1);
+}
+
+#endif /* !WOLFSSL_SP_SMALL */
 #ifdef WOLFSSL_SP_SMALL
 /* Add b to a into r. (r = a + b)
  *
@@ -940,8 +967,9 @@
 {
     int i;
 
-    for (i = 0; i < 90; i++)
+    for (i = 0; i < 90; i++) {
         r[i] = a[i] + b[i];
+    }
 
     return 0;
 }
@@ -958,8 +986,9 @@
 {
     int i;
 
-    for (i = 0; i < 90; i++)
+    for (i = 0; i < 90; i++) {
         r[i] = a[i] - b[i];
+    }
 
     return 0;
 }
@@ -984,10 +1013,12 @@
     for (k = 177; k >= 0; k--) {
         for (i = 89; i >= 0; i--) {
             j = k - i;
-            if (j >= 90)
-                break;
-            if (j < 0)
+            if (j >= 90) {
+                break;
+            }
+            if (j < 0) {
                 continue;
+            }
 
             c += ((int64_t)a[i]) * b[j];
         }
@@ -1014,15 +1045,18 @@
     for (k = 177; k >= 0; k--) {
         for (i = 89; i >= 0; i--) {
             j = k - i;
-            if (j >= 90 || i <= j)
-                break;
-            if (j < 0)
+            if (j >= 90 || i <= j) {
+                break;
+            }
+            if (j < 0) {
                 continue;
+            }
 
             c += ((int64_t)a[i]) * a[j] * 2;
         }
-        if (i == j)
+        if (i == j) {
            c += ((int64_t)a[i]) * a[i];
+        }
 
         r[k + 2] += c >> 46;
         r[k + 1] = (c >> 23) & 0x7fffff;
@@ -1032,7 +1066,7 @@
 }
 
 #endif /* WOLFSSL_SP_SMALL */
-#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA)
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
 #ifdef WOLFSSL_SP_SMALL
 /* Add b to a into r. (r = a + b)
  *
@@ -1045,8 +1079,9 @@
 {
     int i;
 
-    for (i = 0; i < 45; i++)
+    for (i = 0; i < 45; i++) {
         r[i] = a[i] + b[i];
+    }
 
     return 0;
 }
@@ -1063,8 +1098,9 @@
 {
     int i;
 
-    for (i = 0; i < 45; i++)
+    for (i = 0; i < 45; i++) {
         r[i] = a[i] - b[i];
+    }
 
     return 0;
 }
@@ -1120,10 +1156,12 @@
     for (k = 87; k >= 0; k--) {
         for (i = 44; i >= 0; i--) {
             j = k - i;
-            if (j >= 45)
-                break;
-            if (j < 0)
+            if (j >= 45) {
+                break;
+            }
+            if (j < 0) {
                 continue;
+            }
 
             c += ((int64_t)a[i]) * b[j];
         }
@@ -1150,15 +1188,18 @@
     for (k = 87; k >= 0; k--) {
         for (i = 44; i >= 0; i--) {
             j = k - i;
-            if (j >= 45 || i <= j)
-                break;
-            if (j < 0)
+            if (j >= 45 || i <= j) {
+                break;
+            }
+            if (j < 0) {
                 continue;
+            }
 
             c += ((int64_t)a[i]) * a[j] * 2;
         }
-        if (i == j)
+        if (i == j) {
            c += ((int64_t)a[i]) * a[i];
+        }
 
         r[k + 2] += c >> 46;
         r[k + 1] = (c >> 23) & 0x7fffff;
@@ -1168,14 +1209,14 @@
 }
 
 #endif /* WOLFSSL_SP_SMALL */
-#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
 
 /* Caclulate the bottom digit of -1/a mod 2^n.
  *
  * a    A single precision number.
  * rho  Bottom word of inverse.
  */
-static void sp_2048_mont_setup(sp_digit* a, sp_digit* rho)
+static void sp_2048_mont_setup(const sp_digit* a, sp_digit* rho)
 {
     sp_digit x, b;
 
@@ -1190,21 +1231,72 @@
     *rho = (1L << 23) - x;
 }
 
-#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA)
+/* Multiply a by scalar b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A scalar.
+ */
+SP_NOINLINE static void sp_2048_mul_d_90(sp_digit* r, const sp_digit* a,
+    sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int64_t tb = b;
+    int64_t t = 0;
+    int i;
+
+    for (i = 0; i < 90; i++) {
+        t += tb * a[i];
+        r[i] = t & 0x7fffff;
+        t >>= 23;
+    }
+    r[90] = (sp_digit)t;
+#else
+    int64_t tb = b;
+    int64_t t[8];
+    int i;
+
+    t[0] = tb * a[0]; r[0] = t[0] & 0x7fffff;
+    for (i = 0; i < 88; i += 8) {
+        t[1] = tb * a[i+1];
+        r[i+1] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff);
+        t[2] = tb * a[i+2];
+        r[i+2] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff);
+        t[3] = tb * a[i+3];
+        r[i+3] = (sp_digit)(t[2] >> 23) + (t[3] & 0x7fffff);
+        t[4] = tb * a[i+4];
+        r[i+4] = (sp_digit)(t[3] >> 23) + (t[4] & 0x7fffff);
+        t[5] = tb * a[i+5];
+        r[i+5] = (sp_digit)(t[4] >> 23) + (t[5] & 0x7fffff);
+        t[6] = tb * a[i+6];
+        r[i+6] = (sp_digit)(t[5] >> 23) + (t[6] & 0x7fffff);
+        t[7] = tb * a[i+7];
+        r[i+7] = (sp_digit)(t[6] >> 23) + (t[7] & 0x7fffff);
+        t[0] = tb * a[i+8];
+        r[i+8] = (sp_digit)(t[7] >> 23) + (t[0] & 0x7fffff);
+    }
+    t[1] = tb * a[89];
+    r[89] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff);
+    r[90] =  (sp_digit)(t[1] >> 23);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
 /* r = 2^n mod m where n is the number of bits to reduce by.
  * Given m must be 2048 bits, just need to subtract.
  *
  * r  A single precision number.
- * m  A signle precision number.
- */
-static void sp_2048_mont_norm_45(sp_digit* r, sp_digit* m)
+ * m  A single precision number.
+ */
+static void sp_2048_mont_norm_45(sp_digit* r, const sp_digit* m)
 {
     /* Set r = 2^n - 1. */
 #ifdef WOLFSSL_SP_SMALL
     int i;
 
-    for (i=0; i<44; i++)
+    for (i=0; i<44; i++) {
         r[i] = 0x7fffff;
+    }
 #else
     int i;
 
@@ -1223,10 +1315,10 @@
     r[42] = 0x7fffff;
     r[43] = 0x7fffff;
 #endif
-    r[44] = 0xfffl;
+    r[44] = 0xfffL;
 
     /* r = (2^n - 1) mod n */
-    sp_2048_sub_45(r, r, m);
+    (void)sp_2048_sub_45(r, r, m);
 
     /* Add one so r = 2^n mod m */
     r[0] += 1;
@@ -1245,25 +1337,26 @@
 #ifdef WOLFSSL_SP_SMALL
     int i;
 
-    for (i=44; i>=0; i--)
-        r |= (a[i] - b[i]) & (0 - !r);
-#else
-    int i;
-
-    r |= (a[44] - b[44]) & (0 - !r);
-    r |= (a[43] - b[43]) & (0 - !r);
-    r |= (a[42] - b[42]) & (0 - !r);
-    r |= (a[41] - b[41]) & (0 - !r);
-    r |= (a[40] - b[40]) & (0 - !r);
+    for (i=44; i>=0; i--) {
+        r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    }
+#else
+    int i;
+
+    r |= (a[44] - b[44]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[43] - b[43]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[42] - b[42]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[41] - b[41]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[40] - b[40]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
     for (i = 32; i >= 0; i -= 8) {
-        r |= (a[i + 7] - b[i + 7]) & (0 - !r);
-        r |= (a[i + 6] - b[i + 6]) & (0 - !r);
-        r |= (a[i + 5] - b[i + 5]) & (0 - !r);
-        r |= (a[i + 4] - b[i + 4]) & (0 - !r);
-        r |= (a[i + 3] - b[i + 3]) & (0 - !r);
-        r |= (a[i + 2] - b[i + 2]) & (0 - !r);
-        r |= (a[i + 1] - b[i + 1]) & (0 - !r);
-        r |= (a[i + 0] - b[i + 0]) & (0 - !r);
+        r |= (a[i + 7] - b[i + 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 6] - b[i + 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 5] - b[i + 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 4] - b[i + 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 3] - b[i + 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 2] - b[i + 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 1] - b[i + 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 0] - b[i + 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
     }
 #endif /* WOLFSSL_SP_SMALL */
 
@@ -1284,8 +1377,9 @@
 #ifdef WOLFSSL_SP_SMALL
     int i;
 
-    for (i = 0; i < 45; i++)
+    for (i = 0; i < 45; i++) {
         r[i] = a[i] - (b[i] & m);
+    }
 #else
     int i;
 
@@ -1332,30 +1426,30 @@
     int64_t t[8];
     int i;
 
-    t[0] = tb * a[0]; r[0] += t[0] & 0x7fffff;
+    t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x7fffff);
     for (i = 0; i < 40; i += 8) {
         t[1] = tb * a[i+1];
-        r[i+1] += (t[0] >> 23) + (t[1] & 0x7fffff);
+        r[i+1] += (sp_digit)((t[0] >> 23) + (t[1] & 0x7fffff));
         t[2] = tb * a[i+2];
-        r[i+2] += (t[1] >> 23) + (t[2] & 0x7fffff);
+        r[i+2] += (sp_digit)((t[1] >> 23) + (t[2] & 0x7fffff));
         t[3] = tb * a[i+3];
-        r[i+3] += (t[2] >> 23) + (t[3] & 0x7fffff);
+        r[i+3] += (sp_digit)((t[2] >> 23) + (t[3] & 0x7fffff));
         t[4] = tb * a[i+4];
-        r[i+4] += (t[3] >> 23) + (t[4] & 0x7fffff);
+        r[i+4] += (sp_digit)((t[3] >> 23) + (t[4] & 0x7fffff));
         t[5] = tb * a[i+5];
-        r[i+5] += (t[4] >> 23) + (t[5] & 0x7fffff);
+        r[i+5] += (sp_digit)((t[4] >> 23) + (t[5] & 0x7fffff));
         t[6] = tb * a[i+6];
-        r[i+6] += (t[5] >> 23) + (t[6] & 0x7fffff);
+        r[i+6] += (sp_digit)((t[5] >> 23) + (t[6] & 0x7fffff));
         t[7] = tb * a[i+7];
-        r[i+7] += (t[6] >> 23) + (t[7] & 0x7fffff);
+        r[i+7] += (sp_digit)((t[6] >> 23) + (t[7] & 0x7fffff));
         t[0] = tb * a[i+8];
-        r[i+8] += (t[7] >> 23) + (t[0] & 0x7fffff);
-    }
-    t[1] = tb * a[41]; r[41] += (t[0] >> 23) + (t[1] & 0x7fffff);
-    t[2] = tb * a[42]; r[42] += (t[1] >> 23) + (t[2] & 0x7fffff);
-    t[3] = tb * a[43]; r[43] += (t[2] >> 23) + (t[3] & 0x7fffff);
-    t[4] = tb * a[44]; r[44] += (t[3] >> 23) + (t[4] & 0x7fffff);
-    r[45] +=  t[4] >> 23;
+        r[i+8] += (sp_digit)((t[7] >> 23) + (t[0] & 0x7fffff));
+    }
+    t[1] = tb * a[41]; r[41] += (sp_digit)((t[0] >> 23) + (t[1] & 0x7fffff));
+    t[2] = tb * a[42]; r[42] += (sp_digit)((t[1] >> 23) + (t[2] & 0x7fffff));
+    t[3] = tb * a[43]; r[43] += (sp_digit)((t[2] >> 23) + (t[3] & 0x7fffff));
+    t[4] = tb * a[44]; r[44] += (sp_digit)((t[3] >> 23) + (t[4] & 0x7fffff));
+    r[45] +=  (sp_digit)(t[4] >> 23);
 #endif /* WOLFSSL_SP_SMALL */
 }
 
@@ -1441,7 +1535,7 @@
     r[43] = n & 0x7fffff; n >>= 23; n += ((int64_t)a[89]) << 11;
     r[44] = (sp_digit)n;
 #endif /* WOLFSSL_SP_SMALL */
-    XMEMSET(&r[45], 0, sizeof(*r) * 45);
+    XMEMSET(&r[45], 0, sizeof(*r) * 45U);
 }
 
 /* Reduce the number back to 2048 bits using Montgomery reduction.
@@ -1450,23 +1544,26 @@
  * m   The single precision number representing the modulus.
  * mp  The digit representing the negative inverse of m mod 2^n.
  */
-static void sp_2048_mont_reduce_45(sp_digit* a, sp_digit* m, sp_digit mp)
+static void sp_2048_mont_reduce_45(sp_digit* a, const sp_digit* m, sp_digit mp)
 {
     int i;
     sp_digit mu;
 
+    sp_2048_norm_45(a + 45);
+
     for (i=0; i<44; i++) {
         mu = (a[i] * mp) & 0x7fffff;
         sp_2048_mul_add_45(a+i, m, mu);
         a[i+1] += a[i] >> 23;
     }
-    mu = (a[i] * mp) & 0xfffl;
+    mu = (a[i] * mp) & 0xfffL;
     sp_2048_mul_add_45(a+i, m, mu);
     a[i+1] += a[i] >> 23;
     a[i] &= 0x7fffff;
 
     sp_2048_mont_shift_45(a, a);
-    sp_2048_cond_sub_45(a, a, m, 0 - ((a[44] >> 12) > 0));
+    sp_2048_cond_sub_45(a, a, m, 0 - (((a[44] >> 12) > 0) ?
+            (sp_digit)1 : (sp_digit)0));
     sp_2048_norm_45(a);
 }
 
@@ -1479,8 +1576,8 @@
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_2048_mont_mul_45(sp_digit* r, sp_digit* a, sp_digit* b,
-        sp_digit* m, sp_digit mp)
+static void sp_2048_mont_mul_45(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
 {
     sp_2048_mul_45(r, a, b);
     sp_2048_mont_reduce_45(r, m, mp);
@@ -1493,7 +1590,7 @@
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_2048_mont_sqr_45(sp_digit* r, sp_digit* a, sp_digit* m,
+static void sp_2048_mont_sqr_45(sp_digit* r, const sp_digit* a, const sp_digit* m,
         sp_digit mp)
 {
     sp_2048_sqr_45(r, a);
@@ -1507,7 +1604,7 @@
  * b  A scalar.
  */
 SP_NOINLINE static void sp_2048_mul_d_45(sp_digit* r, const sp_digit* a,
-    const sp_digit b)
+    sp_digit b)
 {
 #ifdef WOLFSSL_SP_SMALL
     int64_t tb = b;
@@ -1556,56 +1653,6 @@
 #endif /* WOLFSSL_SP_SMALL */
 }
 
-/* Multiply a by scalar b into r. (r = a * b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A scalar.
- */
-SP_NOINLINE static void sp_2048_mul_d_90(sp_digit* r, const sp_digit* a,
-    const sp_digit b)
-{
-#ifdef WOLFSSL_SP_SMALL
-    int64_t tb = b;
-    int64_t t = 0;
-    int i;
-
-    for (i = 0; i < 90; i++) {
-        t += tb * a[i];
-        r[i] = t & 0x7fffff;
-        t >>= 23;
-    }
-    r[90] = (sp_digit)t;
-#else
-    int64_t tb = b;
-    int64_t t[8];
-    int i;
-
-    t[0] = tb * a[0]; r[0] = t[0] & 0x7fffff;
-    for (i = 0; i < 88; i += 8) {
-        t[1] = tb * a[i+1];
-        r[i+1] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff);
-        t[2] = tb * a[i+2];
-        r[i+2] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff);
-        t[3] = tb * a[i+3];
-        r[i+3] = (sp_digit)(t[2] >> 23) + (t[3] & 0x7fffff);
-        t[4] = tb * a[i+4];
-        r[i+4] = (sp_digit)(t[3] >> 23) + (t[4] & 0x7fffff);
-        t[5] = tb * a[i+5];
-        r[i+5] = (sp_digit)(t[4] >> 23) + (t[5] & 0x7fffff);
-        t[6] = tb * a[i+6];
-        r[i+6] = (sp_digit)(t[5] >> 23) + (t[6] & 0x7fffff);
-        t[7] = tb * a[i+7];
-        r[i+7] = (sp_digit)(t[6] >> 23) + (t[7] & 0x7fffff);
-        t[0] = tb * a[i+8];
-        r[i+8] = (sp_digit)(t[7] >> 23) + (t[0] & 0x7fffff);
-    }
-    t[1] = tb * a[89];
-    r[89] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff);
-    r[90] =  (sp_digit)(t[1] >> 23);
-#endif /* WOLFSSL_SP_SMALL */
-}
-
 /* Conditionally add a and b using the mask m.
  * m is -1 to add and 0 when not.
  *
@@ -1620,8 +1667,9 @@
 #ifdef WOLFSSL_SP_SMALL
     int i;
 
-    for (i = 0; i < 45; i++)
+    for (i = 0; i < 45; i++) {
         r[i] = a[i] + (b[i] & m);
+    }
 #else
     int i;
 
@@ -1655,60 +1703,32 @@
 {
     int i;
 
-    for (i = 0; i < 45; i++)
+    for (i = 0; i < 45; i++) {
         r[i] = a[i] + b[i];
+    }
 
     return 0;
 }
 #endif
 SP_NOINLINE static void sp_2048_rshift_45(sp_digit* r, sp_digit* a, byte n)
 {
-#ifdef WOLFSSL_SP_SMALL
-    int i;
-
-    for (i=0; i<44; i++)
+    int i;
+
+#ifdef WOLFSSL_SP_SMALL
+    for (i=0; i<44; i++) {
         r[i] = ((a[i] >> n) | (a[i + 1] << (23 - n))) & 0x7fffff;
-#else
-    r[0] = ((a[0] >> n) | (a[1] << (23 - n))) & 0x7fffff;
-    r[1] = ((a[1] >> n) | (a[2] << (23 - n))) & 0x7fffff;
-    r[2] = ((a[2] >> n) | (a[3] << (23 - n))) & 0x7fffff;
-    r[3] = ((a[3] >> n) | (a[4] << (23 - n))) & 0x7fffff;
-    r[4] = ((a[4] >> n) | (a[5] << (23 - n))) & 0x7fffff;
-    r[5] = ((a[5] >> n) | (a[6] << (23 - n))) & 0x7fffff;
-    r[6] = ((a[6] >> n) | (a[7] << (23 - n))) & 0x7fffff;
-    r[7] = ((a[7] >> n) | (a[8] << (23 - n))) & 0x7fffff;
-    r[8] = ((a[8] >> n) | (a[9] << (23 - n))) & 0x7fffff;
-    r[9] = ((a[9] >> n) | (a[10] << (23 - n))) & 0x7fffff;
-    r[10] = ((a[10] >> n) | (a[11] << (23 - n))) & 0x7fffff;
-    r[11] = ((a[11] >> n) | (a[12] << (23 - n))) & 0x7fffff;
-    r[12] = ((a[12] >> n) | (a[13] << (23 - n))) & 0x7fffff;
-    r[13] = ((a[13] >> n) | (a[14] << (23 - n))) & 0x7fffff;
-    r[14] = ((a[14] >> n) | (a[15] << (23 - n))) & 0x7fffff;
-    r[15] = ((a[15] >> n) | (a[16] << (23 - n))) & 0x7fffff;
-    r[16] = ((a[16] >> n) | (a[17] << (23 - n))) & 0x7fffff;
-    r[17] = ((a[17] >> n) | (a[18] << (23 - n))) & 0x7fffff;
-    r[18] = ((a[18] >> n) | (a[19] << (23 - n))) & 0x7fffff;
-    r[19] = ((a[19] >> n) | (a[20] << (23 - n))) & 0x7fffff;
-    r[20] = ((a[20] >> n) | (a[21] << (23 - n))) & 0x7fffff;
-    r[21] = ((a[21] >> n) | (a[22] << (23 - n))) & 0x7fffff;
-    r[22] = ((a[22] >> n) | (a[23] << (23 - n))) & 0x7fffff;
-    r[23] = ((a[23] >> n) | (a[24] << (23 - n))) & 0x7fffff;
-    r[24] = ((a[24] >> n) | (a[25] << (23 - n))) & 0x7fffff;
-    r[25] = ((a[25] >> n) | (a[26] << (23 - n))) & 0x7fffff;
-    r[26] = ((a[26] >> n) | (a[27] << (23 - n))) & 0x7fffff;
-    r[27] = ((a[27] >> n) | (a[28] << (23 - n))) & 0x7fffff;
-    r[28] = ((a[28] >> n) | (a[29] << (23 - n))) & 0x7fffff;
-    r[29] = ((a[29] >> n) | (a[30] << (23 - n))) & 0x7fffff;
-    r[30] = ((a[30] >> n) | (a[31] << (23 - n))) & 0x7fffff;
-    r[31] = ((a[31] >> n) | (a[32] << (23 - n))) & 0x7fffff;
-    r[32] = ((a[32] >> n) | (a[33] << (23 - n))) & 0x7fffff;
-    r[33] = ((a[33] >> n) | (a[34] << (23 - n))) & 0x7fffff;
-    r[34] = ((a[34] >> n) | (a[35] << (23 - n))) & 0x7fffff;
-    r[35] = ((a[35] >> n) | (a[36] << (23 - n))) & 0x7fffff;
-    r[36] = ((a[36] >> n) | (a[37] << (23 - n))) & 0x7fffff;
-    r[37] = ((a[37] >> n) | (a[38] << (23 - n))) & 0x7fffff;
-    r[38] = ((a[38] >> n) | (a[39] << (23 - n))) & 0x7fffff;
-    r[39] = ((a[39] >> n) | (a[40] << (23 - n))) & 0x7fffff;
+    }
+#else
+    for (i=0; i<40; i += 8) {
+        r[i+0] = ((a[i+0] >> n) | (a[i+1] << (23 - n))) & 0x7fffff;
+        r[i+1] = ((a[i+1] >> n) | (a[i+2] << (23 - n))) & 0x7fffff;
+        r[i+2] = ((a[i+2] >> n) | (a[i+3] << (23 - n))) & 0x7fffff;
+        r[i+3] = ((a[i+3] >> n) | (a[i+4] << (23 - n))) & 0x7fffff;
+        r[i+4] = ((a[i+4] >> n) | (a[i+5] << (23 - n))) & 0x7fffff;
+        r[i+5] = ((a[i+5] >> n) | (a[i+6] << (23 - n))) & 0x7fffff;
+        r[i+6] = ((a[i+6] >> n) | (a[i+7] << (23 - n))) & 0x7fffff;
+        r[i+7] = ((a[i+7] >> n) | (a[i+8] << (23 - n))) & 0x7fffff;
+    }
     r[40] = ((a[40] >> n) | (a[41] << (23 - n))) & 0x7fffff;
     r[41] = ((a[41] >> n) | (a[42] << (23 - n))) & 0x7fffff;
     r[42] = ((a[42] >> n) | (a[43] << (23 - n))) & 0x7fffff;
@@ -1717,6 +1737,36 @@
     r[44] = a[44] >> n;
 }
 
+#ifdef WOLFSSL_SP_DIV_32
+static WC_INLINE sp_digit sp_2048_div_word_45(sp_digit d1, sp_digit d0,
+    sp_digit dv)
+{
+    sp_digit d, r, t;
+
+    /* All 23 bits from d1 and top 8 bits from d0. */
+    d = (d1 << 8) | (d0 >> 15);
+    r = d / dv;
+    d -= r * dv;
+    /* Up to 9 bits in r */
+    /* Next 8 bits from d0. */
+    r <<= 8;
+    d <<= 8;
+    d |= (d0 >> 7) & ((1 << 8) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 17 bits in r */
+    /* Remaining 7 bits from d0. */
+    r <<= 7;
+    d <<= 7;
+    d |= d0 & ((1 << 7) - 1);
+    t = d / dv;
+    r += t;
+
+    return r;
+}
+#endif /* WOLFSSL_SP_DIV_32 */
+
 /* Divide d in a and put remainder into r (m*d + r = a)
  * m is not calculated as it is not needed at this time.
  *
@@ -1726,13 +1776,15 @@
  * r  Remainder from the division.
  * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
  */
-static int sp_2048_div_45(sp_digit* a, sp_digit* d, sp_digit* m,
+static int sp_2048_div_45(const sp_digit* a, const sp_digit* d, sp_digit* m,
         sp_digit* r)
 {
     int i;
+#ifndef WOLFSSL_SP_DIV_32
     int64_t d1;
-    sp_digit div, r1;
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#endif
+    sp_digit dv, r1;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit* td;
 #else
     sp_digit t1d[90 + 1], t2d[45 + 1], sdd[45 + 1];
@@ -1742,67 +1794,78 @@
     sp_digit* sd;
     int err = MP_OKAY;
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    td = XMALLOC(sizeof(sp_digit) * (4 * 45 + 3), NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    if (td != NULL) {
-        t1 = td;
-        t2 = td + 90 + 1;
-        sd = t2 + 45 + 1;
-    }
-    else
-        err = MEMORY_E;
-#else
-    t1 = t1d;
-    t2 = t2d;
-    sd = sdd;
+    (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 45 + 3), NULL,
+                                                       DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
 #endif
 
     (void)m;
 
     if (err == MP_OKAY) {
-        sp_2048_mul_d_45(sd, d, 1 << 11);
-        sp_2048_mul_d_90(t1, a, 1 << 11);
-        div = sd[44];
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = td;
+        t2 = td + 90 + 1;
+        sd = t2 + 45 + 1;
+#else
+        t1 = t1d;
+        t2 = t2d;
+        sd = sdd;
+#endif
+
+        sp_2048_mul_d_45(sd, d, 1L << 11);
+        sp_2048_mul_d_90(t1, a, 1L << 11);
+        dv = sd[44];
         for (i=45; i>=0; i--) {
             t1[45 + i] += t1[45 + i - 1] >> 23;
             t1[45 + i - 1] &= 0x7fffff;
+#ifndef WOLFSSL_SP_DIV_32
             d1 = t1[45 + i];
             d1 <<= 23;
             d1 += t1[45 + i - 1];
-            r1 = (sp_digit)(d1 / div);
+            r1 = (sp_digit)(d1 / dv);
+#else
+            r1 = sp_2048_div_word_45(t1[45 + i], t1[45 + i - 1], dv);
+#endif
 
             sp_2048_mul_d_45(t2, sd, r1);
-            sp_2048_sub_45(&t1[i], &t1[i], t2);
+            (void)sp_2048_sub_45(&t1[i], &t1[i], t2);
             t1[45 + i] -= t2[45];
             t1[45 + i] += t1[45 + i - 1] >> 23;
             t1[45 + i - 1] &= 0x7fffff;
-            r1 = (((-t1[45 + i]) << 23) - t1[45 + i - 1]) / div;
+            r1 = (((-t1[45 + i]) << 23) - t1[45 + i - 1]) / dv;
             r1 -= t1[45 + i];
             sp_2048_mul_d_45(t2, sd, r1);
-            sp_2048_add_45(&t1[i], &t1[i], t2);
+            (void)sp_2048_add_45(&t1[i], &t1[i], t2);
             t1[45 + i] += t1[45 + i - 1] >> 23;
             t1[45 + i - 1] &= 0x7fffff;
         }
         t1[45 - 1] += t1[45 - 2] >> 23;
         t1[45 - 2] &= 0x7fffff;
-        d1 = t1[45 - 1];
-        r1 = (sp_digit)(d1 / div);
+        r1 = t1[45 - 1] / dv;
 
         sp_2048_mul_d_45(t2, sd, r1);
         sp_2048_sub_45(t1, t1, t2);
-        XMEMCPY(r, t1, sizeof(*r) * 2 * 45);
+        XMEMCPY(r, t1, sizeof(*r) * 2U * 45U);
         for (i=0; i<43; i++) {
             r[i+1] += r[i] >> 23;
             r[i] &= 0x7fffff;
         }
-        sp_2048_cond_add_45(r, r, sd, 0 - (r[44] < 0));
-    }
-
-    sp_2048_rshift_45(r, r, 11);
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (td != NULL)
+        sp_2048_cond_add_45(r, r, sd, 0 - ((r[44] < 0) ?
+                    (sp_digit)1 : (sp_digit)0));
+
+        sp_2048_norm_45(r);
+        sp_2048_rshift_45(r, r, 11);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
 #endif
 
     return err;
@@ -1815,7 +1878,7 @@
  * m  A single precision number that is the modulus to reduce with.
  * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
  */
-static int sp_2048_mod_45(sp_digit* r, sp_digit* a, sp_digit* m)
+static int sp_2048_mod_45(sp_digit* r, const sp_digit* a, const sp_digit* m)
 {
     return sp_2048_div_45(a, m, NULL, r);
 }
@@ -1829,8 +1892,8 @@
  * m     A single precision number that is the modulus.
  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
  */
-static int sp_2048_mod_exp_45(sp_digit* r, sp_digit* a, sp_digit* e, int bits,
-    sp_digit* m, int reduceA)
+static int sp_2048_mod_exp_45(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits,
+    const sp_digit* m, int reduceA)
 {
 #ifdef WOLFSSL_SP_SMALL
     sp_digit* td;
@@ -1844,11 +1907,12 @@
 
     td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 45 * 2, NULL,
                             DYNAMIC_TYPE_TMP_BUFFER);
-    if (td == NULL)
-        err = MEMORY_E;
-
-    if (err == MP_OKAY) {
-        XMEMSET(td, 0, sizeof(*td) * 3 * 45 * 2);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        XMEMSET(td, 0, sizeof(*td) * 3U * 45U * 2U);
 
         norm = t[0] = td;
         t[1] = &td[45 * 2];
@@ -1857,10 +1921,12 @@
         sp_2048_mont_setup(m, &mp);
         sp_2048_mont_norm_45(norm, m);
 
-        if (reduceA)
+        if (reduceA != 0) {
             err = sp_2048_mod_45(t[1], a, m);
-        else
-            XMEMCPY(t[1], a, sizeof(sp_digit) * 45);
+        }
+        else {
+            XMEMCPY(t[1], a, sizeof(sp_digit) * 45U);
+        }
     }
     if (err == MP_OKAY) {
         sp_2048_mul_45(t[1], t[1], norm);
@@ -1873,8 +1939,9 @@
         n = e[i--] << (23 - c);
         for (; ; c--) {
             if (c == 0) {
-                if (i == -1)
+                if (i == -1) {
                     break;
+                }
 
                 n = e[i--];
                 c = 23;
@@ -1896,13 +1963,15 @@
 
         sp_2048_mont_reduce_45(t[0], m, mp);
         n = sp_2048_cmp_45(t[0], m);
-        sp_2048_cond_sub_45(t[0], t[0], m, (n < 0) - 1);
+        sp_2048_cond_sub_45(t[0], t[0], m, ((n < 0) ?
+                    (sp_digit)1 : (sp_digit)0) - 1);
         XMEMCPY(r, t[0], sizeof(*r) * 45 * 2);
 
     }
 
-    if (td != NULL)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
 
     return err;
 #elif defined(WOLFSSL_SP_CACHE_RESISTANT)
@@ -1922,24 +1991,23 @@
 #ifdef WOLFSSL_SMALL_STACK
     td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 45 * 2, NULL,
                             DYNAMIC_TYPE_TMP_BUFFER);
-    if (td == NULL)
-        err = MEMORY_E;
-
-    if (err == MP_OKAY) {
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
         t[0] = td;
         t[1] = &td[45 * 2];
         t[2] = &td[2 * 45 * 2];
+#endif
         norm = t[0];
-    }
-#else
-    norm = t[0];
-#endif
-
-    if (err == MP_OKAY) {
+
         sp_2048_mont_setup(m, &mp);
         sp_2048_mont_norm_45(norm, m);
 
-        if (reduceA) {
+        if (reduceA != 0) {
             err = sp_2048_mod_45(t[1], a, m);
             if (err == MP_OKAY) {
                 sp_2048_mul_45(t[1], t[1], norm);
@@ -1958,8 +2026,9 @@
         n = e[i--] << (23 - c);
         for (; ; c--) {
             if (c == 0) {
-                if (i == -1)
+                if (i == -1) {
                     break;
+                }
 
                 n = e[i--];
                 c = 23;
@@ -1979,13 +2048,15 @@
 
         sp_2048_mont_reduce_45(t[0], m, mp);
         n = sp_2048_cmp_45(t[0], m);
-        sp_2048_cond_sub_45(t[0], t[0], m, (n < 0) - 1);
+        sp_2048_cond_sub_45(t[0], t[0], m, ((n < 0) ?
+                   (sp_digit)1 : (sp_digit)0) - 1);
         XMEMCPY(r, t[0], sizeof(t[0]));
     }
 
 #ifdef WOLFSSL_SMALL_STACK
-    if (td != NULL)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
 #endif
 
     return err;
@@ -2007,23 +2078,22 @@
 #ifdef WOLFSSL_SMALL_STACK
     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 90, NULL,
                             DYNAMIC_TYPE_TMP_BUFFER);
-    if (td == NULL)
-        err = MEMORY_E;
-
-    if (err == MP_OKAY) {
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
         for (i=0; i<32; i++)
             t[i] = td + i * 90;
+#endif
         norm = t[0];
-    }
-#else
-    norm = t[0];
-#endif
-
-    if (err == MP_OKAY) {
+
         sp_2048_mont_setup(m, &mp);
         sp_2048_mont_norm_45(norm, m);
 
-        if (reduceA) {
+        if (reduceA != 0) {
             err = sp_2048_mod_45(t[1], a, m);
             if (err == MP_OKAY) {
                 sp_2048_mul_45(t[1], t[1], norm);
@@ -2071,10 +2141,12 @@
         bits = ((bits + 4) / 5) * 5;
         i = ((bits + 22) / 23) - 1;
         c = bits % 23;
-        if (c == 0)
+        if (c == 0) {
             c = 23;
-        if (i < 45)
+        }
+        if (i < 45) {
             n = e[i--] << (32 - c);
+        }
         else {
             n = 0;
             i--;
@@ -2083,7 +2155,7 @@
             n |= e[i--] << (9 - c);
             c += 23;
         }
-        y = n >> 27;
+        y = (n >> 27) & 0x1f;
         n <<= 5;
         c -= 5;
         XMEMCPY(rt, t[y], sizeof(rt));
@@ -2107,35 +2179,38 @@
 
         sp_2048_mont_reduce_45(rt, m, mp);
         n = sp_2048_cmp_45(rt, m);
-        sp_2048_cond_sub_45(rt, rt, m, (n < 0) - 1);
+        sp_2048_cond_sub_45(rt, rt, m, ((n < 0) ?
+                   (sp_digit)1 : (sp_digit)0) - 1);
         XMEMCPY(r, rt, sizeof(rt));
     }
 
 #ifdef WOLFSSL_SMALL_STACK
-    if (td != NULL)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return err;
-#endif
-}
-
-#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */
+    }
+#endif
+
+    return err;
+#endif
+}
+
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
 
 /* r = 2^n mod m where n is the number of bits to reduce by.
  * Given m must be 2048 bits, just need to subtract.
  *
  * r  A single precision number.
- * m  A signle precision number.
- */
-static void sp_2048_mont_norm_90(sp_digit* r, sp_digit* m)
+ * m  A single precision number.
+ */
+static void sp_2048_mont_norm_90(sp_digit* r, const sp_digit* m)
 {
     /* Set r = 2^n - 1. */
 #ifdef WOLFSSL_SP_SMALL
     int i;
 
-    for (i=0; i<89; i++)
+    for (i=0; i<89; i++) {
         r[i] = 0x7fffff;
+    }
 #else
     int i;
 
@@ -2151,10 +2226,10 @@
     }
     r[88] = 0x7fffff;
 #endif
-    r[89] = 0x1l;
+    r[89] = 0x1L;
 
     /* r = (2^n - 1) mod n */
-    sp_2048_sub_90(r, r, m);
+    (void)sp_2048_sub_90(r, r, m);
 
     /* Add one so r = 2^n mod m */
     r[0] += 1;
@@ -2173,22 +2248,23 @@
 #ifdef WOLFSSL_SP_SMALL
     int i;
 
-    for (i=89; i>=0; i--)
-        r |= (a[i] - b[i]) & (0 - !r);
-#else
-    int i;
-
-    r |= (a[89] - b[89]) & (0 - !r);
-    r |= (a[88] - b[88]) & (0 - !r);
+    for (i=89; i>=0; i--) {
+        r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    }
+#else
+    int i;
+
+    r |= (a[89] - b[89]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[88] - b[88]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
     for (i = 80; i >= 0; i -= 8) {
-        r |= (a[i + 7] - b[i + 7]) & (0 - !r);
-        r |= (a[i + 6] - b[i + 6]) & (0 - !r);
-        r |= (a[i + 5] - b[i + 5]) & (0 - !r);
-        r |= (a[i + 4] - b[i + 4]) & (0 - !r);
-        r |= (a[i + 3] - b[i + 3]) & (0 - !r);
-        r |= (a[i + 2] - b[i + 2]) & (0 - !r);
-        r |= (a[i + 1] - b[i + 1]) & (0 - !r);
-        r |= (a[i + 0] - b[i + 0]) & (0 - !r);
+        r |= (a[i + 7] - b[i + 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 6] - b[i + 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 5] - b[i + 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 4] - b[i + 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 3] - b[i + 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 2] - b[i + 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 1] - b[i + 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 0] - b[i + 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
     }
 #endif /* WOLFSSL_SP_SMALL */
 
@@ -2209,8 +2285,9 @@
 #ifdef WOLFSSL_SP_SMALL
     int i;
 
-    for (i = 0; i < 90; i++)
+    for (i = 0; i < 90; i++) {
         r[i] = a[i] - (b[i] & m);
+    }
 #else
     int i;
 
@@ -2254,27 +2331,27 @@
     int64_t t[8];
     int i;
 
-    t[0] = tb * a[0]; r[0] += t[0] & 0x7fffff;
+    t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x7fffff);
     for (i = 0; i < 88; i += 8) {
         t[1] = tb * a[i+1];
-        r[i+1] += (t[0] >> 23) + (t[1] & 0x7fffff);
+        r[i+1] += (sp_digit)((t[0] >> 23) + (t[1] & 0x7fffff));
         t[2] = tb * a[i+2];
-        r[i+2] += (t[1] >> 23) + (t[2] & 0x7fffff);
+        r[i+2] += (sp_digit)((t[1] >> 23) + (t[2] & 0x7fffff));
         t[3] = tb * a[i+3];
-        r[i+3] += (t[2] >> 23) + (t[3] & 0x7fffff);
+        r[i+3] += (sp_digit)((t[2] >> 23) + (t[3] & 0x7fffff));
         t[4] = tb * a[i+4];
-        r[i+4] += (t[3] >> 23) + (t[4] & 0x7fffff);
+        r[i+4] += (sp_digit)((t[3] >> 23) + (t[4] & 0x7fffff));
         t[5] = tb * a[i+5];
-        r[i+5] += (t[4] >> 23) + (t[5] & 0x7fffff);
+        r[i+5] += (sp_digit)((t[4] >> 23) + (t[5] & 0x7fffff));
         t[6] = tb * a[i+6];
-        r[i+6] += (t[5] >> 23) + (t[6] & 0x7fffff);
+        r[i+6] += (sp_digit)((t[5] >> 23) + (t[6] & 0x7fffff));
         t[7] = tb * a[i+7];
-        r[i+7] += (t[6] >> 23) + (t[7] & 0x7fffff);
+        r[i+7] += (sp_digit)((t[6] >> 23) + (t[7] & 0x7fffff));
         t[0] = tb * a[i+8];
-        r[i+8] += (t[7] >> 23) + (t[0] & 0x7fffff);
-    }
-    t[1] = tb * a[89]; r[89] += (t[0] >> 23) + (t[1] & 0x7fffff);
-    r[90] +=  t[1] >> 23;
+        r[i+8] += (sp_digit)((t[7] >> 23) + (t[0] & 0x7fffff));
+    }
+    t[1] = tb * a[89]; r[89] += (sp_digit)((t[0] >> 23) + (t[1] & 0x7fffff));
+    r[90] +=  (sp_digit)(t[1] >> 23);
 #endif /* WOLFSSL_SP_SMALL */
 }
 
@@ -2351,7 +2428,7 @@
     r[88] = n & 0x7fffff; n >>= 23; n += ((int64_t)a[179]) << 22;
     r[89] = (sp_digit)n;
 #endif /* WOLFSSL_SP_SMALL */
-    XMEMSET(&r[90], 0, sizeof(*r) * 90);
+    XMEMSET(&r[90], 0, sizeof(*r) * 90U);
 }
 
 /* Reduce the number back to 2048 bits using Montgomery reduction.
@@ -2360,18 +2437,21 @@
  * m   The single precision number representing the modulus.
  * mp  The digit representing the negative inverse of m mod 2^n.
  */
-static void sp_2048_mont_reduce_90(sp_digit* a, sp_digit* m, sp_digit mp)
+static void sp_2048_mont_reduce_90(sp_digit* a, const sp_digit* m, sp_digit mp)
 {
     int i;
     sp_digit mu;
 
+    sp_2048_norm_90(a + 90);
+
+#ifdef WOLFSSL_SP_DH
     if (mp != 1) {
         for (i=0; i<89; i++) {
             mu = (a[i] * mp) & 0x7fffff;
             sp_2048_mul_add_90(a+i, m, mu);
             a[i+1] += a[i] >> 23;
         }
-        mu = (a[i] * mp) & 0x1l;
+        mu = (a[i] * mp) & 0x1L;
         sp_2048_mul_add_90(a+i, m, mu);
         a[i+1] += a[i] >> 23;
         a[i] &= 0x7fffff;
@@ -2382,14 +2462,26 @@
             sp_2048_mul_add_90(a+i, m, mu);
             a[i+1] += a[i] >> 23;
         }
-        mu = a[i] & 0x1l;
+        mu = a[i] & 0x1L;
         sp_2048_mul_add_90(a+i, m, mu);
         a[i+1] += a[i] >> 23;
         a[i] &= 0x7fffff;
     }
+#else
+    for (i=0; i<89; i++) {
+        mu = (a[i] * mp) & 0x7fffff;
+        sp_2048_mul_add_90(a+i, m, mu);
+        a[i+1] += a[i] >> 23;
+    }
+    mu = (a[i] * mp) & 0x1L;
+    sp_2048_mul_add_90(a+i, m, mu);
+    a[i+1] += a[i] >> 23;
+    a[i] &= 0x7fffff;
+#endif
 
     sp_2048_mont_shift_90(a, a);
-    sp_2048_cond_sub_90(a, a, m, 0 - ((a[89] >> 1) > 0));
+    sp_2048_cond_sub_90(a, a, m, 0 - (((a[89] >> 1) > 0) ?
+            (sp_digit)1 : (sp_digit)0));
     sp_2048_norm_90(a);
 }
 
@@ -2402,8 +2494,8 @@
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_2048_mont_mul_90(sp_digit* r, sp_digit* a, sp_digit* b,
-        sp_digit* m, sp_digit mp)
+static void sp_2048_mont_mul_90(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
 {
     sp_2048_mul_90(r, a, b);
     sp_2048_mont_reduce_90(r, m, mp);
@@ -2416,7 +2508,7 @@
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_2048_mont_sqr_90(sp_digit* r, sp_digit* a, sp_digit* m,
+static void sp_2048_mont_sqr_90(sp_digit* r, const sp_digit* a, const sp_digit* m,
         sp_digit mp)
 {
     sp_2048_sqr_90(r, a);
@@ -2430,7 +2522,7 @@
  * b  A scalar.
  */
 SP_NOINLINE static void sp_2048_mul_d_180(sp_digit* r, const sp_digit* a,
-    const sp_digit b)
+    sp_digit b)
 {
 #ifdef WOLFSSL_SP_SMALL
     int64_t tb = b;
@@ -2491,8 +2583,9 @@
 #ifdef WOLFSSL_SP_SMALL
     int i;
 
-    for (i = 0; i < 90; i++)
+    for (i = 0; i < 90; i++) {
         r[i] = a[i] + (b[i] & m);
+    }
 #else
     int i;
 
@@ -2523,8 +2616,9 @@
 {
     int i;
 
-    for (i = 0; i < 90; i++)
+    for (i = 0; i < 90; i++) {
         r[i] = a[i] - b[i];
+    }
 
     return 0;
 }
@@ -2542,113 +2636,67 @@
 {
     int i;
 
-    for (i = 0; i < 90; i++)
+    for (i = 0; i < 90; i++) {
         r[i] = a[i] + b[i];
+    }
 
     return 0;
 }
 #endif
 SP_NOINLINE static void sp_2048_rshift_90(sp_digit* r, sp_digit* a, byte n)
 {
-#ifdef WOLFSSL_SP_SMALL
-    int i;
-
-    for (i=0; i<89; i++)
+    int i;
+
+#ifdef WOLFSSL_SP_SMALL
+    for (i=0; i<89; i++) {
         r[i] = ((a[i] >> n) | (a[i + 1] << (23 - n))) & 0x7fffff;
-#else
-    r[0] = ((a[0] >> n) | (a[1] << (23 - n))) & 0x7fffff;
-    r[1] = ((a[1] >> n) | (a[2] << (23 - n))) & 0x7fffff;
-    r[2] = ((a[2] >> n) | (a[3] << (23 - n))) & 0x7fffff;
-    r[3] = ((a[3] >> n) | (a[4] << (23 - n))) & 0x7fffff;
-    r[4] = ((a[4] >> n) | (a[5] << (23 - n))) & 0x7fffff;
-    r[5] = ((a[5] >> n) | (a[6] << (23 - n))) & 0x7fffff;
-    r[6] = ((a[6] >> n) | (a[7] << (23 - n))) & 0x7fffff;
-    r[7] = ((a[7] >> n) | (a[8] << (23 - n))) & 0x7fffff;
-    r[8] = ((a[8] >> n) | (a[9] << (23 - n))) & 0x7fffff;
-    r[9] = ((a[9] >> n) | (a[10] << (23 - n))) & 0x7fffff;
-    r[10] = ((a[10] >> n) | (a[11] << (23 - n))) & 0x7fffff;
-    r[11] = ((a[11] >> n) | (a[12] << (23 - n))) & 0x7fffff;
-    r[12] = ((a[12] >> n) | (a[13] << (23 - n))) & 0x7fffff;
-    r[13] = ((a[13] >> n) | (a[14] << (23 - n))) & 0x7fffff;
-    r[14] = ((a[14] >> n) | (a[15] << (23 - n))) & 0x7fffff;
-    r[15] = ((a[15] >> n) | (a[16] << (23 - n))) & 0x7fffff;
-    r[16] = ((a[16] >> n) | (a[17] << (23 - n))) & 0x7fffff;
-    r[17] = ((a[17] >> n) | (a[18] << (23 - n))) & 0x7fffff;
-    r[18] = ((a[18] >> n) | (a[19] << (23 - n))) & 0x7fffff;
-    r[19] = ((a[19] >> n) | (a[20] << (23 - n))) & 0x7fffff;
-    r[20] = ((a[20] >> n) | (a[21] << (23 - n))) & 0x7fffff;
-    r[21] = ((a[21] >> n) | (a[22] << (23 - n))) & 0x7fffff;
-    r[22] = ((a[22] >> n) | (a[23] << (23 - n))) & 0x7fffff;
-    r[23] = ((a[23] >> n) | (a[24] << (23 - n))) & 0x7fffff;
-    r[24] = ((a[24] >> n) | (a[25] << (23 - n))) & 0x7fffff;
-    r[25] = ((a[25] >> n) | (a[26] << (23 - n))) & 0x7fffff;
-    r[26] = ((a[26] >> n) | (a[27] << (23 - n))) & 0x7fffff;
-    r[27] = ((a[27] >> n) | (a[28] << (23 - n))) & 0x7fffff;
-    r[28] = ((a[28] >> n) | (a[29] << (23 - n))) & 0x7fffff;
-    r[29] = ((a[29] >> n) | (a[30] << (23 - n))) & 0x7fffff;
-    r[30] = ((a[30] >> n) | (a[31] << (23 - n))) & 0x7fffff;
-    r[31] = ((a[31] >> n) | (a[32] << (23 - n))) & 0x7fffff;
-    r[32] = ((a[32] >> n) | (a[33] << (23 - n))) & 0x7fffff;
-    r[33] = ((a[33] >> n) | (a[34] << (23 - n))) & 0x7fffff;
-    r[34] = ((a[34] >> n) | (a[35] << (23 - n))) & 0x7fffff;
-    r[35] = ((a[35] >> n) | (a[36] << (23 - n))) & 0x7fffff;
-    r[36] = ((a[36] >> n) | (a[37] << (23 - n))) & 0x7fffff;
-    r[37] = ((a[37] >> n) | (a[38] << (23 - n))) & 0x7fffff;
-    r[38] = ((a[38] >> n) | (a[39] << (23 - n))) & 0x7fffff;
-    r[39] = ((a[39] >> n) | (a[40] << (23 - n))) & 0x7fffff;
-    r[40] = ((a[40] >> n) | (a[41] << (23 - n))) & 0x7fffff;
-    r[41] = ((a[41] >> n) | (a[42] << (23 - n))) & 0x7fffff;
-    r[42] = ((a[42] >> n) | (a[43] << (23 - n))) & 0x7fffff;
-    r[43] = ((a[43] >> n) | (a[44] << (23 - n))) & 0x7fffff;
-    r[44] = ((a[44] >> n) | (a[45] << (23 - n))) & 0x7fffff;
-    r[45] = ((a[45] >> n) | (a[46] << (23 - n))) & 0x7fffff;
-    r[46] = ((a[46] >> n) | (a[47] << (23 - n))) & 0x7fffff;
-    r[47] = ((a[47] >> n) | (a[48] << (23 - n))) & 0x7fffff;
-    r[48] = ((a[48] >> n) | (a[49] << (23 - n))) & 0x7fffff;
-    r[49] = ((a[49] >> n) | (a[50] << (23 - n))) & 0x7fffff;
-    r[50] = ((a[50] >> n) | (a[51] << (23 - n))) & 0x7fffff;
-    r[51] = ((a[51] >> n) | (a[52] << (23 - n))) & 0x7fffff;
-    r[52] = ((a[52] >> n) | (a[53] << (23 - n))) & 0x7fffff;
-    r[53] = ((a[53] >> n) | (a[54] << (23 - n))) & 0x7fffff;
-    r[54] = ((a[54] >> n) | (a[55] << (23 - n))) & 0x7fffff;
-    r[55] = ((a[55] >> n) | (a[56] << (23 - n))) & 0x7fffff;
-    r[56] = ((a[56] >> n) | (a[57] << (23 - n))) & 0x7fffff;
-    r[57] = ((a[57] >> n) | (a[58] << (23 - n))) & 0x7fffff;
-    r[58] = ((a[58] >> n) | (a[59] << (23 - n))) & 0x7fffff;
-    r[59] = ((a[59] >> n) | (a[60] << (23 - n))) & 0x7fffff;
-    r[60] = ((a[60] >> n) | (a[61] << (23 - n))) & 0x7fffff;
-    r[61] = ((a[61] >> n) | (a[62] << (23 - n))) & 0x7fffff;
-    r[62] = ((a[62] >> n) | (a[63] << (23 - n))) & 0x7fffff;
-    r[63] = ((a[63] >> n) | (a[64] << (23 - n))) & 0x7fffff;
-    r[64] = ((a[64] >> n) | (a[65] << (23 - n))) & 0x7fffff;
-    r[65] = ((a[65] >> n) | (a[66] << (23 - n))) & 0x7fffff;
-    r[66] = ((a[66] >> n) | (a[67] << (23 - n))) & 0x7fffff;
-    r[67] = ((a[67] >> n) | (a[68] << (23 - n))) & 0x7fffff;
-    r[68] = ((a[68] >> n) | (a[69] << (23 - n))) & 0x7fffff;
-    r[69] = ((a[69] >> n) | (a[70] << (23 - n))) & 0x7fffff;
-    r[70] = ((a[70] >> n) | (a[71] << (23 - n))) & 0x7fffff;
-    r[71] = ((a[71] >> n) | (a[72] << (23 - n))) & 0x7fffff;
-    r[72] = ((a[72] >> n) | (a[73] << (23 - n))) & 0x7fffff;
-    r[73] = ((a[73] >> n) | (a[74] << (23 - n))) & 0x7fffff;
-    r[74] = ((a[74] >> n) | (a[75] << (23 - n))) & 0x7fffff;
-    r[75] = ((a[75] >> n) | (a[76] << (23 - n))) & 0x7fffff;
-    r[76] = ((a[76] >> n) | (a[77] << (23 - n))) & 0x7fffff;
-    r[77] = ((a[77] >> n) | (a[78] << (23 - n))) & 0x7fffff;
-    r[78] = ((a[78] >> n) | (a[79] << (23 - n))) & 0x7fffff;
-    r[79] = ((a[79] >> n) | (a[80] << (23 - n))) & 0x7fffff;
-    r[80] = ((a[80] >> n) | (a[81] << (23 - n))) & 0x7fffff;
-    r[81] = ((a[81] >> n) | (a[82] << (23 - n))) & 0x7fffff;
-    r[82] = ((a[82] >> n) | (a[83] << (23 - n))) & 0x7fffff;
-    r[83] = ((a[83] >> n) | (a[84] << (23 - n))) & 0x7fffff;
-    r[84] = ((a[84] >> n) | (a[85] << (23 - n))) & 0x7fffff;
-    r[85] = ((a[85] >> n) | (a[86] << (23 - n))) & 0x7fffff;
-    r[86] = ((a[86] >> n) | (a[87] << (23 - n))) & 0x7fffff;
-    r[87] = ((a[87] >> n) | (a[88] << (23 - n))) & 0x7fffff;
+    }
+#else
+    for (i=0; i<88; i += 8) {
+        r[i+0] = ((a[i+0] >> n) | (a[i+1] << (23 - n))) & 0x7fffff;
+        r[i+1] = ((a[i+1] >> n) | (a[i+2] << (23 - n))) & 0x7fffff;
+        r[i+2] = ((a[i+2] >> n) | (a[i+3] << (23 - n))) & 0x7fffff;
+        r[i+3] = ((a[i+3] >> n) | (a[i+4] << (23 - n))) & 0x7fffff;
+        r[i+4] = ((a[i+4] >> n) | (a[i+5] << (23 - n))) & 0x7fffff;
+        r[i+5] = ((a[i+5] >> n) | (a[i+6] << (23 - n))) & 0x7fffff;
+        r[i+6] = ((a[i+6] >> n) | (a[i+7] << (23 - n))) & 0x7fffff;
+        r[i+7] = ((a[i+7] >> n) | (a[i+8] << (23 - n))) & 0x7fffff;
+    }
     r[88] = ((a[88] >> n) | (a[89] << (23 - n))) & 0x7fffff;
 #endif
     r[89] = a[89] >> n;
 }
 
+#ifdef WOLFSSL_SP_DIV_32
+static WC_INLINE sp_digit sp_2048_div_word_90(sp_digit d1, sp_digit d0,
+    sp_digit dv)
+{
+    sp_digit d, r, t;
+
+    /* All 23 bits from d1 and top 8 bits from d0. */
+    d = (d1 << 8) | (d0 >> 15);
+    r = d / dv;
+    d -= r * dv;
+    /* Up to 9 bits in r */
+    /* Next 8 bits from d0. */
+    r <<= 8;
+    d <<= 8;
+    d |= (d0 >> 7) & ((1 << 8) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 17 bits in r */
+    /* Remaining 7 bits from d0. */
+    r <<= 7;
+    d <<= 7;
+    d |= d0 & ((1 << 7) - 1);
+    t = d / dv;
+    r += t;
+
+    return r;
+}
+#endif /* WOLFSSL_SP_DIV_32 */
+
 /* Divide d in a and put remainder into r (m*d + r = a)
  * m is not calculated as it is not needed at this time.
  *
@@ -2658,13 +2706,15 @@
  * r  Remainder from the division.
  * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
  */
-static int sp_2048_div_90(sp_digit* a, sp_digit* d, sp_digit* m,
+static int sp_2048_div_90(const sp_digit* a, const sp_digit* d, sp_digit* m,
         sp_digit* r)
 {
     int i;
+#ifndef WOLFSSL_SP_DIV_32
     int64_t d1;
-    sp_digit div, r1;
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#endif
+    sp_digit dv, r1;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit* td;
 #else
     sp_digit t1d[180 + 1], t2d[90 + 1], sdd[90 + 1];
@@ -2674,67 +2724,78 @@
     sp_digit* sd;
     int err = MP_OKAY;
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    td = XMALLOC(sizeof(sp_digit) * (4 * 90 + 3), NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    if (td != NULL) {
-        t1 = td;
-        t2 = td + 180 + 1;
-        sd = t2 + 90 + 1;
-    }
-    else
-        err = MEMORY_E;
-#else
-    t1 = t1d;
-    t2 = t2d;
-    sd = sdd;
+    (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 90 + 3), NULL,
+                                                       DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
 #endif
 
     (void)m;
 
     if (err == MP_OKAY) {
-        sp_2048_mul_d_90(sd, d, 1 << 22);
-        sp_2048_mul_d_180(t1, a, 1 << 22);
-        div = sd[89];
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = td;
+        t2 = td + 180 + 1;
+        sd = t2 + 90 + 1;
+#else
+        t1 = t1d;
+        t2 = t2d;
+        sd = sdd;
+#endif
+
+        sp_2048_mul_d_90(sd, d, 1L << 22);
+        sp_2048_mul_d_180(t1, a, 1L << 22);
+        dv = sd[89];
         for (i=90; i>=0; i--) {
             t1[90 + i] += t1[90 + i - 1] >> 23;
             t1[90 + i - 1] &= 0x7fffff;
+#ifndef WOLFSSL_SP_DIV_32
             d1 = t1[90 + i];
             d1 <<= 23;
             d1 += t1[90 + i - 1];
-            r1 = (sp_digit)(d1 / div);
+            r1 = (sp_digit)(d1 / dv);
+#else
+            r1 = sp_2048_div_word_90(t1[90 + i], t1[90 + i - 1], dv);
+#endif
 
             sp_2048_mul_d_90(t2, sd, r1);
-            sp_2048_sub_90(&t1[i], &t1[i], t2);
+            (void)sp_2048_sub_90(&t1[i], &t1[i], t2);
             t1[90 + i] -= t2[90];
             t1[90 + i] += t1[90 + i - 1] >> 23;
             t1[90 + i - 1] &= 0x7fffff;
-            r1 = (((-t1[90 + i]) << 23) - t1[90 + i - 1]) / div;
+            r1 = (((-t1[90 + i]) << 23) - t1[90 + i - 1]) / dv;
             r1 -= t1[90 + i];
             sp_2048_mul_d_90(t2, sd, r1);
-            sp_2048_add_90(&t1[i], &t1[i], t2);
+            (void)sp_2048_add_90(&t1[i], &t1[i], t2);
             t1[90 + i] += t1[90 + i - 1] >> 23;
             t1[90 + i - 1] &= 0x7fffff;
         }
         t1[90 - 1] += t1[90 - 2] >> 23;
         t1[90 - 2] &= 0x7fffff;
-        d1 = t1[90 - 1];
-        r1 = (sp_digit)(d1 / div);
+        r1 = t1[90 - 1] / dv;
 
         sp_2048_mul_d_90(t2, sd, r1);
         sp_2048_sub_90(t1, t1, t2);
-        XMEMCPY(r, t1, sizeof(*r) * 2 * 90);
+        XMEMCPY(r, t1, sizeof(*r) * 2U * 90U);
         for (i=0; i<88; i++) {
             r[i+1] += r[i] >> 23;
             r[i] &= 0x7fffff;
         }
-        sp_2048_cond_add_90(r, r, sd, 0 - (r[89] < 0));
-    }
-
-    sp_2048_rshift_90(r, r, 22);
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (td != NULL)
+        sp_2048_cond_add_90(r, r, sd, 0 - ((r[89] < 0) ?
+                    (sp_digit)1 : (sp_digit)0));
+
+        sp_2048_norm_90(r);
+        sp_2048_rshift_90(r, r, 22);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
 #endif
 
     return err;
@@ -2747,12 +2808,13 @@
  * m  A single precision number that is the modulus to reduce with.
  * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
  */
-static int sp_2048_mod_90(sp_digit* r, sp_digit* a, sp_digit* m)
+static int sp_2048_mod_90(sp_digit* r, const sp_digit* a, const sp_digit* m)
 {
     return sp_2048_div_90(a, m, NULL, r);
 }
 
-#if defined(SP_RSA_PRIVATE_EXP_D) || defined(WOLFSSL_HAVE_SP_DH)
+#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
+                                                     defined(WOLFSSL_HAVE_SP_DH)
 /* Modular exponentiate a to the e mod m. (r = a^e mod m)
  *
  * r     A single precision number that is the result of the operation.
@@ -2762,8 +2824,8 @@
  * m     A single precision number that is the modulus.
  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
  */
-static int sp_2048_mod_exp_90(sp_digit* r, sp_digit* a, sp_digit* e, int bits,
-    sp_digit* m, int reduceA)
+static int sp_2048_mod_exp_90(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits,
+    const sp_digit* m, int reduceA)
 {
 #ifdef WOLFSSL_SP_SMALL
     sp_digit* td;
@@ -2777,11 +2839,12 @@
 
     td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 90 * 2, NULL,
                             DYNAMIC_TYPE_TMP_BUFFER);
-    if (td == NULL)
-        err = MEMORY_E;
-
-    if (err == MP_OKAY) {
-        XMEMSET(td, 0, sizeof(*td) * 3 * 90 * 2);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        XMEMSET(td, 0, sizeof(*td) * 3U * 90U * 2U);
 
         norm = t[0] = td;
         t[1] = &td[90 * 2];
@@ -2790,10 +2853,12 @@
         sp_2048_mont_setup(m, &mp);
         sp_2048_mont_norm_90(norm, m);
 
-        if (reduceA)
+        if (reduceA != 0) {
             err = sp_2048_mod_90(t[1], a, m);
-        else
-            XMEMCPY(t[1], a, sizeof(sp_digit) * 90);
+        }
+        else {
+            XMEMCPY(t[1], a, sizeof(sp_digit) * 90U);
+        }
     }
     if (err == MP_OKAY) {
         sp_2048_mul_90(t[1], t[1], norm);
@@ -2806,8 +2871,9 @@
         n = e[i--] << (23 - c);
         for (; ; c--) {
             if (c == 0) {
-                if (i == -1)
+                if (i == -1) {
                     break;
+                }
 
                 n = e[i--];
                 c = 23;
@@ -2829,13 +2895,15 @@
 
         sp_2048_mont_reduce_90(t[0], m, mp);
         n = sp_2048_cmp_90(t[0], m);
-        sp_2048_cond_sub_90(t[0], t[0], m, (n < 0) - 1);
+        sp_2048_cond_sub_90(t[0], t[0], m, ((n < 0) ?
+                    (sp_digit)1 : (sp_digit)0) - 1);
         XMEMCPY(r, t[0], sizeof(*r) * 90 * 2);
 
     }
 
-    if (td != NULL)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
 
     return err;
 #elif defined(WOLFSSL_SP_CACHE_RESISTANT)
@@ -2855,24 +2923,23 @@
 #ifdef WOLFSSL_SMALL_STACK
     td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 90 * 2, NULL,
                             DYNAMIC_TYPE_TMP_BUFFER);
-    if (td == NULL)
-        err = MEMORY_E;
-
-    if (err == MP_OKAY) {
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
         t[0] = td;
         t[1] = &td[90 * 2];
         t[2] = &td[2 * 90 * 2];
+#endif
         norm = t[0];
-    }
-#else
-    norm = t[0];
-#endif
-
-    if (err == MP_OKAY) {
+
         sp_2048_mont_setup(m, &mp);
         sp_2048_mont_norm_90(norm, m);
 
-        if (reduceA) {
+        if (reduceA != 0) {
             err = sp_2048_mod_90(t[1], a, m);
             if (err == MP_OKAY) {
                 sp_2048_mul_90(t[1], t[1], norm);
@@ -2891,8 +2958,9 @@
         n = e[i--] << (23 - c);
         for (; ; c--) {
             if (c == 0) {
-                if (i == -1)
+                if (i == -1) {
                     break;
+                }
 
                 n = e[i--];
                 c = 23;
@@ -2912,13 +2980,15 @@
 
         sp_2048_mont_reduce_90(t[0], m, mp);
         n = sp_2048_cmp_90(t[0], m);
-        sp_2048_cond_sub_90(t[0], t[0], m, (n < 0) - 1);
+        sp_2048_cond_sub_90(t[0], t[0], m, ((n < 0) ?
+                   (sp_digit)1 : (sp_digit)0) - 1);
         XMEMCPY(r, t[0], sizeof(t[0]));
     }
 
 #ifdef WOLFSSL_SMALL_STACK
-    if (td != NULL)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
 #endif
 
     return err;
@@ -2940,23 +3010,22 @@
 #ifdef WOLFSSL_SMALL_STACK
     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 180, NULL,
                             DYNAMIC_TYPE_TMP_BUFFER);
-    if (td == NULL)
-        err = MEMORY_E;
-
-    if (err == MP_OKAY) {
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
         for (i=0; i<32; i++)
             t[i] = td + i * 180;
+#endif
         norm = t[0];
-    }
-#else
-    norm = t[0];
-#endif
-
-    if (err == MP_OKAY) {
+
         sp_2048_mont_setup(m, &mp);
         sp_2048_mont_norm_90(norm, m);
 
-        if (reduceA) {
+        if (reduceA != 0) {
             err = sp_2048_mod_90(t[1], a, m);
             if (err == MP_OKAY) {
                 sp_2048_mul_90(t[1], t[1], norm);
@@ -3004,10 +3073,12 @@
         bits = ((bits + 4) / 5) * 5;
         i = ((bits + 22) / 23) - 1;
         c = bits % 23;
-        if (c == 0)
+        if (c == 0) {
             c = 23;
-        if (i < 90)
+        }
+        if (i < 90) {
             n = e[i--] << (32 - c);
+        }
         else {
             n = 0;
             i--;
@@ -3016,7 +3087,7 @@
             n |= e[i--] << (9 - c);
             c += 23;
         }
-        y = n >> 27;
+        y = (n >> 27) & 0x1f;
         n <<= 5;
         c -= 5;
         XMEMCPY(rt, t[y], sizeof(rt));
@@ -3040,57 +3111,23 @@
 
         sp_2048_mont_reduce_90(rt, m, mp);
         n = sp_2048_cmp_90(rt, m);
-        sp_2048_cond_sub_90(rt, rt, m, (n < 0) - 1);
+        sp_2048_cond_sub_90(rt, rt, m, ((n < 0) ?
+                   (sp_digit)1 : (sp_digit)0) - 1);
         XMEMCPY(r, rt, sizeof(rt));
     }
 
 #ifdef WOLFSSL_SMALL_STACK
-    if (td != NULL)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return err;
-#endif
-}
-#endif /* SP_RSA_PRIVATE_EXP_D || WOLFSSL_HAVE_SP_DH */
-
-#if defined(WOLFSSL_HAVE_SP_RSA) && !defined(SP_RSA_PRIVATE_EXP_D) && \
-                                    !defined(RSA_LOW_MEM)
-/* AND m into each word of a and store in r.
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * m  Mask to AND against each digit.
- */
-static void sp_2048_mask_45(sp_digit* r, sp_digit* a, sp_digit m)
-{
-#ifdef WOLFSSL_SP_SMALL
-    int i;
-
-    for (i=0; i<45; i++)
-        r[i] = a[i] & m;
-#else
-    int i;
-
-    for (i = 0; i < 40; i += 8) {
-        r[i+0] = a[i+0] & m;
-        r[i+1] = a[i+1] & m;
-        r[i+2] = a[i+2] & m;
-        r[i+3] = a[i+3] & m;
-        r[i+4] = a[i+4] & m;
-        r[i+5] = a[i+5] & m;
-        r[i+6] = a[i+6] & m;
-        r[i+7] = a[i+7] & m;
-    }
-    r[40] = a[40] & m;
-    r[41] = a[41] & m;
-    r[42] = a[42] & m;
-    r[43] = a[43] & m;
-    r[44] = a[44] & m;
-#endif
-}
-
-#endif
+    }
+#endif
+
+    return err;
+#endif
+}
+#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || */
+       /* WOLFSSL_HAVE_SP_DH */
+
 #ifdef WOLFSSL_HAVE_SP_RSA
 /* RSA public key operation.
  *
@@ -3113,20 +3150,30 @@
     sp_digit* m;
     sp_digit* r;
     sp_digit* norm;
-    sp_digit e[1];
+    sp_digit e[1] = {0};
     sp_digit mp;
     int i;
     int err = MP_OKAY;
 
-    if (*outLen < 256)
+    if (*outLen < 256U) {
         err = MP_TO_E;
-    if (err == MP_OKAY && (mp_count_bits(em) > 23 || inLen > 256 ||
-                                                     mp_count_bits(mm) != 2048))
-        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(em) > 23) {
+            err = MP_READ_E;
+        }
+        if (inLen > 256U) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 2048) {
+            err = MP_READ_E;
+        }
+    }
 
     if (err == MP_OKAY) {
         d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 90 * 5, NULL,
-                               DYNAMIC_TYPE_TMP_BUFFER);
+                                                              DYNAMIC_TYPE_RSA);
         if (d == NULL)
             err = MEMORY_E;
     }
@@ -3139,14 +3186,16 @@
 
         sp_2048_from_bin(a, 90, in, inLen);
 #if DIGIT_BIT >= 23
-        e[0] = em->dp[0];
-#else
-        e[0] = em->dp[0];
-        if (em->used > 1)
+        e[0] = (sp_digit)em->dp[0];
+#else
+        e[0] = (sp_digit)em->dp[0];
+        if (em->used > 1) {
             e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
-#endif
-        if (e[0] == 0)
+        }
+#endif
+        if (e[0] == 0) {
             err = MP_EXPTMOD_E;
+        }
     }
 
     if (err == MP_OKAY) {
@@ -3160,31 +3209,36 @@
         err = sp_2048_mod_90(a, a, m);
     }
     if (err == MP_OKAY) {
-        for (i=22; i>=0; i--)
-            if (e[0] >> i)
-                break;
+        for (i=22; i>=0; i--) {
+            if ((e[0] >> i) != 0) {
+                break;
+            }
+        }
 
         XMEMCPY(r, a, sizeof(sp_digit) * 90 * 2);
         for (i--; i>=0; i--) {
             sp_2048_mont_sqr_90(r, r, m, mp);
 
-            if (((e[0] >> i) & 1) == 1)
+            if (((e[0] >> i) & 1) == 1) {
                 sp_2048_mont_mul_90(r, r, a, m, mp);
+            }
         }
         sp_2048_mont_reduce_90(r, m, mp);
         mp = sp_2048_cmp_90(r, m);
-        sp_2048_cond_sub_90(r, r, m, (mp < 0) - 1);
+        sp_2048_cond_sub_90(r, r, m, ((mp < 0) ?
+                    (sp_digit)1 : (sp_digit)0)- 1);
 
         sp_2048_to_bin(r, out);
         *outLen = 256;
     }
 
-    if (d != NULL)
-        XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-
-    return err;
-#else
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+
+    return err;
+#else
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit ad[180], md[90], rd[180];
 #else
     sp_digit* d = NULL;
@@ -3192,21 +3246,31 @@
     sp_digit* a;
     sp_digit* m;
     sp_digit* r;
-    sp_digit e[1];
-    int err = MP_OKAY;
-
-    if (*outLen < 256)
+    sp_digit e[1] = {0};
+    int err = MP_OKAY;
+
+    if (*outLen < 256U) {
         err = MP_TO_E;
-    if (err == MP_OKAY && (mp_count_bits(em) > 23 || inLen > 256 ||
-                                                     mp_count_bits(mm) != 2048))
-        err = MP_READ_E;
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(em) > 23) {
+            err = MP_READ_E;
+        }
+        if (inLen > 256U) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 2048) {
+            err = MP_READ_E;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     if (err == MP_OKAY) {
         d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 90 * 5, NULL,
-                               DYNAMIC_TYPE_TMP_BUFFER);
-        if (d == NULL)
-            err = MEMORY_E;
+                                                              DYNAMIC_TYPE_RSA);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
     }
 
     if (err == MP_OKAY) {
@@ -3223,23 +3287,23 @@
     if (err == MP_OKAY) {
         sp_2048_from_bin(a, 90, in, inLen);
 #if DIGIT_BIT >= 23
-        e[0] = em->dp[0];
-#else
-        e[0] = em->dp[0];
-        if (em->used > 1)
+        e[0] = (sp_digit)em->dp[0];
+#else
+        e[0] = (sp_digit)em->dp[0];
+        if (em->used > 1) {
             e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
-#endif
-        if (e[0] == 0)
+        }
+#endif
+        if (e[0] == 0) {
             err = MP_EXPTMOD_E;
+        }
     }
     if (err == MP_OKAY) {
         sp_2048_from_mp(m, 90, mm);
 
         if (e[0] == 0x3) {
-            if (err == MP_OKAY) {
-                sp_2048_sqr_90(r, a);
-                err = sp_2048_mod_90(r, r, m);
-            }
+            sp_2048_sqr_90(r, a);
+            err = sp_2048_mod_90(r, r, m);
             if (err == MP_OKAY) {
                 sp_2048_mul_90(r, a, r);
                 err = sp_2048_mod_90(r, r, m);
@@ -3253,26 +3317,28 @@
             sp_2048_mont_setup(m, &mp);
             sp_2048_mont_norm_90(norm, m);
 
-            if (err == MP_OKAY) {
-                sp_2048_mul_90(a, a, norm);
-                err = sp_2048_mod_90(a, a, m);
-            }
+            sp_2048_mul_90(a, a, norm);
+            err = sp_2048_mod_90(a, a, m);
 
             if (err == MP_OKAY) {
-                for (i=22; i>=0; i--)
-                    if (e[0] >> i)
+                for (i=22; i>=0; i--) {
+                    if ((e[0] >> i) != 0) {
                         break;
-
-                XMEMCPY(r, a, sizeof(sp_digit) * 180);
+                    }
+                }
+
+                XMEMCPY(r, a, sizeof(sp_digit) * 180U);
                 for (i--; i>=0; i--) {
                     sp_2048_mont_sqr_90(r, r, m, mp);
 
-                    if (((e[0] >> i) & 1) == 1)
+                    if (((e[0] >> i) & 1) == 1) {
                         sp_2048_mont_mul_90(r, r, a, m, mp);
+                    }
                 }
                 sp_2048_mont_reduce_90(r, m, mp);
                 mp = sp_2048_cmp_90(r, m);
-                sp_2048_cond_sub_90(r, r, m, (mp < 0) - 1);
+                sp_2048_cond_sub_90(r, r, m, ((mp < 0) ?
+                           (sp_digit)1 : (sp_digit)0) - 1);
             }
         }
     }
@@ -3282,15 +3348,19 @@
         *outLen = 256;
     }
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (d != NULL)
-        XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return err;
-#endif /* WOLFSSL_SP_SMALL */
-}
-
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+#endif
+
+    return err;
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+#if !defined(SP_RSA_PRIVATE_EXP_D) && !defined(RSA_LOW_MEM)
+#endif /* !SP_RSA_PRIVATE_EXP_D && !RSA_LOW_MEM */
 /* RSA private key operation.
  *
  * in      Array of bytes representing the number to exponentiate, base.
@@ -3313,7 +3383,7 @@
     byte* out, word32* outLen)
 {
 #if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit* a;
     sp_digit* d = NULL;
     sp_digit* m;
@@ -3326,21 +3396,31 @@
     (void)dqm;
     (void)qim;
 
-    if (*outLen < 256)
+    if (*outLen < 256U) {
         err = MP_TO_E;
-    if (err == MP_OKAY && (mp_count_bits(dm) > 2048 || inLen > 256 ||
-                                                     mp_count_bits(mm) != 2048))
-        err = MP_READ_E;
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(dm) > 2048) {
+           err = MP_READ_E;
+        }
+        if (inLen > 256) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 2048) {
+            err = MP_READ_E;
+        }
+    }
 
     if (err == MP_OKAY) {
         d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 90 * 4, NULL,
-                               DYNAMIC_TYPE_TMP_BUFFER);
-        if (d == NULL)
-            err = MEMORY_E;
+                                                              DYNAMIC_TYPE_RSA);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
     }
     if (err == MP_OKAY) {
         a = d + 90;
-        m = a + 90;
+        m = a + 180;
         r = a;
 
         sp_2048_from_bin(a, 90, in, inLen);
@@ -3355,7 +3435,7 @@
 
     if (d != NULL) {
         XMEMSET(d, 0, sizeof(sp_digit) * 90);
-        XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
     }
 
     return err;
@@ -3370,11 +3450,20 @@
     (void)dqm;
     (void)qim;
 
-    if (*outLen < 256)
+    if (*outLen < 256U) {
         err = MP_TO_E;
-    if (err == MP_OKAY && (mp_count_bits(dm) > 2048 || inLen > 256 ||
-                                                     mp_count_bits(mm) != 2048))
-        err = MP_READ_E;
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(dm) > 2048) {
+            err = MP_READ_E;
+        }
+        if (inLen > 256U) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 2048) {
+            err = MP_READ_E;
+        }
+    }
 
     if (err == MP_OKAY) {
         sp_2048_from_bin(a, 90, in, inLen);
@@ -3393,7 +3482,7 @@
     return err;
 #endif /* WOLFSSL_SP_SMALL || defined(WOLFSSL_SMALL_STACK) */
 #else
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit* t = NULL;
     sp_digit* a;
     sp_digit* p;
@@ -3401,7 +3490,6 @@
     sp_digit* dp;
     sp_digit* dq;
     sp_digit* qi;
-    sp_digit* tmp;
     sp_digit* tmpa;
     sp_digit* tmpb;
     sp_digit* r;
@@ -3410,16 +3498,24 @@
     (void)dm;
     (void)mm;
 
-    if (*outLen < 256)
+    if (*outLen < 256U) {
         err = MP_TO_E;
-    if (err == MP_OKAY && (inLen > 256 || mp_count_bits(mm) != 2048))
-        err = MP_READ_E;
+    }
+    if (err == MP_OKAY) {
+        if (inLen > 256) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 2048) {
+            err = MP_READ_E;
+        }
+    }
 
     if (err == MP_OKAY) {
         t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 45 * 11, NULL,
-                               DYNAMIC_TYPE_TMP_BUFFER);
-        if (t == NULL)
-            err = MEMORY_E;
+                                                              DYNAMIC_TYPE_RSA);
+        if (t == NULL) {
+            err = MEMORY_E;
+        }
     }
     if (err == MP_OKAY) {
         a = t;
@@ -3429,8 +3525,7 @@
         tmpa = qi + 45;
         tmpb = tmpa + 90;
 
-        tmp = t;
-        r = tmp + 90;
+        r = t + 90;
 
         sp_2048_from_bin(a, 90, in, inLen);
         sp_2048_from_mp(p, 45, pm);
@@ -3443,9 +3538,9 @@
         err = sp_2048_mod_exp_45(tmpb, a, dq, 1024, q, 1);
     }
     if (err == MP_OKAY) {
-        sp_2048_sub_45(tmpa, tmpa, tmpb);
-        sp_2048_mask_45(tmp, p, tmpa[44] >> 31);
-        sp_2048_add_45(tmpa, tmpa, tmp);
+        (void)sp_2048_sub_45(tmpa, tmpa, tmpb);
+        sp_2048_cond_add_45(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[44] >> 31));
+        sp_2048_cond_add_45(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[44] >> 31));
 
         sp_2048_from_mp(qi, 45, qim);
         sp_2048_mul_45(tmpa, tmpa, qi);
@@ -3454,7 +3549,7 @@
 
     if (err == MP_OKAY) {
         sp_2048_mul_45(tmpa, q, tmpa);
-        sp_2048_add_90(r, tmpb, tmpa);
+        (void)sp_2048_add_90(r, tmpb, tmpa);
         sp_2048_norm_90(r);
 
         sp_2048_to_bin(r, out);
@@ -3463,24 +3558,31 @@
 
     if (t != NULL) {
         XMEMSET(t, 0, sizeof(sp_digit) * 45 * 11);
-        XFREE(t, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(t, NULL, DYNAMIC_TYPE_RSA);
     }
 
     return err;
 #else
     sp_digit a[90 * 2];
     sp_digit p[45], q[45], dp[45], dq[45], qi[45];
-    sp_digit tmp[90], tmpa[90], tmpb[90];
+    sp_digit tmpa[90], tmpb[90];
     sp_digit* r = a;
     int err = MP_OKAY;
 
     (void)dm;
     (void)mm;
 
-    if (*outLen < 256)
+    if (*outLen < 256U) {
         err = MP_TO_E;
-    if (err == MP_OKAY && (inLen > 256 || mp_count_bits(mm) != 2048))
-        err = MP_READ_E;
+    }
+    if (err == MP_OKAY) {
+        if (inLen > 256U) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 2048) {
+            err = MP_READ_E;
+        }
+    }
 
     if (err == MP_OKAY) {
         sp_2048_from_bin(a, 90, in, inLen);
@@ -3492,20 +3594,21 @@
 
         err = sp_2048_mod_exp_45(tmpa, a, dp, 1024, p, 1);
     }
-    if (err == MP_OKAY)
+    if (err == MP_OKAY) {
         err = sp_2048_mod_exp_45(tmpb, a, dq, 1024, q, 1);
-
-    if (err == MP_OKAY) {
-        sp_2048_sub_45(tmpa, tmpa, tmpb);
-        sp_2048_mask_45(tmp, p, tmpa[44] >> 31);
-        sp_2048_add_45(tmpa, tmpa, tmp);
+    }
+
+    if (err == MP_OKAY) {
+        (void)sp_2048_sub_45(tmpa, tmpa, tmpb);
+        sp_2048_cond_add_45(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[44] >> 31));
+        sp_2048_cond_add_45(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[44] >> 31));
         sp_2048_mul_45(tmpa, tmpa, qi);
         err = sp_2048_mod_45(tmpa, tmpa, p);
     }
 
     if (err == MP_OKAY) {
         sp_2048_mul_45(tmpa, tmpa, q);
-        sp_2048_add_90(r, tmpb, tmpa);
+        (void)sp_2048_add_90(r, tmpb, tmpa);
         sp_2048_norm_90(r);
 
         sp_2048_to_bin(r, out);
@@ -3525,19 +3628,21 @@
 #endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
 }
 
+#endif /* !WOLFSSL_RSA_PUBLIC_ONLY */
 #endif /* WOLFSSL_HAVE_SP_RSA */
-#ifdef WOLFSSL_HAVE_SP_DH
+#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
+                                              !defined(WOLFSSL_RSA_PUBLIC_ONLY))
 /* Convert an array of sp_digit to an mp_int.
  *
  * a  A single precision integer.
  * r  A multi-precision integer.
  */
-static int sp_2048_to_mp(sp_digit* a, mp_int* r)
+static int sp_2048_to_mp(const sp_digit* a, mp_int* r)
 {
     int err;
 
     err = mp_grow(r, (2048 + DIGIT_BIT - 1) / DIGIT_BIT);
-    if (err == MP_OKAY) {
+    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
 #if DIGIT_BIT == 23
         XMEMCPY(r->dp, a, sizeof(sp_digit) * 90);
         r->used = 90;
@@ -3547,14 +3652,19 @@
 
         r->dp[0] = 0;
         for (i = 0; i < 90; i++) {
-            r->dp[j] |= a[i] << s;
-            r->dp[j] &= (1l << DIGIT_BIT) - 1;
+            r->dp[j] |= (mp_digit)(a[i] << s);
+            r->dp[j] &= (1L << DIGIT_BIT) - 1;
             s = DIGIT_BIT - s;
-            r->dp[++j] = a[i] >> s;
+            r->dp[++j] = (mp_digit)(a[i] >> s);
             while (s + DIGIT_BIT <= 23) {
                 s += DIGIT_BIT;
-                r->dp[j] &= (1l << DIGIT_BIT) - 1;
-                r->dp[++j] = a[i] >> s;
+                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+                if (s == SP_WORD_SIZE) {
+                    r->dp[j] = 0;
+                }
+                else {
+                    r->dp[j] = (mp_digit)(a[i] >> s);
+                }
             }
             s = 23 - s;
         }
@@ -3567,15 +3677,16 @@
         for (i = 0; i < 90; i++) {
             r->dp[j] |= ((mp_digit)a[i]) << s;
             if (s + 23 >= DIGIT_BIT) {
-    #if DIGIT_BIT < 32
-                r->dp[j] &= (1l << DIGIT_BIT) - 1;
+    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+                r->dp[j] &= (1L << DIGIT_BIT) - 1;
     #endif
                 s = DIGIT_BIT - s;
                 r->dp[++j] = a[i] >> s;
                 s = 23 - s;
             }
-            else
+            else {
                 s += 23;
+            }
         }
         r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT;
         mp_clamp(r);
@@ -3591,7 +3702,7 @@
  * exp   Exponent. MP integer.
  * mod   Modulus. MP integer.
  * res   Result. MP integer.
- * returs 0 on success, MP_READ_E if there are too many bytes in an array
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
  * and MEMORY_E if memory allocation fails.
  */
 int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
@@ -3605,16 +3716,27 @@
     sp_digit* r;
     int expBits = mp_count_bits(exp);
 
-    if (mp_count_bits(base) > 2048 || expBits > 2048 ||
-                                                   mp_count_bits(mod) != 2048) {
+    if (mp_count_bits(base) > 2048) {
         err = MP_READ_E;
     }
 
     if (err == MP_OKAY) {
-        d = (sp_digit*)XMALLOC(sizeof(*d) * 90 * 4, NULL,
-                               DYNAMIC_TYPE_TMP_BUFFER);
-        if (d == NULL)
-            err = MEMORY_E;
+        if (expBits > 2048) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 2048) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 90 * 4, NULL, DYNAMIC_TYPE_DH);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
     }
 
     if (err == MP_OKAY) {
@@ -3635,8 +3757,8 @@
     }
 
     if (d != NULL) {
-        XMEMSET(e, 0, sizeof(sp_digit) * 90);
-        XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        XMEMSET(e, 0, sizeof(sp_digit) * 90U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
     }
     return err;
 #else
@@ -3652,15 +3774,25 @@
     int err = MP_OKAY;
     int expBits = mp_count_bits(exp);
 
-    if (mp_count_bits(base) > 2048 || expBits > 2048 ||
-                                                   mp_count_bits(mod) != 2048) {
+    if (mp_count_bits(base) > 2048) {
         err = MP_READ_E;
     }
 
-#ifdef WOLFSSL_SMALL_STACK
-    if (err == MP_OKAY) {
-        d = (sp_digit*)XMALLOC(sizeof(*d) * 90 * 4, NULL,
-                               DYNAMIC_TYPE_TMP_BUFFER);
+    if (err == MP_OKAY) {
+        if (expBits > 2048) {
+            err = MP_READ_E;
+        }
+    }
+    
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 2048) {
+            err = MP_READ_E;
+        }
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 90 * 4, NULL, DYNAMIC_TYPE_DH);
         if (d == NULL)
             err = MEMORY_E;
     }
@@ -3689,16 +3821,327 @@
         err = sp_2048_to_mp(r, res);
     }
 
-    XMEMSET(e, 0, sizeof(sp_digit) * 90);
-
-#ifdef WOLFSSL_SMALL_STACK
-    if (d != NULL)
-        XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return err;
-#endif
-}
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (d != NULL) {
+        XMEMSET(e, 0, sizeof(sp_digit) * 90U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
+    }
+#else
+    XMEMSET(e, 0, sizeof(sp_digit) * 90U);
+#endif
+
+    return err;
+#endif
+}
+
+#ifdef WOLFSSL_HAVE_SP_DH
+
+#ifdef HAVE_FFDHE_2048
+SP_NOINLINE static void sp_2048_lshift_90(sp_digit* r, sp_digit* a, byte n)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    r[90] = a[89] >> (23 - n);
+    for (i=89; i>0; i--) {
+        r[i] = ((a[i] << n) | (a[i-1] >> (23 - n))) & 0x7fffff;
+    }
+#else
+    sp_int_digit s, t;
+
+    s = (sp_int_digit)a[89];
+    r[90] = s >> (23U - n);
+    s = (sp_int_digit)(a[89]); t = (sp_int_digit)(a[88]);
+    r[89] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[88]); t = (sp_int_digit)(a[87]);
+    r[88] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[87]); t = (sp_int_digit)(a[86]);
+    r[87] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[86]); t = (sp_int_digit)(a[85]);
+    r[86] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[85]); t = (sp_int_digit)(a[84]);
+    r[85] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[84]); t = (sp_int_digit)(a[83]);
+    r[84] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[83]); t = (sp_int_digit)(a[82]);
+    r[83] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[82]); t = (sp_int_digit)(a[81]);
+    r[82] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[81]); t = (sp_int_digit)(a[80]);
+    r[81] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[80]); t = (sp_int_digit)(a[79]);
+    r[80] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[79]); t = (sp_int_digit)(a[78]);
+    r[79] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[78]); t = (sp_int_digit)(a[77]);
+    r[78] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[77]); t = (sp_int_digit)(a[76]);
+    r[77] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[76]); t = (sp_int_digit)(a[75]);
+    r[76] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[75]); t = (sp_int_digit)(a[74]);
+    r[75] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[74]); t = (sp_int_digit)(a[73]);
+    r[74] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[73]); t = (sp_int_digit)(a[72]);
+    r[73] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[72]); t = (sp_int_digit)(a[71]);
+    r[72] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[71]); t = (sp_int_digit)(a[70]);
+    r[71] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[70]); t = (sp_int_digit)(a[69]);
+    r[70] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[69]); t = (sp_int_digit)(a[68]);
+    r[69] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[68]); t = (sp_int_digit)(a[67]);
+    r[68] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[67]); t = (sp_int_digit)(a[66]);
+    r[67] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[66]); t = (sp_int_digit)(a[65]);
+    r[66] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[65]); t = (sp_int_digit)(a[64]);
+    r[65] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[64]); t = (sp_int_digit)(a[63]);
+    r[64] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[63]); t = (sp_int_digit)(a[62]);
+    r[63] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[62]); t = (sp_int_digit)(a[61]);
+    r[62] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[61]); t = (sp_int_digit)(a[60]);
+    r[61] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[60]); t = (sp_int_digit)(a[59]);
+    r[60] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[59]); t = (sp_int_digit)(a[58]);
+    r[59] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[58]); t = (sp_int_digit)(a[57]);
+    r[58] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[57]); t = (sp_int_digit)(a[56]);
+    r[57] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[56]); t = (sp_int_digit)(a[55]);
+    r[56] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[55]); t = (sp_int_digit)(a[54]);
+    r[55] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[54]); t = (sp_int_digit)(a[53]);
+    r[54] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[53]); t = (sp_int_digit)(a[52]);
+    r[53] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[52]); t = (sp_int_digit)(a[51]);
+    r[52] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[51]); t = (sp_int_digit)(a[50]);
+    r[51] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[50]); t = (sp_int_digit)(a[49]);
+    r[50] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[49]); t = (sp_int_digit)(a[48]);
+    r[49] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[48]); t = (sp_int_digit)(a[47]);
+    r[48] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[47]); t = (sp_int_digit)(a[46]);
+    r[47] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[46]); t = (sp_int_digit)(a[45]);
+    r[46] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[45]); t = (sp_int_digit)(a[44]);
+    r[45] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[44]); t = (sp_int_digit)(a[43]);
+    r[44] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[43]); t = (sp_int_digit)(a[42]);
+    r[43] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[42]); t = (sp_int_digit)(a[41]);
+    r[42] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[41]); t = (sp_int_digit)(a[40]);
+    r[41] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[40]); t = (sp_int_digit)(a[39]);
+    r[40] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[39]); t = (sp_int_digit)(a[38]);
+    r[39] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[38]); t = (sp_int_digit)(a[37]);
+    r[38] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[37]); t = (sp_int_digit)(a[36]);
+    r[37] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[36]); t = (sp_int_digit)(a[35]);
+    r[36] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[35]); t = (sp_int_digit)(a[34]);
+    r[35] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[34]); t = (sp_int_digit)(a[33]);
+    r[34] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[33]); t = (sp_int_digit)(a[32]);
+    r[33] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[32]); t = (sp_int_digit)(a[31]);
+    r[32] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[31]); t = (sp_int_digit)(a[30]);
+    r[31] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[30]); t = (sp_int_digit)(a[29]);
+    r[30] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[29]); t = (sp_int_digit)(a[28]);
+    r[29] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[28]); t = (sp_int_digit)(a[27]);
+    r[28] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[27]); t = (sp_int_digit)(a[26]);
+    r[27] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[26]); t = (sp_int_digit)(a[25]);
+    r[26] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[25]); t = (sp_int_digit)(a[24]);
+    r[25] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[24]); t = (sp_int_digit)(a[23]);
+    r[24] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[23]); t = (sp_int_digit)(a[22]);
+    r[23] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[22]); t = (sp_int_digit)(a[21]);
+    r[22] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[21]); t = (sp_int_digit)(a[20]);
+    r[21] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[20]); t = (sp_int_digit)(a[19]);
+    r[20] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[19]); t = (sp_int_digit)(a[18]);
+    r[19] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[18]); t = (sp_int_digit)(a[17]);
+    r[18] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[17]); t = (sp_int_digit)(a[16]);
+    r[17] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[16]); t = (sp_int_digit)(a[15]);
+    r[16] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[15]); t = (sp_int_digit)(a[14]);
+    r[15] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[14]); t = (sp_int_digit)(a[13]);
+    r[14] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[13]); t = (sp_int_digit)(a[12]);
+    r[13] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[12]); t = (sp_int_digit)(a[11]);
+    r[12] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[11]); t = (sp_int_digit)(a[10]);
+    r[11] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[10]); t = (sp_int_digit)(a[9]);
+    r[10] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[9]); t = (sp_int_digit)(a[8]);
+    r[9] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[8]); t = (sp_int_digit)(a[7]);
+    r[8] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[7]); t = (sp_int_digit)(a[6]);
+    r[7] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[6]); t = (sp_int_digit)(a[5]);
+    r[6] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[5]); t = (sp_int_digit)(a[4]);
+    r[5] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[4]); t = (sp_int_digit)(a[3]);
+    r[4] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[3]); t = (sp_int_digit)(a[2]);
+    r[3] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[2]); t = (sp_int_digit)(a[1]);
+    r[2] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[1]); t = (sp_int_digit)(a[0]);
+    r[1] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+#endif
+    r[0] = (a[0] << n) & 0x7fffff;
+}
+
+/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
+ *
+ * r     A single precision number that is the result of the operation.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_2048_mod_exp_2_90(sp_digit* r, const sp_digit* e, int bits, const sp_digit* m)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit nd[180];
+    sp_digit td[91];
+#else
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit* tmp;
+    sp_digit mp = 1;
+    sp_digit n, o;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 271, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        norm = td;
+        tmp  = td + 180;
+        XMEMSET(td, 0, sizeof(sp_digit) * 271);
+#else
+        norm = nd;
+        tmp  = td;
+        XMEMSET(td, 0, sizeof(td));
+#endif
+
+        sp_2048_mont_setup(m, &mp);
+        sp_2048_mont_norm_90(norm, m);
+
+        bits = ((bits + 3) / 4) * 4;
+        i = ((bits + 22) / 23) - 1;
+        c = bits % 23;
+        if (c == 0) {
+            c = 23;
+        }
+        if (i < 90) {
+            n = e[i--] << (32 - c);
+        }
+        else {
+            n = 0;
+            i--;
+        }
+        if (c < 4) {
+            n |= e[i--] << (9 - c);
+            c += 23;
+        }
+        y = (n >> 28) & 0xf;
+        n <<= 4;
+        c -= 4;
+        sp_2048_lshift_90(r, norm, y);
+        for (; i>=0 || c>=4; ) {
+            if (c < 4) {
+                n |= e[i--] << (9 - c);
+                c += 23;
+            }
+            y = (n >> 28) & 0xf;
+            n <<= 4;
+            c -= 4;
+
+            sp_2048_mont_sqr_90(r, r, m, mp);
+            sp_2048_mont_sqr_90(r, r, m, mp);
+            sp_2048_mont_sqr_90(r, r, m, mp);
+            sp_2048_mont_sqr_90(r, r, m, mp);
+
+            sp_2048_lshift_90(r, r, y);
+            sp_2048_mul_d_90(tmp, norm, (r[90] << 22) + (r[89] >> 1));
+            r[90] = 0;
+            r[89] &= 0x1L;
+            (void)sp_2048_add_90(r, r, tmp);
+            sp_2048_norm_90(r);
+            o = sp_2048_cmp_90(r, m);
+            sp_2048_cond_sub_90(r, r, m, ((o < 0) ?
+                                          (sp_digit)1 : (sp_digit)0) - 1);
+        }
+
+        sp_2048_mont_reduce_90(r, m, mp);
+        n = sp_2048_cmp_90(r, m);
+        sp_2048_cond_sub_90(r, r, m, ((n < 0) ?
+                                                (sp_digit)1 : (sp_digit)0) - 1);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+
+#endif /* HAVE_FFDHE_2048 */
 
 /* Perform the modular exponentiation for Diffie-Hellman.
  *
@@ -3709,7 +4152,7 @@
  * out      Buffer to hold big-endian bytes of exponentiation result.
  *          Must be at least 256 bytes long.
  * outLen   Length, in bytes, of exponentiation result.
- * returs 0 on success, MP_READ_E if there are too many bytes in an array
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
  * and MEMORY_E if memory allocation fails.
  */
 int sp_DhExp_2048(mp_int* base, const byte* exp, word32 expLen,
@@ -3724,16 +4167,27 @@
     sp_digit* r;
     word32 i;
 
-    if (mp_count_bits(base) > 2048 || expLen > 256 ||
-                                                   mp_count_bits(mod) != 2048) {
+    if (mp_count_bits(base) > 2048) {
         err = MP_READ_E;
     }
 
     if (err == MP_OKAY) {
-        d = (sp_digit*)XMALLOC(sizeof(*d) * 90 * 4, NULL,
-                               DYNAMIC_TYPE_TMP_BUFFER);
-        if (d == NULL)
-            err = MEMORY_E;
+        if (expLen > 256) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 2048) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 90 * 4, NULL, DYNAMIC_TYPE_DH);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
     }
 
     if (err == MP_OKAY) {
@@ -3746,7 +4200,14 @@
         sp_2048_from_bin(e, 90, exp, expLen);
         sp_2048_from_mp(m, 90, mod);
 
-        err = sp_2048_mod_exp_90(r, b, e, expLen * 8, m, 0);
+    #ifdef HAVE_FFDHE_2048
+        if (base->used == 1 && base->dp[0] == 2 &&
+                ((m[89] << 15) | (m[88] >> 8)) == 0xffffL) {
+            err = sp_2048_mod_exp_2_90(r, e, expLen * 8, m);
+        }
+        else
+    #endif
+            err = sp_2048_mod_exp_90(r, b, e, expLen * 8, m, 0);
     }
 
     if (err == MP_OKAY) {
@@ -3759,8 +4220,8 @@
     }
 
     if (d != NULL) {
-        XMEMSET(e, 0, sizeof(sp_digit) * 90);
-        XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        XMEMSET(e, 0, sizeof(sp_digit) * 90U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
     }
     return err;
 #else
@@ -3776,15 +4237,24 @@
     word32 i;
     int err = MP_OKAY;
 
-    if (mp_count_bits(base) > 2048 || expLen > 256 ||
-                                                   mp_count_bits(mod) != 2048) {
+    if (mp_count_bits(base) > 2048) {
         err = MP_READ_E;
     }
 
-#ifdef WOLFSSL_SMALL_STACK
-    if (err == MP_OKAY) {
-        d = (sp_digit*)XMALLOC(sizeof(*d) * 90 * 4, NULL,
-                               DYNAMIC_TYPE_TMP_BUFFER);
+    if (err == MP_OKAY) {
+        if (expLen > 256U) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 2048) {
+            err = MP_READ_E;
+        }
+    }
+#ifdef WOLFSSL_SMALL_STACK
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 90 * 4, NULL, DYNAMIC_TYPE_DH);
         if (d == NULL)
             err = MEMORY_E;
     }
@@ -3806,113 +4276,283 @@
         sp_2048_from_bin(e, 90, exp, expLen);
         sp_2048_from_mp(m, 90, mod);
 
-        err = sp_2048_mod_exp_90(r, b, e, expLen * 8, m, 0);
+    #ifdef HAVE_FFDHE_2048
+        if (base->used == 1 && base->dp[0] == 2U &&
+                ((m[89] << 15) | (m[88] >> 8)) == 0xffffL) {
+            err = sp_2048_mod_exp_2_90(r, e, expLen * 8U, m);
+        }
+        else {
+    #endif
+            err = sp_2048_mod_exp_90(r, b, e, expLen * 8U, m, 0);
+    #ifdef HAVE_FFDHE_2048
+        }
+    #endif
     }
 
     if (err == MP_OKAY) {
         sp_2048_to_bin(r, out);
         *outLen = 256;
-        for (i=0; i<256 && out[i] == 0; i++) {
+        for (i=0; i<256U && out[i] == 0U; i++) {
         }
         *outLen -= i;
         XMEMMOVE(out, out + i, *outLen);
     }
 
-    XMEMSET(e, 0, sizeof(sp_digit) * 90);
-
-#ifdef WOLFSSL_SMALL_STACK
-    if (d != NULL)
-        XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return err;
-#endif
-}
-
+#ifdef WOLFSSL_SMALL_STACK
+    if (d != NULL) {
+        XMEMSET(e, 0, sizeof(sp_digit) * 90U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
+    }
+#else
+    XMEMSET(e, 0, sizeof(sp_digit) * 90U);
+#endif
+
+    return err;
+#endif
+}
 #endif /* WOLFSSL_HAVE_SP_DH */
 
-#endif /* WOLFSSL_SP_NO_2048 */
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base  Base. MP integer.
+ * exp   Exponent. MP integer.
+ * mod   Modulus. MP integer.
+ * res   Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_1024(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int err = MP_OKAY;
+    sp_digit* d = NULL;
+    sp_digit* b;
+    sp_digit* e;
+    sp_digit* m;
+    sp_digit* r;
+    int expBits = mp_count_bits(exp);
+
+    if (mp_count_bits(base) > 1024) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expBits > 1024) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 1024) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 45 * 4, NULL, DYNAMIC_TYPE_DH);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        b = d;
+        e = b + 45 * 2;
+        m = e + 45;
+        r = b;
+
+        sp_2048_from_mp(b, 45, base);
+        sp_2048_from_mp(e, 45, exp);
+        sp_2048_from_mp(m, 45, mod);
+
+        err = sp_2048_mod_exp_45(r, b, e, mp_count_bits(exp), m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        XMEMSET(r + 45, 0, sizeof(*r) * 45U);
+        err = sp_2048_to_mp(r, res);
+    }
+
+    if (d != NULL) {
+        XMEMSET(e, 0, sizeof(sp_digit) * 45U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
+    }
+    return err;
+#else
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit bd[90], ed[45], md[45];
+#else
+    sp_digit* d = NULL;
+#endif
+    sp_digit* b;
+    sp_digit* e;
+    sp_digit* m;
+    sp_digit* r;
+    int err = MP_OKAY;
+    int expBits = mp_count_bits(exp);
+
+    if (mp_count_bits(base) > 1024) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expBits > 1024) {
+            err = MP_READ_E;
+        }
+    }
+    
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 1024) {
+            err = MP_READ_E;
+        }
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 45 * 4, NULL, DYNAMIC_TYPE_DH);
+        if (d == NULL)
+            err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        b = d;
+        e = b + 45 * 2;
+        m = e + 45;
+        r = b;
+    }
+#else
+    r = b = bd;
+    e = ed;
+    m = md;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_2048_from_mp(b, 45, base);
+        sp_2048_from_mp(e, 45, exp);
+        sp_2048_from_mp(m, 45, mod);
+
+        err = sp_2048_mod_exp_45(r, b, e, expBits, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        XMEMSET(r + 45, 0, sizeof(*r) * 45U);
+        err = sp_2048_to_mp(r, res);
+    }
+
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (d != NULL) {
+        XMEMSET(e, 0, sizeof(sp_digit) * 45U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
+    }
+#else
+    XMEMSET(e, 0, sizeof(sp_digit) * 45U);
+#endif
+
+    return err;
+#endif
+}
+
+#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
+
+#endif /* !WOLFSSL_SP_NO_2048 */
 
 #ifndef WOLFSSL_SP_NO_3072
-/* Read big endian unsigned byte aray into r.
- *
- * r  A single precision integer.
+/* Read big endian unsigned byte array into r.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
  * a  Byte array.
  * n  Number of bytes in array to read.
  */
-static void sp_3072_from_bin(sp_digit* r, int max, const byte* a, int n)
-{
-    int i, j = 0, s = 0;
+static void sp_3072_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;
 
     r[0] = 0;
     for (i = n-1; i >= 0; i--) {
-        r[j] |= ((sp_digit)a[i]) << s;
-        if (s >= 15) {
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 15U) {
             r[j] &= 0x7fffff;
-            s = 23 - s;
-            if (j + 1 >= max)
-                break;
-            r[++j] = a[i] >> s;
-            s = 8 - s;
-        }
-        else
-            s += 8;
-    }
-
-    for (j++; j < max; j++)
+            s = 23U - s;
+            if (j + 1 >= size) {
+                break;
+            }
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    for (j++; j < size; j++) {
         r[j] = 0;
+    }
 }
 
 /* Convert an mp_int to an array of sp_digit.
  *
  * r  A single precision integer.
+ * size  Maximum number of bytes to convert
  * a  A multi-precision integer.
  */
-static void sp_3072_from_mp(sp_digit* r, int max, mp_int* a)
+static void sp_3072_from_mp(sp_digit* r, int size, const mp_int* a)
 {
 #if DIGIT_BIT == 23
     int j;
 
     XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
 
-    for (j = a->used; j < max; j++)
+    for (j = a->used; j < size; j++) {
         r[j] = 0;
+    }
 #elif DIGIT_BIT > 23
-    int i, j = 0, s = 0;
+    int i, j = 0;
+    word32 s = 0;
 
     r[0] = 0;
-    for (i = 0; i < a->used && j < max; i++) {
-        r[j] |= a->dp[i] << s;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
         r[j] &= 0x7fffff;
-        s = 23 - s;
-        if (j + 1 >= max)
+        s = 23U - s;
+        if (j + 1 >= size) {
             break;
-        r[++j] = a->dp[i] >> s;
-        while (s + 23 <= DIGIT_BIT) {
-            s += 23;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 23U) <= (word32)DIGIT_BIT) {
+            s += 23U;
             r[j] &= 0x7fffff;
-            if (j + 1 >= max)
-                break;
-            if (s < DIGIT_BIT)
-                r[++j] = a->dp[i] >> s;
-            else
-                r[++j] = 0;
-        }
-        s = DIGIT_BIT - s;
-    }
-
-    for (j++; j < max; j++)
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
         r[j] = 0;
+    }
 #else
     int i, j = 0, s = 0;
 
     r[0] = 0;
-    for (i = 0; i < a->used && j < max; i++) {
+    for (i = 0; i < a->used && j < size; i++) {
         r[j] |= ((sp_digit)a->dp[i]) << s;
         if (s + DIGIT_BIT >= 23) {
             r[j] &= 0x7fffff;
-            if (j + 1 >= max)
-                break;
+            if (j + 1 >= size) {
+                break;
+            }
             s = 23 - s;
             if (s == DIGIT_BIT) {
                 r[++j] = 0;
@@ -3923,16 +4563,18 @@
                 s = DIGIT_BIT - s;
             }
         }
-        else
+        else {
             s += DIGIT_BIT;
-    }
-
-    for (j++; j < max; j++)
+        }
+    }
+
+    for (j++; j < size; j++) {
         r[j] = 0;
-#endif
-}
-
-/* Write r as big endian to byte aray.
+    }
+#endif
+}
+
+/* Write r as big endian to byte array.
  * Fixed length number of bytes written: 384
  *
  * r  A single precision integer.
@@ -3942,27 +4584,34 @@
 {
     int i, j, s = 0, b;
 
-    for (i=0; i<135; i++) {
+    for (i=0; i<133; i++) {
         r[i+1] += r[i] >> 23;
         r[i] &= 0x7fffff;
     }
     j = 3072 / 8 - 1;
     a[j] = 0;
-    for (i=0; i<136 && j>=0; i++) {
+    for (i=0; i<134 && j>=0; i++) {
         b = 0;
-        a[j--] |= r[i] << s; b += 8 - s;
-        if (j < 0)
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
             break;
+        }
         while (b < 23) {
-            a[j--] = r[i] >> b; b += 8;
-            if (j < 0)
-                break;
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
+                break;
+            }
         }
         s = 8 - (b - 23);
-        if (j >= 0)
+        if (j >= 0) {
             a[j] = 0;
-        if (s != 0)
+        }
+        if (s != 0) {
             j++;
+        }
     }
 }
 
@@ -3973,22 +4622,23 @@
  * a  A single precision integer.
  * b  A single precision integer.
  */
-SP_NOINLINE static void sp_3072_mul_17(sp_digit* r, const sp_digit* a,
+SP_NOINLINE static void sp_3072_mul_67(sp_digit* r, const sp_digit* a,
     const sp_digit* b)
 {
     int i, j;
-    int64_t t[34];
+    int64_t t[134];
 
     XMEMSET(t, 0, sizeof(t));
-    for (i=0; i<17; i++) {
-        for (j=0; j<17; j++)
+    for (i=0; i<67; i++) {
+        for (j=0; j<67; j++) {
             t[i+j] += ((int64_t)a[i]) * b[j];
-    }
-    for (i=0; i<33; i++) {
+        }
+    }
+    for (i=0; i<133; i++) {
         r[i] = t[i] & 0x7fffff;
         t[i+1] += t[i] >> 23;
     }
-    r[33] = (sp_digit)t[33];
+    r[133] = (sp_digit)t[133];
 }
 
 /* Square a and put result in r. (r = a * a)
@@ -3996,48 +4646,23 @@
  * r  A single precision integer.
  * a  A single precision integer.
  */
-SP_NOINLINE static void sp_3072_sqr_17(sp_digit* r, const sp_digit* a)
+SP_NOINLINE static void sp_3072_sqr_67(sp_digit* r, const sp_digit* a)
 {
     int i, j;
-    int64_t t[34];
+    int64_t t[134];
 
     XMEMSET(t, 0, sizeof(t));
-    for (i=0; i<17; i++) {
-        for (j=0; j<i; j++)
+    for (i=0; i<67; i++) {
+        for (j=0; j<i; j++) {
             t[i+j] += (((int64_t)a[i]) * a[j]) * 2;
+        }
         t[i+i] += ((int64_t)a[i]) * a[i];
     }
-    for (i=0; i<33; i++) {
+    for (i=0; i<133; i++) {
         r[i] = t[i] & 0x7fffff;
         t[i+1] += t[i] >> 23;
     }
-    r[33] = (sp_digit)t[33];
-}
-
-/* Add b to a into r. (r = a + b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A single precision integer.
- */
-SP_NOINLINE static int sp_3072_add_17(sp_digit* r, const sp_digit* a,
-        const sp_digit* b)
-{
-    int i;
-
-    for (i = 0; i < 16; i += 8) {
-        r[i + 0] = a[i + 0] + b[i + 0];
-        r[i + 1] = a[i + 1] + b[i + 1];
-        r[i + 2] = a[i + 2] + b[i + 2];
-        r[i + 3] = a[i + 3] + b[i + 3];
-        r[i + 4] = a[i + 4] + b[i + 4];
-        r[i + 5] = a[i + 5] + b[i + 5];
-        r[i + 6] = a[i + 6] + b[i + 6];
-        r[i + 7] = a[i + 7] + b[i + 7];
-    }
-    r[16] = a[16] + b[16];
-
-    return 0;
+    r[133] = (sp_digit)t[133];
 }
 
 /* Add b to a into r. (r = a + b)
@@ -4046,105 +4671,7 @@
  * a  A single precision integer.
  * b  A single precision integer.
  */
-SP_NOINLINE static int sp_3072_add_34(sp_digit* r, const sp_digit* a,
-        const sp_digit* b)
-{
-    int i;
-
-    for (i = 0; i < 32; i += 8) {
-        r[i + 0] = a[i + 0] + b[i + 0];
-        r[i + 1] = a[i + 1] + b[i + 1];
-        r[i + 2] = a[i + 2] + b[i + 2];
-        r[i + 3] = a[i + 3] + b[i + 3];
-        r[i + 4] = a[i + 4] + b[i + 4];
-        r[i + 5] = a[i + 5] + b[i + 5];
-        r[i + 6] = a[i + 6] + b[i + 6];
-        r[i + 7] = a[i + 7] + b[i + 7];
-    }
-    r[32] = a[32] + b[32];
-    r[33] = a[33] + b[33];
-
-    return 0;
-}
-
-/* Sub b from a into r. (r = a - b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A single precision integer.
- */
-SP_NOINLINE static int sp_3072_sub_34(sp_digit* r, const sp_digit* a,
-        const sp_digit* b)
-{
-    int i;
-
-    for (i = 0; i < 32; i += 8) {
-        r[i + 0] = a[i + 0] - b[i + 0];
-        r[i + 1] = a[i + 1] - b[i + 1];
-        r[i + 2] = a[i + 2] - b[i + 2];
-        r[i + 3] = a[i + 3] - b[i + 3];
-        r[i + 4] = a[i + 4] - b[i + 4];
-        r[i + 5] = a[i + 5] - b[i + 5];
-        r[i + 6] = a[i + 6] - b[i + 6];
-        r[i + 7] = a[i + 7] - b[i + 7];
-    }
-    r[32] = a[32] - b[32];
-    r[33] = a[33] - b[33];
-
-    return 0;
-}
-
-/* Multiply a and b into r. (r = a * b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A single precision integer.
- */
-SP_NOINLINE static void sp_3072_mul_34(sp_digit* r, const sp_digit* a,
-    const sp_digit* b)
-{
-    sp_digit* z0 = r;
-    sp_digit z1[34];
-    sp_digit* a1 = z1;
-    sp_digit b1[17];
-    sp_digit* z2 = r + 34;
-    sp_3072_add_17(a1, a, &a[17]);
-    sp_3072_add_17(b1, b, &b[17]);
-    sp_3072_mul_17(z2, &a[17], &b[17]);
-    sp_3072_mul_17(z0, a, b);
-    sp_3072_mul_17(z1, a1, b1);
-    sp_3072_sub_34(z1, z1, z2);
-    sp_3072_sub_34(z1, z1, z0);
-    sp_3072_add_34(r + 17, r + 17, z1);
-}
-
-/* Square a and put result in r. (r = a * a)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- */
-SP_NOINLINE static void sp_3072_sqr_34(sp_digit* r, const sp_digit* a)
-{
-    sp_digit* z0 = r;
-    sp_digit z1[34];
-    sp_digit* a1 = z1;
-    sp_digit* z2 = r + 34;
-    sp_3072_add_17(a1, a, &a[17]);
-    sp_3072_sqr_17(z2, &a[17]);
-    sp_3072_sqr_17(z0, a);
-    sp_3072_sqr_17(z1, a1);
-    sp_3072_sub_34(z1, z1, z2);
-    sp_3072_sub_34(z1, z1, z0);
-    sp_3072_add_34(r + 17, r + 17, z1);
-}
-
-/* Add b to a into r. (r = a + b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A single precision integer.
- */
-SP_NOINLINE static int sp_3072_add_68(sp_digit* r, const sp_digit* a,
+SP_NOINLINE static int sp_3072_add_67(sp_digit* r, const sp_digit* a,
         const sp_digit* b)
 {
     int i;
@@ -4162,7 +4689,37 @@
     r[64] = a[64] + b[64];
     r[65] = a[65] + b[65];
     r[66] = a[66] + b[66];
-    r[67] = a[67] + b[67];
+
+    return 0;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_3072_add_134(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 128; i += 8) {
+        r[i + 0] = a[i + 0] + b[i + 0];
+        r[i + 1] = a[i + 1] + b[i + 1];
+        r[i + 2] = a[i + 2] + b[i + 2];
+        r[i + 3] = a[i + 3] + b[i + 3];
+        r[i + 4] = a[i + 4] + b[i + 4];
+        r[i + 5] = a[i + 5] + b[i + 5];
+        r[i + 6] = a[i + 6] + b[i + 6];
+        r[i + 7] = a[i + 7] + b[i + 7];
+    }
+    r[128] = a[128] + b[128];
+    r[129] = a[129] + b[129];
+    r[130] = a[130] + b[130];
+    r[131] = a[131] + b[131];
+    r[132] = a[132] + b[132];
+    r[133] = a[133] + b[133];
 
     return 0;
 }
@@ -4173,7 +4730,234 @@
  * a  A single precision integer.
  * b  A single precision integer.
  */
-SP_NOINLINE static int sp_3072_sub_68(sp_digit* r, const sp_digit* a,
+SP_NOINLINE static int sp_3072_sub_134(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 128; i += 8) {
+        r[i + 0] = a[i + 0] - b[i + 0];
+        r[i + 1] = a[i + 1] - b[i + 1];
+        r[i + 2] = a[i + 2] - b[i + 2];
+        r[i + 3] = a[i + 3] - b[i + 3];
+        r[i + 4] = a[i + 4] - b[i + 4];
+        r[i + 5] = a[i + 5] - b[i + 5];
+        r[i + 6] = a[i + 6] - b[i + 6];
+        r[i + 7] = a[i + 7] - b[i + 7];
+    }
+    r[128] = a[128] - b[128];
+    r[129] = a[129] - b[129];
+    r[130] = a[130] - b[130];
+    r[131] = a[131] - b[131];
+    r[132] = a[132] - b[132];
+    r[133] = a[133] - b[133];
+
+    return 0;
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_mul_134(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    sp_digit* z0 = r;
+    sp_digit z1[134];
+    sp_digit* a1 = z1;
+    sp_digit b1[67];
+    sp_digit* z2 = r + 134;
+    (void)sp_3072_add_67(a1, a, &a[67]);
+    (void)sp_3072_add_67(b1, b, &b[67]);
+    sp_3072_mul_67(z2, &a[67], &b[67]);
+    sp_3072_mul_67(z0, a, b);
+    sp_3072_mul_67(z1, a1, b1);
+    (void)sp_3072_sub_134(z1, z1, z2);
+    (void)sp_3072_sub_134(z1, z1, z0);
+    (void)sp_3072_add_134(r + 67, r + 67, z1);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_sqr_134(sp_digit* r, const sp_digit* a)
+{
+    sp_digit* z0 = r;
+    sp_digit z1[134];
+    sp_digit* a1 = z1;
+    sp_digit* z2 = r + 134;
+    (void)sp_3072_add_67(a1, a, &a[67]);
+    sp_3072_sqr_67(z2, &a[67]);
+    sp_3072_sqr_67(z0, a);
+    sp_3072_sqr_67(z1, a1);
+    (void)sp_3072_sub_134(z1, z1, z2);
+    (void)sp_3072_sub_134(z1, z1, z0);
+    (void)sp_3072_add_134(r + 67, r + 67, z1);
+}
+
+#endif /* !WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_3072_add_134(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 134; i++) {
+        r[i] = a[i] + b[i];
+    }
+
+    return 0;
+}
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_3072_sub_134(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 134; i++) {
+        r[i] = a[i] - b[i];
+    }
+
+    return 0;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_mul_134(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int i, j, k;
+    int64_t c;
+
+    c = ((int64_t)a[133]) * b[133];
+    r[267] = (sp_digit)(c >> 23);
+    c = (c & 0x7fffff) << 23;
+    for (k = 265; k >= 0; k--) {
+        for (i = 133; i >= 0; i--) {
+            j = k - i;
+            if (j >= 134) {
+                break;
+            }
+            if (j < 0) {
+                continue;
+            }
+
+            c += ((int64_t)a[i]) * b[j];
+        }
+        r[k + 2] += c >> 46;
+        r[k + 1] = (c >> 23) & 0x7fffff;
+        c = (c & 0x7fffff) << 23;
+    }
+    r[0] = (sp_digit)(c >> 23);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_sqr_134(sp_digit* r, const sp_digit* a)
+{
+    int i, j, k;
+    int64_t c;
+
+    c = ((int64_t)a[133]) * a[133];
+    r[267] = (sp_digit)(c >> 23);
+    c = (c & 0x7fffff) << 23;
+    for (k = 265; k >= 0; k--) {
+        for (i = 133; i >= 0; i--) {
+            j = k - i;
+            if (j >= 134 || i <= j) {
+                break;
+            }
+            if (j < 0) {
+                continue;
+            }
+
+            c += ((int64_t)a[i]) * a[j] * 2;
+        }
+        if (i == j) {
+           c += ((int64_t)a[i]) * a[i];
+        }
+
+        r[k + 2] += c >> 46;
+        r[k + 1] = (c >> 23) & 0x7fffff;
+        c = (c & 0x7fffff) << 23;
+    }
+    r[0] = (sp_digit)(c >> 23);
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_3072_add_67(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 67; i++) {
+        r[i] = a[i] + b[i];
+    }
+
+    return 0;
+}
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_3072_sub_67(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 67; i++) {
+        r[i] = a[i] - b[i];
+    }
+
+    return 0;
+}
+
+#else
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_3072_sub_67(sp_digit* r, const sp_digit* a,
         const sp_digit* b)
 {
     int i;
@@ -4191,182 +4975,6 @@
     r[64] = a[64] - b[64];
     r[65] = a[65] - b[65];
     r[66] = a[66] - b[66];
-    r[67] = a[67] - b[67];
-
-    return 0;
-}
-
-/* Multiply a and b into r. (r = a * b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A single precision integer.
- */
-SP_NOINLINE static void sp_3072_mul_68(sp_digit* r, const sp_digit* a,
-    const sp_digit* b)
-{
-    sp_digit* z0 = r;
-    sp_digit z1[68];
-    sp_digit* a1 = z1;
-    sp_digit b1[34];
-    sp_digit* z2 = r + 68;
-    sp_3072_add_34(a1, a, &a[34]);
-    sp_3072_add_34(b1, b, &b[34]);
-    sp_3072_mul_34(z2, &a[34], &b[34]);
-    sp_3072_mul_34(z0, a, b);
-    sp_3072_mul_34(z1, a1, b1);
-    sp_3072_sub_68(z1, z1, z2);
-    sp_3072_sub_68(z1, z1, z0);
-    sp_3072_add_68(r + 34, r + 34, z1);
-}
-
-/* Square a and put result in r. (r = a * a)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- */
-SP_NOINLINE static void sp_3072_sqr_68(sp_digit* r, const sp_digit* a)
-{
-    sp_digit* z0 = r;
-    sp_digit z1[68];
-    sp_digit* a1 = z1;
-    sp_digit* z2 = r + 68;
-    sp_3072_add_34(a1, a, &a[34]);
-    sp_3072_sqr_34(z2, &a[34]);
-    sp_3072_sqr_34(z0, a);
-    sp_3072_sqr_34(z1, a1);
-    sp_3072_sub_68(z1, z1, z2);
-    sp_3072_sub_68(z1, z1, z0);
-    sp_3072_add_68(r + 34, r + 34, z1);
-}
-
-/* Add b to a into r. (r = a + b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A single precision integer.
- */
-SP_NOINLINE static int sp_3072_add_136(sp_digit* r, const sp_digit* a,
-        const sp_digit* b)
-{
-    int i;
-
-    for (i = 0; i < 136; i += 8) {
-        r[i + 0] = a[i + 0] + b[i + 0];
-        r[i + 1] = a[i + 1] + b[i + 1];
-        r[i + 2] = a[i + 2] + b[i + 2];
-        r[i + 3] = a[i + 3] + b[i + 3];
-        r[i + 4] = a[i + 4] + b[i + 4];
-        r[i + 5] = a[i + 5] + b[i + 5];
-        r[i + 6] = a[i + 6] + b[i + 6];
-        r[i + 7] = a[i + 7] + b[i + 7];
-    }
-
-    return 0;
-}
-
-/* Sub b from a into r. (r = a - b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A single precision integer.
- */
-SP_NOINLINE static int sp_3072_sub_136(sp_digit* r, const sp_digit* a,
-        const sp_digit* b)
-{
-    int i;
-
-    for (i = 0; i < 136; i += 8) {
-        r[i + 0] = a[i + 0] - b[i + 0];
-        r[i + 1] = a[i + 1] - b[i + 1];
-        r[i + 2] = a[i + 2] - b[i + 2];
-        r[i + 3] = a[i + 3] - b[i + 3];
-        r[i + 4] = a[i + 4] - b[i + 4];
-        r[i + 5] = a[i + 5] - b[i + 5];
-        r[i + 6] = a[i + 6] - b[i + 6];
-        r[i + 7] = a[i + 7] - b[i + 7];
-    }
-
-    return 0;
-}
-
-/* Multiply a and b into r. (r = a * b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A single precision integer.
- */
-SP_NOINLINE static void sp_3072_mul_136(sp_digit* r, const sp_digit* a,
-    const sp_digit* b)
-{
-    sp_digit* z0 = r;
-    sp_digit z1[136];
-    sp_digit* a1 = z1;
-    sp_digit b1[68];
-    sp_digit* z2 = r + 136;
-    sp_3072_add_68(a1, a, &a[68]);
-    sp_3072_add_68(b1, b, &b[68]);
-    sp_3072_mul_68(z2, &a[68], &b[68]);
-    sp_3072_mul_68(z0, a, b);
-    sp_3072_mul_68(z1, a1, b1);
-    sp_3072_sub_136(z1, z1, z2);
-    sp_3072_sub_136(z1, z1, z0);
-    sp_3072_add_136(r + 68, r + 68, z1);
-}
-
-/* Square a and put result in r. (r = a * a)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- */
-SP_NOINLINE static void sp_3072_sqr_136(sp_digit* r, const sp_digit* a)
-{
-    sp_digit* z0 = r;
-    sp_digit z1[136];
-    sp_digit* a1 = z1;
-    sp_digit* z2 = r + 136;
-    sp_3072_add_68(a1, a, &a[68]);
-    sp_3072_sqr_68(z2, &a[68]);
-    sp_3072_sqr_68(z0, a);
-    sp_3072_sqr_68(z1, a1);
-    sp_3072_sub_136(z1, z1, z2);
-    sp_3072_sub_136(z1, z1, z0);
-    sp_3072_add_136(r + 68, r + 68, z1);
-}
-
-#endif /* WOLFSSL_SP_SMALL */
-#ifdef WOLFSSL_SP_SMALL
-/* Add b to a into r. (r = a + b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A single precision integer.
- */
-SP_NOINLINE static int sp_3072_add_136(sp_digit* r, const sp_digit* a,
-        const sp_digit* b)
-{
-    int i;
-
-    for (i = 0; i < 136; i++)
-        r[i] = a[i] + b[i];
-
-    return 0;
-}
-#endif /* WOLFSSL_SP_SMALL */
-#ifdef WOLFSSL_SP_SMALL
-/* Sub b from a into r. (r = a - b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A single precision integer.
- */
-SP_NOINLINE static int sp_3072_sub_136(sp_digit* r, const sp_digit* a,
-        const sp_digit* b)
-{
-    int i;
-
-    for (i = 0; i < 136; i++)
-        r[i] = a[i] - b[i];
 
     return 0;
 }
@@ -4379,22 +4987,24 @@
  * a  A single precision integer.
  * b  A single precision integer.
  */
-SP_NOINLINE static void sp_3072_mul_136(sp_digit* r, const sp_digit* a,
+SP_NOINLINE static void sp_3072_mul_67(sp_digit* r, const sp_digit* a,
     const sp_digit* b)
 {
     int i, j, k;
     int64_t c;
 
-    c = ((int64_t)a[135]) * b[135];
-    r[271] = (sp_digit)(c >> 23);
+    c = ((int64_t)a[66]) * b[66];
+    r[133] = (sp_digit)(c >> 23);
     c = (c & 0x7fffff) << 23;
-    for (k = 269; k >= 0; k--) {
-        for (i = 135; i >= 0; i--) {
+    for (k = 131; k >= 0; k--) {
+        for (i = 66; i >= 0; i--) {
             j = k - i;
-            if (j >= 136)
-                break;
-            if (j < 0)
+            if (j >= 67) {
+                break;
+            }
+            if (j < 0) {
                 continue;
+            }
 
             c += ((int64_t)a[i]) * b[j];
         }
@@ -4410,26 +5020,29 @@
  * r  A single precision integer.
  * a  A single precision integer.
  */
-SP_NOINLINE static void sp_3072_sqr_136(sp_digit* r, const sp_digit* a)
+SP_NOINLINE static void sp_3072_sqr_67(sp_digit* r, const sp_digit* a)
 {
     int i, j, k;
     int64_t c;
 
-    c = ((int64_t)a[135]) * a[135];
-    r[271] = (sp_digit)(c >> 23);
+    c = ((int64_t)a[66]) * a[66];
+    r[133] = (sp_digit)(c >> 23);
     c = (c & 0x7fffff) << 23;
-    for (k = 269; k >= 0; k--) {
-        for (i = 135; i >= 0; i--) {
+    for (k = 131; k >= 0; k--) {
+        for (i = 66; i >= 0; i--) {
             j = k - i;
-            if (j >= 136 || i <= j)
-                break;
-            if (j < 0)
+            if (j >= 67 || i <= j) {
+                break;
+            }
+            if (j < 0) {
                 continue;
+            }
 
             c += ((int64_t)a[i]) * a[j] * 2;
         }
-        if (i == j)
+        if (i == j) {
            c += ((int64_t)a[i]) * a[i];
+        }
 
         r[k + 2] += c >> 46;
         r[k + 1] = (c >> 23) & 0x7fffff;
@@ -4439,119 +5052,14 @@
 }
 
 #endif /* WOLFSSL_SP_SMALL */
-#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA)
-#ifdef WOLFSSL_SP_SMALL
-/* Add b to a into r. (r = a + b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A single precision integer.
- */
-SP_NOINLINE static int sp_3072_add_68(sp_digit* r, const sp_digit* a,
-        const sp_digit* b)
-{
-    int i;
-
-    for (i = 0; i < 68; i++)
-        r[i] = a[i] + b[i];
-
-    return 0;
-}
-#endif /* WOLFSSL_SP_SMALL */
-#ifdef WOLFSSL_SP_SMALL
-/* Sub b from a into r. (r = a - b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A single precision integer.
- */
-SP_NOINLINE static int sp_3072_sub_68(sp_digit* r, const sp_digit* a,
-        const sp_digit* b)
-{
-    int i;
-
-    for (i = 0; i < 68; i++)
-        r[i] = a[i] - b[i];
-
-    return 0;
-}
-
-#endif /* WOLFSSL_SP_SMALL */
-#ifdef WOLFSSL_SP_SMALL
-/* Multiply a and b into r. (r = a * b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A single precision integer.
- */
-SP_NOINLINE static void sp_3072_mul_68(sp_digit* r, const sp_digit* a,
-    const sp_digit* b)
-{
-    int i, j, k;
-    int64_t c;
-
-    c = ((int64_t)a[67]) * b[67];
-    r[135] = (sp_digit)(c >> 23);
-    c = (c & 0x7fffff) << 23;
-    for (k = 133; k >= 0; k--) {
-        for (i = 67; i >= 0; i--) {
-            j = k - i;
-            if (j >= 68)
-                break;
-            if (j < 0)
-                continue;
-
-            c += ((int64_t)a[i]) * b[j];
-        }
-        r[k + 2] += c >> 46;
-        r[k + 1] = (c >> 23) & 0x7fffff;
-        c = (c & 0x7fffff) << 23;
-    }
-    r[0] = (sp_digit)(c >> 23);
-}
-
-/* Square a and put result in r. (r = a * a)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- */
-SP_NOINLINE static void sp_3072_sqr_68(sp_digit* r, const sp_digit* a)
-{
-    int i, j, k;
-    int64_t c;
-
-    c = ((int64_t)a[67]) * a[67];
-    r[135] = (sp_digit)(c >> 23);
-    c = (c & 0x7fffff) << 23;
-    for (k = 133; k >= 0; k--) {
-        for (i = 67; i >= 0; i--) {
-            j = k - i;
-            if (j >= 68 || i <= j)
-                break;
-            if (j < 0)
-                continue;
-
-            c += ((int64_t)a[i]) * a[j] * 2;
-        }
-        if (i == j)
-           c += ((int64_t)a[i]) * a[i];
-
-        r[k + 2] += c >> 46;
-        r[k + 1] = (c >> 23) & 0x7fffff;
-        c = (c & 0x7fffff) << 23;
-    }
-    r[0] = (sp_digit)(c >> 23);
-}
-
-#endif /* WOLFSSL_SP_SMALL */
-#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
 
 /* Caclulate the bottom digit of -1/a mod 2^n.
  *
  * a    A single precision number.
  * rho  Bottom word of inverse.
  */
-static void sp_3072_mont_setup(sp_digit* a, sp_digit* rho)
+static void sp_3072_mont_setup(const sp_digit* a, sp_digit* rho)
 {
     sp_digit x, b;
 
@@ -4566,21 +5074,80 @@
     *rho = (1L << 23) - x;
 }
 
-#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA)
+/* Multiply a by scalar b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A scalar.
+ */
+SP_NOINLINE static void sp_3072_mul_d_134(sp_digit* r, const sp_digit* a,
+    sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int64_t tb = b;
+    int64_t t = 0;
+    int i;
+
+    for (i = 0; i < 134; i++) {
+        t += tb * a[i];
+        r[i] = t & 0x7fffff;
+        t >>= 23;
+    }
+    r[134] = (sp_digit)t;
+#else
+    int64_t tb = b;
+    int64_t t[8];
+    int i;
+
+    t[0] = tb * a[0]; r[0] = t[0] & 0x7fffff;
+    for (i = 0; i < 128; i += 8) {
+        t[1] = tb * a[i+1];
+        r[i+1] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff);
+        t[2] = tb * a[i+2];
+        r[i+2] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff);
+        t[3] = tb * a[i+3];
+        r[i+3] = (sp_digit)(t[2] >> 23) + (t[3] & 0x7fffff);
+        t[4] = tb * a[i+4];
+        r[i+4] = (sp_digit)(t[3] >> 23) + (t[4] & 0x7fffff);
+        t[5] = tb * a[i+5];
+        r[i+5] = (sp_digit)(t[4] >> 23) + (t[5] & 0x7fffff);
+        t[6] = tb * a[i+6];
+        r[i+6] = (sp_digit)(t[5] >> 23) + (t[6] & 0x7fffff);
+        t[7] = tb * a[i+7];
+        r[i+7] = (sp_digit)(t[6] >> 23) + (t[7] & 0x7fffff);
+        t[0] = tb * a[i+8];
+        r[i+8] = (sp_digit)(t[7] >> 23) + (t[0] & 0x7fffff);
+    }
+    t[1] = tb * a[129];
+    r[129] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff);
+    t[2] = tb * a[130];
+    r[130] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff);
+    t[3] = tb * a[131];
+    r[131] = (sp_digit)(t[2] >> 23) + (t[3] & 0x7fffff);
+    t[4] = tb * a[132];
+    r[132] = (sp_digit)(t[3] >> 23) + (t[4] & 0x7fffff);
+    t[5] = tb * a[133];
+    r[133] = (sp_digit)(t[4] >> 23) + (t[5] & 0x7fffff);
+    r[134] =  (sp_digit)(t[5] >> 23);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
 /* r = 2^n mod m where n is the number of bits to reduce by.
  * Given m must be 3072 bits, just need to subtract.
  *
  * r  A single precision number.
- * m  A signle precision number.
- */
-static void sp_3072_mont_norm_68(sp_digit* r, sp_digit* m)
+ * m  A single precision number.
+ */
+static void sp_3072_mont_norm_67(sp_digit* r, const sp_digit* m)
 {
     /* Set r = 2^n - 1. */
 #ifdef WOLFSSL_SP_SMALL
     int i;
 
-    for (i=0; i<67; i++)
+    for (i=0; i<66; i++) {
         r[i] = 0x7fffff;
+    }
 #else
     int i;
 
@@ -4596,12 +5163,11 @@
     }
     r[64] = 0x7fffff;
     r[65] = 0x7fffff;
-    r[66] = 0x7fffff;
-#endif
-    r[67] = 0x3ffffl;
+#endif
+    r[66] = 0x3ffffL;
 
     /* r = (2^n - 1) mod n */
-    sp_3072_sub_68(r, r, m);
+    (void)sp_3072_sub_67(r, r, m);
 
     /* Add one so r = 2^n mod m */
     r[0] += 1;
@@ -4614,30 +5180,30 @@
  * return -ve, 0 or +ve if a is less than, equal to or greater than b
  * respectively.
  */
-static sp_digit sp_3072_cmp_68(const sp_digit* a, const sp_digit* b)
+static sp_digit sp_3072_cmp_67(const sp_digit* a, const sp_digit* b)
 {
     sp_digit r = 0;
 #ifdef WOLFSSL_SP_SMALL
     int i;
 
-    for (i=67; i>=0; i--)
-        r |= (a[i] - b[i]) & (0 - !r);
-#else
-    int i;
-
-    r |= (a[67] - b[67]) & (0 - !r);
-    r |= (a[66] - b[66]) & (0 - !r);
-    r |= (a[65] - b[65]) & (0 - !r);
-    r |= (a[64] - b[64]) & (0 - !r);
+    for (i=66; i>=0; i--) {
+        r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    }
+#else
+    int i;
+
+    r |= (a[66] - b[66]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[65] - b[65]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[64] - b[64]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
     for (i = 56; i >= 0; i -= 8) {
-        r |= (a[i + 7] - b[i + 7]) & (0 - !r);
-        r |= (a[i + 6] - b[i + 6]) & (0 - !r);
-        r |= (a[i + 5] - b[i + 5]) & (0 - !r);
-        r |= (a[i + 4] - b[i + 4]) & (0 - !r);
-        r |= (a[i + 3] - b[i + 3]) & (0 - !r);
-        r |= (a[i + 2] - b[i + 2]) & (0 - !r);
-        r |= (a[i + 1] - b[i + 1]) & (0 - !r);
-        r |= (a[i + 0] - b[i + 0]) & (0 - !r);
+        r |= (a[i + 7] - b[i + 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 6] - b[i + 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 5] - b[i + 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 4] - b[i + 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 3] - b[i + 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 2] - b[i + 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 1] - b[i + 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 0] - b[i + 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
     }
 #endif /* WOLFSSL_SP_SMALL */
 
@@ -4652,14 +5218,15 @@
  * b  A single precision number to subtract.
  * m  Mask value to apply.
  */
-static void sp_3072_cond_sub_68(sp_digit* r, const sp_digit* a,
+static void sp_3072_cond_sub_67(sp_digit* r, const sp_digit* a,
         const sp_digit* b, const sp_digit m)
 {
 #ifdef WOLFSSL_SP_SMALL
     int i;
 
-    for (i = 0; i < 68; i++)
+    for (i = 0; i < 67; i++) {
         r[i] = a[i] - (b[i] & m);
+    }
 #else
     int i;
 
@@ -4676,7 +5243,6 @@
     r[64] = a[64] - (b[64] & m);
     r[65] = a[65] - (b[65] & m);
     r[66] = a[66] - (b[66] & m);
-    r[67] = a[67] - (b[67] & m);
 #endif /* WOLFSSL_SP_SMALL */
 }
 
@@ -4686,7 +5252,7 @@
  * a  A single precision integer.
  * b  A scalar.
  */
-SP_NOINLINE static void sp_3072_mul_add_68(sp_digit* r, const sp_digit* a,
+SP_NOINLINE static void sp_3072_mul_add_67(sp_digit* r, const sp_digit* a,
         const sp_digit b)
 {
 #ifdef WOLFSSL_SP_SMALL
@@ -4694,40 +5260,39 @@
     int64_t t = 0;
     int i;
 
-    for (i = 0; i < 68; i++) {
+    for (i = 0; i < 67; i++) {
         t += (tb * a[i]) + r[i];
         r[i] = t & 0x7fffff;
         t >>= 23;
     }
-    r[68] += t;
+    r[67] += t;
 #else
     int64_t tb = b;
     int64_t t[8];
     int i;
 
-    t[0] = tb * a[0]; r[0] += t[0] & 0x7fffff;
+    t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x7fffff);
     for (i = 0; i < 64; i += 8) {
         t[1] = tb * a[i+1];
-        r[i+1] += (t[0] >> 23) + (t[1] & 0x7fffff);
+        r[i+1] += (sp_digit)((t[0] >> 23) + (t[1] & 0x7fffff));
         t[2] = tb * a[i+2];
-        r[i+2] += (t[1] >> 23) + (t[2] & 0x7fffff);
+        r[i+2] += (sp_digit)((t[1] >> 23) + (t[2] & 0x7fffff));
         t[3] = tb * a[i+3];
-        r[i+3] += (t[2] >> 23) + (t[3] & 0x7fffff);
+        r[i+3] += (sp_digit)((t[2] >> 23) + (t[3] & 0x7fffff));
         t[4] = tb * a[i+4];
-        r[i+4] += (t[3] >> 23) + (t[4] & 0x7fffff);
+        r[i+4] += (sp_digit)((t[3] >> 23) + (t[4] & 0x7fffff));
         t[5] = tb * a[i+5];
-        r[i+5] += (t[4] >> 23) + (t[5] & 0x7fffff);
+        r[i+5] += (sp_digit)((t[4] >> 23) + (t[5] & 0x7fffff));
         t[6] = tb * a[i+6];
-        r[i+6] += (t[5] >> 23) + (t[6] & 0x7fffff);
+        r[i+6] += (sp_digit)((t[5] >> 23) + (t[6] & 0x7fffff));
         t[7] = tb * a[i+7];
-        r[i+7] += (t[6] >> 23) + (t[7] & 0x7fffff);
+        r[i+7] += (sp_digit)((t[6] >> 23) + (t[7] & 0x7fffff));
         t[0] = tb * a[i+8];
-        r[i+8] += (t[7] >> 23) + (t[0] & 0x7fffff);
-    }
-    t[1] = tb * a[65]; r[65] += (t[0] >> 23) + (t[1] & 0x7fffff);
-    t[2] = tb * a[66]; r[66] += (t[1] >> 23) + (t[2] & 0x7fffff);
-    t[3] = tb * a[67]; r[67] += (t[2] >> 23) + (t[3] & 0x7fffff);
-    r[68] +=  t[3] >> 23;
+        r[i+8] += (sp_digit)((t[7] >> 23) + (t[0] & 0x7fffff));
+    }
+    t[1] = tb * a[65]; r[65] += (sp_digit)((t[0] >> 23) + (t[1] & 0x7fffff));
+    t[2] = tb * a[66]; r[66] += (sp_digit)((t[1] >> 23) + (t[2] & 0x7fffff));
+    r[67] +=  (sp_digit)(t[2] >> 23);
 #endif /* WOLFSSL_SP_SMALL */
 }
 
@@ -4735,11 +5300,11 @@
  *
  * a  Array of sp_digit to normalize.
  */
-static void sp_3072_norm_68(sp_digit* a)
-{
-#ifdef WOLFSSL_SP_SMALL
-    int i;
-    for (i = 0; i < 67; i++) {
+static void sp_3072_norm_67(sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    for (i = 0; i < 66; i++) {
         a[i+1] += a[i] >> 23;
         a[i] &= 0x7fffff;
     }
@@ -4760,8 +5325,6 @@
     a[64] &= 0x7fffff;
     a[65+1] += a[65] >> 23;
     a[65] &= 0x7fffff;
-    a[66+1] += a[66] >> 23;
-    a[66] &= 0x7fffff;
 #endif
 }
 
@@ -4770,54 +5333,52 @@
  * r  A single precision number.
  * a  A single precision number.
  */
-static void sp_3072_mont_shift_68(sp_digit* r, const sp_digit* a)
+static void sp_3072_mont_shift_67(sp_digit* r, const sp_digit* a)
 {
 #ifdef WOLFSSL_SP_SMALL
     int i;
     sp_digit n, s;
 
-    s = a[68];
-    n = a[67] >> 18;
-    for (i = 0; i < 67; i++) {
+    s = a[67];
+    n = a[66] >> 18;
+    for (i = 0; i < 66; i++) {
         n += (s & 0x7fffff) << 5;
         r[i] = n & 0x7fffff;
         n >>= 23;
-        s = a[69 + i] + (s >> 23);
+        s = a[68 + i] + (s >> 23);
     }
     n += s << 5;
-    r[67] = n;
+    r[66] = n;
 #else
     sp_digit n, s;
     int i;
 
-    s = a[68]; n = a[67] >> 18;
+    s = a[67]; n = a[66] >> 18;
     for (i = 0; i < 64; i += 8) {
         n += (s & 0x7fffff) << 5; r[i+0] = n & 0x7fffff;
-        n >>= 23; s = a[i+69] + (s >> 23);
+        n >>= 23; s = a[i+68] + (s >> 23);
         n += (s & 0x7fffff) << 5; r[i+1] = n & 0x7fffff;
-        n >>= 23; s = a[i+70] + (s >> 23);
+        n >>= 23; s = a[i+69] + (s >> 23);
         n += (s & 0x7fffff) << 5; r[i+2] = n & 0x7fffff;
-        n >>= 23; s = a[i+71] + (s >> 23);
+        n >>= 23; s = a[i+70] + (s >> 23);
         n += (s & 0x7fffff) << 5; r[i+3] = n & 0x7fffff;
-        n >>= 23; s = a[i+72] + (s >> 23);
+        n >>= 23; s = a[i+71] + (s >> 23);
         n += (s & 0x7fffff) << 5; r[i+4] = n & 0x7fffff;
-        n >>= 23; s = a[i+73] + (s >> 23);
+        n >>= 23; s = a[i+72] + (s >> 23);
         n += (s & 0x7fffff) << 5; r[i+5] = n & 0x7fffff;
-        n >>= 23; s = a[i+74] + (s >> 23);
+        n >>= 23; s = a[i+73] + (s >> 23);
         n += (s & 0x7fffff) << 5; r[i+6] = n & 0x7fffff;
-        n >>= 23; s = a[i+75] + (s >> 23);
+        n >>= 23; s = a[i+74] + (s >> 23);
         n += (s & 0x7fffff) << 5; r[i+7] = n & 0x7fffff;
-        n >>= 23; s = a[i+76] + (s >> 23);
+        n >>= 23; s = a[i+75] + (s >> 23);
     }
     n += (s & 0x7fffff) << 5; r[64] = n & 0x7fffff;
-    n >>= 23; s = a[133] + (s >> 23);
+    n >>= 23; s = a[132] + (s >> 23);
     n += (s & 0x7fffff) << 5; r[65] = n & 0x7fffff;
-    n >>= 23; s = a[134] + (s >> 23);
-    n += (s & 0x7fffff) << 5; r[66] = n & 0x7fffff;
-    n >>= 23; s = a[135] + (s >> 23);
-    n += s << 5;              r[67] = n;
-#endif /* WOLFSSL_SP_SMALL */
-    XMEMSET(&r[68], 0, sizeof(*r) * 68);
+    n >>= 23; s = a[133] + (s >> 23);
+    n += s << 5;              r[66] = n;
+#endif /* WOLFSSL_SP_SMALL */
+    XMEMSET(&r[67], 0, sizeof(*r) * 67U);
 }
 
 /* Reduce the number back to 3072 bits using Montgomery reduction.
@@ -4826,24 +5387,27 @@
  * m   The single precision number representing the modulus.
  * mp  The digit representing the negative inverse of m mod 2^n.
  */
-static void sp_3072_mont_reduce_68(sp_digit* a, sp_digit* m, sp_digit mp)
+static void sp_3072_mont_reduce_67(sp_digit* a, const sp_digit* m, sp_digit mp)
 {
     int i;
     sp_digit mu;
 
-    for (i=0; i<67; i++) {
+    sp_3072_norm_67(a + 67);
+
+    for (i=0; i<66; i++) {
         mu = (a[i] * mp) & 0x7fffff;
-        sp_3072_mul_add_68(a+i, m, mu);
+        sp_3072_mul_add_67(a+i, m, mu);
         a[i+1] += a[i] >> 23;
     }
-    mu = (a[i] * mp) & 0x3ffffl;
-    sp_3072_mul_add_68(a+i, m, mu);
+    mu = (a[i] * mp) & 0x3ffffL;
+    sp_3072_mul_add_67(a+i, m, mu);
     a[i+1] += a[i] >> 23;
     a[i] &= 0x7fffff;
 
-    sp_3072_mont_shift_68(a, a);
-    sp_3072_cond_sub_68(a, a, m, 0 - ((a[67] >> 18) > 0));
-    sp_3072_norm_68(a);
+    sp_3072_mont_shift_67(a, a);
+    sp_3072_cond_sub_67(a, a, m, 0 - (((a[66] >> 18) > 0) ?
+            (sp_digit)1 : (sp_digit)0));
+    sp_3072_norm_67(a);
 }
 
 /* Multiply two Montogmery form numbers mod the modulus (prime).
@@ -4855,11 +5419,11 @@
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_3072_mont_mul_68(sp_digit* r, sp_digit* a, sp_digit* b,
-        sp_digit* m, sp_digit mp)
-{
-    sp_3072_mul_68(r, a, b);
-    sp_3072_mont_reduce_68(r, m, mp);
+static void sp_3072_mont_mul_67(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_3072_mul_67(r, a, b);
+    sp_3072_mont_reduce_67(r, m, mp);
 }
 
 /* Square the Montgomery form number. (r = a * a mod m)
@@ -4869,11 +5433,11 @@
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_3072_mont_sqr_68(sp_digit* r, sp_digit* a, sp_digit* m,
+static void sp_3072_mont_sqr_67(sp_digit* r, const sp_digit* a, const sp_digit* m,
         sp_digit mp)
 {
-    sp_3072_sqr_68(r, a);
-    sp_3072_mont_reduce_68(r, m, mp);
+    sp_3072_sqr_67(r, a);
+    sp_3072_mont_reduce_67(r, m, mp);
 }
 
 /* Multiply a by scalar b into r. (r = a * b)
@@ -4882,20 +5446,20 @@
  * a  A single precision integer.
  * b  A scalar.
  */
-SP_NOINLINE static void sp_3072_mul_d_68(sp_digit* r, const sp_digit* a,
-    const sp_digit b)
+SP_NOINLINE static void sp_3072_mul_d_67(sp_digit* r, const sp_digit* a,
+    sp_digit b)
 {
 #ifdef WOLFSSL_SP_SMALL
     int64_t tb = b;
     int64_t t = 0;
     int i;
 
-    for (i = 0; i < 68; i++) {
+    for (i = 0; i < 67; i++) {
         t += tb * a[i];
         r[i] = t & 0x7fffff;
         t >>= 23;
     }
-    r[68] = (sp_digit)t;
+    r[67] = (sp_digit)t;
 #else
     int64_t tb = b;
     int64_t t[8];
@@ -4924,9 +5488,7 @@
     r[65] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff);
     t[2] = tb * a[66];
     r[66] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff);
-    t[3] = tb * a[67];
-    r[67] = (sp_digit)(t[2] >> 23) + (t[3] & 0x7fffff);
-    r[68] =  (sp_digit)(t[3] >> 23);
+    r[67] =  (sp_digit)(t[2] >> 23);
 #endif /* WOLFSSL_SP_SMALL */
 }
 
@@ -4938,14 +5500,15 @@
  * b  A single precision number to add.
  * m  Mask value to apply.
  */
-static void sp_3072_cond_add_68(sp_digit* r, const sp_digit* a,
+static void sp_3072_cond_add_67(sp_digit* r, const sp_digit* a,
         const sp_digit* b, const sp_digit m)
 {
 #ifdef WOLFSSL_SP_SMALL
     int i;
 
-    for (i = 0; i < 68; i++)
+    for (i = 0; i < 67; i++) {
         r[i] = a[i] + (b[i] & m);
+    }
 #else
     int i;
 
@@ -4962,127 +5525,148 @@
     r[64] = a[64] + (b[64] & m);
     r[65] = a[65] + (b[65] & m);
     r[66] = a[66] + (b[66] & m);
-    r[67] = a[67] + (b[67] & m);
 #endif /* WOLFSSL_SP_SMALL */
 }
 
 #ifdef WOLFSSL_SMALL
-/* Sub b from a into r. (r = a - b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A single precision integer.
- */
-SP_NOINLINE static int sp_3072_sub_68(sp_digit* r, const sp_digit* a,
-        const sp_digit* b)
-{
-    int i;
-
-    for (i = 0; i < 68; i++)
-        r[i] = a[i] - b[i];
-
-    return 0;
-}
-
-#endif
-#ifdef WOLFSSL_SMALL
 /* Add b to a into r. (r = a + b)
  *
  * r  A single precision integer.
  * a  A single precision integer.
  * b  A single precision integer.
  */
-SP_NOINLINE static int sp_3072_add_68(sp_digit* r, const sp_digit* a,
-        const sp_digit* b)
-{
-    int i;
-
-    for (i = 0; i < 68; i++)
+SP_NOINLINE static int sp_3072_add_67(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 67; i++) {
         r[i] = a[i] + b[i];
-
-    return 0;
-}
-#endif
+    }
+
+    return 0;
+}
+#endif
+#ifdef WOLFSSL_SP_DIV_32
+static WC_INLINE sp_digit sp_3072_div_word_67(sp_digit d1, sp_digit d0,
+    sp_digit dv)
+{
+    sp_digit d, r, t;
+
+    /* All 23 bits from d1 and top 8 bits from d0. */
+    d = (d1 << 8) | (d0 >> 15);
+    r = d / dv;
+    d -= r * dv;
+    /* Up to 9 bits in r */
+    /* Next 8 bits from d0. */
+    r <<= 8;
+    d <<= 8;
+    d |= (d0 >> 7) & ((1 << 8) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 17 bits in r */
+    /* Remaining 7 bits from d0. */
+    r <<= 7;
+    d <<= 7;
+    d |= d0 & ((1 << 7) - 1);
+    t = d / dv;
+    r += t;
+
+    return r;
+}
+#endif /* WOLFSSL_SP_DIV_32 */
+
 /* Divide d in a and put remainder into r (m*d + r = a)
  * m is not calculated as it is not needed at this time.
  *
- * a  Nmber to be divided.
+ * a  Number to be divided.
  * d  Number to divide with.
  * m  Multiplier result.
  * r  Remainder from the division.
  * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
  */
-static int sp_3072_div_68(sp_digit* a, sp_digit* d, sp_digit* m,
+static int sp_3072_div_67(const sp_digit* a, const sp_digit* d, sp_digit* m,
         sp_digit* r)
 {
     int i;
+#ifndef WOLFSSL_SP_DIV_32
     int64_t d1;
-    sp_digit div, r1;
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#endif
+    sp_digit dv, r1;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit* td;
 #else
-    sp_digit t1d[136], t2d[68 + 1];
+    sp_digit t1d[134], t2d[67 + 1];
 #endif
     sp_digit* t1;
     sp_digit* t2;
     int err = MP_OKAY;
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    td = XMALLOC(sizeof(sp_digit) * (3 * 68 + 1), NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    if (td != NULL) {
+    (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (3 * 67 + 1), NULL,
+                                                       DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
         t1 = td;
-        t2 = td + 2 * 68;
-    }
-    else
-        err = MEMORY_E;
-#else
-    t1 = t1d;
-    t2 = t2d;
-#endif
-
-    (void)m;
-
-    if (err == MP_OKAY) {
-        div = d[67];
-        XMEMCPY(t1, a, sizeof(*t1) * 2 * 68);
-        for (i=67; i>=0; i--) {
-            t1[68 + i] += t1[68 + i - 1] >> 23;
-            t1[68 + i - 1] &= 0x7fffff;
-            d1 = t1[68 + i];
+        t2 = td + 2 * 67;
+#else
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
+        dv = d[66];
+        XMEMCPY(t1, a, sizeof(*t1) * 2U * 67U);
+        for (i=66; i>=0; i--) {
+            t1[67 + i] += t1[67 + i - 1] >> 23;
+            t1[67 + i - 1] &= 0x7fffff;
+#ifndef WOLFSSL_SP_DIV_32
+            d1 = t1[67 + i];
             d1 <<= 23;
-            d1 += t1[68 + i - 1];
-            r1 = (sp_digit)(d1 / div);
-
-            sp_3072_mul_d_68(t2, d, r1);
-            sp_3072_sub_68(&t1[i], &t1[i], t2);
-            t1[68 + i] -= t2[68];
-            t1[68 + i] += t1[68 + i - 1] >> 23;
-            t1[68 + i - 1] &= 0x7fffff;
-            r1 = (((-t1[68 + i]) << 23) - t1[68 + i - 1]) / div;
+            d1 += t1[67 + i - 1];
+            r1 = (sp_digit)(d1 / dv);
+#else
+            r1 = sp_3072_div_word_67(t1[67 + i], t1[67 + i - 1], dv);
+#endif
+
+            sp_3072_mul_d_67(t2, d, r1);
+            (void)sp_3072_sub_67(&t1[i], &t1[i], t2);
+            t1[67 + i] -= t2[67];
+            t1[67 + i] += t1[67 + i - 1] >> 23;
+            t1[67 + i - 1] &= 0x7fffff;
+            r1 = (((-t1[67 + i]) << 23) - t1[67 + i - 1]) / dv;
             r1++;
-            sp_3072_mul_d_68(t2, d, r1);
-            sp_3072_add_68(&t1[i], &t1[i], t2);
-            t1[68 + i] += t1[68 + i - 1] >> 23;
-            t1[68 + i - 1] &= 0x7fffff;
-        }
-        t1[68 - 1] += t1[68 - 2] >> 23;
-        t1[68 - 2] &= 0x7fffff;
-        d1 = t1[68 - 1];
-        r1 = (sp_digit)(d1 / div);
-
-        sp_3072_mul_d_68(t2, d, r1);
-        sp_3072_sub_68(t1, t1, t2);
-        XMEMCPY(r, t1, sizeof(*r) * 2 * 68);
-        for (i=0; i<66; i++) {
+            sp_3072_mul_d_67(t2, d, r1);
+            (void)sp_3072_add_67(&t1[i], &t1[i], t2);
+            t1[67 + i] += t1[67 + i - 1] >> 23;
+            t1[67 + i - 1] &= 0x7fffff;
+        }
+        t1[67 - 1] += t1[67 - 2] >> 23;
+        t1[67 - 2] &= 0x7fffff;
+        r1 = t1[67 - 1] / dv;
+
+        sp_3072_mul_d_67(t2, d, r1);
+        (void)sp_3072_sub_67(t1, t1, t2);
+        XMEMCPY(r, t1, sizeof(*r) * 2U * 67U);
+        for (i=0; i<65; i++) {
             r[i+1] += r[i] >> 23;
             r[i] &= 0x7fffff;
         }
-        sp_3072_cond_add_68(r, r, d, 0 - (r[67] < 0));
-    }
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (td != NULL)
+        sp_3072_cond_add_67(r, r, d, 0 - ((r[66] < 0) ?
+                    (sp_digit)1 : (sp_digit)0));
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
 #endif
 
     return err;
@@ -5095,9 +5679,9 @@
  * m  A single precision number that is the modulus to reduce with.
  * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
  */
-static int sp_3072_mod_68(sp_digit* r, sp_digit* a, sp_digit* m)
-{
-    return sp_3072_div_68(a, m, NULL, r);
+static int sp_3072_mod_67(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_3072_div_67(a, m, NULL, r);
 }
 
 /* Modular exponentiate a to the e mod m. (r = a^e mod m)
@@ -5109,8 +5693,8 @@
  * m     A single precision number that is the modulus.
  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
  */
-static int sp_3072_mod_exp_68(sp_digit* r, sp_digit* a, sp_digit* e, int bits,
-    sp_digit* m, int reduceA)
+static int sp_3072_mod_exp_67(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits,
+    const sp_digit* m, int reduceA)
 {
 #ifdef WOLFSSL_SP_SMALL
     sp_digit* td;
@@ -5122,29 +5706,32 @@
     int c, y;
     int err = MP_OKAY;
 
-    td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 68 * 2, NULL,
+    td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 67 * 2, NULL,
                             DYNAMIC_TYPE_TMP_BUFFER);
-    if (td == NULL)
-        err = MEMORY_E;
-
-    if (err == MP_OKAY) {
-        XMEMSET(td, 0, sizeof(*td) * 3 * 68 * 2);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        XMEMSET(td, 0, sizeof(*td) * 3U * 67U * 2U);
 
         norm = t[0] = td;
-        t[1] = &td[68 * 2];
-        t[2] = &td[2 * 68 * 2];
+        t[1] = &td[67 * 2];
+        t[2] = &td[2 * 67 * 2];
 
         sp_3072_mont_setup(m, &mp);
-        sp_3072_mont_norm_68(norm, m);
-
-        if (reduceA)
-            err = sp_3072_mod_68(t[1], a, m);
-        else
-            XMEMCPY(t[1], a, sizeof(sp_digit) * 68);
-    }
-    if (err == MP_OKAY) {
-        sp_3072_mul_68(t[1], t[1], norm);
-        err = sp_3072_mod_68(t[1], t[1], m);
+        sp_3072_mont_norm_67(norm, m);
+
+        if (reduceA != 0) {
+            err = sp_3072_mod_67(t[1], a, m);
+        }
+        else {
+            XMEMCPY(t[1], a, sizeof(sp_digit) * 67U);
+        }
+    }
+    if (err == MP_OKAY) {
+        sp_3072_mul_67(t[1], t[1], norm);
+        err = sp_3072_mod_67(t[1], t[1], m);
     }
 
     if (err == MP_OKAY) {
@@ -5153,8 +5740,9 @@
         n = e[i--] << (23 - c);
         for (; ; c--) {
             if (c == 0) {
-                if (i == -1)
+                if (i == -1) {
                     break;
+                }
 
                 n = e[i--];
                 c = 23;
@@ -5163,31 +5751,33 @@
             y = (n >> 22) & 1;
             n <<= 1;
 
-            sp_3072_mont_mul_68(t[y^1], t[0], t[1], m, mp);
+            sp_3072_mont_mul_67(t[y^1], t[0], t[1], m, mp);
 
             XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
                                   ((size_t)t[1] & addr_mask[y])),
-                    sizeof(*t[2]) * 68 * 2);
-            sp_3072_mont_sqr_68(t[2], t[2], m, mp);
+                    sizeof(*t[2]) * 67 * 2);
+            sp_3072_mont_sqr_67(t[2], t[2], m, mp);
             XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
                             ((size_t)t[1] & addr_mask[y])), t[2],
-                    sizeof(*t[2]) * 68 * 2);
-        }
-
-        sp_3072_mont_reduce_68(t[0], m, mp);
-        n = sp_3072_cmp_68(t[0], m);
-        sp_3072_cond_sub_68(t[0], t[0], m, (n < 0) - 1);
-        XMEMCPY(r, t[0], sizeof(*r) * 68 * 2);
-
-    }
-
-    if (td != NULL)
+                    sizeof(*t[2]) * 67 * 2);
+        }
+
+        sp_3072_mont_reduce_67(t[0], m, mp);
+        n = sp_3072_cmp_67(t[0], m);
+        sp_3072_cond_sub_67(t[0], t[0], m, ((n < 0) ?
+                    (sp_digit)1 : (sp_digit)0) - 1);
+        XMEMCPY(r, t[0], sizeof(*r) * 67 * 2);
+
+    }
+
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
 
     return err;
 #elif defined(WOLFSSL_SP_CACHE_RESISTANT)
 #ifndef WOLFSSL_SMALL_STACK
-    sp_digit t[3][136];
+    sp_digit t[3][134];
 #else
     sp_digit* td;
     sp_digit* t[3];
@@ -5200,35 +5790,34 @@
     int err = MP_OKAY;
 
 #ifdef WOLFSSL_SMALL_STACK
-    td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 68 * 2, NULL,
+    td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 67 * 2, NULL,
                             DYNAMIC_TYPE_TMP_BUFFER);
-    if (td == NULL)
-        err = MEMORY_E;
-
-    if (err == MP_OKAY) {
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
         t[0] = td;
-        t[1] = &td[68 * 2];
-        t[2] = &td[2 * 68 * 2];
+        t[1] = &td[67 * 2];
+        t[2] = &td[2 * 67 * 2];
+#endif
         norm = t[0];
-    }
-#else
-    norm = t[0];
-#endif
-
-    if (err == MP_OKAY) {
+
         sp_3072_mont_setup(m, &mp);
-        sp_3072_mont_norm_68(norm, m);
-
-        if (reduceA) {
-            err = sp_3072_mod_68(t[1], a, m);
+        sp_3072_mont_norm_67(norm, m);
+
+        if (reduceA != 0) {
+            err = sp_3072_mod_67(t[1], a, m);
             if (err == MP_OKAY) {
-                sp_3072_mul_68(t[1], t[1], norm);
-                err = sp_3072_mod_68(t[1], t[1], m);
-            }
-        }
-        else {
-            sp_3072_mul_68(t[1], a, norm);
-            err = sp_3072_mod_68(t[1], t[1], m);
+                sp_3072_mul_67(t[1], t[1], norm);
+                err = sp_3072_mod_67(t[1], t[1], m);
+            }
+        }
+        else {
+            sp_3072_mul_67(t[1], a, norm);
+            err = sp_3072_mod_67(t[1], t[1], m);
         }
     }
 
@@ -5238,8 +5827,9 @@
         n = e[i--] << (23 - c);
         for (; ; c--) {
             if (c == 0) {
-                if (i == -1)
+                if (i == -1) {
                     break;
+                }
 
                 n = e[i--];
                 c = 23;
@@ -5248,36 +5838,38 @@
             y = (n >> 22) & 1;
             n <<= 1;
 
-            sp_3072_mont_mul_68(t[y^1], t[0], t[1], m, mp);
+            sp_3072_mont_mul_67(t[y^1], t[0], t[1], m, mp);
 
             XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
                                  ((size_t)t[1] & addr_mask[y])), sizeof(t[2]));
-            sp_3072_mont_sqr_68(t[2], t[2], m, mp);
+            sp_3072_mont_sqr_67(t[2], t[2], m, mp);
             XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
                            ((size_t)t[1] & addr_mask[y])), t[2], sizeof(t[2]));
         }
 
-        sp_3072_mont_reduce_68(t[0], m, mp);
-        n = sp_3072_cmp_68(t[0], m);
-        sp_3072_cond_sub_68(t[0], t[0], m, (n < 0) - 1);
+        sp_3072_mont_reduce_67(t[0], m, mp);
+        n = sp_3072_cmp_67(t[0], m);
+        sp_3072_cond_sub_67(t[0], t[0], m, ((n < 0) ?
+                   (sp_digit)1 : (sp_digit)0) - 1);
         XMEMCPY(r, t[0], sizeof(t[0]));
     }
 
 #ifdef WOLFSSL_SMALL_STACK
-    if (td != NULL)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
 #endif
 
     return err;
 #else
 #ifndef WOLFSSL_SMALL_STACK
-    sp_digit t[32][136];
+    sp_digit t[32][134];
 #else
     sp_digit* t[32];
     sp_digit* td;
 #endif
     sp_digit* norm;
-    sp_digit rt[136];
+    sp_digit rt[134];
     sp_digit mp = 1;
     sp_digit n;
     int i;
@@ -5285,76 +5877,77 @@
     int err = MP_OKAY;
 
 #ifdef WOLFSSL_SMALL_STACK
-    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 136, NULL,
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 134, NULL,
                             DYNAMIC_TYPE_TMP_BUFFER);
-    if (td == NULL)
-        err = MEMORY_E;
-
-    if (err == MP_OKAY) {
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
         for (i=0; i<32; i++)
-            t[i] = td + i * 136;
+            t[i] = td + i * 134;
+#endif
         norm = t[0];
-    }
-#else
-    norm = t[0];
-#endif
-
-    if (err == MP_OKAY) {
+
         sp_3072_mont_setup(m, &mp);
-        sp_3072_mont_norm_68(norm, m);
-
-        if (reduceA) {
-            err = sp_3072_mod_68(t[1], a, m);
+        sp_3072_mont_norm_67(norm, m);
+
+        if (reduceA != 0) {
+            err = sp_3072_mod_67(t[1], a, m);
             if (err == MP_OKAY) {
-                sp_3072_mul_68(t[1], t[1], norm);
-                err = sp_3072_mod_68(t[1], t[1], m);
-            }
-        }
-        else {
-            sp_3072_mul_68(t[1], a, norm);
-            err = sp_3072_mod_68(t[1], t[1], m);
-        }
-    }
-
-    if (err == MP_OKAY) {
-        sp_3072_mont_sqr_68(t[ 2], t[ 1], m, mp);
-        sp_3072_mont_mul_68(t[ 3], t[ 2], t[ 1], m, mp);
-        sp_3072_mont_sqr_68(t[ 4], t[ 2], m, mp);
-        sp_3072_mont_mul_68(t[ 5], t[ 3], t[ 2], m, mp);
-        sp_3072_mont_sqr_68(t[ 6], t[ 3], m, mp);
-        sp_3072_mont_mul_68(t[ 7], t[ 4], t[ 3], m, mp);
-        sp_3072_mont_sqr_68(t[ 8], t[ 4], m, mp);
-        sp_3072_mont_mul_68(t[ 9], t[ 5], t[ 4], m, mp);
-        sp_3072_mont_sqr_68(t[10], t[ 5], m, mp);
-        sp_3072_mont_mul_68(t[11], t[ 6], t[ 5], m, mp);
-        sp_3072_mont_sqr_68(t[12], t[ 6], m, mp);
-        sp_3072_mont_mul_68(t[13], t[ 7], t[ 6], m, mp);
-        sp_3072_mont_sqr_68(t[14], t[ 7], m, mp);
-        sp_3072_mont_mul_68(t[15], t[ 8], t[ 7], m, mp);
-        sp_3072_mont_sqr_68(t[16], t[ 8], m, mp);
-        sp_3072_mont_mul_68(t[17], t[ 9], t[ 8], m, mp);
-        sp_3072_mont_sqr_68(t[18], t[ 9], m, mp);
-        sp_3072_mont_mul_68(t[19], t[10], t[ 9], m, mp);
-        sp_3072_mont_sqr_68(t[20], t[10], m, mp);
-        sp_3072_mont_mul_68(t[21], t[11], t[10], m, mp);
-        sp_3072_mont_sqr_68(t[22], t[11], m, mp);
-        sp_3072_mont_mul_68(t[23], t[12], t[11], m, mp);
-        sp_3072_mont_sqr_68(t[24], t[12], m, mp);
-        sp_3072_mont_mul_68(t[25], t[13], t[12], m, mp);
-        sp_3072_mont_sqr_68(t[26], t[13], m, mp);
-        sp_3072_mont_mul_68(t[27], t[14], t[13], m, mp);
-        sp_3072_mont_sqr_68(t[28], t[14], m, mp);
-        sp_3072_mont_mul_68(t[29], t[15], t[14], m, mp);
-        sp_3072_mont_sqr_68(t[30], t[15], m, mp);
-        sp_3072_mont_mul_68(t[31], t[16], t[15], m, mp);
+                sp_3072_mul_67(t[1], t[1], norm);
+                err = sp_3072_mod_67(t[1], t[1], m);
+            }
+        }
+        else {
+            sp_3072_mul_67(t[1], a, norm);
+            err = sp_3072_mod_67(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_3072_mont_sqr_67(t[ 2], t[ 1], m, mp);
+        sp_3072_mont_mul_67(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_3072_mont_sqr_67(t[ 4], t[ 2], m, mp);
+        sp_3072_mont_mul_67(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_3072_mont_sqr_67(t[ 6], t[ 3], m, mp);
+        sp_3072_mont_mul_67(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_3072_mont_sqr_67(t[ 8], t[ 4], m, mp);
+        sp_3072_mont_mul_67(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_3072_mont_sqr_67(t[10], t[ 5], m, mp);
+        sp_3072_mont_mul_67(t[11], t[ 6], t[ 5], m, mp);
+        sp_3072_mont_sqr_67(t[12], t[ 6], m, mp);
+        sp_3072_mont_mul_67(t[13], t[ 7], t[ 6], m, mp);
+        sp_3072_mont_sqr_67(t[14], t[ 7], m, mp);
+        sp_3072_mont_mul_67(t[15], t[ 8], t[ 7], m, mp);
+        sp_3072_mont_sqr_67(t[16], t[ 8], m, mp);
+        sp_3072_mont_mul_67(t[17], t[ 9], t[ 8], m, mp);
+        sp_3072_mont_sqr_67(t[18], t[ 9], m, mp);
+        sp_3072_mont_mul_67(t[19], t[10], t[ 9], m, mp);
+        sp_3072_mont_sqr_67(t[20], t[10], m, mp);
+        sp_3072_mont_mul_67(t[21], t[11], t[10], m, mp);
+        sp_3072_mont_sqr_67(t[22], t[11], m, mp);
+        sp_3072_mont_mul_67(t[23], t[12], t[11], m, mp);
+        sp_3072_mont_sqr_67(t[24], t[12], m, mp);
+        sp_3072_mont_mul_67(t[25], t[13], t[12], m, mp);
+        sp_3072_mont_sqr_67(t[26], t[13], m, mp);
+        sp_3072_mont_mul_67(t[27], t[14], t[13], m, mp);
+        sp_3072_mont_sqr_67(t[28], t[14], m, mp);
+        sp_3072_mont_mul_67(t[29], t[15], t[14], m, mp);
+        sp_3072_mont_sqr_67(t[30], t[15], m, mp);
+        sp_3072_mont_mul_67(t[31], t[16], t[15], m, mp);
 
         bits = ((bits + 4) / 5) * 5;
         i = ((bits + 22) / 23) - 1;
         c = bits % 23;
-        if (c == 0)
+        if (c == 0) {
             c = 23;
-        if (i < 68)
+        }
+        if (i < 67) {
             n = e[i--] << (32 - c);
+        }
         else {
             n = 0;
             i--;
@@ -5363,7 +5956,7 @@
             n |= e[i--] << (9 - c);
             c += 23;
         }
-        y = n >> 27;
+        y = (n >> 27) & 0x1f;
         n <<= 5;
         c -= 5;
         XMEMCPY(rt, t[y], sizeof(rt));
@@ -5376,50 +5969,53 @@
             n <<= 5;
             c -= 5;
 
-            sp_3072_mont_sqr_68(rt, rt, m, mp);
-            sp_3072_mont_sqr_68(rt, rt, m, mp);
-            sp_3072_mont_sqr_68(rt, rt, m, mp);
-            sp_3072_mont_sqr_68(rt, rt, m, mp);
-            sp_3072_mont_sqr_68(rt, rt, m, mp);
-
-            sp_3072_mont_mul_68(rt, rt, t[y], m, mp);
-        }
-
-        sp_3072_mont_reduce_68(rt, m, mp);
-        n = sp_3072_cmp_68(rt, m);
-        sp_3072_cond_sub_68(rt, rt, m, (n < 0) - 1);
+            sp_3072_mont_sqr_67(rt, rt, m, mp);
+            sp_3072_mont_sqr_67(rt, rt, m, mp);
+            sp_3072_mont_sqr_67(rt, rt, m, mp);
+            sp_3072_mont_sqr_67(rt, rt, m, mp);
+            sp_3072_mont_sqr_67(rt, rt, m, mp);
+
+            sp_3072_mont_mul_67(rt, rt, t[y], m, mp);
+        }
+
+        sp_3072_mont_reduce_67(rt, m, mp);
+        n = sp_3072_cmp_67(rt, m);
+        sp_3072_cond_sub_67(rt, rt, m, ((n < 0) ?
+                   (sp_digit)1 : (sp_digit)0) - 1);
         XMEMCPY(r, rt, sizeof(rt));
     }
 
 #ifdef WOLFSSL_SMALL_STACK
-    if (td != NULL)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return err;
-#endif
-}
-
-#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */
+    }
+#endif
+
+    return err;
+#endif
+}
+
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
 
 /* r = 2^n mod m where n is the number of bits to reduce by.
  * Given m must be 3072 bits, just need to subtract.
  *
  * r  A single precision number.
- * m  A signle precision number.
- */
-static void sp_3072_mont_norm_136(sp_digit* r, sp_digit* m)
+ * m  A single precision number.
+ */
+static void sp_3072_mont_norm_134(sp_digit* r, const sp_digit* m)
 {
     /* Set r = 2^n - 1. */
 #ifdef WOLFSSL_SP_SMALL
     int i;
 
-    for (i=0; i<135; i++)
+    for (i=0; i<133; i++) {
         r[i] = 0x7fffff;
-#else
-    int i;
-
-    for (i = 0; i < 136; i += 8) {
+    }
+#else
+    int i;
+
+    for (i = 0; i < 128; i += 8) {
         r[i + 0] = 0x7fffff;
         r[i + 1] = 0x7fffff;
         r[i + 2] = 0x7fffff;
@@ -5429,11 +6025,16 @@
         r[i + 6] = 0x7fffff;
         r[i + 7] = 0x7fffff;
     }
-#endif
-    r[135] = 0x1fffl;
+    r[128] = 0x7fffff;
+    r[129] = 0x7fffff;
+    r[130] = 0x7fffff;
+    r[131] = 0x7fffff;
+    r[132] = 0x7fffff;
+#endif
+    r[133] = 0x1fffL;
 
     /* r = (2^n - 1) mod n */
-    sp_3072_sub_136(r, r, m);
+    (void)sp_3072_sub_134(r, r, m);
 
     /* Add one so r = 2^n mod m */
     r[0] += 1;
@@ -5446,26 +6047,33 @@
  * return -ve, 0 or +ve if a is less than, equal to or greater than b
  * respectively.
  */
-static sp_digit sp_3072_cmp_136(const sp_digit* a, const sp_digit* b)
+static sp_digit sp_3072_cmp_134(const sp_digit* a, const sp_digit* b)
 {
     sp_digit r = 0;
 #ifdef WOLFSSL_SP_SMALL
     int i;
 
-    for (i=135; i>=0; i--)
-        r |= (a[i] - b[i]) & (0 - !r);
-#else
-    int i;
-
-    for (i = 128; i >= 0; i -= 8) {
-        r |= (a[i + 7] - b[i + 7]) & (0 - !r);
-        r |= (a[i + 6] - b[i + 6]) & (0 - !r);
-        r |= (a[i + 5] - b[i + 5]) & (0 - !r);
-        r |= (a[i + 4] - b[i + 4]) & (0 - !r);
-        r |= (a[i + 3] - b[i + 3]) & (0 - !r);
-        r |= (a[i + 2] - b[i + 2]) & (0 - !r);
-        r |= (a[i + 1] - b[i + 1]) & (0 - !r);
-        r |= (a[i + 0] - b[i + 0]) & (0 - !r);
+    for (i=133; i>=0; i--) {
+        r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    }
+#else
+    int i;
+
+    r |= (a[133] - b[133]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[132] - b[132]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[131] - b[131]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[130] - b[130]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[129] - b[129]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[128] - b[128]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    for (i = 120; i >= 0; i -= 8) {
+        r |= (a[i + 7] - b[i + 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 6] - b[i + 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 5] - b[i + 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 4] - b[i + 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 3] - b[i + 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 2] - b[i + 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 1] - b[i + 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 0] - b[i + 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
     }
 #endif /* WOLFSSL_SP_SMALL */
 
@@ -5480,18 +6088,19 @@
  * b  A single precision number to subtract.
  * m  Mask value to apply.
  */
-static void sp_3072_cond_sub_136(sp_digit* r, const sp_digit* a,
+static void sp_3072_cond_sub_134(sp_digit* r, const sp_digit* a,
         const sp_digit* b, const sp_digit m)
 {
 #ifdef WOLFSSL_SP_SMALL
     int i;
 
-    for (i = 0; i < 136; i++)
+    for (i = 0; i < 134; i++) {
         r[i] = a[i] - (b[i] & m);
-#else
-    int i;
-
-    for (i = 0; i < 136; i += 8) {
+    }
+#else
+    int i;
+
+    for (i = 0; i < 128; i += 8) {
         r[i + 0] = a[i + 0] - (b[i + 0] & m);
         r[i + 1] = a[i + 1] - (b[i + 1] & m);
         r[i + 2] = a[i + 2] - (b[i + 2] & m);
@@ -5501,6 +6110,12 @@
         r[i + 6] = a[i + 6] - (b[i + 6] & m);
         r[i + 7] = a[i + 7] - (b[i + 7] & m);
     }
+    r[128] = a[128] - (b[128] & m);
+    r[129] = a[129] - (b[129] & m);
+    r[130] = a[130] - (b[130] & m);
+    r[131] = a[131] - (b[131] & m);
+    r[132] = a[132] - (b[132] & m);
+    r[133] = a[133] - (b[133] & m);
 #endif /* WOLFSSL_SP_SMALL */
 }
 
@@ -5510,7 +6125,7 @@
  * a  A single precision integer.
  * b  A scalar.
  */
-SP_NOINLINE static void sp_3072_mul_add_136(sp_digit* r, const sp_digit* a,
+SP_NOINLINE static void sp_3072_mul_add_134(sp_digit* r, const sp_digit* a,
         const sp_digit b)
 {
 #ifdef WOLFSSL_SP_SMALL
@@ -5518,37 +6133,42 @@
     int64_t t = 0;
     int i;
 
-    for (i = 0; i < 136; i++) {
+    for (i = 0; i < 134; i++) {
         t += (tb * a[i]) + r[i];
         r[i] = t & 0x7fffff;
         t >>= 23;
     }
-    r[136] += t;
+    r[134] += t;
 #else
     int64_t tb = b;
     int64_t t[8];
     int i;
 
-    t[0] = tb * a[0]; r[0] += t[0] & 0x7fffff;
-    for (i = 0; i < 136; i += 8) {
+    t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x7fffff);
+    for (i = 0; i < 128; i += 8) {
         t[1] = tb * a[i+1];
-        r[i+1] += (t[0] >> 23) + (t[1] & 0x7fffff);
+        r[i+1] += (sp_digit)((t[0] >> 23) + (t[1] & 0x7fffff));
         t[2] = tb * a[i+2];
-        r[i+2] += (t[1] >> 23) + (t[2] & 0x7fffff);
+        r[i+2] += (sp_digit)((t[1] >> 23) + (t[2] & 0x7fffff));
         t[3] = tb * a[i+3];
-        r[i+3] += (t[2] >> 23) + (t[3] & 0x7fffff);
+        r[i+3] += (sp_digit)((t[2] >> 23) + (t[3] & 0x7fffff));
         t[4] = tb * a[i+4];
-        r[i+4] += (t[3] >> 23) + (t[4] & 0x7fffff);
+        r[i+4] += (sp_digit)((t[3] >> 23) + (t[4] & 0x7fffff));
         t[5] = tb * a[i+5];
-        r[i+5] += (t[4] >> 23) + (t[5] & 0x7fffff);
+        r[i+5] += (sp_digit)((t[4] >> 23) + (t[5] & 0x7fffff));
         t[6] = tb * a[i+6];
-        r[i+6] += (t[5] >> 23) + (t[6] & 0x7fffff);
+        r[i+6] += (sp_digit)((t[5] >> 23) + (t[6] & 0x7fffff));
         t[7] = tb * a[i+7];
-        r[i+7] += (t[6] >> 23) + (t[7] & 0x7fffff);
+        r[i+7] += (sp_digit)((t[6] >> 23) + (t[7] & 0x7fffff));
         t[0] = tb * a[i+8];
-        r[i+8] += (t[7] >> 23) + (t[0] & 0x7fffff);
-    }
-    r[136] +=  t[7] >> 23;
+        r[i+8] += (sp_digit)((t[7] >> 23) + (t[0] & 0x7fffff));
+    }
+    t[1] = tb * a[129]; r[129] += (sp_digit)((t[0] >> 23) + (t[1] & 0x7fffff));
+    t[2] = tb * a[130]; r[130] += (sp_digit)((t[1] >> 23) + (t[2] & 0x7fffff));
+    t[3] = tb * a[131]; r[131] += (sp_digit)((t[2] >> 23) + (t[3] & 0x7fffff));
+    t[4] = tb * a[132]; r[132] += (sp_digit)((t[3] >> 23) + (t[4] & 0x7fffff));
+    t[5] = tb * a[133]; r[133] += (sp_digit)((t[4] >> 23) + (t[5] & 0x7fffff));
+    r[134] +=  (sp_digit)(t[5] >> 23);
 #endif /* WOLFSSL_SP_SMALL */
 }
 
@@ -5556,11 +6176,11 @@
  *
  * a  Array of sp_digit to normalize.
  */
-static void sp_3072_norm_136(sp_digit* a)
-{
-#ifdef WOLFSSL_SP_SMALL
-    int i;
-    for (i = 0; i < 135; i++) {
+static void sp_3072_norm_134(sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    for (i = 0; i < 133; i++) {
         a[i+1] += a[i] >> 23;
         a[i] &= 0x7fffff;
     }
@@ -5587,10 +6207,6 @@
     a[131] &= 0x7fffff;
     a[132+1] += a[132] >> 23;
     a[132] &= 0x7fffff;
-    a[133+1] += a[133] >> 23;
-    a[133] &= 0x7fffff;
-    a[134+1] += a[134] >> 23;
-    a[134] &= 0x7fffff;
 #endif
 }
 
@@ -5599,44 +6215,49 @@
  * r  A single precision number.
  * a  A single precision number.
  */
-static void sp_3072_mont_shift_136(sp_digit* r, const sp_digit* a)
-{
-#ifdef WOLFSSL_SP_SMALL
-    int i;
-    int64_t n = a[135] >> 13;
-    n += ((int64_t)a[136]) << 10;
-
-    for (i = 0; i < 135; i++) {
+static void sp_3072_mont_shift_134(sp_digit* r, const sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    int64_t n = a[133] >> 13;
+    n += ((int64_t)a[134]) << 10;
+
+    for (i = 0; i < 133; i++) {
         r[i] = n & 0x7fffff;
         n >>= 23;
-        n += ((int64_t)a[137 + i]) << 10;
-    }
-    r[135] = (sp_digit)n;
-#else
-    int i;
-    int64_t n = a[135] >> 13;
-    n += ((int64_t)a[136]) << 10;
-    for (i = 0; i < 136; i += 8) {
+        n += ((int64_t)a[135 + i]) << 10;
+    }
+    r[133] = (sp_digit)n;
+#else
+    int i;
+    int64_t n = a[133] >> 13;
+    n += ((int64_t)a[134]) << 10;
+    for (i = 0; i < 128; i += 8) {
         r[i + 0] = n & 0x7fffff;
-        n >>= 23; n += ((int64_t)a[i + 137]) << 10;
+        n >>= 23; n += ((int64_t)a[i + 135]) << 10;
         r[i + 1] = n & 0x7fffff;
+        n >>= 23; n += ((int64_t)a[i + 136]) << 10;
+        r[i + 2] = n & 0x7fffff;
+        n >>= 23; n += ((int64_t)a[i + 137]) << 10;
+        r[i + 3] = n & 0x7fffff;
         n >>= 23; n += ((int64_t)a[i + 138]) << 10;
-        r[i + 2] = n & 0x7fffff;
+        r[i + 4] = n & 0x7fffff;
         n >>= 23; n += ((int64_t)a[i + 139]) << 10;
-        r[i + 3] = n & 0x7fffff;
+        r[i + 5] = n & 0x7fffff;
         n >>= 23; n += ((int64_t)a[i + 140]) << 10;
-        r[i + 4] = n & 0x7fffff;
-        n >>= 23; n += ((int64_t)a[i + 141]) << 10;
-        r[i + 5] = n & 0x7fffff;
-        n >>= 23; n += ((int64_t)a[i + 142]) << 10;
         r[i + 6] = n & 0x7fffff;
-        n >>= 23; n += ((int64_t)a[i + 143]) << 10;
+        n >>= 23; n += ((int64_t)a[i + 141]) << 10;
         r[i + 7] = n & 0x7fffff;
-        n >>= 23; n += ((int64_t)a[i + 144]) << 10;
-    }
-    r[135] = (sp_digit)n;
-#endif /* WOLFSSL_SP_SMALL */
-    XMEMSET(&r[136], 0, sizeof(*r) * 136);
+        n >>= 23; n += ((int64_t)a[i + 142]) << 10;
+    }
+    r[128] = n & 0x7fffff; n >>= 23; n += ((int64_t)a[263]) << 10;
+    r[129] = n & 0x7fffff; n >>= 23; n += ((int64_t)a[264]) << 10;
+    r[130] = n & 0x7fffff; n >>= 23; n += ((int64_t)a[265]) << 10;
+    r[131] = n & 0x7fffff; n >>= 23; n += ((int64_t)a[266]) << 10;
+    r[132] = n & 0x7fffff; n >>= 23; n += ((int64_t)a[267]) << 10;
+    r[133] = (sp_digit)n;
+#endif /* WOLFSSL_SP_SMALL */
+    XMEMSET(&r[134], 0, sizeof(*r) * 134U);
 }
 
 /* Reduce the number back to 3072 bits using Montgomery reduction.
@@ -5645,37 +6266,52 @@
  * m   The single precision number representing the modulus.
  * mp  The digit representing the negative inverse of m mod 2^n.
  */
-static void sp_3072_mont_reduce_136(sp_digit* a, sp_digit* m, sp_digit mp)
+static void sp_3072_mont_reduce_134(sp_digit* a, const sp_digit* m, sp_digit mp)
 {
     int i;
     sp_digit mu;
 
+    sp_3072_norm_134(a + 134);
+
+#ifdef WOLFSSL_SP_DH
     if (mp != 1) {
-        for (i=0; i<135; i++) {
+        for (i=0; i<133; i++) {
             mu = (a[i] * mp) & 0x7fffff;
-            sp_3072_mul_add_136(a+i, m, mu);
+            sp_3072_mul_add_134(a+i, m, mu);
             a[i+1] += a[i] >> 23;
         }
-        mu = (a[i] * mp) & 0x1fffl;
-        sp_3072_mul_add_136(a+i, m, mu);
+        mu = (a[i] * mp) & 0x1fffL;
+        sp_3072_mul_add_134(a+i, m, mu);
         a[i+1] += a[i] >> 23;
         a[i] &= 0x7fffff;
     }
     else {
-        for (i=0; i<135; i++) {
+        for (i=0; i<133; i++) {
             mu = a[i] & 0x7fffff;
-            sp_3072_mul_add_136(a+i, m, mu);
+            sp_3072_mul_add_134(a+i, m, mu);
             a[i+1] += a[i] >> 23;
         }
-        mu = a[i] & 0x1fffl;
-        sp_3072_mul_add_136(a+i, m, mu);
+        mu = a[i] & 0x1fffL;
+        sp_3072_mul_add_134(a+i, m, mu);
         a[i+1] += a[i] >> 23;
         a[i] &= 0x7fffff;
     }
-
-    sp_3072_mont_shift_136(a, a);
-    sp_3072_cond_sub_136(a, a, m, 0 - ((a[135] >> 13) > 0));
-    sp_3072_norm_136(a);
+#else
+    for (i=0; i<133; i++) {
+        mu = (a[i] * mp) & 0x7fffff;
+        sp_3072_mul_add_134(a+i, m, mu);
+        a[i+1] += a[i] >> 23;
+    }
+    mu = (a[i] * mp) & 0x1fffL;
+    sp_3072_mul_add_134(a+i, m, mu);
+    a[i+1] += a[i] >> 23;
+    a[i] &= 0x7fffff;
+#endif
+
+    sp_3072_mont_shift_134(a, a);
+    sp_3072_cond_sub_134(a, a, m, 0 - (((a[133] >> 13) > 0) ?
+            (sp_digit)1 : (sp_digit)0));
+    sp_3072_norm_134(a);
 }
 
 /* Multiply two Montogmery form numbers mod the modulus (prime).
@@ -5687,11 +6323,11 @@
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_3072_mont_mul_136(sp_digit* r, sp_digit* a, sp_digit* b,
-        sp_digit* m, sp_digit mp)
-{
-    sp_3072_mul_136(r, a, b);
-    sp_3072_mont_reduce_136(r, m, mp);
+static void sp_3072_mont_mul_134(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_3072_mul_134(r, a, b);
+    sp_3072_mont_reduce_134(r, m, mp);
 }
 
 /* Square the Montgomery form number. (r = a * a mod m)
@@ -5701,11 +6337,11 @@
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_3072_mont_sqr_136(sp_digit* r, sp_digit* a, sp_digit* m,
+static void sp_3072_mont_sqr_134(sp_digit* r, const sp_digit* a, const sp_digit* m,
         sp_digit mp)
 {
-    sp_3072_sqr_136(r, a);
-    sp_3072_mont_reduce_136(r, m, mp);
+    sp_3072_sqr_134(r, a);
+    sp_3072_mont_reduce_134(r, m, mp);
 }
 
 /* Multiply a by scalar b into r. (r = a * b)
@@ -5714,75 +6350,27 @@
  * a  A single precision integer.
  * b  A scalar.
  */
-SP_NOINLINE static void sp_3072_mul_d_136(sp_digit* r, const sp_digit* a,
-    const sp_digit b)
+SP_NOINLINE static void sp_3072_mul_d_268(sp_digit* r, const sp_digit* a,
+    sp_digit b)
 {
 #ifdef WOLFSSL_SP_SMALL
     int64_t tb = b;
     int64_t t = 0;
     int i;
 
-    for (i = 0; i < 136; i++) {
+    for (i = 0; i < 268; i++) {
         t += tb * a[i];
         r[i] = t & 0x7fffff;
         t >>= 23;
     }
-    r[136] = (sp_digit)t;
+    r[268] = (sp_digit)t;
 #else
     int64_t tb = b;
     int64_t t[8];
     int i;
 
     t[0] = tb * a[0]; r[0] = t[0] & 0x7fffff;
-    for (i = 0; i < 136; i += 8) {
-        t[1] = tb * a[i+1];
-        r[i+1] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff);
-        t[2] = tb * a[i+2];
-        r[i+2] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff);
-        t[3] = tb * a[i+3];
-        r[i+3] = (sp_digit)(t[2] >> 23) + (t[3] & 0x7fffff);
-        t[4] = tb * a[i+4];
-        r[i+4] = (sp_digit)(t[3] >> 23) + (t[4] & 0x7fffff);
-        t[5] = tb * a[i+5];
-        r[i+5] = (sp_digit)(t[4] >> 23) + (t[5] & 0x7fffff);
-        t[6] = tb * a[i+6];
-        r[i+6] = (sp_digit)(t[5] >> 23) + (t[6] & 0x7fffff);
-        t[7] = tb * a[i+7];
-        r[i+7] = (sp_digit)(t[6] >> 23) + (t[7] & 0x7fffff);
-        t[0] = tb * a[i+8];
-        r[i+8] = (sp_digit)(t[7] >> 23) + (t[0] & 0x7fffff);
-    }
-    r[136] =  (sp_digit)(t[7] >> 23);
-#endif /* WOLFSSL_SP_SMALL */
-}
-
-/* Multiply a by scalar b into r. (r = a * b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A scalar.
- */
-SP_NOINLINE static void sp_3072_mul_d_272(sp_digit* r, const sp_digit* a,
-    const sp_digit b)
-{
-#ifdef WOLFSSL_SP_SMALL
-    int64_t tb = b;
-    int64_t t = 0;
-    int i;
-
-    for (i = 0; i < 272; i++) {
-        t += tb * a[i];
-        r[i] = t & 0x7fffff;
-        t >>= 23;
-    }
-    r[272] = (sp_digit)t;
-#else
-    int64_t tb = b;
-    int64_t t[8];
-    int i;
-
-    t[0] = tb * a[0]; r[0] = t[0] & 0x7fffff;
-    for (i = 0; i < 272; i += 8) {
+    for (i = 0; i < 264; i += 8) {
         t[1] = tb * a[i+1];
         r[i+1] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff);
         t[2] = tb * a[i+2];
@@ -5800,7 +6388,13 @@
         t[0] = tb * a[i+8];
         r[i+8] = (sp_digit)(t[7] >> 23) + (t[0] & 0x7fffff);
     }
-    r[272] =  (sp_digit)(t[7] >> 23);
+    t[1] = tb * a[265];
+    r[265] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff);
+    t[2] = tb * a[266];
+    r[266] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff);
+    t[3] = tb * a[267];
+    r[267] = (sp_digit)(t[2] >> 23) + (t[3] & 0x7fffff);
+    r[268] =  (sp_digit)(t[3] >> 23);
 #endif /* WOLFSSL_SP_SMALL */
 }
 
@@ -5812,18 +6406,19 @@
  * b  A single precision number to add.
  * m  Mask value to apply.
  */
-static void sp_3072_cond_add_136(sp_digit* r, const sp_digit* a,
+static void sp_3072_cond_add_134(sp_digit* r, const sp_digit* a,
         const sp_digit* b, const sp_digit m)
 {
 #ifdef WOLFSSL_SP_SMALL
     int i;
 
-    for (i = 0; i < 136; i++)
+    for (i = 0; i < 134; i++) {
         r[i] = a[i] + (b[i] & m);
-#else
-    int i;
-
-    for (i = 0; i < 136; i += 8) {
+    }
+#else
+    int i;
+
+    for (i = 0; i < 128; i += 8) {
         r[i + 0] = a[i + 0] + (b[i + 0] & m);
         r[i + 1] = a[i + 1] + (b[i + 1] & m);
         r[i + 2] = a[i + 2] + (b[i + 2] & m);
@@ -5833,6 +6428,12 @@
         r[i + 6] = a[i + 6] + (b[i + 6] & m);
         r[i + 7] = a[i + 7] + (b[i + 7] & m);
     }
+    r[128] = a[128] + (b[128] & m);
+    r[129] = a[129] + (b[129] & m);
+    r[130] = a[130] + (b[130] & m);
+    r[131] = a[131] + (b[131] & m);
+    r[132] = a[132] + (b[132] & m);
+    r[133] = a[133] + (b[133] & m);
 #endif /* WOLFSSL_SP_SMALL */
 }
 
@@ -5843,13 +6444,14 @@
  * a  A single precision integer.
  * b  A single precision integer.
  */
-SP_NOINLINE static int sp_3072_sub_136(sp_digit* r, const sp_digit* a,
-        const sp_digit* b)
-{
-    int i;
-
-    for (i = 0; i < 136; i++)
+SP_NOINLINE static int sp_3072_sub_134(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 134; i++) {
         r[i] = a[i] - b[i];
+    }
 
     return 0;
 }
@@ -5862,163 +6464,75 @@
  * a  A single precision integer.
  * b  A single precision integer.
  */
-SP_NOINLINE static int sp_3072_add_136(sp_digit* r, const sp_digit* a,
-        const sp_digit* b)
-{
-    int i;
-
-    for (i = 0; i < 136; i++)
+SP_NOINLINE static int sp_3072_add_134(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 134; i++) {
         r[i] = a[i] + b[i];
-
-    return 0;
-}
-#endif
-SP_NOINLINE static void sp_3072_rshift_136(sp_digit* r, sp_digit* a, byte n)
-{
-#ifdef WOLFSSL_SP_SMALL
-    int i;
-
-    for (i=0; i<135; i++)
+    }
+
+    return 0;
+}
+#endif
+SP_NOINLINE static void sp_3072_rshift_134(sp_digit* r, sp_digit* a, byte n)
+{
+    int i;
+
+#ifdef WOLFSSL_SP_SMALL
+    for (i=0; i<133; i++) {
         r[i] = ((a[i] >> n) | (a[i + 1] << (23 - n))) & 0x7fffff;
-#else
-    r[0] = ((a[0] >> n) | (a[1] << (23 - n))) & 0x7fffff;
-    r[1] = ((a[1] >> n) | (a[2] << (23 - n))) & 0x7fffff;
-    r[2] = ((a[2] >> n) | (a[3] << (23 - n))) & 0x7fffff;
-    r[3] = ((a[3] >> n) | (a[4] << (23 - n))) & 0x7fffff;
-    r[4] = ((a[4] >> n) | (a[5] << (23 - n))) & 0x7fffff;
-    r[5] = ((a[5] >> n) | (a[6] << (23 - n))) & 0x7fffff;
-    r[6] = ((a[6] >> n) | (a[7] << (23 - n))) & 0x7fffff;
-    r[7] = ((a[7] >> n) | (a[8] << (23 - n))) & 0x7fffff;
-    r[8] = ((a[8] >> n) | (a[9] << (23 - n))) & 0x7fffff;
-    r[9] = ((a[9] >> n) | (a[10] << (23 - n))) & 0x7fffff;
-    r[10] = ((a[10] >> n) | (a[11] << (23 - n))) & 0x7fffff;
-    r[11] = ((a[11] >> n) | (a[12] << (23 - n))) & 0x7fffff;
-    r[12] = ((a[12] >> n) | (a[13] << (23 - n))) & 0x7fffff;
-    r[13] = ((a[13] >> n) | (a[14] << (23 - n))) & 0x7fffff;
-    r[14] = ((a[14] >> n) | (a[15] << (23 - n))) & 0x7fffff;
-    r[15] = ((a[15] >> n) | (a[16] << (23 - n))) & 0x7fffff;
-    r[16] = ((a[16] >> n) | (a[17] << (23 - n))) & 0x7fffff;
-    r[17] = ((a[17] >> n) | (a[18] << (23 - n))) & 0x7fffff;
-    r[18] = ((a[18] >> n) | (a[19] << (23 - n))) & 0x7fffff;
-    r[19] = ((a[19] >> n) | (a[20] << (23 - n))) & 0x7fffff;
-    r[20] = ((a[20] >> n) | (a[21] << (23 - n))) & 0x7fffff;
-    r[21] = ((a[21] >> n) | (a[22] << (23 - n))) & 0x7fffff;
-    r[22] = ((a[22] >> n) | (a[23] << (23 - n))) & 0x7fffff;
-    r[23] = ((a[23] >> n) | (a[24] << (23 - n))) & 0x7fffff;
-    r[24] = ((a[24] >> n) | (a[25] << (23 - n))) & 0x7fffff;
-    r[25] = ((a[25] >> n) | (a[26] << (23 - n))) & 0x7fffff;
-    r[26] = ((a[26] >> n) | (a[27] << (23 - n))) & 0x7fffff;
-    r[27] = ((a[27] >> n) | (a[28] << (23 - n))) & 0x7fffff;
-    r[28] = ((a[28] >> n) | (a[29] << (23 - n))) & 0x7fffff;
-    r[29] = ((a[29] >> n) | (a[30] << (23 - n))) & 0x7fffff;
-    r[30] = ((a[30] >> n) | (a[31] << (23 - n))) & 0x7fffff;
-    r[31] = ((a[31] >> n) | (a[32] << (23 - n))) & 0x7fffff;
-    r[32] = ((a[32] >> n) | (a[33] << (23 - n))) & 0x7fffff;
-    r[33] = ((a[33] >> n) | (a[34] << (23 - n))) & 0x7fffff;
-    r[34] = ((a[34] >> n) | (a[35] << (23 - n))) & 0x7fffff;
-    r[35] = ((a[35] >> n) | (a[36] << (23 - n))) & 0x7fffff;
-    r[36] = ((a[36] >> n) | (a[37] << (23 - n))) & 0x7fffff;
-    r[37] = ((a[37] >> n) | (a[38] << (23 - n))) & 0x7fffff;
-    r[38] = ((a[38] >> n) | (a[39] << (23 - n))) & 0x7fffff;
-    r[39] = ((a[39] >> n) | (a[40] << (23 - n))) & 0x7fffff;
-    r[40] = ((a[40] >> n) | (a[41] << (23 - n))) & 0x7fffff;
-    r[41] = ((a[41] >> n) | (a[42] << (23 - n))) & 0x7fffff;
-    r[42] = ((a[42] >> n) | (a[43] << (23 - n))) & 0x7fffff;
-    r[43] = ((a[43] >> n) | (a[44] << (23 - n))) & 0x7fffff;
-    r[44] = ((a[44] >> n) | (a[45] << (23 - n))) & 0x7fffff;
-    r[45] = ((a[45] >> n) | (a[46] << (23 - n))) & 0x7fffff;
-    r[46] = ((a[46] >> n) | (a[47] << (23 - n))) & 0x7fffff;
-    r[47] = ((a[47] >> n) | (a[48] << (23 - n))) & 0x7fffff;
-    r[48] = ((a[48] >> n) | (a[49] << (23 - n))) & 0x7fffff;
-    r[49] = ((a[49] >> n) | (a[50] << (23 - n))) & 0x7fffff;
-    r[50] = ((a[50] >> n) | (a[51] << (23 - n))) & 0x7fffff;
-    r[51] = ((a[51] >> n) | (a[52] << (23 - n))) & 0x7fffff;
-    r[52] = ((a[52] >> n) | (a[53] << (23 - n))) & 0x7fffff;
-    r[53] = ((a[53] >> n) | (a[54] << (23 - n))) & 0x7fffff;
-    r[54] = ((a[54] >> n) | (a[55] << (23 - n))) & 0x7fffff;
-    r[55] = ((a[55] >> n) | (a[56] << (23 - n))) & 0x7fffff;
-    r[56] = ((a[56] >> n) | (a[57] << (23 - n))) & 0x7fffff;
-    r[57] = ((a[57] >> n) | (a[58] << (23 - n))) & 0x7fffff;
-    r[58] = ((a[58] >> n) | (a[59] << (23 - n))) & 0x7fffff;
-    r[59] = ((a[59] >> n) | (a[60] << (23 - n))) & 0x7fffff;
-    r[60] = ((a[60] >> n) | (a[61] << (23 - n))) & 0x7fffff;
-    r[61] = ((a[61] >> n) | (a[62] << (23 - n))) & 0x7fffff;
-    r[62] = ((a[62] >> n) | (a[63] << (23 - n))) & 0x7fffff;
-    r[63] = ((a[63] >> n) | (a[64] << (23 - n))) & 0x7fffff;
-    r[64] = ((a[64] >> n) | (a[65] << (23 - n))) & 0x7fffff;
-    r[65] = ((a[65] >> n) | (a[66] << (23 - n))) & 0x7fffff;
-    r[66] = ((a[66] >> n) | (a[67] << (23 - n))) & 0x7fffff;
-    r[67] = ((a[67] >> n) | (a[68] << (23 - n))) & 0x7fffff;
-    r[68] = ((a[68] >> n) | (a[69] << (23 - n))) & 0x7fffff;
-    r[69] = ((a[69] >> n) | (a[70] << (23 - n))) & 0x7fffff;
-    r[70] = ((a[70] >> n) | (a[71] << (23 - n))) & 0x7fffff;
-    r[71] = ((a[71] >> n) | (a[72] << (23 - n))) & 0x7fffff;
-    r[72] = ((a[72] >> n) | (a[73] << (23 - n))) & 0x7fffff;
-    r[73] = ((a[73] >> n) | (a[74] << (23 - n))) & 0x7fffff;
-    r[74] = ((a[74] >> n) | (a[75] << (23 - n))) & 0x7fffff;
-    r[75] = ((a[75] >> n) | (a[76] << (23 - n))) & 0x7fffff;
-    r[76] = ((a[76] >> n) | (a[77] << (23 - n))) & 0x7fffff;
-    r[77] = ((a[77] >> n) | (a[78] << (23 - n))) & 0x7fffff;
-    r[78] = ((a[78] >> n) | (a[79] << (23 - n))) & 0x7fffff;
-    r[79] = ((a[79] >> n) | (a[80] << (23 - n))) & 0x7fffff;
-    r[80] = ((a[80] >> n) | (a[81] << (23 - n))) & 0x7fffff;
-    r[81] = ((a[81] >> n) | (a[82] << (23 - n))) & 0x7fffff;
-    r[82] = ((a[82] >> n) | (a[83] << (23 - n))) & 0x7fffff;
-    r[83] = ((a[83] >> n) | (a[84] << (23 - n))) & 0x7fffff;
-    r[84] = ((a[84] >> n) | (a[85] << (23 - n))) & 0x7fffff;
-    r[85] = ((a[85] >> n) | (a[86] << (23 - n))) & 0x7fffff;
-    r[86] = ((a[86] >> n) | (a[87] << (23 - n))) & 0x7fffff;
-    r[87] = ((a[87] >> n) | (a[88] << (23 - n))) & 0x7fffff;
-    r[88] = ((a[88] >> n) | (a[89] << (23 - n))) & 0x7fffff;
-    r[89] = ((a[89] >> n) | (a[90] << (23 - n))) & 0x7fffff;
-    r[90] = ((a[90] >> n) | (a[91] << (23 - n))) & 0x7fffff;
-    r[91] = ((a[91] >> n) | (a[92] << (23 - n))) & 0x7fffff;
-    r[92] = ((a[92] >> n) | (a[93] << (23 - n))) & 0x7fffff;
-    r[93] = ((a[93] >> n) | (a[94] << (23 - n))) & 0x7fffff;
-    r[94] = ((a[94] >> n) | (a[95] << (23 - n))) & 0x7fffff;
-    r[95] = ((a[95] >> n) | (a[96] << (23 - n))) & 0x7fffff;
-    r[96] = ((a[96] >> n) | (a[97] << (23 - n))) & 0x7fffff;
-    r[97] = ((a[97] >> n) | (a[98] << (23 - n))) & 0x7fffff;
-    r[98] = ((a[98] >> n) | (a[99] << (23 - n))) & 0x7fffff;
-    r[99] = ((a[99] >> n) | (a[100] << (23 - n))) & 0x7fffff;
-    r[100] = ((a[100] >> n) | (a[101] << (23 - n))) & 0x7fffff;
-    r[101] = ((a[101] >> n) | (a[102] << (23 - n))) & 0x7fffff;
-    r[102] = ((a[102] >> n) | (a[103] << (23 - n))) & 0x7fffff;
-    r[103] = ((a[103] >> n) | (a[104] << (23 - n))) & 0x7fffff;
-    r[104] = ((a[104] >> n) | (a[105] << (23 - n))) & 0x7fffff;
-    r[105] = ((a[105] >> n) | (a[106] << (23 - n))) & 0x7fffff;
-    r[106] = ((a[106] >> n) | (a[107] << (23 - n))) & 0x7fffff;
-    r[107] = ((a[107] >> n) | (a[108] << (23 - n))) & 0x7fffff;
-    r[108] = ((a[108] >> n) | (a[109] << (23 - n))) & 0x7fffff;
-    r[109] = ((a[109] >> n) | (a[110] << (23 - n))) & 0x7fffff;
-    r[110] = ((a[110] >> n) | (a[111] << (23 - n))) & 0x7fffff;
-    r[111] = ((a[111] >> n) | (a[112] << (23 - n))) & 0x7fffff;
-    r[112] = ((a[112] >> n) | (a[113] << (23 - n))) & 0x7fffff;
-    r[113] = ((a[113] >> n) | (a[114] << (23 - n))) & 0x7fffff;
-    r[114] = ((a[114] >> n) | (a[115] << (23 - n))) & 0x7fffff;
-    r[115] = ((a[115] >> n) | (a[116] << (23 - n))) & 0x7fffff;
-    r[116] = ((a[116] >> n) | (a[117] << (23 - n))) & 0x7fffff;
-    r[117] = ((a[117] >> n) | (a[118] << (23 - n))) & 0x7fffff;
-    r[118] = ((a[118] >> n) | (a[119] << (23 - n))) & 0x7fffff;
-    r[119] = ((a[119] >> n) | (a[120] << (23 - n))) & 0x7fffff;
-    r[120] = ((a[120] >> n) | (a[121] << (23 - n))) & 0x7fffff;
-    r[121] = ((a[121] >> n) | (a[122] << (23 - n))) & 0x7fffff;
-    r[122] = ((a[122] >> n) | (a[123] << (23 - n))) & 0x7fffff;
-    r[123] = ((a[123] >> n) | (a[124] << (23 - n))) & 0x7fffff;
-    r[124] = ((a[124] >> n) | (a[125] << (23 - n))) & 0x7fffff;
-    r[125] = ((a[125] >> n) | (a[126] << (23 - n))) & 0x7fffff;
-    r[126] = ((a[126] >> n) | (a[127] << (23 - n))) & 0x7fffff;
-    r[127] = ((a[127] >> n) | (a[128] << (23 - n))) & 0x7fffff;
+    }
+#else
+    for (i=0; i<128; i += 8) {
+        r[i+0] = ((a[i+0] >> n) | (a[i+1] << (23 - n))) & 0x7fffff;
+        r[i+1] = ((a[i+1] >> n) | (a[i+2] << (23 - n))) & 0x7fffff;
+        r[i+2] = ((a[i+2] >> n) | (a[i+3] << (23 - n))) & 0x7fffff;
+        r[i+3] = ((a[i+3] >> n) | (a[i+4] << (23 - n))) & 0x7fffff;
+        r[i+4] = ((a[i+4] >> n) | (a[i+5] << (23 - n))) & 0x7fffff;
+        r[i+5] = ((a[i+5] >> n) | (a[i+6] << (23 - n))) & 0x7fffff;
+        r[i+6] = ((a[i+6] >> n) | (a[i+7] << (23 - n))) & 0x7fffff;
+        r[i+7] = ((a[i+7] >> n) | (a[i+8] << (23 - n))) & 0x7fffff;
+    }
     r[128] = ((a[128] >> n) | (a[129] << (23 - n))) & 0x7fffff;
     r[129] = ((a[129] >> n) | (a[130] << (23 - n))) & 0x7fffff;
     r[130] = ((a[130] >> n) | (a[131] << (23 - n))) & 0x7fffff;
     r[131] = ((a[131] >> n) | (a[132] << (23 - n))) & 0x7fffff;
     r[132] = ((a[132] >> n) | (a[133] << (23 - n))) & 0x7fffff;
-    r[133] = ((a[133] >> n) | (a[134] << (23 - n))) & 0x7fffff;
-    r[134] = ((a[134] >> n) | (a[135] << (23 - n))) & 0x7fffff;
-#endif
-    r[135] = a[135] >> n;
-}
+#endif
+    r[133] = a[133] >> n;
+}
+
+#ifdef WOLFSSL_SP_DIV_32
+static WC_INLINE sp_digit sp_3072_div_word_134(sp_digit d1, sp_digit d0,
+    sp_digit dv)
+{
+    sp_digit d, r, t;
+
+    /* All 23 bits from d1 and top 8 bits from d0. */
+    d = (d1 << 8) | (d0 >> 15);
+    r = d / dv;
+    d -= r * dv;
+    /* Up to 9 bits in r */
+    /* Next 8 bits from d0. */
+    r <<= 8;
+    d <<= 8;
+    d |= (d0 >> 7) & ((1 << 8) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 17 bits in r */
+    /* Remaining 7 bits from d0. */
+    r <<= 7;
+    d <<= 7;
+    d |= d0 & ((1 << 7) - 1);
+    t = d / dv;
+    r += t;
+
+    return r;
+}
+#endif /* WOLFSSL_SP_DIV_32 */
 
 /* Divide d in a and put remainder into r (m*d + r = a)
  * m is not calculated as it is not needed at this time.
@@ -6029,83 +6543,96 @@
  * r  Remainder from the division.
  * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
  */
-static int sp_3072_div_136(sp_digit* a, sp_digit* d, sp_digit* m,
+static int sp_3072_div_134(const sp_digit* a, const sp_digit* d, sp_digit* m,
         sp_digit* r)
 {
     int i;
+#ifndef WOLFSSL_SP_DIV_32
     int64_t d1;
-    sp_digit div, r1;
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#endif
+    sp_digit dv, r1;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit* td;
 #else
-    sp_digit t1d[272 + 1], t2d[136 + 1], sdd[136 + 1];
+    sp_digit t1d[268 + 1], t2d[134 + 1], sdd[134 + 1];
 #endif
     sp_digit* t1;
     sp_digit* t2;
     sp_digit* sd;
     int err = MP_OKAY;
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    td = XMALLOC(sizeof(sp_digit) * (4 * 136 + 3), NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    if (td != NULL) {
-        t1 = td;
-        t2 = td + 272 + 1;
-        sd = t2 + 136 + 1;
-    }
-    else
-        err = MEMORY_E;
-#else
-    t1 = t1d;
-    t2 = t2d;
-    sd = sdd;
+    (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 134 + 3), NULL,
+                                                       DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
 #endif
 
     (void)m;
 
     if (err == MP_OKAY) {
-        sp_3072_mul_d_136(sd, d, 1 << 10);
-        sp_3072_mul_d_272(t1, a, 1 << 10);
-        div = sd[135];
-        for (i=136; i>=0; i--) {
-            t1[136 + i] += t1[136 + i - 1] >> 23;
-            t1[136 + i - 1] &= 0x7fffff;
-            d1 = t1[136 + i];
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = td;
+        t2 = td + 268 + 1;
+        sd = t2 + 134 + 1;
+#else
+        t1 = t1d;
+        t2 = t2d;
+        sd = sdd;
+#endif
+
+        sp_3072_mul_d_134(sd, d, 1L << 10);
+        sp_3072_mul_d_268(t1, a, 1L << 10);
+        dv = sd[133];
+        for (i=134; i>=0; i--) {
+            t1[134 + i] += t1[134 + i - 1] >> 23;
+            t1[134 + i - 1] &= 0x7fffff;
+#ifndef WOLFSSL_SP_DIV_32
+            d1 = t1[134 + i];
             d1 <<= 23;
-            d1 += t1[136 + i - 1];
-            r1 = (sp_digit)(d1 / div);
-
-            sp_3072_mul_d_136(t2, sd, r1);
-            sp_3072_sub_136(&t1[i], &t1[i], t2);
-            t1[136 + i] -= t2[136];
-            t1[136 + i] += t1[136 + i - 1] >> 23;
-            t1[136 + i - 1] &= 0x7fffff;
-            r1 = (((-t1[136 + i]) << 23) - t1[136 + i - 1]) / div;
-            r1 -= t1[136 + i];
-            sp_3072_mul_d_136(t2, sd, r1);
-            sp_3072_add_136(&t1[i], &t1[i], t2);
-            t1[136 + i] += t1[136 + i - 1] >> 23;
-            t1[136 + i - 1] &= 0x7fffff;
-        }
-        t1[136 - 1] += t1[136 - 2] >> 23;
-        t1[136 - 2] &= 0x7fffff;
-        d1 = t1[136 - 1];
-        r1 = (sp_digit)(d1 / div);
-
-        sp_3072_mul_d_136(t2, sd, r1);
-        sp_3072_sub_136(t1, t1, t2);
-        XMEMCPY(r, t1, sizeof(*r) * 2 * 136);
-        for (i=0; i<134; i++) {
+            d1 += t1[134 + i - 1];
+            r1 = (sp_digit)(d1 / dv);
+#else
+            r1 = sp_3072_div_word_134(t1[134 + i], t1[134 + i - 1], dv);
+#endif
+
+            sp_3072_mul_d_134(t2, sd, r1);
+            (void)sp_3072_sub_134(&t1[i], &t1[i], t2);
+            t1[134 + i] -= t2[134];
+            t1[134 + i] += t1[134 + i - 1] >> 23;
+            t1[134 + i - 1] &= 0x7fffff;
+            r1 = (((-t1[134 + i]) << 23) - t1[134 + i - 1]) / dv;
+            r1 -= t1[134 + i];
+            sp_3072_mul_d_134(t2, sd, r1);
+            (void)sp_3072_add_134(&t1[i], &t1[i], t2);
+            t1[134 + i] += t1[134 + i - 1] >> 23;
+            t1[134 + i - 1] &= 0x7fffff;
+        }
+        t1[134 - 1] += t1[134 - 2] >> 23;
+        t1[134 - 2] &= 0x7fffff;
+        r1 = t1[134 - 1] / dv;
+
+        sp_3072_mul_d_134(t2, sd, r1);
+        sp_3072_sub_134(t1, t1, t2);
+        XMEMCPY(r, t1, sizeof(*r) * 2U * 134U);
+        for (i=0; i<132; i++) {
             r[i+1] += r[i] >> 23;
             r[i] &= 0x7fffff;
         }
-        sp_3072_cond_add_136(r, r, sd, 0 - (r[135] < 0));
-    }
-
-    sp_3072_rshift_136(r, r, 10);
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (td != NULL)
+        sp_3072_cond_add_134(r, r, sd, 0 - ((r[133] < 0) ?
+                    (sp_digit)1 : (sp_digit)0));
+
+        sp_3072_norm_134(r);
+        sp_3072_rshift_134(r, r, 10);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
 #endif
 
     return err;
@@ -6118,12 +6645,13 @@
  * m  A single precision number that is the modulus to reduce with.
  * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
  */
-static int sp_3072_mod_136(sp_digit* r, sp_digit* a, sp_digit* m)
-{
-    return sp_3072_div_136(a, m, NULL, r);
-}
-
-#if defined(SP_RSA_PRIVATE_EXP_D) || defined(WOLFSSL_HAVE_SP_DH)
+static int sp_3072_mod_134(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_3072_div_134(a, m, NULL, r);
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
+                                                     defined(WOLFSSL_HAVE_SP_DH)
 /* Modular exponentiate a to the e mod m. (r = a^e mod m)
  *
  * r     A single precision number that is the result of the operation.
@@ -6133,8 +6661,8 @@
  * m     A single precision number that is the modulus.
  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
  */
-static int sp_3072_mod_exp_136(sp_digit* r, sp_digit* a, sp_digit* e, int bits,
-    sp_digit* m, int reduceA)
+static int sp_3072_mod_exp_134(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits,
+    const sp_digit* m, int reduceA)
 {
 #ifdef WOLFSSL_SP_SMALL
     sp_digit* td;
@@ -6146,29 +6674,32 @@
     int c, y;
     int err = MP_OKAY;
 
-    td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 136 * 2, NULL,
+    td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 134 * 2, NULL,
                             DYNAMIC_TYPE_TMP_BUFFER);
-    if (td == NULL)
-        err = MEMORY_E;
-
-    if (err == MP_OKAY) {
-        XMEMSET(td, 0, sizeof(*td) * 3 * 136 * 2);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        XMEMSET(td, 0, sizeof(*td) * 3U * 134U * 2U);
 
         norm = t[0] = td;
-        t[1] = &td[136 * 2];
-        t[2] = &td[2 * 136 * 2];
+        t[1] = &td[134 * 2];
+        t[2] = &td[2 * 134 * 2];
 
         sp_3072_mont_setup(m, &mp);
-        sp_3072_mont_norm_136(norm, m);
-
-        if (reduceA)
-            err = sp_3072_mod_136(t[1], a, m);
-        else
-            XMEMCPY(t[1], a, sizeof(sp_digit) * 136);
-    }
-    if (err == MP_OKAY) {
-        sp_3072_mul_136(t[1], t[1], norm);
-        err = sp_3072_mod_136(t[1], t[1], m);
+        sp_3072_mont_norm_134(norm, m);
+
+        if (reduceA != 0) {
+            err = sp_3072_mod_134(t[1], a, m);
+        }
+        else {
+            XMEMCPY(t[1], a, sizeof(sp_digit) * 134U);
+        }
+    }
+    if (err == MP_OKAY) {
+        sp_3072_mul_134(t[1], t[1], norm);
+        err = sp_3072_mod_134(t[1], t[1], m);
     }
 
     if (err == MP_OKAY) {
@@ -6177,8 +6708,9 @@
         n = e[i--] << (23 - c);
         for (; ; c--) {
             if (c == 0) {
-                if (i == -1)
+                if (i == -1) {
                     break;
+                }
 
                 n = e[i--];
                 c = 23;
@@ -6187,31 +6719,33 @@
             y = (n >> 22) & 1;
             n <<= 1;
 
-            sp_3072_mont_mul_136(t[y^1], t[0], t[1], m, mp);
+            sp_3072_mont_mul_134(t[y^1], t[0], t[1], m, mp);
 
             XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
                                   ((size_t)t[1] & addr_mask[y])),
-                    sizeof(*t[2]) * 136 * 2);
-            sp_3072_mont_sqr_136(t[2], t[2], m, mp);
+                    sizeof(*t[2]) * 134 * 2);
+            sp_3072_mont_sqr_134(t[2], t[2], m, mp);
             XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
                             ((size_t)t[1] & addr_mask[y])), t[2],
-                    sizeof(*t[2]) * 136 * 2);
-        }
-
-        sp_3072_mont_reduce_136(t[0], m, mp);
-        n = sp_3072_cmp_136(t[0], m);
-        sp_3072_cond_sub_136(t[0], t[0], m, (n < 0) - 1);
-        XMEMCPY(r, t[0], sizeof(*r) * 136 * 2);
-
-    }
-
-    if (td != NULL)
+                    sizeof(*t[2]) * 134 * 2);
+        }
+
+        sp_3072_mont_reduce_134(t[0], m, mp);
+        n = sp_3072_cmp_134(t[0], m);
+        sp_3072_cond_sub_134(t[0], t[0], m, ((n < 0) ?
+                    (sp_digit)1 : (sp_digit)0) - 1);
+        XMEMCPY(r, t[0], sizeof(*r) * 134 * 2);
+
+    }
+
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
 
     return err;
 #elif defined(WOLFSSL_SP_CACHE_RESISTANT)
 #ifndef WOLFSSL_SMALL_STACK
-    sp_digit t[3][272];
+    sp_digit t[3][268];
 #else
     sp_digit* td;
     sp_digit* t[3];
@@ -6224,35 +6758,34 @@
     int err = MP_OKAY;
 
 #ifdef WOLFSSL_SMALL_STACK
-    td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 136 * 2, NULL,
+    td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 134 * 2, NULL,
                             DYNAMIC_TYPE_TMP_BUFFER);
-    if (td == NULL)
-        err = MEMORY_E;
-
-    if (err == MP_OKAY) {
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
         t[0] = td;
-        t[1] = &td[136 * 2];
-        t[2] = &td[2 * 136 * 2];
+        t[1] = &td[134 * 2];
+        t[2] = &td[2 * 134 * 2];
+#endif
         norm = t[0];
-    }
-#else
-    norm = t[0];
-#endif
-
-    if (err == MP_OKAY) {
+
         sp_3072_mont_setup(m, &mp);
-        sp_3072_mont_norm_136(norm, m);
-
-        if (reduceA) {
-            err = sp_3072_mod_136(t[1], a, m);
+        sp_3072_mont_norm_134(norm, m);
+
+        if (reduceA != 0) {
+            err = sp_3072_mod_134(t[1], a, m);
             if (err == MP_OKAY) {
-                sp_3072_mul_136(t[1], t[1], norm);
-                err = sp_3072_mod_136(t[1], t[1], m);
-            }
-        }
-        else {
-            sp_3072_mul_136(t[1], a, norm);
-            err = sp_3072_mod_136(t[1], t[1], m);
+                sp_3072_mul_134(t[1], t[1], norm);
+                err = sp_3072_mod_134(t[1], t[1], m);
+            }
+        }
+        else {
+            sp_3072_mul_134(t[1], a, norm);
+            err = sp_3072_mod_134(t[1], t[1], m);
         }
     }
 
@@ -6262,8 +6795,9 @@
         n = e[i--] << (23 - c);
         for (; ; c--) {
             if (c == 0) {
-                if (i == -1)
+                if (i == -1) {
                     break;
+                }
 
                 n = e[i--];
                 c = 23;
@@ -6272,36 +6806,38 @@
             y = (n >> 22) & 1;
             n <<= 1;
 
-            sp_3072_mont_mul_136(t[y^1], t[0], t[1], m, mp);
+            sp_3072_mont_mul_134(t[y^1], t[0], t[1], m, mp);
 
             XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
                                  ((size_t)t[1] & addr_mask[y])), sizeof(t[2]));
-            sp_3072_mont_sqr_136(t[2], t[2], m, mp);
+            sp_3072_mont_sqr_134(t[2], t[2], m, mp);
             XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
                            ((size_t)t[1] & addr_mask[y])), t[2], sizeof(t[2]));
         }
 
-        sp_3072_mont_reduce_136(t[0], m, mp);
-        n = sp_3072_cmp_136(t[0], m);
-        sp_3072_cond_sub_136(t[0], t[0], m, (n < 0) - 1);
+        sp_3072_mont_reduce_134(t[0], m, mp);
+        n = sp_3072_cmp_134(t[0], m);
+        sp_3072_cond_sub_134(t[0], t[0], m, ((n < 0) ?
+                   (sp_digit)1 : (sp_digit)0) - 1);
         XMEMCPY(r, t[0], sizeof(t[0]));
     }
 
 #ifdef WOLFSSL_SMALL_STACK
-    if (td != NULL)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
 #endif
 
     return err;
 #else
 #ifndef WOLFSSL_SMALL_STACK
-    sp_digit t[32][272];
+    sp_digit t[32][268];
 #else
     sp_digit* t[32];
     sp_digit* td;
 #endif
     sp_digit* norm;
-    sp_digit rt[272];
+    sp_digit rt[268];
     sp_digit mp = 1;
     sp_digit n;
     int i;
@@ -6309,76 +6845,77 @@
     int err = MP_OKAY;
 
 #ifdef WOLFSSL_SMALL_STACK
-    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 272, NULL,
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 268, NULL,
                             DYNAMIC_TYPE_TMP_BUFFER);
-    if (td == NULL)
-        err = MEMORY_E;
-
-    if (err == MP_OKAY) {
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
         for (i=0; i<32; i++)
-            t[i] = td + i * 272;
+            t[i] = td + i * 268;
+#endif
         norm = t[0];
-    }
-#else
-    norm = t[0];
-#endif
-
-    if (err == MP_OKAY) {
+
         sp_3072_mont_setup(m, &mp);
-        sp_3072_mont_norm_136(norm, m);
-
-        if (reduceA) {
-            err = sp_3072_mod_136(t[1], a, m);
+        sp_3072_mont_norm_134(norm, m);
+
+        if (reduceA != 0) {
+            err = sp_3072_mod_134(t[1], a, m);
             if (err == MP_OKAY) {
-                sp_3072_mul_136(t[1], t[1], norm);
-                err = sp_3072_mod_136(t[1], t[1], m);
-            }
-        }
-        else {
-            sp_3072_mul_136(t[1], a, norm);
-            err = sp_3072_mod_136(t[1], t[1], m);
-        }
-    }
-
-    if (err == MP_OKAY) {
-        sp_3072_mont_sqr_136(t[ 2], t[ 1], m, mp);
-        sp_3072_mont_mul_136(t[ 3], t[ 2], t[ 1], m, mp);
-        sp_3072_mont_sqr_136(t[ 4], t[ 2], m, mp);
-        sp_3072_mont_mul_136(t[ 5], t[ 3], t[ 2], m, mp);
-        sp_3072_mont_sqr_136(t[ 6], t[ 3], m, mp);
-        sp_3072_mont_mul_136(t[ 7], t[ 4], t[ 3], m, mp);
-        sp_3072_mont_sqr_136(t[ 8], t[ 4], m, mp);
-        sp_3072_mont_mul_136(t[ 9], t[ 5], t[ 4], m, mp);
-        sp_3072_mont_sqr_136(t[10], t[ 5], m, mp);
-        sp_3072_mont_mul_136(t[11], t[ 6], t[ 5], m, mp);
-        sp_3072_mont_sqr_136(t[12], t[ 6], m, mp);
-        sp_3072_mont_mul_136(t[13], t[ 7], t[ 6], m, mp);
-        sp_3072_mont_sqr_136(t[14], t[ 7], m, mp);
-        sp_3072_mont_mul_136(t[15], t[ 8], t[ 7], m, mp);
-        sp_3072_mont_sqr_136(t[16], t[ 8], m, mp);
-        sp_3072_mont_mul_136(t[17], t[ 9], t[ 8], m, mp);
-        sp_3072_mont_sqr_136(t[18], t[ 9], m, mp);
-        sp_3072_mont_mul_136(t[19], t[10], t[ 9], m, mp);
-        sp_3072_mont_sqr_136(t[20], t[10], m, mp);
-        sp_3072_mont_mul_136(t[21], t[11], t[10], m, mp);
-        sp_3072_mont_sqr_136(t[22], t[11], m, mp);
-        sp_3072_mont_mul_136(t[23], t[12], t[11], m, mp);
-        sp_3072_mont_sqr_136(t[24], t[12], m, mp);
-        sp_3072_mont_mul_136(t[25], t[13], t[12], m, mp);
-        sp_3072_mont_sqr_136(t[26], t[13], m, mp);
-        sp_3072_mont_mul_136(t[27], t[14], t[13], m, mp);
-        sp_3072_mont_sqr_136(t[28], t[14], m, mp);
-        sp_3072_mont_mul_136(t[29], t[15], t[14], m, mp);
-        sp_3072_mont_sqr_136(t[30], t[15], m, mp);
-        sp_3072_mont_mul_136(t[31], t[16], t[15], m, mp);
+                sp_3072_mul_134(t[1], t[1], norm);
+                err = sp_3072_mod_134(t[1], t[1], m);
+            }
+        }
+        else {
+            sp_3072_mul_134(t[1], a, norm);
+            err = sp_3072_mod_134(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_3072_mont_sqr_134(t[ 2], t[ 1], m, mp);
+        sp_3072_mont_mul_134(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_3072_mont_sqr_134(t[ 4], t[ 2], m, mp);
+        sp_3072_mont_mul_134(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_3072_mont_sqr_134(t[ 6], t[ 3], m, mp);
+        sp_3072_mont_mul_134(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_3072_mont_sqr_134(t[ 8], t[ 4], m, mp);
+        sp_3072_mont_mul_134(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_3072_mont_sqr_134(t[10], t[ 5], m, mp);
+        sp_3072_mont_mul_134(t[11], t[ 6], t[ 5], m, mp);
+        sp_3072_mont_sqr_134(t[12], t[ 6], m, mp);
+        sp_3072_mont_mul_134(t[13], t[ 7], t[ 6], m, mp);
+        sp_3072_mont_sqr_134(t[14], t[ 7], m, mp);
+        sp_3072_mont_mul_134(t[15], t[ 8], t[ 7], m, mp);
+        sp_3072_mont_sqr_134(t[16], t[ 8], m, mp);
+        sp_3072_mont_mul_134(t[17], t[ 9], t[ 8], m, mp);
+        sp_3072_mont_sqr_134(t[18], t[ 9], m, mp);
+        sp_3072_mont_mul_134(t[19], t[10], t[ 9], m, mp);
+        sp_3072_mont_sqr_134(t[20], t[10], m, mp);
+        sp_3072_mont_mul_134(t[21], t[11], t[10], m, mp);
+        sp_3072_mont_sqr_134(t[22], t[11], m, mp);
+        sp_3072_mont_mul_134(t[23], t[12], t[11], m, mp);
+        sp_3072_mont_sqr_134(t[24], t[12], m, mp);
+        sp_3072_mont_mul_134(t[25], t[13], t[12], m, mp);
+        sp_3072_mont_sqr_134(t[26], t[13], m, mp);
+        sp_3072_mont_mul_134(t[27], t[14], t[13], m, mp);
+        sp_3072_mont_sqr_134(t[28], t[14], m, mp);
+        sp_3072_mont_mul_134(t[29], t[15], t[14], m, mp);
+        sp_3072_mont_sqr_134(t[30], t[15], m, mp);
+        sp_3072_mont_mul_134(t[31], t[16], t[15], m, mp);
 
         bits = ((bits + 4) / 5) * 5;
         i = ((bits + 22) / 23) - 1;
         c = bits % 23;
-        if (c == 0)
+        if (c == 0) {
             c = 23;
-        if (i < 136)
+        }
+        if (i < 134) {
             n = e[i--] << (32 - c);
+        }
         else {
             n = 0;
             i--;
@@ -6387,7 +6924,7 @@
             n |= e[i--] << (9 - c);
             c += 23;
         }
-        y = n >> 27;
+        y = (n >> 27) & 0x1f;
         n <<= 5;
         c -= 5;
         XMEMCPY(rt, t[y], sizeof(rt));
@@ -6400,67 +6937,34 @@
             n <<= 5;
             c -= 5;
 
-            sp_3072_mont_sqr_136(rt, rt, m, mp);
-            sp_3072_mont_sqr_136(rt, rt, m, mp);
-            sp_3072_mont_sqr_136(rt, rt, m, mp);
-            sp_3072_mont_sqr_136(rt, rt, m, mp);
-            sp_3072_mont_sqr_136(rt, rt, m, mp);
-
-            sp_3072_mont_mul_136(rt, rt, t[y], m, mp);
-        }
-
-        sp_3072_mont_reduce_136(rt, m, mp);
-        n = sp_3072_cmp_136(rt, m);
-        sp_3072_cond_sub_136(rt, rt, m, (n < 0) - 1);
+            sp_3072_mont_sqr_134(rt, rt, m, mp);
+            sp_3072_mont_sqr_134(rt, rt, m, mp);
+            sp_3072_mont_sqr_134(rt, rt, m, mp);
+            sp_3072_mont_sqr_134(rt, rt, m, mp);
+            sp_3072_mont_sqr_134(rt, rt, m, mp);
+
+            sp_3072_mont_mul_134(rt, rt, t[y], m, mp);
+        }
+
+        sp_3072_mont_reduce_134(rt, m, mp);
+        n = sp_3072_cmp_134(rt, m);
+        sp_3072_cond_sub_134(rt, rt, m, ((n < 0) ?
+                   (sp_digit)1 : (sp_digit)0) - 1);
         XMEMCPY(r, rt, sizeof(rt));
     }
 
 #ifdef WOLFSSL_SMALL_STACK
-    if (td != NULL)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return err;
-#endif
-}
-#endif /* SP_RSA_PRIVATE_EXP_D || WOLFSSL_HAVE_SP_DH */
-
-#if defined(WOLFSSL_HAVE_SP_RSA) && !defined(SP_RSA_PRIVATE_EXP_D) && \
-                                    !defined(RSA_LOW_MEM)
-/* AND m into each word of a and store in r.
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * m  Mask to AND against each digit.
- */
-static void sp_3072_mask_68(sp_digit* r, sp_digit* a, sp_digit m)
-{
-#ifdef WOLFSSL_SP_SMALL
-    int i;
-
-    for (i=0; i<68; i++)
-        r[i] = a[i] & m;
-#else
-    int i;
-
-    for (i = 0; i < 64; i += 8) {
-        r[i+0] = a[i+0] & m;
-        r[i+1] = a[i+1] & m;
-        r[i+2] = a[i+2] & m;
-        r[i+3] = a[i+3] & m;
-        r[i+4] = a[i+4] & m;
-        r[i+5] = a[i+5] & m;
-        r[i+6] = a[i+6] & m;
-        r[i+7] = a[i+7] & m;
-    }
-    r[64] = a[64] & m;
-    r[65] = a[65] & m;
-    r[66] = a[66] & m;
-    r[67] = a[67] & m;
-#endif
-}
-
-#endif
+    }
+#endif
+
+    return err;
+#endif
+}
+#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || */
+       /* WOLFSSL_HAVE_SP_DH */
+
 #ifdef WOLFSSL_HAVE_SP_RSA
 /* RSA public key operation.
  *
@@ -6483,106 +6987,133 @@
     sp_digit* m;
     sp_digit* r;
     sp_digit* norm;
-    sp_digit e[1];
+    sp_digit e[1] = {0};
     sp_digit mp;
     int i;
     int err = MP_OKAY;
 
-    if (*outLen < 384)
+    if (*outLen < 384U) {
         err = MP_TO_E;
-    if (err == MP_OKAY && (mp_count_bits(em) > 23 || inLen > 384 ||
-                                                     mp_count_bits(mm) != 3072))
-        err = MP_READ_E;
-
-    if (err == MP_OKAY) {
-        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 136 * 5, NULL,
-                               DYNAMIC_TYPE_TMP_BUFFER);
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(em) > 23) {
+            err = MP_READ_E;
+        }
+        if (inLen > 384U) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 3072) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 134 * 5, NULL,
+                                                              DYNAMIC_TYPE_RSA);
         if (d == NULL)
             err = MEMORY_E;
     }
 
     if (err == MP_OKAY) {
         a = d;
-        r = a + 136 * 2;
-        m = r + 136 * 2;
+        r = a + 134 * 2;
+        m = r + 134 * 2;
         norm = r;
 
-        sp_3072_from_bin(a, 136, in, inLen);
+        sp_3072_from_bin(a, 134, in, inLen);
 #if DIGIT_BIT >= 23
-        e[0] = em->dp[0];
-#else
-        e[0] = em->dp[0];
-        if (em->used > 1)
+        e[0] = (sp_digit)em->dp[0];
+#else
+        e[0] = (sp_digit)em->dp[0];
+        if (em->used > 1) {
             e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
-#endif
-        if (e[0] == 0)
+        }
+#endif
+        if (e[0] == 0) {
             err = MP_EXPTMOD_E;
-    }
-
-    if (err == MP_OKAY) {
-        sp_3072_from_mp(m, 136, mm);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_3072_from_mp(m, 134, mm);
 
         sp_3072_mont_setup(m, &mp);
-        sp_3072_mont_norm_136(norm, m);
-    }
-    if (err == MP_OKAY) {
-        sp_3072_mul_136(a, a, norm);
-        err = sp_3072_mod_136(a, a, m);
-    }
-    if (err == MP_OKAY) {
-        for (i=22; i>=0; i--)
-            if (e[0] >> i)
-                break;
-
-        XMEMCPY(r, a, sizeof(sp_digit) * 136 * 2);
+        sp_3072_mont_norm_134(norm, m);
+    }
+    if (err == MP_OKAY) {
+        sp_3072_mul_134(a, a, norm);
+        err = sp_3072_mod_134(a, a, m);
+    }
+    if (err == MP_OKAY) {
+        for (i=22; i>=0; i--) {
+            if ((e[0] >> i) != 0) {
+                break;
+            }
+        }
+
+        XMEMCPY(r, a, sizeof(sp_digit) * 134 * 2);
         for (i--; i>=0; i--) {
-            sp_3072_mont_sqr_136(r, r, m, mp);
-
-            if (((e[0] >> i) & 1) == 1)
-                sp_3072_mont_mul_136(r, r, a, m, mp);
-        }
-        sp_3072_mont_reduce_136(r, m, mp);
-        mp = sp_3072_cmp_136(r, m);
-        sp_3072_cond_sub_136(r, r, m, (mp < 0) - 1);
+            sp_3072_mont_sqr_134(r, r, m, mp);
+
+            if (((e[0] >> i) & 1) == 1) {
+                sp_3072_mont_mul_134(r, r, a, m, mp);
+            }
+        }
+        sp_3072_mont_reduce_134(r, m, mp);
+        mp = sp_3072_cmp_134(r, m);
+        sp_3072_cond_sub_134(r, r, m, ((mp < 0) ?
+                    (sp_digit)1 : (sp_digit)0)- 1);
 
         sp_3072_to_bin(r, out);
         *outLen = 384;
     }
 
-    if (d != NULL)
-        XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-
-    return err;
-#else
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_digit ad[272], md[136], rd[272];
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+
+    return err;
+#else
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit ad[268], md[134], rd[268];
 #else
     sp_digit* d = NULL;
 #endif
     sp_digit* a;
     sp_digit* m;
     sp_digit* r;
-    sp_digit e[1];
-    int err = MP_OKAY;
-
-    if (*outLen < 384)
+    sp_digit e[1] = {0};
+    int err = MP_OKAY;
+
+    if (*outLen < 384U) {
         err = MP_TO_E;
-    if (err == MP_OKAY && (mp_count_bits(em) > 23 || inLen > 384 ||
-                                                     mp_count_bits(mm) != 3072))
-        err = MP_READ_E;
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 136 * 5, NULL,
-                               DYNAMIC_TYPE_TMP_BUFFER);
-        if (d == NULL)
-            err = MEMORY_E;
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(em) > 23) {
+            err = MP_READ_E;
+        }
+        if (inLen > 384U) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 3072) {
+            err = MP_READ_E;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 134 * 5, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
     }
 
     if (err == MP_OKAY) {
         a = d;
-        r = a + 136 * 2;
-        m = r + 136 * 2;
+        r = a + 134 * 2;
+        m = r + 134 * 2;
     }
 #else
     a = ad;
@@ -6591,28 +7122,28 @@
 #endif
 
     if (err == MP_OKAY) {
-        sp_3072_from_bin(a, 136, in, inLen);
+        sp_3072_from_bin(a, 134, in, inLen);
 #if DIGIT_BIT >= 23
-        e[0] = em->dp[0];
-#else
-        e[0] = em->dp[0];
-        if (em->used > 1)
+        e[0] = (sp_digit)em->dp[0];
+#else
+        e[0] = (sp_digit)em->dp[0];
+        if (em->used > 1) {
             e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
-#endif
-        if (e[0] == 0)
+        }
+#endif
+        if (e[0] == 0) {
             err = MP_EXPTMOD_E;
-    }
-    if (err == MP_OKAY) {
-        sp_3072_from_mp(m, 136, mm);
+        }
+    }
+    if (err == MP_OKAY) {
+        sp_3072_from_mp(m, 134, mm);
 
         if (e[0] == 0x3) {
+            sp_3072_sqr_134(r, a);
+            err = sp_3072_mod_134(r, r, m);
             if (err == MP_OKAY) {
-                sp_3072_sqr_136(r, a);
-                err = sp_3072_mod_136(r, r, m);
-            }
-            if (err == MP_OKAY) {
-                sp_3072_mul_136(r, a, r);
-                err = sp_3072_mod_136(r, r, m);
+                sp_3072_mul_134(r, a, r);
+                err = sp_3072_mod_134(r, r, m);
             }
         }
         else {
@@ -6621,28 +7152,30 @@
             sp_digit mp;
 
             sp_3072_mont_setup(m, &mp);
-            sp_3072_mont_norm_136(norm, m);
-
-            if (err == MP_OKAY) {
-                sp_3072_mul_136(a, a, norm);
-                err = sp_3072_mod_136(a, a, m);
-            }
+            sp_3072_mont_norm_134(norm, m);
+
+            sp_3072_mul_134(a, a, norm);
+            err = sp_3072_mod_134(a, a, m);
 
             if (err == MP_OKAY) {
-                for (i=22; i>=0; i--)
-                    if (e[0] >> i)
+                for (i=22; i>=0; i--) {
+                    if ((e[0] >> i) != 0) {
                         break;
-
-                XMEMCPY(r, a, sizeof(sp_digit) * 272);
+                    }
+                }
+
+                XMEMCPY(r, a, sizeof(sp_digit) * 268U);
                 for (i--; i>=0; i--) {
-                    sp_3072_mont_sqr_136(r, r, m, mp);
-
-                    if (((e[0] >> i) & 1) == 1)
-                        sp_3072_mont_mul_136(r, r, a, m, mp);
+                    sp_3072_mont_sqr_134(r, r, m, mp);
+
+                    if (((e[0] >> i) & 1) == 1) {
+                        sp_3072_mont_mul_134(r, r, a, m, mp);
+                    }
                 }
-                sp_3072_mont_reduce_136(r, m, mp);
-                mp = sp_3072_cmp_136(r, m);
-                sp_3072_cond_sub_136(r, r, m, (mp < 0) - 1);
+                sp_3072_mont_reduce_134(r, m, mp);
+                mp = sp_3072_cmp_134(r, m);
+                sp_3072_cond_sub_134(r, r, m, ((mp < 0) ?
+                           (sp_digit)1 : (sp_digit)0) - 1);
             }
         }
     }
@@ -6652,15 +7185,19 @@
         *outLen = 384;
     }
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (d != NULL)
-        XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return err;
-#endif /* WOLFSSL_SP_SMALL */
-}
-
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+#endif
+
+    return err;
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+#if !defined(SP_RSA_PRIVATE_EXP_D) && !defined(RSA_LOW_MEM)
+#endif /* !SP_RSA_PRIVATE_EXP_D && !RSA_LOW_MEM */
 /* RSA private key operation.
  *
  * in      Array of bytes representing the number to exponentiate, base.
@@ -6683,7 +7220,7 @@
     byte* out, word32* outLen)
 {
 #if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit* a;
     sp_digit* d = NULL;
     sp_digit* m;
@@ -6696,27 +7233,37 @@
     (void)dqm;
     (void)qim;
 
-    if (*outLen < 384)
+    if (*outLen < 384U) {
         err = MP_TO_E;
-    if (err == MP_OKAY && (mp_count_bits(dm) > 3072 || inLen > 384 ||
-                                                     mp_count_bits(mm) != 3072))
-        err = MP_READ_E;
-
-    if (err == MP_OKAY) {
-        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 136 * 4, NULL,
-                               DYNAMIC_TYPE_TMP_BUFFER);
-        if (d == NULL)
-            err = MEMORY_E;
-    }
-    if (err == MP_OKAY) {
-        a = d + 136;
-        m = a + 136;
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(dm) > 3072) {
+           err = MP_READ_E;
+        }
+        if (inLen > 384) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 3072) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 134 * 4, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        a = d + 134;
+        m = a + 268;
         r = a;
 
-        sp_3072_from_bin(a, 136, in, inLen);
-        sp_3072_from_mp(d, 136, dm);
-        sp_3072_from_mp(m, 136, mm);
-        err = sp_3072_mod_exp_136(r, a, d, 3072, m, 0);
+        sp_3072_from_bin(a, 134, in, inLen);
+        sp_3072_from_mp(d, 134, dm);
+        sp_3072_from_mp(m, 134, mm);
+        err = sp_3072_mod_exp_134(r, a, d, 3072, m, 0);
     }
     if (err == MP_OKAY) {
         sp_3072_to_bin(r, out);
@@ -6724,13 +7271,13 @@
     }
 
     if (d != NULL) {
-        XMEMSET(d, 0, sizeof(sp_digit) * 136);
-        XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    }
-
-    return err;
-#else
-    sp_digit a[272], d[136], m[136];
+        XMEMSET(d, 0, sizeof(sp_digit) * 134);
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+
+    return err;
+#else
+    sp_digit a[268], d[134], m[134];
     sp_digit* r = a;
     int err = MP_OKAY;
 
@@ -6740,17 +7287,26 @@
     (void)dqm;
     (void)qim;
 
-    if (*outLen < 384)
+    if (*outLen < 384U) {
         err = MP_TO_E;
-    if (err == MP_OKAY && (mp_count_bits(dm) > 3072 || inLen > 384 ||
-                                                     mp_count_bits(mm) != 3072))
-        err = MP_READ_E;
-
-    if (err == MP_OKAY) {
-        sp_3072_from_bin(a, 136, in, inLen);
-        sp_3072_from_mp(d, 136, dm);
-        sp_3072_from_mp(m, 136, mm);
-        err = sp_3072_mod_exp_136(r, a, d, 3072, m, 0);
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(dm) > 3072) {
+            err = MP_READ_E;
+        }
+        if (inLen > 384U) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 3072) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_3072_from_bin(a, 134, in, inLen);
+        sp_3072_from_mp(d, 134, dm);
+        sp_3072_from_mp(m, 134, mm);
+        err = sp_3072_mod_exp_134(r, a, d, 3072, m, 0);
     }
 
     if (err == MP_OKAY) {
@@ -6758,12 +7314,12 @@
         *outLen = 384;
     }
 
-    XMEMSET(d, 0, sizeof(sp_digit) * 136);
+    XMEMSET(d, 0, sizeof(sp_digit) * 134);
 
     return err;
 #endif /* WOLFSSL_SP_SMALL || defined(WOLFSSL_SMALL_STACK) */
 #else
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit* t = NULL;
     sp_digit* a;
     sp_digit* p;
@@ -6771,7 +7327,6 @@
     sp_digit* dp;
     sp_digit* dq;
     sp_digit* qi;
-    sp_digit* tmp;
     sp_digit* tmpa;
     sp_digit* tmpb;
     sp_digit* r;
@@ -6780,103 +7335,118 @@
     (void)dm;
     (void)mm;
 
-    if (*outLen < 384)
+    if (*outLen < 384U) {
         err = MP_TO_E;
-    if (err == MP_OKAY && (inLen > 384 || mp_count_bits(mm) != 3072))
-        err = MP_READ_E;
-
-    if (err == MP_OKAY) {
-        t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 68 * 11, NULL,
-                               DYNAMIC_TYPE_TMP_BUFFER);
-        if (t == NULL)
-            err = MEMORY_E;
+    }
+    if (err == MP_OKAY) {
+        if (inLen > 384) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 3072) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 67 * 11, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (t == NULL) {
+            err = MEMORY_E;
+        }
     }
     if (err == MP_OKAY) {
         a = t;
-        p = a + 136 * 2;
-        q = p + 68;
-        qi = dq = dp = q + 68;
-        tmpa = qi + 68;
-        tmpb = tmpa + 136;
-
-        tmp = t;
-        r = tmp + 136;
-
-        sp_3072_from_bin(a, 136, in, inLen);
-        sp_3072_from_mp(p, 68, pm);
-        sp_3072_from_mp(q, 68, qm);
-        sp_3072_from_mp(dp, 68, dpm);
-        err = sp_3072_mod_exp_68(tmpa, a, dp, 1536, p, 1);
-    }
-    if (err == MP_OKAY) {
-        sp_3072_from_mp(dq, 68, dqm);
-        err = sp_3072_mod_exp_68(tmpb, a, dq, 1536, q, 1);
-    }
-    if (err == MP_OKAY) {
-        sp_3072_sub_68(tmpa, tmpa, tmpb);
-        sp_3072_mask_68(tmp, p, tmpa[67] >> 31);
-        sp_3072_add_68(tmpa, tmpa, tmp);
-
-        sp_3072_from_mp(qi, 68, qim);
-        sp_3072_mul_68(tmpa, tmpa, qi);
-        err = sp_3072_mod_68(tmpa, tmpa, p);
-    }
-
-    if (err == MP_OKAY) {
-        sp_3072_mul_68(tmpa, q, tmpa);
-        sp_3072_add_136(r, tmpb, tmpa);
-        sp_3072_norm_136(r);
+        p = a + 134 * 2;
+        q = p + 67;
+        qi = dq = dp = q + 67;
+        tmpa = qi + 67;
+        tmpb = tmpa + 134;
+
+        r = t + 134;
+
+        sp_3072_from_bin(a, 134, in, inLen);
+        sp_3072_from_mp(p, 67, pm);
+        sp_3072_from_mp(q, 67, qm);
+        sp_3072_from_mp(dp, 67, dpm);
+        err = sp_3072_mod_exp_67(tmpa, a, dp, 1536, p, 1);
+    }
+    if (err == MP_OKAY) {
+        sp_3072_from_mp(dq, 67, dqm);
+        err = sp_3072_mod_exp_67(tmpb, a, dq, 1536, q, 1);
+    }
+    if (err == MP_OKAY) {
+        (void)sp_3072_sub_67(tmpa, tmpa, tmpb);
+        sp_3072_cond_add_67(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[66] >> 31));
+        sp_3072_cond_add_67(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[66] >> 31));
+
+        sp_3072_from_mp(qi, 67, qim);
+        sp_3072_mul_67(tmpa, tmpa, qi);
+        err = sp_3072_mod_67(tmpa, tmpa, p);
+    }
+
+    if (err == MP_OKAY) {
+        sp_3072_mul_67(tmpa, q, tmpa);
+        (void)sp_3072_add_134(r, tmpb, tmpa);
+        sp_3072_norm_134(r);
 
         sp_3072_to_bin(r, out);
         *outLen = 384;
     }
 
     if (t != NULL) {
-        XMEMSET(t, 0, sizeof(sp_digit) * 68 * 11);
-        XFREE(t, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    }
-
-    return err;
-#else
-    sp_digit a[136 * 2];
-    sp_digit p[68], q[68], dp[68], dq[68], qi[68];
-    sp_digit tmp[136], tmpa[136], tmpb[136];
+        XMEMSET(t, 0, sizeof(sp_digit) * 67 * 11);
+        XFREE(t, NULL, DYNAMIC_TYPE_RSA);
+    }
+
+    return err;
+#else
+    sp_digit a[134 * 2];
+    sp_digit p[67], q[67], dp[67], dq[67], qi[67];
+    sp_digit tmpa[134], tmpb[134];
     sp_digit* r = a;
     int err = MP_OKAY;
 
     (void)dm;
     (void)mm;
 
-    if (*outLen < 384)
+    if (*outLen < 384U) {
         err = MP_TO_E;
-    if (err == MP_OKAY && (inLen > 384 || mp_count_bits(mm) != 3072))
-        err = MP_READ_E;
-
-    if (err == MP_OKAY) {
-        sp_3072_from_bin(a, 136, in, inLen);
-        sp_3072_from_mp(p, 68, pm);
-        sp_3072_from_mp(q, 68, qm);
-        sp_3072_from_mp(dp, 68, dpm);
-        sp_3072_from_mp(dq, 68, dqm);
-        sp_3072_from_mp(qi, 68, qim);
-
-        err = sp_3072_mod_exp_68(tmpa, a, dp, 1536, p, 1);
-    }
-    if (err == MP_OKAY)
-        err = sp_3072_mod_exp_68(tmpb, a, dq, 1536, q, 1);
-
-    if (err == MP_OKAY) {
-        sp_3072_sub_68(tmpa, tmpa, tmpb);
-        sp_3072_mask_68(tmp, p, tmpa[67] >> 31);
-        sp_3072_add_68(tmpa, tmpa, tmp);
-        sp_3072_mul_68(tmpa, tmpa, qi);
-        err = sp_3072_mod_68(tmpa, tmpa, p);
-    }
-
-    if (err == MP_OKAY) {
-        sp_3072_mul_68(tmpa, tmpa, q);
-        sp_3072_add_136(r, tmpb, tmpa);
-        sp_3072_norm_136(r);
+    }
+    if (err == MP_OKAY) {
+        if (inLen > 384U) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 3072) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_3072_from_bin(a, 134, in, inLen);
+        sp_3072_from_mp(p, 67, pm);
+        sp_3072_from_mp(q, 67, qm);
+        sp_3072_from_mp(dp, 67, dpm);
+        sp_3072_from_mp(dq, 67, dqm);
+        sp_3072_from_mp(qi, 67, qim);
+
+        err = sp_3072_mod_exp_67(tmpa, a, dp, 1536, p, 1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_3072_mod_exp_67(tmpb, a, dq, 1536, q, 1);
+    }
+
+    if (err == MP_OKAY) {
+        (void)sp_3072_sub_67(tmpa, tmpa, tmpb);
+        sp_3072_cond_add_67(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[66] >> 31));
+        sp_3072_cond_add_67(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[66] >> 31));
+        sp_3072_mul_67(tmpa, tmpa, qi);
+        err = sp_3072_mod_67(tmpa, tmpa, p);
+    }
+
+    if (err == MP_OKAY) {
+        sp_3072_mul_67(tmpa, tmpa, q);
+        (void)sp_3072_add_134(r, tmpb, tmpa);
+        sp_3072_norm_134(r);
 
         sp_3072_to_bin(r, out);
         *outLen = 384;
@@ -6895,36 +7465,43 @@
 #endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
 }
 
+#endif /* !WOLFSSL_RSA_PUBLIC_ONLY */
 #endif /* WOLFSSL_HAVE_SP_RSA */
-#ifdef WOLFSSL_HAVE_SP_DH
+#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
+                                              !defined(WOLFSSL_RSA_PUBLIC_ONLY))
 /* Convert an array of sp_digit to an mp_int.
  *
  * a  A single precision integer.
  * r  A multi-precision integer.
  */
-static int sp_3072_to_mp(sp_digit* a, mp_int* r)
+static int sp_3072_to_mp(const sp_digit* a, mp_int* r)
 {
     int err;
 
     err = mp_grow(r, (3072 + DIGIT_BIT - 1) / DIGIT_BIT);
-    if (err == MP_OKAY) {
+    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
 #if DIGIT_BIT == 23
-        XMEMCPY(r->dp, a, sizeof(sp_digit) * 136);
-        r->used = 136;
+        XMEMCPY(r->dp, a, sizeof(sp_digit) * 134);
+        r->used = 134;
         mp_clamp(r);
 #elif DIGIT_BIT < 23
         int i, j = 0, s = 0;
 
         r->dp[0] = 0;
-        for (i = 0; i < 136; i++) {
-            r->dp[j] |= a[i] << s;
-            r->dp[j] &= (1l << DIGIT_BIT) - 1;
+        for (i = 0; i < 134; i++) {
+            r->dp[j] |= (mp_digit)(a[i] << s);
+            r->dp[j] &= (1L << DIGIT_BIT) - 1;
             s = DIGIT_BIT - s;
-            r->dp[++j] = a[i] >> s;
+            r->dp[++j] = (mp_digit)(a[i] >> s);
             while (s + DIGIT_BIT <= 23) {
                 s += DIGIT_BIT;
-                r->dp[j] &= (1l << DIGIT_BIT) - 1;
-                r->dp[++j] = a[i] >> s;
+                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+                if (s == SP_WORD_SIZE) {
+                    r->dp[j] = 0;
+                }
+                else {
+                    r->dp[j] = (mp_digit)(a[i] >> s);
+                }
             }
             s = 23 - s;
         }
@@ -6934,18 +7511,19 @@
         int i, j = 0, s = 0;
 
         r->dp[0] = 0;
-        for (i = 0; i < 136; i++) {
+        for (i = 0; i < 134; i++) {
             r->dp[j] |= ((mp_digit)a[i]) << s;
             if (s + 23 >= DIGIT_BIT) {
-    #if DIGIT_BIT < 32
-                r->dp[j] &= (1l << DIGIT_BIT) - 1;
+    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+                r->dp[j] &= (1L << DIGIT_BIT) - 1;
     #endif
                 s = DIGIT_BIT - s;
                 r->dp[++j] = a[i] >> s;
                 s = 23 - s;
             }
-            else
+            else {
                 s += 23;
+            }
         }
         r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT;
         mp_clamp(r);
@@ -6961,7 +7539,7 @@
  * exp   Exponent. MP integer.
  * mod   Modulus. MP integer.
  * res   Result. MP integer.
- * returs 0 on success, MP_READ_E if there are too many bytes in an array
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
  * and MEMORY_E if memory allocation fails.
  */
 int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
@@ -6975,29 +7553,40 @@
     sp_digit* r;
     int expBits = mp_count_bits(exp);
 
-    if (mp_count_bits(base) > 3072 || expBits > 3072 ||
-                                                   mp_count_bits(mod) != 3072) {
+    if (mp_count_bits(base) > 3072) {
         err = MP_READ_E;
     }
 
     if (err == MP_OKAY) {
-        d = (sp_digit*)XMALLOC(sizeof(*d) * 136 * 4, NULL,
-                               DYNAMIC_TYPE_TMP_BUFFER);
-        if (d == NULL)
-            err = MEMORY_E;
+        if (expBits > 3072) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 3072) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 134 * 4, NULL, DYNAMIC_TYPE_DH);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
     }
 
     if (err == MP_OKAY) {
         b = d;
-        e = b + 136 * 2;
-        m = e + 136;
+        e = b + 134 * 2;
+        m = e + 134;
         r = b;
 
-        sp_3072_from_mp(b, 136, base);
-        sp_3072_from_mp(e, 136, exp);
-        sp_3072_from_mp(m, 136, mod);
-
-        err = sp_3072_mod_exp_136(r, b, e, mp_count_bits(exp), m, 0);
+        sp_3072_from_mp(b, 134, base);
+        sp_3072_from_mp(e, 134, exp);
+        sp_3072_from_mp(m, 134, mod);
+
+        err = sp_3072_mod_exp_134(r, b, e, mp_count_bits(exp), m, 0);
     }
 
     if (err == MP_OKAY) {
@@ -7005,13 +7594,13 @@
     }
 
     if (d != NULL) {
-        XMEMSET(e, 0, sizeof(sp_digit) * 136);
-        XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        XMEMSET(e, 0, sizeof(sp_digit) * 134U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
     }
     return err;
 #else
 #ifndef WOLFSSL_SMALL_STACK
-    sp_digit bd[272], ed[136], md[136];
+    sp_digit bd[268], ed[134], md[134];
 #else
     sp_digit* d = NULL;
 #endif
@@ -7022,23 +7611,33 @@
     int err = MP_OKAY;
     int expBits = mp_count_bits(exp);
 
-    if (mp_count_bits(base) > 3072 || expBits > 3072 ||
-                                                   mp_count_bits(mod) != 3072) {
+    if (mp_count_bits(base) > 3072) {
         err = MP_READ_E;
     }
 
-#ifdef WOLFSSL_SMALL_STACK
-    if (err == MP_OKAY) {
-        d = (sp_digit*)XMALLOC(sizeof(*d) * 136 * 4, NULL,
-                               DYNAMIC_TYPE_TMP_BUFFER);
+    if (err == MP_OKAY) {
+        if (expBits > 3072) {
+            err = MP_READ_E;
+        }
+    }
+    
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 3072) {
+            err = MP_READ_E;
+        }
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 134 * 4, NULL, DYNAMIC_TYPE_DH);
         if (d == NULL)
             err = MEMORY_E;
     }
 
     if (err == MP_OKAY) {
         b = d;
-        e = b + 136 * 2;
-        m = e + 136;
+        e = b + 134 * 2;
+        m = e + 134;
         r = b;
     }
 #else
@@ -7048,27 +7647,426 @@
 #endif
 
     if (err == MP_OKAY) {
-        sp_3072_from_mp(b, 136, base);
-        sp_3072_from_mp(e, 136, exp);
-        sp_3072_from_mp(m, 136, mod);
-
-        err = sp_3072_mod_exp_136(r, b, e, expBits, m, 0);
+        sp_3072_from_mp(b, 134, base);
+        sp_3072_from_mp(e, 134, exp);
+        sp_3072_from_mp(m, 134, mod);
+
+        err = sp_3072_mod_exp_134(r, b, e, expBits, m, 0);
     }
 
     if (err == MP_OKAY) {
         err = sp_3072_to_mp(r, res);
     }
 
-    XMEMSET(e, 0, sizeof(sp_digit) * 136);
-
-#ifdef WOLFSSL_SMALL_STACK
-    if (d != NULL)
-        XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return err;
-#endif
-}
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (d != NULL) {
+        XMEMSET(e, 0, sizeof(sp_digit) * 134U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
+    }
+#else
+    XMEMSET(e, 0, sizeof(sp_digit) * 134U);
+#endif
+
+    return err;
+#endif
+}
+
+#ifdef WOLFSSL_HAVE_SP_DH
+
+#ifdef HAVE_FFDHE_3072
+SP_NOINLINE static void sp_3072_lshift_134(sp_digit* r, sp_digit* a, byte n)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    r[134] = a[133] >> (23 - n);
+    for (i=133; i>0; i--) {
+        r[i] = ((a[i] << n) | (a[i-1] >> (23 - n))) & 0x7fffff;
+    }
+#else
+    sp_int_digit s, t;
+
+    s = (sp_int_digit)a[133];
+    r[134] = s >> (23U - n);
+    s = (sp_int_digit)(a[133]); t = (sp_int_digit)(a[132]);
+    r[133] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[132]); t = (sp_int_digit)(a[131]);
+    r[132] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[131]); t = (sp_int_digit)(a[130]);
+    r[131] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[130]); t = (sp_int_digit)(a[129]);
+    r[130] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[129]); t = (sp_int_digit)(a[128]);
+    r[129] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[128]); t = (sp_int_digit)(a[127]);
+    r[128] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[127]); t = (sp_int_digit)(a[126]);
+    r[127] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[126]); t = (sp_int_digit)(a[125]);
+    r[126] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[125]); t = (sp_int_digit)(a[124]);
+    r[125] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[124]); t = (sp_int_digit)(a[123]);
+    r[124] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[123]); t = (sp_int_digit)(a[122]);
+    r[123] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[122]); t = (sp_int_digit)(a[121]);
+    r[122] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[121]); t = (sp_int_digit)(a[120]);
+    r[121] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[120]); t = (sp_int_digit)(a[119]);
+    r[120] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[119]); t = (sp_int_digit)(a[118]);
+    r[119] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[118]); t = (sp_int_digit)(a[117]);
+    r[118] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[117]); t = (sp_int_digit)(a[116]);
+    r[117] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[116]); t = (sp_int_digit)(a[115]);
+    r[116] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[115]); t = (sp_int_digit)(a[114]);
+    r[115] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[114]); t = (sp_int_digit)(a[113]);
+    r[114] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[113]); t = (sp_int_digit)(a[112]);
+    r[113] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[112]); t = (sp_int_digit)(a[111]);
+    r[112] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[111]); t = (sp_int_digit)(a[110]);
+    r[111] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[110]); t = (sp_int_digit)(a[109]);
+    r[110] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[109]); t = (sp_int_digit)(a[108]);
+    r[109] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[108]); t = (sp_int_digit)(a[107]);
+    r[108] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[107]); t = (sp_int_digit)(a[106]);
+    r[107] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[106]); t = (sp_int_digit)(a[105]);
+    r[106] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[105]); t = (sp_int_digit)(a[104]);
+    r[105] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[104]); t = (sp_int_digit)(a[103]);
+    r[104] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[103]); t = (sp_int_digit)(a[102]);
+    r[103] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[102]); t = (sp_int_digit)(a[101]);
+    r[102] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[101]); t = (sp_int_digit)(a[100]);
+    r[101] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[100]); t = (sp_int_digit)(a[99]);
+    r[100] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[99]); t = (sp_int_digit)(a[98]);
+    r[99] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[98]); t = (sp_int_digit)(a[97]);
+    r[98] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[97]); t = (sp_int_digit)(a[96]);
+    r[97] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[96]); t = (sp_int_digit)(a[95]);
+    r[96] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[95]); t = (sp_int_digit)(a[94]);
+    r[95] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[94]); t = (sp_int_digit)(a[93]);
+    r[94] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[93]); t = (sp_int_digit)(a[92]);
+    r[93] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[92]); t = (sp_int_digit)(a[91]);
+    r[92] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[91]); t = (sp_int_digit)(a[90]);
+    r[91] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[90]); t = (sp_int_digit)(a[89]);
+    r[90] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[89]); t = (sp_int_digit)(a[88]);
+    r[89] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[88]); t = (sp_int_digit)(a[87]);
+    r[88] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[87]); t = (sp_int_digit)(a[86]);
+    r[87] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[86]); t = (sp_int_digit)(a[85]);
+    r[86] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[85]); t = (sp_int_digit)(a[84]);
+    r[85] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[84]); t = (sp_int_digit)(a[83]);
+    r[84] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[83]); t = (sp_int_digit)(a[82]);
+    r[83] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[82]); t = (sp_int_digit)(a[81]);
+    r[82] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[81]); t = (sp_int_digit)(a[80]);
+    r[81] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[80]); t = (sp_int_digit)(a[79]);
+    r[80] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[79]); t = (sp_int_digit)(a[78]);
+    r[79] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[78]); t = (sp_int_digit)(a[77]);
+    r[78] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[77]); t = (sp_int_digit)(a[76]);
+    r[77] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[76]); t = (sp_int_digit)(a[75]);
+    r[76] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[75]); t = (sp_int_digit)(a[74]);
+    r[75] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[74]); t = (sp_int_digit)(a[73]);
+    r[74] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[73]); t = (sp_int_digit)(a[72]);
+    r[73] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[72]); t = (sp_int_digit)(a[71]);
+    r[72] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[71]); t = (sp_int_digit)(a[70]);
+    r[71] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[70]); t = (sp_int_digit)(a[69]);
+    r[70] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[69]); t = (sp_int_digit)(a[68]);
+    r[69] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[68]); t = (sp_int_digit)(a[67]);
+    r[68] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[67]); t = (sp_int_digit)(a[66]);
+    r[67] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[66]); t = (sp_int_digit)(a[65]);
+    r[66] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[65]); t = (sp_int_digit)(a[64]);
+    r[65] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[64]); t = (sp_int_digit)(a[63]);
+    r[64] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[63]); t = (sp_int_digit)(a[62]);
+    r[63] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[62]); t = (sp_int_digit)(a[61]);
+    r[62] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[61]); t = (sp_int_digit)(a[60]);
+    r[61] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[60]); t = (sp_int_digit)(a[59]);
+    r[60] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[59]); t = (sp_int_digit)(a[58]);
+    r[59] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[58]); t = (sp_int_digit)(a[57]);
+    r[58] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[57]); t = (sp_int_digit)(a[56]);
+    r[57] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[56]); t = (sp_int_digit)(a[55]);
+    r[56] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[55]); t = (sp_int_digit)(a[54]);
+    r[55] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[54]); t = (sp_int_digit)(a[53]);
+    r[54] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[53]); t = (sp_int_digit)(a[52]);
+    r[53] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[52]); t = (sp_int_digit)(a[51]);
+    r[52] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[51]); t = (sp_int_digit)(a[50]);
+    r[51] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[50]); t = (sp_int_digit)(a[49]);
+    r[50] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[49]); t = (sp_int_digit)(a[48]);
+    r[49] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[48]); t = (sp_int_digit)(a[47]);
+    r[48] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[47]); t = (sp_int_digit)(a[46]);
+    r[47] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[46]); t = (sp_int_digit)(a[45]);
+    r[46] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[45]); t = (sp_int_digit)(a[44]);
+    r[45] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[44]); t = (sp_int_digit)(a[43]);
+    r[44] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[43]); t = (sp_int_digit)(a[42]);
+    r[43] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[42]); t = (sp_int_digit)(a[41]);
+    r[42] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[41]); t = (sp_int_digit)(a[40]);
+    r[41] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[40]); t = (sp_int_digit)(a[39]);
+    r[40] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[39]); t = (sp_int_digit)(a[38]);
+    r[39] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[38]); t = (sp_int_digit)(a[37]);
+    r[38] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[37]); t = (sp_int_digit)(a[36]);
+    r[37] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[36]); t = (sp_int_digit)(a[35]);
+    r[36] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[35]); t = (sp_int_digit)(a[34]);
+    r[35] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[34]); t = (sp_int_digit)(a[33]);
+    r[34] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[33]); t = (sp_int_digit)(a[32]);
+    r[33] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[32]); t = (sp_int_digit)(a[31]);
+    r[32] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[31]); t = (sp_int_digit)(a[30]);
+    r[31] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[30]); t = (sp_int_digit)(a[29]);
+    r[30] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[29]); t = (sp_int_digit)(a[28]);
+    r[29] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[28]); t = (sp_int_digit)(a[27]);
+    r[28] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[27]); t = (sp_int_digit)(a[26]);
+    r[27] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[26]); t = (sp_int_digit)(a[25]);
+    r[26] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[25]); t = (sp_int_digit)(a[24]);
+    r[25] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[24]); t = (sp_int_digit)(a[23]);
+    r[24] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[23]); t = (sp_int_digit)(a[22]);
+    r[23] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[22]); t = (sp_int_digit)(a[21]);
+    r[22] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[21]); t = (sp_int_digit)(a[20]);
+    r[21] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[20]); t = (sp_int_digit)(a[19]);
+    r[20] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[19]); t = (sp_int_digit)(a[18]);
+    r[19] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[18]); t = (sp_int_digit)(a[17]);
+    r[18] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[17]); t = (sp_int_digit)(a[16]);
+    r[17] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[16]); t = (sp_int_digit)(a[15]);
+    r[16] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[15]); t = (sp_int_digit)(a[14]);
+    r[15] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[14]); t = (sp_int_digit)(a[13]);
+    r[14] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[13]); t = (sp_int_digit)(a[12]);
+    r[13] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[12]); t = (sp_int_digit)(a[11]);
+    r[12] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[11]); t = (sp_int_digit)(a[10]);
+    r[11] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[10]); t = (sp_int_digit)(a[9]);
+    r[10] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[9]); t = (sp_int_digit)(a[8]);
+    r[9] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[8]); t = (sp_int_digit)(a[7]);
+    r[8] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[7]); t = (sp_int_digit)(a[6]);
+    r[7] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[6]); t = (sp_int_digit)(a[5]);
+    r[6] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[5]); t = (sp_int_digit)(a[4]);
+    r[5] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[4]); t = (sp_int_digit)(a[3]);
+    r[4] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[3]); t = (sp_int_digit)(a[2]);
+    r[3] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[2]); t = (sp_int_digit)(a[1]);
+    r[2] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+    s = (sp_int_digit)(a[1]); t = (sp_int_digit)(a[0]);
+    r[1] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+#endif
+    r[0] = (a[0] << n) & 0x7fffff;
+}
+
+/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
+ *
+ * r     A single precision number that is the result of the operation.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_3072_mod_exp_2_134(sp_digit* r, const sp_digit* e, int bits, const sp_digit* m)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit nd[268];
+    sp_digit td[135];
+#else
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit* tmp;
+    sp_digit mp = 1;
+    sp_digit n, o;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 403, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        norm = td;
+        tmp  = td + 268;
+        XMEMSET(td, 0, sizeof(sp_digit) * 403);
+#else
+        norm = nd;
+        tmp  = td;
+        XMEMSET(td, 0, sizeof(td));
+#endif
+
+        sp_3072_mont_setup(m, &mp);
+        sp_3072_mont_norm_134(norm, m);
+
+        bits = ((bits + 3) / 4) * 4;
+        i = ((bits + 22) / 23) - 1;
+        c = bits % 23;
+        if (c == 0) {
+            c = 23;
+        }
+        if (i < 134) {
+            n = e[i--] << (32 - c);
+        }
+        else {
+            n = 0;
+            i--;
+        }
+        if (c < 4) {
+            n |= e[i--] << (9 - c);
+            c += 23;
+        }
+        y = (n >> 28) & 0xf;
+        n <<= 4;
+        c -= 4;
+        sp_3072_lshift_134(r, norm, y);
+        for (; i>=0 || c>=4; ) {
+            if (c < 4) {
+                n |= e[i--] << (9 - c);
+                c += 23;
+            }
+            y = (n >> 28) & 0xf;
+            n <<= 4;
+            c -= 4;
+
+            sp_3072_mont_sqr_134(r, r, m, mp);
+            sp_3072_mont_sqr_134(r, r, m, mp);
+            sp_3072_mont_sqr_134(r, r, m, mp);
+            sp_3072_mont_sqr_134(r, r, m, mp);
+
+            sp_3072_lshift_134(r, r, y);
+            sp_3072_mul_d_134(tmp, norm, (r[134] << 10) + (r[133] >> 13));
+            r[134] = 0;
+            r[133] &= 0x1fffL;
+            (void)sp_3072_add_134(r, r, tmp);
+            sp_3072_norm_134(r);
+            o = sp_3072_cmp_134(r, m);
+            sp_3072_cond_sub_134(r, r, m, ((o < 0) ?
+                                          (sp_digit)1 : (sp_digit)0) - 1);
+        }
+
+        sp_3072_mont_reduce_134(r, m, mp);
+        n = sp_3072_cmp_134(r, m);
+        sp_3072_cond_sub_134(r, r, m, ((n < 0) ?
+                                                (sp_digit)1 : (sp_digit)0) - 1);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+
+#endif /* HAVE_FFDHE_3072 */
 
 /* Perform the modular exponentiation for Diffie-Hellman.
  *
@@ -7079,7 +8077,7 @@
  * out      Buffer to hold big-endian bytes of exponentiation result.
  *          Must be at least 384 bytes long.
  * outLen   Length, in bytes, of exponentiation result.
- * returs 0 on success, MP_READ_E if there are too many bytes in an array
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
  * and MEMORY_E if memory allocation fails.
  */
 int sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen,
@@ -7094,29 +8092,47 @@
     sp_digit* r;
     word32 i;
 
-    if (mp_count_bits(base) > 3072 || expLen > 384 ||
-                                                   mp_count_bits(mod) != 3072) {
+    if (mp_count_bits(base) > 3072) {
         err = MP_READ_E;
     }
 
     if (err == MP_OKAY) {
-        d = (sp_digit*)XMALLOC(sizeof(*d) * 136 * 4, NULL,
-                               DYNAMIC_TYPE_TMP_BUFFER);
-        if (d == NULL)
-            err = MEMORY_E;
+        if (expLen > 384) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 3072) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 134 * 4, NULL, DYNAMIC_TYPE_DH);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
     }
 
     if (err == MP_OKAY) {
         b = d;
-        e = b + 136 * 2;
-        m = e + 136;
+        e = b + 134 * 2;
+        m = e + 134;
         r = b;
 
-        sp_3072_from_mp(b, 136, base);
-        sp_3072_from_bin(e, 136, exp, expLen);
-        sp_3072_from_mp(m, 136, mod);
-
-        err = sp_3072_mod_exp_136(r, b, e, expLen * 8, m, 0);
+        sp_3072_from_mp(b, 134, base);
+        sp_3072_from_bin(e, 134, exp, expLen);
+        sp_3072_from_mp(m, 134, mod);
+
+    #ifdef HAVE_FFDHE_3072
+        if (base->used == 1 && base->dp[0] == 2 &&
+                ((m[133] << 3) | (m[132] >> 20)) == 0xffffL) {
+            err = sp_3072_mod_exp_2_134(r, e, expLen * 8, m);
+        }
+        else
+    #endif
+            err = sp_3072_mod_exp_134(r, b, e, expLen * 8, m, 0);
     }
 
     if (err == MP_OKAY) {
@@ -7129,13 +8145,13 @@
     }
 
     if (d != NULL) {
-        XMEMSET(e, 0, sizeof(sp_digit) * 136);
-        XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        XMEMSET(e, 0, sizeof(sp_digit) * 134U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
     }
     return err;
 #else
 #ifndef WOLFSSL_SMALL_STACK
-    sp_digit bd[272], ed[136], md[136];
+    sp_digit bd[268], ed[134], md[134];
 #else
     sp_digit* d = NULL;
 #endif
@@ -7146,23 +8162,187 @@
     word32 i;
     int err = MP_OKAY;
 
-    if (mp_count_bits(base) > 3072 || expLen > 384 ||
-                                                   mp_count_bits(mod) != 3072) {
+    if (mp_count_bits(base) > 3072) {
         err = MP_READ_E;
     }
 
-#ifdef WOLFSSL_SMALL_STACK
-    if (err == MP_OKAY) {
-        d = (sp_digit*)XMALLOC(sizeof(*d) * 136 * 4, NULL,
-                               DYNAMIC_TYPE_TMP_BUFFER);
+    if (err == MP_OKAY) {
+        if (expLen > 384U) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 3072) {
+            err = MP_READ_E;
+        }
+    }
+#ifdef WOLFSSL_SMALL_STACK
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 134 * 4, NULL, DYNAMIC_TYPE_DH);
         if (d == NULL)
             err = MEMORY_E;
     }
 
     if (err == MP_OKAY) {
         b = d;
-        e = b + 136 * 2;
-        m = e + 136;
+        e = b + 134 * 2;
+        m = e + 134;
+        r = b;
+    }
+#else
+    r = b = bd;
+    e = ed;
+    m = md;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_3072_from_mp(b, 134, base);
+        sp_3072_from_bin(e, 134, exp, expLen);
+        sp_3072_from_mp(m, 134, mod);
+
+    #ifdef HAVE_FFDHE_3072
+        if (base->used == 1 && base->dp[0] == 2U &&
+                ((m[133] << 3) | (m[132] >> 20)) == 0xffffL) {
+            err = sp_3072_mod_exp_2_134(r, e, expLen * 8U, m);
+        }
+        else {
+    #endif
+            err = sp_3072_mod_exp_134(r, b, e, expLen * 8U, m, 0);
+    #ifdef HAVE_FFDHE_3072
+        }
+    #endif
+    }
+
+    if (err == MP_OKAY) {
+        sp_3072_to_bin(r, out);
+        *outLen = 384;
+        for (i=0; i<384U && out[i] == 0U; i++) {
+        }
+        *outLen -= i;
+        XMEMMOVE(out, out + i, *outLen);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (d != NULL) {
+        XMEMSET(e, 0, sizeof(sp_digit) * 134U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
+    }
+#else
+    XMEMSET(e, 0, sizeof(sp_digit) * 134U);
+#endif
+
+    return err;
+#endif
+}
+#endif /* WOLFSSL_HAVE_SP_DH */
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base  Base. MP integer.
+ * exp   Exponent. MP integer.
+ * mod   Modulus. MP integer.
+ * res   Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int err = MP_OKAY;
+    sp_digit* d = NULL;
+    sp_digit* b;
+    sp_digit* e;
+    sp_digit* m;
+    sp_digit* r;
+    int expBits = mp_count_bits(exp);
+
+    if (mp_count_bits(base) > 1536) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expBits > 1536) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 1536) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 67 * 4, NULL, DYNAMIC_TYPE_DH);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        b = d;
+        e = b + 67 * 2;
+        m = e + 67;
+        r = b;
+
+        sp_3072_from_mp(b, 67, base);
+        sp_3072_from_mp(e, 67, exp);
+        sp_3072_from_mp(m, 67, mod);
+
+        err = sp_3072_mod_exp_67(r, b, e, mp_count_bits(exp), m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        XMEMSET(r + 67, 0, sizeof(*r) * 67U);
+        err = sp_3072_to_mp(r, res);
+    }
+
+    if (d != NULL) {
+        XMEMSET(e, 0, sizeof(sp_digit) * 67U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
+    }
+    return err;
+#else
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit bd[134], ed[67], md[67];
+#else
+    sp_digit* d = NULL;
+#endif
+    sp_digit* b;
+    sp_digit* e;
+    sp_digit* m;
+    sp_digit* r;
+    int err = MP_OKAY;
+    int expBits = mp_count_bits(exp);
+
+    if (mp_count_bits(base) > 1536) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expBits > 1536) {
+            err = MP_READ_E;
+        }
+    }
+    
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 1536) {
+            err = MP_READ_E;
+        }
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 67 * 4, NULL, DYNAMIC_TYPE_DH);
+        if (d == NULL)
+            err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        b = d;
+        e = b + 67 * 2;
+        m = e + 67;
         r = b;
     }
 #else
@@ -7172,144 +8352,4141 @@
 #endif
 
     if (err == MP_OKAY) {
-        sp_3072_from_mp(b, 136, base);
-        sp_3072_from_bin(e, 136, exp, expLen);
-        sp_3072_from_mp(m, 136, mod);
-
-        err = sp_3072_mod_exp_136(r, b, e, expLen * 8, m, 0);
-    }
-
-    if (err == MP_OKAY) {
-        sp_3072_to_bin(r, out);
-        *outLen = 384;
-        for (i=0; i<384 && out[i] == 0; i++) {
+        sp_3072_from_mp(b, 67, base);
+        sp_3072_from_mp(e, 67, exp);
+        sp_3072_from_mp(m, 67, mod);
+
+        err = sp_3072_mod_exp_67(r, b, e, expBits, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        XMEMSET(r + 67, 0, sizeof(*r) * 67U);
+        err = sp_3072_to_mp(r, res);
+    }
+
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (d != NULL) {
+        XMEMSET(e, 0, sizeof(sp_digit) * 67U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
+    }
+#else
+    XMEMSET(e, 0, sizeof(sp_digit) * 67U);
+#endif
+
+    return err;
+#endif
+}
+
+#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
+
+#endif /* !WOLFSSL_SP_NO_3072 */
+
+#ifdef WOLFSSL_SP_4096
+/* Read big endian unsigned byte array into r.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
+ * a  Byte array.
+ * n  Number of bytes in array to read.
+ */
+static void sp_4096_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = n-1; i >= 0; i--) {
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 13U) {
+            r[j] &= 0x1fffff;
+            s = 21U - s;
+            if (j + 1 >= size) {
+                break;
+            }
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
+ * a  A multi-precision integer.
+ */
+static void sp_4096_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 21
+    int j;
+
+    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+    for (j = a->used; j < size; j++) {
+        r[j] = 0;
+    }
+#elif DIGIT_BIT > 21
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
+        r[j] &= 0x1fffff;
+        s = 21U - s;
+        if (j + 1 >= size) {
+            break;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 21U) <= (word32)DIGIT_BIT) {
+            s += 21U;
+            r[j] &= 0x1fffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#else
+    int i, j = 0, s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i]) << s;
+        if (s + DIGIT_BIT >= 21) {
+            r[j] &= 0x1fffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            s = 21 - s;
+            if (s == DIGIT_BIT) {
+                r[++j] = 0;
+                s = 0;
+            }
+            else {
+                r[++j] = a->dp[i] >> s;
+                s = DIGIT_BIT - s;
+            }
+        }
+        else {
+            s += DIGIT_BIT;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#endif
+}
+
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 512
+ *
+ * r  A single precision integer.
+ * a  Byte array.
+ */
+static void sp_4096_to_bin(sp_digit* r, byte* a)
+{
+    int i, j, s = 0, b;
+
+    for (i=0; i<195; i++) {
+        r[i+1] += r[i] >> 21;
+        r[i] &= 0x1fffff;
+    }
+    j = 4096 / 8 - 1;
+    a[j] = 0;
+    for (i=0; i<196 && j>=0; i++) {
+        b = 0;
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
+            break;
+        }
+        while (b < 21) {
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
+                break;
+            }
+        }
+        s = 8 - (b - 21);
+        if (j >= 0) {
+            a[j] = 0;
+        }
+        if (s != 0) {
+            j++;
+        }
+    }
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_mul_49(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int i, j;
+    int64_t t[98];
+
+    XMEMSET(t, 0, sizeof(t));
+    for (i=0; i<49; i++) {
+        for (j=0; j<49; j++) {
+            t[i+j] += ((int64_t)a[i]) * b[j];
+        }
+    }
+    for (i=0; i<97; i++) {
+        r[i] = t[i] & 0x1fffff;
+        t[i+1] += t[i] >> 21;
+    }
+    r[97] = (sp_digit)t[97];
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_sqr_49(sp_digit* r, const sp_digit* a)
+{
+    int i, j;
+    int64_t t[98];
+
+    XMEMSET(t, 0, sizeof(t));
+    for (i=0; i<49; i++) {
+        for (j=0; j<i; j++) {
+            t[i+j] += (((int64_t)a[i]) * a[j]) * 2;
+        }
+        t[i+i] += ((int64_t)a[i]) * a[i];
+    }
+    for (i=0; i<97; i++) {
+        r[i] = t[i] & 0x1fffff;
+        t[i+1] += t[i] >> 21;
+    }
+    r[97] = (sp_digit)t[97];
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_add_49(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 48; i += 8) {
+        r[i + 0] = a[i + 0] + b[i + 0];
+        r[i + 1] = a[i + 1] + b[i + 1];
+        r[i + 2] = a[i + 2] + b[i + 2];
+        r[i + 3] = a[i + 3] + b[i + 3];
+        r[i + 4] = a[i + 4] + b[i + 4];
+        r[i + 5] = a[i + 5] + b[i + 5];
+        r[i + 6] = a[i + 6] + b[i + 6];
+        r[i + 7] = a[i + 7] + b[i + 7];
+    }
+    r[48] = a[48] + b[48];
+
+    return 0;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_add_98(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 96; i += 8) {
+        r[i + 0] = a[i + 0] + b[i + 0];
+        r[i + 1] = a[i + 1] + b[i + 1];
+        r[i + 2] = a[i + 2] + b[i + 2];
+        r[i + 3] = a[i + 3] + b[i + 3];
+        r[i + 4] = a[i + 4] + b[i + 4];
+        r[i + 5] = a[i + 5] + b[i + 5];
+        r[i + 6] = a[i + 6] + b[i + 6];
+        r[i + 7] = a[i + 7] + b[i + 7];
+    }
+    r[96] = a[96] + b[96];
+    r[97] = a[97] + b[97];
+
+    return 0;
+}
+
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_sub_98(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 96; i += 8) {
+        r[i + 0] = a[i + 0] - b[i + 0];
+        r[i + 1] = a[i + 1] - b[i + 1];
+        r[i + 2] = a[i + 2] - b[i + 2];
+        r[i + 3] = a[i + 3] - b[i + 3];
+        r[i + 4] = a[i + 4] - b[i + 4];
+        r[i + 5] = a[i + 5] - b[i + 5];
+        r[i + 6] = a[i + 6] - b[i + 6];
+        r[i + 7] = a[i + 7] - b[i + 7];
+    }
+    r[96] = a[96] - b[96];
+    r[97] = a[97] - b[97];
+
+    return 0;
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_mul_98(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    sp_digit* z0 = r;
+    sp_digit z1[98];
+    sp_digit* a1 = z1;
+    sp_digit b1[49];
+    sp_digit* z2 = r + 98;
+    (void)sp_4096_add_49(a1, a, &a[49]);
+    (void)sp_4096_add_49(b1, b, &b[49]);
+    sp_4096_mul_49(z2, &a[49], &b[49]);
+    sp_4096_mul_49(z0, a, b);
+    sp_4096_mul_49(z1, a1, b1);
+    (void)sp_4096_sub_98(z1, z1, z2);
+    (void)sp_4096_sub_98(z1, z1, z0);
+    (void)sp_4096_add_98(r + 49, r + 49, z1);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_sqr_98(sp_digit* r, const sp_digit* a)
+{
+    sp_digit* z0 = r;
+    sp_digit z1[98];
+    sp_digit* a1 = z1;
+    sp_digit* z2 = r + 98;
+    (void)sp_4096_add_49(a1, a, &a[49]);
+    sp_4096_sqr_49(z2, &a[49]);
+    sp_4096_sqr_49(z0, a);
+    sp_4096_sqr_49(z1, a1);
+    (void)sp_4096_sub_98(z1, z1, z2);
+    (void)sp_4096_sub_98(z1, z1, z0);
+    (void)sp_4096_add_98(r + 49, r + 49, z1);
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_add_196(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 192; i += 8) {
+        r[i + 0] = a[i + 0] + b[i + 0];
+        r[i + 1] = a[i + 1] + b[i + 1];
+        r[i + 2] = a[i + 2] + b[i + 2];
+        r[i + 3] = a[i + 3] + b[i + 3];
+        r[i + 4] = a[i + 4] + b[i + 4];
+        r[i + 5] = a[i + 5] + b[i + 5];
+        r[i + 6] = a[i + 6] + b[i + 6];
+        r[i + 7] = a[i + 7] + b[i + 7];
+    }
+    r[192] = a[192] + b[192];
+    r[193] = a[193] + b[193];
+    r[194] = a[194] + b[194];
+    r[195] = a[195] + b[195];
+
+    return 0;
+}
+
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_sub_196(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 192; i += 8) {
+        r[i + 0] = a[i + 0] - b[i + 0];
+        r[i + 1] = a[i + 1] - b[i + 1];
+        r[i + 2] = a[i + 2] - b[i + 2];
+        r[i + 3] = a[i + 3] - b[i + 3];
+        r[i + 4] = a[i + 4] - b[i + 4];
+        r[i + 5] = a[i + 5] - b[i + 5];
+        r[i + 6] = a[i + 6] - b[i + 6];
+        r[i + 7] = a[i + 7] - b[i + 7];
+    }
+    r[192] = a[192] - b[192];
+    r[193] = a[193] - b[193];
+    r[194] = a[194] - b[194];
+    r[195] = a[195] - b[195];
+
+    return 0;
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_mul_196(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    sp_digit* z0 = r;
+    sp_digit z1[196];
+    sp_digit* a1 = z1;
+    sp_digit b1[98];
+    sp_digit* z2 = r + 196;
+    (void)sp_4096_add_98(a1, a, &a[98]);
+    (void)sp_4096_add_98(b1, b, &b[98]);
+    sp_4096_mul_98(z2, &a[98], &b[98]);
+    sp_4096_mul_98(z0, a, b);
+    sp_4096_mul_98(z1, a1, b1);
+    (void)sp_4096_sub_196(z1, z1, z2);
+    (void)sp_4096_sub_196(z1, z1, z0);
+    (void)sp_4096_add_196(r + 98, r + 98, z1);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_sqr_196(sp_digit* r, const sp_digit* a)
+{
+    sp_digit* z0 = r;
+    sp_digit z1[196];
+    sp_digit* a1 = z1;
+    sp_digit* z2 = r + 196;
+    (void)sp_4096_add_98(a1, a, &a[98]);
+    sp_4096_sqr_98(z2, &a[98]);
+    sp_4096_sqr_98(z0, a);
+    sp_4096_sqr_98(z1, a1);
+    (void)sp_4096_sub_196(z1, z1, z2);
+    (void)sp_4096_sub_196(z1, z1, z0);
+    (void)sp_4096_add_196(r + 98, r + 98, z1);
+}
+
+#endif /* !WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_add_196(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 196; i++) {
+        r[i] = a[i] + b[i];
+    }
+
+    return 0;
+}
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_sub_196(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 196; i++) {
+        r[i] = a[i] - b[i];
+    }
+
+    return 0;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_mul_196(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int i, j, k;
+    int64_t c;
+
+    c = ((int64_t)a[195]) * b[195];
+    r[391] = (sp_digit)(c >> 21);
+    c = (c & 0x1fffff) << 21;
+    for (k = 389; k >= 0; k--) {
+        for (i = 195; i >= 0; i--) {
+            j = k - i;
+            if (j >= 196) {
+                break;
+            }
+            if (j < 0) {
+                continue;
+            }
+
+            c += ((int64_t)a[i]) * b[j];
+        }
+        r[k + 2] += c >> 42;
+        r[k + 1] = (c >> 21) & 0x1fffff;
+        c = (c & 0x1fffff) << 21;
+    }
+    r[0] = (sp_digit)(c >> 21);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_sqr_196(sp_digit* r, const sp_digit* a)
+{
+    int i, j, k;
+    int64_t c;
+
+    c = ((int64_t)a[195]) * a[195];
+    r[391] = (sp_digit)(c >> 21);
+    c = (c & 0x1fffff) << 21;
+    for (k = 389; k >= 0; k--) {
+        for (i = 195; i >= 0; i--) {
+            j = k - i;
+            if (j >= 196 || i <= j) {
+                break;
+            }
+            if (j < 0) {
+                continue;
+            }
+
+            c += ((int64_t)a[i]) * a[j] * 2;
+        }
+        if (i == j) {
+           c += ((int64_t)a[i]) * a[i];
+        }
+
+        r[k + 2] += c >> 42;
+        r[k + 1] = (c >> 21) & 0x1fffff;
+        c = (c & 0x1fffff) << 21;
+    }
+    r[0] = (sp_digit)(c >> 21);
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+#if defined(WOLFSSL_HAVE_SP_RSA) && !defined(SP_RSA_PRIVATE_EXP_D)
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_add_98(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 98; i++) {
+        r[i] = a[i] + b[i];
+    }
+
+    return 0;
+}
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_sub_98(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 98; i++) {
+        r[i] = a[i] - b[i];
+    }
+
+    return 0;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_mul_98(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int i, j, k;
+    int64_t c;
+
+    c = ((int64_t)a[97]) * b[97];
+    r[195] = (sp_digit)(c >> 21);
+    c = (c & 0x1fffff) << 21;
+    for (k = 193; k >= 0; k--) {
+        for (i = 97; i >= 0; i--) {
+            j = k - i;
+            if (j >= 98) {
+                break;
+            }
+            if (j < 0) {
+                continue;
+            }
+
+            c += ((int64_t)a[i]) * b[j];
+        }
+        r[k + 2] += c >> 42;
+        r[k + 1] = (c >> 21) & 0x1fffff;
+        c = (c & 0x1fffff) << 21;
+    }
+    r[0] = (sp_digit)(c >> 21);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_sqr_98(sp_digit* r, const sp_digit* a)
+{
+    int i, j, k;
+    int64_t c;
+
+    c = ((int64_t)a[97]) * a[97];
+    r[195] = (sp_digit)(c >> 21);
+    c = (c & 0x1fffff) << 21;
+    for (k = 193; k >= 0; k--) {
+        for (i = 97; i >= 0; i--) {
+            j = k - i;
+            if (j >= 98 || i <= j) {
+                break;
+            }
+            if (j < 0) {
+                continue;
+            }
+
+            c += ((int64_t)a[i]) * a[j] * 2;
+        }
+        if (i == j) {
+           c += ((int64_t)a[i]) * a[i];
+        }
+
+        r[k + 2] += c >> 42;
+        r[k + 1] = (c >> 21) & 0x1fffff;
+        c = (c & 0x1fffff) << 21;
+    }
+    r[0] = (sp_digit)(c >> 21);
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* WOLFSSL_HAVE_SP_RSA && !SP_RSA_PRIVATE_EXP_D */
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+/* Caclulate the bottom digit of -1/a mod 2^n.
+ *
+ * a    A single precision number.
+ * rho  Bottom word of inverse.
+ */
+static void sp_4096_mont_setup(const sp_digit* a, sp_digit* rho)
+{
+    sp_digit x, b;
+
+    b = a[0];
+    x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
+    x &= 0x1fffff;
+
+    /* rho = -1/m mod b */
+    *rho = (1L << 21) - x;
+}
+
+/* Multiply a by scalar b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A scalar.
+ */
+SP_NOINLINE static void sp_4096_mul_d_196(sp_digit* r, const sp_digit* a,
+    sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int64_t tb = b;
+    int64_t t = 0;
+    int i;
+
+    for (i = 0; i < 196; i++) {
+        t += tb * a[i];
+        r[i] = t & 0x1fffff;
+        t >>= 21;
+    }
+    r[196] = (sp_digit)t;
+#else
+    int64_t tb = b;
+    int64_t t[8];
+    int i;
+
+    t[0] = tb * a[0]; r[0] = t[0] & 0x1fffff;
+    for (i = 0; i < 192; i += 8) {
+        t[1] = tb * a[i+1];
+        r[i+1] = (sp_digit)(t[0] >> 21) + (t[1] & 0x1fffff);
+        t[2] = tb * a[i+2];
+        r[i+2] = (sp_digit)(t[1] >> 21) + (t[2] & 0x1fffff);
+        t[3] = tb * a[i+3];
+        r[i+3] = (sp_digit)(t[2] >> 21) + (t[3] & 0x1fffff);
+        t[4] = tb * a[i+4];
+        r[i+4] = (sp_digit)(t[3] >> 21) + (t[4] & 0x1fffff);
+        t[5] = tb * a[i+5];
+        r[i+5] = (sp_digit)(t[4] >> 21) + (t[5] & 0x1fffff);
+        t[6] = tb * a[i+6];
+        r[i+6] = (sp_digit)(t[5] >> 21) + (t[6] & 0x1fffff);
+        t[7] = tb * a[i+7];
+        r[i+7] = (sp_digit)(t[6] >> 21) + (t[7] & 0x1fffff);
+        t[0] = tb * a[i+8];
+        r[i+8] = (sp_digit)(t[7] >> 21) + (t[0] & 0x1fffff);
+    }
+    t[1] = tb * a[193];
+    r[193] = (sp_digit)(t[0] >> 21) + (t[1] & 0x1fffff);
+    t[2] = tb * a[194];
+    r[194] = (sp_digit)(t[1] >> 21) + (t[2] & 0x1fffff);
+    t[3] = tb * a[195];
+    r[195] = (sp_digit)(t[2] >> 21) + (t[3] & 0x1fffff);
+    r[196] =  (sp_digit)(t[3] >> 21);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+#if defined(WOLFSSL_HAVE_SP_RSA) && !defined(SP_RSA_PRIVATE_EXP_D)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 4096 bits, just need to subtract.
+ *
+ * r  A single precision number.
+ * m  A single precision number.
+ */
+static void sp_4096_mont_norm_98(sp_digit* r, const sp_digit* m)
+{
+    /* Set r = 2^n - 1. */
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<97; i++) {
+        r[i] = 0x1fffff;
+    }
+#else
+    int i;
+
+    for (i = 0; i < 96; i += 8) {
+        r[i + 0] = 0x1fffff;
+        r[i + 1] = 0x1fffff;
+        r[i + 2] = 0x1fffff;
+        r[i + 3] = 0x1fffff;
+        r[i + 4] = 0x1fffff;
+        r[i + 5] = 0x1fffff;
+        r[i + 6] = 0x1fffff;
+        r[i + 7] = 0x1fffff;
+    }
+    r[96] = 0x1fffff;
+#endif
+    r[97] = 0x7ffL;
+
+    /* r = (2^n - 1) mod n */
+    (void)sp_4096_sub_98(r, r, m);
+
+    /* Add one so r = 2^n mod m */
+    r[0] += 1;
+}
+
+/* Compare a with b in constant time.
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+static sp_digit sp_4096_cmp_98(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=97; i>=0; i--) {
+        r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    }
+#else
+    int i;
+
+    r |= (a[97] - b[97]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[96] - b[96]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    for (i = 88; i >= 0; i -= 8) {
+        r |= (a[i + 7] - b[i + 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 6] - b[i + 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 5] - b[i + 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 4] - b[i + 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 3] - b[i + 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 2] - b[i + 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 1] - b[i + 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 0] - b[i + 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    }
+#endif /* WOLFSSL_SP_SMALL */
+
+    return r;
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not.
+ *
+ * r  A single precision number representing condition subtract result.
+ * a  A single precision number to subtract from.
+ * b  A single precision number to subtract.
+ * m  Mask value to apply.
+ */
+static void sp_4096_cond_sub_98(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i = 0; i < 98; i++) {
+        r[i] = a[i] - (b[i] & m);
+    }
+#else
+    int i;
+
+    for (i = 0; i < 96; i += 8) {
+        r[i + 0] = a[i + 0] - (b[i + 0] & m);
+        r[i + 1] = a[i + 1] - (b[i + 1] & m);
+        r[i + 2] = a[i + 2] - (b[i + 2] & m);
+        r[i + 3] = a[i + 3] - (b[i + 3] & m);
+        r[i + 4] = a[i + 4] - (b[i + 4] & m);
+        r[i + 5] = a[i + 5] - (b[i + 5] & m);
+        r[i + 6] = a[i + 6] - (b[i + 6] & m);
+        r[i + 7] = a[i + 7] - (b[i + 7] & m);
+    }
+    r[96] = a[96] - (b[96] & m);
+    r[97] = a[97] - (b[97] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Mul a by scalar b and add into r. (r += a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A scalar.
+ */
+SP_NOINLINE static void sp_4096_mul_add_98(sp_digit* r, const sp_digit* a,
+        const sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int64_t tb = b;
+    int64_t t = 0;
+    int i;
+
+    for (i = 0; i < 98; i++) {
+        t += (tb * a[i]) + r[i];
+        r[i] = t & 0x1fffff;
+        t >>= 21;
+    }
+    r[98] += t;
+#else
+    int64_t tb = b;
+    int64_t t[8];
+    int i;
+
+    t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1fffff);
+    for (i = 0; i < 96; i += 8) {
+        t[1] = tb * a[i+1];
+        r[i+1] += (sp_digit)((t[0] >> 21) + (t[1] & 0x1fffff));
+        t[2] = tb * a[i+2];
+        r[i+2] += (sp_digit)((t[1] >> 21) + (t[2] & 0x1fffff));
+        t[3] = tb * a[i+3];
+        r[i+3] += (sp_digit)((t[2] >> 21) + (t[3] & 0x1fffff));
+        t[4] = tb * a[i+4];
+        r[i+4] += (sp_digit)((t[3] >> 21) + (t[4] & 0x1fffff));
+        t[5] = tb * a[i+5];
+        r[i+5] += (sp_digit)((t[4] >> 21) + (t[5] & 0x1fffff));
+        t[6] = tb * a[i+6];
+        r[i+6] += (sp_digit)((t[5] >> 21) + (t[6] & 0x1fffff));
+        t[7] = tb * a[i+7];
+        r[i+7] += (sp_digit)((t[6] >> 21) + (t[7] & 0x1fffff));
+        t[0] = tb * a[i+8];
+        r[i+8] += (sp_digit)((t[7] >> 21) + (t[0] & 0x1fffff));
+    }
+    t[1] = tb * a[97]; r[97] += (sp_digit)((t[0] >> 21) + (t[1] & 0x1fffff));
+    r[98] +=  (sp_digit)(t[1] >> 21);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Normalize the values in each word to 21.
+ *
+ * a  Array of sp_digit to normalize.
+ */
+static void sp_4096_norm_98(sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    for (i = 0; i < 97; i++) {
+        a[i+1] += a[i] >> 21;
+        a[i] &= 0x1fffff;
+    }
+#else
+    int i;
+    for (i = 0; i < 96; i += 8) {
+        a[i+1] += a[i+0] >> 21; a[i+0] &= 0x1fffff;
+        a[i+2] += a[i+1] >> 21; a[i+1] &= 0x1fffff;
+        a[i+3] += a[i+2] >> 21; a[i+2] &= 0x1fffff;
+        a[i+4] += a[i+3] >> 21; a[i+3] &= 0x1fffff;
+        a[i+5] += a[i+4] >> 21; a[i+4] &= 0x1fffff;
+        a[i+6] += a[i+5] >> 21; a[i+5] &= 0x1fffff;
+        a[i+7] += a[i+6] >> 21; a[i+6] &= 0x1fffff;
+        a[i+8] += a[i+7] >> 21; a[i+7] &= 0x1fffff;
+        a[i+9] += a[i+8] >> 21; a[i+8] &= 0x1fffff;
+    }
+    a[96+1] += a[96] >> 21;
+    a[96] &= 0x1fffff;
+#endif
+}
+
+/* Shift the result in the high 2048 bits down to the bottom.
+ *
+ * r  A single precision number.
+ * a  A single precision number.
+ */
+static void sp_4096_mont_shift_98(sp_digit* r, const sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    int64_t n = a[97] >> 11;
+    n += ((int64_t)a[98]) << 10;
+
+    for (i = 0; i < 97; i++) {
+        r[i] = n & 0x1fffff;
+        n >>= 21;
+        n += ((int64_t)a[99 + i]) << 10;
+    }
+    r[97] = (sp_digit)n;
+#else
+    int i;
+    int64_t n = a[97] >> 11;
+    n += ((int64_t)a[98]) << 10;
+    for (i = 0; i < 96; i += 8) {
+        r[i + 0] = n & 0x1fffff;
+        n >>= 21; n += ((int64_t)a[i + 99]) << 10;
+        r[i + 1] = n & 0x1fffff;
+        n >>= 21; n += ((int64_t)a[i + 100]) << 10;
+        r[i + 2] = n & 0x1fffff;
+        n >>= 21; n += ((int64_t)a[i + 101]) << 10;
+        r[i + 3] = n & 0x1fffff;
+        n >>= 21; n += ((int64_t)a[i + 102]) << 10;
+        r[i + 4] = n & 0x1fffff;
+        n >>= 21; n += ((int64_t)a[i + 103]) << 10;
+        r[i + 5] = n & 0x1fffff;
+        n >>= 21; n += ((int64_t)a[i + 104]) << 10;
+        r[i + 6] = n & 0x1fffff;
+        n >>= 21; n += ((int64_t)a[i + 105]) << 10;
+        r[i + 7] = n & 0x1fffff;
+        n >>= 21; n += ((int64_t)a[i + 106]) << 10;
+    }
+    r[96] = n & 0x1fffff; n >>= 21; n += ((int64_t)a[195]) << 10;
+    r[97] = (sp_digit)n;
+#endif /* WOLFSSL_SP_SMALL */
+    XMEMSET(&r[98], 0, sizeof(*r) * 98U);
+}
+
+/* Reduce the number back to 4096 bits using Montgomery reduction.
+ *
+ * a   A single precision number to reduce in place.
+ * m   The single precision number representing the modulus.
+ * mp  The digit representing the negative inverse of m mod 2^n.
+ */
+static void sp_4096_mont_reduce_98(sp_digit* a, const sp_digit* m, sp_digit mp)
+{
+    int i;
+    sp_digit mu;
+
+    sp_4096_norm_98(a + 98);
+
+    for (i=0; i<97; i++) {
+        mu = (a[i] * mp) & 0x1fffff;
+        sp_4096_mul_add_98(a+i, m, mu);
+        a[i+1] += a[i] >> 21;
+    }
+    mu = (a[i] * mp) & 0x7ffL;
+    sp_4096_mul_add_98(a+i, m, mu);
+    a[i+1] += a[i] >> 21;
+    a[i] &= 0x1fffff;
+
+    sp_4096_mont_shift_98(a, a);
+    sp_4096_cond_sub_98(a, a, m, 0 - (((a[97] >> 11) > 0) ?
+            (sp_digit)1 : (sp_digit)0));
+    sp_4096_norm_98(a);
+}
+
+/* Multiply two Montogmery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r   Result of multiplication.
+ * a   First number to multiply in Montogmery form.
+ * b   Second number to multiply in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_4096_mont_mul_98(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_4096_mul_98(r, a, b);
+    sp_4096_mont_reduce_98(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_4096_mont_sqr_98(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_4096_sqr_98(r, a);
+    sp_4096_mont_reduce_98(r, m, mp);
+}
+
+/* Multiply a by scalar b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A scalar.
+ */
+SP_NOINLINE static void sp_4096_mul_d_98(sp_digit* r, const sp_digit* a,
+    sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int64_t tb = b;
+    int64_t t = 0;
+    int i;
+
+    for (i = 0; i < 98; i++) {
+        t += tb * a[i];
+        r[i] = t & 0x1fffff;
+        t >>= 21;
+    }
+    r[98] = (sp_digit)t;
+#else
+    int64_t tb = b;
+    int64_t t[8];
+    int i;
+
+    t[0] = tb * a[0]; r[0] = t[0] & 0x1fffff;
+    for (i = 0; i < 96; i += 8) {
+        t[1] = tb * a[i+1];
+        r[i+1] = (sp_digit)(t[0] >> 21) + (t[1] & 0x1fffff);
+        t[2] = tb * a[i+2];
+        r[i+2] = (sp_digit)(t[1] >> 21) + (t[2] & 0x1fffff);
+        t[3] = tb * a[i+3];
+        r[i+3] = (sp_digit)(t[2] >> 21) + (t[3] & 0x1fffff);
+        t[4] = tb * a[i+4];
+        r[i+4] = (sp_digit)(t[3] >> 21) + (t[4] & 0x1fffff);
+        t[5] = tb * a[i+5];
+        r[i+5] = (sp_digit)(t[4] >> 21) + (t[5] & 0x1fffff);
+        t[6] = tb * a[i+6];
+        r[i+6] = (sp_digit)(t[5] >> 21) + (t[6] & 0x1fffff);
+        t[7] = tb * a[i+7];
+        r[i+7] = (sp_digit)(t[6] >> 21) + (t[7] & 0x1fffff);
+        t[0] = tb * a[i+8];
+        r[i+8] = (sp_digit)(t[7] >> 21) + (t[0] & 0x1fffff);
+    }
+    t[1] = tb * a[97];
+    r[97] = (sp_digit)(t[0] >> 21) + (t[1] & 0x1fffff);
+    r[98] =  (sp_digit)(t[1] >> 21);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r  A single precision number representing conditional add result.
+ * a  A single precision number to add with.
+ * b  A single precision number to add.
+ * m  Mask value to apply.
+ */
+static void sp_4096_cond_add_98(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i = 0; i < 98; i++) {
+        r[i] = a[i] + (b[i] & m);
+    }
+#else
+    int i;
+
+    for (i = 0; i < 96; i += 8) {
+        r[i + 0] = a[i + 0] + (b[i + 0] & m);
+        r[i + 1] = a[i + 1] + (b[i + 1] & m);
+        r[i + 2] = a[i + 2] + (b[i + 2] & m);
+        r[i + 3] = a[i + 3] + (b[i + 3] & m);
+        r[i + 4] = a[i + 4] + (b[i + 4] & m);
+        r[i + 5] = a[i + 5] + (b[i + 5] & m);
+        r[i + 6] = a[i + 6] + (b[i + 6] & m);
+        r[i + 7] = a[i + 7] + (b[i + 7] & m);
+    }
+    r[96] = a[96] + (b[96] & m);
+    r[97] = a[97] + (b[97] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#ifdef WOLFSSL_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_sub_98(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 98; i++) {
+        r[i] = a[i] - b[i];
+    }
+
+    return 0;
+}
+
+#endif
+#ifdef WOLFSSL_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_add_98(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 98; i++) {
+        r[i] = a[i] + b[i];
+    }
+
+    return 0;
+}
+#endif
+SP_NOINLINE static void sp_4096_rshift_98(sp_digit* r, sp_digit* a, byte n)
+{
+    int i;
+
+#ifdef WOLFSSL_SP_SMALL
+    for (i=0; i<97; i++) {
+        r[i] = ((a[i] >> n) | (a[i + 1] << (21 - n))) & 0x1fffff;
+    }
+#else
+    for (i=0; i<96; i += 8) {
+        r[i+0] = ((a[i+0] >> n) | (a[i+1] << (21 - n))) & 0x1fffff;
+        r[i+1] = ((a[i+1] >> n) | (a[i+2] << (21 - n))) & 0x1fffff;
+        r[i+2] = ((a[i+2] >> n) | (a[i+3] << (21 - n))) & 0x1fffff;
+        r[i+3] = ((a[i+3] >> n) | (a[i+4] << (21 - n))) & 0x1fffff;
+        r[i+4] = ((a[i+4] >> n) | (a[i+5] << (21 - n))) & 0x1fffff;
+        r[i+5] = ((a[i+5] >> n) | (a[i+6] << (21 - n))) & 0x1fffff;
+        r[i+6] = ((a[i+6] >> n) | (a[i+7] << (21 - n))) & 0x1fffff;
+        r[i+7] = ((a[i+7] >> n) | (a[i+8] << (21 - n))) & 0x1fffff;
+    }
+    r[96] = ((a[96] >> n) | (a[97] << (21 - n))) & 0x1fffff;
+#endif
+    r[97] = a[97] >> n;
+}
+
+#ifdef WOLFSSL_SP_DIV_32
+static WC_INLINE sp_digit sp_4096_div_word_98(sp_digit d1, sp_digit d0,
+    sp_digit dv)
+{
+    sp_digit d, r, t;
+
+    /* All 21 bits from d1 and top 10 bits from d0. */
+    d = (d1 << 10) | (d0 >> 11);
+    r = d / dv;
+    d -= r * dv;
+    /* Up to 11 bits in r */
+    /* Next 10 bits from d0. */
+    r <<= 10;
+    d <<= 10;
+    d |= (d0 >> 1) & ((1 << 10) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 21 bits in r */
+    /* Remaining 1 bits from d0. */
+    r <<= 1;
+    d <<= 1;
+    d |= d0 & ((1 << 1) - 1);
+    t = d / dv;
+    r += t;
+
+    return r;
+}
+#endif /* WOLFSSL_SP_DIV_32 */
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Nmber to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_4096_div_98(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    int i;
+#ifndef WOLFSSL_SP_DIV_32
+    int64_t d1;
+#endif
+    sp_digit dv, r1;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* td;
+#else
+    sp_digit t1d[196 + 1], t2d[98 + 1], sdd[98 + 1];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    sp_digit* sd;
+    int err = MP_OKAY;
+
+    (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 98 + 3), NULL,
+                                                       DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    (void)m;
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = td;
+        t2 = td + 196 + 1;
+        sd = t2 + 98 + 1;
+#else
+        t1 = t1d;
+        t2 = t2d;
+        sd = sdd;
+#endif
+
+        sp_4096_mul_d_98(sd, d, 1L << 10);
+        sp_4096_mul_d_196(t1, a, 1L << 10);
+        dv = sd[97];
+        for (i=98; i>=0; i--) {
+            t1[98 + i] += t1[98 + i - 1] >> 21;
+            t1[98 + i - 1] &= 0x1fffff;
+#ifndef WOLFSSL_SP_DIV_32
+            d1 = t1[98 + i];
+            d1 <<= 21;
+            d1 += t1[98 + i - 1];
+            r1 = (sp_digit)(d1 / dv);
+#else
+            r1 = sp_4096_div_word_98(t1[98 + i], t1[98 + i - 1], dv);
+#endif
+
+            sp_4096_mul_d_98(t2, sd, r1);
+            (void)sp_4096_sub_98(&t1[i], &t1[i], t2);
+            t1[98 + i] -= t2[98];
+            t1[98 + i] += t1[98 + i - 1] >> 21;
+            t1[98 + i - 1] &= 0x1fffff;
+            r1 = (((-t1[98 + i]) << 21) - t1[98 + i - 1]) / dv;
+            r1 -= t1[98 + i];
+            sp_4096_mul_d_98(t2, sd, r1);
+            (void)sp_4096_add_98(&t1[i], &t1[i], t2);
+            t1[98 + i] += t1[98 + i - 1] >> 21;
+            t1[98 + i - 1] &= 0x1fffff;
+        }
+        t1[98 - 1] += t1[98 - 2] >> 21;
+        t1[98 - 2] &= 0x1fffff;
+        r1 = t1[98 - 1] / dv;
+
+        sp_4096_mul_d_98(t2, sd, r1);
+        sp_4096_sub_98(t1, t1, t2);
+        XMEMCPY(r, t1, sizeof(*r) * 2U * 98U);
+        for (i=0; i<96; i++) {
+            r[i+1] += r[i] >> 21;
+            r[i] &= 0x1fffff;
+        }
+        sp_4096_cond_add_98(r, r, sd, 0 - ((r[97] < 0) ?
+                    (sp_digit)1 : (sp_digit)0));
+
+        sp_4096_norm_98(r);
+        sp_4096_rshift_98(r, r, 10);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_4096_mod_98(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_4096_div_98(a, m, NULL, r);
+}
+
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * r     A single precision number that is the result of the operation.
+ * a     A single precision number being exponentiated.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_4096_mod_exp_98(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits,
+    const sp_digit* m, int reduceA)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* td;
+    sp_digit* t[3];
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+    td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 98 * 2, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        XMEMSET(td, 0, sizeof(*td) * 3U * 98U * 2U);
+
+        norm = t[0] = td;
+        t[1] = &td[98 * 2];
+        t[2] = &td[2 * 98 * 2];
+
+        sp_4096_mont_setup(m, &mp);
+        sp_4096_mont_norm_98(norm, m);
+
+        if (reduceA != 0) {
+            err = sp_4096_mod_98(t[1], a, m);
+        }
+        else {
+            XMEMCPY(t[1], a, sizeof(sp_digit) * 98U);
+        }
+    }
+    if (err == MP_OKAY) {
+        sp_4096_mul_98(t[1], t[1], norm);
+        err = sp_4096_mod_98(t[1], t[1], m);
+    }
+
+    if (err == MP_OKAY) {
+        i = bits / 21;
+        c = bits % 21;
+        n = e[i--] << (21 - c);
+        for (; ; c--) {
+            if (c == 0) {
+                if (i == -1) {
+                    break;
+                }
+
+                n = e[i--];
+                c = 21;
+            }
+
+            y = (n >> 20) & 1;
+            n <<= 1;
+
+            sp_4096_mont_mul_98(t[y^1], t[0], t[1], m, mp);
+
+            XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
+                                  ((size_t)t[1] & addr_mask[y])),
+                    sizeof(*t[2]) * 98 * 2);
+            sp_4096_mont_sqr_98(t[2], t[2], m, mp);
+            XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
+                            ((size_t)t[1] & addr_mask[y])), t[2],
+                    sizeof(*t[2]) * 98 * 2);
+        }
+
+        sp_4096_mont_reduce_98(t[0], m, mp);
+        n = sp_4096_cmp_98(t[0], m);
+        sp_4096_cond_sub_98(t[0], t[0], m, ((n < 0) ?
+                    (sp_digit)1 : (sp_digit)0) - 1);
+        XMEMCPY(r, t[0], sizeof(*r) * 98 * 2);
+
+    }
+
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+
+    return err;
+#elif defined(WOLFSSL_SP_CACHE_RESISTANT)
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[3][196];
+#else
+    sp_digit* td;
+    sp_digit* t[3];
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 98 * 2, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        t[0] = td;
+        t[1] = &td[98 * 2];
+        t[2] = &td[2 * 98 * 2];
+#endif
+        norm = t[0];
+
+        sp_4096_mont_setup(m, &mp);
+        sp_4096_mont_norm_98(norm, m);
+
+        if (reduceA != 0) {
+            err = sp_4096_mod_98(t[1], a, m);
+            if (err == MP_OKAY) {
+                sp_4096_mul_98(t[1], t[1], norm);
+                err = sp_4096_mod_98(t[1], t[1], m);
+            }
+        }
+        else {
+            sp_4096_mul_98(t[1], a, norm);
+            err = sp_4096_mod_98(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        i = bits / 21;
+        c = bits % 21;
+        n = e[i--] << (21 - c);
+        for (; ; c--) {
+            if (c == 0) {
+                if (i == -1) {
+                    break;
+                }
+
+                n = e[i--];
+                c = 21;
+            }
+
+            y = (n >> 20) & 1;
+            n <<= 1;
+
+            sp_4096_mont_mul_98(t[y^1], t[0], t[1], m, mp);
+
+            XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
+                                 ((size_t)t[1] & addr_mask[y])), sizeof(t[2]));
+            sp_4096_mont_sqr_98(t[2], t[2], m, mp);
+            XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
+                           ((size_t)t[1] & addr_mask[y])), t[2], sizeof(t[2]));
+        }
+
+        sp_4096_mont_reduce_98(t[0], m, mp);
+        n = sp_4096_cmp_98(t[0], m);
+        sp_4096_cond_sub_98(t[0], t[0], m, ((n < 0) ?
+                   (sp_digit)1 : (sp_digit)0) - 1);
+        XMEMCPY(r, t[0], sizeof(t[0]));
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+#else
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[32][196];
+#else
+    sp_digit* t[32];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit rt[196];
+    sp_digit mp = 1;
+    sp_digit n;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 196, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<32; i++)
+            t[i] = td + i * 196;
+#endif
+        norm = t[0];
+
+        sp_4096_mont_setup(m, &mp);
+        sp_4096_mont_norm_98(norm, m);
+
+        if (reduceA != 0) {
+            err = sp_4096_mod_98(t[1], a, m);
+            if (err == MP_OKAY) {
+                sp_4096_mul_98(t[1], t[1], norm);
+                err = sp_4096_mod_98(t[1], t[1], m);
+            }
+        }
+        else {
+            sp_4096_mul_98(t[1], a, norm);
+            err = sp_4096_mod_98(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_mont_sqr_98(t[ 2], t[ 1], m, mp);
+        sp_4096_mont_mul_98(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_4096_mont_sqr_98(t[ 4], t[ 2], m, mp);
+        sp_4096_mont_mul_98(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_4096_mont_sqr_98(t[ 6], t[ 3], m, mp);
+        sp_4096_mont_mul_98(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_4096_mont_sqr_98(t[ 8], t[ 4], m, mp);
+        sp_4096_mont_mul_98(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_4096_mont_sqr_98(t[10], t[ 5], m, mp);
+        sp_4096_mont_mul_98(t[11], t[ 6], t[ 5], m, mp);
+        sp_4096_mont_sqr_98(t[12], t[ 6], m, mp);
+        sp_4096_mont_mul_98(t[13], t[ 7], t[ 6], m, mp);
+        sp_4096_mont_sqr_98(t[14], t[ 7], m, mp);
+        sp_4096_mont_mul_98(t[15], t[ 8], t[ 7], m, mp);
+        sp_4096_mont_sqr_98(t[16], t[ 8], m, mp);
+        sp_4096_mont_mul_98(t[17], t[ 9], t[ 8], m, mp);
+        sp_4096_mont_sqr_98(t[18], t[ 9], m, mp);
+        sp_4096_mont_mul_98(t[19], t[10], t[ 9], m, mp);
+        sp_4096_mont_sqr_98(t[20], t[10], m, mp);
+        sp_4096_mont_mul_98(t[21], t[11], t[10], m, mp);
+        sp_4096_mont_sqr_98(t[22], t[11], m, mp);
+        sp_4096_mont_mul_98(t[23], t[12], t[11], m, mp);
+        sp_4096_mont_sqr_98(t[24], t[12], m, mp);
+        sp_4096_mont_mul_98(t[25], t[13], t[12], m, mp);
+        sp_4096_mont_sqr_98(t[26], t[13], m, mp);
+        sp_4096_mont_mul_98(t[27], t[14], t[13], m, mp);
+        sp_4096_mont_sqr_98(t[28], t[14], m, mp);
+        sp_4096_mont_mul_98(t[29], t[15], t[14], m, mp);
+        sp_4096_mont_sqr_98(t[30], t[15], m, mp);
+        sp_4096_mont_mul_98(t[31], t[16], t[15], m, mp);
+
+        bits = ((bits + 4) / 5) * 5;
+        i = ((bits + 20) / 21) - 1;
+        c = bits % 21;
+        if (c == 0) {
+            c = 21;
+        }
+        if (i < 98) {
+            n = e[i--] << (32 - c);
+        }
+        else {
+            n = 0;
+            i--;
+        }
+        if (c < 5) {
+            n |= e[i--] << (11 - c);
+            c += 21;
+        }
+        y = (n >> 27) & 0x1f;
+        n <<= 5;
+        c -= 5;
+        XMEMCPY(rt, t[y], sizeof(rt));
+        for (; i>=0 || c>=5; ) {
+            if (c < 5) {
+                n |= e[i--] << (11 - c);
+                c += 21;
+            }
+            y = (n >> 27) & 0x1f;
+            n <<= 5;
+            c -= 5;
+
+            sp_4096_mont_sqr_98(rt, rt, m, mp);
+            sp_4096_mont_sqr_98(rt, rt, m, mp);
+            sp_4096_mont_sqr_98(rt, rt, m, mp);
+            sp_4096_mont_sqr_98(rt, rt, m, mp);
+            sp_4096_mont_sqr_98(rt, rt, m, mp);
+
+            sp_4096_mont_mul_98(rt, rt, t[y], m, mp);
+        }
+
+        sp_4096_mont_reduce_98(rt, m, mp);
+        n = sp_4096_cmp_98(rt, m);
+        sp_4096_cond_sub_98(rt, rt, m, ((n < 0) ?
+                   (sp_digit)1 : (sp_digit)0) - 1);
+        XMEMCPY(r, rt, sizeof(rt));
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+#endif
+}
+
+#endif /* WOLFSSL_HAVE_SP_RSA && !SP_RSA_PRIVATE_EXP_D */
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 4096 bits, just need to subtract.
+ *
+ * r  A single precision number.
+ * m  A single precision number.
+ */
+static void sp_4096_mont_norm_196(sp_digit* r, const sp_digit* m)
+{
+    /* Set r = 2^n - 1. */
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<195; i++) {
+        r[i] = 0x1fffff;
+    }
+#else
+    int i;
+
+    for (i = 0; i < 192; i += 8) {
+        r[i + 0] = 0x1fffff;
+        r[i + 1] = 0x1fffff;
+        r[i + 2] = 0x1fffff;
+        r[i + 3] = 0x1fffff;
+        r[i + 4] = 0x1fffff;
+        r[i + 5] = 0x1fffff;
+        r[i + 6] = 0x1fffff;
+        r[i + 7] = 0x1fffff;
+    }
+    r[192] = 0x1fffff;
+    r[193] = 0x1fffff;
+    r[194] = 0x1fffff;
+#endif
+    r[195] = 0x1L;
+
+    /* r = (2^n - 1) mod n */
+    (void)sp_4096_sub_196(r, r, m);
+
+    /* Add one so r = 2^n mod m */
+    r[0] += 1;
+}
+
+/* Compare a with b in constant time.
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+static sp_digit sp_4096_cmp_196(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=195; i>=0; i--) {
+        r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    }
+#else
+    int i;
+
+    r |= (a[195] - b[195]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[194] - b[194]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[193] - b[193]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[192] - b[192]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    for (i = 184; i >= 0; i -= 8) {
+        r |= (a[i + 7] - b[i + 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 6] - b[i + 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 5] - b[i + 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 4] - b[i + 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 3] - b[i + 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 2] - b[i + 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 1] - b[i + 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 0] - b[i + 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    }
+#endif /* WOLFSSL_SP_SMALL */
+
+    return r;
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not.
+ *
+ * r  A single precision number representing condition subtract result.
+ * a  A single precision number to subtract from.
+ * b  A single precision number to subtract.
+ * m  Mask value to apply.
+ */
+static void sp_4096_cond_sub_196(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i = 0; i < 196; i++) {
+        r[i] = a[i] - (b[i] & m);
+    }
+#else
+    int i;
+
+    for (i = 0; i < 192; i += 8) {
+        r[i + 0] = a[i + 0] - (b[i + 0] & m);
+        r[i + 1] = a[i + 1] - (b[i + 1] & m);
+        r[i + 2] = a[i + 2] - (b[i + 2] & m);
+        r[i + 3] = a[i + 3] - (b[i + 3] & m);
+        r[i + 4] = a[i + 4] - (b[i + 4] & m);
+        r[i + 5] = a[i + 5] - (b[i + 5] & m);
+        r[i + 6] = a[i + 6] - (b[i + 6] & m);
+        r[i + 7] = a[i + 7] - (b[i + 7] & m);
+    }
+    r[192] = a[192] - (b[192] & m);
+    r[193] = a[193] - (b[193] & m);
+    r[194] = a[194] - (b[194] & m);
+    r[195] = a[195] - (b[195] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Mul a by scalar b and add into r. (r += a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A scalar.
+ */
+SP_NOINLINE static void sp_4096_mul_add_196(sp_digit* r, const sp_digit* a,
+        const sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int64_t tb = b;
+    int64_t t = 0;
+    int i;
+
+    for (i = 0; i < 196; i++) {
+        t += (tb * a[i]) + r[i];
+        r[i] = t & 0x1fffff;
+        t >>= 21;
+    }
+    r[196] += t;
+#else
+    int64_t tb = b;
+    int64_t t[8];
+    int i;
+
+    t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1fffff);
+    for (i = 0; i < 192; i += 8) {
+        t[1] = tb * a[i+1];
+        r[i+1] += (sp_digit)((t[0] >> 21) + (t[1] & 0x1fffff));
+        t[2] = tb * a[i+2];
+        r[i+2] += (sp_digit)((t[1] >> 21) + (t[2] & 0x1fffff));
+        t[3] = tb * a[i+3];
+        r[i+3] += (sp_digit)((t[2] >> 21) + (t[3] & 0x1fffff));
+        t[4] = tb * a[i+4];
+        r[i+4] += (sp_digit)((t[3] >> 21) + (t[4] & 0x1fffff));
+        t[5] = tb * a[i+5];
+        r[i+5] += (sp_digit)((t[4] >> 21) + (t[5] & 0x1fffff));
+        t[6] = tb * a[i+6];
+        r[i+6] += (sp_digit)((t[5] >> 21) + (t[6] & 0x1fffff));
+        t[7] = tb * a[i+7];
+        r[i+7] += (sp_digit)((t[6] >> 21) + (t[7] & 0x1fffff));
+        t[0] = tb * a[i+8];
+        r[i+8] += (sp_digit)((t[7] >> 21) + (t[0] & 0x1fffff));
+    }
+    t[1] = tb * a[193]; r[193] += (sp_digit)((t[0] >> 21) + (t[1] & 0x1fffff));
+    t[2] = tb * a[194]; r[194] += (sp_digit)((t[1] >> 21) + (t[2] & 0x1fffff));
+    t[3] = tb * a[195]; r[195] += (sp_digit)((t[2] >> 21) + (t[3] & 0x1fffff));
+    r[196] +=  (sp_digit)(t[3] >> 21);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Normalize the values in each word to 21.
+ *
+ * a  Array of sp_digit to normalize.
+ */
+static void sp_4096_norm_196(sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    for (i = 0; i < 195; i++) {
+        a[i+1] += a[i] >> 21;
+        a[i] &= 0x1fffff;
+    }
+#else
+    int i;
+    for (i = 0; i < 192; i += 8) {
+        a[i+1] += a[i+0] >> 21; a[i+0] &= 0x1fffff;
+        a[i+2] += a[i+1] >> 21; a[i+1] &= 0x1fffff;
+        a[i+3] += a[i+2] >> 21; a[i+2] &= 0x1fffff;
+        a[i+4] += a[i+3] >> 21; a[i+3] &= 0x1fffff;
+        a[i+5] += a[i+4] >> 21; a[i+4] &= 0x1fffff;
+        a[i+6] += a[i+5] >> 21; a[i+5] &= 0x1fffff;
+        a[i+7] += a[i+6] >> 21; a[i+6] &= 0x1fffff;
+        a[i+8] += a[i+7] >> 21; a[i+7] &= 0x1fffff;
+        a[i+9] += a[i+8] >> 21; a[i+8] &= 0x1fffff;
+    }
+    a[192+1] += a[192] >> 21;
+    a[192] &= 0x1fffff;
+    a[193+1] += a[193] >> 21;
+    a[193] &= 0x1fffff;
+    a[194+1] += a[194] >> 21;
+    a[194] &= 0x1fffff;
+#endif
+}
+
+/* Shift the result in the high 4096 bits down to the bottom.
+ *
+ * r  A single precision number.
+ * a  A single precision number.
+ */
+static void sp_4096_mont_shift_196(sp_digit* r, const sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    int64_t n = a[195] >> 1;
+    n += ((int64_t)a[196]) << 20;
+
+    for (i = 0; i < 195; i++) {
+        r[i] = n & 0x1fffff;
+        n >>= 21;
+        n += ((int64_t)a[197 + i]) << 20;
+    }
+    r[195] = (sp_digit)n;
+#else
+    int i;
+    int64_t n = a[195] >> 1;
+    n += ((int64_t)a[196]) << 20;
+    for (i = 0; i < 192; i += 8) {
+        r[i + 0] = n & 0x1fffff;
+        n >>= 21; n += ((int64_t)a[i + 197]) << 20;
+        r[i + 1] = n & 0x1fffff;
+        n >>= 21; n += ((int64_t)a[i + 198]) << 20;
+        r[i + 2] = n & 0x1fffff;
+        n >>= 21; n += ((int64_t)a[i + 199]) << 20;
+        r[i + 3] = n & 0x1fffff;
+        n >>= 21; n += ((int64_t)a[i + 200]) << 20;
+        r[i + 4] = n & 0x1fffff;
+        n >>= 21; n += ((int64_t)a[i + 201]) << 20;
+        r[i + 5] = n & 0x1fffff;
+        n >>= 21; n += ((int64_t)a[i + 202]) << 20;
+        r[i + 6] = n & 0x1fffff;
+        n >>= 21; n += ((int64_t)a[i + 203]) << 20;
+        r[i + 7] = n & 0x1fffff;
+        n >>= 21; n += ((int64_t)a[i + 204]) << 20;
+    }
+    r[192] = n & 0x1fffff; n >>= 21; n += ((int64_t)a[389]) << 20;
+    r[193] = n & 0x1fffff; n >>= 21; n += ((int64_t)a[390]) << 20;
+    r[194] = n & 0x1fffff; n >>= 21; n += ((int64_t)a[391]) << 20;
+    r[195] = (sp_digit)n;
+#endif /* WOLFSSL_SP_SMALL */
+    XMEMSET(&r[196], 0, sizeof(*r) * 196U);
+}
+
+/* Reduce the number back to 4096 bits using Montgomery reduction.
+ *
+ * a   A single precision number to reduce in place.
+ * m   The single precision number representing the modulus.
+ * mp  The digit representing the negative inverse of m mod 2^n.
+ */
+static void sp_4096_mont_reduce_196(sp_digit* a, const sp_digit* m, sp_digit mp)
+{
+    int i;
+    sp_digit mu;
+
+    sp_4096_norm_196(a + 196);
+
+#ifdef WOLFSSL_SP_DH
+    if (mp != 1) {
+        for (i=0; i<195; i++) {
+            mu = (a[i] * mp) & 0x1fffff;
+            sp_4096_mul_add_196(a+i, m, mu);
+            a[i+1] += a[i] >> 21;
+        }
+        mu = (a[i] * mp) & 0x1L;
+        sp_4096_mul_add_196(a+i, m, mu);
+        a[i+1] += a[i] >> 21;
+        a[i] &= 0x1fffff;
+    }
+    else {
+        for (i=0; i<195; i++) {
+            mu = a[i] & 0x1fffff;
+            sp_4096_mul_add_196(a+i, m, mu);
+            a[i+1] += a[i] >> 21;
+        }
+        mu = a[i] & 0x1L;
+        sp_4096_mul_add_196(a+i, m, mu);
+        a[i+1] += a[i] >> 21;
+        a[i] &= 0x1fffff;
+    }
+#else
+    for (i=0; i<195; i++) {
+        mu = (a[i] * mp) & 0x1fffff;
+        sp_4096_mul_add_196(a+i, m, mu);
+        a[i+1] += a[i] >> 21;
+    }
+    mu = (a[i] * mp) & 0x1L;
+    sp_4096_mul_add_196(a+i, m, mu);
+    a[i+1] += a[i] >> 21;
+    a[i] &= 0x1fffff;
+#endif
+
+    sp_4096_mont_shift_196(a, a);
+    sp_4096_cond_sub_196(a, a, m, 0 - (((a[195] >> 1) > 0) ?
+            (sp_digit)1 : (sp_digit)0));
+    sp_4096_norm_196(a);
+}
+
+/* Multiply two Montogmery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r   Result of multiplication.
+ * a   First number to multiply in Montogmery form.
+ * b   Second number to multiply in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_4096_mont_mul_196(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_4096_mul_196(r, a, b);
+    sp_4096_mont_reduce_196(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_4096_mont_sqr_196(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_4096_sqr_196(r, a);
+    sp_4096_mont_reduce_196(r, m, mp);
+}
+
+/* Multiply a by scalar b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A scalar.
+ */
+SP_NOINLINE static void sp_4096_mul_d_392(sp_digit* r, const sp_digit* a,
+    sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int64_t tb = b;
+    int64_t t = 0;
+    int i;
+
+    for (i = 0; i < 392; i++) {
+        t += tb * a[i];
+        r[i] = t & 0x1fffff;
+        t >>= 21;
+    }
+    r[392] = (sp_digit)t;
+#else
+    int64_t tb = b;
+    int64_t t[8];
+    int i;
+
+    t[0] = tb * a[0]; r[0] = t[0] & 0x1fffff;
+    for (i = 0; i < 392; i += 8) {
+        t[1] = tb * a[i+1];
+        r[i+1] = (sp_digit)(t[0] >> 21) + (t[1] & 0x1fffff);
+        t[2] = tb * a[i+2];
+        r[i+2] = (sp_digit)(t[1] >> 21) + (t[2] & 0x1fffff);
+        t[3] = tb * a[i+3];
+        r[i+3] = (sp_digit)(t[2] >> 21) + (t[3] & 0x1fffff);
+        t[4] = tb * a[i+4];
+        r[i+4] = (sp_digit)(t[3] >> 21) + (t[4] & 0x1fffff);
+        t[5] = tb * a[i+5];
+        r[i+5] = (sp_digit)(t[4] >> 21) + (t[5] & 0x1fffff);
+        t[6] = tb * a[i+6];
+        r[i+6] = (sp_digit)(t[5] >> 21) + (t[6] & 0x1fffff);
+        t[7] = tb * a[i+7];
+        r[i+7] = (sp_digit)(t[6] >> 21) + (t[7] & 0x1fffff);
+        t[0] = tb * a[i+8];
+        r[i+8] = (sp_digit)(t[7] >> 21) + (t[0] & 0x1fffff);
+    }
+    r[392] =  (sp_digit)(t[7] >> 21);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r  A single precision number representing conditional add result.
+ * a  A single precision number to add with.
+ * b  A single precision number to add.
+ * m  Mask value to apply.
+ */
+static void sp_4096_cond_add_196(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i = 0; i < 196; i++) {
+        r[i] = a[i] + (b[i] & m);
+    }
+#else
+    int i;
+
+    for (i = 0; i < 192; i += 8) {
+        r[i + 0] = a[i + 0] + (b[i + 0] & m);
+        r[i + 1] = a[i + 1] + (b[i + 1] & m);
+        r[i + 2] = a[i + 2] + (b[i + 2] & m);
+        r[i + 3] = a[i + 3] + (b[i + 3] & m);
+        r[i + 4] = a[i + 4] + (b[i + 4] & m);
+        r[i + 5] = a[i + 5] + (b[i + 5] & m);
+        r[i + 6] = a[i + 6] + (b[i + 6] & m);
+        r[i + 7] = a[i + 7] + (b[i + 7] & m);
+    }
+    r[192] = a[192] + (b[192] & m);
+    r[193] = a[193] + (b[193] & m);
+    r[194] = a[194] + (b[194] & m);
+    r[195] = a[195] + (b[195] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#ifdef WOLFSSL_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_sub_196(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 196; i++) {
+        r[i] = a[i] - b[i];
+    }
+
+    return 0;
+}
+
+#endif
+#ifdef WOLFSSL_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_add_196(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 196; i++) {
+        r[i] = a[i] + b[i];
+    }
+
+    return 0;
+}
+#endif
+SP_NOINLINE static void sp_4096_rshift_196(sp_digit* r, sp_digit* a, byte n)
+{
+    int i;
+
+#ifdef WOLFSSL_SP_SMALL
+    for (i=0; i<195; i++) {
+        r[i] = ((a[i] >> n) | (a[i + 1] << (21 - n))) & 0x1fffff;
+    }
+#else
+    for (i=0; i<192; i += 8) {
+        r[i+0] = ((a[i+0] >> n) | (a[i+1] << (21 - n))) & 0x1fffff;
+        r[i+1] = ((a[i+1] >> n) | (a[i+2] << (21 - n))) & 0x1fffff;
+        r[i+2] = ((a[i+2] >> n) | (a[i+3] << (21 - n))) & 0x1fffff;
+        r[i+3] = ((a[i+3] >> n) | (a[i+4] << (21 - n))) & 0x1fffff;
+        r[i+4] = ((a[i+4] >> n) | (a[i+5] << (21 - n))) & 0x1fffff;
+        r[i+5] = ((a[i+5] >> n) | (a[i+6] << (21 - n))) & 0x1fffff;
+        r[i+6] = ((a[i+6] >> n) | (a[i+7] << (21 - n))) & 0x1fffff;
+        r[i+7] = ((a[i+7] >> n) | (a[i+8] << (21 - n))) & 0x1fffff;
+    }
+    r[192] = ((a[192] >> n) | (a[193] << (21 - n))) & 0x1fffff;
+    r[193] = ((a[193] >> n) | (a[194] << (21 - n))) & 0x1fffff;
+    r[194] = ((a[194] >> n) | (a[195] << (21 - n))) & 0x1fffff;
+#endif
+    r[195] = a[195] >> n;
+}
+
+#ifdef WOLFSSL_SP_DIV_32
+static WC_INLINE sp_digit sp_4096_div_word_196(sp_digit d1, sp_digit d0,
+    sp_digit dv)
+{
+    sp_digit d, r, t;
+
+    /* All 21 bits from d1 and top 10 bits from d0. */
+    d = (d1 << 10) | (d0 >> 11);
+    r = d / dv;
+    d -= r * dv;
+    /* Up to 11 bits in r */
+    /* Next 10 bits from d0. */
+    r <<= 10;
+    d <<= 10;
+    d |= (d0 >> 1) & ((1 << 10) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 21 bits in r */
+    /* Remaining 1 bits from d0. */
+    r <<= 1;
+    d <<= 1;
+    d |= d0 & ((1 << 1) - 1);
+    t = d / dv;
+    r += t;
+
+    return r;
+}
+#endif /* WOLFSSL_SP_DIV_32 */
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Nmber to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_4096_div_196(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    int i;
+#ifndef WOLFSSL_SP_DIV_32
+    int64_t d1;
+#endif
+    sp_digit dv, r1;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* td;
+#else
+    sp_digit t1d[392 + 1], t2d[196 + 1], sdd[196 + 1];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    sp_digit* sd;
+    int err = MP_OKAY;
+
+    (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 196 + 3), NULL,
+                                                       DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    (void)m;
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = td;
+        t2 = td + 392 + 1;
+        sd = t2 + 196 + 1;
+#else
+        t1 = t1d;
+        t2 = t2d;
+        sd = sdd;
+#endif
+
+        sp_4096_mul_d_196(sd, d, 1L << 20);
+        sp_4096_mul_d_392(t1, a, 1L << 20);
+        dv = sd[195];
+        for (i=196; i>=0; i--) {
+            t1[196 + i] += t1[196 + i - 1] >> 21;
+            t1[196 + i - 1] &= 0x1fffff;
+#ifndef WOLFSSL_SP_DIV_32
+            d1 = t1[196 + i];
+            d1 <<= 21;
+            d1 += t1[196 + i - 1];
+            r1 = (sp_digit)(d1 / dv);
+#else
+            r1 = sp_4096_div_word_196(t1[196 + i], t1[196 + i - 1], dv);
+#endif
+
+            sp_4096_mul_d_196(t2, sd, r1);
+            (void)sp_4096_sub_196(&t1[i], &t1[i], t2);
+            t1[196 + i] -= t2[196];
+            t1[196 + i] += t1[196 + i - 1] >> 21;
+            t1[196 + i - 1] &= 0x1fffff;
+            r1 = (((-t1[196 + i]) << 21) - t1[196 + i - 1]) / dv;
+            r1 -= t1[196 + i];
+            sp_4096_mul_d_196(t2, sd, r1);
+            (void)sp_4096_add_196(&t1[i], &t1[i], t2);
+            t1[196 + i] += t1[196 + i - 1] >> 21;
+            t1[196 + i - 1] &= 0x1fffff;
+        }
+        t1[196 - 1] += t1[196 - 2] >> 21;
+        t1[196 - 2] &= 0x1fffff;
+        r1 = t1[196 - 1] / dv;
+
+        sp_4096_mul_d_196(t2, sd, r1);
+        sp_4096_sub_196(t1, t1, t2);
+        XMEMCPY(r, t1, sizeof(*r) * 2U * 196U);
+        for (i=0; i<194; i++) {
+            r[i+1] += r[i] >> 21;
+            r[i] &= 0x1fffff;
+        }
+        sp_4096_cond_add_196(r, r, sd, 0 - ((r[195] < 0) ?
+                    (sp_digit)1 : (sp_digit)0));
+
+        sp_4096_norm_196(r);
+        sp_4096_rshift_196(r, r, 20);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_4096_mod_196(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_4096_div_196(a, m, NULL, r);
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
+                                                     defined(WOLFSSL_HAVE_SP_DH)
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * r     A single precision number that is the result of the operation.
+ * a     A single precision number being exponentiated.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_4096_mod_exp_196(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits,
+    const sp_digit* m, int reduceA)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* td;
+    sp_digit* t[3];
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+    td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 196 * 2, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        XMEMSET(td, 0, sizeof(*td) * 3U * 196U * 2U);
+
+        norm = t[0] = td;
+        t[1] = &td[196 * 2];
+        t[2] = &td[2 * 196 * 2];
+
+        sp_4096_mont_setup(m, &mp);
+        sp_4096_mont_norm_196(norm, m);
+
+        if (reduceA != 0) {
+            err = sp_4096_mod_196(t[1], a, m);
+        }
+        else {
+            XMEMCPY(t[1], a, sizeof(sp_digit) * 196U);
+        }
+    }
+    if (err == MP_OKAY) {
+        sp_4096_mul_196(t[1], t[1], norm);
+        err = sp_4096_mod_196(t[1], t[1], m);
+    }
+
+    if (err == MP_OKAY) {
+        i = bits / 21;
+        c = bits % 21;
+        n = e[i--] << (21 - c);
+        for (; ; c--) {
+            if (c == 0) {
+                if (i == -1) {
+                    break;
+                }
+
+                n = e[i--];
+                c = 21;
+            }
+
+            y = (n >> 20) & 1;
+            n <<= 1;
+
+            sp_4096_mont_mul_196(t[y^1], t[0], t[1], m, mp);
+
+            XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
+                                  ((size_t)t[1] & addr_mask[y])),
+                    sizeof(*t[2]) * 196 * 2);
+            sp_4096_mont_sqr_196(t[2], t[2], m, mp);
+            XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
+                            ((size_t)t[1] & addr_mask[y])), t[2],
+                    sizeof(*t[2]) * 196 * 2);
+        }
+
+        sp_4096_mont_reduce_196(t[0], m, mp);
+        n = sp_4096_cmp_196(t[0], m);
+        sp_4096_cond_sub_196(t[0], t[0], m, ((n < 0) ?
+                    (sp_digit)1 : (sp_digit)0) - 1);
+        XMEMCPY(r, t[0], sizeof(*r) * 196 * 2);
+
+    }
+
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+
+    return err;
+#elif defined(WOLFSSL_SP_CACHE_RESISTANT)
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[3][392];
+#else
+    sp_digit* td;
+    sp_digit* t[3];
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 196 * 2, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        t[0] = td;
+        t[1] = &td[196 * 2];
+        t[2] = &td[2 * 196 * 2];
+#endif
+        norm = t[0];
+
+        sp_4096_mont_setup(m, &mp);
+        sp_4096_mont_norm_196(norm, m);
+
+        if (reduceA != 0) {
+            err = sp_4096_mod_196(t[1], a, m);
+            if (err == MP_OKAY) {
+                sp_4096_mul_196(t[1], t[1], norm);
+                err = sp_4096_mod_196(t[1], t[1], m);
+            }
+        }
+        else {
+            sp_4096_mul_196(t[1], a, norm);
+            err = sp_4096_mod_196(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        i = bits / 21;
+        c = bits % 21;
+        n = e[i--] << (21 - c);
+        for (; ; c--) {
+            if (c == 0) {
+                if (i == -1) {
+                    break;
+                }
+
+                n = e[i--];
+                c = 21;
+            }
+
+            y = (n >> 20) & 1;
+            n <<= 1;
+
+            sp_4096_mont_mul_196(t[y^1], t[0], t[1], m, mp);
+
+            XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
+                                 ((size_t)t[1] & addr_mask[y])), sizeof(t[2]));
+            sp_4096_mont_sqr_196(t[2], t[2], m, mp);
+            XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
+                           ((size_t)t[1] & addr_mask[y])), t[2], sizeof(t[2]));
+        }
+
+        sp_4096_mont_reduce_196(t[0], m, mp);
+        n = sp_4096_cmp_196(t[0], m);
+        sp_4096_cond_sub_196(t[0], t[0], m, ((n < 0) ?
+                   (sp_digit)1 : (sp_digit)0) - 1);
+        XMEMCPY(r, t[0], sizeof(t[0]));
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+#else
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[32][392];
+#else
+    sp_digit* t[32];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit rt[392];
+    sp_digit mp = 1;
+    sp_digit n;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 392, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<32; i++)
+            t[i] = td + i * 392;
+#endif
+        norm = t[0];
+
+        sp_4096_mont_setup(m, &mp);
+        sp_4096_mont_norm_196(norm, m);
+
+        if (reduceA != 0) {
+            err = sp_4096_mod_196(t[1], a, m);
+            if (err == MP_OKAY) {
+                sp_4096_mul_196(t[1], t[1], norm);
+                err = sp_4096_mod_196(t[1], t[1], m);
+            }
+        }
+        else {
+            sp_4096_mul_196(t[1], a, norm);
+            err = sp_4096_mod_196(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_mont_sqr_196(t[ 2], t[ 1], m, mp);
+        sp_4096_mont_mul_196(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_4096_mont_sqr_196(t[ 4], t[ 2], m, mp);
+        sp_4096_mont_mul_196(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_4096_mont_sqr_196(t[ 6], t[ 3], m, mp);
+        sp_4096_mont_mul_196(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_4096_mont_sqr_196(t[ 8], t[ 4], m, mp);
+        sp_4096_mont_mul_196(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_4096_mont_sqr_196(t[10], t[ 5], m, mp);
+        sp_4096_mont_mul_196(t[11], t[ 6], t[ 5], m, mp);
+        sp_4096_mont_sqr_196(t[12], t[ 6], m, mp);
+        sp_4096_mont_mul_196(t[13], t[ 7], t[ 6], m, mp);
+        sp_4096_mont_sqr_196(t[14], t[ 7], m, mp);
+        sp_4096_mont_mul_196(t[15], t[ 8], t[ 7], m, mp);
+        sp_4096_mont_sqr_196(t[16], t[ 8], m, mp);
+        sp_4096_mont_mul_196(t[17], t[ 9], t[ 8], m, mp);
+        sp_4096_mont_sqr_196(t[18], t[ 9], m, mp);
+        sp_4096_mont_mul_196(t[19], t[10], t[ 9], m, mp);
+        sp_4096_mont_sqr_196(t[20], t[10], m, mp);
+        sp_4096_mont_mul_196(t[21], t[11], t[10], m, mp);
+        sp_4096_mont_sqr_196(t[22], t[11], m, mp);
+        sp_4096_mont_mul_196(t[23], t[12], t[11], m, mp);
+        sp_4096_mont_sqr_196(t[24], t[12], m, mp);
+        sp_4096_mont_mul_196(t[25], t[13], t[12], m, mp);
+        sp_4096_mont_sqr_196(t[26], t[13], m, mp);
+        sp_4096_mont_mul_196(t[27], t[14], t[13], m, mp);
+        sp_4096_mont_sqr_196(t[28], t[14], m, mp);
+        sp_4096_mont_mul_196(t[29], t[15], t[14], m, mp);
+        sp_4096_mont_sqr_196(t[30], t[15], m, mp);
+        sp_4096_mont_mul_196(t[31], t[16], t[15], m, mp);
+
+        bits = ((bits + 4) / 5) * 5;
+        i = ((bits + 20) / 21) - 1;
+        c = bits % 21;
+        if (c == 0) {
+            c = 21;
+        }
+        if (i < 196) {
+            n = e[i--] << (32 - c);
+        }
+        else {
+            n = 0;
+            i--;
+        }
+        if (c < 5) {
+            n |= e[i--] << (11 - c);
+            c += 21;
+        }
+        y = (n >> 27) & 0x1f;
+        n <<= 5;
+        c -= 5;
+        XMEMCPY(rt, t[y], sizeof(rt));
+        for (; i>=0 || c>=5; ) {
+            if (c < 5) {
+                n |= e[i--] << (11 - c);
+                c += 21;
+            }
+            y = (n >> 27) & 0x1f;
+            n <<= 5;
+            c -= 5;
+
+            sp_4096_mont_sqr_196(rt, rt, m, mp);
+            sp_4096_mont_sqr_196(rt, rt, m, mp);
+            sp_4096_mont_sqr_196(rt, rt, m, mp);
+            sp_4096_mont_sqr_196(rt, rt, m, mp);
+            sp_4096_mont_sqr_196(rt, rt, m, mp);
+
+            sp_4096_mont_mul_196(rt, rt, t[y], m, mp);
+        }
+
+        sp_4096_mont_reduce_196(rt, m, mp);
+        n = sp_4096_cmp_196(rt, m);
+        sp_4096_cond_sub_196(rt, rt, m, ((n < 0) ?
+                   (sp_digit)1 : (sp_digit)0) - 1);
+        XMEMCPY(r, rt, sizeof(rt));
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+#endif
+}
+#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || */
+       /* WOLFSSL_HAVE_SP_DH */
+
+#ifdef WOLFSSL_HAVE_SP_RSA
+/* RSA public key operation.
+ *
+ * in      Array of bytes representing the number to exponentiate, base.
+ * inLen   Number of bytes in base.
+ * em      Public exponent.
+ * mm      Modulus.
+ * out     Buffer to hold big-endian bytes of exponentiation result.
+ *         Must be at least 512 bytes long.
+ * outLen  Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPublic_4096(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
+    byte* out, word32* outLen)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* d = NULL;
+    sp_digit* a;
+    sp_digit* m;
+    sp_digit* r;
+    sp_digit* norm;
+    sp_digit e[1] = {0};
+    sp_digit mp;
+    int i;
+    int err = MP_OKAY;
+
+    if (*outLen < 512U) {
+        err = MP_TO_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(em) > 21) {
+            err = MP_READ_E;
+        }
+        if (inLen > 512U) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 196 * 5, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (d == NULL)
+            err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        a = d;
+        r = a + 196 * 2;
+        m = r + 196 * 2;
+        norm = r;
+
+        sp_4096_from_bin(a, 196, in, inLen);
+#if DIGIT_BIT >= 21
+        e[0] = (sp_digit)em->dp[0];
+#else
+        e[0] = (sp_digit)em->dp[0];
+        if (em->used > 1) {
+            e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
+        }
+#endif
+        if (e[0] == 0) {
+            err = MP_EXPTMOD_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_from_mp(m, 196, mm);
+
+        sp_4096_mont_setup(m, &mp);
+        sp_4096_mont_norm_196(norm, m);
+    }
+    if (err == MP_OKAY) {
+        sp_4096_mul_196(a, a, norm);
+        err = sp_4096_mod_196(a, a, m);
+    }
+    if (err == MP_OKAY) {
+        for (i=20; i>=0; i--) {
+            if ((e[0] >> i) != 0) {
+                break;
+            }
+        }
+
+        XMEMCPY(r, a, sizeof(sp_digit) * 196 * 2);
+        for (i--; i>=0; i--) {
+            sp_4096_mont_sqr_196(r, r, m, mp);
+
+            if (((e[0] >> i) & 1) == 1) {
+                sp_4096_mont_mul_196(r, r, a, m, mp);
+            }
+        }
+        sp_4096_mont_reduce_196(r, m, mp);
+        mp = sp_4096_cmp_196(r, m);
+        sp_4096_cond_sub_196(r, r, m, ((mp < 0) ?
+                    (sp_digit)1 : (sp_digit)0)- 1);
+
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+    }
+
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+
+    return err;
+#else
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit ad[392], md[196], rd[392];
+#else
+    sp_digit* d = NULL;
+#endif
+    sp_digit* a;
+    sp_digit* m;
+    sp_digit* r;
+    sp_digit e[1] = {0};
+    int err = MP_OKAY;
+
+    if (*outLen < 512U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(em) > 21) {
+            err = MP_READ_E;
+        }
+        if (inLen > 512U) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 196 * 5, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        a = d;
+        r = a + 196 * 2;
+        m = r + 196 * 2;
+    }
+#else
+    a = ad;
+    m = md;
+    r = rd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_4096_from_bin(a, 196, in, inLen);
+#if DIGIT_BIT >= 21
+        e[0] = (sp_digit)em->dp[0];
+#else
+        e[0] = (sp_digit)em->dp[0];
+        if (em->used > 1) {
+            e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
+        }
+#endif
+        if (e[0] == 0) {
+            err = MP_EXPTMOD_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        sp_4096_from_mp(m, 196, mm);
+
+        if (e[0] == 0x3) {
+            sp_4096_sqr_196(r, a);
+            err = sp_4096_mod_196(r, r, m);
+            if (err == MP_OKAY) {
+                sp_4096_mul_196(r, a, r);
+                err = sp_4096_mod_196(r, r, m);
+            }
+        }
+        else {
+            sp_digit* norm = r;
+            int i;
+            sp_digit mp;
+
+            sp_4096_mont_setup(m, &mp);
+            sp_4096_mont_norm_196(norm, m);
+
+            sp_4096_mul_196(a, a, norm);
+            err = sp_4096_mod_196(a, a, m);
+
+            if (err == MP_OKAY) {
+                for (i=20; i>=0; i--) {
+                    if ((e[0] >> i) != 0) {
+                        break;
+                    }
+                }
+
+                XMEMCPY(r, a, sizeof(sp_digit) * 392U);
+                for (i--; i>=0; i--) {
+                    sp_4096_mont_sqr_196(r, r, m, mp);
+
+                    if (((e[0] >> i) & 1) == 1) {
+                        sp_4096_mont_mul_196(r, r, a, m, mp);
+                    }
+                }
+                sp_4096_mont_reduce_196(r, m, mp);
+                mp = sp_4096_cmp_196(r, m);
+                sp_4096_cond_sub_196(r, r, m, ((mp < 0) ?
+                           (sp_digit)1 : (sp_digit)0) - 1);
+            }
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+#endif
+
+    return err;
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+#if !defined(SP_RSA_PRIVATE_EXP_D) && !defined(RSA_LOW_MEM)
+#endif /* !SP_RSA_PRIVATE_EXP_D && !RSA_LOW_MEM */
+/* RSA private key operation.
+ *
+ * in      Array of bytes representing the number to exponentiate, base.
+ * inLen   Number of bytes in base.
+ * dm      Private exponent.
+ * pm      First prime.
+ * qm      Second prime.
+ * dpm     First prime's CRT exponent.
+ * dqm     Second prime's CRT exponent.
+ * qim     Inverse of second prime mod p.
+ * mm      Modulus.
+ * out     Buffer to hold big-endian bytes of exponentiation result.
+ *         Must be at least 512 bytes long.
+ * outLen  Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm,
+    mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
+    byte* out, word32* outLen)
+{
+#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* a;
+    sp_digit* d = NULL;
+    sp_digit* m;
+    sp_digit* r;
+    int err = MP_OKAY;
+
+    (void)pm;
+    (void)qm;
+    (void)dpm;
+    (void)dqm;
+    (void)qim;
+
+    if (*outLen < 512U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(dm) > 4096) {
+           err = MP_READ_E;
+        }
+        if (inLen > 512) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 196 * 4, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        a = d + 196;
+        m = a + 392;
+        r = a;
+
+        sp_4096_from_bin(a, 196, in, inLen);
+        sp_4096_from_mp(d, 196, dm);
+        sp_4096_from_mp(m, 196, mm);
+        err = sp_4096_mod_exp_196(r, a, d, 4096, m, 0);
+    }
+    if (err == MP_OKAY) {
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+    }
+
+    if (d != NULL) {
+        XMEMSET(d, 0, sizeof(sp_digit) * 196);
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+
+    return err;
+#else
+    sp_digit a[392], d[196], m[196];
+    sp_digit* r = a;
+    int err = MP_OKAY;
+
+    (void)pm;
+    (void)qm;
+    (void)dpm;
+    (void)dqm;
+    (void)qim;
+
+    if (*outLen < 512U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(dm) > 4096) {
+            err = MP_READ_E;
+        }
+        if (inLen > 512U) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_from_bin(a, 196, in, inLen);
+        sp_4096_from_mp(d, 196, dm);
+        sp_4096_from_mp(m, 196, mm);
+        err = sp_4096_mod_exp_196(r, a, d, 4096, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+    }
+
+    XMEMSET(d, 0, sizeof(sp_digit) * 196);
+
+    return err;
+#endif /* WOLFSSL_SP_SMALL || defined(WOLFSSL_SMALL_STACK) */
+#else
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* t = NULL;
+    sp_digit* a;
+    sp_digit* p;
+    sp_digit* q;
+    sp_digit* dp;
+    sp_digit* dq;
+    sp_digit* qi;
+    sp_digit* tmpa;
+    sp_digit* tmpb;
+    sp_digit* r;
+    int err = MP_OKAY;
+
+    (void)dm;
+    (void)mm;
+
+    if (*outLen < 512U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (inLen > 512) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 98 * 11, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (t == NULL) {
+            err = MEMORY_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        a = t;
+        p = a + 196 * 2;
+        q = p + 98;
+        qi = dq = dp = q + 98;
+        tmpa = qi + 98;
+        tmpb = tmpa + 196;
+
+        r = t + 196;
+
+        sp_4096_from_bin(a, 196, in, inLen);
+        sp_4096_from_mp(p, 98, pm);
+        sp_4096_from_mp(q, 98, qm);
+        sp_4096_from_mp(dp, 98, dpm);
+        err = sp_4096_mod_exp_98(tmpa, a, dp, 2048, p, 1);
+    }
+    if (err == MP_OKAY) {
+        sp_4096_from_mp(dq, 98, dqm);
+        err = sp_4096_mod_exp_98(tmpb, a, dq, 2048, q, 1);
+    }
+    if (err == MP_OKAY) {
+        (void)sp_4096_sub_98(tmpa, tmpa, tmpb);
+        sp_4096_cond_add_98(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[97] >> 31));
+        sp_4096_cond_add_98(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[97] >> 31));
+
+        sp_4096_from_mp(qi, 98, qim);
+        sp_4096_mul_98(tmpa, tmpa, qi);
+        err = sp_4096_mod_98(tmpa, tmpa, p);
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_mul_98(tmpa, q, tmpa);
+        (void)sp_4096_add_196(r, tmpb, tmpa);
+        sp_4096_norm_196(r);
+
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+    }
+
+    if (t != NULL) {
+        XMEMSET(t, 0, sizeof(sp_digit) * 98 * 11);
+        XFREE(t, NULL, DYNAMIC_TYPE_RSA);
+    }
+
+    return err;
+#else
+    sp_digit a[196 * 2];
+    sp_digit p[98], q[98], dp[98], dq[98], qi[98];
+    sp_digit tmpa[196], tmpb[196];
+    sp_digit* r = a;
+    int err = MP_OKAY;
+
+    (void)dm;
+    (void)mm;
+
+    if (*outLen < 512U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (inLen > 512U) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_from_bin(a, 196, in, inLen);
+        sp_4096_from_mp(p, 98, pm);
+        sp_4096_from_mp(q, 98, qm);
+        sp_4096_from_mp(dp, 98, dpm);
+        sp_4096_from_mp(dq, 98, dqm);
+        sp_4096_from_mp(qi, 98, qim);
+
+        err = sp_4096_mod_exp_98(tmpa, a, dp, 2048, p, 1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_4096_mod_exp_98(tmpb, a, dq, 2048, q, 1);
+    }
+
+    if (err == MP_OKAY) {
+        (void)sp_4096_sub_98(tmpa, tmpa, tmpb);
+        sp_4096_cond_add_98(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[97] >> 31));
+        sp_4096_cond_add_98(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[97] >> 31));
+        sp_4096_mul_98(tmpa, tmpa, qi);
+        err = sp_4096_mod_98(tmpa, tmpa, p);
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_mul_98(tmpa, tmpa, q);
+        (void)sp_4096_add_196(r, tmpb, tmpa);
+        sp_4096_norm_196(r);
+
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+    }
+
+    XMEMSET(tmpa, 0, sizeof(tmpa));
+    XMEMSET(tmpb, 0, sizeof(tmpb));
+    XMEMSET(p, 0, sizeof(p));
+    XMEMSET(q, 0, sizeof(q));
+    XMEMSET(dp, 0, sizeof(dp));
+    XMEMSET(dq, 0, sizeof(dq));
+    XMEMSET(qi, 0, sizeof(qi));
+
+    return err;
+#endif /* WOLFSSL_SP_SMALL || defined(WOLFSSL_SMALL_STACK) */
+#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
+}
+
+#endif /* !WOLFSSL_RSA_PUBLIC_ONLY */
+#endif /* WOLFSSL_HAVE_SP_RSA */
+#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
+                                              !defined(WOLFSSL_RSA_PUBLIC_ONLY))
+/* Convert an array of sp_digit to an mp_int.
+ *
+ * a  A single precision integer.
+ * r  A multi-precision integer.
+ */
+static int sp_4096_to_mp(const sp_digit* a, mp_int* r)
+{
+    int err;
+
+    err = mp_grow(r, (4096 + DIGIT_BIT - 1) / DIGIT_BIT);
+    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 21
+        XMEMCPY(r->dp, a, sizeof(sp_digit) * 196);
+        r->used = 196;
+        mp_clamp(r);
+#elif DIGIT_BIT < 21
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 196; i++) {
+            r->dp[j] |= (mp_digit)(a[i] << s);
+            r->dp[j] &= (1L << DIGIT_BIT) - 1;
+            s = DIGIT_BIT - s;
+            r->dp[++j] = (mp_digit)(a[i] >> s);
+            while (s + DIGIT_BIT <= 21) {
+                s += DIGIT_BIT;
+                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+                if (s == SP_WORD_SIZE) {
+                    r->dp[j] = 0;
+                }
+                else {
+                    r->dp[j] = (mp_digit)(a[i] >> s);
+                }
+            }
+            s = 21 - s;
+        }
+        r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#else
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 196; i++) {
+            r->dp[j] |= ((mp_digit)a[i]) << s;
+            if (s + 21 >= DIGIT_BIT) {
+    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+                r->dp[j] &= (1L << DIGIT_BIT) - 1;
+    #endif
+                s = DIGIT_BIT - s;
+                r->dp[++j] = a[i] >> s;
+                s = 21 - s;
+            }
+            else {
+                s += 21;
+            }
+        }
+        r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#endif
+    }
+
+    return err;
+}
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base  Base. MP integer.
+ * exp   Exponent. MP integer.
+ * mod   Modulus. MP integer.
+ * res   Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_4096(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int err = MP_OKAY;
+    sp_digit* d = NULL;
+    sp_digit* b;
+    sp_digit* e;
+    sp_digit* m;
+    sp_digit* r;
+    int expBits = mp_count_bits(exp);
+
+    if (mp_count_bits(base) > 4096) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expBits > 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 196 * 4, NULL, DYNAMIC_TYPE_DH);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        b = d;
+        e = b + 196 * 2;
+        m = e + 196;
+        r = b;
+
+        sp_4096_from_mp(b, 196, base);
+        sp_4096_from_mp(e, 196, exp);
+        sp_4096_from_mp(m, 196, mod);
+
+        err = sp_4096_mod_exp_196(r, b, e, mp_count_bits(exp), m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_4096_to_mp(r, res);
+    }
+
+    if (d != NULL) {
+        XMEMSET(e, 0, sizeof(sp_digit) * 196U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
+    }
+    return err;
+#else
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit bd[392], ed[196], md[196];
+#else
+    sp_digit* d = NULL;
+#endif
+    sp_digit* b;
+    sp_digit* e;
+    sp_digit* m;
+    sp_digit* r;
+    int err = MP_OKAY;
+    int expBits = mp_count_bits(exp);
+
+    if (mp_count_bits(base) > 4096) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expBits > 4096) {
+            err = MP_READ_E;
+        }
+    }
+    
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 196 * 4, NULL, DYNAMIC_TYPE_DH);
+        if (d == NULL)
+            err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        b = d;
+        e = b + 196 * 2;
+        m = e + 196;
+        r = b;
+    }
+#else
+    r = b = bd;
+    e = ed;
+    m = md;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_4096_from_mp(b, 196, base);
+        sp_4096_from_mp(e, 196, exp);
+        sp_4096_from_mp(m, 196, mod);
+
+        err = sp_4096_mod_exp_196(r, b, e, expBits, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_4096_to_mp(r, res);
+    }
+
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (d != NULL) {
+        XMEMSET(e, 0, sizeof(sp_digit) * 196U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
+    }
+#else
+    XMEMSET(e, 0, sizeof(sp_digit) * 196U);
+#endif
+
+    return err;
+#endif
+}
+
+#ifdef WOLFSSL_HAVE_SP_DH
+
+#ifdef HAVE_FFDHE_4096
+SP_NOINLINE static void sp_4096_lshift_196(sp_digit* r, sp_digit* a, byte n)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    r[196] = a[195] >> (21 - n);
+    for (i=195; i>0; i--) {
+        r[i] = ((a[i] << n) | (a[i-1] >> (21 - n))) & 0x1fffff;
+    }
+#else
+    sp_int_digit s, t;
+
+    s = (sp_int_digit)a[195];
+    r[196] = s >> (21U - n);
+    s = (sp_int_digit)(a[195]); t = (sp_int_digit)(a[194]);
+    r[195] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[194]); t = (sp_int_digit)(a[193]);
+    r[194] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[193]); t = (sp_int_digit)(a[192]);
+    r[193] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[192]); t = (sp_int_digit)(a[191]);
+    r[192] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[191]); t = (sp_int_digit)(a[190]);
+    r[191] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[190]); t = (sp_int_digit)(a[189]);
+    r[190] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[189]); t = (sp_int_digit)(a[188]);
+    r[189] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[188]); t = (sp_int_digit)(a[187]);
+    r[188] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[187]); t = (sp_int_digit)(a[186]);
+    r[187] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[186]); t = (sp_int_digit)(a[185]);
+    r[186] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[185]); t = (sp_int_digit)(a[184]);
+    r[185] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[184]); t = (sp_int_digit)(a[183]);
+    r[184] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[183]); t = (sp_int_digit)(a[182]);
+    r[183] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[182]); t = (sp_int_digit)(a[181]);
+    r[182] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[181]); t = (sp_int_digit)(a[180]);
+    r[181] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[180]); t = (sp_int_digit)(a[179]);
+    r[180] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[179]); t = (sp_int_digit)(a[178]);
+    r[179] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[178]); t = (sp_int_digit)(a[177]);
+    r[178] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[177]); t = (sp_int_digit)(a[176]);
+    r[177] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[176]); t = (sp_int_digit)(a[175]);
+    r[176] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[175]); t = (sp_int_digit)(a[174]);
+    r[175] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[174]); t = (sp_int_digit)(a[173]);
+    r[174] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[173]); t = (sp_int_digit)(a[172]);
+    r[173] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[172]); t = (sp_int_digit)(a[171]);
+    r[172] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[171]); t = (sp_int_digit)(a[170]);
+    r[171] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[170]); t = (sp_int_digit)(a[169]);
+    r[170] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[169]); t = (sp_int_digit)(a[168]);
+    r[169] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[168]); t = (sp_int_digit)(a[167]);
+    r[168] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[167]); t = (sp_int_digit)(a[166]);
+    r[167] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[166]); t = (sp_int_digit)(a[165]);
+    r[166] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[165]); t = (sp_int_digit)(a[164]);
+    r[165] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[164]); t = (sp_int_digit)(a[163]);
+    r[164] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[163]); t = (sp_int_digit)(a[162]);
+    r[163] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[162]); t = (sp_int_digit)(a[161]);
+    r[162] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[161]); t = (sp_int_digit)(a[160]);
+    r[161] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[160]); t = (sp_int_digit)(a[159]);
+    r[160] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[159]); t = (sp_int_digit)(a[158]);
+    r[159] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[158]); t = (sp_int_digit)(a[157]);
+    r[158] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[157]); t = (sp_int_digit)(a[156]);
+    r[157] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[156]); t = (sp_int_digit)(a[155]);
+    r[156] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[155]); t = (sp_int_digit)(a[154]);
+    r[155] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[154]); t = (sp_int_digit)(a[153]);
+    r[154] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[153]); t = (sp_int_digit)(a[152]);
+    r[153] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[152]); t = (sp_int_digit)(a[151]);
+    r[152] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[151]); t = (sp_int_digit)(a[150]);
+    r[151] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[150]); t = (sp_int_digit)(a[149]);
+    r[150] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[149]); t = (sp_int_digit)(a[148]);
+    r[149] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[148]); t = (sp_int_digit)(a[147]);
+    r[148] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[147]); t = (sp_int_digit)(a[146]);
+    r[147] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[146]); t = (sp_int_digit)(a[145]);
+    r[146] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[145]); t = (sp_int_digit)(a[144]);
+    r[145] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[144]); t = (sp_int_digit)(a[143]);
+    r[144] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[143]); t = (sp_int_digit)(a[142]);
+    r[143] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[142]); t = (sp_int_digit)(a[141]);
+    r[142] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[141]); t = (sp_int_digit)(a[140]);
+    r[141] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[140]); t = (sp_int_digit)(a[139]);
+    r[140] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[139]); t = (sp_int_digit)(a[138]);
+    r[139] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[138]); t = (sp_int_digit)(a[137]);
+    r[138] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[137]); t = (sp_int_digit)(a[136]);
+    r[137] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[136]); t = (sp_int_digit)(a[135]);
+    r[136] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[135]); t = (sp_int_digit)(a[134]);
+    r[135] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[134]); t = (sp_int_digit)(a[133]);
+    r[134] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[133]); t = (sp_int_digit)(a[132]);
+    r[133] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[132]); t = (sp_int_digit)(a[131]);
+    r[132] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[131]); t = (sp_int_digit)(a[130]);
+    r[131] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[130]); t = (sp_int_digit)(a[129]);
+    r[130] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[129]); t = (sp_int_digit)(a[128]);
+    r[129] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[128]); t = (sp_int_digit)(a[127]);
+    r[128] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[127]); t = (sp_int_digit)(a[126]);
+    r[127] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[126]); t = (sp_int_digit)(a[125]);
+    r[126] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[125]); t = (sp_int_digit)(a[124]);
+    r[125] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[124]); t = (sp_int_digit)(a[123]);
+    r[124] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[123]); t = (sp_int_digit)(a[122]);
+    r[123] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[122]); t = (sp_int_digit)(a[121]);
+    r[122] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[121]); t = (sp_int_digit)(a[120]);
+    r[121] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[120]); t = (sp_int_digit)(a[119]);
+    r[120] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[119]); t = (sp_int_digit)(a[118]);
+    r[119] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[118]); t = (sp_int_digit)(a[117]);
+    r[118] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[117]); t = (sp_int_digit)(a[116]);
+    r[117] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[116]); t = (sp_int_digit)(a[115]);
+    r[116] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[115]); t = (sp_int_digit)(a[114]);
+    r[115] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[114]); t = (sp_int_digit)(a[113]);
+    r[114] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[113]); t = (sp_int_digit)(a[112]);
+    r[113] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[112]); t = (sp_int_digit)(a[111]);
+    r[112] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[111]); t = (sp_int_digit)(a[110]);
+    r[111] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[110]); t = (sp_int_digit)(a[109]);
+    r[110] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[109]); t = (sp_int_digit)(a[108]);
+    r[109] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[108]); t = (sp_int_digit)(a[107]);
+    r[108] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[107]); t = (sp_int_digit)(a[106]);
+    r[107] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[106]); t = (sp_int_digit)(a[105]);
+    r[106] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[105]); t = (sp_int_digit)(a[104]);
+    r[105] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[104]); t = (sp_int_digit)(a[103]);
+    r[104] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[103]); t = (sp_int_digit)(a[102]);
+    r[103] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[102]); t = (sp_int_digit)(a[101]);
+    r[102] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[101]); t = (sp_int_digit)(a[100]);
+    r[101] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[100]); t = (sp_int_digit)(a[99]);
+    r[100] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[99]); t = (sp_int_digit)(a[98]);
+    r[99] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[98]); t = (sp_int_digit)(a[97]);
+    r[98] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[97]); t = (sp_int_digit)(a[96]);
+    r[97] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[96]); t = (sp_int_digit)(a[95]);
+    r[96] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[95]); t = (sp_int_digit)(a[94]);
+    r[95] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[94]); t = (sp_int_digit)(a[93]);
+    r[94] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[93]); t = (sp_int_digit)(a[92]);
+    r[93] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[92]); t = (sp_int_digit)(a[91]);
+    r[92] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[91]); t = (sp_int_digit)(a[90]);
+    r[91] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[90]); t = (sp_int_digit)(a[89]);
+    r[90] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[89]); t = (sp_int_digit)(a[88]);
+    r[89] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[88]); t = (sp_int_digit)(a[87]);
+    r[88] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[87]); t = (sp_int_digit)(a[86]);
+    r[87] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[86]); t = (sp_int_digit)(a[85]);
+    r[86] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[85]); t = (sp_int_digit)(a[84]);
+    r[85] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[84]); t = (sp_int_digit)(a[83]);
+    r[84] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[83]); t = (sp_int_digit)(a[82]);
+    r[83] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[82]); t = (sp_int_digit)(a[81]);
+    r[82] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[81]); t = (sp_int_digit)(a[80]);
+    r[81] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[80]); t = (sp_int_digit)(a[79]);
+    r[80] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[79]); t = (sp_int_digit)(a[78]);
+    r[79] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[78]); t = (sp_int_digit)(a[77]);
+    r[78] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[77]); t = (sp_int_digit)(a[76]);
+    r[77] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[76]); t = (sp_int_digit)(a[75]);
+    r[76] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[75]); t = (sp_int_digit)(a[74]);
+    r[75] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[74]); t = (sp_int_digit)(a[73]);
+    r[74] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[73]); t = (sp_int_digit)(a[72]);
+    r[73] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[72]); t = (sp_int_digit)(a[71]);
+    r[72] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[71]); t = (sp_int_digit)(a[70]);
+    r[71] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[70]); t = (sp_int_digit)(a[69]);
+    r[70] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[69]); t = (sp_int_digit)(a[68]);
+    r[69] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[68]); t = (sp_int_digit)(a[67]);
+    r[68] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[67]); t = (sp_int_digit)(a[66]);
+    r[67] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[66]); t = (sp_int_digit)(a[65]);
+    r[66] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[65]); t = (sp_int_digit)(a[64]);
+    r[65] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[64]); t = (sp_int_digit)(a[63]);
+    r[64] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[63]); t = (sp_int_digit)(a[62]);
+    r[63] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[62]); t = (sp_int_digit)(a[61]);
+    r[62] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[61]); t = (sp_int_digit)(a[60]);
+    r[61] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[60]); t = (sp_int_digit)(a[59]);
+    r[60] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[59]); t = (sp_int_digit)(a[58]);
+    r[59] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[58]); t = (sp_int_digit)(a[57]);
+    r[58] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[57]); t = (sp_int_digit)(a[56]);
+    r[57] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[56]); t = (sp_int_digit)(a[55]);
+    r[56] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[55]); t = (sp_int_digit)(a[54]);
+    r[55] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[54]); t = (sp_int_digit)(a[53]);
+    r[54] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[53]); t = (sp_int_digit)(a[52]);
+    r[53] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[52]); t = (sp_int_digit)(a[51]);
+    r[52] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[51]); t = (sp_int_digit)(a[50]);
+    r[51] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[50]); t = (sp_int_digit)(a[49]);
+    r[50] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[49]); t = (sp_int_digit)(a[48]);
+    r[49] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[48]); t = (sp_int_digit)(a[47]);
+    r[48] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[47]); t = (sp_int_digit)(a[46]);
+    r[47] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[46]); t = (sp_int_digit)(a[45]);
+    r[46] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[45]); t = (sp_int_digit)(a[44]);
+    r[45] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[44]); t = (sp_int_digit)(a[43]);
+    r[44] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[43]); t = (sp_int_digit)(a[42]);
+    r[43] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[42]); t = (sp_int_digit)(a[41]);
+    r[42] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[41]); t = (sp_int_digit)(a[40]);
+    r[41] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[40]); t = (sp_int_digit)(a[39]);
+    r[40] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[39]); t = (sp_int_digit)(a[38]);
+    r[39] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[38]); t = (sp_int_digit)(a[37]);
+    r[38] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[37]); t = (sp_int_digit)(a[36]);
+    r[37] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[36]); t = (sp_int_digit)(a[35]);
+    r[36] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[35]); t = (sp_int_digit)(a[34]);
+    r[35] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[34]); t = (sp_int_digit)(a[33]);
+    r[34] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[33]); t = (sp_int_digit)(a[32]);
+    r[33] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[32]); t = (sp_int_digit)(a[31]);
+    r[32] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[31]); t = (sp_int_digit)(a[30]);
+    r[31] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[30]); t = (sp_int_digit)(a[29]);
+    r[30] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[29]); t = (sp_int_digit)(a[28]);
+    r[29] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[28]); t = (sp_int_digit)(a[27]);
+    r[28] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[27]); t = (sp_int_digit)(a[26]);
+    r[27] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[26]); t = (sp_int_digit)(a[25]);
+    r[26] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[25]); t = (sp_int_digit)(a[24]);
+    r[25] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[24]); t = (sp_int_digit)(a[23]);
+    r[24] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[23]); t = (sp_int_digit)(a[22]);
+    r[23] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[22]); t = (sp_int_digit)(a[21]);
+    r[22] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[21]); t = (sp_int_digit)(a[20]);
+    r[21] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[20]); t = (sp_int_digit)(a[19]);
+    r[20] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[19]); t = (sp_int_digit)(a[18]);
+    r[19] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[18]); t = (sp_int_digit)(a[17]);
+    r[18] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[17]); t = (sp_int_digit)(a[16]);
+    r[17] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[16]); t = (sp_int_digit)(a[15]);
+    r[16] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[15]); t = (sp_int_digit)(a[14]);
+    r[15] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[14]); t = (sp_int_digit)(a[13]);
+    r[14] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[13]); t = (sp_int_digit)(a[12]);
+    r[13] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[12]); t = (sp_int_digit)(a[11]);
+    r[12] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[11]); t = (sp_int_digit)(a[10]);
+    r[11] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[10]); t = (sp_int_digit)(a[9]);
+    r[10] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[9]); t = (sp_int_digit)(a[8]);
+    r[9] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[8]); t = (sp_int_digit)(a[7]);
+    r[8] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[7]); t = (sp_int_digit)(a[6]);
+    r[7] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[6]); t = (sp_int_digit)(a[5]);
+    r[6] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[5]); t = (sp_int_digit)(a[4]);
+    r[5] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[4]); t = (sp_int_digit)(a[3]);
+    r[4] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[3]); t = (sp_int_digit)(a[2]);
+    r[3] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[2]); t = (sp_int_digit)(a[1]);
+    r[2] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+    s = (sp_int_digit)(a[1]); t = (sp_int_digit)(a[0]);
+    r[1] = ((s << n) | (t >> (21U - n))) & 0x1fffff;
+#endif
+    r[0] = (a[0] << n) & 0x1fffff;
+}
+
+/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
+ *
+ * r     A single precision number that is the result of the operation.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_4096_mod_exp_2_196(sp_digit* r, const sp_digit* e, int bits, const sp_digit* m)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit nd[392];
+    sp_digit td[197];
+#else
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit* tmp;
+    sp_digit mp = 1;
+    sp_digit n, o;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 589, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        norm = td;
+        tmp  = td + 392;
+        XMEMSET(td, 0, sizeof(sp_digit) * 589);
+#else
+        norm = nd;
+        tmp  = td;
+        XMEMSET(td, 0, sizeof(td));
+#endif
+
+        sp_4096_mont_setup(m, &mp);
+        sp_4096_mont_norm_196(norm, m);
+
+        bits = ((bits + 3) / 4) * 4;
+        i = ((bits + 20) / 21) - 1;
+        c = bits % 21;
+        if (c == 0) {
+            c = 21;
+        }
+        if (i < 196) {
+            n = e[i--] << (32 - c);
+        }
+        else {
+            n = 0;
+            i--;
+        }
+        if (c < 4) {
+            n |= e[i--] << (11 - c);
+            c += 21;
+        }
+        y = (n >> 28) & 0xf;
+        n <<= 4;
+        c -= 4;
+        sp_4096_lshift_196(r, norm, y);
+        for (; i>=0 || c>=4; ) {
+            if (c < 4) {
+                n |= e[i--] << (11 - c);
+                c += 21;
+            }
+            y = (n >> 28) & 0xf;
+            n <<= 4;
+            c -= 4;
+
+            sp_4096_mont_sqr_196(r, r, m, mp);
+            sp_4096_mont_sqr_196(r, r, m, mp);
+            sp_4096_mont_sqr_196(r, r, m, mp);
+            sp_4096_mont_sqr_196(r, r, m, mp);
+
+            sp_4096_lshift_196(r, r, y);
+            sp_4096_mul_d_196(tmp, norm, (r[196] << 20) + (r[195] >> 1));
+            r[196] = 0;
+            r[195] &= 0x1L;
+            (void)sp_4096_add_196(r, r, tmp);
+            sp_4096_norm_196(r);
+            o = sp_4096_cmp_196(r, m);
+            sp_4096_cond_sub_196(r, r, m, ((o < 0) ?
+                                          (sp_digit)1 : (sp_digit)0) - 1);
+        }
+
+        sp_4096_mont_reduce_196(r, m, mp);
+        n = sp_4096_cmp_196(r, m);
+        sp_4096_cond_sub_196(r, r, m, ((n < 0) ?
+                                                (sp_digit)1 : (sp_digit)0) - 1);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+
+#endif /* HAVE_FFDHE_4096 */
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base     Base.
+ * exp      Array of bytes that is the exponent.
+ * expLen   Length of data, in bytes, in exponent.
+ * mod      Modulus.
+ * out      Buffer to hold big-endian bytes of exponentiation result.
+ *          Must be at least 512 bytes long.
+ * outLen   Length, in bytes, of exponentiation result.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_DhExp_4096(mp_int* base, const byte* exp, word32 expLen,
+    mp_int* mod, byte* out, word32* outLen)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int err = MP_OKAY;
+    sp_digit* d = NULL;
+    sp_digit* b;
+    sp_digit* e;
+    sp_digit* m;
+    sp_digit* r;
+    word32 i;
+
+    if (mp_count_bits(base) > 4096) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expLen > 512) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 196 * 4, NULL, DYNAMIC_TYPE_DH);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        b = d;
+        e = b + 196 * 2;
+        m = e + 196;
+        r = b;
+
+        sp_4096_from_mp(b, 196, base);
+        sp_4096_from_bin(e, 196, exp, expLen);
+        sp_4096_from_mp(m, 196, mod);
+
+    #ifdef HAVE_FFDHE_4096
+        if (base->used == 1 && base->dp[0] == 2 &&
+                ((m[195] << 15) | (m[194] >> 6)) == 0xffffL) {
+            err = sp_4096_mod_exp_2_196(r, e, expLen * 8, m);
+        }
+        else
+    #endif
+            err = sp_4096_mod_exp_196(r, b, e, expLen * 8, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+        for (i=0; i<512 && out[i] == 0; i++) {
         }
         *outLen -= i;
         XMEMMOVE(out, out + i, *outLen);
     }
 
-    XMEMSET(e, 0, sizeof(sp_digit) * 136);
-
-#ifdef WOLFSSL_SMALL_STACK
-    if (d != NULL)
-        XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return err;
-#endif
-}
-
+    if (d != NULL) {
+        XMEMSET(e, 0, sizeof(sp_digit) * 196U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
+    }
+    return err;
+#else
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit bd[392], ed[196], md[196];
+#else
+    sp_digit* d = NULL;
+#endif
+    sp_digit* b;
+    sp_digit* e;
+    sp_digit* m;
+    sp_digit* r;
+    word32 i;
+    int err = MP_OKAY;
+
+    if (mp_count_bits(base) > 4096) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expLen > 512U) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+#ifdef WOLFSSL_SMALL_STACK
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 196 * 4, NULL, DYNAMIC_TYPE_DH);
+        if (d == NULL)
+            err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        b = d;
+        e = b + 196 * 2;
+        m = e + 196;
+        r = b;
+    }
+#else
+    r = b = bd;
+    e = ed;
+    m = md;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_4096_from_mp(b, 196, base);
+        sp_4096_from_bin(e, 196, exp, expLen);
+        sp_4096_from_mp(m, 196, mod);
+
+    #ifdef HAVE_FFDHE_4096
+        if (base->used == 1 && base->dp[0] == 2U &&
+                ((m[195] << 15) | (m[194] >> 6)) == 0xffffL) {
+            err = sp_4096_mod_exp_2_196(r, e, expLen * 8U, m);
+        }
+        else {
+    #endif
+            err = sp_4096_mod_exp_196(r, b, e, expLen * 8U, m, 0);
+    #ifdef HAVE_FFDHE_4096
+        }
+    #endif
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+        for (i=0; i<512U && out[i] == 0U; i++) {
+        }
+        *outLen -= i;
+        XMEMMOVE(out, out + i, *outLen);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (d != NULL) {
+        XMEMSET(e, 0, sizeof(sp_digit) * 196U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
+    }
+#else
+    XMEMSET(e, 0, sizeof(sp_digit) * 196U);
+#endif
+
+    return err;
+#endif
+}
 #endif /* WOLFSSL_HAVE_SP_DH */
 
-#endif /* WOLFSSL_SP_NO_3072 */
+#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
+
+#endif /* WOLFSSL_SP_4096 */
 
 #endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
 #ifdef WOLFSSL_HAVE_SP_ECC
 #ifndef WOLFSSL_SP_NO_256
 
 /* Point structure to use. */
-typedef struct sp_point {
+typedef struct sp_point_256 {
     sp_digit x[2 * 10];
     sp_digit y[2 * 10];
     sp_digit z[2 * 10];
     int infinity;
-} sp_point;
+} sp_point_256;
 
 /* The modulus (prime) of the curve P256. */
-static sp_digit p256_mod[10] = {
+static const sp_digit p256_mod[10] = {
     0x3ffffff,0x3ffffff,0x3ffffff,0x003ffff,0x0000000,0x0000000,0x0000000,
     0x0000400,0x3ff0000,0x03fffff
 };
-#ifndef WOLFSSL_SP_SMALL
 /* The Montogmery normalizer for modulus of the curve P256. */
-static sp_digit p256_norm_mod[10] = {
+static const sp_digit p256_norm_mod[10] = {
     0x0000001,0x0000000,0x0000000,0x3fc0000,0x3ffffff,0x3ffffff,0x3ffffff,
     0x3fffbff,0x000ffff,0x0000000
 };
-#endif /* WOLFSSL_SP_SMALL */
 /* The Montogmery multiplier for modulus of the curve P256. */
-static sp_digit p256_mp_mod = 0x000001;
+static const sp_digit p256_mp_mod = 0x000001;
 #if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
                                             defined(HAVE_ECC_VERIFY)
 /* The order of the curve P256. */
-static sp_digit p256_order[10] = {
+static const sp_digit p256_order[10] = {
     0x0632551,0x272b0bf,0x1e84f3b,0x2b69c5e,0x3bce6fa,0x3ffffff,0x3ffffff,
     0x00003ff,0x3ff0000,0x03fffff
 };
 #endif
 /* The order of the curve P256 minus 2. */
-static sp_digit p256_order2[10] = {
+static const sp_digit p256_order2[10] = {
     0x063254f,0x272b0bf,0x1e84f3b,0x2b69c5e,0x3bce6fa,0x3ffffff,0x3ffffff,
     0x00003ff,0x3ff0000,0x03fffff
 };
 #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
 /* The Montogmery normalizer for order of the curve P256. */
-static sp_digit p256_norm_order[10] = {
+static const sp_digit p256_norm_order[10] = {
     0x39cdaaf,0x18d4f40,0x217b0c4,0x14963a1,0x0431905,0x0000000,0x0000000,
     0x3fffc00,0x000ffff,0x0000000
 };
 #endif
 #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
 /* The Montogmery multiplier for order of the curve P256. */
-static sp_digit p256_mp_order = 0x200bc4f;
+static const sp_digit p256_mp_order = 0x200bc4f;
 #endif
 /* The base point of curve P256. */
-static sp_point p256_base = {
+static const sp_point_256 p256_base = {
     /* X ordinate */
     {
         0x098c296,0x04e5176,0x33a0f4a,0x204b7ac,0x277037d,0x0e9103c,0x3ce6e56,
-        0x1091fe2,0x1f2e12c,0x01ac5f4
+        0x1091fe2,0x1f2e12c,0x01ac5f4,
+        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
     },
     /* Y ordinate */
     {
         0x3bf51f5,0x1901a0d,0x1ececbb,0x15dacc5,0x22bce33,0x303e785,0x27eb4a7,
-        0x1fe6e3b,0x2e2fe1a,0x013f8d0
+        0x1fe6e3b,0x2e2fe1a,0x013f8d0,
+        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
     },
     /* Z ordinate */
     {
         0x0000001,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,
-        0x0000000,0x0000000,0x0000000
+        0x0000000,0x0000000,0x0000000,
+        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
     },
     /* infinity */
     0
 };
 #if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY)
-static sp_digit p256_b[10] = {
+static const sp_digit p256_b[10] = {
     0x3d2604b,0x38f0f89,0x30f63bc,0x2c3314e,0x0651d06,0x1a621af,0x2bbd557,
     0x24f9ecf,0x1d8aa3a,0x016b18d
 };
 #endif
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+static int sp_256_point_new_ex_10(void* heap, sp_point_256* sp, sp_point_256** p)
+{
+    int ret = MP_OKAY;
+    (void)heap;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    (void)sp;
+    *p = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, DYNAMIC_TYPE_ECC);
+#else
+    *p = sp;
+#endif
+    if (*p == NULL) {
+        ret = MEMORY_E;
+    }
+    return ret;
+}
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
 /* Allocate memory for point and return error. */
-#define sp_ecc_point_new(heap, sp, p)                                   \
-    ((p = XMALLOC(sizeof(sp_point), heap, DYNAMIC_TYPE_ECC)) == NULL) ? \
-        MEMORY_E : MP_OKAY
+#define sp_256_point_new_10(heap, sp, p) sp_256_point_new_ex_10((heap), NULL, &(p))
 #else
 /* Set pointer to data and return no error. */
-#define sp_ecc_point_new(heap, sp, p)   ((p = &sp) == NULL) ? MEMORY_E : MP_OKAY
-#endif
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#define sp_256_point_new_10(heap, sp, p) sp_256_point_new_ex_10((heap), &(sp), &(p))
+#endif
+
+
+static void sp_256_point_free_10(sp_point_256* p, int clear, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
 /* If valid pointer then clear point data if requested and free data. */
-#define sp_ecc_point_free(p, clear, heap)     \
-    do {                                      \
-        if (p != NULL) {                      \
-            if (clear)                        \
-                XMEMSET(p, 0, sizeof(*p));    \
-            XFREE(p, heap, DYNAMIC_TYPE_ECC); \
-        }                                     \
-    }                                         \
-    while (0)
+    if (p != NULL) {
+        if (clear != 0) {
+            XMEMSET(p, 0, sizeof(*p));
+        }
+        XFREE(p, heap, DYNAMIC_TYPE_ECC);
+    }
 #else
 /* Clear point data if requested. */
-#define sp_ecc_point_free(p, clear, heap) \
-    do {                                  \
-        if (clear)                        \
-            XMEMSET(p, 0, sizeof(*p));    \
-    }                                     \
-    while (0)
-#endif
+    if (clear != 0) {
+        XMEMSET(p, 0, sizeof(*p));
+    }
+#endif
+    (void)heap;
+}
 
 /* Multiply a number by Montogmery normalizer mod modulus (prime).
  *
@@ -7318,9 +12495,9 @@
  * m  The modulus (prime).
  * returns MEMORY_E when memory allocation fails and MP_OKAY otherwise.
  */
-static int sp_256_mod_mul_norm_10(sp_digit* r, sp_digit* a, sp_digit* m)
-{
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+static int sp_256_mod_mul_norm_10(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     int64_t* td;
 #else
     int64_t td[8];
@@ -7333,112 +12510,115 @@
 
     (void)m;
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    td = XMALLOC(sizeof(int64_t) * 2 * 8, NULL, DYNAMIC_TYPE_ECC);
-    if (td != NULL) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    td = (int64_t*)XMALLOC(sizeof(int64_t) * 2 * 8, NULL, DYNAMIC_TYPE_ECC);
+    if (td == NULL) {
+        return MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
         t = td;
         a32 = td + 8;
-    }
-    else
-        err = MEMORY_E;
-#else
-    t = td;
-    a32 = a32d;
-#endif
-
-    if (err == MP_OKAY) {
+#else
+        t = td;
+        a32 = a32d;
+#endif
+
         a32[0] = a[0];
-        a32[0] |= a[1] << 26;
-        a32[0] &= 0xffffffff;
+        a32[0] |= a[1] << 26U;
+        a32[0] &= 0xffffffffL;
         a32[1] = (sp_digit)(a[1] >> 6);
-        a32[1] |= a[2] << 20;
-        a32[1] &= 0xffffffff;
+        a32[1] |= a[2] << 20U;
+        a32[1] &= 0xffffffffL;
         a32[2] = (sp_digit)(a[2] >> 12);
-        a32[2] |= a[3] << 14;
-        a32[2] &= 0xffffffff;
+        a32[2] |= a[3] << 14U;
+        a32[2] &= 0xffffffffL;
         a32[3] = (sp_digit)(a[3] >> 18);
-        a32[3] |= a[4] << 8;
-        a32[3] &= 0xffffffff;
+        a32[3] |= a[4] << 8U;
+        a32[3] &= 0xffffffffL;
         a32[4] = (sp_digit)(a[4] >> 24);
-        a32[4] |= a[5] << 2;
-        a32[4] |= a[6] << 28;
-        a32[4] &= 0xffffffff;
+        a32[4] |= a[5] << 2U;
+        a32[4] |= a[6] << 28U;
+        a32[4] &= 0xffffffffL;
         a32[5] = (sp_digit)(a[6] >> 4);
-        a32[5] |= a[7] << 22;
-        a32[5] &= 0xffffffff;
+        a32[5] |= a[7] << 22U;
+        a32[5] &= 0xffffffffL;
         a32[6] = (sp_digit)(a[7] >> 10);
-        a32[6] |= a[8] << 16;
-        a32[6] &= 0xffffffff;
+        a32[6] |= a[8] << 16U;
+        a32[6] &= 0xffffffffL;
         a32[7] = (sp_digit)(a[8] >> 16);
-        a32[7] |= a[9] << 10;
-        a32[7] &= 0xffffffff;
+        a32[7] |= a[9] << 10U;
+        a32[7] &= 0xffffffffL;
 
         /*  1  1  0 -1 -1 -1 -1  0 */
-        t[0] = 0 + a32[0] + a32[1] - a32[3] - a32[4] - a32[5] - a32[6];
+            t[0] = 0 + a32[0] + a32[1] - a32[3] - a32[4] - a32[5] - a32[6];
         /*  0  1  1  0 -1 -1 -1 -1 */
-        t[1] = 0 + a32[1] + a32[2] - a32[4] - a32[5] - a32[6] - a32[7];
+            t[1] = 0 + a32[1] + a32[2] - a32[4] - a32[5] - a32[6] - a32[7];
         /*  0  0  1  1  0 -1 -1 -1 */
-        t[2] = 0 + a32[2] + a32[3] - a32[5] - a32[6] - a32[7];
+            t[2] = 0 + a32[2] + a32[3] - a32[5] - a32[6] - a32[7];
         /* -1 -1  0  2  2  1  0 -1 */
-        t[3] = 0 - a32[0] - a32[1] + 2 * a32[3] + 2 * a32[4] + a32[5] - a32[7];
+            t[3] = 0 - a32[0] - a32[1] + 2 * a32[3] + 2 * a32[4] + a32[5] - a32[7];
         /*  0 -1 -1  0  2  2  1  0 */
-        t[4] = 0 - a32[1] - a32[2] + 2 * a32[4] + 2 * a32[5] + a32[6];
+            t[4] = 0 - a32[1] - a32[2] + 2 * a32[4] + 2 * a32[5] + a32[6];
         /*  0  0 -1 -1  0  2  2  1 */
-        t[5] = 0 - a32[2] - a32[3] + 2 * a32[5] + 2 * a32[6] + a32[7];
+            t[5] = 0 - a32[2] - a32[3] + 2 * a32[5] + 2 * a32[6] + a32[7];
         /* -1 -1  0  0  0  1  3  2 */
-        t[6] = 0 - a32[0] - a32[1] + a32[5] + 3 * a32[6] + 2 * a32[7];
+            t[6] = 0 - a32[0] - a32[1] + a32[5] + 3 * a32[6] + 2 * a32[7];
         /*  1  0 -1 -1 -1 -1  0  3 */
-        t[7] = 0 + a32[0] - a32[2] - a32[3] - a32[4] - a32[5] + 3 * a32[7];
-
-        t[1] += t[0] >> 32; t[0] &= 0xffffffff;
-        t[2] += t[1] >> 32; t[1] &= 0xffffffff;
-        t[3] += t[2] >> 32; t[2] &= 0xffffffff;
-        t[4] += t[3] >> 32; t[3] &= 0xffffffff;
-        t[5] += t[4] >> 32; t[4] &= 0xffffffff;
-        t[6] += t[5] >> 32; t[5] &= 0xffffffff;
-        t[7] += t[6] >> 32; t[6] &= 0xffffffff;
-        o     = t[7] >> 32; t[7] &= 0xffffffff;
-        t[0] += o;
-        t[3] -= o;
-        t[6] -= o;
-        t[7] += o;
-        t[1] += t[0] >> 32; t[0] &= 0xffffffff;
-        t[2] += t[1] >> 32; t[1] &= 0xffffffff;
-        t[3] += t[2] >> 32; t[2] &= 0xffffffff;
-        t[4] += t[3] >> 32; t[3] &= 0xffffffff;
-        t[5] += t[4] >> 32; t[4] &= 0xffffffff;
-        t[6] += t[5] >> 32; t[5] &= 0xffffffff;
-        t[7] += t[6] >> 32; t[6] &= 0xffffffff;
-
-        r[0] = (sp_digit)(t[0]) & 0x3ffffff;
-        r[1] = (sp_digit)(t[0] >> 26);
-        r[1] |= t[1] << 6;
-        r[1] &= 0x3ffffff;
-        r[2] = (sp_digit)(t[1] >> 20);
-        r[2] |= t[2] << 12;
-        r[2] &= 0x3ffffff;
-        r[3] = (sp_digit)(t[2] >> 14);
-        r[3] |= t[3] << 18;
-        r[3] &= 0x3ffffff;
-        r[4] = (sp_digit)(t[3] >> 8);
-        r[4] |= t[4] << 24;
-        r[4] &= 0x3ffffff;
-        r[5] = (sp_digit)(t[4] >> 2) & 0x3ffffff;
-        r[6] = (sp_digit)(t[4] >> 28);
-        r[6] |= t[5] << 4;
-        r[6] &= 0x3ffffff;
-        r[7] = (sp_digit)(t[5] >> 22);
-        r[7] |= t[6] << 10;
-        r[7] &= 0x3ffffff;
-        r[8] = (sp_digit)(t[6] >> 16);
-        r[8] |= t[7] << 16;
-        r[8] &= 0x3ffffff;
-        r[9] = (sp_digit)(t[7] >> 10);
-    }
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (td != NULL)
+            t[7] = 0 + a32[0] - a32[2] - a32[3] - a32[4] - a32[5] + 3 * a32[7];
+
+            t[1] += t[0] >> 32U; t[0] &= 0xffffffffL;
+            t[2] += t[1] >> 32U; t[1] &= 0xffffffffL;
+            t[3] += t[2] >> 32U; t[2] &= 0xffffffffL;
+            t[4] += t[3] >> 32U; t[3] &= 0xffffffffL;
+            t[5] += t[4] >> 32U; t[4] &= 0xffffffffL;
+            t[6] += t[5] >> 32U; t[5] &= 0xffffffffL;
+            t[7] += t[6] >> 32U; t[6] &= 0xffffffffL;
+            o     = t[7] >> 32U; t[7] &= 0xffffffffL;
+            t[0] += o;
+            t[3] -= o;
+            t[6] -= o;
+            t[7] += o;
+            t[1] += t[0] >> 32U; t[0] &= 0xffffffffL;
+            t[2] += t[1] >> 32U; t[1] &= 0xffffffffL;
+            t[3] += t[2] >> 32U; t[2] &= 0xffffffffL;
+            t[4] += t[3] >> 32U; t[3] &= 0xffffffffL;
+            t[5] += t[4] >> 32U; t[4] &= 0xffffffffL;
+            t[6] += t[5] >> 32U; t[5] &= 0xffffffffL;
+            t[7] += t[6] >> 32U; t[6] &= 0xffffffffL;
+
+        r[0] = (sp_digit)(t[0]) & 0x3ffffffL;
+        r[1] = (sp_digit)(t[0] >> 26U);
+        r[1] |= t[1] << 6U;
+        r[1] &= 0x3ffffffL;
+        r[2] = (sp_digit)(t[1] >> 20U);
+        r[2] |= t[2] << 12U;
+        r[2] &= 0x3ffffffL;
+        r[3] = (sp_digit)(t[2] >> 14U);
+        r[3] |= t[3] << 18U;
+        r[3] &= 0x3ffffffL;
+        r[4] = (sp_digit)(t[3] >> 8U);
+        r[4] |= t[4] << 24U;
+        r[4] &= 0x3ffffffL;
+        r[5] = (sp_digit)(t[4] >> 2U) & 0x3ffffffL;
+        r[6] = (sp_digit)(t[4] >> 28U);
+        r[6] |= t[5] << 4U;
+        r[6] &= 0x3ffffffL;
+        r[7] = (sp_digit)(t[5] >> 22U);
+        r[7] |= t[6] << 10U;
+        r[7] &= 0x3ffffffL;
+        r[8] = (sp_digit)(t[6] >> 16U);
+        r[8] |= t[7] << 16U;
+        r[8] &= 0x3ffffffL;
+        r[9] = (sp_digit)(t[7] >> 10U);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_ECC);
+    }
 #endif
 
     return err;
@@ -7447,53 +12627,64 @@
 /* Convert an mp_int to an array of sp_digit.
  *
  * r  A single precision integer.
+ * size  Maximum number of bytes to convert
  * a  A multi-precision integer.
  */
-static void sp_256_from_mp(sp_digit* r, int max, mp_int* a)
+static void sp_256_from_mp(sp_digit* r, int size, const mp_int* a)
 {
 #if DIGIT_BIT == 26
     int j;
 
     XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
 
-    for (j = a->used; j < max; j++)
+    for (j = a->used; j < size; j++) {
         r[j] = 0;
+    }
 #elif DIGIT_BIT > 26
-    int i, j = 0, s = 0;
+    int i, j = 0;
+    word32 s = 0;
 
     r[0] = 0;
-    for (i = 0; i < a->used && j < max; i++) {
-        r[j] |= a->dp[i] << s;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
         r[j] &= 0x3ffffff;
-        s = 26 - s;
-        if (j + 1 >= max)
+        s = 26U - s;
+        if (j + 1 >= size) {
             break;
-        r[++j] = a->dp[i] >> s;
-        while (s + 26 <= DIGIT_BIT) {
-            s += 26;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 26U) <= (word32)DIGIT_BIT) {
+            s += 26U;
             r[j] &= 0x3ffffff;
-            if (j + 1 >= max)
-                break;
-            if (s < DIGIT_BIT)
-                r[++j] = a->dp[i] >> s;
-            else
-                r[++j] = 0;
-        }
-        s = DIGIT_BIT - s;
-    }
-
-    for (j++; j < max; j++)
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
         r[j] = 0;
+    }
 #else
     int i, j = 0, s = 0;
 
     r[0] = 0;
-    for (i = 0; i < a->used && j < max; i++) {
+    for (i = 0; i < a->used && j < size; i++) {
         r[j] |= ((sp_digit)a->dp[i]) << s;
         if (s + DIGIT_BIT >= 26) {
             r[j] &= 0x3ffffff;
-            if (j + 1 >= max)
-                break;
+            if (j + 1 >= size) {
+                break;
+            }
             s = 26 - s;
             if (s == DIGIT_BIT) {
                 r[++j] = 0;
@@ -7504,21 +12695,23 @@
                 s = DIGIT_BIT - s;
             }
         }
-        else
+        else {
             s += DIGIT_BIT;
-    }
-
-    for (j++; j < max; j++)
+        }
+    }
+
+    for (j++; j < size; j++) {
         r[j] = 0;
-#endif
-}
-
-/* Convert a point of type ecc_point to type sp_point.
- *
- * p   Point of type sp_point (result).
+    }
+#endif
+}
+
+/* Convert a point of type ecc_point to type sp_point_256.
+ *
+ * p   Point of type sp_point_256 (result).
  * pm  Point of type ecc_point.
  */
-static void sp_256_point_from_ecc_point_10(sp_point* p, ecc_point* pm)
+static void sp_256_point_from_ecc_point_10(sp_point_256* p, const ecc_point* pm)
 {
     XMEMSET(p->x, 0, sizeof(p->x));
     XMEMSET(p->y, 0, sizeof(p->y));
@@ -7534,12 +12727,12 @@
  * a  A single precision integer.
  * r  A multi-precision integer.
  */
-static int sp_256_to_mp(sp_digit* a, mp_int* r)
+static int sp_256_to_mp(const sp_digit* a, mp_int* r)
 {
     int err;
 
     err = mp_grow(r, (256 + DIGIT_BIT - 1) / DIGIT_BIT);
-    if (err == MP_OKAY) {
+    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
 #if DIGIT_BIT == 26
         XMEMCPY(r->dp, a, sizeof(sp_digit) * 10);
         r->used = 10;
@@ -7549,14 +12742,19 @@
 
         r->dp[0] = 0;
         for (i = 0; i < 10; i++) {
-            r->dp[j] |= a[i] << s;
-            r->dp[j] &= (1l << DIGIT_BIT) - 1;
+            r->dp[j] |= (mp_digit)(a[i] << s);
+            r->dp[j] &= (1L << DIGIT_BIT) - 1;
             s = DIGIT_BIT - s;
-            r->dp[++j] = a[i] >> s;
+            r->dp[++j] = (mp_digit)(a[i] >> s);
             while (s + DIGIT_BIT <= 26) {
                 s += DIGIT_BIT;
-                r->dp[j] &= (1l << DIGIT_BIT) - 1;
-                r->dp[++j] = a[i] >> s;
+                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+                if (s == SP_WORD_SIZE) {
+                    r->dp[j] = 0;
+                }
+                else {
+                    r->dp[j] = (mp_digit)(a[i] >> s);
+                }
             }
             s = 26 - s;
         }
@@ -7569,15 +12767,16 @@
         for (i = 0; i < 10; i++) {
             r->dp[j] |= ((mp_digit)a[i]) << s;
             if (s + 26 >= DIGIT_BIT) {
-    #if DIGIT_BIT < 32
-                r->dp[j] &= (1l << DIGIT_BIT) - 1;
+    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+                r->dp[j] &= (1L << DIGIT_BIT) - 1;
     #endif
                 s = DIGIT_BIT - s;
                 r->dp[++j] = a[i] >> s;
                 s = 26 - s;
             }
-            else
+            else {
                 s += 26;
+            }
         }
         r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT;
         mp_clamp(r);
@@ -7587,245 +12786,26 @@
     return err;
 }
 
-/* Convert a point of type sp_point to type ecc_point.
- *
- * p   Point of type sp_point.
+/* Convert a point of type sp_point_256 to type ecc_point.
+ *
+ * p   Point of type sp_point_256.
  * pm  Point of type ecc_point (result).
  * returns MEMORY_E when allocation of memory in ecc_point fails otherwise
  * MP_OKAY.
  */
-static int sp_256_point_to_ecc_point_10(sp_point* p, ecc_point* pm)
+static int sp_256_point_to_ecc_point_10(const sp_point_256* p, ecc_point* pm)
 {
     int err;
 
     err = sp_256_to_mp(p->x, pm->x);
-    if (err == MP_OKAY)
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(p->y, pm->y);
-    if (err == MP_OKAY)
+    }
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(p->z, pm->z);
-
-    return err;
-}
-
-/* Compare a with b in constant time.
- *
- * a  A single precision integer.
- * b  A single precision integer.
- * return -ve, 0 or +ve if a is less than, equal to or greater than b
- * respectively.
- */
-static sp_digit sp_256_cmp_10(const sp_digit* a, const sp_digit* b)
-{
-    sp_digit r = 0;
-#ifdef WOLFSSL_SP_SMALL
-    int i;
-
-    for (i=9; i>=0; i--)
-        r |= (a[i] - b[i]) & (0 - !r);
-#else
-    r |= (a[ 9] - b[ 9]) & (0 - !r);
-    r |= (a[ 8] - b[ 8]) & (0 - !r);
-    r |= (a[ 7] - b[ 7]) & (0 - !r);
-    r |= (a[ 6] - b[ 6]) & (0 - !r);
-    r |= (a[ 5] - b[ 5]) & (0 - !r);
-    r |= (a[ 4] - b[ 4]) & (0 - !r);
-    r |= (a[ 3] - b[ 3]) & (0 - !r);
-    r |= (a[ 2] - b[ 2]) & (0 - !r);
-    r |= (a[ 1] - b[ 1]) & (0 - !r);
-    r |= (a[ 0] - b[ 0]) & (0 - !r);
-#endif /* WOLFSSL_SP_SMALL */
-
-    return r;
-}
-
-/* Normalize the values in each word to 26.
- *
- * a  Array of sp_digit to normalize.
- */
-static void sp_256_norm_10(sp_digit* a)
-{
-#ifdef WOLFSSL_SP_SMALL
-    int i;
-    for (i = 0; i < 9; i++) {
-        a[i+1] += a[i] >> 26;
-        a[i] &= 0x3ffffff;
-    }
-#else
-    a[1] += a[0] >> 26; a[0] &= 0x3ffffff;
-    a[2] += a[1] >> 26; a[1] &= 0x3ffffff;
-    a[3] += a[2] >> 26; a[2] &= 0x3ffffff;
-    a[4] += a[3] >> 26; a[3] &= 0x3ffffff;
-    a[5] += a[4] >> 26; a[4] &= 0x3ffffff;
-    a[6] += a[5] >> 26; a[5] &= 0x3ffffff;
-    a[7] += a[6] >> 26; a[6] &= 0x3ffffff;
-    a[8] += a[7] >> 26; a[7] &= 0x3ffffff;
-    a[9] += a[8] >> 26; a[8] &= 0x3ffffff;
-#endif
-}
-
-/* Conditionally subtract b from a using the mask m.
- * m is -1 to subtract and 0 when not.
- *
- * r  A single precision number representing condition subtract result.
- * a  A single precision number to subtract from.
- * b  A single precision number to subtract.
- * m  Mask value to apply.
- */
-static void sp_256_cond_sub_10(sp_digit* r, const sp_digit* a,
-        const sp_digit* b, const sp_digit m)
-{
-#ifdef WOLFSSL_SP_SMALL
-    int i;
-
-    for (i = 0; i < 10; i++)
-        r[i] = a[i] - (b[i] & m);
-#else
-    r[ 0] = a[ 0] - (b[ 0] & m);
-    r[ 1] = a[ 1] - (b[ 1] & m);
-    r[ 2] = a[ 2] - (b[ 2] & m);
-    r[ 3] = a[ 3] - (b[ 3] & m);
-    r[ 4] = a[ 4] - (b[ 4] & m);
-    r[ 5] = a[ 5] - (b[ 5] & m);
-    r[ 6] = a[ 6] - (b[ 6] & m);
-    r[ 7] = a[ 7] - (b[ 7] & m);
-    r[ 8] = a[ 8] - (b[ 8] & m);
-    r[ 9] = a[ 9] - (b[ 9] & m);
-#endif /* WOLFSSL_SP_SMALL */
-}
-
-/* Mul a by scalar b and add into r. (r += a * b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A scalar.
- */
-SP_NOINLINE static void sp_256_mul_add_10(sp_digit* r, const sp_digit* a,
-        const sp_digit b)
-{
-#ifdef WOLFSSL_SP_SMALL
-    int64_t tb = b;
-    int64_t t = 0;
-    int i;
-
-    for (i = 0; i < 10; i++) {
-        t += (tb * a[i]) + r[i];
-        r[i] = t & 0x3ffffff;
-        t >>= 26;
-    }
-    r[10] += t;
-#else
-    int64_t tb = b;
-    int64_t t[10];
-
-    t[ 0] = tb * a[ 0];
-    t[ 1] = tb * a[ 1];
-    t[ 2] = tb * a[ 2];
-    t[ 3] = tb * a[ 3];
-    t[ 4] = tb * a[ 4];
-    t[ 5] = tb * a[ 5];
-    t[ 6] = tb * a[ 6];
-    t[ 7] = tb * a[ 7];
-    t[ 8] = tb * a[ 8];
-    t[ 9] = tb * a[ 9];
-    r[ 0] +=                 (t[ 0] & 0x3ffffff);
-    r[ 1] += (t[ 0] >> 26) + (t[ 1] & 0x3ffffff);
-    r[ 2] += (t[ 1] >> 26) + (t[ 2] & 0x3ffffff);
-    r[ 3] += (t[ 2] >> 26) + (t[ 3] & 0x3ffffff);
-    r[ 4] += (t[ 3] >> 26) + (t[ 4] & 0x3ffffff);
-    r[ 5] += (t[ 4] >> 26) + (t[ 5] & 0x3ffffff);
-    r[ 6] += (t[ 5] >> 26) + (t[ 6] & 0x3ffffff);
-    r[ 7] += (t[ 6] >> 26) + (t[ 7] & 0x3ffffff);
-    r[ 8] += (t[ 7] >> 26) + (t[ 8] & 0x3ffffff);
-    r[ 9] += (t[ 8] >> 26) + (t[ 9] & 0x3ffffff);
-    r[10] +=  t[ 9] >> 26;
-#endif /* WOLFSSL_SP_SMALL */
-}
-
-/* Shift the result in the high 256 bits down to the bottom.
- *
- * r  A single precision number.
- * a  A single precision number.
- */
-static void sp_256_mont_shift_10(sp_digit* r, const sp_digit* a)
-{
-#ifdef WOLFSSL_SP_SMALL
-    int i;
-    sp_digit n, s;
-
-    s = a[10];
-    n = a[9] >> 22;
-    for (i = 0; i < 9; i++) {
-        n += (s & 0x3ffffff) << 4;
-        r[i] = n & 0x3ffffff;
-        n >>= 26;
-        s = a[11 + i] + (s >> 26);
-    }
-    n += s << 4;
-    r[9] = n;
-#else
-    sp_digit n, s;
-
-    s = a[10]; n = a[9] >> 22;
-    n += (s & 0x3ffffff) << 4; r[ 0] = n & 0x3ffffff;
-    n >>= 26; s = a[11] + (s >> 26);
-    n += (s & 0x3ffffff) << 4; r[ 1] = n & 0x3ffffff;
-    n >>= 26; s = a[12] + (s >> 26);
-    n += (s & 0x3ffffff) << 4; r[ 2] = n & 0x3ffffff;
-    n >>= 26; s = a[13] + (s >> 26);
-    n += (s & 0x3ffffff) << 4; r[ 3] = n & 0x3ffffff;
-    n >>= 26; s = a[14] + (s >> 26);
-    n += (s & 0x3ffffff) << 4; r[ 4] = n & 0x3ffffff;
-    n >>= 26; s = a[15] + (s >> 26);
-    n += (s & 0x3ffffff) << 4; r[ 5] = n & 0x3ffffff;
-    n >>= 26; s = a[16] + (s >> 26);
-    n += (s & 0x3ffffff) << 4; r[ 6] = n & 0x3ffffff;
-    n >>= 26; s = a[17] + (s >> 26);
-    n += (s & 0x3ffffff) << 4; r[ 7] = n & 0x3ffffff;
-    n >>= 26; s = a[18] + (s >> 26);
-    n += (s & 0x3ffffff) << 4; r[ 8] = n & 0x3ffffff;
-    n >>= 26; s = a[19] + (s >> 26);
-    n += s << 4;              r[ 9] = n;
-#endif /* WOLFSSL_SP_SMALL */
-    XMEMSET(&r[10], 0, sizeof(*r) * 10);
-}
-
-/* Reduce the number back to 256 bits using Montgomery reduction.
- *
- * a   A single precision number to reduce in place.
- * m   The single precision number representing the modulus.
- * mp  The digit representing the negative inverse of m mod 2^n.
- */
-static void sp_256_mont_reduce_10(sp_digit* a, sp_digit* m, sp_digit mp)
-{
-    int i;
-    sp_digit mu;
-
-    if (mp != 1) {
-        for (i=0; i<9; i++) {
-            mu = (a[i] * mp) & 0x3ffffff;
-            sp_256_mul_add_10(a+i, m, mu);
-            a[i+1] += a[i] >> 26;
-        }
-        mu = (a[i] * mp) & 0x3fffffl;
-        sp_256_mul_add_10(a+i, m, mu);
-        a[i+1] += a[i] >> 26;
-        a[i] &= 0x3ffffff;
-    }
-    else {
-        for (i=0; i<9; i++) {
-            mu = a[i] & 0x3ffffff;
-            sp_256_mul_add_10(a+i, p256_mod, mu);
-            a[i+1] += a[i] >> 26;
-        }
-        mu = a[i] & 0x3fffffl;
-        sp_256_mul_add_10(a+i, p256_mod, mu);
-        a[i+1] += a[i] >> 26;
-        a[i] &= 0x3ffffff;
-    }
-
-    sp_256_mont_shift_10(a, a);
-    sp_256_cond_sub_10(a, a, m, 0 - ((a[9] >> 22) > 0));
-    sp_256_norm_10(a);
+    }
+
+    return err;
 }
 
 #ifdef WOLFSSL_SP_SMALL
@@ -7847,10 +12827,12 @@
     for (k = 17; k >= 0; k--) {
         for (i = 9; i >= 0; i--) {
             j = k - i;
-            if (j >= 10)
-                break;
-            if (j < 0)
+            if (j >= 10) {
+                break;
+            }
+            if (j < 0) {
                 continue;
+            }
 
             c += ((int64_t)a[i]) * b[j];
         }
@@ -7995,6 +12977,232 @@
 }
 
 #endif /* WOLFSSL_SP_SMALL */
+#define sp_256_mont_reduce_order_10         sp_256_mont_reduce_10
+
+/* Compare a with b in constant time.
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+static sp_digit sp_256_cmp_10(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=9; i>=0; i--) {
+        r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    }
+#else
+    r |= (a[ 9] - b[ 9]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 8] - b[ 8]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 7] - b[ 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 6] - b[ 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 5] - b[ 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 4] - b[ 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 3] - b[ 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 2] - b[ 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 1] - b[ 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 0] - b[ 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+#endif /* WOLFSSL_SP_SMALL */
+
+    return r;
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not.
+ *
+ * r  A single precision number representing condition subtract result.
+ * a  A single precision number to subtract from.
+ * b  A single precision number to subtract.
+ * m  Mask value to apply.
+ */
+static void sp_256_cond_sub_10(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i = 0; i < 10; i++) {
+        r[i] = a[i] - (b[i] & m);
+    }
+#else
+    r[ 0] = a[ 0] - (b[ 0] & m);
+    r[ 1] = a[ 1] - (b[ 1] & m);
+    r[ 2] = a[ 2] - (b[ 2] & m);
+    r[ 3] = a[ 3] - (b[ 3] & m);
+    r[ 4] = a[ 4] - (b[ 4] & m);
+    r[ 5] = a[ 5] - (b[ 5] & m);
+    r[ 6] = a[ 6] - (b[ 6] & m);
+    r[ 7] = a[ 7] - (b[ 7] & m);
+    r[ 8] = a[ 8] - (b[ 8] & m);
+    r[ 9] = a[ 9] - (b[ 9] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Mul a by scalar b and add into r. (r += a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A scalar.
+ */
+SP_NOINLINE static void sp_256_mul_add_10(sp_digit* r, const sp_digit* a,
+        const sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int64_t tb = b;
+    int64_t t = 0;
+    int i;
+
+    for (i = 0; i < 10; i++) {
+        t += (tb * a[i]) + r[i];
+        r[i] = t & 0x3ffffff;
+        t >>= 26;
+    }
+    r[10] += t;
+#else
+    int64_t tb = b;
+    int64_t t[10];
+
+    t[ 0] = tb * a[ 0];
+    t[ 1] = tb * a[ 1];
+    t[ 2] = tb * a[ 2];
+    t[ 3] = tb * a[ 3];
+    t[ 4] = tb * a[ 4];
+    t[ 5] = tb * a[ 5];
+    t[ 6] = tb * a[ 6];
+    t[ 7] = tb * a[ 7];
+    t[ 8] = tb * a[ 8];
+    t[ 9] = tb * a[ 9];
+    r[ 0] += (sp_digit)                 (t[ 0] & 0x3ffffff);
+    r[ 1] += (sp_digit)((t[ 0] >> 26) + (t[ 1] & 0x3ffffff));
+    r[ 2] += (sp_digit)((t[ 1] >> 26) + (t[ 2] & 0x3ffffff));
+    r[ 3] += (sp_digit)((t[ 2] >> 26) + (t[ 3] & 0x3ffffff));
+    r[ 4] += (sp_digit)((t[ 3] >> 26) + (t[ 4] & 0x3ffffff));
+    r[ 5] += (sp_digit)((t[ 4] >> 26) + (t[ 5] & 0x3ffffff));
+    r[ 6] += (sp_digit)((t[ 5] >> 26) + (t[ 6] & 0x3ffffff));
+    r[ 7] += (sp_digit)((t[ 6] >> 26) + (t[ 7] & 0x3ffffff));
+    r[ 8] += (sp_digit)((t[ 7] >> 26) + (t[ 8] & 0x3ffffff));
+    r[ 9] += (sp_digit)((t[ 8] >> 26) + (t[ 9] & 0x3ffffff));
+    r[10] += (sp_digit) (t[ 9] >> 26);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Normalize the values in each word to 26.
+ *
+ * a  Array of sp_digit to normalize.
+ */
+static void sp_256_norm_10(sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    for (i = 0; i < 9; i++) {
+        a[i+1] += a[i] >> 26;
+        a[i] &= 0x3ffffff;
+    }
+#else
+    a[1] += a[0] >> 26; a[0] &= 0x3ffffff;
+    a[2] += a[1] >> 26; a[1] &= 0x3ffffff;
+    a[3] += a[2] >> 26; a[2] &= 0x3ffffff;
+    a[4] += a[3] >> 26; a[3] &= 0x3ffffff;
+    a[5] += a[4] >> 26; a[4] &= 0x3ffffff;
+    a[6] += a[5] >> 26; a[5] &= 0x3ffffff;
+    a[7] += a[6] >> 26; a[6] &= 0x3ffffff;
+    a[8] += a[7] >> 26; a[7] &= 0x3ffffff;
+    a[9] += a[8] >> 26; a[8] &= 0x3ffffff;
+#endif
+}
+
+/* Shift the result in the high 256 bits down to the bottom.
+ *
+ * r  A single precision number.
+ * a  A single precision number.
+ */
+static void sp_256_mont_shift_10(sp_digit* r, const sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    sp_digit n, s;
+
+    s = a[10];
+    n = a[9] >> 22;
+    for (i = 0; i < 9; i++) {
+        n += (s & 0x3ffffff) << 4;
+        r[i] = n & 0x3ffffff;
+        n >>= 26;
+        s = a[11 + i] + (s >> 26);
+    }
+    n += s << 4;
+    r[9] = n;
+#else
+    sp_digit n, s;
+
+    s = a[10]; n = a[9] >> 22;
+    n += (s & 0x3ffffff) << 4; r[ 0] = n & 0x3ffffff;
+    n >>= 26; s = a[11] + (s >> 26);
+    n += (s & 0x3ffffff) << 4; r[ 1] = n & 0x3ffffff;
+    n >>= 26; s = a[12] + (s >> 26);
+    n += (s & 0x3ffffff) << 4; r[ 2] = n & 0x3ffffff;
+    n >>= 26; s = a[13] + (s >> 26);
+    n += (s & 0x3ffffff) << 4; r[ 3] = n & 0x3ffffff;
+    n >>= 26; s = a[14] + (s >> 26);
+    n += (s & 0x3ffffff) << 4; r[ 4] = n & 0x3ffffff;
+    n >>= 26; s = a[15] + (s >> 26);
+    n += (s & 0x3ffffff) << 4; r[ 5] = n & 0x3ffffff;
+    n >>= 26; s = a[16] + (s >> 26);
+    n += (s & 0x3ffffff) << 4; r[ 6] = n & 0x3ffffff;
+    n >>= 26; s = a[17] + (s >> 26);
+    n += (s & 0x3ffffff) << 4; r[ 7] = n & 0x3ffffff;
+    n >>= 26; s = a[18] + (s >> 26);
+    n += (s & 0x3ffffff) << 4; r[ 8] = n & 0x3ffffff;
+    n >>= 26; s = a[19] + (s >> 26);
+    n += s << 4;              r[ 9] = n;
+#endif /* WOLFSSL_SP_SMALL */
+    XMEMSET(&r[10], 0, sizeof(*r) * 10U);
+}
+
+/* Reduce the number back to 256 bits using Montgomery reduction.
+ *
+ * a   A single precision number to reduce in place.
+ * m   The single precision number representing the modulus.
+ * mp  The digit representing the negative inverse of m mod 2^n.
+ */
+static void sp_256_mont_reduce_10(sp_digit* a, const sp_digit* m, sp_digit mp)
+{
+    int i;
+    sp_digit mu;
+
+    if (mp != 1) {
+        for (i=0; i<9; i++) {
+            mu = (a[i] * mp) & 0x3ffffff;
+            sp_256_mul_add_10(a+i, m, mu);
+            a[i+1] += a[i] >> 26;
+        }
+        mu = (a[i] * mp) & 0x3fffffL;
+        sp_256_mul_add_10(a+i, m, mu);
+        a[i+1] += a[i] >> 26;
+        a[i] &= 0x3ffffff;
+    }
+    else {
+        for (i=0; i<9; i++) {
+            mu = a[i] & 0x3ffffff;
+            sp_256_mul_add_10(a+i, p256_mod, mu);
+            a[i+1] += a[i] >> 26;
+        }
+        mu = a[i] & 0x3fffffL;
+        sp_256_mul_add_10(a+i, p256_mod, mu);
+        a[i+1] += a[i] >> 26;
+        a[i] &= 0x3ffffff;
+    }
+
+    sp_256_mont_shift_10(a, a);
+    sp_256_cond_sub_10(a, a, m, 0 - (((a[9] >> 22) > 0) ?
+            (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_10(a);
+}
+
 /* Multiply two Montogmery form numbers mod the modulus (prime).
  * (r = a * b mod m)
  *
@@ -8004,8 +13212,8 @@
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_256_mont_mul_10(sp_digit* r, sp_digit* a, sp_digit* b,
-        sp_digit* m, sp_digit mp)
+static void sp_256_mont_mul_10(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
 {
     sp_256_mul_10(r, a, b);
     sp_256_mont_reduce_10(r, m, mp);
@@ -8028,15 +13236,18 @@
     for (k = 17; k >= 0; k--) {
         for (i = 9; i >= 0; i--) {
             j = k - i;
-            if (j >= 10 || i <= j)
-                break;
-            if (j < 0)
+            if (j >= 10 || i <= j) {
+                break;
+            }
+            if (j < 0) {
                 continue;
+            }
 
             c += ((int64_t)a[i]) * a[j] * 2;
         }
-        if (i == j)
+        if (i == j) {
            c += ((int64_t)a[i]) * a[i];
+        }
 
         r[k + 2] += c >> 52;
         r[k + 1] = (c >> 26) & 0x3ffffff;
@@ -8139,14 +13350,14 @@
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_256_mont_sqr_10(sp_digit* r, sp_digit* a, sp_digit* m,
+static void sp_256_mont_sqr_10(sp_digit* r, const sp_digit* a, const sp_digit* m,
         sp_digit mp)
 {
     sp_256_sqr_10(r, a);
     sp_256_mont_reduce_10(r, m, mp);
 }
 
-#ifndef WOLFSSL_SP_SMALL
+#if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY)
 /* Square the Montgomery form number a number of times. (r = a ^ n mod m)
  *
  * r   Result of squaring.
@@ -8155,19 +13366,21 @@
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_256_mont_sqr_n_10(sp_digit* r, sp_digit* a, int n,
-        sp_digit* m, sp_digit mp)
+static void sp_256_mont_sqr_n_10(sp_digit* r, const sp_digit* a, int n,
+        const sp_digit* m, sp_digit mp)
 {
     sp_256_mont_sqr_10(r, a, m, mp);
-    for (; n > 1; n--)
+    for (; n > 1; n--) {
         sp_256_mont_sqr_10(r, r, m, mp);
-}
-
-#else
+    }
+}
+
+#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */
+#ifdef WOLFSSL_SP_SMALL
 /* Mod-2 for the P256 curve. */
-static const uint32_t p256_mod_2[8] = {
-    0xfffffffd,0xffffffff,0xffffffff,0x00000000,0x00000000,0x00000000,
-    0x00000001,0xffffffff
+static const uint32_t p256_mod_minus_2[8] = {
+    0xfffffffdU,0xffffffffU,0xffffffffU,0x00000000U,0x00000000U,0x00000000U,
+    0x00000001U,0xffffffffU
 };
 #endif /* !WOLFSSL_SP_SMALL */
 
@@ -8178,7 +13391,7 @@
  * a   Number to invert.
  * td  Temporary data.
  */
-static void sp_256_mont_inv_10(sp_digit* r, sp_digit* a, sp_digit* td)
+static void sp_256_mont_inv_10(sp_digit* r, const sp_digit* a, sp_digit* td)
 {
 #ifdef WOLFSSL_SP_SMALL
     sp_digit* t = td;
@@ -8187,71 +13400,66 @@
     XMEMCPY(t, a, sizeof(sp_digit) * 10);
     for (i=254; i>=0; i--) {
         sp_256_mont_sqr_10(t, t, p256_mod, p256_mp_mod);
-        if (p256_mod_2[i / 32] & ((sp_digit)1 << (i % 32)))
+        if (p256_mod_minus_2[i / 32] & ((sp_digit)1 << (i % 32)))
             sp_256_mont_mul_10(t, t, a, p256_mod, p256_mp_mod);
     }
     XMEMCPY(r, t, sizeof(sp_digit) * 10);
 #else
-    sp_digit* t = td;
+    sp_digit* t1 = td;
     sp_digit* t2 = td + 2 * 10;
     sp_digit* t3 = td + 4 * 10;
-
-    /* t = a^2 */
-    sp_256_mont_sqr_10(t, a, p256_mod, p256_mp_mod);
-    /* t = a^3 = t * a */
-    sp_256_mont_mul_10(t, t, a, p256_mod, p256_mp_mod);
-    /* t2= a^c = t ^ 2 ^ 2 */
-    sp_256_mont_sqr_n_10(t2, t, 2, p256_mod, p256_mp_mod);
-    /* t3= a^d = t2 * a */
-    sp_256_mont_mul_10(t3, t2, a, p256_mod, p256_mp_mod);
-    /* t = a^f = t2 * t */
-    sp_256_mont_mul_10(t, t2, t, p256_mod, p256_mp_mod);
-    /* t2= a^f0 = t ^ 2 ^ 4 */
-    sp_256_mont_sqr_n_10(t2, t, 4, p256_mod, p256_mp_mod);
-    /* t3= a^fd = t2 * t3 */
-    sp_256_mont_mul_10(t3, t2, t3, p256_mod, p256_mp_mod);
-    /* t = a^ff = t2 * t */
-    sp_256_mont_mul_10(t, t2, t, p256_mod, p256_mp_mod);
-    /* t2= a^ff00 = t ^ 2 ^ 8 */
-    sp_256_mont_sqr_n_10(t2, t, 8, p256_mod, p256_mp_mod);
-    /* t3= a^fffd = t2 * t3 */
-    sp_256_mont_mul_10(t3, t2, t3, p256_mod, p256_mp_mod);
-    /* t = a^ffff = t2 * t */
-    sp_256_mont_mul_10(t, t2, t, p256_mod, p256_mp_mod);
-    /* t2= a^ffff0000 = t ^ 2 ^ 16 */
-    sp_256_mont_sqr_n_10(t2, t, 16, p256_mod, p256_mp_mod);
-    /* t3= a^fffffffd = t2 * t3 */
-    sp_256_mont_mul_10(t3, t2, t3, p256_mod, p256_mp_mod);
-    /* t = a^ffffffff = t2 * t */
-    sp_256_mont_mul_10(t, t2, t, p256_mod, p256_mp_mod);
-    /* t = a^ffffffff00000000 = t ^ 2 ^ 32  */
-    sp_256_mont_sqr_n_10(t2, t, 32, p256_mod, p256_mp_mod);
-    /* t2= a^ffffffffffffffff = t2 * t */
-    sp_256_mont_mul_10(t, t2, t, p256_mod, p256_mp_mod);
-    /* t2= a^ffffffff00000001 = t2 * a */
-    sp_256_mont_mul_10(t2, t2, a, p256_mod, p256_mp_mod);
-    /* t2= a^ffffffff000000010000000000000000000000000000000000000000
-     *   = t2 ^ 2 ^ 160 */
-    sp_256_mont_sqr_n_10(t2, t2, 160, p256_mod, p256_mp_mod);
-    /* t2= a^ffffffff00000001000000000000000000000000ffffffffffffffff
-     *   = t2 * t */
-    sp_256_mont_mul_10(t2, t2, t, p256_mod, p256_mp_mod);
-    /* t2= a^ffffffff00000001000000000000000000000000ffffffffffffffff00000000
-     *   = t2 ^ 2 ^ 32 */
-    sp_256_mont_sqr_n_10(t2, t2, 32, p256_mod, p256_mp_mod);
-    /* r = a^ffffffff00000001000000000000000000000000fffffffffffffffffffffffd
-     *   = t2 * t3 */
-    sp_256_mont_mul_10(r, t2, t3, p256_mod, p256_mp_mod);
-#endif /* WOLFSSL_SP_SMALL */
-}
-
-/* Map the Montgomery form projective co-ordinate point to an affine point.
- *
- * r  Resulting affine co-ordinate point.
- * p  Montgomery form projective co-ordinate point.
+    /* 0x2 */
+    sp_256_mont_sqr_10(t1, a, p256_mod, p256_mp_mod);
+    /* 0x3 */
+    sp_256_mont_mul_10(t2, t1, a, p256_mod, p256_mp_mod);
+    /* 0xc */
+    sp_256_mont_sqr_n_10(t1, t2, 2, p256_mod, p256_mp_mod);
+    /* 0xd */
+    sp_256_mont_mul_10(t3, t1, a, p256_mod, p256_mp_mod);
+    /* 0xf */
+    sp_256_mont_mul_10(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xf0 */
+    sp_256_mont_sqr_n_10(t1, t2, 4, p256_mod, p256_mp_mod);
+    /* 0xfd */
+    sp_256_mont_mul_10(t3, t3, t1, p256_mod, p256_mp_mod);
+    /* 0xff */
+    sp_256_mont_mul_10(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xff00 */
+    sp_256_mont_sqr_n_10(t1, t2, 8, p256_mod, p256_mp_mod);
+    /* 0xfffd */
+    sp_256_mont_mul_10(t3, t3, t1, p256_mod, p256_mp_mod);
+    /* 0xffff */
+    sp_256_mont_mul_10(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xffff0000 */
+    sp_256_mont_sqr_n_10(t1, t2, 16, p256_mod, p256_mp_mod);
+    /* 0xfffffffd */
+    sp_256_mont_mul_10(t3, t3, t1, p256_mod, p256_mp_mod);
+    /* 0xffffffff */
+    sp_256_mont_mul_10(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000000 */
+    sp_256_mont_sqr_n_10(t1, t2, 32, p256_mod, p256_mp_mod);
+    /* 0xffffffffffffffff */
+    sp_256_mont_mul_10(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001 */
+    sp_256_mont_mul_10(r, t1, a, p256_mod, p256_mp_mod);
+    /* 0xffffffff000000010000000000000000000000000000000000000000 */
+    sp_256_mont_sqr_n_10(r, r, 160, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001000000000000000000000000ffffffffffffffff */
+    sp_256_mont_mul_10(r, r, t2, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001000000000000000000000000ffffffffffffffff00000000 */
+    sp_256_mont_sqr_n_10(r, r, 32, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001000000000000000000000000fffffffffffffffffffffffd */
+    sp_256_mont_mul_10(r, r, t3, p256_mod, p256_mp_mod);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Map the Montgomery form projective coordinate point to an affine point.
+ *
+ * r  Resulting affine coordinate point.
+ * p  Montgomery form projective coordinate point.
  * t  Temporary ordinate data.
  */
-static void sp_256_map_10(sp_point* r, sp_point* p, sp_digit* t)
+static void sp_256_map_10(sp_point_256* r, const sp_point_256* p, sp_digit* t)
 {
     sp_digit* t1 = t;
     sp_digit* t2 = t + 2*10;
@@ -8264,20 +13472,22 @@
 
     /* x /= z^2 */
     sp_256_mont_mul_10(r->x, p->x, t2, p256_mod, p256_mp_mod);
-    XMEMSET(r->x + 10, 0, sizeof(r->x) / 2);
+    XMEMSET(r->x + 10, 0, sizeof(r->x) / 2U);
     sp_256_mont_reduce_10(r->x, p256_mod, p256_mp_mod);
     /* Reduce x to less than modulus */
     n = sp_256_cmp_10(r->x, p256_mod);
-    sp_256_cond_sub_10(r->x, r->x, p256_mod, 0 - (n >= 0));
+    sp_256_cond_sub_10(r->x, r->x, p256_mod, 0 - ((n >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
     sp_256_norm_10(r->x);
 
     /* y /= z^3 */
     sp_256_mont_mul_10(r->y, p->y, t1, p256_mod, p256_mp_mod);
-    XMEMSET(r->y + 10, 0, sizeof(r->y) / 2);
+    XMEMSET(r->y + 10, 0, sizeof(r->y) / 2U);
     sp_256_mont_reduce_10(r->y, p256_mod, p256_mp_mod);
     /* Reduce y to less than modulus */
     n = sp_256_cmp_10(r->y, p256_mod);
-    sp_256_cond_sub_10(r->y, r->y, p256_mod, 0 - (n >= 0));
+    sp_256_cond_sub_10(r->y, r->y, p256_mod, 0 - ((n >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
     sp_256_norm_10(r->y);
 
     XMEMSET(r->z, 0, sizeof(r->z));
@@ -8297,8 +13507,9 @@
 {
     int i;
 
-    for (i = 0; i < 10; i++)
+    for (i = 0; i < 10; i++) {
         r[i] = a[i] + b[i];
+    }
 
     return 0;
 }
@@ -8334,12 +13545,13 @@
  * b   Second number to add in Montogmery form.
  * m   Modulus (prime).
  */
-static void sp_256_mont_add_10(sp_digit* r, sp_digit* a, sp_digit* b,
-        sp_digit* m)
-{
-    sp_256_add_10(r, a, b);
+static void sp_256_mont_add_10(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    (void)sp_256_add_10(r, a, b);
     sp_256_norm_10(r);
-    sp_256_cond_sub_10(r, r, m, 0 - ((r[9] >> 22) > 0));
+    sp_256_cond_sub_10(r, r, m, 0 - (((r[9] >> 22) > 0) ?
+                (sp_digit)1 : (sp_digit)0));
     sp_256_norm_10(r);
 }
 
@@ -8349,11 +13561,12 @@
  * a   Number to double in Montogmery form.
  * m   Modulus (prime).
  */
-static void sp_256_mont_dbl_10(sp_digit* r, sp_digit* a, sp_digit* m)
-{
-    sp_256_add_10(r, a, a);
+static void sp_256_mont_dbl_10(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    (void)sp_256_add_10(r, a, a);
     sp_256_norm_10(r);
-    sp_256_cond_sub_10(r, r, m, 0 - ((r[9] >> 22) > 0));
+    sp_256_cond_sub_10(r, r, m, 0 - (((r[9] >> 22) > 0) ?
+                (sp_digit)1 : (sp_digit)0));
     sp_256_norm_10(r);
 }
 
@@ -8363,15 +13576,17 @@
  * a   Number to triple in Montogmery form.
  * m   Modulus (prime).
  */
-static void sp_256_mont_tpl_10(sp_digit* r, sp_digit* a, sp_digit* m)
-{
-    sp_256_add_10(r, a, a);
+static void sp_256_mont_tpl_10(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    (void)sp_256_add_10(r, a, a);
     sp_256_norm_10(r);
-    sp_256_cond_sub_10(r, r, m, 0 - ((r[9] >> 22) > 0));
+    sp_256_cond_sub_10(r, r, m, 0 - (((r[9] >> 22) > 0) ?
+                (sp_digit)1 : (sp_digit)0));
     sp_256_norm_10(r);
-    sp_256_add_10(r, r, a);
+    (void)sp_256_add_10(r, r, a);
     sp_256_norm_10(r);
-    sp_256_cond_sub_10(r, r, m, 0 - ((r[9] >> 22) > 0));
+    sp_256_cond_sub_10(r, r, m, 0 - (((r[9] >> 22) > 0) ?
+                (sp_digit)1 : (sp_digit)0));
     sp_256_norm_10(r);
 }
 
@@ -8387,8 +13602,9 @@
 {
     int i;
 
-    for (i = 0; i < 10; i++)
+    for (i = 0; i < 10; i++) {
         r[i] = a[i] - b[i];
+    }
 
     return 0;
 }
@@ -8432,8 +13648,9 @@
 #ifdef WOLFSSL_SP_SMALL
     int i;
 
-    for (i = 0; i < 10; i++)
+    for (i = 0; i < 10; i++) {
         r[i] = a[i] + (b[i] & m);
+    }
 #else
     r[ 0] = a[ 0] + (b[ 0] & m);
     r[ 1] = a[ 1] + (b[ 1] & m);
@@ -8455,10 +13672,10 @@
  * b   Number to subtract with in Montogmery form.
  * m   Modulus (prime).
  */
-static void sp_256_mont_sub_10(sp_digit* r, sp_digit* a, sp_digit* b,
-        sp_digit* m)
-{
-    sp_256_sub_10(r, a, b);
+static void sp_256_mont_sub_10(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    (void)sp_256_sub_10(r, a, b);
     sp_256_cond_add_10(r, r, m, r[9] >> 22);
     sp_256_norm_10(r);
 }
@@ -8474,8 +13691,9 @@
 #ifdef WOLFSSL_SP_SMALL
     int i;
 
-    for (i=0; i<9; i++)
+    for (i=0; i<9; i++) {
         r[i] = ((a[i] >> 1) | (a[i + 1] << 25)) & 0x3ffffff;
+    }
 #else
     r[0] = ((a[0] >> 1) | (a[1] << 25)) & 0x3ffffff;
     r[1] = ((a[1] >> 1) | (a[2] << 25)) & 0x3ffffff;
@@ -8496,7 +13714,7 @@
  * a  Number to divide.
  * m  Modulus (prime).
  */
-static void sp_256_div2_10(sp_digit* r, sp_digit* a, sp_digit* m)
+static void sp_256_div2_10(sp_digit* r, const sp_digit* a, const sp_digit* m)
 {
     sp_256_cond_add_10(r, a, m, 0 - (a[0] & 1));
     sp_256_norm_10(r);
@@ -8509,50 +13727,38 @@
  * p  Point to double.
  * t  Temporary ordinate data.
  */
-static void sp_256_proj_point_dbl_10(sp_point* r, sp_point* p, sp_digit* t)
-{
-    sp_point *rp[2];
-    sp_point tp;
+static void sp_256_proj_point_dbl_10(sp_point_256* r, const sp_point_256* p, sp_digit* t)
+{
     sp_digit* t1 = t;
     sp_digit* t2 = t + 2*10;
     sp_digit* x;
     sp_digit* y;
     sp_digit* z;
-    int i;
-
-    /* When infinity don't double point passed in - constant time. */
-    rp[0] = r;
-    rp[1] = &tp;
-    x = rp[p->infinity]->x;
-    y = rp[p->infinity]->y;
-    z = rp[p->infinity]->z;
-    /* Put point to double into result - good for infinty. */
+
+    x = r->x;
+    y = r->y;
+    z = r->z;
+    /* Put infinity into result. */
     if (r != p) {
-        for (i=0; i<10; i++)
-            r->x[i] = p->x[i];
-        for (i=0; i<10; i++)
-            r->y[i] = p->y[i];
-        for (i=0; i<10; i++)
-            r->z[i] = p->z[i];
         r->infinity = p->infinity;
     }
 
     /* T1 = Z * Z */
-    sp_256_mont_sqr_10(t1, z, p256_mod, p256_mp_mod);
+    sp_256_mont_sqr_10(t1, p->z, p256_mod, p256_mp_mod);
     /* Z = Y * Z */
-    sp_256_mont_mul_10(z, y, z, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_10(z, p->y, p->z, p256_mod, p256_mp_mod);
     /* Z = 2Z */
     sp_256_mont_dbl_10(z, z, p256_mod);
     /* T2 = X - T1 */
-    sp_256_mont_sub_10(t2, x, t1, p256_mod);
+    sp_256_mont_sub_10(t2, p->x, t1, p256_mod);
     /* T1 = X + T1 */
-    sp_256_mont_add_10(t1, x, t1, p256_mod);
+    sp_256_mont_add_10(t1, p->x, t1, p256_mod);
     /* T2 = T1 * T2 */
     sp_256_mont_mul_10(t2, t1, t2, p256_mod, p256_mp_mod);
     /* T1 = 3T2 */
     sp_256_mont_tpl_10(t1, t2, p256_mod);
     /* Y = 2Y */
-    sp_256_mont_dbl_10(y, y, p256_mod);
+    sp_256_mont_dbl_10(y, p->y, p256_mod);
     /* Y = Y * Y */
     sp_256_mont_sqr_10(y, y, p256_mod, p256_mp_mod);
     /* T2 = Y * Y */
@@ -8560,9 +13766,9 @@
     /* T2 = T2/2 */
     sp_256_div2_10(t2, t2, p256_mod);
     /* Y = Y * X */
-    sp_256_mont_mul_10(y, y, x, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_10(y, y, p->x, p256_mod, p256_mp_mod);
     /* X = T1 * T1 */
-    sp_256_mont_mul_10(x, t1, t1, p256_mod, p256_mp_mod);
+    sp_256_mont_sqr_10(x, t1, p256_mod, p256_mp_mod);
     /* X = X - Y */
     sp_256_mont_sub_10(x, x, y, p256_mod);
     /* X = X - Y */
@@ -8573,7 +13779,6 @@
     sp_256_mont_mul_10(y, y, t1, p256_mod, p256_mp_mod);
     /* Y = Y - T2 */
     sp_256_mont_sub_10(y, y, t2, p256_mod);
-
 }
 
 /* Compare two numbers to determine if they are equal.
@@ -8593,16 +13798,15 @@
 /* Add two Montgomery form projective points.
  *
  * r  Result of addition.
- * p  Frist point to add.
+ * p  First point to add.
  * q  Second point to add.
  * t  Temporary ordinate data.
  */
-static void sp_256_proj_point_add_10(sp_point* r, sp_point* p, sp_point* q,
+static void sp_256_proj_point_add_10(sp_point_256* r, const sp_point_256* p, const sp_point_256* q,
         sp_digit* t)
 {
-    sp_point *ap[2];
-    sp_point *rp[2];
-    sp_point tp;
+    const sp_point_256* ap[2];
+    sp_point_256* rp[2];
     sp_digit* t1 = t;
     sp_digit* t2 = t + 2*10;
     sp_digit* t3 = t + 4*10;
@@ -8615,34 +13819,39 @@
 
     /* Ensure only the first point is the same as the result. */
     if (q == r) {
-        sp_point* a = p;
+        const sp_point_256* a = p;
         p = q;
         q = a;
     }
 
     /* Check double */
-    sp_256_sub_10(t1, p256_mod, q->y);
+    (void)sp_256_sub_10(t1, p256_mod, q->y);
     sp_256_norm_10(t1);
-    if (sp_256_cmp_equal_10(p->x, q->x) & sp_256_cmp_equal_10(p->z, q->z) &
-        (sp_256_cmp_equal_10(p->y, q->y) | sp_256_cmp_equal_10(p->y, t1))) {
+    if ((sp_256_cmp_equal_10(p->x, q->x) & sp_256_cmp_equal_10(p->z, q->z) &
+        (sp_256_cmp_equal_10(p->y, q->y) | sp_256_cmp_equal_10(p->y, t1))) != 0) {
         sp_256_proj_point_dbl_10(r, p, t);
     }
     else {
         rp[0] = r;
-        rp[1] = &tp;
-        XMEMSET(&tp, 0, sizeof(tp));
+
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point_256));
         x = rp[p->infinity | q->infinity]->x;
         y = rp[p->infinity | q->infinity]->y;
         z = rp[p->infinity | q->infinity]->z;
 
         ap[0] = p;
         ap[1] = q;
-        for (i=0; i<10; i++)
+        for (i=0; i<10; i++) {
             r->x[i] = ap[p->infinity]->x[i];
-        for (i=0; i<10; i++)
+        }
+        for (i=0; i<10; i++) {
             r->y[i] = ap[p->infinity]->y[i];
-        for (i=0; i<10; i++)
+        }
+        for (i=0; i<10; i++) {
             r->z[i] = ap[p->infinity]->z[i];
+        }
         r->infinity = ap[p->infinity]->infinity;
 
         /* U1 = X1*Z2^2 */
@@ -8682,7 +13891,7 @@
 
 #ifdef WOLFSSL_SP_SMALL
 /* Multiply the point by the scalar and return the result.
- * If map is true then convert result to affine co-ordinates.
+ * If map is true then convert result to affine coordinates.
  *
  * r     Resulting point.
  * g     Point to multiply.
@@ -8691,12 +13900,16 @@
  * heap  Heap to use for allocation.
  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
  */
-static int sp_256_ecc_mulmod_10(sp_point* r, sp_point* g, sp_digit* k,
+static int sp_256_ecc_mulmod_10(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
         int map, void* heap)
 {
-    sp_point* td;
-    sp_point* t[3];
+#ifdef WOLFSSL_SP_NO_MALLOC
+    sp_point_256 t[3];
+    sp_digit tmp[2 * 10 * 5];
+#else
+    sp_point_256* t;
     sp_digit* tmp;
+#endif
     sp_digit n;
     int i;
     int c, y;
@@ -8704,30 +13917,28 @@
 
     (void)heap;
 
-    td = (sp_point*)XMALLOC(sizeof(sp_point) * 3, heap, DYNAMIC_TYPE_ECC);
-    if (td == NULL)
+#ifndef WOLFSSL_SP_NO_MALLOC
+    t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 3, heap, DYNAMIC_TYPE_ECC);
+    if (t == NULL)
         err = MEMORY_E;
     tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 5, heap,
-                             DYNAMIC_TYPE_ECC);
+                                                              DYNAMIC_TYPE_ECC);
     if (tmp == NULL)
         err = MEMORY_E;
-
-    if (err == MP_OKAY) {
-        XMEMSET(td, 0, sizeof(*td) * 3);
-
-        t[0] = &td[0];
-        t[1] = &td[1];
-        t[2] = &td[2];
+#endif
+
+    if (err == MP_OKAY) {
+        XMEMSET(t, 0, sizeof(sp_point_256) * 3);
 
         /* t[0] = {0, 0, 1} * norm */
-        t[0]->infinity = 1;
+        t[0].infinity = 1;
         /* t[1] = {g->x, g->y, g->z} * norm */
-        err = sp_256_mod_mul_norm_10(t[1]->x, g->x, p256_mod);
+        err = sp_256_mod_mul_norm_10(t[1].x, g->x, p256_mod);
     }
     if (err == MP_OKAY)
-        err = sp_256_mod_mul_norm_10(t[1]->y, g->y, p256_mod);
+        err = sp_256_mod_mul_norm_10(t[1].y, g->y, p256_mod);
     if (err == MP_OKAY)
-        err = sp_256_mod_mul_norm_10(t[1]->z, g->z, p256_mod);
+        err = sp_256_mod_mul_norm_10(t[1].z, g->z, p256_mod);
 
     if (err == MP_OKAY) {
         i = 9;
@@ -8745,38 +13956,45 @@
             y = (n >> 25) & 1;
             n <<= 1;
 
-            sp_256_proj_point_add_10(t[y^1], t[0], t[1], tmp);
-
-            XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
-                                  ((size_t)t[1] & addr_mask[y])),
-                    sizeof(sp_point));
-            sp_256_proj_point_dbl_10(t[2], t[2], tmp);
-            XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
-                            ((size_t)t[1] & addr_mask[y])), t[2],
-                    sizeof(sp_point));
-        }
-
-        if (map)
-            sp_256_map_10(r, t[0], tmp);
-        else
-            XMEMCPY(r, t[0], sizeof(sp_point));
-    }
-
+            sp_256_proj_point_add_10(&t[y^1], &t[0], &t[1], tmp);
+
+            XMEMCPY(&t[2], (void*)(((size_t)&t[0] & addr_mask[y^1]) +
+                                   ((size_t)&t[1] & addr_mask[y])),
+                    sizeof(sp_point_256));
+            sp_256_proj_point_dbl_10(&t[2], &t[2], tmp);
+            XMEMCPY((void*)(((size_t)&t[0] & addr_mask[y^1]) +
+                            ((size_t)&t[1] & addr_mask[y])), &t[2],
+                    sizeof(sp_point_256));
+        }
+
+        if (map != 0) {
+            sp_256_map_10(r, &t[0], tmp);
+        }
+        else {
+            XMEMCPY(r, &t[0], sizeof(sp_point_256));
+        }
+    }
+
+#ifndef WOLFSSL_SP_NO_MALLOC
     if (tmp != NULL) {
         XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 10 * 5);
-        XFREE(tmp, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    }
-    if (td != NULL) {
-        XMEMSET(td, 0, sizeof(sp_point) * 3);
-        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    }
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+    if (t != NULL) {
+        XMEMSET(t, 0, sizeof(sp_point_256) * 3);
+        XFREE(t, NULL, DYNAMIC_TYPE_ECC);
+    }
+#else
+    ForceZero(tmp, sizeof(tmp));
+    ForceZero(t, sizeof(t));
+#endif
 
     return err;
 }
 
 #elif defined(WOLFSSL_SP_CACHE_RESISTANT)
 /* Multiply the point by the scalar and return the result.
- * If map is true then convert result to affine co-ordinates.
+ * If map is true then convert result to affine coordinates.
  *
  * r     Resulting point.
  * g     Point to multiply.
@@ -8785,15 +14003,16 @@
  * heap  Heap to use for allocation.
  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
  */
-static int sp_256_ecc_mulmod_10(sp_point* r, sp_point* g, sp_digit* k,
+static int sp_256_ecc_mulmod_10(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
         int map, void* heap)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point td[3];
-    sp_digit tmpd[2 * 10 * 5];
-#endif
-    sp_point* t;
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 t[3];
+    sp_digit tmp[2 * 10 * 5];
+#else
+    sp_point_256* t;
     sp_digit* tmp;
+#endif
     sp_digit n;
     int i;
     int c, y;
@@ -8801,29 +14020,22 @@
 
     (void)heap;
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    sp_point td[3];
-    t = (sp_point*)XMALLOC(sizeof(*td) * 3, heap, DYNAMIC_TYPE_ECC);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_point*)XMALLOC(sizeof(*t) * 3, heap, DYNAMIC_TYPE_ECC);
     if (t == NULL)
         err = MEMORY_E;
     tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 5, heap,
                              DYNAMIC_TYPE_ECC);
     if (tmp == NULL)
         err = MEMORY_E;
-#else
-    t = td;
-    tmp = tmpd;
-#endif
-
-    if (err == MP_OKAY) {
-        t[0] = &td[0];
-        t[1] = &td[1];
-        t[2] = &td[2];
-
+#endif
+
+    if (err == MP_OKAY) {
         /* t[0] = {0, 0, 1} * norm */
         XMEMSET(&t[0], 0, sizeof(t[0]));
         t[0].infinity = 1;
         /* t[1] = {g->x, g->y, g->z} * norm */
+        t[1].infinity = 0;
         err = sp_256_mod_mul_norm_10(t[1].x, g->x, p256_mod);
     }
     if (err == MP_OKAY)
@@ -8853,27 +14065,29 @@
                                  ((size_t)&t[1] & addr_mask[y])), sizeof(t[2]));
             sp_256_proj_point_dbl_10(&t[2], &t[2], tmp);
             XMEMCPY((void*)(((size_t)&t[0] & addr_mask[y^1]) +
-                           ((size_t)&t[1] & addr_mask[y])), &t[2], sizeof(t[2]));
-        }
-
-        if (map)
+                          ((size_t)&t[1] & addr_mask[y])), &t[2], sizeof(t[2]));
+        }
+
+        if (map != 0) {
             sp_256_map_10(r, &t[0], tmp);
-        else
-            XMEMCPY(r, &t[0], sizeof(sp_point));
-    }
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+        }
+        else {
+            XMEMCPY(r, &t[0], sizeof(sp_point_256));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     if (tmp != NULL) {
         XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 10 * 5);
         XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
     }
     if (t != NULL) {
-        XMEMSET(t, 0, sizeof(sp_point) * 3);
+        XMEMSET(t, 0, sizeof(sp_point_256) * 3);
         XFREE(t, heap, DYNAMIC_TYPE_ECC);
     }
 #else
-    ForceZero(tmpd, sizeof(tmpd));
-    ForceZero(td, sizeof(td));
+    ForceZero(tmp, sizeof(tmp));
+    ForceZero(t, sizeof(t));
 #endif
 
     return err;
@@ -8881,14 +14095,13 @@
 
 #else
 /* A table entry for pre-computed points. */
-typedef struct sp_table_entry {
+typedef struct sp_table_entry_256 {
     sp_digit x[10];
     sp_digit y[10];
-    byte infinity;
-} sp_table_entry;
+} sp_table_entry_256;
 
 /* Multiply the point by the scalar and return the result.
- * If map is true then convert result to affine co-ordinates.
+ * If map is true then convert result to affine coordinates.
  *
  * r     Resulting point.
  * g     Point to multiply.
@@ -8897,16 +14110,16 @@
  * heap  Heap to use for allocation.
  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
  */
-static int sp_256_ecc_mulmod_fast_10(sp_point* r, sp_point* g, sp_digit* k,
+static int sp_256_ecc_mulmod_fast_10(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
         int map, void* heap)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point td[16];
-    sp_point rtd;
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 td[16];
+    sp_point_256 rtd;
     sp_digit tmpd[2 * 10 * 5];
 #endif
-    sp_point* t;
-    sp_point* rt;
+    sp_point_256* t;
+    sp_point_256* rt;
     sp_digit* tmp;
     sp_digit n;
     int i;
@@ -8915,9 +14128,9 @@
 
     (void)heap;
 
-    err = sp_ecc_point_new(heap, rtd, rt);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    t = (sp_point*)XMALLOC(sizeof(sp_point) * 16, heap, DYNAMIC_TYPE_ECC);
+    err = sp_256_point_new_10(heap, rtd, rt);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 16, heap, DYNAMIC_TYPE_ECC);
     if (t == NULL)
         err = MEMORY_E;
     tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 5, heap,
@@ -8934,9 +14147,9 @@
         XMEMSET(&t[0], 0, sizeof(t[0]));
         t[0].infinity = 1;
         /* t[1] = {g->x, g->y, g->z} * norm */
-        sp_256_mod_mul_norm_10(t[1].x, g->x, p256_mod);
-        sp_256_mod_mul_norm_10(t[1].y, g->y, p256_mod);
-        sp_256_mod_mul_norm_10(t[1].z, g->z, p256_mod);
+        (void)sp_256_mod_mul_norm_10(t[1].x, g->x, p256_mod);
+        (void)sp_256_mod_mul_norm_10(t[1].y, g->y, p256_mod);
+        (void)sp_256_mod_mul_norm_10(t[1].z, g->z, p256_mod);
         t[1].infinity = 0;
         sp_256_proj_point_dbl_10(&t[ 2], &t[ 1], tmp);
         t[ 2].infinity = 0;
@@ -8971,7 +14184,7 @@
         n = k[i+1] << 6;
         c = 18;
         y = n >> 24;
-        XMEMCPY(rt, &t[y], sizeof(sp_point));
+        XMEMCPY(rt, &t[y], sizeof(sp_point_256));
         n <<= 8;
         for (; i>=0 || c>=4; ) {
             if (c < 4) {
@@ -8990,26 +14203,28 @@
             sp_256_proj_point_add_10(rt, rt, &t[y], tmp);
         }
 
-        if (map)
+        if (map != 0) {
             sp_256_map_10(r, rt, tmp);
-        else
-            XMEMCPY(r, rt, sizeof(sp_point));
-    }
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_256));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     if (tmp != NULL) {
         XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 10 * 5);
         XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
     }
     if (t != NULL) {
-        XMEMSET(t, 0, sizeof(sp_point) * 16);
+        XMEMSET(t, 0, sizeof(sp_point_256) * 16);
         XFREE(t, heap, DYNAMIC_TYPE_ECC);
     }
 #else
     ForceZero(tmpd, sizeof(tmpd));
     ForceZero(td, sizeof(td));
 #endif
-    sp_ecc_point_free(rt, 1, heap);
+    sp_256_point_free_10(rt, 1, heap);
 
     return err;
 }
@@ -9022,11 +14237,8 @@
  * n  Number of times to double
  * t  Temporary ordinate data.
  */
-static void sp_256_proj_point_dbl_n_10(sp_point* r, sp_point* p, int n,
-        sp_digit* t)
-{
-    sp_point *rp[2];
-    sp_point tp;
+static void sp_256_proj_point_dbl_n_10(sp_point_256* p, int n, sp_digit* t)
+{
     sp_digit* w = t;
     sp_digit* a = t + 2*10;
     sp_digit* b = t + 4*10;
@@ -9035,54 +14247,73 @@
     sp_digit* x;
     sp_digit* y;
     sp_digit* z;
-    int i;
-
-    rp[0] = r;
-    rp[1] = &tp;
-    x = rp[p->infinity]->x;
-    y = rp[p->infinity]->y;
-    z = rp[p->infinity]->z;
-    if (r != p) {
-        for (i=0; i<10; i++)
-            r->x[i] = p->x[i];
-        for (i=0; i<10; i++)
-            r->y[i] = p->y[i];
-        for (i=0; i<10; i++)
-            r->z[i] = p->z[i];
-        r->infinity = p->infinity;
-    }
+
+    x = p->x;
+    y = p->y;
+    z = p->z;
 
     /* Y = 2*Y */
     sp_256_mont_dbl_10(y, y, p256_mod);
     /* W = Z^4 */
     sp_256_mont_sqr_10(w, z, p256_mod, p256_mp_mod);
     sp_256_mont_sqr_10(w, w, p256_mod, p256_mp_mod);
-    while (n--) {
+
+#ifndef WOLFSSL_SP_SMALL
+    while (--n > 0)
+#else
+    while (--n >= 0)
+#endif
+    {
         /* A = 3*(X^2 - W) */
         sp_256_mont_sqr_10(t1, x, p256_mod, p256_mp_mod);
         sp_256_mont_sub_10(t1, t1, w, p256_mod);
         sp_256_mont_tpl_10(a, t1, p256_mod);
         /* B = X*Y^2 */
-        sp_256_mont_sqr_10(t2, y, p256_mod, p256_mp_mod);
-        sp_256_mont_mul_10(b, t2, x, p256_mod, p256_mp_mod);
+        sp_256_mont_sqr_10(t1, y, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_10(b, t1, x, p256_mod, p256_mp_mod);
         /* X = A^2 - 2B */
         sp_256_mont_sqr_10(x, a, p256_mod, p256_mp_mod);
-        sp_256_mont_dbl_10(t1, b, p256_mod);
-        sp_256_mont_sub_10(x, x, t1, p256_mod);
+        sp_256_mont_dbl_10(t2, b, p256_mod);
+        sp_256_mont_sub_10(x, x, t2, p256_mod);
         /* Z = Z*Y */
         sp_256_mont_mul_10(z, z, y, p256_mod, p256_mp_mod);
         /* t2 = Y^4 */
-        sp_256_mont_sqr_10(t2, t2, p256_mod, p256_mp_mod);
-        if (n) {
+        sp_256_mont_sqr_10(t1, t1, p256_mod, p256_mp_mod);
+#ifdef WOLFSSL_SP_SMALL
+        if (n != 0)
+#endif
+        {
             /* W = W*Y^4 */
-            sp_256_mont_mul_10(w, w, t2, p256_mod, p256_mp_mod);
+            sp_256_mont_mul_10(w, w, t1, p256_mod, p256_mp_mod);
         }
         /* y = 2*A*(B - X) - Y^4 */
         sp_256_mont_sub_10(y, b, x, p256_mod);
         sp_256_mont_mul_10(y, y, a, p256_mod, p256_mp_mod);
         sp_256_mont_dbl_10(y, y, p256_mod);
-        sp_256_mont_sub_10(y, y, t2, p256_mod);
-    }
+        sp_256_mont_sub_10(y, y, t1, p256_mod);
+    }
+#ifndef WOLFSSL_SP_SMALL
+    /* A = 3*(X^2 - W) */
+    sp_256_mont_sqr_10(t1, x, p256_mod, p256_mp_mod);
+    sp_256_mont_sub_10(t1, t1, w, p256_mod);
+    sp_256_mont_tpl_10(a, t1, p256_mod);
+    /* B = X*Y^2 */
+    sp_256_mont_sqr_10(t1, y, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_10(b, t1, x, p256_mod, p256_mp_mod);
+    /* X = A^2 - 2B */
+    sp_256_mont_sqr_10(x, a, p256_mod, p256_mp_mod);
+    sp_256_mont_dbl_10(t2, b, p256_mod);
+    sp_256_mont_sub_10(x, x, t2, p256_mod);
+    /* Z = Z*Y */
+    sp_256_mont_mul_10(z, z, y, p256_mod, p256_mp_mod);
+    /* t2 = Y^4 */
+    sp_256_mont_sqr_10(t1, t1, p256_mod, p256_mp_mod);
+    /* y = 2*A*(B - X) - Y^4 */
+    sp_256_mont_sub_10(y, b, x, p256_mod);
+    sp_256_mont_mul_10(y, y, a, p256_mod, p256_mp_mod);
+    sp_256_mont_dbl_10(y, y, p256_mod);
+    sp_256_mont_sub_10(y, y, t1, p256_mod);
+#endif
     /* Y = Y/2 */
     sp_256_div2_10(y, y, p256_mod);
 }
@@ -9093,16 +14324,15 @@
  * Only the first point can be the same pointer as the result point.
  *
  * r  Result of addition.
- * p  Frist point to add.
+ * p  First point to add.
  * q  Second point to add.
  * t  Temporary ordinate data.
  */
-static void sp_256_proj_point_add_qz1_10(sp_point* r, sp_point* p,
-        sp_point* q, sp_digit* t)
-{
-    sp_point *ap[2];
-    sp_point *rp[2];
-    sp_point tp;
+static void sp_256_proj_point_add_qz1_10(sp_point_256* r, const sp_point_256* p,
+        const sp_point_256* q, sp_digit* t)
+{
+    const sp_point_256* ap[2];
+    sp_point_256* rp[2];
     sp_digit* t1 = t;
     sp_digit* t2 = t + 2*10;
     sp_digit* t3 = t + 4*10;
@@ -9114,28 +14344,33 @@
     int i;
 
     /* Check double */
-    sp_256_sub_10(t1, p256_mod, q->y);
+    (void)sp_256_sub_10(t1, p256_mod, q->y);
     sp_256_norm_10(t1);
-    if (sp_256_cmp_equal_10(p->x, q->x) & sp_256_cmp_equal_10(p->z, q->z) &
-        (sp_256_cmp_equal_10(p->y, q->y) | sp_256_cmp_equal_10(p->y, t1))) {
+    if ((sp_256_cmp_equal_10(p->x, q->x) & sp_256_cmp_equal_10(p->z, q->z) &
+        (sp_256_cmp_equal_10(p->y, q->y) | sp_256_cmp_equal_10(p->y, t1))) != 0) {
         sp_256_proj_point_dbl_10(r, p, t);
     }
     else {
         rp[0] = r;
-        rp[1] = &tp;
-        XMEMSET(&tp, 0, sizeof(tp));
+
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point_256));
         x = rp[p->infinity | q->infinity]->x;
         y = rp[p->infinity | q->infinity]->y;
         z = rp[p->infinity | q->infinity]->z;
 
         ap[0] = p;
         ap[1] = q;
-        for (i=0; i<10; i++)
+        for (i=0; i<10; i++) {
             r->x[i] = ap[p->infinity]->x[i];
-        for (i=0; i<10; i++)
+        }
+        for (i=0; i<10; i++) {
             r->y[i] = ap[p->infinity]->y[i];
-        for (i=0; i<10; i++)
+        }
+        for (i=0; i<10; i++) {
             r->z[i] = ap[p->infinity]->z[i];
+        }
         r->infinity = ap[p->infinity]->infinity;
 
         /* U2 = X2*Z1^2 */
@@ -9171,9 +14406,9 @@
  * Ordinates are in Montgomery form.
  *
  * a  Point to convert.
- * t  Temprorary data.
- */
-static void sp_256_proj_to_affine_10(sp_point* a, sp_digit* t)
+ * t  Temporary data.
+ */
+static void sp_256_proj_to_affine_10(sp_point_256* a, sp_digit* t)
 {
     sp_digit* t1 = t;
     sp_digit* t2 = t + 2 * 10;
@@ -9193,35 +14428,40 @@
  *
  * a      The base point.
  * table  Place to store generated point data.
- * tmp    Temprorary data.
+ * tmp    Temporary data.
  * heap  Heap to use for allocation.
  */
-static int sp_256_gen_stripe_table_10(sp_point* a,
-        sp_table_entry* table, sp_digit* tmp, void* heap)
-{
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point td, s1d, s2d;
-#endif
-    sp_point* t;
-    sp_point* s1 = NULL;
-    sp_point* s2 = NULL;
+static int sp_256_gen_stripe_table_10(const sp_point_256* a,
+        sp_table_entry_256* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 td, s1d, s2d;
+#endif
+    sp_point_256* t;
+    sp_point_256* s1 = NULL;
+    sp_point_256* s2 = NULL;
     int i, j;
     int err;
 
     (void)heap;
 
-    err = sp_ecc_point_new(heap, td, t);
-    if (err == MP_OKAY)
-        err = sp_ecc_point_new(heap, s1d, s1);
-    if (err == MP_OKAY)
-        err = sp_ecc_point_new(heap, s2d, s2);
-
-    if (err == MP_OKAY)
+    err = sp_256_point_new_10(heap, td, t);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_10(heap, s1d, s1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_10(heap, s2d, s2);
+    }
+
+    if (err == MP_OKAY) {
         err = sp_256_mod_mul_norm_10(t->x, a->x, p256_mod);
-    if (err == MP_OKAY)
+    }
+    if (err == MP_OKAY) {
         err = sp_256_mod_mul_norm_10(t->y, a->y, p256_mod);
-    if (err == MP_OKAY)
+    }
+    if (err == MP_OKAY) {
         err = sp_256_mod_mul_norm_10(t->z, a->z, p256_mod);
+    }
     if (err == MP_OKAY) {
         t->infinity = 0;
         sp_256_proj_to_affine_10(t, tmp);
@@ -9232,19 +14472,16 @@
         s2->infinity = 0;
 
         /* table[0] = {0, 0, infinity} */
-        XMEMSET(&table[0], 0, sizeof(sp_table_entry));
-        table[0].infinity = 1;
+        XMEMSET(&table[0], 0, sizeof(sp_table_entry_256));
         /* table[1] = Affine version of 'a' in Montgomery form */
         XMEMCPY(table[1].x, t->x, sizeof(table->x));
         XMEMCPY(table[1].y, t->y, sizeof(table->y));
-        table[1].infinity = 0;
 
         for (i=1; i<8; i++) {
-            sp_256_proj_point_dbl_n_10(t, t, 32, tmp);
+            sp_256_proj_point_dbl_n_10(t, 32, tmp);
             sp_256_proj_to_affine_10(t, tmp);
             XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
             XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
-            table[1<<i].infinity = 0;
         }
 
         for (i=1; i<8; i++) {
@@ -9257,21 +14494,20 @@
                 sp_256_proj_to_affine_10(t, tmp);
                 XMEMCPY(table[j].x, t->x, sizeof(table->x));
                 XMEMCPY(table[j].y, t->y, sizeof(table->y));
-                table[j].infinity = 0;
-            }
-        }
-    }
-
-    sp_ecc_point_free(s2, 0, heap);
-    sp_ecc_point_free(s1, 0, heap);
-    sp_ecc_point_free( t, 0, heap);
+            }
+        }
+    }
+
+    sp_256_point_free_10(s2, 0, heap);
+    sp_256_point_free_10(s1, 0, heap);
+    sp_256_point_free_10( t, 0, heap);
 
     return err;
 }
 
 #endif /* FP_ECC */
 /* Multiply the point by the scalar and return the result.
- * If map is true then convert result to affine co-ordinates.
+ * If map is true then convert result to affine coordinates.
  *
  * r     Resulting point.
  * k     Scalar to multiply by.
@@ -9279,16 +14515,16 @@
  * heap  Heap to use for allocation.
  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
  */
-static int sp_256_ecc_mulmod_stripe_10(sp_point* r, sp_point* g,
-        sp_table_entry* table, sp_digit* k, int map, void* heap)
-{
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point rtd;
-    sp_point pd;
+static int sp_256_ecc_mulmod_stripe_10(sp_point_256* r, const sp_point_256* g,
+        const sp_table_entry_256* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 rtd;
+    sp_point_256 pd;
     sp_digit td[2 * 10 * 5];
 #endif
-    sp_point* rt;
-    sp_point* p = NULL;
+    sp_point_256* rt;
+    sp_point_256* p = NULL;
     sp_digit* t;
     int i, j;
     int y, x;
@@ -9297,14 +14533,17 @@
     (void)g;
     (void)heap;
 
-    err = sp_ecc_point_new(heap, rtd, rt);
-    if (err == MP_OKAY)
-        err = sp_ecc_point_new(heap, pd, p);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+
+    err = sp_256_point_new_10(heap, rtd, rt);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_10(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 5, heap,
                            DYNAMIC_TYPE_ECC);
-    if (t == NULL)
-        err = MEMORY_E;
+    if (t == NULL) {
+        err = MEMORY_E;
+    }
 #else
     t = td;
 #endif
@@ -9314,35 +14553,40 @@
         XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
 
         y = 0;
-        for (j=0,x=31; j<8; j++,x+=32)
+        for (j=0,x=31; j<8; j++,x+=32) {
             y |= ((k[x / 26] >> (x % 26)) & 1) << j;
+        }
         XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
         XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
-        rt->infinity = table[y].infinity;
+        rt->infinity = !y;
         for (i=30; i>=0; i--) {
             y = 0;
-            for (j=0,x=i; j<8; j++,x+=32)
+            for (j=0,x=i; j<8; j++,x+=32) {
                 y |= ((k[x / 26] >> (x % 26)) & 1) << j;
+            }
 
             sp_256_proj_point_dbl_10(rt, rt, t);
             XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
             XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
-            p->infinity = table[y].infinity;
+            p->infinity = !y;
             sp_256_proj_point_add_qz1_10(rt, rt, p, t);
         }
 
-        if (map)
+        if (map != 0) {
             sp_256_map_10(r, rt, t);
-        else
-            XMEMCPY(r, rt, sizeof(sp_point));
-    }
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (t != NULL)
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_256));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
         XFREE(t, heap, DYNAMIC_TYPE_ECC);
-#endif
-    sp_ecc_point_free(p, 0, heap);
-    sp_ecc_point_free(rt, 0, heap);
+    }
+#endif
+    sp_256_point_free_10(p, 0, heap);
+    sp_256_point_free_10(rt, 0, heap);
 
     return err;
 }
@@ -9352,43 +14596,43 @@
     #define FP_ENTRIES 16
 #endif
 
-typedef struct sp_cache_t {
+typedef struct sp_cache_256_t {
     sp_digit x[10];
     sp_digit y[10];
-    sp_table_entry table[256];
+    sp_table_entry_256 table[256];
     uint32_t cnt;
     int set;
-} sp_cache_t;
-
-static THREAD_LS_T sp_cache_t sp_cache[FP_ENTRIES];
-static THREAD_LS_T int sp_cache_last = -1;
-static THREAD_LS_T int sp_cache_inited = 0;
+} sp_cache_256_t;
+
+static THREAD_LS_T sp_cache_256_t sp_cache_256[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_256_last = -1;
+static THREAD_LS_T int sp_cache_256_inited = 0;
 
 #ifndef HAVE_THREAD_LS
-    static volatile int initCacheMutex = 0;
-    static wolfSSL_Mutex sp_cache_lock;
-#endif
-
-static void sp_ecc_get_cache(sp_point* g, sp_cache_t** cache)
+    static volatile int initCacheMutex_256 = 0;
+    static wolfSSL_Mutex sp_cache_256_lock;
+#endif
+
+static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache)
 {
     int i, j;
     uint32_t least;
 
-    if (sp_cache_inited == 0) {
+    if (sp_cache_256_inited == 0) {
         for (i=0; i<FP_ENTRIES; i++) {
-            sp_cache[i].set = 0;
-        }
-        sp_cache_inited = 1;
+            sp_cache_256[i].set = 0;
+        }
+        sp_cache_256_inited = 1;
     }
 
     /* Compare point with those in cache. */
     for (i=0; i<FP_ENTRIES; i++) {
-        if (!sp_cache[i].set)
+        if (!sp_cache_256[i].set)
             continue;
 
-        if (sp_256_cmp_equal_10(g->x, sp_cache[i].x) & 
-                           sp_256_cmp_equal_10(g->y, sp_cache[i].y)) {
-            sp_cache[i].cnt++;
+        if (sp_256_cmp_equal_10(g->x, sp_cache_256[i].x) &
+                           sp_256_cmp_equal_10(g->y, sp_cache_256[i].y)) {
+            sp_cache_256[i].cnt++;
             break;
         }
     }
@@ -9396,37 +14640,37 @@
     /* No match. */
     if (i == FP_ENTRIES) {
         /* Find empty entry. */
-        i = (sp_cache_last + 1) % FP_ENTRIES;
-        for (; i != sp_cache_last; i=(i+1)%FP_ENTRIES) {
-            if (!sp_cache[i].set) {
+        i = (sp_cache_256_last + 1) % FP_ENTRIES;
+        for (; i != sp_cache_256_last; i=(i+1)%FP_ENTRIES) {
+            if (!sp_cache_256[i].set) {
                 break;
             }
         }
 
         /* Evict least used. */
-        if (i == sp_cache_last) {
-            least = sp_cache[0].cnt;
+        if (i == sp_cache_256_last) {
+            least = sp_cache_256[0].cnt;
             for (j=1; j<FP_ENTRIES; j++) {
-                if (sp_cache[j].cnt < least) {
+                if (sp_cache_256[j].cnt < least) {
                     i = j;
-                    least = sp_cache[i].cnt;
+                    least = sp_cache_256[i].cnt;
                 }
             }
         }
 
-        XMEMCPY(sp_cache[i].x, g->x, sizeof(sp_cache[i].x));
-        XMEMCPY(sp_cache[i].y, g->y, sizeof(sp_cache[i].y));
-        sp_cache[i].set = 1;
-        sp_cache[i].cnt = 1;
-    }
-
-    *cache = &sp_cache[i];
-    sp_cache_last = i;
+        XMEMCPY(sp_cache_256[i].x, g->x, sizeof(sp_cache_256[i].x));
+        XMEMCPY(sp_cache_256[i].y, g->y, sizeof(sp_cache_256[i].y));
+        sp_cache_256[i].set = 1;
+        sp_cache_256[i].cnt = 1;
+    }
+
+    *cache = &sp_cache_256[i];
+    sp_cache_256_last = i;
 }
 #endif /* FP_ECC */
 
 /* Multiply the base point of P256 by the scalar and return the result.
- * If map is true then convert result to affine co-ordinates.
+ * If map is true then convert result to affine coordinates.
  *
  * r     Resulting point.
  * g     Point to multiply.
@@ -9435,32 +14679,32 @@
  * heap  Heap to use for allocation.
  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
  */
-static int sp_256_ecc_mulmod_10(sp_point* r, sp_point* g, sp_digit* k,
+static int sp_256_ecc_mulmod_10(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
         int map, void* heap)
 {
 #ifndef FP_ECC
     return sp_256_ecc_mulmod_fast_10(r, g, k, map, heap);
 #else
     sp_digit tmp[2 * 10 * 5];
-    sp_cache_t* cache;
+    sp_cache_256_t* cache;
     int err = MP_OKAY;
 
 #ifndef HAVE_THREAD_LS
-    if (initCacheMutex == 0) {
-         wc_InitMutex(&sp_cache_lock);
-         initCacheMutex = 1;
-    }
-    if (wc_LockMutex(&sp_cache_lock) != 0)
+    if (initCacheMutex_256 == 0) {
+         wc_InitMutex(&sp_cache_256_lock);
+         initCacheMutex_256 = 1;
+    }
+    if (wc_LockMutex(&sp_cache_256_lock) != 0)
        err = BAD_MUTEX_E;
 #endif /* HAVE_THREAD_LS */
 
     if (err == MP_OKAY) {
-        sp_ecc_get_cache(g, &cache);
+        sp_ecc_get_cache_256(g, &cache);
         if (cache->cnt == 2)
             sp_256_gen_stripe_table_10(g, cache->table, tmp, heap);
 
 #ifndef HAVE_THREAD_LS
-        wc_UnLockMutex(&sp_cache_lock);
+        wc_UnLockMutex(&sp_cache_256_lock);
 #endif /* HAVE_THREAD_LS */
 
         if (cache->cnt < 2) {
@@ -9478,7 +14722,7 @@
 
 #endif
 /* Multiply the point by the scalar and return the result.
- * If map is true then convert result to affine co-ordinates.
+ * If map is true then convert result to affine coordinates.
  *
  * km    Scalar to multiply by.
  * p     Point to multiply.
@@ -9490,21 +14734,19 @@
 int sp_ecc_mulmod_256(mp_int* km, ecc_point* gm, ecc_point* r, int map,
         void* heap)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point p;
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 p;
     sp_digit kd[10];
 #endif
-    sp_point* point;
+    sp_point_256* point;
     sp_digit* k = NULL;
     int err = MP_OKAY;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    err = sp_ecc_point_new(heap, p, point);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        k = XMALLOC(sizeof(sp_digit) * 10, heap, DYNAMIC_TYPE_ECC);
+
+    err = sp_256_point_new_10(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 10, heap,
+                                                              DYNAMIC_TYPE_ECC);
         if (k == NULL)
             err = MEMORY_E;
     }
@@ -9515,28 +14757,42 @@
         sp_256_from_mp(k, 10, km);
         sp_256_point_from_ecc_point_10(point, gm);
 
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_256_ecc_mulmod_avx2_10(point, point, k, map, heap);
-        else
-#endif
             err = sp_256_ecc_mulmod_10(point, point, k, map, heap);
     }
-    if (err == MP_OKAY)
+    if (err == MP_OKAY) {
         err = sp_256_point_to_ecc_point_10(point, r);
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (k != NULL)
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
         XFREE(k, heap, DYNAMIC_TYPE_ECC);
-#endif
-    sp_ecc_point_free(point, 0, heap);
+    }
+#endif
+    sp_256_point_free_10(point, 0, heap);
 
     return err;
 }
 
 #ifdef WOLFSSL_SP_SMALL
 /* Multiply the base point of P256 by the scalar and return the result.
- * If map is true then convert result to affine co-ordinates.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_base_10(sp_point_256* r, const sp_digit* k,
+        int map, void* heap)
+{
+    /* No pre-computed values. */
+    return sp_256_ecc_mulmod_10(r, &p256_base, k, map, heap);
+}
+
+#elif defined(WOLFSSL_SP_CACHE_RESISTANT)
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
  *
  * r     Resulting point.
  * k     Scalar to multiply by.
@@ -9544,7 +14800,7 @@
  * heap  Heap to use for allocation.
  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
  */
-static int sp_256_ecc_mulmod_base_10(sp_point* r, sp_digit* k,
+static int sp_256_ecc_mulmod_base_10(sp_point_256* r, const sp_digit* k,
         int map, void* heap)
 {
     /* No pre-computed values. */
@@ -9552,1545 +14808,1289 @@
 }
 
 #else
-static sp_table_entry p256_table[256] = {
+static const sp_table_entry_256 p256_table[256] = {
     /* 0 */
     { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-      { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-      1 },
+      { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
     /* 1 */
     { { 0x0a9143c,0x1cc3506,0x360179e,0x3f17fb6,0x075ba95,0x1d88944,
         0x3b732b7,0x15719e7,0x376a537,0x0062417 },
       { 0x295560a,0x094d5f3,0x245cddf,0x392e867,0x18b4ab8,0x3487cc9,
-        0x288688d,0x176174b,0x3182588,0x0215c7f },
-      0 },
+        0x288688d,0x176174b,0x3182588,0x0215c7f } },
     /* 2 */
     { { 0x147519a,0x2218090,0x32f0202,0x2b09acd,0x0d0981e,0x1e17af2,
         0x14a7caa,0x163a6a7,0x10ddbdf,0x03654f1 },
       { 0x1590f8f,0x0d8733f,0x09179d6,0x1ad139b,0x372e962,0x0bad933,
-        0x1961102,0x223cdff,0x37e9eb2,0x0218fae },
-      0 },
+        0x1961102,0x223cdff,0x37e9eb2,0x0218fae } },
     /* 3 */
     { { 0x0db6485,0x1ad88d7,0x2f97785,0x288bc28,0x3808f0e,0x3df8c02,
         0x28d9544,0x20280f9,0x055b5ff,0x00001d8 },
       { 0x38d2010,0x13ae6e0,0x308a763,0x2ecc90d,0x254014f,0x10a9981,
-        0x247d398,0x0fb8383,0x3613437,0x020c21d },
-      0 },
+        0x247d398,0x0fb8383,0x3613437,0x020c21d } },
     /* 4 */
     { { 0x2a0d2bb,0x08bf145,0x34994f9,0x1b06988,0x30d5cc1,0x1f18b22,
         0x01cf3a5,0x199fe49,0x161fd1b,0x00bd79a },
       { 0x1a01797,0x171c2fd,0x21925c1,0x1358255,0x23d20b4,0x1c7f6d4,
-        0x111b370,0x03dec12,0x1168d6f,0x03d923e },
-      0 },
+        0x111b370,0x03dec12,0x1168d6f,0x03d923e } },
     /* 5 */
     { { 0x137bbbc,0x19a11f8,0x0bec9e5,0x27a29a8,0x3e43446,0x275cd18,
         0x0427617,0x00056c7,0x285133d,0x016af80 },
       { 0x04c7dab,0x2a0df30,0x0c0792a,0x1310c98,0x3573d9f,0x239b30d,
-        0x1315627,0x1ce0c32,0x25b6b6f,0x0252edc },
-      0 },
+        0x1315627,0x1ce0c32,0x25b6b6f,0x0252edc } },
     /* 6 */
     { { 0x20f141c,0x26d23dc,0x3c74bbf,0x334b7d6,0x06199b3,0x0441171,
         0x3f61294,0x313bf70,0x3cb2f7d,0x03375ae },
       { 0x2f436fd,0x19c02fa,0x26becca,0x1b6e64c,0x26f647f,0x053c948,
-        0x0fa7920,0x397d830,0x2bd4bda,0x028d86f },
-      0 },
+        0x0fa7920,0x397d830,0x2bd4bda,0x028d86f } },
     /* 7 */
     { { 0x17c13c7,0x2895616,0x03e128a,0x17d42df,0x1c38d63,0x0f02747,
         0x039aecf,0x0a4b01c,0x209c4b5,0x02e84b2 },
       { 0x1f91dfd,0x023e916,0x07fb9e4,0x19b3ba8,0x13af43b,0x35e02ca,
-        0x0eb0899,0x3bd2c7b,0x19d701f,0x014faee },
-      0 },
+        0x0eb0899,0x3bd2c7b,0x19d701f,0x014faee } },
     /* 8 */
     { { 0x0e63d34,0x1fb8c6c,0x0fab4fe,0x1caa795,0x0f46005,0x179ed69,
         0x093334d,0x120c701,0x39206d5,0x021627e },
       { 0x183553a,0x03d7319,0x09e5aa7,0x12b8959,0x2087909,0x0011194,
-        0x1045071,0x0713f32,0x16d0254,0x03aec1a },
-      0 },
+        0x1045071,0x0713f32,0x16d0254,0x03aec1a } },
     /* 9 */
     { { 0x01647c5,0x1b2856b,0x1799461,0x11f133d,0x0b8127d,0x1937eeb,
         0x266aa37,0x1f68f71,0x0cbd1b2,0x03aca08 },
       { 0x287e008,0x1be361a,0x38f3940,0x276488d,0x2d87dfa,0x0333b2c,
-        0x2d2e428,0x368755b,0x09b55a7,0x007ca0a },
-      0 },
+        0x2d2e428,0x368755b,0x09b55a7,0x007ca0a } },
     /* 10 */
     { { 0x389da99,0x2a8300e,0x0022abb,0x27ae0a1,0x0a6f2d7,0x207017a,
         0x047862b,0x1358c9e,0x35905e5,0x00cde92 },
       { 0x1f7794a,0x1d40348,0x3f613c6,0x2ddf5b5,0x0207005,0x133f5ba,
-        0x1a37810,0x3ef5829,0x0d5f4c2,0x0035978 },
-      0 },
+        0x1a37810,0x3ef5829,0x0d5f4c2,0x0035978 } },
     /* 11 */
     { { 0x1275d38,0x026efad,0x2358d9d,0x1142f82,0x14268a7,0x1cfac99,
         0x362ff49,0x288cbc1,0x24252f4,0x0308f68 },
       { 0x394520c,0x06e13c2,0x178e5da,0x18ec16f,0x1096667,0x134a7a8,
-        0x0dcb869,0x33fc4e9,0x38cc790,0x006778e },
-      0 },
+        0x0dcb869,0x33fc4e9,0x38cc790,0x006778e } },
     /* 12 */
     { { 0x2c5fe04,0x29c5b09,0x1bdb183,0x02ceee8,0x03b28de,0x132dc4b,
         0x32c586a,0x32ff5d0,0x3d491fc,0x038d372 },
       { 0x2a58403,0x2351aea,0x3a53b40,0x21a0ba5,0x39a6974,0x1aaaa2b,
-        0x3901273,0x03dfe78,0x3447b4e,0x039d907 },
-      0 },
+        0x3901273,0x03dfe78,0x3447b4e,0x039d907 } },
     /* 13 */
     { { 0x364ba59,0x14e5077,0x02fc7d7,0x3b02c09,0x1d33f10,0x0560616,
         0x06dfc6a,0x15efd3c,0x357052a,0x01284b7 },
       { 0x039dbd0,0x18ce3e5,0x3e1fbfa,0x352f794,0x0d3c24b,0x07c6cc5,
-        0x1e4ffa2,0x3a91bf5,0x293bb5b,0x01abd6a },
-      0 },
+        0x1e4ffa2,0x3a91bf5,0x293bb5b,0x01abd6a } },
     /* 14 */
     { { 0x0c91999,0x02da644,0x0491da1,0x100a960,0x00a24b4,0x2330824,
         0x0094b4b,0x1004cf8,0x35a66a4,0x017f8d1 },
       { 0x13e7b4b,0x232af7e,0x391ab0f,0x069f08f,0x3292b50,0x3479898,
-        0x2889aec,0x2a4590b,0x308ecfe,0x02d5138 },
-      0 },
+        0x2889aec,0x2a4590b,0x308ecfe,0x02d5138 } },
     /* 15 */
     { { 0x2ddfdce,0x231ba45,0x39e6647,0x19be245,0x12c3291,0x35399f8,
         0x0d6e764,0x3082d3a,0x2bda6b0,0x0382dac },
       { 0x37efb57,0x04b7cae,0x00070d3,0x379e431,0x01aac0d,0x1e6f251,
-        0x0336ad6,0x0ddd3e4,0x3de25a6,0x01c7008 },
-      0 },
+        0x0336ad6,0x0ddd3e4,0x3de25a6,0x01c7008 } },
     /* 16 */
     { { 0x3e20925,0x230912f,0x286762a,0x30e3f73,0x391c19a,0x34e1c18,
         0x16a5d5d,0x093d96a,0x3d421d3,0x0187561 },
       { 0x37173ea,0x19ce8a8,0x0b65e87,0x0214dde,0x2238480,0x16ead0f,
-        0x38441e0,0x3bef843,0x2124621,0x03e847f },
-      0 },
+        0x38441e0,0x3bef843,0x2124621,0x03e847f } },
     /* 17 */
     { { 0x0b19ffd,0x247cacb,0x3c231c8,0x16ec648,0x201ba8d,0x2b172a3,
         0x103d678,0x2fb72db,0x04c1f13,0x0161bac },
       { 0x3e8ed09,0x171b949,0x2de20c3,0x0f06067,0x21e81a3,0x1b194be,
-        0x0fd6c05,0x13c449e,0x0087086,0x006756b },
-      0 },
+        0x0fd6c05,0x13c449e,0x0087086,0x006756b } },
     /* 18 */
     { { 0x09a4e1f,0x27d604c,0x00741e9,0x06fa49c,0x0ab7de7,0x3f4a348,
         0x25ef0be,0x158fc9a,0x33f7f9c,0x039f001 },
       { 0x2f59f76,0x3598e83,0x30501f6,0x15083f2,0x0669b3b,0x29980b5,
-        0x0c1f7a7,0x0f02b02,0x0fec65b,0x0382141 },
-      0 },
+        0x0c1f7a7,0x0f02b02,0x0fec65b,0x0382141 } },
     /* 19 */
     { { 0x031b3ca,0x23da368,0x2d66f09,0x27b9b69,0x06d1cab,0x13c91ba,
         0x3d81fa9,0x25ad16f,0x0825b09,0x01e3c06 },
       { 0x225787f,0x3bf790e,0x2c9bb7e,0x0347732,0x28016f8,0x0d6ff0d,
-        0x2a4877b,0x1d1e833,0x3b87e94,0x010e9dc },
-      0 },
+        0x2a4877b,0x1d1e833,0x3b87e94,0x010e9dc } },
     /* 20 */
     { { 0x2b533d5,0x1ddcd34,0x1dc0625,0x3da86f7,0x3673b8a,0x1e7b0a4,
         0x3e7c9aa,0x19ac55d,0x251c3b2,0x02edb79 },
       { 0x25259b3,0x24c0ead,0x3480e7e,0x34f40e9,0x3d6a0af,0x2cf3f09,
-        0x2c83d19,0x2e66f16,0x19a5d18,0x0182d18 },
-      0 },
+        0x2c83d19,0x2e66f16,0x19a5d18,0x0182d18 } },
     /* 21 */
     { { 0x2e5aa1c,0x28e3846,0x3658bd6,0x0ad279c,0x1b8b765,0x397e1fb,
         0x130014e,0x3ff342c,0x3b2aeeb,0x02743c9 },
       { 0x2730a55,0x0918c5e,0x083aca9,0x0bf76ef,0x19c955b,0x300669c,
-        0x01dfe0a,0x312341f,0x26d356e,0x0091295 },
-      0 },
+        0x01dfe0a,0x312341f,0x26d356e,0x0091295 } },
     /* 22 */
     { { 0x2cf1f96,0x00e52ba,0x271c6db,0x2a40930,0x19f2122,0x0b2f4ee,
         0x26ac1b8,0x3bda498,0x0873581,0x0117963 },
       { 0x38f9dbc,0x3d1e768,0x2040d3f,0x11ba222,0x3a8aaf1,0x1b82fb5,
-        0x1adfb24,0x2de9251,0x21cc1e4,0x0301038 },
-      0 },
+        0x1adfb24,0x2de9251,0x21cc1e4,0x0301038 } },
     /* 23 */
     { { 0x38117b6,0x2bc001b,0x1433847,0x3fdce8d,0x3651969,0x3651d7a,
         0x2b35761,0x1bb1d20,0x097682c,0x00737d7 },
       { 0x1f04839,0x1dd6d04,0x16987db,0x3d12378,0x17dbeac,0x1c2cc86,
-        0x121dd1b,0x3fcf6ca,0x1f8a92d,0x00119d5 },
-      0 },
+        0x121dd1b,0x3fcf6ca,0x1f8a92d,0x00119d5 } },
     /* 24 */
     { { 0x0e8ffcd,0x2b174af,0x1a82cc8,0x22cbf98,0x30d53c4,0x080b5b1,
         0x3161727,0x297cfdb,0x2113b83,0x0011b97 },
       { 0x0007f01,0x23fd936,0x3183e7b,0x0496bd0,0x07fb1ef,0x178680f,
-        0x1c5ea63,0x0016c11,0x2c3303d,0x01b8041 },
-      0 },
+        0x1c5ea63,0x0016c11,0x2c3303d,0x01b8041 } },
     /* 25 */
     { { 0x0dd73b1,0x1cd6122,0x10d948c,0x23e657b,0x3767070,0x15a8aad,
         0x385ea8c,0x33c7ce0,0x0ede901,0x0110965 },
       { 0x2d4b65b,0x2a8b244,0x0c37f8f,0x0ee5b24,0x394c234,0x3a5e347,
-        0x26e4a15,0x39a3b4c,0x2514c2e,0x029e5be },
-      0 },
+        0x26e4a15,0x39a3b4c,0x2514c2e,0x029e5be } },
     /* 26 */
     { { 0x23addd7,0x3ed8120,0x13b3359,0x20f959a,0x09e2a61,0x32fcf20,
         0x05b78e3,0x19ba7e2,0x1a9c697,0x0392b4b },
       { 0x2048a61,0x3dfd0a3,0x19a0357,0x233024b,0x3082d19,0x00fb63b,
-        0x3a1af4c,0x1450ff0,0x046c37b,0x0317a50 },
-      0 },
+        0x3a1af4c,0x1450ff0,0x046c37b,0x0317a50 } },
     /* 27 */
     { { 0x3e75f9e,0x294e30a,0x3a78476,0x3a32c48,0x36fd1a9,0x0427012,
         0x1e4df0b,0x11d1f61,0x1afdb46,0x018ca0f },
       { 0x2f2df15,0x0a33dee,0x27f4ce7,0x1542b66,0x3e592c4,0x20d2f30,
-        0x3226ade,0x2a4e3ea,0x1ab1981,0x01a2f46 },
-      0 },
+        0x3226ade,0x2a4e3ea,0x1ab1981,0x01a2f46 } },
     /* 28 */
     { { 0x087d659,0x3ab5446,0x305ac08,0x3d2cd64,0x33374d5,0x3f9d3f8,
         0x186981c,0x37f5a5a,0x2f53c6f,0x01254a4 },
       { 0x2cec896,0x1e32786,0x04844a8,0x043b16d,0x3d964b2,0x1935829,
-        0x16f7e26,0x1a0dd9a,0x30d2603,0x003b1d4 },
-      0 },
+        0x16f7e26,0x1a0dd9a,0x30d2603,0x003b1d4 } },
     /* 29 */
     { { 0x12687bb,0x04e816b,0x21fa2da,0x1abccb8,0x3a1f83b,0x375181e,
         0x0f5ef51,0x0fc2ce4,0x3a66486,0x003d881 },
       { 0x3138233,0x1f8eec3,0x2718bd6,0x1b09caa,0x2dd66b9,0x1bb222b,
-        0x1004072,0x1b73e3b,0x07208ed,0x03fc36c },
-      0 },
+        0x1004072,0x1b73e3b,0x07208ed,0x03fc36c } },
     /* 30 */
     { { 0x095d553,0x3e84053,0x0a8a749,0x3f575a0,0x3a44052,0x3ced59b,
         0x3b4317f,0x03a8c60,0x13c8874,0x00c4ed4 },
       { 0x0d11549,0x0b8ab02,0x221cb40,0x02ed37b,0x2071ee1,0x1fc8c83,
-        0x3987dd4,0x27e049a,0x0f986f1,0x00b4eaf },
-      0 },
+        0x3987dd4,0x27e049a,0x0f986f1,0x00b4eaf } },
     /* 31 */
     { { 0x15581a2,0x2214060,0x11af4c2,0x1598c88,0x19a0a6d,0x32acba6,
         0x3a7a0f0,0x2337c66,0x210ded9,0x0300dbe },
       { 0x1fbd009,0x3822eb0,0x181629a,0x2401b45,0x30b68b1,0x2e78363,
-        0x2b32779,0x006530b,0x2c4b6d4,0x029aca8 },
-      0 },
+        0x2b32779,0x006530b,0x2c4b6d4,0x029aca8 } },
     /* 32 */
     { { 0x13549cf,0x0f943db,0x265ed43,0x1bfeb35,0x06f3369,0x3847f2d,
         0x1bfdacc,0x26181a5,0x252af7c,0x02043b8 },
       { 0x159bb2c,0x143f85c,0x357b654,0x2f9d62c,0x2f7dfbe,0x1a7fa9c,
-        0x057e74d,0x05d14ac,0x17a9273,0x035215c },
-      0 },
+        0x057e74d,0x05d14ac,0x17a9273,0x035215c } },
     /* 33 */
     { { 0x0cb5a98,0x106a2bc,0x10bf117,0x24c7cc4,0x3d3da8f,0x2ce0ab7,
         0x14e2cba,0x1813866,0x1a72f9a,0x01a9811 },
       { 0x2b2411d,0x3034fe8,0x16e0170,0x0f9443a,0x0be0eb8,0x2196cf3,
-        0x0c9f738,0x15e40ef,0x0faf9e1,0x034f917 },
-      0 },
+        0x0c9f738,0x15e40ef,0x0faf9e1,0x034f917 } },
     /* 34 */
     { { 0x03f7669,0x3da6efa,0x3d6bce1,0x209ca1d,0x109f8ae,0x09109e3,
         0x08ae543,0x3067255,0x1dee3c2,0x0081dd5 },
       { 0x3ef1945,0x358765b,0x28c387b,0x3bec4b4,0x218813c,0x0b7d92a,
-        0x3cd1d67,0x2c0367e,0x2e57154,0x0123717 },
-      0 },
+        0x3cd1d67,0x2c0367e,0x2e57154,0x0123717 } },
     /* 35 */
     { { 0x3e5a199,0x1e42ffd,0x0bb7123,0x33e6273,0x1e0efb8,0x294671e,
         0x3a2bfe0,0x3d11709,0x2eddff6,0x03cbec2 },
       { 0x0b5025f,0x0255d7c,0x1f2241c,0x35d03ea,0x0550543,0x202fef4,
-        0x23c8ad3,0x354963e,0x015db28,0x0284fa4 },
-      0 },
+        0x23c8ad3,0x354963e,0x015db28,0x0284fa4 } },
     /* 36 */
     { { 0x2b65cbc,0x1e8d428,0x0226f9f,0x1c8a919,0x10b04b9,0x08fc1e8,
         0x1ce241e,0x149bc99,0x2b01497,0x00afc35 },
       { 0x3216fb7,0x1374fd2,0x226ad3d,0x19fef76,0x0f7d7b8,0x1c21417,
-        0x37b83f6,0x3a27eba,0x25a162f,0x010aa52 },
-      0 },
+        0x37b83f6,0x3a27eba,0x25a162f,0x010aa52 } },
     /* 37 */
     { { 0x2adf191,0x1ab42fa,0x28d7584,0x2409689,0x20f8a48,0x253707d,
         0x2030504,0x378f7a1,0x169c65e,0x00b0b76 },
       { 0x3849c17,0x085c764,0x10dd6d0,0x2e87689,0x1460488,0x30e9521,
-        0x10c7063,0x1b6f120,0x21f42c5,0x03d0dfe },
-      0 },
+        0x10c7063,0x1b6f120,0x21f42c5,0x03d0dfe } },
     /* 38 */
     { { 0x20f7dab,0x035c512,0x29ac6aa,0x24c5ddb,0x20f0497,0x17ce5e1,
         0x00a050f,0x1eaa14b,0x3335470,0x02abd16 },
       { 0x18d364a,0x0df0cf0,0x316585e,0x018f925,0x0d40b9b,0x17b1511,
-        0x1716811,0x1caf3d0,0x10df4f2,0x0337d8c },
-      0 },
+        0x1716811,0x1caf3d0,0x10df4f2,0x0337d8c } },
     /* 39 */
     { { 0x2a8b7ef,0x0f188e3,0x2287747,0x06216f0,0x008e935,0x2f6a38d,
         0x1567722,0x0bfc906,0x0bada9e,0x03c3402 },
       { 0x014d3b1,0x099c749,0x2a76291,0x216c067,0x3b37549,0x14ef2f6,
-        0x21b96d4,0x1ee2d71,0x2f5ca88,0x016f570 },
-      0 },
+        0x21b96d4,0x1ee2d71,0x2f5ca88,0x016f570 } },
     /* 40 */
     { { 0x09a3154,0x3d1a7bd,0x2e9aef0,0x255b8ac,0x03e85a5,0x2a492a7,
         0x2aec1ea,0x11c6516,0x3c8a09e,0x02a84b7 },
       { 0x1f69f1d,0x09c89d3,0x1e7326f,0x0b28bfd,0x0e0e4c8,0x1ea7751,
-        0x18ce73b,0x2a406e7,0x273e48c,0x01b00db },
-      0 },
+        0x18ce73b,0x2a406e7,0x273e48c,0x01b00db } },
     /* 41 */
     { { 0x36e3138,0x2b84a83,0x345a5cf,0x00096b4,0x16966ef,0x159caf1,
         0x13c64b4,0x2f89226,0x25896af,0x00a4bfd },
       { 0x2213402,0x1435117,0x09fed52,0x09d0e4b,0x0f6580e,0x2871cba,
-        0x3b397fd,0x1c9d825,0x090311b,0x0191383 },
-      0 },
+        0x3b397fd,0x1c9d825,0x090311b,0x0191383 } },
     /* 42 */
     { { 0x07153f0,0x1087869,0x18c9e1e,0x1e64810,0x2b86c3b,0x0175d9c,
         0x3dce877,0x269de4e,0x393cab7,0x03c96b9 },
       { 0x1869d0c,0x06528db,0x02641f3,0x209261b,0x29d55c8,0x25ba517,
-        0x3b5ea30,0x028f927,0x25313db,0x00e6e39 },
-      0 },
+        0x3b5ea30,0x028f927,0x25313db,0x00e6e39 } },
     /* 43 */
     { { 0x2fd2e59,0x150802d,0x098f377,0x19a4957,0x135e2c0,0x38a95ce,
         0x1ab21a0,0x36c1b67,0x32f0f19,0x00e448b },
       { 0x3cad53c,0x3387800,0x17e3cfb,0x03f9970,0x3225b2c,0x2a84e1d,
-        0x3af1d29,0x3fe35ca,0x2f8ce80,0x0237a02 },
-      0 },
+        0x3af1d29,0x3fe35ca,0x2f8ce80,0x0237a02 } },
     /* 44 */
     { { 0x07bbb76,0x3aa3648,0x2758afb,0x1f085e0,0x1921c7e,0x3010dac,
         0x22b74b1,0x230137e,0x1062e36,0x021c652 },
       { 0x3993df5,0x24a2ee8,0x126ab5f,0x2d7cecf,0x0639d75,0x16d5414,
-        0x1aa78a8,0x3f78404,0x26a5b74,0x03f0c57 },
-      0 },
+        0x1aa78a8,0x3f78404,0x26a5b74,0x03f0c57 } },
     /* 45 */
     { { 0x0d6ecfa,0x3f506ba,0x3f86561,0x3d86bb1,0x15f8c44,0x2491d07,
         0x052a7b4,0x2422261,0x3adee38,0x039b529 },
       { 0x193c75d,0x14bb451,0x1162605,0x293749c,0x370a70d,0x2e8b1f6,
-        0x2ede937,0x2b95f4a,0x39a9be2,0x00d77eb },
-      0 },
+        0x2ede937,0x2b95f4a,0x39a9be2,0x00d77eb } },
     /* 46 */
     { { 0x2736636,0x15bf36a,0x2b7e6b9,0x25eb8b2,0x209f51d,0x3cd2659,
         0x10bf410,0x034afec,0x3d71c83,0x0076971 },
       { 0x0ce6825,0x07920cf,0x3c3b5c4,0x23fe55c,0x015ad11,0x08c0dae,
-        0x0552c7f,0x2e75a8a,0x0fddbf4,0x01c1df0 },
-      0 },
+        0x0552c7f,0x2e75a8a,0x0fddbf4,0x01c1df0 } },
     /* 47 */
     { { 0x2b9661c,0x0ffe351,0x3d71bf6,0x1ac34b3,0x3a1dfd3,0x211fe3d,
         0x33e140a,0x3f9100d,0x32ee50e,0x014ea18 },
       { 0x16d8051,0x1bfda1a,0x068a097,0x2571d3d,0x1daec0c,0x39389af,
-        0x194dc35,0x3f3058a,0x36d34e1,0x000a329 },
-      0 },
+        0x194dc35,0x3f3058a,0x36d34e1,0x000a329 } },
     /* 48 */
     { { 0x09877ee,0x351f73f,0x0002d11,0x0420074,0x2c8b362,0x130982d,
         0x02c1175,0x3c11b40,0x0d86962,0x001305f },
       { 0x0daddf5,0x2f4252c,0x15c06d9,0x1d49339,0x1bea235,0x0b680ed,
-        0x3356e67,0x1d1d198,0x1e9fed9,0x03dee93 },
-      0 },
+        0x3356e67,0x1d1d198,0x1e9fed9,0x03dee93 } },
     /* 49 */
     { { 0x3e1263f,0x2fe8d3a,0x3ce6d0d,0x0d5c6b9,0x3557637,0x0a9bd48,
         0x0405538,0x0710749,0x2005213,0x038c7e5 },
       { 0x26b6ec6,0x2e485ba,0x3c44d1b,0x0b9cf0b,0x037a1d1,0x27428a5,
-        0x0e7eac8,0x351ef04,0x259ce34,0x02a8e98 },
-      0 },
+        0x0e7eac8,0x351ef04,0x259ce34,0x02a8e98 } },
     /* 50 */
     { { 0x2f3dcd3,0x3e77d4d,0x3360fbc,0x1434afd,0x36ceded,0x3d413d6,
         0x1710fad,0x36bb924,0x1627e79,0x008e637 },
       { 0x109569e,0x1c168db,0x3769cf4,0x2ed4527,0x0ea0619,0x17d80d3,
-        0x1c03773,0x18843fe,0x1b21c04,0x015c5fd },
-      0 },
+        0x1c03773,0x18843fe,0x1b21c04,0x015c5fd } },
     /* 51 */
     { { 0x1dd895e,0x08a7248,0x04519fe,0x001030a,0x18e5185,0x358dfb3,
         0x13d2391,0x0a37be8,0x0560e3c,0x019828b },
       { 0x27fcbd0,0x2a22bb5,0x30969cc,0x1e03aa7,0x1c84724,0x0ba4ad3,
-        0x32f4817,0x0914cca,0x14c4f52,0x01893b9 },
-      0 },
+        0x32f4817,0x0914cca,0x14c4f52,0x01893b9 } },
     /* 52 */
     { { 0x097eccc,0x1273936,0x00aa095,0x364fe62,0x04d49d1,0x10e9f08,
         0x3c24230,0x3ef01c8,0x2fb92bd,0x013ce4a },
       { 0x1e44fd9,0x27e3e9f,0x2156696,0x3915ecc,0x0b66cfb,0x1a3af0f,
-        0x2fa8033,0x0e6736c,0x177ccdb,0x0228f9e },
-      0 },
+        0x2fa8033,0x0e6736c,0x177ccdb,0x0228f9e } },
     /* 53 */
     { { 0x2c4b125,0x06207c1,0x0a8cdde,0x003db8f,0x1ae34e3,0x31e84fa,
         0x2999de5,0x11013bd,0x02370c2,0x00e2234 },
       { 0x0f91081,0x200d591,0x1504762,0x1857c05,0x23d9fcf,0x0cb34db,
-        0x27edc86,0x08cd860,0x2471810,0x029798b },
-      0 },
+        0x27edc86,0x08cd860,0x2471810,0x029798b } },
     /* 54 */
     { { 0x3acd6c8,0x097b8cb,0x3c661a8,0x15152f2,0x1699c63,0x237e64c,
         0x23edf79,0x16b7033,0x0e6466a,0x00b11da },
       { 0x0a64bc9,0x1bfe324,0x1f5cb34,0x08391de,0x0630a60,0x3017a21,
-        0x09d064b,0x14a8365,0x041f9e6,0x01ed799 },
-      0 },
+        0x09d064b,0x14a8365,0x041f9e6,0x01ed799 } },
     /* 55 */
     { { 0x128444a,0x2508b07,0x2a39216,0x362f84d,0x2e996c5,0x2c31ff3,
         0x07afe5f,0x1d1288e,0x3cb0c8d,0x02e2bdc },
       { 0x38b86fd,0x3a0ea8c,0x1cff5fd,0x1629629,0x3fee3f1,0x02b250c,
-        0x2e8f6f2,0x0225727,0x15f7f3f,0x0280d8e },
-      0 },
+        0x2e8f6f2,0x0225727,0x15f7f3f,0x0280d8e } },
     /* 56 */
     { { 0x10f7770,0x0f1aee8,0x0e248c7,0x20684a8,0x3a6f16d,0x06f0ae7,
         0x0df6825,0x2d4cc40,0x301875f,0x012f8da },
       { 0x3b56dbb,0x1821ba7,0x24f8922,0x22c1f9e,0x0306fef,0x1b54bc8,
-        0x2ccc056,0x00303ba,0x2871bdc,0x0232f26 },
-      0 },
+        0x2ccc056,0x00303ba,0x2871bdc,0x0232f26 } },
     /* 57 */
     { { 0x0dac4ab,0x0625730,0x3112e13,0x101c4bf,0x3a874a4,0x2873b95,
         0x32ae7c6,0x0d7e18c,0x13e0c08,0x01139d5 },
       { 0x334002d,0x00fffdd,0x025c6d5,0x22c2cd1,0x19d35cb,0x3a1ce2d,
-        0x3702760,0x3f06257,0x03a5eb8,0x011c29a },
-      0 },
+        0x3702760,0x3f06257,0x03a5eb8,0x011c29a } },
     /* 58 */
     { { 0x0513482,0x1d87724,0x276a81b,0x0a807a4,0x3028720,0x339cc20,
         0x2441ee0,0x31bbf36,0x290c63d,0x0059041 },
       { 0x106a2ed,0x0d2819b,0x100bf50,0x114626c,0x1dd4d77,0x2e08632,
-        0x14ae72a,0x2ed3f64,0x1fd7abc,0x035cd1e },
-      0 },
+        0x14ae72a,0x2ed3f64,0x1fd7abc,0x035cd1e } },
     /* 59 */
     { { 0x2d4c6e5,0x3bec596,0x104d7ed,0x23d6c1b,0x0262cf0,0x15d72c5,
         0x2d5bb18,0x199ac4b,0x1e30771,0x020591a },
       { 0x21e291e,0x2e75e55,0x1661d7a,0x08b0778,0x3eb9daf,0x0d78144,
-        0x1827eb1,0x0fe73d2,0x123f0dd,0x0028db7 },
-      0 },
+        0x1827eb1,0x0fe73d2,0x123f0dd,0x0028db7 } },
     /* 60 */
     { { 0x1d5533c,0x34cb1d0,0x228f098,0x27a1a11,0x17c5f5a,0x0d26f44,
         0x2228ade,0x2c460e6,0x3d6fdba,0x038cc77 },
       { 0x3cc6ed8,0x02ada1a,0x260e510,0x2f7bde8,0x37160c3,0x33a1435,
-        0x23d9a7b,0x0ce2641,0x02a492e,0x034ed1e },
-      0 },
+        0x23d9a7b,0x0ce2641,0x02a492e,0x034ed1e } },
     /* 61 */
     { { 0x3821f90,0x26dba3c,0x3aada14,0x3b59bad,0x292edd9,0x2804c45,
         0x3669531,0x296f42e,0x35a4c86,0x01ca049 },
       { 0x3ff47e5,0x2163df4,0x2441503,0x2f18405,0x15e1616,0x37f66ec,
-        0x30f11a7,0x141658a,0x27ece14,0x00b018b },
-      0 },
+        0x30f11a7,0x141658a,0x27ece14,0x00b018b } },
     /* 62 */
     { { 0x159ac2e,0x3e65bc0,0x2713a76,0x0db2f6c,0x3281e77,0x2391811,
         0x16d2880,0x1fbc4ab,0x1f92c4e,0x00a0a8d },
       { 0x0ce5cd2,0x152c7b0,0x02299c3,0x3244de7,0x2cf99ef,0x3a0b047,
-        0x2caf383,0x0aaf664,0x113554d,0x031c735 },
-      0 },
+        0x2caf383,0x0aaf664,0x113554d,0x031c735 } },
     /* 63 */
     { { 0x1b578f4,0x177a702,0x3a7a488,0x1638ebf,0x31884e2,0x2460bc7,
         0x36b1b75,0x3ce8e3d,0x340cf47,0x03143d9 },
       { 0x34b68ea,0x12b7ccd,0x1fe2a9c,0x08da659,0x0a406f3,0x1694c14,
-        0x06a2228,0x16370be,0x3a72129,0x02e7b2c },
-      0 },
+        0x06a2228,0x16370be,0x3a72129,0x02e7b2c } },
     /* 64 */
     { { 0x0f8b16a,0x21043bd,0x266a56f,0x3fb11ec,0x197241a,0x36721f0,
         0x006b8e6,0x2ac6c29,0x202cd42,0x0200fcf },
       { 0x0dbec69,0x0c26a01,0x105f7f0,0x3dceeeb,0x3a83b85,0x363865f,
-        0x097273a,0x2b70718,0x00e5067,0x03025d1 },
-      0 },
+        0x097273a,0x2b70718,0x00e5067,0x03025d1 } },
     /* 65 */
     { { 0x379ab34,0x295bcb0,0x38d1846,0x22e1077,0x3a8ee06,0x1db1a3b,
         0x3144591,0x07cc080,0x2d5915f,0x03c6bcc },
       { 0x175bd50,0x0dd4c57,0x27bc99c,0x2ebdcbd,0x3837cff,0x235dc8f,
-        0x13a4184,0x0722c18,0x130e2d4,0x008f43c },
-      0 },
+        0x13a4184,0x0722c18,0x130e2d4,0x008f43c } },
     /* 66 */
     { { 0x01500d9,0x2adbb7d,0x2da8857,0x397f2fa,0x10d890a,0x25c9654,
         0x3e86488,0x3eb754b,0x1d6c0a3,0x02c0a23 },
       { 0x10bcb08,0x083cc19,0x2e16853,0x04da575,0x271af63,0x2626a9d,
-        0x3520a7b,0x32348c7,0x24ff408,0x03ff4dc },
-      0 },
+        0x3520a7b,0x32348c7,0x24ff408,0x03ff4dc } },
     /* 67 */
     { { 0x058e6cb,0x1a3992d,0x1d28539,0x080c5e9,0x2992dad,0x2a9d7d5,
         0x14ae0b7,0x09b7ce0,0x34ad78c,0x03d5643 },
       { 0x30ba55a,0x092f4f3,0x0bae0fc,0x12831de,0x20fc472,0x20ed9d2,
-        0x29864f6,0x1288073,0x254f6f7,0x00635b6 },
-      0 },
+        0x29864f6,0x1288073,0x254f6f7,0x00635b6 } },
     /* 68 */
     { { 0x1be5a2b,0x0f88975,0x33c6ed9,0x20d64d3,0x06fe799,0x0989bff,
         0x1409262,0x085a90c,0x0d97990,0x0142eed },
       { 0x17ec63e,0x06471b9,0x0db2378,0x1006077,0x265422c,0x08db83d,
-        0x28099b0,0x1270d06,0x11801fe,0x00ac400 },
-      0 },
+        0x28099b0,0x1270d06,0x11801fe,0x00ac400 } },
     /* 69 */
     { { 0x3391593,0x22d7166,0x30fcfc6,0x2896609,0x3c385f5,0x066b72e,
         0x04f3aad,0x2b831c5,0x19983fb,0x0375562 },
       { 0x0b82ff4,0x222e39d,0x34c993b,0x101c79c,0x2d2e03c,0x0f00c8a,
-        0x3a9eaf4,0x1810669,0x151149d,0x039b931 },
-      0 },
+        0x3a9eaf4,0x1810669,0x151149d,0x039b931 } },
     /* 70 */
     { { 0x29af288,0x1956ec7,0x293155f,0x193deb6,0x1647e1a,0x2ca0839,
         0x297e4bc,0x15bfd0d,0x1b107ed,0x0147803 },
       { 0x31c327e,0x05a6e1d,0x02ad43d,0x02d2a5b,0x129cdb2,0x37ad1de,
-        0x3d51f53,0x245df01,0x2414982,0x0388bd0 },
-      0 },
+        0x3d51f53,0x245df01,0x2414982,0x0388bd0 } },
     /* 71 */
     { { 0x35f1abb,0x17a3d18,0x0874cd4,0x2d5a14e,0x17edc0c,0x16a00d3,
         0x072c1fb,0x1232725,0x33d52dc,0x03dc24d },
       { 0x0af30d6,0x259aeea,0x369c401,0x12bc4de,0x295bf5f,0x0d8711f,
-        0x26162a9,0x16c44e5,0x288e727,0x02f54b4 },
-      0 },
+        0x26162a9,0x16c44e5,0x288e727,0x02f54b4 } },
     /* 72 */
     { { 0x05fa877,0x1571ea7,0x3d48ab1,0x1c9f4e8,0x017dad6,0x0f46276,
         0x343f9e7,0x1de990f,0x0e4c8aa,0x028343e },
       { 0x094f92d,0x3abf633,0x1b3a0bb,0x2f83137,0x0d818c8,0x20bae85,
-        0x0c65f8b,0x1a8008b,0x0c7946d,0x0295b1e },
-      0 },
+        0x0c65f8b,0x1a8008b,0x0c7946d,0x0295b1e } },
     /* 73 */
     { { 0x1d09529,0x08e46c3,0x1fcf296,0x298f6b7,0x1803e0e,0x2d6fd20,
         0x37351f5,0x0d9e8b1,0x1f8731a,0x0362fbf },
       { 0x00157f4,0x06750bf,0x2650ab9,0x35ffb23,0x2f51cae,0x0b522c2,
-        0x39cb400,0x191e337,0x0a5ce9f,0x021529a },
-      0 },
+        0x39cb400,0x191e337,0x0a5ce9f,0x021529a } },
     /* 74 */
     { { 0x3506ea5,0x17d9ed8,0x0d66dc3,0x22693f8,0x19286c4,0x3a57353,
         0x101d3bf,0x1aa54fc,0x20b9884,0x0172b3a },
       { 0x0eac44d,0x37d8327,0x1c3aa90,0x3d0d534,0x23db29a,0x3576eaf,
-        0x1d3de8a,0x3bea423,0x11235e4,0x039260b },
-      0 },
+        0x1d3de8a,0x3bea423,0x11235e4,0x039260b } },
     /* 75 */
     { { 0x34cd55e,0x01288b0,0x1132231,0x2cc9a03,0x358695b,0x3e87650,
         0x345afa1,0x01267ec,0x3f616b2,0x02011ad },
       { 0x0e7d098,0x0d6078e,0x0b70b53,0x237d1bc,0x0d7f61e,0x132de31,
-        0x1ea9ea4,0x2bd54c3,0x27b9082,0x03ac5f2 },
-      0 },
+        0x1ea9ea4,0x2bd54c3,0x27b9082,0x03ac5f2 } },
     /* 76 */
     { { 0x2a145b9,0x06d661d,0x31ec175,0x03f06f1,0x3a5cf6b,0x249c56e,
         0x2035653,0x384c74f,0x0bafab5,0x0025ec0 },
       { 0x25f69e1,0x1b23a55,0x1199aa6,0x16ad6f9,0x077e8f7,0x293f661,
-        0x33ba11d,0x3327980,0x07bafdb,0x03e571d },
-      0 },
+        0x33ba11d,0x3327980,0x07bafdb,0x03e571d } },
     /* 77 */
     { { 0x2bae45e,0x3c074ef,0x2955558,0x3c312f1,0x2a8ebe9,0x2f193f1,
         0x3705b1d,0x360deba,0x01e566e,0x00d4498 },
       { 0x21161cd,0x1bc787e,0x2f87933,0x3553197,0x1328ab8,0x093c879,
-        0x17eee27,0x2adad1d,0x1236068,0x003be5c },
-      0 },
+        0x17eee27,0x2adad1d,0x1236068,0x003be5c } },
     /* 78 */
     { { 0x0ca4226,0x2633dd5,0x2c8e025,0x0e3e190,0x05eede1,0x1a385e4,
         0x163f744,0x2f25522,0x1333b4f,0x03f05b6 },
       { 0x3c800ca,0x1becc79,0x2daabe9,0x0c499e2,0x1138063,0x3fcfa2d,
-        0x2244976,0x1e85cf5,0x2f1b95d,0x0053292 },
-      0 },
+        0x2244976,0x1e85cf5,0x2f1b95d,0x0053292 } },
     /* 79 */
     { { 0x12f81d5,0x1dc6eaf,0x11967a4,0x1a407df,0x31a5f9d,0x2b67241,
         0x18bef7c,0x08c7762,0x063f59c,0x01015ec },
       { 0x1c05c0a,0x360bfa2,0x1f85bff,0x1bc7703,0x3e4911c,0x0d685b6,
-        0x2fccaea,0x02c4cef,0x164f133,0x0070ed7 },
-      0 },
+        0x2fccaea,0x02c4cef,0x164f133,0x0070ed7 } },
     /* 80 */
     { { 0x0ec21fe,0x052ffa0,0x3e825fe,0x1ab0956,0x3f6ce11,0x3d29759,
         0x3c5a072,0x18ebe62,0x148db7e,0x03eb49c },
       { 0x1ab05b3,0x02dab0a,0x1ae690c,0x0f13894,0x137a9a8,0x0aab79f,
-        0x3dc875c,0x06a1029,0x1e39f0e,0x01dce1f },
-      0 },
+        0x3dc875c,0x06a1029,0x1e39f0e,0x01dce1f } },
     /* 81 */
     { { 0x16c0dd7,0x3b31269,0x2c741e9,0x3611821,0x2a5cffc,0x1416bb3,
         0x3a1408f,0x311fa3d,0x1c0bef0,0x02cdee1 },
       { 0x00e6a8f,0x1adb933,0x0f23359,0x2fdace2,0x2fd6d4b,0x0e73bd3,
-        0x2453fac,0x0a356ae,0x2c8f9f6,0x02704d6 },
-      0 },
+        0x2453fac,0x0a356ae,0x2c8f9f6,0x02704d6 } },
     /* 82 */
     { { 0x0e35743,0x28c80a1,0x0def32a,0x2c6168f,0x1320d6a,0x37c6606,
         0x21b1761,0x2147ee0,0x21fc433,0x015c84d },
       { 0x1fc9168,0x36cda9c,0x003c1f0,0x1cd7971,0x15f98ba,0x1ef363d,
-        0x0ca87e3,0x046f7d9,0x3c9e6bb,0x0372eb0 },
-      0 },
+        0x0ca87e3,0x046f7d9,0x3c9e6bb,0x0372eb0 } },
     /* 83 */
     { { 0x118cbe2,0x3665a11,0x304ef01,0x062727a,0x3d242fc,0x11ffbaf,
         0x3663c7e,0x1a189c9,0x09e2d62,0x02e3072 },
       { 0x0e1d569,0x162f772,0x0cd051a,0x322df62,0x3563809,0x047cc7a,
-        0x027fd9f,0x08b509b,0x3da2f94,0x01748ee },
-      0 },
+        0x027fd9f,0x08b509b,0x3da2f94,0x01748ee } },
     /* 84 */
     { { 0x1c8f8be,0x31ca525,0x22bf0a1,0x200efcd,0x02961c4,0x3d8f52b,
         0x018403d,0x3a40279,0x1cb91ec,0x030427e },
       { 0x0945705,0x0257416,0x05c0c2d,0x25b77ae,0x3b9083d,0x2901126,
-        0x292b8d7,0x07b8611,0x04f2eee,0x026f0cd },
-      0 },
+        0x292b8d7,0x07b8611,0x04f2eee,0x026f0cd } },
     /* 85 */
     { { 0x2913074,0x2b8d590,0x02b10d5,0x09d2295,0x255491b,0x0c41cca,
         0x1ca665b,0x133051a,0x1525f1a,0x00a5647 },
       { 0x04f983f,0x3d6daee,0x04e1e76,0x1067d7e,0x1be7eef,0x02ea862,
-        0x00d4968,0x0ccb048,0x11f18ef,0x018dd95 },
-      0 },
+        0x00d4968,0x0ccb048,0x11f18ef,0x018dd95 } },
     /* 86 */
     { { 0x22976cc,0x17c5395,0x2c38bda,0x3983bc4,0x222bca3,0x332a614,
         0x3a30646,0x261eaef,0x1c808e2,0x02f6de7 },
       { 0x306a772,0x32d7272,0x2dcefd2,0x2abf94d,0x038f475,0x30ad76e,
-        0x23e0227,0x3052b0a,0x001add3,0x023ba18 },
-      0 },
+        0x23e0227,0x3052b0a,0x001add3,0x023ba18 } },
     /* 87 */
     { { 0x0ade873,0x25a6069,0x248ccbe,0x13713ee,0x17ee9aa,0x28152e9,
         0x2e28995,0x2a92cb3,0x17a6f77,0x024b947 },
       { 0x190a34d,0x2ebea1c,0x1ed1948,0x16fdaf4,0x0d698f7,0x32bc451,
-        0x0ee6e30,0x2aaab40,0x06f0a56,0x01460be },
-      0 },
+        0x0ee6e30,0x2aaab40,0x06f0a56,0x01460be } },
     /* 88 */
     { { 0x24cc99c,0x1884b1e,0x1ca1fba,0x1a0f9b6,0x2ff609b,0x2b26316,
         0x3b27cb5,0x29bc976,0x35d4073,0x024772a },
       { 0x3575a70,0x1b30f57,0x07fa01b,0x0e5be36,0x20cb361,0x26605cd,
-        0x1d4e8c8,0x13cac59,0x2db9797,0x005e833 },
-      0 },
+        0x1d4e8c8,0x13cac59,0x2db9797,0x005e833 } },
     /* 89 */
     { { 0x36c8d3a,0x1878a81,0x124b388,0x0e4843e,0x1701aad,0x0ea0d76,
         0x10eae41,0x37d0653,0x36c7f4c,0x00ba338 },
       { 0x37a862b,0x1cf6ac0,0x08fa912,0x2dd8393,0x101ba9b,0x0eebcb7,
-        0x2453883,0x1a3cfe5,0x2cb34f6,0x03d3331 },
-      0 },
+        0x2453883,0x1a3cfe5,0x2cb34f6,0x03d3331 } },
     /* 90 */
     { { 0x1f79687,0x3d4973c,0x281544e,0x2564bbe,0x17c5954,0x171e34a,
         0x231741a,0x3cf2784,0x0889a0d,0x02b036d },
       { 0x301747f,0x3f1c477,0x1f1386b,0x163bc5f,0x1592b93,0x332daed,
-        0x080e4f5,0x1d28b96,0x26194c9,0x0256992 },
-      0 },
+        0x080e4f5,0x1d28b96,0x26194c9,0x0256992 } },
     /* 91 */
     { { 0x15a4c93,0x07bf6b0,0x114172c,0x1ce0961,0x140269b,0x1b2c2eb,
         0x0dfb1c1,0x019ddaa,0x0ba2921,0x008c795 },
       { 0x2e6d2dc,0x37e45e2,0x2918a70,0x0fce444,0x34d6aa6,0x396dc88,
-        0x27726b5,0x0c787d8,0x032d8a7,0x02ac2f8 },
-      0 },
+        0x27726b5,0x0c787d8,0x032d8a7,0x02ac2f8 } },
     /* 92 */
     { { 0x1131f2d,0x2b43a63,0x3101097,0x38cec13,0x0637f09,0x17a69d2,
         0x086196d,0x299e46b,0x0802cf6,0x03c6f32 },
       { 0x0daacb4,0x1a4503a,0x100925c,0x15583d9,0x23c4e40,0x1de4de9,
-        0x1cc8fc4,0x2c9c564,0x0695aeb,0x02145a5 },
-      0 },
+        0x1cc8fc4,0x2c9c564,0x0695aeb,0x02145a5 } },
     /* 93 */
     { { 0x1dcf593,0x17050fc,0x3e3bde3,0x0a6c062,0x178202b,0x2f7674f,
         0x0dadc29,0x15763a7,0x1d2daad,0x023d9f6 },
       { 0x081ea5f,0x045959d,0x190c841,0x3a78d31,0x0e7d2dd,0x1414fea,
-        0x1d43f40,0x22d77ff,0x2b9c072,0x03e115c },
-      0 },
+        0x1d43f40,0x22d77ff,0x2b9c072,0x03e115c } },
     /* 94 */
     { { 0x3af71c9,0x29e9c65,0x25655e1,0x111e9cd,0x3a14494,0x3875418,
         0x34ae070,0x0b06686,0x310616b,0x03b7b89 },
       { 0x1734121,0x00d3d44,0x29f0b2f,0x1552897,0x31cac6e,0x1030bb3,
-        0x0148f3a,0x35fd237,0x29b44eb,0x027f49f },
-      0 },
+        0x0148f3a,0x35fd237,0x29b44eb,0x027f49f } },
     /* 95 */
     { { 0x2e2cb16,0x1d962bd,0x19b63cc,0x0b3f964,0x3e3eb7d,0x1a35560,
         0x0c58161,0x3ce1d6a,0x3b6958f,0x029030b },
       { 0x2dcc158,0x3b1583f,0x30568c9,0x31957c8,0x27ad804,0x28c1f84,
-        0x3967049,0x37b3f64,0x3b87dc6,0x0266f26 },
-      0 },
+        0x3967049,0x37b3f64,0x3b87dc6,0x0266f26 } },
     /* 96 */
     { { 0x27dafc6,0x2548764,0x0d1984a,0x1a57027,0x252c1fb,0x24d9b77,
         0x1581a0f,0x1f99276,0x10ba16d,0x026af88 },
       { 0x0915220,0x2be1292,0x16c6480,0x1a93760,0x2fa7317,0x1a07296,
-        0x1539871,0x112c31f,0x25787f3,0x01e2070 },
-      0 },
+        0x1539871,0x112c31f,0x25787f3,0x01e2070 } },
     /* 97 */
     { { 0x0bcf3ff,0x266d478,0x34f6933,0x31449fd,0x00d02cb,0x340765a,
         0x3465a2d,0x225023e,0x319a30e,0x00579b8 },
       { 0x20e05f4,0x35b834f,0x0404646,0x3710d62,0x3fad7bd,0x13e1434,
-        0x21c7d1c,0x1cb3af9,0x2cf1911,0x003957e },
-      0 },
+        0x21c7d1c,0x1cb3af9,0x2cf1911,0x003957e } },
     /* 98 */
     { { 0x0787564,0x36601be,0x1ce67e9,0x084c7a1,0x21a3317,0x2067a35,
         0x0158cab,0x195ddac,0x1766fe9,0x035cf42 },
       { 0x2b7206e,0x20d0947,0x3b42424,0x03f1862,0x0a51929,0x38c2948,
-        0x0bb8595,0x2942d77,0x3748f15,0x0249428 },
-      0 },
+        0x0bb8595,0x2942d77,0x3748f15,0x0249428 } },
     /* 99 */
     { { 0x2577410,0x3c23e2f,0x28c6caf,0x00d41de,0x0fd408a,0x30298e9,
         0x363289e,0x2302fc7,0x082c1cc,0x01dd050 },
       { 0x30991cd,0x103e9ba,0x029605a,0x19927f7,0x0c1ca08,0x0c93f50,
-        0x28a3c7b,0x082e4e9,0x34d12eb,0x0232c13 },
-      0 },
+        0x28a3c7b,0x082e4e9,0x34d12eb,0x0232c13 } },
     /* 100 */
     { { 0x106171c,0x0b4155a,0x0c3fb1c,0x336c090,0x19073e9,0x2241a10,
         0x0e6b4fd,0x0ed476e,0x1ef4712,0x039390a },
       { 0x0ec36f4,0x3754f0e,0x2a270b8,0x007fd2d,0x0f9d2dc,0x1e6a692,
-        0x066e078,0x1954974,0x2ff3c6e,0x00def28 },
-      0 },
+        0x066e078,0x1954974,0x2ff3c6e,0x00def28 } },
     /* 101 */
     { { 0x3562470,0x0b8f1f7,0x0ac94cd,0x28b0259,0x244f272,0x031e4ef,
         0x2d5df98,0x2c8a9f1,0x2dc3002,0x016644f },
       { 0x350592a,0x0e6a0d5,0x1e027a1,0x2039e0f,0x399e01d,0x2817593,
-        0x0c0375e,0x3889b3e,0x24ab013,0x010de1b },
-      0 },
+        0x0c0375e,0x3889b3e,0x24ab013,0x010de1b } },
     /* 102 */
     { { 0x256b5a6,0x0ac3b67,0x28f9ff3,0x29b67f1,0x30750d9,0x25e11a9,
         0x15e8455,0x279ebb0,0x298b7e7,0x0218e32 },
       { 0x2fc24b2,0x2b82582,0x28f22f5,0x2bd36b3,0x305398e,0x3b2e9e3,
-        0x365dd0a,0x29bc0ed,0x36a7b3a,0x007b374 },
-      0 },
+        0x365dd0a,0x29bc0ed,0x36a7b3a,0x007b374 } },
     /* 103 */
     { { 0x05ff2f3,0x2b3589b,0x29785d3,0x300a1ce,0x0a2d516,0x0844355,
         0x14c9fad,0x3ccb6b6,0x385d459,0x0361743 },
       { 0x0b11da3,0x002e344,0x18c49f7,0x0c29e0c,0x1d2c22c,0x08237b3,
-        0x2988f49,0x0f18955,0x1c3b4ed,0x02813c6 },
-      0 },
+        0x2988f49,0x0f18955,0x1c3b4ed,0x02813c6 } },
     /* 104 */
     { { 0x17f93bd,0x249323b,0x11f6087,0x174e4bd,0x3cb64ac,0x086dc6b,
         0x2e330a8,0x142c1f2,0x2ea5c09,0x024acbb },
       { 0x1b6e235,0x3132521,0x00f085a,0x2a4a4db,0x1ab2ca4,0x0142224,
-        0x3aa6b3e,0x09db203,0x2215834,0x007b9e0 },
-      0 },
+        0x3aa6b3e,0x09db203,0x2215834,0x007b9e0 } },
     /* 105 */
     { { 0x23e79f7,0x28b8039,0x1906a60,0x2cbce67,0x1f590e7,0x181f027,
         0x21054a6,0x3854240,0x2d857a6,0x03cfcb3 },
       { 0x10d9b55,0x1443cfc,0x2648200,0x2b36190,0x09d2fcf,0x22f439f,
-        0x231aa7e,0x3884395,0x0543da3,0x003d5a9 },
-      0 },
+        0x231aa7e,0x3884395,0x0543da3,0x003d5a9 } },
     /* 106 */
     { { 0x043e0df,0x06ffe84,0x3e6d5b2,0x3327001,0x26c74b6,0x12a145e,
         0x256ec0d,0x3898c69,0x3411969,0x02f63c5 },
       { 0x2b7494a,0x2eee1af,0x38388a9,0x1bd17ce,0x21567d4,0x13969e6,
-        0x3a12a7a,0x3e8277d,0x03530cc,0x00b4687 },
-      0 },
+        0x3a12a7a,0x3e8277d,0x03530cc,0x00b4687 } },
     /* 107 */
     { { 0x06508da,0x38e04d4,0x15a7192,0x312875e,0x3336180,0x2a6512c,
         0x1b59497,0x2e91b37,0x25eb91f,0x02841e9 },
       { 0x394d639,0x0747143,0x37d7e6d,0x1d62962,0x08b4af3,0x34df287,
-        0x3c5584b,0x26bc869,0x20af87a,0x0060f5d },
-      0 },
+        0x3c5584b,0x26bc869,0x20af87a,0x0060f5d } },
     /* 108 */
     { { 0x1de59a4,0x1a5c443,0x2f8729d,0x01c3a2f,0x0f1ad8d,0x3cbaf9e,
         0x1b49634,0x35d508a,0x39dc269,0x0075105 },
       { 0x390d30e,0x37033e0,0x110cb32,0x14c37a0,0x20a3b27,0x2f00ce6,
-        0x2f1dc52,0x34988c6,0x0c29606,0x01dc7e7 },
-      0 },
+        0x2f1dc52,0x34988c6,0x0c29606,0x01dc7e7 } },
     /* 109 */
     { { 0x1040739,0x24f9de1,0x2939999,0x2e6009a,0x244539d,0x17e3f09,
         0x00f6f2f,0x1c63b3d,0x2310362,0x019109e },
       { 0x1428aa8,0x3cb61e1,0x09a84f4,0x0ffafed,0x07b7adc,0x08f406b,
-        0x1b2c6df,0x035b480,0x3496ae9,0x012766d },
-      0 },
+        0x1b2c6df,0x035b480,0x3496ae9,0x012766d } },
     /* 110 */
     { { 0x35d1099,0x2362f10,0x1a08cc7,0x13a3a34,0x12adbcd,0x32da290,
         0x02e2a02,0x151140b,0x01b3f60,0x0240df6 },
       { 0x34c7b61,0x2eb09c1,0x172e7cd,0x2ad5eff,0x2fe2031,0x25b54d4,
-        0x0cec965,0x18e7187,0x26a7cc0,0x00230f7 },
-      0 },
+        0x0cec965,0x18e7187,0x26a7cc0,0x00230f7 } },
     /* 111 */
     { { 0x2d552ab,0x374083d,0x01f120f,0x2601736,0x156baff,0x04d44a4,
         0x3b7c3e9,0x1acbc1b,0x0424579,0x031a425 },
       { 0x1231bd1,0x0eba710,0x020517b,0x21d7316,0x21eac6e,0x275a848,
-        0x0837abf,0x0eb0082,0x302cafe,0x00fe8f6 },
-      0 },
+        0x0837abf,0x0eb0082,0x302cafe,0x00fe8f6 } },
     /* 112 */
     { { 0x1058880,0x28f9941,0x03f2d75,0x3bd90e5,0x17da365,0x2ac9249,
         0x07861cf,0x023fd05,0x1b0fdb8,0x031712f },
       { 0x272b56b,0x04f8d2c,0x043a735,0x25446e4,0x1c8327e,0x221125a,
-        0x0ce37df,0x2dad7f6,0x39446c2,0x00b55b6 },
-      0 },
+        0x0ce37df,0x2dad7f6,0x39446c2,0x00b55b6 } },
     /* 113 */
     { { 0x346ac6b,0x05e0bff,0x2425246,0x0981e8b,0x1d19f79,0x2692378,
         0x3ea3c40,0x2e90beb,0x19de503,0x003d5af },
       { 0x05cda49,0x353b44d,0x299d137,0x3f205bc,0x2821158,0x3ad0d00,
-        0x06a54aa,0x2d7c79f,0x39d1173,0x01000ee },
-      0 },
+        0x06a54aa,0x2d7c79f,0x39d1173,0x01000ee } },
     /* 114 */
     { { 0x0803387,0x3a06268,0x14043b8,0x3d4e72f,0x1ece115,0x0a1dfc8,
         0x17208dd,0x0be790a,0x122a07f,0x014dd95 },
       { 0x0a4182d,0x202886a,0x1f79a49,0x1e8c867,0x0a2bbd0,0x28668b5,
-        0x0d0a2e1,0x115259d,0x3586c5d,0x01e815b },
-      0 },
+        0x0d0a2e1,0x115259d,0x3586c5d,0x01e815b } },
     /* 115 */
     { { 0x18a2a47,0x2c95627,0x2773646,0x1230f7c,0x15b5829,0x2fc354e,
         0x2c000ea,0x099d547,0x2f17a1a,0x01df520 },
       { 0x3853948,0x06f6561,0x3feeb8a,0x2f5b3ef,0x3a6f817,0x01a0791,
-        0x2ec0578,0x2c392ad,0x12b2b38,0x0104540 },
-      0 },
+        0x2ec0578,0x2c392ad,0x12b2b38,0x0104540 } },
     /* 116 */
     { { 0x1e28ced,0x0fc3d1b,0x2c473c7,0x1826c4f,0x21d5da7,0x39718e4,
         0x38ce9e6,0x0251986,0x172fbea,0x0337c11 },
       { 0x053c3b0,0x0f162db,0x043c1cb,0x04111ee,0x297fe3c,0x32e5e03,
-        0x2b8ae12,0x0c427ec,0x1da9738,0x03b9c0f },
-      0 },
+        0x2b8ae12,0x0c427ec,0x1da9738,0x03b9c0f } },
     /* 117 */
     { { 0x357e43a,0x054503f,0x11b8345,0x34ec6e0,0x2d44660,0x3d0ae61,
         0x3b5dff8,0x33884ac,0x09da162,0x00a82b6 },
       { 0x3c277ba,0x129a51a,0x027664e,0x1530507,0x0c788c9,0x2afd89d,
-        0x1aa64cc,0x1196450,0x367ac2b,0x0358b42 },
-      0 },
+        0x1aa64cc,0x1196450,0x367ac2b,0x0358b42 } },
     /* 118 */
     { { 0x0054ac4,0x1761ecb,0x378839c,0x167c9f7,0x2570058,0x0604a35,
         0x37cbf3b,0x0909bb7,0x3f2991c,0x02ce688 },
       { 0x0b16ae5,0x212857c,0x351b952,0x2c684db,0x30c6a05,0x09c01e0,
-        0x23c137f,0x1331475,0x092c067,0x0013b40 },
-      0 },
+        0x23c137f,0x1331475,0x092c067,0x0013b40 } },
     /* 119 */
     { { 0x2e90393,0x0617466,0x24e61f4,0x0a528f5,0x03047b4,0x2153f05,
         0x0001a69,0x30e1eb8,0x3c10177,0x0282a47 },
       { 0x22c831e,0x28fc06b,0x3e16ff0,0x208adc9,0x0bb76ae,0x28c1d6d,
-        0x12c8a15,0x031063c,0x1889ed2,0x002133e },
-      0 },
+        0x12c8a15,0x031063c,0x1889ed2,0x002133e } },
     /* 120 */
     { { 0x0a6becf,0x14277bf,0x3328d98,0x201f7fe,0x12fceae,0x1de3a2e,
         0x0a15c44,0x3ddf976,0x1b273ab,0x0355e55 },
       { 0x1b5d4f1,0x369e78c,0x3a1c210,0x12cf3e9,0x3aa52f0,0x309f082,
-        0x112089d,0x107c753,0x24202d1,0x023853a },
-      0 },
+        0x112089d,0x107c753,0x24202d1,0x023853a } },
     /* 121 */
     { { 0x2897042,0x140d17c,0x2c4aeed,0x07d0d00,0x18d0533,0x22f7ec8,
         0x19c194c,0x3456323,0x2372aa4,0x0165f86 },
       { 0x30bd68c,0x1fb06b3,0x0945032,0x372ac09,0x06d4be0,0x27f8fa1,
-        0x1c8d7ac,0x137a96e,0x236199b,0x0328fc0 },
-      0 },
+        0x1c8d7ac,0x137a96e,0x236199b,0x0328fc0 } },
     /* 122 */
     { { 0x170bd20,0x2842d58,0x1de7592,0x3c5b4fd,0x20ea897,0x12cab78,
         0x363ff14,0x01f928c,0x17e309c,0x02f79ff },
       { 0x0f5432c,0x2edb4ae,0x044b516,0x32f810d,0x2210dc1,0x23e56d6,
-        0x301e6ff,0x34660f6,0x10e0a7d,0x02d88eb },
-      0 },
+        0x301e6ff,0x34660f6,0x10e0a7d,0x02d88eb } },
     /* 123 */
     { { 0x0c7b65b,0x2f59d58,0x2289a75,0x2408e92,0x1ab8c55,0x1ec99e5,
         0x220fd0d,0x04defe0,0x24658ec,0x035aa8b },
       { 0x138bb85,0x2f002d4,0x295c10a,0x08760ce,0x28c31d1,0x1c0a8cb,
-        0x0ff00b1,0x144eac9,0x2e02dcc,0x0044598 },
-      0 },
+        0x0ff00b1,0x144eac9,0x2e02dcc,0x0044598 } },
     /* 124 */
     { { 0x3b42b87,0x050057b,0x0dff781,0x1c06db1,0x1bd9f5d,0x1f5f04a,
         0x2cccd7a,0x143e19b,0x1cb94b7,0x036cfb8 },
       { 0x34837cf,0x3cf6c3c,0x0d4fb26,0x22ee55e,0x1e7eed1,0x315995f,
-        0x2cdf937,0x1a96574,0x0425220,0x0221a99 },
-      0 },
+        0x2cdf937,0x1a96574,0x0425220,0x0221a99 } },
     /* 125 */
     { { 0x1b569ea,0x0d33ed9,0x19c13c2,0x107dc84,0x2200111,0x0569867,
         0x2dc85da,0x05ef22e,0x0eb018a,0x029c33d },
       { 0x04a6a65,0x3e5eba3,0x378f224,0x09c04d0,0x036e5cf,0x3df8258,
-        0x3a609e4,0x1eddef8,0x2abd174,0x02a91dc },
-      0 },
+        0x3a609e4,0x1eddef8,0x2abd174,0x02a91dc } },
     /* 126 */
     { { 0x2a60cc0,0x1d84c5e,0x115f676,0x1840da0,0x2c79163,0x2f06ed6,
         0x198bb4b,0x3e5d37b,0x1dc30fa,0x018469b },
       { 0x15ee47a,0x1e32f30,0x16a530e,0x2093836,0x02e8962,0x3767b62,
-        0x335adf3,0x27220db,0x2f81642,0x0173ffe },
-      0 },
+        0x335adf3,0x27220db,0x2f81642,0x0173ffe } },
     /* 127 */
     { { 0x37a99cd,0x1533fe6,0x05a1c0d,0x27610f1,0x17bf3b9,0x0b1ce78,
         0x0a908f6,0x265300e,0x3237dc1,0x01b969a },
       { 0x3a5db77,0x2d15382,0x0d63ef8,0x1feb3d8,0x0b7b880,0x19820de,
-        0x11c0c67,0x2af3396,0x38d242d,0x0120688 },
-      0 },
+        0x11c0c67,0x2af3396,0x38d242d,0x0120688 } },
     /* 128 */
     { { 0x1d0b34a,0x05ef00d,0x00a7e34,0x1ae0c9f,0x1440b38,0x300d8b4,
         0x37262da,0x3e50e3e,0x14ce0cd,0x00b1044 },
       { 0x195a0b1,0x173bc6b,0x03622ba,0x2a19f55,0x1c09b37,0x07921b2,
-        0x16cdd20,0x24a5c9b,0x2bf42ff,0x00811de },
-      0 },
+        0x16cdd20,0x24a5c9b,0x2bf42ff,0x00811de } },
     /* 129 */
     { { 0x0d65dbf,0x145cf06,0x1ad82f7,0x038ce7b,0x077bf94,0x33c4007,
         0x22d26bd,0x25ad9c0,0x09ac773,0x02b1990 },
       { 0x2261cc3,0x2ecdbf1,0x3e908b0,0x3246439,0x0213f7b,0x1179b04,
-        0x01cebaa,0x0be1595,0x175cc12,0x033a39a },
-      0 },
+        0x01cebaa,0x0be1595,0x175cc12,0x033a39a } },
     /* 130 */
     { { 0x00a67d2,0x086d06f,0x248a0f1,0x0291134,0x362d476,0x166d1cd,
         0x044f1d6,0x2d2a038,0x365250b,0x0023f78 },
       { 0x08bf287,0x3b0f6a1,0x1d6eace,0x20b4cda,0x2c2a621,0x0912520,
-        0x02dfdc9,0x1b35cd6,0x3d2565d,0x00bdf8b },
-      0 },
+        0x02dfdc9,0x1b35cd6,0x3d2565d,0x00bdf8b } },
     /* 131 */
     { { 0x3770fa7,0x2e4b6f0,0x03f9ae4,0x170de41,0x1095e8d,0x1dd845c,
         0x334e9d1,0x00ab953,0x12e9077,0x03196fa },
       { 0x2fd0a40,0x228c0fd,0x384b275,0x38ef339,0x3e7d822,0x3e5d9ef,
-        0x24f5854,0x0ece9eb,0x247d119,0x012ffe3 },
-      0 },
+        0x24f5854,0x0ece9eb,0x247d119,0x012ffe3 } },
     /* 132 */
     { { 0x0ff1480,0x07487c0,0x1b16cd4,0x1f41d53,0x22ab8fb,0x2f83cfa,
         0x01d2efb,0x259f6b2,0x2e65772,0x00f9392 },
       { 0x05303e6,0x23cdb4f,0x23977e1,0x12e4898,0x03bd999,0x0c930f0,
-        0x170e261,0x180a27b,0x2fd58ec,0x014e22b },
-      0 },
+        0x170e261,0x180a27b,0x2fd58ec,0x014e22b } },
     /* 133 */
     { { 0x25d7713,0x0c5fad7,0x09daad1,0x3b9d779,0x109b985,0x1d3ec98,
         0x35bc4fc,0x2f838cb,0x0d14f75,0x0173e42 },
       { 0x2657b12,0x10d4423,0x19e6760,0x296e5bb,0x2bfd421,0x25c3330,
-        0x29f51f8,0x0338838,0x24060f0,0x029a62e },
-      0 },
+        0x29f51f8,0x0338838,0x24060f0,0x029a62e } },
     /* 134 */
     { { 0x3748fec,0x2c5a1bb,0x2cf973d,0x289fa74,0x3e6e755,0x38997bf,
         0x0b6544c,0x2b6358c,0x38a7aeb,0x02c50bb },
       { 0x3d5770a,0x06be7c5,0x012fad3,0x19cb2cd,0x266af3b,0x3ccd677,
-        0x160d1bd,0x141d5af,0x2965851,0x034625a },
-      0 },
+        0x160d1bd,0x141d5af,0x2965851,0x034625a } },
     /* 135 */
     { { 0x3c41c08,0x255eacc,0x22e1ec5,0x2b151a3,0x087de94,0x311cbdb,
         0x016b73a,0x368e462,0x20b7981,0x0099ec3 },
       { 0x262b988,0x1539763,0x21e76e5,0x15445b4,0x1d8ddc7,0x34a9be6,
-        0x10faf03,0x24e4d18,0x07aa111,0x02d538a },
-      0 },
+        0x10faf03,0x24e4d18,0x07aa111,0x02d538a } },
     /* 136 */
     { { 0x38a876b,0x048ad45,0x04b40a0,0x3fc2144,0x251ff96,0x13ca7dd,
         0x0b31ab1,0x3539814,0x28b5f87,0x0212aec },
       { 0x270790a,0x350e7e0,0x346bd5e,0x276178f,0x22d6cb5,0x3078884,
-        0x355c1b6,0x15901d7,0x3671765,0x03950db },
-      0 },
+        0x355c1b6,0x15901d7,0x3671765,0x03950db } },
     /* 137 */
     { { 0x286e8d5,0x2409788,0x13be53f,0x2d21911,0x0353c95,0x10238e8,
         0x32f5bde,0x3a67b60,0x28b5b9c,0x001013d },
       { 0x381e8e5,0x0cef7a9,0x2f5bcad,0x06058f0,0x33cdf50,0x04672a8,
-        0x1769600,0x31c055d,0x3df0ac1,0x00e9098 },
-      0 },
+        0x1769600,0x31c055d,0x3df0ac1,0x00e9098 } },
     /* 138 */
     { { 0x2eb596d,0x197b326,0x12b4c29,0x39c08f2,0x101ea03,0x3804e58,
         0x04b4b62,0x28d9d1c,0x13f905e,0x0032a3f },
       { 0x11b2b61,0x08e9095,0x0d06925,0x270e43f,0x21eb7a8,0x0e4a98f,
-        0x31d2be0,0x030cf9f,0x2644ddb,0x025b728 },
-      0 },
+        0x31d2be0,0x030cf9f,0x2644ddb,0x025b728 } },
     /* 139 */
     { { 0x07510af,0x2ed0e8e,0x2a01203,0x2a2a68d,0x0846fea,0x3e540de,
         0x3a57702,0x1677348,0x2123aad,0x010d8f8 },
       { 0x0246a47,0x0e871d0,0x124dca4,0x34b9577,0x2b362b8,0x363ebe5,
-        0x3086045,0x26313e6,0x15cd8bb,0x0210384 },
-      0 },
+        0x3086045,0x26313e6,0x15cd8bb,0x0210384 } },
     /* 140 */
     { { 0x023e8a7,0x0817884,0x3a0bf12,0x3376371,0x3c808a8,0x18e9777,
         0x12a2721,0x35b538a,0x2bd30de,0x017835a },
       { 0x0fc0f64,0x1c8709f,0x2d8807a,0x0743957,0x242eec0,0x347e76c,
-        0x27bef91,0x289689a,0x0f42945,0x01f7a92 },
-      0 },
+        0x27bef91,0x289689a,0x0f42945,0x01f7a92 } },
     /* 141 */
     { { 0x1060a81,0x3dbc739,0x1615abd,0x1cbe3e5,0x3e79f9c,0x1ab09a2,
         0x136c540,0x05b473f,0x2beebfd,0x02af0a8 },
       { 0x3e2eac7,0x19be474,0x04668ac,0x18f4b74,0x36f10ba,0x0a0b4c6,
-        0x10e3770,0x3bf059e,0x3946c7e,0x013a8d4 },
-      0 },
+        0x10e3770,0x3bf059e,0x3946c7e,0x013a8d4 } },
     /* 142 */
     { { 0x266309d,0x28be354,0x1a3eed8,0x3020651,0x10a51c6,0x1e31770,
         0x0af45a5,0x3ff0f3b,0x2891c94,0x00e9db9 },
       { 0x17b0d0f,0x33a291f,0x0a5f9aa,0x25a3d61,0x2963ace,0x39a5fef,
-        0x230c724,0x1919146,0x10a465e,0x02084a8 },
-      0 },
+        0x230c724,0x1919146,0x10a465e,0x02084a8 } },
     /* 143 */
     { { 0x3ab8caa,0x31870f3,0x2390ef7,0x2103850,0x218eb8e,0x3a5ccf2,
         0x1dff677,0x2c59334,0x371599c,0x02a9f2a },
       { 0x0837bd1,0x3249cef,0x35d702f,0x3430dab,0x1c06407,0x108f692,
-        0x221292f,0x05f0c5d,0x073fe06,0x01038e0 },
-      0 },
+        0x221292f,0x05f0c5d,0x073fe06,0x01038e0 } },
     /* 144 */
     { { 0x3bf9b7c,0x2020929,0x30d0f4f,0x080fef8,0x3365d23,0x1f3e738,
         0x3e53209,0x1549afe,0x300b305,0x038d811 },
       { 0x0c6c2c7,0x2e6445b,0x3ee64dc,0x022e932,0x0726837,0x0deb67b,
-        0x1ed4346,0x3857f73,0x277a3de,0x01950b5 },
-      0 },
+        0x1ed4346,0x3857f73,0x277a3de,0x01950b5 } },
     /* 145 */
     { { 0x36c377a,0x0adb41e,0x08be3f3,0x11e40d1,0x36cb038,0x036a2bd,
         0x3dd3a82,0x1bc875b,0x2ee09bb,0x02994d2 },
       { 0x035facf,0x05e0344,0x07e630a,0x0ce772d,0x335e55a,0x111fce4,
-        0x250fe1c,0x3bc89ba,0x32fdc9a,0x03cf2d9 },
-      0 },
+        0x250fe1c,0x3bc89ba,0x32fdc9a,0x03cf2d9 } },
     /* 146 */
     { { 0x355fd83,0x1c67f8e,0x1d10eb3,0x1b21d77,0x0e0d7a4,0x173a9e1,
         0x2c9fa90,0x1c39cce,0x22eaae8,0x01f2bea },
       { 0x153b338,0x0534107,0x26c69b8,0x283be1f,0x3e0acc0,0x059cac3,
-        0x13d1081,0x148bbee,0x3c1b9bd,0x002aac4 },
-      0 },
+        0x13d1081,0x148bbee,0x3c1b9bd,0x002aac4 } },
     /* 147 */
     { { 0x2681297,0x3389e34,0x146addc,0x2c6d425,0x2cb350e,0x1986abc,
         0x0431737,0x04ba4b7,0x2028470,0x012e469 },
       { 0x2f8ddcf,0x3c4255c,0x1af4dcf,0x07a6a44,0x208ebf6,0x0dc90c3,
-        0x34360ac,0x072ad23,0x0537232,0x01254d3 },
-      0 },
+        0x34360ac,0x072ad23,0x0537232,0x01254d3 } },
     /* 148 */
     { { 0x07b7e9d,0x3df5c7c,0x116f83d,0x28c4f35,0x3a478ef,0x3011fb8,
         0x2f264b6,0x317b9e3,0x04fd65a,0x032bd1b },
       { 0x2aa8266,0x3431de4,0x04bba04,0x19a44da,0x0edf454,0x392c5ac,
-        0x265168a,0x1dc3d5b,0x25704c6,0x00533a7 },
-      0 },
+        0x265168a,0x1dc3d5b,0x25704c6,0x00533a7 } },
     /* 149 */
     { { 0x25e8f91,0x1178fa5,0x2492994,0x2eb2c3c,0x0d3aca1,0x0322828,
         0x1cc70f9,0x269c74c,0x0a53e4c,0x006edc2 },
       { 0x18bdd7a,0x2a79a55,0x26b1d5c,0x0200628,0x0734a05,0x3273c7b,
-        0x13aa714,0x0040ac2,0x2f2da30,0x03e7449 },
-      0 },
+        0x13aa714,0x0040ac2,0x2f2da30,0x03e7449 } },
     /* 150 */
     { { 0x3f9563e,0x2f29eab,0x14a0749,0x3fad264,0x1dd077a,0x3d7c59c,
         0x3a0311b,0x331a789,0x0b9729e,0x0201ebf },
       { 0x1b08b77,0x2a4cdf2,0x3e387f8,0x21510f1,0x286c3a7,0x1dbf62e,
-        0x3afa594,0x3363217,0x0d16568,0x01d46b7 },
-      0 },
+        0x3afa594,0x3363217,0x0d16568,0x01d46b7 } },
     /* 151 */
     { { 0x0715c0d,0x28e2d04,0x17f78ae,0x1c63dda,0x1d113ea,0x0fefc1b,
         0x1eab149,0x1d0fd99,0x0682537,0x00a7b11 },
       { 0x10bebbc,0x11c672d,0x14223d9,0x2ff9141,0x1399ee5,0x34b7b6c,
-        0x0d5b3a8,0x01df643,0x0e392a4,0x03fe4dc },
-      0 },
+        0x0d5b3a8,0x01df643,0x0e392a4,0x03fe4dc } },
     /* 152 */
     { { 0x2b75b65,0x0b5a6f1,0x11c559a,0x3549999,0x24188f8,0x37a75f4,
         0x29f33e3,0x34068a2,0x38ba2a9,0x025dd91 },
       { 0x29af2c7,0x0988b64,0x0923885,0x1b539a4,0x1334f5d,0x226947a,
-        0x2cc7e5a,0x20beb39,0x13fac2f,0x01d298c },
-      0 },
+        0x2cc7e5a,0x20beb39,0x13fac2f,0x01d298c } },
     /* 153 */
     { { 0x35f079c,0x137f76d,0x2fbbb2f,0x254638d,0x185b07c,0x1f34db7,
         0x2cfcf0e,0x218f46d,0x2150ff4,0x02add6f },
       { 0x33fc9b7,0x0d9f005,0x0fd081b,0x0834965,0x2b90a74,0x102448d,
-        0x3dbf03c,0x167d857,0x02e0b44,0x013afab },
-      0 },
+        0x3dbf03c,0x167d857,0x02e0b44,0x013afab } },
     /* 154 */
     { { 0x09f2c53,0x317f9d7,0x1411eb6,0x0463aba,0x0d25220,0x256b176,
         0x087633f,0x2bff322,0x07b2c1b,0x037e662 },
       { 0x10aaecb,0x23bb4a1,0x2272bb7,0x06c075a,0x09d4918,0x0736f2b,
-        0x0dd511b,0x101625e,0x0a7779f,0x009ec10 },
-      0 },
+        0x0dd511b,0x101625e,0x0a7779f,0x009ec10 } },
     /* 155 */
     { { 0x33b2eb2,0x0176dfd,0x2118904,0x022386c,0x2e0df85,0x2588c9f,
         0x1b71525,0x28fd540,0x137e4cf,0x02ce4f7 },
       { 0x3d75165,0x0c39ecf,0x3554a12,0x30af34c,0x2d66344,0x3ded408,
-        0x36f1be0,0x0d065b0,0x012d046,0x0025623 },
-      0 },
+        0x36f1be0,0x0d065b0,0x012d046,0x0025623 } },
     /* 156 */
     { { 0x2601c3b,0x1824fc0,0x335fe08,0x3e33d70,0x0fb0252,0x252bfca,
         0x1cf2808,0x1922e55,0x1a9db9f,0x020721e },
       { 0x2f56c51,0x39a1f31,0x218c040,0x1a4fc5d,0x3fed471,0x0164d4e,
-        0x388a419,0x06f1113,0x0f55fc1,0x03e8352 },
-      0 },
+        0x388a419,0x06f1113,0x0f55fc1,0x03e8352 } },
     /* 157 */
     { { 0x1608e4d,0x3872778,0x022cbc6,0x044d60a,0x3010dda,0x15fb0b5,
         0x37ddc11,0x19f5bda,0x156b6a3,0x023a838 },
       { 0x383b3b4,0x1380bc8,0x353ca35,0x250fc07,0x169966b,0x3780f29,
-        0x36632b2,0x2d6b13f,0x124fa00,0x00fd6ae },
-      0 },
+        0x36632b2,0x2d6b13f,0x124fa00,0x00fd6ae } },
     /* 158 */
     { { 0x1739efb,0x2ec3656,0x2c0d337,0x3d39faf,0x1c751b0,0x04699f4,
         0x252dd64,0x095b8b6,0x0872b74,0x022f1da },
       { 0x2d3d253,0x38edca0,0x379fa5b,0x287d635,0x3a9f679,0x059d9ee,
-        0x0ac168e,0x3cd3e87,0x19060fc,0x02ce1bc },
-      0 },
+        0x0ac168e,0x3cd3e87,0x19060fc,0x02ce1bc } },
     /* 159 */
     { { 0x3edcfc2,0x0f04d4b,0x2f0d31f,0x1898be2,0x25396bf,0x15ca230,
         0x02b4eae,0x2713668,0x0f71b06,0x0132d18 },
       { 0x38095ea,0x1ed34d6,0x3603ae6,0x165bf01,0x192bbf8,0x1852859,
-        0x075f66b,0x1488f85,0x10895ef,0x014b035 },
-      0 },
+        0x075f66b,0x1488f85,0x10895ef,0x014b035 } },
     /* 160 */
     { { 0x1339848,0x3084385,0x0c8d231,0x3a1c1de,0x0e87a28,0x255b85c,
         0x1de6616,0x2702e74,0x1382bb0,0x012b0f2 },
       { 0x198987d,0x381545a,0x34d619b,0x312b827,0x18b2376,0x28fe4cf,
-        0x20b7651,0x017d077,0x0c7e397,0x00e0365 },
-      0 },
+        0x20b7651,0x017d077,0x0c7e397,0x00e0365 } },
     /* 161 */
     { { 0x1542e75,0x0d56aa0,0x39b701a,0x287b806,0x396c724,0x0935c21,
         0x3a29776,0x0debdac,0x171de26,0x00b38f8 },
       { 0x1d5bc1a,0x3fad27d,0x22b5cfe,0x1f89ddf,0x0a65560,0x144dd5b,
-        0x2aac2f9,0x139353f,0x0520b62,0x00b9b36 },
-      0 },
+        0x2aac2f9,0x139353f,0x0520b62,0x00b9b36 } },
     /* 162 */
     { { 0x031c31d,0x16552e3,0x1a0c368,0x0016fc8,0x168533d,0x171e7b2,
         0x17626e7,0x275502f,0x14742c6,0x03285dd },
       { 0x2d2dbb2,0x3b6bffd,0x1d18cc6,0x2f45d2a,0x0fd0d8c,0x2915e3a,
-        0x1e8793a,0x0b39a1d,0x3139cab,0x02a5da9 },
-      0 },
+        0x1e8793a,0x0b39a1d,0x3139cab,0x02a5da9 } },
     /* 163 */
     { { 0x3fb353d,0x147c6e4,0x3a720a6,0x22d5ff3,0x1d75cab,0x06c54a0,
         0x08cfa73,0x12666aa,0x3170a1f,0x021c829 },
       { 0x13e1b90,0x3a34dda,0x1fc38c3,0x02c5bdb,0x2d345dc,0x14aa1d0,
-        0x28d00ab,0x224f23a,0x329c769,0x025c67b },
-      0 },
+        0x28d00ab,0x224f23a,0x329c769,0x025c67b } },
     /* 164 */
     { { 0x0e35909,0x3bb6356,0x0116820,0x370cf77,0x29366d8,0x3881409,
         0x3999d06,0x013075f,0x176e157,0x02941ca },
       { 0x0e70b2e,0x28dfab1,0x2a8a002,0x15da242,0x084dcf6,0x116ca97,
-        0x31bf186,0x1dc9735,0x09df7b7,0x0264e27 },
-      0 },
+        0x31bf186,0x1dc9735,0x09df7b7,0x0264e27 } },
     /* 165 */
     { { 0x2da7a4b,0x3023c9e,0x1366238,0x00ff4e2,0x03abe9d,0x19bd44b,
         0x272e897,0x20b91ad,0x2aa202c,0x02a2201 },
       { 0x380184e,0x08112b4,0x0b85660,0x31049aa,0x3a8cb78,0x36113c5,
-        0x1670c0a,0x373f9e7,0x3fb4738,0x00010ef },
-      0 },
+        0x1670c0a,0x373f9e7,0x3fb4738,0x00010ef } },
     /* 166 */
     { { 0x2d5192e,0x26d770d,0x32af8d5,0x34d1642,0x1acf885,0x05805e0,
         0x166d0a1,0x1219a0d,0x301ba6c,0x014bcfb },
       { 0x2dcb64d,0x19cca83,0x379f398,0x08e01a0,0x10a482c,0x0103cc2,
-        0x0be5fa7,0x1f9d45b,0x1899ef2,0x00ca5af },
-      0 },
+        0x0be5fa7,0x1f9d45b,0x1899ef2,0x00ca5af } },
     /* 167 */
     { { 0x14d81d7,0x2aea251,0x1b3c476,0x3bd47ae,0x29eade7,0x0715e61,
         0x1a21cd8,0x1c7a586,0x2bfaee5,0x00ee43f },
       { 0x096f7cb,0x0c08f95,0x1bc4939,0x361fed4,0x255be41,0x26fad73,
-        0x31dd489,0x02c600f,0x29d9f81,0x01ba201 },
-      0 },
+        0x31dd489,0x02c600f,0x29d9f81,0x01ba201 } },
     /* 168 */
     { { 0x03ea1db,0x1eac46d,0x1292ce3,0x2a54967,0x20a7ff1,0x3e13c61,
         0x1b02218,0x2b44e14,0x3eadefa,0x029c88a },
       { 0x30a9144,0x31e3b0a,0x19c5a2a,0x147cbe9,0x05a0240,0x051f38e,
-        0x11eca56,0x31a4247,0x123bc2a,0x02fa535 },
-      0 },
+        0x11eca56,0x31a4247,0x123bc2a,0x02fa535 } },
     /* 169 */
     { { 0x3226ce7,0x1251782,0x0b7072f,0x11e59fa,0x2b8afd7,0x169b18f,
         0x2a46f18,0x31d9bb7,0x2fe9be8,0x01de0b7 },
       { 0x1b38626,0x34aa90f,0x3ad1760,0x21ddbd9,0x3460ae7,0x1126736,
-        0x1b86fc5,0x0b92cd0,0x167a289,0x000e0e1 },
-      0 },
+        0x1b86fc5,0x0b92cd0,0x167a289,0x000e0e1 } },
     /* 170 */
     { { 0x1ec1a0f,0x36bbf5e,0x1c972d8,0x3f73ace,0x13bbcd6,0x23d86a5,
         0x175ffc5,0x2d083d5,0x2c4adf7,0x036f661 },
       { 0x1f39eb7,0x2a20505,0x176c81a,0x3d6e636,0x16ee2fc,0x3cbdc5f,
-        0x25475dc,0x2ef4151,0x3c46860,0x0238934 },
-      0 },
+        0x25475dc,0x2ef4151,0x3c46860,0x0238934 } },
     /* 171 */
     { { 0x2587390,0x3639526,0x0588749,0x13c32fb,0x212bb19,0x09660f1,
         0x207da4b,0x2bf211b,0x1c4407b,0x01506a6 },
       { 0x24c8842,0x105a498,0x05ffdb2,0x0ab61b0,0x26044c1,0x3dff3d8,
-        0x1d14b44,0x0d74716,0x049f57d,0x030024b },
-      0 },
+        0x1d14b44,0x0d74716,0x049f57d,0x030024b } },
     /* 172 */
     { { 0x32e61ef,0x31d70f7,0x35cad3c,0x320b86c,0x07e8841,0x027ca7d,
         0x2d30d19,0x2513718,0x2347286,0x01d7901 },
       { 0x3c237d0,0x107f16e,0x01c9e7d,0x3c3b13c,0x0c9537b,0x20af54d,
-        0x051a162,0x2161a47,0x258c784,0x016df2d },
-      0 },
+        0x051a162,0x2161a47,0x258c784,0x016df2d } },
     /* 173 */
     { { 0x228ead1,0x29c2122,0x07f6964,0x023f4ed,0x1802dc5,0x19f96ce,
         0x24bfd17,0x25e866b,0x2ba8df0,0x01eb84f },
       { 0x2dd384e,0x05bbe3a,0x3f06fd2,0x366dacb,0x30361a2,0x2f36d7c,
-        0x0b98784,0x38ff481,0x074e2a8,0x01e1f60 },
-      0 },
+        0x0b98784,0x38ff481,0x074e2a8,0x01e1f60 } },
     /* 174 */
     { { 0x17fbb1c,0x0975add,0x1debc5e,0x2cb2880,0x3e47bdd,0x3488cff,
         0x15e9a36,0x2121129,0x0199ef2,0x017088a },
       { 0x0315250,0x352a162,0x17c1773,0x0ae09c2,0x321b21a,0x3bd74cf,
-        0x3c4ea1d,0x3cac2ad,0x3abbaf0,0x039174d },
-      0 },
+        0x3c4ea1d,0x3cac2ad,0x3abbaf0,0x039174d } },
     /* 175 */
     { { 0x0511c8a,0x3c78d0a,0x2cd3d2d,0x322f729,0x3ebb229,0x09f0e69,
         0x0a71a76,0x2e74d5e,0x12284df,0x03b5ef0 },
       { 0x3dea561,0x0a9b7e4,0x0ed1cf2,0x237523c,0x05443f1,0x2eb48fa,
-        0x3861405,0x1b49f62,0x0c945ca,0x02ab25f },
-      0 },
+        0x3861405,0x1b49f62,0x0c945ca,0x02ab25f } },
     /* 176 */
     { { 0x16bd00a,0x13a9d28,0x3cc1eb5,0x2b7d702,0x2d839e9,0x3e6ff01,
         0x2bb7f11,0x3713824,0x3b31163,0x00c63e5 },
       { 0x30d7138,0x0316fb0,0x0220ecc,0x08eaf0c,0x244e8df,0x0088d81,
-        0x37972fb,0x3fd34ae,0x2a19a84,0x03e907e },
-      0 },
+        0x37972fb,0x3fd34ae,0x2a19a84,0x03e907e } },
     /* 177 */
     { { 0x2642269,0x0b65d29,0x03bd440,0x33a6ede,0x3c81814,0x2507982,
         0x0d38e47,0x3a788e6,0x32c1d26,0x00e2eda },
       { 0x2577f87,0x392895a,0x3e1cc64,0x14f7047,0x08b52d2,0x08a01ca,
-        0x336abf6,0x00697fc,0x105ce76,0x0253742 },
-      0 },
+        0x336abf6,0x00697fc,0x105ce76,0x0253742 } },
     /* 178 */
     { { 0x293f92a,0x33df737,0x3315156,0x32e26d7,0x0a01333,0x26579d4,
         0x004df9c,0x0aba409,0x067d25c,0x02481de },
       { 0x3f39d44,0x1c78042,0x13d7e24,0x0825aed,0x35f2c90,0x3270f63,
-        0x04b7b35,0x3ad4531,0x28bd29b,0x0207a10 },
-      0 },
+        0x04b7b35,0x3ad4531,0x28bd29b,0x0207a10 } },
     /* 179 */
     { { 0x077199f,0x270aeb1,0x0dd96dd,0x3b9ad7b,0x28cb8ee,0x3903f43,
         0x37db3fe,0x292c62b,0x362dbbf,0x006e52a },
       { 0x247f143,0x0362cf3,0x216344f,0x3f18fd1,0x351e623,0x31664e0,
-        0x0f270fc,0x243bbc6,0x2280555,0x001a8e3 },
-      0 },
+        0x0f270fc,0x243bbc6,0x2280555,0x001a8e3 } },
     /* 180 */
     { { 0x3355b49,0x2c04e6c,0x399b2e5,0x182d3af,0x020e265,0x09a7cf7,
         0x0ffa6bd,0x353e302,0x02083d9,0x029ecdb },
       { 0x33e8830,0x0570e86,0x1c0b64d,0x386a27e,0x0d5fcea,0x0b45a4c,
-        0x2ee4a2e,0x0a8833f,0x2b4a282,0x02f9531 },
-      0 },
+        0x2ee4a2e,0x0a8833f,0x2b4a282,0x02f9531 } },
     /* 181 */
     { { 0x191167c,0x36cf7e3,0x225ed6c,0x1e79e99,0x0517c3f,0x11ab1fd,
         0x05648f3,0x08aedc4,0x1abeae0,0x02fcc29 },
       { 0x3828a68,0x1e16fa4,0x30368e7,0x0c9fcfb,0x25161c3,0x24851ac,
-        0x1b5feb5,0x344eb84,0x0de2732,0x0347208 },
-      0 },
+        0x1b5feb5,0x344eb84,0x0de2732,0x0347208 } },
     /* 182 */
     { { 0x038b363,0x384d1e4,0x2519043,0x151ac17,0x158c11f,0x009b2b4,
         0x257abe6,0x2368d3f,0x3ed68a1,0x02df45e },
       { 0x29c2559,0x2962478,0x3d8444c,0x1d96fff,0x04f7a03,0x1391a52,
-        0x0de4af7,0x3319126,0x15e6412,0x00e65ff },
-      0 },
+        0x0de4af7,0x3319126,0x15e6412,0x00e65ff } },
     /* 183 */
     { { 0x3d61507,0x1d1a0a2,0x0d2af20,0x354d299,0x329e132,0x2a28578,
         0x2ddfb08,0x04fa3ff,0x1293c6c,0x003bae2 },
       { 0x3e259f8,0x1a68fa9,0x3e67e9b,0x39b44f9,0x1ce1db7,0x347e9a1,
-        0x3318f6a,0x2dbbc9d,0x2f8c922,0x008a245 },
-      0 },
+        0x3318f6a,0x2dbbc9d,0x2f8c922,0x008a245 } },
     /* 184 */
     { { 0x212ab5b,0x2b896c2,0x0136959,0x07e55ef,0x0cc1117,0x05b8ac3,
         0x18429ed,0x025fa01,0x11d6e93,0x03b016b },
       { 0x03f3708,0x2e96fab,0x1d77157,0x0d4c2d6,0x131baf9,0x0608d39,
-        0x3552371,0x06cdd1e,0x1567ff1,0x01f4c50 },
-      0 },
+        0x3552371,0x06cdd1e,0x1567ff1,0x01f4c50 } },
     /* 185 */
     { { 0x2dfefab,0x270173d,0x37077bd,0x1a372cd,0x1be2f22,0x28e2ee5,
         0x3ead973,0x35e8f94,0x2fc9bc1,0x03a7399 },
       { 0x36a02a1,0x2855d9b,0x00ed75a,0x37d8398,0x138c087,0x233706e,
-        0x147f346,0x01947e2,0x3017228,0x0365942 },
-      0 },
+        0x147f346,0x01947e2,0x3017228,0x0365942 } },
     /* 186 */
     { { 0x2057e60,0x2d31296,0x25e4504,0x2fa37bc,0x1cbccc3,0x1f0732f,
         0x3532081,0x2de8a98,0x19a804e,0x005359a },
       { 0x31f411a,0x2a10576,0x369c2c8,0x02fe035,0x109fbaf,0x30bddeb,
-        0x1eef901,0x1662ad3,0x0410d43,0x01bd31a },
-      0 },
+        0x1eef901,0x1662ad3,0x0410d43,0x01bd31a } },
     /* 187 */
     { { 0x2c24a96,0x1b7d3a5,0x19a3872,0x217f2f6,0x2534dbc,0x2cab8c2,
         0x066ef28,0x26aecf1,0x0fd6118,0x01310d4 },
       { 0x055b8da,0x1fdc5be,0x38a1296,0x25118f0,0x341a423,0x2ba4cd0,
-        0x3e1413e,0x062d70d,0x2425a31,0x029c9b4 },
-      0 },
+        0x3e1413e,0x062d70d,0x2425a31,0x029c9b4 } },
     /* 188 */
     { { 0x08c1086,0x1acfba5,0x22e1dae,0x0f72f4e,0x3f1de50,0x0f408bc,
         0x35ed3f0,0x3ce48fc,0x282cc6c,0x004d8e7 },
       { 0x1afaa86,0x24e3ef3,0x22589ac,0x3ec9952,0x1f45bc5,0x14144ca,
-        0x23b26e4,0x0d68c65,0x1e1c1a3,0x032a4d9 },
-      0 },
+        0x23b26e4,0x0d68c65,0x1e1c1a3,0x032a4d9 } },
     /* 189 */
     { { 0x03b2d20,0x16b1d53,0x241b361,0x05e4138,0x1742a54,0x32741c7,
         0x0521c4c,0x1ca96c2,0x034970b,0x02738a7 },
       { 0x13e0ad6,0x207dcdb,0x034c8cc,0x27bcbe1,0x18060da,0x33a18b6,
-        0x2d1d1a6,0x2be60d7,0x3d7ab42,0x012312a },
-      0 },
+        0x2d1d1a6,0x2be60d7,0x3d7ab42,0x012312a } },
     /* 190 */
     { { 0x0c7485a,0x06c3310,0x0dbfd22,0x2ef949d,0x0ead455,0x098f4ba,
         0x3c76989,0x0cf2d24,0x032f67b,0x01e005f },
       { 0x30cb5ee,0x0d5da64,0x0ed2b9d,0x2503102,0x1c0d14e,0x1cbc693,
-        0x37bf552,0x07013e2,0x054de5c,0x014f341 },
-      0 },
+        0x37bf552,0x07013e2,0x054de5c,0x014f341 } },
     /* 191 */
     { { 0x128ccac,0x1617e97,0x346ebcd,0x158016d,0x25f823e,0x34048ea,
         0x39f0a1c,0x3ea3df1,0x1c1d3d7,0x03ba919 },
       { 0x151803b,0x01967c1,0x2f70781,0x27df39a,0x06c0b59,0x24a239c,
-        0x15a7702,0x2464d06,0x2a47ae6,0x006db90 },
-      0 },
+        0x15a7702,0x2464d06,0x2a47ae6,0x006db90 } },
     /* 192 */
     { { 0x27d04c3,0x024df3d,0x38112e8,0x38a27ba,0x01e312b,0x0965358,
         0x35d8879,0x2f4f55a,0x214187f,0x0008936 },
       { 0x05fe36f,0x2ee18c3,0x1f5f87a,0x1813bd4,0x0580f3c,0x0ed0a7b,
-        0x0fb1bfb,0x3fcce59,0x2f042bf,0x01820e3 },
-      0 },
+        0x0fb1bfb,0x3fcce59,0x2f042bf,0x01820e3 } },
     /* 193 */
     { { 0x20bbe99,0x32cbc9f,0x39ee432,0x3cc12a8,0x37bda44,0x3ea4e40,
         0x097c7a9,0x0590d7d,0x2022d33,0x018dbac },
       { 0x3ae00aa,0x3439864,0x2d2ffcf,0x3f8c6b9,0x0875a00,0x3e4e407,
-        0x3658a29,0x22eb3d0,0x2b63921,0x022113b },
-      0 },
+        0x3658a29,0x22eb3d0,0x2b63921,0x022113b } },
     /* 194 */
     { { 0x33bae58,0x05c749a,0x1f3e114,0x1c45f8e,0x27db3df,0x06a3ab6,
         0x37bc7f8,0x1e27b34,0x3dc51fb,0x009eea0 },
       { 0x3f54de5,0x3d0e7fe,0x1a71a7d,0x02ed7f8,0x0727703,0x2ca5e92,
-        0x2e8e35d,0x292ad0b,0x13487f3,0x02b6d8b },
-      0 },
+        0x2e8e35d,0x292ad0b,0x13487f3,0x02b6d8b } },
     /* 195 */
     { { 0x175df2a,0x05a28a8,0x32e99b1,0x13d8630,0x2082aa0,0x11ac245,
         0x24f2e71,0x322cb27,0x17675e7,0x02e643f },
       { 0x1f37313,0x2765ad3,0x0789082,0x1e742d0,0x11c2055,0x2021dc4,
-        0x09ae4a7,0x346359b,0x2f94d10,0x0205c1f },
-      0 },
+        0x09ae4a7,0x346359b,0x2f94d10,0x0205c1f } },
     /* 196 */
     { { 0x3d6ff96,0x1f2ac80,0x336097d,0x3f03610,0x35b851b,0x010b6d2,
         0x0823c4d,0x2a9709a,0x2ead5a8,0x00de4b6 },
       { 0x01afa0b,0x0621965,0x3671528,0x1050b60,0x3f3e9e7,0x2f93829,
-        0x0825275,0x006e85f,0x35e94b0,0x016af58 },
-      0 },
+        0x0825275,0x006e85f,0x35e94b0,0x016af58 } },
     /* 197 */
     { { 0x2c4927c,0x3ea1382,0x0f23727,0x0d69f23,0x3e38860,0x2b72837,
         0x3cd5ea4,0x2d84292,0x321846a,0x016656f },
       { 0x29dfa33,0x3e182e0,0x018be90,0x2ba563f,0x2caafe2,0x218c0d9,
-        0x3baf447,0x1047a6c,0x0a2d483,0x01130cb },
-      0 },
+        0x3baf447,0x1047a6c,0x0a2d483,0x01130cb } },
     /* 198 */
     { { 0x00ed80c,0x2a5fc79,0x0a82a74,0x2c4c74b,0x15f938c,0x30b5ab6,
         0x32124b7,0x295314f,0x2fb8082,0x007c858 },
       { 0x20b173e,0x19f315c,0x12f97e4,0x198217c,0x040e8a6,0x3275977,
-        0x2bc20e4,0x01f2633,0x02bc3e9,0x023c750 },
-      0 },
+        0x2bc20e4,0x01f2633,0x02bc3e9,0x023c750 } },
     /* 199 */
     { { 0x3c4058a,0x24be73e,0x16704f5,0x2d8a4bd,0x3b15e14,0x3076315,
         0x1cfe37b,0x36fe715,0x343926e,0x02c6603 },
       { 0x2c76b09,0x0cf824c,0x3f7898c,0x274cec1,0x11df527,0x18eed18,
-        0x08ead48,0x23915bc,0x19b3744,0x00a0a2b },
-      0 },
+        0x08ead48,0x23915bc,0x19b3744,0x00a0a2b } },
     /* 200 */
     { { 0x0cf4ac5,0x1c8b131,0x0afb696,0x0ff7799,0x2f5ac1a,0x022420c,
         0x11baa2e,0x2ce4015,0x1275a14,0x0125cfc },
       { 0x22eac5d,0x360cd4c,0x3568e59,0x3d42f66,0x35e07ee,0x09620e4,
-        0x36720fa,0x22b1eac,0x2d0db16,0x01b6b23 },
-      0 },
+        0x36720fa,0x22b1eac,0x2d0db16,0x01b6b23 } },
     /* 201 */
     { { 0x1a835ef,0x1516bbb,0x2d51f7b,0x3487443,0x14aa113,0x0dd06c2,
         0x1a65e01,0x379300d,0x35920b9,0x012c8fb },
       { 0x04c7341,0x2eda00f,0x3c37e82,0x1b4fd62,0x0d45770,0x1478fba,
-        0x127863a,0x26939cd,0x134ddf4,0x01375c5 },
-      0 },
+        0x127863a,0x26939cd,0x134ddf4,0x01375c5 } },
     /* 202 */
     { { 0x1476cd9,0x1119ca5,0x325bbf9,0x0bf8c69,0x0648d07,0x312d9f8,
         0x01c8b8f,0x136ec51,0x0002f4a,0x03f4c5c },
       { 0x195d0e1,0x10ffd22,0x29aa1cb,0x3443bdc,0x276e695,0x05e6260,
-        0x15f9764,0x3cd9783,0x18c9569,0x0053eb1 },
-      0 },
+        0x15f9764,0x3cd9783,0x18c9569,0x0053eb1 } },
     /* 203 */
     { { 0x312ae18,0x280197c,0x3fc9ad9,0x303f324,0x251958d,0x29f4a11,
         0x2142408,0x3694366,0x25136ab,0x03b5f1d },
       { 0x1d4abbc,0x1c3c689,0x13ea462,0x3cfc684,0x39b5dd8,0x2d4654b,
-        0x09b0755,0x27d4f18,0x3f74d2e,0x03fbf2d },
-      0 },
+        0x09b0755,0x27d4f18,0x3f74d2e,0x03fbf2d } },
     /* 204 */
     { { 0x2119185,0x2525eae,0x1ba4bd0,0x0c2ab11,0x1d54e8c,0x294845e,
         0x2479dea,0x3602d24,0x17e87e0,0x0060069 },
       { 0x0afffb0,0x34fe37f,0x1240073,0x02eb895,0x06cf33c,0x2d7f7ef,
-        0x1d763b5,0x04191e0,0x11e1ead,0x027e3f0 },
-      0 },
+        0x1d763b5,0x04191e0,0x11e1ead,0x027e3f0 } },
     /* 205 */
     { { 0x269544c,0x0e85c57,0x3813158,0x19fc12d,0x20eaf85,0x1e2930c,
         0x22a8fd2,0x1a6a478,0x09d3d3a,0x02a74e0 },
       { 0x1a2da3b,0x30b0b16,0x0847936,0x3d86257,0x138ccbc,0x0f5421a,
-        0x25244e6,0x23bdd79,0x1aee117,0x00c01ae },
-      0 },
+        0x25244e6,0x23bdd79,0x1aee117,0x00c01ae } },
     /* 206 */
     { { 0x1eead28,0x07cac32,0x1fbc0bb,0x17627d3,0x17eef63,0x0b3a24e,
         0x0757fdb,0x3dd841d,0x3d745f8,0x002ae17 },
       { 0x25b4549,0x29f24cf,0x2f21ecd,0x1725e48,0x04be2bb,0x10ee010,
-        0x1a1274b,0x10b0898,0x27511e9,0x02c48b5 },
-      0 },
+        0x1a1274b,0x10b0898,0x27511e9,0x02c48b5 } },
     /* 207 */
     { { 0x2a5ae7a,0x181ef99,0x0be33be,0x3e9dab7,0x101e703,0x3adb971,
         0x1043014,0x2ebb2be,0x1c1097d,0x027d667 },
       { 0x3f250ed,0x16dc603,0x20dc6d7,0x1d0d268,0x38eb915,0x02c89e8,
-        0x1605a41,0x12de109,0x0e08a29,0x01f554a },
-      0 },
+        0x1605a41,0x12de109,0x0e08a29,0x01f554a } },
     /* 208 */
     { { 0x0c26def,0x163d988,0x2d1ef0f,0x3a960ac,0x1025585,0x0738e20,
         0x27d79b0,0x05cc3ef,0x201303f,0x00a333a },
       { 0x1644ba5,0x2af345e,0x30b8d1d,0x3a01bff,0x31fc643,0x1acf85e,
-        0x0a76fc6,0x04efe98,0x348a1d0,0x03062eb },
-      0 },
+        0x0a76fc6,0x04efe98,0x348a1d0,0x03062eb } },
     /* 209 */
     { { 0x1c4216d,0x18e3217,0x02ac34e,0x19c8185,0x200c010,0x17d4192,
         0x13a1719,0x165af51,0x09db7a9,0x0277be0 },
       { 0x3ab8d2c,0x2190b99,0x22b641e,0x0cd88de,0x3b42404,0x1310862,
-        0x106a6d6,0x23395f5,0x0b06880,0x000d5fe },
-      0 },
+        0x106a6d6,0x23395f5,0x0b06880,0x000d5fe } },
     /* 210 */
     { { 0x0d2cc88,0x36f9913,0x339d8e9,0x237c2e3,0x0cc61c2,0x34c2832,
         0x309874c,0x2621d28,0x2dd1b48,0x0392806 },
       { 0x17cd8f9,0x07bab3d,0x0c482ed,0x0faf565,0x31b767d,0x2f4bde1,
-        0x295c717,0x330c29c,0x179ce10,0x0119b5f },
-      0 },
+        0x295c717,0x330c29c,0x179ce10,0x0119b5f } },
     /* 211 */
     { { 0x1ada2c7,0x0c624a7,0x227d47d,0x30e3e6a,0x14fa0a6,0x0829678,
         0x24fd288,0x2b46a43,0x122451e,0x0319ca9 },
       { 0x186b655,0x01f3217,0x0af1306,0x0efe6b5,0x2f0235d,0x1c45ca9,
-        0x2086805,0x1d44e66,0x0faf2a6,0x0178f59 },
-      0 },
+        0x2086805,0x1d44e66,0x0faf2a6,0x0178f59 } },
     /* 212 */
     { { 0x33b4416,0x10431e6,0x2d99aa6,0x217aac9,0x0cd8fcf,0x2d95a9d,
         0x3ff74ad,0x10bf17a,0x295eb8e,0x01b229e },
       { 0x02a63bd,0x182e9ec,0x004710c,0x00e2e3c,0x06b2f23,0x04b642c,
-        0x2c37383,0x32a4631,0x022ad82,0x00d22b9 },
-      0 },
+        0x2c37383,0x32a4631,0x022ad82,0x00d22b9 } },
     /* 213 */
     { { 0x0cda2fb,0x1d198d7,0x26d27f4,0x286381c,0x022acca,0x24ac7c8,
         0x2df7824,0x0b4ba16,0x1e0d9ef,0x03041d3 },
       { 0x29a65b3,0x0f3912b,0x151bfcf,0x2b0175c,0x0fd71e4,0x39aa5e2,
-        0x311f50c,0x13ff351,0x3dbc9e5,0x03eeb7e },
-      0 },
+        0x311f50c,0x13ff351,0x3dbc9e5,0x03eeb7e } },
     /* 214 */
     { { 0x0a99363,0x0fc7348,0x2775171,0x23db3c8,0x2b91565,0x134d66c,
         0x0175cd2,0x1bf365a,0x2b48371,0x02dfe5d },
       { 0x16dbf74,0x2389357,0x2f36575,0x3f5c70e,0x38d23ba,0x090f7f8,
-        0x3477600,0x3201523,0x32ecafc,0x03d3506 },
-      0 },
+        0x3477600,0x3201523,0x32ecafc,0x03d3506 } },
     /* 215 */
     { { 0x1abd48d,0x073ca3f,0x38a451f,0x0d8cb01,0x1ce81be,0x05c51ba,
         0x0e29741,0x03c41ab,0x0eae016,0x0060209 },
       { 0x2e58358,0x1da62d9,0x2358038,0x14b39b2,0x1635687,0x39079b1,
-        0x380e345,0x1b49608,0x23983cf,0x019f97d },
-      0 },
+        0x380e345,0x1b49608,0x23983cf,0x019f97d } },
     /* 216 */
     { { 0x34899ef,0x332e373,0x04c0f89,0x3c27aed,0x1949015,0x09663b2,
         0x2f9276b,0x07f1951,0x09a04c1,0x027fbde },
       { 0x3d2a071,0x19fb3d4,0x1b096d3,0x1fe9146,0x3b10e1a,0x0478bbb,
-        0x2b3fb06,0x1388329,0x181a99c,0x02f2030 },
-      0 },
+        0x2b3fb06,0x1388329,0x181a99c,0x02f2030 } },
     /* 217 */
     { { 0x1eb82e6,0x14dbe39,0x3920972,0x31fd5b2,0x21a484f,0x02d7697,
         0x0e21715,0x37c431e,0x2629f8c,0x01249c3 },
       { 0x26b50ad,0x26deefa,0x0ffc1a3,0x30688e2,0x39a0284,0x041c65e,
-        0x03eb178,0x0bdfd50,0x2f96137,0x034bb94 },
-      0 },
+        0x03eb178,0x0bdfd50,0x2f96137,0x034bb94 } },
     /* 218 */
     { { 0x0e0362a,0x334a162,0x194dd37,0x29e3e97,0x2442fa8,0x10d2949,
         0x3836e5a,0x2dccebf,0x0bee5ab,0x037ed1e },
       { 0x33eede6,0x3c739d9,0x2f04a91,0x350ad6c,0x3a5390a,0x14c368b,
-        0x26f7bf5,0x11ce979,0x0b408df,0x0366850 },
-      0 },
+        0x26f7bf5,0x11ce979,0x0b408df,0x0366850 } },
     /* 219 */
     { { 0x28ea498,0x0886d5b,0x2e090e0,0x0a4d58f,0x2623478,0x0d74ab7,
         0x2b83913,0x12c6b81,0x18d623f,0x01d8301 },
       { 0x198aa79,0x26d6330,0x3a7f0b8,0x34bc1ea,0x2f74890,0x378955a,
-        0x204110f,0x0102538,0x02d8f19,0x01c5066 },
-      0 },
+        0x204110f,0x0102538,0x02d8f19,0x01c5066 } },
     /* 220 */
     { { 0x14b0f45,0x2838cd3,0x14e16f0,0x0e0e4aa,0x2d9280b,0x0f18757,
         0x3324c6b,0x1391ceb,0x1ce89d5,0x00ebe74 },
       { 0x0930371,0x3de6048,0x3097fd8,0x1308705,0x3eda266,0x3108c26,
-        0x1545dcd,0x1f7583a,0x1c37395,0x02c7e05 },
-      0 },
+        0x1545dcd,0x1f7583a,0x1c37395,0x02c7e05 } },
     /* 221 */
     { { 0x1fec44a,0x2a9e3a2,0x0caf84f,0x11cf2a9,0x0c8c2ae,0x06da989,
         0x1c807dc,0x3c149a4,0x1141543,0x02906bb },
       { 0x15ffe04,0x0d4e65f,0x2e20424,0x37d896d,0x18bacb2,0x1e05ddd,
-        0x1660be8,0x183be17,0x1dd86fb,0x035ba70 },
-      0 },
+        0x1660be8,0x183be17,0x1dd86fb,0x035ba70 } },
     /* 222 */
     { { 0x2853264,0x0ba5fb1,0x0a0b3aa,0x2df88c1,0x2771533,0x23aba6f,
         0x112bb7b,0x3e3086e,0x210ae9b,0x027271b },
       { 0x030b74c,0x0269678,0x1e90a23,0x135a98c,0x24ed749,0x126de7c,
-        0x344b23a,0x186da27,0x19640fa,0x0159af5 },
-      0 },
+        0x344b23a,0x186da27,0x19640fa,0x0159af5 } },
     /* 223 */
     { { 0x18061f3,0x3004630,0x3c70066,0x34df20f,0x1190b25,0x1c9cc91,
         0x1fc8e02,0x0d17bc1,0x390f525,0x033cb1c },
       { 0x0eb30cf,0x2f3ad04,0x303aa09,0x2e835dd,0x1cfd2eb,0x143fc95,
-        0x02c43a1,0x025e7a1,0x3558aa2,0x000bd45 },
-      0 },
+        0x02c43a1,0x025e7a1,0x3558aa2,0x000bd45 } },
     /* 224 */
     { { 0x1db7d07,0x3bde52b,0x1500396,0x1089115,0x20b4fc7,0x1e2a8f3,
         0x3f8eacc,0x365f7eb,0x1a5e8d4,0x0053a6b },
       { 0x37079e2,0x120284b,0x000edaa,0x33792c2,0x145baa3,0x20e055f,
-        0x365e2d7,0x26ba005,0x3ab8e9d,0x0282b53 },
-      0 },
+        0x365e2d7,0x26ba005,0x3ab8e9d,0x0282b53 } },
     /* 225 */
     { { 0x2653618,0x2dd8852,0x2a5f0bf,0x0f0c7aa,0x2187281,0x1252757,
         0x13e7374,0x3b47855,0x0b86e56,0x02f354c },
       { 0x2e9c47b,0x2fa14cc,0x19ab169,0x3fad401,0x0dc2776,0x24afeed,
-        0x3a97611,0x0d07736,0x3cf6979,0x02424a0 },
-      0 },
+        0x3a97611,0x0d07736,0x3cf6979,0x02424a0 } },
     /* 226 */
     { { 0x2e81a13,0x000c91d,0x123967b,0x265885c,0x29bee1a,0x0cb8675,
         0x2d361bd,0x1526823,0x3c9ace1,0x00d7bad },
       { 0x24e5bdc,0x02b969f,0x2c6e128,0x34edb3b,0x12dcd2c,0x3899af0,
-        0x24224c6,0x3a1914b,0x0f4448a,0x026a2cb },
-      0 },
+        0x24224c6,0x3a1914b,0x0f4448a,0x026a2cb } },
     /* 227 */
     { { 0x1d03b59,0x1c6fc82,0x32abf64,0x28ed96b,0x1c90e62,0x2f57bb2,
         0x3ff168e,0x04de7fd,0x0f4d449,0x01af6d8 },
       { 0x255bc30,0x2bfaf22,0x3fe0dad,0x0584025,0x1c79ead,0x3078ef7,
-        0x2197414,0x022a50b,0x0fd94ba,0x0007b0f },
-      0 },
+        0x2197414,0x022a50b,0x0fd94ba,0x0007b0f } },
     /* 228 */
     { { 0x09485c2,0x09dfaf7,0x10c7ba6,0x1e48bec,0x248cc9a,0x028a362,
         0x21d60f7,0x193d93d,0x1c04754,0x0346b2c },
       { 0x2f36612,0x240ac49,0x0d8bd26,0x13b8186,0x259c3a4,0x020d5fb,
-        0x38a8133,0x09b0937,0x39d4056,0x01f7341 },
-      0 },
+        0x38a8133,0x09b0937,0x39d4056,0x01f7341 } },
     /* 229 */
     { { 0x05a4b48,0x1f534fc,0x07725ce,0x148dc8c,0x2adcd29,0x04aa456,
         0x0f79718,0x066e346,0x189377d,0x002fd4d },
       { 0x068ea73,0x336569b,0x184d35e,0x32a08e9,0x3c7f3bb,0x11ce9c8,
-        0x3674c6f,0x21bf27e,0x0d9e166,0x034a2f9 },
-      0 },
+        0x3674c6f,0x21bf27e,0x0d9e166,0x034a2f9 } },
     /* 230 */
     { { 0x0fa8e4b,0x2e6418e,0x18fc5d2,0x1ba24ff,0x0559f18,0x0dbedbf,
         0x2de2aa4,0x22338e9,0x3aa510f,0x035d801 },
       { 0x23a4988,0x02aad94,0x02732d1,0x111d374,0x0b455cf,0x0d01c9e,
-        0x067082a,0x2ec05fd,0x368b303,0x03cad4b },
-      0 },
+        0x067082a,0x2ec05fd,0x368b303,0x03cad4b } },
     /* 231 */
     { { 0x035b4ca,0x1fabea6,0x1cbc0d5,0x3f2ed9a,0x02d2232,0x1990c66,
         0x2eb680c,0x3b4ea3b,0x18ecc5a,0x03636fa },
       { 0x1a02709,0x26f8ff1,0x1fa8cba,0x397d6e8,0x230be68,0x043aa14,
-        0x3d43cdf,0x25c17fa,0x3a3ee55,0x0380564 },
-      0 },
+        0x3d43cdf,0x25c17fa,0x3a3ee55,0x0380564 } },
     /* 232 */
     { { 0x275a0a6,0x16bd43a,0x0033d3e,0x2b15e16,0x2512226,0x005d901,
         0x26d50fd,0x3bc19bf,0x3b1aeb8,0x02bfb01 },
       { 0x0bb0a31,0x26559e0,0x1aae7fb,0x330dcc2,0x16f1af3,0x06afce2,
-        0x13a15a0,0x2ff7645,0x3546e2d,0x029c6e4 },
-      0 },
+        0x13a15a0,0x2ff7645,0x3546e2d,0x029c6e4 } },
     /* 233 */
     { { 0x0f593d2,0x384b806,0x122bbf8,0x0a281e0,0x1d1a904,0x2e93cab,
         0x0505db0,0x08f6454,0x05c6285,0x014e880 },
       { 0x3f2b935,0x22d8e79,0x161a07c,0x16b060a,0x02bff97,0x146328b,
-        0x3ceea77,0x238f61a,0x19b3d58,0x02fd1f4 },
-      0 },
+        0x3ceea77,0x238f61a,0x19b3d58,0x02fd1f4 } },
     /* 234 */
     { { 0x17665d5,0x259e9f7,0x0de5672,0x15cbcbd,0x34e3030,0x035240f,
         0x0005ae8,0x286d851,0x07f39c9,0x000070b },
       { 0x1efc6d6,0x2a0051a,0x2724143,0x2a9ef1e,0x0c810bd,0x1e05429,
-        0x25670ba,0x2e66d7d,0x0e786ff,0x03f6b7e },
-      0 },
+        0x25670ba,0x2e66d7d,0x0e786ff,0x03f6b7e } },
     /* 235 */
     { { 0x3c00785,0x232e23f,0x2b67fd3,0x244ed23,0x077fa75,0x3cda3ef,
         0x14d055b,0x0f25011,0x24d5aa4,0x00ea0e3 },
       { 0x297bb9a,0x198ca4f,0x14d9561,0x18d1076,0x39eb933,0x2b6caa0,
-        0x1591a60,0x0768d45,0x257873e,0x00f36e0 },
-      0 },
+        0x1591a60,0x0768d45,0x257873e,0x00f36e0 } },
     /* 236 */
     { { 0x1e77eab,0x0502a5f,0x0109137,0x0350592,0x3f7e1c5,0x3ac7437,
         0x2dcad2c,0x1fee9d8,0x089f1f5,0x0169833 },
       { 0x0d45673,0x0d8e090,0x065580b,0x065644f,0x11b82be,0x3592dd0,
-        0x3284b8d,0x23f0015,0x16fdbfd,0x0248bfd },
-      0 },
+        0x3284b8d,0x23f0015,0x16fdbfd,0x0248bfd } },
     /* 237 */
     { { 0x1a129a1,0x1977bb2,0x0e041b2,0x15f30a1,0x0a5b1ce,0x3afef8f,
         0x380c46c,0x3358810,0x27df6c5,0x01ca466 },
       { 0x3b90f9a,0x3d14ea3,0x031b298,0x02e2390,0x2d719c0,0x25bc615,
-        0x2c0e777,0x0226b8c,0x3803624,0x0179e45 },
-      0 },
+        0x2c0e777,0x0226b8c,0x3803624,0x0179e45 } },
     /* 238 */
     { { 0x363cdfb,0x1bb155f,0x24fd5c1,0x1c7c72b,0x28e6a35,0x18165f2,
         0x226bea5,0x0beaff3,0x371e24c,0x0138294 },
       { 0x1765357,0x29034e9,0x22b4276,0x11035ce,0x23c89af,0x074468c,
-        0x3370ae4,0x013bae3,0x018d566,0x03d7fde },
-      0 },
+        0x3370ae4,0x013bae3,0x018d566,0x03d7fde } },
     /* 239 */
     { { 0x209df21,0x0f8ff86,0x0e47fbf,0x23b99ba,0x126d5d2,0x2722405,
         0x16bd0a2,0x1799082,0x0e9533f,0x039077c },
       { 0x3ba9e3f,0x3f6902c,0x1895305,0x3ac9813,0x3f2340c,0x3c0d9f1,
-        0x26e1927,0x0557c21,0x16eac4f,0x023b75f },
-      0 },
+        0x26e1927,0x0557c21,0x16eac4f,0x023b75f } },
     /* 240 */
     { { 0x3fc8ff3,0x0770382,0x342fc9a,0x0afa4db,0x314efd8,0x328e07b,
         0x016f7cc,0x3ba599c,0x1caed8a,0x0050cb0 },
       { 0x0b23c26,0x2120a5c,0x3273ec6,0x1cc1cd6,0x2a64fe8,0x2bbc3d6,
-        0x09f6e5e,0x34b1b8e,0x00b5ac8,0x032bbd2 },
-      0 },
+        0x09f6e5e,0x34b1b8e,0x00b5ac8,0x032bbd2 } },
     /* 241 */
     { { 0x1315922,0x1725e1d,0x0ca5524,0x1c4c18f,0x3d82951,0x193bcb2,
         0x0e60d0b,0x388dbcf,0x37e8efa,0x0342e85 },
       { 0x1b3af60,0x26ba3ec,0x220e53a,0x394f4b6,0x01a796a,0x3e7bbca,
-        0x163605d,0x2b85807,0x17c1c54,0x03cc725 },
-      0 },
+        0x163605d,0x2b85807,0x17c1c54,0x03cc725 } },
     /* 242 */
     { { 0x1cc4597,0x1635492,0x2028c0f,0x2c2eb82,0x2dc5015,0x0d2a052,
         0x05fc557,0x1f0ebbf,0x0cb96e1,0x0004d01 },
       { 0x1a824bf,0x3896172,0x2ed7b29,0x178007a,0x0d59318,0x07bda2b,
-        0x2ee6826,0x0f9b235,0x04b9193,0x01bcddf },
-      0 },
+        0x2ee6826,0x0f9b235,0x04b9193,0x01bcddf } },
     /* 243 */
     { { 0x0333fd2,0x0eeb46a,0x15b89f9,0x00968aa,0x2a89302,0x2bdd6b3,
         0x1e5037e,0x2541884,0x24ed2d0,0x01b6e8f },
       { 0x04399cd,0x3be6334,0x3adea48,0x1bb9adc,0x31811c6,0x05fb2bc,
-        0x360752c,0x3d29dcb,0x3423bec,0x03c4f3c },
-      0 },
+        0x360752c,0x3d29dcb,0x3423bec,0x03c4f3c } },
     /* 244 */
     { { 0x119e2eb,0x2e7b02a,0x0f68cee,0x257d8b0,0x183a9a1,0x2ae88a6,
         0x3a3bb67,0x2eb4f3e,0x1a9274b,0x0320fea },
       { 0x2fa1ce0,0x346c2d8,0x2fbf0d7,0x3d4d063,0x0e58b60,0x09c1bc1,
-        0x28ef9e5,0x09a0efe,0x0f45d70,0x02d275c },
-      0 },
+        0x28ef9e5,0x09a0efe,0x0f45d70,0x02d275c } },
     /* 245 */
     { { 0x2d5513b,0x31d443e,0x1e2d914,0x3b2c5d4,0x105f32e,0x27ee756,
         0x050418d,0x3c73db6,0x1bb0c30,0x01673eb },
       { 0x1cb7fd6,0x1eb08d5,0x26a3e16,0x2e20810,0x0249367,0x029e219,
-        0x2ec58c9,0x12d9fab,0x362354a,0x016eafc },
-      0 },
+        0x2ec58c9,0x12d9fab,0x362354a,0x016eafc } },
     /* 246 */
     { { 0x2424865,0x260747b,0x177f37c,0x1e3cb95,0x08b0028,0x2783016,
         0x2970f1b,0x323c1c0,0x2a79026,0x0186231 },
       { 0x0f244da,0x26866f4,0x087306f,0x173ec20,0x31ecced,0x3c84d8d,
-        0x070f9b9,0x2e764d5,0x075df50,0x0264ff9 },
-      0 },
+        0x070f9b9,0x2e764d5,0x075df50,0x0264ff9 } },
     /* 247 */
     { { 0x32c3609,0x0c737e6,0x14ea68e,0x300b11b,0x184eb19,0x29dd440,
         0x09ec1a9,0x185adeb,0x0664c80,0x0207dd9 },
       { 0x1fbe978,0x30a969d,0x33561d7,0x34fc60e,0x36743fe,0x00774af,
-        0x0d1f045,0x018360e,0x12a5fe9,0x01592a0 },
-      0 },
+        0x0d1f045,0x018360e,0x12a5fe9,0x01592a0 } },
     /* 248 */
     { { 0x2817d1d,0x2993d3e,0x2e0f7a5,0x112faa0,0x255f968,0x355fe6a,
         0x3f5a0fc,0x075b2d7,0x3cf00e5,0x0089afc },
       { 0x32833cf,0x06a7e4b,0x09a8d6d,0x1693d3e,0x320a0a3,0x3cfdfdd,
-        0x136c498,0x1e0d845,0x347ff25,0x01a1de7 },
-      0 },
+        0x136c498,0x1e0d845,0x347ff25,0x01a1de7 } },
     /* 249 */
     { { 0x3043d08,0x030705c,0x20fa79b,0x1d07f00,0x0a54467,0x29b49b4,
         0x367e289,0x0b82f4d,0x0d1eb09,0x025ef2c },
       { 0x32ed3c3,0x1baaa3c,0x3c482ab,0x146ca06,0x3c8a4f1,0x3e85e3c,
-        0x1bf4f3b,0x1195534,0x3e80a78,0x02a1cbf },
-      0 },
+        0x1bf4f3b,0x1195534,0x3e80a78,0x02a1cbf } },
     /* 250 */
     { { 0x32b2086,0x2de4d68,0x3486b1a,0x03a0583,0x2e1eb71,0x2dab9af,
         0x10cd913,0x28daa6f,0x3fcb732,0x000a04a },
       { 0x3605318,0x3f5f2b3,0x2d1da63,0x143f7f5,0x1646e5d,0x040b586,
-        0x1683982,0x25abe87,0x0c9fe53,0x001ce47 },
-      0 },
+        0x1683982,0x25abe87,0x0c9fe53,0x001ce47 } },
     /* 251 */
     { { 0x380d02b,0x055fc22,0x3f7fc50,0x3458a1d,0x26b8333,0x23550ab,
         0x0a1af87,0x0a821eb,0x2dc7e6d,0x00d574a },
       { 0x07386e1,0x3ccd68a,0x3275b41,0x253e390,0x2fd272a,0x1e6627a,
-        0x2ca2cde,0x0e9e4a1,0x1e37c2a,0x00f70ac },
-      0 },
+        0x2ca2cde,0x0e9e4a1,0x1e37c2a,0x00f70ac } },
     /* 252 */
     { { 0x0581352,0x2748701,0x02bed68,0x094dd9e,0x30a00c8,0x3fb5c07,
         0x3bd5909,0x211ac80,0x1103ccd,0x0311e1a },
       { 0x0c768ed,0x29dc209,0x36575db,0x009a107,0x272feea,0x2b33383,
-        0x313ed56,0x134c9cc,0x168d5bb,0x033310a },
-      0 },
+        0x313ed56,0x134c9cc,0x168d5bb,0x033310a } },
     /* 253 */
     { { 0x17620b9,0x143784f,0x256a94e,0x229664a,0x1d89a5c,0x1d521f2,
         0x0076406,0x1c73f70,0x342aa48,0x03851fa },
       { 0x0f3ae46,0x2ad3bab,0x0fbe274,0x3ed40d4,0x2fd4936,0x232103a,
-        0x2afe474,0x25b8f7c,0x047080e,0x008e6b0 },
-      0 },
+        0x2afe474,0x25b8f7c,0x047080e,0x008e6b0 } },
     /* 254 */
     { { 0x3fee8d4,0x347cd4a,0x0fec481,0x33fe9ec,0x0ce80b5,0x33a6bcf,
         0x1c4c9e2,0x3967441,0x1a3f5f7,0x03157e8 },
       { 0x257c227,0x1bc53a0,0x200b318,0x0fcd0af,0x2c5b165,0x2a413ec,
-        0x2fc998a,0x2da6426,0x19cd4f4,0x0025336 },
-      0 },
+        0x2fc998a,0x2da6426,0x19cd4f4,0x0025336 } },
     /* 255 */
     { { 0x303beba,0x2072135,0x32918a9,0x140cb3a,0x08631d1,0x0ef527b,
         0x05f2c9e,0x2b4ce91,0x0b642ab,0x02e428c },
       { 0x0a5abf9,0x15013ed,0x3603b46,0x30dd76d,0x3004750,0x28d7627,
-        0x1a42ccc,0x093ddbe,0x39a1b79,0x00067e2 },
-      0 },
+        0x1a42ccc,0x093ddbe,0x39a1b79,0x00067e2 } },
 };
 
 /* Multiply the base point of P256 by the scalar and return the result.
- * If map is true then convert result to affine co-ordinates.
+ * If map is true then convert result to affine coordinates.
  *
  * r     Resulting point.
  * k     Scalar to multiply by.
@@ -11098,7 +16098,7 @@
  * heap  Heap to use for allocation.
  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
  */
-static int sp_256_ecc_mulmod_base_10(sp_point* r, sp_digit* k,
+static int sp_256_ecc_mulmod_base_10(sp_point_256* r, const sp_digit* k,
         int map, void* heap)
 {
     return sp_256_ecc_mulmod_stripe_10(r, &p256_base, p256_table,
@@ -11108,7 +16108,7 @@
 #endif
 
 /* Multiply the base point of P256 by the scalar and return the result.
- * If map is true then convert result to affine co-ordinates.
+ * If map is true then convert result to affine coordinates.
  *
  * km    Scalar to multiply by.
  * r     Resulting point.
@@ -11118,23 +16118,22 @@
  */
 int sp_ecc_mulmod_base_256(mp_int* km, ecc_point* r, int map, void* heap)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point p;
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 p;
     sp_digit kd[10];
 #endif
-    sp_point* point;
+    sp_point_256* point;
     sp_digit* k = NULL;
     int err = MP_OKAY;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    err = sp_ecc_point_new(heap, p, point);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        k = XMALLOC(sizeof(sp_digit) * 10, heap, DYNAMIC_TYPE_ECC);
-        if (k == NULL)
-            err = MEMORY_E;
+
+    err = sp_256_point_new_10(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 10, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
     }
 #else
     k = kd;
@@ -11142,26 +16141,24 @@
     if (err == MP_OKAY) {
         sp_256_from_mp(k, 10, km);
 
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_256_ecc_mulmod_base_avx2_10(point, k, map, heap);
-        else
-#endif
             err = sp_256_ecc_mulmod_base_10(point, k, map, heap);
     }
-    if (err == MP_OKAY)
+    if (err == MP_OKAY) {
         err = sp_256_point_to_ecc_point_10(point, r);
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (k != NULL)
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
         XFREE(k, heap, DYNAMIC_TYPE_ECC);
-#endif
-    sp_ecc_point_free(point, 0, heap);
-
-    return err;
-}
-
-#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN)
+    }
+#endif
+    sp_256_point_free_10(point, 0, heap);
+
+    return err;
+}
+
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+                                                        defined(HAVE_ECC_VERIFY)
 /* Returns 1 if the number of zero.
  * Implementation is constant time.
  *
@@ -11174,7 +16171,7 @@
             a[8] | a[9]) == 0;
 }
 
-#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN */
+#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
 /* Add 1 to a. (a = a + 1)
  *
  * r  A single precision integer.
@@ -11186,33 +16183,38 @@
     sp_256_norm_10(a);
 }
 
-/* Read big endian unsigned byte aray into r.
- *
- * r  A single precision integer.
+/* Read big endian unsigned byte array into r.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
  * a  Byte array.
  * n  Number of bytes in array to read.
  */
-static void sp_256_from_bin(sp_digit* r, int max, const byte* a, int n)
-{
-    int i, j = 0, s = 0;
+static void sp_256_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;
 
     r[0] = 0;
     for (i = n-1; i >= 0; i--) {
-        r[j] |= ((sp_digit)a[i]) << s;
-        if (s >= 18) {
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 18U) {
             r[j] &= 0x3ffffff;
-            s = 26 - s;
-            if (j + 1 >= max)
-                break;
-            r[++j] = a[i] >> s;
-            s = 8 - s;
-        }
-        else
-            s += 8;
-    }
-
-    for (j++; j < max; j++)
+            s = 26U - s;
+            if (j + 1 >= size) {
+                break;
+            }
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    for (j++; j < size; j++) {
         r[j] = 0;
+    }
 }
 
 /* Generates a scalar that is in the range 1..order-1.
@@ -11230,7 +16232,7 @@
     do {
         err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf));
         if (err == 0) {
-            sp_256_from_bin(k, 10, buf, sizeof(buf));
+            sp_256_from_bin(k, 10, buf, (int)sizeof(buf));
             if (sp_256_cmp_10(k, p256_order2) < 0) {
                 sp_256_add_one_10(k);
                 break;
@@ -11253,87 +16255,80 @@
  */
 int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point p;
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 p;
     sp_digit kd[10];
 #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
-    sp_point inf;
-#endif
-#endif
-    sp_point* point;
+    sp_point_256 inf;
+#endif
+#endif
+    sp_point_256* point;
     sp_digit* k = NULL;
 #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
-    sp_point* infinity;
+    sp_point_256* infinity;
 #endif
     int err;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
 
     (void)heap;
 
-    err = sp_ecc_point_new(heap, p, point);
+    err = sp_256_point_new_10(heap, p, point);
 #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
-    if (err == MP_OKAY)
-        err = sp_ecc_point_new(heap, inf, infinity);
-#endif
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        k = XMALLOC(sizeof(sp_digit) * 10, heap, DYNAMIC_TYPE_ECC);
-        if (k == NULL)
-            err = MEMORY_E;
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_10(heap, inf, infinity);
+    }
+#endif
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 10, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
     }
 #else
     k = kd;
 #endif
 
-    if (err == MP_OKAY)
+    if (err == MP_OKAY) {
         err = sp_256_ecc_gen_k_10(rng, k);
-    if (err == MP_OKAY) {
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_256_ecc_mulmod_base_avx2_10(point, k, 1, NULL);
-        else
-#endif
+    }
+    if (err == MP_OKAY) {
             err = sp_256_ecc_mulmod_base_10(point, k, 1, NULL);
     }
 
 #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
     if (err == MP_OKAY) {
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
-            err = sp_256_ecc_mulmod_avx2_10(infinity, point, p256_order, 1,
-                                                                          NULL);
-        }
-        else
-#endif
             err = sp_256_ecc_mulmod_10(infinity, point, p256_order, 1, NULL);
     }
     if (err == MP_OKAY) {
-        if (!sp_256_iszero_10(point->x) || !sp_256_iszero_10(point->y))
+        if ((sp_256_iszero_10(point->x) == 0) || (sp_256_iszero_10(point->y) == 0)) {
             err = ECC_INF_E;
-    }
-#endif
-
-    if (err == MP_OKAY)
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(k, priv);
-    if (err == MP_OKAY)
+    }
+    if (err == MP_OKAY) {
         err = sp_256_point_to_ecc_point_10(point, pub);
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (k != NULL)
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
         XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
 #endif
 #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
-    sp_ecc_point_free(infinity, 1, heap);
-#endif
-    sp_ecc_point_free(point, 1, heap);
+    sp_256_point_free_10(infinity, 1, heap);
+#endif
+    sp_256_point_free_10(point, 1, heap);
 
     return err;
 }
 
 #ifdef HAVE_ECC_DHE
-/* Write r as big endian to byte aray.
+/* Write r as big endian to byte array.
  * Fixed length number of bytes written: 32
  *
  * r  A single precision integer.
@@ -11351,19 +16346,26 @@
     a[j] = 0;
     for (i=0; i<10 && j>=0; i++) {
         b = 0;
-        a[j--] |= r[i] << s; b += 8 - s;
-        if (j < 0)
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
             break;
+        }
         while (b < 26) {
-            a[j--] = r[i] >> b; b += 8;
-            if (j < 0)
-                break;
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
+                break;
+            }
         }
         s = 8 - (b - 26);
-        if (j >= 0)
+        if (j >= 0) {
             a[j] = 0;
-        if (s != 0)
+        }
+        if (s != 0) {
             j++;
+        }
     }
 }
 
@@ -11382,25 +16384,25 @@
 int sp_ecc_secret_gen_256(mp_int* priv, ecc_point* pub, byte* out,
                           word32* outLen, void* heap)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point p;
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 p;
     sp_digit kd[10];
 #endif
-    sp_point* point = NULL;
+    sp_point_256* point = NULL;
     sp_digit* k = NULL;
     int err = MP_OKAY;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    if (*outLen < 32)
+
+    if (*outLen < 32U) {
         err = BUFFER_E;
-
-    if (err == MP_OKAY)
-        err = sp_ecc_point_new(heap, p, point);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        k = XMALLOC(sizeof(sp_digit) * 10, heap, DYNAMIC_TYPE_ECC);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_10(heap, p, point);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 10, heap,
+                                                              DYNAMIC_TYPE_ECC);
         if (k == NULL)
             err = MEMORY_E;
     }
@@ -11411,11 +16413,6 @@
     if (err == MP_OKAY) {
         sp_256_from_mp(k, 10, priv);
         sp_256_point_from_ecc_point_10(point, pub);
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_256_ecc_mulmod_avx2_10(point, point, k, 1, heap);
-        else
-#endif
             err = sp_256_ecc_mulmod_10(point, point, k, 1, heap);
     }
     if (err == MP_OKAY) {
@@ -11423,19 +16420,18 @@
         *outLen = 32;
     }
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (k != NULL)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
         XFREE(k, heap, DYNAMIC_TYPE_ECC);
-#endif
-    sp_ecc_point_free(point, 0, heap);
+    }
+#endif
+    sp_256_point_free_10(point, 0, heap);
 
     return err;
 }
 #endif /* HAVE_ECC_DHE */
 
 #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
-#ifdef HAVE_INTEL_AVX2
-#endif /* HAVE_INTEL_AVX2 */
 #endif
 #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
 /* Multiply a by scalar b into r. (r = a * b)
@@ -11445,7 +16441,7 @@
  * b  A scalar.
  */
 SP_NOINLINE static void sp_256_mul_d_10(sp_digit* r, const sp_digit* a,
-    const sp_digit b)
+    sp_digit b)
 {
 #ifdef WOLFSSL_SP_SMALL
     int64_t tb = b;
@@ -11486,22 +16482,78 @@
 #endif /* WOLFSSL_SP_SMALL */
 }
 
+#ifdef WOLFSSL_SP_DIV_32
+static WC_INLINE sp_digit sp_256_div_word_10(sp_digit d1, sp_digit d0,
+    sp_digit dv)
+{
+    sp_digit d, r, t;
+
+    /* All 26 bits from d1 and top 5 bits from d0. */
+    d = (d1 << 5) | (d0 >> 21);
+    r = d / dv;
+    d -= r * dv;
+    /* Up to 6 bits in r */
+    /* Next 5 bits from d0. */
+    r <<= 5;
+    d <<= 5;
+    d |= (d0 >> 16) & ((1 << 5) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 11 bits in r */
+    /* Next 5 bits from d0. */
+    r <<= 5;
+    d <<= 5;
+    d |= (d0 >> 11) & ((1 << 5) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 16 bits in r */
+    /* Next 5 bits from d0. */
+    r <<= 5;
+    d <<= 5;
+    d |= (d0 >> 6) & ((1 << 5) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 21 bits in r */
+    /* Next 5 bits from d0. */
+    r <<= 5;
+    d <<= 5;
+    d |= (d0 >> 1) & ((1 << 5) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 26 bits in r */
+    /* Remaining 1 bits from d0. */
+    r <<= 1;
+    d <<= 1;
+    d |= d0 & ((1 << 1) - 1);
+    t = d / dv;
+    r += t;
+
+    return r;
+}
+#endif /* WOLFSSL_SP_DIV_32 */
+
 /* Divide d in a and put remainder into r (m*d + r = a)
  * m is not calculated as it is not needed at this time.
  *
- * a  Nmber to be divided.
+ * a  Number to be divided.
  * d  Number to divide with.
  * m  Multiplier result.
  * r  Remainder from the division.
  * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
  */
-static int sp_256_div_10(sp_digit* a, sp_digit* d, sp_digit* m,
+static int sp_256_div_10(const sp_digit* a, const sp_digit* d, sp_digit* m,
         sp_digit* r)
 {
     int i;
+#ifndef WOLFSSL_SP_DIV_32
     int64_t d1;
-    sp_digit div, r1;
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#endif
+    sp_digit dv, r1;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit* td;
 #else
     sp_digit t1d[20], t2d[10 + 1];
@@ -11510,62 +16562,70 @@
     sp_digit* t2;
     int err = MP_OKAY;
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    td = XMALLOC(sizeof(sp_digit) * (3 * 10 + 1), NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    if (td != NULL) {
+    (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (3 * 10 + 1), NULL,
+                                                       DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
         t1 = td;
         t2 = td + 2 * 10;
-    }
-    else
-        err = MEMORY_E;
-#else
-    t1 = t1d;
-    t2 = t2d;
-#endif
-
-    (void)m;
-
-    if (err == MP_OKAY) {
-        div = d[9];
-        XMEMCPY(t1, a, sizeof(*t1) * 2 * 10);
+#else
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
+        dv = d[9];
+        XMEMCPY(t1, a, sizeof(*t1) * 2U * 10U);
         for (i=9; i>=0; i--) {
             t1[10 + i] += t1[10 + i - 1] >> 26;
             t1[10 + i - 1] &= 0x3ffffff;
+#ifndef WOLFSSL_SP_DIV_32
             d1 = t1[10 + i];
             d1 <<= 26;
             d1 += t1[10 + i - 1];
-            r1 = (sp_digit)(d1 / div);
+            r1 = (sp_digit)(d1 / dv);
+#else
+            r1 = sp_256_div_word_10(t1[10 + i], t1[10 + i - 1], dv);
+#endif
 
             sp_256_mul_d_10(t2, d, r1);
-            sp_256_sub_10(&t1[i], &t1[i], t2);
+            (void)sp_256_sub_10(&t1[i], &t1[i], t2);
             t1[10 + i] -= t2[10];
             t1[10 + i] += t1[10 + i - 1] >> 26;
             t1[10 + i - 1] &= 0x3ffffff;
-            r1 = (((-t1[10 + i]) << 26) - t1[10 + i - 1]) / div;
+            r1 = (((-t1[10 + i]) << 26) - t1[10 + i - 1]) / dv;
             r1++;
             sp_256_mul_d_10(t2, d, r1);
-            sp_256_add_10(&t1[i], &t1[i], t2);
+            (void)sp_256_add_10(&t1[i], &t1[i], t2);
             t1[10 + i] += t1[10 + i - 1] >> 26;
             t1[10 + i - 1] &= 0x3ffffff;
         }
         t1[10 - 1] += t1[10 - 2] >> 26;
         t1[10 - 2] &= 0x3ffffff;
-        d1 = t1[10 - 1];
-        r1 = (sp_digit)(d1 / div);
+        r1 = t1[10 - 1] / dv;
 
         sp_256_mul_d_10(t2, d, r1);
-        sp_256_sub_10(t1, t1, t2);
-        XMEMCPY(r, t1, sizeof(*r) * 2 * 10);
+        (void)sp_256_sub_10(t1, t1, t2);
+        XMEMCPY(r, t1, sizeof(*r) * 2U * 10U);
         for (i=0; i<8; i++) {
             r[i+1] += r[i] >> 26;
             r[i] &= 0x3ffffff;
         }
-        sp_256_cond_add_10(r, r, d, 0 - (r[9] < 0));
-    }
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (td != NULL)
+        sp_256_cond_add_10(r, r, d, 0 - ((r[9] < 0) ?
+                    (sp_digit)1 : (sp_digit)0));
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
 #endif
 
     return err;
@@ -11578,7 +16638,7 @@
  * m  A single precision number that is the modulus to reduce with.
  * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
  */
-static int sp_256_mod_10(sp_digit* r, sp_digit* a, sp_digit* m)
+static int sp_256_mod_10(sp_digit* r, const sp_digit* a, const sp_digit* m)
 {
     return sp_256_div_10(a, m, NULL, r);
 }
@@ -11587,14 +16647,14 @@
 #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
 #ifdef WOLFSSL_SP_SMALL
 /* Order-2 for the P256 curve. */
-static const uint32_t p256_order_2[8] = {
-    0xfc63254f,0xf3b9cac2,0xa7179e84,0xbce6faad,0xffffffff,0xffffffff,
-    0x00000000,0xffffffff
+static const uint32_t p256_order_minus_2[8] = {
+    0xfc63254fU,0xf3b9cac2U,0xa7179e84U,0xbce6faadU,0xffffffffU,0xffffffffU,
+    0x00000000U,0xffffffffU
 };
 #else
 /* The low half of the order-2 of the P256 curve. */
 static const uint32_t p256_order_low[4] = {
-    0xfc63254f,0xf3b9cac2,0xa7179e84,0xbce6faad
+    0xfc63254fU,0xf3b9cac2U,0xa7179e84U,0xbce6faadU
 };
 #endif /* WOLFSSL_SP_SMALL */
 
@@ -11604,10 +16664,10 @@
  * a  First operand of the multiplication.
  * b  Second operand of the multiplication.
  */
-static void sp_256_mont_mul_order_10(sp_digit* r, sp_digit* a, sp_digit* b)
+static void sp_256_mont_mul_order_10(sp_digit* r, const sp_digit* a, const sp_digit* b)
 {
     sp_256_mul_10(r, a, b);
-    sp_256_mont_reduce_10(r, p256_order, p256_mp_order);
+    sp_256_mont_reduce_order_10(r, p256_order, p256_mp_order);
 }
 
 /* Square number mod the order of P256 curve. (r = a * a mod order)
@@ -11615,10 +16675,10 @@
  * r  Result of the squaring.
  * a  Number to square.
  */
-static void sp_256_mont_sqr_order_10(sp_digit* r, sp_digit* a)
+static void sp_256_mont_sqr_order_10(sp_digit* r, const sp_digit* a)
 {
     sp_256_sqr_10(r, a);
-    sp_256_mont_reduce_10(r, p256_order, p256_mp_order);
+    sp_256_mont_reduce_order_10(r, p256_order, p256_mp_order);
 }
 
 #ifndef WOLFSSL_SP_SMALL
@@ -11628,13 +16688,14 @@
  * r  Result of the squaring.
  * a  Number to square.
  */
-static void sp_256_mont_sqr_n_order_10(sp_digit* r, sp_digit* a, int n)
+static void sp_256_mont_sqr_n_order_10(sp_digit* r, const sp_digit* a, int n)
 {
     int i;
 
     sp_256_mont_sqr_order_10(r, a);
-    for (i=1; i<n; i++)
+    for (i=1; i<n; i++) {
         sp_256_mont_sqr_order_10(r, r);
+    }
 }
 #endif /* !WOLFSSL_SP_SMALL */
 
@@ -11645,7 +16706,7 @@
  * a   Number to invert.
  * td  Temporary data.
  */
-static void sp_256_mont_inv_order_10(sp_digit* r, sp_digit* a,
+static void sp_256_mont_inv_order_10(sp_digit* r, const sp_digit* a,
         sp_digit* td)
 {
 #ifdef WOLFSSL_SP_SMALL
@@ -11655,10 +16716,11 @@
     XMEMCPY(t, a, sizeof(sp_digit) * 10);
     for (i=254; i>=0; i--) {
         sp_256_mont_sqr_order_10(t, t);
-        if (p256_order_2[i / 32] & ((sp_digit)1 << (i % 32)))
+        if ((p256_order_minus_2[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
             sp_256_mont_mul_order_10(t, t, a);
-    }
-    XMEMCPY(r, t, sizeof(sp_digit) * 10);
+        }
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 10U);
 #else
     sp_digit* t = td;
     sp_digit* t2 = td + 2 * 10;
@@ -11696,8 +16758,9 @@
     /* t2= a^ffffffff00000000ffffffffffffffffbce6 */
     for (i=127; i>=112; i--) {
         sp_256_mont_sqr_order_10(t2, t2);
-        if (p256_order_low[i / 32] & ((sp_digit)1 << (i % 32)))
+        if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
             sp_256_mont_mul_order_10(t2, t2, a);
+        }
     }
     /* t2= a^ffffffff00000000ffffffffffffffffbce6f */
     sp_256_mont_sqr_n_order_10(t2, t2, 4);
@@ -11705,8 +16768,9 @@
     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84 */
     for (i=107; i>=64; i--) {
         sp_256_mont_sqr_order_10(t2, t2);
-        if (p256_order_low[i / 32] & ((sp_digit)1 << (i % 32)))
+        if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
             sp_256_mont_mul_order_10(t2, t2, a);
+        }
     }
     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f */
     sp_256_mont_sqr_n_order_10(t2, t2, 4);
@@ -11714,8 +16778,9 @@
     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2 */
     for (i=59; i>=32; i--) {
         sp_256_mont_sqr_order_10(t2, t2);
-        if (p256_order_low[i / 32] & ((sp_digit)1 << (i % 32)))
+        if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
             sp_256_mont_mul_order_10(t2, t2, a);
+        }
     }
     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2f */
     sp_256_mont_sqr_n_order_10(t2, t2, 4);
@@ -11723,8 +16788,9 @@
     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254 */
     for (i=27; i>=0; i--) {
         sp_256_mont_sqr_order_10(t2, t2);
-        if (p256_order_low[i / 32] & ((sp_digit)1 << (i % 32)))
+        if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
             sp_256_mont_mul_order_10(t2, t2, a);
+        }
     }
     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632540 */
     sp_256_mont_sqr_n_order_10(t2, t2, 4);
@@ -11733,143 +16799,6 @@
 #endif /* WOLFSSL_SP_SMALL */
 }
 
-#ifdef HAVE_INTEL_AVX2
-/* Multiply two number mod the order of P256 curve. (r = a * b mod order)
- *
- * r  Result of the multiplication.
- * a  First operand of the multiplication.
- * b  Second operand of the multiplication.
- */
-static void sp_256_mont_mul_order_avx2_10(sp_digit* r, sp_digit* a, sp_digit* b)
-{
-    sp_256_mul_avx2_10(r, a, b);
-    sp_256_mont_reduce_avx2_10(r, p256_order, p256_mp_order);
-}
-
-/* Square number mod the order of P256 curve. (r = a * a mod order)
- *
- * r  Result of the squaring.
- * a  Number to square.
- */
-static void sp_256_mont_sqr_order_avx2_10(sp_digit* r, sp_digit* a)
-{
-    sp_256_sqr_avx2_10(r, a);
-    sp_256_mont_reduce_avx2_10(r, p256_order, p256_mp_order);
-}
-
-#ifndef WOLFSSL_SP_SMALL
-/* Square number mod the order of P256 curve a number of times.
- * (r = a ^ n mod order)
- *
- * r  Result of the squaring.
- * a  Number to square.
- */
-static void sp_256_mont_sqr_n_order_avx2_10(sp_digit* r, sp_digit* a, int n)
-{
-    int i;
-
-    sp_256_mont_sqr_order_avx2_10(r, a);
-    for (i=1; i<n; i++)
-        sp_256_mont_sqr_order_avx2_10(r, r);
-}
-#endif /* !WOLFSSL_SP_SMALL */
-
-/* Invert the number, in Montgomery form, modulo the order of the P256 curve.
- * (r = 1 / a mod order)
- *
- * r   Inverse result.
- * a   Number to invert.
- * td  Temporary data.
- */
-static void sp_256_mont_inv_order_avx2_10(sp_digit* r, sp_digit* a,
-        sp_digit* td)
-{
-#ifdef WOLFSSL_SP_SMALL
-    sp_digit* t = td;
-    int i;
-
-    XMEMCPY(t, a, sizeof(sp_digit) * 10);
-    for (i=254; i>=0; i--) {
-        sp_256_mont_sqr_order_avx2_10(t, t);
-        if (p256_order_2[i / 32] & ((sp_digit)1 << (i % 32)))
-            sp_256_mont_mul_order_avx2_10(t, t, a);
-    }
-    XMEMCPY(r, t, sizeof(sp_digit) * 10);
-#else
-    sp_digit* t = td;
-    sp_digit* t2 = td + 2 * 10;
-    sp_digit* t3 = td + 4 * 10;
-    int i;
-
-    /* t = a^2 */
-    sp_256_mont_sqr_order_avx2_10(t, a);
-    /* t = a^3 = t * a */
-    sp_256_mont_mul_order_avx2_10(t, t, a);
-    /* t2= a^c = t ^ 2 ^ 2 */
-    sp_256_mont_sqr_n_order_avx2_10(t2, t, 2);
-    /* t3= a^f = t2 * t */
-    sp_256_mont_mul_order_avx2_10(t3, t2, t);
-    /* t2= a^f0 = t3 ^ 2 ^ 4 */
-    sp_256_mont_sqr_n_order_avx2_10(t2, t3, 4);
-    /* t = a^ff = t2 * t3 */
-    sp_256_mont_mul_order_avx2_10(t, t2, t3);
-    /* t3= a^ff00 = t ^ 2 ^ 8 */
-    sp_256_mont_sqr_n_order_avx2_10(t2, t, 8);
-    /* t = a^ffff = t2 * t */
-    sp_256_mont_mul_order_avx2_10(t, t2, t);
-    /* t2= a^ffff0000 = t ^ 2 ^ 16 */
-    sp_256_mont_sqr_n_order_avx2_10(t2, t, 16);
-    /* t = a^ffffffff = t2 * t */
-    sp_256_mont_mul_order_avx2_10(t, t2, t);
-    /* t2= a^ffffffff0000000000000000 = t ^ 2 ^ 64  */
-    sp_256_mont_sqr_n_order_avx2_10(t2, t, 64);
-    /* t2= a^ffffffff00000000ffffffff = t2 * t */
-    sp_256_mont_mul_order_avx2_10(t2, t2, t);
-    /* t2= a^ffffffff00000000ffffffff00000000 = t2 ^ 2 ^ 32  */
-    sp_256_mont_sqr_n_order_avx2_10(t2, t2, 32);
-    /* t2= a^ffffffff00000000ffffffffffffffff = t2 * t */
-    sp_256_mont_mul_order_avx2_10(t2, t2, t);
-    /* t2= a^ffffffff00000000ffffffffffffffffbce6 */
-    for (i=127; i>=112; i--) {
-        sp_256_mont_sqr_order_avx2_10(t2, t2);
-        if (p256_order_low[i / 32] & ((sp_digit)1 << (i % 32)))
-            sp_256_mont_mul_order_avx2_10(t2, t2, a);
-    }
-    /* t2= a^ffffffff00000000ffffffffffffffffbce6f */
-    sp_256_mont_sqr_n_order_avx2_10(t2, t2, 4);
-    sp_256_mont_mul_order_avx2_10(t2, t2, t3);
-    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84 */
-    for (i=107; i>=64; i--) {
-        sp_256_mont_sqr_order_avx2_10(t2, t2);
-        if (p256_order_low[i / 32] & ((sp_digit)1 << (i % 32)))
-            sp_256_mont_mul_order_avx2_10(t2, t2, a);
-    }
-    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f */
-    sp_256_mont_sqr_n_order_avx2_10(t2, t2, 4);
-    sp_256_mont_mul_order_avx2_10(t2, t2, t3);
-    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2 */
-    for (i=59; i>=32; i--) {
-        sp_256_mont_sqr_order_avx2_10(t2, t2);
-        if (p256_order_low[i / 32] & ((sp_digit)1 << (i % 32)))
-            sp_256_mont_mul_order_avx2_10(t2, t2, a);
-    }
-    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2f */
-    sp_256_mont_sqr_n_order_avx2_10(t2, t2, 4);
-    sp_256_mont_mul_order_avx2_10(t2, t2, t3);
-    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254 */
-    for (i=27; i>=0; i--) {
-        sp_256_mont_sqr_order_avx2_10(t2, t2);
-        if (p256_order_low[i / 32] & ((sp_digit)1 << (i % 32)))
-            sp_256_mont_mul_order_avx2_10(t2, t2, a);
-    }
-    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632540 */
-    sp_256_mont_sqr_n_order_avx2_10(t2, t2, 4);
-    /* r = a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f */
-    sp_256_mont_mul_order_avx2_10(r, t2, t3);
-#endif /* WOLFSSL_SP_SMALL */
-}
-
-#endif /* HAVE_INTEL_AVX2 */
 #endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
 #ifdef HAVE_ECC_SIGN
 #ifndef SP_ECC_MAX_SIG_GEN
@@ -11893,114 +16822,102 @@
  * MP_OKAY on success.
  */
 int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
-                    mp_int* rm, mp_int* sm, void* heap)
-{
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    sp_digit* d;
+                    mp_int* rm, mp_int* sm, mp_int* km, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
 #else
     sp_digit ed[2*10];
     sp_digit xd[2*10];
     sp_digit kd[2*10];
     sp_digit rd[2*10];
     sp_digit td[3 * 2*10];
-    sp_point p;
+    sp_point_256 p;
 #endif
     sp_digit* e = NULL;
     sp_digit* x = NULL;
     sp_digit* k = NULL;
     sp_digit* r = NULL;
     sp_digit* tmp = NULL;
-    sp_point* point = NULL;
+    sp_point_256* point = NULL;
     sp_digit carry;
-    sp_digit* s;
-    sp_digit* kInv;
+    sp_digit* s = NULL;
+    sp_digit* kInv = NULL;
     int err = MP_OKAY;
     int32_t c;
     int i;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
 
     (void)heap;
 
-    err = sp_ecc_point_new(heap, p, point);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        d = XMALLOC(sizeof(sp_digit) * 7 * 2 * 10, heap, DYNAMIC_TYPE_ECC);
-        if (d != NULL) {
-            e = d + 0 * 10;
-            x = d + 2 * 10;
-            k = d + 4 * 10;
-            r = d + 6 * 10;
-            tmp = d + 8 * 10;
-        }
-        else
-            err = MEMORY_E;
-    }
-#else
-    e = ed;
-    x = xd;
-    k = kd;
-    r = rd;
-    tmp = td;
-#endif
-    s = e;
-    kInv = k;
-
-    if (err == MP_OKAY) {
-        if (hashLen > 32)
-            hashLen = 32;
-
-        sp_256_from_bin(e, 10, hash, hashLen);
-        sp_256_from_mp(x, 10, priv);
+    err = sp_256_point_new_10(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 10, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        e = d + 0 * 10;
+        x = d + 2 * 10;
+        k = d + 4 * 10;
+        r = d + 6 * 10;
+        tmp = d + 8 * 10;
+#else
+        e = ed;
+        x = xd;
+        k = kd;
+        r = rd;
+        tmp = td;
+#endif
+        s = e;
+        kInv = k;
+
+        if (hashLen > 32U) {
+            hashLen = 32U;
+        }
+
+        sp_256_from_bin(e, 10, hash, (int)hashLen);
     }
 
     for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
+        sp_256_from_mp(x, 10, priv);
+
         /* New random point. */
-        err = sp_256_ecc_gen_k_10(rng, k);
+        if (km == NULL || mp_iszero(km)) {
+            err = sp_256_ecc_gen_k_10(rng, k);
+        }
+        else {
+            sp_256_from_mp(k, 10, km);
+            mp_zero(km);
+        }
         if (err == MP_OKAY) {
-#ifdef HAVE_INTEL_AVX2
-            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-                err = sp_256_ecc_mulmod_base_avx2_10(point, k, 1, heap);
-            else
-#endif
                 err = sp_256_ecc_mulmod_base_10(point, k, 1, NULL);
         }
 
         if (err == MP_OKAY) {
             /* r = point->x mod order */
-            XMEMCPY(r, point->x, sizeof(sp_digit) * 10);
+            XMEMCPY(r, point->x, sizeof(sp_digit) * 10U);
             sp_256_norm_10(r);
             c = sp_256_cmp_10(r, p256_order);
-            sp_256_cond_sub_10(r, r, p256_order, 0 - (c >= 0));
+            sp_256_cond_sub_10(r, r, p256_order, 0L - (sp_digit)(c >= 0));
             sp_256_norm_10(r);
 
             /* Conv k to Montgomery form (mod order) */
-#ifdef HAVE_INTEL_AVX2
-            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-                sp_256_mul_avx2_10(k, k, p256_norm_order);
-            else
-#endif
                 sp_256_mul_10(k, k, p256_norm_order);
             err = sp_256_mod_10(k, k, p256_order);
         }
         if (err == MP_OKAY) {
             sp_256_norm_10(k);
             /* kInv = 1/k mod order */
-#ifdef HAVE_INTEL_AVX2
-            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-                sp_256_mont_inv_order_avx2_10(kInv, k, tmp);
-            else
-#endif
                 sp_256_mont_inv_order_10(kInv, k, tmp);
             sp_256_norm_10(kInv);
 
             /* s = r * x + e */
-#ifdef HAVE_INTEL_AVX2
-            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-                sp_256_mul_avx2_10(x, x, r);
-            else
-#endif
                 sp_256_mul_10(x, x, r);
             err = sp_256_mod_10(x, x, p256_order);
         }
@@ -12010,46 +16927,45 @@
             sp_256_cond_sub_10(s, s, p256_order, 0 - carry);
             sp_256_norm_10(s);
             c = sp_256_cmp_10(s, p256_order);
-            sp_256_cond_sub_10(s, s, p256_order, 0 - (c >= 0));
+            sp_256_cond_sub_10(s, s, p256_order, 0L - (sp_digit)(c >= 0));
             sp_256_norm_10(s);
 
             /* s = s * k^-1 mod order */
-#ifdef HAVE_INTEL_AVX2
-            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-                sp_256_mont_mul_order_avx2_10(s, s, kInv);
-            else
-#endif
                 sp_256_mont_mul_order_10(s, s, kInv);
             sp_256_norm_10(s);
 
             /* Check that signature is usable. */
-            if (!sp_256_iszero_10(s))
-                break;
-        }
-    }
-
-    if (i == 0)
+            if (sp_256_iszero_10(s) == 0) {
+                break;
+            }
+        }
+    }
+
+    if (i == 0) {
         err = RNG_FAILURE_E;
-
-    if (err == MP_OKAY)
+    }
+
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(r, rm);
-    if (err == MP_OKAY)
+    }
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(s, sm);
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     if (d != NULL) {
         XMEMSET(d, 0, sizeof(sp_digit) * 8 * 10);
         XFREE(d, heap, DYNAMIC_TYPE_ECC);
     }
 #else
-    XMEMSET(e, 0, sizeof(sp_digit) * 2 * 10);
-    XMEMSET(x, 0, sizeof(sp_digit) * 2 * 10);
-    XMEMSET(k, 0, sizeof(sp_digit) * 2 * 10);
-    XMEMSET(r, 0, sizeof(sp_digit) * 2 * 10);
-    XMEMSET(r, 0, sizeof(sp_digit) * 2 * 10);
-    XMEMSET(tmp, 0, sizeof(sp_digit) * 3 * 2*10);
-#endif
-    sp_ecc_point_free(point, 1, heap);
+    XMEMSET(e, 0, sizeof(sp_digit) * 2U * 10U);
+    XMEMSET(x, 0, sizeof(sp_digit) * 2U * 10U);
+    XMEMSET(k, 0, sizeof(sp_digit) * 2U * 10U);
+    XMEMSET(r, 0, sizeof(sp_digit) * 2U * 10U);
+    XMEMSET(r, 0, sizeof(sp_digit) * 2U * 10U);
+    XMEMSET(tmp, 0, sizeof(sp_digit) * 3U * 2U * 10U);
+#endif
+    sp_256_point_free_10(point, 1, heap);
 
     return err;
 }
@@ -12079,109 +16995,106 @@
 int sp_ecc_verify_256(const byte* hash, word32 hashLen, mp_int* pX,
     mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap)
 {
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit* d = NULL;
 #else
     sp_digit u1d[2*10];
     sp_digit u2d[2*10];
     sp_digit sd[2*10];
     sp_digit tmpd[2*10 * 5];
-    sp_point p1d;
-    sp_point p2d;
-#endif
-    sp_digit* u1;
-    sp_digit* u2;
-    sp_digit* s;
-    sp_digit* tmp;
-    sp_point* p1;
-    sp_point* p2 = NULL;
+    sp_point_256 p1d;
+    sp_point_256 p2d;
+#endif
+    sp_digit* u1 = NULL;
+    sp_digit* u2 = NULL;
+    sp_digit* s = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_256* p1;
+    sp_point_256* p2 = NULL;
     sp_digit carry;
     int32_t c;
     int err;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    err = sp_ecc_point_new(heap, p1d, p1);
-    if (err == MP_OKAY)
-        err = sp_ecc_point_new(heap, p2d, p2);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        d = XMALLOC(sizeof(sp_digit) * 16 * 10, heap, DYNAMIC_TYPE_ECC);
-        if (d != NULL) {
-            u1  = d + 0 * 10;
-            u2  = d + 2 * 10;
-            s   = d + 4 * 10;
-            tmp = d + 6 * 10;
-        }
-        else
-            err = MEMORY_E;
-    }
-#else
-    u1 = u1d;
-    u2 = u2d;
-    s  = sd;
-    tmp = tmpd;
-#endif
-
-    if (err == MP_OKAY) {
-        if (hashLen > 32)
-            hashLen = 32;
-
-        sp_256_from_bin(u1, 10, hash, hashLen);
+
+    err = sp_256_point_new_10(heap, p1d, p1);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_10(heap, p2d, p2);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 10, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        u1  = d + 0 * 10;
+        u2  = d + 2 * 10;
+        s   = d + 4 * 10;
+        tmp = d + 6 * 10;
+#else
+        u1 = u1d;
+        u2 = u2d;
+        s  = sd;
+        tmp = tmpd;
+#endif
+
+        if (hashLen > 32U) {
+            hashLen = 32U;
+        }
+
+        sp_256_from_bin(u1, 10, hash, (int)hashLen);
         sp_256_from_mp(u2, 10, r);
         sp_256_from_mp(s, 10, sm);
         sp_256_from_mp(p2->x, 10, pX);
         sp_256_from_mp(p2->y, 10, pY);
         sp_256_from_mp(p2->z, 10, pZ);
 
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            sp_256_mul_avx2_10(s, s, p256_norm_order);
-        else
-#endif
+        {
             sp_256_mul_10(s, s, p256_norm_order);
+        }
         err = sp_256_mod_10(s, s, p256_order);
     }
     if (err == MP_OKAY) {
         sp_256_norm_10(s);
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
-            sp_256_mont_inv_order_avx2_10(s, s, tmp);
-            sp_256_mont_mul_order_avx2_10(u1, u1, s);
-            sp_256_mont_mul_order_avx2_10(u2, u2, s);
-        }
-        else
-#endif
         {
             sp_256_mont_inv_order_10(s, s, tmp);
             sp_256_mont_mul_order_10(u1, u1, s);
             sp_256_mont_mul_order_10(u2, u2, s);
         }
 
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_256_ecc_mulmod_base_avx2_10(p1, u1, 0, heap);
-        else
-#endif
             err = sp_256_ecc_mulmod_base_10(p1, u1, 0, heap);
     }
     if (err == MP_OKAY) {
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_256_ecc_mulmod_avx2_10(p2, p2, u2, 0, heap);
-        else
-#endif
             err = sp_256_ecc_mulmod_10(p2, p2, u2, 0, heap);
     }
 
     if (err == MP_OKAY) {
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            sp_256_proj_point_add_avx2_10(p1, p1, p2, tmp);
-        else
-#endif
+        {
             sp_256_proj_point_add_10(p1, p1, p2, tmp);
+            if (sp_256_iszero_10(p1->z)) {
+                if (sp_256_iszero_10(p1->x) && sp_256_iszero_10(p1->y)) {
+                    sp_256_proj_point_dbl_10(p1, p2, tmp);
+                }
+                else {
+                    /* Y ordinate is not used from here - don't set. */
+                    p1->x[0] = 0;
+                    p1->x[1] = 0;
+                    p1->x[2] = 0;
+                    p1->x[3] = 0;
+                    p1->x[4] = 0;
+                    p1->x[5] = 0;
+                    p1->x[6] = 0;
+                    p1->x[7] = 0;
+                    p1->x[8] = 0;
+                    p1->x[9] = 0;
+                    XMEMCPY(p1->z, p256_norm_mod, sizeof(p256_norm_mod));
+                }
+            }
+        }
 
         /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
         /* Reload r and convert to Montgomery form. */
@@ -12193,13 +17106,13 @@
         /* u1 = r.z'.z' mod prime */
         sp_256_mont_sqr_10(p1->z, p1->z, p256_mod, p256_mp_mod);
         sp_256_mont_mul_10(u1, u2, p1->z, p256_mod, p256_mp_mod);
-        *res = sp_256_cmp_10(p1->x, u1) == 0;
+        *res = (int)(sp_256_cmp_10(p1->x, u1) == 0);
         if (*res == 0) {
             /* Reload r and add order. */
             sp_256_from_mp(u2, 10, r);
             carry = sp_256_add_10(u2, u2, p256_order);
             /* Carry means result is greater than mod and is not valid. */
-            if (!carry) {
+            if (carry == 0) {
                 sp_256_norm_10(u2);
 
                 /* Compare with mod and if greater or equal then not valid. */
@@ -12211,19 +17124,19 @@
                         /* u1 = (r + 1*order).z'.z' mod prime */
                         sp_256_mont_mul_10(u1, u2, p1->z, p256_mod,
                                                                   p256_mp_mod);
-                        *res = sp_256_cmp_10(p1->x, u2) == 0;
+                        *res = (int)(sp_256_cmp_10(p1->x, u1) == 0);
                     }
                 }
             }
         }
     }
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     if (d != NULL)
         XFREE(d, heap, DYNAMIC_TYPE_ECC);
 #endif
-    sp_ecc_point_free(p1, 0, heap);
-    sp_ecc_point_free(p2, 0, heap);
+    sp_256_point_free_10(p1, 0, heap);
+    sp_256_point_free_10(p2, 0, heap);
 
     return err;
 }
@@ -12237,9 +17150,9 @@
  * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
  * not on the curve and MP_OKAY otherwise.
  */
-static int sp_256_ecc_is_point_10(sp_point* point, void* heap)
-{
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+static int sp_256_ecc_is_point_10(sp_point_256* point, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit* d = NULL;
 #else
     sp_digit t1d[2*10];
@@ -12249,42 +17162,46 @@
     sp_digit* t2;
     int err = MP_OKAY;
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    d = XMALLOC(sizeof(sp_digit) * 10 * 4, heap, DYNAMIC_TYPE_ECC);
-    if (d != NULL) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 10 * 4, heap, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
         t1 = d + 0 * 10;
         t2 = d + 2 * 10;
-    }
-    else
-        err = MEMORY_E;
-#else
-    (void)heap;
-
-    t1 = t1d;
-    t2 = t2d;
-#endif
-
-    if (err == MP_OKAY) {
+#else
+        (void)heap;
+
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
         sp_256_sqr_10(t1, point->y);
-        sp_256_mod_10(t1, t1, p256_mod);
+        (void)sp_256_mod_10(t1, t1, p256_mod);
         sp_256_sqr_10(t2, point->x);
-        sp_256_mod_10(t2, t2, p256_mod);
+        (void)sp_256_mod_10(t2, t2, p256_mod);
         sp_256_mul_10(t2, t2, point->x);
-        sp_256_mod_10(t2, t2, p256_mod);
-	sp_256_sub_10(t2, p256_mod, t2);
+        (void)sp_256_mod_10(t2, t2, p256_mod);
+        (void)sp_256_sub_10(t2, p256_mod, t2);
         sp_256_mont_add_10(t1, t1, t2, p256_mod);
 
         sp_256_mont_add_10(t1, t1, point->x, p256_mod);
         sp_256_mont_add_10(t1, t1, point->x, p256_mod);
         sp_256_mont_add_10(t1, t1, point->x, p256_mod);
 
-        if (sp_256_cmp_10(t1, p256_b) != 0)
+        if (sp_256_cmp_10(t1, p256_b) != 0) {
             err = MP_VAL;
-    }
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (d != NULL)
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
         XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
 #endif
 
     return err;
@@ -12299,23 +17216,23 @@
  */
 int sp_ecc_is_point_256(mp_int* pX, mp_int* pY)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point pubd;
-#endif
-    sp_point* pub;
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 pubd;
+#endif
+    sp_point_256* pub;
     byte one[1] = { 1 };
     int err;
 
-    err = sp_ecc_point_new(NULL, pubd, pub);
+    err = sp_256_point_new_10(NULL, pubd, pub);
     if (err == MP_OKAY) {
         sp_256_from_mp(pub->x, 10, pX);
         sp_256_from_mp(pub->y, 10, pY);
-        sp_256_from_bin(pub->z, 10, one, sizeof(one));
+        sp_256_from_bin(pub->z, 10, one, (int)sizeof(one));
 
         err = sp_256_ecc_is_point_10(pub, NULL);
     }
 
-    sp_ecc_point_free(pub, 0, NULL);
+    sp_256_point_free_10(pub, 0, NULL);
 
     return err;
 }
@@ -12333,50 +17250,54 @@
  */
 int sp_ecc_check_key_256(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit privd[10];
-    sp_point pubd;
-    sp_point pd;
+    sp_point_256 pubd;
+    sp_point_256 pd;
 #endif
     sp_digit* priv = NULL;
-    sp_point* pub;
-    sp_point* p = NULL;
+    sp_point_256* pub;
+    sp_point_256* p = NULL;
     byte one[1] = { 1 };
     int err;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    err = sp_ecc_point_new(heap, pubd, pub);
-    if (err == MP_OKAY)
-        err = sp_ecc_point_new(heap, pd, p);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        priv = XMALLOC(sizeof(sp_digit) * 10, heap, DYNAMIC_TYPE_ECC);
-        if (priv == NULL)
-            err = MEMORY_E;
-    }
-#else
-    priv = privd;
-#endif
-
-    if (err == MP_OKAY) {
+
+    err = sp_256_point_new_10(heap, pubd, pub);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_10(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 10, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (priv == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+        priv = privd;
+#endif
+
         sp_256_from_mp(pub->x, 10, pX);
         sp_256_from_mp(pub->y, 10, pY);
-        sp_256_from_bin(pub->z, 10, one, sizeof(one));
+        sp_256_from_bin(pub->z, 10, one, (int)sizeof(one));
         sp_256_from_mp(priv, 10, privm);
 
         /* Check point at infinitiy. */
-        if (sp_256_iszero_10(pub->x) &&
-            sp_256_iszero_10(pub->y))
+        if ((sp_256_iszero_10(pub->x) != 0) &&
+            (sp_256_iszero_10(pub->y) != 0)) {
             err = ECC_INF_E;
+        }
     }
 
     if (err == MP_OKAY) {
         /* Check range of X and Y */
         if (sp_256_cmp_10(pub->x, p256_mod) >= 0 ||
-            sp_256_cmp_10(pub->y, p256_mod) >= 0)
+            sp_256_cmp_10(pub->y, p256_mod) >= 0) {
             err = ECC_OUT_OF_RANGE_E;
+        }
     }
 
     if (err == MP_OKAY) {
@@ -12386,28 +17307,18 @@
 
     if (err == MP_OKAY) {
         /* Point * order = infinity */
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_256_ecc_mulmod_avx2_10(p, pub, p256_order, 1, heap);
-        else
-#endif
             err = sp_256_ecc_mulmod_10(p, pub, p256_order, 1, heap);
     }
     if (err == MP_OKAY) {
         /* Check result is infinity */
-        if (!sp_256_iszero_10(p->x) ||
-            !sp_256_iszero_10(p->y)) {
+        if ((sp_256_iszero_10(p->x) == 0) ||
+            (sp_256_iszero_10(p->y) == 0)) {
             err = ECC_INF_E;
         }
     }
 
     if (err == MP_OKAY) {
         /* Base * private = point */
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_256_ecc_mulmod_base_avx2_10(p, priv, 1, heap);
-        else
-#endif
             err = sp_256_ecc_mulmod_base_10(p, priv, 1, heap);
     }
     if (err == MP_OKAY) {
@@ -12418,12 +17329,13 @@
         }
     }
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (priv != NULL)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (priv != NULL) {
         XFREE(priv, heap, DYNAMIC_TYPE_ECC);
-#endif
-    sp_ecc_point_free(p, 0, heap);
-    sp_ecc_point_free(pub, 0, heap);
+    }
+#endif
+    sp_256_point_free_10(p, 0, heap);
+    sp_256_point_free_10(pub, 0, heap);
 
     return err;
 }
@@ -12447,27 +17359,27 @@
                               mp_int* qX, mp_int* qY, mp_int* qZ,
                               mp_int* rX, mp_int* rY, mp_int* rZ)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit tmpd[2 * 10 * 5];
-    sp_point pd;
-    sp_point qd;
+    sp_point_256 pd;
+    sp_point_256 qd;
 #endif
     sp_digit* tmp;
-    sp_point* p;
-    sp_point* q = NULL;
+    sp_point_256* p;
+    sp_point_256* q = NULL;
     int err;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    err = sp_ecc_point_new(NULL, pd, p);
-    if (err == MP_OKAY)
-        err = sp_ecc_point_new(NULL, qd, q);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        tmp = XMALLOC(sizeof(sp_digit) * 2 * 10 * 5, NULL, DYNAMIC_TYPE_ECC);
-        if (tmp == NULL)
-            err = MEMORY_E;
+
+    err = sp_256_point_new_10(NULL, pd, p);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_10(NULL, qd, q);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 5, NULL,
+                                                              DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
     }
 #else
     tmp = tmpd;
@@ -12481,27 +17393,26 @@
         sp_256_from_mp(q->y, 10, qY);
         sp_256_from_mp(q->z, 10, qZ);
 
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            sp_256_proj_point_add_avx2_10(p, p, q, tmp);
-        else
-#endif
             sp_256_proj_point_add_10(p, p, q, tmp);
     }
 
-    if (err == MP_OKAY)
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(p->x, rX);
-    if (err == MP_OKAY)
+    }
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(p->y, rY);
-    if (err == MP_OKAY)
+    }
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(p->z, rZ);
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (tmp != NULL)
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
         XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
-#endif
-    sp_ecc_point_free(q, 0, NULL);
-    sp_ecc_point_free(p, 0, NULL);
+    }
+#endif
+    sp_256_point_free_10(q, 0, NULL);
+    sp_256_point_free_10(p, 0, NULL);
 
     return err;
 }
@@ -12520,23 +17431,22 @@
 int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
                               mp_int* rX, mp_int* rY, mp_int* rZ)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit tmpd[2 * 10 * 2];
-    sp_point pd;
+    sp_point_256 pd;
 #endif
     sp_digit* tmp;
-    sp_point* p;
+    sp_point_256* p;
     int err;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    err = sp_ecc_point_new(NULL, pd, p);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        tmp = XMALLOC(sizeof(sp_digit) * 2 * 10 * 2, NULL, DYNAMIC_TYPE_ECC);
-        if (tmp == NULL)
-            err = MEMORY_E;
+
+    err = sp_256_point_new_10(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 2, NULL,
+                                                              DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
     }
 #else
     tmp = tmpd;
@@ -12547,26 +17457,25 @@
         sp_256_from_mp(p->y, 10, pY);
         sp_256_from_mp(p->z, 10, pZ);
 
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            sp_256_proj_point_dbl_avx2_10(p, p, tmp);
-        else
-#endif
             sp_256_proj_point_dbl_10(p, p, tmp);
     }
 
-    if (err == MP_OKAY)
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(p->x, rX);
-    if (err == MP_OKAY)
+    }
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(p->y, rY);
-    if (err == MP_OKAY)
+    }
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(p->z, rZ);
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (tmp != NULL)
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
         XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
-#endif
-    sp_ecc_point_free(p, 0, NULL);
+    }
+#endif
+    sp_256_point_free_10(p, 0, NULL);
 
     return err;
 }
@@ -12581,20 +17490,22 @@
  */
 int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit tmpd[2 * 10 * 4];
-    sp_point pd;
+    sp_point_256 pd;
 #endif
     sp_digit* tmp;
-    sp_point* p;
+    sp_point_256* p;
     int err;
 
-    err = sp_ecc_point_new(NULL, pd, p);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        tmp = XMALLOC(sizeof(sp_digit) * 2 * 10 * 4, NULL, DYNAMIC_TYPE_ECC);
-        if (tmp == NULL)
-            err = MEMORY_E;
+    err = sp_256_point_new_10(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 4, NULL,
+                                                              DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
     }
 #else
     tmp = tmpd;
@@ -12607,18 +17518,22 @@
         sp_256_map_10(p, p, tmp);
     }
 
-    if (err == MP_OKAY)
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(p->x, pX);
-    if (err == MP_OKAY)
+    }
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(p->y, pY);
-    if (err == MP_OKAY)
+    }
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(p->z, pZ);
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (tmp != NULL)
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
         XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
-#endif
-    sp_ecc_point_free(p, 0, NULL);
+    }
+#endif
+    sp_256_point_free_10(p, 0, NULL);
 
     return err;
 }
@@ -12631,7 +17546,7 @@
  */
 static int sp_256_mont_sqrt_10(sp_digit* y)
 {
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit* d;
 #else
     sp_digit t1d[2 * 10];
@@ -12640,58 +17555,23 @@
     sp_digit* t1;
     sp_digit* t2;
     int err = MP_OKAY;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    d = XMALLOC(sizeof(sp_digit) * 4 * 10, NULL, DYNAMIC_TYPE_ECC);
-    if (d != NULL) {
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 10, NULL, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
         t1 = d + 0 * 10;
         t2 = d + 2 * 10;
-    }
-    else
-        err = MEMORY_E;
-#else
-    t1 = t1d;
-    t2 = t2d;
-#endif
-
-    if (err == MP_OKAY) {
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
-            /* t2 = y ^ 0x2 */
-            sp_256_mont_sqr_avx2_10(t2, y, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0x3 */
-            sp_256_mont_mul_avx2_10(t1, t2, y, p256_mod, p256_mp_mod);
-            /* t2 = y ^ 0xc */
-            sp_256_mont_sqr_n_avx2_10(t2, t1, 2, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0xf */
-            sp_256_mont_mul_avx2_10(t1, t1, t2, p256_mod, p256_mp_mod);
-            /* t2 = y ^ 0xf0 */
-            sp_256_mont_sqr_n_avx2_10(t2, t1, 4, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0xff */
-            sp_256_mont_mul_avx2_10(t1, t1, t2, p256_mod, p256_mp_mod);
-            /* t2 = y ^ 0xff00 */
-            sp_256_mont_sqr_n_avx2_10(t2, t1, 8, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0xffff */
-            sp_256_mont_mul_avx2_10(t1, t1, t2, p256_mod, p256_mp_mod);
-            /* t2 = y ^ 0xffff0000 */
-            sp_256_mont_sqr_n_avx2_10(t2, t1, 16, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0xffffffff */
-            sp_256_mont_mul_avx2_10(t1, t1, t2, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0xffffffff00000000 */
-            sp_256_mont_sqr_n_avx2_10(t1, t1, 32, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0xffffffff00000001 */
-            sp_256_mont_mul_avx2_10(t1, t1, y, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0xffffffff00000001000000000000000000000000 */
-            sp_256_mont_sqr_n_avx2_10(t1, t1, 96, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0xffffffff00000001000000000000000000000001 */
-            sp_256_mont_mul_avx2_10(t1, t1, y, p256_mod, p256_mp_mod);
-            sp_256_mont_sqr_n_avx2_10(y, t1, 94, p256_mod, p256_mp_mod);
-        }
-        else
-#endif
+#else
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
         {
             /* t2 = y ^ 0x2 */
             sp_256_mont_sqr_10(t2, y, p256_mod, p256_mp_mod);
@@ -12725,13 +17605,15 @@
         }
     }
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (d != NULL)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
         XFREE(d, NULL, DYNAMIC_TYPE_ECC);
-#endif
-
-    return err;
-}
+    }
+#endif
+
+    return err;
+}
+
 
 /* Uncompress the point given the X ordinate.
  *
@@ -12742,47 +17624,37 @@
  */
 int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym)
 {
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit* d;
 #else
     sp_digit xd[2 * 10];
     sp_digit yd[2 * 10];
 #endif
-    sp_digit* x;
-    sp_digit* y;
-    int err = MP_OKAY;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    d = XMALLOC(sizeof(sp_digit) * 4 * 10, NULL, DYNAMIC_TYPE_ECC);
-    if (d != NULL) {
+    sp_digit* x = NULL;
+    sp_digit* y = NULL;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 10, NULL, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
         x = d + 0 * 10;
         y = d + 2 * 10;
-    }
-    else
-        err = MEMORY_E;
-#else
-    x = xd;
-    y = yd;
-#endif
-
-    if (err == MP_OKAY) {
+#else
+        x = xd;
+        y = yd;
+#endif
+
         sp_256_from_mp(x, 10, xm);
-
         err = sp_256_mod_mul_norm_10(x, x, p256_mod);
     }
-
     if (err == MP_OKAY) {
         /* y = x^3 */
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
-            sp_256_mont_sqr_avx2_10(y, x, p256_mod, p256_mp_mod);
-            sp_256_mont_mul_avx2_10(y, y, x, p256_mod, p256_mp_mod);
-        }
-        else
-#endif
         {
             sp_256_mont_sqr_10(y, x, p256_mod, p256_mp_mod);
             sp_256_mont_mul_10(y, y, x, p256_mod, p256_mp_mod);
@@ -12800,25 +17672,6187 @@
         err = sp_256_mont_sqrt_10(y);
     }
     if (err == MP_OKAY) {
-        XMEMSET(y + 10, 0, 10 * sizeof(sp_digit));
+        XMEMSET(y + 10, 0, 10U * sizeof(sp_digit));
         sp_256_mont_reduce_10(y, p256_mod, p256_mp_mod);
-        if (((y[0] ^ odd) & 1) != 0)
+        if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) {
             sp_256_mont_sub_10(y, p256_mod, y, p256_mod);
+        }
 
         err = sp_256_to_mp(y, ym);
     }
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (d != NULL)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
         XFREE(d, NULL, DYNAMIC_TYPE_ECC);
-#endif
-
-    return err;
-}
-#endif
-#endif /* WOLFSSL_SP_NO_256 */
+    }
+#endif
+
+    return err;
+}
+#endif
+#endif /* !WOLFSSL_SP_NO_256 */
+#ifdef WOLFSSL_SP_384
+
+/* Point structure to use. */
+typedef struct sp_point_384 {
+    sp_digit x[2 * 15];
+    sp_digit y[2 * 15];
+    sp_digit z[2 * 15];
+    int infinity;
+} sp_point_384;
+
+/* The modulus (prime) of the curve P384. */
+static const sp_digit p384_mod[15] = {
+    0x3ffffff,0x000003f,0x0000000,0x3fc0000,0x2ffffff,0x3ffffff,0x3ffffff,
+    0x3ffffff,0x3ffffff,0x3ffffff,0x3ffffff,0x3ffffff,0x3ffffff,0x3ffffff,0x00fffff
+    
+};
+/* The Montogmery normalizer for modulus of the curve P384. */
+static const sp_digit p384_norm_mod[15] = {
+    0x0000001,0x3ffffc0,0x3ffffff,0x003ffff,0x1000000,0x0000000,0x0000000,
+    0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000
+    
+};
+/* The Montogmery multiplier for modulus of the curve P384. */
+static sp_digit p384_mp_mod = 0x000001;
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+                                            defined(HAVE_ECC_VERIFY)
+/* The order of the curve P384. */
+static const sp_digit p384_order[15] = {
+    0x0c52973,0x3065ab3,0x277aece,0x2c922c2,0x3581a0d,0x10dcb77,0x234d81f,
+    0x3ffff1d,0x3ffffff,0x3ffffff,0x3ffffff,0x3ffffff,0x3ffffff,0x3ffffff,0x00fffff
+    
+};
+#endif
+/* The order of the curve P384 minus 2. */
+static const sp_digit p384_order2[15] = {
+    0x0c52971,0x3065ab3,0x277aece,0x2c922c2,0x3581a0d,0x10dcb77,0x234d81f,
+    0x3ffff1d,0x3ffffff,0x3ffffff,0x3ffffff,0x3ffffff,0x3ffffff,0x3ffffff,0x00fffff
+    
+};
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montogmery normalizer for order of the curve P384. */
+static const sp_digit p384_norm_order[15] = {
+    0x33ad68d,0x0f9a54c,0x1885131,0x136dd3d,0x0a7e5f2,0x2f23488,0x1cb27e0,
+    0x00000e2,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000
+    
+};
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montogmery multiplier for order of the curve P384. */
+static sp_digit p384_mp_order = 0x8fdc45;
+#endif
+/* The base point of curve P384. */
+static const sp_point_384 p384_base = {
+    /* X ordinate */
+    {
+        0x2760ab7,0x1178e1c,0x296c3a5,0x176fd54,0x05502f2,0x0950a8e,0x3741e08,
+        0x26e6167,0x3628ba7,0x11b874e,0x3320ad7,0x2c71c7b,0x305378e,0x288afa2,0x00aa87c,
+        
+        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* Y ordinate */
+    {
+        0x0ea0e5f,0x0c75f24,0x019d7a4,0x33875fa,0x00a60b1,0x17c2e30,0x1a3113b,
+        0x051f3a7,0x1bd289a,0x27e3d07,0x1292dc2,0x27a62fe,0x22c6f5d,0x392a589,0x003617d,
+        
+        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* Z ordinate */
+    {
+        0x0000001,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,
+        0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,
+        
+        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* infinity */
+    0
+};
+#if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY)
+static const sp_digit p384_b[15] = {
+    0x3ec2aef,0x1723b74,0x119d2a8,0x23628bb,0x2c65639,0x004e1d6,0x14088f5,
+    0x104480c,0x06efe81,0x2460767,0x23f82d1,0x23815af,0x2e7e498,0x3e9f88f,0x00b3312
+    
+};
+#endif
+
+static int sp_384_point_new_ex_15(void* heap, sp_point_384* sp, sp_point_384** p)
+{
+    int ret = MP_OKAY;
+    (void)heap;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    (void)sp;
+    *p = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, DYNAMIC_TYPE_ECC);
+#else
+    *p = sp;
+#endif
+    if (*p == NULL) {
+        ret = MEMORY_E;
+    }
+    return ret;
+}
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* Allocate memory for point and return error. */
+#define sp_384_point_new_15(heap, sp, p) sp_384_point_new_ex_15((heap), NULL, &(p))
+#else
+/* Set pointer to data and return no error. */
+#define sp_384_point_new_15(heap, sp, p) sp_384_point_new_ex_15((heap), &(sp), &(p))
+#endif
+
+
+static void sp_384_point_free_15(sp_point_384* p, int clear, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* If valid pointer then clear point data if requested and free data. */
+    if (p != NULL) {
+        if (clear != 0) {
+            XMEMSET(p, 0, sizeof(*p));
+        }
+        XFREE(p, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+/* Clear point data if requested. */
+    if (clear != 0) {
+        XMEMSET(p, 0, sizeof(*p));
+    }
+#endif
+    (void)heap;
+}
+
+/* Multiply a number by Montogmery normalizer mod modulus (prime).
+ *
+ * r  The resulting Montgomery form number.
+ * a  The number to convert.
+ * m  The modulus (prime).
+ * returns MEMORY_E when memory allocation fails and MP_OKAY otherwise.
+ */
+static int sp_384_mod_mul_norm_15(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    int64_t* td;
+#else
+    int64_t td[12];
+    int64_t a32d[12];
+#endif
+    int64_t* t;
+    int64_t* a32;
+    int64_t o;
+    int err = MP_OKAY;
+
+    (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    td = (int64_t*)XMALLOC(sizeof(int64_t) * 2 * 12, NULL, DYNAMIC_TYPE_ECC);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t = td;
+        a32 = td + 12;
+#else
+        t = td;
+        a32 = a32d;
+#endif
+
+        a32[0] = a[0];
+        a32[0] |= a[1] << 26U;
+        a32[0] &= 0xffffffffL;
+        a32[1] = (sp_digit)(a[1] >> 6);
+        a32[1] |= a[2] << 20U;
+        a32[1] &= 0xffffffffL;
+        a32[2] = (sp_digit)(a[2] >> 12);
+        a32[2] |= a[3] << 14U;
+        a32[2] &= 0xffffffffL;
+        a32[3] = (sp_digit)(a[3] >> 18);
+        a32[3] |= a[4] << 8U;
+        a32[3] &= 0xffffffffL;
+        a32[4] = (sp_digit)(a[4] >> 24);
+        a32[4] |= a[5] << 2U;
+        a32[4] |= a[6] << 28U;
+        a32[4] &= 0xffffffffL;
+        a32[5] = (sp_digit)(a[6] >> 4);
+        a32[5] |= a[7] << 22U;
+        a32[5] &= 0xffffffffL;
+        a32[6] = (sp_digit)(a[7] >> 10);
+        a32[6] |= a[8] << 16U;
+        a32[6] &= 0xffffffffL;
+        a32[7] = (sp_digit)(a[8] >> 16);
+        a32[7] |= a[9] << 10U;
+        a32[7] &= 0xffffffffL;
+        a32[8] = (sp_digit)(a[9] >> 22);
+        a32[8] |= a[10] << 4U;
+        a32[8] |= a[11] << 30U;
+        a32[8] &= 0xffffffffL;
+        a32[9] = (sp_digit)(a[11] >> 2);
+        a32[9] |= a[12] << 24U;
+        a32[9] &= 0xffffffffL;
+        a32[10] = (sp_digit)(a[12] >> 8);
+        a32[10] |= a[13] << 18U;
+        a32[10] &= 0xffffffffL;
+        a32[11] = (sp_digit)(a[13] >> 14);
+        a32[11] |= a[14] << 12U;
+        a32[11] &= 0xffffffffL;
+
+        /*  1  0  0  0  0  0  0  0  1  1  0 -1 */
+        t[0] = 0 + a32[0] + a32[8] + a32[9] - a32[11];
+        /* -1  1  0  0  0  0  0  0 -1  0  1  1 */
+        t[1] = 0 - a32[0] + a32[1] - a32[8] + a32[10] + a32[11];
+        /*  0 -1  1  0  0  0  0  0  0 -1  0  1 */
+        t[2] = 0 - a32[1] + a32[2] - a32[9] + a32[11];
+        /*  1  0 -1  1  0  0  0  0  1  1 -1 -1 */
+        t[3] = 0 + a32[0] - a32[2] + a32[3] + a32[8] + a32[9] - a32[10] - a32[11];
+        /*  1  1  0 -1  1  0  0  0  1  2  1 -2 */
+        t[4] = 0 + a32[0] + a32[1] - a32[3] + a32[4] + a32[8] + 2 * a32[9] + a32[10] -  2 * a32[11];
+        /*  0  1  1  0 -1  1  0  0  0  1  2  1 */
+        t[5] = 0 + a32[1] + a32[2] - a32[4] + a32[5] + a32[9] + 2 * a32[10] + a32[11];
+        /*  0  0  1  1  0 -1  1  0  0  0  1  2 */
+        t[6] = 0 + a32[2] + a32[3] - a32[5] + a32[6] + a32[10] + 2 * a32[11];
+        /*  0  0  0  1  1  0 -1  1  0  0  0  1 */
+        t[7] = 0 + a32[3] + a32[4] - a32[6] + a32[7] + a32[11];
+        /*  0  0  0  0  1  1  0 -1  1  0  0  0 */
+        t[8] = 0 + a32[4] + a32[5] - a32[7] + a32[8];
+        /*  0  0  0  0  0  1  1  0 -1  1  0  0 */
+        t[9] = 0 + a32[5] + a32[6] - a32[8] + a32[9];
+        /*  0  0  0  0  0  0  1  1  0 -1  1  0 */
+        t[10] = 0 + a32[6] + a32[7] - a32[9] + a32[10];
+        /*  0  0  0  0  0  0  0  1  1  0 -1  1 */
+        t[11] = 0 + a32[7] + a32[8] - a32[10] + a32[11];
+
+        t[1] += t[0] >> 32; t[0] &= 0xffffffff;
+        t[2] += t[1] >> 32; t[1] &= 0xffffffff;
+        t[3] += t[2] >> 32; t[2] &= 0xffffffff;
+        t[4] += t[3] >> 32; t[3] &= 0xffffffff;
+        t[5] += t[4] >> 32; t[4] &= 0xffffffff;
+        t[6] += t[5] >> 32; t[5] &= 0xffffffff;
+        t[7] += t[6] >> 32; t[6] &= 0xffffffff;
+        t[8] += t[7] >> 32; t[7] &= 0xffffffff;
+        t[9] += t[8] >> 32; t[8] &= 0xffffffff;
+        t[10] += t[9] >> 32; t[9] &= 0xffffffff;
+        t[11] += t[10] >> 32; t[10] &= 0xffffffff;
+        o     = t[11] >> 32; t[11] &= 0xffffffff;
+        t[0] += o;
+        t[1] -= o;
+        t[3] += o;
+        t[4] += o;
+        t[1] += t[0] >> 32; t[0] &= 0xffffffff;
+        t[2] += t[1] >> 32; t[1] &= 0xffffffff;
+        t[3] += t[2] >> 32; t[2] &= 0xffffffff;
+        t[4] += t[3] >> 32; t[3] &= 0xffffffff;
+        t[5] += t[4] >> 32; t[4] &= 0xffffffff;
+        t[6] += t[5] >> 32; t[5] &= 0xffffffff;
+        t[7] += t[6] >> 32; t[6] &= 0xffffffff;
+        t[8] += t[7] >> 32; t[7] &= 0xffffffff;
+        t[9] += t[8] >> 32; t[8] &= 0xffffffff;
+        t[10] += t[9] >> 32; t[9] &= 0xffffffff;
+        t[11] += t[10] >> 32; t[10] &= 0xffffffff;
+
+        r[0] = (sp_digit)(t[0]) & 0x3ffffffL;
+        r[1] = (sp_digit)(t[0] >> 26U);
+        r[1] |= t[1] << 6U;
+        r[1] &= 0x3ffffffL;
+        r[2] = (sp_digit)(t[1] >> 20U);
+        r[2] |= t[2] << 12U;
+        r[2] &= 0x3ffffffL;
+        r[3] = (sp_digit)(t[2] >> 14U);
+        r[3] |= t[3] << 18U;
+        r[3] &= 0x3ffffffL;
+        r[4] = (sp_digit)(t[3] >> 8U);
+        r[4] |= t[4] << 24U;
+        r[4] &= 0x3ffffffL;
+        r[5] = (sp_digit)(t[4] >> 2U) & 0x3ffffffL;
+        r[6] = (sp_digit)(t[4] >> 28U);
+        r[6] |= t[5] << 4U;
+        r[6] &= 0x3ffffffL;
+        r[7] = (sp_digit)(t[5] >> 22U);
+        r[7] |= t[6] << 10U;
+        r[7] &= 0x3ffffffL;
+        r[8] = (sp_digit)(t[6] >> 16U);
+        r[8] |= t[7] << 16U;
+        r[8] &= 0x3ffffffL;
+        r[9] = (sp_digit)(t[7] >> 10U);
+        r[9] |= t[8] << 22U;
+        r[9] &= 0x3ffffffL;
+        r[10] = (sp_digit)(t[8] >> 4U) & 0x3ffffffL;
+        r[11] = (sp_digit)(t[8] >> 30U);
+        r[11] |= t[9] << 2U;
+        r[11] &= 0x3ffffffL;
+        r[12] = (sp_digit)(t[9] >> 24U);
+        r[12] |= t[10] << 8U;
+        r[12] &= 0x3ffffffL;
+        r[13] = (sp_digit)(t[10] >> 18U);
+        r[13] |= t[11] << 14U;
+        r[13] &= 0x3ffffffL;
+        r[14] = (sp_digit)(t[11] >> 12U);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (td != NULL)
+        XFREE(td, NULL, DYNAMIC_TYPE_ECC);
+#endif
+
+    return err;
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
+ * a  A multi-precision integer.
+ */
+static void sp_384_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 26
+    int j;
+
+    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+    for (j = a->used; j < size; j++) {
+        r[j] = 0;
+    }
+#elif DIGIT_BIT > 26
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
+        r[j] &= 0x3ffffff;
+        s = 26U - s;
+        if (j + 1 >= size) {
+            break;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 26U) <= (word32)DIGIT_BIT) {
+            s += 26U;
+            r[j] &= 0x3ffffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#else
+    int i, j = 0, s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i]) << s;
+        if (s + DIGIT_BIT >= 26) {
+            r[j] &= 0x3ffffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            s = 26 - s;
+            if (s == DIGIT_BIT) {
+                r[++j] = 0;
+                s = 0;
+            }
+            else {
+                r[++j] = a->dp[i] >> s;
+                s = DIGIT_BIT - s;
+            }
+        }
+        else {
+            s += DIGIT_BIT;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#endif
+}
+
+/* Convert a point of type ecc_point to type sp_point_384.
+ *
+ * p   Point of type sp_point_384 (result).
+ * pm  Point of type ecc_point.
+ */
+static void sp_384_point_from_ecc_point_15(sp_point_384* p, const ecc_point* pm)
+{
+    XMEMSET(p->x, 0, sizeof(p->x));
+    XMEMSET(p->y, 0, sizeof(p->y));
+    XMEMSET(p->z, 0, sizeof(p->z));
+    sp_384_from_mp(p->x, 15, pm->x);
+    sp_384_from_mp(p->y, 15, pm->y);
+    sp_384_from_mp(p->z, 15, pm->z);
+    p->infinity = 0;
+}
+
+/* Convert an array of sp_digit to an mp_int.
+ *
+ * a  A single precision integer.
+ * r  A multi-precision integer.
+ */
+static int sp_384_to_mp(const sp_digit* a, mp_int* r)
+{
+    int err;
+
+    err = mp_grow(r, (384 + DIGIT_BIT - 1) / DIGIT_BIT);
+    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 26
+        XMEMCPY(r->dp, a, sizeof(sp_digit) * 15);
+        r->used = 15;
+        mp_clamp(r);
+#elif DIGIT_BIT < 26
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 15; i++) {
+            r->dp[j] |= (mp_digit)(a[i] << s);
+            r->dp[j] &= (1L << DIGIT_BIT) - 1;
+            s = DIGIT_BIT - s;
+            r->dp[++j] = (mp_digit)(a[i] >> s);
+            while (s + DIGIT_BIT <= 26) {
+                s += DIGIT_BIT;
+                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+                if (s == SP_WORD_SIZE) {
+                    r->dp[j] = 0;
+                }
+                else {
+                    r->dp[j] = (mp_digit)(a[i] >> s);
+                }
+            }
+            s = 26 - s;
+        }
+        r->used = (384 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#else
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 15; i++) {
+            r->dp[j] |= ((mp_digit)a[i]) << s;
+            if (s + 26 >= DIGIT_BIT) {
+    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+                r->dp[j] &= (1L << DIGIT_BIT) - 1;
+    #endif
+                s = DIGIT_BIT - s;
+                r->dp[++j] = a[i] >> s;
+                s = 26 - s;
+            }
+            else {
+                s += 26;
+            }
+        }
+        r->used = (384 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#endif
+    }
+
+    return err;
+}
+
+/* Convert a point of type sp_point_384 to type ecc_point.
+ *
+ * p   Point of type sp_point_384.
+ * pm  Point of type ecc_point (result).
+ * returns MEMORY_E when allocation of memory in ecc_point fails otherwise
+ * MP_OKAY.
+ */
+static int sp_384_point_to_ecc_point_15(const sp_point_384* p, ecc_point* pm)
+{
+    int err;
+
+    err = sp_384_to_mp(p->x, pm->x);
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->y, pm->y);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->z, pm->z);
+    }
+
+    return err;
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_384_mul_15(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int i, j, k;
+    int64_t c;
+
+    c = ((int64_t)a[14]) * b[14];
+    r[29] = (sp_digit)(c >> 26);
+    c = (c & 0x3ffffff) << 26;
+    for (k = 27; k >= 0; k--) {
+        for (i = 14; i >= 0; i--) {
+            j = k - i;
+            if (j >= 15) {
+                break;
+            }
+            if (j < 0) {
+                continue;
+            }
+
+            c += ((int64_t)a[i]) * b[j];
+        }
+        r[k + 2] += c >> 52;
+        r[k + 1] = (c >> 26) & 0x3ffffff;
+        c = (c & 0x3ffffff) << 26;
+    }
+    r[0] = (sp_digit)(c >> 26);
+}
+
+#else
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_384_mul_15(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int64_t t0   = ((int64_t)a[ 0]) * b[ 0];
+    int64_t t1   = ((int64_t)a[ 0]) * b[ 1]
+                 + ((int64_t)a[ 1]) * b[ 0];
+    int64_t t2   = ((int64_t)a[ 0]) * b[ 2]
+                 + ((int64_t)a[ 1]) * b[ 1]
+                 + ((int64_t)a[ 2]) * b[ 0];
+    int64_t t3   = ((int64_t)a[ 0]) * b[ 3]
+                 + ((int64_t)a[ 1]) * b[ 2]
+                 + ((int64_t)a[ 2]) * b[ 1]
+                 + ((int64_t)a[ 3]) * b[ 0];
+    int64_t t4   = ((int64_t)a[ 0]) * b[ 4]
+                 + ((int64_t)a[ 1]) * b[ 3]
+                 + ((int64_t)a[ 2]) * b[ 2]
+                 + ((int64_t)a[ 3]) * b[ 1]
+                 + ((int64_t)a[ 4]) * b[ 0];
+    int64_t t5   = ((int64_t)a[ 0]) * b[ 5]
+                 + ((int64_t)a[ 1]) * b[ 4]
+                 + ((int64_t)a[ 2]) * b[ 3]
+                 + ((int64_t)a[ 3]) * b[ 2]
+                 + ((int64_t)a[ 4]) * b[ 1]
+                 + ((int64_t)a[ 5]) * b[ 0];
+    int64_t t6   = ((int64_t)a[ 0]) * b[ 6]
+                 + ((int64_t)a[ 1]) * b[ 5]
+                 + ((int64_t)a[ 2]) * b[ 4]
+                 + ((int64_t)a[ 3]) * b[ 3]
+                 + ((int64_t)a[ 4]) * b[ 2]
+                 + ((int64_t)a[ 5]) * b[ 1]
+                 + ((int64_t)a[ 6]) * b[ 0];
+    int64_t t7   = ((int64_t)a[ 0]) * b[ 7]
+                 + ((int64_t)a[ 1]) * b[ 6]
+                 + ((int64_t)a[ 2]) * b[ 5]
+                 + ((int64_t)a[ 3]) * b[ 4]
+                 + ((int64_t)a[ 4]) * b[ 3]
+                 + ((int64_t)a[ 5]) * b[ 2]
+                 + ((int64_t)a[ 6]) * b[ 1]
+                 + ((int64_t)a[ 7]) * b[ 0];
+    int64_t t8   = ((int64_t)a[ 0]) * b[ 8]
+                 + ((int64_t)a[ 1]) * b[ 7]
+                 + ((int64_t)a[ 2]) * b[ 6]
+                 + ((int64_t)a[ 3]) * b[ 5]
+                 + ((int64_t)a[ 4]) * b[ 4]
+                 + ((int64_t)a[ 5]) * b[ 3]
+                 + ((int64_t)a[ 6]) * b[ 2]
+                 + ((int64_t)a[ 7]) * b[ 1]
+                 + ((int64_t)a[ 8]) * b[ 0];
+    int64_t t9   = ((int64_t)a[ 0]) * b[ 9]
+                 + ((int64_t)a[ 1]) * b[ 8]
+                 + ((int64_t)a[ 2]) * b[ 7]
+                 + ((int64_t)a[ 3]) * b[ 6]
+                 + ((int64_t)a[ 4]) * b[ 5]
+                 + ((int64_t)a[ 5]) * b[ 4]
+                 + ((int64_t)a[ 6]) * b[ 3]
+                 + ((int64_t)a[ 7]) * b[ 2]
+                 + ((int64_t)a[ 8]) * b[ 1]
+                 + ((int64_t)a[ 9]) * b[ 0];
+    int64_t t10  = ((int64_t)a[ 0]) * b[10]
+                 + ((int64_t)a[ 1]) * b[ 9]
+                 + ((int64_t)a[ 2]) * b[ 8]
+                 + ((int64_t)a[ 3]) * b[ 7]
+                 + ((int64_t)a[ 4]) * b[ 6]
+                 + ((int64_t)a[ 5]) * b[ 5]
+                 + ((int64_t)a[ 6]) * b[ 4]
+                 + ((int64_t)a[ 7]) * b[ 3]
+                 + ((int64_t)a[ 8]) * b[ 2]
+                 + ((int64_t)a[ 9]) * b[ 1]
+                 + ((int64_t)a[10]) * b[ 0];
+    int64_t t11  = ((int64_t)a[ 0]) * b[11]
+                 + ((int64_t)a[ 1]) * b[10]
+                 + ((int64_t)a[ 2]) * b[ 9]
+                 + ((int64_t)a[ 3]) * b[ 8]
+                 + ((int64_t)a[ 4]) * b[ 7]
+                 + ((int64_t)a[ 5]) * b[ 6]
+                 + ((int64_t)a[ 6]) * b[ 5]
+                 + ((int64_t)a[ 7]) * b[ 4]
+                 + ((int64_t)a[ 8]) * b[ 3]
+                 + ((int64_t)a[ 9]) * b[ 2]
+                 + ((int64_t)a[10]) * b[ 1]
+                 + ((int64_t)a[11]) * b[ 0];
+    int64_t t12  = ((int64_t)a[ 0]) * b[12]
+                 + ((int64_t)a[ 1]) * b[11]
+                 + ((int64_t)a[ 2]) * b[10]
+                 + ((int64_t)a[ 3]) * b[ 9]
+                 + ((int64_t)a[ 4]) * b[ 8]
+                 + ((int64_t)a[ 5]) * b[ 7]
+                 + ((int64_t)a[ 6]) * b[ 6]
+                 + ((int64_t)a[ 7]) * b[ 5]
+                 + ((int64_t)a[ 8]) * b[ 4]
+                 + ((int64_t)a[ 9]) * b[ 3]
+                 + ((int64_t)a[10]) * b[ 2]
+                 + ((int64_t)a[11]) * b[ 1]
+                 + ((int64_t)a[12]) * b[ 0];
+    int64_t t13  = ((int64_t)a[ 0]) * b[13]
+                 + ((int64_t)a[ 1]) * b[12]
+                 + ((int64_t)a[ 2]) * b[11]
+                 + ((int64_t)a[ 3]) * b[10]
+                 + ((int64_t)a[ 4]) * b[ 9]
+                 + ((int64_t)a[ 5]) * b[ 8]
+                 + ((int64_t)a[ 6]) * b[ 7]
+                 + ((int64_t)a[ 7]) * b[ 6]
+                 + ((int64_t)a[ 8]) * b[ 5]
+                 + ((int64_t)a[ 9]) * b[ 4]
+                 + ((int64_t)a[10]) * b[ 3]
+                 + ((int64_t)a[11]) * b[ 2]
+                 + ((int64_t)a[12]) * b[ 1]
+                 + ((int64_t)a[13]) * b[ 0];
+    int64_t t14  = ((int64_t)a[ 0]) * b[14]
+                 + ((int64_t)a[ 1]) * b[13]
+                 + ((int64_t)a[ 2]) * b[12]
+                 + ((int64_t)a[ 3]) * b[11]
+                 + ((int64_t)a[ 4]) * b[10]
+                 + ((int64_t)a[ 5]) * b[ 9]
+                 + ((int64_t)a[ 6]) * b[ 8]
+                 + ((int64_t)a[ 7]) * b[ 7]
+                 + ((int64_t)a[ 8]) * b[ 6]
+                 + ((int64_t)a[ 9]) * b[ 5]
+                 + ((int64_t)a[10]) * b[ 4]
+                 + ((int64_t)a[11]) * b[ 3]
+                 + ((int64_t)a[12]) * b[ 2]
+                 + ((int64_t)a[13]) * b[ 1]
+                 + ((int64_t)a[14]) * b[ 0];
+    int64_t t15  = ((int64_t)a[ 1]) * b[14]
+                 + ((int64_t)a[ 2]) * b[13]
+                 + ((int64_t)a[ 3]) * b[12]
+                 + ((int64_t)a[ 4]) * b[11]
+                 + ((int64_t)a[ 5]) * b[10]
+                 + ((int64_t)a[ 6]) * b[ 9]
+                 + ((int64_t)a[ 7]) * b[ 8]
+                 + ((int64_t)a[ 8]) * b[ 7]
+                 + ((int64_t)a[ 9]) * b[ 6]
+                 + ((int64_t)a[10]) * b[ 5]
+                 + ((int64_t)a[11]) * b[ 4]
+                 + ((int64_t)a[12]) * b[ 3]
+                 + ((int64_t)a[13]) * b[ 2]
+                 + ((int64_t)a[14]) * b[ 1];
+    int64_t t16  = ((int64_t)a[ 2]) * b[14]
+                 + ((int64_t)a[ 3]) * b[13]
+                 + ((int64_t)a[ 4]) * b[12]
+                 + ((int64_t)a[ 5]) * b[11]
+                 + ((int64_t)a[ 6]) * b[10]
+                 + ((int64_t)a[ 7]) * b[ 9]
+                 + ((int64_t)a[ 8]) * b[ 8]
+                 + ((int64_t)a[ 9]) * b[ 7]
+                 + ((int64_t)a[10]) * b[ 6]
+                 + ((int64_t)a[11]) * b[ 5]
+                 + ((int64_t)a[12]) * b[ 4]
+                 + ((int64_t)a[13]) * b[ 3]
+                 + ((int64_t)a[14]) * b[ 2];
+    int64_t t17  = ((int64_t)a[ 3]) * b[14]
+                 + ((int64_t)a[ 4]) * b[13]
+                 + ((int64_t)a[ 5]) * b[12]
+                 + ((int64_t)a[ 6]) * b[11]
+                 + ((int64_t)a[ 7]) * b[10]
+                 + ((int64_t)a[ 8]) * b[ 9]
+                 + ((int64_t)a[ 9]) * b[ 8]
+                 + ((int64_t)a[10]) * b[ 7]
+                 + ((int64_t)a[11]) * b[ 6]
+                 + ((int64_t)a[12]) * b[ 5]
+                 + ((int64_t)a[13]) * b[ 4]
+                 + ((int64_t)a[14]) * b[ 3];
+    int64_t t18  = ((int64_t)a[ 4]) * b[14]
+                 + ((int64_t)a[ 5]) * b[13]
+                 + ((int64_t)a[ 6]) * b[12]
+                 + ((int64_t)a[ 7]) * b[11]
+                 + ((int64_t)a[ 8]) * b[10]
+                 + ((int64_t)a[ 9]) * b[ 9]
+                 + ((int64_t)a[10]) * b[ 8]
+                 + ((int64_t)a[11]) * b[ 7]
+                 + ((int64_t)a[12]) * b[ 6]
+                 + ((int64_t)a[13]) * b[ 5]
+                 + ((int64_t)a[14]) * b[ 4];
+    int64_t t19  = ((int64_t)a[ 5]) * b[14]
+                 + ((int64_t)a[ 6]) * b[13]
+                 + ((int64_t)a[ 7]) * b[12]
+                 + ((int64_t)a[ 8]) * b[11]
+                 + ((int64_t)a[ 9]) * b[10]
+                 + ((int64_t)a[10]) * b[ 9]
+                 + ((int64_t)a[11]) * b[ 8]
+                 + ((int64_t)a[12]) * b[ 7]
+                 + ((int64_t)a[13]) * b[ 6]
+                 + ((int64_t)a[14]) * b[ 5];
+    int64_t t20  = ((int64_t)a[ 6]) * b[14]
+                 + ((int64_t)a[ 7]) * b[13]
+                 + ((int64_t)a[ 8]) * b[12]
+                 + ((int64_t)a[ 9]) * b[11]
+                 + ((int64_t)a[10]) * b[10]
+                 + ((int64_t)a[11]) * b[ 9]
+                 + ((int64_t)a[12]) * b[ 8]
+                 + ((int64_t)a[13]) * b[ 7]
+                 + ((int64_t)a[14]) * b[ 6];
+    int64_t t21  = ((int64_t)a[ 7]) * b[14]
+                 + ((int64_t)a[ 8]) * b[13]
+                 + ((int64_t)a[ 9]) * b[12]
+                 + ((int64_t)a[10]) * b[11]
+                 + ((int64_t)a[11]) * b[10]
+                 + ((int64_t)a[12]) * b[ 9]
+                 + ((int64_t)a[13]) * b[ 8]
+                 + ((int64_t)a[14]) * b[ 7];
+    int64_t t22  = ((int64_t)a[ 8]) * b[14]
+                 + ((int64_t)a[ 9]) * b[13]
+                 + ((int64_t)a[10]) * b[12]
+                 + ((int64_t)a[11]) * b[11]
+                 + ((int64_t)a[12]) * b[10]
+                 + ((int64_t)a[13]) * b[ 9]
+                 + ((int64_t)a[14]) * b[ 8];
+    int64_t t23  = ((int64_t)a[ 9]) * b[14]
+                 + ((int64_t)a[10]) * b[13]
+                 + ((int64_t)a[11]) * b[12]
+                 + ((int64_t)a[12]) * b[11]
+                 + ((int64_t)a[13]) * b[10]
+                 + ((int64_t)a[14]) * b[ 9];
+    int64_t t24  = ((int64_t)a[10]) * b[14]
+                 + ((int64_t)a[11]) * b[13]
+                 + ((int64_t)a[12]) * b[12]
+                 + ((int64_t)a[13]) * b[11]
+                 + ((int64_t)a[14]) * b[10];
+    int64_t t25  = ((int64_t)a[11]) * b[14]
+                 + ((int64_t)a[12]) * b[13]
+                 + ((int64_t)a[13]) * b[12]
+                 + ((int64_t)a[14]) * b[11];
+    int64_t t26  = ((int64_t)a[12]) * b[14]
+                 + ((int64_t)a[13]) * b[13]
+                 + ((int64_t)a[14]) * b[12];
+    int64_t t27  = ((int64_t)a[13]) * b[14]
+                 + ((int64_t)a[14]) * b[13];
+    int64_t t28  = ((int64_t)a[14]) * b[14];
+
+    t1   += t0  >> 26; r[ 0] = t0  & 0x3ffffff;
+    t2   += t1  >> 26; r[ 1] = t1  & 0x3ffffff;
+    t3   += t2  >> 26; r[ 2] = t2  & 0x3ffffff;
+    t4   += t3  >> 26; r[ 3] = t3  & 0x3ffffff;
+    t5   += t4  >> 26; r[ 4] = t4  & 0x3ffffff;
+    t6   += t5  >> 26; r[ 5] = t5  & 0x3ffffff;
+    t7   += t6  >> 26; r[ 6] = t6  & 0x3ffffff;
+    t8   += t7  >> 26; r[ 7] = t7  & 0x3ffffff;
+    t9   += t8  >> 26; r[ 8] = t8  & 0x3ffffff;
+    t10  += t9  >> 26; r[ 9] = t9  & 0x3ffffff;
+    t11  += t10 >> 26; r[10] = t10 & 0x3ffffff;
+    t12  += t11 >> 26; r[11] = t11 & 0x3ffffff;
+    t13  += t12 >> 26; r[12] = t12 & 0x3ffffff;
+    t14  += t13 >> 26; r[13] = t13 & 0x3ffffff;
+    t15  += t14 >> 26; r[14] = t14 & 0x3ffffff;
+    t16  += t15 >> 26; r[15] = t15 & 0x3ffffff;
+    t17  += t16 >> 26; r[16] = t16 & 0x3ffffff;
+    t18  += t17 >> 26; r[17] = t17 & 0x3ffffff;
+    t19  += t18 >> 26; r[18] = t18 & 0x3ffffff;
+    t20  += t19 >> 26; r[19] = t19 & 0x3ffffff;
+    t21  += t20 >> 26; r[20] = t20 & 0x3ffffff;
+    t22  += t21 >> 26; r[21] = t21 & 0x3ffffff;
+    t23  += t22 >> 26; r[22] = t22 & 0x3ffffff;
+    t24  += t23 >> 26; r[23] = t23 & 0x3ffffff;
+    t25  += t24 >> 26; r[24] = t24 & 0x3ffffff;
+    t26  += t25 >> 26; r[25] = t25 & 0x3ffffff;
+    t27  += t26 >> 26; r[26] = t26 & 0x3ffffff;
+    t28  += t27 >> 26; r[27] = t27 & 0x3ffffff;
+    r[29] = (sp_digit)(t28 >> 26);
+                       r[28] = t28 & 0x3ffffff;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#define sp_384_mont_reduce_order_15         sp_384_mont_reduce_15
+
+/* Compare a with b in constant time.
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+static sp_digit sp_384_cmp_15(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=14; i>=0; i--) {
+        r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    }
+#else
+    r |= (a[14] - b[14]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[13] - b[13]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[12] - b[12]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[11] - b[11]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[10] - b[10]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 9] - b[ 9]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 8] - b[ 8]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 7] - b[ 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 6] - b[ 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 5] - b[ 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 4] - b[ 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 3] - b[ 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 2] - b[ 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 1] - b[ 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 0] - b[ 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+#endif /* WOLFSSL_SP_SMALL */
+
+    return r;
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not.
+ *
+ * r  A single precision number representing condition subtract result.
+ * a  A single precision number to subtract from.
+ * b  A single precision number to subtract.
+ * m  Mask value to apply.
+ */
+static void sp_384_cond_sub_15(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i = 0; i < 15; i++) {
+        r[i] = a[i] - (b[i] & m);
+    }
+#else
+    r[ 0] = a[ 0] - (b[ 0] & m);
+    r[ 1] = a[ 1] - (b[ 1] & m);
+    r[ 2] = a[ 2] - (b[ 2] & m);
+    r[ 3] = a[ 3] - (b[ 3] & m);
+    r[ 4] = a[ 4] - (b[ 4] & m);
+    r[ 5] = a[ 5] - (b[ 5] & m);
+    r[ 6] = a[ 6] - (b[ 6] & m);
+    r[ 7] = a[ 7] - (b[ 7] & m);
+    r[ 8] = a[ 8] - (b[ 8] & m);
+    r[ 9] = a[ 9] - (b[ 9] & m);
+    r[10] = a[10] - (b[10] & m);
+    r[11] = a[11] - (b[11] & m);
+    r[12] = a[12] - (b[12] & m);
+    r[13] = a[13] - (b[13] & m);
+    r[14] = a[14] - (b[14] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Mul a by scalar b and add into r. (r += a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A scalar.
+ */
+SP_NOINLINE static void sp_384_mul_add_15(sp_digit* r, const sp_digit* a,
+        const sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int64_t tb = b;
+    int64_t t = 0;
+    int i;
+
+    for (i = 0; i < 15; i++) {
+        t += (tb * a[i]) + r[i];
+        r[i] = t & 0x3ffffff;
+        t >>= 26;
+    }
+    r[15] += t;
+#else
+    int64_t tb = b;
+    int64_t t[15];
+
+    t[ 0] = tb * a[ 0];
+    t[ 1] = tb * a[ 1];
+    t[ 2] = tb * a[ 2];
+    t[ 3] = tb * a[ 3];
+    t[ 4] = tb * a[ 4];
+    t[ 5] = tb * a[ 5];
+    t[ 6] = tb * a[ 6];
+    t[ 7] = tb * a[ 7];
+    t[ 8] = tb * a[ 8];
+    t[ 9] = tb * a[ 9];
+    t[10] = tb * a[10];
+    t[11] = tb * a[11];
+    t[12] = tb * a[12];
+    t[13] = tb * a[13];
+    t[14] = tb * a[14];
+    r[ 0] += (sp_digit)                 (t[ 0] & 0x3ffffff);
+    r[ 1] += (sp_digit)((t[ 0] >> 26) + (t[ 1] & 0x3ffffff));
+    r[ 2] += (sp_digit)((t[ 1] >> 26) + (t[ 2] & 0x3ffffff));
+    r[ 3] += (sp_digit)((t[ 2] >> 26) + (t[ 3] & 0x3ffffff));
+    r[ 4] += (sp_digit)((t[ 3] >> 26) + (t[ 4] & 0x3ffffff));
+    r[ 5] += (sp_digit)((t[ 4] >> 26) + (t[ 5] & 0x3ffffff));
+    r[ 6] += (sp_digit)((t[ 5] >> 26) + (t[ 6] & 0x3ffffff));
+    r[ 7] += (sp_digit)((t[ 6] >> 26) + (t[ 7] & 0x3ffffff));
+    r[ 8] += (sp_digit)((t[ 7] >> 26) + (t[ 8] & 0x3ffffff));
+    r[ 9] += (sp_digit)((t[ 8] >> 26) + (t[ 9] & 0x3ffffff));
+    r[10] += (sp_digit)((t[ 9] >> 26) + (t[10] & 0x3ffffff));
+    r[11] += (sp_digit)((t[10] >> 26) + (t[11] & 0x3ffffff));
+    r[12] += (sp_digit)((t[11] >> 26) + (t[12] & 0x3ffffff));
+    r[13] += (sp_digit)((t[12] >> 26) + (t[13] & 0x3ffffff));
+    r[14] += (sp_digit)((t[13] >> 26) + (t[14] & 0x3ffffff));
+    r[15] += (sp_digit) (t[14] >> 26);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Normalize the values in each word to 26.
+ *
+ * a  Array of sp_digit to normalize.
+ */
+static void sp_384_norm_15(sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    for (i = 0; i < 14; i++) {
+        a[i+1] += a[i] >> 26;
+        a[i] &= 0x3ffffff;
+    }
+#else
+    a[1] += a[0] >> 26; a[0] &= 0x3ffffff;
+    a[2] += a[1] >> 26; a[1] &= 0x3ffffff;
+    a[3] += a[2] >> 26; a[2] &= 0x3ffffff;
+    a[4] += a[3] >> 26; a[3] &= 0x3ffffff;
+    a[5] += a[4] >> 26; a[4] &= 0x3ffffff;
+    a[6] += a[5] >> 26; a[5] &= 0x3ffffff;
+    a[7] += a[6] >> 26; a[6] &= 0x3ffffff;
+    a[8] += a[7] >> 26; a[7] &= 0x3ffffff;
+    a[9] += a[8] >> 26; a[8] &= 0x3ffffff;
+    a[10] += a[9] >> 26; a[9] &= 0x3ffffff;
+    a[11] += a[10] >> 26; a[10] &= 0x3ffffff;
+    a[12] += a[11] >> 26; a[11] &= 0x3ffffff;
+    a[13] += a[12] >> 26; a[12] &= 0x3ffffff;
+    a[14] += a[13] >> 26; a[13] &= 0x3ffffff;
+#endif
+}
+
+/* Shift the result in the high 384 bits down to the bottom.
+ *
+ * r  A single precision number.
+ * a  A single precision number.
+ */
+static void sp_384_mont_shift_15(sp_digit* r, const sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    int64_t n = a[14] >> 20;
+    n += ((int64_t)a[15]) << 6;
+
+    for (i = 0; i < 14; i++) {
+        r[i] = n & 0x3ffffff;
+        n >>= 26;
+        n += ((int64_t)a[16 + i]) << 6;
+    }
+    r[14] = (sp_digit)n;
+#else
+    int64_t n = a[14] >> 20;
+    n += ((int64_t)a[15]) << 6;
+    r[ 0] = n & 0x3ffffff; n >>= 26; n += ((int64_t)a[16]) << 6;
+    r[ 1] = n & 0x3ffffff; n >>= 26; n += ((int64_t)a[17]) << 6;
+    r[ 2] = n & 0x3ffffff; n >>= 26; n += ((int64_t)a[18]) << 6;
+    r[ 3] = n & 0x3ffffff; n >>= 26; n += ((int64_t)a[19]) << 6;
+    r[ 4] = n & 0x3ffffff; n >>= 26; n += ((int64_t)a[20]) << 6;
+    r[ 5] = n & 0x3ffffff; n >>= 26; n += ((int64_t)a[21]) << 6;
+    r[ 6] = n & 0x3ffffff; n >>= 26; n += ((int64_t)a[22]) << 6;
+    r[ 7] = n & 0x3ffffff; n >>= 26; n += ((int64_t)a[23]) << 6;
+    r[ 8] = n & 0x3ffffff; n >>= 26; n += ((int64_t)a[24]) << 6;
+    r[ 9] = n & 0x3ffffff; n >>= 26; n += ((int64_t)a[25]) << 6;
+    r[10] = n & 0x3ffffff; n >>= 26; n += ((int64_t)a[26]) << 6;
+    r[11] = n & 0x3ffffff; n >>= 26; n += ((int64_t)a[27]) << 6;
+    r[12] = n & 0x3ffffff; n >>= 26; n += ((int64_t)a[28]) << 6;
+    r[13] = n & 0x3ffffff; n >>= 26; n += ((int64_t)a[29]) << 6;
+    r[14] = (sp_digit)n;
+#endif /* WOLFSSL_SP_SMALL */
+    XMEMSET(&r[15], 0, sizeof(*r) * 15U);
+}
+
+/* Reduce the number back to 384 bits using Montgomery reduction.
+ *
+ * a   A single precision number to reduce in place.
+ * m   The single precision number representing the modulus.
+ * mp  The digit representing the negative inverse of m mod 2^n.
+ */
+static void sp_384_mont_reduce_15(sp_digit* a, const sp_digit* m, sp_digit mp)
+{
+    int i;
+    sp_digit mu;
+
+    sp_384_norm_15(a + 15);
+
+    for (i=0; i<14; i++) {
+        mu = (a[i] * mp) & 0x3ffffff;
+        sp_384_mul_add_15(a+i, m, mu);
+        a[i+1] += a[i] >> 26;
+    }
+    mu = (a[i] * mp) & 0xfffffL;
+    sp_384_mul_add_15(a+i, m, mu);
+    a[i+1] += a[i] >> 26;
+    a[i] &= 0x3ffffff;
+
+    sp_384_mont_shift_15(a, a);
+    sp_384_cond_sub_15(a, a, m, 0 - (((a[14] >> 20) > 0) ?
+            (sp_digit)1 : (sp_digit)0));
+    sp_384_norm_15(a);
+}
+
+/* Multiply two Montogmery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r   Result of multiplication.
+ * a   First number to multiply in Montogmery form.
+ * b   Second number to multiply in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_384_mont_mul_15(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_384_mul_15(r, a, b);
+    sp_384_mont_reduce_15(r, m, mp);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_384_sqr_15(sp_digit* r, const sp_digit* a)
+{
+    int i, j, k;
+    int64_t c;
+
+    c = ((int64_t)a[14]) * a[14];
+    r[29] = (sp_digit)(c >> 26);
+    c = (c & 0x3ffffff) << 26;
+    for (k = 27; k >= 0; k--) {
+        for (i = 14; i >= 0; i--) {
+            j = k - i;
+            if (j >= 15 || i <= j) {
+                break;
+            }
+            if (j < 0) {
+                continue;
+            }
+
+            c += ((int64_t)a[i]) * a[j] * 2;
+        }
+        if (i == j) {
+           c += ((int64_t)a[i]) * a[i];
+        }
+
+        r[k + 2] += c >> 52;
+        r[k + 1] = (c >> 26) & 0x3ffffff;
+        c = (c & 0x3ffffff) << 26;
+    }
+    r[0] = (sp_digit)(c >> 26);
+}
+
+#else
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_384_sqr_15(sp_digit* r, const sp_digit* a)
+{
+    int64_t t0   =  ((int64_t)a[ 0]) * a[ 0];
+    int64_t t1   = (((int64_t)a[ 0]) * a[ 1]) * 2;
+    int64_t t2   = (((int64_t)a[ 0]) * a[ 2]) * 2
+                 +  ((int64_t)a[ 1]) * a[ 1];
+    int64_t t3   = (((int64_t)a[ 0]) * a[ 3]
+                 +  ((int64_t)a[ 1]) * a[ 2]) * 2;
+    int64_t t4   = (((int64_t)a[ 0]) * a[ 4]
+                 +  ((int64_t)a[ 1]) * a[ 3]) * 2
+                 +  ((int64_t)a[ 2]) * a[ 2];
+    int64_t t5   = (((int64_t)a[ 0]) * a[ 5]
+                 +  ((int64_t)a[ 1]) * a[ 4]
+                 +  ((int64_t)a[ 2]) * a[ 3]) * 2;
+    int64_t t6   = (((int64_t)a[ 0]) * a[ 6]
+                 +  ((int64_t)a[ 1]) * a[ 5]
+                 +  ((int64_t)a[ 2]) * a[ 4]) * 2
+                 +  ((int64_t)a[ 3]) * a[ 3];
+    int64_t t7   = (((int64_t)a[ 0]) * a[ 7]
+                 +  ((int64_t)a[ 1]) * a[ 6]
+                 +  ((int64_t)a[ 2]) * a[ 5]
+                 +  ((int64_t)a[ 3]) * a[ 4]) * 2;
+    int64_t t8   = (((int64_t)a[ 0]) * a[ 8]
+                 +  ((int64_t)a[ 1]) * a[ 7]
+                 +  ((int64_t)a[ 2]) * a[ 6]
+                 +  ((int64_t)a[ 3]) * a[ 5]) * 2
+                 +  ((int64_t)a[ 4]) * a[ 4];
+    int64_t t9   = (((int64_t)a[ 0]) * a[ 9]
+                 +  ((int64_t)a[ 1]) * a[ 8]
+                 +  ((int64_t)a[ 2]) * a[ 7]
+                 +  ((int64_t)a[ 3]) * a[ 6]
+                 +  ((int64_t)a[ 4]) * a[ 5]) * 2;
+    int64_t t10  = (((int64_t)a[ 0]) * a[10]
+                 +  ((int64_t)a[ 1]) * a[ 9]
+                 +  ((int64_t)a[ 2]) * a[ 8]
+                 +  ((int64_t)a[ 3]) * a[ 7]
+                 +  ((int64_t)a[ 4]) * a[ 6]) * 2
+                 +  ((int64_t)a[ 5]) * a[ 5];
+    int64_t t11  = (((int64_t)a[ 0]) * a[11]
+                 +  ((int64_t)a[ 1]) * a[10]
+                 +  ((int64_t)a[ 2]) * a[ 9]
+                 +  ((int64_t)a[ 3]) * a[ 8]
+                 +  ((int64_t)a[ 4]) * a[ 7]
+                 +  ((int64_t)a[ 5]) * a[ 6]) * 2;
+    int64_t t12  = (((int64_t)a[ 0]) * a[12]
+                 +  ((int64_t)a[ 1]) * a[11]
+                 +  ((int64_t)a[ 2]) * a[10]
+                 +  ((int64_t)a[ 3]) * a[ 9]
+                 +  ((int64_t)a[ 4]) * a[ 8]
+                 +  ((int64_t)a[ 5]) * a[ 7]) * 2
+                 +  ((int64_t)a[ 6]) * a[ 6];
+    int64_t t13  = (((int64_t)a[ 0]) * a[13]
+                 +  ((int64_t)a[ 1]) * a[12]
+                 +  ((int64_t)a[ 2]) * a[11]
+                 +  ((int64_t)a[ 3]) * a[10]
+                 +  ((int64_t)a[ 4]) * a[ 9]
+                 +  ((int64_t)a[ 5]) * a[ 8]
+                 +  ((int64_t)a[ 6]) * a[ 7]) * 2;
+    int64_t t14  = (((int64_t)a[ 0]) * a[14]
+                 +  ((int64_t)a[ 1]) * a[13]
+                 +  ((int64_t)a[ 2]) * a[12]
+                 +  ((int64_t)a[ 3]) * a[11]
+                 +  ((int64_t)a[ 4]) * a[10]
+                 +  ((int64_t)a[ 5]) * a[ 9]
+                 +  ((int64_t)a[ 6]) * a[ 8]) * 2
+                 +  ((int64_t)a[ 7]) * a[ 7];
+    int64_t t15  = (((int64_t)a[ 1]) * a[14]
+                 +  ((int64_t)a[ 2]) * a[13]
+                 +  ((int64_t)a[ 3]) * a[12]
+                 +  ((int64_t)a[ 4]) * a[11]
+                 +  ((int64_t)a[ 5]) * a[10]
+                 +  ((int64_t)a[ 6]) * a[ 9]
+                 +  ((int64_t)a[ 7]) * a[ 8]) * 2;
+    int64_t t16  = (((int64_t)a[ 2]) * a[14]
+                 +  ((int64_t)a[ 3]) * a[13]
+                 +  ((int64_t)a[ 4]) * a[12]
+                 +  ((int64_t)a[ 5]) * a[11]
+                 +  ((int64_t)a[ 6]) * a[10]
+                 +  ((int64_t)a[ 7]) * a[ 9]) * 2
+                 +  ((int64_t)a[ 8]) * a[ 8];
+    int64_t t17  = (((int64_t)a[ 3]) * a[14]
+                 +  ((int64_t)a[ 4]) * a[13]
+                 +  ((int64_t)a[ 5]) * a[12]
+                 +  ((int64_t)a[ 6]) * a[11]
+                 +  ((int64_t)a[ 7]) * a[10]
+                 +  ((int64_t)a[ 8]) * a[ 9]) * 2;
+    int64_t t18  = (((int64_t)a[ 4]) * a[14]
+                 +  ((int64_t)a[ 5]) * a[13]
+                 +  ((int64_t)a[ 6]) * a[12]
+                 +  ((int64_t)a[ 7]) * a[11]
+                 +  ((int64_t)a[ 8]) * a[10]) * 2
+                 +  ((int64_t)a[ 9]) * a[ 9];
+    int64_t t19  = (((int64_t)a[ 5]) * a[14]
+                 +  ((int64_t)a[ 6]) * a[13]
+                 +  ((int64_t)a[ 7]) * a[12]
+                 +  ((int64_t)a[ 8]) * a[11]
+                 +  ((int64_t)a[ 9]) * a[10]) * 2;
+    int64_t t20  = (((int64_t)a[ 6]) * a[14]
+                 +  ((int64_t)a[ 7]) * a[13]
+                 +  ((int64_t)a[ 8]) * a[12]
+                 +  ((int64_t)a[ 9]) * a[11]) * 2
+                 +  ((int64_t)a[10]) * a[10];
+    int64_t t21  = (((int64_t)a[ 7]) * a[14]
+                 +  ((int64_t)a[ 8]) * a[13]
+                 +  ((int64_t)a[ 9]) * a[12]
+                 +  ((int64_t)a[10]) * a[11]) * 2;
+    int64_t t22  = (((int64_t)a[ 8]) * a[14]
+                 +  ((int64_t)a[ 9]) * a[13]
+                 +  ((int64_t)a[10]) * a[12]) * 2
+                 +  ((int64_t)a[11]) * a[11];
+    int64_t t23  = (((int64_t)a[ 9]) * a[14]
+                 +  ((int64_t)a[10]) * a[13]
+                 +  ((int64_t)a[11]) * a[12]) * 2;
+    int64_t t24  = (((int64_t)a[10]) * a[14]
+                 +  ((int64_t)a[11]) * a[13]) * 2
+                 +  ((int64_t)a[12]) * a[12];
+    int64_t t25  = (((int64_t)a[11]) * a[14]
+                 +  ((int64_t)a[12]) * a[13]) * 2;
+    int64_t t26  = (((int64_t)a[12]) * a[14]) * 2
+                 +  ((int64_t)a[13]) * a[13];
+    int64_t t27  = (((int64_t)a[13]) * a[14]) * 2;
+    int64_t t28  =  ((int64_t)a[14]) * a[14];
+
+    t1   += t0  >> 26; r[ 0] = t0  & 0x3ffffff;
+    t2   += t1  >> 26; r[ 1] = t1  & 0x3ffffff;
+    t3   += t2  >> 26; r[ 2] = t2  & 0x3ffffff;
+    t4   += t3  >> 26; r[ 3] = t3  & 0x3ffffff;
+    t5   += t4  >> 26; r[ 4] = t4  & 0x3ffffff;
+    t6   += t5  >> 26; r[ 5] = t5  & 0x3ffffff;
+    t7   += t6  >> 26; r[ 6] = t6  & 0x3ffffff;
+    t8   += t7  >> 26; r[ 7] = t7  & 0x3ffffff;
+    t9   += t8  >> 26; r[ 8] = t8  & 0x3ffffff;
+    t10  += t9  >> 26; r[ 9] = t9  & 0x3ffffff;
+    t11  += t10 >> 26; r[10] = t10 & 0x3ffffff;
+    t12  += t11 >> 26; r[11] = t11 & 0x3ffffff;
+    t13  += t12 >> 26; r[12] = t12 & 0x3ffffff;
+    t14  += t13 >> 26; r[13] = t13 & 0x3ffffff;
+    t15  += t14 >> 26; r[14] = t14 & 0x3ffffff;
+    t16  += t15 >> 26; r[15] = t15 & 0x3ffffff;
+    t17  += t16 >> 26; r[16] = t16 & 0x3ffffff;
+    t18  += t17 >> 26; r[17] = t17 & 0x3ffffff;
+    t19  += t18 >> 26; r[18] = t18 & 0x3ffffff;
+    t20  += t19 >> 26; r[19] = t19 & 0x3ffffff;
+    t21  += t20 >> 26; r[20] = t20 & 0x3ffffff;
+    t22  += t21 >> 26; r[21] = t21 & 0x3ffffff;
+    t23  += t22 >> 26; r[22] = t22 & 0x3ffffff;
+    t24  += t23 >> 26; r[23] = t23 & 0x3ffffff;
+    t25  += t24 >> 26; r[24] = t24 & 0x3ffffff;
+    t26  += t25 >> 26; r[25] = t25 & 0x3ffffff;
+    t27  += t26 >> 26; r[26] = t26 & 0x3ffffff;
+    t28  += t27 >> 26; r[27] = t27 & 0x3ffffff;
+    r[29] = (sp_digit)(t28 >> 26);
+                       r[28] = t28 & 0x3ffffff;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_384_mont_sqr_15(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_384_sqr_15(r, a);
+    sp_384_mont_reduce_15(r, m, mp);
+}
+
+#if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY)
+/* Square the Montgomery form number a number of times. (r = a ^ n mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montogmery form.
+ * n   Number of times to square.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_384_mont_sqr_n_15(sp_digit* r, const sp_digit* a, int n,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_384_mont_sqr_15(r, a, m, mp);
+    for (; n > 1; n--) {
+        sp_384_mont_sqr_15(r, r, m, mp);
+    }
+}
+
+#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */
+#ifdef WOLFSSL_SP_SMALL
+/* Mod-2 for the P384 curve. */
+static const uint32_t p384_mod_minus_2[12] = {
+    0xfffffffdU,0x00000000U,0x00000000U,0xffffffffU,0xfffffffeU,0xffffffffU,
+    0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU
+};
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the modulus (prime) of the
+ * P384 curve. (r = 1 / a mod m)
+ *
+ * r   Inverse result.
+ * a   Number to invert.
+ * td  Temporary data.
+ */
+static void sp_384_mont_inv_15(sp_digit* r, const sp_digit* a, sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    XMEMCPY(t, a, sizeof(sp_digit) * 15);
+    for (i=382; i>=0; i--) {
+        sp_384_mont_sqr_15(t, t, p384_mod, p384_mp_mod);
+        if (p384_mod_minus_2[i / 32] & ((sp_digit)1 << (i % 32)))
+            sp_384_mont_mul_15(t, t, a, p384_mod, p384_mp_mod);
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 15);
+#else
+    sp_digit* t1 = td;
+    sp_digit* t2 = td + 2 * 15;
+    sp_digit* t3 = td + 4 * 15;
+    sp_digit* t4 = td + 6 * 15;
+    sp_digit* t5 = td + 8 * 15;
+
+    /* 0x2 */
+    sp_384_mont_sqr_15(t1, a, p384_mod, p384_mp_mod);
+    /* 0x3 */
+    sp_384_mont_mul_15(t5, t1, a, p384_mod, p384_mp_mod);
+    /* 0xc */
+    sp_384_mont_sqr_n_15(t1, t5, 2, p384_mod, p384_mp_mod);
+    /* 0xf */
+    sp_384_mont_mul_15(t2, t5, t1, p384_mod, p384_mp_mod);
+    /* 0x1e */
+    sp_384_mont_sqr_15(t1, t2, p384_mod, p384_mp_mod);
+    /* 0x1f */
+    sp_384_mont_mul_15(t4, t1, a, p384_mod, p384_mp_mod);
+    /* 0x3e0 */
+    sp_384_mont_sqr_n_15(t1, t4, 5, p384_mod, p384_mp_mod);
+    /* 0x3ff */
+    sp_384_mont_mul_15(t2, t4, t1, p384_mod, p384_mp_mod);
+    /* 0x7fe0 */
+    sp_384_mont_sqr_n_15(t1, t2, 5, p384_mod, p384_mp_mod);
+    /* 0x7fff */
+    sp_384_mont_mul_15(t4, t4, t1, p384_mod, p384_mp_mod);
+    /* 0x3fff8000 */
+    sp_384_mont_sqr_n_15(t1, t4, 15, p384_mod, p384_mp_mod);
+    /* 0x3fffffff */
+    sp_384_mont_mul_15(t2, t4, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffc */
+    sp_384_mont_sqr_n_15(t3, t2, 2, p384_mod, p384_mp_mod);
+    /* 0xfffffffd */
+    sp_384_mont_mul_15(r, t3, a, p384_mod, p384_mp_mod);
+    /* 0xffffffff */
+    sp_384_mont_mul_15(t3, t5, t3, p384_mod, p384_mp_mod);
+    /* 0xfffffffc0000000 */
+    sp_384_mont_sqr_n_15(t1, t2, 30, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffff */
+    sp_384_mont_mul_15(t2, t2, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffff000000000000000 */
+    sp_384_mont_sqr_n_15(t1, t2, 60, p384_mod, p384_mp_mod);
+    /* 0xffffffffffffffffffffffffffffff */
+    sp_384_mont_mul_15(t2, t2, t1, p384_mod, p384_mp_mod);
+    /* 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */
+    sp_384_mont_sqr_n_15(t1, t2, 120, p384_mod, p384_mp_mod);
+    /* 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+    sp_384_mont_mul_15(t2, t2, t1, p384_mod, p384_mp_mod);
+    /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */
+    sp_384_mont_sqr_n_15(t1, t2, 15, p384_mod, p384_mp_mod);
+    /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+    sp_384_mont_mul_15(t2, t4, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe00000000 */
+    sp_384_mont_sqr_n_15(t1, t2, 33, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff */
+    sp_384_mont_mul_15(t2, t3, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff000000000000000000000000 */
+    sp_384_mont_sqr_n_15(t1, t2, 96, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000fffffffd */
+    sp_384_mont_mul_15(r, r, t1, p384_mod, p384_mp_mod);
+
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Map the Montgomery form projective coordinate point to an affine point.
+ *
+ * r  Resulting affine coordinate point.
+ * p  Montgomery form projective coordinate point.
+ * t  Temporary ordinate data.
+ */
+static void sp_384_map_15(sp_point_384* r, const sp_point_384* p, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*15;
+    int32_t n;
+
+    sp_384_mont_inv_15(t1, p->z, t + 2*15);
+
+    sp_384_mont_sqr_15(t2, t1, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_15(t1, t2, t1, p384_mod, p384_mp_mod);
+
+    /* x /= z^2 */
+    sp_384_mont_mul_15(r->x, p->x, t2, p384_mod, p384_mp_mod);
+    XMEMSET(r->x + 15, 0, sizeof(r->x) / 2U);
+    sp_384_mont_reduce_15(r->x, p384_mod, p384_mp_mod);
+    /* Reduce x to less than modulus */
+    n = sp_384_cmp_15(r->x, p384_mod);
+    sp_384_cond_sub_15(r->x, r->x, p384_mod, 0 - ((n >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_384_norm_15(r->x);
+
+    /* y /= z^3 */
+    sp_384_mont_mul_15(r->y, p->y, t1, p384_mod, p384_mp_mod);
+    XMEMSET(r->y + 15, 0, sizeof(r->y) / 2U);
+    sp_384_mont_reduce_15(r->y, p384_mod, p384_mp_mod);
+    /* Reduce y to less than modulus */
+    n = sp_384_cmp_15(r->y, p384_mod);
+    sp_384_cond_sub_15(r->y, r->y, p384_mod, 0 - ((n >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_384_norm_15(r->y);
+
+    XMEMSET(r->z, 0, sizeof(r->z));
+    r->z[0] = 1;
+
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_384_add_15(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 15; i++) {
+        r[i] = a[i] + b[i];
+    }
+
+    return 0;
+}
+#else
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_384_add_15(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    r[ 0] = a[ 0] + b[ 0];
+    r[ 1] = a[ 1] + b[ 1];
+    r[ 2] = a[ 2] + b[ 2];
+    r[ 3] = a[ 3] + b[ 3];
+    r[ 4] = a[ 4] + b[ 4];
+    r[ 5] = a[ 5] + b[ 5];
+    r[ 6] = a[ 6] + b[ 6];
+    r[ 7] = a[ 7] + b[ 7];
+    r[ 8] = a[ 8] + b[ 8];
+    r[ 9] = a[ 9] + b[ 9];
+    r[10] = a[10] + b[10];
+    r[11] = a[11] + b[11];
+    r[12] = a[12] + b[12];
+    r[13] = a[13] + b[13];
+    r[14] = a[14] + b[14];
+
+    return 0;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Add two Montgomery form numbers (r = a + b % m).
+ *
+ * r   Result of addition.
+ * a   First number to add in Montogmery form.
+ * b   Second number to add in Montogmery form.
+ * m   Modulus (prime).
+ */
+static void sp_384_mont_add_15(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    (void)sp_384_add_15(r, a, b);
+    sp_384_norm_15(r);
+    sp_384_cond_sub_15(r, r, m, 0 - (((r[14] >> 20) > 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_384_norm_15(r);
+}
+
+/* Double a Montgomery form number (r = a + a % m).
+ *
+ * r   Result of doubling.
+ * a   Number to double in Montogmery form.
+ * m   Modulus (prime).
+ */
+static void sp_384_mont_dbl_15(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    (void)sp_384_add_15(r, a, a);
+    sp_384_norm_15(r);
+    sp_384_cond_sub_15(r, r, m, 0 - (((r[14] >> 20) > 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_384_norm_15(r);
+}
+
+/* Triple a Montgomery form number (r = a + a + a % m).
+ *
+ * r   Result of Tripling.
+ * a   Number to triple in Montogmery form.
+ * m   Modulus (prime).
+ */
+static void sp_384_mont_tpl_15(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    (void)sp_384_add_15(r, a, a);
+    sp_384_norm_15(r);
+    sp_384_cond_sub_15(r, r, m, 0 - (((r[14] >> 20) > 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_384_norm_15(r);
+    (void)sp_384_add_15(r, r, a);
+    sp_384_norm_15(r);
+    sp_384_cond_sub_15(r, r, m, 0 - (((r[14] >> 20) > 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_384_norm_15(r);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_384_sub_15(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 15; i++) {
+        r[i] = a[i] - b[i];
+    }
+
+    return 0;
+}
+
+#else
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_384_sub_15(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    r[ 0] = a[ 0] - b[ 0];
+    r[ 1] = a[ 1] - b[ 1];
+    r[ 2] = a[ 2] - b[ 2];
+    r[ 3] = a[ 3] - b[ 3];
+    r[ 4] = a[ 4] - b[ 4];
+    r[ 5] = a[ 5] - b[ 5];
+    r[ 6] = a[ 6] - b[ 6];
+    r[ 7] = a[ 7] - b[ 7];
+    r[ 8] = a[ 8] - b[ 8];
+    r[ 9] = a[ 9] - b[ 9];
+    r[10] = a[10] - b[10];
+    r[11] = a[11] - b[11];
+    r[12] = a[12] - b[12];
+    r[13] = a[13] - b[13];
+    r[14] = a[14] - b[14];
+
+    return 0;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r  A single precision number representing conditional add result.
+ * a  A single precision number to add with.
+ * b  A single precision number to add.
+ * m  Mask value to apply.
+ */
+static void sp_384_cond_add_15(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i = 0; i < 15; i++) {
+        r[i] = a[i] + (b[i] & m);
+    }
+#else
+    r[ 0] = a[ 0] + (b[ 0] & m);
+    r[ 1] = a[ 1] + (b[ 1] & m);
+    r[ 2] = a[ 2] + (b[ 2] & m);
+    r[ 3] = a[ 3] + (b[ 3] & m);
+    r[ 4] = a[ 4] + (b[ 4] & m);
+    r[ 5] = a[ 5] + (b[ 5] & m);
+    r[ 6] = a[ 6] + (b[ 6] & m);
+    r[ 7] = a[ 7] + (b[ 7] & m);
+    r[ 8] = a[ 8] + (b[ 8] & m);
+    r[ 9] = a[ 9] + (b[ 9] & m);
+    r[10] = a[10] + (b[10] & m);
+    r[11] = a[11] + (b[11] & m);
+    r[12] = a[12] + (b[12] & m);
+    r[13] = a[13] + (b[13] & m);
+    r[14] = a[14] + (b[14] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Subtract two Montgomery form numbers (r = a - b % m).
+ *
+ * r   Result of subtration.
+ * a   Number to subtract from in Montogmery form.
+ * b   Number to subtract with in Montogmery form.
+ * m   Modulus (prime).
+ */
+static void sp_384_mont_sub_15(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    (void)sp_384_sub_15(r, a, b);
+    sp_384_cond_add_15(r, r, m, r[14] >> 20);
+    sp_384_norm_15(r);
+}
+
+/* Shift number left one bit.
+ * Bottom bit is lost.
+ *
+ * r  Result of shift.
+ * a  Number to shift.
+ */
+SP_NOINLINE static void sp_384_rshift1_15(sp_digit* r, sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<14; i++) {
+        r[i] = ((a[i] >> 1) | (a[i + 1] << 25)) & 0x3ffffff;
+    }
+#else
+    r[0] = ((a[0] >> 1) | (a[1] << 25)) & 0x3ffffff;
+    r[1] = ((a[1] >> 1) | (a[2] << 25)) & 0x3ffffff;
+    r[2] = ((a[2] >> 1) | (a[3] << 25)) & 0x3ffffff;
+    r[3] = ((a[3] >> 1) | (a[4] << 25)) & 0x3ffffff;
+    r[4] = ((a[4] >> 1) | (a[5] << 25)) & 0x3ffffff;
+    r[5] = ((a[5] >> 1) | (a[6] << 25)) & 0x3ffffff;
+    r[6] = ((a[6] >> 1) | (a[7] << 25)) & 0x3ffffff;
+    r[7] = ((a[7] >> 1) | (a[8] << 25)) & 0x3ffffff;
+    r[8] = ((a[8] >> 1) | (a[9] << 25)) & 0x3ffffff;
+    r[9] = ((a[9] >> 1) | (a[10] << 25)) & 0x3ffffff;
+    r[10] = ((a[10] >> 1) | (a[11] << 25)) & 0x3ffffff;
+    r[11] = ((a[11] >> 1) | (a[12] << 25)) & 0x3ffffff;
+    r[12] = ((a[12] >> 1) | (a[13] << 25)) & 0x3ffffff;
+    r[13] = ((a[13] >> 1) | (a[14] << 25)) & 0x3ffffff;
+#endif
+    r[14] = a[14] >> 1;
+}
+
+/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
+ *
+ * r  Result of division by 2.
+ * a  Number to divide.
+ * m  Modulus (prime).
+ */
+static void sp_384_div2_15(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    sp_384_cond_add_15(r, a, m, 0 - (a[0] & 1));
+    sp_384_norm_15(r);
+    sp_384_rshift1_15(r, r);
+}
+
+/* Double the Montgomery form projective point p.
+ *
+ * r  Result of doubling point.
+ * p  Point to double.
+ * t  Temporary ordinate data.
+ */
+static void sp_384_proj_point_dbl_15(sp_point_384* r, const sp_point_384* p, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*15;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+
+    x = r->x;
+    y = r->y;
+    z = r->z;
+    /* Put infinity into result. */
+    if (r != p) {
+        r->infinity = p->infinity;
+    }
+
+    /* T1 = Z * Z */
+    sp_384_mont_sqr_15(t1, p->z, p384_mod, p384_mp_mod);
+    /* Z = Y * Z */
+    sp_384_mont_mul_15(z, p->y, p->z, p384_mod, p384_mp_mod);
+    /* Z = 2Z */
+    sp_384_mont_dbl_15(z, z, p384_mod);
+    /* T2 = X - T1 */
+    sp_384_mont_sub_15(t2, p->x, t1, p384_mod);
+    /* T1 = X + T1 */
+    sp_384_mont_add_15(t1, p->x, t1, p384_mod);
+    /* T2 = T1 * T2 */
+    sp_384_mont_mul_15(t2, t1, t2, p384_mod, p384_mp_mod);
+    /* T1 = 3T2 */
+    sp_384_mont_tpl_15(t1, t2, p384_mod);
+    /* Y = 2Y */
+    sp_384_mont_dbl_15(y, p->y, p384_mod);
+    /* Y = Y * Y */
+    sp_384_mont_sqr_15(y, y, p384_mod, p384_mp_mod);
+    /* T2 = Y * Y */
+    sp_384_mont_sqr_15(t2, y, p384_mod, p384_mp_mod);
+    /* T2 = T2/2 */
+    sp_384_div2_15(t2, t2, p384_mod);
+    /* Y = Y * X */
+    sp_384_mont_mul_15(y, y, p->x, p384_mod, p384_mp_mod);
+    /* X = T1 * T1 */
+    sp_384_mont_sqr_15(x, t1, p384_mod, p384_mp_mod);
+    /* X = X - Y */
+    sp_384_mont_sub_15(x, x, y, p384_mod);
+    /* X = X - Y */
+    sp_384_mont_sub_15(x, x, y, p384_mod);
+    /* Y = Y - X */
+    sp_384_mont_sub_15(y, y, x, p384_mod);
+    /* Y = Y * T1 */
+    sp_384_mont_mul_15(y, y, t1, p384_mod, p384_mp_mod);
+    /* Y = Y - T2 */
+    sp_384_mont_sub_15(y, y, t2, p384_mod);
+}
+
+/* Compare two numbers to determine if they are equal.
+ * Constant time implementation.
+ *
+ * a  First number to compare.
+ * b  Second number to compare.
+ * returns 1 when equal and 0 otherwise.
+ */
+static int sp_384_cmp_equal_15(const sp_digit* a, const sp_digit* b)
+{
+    return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3]) |
+            (a[4] ^ b[4]) | (a[5] ^ b[5]) | (a[6] ^ b[6]) | (a[7] ^ b[7]) |
+            (a[8] ^ b[8]) | (a[9] ^ b[9]) | (a[10] ^ b[10]) | (a[11] ^ b[11]) |
+            (a[12] ^ b[12]) | (a[13] ^ b[13]) | (a[14] ^ b[14])) == 0;
+}
+
+/* Add two Montgomery form projective points.
+ *
+ * r  Result of addition.
+ * p  First point to add.
+ * q  Second point to add.
+ * t  Temporary ordinate data.
+ */
+static void sp_384_proj_point_add_15(sp_point_384* r, const sp_point_384* p, const sp_point_384* q,
+        sp_digit* t)
+{
+    const sp_point_384* ap[2];
+    sp_point_384* rp[2];
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*15;
+    sp_digit* t3 = t + 4*15;
+    sp_digit* t4 = t + 6*15;
+    sp_digit* t5 = t + 8*15;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+    int i;
+
+    /* Ensure only the first point is the same as the result. */
+    if (q == r) {
+        const sp_point_384* a = p;
+        p = q;
+        q = a;
+    }
+
+    /* Check double */
+    (void)sp_384_sub_15(t1, p384_mod, q->y);
+    sp_384_norm_15(t1);
+    if ((sp_384_cmp_equal_15(p->x, q->x) & sp_384_cmp_equal_15(p->z, q->z) &
+        (sp_384_cmp_equal_15(p->y, q->y) | sp_384_cmp_equal_15(p->y, t1))) != 0) {
+        sp_384_proj_point_dbl_15(r, p, t);
+    }
+    else {
+        rp[0] = r;
+
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point_384));
+        x = rp[p->infinity | q->infinity]->x;
+        y = rp[p->infinity | q->infinity]->y;
+        z = rp[p->infinity | q->infinity]->z;
+
+        ap[0] = p;
+        ap[1] = q;
+        for (i=0; i<15; i++) {
+            r->x[i] = ap[p->infinity]->x[i];
+        }
+        for (i=0; i<15; i++) {
+            r->y[i] = ap[p->infinity]->y[i];
+        }
+        for (i=0; i<15; i++) {
+            r->z[i] = ap[p->infinity]->z[i];
+        }
+        r->infinity = ap[p->infinity]->infinity;
+
+        /* U1 = X1*Z2^2 */
+        sp_384_mont_sqr_15(t1, q->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_15(t3, t1, q->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_15(t1, t1, x, p384_mod, p384_mp_mod);
+        /* U2 = X2*Z1^2 */
+        sp_384_mont_sqr_15(t2, z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_15(t4, t2, z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_15(t2, t2, q->x, p384_mod, p384_mp_mod);
+        /* S1 = Y1*Z2^3 */
+        sp_384_mont_mul_15(t3, t3, y, p384_mod, p384_mp_mod);
+        /* S2 = Y2*Z1^3 */
+        sp_384_mont_mul_15(t4, t4, q->y, p384_mod, p384_mp_mod);
+        /* H = U2 - U1 */
+        sp_384_mont_sub_15(t2, t2, t1, p384_mod);
+        /* R = S2 - S1 */
+        sp_384_mont_sub_15(t4, t4, t3, p384_mod);
+        /* Z3 = H*Z1*Z2 */
+        sp_384_mont_mul_15(z, z, q->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_15(z, z, t2, p384_mod, p384_mp_mod);
+        /* X3 = R^2 - H^3 - 2*U1*H^2 */
+        sp_384_mont_sqr_15(x, t4, p384_mod, p384_mp_mod);
+        sp_384_mont_sqr_15(t5, t2, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_15(y, t1, t5, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_15(t5, t5, t2, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_15(x, x, t5, p384_mod);
+        sp_384_mont_dbl_15(t1, y, p384_mod);
+        sp_384_mont_sub_15(x, x, t1, p384_mod);
+        /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
+        sp_384_mont_sub_15(y, y, x, p384_mod);
+        sp_384_mont_mul_15(y, y, t4, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_15(t5, t5, t3, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_15(y, y, t5, p384_mod);
+    }
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_15(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
+        int map, void* heap)
+{
+#ifdef WOLFSSL_SP_NO_MALLOC
+    sp_point_384 t[3];
+    sp_digit tmp[2 * 15 * 6];
+#else
+    sp_point_384* t;
+    sp_digit* tmp;
+#endif
+    sp_digit n;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+    (void)heap;
+
+#ifndef WOLFSSL_SP_NO_MALLOC
+    t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 3, heap, DYNAMIC_TYPE_ECC);
+    if (t == NULL)
+        err = MEMORY_E;
+    tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 15 * 6, heap,
+                                                              DYNAMIC_TYPE_ECC);
+    if (tmp == NULL)
+        err = MEMORY_E;
+#endif
+
+    if (err == MP_OKAY) {
+        XMEMSET(t, 0, sizeof(sp_point_384) * 3);
+
+        /* t[0] = {0, 0, 1} * norm */
+        t[0].infinity = 1;
+        /* t[1] = {g->x, g->y, g->z} * norm */
+        err = sp_384_mod_mul_norm_15(t[1].x, g->x, p384_mod);
+    }
+    if (err == MP_OKAY)
+        err = sp_384_mod_mul_norm_15(t[1].y, g->y, p384_mod);
+    if (err == MP_OKAY)
+        err = sp_384_mod_mul_norm_15(t[1].z, g->z, p384_mod);
+
+    if (err == MP_OKAY) {
+        i = 14;
+        c = 20;
+        n = k[i--] << (26 - c);
+        for (; ; c--) {
+            if (c == 0) {
+                if (i == -1)
+                    break;
+
+                n = k[i--];
+                c = 26;
+            }
+
+            y = (n >> 25) & 1;
+            n <<= 1;
+
+            sp_384_proj_point_add_15(&t[y^1], &t[0], &t[1], tmp);
+
+            XMEMCPY(&t[2], (void*)(((size_t)&t[0] & addr_mask[y^1]) +
+                                   ((size_t)&t[1] & addr_mask[y])),
+                    sizeof(sp_point_384));
+            sp_384_proj_point_dbl_15(&t[2], &t[2], tmp);
+            XMEMCPY((void*)(((size_t)&t[0] & addr_mask[y^1]) +
+                            ((size_t)&t[1] & addr_mask[y])), &t[2],
+                    sizeof(sp_point_384));
+        }
+
+        if (map != 0) {
+            sp_384_map_15(r, &t[0], tmp);
+        }
+        else {
+            XMEMCPY(r, &t[0], sizeof(sp_point_384));
+        }
+    }
+
+#ifndef WOLFSSL_SP_NO_MALLOC
+    if (tmp != NULL) {
+        XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 15 * 6);
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+    if (t != NULL) {
+        XMEMSET(t, 0, sizeof(sp_point_384) * 3);
+        XFREE(t, NULL, DYNAMIC_TYPE_ECC);
+    }
+#else
+    ForceZero(tmp, sizeof(tmp));
+    ForceZero(t, sizeof(t));
+#endif
+
+    return err;
+}
+
+#elif defined(WOLFSSL_SP_CACHE_RESISTANT)
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_15(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
+        int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 t[3];
+    sp_digit tmp[2 * 15 * 6];
+#else
+    sp_point_384* t;
+    sp_digit* tmp;
+#endif
+    sp_digit n;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+    (void)heap;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_point*)XMALLOC(sizeof(*t) * 3, heap, DYNAMIC_TYPE_ECC);
+    if (t == NULL)
+        err = MEMORY_E;
+    tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 15 * 6, heap,
+                             DYNAMIC_TYPE_ECC);
+    if (tmp == NULL)
+        err = MEMORY_E;
+#endif
+
+    if (err == MP_OKAY) {
+        /* t[0] = {0, 0, 1} * norm */
+        XMEMSET(&t[0], 0, sizeof(t[0]));
+        t[0].infinity = 1;
+        /* t[1] = {g->x, g->y, g->z} * norm */
+        t[1].infinity = 0;
+        err = sp_384_mod_mul_norm_15(t[1].x, g->x, p384_mod);
+    }
+    if (err == MP_OKAY)
+        err = sp_384_mod_mul_norm_15(t[1].y, g->y, p384_mod);
+    if (err == MP_OKAY)
+        err = sp_384_mod_mul_norm_15(t[1].z, g->z, p384_mod);
+
+    if (err == MP_OKAY) {
+        i = 14;
+        c = 20;
+        n = k[i--] << (26 - c);
+        for (; ; c--) {
+            if (c == 0) {
+                if (i == -1)
+                    break;
+
+                n = k[i--];
+                c = 26;
+            }
+
+            y = (n >> 25) & 1;
+            n <<= 1;
+
+            sp_384_proj_point_add_15(&t[y^1], &t[0], &t[1], tmp);
+
+            XMEMCPY(&t[2], (void*)(((size_t)&t[0] & addr_mask[y^1]) +
+                                 ((size_t)&t[1] & addr_mask[y])), sizeof(t[2]));
+            sp_384_proj_point_dbl_15(&t[2], &t[2], tmp);
+            XMEMCPY((void*)(((size_t)&t[0] & addr_mask[y^1]) +
+                          ((size_t)&t[1] & addr_mask[y])), &t[2], sizeof(t[2]));
+        }
+
+        if (map != 0) {
+            sp_384_map_15(r, &t[0], tmp);
+        }
+        else {
+            XMEMCPY(r, &t[0], sizeof(sp_point_384));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 15 * 6);
+        XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+    }
+    if (t != NULL) {
+        XMEMSET(t, 0, sizeof(sp_point_384) * 3);
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    ForceZero(tmp, sizeof(tmp));
+    ForceZero(t, sizeof(t));
+#endif
+
+    return err;
+}
+
+#else
+/* A table entry for pre-computed points. */
+typedef struct sp_table_entry_384 {
+    sp_digit x[15];
+    sp_digit y[15];
+} sp_table_entry_384;
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_fast_15(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
+        int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 td[16];
+    sp_point_384 rtd;
+    sp_digit tmpd[2 * 15 * 6];
+#endif
+    sp_point_384* t;
+    sp_point_384* rt;
+    sp_digit* tmp;
+    sp_digit n;
+    int i;
+    int c, y;
+    int err;
+
+    (void)heap;
+
+    err = sp_384_point_new_15(heap, rtd, rt);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 16, heap, DYNAMIC_TYPE_ECC);
+    if (t == NULL)
+        err = MEMORY_E;
+    tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 15 * 6, heap,
+                             DYNAMIC_TYPE_ECC);
+    if (tmp == NULL)
+        err = MEMORY_E;
+#else
+    t = td;
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        /* t[0] = {0, 0, 1} * norm */
+        XMEMSET(&t[0], 0, sizeof(t[0]));
+        t[0].infinity = 1;
+        /* t[1] = {g->x, g->y, g->z} * norm */
+        (void)sp_384_mod_mul_norm_15(t[1].x, g->x, p384_mod);
+        (void)sp_384_mod_mul_norm_15(t[1].y, g->y, p384_mod);
+        (void)sp_384_mod_mul_norm_15(t[1].z, g->z, p384_mod);
+        t[1].infinity = 0;
+        sp_384_proj_point_dbl_15(&t[ 2], &t[ 1], tmp);
+        t[ 2].infinity = 0;
+        sp_384_proj_point_add_15(&t[ 3], &t[ 2], &t[ 1], tmp);
+        t[ 3].infinity = 0;
+        sp_384_proj_point_dbl_15(&t[ 4], &t[ 2], tmp);
+        t[ 4].infinity = 0;
+        sp_384_proj_point_add_15(&t[ 5], &t[ 3], &t[ 2], tmp);
+        t[ 5].infinity = 0;
+        sp_384_proj_point_dbl_15(&t[ 6], &t[ 3], tmp);
+        t[ 6].infinity = 0;
+        sp_384_proj_point_add_15(&t[ 7], &t[ 4], &t[ 3], tmp);
+        t[ 7].infinity = 0;
+        sp_384_proj_point_dbl_15(&t[ 8], &t[ 4], tmp);
+        t[ 8].infinity = 0;
+        sp_384_proj_point_add_15(&t[ 9], &t[ 5], &t[ 4], tmp);
+        t[ 9].infinity = 0;
+        sp_384_proj_point_dbl_15(&t[10], &t[ 5], tmp);
+        t[10].infinity = 0;
+        sp_384_proj_point_add_15(&t[11], &t[ 6], &t[ 5], tmp);
+        t[11].infinity = 0;
+        sp_384_proj_point_dbl_15(&t[12], &t[ 6], tmp);
+        t[12].infinity = 0;
+        sp_384_proj_point_add_15(&t[13], &t[ 7], &t[ 6], tmp);
+        t[13].infinity = 0;
+        sp_384_proj_point_dbl_15(&t[14], &t[ 7], tmp);
+        t[14].infinity = 0;
+        sp_384_proj_point_add_15(&t[15], &t[ 8], &t[ 7], tmp);
+        t[15].infinity = 0;
+
+        i = 13;
+        n = k[i+1] << 6;
+        c = 16;
+        y = n >> 22;
+        XMEMCPY(rt, &t[y], sizeof(sp_point_384));
+        n <<= 10;
+        for (; i>=0 || c>=4; ) {
+            if (c < 4) {
+                n |= k[i--] << (6 - c);
+                c += 26;
+            }
+            y = (n >> 28) & 0xf;
+            n <<= 4;
+            c -= 4;
+
+            sp_384_proj_point_dbl_15(rt, rt, tmp);
+            sp_384_proj_point_dbl_15(rt, rt, tmp);
+            sp_384_proj_point_dbl_15(rt, rt, tmp);
+            sp_384_proj_point_dbl_15(rt, rt, tmp);
+
+            sp_384_proj_point_add_15(rt, rt, &t[y], tmp);
+        }
+
+        if (map != 0) {
+            sp_384_map_15(r, rt, tmp);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_384));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 15 * 6);
+        XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+    }
+    if (t != NULL) {
+        XMEMSET(t, 0, sizeof(sp_point_384) * 16);
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    ForceZero(tmpd, sizeof(tmpd));
+    ForceZero(td, sizeof(td));
+#endif
+    sp_384_point_free_15(rt, 1, heap);
+
+    return err;
+}
+
+#ifdef FP_ECC
+/* Double the Montgomery form projective point p a number of times.
+ *
+ * r  Result of repeated doubling of point.
+ * p  Point to double.
+ * n  Number of times to double
+ * t  Temporary ordinate data.
+ */
+static void sp_384_proj_point_dbl_n_15(sp_point_384* p, int n, sp_digit* t)
+{
+    sp_digit* w = t;
+    sp_digit* a = t + 2*15;
+    sp_digit* b = t + 4*15;
+    sp_digit* t1 = t + 6*15;
+    sp_digit* t2 = t + 8*15;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+
+    x = p->x;
+    y = p->y;
+    z = p->z;
+
+    /* Y = 2*Y */
+    sp_384_mont_dbl_15(y, y, p384_mod);
+    /* W = Z^4 */
+    sp_384_mont_sqr_15(w, z, p384_mod, p384_mp_mod);
+    sp_384_mont_sqr_15(w, w, p384_mod, p384_mp_mod);
+
+#ifndef WOLFSSL_SP_SMALL
+    while (--n > 0)
+#else
+    while (--n >= 0)
+#endif
+    {
+        /* A = 3*(X^2 - W) */
+        sp_384_mont_sqr_15(t1, x, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_15(t1, t1, w, p384_mod);
+        sp_384_mont_tpl_15(a, t1, p384_mod);
+        /* B = X*Y^2 */
+        sp_384_mont_sqr_15(t1, y, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_15(b, t1, x, p384_mod, p384_mp_mod);
+        /* X = A^2 - 2B */
+        sp_384_mont_sqr_15(x, a, p384_mod, p384_mp_mod);
+        sp_384_mont_dbl_15(t2, b, p384_mod);
+        sp_384_mont_sub_15(x, x, t2, p384_mod);
+        /* Z = Z*Y */
+        sp_384_mont_mul_15(z, z, y, p384_mod, p384_mp_mod);
+        /* t2 = Y^4 */
+        sp_384_mont_sqr_15(t1, t1, p384_mod, p384_mp_mod);
+#ifdef WOLFSSL_SP_SMALL
+        if (n != 0)
+#endif
+        {
+            /* W = W*Y^4 */
+            sp_384_mont_mul_15(w, w, t1, p384_mod, p384_mp_mod);
+        }
+        /* y = 2*A*(B - X) - Y^4 */
+        sp_384_mont_sub_15(y, b, x, p384_mod);
+        sp_384_mont_mul_15(y, y, a, p384_mod, p384_mp_mod);
+        sp_384_mont_dbl_15(y, y, p384_mod);
+        sp_384_mont_sub_15(y, y, t1, p384_mod);
+    }
+#ifndef WOLFSSL_SP_SMALL
+    /* A = 3*(X^2 - W) */
+    sp_384_mont_sqr_15(t1, x, p384_mod, p384_mp_mod);
+    sp_384_mont_sub_15(t1, t1, w, p384_mod);
+    sp_384_mont_tpl_15(a, t1, p384_mod);
+    /* B = X*Y^2 */
+    sp_384_mont_sqr_15(t1, y, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_15(b, t1, x, p384_mod, p384_mp_mod);
+    /* X = A^2 - 2B */
+    sp_384_mont_sqr_15(x, a, p384_mod, p384_mp_mod);
+    sp_384_mont_dbl_15(t2, b, p384_mod);
+    sp_384_mont_sub_15(x, x, t2, p384_mod);
+    /* Z = Z*Y */
+    sp_384_mont_mul_15(z, z, y, p384_mod, p384_mp_mod);
+    /* t2 = Y^4 */
+    sp_384_mont_sqr_15(t1, t1, p384_mod, p384_mp_mod);
+    /* y = 2*A*(B - X) - Y^4 */
+    sp_384_mont_sub_15(y, b, x, p384_mod);
+    sp_384_mont_mul_15(y, y, a, p384_mod, p384_mp_mod);
+    sp_384_mont_dbl_15(y, y, p384_mod);
+    sp_384_mont_sub_15(y, y, t1, p384_mod);
+#endif
+    /* Y = Y/2 */
+    sp_384_div2_15(y, y, p384_mod);
+}
+
+#endif /* FP_ECC */
+/* Add two Montgomery form projective points. The second point has a q value of
+ * one.
+ * Only the first point can be the same pointer as the result point.
+ *
+ * r  Result of addition.
+ * p  First point to add.
+ * q  Second point to add.
+ * t  Temporary ordinate data.
+ */
+static void sp_384_proj_point_add_qz1_15(sp_point_384* r, const sp_point_384* p,
+        const sp_point_384* q, sp_digit* t)
+{
+    const sp_point_384* ap[2];
+    sp_point_384* rp[2];
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*15;
+    sp_digit* t3 = t + 4*15;
+    sp_digit* t4 = t + 6*15;
+    sp_digit* t5 = t + 8*15;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+    int i;
+
+    /* Check double */
+    (void)sp_384_sub_15(t1, p384_mod, q->y);
+    sp_384_norm_15(t1);
+    if ((sp_384_cmp_equal_15(p->x, q->x) & sp_384_cmp_equal_15(p->z, q->z) &
+        (sp_384_cmp_equal_15(p->y, q->y) | sp_384_cmp_equal_15(p->y, t1))) != 0) {
+        sp_384_proj_point_dbl_15(r, p, t);
+    }
+    else {
+        rp[0] = r;
+
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point_384));
+        x = rp[p->infinity | q->infinity]->x;
+        y = rp[p->infinity | q->infinity]->y;
+        z = rp[p->infinity | q->infinity]->z;
+
+        ap[0] = p;
+        ap[1] = q;
+        for (i=0; i<15; i++) {
+            r->x[i] = ap[p->infinity]->x[i];
+        }
+        for (i=0; i<15; i++) {
+            r->y[i] = ap[p->infinity]->y[i];
+        }
+        for (i=0; i<15; i++) {
+            r->z[i] = ap[p->infinity]->z[i];
+        }
+        r->infinity = ap[p->infinity]->infinity;
+
+        /* U2 = X2*Z1^2 */
+        sp_384_mont_sqr_15(t2, z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_15(t4, t2, z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_15(t2, t2, q->x, p384_mod, p384_mp_mod);
+        /* S2 = Y2*Z1^3 */
+        sp_384_mont_mul_15(t4, t4, q->y, p384_mod, p384_mp_mod);
+        /* H = U2 - X1 */
+        sp_384_mont_sub_15(t2, t2, x, p384_mod);
+        /* R = S2 - Y1 */
+        sp_384_mont_sub_15(t4, t4, y, p384_mod);
+        /* Z3 = H*Z1 */
+        sp_384_mont_mul_15(z, z, t2, p384_mod, p384_mp_mod);
+        /* X3 = R^2 - H^3 - 2*X1*H^2 */
+        sp_384_mont_sqr_15(t1, t4, p384_mod, p384_mp_mod);
+        sp_384_mont_sqr_15(t5, t2, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_15(t3, x, t5, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_15(t5, t5, t2, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_15(x, t1, t5, p384_mod);
+        sp_384_mont_dbl_15(t1, t3, p384_mod);
+        sp_384_mont_sub_15(x, x, t1, p384_mod);
+        /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */
+        sp_384_mont_sub_15(t3, t3, x, p384_mod);
+        sp_384_mont_mul_15(t3, t3, t4, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_15(t5, t5, y, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_15(y, t3, t5, p384_mod);
+    }
+}
+
+#ifdef FP_ECC
+/* Convert the projective point to affine.
+ * Ordinates are in Montgomery form.
+ *
+ * a  Point to convert.
+ * t  Temporary data.
+ */
+static void sp_384_proj_to_affine_15(sp_point_384* a, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2 * 15;
+    sp_digit* tmp = t + 4 * 15;
+
+    sp_384_mont_inv_15(t1, a->z, tmp);
+
+    sp_384_mont_sqr_15(t2, t1, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_15(t1, t2, t1, p384_mod, p384_mp_mod);
+
+    sp_384_mont_mul_15(a->x, a->x, t2, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_15(a->y, a->y, t1, p384_mod, p384_mp_mod);
+    XMEMCPY(a->z, p384_norm_mod, sizeof(p384_norm_mod));
+}
+
+/* Generate the pre-computed table of points for the base point.
+ *
+ * a      The base point.
+ * table  Place to store generated point data.
+ * tmp    Temporary data.
+ * heap  Heap to use for allocation.
+ */
+static int sp_384_gen_stripe_table_15(const sp_point_384* a,
+        sp_table_entry_384* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 td, s1d, s2d;
+#endif
+    sp_point_384* t;
+    sp_point_384* s1 = NULL;
+    sp_point_384* s2 = NULL;
+    int i, j;
+    int err;
+
+    (void)heap;
+
+    err = sp_384_point_new_15(heap, td, t);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_15(heap, s1d, s1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_15(heap, s2d, s2);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_15(t->x, a->x, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_15(t->y, a->y, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_15(t->z, a->z, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        t->infinity = 0;
+        sp_384_proj_to_affine_15(t, tmp);
+
+        XMEMCPY(s1->z, p384_norm_mod, sizeof(p384_norm_mod));
+        s1->infinity = 0;
+        XMEMCPY(s2->z, p384_norm_mod, sizeof(p384_norm_mod));
+        s2->infinity = 0;
+
+        /* table[0] = {0, 0, infinity} */
+        XMEMSET(&table[0], 0, sizeof(sp_table_entry_384));
+        /* table[1] = Affine version of 'a' in Montgomery form */
+        XMEMCPY(table[1].x, t->x, sizeof(table->x));
+        XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+        for (i=1; i<8; i++) {
+            sp_384_proj_point_dbl_n_15(t, 48, tmp);
+            sp_384_proj_to_affine_15(t, tmp);
+            XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+            XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+        }
+
+        for (i=1; i<8; i++) {
+            XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+            XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+            for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+                XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+                XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+                sp_384_proj_point_add_qz1_15(t, s1, s2, tmp);
+                sp_384_proj_to_affine_15(t, tmp);
+                XMEMCPY(table[j].x, t->x, sizeof(table->x));
+                XMEMCPY(table[j].y, t->y, sizeof(table->y));
+            }
+        }
+    }
+
+    sp_384_point_free_15(s2, 0, heap);
+    sp_384_point_free_15(s1, 0, heap);
+    sp_384_point_free_15( t, 0, heap);
+
+    return err;
+}
+
+#endif /* FP_ECC */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_stripe_15(sp_point_384* r, const sp_point_384* g,
+        const sp_table_entry_384* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 rtd;
+    sp_point_384 pd;
+    sp_digit td[2 * 15 * 6];
+#endif
+    sp_point_384* rt;
+    sp_point_384* p = NULL;
+    sp_digit* t;
+    int i, j;
+    int y, x;
+    int err;
+
+    (void)g;
+    (void)heap;
+
+
+    err = sp_384_point_new_15(heap, rtd, rt);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_15(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 15 * 6, heap,
+                           DYNAMIC_TYPE_ECC);
+    if (t == NULL) {
+        err = MEMORY_E;
+    }
+#else
+    t = td;
+#endif
+
+    if (err == MP_OKAY) {
+        XMEMCPY(p->z, p384_norm_mod, sizeof(p384_norm_mod));
+        XMEMCPY(rt->z, p384_norm_mod, sizeof(p384_norm_mod));
+
+        y = 0;
+        for (j=0,x=47; j<8; j++,x+=48) {
+            y |= ((k[x / 26] >> (x % 26)) & 1) << j;
+        }
+        XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+        XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+        rt->infinity = !y;
+        for (i=46; i>=0; i--) {
+            y = 0;
+            for (j=0,x=i; j<8; j++,x+=48) {
+                y |= ((k[x / 26] >> (x % 26)) & 1) << j;
+            }
+
+            sp_384_proj_point_dbl_15(rt, rt, t);
+            XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+            XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+            p->infinity = !y;
+            sp_384_proj_point_add_qz1_15(rt, rt, p, t);
+        }
+
+        if (map != 0) {
+            sp_384_map_15(r, rt, t);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_384));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_15(p, 0, heap);
+    sp_384_point_free_15(rt, 0, heap);
+
+    return err;
+}
+
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+    #define FP_ENTRIES 16
+#endif
+
+typedef struct sp_cache_384_t {
+    sp_digit x[15];
+    sp_digit y[15];
+    sp_table_entry_384 table[256];
+    uint32_t cnt;
+    int set;
+} sp_cache_384_t;
+
+static THREAD_LS_T sp_cache_384_t sp_cache_384[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_384_last = -1;
+static THREAD_LS_T int sp_cache_384_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+    static volatile int initCacheMutex_384 = 0;
+    static wolfSSL_Mutex sp_cache_384_lock;
+#endif
+
+static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache)
+{
+    int i, j;
+    uint32_t least;
+
+    if (sp_cache_384_inited == 0) {
+        for (i=0; i<FP_ENTRIES; i++) {
+            sp_cache_384[i].set = 0;
+        }
+        sp_cache_384_inited = 1;
+    }
+
+    /* Compare point with those in cache. */
+    for (i=0; i<FP_ENTRIES; i++) {
+        if (!sp_cache_384[i].set)
+            continue;
+
+        if (sp_384_cmp_equal_15(g->x, sp_cache_384[i].x) &
+                           sp_384_cmp_equal_15(g->y, sp_cache_384[i].y)) {
+            sp_cache_384[i].cnt++;
+            break;
+        }
+    }
+
+    /* No match. */
+    if (i == FP_ENTRIES) {
+        /* Find empty entry. */
+        i = (sp_cache_384_last + 1) % FP_ENTRIES;
+        for (; i != sp_cache_384_last; i=(i+1)%FP_ENTRIES) {
+            if (!sp_cache_384[i].set) {
+                break;
+            }
+        }
+
+        /* Evict least used. */
+        if (i == sp_cache_384_last) {
+            least = sp_cache_384[0].cnt;
+            for (j=1; j<FP_ENTRIES; j++) {
+                if (sp_cache_384[j].cnt < least) {
+                    i = j;
+                    least = sp_cache_384[i].cnt;
+                }
+            }
+        }
+
+        XMEMCPY(sp_cache_384[i].x, g->x, sizeof(sp_cache_384[i].x));
+        XMEMCPY(sp_cache_384[i].y, g->y, sizeof(sp_cache_384[i].y));
+        sp_cache_384[i].set = 1;
+        sp_cache_384[i].cnt = 1;
+    }
+
+    *cache = &sp_cache_384[i];
+    sp_cache_384_last = i;
+}
+#endif /* FP_ECC */
+
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_15(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
+        int map, void* heap)
+{
+#ifndef FP_ECC
+    return sp_384_ecc_mulmod_fast_15(r, g, k, map, heap);
+#else
+    sp_digit tmp[2 * 15 * 7];
+    sp_cache_384_t* cache;
+    int err = MP_OKAY;
+
+#ifndef HAVE_THREAD_LS
+    if (initCacheMutex_384 == 0) {
+         wc_InitMutex(&sp_cache_384_lock);
+         initCacheMutex_384 = 1;
+    }
+    if (wc_LockMutex(&sp_cache_384_lock) != 0)
+       err = BAD_MUTEX_E;
+#endif /* HAVE_THREAD_LS */
+
+    if (err == MP_OKAY) {
+        sp_ecc_get_cache_384(g, &cache);
+        if (cache->cnt == 2)
+            sp_384_gen_stripe_table_15(g, cache->table, tmp, heap);
+
+#ifndef HAVE_THREAD_LS
+        wc_UnLockMutex(&sp_cache_384_lock);
+#endif /* HAVE_THREAD_LS */
+
+        if (cache->cnt < 2) {
+            err = sp_384_ecc_mulmod_fast_15(r, g, k, map, heap);
+        }
+        else {
+            err = sp_384_ecc_mulmod_stripe_15(r, g, cache->table, k,
+                    map, heap);
+        }
+    }
+
+    return err;
+#endif
+}
+
+#endif
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * km    Scalar to multiply by.
+ * p     Point to multiply.
+ * r     Resulting point.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_384(mp_int* km, ecc_point* gm, ecc_point* r, int map,
+        void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[15];
+#endif
+    sp_point_384* point;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    err = sp_384_point_new_15(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 15, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL)
+            err = MEMORY_E;
+    }
+#else
+    k = kd;
+#endif
+    if (err == MP_OKAY) {
+        sp_384_from_mp(k, 15, km);
+        sp_384_point_from_ecc_point_15(point, gm);
+
+            err = sp_384_ecc_mulmod_15(point, point, k, map, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_to_ecc_point_15(point, r);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_15(point, 0, heap);
+
+    return err;
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_base_15(sp_point_384* r, const sp_digit* k,
+        int map, void* heap)
+{
+    /* No pre-computed values. */
+    return sp_384_ecc_mulmod_15(r, &p384_base, k, map, heap);
+}
+
+#elif defined(WOLFSSL_SP_CACHE_RESISTANT)
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_base_15(sp_point_384* r, const sp_digit* k,
+        int map, void* heap)
+{
+    /* No pre-computed values. */
+    return sp_384_ecc_mulmod_15(r, &p384_base, k, map, heap);
+}
+
+#else
+static const sp_table_entry_384 p384_table[256] = {
+    /* 0 */
+    { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 */
+    { { 0x1c0b528,0x01d5992,0x0e383dd,0x38a835b,0x220e378,0x106d35b,
+        0x1c3afc5,0x03bfe1e,0x28459a3,0x2d91521,0x214ede2,0x0bfdc8d,
+        0x2151381,0x3708a67,0x004d3aa },
+      { 0x303a4fe,0x10f6b52,0x29ac230,0x2fdeed2,0x0a1bfa8,0x3a0ec14,
+        0x2de7562,0x3ff662e,0x21968f4,0x031b0d4,0x3969a84,0x2000898,
+        0x1c5e9dd,0x2f09685,0x002b78a } },
+    /* 2 */
+    { { 0x30c535b,0x191d4ca,0x2296298,0x14dc141,0x090dd69,0x05aae6b,
+        0x0cd6b42,0x35da80e,0x3b7be12,0x2cf7e6d,0x1f347bd,0x3d365e1,
+        0x1448913,0x32704fa,0x00222c5 },
+      { 0x280dc64,0x39e5bc9,0x24175f8,0x2dd60d4,0x0120e7c,0x041d02e,
+        0x0b5d8ad,0x37b9895,0x2fb5337,0x1f0e2e3,0x14f0224,0x2230b86,
+        0x1bc4cf6,0x17cdb09,0x007b5c7 } },
+    /* 3 */
+    { { 0x2dffea5,0x28f30e7,0x29fce26,0x070df5f,0x235bbfd,0x2f78fbd,
+        0x27700d9,0x23d6bc3,0x3471a53,0x0c0e03a,0x05bf9eb,0x276a2ec,
+        0x20c3e2e,0x31cc691,0x00dbb93 },
+      { 0x126b605,0x2e8983d,0x153737d,0x23bf5e1,0x295d497,0x35ca812,
+        0x2d793ae,0x16c6893,0x3777600,0x089a520,0x1e681f8,0x3d55ee6,
+        0x154ef99,0x155f592,0x00ae5f9 } },
+    /* 4 */
+    { { 0x26feef9,0x20315fc,0x1240244,0x250e838,0x3c31a26,0x1cf8af1,
+        0x1002c32,0x3b531cd,0x1c53ef1,0x22310ba,0x3f4948e,0x22eafd9,
+        0x3863202,0x3d0e2a5,0x006a502 },
+      { 0x34536fe,0x04e91ad,0x30ebf5f,0x2af62a7,0x01d218b,0x1c8c9da,
+        0x336bcc3,0x23060c3,0x331576e,0x1b14c5e,0x1bbcb76,0x0755e9a,
+        0x3d4dcef,0x24c2cf8,0x00917c4 } },
+    /* 5 */
+    { { 0x349ddd0,0x09b8bb8,0x0250114,0x3e66cbf,0x29f117e,0x3005d29,
+        0x36b480e,0x2119bfc,0x2761845,0x253d2f7,0x0580604,0x0bb6db4,
+        0x3ca922f,0x1744677,0x008adc7 },
+      { 0x3d5a7ce,0x27425ed,0x11e9a61,0x3968d10,0x3874275,0x3692d3b,
+        0x03e0470,0x0763d50,0x3d97790,0x3cbaeab,0x2747170,0x18faf3a,
+        0x180365e,0x2511fe7,0x0012a36 } },
+    /* 6 */
+    { { 0x3c52870,0x2701e93,0x296128f,0x120694e,0x1ce0b37,0x3860a36,
+        0x10fa180,0x0896b55,0x2f76adb,0x22892ae,0x2e58a34,0x07b4295,
+        0x2cb62d1,0x079a522,0x00f3d81 },
+      { 0x061ed22,0x2375dd3,0x3c9d861,0x3e602d1,0x10bb747,0x39ae156,
+        0x3f796fd,0x087a48a,0x06d680a,0x37f7f47,0x2af2c9d,0x36c55dc,
+        0x10f3dc0,0x279b07a,0x00a0937 } },
+    /* 7 */
+    { { 0x085c629,0x319bbf8,0x089a386,0x184256f,0x15fc2a4,0x00fd2d0,
+        0x13d6312,0x363d44d,0x32b7e4b,0x25f2865,0x27df8ce,0x1dce02a,
+        0x24ea3b0,0x0e27b9f,0x00d8a90 },
+      { 0x3b14461,0x1d371f9,0x0f781bc,0x0503271,0x0dc2cb0,0x13bc284,
+        0x34b3a68,0x1ff894a,0x25d2032,0x16f79ba,0x260f961,0x07b10d5,
+        0x18173b7,0x2812e2b,0x00eede5 } },
+    /* 8 */
+    { { 0x13b9a2d,0x132ece2,0x0c5d558,0x02c0214,0x1820c66,0x37cb50f,
+        0x26d8267,0x3a00504,0x3f00109,0x33756ee,0x38172f1,0x2e4bb8c,
+        0x030d985,0x3e4fcc5,0x00609d4 },
+      { 0x2daf9d6,0x16681fa,0x1fb01e0,0x1b03c49,0x370e653,0x183c839,
+        0x2207515,0x0ea6b58,0x1ae7aaf,0x3a96522,0x24bae14,0x1c38bd9,
+        0x082497b,0x1c05db4,0x000dd03 } },
+    /* 9 */
+    { { 0x110521f,0x04efa21,0x0c174cc,0x2a7dc93,0x387315b,0x14f7098,
+        0x1d83bb3,0x2495ed2,0x2fe0c27,0x1e2d9df,0x093c953,0x0287073,
+        0x02c9951,0x336291c,0x0033e30 },
+      { 0x208353f,0x3f22748,0x2b2bf0f,0x2373b50,0x10170fa,0x1b8a97d,
+        0x0851ed2,0x0b25824,0x055ecb5,0x12049d9,0x3fe1adf,0x11b1385,
+        0x28eab06,0x11fac21,0x00513f0 } },
+    /* 10 */
+    { { 0x35bdf53,0x1847d37,0x1a6dc07,0x29d62c4,0x045d331,0x313b8e5,
+        0x165daf1,0x1e34562,0x3e75a58,0x16ea2fa,0x02dd302,0x3302862,
+        0x3eb8bae,0x2266a48,0x00cf2a3 },
+      { 0x24fd048,0x324a074,0x025df98,0x1662eec,0x3841bfb,0x26ae754,
+        0x1df8cec,0x0113ae3,0x0b67fef,0x094e293,0x2323666,0x0ab087c,
+        0x2f06509,0x0e142d9,0x00a919d } },
+    /* 11 */
+    { { 0x1d480d8,0x00ed021,0x3a7d3db,0x1e46ca1,0x28cd9f4,0x2a3ceeb,
+        0x24dc754,0x0624a3c,0x0003db4,0x1520bae,0x1c56e0f,0x2fe7ace,
+        0x1dc6f38,0x0c826a4,0x008b977 },
+      { 0x209cfc2,0x2c16c9c,0x1b70a31,0x21416cb,0x34c49bf,0x186549e,
+        0x062498d,0x146e959,0x0391fac,0x08ff944,0x2b4b834,0x013d57a,
+        0x2eabffb,0x0370131,0x00c07c1 } },
+    /* 12 */
+    { { 0x332f048,0x0bf9336,0x16dfad2,0x2451d7b,0x35f23bf,0x299adb2,
+        0x0ce0c0a,0x0170294,0x289f034,0x2b7d89e,0x395e2d6,0x1d20df7,
+        0x2e64e36,0x16dae90,0x00081c9 },
+      { 0x31d6ceb,0x0f80db9,0x0271eba,0x33db1ac,0x1b45bcc,0x1a11c07,
+        0x347e630,0x148fd9e,0x142e712,0x3183e3e,0x1cd47ad,0x108d1c9,
+        0x09cbb82,0x35e61d9,0x0083027 } },
+    /* 13 */
+    { { 0x215b0b8,0x0a7a98d,0x2c41b39,0x3f69536,0x0b41441,0x16da8da,
+        0x15d556b,0x3c17a26,0x129167e,0x3ea0351,0x2d25a27,0x2f2d285,
+        0x15b68f6,0x2931ef5,0x00210d6 },
+      { 0x1351130,0x012aec9,0x37ebf38,0x26640f8,0x01d2df6,0x2130972,
+        0x201efc0,0x23a457c,0x087a1c6,0x14c68a3,0x163f62a,0x36b494d,
+        0x015d481,0x39c35b1,0x005dd6d } },
+    /* 14 */
+    { { 0x06612ce,0x11c3f61,0x199729f,0x3b36863,0x2986f3e,0x3cd2be1,
+        0x04c1612,0x2be2dae,0x00846dd,0x3d7bc29,0x249e795,0x1016803,
+        0x37a3714,0x2c5aa8b,0x005f491 },
+      { 0x341b38d,0x01eb936,0x3caac7f,0x27863ef,0x1ef7d11,0x1110ec6,
+        0x18e0761,0x26498e8,0x01a79a1,0x390d5a1,0x22226fb,0x3d2a473,
+        0x0872191,0x1230f32,0x00dc772 } },
+    /* 15 */
+    { { 0x0b1ec9d,0x03fc6b9,0x3706d57,0x03b9fbb,0x221d23e,0x2867821,
+        0x1e40f4c,0x2c9c0f3,0x3c4cd4b,0x31f5948,0x3f13aa6,0x307c1b2,
+        0x04b6016,0x116b453,0x005aa72 },
+      { 0x0b74de8,0x20519d1,0x134e37f,0x05d882a,0x1839e7a,0x3a2c6a8,
+        0x0d14e8d,0x1d78bdd,0x251f30d,0x3a1e27e,0x081c261,0x2c9014b,
+        0x165ee09,0x19e0cf1,0x00654e2 } },
+    /* 16 */
+    { { 0x39fbe67,0x081778b,0x0e44378,0x20dfdca,0x1c4afcb,0x20b803c,
+        0x0ec06c6,0x1508f6f,0x1c3114d,0x3bca851,0x3a52463,0x07661d1,
+        0x17b0aa0,0x16c5f5c,0x00fc093 },
+      { 0x0d01f95,0x0ef13f5,0x2d34965,0x2a25582,0x39aa83e,0x3e38fcf,
+        0x3943dca,0x385bbdd,0x210e86f,0x3dc1dd2,0x3f9ffdc,0x18b9bc6,
+        0x345c96b,0x0e79621,0x008a72f } },
+    /* 17 */
+    { { 0x341c342,0x3793688,0x042273a,0x153a9c1,0x3dd326e,0x1d073bc,
+        0x2c7d983,0x05524cd,0x00d59e6,0x347abe8,0x3d9a3ef,0x0fb624a,
+        0x2c7e4cd,0x09b3171,0x0003faf },
+      { 0x045f8ac,0x38bf3cc,0x1e73087,0x0c85d3c,0x314a655,0x382be69,
+        0x384f28f,0x24d6cb3,0x2842cdc,0x1777f5e,0x2929c89,0x03c45ed,
+        0x3cfcc4c,0x0b59322,0x0035657 } },
+    /* 18 */
+    { { 0x18c1bba,0x2eb005f,0x33d57ec,0x30e42c3,0x36058f9,0x1865f43,
+        0x2116e3f,0x2c4a2bb,0x0684033,0x0f1375c,0x0209b98,0x2136e9b,
+        0x1bc4af0,0x0b3e0c7,0x0097c7c },
+      { 0x16010e8,0x398777e,0x2a172f4,0x0814a7e,0x0d97e4e,0x274dfc8,
+        0x2666606,0x1b5c93b,0x1ed3d36,0x3f3304e,0x13488e0,0x02dbb88,
+        0x2d53369,0x3717ce9,0x007cad1 } },
+    /* 19 */
+    { { 0x257a41f,0x2a6a076,0x39b6660,0x04bb000,0x1e74a04,0x3876b45,
+        0x343c6b5,0x0753108,0x3f54668,0x24a13cf,0x23749e8,0x0421fc5,
+        0x32f13b5,0x0f31be7,0x00070f2 },
+      { 0x1186e14,0x0847697,0x0dff542,0x0dff76c,0x084748f,0x2c7d060,
+        0x23aab4d,0x0b43906,0x27ba640,0x1497b59,0x02f5835,0x0a492a4,
+        0x0a6892f,0x39f3e91,0x005844e } },
+    /* 20 */
+    { { 0x33b236f,0x02181cf,0x21dafab,0x0760788,0x019e9d4,0x249ed0a,
+        0x36571e3,0x3c7dbcf,0x1337550,0x010d22a,0x285e62f,0x19ee65a,
+        0x052bf71,0x1d65fd5,0x0062d43 },
+      { 0x2955926,0x3fae7bc,0x0353d85,0x07db7de,0x1440a56,0x328dad6,
+        0x1668ec9,0x28058e2,0x1a1a22d,0x1014afc,0x3609325,0x3effdcb,
+        0x209f3bd,0x3ca3888,0x0094e50 } },
+    /* 21 */
+    { { 0x062e8af,0x0b96ccc,0x136990b,0x1d7a28f,0x1a85723,0x0076dec,
+        0x21b00b2,0x06a88ff,0x2f0ee65,0x1fa49b7,0x39b10ad,0x10b26fa,
+        0x0be7465,0x026e8bf,0x00098e3 },
+      { 0x3f1d63f,0x37bacff,0x1374779,0x02882ff,0x323d0e8,0x1da3de5,
+        0x12bb3b8,0x0a15a11,0x34d1f95,0x2b3dd6e,0x29ea3fa,0x39ad000,
+        0x33a538f,0x390204d,0x0012bd3 } },
+    /* 22 */
+    { { 0x04cbba5,0x0de0344,0x1d4cc02,0x11fe8d7,0x36207e7,0x32a6da8,
+        0x0239281,0x1ec40d7,0x3e89798,0x213fc66,0x0022eee,0x11daefe,
+        0x3e74db8,0x28534ee,0x00aa0a4 },
+      { 0x07d4543,0x250cc46,0x206620f,0x1c1e7db,0x1321538,0x31fa0b8,
+        0x30f74ea,0x01aae0e,0x3a2828f,0x3e9dd22,0x026ef35,0x3c0a62b,
+        0x27dbdc5,0x01c23a6,0x000f0c5 } },
+    /* 23 */
+    { { 0x2f029dd,0x3091337,0x21b80c5,0x21e1419,0x13dabc6,0x3847660,
+        0x12b865f,0x36eb666,0x38f6274,0x0ba6006,0x098da24,0x1398c64,
+        0x13d08e5,0x246a469,0x009929a },
+      { 0x1285887,0x3ff5c8d,0x010237b,0x097c506,0x0bc7594,0x34b9b88,
+        0x00cc35f,0x0bb964a,0x00cfbc4,0x29cd718,0x0837619,0x2b4a192,
+        0x0c57bb7,0x08c69de,0x00a3627 } },
+    /* 24 */
+    { { 0x1361ed8,0x266d724,0x366cae7,0x1d5b18c,0x247d71b,0x2c9969a,
+        0x0dd5211,0x1edd153,0x25998d7,0x0380856,0x3ab29db,0x09366de,
+        0x1e53644,0x2b31ff6,0x008b0ff },
+      { 0x3b5d9ef,0x217448d,0x174746d,0x18afea4,0x15b106d,0x3e66e8b,
+        0x0479f85,0x13793b4,0x1231d10,0x3c39bce,0x25e8983,0x2a13210,
+        0x05a7083,0x382be04,0x00a9507 } },
+    /* 25 */
+    { { 0x0cf381c,0x1a29b85,0x31ccf6c,0x2f708b8,0x3af9d27,0x2a29732,
+        0x168d4da,0x393488d,0x2c0e338,0x3f90c7b,0x0f52ad1,0x2a0a3fa,
+        0x2cd80f1,0x15e7a1a,0x00db6a0 },
+      { 0x107832a,0x159cb91,0x1289288,0x17e21f9,0x073fc27,0x1584342,
+        0x3802780,0x3d6c197,0x154075f,0x16366d1,0x09f712b,0x23a3ec4,
+        0x29cf23a,0x3218baf,0x0039f0a } },
+    /* 26 */
+    { { 0x052edf5,0x2afde13,0x2e53d8f,0x3969626,0x3dcd737,0x1e46ac5,
+        0x118bf0d,0x01b2652,0x156bcff,0x16d7ef6,0x1ca46d4,0x34c0cbb,
+        0x3e486f6,0x1f85068,0x002cdff },
+      { 0x1f47ec8,0x12cee98,0x0608667,0x18fbbe1,0x08a8821,0x31a1fe4,
+        0x17c7054,0x3c89e89,0x2edf6cd,0x1b8c32c,0x3f6ea84,0x1319329,
+        0x3cd3c2c,0x05f331a,0x00186fa } },
+    /* 27 */
+    { { 0x1fcb91e,0x0fd4d87,0x358a48a,0x04d91b4,0x083595e,0x044a1e6,
+        0x15827b9,0x1d5eaf4,0x2b82187,0x08f3984,0x21bd737,0x0c54285,
+        0x2f56887,0x14c2d98,0x00f4684 },
+      { 0x01896f6,0x0e542d0,0x2090883,0x269dfcf,0x1e11cb8,0x239fd29,
+        0x312cac4,0x19dfacb,0x369f606,0x0cc4f75,0x16579f9,0x33c22cc,
+        0x0f22bfd,0x3b251ae,0x006429c } },
+    /* 28 */
+    { { 0x375f9a4,0x137552e,0x3570498,0x2e4a74e,0x24aef06,0x35b9307,
+        0x384ca23,0x3bcd6d7,0x011b083,0x3c93187,0x392ca9f,0x129ce48,
+        0x0a800ce,0x145d9cc,0x00865d6 },
+      { 0x22b4a2b,0x37f9d9c,0x3e0eca3,0x3e5ec20,0x112c04b,0x2e1ae29,
+        0x3ce5b51,0x0f83200,0x32d6a7e,0x10ff1d8,0x081adbe,0x265c30b,
+        0x216b1c8,0x0eb4483,0x003cbcd } },
+    /* 29 */
+    { { 0x030ce93,0x2d331fb,0x20a2fbf,0x1f6dc9c,0x010ed6c,0x1ed5540,
+        0x275bf74,0x3df0fb1,0x103333f,0x0241c96,0x1075bfc,0x30e5cf9,
+        0x0f31bc7,0x32c01eb,0x00b049e },
+      { 0x358839c,0x1dbabd3,0x1e4fb40,0x36a8ac1,0x2101896,0x2d0319b,
+        0x2033b0a,0x192e8fd,0x2ebc8d8,0x2867ba7,0x07bf6d2,0x1b3c555,
+        0x2477deb,0x198fe09,0x008e5a9 } },
+    /* 30 */
+    { { 0x3fbd5e1,0x18bf77d,0x2b1d69e,0x151da44,0x338ecfe,0x0768efe,
+        0x1a3d56d,0x3c35211,0x10e1c86,0x2012525,0x3bc36ce,0x32b6fe4,
+        0x0c8d183,0x15c93f3,0x0041fce },
+      { 0x332c144,0x24e70a0,0x246e05f,0x22c21c7,0x2b17f24,0x1ba2bfd,
+        0x0534e26,0x318a4f6,0x1dc3b85,0x0c741bc,0x23131b7,0x01a8cba,
+        0x364e5db,0x21362cf,0x00f2951 } },
+    /* 31 */
+    { { 0x2ddc103,0x14ffdcd,0x206fd96,0x0de57bd,0x025f43e,0x381b73a,
+        0x2301fcf,0x3bafc27,0x34130b6,0x0216bc8,0x0ff56b2,0x2c4ad4c,
+        0x23c6b79,0x1267fa6,0x009b4fb },
+      { 0x1d27ac2,0x13e2494,0x1389015,0x38d5b29,0x2d33167,0x3f01969,
+        0x28ec1fa,0x1b26de0,0x2587f74,0x1c25668,0x0c44f83,0x23c6f8c,
+        0x32fdbb1,0x045f104,0x00a7946 } },
+    /* 32 */
+    { { 0x23c647b,0x09addd7,0x1348c04,0x0e633c1,0x1bfcbd9,0x1cb034f,
+        0x1312e31,0x11cdcc7,0x1e6ee75,0x057d27f,0x2da7ee6,0x154c3c1,
+        0x3a5fb89,0x2c2ba2c,0x00cf281 },
+      { 0x1b8a543,0x125cd50,0x1d30fd1,0x29cc203,0x341a625,0x14e4233,
+        0x3aae076,0x289e38a,0x036ba02,0x230f405,0x3b21b8f,0x34088b9,
+        0x01297a0,0x03a75fb,0x00fdc27 } },
+    /* 33 */
+    { { 0x07f41d6,0x1cf032f,0x1641008,0x0f86deb,0x3d97611,0x0e110fe,
+        0x136ff42,0x0b914a9,0x0e241e6,0x180c340,0x1f545fc,0x0ba619d,
+        0x1208c53,0x04223a4,0x00cd033 },
+      { 0x397612c,0x0132665,0x34e2d1a,0x00bba99,0x1d4393e,0x065d0a8,
+        0x2fa69ee,0x1643b55,0x08085f0,0x3774aad,0x08a2243,0x33bf149,
+        0x03f41a5,0x1ed950e,0x0048cc6 } },
+    /* 34 */
+    { { 0x014ab48,0x010c3bf,0x2a744e5,0x13c99c1,0x2195b7f,0x32207fd,
+        0x28a228c,0x004f4bf,0x0e2d945,0x2ec6e5a,0x0b92162,0x1aa95e5,
+        0x2754a93,0x1adcd93,0x004fb76 },
+      { 0x1e1ff7f,0x24ef28c,0x269113f,0x32b393c,0x2696eb5,0x0ac2780,
+        0x354bf8a,0x0ffe3fd,0x09ce58e,0x0163c4f,0x1678c0b,0x15cd1bc,
+        0x292b3b7,0x036ea19,0x00d5420 } },
+    /* 35 */
+    { { 0x1da1265,0x0c2ef5b,0x18dd9a0,0x3f3a25c,0x0f7b4f3,0x0d8196e,
+        0x24931f9,0x090729a,0x1875f72,0x1ef39cb,0x2577585,0x2ed472d,
+        0x136756c,0x20553a6,0x00c7161 },
+      { 0x2e32189,0x283de4b,0x00b2e81,0x0989df7,0x3ef2fab,0x1c7d1a7,
+        0x24f6feb,0x3e16679,0x233dfda,0x06d1233,0x3e6b5df,0x1707132,
+        0x05f7b3f,0x2c00779,0x00fb8df } },
+    /* 36 */
+    { { 0x15bb921,0x117e9d3,0x267ec73,0x2f934ad,0x25c7e04,0x20b5e8f,
+        0x2d3a802,0x2ca911f,0x3f87e47,0x39709dd,0x08488e2,0x2cec400,
+        0x35b4589,0x1f0acba,0x009aad7 },
+      { 0x2ac34ae,0x06f29f6,0x3326d68,0x3949abe,0x02452e4,0x0687b85,
+        0x0879244,0x1eb7832,0x0d4c240,0x31d0ec1,0x3c17a2a,0x17a666f,
+        0x01a06cb,0x3e0929c,0x004dca2 } },
+    /* 37 */
+    { { 0x127bc1a,0x0c72984,0x13be68e,0x26c5fab,0x1a3edd5,0x097d685,
+        0x36b645e,0x385799e,0x394a420,0x39d8885,0x0b1e872,0x13f60ed,
+        0x2ce1b79,0x3c0ecb7,0x007cab3 },
+      { 0x29b3586,0x26fc572,0x0bd7711,0x0913494,0x0a55459,0x31af3c9,
+        0x3633eac,0x3e2105c,0x0c2b1b6,0x0e6f4c2,0x047d38c,0x2b81bd5,
+        0x1fe1c3b,0x04d7cd0,0x0054dcc } },
+    /* 38 */
+    { { 0x03caf0d,0x0d66365,0x313356d,0x2a4897f,0x2ce044e,0x18feb7a,
+        0x1f6a7c5,0x3709e7b,0x14473e8,0x2d8cbae,0x3190dca,0x12d19f8,
+        0x31e3181,0x3cc5b6e,0x002d4f4 },
+      { 0x143b7ca,0x2604728,0x39508d6,0x0cb79f3,0x24ec1ac,0x1ed7fa0,
+        0x3ab5fd3,0x3c76488,0x2e49390,0x03a0985,0x3580461,0x3fd2c81,
+        0x308f0ab,0x38561d6,0x0011b9b } },
+    /* 39 */
+    { { 0x3be682c,0x0c68f4e,0x32dd4ae,0x099d3bb,0x0bc7c5d,0x311f750,
+        0x2fd10a3,0x2e7864a,0x23bc14a,0x13b1f82,0x32e495e,0x1b0f746,
+        0x3cd856a,0x17a4c26,0x00085ee },
+      { 0x02e67fd,0x06a4223,0x2af2f38,0x2038987,0x132083a,0x1b7bb85,
+        0x0d6a499,0x131e43f,0x3035e52,0x278ee3e,0x1d5b08b,0x30d8364,
+        0x2719f8d,0x0b21fc9,0x003a06e } },
+    /* 40 */
+    { { 0x237cac0,0x27d6a1c,0x27945cd,0x2750d61,0x293f0b5,0x253db13,
+        0x04a764e,0x20b4d0e,0x12bb627,0x160c13b,0x0de0601,0x236e2cf,
+        0x2190f0b,0x354d76f,0x004336d },
+      { 0x2ab473a,0x10d54e4,0x1046574,0x1d6f97b,0x0031c72,0x06426a9,
+        0x38678c2,0x0b76cf9,0x04f9920,0x152adf8,0x2977e63,0x1234819,
+        0x198be26,0x061024c,0x00d427d } },
+    /* 41 */
+    { { 0x39b5a31,0x2123d43,0x362a822,0x1a2eab6,0x0bb0034,0x0d5d567,
+        0x3a04723,0x3a10c8c,0x08079ae,0x0d27bda,0x2eb9e1e,0x2619e82,
+        0x39a55a8,0x0c6c7db,0x00c1519 },
+      { 0x174251e,0x13ac2eb,0x295ed26,0x18d2afc,0x037b9b2,0x1258344,
+        0x00921b0,0x1f702d8,0x1bc4da7,0x1c3794f,0x12b1869,0x366eacf,
+        0x16ddf01,0x31ebdc5,0x00ad54e } },
+    /* 42 */
+    { { 0x1efdc58,0x1370d5e,0x0ddb8e7,0x1a53fda,0x1456bd3,0x0c825a9,
+        0x0e74ccd,0x20f41c9,0x3423867,0x139073f,0x3c70d8a,0x131fc85,
+        0x219a2a0,0x34bf986,0x0041199 },
+      { 0x1c05dd2,0x268f80a,0x3da9d38,0x1af9f8f,0x0535f2a,0x30ad37e,
+        0x2cf72d7,0x14a509b,0x1f4fe74,0x259e09d,0x1d23f51,0x0672732,
+        0x08fc463,0x00b6201,0x001e05a } },
+    /* 43 */
+    { { 0x0d5ffe8,0x3238bb5,0x17f275c,0x25b6fa8,0x2f8bb48,0x3b8f2d2,
+        0x059790c,0x18594d4,0x285a47c,0x3d301bb,0x12935d2,0x23ffc96,
+        0x3d7c7f9,0x15c8cbf,0x0034c4a },
+      { 0x20376a2,0x05201ba,0x1e02c4b,0x1413c45,0x02ea5e7,0x39575f0,
+        0x2d76e21,0x113694c,0x011f310,0x0da3725,0x31b7799,0x1cb9195,
+        0x0cfd592,0x22ee4ea,0x00adaa3 } },
+    /* 44 */
+    { { 0x14ed72a,0x031c49f,0x39a34bf,0x192e87d,0x0da0e92,0x130e7a9,
+        0x00258bf,0x144e123,0x2d82a71,0x0294e53,0x3f06c66,0x3d4473a,
+        0x037cd4a,0x3bbfb17,0x00fcebc },
+      { 0x39ae8c1,0x2dd6a9d,0x206ef23,0x332b479,0x2deff59,0x09d5720,
+        0x3526fd2,0x33bf7cf,0x344bb32,0x359316a,0x115bdef,0x1b8468a,
+        0x3813ea9,0x11a8450,0x00ab197 } },
+    /* 45 */
+    { { 0x0837d7d,0x1e1617b,0x0ba443c,0x2f2e3b8,0x2ca5b6f,0x176ed7b,
+        0x2924d9d,0x07294d3,0x104bb4f,0x1cfd3e8,0x398640f,0x1162dc8,
+        0x007ea15,0x2aa75fd,0x004231f },
+      { 0x16e6896,0x01987be,0x0f9d53e,0x1a740ec,0x1554e4c,0x31e1634,
+        0x3cb07b9,0x013eb53,0x39352cb,0x1dfa549,0x0974e7f,0x17c55d2,
+        0x157c85f,0x1561adb,0x002e3fa } },
+    /* 46 */
+    { { 0x29951a8,0x35200da,0x2ad042c,0x22109e4,0x3a8b15b,0x2eca69c,
+        0x28bcf9a,0x0cfa063,0x0924099,0x12ff668,0x2fb88dc,0x028d653,
+        0x2445876,0x218d01c,0x0014418 },
+      { 0x1caedc7,0x295bba6,0x01c9162,0x3364744,0x28fb12e,0x24c80b6,
+        0x2719673,0x35e5ba9,0x04aa4cc,0x206ab23,0x1cf185a,0x2c140d8,
+        0x1095a7d,0x1b3633f,0x000c9f8 } },
+    /* 47 */
+    { { 0x0b2a556,0x0a051c4,0x30b29a7,0x190c9ed,0x3767ca9,0x38de66d,
+        0x2d9e125,0x3aca813,0x2dc22a3,0x319e074,0x0d9450a,0x3445bac,
+        0x3e08a5b,0x07f29fa,0x00eccac },
+      { 0x02d6e94,0x21113f7,0x321bde6,0x0a4d7b3,0x03621f4,0x2780e8b,
+        0x22d5432,0x1fc2853,0x0d57d3e,0x254f90b,0x33ed00b,0x289b025,
+        0x12272bb,0x30e715f,0x0000297 } },
+    /* 48 */
+    { { 0x0243a7d,0x2aac42e,0x0c5b3aa,0x0fa3e96,0x06eeef9,0x2b9fdd9,
+        0x26fca39,0x0134fe1,0x22661ab,0x1990416,0x03945d6,0x15e3628,
+        0x3848ca3,0x0f91e46,0x00b08cd },
+      { 0x16d2411,0x3717e1d,0x128c45e,0x3669d54,0x0d4a790,0x2797da8,
+        0x0f09634,0x2faab0b,0x27df649,0x3b19b49,0x0467039,0x39b65a2,
+        0x3816f3c,0x31ad0bd,0x0050046 } },
+    /* 49 */
+    { { 0x2425043,0x3858099,0x389092a,0x3f7c236,0x11ff66a,0x3c58b39,
+        0x2f5a7f8,0x1663ce1,0x2a0fcf5,0x38634b7,0x1a8ca18,0x0dcace8,
+        0x0e6f778,0x03ae334,0x00df0d2 },
+      { 0x1bb4045,0x357875d,0x14b77ed,0x33ae5b6,0x2252a47,0x31899dd,
+        0x3293582,0x040c6f6,0x14340dd,0x3614f0e,0x3d5f47f,0x326fb3d,
+        0x0044a9d,0x00beeb9,0x0027c23 } },
+    /* 50 */
+    { { 0x32d49ce,0x34822a3,0x30a22d1,0x00858b7,0x10d91aa,0x2681fd9,
+        0x1cce870,0x2404a71,0x38b8433,0x377c1c8,0x019442c,0x0a38b21,
+        0x22aba50,0x0d61c81,0x002dcbd },
+      { 0x0680967,0x2f0f2f9,0x172cb5f,0x1167e4b,0x12a7bc6,0x05b0da7,
+        0x2c76e11,0x3a36201,0x37a3177,0x1d71419,0x0569df5,0x0dce7ad,
+        0x3f40b75,0x3bd8db0,0x002d481 } },
+    /* 51 */
+    { { 0x2a1103e,0x34e7f7f,0x1b171a2,0x24a57e0,0x2eaae55,0x166c992,
+        0x10aa18f,0x0bb836f,0x01acb59,0x0e430e7,0x1750cca,0x18be036,
+        0x3cc6cdf,0x0a0f7e5,0x00da4d8 },
+      { 0x2201067,0x374d187,0x1f6b0a6,0x165a7ec,0x31531f8,0x3580487,
+        0x15e5521,0x0724522,0x2b04c04,0x202c86a,0x3cc1ccf,0x225b11a,
+        0x1bde79d,0x0eccc50,0x00d24da } },
+    /* 52 */
+    { { 0x3b0a354,0x2814dd4,0x1cd8575,0x3d031b7,0x0392ff2,0x1855ee5,
+        0x0e8cff5,0x203442e,0x3bd3b1b,0x141cf95,0x3fedee1,0x1d783c0,
+        0x26f192a,0x0392aa3,0x0075238 },
+      { 0x158ffe9,0x3889f19,0x14151f4,0x06067b1,0x13a3486,0x1e65c21,
+        0x382d5ef,0x1ab0aac,0x2ffddc4,0x3179b7a,0x3c8d094,0x05101e3,
+        0x237c6e5,0x3947d83,0x00f674f } },
+    /* 53 */
+    { { 0x363408f,0x21eb96b,0x27376fb,0x2a735d6,0x1a39c36,0x3d31863,
+        0x33313fc,0x32235e0,0x082f034,0x23ef351,0x39b3528,0x1a69d84,
+        0x1d9c944,0x07159ad,0x0077a71 },
+      { 0x04f8d65,0x25771e5,0x2ba84a6,0x194586a,0x1e6da5f,0x118059a,
+        0x14e9c32,0x1d24619,0x3f528ae,0x22f22e4,0x0f5580d,0x0747a0e,
+        0x32cc85f,0x286b3a8,0x008ccf9 } },
+    /* 54 */
+    { { 0x196fee2,0x2c4431c,0x094528a,0x18e1d32,0x175799d,0x26bb6b7,
+        0x2293482,0x23fd289,0x07b2be8,0x1a5c533,0x158d60d,0x04a4f3f,
+        0x164e9f7,0x32ccca9,0x00da6b6 },
+      { 0x1d821c2,0x3f76c4f,0x323df43,0x17e4374,0x0f2f278,0x121227e,
+        0x2464190,0x19d2644,0x326d24c,0x3185983,0x0803c15,0x0767a33,
+        0x1c4c996,0x0563eab,0x00631c6 } },
+    /* 55 */
+    { { 0x1752366,0x0baf83f,0x288bacf,0x0384e6f,0x2b93c34,0x3c805e7,
+        0x3664850,0x29e1663,0x254ff1d,0x3852080,0x0f85c16,0x1e389d9,
+        0x3191352,0x3915eaa,0x00a246e },
+      { 0x3763b33,0x187ad14,0x3c0d438,0x3f11702,0x1c49f03,0x35ac7a8,
+        0x3f16bca,0x27266bf,0x08b6fd4,0x0f38ce4,0x37fde8c,0x147a6ff,
+        0x02c5e5c,0x28e7fc5,0x00076a7 } },
+    /* 56 */
+    { { 0x2338d10,0x0e77fa7,0x011b046,0x1bfd0ad,0x28ee699,0x21d73bc,
+        0x0461d1a,0x342ea58,0x2d695b4,0x30415ed,0x2906e0b,0x18e494a,
+        0x20f8a27,0x026b870,0x002c19f },
+      { 0x2f4c43d,0x3f0fc3b,0x0aa95b8,0x2a01ea1,0x3e2e1b1,0x0d74af6,
+        0x0555288,0x0cb757d,0x24d2071,0x143d2bb,0x3907f67,0x3e0ce30,
+        0x131f0e9,0x3724381,0x007a874 } },
+    /* 57 */
+    { { 0x3c27050,0x08b5165,0x0bf884b,0x3dd679c,0x3bd0b8d,0x25ce2e6,
+        0x1674057,0x1f13ed3,0x1f5cd91,0x0d1fd35,0x13ce6e3,0x2671338,
+        0x10f8b90,0x34e5487,0x00942bf },
+      { 0x03b566d,0x23c3da9,0x37de502,0x1a486ff,0x1af6e86,0x1108cb3,
+        0x36f856c,0x01a6a0f,0x179f915,0x1595a01,0x2cfecb8,0x082568b,
+        0x1ba16d1,0x1abb6c0,0x00cf7f0 } },
+    /* 58 */
+    { { 0x2f96c80,0x1b8f123,0x209c0f5,0x2ccf76d,0x1d521f2,0x3705143,
+        0x2941027,0x07f88af,0x07102a9,0x38b4868,0x1efa37d,0x1bdd3e8,
+        0x028a12e,0x02e055b,0x009a9a9 },
+      { 0x1c7dfcb,0x3aa7aa7,0x1d62c54,0x3f0b0b0,0x3c74e66,0x274f819,
+        0x23f9674,0x0e2b67c,0x24654dd,0x0c71f0e,0x1946cee,0x0016211,
+        0x0045dc7,0x0da1173,0x0089856 } },
+    /* 59 */
+    { { 0x0e73946,0x29f353f,0x056329d,0x2d48c5a,0x28f697d,0x2ea4bb1,
+        0x235e9cc,0x34faa38,0x15f9f91,0x3557519,0x2a50a6c,0x1a27c8e,
+        0x2a1a0f3,0x3098879,0x00dcf21 },
+      { 0x1b818bf,0x2f20b98,0x2243cff,0x25b691e,0x3c74a2f,0x2f06833,
+        0x0e980a8,0x32db48d,0x2b57929,0x33cd7f5,0x2fe17d6,0x11a384b,
+        0x2dafb81,0x2b9562c,0x00ddea6 } },
+    /* 60 */
+    { { 0x2787b2e,0x37a21df,0x310d294,0x07ce6a4,0x1258acc,0x3050997,
+        0x19714aa,0x122824b,0x11c708b,0x0462d56,0x21abbf7,0x331aec3,
+        0x307b927,0x3e8d5a0,0x00c0581 },
+      { 0x24d4d58,0x3d628fc,0x23279e0,0x2e38338,0x2febe9b,0x346f9c0,
+        0x3d6a419,0x3264e47,0x245faca,0x3669f62,0x1e50d66,0x3028232,
+        0x18201ab,0x0bdc192,0x0002c34 } },
+    /* 61 */
+    { { 0x17bdbc2,0x1c501c5,0x1605ccd,0x31ab438,0x372fa89,0x24a8057,
+        0x13da2bb,0x3f95ac7,0x3cda0a3,0x1e2b679,0x24f0673,0x03b72f4,
+        0x35be616,0x2ccd849,0x0079d4d },
+      { 0x33497c4,0x0c7f657,0x2fb0d3d,0x3b81064,0x38cafea,0x0e942bc,
+        0x3ca7451,0x2ab9784,0x1678c85,0x3c62098,0x1eb556f,0x01b3aa2,
+        0x149f3ce,0x2656f6d,0x002eef1 } },
+    /* 62 */
+    { { 0x0596edc,0x1f4fad4,0x03a28ed,0x18a4149,0x3aa3593,0x12db40a,
+        0x12c2c2a,0x3b1a288,0x327c4fb,0x35847f5,0x384f733,0x02e3fde,
+        0x1af0e8a,0x2e417c3,0x00d85a6 },
+      { 0x0091cf7,0x2267d75,0x276860e,0x19cbbfc,0x04fef2b,0x030ce59,
+        0x3195cb1,0x1aa3f07,0x3699362,0x2a09d74,0x0d6c840,0x1e413d0,
+        0x28acdc7,0x1ff5ea1,0x0088d8b } },
+    /* 63 */
+    { { 0x3d98425,0x08dc8de,0x154e85f,0x24b1c2c,0x2d44639,0x19a1e8b,
+        0x300ee29,0x053f72e,0x3f7c832,0x12417f6,0x1359368,0x0674a4c,
+        0x1218e20,0x0e4fbd4,0x000428c },
+      { 0x01e909a,0x1d88fe6,0x12da40c,0x215ef86,0x2925133,0x004241f,
+        0x3e480f4,0x2d16523,0x07c3120,0x3375e86,0x21fd8f3,0x35dc0b6,
+        0x0efc5c9,0x14ef8d6,0x0066e47 } },
+    /* 64 */
+    { { 0x2973cf4,0x34d3845,0x34f7070,0x22df93c,0x120aee0,0x3ae2b4a,
+        0x1af9b95,0x177689a,0x036a6a4,0x0377828,0x23df41e,0x22d4a39,
+        0x0df2aa1,0x06ca898,0x0003cc7 },
+      { 0x06b1dd7,0x19dc2a8,0x35d324a,0x0467499,0x25bfa9c,0x1a1110c,
+        0x01e2a19,0x1b3c1cf,0x18d131a,0x10d9815,0x2ee7945,0x0a2720c,
+        0x0ddcdb0,0x2c071b6,0x00a6aef } },
+    /* 65 */
+    { { 0x1ab5245,0x1192d00,0x13ffba1,0x1b71236,0x09b8d0b,0x0eb49cb,
+        0x1867dc9,0x371de4e,0x05eae9f,0x36faf82,0x094ea8b,0x2b9440e,
+        0x022e173,0x2268e6b,0x00740fc },
+      { 0x0e23b23,0x22c28ca,0x04d05e2,0x0bb84c4,0x1235272,0x0289903,
+        0x267a18b,0x0df0fd1,0x32e49bb,0x2ab1d29,0x281e183,0x3dcd3c3,
+        0x1c0eb79,0x2db0ff6,0x00bffe5 } },
+    /* 66 */
+    { { 0x2a2123f,0x0d63d71,0x1f6db1a,0x257f8a3,0x1927b2d,0x06674be,
+        0x302753f,0x20b7225,0x14c1a3f,0x0429cdd,0x377affe,0x0f40a75,
+        0x2d34d06,0x05fb6b9,0x0054398 },
+      { 0x38b83c4,0x1e7bbda,0x1682f79,0x0527651,0x2615cb2,0x1795fab,
+        0x0e4facc,0x11f763c,0x1b81130,0x2010ae2,0x13f3650,0x20d5b72,
+        0x1f32f88,0x34617f4,0x00bf008 } },
+    /* 67 */
+    { { 0x28068db,0x0aa8913,0x1a47801,0x10695ca,0x1c72cc6,0x0fc1a47,
+        0x33df2c4,0x0517cf0,0x3471d92,0x1be815c,0x397f794,0x3f03cbe,
+        0x121bfae,0x172cbe0,0x00813d7 },
+      { 0x383bba6,0x04f1c90,0x0b3f056,0x1c29089,0x2a924ce,0x3c85e69,
+        0x1cecbe5,0x0ad8796,0x0aa79f6,0x25e38ba,0x13ad807,0x30b30ed,
+        0x0fa963a,0x35c763d,0x0055518 } },
+    /* 68 */
+    { { 0x0623f3b,0x3ca4880,0x2bff03c,0x0457ca7,0x3095c71,0x02a9a08,
+        0x1722478,0x302c10b,0x3a17458,0x001131e,0x0959ec2,0x18bdfbc,
+        0x2929fca,0x2adfe32,0x0040ae2 },
+      { 0x127b102,0x14ddeaa,0x1771b8c,0x283700c,0x2398a86,0x085a901,
+        0x108f9dc,0x0cc0012,0x33a918d,0x26d08e9,0x20b9473,0x12c3fc7,
+        0x1f69763,0x1c94b5a,0x00e29de } },
+    /* 69 */
+    { { 0x035af04,0x3450021,0x12da744,0x077fb06,0x25f255b,0x0db7150,
+        0x17dc123,0x1a2a07c,0x2a7636a,0x3972430,0x3704ca1,0x0327add,
+        0x3d65a96,0x3c79bec,0x009de8c },
+      { 0x11d3d06,0x3fb8354,0x12c7c60,0x04fe7ad,0x0466e23,0x01ac245,
+        0x3c0f5f2,0x2a935d0,0x3ac2191,0x090bd56,0x3febdbc,0x3f1f23f,
+        0x0ed1cce,0x02079ba,0x00d4fa6 } },
+    /* 70 */
+    { { 0x0ab9645,0x10174ec,0x3711b5e,0x26357c7,0x2aeec7f,0x2170a9b,
+        0x1423115,0x1a5122b,0x39e512c,0x18116b2,0x290db1c,0x041b13a,
+        0x26563ae,0x0f56263,0x00b89f3 },
+      { 0x3ed2ce4,0x01f365f,0x1b2043b,0x05f7605,0x1f9934e,0x2a068d2,
+        0x38d4d50,0x201859d,0x2de5291,0x0a7985a,0x17e6711,0x01b6c1b,
+        0x08091fa,0x33c6212,0x001da23 } },
+    /* 71 */
+    { { 0x2f2c4b5,0x311acd0,0x1e47821,0x3bd9816,0x1931513,0x1bd4334,
+        0x30ae436,0x2c49dc0,0x2c943e7,0x010ed4d,0x1fca536,0x189633d,
+        0x17abf00,0x39e5ad5,0x00e4e3e },
+      { 0x0c8b22f,0x2ce4009,0x1054bb6,0x307f2fc,0x32eb5e2,0x19d24ab,
+        0x3b18c95,0x0e55e4d,0x2e4acf5,0x1bc250c,0x1dbf3a5,0x17d6a74,
+        0x087cf58,0x07f6f82,0x00f8675 } },
+    /* 72 */
+    { { 0x110e0b2,0x0e672e7,0x11b7157,0x1598371,0x01c0d59,0x3d60c24,
+        0x096b8a1,0x0121075,0x0268859,0x219962f,0x03213f2,0x3022adc,
+        0x18de488,0x3dcdeb9,0x008d2e0 },
+      { 0x06cfee6,0x26f2552,0x3c579b7,0x31fa796,0x2036a26,0x362ba5e,
+        0x103601c,0x012506b,0x387ff3a,0x101a41f,0x2c7eb58,0x23d2efc,
+        0x10a5a07,0x2fd5fa3,0x00e3731 } },
+    /* 73 */
+    { { 0x1cd0abe,0x08a0af8,0x2fa272f,0x17a1fbf,0x1d4f901,0x30e0d2f,
+        0x1898066,0x273b674,0x0c1b8a2,0x3272337,0x3ee82eb,0x006e7d3,
+        0x2a75606,0x0af1c81,0x0037105 },
+      { 0x2f32562,0x2842491,0x1bb476f,0x1305cd4,0x1daad53,0x0d8daed,
+        0x164c37b,0x138030f,0x05145d5,0x300e2a3,0x32c09e7,0x0798600,
+        0x3515130,0x2b9e55c,0x009764e } },
+    /* 74 */
+    { { 0x3d5256a,0x06c67f2,0x3a3b879,0x3c9b284,0x04007e0,0x33c1a41,
+        0x3794604,0x1d6240e,0x022b6c1,0x22c62a7,0x01d4590,0x32df5f6,
+        0x368f1a1,0x2a7486e,0x006e13f },
+      { 0x31e6e16,0x20f18a9,0x09ed471,0x23b861d,0x15cf0ef,0x397b502,
+        0x1c7f9b2,0x05f84b2,0x2cce6e1,0x3c10bba,0x13fb5a7,0x1b52058,
+        0x1feb1b8,0x03b7279,0x00ea1cf } },
+    /* 75 */
+    { { 0x2a4cc9b,0x15cf273,0x08f36e6,0x076bf3b,0x2541796,0x10e2dbd,
+        0x0bf02aa,0x3aa2201,0x03cdcd4,0x3ee252c,0x3799571,0x3e01fa4,
+        0x156e8d0,0x1fd6188,0x003466a },
+      { 0x2515664,0x166b355,0x2b0b51e,0x0f28f17,0x355b0f9,0x2909e76,
+        0x206b026,0x3823a12,0x179c5fa,0x0972141,0x2663a1a,0x01ee36e,
+        0x3fc8dcf,0x2ef3d1b,0x0049a36 } },
+    /* 76 */
+    { { 0x2d93106,0x3d6b311,0x3c9ce47,0x382aa25,0x265b7ad,0x0b5f92f,
+        0x0f4c941,0x32aa4df,0x380d4b2,0x0e8aba6,0x260357a,0x1f38273,
+        0x0d5f95e,0x199f23b,0x0029f77 },
+      { 0x0a0b1c5,0x21a3d6a,0x0ad8df6,0x33d8a5e,0x1240858,0x30000a8,
+        0x3ac101d,0x2a8143d,0x1d7ffe9,0x1c74a2a,0x1b962c9,0x1261359,
+        0x0c8b274,0x002cf4a,0x00a8a7c } },
+    /* 77 */
+    { { 0x211a338,0x22a14ab,0x16e77c5,0x3c746be,0x3a78613,0x0d5731c,
+        0x1767d25,0x0b799fa,0x009792a,0x09ae8dc,0x124386b,0x183d860,
+        0x176747d,0x14c4445,0x00ab09b },
+      { 0x0eb9dd0,0x0121066,0x032895a,0x330541c,0x1e6c17a,0x2271b92,
+        0x06da454,0x054c2bf,0x20abb21,0x0ead169,0x3d7ea93,0x2359649,
+        0x242c6c5,0x3194255,0x00a3ef3 } },
+    /* 78 */
+    { { 0x3010879,0x1083a77,0x217989d,0x174e55d,0x29d2525,0x0e544ed,
+        0x1efd50e,0x30c4e73,0x05bd5d1,0x0793bf9,0x3f7af77,0x052779c,
+        0x2b06bc0,0x13d0d02,0x0055a6b },
+      { 0x3eaf771,0x094947a,0x0288f13,0x0a21e35,0x22ab441,0x23816bf,
+        0x15832e1,0x2d8aff3,0x348cc1f,0x2bbd4a8,0x01c4792,0x34209d3,
+        0x06dc72b,0x211a1df,0x00345c5 } },
+    /* 79 */
+    { { 0x2a65e90,0x173ac2f,0x199cde1,0x0ac905b,0x00987f7,0x3618f7b,
+        0x1b578df,0x0d5e113,0x34bac6a,0x27d85ed,0x1b48e99,0x18af5eb,
+        0x1a1be9e,0x3987aac,0x00877ca },
+      { 0x2358610,0x3776a8e,0x2b0723a,0x344c978,0x22fc4d6,0x1615d53,
+        0x3198f51,0x2d61225,0x12cb392,0x07dd061,0x355f7de,0x09e0132,
+        0x0efae99,0x13b46aa,0x00e9e6c } },
+    /* 80 */
+    { { 0x0683186,0x36d8e66,0x0ea9867,0x0937731,0x1fb5cf4,0x13c39ef,
+        0x1a7ffed,0x27dfb32,0x31c7a77,0x09f15fd,0x16b25ef,0x1dd01e7,
+        0x0168090,0x240ed02,0x0090eae },
+      { 0x2e1fceb,0x2ab9783,0x1a1fdf2,0x093a1b0,0x33ff1da,0x2864fb7,
+        0x3587d6c,0x275aa03,0x123dc9b,0x0e95a55,0x0592030,0x2102402,
+        0x1bdef7b,0x37f2e9b,0x001efa4 } },
+    /* 81 */
+    { { 0x0540015,0x20e3e78,0x37dcfbd,0x11b0e41,0x02c3239,0x3586449,
+        0x1fb9e6a,0x0baa22c,0x00c0ca6,0x3e58491,0x2dbe00f,0x366d4b0,
+        0x176439a,0x2a86b86,0x00f52ab },
+      { 0x0ac32ad,0x226250b,0x0f91d0e,0x1098aa6,0x3dfb79e,0x1dbd572,
+        0x052ecf2,0x0f84995,0x0d27ad2,0x036c6b0,0x1e4986f,0x2317dab,
+        0x2327df6,0x0dee0b3,0x00389ac } },
+    /* 82 */
+    { { 0x0e60f5b,0x0622d3e,0x2ada511,0x05522a8,0x27fe670,0x206af28,
+        0x333cb83,0x3f25f6c,0x19ddaf3,0x0ec579b,0x36aabc0,0x093dbac,
+        0x348b44b,0x277dca9,0x00c5978 },
+      { 0x1cf5279,0x32e294a,0x1a6c26f,0x3f006b6,0x37a3c6b,0x2e2eb26,
+        0x2cf88d4,0x3410619,0x1899c80,0x23d3226,0x30add14,0x2810905,
+        0x01a41f0,0x11e5176,0x005a02f } },
+    /* 83 */
+    { { 0x1c90202,0x321df30,0x3570fa5,0x103e2b1,0x3d099d4,0x05e207d,
+        0x0a5b1bd,0x0075d0a,0x3db5b25,0x2d87899,0x32e4465,0x226fc13,
+        0x24cb8f8,0x3821daa,0x004da3a },
+      { 0x3e66861,0x03f89b8,0x386d3ef,0x14ccc62,0x35e7729,0x11ce5b7,
+        0x035fbc7,0x3f4df0f,0x29c439f,0x1144568,0x32d7037,0x312f65e,
+        0x06b9dbf,0x03a9589,0x0008863 } },
+    /* 84 */
+    { { 0x0a9e8c9,0x1a19b6e,0x091ecd9,0x2e16ee0,0x2a11963,0x116cf34,
+        0x390d530,0x194131f,0x2b580f3,0x31d569c,0x21d3751,0x3e2ce64,
+        0x193de46,0x32454f0,0x004bffd },
+      { 0x09554e7,0x170126e,0x2be6cd1,0x153de89,0x0353c67,0x350765c,
+        0x202370b,0x1db01e5,0x30b12b1,0x3778591,0x00c8809,0x2e845d5,
+        0x1fb1e56,0x170f90d,0x00e2db3 } },
+    /* 85 */
+    { { 0x328e33f,0x392aad8,0x36d1d71,0x0aebe04,0x1548678,0x1b55c8c,
+        0x24995f8,0x2a5a01e,0x1bd1651,0x37c7c29,0x36803b6,0x3716c91,
+        0x1a935a5,0x32f10b7,0x005c587 },
+      { 0x2e8b4c0,0x336ccae,0x11382b6,0x22ec4cc,0x066d159,0x35fa585,
+        0x23b2d25,0x3017528,0x2a674a8,0x3a4f900,0x1a7ce82,0x2b2539b,
+        0x3d46545,0x0a07918,0x00eb9f8 } },
+    /* 86 */
+    { { 0x2cf5b9b,0x03e747f,0x166a34e,0x0afc81a,0x0a115b1,0x3aa814d,
+        0x11cf3b1,0x163e556,0x3cbfb15,0x157c0a4,0x1bc703a,0x2141e90,
+        0x01f811c,0x207218b,0x0092e6b },
+      { 0x1af24e3,0x3af19b3,0x3c70cc9,0x335cbf3,0x068917e,0x055ee92,
+        0x09a9308,0x2cac9b7,0x008b06a,0x1175097,0x36e929c,0x0be339c,
+        0x0932436,0x15f18ba,0x0009f6f } },
+    /* 87 */
+    { { 0x29375fb,0x35ade34,0x11571c7,0x07b8d74,0x3fabd85,0x090fa91,
+        0x362dcd4,0x02c3fdb,0x0608fe3,0x2477649,0x3fc6e70,0x059b7eb,
+        0x1e6a708,0x1a4c220,0x00c6c4c },
+      { 0x2a53fb0,0x1a3e1f5,0x11f9203,0x27e7ad3,0x038718e,0x3f5f9e4,
+        0x308acda,0x0a8700f,0x34472fe,0x3420d7a,0x08076e5,0x014240e,
+        0x0e7317e,0x197a98e,0x00538f7 } },
+    /* 88 */
+    { { 0x2663b4b,0x0927670,0x38dd0e0,0x16d1f34,0x3e700ab,0x3119567,
+        0x12559d2,0x399b6c6,0x0a84bcd,0x163e7dd,0x3e2aced,0x058548c,
+        0x03a5bad,0x011cf74,0x00c155c },
+      { 0x3e454eb,0x2a1e64e,0x1ccd346,0x36e0edf,0x266ee94,0x2e74aaf,
+        0x2d8378a,0x3cd547d,0x1d27733,0x0928e5b,0x353553c,0x26f502b,
+        0x1d94341,0x2635cc7,0x00d0ead } },
+    /* 89 */
+    { { 0x0142408,0x382c3bb,0x3310908,0x2e50452,0x398943c,0x1d0ac75,
+        0x1bf7d81,0x04bd00f,0x36b6934,0x3349c37,0x0f69e20,0x0195252,
+        0x243a1c5,0x030da5f,0x00a76a9 },
+      { 0x224825a,0x28ce111,0x34c2e0f,0x02e2b30,0x382e48c,0x26853ca,
+        0x24bd14e,0x0200dec,0x1e24db3,0x0d3d775,0x132da0a,0x1dea79e,
+        0x253dc0c,0x03c9d31,0x0020db9 } },
+    /* 90 */
+    { { 0x26c5fd9,0x05e6dc3,0x2eea261,0x08db260,0x2f8bec1,0x1255edf,
+        0x283338d,0x3d9a91d,0x2640a72,0x03311f9,0x1bad935,0x152fda8,
+        0x0e95abd,0x31abd15,0x00dfbf4 },
+      { 0x107f4fa,0x29ebe9a,0x27353f7,0x3821972,0x27311fa,0x2925ab6,
+        0x337ab82,0x2de6c91,0x1f115fe,0x044f909,0x21b93c2,0x3a5f142,
+        0x13eb5e9,0x3ab1377,0x00b26b6 } },
+    /* 91 */
+    { { 0x22e5f2b,0x2ae7d4a,0x1ac481c,0x0a6fce1,0x2f93caf,0x242658e,
+        0x3f35c3c,0x050f3d2,0x30074c9,0x142079c,0x0281b4c,0x295fea3,
+        0x007413e,0x01726cd,0x00e4979 },
+      { 0x1ab3cfb,0x1b76295,0x36adf55,0x1ad4636,0x1d444b9,0x3bd2e55,
+        0x35425a5,0x1aa8cd3,0x3acecd2,0x1f769e8,0x1a655e9,0x1f6846f,
+        0x24c70b5,0x3bff080,0x0002da3 } },
+    /* 92 */
+    { { 0x081d0d9,0x2c00d99,0x1fe2e24,0x396063f,0x03740db,0x243f680,
+        0x3c1f451,0x1ff7b07,0x2803cf2,0x38ca724,0x2934f43,0x0d72d4d,
+        0x0e8fe74,0x2975e21,0x002b505 },
+      { 0x11adcc9,0x331a99c,0x21e16cf,0x1714c78,0x1f03432,0x2caa2a6,
+        0x34a9679,0x2f7fe8b,0x0423c21,0x1a757ce,0x31b57d6,0x171e044,
+        0x093b9b2,0x13602e0,0x00db534 } },
+    /* 93 */
+    { { 0x250a2f5,0x0b999eb,0x21d10d7,0x22b92a1,0x39b7f8d,0x0c37c72,
+        0x29f70f3,0x3bf0e84,0x1d7e04f,0x07a42a9,0x272c3ae,0x1587b2f,
+        0x155faff,0x10a336e,0x000d8fb },
+      { 0x3663784,0x0d7dcf5,0x056ad22,0x319f8b1,0x0c05bae,0x2b6ff33,
+        0x0292e42,0x0435797,0x188efb1,0x0d3f45e,0x119d49f,0x395dcd3,
+        0x279fe27,0x133a13d,0x00188ac } },
+    /* 94 */
+    { { 0x396c53e,0x0d133e9,0x009b7ee,0x13421a0,0x1bbf607,0x1d284a5,
+        0x1594f74,0x18cb47c,0x2dcac11,0x2999ddb,0x04e2fa5,0x1889e2c,
+        0x0a89a18,0x33cb215,0x0052665 },
+      { 0x104ab58,0x1d91920,0x3d6d7e3,0x04dc813,0x1167759,0x13a8466,
+        0x0a06a54,0x103761b,0x25b1c92,0x26a8fdd,0x2474614,0x21406a4,
+        0x251d75f,0x38c3734,0x007b982 } },
+    /* 95 */
+    { { 0x15f3060,0x3a7bf30,0x3be6e44,0x0baa1fa,0x05ad62f,0x1e54035,
+        0x099d41c,0x2a744d9,0x1c0336f,0x3e99b5b,0x1afd3b1,0x2bf1255,
+        0x1822bf8,0x2c93972,0x001d8cc },
+      { 0x1d7584b,0x0508ade,0x20dd403,0x203a8fc,0x1c54a05,0x1611a31,
+        0x037c8f9,0x1dcd4fe,0x110fbea,0x30f60bc,0x3dffe2f,0x26a1de1,
+        0x0480367,0x18ec81c,0x0048eba } },
+    /* 96 */
+    { { 0x346e2f6,0x0435077,0x036789b,0x3e06545,0x313ab57,0x351a721,
+        0x3372b91,0x15e6019,0x2fa4f6c,0x3c30656,0x272c9ac,0x10e84a8,
+        0x2bdacea,0x232d9e2,0x009dadd },
+      { 0x182579a,0x15b1af8,0x02d8cce,0x36cb49b,0x086feba,0x2911d17,
+        0x268ee12,0x011e871,0x18698dc,0x35602b3,0x11b9ec2,0x0ade731,
+        0x0f6a05a,0x1821015,0x00007da } },
+    /* 97 */
+    { { 0x3b00dd0,0x328d485,0x27a69e3,0x32c3a06,0x1046779,0x120b61c,
+        0x19fef3d,0x0fef2e6,0x134d923,0x039bce0,0x348cd0e,0x0b0c007,
+        0x066ae11,0x15d8f1b,0x00934e7 },
+      { 0x33234dc,0x353f0f5,0x2fc1b44,0x18a193a,0x2fcae20,0x1afbc86,
+        0x3afe252,0x17f7e10,0x107f3b7,0x2d84d54,0x394c2e6,0x19e96a9,
+        0x0a37283,0x26c6152,0x003d262 } },
+    /* 98 */
+    { { 0x37cfaf8,0x01863d0,0x0299623,0x32c80cb,0x25b8742,0x0a4d90e,
+        0x1f72472,0x13de652,0x31a0946,0x0ee0103,0x0f25414,0x2518b49,
+        0x07e7604,0x1488d9b,0x00abd6b },
+      { 0x1338f55,0x2ce4af5,0x1a0c119,0x3380525,0x21a80a9,0x235d4df,
+        0x118ca7f,0x2dd8bcc,0x1c26bf4,0x32dc56b,0x28482b6,0x1418596,
+        0x3c84d24,0x1f1a5a9,0x00d958d } },
+    /* 99 */
+    { { 0x1c21f31,0x22aa1ef,0x258c9ad,0x2d2018f,0x0adb3ca,0x01f75ee,
+        0x186283b,0x31ad3bf,0x3621be7,0x3b1ee6d,0x015582d,0x3d61d04,
+        0x2ddf32e,0x14b8a66,0x00c970c },
+      { 0x2f24d66,0x00b8a88,0x100a78f,0x041d330,0x2efec1d,0x24c5b86,
+        0x2a6a390,0x37526bc,0x2055849,0x3339f08,0x16bffc4,0x07f9d72,
+        0x06ec09c,0x3f49ee8,0x00cad98 } },
+    /* 100 */
+    { { 0x248b73e,0x1b8b42d,0x285eed7,0x39473f4,0x1a9f92c,0x3b44f78,
+        0x086c062,0x06a4ea3,0x34ea519,0x3c74e95,0x1ad1b8b,0x1737e2c,
+        0x2cfe338,0x0a291f4,0x00bbecc },
+      { 0x1cec548,0x0c9b01a,0x20b298d,0x377c902,0x24f5bc1,0x2415c8d,
+        0x1a70622,0x2529090,0x1c5c682,0x283f1ba,0x2319f17,0x0120e2e,
+        0x01c6f4d,0x33c67ff,0x008b612 } },
+    /* 101 */
+    { { 0x03830eb,0x02d4053,0x10c59bb,0x0f23b83,0x13d08f8,0x26ea4e2,
+        0x2626427,0x0a45292,0x0449cbc,0x0175750,0x074c46f,0x27ae0f8,
+        0x2d7d6ae,0x163dd3a,0x0063bb7 },
+      { 0x2bb29e0,0x034bab1,0x341e1c4,0x21d2c0b,0x295aa2d,0x0f2c666,
+        0x1891755,0x13db64a,0x2fe5158,0x337646e,0x31a1aae,0x057bee4,
+        0x00f9e37,0x396d19e,0x00c1b6a } },
+    /* 102 */
+    { { 0x2772f41,0x34f92d0,0x39d1cde,0x174ef2d,0x03a700d,0x03fbb98,
+        0x30d50e8,0x352ed10,0x1fcf5e5,0x3d113bc,0x26e358f,0x180653f,
+        0x1b43cc6,0x3cc9aa4,0x00e68a2 },
+      { 0x37fe4d2,0x09dd725,0x01eb584,0x171f8a9,0x278fdef,0x3e37c03,
+        0x3bec02f,0x149757c,0x0cd5852,0x37d2e10,0x0e6988b,0x1c120e9,
+        0x0b83708,0x38e7319,0x0039499 } },
+    /* 103 */
+    { { 0x08df5fe,0x177a02c,0x0362fc0,0x1f18ee8,0x00c1295,0x173c50a,
+        0x379414d,0x1885ba8,0x32a54ef,0x2315644,0x39e65cf,0x357c4be,
+        0x1d66333,0x09e05a5,0x0009c60 },
+      { 0x1f7a2fb,0x073b518,0x2eb83ac,0x11353d7,0x1dd8384,0x0c63f2b,
+        0x238c6c8,0x2a1920a,0x2e5e9f1,0x1cc56f8,0x042daf4,0x1ed5dc5,
+        0x25f9e31,0x012a56a,0x0081b59 } },
+    /* 104 */
+    { { 0x321d232,0x2c71422,0x3a756b6,0x30230b2,0x387f3db,0x3a7c3eb,
+        0x274b46a,0x201e69f,0x185bb7b,0x140da82,0x0d974a2,0x0616e42,
+        0x35ec94f,0x3bc366b,0x005aa7c },
+      { 0x3dcfffc,0x19a9c15,0x3225e05,0x36ae114,0x16ea311,0x0cda2aa,
+        0x2a1a8d2,0x154b5cb,0x08348cd,0x17b66c8,0x080ea43,0x21e59f3,
+        0x04173b9,0x31d5b04,0x00ad735 } },
+    /* 105 */
+    { { 0x2e76ef4,0x216acf3,0x2b93aea,0x112bc74,0x3449974,0x2b2e48f,
+        0x11929be,0x2f03021,0x19051e3,0x0ac202d,0x19be68a,0x3b87619,
+        0x26cdac4,0x086592c,0x00f00de },
+      { 0x2e90d4d,0x3ed703c,0x2c648d7,0x29ddf67,0x000e219,0x3471247,
+        0x26febd5,0x1161713,0x3541a8f,0x302038d,0x08d2af9,0x26e1b21,
+        0x398514a,0x36dad99,0x002ed70 } },
+    /* 106 */
+    { { 0x06f25cb,0x1104596,0x370faee,0x07e83f3,0x0f7b686,0x228d43a,
+        0x12cd201,0x0a1bd57,0x3e592dc,0x1e186fc,0x2226aba,0x2c63fe9,
+        0x17b039a,0x1efaa61,0x00d1582 },
+      { 0x2e6acef,0x07d51e4,0x3ac326c,0x322b07e,0x1422c63,0x32ff5c7,
+        0x18760df,0x048928b,0x139b251,0x04d7da9,0x048d1a2,0x2a23e84,
+        0x199dbba,0x2fa7afe,0x0049f1a } },
+    /* 107 */
+    { { 0x3492b73,0x27d3d3d,0x2b1a16f,0x07b2ce4,0x0cf28ec,0x2729bff,
+        0x3130d46,0x3e96116,0x140b72e,0x14a2ea3,0x1ca066f,0x3a61f1d,
+        0x022ebac,0x09192b4,0x003e399 },
+      { 0x12555bb,0x0b6139d,0x239463a,0x12a70ab,0x2aaa93b,0x2254e72,
+        0x00424ec,0x26a6736,0x26daa11,0x25b5ad6,0x379f262,0x140cd30,
+        0x0c7d3bd,0x097bbcf,0x00899e9 } },
+    /* 108 */
+    { { 0x3825dc4,0x3cd946f,0x0462b7f,0x31102e7,0x30f741c,0x3313ed6,
+        0x1ff5a95,0x15bf9dc,0x09b47fd,0x0f2e7a7,0x1626c0d,0x3c14f6d,
+        0x14098bd,0x19d7df8,0x00a97ce },
+      { 0x0934f5e,0x3f968db,0x046f68a,0x12333bf,0x26cd5e1,0x1ea2161,
+        0x358570d,0x235031d,0x35edd55,0x05265e3,0x24ae00c,0x3542229,
+        0x25bb2a1,0x1c83c75,0x0058f2a } },
+    /* 109 */
+    { { 0x24daedb,0x376928f,0x305266f,0x0499746,0x038318c,0x312efd7,
+        0x1910a24,0x33450a3,0x1c478a9,0x39d8bf9,0x12cc0ae,0x397aeab,
+        0x0654c08,0x095f283,0x00d2cdf },
+      { 0x0b717d2,0x1f162c2,0x107a48f,0x128e1b3,0x2380718,0x39f4044,
+        0x00f626a,0x05ec0c9,0x21bc439,0x200fa4d,0x20aea01,0x186a1d8,
+        0x26372f2,0x1a91f87,0x0053f55 } },
+    /* 110 */
+    { { 0x3512a90,0x33b958b,0x29f1c84,0x0106c3a,0x224b3c0,0x09b307a,
+        0x215d2de,0x3bdf43b,0x22cf0c9,0x176121d,0x1534143,0x09ba717,
+        0x16b3110,0x0f73f6c,0x008f5b7 },
+      { 0x2c75d95,0x26fbcb4,0x0dda1f6,0x206f819,0x28d33d5,0x1fb4d79,
+        0x024c125,0x30a0630,0x1f9c309,0x0fe350d,0x1696019,0x0a54187,
+        0x09541fd,0x35e3a79,0x0066618 } },
+    /* 111 */
+    { { 0x0e382de,0x33f5163,0x0dde571,0x3bb7a40,0x1175806,0x12ae8ed,
+        0x0499653,0x3b25586,0x38ade7a,0x3fa265d,0x3f4aa97,0x3c03dbb,
+        0x30c6de8,0x32d4042,0x00ae971 },
+      { 0x2f788f1,0x1fbaf0e,0x3e2d182,0x3ff904f,0x0d46229,0x1d0726d,
+        0x15455b4,0x093ae28,0x290f8e4,0x097c0b9,0x1ae8771,0x28480bb,
+        0x04f6d40,0x3689925,0x0049b3b } },
+    /* 112 */
+    { { 0x35b2d69,0x31819c0,0x11b0d63,0x035afb6,0x2b50715,0x2bece6c,
+        0x35f82f7,0x0ad987c,0x0011601,0x02e6f67,0x2d0a5f5,0x365e583,
+        0x2f7c900,0x11449c5,0x00ed705 },
+      { 0x27abdb4,0x1bbfd04,0x301c157,0x263c079,0x36850d6,0x3f21f8b,
+        0x27d7493,0x0f9227e,0x06fb0ce,0x002daf3,0x37d8c1c,0x3ef87d7,
+        0x19cc6f4,0x0c3809c,0x00cf752 } },
+    /* 113 */
+    { { 0x22d94ed,0x075b09c,0x020e676,0x084dc62,0x2d1ec3f,0x17439f1,
+        0x240b702,0x33cc596,0x30ebaf3,0x0359fe0,0x393ea43,0x0ece01e,
+        0x16c6963,0x03a82f2,0x0017faa },
+      { 0x3866b98,0x3cd20b7,0x12d4e6b,0x3a6a76d,0x1205c1e,0x3e6ae1a,
+        0x2f9bbdf,0x2e61547,0x2d175ee,0x28e18f6,0x13cf442,0x085b0ef,
+        0x0e321ef,0x238fe72,0x003fb22 } },
+    /* 114 */
+    { { 0x360ac07,0x26dc301,0x3f4d94f,0x2ba75e6,0x1f3c9cc,0x17ff20f,
+        0x0ea084c,0x30e39cf,0x143dc49,0x03bd43e,0x3c9e733,0x19e8aba,
+        0x27fbaf4,0x12d913a,0x005ee53 },
+      { 0x3609e7f,0x2d89c80,0x09f020c,0x1558bf7,0x3098443,0x3c515fd,
+        0x1c8e580,0x16506bd,0x26cb4b2,0x1747d42,0x2ec8239,0x32c91f0,
+        0x1ca3377,0x079768f,0x00a5f3e } },
+    /* 115 */
+    { { 0x185fa94,0x122759f,0x0e47023,0x0dcb6e7,0x10ba405,0x3b5eab4,
+        0x1f7a1fa,0x32d003f,0x1739a4c,0x3295ec3,0x1b18967,0x3f3b265,
+        0x34d2448,0x2dbadc9,0x00f30b5 },
+      { 0x01c5338,0x2d1dcf2,0x2bd07cc,0x39a8fb5,0x2b85639,0x355bab6,
+        0x1df95f1,0x01eb5f6,0x17f0a16,0x1b895b5,0x157574d,0x29fff72,
+        0x3a8c46d,0x0118071,0x0065f84 } },
+    /* 116 */
+    { { 0x3a1e7f1,0x17432f2,0x1f648d4,0x3000ad5,0x2ef0a08,0x1f86624,
+        0x1ca31b1,0x241f9dc,0x2cb4885,0x2b8610f,0x364ce16,0x1e5faf0,
+        0x0b33867,0x2cb637d,0x00816d2 },
+      { 0x1aa8671,0x02c394e,0x35f5e87,0x393040a,0x39f0db3,0x1c831a5,
+        0x2966591,0x034a8d0,0x09e613c,0x042b532,0x018ddd6,0x3e402c9,
+        0x2e20e1a,0x29cb4cd,0x00e087c } },
+    /* 117 */
+    { { 0x3a10079,0x20c7fea,0x3ff2222,0x1edb593,0x00dc5f8,0x3a32ccc,
+        0x1479073,0x0cfed11,0x2a2702a,0x17a056a,0x1fba321,0x235acb9,
+        0x149c833,0x172de7d,0x000f753 },
+      { 0x2e95923,0x3b365cb,0x009f471,0x0df1b47,0x21e868b,0x199bbd3,
+        0x07b8ecc,0x12ff0af,0x189808a,0x3bd5059,0x3fbc4d2,0x0fa7b88,
+        0x1125bf2,0x0db0b5d,0x0043572 } },
+    /* 118 */
+    { { 0x29cdb1b,0x1db656e,0x391efe1,0x004be09,0x245a1ca,0x3793328,
+        0x254af24,0x2f2e65d,0x10e5cc4,0x2af6fe7,0x2d97ac0,0x29f7d42,
+        0x19fd6f6,0x0ac184d,0x00c5211 },
+      { 0x305eae3,0x36738d3,0x2c2b696,0x00ba50e,0x3903adc,0x2122f85,
+        0x0753470,0x1cf96a4,0x1702a39,0x247883c,0x2feb67e,0x2ab3071,
+        0x3c6b9e1,0x30cb85a,0x002ca0a } },
+    /* 119 */
+    { { 0x3871eb5,0x284b93b,0x0a7affe,0x176a2fc,0x294c2f2,0x204d3aa,
+        0x1e4c2a7,0x3ec4134,0x2fb0360,0x3847b45,0x05fc11b,0x0a6db6e,
+        0x390fa40,0x2adfd34,0x005e9f7 },
+      { 0x0646612,0x1b5cbcc,0x10d8507,0x0777687,0x3a0afed,0x1687440,
+        0x0222578,0x1af34a4,0x2174e27,0x372d267,0x11246c3,0x34769c5,
+        0x2044316,0x1b4d626,0x00c72d5 } },
+    /* 120 */
+    { { 0x2e5bb45,0x3ff1d36,0x16dcdf5,0x128986f,0x399068c,0x2a63b1e,
+        0x0afa7aa,0x3a5b770,0x200f121,0x33b74bb,0x1414045,0x0f31ef8,
+        0x2f50e16,0x2f38cd6,0x00b0b1b },
+      { 0x1a06293,0x035e140,0x2644d44,0x1f1954b,0x2cdebab,0x31d5f91,
+        0x0b8dbc8,0x38f2d23,0x3783cab,0x2a07e73,0x3123f59,0x3409846,
+        0x3784ddd,0x223bbac,0x003dc7b } },
+    /* 121 */
+    { { 0x0741456,0x234e631,0x2121e1b,0x00980ca,0x3a9dfa9,0x098c916,
+        0x3fc86d1,0x1c63072,0x3625244,0x13d0471,0x05b0fc5,0x1487550,
+        0x2498596,0x11bb6ea,0x001afab },
+      { 0x274b4ad,0x240aea1,0x3d12a75,0x2b56b61,0x1486b43,0x1b83426,
+        0x31c7363,0x35b59ca,0x207bb6c,0x38e6243,0x19bace4,0x0a26671,
+        0x35e3381,0x0c2ded4,0x00d8da4 } },
+    /* 122 */
+    { { 0x2b75791,0x19590b1,0x2bfb39f,0x2988601,0x0050947,0x0d8bbe1,
+        0x23e3701,0x08e4432,0x2ed8c3d,0x326f182,0x332e1dd,0x12219c5,
+        0x2e0779b,0x367aa63,0x0012d10 },
+      { 0x251b7dc,0x0a08b4d,0x1138b6f,0x2ea02af,0x06345a5,0x1cb4f21,
+        0x0332624,0x1d49d88,0x140acc5,0x2f55287,0x024447c,0x291ace9,
+        0x1a4966e,0x015cbec,0x005bc41 } },
+    /* 123 */
+    { { 0x351cd0e,0x315e8e9,0x07d6e70,0x067ae8f,0x2190d84,0x351f556,
+        0x03bee79,0x31b62c7,0x266f912,0x1b6a504,0x007a6ad,0x3a6ab31,
+        0x3891112,0x3c45ba0,0x00d6ce5 },
+      { 0x0e1f2ce,0x32a5edc,0x1434063,0x1ca084f,0x2a3e47c,0x137e042,
+        0x16e2418,0x2069280,0x3b0dfd8,0x35a22b5,0x289bf0a,0x1f667f2,
+        0x02d23a3,0x0ce688f,0x00d8e3f } },
+    /* 124 */
+    { { 0x10bed6f,0x14c58dd,0x0b0abdf,0x0ca0f9a,0x3808abc,0x2ec228c,
+        0x2366275,0x12afa16,0x20f6b0e,0x37dca8e,0x3af0c6a,0x1c5b467,
+        0x1b25ff7,0x00814de,0x0022dcc },
+      { 0x1a56e11,0x02fe37e,0x3f21740,0x35d5a91,0x06cb8ba,0x29bad91,
+        0x17176f7,0x2d919f2,0x0f7d1f5,0x13a3f61,0x04ddb05,0x0c82a51,
+        0x286f598,0x2e8c777,0x0007071 } },
+    /* 125 */
+    { { 0x0f8fcb9,0x3e83966,0x170c6fd,0x3825343,0x089cec8,0x01b482a,
+        0x0993971,0x3327282,0x39aba8a,0x32456fe,0x1507e01,0x1c3252d,
+        0x21ffb13,0x29822a0,0x0083246 },
+      { 0x23c378f,0x1cea7ef,0x1be9a82,0x224d689,0x37e5447,0x3764a75,
+        0x3a49724,0x361e1b3,0x19d365b,0x3a61ffb,0x1c29a7a,0x20ab251,
+        0x17ec549,0x175d777,0x004589a } },
+    /* 126 */
+    { { 0x15540a9,0x2ec5d2a,0x05b09fa,0x1bc058b,0x07cfb88,0x28f7b86,
+        0x3e766be,0x189305e,0x01fe88e,0x23fdf69,0x0b919c3,0x02dc7ae,
+        0x3f9a9ad,0x0b83cc7,0x0086a52 },
+      { 0x28bc259,0x39bdca1,0x39e4bc8,0x0e0f33b,0x16130c6,0x2919955,
+        0x31f4549,0x2fed027,0x30919b2,0x0a39b03,0x0ca7bb2,0x1711b24,
+        0x3b67b94,0x05a136b,0x00acd87 } },
+    /* 127 */
+    { { 0x0c53841,0x31cb284,0x3ced090,0x06d5693,0x1c20ae0,0x0408d2b,
+        0x37ebd5e,0x081900f,0x26a8589,0x0acfd0a,0x34a1472,0x2f0c302,
+        0x124ccbd,0x10de328,0x00971bc },
+      { 0x17ff2ff,0x27d1b54,0x147b6f7,0x38bb2ea,0x26a9c96,0x0a49448,
+        0x39f2f46,0x247c579,0x3b16a4e,0x28c2a5a,0x2d4c72d,0x11f248c,
+        0x1e4df11,0x047d604,0x0065bc3 } },
+    /* 128 */
+    { { 0x39b3239,0x1f75f44,0x3bae87c,0x139360c,0x18b5782,0x3ffc005,
+        0x3c48789,0x2bc6af2,0x38b909e,0x223ff3b,0x31443a7,0x017d3bb,
+        0x0bfed99,0x128b857,0x00020dd },
+      { 0x306d695,0x25a7b28,0x2f60ca2,0x2b6e4f2,0x1df940c,0x1fa9b8e,
+        0x37fab78,0x13f959f,0x10ff98c,0x38343b8,0x019cb91,0x11a1e6b,
+        0x17ab4c6,0x1431f47,0x004b4ea } },
+    /* 129 */
+    { { 0x20db57e,0x102515e,0x170219e,0x2b66a32,0x1e6017c,0x2f973fe,
+        0x3739e51,0x0e28b6f,0x3cda7a9,0x30d91ac,0x28350df,0x1444215,
+        0x098b504,0x1bcd5b8,0x00ad3bd },
+      { 0x22e3e3e,0x3aeaffb,0x26cb935,0x0091ce4,0x2fbd017,0x3a7ed6a,
+        0x335b029,0x3bfc1f1,0x3852e3f,0x2b14a86,0x046b405,0x266af4c,
+        0x3997191,0x33b0e40,0x00e306f } },
+    /* 130 */
+    { { 0x3e4712c,0x26bb208,0x18eed6d,0x1b30f06,0x27ca837,0x06faf62,
+        0x1831873,0x3fbcf9b,0x3f3d88b,0x1fb55eb,0x0f44edc,0x29917bb,
+        0x3151772,0x342d72e,0x00d4e63 },
+      { 0x2ee0ecf,0x39e8733,0x2e8e98c,0x0cd4e0f,0x08f0126,0x1ad157a,
+        0x079078a,0x23018ee,0x196c765,0x2b2f34f,0x0783336,0x075bf9c,
+        0x3713672,0x098d699,0x00f21a7 } },
+    /* 131 */
+    { { 0x186ba11,0x22cf365,0x048019d,0x2ca2970,0x0d9e0ae,0x08c3bd7,
+        0x261dbf2,0x2fc2790,0x1ee02e6,0x10256a7,0x00dc778,0x18dc8f2,
+        0x157b189,0x2ebc514,0x005c97d },
+      { 0x3c4503e,0x1d10d12,0x337097e,0x0c6169a,0x30fb1cb,0x3481752,
+        0x0df2bec,0x19768fa,0x1bcf8f7,0x2925f74,0x2c988a1,0x3be571d,
+        0x04cfa92,0x2ea9937,0x003f924 } },
+    /* 132 */
+    { { 0x268b448,0x06e375c,0x1b946bf,0x287bf5e,0x3d4c28b,0x138d547,
+        0x21f8c8e,0x21ea4be,0x2d45c91,0x35da78e,0x00326c0,0x210ed35,
+        0x1d66928,0x0251435,0x00fefc8 },
+      { 0x0339366,0x216ff64,0x2c3a30c,0x3c5733d,0x04eeb56,0x2333477,
+        0x32b1492,0x25e3839,0x1b5f2ce,0x0dcfba1,0x3165bb2,0x3acafcc,
+        0x10abfcd,0x248d390,0x008106c } },
+    /* 133 */
+    { { 0x102f4ee,0x3c0585f,0x1225c8d,0x11c6388,0x08a7815,0x2b3e790,
+        0x2895eb6,0x18cf53a,0x0b56e5a,0x2e2c003,0x3e981ff,0x0761b55,
+        0x1bc32f3,0x0a7111d,0x00f5c80 },
+      { 0x3568973,0x1587386,0x16ec764,0x20698a6,0x02f809b,0x2821502,
+        0x113d64d,0x38c2679,0x15de61c,0x0309f60,0x272999e,0x29bfe64,
+        0x173f70d,0x1de7fab,0x00bd284 } },
+    /* 134 */
+    { { 0x31cdf2b,0x0f0be66,0x2151603,0x01af17e,0x32a99cf,0x085dece,
+        0x27d2591,0x1520df4,0x273c448,0x1ec7c54,0x102e229,0x355f604,
+        0x2acb75f,0x005f1fd,0x003d43e },
+      { 0x270eb28,0x22ec2ce,0x306b41a,0x238fa02,0x167de2d,0x030a379,
+        0x245a417,0x1808c24,0x0b1a7b2,0x3ab5f6f,0x2cbc6c1,0x2c228d4,
+        0x3041f70,0x2d9a6cc,0x00b504f } },
+    /* 135 */
+    { { 0x17a27c2,0x216ad7e,0x011ba8e,0x22f0428,0x16ac5ec,0x3ef3c58,
+        0x345533f,0x0298155,0x2856579,0x0005e03,0x19ee75b,0x146fe16,
+        0x29881e4,0x18ece70,0x008907a },
+      { 0x20189ed,0x119ce09,0x35cb76d,0x0d91ef4,0x2284a44,0x032ad87,
+        0x0e8c402,0x3c82b5d,0x38c416c,0x398992f,0x1fd820c,0x169b255,
+        0x3b5fcfa,0x1343c92,0x00fa715 } },
+    /* 136 */
+    { { 0x33f5034,0x20b3b26,0x28fd184,0x16b3679,0x3962d44,0x15d1bc8,
+        0x2fb1d69,0x1292c99,0x25a58c9,0x1b19ab7,0x2d68a5b,0x2f6a09b,
+        0x0d6aedb,0x2935eac,0x0005664 },
+      { 0x25e32fc,0x13f9440,0x3252bcd,0x2fea5b7,0x161a5ae,0x0564a8c,
+        0x0a07e23,0x1545f62,0x0de9890,0x1d76765,0x1fd440e,0x2ed0041,
+        0x3db4c96,0x1e8ba01,0x001b0c4 } },
+    /* 137 */
+    { { 0x0223878,0x29ab202,0x15585c2,0x1a79969,0x1ba08c2,0x2ef09ff,
+        0x2b1b9b9,0x181f748,0x1bf72b9,0x224645c,0x2588dc5,0x2d157e7,
+        0x22d939a,0x05b88d9,0x006d549 },
+      { 0x31de0c1,0x23a4e0e,0x278f8da,0x1aa013c,0x1a84d18,0x0d185a5,
+        0x0988ccd,0x2c32efd,0x3bee10e,0x37d7ab8,0x3f2a66e,0x3e2da3e,
+        0x1b5701f,0x3d9f0c1,0x00a68da } },
+    /* 138 */
+    { { 0x0b2e045,0x0133fd1,0x05d4c10,0x0d92c70,0x391b5e1,0x2292281,
+        0x2e40908,0x2ec694e,0x195ea11,0x29cfeca,0x3d93a4e,0x01215c0,
+        0x08a5f32,0x37a0eff,0x00cce45 },
+      { 0x2b3106e,0x12a5fb0,0x0b4faff,0x0c2da12,0x09069c6,0x35d8907,
+        0x2837a6e,0x3db3fb6,0x3136cc3,0x222836b,0x3da018a,0x2741274,
+        0x13ba319,0x1ac7642,0x00f867c } },
+    /* 139 */
+    { { 0x2527296,0x10a9595,0x178de4d,0x0f739c4,0x0ae26c7,0x3094599,
+        0x20adac6,0x2b875c2,0x3ae5dc0,0x3e04d20,0x1aab2da,0x1d3ab37,
+        0x15f4f75,0x0b730b5,0x00c56b5 },
+      { 0x1f32923,0x2f059e5,0x2a89872,0x2056f74,0x04be175,0x1da67c0,
+        0x17f1e7a,0x3780a6d,0x0723ac2,0x257f367,0x1237773,0x2bcee86,
+        0x0b97f83,0x38aff14,0x00a64d4 } },
+    /* 140 */
+    { { 0x2552b40,0x0b6b883,0x12e8217,0x0974d35,0x062f497,0x1e563e6,
+        0x30ee400,0x375d1e4,0x290751f,0x0d5b68a,0x353e48c,0x064a0d3,
+        0x3c343f1,0x309a394,0x0034d2a },
+      { 0x3111286,0x0f08604,0x1827107,0x0536a76,0x0201dac,0x3a574de,
+        0x2c29dbe,0x382c7b0,0x1191f3e,0x324c5bc,0x144ce71,0x24327c1,
+        0x1212778,0x22bc9d8,0x00d7713 } },
+    /* 141 */
+    { { 0x34ad1cd,0x1179b4e,0x1bc1780,0x1392a92,0x2cd86b9,0x359de85,
+        0x251f1df,0x0da5d5f,0x135fa61,0x0f64a42,0x34f4d89,0x0fe564c,
+        0x3cf9b7a,0x122d757,0x008c9c2 },
+      { 0x370d4e9,0x0e9209b,0x0ae99f2,0x1518c64,0x0172734,0x2c20692,
+        0x1d7c135,0x149c52f,0x38928d6,0x3c78b78,0x25841d1,0x2eaa897,
+        0x372e50b,0x29e5d19,0x00c4c18 } },
+    /* 142 */
+    { { 0x13375ac,0x389a056,0x211310e,0x2f9f757,0x04f3288,0x103cd4e,
+        0x17b2fb2,0x2c78a6a,0x09f1de6,0x23e8442,0x1351bc5,0x1b69588,
+        0x285b551,0x0464b7e,0x00573b6 },
+      { 0x0ba7df5,0x259a0db,0x2b4089e,0x05630a2,0x3f299be,0x350ff2f,
+        0x1c9348a,0x3becfa4,0x3cc9a1c,0x17a6ef1,0x338b277,0x2b761d9,
+        0x2aa01c8,0x3cb9dd7,0x006e3b1 } },
+    /* 143 */
+    { { 0x277788b,0x16a222d,0x173c036,0x310ff58,0x2634ae8,0x392636f,
+        0x0987619,0x1e6acc1,0x26dc8f7,0x242310f,0x0c09aca,0x22b8e11,
+        0x0d17006,0x1c2c806,0x002380c },
+      { 0x297c5ec,0x1fef0e8,0x3948cf7,0x14f2915,0x2dacbc8,0x0dafb1f,
+        0x10de043,0x31184da,0x06414ee,0x3c9aeeb,0x1f713ab,0x308f1f8,
+        0x1569ed1,0x3f379bf,0x00f08bb } },
+    /* 144 */
+    { { 0x0770ee3,0x058fd21,0x17065f8,0x251d128,0x10e0c7f,0x06cb51b,
+        0x0f05f7e,0x3666a72,0x3e7d01f,0x2d05fab,0x11440e5,0x28577d4,
+        0x2fbcf2b,0x14aa469,0x00dc5c5 },
+      { 0x270f721,0x1c75d28,0x085b862,0x1d68011,0x132c0a0,0x37be81d,
+        0x1a87e38,0x083fa74,0x3acbf0d,0x16d6429,0x0feda1f,0x031070a,
+        0x2ec2443,0x21e563d,0x00454d2 } },
+    /* 145 */
+    { { 0x0525435,0x1e98d5f,0x3dbc52b,0x1fcdf12,0x13d9ef5,0x3ff311d,
+        0x393e9ed,0x3cef8ae,0x2987710,0x3bdee2e,0x21b727d,0x3ba1b68,
+        0x10d0142,0x3c64b92,0x0055ac3 },
+      { 0x0c1c390,0x38e9bb0,0x1e7b487,0x11511b3,0x1036fb3,0x25aba54,
+        0x1eb2764,0x048d022,0x0d971ed,0x1bb7fb5,0x100f0b4,0x06c3756,
+        0x2f0d366,0x3c6e160,0x0011bd6 } },
+    /* 146 */
+    { { 0x36bc9d1,0x24d43c1,0x12c35cf,0x2fb3cf3,0x015d903,0x16bc0c7,
+        0x0fc8c22,0x3195c87,0x2488b1c,0x1f82b4c,0x30014e8,0x27ee58d,
+        0x31658dd,0x1684a5f,0x00f0f3a },
+      { 0x1f703aa,0x023eebc,0x20babb9,0x080bd9d,0x12f9cc4,0x1a8e2d4,
+        0x0eec666,0x1176803,0x33005d6,0x1137b68,0x37de339,0x33d71cb,
+        0x0c906b9,0x14086b5,0x00aeef6 } },
+    /* 147 */
+    { { 0x219045d,0x0f22c5e,0x024c058,0x00b414a,0x0ae7c31,0x3db3e96,
+        0x234979f,0x0cf00a8,0x3c962c7,0x27fa77f,0x1c0c4b0,0x1fe8942,
+        0x218053a,0x1eed3f8,0x0051643 },
+      { 0x2a23ddb,0x138f570,0x104e945,0x21ca270,0x30726d8,0x3f45490,
+        0x37d9184,0x242ea25,0x33f6d77,0x3f15679,0x065af85,0x34fa1f5,
+        0x2e46b8f,0x31d17fb,0x00a2615 } },
+    /* 148 */
+    { { 0x335167d,0x181ea10,0x0887c8d,0x01383d7,0x18b42d8,0x263447e,
+        0x1f13df3,0x0319d7e,0x0872074,0x2d6aa94,0x23d9234,0x36a69aa,
+        0x0bad183,0x3138a95,0x00bd3a5 },
+      { 0x1b0f658,0x0e4530b,0x373add1,0x1b968fc,0x329dcb6,0x09169ca,
+        0x162df55,0x0211eff,0x02391e4,0x3867460,0x3136b1a,0x37dd36e,
+        0x3bc5bd9,0x2dacfe4,0x0072a06 } },
+    /* 149 */
+    { { 0x119d96f,0x067b0eb,0x00996da,0x293eca9,0x2b342da,0x1889c7a,
+        0x21633a6,0x0152c39,0x281ce8c,0x18ef3b3,0x0bd62dc,0x3238186,
+        0x38d8b7c,0x3867b95,0x00ae189 },
+      { 0x0ed1eed,0x1e89777,0x13ab73e,0x029e1d7,0x2c1257f,0x33fbc09,
+        0x32d5a21,0x3d870b2,0x39bb1fd,0x33663bc,0x24e83e6,0x239bda4,
+        0x3088bcd,0x01db1ed,0x00d71e7 } },
+    /* 150 */
+    { { 0x14245bf,0x0da0c27,0x153b339,0x05cab0a,0x122d962,0x1b0f0f3,
+        0x3f5a825,0x267a2ce,0x2910d06,0x254326f,0x0f36645,0x025118e,
+        0x37c35ec,0x36e944e,0x006c056 },
+      { 0x05ab0e3,0x29aa0c1,0x1295687,0x1fd1172,0x08d40b5,0x05bd655,
+        0x345048a,0x02a1c3c,0x2393d8f,0x0992d71,0x1f71c5e,0x18d4e8a,
+        0x30dd410,0x11d61d3,0x00dd58b } },
+    /* 151 */
+    { { 0x2230c72,0x30213d8,0x05e367e,0x329204e,0x0f14f6c,0x3369ddd,
+        0x0bb4074,0x2edafd6,0x1b1aa2d,0x0785404,0x0c035ab,0x220da74,
+        0x1f2fdd4,0x092a091,0x00ef83c },
+      { 0x3dc2538,0x1cca3e7,0x246afb5,0x24c647f,0x0798082,0x0bb7952,
+        0x0f5c443,0x008b38a,0x299ea1a,0x3c6cf36,0x3df2ec7,0x398e6dc,
+        0x29a1839,0x1cadd83,0x0077b62 } },
+    /* 152 */
+    { { 0x25d56d5,0x3546f69,0x16e02b1,0x3e5fa9a,0x03a9b71,0x2413d31,
+        0x250ecc9,0x1d2de54,0x2ebe757,0x2a2f135,0x2aeeb9a,0x0d0fe2b,
+        0x204cb0e,0x07464c3,0x00c473c },
+      { 0x24cd8ae,0x0c86c41,0x221c282,0x0795588,0x1f4b437,0x06fc488,
+        0x0c81ecd,0x020bf07,0x3a9e2c8,0x2294a81,0x3a64a95,0x0363966,
+        0x32c9a35,0x0f79bec,0x0029e4f } },
+    /* 153 */
+    { { 0x289aaa5,0x2755b2e,0x059e0aa,0x3031318,0x0f0208a,0x35b7729,
+        0x00d9c6b,0x3dd29d0,0x075f2c2,0x0ece139,0x31562dd,0x04187f2,
+        0x13b8d4c,0x0920b85,0x003924e },
+      { 0x09808ab,0x2e36621,0x2a36f38,0x1829246,0x229bf32,0x20883b7,
+        0x159ada8,0x3108a14,0x15bbe5b,0x1e2d1e4,0x1730096,0x0d35cbb,
+        0x15d0da9,0x0e60b94,0x00c4f30 } },
+    /* 154 */
+    { { 0x31de38b,0x27b9086,0x2760e3e,0x169098d,0x2a124e2,0x00596c6,
+        0x3f73c09,0x0d31642,0x2341464,0x248600a,0x2e1fa10,0x2aa0fc8,
+        0x051e954,0x00f3b67,0x001d4bd },
+      { 0x18751e6,0x25a8e1e,0x07f5c2d,0x17e30d4,0x0ed2723,0x23093e2,
+        0x3b80e2c,0x13de2d7,0x2fad37f,0x1be1cfb,0x3224ba9,0x0a7f5d3,
+        0x1714972,0x06667b7,0x009dcd9 } },
+    /* 155 */
+    { { 0x294f22a,0x3e06993,0x0341ee9,0x24bdc7b,0x2e56098,0x2660a13,
+        0x018ddda,0x2c261b2,0x2953b54,0x267f51c,0x0e8a7cc,0x29ab00c,
+        0x3a38247,0x397ac81,0x00de684 },
+      { 0x36b956b,0x347b34a,0x35834bd,0x053c06c,0x0090844,0x148cec5,
+        0x380b325,0x2f17b8b,0x054ef5e,0x09683fb,0x3f8b29a,0x33c979a,
+        0x1e01474,0x3e81fca,0x001c757 } },
+    /* 156 */
+    { { 0x30fdfe4,0x2d712ba,0x13671bc,0x2cfc226,0x3d7c649,0x16f020e,
+        0x368e3f0,0x2981ebb,0x246a78a,0x115e81b,0x21223a4,0x04dbb30,
+        0x1a50ba2,0x12114bd,0x0089bd6 },
+      { 0x055f15a,0x1046e51,0x00fd724,0x1c022a7,0x323dfa9,0x36d8efb,
+        0x0da4d16,0x0910dec,0x2c1fb16,0x2dbe29f,0x298284f,0x2b273bb,
+        0x26022c1,0x20accd5,0x00085a5 } },
+    /* 157 */
+    { { 0x01f138a,0x2d87e7b,0x0c2815c,0x0c19a3c,0x311c9a2,0x3e4fce3,
+        0x029729d,0x21236b2,0x2984048,0x3f3bc95,0x2bba8fb,0x1a1b680,
+        0x0619a3f,0x29e0447,0x00ed5fe },
+      { 0x2d1c833,0x3dcef35,0x3f809b4,0x01a1b9e,0x1509516,0x10ac754,
+        0x2735080,0x27b0a8a,0x2495fb8,0x0a7bdba,0x1ef8b89,0x00233a5,
+        0x0568bf1,0x1a126ba,0x0078a7e } },
+    /* 158 */
+    { { 0x0470cd8,0x20e9f04,0x30003fe,0x20be1b7,0x1927346,0x2a5026d,
+        0x1ac06bd,0x2717ed7,0x2609493,0x3079ea5,0x1cc116d,0x31b0541,
+        0x2c8ccde,0x10219ae,0x001a52b },
+      { 0x2864045,0x0e8d95b,0x2fc1530,0x0aa44e7,0x345eae7,0x3cc7553,
+        0x3ec6466,0x229b60e,0x06f6e95,0x00bed2a,0x0ff4403,0x181c639,
+        0x2e0df67,0x1f8fa46,0x0000811 } },
+    /* 159 */
+    { { 0x04310a2,0x20cee8e,0x09fc5d5,0x3707f5b,0x0bdfb4e,0x12713ee,
+        0x24f1028,0x0787ee6,0x39a581c,0x3797ec8,0x10a9746,0x112cb9f,
+        0x142b9ba,0x1da0ef6,0x0078f7b },
+      { 0x07607ae,0x3232872,0x2a7e076,0x0bb572a,0x182b23c,0x1d8f918,
+        0x181f392,0x37c45a9,0x24a3886,0x0b2a297,0x264e7f2,0x1fa433c,
+        0x0fcfcc8,0x21c0857,0x0004f74 } },
+    /* 160 */
+    { { 0x01d161c,0x1744585,0x2d17528,0x03a4f13,0x267cd2e,0x30d861f,
+        0x062a647,0x213284b,0x139ed25,0x27d4ca5,0x02fbbd6,0x31ddf11,
+        0x3c50ac4,0x1dd86f7,0x00107de },
+      { 0x16beebd,0x1b7317a,0x2151997,0x256a196,0x3be2aff,0x3621cab,
+        0x0a9da19,0x05f3038,0x23da63c,0x3178d5e,0x215cc67,0x07f7f63,
+        0x0c6d8d3,0x3bf5e5c,0x00c44bb } },
+    /* 161 */
+    { { 0x00c62f1,0x3e0f893,0x1572703,0x3b93865,0x19b1e28,0x389b33b,
+        0x02858bf,0x0e3e9aa,0x04bc436,0x234e072,0x25ba43d,0x3dca19e,
+        0x0274394,0x20f442e,0x003b4a7 },
+      { 0x176451e,0x2b5ed5d,0x35c8ee1,0x25c52da,0x0c3d0b5,0x32b306e,
+        0x030954f,0x275ecf7,0x10e472c,0x21577c4,0x02f8a32,0x321bb5c,
+        0x0098f97,0x104e237,0x00d0433 } },
+    /* 162 */
+    { { 0x0a8f2fe,0x034548b,0x141f1a6,0x121246f,0x1616409,0x237f80d,
+        0x2e29a55,0x1218db6,0x3ea278e,0x1669856,0x1ad7c8e,0x36d11de,
+        0x2c2fcbb,0x18c0b3a,0x001c706 },
+      { 0x1699b4b,0x2d531a6,0x17e85e2,0x1b48e78,0x2b509ca,0x2818ea0,
+        0x0165fee,0x0b809ca,0x09db6a2,0x3dad798,0x326ee1d,0x204e416,
+        0x091fa12,0x1c890e5,0x0007b9f } },
+    /* 163 */
+    { { 0x0ff4e49,0x0bb0512,0x0129159,0x05db591,0x03e4e9f,0x055ab30,
+        0x0f82881,0x0ac2deb,0x3a8bb09,0x356a8d2,0x3d38393,0x03e4089,
+        0x38187cd,0x1377a93,0x0041672 },
+      { 0x0139e73,0x3990730,0x187d3c4,0x33e4793,0x2e0fe46,0x2ad87e2,
+        0x33c792c,0x21d4fb6,0x1e4d386,0x2932d1b,0x20f1098,0x1270874,
+        0x0ea6ee4,0x0167d6e,0x005e5fd } },
+    /* 164 */
+    { { 0x1856031,0x2b7519d,0x3bd07fc,0x337abcb,0x089c7a4,0x2a1f120,
+        0x3523ce7,0x2ba406b,0x09561d9,0x1797f04,0x3cdb95f,0x2d6193e,
+        0x32c7d3f,0x223aed6,0x00beb51 },
+      { 0x2e65825,0x158f0ce,0x16413d1,0x310395f,0x3116854,0x250baf4,
+        0x373d341,0x156cc47,0x104c069,0x0893716,0x195a0a6,0x035320e,
+        0x37b7d8a,0x21b5755,0x00fb26b } },
+    /* 165 */
+    { { 0x286ae17,0x04239f1,0x1a56c53,0x0e74707,0x29090d7,0x2bb142b,
+        0x03b0139,0x1aac916,0x08ba49a,0x0376682,0x3382f85,0x064bbab,
+        0x2910e28,0x1d5bd7f,0x00cc8df },
+      { 0x0ab7630,0x208e8e7,0x3fc1877,0x26bee39,0x264984a,0x192ff05,
+        0x08ef9c3,0x0aa6951,0x071c44e,0x26eed3e,0x035c95e,0x06906ad,
+        0x10a0690,0x397eaa9,0x00c6c23 } },
+    /* 166 */
+    { { 0x034d8dd,0x005b064,0x279bb78,0x12c2c4f,0x1856bb4,0x0c90681,
+        0x06409ab,0x3b48617,0x19a2d78,0x0a34bf8,0x326eddf,0x31f09b5,
+        0x04f04dc,0x3d7c944,0x003ccaf },
+      { 0x321f843,0x35fb71a,0x1e4c397,0x377a5d7,0x2da88e4,0x3d6ada7,
+        0x33d3964,0x1b30149,0x0e39aae,0x054dda0,0x3e6f946,0x1273394,
+        0x3ffd3f7,0x2f6655e,0x00021dd } },
+    /* 167 */
+    { { 0x37233cf,0x11617dd,0x26f07b6,0x3d8250a,0x0fe6771,0x3f9bbbc,
+        0x2aba7ad,0x200a58d,0x3568603,0x198eefa,0x1e8fcf3,0x3b9610b,
+        0x20524ac,0x2a67528,0x0048d9a },
+      { 0x1a5e57a,0x1e9d303,0x16c9cff,0x0f39527,0x3c23259,0x03c8a1e,
+        0x104bccf,0x182d5a1,0x18dbc83,0x05b5f42,0x1b402f4,0x317c525,
+        0x11bf1ea,0x3c46e1f,0x0061936 } },
+    /* 168 */
+    { { 0x0153a9d,0x36859ee,0x2cf0aa9,0x2b27a0f,0x0a49fe3,0x2d984e1,
+        0x018f8e1,0x1378453,0x1ab3843,0x1987093,0x283dae9,0x25cf0e8,
+        0x14fc93d,0x280609d,0x00c99ba },
+      { 0x026b1e3,0x34663d3,0x2202477,0x21a9d45,0x212e8e1,0x18ab77e,
+        0x2e52f63,0x0a14ce1,0x295c396,0x00c7a3d,0x2aaedb6,0x30abc4d,
+        0x374acde,0x1318a73,0x00fcfdb } },
+    /* 169 */
+    { { 0x0a40298,0x3ba5633,0x11956b3,0x14fcbd7,0x3c38781,0x34bab96,
+        0x165630e,0x1f3c831,0x37e3a69,0x2b4226c,0x2d5029e,0x3b4ab1e,
+        0x1da6ac2,0x3eb43c3,0x007e5cd },
+      { 0x1b86202,0x109b7f6,0x2054f98,0x2c50cd7,0x2ed1960,0x3c518e7,
+        0x1b02463,0x319c07f,0x1c30db6,0x045fdc2,0x373421e,0x31a1eb9,
+        0x1a8acbf,0x31289b0,0x0013fef } },
+    /* 170 */
+    { { 0x3fa0a5f,0x068661f,0x2109e36,0x00b18ff,0x1f4b261,0x31d3844,
+        0x0acbc56,0x3aebc99,0x1fa77ab,0x152bd11,0x24cddb7,0x2313f74,
+        0x06eea44,0x15f5114,0x000b131 },
+      { 0x2e9993d,0x1ac565c,0x2cbe22a,0x3921797,0x12c3c57,0x360f868,
+        0x33560bf,0x320ee99,0x382c3b8,0x39af88f,0x00bbe38,0x2c4ea59,
+        0x3399b40,0x00ceb45,0x0066eea } },
+    /* 171 */
+    { { 0x0c6c693,0x31ba56d,0x3d3849f,0x378dabd,0x0efc735,0x17f90bf,
+        0x13343d3,0x2df0f81,0x27c6a9a,0x13c2a90,0x0a0fcb2,0x27c10d9,
+        0x3bc50c7,0x090e4fa,0x0016287 },
+      { 0x2927e1e,0x35af405,0x184c5c3,0x3499cee,0x240158e,0x33522e6,
+        0x386fc84,0x0a0b69f,0x1a660ea,0x34590fb,0x22a1bee,0x2ce4fab,
+        0x31a9445,0x0e78655,0x00664c8 } },
+    /* 172 */
+    { { 0x3eeaf94,0x115d409,0x21e7577,0x097aa67,0x22875c9,0x021ab7a,
+        0x27e7ba5,0x1093f04,0x2a086fe,0x05d9494,0x2b6c028,0x10f31b0,
+        0x1312d11,0x262759c,0x00c9bb2 },
+      { 0x1acb0a5,0x30cdf14,0x0f78880,0x0574f18,0x1a37109,0x098adbb,
+        0x2113c09,0x2060925,0x1f89ce4,0x1974976,0x3381358,0x2dab5ca,
+        0x2159c53,0x3af1303,0x000ea3b } },
+    /* 173 */
+    { { 0x1e49bea,0x29142b1,0x1a59cab,0x055f017,0x0684e54,0x39eb0db,
+        0x29cab9d,0x255ee8b,0x35f2e6f,0x05329e6,0x09b817b,0x1ec091c,
+        0x1df0fef,0x2641f62,0x00eb304 },
+      { 0x2fe5096,0x3dcc1d1,0x2aaf508,0x3a0b813,0x0695810,0x144bddb,
+        0x2f1bd93,0x281ae23,0x3513ebc,0x1ddd984,0x0cf158b,0x35218eb,
+        0x257daf7,0x391253b,0x00b2a81 } },
+    /* 174 */
+    { { 0x153e6ba,0x22396db,0x0ea2ff2,0x2a45121,0x0a90de1,0x34cf23b,
+        0x2db60ce,0x1a900be,0x2f328b6,0x355e75b,0x2c24372,0x0b75b77,
+        0x2ec7d4f,0x3f24759,0x00e9e33 },
+      { 0x39eab6e,0x2267480,0x3b5e110,0x1e8fa5e,0x2a31a66,0x3f739a3,
+        0x00166dc,0x3552d88,0x3ae5137,0x3efa0fa,0x0800acd,0x17df61d,
+        0x38c8608,0x04cc31b,0x00cf4ab } },
+    /* 175 */
+    { { 0x31e08fb,0x1961164,0x22c003f,0x078541b,0x3643855,0x30da587,
+        0x11f0dc9,0x324595e,0x329e3dc,0x29a041e,0x3495d2c,0x0908dd3,
+        0x1895b83,0x198dbb9,0x00d8cfb },
+      { 0x0349b1b,0x383c5a8,0x2b86525,0x1b1283e,0x133cd2c,0x2be376a,
+        0x012ee82,0x1eb4d1b,0x0ba71e9,0x01f3109,0x37621eb,0x1d9b77c,
+        0x0d39069,0x3d5a97c,0x0095565 } },
+    /* 176 */
+    { { 0x20f5e94,0x1eefc86,0x1327e0e,0x054760b,0x2f771e1,0x3ac447e,
+        0x033e3dc,0x198e040,0x04dd342,0x1b49a5d,0x00d01ef,0x3cb6768,
+        0x1ceafbd,0x31c6812,0x001cb80 },
+      { 0x221c677,0x060ca27,0x398b17f,0x0146723,0x36452af,0x02d9e65,
+        0x39c5f78,0x3cf50d6,0x0be40f8,0x2970b87,0x26d667c,0x3e45959,
+        0x16e7943,0x01673e7,0x009faaa } },
+    /* 177 */
+    { { 0x2078fe6,0x0918602,0x11dd8ad,0x399193f,0x0f6cc73,0x0f8dd12,
+        0x2ce34dc,0x06d7d34,0x0c5e327,0x0989254,0x2fc5af7,0x2443d7b,
+        0x32bc662,0x2fe2a84,0x008b585 },
+      { 0x039327f,0x08e616a,0x252f117,0x1f52ab0,0x234e2d2,0x0a5b313,
+        0x2f59ef6,0x0f7a500,0x15c4705,0x2c02b81,0x28b4f09,0x08aa5c8,
+        0x0180efc,0x0993e83,0x00a9e86 } },
+    /* 178 */
+    { { 0x0310ecc,0x2d8892f,0x14ed0b7,0x3c59fe8,0x08a1a74,0x0850e57,
+        0x1d09607,0x044a21f,0x109f5c9,0x237c6cf,0x06b264a,0x3fc8f1a,
+        0x0d4c539,0x2740f96,0x00dc2d4 },
+      { 0x1d6f501,0x0adf4ea,0x14f7215,0x0930102,0x3f4c32e,0x24e2643,
+        0x366596d,0x081ff18,0x38f94fb,0x2c21341,0x328594c,0x267c75c,
+        0x196b3fd,0x29932cb,0x0036def } },
+    /* 179 */
+    { { 0x3ed7cbe,0x26de044,0x3d0e461,0x0565e12,0x295e500,0x31dc17f,
+        0x32251c2,0x3420ca8,0x3995f0d,0x2e8ddab,0x0361a45,0x10971b0,
+        0x11e7b55,0x33bc7ca,0x00812d2 },
+      { 0x3d94972,0x1606817,0x0383ccf,0x0e795b7,0x026e20e,0x0f6fefc,
+        0x13685d6,0x315d402,0x0cc36b8,0x1c7f059,0x390ef5e,0x316ae04,
+        0x08c66b9,0x2fac9a4,0x0040086 } },
+    /* 180 */
+    { { 0x3e3c115,0x153de4d,0x1a8ae5e,0x2330511,0x169b8ee,0x1d965c2,
+        0x2edff2b,0x3ef99e6,0x1631b46,0x1f8a238,0x118d7bb,0x12113c3,
+        0x26424db,0x0f4122a,0x00e0ea2 },
+      { 0x3d80a73,0x30393bc,0x0f98714,0x278ef59,0x087a0aa,0x3b18c20,
+        0x04b8a82,0x2068e21,0x030255d,0x3382b27,0x055397f,0x05448dd,
+        0x2015586,0x1190be0,0x000b979 } },
+    /* 181 */
+    { { 0x2e03080,0x2895692,0x09fb127,0x2d1602a,0x1232306,0x105bd4e,
+        0x28cd6a6,0x0a83813,0x1ee13b0,0x2abadc3,0x0c09684,0x00e33e1,
+        0x033eea3,0x30f0a39,0x00a710e },
+      { 0x01b1f7d,0x1c959da,0x017077a,0x254bf0a,0x086fbce,0x15cd6b2,
+        0x008683f,0x23a4f4d,0x22a6bd6,0x14e8c93,0x0027d15,0x31d0d4f,
+        0x271777e,0x1533510,0x00ab603 } },
+    /* 182 */
+    { { 0x34c209d,0x14d0abb,0x270432a,0x1d02358,0x22ba752,0x209757f,
+        0x34af6fc,0x1ffc52e,0x1ced28e,0x1870e46,0x1e0340f,0x3f0bf73,
+        0x33ba91d,0x2ebca7c,0x00c6580 },
+      { 0x1d442cb,0x0879d50,0x24e4ae1,0x3f4e91c,0x04c7727,0x093cd1d,
+        0x16d6a45,0x10a8b95,0x0c77856,0x361f84f,0x217845f,0x0bbeec6,
+        0x0485718,0x33c5385,0x00dcec0 } },
+    /* 183 */
+    { { 0x1539819,0x225507a,0x1bf11cb,0x13e7653,0x0c8cb3b,0x05f695e,
+        0x353f634,0x2827874,0x3fb8053,0x22de9a5,0x035d8b7,0x2105cc7,
+        0x2a7a98d,0x35bed95,0x0085748 },
+      { 0x1859c5d,0x00e51f0,0x22a21fd,0x3054d74,0x06ce965,0x328eab7,
+        0x26a13e0,0x13bfc65,0x01d4fb1,0x36600b9,0x36dd3fc,0x01232ed,
+        0x15bbaa9,0x0ad7a51,0x0089b18 } },
+    /* 184 */
+    { { 0x3360710,0x1eb5a90,0x136bd77,0x3bd57a6,0x0841287,0x12886c9,
+        0x35c6700,0x21bc6eb,0x25f35ad,0x3bcb01c,0x0707e72,0x23e9943,
+        0x03e5233,0x34bb622,0x002bf8e },
+      { 0x16e0d6a,0x04b3d2d,0x290cb02,0x049a10c,0x350537e,0x22cf71b,
+        0x3184a19,0x2dc8b62,0x2350210,0x3b4afa6,0x159781e,0x1d01b6d,
+        0x1853440,0x16442f0,0x005a78d } },
+    /* 185 */
+    { { 0x348b02c,0x1ea8ab5,0x3b954d5,0x14684ac,0x0be5b34,0x11c4496,
+        0x0a7a456,0x14f6eb7,0x11a3221,0x2d65f82,0x32eb1ea,0x09c4018,
+        0x3f301f3,0x32e8a1c,0x00bd9ad },
+      { 0x0543f7f,0x31e744e,0x1fefd1d,0x24a486c,0x1000220,0x3977e3b,
+        0x1b3ef51,0x2512a1b,0x2049e6b,0x122232b,0x391a32b,0x2f4a7b1,
+        0x1c13e71,0x081a9b4,0x00d3516 } },
+    /* 186 */
+    { { 0x1924f43,0x1ae5495,0x28d52ef,0x2b93e77,0x2d2f401,0x371a010,
+        0x33e8d7a,0x06ed3f1,0x30c0d9d,0x2589fa9,0x3bf3567,0x2ecf8fa,
+        0x2dee4c3,0x152b620,0x007e8a2 },
+      { 0x1924407,0x01bd42d,0x044a089,0x18686b5,0x2f14a0e,0x17cdce3,
+        0x0efa216,0x3c586a8,0x1d6ae71,0x375831f,0x3175894,0x20e43eb,
+        0x34c009e,0x3480527,0x00d115c } },
+    /* 187 */
+    { { 0x12abf77,0x38b0769,0x25682f2,0x295508c,0x0c2a0dc,0x1259b73,
+        0x023ea25,0x340e7b5,0x3c7cd0d,0x1f92324,0x176405c,0x1528894,
+        0x18f2e1e,0x2c59c35,0x001efb5 },
+      { 0x0fb1471,0x07e7665,0x141da75,0x07d9f4a,0x0fdb31e,0x0dccda6,
+        0x074eb25,0x3d92a9b,0x11189a0,0x1b4c557,0x24b8d2b,0x0533f92,
+        0x0e9e344,0x2fa3dea,0x008d5a4 } },
+    /* 188 */
+    { { 0x2669e98,0x1ad3514,0x2a035c9,0x08a3f50,0x24547f9,0x0a145d3,
+        0x1c1319d,0x3fe833d,0x1ae064b,0x1e01734,0x246d27e,0x3a2f13c,
+        0x01e1150,0x263f55e,0x00f89ef },
+      { 0x2e0b63f,0x3e57db7,0x23a4b4f,0x11c8899,0x0ad8500,0x348f3a7,
+        0x2918604,0x27d6409,0x1ce5001,0x38f94c2,0x29a508a,0x39bdc89,
+        0x3a52c27,0x194899e,0x00e9376 } },
+    /* 189 */
+    { { 0x0368708,0x34a2730,0x2e1da04,0x0bd78c1,0x2c45887,0x0c44bfa,
+        0x3a23de3,0x390b9db,0x1746efd,0x05c638e,0x1d20609,0x3263370,
+        0x31987f0,0x2988529,0x005fa3c },
+      { 0x0aa9f2a,0x20622f7,0x060deee,0x0c9626a,0x3312cc7,0x18ebac7,
+        0x008dd6c,0x0ad4fe6,0x3db4ea6,0x1dc3f50,0x090b6e9,0x0aff8d2,
+        0x26aa62c,0x18f3e90,0x00105f8 } },
+    /* 190 */
+    { { 0x38059ad,0x25e576c,0x3ea00b2,0x1fa4191,0x25686b7,0x2d1ce8f,
+        0x30470ed,0x3478bbf,0x340f9b6,0x1c9e348,0x3d594ec,0x2ffe56e,
+        0x3f23deb,0x0cd34e9,0x00f4b72 },
+      { 0x1a83f0b,0x2166029,0x28b32a2,0x06a5c5a,0x20786c4,0x0944604,
+        0x0901bd2,0x379b84e,0x221e2fe,0x0346d54,0x1f4eb59,0x01b8993,
+        0x2462e08,0x25f9d8b,0x006c4c8 } },
+    /* 191 */
+    { { 0x0b41d9d,0x2e417ed,0x265bd10,0x199148e,0x3826ca4,0x1a67e8d,
+        0x1bbd13b,0x23e414d,0x3d773bc,0x356e64c,0x0d2118a,0x0cb587f,
+        0x25fd093,0x24fb529,0x00158c6 },
+      { 0x2806e63,0x3ecaa39,0x251b4dd,0x3b2d779,0x2e31ed3,0x066f1a6,
+        0x060e518,0x2c7e3e5,0x0d62c76,0x0d88a70,0x101970a,0x1e3c8c6,
+        0x272b8bb,0x083e73b,0x0031f38 } },
+    /* 192 */
+    { { 0x09e1c72,0x072bcb0,0x0cf4e93,0x2604a64,0x00715f2,0x10c98b6,
+        0x2ad81d9,0x234fcce,0x37a7304,0x1974a4a,0x1c7415f,0x14aaa93,
+        0x19587b1,0x3f643f4,0x00c3d10 },
+      { 0x1ddadd0,0x2cd715d,0x294cf76,0x14479ed,0x19f5f4a,0x0198c09,
+        0x1ab7ebc,0x182c0bc,0x0879202,0x1807273,0x05d39da,0x2c7d868,
+        0x29c4ec4,0x1b13ad2,0x006dcd7 } },
+    /* 193 */
+    { { 0x1c83f01,0x0245bff,0x24f90ba,0x112554f,0x2354c8b,0x3f17988,
+        0x0c511af,0x39e1e9b,0x26ae95b,0x0ae551c,0x35b41a6,0x0120455,
+        0x1e989cb,0x1b37aff,0x00fa2ae },
+      { 0x324659a,0x1aef1c3,0x1c43637,0x3f530a2,0x313a999,0x326af62,
+        0x134184e,0x2ac131c,0x3f6a789,0x30a300a,0x13e526e,0x2107af3,
+        0x093a8ff,0x2479902,0x00442b1 } },
+    /* 194 */
+    { { 0x22b6e20,0x31b18be,0x18614ca,0x26fdb5a,0x197f29e,0x325b44b,
+        0x0ab1dbb,0x042348a,0x3275e8e,0x15bae44,0x0077124,0x2cf5345,
+        0x2803ad4,0x188f2a2,0x0061b20 },
+      { 0x2a560b1,0x3ced069,0x3cf42c2,0x100e167,0x3879e1d,0x0936ff0,
+        0x1b51450,0x14c55f3,0x3153bfa,0x2957423,0x2a93823,0x15f5dce,
+        0x2c9a22f,0x16731a8,0x00a97f2 } },
+    /* 195 */
+    { { 0x18edbbb,0x18c5ef9,0x1f13c30,0x071e77f,0x225ade5,0x1b60f75,
+        0x1beaf11,0x3e495ad,0x2441dd8,0x2fa00e2,0x32a87b6,0x00050f2,
+        0x038de7f,0x0037d6d,0x00a885d },
+      { 0x39e48bd,0x1d9e433,0x2768e9f,0x3c29458,0x3f0bdf9,0x35ed5f2,
+        0x36709fa,0x176dc10,0x012f7c1,0x2df8547,0x1d90ee3,0x053c089,
+        0x21a8d35,0x200cb0d,0x002e84e } },
+    /* 196 */
+    { { 0x23ec8d8,0x1d81f55,0x0cb7227,0x07f8e4d,0x2a66181,0x163f577,
+        0x272e7af,0x131a8f2,0x2046229,0x25e6276,0x36bbefe,0x2cdc22f,
+        0x17c8288,0x33dd4fb,0x000d524 },
+      { 0x330c073,0x1a6728b,0x1cf369f,0x12e7707,0x2f0fa26,0x17c2abd,
+        0x0a45680,0x26ebd13,0x3c7d19b,0x1c3d6c8,0x2abd110,0x064fd07,
+        0x09b8339,0x02b4a9f,0x009e3e1 } },
+    /* 197 */
+    { { 0x0ae972f,0x2093c35,0x06e7a90,0x0af1ba1,0x243eef0,0x2748582,
+        0x0606122,0x13a45f9,0x0acfe60,0x08a685e,0x0eb184b,0x015bc11,
+        0x0cdf423,0x157fad5,0x004fcad },
+      { 0x2728d15,0x3e5bceb,0x0331a0f,0x31b1a80,0x28a2680,0x3b94955,
+        0x04cae07,0x176b57e,0x03ac5a6,0x3d7918b,0x22d23f4,0x0ae077f,
+        0x1eb075d,0x006f16c,0x006e473 } },
+    /* 198 */
+    { { 0x38219b9,0x0475a2b,0x107a774,0x39946c6,0x1cb883c,0x004e0ed,
+        0x087e571,0x25c3497,0x059982f,0x0a71f66,0x118305d,0x1aaf294,
+        0x3a5dbaa,0x34be404,0x00725fe },
+      { 0x3abd109,0x336ebea,0x2528487,0x15a1d61,0x0c0f8cf,0x2b56095,
+        0x2591e68,0x3549a80,0x1d1debb,0x0701c6c,0x161e7e3,0x1f7fa2e,
+        0x3dfe192,0x17e6498,0x0055f89 } },
+    /* 199 */
+    { { 0x175645b,0x26c036c,0x0b92f89,0x09ed96d,0x351f3a6,0x19ce67b,
+        0x33ac8db,0x2f0828b,0x27fe400,0x0b9c5e1,0x1967b95,0x3324080,
+        0x11de142,0x1d44fb3,0x003d596 },
+      { 0x3979775,0x3af37b6,0x3e88d41,0x2f1a8b9,0x299ba61,0x085413c,
+        0x1149a53,0x0beb40e,0x31427ba,0x239f708,0x357d836,0x1558c22,
+        0x280a79f,0x1b255f6,0x002b6d1 } },
+    /* 200 */
+    { { 0x39ad982,0x3d79d89,0x01a684a,0x0b6722e,0x39bb4c9,0x39a6399,
+        0x1ad44e0,0x3059f5e,0x048265f,0x33a2fa4,0x0c3a4cc,0x0d7df98,
+        0x23a33f1,0x34e2e21,0x00a0a10 },
+      { 0x386efd9,0x1c91f34,0x06c2e19,0x3e6d48d,0x00eefd3,0x2181ef2,
+        0x2415f97,0x1d33b08,0x0625086,0x1e8aa3e,0x08c9d60,0x0ab427b,
+        0x2764fa7,0x3b7943e,0x00cd9f0 } },
+    /* 201 */
+    { { 0x1a46d4d,0x0e471f4,0x1693063,0x0467ac0,0x22df51c,0x127a0f7,
+        0x0498008,0x20e0b16,0x1aa8ad0,0x1923f42,0x2a74273,0x01761ce,
+        0x1600ca4,0x187b87e,0x00ee49e },
+      { 0x0c76f73,0x19daf92,0x0b2ad76,0x3d8049d,0x1d9c100,0x0fe1c63,
+        0x0bb67c8,0x035cc44,0x02002fc,0x37b2169,0x344656a,0x1127879,
+        0x1939bc0,0x0dd8df6,0x0028ce7 } },
+    /* 202 */
+    { { 0x0544ac7,0x26bdc91,0x042697e,0x356e804,0x1f2c658,0x2ceb7ef,
+        0x2dec39f,0x02c1dcc,0x391a2df,0x2344beb,0x2171e20,0x3099c94,
+        0x0fa548a,0x37216c9,0x00f820c },
+      { 0x0f4cf77,0x29bbaa5,0x33c6307,0x34a5128,0x118c783,0x2dd06b1,
+        0x139d4c0,0x2db912e,0x1153ffb,0x1075eb3,0x3a255e4,0x2892161,
+        0x36d5006,0x125338c,0x0014fbc } },
+    /* 203 */
+    { { 0x1584e3c,0x0830314,0x00279b9,0x167df95,0x2c7733c,0x2108aef,
+        0x0ce1398,0x35aaf89,0x012523b,0x3c46b6a,0x388e6de,0x01a2002,
+        0x0582dde,0x19c7fa3,0x007b872 },
+      { 0x1e53510,0x11bca1f,0x19684e7,0x267de5c,0x2492f8b,0x364a2b0,
+        0x080bc77,0x2c6d47b,0x248432e,0x3ace44f,0x32028f6,0x0212198,
+        0x2f38bad,0x20d63f0,0x00122bb } },
+    /* 204 */
+    { { 0x30b29c3,0x3cec78e,0x01510a9,0x0c93e91,0x3837b64,0x1eca3a9,
+        0x105c921,0x05d42e6,0x1379845,0x07ce6f2,0x0e8b6da,0x0e0f093,
+        0x220b2cd,0x1f6c041,0x00299f5 },
+      { 0x0afdce3,0x2b0e596,0x2f477b6,0x2ccf417,0x3a15206,0x26ec0bf,
+        0x2e37e2b,0x2593282,0x0ab9db3,0x2841dd8,0x27954be,0x277a681,
+        0x03f82e2,0x2b610c7,0x00446a1 } },
+    /* 205 */
+    { { 0x06b8195,0x3b3a817,0x31b9c6f,0x317d279,0x3d744a7,0x1de9eb9,
+        0x296acc1,0x1ce9ea3,0x06c3587,0x246815d,0x3756736,0x0588518,
+        0x1c971a4,0x1fde1f4,0x00aa021 },
+      { 0x3fd3226,0x274561d,0x00be61e,0x01393d8,0x30f6f23,0x29b7fc1,
+        0x04cebc7,0x0a892a7,0x20109f1,0x27456be,0x0c863ee,0x2eb6c8a,
+        0x38c782b,0x039397a,0x00a2829 } },
+    /* 206 */
+    { { 0x29de330,0x21fe80f,0x145b55b,0x1986570,0x012b260,0x2482fbc,
+        0x0536e0a,0x16b7382,0x32c4d19,0x1deffdb,0x145f418,0x0c67a76,
+        0x2ce477f,0x218fe24,0x00f9848 },
+      { 0x3e37657,0x3f074d3,0x245ad0e,0x20973c3,0x23c58de,0x2c332ef,
+        0x2ad21a8,0x0bf1589,0x208af95,0x1f4a8c4,0x2b43735,0x1e46657,
+        0x15d4f81,0x0c3e63a,0x005f19d } },
+    /* 207 */
+    { { 0x26865bb,0x20f6683,0x16a672e,0x0efd8d1,0x222f5af,0x18f2367,
+        0x1e9c734,0x25c3902,0x178dfe6,0x2903a79,0x311b91c,0x1adbbe9,
+        0x225a387,0x0b3e509,0x0089551 },
+      { 0x34e462b,0x23b6a32,0x27c884c,0x129104b,0x384c015,0x3adedc7,
+        0x325db1c,0x021dc10,0x1e366f7,0x3054df7,0x1992b9a,0x2824e64,
+        0x0ae77f3,0x181b526,0x00a7316 } },
+    /* 208 */
+    { { 0x2d260f5,0x2434bf2,0x28c0139,0x0a7bb03,0x176c3be,0x3def5f5,
+        0x05bee00,0x3692df7,0x3d2efeb,0x3a6f859,0x1122b87,0x38f779a,
+        0x1415ccc,0x2c260ad,0x0075a28 },
+      { 0x04607a6,0x042f37a,0x3f0df68,0x0a1bd36,0x3c6d581,0x2d36bfa,
+        0x2d577d1,0x0a3affa,0x0b2066b,0x2e6f110,0x0b17e84,0x3c76a5e,
+        0x1a57553,0x012f36a,0x0004595 } },
+    /* 209 */
+    { { 0x29e5836,0x0e6808c,0x269d13e,0x147dc5c,0x32c9e7d,0x09b258e,
+        0x2c58d6f,0x1efd716,0x0437996,0x34ec31b,0x15908d9,0x2efa8fd,
+        0x09ad160,0x079fc1f,0x00d8481 },
+      { 0x3d20e4a,0x18269d6,0x3aa8fe7,0x34829c2,0x2e4325d,0x0d800e1,
+        0x11f370b,0x10c08dc,0x22fd092,0x1a5fe55,0x0acc443,0x037030d,
+        0x1cdd404,0x097379e,0x00fd6d7 } },
+    /* 210 */
+    { { 0x313eafb,0x3f438f3,0x2e5fb3e,0x2ed6a82,0x121009c,0x240889e,
+        0x00c5537,0x269b792,0x334b2fc,0x1dd573c,0x07096ae,0x19296fc,
+        0x3813985,0x2742f48,0x00ddd64 },
+      { 0x2045041,0x3842c62,0x1572d0d,0x04f255f,0x06e05b4,0x383ec97,
+        0x1ff8064,0x18bed71,0x39b6411,0x2764cc5,0x257439f,0x3521217,
+        0x172aa42,0x342a2a3,0x0070c5b } },
+    /* 211 */
+    { { 0x3bdf646,0x1c5ce25,0x1f7ca76,0x2d2acca,0x3aa1485,0x23c97f7,
+        0x3e11d6f,0x0609338,0x07ec622,0x01da8ff,0x3392474,0x17ca07f,
+        0x13a9a04,0x353a5b4,0x0024557 },
+      { 0x14c27cd,0x32012f7,0x3fea875,0x3d03d71,0x211c5f0,0x3157fdf,
+        0x0c880bd,0x3c406b2,0x2c51103,0x24ab377,0x399faa8,0x0d06887,
+        0x16b5738,0x28b33a7,0x00c7b67 } },
+    /* 212 */
+    { { 0x2357586,0x35c93e3,0x0da09a0,0x3d77d92,0x11d7f4f,0x37b98a9,
+        0x3e6c9bf,0x2cdca70,0x2f00389,0x2412673,0x18eab87,0x0101436,
+        0x11617e9,0x06d9b01,0x00e8eef },
+      { 0x37e3ca9,0x16ffaf0,0x391debf,0x1b69382,0x07c5e94,0x312fa8a,
+        0x0973142,0x2cadde4,0x109ee67,0x3a07db0,0x1afc5ed,0x08df66f,
+        0x304c7af,0x0804aae,0x00d2e60 } },
+    /* 213 */
+    { { 0x24f57bf,0x1818322,0x182a615,0x25bfc44,0x0f97586,0x0a5bbc0,
+        0x36773c6,0x1a2660c,0x3ceff66,0x3270152,0x319cd11,0x2845845,
+        0x1acfad6,0x19076f8,0x009824a },
+      { 0x289fd01,0x2de97ee,0x39d80b7,0x026227d,0x0f8d3b8,0x15e0a17,
+        0x21ea08f,0x20a2317,0x136ae6d,0x3deb1d1,0x3521ef5,0x0de8801,
+        0x0a25d5d,0x0612c98,0x005ecc4 } },
+    /* 214 */
+    { { 0x308c8d3,0x3aec669,0x01ecddc,0x13f18fe,0x1e63ed0,0x061cfe5,
+        0x05f5a01,0x1db5741,0x14479f2,0x0ced6b5,0x025ae5b,0x09ca8f5,
+        0x2160581,0x1404433,0x008bfeb },
+      { 0x08228bf,0x0e02722,0x37df423,0x33ecabf,0x34bd82a,0x32f529f,
+        0x28f1800,0x0c8f671,0x1246b44,0x1ff35dc,0x091db95,0x303f3da,
+        0x28f7f60,0x3624136,0x00cfbb4 } },
+    /* 215 */
+    { { 0x326139a,0x2977e4e,0x3eb89a6,0x20ecb31,0x13e076a,0x2a592f3,
+        0x28e82d5,0x235ad1e,0x239b927,0x262938a,0x2444354,0x141b263,
+        0x0d56693,0x2a3fc78,0x0006497 },
+      { 0x31efa05,0x3a3664a,0x3e333de,0x2a114e4,0x12da63c,0x3c15e6b,
+        0x2f7277c,0x363aa92,0x2393236,0x16bd2d1,0x32b617f,0x32b656c,
+        0x3b1246c,0x22e2e22,0x00ce76d } },
+    /* 216 */
+    { { 0x03843dc,0x094de82,0x13b463d,0x0507905,0x089eb35,0x2a6bf25,
+        0x35ebc4e,0x2bb5d45,0x1808ed1,0x1de9949,0x185e829,0x0a55847,
+        0x0b73d67,0x1a2ed61,0x008dd2d },
+      { 0x133c3a4,0x04e7980,0x38ea237,0x2ad2f49,0x19de838,0x018bf36,
+        0x29b072c,0x21c1ba0,0x14f63ba,0x31c1cc3,0x13cd05e,0x20120ff,
+        0x1f84d60,0x16e0321,0x00872ab } },
+    /* 217 */
+    { { 0x19d4d49,0x1ddb4e6,0x05e7fc0,0x37bb0fd,0x1a3eb59,0x36b87f0,
+        0x190e440,0x1c7fef2,0x31ea153,0x14cd65a,0x1bc7ab2,0x11f72ca,
+        0x39582d4,0x0fa4d65,0x00cd5b6 },
+      { 0x3d1ff11,0x0d9be9d,0x2903ae3,0x017b7b9,0x259f28f,0x110cefc,
+        0x03fed1a,0x38039bd,0x09bdf9c,0x3055027,0x2ca9c5d,0x2d737b6,
+        0x3bdb421,0x16560b5,0x00f9f33 } },
+    /* 218 */
+    { { 0x022c792,0x110de25,0x38bf959,0x08f2562,0x1239ea9,0x3c1d950,
+        0x21a247d,0x315112d,0x285bb9f,0x2534a73,0x0b42455,0x1a4a99c,
+        0x069009a,0x1680392,0x006e0ca },
+      { 0x1b3bece,0x269e0a1,0x18926b7,0x0e7187e,0x241f35e,0x39d1fe0,
+        0x02099aa,0x1675bfe,0x23fd0ca,0x3d6322b,0x19406b5,0x324c38a,
+        0x242434a,0x3ae677c,0x002ce04 } },
+    /* 219 */
+    { { 0x2c37b82,0x1ae6506,0x0d83436,0x23496c1,0x0ff0c72,0x2711edf,
+        0x1513611,0x04f9c7d,0x1edbeff,0x376fcb5,0x212a683,0x23bf547,
+        0x0f9c4f7,0x16e6627,0x0082cd8 },
+      { 0x0cb5d37,0x31b6db8,0x1a15e23,0x2f5cbb8,0x0818aee,0x21dc6c5,
+        0x12aafd2,0x205f608,0x1d91def,0x3def088,0x1445c51,0x3100e8a,
+        0x3746bda,0x145c4b0,0x00711b0 } },
+    /* 220 */
+    { { 0x2a99ecc,0x27b5217,0x35e10ed,0x036e32a,0x0f79950,0x15c32f7,
+        0x2c87dcb,0x3ebb2a3,0x2c2d35d,0x114b3ec,0x2e4d80a,0x0c7eb89,
+        0x2abe58d,0x3727737,0x00e6a37 },
+      { 0x1eca452,0x1968d07,0x344e5d3,0x29435a2,0x109a5f8,0x181d12c,
+        0x238ea5a,0x127a564,0x00dbb42,0x0fcbfb7,0x2909b2e,0x2571d3a,
+        0x08250e3,0x0694e4e,0x00e156d } },
+    /* 221 */
+    { { 0x3181ae9,0x1acf411,0x3808d79,0x2a11065,0x0baf44b,0x133cfeb,
+        0x1330943,0x1711b9a,0x2dec3bd,0x1906a9a,0x2ed947c,0x369d763,
+        0x1a5254f,0x104a7a9,0x00acd9d },
+      { 0x030301b,0x31568f5,0x2a4965c,0x33ded4b,0x03c9a5b,0x16541fc,
+        0x1319cf1,0x2a3748b,0x1b5de74,0x18bb82e,0x077ac2b,0x309a87a,
+        0x3c31420,0x0f6a4b9,0x00387d7 } },
+    /* 222 */
+    { { 0x0d3fdac,0x120cfa3,0x1b8e13c,0x1ccccb9,0x376fcd4,0x0bf87f4,
+        0x271b4be,0x363b3fd,0x28b5d98,0x0535cd3,0x114bbc1,0x3ab4f19,
+        0x10494b1,0x2161ece,0x00d14ca },
+      { 0x12d37e9,0x110ebd7,0x062295a,0x1cc0119,0x073c6ea,0x15d5411,
+        0x0aeb4b1,0x23fba91,0x175fab5,0x3ee8fe1,0x1c680a6,0x1e76f27,
+        0x3ddfc97,0x3d69ecd,0x00e1ee5 } },
+    /* 223 */
+    { { 0x2d29f46,0x2d19204,0x3137cd0,0x02c3b54,0x193295b,0x02fbdb2,
+        0x2260948,0x22c02ff,0x3885424,0x1299595,0x00e7f9c,0x310ff2a,
+        0x01ea169,0x0deef85,0x0021908 },
+      { 0x1b26cfb,0x38566a8,0x2852875,0x21debff,0x290ca9f,0x0b29663,
+        0x26550d9,0x2b44457,0x05d1938,0x1f8f825,0x366ef93,0x1d8daec,
+        0x069e5ef,0x342ece6,0x00b6034 } },
+    /* 224 */
+    { { 0x2d8356e,0x1578c09,0x226f4d2,0x3b74c51,0x0f83666,0x0323b59,
+        0x1ddf61d,0x1ed8508,0x3c52667,0x0e5b91c,0x1e9b18b,0x352bdfa,
+        0x13f75da,0x352aa4e,0x00fceff },
+      { 0x1c731d5,0x04e2844,0x01d9843,0x286cbc5,0x105bcb3,0x05edd9c,
+        0x21fa956,0x3b1ec83,0x01288cc,0x22fbf3a,0x10f1b56,0x081cf72,
+        0x15cb758,0x18687c1,0x00f5722 } },
+    /* 225 */
+    { { 0x2973088,0x1209dcd,0x3980f31,0x0221aa7,0x1c008e7,0x011b098,
+        0x395947e,0x2f2806d,0x27dca76,0x037c79a,0x31acddf,0x2bf6219,
+        0x0d8f4ab,0x13644d9,0x00ff705 },
+      { 0x2260594,0x18d51f8,0x277e2cf,0x1cb5cec,0x2468a53,0x3e6f4d7,
+        0x019e24e,0x0f30f1d,0x0202404,0x34ad287,0x090b39c,0x23c11ea,
+        0x1a2e3a2,0x3a851be,0x00dca2c } },
+    /* 226 */
+    { { 0x3277538,0x221cd94,0x3738ab7,0x0973da5,0x1a734e2,0x2c8b8b0,
+        0x2e1d1e6,0x348499b,0x389ebe1,0x18b1854,0x02bb076,0x1b2b500,
+        0x0f207f3,0x170cf99,0x0012088 },
+      { 0x0fbfec2,0x1df55a4,0x34ae59e,0x2ab5e95,0x3f9e781,0x3411794,
+        0x1410b05,0x17c3a00,0x0aaa91b,0x074ed7c,0x3fbb352,0x3477c01,
+        0x3ee9ab3,0x0cfb1ca,0x0011c4b } },
+    /* 227 */
+    { { 0x3c3a7f3,0x2e60ca0,0x2354d32,0x33e2362,0x28083ab,0x03d3b16,
+        0x3164045,0x0a41f7a,0x3f0641e,0x38635d1,0x31bbf03,0x225e2bb,
+        0x0cd894e,0x1f72228,0x0093244 },
+      { 0x33d5897,0x383faf3,0x0e6d561,0x0bc4d80,0x3fc3a68,0x05a9adc,
+        0x0b9d73d,0x3d6031e,0x2ded29b,0x339c4ff,0x08d69e5,0x089488c,
+        0x3fda40a,0x295c7fd,0x003a924 } },
+    /* 228 */
+    { { 0x0093bee,0x115532d,0x2ec0fb6,0x0969631,0x3a6d65a,0x0f43b4d,
+        0x26994d4,0x0b51104,0x2515515,0x3695a26,0x284caa8,0x397aa30,
+        0x25538b8,0x353f47c,0x0033f05 },
+      { 0x3615d6e,0x37f8246,0x07dae0f,0x23dc154,0x02ded7e,0x1eef320,
+        0x1631e51,0x3447f75,0x13e267f,0x353e1d1,0x3f89d62,0x369c8ff,
+        0x1a21dc6,0x2b8b8f3,0x0055cbc } },
+    /* 229 */
+    { { 0x34e84f3,0x2f2539a,0x2c35336,0x0c53bdc,0x1728630,0x3ad5fe6,
+        0x05fdeee,0x3386db6,0x272a42e,0x29fd38c,0x36f0320,0x21b2ed4,
+        0x331e67f,0x28ae48c,0x00f09b6 },
+      { 0x2778435,0x0fb3c55,0x32d221d,0x2660c8e,0x32977ba,0x1c12f03,
+        0x1b57fb1,0x01229a8,0x38b389f,0x375ddf3,0x2c6b42c,0x3885d3e,
+        0x2c55a9c,0x2ffc279,0x00404e2 } },
+    /* 230 */
+    { { 0x04c5ddb,0x2c4d788,0x150e9b9,0x110fbfd,0x29dbfe0,0x30ef83d,
+        0x2ab4bfe,0x395bcd7,0x30d0a43,0x0e2d30f,0x0e73f9b,0x07199cc,
+        0x0c9054c,0x22f4b1e,0x0092ed3 },
+      { 0x386e27c,0x00fdaa8,0x0507c70,0x1beb3b6,0x0b9c4f4,0x277d519,
+        0x024ec85,0x1cbaba8,0x1524295,0x112be58,0x21fc119,0x273578b,
+        0x2358c27,0x280ca07,0x00aa376 } },
+    /* 231 */
+    { { 0x0dbc95c,0x16488cf,0x337a078,0x1abbcb8,0x0aae1aa,0x1caa151,
+        0x00108d4,0x1edf701,0x3e68d03,0x1203214,0x0c7eee2,0x084c572,
+        0x07752d2,0x215a3b9,0x00195d3 },
+      { 0x2cd7fbe,0x06e80f6,0x052bd4b,0x07b4f83,0x24b5ac6,0x2aaded4,
+        0x13c0526,0x0ffa9a3,0x08c660e,0x13c35c9,0x3145efb,0x36cfe24,
+        0x0936daf,0x268e3d0,0x00a73fd } },
+    /* 232 */
+    { { 0x31b17ce,0x2e7bcee,0x3f31891,0x19f1849,0x1140236,0x015487f,
+        0x32e58d3,0x202204a,0x049e350,0x1ce91f9,0x3f75150,0x27f212f,
+        0x0d16ee4,0x1c894c4,0x004023f },
+      { 0x33399fa,0x2397b6d,0x2a3ea60,0x36354ca,0x1f12632,0x117a105,
+        0x22758e8,0x361844e,0x3851fc2,0x0ab92db,0x339d02f,0x1e7d6c4,
+        0x19ebd38,0x0a9a036,0x00446d2 } },
+    /* 233 */
+    { { 0x3e164f1,0x008c092,0x19200f5,0x35a22e0,0x38d09d2,0x212b3bf,
+        0x0056f19,0x3a03545,0x1f075e9,0x0e97137,0x1f496a9,0x32d1f9b,
+        0x36bf738,0x35ace37,0x00899e1 },
+      { 0x19eb2a6,0x21fa22d,0x338b69e,0x18e6d1f,0x1280d9d,0x1953a55,
+        0x1411ea3,0x2960566,0x0fd969a,0x1f3e375,0x130742a,0x170aebd,
+        0x33085ff,0x14d868d,0x00a4391 } },
+    /* 234 */
+    { { 0x0a4bdd2,0x39ca8ea,0x37026ac,0x346da3b,0x0c656cd,0x03136b6,
+        0x233e7e9,0x0714352,0x08a9d95,0x192bb38,0x085d68e,0x20016b8,
+        0x102b8ea,0x1f5dbdd,0x00fdd7a },
+      { 0x0d6fa45,0x3ec29a6,0x2b8cce6,0x1c84413,0x0228f86,0x28275f7,
+        0x3d8787d,0x0c19748,0x28b2ae9,0x1954850,0x2a56c36,0x3eae8f7,
+        0x0aca595,0x00e42a2,0x00edbe5 } },
+    /* 235 */
+    { { 0x3b26c82,0x3682b6f,0x2f9cd64,0x0f254b0,0x0e5d70b,0x1f9dfda,
+        0x28f365f,0x35a57d7,0x00208f2,0x19c8d38,0x112e7be,0x3e403bb,
+        0x3734efa,0x24d12b3,0x0027dc6 },
+      { 0x260a46a,0x13fd7b0,0x1c2880e,0x338b70c,0x27da5eb,0x29a7d54,
+        0x1c5d73c,0x2130921,0x32969cc,0x2b37eda,0x2d6d4ec,0x0716bfb,
+        0x0763703,0x1320889,0x00c7bbf } },
+    /* 236 */
+    { { 0x1fe01b2,0x2dcb1d2,0x11b89d5,0x219e4ea,0x0347851,0x3d1810e,
+        0x3a3c54c,0x06dbe8e,0x03d3ab2,0x2dcfa39,0x3e57b8a,0x337a382,
+        0x0426450,0x0e9f748,0x006488b },
+      { 0x1dc4582,0x0e62cf7,0x06fea9e,0x2a56fb1,0x31698c1,0x15b4e10,
+        0x1446ef1,0x0a689fc,0x1d87703,0x20ff497,0x2c71066,0x2c48868,
+        0x2e6cf05,0x30aa9cb,0x0065b2d } },
+    /* 237 */
+    { { 0x1021d63,0x2217df3,0x1f0821a,0x057fa98,0x23f344b,0x173dcf9,
+        0x1ba6ddc,0x22c8eb5,0x18f227a,0x0455343,0x1c55931,0x1d0dcf3,
+        0x20fa19b,0x1c56618,0x004feab },
+      { 0x19ec924,0x224e39f,0x2550509,0x179b51f,0x284d54a,0x2d85d41,
+        0x2d1bdc1,0x1a29068,0x3826158,0x1267f85,0x3005a92,0x0769e00,
+        0x379b617,0x17b5f63,0x00a70bf } },
+    /* 238 */
+    { { 0x22216c5,0x049437f,0x33510bc,0x141d806,0x22c37e2,0x1bc1adf,
+        0x300175d,0x2e6ded8,0x0a18bfe,0x35377a3,0x382f843,0x08410ca,
+        0x00afd4f,0x0be6c6b,0x008d70e },
+      { 0x2e91abb,0x1cede2a,0x28f225c,0x28e18c0,0x30230dc,0x173cc2d,
+        0x123ecfe,0x3c9962e,0x2c25506,0x27b5d53,0x329a5e3,0x106e231,
+        0x3889b8e,0x3b0aeaf,0x00ee67c } },
+    /* 239 */
+    { { 0x3e46c65,0x0eb3d46,0x1d7ae18,0x23f9d59,0x2978953,0x2589ed3,
+        0x073391d,0x2461e1e,0x0c19f1d,0x22fd2b1,0x0691f5c,0x2e67d8d,
+        0x1fb985d,0x200dd28,0x00a68df },
+      { 0x392b5fa,0x123b46f,0x1c323c4,0x104f82f,0x0a098c8,0x26fc05b,
+        0x34cd557,0x0913639,0x09c115e,0x3977c34,0x3410b66,0x062b404,
+        0x0213094,0x132c5e8,0x008b612 } },
+    /* 240 */
+    { { 0x26e3392,0x3b0ebf0,0x2e00425,0x1c285c8,0x3c07f84,0x08d5ad0,
+        0x028190e,0x1669b73,0x1ffb1ef,0x053b65f,0x063028c,0x0aceb47,
+        0x18988c2,0x0f09a30,0x0007072 },
+      { 0x0f49e7d,0x28c0bd3,0x252270d,0x24cfc4a,0x0c5e87c,0x2165052,
+        0x2cdd1d1,0x04931d2,0x3abca74,0x22b57dc,0x169fd47,0x0b928fb,
+        0x17cc3e7,0x21a1ec4,0x0061593 } },
+    /* 241 */
+    { { 0x1aa0486,0x2e55dea,0x15577b7,0x0d6818f,0x36e41fb,0x2a411f5,
+        0x17d5c7d,0x1eea6c0,0x28068a8,0x0e31d20,0x1f08ad9,0x117e973,
+        0x08a28ab,0x085d30a,0x00cd9fb },
+      { 0x347843d,0x1119095,0x11e3595,0x1b29584,0x134d64c,0x2ff3a35,
+        0x247ea14,0x099fc4b,0x2056169,0x145dd03,0x2ed03fb,0x1250e3b,
+        0x3f5135c,0x2b753f0,0x009da30 } },
+    /* 242 */
+    { { 0x0fa5200,0x214a0b3,0x313dc4e,0x23da866,0x3270760,0x15c9b8b,
+        0x39a53df,0x1f79772,0x3c9e942,0x2984901,0x154d582,0x1685f87,
+        0x2e1183e,0x1f79956,0x00b9987 },
+      { 0x15254de,0x3a5cac0,0x37c56f0,0x2c7c29b,0x292a56d,0x195be2c,
+        0x17e4e1a,0x0660f4a,0x052ad98,0x1267f80,0x07cfed8,0x194b4bc,
+        0x01738d3,0x14ba10f,0x00c7843 } },
+    /* 243 */
+    { { 0x29b2d8a,0x242bc1f,0x19646ee,0x0615f3c,0x0ac8d70,0x07ca3bf,
+        0x2d90317,0x2c83bdb,0x1a96812,0x39fdc35,0x31c61ee,0x2d55fd3,
+        0x2375827,0x355f189,0x00f1c9b },
+      { 0x21a6194,0x1f4050a,0x2b845cf,0x02c6242,0x2dd614e,0x3a4f0a9,
+        0x39de100,0x24714fb,0x175e0cd,0x0be633d,0x14befc3,0x13b0318,
+        0x1d68c50,0x299989e,0x00d0513 } },
+    /* 244 */
+    { { 0x059fb6a,0x2b6eb6a,0x3666a8e,0x39f6ca0,0x1cf8346,0x388b8d5,
+        0x35e61a3,0x271adec,0x22c9963,0x20a4fb3,0x16f241c,0x0058b89,
+        0x21ddafa,0x1ee6fde,0x00d2e6c },
+      { 0x0075e63,0x39894d0,0x0286d0d,0x187e7b2,0x02405aa,0x3f91525,
+        0x37830a8,0x2723088,0x2c7364e,0x013f406,0x104ba75,0x270f486,
+        0x3520b4d,0x3852bc6,0x00d589b } },
+    /* 245 */
+    { { 0x262e53b,0x1da93d1,0x3676135,0x147e41d,0x335ec2f,0x1f02be5,
+        0x297d139,0x22d6198,0x1fe9e59,0x13b4c80,0x1e70f60,0x2f1d4a9,
+        0x2d95149,0x14d6ec4,0x00b54af },
+      { 0x12c1c76,0x2930ac8,0x0dfd36e,0x31fac94,0x218f5bb,0x2828691,
+        0x1466cc9,0x3645e83,0x1a4dac2,0x1549593,0x0e95fab,0x19567d2,
+        0x27a3320,0x0642729,0x007487c } },
+    /* 246 */
+    { { 0x1e98e9c,0x2ff8df7,0x119975a,0x098a904,0x099b90b,0x336c7df,
+        0x010996d,0x159d46d,0x3118b3b,0x3aacd1b,0x31f8ae1,0x214864f,
+        0x398c104,0x089dae2,0x001ec4d },
+      { 0x1452baa,0x2f24991,0x2572ba3,0x162b312,0x2387d18,0x147c5c7,
+        0x38eff6e,0x0700251,0x37d931e,0x23cd5c1,0x254c8ca,0x3b9df37,
+        0x1c9a4ff,0x0bfd547,0x00fb489 } },
+    /* 247 */
+    { { 0x1b8dff8,0x2f6b40b,0x05a25b1,0x3f5688a,0x1d462f4,0x2802d18,
+        0x2aad8ed,0x1b46c75,0x3cf4130,0x250fefb,0x2a13fe1,0x23a1bcd,
+        0x0940442,0x04605fe,0x00c8b2f },
+      { 0x0d51afb,0x14a2abc,0x1d06762,0x291526c,0x2a3e2fe,0x28f77d9,
+        0x3ad8f2e,0x3481a1b,0x04b4fbd,0x2836733,0x0189ff5,0x3a5f533,
+        0x319a6cd,0x0f58667,0x00c3679 } },
+    /* 248 */
+    { { 0x1b85197,0x22426d4,0x2895ea3,0x342d324,0x3ffb17d,0x376cfcf,
+        0x30878b1,0x3c3c83a,0x0ffc57c,0x0ac174a,0x1abd57e,0x2f78b9c,
+        0x01b20d8,0x0a37103,0x007f2be },
+      { 0x19a2d48,0x137288a,0x182d655,0x0ba0dde,0x25130ba,0x01c65c6,
+        0x23205f1,0x2097621,0x2827cf2,0x2c57b98,0x03748f2,0x2db15fc,
+        0x385a0d4,0x13690c0,0x00a9e3f } },
+    /* 249 */
+    { { 0x3fbc9c6,0x2df3b20,0x377e33e,0x31d1505,0x024a311,0x3c1d9ff,
+        0x1377f74,0x00b6b20,0x2364ab7,0x184ab6b,0x2a77969,0x3f2db6c,
+        0x2a6adb7,0x0a10073,0x004a6fb },
+      { 0x1fc73de,0x2c74ab3,0x3d325e8,0x2346c0b,0x1d0efae,0x2076146,
+        0x19c190d,0x225c4fe,0x3fafc80,0x2cf063d,0x11b7ae7,0x3dc4f9d,
+        0x3c3f841,0x10d7c1f,0x000a4b3 } },
+    /* 250 */
+    { { 0x19b7d2e,0x28f1300,0x0b897dd,0x06b5371,0x0631c8d,0x336cc4f,
+        0x09cd6e1,0x2ec1952,0x1104c07,0x07512bb,0x35f000d,0x25f84e9,
+        0x1df4d8f,0x193f769,0x000e9ee },
+      { 0x2346910,0x267cecf,0x0ad7eaa,0x087e8a5,0x1622f69,0x342cbfa,
+        0x2aa20d0,0x206e88a,0x3991e58,0x093fb4b,0x0157180,0x3cecb5b,
+        0x2e17c9a,0x1ea371f,0x00919e6 } },
+    /* 251 */
+    { { 0x2250533,0x13f931d,0x3ef8c72,0x395f605,0x18a2080,0x1cb25d4,
+        0x2fb0f41,0x1c0ba8a,0x1eb17c0,0x266c433,0x09b7e3e,0x0e5d78f,
+        0x0cdc5bf,0x1f7c734,0x0020611 },
+      { 0x205ebd5,0x127986f,0x02c0fb0,0x1705b1e,0x1eb0bb5,0x2dffb42,
+        0x2331b8a,0x18fc04e,0x31d6328,0x17db162,0x0d3b619,0x193bdb9,
+        0x3f11662,0x2d8e694,0x0092c51 } },
+    /* 252 */
+    { { 0x08b364d,0x31ef20a,0x25c4a57,0x021ed07,0x14a562e,0x262a684,
+        0x1d21c66,0x126e5a6,0x181f3f8,0x2a93b65,0x1eb726b,0x08fbbce,
+        0x084f9a2,0x308f30a,0x0013159 },
+      { 0x23f4963,0x0c7960e,0x2a81739,0x2242b69,0x3965003,0x2aca542,
+        0x28a1c65,0x2ad48fb,0x149775f,0x1bbb7d2,0x0f2671b,0x3594b85,
+        0x22f5563,0x2470f13,0x00fed44 } },
+    /* 253 */
+    { { 0x0eb453e,0x3ab70fd,0x1a5b335,0x18f2b74,0x25ff74b,0x3612a46,
+        0x33d0d75,0x28cdda4,0x2b9b49b,0x22728fb,0x004c15b,0x1beb33b,
+        0x1a7e41f,0x0c9b702,0x004ef19 },
+      { 0x1ca3233,0x0b4c90f,0x1d4b53d,0x2428896,0x20ee405,0x151bc00,
+        0x022edb5,0x1adc463,0x00109ea,0x06490a6,0x30e91e6,0x3682b76,
+        0x23c50aa,0x3bd2665,0x005fe53 } },
+    /* 254 */
+    { { 0x0c28c65,0x3741ae4,0x247d372,0x0b04673,0x2176524,0x2c8bf20,
+        0x01fb806,0x3330701,0x307b0a7,0x3999fb7,0x1261bec,0x256679c,
+        0x3f22ac7,0x26e8673,0x00bc69d },
+      { 0x3c06819,0x35df344,0x379d009,0x2bb8a0a,0x0635a66,0x096c6fa,
+        0x1ac4a62,0x023e53b,0x0e45240,0x115f53d,0x3056af8,0x0a66b16,
+        0x3c386ee,0x1130e82,0x00cc384 } },
+    /* 255 */
+    { { 0x14c2356,0x190ec73,0x07be490,0x145d415,0x0740a48,0x1251301,
+        0x3eaf29d,0x2628190,0x079299a,0x26e95c9,0x2e05fdf,0x2ca7c5b,
+        0x32d7b48,0x3d84226,0x0033fb4 },
+      { 0x150f955,0x01240aa,0x3ddf867,0x137fb70,0x297e103,0x17eeda8,
+        0x1320b60,0x266ec84,0x13f4322,0x0c8f5ee,0x0590e4a,0x386815e,
+        0x00ce61f,0x161bd63,0x008e1d0 } },
+};
+
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_base_15(sp_point_384* r, const sp_digit* k,
+        int map, void* heap)
+{
+    return sp_384_ecc_mulmod_stripe_15(r, &p384_base, p384_table,
+                                      k, map, heap);
+}
+
+#endif
+
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * km    Scalar to multiply by.
+ * r     Resulting point.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_base_384(mp_int* km, ecc_point* r, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[15];
+#endif
+    sp_point_384* point;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    err = sp_384_point_new_15(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 15, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+    if (err == MP_OKAY) {
+        sp_384_from_mp(k, 15, km);
+
+            err = sp_384_ecc_mulmod_base_15(point, k, map, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_to_ecc_point_15(point, r);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_15(point, 0, heap);
+
+    return err;
+}
+
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+                                                        defined(HAVE_ECC_VERIFY)
+/* Returns 1 if the number of zero.
+ * Implementation is constant time.
+ *
+ * a  Number to check.
+ * returns 1 if the number is zero and 0 otherwise.
+ */
+static int sp_384_iszero_15(const sp_digit* a)
+{
+    return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] |
+            a[8] | a[9] | a[10] | a[11] | a[12] | a[13] | a[14]) == 0;
+}
+
+#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+/* Add 1 to a. (a = a + 1)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_384_add_one_15(sp_digit* a)
+{
+    a[0]++;
+    sp_384_norm_15(a);
+}
+
+/* Read big endian unsigned byte array into r.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
+ * a  Byte array.
+ * n  Number of bytes in array to read.
+ */
+static void sp_384_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = n-1; i >= 0; i--) {
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 18U) {
+            r[j] &= 0x3ffffff;
+            s = 26U - s;
+            if (j + 1 >= size) {
+                break;
+            }
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+}
+
+/* Generates a scalar that is in the range 1..order-1.
+ *
+ * rng  Random number generator.
+ * k    Scalar value.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+static int sp_384_ecc_gen_k_15(WC_RNG* rng, sp_digit* k)
+{
+    int err;
+    byte buf[48];
+
+    do {
+        err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf));
+        if (err == 0) {
+            sp_384_from_bin(k, 15, buf, (int)sizeof(buf));
+            if (sp_384_cmp_15(k, p384_order2) < 0) {
+                sp_384_add_one_15(k);
+                break;
+            }
+        }
+    }
+    while (err == 0);
+
+    return err;
+}
+
+/* Makes a random EC key pair.
+ *
+ * rng   Random number generator.
+ * priv  Generated private value.
+ * pub   Generated public point.
+ * heap  Heap to use for allocation.
+ * returns ECC_INF_E when the point does not have the correct order, RNG
+ * failures, MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_make_key_384(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[15];
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_384 inf;
+#endif
+#endif
+    sp_point_384* point;
+    sp_digit* k = NULL;
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_384* infinity;
+#endif
+    int err;
+
+    (void)heap;
+
+    err = sp_384_point_new_15(heap, p, point);
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_15(heap, inf, infinity);
+    }
+#endif
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 15, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+
+    if (err == MP_OKAY) {
+        err = sp_384_ecc_gen_k_15(rng, k);
+    }
+    if (err == MP_OKAY) {
+            err = sp_384_ecc_mulmod_base_15(point, k, 1, NULL);
+    }
+
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    if (err == MP_OKAY) {
+            err = sp_384_ecc_mulmod_15(infinity, point, p384_order, 1, NULL);
+    }
+    if (err == MP_OKAY) {
+        if ((sp_384_iszero_15(point->x) == 0) || (sp_384_iszero_15(point->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(k, priv);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_to_ecc_point_15(point, pub);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_384_point_free_15(infinity, 1, heap);
+#endif
+    sp_384_point_free_15(point, 1, heap);
+
+    return err;
+}
+
+#ifdef HAVE_ECC_DHE
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 48
+ *
+ * r  A single precision integer.
+ * a  Byte array.
+ */
+static void sp_384_to_bin(sp_digit* r, byte* a)
+{
+    int i, j, s = 0, b;
+
+    for (i=0; i<14; i++) {
+        r[i+1] += r[i] >> 26;
+        r[i] &= 0x3ffffff;
+    }
+    j = 384 / 8 - 1;
+    a[j] = 0;
+    for (i=0; i<15 && j>=0; i++) {
+        b = 0;
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
+            break;
+        }
+        while (b < 26) {
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
+                break;
+            }
+        }
+        s = 8 - (b - 26);
+        if (j >= 0) {
+            a[j] = 0;
+        }
+        if (s != 0) {
+            j++;
+        }
+    }
+}
+
+/* Multiply the point by the scalar and serialize the X ordinate.
+ * The number is 0 padded to maximum size on output.
+ *
+ * priv    Scalar to multiply the point by.
+ * pub     Point to multiply.
+ * out     Buffer to hold X ordinate.
+ * outLen  On entry, size of the buffer in bytes.
+ *         On exit, length of data in buffer in bytes.
+ * heap    Heap to use for allocation.
+ * returns BUFFER_E if the buffer is to small for output size,
+ * MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_secret_gen_384(mp_int* priv, ecc_point* pub, byte* out,
+                          word32* outLen, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[15];
+#endif
+    sp_point_384* point = NULL;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    if (*outLen < 48U) {
+        err = BUFFER_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_15(heap, p, point);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 15, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL)
+            err = MEMORY_E;
+    }
+#else
+    k = kd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_384_from_mp(k, 15, priv);
+        sp_384_point_from_ecc_point_15(point, pub);
+            err = sp_384_ecc_mulmod_15(point, point, k, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        sp_384_to_bin(point->x, out);
+        *outLen = 48;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_15(point, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_DHE */
+
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* Multiply a by scalar b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A scalar.
+ */
+SP_NOINLINE static void sp_384_mul_d_15(sp_digit* r, const sp_digit* a,
+    sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int64_t tb = b;
+    int64_t t = 0;
+    int i;
+
+    for (i = 0; i < 15; i++) {
+        t += tb * a[i];
+        r[i] = t & 0x3ffffff;
+        t >>= 26;
+    }
+    r[15] = (sp_digit)t;
+#else
+    int64_t tb = b;
+    int64_t t[15];
+
+    t[ 0] = tb * a[ 0];
+    t[ 1] = tb * a[ 1];
+    t[ 2] = tb * a[ 2];
+    t[ 3] = tb * a[ 3];
+    t[ 4] = tb * a[ 4];
+    t[ 5] = tb * a[ 5];
+    t[ 6] = tb * a[ 6];
+    t[ 7] = tb * a[ 7];
+    t[ 8] = tb * a[ 8];
+    t[ 9] = tb * a[ 9];
+    t[10] = tb * a[10];
+    t[11] = tb * a[11];
+    t[12] = tb * a[12];
+    t[13] = tb * a[13];
+    t[14] = tb * a[14];
+    r[ 0] =                           (t[ 0] & 0x3ffffff);
+    r[ 1] = (sp_digit)(t[ 0] >> 26) + (t[ 1] & 0x3ffffff);
+    r[ 2] = (sp_digit)(t[ 1] >> 26) + (t[ 2] & 0x3ffffff);
+    r[ 3] = (sp_digit)(t[ 2] >> 26) + (t[ 3] & 0x3ffffff);
+    r[ 4] = (sp_digit)(t[ 3] >> 26) + (t[ 4] & 0x3ffffff);
+    r[ 5] = (sp_digit)(t[ 4] >> 26) + (t[ 5] & 0x3ffffff);
+    r[ 6] = (sp_digit)(t[ 5] >> 26) + (t[ 6] & 0x3ffffff);
+    r[ 7] = (sp_digit)(t[ 6] >> 26) + (t[ 7] & 0x3ffffff);
+    r[ 8] = (sp_digit)(t[ 7] >> 26) + (t[ 8] & 0x3ffffff);
+    r[ 9] = (sp_digit)(t[ 8] >> 26) + (t[ 9] & 0x3ffffff);
+    r[10] = (sp_digit)(t[ 9] >> 26) + (t[10] & 0x3ffffff);
+    r[11] = (sp_digit)(t[10] >> 26) + (t[11] & 0x3ffffff);
+    r[12] = (sp_digit)(t[11] >> 26) + (t[12] & 0x3ffffff);
+    r[13] = (sp_digit)(t[12] >> 26) + (t[13] & 0x3ffffff);
+    r[14] = (sp_digit)(t[13] >> 26) + (t[14] & 0x3ffffff);
+    r[15] = (sp_digit)(t[14] >> 26);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#ifdef WOLFSSL_SP_DIV_32
+static WC_INLINE sp_digit sp_384_div_word_15(sp_digit d1, sp_digit d0,
+    sp_digit dv)
+{
+    sp_digit d, r, t;
+
+    /* All 26 bits from d1 and top 5 bits from d0. */
+    d = (d1 << 5) | (d0 >> 21);
+    r = d / dv;
+    d -= r * dv;
+    /* Up to 6 bits in r */
+    /* Next 5 bits from d0. */
+    r <<= 5;
+    d <<= 5;
+    d |= (d0 >> 16) & ((1 << 5) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 11 bits in r */
+    /* Next 5 bits from d0. */
+    r <<= 5;
+    d <<= 5;
+    d |= (d0 >> 11) & ((1 << 5) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 16 bits in r */
+    /* Next 5 bits from d0. */
+    r <<= 5;
+    d <<= 5;
+    d |= (d0 >> 6) & ((1 << 5) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 21 bits in r */
+    /* Next 5 bits from d0. */
+    r <<= 5;
+    d <<= 5;
+    d |= (d0 >> 1) & ((1 << 5) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 26 bits in r */
+    /* Remaining 1 bits from d0. */
+    r <<= 1;
+    d <<= 1;
+    d |= d0 & ((1 << 1) - 1);
+    t = d / dv;
+    r += t;
+
+    return r;
+}
+#endif /* WOLFSSL_SP_DIV_32 */
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Number to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_384_div_15(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    int i;
+#ifndef WOLFSSL_SP_DIV_32
+    int64_t d1;
+#endif
+    sp_digit dv, r1;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* td;
+#else
+    sp_digit t1d[30], t2d[15 + 1];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    int err = MP_OKAY;
+
+    (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (3 * 15 + 1), NULL,
+                                                       DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = td;
+        t2 = td + 2 * 15;
+#else
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
+        dv = d[14];
+        XMEMCPY(t1, a, sizeof(*t1) * 2U * 15U);
+        for (i=14; i>=0; i--) {
+            t1[15 + i] += t1[15 + i - 1] >> 26;
+            t1[15 + i - 1] &= 0x3ffffff;
+#ifndef WOLFSSL_SP_DIV_32
+            d1 = t1[15 + i];
+            d1 <<= 26;
+            d1 += t1[15 + i - 1];
+            r1 = (sp_digit)(d1 / dv);
+#else
+            r1 = sp_384_div_word_15(t1[15 + i], t1[15 + i - 1], dv);
+#endif
+
+            sp_384_mul_d_15(t2, d, r1);
+            (void)sp_384_sub_15(&t1[i], &t1[i], t2);
+            t1[15 + i] -= t2[15];
+            t1[15 + i] += t1[15 + i - 1] >> 26;
+            t1[15 + i - 1] &= 0x3ffffff;
+            r1 = (((-t1[15 + i]) << 26) - t1[15 + i - 1]) / dv;
+            r1++;
+            sp_384_mul_d_15(t2, d, r1);
+            (void)sp_384_add_15(&t1[i], &t1[i], t2);
+            t1[15 + i] += t1[15 + i - 1] >> 26;
+            t1[15 + i - 1] &= 0x3ffffff;
+        }
+        t1[15 - 1] += t1[15 - 2] >> 26;
+        t1[15 - 2] &= 0x3ffffff;
+        r1 = t1[15 - 1] / dv;
+
+        sp_384_mul_d_15(t2, d, r1);
+        (void)sp_384_sub_15(t1, t1, t2);
+        XMEMCPY(r, t1, sizeof(*r) * 2U * 15U);
+        for (i=0; i<13; i++) {
+            r[i+1] += r[i] >> 26;
+            r[i] &= 0x3ffffff;
+        }
+        sp_384_cond_add_15(r, r, d, 0 - ((r[14] < 0) ?
+                    (sp_digit)1 : (sp_digit)0));
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_384_mod_15(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_384_div_15(a, m, NULL, r);
+}
+
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
+/* Order-2 for the P384 curve. */
+static const uint32_t p384_order_minus_2[12] = {
+    0xccc52971U,0xecec196aU,0x48b0a77aU,0x581a0db2U,0xf4372ddfU,0xc7634d81U,
+    0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU
+};
+#else
+/* The low half of the order-2 of the P384 curve. */
+static const uint32_t p384_order_low[6] = {
+    0xccc52971U,0xecec196aU,0x48b0a77aU,0x581a0db2U,0xf4372ddfU,0xc7634d81U
+    
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Multiply two number mod the order of P384 curve. (r = a * b mod order)
+ *
+ * r  Result of the multiplication.
+ * a  First operand of the multiplication.
+ * b  Second operand of the multiplication.
+ */
+static void sp_384_mont_mul_order_15(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+    sp_384_mul_15(r, a, b);
+    sp_384_mont_reduce_order_15(r, p384_order, p384_mp_order);
+}
+
+/* Square number mod the order of P384 curve. (r = a * a mod order)
+ *
+ * r  Result of the squaring.
+ * a  Number to square.
+ */
+static void sp_384_mont_sqr_order_15(sp_digit* r, const sp_digit* a)
+{
+    sp_384_sqr_15(r, a);
+    sp_384_mont_reduce_order_15(r, p384_order, p384_mp_order);
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Square number mod the order of P384 curve a number of times.
+ * (r = a ^ n mod order)
+ *
+ * r  Result of the squaring.
+ * a  Number to square.
+ */
+static void sp_384_mont_sqr_n_order_15(sp_digit* r, const sp_digit* a, int n)
+{
+    int i;
+
+    sp_384_mont_sqr_order_15(r, a);
+    for (i=1; i<n; i++) {
+        sp_384_mont_sqr_order_15(r, r);
+    }
+}
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the order of the P384 curve.
+ * (r = 1 / a mod order)
+ *
+ * r   Inverse result.
+ * a   Number to invert.
+ * td  Temporary data.
+ */
+static void sp_384_mont_inv_order_15(sp_digit* r, const sp_digit* a,
+        sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    XMEMCPY(t, a, sizeof(sp_digit) * 15);
+    for (i=382; i>=0; i--) {
+        sp_384_mont_sqr_order_15(t, t);
+        if ((p384_order_minus_2[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_384_mont_mul_order_15(t, t, a);
+        }
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 15U);
+#else
+    sp_digit* t = td;
+    sp_digit* t2 = td + 2 * 15;
+    sp_digit* t3 = td + 4 * 15;
+    int i;
+
+    /* t = a^2 */
+    sp_384_mont_sqr_order_15(t, a);
+    /* t = a^3 = t * a */
+    sp_384_mont_mul_order_15(t, t, a);
+    /* t2= a^c = t ^ 2 ^ 2 */
+    sp_384_mont_sqr_n_order_15(t2, t, 2);
+    /* t = a^f = t2 * t */
+    sp_384_mont_mul_order_15(t, t2, t);
+    /* t2= a^f0 = t ^ 2 ^ 4 */
+    sp_384_mont_sqr_n_order_15(t2, t, 4);
+    /* t = a^ff = t2 * t */
+    sp_384_mont_mul_order_15(t, t2, t);
+    /* t2= a^ff00 = t ^ 2 ^ 8 */
+    sp_384_mont_sqr_n_order_15(t2, t, 8);
+    /* t3= a^ffff = t2 * t */
+    sp_384_mont_mul_order_15(t3, t2, t);
+    /* t2= a^ffff0000 = t3 ^ 2 ^ 16 */
+    sp_384_mont_sqr_n_order_15(t2, t3, 16);
+    /* t = a^ffffffff = t2 * t3 */
+    sp_384_mont_mul_order_15(t, t2, t3);
+    /* t2= a^ffffffff0000 = t ^ 2 ^ 16  */
+    sp_384_mont_sqr_n_order_15(t2, t, 16);
+    /* t = a^ffffffffffff = t2 * t3 */
+    sp_384_mont_mul_order_15(t, t2, t3);
+    /* t2= a^ffffffffffff000000000000 = t ^ 2 ^ 48  */
+    sp_384_mont_sqr_n_order_15(t2, t, 48);
+    /* t= a^fffffffffffffffffffffffff = t2 * t */
+    sp_384_mont_mul_order_15(t, t2, t);
+    /* t2= a^ffffffffffffffffffffffff000000000000000000000000 */
+    sp_384_mont_sqr_n_order_15(t2, t, 96);
+    /* t2= a^ffffffffffffffffffffffffffffffffffffffffffffffff = t2 * t */
+    sp_384_mont_mul_order_15(t2, t2, t);
+    for (i=191; i>=1; i--) {
+        sp_384_mont_sqr_order_15(t2, t2);
+        if (((sp_digit)p384_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_384_mont_mul_order_15(t2, t2, a);
+        }
+    }
+    sp_384_mont_sqr_order_15(t2, t2);
+    sp_384_mont_mul_order_15(r, t2, a);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+#ifdef HAVE_ECC_SIGN
+#ifndef SP_ECC_MAX_SIG_GEN
+#define SP_ECC_MAX_SIG_GEN  64
+#endif
+
+/* Sign the hash using the private key.
+ *   e = [hash, 384 bits] from binary
+ *   r = (k.G)->x mod order
+ *   s = (r * x + e) / k mod order
+ * The hash is truncated to the first 384 bits.
+ *
+ * hash     Hash to sign.
+ * hashLen  Length of the hash data.
+ * rng      Random number generator.
+ * priv     Private part of key - scalar.
+ * rm       First part of result as an mp_int.
+ * sm       Sirst part of result as an mp_int.
+ * heap     Heap to use for allocation.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
+                    mp_int* rm, mp_int* sm, mp_int* km, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit ed[2*15];
+    sp_digit xd[2*15];
+    sp_digit kd[2*15];
+    sp_digit rd[2*15];
+    sp_digit td[3 * 2*15];
+    sp_point_384 p;
+#endif
+    sp_digit* e = NULL;
+    sp_digit* x = NULL;
+    sp_digit* k = NULL;
+    sp_digit* r = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_384* point = NULL;
+    sp_digit carry;
+    sp_digit* s = NULL;
+    sp_digit* kInv = NULL;
+    int err = MP_OKAY;
+    int32_t c;
+    int i;
+
+    (void)heap;
+
+    err = sp_384_point_new_15(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 15, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        e = d + 0 * 15;
+        x = d + 2 * 15;
+        k = d + 4 * 15;
+        r = d + 6 * 15;
+        tmp = d + 8 * 15;
+#else
+        e = ed;
+        x = xd;
+        k = kd;
+        r = rd;
+        tmp = td;
+#endif
+        s = e;
+        kInv = k;
+
+        if (hashLen > 48U) {
+            hashLen = 48U;
+        }
+
+        sp_384_from_bin(e, 15, hash, (int)hashLen);
+    }
+
+    for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
+        sp_384_from_mp(x, 15, priv);
+
+        /* New random point. */
+        if (km == NULL || mp_iszero(km)) {
+            err = sp_384_ecc_gen_k_15(rng, k);
+        }
+        else {
+            sp_384_from_mp(k, 15, km);
+            mp_zero(km);
+        }
+        if (err == MP_OKAY) {
+                err = sp_384_ecc_mulmod_base_15(point, k, 1, NULL);
+        }
+
+        if (err == MP_OKAY) {
+            /* r = point->x mod order */
+            XMEMCPY(r, point->x, sizeof(sp_digit) * 15U);
+            sp_384_norm_15(r);
+            c = sp_384_cmp_15(r, p384_order);
+            sp_384_cond_sub_15(r, r, p384_order, 0L - (sp_digit)(c >= 0));
+            sp_384_norm_15(r);
+
+            /* Conv k to Montgomery form (mod order) */
+                sp_384_mul_15(k, k, p384_norm_order);
+            err = sp_384_mod_15(k, k, p384_order);
+        }
+        if (err == MP_OKAY) {
+            sp_384_norm_15(k);
+            /* kInv = 1/k mod order */
+                sp_384_mont_inv_order_15(kInv, k, tmp);
+            sp_384_norm_15(kInv);
+
+            /* s = r * x + e */
+                sp_384_mul_15(x, x, r);
+            err = sp_384_mod_15(x, x, p384_order);
+        }
+        if (err == MP_OKAY) {
+            sp_384_norm_15(x);
+            carry = sp_384_add_15(s, e, x);
+            sp_384_cond_sub_15(s, s, p384_order, 0 - carry);
+            sp_384_norm_15(s);
+            c = sp_384_cmp_15(s, p384_order);
+            sp_384_cond_sub_15(s, s, p384_order, 0L - (sp_digit)(c >= 0));
+            sp_384_norm_15(s);
+
+            /* s = s * k^-1 mod order */
+                sp_384_mont_mul_order_15(s, s, kInv);
+            sp_384_norm_15(s);
+
+            /* Check that signature is usable. */
+            if (sp_384_iszero_15(s) == 0) {
+                break;
+            }
+        }
+    }
+
+    if (i == 0) {
+        err = RNG_FAILURE_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(r, rm);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(s, sm);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XMEMSET(d, 0, sizeof(sp_digit) * 8 * 15);
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    XMEMSET(e, 0, sizeof(sp_digit) * 2U * 15U);
+    XMEMSET(x, 0, sizeof(sp_digit) * 2U * 15U);
+    XMEMSET(k, 0, sizeof(sp_digit) * 2U * 15U);
+    XMEMSET(r, 0, sizeof(sp_digit) * 2U * 15U);
+    XMEMSET(r, 0, sizeof(sp_digit) * 2U * 15U);
+    XMEMSET(tmp, 0, sizeof(sp_digit) * 3U * 2U * 15U);
+#endif
+    sp_384_point_free_15(point, 1, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_SIGN */
+
+#ifdef HAVE_ECC_VERIFY
+/* Verify the signature values with the hash and public key.
+ *   e = Truncate(hash, 384)
+ *   u1 = e/s mod order
+ *   u2 = r/s mod order
+ *   r == (u1.G + u2.Q)->x mod order
+ * Optimization: Leave point in projective form.
+ *   (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
+ *   (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
+ * The hash is truncated to the first 384 bits.
+ *
+ * hash     Hash to sign.
+ * hashLen  Length of the hash data.
+ * rng      Random number generator.
+ * priv     Private part of key - scalar.
+ * rm       First part of result as an mp_int.
+ * sm       Sirst part of result as an mp_int.
+ * heap     Heap to use for allocation.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+int sp_ecc_verify_384(const byte* hash, word32 hashLen, mp_int* pX,
+    mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit u1d[2*15];
+    sp_digit u2d[2*15];
+    sp_digit sd[2*15];
+    sp_digit tmpd[2*15 * 5];
+    sp_point_384 p1d;
+    sp_point_384 p2d;
+#endif
+    sp_digit* u1 = NULL;
+    sp_digit* u2 = NULL;
+    sp_digit* s = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_384* p1;
+    sp_point_384* p2 = NULL;
+    sp_digit carry;
+    int32_t c;
+    int err;
+
+    err = sp_384_point_new_15(heap, p1d, p1);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_15(heap, p2d, p2);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 15, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        u1  = d + 0 * 15;
+        u2  = d + 2 * 15;
+        s   = d + 4 * 15;
+        tmp = d + 6 * 15;
+#else
+        u1 = u1d;
+        u2 = u2d;
+        s  = sd;
+        tmp = tmpd;
+#endif
+
+        if (hashLen > 48U) {
+            hashLen = 48U;
+        }
+
+        sp_384_from_bin(u1, 15, hash, (int)hashLen);
+        sp_384_from_mp(u2, 15, r);
+        sp_384_from_mp(s, 15, sm);
+        sp_384_from_mp(p2->x, 15, pX);
+        sp_384_from_mp(p2->y, 15, pY);
+        sp_384_from_mp(p2->z, 15, pZ);
+
+        {
+            sp_384_mul_15(s, s, p384_norm_order);
+        }
+        err = sp_384_mod_15(s, s, p384_order);
+    }
+    if (err == MP_OKAY) {
+        sp_384_norm_15(s);
+        {
+            sp_384_mont_inv_order_15(s, s, tmp);
+            sp_384_mont_mul_order_15(u1, u1, s);
+            sp_384_mont_mul_order_15(u2, u2, s);
+        }
+
+            err = sp_384_ecc_mulmod_base_15(p1, u1, 0, heap);
+    }
+    if (err == MP_OKAY) {
+            err = sp_384_ecc_mulmod_15(p2, p2, u2, 0, heap);
+    }
+
+    if (err == MP_OKAY) {
+        {
+            sp_384_proj_point_add_15(p1, p1, p2, tmp);
+            if (sp_384_iszero_15(p1->z)) {
+                if (sp_384_iszero_15(p1->x) && sp_384_iszero_15(p1->y)) {
+                    sp_384_proj_point_dbl_15(p1, p2, tmp);
+                }
+                else {
+                    /* Y ordinate is not used from here - don't set. */
+                    p1->x[0] = 0;
+                    p1->x[1] = 0;
+                    p1->x[2] = 0;
+                    p1->x[3] = 0;
+                    p1->x[4] = 0;
+                    p1->x[5] = 0;
+                    p1->x[6] = 0;
+                    p1->x[7] = 0;
+                    p1->x[8] = 0;
+                    p1->x[9] = 0;
+                    p1->x[10] = 0;
+                    p1->x[11] = 0;
+                    p1->x[12] = 0;
+                    p1->x[13] = 0;
+                    p1->x[14] = 0;
+                    XMEMCPY(p1->z, p384_norm_mod, sizeof(p384_norm_mod));
+                }
+            }
+        }
+
+        /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
+        /* Reload r and convert to Montgomery form. */
+        sp_384_from_mp(u2, 15, r);
+        err = sp_384_mod_mul_norm_15(u2, u2, p384_mod);
+    }
+
+    if (err == MP_OKAY) {
+        /* u1 = r.z'.z' mod prime */
+        sp_384_mont_sqr_15(p1->z, p1->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_15(u1, u2, p1->z, p384_mod, p384_mp_mod);
+        *res = (int)(sp_384_cmp_15(p1->x, u1) == 0);
+        if (*res == 0) {
+            /* Reload r and add order. */
+            sp_384_from_mp(u2, 15, r);
+            carry = sp_384_add_15(u2, u2, p384_order);
+            /* Carry means result is greater than mod and is not valid. */
+            if (carry == 0) {
+                sp_384_norm_15(u2);
+
+                /* Compare with mod and if greater or equal then not valid. */
+                c = sp_384_cmp_15(u2, p384_mod);
+                if (c < 0) {
+                    /* Convert to Montogomery form */
+                    err = sp_384_mod_mul_norm_15(u2, u2, p384_mod);
+                    if (err == MP_OKAY) {
+                        /* u1 = (r + 1*order).z'.z' mod prime */
+                        sp_384_mont_mul_15(u1, u2, p1->z, p384_mod,
+                                                                  p384_mp_mod);
+                        *res = (int)(sp_384_cmp_15(p1->x, u1) == 0);
+                    }
+                }
+            }
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL)
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+#endif
+    sp_384_point_free_15(p1, 0, heap);
+    sp_384_point_free_15(p2, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_VERIFY */
+
+#ifdef HAVE_ECC_CHECK_KEY
+/* Check that the x and y oridinates are a valid point on the curve.
+ *
+ * point  EC point.
+ * heap   Heap to use if dynamically allocating.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+static int sp_384_ecc_is_point_15(sp_point_384* point, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit t1d[2*15];
+    sp_digit t2d[2*15];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 15 * 4, heap, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = d + 0 * 15;
+        t2 = d + 2 * 15;
+#else
+        (void)heap;
+
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
+        sp_384_sqr_15(t1, point->y);
+        (void)sp_384_mod_15(t1, t1, p384_mod);
+        sp_384_sqr_15(t2, point->x);
+        (void)sp_384_mod_15(t2, t2, p384_mod);
+        sp_384_mul_15(t2, t2, point->x);
+        (void)sp_384_mod_15(t2, t2, p384_mod);
+        (void)sp_384_sub_15(t2, p384_mod, t2);
+        sp_384_mont_add_15(t1, t1, t2, p384_mod);
+
+        sp_384_mont_add_15(t1, t1, point->x, p384_mod);
+        sp_384_mont_add_15(t1, t1, point->x, p384_mod);
+        sp_384_mont_add_15(t1, t1, point->x, p384_mod);
+
+        if (sp_384_cmp_15(t1, p384_b) != 0) {
+            err = MP_VAL;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+
+/* Check that the x and y oridinates are a valid point on the curve.
+ *
+ * pX  X ordinate of EC point.
+ * pY  Y ordinate of EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+int sp_ecc_is_point_384(mp_int* pX, mp_int* pY)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 pubd;
+#endif
+    sp_point_384* pub;
+    byte one[1] = { 1 };
+    int err;
+
+    err = sp_384_point_new_15(NULL, pubd, pub);
+    if (err == MP_OKAY) {
+        sp_384_from_mp(pub->x, 15, pX);
+        sp_384_from_mp(pub->y, 15, pY);
+        sp_384_from_bin(pub->z, 15, one, (int)sizeof(one));
+
+        err = sp_384_ecc_is_point_15(pub, NULL);
+    }
+
+    sp_384_point_free_15(pub, 0, NULL);
+
+    return err;
+}
+
+/* Check that the private scalar generates the EC point (px, py), the point is
+ * on the curve and the point has the correct order.
+ *
+ * pX     X ordinate of EC point.
+ * pY     Y ordinate of EC point.
+ * privm  Private scalar that generates EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve, ECC_INF_E if the point does not have the correct order,
+ * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and
+ * MP_OKAY otherwise.
+ */
+int sp_ecc_check_key_384(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit privd[15];
+    sp_point_384 pubd;
+    sp_point_384 pd;
+#endif
+    sp_digit* priv = NULL;
+    sp_point_384* pub;
+    sp_point_384* p = NULL;
+    byte one[1] = { 1 };
+    int err;
+
+    err = sp_384_point_new_15(heap, pubd, pub);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_15(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 15, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (priv == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+        priv = privd;
+#endif
+
+        sp_384_from_mp(pub->x, 15, pX);
+        sp_384_from_mp(pub->y, 15, pY);
+        sp_384_from_bin(pub->z, 15, one, (int)sizeof(one));
+        sp_384_from_mp(priv, 15, privm);
+
+        /* Check point at infinitiy. */
+        if ((sp_384_iszero_15(pub->x) != 0) &&
+            (sp_384_iszero_15(pub->y) != 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check range of X and Y */
+        if (sp_384_cmp_15(pub->x, p384_mod) >= 0 ||
+            sp_384_cmp_15(pub->y, p384_mod) >= 0) {
+            err = ECC_OUT_OF_RANGE_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check point is on curve */
+        err = sp_384_ecc_is_point_15(pub, heap);
+    }
+
+    if (err == MP_OKAY) {
+        /* Point * order = infinity */
+            err = sp_384_ecc_mulmod_15(p, pub, p384_order, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is infinity */
+        if ((sp_384_iszero_15(p->x) == 0) ||
+            (sp_384_iszero_15(p->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Base * private = point */
+            err = sp_384_ecc_mulmod_base_15(p, priv, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is public key */
+        if (sp_384_cmp_15(p->x, pub->x) != 0 ||
+            sp_384_cmp_15(p->y, pub->y) != 0) {
+            err = ECC_PRIV_KEY_E;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (priv != NULL) {
+        XFREE(priv, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_15(p, 0, heap);
+    sp_384_point_free_15(pub, 0, heap);
+
+    return err;
+}
+#endif
+#ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
+/* Add two projective EC points together.
+ * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ)
+ *
+ * pX   First EC point's X ordinate.
+ * pY   First EC point's Y ordinate.
+ * pZ   First EC point's Z ordinate.
+ * qX   Second EC point's X ordinate.
+ * qY   Second EC point's Y ordinate.
+ * qZ   Second EC point's Z ordinate.
+ * rX   Resultant EC point's X ordinate.
+ * rY   Resultant EC point's Y ordinate.
+ * rZ   Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_add_point_384(mp_int* pX, mp_int* pY, mp_int* pZ,
+                              mp_int* qX, mp_int* qY, mp_int* qZ,
+                              mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit tmpd[2 * 15 * 5];
+    sp_point_384 pd;
+    sp_point_384 qd;
+#endif
+    sp_digit* tmp;
+    sp_point_384* p;
+    sp_point_384* q = NULL;
+    int err;
+
+    err = sp_384_point_new_15(NULL, pd, p);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_15(NULL, qd, q);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 15 * 5, NULL,
+                                                              DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_384_from_mp(p->x, 15, pX);
+        sp_384_from_mp(p->y, 15, pY);
+        sp_384_from_mp(p->z, 15, pZ);
+        sp_384_from_mp(q->x, 15, qX);
+        sp_384_from_mp(q->y, 15, qY);
+        sp_384_from_mp(q->z, 15, qZ);
+
+            sp_384_proj_point_add_15(p, p, q, tmp);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->x, rX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->y, rY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->z, rZ);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_15(q, 0, NULL);
+    sp_384_point_free_15(p, 0, NULL);
+
+    return err;
+}
+
+/* Double a projective EC point.
+ * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ)
+ *
+ * pX   EC point's X ordinate.
+ * pY   EC point's Y ordinate.
+ * pZ   EC point's Z ordinate.
+ * rX   Resultant EC point's X ordinate.
+ * rY   Resultant EC point's Y ordinate.
+ * rZ   Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_dbl_point_384(mp_int* pX, mp_int* pY, mp_int* pZ,
+                              mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit tmpd[2 * 15 * 2];
+    sp_point_384 pd;
+#endif
+    sp_digit* tmp;
+    sp_point_384* p;
+    int err;
+
+    err = sp_384_point_new_15(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 15 * 2, NULL,
+                                                              DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_384_from_mp(p->x, 15, pX);
+        sp_384_from_mp(p->y, 15, pY);
+        sp_384_from_mp(p->z, 15, pZ);
+
+            sp_384_proj_point_dbl_15(p, p, tmp);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->x, rX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->y, rY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->z, rZ);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_15(p, 0, NULL);
+
+    return err;
+}
+
+/* Map a projective EC point to affine in place.
+ * pZ will be one.
+ *
+ * pX   EC point's X ordinate.
+ * pY   EC point's Y ordinate.
+ * pZ   EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_map_384(mp_int* pX, mp_int* pY, mp_int* pZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit tmpd[2 * 15 * 6];
+    sp_point_384 pd;
+#endif
+    sp_digit* tmp;
+    sp_point_384* p;
+    int err;
+
+    err = sp_384_point_new_15(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 15 * 6, NULL,
+                                                              DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+    if (err == MP_OKAY) {
+        sp_384_from_mp(p->x, 15, pX);
+        sp_384_from_mp(p->y, 15, pY);
+        sp_384_from_mp(p->z, 15, pZ);
+
+        sp_384_map_15(p, p, tmp);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->x, pX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->y, pY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->z, pZ);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_15(p, 0, NULL);
+
+    return err;
+}
+#endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */
+#ifdef HAVE_COMP_KEY
+/* Find the square root of a number mod the prime of the curve.
+ *
+ * y  The number to operate on and the result.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+static int sp_384_mont_sqrt_15(sp_digit* y)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d;
+#else
+    sp_digit t1d[2 * 15];
+    sp_digit t2d[2 * 15];
+    sp_digit t3d[2 * 15];
+    sp_digit t4d[2 * 15];
+    sp_digit t5d[2 * 15];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    sp_digit* t3;
+    sp_digit* t4;
+    sp_digit* t5;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5 * 2 * 15, NULL, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = d + 0 * 15;
+        t2 = d + 2 * 15;
+        t3 = d + 4 * 15;
+        t4 = d + 6 * 15;
+        t5 = d + 8 * 15;
+#else
+        t1 = t1d;
+        t2 = t2d;
+        t3 = t3d;
+        t4 = t4d;
+        t5 = t5d;
+#endif
+
+        {
+            /* t2 = y ^ 0x2 */
+            sp_384_mont_sqr_15(t2, y, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x3 */
+            sp_384_mont_mul_15(t1, t2, y, p384_mod, p384_mp_mod);
+            /* t5 = y ^ 0xc */
+            sp_384_mont_sqr_n_15(t5, t1, 2, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xf */
+            sp_384_mont_mul_15(t1, t1, t5, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x1e */
+            sp_384_mont_sqr_15(t2, t1, p384_mod, p384_mp_mod);
+            /* t3 = y ^ 0x1f */
+            sp_384_mont_mul_15(t3, t2, y, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3e0 */
+            sp_384_mont_sqr_n_15(t2, t3, 5, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x3ff */
+            sp_384_mont_mul_15(t1, t3, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x7fe0 */
+            sp_384_mont_sqr_n_15(t2, t1, 5, p384_mod, p384_mp_mod);
+            /* t3 = y ^ 0x7fff */
+            sp_384_mont_mul_15(t3, t3, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3fff800 */
+            sp_384_mont_sqr_n_15(t2, t3, 15, p384_mod, p384_mp_mod);
+            /* t4 = y ^ 0x3ffffff */
+            sp_384_mont_mul_15(t4, t3, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0xffffffc000000 */
+            sp_384_mont_sqr_n_15(t2, t4, 30, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xfffffffffffff */
+            sp_384_mont_mul_15(t1, t4, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0xfffffffffffffff000000000000000 */
+            sp_384_mont_sqr_n_15(t2, t1, 60, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xffffffffffffffffffffffffffffff */
+            sp_384_mont_mul_15(t1, t1, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */
+            sp_384_mont_sqr_n_15(t2, t1, 120, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+            sp_384_mont_mul_15(t1, t1, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */
+            sp_384_mont_sqr_n_15(t2, t1, 15, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+            sp_384_mont_mul_15(t1, t3, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff80000000 */
+            sp_384_mont_sqr_n_15(t2, t1, 31, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff */
+            sp_384_mont_mul_15(t1, t4, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff0 */
+            sp_384_mont_sqr_n_15(t2, t1, 4, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc */
+            sp_384_mont_mul_15(t1, t5, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000 */
+            sp_384_mont_sqr_n_15(t2, t1, 62, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000001 */
+            sp_384_mont_mul_15(t1, y, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc00000000000000040000000 */
+            sp_384_mont_sqr_n_15(y, t1, 30, p384_mod, p384_mp_mod);
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+
+
+/* Uncompress the point given the X ordinate.
+ *
+ * xm    X ordinate.
+ * odd   Whether the Y ordinate is odd.
+ * ym    Calculated Y ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_uncompress_384(mp_int* xm, int odd, mp_int* ym)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d;
+#else
+    sp_digit xd[2 * 15];
+    sp_digit yd[2 * 15];
+#endif
+    sp_digit* x = NULL;
+    sp_digit* y = NULL;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 15, NULL, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        x = d + 0 * 15;
+        y = d + 2 * 15;
+#else
+        x = xd;
+        y = yd;
+#endif
+
+        sp_384_from_mp(x, 15, xm);
+        err = sp_384_mod_mul_norm_15(x, x, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        /* y = x^3 */
+        {
+            sp_384_mont_sqr_15(y, x, p384_mod, p384_mp_mod);
+            sp_384_mont_mul_15(y, y, x, p384_mod, p384_mp_mod);
+        }
+        /* y = x^3 - 3x */
+        sp_384_mont_sub_15(y, y, x, p384_mod);
+        sp_384_mont_sub_15(y, y, x, p384_mod);
+        sp_384_mont_sub_15(y, y, x, p384_mod);
+        /* y = x^3 - 3x + b */
+        err = sp_384_mod_mul_norm_15(x, p384_b, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        sp_384_mont_add_15(y, y, x, p384_mod);
+        /* y = sqrt(x^3 - 3x + b) */
+        err = sp_384_mont_sqrt_15(y);
+    }
+    if (err == MP_OKAY) {
+        XMEMSET(y + 15, 0, 15U * sizeof(sp_digit));
+        sp_384_mont_reduce_15(y, p384_mod, p384_mp_mod);
+        if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) {
+            sp_384_mont_sub_15(y, p384_mod, y, p384_mod);
+        }
+
+        err = sp_384_to_mp(y, ym);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+#endif
+#endif /* WOLFSSL_SP_384 */
+#endif /* WOLFSSL_HAVE_SP_ECC */
 #endif /* SP_WORD_SIZE == 32 */
 #endif /* !WOLFSSL_SP_ASM */
-#endif /* WOLFSSL_HAVE_SP_ECC */
 #endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH || WOLFSSL_HAVE_SP_ECC */
 
--- a/wolfcrypt/src/sp_c64.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/sp_c64.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* sp.c
  *
- * Copyright (C) 2006-2018 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -39,7 +39,9 @@
                                     defined(WOLFSSL_HAVE_SP_ECC)
 
 #ifdef RSA_LOW_MEM
+#ifndef SP_RSA_PRIVATE_EXP_D
 #define SP_RSA_PRIVATE_EXP_D
+#endif
 
 #ifndef WOLFSSL_SP_SMALL
 #define WOLFSSL_SP_SMALL
@@ -50,92 +52,108 @@
 
 #ifndef WOLFSSL_SP_ASM
 #if SP_WORD_SIZE == 64
-#if defined(WOLFSSL_SP_CACHE_RESISTANT) || defined(WOLFSSL_SP_SMALL)
+#if (defined(WOLFSSL_SP_CACHE_RESISTANT) || defined(WOLFSSL_SP_SMALL)) &&              (defined(WOLFSSL_HAVE_SP_ECC) || !defined(WOLFSSL_RSA_PUBLIC_ONLY))
 /* Mask for address to obfuscate which of the two address will be used. */
 static const size_t addr_mask[2] = { 0, (size_t)-1 };
 #endif
 
 #if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
 #ifndef WOLFSSL_SP_NO_2048
-/* Read big endian unsigned byte aray into r.
- *
- * r  A single precision integer.
+/* Read big endian unsigned byte array into r.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
  * a  Byte array.
  * n  Number of bytes in array to read.
  */
-static void sp_2048_from_bin(sp_digit* r, int max, const byte* a, int n)
-{
-    int i, j = 0, s = 0;
+static void sp_2048_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;
 
     r[0] = 0;
     for (i = n-1; i >= 0; i--) {
-        r[j] |= ((sp_digit)a[i]) << s;
-        if (s >= 49) {
-            r[j] &= 0x1ffffffffffffffl;
-            s = 57 - s;
-            if (j + 1 >= max)
-                break;
-            r[++j] = a[i] >> s;
-            s = 8 - s;
-        }
-        else
-            s += 8;
-    }
-
-    for (j++; j < max; j++)
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 49U) {
+            r[j] &= 0x1ffffffffffffffL;
+            s = 57U - s;
+            if (j + 1 >= size) {
+                break;
+            }
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    for (j++; j < size; j++) {
         r[j] = 0;
+    }
 }
 
 /* Convert an mp_int to an array of sp_digit.
  *
  * r  A single precision integer.
+ * size  Maximum number of bytes to convert
  * a  A multi-precision integer.
  */
-static void sp_2048_from_mp(sp_digit* r, int max, mp_int* a)
+static void sp_2048_from_mp(sp_digit* r, int size, const mp_int* a)
 {
 #if DIGIT_BIT == 57
     int j;
 
     XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
 
-    for (j = a->used; j < max; j++)
+    for (j = a->used; j < size; j++) {
         r[j] = 0;
+    }
 #elif DIGIT_BIT > 57
-    int i, j = 0, s = 0;
+    int i, j = 0;
+    word32 s = 0;
 
     r[0] = 0;
-    for (i = 0; i < a->used && j < max; i++) {
-        r[j] |= a->dp[i] << s;
-        r[j] &= 0x1ffffffffffffffl;
-        s = 57 - s;
-        if (j + 1 >= max)
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
+        r[j] &= 0x1ffffffffffffffL;
+        s = 57U - s;
+        if (j + 1 >= size) {
             break;
-        r[++j] = a->dp[i] >> s;
-        while (s + 57 <= DIGIT_BIT) {
-            s += 57;
-            r[j] &= 0x1ffffffffffffffl;
-            if (j + 1 >= max)
-                break;
-            if (s < DIGIT_BIT)
-                r[++j] = a->dp[i] >> s;
-            else
-                r[++j] = 0;
-        }
-        s = DIGIT_BIT - s;
-    }
-
-    for (j++; j < max; j++)
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 57U) <= (word32)DIGIT_BIT) {
+            s += 57U;
+            r[j] &= 0x1ffffffffffffffL;
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
         r[j] = 0;
+    }
 #else
     int i, j = 0, s = 0;
 
     r[0] = 0;
-    for (i = 0; i < a->used && j < max; i++) {
+    for (i = 0; i < a->used && j < size; i++) {
         r[j] |= ((sp_digit)a->dp[i]) << s;
         if (s + DIGIT_BIT >= 57) {
-            r[j] &= 0x1ffffffffffffffl;
-            if (j + 1 >= max)
-                break;
+            r[j] &= 0x1ffffffffffffffL;
+            if (j + 1 >= size) {
+                break;
+            }
             s = 57 - s;
             if (s == DIGIT_BIT) {
                 r[++j] = 0;
@@ -146,16 +164,18 @@
                 s = DIGIT_BIT - s;
             }
         }
-        else
+        else {
             s += DIGIT_BIT;
-    }
-
-    for (j++; j < max; j++)
+        }
+    }
+
+    for (j++; j < size; j++) {
         r[j] = 0;
-#endif
-}
-
-/* Write r as big endian to byte aray.
+    }
+#endif
+}
+
+/* Write r as big endian to byte array.
  * Fixed length number of bytes written: 256
  *
  * r  A single precision integer.
@@ -167,25 +187,32 @@
 
     for (i=0; i<35; i++) {
         r[i+1] += r[i] >> 57;
-        r[i] &= 0x1ffffffffffffffl;
+        r[i] &= 0x1ffffffffffffffL;
     }
     j = 2048 / 8 - 1;
     a[j] = 0;
     for (i=0; i<36 && j>=0; i++) {
         b = 0;
-        a[j--] |= r[i] << s; b += 8 - s;
-        if (j < 0)
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
             break;
+        }
         while (b < 57) {
-            a[j--] = r[i] >> b; b += 8;
-            if (j < 0)
-                break;
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
+                break;
+            }
         }
         s = 8 - (b - 57);
-        if (j >= 0)
+        if (j >= 0) {
             a[j] = 0;
-        if (s != 0)
+        }
+        if (s != 0) {
             j++;
+        }
     }
 }
 
@@ -281,24 +308,24 @@
                  + ((int128_t)a[ 8]) * b[ 7];
     int128_t t16  = ((int128_t)a[ 8]) * b[ 8];
 
-    t1   += t0  >> 57; r[ 0] = t0  & 0x1ffffffffffffffl;
-    t2   += t1  >> 57; r[ 1] = t1  & 0x1ffffffffffffffl;
-    t3   += t2  >> 57; r[ 2] = t2  & 0x1ffffffffffffffl;
-    t4   += t3  >> 57; r[ 3] = t3  & 0x1ffffffffffffffl;
-    t5   += t4  >> 57; r[ 4] = t4  & 0x1ffffffffffffffl;
-    t6   += t5  >> 57; r[ 5] = t5  & 0x1ffffffffffffffl;
-    t7   += t6  >> 57; r[ 6] = t6  & 0x1ffffffffffffffl;
-    t8   += t7  >> 57; r[ 7] = t7  & 0x1ffffffffffffffl;
-    t9   += t8  >> 57; r[ 8] = t8  & 0x1ffffffffffffffl;
-    t10  += t9  >> 57; r[ 9] = t9  & 0x1ffffffffffffffl;
-    t11  += t10 >> 57; r[10] = t10 & 0x1ffffffffffffffl;
-    t12  += t11 >> 57; r[11] = t11 & 0x1ffffffffffffffl;
-    t13  += t12 >> 57; r[12] = t12 & 0x1ffffffffffffffl;
-    t14  += t13 >> 57; r[13] = t13 & 0x1ffffffffffffffl;
-    t15  += t14 >> 57; r[14] = t14 & 0x1ffffffffffffffl;
-    t16  += t15 >> 57; r[15] = t15 & 0x1ffffffffffffffl;
+    t1   += t0  >> 57; r[ 0] = t0  & 0x1ffffffffffffffL;
+    t2   += t1  >> 57; r[ 1] = t1  & 0x1ffffffffffffffL;
+    t3   += t2  >> 57; r[ 2] = t2  & 0x1ffffffffffffffL;
+    t4   += t3  >> 57; r[ 3] = t3  & 0x1ffffffffffffffL;
+    t5   += t4  >> 57; r[ 4] = t4  & 0x1ffffffffffffffL;
+    t6   += t5  >> 57; r[ 5] = t5  & 0x1ffffffffffffffL;
+    t7   += t6  >> 57; r[ 6] = t6  & 0x1ffffffffffffffL;
+    t8   += t7  >> 57; r[ 7] = t7  & 0x1ffffffffffffffL;
+    t9   += t8  >> 57; r[ 8] = t8  & 0x1ffffffffffffffL;
+    t10  += t9  >> 57; r[ 9] = t9  & 0x1ffffffffffffffL;
+    t11  += t10 >> 57; r[10] = t10 & 0x1ffffffffffffffL;
+    t12  += t11 >> 57; r[11] = t11 & 0x1ffffffffffffffL;
+    t13  += t12 >> 57; r[12] = t12 & 0x1ffffffffffffffL;
+    t14  += t13 >> 57; r[13] = t13 & 0x1ffffffffffffffL;
+    t15  += t14 >> 57; r[14] = t14 & 0x1ffffffffffffffL;
+    t16  += t15 >> 57; r[15] = t15 & 0x1ffffffffffffffL;
     r[17] = (sp_digit)(t16 >> 57);
-                       r[16] = t16 & 0x1ffffffffffffffl;
+                       r[16] = t16 & 0x1ffffffffffffffL;
 }
 
 /* Square a and put result in r. (r = a * a)
@@ -354,24 +381,24 @@
     int128_t t15  = (((int128_t)a[ 7]) * a[ 8]) * 2;
     int128_t t16  =  ((int128_t)a[ 8]) * a[ 8];
 
-    t1   += t0  >> 57; r[ 0] = t0  & 0x1ffffffffffffffl;
-    t2   += t1  >> 57; r[ 1] = t1  & 0x1ffffffffffffffl;
-    t3   += t2  >> 57; r[ 2] = t2  & 0x1ffffffffffffffl;
-    t4   += t3  >> 57; r[ 3] = t3  & 0x1ffffffffffffffl;
-    t5   += t4  >> 57; r[ 4] = t4  & 0x1ffffffffffffffl;
-    t6   += t5  >> 57; r[ 5] = t5  & 0x1ffffffffffffffl;
-    t7   += t6  >> 57; r[ 6] = t6  & 0x1ffffffffffffffl;
-    t8   += t7  >> 57; r[ 7] = t7  & 0x1ffffffffffffffl;
-    t9   += t8  >> 57; r[ 8] = t8  & 0x1ffffffffffffffl;
-    t10  += t9  >> 57; r[ 9] = t9  & 0x1ffffffffffffffl;
-    t11  += t10 >> 57; r[10] = t10 & 0x1ffffffffffffffl;
-    t12  += t11 >> 57; r[11] = t11 & 0x1ffffffffffffffl;
-    t13  += t12 >> 57; r[12] = t12 & 0x1ffffffffffffffl;
-    t14  += t13 >> 57; r[13] = t13 & 0x1ffffffffffffffl;
-    t15  += t14 >> 57; r[14] = t14 & 0x1ffffffffffffffl;
-    t16  += t15 >> 57; r[15] = t15 & 0x1ffffffffffffffl;
+    t1   += t0  >> 57; r[ 0] = t0  & 0x1ffffffffffffffL;
+    t2   += t1  >> 57; r[ 1] = t1  & 0x1ffffffffffffffL;
+    t3   += t2  >> 57; r[ 2] = t2  & 0x1ffffffffffffffL;
+    t4   += t3  >> 57; r[ 3] = t3  & 0x1ffffffffffffffL;
+    t5   += t4  >> 57; r[ 4] = t4  & 0x1ffffffffffffffL;
+    t6   += t5  >> 57; r[ 5] = t5  & 0x1ffffffffffffffL;
+    t7   += t6  >> 57; r[ 6] = t6  & 0x1ffffffffffffffL;
+    t8   += t7  >> 57; r[ 7] = t7  & 0x1ffffffffffffffL;
+    t9   += t8  >> 57; r[ 8] = t8  & 0x1ffffffffffffffL;
+    t10  += t9  >> 57; r[ 9] = t9  & 0x1ffffffffffffffL;
+    t11  += t10 >> 57; r[10] = t10 & 0x1ffffffffffffffL;
+    t12  += t11 >> 57; r[11] = t11 & 0x1ffffffffffffffL;
+    t13  += t12 >> 57; r[12] = t12 & 0x1ffffffffffffffL;
+    t14  += t13 >> 57; r[13] = t13 & 0x1ffffffffffffffL;
+    t15  += t14 >> 57; r[14] = t14 & 0x1ffffffffffffffL;
+    t16  += t15 >> 57; r[15] = t15 & 0x1ffffffffffffffL;
     r[17] = (sp_digit)(t16 >> 57);
-                       r[16] = t16 & 0x1ffffffffffffffl;
+                       r[16] = t16 & 0x1ffffffffffffffL;
 }
 
 /* Add b to a into r. (r = a + b)
@@ -464,14 +491,14 @@
     sp_digit* a1 = z1;
     sp_digit b1[9];
     sp_digit* z2 = r + 18;
-    sp_2048_add_9(a1, a, &a[9]);
-    sp_2048_add_9(b1, b, &b[9]);
+    (void)sp_2048_add_9(a1, a, &a[9]);
+    (void)sp_2048_add_9(b1, b, &b[9]);
     sp_2048_mul_9(z2, &a[9], &b[9]);
     sp_2048_mul_9(z0, a, b);
     sp_2048_mul_9(z1, a1, b1);
-    sp_2048_sub_18(z1, z1, z2);
-    sp_2048_sub_18(z1, z1, z0);
-    sp_2048_add_18(r + 9, r + 9, z1);
+    (void)sp_2048_sub_18(z1, z1, z2);
+    (void)sp_2048_sub_18(z1, z1, z0);
+    (void)sp_2048_add_18(r + 9, r + 9, z1);
 }
 
 /* Square a and put result in r. (r = a * a)
@@ -485,13 +512,13 @@
     sp_digit z1[18];
     sp_digit* a1 = z1;
     sp_digit* z2 = r + 18;
-    sp_2048_add_9(a1, a, &a[9]);
+    (void)sp_2048_add_9(a1, a, &a[9]);
     sp_2048_sqr_9(z2, &a[9]);
     sp_2048_sqr_9(z0, a);
     sp_2048_sqr_9(z1, a1);
-    sp_2048_sub_18(z1, z1, z2);
-    sp_2048_sub_18(z1, z1, z0);
-    sp_2048_add_18(r + 9, r + 9, z1);
+    (void)sp_2048_sub_18(z1, z1, z2);
+    (void)sp_2048_sub_18(z1, z1, z0);
+    (void)sp_2048_add_18(r + 9, r + 9, z1);
 }
 
 /* Add b to a into r. (r = a + b)
@@ -566,14 +593,14 @@
     sp_digit* a1 = z1;
     sp_digit b1[18];
     sp_digit* z2 = r + 36;
-    sp_2048_add_18(a1, a, &a[18]);
-    sp_2048_add_18(b1, b, &b[18]);
+    (void)sp_2048_add_18(a1, a, &a[18]);
+    (void)sp_2048_add_18(b1, b, &b[18]);
     sp_2048_mul_18(z2, &a[18], &b[18]);
     sp_2048_mul_18(z0, a, b);
     sp_2048_mul_18(z1, a1, b1);
-    sp_2048_sub_36(z1, z1, z2);
-    sp_2048_sub_36(z1, z1, z0);
-    sp_2048_add_36(r + 18, r + 18, z1);
+    (void)sp_2048_sub_36(z1, z1, z2);
+    (void)sp_2048_sub_36(z1, z1, z0);
+    (void)sp_2048_add_36(r + 18, r + 18, z1);
 }
 
 /* Square a and put result in r. (r = a * a)
@@ -587,16 +614,16 @@
     sp_digit z1[36];
     sp_digit* a1 = z1;
     sp_digit* z2 = r + 36;
-    sp_2048_add_18(a1, a, &a[18]);
+    (void)sp_2048_add_18(a1, a, &a[18]);
     sp_2048_sqr_18(z2, &a[18]);
     sp_2048_sqr_18(z0, a);
     sp_2048_sqr_18(z1, a1);
-    sp_2048_sub_36(z1, z1, z2);
-    sp_2048_sub_36(z1, z1, z0);
-    sp_2048_add_36(r + 18, r + 18, z1);
-}
-
-#endif /* WOLFSSL_SP_SMALL */
+    (void)sp_2048_sub_36(z1, z1, z2);
+    (void)sp_2048_sub_36(z1, z1, z0);
+    (void)sp_2048_add_36(r + 18, r + 18, z1);
+}
+
+#endif /* !WOLFSSL_SP_SMALL */
 #ifdef WOLFSSL_SP_SMALL
 /* Add b to a into r. (r = a + b)
  *
@@ -609,8 +636,9 @@
 {
     int i;
 
-    for (i = 0; i < 36; i++)
+    for (i = 0; i < 36; i++) {
         r[i] = a[i] + b[i];
+    }
 
     return 0;
 }
@@ -627,8 +655,9 @@
 {
     int i;
 
-    for (i = 0; i < 36; i++)
+    for (i = 0; i < 36; i++) {
         r[i] = a[i] - b[i];
+    }
 
     return 0;
 }
@@ -649,20 +678,22 @@
 
     c = ((int128_t)a[35]) * b[35];
     r[71] = (sp_digit)(c >> 57);
-    c = (c & 0x1ffffffffffffffl) << 57;
+    c = (c & 0x1ffffffffffffffL) << 57;
     for (k = 69; k >= 0; k--) {
         for (i = 35; i >= 0; i--) {
             j = k - i;
-            if (j >= 36)
-                break;
-            if (j < 0)
+            if (j >= 36) {
+                break;
+            }
+            if (j < 0) {
                 continue;
+            }
 
             c += ((int128_t)a[i]) * b[j];
         }
         r[k + 2] += c >> 114;
-        r[k + 1] = (c >> 57) & 0x1ffffffffffffffl;
-        c = (c & 0x1ffffffffffffffl) << 57;
+        r[k + 1] = (c >> 57) & 0x1ffffffffffffffL;
+        c = (c & 0x1ffffffffffffffL) << 57;
     }
     r[0] = (sp_digit)(c >> 57);
 }
@@ -679,29 +710,32 @@
 
     c = ((int128_t)a[35]) * a[35];
     r[71] = (sp_digit)(c >> 57);
-    c = (c & 0x1ffffffffffffffl) << 57;
+    c = (c & 0x1ffffffffffffffL) << 57;
     for (k = 69; k >= 0; k--) {
         for (i = 35; i >= 0; i--) {
             j = k - i;
-            if (j >= 36 || i <= j)
-                break;
-            if (j < 0)
+            if (j >= 36 || i <= j) {
+                break;
+            }
+            if (j < 0) {
                 continue;
+            }
 
             c += ((int128_t)a[i]) * a[j] * 2;
         }
-        if (i == j)
+        if (i == j) {
            c += ((int128_t)a[i]) * a[i];
+        }
 
         r[k + 2] += c >> 114;
-        r[k + 1] = (c >> 57) & 0x1ffffffffffffffl;
-        c = (c & 0x1ffffffffffffffl) << 57;
+        r[k + 1] = (c >> 57) & 0x1ffffffffffffffL;
+        c = (c & 0x1ffffffffffffffL) << 57;
     }
     r[0] = (sp_digit)(c >> 57);
 }
 
 #endif /* WOLFSSL_SP_SMALL */
-#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA)
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
 #ifdef WOLFSSL_SP_SMALL
 /* Add b to a into r. (r = a + b)
  *
@@ -714,8 +748,9 @@
 {
     int i;
 
-    for (i = 0; i < 18; i++)
+    for (i = 0; i < 18; i++) {
         r[i] = a[i] + b[i];
+    }
 
     return 0;
 }
@@ -732,8 +767,9 @@
 {
     int i;
 
-    for (i = 0; i < 18; i++)
+    for (i = 0; i < 18; i++) {
         r[i] = a[i] - b[i];
+    }
 
     return 0;
 }
@@ -754,20 +790,22 @@
 
     c = ((int128_t)a[17]) * b[17];
     r[35] = (sp_digit)(c >> 57);
-    c = (c & 0x1ffffffffffffffl) << 57;
+    c = (c & 0x1ffffffffffffffL) << 57;
     for (k = 33; k >= 0; k--) {
         for (i = 17; i >= 0; i--) {
             j = k - i;
-            if (j >= 18)
-                break;
-            if (j < 0)
+            if (j >= 18) {
+                break;
+            }
+            if (j < 0) {
                 continue;
+            }
 
             c += ((int128_t)a[i]) * b[j];
         }
         r[k + 2] += c >> 114;
-        r[k + 1] = (c >> 57) & 0x1ffffffffffffffl;
-        c = (c & 0x1ffffffffffffffl) << 57;
+        r[k + 1] = (c >> 57) & 0x1ffffffffffffffL;
+        c = (c & 0x1ffffffffffffffL) << 57;
     }
     r[0] = (sp_digit)(c >> 57);
 }
@@ -784,36 +822,39 @@
 
     c = ((int128_t)a[17]) * a[17];
     r[35] = (sp_digit)(c >> 57);
-    c = (c & 0x1ffffffffffffffl) << 57;
+    c = (c & 0x1ffffffffffffffL) << 57;
     for (k = 33; k >= 0; k--) {
         for (i = 17; i >= 0; i--) {
             j = k - i;
-            if (j >= 18 || i <= j)
-                break;
-            if (j < 0)
+            if (j >= 18 || i <= j) {
+                break;
+            }
+            if (j < 0) {
                 continue;
+            }
 
             c += ((int128_t)a[i]) * a[j] * 2;
         }
-        if (i == j)
+        if (i == j) {
            c += ((int128_t)a[i]) * a[i];
+        }
 
         r[k + 2] += c >> 114;
-        r[k + 1] = (c >> 57) & 0x1ffffffffffffffl;
-        c = (c & 0x1ffffffffffffffl) << 57;
+        r[k + 1] = (c >> 57) & 0x1ffffffffffffffL;
+        c = (c & 0x1ffffffffffffffL) << 57;
     }
     r[0] = (sp_digit)(c >> 57);
 }
 
 #endif /* WOLFSSL_SP_SMALL */
-#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
 
 /* Caclulate the bottom digit of -1/a mod 2^n.
  *
  * a    A single precision number.
  * rho  Bottom word of inverse.
  */
-static void sp_2048_mont_setup(sp_digit* a, sp_digit* rho)
+static void sp_2048_mont_setup(const sp_digit* a, sp_digit* rho)
 {
     sp_digit x, b;
 
@@ -823,46 +864,101 @@
     x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
     x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
     x *= 2 - b * x;               /* here x*a==1 mod 2**64 */
-    x &= 0x1ffffffffffffffl;
+    x &= 0x1ffffffffffffffL;
 
     /* rho = -1/m mod b */
     *rho = (1L << 57) - x;
 }
 
-#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA)
+/* Multiply a by scalar b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A scalar.
+ */
+SP_NOINLINE static void sp_2048_mul_d_36(sp_digit* r, const sp_digit* a,
+    sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int128_t tb = b;
+    int128_t t = 0;
+    int i;
+
+    for (i = 0; i < 36; i++) {
+        t += tb * a[i];
+        r[i] = t & 0x1ffffffffffffffL;
+        t >>= 57;
+    }
+    r[36] = (sp_digit)t;
+#else
+    int128_t tb = b;
+    int128_t t[8];
+    int i;
+
+    t[0] = tb * a[0]; r[0] = t[0] & 0x1ffffffffffffffL;
+    for (i = 0; i < 32; i += 8) {
+        t[1] = tb * a[i+1];
+        r[i+1] = (sp_digit)(t[0] >> 57) + (t[1] & 0x1ffffffffffffffL);
+        t[2] = tb * a[i+2];
+        r[i+2] = (sp_digit)(t[1] >> 57) + (t[2] & 0x1ffffffffffffffL);
+        t[3] = tb * a[i+3];
+        r[i+3] = (sp_digit)(t[2] >> 57) + (t[3] & 0x1ffffffffffffffL);
+        t[4] = tb * a[i+4];
+        r[i+4] = (sp_digit)(t[3] >> 57) + (t[4] & 0x1ffffffffffffffL);
+        t[5] = tb * a[i+5];
+        r[i+5] = (sp_digit)(t[4] >> 57) + (t[5] & 0x1ffffffffffffffL);
+        t[6] = tb * a[i+6];
+        r[i+6] = (sp_digit)(t[5] >> 57) + (t[6] & 0x1ffffffffffffffL);
+        t[7] = tb * a[i+7];
+        r[i+7] = (sp_digit)(t[6] >> 57) + (t[7] & 0x1ffffffffffffffL);
+        t[0] = tb * a[i+8];
+        r[i+8] = (sp_digit)(t[7] >> 57) + (t[0] & 0x1ffffffffffffffL);
+    }
+    t[1] = tb * a[33];
+    r[33] = (sp_digit)(t[0] >> 57) + (t[1] & 0x1ffffffffffffffL);
+    t[2] = tb * a[34];
+    r[34] = (sp_digit)(t[1] >> 57) + (t[2] & 0x1ffffffffffffffL);
+    t[3] = tb * a[35];
+    r[35] = (sp_digit)(t[2] >> 57) + (t[3] & 0x1ffffffffffffffL);
+    r[36] =  (sp_digit)(t[3] >> 57);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
 /* r = 2^n mod m where n is the number of bits to reduce by.
  * Given m must be 2048 bits, just need to subtract.
  *
  * r  A single precision number.
- * m  A signle precision number.
- */
-static void sp_2048_mont_norm_18(sp_digit* r, sp_digit* m)
+ * m  A single precision number.
+ */
+static void sp_2048_mont_norm_18(sp_digit* r, const sp_digit* m)
 {
     /* Set r = 2^n - 1. */
 #ifdef WOLFSSL_SP_SMALL
     int i;
 
-    for (i=0; i<17; i++)
-        r[i] = 0x1ffffffffffffffl;
+    for (i=0; i<17; i++) {
+        r[i] = 0x1ffffffffffffffL;
+    }
 #else
     int i;
 
     for (i = 0; i < 16; i += 8) {
-        r[i + 0] = 0x1ffffffffffffffl;
-        r[i + 1] = 0x1ffffffffffffffl;
-        r[i + 2] = 0x1ffffffffffffffl;
-        r[i + 3] = 0x1ffffffffffffffl;
-        r[i + 4] = 0x1ffffffffffffffl;
-        r[i + 5] = 0x1ffffffffffffffl;
-        r[i + 6] = 0x1ffffffffffffffl;
-        r[i + 7] = 0x1ffffffffffffffl;
-    }
-    r[16] = 0x1ffffffffffffffl;
-#endif
-    r[17] = 0x7fffffffffffffl;
+        r[i + 0] = 0x1ffffffffffffffL;
+        r[i + 1] = 0x1ffffffffffffffL;
+        r[i + 2] = 0x1ffffffffffffffL;
+        r[i + 3] = 0x1ffffffffffffffL;
+        r[i + 4] = 0x1ffffffffffffffL;
+        r[i + 5] = 0x1ffffffffffffffL;
+        r[i + 6] = 0x1ffffffffffffffL;
+        r[i + 7] = 0x1ffffffffffffffL;
+    }
+    r[16] = 0x1ffffffffffffffL;
+#endif
+    r[17] = 0x7fffffffffffffL;
 
     /* r = (2^n - 1) mod n */
-    sp_2048_sub_18(r, r, m);
+    (void)sp_2048_sub_18(r, r, m);
 
     /* Add one so r = 2^n mod m */
     r[0] += 1;
@@ -881,22 +977,23 @@
 #ifdef WOLFSSL_SP_SMALL
     int i;
 
-    for (i=17; i>=0; i--)
-        r |= (a[i] - b[i]) & (0 - !r);
-#else
-    int i;
-
-    r |= (a[17] - b[17]) & (0 - !r);
-    r |= (a[16] - b[16]) & (0 - !r);
+    for (i=17; i>=0; i--) {
+        r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    }
+#else
+    int i;
+
+    r |= (a[17] - b[17]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[16] - b[16]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
     for (i = 8; i >= 0; i -= 8) {
-        r |= (a[i + 7] - b[i + 7]) & (0 - !r);
-        r |= (a[i + 6] - b[i + 6]) & (0 - !r);
-        r |= (a[i + 5] - b[i + 5]) & (0 - !r);
-        r |= (a[i + 4] - b[i + 4]) & (0 - !r);
-        r |= (a[i + 3] - b[i + 3]) & (0 - !r);
-        r |= (a[i + 2] - b[i + 2]) & (0 - !r);
-        r |= (a[i + 1] - b[i + 1]) & (0 - !r);
-        r |= (a[i + 0] - b[i + 0]) & (0 - !r);
+        r |= (a[i + 7] - b[i + 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 6] - b[i + 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 5] - b[i + 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 4] - b[i + 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 3] - b[i + 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 2] - b[i + 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 1] - b[i + 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 0] - b[i + 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
     }
 #endif /* WOLFSSL_SP_SMALL */
 
@@ -917,8 +1014,9 @@
 #ifdef WOLFSSL_SP_SMALL
     int i;
 
-    for (i = 0; i < 18; i++)
+    for (i = 0; i < 18; i++) {
         r[i] = a[i] - (b[i] & m);
+    }
 #else
     int i;
 
@@ -953,7 +1051,7 @@
 
     for (i = 0; i < 18; i++) {
         t += (tb * a[i]) + r[i];
-        r[i] = t & 0x1ffffffffffffffl;
+        r[i] = t & 0x1ffffffffffffffL;
         t >>= 57;
     }
     r[18] += t;
@@ -962,27 +1060,27 @@
     int128_t t[8];
     int i;
 
-    t[0] = tb * a[0]; r[0] += t[0] & 0x1ffffffffffffffl;
+    t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1ffffffffffffffL);
     for (i = 0; i < 16; i += 8) {
         t[1] = tb * a[i+1];
-        r[i+1] += (t[0] >> 57) + (t[1] & 0x1ffffffffffffffl);
+        r[i+1] += (sp_digit)((t[0] >> 57) + (t[1] & 0x1ffffffffffffffL));
         t[2] = tb * a[i+2];
-        r[i+2] += (t[1] >> 57) + (t[2] & 0x1ffffffffffffffl);
+        r[i+2] += (sp_digit)((t[1] >> 57) + (t[2] & 0x1ffffffffffffffL));
         t[3] = tb * a[i+3];
-        r[i+3] += (t[2] >> 57) + (t[3] & 0x1ffffffffffffffl);
+        r[i+3] += (sp_digit)((t[2] >> 57) + (t[3] & 0x1ffffffffffffffL));
         t[4] = tb * a[i+4];
-        r[i+4] += (t[3] >> 57) + (t[4] & 0x1ffffffffffffffl);
+        r[i+4] += (sp_digit)((t[3] >> 57) + (t[4] & 0x1ffffffffffffffL));
         t[5] = tb * a[i+5];
-        r[i+5] += (t[4] >> 57) + (t[5] & 0x1ffffffffffffffl);
+        r[i+5] += (sp_digit)((t[4] >> 57) + (t[5] & 0x1ffffffffffffffL));
         t[6] = tb * a[i+6];
-        r[i+6] += (t[5] >> 57) + (t[6] & 0x1ffffffffffffffl);
+        r[i+6] += (sp_digit)((t[5] >> 57) + (t[6] & 0x1ffffffffffffffL));
         t[7] = tb * a[i+7];
-        r[i+7] += (t[6] >> 57) + (t[7] & 0x1ffffffffffffffl);
+        r[i+7] += (sp_digit)((t[6] >> 57) + (t[7] & 0x1ffffffffffffffL));
         t[0] = tb * a[i+8];
-        r[i+8] += (t[7] >> 57) + (t[0] & 0x1ffffffffffffffl);
-    }
-    t[1] = tb * a[17]; r[17] += (t[0] >> 57) + (t[1] & 0x1ffffffffffffffl);
-    r[18] +=  t[1] >> 57;
+        r[i+8] += (sp_digit)((t[7] >> 57) + (t[0] & 0x1ffffffffffffffL));
+    }
+    t[1] = tb * a[17]; r[17] += (sp_digit)((t[0] >> 57) + (t[1] & 0x1ffffffffffffffL));
+    r[18] +=  (sp_digit)(t[1] >> 57);
 #endif /* WOLFSSL_SP_SMALL */
 }
 
@@ -996,23 +1094,23 @@
     int i;
     for (i = 0; i < 17; i++) {
         a[i+1] += a[i] >> 57;
-        a[i] &= 0x1ffffffffffffffl;
+        a[i] &= 0x1ffffffffffffffL;
     }
 #else
     int i;
     for (i = 0; i < 16; i += 8) {
-        a[i+1] += a[i+0] >> 57; a[i+0] &= 0x1ffffffffffffffl;
-        a[i+2] += a[i+1] >> 57; a[i+1] &= 0x1ffffffffffffffl;
-        a[i+3] += a[i+2] >> 57; a[i+2] &= 0x1ffffffffffffffl;
-        a[i+4] += a[i+3] >> 57; a[i+3] &= 0x1ffffffffffffffl;
-        a[i+5] += a[i+4] >> 57; a[i+4] &= 0x1ffffffffffffffl;
-        a[i+6] += a[i+5] >> 57; a[i+5] &= 0x1ffffffffffffffl;
-        a[i+7] += a[i+6] >> 57; a[i+6] &= 0x1ffffffffffffffl;
-        a[i+8] += a[i+7] >> 57; a[i+7] &= 0x1ffffffffffffffl;
-        a[i+9] += a[i+8] >> 57; a[i+8] &= 0x1ffffffffffffffl;
+        a[i+1] += a[i+0] >> 57; a[i+0] &= 0x1ffffffffffffffL;
+        a[i+2] += a[i+1] >> 57; a[i+1] &= 0x1ffffffffffffffL;
+        a[i+3] += a[i+2] >> 57; a[i+2] &= 0x1ffffffffffffffL;
+        a[i+4] += a[i+3] >> 57; a[i+3] &= 0x1ffffffffffffffL;
+        a[i+5] += a[i+4] >> 57; a[i+4] &= 0x1ffffffffffffffL;
+        a[i+6] += a[i+5] >> 57; a[i+5] &= 0x1ffffffffffffffL;
+        a[i+7] += a[i+6] >> 57; a[i+6] &= 0x1ffffffffffffffL;
+        a[i+8] += a[i+7] >> 57; a[i+7] &= 0x1ffffffffffffffL;
+        a[i+9] += a[i+8] >> 57; a[i+8] &= 0x1ffffffffffffffL;
     }
     a[16+1] += a[16] >> 57;
-    a[16] &= 0x1ffffffffffffffl;
+    a[16] &= 0x1ffffffffffffffL;
 #endif
 }
 
@@ -1029,31 +1127,32 @@
 
     n = a[17] >> 55;
     for (i = 0; i < 17; i++) {
-        n += a[18 + i] << 2;
-        r[i] = n & 0x1ffffffffffffffl;
+        n += (word64)a[18 + i] << 2;
+        r[i] = n & 0x1ffffffffffffffL;
         n >>= 57;
     }
-    n += a[35] << 2;
+    n += (word64)a[35] << 2;
     r[17] = n;
 #else
     word64 n;
     int i;
 
-    n  = a[17] >> 55;
+    n  = (word64)a[17];
+    n  = n >> 55U;
     for (i = 0; i < 16; i += 8) {
-        n += a[i+18] << 2; r[i+0] = n & 0x1ffffffffffffffl; n >>= 57;
-        n += a[i+19] << 2; r[i+1] = n & 0x1ffffffffffffffl; n >>= 57;
-        n += a[i+20] << 2; r[i+2] = n & 0x1ffffffffffffffl; n >>= 57;
-        n += a[i+21] << 2; r[i+3] = n & 0x1ffffffffffffffl; n >>= 57;
-        n += a[i+22] << 2; r[i+4] = n & 0x1ffffffffffffffl; n >>= 57;
-        n += a[i+23] << 2; r[i+5] = n & 0x1ffffffffffffffl; n >>= 57;
-        n += a[i+24] << 2; r[i+6] = n & 0x1ffffffffffffffl; n >>= 57;
-        n += a[i+25] << 2; r[i+7] = n & 0x1ffffffffffffffl; n >>= 57;
-    }
-    n += a[34] << 2; r[16] = n & 0x1ffffffffffffffl; n >>= 57;
-    n += a[35] << 2; r[17] = n;
-#endif /* WOLFSSL_SP_SMALL */
-    XMEMSET(&r[18], 0, sizeof(*r) * 18);
+        n += (word64)a[i+18] << 2U; r[i+0] = n & 0x1ffffffffffffffUL; n >>= 57U;
+        n += (word64)a[i+19] << 2U; r[i+1] = n & 0x1ffffffffffffffUL; n >>= 57U;
+        n += (word64)a[i+20] << 2U; r[i+2] = n & 0x1ffffffffffffffUL; n >>= 57U;
+        n += (word64)a[i+21] << 2U; r[i+3] = n & 0x1ffffffffffffffUL; n >>= 57U;
+        n += (word64)a[i+22] << 2U; r[i+4] = n & 0x1ffffffffffffffUL; n >>= 57U;
+        n += (word64)a[i+23] << 2U; r[i+5] = n & 0x1ffffffffffffffUL; n >>= 57U;
+        n += (word64)a[i+24] << 2U; r[i+6] = n & 0x1ffffffffffffffUL; n >>= 57U;
+        n += (word64)a[i+25] << 2U; r[i+7] = n & 0x1ffffffffffffffUL; n >>= 57U;
+    }
+    n += (word64)a[34] << 2U; r[16] = n & 0x1ffffffffffffffUL; n >>= 57U;
+    n += (word64)a[35] << 2U; r[17] = n;
+#endif /* WOLFSSL_SP_SMALL */
+    XMEMSET(&r[18], 0, sizeof(*r) * 18U);
 }
 
 /* Reduce the number back to 2048 bits using Montgomery reduction.
@@ -1062,23 +1161,26 @@
  * m   The single precision number representing the modulus.
  * mp  The digit representing the negative inverse of m mod 2^n.
  */
-static void sp_2048_mont_reduce_18(sp_digit* a, sp_digit* m, sp_digit mp)
+static void sp_2048_mont_reduce_18(sp_digit* a, const sp_digit* m, sp_digit mp)
 {
     int i;
     sp_digit mu;
 
+    sp_2048_norm_18(a + 18);
+
     for (i=0; i<17; i++) {
-        mu = (a[i] * mp) & 0x1ffffffffffffffl;
+        mu = (a[i] * mp) & 0x1ffffffffffffffL;
         sp_2048_mul_add_18(a+i, m, mu);
         a[i+1] += a[i] >> 57;
     }
-    mu = (a[i] * mp) & 0x7fffffffffffffl;
+    mu = (a[i] * mp) & 0x7fffffffffffffL;
     sp_2048_mul_add_18(a+i, m, mu);
     a[i+1] += a[i] >> 57;
-    a[i] &= 0x1ffffffffffffffl;
+    a[i] &= 0x1ffffffffffffffL;
 
     sp_2048_mont_shift_18(a, a);
-    sp_2048_cond_sub_18(a, a, m, 0 - ((a[17] >> 55) > 0));
+    sp_2048_cond_sub_18(a, a, m, 0 - (((a[17] >> 55) > 0) ?
+            (sp_digit)1 : (sp_digit)0));
     sp_2048_norm_18(a);
 }
 
@@ -1091,8 +1193,8 @@
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_2048_mont_mul_18(sp_digit* r, sp_digit* a, sp_digit* b,
-        sp_digit* m, sp_digit mp)
+static void sp_2048_mont_mul_18(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
 {
     sp_2048_mul_18(r, a, b);
     sp_2048_mont_reduce_18(r, m, mp);
@@ -1105,7 +1207,7 @@
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_2048_mont_sqr_18(sp_digit* r, sp_digit* a, sp_digit* m,
+static void sp_2048_mont_sqr_18(sp_digit* r, const sp_digit* a, const sp_digit* m,
         sp_digit mp)
 {
     sp_2048_sqr_18(r, a);
@@ -1119,7 +1221,7 @@
  * b  A scalar.
  */
 SP_NOINLINE static void sp_2048_mul_d_18(sp_digit* r, const sp_digit* a,
-    const sp_digit b)
+    sp_digit b)
 {
 #ifdef WOLFSSL_SP_SMALL
     int128_t tb = b;
@@ -1128,7 +1230,7 @@
 
     for (i = 0; i < 18; i++) {
         t += tb * a[i];
-        r[i] = t & 0x1ffffffffffffffl;
+        r[i] = t & 0x1ffffffffffffffL;
         t >>= 57;
     }
     r[18] = (sp_digit)t;
@@ -1137,27 +1239,27 @@
     int128_t t[8];
     int i;
 
-    t[0] = tb * a[0]; r[0] = t[0] & 0x1ffffffffffffffl;
+    t[0] = tb * a[0]; r[0] = t[0] & 0x1ffffffffffffffL;
     for (i = 0; i < 16; i += 8) {
         t[1] = tb * a[i+1];
-        r[i+1] = (sp_digit)(t[0] >> 57) + (t[1] & 0x1ffffffffffffffl);
+        r[i+1] = (sp_digit)(t[0] >> 57) + (t[1] & 0x1ffffffffffffffL);
         t[2] = tb * a[i+2];
-        r[i+2] = (sp_digit)(t[1] >> 57) + (t[2] & 0x1ffffffffffffffl);
+        r[i+2] = (sp_digit)(t[1] >> 57) + (t[2] & 0x1ffffffffffffffL);
         t[3] = tb * a[i+3];
-        r[i+3] = (sp_digit)(t[2] >> 57) + (t[3] & 0x1ffffffffffffffl);
+        r[i+3] = (sp_digit)(t[2] >> 57) + (t[3] & 0x1ffffffffffffffL);
         t[4] = tb * a[i+4];
-        r[i+4] = (sp_digit)(t[3] >> 57) + (t[4] & 0x1ffffffffffffffl);
+        r[i+4] = (sp_digit)(t[3] >> 57) + (t[4] & 0x1ffffffffffffffL);
         t[5] = tb * a[i+5];
-        r[i+5] = (sp_digit)(t[4] >> 57) + (t[5] & 0x1ffffffffffffffl);
+        r[i+5] = (sp_digit)(t[4] >> 57) + (t[5] & 0x1ffffffffffffffL);
         t[6] = tb * a[i+6];
-        r[i+6] = (sp_digit)(t[5] >> 57) + (t[6] & 0x1ffffffffffffffl);
+        r[i+6] = (sp_digit)(t[5] >> 57) + (t[6] & 0x1ffffffffffffffL);
         t[7] = tb * a[i+7];
-        r[i+7] = (sp_digit)(t[6] >> 57) + (t[7] & 0x1ffffffffffffffl);
+        r[i+7] = (sp_digit)(t[6] >> 57) + (t[7] & 0x1ffffffffffffffL);
         t[0] = tb * a[i+8];
-        r[i+8] = (sp_digit)(t[7] >> 57) + (t[0] & 0x1ffffffffffffffl);
+        r[i+8] = (sp_digit)(t[7] >> 57) + (t[0] & 0x1ffffffffffffffL);
     }
     t[1] = tb * a[17];
-    r[17] = (sp_digit)(t[0] >> 57) + (t[1] & 0x1ffffffffffffffl);
+    r[17] = (sp_digit)(t[0] >> 57) + (t[1] & 0x1ffffffffffffffL);
     r[18] =  (sp_digit)(t[1] >> 57);
 #endif /* WOLFSSL_SP_SMALL */
 }
@@ -1176,8 +1278,9 @@
 #ifdef WOLFSSL_SP_SMALL
     int i;
 
-    for (i = 0; i < 18; i++)
+    for (i = 0; i < 18; i++) {
         r[i] = a[i] + (b[i] & m);
+    }
 #else
     int i;
 
@@ -1208,8 +1311,9 @@
 {
     int i;
 
-    for (i = 0; i < 18; i++)
+    for (i = 0; i < 18; i++) {
         r[i] = a[i] - b[i];
+    }
 
     return 0;
 }
@@ -1227,28 +1331,117 @@
 {
     int i;
 
-    for (i = 0; i < 18; i++)
+    for (i = 0; i < 18; i++) {
         r[i] = a[i] + b[i];
-
-    return 0;
-}
-#endif
+    }
+
+    return 0;
+}
+#endif
+#ifdef WOLFSSL_SP_DIV_64
+static WC_INLINE sp_digit sp_2048_div_word_18(sp_digit d1, sp_digit d0,
+    sp_digit dv)
+{
+    sp_digit d, r, t;
+
+    /* All 57 bits from d1 and top 6 bits from d0. */
+    d = (d1 << 6) | (d0 >> 51);
+    r = d / dv;
+    d -= r * dv;
+    /* Up to 7 bits in r */
+    /* Next 6 bits from d0. */
+    r <<= 6;
+    d <<= 6;
+    d |= (d0 >> 45) & ((1 << 6) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 13 bits in r */
+    /* Next 6 bits from d0. */
+    r <<= 6;
+    d <<= 6;
+    d |= (d0 >> 39) & ((1 << 6) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 19 bits in r */
+    /* Next 6 bits from d0. */
+    r <<= 6;
+    d <<= 6;
+    d |= (d0 >> 33) & ((1 << 6) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 25 bits in r */
+    /* Next 6 bits from d0. */
+    r <<= 6;
+    d <<= 6;
+    d |= (d0 >> 27) & ((1 << 6) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 31 bits in r */
+    /* Next 6 bits from d0. */
+    r <<= 6;
+    d <<= 6;
+    d |= (d0 >> 21) & ((1 << 6) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 37 bits in r */
+    /* Next 6 bits from d0. */
+    r <<= 6;
+    d <<= 6;
+    d |= (d0 >> 15) & ((1 << 6) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 43 bits in r */
+    /* Next 6 bits from d0. */
+    r <<= 6;
+    d <<= 6;
+    d |= (d0 >> 9) & ((1 << 6) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 49 bits in r */
+    /* Next 6 bits from d0. */
+    r <<= 6;
+    d <<= 6;
+    d |= (d0 >> 3) & ((1 << 6) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 55 bits in r */
+    /* Remaining 3 bits from d0. */
+    r <<= 3;
+    d <<= 3;
+    d |= d0 & ((1 << 3) - 1);
+    t = d / dv;
+    r += t;
+
+    return r;
+}
+#endif /* WOLFSSL_SP_DIV_64 */
+
 /* Divide d in a and put remainder into r (m*d + r = a)
  * m is not calculated as it is not needed at this time.
  *
- * a  Nmber to be divided.
+ * a  Number to be divided.
  * d  Number to divide with.
  * m  Multiplier result.
  * r  Remainder from the division.
  * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
  */
-static int sp_2048_div_18(sp_digit* a, sp_digit* d, sp_digit* m,
+static int sp_2048_div_18(const sp_digit* a, const sp_digit* d, sp_digit* m,
         sp_digit* r)
 {
     int i;
+#ifndef WOLFSSL_SP_DIV_64
     int128_t d1;
-    sp_digit div, r1;
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#endif
+    sp_digit dv, r1;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit* td;
 #else
     sp_digit t1d[36], t2d[18 + 1];
@@ -1257,62 +1450,70 @@
     sp_digit* t2;
     int err = MP_OKAY;
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    td = XMALLOC(sizeof(sp_digit) * (3 * 18 + 1), NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    if (td != NULL) {
+    (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (3 * 18 + 1), NULL,
+                                                       DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
         t1 = td;
         t2 = td + 2 * 18;
-    }
-    else
-        err = MEMORY_E;
-#else
-    t1 = t1d;
-    t2 = t2d;
-#endif
-
-    (void)m;
-
-    if (err == MP_OKAY) {
-        div = d[17];
-        XMEMCPY(t1, a, sizeof(*t1) * 2 * 18);
+#else
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
+        dv = d[17];
+        XMEMCPY(t1, a, sizeof(*t1) * 2U * 18U);
         for (i=17; i>=0; i--) {
             t1[18 + i] += t1[18 + i - 1] >> 57;
-            t1[18 + i - 1] &= 0x1ffffffffffffffl;
+            t1[18 + i - 1] &= 0x1ffffffffffffffL;
+#ifndef WOLFSSL_SP_DIV_64
             d1 = t1[18 + i];
             d1 <<= 57;
             d1 += t1[18 + i - 1];
-            r1 = (sp_digit)(d1 / div);
+            r1 = (sp_digit)(d1 / dv);
+#else
+            r1 = sp_2048_div_word_18(t1[18 + i], t1[18 + i - 1], dv);
+#endif
 
             sp_2048_mul_d_18(t2, d, r1);
-            sp_2048_sub_18(&t1[i], &t1[i], t2);
+            (void)sp_2048_sub_18(&t1[i], &t1[i], t2);
             t1[18 + i] -= t2[18];
             t1[18 + i] += t1[18 + i - 1] >> 57;
-            t1[18 + i - 1] &= 0x1ffffffffffffffl;
-            r1 = (((-t1[18 + i]) << 57) - t1[18 + i - 1]) / div;
+            t1[18 + i - 1] &= 0x1ffffffffffffffL;
+            r1 = (((-t1[18 + i]) << 57) - t1[18 + i - 1]) / dv;
             r1++;
             sp_2048_mul_d_18(t2, d, r1);
-            sp_2048_add_18(&t1[i], &t1[i], t2);
+            (void)sp_2048_add_18(&t1[i], &t1[i], t2);
             t1[18 + i] += t1[18 + i - 1] >> 57;
-            t1[18 + i - 1] &= 0x1ffffffffffffffl;
+            t1[18 + i - 1] &= 0x1ffffffffffffffL;
         }
         t1[18 - 1] += t1[18 - 2] >> 57;
-        t1[18 - 2] &= 0x1ffffffffffffffl;
-        d1 = t1[18 - 1];
-        r1 = (sp_digit)(d1 / div);
+        t1[18 - 2] &= 0x1ffffffffffffffL;
+        r1 = t1[18 - 1] / dv;
 
         sp_2048_mul_d_18(t2, d, r1);
-        sp_2048_sub_18(t1, t1, t2);
-        XMEMCPY(r, t1, sizeof(*r) * 2 * 18);
+        (void)sp_2048_sub_18(t1, t1, t2);
+        XMEMCPY(r, t1, sizeof(*r) * 2U * 18U);
         for (i=0; i<16; i++) {
             r[i+1] += r[i] >> 57;
-            r[i] &= 0x1ffffffffffffffl;
-        }
-        sp_2048_cond_add_18(r, r, d, 0 - (r[17] < 0));
-    }
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (td != NULL)
+            r[i] &= 0x1ffffffffffffffL;
+        }
+        sp_2048_cond_add_18(r, r, d, 0 - ((r[17] < 0) ?
+                    (sp_digit)1 : (sp_digit)0));
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
 #endif
 
     return err;
@@ -1325,7 +1526,7 @@
  * m  A single precision number that is the modulus to reduce with.
  * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
  */
-static int sp_2048_mod_18(sp_digit* r, sp_digit* a, sp_digit* m)
+static int sp_2048_mod_18(sp_digit* r, const sp_digit* a, const sp_digit* m)
 {
     return sp_2048_div_18(a, m, NULL, r);
 }
@@ -1339,8 +1540,8 @@
  * m     A single precision number that is the modulus.
  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
  */
-static int sp_2048_mod_exp_18(sp_digit* r, sp_digit* a, sp_digit* e, int bits,
-    sp_digit* m, int reduceA)
+static int sp_2048_mod_exp_18(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits,
+    const sp_digit* m, int reduceA)
 {
 #ifdef WOLFSSL_SP_SMALL
     sp_digit* td;
@@ -1354,11 +1555,12 @@
 
     td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 18 * 2, NULL,
                             DYNAMIC_TYPE_TMP_BUFFER);
-    if (td == NULL)
-        err = MEMORY_E;
-
-    if (err == MP_OKAY) {
-        XMEMSET(td, 0, sizeof(*td) * 3 * 18 * 2);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        XMEMSET(td, 0, sizeof(*td) * 3U * 18U * 2U);
 
         norm = t[0] = td;
         t[1] = &td[18 * 2];
@@ -1367,10 +1569,12 @@
         sp_2048_mont_setup(m, &mp);
         sp_2048_mont_norm_18(norm, m);
 
-        if (reduceA)
+        if (reduceA != 0) {
             err = sp_2048_mod_18(t[1], a, m);
-        else
-            XMEMCPY(t[1], a, sizeof(sp_digit) * 18);
+        }
+        else {
+            XMEMCPY(t[1], a, sizeof(sp_digit) * 18U);
+        }
     }
     if (err == MP_OKAY) {
         sp_2048_mul_18(t[1], t[1], norm);
@@ -1383,8 +1587,9 @@
         n = e[i--] << (57 - c);
         for (; ; c--) {
             if (c == 0) {
-                if (i == -1)
+                if (i == -1) {
                     break;
+                }
 
                 n = e[i--];
                 c = 57;
@@ -1406,13 +1611,15 @@
 
         sp_2048_mont_reduce_18(t[0], m, mp);
         n = sp_2048_cmp_18(t[0], m);
-        sp_2048_cond_sub_18(t[0], t[0], m, (n < 0) - 1);
+        sp_2048_cond_sub_18(t[0], t[0], m, ((n < 0) ?
+                    (sp_digit)1 : (sp_digit)0) - 1);
         XMEMCPY(r, t[0], sizeof(*r) * 18 * 2);
 
     }
 
-    if (td != NULL)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
 
     return err;
 #elif defined(WOLFSSL_SP_CACHE_RESISTANT)
@@ -1432,24 +1639,23 @@
 #ifdef WOLFSSL_SMALL_STACK
     td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 18 * 2, NULL,
                             DYNAMIC_TYPE_TMP_BUFFER);
-    if (td == NULL)
-        err = MEMORY_E;
-
-    if (err == MP_OKAY) {
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
         t[0] = td;
         t[1] = &td[18 * 2];
         t[2] = &td[2 * 18 * 2];
+#endif
         norm = t[0];
-    }
-#else
-    norm = t[0];
-#endif
-
-    if (err == MP_OKAY) {
+
         sp_2048_mont_setup(m, &mp);
         sp_2048_mont_norm_18(norm, m);
 
-        if (reduceA) {
+        if (reduceA != 0) {
             err = sp_2048_mod_18(t[1], a, m);
             if (err == MP_OKAY) {
                 sp_2048_mul_18(t[1], t[1], norm);
@@ -1468,8 +1674,9 @@
         n = e[i--] << (57 - c);
         for (; ; c--) {
             if (c == 0) {
-                if (i == -1)
+                if (i == -1) {
                     break;
+                }
 
                 n = e[i--];
                 c = 57;
@@ -1489,13 +1696,15 @@
 
         sp_2048_mont_reduce_18(t[0], m, mp);
         n = sp_2048_cmp_18(t[0], m);
-        sp_2048_cond_sub_18(t[0], t[0], m, (n < 0) - 1);
+        sp_2048_cond_sub_18(t[0], t[0], m, ((n < 0) ?
+                   (sp_digit)1 : (sp_digit)0) - 1);
         XMEMCPY(r, t[0], sizeof(t[0]));
     }
 
 #ifdef WOLFSSL_SMALL_STACK
-    if (td != NULL)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
 #endif
 
     return err;
@@ -1517,23 +1726,22 @@
 #ifdef WOLFSSL_SMALL_STACK
     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 36, NULL,
                             DYNAMIC_TYPE_TMP_BUFFER);
-    if (td == NULL)
-        err = MEMORY_E;
-
-    if (err == MP_OKAY) {
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
         for (i=0; i<32; i++)
             t[i] = td + i * 36;
+#endif
         norm = t[0];
-    }
-#else
-    norm = t[0];
-#endif
-
-    if (err == MP_OKAY) {
+
         sp_2048_mont_setup(m, &mp);
         sp_2048_mont_norm_18(norm, m);
 
-        if (reduceA) {
+        if (reduceA != 0) {
             err = sp_2048_mod_18(t[1], a, m);
             if (err == MP_OKAY) {
                 sp_2048_mul_18(t[1], t[1], norm);
@@ -1581,10 +1789,12 @@
         bits = ((bits + 4) / 5) * 5;
         i = ((bits + 56) / 57) - 1;
         c = bits % 57;
-        if (c == 0)
+        if (c == 0) {
             c = 57;
-        if (i < 18)
+        }
+        if (i < 18) {
             n = e[i--] << (64 - c);
+        }
         else {
             n = 0;
             i--;
@@ -1593,7 +1803,7 @@
             n |= e[i--] << (7 - c);
             c += 57;
         }
-        y = n >> 59;
+        y = (n >> 59) & 0x1f;
         n <<= 5;
         c -= 5;
         XMEMCPY(rt, t[y], sizeof(rt));
@@ -1617,56 +1827,59 @@
 
         sp_2048_mont_reduce_18(rt, m, mp);
         n = sp_2048_cmp_18(rt, m);
-        sp_2048_cond_sub_18(rt, rt, m, (n < 0) - 1);
+        sp_2048_cond_sub_18(rt, rt, m, ((n < 0) ?
+                   (sp_digit)1 : (sp_digit)0) - 1);
         XMEMCPY(r, rt, sizeof(rt));
     }
 
 #ifdef WOLFSSL_SMALL_STACK
-    if (td != NULL)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return err;
-#endif
-}
-
-#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */
+    }
+#endif
+
+    return err;
+#endif
+}
+
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
 
 /* r = 2^n mod m where n is the number of bits to reduce by.
  * Given m must be 2048 bits, just need to subtract.
  *
  * r  A single precision number.
- * m  A signle precision number.
- */
-static void sp_2048_mont_norm_36(sp_digit* r, sp_digit* m)
+ * m  A single precision number.
+ */
+static void sp_2048_mont_norm_36(sp_digit* r, const sp_digit* m)
 {
     /* Set r = 2^n - 1. */
 #ifdef WOLFSSL_SP_SMALL
     int i;
 
-    for (i=0; i<35; i++)
-        r[i] = 0x1ffffffffffffffl;
+    for (i=0; i<35; i++) {
+        r[i] = 0x1ffffffffffffffL;
+    }
 #else
     int i;
 
     for (i = 0; i < 32; i += 8) {
-        r[i + 0] = 0x1ffffffffffffffl;
-        r[i + 1] = 0x1ffffffffffffffl;
-        r[i + 2] = 0x1ffffffffffffffl;
-        r[i + 3] = 0x1ffffffffffffffl;
-        r[i + 4] = 0x1ffffffffffffffl;
-        r[i + 5] = 0x1ffffffffffffffl;
-        r[i + 6] = 0x1ffffffffffffffl;
-        r[i + 7] = 0x1ffffffffffffffl;
-    }
-    r[32] = 0x1ffffffffffffffl;
-    r[33] = 0x1ffffffffffffffl;
-    r[34] = 0x1ffffffffffffffl;
-#endif
-    r[35] = 0x1fffffffffffffl;
+        r[i + 0] = 0x1ffffffffffffffL;
+        r[i + 1] = 0x1ffffffffffffffL;
+        r[i + 2] = 0x1ffffffffffffffL;
+        r[i + 3] = 0x1ffffffffffffffL;
+        r[i + 4] = 0x1ffffffffffffffL;
+        r[i + 5] = 0x1ffffffffffffffL;
+        r[i + 6] = 0x1ffffffffffffffL;
+        r[i + 7] = 0x1ffffffffffffffL;
+    }
+    r[32] = 0x1ffffffffffffffL;
+    r[33] = 0x1ffffffffffffffL;
+    r[34] = 0x1ffffffffffffffL;
+#endif
+    r[35] = 0x1fffffffffffffL;
 
     /* r = (2^n - 1) mod n */
-    sp_2048_sub_36(r, r, m);
+    (void)sp_2048_sub_36(r, r, m);
 
     /* Add one so r = 2^n mod m */
     r[0] += 1;
@@ -1685,24 +1898,25 @@
 #ifdef WOLFSSL_SP_SMALL
     int i;
 
-    for (i=35; i>=0; i--)
-        r |= (a[i] - b[i]) & (0 - !r);
-#else
-    int i;
-
-    r |= (a[35] - b[35]) & (0 - !r);
-    r |= (a[34] - b[34]) & (0 - !r);
-    r |= (a[33] - b[33]) & (0 - !r);
-    r |= (a[32] - b[32]) & (0 - !r);
+    for (i=35; i>=0; i--) {
+        r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    }
+#else
+    int i;
+
+    r |= (a[35] - b[35]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[34] - b[34]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[33] - b[33]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[32] - b[32]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
     for (i = 24; i >= 0; i -= 8) {
-        r |= (a[i + 7] - b[i + 7]) & (0 - !r);
-        r |= (a[i + 6] - b[i + 6]) & (0 - !r);
-        r |= (a[i + 5] - b[i + 5]) & (0 - !r);
-        r |= (a[i + 4] - b[i + 4]) & (0 - !r);
-        r |= (a[i + 3] - b[i + 3]) & (0 - !r);
-        r |= (a[i + 2] - b[i + 2]) & (0 - !r);
-        r |= (a[i + 1] - b[i + 1]) & (0 - !r);
-        r |= (a[i + 0] - b[i + 0]) & (0 - !r);
+        r |= (a[i + 7] - b[i + 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 6] - b[i + 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 5] - b[i + 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 4] - b[i + 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 3] - b[i + 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 2] - b[i + 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 1] - b[i + 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 0] - b[i + 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
     }
 #endif /* WOLFSSL_SP_SMALL */
 
@@ -1723,8 +1937,9 @@
 #ifdef WOLFSSL_SP_SMALL
     int i;
 
-    for (i = 0; i < 36; i++)
+    for (i = 0; i < 36; i++) {
         r[i] = a[i] - (b[i] & m);
+    }
 #else
     int i;
 
@@ -1761,7 +1976,7 @@
 
     for (i = 0; i < 36; i++) {
         t += (tb * a[i]) + r[i];
-        r[i] = t & 0x1ffffffffffffffl;
+        r[i] = t & 0x1ffffffffffffffL;
         t >>= 57;
     }
     r[36] += t;
@@ -1770,29 +1985,29 @@
     int128_t t[8];
     int i;
 
-    t[0] = tb * a[0]; r[0] += t[0] & 0x1ffffffffffffffl;
+    t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1ffffffffffffffL);
     for (i = 0; i < 32; i += 8) {
         t[1] = tb * a[i+1];
-        r[i+1] += (t[0] >> 57) + (t[1] & 0x1ffffffffffffffl);
+        r[i+1] += (sp_digit)((t[0] >> 57) + (t[1] & 0x1ffffffffffffffL));
         t[2] = tb * a[i+2];
-        r[i+2] += (t[1] >> 57) + (t[2] & 0x1ffffffffffffffl);
+        r[i+2] += (sp_digit)((t[1] >> 57) + (t[2] & 0x1ffffffffffffffL));
         t[3] = tb * a[i+3];
-        r[i+3] += (t[2] >> 57) + (t[3] & 0x1ffffffffffffffl);
+        r[i+3] += (sp_digit)((t[2] >> 57) + (t[3] & 0x1ffffffffffffffL));
         t[4] = tb * a[i+4];
-        r[i+4] += (t[3] >> 57) + (t[4] & 0x1ffffffffffffffl);
+        r[i+4] += (sp_digit)((t[3] >> 57) + (t[4] & 0x1ffffffffffffffL));
         t[5] = tb * a[i+5];
-        r[i+5] += (t[4] >> 57) + (t[5] & 0x1ffffffffffffffl);
+        r[i+5] += (sp_digit)((t[4] >> 57) + (t[5] & 0x1ffffffffffffffL));
         t[6] = tb * a[i+6];
-        r[i+6] += (t[5] >> 57) + (t[6] & 0x1ffffffffffffffl);
+        r[i+6] += (sp_digit)((t[5] >> 57) + (t[6] & 0x1ffffffffffffffL));
         t[7] = tb * a[i+7];
-        r[i+7] += (t[6] >> 57) + (t[7] & 0x1ffffffffffffffl);
+        r[i+7] += (sp_digit)((t[6] >> 57) + (t[7] & 0x1ffffffffffffffL));
         t[0] = tb * a[i+8];
-        r[i+8] += (t[7] >> 57) + (t[0] & 0x1ffffffffffffffl);
-    }
-    t[1] = tb * a[33]; r[33] += (t[0] >> 57) + (t[1] & 0x1ffffffffffffffl);
-    t[2] = tb * a[34]; r[34] += (t[1] >> 57) + (t[2] & 0x1ffffffffffffffl);
-    t[3] = tb * a[35]; r[35] += (t[2] >> 57) + (t[3] & 0x1ffffffffffffffl);
-    r[36] +=  t[3] >> 57;
+        r[i+8] += (sp_digit)((t[7] >> 57) + (t[0] & 0x1ffffffffffffffL));
+    }
+    t[1] = tb * a[33]; r[33] += (sp_digit)((t[0] >> 57) + (t[1] & 0x1ffffffffffffffL));
+    t[2] = tb * a[34]; r[34] += (sp_digit)((t[1] >> 57) + (t[2] & 0x1ffffffffffffffL));
+    t[3] = tb * a[35]; r[35] += (sp_digit)((t[2] >> 57) + (t[3] & 0x1ffffffffffffffL));
+    r[36] +=  (sp_digit)(t[3] >> 57);
 #endif /* WOLFSSL_SP_SMALL */
 }
 
@@ -1806,27 +2021,27 @@
     int i;
     for (i = 0; i < 35; i++) {
         a[i+1] += a[i] >> 57;
-        a[i] &= 0x1ffffffffffffffl;
+        a[i] &= 0x1ffffffffffffffL;
     }
 #else
     int i;
     for (i = 0; i < 32; i += 8) {
-        a[i+1] += a[i+0] >> 57; a[i+0] &= 0x1ffffffffffffffl;
-        a[i+2] += a[i+1] >> 57; a[i+1] &= 0x1ffffffffffffffl;
-        a[i+3] += a[i+2] >> 57; a[i+2] &= 0x1ffffffffffffffl;
-        a[i+4] += a[i+3] >> 57; a[i+3] &= 0x1ffffffffffffffl;
-        a[i+5] += a[i+4] >> 57; a[i+4] &= 0x1ffffffffffffffl;
-        a[i+6] += a[i+5] >> 57; a[i+5] &= 0x1ffffffffffffffl;
-        a[i+7] += a[i+6] >> 57; a[i+6] &= 0x1ffffffffffffffl;
-        a[i+8] += a[i+7] >> 57; a[i+7] &= 0x1ffffffffffffffl;
-        a[i+9] += a[i+8] >> 57; a[i+8] &= 0x1ffffffffffffffl;
+        a[i+1] += a[i+0] >> 57; a[i+0] &= 0x1ffffffffffffffL;
+        a[i+2] += a[i+1] >> 57; a[i+1] &= 0x1ffffffffffffffL;
+        a[i+3] += a[i+2] >> 57; a[i+2] &= 0x1ffffffffffffffL;
+        a[i+4] += a[i+3] >> 57; a[i+3] &= 0x1ffffffffffffffL;
+        a[i+5] += a[i+4] >> 57; a[i+4] &= 0x1ffffffffffffffL;
+        a[i+6] += a[i+5] >> 57; a[i+5] &= 0x1ffffffffffffffL;
+        a[i+7] += a[i+6] >> 57; a[i+6] &= 0x1ffffffffffffffL;
+        a[i+8] += a[i+7] >> 57; a[i+7] &= 0x1ffffffffffffffL;
+        a[i+9] += a[i+8] >> 57; a[i+8] &= 0x1ffffffffffffffL;
     }
     a[32+1] += a[32] >> 57;
-    a[32] &= 0x1ffffffffffffffl;
+    a[32] &= 0x1ffffffffffffffL;
     a[33+1] += a[33] >> 57;
-    a[33] &= 0x1ffffffffffffffl;
+    a[33] &= 0x1ffffffffffffffL;
     a[34+1] += a[34] >> 57;
-    a[34] &= 0x1ffffffffffffffl;
+    a[34] &= 0x1ffffffffffffffL;
 #endif
 }
 
@@ -1844,8 +2059,8 @@
     s = a[36];
     n = a[35] >> 53;
     for (i = 0; i < 35; i++) {
-        n += (s & 0x1ffffffffffffffl) << 4;
-        r[i] = n & 0x1ffffffffffffffl;
+        n += (s & 0x1ffffffffffffffL) << 4;
+        r[i] = n & 0x1ffffffffffffffL;
         n >>= 57;
         s = a[37 + i] + (s >> 57);
     }
@@ -1857,32 +2072,32 @@
 
     s = a[36]; n = a[35] >> 53;
     for (i = 0; i < 32; i += 8) {
-        n += (s & 0x1ffffffffffffffl) << 4; r[i+0] = n & 0x1ffffffffffffffl;
+        n += (s & 0x1ffffffffffffffL) << 4; r[i+0] = n & 0x1ffffffffffffffL;
         n >>= 57; s = a[i+37] + (s >> 57);
-        n += (s & 0x1ffffffffffffffl) << 4; r[i+1] = n & 0x1ffffffffffffffl;
+        n += (s & 0x1ffffffffffffffL) << 4; r[i+1] = n & 0x1ffffffffffffffL;
         n >>= 57; s = a[i+38] + (s >> 57);
-        n += (s & 0x1ffffffffffffffl) << 4; r[i+2] = n & 0x1ffffffffffffffl;
+        n += (s & 0x1ffffffffffffffL) << 4; r[i+2] = n & 0x1ffffffffffffffL;
         n >>= 57; s = a[i+39] + (s >> 57);
-        n += (s & 0x1ffffffffffffffl) << 4; r[i+3] = n & 0x1ffffffffffffffl;
+        n += (s & 0x1ffffffffffffffL) << 4; r[i+3] = n & 0x1ffffffffffffffL;
         n >>= 57; s = a[i+40] + (s >> 57);
-        n += (s & 0x1ffffffffffffffl) << 4; r[i+4] = n & 0x1ffffffffffffffl;
+        n += (s & 0x1ffffffffffffffL) << 4; r[i+4] = n & 0x1ffffffffffffffL;
         n >>= 57; s = a[i+41] + (s >> 57);
-        n += (s & 0x1ffffffffffffffl) << 4; r[i+5] = n & 0x1ffffffffffffffl;
+        n += (s & 0x1ffffffffffffffL) << 4; r[i+5] = n & 0x1ffffffffffffffL;
         n >>= 57; s = a[i+42] + (s >> 57);
-        n += (s & 0x1ffffffffffffffl) << 4; r[i+6] = n & 0x1ffffffffffffffl;
+        n += (s & 0x1ffffffffffffffL) << 4; r[i+6] = n & 0x1ffffffffffffffL;
         n >>= 57; s = a[i+43] + (s >> 57);
-        n += (s & 0x1ffffffffffffffl) << 4; r[i+7] = n & 0x1ffffffffffffffl;
+        n += (s & 0x1ffffffffffffffL) << 4; r[i+7] = n & 0x1ffffffffffffffL;
         n >>= 57; s = a[i+44] + (s >> 57);
     }
-    n += (s & 0x1ffffffffffffffl) << 4; r[32] = n & 0x1ffffffffffffffl;
+    n += (s & 0x1ffffffffffffffL) << 4; r[32] = n & 0x1ffffffffffffffL;
     n >>= 57; s = a[69] + (s >> 57);
-    n += (s & 0x1ffffffffffffffl) << 4; r[33] = n & 0x1ffffffffffffffl;
+    n += (s & 0x1ffffffffffffffL) << 4; r[33] = n & 0x1ffffffffffffffL;
     n >>= 57; s = a[70] + (s >> 57);
-    n += (s & 0x1ffffffffffffffl) << 4; r[34] = n & 0x1ffffffffffffffl;
+    n += (s & 0x1ffffffffffffffL) << 4; r[34] = n & 0x1ffffffffffffffL;
     n >>= 57; s = a[71] + (s >> 57);
     n += s << 4;              r[35] = n;
 #endif /* WOLFSSL_SP_SMALL */
-    XMEMSET(&r[36], 0, sizeof(*r) * 36);
+    XMEMSET(&r[36], 0, sizeof(*r) * 36U);
 }
 
 /* Reduce the number back to 2048 bits using Montgomery reduction.
@@ -1891,36 +2106,51 @@
  * m   The single precision number representing the modulus.
  * mp  The digit representing the negative inverse of m mod 2^n.
  */
-static void sp_2048_mont_reduce_36(sp_digit* a, sp_digit* m, sp_digit mp)
+static void sp_2048_mont_reduce_36(sp_digit* a, const sp_digit* m, sp_digit mp)
 {
     int i;
     sp_digit mu;
 
+    sp_2048_norm_36(a + 36);
+
+#ifdef WOLFSSL_SP_DH
     if (mp != 1) {
         for (i=0; i<35; i++) {
-            mu = (a[i] * mp) & 0x1ffffffffffffffl;
+            mu = (a[i] * mp) & 0x1ffffffffffffffL;
             sp_2048_mul_add_36(a+i, m, mu);
             a[i+1] += a[i] >> 57;
         }
-        mu = (a[i] * mp) & 0x1fffffffffffffl;
+        mu = (a[i] * mp) & 0x1fffffffffffffL;
         sp_2048_mul_add_36(a+i, m, mu);
         a[i+1] += a[i] >> 57;
-        a[i] &= 0x1ffffffffffffffl;
+        a[i] &= 0x1ffffffffffffffL;
     }
     else {
         for (i=0; i<35; i++) {
-            mu = a[i] & 0x1ffffffffffffffl;
+            mu = a[i] & 0x1ffffffffffffffL;
             sp_2048_mul_add_36(a+i, m, mu);
             a[i+1] += a[i] >> 57;
         }
-        mu = a[i] & 0x1fffffffffffffl;
+        mu = a[i] & 0x1fffffffffffffL;
+        sp_2048_mul_add_36(a+i, m, mu);
+        a[i+1] += a[i] >> 57;
+        a[i] &= 0x1ffffffffffffffL;
+    }
+#else
+    for (i=0; i<35; i++) {
+        mu = (a[i] * mp) & 0x1ffffffffffffffL;
         sp_2048_mul_add_36(a+i, m, mu);
         a[i+1] += a[i] >> 57;
-        a[i] &= 0x1ffffffffffffffl;
-    }
+    }
+    mu = (a[i] * mp) & 0x1fffffffffffffL;
+    sp_2048_mul_add_36(a+i, m, mu);
+    a[i+1] += a[i] >> 57;
+    a[i] &= 0x1ffffffffffffffL;
+#endif
 
     sp_2048_mont_shift_36(a, a);
-    sp_2048_cond_sub_36(a, a, m, 0 - ((a[35] >> 53) > 0));
+    sp_2048_cond_sub_36(a, a, m, 0 - (((a[35] >> 53) > 0) ?
+            (sp_digit)1 : (sp_digit)0));
     sp_2048_norm_36(a);
 }
 
@@ -1933,8 +2163,8 @@
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_2048_mont_mul_36(sp_digit* r, sp_digit* a, sp_digit* b,
-        sp_digit* m, sp_digit mp)
+static void sp_2048_mont_mul_36(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
 {
     sp_2048_mul_36(r, a, b);
     sp_2048_mont_reduce_36(r, m, mp);
@@ -1947,67 +2177,13 @@
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_2048_mont_sqr_36(sp_digit* r, sp_digit* a, sp_digit* m,
+static void sp_2048_mont_sqr_36(sp_digit* r, const sp_digit* a, const sp_digit* m,
         sp_digit mp)
 {
     sp_2048_sqr_36(r, a);
     sp_2048_mont_reduce_36(r, m, mp);
 }
 
-/* Multiply a by scalar b into r. (r = a * b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A scalar.
- */
-SP_NOINLINE static void sp_2048_mul_d_36(sp_digit* r, const sp_digit* a,
-    const sp_digit b)
-{
-#ifdef WOLFSSL_SP_SMALL
-    int128_t tb = b;
-    int128_t t = 0;
-    int i;
-
-    for (i = 0; i < 36; i++) {
-        t += tb * a[i];
-        r[i] = t & 0x1ffffffffffffffl;
-        t >>= 57;
-    }
-    r[36] = (sp_digit)t;
-#else
-    int128_t tb = b;
-    int128_t t[8];
-    int i;
-
-    t[0] = tb * a[0]; r[0] = t[0] & 0x1ffffffffffffffl;
-    for (i = 0; i < 32; i += 8) {
-        t[1] = tb * a[i+1];
-        r[i+1] = (sp_digit)(t[0] >> 57) + (t[1] & 0x1ffffffffffffffl);
-        t[2] = tb * a[i+2];
-        r[i+2] = (sp_digit)(t[1] >> 57) + (t[2] & 0x1ffffffffffffffl);
-        t[3] = tb * a[i+3];
-        r[i+3] = (sp_digit)(t[2] >> 57) + (t[3] & 0x1ffffffffffffffl);
-        t[4] = tb * a[i+4];
-        r[i+4] = (sp_digit)(t[3] >> 57) + (t[4] & 0x1ffffffffffffffl);
-        t[5] = tb * a[i+5];
-        r[i+5] = (sp_digit)(t[4] >> 57) + (t[5] & 0x1ffffffffffffffl);
-        t[6] = tb * a[i+6];
-        r[i+6] = (sp_digit)(t[5] >> 57) + (t[6] & 0x1ffffffffffffffl);
-        t[7] = tb * a[i+7];
-        r[i+7] = (sp_digit)(t[6] >> 57) + (t[7] & 0x1ffffffffffffffl);
-        t[0] = tb * a[i+8];
-        r[i+8] = (sp_digit)(t[7] >> 57) + (t[0] & 0x1ffffffffffffffl);
-    }
-    t[1] = tb * a[33];
-    r[33] = (sp_digit)(t[0] >> 57) + (t[1] & 0x1ffffffffffffffl);
-    t[2] = tb * a[34];
-    r[34] = (sp_digit)(t[1] >> 57) + (t[2] & 0x1ffffffffffffffl);
-    t[3] = tb * a[35];
-    r[35] = (sp_digit)(t[2] >> 57) + (t[3] & 0x1ffffffffffffffl);
-    r[36] =  (sp_digit)(t[3] >> 57);
-#endif /* WOLFSSL_SP_SMALL */
-}
-
 /* Conditionally add a and b using the mask m.
  * m is -1 to add and 0 when not.
  *
@@ -2022,8 +2198,9 @@
 #ifdef WOLFSSL_SP_SMALL
     int i;
 
-    for (i = 0; i < 36; i++)
+    for (i = 0; i < 36; i++) {
         r[i] = a[i] + (b[i] & m);
+    }
 #else
     int i;
 
@@ -2056,8 +2233,9 @@
 {
     int i;
 
-    for (i = 0; i < 36; i++)
+    for (i = 0; i < 36; i++) {
         r[i] = a[i] - b[i];
+    }
 
     return 0;
 }
@@ -2075,28 +2253,117 @@
 {
     int i;
 
-    for (i = 0; i < 36; i++)
+    for (i = 0; i < 36; i++) {
         r[i] = a[i] + b[i];
-
-    return 0;
-}
-#endif
+    }
+
+    return 0;
+}
+#endif
+#ifdef WOLFSSL_SP_DIV_64
+static WC_INLINE sp_digit sp_2048_div_word_36(sp_digit d1, sp_digit d0,
+    sp_digit dv)
+{
+    sp_digit d, r, t;
+
+    /* All 57 bits from d1 and top 6 bits from d0. */
+    d = (d1 << 6) | (d0 >> 51);
+    r = d / dv;
+    d -= r * dv;
+    /* Up to 7 bits in r */
+    /* Next 6 bits from d0. */
+    r <<= 6;
+    d <<= 6;
+    d |= (d0 >> 45) & ((1 << 6) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 13 bits in r */
+    /* Next 6 bits from d0. */
+    r <<= 6;
+    d <<= 6;
+    d |= (d0 >> 39) & ((1 << 6) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 19 bits in r */
+    /* Next 6 bits from d0. */
+    r <<= 6;
+    d <<= 6;
+    d |= (d0 >> 33) & ((1 << 6) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 25 bits in r */
+    /* Next 6 bits from d0. */
+    r <<= 6;
+    d <<= 6;
+    d |= (d0 >> 27) & ((1 << 6) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 31 bits in r */
+    /* Next 6 bits from d0. */
+    r <<= 6;
+    d <<= 6;
+    d |= (d0 >> 21) & ((1 << 6) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 37 bits in r */
+    /* Next 6 bits from d0. */
+    r <<= 6;
+    d <<= 6;
+    d |= (d0 >> 15) & ((1 << 6) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 43 bits in r */
+    /* Next 6 bits from d0. */
+    r <<= 6;
+    d <<= 6;
+    d |= (d0 >> 9) & ((1 << 6) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 49 bits in r */
+    /* Next 6 bits from d0. */
+    r <<= 6;
+    d <<= 6;
+    d |= (d0 >> 3) & ((1 << 6) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 55 bits in r */
+    /* Remaining 3 bits from d0. */
+    r <<= 3;
+    d <<= 3;
+    d |= d0 & ((1 << 3) - 1);
+    t = d / dv;
+    r += t;
+
+    return r;
+}
+#endif /* WOLFSSL_SP_DIV_64 */
+
 /* Divide d in a and put remainder into r (m*d + r = a)
  * m is not calculated as it is not needed at this time.
  *
- * a  Nmber to be divided.
+ * a  Number to be divided.
  * d  Number to divide with.
  * m  Multiplier result.
  * r  Remainder from the division.
  * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
  */
-static int sp_2048_div_36(sp_digit* a, sp_digit* d, sp_digit* m,
+static int sp_2048_div_36(const sp_digit* a, const sp_digit* d, sp_digit* m,
         sp_digit* r)
 {
     int i;
+#ifndef WOLFSSL_SP_DIV_64
     int128_t d1;
-    sp_digit div, r1;
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#endif
+    sp_digit dv, r1;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit* td;
 #else
     sp_digit t1d[72], t2d[36 + 1];
@@ -2105,62 +2372,70 @@
     sp_digit* t2;
     int err = MP_OKAY;
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    td = XMALLOC(sizeof(sp_digit) * (3 * 36 + 1), NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    if (td != NULL) {
+    (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (3 * 36 + 1), NULL,
+                                                       DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
         t1 = td;
         t2 = td + 2 * 36;
-    }
-    else
-        err = MEMORY_E;
-#else
-    t1 = t1d;
-    t2 = t2d;
-#endif
-
-    (void)m;
-
-    if (err == MP_OKAY) {
-        div = d[35];
-        XMEMCPY(t1, a, sizeof(*t1) * 2 * 36);
+#else
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
+        dv = d[35];
+        XMEMCPY(t1, a, sizeof(*t1) * 2U * 36U);
         for (i=35; i>=0; i--) {
             t1[36 + i] += t1[36 + i - 1] >> 57;
-            t1[36 + i - 1] &= 0x1ffffffffffffffl;
+            t1[36 + i - 1] &= 0x1ffffffffffffffL;
+#ifndef WOLFSSL_SP_DIV_64
             d1 = t1[36 + i];
             d1 <<= 57;
             d1 += t1[36 + i - 1];
-            r1 = (sp_digit)(d1 / div);
+            r1 = (sp_digit)(d1 / dv);
+#else
+            r1 = sp_2048_div_word_36(t1[36 + i], t1[36 + i - 1], dv);
+#endif
 
             sp_2048_mul_d_36(t2, d, r1);
-            sp_2048_sub_36(&t1[i], &t1[i], t2);
+            (void)sp_2048_sub_36(&t1[i], &t1[i], t2);
             t1[36 + i] -= t2[36];
             t1[36 + i] += t1[36 + i - 1] >> 57;
-            t1[36 + i - 1] &= 0x1ffffffffffffffl;
-            r1 = (((-t1[36 + i]) << 57) - t1[36 + i - 1]) / div;
+            t1[36 + i - 1] &= 0x1ffffffffffffffL;
+            r1 = (((-t1[36 + i]) << 57) - t1[36 + i - 1]) / dv;
             r1++;
             sp_2048_mul_d_36(t2, d, r1);
-            sp_2048_add_36(&t1[i], &t1[i], t2);
+            (void)sp_2048_add_36(&t1[i], &t1[i], t2);
             t1[36 + i] += t1[36 + i - 1] >> 57;
-            t1[36 + i - 1] &= 0x1ffffffffffffffl;
+            t1[36 + i - 1] &= 0x1ffffffffffffffL;
         }
         t1[36 - 1] += t1[36 - 2] >> 57;
-        t1[36 - 2] &= 0x1ffffffffffffffl;
-        d1 = t1[36 - 1];
-        r1 = (sp_digit)(d1 / div);
+        t1[36 - 2] &= 0x1ffffffffffffffL;
+        r1 = t1[36 - 1] / dv;
 
         sp_2048_mul_d_36(t2, d, r1);
-        sp_2048_sub_36(t1, t1, t2);
-        XMEMCPY(r, t1, sizeof(*r) * 2 * 36);
+        (void)sp_2048_sub_36(t1, t1, t2);
+        XMEMCPY(r, t1, sizeof(*r) * 2U * 36U);
         for (i=0; i<34; i++) {
             r[i+1] += r[i] >> 57;
-            r[i] &= 0x1ffffffffffffffl;
-        }
-        sp_2048_cond_add_36(r, r, d, 0 - (r[35] < 0));
-    }
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (td != NULL)
+            r[i] &= 0x1ffffffffffffffL;
+        }
+        sp_2048_cond_add_36(r, r, d, 0 - ((r[35] < 0) ?
+                    (sp_digit)1 : (sp_digit)0));
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
 #endif
 
     return err;
@@ -2173,12 +2448,13 @@
  * m  A single precision number that is the modulus to reduce with.
  * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
  */
-static int sp_2048_mod_36(sp_digit* r, sp_digit* a, sp_digit* m)
+static int sp_2048_mod_36(sp_digit* r, const sp_digit* a, const sp_digit* m)
 {
     return sp_2048_div_36(a, m, NULL, r);
 }
 
-#if defined(SP_RSA_PRIVATE_EXP_D) || defined(WOLFSSL_HAVE_SP_DH)
+#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
+                                                     defined(WOLFSSL_HAVE_SP_DH)
 /* Modular exponentiate a to the e mod m. (r = a^e mod m)
  *
  * r     A single precision number that is the result of the operation.
@@ -2188,8 +2464,8 @@
  * m     A single precision number that is the modulus.
  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
  */
-static int sp_2048_mod_exp_36(sp_digit* r, sp_digit* a, sp_digit* e, int bits,
-    sp_digit* m, int reduceA)
+static int sp_2048_mod_exp_36(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits,
+    const sp_digit* m, int reduceA)
 {
 #ifdef WOLFSSL_SP_SMALL
     sp_digit* td;
@@ -2203,11 +2479,12 @@
 
     td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 36 * 2, NULL,
                             DYNAMIC_TYPE_TMP_BUFFER);
-    if (td == NULL)
-        err = MEMORY_E;
-
-    if (err == MP_OKAY) {
-        XMEMSET(td, 0, sizeof(*td) * 3 * 36 * 2);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        XMEMSET(td, 0, sizeof(*td) * 3U * 36U * 2U);
 
         norm = t[0] = td;
         t[1] = &td[36 * 2];
@@ -2216,10 +2493,12 @@
         sp_2048_mont_setup(m, &mp);
         sp_2048_mont_norm_36(norm, m);
 
-        if (reduceA)
+        if (reduceA != 0) {
             err = sp_2048_mod_36(t[1], a, m);
-        else
-            XMEMCPY(t[1], a, sizeof(sp_digit) * 36);
+        }
+        else {
+            XMEMCPY(t[1], a, sizeof(sp_digit) * 36U);
+        }
     }
     if (err == MP_OKAY) {
         sp_2048_mul_36(t[1], t[1], norm);
@@ -2232,8 +2511,9 @@
         n = e[i--] << (57 - c);
         for (; ; c--) {
             if (c == 0) {
-                if (i == -1)
+                if (i == -1) {
                     break;
+                }
 
                 n = e[i--];
                 c = 57;
@@ -2255,13 +2535,15 @@
 
         sp_2048_mont_reduce_36(t[0], m, mp);
         n = sp_2048_cmp_36(t[0], m);
-        sp_2048_cond_sub_36(t[0], t[0], m, (n < 0) - 1);
+        sp_2048_cond_sub_36(t[0], t[0], m, ((n < 0) ?
+                    (sp_digit)1 : (sp_digit)0) - 1);
         XMEMCPY(r, t[0], sizeof(*r) * 36 * 2);
 
     }
 
-    if (td != NULL)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
 
     return err;
 #elif defined(WOLFSSL_SP_CACHE_RESISTANT)
@@ -2281,24 +2563,23 @@
 #ifdef WOLFSSL_SMALL_STACK
     td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 36 * 2, NULL,
                             DYNAMIC_TYPE_TMP_BUFFER);
-    if (td == NULL)
-        err = MEMORY_E;
-
-    if (err == MP_OKAY) {
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
         t[0] = td;
         t[1] = &td[36 * 2];
         t[2] = &td[2 * 36 * 2];
+#endif
         norm = t[0];
-    }
-#else
-    norm = t[0];
-#endif
-
-    if (err == MP_OKAY) {
+
         sp_2048_mont_setup(m, &mp);
         sp_2048_mont_norm_36(norm, m);
 
-        if (reduceA) {
+        if (reduceA != 0) {
             err = sp_2048_mod_36(t[1], a, m);
             if (err == MP_OKAY) {
                 sp_2048_mul_36(t[1], t[1], norm);
@@ -2317,8 +2598,9 @@
         n = e[i--] << (57 - c);
         for (; ; c--) {
             if (c == 0) {
-                if (i == -1)
+                if (i == -1) {
                     break;
+                }
 
                 n = e[i--];
                 c = 57;
@@ -2338,13 +2620,15 @@
 
         sp_2048_mont_reduce_36(t[0], m, mp);
         n = sp_2048_cmp_36(t[0], m);
-        sp_2048_cond_sub_36(t[0], t[0], m, (n < 0) - 1);
+        sp_2048_cond_sub_36(t[0], t[0], m, ((n < 0) ?
+                   (sp_digit)1 : (sp_digit)0) - 1);
         XMEMCPY(r, t[0], sizeof(t[0]));
     }
 
 #ifdef WOLFSSL_SMALL_STACK
-    if (td != NULL)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
 #endif
 
     return err;
@@ -2366,23 +2650,22 @@
 #ifdef WOLFSSL_SMALL_STACK
     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 72, NULL,
                             DYNAMIC_TYPE_TMP_BUFFER);
-    if (td == NULL)
-        err = MEMORY_E;
-
-    if (err == MP_OKAY) {
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
         for (i=0; i<32; i++)
             t[i] = td + i * 72;
+#endif
         norm = t[0];
-    }
-#else
-    norm = t[0];
-#endif
-
-    if (err == MP_OKAY) {
+
         sp_2048_mont_setup(m, &mp);
         sp_2048_mont_norm_36(norm, m);
 
-        if (reduceA) {
+        if (reduceA != 0) {
             err = sp_2048_mod_36(t[1], a, m);
             if (err == MP_OKAY) {
                 sp_2048_mul_36(t[1], t[1], norm);
@@ -2430,10 +2713,12 @@
         bits = ((bits + 4) / 5) * 5;
         i = ((bits + 56) / 57) - 1;
         c = bits % 57;
-        if (c == 0)
+        if (c == 0) {
             c = 57;
-        if (i < 36)
+        }
+        if (i < 36) {
             n = e[i--] << (64 - c);
+        }
         else {
             n = 0;
             i--;
@@ -2442,7 +2727,7 @@
             n |= e[i--] << (7 - c);
             c += 57;
         }
-        y = n >> 59;
+        y = (n >> 59) & 0x1f;
         n <<= 5;
         c -= 5;
         XMEMCPY(rt, t[y], sizeof(rt));
@@ -2466,54 +2751,23 @@
 
         sp_2048_mont_reduce_36(rt, m, mp);
         n = sp_2048_cmp_36(rt, m);
-        sp_2048_cond_sub_36(rt, rt, m, (n < 0) - 1);
+        sp_2048_cond_sub_36(rt, rt, m, ((n < 0) ?
+                   (sp_digit)1 : (sp_digit)0) - 1);
         XMEMCPY(r, rt, sizeof(rt));
     }
 
 #ifdef WOLFSSL_SMALL_STACK
-    if (td != NULL)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return err;
-#endif
-}
-#endif /* SP_RSA_PRIVATE_EXP_D || WOLFSSL_HAVE_SP_DH */
-
-#if defined(WOLFSSL_HAVE_SP_RSA) && !defined(SP_RSA_PRIVATE_EXP_D) && \
-                                    !defined(RSA_LOW_MEM)
-/* AND m into each word of a and store in r.
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * m  Mask to AND against each digit.
- */
-static void sp_2048_mask_18(sp_digit* r, sp_digit* a, sp_digit m)
-{
-#ifdef WOLFSSL_SP_SMALL
-    int i;
-
-    for (i=0; i<18; i++)
-        r[i] = a[i] & m;
-#else
-    int i;
-
-    for (i = 0; i < 16; i += 8) {
-        r[i+0] = a[i+0] & m;
-        r[i+1] = a[i+1] & m;
-        r[i+2] = a[i+2] & m;
-        r[i+3] = a[i+3] & m;
-        r[i+4] = a[i+4] & m;
-        r[i+5] = a[i+5] & m;
-        r[i+6] = a[i+6] & m;
-        r[i+7] = a[i+7] & m;
-    }
-    r[16] = a[16] & m;
-    r[17] = a[17] & m;
-#endif
-}
-
-#endif
+    }
+#endif
+
+    return err;
+#endif
+}
+#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || */
+       /* WOLFSSL_HAVE_SP_DH */
+
 #ifdef WOLFSSL_HAVE_SP_RSA
 /* RSA public key operation.
  *
@@ -2536,20 +2790,30 @@
     sp_digit* m;
     sp_digit* r;
     sp_digit* norm;
-    sp_digit e[1];
+    sp_digit e[1] = {0};
     sp_digit mp;
     int i;
     int err = MP_OKAY;
 
-    if (*outLen < 256)
+    if (*outLen < 256U) {
         err = MP_TO_E;
-    if (err == MP_OKAY && (mp_count_bits(em) > 57 || inLen > 256 ||
-                                                     mp_count_bits(mm) != 2048))
-        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(em) > 57) {
+            err = MP_READ_E;
+        }
+        if (inLen > 256U) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 2048) {
+            err = MP_READ_E;
+        }
+    }
 
     if (err == MP_OKAY) {
         d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 36 * 5, NULL,
-                               DYNAMIC_TYPE_TMP_BUFFER);
+                                                              DYNAMIC_TYPE_RSA);
         if (d == NULL)
             err = MEMORY_E;
     }
@@ -2562,14 +2826,16 @@
 
         sp_2048_from_bin(a, 36, in, inLen);
 #if DIGIT_BIT >= 57
-        e[0] = em->dp[0];
-#else
-        e[0] = em->dp[0];
-        if (em->used > 1)
+        e[0] = (sp_digit)em->dp[0];
+#else
+        e[0] = (sp_digit)em->dp[0];
+        if (em->used > 1) {
             e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
-#endif
-        if (e[0] == 0)
+        }
+#endif
+        if (e[0] == 0) {
             err = MP_EXPTMOD_E;
+        }
     }
 
     if (err == MP_OKAY) {
@@ -2583,31 +2849,36 @@
         err = sp_2048_mod_36(a, a, m);
     }
     if (err == MP_OKAY) {
-        for (i=56; i>=0; i--)
-            if (e[0] >> i)
-                break;
+        for (i=56; i>=0; i--) {
+            if ((e[0] >> i) != 0) {
+                break;
+            }
+        }
 
         XMEMCPY(r, a, sizeof(sp_digit) * 36 * 2);
         for (i--; i>=0; i--) {
             sp_2048_mont_sqr_36(r, r, m, mp);
 
-            if (((e[0] >> i) & 1) == 1)
+            if (((e[0] >> i) & 1) == 1) {
                 sp_2048_mont_mul_36(r, r, a, m, mp);
+            }
         }
         sp_2048_mont_reduce_36(r, m, mp);
         mp = sp_2048_cmp_36(r, m);
-        sp_2048_cond_sub_36(r, r, m, (mp < 0) - 1);
+        sp_2048_cond_sub_36(r, r, m, ((mp < 0) ?
+                    (sp_digit)1 : (sp_digit)0)- 1);
 
         sp_2048_to_bin(r, out);
         *outLen = 256;
     }
 
-    if (d != NULL)
-        XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-
-    return err;
-#else
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+
+    return err;
+#else
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit ad[72], md[36], rd[72];
 #else
     sp_digit* d = NULL;
@@ -2615,21 +2886,31 @@
     sp_digit* a;
     sp_digit* m;
     sp_digit* r;
-    sp_digit e[1];
-    int err = MP_OKAY;
-
-    if (*outLen < 256)
+    sp_digit e[1] = {0};
+    int err = MP_OKAY;
+
+    if (*outLen < 256U) {
         err = MP_TO_E;
-    if (err == MP_OKAY && (mp_count_bits(em) > 57 || inLen > 256 ||
-                                                     mp_count_bits(mm) != 2048))
-        err = MP_READ_E;
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(em) > 57) {
+            err = MP_READ_E;
+        }
+        if (inLen > 256U) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 2048) {
+            err = MP_READ_E;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     if (err == MP_OKAY) {
         d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 36 * 5, NULL,
-                               DYNAMIC_TYPE_TMP_BUFFER);
-        if (d == NULL)
-            err = MEMORY_E;
+                                                              DYNAMIC_TYPE_RSA);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
     }
 
     if (err == MP_OKAY) {
@@ -2646,23 +2927,23 @@
     if (err == MP_OKAY) {
         sp_2048_from_bin(a, 36, in, inLen);
 #if DIGIT_BIT >= 57
-        e[0] = em->dp[0];
-#else
-        e[0] = em->dp[0];
-        if (em->used > 1)
+        e[0] = (sp_digit)em->dp[0];
+#else
+        e[0] = (sp_digit)em->dp[0];
+        if (em->used > 1) {
             e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
-#endif
-        if (e[0] == 0)
+        }
+#endif
+        if (e[0] == 0) {
             err = MP_EXPTMOD_E;
+        }
     }
     if (err == MP_OKAY) {
         sp_2048_from_mp(m, 36, mm);
 
         if (e[0] == 0x3) {
-            if (err == MP_OKAY) {
-                sp_2048_sqr_36(r, a);
-                err = sp_2048_mod_36(r, r, m);
-            }
+            sp_2048_sqr_36(r, a);
+            err = sp_2048_mod_36(r, r, m);
             if (err == MP_OKAY) {
                 sp_2048_mul_36(r, a, r);
                 err = sp_2048_mod_36(r, r, m);
@@ -2676,26 +2957,28 @@
             sp_2048_mont_setup(m, &mp);
             sp_2048_mont_norm_36(norm, m);
 
-            if (err == MP_OKAY) {
-                sp_2048_mul_36(a, a, norm);
-                err = sp_2048_mod_36(a, a, m);
-            }
+            sp_2048_mul_36(a, a, norm);
+            err = sp_2048_mod_36(a, a, m);
 
             if (err == MP_OKAY) {
-                for (i=56; i>=0; i--)
-                    if (e[0] >> i)
+                for (i=56; i>=0; i--) {
+                    if ((e[0] >> i) != 0) {
                         break;
-
-                XMEMCPY(r, a, sizeof(sp_digit) * 72);
+                    }
+                }
+
+                XMEMCPY(r, a, sizeof(sp_digit) * 72U);
                 for (i--; i>=0; i--) {
                     sp_2048_mont_sqr_36(r, r, m, mp);
 
-                    if (((e[0] >> i) & 1) == 1)
+                    if (((e[0] >> i) & 1) == 1) {
                         sp_2048_mont_mul_36(r, r, a, m, mp);
+                    }
                 }
                 sp_2048_mont_reduce_36(r, m, mp);
                 mp = sp_2048_cmp_36(r, m);
-                sp_2048_cond_sub_36(r, r, m, (mp < 0) - 1);
+                sp_2048_cond_sub_36(r, r, m, ((mp < 0) ?
+                           (sp_digit)1 : (sp_digit)0) - 1);
             }
         }
     }
@@ -2705,15 +2988,19 @@
         *outLen = 256;
     }
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (d != NULL)
-        XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return err;
-#endif /* WOLFSSL_SP_SMALL */
-}
-
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+#endif
+
+    return err;
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+#if !defined(SP_RSA_PRIVATE_EXP_D) && !defined(RSA_LOW_MEM)
+#endif /* !SP_RSA_PRIVATE_EXP_D && !RSA_LOW_MEM */
 /* RSA private key operation.
  *
  * in      Array of bytes representing the number to exponentiate, base.
@@ -2736,7 +3023,7 @@
     byte* out, word32* outLen)
 {
 #if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit* a;
     sp_digit* d = NULL;
     sp_digit* m;
@@ -2749,21 +3036,31 @@
     (void)dqm;
     (void)qim;
 
-    if (*outLen < 256)
+    if (*outLen < 256U) {
         err = MP_TO_E;
-    if (err == MP_OKAY && (mp_count_bits(dm) > 2048 || inLen > 256 ||
-                                                     mp_count_bits(mm) != 2048))
-        err = MP_READ_E;
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(dm) > 2048) {
+           err = MP_READ_E;
+        }
+        if (inLen > 256) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 2048) {
+            err = MP_READ_E;
+        }
+    }
 
     if (err == MP_OKAY) {
         d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 36 * 4, NULL,
-                               DYNAMIC_TYPE_TMP_BUFFER);
-        if (d == NULL)
-            err = MEMORY_E;
+                                                              DYNAMIC_TYPE_RSA);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
     }
     if (err == MP_OKAY) {
         a = d + 36;
-        m = a + 36;
+        m = a + 72;
         r = a;
 
         sp_2048_from_bin(a, 36, in, inLen);
@@ -2778,7 +3075,7 @@
 
     if (d != NULL) {
         XMEMSET(d, 0, sizeof(sp_digit) * 36);
-        XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
     }
 
     return err;
@@ -2793,11 +3090,20 @@
     (void)dqm;
     (void)qim;
 
-    if (*outLen < 256)
+    if (*outLen < 256U) {
         err = MP_TO_E;
-    if (err == MP_OKAY && (mp_count_bits(dm) > 2048 || inLen > 256 ||
-                                                     mp_count_bits(mm) != 2048))
-        err = MP_READ_E;
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(dm) > 2048) {
+            err = MP_READ_E;
+        }
+        if (inLen > 256U) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 2048) {
+            err = MP_READ_E;
+        }
+    }
 
     if (err == MP_OKAY) {
         sp_2048_from_bin(a, 36, in, inLen);
@@ -2816,7 +3122,7 @@
     return err;
 #endif /* WOLFSSL_SP_SMALL || defined(WOLFSSL_SMALL_STACK) */
 #else
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit* t = NULL;
     sp_digit* a;
     sp_digit* p;
@@ -2824,7 +3130,6 @@
     sp_digit* dp;
     sp_digit* dq;
     sp_digit* qi;
-    sp_digit* tmp;
     sp_digit* tmpa;
     sp_digit* tmpb;
     sp_digit* r;
@@ -2833,16 +3138,24 @@
     (void)dm;
     (void)mm;
 
-    if (*outLen < 256)
+    if (*outLen < 256U) {
         err = MP_TO_E;
-    if (err == MP_OKAY && (inLen > 256 || mp_count_bits(mm) != 2048))
-        err = MP_READ_E;
+    }
+    if (err == MP_OKAY) {
+        if (inLen > 256) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 2048) {
+            err = MP_READ_E;
+        }
+    }
 
     if (err == MP_OKAY) {
         t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 18 * 11, NULL,
-                               DYNAMIC_TYPE_TMP_BUFFER);
-        if (t == NULL)
-            err = MEMORY_E;
+                                                              DYNAMIC_TYPE_RSA);
+        if (t == NULL) {
+            err = MEMORY_E;
+        }
     }
     if (err == MP_OKAY) {
         a = t;
@@ -2852,8 +3165,7 @@
         tmpa = qi + 18;
         tmpb = tmpa + 36;
 
-        tmp = t;
-        r = tmp + 36;
+        r = t + 36;
 
         sp_2048_from_bin(a, 36, in, inLen);
         sp_2048_from_mp(p, 18, pm);
@@ -2866,9 +3178,9 @@
         err = sp_2048_mod_exp_18(tmpb, a, dq, 1024, q, 1);
     }
     if (err == MP_OKAY) {
-        sp_2048_sub_18(tmpa, tmpa, tmpb);
-        sp_2048_mask_18(tmp, p, tmpa[17] >> 63);
-        sp_2048_add_18(tmpa, tmpa, tmp);
+        (void)sp_2048_sub_18(tmpa, tmpa, tmpb);
+        sp_2048_cond_add_18(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[17] >> 63));
+        sp_2048_cond_add_18(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[17] >> 63));
 
         sp_2048_from_mp(qi, 18, qim);
         sp_2048_mul_18(tmpa, tmpa, qi);
@@ -2877,7 +3189,7 @@
 
     if (err == MP_OKAY) {
         sp_2048_mul_18(tmpa, q, tmpa);
-        sp_2048_add_36(r, tmpb, tmpa);
+        (void)sp_2048_add_36(r, tmpb, tmpa);
         sp_2048_norm_36(r);
 
         sp_2048_to_bin(r, out);
@@ -2886,24 +3198,31 @@
 
     if (t != NULL) {
         XMEMSET(t, 0, sizeof(sp_digit) * 18 * 11);
-        XFREE(t, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(t, NULL, DYNAMIC_TYPE_RSA);
     }
 
     return err;
 #else
     sp_digit a[36 * 2];
     sp_digit p[18], q[18], dp[18], dq[18], qi[18];
-    sp_digit tmp[36], tmpa[36], tmpb[36];
+    sp_digit tmpa[36], tmpb[36];
     sp_digit* r = a;
     int err = MP_OKAY;
 
     (void)dm;
     (void)mm;
 
-    if (*outLen < 256)
+    if (*outLen < 256U) {
         err = MP_TO_E;
-    if (err == MP_OKAY && (inLen > 256 || mp_count_bits(mm) != 2048))
-        err = MP_READ_E;
+    }
+    if (err == MP_OKAY) {
+        if (inLen > 256U) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 2048) {
+            err = MP_READ_E;
+        }
+    }
 
     if (err == MP_OKAY) {
         sp_2048_from_bin(a, 36, in, inLen);
@@ -2915,20 +3234,21 @@
 
         err = sp_2048_mod_exp_18(tmpa, a, dp, 1024, p, 1);
     }
-    if (err == MP_OKAY)
+    if (err == MP_OKAY) {
         err = sp_2048_mod_exp_18(tmpb, a, dq, 1024, q, 1);
-
-    if (err == MP_OKAY) {
-        sp_2048_sub_18(tmpa, tmpa, tmpb);
-        sp_2048_mask_18(tmp, p, tmpa[17] >> 63);
-        sp_2048_add_18(tmpa, tmpa, tmp);
+    }
+
+    if (err == MP_OKAY) {
+        (void)sp_2048_sub_18(tmpa, tmpa, tmpb);
+        sp_2048_cond_add_18(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[17] >> 63));
+        sp_2048_cond_add_18(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[17] >> 63));
         sp_2048_mul_18(tmpa, tmpa, qi);
         err = sp_2048_mod_18(tmpa, tmpa, p);
     }
 
     if (err == MP_OKAY) {
         sp_2048_mul_18(tmpa, tmpa, q);
-        sp_2048_add_36(r, tmpb, tmpa);
+        (void)sp_2048_add_36(r, tmpb, tmpa);
         sp_2048_norm_36(r);
 
         sp_2048_to_bin(r, out);
@@ -2948,19 +3268,21 @@
 #endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
 }
 
+#endif /* !WOLFSSL_RSA_PUBLIC_ONLY */
 #endif /* WOLFSSL_HAVE_SP_RSA */
-#ifdef WOLFSSL_HAVE_SP_DH
+#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
+                                              !defined(WOLFSSL_RSA_PUBLIC_ONLY))
 /* Convert an array of sp_digit to an mp_int.
  *
  * a  A single precision integer.
  * r  A multi-precision integer.
  */
-static int sp_2048_to_mp(sp_digit* a, mp_int* r)
+static int sp_2048_to_mp(const sp_digit* a, mp_int* r)
 {
     int err;
 
     err = mp_grow(r, (2048 + DIGIT_BIT - 1) / DIGIT_BIT);
-    if (err == MP_OKAY) {
+    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
 #if DIGIT_BIT == 57
         XMEMCPY(r->dp, a, sizeof(sp_digit) * 36);
         r->used = 36;
@@ -2970,14 +3292,19 @@
 
         r->dp[0] = 0;
         for (i = 0; i < 36; i++) {
-            r->dp[j] |= a[i] << s;
-            r->dp[j] &= (1l << DIGIT_BIT) - 1;
+            r->dp[j] |= (mp_digit)(a[i] << s);
+            r->dp[j] &= (1L << DIGIT_BIT) - 1;
             s = DIGIT_BIT - s;
-            r->dp[++j] = a[i] >> s;
+            r->dp[++j] = (mp_digit)(a[i] >> s);
             while (s + DIGIT_BIT <= 57) {
                 s += DIGIT_BIT;
-                r->dp[j] &= (1l << DIGIT_BIT) - 1;
-                r->dp[++j] = a[i] >> s;
+                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+                if (s == SP_WORD_SIZE) {
+                    r->dp[j] = 0;
+                }
+                else {
+                    r->dp[j] = (mp_digit)(a[i] >> s);
+                }
             }
             s = 57 - s;
         }
@@ -2990,15 +3317,16 @@
         for (i = 0; i < 36; i++) {
             r->dp[j] |= ((mp_digit)a[i]) << s;
             if (s + 57 >= DIGIT_BIT) {
-    #if DIGIT_BIT < 64
-                r->dp[j] &= (1l << DIGIT_BIT) - 1;
+    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+                r->dp[j] &= (1L << DIGIT_BIT) - 1;
     #endif
                 s = DIGIT_BIT - s;
                 r->dp[++j] = a[i] >> s;
                 s = 57 - s;
             }
-            else
+            else {
                 s += 57;
+            }
         }
         r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT;
         mp_clamp(r);
@@ -3014,7 +3342,7 @@
  * exp   Exponent. MP integer.
  * mod   Modulus. MP integer.
  * res   Result. MP integer.
- * returs 0 on success, MP_READ_E if there are too many bytes in an array
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
  * and MEMORY_E if memory allocation fails.
  */
 int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
@@ -3028,16 +3356,27 @@
     sp_digit* r;
     int expBits = mp_count_bits(exp);
 
-    if (mp_count_bits(base) > 2048 || expBits > 2048 ||
-                                                   mp_count_bits(mod) != 2048) {
+    if (mp_count_bits(base) > 2048) {
         err = MP_READ_E;
     }
 
     if (err == MP_OKAY) {
-        d = (sp_digit*)XMALLOC(sizeof(*d) * 36 * 4, NULL,
-                               DYNAMIC_TYPE_TMP_BUFFER);
-        if (d == NULL)
-            err = MEMORY_E;
+        if (expBits > 2048) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 2048) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 36 * 4, NULL, DYNAMIC_TYPE_DH);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
     }
 
     if (err == MP_OKAY) {
@@ -3058,8 +3397,8 @@
     }
 
     if (d != NULL) {
-        XMEMSET(e, 0, sizeof(sp_digit) * 36);
-        XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        XMEMSET(e, 0, sizeof(sp_digit) * 36U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
     }
     return err;
 #else
@@ -3075,15 +3414,25 @@
     int err = MP_OKAY;
     int expBits = mp_count_bits(exp);
 
-    if (mp_count_bits(base) > 2048 || expBits > 2048 ||
-                                                   mp_count_bits(mod) != 2048) {
+    if (mp_count_bits(base) > 2048) {
         err = MP_READ_E;
     }
 
-#ifdef WOLFSSL_SMALL_STACK
-    if (err == MP_OKAY) {
-        d = (sp_digit*)XMALLOC(sizeof(*d) * 36 * 4, NULL,
-                               DYNAMIC_TYPE_TMP_BUFFER);
+    if (err == MP_OKAY) {
+        if (expBits > 2048) {
+            err = MP_READ_E;
+        }
+    }
+    
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 2048) {
+            err = MP_READ_E;
+        }
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 36 * 4, NULL, DYNAMIC_TYPE_DH);
         if (d == NULL)
             err = MEMORY_E;
     }
@@ -3112,16 +3461,220 @@
         err = sp_2048_to_mp(r, res);
     }
 
-    XMEMSET(e, 0, sizeof(sp_digit) * 36);
-
-#ifdef WOLFSSL_SMALL_STACK
-    if (d != NULL)
-        XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return err;
-#endif
-}
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (d != NULL) {
+        XMEMSET(e, 0, sizeof(sp_digit) * 36U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
+    }
+#else
+    XMEMSET(e, 0, sizeof(sp_digit) * 36U);
+#endif
+
+    return err;
+#endif
+}
+
+#ifdef WOLFSSL_HAVE_SP_DH
+
+#ifdef HAVE_FFDHE_2048
+SP_NOINLINE static void sp_2048_lshift_36(sp_digit* r, sp_digit* a, byte n)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    r[36] = a[35] >> (57 - n);
+    for (i=35; i>0; i--) {
+        r[i] = ((a[i] << n) | (a[i-1] >> (57 - n))) & 0x1ffffffffffffffL;
+    }
+#else
+    sp_int_digit s, t;
+
+    s = (sp_int_digit)a[35];
+    r[36] = s >> (57U - n);
+    s = (sp_int_digit)(a[35]); t = (sp_int_digit)(a[34]);
+    r[35] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[34]); t = (sp_int_digit)(a[33]);
+    r[34] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[33]); t = (sp_int_digit)(a[32]);
+    r[33] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[32]); t = (sp_int_digit)(a[31]);
+    r[32] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[31]); t = (sp_int_digit)(a[30]);
+    r[31] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[30]); t = (sp_int_digit)(a[29]);
+    r[30] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[29]); t = (sp_int_digit)(a[28]);
+    r[29] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[28]); t = (sp_int_digit)(a[27]);
+    r[28] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[27]); t = (sp_int_digit)(a[26]);
+    r[27] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[26]); t = (sp_int_digit)(a[25]);
+    r[26] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[25]); t = (sp_int_digit)(a[24]);
+    r[25] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[24]); t = (sp_int_digit)(a[23]);
+    r[24] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[23]); t = (sp_int_digit)(a[22]);
+    r[23] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[22]); t = (sp_int_digit)(a[21]);
+    r[22] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[21]); t = (sp_int_digit)(a[20]);
+    r[21] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[20]); t = (sp_int_digit)(a[19]);
+    r[20] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[19]); t = (sp_int_digit)(a[18]);
+    r[19] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[18]); t = (sp_int_digit)(a[17]);
+    r[18] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[17]); t = (sp_int_digit)(a[16]);
+    r[17] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[16]); t = (sp_int_digit)(a[15]);
+    r[16] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[15]); t = (sp_int_digit)(a[14]);
+    r[15] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[14]); t = (sp_int_digit)(a[13]);
+    r[14] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[13]); t = (sp_int_digit)(a[12]);
+    r[13] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[12]); t = (sp_int_digit)(a[11]);
+    r[12] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[11]); t = (sp_int_digit)(a[10]);
+    r[11] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[10]); t = (sp_int_digit)(a[9]);
+    r[10] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[9]); t = (sp_int_digit)(a[8]);
+    r[9] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[8]); t = (sp_int_digit)(a[7]);
+    r[8] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[7]); t = (sp_int_digit)(a[6]);
+    r[7] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[6]); t = (sp_int_digit)(a[5]);
+    r[6] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[5]); t = (sp_int_digit)(a[4]);
+    r[5] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[4]); t = (sp_int_digit)(a[3]);
+    r[4] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[3]); t = (sp_int_digit)(a[2]);
+    r[3] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[2]); t = (sp_int_digit)(a[1]);
+    r[2] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[1]); t = (sp_int_digit)(a[0]);
+    r[1] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+#endif
+    r[0] = (a[0] << n) & 0x1ffffffffffffffL;
+}
+
+/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
+ *
+ * r     A single precision number that is the result of the operation.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_2048_mod_exp_2_36(sp_digit* r, const sp_digit* e, int bits, const sp_digit* m)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit nd[72];
+    sp_digit td[37];
+#else
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit* tmp;
+    sp_digit mp = 1;
+    sp_digit n, o;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 109, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        norm = td;
+        tmp  = td + 72;
+        XMEMSET(td, 0, sizeof(sp_digit) * 109);
+#else
+        norm = nd;
+        tmp  = td;
+        XMEMSET(td, 0, sizeof(td));
+#endif
+
+        sp_2048_mont_setup(m, &mp);
+        sp_2048_mont_norm_36(norm, m);
+
+        bits = ((bits + 4) / 5) * 5;
+        i = ((bits + 56) / 57) - 1;
+        c = bits % 57;
+        if (c == 0) {
+            c = 57;
+        }
+        if (i < 36) {
+            n = e[i--] << (64 - c);
+        }
+        else {
+            n = 0;
+            i--;
+        }
+        if (c < 5) {
+            n |= e[i--] << (7 - c);
+            c += 57;
+        }
+        y = (n >> 59) & 0x1f;
+        n <<= 5;
+        c -= 5;
+        sp_2048_lshift_36(r, norm, y);
+        for (; i>=0 || c>=5; ) {
+            if (c < 5) {
+                n |= e[i--] << (7 - c);
+                c += 57;
+            }
+            y = (n >> 59) & 0x1f;
+            n <<= 5;
+            c -= 5;
+
+            sp_2048_mont_sqr_36(r, r, m, mp);
+            sp_2048_mont_sqr_36(r, r, m, mp);
+            sp_2048_mont_sqr_36(r, r, m, mp);
+            sp_2048_mont_sqr_36(r, r, m, mp);
+            sp_2048_mont_sqr_36(r, r, m, mp);
+
+            sp_2048_lshift_36(r, r, y);
+            sp_2048_mul_d_36(tmp, norm, (r[36] << 4) + (r[35] >> 53));
+            r[36] = 0;
+            r[35] &= 0x1fffffffffffffL;
+            (void)sp_2048_add_36(r, r, tmp);
+            sp_2048_norm_36(r);
+            o = sp_2048_cmp_36(r, m);
+            sp_2048_cond_sub_36(r, r, m, ((o < 0) ?
+                                          (sp_digit)1 : (sp_digit)0) - 1);
+        }
+
+        sp_2048_mont_reduce_36(r, m, mp);
+        n = sp_2048_cmp_36(r, m);
+        sp_2048_cond_sub_36(r, r, m, ((n < 0) ?
+                                                (sp_digit)1 : (sp_digit)0) - 1);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+
+#endif /* HAVE_FFDHE_2048 */
 
 /* Perform the modular exponentiation for Diffie-Hellman.
  *
@@ -3132,7 +3685,7 @@
  * out      Buffer to hold big-endian bytes of exponentiation result.
  *          Must be at least 256 bytes long.
  * outLen   Length, in bytes, of exponentiation result.
- * returs 0 on success, MP_READ_E if there are too many bytes in an array
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
  * and MEMORY_E if memory allocation fails.
  */
 int sp_DhExp_2048(mp_int* base, const byte* exp, word32 expLen,
@@ -3147,16 +3700,27 @@
     sp_digit* r;
     word32 i;
 
-    if (mp_count_bits(base) > 2048 || expLen > 256 ||
-                                                   mp_count_bits(mod) != 2048) {
+    if (mp_count_bits(base) > 2048) {
         err = MP_READ_E;
     }
 
     if (err == MP_OKAY) {
-        d = (sp_digit*)XMALLOC(sizeof(*d) * 36 * 4, NULL,
-                               DYNAMIC_TYPE_TMP_BUFFER);
-        if (d == NULL)
-            err = MEMORY_E;
+        if (expLen > 256) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 2048) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 36 * 4, NULL, DYNAMIC_TYPE_DH);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
     }
 
     if (err == MP_OKAY) {
@@ -3169,7 +3733,14 @@
         sp_2048_from_bin(e, 36, exp, expLen);
         sp_2048_from_mp(m, 36, mod);
 
-        err = sp_2048_mod_exp_36(r, b, e, expLen * 8, m, 0);
+    #ifdef HAVE_FFDHE_2048
+        if (base->used == 1 && base->dp[0] == 2 &&
+                (m[35] >> 21) == 0xffffffffL) {
+            err = sp_2048_mod_exp_2_36(r, e, expLen * 8, m);
+        }
+        else
+    #endif
+            err = sp_2048_mod_exp_36(r, b, e, expLen * 8, m, 0);
     }
 
     if (err == MP_OKAY) {
@@ -3182,8 +3753,8 @@
     }
 
     if (d != NULL) {
-        XMEMSET(e, 0, sizeof(sp_digit) * 36);
-        XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        XMEMSET(e, 0, sizeof(sp_digit) * 36U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
     }
     return err;
 #else
@@ -3199,15 +3770,24 @@
     word32 i;
     int err = MP_OKAY;
 
-    if (mp_count_bits(base) > 2048 || expLen > 256 ||
-                                                   mp_count_bits(mod) != 2048) {
+    if (mp_count_bits(base) > 2048) {
         err = MP_READ_E;
     }
 
-#ifdef WOLFSSL_SMALL_STACK
-    if (err == MP_OKAY) {
-        d = (sp_digit*)XMALLOC(sizeof(*d) * 36 * 4, NULL,
-                               DYNAMIC_TYPE_TMP_BUFFER);
+    if (err == MP_OKAY) {
+        if (expLen > 256U) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 2048) {
+            err = MP_READ_E;
+        }
+    }
+#ifdef WOLFSSL_SMALL_STACK
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 36 * 4, NULL, DYNAMIC_TYPE_DH);
         if (d == NULL)
             err = MEMORY_E;
     }
@@ -3229,113 +3809,283 @@
         sp_2048_from_bin(e, 36, exp, expLen);
         sp_2048_from_mp(m, 36, mod);
 
-        err = sp_2048_mod_exp_36(r, b, e, expLen * 8, m, 0);
+    #ifdef HAVE_FFDHE_2048
+        if (base->used == 1 && base->dp[0] == 2U &&
+                (m[35] >> 21) == 0xffffffffL) {
+            err = sp_2048_mod_exp_2_36(r, e, expLen * 8U, m);
+        }
+        else {
+    #endif
+            err = sp_2048_mod_exp_36(r, b, e, expLen * 8U, m, 0);
+    #ifdef HAVE_FFDHE_2048
+        }
+    #endif
     }
 
     if (err == MP_OKAY) {
         sp_2048_to_bin(r, out);
         *outLen = 256;
-        for (i=0; i<256 && out[i] == 0; i++) {
+        for (i=0; i<256U && out[i] == 0U; i++) {
         }
         *outLen -= i;
         XMEMMOVE(out, out + i, *outLen);
     }
 
-    XMEMSET(e, 0, sizeof(sp_digit) * 36);
-
-#ifdef WOLFSSL_SMALL_STACK
-    if (d != NULL)
-        XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return err;
-#endif
-}
-
+#ifdef WOLFSSL_SMALL_STACK
+    if (d != NULL) {
+        XMEMSET(e, 0, sizeof(sp_digit) * 36U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
+    }
+#else
+    XMEMSET(e, 0, sizeof(sp_digit) * 36U);
+#endif
+
+    return err;
+#endif
+}
 #endif /* WOLFSSL_HAVE_SP_DH */
 
-#endif /* WOLFSSL_SP_NO_2048 */
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base  Base. MP integer.
+ * exp   Exponent. MP integer.
+ * mod   Modulus. MP integer.
+ * res   Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_1024(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int err = MP_OKAY;
+    sp_digit* d = NULL;
+    sp_digit* b;
+    sp_digit* e;
+    sp_digit* m;
+    sp_digit* r;
+    int expBits = mp_count_bits(exp);
+
+    if (mp_count_bits(base) > 1024) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expBits > 1024) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 1024) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 18 * 4, NULL, DYNAMIC_TYPE_DH);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        b = d;
+        e = b + 18 * 2;
+        m = e + 18;
+        r = b;
+
+        sp_2048_from_mp(b, 18, base);
+        sp_2048_from_mp(e, 18, exp);
+        sp_2048_from_mp(m, 18, mod);
+
+        err = sp_2048_mod_exp_18(r, b, e, mp_count_bits(exp), m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        XMEMSET(r + 18, 0, sizeof(*r) * 18U);
+        err = sp_2048_to_mp(r, res);
+    }
+
+    if (d != NULL) {
+        XMEMSET(e, 0, sizeof(sp_digit) * 18U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
+    }
+    return err;
+#else
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit bd[36], ed[18], md[18];
+#else
+    sp_digit* d = NULL;
+#endif
+    sp_digit* b;
+    sp_digit* e;
+    sp_digit* m;
+    sp_digit* r;
+    int err = MP_OKAY;
+    int expBits = mp_count_bits(exp);
+
+    if (mp_count_bits(base) > 1024) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expBits > 1024) {
+            err = MP_READ_E;
+        }
+    }
+    
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 1024) {
+            err = MP_READ_E;
+        }
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 18 * 4, NULL, DYNAMIC_TYPE_DH);
+        if (d == NULL)
+            err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        b = d;
+        e = b + 18 * 2;
+        m = e + 18;
+        r = b;
+    }
+#else
+    r = b = bd;
+    e = ed;
+    m = md;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_2048_from_mp(b, 18, base);
+        sp_2048_from_mp(e, 18, exp);
+        sp_2048_from_mp(m, 18, mod);
+
+        err = sp_2048_mod_exp_18(r, b, e, expBits, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        XMEMSET(r + 18, 0, sizeof(*r) * 18U);
+        err = sp_2048_to_mp(r, res);
+    }
+
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (d != NULL) {
+        XMEMSET(e, 0, sizeof(sp_digit) * 18U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
+    }
+#else
+    XMEMSET(e, 0, sizeof(sp_digit) * 18U);
+#endif
+
+    return err;
+#endif
+}
+
+#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
+
+#endif /* !WOLFSSL_SP_NO_2048 */
 
 #ifndef WOLFSSL_SP_NO_3072
-/* Read big endian unsigned byte aray into r.
- *
- * r  A single precision integer.
+/* Read big endian unsigned byte array into r.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
  * a  Byte array.
  * n  Number of bytes in array to read.
  */
-static void sp_3072_from_bin(sp_digit* r, int max, const byte* a, int n)
-{
-    int i, j = 0, s = 0;
+static void sp_3072_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;
 
     r[0] = 0;
     for (i = n-1; i >= 0; i--) {
-        r[j] |= ((sp_digit)a[i]) << s;
-        if (s >= 49) {
-            r[j] &= 0x1ffffffffffffffl;
-            s = 57 - s;
-            if (j + 1 >= max)
-                break;
-            r[++j] = a[i] >> s;
-            s = 8 - s;
-        }
-        else
-            s += 8;
-    }
-
-    for (j++; j < max; j++)
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 49U) {
+            r[j] &= 0x1ffffffffffffffL;
+            s = 57U - s;
+            if (j + 1 >= size) {
+                break;
+            }
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    for (j++; j < size; j++) {
         r[j] = 0;
+    }
 }
 
 /* Convert an mp_int to an array of sp_digit.
  *
  * r  A single precision integer.
+ * size  Maximum number of bytes to convert
  * a  A multi-precision integer.
  */
-static void sp_3072_from_mp(sp_digit* r, int max, mp_int* a)
+static void sp_3072_from_mp(sp_digit* r, int size, const mp_int* a)
 {
 #if DIGIT_BIT == 57
     int j;
 
     XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
 
-    for (j = a->used; j < max; j++)
+    for (j = a->used; j < size; j++) {
         r[j] = 0;
+    }
 #elif DIGIT_BIT > 57
-    int i, j = 0, s = 0;
+    int i, j = 0;
+    word32 s = 0;
 
     r[0] = 0;
-    for (i = 0; i < a->used && j < max; i++) {
-        r[j] |= a->dp[i] << s;
-        r[j] &= 0x1ffffffffffffffl;
-        s = 57 - s;
-        if (j + 1 >= max)
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
+        r[j] &= 0x1ffffffffffffffL;
+        s = 57U - s;
+        if (j + 1 >= size) {
             break;
-        r[++j] = a->dp[i] >> s;
-        while (s + 57 <= DIGIT_BIT) {
-            s += 57;
-            r[j] &= 0x1ffffffffffffffl;
-            if (j + 1 >= max)
-                break;
-            if (s < DIGIT_BIT)
-                r[++j] = a->dp[i] >> s;
-            else
-                r[++j] = 0;
-        }
-        s = DIGIT_BIT - s;
-    }
-
-    for (j++; j < max; j++)
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 57U) <= (word32)DIGIT_BIT) {
+            s += 57U;
+            r[j] &= 0x1ffffffffffffffL;
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
         r[j] = 0;
+    }
 #else
     int i, j = 0, s = 0;
 
     r[0] = 0;
-    for (i = 0; i < a->used && j < max; i++) {
+    for (i = 0; i < a->used && j < size; i++) {
         r[j] |= ((sp_digit)a->dp[i]) << s;
         if (s + DIGIT_BIT >= 57) {
-            r[j] &= 0x1ffffffffffffffl;
-            if (j + 1 >= max)
-                break;
+            r[j] &= 0x1ffffffffffffffL;
+            if (j + 1 >= size) {
+                break;
+            }
             s = 57 - s;
             if (s == DIGIT_BIT) {
                 r[++j] = 0;
@@ -3346,16 +4096,18 @@
                 s = DIGIT_BIT - s;
             }
         }
-        else
+        else {
             s += DIGIT_BIT;
-    }
-
-    for (j++; j < max; j++)
+        }
+    }
+
+    for (j++; j < size; j++) {
         r[j] = 0;
-#endif
-}
-
-/* Write r as big endian to byte aray.
+    }
+#endif
+}
+
+/* Write r as big endian to byte array.
  * Fixed length number of bytes written: 384
  *
  * r  A single precision integer.
@@ -3367,25 +4119,32 @@
 
     for (i=0; i<53; i++) {
         r[i+1] += r[i] >> 57;
-        r[i] &= 0x1ffffffffffffffl;
+        r[i] &= 0x1ffffffffffffffL;
     }
     j = 3072 / 8 - 1;
     a[j] = 0;
     for (i=0; i<54 && j>=0; i++) {
         b = 0;
-        a[j--] |= r[i] << s; b += 8 - s;
-        if (j < 0)
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
             break;
+        }
         while (b < 57) {
-            a[j--] = r[i] >> b; b += 8;
-            if (j < 0)
-                break;
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
+                break;
+            }
         }
         s = 8 - (b - 57);
-        if (j >= 0)
+        if (j >= 0) {
             a[j] = 0;
-        if (s != 0)
+        }
+        if (s != 0) {
             j++;
+        }
     }
 }
 
@@ -3481,24 +4240,24 @@
                  + ((int128_t)a[ 8]) * b[ 7];
     int128_t t16  = ((int128_t)a[ 8]) * b[ 8];
 
-    t1   += t0  >> 57; r[ 0] = t0  & 0x1ffffffffffffffl;
-    t2   += t1  >> 57; r[ 1] = t1  & 0x1ffffffffffffffl;
-    t3   += t2  >> 57; r[ 2] = t2  & 0x1ffffffffffffffl;
-    t4   += t3  >> 57; r[ 3] = t3  & 0x1ffffffffffffffl;
-    t5   += t4  >> 57; r[ 4] = t4  & 0x1ffffffffffffffl;
-    t6   += t5  >> 57; r[ 5] = t5  & 0x1ffffffffffffffl;
-    t7   += t6  >> 57; r[ 6] = t6  & 0x1ffffffffffffffl;
-    t8   += t7  >> 57; r[ 7] = t7  & 0x1ffffffffffffffl;
-    t9   += t8  >> 57; r[ 8] = t8  & 0x1ffffffffffffffl;
-    t10  += t9  >> 57; r[ 9] = t9  & 0x1ffffffffffffffl;
-    t11  += t10 >> 57; r[10] = t10 & 0x1ffffffffffffffl;
-    t12  += t11 >> 57; r[11] = t11 & 0x1ffffffffffffffl;
-    t13  += t12 >> 57; r[12] = t12 & 0x1ffffffffffffffl;
-    t14  += t13 >> 57; r[13] = t13 & 0x1ffffffffffffffl;
-    t15  += t14 >> 57; r[14] = t14 & 0x1ffffffffffffffl;
-    t16  += t15 >> 57; r[15] = t15 & 0x1ffffffffffffffl;
+    t1   += t0  >> 57; r[ 0] = t0  & 0x1ffffffffffffffL;
+    t2   += t1  >> 57; r[ 1] = t1  & 0x1ffffffffffffffL;
+    t3   += t2  >> 57; r[ 2] = t2  & 0x1ffffffffffffffL;
+    t4   += t3  >> 57; r[ 3] = t3  & 0x1ffffffffffffffL;
+    t5   += t4  >> 57; r[ 4] = t4  & 0x1ffffffffffffffL;
+    t6   += t5  >> 57; r[ 5] = t5  & 0x1ffffffffffffffL;
+    t7   += t6  >> 57; r[ 6] = t6  & 0x1ffffffffffffffL;
+    t8   += t7  >> 57; r[ 7] = t7  & 0x1ffffffffffffffL;
+    t9   += t8  >> 57; r[ 8] = t8  & 0x1ffffffffffffffL;
+    t10  += t9  >> 57; r[ 9] = t9  & 0x1ffffffffffffffL;
+    t11  += t10 >> 57; r[10] = t10 & 0x1ffffffffffffffL;
+    t12  += t11 >> 57; r[11] = t11 & 0x1ffffffffffffffL;
+    t13  += t12 >> 57; r[12] = t12 & 0x1ffffffffffffffL;
+    t14  += t13 >> 57; r[13] = t13 & 0x1ffffffffffffffL;
+    t15  += t14 >> 57; r[14] = t14 & 0x1ffffffffffffffL;
+    t16  += t15 >> 57; r[15] = t15 & 0x1ffffffffffffffL;
     r[17] = (sp_digit)(t16 >> 57);
-                       r[16] = t16 & 0x1ffffffffffffffl;
+                       r[16] = t16 & 0x1ffffffffffffffL;
 }
 
 /* Square a and put result in r. (r = a * a)
@@ -3554,24 +4313,24 @@
     int128_t t15  = (((int128_t)a[ 7]) * a[ 8]) * 2;
     int128_t t16  =  ((int128_t)a[ 8]) * a[ 8];
 
-    t1   += t0  >> 57; r[ 0] = t0  & 0x1ffffffffffffffl;
-    t2   += t1  >> 57; r[ 1] = t1  & 0x1ffffffffffffffl;
-    t3   += t2  >> 57; r[ 2] = t2  & 0x1ffffffffffffffl;
-    t4   += t3  >> 57; r[ 3] = t3  & 0x1ffffffffffffffl;
-    t5   += t4  >> 57; r[ 4] = t4  & 0x1ffffffffffffffl;
-    t6   += t5  >> 57; r[ 5] = t5  & 0x1ffffffffffffffl;
-    t7   += t6  >> 57; r[ 6] = t6  & 0x1ffffffffffffffl;
-    t8   += t7  >> 57; r[ 7] = t7  & 0x1ffffffffffffffl;
-    t9   += t8  >> 57; r[ 8] = t8  & 0x1ffffffffffffffl;
-    t10  += t9  >> 57; r[ 9] = t9  & 0x1ffffffffffffffl;
-    t11  += t10 >> 57; r[10] = t10 & 0x1ffffffffffffffl;
-    t12  += t11 >> 57; r[11] = t11 & 0x1ffffffffffffffl;
-    t13  += t12 >> 57; r[12] = t12 & 0x1ffffffffffffffl;
-    t14  += t13 >> 57; r[13] = t13 & 0x1ffffffffffffffl;
-    t15  += t14 >> 57; r[14] = t14 & 0x1ffffffffffffffl;
-    t16  += t15 >> 57; r[15] = t15 & 0x1ffffffffffffffl;
+    t1   += t0  >> 57; r[ 0] = t0  & 0x1ffffffffffffffL;
+    t2   += t1  >> 57; r[ 1] = t1  & 0x1ffffffffffffffL;
+    t3   += t2  >> 57; r[ 2] = t2  & 0x1ffffffffffffffL;
+    t4   += t3  >> 57; r[ 3] = t3  & 0x1ffffffffffffffL;
+    t5   += t4  >> 57; r[ 4] = t4  & 0x1ffffffffffffffL;
+    t6   += t5  >> 57; r[ 5] = t5  & 0x1ffffffffffffffL;
+    t7   += t6  >> 57; r[ 6] = t6  & 0x1ffffffffffffffL;
+    t8   += t7  >> 57; r[ 7] = t7  & 0x1ffffffffffffffL;
+    t9   += t8  >> 57; r[ 8] = t8  & 0x1ffffffffffffffL;
+    t10  += t9  >> 57; r[ 9] = t9  & 0x1ffffffffffffffL;
+    t11  += t10 >> 57; r[10] = t10 & 0x1ffffffffffffffL;
+    t12  += t11 >> 57; r[11] = t11 & 0x1ffffffffffffffL;
+    t13  += t12 >> 57; r[12] = t12 & 0x1ffffffffffffffL;
+    t14  += t13 >> 57; r[13] = t13 & 0x1ffffffffffffffL;
+    t15  += t14 >> 57; r[14] = t14 & 0x1ffffffffffffffL;
+    t16  += t15 >> 57; r[15] = t15 & 0x1ffffffffffffffL;
     r[17] = (sp_digit)(t16 >> 57);
-                       r[16] = t16 & 0x1ffffffffffffffl;
+                       r[16] = t16 & 0x1ffffffffffffffL;
 }
 
 /* Add b to a into r. (r = a + b)
@@ -3664,14 +4423,14 @@
     sp_digit* a1 = z1;
     sp_digit b1[9];
     sp_digit* z2 = r + 18;
-    sp_3072_add_9(a1, a, &a[9]);
-    sp_3072_add_9(b1, b, &b[9]);
+    (void)sp_3072_add_9(a1, a, &a[9]);
+    (void)sp_3072_add_9(b1, b, &b[9]);
     sp_3072_mul_9(z2, &a[9], &b[9]);
     sp_3072_mul_9(z0, a, b);
     sp_3072_mul_9(z1, a1, b1);
-    sp_3072_sub_18(z1, z1, z2);
-    sp_3072_sub_18(z1, z1, z0);
-    sp_3072_add_18(r + 9, r + 9, z1);
+    (void)sp_3072_sub_18(z1, z1, z2);
+    (void)sp_3072_sub_18(z1, z1, z0);
+    (void)sp_3072_add_18(r + 9, r + 9, z1);
 }
 
 /* Square a and put result in r. (r = a * a)
@@ -3685,13 +4444,13 @@
     sp_digit z1[18];
     sp_digit* a1 = z1;
     sp_digit* z2 = r + 18;
-    sp_3072_add_9(a1, a, &a[9]);
+    (void)sp_3072_add_9(a1, a, &a[9]);
     sp_3072_sqr_9(z2, &a[9]);
     sp_3072_sqr_9(z0, a);
     sp_3072_sqr_9(z1, a1);
-    sp_3072_sub_18(z1, z1, z2);
-    sp_3072_sub_18(z1, z1, z0);
-    sp_3072_add_18(r + 9, r + 9, z1);
+    (void)sp_3072_sub_18(z1, z1, z2);
+    (void)sp_3072_sub_18(z1, z1, z0);
+    (void)sp_3072_add_18(r + 9, r + 9, z1);
 }
 
 /* Sub b from a into r. (r = a - b)
@@ -3776,30 +4535,30 @@
     sp_digit b0[18];
     sp_digit b1[18];
     sp_digit b2[18];
-    sp_3072_add_18(a0, a, &a[18]);
-    sp_3072_add_18(b0, b, &b[18]);
-    sp_3072_add_18(a1, &a[18], &a[36]);
-    sp_3072_add_18(b1, &b[18], &b[36]);
-    sp_3072_add_18(a2, a0, &a[36]);
-    sp_3072_add_18(b2, b0, &b[36]);
+    (void)sp_3072_add_18(a0, a, &a[18]);
+    (void)sp_3072_add_18(b0, b, &b[18]);
+    (void)sp_3072_add_18(a1, &a[18], &a[36]);
+    (void)sp_3072_add_18(b1, &b[18], &b[36]);
+    (void)sp_3072_add_18(a2, a0, &a[36]);
+    (void)sp_3072_add_18(b2, b0, &b[36]);
     sp_3072_mul_18(p0, a, b);
     sp_3072_mul_18(p2, &a[18], &b[18]);
     sp_3072_mul_18(p4, &a[36], &b[36]);
     sp_3072_mul_18(p1, a0, b0);
     sp_3072_mul_18(p3, a1, b1);
     sp_3072_mul_18(p5, a2, b2);
-    XMEMSET(r, 0, sizeof(*r)*2*54);
-    sp_3072_sub_36(t0, p3, p2);
-    sp_3072_sub_36(t1, p1, p2);
-    sp_3072_sub_36(t2, p5, t0);
-    sp_3072_sub_36(t2, t2, t1);
-    sp_3072_sub_36(t0, t0, p4);
-    sp_3072_sub_36(t1, t1, p0);
-    sp_3072_add_36(r, r, p0);
-    sp_3072_add_36(&r[18], &r[18], t1);
-    sp_3072_add_36(&r[36], &r[36], t2);
-    sp_3072_add_36(&r[54], &r[54], t0);
-    sp_3072_add_36(&r[72], &r[72], p4);
+    XMEMSET(r, 0, sizeof(*r)*2U*54U);
+    (void)sp_3072_sub_36(t0, p3, p2);
+    (void)sp_3072_sub_36(t1, p1, p2);
+    (void)sp_3072_sub_36(t2, p5, t0);
+    (void)sp_3072_sub_36(t2, t2, t1);
+    (void)sp_3072_sub_36(t0, t0, p4);
+    (void)sp_3072_sub_36(t1, t1, p0);
+    (void)sp_3072_add_36(r, r, p0);
+    (void)sp_3072_add_36(&r[18], &r[18], t1);
+    (void)sp_3072_add_36(&r[36], &r[36], t2);
+    (void)sp_3072_add_36(&r[54], &r[54], t0);
+    (void)sp_3072_add_36(&r[72], &r[72], p4);
 }
 
 /* Square a into r. (r = a * a)
@@ -3821,30 +4580,30 @@
     sp_digit a0[18];
     sp_digit a1[18];
     sp_digit a2[18];
-    sp_3072_add_18(a0, a, &a[18]);
-    sp_3072_add_18(a1, &a[18], &a[36]);
-    sp_3072_add_18(a2, a0, &a[36]);
+    (void)sp_3072_add_18(a0, a, &a[18]);
+    (void)sp_3072_add_18(a1, &a[18], &a[36]);
+    (void)sp_3072_add_18(a2, a0, &a[36]);
     sp_3072_sqr_18(p0, a);
     sp_3072_sqr_18(p2, &a[18]);
     sp_3072_sqr_18(p4, &a[36]);
     sp_3072_sqr_18(p1, a0);
     sp_3072_sqr_18(p3, a1);
     sp_3072_sqr_18(p5, a2);
-    XMEMSET(r, 0, sizeof(*r)*2*54);
-    sp_3072_sub_36(t0, p3, p2);
-    sp_3072_sub_36(t1, p1, p2);
-    sp_3072_sub_36(t2, p5, t0);
-    sp_3072_sub_36(t2, t2, t1);
-    sp_3072_sub_36(t0, t0, p4);
-    sp_3072_sub_36(t1, t1, p0);
-    sp_3072_add_36(r, r, p0);
-    sp_3072_add_36(&r[18], &r[18], t1);
-    sp_3072_add_36(&r[36], &r[36], t2);
-    sp_3072_add_36(&r[54], &r[54], t0);
-    sp_3072_add_36(&r[72], &r[72], p4);
-}
-
-#endif /* WOLFSSL_SP_SMALL */
+    XMEMSET(r, 0, sizeof(*r)*2U*54U);
+    (void)sp_3072_sub_36(t0, p3, p2);
+    (void)sp_3072_sub_36(t1, p1, p2);
+    (void)sp_3072_sub_36(t2, p5, t0);
+    (void)sp_3072_sub_36(t2, t2, t1);
+    (void)sp_3072_sub_36(t0, t0, p4);
+    (void)sp_3072_sub_36(t1, t1, p0);
+    (void)sp_3072_add_36(r, r, p0);
+    (void)sp_3072_add_36(&r[18], &r[18], t1);
+    (void)sp_3072_add_36(&r[36], &r[36], t2);
+    (void)sp_3072_add_36(&r[54], &r[54], t0);
+    (void)sp_3072_add_36(&r[72], &r[72], p4);
+}
+
+#endif /* !WOLFSSL_SP_SMALL */
 #ifdef WOLFSSL_SP_SMALL
 /* Add b to a into r. (r = a + b)
  *
@@ -3857,8 +4616,9 @@
 {
     int i;
 
-    for (i = 0; i < 54; i++)
+    for (i = 0; i < 54; i++) {
         r[i] = a[i] + b[i];
+    }
 
     return 0;
 }
@@ -3907,8 +4667,9 @@
 {
     int i;
 
-    for (i = 0; i < 54; i++)
+    for (i = 0; i < 54; i++) {
         r[i] = a[i] - b[i];
+    }
 
     return 0;
 }
@@ -3961,20 +4722,22 @@
 
     c = ((int128_t)a[53]) * b[53];
     r[107] = (sp_digit)(c >> 57);
-    c = (c & 0x1ffffffffffffffl) << 57;
+    c = (c & 0x1ffffffffffffffL) << 57;
     for (k = 105; k >= 0; k--) {
         for (i = 53; i >= 0; i--) {
             j = k - i;
-            if (j >= 54)
-                break;
-            if (j < 0)
+            if (j >= 54) {
+                break;
+            }
+            if (j < 0) {
                 continue;
+            }
 
             c += ((int128_t)a[i]) * b[j];
         }
         r[k + 2] += c >> 114;
-        r[k + 1] = (c >> 57) & 0x1ffffffffffffffl;
-        c = (c & 0x1ffffffffffffffl) << 57;
+        r[k + 1] = (c >> 57) & 0x1ffffffffffffffL;
+        c = (c & 0x1ffffffffffffffL) << 57;
     }
     r[0] = (sp_digit)(c >> 57);
 }
@@ -3991,29 +4754,32 @@
 
     c = ((int128_t)a[53]) * a[53];
     r[107] = (sp_digit)(c >> 57);
-    c = (c & 0x1ffffffffffffffl) << 57;
+    c = (c & 0x1ffffffffffffffL) << 57;
     for (k = 105; k >= 0; k--) {
         for (i = 53; i >= 0; i--) {
             j = k - i;
-            if (j >= 54 || i <= j)
-                break;
-            if (j < 0)
+            if (j >= 54 || i <= j) {
+                break;
+            }
+            if (j < 0) {
                 continue;
+            }
 
             c += ((int128_t)a[i]) * a[j] * 2;
         }
-        if (i == j)
+        if (i == j) {
            c += ((int128_t)a[i]) * a[i];
+        }
 
         r[k + 2] += c >> 114;
-        r[k + 1] = (c >> 57) & 0x1ffffffffffffffl;
-        c = (c & 0x1ffffffffffffffl) << 57;
+        r[k + 1] = (c >> 57) & 0x1ffffffffffffffL;
+        c = (c & 0x1ffffffffffffffL) << 57;
     }
     r[0] = (sp_digit)(c >> 57);
 }
 
 #endif /* WOLFSSL_SP_SMALL */
-#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA)
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
 #ifdef WOLFSSL_SP_SMALL
 /* Add b to a into r. (r = a + b)
  *
@@ -4026,8 +4792,9 @@
 {
     int i;
 
-    for (i = 0; i < 27; i++)
+    for (i = 0; i < 27; i++) {
         r[i] = a[i] + b[i];
+    }
 
     return 0;
 }
@@ -4073,8 +4840,9 @@
 {
     int i;
 
-    for (i = 0; i < 27; i++)
+    for (i = 0; i < 27; i++) {
         r[i] = a[i] - b[i];
+    }
 
     return 0;
 }
@@ -4124,20 +4892,22 @@
 
     c = ((int128_t)a[26]) * b[26];
     r[53] = (sp_digit)(c >> 57);
-    c = (c & 0x1ffffffffffffffl) << 57;
+    c = (c & 0x1ffffffffffffffL) << 57;
     for (k = 51; k >= 0; k--) {
         for (i = 26; i >= 0; i--) {
             j = k - i;
-            if (j >= 27)
-                break;
-            if (j < 0)
+            if (j >= 27) {
+                break;
+            }
+            if (j < 0) {
                 continue;
+            }
 
             c += ((int128_t)a[i]) * b[j];
         }
         r[k + 2] += c >> 114;
-        r[k + 1] = (c >> 57) & 0x1ffffffffffffffl;
-        c = (c & 0x1ffffffffffffffl) << 57;
+        r[k + 1] = (c >> 57) & 0x1ffffffffffffffL;
+        c = (c & 0x1ffffffffffffffL) << 57;
     }
     r[0] = (sp_digit)(c >> 57);
 }
@@ -4157,11 +4927,12 @@
 
     XMEMSET(t, 0, sizeof(t));
     for (i=0; i<27; i++) {
-        for (j=0; j<27; j++)
+        for (j=0; j<27; j++) {
             t[i+j] += ((int128_t)a[i]) * b[j];
+        }
     }
     for (i=0; i<53; i++) {
-        r[i] = t[i] & 0x1ffffffffffffffl;
+        r[i] = t[i] & 0x1ffffffffffffffL;
         t[i+1] += t[i] >> 57;
     }
     r[53] = (sp_digit)t[53];
@@ -4181,23 +4952,26 @@
 
     c = ((int128_t)a[26]) * a[26];
     r[53] = (sp_digit)(c >> 57);
-    c = (c & 0x1ffffffffffffffl) << 57;
+    c = (c & 0x1ffffffffffffffL) << 57;
     for (k = 51; k >= 0; k--) {
         for (i = 26; i >= 0; i--) {
             j = k - i;
-            if (j >= 27 || i <= j)
-                break;
-            if (j < 0)
+            if (j >= 27 || i <= j) {
+                break;
+            }
+            if (j < 0) {
                 continue;
+            }
 
             c += ((int128_t)a[i]) * a[j] * 2;
         }
-        if (i == j)
+        if (i == j) {
            c += ((int128_t)a[i]) * a[i];
+        }
 
         r[k + 2] += c >> 114;
-        r[k + 1] = (c >> 57) & 0x1ffffffffffffffl;
-        c = (c & 0x1ffffffffffffffl) << 57;
+        r[k + 1] = (c >> 57) & 0x1ffffffffffffffL;
+        c = (c & 0x1ffffffffffffffL) << 57;
     }
     r[0] = (sp_digit)(c >> 57);
 }
@@ -4215,26 +4989,27 @@
 
     XMEMSET(t, 0, sizeof(t));
     for (i=0; i<27; i++) {
-        for (j=0; j<i; j++)
+        for (j=0; j<i; j++) {
             t[i+j] += (((int128_t)a[i]) * a[j]) * 2;
+        }
         t[i+i] += ((int128_t)a[i]) * a[i];
     }
     for (i=0; i<53; i++) {
-        r[i] = t[i] & 0x1ffffffffffffffl;
+        r[i] = t[i] & 0x1ffffffffffffffL;
         t[i+1] += t[i] >> 57;
     }
     r[53] = (sp_digit)t[53];
 }
 
 #endif /* WOLFSSL_SP_SMALL */
-#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
 
 /* Caclulate the bottom digit of -1/a mod 2^n.
  *
  * a    A single precision number.
  * rho  Bottom word of inverse.
  */
-static void sp_3072_mont_setup(sp_digit* a, sp_digit* rho)
+static void sp_3072_mont_setup(const sp_digit* a, sp_digit* rho)
 {
     sp_digit x, b;
 
@@ -4244,47 +5019,106 @@
     x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
     x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
     x *= 2 - b * x;               /* here x*a==1 mod 2**64 */
-    x &= 0x1ffffffffffffffl;
+    x &= 0x1ffffffffffffffL;
 
     /* rho = -1/m mod b */
     *rho = (1L << 57) - x;
 }
 
-#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA)
+/* Multiply a by scalar b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A scalar.
+ */
+SP_NOINLINE static void sp_3072_mul_d_54(sp_digit* r, const sp_digit* a,
+    sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int128_t tb = b;
+    int128_t t = 0;
+    int i;
+
+    for (i = 0; i < 54; i++) {
+        t += tb * a[i];
+        r[i] = t & 0x1ffffffffffffffL;
+        t >>= 57;
+    }
+    r[54] = (sp_digit)t;
+#else
+    int128_t tb = b;
+    int128_t t[8];
+    int i;
+
+    t[0] = tb * a[0]; r[0] = t[0] & 0x1ffffffffffffffL;
+    for (i = 0; i < 48; i += 8) {
+        t[1] = tb * a[i+1];
+        r[i+1] = (sp_digit)(t[0] >> 57) + (t[1] & 0x1ffffffffffffffL);
+        t[2] = tb * a[i+2];
+        r[i+2] = (sp_digit)(t[1] >> 57) + (t[2] & 0x1ffffffffffffffL);
+        t[3] = tb * a[i+3];
+        r[i+3] = (sp_digit)(t[2] >> 57) + (t[3] & 0x1ffffffffffffffL);
+        t[4] = tb * a[i+4];
+        r[i+4] = (sp_digit)(t[3] >> 57) + (t[4] & 0x1ffffffffffffffL);
+        t[5] = tb * a[i+5];
+        r[i+5] = (sp_digit)(t[4] >> 57) + (t[5] & 0x1ffffffffffffffL);
+        t[6] = tb * a[i+6];
+        r[i+6] = (sp_digit)(t[5] >> 57) + (t[6] & 0x1ffffffffffffffL);
+        t[7] = tb * a[i+7];
+        r[i+7] = (sp_digit)(t[6] >> 57) + (t[7] & 0x1ffffffffffffffL);
+        t[0] = tb * a[i+8];
+        r[i+8] = (sp_digit)(t[7] >> 57) + (t[0] & 0x1ffffffffffffffL);
+    }
+    t[1] = tb * a[49];
+    r[49] = (sp_digit)(t[0] >> 57) + (t[1] & 0x1ffffffffffffffL);
+    t[2] = tb * a[50];
+    r[50] = (sp_digit)(t[1] >> 57) + (t[2] & 0x1ffffffffffffffL);
+    t[3] = tb * a[51];
+    r[51] = (sp_digit)(t[2] >> 57) + (t[3] & 0x1ffffffffffffffL);
+    t[4] = tb * a[52];
+    r[52] = (sp_digit)(t[3] >> 57) + (t[4] & 0x1ffffffffffffffL);
+    t[5] = tb * a[53];
+    r[53] = (sp_digit)(t[4] >> 57) + (t[5] & 0x1ffffffffffffffL);
+    r[54] =  (sp_digit)(t[5] >> 57);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
 /* r = 2^n mod m where n is the number of bits to reduce by.
  * Given m must be 3072 bits, just need to subtract.
  *
  * r  A single precision number.
- * m  A signle precision number.
- */
-static void sp_3072_mont_norm_27(sp_digit* r, sp_digit* m)
+ * m  A single precision number.
+ */
+static void sp_3072_mont_norm_27(sp_digit* r, const sp_digit* m)
 {
     /* Set r = 2^n - 1. */
 #ifdef WOLFSSL_SP_SMALL
     int i;
 
-    for (i=0; i<26; i++)
-        r[i] = 0x1ffffffffffffffl;
+    for (i=0; i<26; i++) {
+        r[i] = 0x1ffffffffffffffL;
+    }
 #else
     int i;
 
     for (i = 0; i < 24; i += 8) {
-        r[i + 0] = 0x1ffffffffffffffl;
-        r[i + 1] = 0x1ffffffffffffffl;
-        r[i + 2] = 0x1ffffffffffffffl;
-        r[i + 3] = 0x1ffffffffffffffl;
-        r[i + 4] = 0x1ffffffffffffffl;
-        r[i + 5] = 0x1ffffffffffffffl;
-        r[i + 6] = 0x1ffffffffffffffl;
-        r[i + 7] = 0x1ffffffffffffffl;
-    }
-    r[24] = 0x1ffffffffffffffl;
-    r[25] = 0x1ffffffffffffffl;
-#endif
-    r[26] = 0x3fffffffffffffl;
+        r[i + 0] = 0x1ffffffffffffffL;
+        r[i + 1] = 0x1ffffffffffffffL;
+        r[i + 2] = 0x1ffffffffffffffL;
+        r[i + 3] = 0x1ffffffffffffffL;
+        r[i + 4] = 0x1ffffffffffffffL;
+        r[i + 5] = 0x1ffffffffffffffL;
+        r[i + 6] = 0x1ffffffffffffffL;
+        r[i + 7] = 0x1ffffffffffffffL;
+    }
+    r[24] = 0x1ffffffffffffffL;
+    r[25] = 0x1ffffffffffffffL;
+#endif
+    r[26] = 0x3fffffffffffffL;
 
     /* r = (2^n - 1) mod n */
-    sp_3072_sub_27(r, r, m);
+    (void)sp_3072_sub_27(r, r, m);
 
     /* Add one so r = 2^n mod m */
     r[0] += 1;
@@ -4303,23 +5137,24 @@
 #ifdef WOLFSSL_SP_SMALL
     int i;
 
-    for (i=26; i>=0; i--)
-        r |= (a[i] - b[i]) & (0 - !r);
-#else
-    int i;
-
-    r |= (a[26] - b[26]) & (0 - !r);
-    r |= (a[25] - b[25]) & (0 - !r);
-    r |= (a[24] - b[24]) & (0 - !r);
+    for (i=26; i>=0; i--) {
+        r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    }
+#else
+    int i;
+
+    r |= (a[26] - b[26]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[25] - b[25]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[24] - b[24]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
     for (i = 16; i >= 0; i -= 8) {
-        r |= (a[i + 7] - b[i + 7]) & (0 - !r);
-        r |= (a[i + 6] - b[i + 6]) & (0 - !r);
-        r |= (a[i + 5] - b[i + 5]) & (0 - !r);
-        r |= (a[i + 4] - b[i + 4]) & (0 - !r);
-        r |= (a[i + 3] - b[i + 3]) & (0 - !r);
-        r |= (a[i + 2] - b[i + 2]) & (0 - !r);
-        r |= (a[i + 1] - b[i + 1]) & (0 - !r);
-        r |= (a[i + 0] - b[i + 0]) & (0 - !r);
+        r |= (a[i + 7] - b[i + 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 6] - b[i + 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 5] - b[i + 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 4] - b[i + 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 3] - b[i + 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 2] - b[i + 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 1] - b[i + 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 0] - b[i + 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
     }
 #endif /* WOLFSSL_SP_SMALL */
 
@@ -4340,8 +5175,9 @@
 #ifdef WOLFSSL_SP_SMALL
     int i;
 
-    for (i = 0; i < 27; i++)
+    for (i = 0; i < 27; i++) {
         r[i] = a[i] - (b[i] & m);
+    }
 #else
     int i;
 
@@ -4377,7 +5213,7 @@
 
     for (i = 0; i < 27; i++) {
         t += (tb * a[i]) + r[i];
-        r[i] = t & 0x1ffffffffffffffl;
+        r[i] = t & 0x1ffffffffffffffL;
         t >>= 57;
     }
     r[27] += t;
@@ -4386,28 +5222,28 @@
     int128_t t[8];
     int i;
 
-    t[0] = tb * a[0]; r[0] += t[0] & 0x1ffffffffffffffl;
+    t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1ffffffffffffffL);
     for (i = 0; i < 24; i += 8) {
         t[1] = tb * a[i+1];
-        r[i+1] += (t[0] >> 57) + (t[1] & 0x1ffffffffffffffl);
+        r[i+1] += (sp_digit)((t[0] >> 57) + (t[1] & 0x1ffffffffffffffL));
         t[2] = tb * a[i+2];
-        r[i+2] += (t[1] >> 57) + (t[2] & 0x1ffffffffffffffl);
+        r[i+2] += (sp_digit)((t[1] >> 57) + (t[2] & 0x1ffffffffffffffL));
         t[3] = tb * a[i+3];
-        r[i+3] += (t[2] >> 57) + (t[3] & 0x1ffffffffffffffl);
+        r[i+3] += (sp_digit)((t[2] >> 57) + (t[3] & 0x1ffffffffffffffL));
         t[4] = tb * a[i+4];
-        r[i+4] += (t[3] >> 57) + (t[4] & 0x1ffffffffffffffl);
+        r[i+4] += (sp_digit)((t[3] >> 57) + (t[4] & 0x1ffffffffffffffL));
         t[5] = tb * a[i+5];
-        r[i+5] += (t[4] >> 57) + (t[5] & 0x1ffffffffffffffl);
+        r[i+5] += (sp_digit)((t[4] >> 57) + (t[5] & 0x1ffffffffffffffL));
         t[6] = tb * a[i+6];
-        r[i+6] += (t[5] >> 57) + (t[6] & 0x1ffffffffffffffl);
+        r[i+6] += (sp_digit)((t[5] >> 57) + (t[6] & 0x1ffffffffffffffL));
         t[7] = tb * a[i+7];
-        r[i+7] += (t[6] >> 57) + (t[7] & 0x1ffffffffffffffl);
+        r[i+7] += (sp_digit)((t[6] >> 57) + (t[7] & 0x1ffffffffffffffL));
         t[0] = tb * a[i+8];
-        r[i+8] += (t[7] >> 57) + (t[0] & 0x1ffffffffffffffl);
-    }
-    t[1] = tb * a[25]; r[25] += (t[0] >> 57) + (t[1] & 0x1ffffffffffffffl);
-    t[2] = tb * a[26]; r[26] += (t[1] >> 57) + (t[2] & 0x1ffffffffffffffl);
-    r[27] +=  t[2] >> 57;
+        r[i+8] += (sp_digit)((t[7] >> 57) + (t[0] & 0x1ffffffffffffffL));
+    }
+    t[1] = tb * a[25]; r[25] += (sp_digit)((t[0] >> 57) + (t[1] & 0x1ffffffffffffffL));
+    t[2] = tb * a[26]; r[26] += (sp_digit)((t[1] >> 57) + (t[2] & 0x1ffffffffffffffL));
+    r[27] +=  (sp_digit)(t[2] >> 57);
 #endif /* WOLFSSL_SP_SMALL */
 }
 
@@ -4421,25 +5257,25 @@
     int i;
     for (i = 0; i < 26; i++) {
         a[i+1] += a[i] >> 57;
-        a[i] &= 0x1ffffffffffffffl;
+        a[i] &= 0x1ffffffffffffffL;
     }
 #else
     int i;
     for (i = 0; i < 24; i += 8) {
-        a[i+1] += a[i+0] >> 57; a[i+0] &= 0x1ffffffffffffffl;
-        a[i+2] += a[i+1] >> 57; a[i+1] &= 0x1ffffffffffffffl;
-        a[i+3] += a[i+2] >> 57; a[i+2] &= 0x1ffffffffffffffl;
-        a[i+4] += a[i+3] >> 57; a[i+3] &= 0x1ffffffffffffffl;
-        a[i+5] += a[i+4] >> 57; a[i+4] &= 0x1ffffffffffffffl;
-        a[i+6] += a[i+5] >> 57; a[i+5] &= 0x1ffffffffffffffl;
-        a[i+7] += a[i+6] >> 57; a[i+6] &= 0x1ffffffffffffffl;
-        a[i+8] += a[i+7] >> 57; a[i+7] &= 0x1ffffffffffffffl;
-        a[i+9] += a[i+8] >> 57; a[i+8] &= 0x1ffffffffffffffl;
+        a[i+1] += a[i+0] >> 57; a[i+0] &= 0x1ffffffffffffffL;
+        a[i+2] += a[i+1] >> 57; a[i+1] &= 0x1ffffffffffffffL;
+        a[i+3] += a[i+2] >> 57; a[i+2] &= 0x1ffffffffffffffL;
+        a[i+4] += a[i+3] >> 57; a[i+3] &= 0x1ffffffffffffffL;
+        a[i+5] += a[i+4] >> 57; a[i+4] &= 0x1ffffffffffffffL;
+        a[i+6] += a[i+5] >> 57; a[i+5] &= 0x1ffffffffffffffL;
+        a[i+7] += a[i+6] >> 57; a[i+6] &= 0x1ffffffffffffffL;
+        a[i+8] += a[i+7] >> 57; a[i+7] &= 0x1ffffffffffffffL;
+        a[i+9] += a[i+8] >> 57; a[i+8] &= 0x1ffffffffffffffL;
     }
     a[24+1] += a[24] >> 57;
-    a[24] &= 0x1ffffffffffffffl;
+    a[24] &= 0x1ffffffffffffffL;
     a[25+1] += a[25] >> 57;
-    a[25] &= 0x1ffffffffffffffl;
+    a[25] &= 0x1ffffffffffffffL;
 #endif
 }
 
@@ -4457,8 +5293,8 @@
     s = a[27];
     n = a[26] >> 54;
     for (i = 0; i < 26; i++) {
-        n += (s & 0x1ffffffffffffffl) << 3;
-        r[i] = n & 0x1ffffffffffffffl;
+        n += (s & 0x1ffffffffffffffL) << 3;
+        r[i] = n & 0x1ffffffffffffffL;
         n >>= 57;
         s = a[28 + i] + (s >> 57);
     }
@@ -4470,30 +5306,30 @@
 
     s = a[27]; n = a[26] >> 54;
     for (i = 0; i < 24; i += 8) {
-        n += (s & 0x1ffffffffffffffl) << 3; r[i+0] = n & 0x1ffffffffffffffl;
+        n += (s & 0x1ffffffffffffffL) << 3; r[i+0] = n & 0x1ffffffffffffffL;
         n >>= 57; s = a[i+28] + (s >> 57);
-        n += (s & 0x1ffffffffffffffl) << 3; r[i+1] = n & 0x1ffffffffffffffl;
+        n += (s & 0x1ffffffffffffffL) << 3; r[i+1] = n & 0x1ffffffffffffffL;
         n >>= 57; s = a[i+29] + (s >> 57);
-        n += (s & 0x1ffffffffffffffl) << 3; r[i+2] = n & 0x1ffffffffffffffl;
+        n += (s & 0x1ffffffffffffffL) << 3; r[i+2] = n & 0x1ffffffffffffffL;
         n >>= 57; s = a[i+30] + (s >> 57);
-        n += (s & 0x1ffffffffffffffl) << 3; r[i+3] = n & 0x1ffffffffffffffl;
+        n += (s & 0x1ffffffffffffffL) << 3; r[i+3] = n & 0x1ffffffffffffffL;
         n >>= 57; s = a[i+31] + (s >> 57);
-        n += (s & 0x1ffffffffffffffl) << 3; r[i+4] = n & 0x1ffffffffffffffl;
+        n += (s & 0x1ffffffffffffffL) << 3; r[i+4] = n & 0x1ffffffffffffffL;
         n >>= 57; s = a[i+32] + (s >> 57);
-        n += (s & 0x1ffffffffffffffl) << 3; r[i+5] = n & 0x1ffffffffffffffl;
+        n += (s & 0x1ffffffffffffffL) << 3; r[i+5] = n & 0x1ffffffffffffffL;
         n >>= 57; s = a[i+33] + (s >> 57);
-        n += (s & 0x1ffffffffffffffl) << 3; r[i+6] = n & 0x1ffffffffffffffl;
+        n += (s & 0x1ffffffffffffffL) << 3; r[i+6] = n & 0x1ffffffffffffffL;
         n >>= 57; s = a[i+34] + (s >> 57);
-        n += (s & 0x1ffffffffffffffl) << 3; r[i+7] = n & 0x1ffffffffffffffl;
+        n += (s & 0x1ffffffffffffffL) << 3; r[i+7] = n & 0x1ffffffffffffffL;
         n >>= 57; s = a[i+35] + (s >> 57);
     }
-    n += (s & 0x1ffffffffffffffl) << 3; r[24] = n & 0x1ffffffffffffffl;
+    n += (s & 0x1ffffffffffffffL) << 3; r[24] = n & 0x1ffffffffffffffL;
     n >>= 57; s = a[52] + (s >> 57);
-    n += (s & 0x1ffffffffffffffl) << 3; r[25] = n & 0x1ffffffffffffffl;
+    n += (s & 0x1ffffffffffffffL) << 3; r[25] = n & 0x1ffffffffffffffL;
     n >>= 57; s = a[53] + (s >> 57);
     n += s << 3;              r[26] = n;
 #endif /* WOLFSSL_SP_SMALL */
-    XMEMSET(&r[27], 0, sizeof(*r) * 27);
+    XMEMSET(&r[27], 0, sizeof(*r) * 27U);
 }
 
 /* Reduce the number back to 3072 bits using Montgomery reduction.
@@ -4502,23 +5338,26 @@
  * m   The single precision number representing the modulus.
  * mp  The digit representing the negative inverse of m mod 2^n.
  */
-static void sp_3072_mont_reduce_27(sp_digit* a, sp_digit* m, sp_digit mp)
+static void sp_3072_mont_reduce_27(sp_digit* a, const sp_digit* m, sp_digit mp)
 {
     int i;
     sp_digit mu;
 
+    sp_3072_norm_27(a + 27);
+
     for (i=0; i<26; i++) {
-        mu = (a[i] * mp) & 0x1ffffffffffffffl;
+        mu = (a[i] * mp) & 0x1ffffffffffffffL;
         sp_3072_mul_add_27(a+i, m, mu);
         a[i+1] += a[i] >> 57;
     }
-    mu = (a[i] * mp) & 0x3fffffffffffffl;
+    mu = (a[i] * mp) & 0x3fffffffffffffL;
     sp_3072_mul_add_27(a+i, m, mu);
     a[i+1] += a[i] >> 57;
-    a[i] &= 0x1ffffffffffffffl;
+    a[i] &= 0x1ffffffffffffffL;
 
     sp_3072_mont_shift_27(a, a);
-    sp_3072_cond_sub_27(a, a, m, 0 - ((a[26] >> 54) > 0));
+    sp_3072_cond_sub_27(a, a, m, 0 - (((a[26] >> 54) > 0) ?
+            (sp_digit)1 : (sp_digit)0));
     sp_3072_norm_27(a);
 }
 
@@ -4531,8 +5370,8 @@
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_3072_mont_mul_27(sp_digit* r, sp_digit* a, sp_digit* b,
-        sp_digit* m, sp_digit mp)
+static void sp_3072_mont_mul_27(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
 {
     sp_3072_mul_27(r, a, b);
     sp_3072_mont_reduce_27(r, m, mp);
@@ -4545,7 +5384,7 @@
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_3072_mont_sqr_27(sp_digit* r, sp_digit* a, sp_digit* m,
+static void sp_3072_mont_sqr_27(sp_digit* r, const sp_digit* a, const sp_digit* m,
         sp_digit mp)
 {
     sp_3072_sqr_27(r, a);
@@ -4559,7 +5398,7 @@
  * b  A scalar.
  */
 SP_NOINLINE static void sp_3072_mul_d_27(sp_digit* r, const sp_digit* a,
-    const sp_digit b)
+    sp_digit b)
 {
 #ifdef WOLFSSL_SP_SMALL
     int128_t tb = b;
@@ -4568,7 +5407,7 @@
 
     for (i = 0; i < 27; i++) {
         t += tb * a[i];
-        r[i] = t & 0x1ffffffffffffffl;
+        r[i] = t & 0x1ffffffffffffffL;
         t >>= 57;
     }
     r[27] = (sp_digit)t;
@@ -4577,29 +5416,29 @@
     int128_t t[8];
     int i;
 
-    t[0] = tb * a[0]; r[0] = t[0] & 0x1ffffffffffffffl;
+    t[0] = tb * a[0]; r[0] = t[0] & 0x1ffffffffffffffL;
     for (i = 0; i < 24; i += 8) {
         t[1] = tb * a[i+1];
-        r[i+1] = (sp_digit)(t[0] >> 57) + (t[1] & 0x1ffffffffffffffl);
+        r[i+1] = (sp_digit)(t[0] >> 57) + (t[1] & 0x1ffffffffffffffL);
         t[2] = tb * a[i+2];
-        r[i+2] = (sp_digit)(t[1] >> 57) + (t[2] & 0x1ffffffffffffffl);
+        r[i+2] = (sp_digit)(t[1] >> 57) + (t[2] & 0x1ffffffffffffffL);
         t[3] = tb * a[i+3];
-        r[i+3] = (sp_digit)(t[2] >> 57) + (t[3] & 0x1ffffffffffffffl);
+        r[i+3] = (sp_digit)(t[2] >> 57) + (t[3] & 0x1ffffffffffffffL);
         t[4] = tb * a[i+4];
-        r[i+4] = (sp_digit)(t[3] >> 57) + (t[4] & 0x1ffffffffffffffl);
+        r[i+4] = (sp_digit)(t[3] >> 57) + (t[4] & 0x1ffffffffffffffL);
         t[5] = tb * a[i+5];
-        r[i+5] = (sp_digit)(t[4] >> 57) + (t[5] & 0x1ffffffffffffffl);
+        r[i+5] = (sp_digit)(t[4] >> 57) + (t[5] & 0x1ffffffffffffffL);
         t[6] = tb * a[i+6];
-        r[i+6] = (sp_digit)(t[5] >> 57) + (t[6] & 0x1ffffffffffffffl);
+        r[i+6] = (sp_digit)(t[5] >> 57) + (t[6] & 0x1ffffffffffffffL);
         t[7] = tb * a[i+7];
-        r[i+7] = (sp_digit)(t[6] >> 57) + (t[7] & 0x1ffffffffffffffl);
+        r[i+7] = (sp_digit)(t[6] >> 57) + (t[7] & 0x1ffffffffffffffL);
         t[0] = tb * a[i+8];
-        r[i+8] = (sp_digit)(t[7] >> 57) + (t[0] & 0x1ffffffffffffffl);
+        r[i+8] = (sp_digit)(t[7] >> 57) + (t[0] & 0x1ffffffffffffffL);
     }
     t[1] = tb * a[25];
-    r[25] = (sp_digit)(t[0] >> 57) + (t[1] & 0x1ffffffffffffffl);
+    r[25] = (sp_digit)(t[0] >> 57) + (t[1] & 0x1ffffffffffffffL);
     t[2] = tb * a[26];
-    r[26] = (sp_digit)(t[1] >> 57) + (t[2] & 0x1ffffffffffffffl);
+    r[26] = (sp_digit)(t[1] >> 57) + (t[2] & 0x1ffffffffffffffL);
     r[27] =  (sp_digit)(t[2] >> 57);
 #endif /* WOLFSSL_SP_SMALL */
 }
@@ -4618,8 +5457,9 @@
 #ifdef WOLFSSL_SP_SMALL
     int i;
 
-    for (i = 0; i < 27; i++)
+    for (i = 0; i < 27; i++) {
         r[i] = a[i] + (b[i] & m);
+    }
 #else
     int i;
 
@@ -4639,22 +5479,110 @@
 #endif /* WOLFSSL_SP_SMALL */
 }
 
+#ifdef WOLFSSL_SP_DIV_64
+static WC_INLINE sp_digit sp_3072_div_word_27(sp_digit d1, sp_digit d0,
+    sp_digit dv)
+{
+    sp_digit d, r, t;
+
+    /* All 57 bits from d1 and top 6 bits from d0. */
+    d = (d1 << 6) | (d0 >> 51);
+    r = d / dv;
+    d -= r * dv;
+    /* Up to 7 bits in r */
+    /* Next 6 bits from d0. */
+    r <<= 6;
+    d <<= 6;
+    d |= (d0 >> 45) & ((1 << 6) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 13 bits in r */
+    /* Next 6 bits from d0. */
+    r <<= 6;
+    d <<= 6;
+    d |= (d0 >> 39) & ((1 << 6) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 19 bits in r */
+    /* Next 6 bits from d0. */
+    r <<= 6;
+    d <<= 6;
+    d |= (d0 >> 33) & ((1 << 6) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 25 bits in r */
+    /* Next 6 bits from d0. */
+    r <<= 6;
+    d <<= 6;
+    d |= (d0 >> 27) & ((1 << 6) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 31 bits in r */
+    /* Next 6 bits from d0. */
+    r <<= 6;
+    d <<= 6;
+    d |= (d0 >> 21) & ((1 << 6) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 37 bits in r */
+    /* Next 6 bits from d0. */
+    r <<= 6;
+    d <<= 6;
+    d |= (d0 >> 15) & ((1 << 6) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 43 bits in r */
+    /* Next 6 bits from d0. */
+    r <<= 6;
+    d <<= 6;
+    d |= (d0 >> 9) & ((1 << 6) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 49 bits in r */
+    /* Next 6 bits from d0. */
+    r <<= 6;
+    d <<= 6;
+    d |= (d0 >> 3) & ((1 << 6) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 55 bits in r */
+    /* Remaining 3 bits from d0. */
+    r <<= 3;
+    d <<= 3;
+    d |= d0 & ((1 << 3) - 1);
+    t = d / dv;
+    r += t;
+
+    return r;
+}
+#endif /* WOLFSSL_SP_DIV_64 */
+
 /* Divide d in a and put remainder into r (m*d + r = a)
  * m is not calculated as it is not needed at this time.
  *
- * a  Nmber to be divided.
+ * a  Number to be divided.
  * d  Number to divide with.
  * m  Multiplier result.
  * r  Remainder from the division.
  * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
  */
-static int sp_3072_div_27(sp_digit* a, sp_digit* d, sp_digit* m,
+static int sp_3072_div_27(const sp_digit* a, const sp_digit* d, sp_digit* m,
         sp_digit* r)
 {
     int i;
+#ifndef WOLFSSL_SP_DIV_64
     int128_t d1;
-    sp_digit div, r1;
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#endif
+    sp_digit dv, r1;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit* td;
 #else
     sp_digit t1d[54], t2d[27 + 1];
@@ -4663,62 +5591,70 @@
     sp_digit* t2;
     int err = MP_OKAY;
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    td = XMALLOC(sizeof(sp_digit) * (3 * 27 + 1), NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    if (td != NULL) {
+    (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (3 * 27 + 1), NULL,
+                                                       DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
         t1 = td;
         t2 = td + 2 * 27;
-    }
-    else
-        err = MEMORY_E;
-#else
-    t1 = t1d;
-    t2 = t2d;
-#endif
-
-    (void)m;
-
-    if (err == MP_OKAY) {
-        div = d[26];
-        XMEMCPY(t1, a, sizeof(*t1) * 2 * 27);
+#else
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
+        dv = d[26];
+        XMEMCPY(t1, a, sizeof(*t1) * 2U * 27U);
         for (i=26; i>=0; i--) {
             t1[27 + i] += t1[27 + i - 1] >> 57;
-            t1[27 + i - 1] &= 0x1ffffffffffffffl;
+            t1[27 + i - 1] &= 0x1ffffffffffffffL;
+#ifndef WOLFSSL_SP_DIV_64
             d1 = t1[27 + i];
             d1 <<= 57;
             d1 += t1[27 + i - 1];
-            r1 = (sp_digit)(d1 / div);
+            r1 = (sp_digit)(d1 / dv);
+#else
+            r1 = sp_3072_div_word_27(t1[27 + i], t1[27 + i - 1], dv);
+#endif
 
             sp_3072_mul_d_27(t2, d, r1);
-            sp_3072_sub_27(&t1[i], &t1[i], t2);
+            (void)sp_3072_sub_27(&t1[i], &t1[i], t2);
             t1[27 + i] -= t2[27];
             t1[27 + i] += t1[27 + i - 1] >> 57;
-            t1[27 + i - 1] &= 0x1ffffffffffffffl;
-            r1 = (((-t1[27 + i]) << 57) - t1[27 + i - 1]) / div;
+            t1[27 + i - 1] &= 0x1ffffffffffffffL;
+            r1 = (((-t1[27 + i]) << 57) - t1[27 + i - 1]) / dv;
             r1++;
             sp_3072_mul_d_27(t2, d, r1);
-            sp_3072_add_27(&t1[i], &t1[i], t2);
+            (void)sp_3072_add_27(&t1[i], &t1[i], t2);
             t1[27 + i] += t1[27 + i - 1] >> 57;
-            t1[27 + i - 1] &= 0x1ffffffffffffffl;
+            t1[27 + i - 1] &= 0x1ffffffffffffffL;
         }
         t1[27 - 1] += t1[27 - 2] >> 57;
-        t1[27 - 2] &= 0x1ffffffffffffffl;
-        d1 = t1[27 - 1];
-        r1 = (sp_digit)(d1 / div);
+        t1[27 - 2] &= 0x1ffffffffffffffL;
+        r1 = t1[27 - 1] / dv;
 
         sp_3072_mul_d_27(t2, d, r1);
-        sp_3072_sub_27(t1, t1, t2);
-        XMEMCPY(r, t1, sizeof(*r) * 2 * 27);
+        (void)sp_3072_sub_27(t1, t1, t2);
+        XMEMCPY(r, t1, sizeof(*r) * 2U * 27U);
         for (i=0; i<25; i++) {
             r[i+1] += r[i] >> 57;
-            r[i] &= 0x1ffffffffffffffl;
-        }
-        sp_3072_cond_add_27(r, r, d, 0 - (r[26] < 0));
-    }
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (td != NULL)
+            r[i] &= 0x1ffffffffffffffL;
+        }
+        sp_3072_cond_add_27(r, r, d, 0 - ((r[26] < 0) ?
+                    (sp_digit)1 : (sp_digit)0));
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
 #endif
 
     return err;
@@ -4731,7 +5667,7 @@
  * m  A single precision number that is the modulus to reduce with.
  * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
  */
-static int sp_3072_mod_27(sp_digit* r, sp_digit* a, sp_digit* m)
+static int sp_3072_mod_27(sp_digit* r, const sp_digit* a, const sp_digit* m)
 {
     return sp_3072_div_27(a, m, NULL, r);
 }
@@ -4745,8 +5681,8 @@
  * m     A single precision number that is the modulus.
  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
  */
-static int sp_3072_mod_exp_27(sp_digit* r, sp_digit* a, sp_digit* e, int bits,
-    sp_digit* m, int reduceA)
+static int sp_3072_mod_exp_27(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits,
+    const sp_digit* m, int reduceA)
 {
 #ifdef WOLFSSL_SP_SMALL
     sp_digit* td;
@@ -4760,11 +5696,12 @@
 
     td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 27 * 2, NULL,
                             DYNAMIC_TYPE_TMP_BUFFER);
-    if (td == NULL)
-        err = MEMORY_E;
-
-    if (err == MP_OKAY) {
-        XMEMSET(td, 0, sizeof(*td) * 3 * 27 * 2);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        XMEMSET(td, 0, sizeof(*td) * 3U * 27U * 2U);
 
         norm = t[0] = td;
         t[1] = &td[27 * 2];
@@ -4773,10 +5710,12 @@
         sp_3072_mont_setup(m, &mp);
         sp_3072_mont_norm_27(norm, m);
 
-        if (reduceA)
+        if (reduceA != 0) {
             err = sp_3072_mod_27(t[1], a, m);
-        else
-            XMEMCPY(t[1], a, sizeof(sp_digit) * 27);
+        }
+        else {
+            XMEMCPY(t[1], a, sizeof(sp_digit) * 27U);
+        }
     }
     if (err == MP_OKAY) {
         sp_3072_mul_27(t[1], t[1], norm);
@@ -4789,8 +5728,9 @@
         n = e[i--] << (57 - c);
         for (; ; c--) {
             if (c == 0) {
-                if (i == -1)
+                if (i == -1) {
                     break;
+                }
 
                 n = e[i--];
                 c = 57;
@@ -4812,13 +5752,15 @@
 
         sp_3072_mont_reduce_27(t[0], m, mp);
         n = sp_3072_cmp_27(t[0], m);
-        sp_3072_cond_sub_27(t[0], t[0], m, (n < 0) - 1);
+        sp_3072_cond_sub_27(t[0], t[0], m, ((n < 0) ?
+                    (sp_digit)1 : (sp_digit)0) - 1);
         XMEMCPY(r, t[0], sizeof(*r) * 27 * 2);
 
     }
 
-    if (td != NULL)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
 
     return err;
 #elif defined(WOLFSSL_SP_CACHE_RESISTANT)
@@ -4838,24 +5780,23 @@
 #ifdef WOLFSSL_SMALL_STACK
     td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 27 * 2, NULL,
                             DYNAMIC_TYPE_TMP_BUFFER);
-    if (td == NULL)
-        err = MEMORY_E;
-
-    if (err == MP_OKAY) {
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
         t[0] = td;
         t[1] = &td[27 * 2];
         t[2] = &td[2 * 27 * 2];
+#endif
         norm = t[0];
-    }
-#else
-    norm = t[0];
-#endif
-
-    if (err == MP_OKAY) {
+
         sp_3072_mont_setup(m, &mp);
         sp_3072_mont_norm_27(norm, m);
 
-        if (reduceA) {
+        if (reduceA != 0) {
             err = sp_3072_mod_27(t[1], a, m);
             if (err == MP_OKAY) {
                 sp_3072_mul_27(t[1], t[1], norm);
@@ -4874,8 +5815,9 @@
         n = e[i--] << (57 - c);
         for (; ; c--) {
             if (c == 0) {
-                if (i == -1)
+                if (i == -1) {
                     break;
+                }
 
                 n = e[i--];
                 c = 57;
@@ -4895,13 +5837,15 @@
 
         sp_3072_mont_reduce_27(t[0], m, mp);
         n = sp_3072_cmp_27(t[0], m);
-        sp_3072_cond_sub_27(t[0], t[0], m, (n < 0) - 1);
+        sp_3072_cond_sub_27(t[0], t[0], m, ((n < 0) ?
+                   (sp_digit)1 : (sp_digit)0) - 1);
         XMEMCPY(r, t[0], sizeof(t[0]));
     }
 
 #ifdef WOLFSSL_SMALL_STACK
-    if (td != NULL)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
 #endif
 
     return err;
@@ -4923,23 +5867,22 @@
 #ifdef WOLFSSL_SMALL_STACK
     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 54, NULL,
                             DYNAMIC_TYPE_TMP_BUFFER);
-    if (td == NULL)
-        err = MEMORY_E;
-
-    if (err == MP_OKAY) {
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
         for (i=0; i<32; i++)
             t[i] = td + i * 54;
+#endif
         norm = t[0];
-    }
-#else
-    norm = t[0];
-#endif
-
-    if (err == MP_OKAY) {
+
         sp_3072_mont_setup(m, &mp);
         sp_3072_mont_norm_27(norm, m);
 
-        if (reduceA) {
+        if (reduceA != 0) {
             err = sp_3072_mod_27(t[1], a, m);
             if (err == MP_OKAY) {
                 sp_3072_mul_27(t[1], t[1], norm);
@@ -4987,10 +5930,12 @@
         bits = ((bits + 4) / 5) * 5;
         i = ((bits + 56) / 57) - 1;
         c = bits % 57;
-        if (c == 0)
+        if (c == 0) {
             c = 57;
-        if (i < 27)
+        }
+        if (i < 27) {
             n = e[i--] << (64 - c);
+        }
         else {
             n = 0;
             i--;
@@ -4999,7 +5944,7 @@
             n |= e[i--] << (7 - c);
             c += 57;
         }
-        y = n >> 59;
+        y = (n >> 59) & 0x1f;
         n <<= 5;
         c -= 5;
         XMEMCPY(rt, t[y], sizeof(rt));
@@ -5023,58 +5968,61 @@
 
         sp_3072_mont_reduce_27(rt, m, mp);
         n = sp_3072_cmp_27(rt, m);
-        sp_3072_cond_sub_27(rt, rt, m, (n < 0) - 1);
+        sp_3072_cond_sub_27(rt, rt, m, ((n < 0) ?
+                   (sp_digit)1 : (sp_digit)0) - 1);
         XMEMCPY(r, rt, sizeof(rt));
     }
 
 #ifdef WOLFSSL_SMALL_STACK
-    if (td != NULL)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return err;
-#endif
-}
-
-#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */
+    }
+#endif
+
+    return err;
+#endif
+}
+
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
 
 /* r = 2^n mod m where n is the number of bits to reduce by.
  * Given m must be 3072 bits, just need to subtract.
  *
  * r  A single precision number.
- * m  A signle precision number.
- */
-static void sp_3072_mont_norm_54(sp_digit* r, sp_digit* m)
+ * m  A single precision number.
+ */
+static void sp_3072_mont_norm_54(sp_digit* r, const sp_digit* m)
 {
     /* Set r = 2^n - 1. */
 #ifdef WOLFSSL_SP_SMALL
     int i;
 
-    for (i=0; i<53; i++)
-        r[i] = 0x1ffffffffffffffl;
+    for (i=0; i<53; i++) {
+        r[i] = 0x1ffffffffffffffL;
+    }
 #else
     int i;
 
     for (i = 0; i < 48; i += 8) {
-        r[i + 0] = 0x1ffffffffffffffl;
-        r[i + 1] = 0x1ffffffffffffffl;
-        r[i + 2] = 0x1ffffffffffffffl;
-        r[i + 3] = 0x1ffffffffffffffl;
-        r[i + 4] = 0x1ffffffffffffffl;
-        r[i + 5] = 0x1ffffffffffffffl;
-        r[i + 6] = 0x1ffffffffffffffl;
-        r[i + 7] = 0x1ffffffffffffffl;
-    }
-    r[48] = 0x1ffffffffffffffl;
-    r[49] = 0x1ffffffffffffffl;
-    r[50] = 0x1ffffffffffffffl;
-    r[51] = 0x1ffffffffffffffl;
-    r[52] = 0x1ffffffffffffffl;
-#endif
-    r[53] = 0x7ffffffffffffl;
+        r[i + 0] = 0x1ffffffffffffffL;
+        r[i + 1] = 0x1ffffffffffffffL;
+        r[i + 2] = 0x1ffffffffffffffL;
+        r[i + 3] = 0x1ffffffffffffffL;
+        r[i + 4] = 0x1ffffffffffffffL;
+        r[i + 5] = 0x1ffffffffffffffL;
+        r[i + 6] = 0x1ffffffffffffffL;
+        r[i + 7] = 0x1ffffffffffffffL;
+    }
+    r[48] = 0x1ffffffffffffffL;
+    r[49] = 0x1ffffffffffffffL;
+    r[50] = 0x1ffffffffffffffL;
+    r[51] = 0x1ffffffffffffffL;
+    r[52] = 0x1ffffffffffffffL;
+#endif
+    r[53] = 0x7ffffffffffffL;
 
     /* r = (2^n - 1) mod n */
-    sp_3072_sub_54(r, r, m);
+    (void)sp_3072_sub_54(r, r, m);
 
     /* Add one so r = 2^n mod m */
     r[0] += 1;
@@ -5093,26 +6041,27 @@
 #ifdef WOLFSSL_SP_SMALL
     int i;
 
-    for (i=53; i>=0; i--)
-        r |= (a[i] - b[i]) & (0 - !r);
-#else
-    int i;
-
-    r |= (a[53] - b[53]) & (0 - !r);
-    r |= (a[52] - b[52]) & (0 - !r);
-    r |= (a[51] - b[51]) & (0 - !r);
-    r |= (a[50] - b[50]) & (0 - !r);
-    r |= (a[49] - b[49]) & (0 - !r);
-    r |= (a[48] - b[48]) & (0 - !r);
+    for (i=53; i>=0; i--) {
+        r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    }
+#else
+    int i;
+
+    r |= (a[53] - b[53]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[52] - b[52]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[51] - b[51]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[50] - b[50]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[49] - b[49]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[48] - b[48]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
     for (i = 40; i >= 0; i -= 8) {
-        r |= (a[i + 7] - b[i + 7]) & (0 - !r);
-        r |= (a[i + 6] - b[i + 6]) & (0 - !r);
-        r |= (a[i + 5] - b[i + 5]) & (0 - !r);
-        r |= (a[i + 4] - b[i + 4]) & (0 - !r);
-        r |= (a[i + 3] - b[i + 3]) & (0 - !r);
-        r |= (a[i + 2] - b[i + 2]) & (0 - !r);
-        r |= (a[i + 1] - b[i + 1]) & (0 - !r);
-        r |= (a[i + 0] - b[i + 0]) & (0 - !r);
+        r |= (a[i + 7] - b[i + 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 6] - b[i + 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 5] - b[i + 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 4] - b[i + 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 3] - b[i + 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 2] - b[i + 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 1] - b[i + 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 0] - b[i + 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
     }
 #endif /* WOLFSSL_SP_SMALL */
 
@@ -5133,8 +6082,9 @@
 #ifdef WOLFSSL_SP_SMALL
     int i;
 
-    for (i = 0; i < 54; i++)
+    for (i = 0; i < 54; i++) {
         r[i] = a[i] - (b[i] & m);
+    }
 #else
     int i;
 
@@ -5173,7 +6123,7 @@
 
     for (i = 0; i < 54; i++) {
         t += (tb * a[i]) + r[i];
-        r[i] = t & 0x1ffffffffffffffl;
+        r[i] = t & 0x1ffffffffffffffL;
         t >>= 57;
     }
     r[54] += t;
@@ -5182,31 +6132,31 @@
     int128_t t[8];
     int i;
 
-    t[0] = tb * a[0]; r[0] += t[0] & 0x1ffffffffffffffl;
+    t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1ffffffffffffffL);
     for (i = 0; i < 48; i += 8) {
         t[1] = tb * a[i+1];
-        r[i+1] += (t[0] >> 57) + (t[1] & 0x1ffffffffffffffl);
+        r[i+1] += (sp_digit)((t[0] >> 57) + (t[1] & 0x1ffffffffffffffL));
         t[2] = tb * a[i+2];
-        r[i+2] += (t[1] >> 57) + (t[2] & 0x1ffffffffffffffl);
+        r[i+2] += (sp_digit)((t[1] >> 57) + (t[2] & 0x1ffffffffffffffL));
         t[3] = tb * a[i+3];
-        r[i+3] += (t[2] >> 57) + (t[3] & 0x1ffffffffffffffl);
+        r[i+3] += (sp_digit)((t[2] >> 57) + (t[3] & 0x1ffffffffffffffL));
         t[4] = tb * a[i+4];
-        r[i+4] += (t[3] >> 57) + (t[4] & 0x1ffffffffffffffl);
+        r[i+4] += (sp_digit)((t[3] >> 57) + (t[4] & 0x1ffffffffffffffL));
         t[5] = tb * a[i+5];
-        r[i+5] += (t[4] >> 57) + (t[5] & 0x1ffffffffffffffl);
+        r[i+5] += (sp_digit)((t[4] >> 57) + (t[5] & 0x1ffffffffffffffL));
         t[6] = tb * a[i+6];
-        r[i+6] += (t[5] >> 57) + (t[6] & 0x1ffffffffffffffl);
+        r[i+6] += (sp_digit)((t[5] >> 57) + (t[6] & 0x1ffffffffffffffL));
         t[7] = tb * a[i+7];
-        r[i+7] += (t[6] >> 57) + (t[7] & 0x1ffffffffffffffl);
+        r[i+7] += (sp_digit)((t[6] >> 57) + (t[7] & 0x1ffffffffffffffL));
         t[0] = tb * a[i+8];
-        r[i+8] += (t[7] >> 57) + (t[0] & 0x1ffffffffffffffl);
-    }
-    t[1] = tb * a[49]; r[49] += (t[0] >> 57) + (t[1] & 0x1ffffffffffffffl);
-    t[2] = tb * a[50]; r[50] += (t[1] >> 57) + (t[2] & 0x1ffffffffffffffl);
-    t[3] = tb * a[51]; r[51] += (t[2] >> 57) + (t[3] & 0x1ffffffffffffffl);
-    t[4] = tb * a[52]; r[52] += (t[3] >> 57) + (t[4] & 0x1ffffffffffffffl);
-    t[5] = tb * a[53]; r[53] += (t[4] >> 57) + (t[5] & 0x1ffffffffffffffl);
-    r[54] +=  t[5] >> 57;
+        r[i+8] += (sp_digit)((t[7] >> 57) + (t[0] & 0x1ffffffffffffffL));
+    }
+    t[1] = tb * a[49]; r[49] += (sp_digit)((t[0] >> 57) + (t[1] & 0x1ffffffffffffffL));
+    t[2] = tb * a[50]; r[50] += (sp_digit)((t[1] >> 57) + (t[2] & 0x1ffffffffffffffL));
+    t[3] = tb * a[51]; r[51] += (sp_digit)((t[2] >> 57) + (t[3] & 0x1ffffffffffffffL));
+    t[4] = tb * a[52]; r[52] += (sp_digit)((t[3] >> 57) + (t[4] & 0x1ffffffffffffffL));
+    t[5] = tb * a[53]; r[53] += (sp_digit)((t[4] >> 57) + (t[5] & 0x1ffffffffffffffL));
+    r[54] +=  (sp_digit)(t[5] >> 57);
 #endif /* WOLFSSL_SP_SMALL */
 }
 
@@ -5220,31 +6170,31 @@
     int i;
     for (i = 0; i < 53; i++) {
         a[i+1] += a[i] >> 57;
-        a[i] &= 0x1ffffffffffffffl;
+        a[i] &= 0x1ffffffffffffffL;
     }
 #else
     int i;
     for (i = 0; i < 48; i += 8) {
-        a[i+1] += a[i+0] >> 57; a[i+0] &= 0x1ffffffffffffffl;
-        a[i+2] += a[i+1] >> 57; a[i+1] &= 0x1ffffffffffffffl;
-        a[i+3] += a[i+2] >> 57; a[i+2] &= 0x1ffffffffffffffl;
-        a[i+4] += a[i+3] >> 57; a[i+3] &= 0x1ffffffffffffffl;
-        a[i+5] += a[i+4] >> 57; a[i+4] &= 0x1ffffffffffffffl;
-        a[i+6] += a[i+5] >> 57; a[i+5] &= 0x1ffffffffffffffl;
-        a[i+7] += a[i+6] >> 57; a[i+6] &= 0x1ffffffffffffffl;
-        a[i+8] += a[i+7] >> 57; a[i+7] &= 0x1ffffffffffffffl;
-        a[i+9] += a[i+8] >> 57; a[i+8] &= 0x1ffffffffffffffl;
+        a[i+1] += a[i+0] >> 57; a[i+0] &= 0x1ffffffffffffffL;
+        a[i+2] += a[i+1] >> 57; a[i+1] &= 0x1ffffffffffffffL;
+        a[i+3] += a[i+2] >> 57; a[i+2] &= 0x1ffffffffffffffL;
+        a[i+4] += a[i+3] >> 57; a[i+3] &= 0x1ffffffffffffffL;
+        a[i+5] += a[i+4] >> 57; a[i+4] &= 0x1ffffffffffffffL;
+        a[i+6] += a[i+5] >> 57; a[i+5] &= 0x1ffffffffffffffL;
+        a[i+7] += a[i+6] >> 57; a[i+6] &= 0x1ffffffffffffffL;
+        a[i+8] += a[i+7] >> 57; a[i+7] &= 0x1ffffffffffffffL;
+        a[i+9] += a[i+8] >> 57; a[i+8] &= 0x1ffffffffffffffL;
     }
     a[48+1] += a[48] >> 57;
-    a[48] &= 0x1ffffffffffffffl;
+    a[48] &= 0x1ffffffffffffffL;
     a[49+1] += a[49] >> 57;
-    a[49] &= 0x1ffffffffffffffl;
+    a[49] &= 0x1ffffffffffffffL;
     a[50+1] += a[50] >> 57;
-    a[50] &= 0x1ffffffffffffffl;
+    a[50] &= 0x1ffffffffffffffL;
     a[51+1] += a[51] >> 57;
-    a[51] &= 0x1ffffffffffffffl;
+    a[51] &= 0x1ffffffffffffffL;
     a[52+1] += a[52] >> 57;
-    a[52] &= 0x1ffffffffffffffl;
+    a[52] &= 0x1ffffffffffffffL;
 #endif
 }
 
@@ -5261,7 +6211,7 @@
     n += ((int128_t)a[54]) << 6;
 
     for (i = 0; i < 53; i++) {
-        r[i] = n & 0x1ffffffffffffffl;
+        r[i] = n & 0x1ffffffffffffffL;
         n >>= 57;
         n += ((int128_t)a[55 + i]) << 6;
     }
@@ -5271,31 +6221,31 @@
     int128_t n = a[53] >> 51;
     n += ((int128_t)a[54]) << 6;
     for (i = 0; i < 48; i += 8) {
-        r[i + 0] = n & 0x1ffffffffffffffl;
+        r[i + 0] = n & 0x1ffffffffffffffL;
         n >>= 57; n += ((int128_t)a[i + 55]) << 6;
-        r[i + 1] = n & 0x1ffffffffffffffl;
+        r[i + 1] = n & 0x1ffffffffffffffL;
         n >>= 57; n += ((int128_t)a[i + 56]) << 6;
-        r[i + 2] = n & 0x1ffffffffffffffl;
+        r[i + 2] = n & 0x1ffffffffffffffL;
         n >>= 57; n += ((int128_t)a[i + 57]) << 6;
-        r[i + 3] = n & 0x1ffffffffffffffl;
+        r[i + 3] = n & 0x1ffffffffffffffL;
         n >>= 57; n += ((int128_t)a[i + 58]) << 6;
-        r[i + 4] = n & 0x1ffffffffffffffl;
+        r[i + 4] = n & 0x1ffffffffffffffL;
         n >>= 57; n += ((int128_t)a[i + 59]) << 6;
-        r[i + 5] = n & 0x1ffffffffffffffl;
+        r[i + 5] = n & 0x1ffffffffffffffL;
         n >>= 57; n += ((int128_t)a[i + 60]) << 6;
-        r[i + 6] = n & 0x1ffffffffffffffl;
+        r[i + 6] = n & 0x1ffffffffffffffL;
         n >>= 57; n += ((int128_t)a[i + 61]) << 6;
-        r[i + 7] = n & 0x1ffffffffffffffl;
+        r[i + 7] = n & 0x1ffffffffffffffL;
         n >>= 57; n += ((int128_t)a[i + 62]) << 6;
     }
-    r[48] = n & 0x1ffffffffffffffl; n >>= 57; n += ((int128_t)a[103]) << 6;
-    r[49] = n & 0x1ffffffffffffffl; n >>= 57; n += ((int128_t)a[104]) << 6;
-    r[50] = n & 0x1ffffffffffffffl; n >>= 57; n += ((int128_t)a[105]) << 6;
-    r[51] = n & 0x1ffffffffffffffl; n >>= 57; n += ((int128_t)a[106]) << 6;
-    r[52] = n & 0x1ffffffffffffffl; n >>= 57; n += ((int128_t)a[107]) << 6;
+    r[48] = n & 0x1ffffffffffffffL; n >>= 57; n += ((int128_t)a[103]) << 6;
+    r[49] = n & 0x1ffffffffffffffL; n >>= 57; n += ((int128_t)a[104]) << 6;
+    r[50] = n & 0x1ffffffffffffffL; n >>= 57; n += ((int128_t)a[105]) << 6;
+    r[51] = n & 0x1ffffffffffffffL; n >>= 57; n += ((int128_t)a[106]) << 6;
+    r[52] = n & 0x1ffffffffffffffL; n >>= 57; n += ((int128_t)a[107]) << 6;
     r[53] = (sp_digit)n;
 #endif /* WOLFSSL_SP_SMALL */
-    XMEMSET(&r[54], 0, sizeof(*r) * 54);
+    XMEMSET(&r[54], 0, sizeof(*r) * 54U);
 }
 
 /* Reduce the number back to 3072 bits using Montgomery reduction.
@@ -5304,36 +6254,51 @@
  * m   The single precision number representing the modulus.
  * mp  The digit representing the negative inverse of m mod 2^n.
  */
-static void sp_3072_mont_reduce_54(sp_digit* a, sp_digit* m, sp_digit mp)
+static void sp_3072_mont_reduce_54(sp_digit* a, const sp_digit* m, sp_digit mp)
 {
     int i;
     sp_digit mu;
 
+    sp_3072_norm_54(a + 54);
+
+#ifdef WOLFSSL_SP_DH
     if (mp != 1) {
         for (i=0; i<53; i++) {
-            mu = (a[i] * mp) & 0x1ffffffffffffffl;
+            mu = (a[i] * mp) & 0x1ffffffffffffffL;
             sp_3072_mul_add_54(a+i, m, mu);
             a[i+1] += a[i] >> 57;
         }
-        mu = (a[i] * mp) & 0x7ffffffffffffl;
+        mu = (a[i] * mp) & 0x7ffffffffffffL;
         sp_3072_mul_add_54(a+i, m, mu);
         a[i+1] += a[i] >> 57;
-        a[i] &= 0x1ffffffffffffffl;
+        a[i] &= 0x1ffffffffffffffL;
     }
     else {
         for (i=0; i<53; i++) {
-            mu = a[i] & 0x1ffffffffffffffl;
+            mu = a[i] & 0x1ffffffffffffffL;
             sp_3072_mul_add_54(a+i, m, mu);
             a[i+1] += a[i] >> 57;
         }
-        mu = a[i] & 0x7ffffffffffffl;
+        mu = a[i] & 0x7ffffffffffffL;
+        sp_3072_mul_add_54(a+i, m, mu);
+        a[i+1] += a[i] >> 57;
+        a[i] &= 0x1ffffffffffffffL;
+    }
+#else
+    for (i=0; i<53; i++) {
+        mu = (a[i] * mp) & 0x1ffffffffffffffL;
         sp_3072_mul_add_54(a+i, m, mu);
         a[i+1] += a[i] >> 57;
-        a[i] &= 0x1ffffffffffffffl;
-    }
+    }
+    mu = (a[i] * mp) & 0x7ffffffffffffL;
+    sp_3072_mul_add_54(a+i, m, mu);
+    a[i+1] += a[i] >> 57;
+    a[i] &= 0x1ffffffffffffffL;
+#endif
 
     sp_3072_mont_shift_54(a, a);
-    sp_3072_cond_sub_54(a, a, m, 0 - ((a[53] >> 51) > 0));
+    sp_3072_cond_sub_54(a, a, m, 0 - (((a[53] >> 51) > 0) ?
+            (sp_digit)1 : (sp_digit)0));
     sp_3072_norm_54(a);
 }
 
@@ -5346,8 +6311,8 @@
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_3072_mont_mul_54(sp_digit* r, sp_digit* a, sp_digit* b,
-        sp_digit* m, sp_digit mp)
+static void sp_3072_mont_mul_54(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
 {
     sp_3072_mul_54(r, a, b);
     sp_3072_mont_reduce_54(r, m, mp);
@@ -5360,71 +6325,13 @@
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_3072_mont_sqr_54(sp_digit* r, sp_digit* a, sp_digit* m,
+static void sp_3072_mont_sqr_54(sp_digit* r, const sp_digit* a, const sp_digit* m,
         sp_digit mp)
 {
     sp_3072_sqr_54(r, a);
     sp_3072_mont_reduce_54(r, m, mp);
 }
 
-/* Multiply a by scalar b into r. (r = a * b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A scalar.
- */
-SP_NOINLINE static void sp_3072_mul_d_54(sp_digit* r, const sp_digit* a,
-    const sp_digit b)
-{
-#ifdef WOLFSSL_SP_SMALL
-    int128_t tb = b;
-    int128_t t = 0;
-    int i;
-
-    for (i = 0; i < 54; i++) {
-        t += tb * a[i];
-        r[i] = t & 0x1ffffffffffffffl;
-        t >>= 57;
-    }
-    r[54] = (sp_digit)t;
-#else
-    int128_t tb = b;
-    int128_t t[8];
-    int i;
-
-    t[0] = tb * a[0]; r[0] = t[0] & 0x1ffffffffffffffl;
-    for (i = 0; i < 48; i += 8) {
-        t[1] = tb * a[i+1];
-        r[i+1] = (sp_digit)(t[0] >> 57) + (t[1] & 0x1ffffffffffffffl);
-        t[2] = tb * a[i+2];
-        r[i+2] = (sp_digit)(t[1] >> 57) + (t[2] & 0x1ffffffffffffffl);
-        t[3] = tb * a[i+3];
-        r[i+3] = (sp_digit)(t[2] >> 57) + (t[3] & 0x1ffffffffffffffl);
-        t[4] = tb * a[i+4];
-        r[i+4] = (sp_digit)(t[3] >> 57) + (t[4] & 0x1ffffffffffffffl);
-        t[5] = tb * a[i+5];
-        r[i+5] = (sp_digit)(t[4] >> 57) + (t[5] & 0x1ffffffffffffffl);
-        t[6] = tb * a[i+6];
-        r[i+6] = (sp_digit)(t[5] >> 57) + (t[6] & 0x1ffffffffffffffl);
-        t[7] = tb * a[i+7];
-        r[i+7] = (sp_digit)(t[6] >> 57) + (t[7] & 0x1ffffffffffffffl);
-        t[0] = tb * a[i+8];
-        r[i+8] = (sp_digit)(t[7] >> 57) + (t[0] & 0x1ffffffffffffffl);
-    }
-    t[1] = tb * a[49];
-    r[49] = (sp_digit)(t[0] >> 57) + (t[1] & 0x1ffffffffffffffl);
-    t[2] = tb * a[50];
-    r[50] = (sp_digit)(t[1] >> 57) + (t[2] & 0x1ffffffffffffffl);
-    t[3] = tb * a[51];
-    r[51] = (sp_digit)(t[2] >> 57) + (t[3] & 0x1ffffffffffffffl);
-    t[4] = tb * a[52];
-    r[52] = (sp_digit)(t[3] >> 57) + (t[4] & 0x1ffffffffffffffl);
-    t[5] = tb * a[53];
-    r[53] = (sp_digit)(t[4] >> 57) + (t[5] & 0x1ffffffffffffffl);
-    r[54] =  (sp_digit)(t[5] >> 57);
-#endif /* WOLFSSL_SP_SMALL */
-}
-
 /* Conditionally add a and b using the mask m.
  * m is -1 to add and 0 when not.
  *
@@ -5439,8 +6346,9 @@
 #ifdef WOLFSSL_SP_SMALL
     int i;
 
-    for (i = 0; i < 54; i++)
+    for (i = 0; i < 54; i++) {
         r[i] = a[i] + (b[i] & m);
+    }
 #else
     int i;
 
@@ -5463,22 +6371,110 @@
 #endif /* WOLFSSL_SP_SMALL */
 }
 
+#ifdef WOLFSSL_SP_DIV_64
+static WC_INLINE sp_digit sp_3072_div_word_54(sp_digit d1, sp_digit d0,
+    sp_digit dv)
+{
+    sp_digit d, r, t;
+
+    /* All 57 bits from d1 and top 6 bits from d0. */
+    d = (d1 << 6) | (d0 >> 51);
+    r = d / dv;
+    d -= r * dv;
+    /* Up to 7 bits in r */
+    /* Next 6 bits from d0. */
+    r <<= 6;
+    d <<= 6;
+    d |= (d0 >> 45) & ((1 << 6) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 13 bits in r */
+    /* Next 6 bits from d0. */
+    r <<= 6;
+    d <<= 6;
+    d |= (d0 >> 39) & ((1 << 6) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 19 bits in r */
+    /* Next 6 bits from d0. */
+    r <<= 6;
+    d <<= 6;
+    d |= (d0 >> 33) & ((1 << 6) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 25 bits in r */
+    /* Next 6 bits from d0. */
+    r <<= 6;
+    d <<= 6;
+    d |= (d0 >> 27) & ((1 << 6) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 31 bits in r */
+    /* Next 6 bits from d0. */
+    r <<= 6;
+    d <<= 6;
+    d |= (d0 >> 21) & ((1 << 6) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 37 bits in r */
+    /* Next 6 bits from d0. */
+    r <<= 6;
+    d <<= 6;
+    d |= (d0 >> 15) & ((1 << 6) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 43 bits in r */
+    /* Next 6 bits from d0. */
+    r <<= 6;
+    d <<= 6;
+    d |= (d0 >> 9) & ((1 << 6) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 49 bits in r */
+    /* Next 6 bits from d0. */
+    r <<= 6;
+    d <<= 6;
+    d |= (d0 >> 3) & ((1 << 6) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 55 bits in r */
+    /* Remaining 3 bits from d0. */
+    r <<= 3;
+    d <<= 3;
+    d |= d0 & ((1 << 3) - 1);
+    t = d / dv;
+    r += t;
+
+    return r;
+}
+#endif /* WOLFSSL_SP_DIV_64 */
+
 /* Divide d in a and put remainder into r (m*d + r = a)
  * m is not calculated as it is not needed at this time.
  *
- * a  Nmber to be divided.
+ * a  Number to be divided.
  * d  Number to divide with.
  * m  Multiplier result.
  * r  Remainder from the division.
  * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
  */
-static int sp_3072_div_54(sp_digit* a, sp_digit* d, sp_digit* m,
+static int sp_3072_div_54(const sp_digit* a, const sp_digit* d, sp_digit* m,
         sp_digit* r)
 {
     int i;
+#ifndef WOLFSSL_SP_DIV_64
     int128_t d1;
-    sp_digit div, r1;
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#endif
+    sp_digit dv, r1;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit* td;
 #else
     sp_digit t1d[108], t2d[54 + 1];
@@ -5487,62 +6483,70 @@
     sp_digit* t2;
     int err = MP_OKAY;
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    td = XMALLOC(sizeof(sp_digit) * (3 * 54 + 1), NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    if (td != NULL) {
+    (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (3 * 54 + 1), NULL,
+                                                       DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
         t1 = td;
         t2 = td + 2 * 54;
-    }
-    else
-        err = MEMORY_E;
-#else
-    t1 = t1d;
-    t2 = t2d;
-#endif
-
-    (void)m;
-
-    if (err == MP_OKAY) {
-        div = d[53];
-        XMEMCPY(t1, a, sizeof(*t1) * 2 * 54);
+#else
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
+        dv = d[53];
+        XMEMCPY(t1, a, sizeof(*t1) * 2U * 54U);
         for (i=53; i>=0; i--) {
             t1[54 + i] += t1[54 + i - 1] >> 57;
-            t1[54 + i - 1] &= 0x1ffffffffffffffl;
+            t1[54 + i - 1] &= 0x1ffffffffffffffL;
+#ifndef WOLFSSL_SP_DIV_64
             d1 = t1[54 + i];
             d1 <<= 57;
             d1 += t1[54 + i - 1];
-            r1 = (sp_digit)(d1 / div);
+            r1 = (sp_digit)(d1 / dv);
+#else
+            r1 = sp_3072_div_word_54(t1[54 + i], t1[54 + i - 1], dv);
+#endif
 
             sp_3072_mul_d_54(t2, d, r1);
-            sp_3072_sub_54(&t1[i], &t1[i], t2);
+            (void)sp_3072_sub_54(&t1[i], &t1[i], t2);
             t1[54 + i] -= t2[54];
             t1[54 + i] += t1[54 + i - 1] >> 57;
-            t1[54 + i - 1] &= 0x1ffffffffffffffl;
-            r1 = (((-t1[54 + i]) << 57) - t1[54 + i - 1]) / div;
+            t1[54 + i - 1] &= 0x1ffffffffffffffL;
+            r1 = (((-t1[54 + i]) << 57) - t1[54 + i - 1]) / dv;
             r1++;
             sp_3072_mul_d_54(t2, d, r1);
-            sp_3072_add_54(&t1[i], &t1[i], t2);
+            (void)sp_3072_add_54(&t1[i], &t1[i], t2);
             t1[54 + i] += t1[54 + i - 1] >> 57;
-            t1[54 + i - 1] &= 0x1ffffffffffffffl;
+            t1[54 + i - 1] &= 0x1ffffffffffffffL;
         }
         t1[54 - 1] += t1[54 - 2] >> 57;
-        t1[54 - 2] &= 0x1ffffffffffffffl;
-        d1 = t1[54 - 1];
-        r1 = (sp_digit)(d1 / div);
+        t1[54 - 2] &= 0x1ffffffffffffffL;
+        r1 = t1[54 - 1] / dv;
 
         sp_3072_mul_d_54(t2, d, r1);
-        sp_3072_sub_54(t1, t1, t2);
-        XMEMCPY(r, t1, sizeof(*r) * 2 * 54);
+        (void)sp_3072_sub_54(t1, t1, t2);
+        XMEMCPY(r, t1, sizeof(*r) * 2U * 54U);
         for (i=0; i<52; i++) {
             r[i+1] += r[i] >> 57;
-            r[i] &= 0x1ffffffffffffffl;
-        }
-        sp_3072_cond_add_54(r, r, d, 0 - (r[53] < 0));
-    }
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (td != NULL)
+            r[i] &= 0x1ffffffffffffffL;
+        }
+        sp_3072_cond_add_54(r, r, d, 0 - ((r[53] < 0) ?
+                    (sp_digit)1 : (sp_digit)0));
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
 #endif
 
     return err;
@@ -5555,12 +6559,13 @@
  * m  A single precision number that is the modulus to reduce with.
  * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
  */
-static int sp_3072_mod_54(sp_digit* r, sp_digit* a, sp_digit* m)
+static int sp_3072_mod_54(sp_digit* r, const sp_digit* a, const sp_digit* m)
 {
     return sp_3072_div_54(a, m, NULL, r);
 }
 
-#if defined(SP_RSA_PRIVATE_EXP_D) || defined(WOLFSSL_HAVE_SP_DH)
+#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
+                                                     defined(WOLFSSL_HAVE_SP_DH)
 /* Modular exponentiate a to the e mod m. (r = a^e mod m)
  *
  * r     A single precision number that is the result of the operation.
@@ -5570,8 +6575,8 @@
  * m     A single precision number that is the modulus.
  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
  */
-static int sp_3072_mod_exp_54(sp_digit* r, sp_digit* a, sp_digit* e, int bits,
-    sp_digit* m, int reduceA)
+static int sp_3072_mod_exp_54(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits,
+    const sp_digit* m, int reduceA)
 {
 #ifdef WOLFSSL_SP_SMALL
     sp_digit* td;
@@ -5585,11 +6590,12 @@
 
     td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 54 * 2, NULL,
                             DYNAMIC_TYPE_TMP_BUFFER);
-    if (td == NULL)
-        err = MEMORY_E;
-
-    if (err == MP_OKAY) {
-        XMEMSET(td, 0, sizeof(*td) * 3 * 54 * 2);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        XMEMSET(td, 0, sizeof(*td) * 3U * 54U * 2U);
 
         norm = t[0] = td;
         t[1] = &td[54 * 2];
@@ -5598,10 +6604,12 @@
         sp_3072_mont_setup(m, &mp);
         sp_3072_mont_norm_54(norm, m);
 
-        if (reduceA)
+        if (reduceA != 0) {
             err = sp_3072_mod_54(t[1], a, m);
-        else
-            XMEMCPY(t[1], a, sizeof(sp_digit) * 54);
+        }
+        else {
+            XMEMCPY(t[1], a, sizeof(sp_digit) * 54U);
+        }
     }
     if (err == MP_OKAY) {
         sp_3072_mul_54(t[1], t[1], norm);
@@ -5614,8 +6622,9 @@
         n = e[i--] << (57 - c);
         for (; ; c--) {
             if (c == 0) {
-                if (i == -1)
+                if (i == -1) {
                     break;
+                }
 
                 n = e[i--];
                 c = 57;
@@ -5637,13 +6646,15 @@
 
         sp_3072_mont_reduce_54(t[0], m, mp);
         n = sp_3072_cmp_54(t[0], m);
-        sp_3072_cond_sub_54(t[0], t[0], m, (n < 0) - 1);
+        sp_3072_cond_sub_54(t[0], t[0], m, ((n < 0) ?
+                    (sp_digit)1 : (sp_digit)0) - 1);
         XMEMCPY(r, t[0], sizeof(*r) * 54 * 2);
 
     }
 
-    if (td != NULL)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
 
     return err;
 #elif defined(WOLFSSL_SP_CACHE_RESISTANT)
@@ -5663,24 +6674,23 @@
 #ifdef WOLFSSL_SMALL_STACK
     td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 54 * 2, NULL,
                             DYNAMIC_TYPE_TMP_BUFFER);
-    if (td == NULL)
-        err = MEMORY_E;
-
-    if (err == MP_OKAY) {
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
         t[0] = td;
         t[1] = &td[54 * 2];
         t[2] = &td[2 * 54 * 2];
+#endif
         norm = t[0];
-    }
-#else
-    norm = t[0];
-#endif
-
-    if (err == MP_OKAY) {
+
         sp_3072_mont_setup(m, &mp);
         sp_3072_mont_norm_54(norm, m);
 
-        if (reduceA) {
+        if (reduceA != 0) {
             err = sp_3072_mod_54(t[1], a, m);
             if (err == MP_OKAY) {
                 sp_3072_mul_54(t[1], t[1], norm);
@@ -5699,8 +6709,9 @@
         n = e[i--] << (57 - c);
         for (; ; c--) {
             if (c == 0) {
-                if (i == -1)
+                if (i == -1) {
                     break;
+                }
 
                 n = e[i--];
                 c = 57;
@@ -5720,13 +6731,15 @@
 
         sp_3072_mont_reduce_54(t[0], m, mp);
         n = sp_3072_cmp_54(t[0], m);
-        sp_3072_cond_sub_54(t[0], t[0], m, (n < 0) - 1);
+        sp_3072_cond_sub_54(t[0], t[0], m, ((n < 0) ?
+                   (sp_digit)1 : (sp_digit)0) - 1);
         XMEMCPY(r, t[0], sizeof(t[0]));
     }
 
 #ifdef WOLFSSL_SMALL_STACK
-    if (td != NULL)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
 #endif
 
     return err;
@@ -5748,23 +6761,22 @@
 #ifdef WOLFSSL_SMALL_STACK
     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 108, NULL,
                             DYNAMIC_TYPE_TMP_BUFFER);
-    if (td == NULL)
-        err = MEMORY_E;
-
-    if (err == MP_OKAY) {
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
         for (i=0; i<32; i++)
             t[i] = td + i * 108;
+#endif
         norm = t[0];
-    }
-#else
-    norm = t[0];
-#endif
-
-    if (err == MP_OKAY) {
+
         sp_3072_mont_setup(m, &mp);
         sp_3072_mont_norm_54(norm, m);
 
-        if (reduceA) {
+        if (reduceA != 0) {
             err = sp_3072_mod_54(t[1], a, m);
             if (err == MP_OKAY) {
                 sp_3072_mul_54(t[1], t[1], norm);
@@ -5812,10 +6824,12 @@
         bits = ((bits + 4) / 5) * 5;
         i = ((bits + 56) / 57) - 1;
         c = bits % 57;
-        if (c == 0)
+        if (c == 0) {
             c = 57;
-        if (i < 54)
+        }
+        if (i < 54) {
             n = e[i--] << (64 - c);
+        }
         else {
             n = 0;
             i--;
@@ -5824,7 +6838,7 @@
             n |= e[i--] << (7 - c);
             c += 57;
         }
-        y = n >> 59;
+        y = (n >> 59) & 0x1f;
         n <<= 5;
         c -= 5;
         XMEMCPY(rt, t[y], sizeof(rt));
@@ -5848,55 +6862,23 @@
 
         sp_3072_mont_reduce_54(rt, m, mp);
         n = sp_3072_cmp_54(rt, m);
-        sp_3072_cond_sub_54(rt, rt, m, (n < 0) - 1);
+        sp_3072_cond_sub_54(rt, rt, m, ((n < 0) ?
+                   (sp_digit)1 : (sp_digit)0) - 1);
         XMEMCPY(r, rt, sizeof(rt));
     }
 
 #ifdef WOLFSSL_SMALL_STACK
-    if (td != NULL)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return err;
-#endif
-}
-#endif /* SP_RSA_PRIVATE_EXP_D || WOLFSSL_HAVE_SP_DH */
-
-#if defined(WOLFSSL_HAVE_SP_RSA) && !defined(SP_RSA_PRIVATE_EXP_D) && \
-                                    !defined(RSA_LOW_MEM)
-/* AND m into each word of a and store in r.
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * m  Mask to AND against each digit.
- */
-static void sp_3072_mask_27(sp_digit* r, sp_digit* a, sp_digit m)
-{
-#ifdef WOLFSSL_SP_SMALL
-    int i;
-
-    for (i=0; i<27; i++)
-        r[i] = a[i] & m;
-#else
-    int i;
-
-    for (i = 0; i < 24; i += 8) {
-        r[i+0] = a[i+0] & m;
-        r[i+1] = a[i+1] & m;
-        r[i+2] = a[i+2] & m;
-        r[i+3] = a[i+3] & m;
-        r[i+4] = a[i+4] & m;
-        r[i+5] = a[i+5] & m;
-        r[i+6] = a[i+6] & m;
-        r[i+7] = a[i+7] & m;
-    }
-    r[24] = a[24] & m;
-    r[25] = a[25] & m;
-    r[26] = a[26] & m;
-#endif
-}
-
-#endif
+    }
+#endif
+
+    return err;
+#endif
+}
+#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || */
+       /* WOLFSSL_HAVE_SP_DH */
+
 #ifdef WOLFSSL_HAVE_SP_RSA
 /* RSA public key operation.
  *
@@ -5919,20 +6901,30 @@
     sp_digit* m;
     sp_digit* r;
     sp_digit* norm;
-    sp_digit e[1];
+    sp_digit e[1] = {0};
     sp_digit mp;
     int i;
     int err = MP_OKAY;
 
-    if (*outLen < 384)
+    if (*outLen < 384U) {
         err = MP_TO_E;
-    if (err == MP_OKAY && (mp_count_bits(em) > 57 || inLen > 384 ||
-                                                     mp_count_bits(mm) != 3072))
-        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(em) > 57) {
+            err = MP_READ_E;
+        }
+        if (inLen > 384U) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 3072) {
+            err = MP_READ_E;
+        }
+    }
 
     if (err == MP_OKAY) {
         d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 54 * 5, NULL,
-                               DYNAMIC_TYPE_TMP_BUFFER);
+                                                              DYNAMIC_TYPE_RSA);
         if (d == NULL)
             err = MEMORY_E;
     }
@@ -5945,14 +6937,16 @@
 
         sp_3072_from_bin(a, 54, in, inLen);
 #if DIGIT_BIT >= 57
-        e[0] = em->dp[0];
-#else
-        e[0] = em->dp[0];
-        if (em->used > 1)
+        e[0] = (sp_digit)em->dp[0];
+#else
+        e[0] = (sp_digit)em->dp[0];
+        if (em->used > 1) {
             e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
-#endif
-        if (e[0] == 0)
+        }
+#endif
+        if (e[0] == 0) {
             err = MP_EXPTMOD_E;
+        }
     }
 
     if (err == MP_OKAY) {
@@ -5966,31 +6960,36 @@
         err = sp_3072_mod_54(a, a, m);
     }
     if (err == MP_OKAY) {
-        for (i=56; i>=0; i--)
-            if (e[0] >> i)
-                break;
+        for (i=56; i>=0; i--) {
+            if ((e[0] >> i) != 0) {
+                break;
+            }
+        }
 
         XMEMCPY(r, a, sizeof(sp_digit) * 54 * 2);
         for (i--; i>=0; i--) {
             sp_3072_mont_sqr_54(r, r, m, mp);
 
-            if (((e[0] >> i) & 1) == 1)
+            if (((e[0] >> i) & 1) == 1) {
                 sp_3072_mont_mul_54(r, r, a, m, mp);
+            }
         }
         sp_3072_mont_reduce_54(r, m, mp);
         mp = sp_3072_cmp_54(r, m);
-        sp_3072_cond_sub_54(r, r, m, (mp < 0) - 1);
+        sp_3072_cond_sub_54(r, r, m, ((mp < 0) ?
+                    (sp_digit)1 : (sp_digit)0)- 1);
 
         sp_3072_to_bin(r, out);
         *outLen = 384;
     }
 
-    if (d != NULL)
-        XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-
-    return err;
-#else
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+
+    return err;
+#else
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit ad[108], md[54], rd[108];
 #else
     sp_digit* d = NULL;
@@ -5998,21 +6997,31 @@
     sp_digit* a;
     sp_digit* m;
     sp_digit* r;
-    sp_digit e[1];
-    int err = MP_OKAY;
-
-    if (*outLen < 384)
+    sp_digit e[1] = {0};
+    int err = MP_OKAY;
+
+    if (*outLen < 384U) {
         err = MP_TO_E;
-    if (err == MP_OKAY && (mp_count_bits(em) > 57 || inLen > 384 ||
-                                                     mp_count_bits(mm) != 3072))
-        err = MP_READ_E;
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(em) > 57) {
+            err = MP_READ_E;
+        }
+        if (inLen > 384U) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 3072) {
+            err = MP_READ_E;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     if (err == MP_OKAY) {
         d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 54 * 5, NULL,
-                               DYNAMIC_TYPE_TMP_BUFFER);
-        if (d == NULL)
-            err = MEMORY_E;
+                                                              DYNAMIC_TYPE_RSA);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
     }
 
     if (err == MP_OKAY) {
@@ -6029,23 +7038,23 @@
     if (err == MP_OKAY) {
         sp_3072_from_bin(a, 54, in, inLen);
 #if DIGIT_BIT >= 57
-        e[0] = em->dp[0];
-#else
-        e[0] = em->dp[0];
-        if (em->used > 1)
+        e[0] = (sp_digit)em->dp[0];
+#else
+        e[0] = (sp_digit)em->dp[0];
+        if (em->used > 1) {
             e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
-#endif
-        if (e[0] == 0)
+        }
+#endif
+        if (e[0] == 0) {
             err = MP_EXPTMOD_E;
+        }
     }
     if (err == MP_OKAY) {
         sp_3072_from_mp(m, 54, mm);
 
         if (e[0] == 0x3) {
-            if (err == MP_OKAY) {
-                sp_3072_sqr_54(r, a);
-                err = sp_3072_mod_54(r, r, m);
-            }
+            sp_3072_sqr_54(r, a);
+            err = sp_3072_mod_54(r, r, m);
             if (err == MP_OKAY) {
                 sp_3072_mul_54(r, a, r);
                 err = sp_3072_mod_54(r, r, m);
@@ -6059,26 +7068,28 @@
             sp_3072_mont_setup(m, &mp);
             sp_3072_mont_norm_54(norm, m);
 
-            if (err == MP_OKAY) {
-                sp_3072_mul_54(a, a, norm);
-                err = sp_3072_mod_54(a, a, m);
-            }
+            sp_3072_mul_54(a, a, norm);
+            err = sp_3072_mod_54(a, a, m);
 
             if (err == MP_OKAY) {
-                for (i=56; i>=0; i--)
-                    if (e[0] >> i)
+                for (i=56; i>=0; i--) {
+                    if ((e[0] >> i) != 0) {
                         break;
-
-                XMEMCPY(r, a, sizeof(sp_digit) * 108);
+                    }
+                }
+
+                XMEMCPY(r, a, sizeof(sp_digit) * 108U);
                 for (i--; i>=0; i--) {
                     sp_3072_mont_sqr_54(r, r, m, mp);
 
-                    if (((e[0] >> i) & 1) == 1)
+                    if (((e[0] >> i) & 1) == 1) {
                         sp_3072_mont_mul_54(r, r, a, m, mp);
+                    }
                 }
                 sp_3072_mont_reduce_54(r, m, mp);
                 mp = sp_3072_cmp_54(r, m);
-                sp_3072_cond_sub_54(r, r, m, (mp < 0) - 1);
+                sp_3072_cond_sub_54(r, r, m, ((mp < 0) ?
+                           (sp_digit)1 : (sp_digit)0) - 1);
             }
         }
     }
@@ -6088,15 +7099,19 @@
         *outLen = 384;
     }
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (d != NULL)
-        XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return err;
-#endif /* WOLFSSL_SP_SMALL */
-}
-
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+#endif
+
+    return err;
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+#if !defined(SP_RSA_PRIVATE_EXP_D) && !defined(RSA_LOW_MEM)
+#endif /* !SP_RSA_PRIVATE_EXP_D && !RSA_LOW_MEM */
 /* RSA private key operation.
  *
  * in      Array of bytes representing the number to exponentiate, base.
@@ -6119,7 +7134,7 @@
     byte* out, word32* outLen)
 {
 #if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit* a;
     sp_digit* d = NULL;
     sp_digit* m;
@@ -6132,21 +7147,31 @@
     (void)dqm;
     (void)qim;
 
-    if (*outLen < 384)
+    if (*outLen < 384U) {
         err = MP_TO_E;
-    if (err == MP_OKAY && (mp_count_bits(dm) > 3072 || inLen > 384 ||
-                                                     mp_count_bits(mm) != 3072))
-        err = MP_READ_E;
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(dm) > 3072) {
+           err = MP_READ_E;
+        }
+        if (inLen > 384) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 3072) {
+            err = MP_READ_E;
+        }
+    }
 
     if (err == MP_OKAY) {
         d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 54 * 4, NULL,
-                               DYNAMIC_TYPE_TMP_BUFFER);
-        if (d == NULL)
-            err = MEMORY_E;
+                                                              DYNAMIC_TYPE_RSA);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
     }
     if (err == MP_OKAY) {
         a = d + 54;
-        m = a + 54;
+        m = a + 108;
         r = a;
 
         sp_3072_from_bin(a, 54, in, inLen);
@@ -6161,7 +7186,7 @@
 
     if (d != NULL) {
         XMEMSET(d, 0, sizeof(sp_digit) * 54);
-        XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
     }
 
     return err;
@@ -6176,11 +7201,20 @@
     (void)dqm;
     (void)qim;
 
-    if (*outLen < 384)
+    if (*outLen < 384U) {
         err = MP_TO_E;
-    if (err == MP_OKAY && (mp_count_bits(dm) > 3072 || inLen > 384 ||
-                                                     mp_count_bits(mm) != 3072))
-        err = MP_READ_E;
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(dm) > 3072) {
+            err = MP_READ_E;
+        }
+        if (inLen > 384U) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 3072) {
+            err = MP_READ_E;
+        }
+    }
 
     if (err == MP_OKAY) {
         sp_3072_from_bin(a, 54, in, inLen);
@@ -6199,7 +7233,7 @@
     return err;
 #endif /* WOLFSSL_SP_SMALL || defined(WOLFSSL_SMALL_STACK) */
 #else
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit* t = NULL;
     sp_digit* a;
     sp_digit* p;
@@ -6207,7 +7241,6 @@
     sp_digit* dp;
     sp_digit* dq;
     sp_digit* qi;
-    sp_digit* tmp;
     sp_digit* tmpa;
     sp_digit* tmpb;
     sp_digit* r;
@@ -6216,16 +7249,24 @@
     (void)dm;
     (void)mm;
 
-    if (*outLen < 384)
+    if (*outLen < 384U) {
         err = MP_TO_E;
-    if (err == MP_OKAY && (inLen > 384 || mp_count_bits(mm) != 3072))
-        err = MP_READ_E;
+    }
+    if (err == MP_OKAY) {
+        if (inLen > 384) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 3072) {
+            err = MP_READ_E;
+        }
+    }
 
     if (err == MP_OKAY) {
         t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 27 * 11, NULL,
-                               DYNAMIC_TYPE_TMP_BUFFER);
-        if (t == NULL)
-            err = MEMORY_E;
+                                                              DYNAMIC_TYPE_RSA);
+        if (t == NULL) {
+            err = MEMORY_E;
+        }
     }
     if (err == MP_OKAY) {
         a = t;
@@ -6235,8 +7276,7 @@
         tmpa = qi + 27;
         tmpb = tmpa + 54;
 
-        tmp = t;
-        r = tmp + 54;
+        r = t + 54;
 
         sp_3072_from_bin(a, 54, in, inLen);
         sp_3072_from_mp(p, 27, pm);
@@ -6249,9 +7289,9 @@
         err = sp_3072_mod_exp_27(tmpb, a, dq, 1536, q, 1);
     }
     if (err == MP_OKAY) {
-        sp_3072_sub_27(tmpa, tmpa, tmpb);
-        sp_3072_mask_27(tmp, p, tmpa[26] >> 63);
-        sp_3072_add_27(tmpa, tmpa, tmp);
+        (void)sp_3072_sub_27(tmpa, tmpa, tmpb);
+        sp_3072_cond_add_27(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[26] >> 63));
+        sp_3072_cond_add_27(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[26] >> 63));
 
         sp_3072_from_mp(qi, 27, qim);
         sp_3072_mul_27(tmpa, tmpa, qi);
@@ -6260,7 +7300,7 @@
 
     if (err == MP_OKAY) {
         sp_3072_mul_27(tmpa, q, tmpa);
-        sp_3072_add_54(r, tmpb, tmpa);
+        (void)sp_3072_add_54(r, tmpb, tmpa);
         sp_3072_norm_54(r);
 
         sp_3072_to_bin(r, out);
@@ -6269,24 +7309,31 @@
 
     if (t != NULL) {
         XMEMSET(t, 0, sizeof(sp_digit) * 27 * 11);
-        XFREE(t, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(t, NULL, DYNAMIC_TYPE_RSA);
     }
 
     return err;
 #else
     sp_digit a[54 * 2];
     sp_digit p[27], q[27], dp[27], dq[27], qi[27];
-    sp_digit tmp[54], tmpa[54], tmpb[54];
+    sp_digit tmpa[54], tmpb[54];
     sp_digit* r = a;
     int err = MP_OKAY;
 
     (void)dm;
     (void)mm;
 
-    if (*outLen < 384)
+    if (*outLen < 384U) {
         err = MP_TO_E;
-    if (err == MP_OKAY && (inLen > 384 || mp_count_bits(mm) != 3072))
-        err = MP_READ_E;
+    }
+    if (err == MP_OKAY) {
+        if (inLen > 384U) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 3072) {
+            err = MP_READ_E;
+        }
+    }
 
     if (err == MP_OKAY) {
         sp_3072_from_bin(a, 54, in, inLen);
@@ -6298,20 +7345,21 @@
 
         err = sp_3072_mod_exp_27(tmpa, a, dp, 1536, p, 1);
     }
-    if (err == MP_OKAY)
+    if (err == MP_OKAY) {
         err = sp_3072_mod_exp_27(tmpb, a, dq, 1536, q, 1);
-
-    if (err == MP_OKAY) {
-        sp_3072_sub_27(tmpa, tmpa, tmpb);
-        sp_3072_mask_27(tmp, p, tmpa[26] >> 63);
-        sp_3072_add_27(tmpa, tmpa, tmp);
+    }
+
+    if (err == MP_OKAY) {
+        (void)sp_3072_sub_27(tmpa, tmpa, tmpb);
+        sp_3072_cond_add_27(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[26] >> 63));
+        sp_3072_cond_add_27(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[26] >> 63));
         sp_3072_mul_27(tmpa, tmpa, qi);
         err = sp_3072_mod_27(tmpa, tmpa, p);
     }
 
     if (err == MP_OKAY) {
         sp_3072_mul_27(tmpa, tmpa, q);
-        sp_3072_add_54(r, tmpb, tmpa);
+        (void)sp_3072_add_54(r, tmpb, tmpa);
         sp_3072_norm_54(r);
 
         sp_3072_to_bin(r, out);
@@ -6331,19 +7379,21 @@
 #endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
 }
 
+#endif /* !WOLFSSL_RSA_PUBLIC_ONLY */
 #endif /* WOLFSSL_HAVE_SP_RSA */
-#ifdef WOLFSSL_HAVE_SP_DH
+#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
+                                              !defined(WOLFSSL_RSA_PUBLIC_ONLY))
 /* Convert an array of sp_digit to an mp_int.
  *
  * a  A single precision integer.
  * r  A multi-precision integer.
  */
-static int sp_3072_to_mp(sp_digit* a, mp_int* r)
+static int sp_3072_to_mp(const sp_digit* a, mp_int* r)
 {
     int err;
 
     err = mp_grow(r, (3072 + DIGIT_BIT - 1) / DIGIT_BIT);
-    if (err == MP_OKAY) {
+    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
 #if DIGIT_BIT == 57
         XMEMCPY(r->dp, a, sizeof(sp_digit) * 54);
         r->used = 54;
@@ -6353,14 +7403,19 @@
 
         r->dp[0] = 0;
         for (i = 0; i < 54; i++) {
-            r->dp[j] |= a[i] << s;
-            r->dp[j] &= (1l << DIGIT_BIT) - 1;
+            r->dp[j] |= (mp_digit)(a[i] << s);
+            r->dp[j] &= (1L << DIGIT_BIT) - 1;
             s = DIGIT_BIT - s;
-            r->dp[++j] = a[i] >> s;
+            r->dp[++j] = (mp_digit)(a[i] >> s);
             while (s + DIGIT_BIT <= 57) {
                 s += DIGIT_BIT;
-                r->dp[j] &= (1l << DIGIT_BIT) - 1;
-                r->dp[++j] = a[i] >> s;
+                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+                if (s == SP_WORD_SIZE) {
+                    r->dp[j] = 0;
+                }
+                else {
+                    r->dp[j] = (mp_digit)(a[i] >> s);
+                }
             }
             s = 57 - s;
         }
@@ -6373,15 +7428,16 @@
         for (i = 0; i < 54; i++) {
             r->dp[j] |= ((mp_digit)a[i]) << s;
             if (s + 57 >= DIGIT_BIT) {
-    #if DIGIT_BIT < 64
-                r->dp[j] &= (1l << DIGIT_BIT) - 1;
+    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+                r->dp[j] &= (1L << DIGIT_BIT) - 1;
     #endif
                 s = DIGIT_BIT - s;
                 r->dp[++j] = a[i] >> s;
                 s = 57 - s;
             }
-            else
+            else {
                 s += 57;
+            }
         }
         r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT;
         mp_clamp(r);
@@ -6397,7 +7453,7 @@
  * exp   Exponent. MP integer.
  * mod   Modulus. MP integer.
  * res   Result. MP integer.
- * returs 0 on success, MP_READ_E if there are too many bytes in an array
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
  * and MEMORY_E if memory allocation fails.
  */
 int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
@@ -6411,16 +7467,27 @@
     sp_digit* r;
     int expBits = mp_count_bits(exp);
 
-    if (mp_count_bits(base) > 3072 || expBits > 3072 ||
-                                                   mp_count_bits(mod) != 3072) {
+    if (mp_count_bits(base) > 3072) {
         err = MP_READ_E;
     }
 
     if (err == MP_OKAY) {
-        d = (sp_digit*)XMALLOC(sizeof(*d) * 54 * 4, NULL,
-                               DYNAMIC_TYPE_TMP_BUFFER);
-        if (d == NULL)
-            err = MEMORY_E;
+        if (expBits > 3072) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 3072) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 54 * 4, NULL, DYNAMIC_TYPE_DH);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
     }
 
     if (err == MP_OKAY) {
@@ -6441,8 +7508,8 @@
     }
 
     if (d != NULL) {
-        XMEMSET(e, 0, sizeof(sp_digit) * 54);
-        XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        XMEMSET(e, 0, sizeof(sp_digit) * 54U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
     }
     return err;
 #else
@@ -6458,15 +7525,25 @@
     int err = MP_OKAY;
     int expBits = mp_count_bits(exp);
 
-    if (mp_count_bits(base) > 3072 || expBits > 3072 ||
-                                                   mp_count_bits(mod) != 3072) {
+    if (mp_count_bits(base) > 3072) {
         err = MP_READ_E;
     }
 
-#ifdef WOLFSSL_SMALL_STACK
-    if (err == MP_OKAY) {
-        d = (sp_digit*)XMALLOC(sizeof(*d) * 54 * 4, NULL,
-                               DYNAMIC_TYPE_TMP_BUFFER);
+    if (err == MP_OKAY) {
+        if (expBits > 3072) {
+            err = MP_READ_E;
+        }
+    }
+    
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 3072) {
+            err = MP_READ_E;
+        }
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 54 * 4, NULL, DYNAMIC_TYPE_DH);
         if (d == NULL)
             err = MEMORY_E;
     }
@@ -6495,16 +7572,256 @@
         err = sp_3072_to_mp(r, res);
     }
 
-    XMEMSET(e, 0, sizeof(sp_digit) * 54);
-
-#ifdef WOLFSSL_SMALL_STACK
-    if (d != NULL)
-        XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return err;
-#endif
-}
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (d != NULL) {
+        XMEMSET(e, 0, sizeof(sp_digit) * 54U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
+    }
+#else
+    XMEMSET(e, 0, sizeof(sp_digit) * 54U);
+#endif
+
+    return err;
+#endif
+}
+
+#ifdef WOLFSSL_HAVE_SP_DH
+
+#ifdef HAVE_FFDHE_3072
+SP_NOINLINE static void sp_3072_lshift_54(sp_digit* r, sp_digit* a, byte n)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    r[54] = a[53] >> (57 - n);
+    for (i=53; i>0; i--) {
+        r[i] = ((a[i] << n) | (a[i-1] >> (57 - n))) & 0x1ffffffffffffffL;
+    }
+#else
+    sp_int_digit s, t;
+
+    s = (sp_int_digit)a[53];
+    r[54] = s >> (57U - n);
+    s = (sp_int_digit)(a[53]); t = (sp_int_digit)(a[52]);
+    r[53] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[52]); t = (sp_int_digit)(a[51]);
+    r[52] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[51]); t = (sp_int_digit)(a[50]);
+    r[51] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[50]); t = (sp_int_digit)(a[49]);
+    r[50] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[49]); t = (sp_int_digit)(a[48]);
+    r[49] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[48]); t = (sp_int_digit)(a[47]);
+    r[48] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[47]); t = (sp_int_digit)(a[46]);
+    r[47] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[46]); t = (sp_int_digit)(a[45]);
+    r[46] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[45]); t = (sp_int_digit)(a[44]);
+    r[45] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[44]); t = (sp_int_digit)(a[43]);
+    r[44] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[43]); t = (sp_int_digit)(a[42]);
+    r[43] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[42]); t = (sp_int_digit)(a[41]);
+    r[42] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[41]); t = (sp_int_digit)(a[40]);
+    r[41] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[40]); t = (sp_int_digit)(a[39]);
+    r[40] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[39]); t = (sp_int_digit)(a[38]);
+    r[39] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[38]); t = (sp_int_digit)(a[37]);
+    r[38] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[37]); t = (sp_int_digit)(a[36]);
+    r[37] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[36]); t = (sp_int_digit)(a[35]);
+    r[36] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[35]); t = (sp_int_digit)(a[34]);
+    r[35] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[34]); t = (sp_int_digit)(a[33]);
+    r[34] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[33]); t = (sp_int_digit)(a[32]);
+    r[33] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[32]); t = (sp_int_digit)(a[31]);
+    r[32] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[31]); t = (sp_int_digit)(a[30]);
+    r[31] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[30]); t = (sp_int_digit)(a[29]);
+    r[30] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[29]); t = (sp_int_digit)(a[28]);
+    r[29] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[28]); t = (sp_int_digit)(a[27]);
+    r[28] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[27]); t = (sp_int_digit)(a[26]);
+    r[27] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[26]); t = (sp_int_digit)(a[25]);
+    r[26] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[25]); t = (sp_int_digit)(a[24]);
+    r[25] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[24]); t = (sp_int_digit)(a[23]);
+    r[24] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[23]); t = (sp_int_digit)(a[22]);
+    r[23] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[22]); t = (sp_int_digit)(a[21]);
+    r[22] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[21]); t = (sp_int_digit)(a[20]);
+    r[21] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[20]); t = (sp_int_digit)(a[19]);
+    r[20] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[19]); t = (sp_int_digit)(a[18]);
+    r[19] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[18]); t = (sp_int_digit)(a[17]);
+    r[18] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[17]); t = (sp_int_digit)(a[16]);
+    r[17] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[16]); t = (sp_int_digit)(a[15]);
+    r[16] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[15]); t = (sp_int_digit)(a[14]);
+    r[15] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[14]); t = (sp_int_digit)(a[13]);
+    r[14] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[13]); t = (sp_int_digit)(a[12]);
+    r[13] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[12]); t = (sp_int_digit)(a[11]);
+    r[12] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[11]); t = (sp_int_digit)(a[10]);
+    r[11] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[10]); t = (sp_int_digit)(a[9]);
+    r[10] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[9]); t = (sp_int_digit)(a[8]);
+    r[9] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[8]); t = (sp_int_digit)(a[7]);
+    r[8] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[7]); t = (sp_int_digit)(a[6]);
+    r[7] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[6]); t = (sp_int_digit)(a[5]);
+    r[6] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[5]); t = (sp_int_digit)(a[4]);
+    r[5] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[4]); t = (sp_int_digit)(a[3]);
+    r[4] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[3]); t = (sp_int_digit)(a[2]);
+    r[3] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[2]); t = (sp_int_digit)(a[1]);
+    r[2] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[1]); t = (sp_int_digit)(a[0]);
+    r[1] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+#endif
+    r[0] = (a[0] << n) & 0x1ffffffffffffffL;
+}
+
+/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
+ *
+ * r     A single precision number that is the result of the operation.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_3072_mod_exp_2_54(sp_digit* r, const sp_digit* e, int bits, const sp_digit* m)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit nd[108];
+    sp_digit td[55];
+#else
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit* tmp;
+    sp_digit mp = 1;
+    sp_digit n, o;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 163, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        norm = td;
+        tmp  = td + 108;
+        XMEMSET(td, 0, sizeof(sp_digit) * 163);
+#else
+        norm = nd;
+        tmp  = td;
+        XMEMSET(td, 0, sizeof(td));
+#endif
+
+        sp_3072_mont_setup(m, &mp);
+        sp_3072_mont_norm_54(norm, m);
+
+        bits = ((bits + 4) / 5) * 5;
+        i = ((bits + 56) / 57) - 1;
+        c = bits % 57;
+        if (c == 0) {
+            c = 57;
+        }
+        if (i < 54) {
+            n = e[i--] << (64 - c);
+        }
+        else {
+            n = 0;
+            i--;
+        }
+        if (c < 5) {
+            n |= e[i--] << (7 - c);
+            c += 57;
+        }
+        y = (n >> 59) & 0x1f;
+        n <<= 5;
+        c -= 5;
+        sp_3072_lshift_54(r, norm, y);
+        for (; i>=0 || c>=5; ) {
+            if (c < 5) {
+                n |= e[i--] << (7 - c);
+                c += 57;
+            }
+            y = (n >> 59) & 0x1f;
+            n <<= 5;
+            c -= 5;
+
+            sp_3072_mont_sqr_54(r, r, m, mp);
+            sp_3072_mont_sqr_54(r, r, m, mp);
+            sp_3072_mont_sqr_54(r, r, m, mp);
+            sp_3072_mont_sqr_54(r, r, m, mp);
+            sp_3072_mont_sqr_54(r, r, m, mp);
+
+            sp_3072_lshift_54(r, r, y);
+            sp_3072_mul_d_54(tmp, norm, (r[54] << 6) + (r[53] >> 51));
+            r[54] = 0;
+            r[53] &= 0x7ffffffffffffL;
+            (void)sp_3072_add_54(r, r, tmp);
+            sp_3072_norm_54(r);
+            o = sp_3072_cmp_54(r, m);
+            sp_3072_cond_sub_54(r, r, m, ((o < 0) ?
+                                          (sp_digit)1 : (sp_digit)0) - 1);
+        }
+
+        sp_3072_mont_reduce_54(r, m, mp);
+        n = sp_3072_cmp_54(r, m);
+        sp_3072_cond_sub_54(r, r, m, ((n < 0) ?
+                                                (sp_digit)1 : (sp_digit)0) - 1);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+
+#endif /* HAVE_FFDHE_3072 */
 
 /* Perform the modular exponentiation for Diffie-Hellman.
  *
@@ -6515,7 +7832,7 @@
  * out      Buffer to hold big-endian bytes of exponentiation result.
  *          Must be at least 384 bytes long.
  * outLen   Length, in bytes, of exponentiation result.
- * returs 0 on success, MP_READ_E if there are too many bytes in an array
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
  * and MEMORY_E if memory allocation fails.
  */
 int sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen,
@@ -6530,16 +7847,27 @@
     sp_digit* r;
     word32 i;
 
-    if (mp_count_bits(base) > 3072 || expLen > 384 ||
-                                                   mp_count_bits(mod) != 3072) {
+    if (mp_count_bits(base) > 3072) {
         err = MP_READ_E;
     }
 
     if (err == MP_OKAY) {
-        d = (sp_digit*)XMALLOC(sizeof(*d) * 54 * 4, NULL,
-                               DYNAMIC_TYPE_TMP_BUFFER);
-        if (d == NULL)
-            err = MEMORY_E;
+        if (expLen > 384) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 3072) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 54 * 4, NULL, DYNAMIC_TYPE_DH);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
     }
 
     if (err == MP_OKAY) {
@@ -6552,7 +7880,14 @@
         sp_3072_from_bin(e, 54, exp, expLen);
         sp_3072_from_mp(m, 54, mod);
 
-        err = sp_3072_mod_exp_54(r, b, e, expLen * 8, m, 0);
+    #ifdef HAVE_FFDHE_3072
+        if (base->used == 1 && base->dp[0] == 2 &&
+                (m[53] >> 19) == 0xffffffffL) {
+            err = sp_3072_mod_exp_2_54(r, e, expLen * 8, m);
+        }
+        else
+    #endif
+            err = sp_3072_mod_exp_54(r, b, e, expLen * 8, m, 0);
     }
 
     if (err == MP_OKAY) {
@@ -6565,8 +7900,8 @@
     }
 
     if (d != NULL) {
-        XMEMSET(e, 0, sizeof(sp_digit) * 54);
-        XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        XMEMSET(e, 0, sizeof(sp_digit) * 54U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
     }
     return err;
 #else
@@ -6582,15 +7917,24 @@
     word32 i;
     int err = MP_OKAY;
 
-    if (mp_count_bits(base) > 3072 || expLen > 384 ||
-                                                   mp_count_bits(mod) != 3072) {
+    if (mp_count_bits(base) > 3072) {
         err = MP_READ_E;
     }
 
-#ifdef WOLFSSL_SMALL_STACK
-    if (err == MP_OKAY) {
-        d = (sp_digit*)XMALLOC(sizeof(*d) * 54 * 4, NULL,
-                               DYNAMIC_TYPE_TMP_BUFFER);
+    if (err == MP_OKAY) {
+        if (expLen > 384U) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 3072) {
+            err = MP_READ_E;
+        }
+    }
+#ifdef WOLFSSL_SMALL_STACK
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 54 * 4, NULL, DYNAMIC_TYPE_DH);
         if (d == NULL)
             err = MEMORY_E;
     }
@@ -6612,140 +7956,4571 @@
         sp_3072_from_bin(e, 54, exp, expLen);
         sp_3072_from_mp(m, 54, mod);
 
-        err = sp_3072_mod_exp_54(r, b, e, expLen * 8, m, 0);
+    #ifdef HAVE_FFDHE_3072
+        if (base->used == 1 && base->dp[0] == 2U &&
+                (m[53] >> 19) == 0xffffffffL) {
+            err = sp_3072_mod_exp_2_54(r, e, expLen * 8U, m);
+        }
+        else {
+    #endif
+            err = sp_3072_mod_exp_54(r, b, e, expLen * 8U, m, 0);
+    #ifdef HAVE_FFDHE_3072
+        }
+    #endif
     }
 
     if (err == MP_OKAY) {
         sp_3072_to_bin(r, out);
         *outLen = 384;
-        for (i=0; i<384 && out[i] == 0; i++) {
+        for (i=0; i<384U && out[i] == 0U; i++) {
         }
         *outLen -= i;
         XMEMMOVE(out, out + i, *outLen);
     }
 
-    XMEMSET(e, 0, sizeof(sp_digit) * 54);
-
-#ifdef WOLFSSL_SMALL_STACK
-    if (d != NULL)
-        XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return err;
-#endif
-}
-
+#ifdef WOLFSSL_SMALL_STACK
+    if (d != NULL) {
+        XMEMSET(e, 0, sizeof(sp_digit) * 54U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
+    }
+#else
+    XMEMSET(e, 0, sizeof(sp_digit) * 54U);
+#endif
+
+    return err;
+#endif
+}
 #endif /* WOLFSSL_HAVE_SP_DH */
 
-#endif /* WOLFSSL_SP_NO_3072 */
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base  Base. MP integer.
+ * exp   Exponent. MP integer.
+ * mod   Modulus. MP integer.
+ * res   Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int err = MP_OKAY;
+    sp_digit* d = NULL;
+    sp_digit* b;
+    sp_digit* e;
+    sp_digit* m;
+    sp_digit* r;
+    int expBits = mp_count_bits(exp);
+
+    if (mp_count_bits(base) > 1536) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expBits > 1536) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 1536) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 27 * 4, NULL, DYNAMIC_TYPE_DH);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        b = d;
+        e = b + 27 * 2;
+        m = e + 27;
+        r = b;
+
+        sp_3072_from_mp(b, 27, base);
+        sp_3072_from_mp(e, 27, exp);
+        sp_3072_from_mp(m, 27, mod);
+
+        err = sp_3072_mod_exp_27(r, b, e, mp_count_bits(exp), m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        XMEMSET(r + 27, 0, sizeof(*r) * 27U);
+        err = sp_3072_to_mp(r, res);
+    }
+
+    if (d != NULL) {
+        XMEMSET(e, 0, sizeof(sp_digit) * 27U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
+    }
+    return err;
+#else
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit bd[54], ed[27], md[27];
+#else
+    sp_digit* d = NULL;
+#endif
+    sp_digit* b;
+    sp_digit* e;
+    sp_digit* m;
+    sp_digit* r;
+    int err = MP_OKAY;
+    int expBits = mp_count_bits(exp);
+
+    if (mp_count_bits(base) > 1536) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expBits > 1536) {
+            err = MP_READ_E;
+        }
+    }
+    
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 1536) {
+            err = MP_READ_E;
+        }
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 27 * 4, NULL, DYNAMIC_TYPE_DH);
+        if (d == NULL)
+            err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        b = d;
+        e = b + 27 * 2;
+        m = e + 27;
+        r = b;
+    }
+#else
+    r = b = bd;
+    e = ed;
+    m = md;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_3072_from_mp(b, 27, base);
+        sp_3072_from_mp(e, 27, exp);
+        sp_3072_from_mp(m, 27, mod);
+
+        err = sp_3072_mod_exp_27(r, b, e, expBits, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        XMEMSET(r + 27, 0, sizeof(*r) * 27U);
+        err = sp_3072_to_mp(r, res);
+    }
+
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (d != NULL) {
+        XMEMSET(e, 0, sizeof(sp_digit) * 27U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
+    }
+#else
+    XMEMSET(e, 0, sizeof(sp_digit) * 27U);
+#endif
+
+    return err;
+#endif
+}
+
+#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
+
+#endif /* !WOLFSSL_SP_NO_3072 */
+
+#ifdef WOLFSSL_SP_4096
+/* Read big endian unsigned byte array into r.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
+ * a  Byte array.
+ * n  Number of bytes in array to read.
+ */
+static void sp_4096_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = n-1; i >= 0; i--) {
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 45U) {
+            r[j] &= 0x1fffffffffffffL;
+            s = 53U - s;
+            if (j + 1 >= size) {
+                break;
+            }
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
+ * a  A multi-precision integer.
+ */
+static void sp_4096_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 53
+    int j;
+
+    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+    for (j = a->used; j < size; j++) {
+        r[j] = 0;
+    }
+#elif DIGIT_BIT > 53
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
+        r[j] &= 0x1fffffffffffffL;
+        s = 53U - s;
+        if (j + 1 >= size) {
+            break;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 53U) <= (word32)DIGIT_BIT) {
+            s += 53U;
+            r[j] &= 0x1fffffffffffffL;
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#else
+    int i, j = 0, s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i]) << s;
+        if (s + DIGIT_BIT >= 53) {
+            r[j] &= 0x1fffffffffffffL;
+            if (j + 1 >= size) {
+                break;
+            }
+            s = 53 - s;
+            if (s == DIGIT_BIT) {
+                r[++j] = 0;
+                s = 0;
+            }
+            else {
+                r[++j] = a->dp[i] >> s;
+                s = DIGIT_BIT - s;
+            }
+        }
+        else {
+            s += DIGIT_BIT;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#endif
+}
+
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 512
+ *
+ * r  A single precision integer.
+ * a  Byte array.
+ */
+static void sp_4096_to_bin(sp_digit* r, byte* a)
+{
+    int i, j, s = 0, b;
+
+    for (i=0; i<77; i++) {
+        r[i+1] += r[i] >> 53;
+        r[i] &= 0x1fffffffffffffL;
+    }
+    j = 4096 / 8 - 1;
+    a[j] = 0;
+    for (i=0; i<78 && j>=0; i++) {
+        b = 0;
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
+            break;
+        }
+        while (b < 53) {
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
+                break;
+            }
+        }
+        s = 8 - (b - 53);
+        if (j >= 0) {
+            a[j] = 0;
+        }
+        if (s != 0) {
+            j++;
+        }
+    }
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_mul_13(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int128_t t0   = ((int128_t)a[ 0]) * b[ 0];
+    int128_t t1   = ((int128_t)a[ 0]) * b[ 1]
+                 + ((int128_t)a[ 1]) * b[ 0];
+    int128_t t2   = ((int128_t)a[ 0]) * b[ 2]
+                 + ((int128_t)a[ 1]) * b[ 1]
+                 + ((int128_t)a[ 2]) * b[ 0];
+    int128_t t3   = ((int128_t)a[ 0]) * b[ 3]
+                 + ((int128_t)a[ 1]) * b[ 2]
+                 + ((int128_t)a[ 2]) * b[ 1]
+                 + ((int128_t)a[ 3]) * b[ 0];
+    int128_t t4   = ((int128_t)a[ 0]) * b[ 4]
+                 + ((int128_t)a[ 1]) * b[ 3]
+                 + ((int128_t)a[ 2]) * b[ 2]
+                 + ((int128_t)a[ 3]) * b[ 1]
+                 + ((int128_t)a[ 4]) * b[ 0];
+    int128_t t5   = ((int128_t)a[ 0]) * b[ 5]
+                 + ((int128_t)a[ 1]) * b[ 4]
+                 + ((int128_t)a[ 2]) * b[ 3]
+                 + ((int128_t)a[ 3]) * b[ 2]
+                 + ((int128_t)a[ 4]) * b[ 1]
+                 + ((int128_t)a[ 5]) * b[ 0];
+    int128_t t6   = ((int128_t)a[ 0]) * b[ 6]
+                 + ((int128_t)a[ 1]) * b[ 5]
+                 + ((int128_t)a[ 2]) * b[ 4]
+                 + ((int128_t)a[ 3]) * b[ 3]
+                 + ((int128_t)a[ 4]) * b[ 2]
+                 + ((int128_t)a[ 5]) * b[ 1]
+                 + ((int128_t)a[ 6]) * b[ 0];
+    int128_t t7   = ((int128_t)a[ 0]) * b[ 7]
+                 + ((int128_t)a[ 1]) * b[ 6]
+                 + ((int128_t)a[ 2]) * b[ 5]
+                 + ((int128_t)a[ 3]) * b[ 4]
+                 + ((int128_t)a[ 4]) * b[ 3]
+                 + ((int128_t)a[ 5]) * b[ 2]
+                 + ((int128_t)a[ 6]) * b[ 1]
+                 + ((int128_t)a[ 7]) * b[ 0];
+    int128_t t8   = ((int128_t)a[ 0]) * b[ 8]
+                 + ((int128_t)a[ 1]) * b[ 7]
+                 + ((int128_t)a[ 2]) * b[ 6]
+                 + ((int128_t)a[ 3]) * b[ 5]
+                 + ((int128_t)a[ 4]) * b[ 4]
+                 + ((int128_t)a[ 5]) * b[ 3]
+                 + ((int128_t)a[ 6]) * b[ 2]
+                 + ((int128_t)a[ 7]) * b[ 1]
+                 + ((int128_t)a[ 8]) * b[ 0];
+    int128_t t9   = ((int128_t)a[ 0]) * b[ 9]
+                 + ((int128_t)a[ 1]) * b[ 8]
+                 + ((int128_t)a[ 2]) * b[ 7]
+                 + ((int128_t)a[ 3]) * b[ 6]
+                 + ((int128_t)a[ 4]) * b[ 5]
+                 + ((int128_t)a[ 5]) * b[ 4]
+                 + ((int128_t)a[ 6]) * b[ 3]
+                 + ((int128_t)a[ 7]) * b[ 2]
+                 + ((int128_t)a[ 8]) * b[ 1]
+                 + ((int128_t)a[ 9]) * b[ 0];
+    int128_t t10  = ((int128_t)a[ 0]) * b[10]
+                 + ((int128_t)a[ 1]) * b[ 9]
+                 + ((int128_t)a[ 2]) * b[ 8]
+                 + ((int128_t)a[ 3]) * b[ 7]
+                 + ((int128_t)a[ 4]) * b[ 6]
+                 + ((int128_t)a[ 5]) * b[ 5]
+                 + ((int128_t)a[ 6]) * b[ 4]
+                 + ((int128_t)a[ 7]) * b[ 3]
+                 + ((int128_t)a[ 8]) * b[ 2]
+                 + ((int128_t)a[ 9]) * b[ 1]
+                 + ((int128_t)a[10]) * b[ 0];
+    int128_t t11  = ((int128_t)a[ 0]) * b[11]
+                 + ((int128_t)a[ 1]) * b[10]
+                 + ((int128_t)a[ 2]) * b[ 9]
+                 + ((int128_t)a[ 3]) * b[ 8]
+                 + ((int128_t)a[ 4]) * b[ 7]
+                 + ((int128_t)a[ 5]) * b[ 6]
+                 + ((int128_t)a[ 6]) * b[ 5]
+                 + ((int128_t)a[ 7]) * b[ 4]
+                 + ((int128_t)a[ 8]) * b[ 3]
+                 + ((int128_t)a[ 9]) * b[ 2]
+                 + ((int128_t)a[10]) * b[ 1]
+                 + ((int128_t)a[11]) * b[ 0];
+    int128_t t12  = ((int128_t)a[ 0]) * b[12]
+                 + ((int128_t)a[ 1]) * b[11]
+                 + ((int128_t)a[ 2]) * b[10]
+                 + ((int128_t)a[ 3]) * b[ 9]
+                 + ((int128_t)a[ 4]) * b[ 8]
+                 + ((int128_t)a[ 5]) * b[ 7]
+                 + ((int128_t)a[ 6]) * b[ 6]
+                 + ((int128_t)a[ 7]) * b[ 5]
+                 + ((int128_t)a[ 8]) * b[ 4]
+                 + ((int128_t)a[ 9]) * b[ 3]
+                 + ((int128_t)a[10]) * b[ 2]
+                 + ((int128_t)a[11]) * b[ 1]
+                 + ((int128_t)a[12]) * b[ 0];
+    int128_t t13  = ((int128_t)a[ 1]) * b[12]
+                 + ((int128_t)a[ 2]) * b[11]
+                 + ((int128_t)a[ 3]) * b[10]
+                 + ((int128_t)a[ 4]) * b[ 9]
+                 + ((int128_t)a[ 5]) * b[ 8]
+                 + ((int128_t)a[ 6]) * b[ 7]
+                 + ((int128_t)a[ 7]) * b[ 6]
+                 + ((int128_t)a[ 8]) * b[ 5]
+                 + ((int128_t)a[ 9]) * b[ 4]
+                 + ((int128_t)a[10]) * b[ 3]
+                 + ((int128_t)a[11]) * b[ 2]
+                 + ((int128_t)a[12]) * b[ 1];
+    int128_t t14  = ((int128_t)a[ 2]) * b[12]
+                 + ((int128_t)a[ 3]) * b[11]
+                 + ((int128_t)a[ 4]) * b[10]
+                 + ((int128_t)a[ 5]) * b[ 9]
+                 + ((int128_t)a[ 6]) * b[ 8]
+                 + ((int128_t)a[ 7]) * b[ 7]
+                 + ((int128_t)a[ 8]) * b[ 6]
+                 + ((int128_t)a[ 9]) * b[ 5]
+                 + ((int128_t)a[10]) * b[ 4]
+                 + ((int128_t)a[11]) * b[ 3]
+                 + ((int128_t)a[12]) * b[ 2];
+    int128_t t15  = ((int128_t)a[ 3]) * b[12]
+                 + ((int128_t)a[ 4]) * b[11]
+                 + ((int128_t)a[ 5]) * b[10]
+                 + ((int128_t)a[ 6]) * b[ 9]
+                 + ((int128_t)a[ 7]) * b[ 8]
+                 + ((int128_t)a[ 8]) * b[ 7]
+                 + ((int128_t)a[ 9]) * b[ 6]
+                 + ((int128_t)a[10]) * b[ 5]
+                 + ((int128_t)a[11]) * b[ 4]
+                 + ((int128_t)a[12]) * b[ 3];
+    int128_t t16  = ((int128_t)a[ 4]) * b[12]
+                 + ((int128_t)a[ 5]) * b[11]
+                 + ((int128_t)a[ 6]) * b[10]
+                 + ((int128_t)a[ 7]) * b[ 9]
+                 + ((int128_t)a[ 8]) * b[ 8]
+                 + ((int128_t)a[ 9]) * b[ 7]
+                 + ((int128_t)a[10]) * b[ 6]
+                 + ((int128_t)a[11]) * b[ 5]
+                 + ((int128_t)a[12]) * b[ 4];
+    int128_t t17  = ((int128_t)a[ 5]) * b[12]
+                 + ((int128_t)a[ 6]) * b[11]
+                 + ((int128_t)a[ 7]) * b[10]
+                 + ((int128_t)a[ 8]) * b[ 9]
+                 + ((int128_t)a[ 9]) * b[ 8]
+                 + ((int128_t)a[10]) * b[ 7]
+                 + ((int128_t)a[11]) * b[ 6]
+                 + ((int128_t)a[12]) * b[ 5];
+    int128_t t18  = ((int128_t)a[ 6]) * b[12]
+                 + ((int128_t)a[ 7]) * b[11]
+                 + ((int128_t)a[ 8]) * b[10]
+                 + ((int128_t)a[ 9]) * b[ 9]
+                 + ((int128_t)a[10]) * b[ 8]
+                 + ((int128_t)a[11]) * b[ 7]
+                 + ((int128_t)a[12]) * b[ 6];
+    int128_t t19  = ((int128_t)a[ 7]) * b[12]
+                 + ((int128_t)a[ 8]) * b[11]
+                 + ((int128_t)a[ 9]) * b[10]
+                 + ((int128_t)a[10]) * b[ 9]
+                 + ((int128_t)a[11]) * b[ 8]
+                 + ((int128_t)a[12]) * b[ 7];
+    int128_t t20  = ((int128_t)a[ 8]) * b[12]
+                 + ((int128_t)a[ 9]) * b[11]
+                 + ((int128_t)a[10]) * b[10]
+                 + ((int128_t)a[11]) * b[ 9]
+                 + ((int128_t)a[12]) * b[ 8];
+    int128_t t21  = ((int128_t)a[ 9]) * b[12]
+                 + ((int128_t)a[10]) * b[11]
+                 + ((int128_t)a[11]) * b[10]
+                 + ((int128_t)a[12]) * b[ 9];
+    int128_t t22  = ((int128_t)a[10]) * b[12]
+                 + ((int128_t)a[11]) * b[11]
+                 + ((int128_t)a[12]) * b[10];
+    int128_t t23  = ((int128_t)a[11]) * b[12]
+                 + ((int128_t)a[12]) * b[11];
+    int128_t t24  = ((int128_t)a[12]) * b[12];
+
+    t1   += t0  >> 53; r[ 0] = t0  & 0x1fffffffffffffL;
+    t2   += t1  >> 53; r[ 1] = t1  & 0x1fffffffffffffL;
+    t3   += t2  >> 53; r[ 2] = t2  & 0x1fffffffffffffL;
+    t4   += t3  >> 53; r[ 3] = t3  & 0x1fffffffffffffL;
+    t5   += t4  >> 53; r[ 4] = t4  & 0x1fffffffffffffL;
+    t6   += t5  >> 53; r[ 5] = t5  & 0x1fffffffffffffL;
+    t7   += t6  >> 53; r[ 6] = t6  & 0x1fffffffffffffL;
+    t8   += t7  >> 53; r[ 7] = t7  & 0x1fffffffffffffL;
+    t9   += t8  >> 53; r[ 8] = t8  & 0x1fffffffffffffL;
+    t10  += t9  >> 53; r[ 9] = t9  & 0x1fffffffffffffL;
+    t11  += t10 >> 53; r[10] = t10 & 0x1fffffffffffffL;
+    t12  += t11 >> 53; r[11] = t11 & 0x1fffffffffffffL;
+    t13  += t12 >> 53; r[12] = t12 & 0x1fffffffffffffL;
+    t14  += t13 >> 53; r[13] = t13 & 0x1fffffffffffffL;
+    t15  += t14 >> 53; r[14] = t14 & 0x1fffffffffffffL;
+    t16  += t15 >> 53; r[15] = t15 & 0x1fffffffffffffL;
+    t17  += t16 >> 53; r[16] = t16 & 0x1fffffffffffffL;
+    t18  += t17 >> 53; r[17] = t17 & 0x1fffffffffffffL;
+    t19  += t18 >> 53; r[18] = t18 & 0x1fffffffffffffL;
+    t20  += t19 >> 53; r[19] = t19 & 0x1fffffffffffffL;
+    t21  += t20 >> 53; r[20] = t20 & 0x1fffffffffffffL;
+    t22  += t21 >> 53; r[21] = t21 & 0x1fffffffffffffL;
+    t23  += t22 >> 53; r[22] = t22 & 0x1fffffffffffffL;
+    t24  += t23 >> 53; r[23] = t23 & 0x1fffffffffffffL;
+    r[25] = (sp_digit)(t24 >> 53);
+                       r[24] = t24 & 0x1fffffffffffffL;
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_sqr_13(sp_digit* r, const sp_digit* a)
+{
+    int128_t t0   =  ((int128_t)a[ 0]) * a[ 0];
+    int128_t t1   = (((int128_t)a[ 0]) * a[ 1]) * 2;
+    int128_t t2   = (((int128_t)a[ 0]) * a[ 2]) * 2
+                 +  ((int128_t)a[ 1]) * a[ 1];
+    int128_t t3   = (((int128_t)a[ 0]) * a[ 3]
+                 +  ((int128_t)a[ 1]) * a[ 2]) * 2;
+    int128_t t4   = (((int128_t)a[ 0]) * a[ 4]
+                 +  ((int128_t)a[ 1]) * a[ 3]) * 2
+                 +  ((int128_t)a[ 2]) * a[ 2];
+    int128_t t5   = (((int128_t)a[ 0]) * a[ 5]
+                 +  ((int128_t)a[ 1]) * a[ 4]
+                 +  ((int128_t)a[ 2]) * a[ 3]) * 2;
+    int128_t t6   = (((int128_t)a[ 0]) * a[ 6]
+                 +  ((int128_t)a[ 1]) * a[ 5]
+                 +  ((int128_t)a[ 2]) * a[ 4]) * 2
+                 +  ((int128_t)a[ 3]) * a[ 3];
+    int128_t t7   = (((int128_t)a[ 0]) * a[ 7]
+                 +  ((int128_t)a[ 1]) * a[ 6]
+                 +  ((int128_t)a[ 2]) * a[ 5]
+                 +  ((int128_t)a[ 3]) * a[ 4]) * 2;
+    int128_t t8   = (((int128_t)a[ 0]) * a[ 8]
+                 +  ((int128_t)a[ 1]) * a[ 7]
+                 +  ((int128_t)a[ 2]) * a[ 6]
+                 +  ((int128_t)a[ 3]) * a[ 5]) * 2
+                 +  ((int128_t)a[ 4]) * a[ 4];
+    int128_t t9   = (((int128_t)a[ 0]) * a[ 9]
+                 +  ((int128_t)a[ 1]) * a[ 8]
+                 +  ((int128_t)a[ 2]) * a[ 7]
+                 +  ((int128_t)a[ 3]) * a[ 6]
+                 +  ((int128_t)a[ 4]) * a[ 5]) * 2;
+    int128_t t10  = (((int128_t)a[ 0]) * a[10]
+                 +  ((int128_t)a[ 1]) * a[ 9]
+                 +  ((int128_t)a[ 2]) * a[ 8]
+                 +  ((int128_t)a[ 3]) * a[ 7]
+                 +  ((int128_t)a[ 4]) * a[ 6]) * 2
+                 +  ((int128_t)a[ 5]) * a[ 5];
+    int128_t t11  = (((int128_t)a[ 0]) * a[11]
+                 +  ((int128_t)a[ 1]) * a[10]
+                 +  ((int128_t)a[ 2]) * a[ 9]
+                 +  ((int128_t)a[ 3]) * a[ 8]
+                 +  ((int128_t)a[ 4]) * a[ 7]
+                 +  ((int128_t)a[ 5]) * a[ 6]) * 2;
+    int128_t t12  = (((int128_t)a[ 0]) * a[12]
+                 +  ((int128_t)a[ 1]) * a[11]
+                 +  ((int128_t)a[ 2]) * a[10]
+                 +  ((int128_t)a[ 3]) * a[ 9]
+                 +  ((int128_t)a[ 4]) * a[ 8]
+                 +  ((int128_t)a[ 5]) * a[ 7]) * 2
+                 +  ((int128_t)a[ 6]) * a[ 6];
+    int128_t t13  = (((int128_t)a[ 1]) * a[12]
+                 +  ((int128_t)a[ 2]) * a[11]
+                 +  ((int128_t)a[ 3]) * a[10]
+                 +  ((int128_t)a[ 4]) * a[ 9]
+                 +  ((int128_t)a[ 5]) * a[ 8]
+                 +  ((int128_t)a[ 6]) * a[ 7]) * 2;
+    int128_t t14  = (((int128_t)a[ 2]) * a[12]
+                 +  ((int128_t)a[ 3]) * a[11]
+                 +  ((int128_t)a[ 4]) * a[10]
+                 +  ((int128_t)a[ 5]) * a[ 9]
+                 +  ((int128_t)a[ 6]) * a[ 8]) * 2
+                 +  ((int128_t)a[ 7]) * a[ 7];
+    int128_t t15  = (((int128_t)a[ 3]) * a[12]
+                 +  ((int128_t)a[ 4]) * a[11]
+                 +  ((int128_t)a[ 5]) * a[10]
+                 +  ((int128_t)a[ 6]) * a[ 9]
+                 +  ((int128_t)a[ 7]) * a[ 8]) * 2;
+    int128_t t16  = (((int128_t)a[ 4]) * a[12]
+                 +  ((int128_t)a[ 5]) * a[11]
+                 +  ((int128_t)a[ 6]) * a[10]
+                 +  ((int128_t)a[ 7]) * a[ 9]) * 2
+                 +  ((int128_t)a[ 8]) * a[ 8];
+    int128_t t17  = (((int128_t)a[ 5]) * a[12]
+                 +  ((int128_t)a[ 6]) * a[11]
+                 +  ((int128_t)a[ 7]) * a[10]
+                 +  ((int128_t)a[ 8]) * a[ 9]) * 2;
+    int128_t t18  = (((int128_t)a[ 6]) * a[12]
+                 +  ((int128_t)a[ 7]) * a[11]
+                 +  ((int128_t)a[ 8]) * a[10]) * 2
+                 +  ((int128_t)a[ 9]) * a[ 9];
+    int128_t t19  = (((int128_t)a[ 7]) * a[12]
+                 +  ((int128_t)a[ 8]) * a[11]
+                 +  ((int128_t)a[ 9]) * a[10]) * 2;
+    int128_t t20  = (((int128_t)a[ 8]) * a[12]
+                 +  ((int128_t)a[ 9]) * a[11]) * 2
+                 +  ((int128_t)a[10]) * a[10];
+    int128_t t21  = (((int128_t)a[ 9]) * a[12]
+                 +  ((int128_t)a[10]) * a[11]) * 2;
+    int128_t t22  = (((int128_t)a[10]) * a[12]) * 2
+                 +  ((int128_t)a[11]) * a[11];
+    int128_t t23  = (((int128_t)a[11]) * a[12]) * 2;
+    int128_t t24  =  ((int128_t)a[12]) * a[12];
+
+    t1   += t0  >> 53; r[ 0] = t0  & 0x1fffffffffffffL;
+    t2   += t1  >> 53; r[ 1] = t1  & 0x1fffffffffffffL;
+    t3   += t2  >> 53; r[ 2] = t2  & 0x1fffffffffffffL;
+    t4   += t3  >> 53; r[ 3] = t3  & 0x1fffffffffffffL;
+    t5   += t4  >> 53; r[ 4] = t4  & 0x1fffffffffffffL;
+    t6   += t5  >> 53; r[ 5] = t5  & 0x1fffffffffffffL;
+    t7   += t6  >> 53; r[ 6] = t6  & 0x1fffffffffffffL;
+    t8   += t7  >> 53; r[ 7] = t7  & 0x1fffffffffffffL;
+    t9   += t8  >> 53; r[ 8] = t8  & 0x1fffffffffffffL;
+    t10  += t9  >> 53; r[ 9] = t9  & 0x1fffffffffffffL;
+    t11  += t10 >> 53; r[10] = t10 & 0x1fffffffffffffL;
+    t12  += t11 >> 53; r[11] = t11 & 0x1fffffffffffffL;
+    t13  += t12 >> 53; r[12] = t12 & 0x1fffffffffffffL;
+    t14  += t13 >> 53; r[13] = t13 & 0x1fffffffffffffL;
+    t15  += t14 >> 53; r[14] = t14 & 0x1fffffffffffffL;
+    t16  += t15 >> 53; r[15] = t15 & 0x1fffffffffffffL;
+    t17  += t16 >> 53; r[16] = t16 & 0x1fffffffffffffL;
+    t18  += t17 >> 53; r[17] = t17 & 0x1fffffffffffffL;
+    t19  += t18 >> 53; r[18] = t18 & 0x1fffffffffffffL;
+    t20  += t19 >> 53; r[19] = t19 & 0x1fffffffffffffL;
+    t21  += t20 >> 53; r[20] = t20 & 0x1fffffffffffffL;
+    t22  += t21 >> 53; r[21] = t21 & 0x1fffffffffffffL;
+    t23  += t22 >> 53; r[22] = t22 & 0x1fffffffffffffL;
+    t24  += t23 >> 53; r[23] = t23 & 0x1fffffffffffffL;
+    r[25] = (sp_digit)(t24 >> 53);
+                       r[24] = t24 & 0x1fffffffffffffL;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_add_13(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    r[ 0] = a[ 0] + b[ 0];
+    r[ 1] = a[ 1] + b[ 1];
+    r[ 2] = a[ 2] + b[ 2];
+    r[ 3] = a[ 3] + b[ 3];
+    r[ 4] = a[ 4] + b[ 4];
+    r[ 5] = a[ 5] + b[ 5];
+    r[ 6] = a[ 6] + b[ 6];
+    r[ 7] = a[ 7] + b[ 7];
+    r[ 8] = a[ 8] + b[ 8];
+    r[ 9] = a[ 9] + b[ 9];
+    r[10] = a[10] + b[10];
+    r[11] = a[11] + b[11];
+    r[12] = a[12] + b[12];
+
+    return 0;
+}
+
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_sub_26(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 24; i += 8) {
+        r[i + 0] = a[i + 0] - b[i + 0];
+        r[i + 1] = a[i + 1] - b[i + 1];
+        r[i + 2] = a[i + 2] - b[i + 2];
+        r[i + 3] = a[i + 3] - b[i + 3];
+        r[i + 4] = a[i + 4] - b[i + 4];
+        r[i + 5] = a[i + 5] - b[i + 5];
+        r[i + 6] = a[i + 6] - b[i + 6];
+        r[i + 7] = a[i + 7] - b[i + 7];
+    }
+    r[24] = a[24] - b[24];
+    r[25] = a[25] - b[25];
+
+    return 0;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_add_26(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 24; i += 8) {
+        r[i + 0] = a[i + 0] + b[i + 0];
+        r[i + 1] = a[i + 1] + b[i + 1];
+        r[i + 2] = a[i + 2] + b[i + 2];
+        r[i + 3] = a[i + 3] + b[i + 3];
+        r[i + 4] = a[i + 4] + b[i + 4];
+        r[i + 5] = a[i + 5] + b[i + 5];
+        r[i + 6] = a[i + 6] + b[i + 6];
+        r[i + 7] = a[i + 7] + b[i + 7];
+    }
+    r[24] = a[24] + b[24];
+    r[25] = a[25] + b[25];
+
+    return 0;
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_mul_39(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    sp_digit p0[26];
+    sp_digit p1[26];
+    sp_digit p2[26];
+    sp_digit p3[26];
+    sp_digit p4[26];
+    sp_digit p5[26];
+    sp_digit t0[26];
+    sp_digit t1[26];
+    sp_digit t2[26];
+    sp_digit a0[13];
+    sp_digit a1[13];
+    sp_digit a2[13];
+    sp_digit b0[13];
+    sp_digit b1[13];
+    sp_digit b2[13];
+    (void)sp_4096_add_13(a0, a, &a[13]);
+    (void)sp_4096_add_13(b0, b, &b[13]);
+    (void)sp_4096_add_13(a1, &a[13], &a[26]);
+    (void)sp_4096_add_13(b1, &b[13], &b[26]);
+    (void)sp_4096_add_13(a2, a0, &a[26]);
+    (void)sp_4096_add_13(b2, b0, &b[26]);
+    sp_4096_mul_13(p0, a, b);
+    sp_4096_mul_13(p2, &a[13], &b[13]);
+    sp_4096_mul_13(p4, &a[26], &b[26]);
+    sp_4096_mul_13(p1, a0, b0);
+    sp_4096_mul_13(p3, a1, b1);
+    sp_4096_mul_13(p5, a2, b2);
+    XMEMSET(r, 0, sizeof(*r)*2U*39U);
+    (void)sp_4096_sub_26(t0, p3, p2);
+    (void)sp_4096_sub_26(t1, p1, p2);
+    (void)sp_4096_sub_26(t2, p5, t0);
+    (void)sp_4096_sub_26(t2, t2, t1);
+    (void)sp_4096_sub_26(t0, t0, p4);
+    (void)sp_4096_sub_26(t1, t1, p0);
+    (void)sp_4096_add_26(r, r, p0);
+    (void)sp_4096_add_26(&r[13], &r[13], t1);
+    (void)sp_4096_add_26(&r[26], &r[26], t2);
+    (void)sp_4096_add_26(&r[39], &r[39], t0);
+    (void)sp_4096_add_26(&r[52], &r[52], p4);
+}
+
+/* Square a into r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_sqr_39(sp_digit* r, const sp_digit* a)
+{
+    sp_digit p0[26];
+    sp_digit p1[26];
+    sp_digit p2[26];
+    sp_digit p3[26];
+    sp_digit p4[26];
+    sp_digit p5[26];
+    sp_digit t0[26];
+    sp_digit t1[26];
+    sp_digit t2[26];
+    sp_digit a0[13];
+    sp_digit a1[13];
+    sp_digit a2[13];
+    (void)sp_4096_add_13(a0, a, &a[13]);
+    (void)sp_4096_add_13(a1, &a[13], &a[26]);
+    (void)sp_4096_add_13(a2, a0, &a[26]);
+    sp_4096_sqr_13(p0, a);
+    sp_4096_sqr_13(p2, &a[13]);
+    sp_4096_sqr_13(p4, &a[26]);
+    sp_4096_sqr_13(p1, a0);
+    sp_4096_sqr_13(p3, a1);
+    sp_4096_sqr_13(p5, a2);
+    XMEMSET(r, 0, sizeof(*r)*2U*39U);
+    (void)sp_4096_sub_26(t0, p3, p2);
+    (void)sp_4096_sub_26(t1, p1, p2);
+    (void)sp_4096_sub_26(t2, p5, t0);
+    (void)sp_4096_sub_26(t2, t2, t1);
+    (void)sp_4096_sub_26(t0, t0, p4);
+    (void)sp_4096_sub_26(t1, t1, p0);
+    (void)sp_4096_add_26(r, r, p0);
+    (void)sp_4096_add_26(&r[13], &r[13], t1);
+    (void)sp_4096_add_26(&r[26], &r[26], t2);
+    (void)sp_4096_add_26(&r[39], &r[39], t0);
+    (void)sp_4096_add_26(&r[52], &r[52], p4);
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_add_39(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 32; i += 8) {
+        r[i + 0] = a[i + 0] + b[i + 0];
+        r[i + 1] = a[i + 1] + b[i + 1];
+        r[i + 2] = a[i + 2] + b[i + 2];
+        r[i + 3] = a[i + 3] + b[i + 3];
+        r[i + 4] = a[i + 4] + b[i + 4];
+        r[i + 5] = a[i + 5] + b[i + 5];
+        r[i + 6] = a[i + 6] + b[i + 6];
+        r[i + 7] = a[i + 7] + b[i + 7];
+    }
+    r[32] = a[32] + b[32];
+    r[33] = a[33] + b[33];
+    r[34] = a[34] + b[34];
+    r[35] = a[35] + b[35];
+    r[36] = a[36] + b[36];
+    r[37] = a[37] + b[37];
+    r[38] = a[38] + b[38];
+
+    return 0;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_add_78(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 72; i += 8) {
+        r[i + 0] = a[i + 0] + b[i + 0];
+        r[i + 1] = a[i + 1] + b[i + 1];
+        r[i + 2] = a[i + 2] + b[i + 2];
+        r[i + 3] = a[i + 3] + b[i + 3];
+        r[i + 4] = a[i + 4] + b[i + 4];
+        r[i + 5] = a[i + 5] + b[i + 5];
+        r[i + 6] = a[i + 6] + b[i + 6];
+        r[i + 7] = a[i + 7] + b[i + 7];
+    }
+    r[72] = a[72] + b[72];
+    r[73] = a[73] + b[73];
+    r[74] = a[74] + b[74];
+    r[75] = a[75] + b[75];
+    r[76] = a[76] + b[76];
+    r[77] = a[77] + b[77];
+
+    return 0;
+}
+
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_sub_78(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 72; i += 8) {
+        r[i + 0] = a[i + 0] - b[i + 0];
+        r[i + 1] = a[i + 1] - b[i + 1];
+        r[i + 2] = a[i + 2] - b[i + 2];
+        r[i + 3] = a[i + 3] - b[i + 3];
+        r[i + 4] = a[i + 4] - b[i + 4];
+        r[i + 5] = a[i + 5] - b[i + 5];
+        r[i + 6] = a[i + 6] - b[i + 6];
+        r[i + 7] = a[i + 7] - b[i + 7];
+    }
+    r[72] = a[72] - b[72];
+    r[73] = a[73] - b[73];
+    r[74] = a[74] - b[74];
+    r[75] = a[75] - b[75];
+    r[76] = a[76] - b[76];
+    r[77] = a[77] - b[77];
+
+    return 0;
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_mul_78(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    sp_digit* z0 = r;
+    sp_digit z1[78];
+    sp_digit* a1 = z1;
+    sp_digit b1[39];
+    sp_digit* z2 = r + 78;
+    (void)sp_4096_add_39(a1, a, &a[39]);
+    (void)sp_4096_add_39(b1, b, &b[39]);
+    sp_4096_mul_39(z2, &a[39], &b[39]);
+    sp_4096_mul_39(z0, a, b);
+    sp_4096_mul_39(z1, a1, b1);
+    (void)sp_4096_sub_78(z1, z1, z2);
+    (void)sp_4096_sub_78(z1, z1, z0);
+    (void)sp_4096_add_78(r + 39, r + 39, z1);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_sqr_78(sp_digit* r, const sp_digit* a)
+{
+    sp_digit* z0 = r;
+    sp_digit z1[78];
+    sp_digit* a1 = z1;
+    sp_digit* z2 = r + 78;
+    (void)sp_4096_add_39(a1, a, &a[39]);
+    sp_4096_sqr_39(z2, &a[39]);
+    sp_4096_sqr_39(z0, a);
+    sp_4096_sqr_39(z1, a1);
+    (void)sp_4096_sub_78(z1, z1, z2);
+    (void)sp_4096_sub_78(z1, z1, z0);
+    (void)sp_4096_add_78(r + 39, r + 39, z1);
+}
+
+#endif /* !WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_add_78(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 78; i++) {
+        r[i] = a[i] + b[i];
+    }
+
+    return 0;
+}
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_sub_78(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 78; i++) {
+        r[i] = a[i] - b[i];
+    }
+
+    return 0;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_mul_78(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int i, j, k;
+    int128_t c;
+
+    c = ((int128_t)a[77]) * b[77];
+    r[155] = (sp_digit)(c >> 53);
+    c = (c & 0x1fffffffffffffL) << 53;
+    for (k = 153; k >= 0; k--) {
+        for (i = 77; i >= 0; i--) {
+            j = k - i;
+            if (j >= 78) {
+                break;
+            }
+            if (j < 0) {
+                continue;
+            }
+
+            c += ((int128_t)a[i]) * b[j];
+        }
+        r[k + 2] += c >> 106;
+        r[k + 1] = (c >> 53) & 0x1fffffffffffffL;
+        c = (c & 0x1fffffffffffffL) << 53;
+    }
+    r[0] = (sp_digit)(c >> 53);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_sqr_78(sp_digit* r, const sp_digit* a)
+{
+    int i, j, k;
+    int128_t c;
+
+    c = ((int128_t)a[77]) * a[77];
+    r[155] = (sp_digit)(c >> 53);
+    c = (c & 0x1fffffffffffffL) << 53;
+    for (k = 153; k >= 0; k--) {
+        for (i = 77; i >= 0; i--) {
+            j = k - i;
+            if (j >= 78 || i <= j) {
+                break;
+            }
+            if (j < 0) {
+                continue;
+            }
+
+            c += ((int128_t)a[i]) * a[j] * 2;
+        }
+        if (i == j) {
+           c += ((int128_t)a[i]) * a[i];
+        }
+
+        r[k + 2] += c >> 106;
+        r[k + 1] = (c >> 53) & 0x1fffffffffffffL;
+        c = (c & 0x1fffffffffffffL) << 53;
+    }
+    r[0] = (sp_digit)(c >> 53);
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+#if defined(WOLFSSL_HAVE_SP_RSA) && !defined(SP_RSA_PRIVATE_EXP_D)
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_add_39(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 39; i++) {
+        r[i] = a[i] + b[i];
+    }
+
+    return 0;
+}
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_sub_39(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 39; i++) {
+        r[i] = a[i] - b[i];
+    }
+
+    return 0;
+}
+
+#else
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_sub_39(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 32; i += 8) {
+        r[i + 0] = a[i + 0] - b[i + 0];
+        r[i + 1] = a[i + 1] - b[i + 1];
+        r[i + 2] = a[i + 2] - b[i + 2];
+        r[i + 3] = a[i + 3] - b[i + 3];
+        r[i + 4] = a[i + 4] - b[i + 4];
+        r[i + 5] = a[i + 5] - b[i + 5];
+        r[i + 6] = a[i + 6] - b[i + 6];
+        r[i + 7] = a[i + 7] - b[i + 7];
+    }
+    r[32] = a[32] - b[32];
+    r[33] = a[33] - b[33];
+    r[34] = a[34] - b[34];
+    r[35] = a[35] - b[35];
+    r[36] = a[36] - b[36];
+    r[37] = a[37] - b[37];
+    r[38] = a[38] - b[38];
+
+    return 0;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_mul_39(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int i, j, k;
+    int128_t c;
+
+    c = ((int128_t)a[38]) * b[38];
+    r[77] = (sp_digit)(c >> 53);
+    c = (c & 0x1fffffffffffffL) << 53;
+    for (k = 75; k >= 0; k--) {
+        for (i = 38; i >= 0; i--) {
+            j = k - i;
+            if (j >= 39) {
+                break;
+            }
+            if (j < 0) {
+                continue;
+            }
+
+            c += ((int128_t)a[i]) * b[j];
+        }
+        r[k + 2] += c >> 106;
+        r[k + 1] = (c >> 53) & 0x1fffffffffffffL;
+        c = (c & 0x1fffffffffffffL) << 53;
+    }
+    r[0] = (sp_digit)(c >> 53);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_sqr_39(sp_digit* r, const sp_digit* a)
+{
+    int i, j, k;
+    int128_t c;
+
+    c = ((int128_t)a[38]) * a[38];
+    r[77] = (sp_digit)(c >> 53);
+    c = (c & 0x1fffffffffffffL) << 53;
+    for (k = 75; k >= 0; k--) {
+        for (i = 38; i >= 0; i--) {
+            j = k - i;
+            if (j >= 39 || i <= j) {
+                break;
+            }
+            if (j < 0) {
+                continue;
+            }
+
+            c += ((int128_t)a[i]) * a[j] * 2;
+        }
+        if (i == j) {
+           c += ((int128_t)a[i]) * a[i];
+        }
+
+        r[k + 2] += c >> 106;
+        r[k + 1] = (c >> 53) & 0x1fffffffffffffL;
+        c = (c & 0x1fffffffffffffL) << 53;
+    }
+    r[0] = (sp_digit)(c >> 53);
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* WOLFSSL_HAVE_SP_RSA && !SP_RSA_PRIVATE_EXP_D */
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+/* Caclulate the bottom digit of -1/a mod 2^n.
+ *
+ * a    A single precision number.
+ * rho  Bottom word of inverse.
+ */
+static void sp_4096_mont_setup(const sp_digit* a, sp_digit* rho)
+{
+    sp_digit x, b;
+
+    b = a[0];
+    x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**64 */
+    x &= 0x1fffffffffffffL;
+
+    /* rho = -1/m mod b */
+    *rho = (1L << 53) - x;
+}
+
+/* Multiply a by scalar b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A scalar.
+ */
+SP_NOINLINE static void sp_4096_mul_d_78(sp_digit* r, const sp_digit* a,
+    sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int128_t tb = b;
+    int128_t t = 0;
+    int i;
+
+    for (i = 0; i < 78; i++) {
+        t += tb * a[i];
+        r[i] = t & 0x1fffffffffffffL;
+        t >>= 53;
+    }
+    r[78] = (sp_digit)t;
+#else
+    int128_t tb = b;
+    int128_t t[8];
+    int i;
+
+    t[0] = tb * a[0]; r[0] = t[0] & 0x1fffffffffffffL;
+    for (i = 0; i < 72; i += 8) {
+        t[1] = tb * a[i+1];
+        r[i+1] = (sp_digit)(t[0] >> 53) + (t[1] & 0x1fffffffffffffL);
+        t[2] = tb * a[i+2];
+        r[i+2] = (sp_digit)(t[1] >> 53) + (t[2] & 0x1fffffffffffffL);
+        t[3] = tb * a[i+3];
+        r[i+3] = (sp_digit)(t[2] >> 53) + (t[3] & 0x1fffffffffffffL);
+        t[4] = tb * a[i+4];
+        r[i+4] = (sp_digit)(t[3] >> 53) + (t[4] & 0x1fffffffffffffL);
+        t[5] = tb * a[i+5];
+        r[i+5] = (sp_digit)(t[4] >> 53) + (t[5] & 0x1fffffffffffffL);
+        t[6] = tb * a[i+6];
+        r[i+6] = (sp_digit)(t[5] >> 53) + (t[6] & 0x1fffffffffffffL);
+        t[7] = tb * a[i+7];
+        r[i+7] = (sp_digit)(t[6] >> 53) + (t[7] & 0x1fffffffffffffL);
+        t[0] = tb * a[i+8];
+        r[i+8] = (sp_digit)(t[7] >> 53) + (t[0] & 0x1fffffffffffffL);
+    }
+    t[1] = tb * a[73];
+    r[73] = (sp_digit)(t[0] >> 53) + (t[1] & 0x1fffffffffffffL);
+    t[2] = tb * a[74];
+    r[74] = (sp_digit)(t[1] >> 53) + (t[2] & 0x1fffffffffffffL);
+    t[3] = tb * a[75];
+    r[75] = (sp_digit)(t[2] >> 53) + (t[3] & 0x1fffffffffffffL);
+    t[4] = tb * a[76];
+    r[76] = (sp_digit)(t[3] >> 53) + (t[4] & 0x1fffffffffffffL);
+    t[5] = tb * a[77];
+    r[77] = (sp_digit)(t[4] >> 53) + (t[5] & 0x1fffffffffffffL);
+    r[78] =  (sp_digit)(t[5] >> 53);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+#if defined(WOLFSSL_HAVE_SP_RSA) && !defined(SP_RSA_PRIVATE_EXP_D)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 4096 bits, just need to subtract.
+ *
+ * r  A single precision number.
+ * m  A single precision number.
+ */
+static void sp_4096_mont_norm_39(sp_digit* r, const sp_digit* m)
+{
+    /* Set r = 2^n - 1. */
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<38; i++) {
+        r[i] = 0x1fffffffffffffL;
+    }
+#else
+    int i;
+
+    for (i = 0; i < 32; i += 8) {
+        r[i + 0] = 0x1fffffffffffffL;
+        r[i + 1] = 0x1fffffffffffffL;
+        r[i + 2] = 0x1fffffffffffffL;
+        r[i + 3] = 0x1fffffffffffffL;
+        r[i + 4] = 0x1fffffffffffffL;
+        r[i + 5] = 0x1fffffffffffffL;
+        r[i + 6] = 0x1fffffffffffffL;
+        r[i + 7] = 0x1fffffffffffffL;
+    }
+    r[32] = 0x1fffffffffffffL;
+    r[33] = 0x1fffffffffffffL;
+    r[34] = 0x1fffffffffffffL;
+    r[35] = 0x1fffffffffffffL;
+    r[36] = 0x1fffffffffffffL;
+    r[37] = 0x1fffffffffffffL;
+#endif
+    r[38] = 0x3ffffffffL;
+
+    /* r = (2^n - 1) mod n */
+    (void)sp_4096_sub_39(r, r, m);
+
+    /* Add one so r = 2^n mod m */
+    r[0] += 1;
+}
+
+/* Compare a with b in constant time.
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+static sp_digit sp_4096_cmp_39(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=38; i>=0; i--) {
+        r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    }
+#else
+    int i;
+
+    r |= (a[38] - b[38]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[37] - b[37]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[36] - b[36]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[35] - b[35]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[34] - b[34]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[33] - b[33]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[32] - b[32]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    for (i = 24; i >= 0; i -= 8) {
+        r |= (a[i + 7] - b[i + 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 6] - b[i + 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 5] - b[i + 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 4] - b[i + 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 3] - b[i + 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 2] - b[i + 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 1] - b[i + 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 0] - b[i + 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    }
+#endif /* WOLFSSL_SP_SMALL */
+
+    return r;
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not.
+ *
+ * r  A single precision number representing condition subtract result.
+ * a  A single precision number to subtract from.
+ * b  A single precision number to subtract.
+ * m  Mask value to apply.
+ */
+static void sp_4096_cond_sub_39(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i = 0; i < 39; i++) {
+        r[i] = a[i] - (b[i] & m);
+    }
+#else
+    int i;
+
+    for (i = 0; i < 32; i += 8) {
+        r[i + 0] = a[i + 0] - (b[i + 0] & m);
+        r[i + 1] = a[i + 1] - (b[i + 1] & m);
+        r[i + 2] = a[i + 2] - (b[i + 2] & m);
+        r[i + 3] = a[i + 3] - (b[i + 3] & m);
+        r[i + 4] = a[i + 4] - (b[i + 4] & m);
+        r[i + 5] = a[i + 5] - (b[i + 5] & m);
+        r[i + 6] = a[i + 6] - (b[i + 6] & m);
+        r[i + 7] = a[i + 7] - (b[i + 7] & m);
+    }
+    r[32] = a[32] - (b[32] & m);
+    r[33] = a[33] - (b[33] & m);
+    r[34] = a[34] - (b[34] & m);
+    r[35] = a[35] - (b[35] & m);
+    r[36] = a[36] - (b[36] & m);
+    r[37] = a[37] - (b[37] & m);
+    r[38] = a[38] - (b[38] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Mul a by scalar b and add into r. (r += a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A scalar.
+ */
+SP_NOINLINE static void sp_4096_mul_add_39(sp_digit* r, const sp_digit* a,
+        const sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int128_t tb = b;
+    int128_t t = 0;
+    int i;
+
+    for (i = 0; i < 39; i++) {
+        t += (tb * a[i]) + r[i];
+        r[i] = t & 0x1fffffffffffffL;
+        t >>= 53;
+    }
+    r[39] += t;
+#else
+    int128_t tb = b;
+    int128_t t[8];
+    int i;
+
+    t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1fffffffffffffL);
+    for (i = 0; i < 32; i += 8) {
+        t[1] = tb * a[i+1];
+        r[i+1] += (sp_digit)((t[0] >> 53) + (t[1] & 0x1fffffffffffffL));
+        t[2] = tb * a[i+2];
+        r[i+2] += (sp_digit)((t[1] >> 53) + (t[2] & 0x1fffffffffffffL));
+        t[3] = tb * a[i+3];
+        r[i+3] += (sp_digit)((t[2] >> 53) + (t[3] & 0x1fffffffffffffL));
+        t[4] = tb * a[i+4];
+        r[i+4] += (sp_digit)((t[3] >> 53) + (t[4] & 0x1fffffffffffffL));
+        t[5] = tb * a[i+5];
+        r[i+5] += (sp_digit)((t[4] >> 53) + (t[5] & 0x1fffffffffffffL));
+        t[6] = tb * a[i+6];
+        r[i+6] += (sp_digit)((t[5] >> 53) + (t[6] & 0x1fffffffffffffL));
+        t[7] = tb * a[i+7];
+        r[i+7] += (sp_digit)((t[6] >> 53) + (t[7] & 0x1fffffffffffffL));
+        t[0] = tb * a[i+8];
+        r[i+8] += (sp_digit)((t[7] >> 53) + (t[0] & 0x1fffffffffffffL));
+    }
+    t[1] = tb * a[33]; r[33] += (sp_digit)((t[0] >> 53) + (t[1] & 0x1fffffffffffffL));
+    t[2] = tb * a[34]; r[34] += (sp_digit)((t[1] >> 53) + (t[2] & 0x1fffffffffffffL));
+    t[3] = tb * a[35]; r[35] += (sp_digit)((t[2] >> 53) + (t[3] & 0x1fffffffffffffL));
+    t[4] = tb * a[36]; r[36] += (sp_digit)((t[3] >> 53) + (t[4] & 0x1fffffffffffffL));
+    t[5] = tb * a[37]; r[37] += (sp_digit)((t[4] >> 53) + (t[5] & 0x1fffffffffffffL));
+    t[6] = tb * a[38]; r[38] += (sp_digit)((t[5] >> 53) + (t[6] & 0x1fffffffffffffL));
+    r[39] +=  (sp_digit)(t[6] >> 53);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Normalize the values in each word to 53.
+ *
+ * a  Array of sp_digit to normalize.
+ */
+static void sp_4096_norm_39(sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    for (i = 0; i < 38; i++) {
+        a[i+1] += a[i] >> 53;
+        a[i] &= 0x1fffffffffffffL;
+    }
+#else
+    int i;
+    for (i = 0; i < 32; i += 8) {
+        a[i+1] += a[i+0] >> 53; a[i+0] &= 0x1fffffffffffffL;
+        a[i+2] += a[i+1] >> 53; a[i+1] &= 0x1fffffffffffffL;
+        a[i+3] += a[i+2] >> 53; a[i+2] &= 0x1fffffffffffffL;
+        a[i+4] += a[i+3] >> 53; a[i+3] &= 0x1fffffffffffffL;
+        a[i+5] += a[i+4] >> 53; a[i+4] &= 0x1fffffffffffffL;
+        a[i+6] += a[i+5] >> 53; a[i+5] &= 0x1fffffffffffffL;
+        a[i+7] += a[i+6] >> 53; a[i+6] &= 0x1fffffffffffffL;
+        a[i+8] += a[i+7] >> 53; a[i+7] &= 0x1fffffffffffffL;
+        a[i+9] += a[i+8] >> 53; a[i+8] &= 0x1fffffffffffffL;
+    }
+    a[32+1] += a[32] >> 53;
+    a[32] &= 0x1fffffffffffffL;
+    a[33+1] += a[33] >> 53;
+    a[33] &= 0x1fffffffffffffL;
+    a[34+1] += a[34] >> 53;
+    a[34] &= 0x1fffffffffffffL;
+    a[35+1] += a[35] >> 53;
+    a[35] &= 0x1fffffffffffffL;
+    a[36+1] += a[36] >> 53;
+    a[36] &= 0x1fffffffffffffL;
+    a[37+1] += a[37] >> 53;
+    a[37] &= 0x1fffffffffffffL;
+#endif
+}
+
+/* Shift the result in the high 2048 bits down to the bottom.
+ *
+ * r  A single precision number.
+ * a  A single precision number.
+ */
+static void sp_4096_mont_shift_39(sp_digit* r, const sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    int128_t n = a[38] >> 34;
+    n += ((int128_t)a[39]) << 19;
+
+    for (i = 0; i < 38; i++) {
+        r[i] = n & 0x1fffffffffffffL;
+        n >>= 53;
+        n += ((int128_t)a[40 + i]) << 19;
+    }
+    r[38] = (sp_digit)n;
+#else
+    int i;
+    int128_t n = a[38] >> 34;
+    n += ((int128_t)a[39]) << 19;
+    for (i = 0; i < 32; i += 8) {
+        r[i + 0] = n & 0x1fffffffffffffL;
+        n >>= 53; n += ((int128_t)a[i + 40]) << 19;
+        r[i + 1] = n & 0x1fffffffffffffL;
+        n >>= 53; n += ((int128_t)a[i + 41]) << 19;
+        r[i + 2] = n & 0x1fffffffffffffL;
+        n >>= 53; n += ((int128_t)a[i + 42]) << 19;
+        r[i + 3] = n & 0x1fffffffffffffL;
+        n >>= 53; n += ((int128_t)a[i + 43]) << 19;
+        r[i + 4] = n & 0x1fffffffffffffL;
+        n >>= 53; n += ((int128_t)a[i + 44]) << 19;
+        r[i + 5] = n & 0x1fffffffffffffL;
+        n >>= 53; n += ((int128_t)a[i + 45]) << 19;
+        r[i + 6] = n & 0x1fffffffffffffL;
+        n >>= 53; n += ((int128_t)a[i + 46]) << 19;
+        r[i + 7] = n & 0x1fffffffffffffL;
+        n >>= 53; n += ((int128_t)a[i + 47]) << 19;
+    }
+    r[32] = n & 0x1fffffffffffffL; n >>= 53; n += ((int128_t)a[72]) << 19;
+    r[33] = n & 0x1fffffffffffffL; n >>= 53; n += ((int128_t)a[73]) << 19;
+    r[34] = n & 0x1fffffffffffffL; n >>= 53; n += ((int128_t)a[74]) << 19;
+    r[35] = n & 0x1fffffffffffffL; n >>= 53; n += ((int128_t)a[75]) << 19;
+    r[36] = n & 0x1fffffffffffffL; n >>= 53; n += ((int128_t)a[76]) << 19;
+    r[37] = n & 0x1fffffffffffffL; n >>= 53; n += ((int128_t)a[77]) << 19;
+    r[38] = (sp_digit)n;
+#endif /* WOLFSSL_SP_SMALL */
+    XMEMSET(&r[39], 0, sizeof(*r) * 39U);
+}
+
+/* Reduce the number back to 4096 bits using Montgomery reduction.
+ *
+ * a   A single precision number to reduce in place.
+ * m   The single precision number representing the modulus.
+ * mp  The digit representing the negative inverse of m mod 2^n.
+ */
+static void sp_4096_mont_reduce_39(sp_digit* a, const sp_digit* m, sp_digit mp)
+{
+    int i;
+    sp_digit mu;
+
+    sp_4096_norm_39(a + 39);
+
+    for (i=0; i<38; i++) {
+        mu = (a[i] * mp) & 0x1fffffffffffffL;
+        sp_4096_mul_add_39(a+i, m, mu);
+        a[i+1] += a[i] >> 53;
+    }
+    mu = (a[i] * mp) & 0x3ffffffffL;
+    sp_4096_mul_add_39(a+i, m, mu);
+    a[i+1] += a[i] >> 53;
+    a[i] &= 0x1fffffffffffffL;
+
+    sp_4096_mont_shift_39(a, a);
+    sp_4096_cond_sub_39(a, a, m, 0 - (((a[38] >> 34) > 0) ?
+            (sp_digit)1 : (sp_digit)0));
+    sp_4096_norm_39(a);
+}
+
+/* Multiply two Montogmery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r   Result of multiplication.
+ * a   First number to multiply in Montogmery form.
+ * b   Second number to multiply in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_4096_mont_mul_39(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_4096_mul_39(r, a, b);
+    sp_4096_mont_reduce_39(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_4096_mont_sqr_39(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_4096_sqr_39(r, a);
+    sp_4096_mont_reduce_39(r, m, mp);
+}
+
+/* Multiply a by scalar b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A scalar.
+ */
+SP_NOINLINE static void sp_4096_mul_d_39(sp_digit* r, const sp_digit* a,
+    sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int128_t tb = b;
+    int128_t t = 0;
+    int i;
+
+    for (i = 0; i < 39; i++) {
+        t += tb * a[i];
+        r[i] = t & 0x1fffffffffffffL;
+        t >>= 53;
+    }
+    r[39] = (sp_digit)t;
+#else
+    int128_t tb = b;
+    int128_t t[8];
+    int i;
+
+    t[0] = tb * a[0]; r[0] = t[0] & 0x1fffffffffffffL;
+    for (i = 0; i < 32; i += 8) {
+        t[1] = tb * a[i+1];
+        r[i+1] = (sp_digit)(t[0] >> 53) + (t[1] & 0x1fffffffffffffL);
+        t[2] = tb * a[i+2];
+        r[i+2] = (sp_digit)(t[1] >> 53) + (t[2] & 0x1fffffffffffffL);
+        t[3] = tb * a[i+3];
+        r[i+3] = (sp_digit)(t[2] >> 53) + (t[3] & 0x1fffffffffffffL);
+        t[4] = tb * a[i+4];
+        r[i+4] = (sp_digit)(t[3] >> 53) + (t[4] & 0x1fffffffffffffL);
+        t[5] = tb * a[i+5];
+        r[i+5] = (sp_digit)(t[4] >> 53) + (t[5] & 0x1fffffffffffffL);
+        t[6] = tb * a[i+6];
+        r[i+6] = (sp_digit)(t[5] >> 53) + (t[6] & 0x1fffffffffffffL);
+        t[7] = tb * a[i+7];
+        r[i+7] = (sp_digit)(t[6] >> 53) + (t[7] & 0x1fffffffffffffL);
+        t[0] = tb * a[i+8];
+        r[i+8] = (sp_digit)(t[7] >> 53) + (t[0] & 0x1fffffffffffffL);
+    }
+    t[1] = tb * a[33];
+    r[33] = (sp_digit)(t[0] >> 53) + (t[1] & 0x1fffffffffffffL);
+    t[2] = tb * a[34];
+    r[34] = (sp_digit)(t[1] >> 53) + (t[2] & 0x1fffffffffffffL);
+    t[3] = tb * a[35];
+    r[35] = (sp_digit)(t[2] >> 53) + (t[3] & 0x1fffffffffffffL);
+    t[4] = tb * a[36];
+    r[36] = (sp_digit)(t[3] >> 53) + (t[4] & 0x1fffffffffffffL);
+    t[5] = tb * a[37];
+    r[37] = (sp_digit)(t[4] >> 53) + (t[5] & 0x1fffffffffffffL);
+    t[6] = tb * a[38];
+    r[38] = (sp_digit)(t[5] >> 53) + (t[6] & 0x1fffffffffffffL);
+    r[39] =  (sp_digit)(t[6] >> 53);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r  A single precision number representing conditional add result.
+ * a  A single precision number to add with.
+ * b  A single precision number to add.
+ * m  Mask value to apply.
+ */
+static void sp_4096_cond_add_39(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i = 0; i < 39; i++) {
+        r[i] = a[i] + (b[i] & m);
+    }
+#else
+    int i;
+
+    for (i = 0; i < 32; i += 8) {
+        r[i + 0] = a[i + 0] + (b[i + 0] & m);
+        r[i + 1] = a[i + 1] + (b[i + 1] & m);
+        r[i + 2] = a[i + 2] + (b[i + 2] & m);
+        r[i + 3] = a[i + 3] + (b[i + 3] & m);
+        r[i + 4] = a[i + 4] + (b[i + 4] & m);
+        r[i + 5] = a[i + 5] + (b[i + 5] & m);
+        r[i + 6] = a[i + 6] + (b[i + 6] & m);
+        r[i + 7] = a[i + 7] + (b[i + 7] & m);
+    }
+    r[32] = a[32] + (b[32] & m);
+    r[33] = a[33] + (b[33] & m);
+    r[34] = a[34] + (b[34] & m);
+    r[35] = a[35] + (b[35] & m);
+    r[36] = a[36] + (b[36] & m);
+    r[37] = a[37] + (b[37] & m);
+    r[38] = a[38] + (b[38] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#ifdef WOLFSSL_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_add_39(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 39; i++) {
+        r[i] = a[i] + b[i];
+    }
+
+    return 0;
+}
+#endif
+SP_NOINLINE static void sp_4096_rshift_39(sp_digit* r, sp_digit* a, byte n)
+{
+    int i;
+
+#ifdef WOLFSSL_SP_SMALL
+    for (i=0; i<38; i++) {
+        r[i] = ((a[i] >> n) | (a[i + 1] << (53 - n))) & 0x1fffffffffffffL;
+    }
+#else
+    for (i=0; i<32; i += 8) {
+        r[i+0] = ((a[i+0] >> n) | (a[i+1] << (53 - n))) & 0x1fffffffffffffL;
+        r[i+1] = ((a[i+1] >> n) | (a[i+2] << (53 - n))) & 0x1fffffffffffffL;
+        r[i+2] = ((a[i+2] >> n) | (a[i+3] << (53 - n))) & 0x1fffffffffffffL;
+        r[i+3] = ((a[i+3] >> n) | (a[i+4] << (53 - n))) & 0x1fffffffffffffL;
+        r[i+4] = ((a[i+4] >> n) | (a[i+5] << (53 - n))) & 0x1fffffffffffffL;
+        r[i+5] = ((a[i+5] >> n) | (a[i+6] << (53 - n))) & 0x1fffffffffffffL;
+        r[i+6] = ((a[i+6] >> n) | (a[i+7] << (53 - n))) & 0x1fffffffffffffL;
+        r[i+7] = ((a[i+7] >> n) | (a[i+8] << (53 - n))) & 0x1fffffffffffffL;
+    }
+    r[32] = ((a[32] >> n) | (a[33] << (53 - n))) & 0x1fffffffffffffL;
+    r[33] = ((a[33] >> n) | (a[34] << (53 - n))) & 0x1fffffffffffffL;
+    r[34] = ((a[34] >> n) | (a[35] << (53 - n))) & 0x1fffffffffffffL;
+    r[35] = ((a[35] >> n) | (a[36] << (53 - n))) & 0x1fffffffffffffL;
+    r[36] = ((a[36] >> n) | (a[37] << (53 - n))) & 0x1fffffffffffffL;
+    r[37] = ((a[37] >> n) | (a[38] << (53 - n))) & 0x1fffffffffffffL;
+#endif
+    r[38] = a[38] >> n;
+}
+
+#ifdef WOLFSSL_SP_DIV_64
+static WC_INLINE sp_digit sp_4096_div_word_39(sp_digit d1, sp_digit d0,
+    sp_digit dv)
+{
+    sp_digit d, r, t;
+
+    /* All 53 bits from d1 and top 10 bits from d0. */
+    d = (d1 << 10) | (d0 >> 43);
+    r = d / dv;
+    d -= r * dv;
+    /* Up to 11 bits in r */
+    /* Next 10 bits from d0. */
+    r <<= 10;
+    d <<= 10;
+    d |= (d0 >> 33) & ((1 << 10) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 21 bits in r */
+    /* Next 10 bits from d0. */
+    r <<= 10;
+    d <<= 10;
+    d |= (d0 >> 23) & ((1 << 10) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 31 bits in r */
+    /* Next 10 bits from d0. */
+    r <<= 10;
+    d <<= 10;
+    d |= (d0 >> 13) & ((1 << 10) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 41 bits in r */
+    /* Next 10 bits from d0. */
+    r <<= 10;
+    d <<= 10;
+    d |= (d0 >> 3) & ((1 << 10) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 51 bits in r */
+    /* Remaining 3 bits from d0. */
+    r <<= 3;
+    d <<= 3;
+    d |= d0 & ((1 << 3) - 1);
+    t = d / dv;
+    r += t;
+
+    return r;
+}
+#endif /* WOLFSSL_SP_DIV_64 */
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Nmber to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_4096_div_39(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    int i;
+#ifndef WOLFSSL_SP_DIV_64
+    int128_t d1;
+#endif
+    sp_digit dv, r1;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* td;
+#else
+    sp_digit t1d[78 + 1], t2d[39 + 1], sdd[39 + 1];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    sp_digit* sd;
+    int err = MP_OKAY;
+
+    (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 39 + 3), NULL,
+                                                       DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    (void)m;
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = td;
+        t2 = td + 78 + 1;
+        sd = t2 + 39 + 1;
+#else
+        t1 = t1d;
+        t2 = t2d;
+        sd = sdd;
+#endif
+
+        sp_4096_mul_d_39(sd, d, 1L << 19);
+        sp_4096_mul_d_78(t1, a, 1L << 19);
+        dv = sd[38];
+        for (i=39; i>=0; i--) {
+            t1[39 + i] += t1[39 + i - 1] >> 53;
+            t1[39 + i - 1] &= 0x1fffffffffffffL;
+#ifndef WOLFSSL_SP_DIV_64
+            d1 = t1[39 + i];
+            d1 <<= 53;
+            d1 += t1[39 + i - 1];
+            r1 = (sp_digit)(d1 / dv);
+#else
+            r1 = sp_4096_div_word_39(t1[39 + i], t1[39 + i - 1], dv);
+#endif
+
+            sp_4096_mul_d_39(t2, sd, r1);
+            (void)sp_4096_sub_39(&t1[i], &t1[i], t2);
+            t1[39 + i] -= t2[39];
+            t1[39 + i] += t1[39 + i - 1] >> 53;
+            t1[39 + i - 1] &= 0x1fffffffffffffL;
+            r1 = (((-t1[39 + i]) << 53) - t1[39 + i - 1]) / dv;
+            r1 -= t1[39 + i];
+            sp_4096_mul_d_39(t2, sd, r1);
+            (void)sp_4096_add_39(&t1[i], &t1[i], t2);
+            t1[39 + i] += t1[39 + i - 1] >> 53;
+            t1[39 + i - 1] &= 0x1fffffffffffffL;
+        }
+        t1[39 - 1] += t1[39 - 2] >> 53;
+        t1[39 - 2] &= 0x1fffffffffffffL;
+        r1 = t1[39 - 1] / dv;
+
+        sp_4096_mul_d_39(t2, sd, r1);
+        sp_4096_sub_39(t1, t1, t2);
+        XMEMCPY(r, t1, sizeof(*r) * 2U * 39U);
+        for (i=0; i<37; i++) {
+            r[i+1] += r[i] >> 53;
+            r[i] &= 0x1fffffffffffffL;
+        }
+        sp_4096_cond_add_39(r, r, sd, 0 - ((r[38] < 0) ?
+                    (sp_digit)1 : (sp_digit)0));
+
+        sp_4096_norm_39(r);
+        sp_4096_rshift_39(r, r, 19);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_4096_mod_39(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_4096_div_39(a, m, NULL, r);
+}
+
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * r     A single precision number that is the result of the operation.
+ * a     A single precision number being exponentiated.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_4096_mod_exp_39(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits,
+    const sp_digit* m, int reduceA)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* td;
+    sp_digit* t[3];
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+    td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 39 * 2, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        XMEMSET(td, 0, sizeof(*td) * 3U * 39U * 2U);
+
+        norm = t[0] = td;
+        t[1] = &td[39 * 2];
+        t[2] = &td[2 * 39 * 2];
+
+        sp_4096_mont_setup(m, &mp);
+        sp_4096_mont_norm_39(norm, m);
+
+        if (reduceA != 0) {
+            err = sp_4096_mod_39(t[1], a, m);
+        }
+        else {
+            XMEMCPY(t[1], a, sizeof(sp_digit) * 39U);
+        }
+    }
+    if (err == MP_OKAY) {
+        sp_4096_mul_39(t[1], t[1], norm);
+        err = sp_4096_mod_39(t[1], t[1], m);
+    }
+
+    if (err == MP_OKAY) {
+        i = bits / 53;
+        c = bits % 53;
+        n = e[i--] << (53 - c);
+        for (; ; c--) {
+            if (c == 0) {
+                if (i == -1) {
+                    break;
+                }
+
+                n = e[i--];
+                c = 53;
+            }
+
+            y = (n >> 52) & 1;
+            n <<= 1;
+
+            sp_4096_mont_mul_39(t[y^1], t[0], t[1], m, mp);
+
+            XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
+                                  ((size_t)t[1] & addr_mask[y])),
+                    sizeof(*t[2]) * 39 * 2);
+            sp_4096_mont_sqr_39(t[2], t[2], m, mp);
+            XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
+                            ((size_t)t[1] & addr_mask[y])), t[2],
+                    sizeof(*t[2]) * 39 * 2);
+        }
+
+        sp_4096_mont_reduce_39(t[0], m, mp);
+        n = sp_4096_cmp_39(t[0], m);
+        sp_4096_cond_sub_39(t[0], t[0], m, ((n < 0) ?
+                    (sp_digit)1 : (sp_digit)0) - 1);
+        XMEMCPY(r, t[0], sizeof(*r) * 39 * 2);
+
+    }
+
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+
+    return err;
+#elif defined(WOLFSSL_SP_CACHE_RESISTANT)
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[3][78];
+#else
+    sp_digit* td;
+    sp_digit* t[3];
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 39 * 2, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        t[0] = td;
+        t[1] = &td[39 * 2];
+        t[2] = &td[2 * 39 * 2];
+#endif
+        norm = t[0];
+
+        sp_4096_mont_setup(m, &mp);
+        sp_4096_mont_norm_39(norm, m);
+
+        if (reduceA != 0) {
+            err = sp_4096_mod_39(t[1], a, m);
+            if (err == MP_OKAY) {
+                sp_4096_mul_39(t[1], t[1], norm);
+                err = sp_4096_mod_39(t[1], t[1], m);
+            }
+        }
+        else {
+            sp_4096_mul_39(t[1], a, norm);
+            err = sp_4096_mod_39(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        i = bits / 53;
+        c = bits % 53;
+        n = e[i--] << (53 - c);
+        for (; ; c--) {
+            if (c == 0) {
+                if (i == -1) {
+                    break;
+                }
+
+                n = e[i--];
+                c = 53;
+            }
+
+            y = (n >> 52) & 1;
+            n <<= 1;
+
+            sp_4096_mont_mul_39(t[y^1], t[0], t[1], m, mp);
+
+            XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
+                                 ((size_t)t[1] & addr_mask[y])), sizeof(t[2]));
+            sp_4096_mont_sqr_39(t[2], t[2], m, mp);
+            XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
+                           ((size_t)t[1] & addr_mask[y])), t[2], sizeof(t[2]));
+        }
+
+        sp_4096_mont_reduce_39(t[0], m, mp);
+        n = sp_4096_cmp_39(t[0], m);
+        sp_4096_cond_sub_39(t[0], t[0], m, ((n < 0) ?
+                   (sp_digit)1 : (sp_digit)0) - 1);
+        XMEMCPY(r, t[0], sizeof(t[0]));
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+#else
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[32][78];
+#else
+    sp_digit* t[32];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit rt[78];
+    sp_digit mp = 1;
+    sp_digit n;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 78, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<32; i++)
+            t[i] = td + i * 78;
+#endif
+        norm = t[0];
+
+        sp_4096_mont_setup(m, &mp);
+        sp_4096_mont_norm_39(norm, m);
+
+        if (reduceA != 0) {
+            err = sp_4096_mod_39(t[1], a, m);
+            if (err == MP_OKAY) {
+                sp_4096_mul_39(t[1], t[1], norm);
+                err = sp_4096_mod_39(t[1], t[1], m);
+            }
+        }
+        else {
+            sp_4096_mul_39(t[1], a, norm);
+            err = sp_4096_mod_39(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_mont_sqr_39(t[ 2], t[ 1], m, mp);
+        sp_4096_mont_mul_39(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_4096_mont_sqr_39(t[ 4], t[ 2], m, mp);
+        sp_4096_mont_mul_39(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_4096_mont_sqr_39(t[ 6], t[ 3], m, mp);
+        sp_4096_mont_mul_39(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_4096_mont_sqr_39(t[ 8], t[ 4], m, mp);
+        sp_4096_mont_mul_39(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_4096_mont_sqr_39(t[10], t[ 5], m, mp);
+        sp_4096_mont_mul_39(t[11], t[ 6], t[ 5], m, mp);
+        sp_4096_mont_sqr_39(t[12], t[ 6], m, mp);
+        sp_4096_mont_mul_39(t[13], t[ 7], t[ 6], m, mp);
+        sp_4096_mont_sqr_39(t[14], t[ 7], m, mp);
+        sp_4096_mont_mul_39(t[15], t[ 8], t[ 7], m, mp);
+        sp_4096_mont_sqr_39(t[16], t[ 8], m, mp);
+        sp_4096_mont_mul_39(t[17], t[ 9], t[ 8], m, mp);
+        sp_4096_mont_sqr_39(t[18], t[ 9], m, mp);
+        sp_4096_mont_mul_39(t[19], t[10], t[ 9], m, mp);
+        sp_4096_mont_sqr_39(t[20], t[10], m, mp);
+        sp_4096_mont_mul_39(t[21], t[11], t[10], m, mp);
+        sp_4096_mont_sqr_39(t[22], t[11], m, mp);
+        sp_4096_mont_mul_39(t[23], t[12], t[11], m, mp);
+        sp_4096_mont_sqr_39(t[24], t[12], m, mp);
+        sp_4096_mont_mul_39(t[25], t[13], t[12], m, mp);
+        sp_4096_mont_sqr_39(t[26], t[13], m, mp);
+        sp_4096_mont_mul_39(t[27], t[14], t[13], m, mp);
+        sp_4096_mont_sqr_39(t[28], t[14], m, mp);
+        sp_4096_mont_mul_39(t[29], t[15], t[14], m, mp);
+        sp_4096_mont_sqr_39(t[30], t[15], m, mp);
+        sp_4096_mont_mul_39(t[31], t[16], t[15], m, mp);
+
+        bits = ((bits + 4) / 5) * 5;
+        i = ((bits + 52) / 53) - 1;
+        c = bits % 53;
+        if (c == 0) {
+            c = 53;
+        }
+        if (i < 39) {
+            n = e[i--] << (64 - c);
+        }
+        else {
+            n = 0;
+            i--;
+        }
+        if (c < 5) {
+            n |= e[i--] << (11 - c);
+            c += 53;
+        }
+        y = (n >> 59) & 0x1f;
+        n <<= 5;
+        c -= 5;
+        XMEMCPY(rt, t[y], sizeof(rt));
+        for (; i>=0 || c>=5; ) {
+            if (c < 5) {
+                n |= e[i--] << (11 - c);
+                c += 53;
+            }
+            y = (n >> 59) & 0x1f;
+            n <<= 5;
+            c -= 5;
+
+            sp_4096_mont_sqr_39(rt, rt, m, mp);
+            sp_4096_mont_sqr_39(rt, rt, m, mp);
+            sp_4096_mont_sqr_39(rt, rt, m, mp);
+            sp_4096_mont_sqr_39(rt, rt, m, mp);
+            sp_4096_mont_sqr_39(rt, rt, m, mp);
+
+            sp_4096_mont_mul_39(rt, rt, t[y], m, mp);
+        }
+
+        sp_4096_mont_reduce_39(rt, m, mp);
+        n = sp_4096_cmp_39(rt, m);
+        sp_4096_cond_sub_39(rt, rt, m, ((n < 0) ?
+                   (sp_digit)1 : (sp_digit)0) - 1);
+        XMEMCPY(r, rt, sizeof(rt));
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+#endif
+}
+
+#endif /* WOLFSSL_HAVE_SP_RSA && !SP_RSA_PRIVATE_EXP_D */
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 4096 bits, just need to subtract.
+ *
+ * r  A single precision number.
+ * m  A single precision number.
+ */
+static void sp_4096_mont_norm_78(sp_digit* r, const sp_digit* m)
+{
+    /* Set r = 2^n - 1. */
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<77; i++) {
+        r[i] = 0x1fffffffffffffL;
+    }
+#else
+    int i;
+
+    for (i = 0; i < 72; i += 8) {
+        r[i + 0] = 0x1fffffffffffffL;
+        r[i + 1] = 0x1fffffffffffffL;
+        r[i + 2] = 0x1fffffffffffffL;
+        r[i + 3] = 0x1fffffffffffffL;
+        r[i + 4] = 0x1fffffffffffffL;
+        r[i + 5] = 0x1fffffffffffffL;
+        r[i + 6] = 0x1fffffffffffffL;
+        r[i + 7] = 0x1fffffffffffffL;
+    }
+    r[72] = 0x1fffffffffffffL;
+    r[73] = 0x1fffffffffffffL;
+    r[74] = 0x1fffffffffffffL;
+    r[75] = 0x1fffffffffffffL;
+    r[76] = 0x1fffffffffffffL;
+#endif
+    r[77] = 0x7fffL;
+
+    /* r = (2^n - 1) mod n */
+    (void)sp_4096_sub_78(r, r, m);
+
+    /* Add one so r = 2^n mod m */
+    r[0] += 1;
+}
+
+/* Compare a with b in constant time.
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+static sp_digit sp_4096_cmp_78(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=77; i>=0; i--) {
+        r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    }
+#else
+    int i;
+
+    r |= (a[77] - b[77]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[76] - b[76]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[75] - b[75]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[74] - b[74]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[73] - b[73]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[72] - b[72]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    for (i = 64; i >= 0; i -= 8) {
+        r |= (a[i + 7] - b[i + 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 6] - b[i + 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 5] - b[i + 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 4] - b[i + 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 3] - b[i + 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 2] - b[i + 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 1] - b[i + 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 0] - b[i + 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    }
+#endif /* WOLFSSL_SP_SMALL */
+
+    return r;
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not.
+ *
+ * r  A single precision number representing condition subtract result.
+ * a  A single precision number to subtract from.
+ * b  A single precision number to subtract.
+ * m  Mask value to apply.
+ */
+static void sp_4096_cond_sub_78(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i = 0; i < 78; i++) {
+        r[i] = a[i] - (b[i] & m);
+    }
+#else
+    int i;
+
+    for (i = 0; i < 72; i += 8) {
+        r[i + 0] = a[i + 0] - (b[i + 0] & m);
+        r[i + 1] = a[i + 1] - (b[i + 1] & m);
+        r[i + 2] = a[i + 2] - (b[i + 2] & m);
+        r[i + 3] = a[i + 3] - (b[i + 3] & m);
+        r[i + 4] = a[i + 4] - (b[i + 4] & m);
+        r[i + 5] = a[i + 5] - (b[i + 5] & m);
+        r[i + 6] = a[i + 6] - (b[i + 6] & m);
+        r[i + 7] = a[i + 7] - (b[i + 7] & m);
+    }
+    r[72] = a[72] - (b[72] & m);
+    r[73] = a[73] - (b[73] & m);
+    r[74] = a[74] - (b[74] & m);
+    r[75] = a[75] - (b[75] & m);
+    r[76] = a[76] - (b[76] & m);
+    r[77] = a[77] - (b[77] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Mul a by scalar b and add into r. (r += a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A scalar.
+ */
+SP_NOINLINE static void sp_4096_mul_add_78(sp_digit* r, const sp_digit* a,
+        const sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int128_t tb = b;
+    int128_t t = 0;
+    int i;
+
+    for (i = 0; i < 78; i++) {
+        t += (tb * a[i]) + r[i];
+        r[i] = t & 0x1fffffffffffffL;
+        t >>= 53;
+    }
+    r[78] += t;
+#else
+    int128_t tb = b;
+    int128_t t[8];
+    int i;
+
+    t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1fffffffffffffL);
+    for (i = 0; i < 72; i += 8) {
+        t[1] = tb * a[i+1];
+        r[i+1] += (sp_digit)((t[0] >> 53) + (t[1] & 0x1fffffffffffffL));
+        t[2] = tb * a[i+2];
+        r[i+2] += (sp_digit)((t[1] >> 53) + (t[2] & 0x1fffffffffffffL));
+        t[3] = tb * a[i+3];
+        r[i+3] += (sp_digit)((t[2] >> 53) + (t[3] & 0x1fffffffffffffL));
+        t[4] = tb * a[i+4];
+        r[i+4] += (sp_digit)((t[3] >> 53) + (t[4] & 0x1fffffffffffffL));
+        t[5] = tb * a[i+5];
+        r[i+5] += (sp_digit)((t[4] >> 53) + (t[5] & 0x1fffffffffffffL));
+        t[6] = tb * a[i+6];
+        r[i+6] += (sp_digit)((t[5] >> 53) + (t[6] & 0x1fffffffffffffL));
+        t[7] = tb * a[i+7];
+        r[i+7] += (sp_digit)((t[6] >> 53) + (t[7] & 0x1fffffffffffffL));
+        t[0] = tb * a[i+8];
+        r[i+8] += (sp_digit)((t[7] >> 53) + (t[0] & 0x1fffffffffffffL));
+    }
+    t[1] = tb * a[73]; r[73] += (sp_digit)((t[0] >> 53) + (t[1] & 0x1fffffffffffffL));
+    t[2] = tb * a[74]; r[74] += (sp_digit)((t[1] >> 53) + (t[2] & 0x1fffffffffffffL));
+    t[3] = tb * a[75]; r[75] += (sp_digit)((t[2] >> 53) + (t[3] & 0x1fffffffffffffL));
+    t[4] = tb * a[76]; r[76] += (sp_digit)((t[3] >> 53) + (t[4] & 0x1fffffffffffffL));
+    t[5] = tb * a[77]; r[77] += (sp_digit)((t[4] >> 53) + (t[5] & 0x1fffffffffffffL));
+    r[78] +=  (sp_digit)(t[5] >> 53);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Normalize the values in each word to 53.
+ *
+ * a  Array of sp_digit to normalize.
+ */
+static void sp_4096_norm_78(sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    for (i = 0; i < 77; i++) {
+        a[i+1] += a[i] >> 53;
+        a[i] &= 0x1fffffffffffffL;
+    }
+#else
+    int i;
+    for (i = 0; i < 72; i += 8) {
+        a[i+1] += a[i+0] >> 53; a[i+0] &= 0x1fffffffffffffL;
+        a[i+2] += a[i+1] >> 53; a[i+1] &= 0x1fffffffffffffL;
+        a[i+3] += a[i+2] >> 53; a[i+2] &= 0x1fffffffffffffL;
+        a[i+4] += a[i+3] >> 53; a[i+3] &= 0x1fffffffffffffL;
+        a[i+5] += a[i+4] >> 53; a[i+4] &= 0x1fffffffffffffL;
+        a[i+6] += a[i+5] >> 53; a[i+5] &= 0x1fffffffffffffL;
+        a[i+7] += a[i+6] >> 53; a[i+6] &= 0x1fffffffffffffL;
+        a[i+8] += a[i+7] >> 53; a[i+7] &= 0x1fffffffffffffL;
+        a[i+9] += a[i+8] >> 53; a[i+8] &= 0x1fffffffffffffL;
+    }
+    a[72+1] += a[72] >> 53;
+    a[72] &= 0x1fffffffffffffL;
+    a[73+1] += a[73] >> 53;
+    a[73] &= 0x1fffffffffffffL;
+    a[74+1] += a[74] >> 53;
+    a[74] &= 0x1fffffffffffffL;
+    a[75+1] += a[75] >> 53;
+    a[75] &= 0x1fffffffffffffL;
+    a[76+1] += a[76] >> 53;
+    a[76] &= 0x1fffffffffffffL;
+#endif
+}
+
+/* Shift the result in the high 4096 bits down to the bottom.
+ *
+ * r  A single precision number.
+ * a  A single precision number.
+ */
+static void sp_4096_mont_shift_78(sp_digit* r, const sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    int128_t n = a[77] >> 15;
+    n += ((int128_t)a[78]) << 38;
+
+    for (i = 0; i < 77; i++) {
+        r[i] = n & 0x1fffffffffffffL;
+        n >>= 53;
+        n += ((int128_t)a[79 + i]) << 38;
+    }
+    r[77] = (sp_digit)n;
+#else
+    int i;
+    int128_t n = a[77] >> 15;
+    n += ((int128_t)a[78]) << 38;
+    for (i = 0; i < 72; i += 8) {
+        r[i + 0] = n & 0x1fffffffffffffL;
+        n >>= 53; n += ((int128_t)a[i + 79]) << 38;
+        r[i + 1] = n & 0x1fffffffffffffL;
+        n >>= 53; n += ((int128_t)a[i + 80]) << 38;
+        r[i + 2] = n & 0x1fffffffffffffL;
+        n >>= 53; n += ((int128_t)a[i + 81]) << 38;
+        r[i + 3] = n & 0x1fffffffffffffL;
+        n >>= 53; n += ((int128_t)a[i + 82]) << 38;
+        r[i + 4] = n & 0x1fffffffffffffL;
+        n >>= 53; n += ((int128_t)a[i + 83]) << 38;
+        r[i + 5] = n & 0x1fffffffffffffL;
+        n >>= 53; n += ((int128_t)a[i + 84]) << 38;
+        r[i + 6] = n & 0x1fffffffffffffL;
+        n >>= 53; n += ((int128_t)a[i + 85]) << 38;
+        r[i + 7] = n & 0x1fffffffffffffL;
+        n >>= 53; n += ((int128_t)a[i + 86]) << 38;
+    }
+    r[72] = n & 0x1fffffffffffffL; n >>= 53; n += ((int128_t)a[151]) << 38;
+    r[73] = n & 0x1fffffffffffffL; n >>= 53; n += ((int128_t)a[152]) << 38;
+    r[74] = n & 0x1fffffffffffffL; n >>= 53; n += ((int128_t)a[153]) << 38;
+    r[75] = n & 0x1fffffffffffffL; n >>= 53; n += ((int128_t)a[154]) << 38;
+    r[76] = n & 0x1fffffffffffffL; n >>= 53; n += ((int128_t)a[155]) << 38;
+    r[77] = (sp_digit)n;
+#endif /* WOLFSSL_SP_SMALL */
+    XMEMSET(&r[78], 0, sizeof(*r) * 78U);
+}
+
+/* Reduce the number back to 4096 bits using Montgomery reduction.
+ *
+ * a   A single precision number to reduce in place.
+ * m   The single precision number representing the modulus.
+ * mp  The digit representing the negative inverse of m mod 2^n.
+ */
+static void sp_4096_mont_reduce_78(sp_digit* a, const sp_digit* m, sp_digit mp)
+{
+    int i;
+    sp_digit mu;
+
+    sp_4096_norm_78(a + 78);
+
+#ifdef WOLFSSL_SP_DH
+    if (mp != 1) {
+        for (i=0; i<77; i++) {
+            mu = (a[i] * mp) & 0x1fffffffffffffL;
+            sp_4096_mul_add_78(a+i, m, mu);
+            a[i+1] += a[i] >> 53;
+        }
+        mu = (a[i] * mp) & 0x7fffL;
+        sp_4096_mul_add_78(a+i, m, mu);
+        a[i+1] += a[i] >> 53;
+        a[i] &= 0x1fffffffffffffL;
+    }
+    else {
+        for (i=0; i<77; i++) {
+            mu = a[i] & 0x1fffffffffffffL;
+            sp_4096_mul_add_78(a+i, m, mu);
+            a[i+1] += a[i] >> 53;
+        }
+        mu = a[i] & 0x7fffL;
+        sp_4096_mul_add_78(a+i, m, mu);
+        a[i+1] += a[i] >> 53;
+        a[i] &= 0x1fffffffffffffL;
+    }
+#else
+    for (i=0; i<77; i++) {
+        mu = (a[i] * mp) & 0x1fffffffffffffL;
+        sp_4096_mul_add_78(a+i, m, mu);
+        a[i+1] += a[i] >> 53;
+    }
+    mu = (a[i] * mp) & 0x7fffL;
+    sp_4096_mul_add_78(a+i, m, mu);
+    a[i+1] += a[i] >> 53;
+    a[i] &= 0x1fffffffffffffL;
+#endif
+
+    sp_4096_mont_shift_78(a, a);
+    sp_4096_cond_sub_78(a, a, m, 0 - (((a[77] >> 15) > 0) ?
+            (sp_digit)1 : (sp_digit)0));
+    sp_4096_norm_78(a);
+}
+
+/* Multiply two Montogmery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r   Result of multiplication.
+ * a   First number to multiply in Montogmery form.
+ * b   Second number to multiply in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_4096_mont_mul_78(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_4096_mul_78(r, a, b);
+    sp_4096_mont_reduce_78(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_4096_mont_sqr_78(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_4096_sqr_78(r, a);
+    sp_4096_mont_reduce_78(r, m, mp);
+}
+
+/* Multiply a by scalar b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A scalar.
+ */
+SP_NOINLINE static void sp_4096_mul_d_156(sp_digit* r, const sp_digit* a,
+    sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int128_t tb = b;
+    int128_t t = 0;
+    int i;
+
+    for (i = 0; i < 156; i++) {
+        t += tb * a[i];
+        r[i] = t & 0x1fffffffffffffL;
+        t >>= 53;
+    }
+    r[156] = (sp_digit)t;
+#else
+    int128_t tb = b;
+    int128_t t[8];
+    int i;
+
+    t[0] = tb * a[0]; r[0] = t[0] & 0x1fffffffffffffL;
+    for (i = 0; i < 152; i += 8) {
+        t[1] = tb * a[i+1];
+        r[i+1] = (sp_digit)(t[0] >> 53) + (t[1] & 0x1fffffffffffffL);
+        t[2] = tb * a[i+2];
+        r[i+2] = (sp_digit)(t[1] >> 53) + (t[2] & 0x1fffffffffffffL);
+        t[3] = tb * a[i+3];
+        r[i+3] = (sp_digit)(t[2] >> 53) + (t[3] & 0x1fffffffffffffL);
+        t[4] = tb * a[i+4];
+        r[i+4] = (sp_digit)(t[3] >> 53) + (t[4] & 0x1fffffffffffffL);
+        t[5] = tb * a[i+5];
+        r[i+5] = (sp_digit)(t[4] >> 53) + (t[5] & 0x1fffffffffffffL);
+        t[6] = tb * a[i+6];
+        r[i+6] = (sp_digit)(t[5] >> 53) + (t[6] & 0x1fffffffffffffL);
+        t[7] = tb * a[i+7];
+        r[i+7] = (sp_digit)(t[6] >> 53) + (t[7] & 0x1fffffffffffffL);
+        t[0] = tb * a[i+8];
+        r[i+8] = (sp_digit)(t[7] >> 53) + (t[0] & 0x1fffffffffffffL);
+    }
+    t[1] = tb * a[153];
+    r[153] = (sp_digit)(t[0] >> 53) + (t[1] & 0x1fffffffffffffL);
+    t[2] = tb * a[154];
+    r[154] = (sp_digit)(t[1] >> 53) + (t[2] & 0x1fffffffffffffL);
+    t[3] = tb * a[155];
+    r[155] = (sp_digit)(t[2] >> 53) + (t[3] & 0x1fffffffffffffL);
+    r[156] =  (sp_digit)(t[3] >> 53);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r  A single precision number representing conditional add result.
+ * a  A single precision number to add with.
+ * b  A single precision number to add.
+ * m  Mask value to apply.
+ */
+static void sp_4096_cond_add_78(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i = 0; i < 78; i++) {
+        r[i] = a[i] + (b[i] & m);
+    }
+#else
+    int i;
+
+    for (i = 0; i < 72; i += 8) {
+        r[i + 0] = a[i + 0] + (b[i + 0] & m);
+        r[i + 1] = a[i + 1] + (b[i + 1] & m);
+        r[i + 2] = a[i + 2] + (b[i + 2] & m);
+        r[i + 3] = a[i + 3] + (b[i + 3] & m);
+        r[i + 4] = a[i + 4] + (b[i + 4] & m);
+        r[i + 5] = a[i + 5] + (b[i + 5] & m);
+        r[i + 6] = a[i + 6] + (b[i + 6] & m);
+        r[i + 7] = a[i + 7] + (b[i + 7] & m);
+    }
+    r[72] = a[72] + (b[72] & m);
+    r[73] = a[73] + (b[73] & m);
+    r[74] = a[74] + (b[74] & m);
+    r[75] = a[75] + (b[75] & m);
+    r[76] = a[76] + (b[76] & m);
+    r[77] = a[77] + (b[77] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#ifdef WOLFSSL_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_sub_78(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 78; i++) {
+        r[i] = a[i] - b[i];
+    }
+
+    return 0;
+}
+
+#endif
+#ifdef WOLFSSL_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_add_78(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 78; i++) {
+        r[i] = a[i] + b[i];
+    }
+
+    return 0;
+}
+#endif
+SP_NOINLINE static void sp_4096_rshift_78(sp_digit* r, sp_digit* a, byte n)
+{
+    int i;
+
+#ifdef WOLFSSL_SP_SMALL
+    for (i=0; i<77; i++) {
+        r[i] = ((a[i] >> n) | (a[i + 1] << (53 - n))) & 0x1fffffffffffffL;
+    }
+#else
+    for (i=0; i<72; i += 8) {
+        r[i+0] = ((a[i+0] >> n) | (a[i+1] << (53 - n))) & 0x1fffffffffffffL;
+        r[i+1] = ((a[i+1] >> n) | (a[i+2] << (53 - n))) & 0x1fffffffffffffL;
+        r[i+2] = ((a[i+2] >> n) | (a[i+3] << (53 - n))) & 0x1fffffffffffffL;
+        r[i+3] = ((a[i+3] >> n) | (a[i+4] << (53 - n))) & 0x1fffffffffffffL;
+        r[i+4] = ((a[i+4] >> n) | (a[i+5] << (53 - n))) & 0x1fffffffffffffL;
+        r[i+5] = ((a[i+5] >> n) | (a[i+6] << (53 - n))) & 0x1fffffffffffffL;
+        r[i+6] = ((a[i+6] >> n) | (a[i+7] << (53 - n))) & 0x1fffffffffffffL;
+        r[i+7] = ((a[i+7] >> n) | (a[i+8] << (53 - n))) & 0x1fffffffffffffL;
+    }
+    r[72] = ((a[72] >> n) | (a[73] << (53 - n))) & 0x1fffffffffffffL;
+    r[73] = ((a[73] >> n) | (a[74] << (53 - n))) & 0x1fffffffffffffL;
+    r[74] = ((a[74] >> n) | (a[75] << (53 - n))) & 0x1fffffffffffffL;
+    r[75] = ((a[75] >> n) | (a[76] << (53 - n))) & 0x1fffffffffffffL;
+    r[76] = ((a[76] >> n) | (a[77] << (53 - n))) & 0x1fffffffffffffL;
+#endif
+    r[77] = a[77] >> n;
+}
+
+#ifdef WOLFSSL_SP_DIV_64
+static WC_INLINE sp_digit sp_4096_div_word_78(sp_digit d1, sp_digit d0,
+    sp_digit dv)
+{
+    sp_digit d, r, t;
+
+    /* All 53 bits from d1 and top 10 bits from d0. */
+    d = (d1 << 10) | (d0 >> 43);
+    r = d / dv;
+    d -= r * dv;
+    /* Up to 11 bits in r */
+    /* Next 10 bits from d0. */
+    r <<= 10;
+    d <<= 10;
+    d |= (d0 >> 33) & ((1 << 10) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 21 bits in r */
+    /* Next 10 bits from d0. */
+    r <<= 10;
+    d <<= 10;
+    d |= (d0 >> 23) & ((1 << 10) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 31 bits in r */
+    /* Next 10 bits from d0. */
+    r <<= 10;
+    d <<= 10;
+    d |= (d0 >> 13) & ((1 << 10) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 41 bits in r */
+    /* Next 10 bits from d0. */
+    r <<= 10;
+    d <<= 10;
+    d |= (d0 >> 3) & ((1 << 10) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 51 bits in r */
+    /* Remaining 3 bits from d0. */
+    r <<= 3;
+    d <<= 3;
+    d |= d0 & ((1 << 3) - 1);
+    t = d / dv;
+    r += t;
+
+    return r;
+}
+#endif /* WOLFSSL_SP_DIV_64 */
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Nmber to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_4096_div_78(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    int i;
+#ifndef WOLFSSL_SP_DIV_64
+    int128_t d1;
+#endif
+    sp_digit dv, r1;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* td;
+#else
+    sp_digit t1d[156 + 1], t2d[78 + 1], sdd[78 + 1];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    sp_digit* sd;
+    int err = MP_OKAY;
+
+    (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 78 + 3), NULL,
+                                                       DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    (void)m;
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = td;
+        t2 = td + 156 + 1;
+        sd = t2 + 78 + 1;
+#else
+        t1 = t1d;
+        t2 = t2d;
+        sd = sdd;
+#endif
+
+        sp_4096_mul_d_78(sd, d, 1L << 38);
+        sp_4096_mul_d_156(t1, a, 1L << 38);
+        dv = sd[77];
+        for (i=78; i>=0; i--) {
+            t1[78 + i] += t1[78 + i - 1] >> 53;
+            t1[78 + i - 1] &= 0x1fffffffffffffL;
+#ifndef WOLFSSL_SP_DIV_64
+            d1 = t1[78 + i];
+            d1 <<= 53;
+            d1 += t1[78 + i - 1];
+            r1 = (sp_digit)(d1 / dv);
+#else
+            r1 = sp_4096_div_word_78(t1[78 + i], t1[78 + i - 1], dv);
+#endif
+
+            sp_4096_mul_d_78(t2, sd, r1);
+            (void)sp_4096_sub_78(&t1[i], &t1[i], t2);
+            t1[78 + i] -= t2[78];
+            t1[78 + i] += t1[78 + i - 1] >> 53;
+            t1[78 + i - 1] &= 0x1fffffffffffffL;
+            r1 = (((-t1[78 + i]) << 53) - t1[78 + i - 1]) / dv;
+            r1 -= t1[78 + i];
+            sp_4096_mul_d_78(t2, sd, r1);
+            (void)sp_4096_add_78(&t1[i], &t1[i], t2);
+            t1[78 + i] += t1[78 + i - 1] >> 53;
+            t1[78 + i - 1] &= 0x1fffffffffffffL;
+        }
+        t1[78 - 1] += t1[78 - 2] >> 53;
+        t1[78 - 2] &= 0x1fffffffffffffL;
+        r1 = t1[78 - 1] / dv;
+
+        sp_4096_mul_d_78(t2, sd, r1);
+        sp_4096_sub_78(t1, t1, t2);
+        XMEMCPY(r, t1, sizeof(*r) * 2U * 78U);
+        for (i=0; i<76; i++) {
+            r[i+1] += r[i] >> 53;
+            r[i] &= 0x1fffffffffffffL;
+        }
+        sp_4096_cond_add_78(r, r, sd, 0 - ((r[77] < 0) ?
+                    (sp_digit)1 : (sp_digit)0));
+
+        sp_4096_norm_78(r);
+        sp_4096_rshift_78(r, r, 38);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_4096_mod_78(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_4096_div_78(a, m, NULL, r);
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
+                                                     defined(WOLFSSL_HAVE_SP_DH)
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * r     A single precision number that is the result of the operation.
+ * a     A single precision number being exponentiated.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_4096_mod_exp_78(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits,
+    const sp_digit* m, int reduceA)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* td;
+    sp_digit* t[3];
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+    td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 78 * 2, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        XMEMSET(td, 0, sizeof(*td) * 3U * 78U * 2U);
+
+        norm = t[0] = td;
+        t[1] = &td[78 * 2];
+        t[2] = &td[2 * 78 * 2];
+
+        sp_4096_mont_setup(m, &mp);
+        sp_4096_mont_norm_78(norm, m);
+
+        if (reduceA != 0) {
+            err = sp_4096_mod_78(t[1], a, m);
+        }
+        else {
+            XMEMCPY(t[1], a, sizeof(sp_digit) * 78U);
+        }
+    }
+    if (err == MP_OKAY) {
+        sp_4096_mul_78(t[1], t[1], norm);
+        err = sp_4096_mod_78(t[1], t[1], m);
+    }
+
+    if (err == MP_OKAY) {
+        i = bits / 53;
+        c = bits % 53;
+        n = e[i--] << (53 - c);
+        for (; ; c--) {
+            if (c == 0) {
+                if (i == -1) {
+                    break;
+                }
+
+                n = e[i--];
+                c = 53;
+            }
+
+            y = (n >> 52) & 1;
+            n <<= 1;
+
+            sp_4096_mont_mul_78(t[y^1], t[0], t[1], m, mp);
+
+            XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
+                                  ((size_t)t[1] & addr_mask[y])),
+                    sizeof(*t[2]) * 78 * 2);
+            sp_4096_mont_sqr_78(t[2], t[2], m, mp);
+            XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
+                            ((size_t)t[1] & addr_mask[y])), t[2],
+                    sizeof(*t[2]) * 78 * 2);
+        }
+
+        sp_4096_mont_reduce_78(t[0], m, mp);
+        n = sp_4096_cmp_78(t[0], m);
+        sp_4096_cond_sub_78(t[0], t[0], m, ((n < 0) ?
+                    (sp_digit)1 : (sp_digit)0) - 1);
+        XMEMCPY(r, t[0], sizeof(*r) * 78 * 2);
+
+    }
+
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+
+    return err;
+#elif defined(WOLFSSL_SP_CACHE_RESISTANT)
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[3][156];
+#else
+    sp_digit* td;
+    sp_digit* t[3];
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 78 * 2, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        t[0] = td;
+        t[1] = &td[78 * 2];
+        t[2] = &td[2 * 78 * 2];
+#endif
+        norm = t[0];
+
+        sp_4096_mont_setup(m, &mp);
+        sp_4096_mont_norm_78(norm, m);
+
+        if (reduceA != 0) {
+            err = sp_4096_mod_78(t[1], a, m);
+            if (err == MP_OKAY) {
+                sp_4096_mul_78(t[1], t[1], norm);
+                err = sp_4096_mod_78(t[1], t[1], m);
+            }
+        }
+        else {
+            sp_4096_mul_78(t[1], a, norm);
+            err = sp_4096_mod_78(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        i = bits / 53;
+        c = bits % 53;
+        n = e[i--] << (53 - c);
+        for (; ; c--) {
+            if (c == 0) {
+                if (i == -1) {
+                    break;
+                }
+
+                n = e[i--];
+                c = 53;
+            }
+
+            y = (n >> 52) & 1;
+            n <<= 1;
+
+            sp_4096_mont_mul_78(t[y^1], t[0], t[1], m, mp);
+
+            XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
+                                 ((size_t)t[1] & addr_mask[y])), sizeof(t[2]));
+            sp_4096_mont_sqr_78(t[2], t[2], m, mp);
+            XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
+                           ((size_t)t[1] & addr_mask[y])), t[2], sizeof(t[2]));
+        }
+
+        sp_4096_mont_reduce_78(t[0], m, mp);
+        n = sp_4096_cmp_78(t[0], m);
+        sp_4096_cond_sub_78(t[0], t[0], m, ((n < 0) ?
+                   (sp_digit)1 : (sp_digit)0) - 1);
+        XMEMCPY(r, t[0], sizeof(t[0]));
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+#else
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[32][156];
+#else
+    sp_digit* t[32];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit rt[156];
+    sp_digit mp = 1;
+    sp_digit n;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 156, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<32; i++)
+            t[i] = td + i * 156;
+#endif
+        norm = t[0];
+
+        sp_4096_mont_setup(m, &mp);
+        sp_4096_mont_norm_78(norm, m);
+
+        if (reduceA != 0) {
+            err = sp_4096_mod_78(t[1], a, m);
+            if (err == MP_OKAY) {
+                sp_4096_mul_78(t[1], t[1], norm);
+                err = sp_4096_mod_78(t[1], t[1], m);
+            }
+        }
+        else {
+            sp_4096_mul_78(t[1], a, norm);
+            err = sp_4096_mod_78(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_mont_sqr_78(t[ 2], t[ 1], m, mp);
+        sp_4096_mont_mul_78(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_4096_mont_sqr_78(t[ 4], t[ 2], m, mp);
+        sp_4096_mont_mul_78(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_4096_mont_sqr_78(t[ 6], t[ 3], m, mp);
+        sp_4096_mont_mul_78(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_4096_mont_sqr_78(t[ 8], t[ 4], m, mp);
+        sp_4096_mont_mul_78(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_4096_mont_sqr_78(t[10], t[ 5], m, mp);
+        sp_4096_mont_mul_78(t[11], t[ 6], t[ 5], m, mp);
+        sp_4096_mont_sqr_78(t[12], t[ 6], m, mp);
+        sp_4096_mont_mul_78(t[13], t[ 7], t[ 6], m, mp);
+        sp_4096_mont_sqr_78(t[14], t[ 7], m, mp);
+        sp_4096_mont_mul_78(t[15], t[ 8], t[ 7], m, mp);
+        sp_4096_mont_sqr_78(t[16], t[ 8], m, mp);
+        sp_4096_mont_mul_78(t[17], t[ 9], t[ 8], m, mp);
+        sp_4096_mont_sqr_78(t[18], t[ 9], m, mp);
+        sp_4096_mont_mul_78(t[19], t[10], t[ 9], m, mp);
+        sp_4096_mont_sqr_78(t[20], t[10], m, mp);
+        sp_4096_mont_mul_78(t[21], t[11], t[10], m, mp);
+        sp_4096_mont_sqr_78(t[22], t[11], m, mp);
+        sp_4096_mont_mul_78(t[23], t[12], t[11], m, mp);
+        sp_4096_mont_sqr_78(t[24], t[12], m, mp);
+        sp_4096_mont_mul_78(t[25], t[13], t[12], m, mp);
+        sp_4096_mont_sqr_78(t[26], t[13], m, mp);
+        sp_4096_mont_mul_78(t[27], t[14], t[13], m, mp);
+        sp_4096_mont_sqr_78(t[28], t[14], m, mp);
+        sp_4096_mont_mul_78(t[29], t[15], t[14], m, mp);
+        sp_4096_mont_sqr_78(t[30], t[15], m, mp);
+        sp_4096_mont_mul_78(t[31], t[16], t[15], m, mp);
+
+        bits = ((bits + 4) / 5) * 5;
+        i = ((bits + 52) / 53) - 1;
+        c = bits % 53;
+        if (c == 0) {
+            c = 53;
+        }
+        if (i < 78) {
+            n = e[i--] << (64 - c);
+        }
+        else {
+            n = 0;
+            i--;
+        }
+        if (c < 5) {
+            n |= e[i--] << (11 - c);
+            c += 53;
+        }
+        y = (n >> 59) & 0x1f;
+        n <<= 5;
+        c -= 5;
+        XMEMCPY(rt, t[y], sizeof(rt));
+        for (; i>=0 || c>=5; ) {
+            if (c < 5) {
+                n |= e[i--] << (11 - c);
+                c += 53;
+            }
+            y = (n >> 59) & 0x1f;
+            n <<= 5;
+            c -= 5;
+
+            sp_4096_mont_sqr_78(rt, rt, m, mp);
+            sp_4096_mont_sqr_78(rt, rt, m, mp);
+            sp_4096_mont_sqr_78(rt, rt, m, mp);
+            sp_4096_mont_sqr_78(rt, rt, m, mp);
+            sp_4096_mont_sqr_78(rt, rt, m, mp);
+
+            sp_4096_mont_mul_78(rt, rt, t[y], m, mp);
+        }
+
+        sp_4096_mont_reduce_78(rt, m, mp);
+        n = sp_4096_cmp_78(rt, m);
+        sp_4096_cond_sub_78(rt, rt, m, ((n < 0) ?
+                   (sp_digit)1 : (sp_digit)0) - 1);
+        XMEMCPY(r, rt, sizeof(rt));
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+#endif
+}
+#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || */
+       /* WOLFSSL_HAVE_SP_DH */
+
+#ifdef WOLFSSL_HAVE_SP_RSA
+/* RSA public key operation.
+ *
+ * in      Array of bytes representing the number to exponentiate, base.
+ * inLen   Number of bytes in base.
+ * em      Public exponent.
+ * mm      Modulus.
+ * out     Buffer to hold big-endian bytes of exponentiation result.
+ *         Must be at least 512 bytes long.
+ * outLen  Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPublic_4096(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
+    byte* out, word32* outLen)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* d = NULL;
+    sp_digit* a;
+    sp_digit* m;
+    sp_digit* r;
+    sp_digit* norm;
+    sp_digit e[1] = {0};
+    sp_digit mp;
+    int i;
+    int err = MP_OKAY;
+
+    if (*outLen < 512U) {
+        err = MP_TO_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(em) > 53) {
+            err = MP_READ_E;
+        }
+        if (inLen > 512U) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 78 * 5, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (d == NULL)
+            err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        a = d;
+        r = a + 78 * 2;
+        m = r + 78 * 2;
+        norm = r;
+
+        sp_4096_from_bin(a, 78, in, inLen);
+#if DIGIT_BIT >= 53
+        e[0] = (sp_digit)em->dp[0];
+#else
+        e[0] = (sp_digit)em->dp[0];
+        if (em->used > 1) {
+            e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
+        }
+#endif
+        if (e[0] == 0) {
+            err = MP_EXPTMOD_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_from_mp(m, 78, mm);
+
+        sp_4096_mont_setup(m, &mp);
+        sp_4096_mont_norm_78(norm, m);
+    }
+    if (err == MP_OKAY) {
+        sp_4096_mul_78(a, a, norm);
+        err = sp_4096_mod_78(a, a, m);
+    }
+    if (err == MP_OKAY) {
+        for (i=52; i>=0; i--) {
+            if ((e[0] >> i) != 0) {
+                break;
+            }
+        }
+
+        XMEMCPY(r, a, sizeof(sp_digit) * 78 * 2);
+        for (i--; i>=0; i--) {
+            sp_4096_mont_sqr_78(r, r, m, mp);
+
+            if (((e[0] >> i) & 1) == 1) {
+                sp_4096_mont_mul_78(r, r, a, m, mp);
+            }
+        }
+        sp_4096_mont_reduce_78(r, m, mp);
+        mp = sp_4096_cmp_78(r, m);
+        sp_4096_cond_sub_78(r, r, m, ((mp < 0) ?
+                    (sp_digit)1 : (sp_digit)0)- 1);
+
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+    }
+
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+
+    return err;
+#else
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit ad[156], md[78], rd[156];
+#else
+    sp_digit* d = NULL;
+#endif
+    sp_digit* a;
+    sp_digit* m;
+    sp_digit* r;
+    sp_digit e[1] = {0};
+    int err = MP_OKAY;
+
+    if (*outLen < 512U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(em) > 53) {
+            err = MP_READ_E;
+        }
+        if (inLen > 512U) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 78 * 5, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        a = d;
+        r = a + 78 * 2;
+        m = r + 78 * 2;
+    }
+#else
+    a = ad;
+    m = md;
+    r = rd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_4096_from_bin(a, 78, in, inLen);
+#if DIGIT_BIT >= 53
+        e[0] = (sp_digit)em->dp[0];
+#else
+        e[0] = (sp_digit)em->dp[0];
+        if (em->used > 1) {
+            e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
+        }
+#endif
+        if (e[0] == 0) {
+            err = MP_EXPTMOD_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        sp_4096_from_mp(m, 78, mm);
+
+        if (e[0] == 0x3) {
+            sp_4096_sqr_78(r, a);
+            err = sp_4096_mod_78(r, r, m);
+            if (err == MP_OKAY) {
+                sp_4096_mul_78(r, a, r);
+                err = sp_4096_mod_78(r, r, m);
+            }
+        }
+        else {
+            sp_digit* norm = r;
+            int i;
+            sp_digit mp;
+
+            sp_4096_mont_setup(m, &mp);
+            sp_4096_mont_norm_78(norm, m);
+
+            sp_4096_mul_78(a, a, norm);
+            err = sp_4096_mod_78(a, a, m);
+
+            if (err == MP_OKAY) {
+                for (i=52; i>=0; i--) {
+                    if ((e[0] >> i) != 0) {
+                        break;
+                    }
+                }
+
+                XMEMCPY(r, a, sizeof(sp_digit) * 156U);
+                for (i--; i>=0; i--) {
+                    sp_4096_mont_sqr_78(r, r, m, mp);
+
+                    if (((e[0] >> i) & 1) == 1) {
+                        sp_4096_mont_mul_78(r, r, a, m, mp);
+                    }
+                }
+                sp_4096_mont_reduce_78(r, m, mp);
+                mp = sp_4096_cmp_78(r, m);
+                sp_4096_cond_sub_78(r, r, m, ((mp < 0) ?
+                           (sp_digit)1 : (sp_digit)0) - 1);
+            }
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+#endif
+
+    return err;
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+#if !defined(SP_RSA_PRIVATE_EXP_D) && !defined(RSA_LOW_MEM)
+#endif /* !SP_RSA_PRIVATE_EXP_D && !RSA_LOW_MEM */
+/* RSA private key operation.
+ *
+ * in      Array of bytes representing the number to exponentiate, base.
+ * inLen   Number of bytes in base.
+ * dm      Private exponent.
+ * pm      First prime.
+ * qm      Second prime.
+ * dpm     First prime's CRT exponent.
+ * dqm     Second prime's CRT exponent.
+ * qim     Inverse of second prime mod p.
+ * mm      Modulus.
+ * out     Buffer to hold big-endian bytes of exponentiation result.
+ *         Must be at least 512 bytes long.
+ * outLen  Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm,
+    mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
+    byte* out, word32* outLen)
+{
+#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* a;
+    sp_digit* d = NULL;
+    sp_digit* m;
+    sp_digit* r;
+    int err = MP_OKAY;
+
+    (void)pm;
+    (void)qm;
+    (void)dpm;
+    (void)dqm;
+    (void)qim;
+
+    if (*outLen < 512U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(dm) > 4096) {
+           err = MP_READ_E;
+        }
+        if (inLen > 512) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 78 * 4, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        a = d + 78;
+        m = a + 156;
+        r = a;
+
+        sp_4096_from_bin(a, 78, in, inLen);
+        sp_4096_from_mp(d, 78, dm);
+        sp_4096_from_mp(m, 78, mm);
+        err = sp_4096_mod_exp_78(r, a, d, 4096, m, 0);
+    }
+    if (err == MP_OKAY) {
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+    }
+
+    if (d != NULL) {
+        XMEMSET(d, 0, sizeof(sp_digit) * 78);
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+
+    return err;
+#else
+    sp_digit a[156], d[78], m[78];
+    sp_digit* r = a;
+    int err = MP_OKAY;
+
+    (void)pm;
+    (void)qm;
+    (void)dpm;
+    (void)dqm;
+    (void)qim;
+
+    if (*outLen < 512U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(dm) > 4096) {
+            err = MP_READ_E;
+        }
+        if (inLen > 512U) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_from_bin(a, 78, in, inLen);
+        sp_4096_from_mp(d, 78, dm);
+        sp_4096_from_mp(m, 78, mm);
+        err = sp_4096_mod_exp_78(r, a, d, 4096, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+    }
+
+    XMEMSET(d, 0, sizeof(sp_digit) * 78);
+
+    return err;
+#endif /* WOLFSSL_SP_SMALL || defined(WOLFSSL_SMALL_STACK) */
+#else
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* t = NULL;
+    sp_digit* a;
+    sp_digit* p;
+    sp_digit* q;
+    sp_digit* dp;
+    sp_digit* dq;
+    sp_digit* qi;
+    sp_digit* tmpa;
+    sp_digit* tmpb;
+    sp_digit* r;
+    int err = MP_OKAY;
+
+    (void)dm;
+    (void)mm;
+
+    if (*outLen < 512U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (inLen > 512) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 39 * 11, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (t == NULL) {
+            err = MEMORY_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        a = t;
+        p = a + 78 * 2;
+        q = p + 39;
+        qi = dq = dp = q + 39;
+        tmpa = qi + 39;
+        tmpb = tmpa + 78;
+
+        r = t + 78;
+
+        sp_4096_from_bin(a, 78, in, inLen);
+        sp_4096_from_mp(p, 39, pm);
+        sp_4096_from_mp(q, 39, qm);
+        sp_4096_from_mp(dp, 39, dpm);
+        err = sp_4096_mod_exp_39(tmpa, a, dp, 2048, p, 1);
+    }
+    if (err == MP_OKAY) {
+        sp_4096_from_mp(dq, 39, dqm);
+        err = sp_4096_mod_exp_39(tmpb, a, dq, 2048, q, 1);
+    }
+    if (err == MP_OKAY) {
+        (void)sp_4096_sub_39(tmpa, tmpa, tmpb);
+        sp_4096_cond_add_39(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[38] >> 63));
+        sp_4096_cond_add_39(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[38] >> 63));
+
+        sp_4096_from_mp(qi, 39, qim);
+        sp_4096_mul_39(tmpa, tmpa, qi);
+        err = sp_4096_mod_39(tmpa, tmpa, p);
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_mul_39(tmpa, q, tmpa);
+        (void)sp_4096_add_78(r, tmpb, tmpa);
+        sp_4096_norm_78(r);
+
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+    }
+
+    if (t != NULL) {
+        XMEMSET(t, 0, sizeof(sp_digit) * 39 * 11);
+        XFREE(t, NULL, DYNAMIC_TYPE_RSA);
+    }
+
+    return err;
+#else
+    sp_digit a[78 * 2];
+    sp_digit p[39], q[39], dp[39], dq[39], qi[39];
+    sp_digit tmpa[78], tmpb[78];
+    sp_digit* r = a;
+    int err = MP_OKAY;
+
+    (void)dm;
+    (void)mm;
+
+    if (*outLen < 512U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (inLen > 512U) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_from_bin(a, 78, in, inLen);
+        sp_4096_from_mp(p, 39, pm);
+        sp_4096_from_mp(q, 39, qm);
+        sp_4096_from_mp(dp, 39, dpm);
+        sp_4096_from_mp(dq, 39, dqm);
+        sp_4096_from_mp(qi, 39, qim);
+
+        err = sp_4096_mod_exp_39(tmpa, a, dp, 2048, p, 1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_4096_mod_exp_39(tmpb, a, dq, 2048, q, 1);
+    }
+
+    if (err == MP_OKAY) {
+        (void)sp_4096_sub_39(tmpa, tmpa, tmpb);
+        sp_4096_cond_add_39(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[38] >> 63));
+        sp_4096_cond_add_39(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[38] >> 63));
+        sp_4096_mul_39(tmpa, tmpa, qi);
+        err = sp_4096_mod_39(tmpa, tmpa, p);
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_mul_39(tmpa, tmpa, q);
+        (void)sp_4096_add_78(r, tmpb, tmpa);
+        sp_4096_norm_78(r);
+
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+    }
+
+    XMEMSET(tmpa, 0, sizeof(tmpa));
+    XMEMSET(tmpb, 0, sizeof(tmpb));
+    XMEMSET(p, 0, sizeof(p));
+    XMEMSET(q, 0, sizeof(q));
+    XMEMSET(dp, 0, sizeof(dp));
+    XMEMSET(dq, 0, sizeof(dq));
+    XMEMSET(qi, 0, sizeof(qi));
+
+    return err;
+#endif /* WOLFSSL_SP_SMALL || defined(WOLFSSL_SMALL_STACK) */
+#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
+}
+
+#endif /* !WOLFSSL_RSA_PUBLIC_ONLY */
+#endif /* WOLFSSL_HAVE_SP_RSA */
+#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
+                                              !defined(WOLFSSL_RSA_PUBLIC_ONLY))
+/* Convert an array of sp_digit to an mp_int.
+ *
+ * a  A single precision integer.
+ * r  A multi-precision integer.
+ */
+static int sp_4096_to_mp(const sp_digit* a, mp_int* r)
+{
+    int err;
+
+    err = mp_grow(r, (4096 + DIGIT_BIT - 1) / DIGIT_BIT);
+    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 53
+        XMEMCPY(r->dp, a, sizeof(sp_digit) * 78);
+        r->used = 78;
+        mp_clamp(r);
+#elif DIGIT_BIT < 53
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 78; i++) {
+            r->dp[j] |= (mp_digit)(a[i] << s);
+            r->dp[j] &= (1L << DIGIT_BIT) - 1;
+            s = DIGIT_BIT - s;
+            r->dp[++j] = (mp_digit)(a[i] >> s);
+            while (s + DIGIT_BIT <= 53) {
+                s += DIGIT_BIT;
+                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+                if (s == SP_WORD_SIZE) {
+                    r->dp[j] = 0;
+                }
+                else {
+                    r->dp[j] = (mp_digit)(a[i] >> s);
+                }
+            }
+            s = 53 - s;
+        }
+        r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#else
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 78; i++) {
+            r->dp[j] |= ((mp_digit)a[i]) << s;
+            if (s + 53 >= DIGIT_BIT) {
+    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+                r->dp[j] &= (1L << DIGIT_BIT) - 1;
+    #endif
+                s = DIGIT_BIT - s;
+                r->dp[++j] = a[i] >> s;
+                s = 53 - s;
+            }
+            else {
+                s += 53;
+            }
+        }
+        r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#endif
+    }
+
+    return err;
+}
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base  Base. MP integer.
+ * exp   Exponent. MP integer.
+ * mod   Modulus. MP integer.
+ * res   Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_4096(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int err = MP_OKAY;
+    sp_digit* d = NULL;
+    sp_digit* b;
+    sp_digit* e;
+    sp_digit* m;
+    sp_digit* r;
+    int expBits = mp_count_bits(exp);
+
+    if (mp_count_bits(base) > 4096) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expBits > 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 78 * 4, NULL, DYNAMIC_TYPE_DH);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        b = d;
+        e = b + 78 * 2;
+        m = e + 78;
+        r = b;
+
+        sp_4096_from_mp(b, 78, base);
+        sp_4096_from_mp(e, 78, exp);
+        sp_4096_from_mp(m, 78, mod);
+
+        err = sp_4096_mod_exp_78(r, b, e, mp_count_bits(exp), m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_4096_to_mp(r, res);
+    }
+
+    if (d != NULL) {
+        XMEMSET(e, 0, sizeof(sp_digit) * 78U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
+    }
+    return err;
+#else
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit bd[156], ed[78], md[78];
+#else
+    sp_digit* d = NULL;
+#endif
+    sp_digit* b;
+    sp_digit* e;
+    sp_digit* m;
+    sp_digit* r;
+    int err = MP_OKAY;
+    int expBits = mp_count_bits(exp);
+
+    if (mp_count_bits(base) > 4096) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expBits > 4096) {
+            err = MP_READ_E;
+        }
+    }
+    
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 78 * 4, NULL, DYNAMIC_TYPE_DH);
+        if (d == NULL)
+            err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        b = d;
+        e = b + 78 * 2;
+        m = e + 78;
+        r = b;
+    }
+#else
+    r = b = bd;
+    e = ed;
+    m = md;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_4096_from_mp(b, 78, base);
+        sp_4096_from_mp(e, 78, exp);
+        sp_4096_from_mp(m, 78, mod);
+
+        err = sp_4096_mod_exp_78(r, b, e, expBits, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_4096_to_mp(r, res);
+    }
+
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (d != NULL) {
+        XMEMSET(e, 0, sizeof(sp_digit) * 78U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
+    }
+#else
+    XMEMSET(e, 0, sizeof(sp_digit) * 78U);
+#endif
+
+    return err;
+#endif
+}
+
+#ifdef WOLFSSL_HAVE_SP_DH
+
+#ifdef HAVE_FFDHE_4096
+SP_NOINLINE static void sp_4096_lshift_78(sp_digit* r, sp_digit* a, byte n)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    r[78] = a[77] >> (53 - n);
+    for (i=77; i>0; i--) {
+        r[i] = ((a[i] << n) | (a[i-1] >> (53 - n))) & 0x1fffffffffffffL;
+    }
+#else
+    sp_int_digit s, t;
+
+    s = (sp_int_digit)a[77];
+    r[78] = s >> (53U - n);
+    s = (sp_int_digit)(a[77]); t = (sp_int_digit)(a[76]);
+    r[77] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[76]); t = (sp_int_digit)(a[75]);
+    r[76] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[75]); t = (sp_int_digit)(a[74]);
+    r[75] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[74]); t = (sp_int_digit)(a[73]);
+    r[74] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[73]); t = (sp_int_digit)(a[72]);
+    r[73] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[72]); t = (sp_int_digit)(a[71]);
+    r[72] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[71]); t = (sp_int_digit)(a[70]);
+    r[71] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[70]); t = (sp_int_digit)(a[69]);
+    r[70] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[69]); t = (sp_int_digit)(a[68]);
+    r[69] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[68]); t = (sp_int_digit)(a[67]);
+    r[68] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[67]); t = (sp_int_digit)(a[66]);
+    r[67] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[66]); t = (sp_int_digit)(a[65]);
+    r[66] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[65]); t = (sp_int_digit)(a[64]);
+    r[65] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[64]); t = (sp_int_digit)(a[63]);
+    r[64] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[63]); t = (sp_int_digit)(a[62]);
+    r[63] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[62]); t = (sp_int_digit)(a[61]);
+    r[62] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[61]); t = (sp_int_digit)(a[60]);
+    r[61] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[60]); t = (sp_int_digit)(a[59]);
+    r[60] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[59]); t = (sp_int_digit)(a[58]);
+    r[59] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[58]); t = (sp_int_digit)(a[57]);
+    r[58] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[57]); t = (sp_int_digit)(a[56]);
+    r[57] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[56]); t = (sp_int_digit)(a[55]);
+    r[56] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[55]); t = (sp_int_digit)(a[54]);
+    r[55] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[54]); t = (sp_int_digit)(a[53]);
+    r[54] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[53]); t = (sp_int_digit)(a[52]);
+    r[53] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[52]); t = (sp_int_digit)(a[51]);
+    r[52] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[51]); t = (sp_int_digit)(a[50]);
+    r[51] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[50]); t = (sp_int_digit)(a[49]);
+    r[50] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[49]); t = (sp_int_digit)(a[48]);
+    r[49] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[48]); t = (sp_int_digit)(a[47]);
+    r[48] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[47]); t = (sp_int_digit)(a[46]);
+    r[47] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[46]); t = (sp_int_digit)(a[45]);
+    r[46] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[45]); t = (sp_int_digit)(a[44]);
+    r[45] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[44]); t = (sp_int_digit)(a[43]);
+    r[44] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[43]); t = (sp_int_digit)(a[42]);
+    r[43] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[42]); t = (sp_int_digit)(a[41]);
+    r[42] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[41]); t = (sp_int_digit)(a[40]);
+    r[41] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[40]); t = (sp_int_digit)(a[39]);
+    r[40] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[39]); t = (sp_int_digit)(a[38]);
+    r[39] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[38]); t = (sp_int_digit)(a[37]);
+    r[38] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[37]); t = (sp_int_digit)(a[36]);
+    r[37] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[36]); t = (sp_int_digit)(a[35]);
+    r[36] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[35]); t = (sp_int_digit)(a[34]);
+    r[35] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[34]); t = (sp_int_digit)(a[33]);
+    r[34] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[33]); t = (sp_int_digit)(a[32]);
+    r[33] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[32]); t = (sp_int_digit)(a[31]);
+    r[32] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[31]); t = (sp_int_digit)(a[30]);
+    r[31] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[30]); t = (sp_int_digit)(a[29]);
+    r[30] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[29]); t = (sp_int_digit)(a[28]);
+    r[29] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[28]); t = (sp_int_digit)(a[27]);
+    r[28] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[27]); t = (sp_int_digit)(a[26]);
+    r[27] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[26]); t = (sp_int_digit)(a[25]);
+    r[26] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[25]); t = (sp_int_digit)(a[24]);
+    r[25] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[24]); t = (sp_int_digit)(a[23]);
+    r[24] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[23]); t = (sp_int_digit)(a[22]);
+    r[23] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[22]); t = (sp_int_digit)(a[21]);
+    r[22] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[21]); t = (sp_int_digit)(a[20]);
+    r[21] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[20]); t = (sp_int_digit)(a[19]);
+    r[20] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[19]); t = (sp_int_digit)(a[18]);
+    r[19] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[18]); t = (sp_int_digit)(a[17]);
+    r[18] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[17]); t = (sp_int_digit)(a[16]);
+    r[17] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[16]); t = (sp_int_digit)(a[15]);
+    r[16] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[15]); t = (sp_int_digit)(a[14]);
+    r[15] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[14]); t = (sp_int_digit)(a[13]);
+    r[14] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[13]); t = (sp_int_digit)(a[12]);
+    r[13] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[12]); t = (sp_int_digit)(a[11]);
+    r[12] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[11]); t = (sp_int_digit)(a[10]);
+    r[11] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[10]); t = (sp_int_digit)(a[9]);
+    r[10] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[9]); t = (sp_int_digit)(a[8]);
+    r[9] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[8]); t = (sp_int_digit)(a[7]);
+    r[8] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[7]); t = (sp_int_digit)(a[6]);
+    r[7] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[6]); t = (sp_int_digit)(a[5]);
+    r[6] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[5]); t = (sp_int_digit)(a[4]);
+    r[5] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[4]); t = (sp_int_digit)(a[3]);
+    r[4] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[3]); t = (sp_int_digit)(a[2]);
+    r[3] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[2]); t = (sp_int_digit)(a[1]);
+    r[2] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+    s = (sp_int_digit)(a[1]); t = (sp_int_digit)(a[0]);
+    r[1] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL;
+#endif
+    r[0] = (a[0] << n) & 0x1fffffffffffffL;
+}
+
+/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
+ *
+ * r     A single precision number that is the result of the operation.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_4096_mod_exp_2_78(sp_digit* r, const sp_digit* e, int bits, const sp_digit* m)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit nd[156];
+    sp_digit td[79];
+#else
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit* tmp;
+    sp_digit mp = 1;
+    sp_digit n, o;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 235, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        norm = td;
+        tmp  = td + 156;
+        XMEMSET(td, 0, sizeof(sp_digit) * 235);
+#else
+        norm = nd;
+        tmp  = td;
+        XMEMSET(td, 0, sizeof(td));
+#endif
+
+        sp_4096_mont_setup(m, &mp);
+        sp_4096_mont_norm_78(norm, m);
+
+        bits = ((bits + 4) / 5) * 5;
+        i = ((bits + 52) / 53) - 1;
+        c = bits % 53;
+        if (c == 0) {
+            c = 53;
+        }
+        if (i < 78) {
+            n = e[i--] << (64 - c);
+        }
+        else {
+            n = 0;
+            i--;
+        }
+        if (c < 5) {
+            n |= e[i--] << (11 - c);
+            c += 53;
+        }
+        y = (n >> 59) & 0x1f;
+        n <<= 5;
+        c -= 5;
+        sp_4096_lshift_78(r, norm, y);
+        for (; i>=0 || c>=5; ) {
+            if (c < 5) {
+                n |= e[i--] << (11 - c);
+                c += 53;
+            }
+            y = (n >> 59) & 0x1f;
+            n <<= 5;
+            c -= 5;
+
+            sp_4096_mont_sqr_78(r, r, m, mp);
+            sp_4096_mont_sqr_78(r, r, m, mp);
+            sp_4096_mont_sqr_78(r, r, m, mp);
+            sp_4096_mont_sqr_78(r, r, m, mp);
+            sp_4096_mont_sqr_78(r, r, m, mp);
+
+            sp_4096_lshift_78(r, r, y);
+            sp_4096_mul_d_78(tmp, norm, (r[78] << 38) + (r[77] >> 15));
+            r[78] = 0;
+            r[77] &= 0x7fffL;
+            (void)sp_4096_add_78(r, r, tmp);
+            sp_4096_norm_78(r);
+            o = sp_4096_cmp_78(r, m);
+            sp_4096_cond_sub_78(r, r, m, ((o < 0) ?
+                                          (sp_digit)1 : (sp_digit)0) - 1);
+        }
+
+        sp_4096_mont_reduce_78(r, m, mp);
+        n = sp_4096_cmp_78(r, m);
+        sp_4096_cond_sub_78(r, r, m, ((n < 0) ?
+                                                (sp_digit)1 : (sp_digit)0) - 1);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+
+#endif /* HAVE_FFDHE_4096 */
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base     Base.
+ * exp      Array of bytes that is the exponent.
+ * expLen   Length of data, in bytes, in exponent.
+ * mod      Modulus.
+ * out      Buffer to hold big-endian bytes of exponentiation result.
+ *          Must be at least 512 bytes long.
+ * outLen   Length, in bytes, of exponentiation result.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_DhExp_4096(mp_int* base, const byte* exp, word32 expLen,
+    mp_int* mod, byte* out, word32* outLen)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int err = MP_OKAY;
+    sp_digit* d = NULL;
+    sp_digit* b;
+    sp_digit* e;
+    sp_digit* m;
+    sp_digit* r;
+    word32 i;
+
+    if (mp_count_bits(base) > 4096) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expLen > 512) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 78 * 4, NULL, DYNAMIC_TYPE_DH);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        b = d;
+        e = b + 78 * 2;
+        m = e + 78;
+        r = b;
+
+        sp_4096_from_mp(b, 78, base);
+        sp_4096_from_bin(e, 78, exp, expLen);
+        sp_4096_from_mp(m, 78, mod);
+
+    #ifdef HAVE_FFDHE_4096
+        if (base->used == 1 && base->dp[0] == 2 &&
+                ((m[77] << 17) | (m[76] >> 36)) == 0xffffffffL) {
+            err = sp_4096_mod_exp_2_78(r, e, expLen * 8, m);
+        }
+        else
+    #endif
+            err = sp_4096_mod_exp_78(r, b, e, expLen * 8, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+        for (i=0; i<512 && out[i] == 0; i++) {
+        }
+        *outLen -= i;
+        XMEMMOVE(out, out + i, *outLen);
+    }
+
+    if (d != NULL) {
+        XMEMSET(e, 0, sizeof(sp_digit) * 78U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
+    }
+    return err;
+#else
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit bd[156], ed[78], md[78];
+#else
+    sp_digit* d = NULL;
+#endif
+    sp_digit* b;
+    sp_digit* e;
+    sp_digit* m;
+    sp_digit* r;
+    word32 i;
+    int err = MP_OKAY;
+
+    if (mp_count_bits(base) > 4096) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expLen > 512U) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+#ifdef WOLFSSL_SMALL_STACK
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 78 * 4, NULL, DYNAMIC_TYPE_DH);
+        if (d == NULL)
+            err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        b = d;
+        e = b + 78 * 2;
+        m = e + 78;
+        r = b;
+    }
+#else
+    r = b = bd;
+    e = ed;
+    m = md;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_4096_from_mp(b, 78, base);
+        sp_4096_from_bin(e, 78, exp, expLen);
+        sp_4096_from_mp(m, 78, mod);
+
+    #ifdef HAVE_FFDHE_4096
+        if (base->used == 1 && base->dp[0] == 2U &&
+                ((m[77] << 17) | (m[76] >> 36)) == 0xffffffffL) {
+            err = sp_4096_mod_exp_2_78(r, e, expLen * 8U, m);
+        }
+        else {
+    #endif
+            err = sp_4096_mod_exp_78(r, b, e, expLen * 8U, m, 0);
+    #ifdef HAVE_FFDHE_4096
+        }
+    #endif
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+        for (i=0; i<512U && out[i] == 0U; i++) {
+        }
+        *outLen -= i;
+        XMEMMOVE(out, out + i, *outLen);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (d != NULL) {
+        XMEMSET(e, 0, sizeof(sp_digit) * 78U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
+    }
+#else
+    XMEMSET(e, 0, sizeof(sp_digit) * 78U);
+#endif
+
+    return err;
+#endif
+}
+#endif /* WOLFSSL_HAVE_SP_DH */
+
+#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
+
+#endif /* WOLFSSL_SP_4096 */
 
 #endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
 #ifdef WOLFSSL_HAVE_SP_ECC
 #ifndef WOLFSSL_SP_NO_256
 
 /* Point structure to use. */
-typedef struct sp_point {
+typedef struct sp_point_256 {
     sp_digit x[2 * 5];
     sp_digit y[2 * 5];
     sp_digit z[2 * 5];
     int infinity;
-} sp_point;
+} sp_point_256;
 
 /* The modulus (prime) of the curve P256. */
-static sp_digit p256_mod[5] = {
-    0xfffffffffffffl,0x00fffffffffffl,0x0000000000000l,0x0001000000000l,
-    0x0ffffffff0000l
+static const sp_digit p256_mod[5] = {
+    0xfffffffffffffL,0x00fffffffffffL,0x0000000000000L,0x0001000000000L,
+    0x0ffffffff0000L
 };
-#ifndef WOLFSSL_SP_SMALL
 /* The Montogmery normalizer for modulus of the curve P256. */
-static sp_digit p256_norm_mod[5] = {
-    0x0000000000001l,0xff00000000000l,0xfffffffffffffl,0xfffefffffffffl,
-    0x000000000ffffl
+static const sp_digit p256_norm_mod[5] = {
+    0x0000000000001L,0xff00000000000L,0xfffffffffffffL,0xfffefffffffffL,
+    0x000000000ffffL
 };
-#endif /* WOLFSSL_SP_SMALL */
 /* The Montogmery multiplier for modulus of the curve P256. */
-static sp_digit p256_mp_mod = 0x0000000000001;
+static const sp_digit p256_mp_mod = 0x0000000000001;
 #if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
                                             defined(HAVE_ECC_VERIFY)
 /* The order of the curve P256. */
-static sp_digit p256_order[5] = {
-    0x9cac2fc632551l,0xada7179e84f3bl,0xfffffffbce6fal,0x0000fffffffffl,
-    0x0ffffffff0000l
+static const sp_digit p256_order[5] = {
+    0x9cac2fc632551L,0xada7179e84f3bL,0xfffffffbce6faL,0x0000fffffffffL,
+    0x0ffffffff0000L
 };
 #endif
 /* The order of the curve P256 minus 2. */
-static sp_digit p256_order2[5] = {
-    0x9cac2fc63254fl,0xada7179e84f3bl,0xfffffffbce6fal,0x0000fffffffffl,
-    0x0ffffffff0000l
+static const sp_digit p256_order2[5] = {
+    0x9cac2fc63254fL,0xada7179e84f3bL,0xfffffffbce6faL,0x0000fffffffffL,
+    0x0ffffffff0000L
 };
 #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
 /* The Montogmery normalizer for order of the curve P256. */
-static sp_digit p256_norm_order[5] = {
-    0x6353d039cdaafl,0x5258e8617b0c4l,0x0000000431905l,0xffff000000000l,
-    0x000000000ffffl
+static const sp_digit p256_norm_order[5] = {
+    0x6353d039cdaafL,0x5258e8617b0c4L,0x0000000431905L,0xffff000000000L,
+    0x000000000ffffL
 };
 #endif
 #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
 /* The Montogmery multiplier for order of the curve P256. */
-static sp_digit p256_mp_order = 0x1c8aaee00bc4fl;
+static const sp_digit p256_mp_order = 0x1c8aaee00bc4fL;
 #endif
 /* The base point of curve P256. */
-static sp_point p256_base = {
+static const sp_point_256 p256_base = {
     /* X ordinate */
     {
-        0x13945d898c296l,0x812deb33a0f4al,0x3a440f277037dl,0x4247f8bce6e56l,
-        0x06b17d1f2e12cl
+        0x13945d898c296L,0x812deb33a0f4aL,0x3a440f277037dL,0x4247f8bce6e56L,
+        0x06b17d1f2e12cL,
+        0L, 0L, 0L, 0L, 0L
     },
     /* Y ordinate */
     {
-        0x6406837bf51f5l,0x576b315ececbbl,0xc0f9e162bce33l,0x7f9b8ee7eb4a7l,
-        0x04fe342e2fe1al
+        0x6406837bf51f5L,0x576b315ececbbL,0xc0f9e162bce33L,0x7f9b8ee7eb4a7L,
+        0x04fe342e2fe1aL,
+        0L, 0L, 0L, 0L, 0L
     },
     /* Z ordinate */
     {
-        0x0000000000001l,0x0000000000000l,0x0000000000000l,0x0000000000000l,
-        0x0000000000000l
+        0x0000000000001L,0x0000000000000L,0x0000000000000L,0x0000000000000L,
+        0x0000000000000L,
+        0L, 0L, 0L, 0L, 0L
     },
     /* infinity */
     0
 };
 #if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY)
-static sp_digit p256_b[5] = {
-    0xe3c3e27d2604bl,0xb0cc53b0f63bcl,0x69886bc651d06l,0x93e7b3ebbd557l,
-    0x05ac635d8aa3al
+static const sp_digit p256_b[5] = {
+    0xe3c3e27d2604bL,0xb0cc53b0f63bcL,0x69886bc651d06L,0x93e7b3ebbd557L,
+    0x05ac635d8aa3aL
 };
 #endif
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+static int sp_256_point_new_ex_5(void* heap, sp_point_256* sp, sp_point_256** p)
+{
+    int ret = MP_OKAY;
+    (void)heap;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    (void)sp;
+    *p = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, DYNAMIC_TYPE_ECC);
+#else
+    *p = sp;
+#endif
+    if (*p == NULL) {
+        ret = MEMORY_E;
+    }
+    return ret;
+}
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
 /* Allocate memory for point and return error. */
-#define sp_ecc_point_new(heap, sp, p)                                   \
-    ((p = XMALLOC(sizeof(sp_point), heap, DYNAMIC_TYPE_ECC)) == NULL) ? \
-        MEMORY_E : MP_OKAY
+#define sp_256_point_new_5(heap, sp, p) sp_256_point_new_ex_5((heap), NULL, &(p))
 #else
 /* Set pointer to data and return no error. */
-#define sp_ecc_point_new(heap, sp, p)   ((p = &sp) == NULL) ? MEMORY_E : MP_OKAY
-#endif
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#define sp_256_point_new_5(heap, sp, p) sp_256_point_new_ex_5((heap), &(sp), &(p))
+#endif
+
+
+static void sp_256_point_free_5(sp_point_256* p, int clear, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
 /* If valid pointer then clear point data if requested and free data. */
-#define sp_ecc_point_free(p, clear, heap)     \
-    do {                                      \
-        if (p != NULL) {                      \
-            if (clear)                        \
-                XMEMSET(p, 0, sizeof(*p));    \
-            XFREE(p, heap, DYNAMIC_TYPE_ECC); \
-        }                                     \
-    }                                         \
-    while (0)
+    if (p != NULL) {
+        if (clear != 0) {
+            XMEMSET(p, 0, sizeof(*p));
+        }
+        XFREE(p, heap, DYNAMIC_TYPE_ECC);
+    }
 #else
 /* Clear point data if requested. */
-#define sp_ecc_point_free(p, clear, heap) \
-    do {                                  \
-        if (clear)                        \
-            XMEMSET(p, 0, sizeof(*p));    \
-    }                                     \
-    while (0)
-#endif
+    if (clear != 0) {
+        XMEMSET(p, 0, sizeof(*p));
+    }
+#endif
+    (void)heap;
+}
 
 /* Multiply a number by Montogmery normalizer mod modulus (prime).
  *
@@ -6754,9 +12529,9 @@
  * m  The modulus (prime).
  * returns MEMORY_E when memory allocation fails and MP_OKAY otherwise.
  */
-static int sp_256_mod_mul_norm_5(sp_digit* r, sp_digit* a, sp_digit* m)
-{
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+static int sp_256_mod_mul_norm_5(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     int64_t* td;
 #else
     int64_t td[8];
@@ -6769,95 +12544,98 @@
 
     (void)m;
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    td = XMALLOC(sizeof(int64_t) * 2 * 8, NULL, DYNAMIC_TYPE_ECC);
-    if (td != NULL) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    td = (int64_t*)XMALLOC(sizeof(int64_t) * 2 * 8, NULL, DYNAMIC_TYPE_ECC);
+    if (td == NULL) {
+        return MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
         t = td;
         a32 = td + 8;
-    }
-    else
-        err = MEMORY_E;
-#else
-    t = td;
-    a32 = a32d;
-#endif
-
-    if (err == MP_OKAY) {
-        a32[0] = (sp_digit)(a[0]) & 0xffffffff;
-        a32[1] = (sp_digit)(a[0] >> 32);
-        a32[1] |= a[1] << 20;
-        a32[1] &= 0xffffffff;
-        a32[2] = (sp_digit)(a[1] >> 12) & 0xffffffff;
-        a32[3] = (sp_digit)(a[1] >> 44);
-        a32[3] |= a[2] << 8;
-        a32[3] &= 0xffffffff;
-        a32[4] = (sp_digit)(a[2] >> 24);
-        a32[4] |= a[3] << 28;
-        a32[4] &= 0xffffffff;
-        a32[5] = (sp_digit)(a[3] >> 4) & 0xffffffff;
-        a32[6] = (sp_digit)(a[3] >> 36);
-        a32[6] |= a[4] << 16;
-        a32[6] &= 0xffffffff;
-        a32[7] = (sp_digit)(a[4] >> 16) & 0xffffffff;
+#else
+        t = td;
+        a32 = a32d;
+#endif
+
+        a32[0] = (sp_digit)(a[0]) & 0xffffffffL;
+        a32[1] = (sp_digit)(a[0] >> 32U);
+        a32[1] |= a[1] << 20U;
+        a32[1] &= 0xffffffffL;
+        a32[2] = (sp_digit)(a[1] >> 12U) & 0xffffffffL;
+        a32[3] = (sp_digit)(a[1] >> 44U);
+        a32[3] |= a[2] << 8U;
+        a32[3] &= 0xffffffffL;
+        a32[4] = (sp_digit)(a[2] >> 24U);
+        a32[4] |= a[3] << 28U;
+        a32[4] &= 0xffffffffL;
+        a32[5] = (sp_digit)(a[3] >> 4U) & 0xffffffffL;
+        a32[6] = (sp_digit)(a[3] >> 36U);
+        a32[6] |= a[4] << 16U;
+        a32[6] &= 0xffffffffL;
+        a32[7] = (sp_digit)(a[4] >> 16U) & 0xffffffffL;
 
         /*  1  1  0 -1 -1 -1 -1  0 */
-        t[0] = 0 + a32[0] + a32[1] - a32[3] - a32[4] - a32[5] - a32[6];
+            t[0] = 0 + a32[0] + a32[1] - a32[3] - a32[4] - a32[5] - a32[6];
         /*  0  1  1  0 -1 -1 -1 -1 */
-        t[1] = 0 + a32[1] + a32[2] - a32[4] - a32[5] - a32[6] - a32[7];
+            t[1] = 0 + a32[1] + a32[2] - a32[4] - a32[5] - a32[6] - a32[7];
         /*  0  0  1  1  0 -1 -1 -1 */
-        t[2] = 0 + a32[2] + a32[3] - a32[5] - a32[6] - a32[7];
+            t[2] = 0 + a32[2] + a32[3] - a32[5] - a32[6] - a32[7];
         /* -1 -1  0  2  2  1  0 -1 */
-        t[3] = 0 - a32[0] - a32[1] + 2 * a32[3] + 2 * a32[4] + a32[5] - a32[7];
+            t[3] = 0 - a32[0] - a32[1] + 2 * a32[3] + 2 * a32[4] + a32[5] - a32[7];
         /*  0 -1 -1  0  2  2  1  0 */
-        t[4] = 0 - a32[1] - a32[2] + 2 * a32[4] + 2 * a32[5] + a32[6];
+            t[4] = 0 - a32[1] - a32[2] + 2 * a32[4] + 2 * a32[5] + a32[6];
         /*  0  0 -1 -1  0  2  2  1 */
-        t[5] = 0 - a32[2] - a32[3] + 2 * a32[5] + 2 * a32[6] + a32[7];
+            t[5] = 0 - a32[2] - a32[3] + 2 * a32[5] + 2 * a32[6] + a32[7];
         /* -1 -1  0  0  0  1  3  2 */
-        t[6] = 0 - a32[0] - a32[1] + a32[5] + 3 * a32[6] + 2 * a32[7];
+            t[6] = 0 - a32[0] - a32[1] + a32[5] + 3 * a32[6] + 2 * a32[7];
         /*  1  0 -1 -1 -1 -1  0  3 */
-        t[7] = 0 + a32[0] - a32[2] - a32[3] - a32[4] - a32[5] + 3 * a32[7];
-
-        t[1] += t[0] >> 32; t[0] &= 0xffffffff;
-        t[2] += t[1] >> 32; t[1] &= 0xffffffff;
-        t[3] += t[2] >> 32; t[2] &= 0xffffffff;
-        t[4] += t[3] >> 32; t[3] &= 0xffffffff;
-        t[5] += t[4] >> 32; t[4] &= 0xffffffff;
-        t[6] += t[5] >> 32; t[5] &= 0xffffffff;
-        t[7] += t[6] >> 32; t[6] &= 0xffffffff;
-        o     = t[7] >> 32; t[7] &= 0xffffffff;
-        t[0] += o;
-        t[3] -= o;
-        t[6] -= o;
-        t[7] += o;
-        t[1] += t[0] >> 32; t[0] &= 0xffffffff;
-        t[2] += t[1] >> 32; t[1] &= 0xffffffff;
-        t[3] += t[2] >> 32; t[2] &= 0xffffffff;
-        t[4] += t[3] >> 32; t[3] &= 0xffffffff;
-        t[5] += t[4] >> 32; t[4] &= 0xffffffff;
-        t[6] += t[5] >> 32; t[5] &= 0xffffffff;
-        t[7] += t[6] >> 32; t[6] &= 0xffffffff;
+            t[7] = 0 + a32[0] - a32[2] - a32[3] - a32[4] - a32[5] + 3 * a32[7];
+
+            t[1] += t[0] >> 32U; t[0] &= 0xffffffffL;
+            t[2] += t[1] >> 32U; t[1] &= 0xffffffffL;
+            t[3] += t[2] >> 32U; t[2] &= 0xffffffffL;
+            t[4] += t[3] >> 32U; t[3] &= 0xffffffffL;
+            t[5] += t[4] >> 32U; t[4] &= 0xffffffffL;
+            t[6] += t[5] >> 32U; t[5] &= 0xffffffffL;
+            t[7] += t[6] >> 32U; t[6] &= 0xffffffffL;
+            o     = t[7] >> 32U; t[7] &= 0xffffffffL;
+            t[0] += o;
+            t[3] -= o;
+            t[6] -= o;
+            t[7] += o;
+            t[1] += t[0] >> 32U; t[0] &= 0xffffffffL;
+            t[2] += t[1] >> 32U; t[1] &= 0xffffffffL;
+            t[3] += t[2] >> 32U; t[2] &= 0xffffffffL;
+            t[4] += t[3] >> 32U; t[3] &= 0xffffffffL;
+            t[5] += t[4] >> 32U; t[4] &= 0xffffffffL;
+            t[6] += t[5] >> 32U; t[5] &= 0xffffffffL;
+            t[7] += t[6] >> 32U; t[6] &= 0xffffffffL;
 
         r[0] = t[0];
-        r[0] |= t[1] << 32;
-        r[0] &= 0xfffffffffffffl;
+        r[0] |= t[1] << 32U;
+        r[0] &= 0xfffffffffffffLL;
         r[1] = (sp_digit)(t[1] >> 20);
-        r[1] |= t[2] << 12;
-        r[1] |= t[3] << 44;
-        r[1] &= 0xfffffffffffffl;
+        r[1] |= t[2] << 12U;
+        r[1] |= t[3] << 44U;
+        r[1] &= 0xfffffffffffffLL;
         r[2] = (sp_digit)(t[3] >> 8);
-        r[2] |= t[4] << 24;
-        r[2] &= 0xfffffffffffffl;
+        r[2] |= t[4] << 24U;
+        r[2] &= 0xfffffffffffffLL;
         r[3] = (sp_digit)(t[4] >> 28);
-        r[3] |= t[5] << 4;
-        r[3] |= t[6] << 36;
-        r[3] &= 0xfffffffffffffl;
+        r[3] |= t[5] << 4U;
+        r[3] |= t[6] << 36U;
+        r[3] &= 0xfffffffffffffLL;
         r[4] = (sp_digit)(t[6] >> 16);
-        r[4] |= t[7] << 16;
-    }
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (td != NULL)
+        r[4] |= t[7] << 16U;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_ECC);
+    }
 #endif
 
     return err;
@@ -6866,53 +12644,64 @@
 /* Convert an mp_int to an array of sp_digit.
  *
  * r  A single precision integer.
+ * size  Maximum number of bytes to convert
  * a  A multi-precision integer.
  */
-static void sp_256_from_mp(sp_digit* r, int max, mp_int* a)
+static void sp_256_from_mp(sp_digit* r, int size, const mp_int* a)
 {
 #if DIGIT_BIT == 52
     int j;
 
     XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
 
-    for (j = a->used; j < max; j++)
+    for (j = a->used; j < size; j++) {
         r[j] = 0;
+    }
 #elif DIGIT_BIT > 52
-    int i, j = 0, s = 0;
+    int i, j = 0;
+    word32 s = 0;
 
     r[0] = 0;
-    for (i = 0; i < a->used && j < max; i++) {
-        r[j] |= a->dp[i] << s;
-        r[j] &= 0xfffffffffffffl;
-        s = 52 - s;
-        if (j + 1 >= max)
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
+        r[j] &= 0xfffffffffffffL;
+        s = 52U - s;
+        if (j + 1 >= size) {
             break;
-        r[++j] = a->dp[i] >> s;
-        while (s + 52 <= DIGIT_BIT) {
-            s += 52;
-            r[j] &= 0xfffffffffffffl;
-            if (j + 1 >= max)
-                break;
-            if (s < DIGIT_BIT)
-                r[++j] = a->dp[i] >> s;
-            else
-                r[++j] = 0;
-        }
-        s = DIGIT_BIT - s;
-    }
-
-    for (j++; j < max; j++)
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 52U) <= (word32)DIGIT_BIT) {
+            s += 52U;
+            r[j] &= 0xfffffffffffffL;
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
         r[j] = 0;
+    }
 #else
     int i, j = 0, s = 0;
 
     r[0] = 0;
-    for (i = 0; i < a->used && j < max; i++) {
+    for (i = 0; i < a->used && j < size; i++) {
         r[j] |= ((sp_digit)a->dp[i]) << s;
         if (s + DIGIT_BIT >= 52) {
-            r[j] &= 0xfffffffffffffl;
-            if (j + 1 >= max)
-                break;
+            r[j] &= 0xfffffffffffffL;
+            if (j + 1 >= size) {
+                break;
+            }
             s = 52 - s;
             if (s == DIGIT_BIT) {
                 r[++j] = 0;
@@ -6923,21 +12712,23 @@
                 s = DIGIT_BIT - s;
             }
         }
-        else
+        else {
             s += DIGIT_BIT;
-    }
-
-    for (j++; j < max; j++)
+        }
+    }
+
+    for (j++; j < size; j++) {
         r[j] = 0;
-#endif
-}
-
-/* Convert a point of type ecc_point to type sp_point.
- *
- * p   Point of type sp_point (result).
+    }
+#endif
+}
+
+/* Convert a point of type ecc_point to type sp_point_256.
+ *
+ * p   Point of type sp_point_256 (result).
  * pm  Point of type ecc_point.
  */
-static void sp_256_point_from_ecc_point_5(sp_point* p, ecc_point* pm)
+static void sp_256_point_from_ecc_point_5(sp_point_256* p, const ecc_point* pm)
 {
     XMEMSET(p->x, 0, sizeof(p->x));
     XMEMSET(p->y, 0, sizeof(p->y));
@@ -6953,12 +12744,12 @@
  * a  A single precision integer.
  * r  A multi-precision integer.
  */
-static int sp_256_to_mp(sp_digit* a, mp_int* r)
+static int sp_256_to_mp(const sp_digit* a, mp_int* r)
 {
     int err;
 
     err = mp_grow(r, (256 + DIGIT_BIT - 1) / DIGIT_BIT);
-    if (err == MP_OKAY) {
+    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
 #if DIGIT_BIT == 52
         XMEMCPY(r->dp, a, sizeof(sp_digit) * 5);
         r->used = 5;
@@ -6968,14 +12759,19 @@
 
         r->dp[0] = 0;
         for (i = 0; i < 5; i++) {
-            r->dp[j] |= a[i] << s;
-            r->dp[j] &= (1l << DIGIT_BIT) - 1;
+            r->dp[j] |= (mp_digit)(a[i] << s);
+            r->dp[j] &= (1L << DIGIT_BIT) - 1;
             s = DIGIT_BIT - s;
-            r->dp[++j] = a[i] >> s;
+            r->dp[++j] = (mp_digit)(a[i] >> s);
             while (s + DIGIT_BIT <= 52) {
                 s += DIGIT_BIT;
-                r->dp[j] &= (1l << DIGIT_BIT) - 1;
-                r->dp[++j] = a[i] >> s;
+                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+                if (s == SP_WORD_SIZE) {
+                    r->dp[j] = 0;
+                }
+                else {
+                    r->dp[j] = (mp_digit)(a[i] >> s);
+                }
             }
             s = 52 - s;
         }
@@ -6988,15 +12784,16 @@
         for (i = 0; i < 5; i++) {
             r->dp[j] |= ((mp_digit)a[i]) << s;
             if (s + 52 >= DIGIT_BIT) {
-    #if DIGIT_BIT < 64
-                r->dp[j] &= (1l << DIGIT_BIT) - 1;
+    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+                r->dp[j] &= (1L << DIGIT_BIT) - 1;
     #endif
                 s = DIGIT_BIT - s;
                 r->dp[++j] = a[i] >> s;
                 s = 52 - s;
             }
-            else
+            else {
                 s += 52;
+            }
         }
         r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT;
         mp_clamp(r);
@@ -7006,204 +12803,26 @@
     return err;
 }
 
-/* Convert a point of type sp_point to type ecc_point.
- *
- * p   Point of type sp_point.
+/* Convert a point of type sp_point_256 to type ecc_point.
+ *
+ * p   Point of type sp_point_256.
  * pm  Point of type ecc_point (result).
  * returns MEMORY_E when allocation of memory in ecc_point fails otherwise
  * MP_OKAY.
  */
-static int sp_256_point_to_ecc_point_5(sp_point* p, ecc_point* pm)
+static int sp_256_point_to_ecc_point_5(const sp_point_256* p, ecc_point* pm)
 {
     int err;
 
     err = sp_256_to_mp(p->x, pm->x);
-    if (err == MP_OKAY)
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(p->y, pm->y);
-    if (err == MP_OKAY)
+    }
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(p->z, pm->z);
-
-    return err;
-}
-
-/* Compare a with b in constant time.
- *
- * a  A single precision integer.
- * b  A single precision integer.
- * return -ve, 0 or +ve if a is less than, equal to or greater than b
- * respectively.
- */
-static sp_digit sp_256_cmp_5(const sp_digit* a, const sp_digit* b)
-{
-    sp_digit r = 0;
-#ifdef WOLFSSL_SP_SMALL
-    int i;
-
-    for (i=4; i>=0; i--)
-        r |= (a[i] - b[i]) & (0 - !r);
-#else
-    r |= (a[ 4] - b[ 4]) & (0 - !r);
-    r |= (a[ 3] - b[ 3]) & (0 - !r);
-    r |= (a[ 2] - b[ 2]) & (0 - !r);
-    r |= (a[ 1] - b[ 1]) & (0 - !r);
-    r |= (a[ 0] - b[ 0]) & (0 - !r);
-#endif /* WOLFSSL_SP_SMALL */
-
-    return r;
-}
-
-/* Normalize the values in each word to 52.
- *
- * a  Array of sp_digit to normalize.
- */
-static void sp_256_norm_5(sp_digit* a)
-{
-#ifdef WOLFSSL_SP_SMALL
-    int i;
-    for (i = 0; i < 4; i++) {
-        a[i+1] += a[i] >> 52;
-        a[i] &= 0xfffffffffffffl;
-    }
-#else
-    a[1] += a[0] >> 52; a[0] &= 0xfffffffffffffl;
-    a[2] += a[1] >> 52; a[1] &= 0xfffffffffffffl;
-    a[3] += a[2] >> 52; a[2] &= 0xfffffffffffffl;
-    a[4] += a[3] >> 52; a[3] &= 0xfffffffffffffl;
-#endif
-}
-
-/* Conditionally subtract b from a using the mask m.
- * m is -1 to subtract and 0 when not.
- *
- * r  A single precision number representing condition subtract result.
- * a  A single precision number to subtract from.
- * b  A single precision number to subtract.
- * m  Mask value to apply.
- */
-static void sp_256_cond_sub_5(sp_digit* r, const sp_digit* a,
-        const sp_digit* b, const sp_digit m)
-{
-#ifdef WOLFSSL_SP_SMALL
-    int i;
-
-    for (i = 0; i < 5; i++)
-        r[i] = a[i] - (b[i] & m);
-#else
-    r[ 0] = a[ 0] - (b[ 0] & m);
-    r[ 1] = a[ 1] - (b[ 1] & m);
-    r[ 2] = a[ 2] - (b[ 2] & m);
-    r[ 3] = a[ 3] - (b[ 3] & m);
-    r[ 4] = a[ 4] - (b[ 4] & m);
-#endif /* WOLFSSL_SP_SMALL */
-}
-
-/* Mul a by scalar b and add into r. (r += a * b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A scalar.
- */
-SP_NOINLINE static void sp_256_mul_add_5(sp_digit* r, const sp_digit* a,
-        const sp_digit b)
-{
-#ifdef WOLFSSL_SP_SMALL
-    int128_t tb = b;
-    int128_t t = 0;
-    int i;
-
-    for (i = 0; i < 5; i++) {
-        t += (tb * a[i]) + r[i];
-        r[i] = t & 0xfffffffffffffl;
-        t >>= 52;
-    }
-    r[5] += t;
-#else
-    int128_t tb = b;
-    int128_t t[5];
-
-    t[ 0] = tb * a[ 0];
-    t[ 1] = tb * a[ 1];
-    t[ 2] = tb * a[ 2];
-    t[ 3] = tb * a[ 3];
-    t[ 4] = tb * a[ 4];
-    r[ 0] +=                 (t[ 0] & 0xfffffffffffffl);
-    r[ 1] += (t[ 0] >> 52) + (t[ 1] & 0xfffffffffffffl);
-    r[ 2] += (t[ 1] >> 52) + (t[ 2] & 0xfffffffffffffl);
-    r[ 3] += (t[ 2] >> 52) + (t[ 3] & 0xfffffffffffffl);
-    r[ 4] += (t[ 3] >> 52) + (t[ 4] & 0xfffffffffffffl);
-    r[ 5] +=  t[ 4] >> 52;
-#endif /* WOLFSSL_SP_SMALL */
-}
-
-/* Shift the result in the high 256 bits down to the bottom.
- *
- * r  A single precision number.
- * a  A single precision number.
- */
-static void sp_256_mont_shift_5(sp_digit* r, const sp_digit* a)
-{
-#ifdef WOLFSSL_SP_SMALL
-    int i;
-    word64 n;
-
-    n = a[4] >> 48;
-    for (i = 0; i < 4; i++) {
-        n += a[5 + i] << 4;
-        r[i] = n & 0xfffffffffffffl;
-        n >>= 52;
-    }
-    n += a[9] << 4;
-    r[4] = n;
-#else
-    word64 n;
-
-    n  = a[4] >> 48;
-    n += a[ 5] << 4; r[ 0] = n & 0xfffffffffffffl; n >>= 52;
-    n += a[ 6] << 4; r[ 1] = n & 0xfffffffffffffl; n >>= 52;
-    n += a[ 7] << 4; r[ 2] = n & 0xfffffffffffffl; n >>= 52;
-    n += a[ 8] << 4; r[ 3] = n & 0xfffffffffffffl; n >>= 52;
-    n += a[ 9] << 4; r[ 4] = n;
-#endif /* WOLFSSL_SP_SMALL */
-    XMEMSET(&r[5], 0, sizeof(*r) * 5);
-}
-
-/* Reduce the number back to 256 bits using Montgomery reduction.
- *
- * a   A single precision number to reduce in place.
- * m   The single precision number representing the modulus.
- * mp  The digit representing the negative inverse of m mod 2^n.
- */
-static void sp_256_mont_reduce_5(sp_digit* a, sp_digit* m, sp_digit mp)
-{
-    int i;
-    sp_digit mu;
-
-    if (mp != 1) {
-        for (i=0; i<4; i++) {
-            mu = (a[i] * mp) & 0xfffffffffffffl;
-            sp_256_mul_add_5(a+i, m, mu);
-            a[i+1] += a[i] >> 52;
-        }
-        mu = (a[i] * mp) & 0xffffffffffffl;
-        sp_256_mul_add_5(a+i, m, mu);
-        a[i+1] += a[i] >> 52;
-        a[i] &= 0xfffffffffffffl;
-    }
-    else {
-        for (i=0; i<4; i++) {
-            mu = a[i] & 0xfffffffffffffl;
-            sp_256_mul_add_5(a+i, p256_mod, mu);
-            a[i+1] += a[i] >> 52;
-        }
-        mu = a[i] & 0xffffffffffffl;
-        sp_256_mul_add_5(a+i, p256_mod, mu);
-        a[i+1] += a[i] >> 52;
-        a[i] &= 0xfffffffffffffl;
-    }
-
-    sp_256_mont_shift_5(a, a);
-    sp_256_cond_sub_5(a, a, m, 0 - ((a[4] >> 48) > 0));
-    sp_256_norm_5(a);
+    }
+
+    return err;
 }
 
 #ifdef WOLFSSL_SP_SMALL
@@ -7221,20 +12840,22 @@
 
     c = ((int128_t)a[4]) * b[4];
     r[9] = (sp_digit)(c >> 52);
-    c = (c & 0xfffffffffffffl) << 52;
+    c = (c & 0xfffffffffffffL) << 52;
     for (k = 7; k >= 0; k--) {
         for (i = 4; i >= 0; i--) {
             j = k - i;
-            if (j >= 5)
-                break;
-            if (j < 0)
+            if (j >= 5) {
+                break;
+            }
+            if (j < 0) {
                 continue;
+            }
 
             c += ((int128_t)a[i]) * b[j];
         }
         r[k + 2] += c >> 104;
-        r[k + 1] = (c >> 52) & 0xfffffffffffffl;
-        c = (c & 0xfffffffffffffl) << 52;
+        r[k + 1] = (c >> 52) & 0xfffffffffffffL;
+        c = (c & 0xfffffffffffffL) << 52;
     }
     r[0] = (sp_digit)(c >> 52);
 }
@@ -7275,19 +12896,204 @@
                  + ((int128_t)a[ 4]) * b[ 3];
     int128_t t8   = ((int128_t)a[ 4]) * b[ 4];
 
-    t1   += t0  >> 52; r[ 0] = t0  & 0xfffffffffffffl;
-    t2   += t1  >> 52; r[ 1] = t1  & 0xfffffffffffffl;
-    t3   += t2  >> 52; r[ 2] = t2  & 0xfffffffffffffl;
-    t4   += t3  >> 52; r[ 3] = t3  & 0xfffffffffffffl;
-    t5   += t4  >> 52; r[ 4] = t4  & 0xfffffffffffffl;
-    t6   += t5  >> 52; r[ 5] = t5  & 0xfffffffffffffl;
-    t7   += t6  >> 52; r[ 6] = t6  & 0xfffffffffffffl;
-    t8   += t7  >> 52; r[ 7] = t7  & 0xfffffffffffffl;
+    t1   += t0  >> 52; r[ 0] = t0  & 0xfffffffffffffL;
+    t2   += t1  >> 52; r[ 1] = t1  & 0xfffffffffffffL;
+    t3   += t2  >> 52; r[ 2] = t2  & 0xfffffffffffffL;
+    t4   += t3  >> 52; r[ 3] = t3  & 0xfffffffffffffL;
+    t5   += t4  >> 52; r[ 4] = t4  & 0xfffffffffffffL;
+    t6   += t5  >> 52; r[ 5] = t5  & 0xfffffffffffffL;
+    t7   += t6  >> 52; r[ 6] = t6  & 0xfffffffffffffL;
+    t8   += t7  >> 52; r[ 7] = t7  & 0xfffffffffffffL;
     r[9] = (sp_digit)(t8 >> 52);
-                       r[8] = t8 & 0xfffffffffffffl;
-}
-
-#endif /* WOLFSSL_SP_SMALL */
+                       r[8] = t8 & 0xfffffffffffffL;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#define sp_256_mont_reduce_order_5         sp_256_mont_reduce_5
+
+/* Compare a with b in constant time.
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+static sp_digit sp_256_cmp_5(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=4; i>=0; i--) {
+        r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    }
+#else
+    r |= (a[ 4] - b[ 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 3] - b[ 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 2] - b[ 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 1] - b[ 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 0] - b[ 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+#endif /* WOLFSSL_SP_SMALL */
+
+    return r;
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not.
+ *
+ * r  A single precision number representing condition subtract result.
+ * a  A single precision number to subtract from.
+ * b  A single precision number to subtract.
+ * m  Mask value to apply.
+ */
+static void sp_256_cond_sub_5(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i = 0; i < 5; i++) {
+        r[i] = a[i] - (b[i] & m);
+    }
+#else
+    r[ 0] = a[ 0] - (b[ 0] & m);
+    r[ 1] = a[ 1] - (b[ 1] & m);
+    r[ 2] = a[ 2] - (b[ 2] & m);
+    r[ 3] = a[ 3] - (b[ 3] & m);
+    r[ 4] = a[ 4] - (b[ 4] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Mul a by scalar b and add into r. (r += a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A scalar.
+ */
+SP_NOINLINE static void sp_256_mul_add_5(sp_digit* r, const sp_digit* a,
+        const sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int128_t tb = b;
+    int128_t t = 0;
+    int i;
+
+    for (i = 0; i < 5; i++) {
+        t += (tb * a[i]) + r[i];
+        r[i] = t & 0xfffffffffffffL;
+        t >>= 52;
+    }
+    r[5] += t;
+#else
+    int128_t tb = b;
+    int128_t t[5];
+
+    t[ 0] = tb * a[ 0];
+    t[ 1] = tb * a[ 1];
+    t[ 2] = tb * a[ 2];
+    t[ 3] = tb * a[ 3];
+    t[ 4] = tb * a[ 4];
+    r[ 0] += (sp_digit)                 (t[ 0] & 0xfffffffffffffL);
+    r[ 1] += (sp_digit)((t[ 0] >> 52) + (t[ 1] & 0xfffffffffffffL));
+    r[ 2] += (sp_digit)((t[ 1] >> 52) + (t[ 2] & 0xfffffffffffffL));
+    r[ 3] += (sp_digit)((t[ 2] >> 52) + (t[ 3] & 0xfffffffffffffL));
+    r[ 4] += (sp_digit)((t[ 3] >> 52) + (t[ 4] & 0xfffffffffffffL));
+    r[ 5] += (sp_digit) (t[ 4] >> 52);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Normalize the values in each word to 52.
+ *
+ * a  Array of sp_digit to normalize.
+ */
+static void sp_256_norm_5(sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    for (i = 0; i < 4; i++) {
+        a[i+1] += a[i] >> 52;
+        a[i] &= 0xfffffffffffffL;
+    }
+#else
+    a[1] += a[0] >> 52; a[0] &= 0xfffffffffffffL;
+    a[2] += a[1] >> 52; a[1] &= 0xfffffffffffffL;
+    a[3] += a[2] >> 52; a[2] &= 0xfffffffffffffL;
+    a[4] += a[3] >> 52; a[3] &= 0xfffffffffffffL;
+#endif
+}
+
+/* Shift the result in the high 256 bits down to the bottom.
+ *
+ * r  A single precision number.
+ * a  A single precision number.
+ */
+static void sp_256_mont_shift_5(sp_digit* r, const sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    word64 n;
+
+    n = a[4] >> 48;
+    for (i = 0; i < 4; i++) {
+        n += (word64)a[5 + i] << 4;
+        r[i] = n & 0xfffffffffffffL;
+        n >>= 52;
+    }
+    n += (word64)a[9] << 4;
+    r[4] = n;
+#else
+    word64 n;
+
+    n  = a[4] >> 48;
+    n += (word64)a[ 5] << 4U; r[ 0] = n & 0xfffffffffffffUL; n >>= 52U;
+    n += (word64)a[ 6] << 4U; r[ 1] = n & 0xfffffffffffffUL; n >>= 52U;
+    n += (word64)a[ 7] << 4U; r[ 2] = n & 0xfffffffffffffUL; n >>= 52U;
+    n += (word64)a[ 8] << 4U; r[ 3] = n & 0xfffffffffffffUL; n >>= 52U;
+    n += (word64)a[ 9] << 4U; r[ 4] = n;
+#endif /* WOLFSSL_SP_SMALL */
+    XMEMSET(&r[5], 0, sizeof(*r) * 5U);
+}
+
+/* Reduce the number back to 256 bits using Montgomery reduction.
+ *
+ * a   A single precision number to reduce in place.
+ * m   The single precision number representing the modulus.
+ * mp  The digit representing the negative inverse of m mod 2^n.
+ */
+static void sp_256_mont_reduce_5(sp_digit* a, const sp_digit* m, sp_digit mp)
+{
+    int i;
+    sp_digit mu;
+
+    if (mp != 1) {
+        for (i=0; i<4; i++) {
+            mu = (a[i] * mp) & 0xfffffffffffffL;
+            sp_256_mul_add_5(a+i, m, mu);
+            a[i+1] += a[i] >> 52;
+        }
+        mu = (a[i] * mp) & 0xffffffffffffL;
+        sp_256_mul_add_5(a+i, m, mu);
+        a[i+1] += a[i] >> 52;
+        a[i] &= 0xfffffffffffffL;
+    }
+    else {
+        for (i=0; i<4; i++) {
+            mu = a[i] & 0xfffffffffffffL;
+            sp_256_mul_add_5(a+i, p256_mod, mu);
+            a[i+1] += a[i] >> 52;
+        }
+        mu = a[i] & 0xffffffffffffL;
+        sp_256_mul_add_5(a+i, p256_mod, mu);
+        a[i+1] += a[i] >> 52;
+        a[i] &= 0xfffffffffffffL;
+    }
+
+    sp_256_mont_shift_5(a, a);
+    sp_256_cond_sub_5(a, a, m, 0 - (((a[4] >> 48) > 0) ?
+            (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_5(a);
+}
+
 /* Multiply two Montogmery form numbers mod the modulus (prime).
  * (r = a * b mod m)
  *
@@ -7297,8 +13103,8 @@
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_256_mont_mul_5(sp_digit* r, sp_digit* a, sp_digit* b,
-        sp_digit* m, sp_digit mp)
+static void sp_256_mont_mul_5(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
 {
     sp_256_mul_5(r, a, b);
     sp_256_mont_reduce_5(r, m, mp);
@@ -7317,23 +13123,26 @@
 
     c = ((int128_t)a[4]) * a[4];
     r[9] = (sp_digit)(c >> 52);
-    c = (c & 0xfffffffffffffl) << 52;
+    c = (c & 0xfffffffffffffL) << 52;
     for (k = 7; k >= 0; k--) {
         for (i = 4; i >= 0; i--) {
             j = k - i;
-            if (j >= 5 || i <= j)
-                break;
-            if (j < 0)
+            if (j >= 5 || i <= j) {
+                break;
+            }
+            if (j < 0) {
                 continue;
+            }
 
             c += ((int128_t)a[i]) * a[j] * 2;
         }
-        if (i == j)
+        if (i == j) {
            c += ((int128_t)a[i]) * a[i];
+        }
 
         r[k + 2] += c >> 104;
-        r[k + 1] = (c >> 52) & 0xfffffffffffffl;
-        c = (c & 0xfffffffffffffl) << 52;
+        r[k + 1] = (c >> 52) & 0xfffffffffffffL;
+        c = (c & 0xfffffffffffffL) << 52;
     }
     r[0] = (sp_digit)(c >> 52);
 }
@@ -7362,16 +13171,16 @@
     int128_t t7   = (((int128_t)a[ 3]) * a[ 4]) * 2;
     int128_t t8   =  ((int128_t)a[ 4]) * a[ 4];
 
-    t1   += t0  >> 52; r[ 0] = t0  & 0xfffffffffffffl;
-    t2   += t1  >> 52; r[ 1] = t1  & 0xfffffffffffffl;
-    t3   += t2  >> 52; r[ 2] = t2  & 0xfffffffffffffl;
-    t4   += t3  >> 52; r[ 3] = t3  & 0xfffffffffffffl;
-    t5   += t4  >> 52; r[ 4] = t4  & 0xfffffffffffffl;
-    t6   += t5  >> 52; r[ 5] = t5  & 0xfffffffffffffl;
-    t7   += t6  >> 52; r[ 6] = t6  & 0xfffffffffffffl;
-    t8   += t7  >> 52; r[ 7] = t7  & 0xfffffffffffffl;
+    t1   += t0  >> 52; r[ 0] = t0  & 0xfffffffffffffL;
+    t2   += t1  >> 52; r[ 1] = t1  & 0xfffffffffffffL;
+    t3   += t2  >> 52; r[ 2] = t2  & 0xfffffffffffffL;
+    t4   += t3  >> 52; r[ 3] = t3  & 0xfffffffffffffL;
+    t5   += t4  >> 52; r[ 4] = t4  & 0xfffffffffffffL;
+    t6   += t5  >> 52; r[ 5] = t5  & 0xfffffffffffffL;
+    t7   += t6  >> 52; r[ 6] = t6  & 0xfffffffffffffL;
+    t8   += t7  >> 52; r[ 7] = t7  & 0xfffffffffffffL;
     r[9] = (sp_digit)(t8 >> 52);
-                       r[8] = t8 & 0xfffffffffffffl;
+                       r[8] = t8 & 0xfffffffffffffL;
 }
 
 #endif /* WOLFSSL_SP_SMALL */
@@ -7382,14 +13191,14 @@
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_256_mont_sqr_5(sp_digit* r, sp_digit* a, sp_digit* m,
+static void sp_256_mont_sqr_5(sp_digit* r, const sp_digit* a, const sp_digit* m,
         sp_digit mp)
 {
     sp_256_sqr_5(r, a);
     sp_256_mont_reduce_5(r, m, mp);
 }
 
-#ifndef WOLFSSL_SP_SMALL
+#if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY)
 /* Square the Montgomery form number a number of times. (r = a ^ n mod m)
  *
  * r   Result of squaring.
@@ -7398,19 +13207,21 @@
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_256_mont_sqr_n_5(sp_digit* r, sp_digit* a, int n,
-        sp_digit* m, sp_digit mp)
+static void sp_256_mont_sqr_n_5(sp_digit* r, const sp_digit* a, int n,
+        const sp_digit* m, sp_digit mp)
 {
     sp_256_mont_sqr_5(r, a, m, mp);
-    for (; n > 1; n--)
+    for (; n > 1; n--) {
         sp_256_mont_sqr_5(r, r, m, mp);
-}
-
-#else
+    }
+}
+
+#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */
+#ifdef WOLFSSL_SP_SMALL
 /* Mod-2 for the P256 curve. */
-static const uint64_t p256_mod_2[4] = {
-    0xfffffffffffffffd,0x00000000ffffffff,0x0000000000000000,
-    0xffffffff00000001
+static const uint64_t p256_mod_minus_2[4] = {
+    0xfffffffffffffffdU,0x00000000ffffffffU,0x0000000000000000U,
+    0xffffffff00000001U
 };
 #endif /* !WOLFSSL_SP_SMALL */
 
@@ -7421,7 +13232,7 @@
  * a   Number to invert.
  * td  Temporary data.
  */
-static void sp_256_mont_inv_5(sp_digit* r, sp_digit* a, sp_digit* td)
+static void sp_256_mont_inv_5(sp_digit* r, const sp_digit* a, sp_digit* td)
 {
 #ifdef WOLFSSL_SP_SMALL
     sp_digit* t = td;
@@ -7430,71 +13241,66 @@
     XMEMCPY(t, a, sizeof(sp_digit) * 5);
     for (i=254; i>=0; i--) {
         sp_256_mont_sqr_5(t, t, p256_mod, p256_mp_mod);
-        if (p256_mod_2[i / 64] & ((sp_digit)1 << (i % 64)))
+        if (p256_mod_minus_2[i / 64] & ((sp_digit)1 << (i % 64)))
             sp_256_mont_mul_5(t, t, a, p256_mod, p256_mp_mod);
     }
     XMEMCPY(r, t, sizeof(sp_digit) * 5);
 #else
-    sp_digit* t = td;
+    sp_digit* t1 = td;
     sp_digit* t2 = td + 2 * 5;
     sp_digit* t3 = td + 4 * 5;
-
-    /* t = a^2 */
-    sp_256_mont_sqr_5(t, a, p256_mod, p256_mp_mod);
-    /* t = a^3 = t * a */
-    sp_256_mont_mul_5(t, t, a, p256_mod, p256_mp_mod);
-    /* t2= a^c = t ^ 2 ^ 2 */
-    sp_256_mont_sqr_n_5(t2, t, 2, p256_mod, p256_mp_mod);
-    /* t3= a^d = t2 * a */
-    sp_256_mont_mul_5(t3, t2, a, p256_mod, p256_mp_mod);
-    /* t = a^f = t2 * t */
-    sp_256_mont_mul_5(t, t2, t, p256_mod, p256_mp_mod);
-    /* t2= a^f0 = t ^ 2 ^ 4 */
-    sp_256_mont_sqr_n_5(t2, t, 4, p256_mod, p256_mp_mod);
-    /* t3= a^fd = t2 * t3 */
-    sp_256_mont_mul_5(t3, t2, t3, p256_mod, p256_mp_mod);
-    /* t = a^ff = t2 * t */
-    sp_256_mont_mul_5(t, t2, t, p256_mod, p256_mp_mod);
-    /* t2= a^ff00 = t ^ 2 ^ 8 */
-    sp_256_mont_sqr_n_5(t2, t, 8, p256_mod, p256_mp_mod);
-    /* t3= a^fffd = t2 * t3 */
-    sp_256_mont_mul_5(t3, t2, t3, p256_mod, p256_mp_mod);
-    /* t = a^ffff = t2 * t */
-    sp_256_mont_mul_5(t, t2, t, p256_mod, p256_mp_mod);
-    /* t2= a^ffff0000 = t ^ 2 ^ 16 */
-    sp_256_mont_sqr_n_5(t2, t, 16, p256_mod, p256_mp_mod);
-    /* t3= a^fffffffd = t2 * t3 */
-    sp_256_mont_mul_5(t3, t2, t3, p256_mod, p256_mp_mod);
-    /* t = a^ffffffff = t2 * t */
-    sp_256_mont_mul_5(t, t2, t, p256_mod, p256_mp_mod);
-    /* t = a^ffffffff00000000 = t ^ 2 ^ 32  */
-    sp_256_mont_sqr_n_5(t2, t, 32, p256_mod, p256_mp_mod);
-    /* t2= a^ffffffffffffffff = t2 * t */
-    sp_256_mont_mul_5(t, t2, t, p256_mod, p256_mp_mod);
-    /* t2= a^ffffffff00000001 = t2 * a */
-    sp_256_mont_mul_5(t2, t2, a, p256_mod, p256_mp_mod);
-    /* t2= a^ffffffff000000010000000000000000000000000000000000000000
-     *   = t2 ^ 2 ^ 160 */
-    sp_256_mont_sqr_n_5(t2, t2, 160, p256_mod, p256_mp_mod);
-    /* t2= a^ffffffff00000001000000000000000000000000ffffffffffffffff
-     *   = t2 * t */
-    sp_256_mont_mul_5(t2, t2, t, p256_mod, p256_mp_mod);
-    /* t2= a^ffffffff00000001000000000000000000000000ffffffffffffffff00000000
-     *   = t2 ^ 2 ^ 32 */
-    sp_256_mont_sqr_n_5(t2, t2, 32, p256_mod, p256_mp_mod);
-    /* r = a^ffffffff00000001000000000000000000000000fffffffffffffffffffffffd
-     *   = t2 * t3 */
-    sp_256_mont_mul_5(r, t2, t3, p256_mod, p256_mp_mod);
-#endif /* WOLFSSL_SP_SMALL */
-}
-
-/* Map the Montgomery form projective co-ordinate point to an affine point.
- *
- * r  Resulting affine co-ordinate point.
- * p  Montgomery form projective co-ordinate point.
+    /* 0x2 */
+    sp_256_mont_sqr_5(t1, a, p256_mod, p256_mp_mod);
+    /* 0x3 */
+    sp_256_mont_mul_5(t2, t1, a, p256_mod, p256_mp_mod);
+    /* 0xc */
+    sp_256_mont_sqr_n_5(t1, t2, 2, p256_mod, p256_mp_mod);
+    /* 0xd */
+    sp_256_mont_mul_5(t3, t1, a, p256_mod, p256_mp_mod);
+    /* 0xf */
+    sp_256_mont_mul_5(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xf0 */
+    sp_256_mont_sqr_n_5(t1, t2, 4, p256_mod, p256_mp_mod);
+    /* 0xfd */
+    sp_256_mont_mul_5(t3, t3, t1, p256_mod, p256_mp_mod);
+    /* 0xff */
+    sp_256_mont_mul_5(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xff00 */
+    sp_256_mont_sqr_n_5(t1, t2, 8, p256_mod, p256_mp_mod);
+    /* 0xfffd */
+    sp_256_mont_mul_5(t3, t3, t1, p256_mod, p256_mp_mod);
+    /* 0xffff */
+    sp_256_mont_mul_5(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xffff0000 */
+    sp_256_mont_sqr_n_5(t1, t2, 16, p256_mod, p256_mp_mod);
+    /* 0xfffffffd */
+    sp_256_mont_mul_5(t3, t3, t1, p256_mod, p256_mp_mod);
+    /* 0xffffffff */
+    sp_256_mont_mul_5(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000000 */
+    sp_256_mont_sqr_n_5(t1, t2, 32, p256_mod, p256_mp_mod);
+    /* 0xffffffffffffffff */
+    sp_256_mont_mul_5(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001 */
+    sp_256_mont_mul_5(r, t1, a, p256_mod, p256_mp_mod);
+    /* 0xffffffff000000010000000000000000000000000000000000000000 */
+    sp_256_mont_sqr_n_5(r, r, 160, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001000000000000000000000000ffffffffffffffff */
+    sp_256_mont_mul_5(r, r, t2, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001000000000000000000000000ffffffffffffffff00000000 */
+    sp_256_mont_sqr_n_5(r, r, 32, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001000000000000000000000000fffffffffffffffffffffffd */
+    sp_256_mont_mul_5(r, r, t3, p256_mod, p256_mp_mod);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Map the Montgomery form projective coordinate point to an affine point.
+ *
+ * r  Resulting affine coordinate point.
+ * p  Montgomery form projective coordinate point.
  * t  Temporary ordinate data.
  */
-static void sp_256_map_5(sp_point* r, sp_point* p, sp_digit* t)
+static void sp_256_map_5(sp_point_256* r, const sp_point_256* p, sp_digit* t)
 {
     sp_digit* t1 = t;
     sp_digit* t2 = t + 2*5;
@@ -7507,20 +13313,22 @@
 
     /* x /= z^2 */
     sp_256_mont_mul_5(r->x, p->x, t2, p256_mod, p256_mp_mod);
-    XMEMSET(r->x + 5, 0, sizeof(r->x) / 2);
+    XMEMSET(r->x + 5, 0, sizeof(r->x) / 2U);
     sp_256_mont_reduce_5(r->x, p256_mod, p256_mp_mod);
     /* Reduce x to less than modulus */
     n = sp_256_cmp_5(r->x, p256_mod);
-    sp_256_cond_sub_5(r->x, r->x, p256_mod, 0 - (n >= 0));
+    sp_256_cond_sub_5(r->x, r->x, p256_mod, 0 - ((n >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
     sp_256_norm_5(r->x);
 
     /* y /= z^3 */
     sp_256_mont_mul_5(r->y, p->y, t1, p256_mod, p256_mp_mod);
-    XMEMSET(r->y + 5, 0, sizeof(r->y) / 2);
+    XMEMSET(r->y + 5, 0, sizeof(r->y) / 2U);
     sp_256_mont_reduce_5(r->y, p256_mod, p256_mp_mod);
     /* Reduce y to less than modulus */
     n = sp_256_cmp_5(r->y, p256_mod);
-    sp_256_cond_sub_5(r->y, r->y, p256_mod, 0 - (n >= 0));
+    sp_256_cond_sub_5(r->y, r->y, p256_mod, 0 - ((n >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
     sp_256_norm_5(r->y);
 
     XMEMSET(r->z, 0, sizeof(r->z));
@@ -7540,8 +13348,9 @@
 {
     int i;
 
-    for (i = 0; i < 5; i++)
+    for (i = 0; i < 5; i++) {
         r[i] = a[i] + b[i];
+    }
 
     return 0;
 }
@@ -7572,12 +13381,13 @@
  * b   Second number to add in Montogmery form.
  * m   Modulus (prime).
  */
-static void sp_256_mont_add_5(sp_digit* r, sp_digit* a, sp_digit* b,
-        sp_digit* m)
-{
-    sp_256_add_5(r, a, b);
+static void sp_256_mont_add_5(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    (void)sp_256_add_5(r, a, b);
     sp_256_norm_5(r);
-    sp_256_cond_sub_5(r, r, m, 0 - ((r[4] >> 48) > 0));
+    sp_256_cond_sub_5(r, r, m, 0 - (((r[4] >> 48) > 0) ?
+                (sp_digit)1 : (sp_digit)0));
     sp_256_norm_5(r);
 }
 
@@ -7587,11 +13397,12 @@
  * a   Number to double in Montogmery form.
  * m   Modulus (prime).
  */
-static void sp_256_mont_dbl_5(sp_digit* r, sp_digit* a, sp_digit* m)
-{
-    sp_256_add_5(r, a, a);
+static void sp_256_mont_dbl_5(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    (void)sp_256_add_5(r, a, a);
     sp_256_norm_5(r);
-    sp_256_cond_sub_5(r, r, m, 0 - ((r[4] >> 48) > 0));
+    sp_256_cond_sub_5(r, r, m, 0 - (((r[4] >> 48) > 0) ?
+                (sp_digit)1 : (sp_digit)0));
     sp_256_norm_5(r);
 }
 
@@ -7601,15 +13412,17 @@
  * a   Number to triple in Montogmery form.
  * m   Modulus (prime).
  */
-static void sp_256_mont_tpl_5(sp_digit* r, sp_digit* a, sp_digit* m)
-{
-    sp_256_add_5(r, a, a);
+static void sp_256_mont_tpl_5(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    (void)sp_256_add_5(r, a, a);
     sp_256_norm_5(r);
-    sp_256_cond_sub_5(r, r, m, 0 - ((r[4] >> 48) > 0));
+    sp_256_cond_sub_5(r, r, m, 0 - (((r[4] >> 48) > 0) ?
+                (sp_digit)1 : (sp_digit)0));
     sp_256_norm_5(r);
-    sp_256_add_5(r, r, a);
+    (void)sp_256_add_5(r, r, a);
     sp_256_norm_5(r);
-    sp_256_cond_sub_5(r, r, m, 0 - ((r[4] >> 48) > 0));
+    sp_256_cond_sub_5(r, r, m, 0 - (((r[4] >> 48) > 0) ?
+                (sp_digit)1 : (sp_digit)0));
     sp_256_norm_5(r);
 }
 
@@ -7625,8 +13438,9 @@
 {
     int i;
 
-    for (i = 0; i < 5; i++)
+    for (i = 0; i < 5; i++) {
         r[i] = a[i] - b[i];
+    }
 
     return 0;
 }
@@ -7665,8 +13479,9 @@
 #ifdef WOLFSSL_SP_SMALL
     int i;
 
-    for (i = 0; i < 5; i++)
+    for (i = 0; i < 5; i++) {
         r[i] = a[i] + (b[i] & m);
+    }
 #else
     r[ 0] = a[ 0] + (b[ 0] & m);
     r[ 1] = a[ 1] + (b[ 1] & m);
@@ -7683,10 +13498,10 @@
  * b   Number to subtract with in Montogmery form.
  * m   Modulus (prime).
  */
-static void sp_256_mont_sub_5(sp_digit* r, sp_digit* a, sp_digit* b,
-        sp_digit* m)
-{
-    sp_256_sub_5(r, a, b);
+static void sp_256_mont_sub_5(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    (void)sp_256_sub_5(r, a, b);
     sp_256_cond_add_5(r, r, m, r[4] >> 48);
     sp_256_norm_5(r);
 }
@@ -7702,13 +13517,14 @@
 #ifdef WOLFSSL_SP_SMALL
     int i;
 
-    for (i=0; i<4; i++)
-        r[i] = ((a[i] >> 1) | (a[i + 1] << 51)) & 0xfffffffffffffl;
-#else
-    r[0] = ((a[0] >> 1) | (a[1] << 51)) & 0xfffffffffffffl;
-    r[1] = ((a[1] >> 1) | (a[2] << 51)) & 0xfffffffffffffl;
-    r[2] = ((a[2] >> 1) | (a[3] << 51)) & 0xfffffffffffffl;
-    r[3] = ((a[3] >> 1) | (a[4] << 51)) & 0xfffffffffffffl;
+    for (i=0; i<4; i++) {
+        r[i] = ((a[i] >> 1) | (a[i + 1] << 51)) & 0xfffffffffffffL;
+    }
+#else
+    r[0] = ((a[0] >> 1) | (a[1] << 51)) & 0xfffffffffffffL;
+    r[1] = ((a[1] >> 1) | (a[2] << 51)) & 0xfffffffffffffL;
+    r[2] = ((a[2] >> 1) | (a[3] << 51)) & 0xfffffffffffffL;
+    r[3] = ((a[3] >> 1) | (a[4] << 51)) & 0xfffffffffffffL;
 #endif
     r[4] = a[4] >> 1;
 }
@@ -7719,7 +13535,7 @@
  * a  Number to divide.
  * m  Modulus (prime).
  */
-static void sp_256_div2_5(sp_digit* r, sp_digit* a, sp_digit* m)
+static void sp_256_div2_5(sp_digit* r, const sp_digit* a, const sp_digit* m)
 {
     sp_256_cond_add_5(r, a, m, 0 - (a[0] & 1));
     sp_256_norm_5(r);
@@ -7732,50 +13548,38 @@
  * p  Point to double.
  * t  Temporary ordinate data.
  */
-static void sp_256_proj_point_dbl_5(sp_point* r, sp_point* p, sp_digit* t)
-{
-    sp_point *rp[2];
-    sp_point tp;
+static void sp_256_proj_point_dbl_5(sp_point_256* r, const sp_point_256* p, sp_digit* t)
+{
     sp_digit* t1 = t;
     sp_digit* t2 = t + 2*5;
     sp_digit* x;
     sp_digit* y;
     sp_digit* z;
-    int i;
-
-    /* When infinity don't double point passed in - constant time. */
-    rp[0] = r;
-    rp[1] = &tp;
-    x = rp[p->infinity]->x;
-    y = rp[p->infinity]->y;
-    z = rp[p->infinity]->z;
-    /* Put point to double into result - good for infinty. */
+
+    x = r->x;
+    y = r->y;
+    z = r->z;
+    /* Put infinity into result. */
     if (r != p) {
-        for (i=0; i<5; i++)
-            r->x[i] = p->x[i];
-        for (i=0; i<5; i++)
-            r->y[i] = p->y[i];
-        for (i=0; i<5; i++)
-            r->z[i] = p->z[i];
         r->infinity = p->infinity;
     }
 
     /* T1 = Z * Z */
-    sp_256_mont_sqr_5(t1, z, p256_mod, p256_mp_mod);
+    sp_256_mont_sqr_5(t1, p->z, p256_mod, p256_mp_mod);
     /* Z = Y * Z */
-    sp_256_mont_mul_5(z, y, z, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_5(z, p->y, p->z, p256_mod, p256_mp_mod);
     /* Z = 2Z */
     sp_256_mont_dbl_5(z, z, p256_mod);
     /* T2 = X - T1 */
-    sp_256_mont_sub_5(t2, x, t1, p256_mod);
+    sp_256_mont_sub_5(t2, p->x, t1, p256_mod);
     /* T1 = X + T1 */
-    sp_256_mont_add_5(t1, x, t1, p256_mod);
+    sp_256_mont_add_5(t1, p->x, t1, p256_mod);
     /* T2 = T1 * T2 */
     sp_256_mont_mul_5(t2, t1, t2, p256_mod, p256_mp_mod);
     /* T1 = 3T2 */
     sp_256_mont_tpl_5(t1, t2, p256_mod);
     /* Y = 2Y */
-    sp_256_mont_dbl_5(y, y, p256_mod);
+    sp_256_mont_dbl_5(y, p->y, p256_mod);
     /* Y = Y * Y */
     sp_256_mont_sqr_5(y, y, p256_mod, p256_mp_mod);
     /* T2 = Y * Y */
@@ -7783,9 +13587,9 @@
     /* T2 = T2/2 */
     sp_256_div2_5(t2, t2, p256_mod);
     /* Y = Y * X */
-    sp_256_mont_mul_5(y, y, x, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_5(y, y, p->x, p256_mod, p256_mp_mod);
     /* X = T1 * T1 */
-    sp_256_mont_mul_5(x, t1, t1, p256_mod, p256_mp_mod);
+    sp_256_mont_sqr_5(x, t1, p256_mod, p256_mp_mod);
     /* X = X - Y */
     sp_256_mont_sub_5(x, x, y, p256_mod);
     /* X = X - Y */
@@ -7796,7 +13600,6 @@
     sp_256_mont_mul_5(y, y, t1, p256_mod, p256_mp_mod);
     /* Y = Y - T2 */
     sp_256_mont_sub_5(y, y, t2, p256_mod);
-
 }
 
 /* Compare two numbers to determine if they are equal.
@@ -7815,16 +13618,15 @@
 /* Add two Montgomery form projective points.
  *
  * r  Result of addition.
- * p  Frist point to add.
+ * p  First point to add.
  * q  Second point to add.
  * t  Temporary ordinate data.
  */
-static void sp_256_proj_point_add_5(sp_point* r, sp_point* p, sp_point* q,
+static void sp_256_proj_point_add_5(sp_point_256* r, const sp_point_256* p, const sp_point_256* q,
         sp_digit* t)
 {
-    sp_point *ap[2];
-    sp_point *rp[2];
-    sp_point tp;
+    const sp_point_256* ap[2];
+    sp_point_256* rp[2];
     sp_digit* t1 = t;
     sp_digit* t2 = t + 2*5;
     sp_digit* t3 = t + 4*5;
@@ -7837,34 +13639,39 @@
 
     /* Ensure only the first point is the same as the result. */
     if (q == r) {
-        sp_point* a = p;
+        const sp_point_256* a = p;
         p = q;
         q = a;
     }
 
     /* Check double */
-    sp_256_sub_5(t1, p256_mod, q->y);
+    (void)sp_256_sub_5(t1, p256_mod, q->y);
     sp_256_norm_5(t1);
-    if (sp_256_cmp_equal_5(p->x, q->x) & sp_256_cmp_equal_5(p->z, q->z) &
-        (sp_256_cmp_equal_5(p->y, q->y) | sp_256_cmp_equal_5(p->y, t1))) {
+    if ((sp_256_cmp_equal_5(p->x, q->x) & sp_256_cmp_equal_5(p->z, q->z) &
+        (sp_256_cmp_equal_5(p->y, q->y) | sp_256_cmp_equal_5(p->y, t1))) != 0) {
         sp_256_proj_point_dbl_5(r, p, t);
     }
     else {
         rp[0] = r;
-        rp[1] = &tp;
-        XMEMSET(&tp, 0, sizeof(tp));
+
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point_256));
         x = rp[p->infinity | q->infinity]->x;
         y = rp[p->infinity | q->infinity]->y;
         z = rp[p->infinity | q->infinity]->z;
 
         ap[0] = p;
         ap[1] = q;
-        for (i=0; i<5; i++)
+        for (i=0; i<5; i++) {
             r->x[i] = ap[p->infinity]->x[i];
-        for (i=0; i<5; i++)
+        }
+        for (i=0; i<5; i++) {
             r->y[i] = ap[p->infinity]->y[i];
-        for (i=0; i<5; i++)
+        }
+        for (i=0; i<5; i++) {
             r->z[i] = ap[p->infinity]->z[i];
+        }
         r->infinity = ap[p->infinity]->infinity;
 
         /* U1 = X1*Z2^2 */
@@ -7904,7 +13711,7 @@
 
 #ifdef WOLFSSL_SP_SMALL
 /* Multiply the point by the scalar and return the result.
- * If map is true then convert result to affine co-ordinates.
+ * If map is true then convert result to affine coordinates.
  *
  * r     Resulting point.
  * g     Point to multiply.
@@ -7913,12 +13720,16 @@
  * heap  Heap to use for allocation.
  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
  */
-static int sp_256_ecc_mulmod_5(sp_point* r, sp_point* g, sp_digit* k,
+static int sp_256_ecc_mulmod_5(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
         int map, void* heap)
 {
-    sp_point* td;
-    sp_point* t[3];
+#ifdef WOLFSSL_SP_NO_MALLOC
+    sp_point_256 t[3];
+    sp_digit tmp[2 * 5 * 5];
+#else
+    sp_point_256* t;
     sp_digit* tmp;
+#endif
     sp_digit n;
     int i;
     int c, y;
@@ -7926,30 +13737,28 @@
 
     (void)heap;
 
-    td = (sp_point*)XMALLOC(sizeof(sp_point) * 3, heap, DYNAMIC_TYPE_ECC);
-    if (td == NULL)
+#ifndef WOLFSSL_SP_NO_MALLOC
+    t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 3, heap, DYNAMIC_TYPE_ECC);
+    if (t == NULL)
         err = MEMORY_E;
     tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 5 * 5, heap,
-                             DYNAMIC_TYPE_ECC);
+                                                              DYNAMIC_TYPE_ECC);
     if (tmp == NULL)
         err = MEMORY_E;
-
-    if (err == MP_OKAY) {
-        XMEMSET(td, 0, sizeof(*td) * 3);
-
-        t[0] = &td[0];
-        t[1] = &td[1];
-        t[2] = &td[2];
+#endif
+
+    if (err == MP_OKAY) {
+        XMEMSET(t, 0, sizeof(sp_point_256) * 3);
 
         /* t[0] = {0, 0, 1} * norm */
-        t[0]->infinity = 1;
+        t[0].infinity = 1;
         /* t[1] = {g->x, g->y, g->z} * norm */
-        err = sp_256_mod_mul_norm_5(t[1]->x, g->x, p256_mod);
+        err = sp_256_mod_mul_norm_5(t[1].x, g->x, p256_mod);
     }
     if (err == MP_OKAY)
-        err = sp_256_mod_mul_norm_5(t[1]->y, g->y, p256_mod);
+        err = sp_256_mod_mul_norm_5(t[1].y, g->y, p256_mod);
     if (err == MP_OKAY)
-        err = sp_256_mod_mul_norm_5(t[1]->z, g->z, p256_mod);
+        err = sp_256_mod_mul_norm_5(t[1].z, g->z, p256_mod);
 
     if (err == MP_OKAY) {
         i = 4;
@@ -7967,38 +13776,45 @@
             y = (n >> 51) & 1;
             n <<= 1;
 
-            sp_256_proj_point_add_5(t[y^1], t[0], t[1], tmp);
-
-            XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
-                                  ((size_t)t[1] & addr_mask[y])),
-                    sizeof(sp_point));
-            sp_256_proj_point_dbl_5(t[2], t[2], tmp);
-            XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
-                            ((size_t)t[1] & addr_mask[y])), t[2],
-                    sizeof(sp_point));
-        }
-
-        if (map)
-            sp_256_map_5(r, t[0], tmp);
-        else
-            XMEMCPY(r, t[0], sizeof(sp_point));
-    }
-
+            sp_256_proj_point_add_5(&t[y^1], &t[0], &t[1], tmp);
+
+            XMEMCPY(&t[2], (void*)(((size_t)&t[0] & addr_mask[y^1]) +
+                                   ((size_t)&t[1] & addr_mask[y])),
+                    sizeof(sp_point_256));
+            sp_256_proj_point_dbl_5(&t[2], &t[2], tmp);
+            XMEMCPY((void*)(((size_t)&t[0] & addr_mask[y^1]) +
+                            ((size_t)&t[1] & addr_mask[y])), &t[2],
+                    sizeof(sp_point_256));
+        }
+
+        if (map != 0) {
+            sp_256_map_5(r, &t[0], tmp);
+        }
+        else {
+            XMEMCPY(r, &t[0], sizeof(sp_point_256));
+        }
+    }
+
+#ifndef WOLFSSL_SP_NO_MALLOC
     if (tmp != NULL) {
         XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 5 * 5);
-        XFREE(tmp, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    }
-    if (td != NULL) {
-        XMEMSET(td, 0, sizeof(sp_point) * 3);
-        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    }
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+    if (t != NULL) {
+        XMEMSET(t, 0, sizeof(sp_point_256) * 3);
+        XFREE(t, NULL, DYNAMIC_TYPE_ECC);
+    }
+#else
+    ForceZero(tmp, sizeof(tmp));
+    ForceZero(t, sizeof(t));
+#endif
 
     return err;
 }
 
 #elif defined(WOLFSSL_SP_CACHE_RESISTANT)
 /* Multiply the point by the scalar and return the result.
- * If map is true then convert result to affine co-ordinates.
+ * If map is true then convert result to affine coordinates.
  *
  * r     Resulting point.
  * g     Point to multiply.
@@ -8007,15 +13823,16 @@
  * heap  Heap to use for allocation.
  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
  */
-static int sp_256_ecc_mulmod_5(sp_point* r, sp_point* g, sp_digit* k,
+static int sp_256_ecc_mulmod_5(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
         int map, void* heap)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point td[3];
-    sp_digit tmpd[2 * 5 * 5];
-#endif
-    sp_point* t;
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 t[3];
+    sp_digit tmp[2 * 5 * 5];
+#else
+    sp_point_256* t;
     sp_digit* tmp;
+#endif
     sp_digit n;
     int i;
     int c, y;
@@ -8023,29 +13840,22 @@
 
     (void)heap;
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    sp_point td[3];
-    t = (sp_point*)XMALLOC(sizeof(*td) * 3, heap, DYNAMIC_TYPE_ECC);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_point*)XMALLOC(sizeof(*t) * 3, heap, DYNAMIC_TYPE_ECC);
     if (t == NULL)
         err = MEMORY_E;
     tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 5 * 5, heap,
                              DYNAMIC_TYPE_ECC);
     if (tmp == NULL)
         err = MEMORY_E;
-#else
-    t = td;
-    tmp = tmpd;
-#endif
-
-    if (err == MP_OKAY) {
-        t[0] = &td[0];
-        t[1] = &td[1];
-        t[2] = &td[2];
-
+#endif
+
+    if (err == MP_OKAY) {
         /* t[0] = {0, 0, 1} * norm */
         XMEMSET(&t[0], 0, sizeof(t[0]));
         t[0].infinity = 1;
         /* t[1] = {g->x, g->y, g->z} * norm */
+        t[1].infinity = 0;
         err = sp_256_mod_mul_norm_5(t[1].x, g->x, p256_mod);
     }
     if (err == MP_OKAY)
@@ -8075,27 +13885,29 @@
                                  ((size_t)&t[1] & addr_mask[y])), sizeof(t[2]));
             sp_256_proj_point_dbl_5(&t[2], &t[2], tmp);
             XMEMCPY((void*)(((size_t)&t[0] & addr_mask[y^1]) +
-                           ((size_t)&t[1] & addr_mask[y])), &t[2], sizeof(t[2]));
-        }
-
-        if (map)
+                          ((size_t)&t[1] & addr_mask[y])), &t[2], sizeof(t[2]));
+        }
+
+        if (map != 0) {
             sp_256_map_5(r, &t[0], tmp);
-        else
-            XMEMCPY(r, &t[0], sizeof(sp_point));
-    }
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+        }
+        else {
+            XMEMCPY(r, &t[0], sizeof(sp_point_256));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     if (tmp != NULL) {
         XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 5 * 5);
         XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
     }
     if (t != NULL) {
-        XMEMSET(t, 0, sizeof(sp_point) * 3);
+        XMEMSET(t, 0, sizeof(sp_point_256) * 3);
         XFREE(t, heap, DYNAMIC_TYPE_ECC);
     }
 #else
-    ForceZero(tmpd, sizeof(tmpd));
-    ForceZero(td, sizeof(td));
+    ForceZero(tmp, sizeof(tmp));
+    ForceZero(t, sizeof(t));
 #endif
 
     return err;
@@ -8103,14 +13915,13 @@
 
 #else
 /* A table entry for pre-computed points. */
-typedef struct sp_table_entry {
+typedef struct sp_table_entry_256 {
     sp_digit x[5];
     sp_digit y[5];
-    byte infinity;
-} sp_table_entry;
+} sp_table_entry_256;
 
 /* Multiply the point by the scalar and return the result.
- * If map is true then convert result to affine co-ordinates.
+ * If map is true then convert result to affine coordinates.
  *
  * r     Resulting point.
  * g     Point to multiply.
@@ -8119,16 +13930,16 @@
  * heap  Heap to use for allocation.
  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
  */
-static int sp_256_ecc_mulmod_fast_5(sp_point* r, sp_point* g, sp_digit* k,
+static int sp_256_ecc_mulmod_fast_5(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
         int map, void* heap)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point td[16];
-    sp_point rtd;
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 td[16];
+    sp_point_256 rtd;
     sp_digit tmpd[2 * 5 * 5];
 #endif
-    sp_point* t;
-    sp_point* rt;
+    sp_point_256* t;
+    sp_point_256* rt;
     sp_digit* tmp;
     sp_digit n;
     int i;
@@ -8137,9 +13948,9 @@
 
     (void)heap;
 
-    err = sp_ecc_point_new(heap, rtd, rt);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    t = (sp_point*)XMALLOC(sizeof(sp_point) * 16, heap, DYNAMIC_TYPE_ECC);
+    err = sp_256_point_new_5(heap, rtd, rt);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 16, heap, DYNAMIC_TYPE_ECC);
     if (t == NULL)
         err = MEMORY_E;
     tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 5 * 5, heap,
@@ -8156,9 +13967,9 @@
         XMEMSET(&t[0], 0, sizeof(t[0]));
         t[0].infinity = 1;
         /* t[1] = {g->x, g->y, g->z} * norm */
-        sp_256_mod_mul_norm_5(t[1].x, g->x, p256_mod);
-        sp_256_mod_mul_norm_5(t[1].y, g->y, p256_mod);
-        sp_256_mod_mul_norm_5(t[1].z, g->z, p256_mod);
+        (void)sp_256_mod_mul_norm_5(t[1].x, g->x, p256_mod);
+        (void)sp_256_mod_mul_norm_5(t[1].y, g->y, p256_mod);
+        (void)sp_256_mod_mul_norm_5(t[1].z, g->z, p256_mod);
         t[1].infinity = 0;
         sp_256_proj_point_dbl_5(&t[ 2], &t[ 1], tmp);
         t[ 2].infinity = 0;
@@ -8193,7 +14004,7 @@
         n = k[i+1] << 12;
         c = 44;
         y = n >> 56;
-        XMEMCPY(rt, &t[y], sizeof(sp_point));
+        XMEMCPY(rt, &t[y], sizeof(sp_point_256));
         n <<= 8;
         for (; i>=0 || c>=4; ) {
             if (c < 4) {
@@ -8212,26 +14023,28 @@
             sp_256_proj_point_add_5(rt, rt, &t[y], tmp);
         }
 
-        if (map)
+        if (map != 0) {
             sp_256_map_5(r, rt, tmp);
-        else
-            XMEMCPY(r, rt, sizeof(sp_point));
-    }
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_256));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     if (tmp != NULL) {
         XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 5 * 5);
         XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
     }
     if (t != NULL) {
-        XMEMSET(t, 0, sizeof(sp_point) * 16);
+        XMEMSET(t, 0, sizeof(sp_point_256) * 16);
         XFREE(t, heap, DYNAMIC_TYPE_ECC);
     }
 #else
     ForceZero(tmpd, sizeof(tmpd));
     ForceZero(td, sizeof(td));
 #endif
-    sp_ecc_point_free(rt, 1, heap);
+    sp_256_point_free_5(rt, 1, heap);
 
     return err;
 }
@@ -8244,11 +14057,8 @@
  * n  Number of times to double
  * t  Temporary ordinate data.
  */
-static void sp_256_proj_point_dbl_n_5(sp_point* r, sp_point* p, int n,
-        sp_digit* t)
-{
-    sp_point *rp[2];
-    sp_point tp;
+static void sp_256_proj_point_dbl_n_5(sp_point_256* p, int n, sp_digit* t)
+{
     sp_digit* w = t;
     sp_digit* a = t + 2*5;
     sp_digit* b = t + 4*5;
@@ -8257,54 +14067,73 @@
     sp_digit* x;
     sp_digit* y;
     sp_digit* z;
-    int i;
-
-    rp[0] = r;
-    rp[1] = &tp;
-    x = rp[p->infinity]->x;
-    y = rp[p->infinity]->y;
-    z = rp[p->infinity]->z;
-    if (r != p) {
-        for (i=0; i<5; i++)
-            r->x[i] = p->x[i];
-        for (i=0; i<5; i++)
-            r->y[i] = p->y[i];
-        for (i=0; i<5; i++)
-            r->z[i] = p->z[i];
-        r->infinity = p->infinity;
-    }
+
+    x = p->x;
+    y = p->y;
+    z = p->z;
 
     /* Y = 2*Y */
     sp_256_mont_dbl_5(y, y, p256_mod);
     /* W = Z^4 */
     sp_256_mont_sqr_5(w, z, p256_mod, p256_mp_mod);
     sp_256_mont_sqr_5(w, w, p256_mod, p256_mp_mod);
-    while (n--) {
+
+#ifndef WOLFSSL_SP_SMALL
+    while (--n > 0)
+#else
+    while (--n >= 0)
+#endif
+    {
         /* A = 3*(X^2 - W) */
         sp_256_mont_sqr_5(t1, x, p256_mod, p256_mp_mod);
         sp_256_mont_sub_5(t1, t1, w, p256_mod);
         sp_256_mont_tpl_5(a, t1, p256_mod);
         /* B = X*Y^2 */
-        sp_256_mont_sqr_5(t2, y, p256_mod, p256_mp_mod);
-        sp_256_mont_mul_5(b, t2, x, p256_mod, p256_mp_mod);
+        sp_256_mont_sqr_5(t1, y, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_5(b, t1, x, p256_mod, p256_mp_mod);
         /* X = A^2 - 2B */
         sp_256_mont_sqr_5(x, a, p256_mod, p256_mp_mod);
-        sp_256_mont_dbl_5(t1, b, p256_mod);
-        sp_256_mont_sub_5(x, x, t1, p256_mod);
+        sp_256_mont_dbl_5(t2, b, p256_mod);
+        sp_256_mont_sub_5(x, x, t2, p256_mod);
         /* Z = Z*Y */
         sp_256_mont_mul_5(z, z, y, p256_mod, p256_mp_mod);
         /* t2 = Y^4 */
-        sp_256_mont_sqr_5(t2, t2, p256_mod, p256_mp_mod);
-        if (n) {
+        sp_256_mont_sqr_5(t1, t1, p256_mod, p256_mp_mod);
+#ifdef WOLFSSL_SP_SMALL
+        if (n != 0)
+#endif
+        {
             /* W = W*Y^4 */
-            sp_256_mont_mul_5(w, w, t2, p256_mod, p256_mp_mod);
+            sp_256_mont_mul_5(w, w, t1, p256_mod, p256_mp_mod);
         }
         /* y = 2*A*(B - X) - Y^4 */
         sp_256_mont_sub_5(y, b, x, p256_mod);
         sp_256_mont_mul_5(y, y, a, p256_mod, p256_mp_mod);
         sp_256_mont_dbl_5(y, y, p256_mod);
-        sp_256_mont_sub_5(y, y, t2, p256_mod);
-    }
+        sp_256_mont_sub_5(y, y, t1, p256_mod);
+    }
+#ifndef WOLFSSL_SP_SMALL
+    /* A = 3*(X^2 - W) */
+    sp_256_mont_sqr_5(t1, x, p256_mod, p256_mp_mod);
+    sp_256_mont_sub_5(t1, t1, w, p256_mod);
+    sp_256_mont_tpl_5(a, t1, p256_mod);
+    /* B = X*Y^2 */
+    sp_256_mont_sqr_5(t1, y, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_5(b, t1, x, p256_mod, p256_mp_mod);
+    /* X = A^2 - 2B */
+    sp_256_mont_sqr_5(x, a, p256_mod, p256_mp_mod);
+    sp_256_mont_dbl_5(t2, b, p256_mod);
+    sp_256_mont_sub_5(x, x, t2, p256_mod);
+    /* Z = Z*Y */
+    sp_256_mont_mul_5(z, z, y, p256_mod, p256_mp_mod);
+    /* t2 = Y^4 */
+    sp_256_mont_sqr_5(t1, t1, p256_mod, p256_mp_mod);
+    /* y = 2*A*(B - X) - Y^4 */
+    sp_256_mont_sub_5(y, b, x, p256_mod);
+    sp_256_mont_mul_5(y, y, a, p256_mod, p256_mp_mod);
+    sp_256_mont_dbl_5(y, y, p256_mod);
+    sp_256_mont_sub_5(y, y, t1, p256_mod);
+#endif
     /* Y = Y/2 */
     sp_256_div2_5(y, y, p256_mod);
 }
@@ -8315,16 +14144,15 @@
  * Only the first point can be the same pointer as the result point.
  *
  * r  Result of addition.
- * p  Frist point to add.
+ * p  First point to add.
  * q  Second point to add.
  * t  Temporary ordinate data.
  */
-static void sp_256_proj_point_add_qz1_5(sp_point* r, sp_point* p,
-        sp_point* q, sp_digit* t)
-{
-    sp_point *ap[2];
-    sp_point *rp[2];
-    sp_point tp;
+static void sp_256_proj_point_add_qz1_5(sp_point_256* r, const sp_point_256* p,
+        const sp_point_256* q, sp_digit* t)
+{
+    const sp_point_256* ap[2];
+    sp_point_256* rp[2];
     sp_digit* t1 = t;
     sp_digit* t2 = t + 2*5;
     sp_digit* t3 = t + 4*5;
@@ -8336,28 +14164,33 @@
     int i;
 
     /* Check double */
-    sp_256_sub_5(t1, p256_mod, q->y);
+    (void)sp_256_sub_5(t1, p256_mod, q->y);
     sp_256_norm_5(t1);
-    if (sp_256_cmp_equal_5(p->x, q->x) & sp_256_cmp_equal_5(p->z, q->z) &
-        (sp_256_cmp_equal_5(p->y, q->y) | sp_256_cmp_equal_5(p->y, t1))) {
+    if ((sp_256_cmp_equal_5(p->x, q->x) & sp_256_cmp_equal_5(p->z, q->z) &
+        (sp_256_cmp_equal_5(p->y, q->y) | sp_256_cmp_equal_5(p->y, t1))) != 0) {
         sp_256_proj_point_dbl_5(r, p, t);
     }
     else {
         rp[0] = r;
-        rp[1] = &tp;
-        XMEMSET(&tp, 0, sizeof(tp));
+
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point_256));
         x = rp[p->infinity | q->infinity]->x;
         y = rp[p->infinity | q->infinity]->y;
         z = rp[p->infinity | q->infinity]->z;
 
         ap[0] = p;
         ap[1] = q;
-        for (i=0; i<5; i++)
+        for (i=0; i<5; i++) {
             r->x[i] = ap[p->infinity]->x[i];
-        for (i=0; i<5; i++)
+        }
+        for (i=0; i<5; i++) {
             r->y[i] = ap[p->infinity]->y[i];
-        for (i=0; i<5; i++)
+        }
+        for (i=0; i<5; i++) {
             r->z[i] = ap[p->infinity]->z[i];
+        }
         r->infinity = ap[p->infinity]->infinity;
 
         /* U2 = X2*Z1^2 */
@@ -8393,9 +14226,9 @@
  * Ordinates are in Montgomery form.
  *
  * a  Point to convert.
- * t  Temprorary data.
- */
-static void sp_256_proj_to_affine_5(sp_point* a, sp_digit* t)
+ * t  Temporary data.
+ */
+static void sp_256_proj_to_affine_5(sp_point_256* a, sp_digit* t)
 {
     sp_digit* t1 = t;
     sp_digit* t2 = t + 2 * 5;
@@ -8415,35 +14248,40 @@
  *
  * a      The base point.
  * table  Place to store generated point data.
- * tmp    Temprorary data.
+ * tmp    Temporary data.
  * heap  Heap to use for allocation.
  */
-static int sp_256_gen_stripe_table_5(sp_point* a,
-        sp_table_entry* table, sp_digit* tmp, void* heap)
-{
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point td, s1d, s2d;
-#endif
-    sp_point* t;
-    sp_point* s1 = NULL;
-    sp_point* s2 = NULL;
+static int sp_256_gen_stripe_table_5(const sp_point_256* a,
+        sp_table_entry_256* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 td, s1d, s2d;
+#endif
+    sp_point_256* t;
+    sp_point_256* s1 = NULL;
+    sp_point_256* s2 = NULL;
     int i, j;
     int err;
 
     (void)heap;
 
-    err = sp_ecc_point_new(heap, td, t);
-    if (err == MP_OKAY)
-        err = sp_ecc_point_new(heap, s1d, s1);
-    if (err == MP_OKAY)
-        err = sp_ecc_point_new(heap, s2d, s2);
-
-    if (err == MP_OKAY)
+    err = sp_256_point_new_5(heap, td, t);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_5(heap, s1d, s1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_5(heap, s2d, s2);
+    }
+
+    if (err == MP_OKAY) {
         err = sp_256_mod_mul_norm_5(t->x, a->x, p256_mod);
-    if (err == MP_OKAY)
+    }
+    if (err == MP_OKAY) {
         err = sp_256_mod_mul_norm_5(t->y, a->y, p256_mod);
-    if (err == MP_OKAY)
+    }
+    if (err == MP_OKAY) {
         err = sp_256_mod_mul_norm_5(t->z, a->z, p256_mod);
+    }
     if (err == MP_OKAY) {
         t->infinity = 0;
         sp_256_proj_to_affine_5(t, tmp);
@@ -8454,19 +14292,16 @@
         s2->infinity = 0;
 
         /* table[0] = {0, 0, infinity} */
-        XMEMSET(&table[0], 0, sizeof(sp_table_entry));
-        table[0].infinity = 1;
+        XMEMSET(&table[0], 0, sizeof(sp_table_entry_256));
         /* table[1] = Affine version of 'a' in Montgomery form */
         XMEMCPY(table[1].x, t->x, sizeof(table->x));
         XMEMCPY(table[1].y, t->y, sizeof(table->y));
-        table[1].infinity = 0;
 
         for (i=1; i<8; i++) {
-            sp_256_proj_point_dbl_n_5(t, t, 32, tmp);
+            sp_256_proj_point_dbl_n_5(t, 32, tmp);
             sp_256_proj_to_affine_5(t, tmp);
             XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
             XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
-            table[1<<i].infinity = 0;
         }
 
         for (i=1; i<8; i++) {
@@ -8479,21 +14314,20 @@
                 sp_256_proj_to_affine_5(t, tmp);
                 XMEMCPY(table[j].x, t->x, sizeof(table->x));
                 XMEMCPY(table[j].y, t->y, sizeof(table->y));
-                table[j].infinity = 0;
-            }
-        }
-    }
-
-    sp_ecc_point_free(s2, 0, heap);
-    sp_ecc_point_free(s1, 0, heap);
-    sp_ecc_point_free( t, 0, heap);
+            }
+        }
+    }
+
+    sp_256_point_free_5(s2, 0, heap);
+    sp_256_point_free_5(s1, 0, heap);
+    sp_256_point_free_5( t, 0, heap);
 
     return err;
 }
 
 #endif /* FP_ECC */
 /* Multiply the point by the scalar and return the result.
- * If map is true then convert result to affine co-ordinates.
+ * If map is true then convert result to affine coordinates.
  *
  * r     Resulting point.
  * k     Scalar to multiply by.
@@ -8501,16 +14335,16 @@
  * heap  Heap to use for allocation.
  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
  */
-static int sp_256_ecc_mulmod_stripe_5(sp_point* r, sp_point* g,
-        sp_table_entry* table, sp_digit* k, int map, void* heap)
-{
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point rtd;
-    sp_point pd;
+static int sp_256_ecc_mulmod_stripe_5(sp_point_256* r, const sp_point_256* g,
+        const sp_table_entry_256* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 rtd;
+    sp_point_256 pd;
     sp_digit td[2 * 5 * 5];
 #endif
-    sp_point* rt;
-    sp_point* p = NULL;
+    sp_point_256* rt;
+    sp_point_256* p = NULL;
     sp_digit* t;
     int i, j;
     int y, x;
@@ -8519,14 +14353,17 @@
     (void)g;
     (void)heap;
 
-    err = sp_ecc_point_new(heap, rtd, rt);
-    if (err == MP_OKAY)
-        err = sp_ecc_point_new(heap, pd, p);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+
+    err = sp_256_point_new_5(heap, rtd, rt);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_5(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 5 * 5, heap,
                            DYNAMIC_TYPE_ECC);
-    if (t == NULL)
-        err = MEMORY_E;
+    if (t == NULL) {
+        err = MEMORY_E;
+    }
 #else
     t = td;
 #endif
@@ -8536,35 +14373,40 @@
         XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
 
         y = 0;
-        for (j=0,x=31; j<8; j++,x+=32)
+        for (j=0,x=31; j<8; j++,x+=32) {
             y |= ((k[x / 52] >> (x % 52)) & 1) << j;
+        }
         XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
         XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
-        rt->infinity = table[y].infinity;
+        rt->infinity = !y;
         for (i=30; i>=0; i--) {
             y = 0;
-            for (j=0,x=i; j<8; j++,x+=32)
+            for (j=0,x=i; j<8; j++,x+=32) {
                 y |= ((k[x / 52] >> (x % 52)) & 1) << j;
+            }
 
             sp_256_proj_point_dbl_5(rt, rt, t);
             XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
             XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
-            p->infinity = table[y].infinity;
+            p->infinity = !y;
             sp_256_proj_point_add_qz1_5(rt, rt, p, t);
         }
 
-        if (map)
+        if (map != 0) {
             sp_256_map_5(r, rt, t);
-        else
-            XMEMCPY(r, rt, sizeof(sp_point));
-    }
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (t != NULL)
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_256));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
         XFREE(t, heap, DYNAMIC_TYPE_ECC);
-#endif
-    sp_ecc_point_free(p, 0, heap);
-    sp_ecc_point_free(rt, 0, heap);
+    }
+#endif
+    sp_256_point_free_5(p, 0, heap);
+    sp_256_point_free_5(rt, 0, heap);
 
     return err;
 }
@@ -8574,43 +14416,43 @@
     #define FP_ENTRIES 16
 #endif
 
-typedef struct sp_cache_t {
+typedef struct sp_cache_256_t {
     sp_digit x[5];
     sp_digit y[5];
-    sp_table_entry table[256];
+    sp_table_entry_256 table[256];
     uint32_t cnt;
     int set;
-} sp_cache_t;
-
-static THREAD_LS_T sp_cache_t sp_cache[FP_ENTRIES];
-static THREAD_LS_T int sp_cache_last = -1;
-static THREAD_LS_T int sp_cache_inited = 0;
+} sp_cache_256_t;
+
+static THREAD_LS_T sp_cache_256_t sp_cache_256[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_256_last = -1;
+static THREAD_LS_T int sp_cache_256_inited = 0;
 
 #ifndef HAVE_THREAD_LS
-    static volatile int initCacheMutex = 0;
-    static wolfSSL_Mutex sp_cache_lock;
-#endif
-
-static void sp_ecc_get_cache(sp_point* g, sp_cache_t** cache)
+    static volatile int initCacheMutex_256 = 0;
+    static wolfSSL_Mutex sp_cache_256_lock;
+#endif
+
+static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache)
 {
     int i, j;
     uint32_t least;
 
-    if (sp_cache_inited == 0) {
+    if (sp_cache_256_inited == 0) {
         for (i=0; i<FP_ENTRIES; i++) {
-            sp_cache[i].set = 0;
-        }
-        sp_cache_inited = 1;
+            sp_cache_256[i].set = 0;
+        }
+        sp_cache_256_inited = 1;
     }
 
     /* Compare point with those in cache. */
     for (i=0; i<FP_ENTRIES; i++) {
-        if (!sp_cache[i].set)
+        if (!sp_cache_256[i].set)
             continue;
 
-        if (sp_256_cmp_equal_5(g->x, sp_cache[i].x) & 
-                           sp_256_cmp_equal_5(g->y, sp_cache[i].y)) {
-            sp_cache[i].cnt++;
+        if (sp_256_cmp_equal_5(g->x, sp_cache_256[i].x) &
+                           sp_256_cmp_equal_5(g->y, sp_cache_256[i].y)) {
+            sp_cache_256[i].cnt++;
             break;
         }
     }
@@ -8618,37 +14460,37 @@
     /* No match. */
     if (i == FP_ENTRIES) {
         /* Find empty entry. */
-        i = (sp_cache_last + 1) % FP_ENTRIES;
-        for (; i != sp_cache_last; i=(i+1)%FP_ENTRIES) {
-            if (!sp_cache[i].set) {
+        i = (sp_cache_256_last + 1) % FP_ENTRIES;
+        for (; i != sp_cache_256_last; i=(i+1)%FP_ENTRIES) {
+            if (!sp_cache_256[i].set) {
                 break;
             }
         }
 
         /* Evict least used. */
-        if (i == sp_cache_last) {
-            least = sp_cache[0].cnt;
+        if (i == sp_cache_256_last) {
+            least = sp_cache_256[0].cnt;
             for (j=1; j<FP_ENTRIES; j++) {
-                if (sp_cache[j].cnt < least) {
+                if (sp_cache_256[j].cnt < least) {
                     i = j;
-                    least = sp_cache[i].cnt;
+                    least = sp_cache_256[i].cnt;
                 }
             }
         }
 
-        XMEMCPY(sp_cache[i].x, g->x, sizeof(sp_cache[i].x));
-        XMEMCPY(sp_cache[i].y, g->y, sizeof(sp_cache[i].y));
-        sp_cache[i].set = 1;
-        sp_cache[i].cnt = 1;
-    }
-
-    *cache = &sp_cache[i];
-    sp_cache_last = i;
+        XMEMCPY(sp_cache_256[i].x, g->x, sizeof(sp_cache_256[i].x));
+        XMEMCPY(sp_cache_256[i].y, g->y, sizeof(sp_cache_256[i].y));
+        sp_cache_256[i].set = 1;
+        sp_cache_256[i].cnt = 1;
+    }
+
+    *cache = &sp_cache_256[i];
+    sp_cache_256_last = i;
 }
 #endif /* FP_ECC */
 
 /* Multiply the base point of P256 by the scalar and return the result.
- * If map is true then convert result to affine co-ordinates.
+ * If map is true then convert result to affine coordinates.
  *
  * r     Resulting point.
  * g     Point to multiply.
@@ -8657,32 +14499,32 @@
  * heap  Heap to use for allocation.
  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
  */
-static int sp_256_ecc_mulmod_5(sp_point* r, sp_point* g, sp_digit* k,
+static int sp_256_ecc_mulmod_5(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
         int map, void* heap)
 {
 #ifndef FP_ECC
     return sp_256_ecc_mulmod_fast_5(r, g, k, map, heap);
 #else
     sp_digit tmp[2 * 5 * 5];
-    sp_cache_t* cache;
+    sp_cache_256_t* cache;
     int err = MP_OKAY;
 
 #ifndef HAVE_THREAD_LS
-    if (initCacheMutex == 0) {
-         wc_InitMutex(&sp_cache_lock);
-         initCacheMutex = 1;
-    }
-    if (wc_LockMutex(&sp_cache_lock) != 0)
+    if (initCacheMutex_256 == 0) {
+         wc_InitMutex(&sp_cache_256_lock);
+         initCacheMutex_256 = 1;
+    }
+    if (wc_LockMutex(&sp_cache_256_lock) != 0)
        err = BAD_MUTEX_E;
 #endif /* HAVE_THREAD_LS */
 
     if (err == MP_OKAY) {
-        sp_ecc_get_cache(g, &cache);
+        sp_ecc_get_cache_256(g, &cache);
         if (cache->cnt == 2)
             sp_256_gen_stripe_table_5(g, cache->table, tmp, heap);
 
 #ifndef HAVE_THREAD_LS
-        wc_UnLockMutex(&sp_cache_lock);
+        wc_UnLockMutex(&sp_cache_256_lock);
 #endif /* HAVE_THREAD_LS */
 
         if (cache->cnt < 2) {
@@ -8700,7 +14542,7 @@
 
 #endif
 /* Multiply the point by the scalar and return the result.
- * If map is true then convert result to affine co-ordinates.
+ * If map is true then convert result to affine coordinates.
  *
  * km    Scalar to multiply by.
  * p     Point to multiply.
@@ -8712,21 +14554,19 @@
 int sp_ecc_mulmod_256(mp_int* km, ecc_point* gm, ecc_point* r, int map,
         void* heap)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point p;
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 p;
     sp_digit kd[5];
 #endif
-    sp_point* point;
+    sp_point_256* point;
     sp_digit* k = NULL;
     int err = MP_OKAY;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    err = sp_ecc_point_new(heap, p, point);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        k = XMALLOC(sizeof(sp_digit) * 5, heap, DYNAMIC_TYPE_ECC);
+
+    err = sp_256_point_new_5(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5, heap,
+                                                              DYNAMIC_TYPE_ECC);
         if (k == NULL)
             err = MEMORY_E;
     }
@@ -8737,28 +14577,42 @@
         sp_256_from_mp(k, 5, km);
         sp_256_point_from_ecc_point_5(point, gm);
 
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_256_ecc_mulmod_avx2_5(point, point, k, map, heap);
-        else
-#endif
             err = sp_256_ecc_mulmod_5(point, point, k, map, heap);
     }
-    if (err == MP_OKAY)
+    if (err == MP_OKAY) {
         err = sp_256_point_to_ecc_point_5(point, r);
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (k != NULL)
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
         XFREE(k, heap, DYNAMIC_TYPE_ECC);
-#endif
-    sp_ecc_point_free(point, 0, heap);
+    }
+#endif
+    sp_256_point_free_5(point, 0, heap);
 
     return err;
 }
 
 #ifdef WOLFSSL_SP_SMALL
 /* Multiply the base point of P256 by the scalar and return the result.
- * If map is true then convert result to affine co-ordinates.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_base_5(sp_point_256* r, const sp_digit* k,
+        int map, void* heap)
+{
+    /* No pre-computed values. */
+    return sp_256_ecc_mulmod_5(r, &p256_base, k, map, heap);
+}
+
+#elif defined(WOLFSSL_SP_CACHE_RESISTANT)
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
  *
  * r     Resulting point.
  * k     Scalar to multiply by.
@@ -8766,7 +14620,7 @@
  * heap  Heap to use for allocation.
  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
  */
-static int sp_256_ecc_mulmod_base_5(sp_point* r, sp_digit* k,
+static int sp_256_ecc_mulmod_base_5(sp_point_256* r, const sp_digit* k,
         int map, void* heap)
 {
     /* No pre-computed values. */
@@ -8774,1545 +14628,1289 @@
 }
 
 #else
-static sp_table_entry p256_table[256] = {
+static const sp_table_entry_256 p256_table[256] = {
     /* 0 */
     { { 0x00, 0x00, 0x00, 0x00, 0x00 },
-      { 0x00, 0x00, 0x00, 0x00, 0x00 },
-      1 },
+      { 0x00, 0x00, 0x00, 0x00, 0x00 } },
     /* 1 */
-    { { 0x730d418a9143cl,0xfc5fedb60179el,0x762251075ba95l,0x55c679fb732b7l,
-        0x018905f76a537l },
-      { 0x25357ce95560al,0xe4ba19e45cddfl,0xd21f3258b4ab8l,0x5d85d2e88688dl,
-        0x08571ff182588l },
-      0 },
+    { { 0x730d418a9143cL,0xfc5fedb60179eL,0x762251075ba95L,0x55c679fb732b7L,
+        0x018905f76a537L },
+      { 0x25357ce95560aL,0xe4ba19e45cddfL,0xd21f3258b4ab8L,0x5d85d2e88688dL,
+        0x08571ff182588L } },
     /* 2 */
-    { { 0x886024147519al,0xac26b372f0202l,0x785ebc8d0981el,0x58e9a9d4a7caal,
-        0x0d953c50ddbdfl },
-      { 0x361ccfd590f8fl,0x6b44e6c9179d6l,0x2eb64cf72e962l,0x88f37fd961102l,
-        0x0863ebb7e9eb2l },
-      0 },
+    { { 0x886024147519aL,0xac26b372f0202L,0x785ebc8d0981eL,0x58e9a9d4a7caaL,
+        0x0d953c50ddbdfL },
+      { 0x361ccfd590f8fL,0x6b44e6c9179d6L,0x2eb64cf72e962L,0x88f37fd961102L,
+        0x0863ebb7e9eb2L } },
     /* 3 */
-    { { 0x6b6235cdb6485l,0xa22f0a2f97785l,0xf7e300b808f0el,0x80a03e68d9544l,
-        0x000076055b5ffl },
-      { 0x4eb9b838d2010l,0xbb3243708a763l,0x42a660654014fl,0x3ee0e0e47d398l,
-        0x0830877613437l },
-      0 },
+    { { 0x6b6235cdb6485L,0xa22f0a2f97785L,0xf7e300b808f0eL,0x80a03e68d9544L,
+        0x000076055b5ffL },
+      { 0x4eb9b838d2010L,0xbb3243708a763L,0x42a660654014fL,0x3ee0e0e47d398L,
+        0x0830877613437L } },
     /* 4 */
-    { { 0x22fc516a0d2bbl,0x6c1a6234994f9l,0x7c62c8b0d5cc1l,0x667f9241cf3a5l,
-        0x02f5e6961fd1bl },
-      { 0x5c70bf5a01797l,0x4d609561925c1l,0x71fdb523d20b4l,0x0f7b04911b370l,
-        0x0f648f9168d6fl },
-      0 },
+    { { 0x22fc516a0d2bbL,0x6c1a6234994f9L,0x7c62c8b0d5cc1L,0x667f9241cf3a5L,
+        0x02f5e6961fd1bL },
+      { 0x5c70bf5a01797L,0x4d609561925c1L,0x71fdb523d20b4L,0x0f7b04911b370L,
+        0x0f648f9168d6fL } },
     /* 5 */
-    { { 0x66847e137bbbcl,0x9e8a6a0bec9e5l,0x9d73463e43446l,0x0015b1c427617l,
-        0x05abe0285133dl },
-      { 0xa837cc04c7dabl,0x4c43260c0792al,0x8e6cc37573d9fl,0x73830c9315627l,
-        0x094bb725b6b6fl },
-      0 },
+    { { 0x66847e137bbbcL,0x9e8a6a0bec9e5L,0x9d73463e43446L,0x0015b1c427617L,
+        0x05abe0285133dL },
+      { 0xa837cc04c7dabL,0x4c43260c0792aL,0x8e6cc37573d9fL,0x73830c9315627L,
+        0x094bb725b6b6fL } },
     /* 6 */
-    { { 0x9b48f720f141cl,0xcd2df5bc74bbfl,0x11045c46199b3l,0xc4efdc3f61294l,
-        0x0cdd6bbcb2f7dl },
-      { 0x6700beaf436fdl,0x6db99326beccal,0x14f25226f647fl,0xe5f60c0fa7920l,
-        0x0a361bebd4bdal },
-      0 },
+    { { 0x9b48f720f141cL,0xcd2df5bc74bbfL,0x11045c46199b3L,0xc4efdc3f61294L,
+        0x0cdd6bbcb2f7dL },
+      { 0x6700beaf436fdL,0x6db99326beccaL,0x14f25226f647fL,0xe5f60c0fa7920L,
+        0x0a361bebd4bdaL } },
     /* 7 */
-    { { 0xa2558597c13c7l,0x5f50b7c3e128al,0x3c09d1dc38d63l,0x292c07039aecfl,
-        0x0ba12ca09c4b5l },
-      { 0x08fa459f91dfdl,0x66ceea07fb9e4l,0xd780b293af43bl,0xef4b1eceb0899l,
-        0x053ebb99d701fl },
-      0 },
+    { { 0xa2558597c13c7L,0x5f50b7c3e128aL,0x3c09d1dc38d63L,0x292c07039aecfL,
+        0x0ba12ca09c4b5L },
+      { 0x08fa459f91dfdL,0x66ceea07fb9e4L,0xd780b293af43bL,0xef4b1eceb0899L,
+        0x053ebb99d701fL } },
     /* 8 */
-    { { 0x7ee31b0e63d34l,0x72a9e54fab4fel,0x5e7b5a4f46005l,0x4831c0493334dl,
-        0x08589fb9206d5l },
-      { 0x0f5cc6583553al,0x4ae25649e5aa7l,0x0044652087909l,0x1c4fcc9045071l,
-        0x0ebb0696d0254l },
-      0 },
+    { { 0x7ee31b0e63d34L,0x72a9e54fab4feL,0x5e7b5a4f46005L,0x4831c0493334dL,
+        0x08589fb9206d5L },
+      { 0x0f5cc6583553aL,0x4ae25649e5aa7L,0x0044652087909L,0x1c4fcc9045071L,
+        0x0ebb0696d0254L } },
     /* 9 */
-    { { 0x6ca15ac1647c5l,0x47c4cf5799461l,0x64dfbacb8127dl,0x7da3dc666aa37l,
-        0x0eb2820cbd1b2l },
-      { 0x6f8d86a87e008l,0x9d922378f3940l,0x0ccecb2d87dfal,0xda1d56ed2e428l,
-        0x01f28289b55a7l },
-      0 },
+    { { 0x6ca15ac1647c5L,0x47c4cf5799461L,0x64dfbacb8127dL,0x7da3dc666aa37L,
+        0x0eb2820cbd1b2L },
+      { 0x6f8d86a87e008L,0x9d922378f3940L,0x0ccecb2d87dfaL,0xda1d56ed2e428L,
+        0x01f28289b55a7L } },
     /* 10 */
-    { { 0xaa0c03b89da99l,0x9eb8284022abbl,0x81c05e8a6f2d7l,0x4d6327847862bl,
-        0x0337a4b5905e5l },
-      { 0x7500d21f7794al,0xb77d6d7f613c6l,0x4cfd6e8207005l,0xfbd60a5a37810l,
-        0x00d65e0d5f4c2l },
-      0 },
+    { { 0xaa0c03b89da99L,0x9eb8284022abbL,0x81c05e8a6f2d7L,0x4d6327847862bL,
+        0x0337a4b5905e5L },
+      { 0x7500d21f7794aL,0xb77d6d7f613c6L,0x4cfd6e8207005L,0xfbd60a5a37810L,
+        0x00d65e0d5f4c2L } },
     /* 11 */
-    { { 0x09bbeb5275d38l,0x450be0a358d9dl,0x73eb2654268a7l,0xa232f0762ff49l,
-        0x0c23da24252f4l },
-      { 0x1b84f0b94520cl,0x63b05bd78e5dal,0x4d29ea1096667l,0xcff13a4dcb869l,
-        0x019de3b8cc790l },
-      0 },
+    { { 0x09bbeb5275d38L,0x450be0a358d9dL,0x73eb2654268a7L,0xa232f0762ff49L,
+        0x0c23da24252f4L },
+      { 0x1b84f0b94520cL,0x63b05bd78e5daL,0x4d29ea1096667L,0xcff13a4dcb869L,
+        0x019de3b8cc790L } },
     /* 12 */
-    { { 0xa716c26c5fe04l,0x0b3bba1bdb183l,0x4cb712c3b28del,0xcbfd7432c586al,
-        0x0e34dcbd491fcl },
-      { 0x8d46baaa58403l,0x8682e97a53b40l,0x6aaa8af9a6974l,0x0f7f9e3901273l,
-        0x0e7641f447b4el },
-      0 },
+    { { 0xa716c26c5fe04L,0x0b3bba1bdb183L,0x4cb712c3b28deL,0xcbfd7432c586aL,
+        0x0e34dcbd491fcL },
+      { 0x8d46baaa58403L,0x8682e97a53b40L,0x6aaa8af9a6974L,0x0f7f9e3901273L,
+        0x0e7641f447b4eL } },
     /* 13 */
-    { { 0x53941df64ba59l,0xec0b0242fc7d7l,0x1581859d33f10l,0x57bf4f06dfc6al,
-        0x04a12df57052al },
-      { 0x6338f9439dbd0l,0xd4bde53e1fbfal,0x1f1b314d3c24bl,0xea46fd5e4ffa2l,
-        0x06af5aa93bb5bl },
-      0 },
+    { { 0x53941df64ba59L,0xec0b0242fc7d7L,0x1581859d33f10L,0x57bf4f06dfc6aL,
+        0x04a12df57052aL },
+      { 0x6338f9439dbd0L,0xd4bde53e1fbfaL,0x1f1b314d3c24bL,0xea46fd5e4ffa2L,
+        0x06af5aa93bb5bL } },
     /* 14 */
-    { { 0x0b69910c91999l,0x402a580491da1l,0x8cc20900a24b4l,0x40133e0094b4bl,
-        0x05fe3475a66a4l },
-      { 0x8cabdf93e7b4bl,0x1a7c23f91ab0fl,0xd1e6263292b50l,0xa91642e889aecl,
-        0x0b544e308ecfel },
-      0 },
+    { { 0x0b69910c91999L,0x402a580491da1L,0x8cc20900a24b4L,0x40133e0094b4bL,
+        0x05fe3475a66a4L },
+      { 0x8cabdf93e7b4bL,0x1a7c23f91ab0fL,0xd1e6263292b50L,0xa91642e889aecL,
+        0x0b544e308ecfeL } },
     /* 15 */
-    { { 0x8c6e916ddfdcel,0x66f89179e6647l,0xd4e67e12c3291l,0xc20b4e8d6e764l,
-        0x0e0b6b2bda6b0l },
-      { 0x12df2bb7efb57l,0xde790c40070d3l,0x79bc9441aac0dl,0x3774f90336ad6l,
-        0x071c023de25a6l },
-      0 },
+    { { 0x8c6e916ddfdceL,0x66f89179e6647L,0xd4e67e12c3291L,0xc20b4e8d6e764L,
+        0x0e0b6b2bda6b0L },
+      { 0x12df2bb7efb57L,0xde790c40070d3L,0x79bc9441aac0dL,0x3774f90336ad6L,
+        0x071c023de25a6L } },
     /* 16 */
-    { { 0x8c244bfe20925l,0xc38fdce86762al,0xd38706391c19al,0x24f65a96a5d5dl,
-        0x061d587d421d3l },
-      { 0x673a2a37173eal,0x0853778b65e87l,0x5bab43e238480l,0xefbe10f8441e0l,
-        0x0fa11fe124621l },
-      0 },
+    { { 0x8c244bfe20925L,0xc38fdce86762aL,0xd38706391c19aL,0x24f65a96a5d5dL,
+        0x061d587d421d3L },
+      { 0x673a2a37173eaL,0x0853778b65e87L,0x5bab43e238480L,0xefbe10f8441e0L,
+        0x0fa11fe124621L } },
     /* 17 */
-    { { 0x91f2b2cb19ffdl,0x5bb1923c231c8l,0xac5ca8e01ba8dl,0xbedcb6d03d678l,
-        0x0586eb04c1f13l },
-      { 0x5c6e527e8ed09l,0x3c1819ede20c3l,0x6c652fa1e81a3l,0x4f11278fd6c05l,
-        0x019d5ac087086l },
-      0 },
+    { { 0x91f2b2cb19ffdL,0x5bb1923c231c8L,0xac5ca8e01ba8dL,0xbedcb6d03d678L,
+        0x0586eb04c1f13L },
+      { 0x5c6e527e8ed09L,0x3c1819ede20c3L,0x6c652fa1e81a3L,0x4f11278fd6c05L,
+        0x019d5ac087086L } },
     /* 18 */
-    { { 0x9f581309a4e1fl,0x1be92700741e9l,0xfd28d20ab7de7l,0x563f26a5ef0bel,
-        0x0e7c0073f7f9cl },
-      { 0xd663a0ef59f76l,0x5420fcb0501f6l,0xa6602d4669b3bl,0x3c0ac08c1f7a7l,
-        0x0e08504fec65bl },
-      0 },
+    { { 0x9f581309a4e1fL,0x1be92700741e9L,0xfd28d20ab7de7L,0x563f26a5ef0beL,
+        0x0e7c0073f7f9cL },
+      { 0xd663a0ef59f76L,0x5420fcb0501f6L,0xa6602d4669b3bL,0x3c0ac08c1f7a7L,
+        0x0e08504fec65bL } },
     /* 19 */
-    { { 0x8f68da031b3cal,0x9ee6da6d66f09l,0x4f246e86d1cabl,0x96b45bfd81fa9l,
-        0x078f018825b09l },
-      { 0xefde43a25787fl,0x0d1dccac9bb7el,0x35bfc368016f8l,0x747a0cea4877bl,
-        0x043a773b87e94l },
-      0 },
+    { { 0x8f68da031b3caL,0x9ee6da6d66f09L,0x4f246e86d1cabL,0x96b45bfd81fa9L,
+        0x078f018825b09L },
+      { 0xefde43a25787fL,0x0d1dccac9bb7eL,0x35bfc368016f8L,0x747a0cea4877bL,
+        0x043a773b87e94L } },
     /* 20 */
-    { { 0x77734d2b533d5l,0xf6a1bdddc0625l,0x79ec293673b8al,0x66b1577e7c9aal,
-        0x0bb6de651c3b2l },
-      { 0x9303ab65259b3l,0xd3d03a7480e7el,0xb3cfc27d6a0afl,0xb99bc5ac83d19l,
-        0x060b4619a5d18l },
-      0 },
+    { { 0x77734d2b533d5L,0xf6a1bdddc0625L,0x79ec293673b8aL,0x66b1577e7c9aaL,
+        0x0bb6de651c3b2L },
+      { 0x9303ab65259b3L,0xd3d03a7480e7eL,0xb3cfc27d6a0afL,0xb99bc5ac83d19L,
+        0x060b4619a5d18L } },
     /* 21 */
-    { { 0xa38e11ae5aa1cl,0x2b49e73658bd6l,0xe5f87edb8b765l,0xffcd0b130014el,
-        0x09d0f27b2aeebl },
-      { 0x246317a730a55l,0x2fddbbc83aca9l,0xc019a719c955bl,0xc48d07c1dfe0al,
-        0x0244a566d356el },
-      0 },
+    { { 0xa38e11ae5aa1cL,0x2b49e73658bd6L,0xe5f87edb8b765L,0xffcd0b130014eL,
+        0x09d0f27b2aeebL },
+      { 0x246317a730a55L,0x2fddbbc83aca9L,0xc019a719c955bL,0xc48d07c1dfe0aL,
+        0x0244a566d356eL } },
     /* 22 */
-    { { 0x0394aeacf1f96l,0xa9024c271c6dbl,0x2cbd3b99f2122l,0xef692626ac1b8l,
-        0x045e58c873581l },
-      { 0xf479da38f9dbcl,0x46e888a040d3fl,0x6e0bed7a8aaf1l,0xb7a4945adfb24l,
-        0x0c040e21cc1e4l },
-      0 },
+    { { 0x0394aeacf1f96L,0xa9024c271c6dbL,0x2cbd3b99f2122L,0xef692626ac1b8L,
+        0x045e58c873581L },
+      { 0xf479da38f9dbcL,0x46e888a040d3fL,0x6e0bed7a8aaf1L,0xb7a4945adfb24L,
+        0x0c040e21cc1e4L } },
     /* 23 */
-    { { 0xaf0006f8117b6l,0xff73a35433847l,0xd9475eb651969l,0x6ec7482b35761l,
-        0x01cdf5c97682cl },
-      { 0x775b411f04839l,0xf448de16987dbl,0x70b32197dbeacl,0xff3db2921dd1bl,
-        0x0046755f8a92dl },
-      0 },
+    { { 0xaf0006f8117b6L,0xff73a35433847L,0xd9475eb651969L,0x6ec7482b35761L,
+        0x01cdf5c97682cL },
+      { 0x775b411f04839L,0xf448de16987dbL,0x70b32197dbeacL,0xff3db2921dd1bL,
+        0x0046755f8a92dL } },
     /* 24 */
-    { { 0xac5d2bce8ffcdl,0x8b2fe61a82cc8l,0x202d6c70d53c4l,0xa5f3f6f161727l,
-        0x0046e5e113b83l },
-      { 0x8ff64d8007f01l,0x125af43183e7bl,0x5e1a03c7fb1efl,0x005b045c5ea63l,
-        0x06e0106c3303dl },
-      0 },
+    { { 0xac5d2bce8ffcdL,0x8b2fe61a82cc8L,0x202d6c70d53c4L,0xa5f3f6f161727L,
+        0x0046e5e113b83L },
+      { 0x8ff64d8007f01L,0x125af43183e7bL,0x5e1a03c7fb1efL,0x005b045c5ea63L,
+        0x06e0106c3303dL } },
     /* 25 */
-    { { 0x7358488dd73b1l,0x8f995ed0d948cl,0x56a2ab7767070l,0xcf1f38385ea8cl,
-        0x0442594ede901l },
-      { 0xaa2c912d4b65bl,0x3b96c90c37f8fl,0xe978d1f94c234l,0xe68ed326e4a15l,
-        0x0a796fa514c2el },
-      0 },
+    { { 0x7358488dd73b1L,0x8f995ed0d948cL,0x56a2ab7767070L,0xcf1f38385ea8cL,
+        0x0442594ede901L },
+      { 0xaa2c912d4b65bL,0x3b96c90c37f8fL,0xe978d1f94c234L,0xe68ed326e4a15L,
+        0x0a796fa514c2eL } },
     /* 26 */
-    { { 0xfb604823addd7l,0x83e56693b3359l,0xcbf3c809e2a61l,0x66e9f885b78e3l,
-        0x0e4ad2da9c697l },
-      { 0xf7f428e048a61l,0x8cc092d9a0357l,0x03ed8ef082d19l,0x5143fc3a1af4cl,
-        0x0c5e94046c37bl },
-      0 },
+    { { 0xfb604823addd7L,0x83e56693b3359L,0xcbf3c809e2a61L,0x66e9f885b78e3L,
+        0x0e4ad2da9c697L },
+      { 0xf7f428e048a61L,0x8cc092d9a0357L,0x03ed8ef082d19L,0x5143fc3a1af4cL,
+        0x0c5e94046c37bL } },
     /* 27 */
-    { { 0xa538c2be75f9el,0xe8cb123a78476l,0x109c04b6fd1a9l,0x4747d85e4df0bl,
-        0x063283dafdb46l },
-      { 0x28cf7baf2df15l,0x550ad9a7f4ce7l,0x834bcc3e592c4l,0xa938fab226adel,
-        0x068bd19ab1981l },
-      0 },
+    { { 0xa538c2be75f9eL,0xe8cb123a78476L,0x109c04b6fd1a9L,0x4747d85e4df0bL,
+        0x063283dafdb46L },
+      { 0x28cf7baf2df15L,0x550ad9a7f4ce7L,0x834bcc3e592c4L,0xa938fab226adeL,
+        0x068bd19ab1981L } },
     /* 28 */
-    { { 0xead511887d659l,0xf4b359305ac08l,0xfe74fe33374d5l,0xdfd696986981cl,
-        0x0495292f53c6fl },
-      { 0x78c9e1acec896l,0x10ec5b44844a8l,0x64d60a7d964b2l,0x68376696f7e26l,
-        0x00ec7530d2603l },
-      0 },
+    { { 0xead511887d659L,0xf4b359305ac08L,0xfe74fe33374d5L,0xdfd696986981cL,
+        0x0495292f53c6fL },
+      { 0x78c9e1acec896L,0x10ec5b44844a8L,0x64d60a7d964b2L,0x68376696f7e26L,
+        0x00ec7530d2603L } },
     /* 29 */
-    { { 0x13a05ad2687bbl,0x6af32e21fa2dal,0xdd4607ba1f83bl,0x3f0b390f5ef51l,
-        0x00f6207a66486l },
-      { 0x7e3bb0f138233l,0x6c272aa718bd6l,0x6ec88aedd66b9l,0x6dcf8ed004072l,
-        0x0ff0db07208edl },
-      0 },
+    { { 0x13a05ad2687bbL,0x6af32e21fa2daL,0xdd4607ba1f83bL,0x3f0b390f5ef51L,
+        0x00f6207a66486L },
+      { 0x7e3bb0f138233L,0x6c272aa718bd6L,0x6ec88aedd66b9L,0x6dcf8ed004072L,
+        0x0ff0db07208edL } },
     /* 30 */
-    { { 0xfa1014c95d553l,0xfd5d680a8a749l,0xf3b566fa44052l,0x0ea3183b4317fl,
-        0x0313b513c8874l },
-      { 0x2e2ac08d11549l,0x0bb4dee21cb40l,0x7f2320e071ee1l,0x9f8126b987dd4l,
-        0x02d3abcf986f1l },
-      0 },
+    { { 0xfa1014c95d553L,0xfd5d680a8a749L,0xf3b566fa44052L,0x0ea3183b4317fL,
+        0x0313b513c8874L },
+      { 0x2e2ac08d11549L,0x0bb4dee21cb40L,0x7f2320e071ee1L,0x9f8126b987dd4L,
+        0x02d3abcf986f1L } },
     /* 31 */
-    { { 0x88501815581a2l,0x56632211af4c2l,0xcab2e999a0a6dl,0x8cdf19ba7a0f0l,
-        0x0c036fa10ded9l },
-      { 0xe08bac1fbd009l,0x9006d1581629al,0xb9e0d8f0b68b1l,0x0194c2eb32779l,
-        0x0a6b2a2c4b6d4l },
-      0 },
+    { { 0x88501815581a2L,0x56632211af4c2L,0xcab2e999a0a6dL,0x8cdf19ba7a0f0L,
+        0x0c036fa10ded9L },
+      { 0xe08bac1fbd009L,0x9006d1581629aL,0xb9e0d8f0b68b1L,0x0194c2eb32779L,
+        0x0a6b2a2c4b6d4L } },
     /* 32 */
-    { { 0x3e50f6d3549cfl,0x6ffacd665ed43l,0xe11fcb46f3369l,0x9860695bfdaccl,
-        0x0810ee252af7cl },
-      { 0x50fe17159bb2cl,0xbe758b357b654l,0x69fea72f7dfbel,0x17452b057e74dl,
-        0x0d485717a9273l },
-      0 },
+    { { 0x3e50f6d3549cfL,0x6ffacd665ed43L,0xe11fcb46f3369L,0x9860695bfdaccL,
+        0x0810ee252af7cL },
+      { 0x50fe17159bb2cL,0xbe758b357b654L,0x69fea72f7dfbeL,0x17452b057e74dL,
+        0x0d485717a9273L } },
     /* 33 */
-    { { 0x41a8af0cb5a98l,0x931f3110bf117l,0xb382adfd3da8fl,0x604e1994e2cbal,
-        0x06a6045a72f9al },
-      { 0xc0d3fa2b2411dl,0x3e510e96e0170l,0x865b3ccbe0eb8l,0x57903bcc9f738l,
-        0x0d3e45cfaf9e1l },
-      0 },
+    { { 0x41a8af0cb5a98L,0x931f3110bf117L,0xb382adfd3da8fL,0x604e1994e2cbaL,
+        0x06a6045a72f9aL },
+      { 0xc0d3fa2b2411dL,0x3e510e96e0170L,0x865b3ccbe0eb8L,0x57903bcc9f738L,
+        0x0d3e45cfaf9e1L } },
     /* 34 */
-    { { 0xf69bbe83f7669l,0x8272877d6bce1l,0x244278d09f8ael,0xc19c9548ae543l,
-        0x0207755dee3c2l },
-      { 0xd61d96fef1945l,0xefb12d28c387bl,0x2df64aa18813cl,0xb00d9fbcd1d67l,
-        0x048dc5ee57154l },
-      0 },
+    { { 0xf69bbe83f7669L,0x8272877d6bce1L,0x244278d09f8aeL,0xc19c9548ae543L,
+        0x0207755dee3c2L },
+      { 0xd61d96fef1945L,0xefb12d28c387bL,0x2df64aa18813cL,0xb00d9fbcd1d67L,
+        0x048dc5ee57154L } },
     /* 35 */
-    { { 0x790bff7e5a199l,0xcf989ccbb7123l,0xa519c79e0efb8l,0xf445c27a2bfe0l,
-        0x0f2fb0aeddff6l },
-      { 0x09575f0b5025fl,0xd740fa9f2241cl,0x80bfbd0550543l,0xd5258fa3c8ad3l,
-        0x0a13e9015db28l },
-      0 },
+    { { 0x790bff7e5a199L,0xcf989ccbb7123L,0xa519c79e0efb8L,0xf445c27a2bfe0L,
+        0x0f2fb0aeddff6L },
+      { 0x09575f0b5025fL,0xd740fa9f2241cL,0x80bfbd0550543L,0xd5258fa3c8ad3L,
+        0x0a13e9015db28L } },
     /* 36 */
-    { { 0x7a350a2b65cbcl,0x722a464226f9fl,0x23f07a10b04b9l,0x526f265ce241el,
-        0x02bf0d6b01497l },
-      { 0x4dd3f4b216fb7l,0x67fbdda26ad3dl,0x708505cf7d7b8l,0xe89faeb7b83f6l,
-        0x042a94a5a162fl },
-      0 },
+    { { 0x7a350a2b65cbcL,0x722a464226f9fL,0x23f07a10b04b9L,0x526f265ce241eL,
+        0x02bf0d6b01497L },
+      { 0x4dd3f4b216fb7L,0x67fbdda26ad3dL,0x708505cf7d7b8L,0xe89faeb7b83f6L,
+        0x042a94a5a162fL } },
     /* 37 */
-    { { 0x6ad0beaadf191l,0x9025a268d7584l,0x94dc1f60f8a48l,0xde3de86030504l,
-        0x02c2dd969c65el },
-      { 0x2171d93849c17l,0xba1da250dd6d0l,0xc3a5485460488l,0x6dbc4810c7063l,
-        0x0f437fa1f42c5l },
-      0 },
+    { { 0x6ad0beaadf191L,0x9025a268d7584L,0x94dc1f60f8a48L,0xde3de86030504L,
+        0x02c2dd969c65eL },
+      { 0x2171d93849c17L,0xba1da250dd6d0L,0xc3a5485460488L,0x6dbc4810c7063L,
+        0x0f437fa1f42c5L } },
     /* 38 */
-    { { 0x0d7144a0f7dabl,0x931776e9ac6aal,0x5f397860f0497l,0x7aa852c0a050fl,
-        0x0aaf45b335470l },
-      { 0x37c33c18d364al,0x063e49716585el,0x5ec5444d40b9bl,0x72bcf41716811l,
-        0x0cdf6310df4f2l },
-      0 },
+    { { 0x0d7144a0f7dabL,0x931776e9ac6aaL,0x5f397860f0497L,0x7aa852c0a050fL,
+        0x0aaf45b335470L },
+      { 0x37c33c18d364aL,0x063e49716585eL,0x5ec5444d40b9bL,0x72bcf41716811L,
+        0x0cdf6310df4f2L } },
     /* 39 */
-    { { 0x3c6238ea8b7efl,0x1885bc2287747l,0xbda8e3408e935l,0x2ff2419567722l,
-        0x0f0d008bada9el },
-      { 0x2671d2414d3b1l,0x85b019ea76291l,0x53bcbdbb37549l,0x7b8b5c61b96d4l,
-        0x05bd5c2f5ca88l },
-      0 },
+    { { 0x3c6238ea8b7efL,0x1885bc2287747L,0xbda8e3408e935L,0x2ff2419567722L,
+        0x0f0d008bada9eL },
+      { 0x2671d2414d3b1L,0x85b019ea76291L,0x53bcbdbb37549L,0x7b8b5c61b96d4L,
+        0x05bd5c2f5ca88L } },
     /* 40 */
-    { { 0xf469ef49a3154l,0x956e2b2e9aef0l,0xa924a9c3e85a5l,0x471945aaec1eal,
-        0x0aa12dfc8a09el },
-      { 0x272274df69f1dl,0x2ca2ff5e7326fl,0x7a9dd44e0e4c8l,0xa901b9d8ce73bl,
-        0x06c036e73e48cl },
-      0 },
+    { { 0xf469ef49a3154L,0x956e2b2e9aef0L,0xa924a9c3e85a5L,0x471945aaec1eaL,
+        0x0aa12dfc8a09eL },
+      { 0x272274df69f1dL,0x2ca2ff5e7326fL,0x7a9dd44e0e4c8L,0xa901b9d8ce73bL,
+        0x06c036e73e48cL } },
     /* 41 */
-    { { 0xae12a0f6e3138l,0x0025ad345a5cfl,0x5672bc56966efl,0xbe248993c64b4l,
-        0x0292ff65896afl },
-      { 0x50d445e213402l,0x274392c9fed52l,0xa1c72e8f6580el,0x7276097b397fdl,
-        0x0644e0c90311bl },
-      0 },
+    { { 0xae12a0f6e3138L,0x0025ad345a5cfL,0x5672bc56966efL,0xbe248993c64b4L,
+        0x0292ff65896afL },
+      { 0x50d445e213402L,0x274392c9fed52L,0xa1c72e8f6580eL,0x7276097b397fdL,
+        0x0644e0c90311bL } },
     /* 42 */
-    { { 0x421e1a47153f0l,0x79920418c9e1el,0x05d7672b86c3bl,0x9a7793bdce877l,
-        0x0f25ae793cab7l },
-      { 0x194a36d869d0cl,0x824986c2641f3l,0x96e945e9d55c8l,0x0a3e49fb5ea30l,
-        0x039b8e65313dbl },
-      0 },
+    { { 0x421e1a47153f0L,0x79920418c9e1eL,0x05d7672b86c3bL,0x9a7793bdce877L,
+        0x0f25ae793cab7L },
+      { 0x194a36d869d0cL,0x824986c2641f3L,0x96e945e9d55c8L,0x0a3e49fb5ea30L,
+        0x039b8e65313dbL } },
     /* 43 */
-    { { 0x54200b6fd2e59l,0x669255c98f377l,0xe2a573935e2c0l,0xdb06d9dab21a0l,
-        0x039122f2f0f19l },
-      { 0xce1e003cad53cl,0x0fe65c17e3cfbl,0xaa13877225b2cl,0xff8d72baf1d29l,
-        0x08de80af8ce80l },
-      0 },
+    { { 0x54200b6fd2e59L,0x669255c98f377L,0xe2a573935e2c0L,0xdb06d9dab21a0L,
+        0x039122f2f0f19L },
+      { 0xce1e003cad53cL,0x0fe65c17e3cfbL,0xaa13877225b2cL,0xff8d72baf1d29L,
+        0x08de80af8ce80L } },
     /* 44 */
-    { { 0xea8d9207bbb76l,0x7c21782758afbl,0xc0436b1921c7el,0x8c04dfa2b74b1l,
-        0x0871949062e36l },
-      { 0x928bba3993df5l,0xb5f3b3d26ab5fl,0x5b55050639d75l,0xfde1011aa78a8l,
-        0x0fc315e6a5b74l },
-      0 },
+    { { 0xea8d9207bbb76L,0x7c21782758afbL,0xc0436b1921c7eL,0x8c04dfa2b74b1L,
+        0x0871949062e36L },
+      { 0x928bba3993df5L,0xb5f3b3d26ab5fL,0x5b55050639d75L,0xfde1011aa78a8L,
+        0x0fc315e6a5b74L } },
     /* 45 */
-    { { 0xfd41ae8d6ecfal,0xf61aec7f86561l,0x924741d5f8c44l,0x908898452a7b4l,
-        0x0e6d4a7adee38l },
-      { 0x52ed14593c75dl,0xa4dd271162605l,0xba2c7db70a70dl,0xae57d2aede937l,
-        0x035dfaf9a9be2l },
-      0 },
+    { { 0xfd41ae8d6ecfaL,0xf61aec7f86561L,0x924741d5f8c44L,0x908898452a7b4L,
+        0x0e6d4a7adee38L },
+      { 0x52ed14593c75dL,0xa4dd271162605L,0xba2c7db70a70dL,0xae57d2aede937L,
+        0x035dfaf9a9be2L } },
     /* 46 */
-    { { 0x56fcdaa736636l,0x97ae2cab7e6b9l,0xf34996609f51dl,0x0d2bfb10bf410l,
-        0x01da5c7d71c83l },
-      { 0x1e4833cce6825l,0x8ff9573c3b5c4l,0x23036b815ad11l,0xb9d6a28552c7fl,
-        0x07077c0fddbf4l },
-      0 },
+    { { 0x56fcdaa736636L,0x97ae2cab7e6b9L,0xf34996609f51dL,0x0d2bfb10bf410L,
+        0x01da5c7d71c83L },
+      { 0x1e4833cce6825L,0x8ff9573c3b5c4L,0x23036b815ad11L,0xb9d6a28552c7fL,
+        0x07077c0fddbf4L } },
     /* 47 */
-    { { 0x3ff8d46b9661cl,0x6b0d2cfd71bf6l,0x847f8f7a1dfd3l,0xfe440373e140al,
-        0x053a8632ee50el },
-      { 0x6ff68696d8051l,0x95c74f468a097l,0xe4e26bddaec0cl,0xfcc162994dc35l,
-        0x0028ca76d34e1l },
-      0 },
+    { { 0x3ff8d46b9661cL,0x6b0d2cfd71bf6L,0x847f8f7a1dfd3L,0xfe440373e140aL,
+        0x053a8632ee50eL },
+      { 0x6ff68696d8051L,0x95c74f468a097L,0xe4e26bddaec0cL,0xfcc162994dc35L,
+        0x0028ca76d34e1L } },
     /* 48 */
-    { { 0xd47dcfc9877eel,0x10801d0002d11l,0x4c260b6c8b362l,0xf046d002c1175l,
-        0x004c17cd86962l },
-      { 0xbd094b0daddf5l,0x7524ce55c06d9l,0x2da03b5bea235l,0x7474663356e67l,
-        0x0f7ba4de9fed9l },
-      0 },
+    { { 0xd47dcfc9877eeL,0x10801d0002d11L,0x4c260b6c8b362L,0xf046d002c1175L,
+        0x004c17cd86962L },
+      { 0xbd094b0daddf5L,0x7524ce55c06d9L,0x2da03b5bea235L,0x7474663356e67L,
+        0x0f7ba4de9fed9L } },
     /* 49 */
-    { { 0xbfa34ebe1263fl,0x3571ae7ce6d0dl,0x2a6f523557637l,0x1c41d24405538l,
-        0x0e31f96005213l },
-      { 0xb9216ea6b6ec6l,0x2e73c2fc44d1bl,0x9d0a29437a1d1l,0xd47bc10e7eac8l,
-        0x0aa3a6259ce34l },
-      0 },
+    { { 0xbfa34ebe1263fL,0x3571ae7ce6d0dL,0x2a6f523557637L,0x1c41d24405538L,
+        0x0e31f96005213L },
+      { 0xb9216ea6b6ec6L,0x2e73c2fc44d1bL,0x9d0a29437a1d1L,0xd47bc10e7eac8L,
+        0x0aa3a6259ce34L } },
     /* 50 */
-    { { 0xf9df536f3dcd3l,0x50d2bf7360fbcl,0xf504f5b6cededl,0xdaee491710fadl,
-        0x02398dd627e79l },
-      { 0x705a36d09569el,0xbb5149f769cf4l,0x5f6034cea0619l,0x6210ff9c03773l,
-        0x05717f5b21c04l },
-      0 },
+    { { 0xf9df536f3dcd3L,0x50d2bf7360fbcL,0xf504f5b6cededL,0xdaee491710fadL,
+        0x02398dd627e79L },
+      { 0x705a36d09569eL,0xbb5149f769cf4L,0x5f6034cea0619L,0x6210ff9c03773L,
+        0x05717f5b21c04L } },
     /* 51 */
-    { { 0x229c921dd895el,0x0040c284519fel,0xd637ecd8e5185l,0x28defa13d2391l,
-        0x0660a2c560e3cl },
-      { 0xa88aed67fcbd0l,0x780ea9f0969ccl,0x2e92b4dc84724l,0x245332b2f4817l,
-        0x0624ee54c4f52l },
-      0 },
+    { { 0x229c921dd895eL,0x0040c284519feL,0xd637ecd8e5185L,0x28defa13d2391L,
+        0x0660a2c560e3cL },
+      { 0xa88aed67fcbd0L,0x780ea9f0969ccL,0x2e92b4dc84724L,0x245332b2f4817L,
+        0x0624ee54c4f52L } },
     /* 52 */
-    { { 0x49ce4d897ecccl,0xd93f9880aa095l,0x43a7c204d49d1l,0xfbc0723c24230l,
-        0x04f392afb92bdl },
-      { 0x9f8fa7de44fd9l,0xe457b32156696l,0x68ebc3cb66cfbl,0x399cdb2fa8033l,
-        0x08a3e7977ccdbl },
-      0 },
+    { { 0x49ce4d897ecccL,0xd93f9880aa095L,0x43a7c204d49d1L,0xfbc0723c24230L,
+        0x04f392afb92bdL },
+      { 0x9f8fa7de44fd9L,0xe457b32156696L,0x68ebc3cb66cfbL,0x399cdb2fa8033L,
+        0x08a3e7977ccdbL } },
     /* 53 */
-    { { 0x1881f06c4b125l,0x00f6e3ca8cddel,0xc7a13e9ae34e3l,0x4404ef6999de5l,
-        0x03888d02370c2l },
-      { 0x8035644f91081l,0x615f015504762l,0x32cd36e3d9fcfl,0x23361827edc86l,
-        0x0a5e62e471810l },
-      0 },
+    { { 0x1881f06c4b125L,0x00f6e3ca8cddeL,0xc7a13e9ae34e3L,0x4404ef6999de5L,
+        0x03888d02370c2L },
+      { 0x8035644f91081L,0x615f015504762L,0x32cd36e3d9fcfL,0x23361827edc86L,
+        0x0a5e62e471810L } },
     /* 54 */
-    { { 0x25ee32facd6c8l,0x5454bcbc661a8l,0x8df9931699c63l,0x5adc0ce3edf79l,
-        0x02c4768e6466al },
-      { 0x6ff8c90a64bc9l,0x20e4779f5cb34l,0xc05e884630a60l,0x52a0d949d064bl,
-        0x07b5e6441f9e6l },
-      0 },
+    { { 0x25ee32facd6c8L,0x5454bcbc661a8L,0x8df9931699c63L,0x5adc0ce3edf79L,
+        0x02c4768e6466aL },
+      { 0x6ff8c90a64bc9L,0x20e4779f5cb34L,0xc05e884630a60L,0x52a0d949d064bL,
+        0x07b5e6441f9e6L } },
     /* 55 */
-    { { 0x9422c1d28444al,0xd8be136a39216l,0xb0c7fcee996c5l,0x744a2387afe5fl,
-        0x0b8af73cb0c8dl },
-      { 0xe83aa338b86fdl,0x58a58a5cff5fdl,0x0ac9433fee3f1l,0x0895c9ee8f6f2l,
-        0x0a036395f7f3fl },
-      0 },
+    { { 0x9422c1d28444aL,0xd8be136a39216L,0xb0c7fcee996c5L,0x744a2387afe5fL,
+        0x0b8af73cb0c8dL },
+      { 0xe83aa338b86fdL,0x58a58a5cff5fdL,0x0ac9433fee3f1L,0x0895c9ee8f6f2L,
+        0x0a036395f7f3fL } },
     /* 56 */
-    { { 0x3c6bba10f7770l,0x81a12a0e248c7l,0x1bc2b9fa6f16dl,0xb533100df6825l,
-        0x04be36b01875fl },
-      { 0x6086e9fb56dbbl,0x8b07e7a4f8922l,0x6d52f20306fefl,0x00c0eeaccc056l,
-        0x08cbc9a871bdcl },
-      0 },
+    { { 0x3c6bba10f7770L,0x81a12a0e248c7L,0x1bc2b9fa6f16dL,0xb533100df6825L,
+        0x04be36b01875fL },
+      { 0x6086e9fb56dbbL,0x8b07e7a4f8922L,0x6d52f20306fefL,0x00c0eeaccc056L,
+        0x08cbc9a871bdcL } },
     /* 57 */
-    { { 0x1895cc0dac4abl,0x40712ff112e13l,0xa1cee57a874a4l,0x35f86332ae7c6l,
-        0x044e7553e0c08l },
-      { 0x03fff7734002dl,0x8b0b34425c6d5l,0xe8738b59d35cbl,0xfc1895f702760l,
-        0x0470a683a5eb8l },
-      0 },
+    { { 0x1895cc0dac4abL,0x40712ff112e13L,0xa1cee57a874a4L,0x35f86332ae7c6L,
+        0x044e7553e0c08L },
+      { 0x03fff7734002dL,0x8b0b34425c6d5L,0xe8738b59d35cbL,0xfc1895f702760L,
+        0x0470a683a5eb8L } },
     /* 58 */
-    { { 0x761dc90513482l,0x2a01e9276a81bl,0xce73083028720l,0xc6efcda441ee0l,
-        0x016410690c63dl },
-      { 0x34a066d06a2edl,0x45189b100bf50l,0xb8218c9dd4d77l,0xbb4fd914ae72al,
-        0x0d73479fd7abcl },
-      0 },
+    { { 0x761dc90513482L,0x2a01e9276a81bL,0xce73083028720L,0xc6efcda441ee0L,
+        0x016410690c63dL },
+      { 0x34a066d06a2edL,0x45189b100bf50L,0xb8218c9dd4d77L,0xbb4fd914ae72aL,
+        0x0d73479fd7abcL } },
     /* 59 */
-    { { 0xefb165ad4c6e5l,0x8f5b06d04d7edl,0x575cb14262cf0l,0x666b12ed5bb18l,
-        0x0816469e30771l },
-      { 0xb9d79561e291el,0x22c1de1661d7al,0x35e0513eb9dafl,0x3f9cf49827eb1l,
-        0x00a36dd23f0ddl },
-      0 },
+    { { 0xefb165ad4c6e5L,0x8f5b06d04d7edL,0x575cb14262cf0L,0x666b12ed5bb18L,
+        0x0816469e30771L },
+      { 0xb9d79561e291eL,0x22c1de1661d7aL,0x35e0513eb9dafL,0x3f9cf49827eb1L,
+        0x00a36dd23f0ddL } },
     /* 60 */
-    { { 0xd32c741d5533cl,0x9e8684628f098l,0x349bd117c5f5al,0xb11839a228adel,
-        0x0e331dfd6fdbal },
-      { 0x0ab686bcc6ed8l,0xbdef7a260e510l,0xce850d77160c3l,0x33899063d9a7bl,
-        0x0d3b4782a492el },
-      0 },
+    { { 0xd32c741d5533cL,0x9e8684628f098L,0x349bd117c5f5aL,0xb11839a228adeL,
+        0x0e331dfd6fdbaL },
+      { 0x0ab686bcc6ed8L,0xbdef7a260e510L,0xce850d77160c3L,0x33899063d9a7bL,
+        0x0d3b4782a492eL } },
     /* 61 */
-    { { 0x9b6e8f3821f90l,0xed66eb7aada14l,0xa01311692edd9l,0xa5bd0bb669531l,
-        0x07281275a4c86l },
-      { 0x858f7d3ff47e5l,0xbc61016441503l,0xdfd9bb15e1616l,0x505962b0f11a7l,
-        0x02c062e7ece14l },
-      0 },
+    { { 0x9b6e8f3821f90L,0xed66eb7aada14L,0xa01311692edd9L,0xa5bd0bb669531L,
+        0x07281275a4c86L },
+      { 0x858f7d3ff47e5L,0xbc61016441503L,0xdfd9bb15e1616L,0x505962b0f11a7L,
+        0x02c062e7ece14L } },
     /* 62 */
-    { { 0xf996f0159ac2el,0x36cbdb2713a76l,0x8e46047281e77l,0x7ef12ad6d2880l,
-        0x0282a35f92c4el },
-      { 0x54b1ec0ce5cd2l,0xc91379c2299c3l,0xe82c11ecf99efl,0x2abd992caf383l,
-        0x0c71cd513554dl },
-      0 },
+    { { 0xf996f0159ac2eL,0x36cbdb2713a76L,0x8e46047281e77L,0x7ef12ad6d2880L,
+        0x0282a35f92c4eL },
+      { 0x54b1ec0ce5cd2L,0xc91379c2299c3L,0xe82c11ecf99efL,0x2abd992caf383L,
+        0x0c71cd513554dL } },
     /* 63 */
-    { { 0x5de9c09b578f4l,0x58e3affa7a488l,0x9182f1f1884e2l,0xf3a38f76b1b75l,
-        0x0c50f6740cf47l },
-      { 0x4adf3374b68eal,0x2369965fe2a9cl,0x5a53050a406f3l,0x58dc2f86a2228l,
-        0x0b9ecb3a72129l },
-      0 },
+    { { 0x5de9c09b578f4L,0x58e3affa7a488L,0x9182f1f1884e2L,0xf3a38f76b1b75L,
+        0x0c50f6740cf47L },
+      { 0x4adf3374b68eaL,0x2369965fe2a9cL,0x5a53050a406f3L,0x58dc2f86a2228L,
+        0x0b9ecb3a72129L } },
     /* 64 */
-    { { 0x8410ef4f8b16al,0xfec47b266a56fl,0xd9c87c197241al,0xab1b0a406b8e6l,
-        0x0803f3e02cd42l },
-      { 0x309a804dbec69l,0xf73bbad05f7f0l,0xd8e197fa83b85l,0xadc1c6097273al,
-        0x0c097440e5067l },
-      0 },
+    { { 0x8410ef4f8b16aL,0xfec47b266a56fL,0xd9c87c197241aL,0xab1b0a406b8e6L,
+        0x0803f3e02cd42L },
+      { 0x309a804dbec69L,0xf73bbad05f7f0L,0xd8e197fa83b85L,0xadc1c6097273aL,
+        0x0c097440e5067L } },
     /* 65 */
-    { { 0xa56f2c379ab34l,0x8b841df8d1846l,0x76c68efa8ee06l,0x1f30203144591l,
-        0x0f1af32d5915fl },
-      { 0x375315d75bd50l,0xbaf72f67bc99cl,0x8d7723f837cffl,0x1c8b0613a4184l,
-        0x023d0f130e2d4l },
-      0 },
+    { { 0xa56f2c379ab34L,0x8b841df8d1846L,0x76c68efa8ee06L,0x1f30203144591L,
+        0x0f1af32d5915fL },
+      { 0x375315d75bd50L,0xbaf72f67bc99cL,0x8d7723f837cffL,0x1c8b0613a4184L,
+        0x023d0f130e2d4L } },
     /* 66 */
-    { { 0xab6edf41500d9l,0xe5fcbeada8857l,0x97259510d890al,0xfadd52fe86488l,
-        0x0b0288dd6c0a3l },
-      { 0x20f30650bcb08l,0x13695d6e16853l,0x989aa7671af63l,0xc8d231f520a7bl,
-        0x0ffd3724ff408l },
-      0 },
+    { { 0xab6edf41500d9L,0xe5fcbeada8857L,0x97259510d890aL,0xfadd52fe86488L,
+        0x0b0288dd6c0a3L },
+      { 0x20f30650bcb08L,0x13695d6e16853L,0x989aa7671af63L,0xc8d231f520a7bL,
+        0x0ffd3724ff408L } },
     /* 67 */
-    { { 0x68e64b458e6cbl,0x20317a5d28539l,0xaa75f56992dadl,0x26df3814ae0b7l,
-        0x0f5590f4ad78cl },
-      { 0x24bd3cf0ba55al,0x4a0c778bae0fcl,0x83b674a0fc472l,0x4a201ce9864f6l,
-        0x018d6da54f6f7l },
-      0 },
+    { { 0x68e64b458e6cbL,0x20317a5d28539L,0xaa75f56992dadL,0x26df3814ae0b7L,
+        0x0f5590f4ad78cL },
+      { 0x24bd3cf0ba55aL,0x4a0c778bae0fcL,0x83b674a0fc472L,0x4a201ce9864f6L,
+        0x018d6da54f6f7L } },
     /* 68 */
-    { { 0x3e225d5be5a2bl,0x835934f3c6ed9l,0x2626ffc6fe799l,0x216a431409262l,
-        0x050bbb4d97990l },
-      { 0x191c6e57ec63el,0x40181dcdb2378l,0x236e0f665422cl,0x49c341a8099b0l,
-        0x02b10011801fel },
-      0 },
+    { { 0x3e225d5be5a2bL,0x835934f3c6ed9L,0x2626ffc6fe799L,0x216a431409262L,
+        0x050bbb4d97990L },
+      { 0x191c6e57ec63eL,0x40181dcdb2378L,0x236e0f665422cL,0x49c341a8099b0L,
+        0x02b10011801feL } },
     /* 69 */
-    { { 0x8b5c59b391593l,0xa2598270fcfc6l,0x19adcbbc385f5l,0xae0c7144f3aadl,
-        0x0dd55899983fbl },
-      { 0x88b8e74b82ff4l,0x4071e734c993bl,0x3c0322ad2e03cl,0x60419a7a9eaf4l,
-        0x0e6e4c551149dl },
-      0 },
+    { { 0x8b5c59b391593L,0xa2598270fcfc6L,0x19adcbbc385f5L,0xae0c7144f3aadL,
+        0x0dd55899983fbL },
+      { 0x88b8e74b82ff4L,0x4071e734c993bL,0x3c0322ad2e03cL,0x60419a7a9eaf4L,
+        0x0e6e4c551149dL } },
     /* 70 */
-    { { 0x655bb1e9af288l,0x64f7ada93155fl,0xb2820e5647e1al,0x56ff43697e4bcl,
-        0x051e00db107edl },
-      { 0x169b8771c327el,0x0b4a96c2ad43dl,0xdeb477929cdb2l,0x9177c07d51f53l,
-        0x0e22f42414982l },
-      0 },
+    { { 0x655bb1e9af288L,0x64f7ada93155fL,0xb2820e5647e1aL,0x56ff43697e4bcL,
+        0x051e00db107edL },
+      { 0x169b8771c327eL,0x0b4a96c2ad43dL,0xdeb477929cdb2L,0x9177c07d51f53L,
+        0x0e22f42414982L } },
     /* 71 */
-    { { 0x5e8f4635f1abbl,0xb568538874cd4l,0x5a8034d7edc0cl,0x48c9c9472c1fbl,
-        0x0f709373d52dcl },
-      { 0x966bba8af30d6l,0x4af137b69c401l,0x361c47e95bf5fl,0x5b113966162a9l,
-        0x0bd52d288e727l },
-      0 },
+    { { 0x5e8f4635f1abbL,0xb568538874cd4L,0x5a8034d7edc0cL,0x48c9c9472c1fbL,
+        0x0f709373d52dcL },
+      { 0x966bba8af30d6L,0x4af137b69c401L,0x361c47e95bf5fL,0x5b113966162a9L,
+        0x0bd52d288e727L } },
     /* 72 */
-    { { 0x55c7a9c5fa877l,0x727d3a3d48ab1l,0x3d189d817dad6l,0x77a643f43f9e7l,
-        0x0a0d0f8e4c8aal },
-      { 0xeafd8cc94f92dl,0xbe0c4ddb3a0bbl,0x82eba14d818c8l,0x6a0022cc65f8bl,
-        0x0a56c78c7946dl },
-      0 },
+    { { 0x55c7a9c5fa877L,0x727d3a3d48ab1L,0x3d189d817dad6L,0x77a643f43f9e7L,
+        0x0a0d0f8e4c8aaL },
+      { 0xeafd8cc94f92dL,0xbe0c4ddb3a0bbL,0x82eba14d818c8L,0x6a0022cc65f8bL,
+        0x0a56c78c7946dL } },
     /* 73 */
-    { { 0x2391b0dd09529l,0xa63daddfcf296l,0xb5bf481803e0el,0x367a2c77351f5l,
-        0x0d8befdf8731al },
-      { 0x19d42fc0157f4l,0xd7fec8e650ab9l,0x2d48b0af51cael,0x6478cdf9cb400l,
-        0x0854a68a5ce9fl },
-      0 },
+    { { 0x2391b0dd09529L,0xa63daddfcf296L,0xb5bf481803e0eL,0x367a2c77351f5L,
+        0x0d8befdf8731aL },
+      { 0x19d42fc0157f4L,0xd7fec8e650ab9L,0x2d48b0af51caeL,0x6478cdf9cb400L,
+        0x0854a68a5ce9fL } },
     /* 74 */
-    { { 0x5f67b63506ea5l,0x89a4fe0d66dc3l,0xe95cd4d9286c4l,0x6a953f101d3bfl,
-        0x05cacea0b9884l },
-      { 0xdf60c9ceac44dl,0xf4354d1c3aa90l,0xd5dbabe3db29al,0xefa908dd3de8al,
-        0x0e4982d1235e4l },
-      0 },
+    { { 0x5f67b63506ea5L,0x89a4fe0d66dc3L,0xe95cd4d9286c4L,0x6a953f101d3bfL,
+        0x05cacea0b9884L },
+      { 0xdf60c9ceac44dL,0xf4354d1c3aa90L,0xd5dbabe3db29aL,0xefa908dd3de8aL,
+        0x0e4982d1235e4L } },
     /* 75 */
-    { { 0x04a22c34cd55el,0xb32680d132231l,0xfa1d94358695bl,0x0499fb345afa1l,
-        0x08046b7f616b2l },
-      { 0x3581e38e7d098l,0x8df46f0b70b53l,0x4cb78c4d7f61el,0xaf5530dea9ea4l,
-        0x0eb17ca7b9082l },
-      0 },
+    { { 0x04a22c34cd55eL,0xb32680d132231L,0xfa1d94358695bL,0x0499fb345afa1L,
+        0x08046b7f616b2L },
+      { 0x3581e38e7d098L,0x8df46f0b70b53L,0x4cb78c4d7f61eL,0xaf5530dea9ea4L,
+        0x0eb17ca7b9082L } },
     /* 76 */
-    { { 0x1b59876a145b9l,0x0fc1bc71ec175l,0x92715bba5cf6bl,0xe131d3e035653l,
-        0x0097b00bafab5l },
-      { 0x6c8e9565f69e1l,0x5ab5be5199aa6l,0xa4fd98477e8f7l,0xcc9e6033ba11dl,
-        0x0f95c747bafdbl },
-      0 },
+    { { 0x1b59876a145b9L,0x0fc1bc71ec175L,0x92715bba5cf6bL,0xe131d3e035653L,
+        0x0097b00bafab5L },
+      { 0x6c8e9565f69e1L,0x5ab5be5199aa6L,0xa4fd98477e8f7L,0xcc9e6033ba11dL,
+        0x0f95c747bafdbL } },
     /* 77 */
-    { { 0xf01d3bebae45el,0xf0c4bc6955558l,0xbc64fc6a8ebe9l,0xd837aeb705b1dl,
-        0x03512601e566el },
-      { 0x6f1e1fa1161cdl,0xd54c65ef87933l,0x24f21e5328ab8l,0xab6b4757eee27l,
-        0x00ef971236068l },
-      0 },
+    { { 0xf01d3bebae45eL,0xf0c4bc6955558L,0xbc64fc6a8ebe9L,0xd837aeb705b1dL,
+        0x03512601e566eL },
+      { 0x6f1e1fa1161cdL,0xd54c65ef87933L,0x24f21e5328ab8L,0xab6b4757eee27L,
+        0x00ef971236068L } },
     /* 78 */
-    { { 0x98cf754ca4226l,0x38f8642c8e025l,0x68e17905eede1l,0xbc9548963f744l,
-        0x0fc16d9333b4fl },
-      { 0x6fb31e7c800cal,0x312678adaabe9l,0xff3e8b5138063l,0x7a173d6244976l,
-        0x014ca4af1b95dl },
-      0 },
+    { { 0x98cf754ca4226L,0x38f8642c8e025L,0x68e17905eede1L,0xbc9548963f744L,
+        0x0fc16d9333b4fL },
+      { 0x6fb31e7c800caL,0x312678adaabe9L,0xff3e8b5138063L,0x7a173d6244976L,
+        0x014ca4af1b95dL } },
     /* 79 */
-    { { 0x771babd2f81d5l,0x6901f7d1967a4l,0xad9c9071a5f9dl,0x231dd898bef7cl,
-        0x04057b063f59cl },
-      { 0xd82fe89c05c0al,0x6f1dc0df85bffl,0x35a16dbe4911cl,0x0b133befccaeal,
-        0x01c3b5d64f133l },
-      0 },
+    { { 0x771babd2f81d5L,0x6901f7d1967a4L,0xad9c9071a5f9dL,0x231dd898bef7cL,
+        0x04057b063f59cL },
+      { 0xd82fe89c05c0aL,0x6f1dc0df85bffL,0x35a16dbe4911cL,0x0b133befccaeaL,
+        0x01c3b5d64f133L } },
     /* 80 */
-    { { 0x14bfe80ec21fel,0x6ac255be825fel,0xf4a5d67f6ce11l,0x63af98bc5a072l,
-        0x0fad27148db7el },
-      { 0x0b6ac29ab05b3l,0x3c4e251ae690cl,0x2aade7d37a9a8l,0x1a840a7dc875cl,
-        0x077387de39f0el },
-      0 },
+    { { 0x14bfe80ec21feL,0x6ac255be825feL,0xf4a5d67f6ce11L,0x63af98bc5a072L,
+        0x0fad27148db7eL },
+      { 0x0b6ac29ab05b3L,0x3c4e251ae690cL,0x2aade7d37a9a8L,0x1a840a7dc875cL,
+        0x077387de39f0eL } },
     /* 81 */
-    { { 0xecc49a56c0dd7l,0xd846086c741e9l,0x505aecea5cffcl,0xc47e8f7a1408fl,
-        0x0b37b85c0bef0l },
-      { 0x6b6e4cc0e6a8fl,0xbf6b388f23359l,0x39cef4efd6d4bl,0x28d5aba453facl,
-        0x09c135ac8f9f6l },
-      0 },
+    { { 0xecc49a56c0dd7L,0xd846086c741e9L,0x505aecea5cffcL,0xc47e8f7a1408fL,
+        0x0b37b85c0bef0L },
+      { 0x6b6e4cc0e6a8fL,0xbf6b388f23359L,0x39cef4efd6d4bL,0x28d5aba453facL,
+        0x09c135ac8f9f6L } },
     /* 82 */
-    { { 0xa320284e35743l,0xb185a3cdef32al,0xdf19819320d6al,0x851fb821b1761l,
-        0x05721361fc433l },
-      { 0xdb36a71fc9168l,0x735e5c403c1f0l,0x7bcd8f55f98bal,0x11bdf64ca87e3l,
-        0x0dcbac3c9e6bbl },
-      0 },
+    { { 0xa320284e35743L,0xb185a3cdef32aL,0xdf19819320d6aL,0x851fb821b1761L,
+        0x05721361fc433L },
+      { 0xdb36a71fc9168L,0x735e5c403c1f0L,0x7bcd8f55f98baL,0x11bdf64ca87e3L,
+        0x0dcbac3c9e6bbL } },
     /* 83 */
-    { { 0xd99684518cbe2l,0x189c9eb04ef01l,0x47feebfd242fcl,0x6862727663c7el,
-        0x0b8c1c89e2d62l },
-      { 0x58bddc8e1d569l,0xc8b7d88cd051al,0x11f31eb563809l,0x22d426c27fd9fl,
-        0x05d23bbda2f94l },
-      0 },
+    { { 0xd99684518cbe2L,0x189c9eb04ef01L,0x47feebfd242fcL,0x6862727663c7eL,
+        0x0b8c1c89e2d62L },
+      { 0x58bddc8e1d569L,0xc8b7d88cd051aL,0x11f31eb563809L,0x22d426c27fd9fL,
+        0x05d23bbda2f94L } },
     /* 84 */
-    { { 0xc729495c8f8bel,0x803bf362bf0a1l,0xf63d4ac2961c4l,0xe9009e418403dl,
-        0x0c109f9cb91ecl },
-      { 0x095d058945705l,0x96ddeb85c0c2dl,0xa40449bb9083dl,0x1ee184692b8d7l,
-        0x09bc3344f2eeel },
-      0 },
+    { { 0xc729495c8f8beL,0x803bf362bf0a1L,0xf63d4ac2961c4L,0xe9009e418403dL,
+        0x0c109f9cb91ecL },
+      { 0x095d058945705L,0x96ddeb85c0c2dL,0xa40449bb9083dL,0x1ee184692b8d7L,
+        0x09bc3344f2eeeL } },
     /* 85 */
-    { { 0xae35642913074l,0x2748a542b10d5l,0x310732a55491bl,0x4cc1469ca665bl,
-        0x029591d525f1al },
-      { 0xf5b6bb84f983fl,0x419f5f84e1e76l,0x0baa189be7eefl,0x332c1200d4968l,
-        0x06376551f18efl },
-      0 },
+    { { 0xae35642913074L,0x2748a542b10d5L,0x310732a55491bL,0x4cc1469ca665bL,
+        0x029591d525f1aL },
+      { 0xf5b6bb84f983fL,0x419f5f84e1e76L,0x0baa189be7eefL,0x332c1200d4968L,
+        0x06376551f18efL } },
     /* 86 */
-    { { 0x5f14e562976ccl,0xe60ef12c38bdal,0xcca985222bca3l,0x987abbfa30646l,
-        0x0bdb79dc808e2l },
-      { 0xcb5c9cb06a772l,0xaafe536dcefd2l,0xc2b5db838f475l,0xc14ac2a3e0227l,
-        0x08ee86001add3l },
-      0 },
+    { { 0x5f14e562976ccL,0xe60ef12c38bdaL,0xcca985222bca3L,0x987abbfa30646L,
+        0x0bdb79dc808e2L },
+      { 0xcb5c9cb06a772L,0xaafe536dcefd2L,0xc2b5db838f475L,0xc14ac2a3e0227L,
+        0x08ee86001add3L } },
     /* 87 */
-    { { 0x96981a4ade873l,0x4dc4fba48ccbel,0xa054ba57ee9aal,0xaa4b2cee28995l,
-        0x092e51d7a6f77l },
-      { 0xbafa87190a34dl,0x5bf6bd1ed1948l,0xcaf1144d698f7l,0xaaaad00ee6e30l,
-        0x05182f86f0a56l },
-      0 },
+    { { 0x96981a4ade873L,0x4dc4fba48ccbeL,0xa054ba57ee9aaL,0xaa4b2cee28995L,
+        0x092e51d7a6f77L },
+      { 0xbafa87190a34dL,0x5bf6bd1ed1948L,0xcaf1144d698f7L,0xaaaad00ee6e30L,
+        0x05182f86f0a56L } },
     /* 88 */
-    { { 0x6212c7a4cc99cl,0x683e6d9ca1fbal,0xac98c5aff609bl,0xa6f25dbb27cb5l,
-        0x091dcab5d4073l },
-      { 0x6cc3d5f575a70l,0x396f8d87fa01bl,0x99817360cb361l,0x4f2b165d4e8c8l,
-        0x017a0cedb9797l },
-      0 },
+    { { 0x6212c7a4cc99cL,0x683e6d9ca1fbaL,0xac98c5aff609bL,0xa6f25dbb27cb5L,
+        0x091dcab5d4073L },
+      { 0x6cc3d5f575a70L,0x396f8d87fa01bL,0x99817360cb361L,0x4f2b165d4e8c8L,
+        0x017a0cedb9797L } },
     /* 89 */
-    { { 0x61e2a076c8d3al,0x39210f924b388l,0x3a835d9701aadl,0xdf4194d0eae41l,
-        0x02e8ce36c7f4cl },
-      { 0x73dab037a862bl,0xb760e4c8fa912l,0x3baf2dd01ba9bl,0x68f3f96453883l,
-        0x0f4ccc6cb34f6l },
-      0 },
+    { { 0x61e2a076c8d3aL,0x39210f924b388L,0x3a835d9701aadL,0xdf4194d0eae41L,
+        0x02e8ce36c7f4cL },
+      { 0x73dab037a862bL,0xb760e4c8fa912L,0x3baf2dd01ba9bL,0x68f3f96453883L,
+        0x0f4ccc6cb34f6L } },
     /* 90 */
-    { { 0xf525cf1f79687l,0x9592efa81544el,0x5c78d297c5954l,0xf3c9e1231741al,
-        0x0ac0db4889a0dl },
-      { 0xfc711df01747fl,0x58ef17df1386bl,0xccb6bb5592b93l,0x74a2e5880e4f5l,
-        0x095a64a6194c9l },
-      0 },
+    { { 0xf525cf1f79687L,0x9592efa81544eL,0x5c78d297c5954L,0xf3c9e1231741aL,
+        0x0ac0db4889a0dL },
+      { 0xfc711df01747fL,0x58ef17df1386bL,0xccb6bb5592b93L,0x74a2e5880e4f5L,
+        0x095a64a6194c9L } },
     /* 91 */
-    { { 0x1efdac15a4c93l,0x738258514172cl,0x6cb0bad40269bl,0x06776a8dfb1c1l,
-        0x0231e54ba2921l },
-      { 0xdf9178ae6d2dcl,0x3f39112918a70l,0xe5b72234d6aa6l,0x31e1f627726b5l,
-        0x0ab0be032d8a7l },
-      0 },
+    { { 0x1efdac15a4c93L,0x738258514172cL,0x6cb0bad40269bL,0x06776a8dfb1c1L,
+        0x0231e54ba2921L },
+      { 0xdf9178ae6d2dcL,0x3f39112918a70L,0xe5b72234d6aa6L,0x31e1f627726b5L,
+        0x0ab0be032d8a7L } },
     /* 92 */
-    { { 0xad0e98d131f2dl,0xe33b04f101097l,0x5e9a748637f09l,0xa6791ac86196dl,
-        0x0f1bcc8802cf6l },
-      { 0x69140e8daacb4l,0x5560f6500925cl,0x77937a63c4e40l,0xb271591cc8fc4l,
-        0x0851694695aebl },
-      0 },
+    { { 0xad0e98d131f2dL,0xe33b04f101097L,0x5e9a748637f09L,0xa6791ac86196dL,
+        0x0f1bcc8802cf6L },
+      { 0x69140e8daacb4L,0x5560f6500925cL,0x77937a63c4e40L,0xb271591cc8fc4L,
+        0x0851694695aebL } },
     /* 93 */
-    { { 0x5c143f1dcf593l,0x29b018be3bde3l,0xbdd9d3d78202bl,0x55d8e9cdadc29l,
-        0x08f67d9d2daadl },
-      { 0x116567481ea5fl,0xe9e34c590c841l,0x5053fa8e7d2ddl,0x8b5dffdd43f40l,
-        0x0f84572b9c072l },
-      0 },
+    { { 0x5c143f1dcf593L,0x29b018be3bde3L,0xbdd9d3d78202bL,0x55d8e9cdadc29L,
+        0x08f67d9d2daadL },
+      { 0x116567481ea5fL,0xe9e34c590c841L,0x5053fa8e7d2ddL,0x8b5dffdd43f40L,
+        0x0f84572b9c072L } },
     /* 94 */
-    { { 0xa7a7197af71c9l,0x447a7365655e1l,0xe1d5063a14494l,0x2c19a1b4ae070l,
-        0x0edee2710616bl },
-      { 0x034f511734121l,0x554a25e9f0b2fl,0x40c2ecf1cac6el,0xd7f48dc148f3al,
-        0x09fd27e9b44ebl },
-      0 },
+    { { 0xa7a7197af71c9L,0x447a7365655e1L,0xe1d5063a14494L,0x2c19a1b4ae070L,
+        0x0edee2710616bL },
+      { 0x034f511734121L,0x554a25e9f0b2fL,0x40c2ecf1cac6eL,0xd7f48dc148f3aL,
+        0x09fd27e9b44ebL } },
     /* 95 */
-    { { 0x7658af6e2cb16l,0x2cfe5919b63ccl,0x68d5583e3eb7dl,0xf3875a8c58161l,
-        0x0a40c2fb6958fl },
-      { 0xec560fedcc158l,0xc655f230568c9l,0xa307e127ad804l,0xdecfd93967049l,
-        0x099bc9bb87dc6l },
-      0 },
+    { { 0x7658af6e2cb16L,0x2cfe5919b63ccL,0x68d5583e3eb7dL,0xf3875a8c58161L,
+        0x0a40c2fb6958fL },
+      { 0xec560fedcc158L,0xc655f230568c9L,0xa307e127ad804L,0xdecfd93967049L,
+        0x099bc9bb87dc6L } },
     /* 96 */
-    { { 0x9521d927dafc6l,0x695c09cd1984al,0x9366dde52c1fbl,0x7e649d9581a0fl,
-        0x09abe210ba16dl },
-      { 0xaf84a48915220l,0x6a4dd816c6480l,0x681ca5afa7317l,0x44b0c7d539871l,
-        0x07881c25787f3l },
-      0 },
+    { { 0x9521d927dafc6L,0x695c09cd1984aL,0x9366dde52c1fbL,0x7e649d9581a0fL,
+        0x09abe210ba16dL },
+      { 0xaf84a48915220L,0x6a4dd816c6480L,0x681ca5afa7317L,0x44b0c7d539871L,
+        0x07881c25787f3L } },
     /* 97 */
-    { { 0x99b51e0bcf3ffl,0xc5127f74f6933l,0xd01d9680d02cbl,0x89408fb465a2dl,
-        0x015e6e319a30el },
-      { 0xd6e0d3e0e05f4l,0xdc43588404646l,0x4f850d3fad7bdl,0x72cebe61c7d1cl,
-        0x00e55facf1911l },
-      0 },
+    { { 0x99b51e0bcf3ffL,0xc5127f74f6933L,0xd01d9680d02cbL,0x89408fb465a2dL,
+        0x015e6e319a30eL },
+      { 0xd6e0d3e0e05f4L,0xdc43588404646L,0x4f850d3fad7bdL,0x72cebe61c7d1cL,
+        0x00e55facf1911L } },
     /* 98 */
-    { { 0xd9806f8787564l,0x2131e85ce67e9l,0x819e8d61a3317l,0x65776b0158cabl,
-        0x0d73d09766fe9l },
-      { 0x834251eb7206el,0x0fc618bb42424l,0xe30a520a51929l,0xa50b5dcbb8595l,
-        0x09250a3748f15l },
-      0 },
+    { { 0xd9806f8787564L,0x2131e85ce67e9L,0x819e8d61a3317L,0x65776b0158cabL,
+        0x0d73d09766fe9L },
+      { 0x834251eb7206eL,0x0fc618bb42424L,0xe30a520a51929L,0xa50b5dcbb8595L,
+        0x09250a3748f15L } },
     /* 99 */
-    { { 0xf08f8be577410l,0x035077a8c6cafl,0xc0a63a4fd408al,0x8c0bf1f63289el,
-        0x077414082c1ccl },
-      { 0x40fa6eb0991cdl,0x6649fdc29605al,0x324fd40c1ca08l,0x20b93a68a3c7bl,
-        0x08cb04f4d12ebl },
-      0 },
+    { { 0xf08f8be577410L,0x035077a8c6cafL,0xc0a63a4fd408aL,0x8c0bf1f63289eL,
+        0x077414082c1ccL },
+      { 0x40fa6eb0991cdL,0x6649fdc29605aL,0x324fd40c1ca08L,0x20b93a68a3c7bL,
+        0x08cb04f4d12ebL } },
     /* 100 */
-    { { 0x2d0556906171cl,0xcdb0240c3fb1cl,0x89068419073e9l,0x3b51db8e6b4fdl,
-        0x0e4e429ef4712l },
-      { 0xdd53c38ec36f4l,0x01ff4b6a270b8l,0x79a9a48f9d2dcl,0x65525d066e078l,
-        0x037bca2ff3c6el },
-      0 },
+    { { 0x2d0556906171cL,0xcdb0240c3fb1cL,0x89068419073e9L,0x3b51db8e6b4fdL,
+        0x0e4e429ef4712L },
+      { 0xdd53c38ec36f4L,0x01ff4b6a270b8L,0x79a9a48f9d2dcL,0x65525d066e078L,
+        0x037bca2ff3c6eL } },
     /* 101 */
-    { { 0x2e3c7df562470l,0xa2c0964ac94cdl,0x0c793be44f272l,0xb22a7c6d5df98l,
-        0x059913edc3002l },
-      { 0x39a835750592al,0x80e783de027a1l,0xa05d64f99e01dl,0xe226cf8c0375el,
-        0x043786e4ab013l },
-      0 },
+    { { 0x2e3c7df562470L,0xa2c0964ac94cdL,0x0c793be44f272L,0xb22a7c6d5df98L,
+        0x059913edc3002L },
+      { 0x39a835750592aL,0x80e783de027a1L,0xa05d64f99e01dL,0xe226cf8c0375eL,
+        0x043786e4ab013L } },
     /* 102 */
-    { { 0x2b0ed9e56b5a6l,0xa6d9fc68f9ff3l,0x97846a70750d9l,0x9e7aec15e8455l,
-        0x08638ca98b7e7l },
-      { 0xae0960afc24b2l,0xaf4dace8f22f5l,0xecba78f05398el,0xa6f03b765dd0al,
-        0x01ecdd36a7b3al },
-      0 },
+    { { 0x2b0ed9e56b5a6L,0xa6d9fc68f9ff3L,0x97846a70750d9L,0x9e7aec15e8455L,
+        0x08638ca98b7e7L },
+      { 0xae0960afc24b2L,0xaf4dace8f22f5L,0xecba78f05398eL,0xa6f03b765dd0aL,
+        0x01ecdd36a7b3aL } },
     /* 103 */
-    { { 0xacd626c5ff2f3l,0xc02873a9785d3l,0x2110d54a2d516l,0xf32dad94c9fadl,
-        0x0d85d0f85d459l },
-      { 0x00b8d10b11da3l,0x30a78318c49f7l,0x208decdd2c22cl,0x3c62556988f49l,
-        0x0a04f19c3b4edl },
-      0 },
+    { { 0xacd626c5ff2f3L,0xc02873a9785d3L,0x2110d54a2d516L,0xf32dad94c9fadL,
+        0x0d85d0f85d459L },
+      { 0x00b8d10b11da3L,0x30a78318c49f7L,0x208decdd2c22cL,0x3c62556988f49L,
+        0x0a04f19c3b4edL } },
     /* 104 */
-    { { 0x924c8ed7f93bdl,0x5d392f51f6087l,0x21b71afcb64acl,0x50b07cae330a8l,
-        0x092b2eeea5c09l },
-      { 0xc4c9485b6e235l,0xa92936c0f085al,0x0508891ab2ca4l,0x276c80faa6b3el,
-        0x01ee782215834l },
-      0 },
+    { { 0x924c8ed7f93bdL,0x5d392f51f6087L,0x21b71afcb64acL,0x50b07cae330a8L,
+        0x092b2eeea5c09L },
+      { 0xc4c9485b6e235L,0xa92936c0f085aL,0x0508891ab2ca4L,0x276c80faa6b3eL,
+        0x01ee782215834L } },
     /* 105 */
-    { { 0xa2e00e63e79f7l,0xb2f399d906a60l,0x607c09df590e7l,0xe1509021054a6l,
-        0x0f3f2ced857a6l },
-      { 0x510f3f10d9b55l,0xacd8642648200l,0x8bd0e7c9d2fcfl,0xe210e5631aa7el,
-        0x00f56a4543da3l },
-      0 },
+    { { 0xa2e00e63e79f7L,0xb2f399d906a60L,0x607c09df590e7L,0xe1509021054a6L,
+        0x0f3f2ced857a6L },
+      { 0x510f3f10d9b55L,0xacd8642648200L,0x8bd0e7c9d2fcfL,0xe210e5631aa7eL,
+        0x00f56a4543da3L } },
     /* 106 */
-    { { 0x1bffa1043e0dfl,0xcc9c007e6d5b2l,0x4a8517a6c74b6l,0xe2631a656ec0dl,
-        0x0bd8f17411969l },
-      { 0xbbb86beb7494al,0x6f45f3b8388a9l,0x4e5a79a1567d4l,0xfa09df7a12a7al,
-        0x02d1a1c3530ccl },
-      0 },
+    { { 0x1bffa1043e0dfL,0xcc9c007e6d5b2L,0x4a8517a6c74b6L,0xe2631a656ec0dL,
+        0x0bd8f17411969L },
+      { 0xbbb86beb7494aL,0x6f45f3b8388a9L,0x4e5a79a1567d4L,0xfa09df7a12a7aL,
+        0x02d1a1c3530ccL } },
     /* 107 */
-    { { 0xe3813506508dal,0xc4a1d795a7192l,0xa9944b3336180l,0xba46cddb59497l,
-        0x0a107a65eb91fl },
-      { 0x1d1c50f94d639l,0x758a58b7d7e6dl,0xd37ca1c8b4af3l,0x9af21a7c5584bl,
-        0x0183d760af87al },
-      0 },
+    { { 0xe3813506508daL,0xc4a1d795a7192L,0xa9944b3336180L,0xba46cddb59497L,
+        0x0a107a65eb91fL },
+      { 0x1d1c50f94d639L,0x758a58b7d7e6dL,0xd37ca1c8b4af3L,0x9af21a7c5584bL,
+        0x0183d760af87aL } },
     /* 108 */
-    { { 0x697110dde59a4l,0x070e8bef8729dl,0xf2ebe78f1ad8dl,0xd754229b49634l,
-        0x01d44179dc269l },
-      { 0xdc0cf8390d30el,0x530de8110cb32l,0xbc0339a0a3b27l,0xd26231af1dc52l,
-        0x0771f9cc29606l },
-      0 },
+    { { 0x697110dde59a4L,0x070e8bef8729dL,0xf2ebe78f1ad8dL,0xd754229b49634L,
+        0x01d44179dc269L },
+      { 0xdc0cf8390d30eL,0x530de8110cb32L,0xbc0339a0a3b27L,0xd26231af1dc52L,
+        0x0771f9cc29606L } },
     /* 109 */
-    { { 0x93e7785040739l,0xb98026a939999l,0x5f8fc2644539dl,0x718ecf40f6f2fl,
-        0x064427a310362l },
-      { 0xf2d8785428aa8l,0x3febfb49a84f4l,0x23d01ac7b7adcl,0x0d6d201b2c6dfl,
-        0x049d9b7496ae9l },
-      0 },
+    { { 0x93e7785040739L,0xb98026a939999L,0x5f8fc2644539dL,0x718ecf40f6f2fL,
+        0x064427a310362L },
+      { 0xf2d8785428aa8L,0x3febfb49a84f4L,0x23d01ac7b7adcL,0x0d6d201b2c6dfL,
+        0x049d9b7496ae9L } },
     /* 110 */
-    { { 0x8d8bc435d1099l,0x4e8e8d1a08cc7l,0xcb68a412adbcdl,0x544502c2e2a02l,
-        0x09037d81b3f60l },
-      { 0xbac27074c7b61l,0xab57bfd72e7cdl,0x96d5352fe2031l,0x639c61ccec965l,
-        0x008c3de6a7cc0l },
-      0 },
+    { { 0x8d8bc435d1099L,0x4e8e8d1a08cc7L,0xcb68a412adbcdL,0x544502c2e2a02L,
+        0x09037d81b3f60L },
+      { 0xbac27074c7b61L,0xab57bfd72e7cdL,0x96d5352fe2031L,0x639c61ccec965L,
+        0x008c3de6a7cc0L } },
     /* 111 */
-    { { 0xdd020f6d552abl,0x9805cd81f120fl,0x135129156baffl,0x6b2f06fb7c3e9l,
-        0x0c69094424579l },
-      { 0x3ae9c41231bd1l,0x875cc5820517bl,0x9d6a1221eac6el,0x3ac0208837abfl,
-        0x03fa3db02cafel },
-      0 },
+    { { 0xdd020f6d552abL,0x9805cd81f120fL,0x135129156baffL,0x6b2f06fb7c3e9L,
+        0x0c69094424579L },
+      { 0x3ae9c41231bd1L,0x875cc5820517bL,0x9d6a1221eac6eL,0x3ac0208837abfL,
+        0x03fa3db02cafeL } },
     /* 112 */
-    { { 0xa3e6505058880l,0xef643943f2d75l,0xab249257da365l,0x08ff4147861cfl,
-        0x0c5c4bdb0fdb8l },
-      { 0x13e34b272b56bl,0x9511b9043a735l,0x8844969c8327el,0xb6b5fd8ce37dfl,
-        0x02d56db9446c2l },
-      0 },
+    { { 0xa3e6505058880L,0xef643943f2d75L,0xab249257da365L,0x08ff4147861cfL,
+        0x0c5c4bdb0fdb8L },
+      { 0x13e34b272b56bL,0x9511b9043a735L,0x8844969c8327eL,0xb6b5fd8ce37dfL,
+        0x02d56db9446c2L } },
     /* 113 */
-    { { 0x1782fff46ac6bl,0x2607a2e425246l,0x9a48de1d19f79l,0xba42fafea3c40l,
-        0x00f56bd9de503l },
-      { 0xd4ed1345cda49l,0xfc816f299d137l,0xeb43402821158l,0xb5f1e7c6a54aal,
-        0x04003bb9d1173l },
-      0 },
+    { { 0x1782fff46ac6bL,0x2607a2e425246L,0x9a48de1d19f79L,0xba42fafea3c40L,
+        0x00f56bd9de503L },
+      { 0xd4ed1345cda49L,0xfc816f299d137L,0xeb43402821158L,0xb5f1e7c6a54aaL,
+        0x04003bb9d1173L } },
     /* 114 */
-    { { 0xe8189a0803387l,0xf539cbd4043b8l,0x2877f21ece115l,0x2f9e4297208ddl,
-        0x053765522a07fl },
-      { 0x80a21a8a4182dl,0x7a3219df79a49l,0xa19a2d4a2bbd0l,0x4549674d0a2e1l,
-        0x07a056f586c5dl },
-      0 },
+    { { 0xe8189a0803387L,0xf539cbd4043b8L,0x2877f21ece115L,0x2f9e4297208ddL,
+        0x053765522a07fL },
+      { 0x80a21a8a4182dL,0x7a3219df79a49L,0xa19a2d4a2bbd0L,0x4549674d0a2e1L,
+        0x07a056f586c5dL } },
     /* 115 */
-    { { 0xb25589d8a2a47l,0x48c3df2773646l,0xbf0d5395b5829l,0x267551ec000eal,
-        0x077d482f17a1al },
-      { 0x1bd9587853948l,0xbd6cfbffeeb8al,0x0681e47a6f817l,0xb0e4ab6ec0578l,
-        0x04115012b2b38l },
-      0 },
+    { { 0xb25589d8a2a47L,0x48c3df2773646L,0xbf0d5395b5829L,0x267551ec000eaL,
+        0x077d482f17a1aL },
+      { 0x1bd9587853948L,0xbd6cfbffeeb8aL,0x0681e47a6f817L,0xb0e4ab6ec0578L,
+        0x04115012b2b38L } },
     /* 116 */
-    { { 0x3f0f46de28cedl,0x609b13ec473c7l,0xe5c63921d5da7l,0x094661b8ce9e6l,
-        0x0cdf04572fbeal },
-      { 0x3c58b6c53c3b0l,0x10447b843c1cbl,0xcb9780e97fe3cl,0x3109fb2b8ae12l,
-        0x0ee703dda9738l },
-      0 },
+    { { 0x3f0f46de28cedL,0x609b13ec473c7L,0xe5c63921d5da7L,0x094661b8ce9e6L,
+        0x0cdf04572fbeaL },
+      { 0x3c58b6c53c3b0L,0x10447b843c1cbL,0xcb9780e97fe3cL,0x3109fb2b8ae12L,
+        0x0ee703dda9738L } },
     /* 117 */
-    { { 0x15140ff57e43al,0xd3b1b811b8345l,0xf42b986d44660l,0xce212b3b5dff8l,
-        0x02a0ad89da162l },
-      { 0x4a6946bc277bal,0x54c141c27664el,0xabf6274c788c9l,0x4659141aa64ccl,
-        0x0d62d0b67ac2bl },
-      0 },
+    { { 0x15140ff57e43aL,0xd3b1b811b8345L,0xf42b986d44660L,0xce212b3b5dff8L,
+        0x02a0ad89da162L },
+      { 0x4a6946bc277baL,0x54c141c27664eL,0xabf6274c788c9L,0x4659141aa64ccL,
+        0x0d62d0b67ac2bL } },
     /* 118 */
-    { { 0x5d87b2c054ac4l,0x59f27df78839cl,0x18128d6570058l,0x2426edf7cbf3bl,
-        0x0b39a23f2991cl },
-      { 0x84a15f0b16ae5l,0xb1a136f51b952l,0x27007830c6a05l,0x4cc51d63c137fl,
-        0x004ed0092c067l },
-      0 },
+    { { 0x5d87b2c054ac4L,0x59f27df78839cL,0x18128d6570058L,0x2426edf7cbf3bL,
+        0x0b39a23f2991cL },
+      { 0x84a15f0b16ae5L,0xb1a136f51b952L,0x27007830c6a05L,0x4cc51d63c137fL,
+        0x004ed0092c067L } },
     /* 119 */
-    { { 0x185d19ae90393l,0x294a3d64e61f4l,0x854fc143047b4l,0xc387ae0001a69l,
-        0x0a0a91fc10177l },
-      { 0xa3f01ae2c831el,0x822b727e16ff0l,0xa3075b4bb76ael,0x0c418f12c8a15l,
-        0x0084cf9889ed2l },
-      0 },
+    { { 0x185d19ae90393L,0x294a3d64e61f4L,0x854fc143047b4L,0xc387ae0001a69L,
+        0x0a0a91fc10177L },
+      { 0xa3f01ae2c831eL,0x822b727e16ff0L,0xa3075b4bb76aeL,0x0c418f12c8a15L,
+        0x0084cf9889ed2L } },
     /* 120 */
-    { { 0x509defca6becfl,0x807dffb328d98l,0x778e8b92fceael,0xf77e5d8a15c44l,
-        0x0d57955b273abl },
-      { 0xda79e31b5d4f1l,0x4b3cfa7a1c210l,0xc27c20baa52f0l,0x41f1d4d12089dl,
-        0x08e14ea4202d1l },
-      0 },
+    { { 0x509defca6becfL,0x807dffb328d98L,0x778e8b92fceaeL,0xf77e5d8a15c44L,
+        0x0d57955b273abL },
+      { 0xda79e31b5d4f1L,0x4b3cfa7a1c210L,0xc27c20baa52f0L,0x41f1d4d12089dL,
+        0x08e14ea4202d1L } },
     /* 121 */
-    { { 0x50345f2897042l,0x1f43402c4aeedl,0x8bdfb218d0533l,0xd158c8d9c194cl,
-        0x0597e1a372aa4l },
-      { 0x7ec1acf0bd68cl,0xdcab024945032l,0x9fe3e846d4be0l,0x4dea5b9c8d7acl,
-        0x0ca3f0236199bl },
-      0 },
+    { { 0x50345f2897042L,0x1f43402c4aeedL,0x8bdfb218d0533L,0xd158c8d9c194cL,
+        0x0597e1a372aa4L },
+      { 0x7ec1acf0bd68cL,0xdcab024945032L,0x9fe3e846d4be0L,0x4dea5b9c8d7acL,
+        0x0ca3f0236199bL } },
     /* 122 */
-    { { 0xa10b56170bd20l,0xf16d3f5de7592l,0x4b2ade20ea897l,0x07e4a3363ff14l,
-        0x0bde7fd7e309cl },
-      { 0xbb6d2b8f5432cl,0xcbe043444b516l,0x8f95b5a210dc1l,0xd1983db01e6ffl,
-        0x0b623ad0e0a7dl },
-      0 },
+    { { 0xa10b56170bd20L,0xf16d3f5de7592L,0x4b2ade20ea897L,0x07e4a3363ff14L,
+        0x0bde7fd7e309cL },
+      { 0xbb6d2b8f5432cL,0xcbe043444b516L,0x8f95b5a210dc1L,0xd1983db01e6ffL,
+        0x0b623ad0e0a7dL } },
     /* 123 */
-    { { 0xbd67560c7b65bl,0x9023a4a289a75l,0x7b26795ab8c55l,0x137bf8220fd0dl,
-        0x0d6aa2e4658ecl },
-      { 0xbc00b5138bb85l,0x21d833a95c10al,0x702a32e8c31d1l,0x513ab24ff00b1l,
-        0x0111662e02dccl },
-      0 },
+    { { 0xbd67560c7b65bL,0x9023a4a289a75L,0x7b26795ab8c55L,0x137bf8220fd0dL,
+        0x0d6aa2e4658ecL },
+      { 0xbc00b5138bb85L,0x21d833a95c10aL,0x702a32e8c31d1L,0x513ab24ff00b1L,
+        0x0111662e02dccL } },
     /* 124 */
-    { { 0x14015efb42b87l,0x701b6c4dff781l,0x7d7c129bd9f5dl,0x50f866ecccd7al,
-        0x0db3ee1cb94b7l },
-      { 0xf3db0f34837cfl,0x8bb9578d4fb26l,0xc56657de7eed1l,0x6a595d2cdf937l,
-        0x0886a64425220l },
-      0 },
+    { { 0x14015efb42b87L,0x701b6c4dff781L,0x7d7c129bd9f5dL,0x50f866ecccd7aL,
+        0x0db3ee1cb94b7L },
+      { 0xf3db0f34837cfL,0x8bb9578d4fb26L,0xc56657de7eed1L,0x6a595d2cdf937L,
+        0x0886a64425220L } },
     /* 125 */
-    { { 0x34cfb65b569eal,0x41f72119c13c2l,0x15a619e200111l,0x17bc8badc85dal,
-        0x0a70cf4eb018al },
-      { 0xf97ae8c4a6a65l,0x270134378f224l,0xf7e096036e5cfl,0x7b77be3a609e4l,
-        0x0aa4772abd174l },
-      0 },
+    { { 0x34cfb65b569eaL,0x41f72119c13c2L,0x15a619e200111L,0x17bc8badc85daL,
+        0x0a70cf4eb018aL },
+      { 0xf97ae8c4a6a65L,0x270134378f224L,0xf7e096036e5cfL,0x7b77be3a609e4L,
+        0x0aa4772abd174L } },
     /* 126 */
-    { { 0x761317aa60cc0l,0x610368115f676l,0xbc1bb5ac79163l,0xf974ded98bb4bl,
-        0x0611a6ddc30fal },
-      { 0x78cbcc15ee47al,0x824e0d96a530el,0xdd9ed882e8962l,0x9c8836f35adf3l,
-        0x05cfffaf81642l },
-      0 },
+    { { 0x761317aa60cc0L,0x610368115f676L,0xbc1bb5ac79163L,0xf974ded98bb4bL,
+        0x0611a6ddc30faL },
+      { 0x78cbcc15ee47aL,0x824e0d96a530eL,0xdd9ed882e8962L,0x9c8836f35adf3L,
+        0x05cfffaf81642L } },
     /* 127 */
-    { { 0x54cff9b7a99cdl,0x9d843c45a1c0dl,0x2c739e17bf3b9l,0x994c038a908f6l,
-        0x06e5a6b237dc1l },
-      { 0xb454e0ba5db77l,0x7facf60d63ef8l,0x6608378b7b880l,0xabcce591c0c67l,
-        0x0481a238d242dl },
-      0 },
+    { { 0x54cff9b7a99cdL,0x9d843c45a1c0dL,0x2c739e17bf3b9L,0x994c038a908f6L,
+        0x06e5a6b237dc1L },
+      { 0xb454e0ba5db77L,0x7facf60d63ef8L,0x6608378b7b880L,0xabcce591c0c67L,
+        0x0481a238d242dL } },
     /* 128 */
-    { { 0x17bc035d0b34al,0x6b8327c0a7e34l,0xc0362d1440b38l,0xf9438fb7262dal,
-        0x02c41114ce0cdl },
-      { 0x5cef1ad95a0b1l,0xa867d543622bal,0x1e486c9c09b37l,0x929726d6cdd20l,
-        0x020477abf42ffl },
-      0 },
+    { { 0x17bc035d0b34aL,0x6b8327c0a7e34L,0xc0362d1440b38L,0xf9438fb7262daL,
+        0x02c41114ce0cdL },
+      { 0x5cef1ad95a0b1L,0xa867d543622baL,0x1e486c9c09b37L,0x929726d6cdd20L,
+        0x020477abf42ffL } },
     /* 129 */
-    { { 0x5173c18d65dbfl,0x0e339edad82f7l,0xcf1001c77bf94l,0x96b67022d26bdl,
-        0x0ac66409ac773l },
-      { 0xbb36fc6261cc3l,0xc9190e7e908b0l,0x45e6c10213f7bl,0x2f856541cebaal,
-        0x0ce8e6975cc12l },
-      0 },
+    { { 0x5173c18d65dbfL,0x0e339edad82f7L,0xcf1001c77bf94L,0x96b67022d26bdL,
+        0x0ac66409ac773L },
+      { 0xbb36fc6261cc3L,0xc9190e7e908b0L,0x45e6c10213f7bL,0x2f856541cebaaL,
+        0x0ce8e6975cc12L } },
     /* 130 */
-    { { 0x21b41bc0a67d2l,0x0a444d248a0f1l,0x59b473762d476l,0xb4a80e044f1d6l,
-        0x008fde365250bl },
-      { 0xec3da848bf287l,0x82d3369d6eacel,0x2449482c2a621l,0x6cd73582dfdc9l,
-        0x02f7e2fd2565dl },
-      0 },
+    { { 0x21b41bc0a67d2L,0x0a444d248a0f1L,0x59b473762d476L,0xb4a80e044f1d6L,
+        0x008fde365250bL },
+      { 0xec3da848bf287L,0x82d3369d6eaceL,0x2449482c2a621L,0x6cd73582dfdc9L,
+        0x02f7e2fd2565dL } },
     /* 131 */
-    { { 0xb92dbc3770fa7l,0x5c379043f9ae4l,0x7761171095e8dl,0x02ae54f34e9d1l,
-        0x0c65be92e9077l },
-      { 0x8a303f6fd0a40l,0xe3bcce784b275l,0xf9767bfe7d822l,0x3b3a7ae4f5854l,
-        0x04bff8e47d119l },
-      0 },
+    { { 0xb92dbc3770fa7L,0x5c379043f9ae4L,0x7761171095e8dL,0x02ae54f34e9d1L,
+        0x0c65be92e9077L },
+      { 0x8a303f6fd0a40L,0xe3bcce784b275L,0xf9767bfe7d822L,0x3b3a7ae4f5854L,
+        0x04bff8e47d119L } },
     /* 132 */
-    { { 0x1d21f00ff1480l,0x7d0754db16cd4l,0xbe0f3ea2ab8fbl,0x967dac81d2efbl,
-        0x03e4e4ae65772l },
-      { 0x8f36d3c5303e6l,0x4b922623977e1l,0x324c3c03bd999l,0x60289ed70e261l,
-        0x05388aefd58ecl },
-      0 },
+    { { 0x1d21f00ff1480L,0x7d0754db16cd4L,0xbe0f3ea2ab8fbL,0x967dac81d2efbL,
+        0x03e4e4ae65772L },
+      { 0x8f36d3c5303e6L,0x4b922623977e1L,0x324c3c03bd999L,0x60289ed70e261L,
+        0x05388aefd58ecL } },
     /* 133 */
-    { { 0x317eb5e5d7713l,0xee75de49daad1l,0x74fb26109b985l,0xbe0e32f5bc4fcl,
-        0x05cf908d14f75l },
-      { 0x435108e657b12l,0xa5b96ed9e6760l,0x970ccc2bfd421l,0x0ce20e29f51f8l,
-        0x0a698ba4060f0l },
-      0 },
+    { { 0x317eb5e5d7713L,0xee75de49daad1L,0x74fb26109b985L,0xbe0e32f5bc4fcL,
+        0x05cf908d14f75L },
+      { 0x435108e657b12L,0xa5b96ed9e6760L,0x970ccc2bfd421L,0x0ce20e29f51f8L,
+        0x0a698ba4060f0L } },
     /* 134 */
-    { { 0xb1686ef748fecl,0xa27e9d2cf973dl,0xe265effe6e755l,0xad8d630b6544cl,
-        0x0b142ef8a7aebl },
-      { 0x1af9f17d5770al,0x672cb3412fad3l,0xf3359de66af3bl,0x50756bd60d1bdl,
-        0x0d1896a965851l },
-      0 },
+    { { 0xb1686ef748fecL,0xa27e9d2cf973dL,0xe265effe6e755L,0xad8d630b6544cL,
+        0x0b142ef8a7aebL },
+      { 0x1af9f17d5770aL,0x672cb3412fad3L,0xf3359de66af3bL,0x50756bd60d1bdL,
+        0x0d1896a965851L } },
     /* 135 */
-    { { 0x957ab33c41c08l,0xac5468e2e1ec5l,0xc472f6c87de94l,0xda3918816b73al,
-        0x0267b0e0b7981l },
-      { 0x54e5d8e62b988l,0x55116d21e76e5l,0xd2a6f99d8ddc7l,0x93934610faf03l,
-        0x0b54e287aa111l },
-      0 },
+    { { 0x957ab33c41c08L,0xac5468e2e1ec5L,0xc472f6c87de94L,0xda3918816b73aL,
+        0x0267b0e0b7981L },
+      { 0x54e5d8e62b988L,0x55116d21e76e5L,0xd2a6f99d8ddc7L,0x93934610faf03L,
+        0x0b54e287aa111L } },
     /* 136 */
-    { { 0x122b5178a876bl,0xff085104b40a0l,0x4f29f7651ff96l,0xd4e6050b31ab1l,
-        0x084abb28b5f87l },
-      { 0xd439f8270790al,0x9d85e3f46bd5el,0xc1e22122d6cb5l,0x564075f55c1b6l,
-        0x0e5436f671765l },
-      0 },
+    { { 0x122b5178a876bL,0xff085104b40a0L,0x4f29f7651ff96L,0xd4e6050b31ab1L,
+        0x084abb28b5f87L },
+      { 0xd439f8270790aL,0x9d85e3f46bd5eL,0xc1e22122d6cb5L,0x564075f55c1b6L,
+        0x0e5436f671765L } },
     /* 137 */
-    { { 0x9025e2286e8d5l,0xb4864453be53fl,0x408e3a0353c95l,0xe99ed832f5bdel,
-        0x00404f68b5b9cl },
-      { 0x33bdea781e8e5l,0x18163c2f5bcadl,0x119caa33cdf50l,0xc701575769600l,
-        0x03a4263df0ac1l },
-      0 },
+    { { 0x9025e2286e8d5L,0xb4864453be53fL,0x408e3a0353c95L,0xe99ed832f5bdeL,
+        0x00404f68b5b9cL },
+      { 0x33bdea781e8e5L,0x18163c2f5bcadL,0x119caa33cdf50L,0xc701575769600L,
+        0x03a4263df0ac1L } },
     /* 138 */
-    { { 0x65ecc9aeb596dl,0xe7023c92b4c29l,0xe01396101ea03l,0xa3674704b4b62l,
-        0x00ca8fd3f905el },
-      { 0x23a42551b2b61l,0x9c390fcd06925l,0x392a63e1eb7a8l,0x0c33e7f1d2be0l,
-        0x096dca2644ddbl },
-      0 },
+    { { 0x65ecc9aeb596dL,0xe7023c92b4c29L,0xe01396101ea03L,0xa3674704b4b62L,
+        0x00ca8fd3f905eL },
+      { 0x23a42551b2b61L,0x9c390fcd06925L,0x392a63e1eb7a8L,0x0c33e7f1d2be0L,
+        0x096dca2644ddbL } },
     /* 139 */
-    { { 0xbb43a387510afl,0xa8a9a36a01203l,0xf950378846feal,0x59dcd23a57702l,
-        0x04363e2123aadl },
-      { 0x3a1c740246a47l,0xd2e55dd24dca4l,0xd8faf96b362b8l,0x98c4f9b086045l,
-        0x0840e115cd8bbl },
-      0 },
+    { { 0xbb43a387510afL,0xa8a9a36a01203L,0xf950378846feaL,0x59dcd23a57702L,
+        0x04363e2123aadL },
+      { 0x3a1c740246a47L,0xd2e55dd24dca4L,0xd8faf96b362b8L,0x98c4f9b086045L,
+        0x0840e115cd8bbL } },
     /* 140 */
-    { { 0x205e21023e8a7l,0xcdd8dc7a0bf12l,0x63a5ddfc808a8l,0xd6d4e292a2721l,
-        0x05e0d6abd30del },
-      { 0x721c27cfc0f64l,0x1d0e55ed8807al,0xd1f9db242eec0l,0xa25a26a7bef91l,
-        0x07dea48f42945l },
-      0 },
+    { { 0x205e21023e8a7L,0xcdd8dc7a0bf12L,0x63a5ddfc808a8L,0xd6d4e292a2721L,
+        0x05e0d6abd30deL },
+      { 0x721c27cfc0f64L,0x1d0e55ed8807aL,0xd1f9db242eec0L,0xa25a26a7bef91L,
+        0x07dea48f42945L } },
     /* 141 */
-    { { 0xf6f1ce5060a81l,0x72f8f95615abdl,0x6ac268be79f9cl,0x16d1cfd36c540l,
-        0x0abc2a2beebfdl },
-      { 0x66f91d3e2eac7l,0x63d2dd04668acl,0x282d31b6f10bal,0xefc16790e3770l,
-        0x04ea353946c7el },
-      0 },
+    { { 0xf6f1ce5060a81L,0x72f8f95615abdL,0x6ac268be79f9cL,0x16d1cfd36c540L,
+        0x0abc2a2beebfdL },
+      { 0x66f91d3e2eac7L,0x63d2dd04668acL,0x282d31b6f10baL,0xefc16790e3770L,
+        0x04ea353946c7eL } },
     /* 142 */
-    { { 0xa2f8d5266309dl,0xc081945a3eed8l,0x78c5dc10a51c6l,0xffc3cecaf45a5l,
-        0x03a76e6891c94l },
-      { 0xce8a47d7b0d0fl,0x968f584a5f9aal,0xe697fbe963acel,0x646451a30c724l,
-        0x08212a10a465el },
-      0 },
+    { { 0xa2f8d5266309dL,0xc081945a3eed8L,0x78c5dc10a51c6L,0xffc3cecaf45a5L,
+        0x03a76e6891c94L },
+      { 0xce8a47d7b0d0fL,0x968f584a5f9aaL,0xe697fbe963aceL,0x646451a30c724L,
+        0x08212a10a465eL } },
     /* 143 */
-    { { 0xc61c3cfab8caal,0x840e142390ef7l,0xe9733ca18eb8el,0xb164cd1dff677l,
-        0x0aa7cab71599cl },
-      { 0xc9273bc837bd1l,0xd0c36af5d702fl,0x423da49c06407l,0x17c317621292fl,
-        0x040e38073fe06l },
-      0 },
+    { { 0xc61c3cfab8caaL,0x840e142390ef7L,0xe9733ca18eb8eL,0xb164cd1dff677L,
+        0x0aa7cab71599cL },
+      { 0xc9273bc837bd1L,0xd0c36af5d702fL,0x423da49c06407L,0x17c317621292fL,
+        0x040e38073fe06L } },
     /* 144 */
-    { { 0x80824a7bf9b7cl,0x203fbe30d0f4fl,0x7cf9ce3365d23l,0x5526bfbe53209l,
-        0x0e3604700b305l },
-      { 0xb99116cc6c2c7l,0x08ba4cbee64dcl,0x37ad9ec726837l,0xe15fdcded4346l,
-        0x06542d677a3del },
-      0 },
+    { { 0x80824a7bf9b7cL,0x203fbe30d0f4fL,0x7cf9ce3365d23L,0x5526bfbe53209L,
+        0x0e3604700b305L },
+      { 0xb99116cc6c2c7L,0x08ba4cbee64dcL,0x37ad9ec726837L,0xe15fdcded4346L,
+        0x06542d677a3deL } },
     /* 145 */
-    { { 0x2b6d07b6c377al,0x47903448be3f3l,0x0da8af76cb038l,0x6f21d6fdd3a82l,
-        0x0a6534aee09bbl },
-      { 0x1780d1035facfl,0x339dcb47e630al,0x447f39335e55al,0xef226ea50fe1cl,
-        0x0f3cb672fdc9al },
-      0 },
+    { { 0x2b6d07b6c377aL,0x47903448be3f3L,0x0da8af76cb038L,0x6f21d6fdd3a82L,
+        0x0a6534aee09bbL },
+      { 0x1780d1035facfL,0x339dcb47e630aL,0x447f39335e55aL,0xef226ea50fe1cL,
+        0x0f3cb672fdc9aL } },
     /* 146 */
-    { { 0x719fe3b55fd83l,0x6c875ddd10eb3l,0x5cea784e0d7a4l,0x70e733ac9fa90l,
-        0x07cafaa2eaae8l },
-      { 0x14d041d53b338l,0xa0ef87e6c69b8l,0x1672b0fe0acc0l,0x522efb93d1081l,
-        0x00aab13c1b9bdl },
-      0 },
+    { { 0x719fe3b55fd83L,0x6c875ddd10eb3L,0x5cea784e0d7a4L,0x70e733ac9fa90L,
+        0x07cafaa2eaae8L },
+      { 0x14d041d53b338L,0xa0ef87e6c69b8L,0x1672b0fe0acc0L,0x522efb93d1081L,
+        0x00aab13c1b9bdL } },
     /* 147 */
-    { { 0xce278d2681297l,0xb1b509546addcl,0x661aaf2cb350el,0x12e92dc431737l,
-        0x04b91a6028470l },
-      { 0xf109572f8ddcfl,0x1e9a911af4dcfl,0x372430e08ebf6l,0x1cab48f4360acl,
-        0x049534c537232l },
-      0 },
+    { { 0xce278d2681297L,0xb1b509546addcL,0x661aaf2cb350eL,0x12e92dc431737L,
+        0x04b91a6028470L },
+      { 0xf109572f8ddcfL,0x1e9a911af4dcfL,0x372430e08ebf6L,0x1cab48f4360acL,
+        0x049534c537232L } },
     /* 148 */
-    { { 0xf7d71f07b7e9dl,0xa313cd516f83dl,0xc047ee3a478efl,0xc5ee78ef264b6l,
-        0x0caf46c4fd65al },
-      { 0xd0c7792aa8266l,0x66913684bba04l,0xe4b16b0edf454l,0x770f56e65168al,
-        0x014ce9e5704c6l },
-      0 },
+    { { 0xf7d71f07b7e9dL,0xa313cd516f83dL,0xc047ee3a478efL,0xc5ee78ef264b6L,
+        0x0caf46c4fd65aL },
+      { 0xd0c7792aa8266L,0x66913684bba04L,0xe4b16b0edf454L,0x770f56e65168aL,
+        0x014ce9e5704c6L } },
     /* 149 */
-    { { 0x45e3e965e8f91l,0xbacb0f2492994l,0x0c8a0a0d3aca1l,0x9a71d31cc70f9l,
-        0x01bb708a53e4cl },
-      { 0xa9e69558bdd7al,0x08018a26b1d5cl,0xc9cf1ec734a05l,0x0102b093aa714l,
-        0x0f9d126f2da30l },
-      0 },
+    { { 0x45e3e965e8f91L,0xbacb0f2492994L,0x0c8a0a0d3aca1L,0x9a71d31cc70f9L,
+        0x01bb708a53e4cL },
+      { 0xa9e69558bdd7aL,0x08018a26b1d5cL,0xc9cf1ec734a05L,0x0102b093aa714L,
+        0x0f9d126f2da30L } },
     /* 150 */
-    { { 0xbca7aaff9563el,0xfeb49914a0749l,0xf5f1671dd077al,0xcc69e27a0311bl,
-        0x0807afcb9729el },
-      { 0xa9337c9b08b77l,0x85443c7e387f8l,0x76fd8ba86c3a7l,0xcd8c85fafa594l,
-        0x0751adcd16568l },
-      0 },
+    { { 0xbca7aaff9563eL,0xfeb49914a0749L,0xf5f1671dd077aL,0xcc69e27a0311bL,
+        0x0807afcb9729eL },
+      { 0xa9337c9b08b77L,0x85443c7e387f8L,0x76fd8ba86c3a7L,0xcd8c85fafa594L,
+        0x0751adcd16568L } },
     /* 151 */
-    { { 0xa38b410715c0dl,0x718f7697f78ael,0x3fbf06dd113eal,0x743f665eab149l,
-        0x029ec44682537l },
-      { 0x4719cb50bebbcl,0xbfe45054223d9l,0xd2dedb1399ee5l,0x077d90cd5b3a8l,
-        0x0ff9370e392a4l },
-      0 },
+    { { 0xa38b410715c0dL,0x718f7697f78aeL,0x3fbf06dd113eaL,0x743f665eab149L,
+        0x029ec44682537L },
+      { 0x4719cb50bebbcL,0xbfe45054223d9L,0xd2dedb1399ee5L,0x077d90cd5b3a8L,
+        0x0ff9370e392a4L } },
     /* 152 */
-    { { 0x2d69bc6b75b65l,0xd5266651c559al,0xde9d7d24188f8l,0xd01a28a9f33e3l,
-        0x09776478ba2a9l },
-      { 0x2622d929af2c7l,0x6d4e690923885l,0x89a51e9334f5dl,0x82face6cc7e5al,
-        0x074a6313fac2fl },
-      0 },
+    { { 0x2d69bc6b75b65L,0xd5266651c559aL,0xde9d7d24188f8L,0xd01a28a9f33e3L,
+        0x09776478ba2a9L },
+      { 0x2622d929af2c7L,0x6d4e690923885L,0x89a51e9334f5dL,0x82face6cc7e5aL,
+        0x074a6313fac2fL } },
     /* 153 */
-    { { 0x4dfddb75f079cl,0x9518e36fbbb2fl,0x7cd36dd85b07cl,0x863d1b6cfcf0el,
-        0x0ab75be150ff4l },
-      { 0x367c0173fc9b7l,0x20d2594fd081bl,0x4091236b90a74l,0x59f615fdbf03cl,
-        0x04ebeac2e0b44l },
-      0 },
+    { { 0x4dfddb75f079cL,0x9518e36fbbb2fL,0x7cd36dd85b07cL,0x863d1b6cfcf0eL,
+        0x0ab75be150ff4L },
+      { 0x367c0173fc9b7L,0x20d2594fd081bL,0x4091236b90a74L,0x59f615fdbf03cL,
+        0x04ebeac2e0b44L } },
     /* 154 */
-    { { 0xc5fe75c9f2c53l,0x118eae9411eb6l,0x95ac5d8d25220l,0xaffcc8887633fl,
-        0x0df99887b2c1bl },
-      { 0x8eed2850aaecbl,0x1b01d6a272bb7l,0x1cdbcac9d4918l,0x4058978dd511bl,
-        0x027b040a7779fl },
-      0 },
+    { { 0xc5fe75c9f2c53L,0x118eae9411eb6L,0x95ac5d8d25220L,0xaffcc8887633fL,
+        0x0df99887b2c1bL },
+      { 0x8eed2850aaecbL,0x1b01d6a272bb7L,0x1cdbcac9d4918L,0x4058978dd511bL,
+        0x027b040a7779fL } },
     /* 155 */
-    { { 0x05db7f73b2eb2l,0x088e1b2118904l,0x962327ee0df85l,0xa3f5501b71525l,
-        0x0b393dd37e4cfl },
-      { 0x30e7b3fd75165l,0xc2bcd33554a12l,0xf7b5022d66344l,0x34196c36f1be0l,
-        0x009588c12d046l },
-      0 },
+    { { 0x05db7f73b2eb2L,0x088e1b2118904L,0x962327ee0df85L,0xa3f5501b71525L,
+        0x0b393dd37e4cfL },
+      { 0x30e7b3fd75165L,0xc2bcd33554a12L,0xf7b5022d66344L,0x34196c36f1be0L,
+        0x009588c12d046L } },
     /* 156 */
-    { { 0x6093f02601c3bl,0xf8cf5c335fe08l,0x94aff28fb0252l,0x648b955cf2808l,
-        0x081c879a9db9fl },
-      { 0xe687cc6f56c51l,0x693f17618c040l,0x059353bfed471l,0x1bc444f88a419l,
-        0x0fa0d48f55fc1l },
-      0 },
+    { { 0x6093f02601c3bL,0xf8cf5c335fe08L,0x94aff28fb0252L,0x648b955cf2808L,
+        0x081c879a9db9fL },
+      { 0xe687cc6f56c51L,0x693f17618c040L,0x059353bfed471L,0x1bc444f88a419L,
+        0x0fa0d48f55fc1L } },
     /* 157 */
-    { { 0xe1c9de1608e4dl,0x113582822cbc6l,0x57ec2d7010ddal,0x67d6f6b7ddc11l,
-        0x08ea0e156b6a3l },
-      { 0x4e02f2383b3b4l,0x943f01f53ca35l,0xde03ca569966bl,0xb5ac4ff6632b2l,
-        0x03f5ab924fa00l },
-      0 },
+    { { 0xe1c9de1608e4dL,0x113582822cbc6L,0x57ec2d7010ddaL,0x67d6f6b7ddc11L,
+        0x08ea0e156b6a3L },
+      { 0x4e02f2383b3b4L,0x943f01f53ca35L,0xde03ca569966bL,0xb5ac4ff6632b2L,
+        0x03f5ab924fa00L } },
     /* 158 */
-    { { 0xbb0d959739efbl,0xf4e7ebec0d337l,0x11a67d1c751b0l,0x256e2da52dd64l,
-        0x08bc768872b74l },
-      { 0xe3b7282d3d253l,0xa1f58d779fa5bl,0x16767bba9f679l,0xf34fa1cac168el,
-        0x0b386f19060fcl },
-      0 },
+    { { 0xbb0d959739efbL,0xf4e7ebec0d337L,0x11a67d1c751b0L,0x256e2da52dd64L,
+        0x08bc768872b74L },
+      { 0xe3b7282d3d253L,0xa1f58d779fa5bL,0x16767bba9f679L,0xf34fa1cac168eL,
+        0x0b386f19060fcL } },
     /* 159 */
-    { { 0x3c1352fedcfc2l,0x6262f8af0d31fl,0x57288c25396bfl,0x9c4d9a02b4eael,
-        0x04cb460f71b06l },
-      { 0x7b4d35b8095eal,0x596fc07603ae6l,0x614a16592bbf8l,0x5223e1475f66bl,
-        0x052c0d50895efl },
-      0 },
+    { { 0x3c1352fedcfc2L,0x6262f8af0d31fL,0x57288c25396bfL,0x9c4d9a02b4eaeL,
+        0x04cb460f71b06L },
+      { 0x7b4d35b8095eaL,0x596fc07603ae6L,0x614a16592bbf8L,0x5223e1475f66bL,
+        0x052c0d50895efL } },
     /* 160 */
-    { { 0xc210e15339848l,0xe870778c8d231l,0x956e170e87a28l,0x9c0b9d1de6616l,
-        0x04ac3c9382bb0l },
-      { 0xe05516998987dl,0xc4ae09f4d619bl,0xa3f933d8b2376l,0x05f41de0b7651l,
-        0x0380d94c7e397l },
-      0 },
+    { { 0xc210e15339848L,0xe870778c8d231L,0x956e170e87a28L,0x9c0b9d1de6616L,
+        0x04ac3c9382bb0L },
+      { 0xe05516998987dL,0xc4ae09f4d619bL,0xa3f933d8b2376L,0x05f41de0b7651L,
+        0x0380d94c7e397L } },
     /* 161 */
-    { { 0x355aa81542e75l,0xa1ee01b9b701al,0x24d708796c724l,0x37af6b3a29776l,
-        0x02ce3e171de26l },
-      { 0xfeb49f5d5bc1al,0x7e2777e2b5cfel,0x513756ca65560l,0x4e4d4feaac2f9l,
-        0x02e6cd8520b62l },
-      0 },
+    { { 0x355aa81542e75L,0xa1ee01b9b701aL,0x24d708796c724L,0x37af6b3a29776L,
+        0x02ce3e171de26L },
+      { 0xfeb49f5d5bc1aL,0x7e2777e2b5cfeL,0x513756ca65560L,0x4e4d4feaac2f9L,
+        0x02e6cd8520b62L } },
     /* 162 */
-    { { 0x5954b8c31c31dl,0x005bf21a0c368l,0x5c79ec968533dl,0x9d540bd7626e7l,
-        0x0ca17754742c6l },
-      { 0xedafff6d2dbb2l,0xbd174a9d18cc6l,0xa4578e8fd0d8cl,0x2ce6875e8793al,
-        0x0a976a7139cabl },
-      0 },
+    { { 0x5954b8c31c31dL,0x005bf21a0c368L,0x5c79ec968533dL,0x9d540bd7626e7L,
+        0x0ca17754742c6L },
+      { 0xedafff6d2dbb2L,0xbd174a9d18cc6L,0xa4578e8fd0d8cL,0x2ce6875e8793aL,
+        0x0a976a7139cabL } },
     /* 163 */
-    { { 0x51f1b93fb353dl,0x8b57fcfa720a6l,0x1b15281d75cabl,0x4999aa88cfa73l,
-        0x08720a7170a1fl },
-      { 0xe8d37693e1b90l,0x0b16f6dfc38c3l,0x52a8742d345dcl,0x893c8ea8d00abl,
-        0x09719ef29c769l },
-      0 },
+    { { 0x51f1b93fb353dL,0x8b57fcfa720a6L,0x1b15281d75cabL,0x4999aa88cfa73L,
+        0x08720a7170a1fL },
+      { 0xe8d37693e1b90L,0x0b16f6dfc38c3L,0x52a8742d345dcL,0x893c8ea8d00abL,
+        0x09719ef29c769L } },
     /* 164 */
-    { { 0xeed8d58e35909l,0xdc33ddc116820l,0xe2050269366d8l,0x04c1d7f999d06l,
-        0x0a5072976e157l },
-      { 0xa37eac4e70b2el,0x576890aa8a002l,0x45b2a5c84dcf6l,0x7725cd71bf186l,
-        0x099389c9df7b7l },
-      0 },
+    { { 0xeed8d58e35909L,0xdc33ddc116820L,0xe2050269366d8L,0x04c1d7f999d06L,
+        0x0a5072976e157L },
+      { 0xa37eac4e70b2eL,0x576890aa8a002L,0x45b2a5c84dcf6L,0x7725cd71bf186L,
+        0x099389c9df7b7L } },
     /* 165 */
-    { { 0xc08f27ada7a4bl,0x03fd389366238l,0x66f512c3abe9dl,0x82e46b672e897l,
-        0x0a88806aa202cl },
-      { 0x2044ad380184el,0xc4126a8b85660l,0xd844f17a8cb78l,0xdcfe79d670c0al,
-        0x00043bffb4738l },
-      0 },
+    { { 0xc08f27ada7a4bL,0x03fd389366238L,0x66f512c3abe9dL,0x82e46b672e897L,
+        0x0a88806aa202cL },
+      { 0x2044ad380184eL,0xc4126a8b85660L,0xd844f17a8cb78L,0xdcfe79d670c0aL,
+        0x00043bffb4738L } },
     /* 166 */
-    { { 0x9b5dc36d5192el,0xd34590b2af8d5l,0x1601781acf885l,0x486683566d0a1l,
-        0x052f3ef01ba6cl },
-      { 0x6732a0edcb64dl,0x238068379f398l,0x040f3090a482cl,0x7e7516cbe5fa7l,
-        0x03296bd899ef2l },
-      0 },
+    { { 0x9b5dc36d5192eL,0xd34590b2af8d5L,0x1601781acf885L,0x486683566d0a1L,
+        0x052f3ef01ba6cL },
+      { 0x6732a0edcb64dL,0x238068379f398L,0x040f3090a482cL,0x7e7516cbe5fa7L,
+        0x03296bd899ef2L } },
     /* 167 */
-    { { 0xaba89454d81d7l,0xef51eb9b3c476l,0x1c579869eade7l,0x71e9619a21cd8l,
-        0x03b90febfaee5l },
-      { 0x3023e5496f7cbl,0xd87fb51bc4939l,0x9beb5ce55be41l,0x0b1803f1dd489l,
-        0x06e88069d9f81l },
-      0 },
+    { { 0xaba89454d81d7L,0xef51eb9b3c476L,0x1c579869eade7L,0x71e9619a21cd8L,
+        0x03b90febfaee5L },
+      { 0x3023e5496f7cbL,0xd87fb51bc4939L,0x9beb5ce55be41L,0x0b1803f1dd489L,
+        0x06e88069d9f81L } },
     /* 168 */
-    { { 0x7ab11b43ea1dbl,0xa95259d292ce3l,0xf84f1860a7ff1l,0xad13851b02218l,
-        0x0a7222beadefal },
-      { 0xc78ec2b0a9144l,0x51f2fa59c5a2al,0x147ce385a0240l,0xc69091d1eca56l,
-        0x0be94d523bc2al },
-      0 },
+    { { 0x7ab11b43ea1dbL,0xa95259d292ce3L,0xf84f1860a7ff1L,0xad13851b02218L,
+        0x0a7222beadefaL },
+      { 0xc78ec2b0a9144L,0x51f2fa59c5a2aL,0x147ce385a0240L,0xc69091d1eca56L,
+        0x0be94d523bc2aL } },
     /* 169 */
-    { { 0x4945e0b226ce7l,0x47967e8b7072fl,0x5a6c63eb8afd7l,0xc766edea46f18l,
-        0x07782defe9be8l },
-      { 0xd2aa43db38626l,0x8776f67ad1760l,0x4499cdb460ae7l,0x2e4b341b86fc5l,
-        0x003838567a289l },
-      0 },
+    { { 0x4945e0b226ce7L,0x47967e8b7072fL,0x5a6c63eb8afd7L,0xc766edea46f18L,
+        0x07782defe9be8L },
+      { 0xd2aa43db38626L,0x8776f67ad1760L,0x4499cdb460ae7L,0x2e4b341b86fc5L,
+        0x003838567a289L } },
     /* 170 */
-    { { 0xdaefd79ec1a0fl,0xfdceb39c972d8l,0x8f61a953bbcd6l,0xb420f5575ffc5l,
-        0x0dbd986c4adf7l },
-      { 0xa881415f39eb7l,0xf5b98d976c81al,0xf2f717d6ee2fcl,0xbbd05465475dcl,
-        0x08e24d3c46860l },
-      0 },
+    { { 0xdaefd79ec1a0fL,0xfdceb39c972d8L,0x8f61a953bbcd6L,0xb420f5575ffc5L,
+        0x0dbd986c4adf7L },
+      { 0xa881415f39eb7L,0xf5b98d976c81aL,0xf2f717d6ee2fcL,0xbbd05465475dcL,
+        0x08e24d3c46860L } },
     /* 171 */
-    { { 0xd8e549a587390l,0x4f0cbec588749l,0x25983c612bb19l,0xafc846e07da4bl,
-        0x0541a99c4407bl },
-      { 0x41692624c8842l,0x2ad86c05ffdb2l,0xf7fcf626044c1l,0x35d1c59d14b44l,
-        0x0c0092c49f57dl },
-      0 },
+    { { 0xd8e549a587390L,0x4f0cbec588749L,0x25983c612bb19L,0xafc846e07da4bL,
+        0x0541a99c4407bL },
+      { 0x41692624c8842L,0x2ad86c05ffdb2L,0xf7fcf626044c1L,0x35d1c59d14b44L,
+        0x0c0092c49f57dL } },
     /* 172 */
-    { { 0xc75c3df2e61efl,0xc82e1b35cad3cl,0x09f29f47e8841l,0x944dc62d30d19l,
-        0x075e406347286l },
-      { 0x41fc5bbc237d0l,0xf0ec4f01c9e7dl,0x82bd534c9537bl,0x858691c51a162l,
-        0x05b7cb658c784l },
-      0 },
+    { { 0xc75c3df2e61efL,0xc82e1b35cad3cL,0x09f29f47e8841L,0x944dc62d30d19L,
+        0x075e406347286L },
+      { 0x41fc5bbc237d0L,0xf0ec4f01c9e7dL,0x82bd534c9537bL,0x858691c51a162L,
+        0x05b7cb658c784L } },
     /* 173 */
-    { { 0xa70848a28ead1l,0x08fd3b47f6964l,0x67e5b39802dc5l,0x97a19ae4bfd17l,
-        0x07ae13eba8df0l },
-      { 0x16ef8eadd384el,0xd9b6b2ff06fd2l,0xbcdb5f30361a2l,0xe3fd204b98784l,
-        0x0787d8074e2a8l },
-      0 },
+    { { 0xa70848a28ead1L,0x08fd3b47f6964L,0x67e5b39802dc5L,0x97a19ae4bfd17L,
+        0x07ae13eba8df0L },
+      { 0x16ef8eadd384eL,0xd9b6b2ff06fd2L,0xbcdb5f30361a2L,0xe3fd204b98784L,
+        0x0787d8074e2a8L } },
     /* 174 */
-    { { 0x25d6b757fbb1cl,0xb2ca201debc5el,0xd2233ffe47bddl,0x84844a55e9a36l,
-        0x05c2228199ef2l },
-      { 0xd4a8588315250l,0x2b827097c1773l,0xef5d33f21b21al,0xf2b0ab7c4ea1dl,
-        0x0e45d37abbaf0l },
-      0 },
+    { { 0x25d6b757fbb1cL,0xb2ca201debc5eL,0xd2233ffe47bddL,0x84844a55e9a36L,
+        0x05c2228199ef2L },
+      { 0xd4a8588315250L,0x2b827097c1773L,0xef5d33f21b21aL,0xf2b0ab7c4ea1dL,
+        0x0e45d37abbaf0L } },
     /* 175 */
-    { { 0xf1e3428511c8al,0xc8bdca6cd3d2dl,0x27c39a7ebb229l,0xb9d3578a71a76l,
-        0x0ed7bc12284dfl },
-      { 0x2a6df93dea561l,0x8dd48f0ed1cf2l,0xbad23e85443f1l,0x6d27d8b861405l,
-        0x0aac97cc945cal },
-      0 },
+    { { 0xf1e3428511c8aL,0xc8bdca6cd3d2dL,0x27c39a7ebb229L,0xb9d3578a71a76L,
+        0x0ed7bc12284dfL },
+      { 0x2a6df93dea561L,0x8dd48f0ed1cf2L,0xbad23e85443f1L,0x6d27d8b861405L,
+        0x0aac97cc945caL } },
     /* 176 */
-    { { 0x4ea74a16bd00al,0xadf5c0bcc1eb5l,0xf9bfc06d839e9l,0xdc4e092bb7f11l,
-        0x0318f97b31163l },
-      { 0x0c5bec30d7138l,0x23abc30220eccl,0x022360644e8dfl,0xff4d2bb7972fbl,
-        0x0fa41faa19a84l },
-      0 },
+    { { 0x4ea74a16bd00aL,0xadf5c0bcc1eb5L,0xf9bfc06d839e9L,0xdc4e092bb7f11L,
+        0x0318f97b31163L },
+      { 0x0c5bec30d7138L,0x23abc30220eccL,0x022360644e8dfL,0xff4d2bb7972fbL,
+        0x0fa41faa19a84L } },
     /* 177 */
-    { { 0x2d974a6642269l,0xce9bb783bd440l,0x941e60bc81814l,0xe9e2398d38e47l,
-        0x038bb6b2c1d26l },
-      { 0xe4a256a577f87l,0x53dc11fe1cc64l,0x22807288b52d2l,0x01a5ff336abf6l,
-        0x094dd0905ce76l },
-      0 },
+    { { 0x2d974a6642269L,0xce9bb783bd440L,0x941e60bc81814L,0xe9e2398d38e47L,
+        0x038bb6b2c1d26L },
+      { 0xe4a256a577f87L,0x53dc11fe1cc64L,0x22807288b52d2L,0x01a5ff336abf6L,
+        0x094dd0905ce76L } },
     /* 178 */
-    { { 0xcf7dcde93f92al,0xcb89b5f315156l,0x995e750a01333l,0x2ae902404df9cl,
-        0x092077867d25cl },
-      { 0x71e010bf39d44l,0x2096bb53d7e24l,0xc9c3d8f5f2c90l,0xeb514c44b7b35l,
-        0x081e8428bd29bl },
-      0 },
+    { { 0xcf7dcde93f92aL,0xcb89b5f315156L,0x995e750a01333L,0x2ae902404df9cL,
+        0x092077867d25cL },
+      { 0x71e010bf39d44L,0x2096bb53d7e24L,0xc9c3d8f5f2c90L,0xeb514c44b7b35L,
+        0x081e8428bd29bL } },
     /* 179 */
-    { { 0x9c2bac477199fl,0xee6b5ecdd96ddl,0xe40fd0e8cb8eel,0xa4b18af7db3fel,
-        0x01b94ab62dbbfl },
-      { 0x0d8b3ce47f143l,0xfc63f4616344fl,0xc59938351e623l,0x90eef18f270fcl,
-        0x006a38e280555l },
-      0 },
+    { { 0x9c2bac477199fL,0xee6b5ecdd96ddL,0xe40fd0e8cb8eeL,0xa4b18af7db3feL,
+        0x01b94ab62dbbfL },
+      { 0x0d8b3ce47f143L,0xfc63f4616344fL,0xc59938351e623L,0x90eef18f270fcL,
+        0x006a38e280555L } },
     /* 180 */
-    { { 0xb0139b3355b49l,0x60b4ebf99b2e5l,0x269f3dc20e265l,0xd4f8c08ffa6bdl,
-        0x0a7b36c2083d9l },
-      { 0x15c3a1b3e8830l,0xe1a89f9c0b64dl,0x2d16930d5fceal,0x2a20cfeee4a2el,
-        0x0be54c6b4a282l },
-      0 },
+    { { 0xb0139b3355b49L,0x60b4ebf99b2e5L,0x269f3dc20e265L,0xd4f8c08ffa6bdL,
+        0x0a7b36c2083d9L },
+      { 0x15c3a1b3e8830L,0xe1a89f9c0b64dL,0x2d16930d5fceaL,0x2a20cfeee4a2eL,
+        0x0be54c6b4a282L } },
     /* 181 */
-    { { 0xdb3df8d91167cl,0x79e7a6625ed6cl,0x46ac7f4517c3fl,0x22bb7105648f3l,
-        0x0bf30a5abeae0l },
-      { 0x785be93828a68l,0x327f3ef0368e7l,0x92146b25161c3l,0xd13ae11b5feb5l,
-        0x0d1c820de2732l },
-      0 },
+    { { 0xdb3df8d91167cL,0x79e7a6625ed6cL,0x46ac7f4517c3fL,0x22bb7105648f3L,
+        0x0bf30a5abeae0L },
+      { 0x785be93828a68L,0x327f3ef0368e7L,0x92146b25161c3L,0xd13ae11b5feb5L,
+        0x0d1c820de2732L } },
     /* 182 */
-    { { 0xe13479038b363l,0x546b05e519043l,0x026cad158c11fl,0x8da34fe57abe6l,
-        0x0b7d17bed68a1l },
-      { 0xa5891e29c2559l,0x765bfffd8444cl,0x4e469484f7a03l,0xcc64498de4af7l,
-        0x03997fd5e6412l },
-      0 },
+    { { 0xe13479038b363L,0x546b05e519043L,0x026cad158c11fL,0x8da34fe57abe6L,
+        0x0b7d17bed68a1L },
+      { 0xa5891e29c2559L,0x765bfffd8444cL,0x4e469484f7a03L,0xcc64498de4af7L,
+        0x03997fd5e6412L } },
     /* 183 */
-    { { 0x746828bd61507l,0xd534a64d2af20l,0xa8a15e329e132l,0x13e8ffeddfb08l,
-        0x00eeb89293c6cl },
-      { 0x69a3ea7e259f8l,0xe6d13e7e67e9bl,0xd1fa685ce1db7l,0xb6ef277318f6al,
-        0x0228916f8c922l },
-      0 },
+    { { 0x746828bd61507L,0xd534a64d2af20L,0xa8a15e329e132L,0x13e8ffeddfb08L,
+        0x00eeb89293c6cL },
+      { 0x69a3ea7e259f8L,0xe6d13e7e67e9bL,0xd1fa685ce1db7L,0xb6ef277318f6aL,
+        0x0228916f8c922L } },
     /* 184 */
-    { { 0xae25b0a12ab5bl,0x1f957bc136959l,0x16e2b0ccc1117l,0x097e8058429edl,
-        0x0ec05ad1d6e93l },
-      { 0xba5beac3f3708l,0x3530b59d77157l,0x18234e531baf9l,0x1b3747b552371l,
-        0x07d3141567ff1l },
-      0 },
+    { { 0xae25b0a12ab5bL,0x1f957bc136959L,0x16e2b0ccc1117L,0x097e8058429edL,
+        0x0ec05ad1d6e93L },
+      { 0xba5beac3f3708L,0x3530b59d77157L,0x18234e531baf9L,0x1b3747b552371L,
+        0x07d3141567ff1L } },
     /* 185 */
-    { { 0x9c05cf6dfefabl,0x68dcb377077bdl,0xa38bb95be2f22l,0xd7a3e53ead973l,
-        0x0e9ce66fc9bc1l },
-      { 0xa15766f6a02a1l,0xdf60e600ed75al,0x8cdc1b938c087l,0x0651f8947f346l,
-        0x0d9650b017228l },
-      0 },
+    { { 0x9c05cf6dfefabL,0x68dcb377077bdL,0xa38bb95be2f22L,0xd7a3e53ead973L,
+        0x0e9ce66fc9bc1L },
+      { 0xa15766f6a02a1L,0xdf60e600ed75aL,0x8cdc1b938c087L,0x0651f8947f346L,
+        0x0d9650b017228L } },
     /* 186 */
-    { { 0xb4c4a5a057e60l,0xbe8def25e4504l,0x7c1ccbdcbccc3l,0xb7a2a63532081l,
-        0x014d6699a804el },
-      { 0xa8415db1f411al,0x0bf80d769c2c8l,0xc2f77ad09fbafl,0x598ab4deef901l,
-        0x06f4c68410d43l },
-      0 },
+    { { 0xb4c4a5a057e60L,0xbe8def25e4504L,0x7c1ccbdcbccc3L,0xb7a2a63532081L,
+        0x014d6699a804eL },
+      { 0xa8415db1f411aL,0x0bf80d769c2c8L,0xc2f77ad09fbafL,0x598ab4deef901L,
+        0x06f4c68410d43L } },
     /* 187 */
-    { { 0x6df4e96c24a96l,0x85fcbd99a3872l,0xb2ae30a534dbcl,0x9abb3c466ef28l,
-        0x04c4350fd6118l },
-      { 0x7f716f855b8dal,0x94463c38a1296l,0xae9334341a423l,0x18b5c37e1413el,
-        0x0a726d2425a31l },
-      0 },
+    { { 0x6df4e96c24a96L,0x85fcbd99a3872L,0xb2ae30a534dbcL,0x9abb3c466ef28L,
+        0x04c4350fd6118L },
+      { 0x7f716f855b8daL,0x94463c38a1296L,0xae9334341a423L,0x18b5c37e1413eL,
+        0x0a726d2425a31L } },
     /* 188 */
-    { { 0x6b3ee948c1086l,0x3dcbd3a2e1dael,0x3d022f3f1de50l,0xf3923f35ed3f0l,
-        0x013639e82cc6cl },
-      { 0x938fbcdafaa86l,0xfb2654a2589acl,0x5051329f45bc5l,0x35a31963b26e4l,
-        0x0ca9365e1c1a3l },
-      0 },
+    { { 0x6b3ee948c1086L,0x3dcbd3a2e1daeL,0x3d022f3f1de50L,0xf3923f35ed3f0L,
+        0x013639e82cc6cL },
+      { 0x938fbcdafaa86L,0xfb2654a2589acL,0x5051329f45bc5L,0x35a31963b26e4L,
+        0x0ca9365e1c1a3L } },
     /* 189 */
-    { { 0x5ac754c3b2d20l,0x17904e241b361l,0xc9d071d742a54l,0x72a5b08521c4cl,
-        0x09ce29c34970bl },
-      { 0x81f736d3e0ad6l,0x9ef2f8434c8ccl,0xce862d98060dal,0xaf9835ed1d1a6l,
-        0x048c4abd7ab42l },
-      0 },
+    { { 0x5ac754c3b2d20L,0x17904e241b361L,0xc9d071d742a54L,0x72a5b08521c4cL,
+        0x09ce29c34970bL },
+      { 0x81f736d3e0ad6L,0x9ef2f8434c8ccL,0xce862d98060daL,0xaf9835ed1d1a6L,
+        0x048c4abd7ab42L } },
     /* 190 */
-    { { 0x1b0cc40c7485al,0xbbe5274dbfd22l,0x263d2e8ead455l,0x33cb493c76989l,
-        0x078017c32f67bl },
-      { 0x35769930cb5eel,0x940c408ed2b9dl,0x72f1a4dc0d14el,0x1c04f8b7bf552l,
-        0x053cd0454de5cl },
-      0 },
+    { { 0x1b0cc40c7485aL,0xbbe5274dbfd22L,0x263d2e8ead455L,0x33cb493c76989L,
+        0x078017c32f67bL },
+      { 0x35769930cb5eeL,0x940c408ed2b9dL,0x72f1a4dc0d14eL,0x1c04f8b7bf552L,
+        0x053cd0454de5cL } },
     /* 191 */
-    { { 0x585fa5d28ccacl,0x56005b746ebcdl,0xd0123aa5f823el,0xfa8f7c79f0a1cl,
-        0x0eea465c1d3d7l },
-      { 0x0659f0551803bl,0x9f7ce6af70781l,0x9288e706c0b59l,0x91934195a7702l,
-        0x01b6e42a47ae6l },
-      0 },
+    { { 0x585fa5d28ccacL,0x56005b746ebcdL,0xd0123aa5f823eL,0xfa8f7c79f0a1cL,
+        0x0eea465c1d3d7L },
+      { 0x0659f0551803bL,0x9f7ce6af70781L,0x9288e706c0b59L,0x91934195a7702L,
+        0x01b6e42a47ae6L } },
     /* 192 */
-    { { 0x0937cf67d04c3l,0xe289eeb8112e8l,0x2594d601e312bl,0xbd3d56b5d8879l,
-        0x00224da14187fl },
-      { 0xbb8630c5fe36fl,0x604ef51f5f87al,0x3b429ec580f3cl,0xff33964fb1bfbl,
-        0x060838ef042bfl },
-      0 },
+    { { 0x0937cf67d04c3L,0xe289eeb8112e8L,0x2594d601e312bL,0xbd3d56b5d8879L,
+        0x00224da14187fL },
+      { 0xbb8630c5fe36fL,0x604ef51f5f87aL,0x3b429ec580f3cL,0xff33964fb1bfbL,
+        0x060838ef042bfL } },
     /* 193 */
-    { { 0xcb2f27e0bbe99l,0xf304aa39ee432l,0xfa939037bda44l,0x16435f497c7a9l,
-        0x0636eb2022d33l },
-      { 0xd0e6193ae00aal,0xfe31ae6d2ffcfl,0xf93901c875a00l,0x8bacf43658a29l,
-        0x08844eeb63921l },
-      0 },
+    { { 0xcb2f27e0bbe99L,0xf304aa39ee432L,0xfa939037bda44L,0x16435f497c7a9L,
+        0x0636eb2022d33L },
+      { 0xd0e6193ae00aaL,0xfe31ae6d2ffcfL,0xf93901c875a00L,0x8bacf43658a29L,
+        0x08844eeb63921L } },
     /* 194 */
-    { { 0x171d26b3bae58l,0x7117e39f3e114l,0x1a8eada7db3dfl,0x789ecd37bc7f8l,
-        0x027ba83dc51fbl },
-      { 0xf439ffbf54de5l,0x0bb5fe1a71a7dl,0xb297a48727703l,0xa4ab42ee8e35dl,
-        0x0adb62d3487f3l },
-      0 },
+    { { 0x171d26b3bae58L,0x7117e39f3e114L,0x1a8eada7db3dfL,0x789ecd37bc7f8L,
+        0x027ba83dc51fbL },
+      { 0xf439ffbf54de5L,0x0bb5fe1a71a7dL,0xb297a48727703L,0xa4ab42ee8e35dL,
+        0x0adb62d3487f3L } },
     /* 195 */
-    { { 0x168a2a175df2al,0x4f618c32e99b1l,0x46b0916082aa0l,0xc8b2c9e4f2e71l,
-        0x0b990fd7675e7l },
-      { 0x9d96b4df37313l,0x79d0b40789082l,0x80877111c2055l,0xd18d66c9ae4a7l,
-        0x081707ef94d10l },
-      0 },
+    { { 0x168a2a175df2aL,0x4f618c32e99b1L,0x46b0916082aa0L,0xc8b2c9e4f2e71L,
+        0x0b990fd7675e7L },
+      { 0x9d96b4df37313L,0x79d0b40789082L,0x80877111c2055L,0xd18d66c9ae4a7L,
+        0x081707ef94d10L } },
     /* 196 */
-    { { 0x7cab203d6ff96l,0xfc0d84336097dl,0x042db4b5b851bl,0xaa5c268823c4dl,
-        0x03792daead5a8l },
-      { 0x18865941afa0bl,0x4142d83671528l,0xbe4e0a7f3e9e7l,0x01ba17c825275l,
-        0x05abd635e94b0l },
-      0 },
+    { { 0x7cab203d6ff96L,0xfc0d84336097dL,0x042db4b5b851bL,0xaa5c268823c4dL,
+        0x03792daead5a8L },
+      { 0x18865941afa0bL,0x4142d83671528L,0xbe4e0a7f3e9e7L,0x01ba17c825275L,
+        0x05abd635e94b0L } },
     /* 197 */
-    { { 0xfa84e0ac4927cl,0x35a7c8cf23727l,0xadca0dfe38860l,0xb610a4bcd5ea4l,
-        0x05995bf21846al },
-      { 0xf860b829dfa33l,0xae958fc18be90l,0x8630366caafe2l,0x411e9b3baf447l,
-        0x044c32ca2d483l },
-      0 },
+    { { 0xfa84e0ac4927cL,0x35a7c8cf23727L,0xadca0dfe38860L,0xb610a4bcd5ea4L,
+        0x05995bf21846aL },
+      { 0xf860b829dfa33L,0xae958fc18be90L,0x8630366caafe2L,0x411e9b3baf447L,
+        0x044c32ca2d483L } },
     /* 198 */
-    { { 0xa97f1e40ed80cl,0xb131d2ca82a74l,0xc2d6ad95f938cl,0xa54c53f2124b7l,
-        0x01f2162fb8082l },
-      { 0x67cc5720b173el,0x66085f12f97e4l,0xc9d65dc40e8a6l,0x07c98cebc20e4l,
-        0x08f1d402bc3e9l },
-      0 },
+    { { 0xa97f1e40ed80cL,0xb131d2ca82a74L,0xc2d6ad95f938cL,0xa54c53f2124b7L,
+        0x01f2162fb8082L },
+      { 0x67cc5720b173eL,0x66085f12f97e4L,0xc9d65dc40e8a6L,0x07c98cebc20e4L,
+        0x08f1d402bc3e9L } },
     /* 199 */
-    { { 0x92f9cfbc4058al,0xb6292f56704f5l,0xc1d8c57b15e14l,0xdbf9c55cfe37bl,
-        0x0b1980f43926el },
-      { 0x33e0932c76b09l,0x9d33b07f7898cl,0x63bb4611df527l,0x8e456f08ead48l,
-        0x02828ad9b3744l },
-      0 },
+    { { 0x92f9cfbc4058aL,0xb6292f56704f5L,0xc1d8c57b15e14L,0xdbf9c55cfe37bL,
+        0x0b1980f43926eL },
+      { 0x33e0932c76b09L,0x9d33b07f7898cL,0x63bb4611df527L,0x8e456f08ead48L,
+        0x02828ad9b3744L } },
     /* 200 */
-    { { 0x722c4c4cf4ac5l,0x3fdde64afb696l,0x0890832f5ac1al,0xb3900551baa2el,
-        0x04973f1275a14l },
-      { 0xd8335322eac5dl,0xf50bd9b568e59l,0x25883935e07eel,0x8ac7ab36720fal,
-        0x06dac8ed0db16l },
-      0 },
+    { { 0x722c4c4cf4ac5L,0x3fdde64afb696L,0x0890832f5ac1aL,0xb3900551baa2eL,
+        0x04973f1275a14L },
+      { 0xd8335322eac5dL,0xf50bd9b568e59L,0x25883935e07eeL,0x8ac7ab36720faL,
+        0x06dac8ed0db16L } },
     /* 201 */
-    { { 0x545aeeda835efl,0xd21d10ed51f7bl,0x3741b094aa113l,0xde4c035a65e01l,
-        0x04b23ef5920b9l },
-      { 0xbb6803c4c7341l,0x6d3f58bc37e82l,0x51e3ee8d45770l,0x9a4e73527863al,
-        0x04dd71534ddf4l },
-      0 },
+    { { 0x545aeeda835efL,0xd21d10ed51f7bL,0x3741b094aa113L,0xde4c035a65e01L,
+        0x04b23ef5920b9L },
+      { 0xbb6803c4c7341L,0x6d3f58bc37e82L,0x51e3ee8d45770L,0x9a4e73527863aL,
+        0x04dd71534ddf4L } },
     /* 202 */
-    { { 0x4467295476cd9l,0x2fe31a725bbf9l,0xc4b67e0648d07l,0x4dbb1441c8b8fl,
-        0x0fd3170002f4al },
-      { 0x43ff48995d0e1l,0xd10ef729aa1cbl,0x179898276e695l,0xf365e0d5f9764l,
-        0x014fac58c9569l },
-      0 },
+    { { 0x4467295476cd9L,0x2fe31a725bbf9L,0xc4b67e0648d07L,0x4dbb1441c8b8fL,
+        0x0fd3170002f4aL },
+      { 0x43ff48995d0e1L,0xd10ef729aa1cbL,0x179898276e695L,0xf365e0d5f9764L,
+        0x014fac58c9569L } },
     /* 203 */
-    { { 0xa0065f312ae18l,0xc0fcc93fc9ad9l,0xa7d284651958dl,0xda50d9a142408l,
-        0x0ed7c765136abl },
-      { 0x70f1a25d4abbcl,0xf3f1a113ea462l,0xb51952f9b5dd8l,0x9f53c609b0755l,
-        0x0fefcb7f74d2el },
-      0 },
+    { { 0xa0065f312ae18L,0xc0fcc93fc9ad9L,0xa7d284651958dL,0xda50d9a142408L,
+        0x0ed7c765136abL },
+      { 0x70f1a25d4abbcL,0xf3f1a113ea462L,0xb51952f9b5dd8L,0x9f53c609b0755L,
+        0x0fefcb7f74d2eL } },
     /* 204 */
-    { { 0x9497aba119185l,0x30aac45ba4bd0l,0xa521179d54e8cl,0xd80b492479deal,
-        0x01801a57e87e0l },
-      { 0xd3f8dfcafffb0l,0x0bae255240073l,0xb5fdfbc6cf33cl,0x1064781d763b5l,
-        0x09f8fc11e1eadl },
-      0 },
+    { { 0x9497aba119185L,0x30aac45ba4bd0L,0xa521179d54e8cL,0xd80b492479deaL,
+        0x01801a57e87e0L },
+      { 0xd3f8dfcafffb0L,0x0bae255240073L,0xb5fdfbc6cf33cL,0x1064781d763b5L,
+        0x09f8fc11e1eadL } },
     /* 205 */
-    { { 0x3a1715e69544cl,0x67f04b7813158l,0x78a4c320eaf85l,0x69a91e22a8fd2l,
-        0x0a9d3809d3d3al },
-      { 0xc2c2c59a2da3bl,0xf61895c847936l,0x3d5086938ccbcl,0x8ef75e65244e6l,
-        0x03006b9aee117l },
-      0 },
+    { { 0x3a1715e69544cL,0x67f04b7813158L,0x78a4c320eaf85L,0x69a91e22a8fd2L,
+        0x0a9d3809d3d3aL },
+      { 0xc2c2c59a2da3bL,0xf61895c847936L,0x3d5086938ccbcL,0x8ef75e65244e6L,
+        0x03006b9aee117L } },
     /* 206 */
-    { { 0x1f2b0c9eead28l,0x5d89f4dfbc0bbl,0x2ce89397eef63l,0xf761074757fdbl,
-        0x00ab85fd745f8l },
-      { 0xa7c933e5b4549l,0x5c97922f21ecdl,0x43b80404be2bbl,0x42c2261a1274bl,
-        0x0b122d67511e9l },
-      0 },
+    { { 0x1f2b0c9eead28L,0x5d89f4dfbc0bbL,0x2ce89397eef63L,0xf761074757fdbL,
+        0x00ab85fd745f8L },
+      { 0xa7c933e5b4549L,0x5c97922f21ecdL,0x43b80404be2bbL,0x42c2261a1274bL,
+        0x0b122d67511e9L } },
     /* 207 */
-    { { 0x607be66a5ae7al,0xfa76adcbe33bel,0xeb6e5c501e703l,0xbaecaf9043014l,
-        0x09f599dc1097dl },
-      { 0x5b7180ff250edl,0x74349a20dc6d7l,0x0b227a38eb915l,0x4b78425605a41l,
-        0x07d5528e08a29l },
-      0 },
+    { { 0x607be66a5ae7aL,0xfa76adcbe33beL,0xeb6e5c501e703L,0xbaecaf9043014L,
+        0x09f599dc1097dL },
+      { 0x5b7180ff250edL,0x74349a20dc6d7L,0x0b227a38eb915L,0x4b78425605a41L,
+        0x07d5528e08a29L } },
     /* 208 */
-    { { 0x58f6620c26defl,0xea582b2d1ef0fl,0x1ce3881025585l,0x1730fbe7d79b0l,
-        0x028ccea01303fl },
-      { 0xabcd179644ba5l,0xe806fff0b8d1dl,0x6b3e17b1fc643l,0x13bfa60a76fc6l,
-        0x0c18baf48a1d0l },
-      0 },
+    { { 0x58f6620c26defL,0xea582b2d1ef0fL,0x1ce3881025585L,0x1730fbe7d79b0L,
+        0x028ccea01303fL },
+      { 0xabcd179644ba5L,0xe806fff0b8d1dL,0x6b3e17b1fc643L,0x13bfa60a76fc6L,
+        0x0c18baf48a1d0L } },
     /* 209 */
-    { { 0x638c85dc4216dl,0x67206142ac34el,0x5f5064a00c010l,0x596bd453a1719l,
-        0x09def809db7a9l },
-      { 0x8642e67ab8d2cl,0x336237a2b641el,0x4c4218bb42404l,0x8ce57d506a6d6l,
-        0x00357f8b06880l },
-      0 },
+    { { 0x638c85dc4216dL,0x67206142ac34eL,0x5f5064a00c010L,0x596bd453a1719L,
+        0x09def809db7a9L },
+      { 0x8642e67ab8d2cL,0x336237a2b641eL,0x4c4218bb42404L,0x8ce57d506a6d6L,
+        0x00357f8b06880L } },
     /* 210 */
-    { { 0xdbe644cd2cc88l,0x8df0b8f39d8e9l,0xd30a0c8cc61c2l,0x98874a309874cl,
-        0x0e4a01add1b48l },
-      { 0x1eeacf57cd8f9l,0x3ebd594c482edl,0xbd2f7871b767dl,0xcc30a7295c717l,
-        0x0466d7d79ce10l },
-      0 },
+    { { 0xdbe644cd2cc88L,0x8df0b8f39d8e9L,0xd30a0c8cc61c2L,0x98874a309874cL,
+        0x0e4a01add1b48L },
+      { 0x1eeacf57cd8f9L,0x3ebd594c482edL,0xbd2f7871b767dL,0xcc30a7295c717L,
+        0x0466d7d79ce10L } },
     /* 211 */
-    { { 0x318929dada2c7l,0xc38f9aa27d47dl,0x20a59e14fa0a6l,0xad1a90e4fd288l,
-        0x0c672a522451el },
-      { 0x07cc85d86b655l,0x3bf9ad4af1306l,0x71172a6f0235dl,0x751399a086805l,
-        0x05e3d64faf2a6l },
-      0 },
+    { { 0x318929dada2c7L,0xc38f9aa27d47dL,0x20a59e14fa0a6L,0xad1a90e4fd288L,
+        0x0c672a522451eL },
+      { 0x07cc85d86b655L,0x3bf9ad4af1306L,0x71172a6f0235dL,0x751399a086805L,
+        0x05e3d64faf2a6L } },
     /* 212 */
-    { { 0x410c79b3b4416l,0x85eab26d99aa6l,0xb656a74cd8fcfl,0x42fc5ebff74adl,
-        0x06c8a7a95eb8el },
-      { 0x60ba7b02a63bdl,0x038b8f004710cl,0x12d90b06b2f23l,0xca918c6c37383l,
-        0x0348ae422ad82l },
-      0 },
+    { { 0x410c79b3b4416L,0x85eab26d99aa6L,0xb656a74cd8fcfL,0x42fc5ebff74adL,
+        0x06c8a7a95eb8eL },
+      { 0x60ba7b02a63bdL,0x038b8f004710cL,0x12d90b06b2f23L,0xca918c6c37383L,
+        0x0348ae422ad82L } },
     /* 213 */
-    { { 0x746635ccda2fbl,0xa18e0726d27f4l,0x92b1f2022accal,0x2d2e85adf7824l,
-        0x0c1074de0d9efl },
-      { 0x3ce44ae9a65b3l,0xac05d7151bfcfl,0xe6a9788fd71e4l,0x4ffcd4711f50cl,
-        0x0fbadfbdbc9e5l },
-      0 },
+    { { 0x746635ccda2fbL,0xa18e0726d27f4L,0x92b1f2022accaL,0x2d2e85adf7824L,
+        0x0c1074de0d9efL },
+      { 0x3ce44ae9a65b3L,0xac05d7151bfcfL,0xe6a9788fd71e4L,0x4ffcd4711f50cL,
+        0x0fbadfbdbc9e5L } },
     /* 214 */
-    { { 0x3f1cd20a99363l,0x8f6cf22775171l,0x4d359b2b91565l,0x6fcd968175cd2l,
-        0x0b7f976b48371l },
-      { 0x8e24d5d6dbf74l,0xfd71c3af36575l,0x243dfe38d23bal,0xc80548f477600l,
-        0x0f4d41b2ecafcl },
-      0 },
+    { { 0x3f1cd20a99363L,0x8f6cf22775171L,0x4d359b2b91565L,0x6fcd968175cd2L,
+        0x0b7f976b48371L },
+      { 0x8e24d5d6dbf74L,0xfd71c3af36575L,0x243dfe38d23baL,0xc80548f477600L,
+        0x0f4d41b2ecafcL } },
     /* 215 */
-    { { 0x1cf28fdabd48dl,0x3632c078a451fl,0x17146e9ce81bel,0x0f106ace29741l,
-        0x0180824eae016l },
-      { 0x7698b66e58358l,0x52ce6ca358038l,0xe41e6c5635687l,0x6d2582380e345l,
-        0x067e5f63983cfl },
-      0 },
+    { { 0x1cf28fdabd48dL,0x3632c078a451fL,0x17146e9ce81beL,0x0f106ace29741L,
+        0x0180824eae016L },
+      { 0x7698b66e58358L,0x52ce6ca358038L,0xe41e6c5635687L,0x6d2582380e345L,
+        0x067e5f63983cfL } },
     /* 216 */
-    { { 0xccb8dcf4899efl,0xf09ebb44c0f89l,0x2598ec9949015l,0x1fc6546f9276bl,
-        0x09fef789a04c1l },
-      { 0x67ecf53d2a071l,0x7fa4519b096d3l,0x11e2eefb10e1al,0x4e20ca6b3fb06l,
-        0x0bc80c181a99cl },
-      0 },
+    { { 0xccb8dcf4899efL,0xf09ebb44c0f89L,0x2598ec9949015L,0x1fc6546f9276bL,
+        0x09fef789a04c1L },
+      { 0x67ecf53d2a071L,0x7fa4519b096d3L,0x11e2eefb10e1aL,0x4e20ca6b3fb06L,
+        0x0bc80c181a99cL } },
     /* 217 */
-    { { 0x536f8e5eb82e6l,0xc7f56cb920972l,0x0b5da5e1a484fl,0xdf10c78e21715l,
-        0x049270e629f8cl },
-      { 0x9b7bbea6b50adl,0xc1a2388ffc1a3l,0x107197b9a0284l,0x2f7f5403eb178l,
-        0x0d2ee52f96137l },
-      0 },
+    { { 0x536f8e5eb82e6L,0xc7f56cb920972L,0x0b5da5e1a484fL,0xdf10c78e21715L,
+        0x049270e629f8cL },
+      { 0x9b7bbea6b50adL,0xc1a2388ffc1a3L,0x107197b9a0284L,0x2f7f5403eb178L,
+        0x0d2ee52f96137L } },
     /* 218 */
-    { { 0xcd28588e0362al,0xa78fa5d94dd37l,0x434a526442fa8l,0xb733aff836e5al,
-        0x0dfb478bee5abl },
-      { 0xf1ce7673eede6l,0xd42b5b2f04a91l,0x530da2fa5390al,0x473a5e66f7bf5l,
-        0x0d9a140b408dfl },
-      0 },
+    { { 0xcd28588e0362aL,0xa78fa5d94dd37L,0x434a526442fa8L,0xb733aff836e5aL,
+        0x0dfb478bee5abL },
+      { 0xf1ce7673eede6L,0xd42b5b2f04a91L,0x530da2fa5390aL,0x473a5e66f7bf5L,
+        0x0d9a140b408dfL } },
     /* 219 */
-    { { 0x221b56e8ea498l,0x293563ee090e0l,0x35d2ade623478l,0x4b1ae06b83913l,
-        0x0760c058d623fl },
-      { 0x9b58cc198aa79l,0xd2f07aba7f0b8l,0xde2556af74890l,0x04094e204110fl,
-        0x07141982d8f19l },
-      0 },
+    { { 0x221b56e8ea498L,0x293563ee090e0L,0x35d2ade623478L,0x4b1ae06b83913L,
+        0x0760c058d623fL },
+      { 0x9b58cc198aa79L,0xd2f07aba7f0b8L,0xde2556af74890L,0x04094e204110fL,
+        0x07141982d8f19L } },
     /* 220 */
-    { { 0xa0e334d4b0f45l,0x38392a94e16f0l,0x3c61d5ed9280bl,0x4e473af324c6bl,
-        0x03af9d1ce89d5l },
-      { 0xf798120930371l,0x4c21c17097fd8l,0xc42309beda266l,0x7dd60e9545dcdl,
-        0x0b1f815c37395l },
-      0 },
+    { { 0xa0e334d4b0f45L,0x38392a94e16f0L,0x3c61d5ed9280bL,0x4e473af324c6bL,
+        0x03af9d1ce89d5L },
+      { 0xf798120930371L,0x4c21c17097fd8L,0xc42309beda266L,0x7dd60e9545dcdL,
+        0x0b1f815c37395L } },
     /* 221 */
-    { { 0xaa78e89fec44al,0x473caa4caf84fl,0x1b6a624c8c2ael,0xf052691c807dcl,
-        0x0a41aed141543l },
-      { 0x353997d5ffe04l,0xdf625b6e20424l,0x78177758bacb2l,0x60ef85d660be8l,
-        0x0d6e9c1dd86fbl },
-      0 },
+    { { 0xaa78e89fec44aL,0x473caa4caf84fL,0x1b6a624c8c2aeL,0xf052691c807dcL,
+        0x0a41aed141543L },
+      { 0x353997d5ffe04L,0xdf625b6e20424L,0x78177758bacb2L,0x60ef85d660be8L,
+        0x0d6e9c1dd86fbL } },
     /* 222 */
-    { { 0x2e97ec6853264l,0xb7e2304a0b3aal,0x8eae9be771533l,0xf8c21b912bb7bl,
-        0x09c9c6e10ae9bl },
-      { 0x09a59e030b74cl,0x4d6a631e90a23l,0x49b79f24ed749l,0x61b689f44b23al,
-        0x0566bd59640fal },
-      0 },
+    { { 0x2e97ec6853264L,0xb7e2304a0b3aaL,0x8eae9be771533L,0xf8c21b912bb7bL,
+        0x09c9c6e10ae9bL },
+      { 0x09a59e030b74cL,0x4d6a631e90a23L,0x49b79f24ed749L,0x61b689f44b23aL,
+        0x0566bd59640faL } },
     /* 223 */
-    { { 0xc0118c18061f3l,0xd37c83fc70066l,0x7273245190b25l,0x345ef05fc8e02l,
-        0x0cf2c7390f525l },
-      { 0xbceb410eb30cfl,0xba0d77703aa09l,0x50ff255cfd2ebl,0x0979e842c43a1l,
-        0x002f517558aa2l },
-      0 },
+    { { 0xc0118c18061f3L,0xd37c83fc70066L,0x7273245190b25L,0x345ef05fc8e02L,
+        0x0cf2c7390f525L },
+      { 0xbceb410eb30cfL,0xba0d77703aa09L,0x50ff255cfd2ebL,0x0979e842c43a1L,
+        0x002f517558aa2L } },
     /* 224 */
-    { { 0xef794addb7d07l,0x4224455500396l,0x78aa3ce0b4fc7l,0xd97dfaff8eaccl,
-        0x014e9ada5e8d4l },
-      { 0x480a12f7079e2l,0xcde4b0800edaal,0x838157d45baa3l,0x9ae801765e2d7l,
-        0x0a0ad4fab8e9dl },
-      0 },
+    { { 0xef794addb7d07L,0x4224455500396L,0x78aa3ce0b4fc7L,0xd97dfaff8eaccL,
+        0x014e9ada5e8d4L },
+      { 0x480a12f7079e2L,0xcde4b0800edaaL,0x838157d45baa3L,0x9ae801765e2d7L,
+        0x0a0ad4fab8e9dL } },
     /* 225 */
-    { { 0xb76214a653618l,0x3c31eaaa5f0bfl,0x4949d5e187281l,0xed1e1553e7374l,
-        0x0bcd530b86e56l },
-      { 0xbe85332e9c47bl,0xfeb50059ab169l,0x92bfbb4dc2776l,0x341dcdba97611l,
-        0x0909283cf6979l },
-      0 },
+    { { 0xb76214a653618L,0x3c31eaaa5f0bfL,0x4949d5e187281L,0xed1e1553e7374L,
+        0x0bcd530b86e56L },
+      { 0xbe85332e9c47bL,0xfeb50059ab169L,0x92bfbb4dc2776L,0x341dcdba97611L,
+        0x0909283cf6979L } },
     /* 226 */
-    { { 0x0032476e81a13l,0x996217123967bl,0x32e19d69bee1al,0x549a08ed361bdl,
-        0x035eeb7c9ace1l },
-      { 0x0ae5a7e4e5bdcl,0xd3b6ceec6e128l,0xe266bc12dcd2cl,0xe86452e4224c6l,
-        0x09a8b2cf4448al },
-      0 },
+    { { 0x0032476e81a13L,0x996217123967bL,0x32e19d69bee1aL,0x549a08ed361bdL,
+        0x035eeb7c9ace1L },
+      { 0x0ae5a7e4e5bdcL,0xd3b6ceec6e128L,0xe266bc12dcd2cL,0xe86452e4224c6L,
+        0x09a8b2cf4448aL } },
     /* 227 */
-    { { 0x71bf209d03b59l,0xa3b65af2abf64l,0xbd5eec9c90e62l,0x1379ff7ff168el,
-        0x06bdb60f4d449l },
-      { 0xafebc8a55bc30l,0x1610097fe0dadl,0xc1e3bddc79eadl,0x08a942e197414l,
-        0x001ec3cfd94bal },
-      0 },
+    { { 0x71bf209d03b59L,0xa3b65af2abf64L,0xbd5eec9c90e62L,0x1379ff7ff168eL,
+        0x06bdb60f4d449L },
+      { 0xafebc8a55bc30L,0x1610097fe0dadL,0xc1e3bddc79eadL,0x08a942e197414L,
+        0x001ec3cfd94baL } },
     /* 228 */
-    { { 0x277ebdc9485c2l,0x7922fb10c7ba6l,0x0a28d8a48cc9al,0x64f64f61d60f7l,
-        0x0d1acb1c04754l },
-      { 0x902b126f36612l,0x4ee0618d8bd26l,0x08357ee59c3a4l,0x26c24df8a8133l,
-        0x07dcd079d4056l },
-      0 },
+    { { 0x277ebdc9485c2L,0x7922fb10c7ba6L,0x0a28d8a48cc9aL,0x64f64f61d60f7L,
+        0x0d1acb1c04754L },
+      { 0x902b126f36612L,0x4ee0618d8bd26L,0x08357ee59c3a4L,0x26c24df8a8133L,
+        0x07dcd079d4056L } },
     /* 229 */
-    { { 0x7d4d3f05a4b48l,0x52372307725cel,0x12a915aadcd29l,0x19b8d18f79718l,
-        0x00bf53589377dl },
-      { 0xcd95a6c68ea73l,0xca823a584d35el,0x473a723c7f3bbl,0x86fc9fb674c6fl,
-        0x0d28be4d9e166l },
-      0 },
+    { { 0x7d4d3f05a4b48L,0x52372307725ceL,0x12a915aadcd29L,0x19b8d18f79718L,
+        0x00bf53589377dL },
+      { 0xcd95a6c68ea73L,0xca823a584d35eL,0x473a723c7f3bbL,0x86fc9fb674c6fL,
+        0x0d28be4d9e166L } },
     /* 230 */
-    { { 0xb990638fa8e4bl,0x6e893fd8fc5d2l,0x36fb6fc559f18l,0x88ce3a6de2aa4l,
-        0x0d76007aa510fl },
-      { 0x0aab6523a4988l,0x4474dd02732d1l,0x3407278b455cfl,0xbb017f467082al,
-        0x0f2b52f68b303l },
-      0 },
+    { { 0xb990638fa8e4bL,0x6e893fd8fc5d2L,0x36fb6fc559f18L,0x88ce3a6de2aa4L,
+        0x0d76007aa510fL },
+      { 0x0aab6523a4988L,0x4474dd02732d1L,0x3407278b455cfL,0xbb017f467082aL,
+        0x0f2b52f68b303L } },
     /* 231 */
-    { { 0x7eafa9835b4cal,0xfcbb669cbc0d5l,0x66431982d2232l,0xed3a8eeeb680cl,
-        0x0d8dbe98ecc5al },
-      { 0x9be3fc5a02709l,0xe5f5ba1fa8cbal,0x10ea85230be68l,0x9705febd43cdfl,
-        0x0e01593a3ee55l },
-      0 },
+    { { 0x7eafa9835b4caL,0xfcbb669cbc0d5L,0x66431982d2232L,0xed3a8eeeb680cL,
+        0x0d8dbe98ecc5aL },
+      { 0x9be3fc5a02709L,0xe5f5ba1fa8cbaL,0x10ea85230be68L,0x9705febd43cdfL,
+        0x0e01593a3ee55L } },
     /* 232 */
-    { { 0x5af50ea75a0a6l,0xac57858033d3el,0x0176406512226l,0xef066fe6d50fdl,
-        0x0afec07b1aeb8l },
-      { 0x9956780bb0a31l,0xcc37309aae7fbl,0x1abf3896f1af3l,0xbfdd9153a15a0l,
-        0x0a71b93546e2dl },
-      0 },
+    { { 0x5af50ea75a0a6L,0xac57858033d3eL,0x0176406512226L,0xef066fe6d50fdL,
+        0x0afec07b1aeb8L },
+      { 0x9956780bb0a31L,0xcc37309aae7fbL,0x1abf3896f1af3L,0xbfdd9153a15a0L,
+        0x0a71b93546e2dL } },
     /* 233 */
-    { { 0xe12e018f593d2l,0x28a078122bbf8l,0xba4f2add1a904l,0x23d9150505db0l,
-        0x053a2005c6285l },
-      { 0x8b639e7f2b935l,0x5ac182961a07cl,0x518ca2c2bff97l,0x8e3d86bceea77l,
-        0x0bf47d19b3d58l },
-      0 },
+    { { 0xe12e018f593d2L,0x28a078122bbf8L,0xba4f2add1a904L,0x23d9150505db0L,
+        0x053a2005c6285L },
+      { 0x8b639e7f2b935L,0x5ac182961a07cL,0x518ca2c2bff97L,0x8e3d86bceea77L,
+        0x0bf47d19b3d58L } },
     /* 234 */
-    { { 0x967a7dd7665d5l,0x572f2f4de5672l,0x0d4903f4e3030l,0xa1b6144005ae8l,
-        0x0001c2c7f39c9l },
-      { 0xa801469efc6d6l,0xaa7bc7a724143l,0x78150a4c810bdl,0xb99b5f65670bal,
-        0x0fdadf8e786ffl },
-      0 },
+    { { 0x967a7dd7665d5L,0x572f2f4de5672L,0x0d4903f4e3030L,0xa1b6144005ae8L,
+        0x0001c2c7f39c9L },
+      { 0xa801469efc6d6L,0xaa7bc7a724143L,0x78150a4c810bdL,0xb99b5f65670baL,
+        0x0fdadf8e786ffL } },
     /* 235 */
-    { { 0x8cb88ffc00785l,0x913b48eb67fd3l,0xf368fbc77fa75l,0x3c940454d055bl,
-        0x03a838e4d5aa4l },
-      { 0x663293e97bb9al,0x63441d94d9561l,0xadb2a839eb933l,0x1da3515591a60l,
-        0x03cdb8257873el },
-      0 },
+    { { 0x8cb88ffc00785L,0x913b48eb67fd3L,0xf368fbc77fa75L,0x3c940454d055bL,
+        0x03a838e4d5aa4L },
+      { 0x663293e97bb9aL,0x63441d94d9561L,0xadb2a839eb933L,0x1da3515591a60L,
+        0x03cdb8257873eL } },
     /* 236 */
-    { { 0x140a97de77eabl,0x0d41648109137l,0xeb1d0dff7e1c5l,0x7fba762dcad2cl,
-        0x05a60cc89f1f5l },
-      { 0x3638240d45673l,0x195913c65580bl,0xd64b7411b82bel,0x8fc0057284b8dl,
-        0x0922ff56fdbfdl },
-      0 },
+    { { 0x140a97de77eabL,0x0d41648109137L,0xeb1d0dff7e1c5L,0x7fba762dcad2cL,
+        0x05a60cc89f1f5L },
+      { 0x3638240d45673L,0x195913c65580bL,0xd64b7411b82beL,0x8fc0057284b8dL,
+        0x0922ff56fdbfdL } },
     /* 237 */
-    { { 0x65deec9a129a1l,0x57cc284e041b2l,0xebfbe3ca5b1cel,0xcd6204380c46cl,
-        0x072919a7df6c5l },
-      { 0xf453a8fb90f9al,0x0b88e4031b298l,0x96f1856d719c0l,0x089ae32c0e777l,
-        0x05e7917803624l },
-      0 },
+    { { 0x65deec9a129a1L,0x57cc284e041b2L,0xebfbe3ca5b1ceL,0xcd6204380c46cL,
+        0x072919a7df6c5L },
+      { 0xf453a8fb90f9aL,0x0b88e4031b298L,0x96f1856d719c0L,0x089ae32c0e777L,
+        0x05e7917803624L } },
     /* 238 */
-    { { 0x6ec557f63cdfbl,0x71f1cae4fd5c1l,0x60597ca8e6a35l,0x2fabfce26bea5l,
-        0x04e0a5371e24cl },
-      { 0xa40d3a5765357l,0x440d73a2b4276l,0x1d11a323c89afl,0x04eeb8f370ae4l,
-        0x0f5ff7818d566l },
-      0 },
+    { { 0x6ec557f63cdfbL,0x71f1cae4fd5c1L,0x60597ca8e6a35L,0x2fabfce26bea5L,
+        0x04e0a5371e24cL },
+      { 0xa40d3a5765357L,0x440d73a2b4276L,0x1d11a323c89afL,0x04eeb8f370ae4L,
+        0x0f5ff7818d566L } },
     /* 239 */
-    { { 0x3e3fe1a09df21l,0x8ee66e8e47fbfl,0x9c8901526d5d2l,0x5e642096bd0a2l,
-        0x0e41df0e9533fl },
-      { 0xfda40b3ba9e3fl,0xeb2604d895305l,0xf0367c7f2340cl,0x155f0866e1927l,
-        0x08edd7d6eac4fl },
-      0 },
+    { { 0x3e3fe1a09df21L,0x8ee66e8e47fbfL,0x9c8901526d5d2L,0x5e642096bd0a2L,
+        0x0e41df0e9533fL },
+      { 0xfda40b3ba9e3fL,0xeb2604d895305L,0xf0367c7f2340cL,0x155f0866e1927L,
+        0x08edd7d6eac4fL } },
     /* 240 */
-    { { 0x1dc0e0bfc8ff3l,0x2be936f42fc9al,0xca381ef14efd8l,0xee9667016f7ccl,
-        0x01432c1caed8al },
-      { 0x8482970b23c26l,0x730735b273ec6l,0xaef0f5aa64fe8l,0xd2c6e389f6e5el,
-        0x0caef480b5ac8l },
-      0 },
+    { { 0x1dc0e0bfc8ff3L,0x2be936f42fc9aL,0xca381ef14efd8L,0xee9667016f7ccL,
+        0x01432c1caed8aL },
+      { 0x8482970b23c26L,0x730735b273ec6L,0xaef0f5aa64fe8L,0xd2c6e389f6e5eL,
+        0x0caef480b5ac8L } },
     /* 241 */
-    { { 0x5c97875315922l,0x713063cca5524l,0x64ef2cbd82951l,0xe236f3ce60d0bl,
-        0x0d0ba177e8efal },
-      { 0x9ae8fb1b3af60l,0xe53d2da20e53al,0xf9eef281a796al,0xae1601d63605dl,
-        0x0f31c957c1c54l },
-      0 },
+    { { 0x5c97875315922L,0x713063cca5524L,0x64ef2cbd82951L,0xe236f3ce60d0bL,
+        0x0d0ba177e8efaL },
+      { 0x9ae8fb1b3af60L,0xe53d2da20e53aL,0xf9eef281a796aL,0xae1601d63605dL,
+        0x0f31c957c1c54L } },
     /* 242 */
-    { { 0x58d5249cc4597l,0xb0bae0a028c0fl,0x34a814adc5015l,0x7c3aefc5fc557l,
-        0x0013404cb96e1l },
-      { 0xe2585c9a824bfl,0x5e001eaed7b29l,0x1ef68acd59318l,0x3e6c8d6ee6826l,
-        0x06f377c4b9193l },
-      0 },
+    { { 0x58d5249cc4597L,0xb0bae0a028c0fL,0x34a814adc5015L,0x7c3aefc5fc557L,
+        0x0013404cb96e1L },
+      { 0xe2585c9a824bfL,0x5e001eaed7b29L,0x1ef68acd59318L,0x3e6c8d6ee6826L,
+        0x06f377c4b9193L } },
     /* 243 */
-    { { 0x3bad1a8333fd2l,0x025a2a95b89f9l,0xaf75acea89302l,0x9506211e5037el,
-        0x06dba3e4ed2d0l },
-      { 0xef98cd04399cdl,0x6ee6b73adea48l,0x17ecaf31811c6l,0xf4a772f60752cl,
-        0x0f13cf3423becl },
-      0 },
+    { { 0x3bad1a8333fd2L,0x025a2a95b89f9L,0xaf75acea89302L,0x9506211e5037eL,
+        0x06dba3e4ed2d0L },
+      { 0xef98cd04399cdL,0x6ee6b73adea48L,0x17ecaf31811c6L,0xf4a772f60752cL,
+        0x0f13cf3423becL } },
     /* 244 */
-    { { 0xb9ec0a919e2ebl,0x95f62c0f68ceel,0xaba229983a9a1l,0xbad3cfba3bb67l,
-        0x0c83fa9a9274bl },
-      { 0xd1b0b62fa1ce0l,0xf53418efbf0d7l,0x2706f04e58b60l,0x2683bfa8ef9e5l,
-        0x0b49d70f45d70l },
-      0 },
+    { { 0xb9ec0a919e2ebL,0x95f62c0f68ceeL,0xaba229983a9a1L,0xbad3cfba3bb67L,
+        0x0c83fa9a9274bL },
+      { 0xd1b0b62fa1ce0L,0xf53418efbf0d7L,0x2706f04e58b60L,0x2683bfa8ef9e5L,
+        0x0b49d70f45d70L } },
     /* 245 */
-    { { 0xc7510fad5513bl,0xecb1751e2d914l,0x9fb9d5905f32el,0xf1cf6d850418dl,
-        0x059cfadbb0c30l },
-      { 0x7ac2355cb7fd6l,0xb8820426a3e16l,0x0a78864249367l,0x4b67eaeec58c9l,
-        0x05babf362354al },
-      0 },
+    { { 0xc7510fad5513bL,0xecb1751e2d914L,0x9fb9d5905f32eL,0xf1cf6d850418dL,
+        0x059cfadbb0c30L },
+      { 0x7ac2355cb7fd6L,0xb8820426a3e16L,0x0a78864249367L,0x4b67eaeec58c9L,
+        0x05babf362354aL } },
     /* 246 */
-    { { 0x981d1ee424865l,0x78f2e5577f37cl,0x9e0c0588b0028l,0xc8f0702970f1bl,
-        0x06188c6a79026l },
-      { 0x9a19bd0f244dal,0x5cfb08087306fl,0xf2136371eccedl,0xb9d935470f9b9l,
-        0x0993fe475df50l },
-      0 },
+    { { 0x981d1ee424865L,0x78f2e5577f37cL,0x9e0c0588b0028L,0xc8f0702970f1bL,
+        0x06188c6a79026L },
+      { 0x9a19bd0f244daL,0x5cfb08087306fL,0xf2136371eccedL,0xb9d935470f9b9L,
+        0x0993fe475df50L } },
     /* 247 */
-    { { 0x31cdf9b2c3609l,0xc02c46d4ea68el,0xa77510184eb19l,0x616b7ac9ec1a9l,
-        0x081f764664c80l },
-      { 0xc2a5a75fbe978l,0xd3f183b3561d7l,0x01dd2bf6743fel,0x060d838d1f045l,
-        0x0564a812a5fe9l },
-      0 },
+    { { 0x31cdf9b2c3609L,0xc02c46d4ea68eL,0xa77510184eb19L,0x616b7ac9ec1a9L,
+        0x081f764664c80L },
+      { 0xc2a5a75fbe978L,0xd3f183b3561d7L,0x01dd2bf6743feL,0x060d838d1f045L,
+        0x0564a812a5fe9L } },
     /* 248 */
-    { { 0xa64f4fa817d1dl,0x44bea82e0f7a5l,0xd57f9aa55f968l,0x1d6cb5ff5a0fcl,
-        0x0226bf3cf00e5l },
-      { 0x1a9f92f2833cfl,0x5a4f4f89a8d6dl,0xf3f7f7720a0a3l,0x783611536c498l,
-        0x068779f47ff25l },
-      0 },
+    { { 0xa64f4fa817d1dL,0x44bea82e0f7a5L,0xd57f9aa55f968L,0x1d6cb5ff5a0fcL,
+        0x0226bf3cf00e5L },
+      { 0x1a9f92f2833cfL,0x5a4f4f89a8d6dL,0xf3f7f7720a0a3L,0x783611536c498L,
+        0x068779f47ff25L } },
     /* 249 */
-    { { 0x0c1c173043d08l,0x741fc020fa79bl,0xa6d26d0a54467l,0x2e0bd3767e289l,
-        0x097bcb0d1eb09l },
-      { 0x6eaa8f32ed3c3l,0x51b281bc482abl,0xfa178f3c8a4f1l,0x46554d1bf4f3bl,
-        0x0a872ffe80a78l },
-      0 },
+    { { 0x0c1c173043d08L,0x741fc020fa79bL,0xa6d26d0a54467L,0x2e0bd3767e289L,
+        0x097bcb0d1eb09L },
+      { 0x6eaa8f32ed3c3L,0x51b281bc482abL,0xfa178f3c8a4f1L,0x46554d1bf4f3bL,
+        0x0a872ffe80a78L } },
     /* 250 */
-    { { 0xb7935a32b2086l,0x0e8160f486b1al,0xb6ae6bee1eb71l,0xa36a9bd0cd913l,
-        0x002812bfcb732l },
-      { 0xfd7cacf605318l,0x50fdfd6d1da63l,0x102d619646e5dl,0x96afa1d683982l,
-        0x007391cc9fe53l },
-      0 },
+    { { 0xb7935a32b2086L,0x0e8160f486b1aL,0xb6ae6bee1eb71L,0xa36a9bd0cd913L,
+        0x002812bfcb732L },
+      { 0xfd7cacf605318L,0x50fdfd6d1da63L,0x102d619646e5dL,0x96afa1d683982L,
+        0x007391cc9fe53L } },
     /* 251 */
-    { { 0x157f08b80d02bl,0xd162877f7fc50l,0x8d542ae6b8333l,0x2a087aca1af87l,
-        0x0355d2adc7e6dl },
-      { 0xf335a287386e1l,0x94f8e43275b41l,0x79989eafd272al,0x3a79286ca2cdel,
-        0x03dc2b1e37c2al },
-      0 },
+    { { 0x157f08b80d02bL,0xd162877f7fc50L,0x8d542ae6b8333L,0x2a087aca1af87L,
+        0x0355d2adc7e6dL },
+      { 0xf335a287386e1L,0x94f8e43275b41L,0x79989eafd272aL,0x3a79286ca2cdeL,
+        0x03dc2b1e37c2aL } },
     /* 252 */
-    { { 0x9d21c04581352l,0x25376782bed68l,0xfed701f0a00c8l,0x846b203bd5909l,
-        0x0c47869103ccdl },
-      { 0xa770824c768edl,0x026841f6575dbl,0xaccce0e72feeal,0x4d3273313ed56l,
-        0x0ccc42968d5bbl },
-      0 },
+    { { 0x9d21c04581352L,0x25376782bed68L,0xfed701f0a00c8L,0x846b203bd5909L,
+        0x0c47869103ccdL },
+      { 0xa770824c768edL,0x026841f6575dbL,0xaccce0e72feeaL,0x4d3273313ed56L,
+        0x0ccc42968d5bbL } },
     /* 253 */
-    { { 0x50de13d7620b9l,0x8a5992a56a94el,0x75487c9d89a5cl,0x71cfdc0076406l,
-        0x0e147eb42aa48l },
-      { 0xab4eeacf3ae46l,0xfb50350fbe274l,0x8c840eafd4936l,0x96e3df2afe474l,
-        0x0239ac047080el },
-      0 },
+    { { 0x50de13d7620b9L,0x8a5992a56a94eL,0x75487c9d89a5cL,0x71cfdc0076406L,
+        0x0e147eb42aa48L },
+      { 0xab4eeacf3ae46L,0xfb50350fbe274L,0x8c840eafd4936L,0x96e3df2afe474L,
+        0x0239ac047080eL } },
     /* 254 */
-    { { 0xd1f352bfee8d4l,0xcffa7b0fec481l,0xce9af3cce80b5l,0xe59d105c4c9e2l,
-        0x0c55fa1a3f5f7l },
-      { 0x6f14e8257c227l,0x3f342be00b318l,0xa904fb2c5b165l,0xb69909afc998al,
-        0x0094cd99cd4f4l },
-      0 },
+    { { 0xd1f352bfee8d4L,0xcffa7b0fec481L,0xce9af3cce80b5L,0xe59d105c4c9e2L,
+        0x0c55fa1a3f5f7L },
+      { 0x6f14e8257c227L,0x3f342be00b318L,0xa904fb2c5b165L,0xb69909afc998aL,
+        0x0094cd99cd4f4L } },
     /* 255 */
-    { { 0x81c84d703bebal,0x5032ceb2918a9l,0x3bd49ec8631d1l,0xad33a445f2c9el,
-        0x0b90a30b642abl },
-      { 0x5404fb4a5abf9l,0xc375db7603b46l,0xa35d89f004750l,0x24f76f9a42cccl,
-        0x0019f8b9a1b79l },
-      0 },
+    { { 0x81c84d703bebaL,0x5032ceb2918a9L,0x3bd49ec8631d1L,0xad33a445f2c9eL,
+        0x0b90a30b642abL },
+      { 0x5404fb4a5abf9L,0xc375db7603b46L,0xa35d89f004750L,0x24f76f9a42cccL,
+        0x0019f8b9a1b79L } },
 };
 
 /* Multiply the base point of P256 by the scalar and return the result.
- * If map is true then convert result to affine co-ordinates.
+ * If map is true then convert result to affine coordinates.
  *
  * r     Resulting point.
  * k     Scalar to multiply by.
@@ -10320,7 +15918,7 @@
  * heap  Heap to use for allocation.
  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
  */
-static int sp_256_ecc_mulmod_base_5(sp_point* r, sp_digit* k,
+static int sp_256_ecc_mulmod_base_5(sp_point_256* r, const sp_digit* k,
         int map, void* heap)
 {
     return sp_256_ecc_mulmod_stripe_5(r, &p256_base, p256_table,
@@ -10330,7 +15928,7 @@
 #endif
 
 /* Multiply the base point of P256 by the scalar and return the result.
- * If map is true then convert result to affine co-ordinates.
+ * If map is true then convert result to affine coordinates.
  *
  * km    Scalar to multiply by.
  * r     Resulting point.
@@ -10340,23 +15938,22 @@
  */
 int sp_ecc_mulmod_base_256(mp_int* km, ecc_point* r, int map, void* heap)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point p;
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 p;
     sp_digit kd[5];
 #endif
-    sp_point* point;
+    sp_point_256* point;
     sp_digit* k = NULL;
     int err = MP_OKAY;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    err = sp_ecc_point_new(heap, p, point);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        k = XMALLOC(sizeof(sp_digit) * 5, heap, DYNAMIC_TYPE_ECC);
-        if (k == NULL)
-            err = MEMORY_E;
+
+    err = sp_256_point_new_5(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
     }
 #else
     k = kd;
@@ -10364,26 +15961,24 @@
     if (err == MP_OKAY) {
         sp_256_from_mp(k, 5, km);
 
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_256_ecc_mulmod_base_avx2_5(point, k, map, heap);
-        else
-#endif
             err = sp_256_ecc_mulmod_base_5(point, k, map, heap);
     }
-    if (err == MP_OKAY)
+    if (err == MP_OKAY) {
         err = sp_256_point_to_ecc_point_5(point, r);
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (k != NULL)
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
         XFREE(k, heap, DYNAMIC_TYPE_ECC);
-#endif
-    sp_ecc_point_free(point, 0, heap);
-
-    return err;
-}
-
-#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN)
+    }
+#endif
+    sp_256_point_free_5(point, 0, heap);
+
+    return err;
+}
+
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+                                                        defined(HAVE_ECC_VERIFY)
 /* Returns 1 if the number of zero.
  * Implementation is constant time.
  *
@@ -10395,7 +15990,7 @@
     return (a[0] | a[1] | a[2] | a[3] | a[4]) == 0;
 }
 
-#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN */
+#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
 /* Add 1 to a. (a = a + 1)
  *
  * r  A single precision integer.
@@ -10407,33 +16002,38 @@
     sp_256_norm_5(a);
 }
 
-/* Read big endian unsigned byte aray into r.
- *
- * r  A single precision integer.
+/* Read big endian unsigned byte array into r.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
  * a  Byte array.
  * n  Number of bytes in array to read.
  */
-static void sp_256_from_bin(sp_digit* r, int max, const byte* a, int n)
-{
-    int i, j = 0, s = 0;
+static void sp_256_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;
 
     r[0] = 0;
     for (i = n-1; i >= 0; i--) {
-        r[j] |= ((sp_digit)a[i]) << s;
-        if (s >= 44) {
-            r[j] &= 0xfffffffffffffl;
-            s = 52 - s;
-            if (j + 1 >= max)
-                break;
-            r[++j] = a[i] >> s;
-            s = 8 - s;
-        }
-        else
-            s += 8;
-    }
-
-    for (j++; j < max; j++)
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 44U) {
+            r[j] &= 0xfffffffffffffL;
+            s = 52U - s;
+            if (j + 1 >= size) {
+                break;
+            }
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    for (j++; j < size; j++) {
         r[j] = 0;
+    }
 }
 
 /* Generates a scalar that is in the range 1..order-1.
@@ -10451,7 +16051,7 @@
     do {
         err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf));
         if (err == 0) {
-            sp_256_from_bin(k, 5, buf, sizeof(buf));
+            sp_256_from_bin(k, 5, buf, (int)sizeof(buf));
             if (sp_256_cmp_5(k, p256_order2) < 0) {
                 sp_256_add_one_5(k);
                 break;
@@ -10474,87 +16074,80 @@
  */
 int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point p;
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 p;
     sp_digit kd[5];
 #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
-    sp_point inf;
-#endif
-#endif
-    sp_point* point;
+    sp_point_256 inf;
+#endif
+#endif
+    sp_point_256* point;
     sp_digit* k = NULL;
 #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
-    sp_point* infinity;
+    sp_point_256* infinity;
 #endif
     int err;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
 
     (void)heap;
 
-    err = sp_ecc_point_new(heap, p, point);
+    err = sp_256_point_new_5(heap, p, point);
 #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
-    if (err == MP_OKAY)
-        err = sp_ecc_point_new(heap, inf, infinity);
-#endif
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        k = XMALLOC(sizeof(sp_digit) * 5, heap, DYNAMIC_TYPE_ECC);
-        if (k == NULL)
-            err = MEMORY_E;
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_5(heap, inf, infinity);
+    }
+#endif
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
     }
 #else
     k = kd;
 #endif
 
-    if (err == MP_OKAY)
+    if (err == MP_OKAY) {
         err = sp_256_ecc_gen_k_5(rng, k);
-    if (err == MP_OKAY) {
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_256_ecc_mulmod_base_avx2_5(point, k, 1, NULL);
-        else
-#endif
+    }
+    if (err == MP_OKAY) {
             err = sp_256_ecc_mulmod_base_5(point, k, 1, NULL);
     }
 
 #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
     if (err == MP_OKAY) {
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
-            err = sp_256_ecc_mulmod_avx2_5(infinity, point, p256_order, 1,
-                                                                          NULL);
-        }
-        else
-#endif
             err = sp_256_ecc_mulmod_5(infinity, point, p256_order, 1, NULL);
     }
     if (err == MP_OKAY) {
-        if (!sp_256_iszero_5(point->x) || !sp_256_iszero_5(point->y))
+        if ((sp_256_iszero_5(point->x) == 0) || (sp_256_iszero_5(point->y) == 0)) {
             err = ECC_INF_E;
-    }
-#endif
-
-    if (err == MP_OKAY)
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(k, priv);
-    if (err == MP_OKAY)
+    }
+    if (err == MP_OKAY) {
         err = sp_256_point_to_ecc_point_5(point, pub);
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (k != NULL)
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
         XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
 #endif
 #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
-    sp_ecc_point_free(infinity, 1, heap);
-#endif
-    sp_ecc_point_free(point, 1, heap);
+    sp_256_point_free_5(infinity, 1, heap);
+#endif
+    sp_256_point_free_5(point, 1, heap);
 
     return err;
 }
 
 #ifdef HAVE_ECC_DHE
-/* Write r as big endian to byte aray.
+/* Write r as big endian to byte array.
  * Fixed length number of bytes written: 32
  *
  * r  A single precision integer.
@@ -10566,25 +16159,32 @@
 
     for (i=0; i<4; i++) {
         r[i+1] += r[i] >> 52;
-        r[i] &= 0xfffffffffffffl;
+        r[i] &= 0xfffffffffffffL;
     }
     j = 256 / 8 - 1;
     a[j] = 0;
     for (i=0; i<5 && j>=0; i++) {
         b = 0;
-        a[j--] |= r[i] << s; b += 8 - s;
-        if (j < 0)
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
             break;
+        }
         while (b < 52) {
-            a[j--] = r[i] >> b; b += 8;
-            if (j < 0)
-                break;
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
+                break;
+            }
         }
         s = 8 - (b - 52);
-        if (j >= 0)
+        if (j >= 0) {
             a[j] = 0;
-        if (s != 0)
+        }
+        if (s != 0) {
             j++;
+        }
     }
 }
 
@@ -10603,25 +16203,25 @@
 int sp_ecc_secret_gen_256(mp_int* priv, ecc_point* pub, byte* out,
                           word32* outLen, void* heap)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point p;
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 p;
     sp_digit kd[5];
 #endif
-    sp_point* point = NULL;
+    sp_point_256* point = NULL;
     sp_digit* k = NULL;
     int err = MP_OKAY;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    if (*outLen < 32)
+
+    if (*outLen < 32U) {
         err = BUFFER_E;
-
-    if (err == MP_OKAY)
-        err = sp_ecc_point_new(heap, p, point);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        k = XMALLOC(sizeof(sp_digit) * 5, heap, DYNAMIC_TYPE_ECC);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_5(heap, p, point);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5, heap,
+                                                              DYNAMIC_TYPE_ECC);
         if (k == NULL)
             err = MEMORY_E;
     }
@@ -10632,11 +16232,6 @@
     if (err == MP_OKAY) {
         sp_256_from_mp(k, 5, priv);
         sp_256_point_from_ecc_point_5(point, pub);
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_256_ecc_mulmod_avx2_5(point, point, k, 1, heap);
-        else
-#endif
             err = sp_256_ecc_mulmod_5(point, point, k, 1, heap);
     }
     if (err == MP_OKAY) {
@@ -10644,19 +16239,18 @@
         *outLen = 32;
     }
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (k != NULL)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
         XFREE(k, heap, DYNAMIC_TYPE_ECC);
-#endif
-    sp_ecc_point_free(point, 0, heap);
+    }
+#endif
+    sp_256_point_free_5(point, 0, heap);
 
     return err;
 }
 #endif /* HAVE_ECC_DHE */
 
 #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
-#ifdef HAVE_INTEL_AVX2
-#endif /* HAVE_INTEL_AVX2 */
 #endif
 #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
 /* Multiply a by scalar b into r. (r = a * b)
@@ -10666,7 +16260,7 @@
  * b  A scalar.
  */
 SP_NOINLINE static void sp_256_mul_d_5(sp_digit* r, const sp_digit* a,
-    const sp_digit b)
+    sp_digit b)
 {
 #ifdef WOLFSSL_SP_SMALL
     int128_t tb = b;
@@ -10675,7 +16269,7 @@
 
     for (i = 0; i < 5; i++) {
         t += tb * a[i];
-        r[i] = t & 0xfffffffffffffl;
+        r[i] = t & 0xfffffffffffffL;
         t >>= 52;
     }
     r[5] = (sp_digit)t;
@@ -10688,31 +16282,79 @@
     t[ 2] = tb * a[ 2];
     t[ 3] = tb * a[ 3];
     t[ 4] = tb * a[ 4];
-    r[ 0] =                           (t[ 0] & 0xfffffffffffffl);
-    r[ 1] = (sp_digit)(t[ 0] >> 52) + (t[ 1] & 0xfffffffffffffl);
-    r[ 2] = (sp_digit)(t[ 1] >> 52) + (t[ 2] & 0xfffffffffffffl);
-    r[ 3] = (sp_digit)(t[ 2] >> 52) + (t[ 3] & 0xfffffffffffffl);
-    r[ 4] = (sp_digit)(t[ 3] >> 52) + (t[ 4] & 0xfffffffffffffl);
+    r[ 0] =                           (t[ 0] & 0xfffffffffffffL);
+    r[ 1] = (sp_digit)(t[ 0] >> 52) + (t[ 1] & 0xfffffffffffffL);
+    r[ 2] = (sp_digit)(t[ 1] >> 52) + (t[ 2] & 0xfffffffffffffL);
+    r[ 3] = (sp_digit)(t[ 2] >> 52) + (t[ 3] & 0xfffffffffffffL);
+    r[ 4] = (sp_digit)(t[ 3] >> 52) + (t[ 4] & 0xfffffffffffffL);
     r[ 5] = (sp_digit)(t[ 4] >> 52);
 #endif /* WOLFSSL_SP_SMALL */
 }
 
+#ifdef WOLFSSL_SP_DIV_64
+static WC_INLINE sp_digit sp_256_div_word_5(sp_digit d1, sp_digit d0,
+    sp_digit dv)
+{
+    sp_digit d, r, t;
+
+    /* All 52 bits from d1 and top 11 bits from d0. */
+    d = (d1 << 11) | (d0 >> 41);
+    r = d / dv;
+    d -= r * dv;
+    /* Up to 12 bits in r */
+    /* Next 11 bits from d0. */
+    r <<= 11;
+    d <<= 11;
+    d |= (d0 >> 30) & ((1 << 11) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 23 bits in r */
+    /* Next 11 bits from d0. */
+    r <<= 11;
+    d <<= 11;
+    d |= (d0 >> 19) & ((1 << 11) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 34 bits in r */
+    /* Next 11 bits from d0. */
+    r <<= 11;
+    d <<= 11;
+    d |= (d0 >> 8) & ((1 << 11) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 45 bits in r */
+    /* Remaining 8 bits from d0. */
+    r <<= 8;
+    d <<= 8;
+    d |= d0 & ((1 << 8) - 1);
+    t = d / dv;
+    r += t;
+
+    return r;
+}
+#endif /* WOLFSSL_SP_DIV_64 */
+
 /* Divide d in a and put remainder into r (m*d + r = a)
  * m is not calculated as it is not needed at this time.
  *
- * a  Nmber to be divided.
+ * a  Number to be divided.
  * d  Number to divide with.
  * m  Multiplier result.
  * r  Remainder from the division.
  * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
  */
-static int sp_256_div_5(sp_digit* a, sp_digit* d, sp_digit* m,
+static int sp_256_div_5(const sp_digit* a, const sp_digit* d, sp_digit* m,
         sp_digit* r)
 {
     int i;
+#ifndef WOLFSSL_SP_DIV_64
     int128_t d1;
-    sp_digit div, r1;
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#endif
+    sp_digit dv, r1;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit* td;
 #else
     sp_digit t1d[10], t2d[5 + 1];
@@ -10721,62 +16363,70 @@
     sp_digit* t2;
     int err = MP_OKAY;
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    td = XMALLOC(sizeof(sp_digit) * (3 * 5 + 1), NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    if (td != NULL) {
+    (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (3 * 5 + 1), NULL,
+                                                       DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
         t1 = td;
         t2 = td + 2 * 5;
-    }
-    else
-        err = MEMORY_E;
-#else
-    t1 = t1d;
-    t2 = t2d;
-#endif
-
-    (void)m;
-
-    if (err == MP_OKAY) {
-        div = d[4];
-        XMEMCPY(t1, a, sizeof(*t1) * 2 * 5);
+#else
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
+        dv = d[4];
+        XMEMCPY(t1, a, sizeof(*t1) * 2U * 5U);
         for (i=4; i>=0; i--) {
             t1[5 + i] += t1[5 + i - 1] >> 52;
-            t1[5 + i - 1] &= 0xfffffffffffffl;
+            t1[5 + i - 1] &= 0xfffffffffffffL;
+#ifndef WOLFSSL_SP_DIV_64
             d1 = t1[5 + i];
             d1 <<= 52;
             d1 += t1[5 + i - 1];
-            r1 = (sp_digit)(d1 / div);
+            r1 = (sp_digit)(d1 / dv);
+#else
+            r1 = sp_256_div_word_5(t1[5 + i], t1[5 + i - 1], dv);
+#endif
 
             sp_256_mul_d_5(t2, d, r1);
-            sp_256_sub_5(&t1[i], &t1[i], t2);
+            (void)sp_256_sub_5(&t1[i], &t1[i], t2);
             t1[5 + i] -= t2[5];
             t1[5 + i] += t1[5 + i - 1] >> 52;
-            t1[5 + i - 1] &= 0xfffffffffffffl;
-            r1 = (((-t1[5 + i]) << 52) - t1[5 + i - 1]) / div;
+            t1[5 + i - 1] &= 0xfffffffffffffL;
+            r1 = (((-t1[5 + i]) << 52) - t1[5 + i - 1]) / dv;
             r1++;
             sp_256_mul_d_5(t2, d, r1);
-            sp_256_add_5(&t1[i], &t1[i], t2);
+            (void)sp_256_add_5(&t1[i], &t1[i], t2);
             t1[5 + i] += t1[5 + i - 1] >> 52;
-            t1[5 + i - 1] &= 0xfffffffffffffl;
+            t1[5 + i - 1] &= 0xfffffffffffffL;
         }
         t1[5 - 1] += t1[5 - 2] >> 52;
-        t1[5 - 2] &= 0xfffffffffffffl;
-        d1 = t1[5 - 1];
-        r1 = (sp_digit)(d1 / div);
+        t1[5 - 2] &= 0xfffffffffffffL;
+        r1 = t1[5 - 1] / dv;
 
         sp_256_mul_d_5(t2, d, r1);
-        sp_256_sub_5(t1, t1, t2);
-        XMEMCPY(r, t1, sizeof(*r) * 2 * 5);
+        (void)sp_256_sub_5(t1, t1, t2);
+        XMEMCPY(r, t1, sizeof(*r) * 2U * 5U);
         for (i=0; i<3; i++) {
             r[i+1] += r[i] >> 52;
-            r[i] &= 0xfffffffffffffl;
-        }
-        sp_256_cond_add_5(r, r, d, 0 - (r[4] < 0));
-    }
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (td != NULL)
+            r[i] &= 0xfffffffffffffL;
+        }
+        sp_256_cond_add_5(r, r, d, 0 - ((r[4] < 0) ?
+                    (sp_digit)1 : (sp_digit)0));
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (td != NULL) {
         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
 #endif
 
     return err;
@@ -10789,7 +16439,7 @@
  * m  A single precision number that is the modulus to reduce with.
  * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
  */
-static int sp_256_mod_5(sp_digit* r, sp_digit* a, sp_digit* m)
+static int sp_256_mod_5(sp_digit* r, const sp_digit* a, const sp_digit* m)
 {
     return sp_256_div_5(a, m, NULL, r);
 }
@@ -10798,14 +16448,14 @@
 #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
 #ifdef WOLFSSL_SP_SMALL
 /* Order-2 for the P256 curve. */
-static const uint64_t p256_order_2[4] = {
-    0xf3b9cac2fc63254f,0xbce6faada7179e84,0xffffffffffffffff,
-    0xffffffff00000000
+static const uint64_t p256_order_minus_2[4] = {
+    0xf3b9cac2fc63254fU,0xbce6faada7179e84U,0xffffffffffffffffU,
+    0xffffffff00000000U
 };
 #else
 /* The low half of the order-2 of the P256 curve. */
 static const uint64_t p256_order_low[2] = {
-    0xf3b9cac2fc63254f,0xbce6faada7179e84
+    0xf3b9cac2fc63254fU,0xbce6faada7179e84U
 };
 #endif /* WOLFSSL_SP_SMALL */
 
@@ -10815,10 +16465,10 @@
  * a  First operand of the multiplication.
  * b  Second operand of the multiplication.
  */
-static void sp_256_mont_mul_order_5(sp_digit* r, sp_digit* a, sp_digit* b)
+static void sp_256_mont_mul_order_5(sp_digit* r, const sp_digit* a, const sp_digit* b)
 {
     sp_256_mul_5(r, a, b);
-    sp_256_mont_reduce_5(r, p256_order, p256_mp_order);
+    sp_256_mont_reduce_order_5(r, p256_order, p256_mp_order);
 }
 
 /* Square number mod the order of P256 curve. (r = a * a mod order)
@@ -10826,10 +16476,10 @@
  * r  Result of the squaring.
  * a  Number to square.
  */
-static void sp_256_mont_sqr_order_5(sp_digit* r, sp_digit* a)
+static void sp_256_mont_sqr_order_5(sp_digit* r, const sp_digit* a)
 {
     sp_256_sqr_5(r, a);
-    sp_256_mont_reduce_5(r, p256_order, p256_mp_order);
+    sp_256_mont_reduce_order_5(r, p256_order, p256_mp_order);
 }
 
 #ifndef WOLFSSL_SP_SMALL
@@ -10839,13 +16489,14 @@
  * r  Result of the squaring.
  * a  Number to square.
  */
-static void sp_256_mont_sqr_n_order_5(sp_digit* r, sp_digit* a, int n)
+static void sp_256_mont_sqr_n_order_5(sp_digit* r, const sp_digit* a, int n)
 {
     int i;
 
     sp_256_mont_sqr_order_5(r, a);
-    for (i=1; i<n; i++)
+    for (i=1; i<n; i++) {
         sp_256_mont_sqr_order_5(r, r);
+    }
 }
 #endif /* !WOLFSSL_SP_SMALL */
 
@@ -10856,7 +16507,7 @@
  * a   Number to invert.
  * td  Temporary data.
  */
-static void sp_256_mont_inv_order_5(sp_digit* r, sp_digit* a,
+static void sp_256_mont_inv_order_5(sp_digit* r, const sp_digit* a,
         sp_digit* td)
 {
 #ifdef WOLFSSL_SP_SMALL
@@ -10866,10 +16517,11 @@
     XMEMCPY(t, a, sizeof(sp_digit) * 5);
     for (i=254; i>=0; i--) {
         sp_256_mont_sqr_order_5(t, t);
-        if (p256_order_2[i / 64] & ((sp_digit)1 << (i % 64)))
+        if ((p256_order_minus_2[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
             sp_256_mont_mul_order_5(t, t, a);
-    }
-    XMEMCPY(r, t, sizeof(sp_digit) * 5);
+        }
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 5U);
 #else
     sp_digit* t = td;
     sp_digit* t2 = td + 2 * 5;
@@ -10907,8 +16559,9 @@
     /* t2= a^ffffffff00000000ffffffffffffffffbce6 */
     for (i=127; i>=112; i--) {
         sp_256_mont_sqr_order_5(t2, t2);
-        if (p256_order_low[i / 64] & ((sp_digit)1 << (i % 64)))
+        if (((sp_digit)p256_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
             sp_256_mont_mul_order_5(t2, t2, a);
+        }
     }
     /* t2= a^ffffffff00000000ffffffffffffffffbce6f */
     sp_256_mont_sqr_n_order_5(t2, t2, 4);
@@ -10916,8 +16569,9 @@
     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84 */
     for (i=107; i>=64; i--) {
         sp_256_mont_sqr_order_5(t2, t2);
-        if (p256_order_low[i / 64] & ((sp_digit)1 << (i % 64)))
+        if (((sp_digit)p256_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
             sp_256_mont_mul_order_5(t2, t2, a);
+        }
     }
     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f */
     sp_256_mont_sqr_n_order_5(t2, t2, 4);
@@ -10925,8 +16579,9 @@
     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2 */
     for (i=59; i>=32; i--) {
         sp_256_mont_sqr_order_5(t2, t2);
-        if (p256_order_low[i / 64] & ((sp_digit)1 << (i % 64)))
+        if (((sp_digit)p256_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
             sp_256_mont_mul_order_5(t2, t2, a);
+        }
     }
     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2f */
     sp_256_mont_sqr_n_order_5(t2, t2, 4);
@@ -10934,8 +16589,9 @@
     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254 */
     for (i=27; i>=0; i--) {
         sp_256_mont_sqr_order_5(t2, t2);
-        if (p256_order_low[i / 64] & ((sp_digit)1 << (i % 64)))
+        if (((sp_digit)p256_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
             sp_256_mont_mul_order_5(t2, t2, a);
+        }
     }
     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632540 */
     sp_256_mont_sqr_n_order_5(t2, t2, 4);
@@ -10944,143 +16600,6 @@
 #endif /* WOLFSSL_SP_SMALL */
 }
 
-#ifdef HAVE_INTEL_AVX2
-/* Multiply two number mod the order of P256 curve. (r = a * b mod order)
- *
- * r  Result of the multiplication.
- * a  First operand of the multiplication.
- * b  Second operand of the multiplication.
- */
-static void sp_256_mont_mul_order_avx2_5(sp_digit* r, sp_digit* a, sp_digit* b)
-{
-    sp_256_mul_avx2_5(r, a, b);
-    sp_256_mont_reduce_avx2_5(r, p256_order, p256_mp_order);
-}
-
-/* Square number mod the order of P256 curve. (r = a * a mod order)
- *
- * r  Result of the squaring.
- * a  Number to square.
- */
-static void sp_256_mont_sqr_order_avx2_5(sp_digit* r, sp_digit* a)
-{
-    sp_256_sqr_avx2_5(r, a);
-    sp_256_mont_reduce_avx2_5(r, p256_order, p256_mp_order);
-}
-
-#ifndef WOLFSSL_SP_SMALL
-/* Square number mod the order of P256 curve a number of times.
- * (r = a ^ n mod order)
- *
- * r  Result of the squaring.
- * a  Number to square.
- */
-static void sp_256_mont_sqr_n_order_avx2_5(sp_digit* r, sp_digit* a, int n)
-{
-    int i;
-
-    sp_256_mont_sqr_order_avx2_5(r, a);
-    for (i=1; i<n; i++)
-        sp_256_mont_sqr_order_avx2_5(r, r);
-}
-#endif /* !WOLFSSL_SP_SMALL */
-
-/* Invert the number, in Montgomery form, modulo the order of the P256 curve.
- * (r = 1 / a mod order)
- *
- * r   Inverse result.
- * a   Number to invert.
- * td  Temporary data.
- */
-static void sp_256_mont_inv_order_avx2_5(sp_digit* r, sp_digit* a,
-        sp_digit* td)
-{
-#ifdef WOLFSSL_SP_SMALL
-    sp_digit* t = td;
-    int i;
-
-    XMEMCPY(t, a, sizeof(sp_digit) * 5);
-    for (i=254; i>=0; i--) {
-        sp_256_mont_sqr_order_avx2_5(t, t);
-        if (p256_order_2[i / 64] & ((sp_digit)1 << (i % 64)))
-            sp_256_mont_mul_order_avx2_5(t, t, a);
-    }
-    XMEMCPY(r, t, sizeof(sp_digit) * 5);
-#else
-    sp_digit* t = td;
-    sp_digit* t2 = td + 2 * 5;
-    sp_digit* t3 = td + 4 * 5;
-    int i;
-
-    /* t = a^2 */
-    sp_256_mont_sqr_order_avx2_5(t, a);
-    /* t = a^3 = t * a */
-    sp_256_mont_mul_order_avx2_5(t, t, a);
-    /* t2= a^c = t ^ 2 ^ 2 */
-    sp_256_mont_sqr_n_order_avx2_5(t2, t, 2);
-    /* t3= a^f = t2 * t */
-    sp_256_mont_mul_order_avx2_5(t3, t2, t);
-    /* t2= a^f0 = t3 ^ 2 ^ 4 */
-    sp_256_mont_sqr_n_order_avx2_5(t2, t3, 4);
-    /* t = a^ff = t2 * t3 */
-    sp_256_mont_mul_order_avx2_5(t, t2, t3);
-    /* t3= a^ff00 = t ^ 2 ^ 8 */
-    sp_256_mont_sqr_n_order_avx2_5(t2, t, 8);
-    /* t = a^ffff = t2 * t */
-    sp_256_mont_mul_order_avx2_5(t, t2, t);
-    /* t2= a^ffff0000 = t ^ 2 ^ 16 */
-    sp_256_mont_sqr_n_order_avx2_5(t2, t, 16);
-    /* t = a^ffffffff = t2 * t */
-    sp_256_mont_mul_order_avx2_5(t, t2, t);
-    /* t2= a^ffffffff0000000000000000 = t ^ 2 ^ 64  */
-    sp_256_mont_sqr_n_order_avx2_5(t2, t, 64);
-    /* t2= a^ffffffff00000000ffffffff = t2 * t */
-    sp_256_mont_mul_order_avx2_5(t2, t2, t);
-    /* t2= a^ffffffff00000000ffffffff00000000 = t2 ^ 2 ^ 32  */
-    sp_256_mont_sqr_n_order_avx2_5(t2, t2, 32);
-    /* t2= a^ffffffff00000000ffffffffffffffff = t2 * t */
-    sp_256_mont_mul_order_avx2_5(t2, t2, t);
-    /* t2= a^ffffffff00000000ffffffffffffffffbce6 */
-    for (i=127; i>=112; i--) {
-        sp_256_mont_sqr_order_avx2_5(t2, t2);
-        if (p256_order_low[i / 64] & ((sp_digit)1 << (i % 64)))
-            sp_256_mont_mul_order_avx2_5(t2, t2, a);
-    }
-    /* t2= a^ffffffff00000000ffffffffffffffffbce6f */
-    sp_256_mont_sqr_n_order_avx2_5(t2, t2, 4);
-    sp_256_mont_mul_order_avx2_5(t2, t2, t3);
-    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84 */
-    for (i=107; i>=64; i--) {
-        sp_256_mont_sqr_order_avx2_5(t2, t2);
-        if (p256_order_low[i / 64] & ((sp_digit)1 << (i % 64)))
-            sp_256_mont_mul_order_avx2_5(t2, t2, a);
-    }
-    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f */
-    sp_256_mont_sqr_n_order_avx2_5(t2, t2, 4);
-    sp_256_mont_mul_order_avx2_5(t2, t2, t3);
-    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2 */
-    for (i=59; i>=32; i--) {
-        sp_256_mont_sqr_order_avx2_5(t2, t2);
-        if (p256_order_low[i / 64] & ((sp_digit)1 << (i % 64)))
-            sp_256_mont_mul_order_avx2_5(t2, t2, a);
-    }
-    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2f */
-    sp_256_mont_sqr_n_order_avx2_5(t2, t2, 4);
-    sp_256_mont_mul_order_avx2_5(t2, t2, t3);
-    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254 */
-    for (i=27; i>=0; i--) {
-        sp_256_mont_sqr_order_avx2_5(t2, t2);
-        if (p256_order_low[i / 64] & ((sp_digit)1 << (i % 64)))
-            sp_256_mont_mul_order_avx2_5(t2, t2, a);
-    }
-    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632540 */
-    sp_256_mont_sqr_n_order_avx2_5(t2, t2, 4);
-    /* r = a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f */
-    sp_256_mont_mul_order_avx2_5(r, t2, t3);
-#endif /* WOLFSSL_SP_SMALL */
-}
-
-#endif /* HAVE_INTEL_AVX2 */
 #endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
 #ifdef HAVE_ECC_SIGN
 #ifndef SP_ECC_MAX_SIG_GEN
@@ -11104,114 +16623,102 @@
  * MP_OKAY on success.
  */
 int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
-                    mp_int* rm, mp_int* sm, void* heap)
-{
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    sp_digit* d;
+                    mp_int* rm, mp_int* sm, mp_int* km, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
 #else
     sp_digit ed[2*5];
     sp_digit xd[2*5];
     sp_digit kd[2*5];
     sp_digit rd[2*5];
     sp_digit td[3 * 2*5];
-    sp_point p;
+    sp_point_256 p;
 #endif
     sp_digit* e = NULL;
     sp_digit* x = NULL;
     sp_digit* k = NULL;
     sp_digit* r = NULL;
     sp_digit* tmp = NULL;
-    sp_point* point = NULL;
+    sp_point_256* point = NULL;
     sp_digit carry;
-    sp_digit* s;
-    sp_digit* kInv;
+    sp_digit* s = NULL;
+    sp_digit* kInv = NULL;
     int err = MP_OKAY;
     int64_t c;
     int i;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
 
     (void)heap;
 
-    err = sp_ecc_point_new(heap, p, point);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        d = XMALLOC(sizeof(sp_digit) * 7 * 2 * 5, heap, DYNAMIC_TYPE_ECC);
-        if (d != NULL) {
-            e = d + 0 * 5;
-            x = d + 2 * 5;
-            k = d + 4 * 5;
-            r = d + 6 * 5;
-            tmp = d + 8 * 5;
-        }
-        else
-            err = MEMORY_E;
-    }
-#else
-    e = ed;
-    x = xd;
-    k = kd;
-    r = rd;
-    tmp = td;
-#endif
-    s = e;
-    kInv = k;
-
-    if (err == MP_OKAY) {
-        if (hashLen > 32)
-            hashLen = 32;
-
-        sp_256_from_bin(e, 5, hash, hashLen);
-        sp_256_from_mp(x, 5, priv);
+    err = sp_256_point_new_5(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 5, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        e = d + 0 * 5;
+        x = d + 2 * 5;
+        k = d + 4 * 5;
+        r = d + 6 * 5;
+        tmp = d + 8 * 5;
+#else
+        e = ed;
+        x = xd;
+        k = kd;
+        r = rd;
+        tmp = td;
+#endif
+        s = e;
+        kInv = k;
+
+        if (hashLen > 32U) {
+            hashLen = 32U;
+        }
+
+        sp_256_from_bin(e, 5, hash, (int)hashLen);
     }
 
     for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
+        sp_256_from_mp(x, 5, priv);
+
         /* New random point. */
-        err = sp_256_ecc_gen_k_5(rng, k);
+        if (km == NULL || mp_iszero(km)) {
+            err = sp_256_ecc_gen_k_5(rng, k);
+        }
+        else {
+            sp_256_from_mp(k, 5, km);
+            mp_zero(km);
+        }
         if (err == MP_OKAY) {
-#ifdef HAVE_INTEL_AVX2
-            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-                err = sp_256_ecc_mulmod_base_avx2_5(point, k, 1, heap);
-            else
-#endif
                 err = sp_256_ecc_mulmod_base_5(point, k, 1, NULL);
         }
 
         if (err == MP_OKAY) {
             /* r = point->x mod order */
-            XMEMCPY(r, point->x, sizeof(sp_digit) * 5);
+            XMEMCPY(r, point->x, sizeof(sp_digit) * 5U);
             sp_256_norm_5(r);
             c = sp_256_cmp_5(r, p256_order);
-            sp_256_cond_sub_5(r, r, p256_order, 0 - (c >= 0));
+            sp_256_cond_sub_5(r, r, p256_order, 0L - (sp_digit)(c >= 0));
             sp_256_norm_5(r);
 
             /* Conv k to Montgomery form (mod order) */
-#ifdef HAVE_INTEL_AVX2
-            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-                sp_256_mul_avx2_5(k, k, p256_norm_order);
-            else
-#endif
                 sp_256_mul_5(k, k, p256_norm_order);
             err = sp_256_mod_5(k, k, p256_order);
         }
         if (err == MP_OKAY) {
             sp_256_norm_5(k);
             /* kInv = 1/k mod order */
-#ifdef HAVE_INTEL_AVX2
-            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-                sp_256_mont_inv_order_avx2_5(kInv, k, tmp);
-            else
-#endif
                 sp_256_mont_inv_order_5(kInv, k, tmp);
             sp_256_norm_5(kInv);
 
             /* s = r * x + e */
-#ifdef HAVE_INTEL_AVX2
-            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-                sp_256_mul_avx2_5(x, x, r);
-            else
-#endif
                 sp_256_mul_5(x, x, r);
             err = sp_256_mod_5(x, x, p256_order);
         }
@@ -11221,46 +16728,45 @@
             sp_256_cond_sub_5(s, s, p256_order, 0 - carry);
             sp_256_norm_5(s);
             c = sp_256_cmp_5(s, p256_order);
-            sp_256_cond_sub_5(s, s, p256_order, 0 - (c >= 0));
+            sp_256_cond_sub_5(s, s, p256_order, 0L - (sp_digit)(c >= 0));
             sp_256_norm_5(s);
 
             /* s = s * k^-1 mod order */
-#ifdef HAVE_INTEL_AVX2
-            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-                sp_256_mont_mul_order_avx2_5(s, s, kInv);
-            else
-#endif
                 sp_256_mont_mul_order_5(s, s, kInv);
             sp_256_norm_5(s);
 
             /* Check that signature is usable. */
-            if (!sp_256_iszero_5(s))
-                break;
-        }
-    }
-
-    if (i == 0)
+            if (sp_256_iszero_5(s) == 0) {
+                break;
+            }
+        }
+    }
+
+    if (i == 0) {
         err = RNG_FAILURE_E;
-
-    if (err == MP_OKAY)
+    }
+
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(r, rm);
-    if (err == MP_OKAY)
+    }
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(s, sm);
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     if (d != NULL) {
         XMEMSET(d, 0, sizeof(sp_digit) * 8 * 5);
         XFREE(d, heap, DYNAMIC_TYPE_ECC);
     }
 #else
-    XMEMSET(e, 0, sizeof(sp_digit) * 2 * 5);
-    XMEMSET(x, 0, sizeof(sp_digit) * 2 * 5);
-    XMEMSET(k, 0, sizeof(sp_digit) * 2 * 5);
-    XMEMSET(r, 0, sizeof(sp_digit) * 2 * 5);
-    XMEMSET(r, 0, sizeof(sp_digit) * 2 * 5);
-    XMEMSET(tmp, 0, sizeof(sp_digit) * 3 * 2*5);
-#endif
-    sp_ecc_point_free(point, 1, heap);
+    XMEMSET(e, 0, sizeof(sp_digit) * 2U * 5U);
+    XMEMSET(x, 0, sizeof(sp_digit) * 2U * 5U);
+    XMEMSET(k, 0, sizeof(sp_digit) * 2U * 5U);
+    XMEMSET(r, 0, sizeof(sp_digit) * 2U * 5U);
+    XMEMSET(r, 0, sizeof(sp_digit) * 2U * 5U);
+    XMEMSET(tmp, 0, sizeof(sp_digit) * 3U * 2U * 5U);
+#endif
+    sp_256_point_free_5(point, 1, heap);
 
     return err;
 }
@@ -11290,109 +16796,101 @@
 int sp_ecc_verify_256(const byte* hash, word32 hashLen, mp_int* pX,
     mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap)
 {
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit* d = NULL;
 #else
     sp_digit u1d[2*5];
     sp_digit u2d[2*5];
     sp_digit sd[2*5];
     sp_digit tmpd[2*5 * 5];
-    sp_point p1d;
-    sp_point p2d;
-#endif
-    sp_digit* u1;
-    sp_digit* u2;
-    sp_digit* s;
-    sp_digit* tmp;
-    sp_point* p1;
-    sp_point* p2 = NULL;
+    sp_point_256 p1d;
+    sp_point_256 p2d;
+#endif
+    sp_digit* u1 = NULL;
+    sp_digit* u2 = NULL;
+    sp_digit* s = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_256* p1;
+    sp_point_256* p2 = NULL;
     sp_digit carry;
     int64_t c;
     int err;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    err = sp_ecc_point_new(heap, p1d, p1);
-    if (err == MP_OKAY)
-        err = sp_ecc_point_new(heap, p2d, p2);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        d = XMALLOC(sizeof(sp_digit) * 16 * 5, heap, DYNAMIC_TYPE_ECC);
-        if (d != NULL) {
-            u1  = d + 0 * 5;
-            u2  = d + 2 * 5;
-            s   = d + 4 * 5;
-            tmp = d + 6 * 5;
-        }
-        else
-            err = MEMORY_E;
-    }
-#else
-    u1 = u1d;
-    u2 = u2d;
-    s  = sd;
-    tmp = tmpd;
-#endif
-
-    if (err == MP_OKAY) {
-        if (hashLen > 32)
-            hashLen = 32;
-
-        sp_256_from_bin(u1, 5, hash, hashLen);
+
+    err = sp_256_point_new_5(heap, p1d, p1);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_5(heap, p2d, p2);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 5, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        u1  = d + 0 * 5;
+        u2  = d + 2 * 5;
+        s   = d + 4 * 5;
+        tmp = d + 6 * 5;
+#else
+        u1 = u1d;
+        u2 = u2d;
+        s  = sd;
+        tmp = tmpd;
+#endif
+
+        if (hashLen > 32U) {
+            hashLen = 32U;
+        }
+
+        sp_256_from_bin(u1, 5, hash, (int)hashLen);
         sp_256_from_mp(u2, 5, r);
         sp_256_from_mp(s, 5, sm);
         sp_256_from_mp(p2->x, 5, pX);
         sp_256_from_mp(p2->y, 5, pY);
         sp_256_from_mp(p2->z, 5, pZ);
 
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            sp_256_mul_avx2_5(s, s, p256_norm_order);
-        else
-#endif
+        {
             sp_256_mul_5(s, s, p256_norm_order);
+        }
         err = sp_256_mod_5(s, s, p256_order);
     }
     if (err == MP_OKAY) {
         sp_256_norm_5(s);
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
-            sp_256_mont_inv_order_avx2_5(s, s, tmp);
-            sp_256_mont_mul_order_avx2_5(u1, u1, s);
-            sp_256_mont_mul_order_avx2_5(u2, u2, s);
-        }
-        else
-#endif
         {
             sp_256_mont_inv_order_5(s, s, tmp);
             sp_256_mont_mul_order_5(u1, u1, s);
             sp_256_mont_mul_order_5(u2, u2, s);
         }
 
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_256_ecc_mulmod_base_avx2_5(p1, u1, 0, heap);
-        else
-#endif
             err = sp_256_ecc_mulmod_base_5(p1, u1, 0, heap);
     }
     if (err == MP_OKAY) {
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_256_ecc_mulmod_avx2_5(p2, p2, u2, 0, heap);
-        else
-#endif
             err = sp_256_ecc_mulmod_5(p2, p2, u2, 0, heap);
     }
 
     if (err == MP_OKAY) {
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            sp_256_proj_point_add_avx2_5(p1, p1, p2, tmp);
-        else
-#endif
+        {
             sp_256_proj_point_add_5(p1, p1, p2, tmp);
+            if (sp_256_iszero_5(p1->z)) {
+                if (sp_256_iszero_5(p1->x) && sp_256_iszero_5(p1->y)) {
+                    sp_256_proj_point_dbl_5(p1, p2, tmp);
+                }
+                else {
+                    /* Y ordinate is not used from here - don't set. */
+                    p1->x[0] = 0;
+                    p1->x[1] = 0;
+                    p1->x[2] = 0;
+                    p1->x[3] = 0;
+                    p1->x[4] = 0;
+                    XMEMCPY(p1->z, p256_norm_mod, sizeof(p256_norm_mod));
+                }
+            }
+        }
 
         /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
         /* Reload r and convert to Montgomery form. */
@@ -11404,13 +16902,13 @@
         /* u1 = r.z'.z' mod prime */
         sp_256_mont_sqr_5(p1->z, p1->z, p256_mod, p256_mp_mod);
         sp_256_mont_mul_5(u1, u2, p1->z, p256_mod, p256_mp_mod);
-        *res = sp_256_cmp_5(p1->x, u1) == 0;
+        *res = (int)(sp_256_cmp_5(p1->x, u1) == 0);
         if (*res == 0) {
             /* Reload r and add order. */
             sp_256_from_mp(u2, 5, r);
             carry = sp_256_add_5(u2, u2, p256_order);
             /* Carry means result is greater than mod and is not valid. */
-            if (!carry) {
+            if (carry == 0) {
                 sp_256_norm_5(u2);
 
                 /* Compare with mod and if greater or equal then not valid. */
@@ -11422,19 +16920,19 @@
                         /* u1 = (r + 1*order).z'.z' mod prime */
                         sp_256_mont_mul_5(u1, u2, p1->z, p256_mod,
                                                                   p256_mp_mod);
-                        *res = sp_256_cmp_5(p1->x, u2) == 0;
+                        *res = (int)(sp_256_cmp_5(p1->x, u1) == 0);
                     }
                 }
             }
         }
     }
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     if (d != NULL)
         XFREE(d, heap, DYNAMIC_TYPE_ECC);
 #endif
-    sp_ecc_point_free(p1, 0, heap);
-    sp_ecc_point_free(p2, 0, heap);
+    sp_256_point_free_5(p1, 0, heap);
+    sp_256_point_free_5(p2, 0, heap);
 
     return err;
 }
@@ -11448,9 +16946,9 @@
  * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
  * not on the curve and MP_OKAY otherwise.
  */
-static int sp_256_ecc_is_point_5(sp_point* point, void* heap)
-{
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+static int sp_256_ecc_is_point_5(sp_point_256* point, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit* d = NULL;
 #else
     sp_digit t1d[2*5];
@@ -11460,42 +16958,46 @@
     sp_digit* t2;
     int err = MP_OKAY;
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    d = XMALLOC(sizeof(sp_digit) * 5 * 4, heap, DYNAMIC_TYPE_ECC);
-    if (d != NULL) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5 * 4, heap, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
         t1 = d + 0 * 5;
         t2 = d + 2 * 5;
-    }
-    else
-        err = MEMORY_E;
-#else
-    (void)heap;
-
-    t1 = t1d;
-    t2 = t2d;
-#endif
-
-    if (err == MP_OKAY) {
+#else
+        (void)heap;
+
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
         sp_256_sqr_5(t1, point->y);
-        sp_256_mod_5(t1, t1, p256_mod);
+        (void)sp_256_mod_5(t1, t1, p256_mod);
         sp_256_sqr_5(t2, point->x);
-        sp_256_mod_5(t2, t2, p256_mod);
+        (void)sp_256_mod_5(t2, t2, p256_mod);
         sp_256_mul_5(t2, t2, point->x);
-        sp_256_mod_5(t2, t2, p256_mod);
-	sp_256_sub_5(t2, p256_mod, t2);
+        (void)sp_256_mod_5(t2, t2, p256_mod);
+        (void)sp_256_sub_5(t2, p256_mod, t2);
         sp_256_mont_add_5(t1, t1, t2, p256_mod);
 
         sp_256_mont_add_5(t1, t1, point->x, p256_mod);
         sp_256_mont_add_5(t1, t1, point->x, p256_mod);
         sp_256_mont_add_5(t1, t1, point->x, p256_mod);
 
-        if (sp_256_cmp_5(t1, p256_b) != 0)
+        if (sp_256_cmp_5(t1, p256_b) != 0) {
             err = MP_VAL;
-    }
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (d != NULL)
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
         XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
 #endif
 
     return err;
@@ -11510,23 +17012,23 @@
  */
 int sp_ecc_is_point_256(mp_int* pX, mp_int* pY)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point pubd;
-#endif
-    sp_point* pub;
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 pubd;
+#endif
+    sp_point_256* pub;
     byte one[1] = { 1 };
     int err;
 
-    err = sp_ecc_point_new(NULL, pubd, pub);
+    err = sp_256_point_new_5(NULL, pubd, pub);
     if (err == MP_OKAY) {
         sp_256_from_mp(pub->x, 5, pX);
         sp_256_from_mp(pub->y, 5, pY);
-        sp_256_from_bin(pub->z, 5, one, sizeof(one));
+        sp_256_from_bin(pub->z, 5, one, (int)sizeof(one));
 
         err = sp_256_ecc_is_point_5(pub, NULL);
     }
 
-    sp_ecc_point_free(pub, 0, NULL);
+    sp_256_point_free_5(pub, 0, NULL);
 
     return err;
 }
@@ -11544,50 +17046,54 @@
  */
 int sp_ecc_check_key_256(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit privd[5];
-    sp_point pubd;
-    sp_point pd;
+    sp_point_256 pubd;
+    sp_point_256 pd;
 #endif
     sp_digit* priv = NULL;
-    sp_point* pub;
-    sp_point* p = NULL;
+    sp_point_256* pub;
+    sp_point_256* p = NULL;
     byte one[1] = { 1 };
     int err;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    err = sp_ecc_point_new(heap, pubd, pub);
-    if (err == MP_OKAY)
-        err = sp_ecc_point_new(heap, pd, p);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        priv = XMALLOC(sizeof(sp_digit) * 5, heap, DYNAMIC_TYPE_ECC);
-        if (priv == NULL)
-            err = MEMORY_E;
-    }
-#else
-    priv = privd;
-#endif
-
-    if (err == MP_OKAY) {
+
+    err = sp_256_point_new_5(heap, pubd, pub);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_5(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (priv == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+        priv = privd;
+#endif
+
         sp_256_from_mp(pub->x, 5, pX);
         sp_256_from_mp(pub->y, 5, pY);
-        sp_256_from_bin(pub->z, 5, one, sizeof(one));
+        sp_256_from_bin(pub->z, 5, one, (int)sizeof(one));
         sp_256_from_mp(priv, 5, privm);
 
         /* Check point at infinitiy. */
-        if (sp_256_iszero_5(pub->x) &&
-            sp_256_iszero_5(pub->y))
+        if ((sp_256_iszero_5(pub->x) != 0) &&
+            (sp_256_iszero_5(pub->y) != 0)) {
             err = ECC_INF_E;
+        }
     }
 
     if (err == MP_OKAY) {
         /* Check range of X and Y */
         if (sp_256_cmp_5(pub->x, p256_mod) >= 0 ||
-            sp_256_cmp_5(pub->y, p256_mod) >= 0)
+            sp_256_cmp_5(pub->y, p256_mod) >= 0) {
             err = ECC_OUT_OF_RANGE_E;
+        }
     }
 
     if (err == MP_OKAY) {
@@ -11597,28 +17103,18 @@
 
     if (err == MP_OKAY) {
         /* Point * order = infinity */
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_256_ecc_mulmod_avx2_5(p, pub, p256_order, 1, heap);
-        else
-#endif
             err = sp_256_ecc_mulmod_5(p, pub, p256_order, 1, heap);
     }
     if (err == MP_OKAY) {
         /* Check result is infinity */
-        if (!sp_256_iszero_5(p->x) ||
-            !sp_256_iszero_5(p->y)) {
+        if ((sp_256_iszero_5(p->x) == 0) ||
+            (sp_256_iszero_5(p->y) == 0)) {
             err = ECC_INF_E;
         }
     }
 
     if (err == MP_OKAY) {
         /* Base * private = point */
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_256_ecc_mulmod_base_avx2_5(p, priv, 1, heap);
-        else
-#endif
             err = sp_256_ecc_mulmod_base_5(p, priv, 1, heap);
     }
     if (err == MP_OKAY) {
@@ -11629,12 +17125,13 @@
         }
     }
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (priv != NULL)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (priv != NULL) {
         XFREE(priv, heap, DYNAMIC_TYPE_ECC);
-#endif
-    sp_ecc_point_free(p, 0, heap);
-    sp_ecc_point_free(pub, 0, heap);
+    }
+#endif
+    sp_256_point_free_5(p, 0, heap);
+    sp_256_point_free_5(pub, 0, heap);
 
     return err;
 }
@@ -11658,27 +17155,27 @@
                               mp_int* qX, mp_int* qY, mp_int* qZ,
                               mp_int* rX, mp_int* rY, mp_int* rZ)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit tmpd[2 * 5 * 5];
-    sp_point pd;
-    sp_point qd;
+    sp_point_256 pd;
+    sp_point_256 qd;
 #endif
     sp_digit* tmp;
-    sp_point* p;
-    sp_point* q = NULL;
+    sp_point_256* p;
+    sp_point_256* q = NULL;
     int err;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    err = sp_ecc_point_new(NULL, pd, p);
-    if (err == MP_OKAY)
-        err = sp_ecc_point_new(NULL, qd, q);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        tmp = XMALLOC(sizeof(sp_digit) * 2 * 5 * 5, NULL, DYNAMIC_TYPE_ECC);
-        if (tmp == NULL)
-            err = MEMORY_E;
+
+    err = sp_256_point_new_5(NULL, pd, p);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_5(NULL, qd, q);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 5 * 5, NULL,
+                                                              DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
     }
 #else
     tmp = tmpd;
@@ -11692,27 +17189,26 @@
         sp_256_from_mp(q->y, 5, qY);
         sp_256_from_mp(q->z, 5, qZ);
 
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            sp_256_proj_point_add_avx2_5(p, p, q, tmp);
-        else
-#endif
             sp_256_proj_point_add_5(p, p, q, tmp);
     }
 
-    if (err == MP_OKAY)
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(p->x, rX);
-    if (err == MP_OKAY)
+    }
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(p->y, rY);
-    if (err == MP_OKAY)
+    }
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(p->z, rZ);
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (tmp != NULL)
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
         XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
-#endif
-    sp_ecc_point_free(q, 0, NULL);
-    sp_ecc_point_free(p, 0, NULL);
+    }
+#endif
+    sp_256_point_free_5(q, 0, NULL);
+    sp_256_point_free_5(p, 0, NULL);
 
     return err;
 }
@@ -11731,23 +17227,22 @@
 int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
                               mp_int* rX, mp_int* rY, mp_int* rZ)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit tmpd[2 * 5 * 2];
-    sp_point pd;
+    sp_point_256 pd;
 #endif
     sp_digit* tmp;
-    sp_point* p;
+    sp_point_256* p;
     int err;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    err = sp_ecc_point_new(NULL, pd, p);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        tmp = XMALLOC(sizeof(sp_digit) * 2 * 5 * 2, NULL, DYNAMIC_TYPE_ECC);
-        if (tmp == NULL)
-            err = MEMORY_E;
+
+    err = sp_256_point_new_5(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 5 * 2, NULL,
+                                                              DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
     }
 #else
     tmp = tmpd;
@@ -11758,26 +17253,25 @@
         sp_256_from_mp(p->y, 5, pY);
         sp_256_from_mp(p->z, 5, pZ);
 
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            sp_256_proj_point_dbl_avx2_5(p, p, tmp);
-        else
-#endif
             sp_256_proj_point_dbl_5(p, p, tmp);
     }
 
-    if (err == MP_OKAY)
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(p->x, rX);
-    if (err == MP_OKAY)
+    }
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(p->y, rY);
-    if (err == MP_OKAY)
+    }
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(p->z, rZ);
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (tmp != NULL)
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
         XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
-#endif
-    sp_ecc_point_free(p, 0, NULL);
+    }
+#endif
+    sp_256_point_free_5(p, 0, NULL);
 
     return err;
 }
@@ -11792,20 +17286,22 @@
  */
 int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ)
 {
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit tmpd[2 * 5 * 4];
-    sp_point pd;
+    sp_point_256 pd;
 #endif
     sp_digit* tmp;
-    sp_point* p;
+    sp_point_256* p;
     int err;
 
-    err = sp_ecc_point_new(NULL, pd, p);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        tmp = XMALLOC(sizeof(sp_digit) * 2 * 5 * 4, NULL, DYNAMIC_TYPE_ECC);
-        if (tmp == NULL)
-            err = MEMORY_E;
+    err = sp_256_point_new_5(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 5 * 4, NULL,
+                                                              DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
     }
 #else
     tmp = tmpd;
@@ -11818,18 +17314,22 @@
         sp_256_map_5(p, p, tmp);
     }
 
-    if (err == MP_OKAY)
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(p->x, pX);
-    if (err == MP_OKAY)
+    }
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(p->y, pY);
-    if (err == MP_OKAY)
+    }
+    if (err == MP_OKAY) {
         err = sp_256_to_mp(p->z, pZ);
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (tmp != NULL)
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
         XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
-#endif
-    sp_ecc_point_free(p, 0, NULL);
+    }
+#endif
+    sp_256_point_free_5(p, 0, NULL);
 
     return err;
 }
@@ -11842,7 +17342,7 @@
  */
 static int sp_256_mont_sqrt_5(sp_digit* y)
 {
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit* d;
 #else
     sp_digit t1d[2 * 5];
@@ -11851,58 +17351,23 @@
     sp_digit* t1;
     sp_digit* t2;
     int err = MP_OKAY;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    d = XMALLOC(sizeof(sp_digit) * 4 * 5, NULL, DYNAMIC_TYPE_ECC);
-    if (d != NULL) {
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 5, NULL, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
         t1 = d + 0 * 5;
         t2 = d + 2 * 5;
-    }
-    else
-        err = MEMORY_E;
-#else
-    t1 = t1d;
-    t2 = t2d;
-#endif
-
-    if (err == MP_OKAY) {
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
-            /* t2 = y ^ 0x2 */
-            sp_256_mont_sqr_avx2_5(t2, y, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0x3 */
-            sp_256_mont_mul_avx2_5(t1, t2, y, p256_mod, p256_mp_mod);
-            /* t2 = y ^ 0xc */
-            sp_256_mont_sqr_n_avx2_5(t2, t1, 2, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0xf */
-            sp_256_mont_mul_avx2_5(t1, t1, t2, p256_mod, p256_mp_mod);
-            /* t2 = y ^ 0xf0 */
-            sp_256_mont_sqr_n_avx2_5(t2, t1, 4, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0xff */
-            sp_256_mont_mul_avx2_5(t1, t1, t2, p256_mod, p256_mp_mod);
-            /* t2 = y ^ 0xff00 */
-            sp_256_mont_sqr_n_avx2_5(t2, t1, 8, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0xffff */
-            sp_256_mont_mul_avx2_5(t1, t1, t2, p256_mod, p256_mp_mod);
-            /* t2 = y ^ 0xffff0000 */
-            sp_256_mont_sqr_n_avx2_5(t2, t1, 16, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0xffffffff */
-            sp_256_mont_mul_avx2_5(t1, t1, t2, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0xffffffff00000000 */
-            sp_256_mont_sqr_n_avx2_5(t1, t1, 32, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0xffffffff00000001 */
-            sp_256_mont_mul_avx2_5(t1, t1, y, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0xffffffff00000001000000000000000000000000 */
-            sp_256_mont_sqr_n_avx2_5(t1, t1, 96, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0xffffffff00000001000000000000000000000001 */
-            sp_256_mont_mul_avx2_5(t1, t1, y, p256_mod, p256_mp_mod);
-            sp_256_mont_sqr_n_avx2_5(y, t1, 94, p256_mod, p256_mp_mod);
-        }
-        else
-#endif
+#else
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
         {
             /* t2 = y ^ 0x2 */
             sp_256_mont_sqr_5(t2, y, p256_mod, p256_mp_mod);
@@ -11936,13 +17401,15 @@
         }
     }
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (d != NULL)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
         XFREE(d, NULL, DYNAMIC_TYPE_ECC);
-#endif
-
-    return err;
-}
+    }
+#endif
+
+    return err;
+}
+
 
 /* Uncompress the point given the X ordinate.
  *
@@ -11953,47 +17420,37 @@
  */
 int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym)
 {
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
     sp_digit* d;
 #else
     sp_digit xd[2 * 5];
     sp_digit yd[2 * 5];
 #endif
-    sp_digit* x;
-    sp_digit* y;
-    int err = MP_OKAY;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    d = XMALLOC(sizeof(sp_digit) * 4 * 5, NULL, DYNAMIC_TYPE_ECC);
-    if (d != NULL) {
+    sp_digit* x = NULL;
+    sp_digit* y = NULL;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 5, NULL, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
         x = d + 0 * 5;
         y = d + 2 * 5;
-    }
-    else
-        err = MEMORY_E;
-#else
-    x = xd;
-    y = yd;
-#endif
-
-    if (err == MP_OKAY) {
+#else
+        x = xd;
+        y = yd;
+#endif
+
         sp_256_from_mp(x, 5, xm);
-
         err = sp_256_mod_mul_norm_5(x, x, p256_mod);
     }
-
     if (err == MP_OKAY) {
         /* y = x^3 */
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
-            sp_256_mont_sqr_avx2_5(y, x, p256_mod, p256_mp_mod);
-            sp_256_mont_mul_avx2_5(y, y, x, p256_mod, p256_mp_mod);
-        }
-        else
-#endif
         {
             sp_256_mont_sqr_5(y, x, p256_mod, p256_mp_mod);
             sp_256_mont_mul_5(y, y, x, p256_mod, p256_mp_mod);
@@ -12011,23 +17468,5752 @@
         err = sp_256_mont_sqrt_5(y);
     }
     if (err == MP_OKAY) {
-        XMEMSET(y + 5, 0, 5 * sizeof(sp_digit));
+        XMEMSET(y + 5, 0, 5U * sizeof(sp_digit));
         sp_256_mont_reduce_5(y, p256_mod, p256_mp_mod);
-        if (((y[0] ^ odd) & 1) != 0)
+        if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) {
             sp_256_mont_sub_5(y, p256_mod, y, p256_mod);
+        }
 
         err = sp_256_to_mp(y, ym);
     }
 
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (d != NULL)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
         XFREE(d, NULL, DYNAMIC_TYPE_ECC);
-#endif
-
-    return err;
-}
-#endif
-#endif /* WOLFSSL_SP_NO_256 */
+    }
+#endif
+
+    return err;
+}
+#endif
+#endif /* !WOLFSSL_SP_NO_256 */
+#ifdef WOLFSSL_SP_384
+
+/* Point structure to use. */
+typedef struct sp_point_384 {
+    sp_digit x[2 * 7];
+    sp_digit y[2 * 7];
+    sp_digit z[2 * 7];
+    int infinity;
+} sp_point_384;
+
+/* The modulus (prime) of the curve P384. */
+static const sp_digit p384_mod[7] = {
+    0x000000ffffffffL,0x7ffe0000000000L,0x7ffffffffbffffL,0x7fffffffffffffL,
+    0x7fffffffffffffL,0x7fffffffffffffL,0x3fffffffffffffL
+};
+/* The Montogmery normalizer for modulus of the curve P384. */
+static const sp_digit p384_norm_mod[7] = {
+    0x7fffff00000001L,0x0001ffffffffffL,0x00000000040000L,0x00000000000000L,
+    0x00000000000000L,0x00000000000000L,0x00000000000000L
+};
+/* The Montogmery multiplier for modulus of the curve P384. */
+static sp_digit p384_mp_mod = 0x0000100000001;
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+                                            defined(HAVE_ECC_VERIFY)
+/* The order of the curve P384. */
+static const sp_digit p384_order[7] = {
+    0x6c196accc52973L,0x1b6491614ef5d9L,0x07d0dcb77d6068L,0x7ffffffe3b1a6cL,
+    0x7fffffffffffffL,0x7fffffffffffffL,0x3fffffffffffffL
+};
+#endif
+/* The order of the curve P384 minus 2. */
+static const sp_digit p384_order2[7] = {
+    0x6c196accc52971L,0x1b6491614ef5d9L,0x07d0dcb77d6068L,0x7ffffffe3b1a6cL,
+    0x7fffffffffffffL,0x7fffffffffffffL,0x3fffffffffffffL
+};
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montogmery normalizer for order of the curve P384. */
+static const sp_digit p384_norm_order[7] = {
+    0x13e695333ad68dL,0x649b6e9eb10a26L,0x782f2348829f97L,0x00000001c4e593L,
+    0x00000000000000L,0x00000000000000L,0x00000000000000L
+};
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montogmery multiplier for order of the curve P384. */
+static sp_digit p384_mp_order = 0x546089e88fdc45l;
+#endif
+/* The base point of curve P384. */
+static const sp_point_384 p384_base = {
+    /* X ordinate */
+    {
+        0x545e3872760ab7L,0x64bb7eaa52d874L,0x020950a8e1540bL,
+        0x5d3cdcc2cfba0fL,0x0ad746e1d3b628L,0x26f1d638e3de64L,0x2aa1f288afa2c1L,
+        0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* Y ordinate */
+    {
+        0x431d7c90ea0e5fL,0x639c3afd033af4L,0x4ed7c2e3002982L,
+        0x44d0a3e74ed188L,0x2dc29f8f41dbd2L,0x0debb3d317f252L,0x0d85f792a5898bL,
+        0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* Z ordinate */
+    {
+        0x00000000000001L,0x00000000000000L,0x00000000000000L,
+        0x00000000000000L,0x00000000000000L,0x00000000000000L,0x00000000000000L,
+        0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* infinity */
+    0
+};
+#if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY)
+static const sp_digit p384_b[7] = {
+    0x05c8edd3ec2aefL,0x731b145da33a55L,0x3d404e1d6b1958L,0x740a089018a044L,
+    0x02d19181d9c6efL,0x7c9311c0ad7c7fL,0x2ccc4be9f88fb9L
+};
+#endif
+
+static int sp_384_point_new_ex_7(void* heap, sp_point_384* sp, sp_point_384** p)
+{
+    int ret = MP_OKAY;
+    (void)heap;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    (void)sp;
+    *p = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, DYNAMIC_TYPE_ECC);
+#else
+    *p = sp;
+#endif
+    if (*p == NULL) {
+        ret = MEMORY_E;
+    }
+    return ret;
+}
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* Allocate memory for point and return error. */
+#define sp_384_point_new_7(heap, sp, p) sp_384_point_new_ex_7((heap), NULL, &(p))
+#else
+/* Set pointer to data and return no error. */
+#define sp_384_point_new_7(heap, sp, p) sp_384_point_new_ex_7((heap), &(sp), &(p))
+#endif
+
+
+static void sp_384_point_free_7(sp_point_384* p, int clear, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* If valid pointer then clear point data if requested and free data. */
+    if (p != NULL) {
+        if (clear != 0) {
+            XMEMSET(p, 0, sizeof(*p));
+        }
+        XFREE(p, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+/* Clear point data if requested. */
+    if (clear != 0) {
+        XMEMSET(p, 0, sizeof(*p));
+    }
+#endif
+    (void)heap;
+}
+
+/* Multiply a number by Montogmery normalizer mod modulus (prime).
+ *
+ * r  The resulting Montgomery form number.
+ * a  The number to convert.
+ * m  The modulus (prime).
+ * returns MEMORY_E when memory allocation fails and MP_OKAY otherwise.
+ */
+static int sp_384_mod_mul_norm_7(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    int64_t* td;
+#else
+    int64_t td[12];
+    int64_t a32d[12];
+#endif
+    int64_t* t;
+    int64_t* a32;
+    int64_t o;
+    int err = MP_OKAY;
+
+    (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    td = (int64_t*)XMALLOC(sizeof(int64_t) * 2 * 12, NULL, DYNAMIC_TYPE_ECC);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t = td;
+        a32 = td + 12;
+#else
+        t = td;
+        a32 = a32d;
+#endif
+
+        a32[0] = (sp_digit)(a[0]) & 0xffffffffL;
+        a32[1] = (sp_digit)(a[0] >> 32U);
+        a32[1] |= a[1] << 23U;
+        a32[1] &= 0xffffffffL;
+        a32[2] = (sp_digit)(a[1] >> 9U) & 0xffffffffL;
+        a32[3] = (sp_digit)(a[1] >> 41U);
+        a32[3] |= a[2] << 14U;
+        a32[3] &= 0xffffffffL;
+        a32[4] = (sp_digit)(a[2] >> 18U) & 0xffffffffL;
+        a32[5] = (sp_digit)(a[2] >> 50U);
+        a32[5] |= a[3] << 5U;
+        a32[5] &= 0xffffffffL;
+        a32[6] = (sp_digit)(a[3] >> 27U);
+        a32[6] |= a[4] << 28U;
+        a32[6] &= 0xffffffffL;
+        a32[7] = (sp_digit)(a[4] >> 4U) & 0xffffffffL;
+        a32[8] = (sp_digit)(a[4] >> 36U);
+        a32[8] |= a[5] << 19U;
+        a32[8] &= 0xffffffffL;
+        a32[9] = (sp_digit)(a[5] >> 13U) & 0xffffffffL;
+        a32[10] = (sp_digit)(a[5] >> 45U);
+        a32[10] |= a[6] << 10U;
+        a32[10] &= 0xffffffffL;
+        a32[11] = (sp_digit)(a[6] >> 22U) & 0xffffffffL;
+
+        /*  1  0  0  0  0  0  0  0  1  1  0 -1 */
+        t[0] = 0 + a32[0] + a32[8] + a32[9] - a32[11];
+        /* -1  1  0  0  0  0  0  0 -1  0  1  1 */
+        t[1] = 0 - a32[0] + a32[1] - a32[8] + a32[10] + a32[11];
+        /*  0 -1  1  0  0  0  0  0  0 -1  0  1 */
+        t[2] = 0 - a32[1] + a32[2] - a32[9] + a32[11];
+        /*  1  0 -1  1  0  0  0  0  1  1 -1 -1 */
+        t[3] = 0 + a32[0] - a32[2] + a32[3] + a32[8] + a32[9] - a32[10] - a32[11];
+        /*  1  1  0 -1  1  0  0  0  1  2  1 -2 */
+        t[4] = 0 + a32[0] + a32[1] - a32[3] + a32[4] + a32[8] + 2 * a32[9] + a32[10] -  2 * a32[11];
+        /*  0  1  1  0 -1  1  0  0  0  1  2  1 */
+        t[5] = 0 + a32[1] + a32[2] - a32[4] + a32[5] + a32[9] + 2 * a32[10] + a32[11];
+        /*  0  0  1  1  0 -1  1  0  0  0  1  2 */
+        t[6] = 0 + a32[2] + a32[3] - a32[5] + a32[6] + a32[10] + 2 * a32[11];
+        /*  0  0  0  1  1  0 -1  1  0  0  0  1 */
+        t[7] = 0 + a32[3] + a32[4] - a32[6] + a32[7] + a32[11];
+        /*  0  0  0  0  1  1  0 -1  1  0  0  0 */
+        t[8] = 0 + a32[4] + a32[5] - a32[7] + a32[8];
+        /*  0  0  0  0  0  1  1  0 -1  1  0  0 */
+        t[9] = 0 + a32[5] + a32[6] - a32[8] + a32[9];
+        /*  0  0  0  0  0  0  1  1  0 -1  1  0 */
+        t[10] = 0 + a32[6] + a32[7] - a32[9] + a32[10];
+        /*  0  0  0  0  0  0  0  1  1  0 -1  1 */
+        t[11] = 0 + a32[7] + a32[8] - a32[10] + a32[11];
+
+        t[1] += t[0] >> 32; t[0] &= 0xffffffff;
+        t[2] += t[1] >> 32; t[1] &= 0xffffffff;
+        t[3] += t[2] >> 32; t[2] &= 0xffffffff;
+        t[4] += t[3] >> 32; t[3] &= 0xffffffff;
+        t[5] += t[4] >> 32; t[4] &= 0xffffffff;
+        t[6] += t[5] >> 32; t[5] &= 0xffffffff;
+        t[7] += t[6] >> 32; t[6] &= 0xffffffff;
+        t[8] += t[7] >> 32; t[7] &= 0xffffffff;
+        t[9] += t[8] >> 32; t[8] &= 0xffffffff;
+        t[10] += t[9] >> 32; t[9] &= 0xffffffff;
+        t[11] += t[10] >> 32; t[10] &= 0xffffffff;
+        o     = t[11] >> 32; t[11] &= 0xffffffff;
+        t[0] += o;
+        t[1] -= o;
+        t[3] += o;
+        t[4] += o;
+        t[1] += t[0] >> 32; t[0] &= 0xffffffff;
+        t[2] += t[1] >> 32; t[1] &= 0xffffffff;
+        t[3] += t[2] >> 32; t[2] &= 0xffffffff;
+        t[4] += t[3] >> 32; t[3] &= 0xffffffff;
+        t[5] += t[4] >> 32; t[4] &= 0xffffffff;
+        t[6] += t[5] >> 32; t[5] &= 0xffffffff;
+        t[7] += t[6] >> 32; t[6] &= 0xffffffff;
+        t[8] += t[7] >> 32; t[7] &= 0xffffffff;
+        t[9] += t[8] >> 32; t[8] &= 0xffffffff;
+        t[10] += t[9] >> 32; t[9] &= 0xffffffff;
+        t[11] += t[10] >> 32; t[10] &= 0xffffffff;
+
+        r[0] = t[0];
+        r[0] |= t[1] << 32U;
+        r[0] &= 0x7fffffffffffffLL;
+        r[1] = (sp_digit)(t[1] >> 23);
+        r[1] |= t[2] << 9U;
+        r[1] |= t[3] << 41U;
+        r[1] &= 0x7fffffffffffffLL;
+        r[2] = (sp_digit)(t[3] >> 14);
+        r[2] |= t[4] << 18U;
+        r[2] |= t[5] << 50U;
+        r[2] &= 0x7fffffffffffffLL;
+        r[3] = (sp_digit)(t[5] >> 5);
+        r[3] |= t[6] << 27U;
+        r[3] &= 0x7fffffffffffffLL;
+        r[4] = (sp_digit)(t[6] >> 28);
+        r[4] |= t[7] << 4U;
+        r[4] |= t[8] << 36U;
+        r[4] &= 0x7fffffffffffffLL;
+        r[5] = (sp_digit)(t[8] >> 19);
+        r[5] |= t[9] << 13U;
+        r[5] |= t[10] << 45U;
+        r[5] &= 0x7fffffffffffffLL;
+        r[6] = (sp_digit)(t[10] >> 10);
+        r[6] |= t[11] << 22U;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (td != NULL)
+        XFREE(td, NULL, DYNAMIC_TYPE_ECC);
+#endif
+
+    return err;
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
+ * a  A multi-precision integer.
+ */
+static void sp_384_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 55
+    int j;
+
+    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+    for (j = a->used; j < size; j++) {
+        r[j] = 0;
+    }
+#elif DIGIT_BIT > 55
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
+        r[j] &= 0x7fffffffffffffL;
+        s = 55U - s;
+        if (j + 1 >= size) {
+            break;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 55U) <= (word32)DIGIT_BIT) {
+            s += 55U;
+            r[j] &= 0x7fffffffffffffL;
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#else
+    int i, j = 0, s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i]) << s;
+        if (s + DIGIT_BIT >= 55) {
+            r[j] &= 0x7fffffffffffffL;
+            if (j + 1 >= size) {
+                break;
+            }
+            s = 55 - s;
+            if (s == DIGIT_BIT) {
+                r[++j] = 0;
+                s = 0;
+            }
+            else {
+                r[++j] = a->dp[i] >> s;
+                s = DIGIT_BIT - s;
+            }
+        }
+        else {
+            s += DIGIT_BIT;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#endif
+}
+
+/* Convert a point of type ecc_point to type sp_point_384.
+ *
+ * p   Point of type sp_point_384 (result).
+ * pm  Point of type ecc_point.
+ */
+static void sp_384_point_from_ecc_point_7(sp_point_384* p, const ecc_point* pm)
+{
+    XMEMSET(p->x, 0, sizeof(p->x));
+    XMEMSET(p->y, 0, sizeof(p->y));
+    XMEMSET(p->z, 0, sizeof(p->z));
+    sp_384_from_mp(p->x, 7, pm->x);
+    sp_384_from_mp(p->y, 7, pm->y);
+    sp_384_from_mp(p->z, 7, pm->z);
+    p->infinity = 0;
+}
+
+/* Convert an array of sp_digit to an mp_int.
+ *
+ * a  A single precision integer.
+ * r  A multi-precision integer.
+ */
+static int sp_384_to_mp(const sp_digit* a, mp_int* r)
+{
+    int err;
+
+    err = mp_grow(r, (384 + DIGIT_BIT - 1) / DIGIT_BIT);
+    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 55
+        XMEMCPY(r->dp, a, sizeof(sp_digit) * 7);
+        r->used = 7;
+        mp_clamp(r);
+#elif DIGIT_BIT < 55
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 7; i++) {
+            r->dp[j] |= (mp_digit)(a[i] << s);
+            r->dp[j] &= (1L << DIGIT_BIT) - 1;
+            s = DIGIT_BIT - s;
+            r->dp[++j] = (mp_digit)(a[i] >> s);
+            while (s + DIGIT_BIT <= 55) {
+                s += DIGIT_BIT;
+                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+                if (s == SP_WORD_SIZE) {
+                    r->dp[j] = 0;
+                }
+                else {
+                    r->dp[j] = (mp_digit)(a[i] >> s);
+                }
+            }
+            s = 55 - s;
+        }
+        r->used = (384 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#else
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 7; i++) {
+            r->dp[j] |= ((mp_digit)a[i]) << s;
+            if (s + 55 >= DIGIT_BIT) {
+    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+                r->dp[j] &= (1L << DIGIT_BIT) - 1;
+    #endif
+                s = DIGIT_BIT - s;
+                r->dp[++j] = a[i] >> s;
+                s = 55 - s;
+            }
+            else {
+                s += 55;
+            }
+        }
+        r->used = (384 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#endif
+    }
+
+    return err;
+}
+
+/* Convert a point of type sp_point_384 to type ecc_point.
+ *
+ * p   Point of type sp_point_384.
+ * pm  Point of type ecc_point (result).
+ * returns MEMORY_E when allocation of memory in ecc_point fails otherwise
+ * MP_OKAY.
+ */
+static int sp_384_point_to_ecc_point_7(const sp_point_384* p, ecc_point* pm)
+{
+    int err;
+
+    err = sp_384_to_mp(p->x, pm->x);
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->y, pm->y);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->z, pm->z);
+    }
+
+    return err;
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_384_mul_7(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int i, j, k;
+    int128_t c;
+
+    c = ((int128_t)a[6]) * b[6];
+    r[13] = (sp_digit)(c >> 55);
+    c = (c & 0x7fffffffffffffL) << 55;
+    for (k = 11; k >= 0; k--) {
+        for (i = 6; i >= 0; i--) {
+            j = k - i;
+            if (j >= 7) {
+                break;
+            }
+            if (j < 0) {
+                continue;
+            }
+
+            c += ((int128_t)a[i]) * b[j];
+        }
+        r[k + 2] += c >> 110;
+        r[k + 1] = (c >> 55) & 0x7fffffffffffffL;
+        c = (c & 0x7fffffffffffffL) << 55;
+    }
+    r[0] = (sp_digit)(c >> 55);
+}
+
+#else
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_384_mul_7(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int128_t t0   = ((int128_t)a[ 0]) * b[ 0];
+    int128_t t1   = ((int128_t)a[ 0]) * b[ 1]
+                 + ((int128_t)a[ 1]) * b[ 0];
+    int128_t t2   = ((int128_t)a[ 0]) * b[ 2]
+                 + ((int128_t)a[ 1]) * b[ 1]
+                 + ((int128_t)a[ 2]) * b[ 0];
+    int128_t t3   = ((int128_t)a[ 0]) * b[ 3]
+                 + ((int128_t)a[ 1]) * b[ 2]
+                 + ((int128_t)a[ 2]) * b[ 1]
+                 + ((int128_t)a[ 3]) * b[ 0];
+    int128_t t4   = ((int128_t)a[ 0]) * b[ 4]
+                 + ((int128_t)a[ 1]) * b[ 3]
+                 + ((int128_t)a[ 2]) * b[ 2]
+                 + ((int128_t)a[ 3]) * b[ 1]
+                 + ((int128_t)a[ 4]) * b[ 0];
+    int128_t t5   = ((int128_t)a[ 0]) * b[ 5]
+                 + ((int128_t)a[ 1]) * b[ 4]
+                 + ((int128_t)a[ 2]) * b[ 3]
+                 + ((int128_t)a[ 3]) * b[ 2]
+                 + ((int128_t)a[ 4]) * b[ 1]
+                 + ((int128_t)a[ 5]) * b[ 0];
+    int128_t t6   = ((int128_t)a[ 0]) * b[ 6]
+                 + ((int128_t)a[ 1]) * b[ 5]
+                 + ((int128_t)a[ 2]) * b[ 4]
+                 + ((int128_t)a[ 3]) * b[ 3]
+                 + ((int128_t)a[ 4]) * b[ 2]
+                 + ((int128_t)a[ 5]) * b[ 1]
+                 + ((int128_t)a[ 6]) * b[ 0];
+    int128_t t7   = ((int128_t)a[ 1]) * b[ 6]
+                 + ((int128_t)a[ 2]) * b[ 5]
+                 + ((int128_t)a[ 3]) * b[ 4]
+                 + ((int128_t)a[ 4]) * b[ 3]
+                 + ((int128_t)a[ 5]) * b[ 2]
+                 + ((int128_t)a[ 6]) * b[ 1];
+    int128_t t8   = ((int128_t)a[ 2]) * b[ 6]
+                 + ((int128_t)a[ 3]) * b[ 5]
+                 + ((int128_t)a[ 4]) * b[ 4]
+                 + ((int128_t)a[ 5]) * b[ 3]
+                 + ((int128_t)a[ 6]) * b[ 2];
+    int128_t t9   = ((int128_t)a[ 3]) * b[ 6]
+                 + ((int128_t)a[ 4]) * b[ 5]
+                 + ((int128_t)a[ 5]) * b[ 4]
+                 + ((int128_t)a[ 6]) * b[ 3];
+    int128_t t10  = ((int128_t)a[ 4]) * b[ 6]
+                 + ((int128_t)a[ 5]) * b[ 5]
+                 + ((int128_t)a[ 6]) * b[ 4];
+    int128_t t11  = ((int128_t)a[ 5]) * b[ 6]
+                 + ((int128_t)a[ 6]) * b[ 5];
+    int128_t t12  = ((int128_t)a[ 6]) * b[ 6];
+
+    t1   += t0  >> 55; r[ 0] = t0  & 0x7fffffffffffffL;
+    t2   += t1  >> 55; r[ 1] = t1  & 0x7fffffffffffffL;
+    t3   += t2  >> 55; r[ 2] = t2  & 0x7fffffffffffffL;
+    t4   += t3  >> 55; r[ 3] = t3  & 0x7fffffffffffffL;
+    t5   += t4  >> 55; r[ 4] = t4  & 0x7fffffffffffffL;
+    t6   += t5  >> 55; r[ 5] = t5  & 0x7fffffffffffffL;
+    t7   += t6  >> 55; r[ 6] = t6  & 0x7fffffffffffffL;
+    t8   += t7  >> 55; r[ 7] = t7  & 0x7fffffffffffffL;
+    t9   += t8  >> 55; r[ 8] = t8  & 0x7fffffffffffffL;
+    t10  += t9  >> 55; r[ 9] = t9  & 0x7fffffffffffffL;
+    t11  += t10 >> 55; r[10] = t10 & 0x7fffffffffffffL;
+    t12  += t11 >> 55; r[11] = t11 & 0x7fffffffffffffL;
+    r[13] = (sp_digit)(t12 >> 55);
+                       r[12] = t12 & 0x7fffffffffffffL;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#define sp_384_mont_reduce_order_7         sp_384_mont_reduce_7
+
+/* Compare a with b in constant time.
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+static sp_digit sp_384_cmp_7(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=6; i>=0; i--) {
+        r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    }
+#else
+    r |= (a[ 6] - b[ 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 5] - b[ 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 4] - b[ 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 3] - b[ 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 2] - b[ 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 1] - b[ 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 0] - b[ 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+#endif /* WOLFSSL_SP_SMALL */
+
+    return r;
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not.
+ *
+ * r  A single precision number representing condition subtract result.
+ * a  A single precision number to subtract from.
+ * b  A single precision number to subtract.
+ * m  Mask value to apply.
+ */
+static void sp_384_cond_sub_7(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i = 0; i < 7; i++) {
+        r[i] = a[i] - (b[i] & m);
+    }
+#else
+    r[ 0] = a[ 0] - (b[ 0] & m);
+    r[ 1] = a[ 1] - (b[ 1] & m);
+    r[ 2] = a[ 2] - (b[ 2] & m);
+    r[ 3] = a[ 3] - (b[ 3] & m);
+    r[ 4] = a[ 4] - (b[ 4] & m);
+    r[ 5] = a[ 5] - (b[ 5] & m);
+    r[ 6] = a[ 6] - (b[ 6] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Mul a by scalar b and add into r. (r += a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A scalar.
+ */
+SP_NOINLINE static void sp_384_mul_add_7(sp_digit* r, const sp_digit* a,
+        const sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int128_t tb = b;
+    int128_t t = 0;
+    int i;
+
+    for (i = 0; i < 7; i++) {
+        t += (tb * a[i]) + r[i];
+        r[i] = t & 0x7fffffffffffffL;
+        t >>= 55;
+    }
+    r[7] += t;
+#else
+    int128_t tb = b;
+    int128_t t[7];
+
+    t[ 0] = tb * a[ 0];
+    t[ 1] = tb * a[ 1];
+    t[ 2] = tb * a[ 2];
+    t[ 3] = tb * a[ 3];
+    t[ 4] = tb * a[ 4];
+    t[ 5] = tb * a[ 5];
+    t[ 6] = tb * a[ 6];
+    r[ 0] += (sp_digit)                 (t[ 0] & 0x7fffffffffffffL);
+    r[ 1] += (sp_digit)((t[ 0] >> 55) + (t[ 1] & 0x7fffffffffffffL));
+    r[ 2] += (sp_digit)((t[ 1] >> 55) + (t[ 2] & 0x7fffffffffffffL));
+    r[ 3] += (sp_digit)((t[ 2] >> 55) + (t[ 3] & 0x7fffffffffffffL));
+    r[ 4] += (sp_digit)((t[ 3] >> 55) + (t[ 4] & 0x7fffffffffffffL));
+    r[ 5] += (sp_digit)((t[ 4] >> 55) + (t[ 5] & 0x7fffffffffffffL));
+    r[ 6] += (sp_digit)((t[ 5] >> 55) + (t[ 6] & 0x7fffffffffffffL));
+    r[ 7] += (sp_digit) (t[ 6] >> 55);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Normalize the values in each word to 55.
+ *
+ * a  Array of sp_digit to normalize.
+ */
+static void sp_384_norm_7(sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    for (i = 0; i < 6; i++) {
+        a[i+1] += a[i] >> 55;
+        a[i] &= 0x7fffffffffffffL;
+    }
+#else
+    a[1] += a[0] >> 55; a[0] &= 0x7fffffffffffffL;
+    a[2] += a[1] >> 55; a[1] &= 0x7fffffffffffffL;
+    a[3] += a[2] >> 55; a[2] &= 0x7fffffffffffffL;
+    a[4] += a[3] >> 55; a[3] &= 0x7fffffffffffffL;
+    a[5] += a[4] >> 55; a[4] &= 0x7fffffffffffffL;
+    a[6] += a[5] >> 55; a[5] &= 0x7fffffffffffffL;
+#endif
+}
+
+/* Shift the result in the high 384 bits down to the bottom.
+ *
+ * r  A single precision number.
+ * a  A single precision number.
+ */
+static void sp_384_mont_shift_7(sp_digit* r, const sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    word64 n;
+
+    n = a[6] >> 54;
+    for (i = 0; i < 6; i++) {
+        n += (word64)a[7 + i] << 1;
+        r[i] = n & 0x7fffffffffffffL;
+        n >>= 55;
+    }
+    n += (word64)a[13] << 1;
+    r[6] = n;
+#else
+    word64 n;
+
+    n  = a[6] >> 54;
+    n += (word64)a[ 7] << 1U; r[ 0] = n & 0x7fffffffffffffUL; n >>= 55U;
+    n += (word64)a[ 8] << 1U; r[ 1] = n & 0x7fffffffffffffUL; n >>= 55U;
+    n += (word64)a[ 9] << 1U; r[ 2] = n & 0x7fffffffffffffUL; n >>= 55U;
+    n += (word64)a[10] << 1U; r[ 3] = n & 0x7fffffffffffffUL; n >>= 55U;
+    n += (word64)a[11] << 1U; r[ 4] = n & 0x7fffffffffffffUL; n >>= 55U;
+    n += (word64)a[12] << 1U; r[ 5] = n & 0x7fffffffffffffUL; n >>= 55U;
+    n += (word64)a[13] << 1U; r[ 6] = n;
+#endif /* WOLFSSL_SP_SMALL */
+    XMEMSET(&r[7], 0, sizeof(*r) * 7U);
+}
+
+/* Reduce the number back to 384 bits using Montgomery reduction.
+ *
+ * a   A single precision number to reduce in place.
+ * m   The single precision number representing the modulus.
+ * mp  The digit representing the negative inverse of m mod 2^n.
+ */
+static void sp_384_mont_reduce_7(sp_digit* a, const sp_digit* m, sp_digit mp)
+{
+    int i;
+    sp_digit mu;
+
+    sp_384_norm_7(a + 7);
+
+    for (i=0; i<6; i++) {
+        mu = (a[i] * mp) & 0x7fffffffffffffL;
+        sp_384_mul_add_7(a+i, m, mu);
+        a[i+1] += a[i] >> 55;
+    }
+    mu = (a[i] * mp) & 0x3fffffffffffffL;
+    sp_384_mul_add_7(a+i, m, mu);
+    a[i+1] += a[i] >> 55;
+    a[i] &= 0x7fffffffffffffL;
+
+    sp_384_mont_shift_7(a, a);
+    sp_384_cond_sub_7(a, a, m, 0 - (((a[6] >> 54) > 0) ?
+            (sp_digit)1 : (sp_digit)0));
+    sp_384_norm_7(a);
+}
+
+/* Multiply two Montogmery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r   Result of multiplication.
+ * a   First number to multiply in Montogmery form.
+ * b   Second number to multiply in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_384_mont_mul_7(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_384_mul_7(r, a, b);
+    sp_384_mont_reduce_7(r, m, mp);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_384_sqr_7(sp_digit* r, const sp_digit* a)
+{
+    int i, j, k;
+    int128_t c;
+
+    c = ((int128_t)a[6]) * a[6];
+    r[13] = (sp_digit)(c >> 55);
+    c = (c & 0x7fffffffffffffL) << 55;
+    for (k = 11; k >= 0; k--) {
+        for (i = 6; i >= 0; i--) {
+            j = k - i;
+            if (j >= 7 || i <= j) {
+                break;
+            }
+            if (j < 0) {
+                continue;
+            }
+
+            c += ((int128_t)a[i]) * a[j] * 2;
+        }
+        if (i == j) {
+           c += ((int128_t)a[i]) * a[i];
+        }
+
+        r[k + 2] += c >> 110;
+        r[k + 1] = (c >> 55) & 0x7fffffffffffffL;
+        c = (c & 0x7fffffffffffffL) << 55;
+    }
+    r[0] = (sp_digit)(c >> 55);
+}
+
+#else
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_384_sqr_7(sp_digit* r, const sp_digit* a)
+{
+    int128_t t0   =  ((int128_t)a[ 0]) * a[ 0];
+    int128_t t1   = (((int128_t)a[ 0]) * a[ 1]) * 2;
+    int128_t t2   = (((int128_t)a[ 0]) * a[ 2]) * 2
+                 +  ((int128_t)a[ 1]) * a[ 1];
+    int128_t t3   = (((int128_t)a[ 0]) * a[ 3]
+                 +  ((int128_t)a[ 1]) * a[ 2]) * 2;
+    int128_t t4   = (((int128_t)a[ 0]) * a[ 4]
+                 +  ((int128_t)a[ 1]) * a[ 3]) * 2
+                 +  ((int128_t)a[ 2]) * a[ 2];
+    int128_t t5   = (((int128_t)a[ 0]) * a[ 5]
+                 +  ((int128_t)a[ 1]) * a[ 4]
+                 +  ((int128_t)a[ 2]) * a[ 3]) * 2;
+    int128_t t6   = (((int128_t)a[ 0]) * a[ 6]
+                 +  ((int128_t)a[ 1]) * a[ 5]
+                 +  ((int128_t)a[ 2]) * a[ 4]) * 2
+                 +  ((int128_t)a[ 3]) * a[ 3];
+    int128_t t7   = (((int128_t)a[ 1]) * a[ 6]
+                 +  ((int128_t)a[ 2]) * a[ 5]
+                 +  ((int128_t)a[ 3]) * a[ 4]) * 2;
+    int128_t t8   = (((int128_t)a[ 2]) * a[ 6]
+                 +  ((int128_t)a[ 3]) * a[ 5]) * 2
+                 +  ((int128_t)a[ 4]) * a[ 4];
+    int128_t t9   = (((int128_t)a[ 3]) * a[ 6]
+                 +  ((int128_t)a[ 4]) * a[ 5]) * 2;
+    int128_t t10  = (((int128_t)a[ 4]) * a[ 6]) * 2
+                 +  ((int128_t)a[ 5]) * a[ 5];
+    int128_t t11  = (((int128_t)a[ 5]) * a[ 6]) * 2;
+    int128_t t12  =  ((int128_t)a[ 6]) * a[ 6];
+
+    t1   += t0  >> 55; r[ 0] = t0  & 0x7fffffffffffffL;
+    t2   += t1  >> 55; r[ 1] = t1  & 0x7fffffffffffffL;
+    t3   += t2  >> 55; r[ 2] = t2  & 0x7fffffffffffffL;
+    t4   += t3  >> 55; r[ 3] = t3  & 0x7fffffffffffffL;
+    t5   += t4  >> 55; r[ 4] = t4  & 0x7fffffffffffffL;
+    t6   += t5  >> 55; r[ 5] = t5  & 0x7fffffffffffffL;
+    t7   += t6  >> 55; r[ 6] = t6  & 0x7fffffffffffffL;
+    t8   += t7  >> 55; r[ 7] = t7  & 0x7fffffffffffffL;
+    t9   += t8  >> 55; r[ 8] = t8  & 0x7fffffffffffffL;
+    t10  += t9  >> 55; r[ 9] = t9  & 0x7fffffffffffffL;
+    t11  += t10 >> 55; r[10] = t10 & 0x7fffffffffffffL;
+    t12  += t11 >> 55; r[11] = t11 & 0x7fffffffffffffL;
+    r[13] = (sp_digit)(t12 >> 55);
+                       r[12] = t12 & 0x7fffffffffffffL;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_384_mont_sqr_7(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_384_sqr_7(r, a);
+    sp_384_mont_reduce_7(r, m, mp);
+}
+
+#if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY)
+/* Square the Montgomery form number a number of times. (r = a ^ n mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montogmery form.
+ * n   Number of times to square.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_384_mont_sqr_n_7(sp_digit* r, const sp_digit* a, int n,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_384_mont_sqr_7(r, a, m, mp);
+    for (; n > 1; n--) {
+        sp_384_mont_sqr_7(r, r, m, mp);
+    }
+}
+
+#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */
+#ifdef WOLFSSL_SP_SMALL
+/* Mod-2 for the P384 curve. */
+static const uint64_t p384_mod_minus_2[6] = {
+    0x00000000fffffffdU,0xffffffff00000000U,0xfffffffffffffffeU,
+    0xffffffffffffffffU,0xffffffffffffffffU,0xffffffffffffffffU
+};
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the modulus (prime) of the
+ * P384 curve. (r = 1 / a mod m)
+ *
+ * r   Inverse result.
+ * a   Number to invert.
+ * td  Temporary data.
+ */
+static void sp_384_mont_inv_7(sp_digit* r, const sp_digit* a, sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    XMEMCPY(t, a, sizeof(sp_digit) * 7);
+    for (i=382; i>=0; i--) {
+        sp_384_mont_sqr_7(t, t, p384_mod, p384_mp_mod);
+        if (p384_mod_minus_2[i / 64] & ((sp_digit)1 << (i % 64)))
+            sp_384_mont_mul_7(t, t, a, p384_mod, p384_mp_mod);
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 7);
+#else
+    sp_digit* t1 = td;
+    sp_digit* t2 = td + 2 * 7;
+    sp_digit* t3 = td + 4 * 7;
+    sp_digit* t4 = td + 6 * 7;
+    sp_digit* t5 = td + 8 * 7;
+
+    /* 0x2 */
+    sp_384_mont_sqr_7(t1, a, p384_mod, p384_mp_mod);
+    /* 0x3 */
+    sp_384_mont_mul_7(t5, t1, a, p384_mod, p384_mp_mod);
+    /* 0xc */
+    sp_384_mont_sqr_n_7(t1, t5, 2, p384_mod, p384_mp_mod);
+    /* 0xf */
+    sp_384_mont_mul_7(t2, t5, t1, p384_mod, p384_mp_mod);
+    /* 0x1e */
+    sp_384_mont_sqr_7(t1, t2, p384_mod, p384_mp_mod);
+    /* 0x1f */
+    sp_384_mont_mul_7(t4, t1, a, p384_mod, p384_mp_mod);
+    /* 0x3e0 */
+    sp_384_mont_sqr_n_7(t1, t4, 5, p384_mod, p384_mp_mod);
+    /* 0x3ff */
+    sp_384_mont_mul_7(t2, t4, t1, p384_mod, p384_mp_mod);
+    /* 0x7fe0 */
+    sp_384_mont_sqr_n_7(t1, t2, 5, p384_mod, p384_mp_mod);
+    /* 0x7fff */
+    sp_384_mont_mul_7(t4, t4, t1, p384_mod, p384_mp_mod);
+    /* 0x3fff8000 */
+    sp_384_mont_sqr_n_7(t1, t4, 15, p384_mod, p384_mp_mod);
+    /* 0x3fffffff */
+    sp_384_mont_mul_7(t2, t4, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffc */
+    sp_384_mont_sqr_n_7(t3, t2, 2, p384_mod, p384_mp_mod);
+    /* 0xfffffffd */
+    sp_384_mont_mul_7(r, t3, a, p384_mod, p384_mp_mod);
+    /* 0xffffffff */
+    sp_384_mont_mul_7(t3, t5, t3, p384_mod, p384_mp_mod);
+    /* 0xfffffffc0000000 */
+    sp_384_mont_sqr_n_7(t1, t2, 30, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffff */
+    sp_384_mont_mul_7(t2, t2, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffff000000000000000 */
+    sp_384_mont_sqr_n_7(t1, t2, 60, p384_mod, p384_mp_mod);
+    /* 0xffffffffffffffffffffffffffffff */
+    sp_384_mont_mul_7(t2, t2, t1, p384_mod, p384_mp_mod);
+    /* 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */
+    sp_384_mont_sqr_n_7(t1, t2, 120, p384_mod, p384_mp_mod);
+    /* 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+    sp_384_mont_mul_7(t2, t2, t1, p384_mod, p384_mp_mod);
+    /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */
+    sp_384_mont_sqr_n_7(t1, t2, 15, p384_mod, p384_mp_mod);
+    /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+    sp_384_mont_mul_7(t2, t4, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe00000000 */
+    sp_384_mont_sqr_n_7(t1, t2, 33, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff */
+    sp_384_mont_mul_7(t2, t3, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff000000000000000000000000 */
+    sp_384_mont_sqr_n_7(t1, t2, 96, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000fffffffd */
+    sp_384_mont_mul_7(r, r, t1, p384_mod, p384_mp_mod);
+
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Map the Montgomery form projective coordinate point to an affine point.
+ *
+ * r  Resulting affine coordinate point.
+ * p  Montgomery form projective coordinate point.
+ * t  Temporary ordinate data.
+ */
+static void sp_384_map_7(sp_point_384* r, const sp_point_384* p, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*7;
+    int64_t n;
+
+    sp_384_mont_inv_7(t1, p->z, t + 2*7);
+
+    sp_384_mont_sqr_7(t2, t1, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_7(t1, t2, t1, p384_mod, p384_mp_mod);
+
+    /* x /= z^2 */
+    sp_384_mont_mul_7(r->x, p->x, t2, p384_mod, p384_mp_mod);
+    XMEMSET(r->x + 7, 0, sizeof(r->x) / 2U);
+    sp_384_mont_reduce_7(r->x, p384_mod, p384_mp_mod);
+    /* Reduce x to less than modulus */
+    n = sp_384_cmp_7(r->x, p384_mod);
+    sp_384_cond_sub_7(r->x, r->x, p384_mod, 0 - ((n >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_384_norm_7(r->x);
+
+    /* y /= z^3 */
+    sp_384_mont_mul_7(r->y, p->y, t1, p384_mod, p384_mp_mod);
+    XMEMSET(r->y + 7, 0, sizeof(r->y) / 2U);
+    sp_384_mont_reduce_7(r->y, p384_mod, p384_mp_mod);
+    /* Reduce y to less than modulus */
+    n = sp_384_cmp_7(r->y, p384_mod);
+    sp_384_cond_sub_7(r->y, r->y, p384_mod, 0 - ((n >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_384_norm_7(r->y);
+
+    XMEMSET(r->z, 0, sizeof(r->z));
+    r->z[0] = 1;
+
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_384_add_7(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 7; i++) {
+        r[i] = a[i] + b[i];
+    }
+
+    return 0;
+}
+#else
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_384_add_7(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    r[ 0] = a[ 0] + b[ 0];
+    r[ 1] = a[ 1] + b[ 1];
+    r[ 2] = a[ 2] + b[ 2];
+    r[ 3] = a[ 3] + b[ 3];
+    r[ 4] = a[ 4] + b[ 4];
+    r[ 5] = a[ 5] + b[ 5];
+    r[ 6] = a[ 6] + b[ 6];
+
+    return 0;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Add two Montgomery form numbers (r = a + b % m).
+ *
+ * r   Result of addition.
+ * a   First number to add in Montogmery form.
+ * b   Second number to add in Montogmery form.
+ * m   Modulus (prime).
+ */
+static void sp_384_mont_add_7(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    (void)sp_384_add_7(r, a, b);
+    sp_384_norm_7(r);
+    sp_384_cond_sub_7(r, r, m, 0 - (((r[6] >> 54) > 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_384_norm_7(r);
+}
+
+/* Double a Montgomery form number (r = a + a % m).
+ *
+ * r   Result of doubling.
+ * a   Number to double in Montogmery form.
+ * m   Modulus (prime).
+ */
+static void sp_384_mont_dbl_7(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    (void)sp_384_add_7(r, a, a);
+    sp_384_norm_7(r);
+    sp_384_cond_sub_7(r, r, m, 0 - (((r[6] >> 54) > 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_384_norm_7(r);
+}
+
+/* Triple a Montgomery form number (r = a + a + a % m).
+ *
+ * r   Result of Tripling.
+ * a   Number to triple in Montogmery form.
+ * m   Modulus (prime).
+ */
+static void sp_384_mont_tpl_7(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    (void)sp_384_add_7(r, a, a);
+    sp_384_norm_7(r);
+    sp_384_cond_sub_7(r, r, m, 0 - (((r[6] >> 54) > 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_384_norm_7(r);
+    (void)sp_384_add_7(r, r, a);
+    sp_384_norm_7(r);
+    sp_384_cond_sub_7(r, r, m, 0 - (((r[6] >> 54) > 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_384_norm_7(r);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_384_sub_7(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 7; i++) {
+        r[i] = a[i] - b[i];
+    }
+
+    return 0;
+}
+
+#else
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_384_sub_7(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    r[ 0] = a[ 0] - b[ 0];
+    r[ 1] = a[ 1] - b[ 1];
+    r[ 2] = a[ 2] - b[ 2];
+    r[ 3] = a[ 3] - b[ 3];
+    r[ 4] = a[ 4] - b[ 4];
+    r[ 5] = a[ 5] - b[ 5];
+    r[ 6] = a[ 6] - b[ 6];
+
+    return 0;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r  A single precision number representing conditional add result.
+ * a  A single precision number to add with.
+ * b  A single precision number to add.
+ * m  Mask value to apply.
+ */
+static void sp_384_cond_add_7(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i = 0; i < 7; i++) {
+        r[i] = a[i] + (b[i] & m);
+    }
+#else
+    r[ 0] = a[ 0] + (b[ 0] & m);
+    r[ 1] = a[ 1] + (b[ 1] & m);
+    r[ 2] = a[ 2] + (b[ 2] & m);
+    r[ 3] = a[ 3] + (b[ 3] & m);
+    r[ 4] = a[ 4] + (b[ 4] & m);
+    r[ 5] = a[ 5] + (b[ 5] & m);
+    r[ 6] = a[ 6] + (b[ 6] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Subtract two Montgomery form numbers (r = a - b % m).
+ *
+ * r   Result of subtration.
+ * a   Number to subtract from in Montogmery form.
+ * b   Number to subtract with in Montogmery form.
+ * m   Modulus (prime).
+ */
+static void sp_384_mont_sub_7(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    (void)sp_384_sub_7(r, a, b);
+    sp_384_cond_add_7(r, r, m, r[6] >> 54);
+    sp_384_norm_7(r);
+}
+
+/* Shift number left one bit.
+ * Bottom bit is lost.
+ *
+ * r  Result of shift.
+ * a  Number to shift.
+ */
+SP_NOINLINE static void sp_384_rshift1_7(sp_digit* r, sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<6; i++) {
+        r[i] = ((a[i] >> 1) | (a[i + 1] << 54)) & 0x7fffffffffffffL;
+    }
+#else
+    r[0] = ((a[0] >> 1) | (a[1] << 54)) & 0x7fffffffffffffL;
+    r[1] = ((a[1] >> 1) | (a[2] << 54)) & 0x7fffffffffffffL;
+    r[2] = ((a[2] >> 1) | (a[3] << 54)) & 0x7fffffffffffffL;
+    r[3] = ((a[3] >> 1) | (a[4] << 54)) & 0x7fffffffffffffL;
+    r[4] = ((a[4] >> 1) | (a[5] << 54)) & 0x7fffffffffffffL;
+    r[5] = ((a[5] >> 1) | (a[6] << 54)) & 0x7fffffffffffffL;
+#endif
+    r[6] = a[6] >> 1;
+}
+
+/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
+ *
+ * r  Result of division by 2.
+ * a  Number to divide.
+ * m  Modulus (prime).
+ */
+static void sp_384_div2_7(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    sp_384_cond_add_7(r, a, m, 0 - (a[0] & 1));
+    sp_384_norm_7(r);
+    sp_384_rshift1_7(r, r);
+}
+
+/* Double the Montgomery form projective point p.
+ *
+ * r  Result of doubling point.
+ * p  Point to double.
+ * t  Temporary ordinate data.
+ */
+static void sp_384_proj_point_dbl_7(sp_point_384* r, const sp_point_384* p, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*7;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+
+    x = r->x;
+    y = r->y;
+    z = r->z;
+    /* Put infinity into result. */
+    if (r != p) {
+        r->infinity = p->infinity;
+    }
+
+    /* T1 = Z * Z */
+    sp_384_mont_sqr_7(t1, p->z, p384_mod, p384_mp_mod);
+    /* Z = Y * Z */
+    sp_384_mont_mul_7(z, p->y, p->z, p384_mod, p384_mp_mod);
+    /* Z = 2Z */
+    sp_384_mont_dbl_7(z, z, p384_mod);
+    /* T2 = X - T1 */
+    sp_384_mont_sub_7(t2, p->x, t1, p384_mod);
+    /* T1 = X + T1 */
+    sp_384_mont_add_7(t1, p->x, t1, p384_mod);
+    /* T2 = T1 * T2 */
+    sp_384_mont_mul_7(t2, t1, t2, p384_mod, p384_mp_mod);
+    /* T1 = 3T2 */
+    sp_384_mont_tpl_7(t1, t2, p384_mod);
+    /* Y = 2Y */
+    sp_384_mont_dbl_7(y, p->y, p384_mod);
+    /* Y = Y * Y */
+    sp_384_mont_sqr_7(y, y, p384_mod, p384_mp_mod);
+    /* T2 = Y * Y */
+    sp_384_mont_sqr_7(t2, y, p384_mod, p384_mp_mod);
+    /* T2 = T2/2 */
+    sp_384_div2_7(t2, t2, p384_mod);
+    /* Y = Y * X */
+    sp_384_mont_mul_7(y, y, p->x, p384_mod, p384_mp_mod);
+    /* X = T1 * T1 */
+    sp_384_mont_sqr_7(x, t1, p384_mod, p384_mp_mod);
+    /* X = X - Y */
+    sp_384_mont_sub_7(x, x, y, p384_mod);
+    /* X = X - Y */
+    sp_384_mont_sub_7(x, x, y, p384_mod);
+    /* Y = Y - X */
+    sp_384_mont_sub_7(y, y, x, p384_mod);
+    /* Y = Y * T1 */
+    sp_384_mont_mul_7(y, y, t1, p384_mod, p384_mp_mod);
+    /* Y = Y - T2 */
+    sp_384_mont_sub_7(y, y, t2, p384_mod);
+}
+
+/* Compare two numbers to determine if they are equal.
+ * Constant time implementation.
+ *
+ * a  First number to compare.
+ * b  Second number to compare.
+ * returns 1 when equal and 0 otherwise.
+ */
+static int sp_384_cmp_equal_7(const sp_digit* a, const sp_digit* b)
+{
+    return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3]) |
+            (a[4] ^ b[4]) | (a[5] ^ b[5]) | (a[6] ^ b[6])) == 0;
+}
+
+/* Add two Montgomery form projective points.
+ *
+ * r  Result of addition.
+ * p  First point to add.
+ * q  Second point to add.
+ * t  Temporary ordinate data.
+ */
+static void sp_384_proj_point_add_7(sp_point_384* r, const sp_point_384* p, const sp_point_384* q,
+        sp_digit* t)
+{
+    const sp_point_384* ap[2];
+    sp_point_384* rp[2];
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*7;
+    sp_digit* t3 = t + 4*7;
+    sp_digit* t4 = t + 6*7;
+    sp_digit* t5 = t + 8*7;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+    int i;
+
+    /* Ensure only the first point is the same as the result. */
+    if (q == r) {
+        const sp_point_384* a = p;
+        p = q;
+        q = a;
+    }
+
+    /* Check double */
+    (void)sp_384_sub_7(t1, p384_mod, q->y);
+    sp_384_norm_7(t1);
+    if ((sp_384_cmp_equal_7(p->x, q->x) & sp_384_cmp_equal_7(p->z, q->z) &
+        (sp_384_cmp_equal_7(p->y, q->y) | sp_384_cmp_equal_7(p->y, t1))) != 0) {
+        sp_384_proj_point_dbl_7(r, p, t);
+    }
+    else {
+        rp[0] = r;
+
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point_384));
+        x = rp[p->infinity | q->infinity]->x;
+        y = rp[p->infinity | q->infinity]->y;
+        z = rp[p->infinity | q->infinity]->z;
+
+        ap[0] = p;
+        ap[1] = q;
+        for (i=0; i<7; i++) {
+            r->x[i] = ap[p->infinity]->x[i];
+        }
+        for (i=0; i<7; i++) {
+            r->y[i] = ap[p->infinity]->y[i];
+        }
+        for (i=0; i<7; i++) {
+            r->z[i] = ap[p->infinity]->z[i];
+        }
+        r->infinity = ap[p->infinity]->infinity;
+
+        /* U1 = X1*Z2^2 */
+        sp_384_mont_sqr_7(t1, q->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_7(t3, t1, q->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_7(t1, t1, x, p384_mod, p384_mp_mod);
+        /* U2 = X2*Z1^2 */
+        sp_384_mont_sqr_7(t2, z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_7(t4, t2, z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_7(t2, t2, q->x, p384_mod, p384_mp_mod);
+        /* S1 = Y1*Z2^3 */
+        sp_384_mont_mul_7(t3, t3, y, p384_mod, p384_mp_mod);
+        /* S2 = Y2*Z1^3 */
+        sp_384_mont_mul_7(t4, t4, q->y, p384_mod, p384_mp_mod);
+        /* H = U2 - U1 */
+        sp_384_mont_sub_7(t2, t2, t1, p384_mod);
+        /* R = S2 - S1 */
+        sp_384_mont_sub_7(t4, t4, t3, p384_mod);
+        /* Z3 = H*Z1*Z2 */
+        sp_384_mont_mul_7(z, z, q->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_7(z, z, t2, p384_mod, p384_mp_mod);
+        /* X3 = R^2 - H^3 - 2*U1*H^2 */
+        sp_384_mont_sqr_7(x, t4, p384_mod, p384_mp_mod);
+        sp_384_mont_sqr_7(t5, t2, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_7(y, t1, t5, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_7(t5, t5, t2, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_7(x, x, t5, p384_mod);
+        sp_384_mont_dbl_7(t1, y, p384_mod);
+        sp_384_mont_sub_7(x, x, t1, p384_mod);
+        /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
+        sp_384_mont_sub_7(y, y, x, p384_mod);
+        sp_384_mont_mul_7(y, y, t4, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_7(t5, t5, t3, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_7(y, y, t5, p384_mod);
+    }
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_7(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
+        int map, void* heap)
+{
+#ifdef WOLFSSL_SP_NO_MALLOC
+    sp_point_384 t[3];
+    sp_digit tmp[2 * 7 * 6];
+#else
+    sp_point_384* t;
+    sp_digit* tmp;
+#endif
+    sp_digit n;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+    (void)heap;
+
+#ifndef WOLFSSL_SP_NO_MALLOC
+    t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 3, heap, DYNAMIC_TYPE_ECC);
+    if (t == NULL)
+        err = MEMORY_E;
+    tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 7 * 6, heap,
+                                                              DYNAMIC_TYPE_ECC);
+    if (tmp == NULL)
+        err = MEMORY_E;
+#endif
+
+    if (err == MP_OKAY) {
+        XMEMSET(t, 0, sizeof(sp_point_384) * 3);
+
+        /* t[0] = {0, 0, 1} * norm */
+        t[0].infinity = 1;
+        /* t[1] = {g->x, g->y, g->z} * norm */
+        err = sp_384_mod_mul_norm_7(t[1].x, g->x, p384_mod);
+    }
+    if (err == MP_OKAY)
+        err = sp_384_mod_mul_norm_7(t[1].y, g->y, p384_mod);
+    if (err == MP_OKAY)
+        err = sp_384_mod_mul_norm_7(t[1].z, g->z, p384_mod);
+
+    if (err == MP_OKAY) {
+        i = 6;
+        c = 54;
+        n = k[i--] << (55 - c);
+        for (; ; c--) {
+            if (c == 0) {
+                if (i == -1)
+                    break;
+
+                n = k[i--];
+                c = 55;
+            }
+
+            y = (n >> 54) & 1;
+            n <<= 1;
+
+            sp_384_proj_point_add_7(&t[y^1], &t[0], &t[1], tmp);
+
+            XMEMCPY(&t[2], (void*)(((size_t)&t[0] & addr_mask[y^1]) +
+                                   ((size_t)&t[1] & addr_mask[y])),
+                    sizeof(sp_point_384));
+            sp_384_proj_point_dbl_7(&t[2], &t[2], tmp);
+            XMEMCPY((void*)(((size_t)&t[0] & addr_mask[y^1]) +
+                            ((size_t)&t[1] & addr_mask[y])), &t[2],
+                    sizeof(sp_point_384));
+        }
+
+        if (map != 0) {
+            sp_384_map_7(r, &t[0], tmp);
+        }
+        else {
+            XMEMCPY(r, &t[0], sizeof(sp_point_384));
+        }
+    }
+
+#ifndef WOLFSSL_SP_NO_MALLOC
+    if (tmp != NULL) {
+        XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 7 * 6);
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+    if (t != NULL) {
+        XMEMSET(t, 0, sizeof(sp_point_384) * 3);
+        XFREE(t, NULL, DYNAMIC_TYPE_ECC);
+    }
+#else
+    ForceZero(tmp, sizeof(tmp));
+    ForceZero(t, sizeof(t));
+#endif
+
+    return err;
+}
+
+#elif defined(WOLFSSL_SP_CACHE_RESISTANT)
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_7(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
+        int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 t[3];
+    sp_digit tmp[2 * 7 * 6];
+#else
+    sp_point_384* t;
+    sp_digit* tmp;
+#endif
+    sp_digit n;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+    (void)heap;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_point*)XMALLOC(sizeof(*t) * 3, heap, DYNAMIC_TYPE_ECC);
+    if (t == NULL)
+        err = MEMORY_E;
+    tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 7 * 6, heap,
+                             DYNAMIC_TYPE_ECC);
+    if (tmp == NULL)
+        err = MEMORY_E;
+#endif
+
+    if (err == MP_OKAY) {
+        /* t[0] = {0, 0, 1} * norm */
+        XMEMSET(&t[0], 0, sizeof(t[0]));
+        t[0].infinity = 1;
+        /* t[1] = {g->x, g->y, g->z} * norm */
+        t[1].infinity = 0;
+        err = sp_384_mod_mul_norm_7(t[1].x, g->x, p384_mod);
+    }
+    if (err == MP_OKAY)
+        err = sp_384_mod_mul_norm_7(t[1].y, g->y, p384_mod);
+    if (err == MP_OKAY)
+        err = sp_384_mod_mul_norm_7(t[1].z, g->z, p384_mod);
+
+    if (err == MP_OKAY) {
+        i = 6;
+        c = 54;
+        n = k[i--] << (55 - c);
+        for (; ; c--) {
+            if (c == 0) {
+                if (i == -1)
+                    break;
+
+                n = k[i--];
+                c = 55;
+            }
+
+            y = (n >> 54) & 1;
+            n <<= 1;
+
+            sp_384_proj_point_add_7(&t[y^1], &t[0], &t[1], tmp);
+
+            XMEMCPY(&t[2], (void*)(((size_t)&t[0] & addr_mask[y^1]) +
+                                 ((size_t)&t[1] & addr_mask[y])), sizeof(t[2]));
+            sp_384_proj_point_dbl_7(&t[2], &t[2], tmp);
+            XMEMCPY((void*)(((size_t)&t[0] & addr_mask[y^1]) +
+                          ((size_t)&t[1] & addr_mask[y])), &t[2], sizeof(t[2]));
+        }
+
+        if (map != 0) {
+            sp_384_map_7(r, &t[0], tmp);
+        }
+        else {
+            XMEMCPY(r, &t[0], sizeof(sp_point_384));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 7 * 6);
+        XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+    }
+    if (t != NULL) {
+        XMEMSET(t, 0, sizeof(sp_point_384) * 3);
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    ForceZero(tmp, sizeof(tmp));
+    ForceZero(t, sizeof(t));
+#endif
+
+    return err;
+}
+
+#else
+/* A table entry for pre-computed points. */
+typedef struct sp_table_entry_384 {
+    sp_digit x[7];
+    sp_digit y[7];
+} sp_table_entry_384;
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_fast_7(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
+        int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 td[16];
+    sp_point_384 rtd;
+    sp_digit tmpd[2 * 7 * 6];
+#endif
+    sp_point_384* t;
+    sp_point_384* rt;
+    sp_digit* tmp;
+    sp_digit n;
+    int i;
+    int c, y;
+    int err;
+
+    (void)heap;
+
+    err = sp_384_point_new_7(heap, rtd, rt);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 16, heap, DYNAMIC_TYPE_ECC);
+    if (t == NULL)
+        err = MEMORY_E;
+    tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 7 * 6, heap,
+                             DYNAMIC_TYPE_ECC);
+    if (tmp == NULL)
+        err = MEMORY_E;
+#else
+    t = td;
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        /* t[0] = {0, 0, 1} * norm */
+        XMEMSET(&t[0], 0, sizeof(t[0]));
+        t[0].infinity = 1;
+        /* t[1] = {g->x, g->y, g->z} * norm */
+        (void)sp_384_mod_mul_norm_7(t[1].x, g->x, p384_mod);
+        (void)sp_384_mod_mul_norm_7(t[1].y, g->y, p384_mod);
+        (void)sp_384_mod_mul_norm_7(t[1].z, g->z, p384_mod);
+        t[1].infinity = 0;
+        sp_384_proj_point_dbl_7(&t[ 2], &t[ 1], tmp);
+        t[ 2].infinity = 0;
+        sp_384_proj_point_add_7(&t[ 3], &t[ 2], &t[ 1], tmp);
+        t[ 3].infinity = 0;
+        sp_384_proj_point_dbl_7(&t[ 4], &t[ 2], tmp);
+        t[ 4].infinity = 0;
+        sp_384_proj_point_add_7(&t[ 5], &t[ 3], &t[ 2], tmp);
+        t[ 5].infinity = 0;
+        sp_384_proj_point_dbl_7(&t[ 6], &t[ 3], tmp);
+        t[ 6].infinity = 0;
+        sp_384_proj_point_add_7(&t[ 7], &t[ 4], &t[ 3], tmp);
+        t[ 7].infinity = 0;
+        sp_384_proj_point_dbl_7(&t[ 8], &t[ 4], tmp);
+        t[ 8].infinity = 0;
+        sp_384_proj_point_add_7(&t[ 9], &t[ 5], &t[ 4], tmp);
+        t[ 9].infinity = 0;
+        sp_384_proj_point_dbl_7(&t[10], &t[ 5], tmp);
+        t[10].infinity = 0;
+        sp_384_proj_point_add_7(&t[11], &t[ 6], &t[ 5], tmp);
+        t[11].infinity = 0;
+        sp_384_proj_point_dbl_7(&t[12], &t[ 6], tmp);
+        t[12].infinity = 0;
+        sp_384_proj_point_add_7(&t[13], &t[ 7], &t[ 6], tmp);
+        t[13].infinity = 0;
+        sp_384_proj_point_dbl_7(&t[14], &t[ 7], tmp);
+        t[14].infinity = 0;
+        sp_384_proj_point_add_7(&t[15], &t[ 8], &t[ 7], tmp);
+        t[15].infinity = 0;
+
+        i = 5;
+        n = k[i+1] << 9;
+        c = 50;
+        y = n >> 59;
+        XMEMCPY(rt, &t[y], sizeof(sp_point_384));
+        n <<= 5;
+        for (; i>=0 || c>=4; ) {
+            if (c < 4) {
+                n |= k[i--] << (9 - c);
+                c += 55;
+            }
+            y = (n >> 60) & 0xf;
+            n <<= 4;
+            c -= 4;
+
+            sp_384_proj_point_dbl_7(rt, rt, tmp);
+            sp_384_proj_point_dbl_7(rt, rt, tmp);
+            sp_384_proj_point_dbl_7(rt, rt, tmp);
+            sp_384_proj_point_dbl_7(rt, rt, tmp);
+
+            sp_384_proj_point_add_7(rt, rt, &t[y], tmp);
+        }
+
+        if (map != 0) {
+            sp_384_map_7(r, rt, tmp);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_384));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 7 * 6);
+        XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+    }
+    if (t != NULL) {
+        XMEMSET(t, 0, sizeof(sp_point_384) * 16);
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    ForceZero(tmpd, sizeof(tmpd));
+    ForceZero(td, sizeof(td));
+#endif
+    sp_384_point_free_7(rt, 1, heap);
+
+    return err;
+}
+
+#ifdef FP_ECC
+/* Double the Montgomery form projective point p a number of times.
+ *
+ * r  Result of repeated doubling of point.
+ * p  Point to double.
+ * n  Number of times to double
+ * t  Temporary ordinate data.
+ */
+static void sp_384_proj_point_dbl_n_7(sp_point_384* p, int n, sp_digit* t)
+{
+    sp_digit* w = t;
+    sp_digit* a = t + 2*7;
+    sp_digit* b = t + 4*7;
+    sp_digit* t1 = t + 6*7;
+    sp_digit* t2 = t + 8*7;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+
+    x = p->x;
+    y = p->y;
+    z = p->z;
+
+    /* Y = 2*Y */
+    sp_384_mont_dbl_7(y, y, p384_mod);
+    /* W = Z^4 */
+    sp_384_mont_sqr_7(w, z, p384_mod, p384_mp_mod);
+    sp_384_mont_sqr_7(w, w, p384_mod, p384_mp_mod);
+
+#ifndef WOLFSSL_SP_SMALL
+    while (--n > 0)
+#else
+    while (--n >= 0)
+#endif
+    {
+        /* A = 3*(X^2 - W) */
+        sp_384_mont_sqr_7(t1, x, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_7(t1, t1, w, p384_mod);
+        sp_384_mont_tpl_7(a, t1, p384_mod);
+        /* B = X*Y^2 */
+        sp_384_mont_sqr_7(t1, y, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_7(b, t1, x, p384_mod, p384_mp_mod);
+        /* X = A^2 - 2B */
+        sp_384_mont_sqr_7(x, a, p384_mod, p384_mp_mod);
+        sp_384_mont_dbl_7(t2, b, p384_mod);
+        sp_384_mont_sub_7(x, x, t2, p384_mod);
+        /* Z = Z*Y */
+        sp_384_mont_mul_7(z, z, y, p384_mod, p384_mp_mod);
+        /* t2 = Y^4 */
+        sp_384_mont_sqr_7(t1, t1, p384_mod, p384_mp_mod);
+#ifdef WOLFSSL_SP_SMALL
+        if (n != 0)
+#endif
+        {
+            /* W = W*Y^4 */
+            sp_384_mont_mul_7(w, w, t1, p384_mod, p384_mp_mod);
+        }
+        /* y = 2*A*(B - X) - Y^4 */
+        sp_384_mont_sub_7(y, b, x, p384_mod);
+        sp_384_mont_mul_7(y, y, a, p384_mod, p384_mp_mod);
+        sp_384_mont_dbl_7(y, y, p384_mod);
+        sp_384_mont_sub_7(y, y, t1, p384_mod);
+    }
+#ifndef WOLFSSL_SP_SMALL
+    /* A = 3*(X^2 - W) */
+    sp_384_mont_sqr_7(t1, x, p384_mod, p384_mp_mod);
+    sp_384_mont_sub_7(t1, t1, w, p384_mod);
+    sp_384_mont_tpl_7(a, t1, p384_mod);
+    /* B = X*Y^2 */
+    sp_384_mont_sqr_7(t1, y, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_7(b, t1, x, p384_mod, p384_mp_mod);
+    /* X = A^2 - 2B */
+    sp_384_mont_sqr_7(x, a, p384_mod, p384_mp_mod);
+    sp_384_mont_dbl_7(t2, b, p384_mod);
+    sp_384_mont_sub_7(x, x, t2, p384_mod);
+    /* Z = Z*Y */
+    sp_384_mont_mul_7(z, z, y, p384_mod, p384_mp_mod);
+    /* t2 = Y^4 */
+    sp_384_mont_sqr_7(t1, t1, p384_mod, p384_mp_mod);
+    /* y = 2*A*(B - X) - Y^4 */
+    sp_384_mont_sub_7(y, b, x, p384_mod);
+    sp_384_mont_mul_7(y, y, a, p384_mod, p384_mp_mod);
+    sp_384_mont_dbl_7(y, y, p384_mod);
+    sp_384_mont_sub_7(y, y, t1, p384_mod);
+#endif
+    /* Y = Y/2 */
+    sp_384_div2_7(y, y, p384_mod);
+}
+
+#endif /* FP_ECC */
+/* Add two Montgomery form projective points. The second point has a q value of
+ * one.
+ * Only the first point can be the same pointer as the result point.
+ *
+ * r  Result of addition.
+ * p  First point to add.
+ * q  Second point to add.
+ * t  Temporary ordinate data.
+ */
+static void sp_384_proj_point_add_qz1_7(sp_point_384* r, const sp_point_384* p,
+        const sp_point_384* q, sp_digit* t)
+{
+    const sp_point_384* ap[2];
+    sp_point_384* rp[2];
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*7;
+    sp_digit* t3 = t + 4*7;
+    sp_digit* t4 = t + 6*7;
+    sp_digit* t5 = t + 8*7;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+    int i;
+
+    /* Check double */
+    (void)sp_384_sub_7(t1, p384_mod, q->y);
+    sp_384_norm_7(t1);
+    if ((sp_384_cmp_equal_7(p->x, q->x) & sp_384_cmp_equal_7(p->z, q->z) &
+        (sp_384_cmp_equal_7(p->y, q->y) | sp_384_cmp_equal_7(p->y, t1))) != 0) {
+        sp_384_proj_point_dbl_7(r, p, t);
+    }
+    else {
+        rp[0] = r;
+
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point_384));
+        x = rp[p->infinity | q->infinity]->x;
+        y = rp[p->infinity | q->infinity]->y;
+        z = rp[p->infinity | q->infinity]->z;
+
+        ap[0] = p;
+        ap[1] = q;
+        for (i=0; i<7; i++) {
+            r->x[i] = ap[p->infinity]->x[i];
+        }
+        for (i=0; i<7; i++) {
+            r->y[i] = ap[p->infinity]->y[i];
+        }
+        for (i=0; i<7; i++) {
+            r->z[i] = ap[p->infinity]->z[i];
+        }
+        r->infinity = ap[p->infinity]->infinity;
+
+        /* U2 = X2*Z1^2 */
+        sp_384_mont_sqr_7(t2, z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_7(t4, t2, z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_7(t2, t2, q->x, p384_mod, p384_mp_mod);
+        /* S2 = Y2*Z1^3 */
+        sp_384_mont_mul_7(t4, t4, q->y, p384_mod, p384_mp_mod);
+        /* H = U2 - X1 */
+        sp_384_mont_sub_7(t2, t2, x, p384_mod);
+        /* R = S2 - Y1 */
+        sp_384_mont_sub_7(t4, t4, y, p384_mod);
+        /* Z3 = H*Z1 */
+        sp_384_mont_mul_7(z, z, t2, p384_mod, p384_mp_mod);
+        /* X3 = R^2 - H^3 - 2*X1*H^2 */
+        sp_384_mont_sqr_7(t1, t4, p384_mod, p384_mp_mod);
+        sp_384_mont_sqr_7(t5, t2, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_7(t3, x, t5, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_7(t5, t5, t2, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_7(x, t1, t5, p384_mod);
+        sp_384_mont_dbl_7(t1, t3, p384_mod);
+        sp_384_mont_sub_7(x, x, t1, p384_mod);
+        /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */
+        sp_384_mont_sub_7(t3, t3, x, p384_mod);
+        sp_384_mont_mul_7(t3, t3, t4, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_7(t5, t5, y, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_7(y, t3, t5, p384_mod);
+    }
+}
+
+#ifdef FP_ECC
+/* Convert the projective point to affine.
+ * Ordinates are in Montgomery form.
+ *
+ * a  Point to convert.
+ * t  Temporary data.
+ */
+static void sp_384_proj_to_affine_7(sp_point_384* a, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2 * 7;
+    sp_digit* tmp = t + 4 * 7;
+
+    sp_384_mont_inv_7(t1, a->z, tmp);
+
+    sp_384_mont_sqr_7(t2, t1, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_7(t1, t2, t1, p384_mod, p384_mp_mod);
+
+    sp_384_mont_mul_7(a->x, a->x, t2, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_7(a->y, a->y, t1, p384_mod, p384_mp_mod);
+    XMEMCPY(a->z, p384_norm_mod, sizeof(p384_norm_mod));
+}
+
+/* Generate the pre-computed table of points for the base point.
+ *
+ * a      The base point.
+ * table  Place to store generated point data.
+ * tmp    Temporary data.
+ * heap  Heap to use for allocation.
+ */
+static int sp_384_gen_stripe_table_7(const sp_point_384* a,
+        sp_table_entry_384* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 td, s1d, s2d;
+#endif
+    sp_point_384* t;
+    sp_point_384* s1 = NULL;
+    sp_point_384* s2 = NULL;
+    int i, j;
+    int err;
+
+    (void)heap;
+
+    err = sp_384_point_new_7(heap, td, t);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_7(heap, s1d, s1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_7(heap, s2d, s2);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_7(t->x, a->x, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_7(t->y, a->y, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_7(t->z, a->z, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        t->infinity = 0;
+        sp_384_proj_to_affine_7(t, tmp);
+
+        XMEMCPY(s1->z, p384_norm_mod, sizeof(p384_norm_mod));
+        s1->infinity = 0;
+        XMEMCPY(s2->z, p384_norm_mod, sizeof(p384_norm_mod));
+        s2->infinity = 0;
+
+        /* table[0] = {0, 0, infinity} */
+        XMEMSET(&table[0], 0, sizeof(sp_table_entry_384));
+        /* table[1] = Affine version of 'a' in Montgomery form */
+        XMEMCPY(table[1].x, t->x, sizeof(table->x));
+        XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+        for (i=1; i<8; i++) {
+            sp_384_proj_point_dbl_n_7(t, 48, tmp);
+            sp_384_proj_to_affine_7(t, tmp);
+            XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+            XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+        }
+
+        for (i=1; i<8; i++) {
+            XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+            XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+            for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+                XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+                XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+                sp_384_proj_point_add_qz1_7(t, s1, s2, tmp);
+                sp_384_proj_to_affine_7(t, tmp);
+                XMEMCPY(table[j].x, t->x, sizeof(table->x));
+                XMEMCPY(table[j].y, t->y, sizeof(table->y));
+            }
+        }
+    }
+
+    sp_384_point_free_7(s2, 0, heap);
+    sp_384_point_free_7(s1, 0, heap);
+    sp_384_point_free_7( t, 0, heap);
+
+    return err;
+}
+
+#endif /* FP_ECC */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_stripe_7(sp_point_384* r, const sp_point_384* g,
+        const sp_table_entry_384* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 rtd;
+    sp_point_384 pd;
+    sp_digit td[2 * 7 * 6];
+#endif
+    sp_point_384* rt;
+    sp_point_384* p = NULL;
+    sp_digit* t;
+    int i, j;
+    int y, x;
+    int err;
+
+    (void)g;
+    (void)heap;
+
+
+    err = sp_384_point_new_7(heap, rtd, rt);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_7(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 7 * 6, heap,
+                           DYNAMIC_TYPE_ECC);
+    if (t == NULL) {
+        err = MEMORY_E;
+    }
+#else
+    t = td;
+#endif
+
+    if (err == MP_OKAY) {
+        XMEMCPY(p->z, p384_norm_mod, sizeof(p384_norm_mod));
+        XMEMCPY(rt->z, p384_norm_mod, sizeof(p384_norm_mod));
+
+        y = 0;
+        for (j=0,x=47; j<8; j++,x+=48) {
+            y |= ((k[x / 55] >> (x % 55)) & 1) << j;
+        }
+        XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+        XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+        rt->infinity = !y;
+        for (i=46; i>=0; i--) {
+            y = 0;
+            for (j=0,x=i; j<8; j++,x+=48) {
+                y |= ((k[x / 55] >> (x % 55)) & 1) << j;
+            }
+
+            sp_384_proj_point_dbl_7(rt, rt, t);
+            XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+            XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+            p->infinity = !y;
+            sp_384_proj_point_add_qz1_7(rt, rt, p, t);
+        }
+
+        if (map != 0) {
+            sp_384_map_7(r, rt, t);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_384));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_7(p, 0, heap);
+    sp_384_point_free_7(rt, 0, heap);
+
+    return err;
+}
+
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+    #define FP_ENTRIES 16
+#endif
+
+typedef struct sp_cache_384_t {
+    sp_digit x[7];
+    sp_digit y[7];
+    sp_table_entry_384 table[256];
+    uint32_t cnt;
+    int set;
+} sp_cache_384_t;
+
+static THREAD_LS_T sp_cache_384_t sp_cache_384[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_384_last = -1;
+static THREAD_LS_T int sp_cache_384_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+    static volatile int initCacheMutex_384 = 0;
+    static wolfSSL_Mutex sp_cache_384_lock;
+#endif
+
+static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache)
+{
+    int i, j;
+    uint32_t least;
+
+    if (sp_cache_384_inited == 0) {
+        for (i=0; i<FP_ENTRIES; i++) {
+            sp_cache_384[i].set = 0;
+        }
+        sp_cache_384_inited = 1;
+    }
+
+    /* Compare point with those in cache. */
+    for (i=0; i<FP_ENTRIES; i++) {
+        if (!sp_cache_384[i].set)
+            continue;
+
+        if (sp_384_cmp_equal_7(g->x, sp_cache_384[i].x) &
+                           sp_384_cmp_equal_7(g->y, sp_cache_384[i].y)) {
+            sp_cache_384[i].cnt++;
+            break;
+        }
+    }
+
+    /* No match. */
+    if (i == FP_ENTRIES) {
+        /* Find empty entry. */
+        i = (sp_cache_384_last + 1) % FP_ENTRIES;
+        for (; i != sp_cache_384_last; i=(i+1)%FP_ENTRIES) {
+            if (!sp_cache_384[i].set) {
+                break;
+            }
+        }
+
+        /* Evict least used. */
+        if (i == sp_cache_384_last) {
+            least = sp_cache_384[0].cnt;
+            for (j=1; j<FP_ENTRIES; j++) {
+                if (sp_cache_384[j].cnt < least) {
+                    i = j;
+                    least = sp_cache_384[i].cnt;
+                }
+            }
+        }
+
+        XMEMCPY(sp_cache_384[i].x, g->x, sizeof(sp_cache_384[i].x));
+        XMEMCPY(sp_cache_384[i].y, g->y, sizeof(sp_cache_384[i].y));
+        sp_cache_384[i].set = 1;
+        sp_cache_384[i].cnt = 1;
+    }
+
+    *cache = &sp_cache_384[i];
+    sp_cache_384_last = i;
+}
+#endif /* FP_ECC */
+
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_7(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
+        int map, void* heap)
+{
+#ifndef FP_ECC
+    return sp_384_ecc_mulmod_fast_7(r, g, k, map, heap);
+#else
+    sp_digit tmp[2 * 7 * 7];
+    sp_cache_384_t* cache;
+    int err = MP_OKAY;
+
+#ifndef HAVE_THREAD_LS
+    if (initCacheMutex_384 == 0) {
+         wc_InitMutex(&sp_cache_384_lock);
+         initCacheMutex_384 = 1;
+    }
+    if (wc_LockMutex(&sp_cache_384_lock) != 0)
+       err = BAD_MUTEX_E;
+#endif /* HAVE_THREAD_LS */
+
+    if (err == MP_OKAY) {
+        sp_ecc_get_cache_384(g, &cache);
+        if (cache->cnt == 2)
+            sp_384_gen_stripe_table_7(g, cache->table, tmp, heap);
+
+#ifndef HAVE_THREAD_LS
+        wc_UnLockMutex(&sp_cache_384_lock);
+#endif /* HAVE_THREAD_LS */
+
+        if (cache->cnt < 2) {
+            err = sp_384_ecc_mulmod_fast_7(r, g, k, map, heap);
+        }
+        else {
+            err = sp_384_ecc_mulmod_stripe_7(r, g, cache->table, k,
+                    map, heap);
+        }
+    }
+
+    return err;
+#endif
+}
+
+#endif
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * km    Scalar to multiply by.
+ * p     Point to multiply.
+ * r     Resulting point.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_384(mp_int* km, ecc_point* gm, ecc_point* r, int map,
+        void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[7];
+#endif
+    sp_point_384* point;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    err = sp_384_point_new_7(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL)
+            err = MEMORY_E;
+    }
+#else
+    k = kd;
+#endif
+    if (err == MP_OKAY) {
+        sp_384_from_mp(k, 7, km);
+        sp_384_point_from_ecc_point_7(point, gm);
+
+            err = sp_384_ecc_mulmod_7(point, point, k, map, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_to_ecc_point_7(point, r);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_7(point, 0, heap);
+
+    return err;
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_base_7(sp_point_384* r, const sp_digit* k,
+        int map, void* heap)
+{
+    /* No pre-computed values. */
+    return sp_384_ecc_mulmod_7(r, &p384_base, k, map, heap);
+}
+
+#elif defined(WOLFSSL_SP_CACHE_RESISTANT)
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_base_7(sp_point_384* r, const sp_digit* k,
+        int map, void* heap)
+{
+    /* No pre-computed values. */
+    return sp_384_ecc_mulmod_7(r, &p384_base, k, map, heap);
+}
+
+#else
+static const sp_table_entry_384 p384_table[256] = {
+    /* 0 */
+    { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 */
+    { { 0x50756649c0b528L,0x71c541ad9c707bL,0x71506d35b8838dL,
+        0x4d1877fc3ce1d7L,0x6de2b645486845L,0x227025fee46c29L,
+        0x134eab708a6785L },
+      { 0x043dad4b03a4feL,0x517ef769535846L,0x58ba0ec14286feL,
+        0x47a7fecc5d6f3aL,0x1a840c6c352196L,0x3d3bb00044c72dL,
+        0x0ade2af0968571L } },
+    /* 2 */
+    { { 0x0647532b0c535bL,0x52a6e0a0c52c53L,0x5085aae6b24375L,
+        0x7096bb501c66b5L,0x47bdb3df9b7b7bL,0x11227e9b2f0be6L,
+        0x088b172704fa51L },
+      { 0x0e796f2680dc64L,0x796eb06a482ebfL,0x2b441d02e04839L,
+        0x19bef7312a5aecL,0x02247c38b8efb5L,0x099ed1185c329eL,
+        0x1ed71d7cdb096fL } },
+    /* 3 */
+    { { 0x6a3cc39edffea5L,0x7a386fafd3f9c4L,0x366f78fbd8d6efL,
+        0x529c7ad7873b80L,0x79eb30380eb471L,0x07c5d3b51760b7L,
+        0x36ee4f1cc69183L },
+      { 0x5ba260f526b605L,0x2f1dfaf0aa6e6fL,0x6bb5ca812a5752L,
+        0x3002d8d1276bc9L,0x01f82269483777L,0x1df33eaaf733cdL,
+        0x2b97e555f59255L } },
+    /* 4 */
+    { { 0x480c57f26feef9L,0x4d28741c248048L,0x0c9cf8af1f0c68L,
+        0x778f6a639a8016L,0x148e88c42e9c53L,0x464051757ecfe9L,
+        0x1a940bd0e2a5e1L },
+      { 0x713a46b74536feL,0x1757b153e1d7ebL,0x30dc8c9da07486L,
+        0x3b7460c1879b5eL,0x4b766c5317b315L,0x1b9de3aaf4d377L,
+        0x245f124c2cf8f5L } },
+    /* 5 */
+    { { 0x426e2ee349ddd0L,0x7df3365f84a022L,0x03b005d29a7c45L,
+        0x422c2337f9b5a4L,0x060494f4bde761L,0x5245e5db6da0b0L,
+        0x22b71d744677f2L },
+      { 0x19d097b7d5a7ceL,0x6bcb468823d34cL,0x1c3692d3be1d09L,
+        0x3c80ec7aa01f02L,0x7170f2ebaafd97L,0x06cbcc7d79d4e8L,
+        0x04a8da511fe760L } },
+    /* 6 */
+    { { 0x79c07a4fc52870L,0x6e9034a752c251L,0x603860a367382cL,
+        0x56d912d6aa87d0L,0x0a348a24abaf76L,0x6c5a23da14adcbL,
+        0x3cf60479a522b2L },
+      { 0x18dd774c61ed22L,0x0ff30168f93b0cL,0x3f79ae15642eddL,
+        0x40510f4915fbcbL,0x2c9ddfdfd1c6d6L,0x67b81b62aee55eL,
+        0x2824de79b07a43L } },
+    /* 7 */
+    { { 0x6c66efe085c629L,0x48c212b7913470L,0x4480fd2d057f0aL,
+        0x725ec7a89a9eb1L,0x78ce97ca1972b7L,0x54760ee70154fbL,
+        0x362a40e27b9f93L },
+      { 0x474dc7e7b14461L,0x602819389ef037L,0x1a13bc284370b2L,
+        0x0193ff1295a59dL,0x79615bde6ea5d2L,0x2e76e3d886acc1L,
+        0x3bb796812e2b60L } },
+    /* 8 */
+    { { 0x04cbb3893b9a2dL,0x4c16010a18baabL,0x19f7cb50f60831L,
+        0x084f400a0936c1L,0x72f1cdd5bbbf00L,0x1b30b725dc6702L,
+        0x182753e4fcc50cL },
+      { 0x059a07eadaf9d6L,0x26d81e24bf603cL,0x45583c839dc399L,
+        0x5579d4d6b1103aL,0x2e14ea59489ae7L,0x492f6e1c5ecc97L,
+        0x03740dc05db420L } },
+    /* 9 */
+    { { 0x413be88510521fL,0x3753ee49982e99L,0x6cd4f7098e1cc5L,
+        0x613c92bda4ec1dL,0x495378b677efe0L,0x132a2143839927L,
+        0x0cf8c336291c0bL },
+      { 0x7fc89d2208353fL,0x751b9da85657e1L,0x349b8a97d405c3L,
+        0x65a964b048428fL,0x1adf481276455eL,0x5560c8d89c2ffcL,
+        0x144fc11fac21a3L } },
+    /* 10 */
+    { { 0x7611f4df5bdf53L,0x634eb16234db80L,0x3c713b8e51174cL,
+        0x52c3c68ac4b2edL,0x53025ba8bebe75L,0x7175d98143105bL,
+        0x33ca8e266a48faL },
+      { 0x0c9281d24fd048L,0x76b3177604bbf3L,0x3b26ae754e106fL,
+        0x7f782275c6efc6L,0x36662538a4cb67L,0x0ca1255843e464L,
+        0x2a4674e142d9bcL } },
+    /* 11 */
+    { { 0x303b4085d480d8L,0x68f23650f4fa7bL,0x552a3ceeba3367L,
+        0x6da0c4947926e3L,0x6e0f5482eb8003L,0x0de717f3d6738aL,
+        0x22e5dcc826a477L },
+      { 0x1b05b27209cfc2L,0x7f0a0b65b6e146L,0x63586549ed3126L,
+        0x7d628dd2b23124L,0x383423fe510391L,0x57ff609eabd569L,
+        0x301f04370131baL } },
+    /* 12 */
+    { { 0x22fe4cdb32f048L,0x7f228ebdadbf5aL,0x02a99adb2d7c8eL,
+        0x01a02e05286706L,0x62d6adf627a89fL,0x49c6ce906fbf2bL,
+        0x0207256dae90b9L },
+      { 0x23e036e71d6cebL,0x199ed8d604e3d7L,0x0c1a11c076d16fL,
+        0x389291fb3da3f3L,0x47adc60f8f942eL,0x177048468e4b9aL,
+        0x20c09f5e61d927L } },
+    /* 13 */
+    { { 0x129ea63615b0b8L,0x03fb4a9b588367L,0x5ad6da8da2d051L,
+        0x33f782f44caeaaL,0x5a27fa80d45291L,0x6d1ed796942da4L,
+        0x08435a931ef556L },
+      { 0x004abb25351130L,0x6d33207c6fd7e7L,0x702130972074b7L,
+        0x0e34748af900f7L,0x762a531a28c87aL,0x3a903b5a4a6ac7L,
+        0x1775b79c35b105L } },
+    /* 14 */
+    { { 0x7470fd846612ceL,0x7dd9b431b32e53L,0x04bcd2be1a61bcL,
+        0x36ed7c5b5c260bL,0x6795f5ef0a4084L,0x46e2880b401c93L,
+        0x17d246c5aa8bdeL },
+      { 0x707ae4db41b38dL,0x233c31f7f9558fL,0x585110ec67bdf4L,
+        0x4d0cc931d0c703L,0x26fbe4356841a7L,0x64323e95239c44L,
+        0x371dc9230f3221L } },
+    /* 15 */
+    { { 0x70ff1ae4b1ec9dL,0x7c1dcfddee0daaL,0x53286782188748L,
+        0x6a5d9381e6f207L,0x3aa6c7d6523c4cL,0x6c02d83e0d97e2L,
+        0x16a9c916b45312L },
+      { 0x78146744b74de8L,0x742ec415269c6fL,0x237a2c6a860e79L,
+        0x186baf17ba68a7L,0x4261e8789fa51fL,0x3dc136480a5903L,
+        0x1953899e0cf159L } },
+    /* 16 */
+    { { 0x0205de2f9fbe67L,0x1706fee51c886fL,0x31a0b803c712bfL,
+        0x0a6aa11ede7603L,0x2463ef2a145c31L,0x615403b30e8f4aL,
+        0x3f024d6c5f5c5eL },
+      { 0x53bc4fd4d01f95L,0x7d512ac15a692cL,0x72be38fcfe6aa0L,
+        0x437f0b77bbca1eL,0x7fdcf70774a10eL,0x392d6c5cde37f3L,
+        0x229cbce79621d1L } },
+    /* 17 */
+    { { 0x2de4da2341c342L,0x5ca9d4e08844e7L,0x60dd073bcf74c9L,
+        0x4f30aa499b63ecL,0x23efd1eafa00d5L,0x7c99a7db1257b3L,
+        0x00febc9b3171b1L },
+      { 0x7e2fcf3045f8acL,0x2a642e9e3ce610L,0x23f82be69c5299L,
+        0x66e49ad967c279L,0x1c895ddfd7a842L,0x798981e22f6d25L,
+        0x0d595cb59322f3L } },
+    /* 18 */
+    { { 0x4bac017d8c1bbaL,0x73872161e7aafdL,0x0fd865f43d8163L,
+        0x019d89457708b7L,0x1b983c4dd70684L,0x095e109b74d841L,
+        0x25f1f0b3e0c76fL },
+      { 0x4e61ddf96010e8L,0x1c40a53f542e5eL,0x01a74dfc8365f9L,
+        0x69b36b92773333L,0x08e0fccc139ed3L,0x266d216ddc4269L,
+        0x1f2b47717ce9b5L } },
+    /* 19 */
+    { { 0x0a9a81da57a41fL,0x0825d800736cccL,0x2d7876b4579d28L,
+        0x3340ea6211a1e3L,0x49e89284f3ff54L,0x6276a210fe2c6eL,
+        0x01c3c8f31be7cbL },
+      { 0x2211da5d186e14L,0x1e6ffbb61bfea8L,0x536c7d060211d2L,
+        0x320168720d1d55L,0x5835525ed667baL,0x5125e52495205eL,
+        0x16113b9f3e9129L } },
+    /* 20 */
+    { { 0x3086073f3b236fL,0x283b03c443b5f5L,0x78e49ed0a067a7L,
+        0x2a878fb79fb2b8L,0x662f04348a9337L,0x57ee2cf732d50bL,
+        0x18b50dd65fd514L },
+      { 0x5feb9ef2955926L,0x2c3edbef06a7b0L,0x32728dad651029L,
+        0x116d00b1c4b347L,0x13254052bf1a1aL,0x3e77bf7fee5ec1L,
+        0x253943ca388882L } },
+    /* 21 */
+    { { 0x32e5b33062e8afL,0x46ebd147a6d321L,0x2c8076dec6a15cL,
+        0x7328d511ff0d80L,0x10ad7e926def0eL,0x4e8ca85937d736L,
+        0x02638c26e8bf2fL },
+      { 0x1deeb3fff1d63fL,0x5014417fa6e8efL,0x6e1da3de5c8f43L,
+        0x7ca942b42295d9L,0x23faacf75bb4d1L,0x4a71fcd680053dL,
+        0x04af4f90204dceL } },
+    /* 22 */
+    { { 0x23780d104cbba5L,0x4e8ff46bba9980L,0x2072a6da8d881fL,
+        0x3cc3d881ae11c9L,0x2eee84ff19be89L,0x69b708ed77f004L,
+        0x2a82928534eef9L },
+      { 0x794331187d4543L,0x70e0f3edc0cc41L,0x3ab1fa0b84c854L,
+        0x1478355c1d87baL,0x6f35fa7748ba28L,0x37b8be0531584dL,
+        0x03c3141c23a69fL } },
+    /* 23 */
+    { { 0x5c244cdef029ddL,0x0d0f0a0cc37018L,0x17f8476604f6afL,
+        0x13a6dd6ccc95c3L,0x5a242e9801b8f6L,0x211ca9cc632131L,
+        0x264a6a46a4694fL },
+      { 0x3ffd7235285887L,0x284be28302046fL,0x57f4b9b882f1d6L,
+        0x5e21772c940661L,0x7619a735c600cfL,0x2f76f5a50c9106L,
+        0x28d89c8c69de31L } },
+    /* 24 */
+    { { 0x799b5c91361ed8L,0x36ead8c66cd95cL,0x046c9969a91f5cL,
+        0x46bbdba2a66ea9L,0x29db0e0215a599L,0x26c8849b36f756L,
+        0x22c3feb31ff679L },
+      { 0x585d1237b5d9efL,0x5ac57f522e8e8dL,0x617e66e8b56c41L,
+        0x68826f276823cfL,0x0983f0e6f39231L,0x4e1075099084bdL,
+        0x2a541f82be0416L } },
+    /* 25 */
+    { { 0x468a6e14cf381cL,0x4f7b845c6399edL,0x36aa29732ebe74L,
+        0x19c726911ab46aL,0x2ad1fe431eec0eL,0x301e35051fd1eaL,
+        0x36da815e7a1ab3L },
+      { 0x05672e4507832aL,0x4ebf10fca51251L,0x6015843421cff0L,
+        0x3affad832fc013L,0x712b58d9b45540L,0x1e4751d1f6213eL,
+        0x0e7c2b218bafa7L } },
+    /* 26 */
+    { { 0x7abf784c52edf5L,0x6fcb4b135ca7b1L,0x435e46ac5f735cL,
+        0x67f8364ca48c5fL,0x46d45b5fbd956bL,0x10deda6065db94L,
+        0x0b37fdf85068f9L },
+      { 0x74b3ba61f47ec8L,0x42c7ddf08c10ccL,0x1531a1fe422a20L,
+        0x366f913d12be38L,0x6a846e30cb2edfL,0x2785898c994fedL,
+        0x061be85f331af3L } },
+    /* 27 */
+    { { 0x23f5361dfcb91eL,0x3c26c8da6b1491L,0x6e444a1e620d65L,
+        0x0c3babd5e8ac13L,0x573723ce612b82L,0x2d10e62a142c37L,
+        0x3d1a114c2d98bdL },
+      { 0x33950b401896f6L,0x7134efe7c12110L,0x31239fd2978472L,
+        0x30333bf5978965L,0x79f93313dd769fL,0x457fb9e11662caL,
+        0x190a73b251ae3cL } },
+    /* 28 */
+    { { 0x04dd54bb75f9a4L,0x0d7253a76ae093L,0x08f5b930792bbcL,
+        0x041f79adafc265L,0x4a9ff24c61c11bL,0x0019c94e724725L,
+        0x21975945d9cc2aL },
+      { 0x3dfe76722b4a2bL,0x17f2f6107c1d94L,0x546e1ae2944b01L,
+        0x53f1f06401e72dL,0x2dbe43fc7632d6L,0x5639132e185903L,
+        0x0f2f34eb448385L } },
+    /* 29 */
+    { { 0x7b4cc7ec30ce93L,0x58fb6e4e4145f7L,0x5d1ed5540043b5L,
+        0x19ffbe1f633adfL,0x5bfc0907259033L,0x6378f872e7ca0eL,
+        0x2c127b2c01eb3cL },
+      { 0x076eaf4f58839cL,0x2db54560bc9f68L,0x42ad0319b84062L,
+        0x46c325d1fb019dL,0x76d2a19ee9eebcL,0x6fbd6d9e2aa8f7L,
+        0x2396a598fe0991L } },
+    /* 30 */
+    { { 0x662fddf7fbd5e1L,0x7ca8ed22563ad3L,0x5b4768efece3b3L,
+        0x643786a422d1eaL,0x36ce80494950e1L,0x1a30795b7f2778L,
+        0x107f395c93f332L },
+      { 0x7939c28332c144L,0x491610e3c8dc0bL,0x099ba2bfdac5fcL,
+        0x5c2e3149ec29a7L,0x31b731d06f1dc3L,0x1cbb60d465d462L,
+        0x3ca5461362cfd9L } },
+    /* 31 */
+    { { 0x653ff736ddc103L,0x7c6f2bdec0dfb2L,0x73f81b73a097d0L,
+        0x05b775f84f180fL,0x56b2085af23413L,0x0d6f36256a61feL,
+        0x26d3ed267fa68fL },
+      { 0x54f89251d27ac2L,0x4fc6ad94a71202L,0x7ebf01969b4cc5L,
+        0x7ba364dbc14760L,0x4f8370959a2587L,0x7b7631e37c6188L,
+        0x29e51845f104cbL } },
+    /* 32 */
+    { { 0x426b775e3c647bL,0x327319e0a69180L,0x0c5cb034f6ff2fL,
+        0x73aa39b98e9897L,0x7ee615f49fde6eL,0x3f712aa61e0db4L,
+        0x33ca06c2ba2ce9L },
+      { 0x14973541b8a543L,0x4b4e6101ba61faL,0x1d94e4233d0698L,
+        0x501513c715d570L,0x1b8f8c3d01436bL,0x52f41a0445cf64L,
+        0x3f709c3a75fb04L } },
+    /* 33 */
+    { { 0x073c0cbc7f41d6L,0x227c36f5ac8201L,0x508e110fef65d8L,
+        0x0f317229529b7fL,0x45fc6030d00e24L,0x118a65d30cebeaL,
+        0x3340cc4223a448L },
+      { 0x204c999797612cL,0x7c05dd4ce9c5a3L,0x7b865d0a8750e4L,
+        0x2f82c876ab7d34L,0x2243ddd2ab4808L,0x6834b9df8a4914L,
+        0x123319ed950e0fL } },
+    /* 34 */
+    { { 0x50430efc14ab48L,0x7e9e4ce0d4e89cL,0x2332207fd8656dL,
+        0x4a2809e97f4511L,0x2162bb1b968e2dL,0x29526d54af2972L,
+        0x13edd9adcd939dL },
+      { 0x793bca31e1ff7fL,0x6b959c9e4d2227L,0x628ac27809a5baL,
+        0x2c71ffc7fbaa5fL,0x0c0b058f13c9ceL,0x5676eae68de2cfL,
+        0x35508036ea19a4L } },
+    /* 35 */
+    { { 0x030bbd6dda1265L,0x67f9d12e31bb34L,0x7e4d8196e3ded3L,
+        0x7b9120e5352498L,0x75857bce72d875L,0x4ead976a396caeL,
+        0x31c5860553a64dL },
+      { 0x1a0f792ee32189L,0x564c4efb8165d0L,0x7adc7d1a7fbcbeL,
+        0x7ed7c2ccf327b7L,0x35df1b448ce33dL,0x6f67eb838997cdL,
+        0x3ee37ec0077917L } },
+    /* 36 */
+    { { 0x345fa74d5bb921L,0x097c9a56ccfd8eL,0x00a0b5e8f971f8L,
+        0x723d95223f69d4L,0x08e2e5c2777f87L,0x68b13676200109L,
+        0x26ab5df0acbad6L },
+      { 0x01bca7daac34aeL,0x49ca4d5f664dadL,0x110687b850914bL,
+        0x1203d6f06443c9L,0x7a2ac743b04d4cL,0x40d96bd3337f82L,
+        0x13728be0929c06L } },
+    /* 37 */
+    { { 0x631ca61127bc1aL,0x2b362fd5a77cd1L,0x17897d68568fb7L,
+        0x21070af33db5b2L,0x6872e76221794aL,0x436f29fb076963L,
+        0x1f2acfc0ecb7b3L },
+      { 0x19bf15ca9b3586L,0x32489a4a17aee2L,0x2b31af3c929551L,
+        0x0db7c420b9b19fL,0x538c39bd308c2bL,0x438775c0dea88fL,
+        0x1537304d7cd07fL } },
+    /* 38 */
+    { { 0x53598d943caf0dL,0x1d5244bfe266adL,0x7158feb7ab3811L,
+        0x1f46e13cf6fb53L,0x0dcab632eb9447L,0x46302968cfc632L,
+        0x0b53d3cc5b6ec7L },
+      { 0x69811ca143b7caL,0x5865bcf9f2a11aL,0x74ded7fa093b06L,
+        0x1c878ec911d5afL,0x04610e82616e49L,0x1e157fe9640eb0L,
+        0x046e6f8561d6c2L } },
+    /* 39 */
+    { { 0x631a3d3bbe682cL,0x3a4ce9dde5ba95L,0x28f11f7502f1f1L,
+        0x0a55cf0c957e88L,0x495e4ec7e0a3bcL,0x30ad4d87ba365cL,
+        0x0217b97a4c26f3L },
+      { 0x01a9088c2e67fdL,0x7501c4c3d5e5e7L,0x265b7bb854c820L,
+        0x729263c87e6b52L,0x308b9e3b8fb035L,0x33f1b86c1b23abL,
+        0x0e81b8b21fc99cL } },
+    /* 40 */
+    { { 0x59f5a87237cac0L,0x6b3a86b0cf28b9L,0x13a53db13a4fc2L,
+        0x313c169a1c253bL,0x060158304ed2bbL,0x21e171b71679bcL,
+        0x10cdb754d76f86L },
+      { 0x44355392ab473aL,0x64eb7cbda08caeL,0x3086426a900c71L,
+        0x49016ed9f3c33cL,0x7e6354ab7e04f9L,0x17c4c91a40cd2eL,
+        0x3509f461024c66L } },
+    /* 41 */
+    { { 0x2848f50f9b5a31L,0x68d1755b6c5504L,0x48cd5d5672ec00L,
+        0x4d77421919d023L,0x1e1e349ef68807L,0x4ab5130cf415d7L,
+        0x305464c6c7dbe6L },
+      { 0x64eb0bad74251eL,0x64c6957e52bda4L,0x6c12583440dee6L,
+        0x6d3bee05b00490L,0x186970de53dbc4L,0x3be03b37567a56L,
+        0x2b553b1ebdc55bL } },
+    /* 42 */
+    { { 0x74dc3579efdc58L,0x26d29fed1bb71cL,0x334c825a9515afL,
+        0x433c1e839273a6L,0x0d8a4e41cff423L,0x3454098fe42f8eL,
+        0x1046674bf98686L },
+      { 0x09a3e029c05dd2L,0x54d7cfc7fb53a7L,0x35f0ad37e14d7cL,
+        0x73a294a13767b9L,0x3f519678275f4fL,0x788c63393993a4L,
+        0x0781680b620123L } },
+    /* 43 */
+    { { 0x4c8e2ed4d5ffe8L,0x112db7d42fe4ebL,0x433b8f2d2be2edL,
+        0x23e30b29a82cbcL,0x35d2f4c06ee85aL,0x78ff31ffe4b252L,
+        0x0d31295c8cbff5L },
+      { 0x314806ea0376a2L,0x4ea09e22bc0589L,0x0879575f00ba97L,
+        0x188226d2996bb7L,0x7799368dc9411fL,0x7ab24e5c8cae36L,
+        0x2b6a8e2ee4ea33L } },
+    /* 44 */
+    { { 0x70c7127d4ed72aL,0x24c9743ef34697L,0x2fd30e7a93683aL,
+        0x538a89c246012cL,0x6c660a5394ed82L,0x79a95ea239d7e0L,
+        0x3f3af3bbfb170dL },
+      { 0x3b75aa779ae8c1L,0x33995a3cc0dde4L,0x7489d5720b7bfdL,
+        0x599677ef9fa937L,0x3defd64c5ab44bL,0x27d52dc234522bL,
+        0x2ac65d1a8450e0L } },
+    /* 45 */
+    { { 0x478585ec837d7dL,0x5f7971dc174887L,0x67576ed7bb296dL,
+        0x5a78e529a74926L,0x640f73f4fa104bL,0x7d42a8b16e4730L,
+        0x108c7eaa75fd01L },
+      { 0x60661ef96e6896L,0x18d3a0761f3aa7L,0x6e71e163455539L,
+        0x165827d6a7e583L,0x4e7f77e9527935L,0x790bebe2ae912eL,
+        0x0b8fe9561adb55L } },
+    /* 46 */
+    { { 0x4d48036a9951a8L,0x371084f255a085L,0x66aeca69cea2c5L,
+        0x04c99f40c745e7L,0x08dc4bfd9a0924L,0x0b0ec146b29df7L,
+        0x05106218d01c91L },
+      { 0x2a56ee99caedc7L,0x5d9b23a203922cL,0x1ce4c80b6a3ec4L,
+        0x2666bcb75338cbL,0x185a81aac8c4aaL,0x2b4fb60a06c39eL,
+        0x0327e1b3633f42L } },
+    /* 47 */
+    { { 0x72814710b2a556L,0x52c864f6e16534L,0x4978de66ddd9f2L,
+        0x151f5950276cf0L,0x450ac6781d2dc2L,0x114b7a22dd61b2L,
+        0x3b32b07f29faf8L },
+      { 0x68444fdc2d6e94L,0x68526bd9e437bcL,0x0ca780e8b0d887L,
+        0x69f3f850a716aaL,0x500b953e42cd57L,0x4e57744d812e7dL,
+        0x000a5f0e715f48L } },
+    /* 48 */
+    { { 0x2aab10b8243a7dL,0x727d1f4b18b675L,0x0e6b9fdd91bbbbL,
+        0x0d58269fc337e5L,0x45d6664105a266L,0x11946af1b14072L,
+        0x2c2334f91e46e1L },
+      { 0x6dc5f8756d2411L,0x21b34eaa25188bL,0x0d2797da83529eL,
+        0x324df55616784bL,0x7039ec66d267dfL,0x2de79cdb2d108cL,
+        0x14011b1ad0bde0L } },
+    /* 49 */
+    { { 0x2e160266425043L,0x55fbe11b712125L,0x7e3c58b3947fd9L,
+        0x67aacc79c37ad3L,0x4a18e18d2dea0fL,0x5eef06e5674351L,
+        0x37c3483ae33439L },
+      { 0x5d5e1d75bb4045L,0x0f9d72db296efdL,0x60b1899dd894a9L,
+        0x06e8818ded949aL,0x747fd853c39434L,0x0953b937d9efabL,
+        0x09f08c0beeb901L } },
+    /* 50 */
+    { { 0x1d208a8f2d49ceL,0x54042c5be1445aL,0x1c2681fd943646L,
+        0x219c8094e2e674L,0x442cddf07238b8L,0x574a051c590832L,
+        0x0b72f4d61c818aL },
+      { 0x7bc3cbe4680967L,0x0c8b3f25ae596bL,0x0445b0da74a9efL,
+        0x0bbf46c40363b7L,0x1df575c50677a3L,0x016ea6e73d68adL,
+        0x0b5207bd8db0fdL } },
+    /* 51 */
+    { { 0x2d39fdfea1103eL,0x2b252bf0362e34L,0x63d66c992baab9L,
+        0x5ac97706de8550L,0x0cca390c39c1acL,0x0d9bec5f01b2eaL,
+        0x369360a0f7e5f3L },
+      { 0x6dd3461e201067L,0x70b2d3f63ed614L,0x487580487c54c7L,
+        0x6020e48a44af2aL,0x1ccf80b21aab04L,0x3cf3b12d88d798L,
+        0x349368eccc506fL } },
+    /* 52 */
+    { { 0x5a053753b0a354L,0x65e818dbb9b0aeL,0x7d5855ee50e4bfL,
+        0x58dc06885c7467L,0x5ee15073e57bd3L,0x63254ebc1e07fdL,
+        0x1d48e0392aa39bL },
+      { 0x4e227c6558ffe9L,0x0c3033d8a82a3eL,0x7bde65c214e8d2L,
+        0x6e23561559c16aL,0x5094c5e6deaffdL,0x78dca2880f1f91L,
+        0x3d9d3f947d838dL } },
+    /* 53 */
+    { { 0x387ae5af63408fL,0x6d539aeb4e6edfL,0x7f3d3186368e70L,
+        0x01a6446bc19989L,0x35288fbcd4482fL,0x39288d34ec2736L,
+        0x1de9c47159ad76L },
+      { 0x695dc7944f8d65L,0x3eca2c35575094L,0x0c918059a79b69L,
+        0x4573a48c32a74eL,0x580d8bc8b93f52L,0x190be3a3d071eaL,
+        0x2333e686b3a8cbL } },
+    /* 54 */
+    { { 0x2b110c7196fee2L,0x3ac70e99128a51L,0x20a6bb6b75d5e6L,
+        0x5f447fa513149aL,0x560d69714cc7b2L,0x1d3ee25279fab1L,
+        0x369adb2ccca959L },
+      { 0x3fddb13dd821c2L,0x70bf21ba647be8L,0x64121227e3cbc9L,
+        0x12633a4c892320L,0x3c15c61660f26dL,0x1932c3b3d19900L,
+        0x18c718563eab71L } },
+    /* 55 */
+    { { 0x72ebe0fd752366L,0x681c2737d11759L,0x143c805e7ae4f0L,
+        0x78ed3c2cc7b324L,0x5c16e14820254fL,0x226a4f1c4ec9f0L,
+        0x2891bb915eaac6L },
+      { 0x061eb453763b33L,0x07f88b81781a87L,0x72b5ac7a87127cL,
+        0x7ea4e4cd7ff8b5L,0x5e8c3ce33908b6L,0x0bcb8a3d37feffL,
+        0x01da9e8e7fc50bL } },
+    /* 56 */
+    { { 0x639dfe9e338d10L,0x32dfe856823608L,0x46a1d73bca3b9aL,
+        0x2da685d4b0230eL,0x6e0bc1057b6d69L,0x7144ec724a5520L,
+        0x0b067c26b87083L },
+      { 0x0fc3f0eef4c43dL,0x63500f509552b7L,0x220d74af6f8b86L,
+        0x038996eafa2aa9L,0x7f6750f4aee4d2L,0x3e1d3f06718720L,
+        0x1ea1d37243814cL } },
+    /* 57 */
+    { { 0x322d4597c27050L,0x1beeb3ce17f109L,0x15e5ce2e6ef42eL,
+        0x6c8be27da6b3a0L,0x66e3347f4d5f5cL,0x7172133899c279L,
+        0x250aff4e548743L },
+      { 0x28f0f6a43b566dL,0x0cd2437fefbca0L,0x5b1108cb36bdbaL,
+        0x48a834d41fb7c2L,0x6cb8565680579fL,0x42da2412b45d9fL,
+        0x33dfc1abb6c06eL } },
+    /* 58 */
+    { { 0x56e3c48ef96c80L,0x65667bb6c1381eL,0x09f70514375487L,
+        0x1548ff115f4a08L,0x237de2d21a0710L,0x1425cdee9f43dfL,
+        0x26a6a42e055b0aL },
+      { 0x4ea9ea9dc7dfcbL,0x4df858583ac58aL,0x1d274f819f1d39L,
+        0x26e9c56cf91fcbL,0x6cee31c7c3a465L,0x0bb8e00b108b28L,
+        0x226158da117301L } },
+    /* 59 */
+    { { 0x5a7cd4fce73946L,0x7b6a462d0ac653L,0x732ea4bb1a3da5L,
+        0x7c8e9f54711af4L,0x0a6cd55d4655f9L,0x341e6d13e4754aL,
+        0x373c87098879a8L },
+      { 0x7bc82e61b818bfL,0x5f2db48f44879fL,0x2a2f06833f1d28L,
+        0x494e5b691a74c0L,0x17d6cf35fd6b57L,0x5f7028d1c25dfcL,
+        0x377a9ab9562cb6L } },
+    /* 60 */
+    { { 0x4de8877e787b2eL,0x183e7352621a52L,0x2ab0509974962bL,
+        0x045a450496cb8aL,0x3bf7118b5591c7L,0x7724f98d761c35L,
+        0x301607e8d5a0c1L },
+      { 0x0f58a3f24d4d58L,0x3771c19c464f3cL,0x06746f9c0bfafaL,
+        0x56564c9c8feb52L,0x0d66d9a7d8a45fL,0x403578141193caL,
+        0x00b0d0bdc19260L } },
+    /* 61 */
+    { { 0x571407157bdbc2L,0x138d5a1c2c0b99L,0x2ee4a8057dcbeaL,
+        0x051ff2b58e9ed1L,0x067378ad9e7cdaL,0x7cc2c1db97a49eL,
+        0x1e7536ccd849d6L },
+      { 0x531fd95f3497c4L,0x55dc08325f61a7L,0x144e942bce32bfL,
+        0x642d572f09e53aL,0x556ff188261678L,0x3e79c0d9d513d6L,
+        0x0bbbc6656f6d52L } },
+    /* 62 */
+    { { 0x57d3eb50596edcL,0x26c520a487451dL,0x0a92db40aea8d6L,
+        0x27df6345109616L,0x7733d611fd727cL,0x61d14171fef709L,
+        0x36169ae417c36bL },
+      { 0x6899f5d4091cf7L,0x56ce5dfe4ed0c1L,0x2c430ce5913fbcL,
+        0x1b13547e0f8caeL,0x4840a8275d3699L,0x59b8ef209e81adL,
+        0x22362dff5ea1a2L } },
+    /* 63 */
+    { { 0x7237237bd98425L,0x73258e162a9d0bL,0x0a59a1e8bb5118L,
+        0x4190a7ee5d8077L,0x13684905fdbf7cL,0x31c4033a52626bL,
+        0x010a30e4fbd448L },
+      { 0x47623f981e909aL,0x670af7c325b481L,0x3d004241fa4944L,
+        0x0905a2ca47f240L,0x58f3cdd7a187c3L,0x78b93aee05b43fL,
+        0x19b91d4ef8d63bL } },
+    /* 64 */
+    { { 0x0d34e116973cf4L,0x4116fc9e69ee0eL,0x657ae2b4a482bbL,
+        0x3522eed134d7cdL,0x741e0dde0a036aL,0x6554316a51cc7bL,
+        0x00f31c6ca89837L },
+      { 0x26770aa06b1dd7L,0x38233a4ceba649L,0x065a1110c96feaL,
+        0x18d367839e0f15L,0x794543660558d1L,0x39b605139065dcL,
+        0x29abbec071b637L } },
+    /* 65 */
+    { { 0x1464b401ab5245L,0x16db891b27ff74L,0x724eb49cb26e34L,
+        0x74fee3bc9cc33eL,0x6a8bdbebe085eaL,0x5c2e75ca207129L,
+        0x1d03f2268e6b08L },
+      { 0x28b0a328e23b23L,0x645dc26209a0bcL,0x62c28990348d49L,
+        0x4dd9be1fa333d0L,0x6183aac74a72e4L,0x1d6f3ee69e1d03L,
+        0x2fff96db0ff670L } },
+    /* 66 */
+    { { 0x2358f5c6a2123fL,0x5b2bfc51bedb63L,0x4fc6674be649ecL,
+        0x51fc16e44b813aL,0x2ffe10a73754c1L,0x69a0c7a053aeefL,
+        0x150e605fb6b9b4L },
+      { 0x179eef6b8b83c4L,0x64293b28ad05efL,0x331795fab98572L,
+        0x09823eec78727dL,0x36508042b89b81L,0x65f1106adb927eL,
+        0x2fc0234617f47cL } },
+    /* 67 */
+    { { 0x12aa244e8068dbL,0x0c834ae5348f00L,0x310fc1a4771cb3L,
+        0x6c90a2f9e19ef9L,0x77946fa0573471L,0x37f5df81e5f72fL,
+        0x204f5d72cbe048L },
+      { 0x613c724383bba6L,0x1ce14844967e0aL,0x797c85e69aa493L,
+        0x4fb15b0f2ce765L,0x5807978e2e8aa7L,0x52c75859876a75L,
+        0x1554635c763d3eL } },
+    /* 68 */
+    { { 0x4f292200623f3bL,0x6222be53d7fe07L,0x1e02a9a08c2571L,
+        0x22c6058216b912L,0x1ec20044c7ba17L,0x53f94c5efde12bL,
+        0x102b8aadfe32a4L },
+      { 0x45377aa927b102L,0x0d41b8062ee371L,0x77085a9018e62aL,
+        0x0c69980024847cL,0x14739b423a73a9L,0x52ec6961fe3c17L,
+        0x38a779c94b5a7dL } },
+    /* 69 */
+    { { 0x4d14008435af04L,0x363bfd8325b4e8L,0x48cdb715097c95L,
+        0x1b534540f8bee0L,0x4ca1e5c90c2a76L,0x4b52c193d6eee0L,
+        0x277a33c79becf5L },
+      { 0x0fee0d511d3d06L,0x4627f3d6a58f8cL,0x7c81ac245119b8L,
+        0x0c8d526ba1e07aL,0x3dbc242f55bac2L,0x2399df8f91fffdL,
+        0x353e982079ba3bL } },
+    /* 70 */
+    { { 0x6405d3b0ab9645L,0x7f31abe3ee236bL,0x456170a9babbb1L,
+        0x09634a2456a118L,0x5b1c6045acb9e5L,0x2c75c20d89d521L,
+        0x2e27ccf5626399L },
+      { 0x307cd97fed2ce4L,0x1c2fbb02b64087L,0x542a068d27e64dL,
+        0x148c030b3bc6a6L,0x671129e616ade5L,0x123f40db60dafcL,
+        0x07688f3c621220L } },
+    /* 71 */
+    { { 0x1c46b342f2c4b5L,0x27decc0b3c8f04L,0x0d9bd433464c54L,
+        0x1f3d893b818572L,0x2536043b536c94L,0x57e00c4b19ebf9L,
+        0x3938fb9e5ad55eL },
+      { 0x6b390024c8b22fL,0x4583f97e20a976L,0x2559d24abcbad7L,
+        0x67a9cabc9bd8c6L,0x73a56f09432e4aL,0x79eb0beb53a3b7L,
+        0x3e19d47f6f8221L } },
+    /* 72 */
+    { { 0x7399cb9d10e0b2L,0x32acc1b8a36e2aL,0x287d60c2407035L,
+        0x42c82420ea4b5cL,0x13f286658bc268L,0x3c91181156e064L,
+        0x234b83dcdeb963L },
+      { 0x79bc95486cfee6L,0x4d8fd3cb78af36L,0x07362ba5e80da8L,
+        0x79d024a0d681b0L,0x6b58406907f87fL,0x4b40f1e977e58fL,
+        0x38dcc6fd5fa342L } },
+    /* 73 */
+    { { 0x72282be1cd0abeL,0x02bd0fdfdf44e5L,0x19b0e0d2f753e4L,
+        0x4514e76ce8c4c0L,0x02ebc9c8cdcc1bL,0x6ac0c0373e9fddL,
+        0x0dc414af1c81a9L },
+      { 0x7a109246f32562L,0x26982e6a3768edL,0x5ecd8daed76ab5L,
+        0x2eaa70061eb261L,0x09e7c038a8c514L,0x2a2603cc300658L,
+        0x25d93ab9e55cd4L } },
+    /* 74 */
+    { { 0x11b19fcbd5256aL,0x41e4d94274770fL,0x0133c1a411001fL,
+        0x360bac481dbca3L,0x45908b18a9c22bL,0x1e34396fafb03aL,
+        0x1b84fea7486edaL },
+      { 0x183c62a71e6e16L,0x5f1dc30e93da8eL,0x6cb97b502573c3L,
+        0x3708bf0964e3fcL,0x35a7f042eeacceL,0x56370da902c27fL,
+        0x3a873c3b72797fL } },
+    /* 75 */
+    { { 0x6573c9cea4cc9bL,0x2c3b5f9d91e6dcL,0x2a90e2dbd9505eL,
+        0x66a75444025f81L,0x1571fb894b03cdL,0x5d1a1f00fd26f3L,
+        0x0d19a9fd618855L },
+      { 0x659acd56515664L,0x7279478bd616a3L,0x09a909e76d56c3L,
+        0x2fd70474250358L,0x3a1a25c850579cL,0x11b9e0f71b74ccL,
+        0x1268daef3d1bffL } },
+    /* 76 */
+    { { 0x7f5acc46d93106L,0x5bc15512f939c8L,0x504b5f92f996deL,
+        0x25965549be7a64L,0x357a3a2ae9b80dL,0x3f2bcf9c139cc0L,
+        0x0a7ddd99f23b35L },
+      { 0x6868f5a8a0b1c5L,0x319ec52f15b1beL,0x0770000a849021L,
+        0x7f4d50287bd608L,0x62c971d28a9d7fL,0x164e89309acb72L,
+        0x2a29f002cf4a32L } },
+    /* 77 */
+    { { 0x58a852ae11a338L,0x27e3a35f2dcef8L,0x494d5731ce9e18L,
+        0x49516f33f4bb3eL,0x386b26ba370097L,0x4e8fac1ec30248L,
+        0x2ac26d4c44455dL },
+      { 0x20484198eb9dd0L,0x75982a0e06512bL,0x152271b9279b05L,
+        0x5908a9857e36d2L,0x6a933ab45a60abL,0x58d8b1acb24fafL,
+        0x28fbcf19425590L } },
+    /* 78 */
+    { { 0x5420e9df010879L,0x4aba72aec2f313L,0x438e544eda7494L,
+        0x2e8e189ce6f7eaL,0x2f771e4efe45bdL,0x0d780293bce7efL,
+        0x1569ad3d0d02acL },
+      { 0x325251ebeaf771L,0x02510f1a8511e2L,0x3863816bf8aad1L,
+        0x60fdb15fe6ac19L,0x4792aef52a348cL,0x38e57a104e9838L,
+        0x0d171611a1df1bL } },
+    /* 79 */
+    { { 0x15ceb0bea65e90L,0x6e56482db339bcL,0x37f618f7b0261fL,
+        0x6351abc226dabcL,0x0e999f617b74baL,0x37d3cc57af5b69L,
+        0x21df2b987aac68L },
+      { 0x2dddaa3a358610L,0x2da264bc560e47L,0x545615d538bf13L,
+        0x1c95ac244b8cc7L,0x77de1f741852cbL,0x75d324f00996abL,
+        0x3a79b13b46aa3bL } },
+    /* 80 */
+    { { 0x7db63998683186L,0x6849bb989d530cL,0x7b53c39ef7ed73L,
+        0x53bcfbf664d3ffL,0x25ef27c57f71c7L,0x50120ee80f3ad6L,
+        0x243aba40ed0205L },
+      { 0x2aae5e0ee1fcebL,0x3449d0d8343fbeL,0x5b2864fb7cffc7L,
+        0x64dceb5407ac3eL,0x20303a5695523dL,0x3def70812010b2L,
+        0x07be937f2e9b6fL } },
+    /* 81 */
+    { { 0x5838f9e0540015L,0x728d8720efb9f7L,0x1ab5864490b0c8L,
+        0x6531754458fdcfL,0x600ff9612440c0L,0x48735b36a585b7L,
+        0x3d4aaea86b865dL },
+      { 0x6898942cac32adL,0x3c84c5531f23a1L,0x3c9dbd572f7edeL,
+        0x5691f0932a2976L,0x186f0db1ac0d27L,0x4fbed18bed5bc9L,
+        0x0e26b0dee0b38cL } },
+    /* 82 */
+    { { 0x1188b4f8e60f5bL,0x602a915455b4a2L,0x60e06af289ff99L,
+        0x579fe4bed999e5L,0x2bc03b15e6d9ddL,0x1689649edd66d5L,
+        0x3165e277dca9d2L },
+      { 0x7cb8a529cf5279L,0x57f8035b34d84dL,0x352e2eb26de8f1L,
+        0x6406820c3367c4L,0x5d148f4c899899L,0x483e1408482e15L,
+        0x1680bd1e517606L } },
+    /* 83 */
+    { { 0x5c877cc1c90202L,0x2881f158eae1f4L,0x6f45e207df4267L,
+        0x59280eba1452d8L,0x4465b61e267db5L,0x171f1137e09e5cL,
+        0x1368eb821daa93L },
+      { 0x70fe26e3e66861L,0x52a6663170da7dL,0x71d1ce5b7d79dcL,
+        0x1cffe9be1e1afdL,0x703745115a29c4L,0x73b7f897b2f65aL,
+        0x02218c3a95891aL } },
+    /* 84 */
+    { { 0x16866db8a9e8c9L,0x4770b770123d9bL,0x4c116cf34a8465L,
+        0x079b28263fc86aL,0x3751c755a72b58L,0x7bc8df1673243aL,
+        0x12fff72454f064L },
+      { 0x15c049b89554e7L,0x4ea9ef44d7cd9aL,0x42f50765c0d4f1L,
+        0x158bb603cb011bL,0x0809dde16470b1L,0x63cad7422ea819L,
+        0x38b6cd70f90d7eL } },
+    /* 85 */
+    { { 0x1e4aab6328e33fL,0x70575f026da3aeL,0x7e1b55c8c55219L,
+        0x328d4b403d24caL,0x03b6df1f0a5bd1L,0x26b4bb8b648ed0L,
+        0x17161f2f10b76aL },
+      { 0x6cdb32bae8b4c0L,0x33176266227056L,0x4975fa58519b45L,
+        0x254602ea511d96L,0x4e82e93e402a67L,0x0ca8b5929cdb4fL,
+        0x3ae7e0a07918f5L } },
+    /* 86 */
+    { { 0x60f9d1fecf5b9bL,0x6257e40d2cd469L,0x6c7aa814d28456L,
+        0x58aac7caac8e79L,0x703a55f0293cbfL,0x702390a0f48378L,
+        0x24b9ae07218b07L },
+      { 0x1ebc66cdaf24e3L,0x7d9ae5f9f8e199L,0x42055ee921a245L,
+        0x035595936e4d49L,0x129c45d425c08bL,0x6486c5f19ce6ddL,
+        0x027dbd5f18ba24L } },
+    /* 87 */
+    { { 0x7d6b78d29375fbL,0x0a3dc6ba22ae38L,0x35090fa91feaf6L,
+        0x7f18587fb7b16eL,0x6e7091dd924608L,0x54e102cdbf5ff8L,
+        0x31b131a4c22079L },
+      { 0x368f87d6a53fb0L,0x1d3f3d69a3f240L,0x36bf5f9e40e1c6L,
+        0x17f150e01f8456L,0x76e5d0835eb447L,0x662fc0a1207100L,
+        0x14e3dd97a98e39L } },
+    /* 88 */
+    { { 0x0249d9c2663b4bL,0x56b68f9a71ba1cL,0x74b119567f9c02L,
+        0x5e6f336d8c92acL,0x2ced58f9f74a84L,0x4b75a2c2a467c5L,
+        0x30557011cf740eL },
+      { 0x6a87993be454ebL,0x29b7076fb99a68L,0x62ae74aaf99bbaL,
+        0x399f9aa8fb6c1bL,0x553c24a396dd27L,0x2868337a815ea6L,
+        0x343ab6635cc776L } },
+    /* 89 */
+    { { 0x0e0b0eec142408L,0x79728229662121L,0x605d0ac75e6250L,
+        0x49a097a01edfbeL,0x1e20cd270df6b6L,0x7438a0ca9291edL,
+        0x29daa430da5f90L },
+      { 0x7a33844624825aL,0x181715986985c1L,0x53a6853cae0b92L,
+        0x6d98401bd925e8L,0x5a0a34f5dd5e24L,0x7b818ef53cf265L,
+        0x0836e43c9d3194L } },
+    /* 90 */
+    { { 0x1179b70e6c5fd9L,0x0246d9305dd44cL,0x635255edfbe2fbL,
+        0x5397b3523b4199L,0x59350cc47e6640L,0x2b57aa97ed4375L,
+        0x37efd31abd153aL },
+      { 0x7a7afa6907f4faL,0x75c10cb94e6a7eL,0x60a925ab69cc47L,
+        0x2ff5bcd9239bd5L,0x13c2113e425f11L,0x56bd3d2f8a1437L,
+        0x2c9adbab13774fL } },
+    /* 91 */
+    { { 0x4ab9f52a2e5f2bL,0x5e537e70b58903L,0x0f242658ebe4f2L,
+        0x2648a1e7a5f9aeL,0x1b4c5081e73007L,0x6827d4aff51850L,
+        0x3925e41726cd01L },
+      { 0x56dd8a55ab3cfbL,0x72d6a31b6d5beaL,0x697bd2e5575112L,
+        0x66935519a7aa12L,0x55e97dda7a3aceL,0x0e16afb4237b4cL,
+        0x00b68fbff08093L } },
+    /* 92 */
+    { { 0x4b00366481d0d9L,0x37cb031fbfc5c4L,0x14643f6800dd03L,
+        0x6793fef60fe0faL,0x4f43e329c92803L,0x1fce86b96a6d26L,
+        0x0ad416975e213aL },
+      { 0x7cc6a6711adcc9L,0x64b8a63c43c2d9L,0x1e6caa2a67c0d0L,
+        0x610deffd17a54bL,0x57d669d5f38423L,0x77364b8f022636L,
+        0x36d4d13602e024L } },
+    /* 93 */
+    { { 0x72e667ae50a2f5L,0x1b15c950c3a21aL,0x3ccc37c72e6dfeL,
+        0x027f7e1d094fb8L,0x43ae1e90aa5d7eL,0x3f5feac3d97ce5L,
+        0x0363ed0a336e55L },
+      { 0x235f73d7663784L,0x5d8cfc588ad5a4L,0x10ab6ff333016eL,
+        0x7d8886af2e1497L,0x549f34fd17988eL,0x3fc4fcaee69a33L,
+        0x0622b133a13d9eL } },
+    /* 94 */
+    { { 0x6344cfa796c53eL,0x0e9a10d00136fdL,0x5d1d284a56efd8L,
+        0x608b1968f8aca7L,0x2fa5a66776edcaL,0x13430c44f1609cL,
+        0x1499973cb2152aL },
+      { 0x3764648104ab58L,0x3226e409fadafcL,0x1513a8466459ddL,
+        0x649206ec365035L,0x46149aa3f765b1L,0x3aebf0a035248eL,
+        0x1ee60b8c373494L } },
+    /* 95 */
+    { { 0x4e9efcc15f3060L,0x5e5d50fd77cdc8L,0x071e5403516b58L,
+        0x1b7d4e89b24ceaL,0x53b1fa66d6dc03L,0x457f15f892ab5fL,
+        0x076332c9397260L },
+      { 0x31422b79d7584bL,0x0b01d47e41ba80L,0x3e5611a3171528L,
+        0x5f53b9a9fc1be4L,0x7e2fc3d82f110fL,0x006cf350ef0fbfL,
+        0x123ae98ec81c12L } },
+    /* 96 */
+    { { 0x310d41df46e2f6L,0x2ff032a286cf13L,0x64751a721c4eadL,
+        0x7b62bcc0339b95L,0x49acf0c195afa4L,0x359d48742544e5L,
+        0x276b7632d9e2afL },
+      { 0x656c6be182579aL,0x75b65a4d85b199L,0x04a911d1721bfaL,
+        0x46e023d0e33477L,0x1ec2d580acd869L,0x540b456f398a37L,
+        0x001f698210153dL } },
+    /* 97 */
+    { { 0x3ca35217b00dd0L,0x73961d034f4d3cL,0x4f520b61c4119dL,
+        0x4919fde5cccff7L,0x4d0e0e6f38134dL,0x55c22586003e91L,
+        0x24d39d5d8f1b19L },
+      { 0x4d4fc3d73234dcL,0x40c50c9d5f8368L,0x149afbc86bf2b8L,
+        0x1dbafefc21d7f1L,0x42e6b61355107fL,0x6e506cf4b54f29L,
+        0x0f498a6c615228L } },
+    /* 98 */
+    { { 0x30618f437cfaf8L,0x059640658532c4L,0x1c8a4d90e96e1dL,
+        0x4a327bcca4fb92L,0x54143b8040f1a0L,0x4ec0928c5a49e4L,
+        0x2af5ad488d9b1fL },
+      { 0x1b392bd5338f55L,0x539c0292b41823L,0x1fe35d4df86a02L,
+        0x5fa5bb17988c65L,0x02b6cb715adc26L,0x09a48a0c2cb509L,
+        0x365635f1a5a9f2L } },
+    /* 99 */
+    { { 0x58aa87bdc21f31L,0x156900c7cb1935L,0x0ec1f75ee2b6cfL,
+        0x5f3e35a77ec314L,0x582dec7b9b7621L,0x3e65deb0e8202aL,
+        0x325c314b8a66b7L },
+      { 0x702e2a22f24d66L,0x3a20e9982014f1L,0x6424c5b86bbfb0L,
+        0x424eea4d795351L,0x7fc4cce7c22055L,0x581383fceb92d7L,
+        0x32b663f49ee81bL } },
+    /* 100 */
+    { { 0x76e2d0b648b73eL,0x59ca39fa50bddaL,0x18bb44f786a7e4L,
+        0x28c8d49d464360L,0x1b8bf1d3a574eaL,0x7c670b9bf1635aL,
+        0x2efb30a291f4b3L },
+      { 0x5326c069cec548L,0x03bbe481416531L,0x08a415c8d93d6fL,
+        0x3414a52120d383L,0x1f17a0fc6e9c5cL,0x0de9a090717463L,
+        0x22d84b3c67ff07L } },
+    /* 101 */
+    { { 0x30b5014c3830ebL,0x70791dc1a18b37L,0x09e6ea4e24f423L,
+        0x65e148a5253132L,0x446f05d5d40449L,0x7ad5d3d707c0e9L,
+        0x18eedd63dd3ab5L },
+      { 0x40d2eac6bb29e0L,0x5b0e9605e83c38L,0x554f2c666a56a8L,
+        0x0ac27b6c94c48bL,0x1aaecdd91bafe5L,0x73c6e2bdf72634L,
+        0x306dab96d19e03L } },
+    /* 102 */
+    { { 0x6d3e4b42772f41L,0x1aba7796f3a39bL,0x3a03fbb980e9c0L,
+        0x2f2ea5da2186a8L,0x358ff444ef1fcfL,0x0798cc0329fcdcL,
+        0x39a28bcc9aa46dL },
+      { 0x42775c977fe4d2L,0x5eb8fc5483d6b0L,0x0bfe37c039e3f7L,
+        0x429292eaf9df60L,0x188bdf4b840cd5L,0x06e10e090749cdL,
+        0x0e52678e73192eL } },
+    /* 103 */
+    { { 0x05de80b08df5feL,0x2af8c77406c5f8L,0x53573c50a0304aL,
+        0x277b10b751bca0L,0x65cf8c559132a5L,0x4c667abe25f73cL,
+        0x0271809e05a575L },
+      { 0x41ced461f7a2fbL,0x0889a9ebdd7075L,0x320c63f2b7760eL,
+        0x4f8d4324151c63L,0x5af47315be2e5eL,0x73c62f6aee2885L,
+        0x206d6412a56a97L } },
+    /* 104 */
+    { { 0x6b1c508b21d232L,0x3781185974ead6L,0x1aba7c3ebe1fcfL,
+        0x5bdc03cd3f3a5aL,0x74a25036a0985bL,0x5929e30b7211b2L,
+        0x16a9f3bc366bd7L },
+      { 0x566a7057dcfffcL,0x23b5708a644bc0L,0x348cda2aa5ba8cL,
+        0x466aa96b9750d4L,0x6a435ed9b20834L,0x2e7730f2cf9901L,
+        0x2b5cd71d5b0410L } },
+    /* 105 */
+    { { 0x285ab3cee76ef4L,0x68895e3a57275dL,0x6fab2e48fd1265L,
+        0x0f1de060428c94L,0x668a2b080b5905L,0x1b589dc3b0cb37L,
+        0x3c037886592c9bL },
+      { 0x7fb5c0f2e90d4dL,0x334eefb3d8c91aL,0x75747124700388L,
+        0x547a2c2e2737f5L,0x2af9c080e37541L,0x0a295370d9091aL,
+        0x0bb5c36dad99e6L } },
+    /* 106 */
+    { { 0x644116586f25cbL,0x0c3f41f9ee1f5dL,0x00628d43a3dedaL,
+        0x16e1437aae9669L,0x6aba7861bf3e59L,0x60735631ff4c44L,
+        0x345609efaa615eL },
+      { 0x41f54792e6acefL,0x4791583f75864dL,0x37f2ff5c7508b1L,
+        0x1288912516c3b0L,0x51a2135f6a539bL,0x3b775511f42091L,
+        0x127c6afa7afe66L } },
+    /* 107 */
+    { { 0x79f4f4f7492b73L,0x583d967256342dL,0x51a729bff33ca3L,
+        0x3977d2c22d8986L,0x066f528ba8d40bL,0x5d759d30f8eb94L,
+        0x0f8e649192b408L },
+      { 0x22d84e752555bbL,0x76953855c728c7L,0x3b2254e72aaaa4L,
+        0x508cd4ce6c0212L,0x726296d6b5a6daL,0x7a77aa066986f3L,
+        0x2267a497bbcf31L } },
+    /* 108 */
+    { { 0x7f3651bf825dc4L,0x3988817388c56fL,0x257313ed6c3dd0L,
+        0x3feab7f3b8ffadL,0x6c0d3cb9e9c9b4L,0x1317be0a7b6ac4L,
+        0x2a5f399d7df850L },
+      { 0x2fe5a36c934f5eL,0x429199df88ded1L,0x435ea21619b357L,
+        0x6aac6a063bac2bL,0x600c149978f5edL,0x76543aa1114c95L,
+        0x163ca9c83c7596L } },
+    /* 109 */
+    { { 0x7dda4a3e4daedbL,0x1824cba360a4cdL,0x09312efd70e0c6L,
+        0x454e68a146c885L,0x40aee762fe5c47L,0x29811cbd755a59L,
+        0x34b37c95f28319L },
+      { 0x77c58b08b717d2L,0x309470d9a0f491L,0x1ab9f40448e01cL,
+        0x21c8bd819207b1L,0x6a01803e9361bcL,0x6e5e4c350ec415L,
+        0x14fd55a91f8798L } },
+    /* 110 */
+    { { 0x4cee562f512a90L,0x0008361d53e390L,0x3789b307a892cfL,
+        0x064f7be8770ae9L,0x41435d848762cfL,0x662204dd38baa6L,
+        0x23d6dcf73f6c5aL },
+      { 0x69bef2d2c75d95L,0x2b037c0c9bb43eL,0x495fb4d79a34cfL,
+        0x184e140c601260L,0x60193f8d435f9cL,0x283fa52a0c3ad2L,
+        0x1998635e3a7925L } },
+    /* 111 */
+    { { 0x1cfd458ce382deL,0x0dddbd201bbcaeL,0x14d2ae8ed45d60L,
+        0x73d764ab0c24cbL,0x2a97fe899778adL,0x0dbd1e01eddfe9L,
+        0x2ba5c72d4042c3L },
+      { 0x27eebc3af788f1L,0x53ffc827fc5a30L,0x6d1d0726d35188L,
+        0x4721275c50aa2aL,0x077125f02e690fL,0x6da8142405db5dL,
+        0x126cef68992513L } },
+    /* 112 */
+    { { 0x3c6067035b2d69L,0x2a1ad7db2361acL,0x3debece6cad41cL,
+        0x30095b30f9afc1L,0x25f50b9bd9c011L,0x79201b2f2c1da1L,
+        0x3b5c151449c5bdL },
+      { 0x76eff4127abdb4L,0x2d31e03ce0382aL,0x24ff21f8bda143L,
+        0x0671f244fd3ebaL,0x0c1c00b6bcc6fbL,0x18de9f7c3ebefbL,
+        0x33dd48c3809c67L } },
+    /* 113 */
+    { { 0x61d6c2722d94edL,0x7e426e31041cceL,0x4097439f1b47b0L,
+        0x579e798b2d205bL,0x6a430d67f830ebL,0x0d2c676700f727L,
+        0x05fea83a82f25bL },
+      { 0x3f3482df866b98L,0x3dd353b6a5a9cdL,0x77fe6ae1a48170L,
+        0x2f75cc2a8f7cddL,0x7442a3863dad17L,0x643de42d877a79L,
+        0x0fec8a38fe7238L } },
+    /* 114 */
+    { { 0x79b70c0760ac07L,0x195d3af37e9b29L,0x1317ff20f7cf27L,
+        0x624e1c739e7504L,0x67330ef50f943dL,0x775e8cf455d793L,
+        0x17b94d2d913a9fL },
+      { 0x4b627203609e7fL,0x06aac5fb93e041L,0x603c515fdc2611L,
+        0x2592ca0d7ae472L,0x02395d1f50a6cbL,0x466ef9648f85d9L,
+        0x297cf879768f72L } },
+    /* 115 */
+    { { 0x3489d67d85fa94L,0x0a6e5b739c8e04L,0x7ebb5eab442e90L,
+        0x52665a007efbd0L,0x0967ca57b0d739L,0x24891f9d932b63L,
+        0x3cc2d6dbadc9d3L },
+      { 0x4b4773c81c5338L,0x73cd47dad7a0f9L,0x7c755bab6ae158L,
+        0x50b03d6becefcaL,0x574d6e256d57f0L,0x188db4fffb92aeL,
+        0x197e10118071eaL } },
+    /* 116 */
+    { { 0x45d0cbcba1e7f1L,0x1180056abec91aL,0x6c5f86624bbc28L,
+        0x442c83f3b8e518L,0x4e16ae1843ecb4L,0x670cef2fd786c9L,
+        0x205b4acb637d2cL },
+      { 0x70b0e539aa8671L,0x67c982056bebd0L,0x645c831a5e7c36L,
+        0x09e06951a14b32L,0x5dd610ad4c89e6L,0x41c35f20164831L,
+        0x3821f29cb4cdb8L } },
+    /* 117 */
+    { { 0x2831ffaba10079L,0x70f6dac9ffe444L,0x1cfa32ccc03717L,
+        0x01519fda22a3c8L,0x23215e815aaa27L,0x390671ad65cbf7L,
+        0x03dd4d72de7d52L },
+      { 0x1ecd972ee95923L,0x166f8da3813e8eL,0x33199bbd387a1aL,
+        0x04525fe15e3dc7L,0x44d2ef54165898L,0x4b7e47d3dc47f7L,
+        0x10d5c8db0b5d44L } },
+    /* 118 */
+    { { 0x176d95ba9cdb1bL,0x14025f04f23dfcL,0x49379332891687L,
+        0x6625e5ccbb2a57L,0x7ac0abdbf9d0e5L,0x7aded4fbea15b2L,
+        0x314844ac184d67L },
+      { 0x6d9ce34f05eae3L,0x3805d2875856d2L,0x1c2122f85e40ebL,
+        0x51cb9f2d483a9aL,0x367e91e20f1702L,0x573c3559838dfdL,
+        0x0b282b0cb85af1L } },
+    /* 119 */
+    { { 0x6a12e4ef871eb5L,0x64bb517e14f5ffL,0x29e04d3aaa530bL,
+        0x1b07d88268f261L,0x411be11ed16fb0L,0x1f480536db70bfL,
+        0x17a7deadfd34e4L },
+      { 0x76d72f30646612L,0x5a3bbb43a1b0a0L,0x5e1687440e82bfL,
+        0x713b5e69481112L,0x46c3dcb499e174L,0x0862da3b4e2a24L,
+        0x31cb55b4d62681L } },
+    /* 120 */
+    { { 0x5ffc74dae5bb45L,0x18944c37adb9beL,0x6aaa63b1ee641aL,
+        0x090f4b6ee057d3L,0x4045cedd2ee00fL,0x21c2c798f7c282L,
+        0x2c2c6ef38cd6bdL },
+      { 0x40d78501a06293L,0x56f8caa5cc89a8L,0x7231d5f91b37aeL,
+        0x655f1e5a465c6dL,0x3f59a81f9cf783L,0x09bbba04c23624L,
+        0x0f71ee23bbacdeL } },
+    /* 121 */
+    { { 0x38d398c4741456L,0x5204c0654243c3L,0x34498c916ea77eL,
+        0x12238c60e5fe43L,0x0fc54f411c7625L,0x30b2ca43aa80b6L,
+        0x06bead1bb6ea92L },
+      { 0x5902ba8674b4adL,0x075ab5b0fa254eL,0x58db83426521adL,
+        0x5b66b6b3958e39L,0x2ce4e39890e07bL,0x46702513338b37L,
+        0x363690c2ded4d7L } },
+    /* 122 */
+    { { 0x765642c6b75791L,0x0f4c4300d7f673L,0x404d8bbe101425L,
+        0x61e91c88651f1bL,0x61ddc9bc60aed8L,0x0ef36910ce2e65L,
+        0x04b44367aa63b8L },
+      { 0x72822d3651b7dcL,0x4b750157a2716dL,0x091cb4f2118d16L,
+        0x662ba93b101993L,0x447cbd54a1d40aL,0x12cdd48d674848L,
+        0x16f10415cbec69L } },
+    /* 123 */
+    { { 0x0c57a3a751cd0eL,0x0833d7478fadceL,0x1e751f55686436L,
+        0x489636c58e1df7L,0x26ad6da941266fL,0x22225d3559880fL,
+        0x35b397c45ba0e2L },
+      { 0x3ca97b70e1f2ceL,0x78e50427a8680cL,0x06137e042a8f91L,
+        0x7ec40d2500b712L,0x3f0ad688ad7b0dL,0x24746fb33f9513L,
+        0x3638fcce688f0bL } },
+    /* 124 */
+    { { 0x753163750bed6fL,0x786507cd16157bL,0x1d6ec228ce022aL,
+        0x587255f42d1b31L,0x0c6adf72a3a0f6L,0x4bfeee2da33f5eL,
+        0x08b7300814de6cL },
+      { 0x00bf8df9a56e11L,0x75aead48fe42e8L,0x3de9bad911b2e2L,
+        0x0fadb233e4b8bbL,0x5b054e8fd84f7dL,0x5eb3064152889bL,
+        0x01c1c6e8c777a1L } },
+    /* 125 */
+    { { 0x5fa0e598f8fcb9L,0x11c129a1ae18dfL,0x5c41b482a2273bL,
+        0x545664e5044c9cL,0x7e01c915bfb9abL,0x7f626e19296aa0L,
+        0x20c91a9822a087L },
+      { 0x273a9fbe3c378fL,0x0f126b44b7d350L,0x493764a75df951L,
+        0x32dec3c367d24bL,0x1a7ae987fed9d3L,0x58a93055928b85L,
+        0x11626975d7775fL } },
+    /* 126 */
+    { { 0x2bb174a95540a9L,0x10de02c58b613fL,0x2fa8f7b861f3eeL,
+        0x44731260bdf3b3L,0x19c38ff7da41feL,0x3535a16e3d7172L,
+        0x21a948b83cc7feL },
+      { 0x0e6f72868bc259L,0x0c70799df3c979L,0x526919955584c3L,
+        0x4d95fda04f8fa2L,0x7bb228e6c0f091L,0x4f728b88d92194L,
+        0x2b361c5a136bedL } },
+    /* 127 */
+    { { 0x0c72ca10c53841L,0x4036ab49f9da12L,0x578408d2b7082bL,
+        0x2c4903201fbf5eL,0x14722b3f42a6a8L,0x1997b786181694L,
+        0x25c6f10de32849L },
+      { 0x79f46d517ff2ffL,0x2dc5d97528f6deL,0x518a494489aa72L,
+        0x52748f8af3cf97L,0x472da30a96bb16L,0x1be228f92465a9L,
+        0x196f0c47d60479L } },
+    /* 128 */
+    { { 0x47dd7d139b3239L,0x049c9b06775d0fL,0x627ffc00562d5eL,
+        0x04f578d5e5e243L,0x43a788ffcef8b9L,0x7db320be9dde28L,
+        0x00837528b8572fL },
+      { 0x2969eca306d695L,0x195b72795ec194L,0x5e1fa9b8e77e50L,
+        0x4c627f2b3fbfd5L,0x4b91e0d0ee10ffL,0x5698c8d0f35833L,
+        0x12d3a9431f475eL } },
+    /* 129 */
+    { { 0x6409457a0db57eL,0x795b35192e0433L,0x146f973fe79805L,
+        0x3d49c516dfb9cfL,0x50dfc3646b3cdaL,0x16a08a2210ad06L,
+        0x2b4ef5bcd5b826L },
+      { 0x5ebabfee2e3e3eL,0x2e048e724d9726L,0x0a7a7ed6abef40L,
+        0x71ff7f83e39ad8L,0x3405ac52a1b852L,0x2e3233357a608dL,
+        0x38c1bf3b0e40e6L } },
+    /* 130 */
+    { { 0x59aec823e4712cL,0x6ed9878331ddadL,0x1cc6faf629f2a0L,
+        0x445ff79f36c18cL,0x4edc7ed57aff3dL,0x22ee54c8bdd9e8L,
+        0x35398f42d72ec5L },
+      { 0x4e7a1cceee0ecfL,0x4c66a707dd1d31L,0x629ad157a23c04L,
+        0x3b2c6031dc3c83L,0x3336acbcd3d96cL,0x26ce43adfce0f0L,
+        0x3c869c98d699dcL } },
+    /* 131 */
+    { { 0x58b3cd9586ba11L,0x5d6514b8090033L,0x7c88c3bd736782L,
+        0x1735f84f2130edL,0x47784095a9dee0L,0x76312c6e47901bL,
+        0x1725f6ebc51455L },
+      { 0x6744344bc4503eL,0x16630b4d66e12fL,0x7b3481752c3ec7L,
+        0x47bb2ed1f46f95L,0x08a1a497dd1bcfL,0x1f525df2b8ed93L,
+        0x0fe492ea993713L } },
+    /* 132 */
+    { { 0x71b8dd7268b448L,0x1743dfaf3728d7L,0x23938d547f530aL,
+        0x648c3d497d0fc6L,0x26c0d769e3ad45L,0x4d25108769a806L,
+        0x3fbf2025143575L },
+      { 0x485bfd90339366L,0x2de2b99ed87461L,0x24a33347713badL,
+        0x1674bc7073958aL,0x5bb2373ee85b5fL,0x57f9bd657e662cL,
+        0x2041b248d39042L } },
+    /* 133 */
+    { { 0x5f01617d02f4eeL,0x2a8e31c4244b91L,0x2dab3e790229e0L,
+        0x72d319ea7544afL,0x01ffb8b000cb56L,0x065e63b0daafd3L,
+        0x3d7200a7111d6fL },
+      { 0x4561ce1b568973L,0x37034c532dd8ecL,0x1368215020be02L,
+        0x30e7184cf289ebL,0x199e0c27d815deL,0x7ee1b4dff324e5L,
+        0x2f4a11de7fab5cL } },
+    /* 134 */
+    { { 0x33c2f99b1cdf2bL,0x1e0d78bf42a2c0L,0x64485dececaa67L,
+        0x2242a41be93e92L,0x62297b1f15273cL,0x16ebfaafb02205L,
+        0x0f50f805f1fdabL },
+      { 0x28bb0b3a70eb28L,0x5b1c7d0160d683L,0x05c30a37959f78L,
+        0x3d9301184922d2L,0x46c1ead7dbcb1aL,0x03ee161146a597L,
+        0x2d413ed9a6ccc1L } },
+    /* 135 */
+    { { 0x685ab5f97a27c2L,0x59178214023751L,0x4ffef3c585ab17L,
+        0x2bc85302aba2a9L,0x675b001780e856L,0x103c8a37f0b33dL,
+        0x2241e98ece70a6L },
+      { 0x546738260189edL,0x086c8f7a6b96edL,0x00832ad878a129L,
+        0x0b679056ba7462L,0x020ce6264bf8c4L,0x3f9f4b4d92abfbL,
+        0x3e9c55343c92edL } },
+    /* 136 */
+    { { 0x482cec9b3f5034L,0x08b59b3cd1fa30L,0x5a55d1bc8e58b5L,
+        0x464a5259337d8eL,0x0a5b6c66ade5a5L,0x55db77b504ddadL,
+        0x015992935eac35L },
+      { 0x54fe51025e32fcL,0x5d7f52dbe4a579L,0x08c564a8c58696L,
+        0x4482a8bec4503fL,0x440e75d9d94de9L,0x6992d768020bfaL,
+        0x06c311e8ba01f6L } },
+    /* 137 */
+    { { 0x2a6ac808223878L,0x04d3ccb4aab0b8L,0x6e6ef09ff6e823L,
+        0x15cb03ee9158dcL,0x0dc58919171bf7L,0x3273568abf3cb1L,
+        0x1b55245b88d98bL },
+      { 0x28e9383b1de0c1L,0x30d5009e4f1f1bL,0x334d185a56a134L,
+        0x0875865dfa4c46L,0x266edf5eae3beeL,0x2e03ff16d1f7e5L,
+        0x29a36bd9f0c16dL } },
+    /* 138 */
+    { { 0x004cff44b2e045L,0x426c96380ba982L,0x422292281e46d7L,
+        0x508dd8d29d7204L,0x3a4ea73fb2995eL,0x4be64090ae07b2L,
+        0x3339177a0eff22L },
+      { 0x74a97ec2b3106eL,0x0c616d09169f5fL,0x1bb5d8907241a7L,
+        0x661fb67f6d41bdL,0x018a88a0daf136L,0x746333a093a7b4L,
+        0x3e19f1ac76424eL } },
+    /* 139 */
+    { { 0x542a5656527296L,0x0e7b9ce22f1bc9L,0x31b0945992b89bL,
+        0x6e0570eb85056dL,0x32daf813483ae5L,0x69eeae9d59bb55L,
+        0x315ad4b730b557L },
+      { 0x2bc16795f32923L,0x6b02b7ba55130eL,0x1e9da67c012f85L,
+        0x5616f014dabf8fL,0x777395fcd9c723L,0x2ff075e7743246L,
+        0x2993538aff142eL } },
+    /* 140 */
+    { { 0x72dae20e552b40L,0x2e4ba69aa5d042L,0x001e563e618bd2L,
+        0x28feeba3c98772L,0x648c356da2a907L,0x687e2325069ea7L,
+        0x0d34ab09a394f0L },
+      { 0x73c21813111286L,0x5829b53b304e20L,0x6fba574de08076L,
+        0x79f7058f61614eL,0x4e71c9316f1191L,0x24ef12193e0a89L,
+        0x35dc4e2bc9d848L } },
+    /* 141 */
+    { { 0x045e6d3b4ad1cdL,0x729c95493782f0L,0x77f59de85b361aL,
+        0x5309b4babf28f8L,0x4d893d9290935fL,0x736f47f2b2669eL,
+        0x23270922d757f3L },
+      { 0x23a4826f70d4e9L,0x68a8c63215d33eL,0x4d6c2069205c9cL,
+        0x46b2938a5eebe0L,0x41d1f1e2de3892L,0x5ca1775544bcb0L,
+        0x3130629e5d19dcL } },
+    /* 142 */
+    { { 0x6e2681593375acL,0x117cfbabc22621L,0x6c903cd4e13ccaL,
+        0x6f358f14d4bd97L,0x1bc58fa11089f1L,0x36aa2db4ac426aL,
+        0x15ced8464b7ea1L },
+      { 0x6966836cba7df5L,0x7c2b1851568113L,0x22b50ff2ffca66L,
+        0x50e77d9f48e49aL,0x32775e9bbc7cc9L,0x403915bb0ece71L,
+        0x1b8ec7cb9dd7aaL } },
+    /* 143 */
+    { { 0x65a888b677788bL,0x51887fac2e7806L,0x06792636f98d2bL,
+        0x47bbcd59824c3bL,0x1aca908c43e6dcL,0x2e00d15c708981L,
+        0x08e031c2c80634L },
+      { 0x77fbc3a297c5ecL,0x10a7948af2919eL,0x10cdafb1fb6b2fL,
+        0x27762309b486f0L,0x13abf26bbac641L,0x53da38478fc3eeL,
+        0x3c22eff379bf55L } },
+    /* 144 */
+    { { 0x0163f484770ee3L,0x7f28e8942e0cbfL,0x5f86cb51b43831L,
+        0x00feccd4e4782fL,0x40e5b417eafe7dL,0x79e5742bbea228L,
+        0x3717154aa469beL },
+      { 0x271d74a270f721L,0x40eb400890b70cL,0x0e37be81d4cb02L,
+        0x786907f4e8d43fL,0x5a1f5b590a7acbL,0x048861883851fdL,
+        0x11534a1e563dbbL } },
+    /* 145 */
+    { { 0x37a6357c525435L,0x6afe6f897b78a5L,0x7b7ff311d4f67bL,
+        0x38879df15dc9f4L,0x727def7b8ba987L,0x20285dd0db4436L,
+        0x156b0fc64b9243L },
+      { 0x7e3a6ec0c1c390L,0x668a88d9bcf690L,0x5925aba5440dbeL,
+        0x0f6891a044f593L,0x70b46edfed4d97L,0x1a6cc361bab201L,
+        0x046f5bc6e160bcL } },
+    /* 146 */
+    { { 0x79350f076bc9d1L,0x077d9e79a586b9L,0x0896bc0c705764L,
+        0x58e632b90e7e46L,0x14e87e0ad32488L,0x4b1bb3f72c6e00L,
+        0x3c3ce9684a5fc5L },
+      { 0x108fbaf1f703aaL,0x08405ecec17577L,0x199a8e2d44be73L,
+        0x2eb22ed0067763L,0x633944deda3300L,0x20d739eb8e5efbL,
+        0x2bbbd94086b532L } },
+    /* 147 */
+    { { 0x03c8b17a19045dL,0x6205a0a504980bL,0x67fdb3e962b9f0L,
+        0x16399e01511a4bL,0x44b09fe9dffc96L,0x00a74ff44a1381L,
+        0x14590deed3f886L },
+      { 0x54e3d5c2a23ddbL,0x310e5138209d28L,0x613f45490c1c9bL,
+        0x6bbc85d44bbec8L,0x2f85fc559e73f6L,0x0d71fa7d0fa8cbL,
+        0x2898571d17fbb9L } },
+    /* 148 */
+    { { 0x5607a84335167dL,0x3009c1eb910f91L,0x7ce63447e62d0bL,
+        0x03a0633afcf89eL,0x1234b5aaa50872L,0x5a307b534d547bL,
+        0x2f4e97138a952eL },
+      { 0x13914c2db0f658L,0x6cdcb47e6e75baL,0x5549169caca772L,
+        0x0f20423dfeb16fL,0x6b1ae19d180239L,0x0b7b3bee9b7626L,
+        0x1ca81adacfe4efL } },
+    /* 149 */
+    { { 0x219ec3ad19d96fL,0x3549f6548132dbL,0x699889c7aacd0bL,
+        0x74602a58730b19L,0x62dc63bcece81cL,0x316f991c0c317aL,
+        0x2b8627867b95e3L },
+      { 0x67a25ddced1eedL,0x7e14f0eba756e7L,0x0873fbc09b0495L,
+        0x0fefb0e16596adL,0x03e6cd98ef39bbL,0x1179b1cded249dL,
+        0x35c79c1db1edc2L } },
+    /* 150 */
+    { { 0x1368309d4245bfL,0x442e55852a7667L,0x095b0f0f348b65L,
+        0x6834cf459dfad4L,0x6645950c9be910L,0x06bd81288c71e6L,
+        0x1b015b6e944edfL },
+      { 0x7a6a83045ab0e3L,0x6afe88b9252ad0L,0x2285bd65523502L,
+        0x6c78543879a282L,0x1c5e264b5c6393L,0x3a820c6a7453eeL,
+        0x37562d1d61d3c3L } },
+    /* 151 */
+    { { 0x6c084f62230c72L,0x599490270bc6cfL,0x1d3369ddd3c53dL,
+        0x516ddb5fac5da0L,0x35ab1e15011b1aL,0x5fba9106d3a180L,
+        0x3be0f092a0917cL },
+      { 0x57328f9fdc2538L,0x0526323fc8d5f6L,0x10cbb79521e602L,
+        0x50d01167147ae2L,0x2ec7f1b3cda99eL,0x43073cc736e7beL,
+        0x1ded89cadd83a6L } },
+    /* 152 */
+    { { 0x1d51bda65d56d5L,0x63f2fd4d2dc056L,0x326413d310ea6dL,
+        0x3abba5bca92876L,0x6b9aa8bc4d6ebeL,0x1961c687f15d5dL,
+        0x311cf07464c381L },
+      { 0x2321b1064cd8aeL,0x6e3caac4443850L,0x3346fc4887d2d0L,
+        0x1640417e0e640fL,0x4a958a52a07a9eL,0x1346a1b1cb374cL,
+        0x0a793cf79beccbL } },
+    /* 153 */
+    { { 0x29d56cba89aaa5L,0x1581898c0b3c15L,0x1af5b77293c082L,
+        0x1617ba53a006ceL,0x62dd3b384e475fL,0x71a9820c3f962aL,
+        0x0e4938920b854eL },
+      { 0x0b8d98849808abL,0x64c14923546de7L,0x6a20883b78a6fcL,
+        0x72de211428acd6L,0x009678b47915bbL,0x21b5269ae5dae6L,
+        0x313cc0e60b9457L } },
+    /* 154 */
+    { { 0x69ee421b1de38bL,0x44b484c6cec1c7L,0x0240596c6a8493L,
+        0x2321a62c85fb9eL,0x7a10921802a341L,0x3d2a95507e45c3L,
+        0x0752f40f3b6714L },
+      { 0x596a38798751e6L,0x46bf186a0feb85L,0x0b23093e23b49cL,
+        0x1bfa7bc5afdc07L,0x4ba96f873eefadL,0x292e453fae9e44L,
+        0x2773646667b75cL } },
+    /* 155 */
+    { { 0x1f81a64e94f22aL,0x3125ee3d8683ddL,0x76a660a13b9582L,
+        0x5aa584c3640c6eL,0x27cc99fd472953L,0x7048f4d58061d1L,
+        0x379a1397ac81e8L },
+      { 0x5d1ecd2b6b956bL,0x0829e0366b0697L,0x49548cec502421L,
+        0x7af5e2f717c059L,0x329a25a0fec54eL,0x028e99e4bcd7f1L,
+        0x071d5fe81fca78L } },
+    /* 156 */
+    { { 0x4b5c4aeb0fdfe4L,0x1367e11326ce37L,0x7c16f020ef5f19L,
+        0x3c55303d77b471L,0x23a4457a06e46aL,0x2174426dd98424L,
+        0x226f592114bd69L },
+      { 0x4411b94455f15aL,0x52e0115381fae4L,0x45b6d8efbc8f7eL,
+        0x58b1221bd86d26L,0x284fb6f8a7ec1fL,0x045835939ddd30L,
+        0x0216960accd598L } },
+    /* 157 */
+    { { 0x4b61f9ec1f138aL,0x4460cd1e18502bL,0x277e4fce3c4726L,
+        0x0244246d6414b9L,0x28fbfcef256984L,0x3347ed0db40577L,
+        0x3b57fa9e044718L },
+      { 0x4f73bcd6d1c833L,0x2c0d0dcf7f0136L,0x2010ac75454254L,
+        0x7dc4f6151539a8L,0x0b8929ef6ea495L,0x517e20119d2bdfL,
+        0x1e29f9a126ba15L } },
+    /* 158 */
+    { { 0x683a7c10470cd8L,0x0d05f0dbe0007fL,0x2f6a5026d649cdL,
+        0x249ce2fdaed603L,0x116dc1e7a96609L,0x199bd8d82a0b98L,
+        0x0694ad0219aeb2L },
+      { 0x03a3656e864045L,0x4e552273df82a6L,0x19bcc7553d17abL,
+        0x74ac536c1df632L,0x440302fb4a86f6L,0x1becec0e31c9feL,
+        0x002045f8fa46b8L } },
+    /* 159 */
+    { { 0x5833ba384310a2L,0x1db83fad93f8baL,0x0a12713ee2f7edL,
+        0x40e0f0fdcd2788L,0x1746de5fb239a5L,0x573748965cfa15L,
+        0x1e3dedda0ef650L },
+      { 0x6c8ca1c87607aeL,0x785dab9554fc0eL,0x649d8f91860ac8L,
+        0x4436f88b52c0f9L,0x67f22ca8a5e4a3L,0x1f990fd219e4c9L,
+        0x013dd21c08573fL } },
+    /* 160 */
+    { { 0x05d116141d161cL,0x5c1d2789da2ea5L,0x11f0d861f99f34L,
+        0x692c2650963153L,0x3bd69f5329539eL,0x215898eef8885fL,
+        0x041f79dd86f7f1L },
+      { 0x76dcc5e96beebdL,0x7f2b50cb42a332L,0x067621cabef8abL,
+        0x31e0be607054edL,0x4c67c5e357a3daL,0x5b1a63fbfb1c2bL,
+        0x3112efbf5e5c31L } },
+    /* 161 */
+    { { 0x3f83e24c0c62f1L,0x51dc9c32aae4e0L,0x2ff89b33b66c78L,
+        0x21b1c7d354142cL,0x243d8d381c84bcL,0x68729ee50cf4b7L,
+        0x0ed29e0f442e09L },
+      { 0x1ad7b57576451eL,0x6b2e296d6b91dcL,0x53f2b306e30f42L,
+        0x3964ebd9ee184aL,0x0a32855df110e4L,0x31f2f90ddae05fL,
+        0x3410cd04e23702L } },
+    /* 162 */
+    { { 0x60d1522ca8f2feL,0x12909237a83e34L,0x15637f80d58590L,
+        0x3c72431b6d714dL,0x7c8e59a615bea2L,0x5f977b688ef35aL,
+        0x071c198c0b3ab0L },
+      { 0x2b54c699699b4bL,0x14da473c2fd0bcL,0x7ba818ea0ad427L,
+        0x35117013940b2fL,0x6e1df6b5e609dbL,0x3f42502720b64dL,
+        0x01ee7dc890e524L } },
+    /* 163 */
+    { { 0x12ec1448ff4e49L,0x3e2edac882522bL,0x20455ab300f93aL,
+        0x5849585bd67c14L,0x0393d5aa34ba8bL,0x30f9a1f2044fa7L,
+        0x1059c9377a93e0L },
+      { 0x4e641cc0139e73L,0x0d9f23c9b0fa78L,0x4b2ad87e2b83f9L,
+        0x1c343a9f6d9e3cL,0x1098a4cb46de4dL,0x4ddc893843a41eL,
+        0x1797f4167d6e3aL } },
+    /* 164 */
+    { { 0x4add4675856031L,0x499bd5e5f7a0ffL,0x39ea1f1202271eL,
+        0x0ecd7480d7a91eL,0x395f5e5fc10956L,0x0fa7f6b0c9f79bL,
+        0x2fad4623aed6cbL },
+      { 0x1563c33ae65825L,0x29881cafac827aL,0x50650baf4c45a1L,
+        0x034aad988fb9e9L,0x20a6224dc5904cL,0x6fb141a990732bL,
+        0x3ec9ae1b5755deL } },
+    /* 165 */
+    { { 0x3108e7c686ae17L,0x2e73a383b4ad8aL,0x4e6bb142ba4243L,
+        0x24d355922c1d80L,0x2f850dd9a088baL,0x21c50325dd5e70L,
+        0x33237dd5bd7fa4L },
+      { 0x7823a39cab7630L,0x1535f71cff830eL,0x70d92ff0599261L,
+        0x227154d2a2477cL,0x495e9bbb4f871cL,0x40d2034835686bL,
+        0x31b08f97eaa942L } },
+    /* 166 */
+    { { 0x0016c19034d8ddL,0x68961627cf376fL,0x6acc90681615aeL,
+        0x6bc7690c2e3204L,0x6ddf28d2fe19a2L,0x609b98f84dae4dL,
+        0x0f32bfd7c94413L },
+      { 0x7d7edc6b21f843L,0x49bbd2ebbc9872L,0x593d6ada7b6a23L,
+        0x55736602939e9cL,0x79461537680e39L,0x7a7ee9399ca7cdL,
+        0x008776f6655effL } },
+    /* 167 */
+    { { 0x64585f777233cfL,0x63ec12854de0f6L,0x6b7f9bbbc3f99dL,
+        0x301c014b1b55d3L,0x7cf3663bbeb568L,0x24959dcb085bd1L,
+        0x12366aa6752881L },
+      { 0x77a74c0da5e57aL,0x3279ca93ad939fL,0x33c3c8a1ef08c9L,
+        0x641b05ab42825eL,0x02f416d7d098dbL,0x7e3d58be292b68L,
+        0x1864dbc46e1f46L } },
+    /* 168 */
+    { { 0x1da167b8153a9dL,0x47593d07d9e155L,0x386d984e12927fL,
+        0x421a6f08a60c7cL,0x5ae9661c24dab3L,0x7927b2e7874507L,
+        0x3266ea80609d53L },
+      { 0x7d198f4c26b1e3L,0x430d4ea2c4048eL,0x58d8ab77e84ba3L,
+        0x1cb14299c37297L,0x6db6031e8f695cL,0x159bd855e26d55L,
+        0x3f3f6d318a73ddL } },
+    /* 169 */
+    { { 0x3ee958cca40298L,0x02a7e5eba32ad6L,0x43b4bab96f0e1eL,
+        0x534be79062b2b1L,0x029ead089b37e3L,0x4d585da558f5aaL,
+        0x1f9737eb43c376L },
+      { 0x0426dfd9b86202L,0x4162866bc0a9f3L,0x18fc518e7bb465L,
+        0x6db63380fed812L,0x421e117f709c30L,0x1597f8d0f5cee6L,
+        0x04ffbf1289b06aL } },
+    /* 170 */
+    { { 0x61a1987ffa0a5fL,0x42058c7fc213c6L,0x15b1d38447d2c9L,
+        0x3d5f5d7932565eL,0x5db754af445fa7L,0x5d489189fba499L,
+        0x02c4c55f51141bL },
+      { 0x26b15972e9993dL,0x2fc90bcbd97c45L,0x2ff60f8684b0f1L,
+        0x1dc641dd339ab0L,0x3e38e6be23f82cL,0x3368162752c817L,
+        0x19bba80ceb45ceL } },
+    /* 171 */
+    { { 0x7c6e95b4c6c693L,0x6bbc6d5efa7093L,0x74d7f90bf3bf1cL,
+        0x54d5be1f0299a1L,0x7cb24f0aa427c6L,0x0a18f3e086c941L,
+        0x058a1c90e4faefL },
+      { 0x3d6bd016927e1eL,0x1da4ce773098b8L,0x2133522e690056L,
+        0x0751416d3fc37eL,0x1beed1643eda66L,0x5288b6727d5c54L,
+        0x199320e78655c6L } },
+    /* 172 */
+    { { 0x74575027eeaf94L,0x124bd533c3ceaeL,0x69421ab7a8a1d7L,
+        0x37f2127e093f3dL,0x40281765252a08L,0x25a228798d856dL,
+        0x326eca62759c4cL },
+      { 0x0c337c51acb0a5L,0x122ba78c1ef110L,0x02498adbb68dc4L,
+        0x67240c124b089eL,0x135865d25d9f89L,0x338a76d5ae5670L,
+        0x03a8efaf130385L } },
+    /* 173 */
+    { { 0x3a450ac5e49beaL,0x282af80bb4b395L,0x6779eb0db1a139L,
+        0x737cabdd174e55L,0x017b14ca79b5f2L,0x61fdef6048e137L,
+        0x3acc12641f6277L },
+      { 0x0f730746fe5096L,0x21d05c09d55ea1L,0x64d44bddb1a560L,
+        0x75e5035c4778deL,0x158b7776613513L,0x7b5efa90c7599eL,
+        0x2caa0791253b95L } },
+    /* 174 */
+    { { 0x288e5b6d53e6baL,0x435228909d45feL,0x33b4cf23b2a437L,
+        0x45b352017d6db0L,0x4372d579d6ef32L,0x0fa9e5badbbd84L,
+        0x3a78cff24759bbL },
+      { 0x0899d2039eab6eL,0x4cf47d2f76bc22L,0x373f739a3a8c69L,
+        0x09beaa5b1000b3L,0x0acdfbe83ebae5L,0x10c10befb0e900L,
+        0x33d2ac4cc31be3L } },
+    /* 175 */
+    { { 0x765845931e08fbL,0x2a3c2a0dc58007L,0x7270da587d90e1L,
+        0x1ee648b2bc8f86L,0x5d2ca68107b29eL,0x2b7064846e9e92L,
+        0x3633ed98dbb962L },
+      { 0x5e0f16a0349b1bL,0x58d8941f570ca4L,0x20abe376a4cf34L,
+        0x0f4bd69a360977L,0x21eb07cc424ba7L,0x720d2ecdbbe6ecL,
+        0x255597d5a97c34L } },
+    /* 176 */
+    { { 0x67bbf21a0f5e94L,0x422a3b05a64fc1L,0x773ac447ebddc7L,
+        0x1a1331c08019f1L,0x01ef6d269744ddL,0x55f7be5b3b401aL,
+        0x072e031c681273L },
+      { 0x7183289e21c677L,0x5e0a3391f3162fL,0x5e02d9e65d914aL,
+        0x07c79ea1adce2fL,0x667ca5c2e1cbe4L,0x4f287f22caccdaL,
+        0x27eaa81673e75bL } },
+    /* 177 */
+    { { 0x5246180a078fe6L,0x67cc8c9fa3bb15L,0x370f8dd123db31L,
+        0x1938dafa69671aL,0x5af72624950c5eL,0x78cc5221ebddf8L,
+        0x22d616fe2a84caL },
+      { 0x723985a839327fL,0x24fa95584a5e22L,0x3d8a5b3138d38bL,
+        0x3829ef4a017acfL,0x4f09b00ae055c4L,0x01df84552e4516L,
+        0x2a7a18993e8306L } },
+    /* 178 */
+    { { 0x7b6224bc310eccL,0x69e2cff429da16L,0x01c850e5722869L,
+        0x2e4889443ee84bL,0x264a8df1b3d09fL,0x18a73fe478d0d6L,
+        0x370b52740f9635L },
+      { 0x52b7d3a9d6f501L,0x5c49808129ee42L,0x5b64e2643fd30cL,
+        0x27d903fe31b32cL,0x594cb084d078f9L,0x567fb33e3ae650L,
+        0x0db7be9932cb65L } },
+    /* 179 */
+    { { 0x19b78113ed7cbeL,0x002b2f097a1c8cL,0x70b1dc17fa5794L,
+        0x786e8419519128L,0x1a45ba376af995L,0x4f6aa84b8d806cL,
+        0x204b4b3bc7ca47L },
+      { 0x7581a05fd94972L,0x1c73cadb870799L,0x758f6fefc09b88L,
+        0x35c62ba8049b42L,0x6f5e71fc164cc3L,0x0cd738b5702721L,
+        0x10021afac9a423L } },
+    /* 180 */
+    { { 0x654f7937e3c115L,0x5d198288b515cbL,0x4add965c25a6e3L,
+        0x5a37df33cd76ffL,0x57bb7e288e1631L,0x049b69089e1a31L,
+        0x383a88f4122a99L },
+      { 0x4c0e4ef3d80a73L,0x553c77ac9f30e2L,0x20bb18c2021e82L,
+        0x2aec0d1c4225c5L,0x397fce0ac9c302L,0x2ab0c2a246e8aaL,
+        0x02e5e5190be080L } },
+    /* 181 */
+    { { 0x7a255a4ae03080L,0x0d68b01513f624L,0x29905bd4e48c8cL,
+        0x1d81507027466bL,0x1684aaeb70dee1L,0x7dd460719f0981L,
+        0x29c43b0f0a390cL },
+      { 0x272567681b1f7dL,0x1d2a5f8502e0efL,0x0fd5cd6b221befL,
+        0x5eb4749e9a0434L,0x7d1553a324e2a6L,0x2eefd8e86a7804L,
+        0x2ad80d5335109cL } },
+    /* 182 */
+    { { 0x25342aef4c209dL,0x24e811ac4e0865L,0x3f209757f8ae9dL,
+        0x1473ff8a5da57bL,0x340f61c3919cedL,0x7523bf85fb9bc0L,
+        0x319602ebca7cceL },
+      { 0x121e7541d442cbL,0x4ffa748e49c95cL,0x11493cd1d131dcL,
+        0x42b215172ab6b5L,0x045fd87e13cc77L,0x0ae305df76342fL,
+        0x373b033c538512L } },
+    /* 183 */
+    { { 0x389541e9539819L,0x769f3b29b7e239L,0x0d05f695e3232cL,
+        0x029d04f0e9a9fbL,0x58b78b7a697fb8L,0x7531b082e6386bL,
+        0x215d235bed95a9L },
+      { 0x503947c1859c5dL,0x4b82a6ba45443fL,0x78328eab71b3a5L,
+        0x7d8a77f8cb3509L,0x53fcd9802e41d4L,0x77552091976edbL,
+        0x226c60ad7a5156L } },
+    /* 184 */
+    { { 0x77ad6a43360710L,0x0fdeabd326d7aeL,0x4012886c92104aL,
+        0x2d6c378dd7ae33L,0x7e72ef2c0725f3L,0x4a4671f4ca18e0L,
+        0x0afe3b4bb6220fL },
+      { 0x212cf4b56e0d6aL,0x7c24d086521960L,0x0662cf71bd414dL,
+        0x1085b916c58c25L,0x781eed2be9a350L,0x26880e80db6ab2L,
+        0x169e356442f061L } },
+    /* 185 */
+    { { 0x57aa2ad748b02cL,0x68a34256772a9aL,0x1591c44962f96cL,
+        0x110a9edd6e53d2L,0x31eab597e091a3L,0x603e64e200c65dL,
+        0x2f66b72e8a1cfcL },
+      { 0x5c79d138543f7fL,0x412524363fdfa3L,0x547977e3b40008L,
+        0x735ca25436d9f7L,0x232b4888cae049L,0x27ce37a53d8f23L,
+        0x34d45881a9b470L } },
+    /* 186 */
+    { { 0x76b95255924f43L,0x035c9f3bd1aa5dL,0x5eb71a010b4bd0L,
+        0x6ce8dda7e39f46L,0x35679627ea70c0L,0x5c987767c7d77eL,
+        0x1fa28952b620b7L },
+      { 0x106f50b5924407L,0x1cc3435a889411L,0x0597cdce3bc528L,
+        0x738f8b0d5077d1L,0x5894dd60c7dd6aL,0x0013d0721f5e2eL,
+        0x344573480527d3L } },
+    /* 187 */
+    { { 0x2e2c1da52abf77L,0x394aa8464ad05eL,0x095259b7330a83L,
+        0x686e81cf6a11f5L,0x405c7e48c93c7cL,0x65c3ca9444a2ecL,
+        0x07bed6c59c3563L },
+      { 0x51f9d994fb1471L,0x3c3ecfa5283b4eL,0x494dccda63f6ccL,
+        0x4d07b255363a75L,0x0d2b6d3155d118L,0x3c688299fc9497L,
+        0x235692fa3dea3aL } },
+    /* 188 */
+    { { 0x16b4d452669e98L,0x72451fa85406b9L,0x674a145d39151fL,
+        0x325ffd067ae098L,0x527e7805cd1ae0L,0x422a1d1789e48dL,
+        0x3e27be63f55e07L },
+      { 0x7f95f6dee0b63fL,0x008e444cc74969L,0x01348f3a72b614L,
+        0x000cfac81348c3L,0x508ae3e5309ce5L,0x2584fcdee44d34L,
+        0x3a4dd994899ee9L } },
+    /* 189 */
+    { { 0x4d289cc0368708L,0x0e5ebc60dc3b40L,0x78cc44bfab1162L,
+        0x77ef2173b7d11eL,0x06091718e39746L,0x30fe19319b83a4L,
+        0x17e8f2988529c6L },
+      { 0x68188bdcaa9f2aL,0x0e64b1350c1bddL,0x5b18ebac7cc4b3L,
+        0x75315a9fcc046eL,0x36e9770fd43db4L,0x54c5857fc69121L,
+        0x0417e18f3e909aL } },
+    /* 190 */
+    { { 0x29795db38059adL,0x6efd20c8fd4016L,0x3b6d1ce8f95a1aL,
+        0x4db68f177f8238L,0x14ec7278d2340fL,0x47bd77ff2b77abL,
+        0x3d2dc8cd34e9fcL },
+      { 0x285980a5a83f0bL,0x08352e2d516654L,0x74894460481e1bL,
+        0x17f6f3709c480dL,0x6b590d1b55221eL,0x45c100dc4c9be9L,
+        0x1b13225f9d8b91L } },
+    /* 191 */
+    { { 0x0b905fb4b41d9dL,0x48cc8a474cb7a2L,0x4eda67e8de09b2L,
+        0x1de47c829adde8L,0x118ad5b9933d77L,0x7a12665ac3f9a4L,
+        0x05631a4fb52997L },
+      { 0x5fb2a8e6806e63L,0x27d96bbcca369bL,0x46066f1a6b8c7bL,
+        0x63b58fc7ca3072L,0x170a36229c0d62L,0x57176f1e463203L,
+        0x0c7ce083e73b9cL } },
+    /* 192 */
+    { { 0x31caf2c09e1c72L,0x6530253219e9d2L,0x7650c98b601c57L,
+        0x182469f99d56c0L,0x415f65d292b7a7L,0x30f62a55549b8eL,
+        0x30f443f643f465L },
+      { 0x6b35c575ddadd0L,0x14a23cf6d299eeL,0x2f0198c0967d7dL,
+        0x1013058178d5bfL,0x39da601c9cc879L,0x09d8963ec340baL,
+        0x1b735db13ad2a7L } },
+    /* 193 */
+    { { 0x20916ffdc83f01L,0x16892aa7c9f217L,0x6bff179888d532L,
+        0x4adf3c3d366288L,0x41a62b954726aeL,0x3139609022aeb6L,
+        0x3e8ab9b37aff7aL },
+      { 0x76bbc70f24659aL,0x33fa98513886c6L,0x13b26af62c4ea6L,
+        0x3c4d5826389a0cL,0x526ec28c02bf6aL,0x751ff083d79a7cL,
+        0x110ac647990224L } },
+    /* 194 */
+    { { 0x2c6c62fa2b6e20L,0x3d37edad30c299L,0x6ef25b44b65fcaL,
+        0x7470846914558eL,0x712456eb913275L,0x075a967a9a280eL,
+        0x186c8188f2a2a0L },
+      { 0x2f3b41a6a560b1L,0x3a8070b3f9e858L,0x140936ff0e1e78L,
+        0x5fd298abe6da8aL,0x3823a55d08f153L,0x3445eafaee7552L,
+        0x2a5fc96731a8b2L } },
+    /* 195 */
+    { { 0x06317be58edbbbL,0x4a38f3bfbe2786L,0x445b60f75896b7L,
+        0x6ec7c92b5adf57L,0x07b6be8038a441L,0x1bcfe002879655L,
+        0x2a2174037d6d0eL },
+      { 0x776790cf9e48bdL,0x73e14a2c4ed1d3L,0x7eb5ed5f2fc2f7L,
+        0x3e0aedb821b384L,0x0ee3b7e151c12fL,0x51a6a29e044bb2L,
+        0x0ba13a00cb0d86L } },
+    /* 196 */
+    { { 0x77607d563ec8d8L,0x023fc726996e44L,0x6bd63f577a9986L,
+        0x114a6351e53973L,0x3efe97989da046L,0x1051166e117ed7L,
+        0x0354933dd4fb5fL },
+      { 0x7699ca2f30c073L,0x4c973b83b9e6d3L,0x2017c2abdbc3e8L,
+        0x0cdcdd7a26522bL,0x511070f5b23c7dL,0x70672327e83d57L,
+        0x278f842b4a9f26L } },
+    /* 197 */
+    { { 0x0824f0d4ae972fL,0x60578dd08dcf52L,0x48a74858290fbbL,
+        0x7302748bf23030L,0x184b229a178acfL,0x3e8460ade089d6L,
+        0x13f2b557fad533L },
+      { 0x7f96f3ae728d15L,0x018d8d40066341L,0x01fb94955a289aL,
+        0x2d32ed6afc2657L,0x23f4f5e462c3acL,0x60eba5703bfc5aL,
+        0x1b91cc06f16c7aL } },
+    /* 198 */
+    { { 0x411d68af8219b9L,0x79cca36320f4eeL,0x5c404e0ed72e20L,
+        0x417cb8692e43f2L,0x305d29c7d98599L,0x3b754d5794a230L,
+        0x1c97fb4be404e9L },
+      { 0x7cdbafababd109L,0x1ead0eb0ca5090L,0x1a2b56095303e3L,
+        0x75dea935012c8fL,0x67e31c071b1d1dL,0x7c324fbfd172c3L,
+        0x157e257e6498f7L } },
+    /* 199 */
+    { { 0x19b00db175645bL,0x4c4f6cb69725f1L,0x36d9ce67bd47ceL,
+        0x2005e105179d64L,0x7b952e717867feL,0x3c28599204032cL,
+        0x0f5659d44fb347L },
+      { 0x1ebcdedb979775L,0x4378d45cfd11a8L,0x14c85413ca66e9L,
+        0x3dd17d681c8a4dL,0x58368e7dc23142L,0x14f3eaac6116afL,
+        0x0adb45b255f6a0L } },
+    /* 200 */
+    { { 0x2f5e76279ad982L,0x125b3917034d09L,0x3839a6399e6ed3L,
+        0x32fe0b3ebcd6a2L,0x24ccce8be90482L,0x467e26befcc187L,
+        0x2828434e2e218eL },
+      { 0x17247cd386efd9L,0x27f36a468d85c3L,0x65e181ef203bbfL,
+        0x0433a6761120afL,0x1d607a2a8f8625L,0x49f4e55a13d919L,
+        0x3367c3b7943e9dL } },
+    /* 201 */
+    { { 0x3391c7d1a46d4dL,0x38233d602d260cL,0x02127a0f78b7d4L,
+        0x56841c162c24c0L,0x4273648fd09aa8L,0x019480bb0e754eL,
+        0x3b927987b87e58L },
+      { 0x6676be48c76f73L,0x01ec024e9655aeL,0x720fe1c6376704L,
+        0x17e06b98885db3L,0x656adec85a4200L,0x73780893c3ce88L,
+        0x0a339cdd8df664L } },
+    /* 202 */
+    { { 0x69af7244544ac7L,0x31ab7402084d2fL,0x67eceb7ef7cb19L,
+        0x16f8583b996f61L,0x1e208d12faf91aL,0x4a91584ce4a42eL,
+        0x3e08337216c93eL },
+      { 0x7a6eea94f4cf77L,0x07a52894678c60L,0x302dd06b14631eL,
+        0x7fddb7225c9ceaL,0x55e441d7acd153L,0x2a00d4490b0f44L,
+        0x053ef125338cdbL } },
+    /* 203 */
+    { { 0x120c0c51584e3cL,0x78b3efca804f37L,0x662108aefb1dccL,
+        0x11deb55f126709L,0x66def11ada8125L,0x05bbc0d1001711L,
+        0x1ee1c99c7fa316L },
+      { 0x746f287de53510L,0x1733ef2e32d09cL,0x1df64a2b0924beL,
+        0x19758da8f6405eL,0x28f6eb3913e484L,0x7175a1090cc640L,
+        0x048aee0d63f0bcL } },
+    /* 204 */
+    { { 0x1f3b1e3b0b29c3L,0x48649f4882a215L,0x485eca3a9e0dedL,
+        0x4228ba85cc82e4L,0x36da1f39bc9379L,0x1659a7078499d1L,
+        0x0a67d5f6c04188L },
+      { 0x6ac39658afdce3L,0x0d667a0bde8ef6L,0x0ae6ec0bfe8548L,
+        0x6d9cb2650571bfL,0x54bea107760ab9L,0x705c53bd340cf2L,
+        0x111a86b610c70fL } },
+    /* 205 */
+    { { 0x7ecea05c6b8195L,0x4f8be93ce3738dL,0x305de9eb9f5d12L,
+        0x2c3b9d3d474b56L,0x673691a05746c3L,0x2e3482c428c6eaL,
+        0x2a8085fde1f472L },
+      { 0x69d15877fd3226L,0x4609c9ec017cc3L,0x71e9b7fc1c3dbcL,
+        0x4f8951254e2675L,0x63ee9d15afa010L,0x0f05775b645190L,
+        0x28a0a439397ae3L } },
+    /* 206 */
+    { { 0x387fa03e9de330L,0x40cc32b828b6abL,0x02a482fbc04ac9L,
+        0x68cad6e70429b7L,0x741877bff6f2c4L,0x48efe633d3b28bL,
+        0x3e612218fe24b3L },
+      { 0x6fc1d34fe37657L,0x3d04b9e1c8b5a1L,0x6a2c332ef8f163L,
+        0x7ca97e2b135690L,0x37357d2a31208aL,0x29f02f2332bd68L,
+        0x17c674c3e63a57L } },
+    /* 207 */
+    { { 0x683d9a0e6865bbL,0x5e77ec68ad4ce5L,0x4d18f236788bd6L,
+        0x7f34b87204f4e3L,0x391ca40e9e578dL,0x3470ed6ddf4e23L,
+        0x225544b3e50989L },
+      { 0x48eda8cb4e462bL,0x2a948825cf9109L,0x473adedc7e1300L,
+        0x37b843b82192edL,0x2b9ac1537dde36L,0x4efe7412732332L,
+        0x29cc5981b5262bL } },
+    /* 208 */
+    { { 0x190d2fcad260f5L,0x7c53dd81d18027L,0x003def5f55db0eL,
+        0x7f5ed25bee2df7L,0x2b87e9be167d2eL,0x2b999c7bbcd224L,
+        0x1d68a2c260ad50L },
+      { 0x010bcde84607a6L,0x0250de9b7e1bedL,0x746d36bfaf1b56L,
+        0x3359475ff56abbL,0x7e84b9bc440b20L,0x2eaa7e3b52f162L,
+        0x01165412f36a69L } },
+    /* 209 */
+    { { 0x639a02329e5836L,0x7aa3ee2e4d3a27L,0x5bc9b258ecb279L,
+        0x4cb3dfae2d62c6L,0x08d9d3b0c6c437L,0x5a2c177d47eab2L,
+        0x36120479fc1f26L },
+      { 0x7609a75bd20e4aL,0x3ba414e17551fcL,0x42cd800e1b90c9L,
+        0x04921811b88f9bL,0x4443697f9562fdL,0x3a8081b8186959L,
+        0x3f5b5c97379e73L } },
+    /* 210 */
+    { { 0x6fd0e3cf13eafbL,0x3976b5415cbf67L,0x4de40889e48402L,
+        0x17e4d36f24062aL,0x16ae7755cf334bL,0x2730ac94b7e0e1L,
+        0x377592742f48e0L },
+      { 0x5e10b18a045041L,0x682792afaae5a1L,0x19383ec971b816L,
+        0x208b17dae2ffc0L,0x439f9d933179b6L,0x55485a9090bcaeL,
+        0x1c316f42a2a35cL } },
+    /* 211 */
+    { { 0x67173897bdf646L,0x0b6956653ef94eL,0x5be3c97f7ea852L,
+        0x3110c12671f08eL,0x2474076a3fc7ecL,0x53408be503fe72L,
+        0x09155f53a5b44eL },
+      { 0x5c804bdd4c27cdL,0x61e81eb8ffd50eL,0x2f7157fdf84717L,
+        0x081f880d646440L,0x7aa892acddec51L,0x6ae70683443f33L,
+        0x31ed9e8b33a75aL } },
+    /* 212 */
+    { { 0x0d724f8e357586L,0x1febbec91b4134L,0x6ff7b98a9475fdL,
+        0x1c4d9b94e1f364L,0x2b8790499cef00L,0x42fd2080a1b31dL,
+        0x3a3bbc6d9b0145L },
+      { 0x75bfebc37e3ca9L,0x28db49c1723bd7L,0x50b12fa8a1f17aL,
+        0x733d95bbc84b98L,0x45ede81f6c109eL,0x18f5e46fb37b5fL,
+        0x34b980804aaec1L } },
+    /* 213 */
+    { { 0x56060c8a4f57bfL,0x0d2dfe223054c2L,0x718a5bbc03e5d6L,
+        0x7b3344cc19b3b9L,0x4d11c9c054bcefL,0x1f5ad422c22e33L,
+        0x2609299076f86bL },
+      { 0x7b7a5fba89fd01L,0x7013113ef3b016L,0x23d5e0a173e34eL,
+        0x736c14462f0f50L,0x1ef5f7ac74536aL,0x4baba6f4400ea4L,
+        0x17b310612c9828L } },
+    /* 214 */
+    { { 0x4ebb19a708c8d3L,0x209f8c7f03d9bbL,0x00461cfe5798fbL,
+        0x4f93b6ae822fadL,0x2e5b33b5ad5447L,0x40b024e547a84bL,
+        0x22ffad40443385L },
+      { 0x33809c888228bfL,0x559f655fefbe84L,0x0032f529fd2f60L,
+        0x5a2191ece3478cL,0x5b957fcd771246L,0x6fec181f9ed123L,
+        0x33eed3624136a3L } },
+    /* 215 */
+    { { 0x6a5df93b26139aL,0x55076598fd7134L,0x356a592f34f81dL,
+        0x493c6b5a3d4741L,0x435498a4e2a39bL,0x2cd26a0d931c88L,
+        0x01925ea3fc7835L },
+      { 0x6e8d992b1efa05L,0x79508a727c667bL,0x5f3c15e6b4b698L,
+        0x11b6c755257b93L,0x617f5af4b46393L,0x248d995b2b6656L,
+        0x339db62e2e22ecL } },
+    /* 216 */
+    { { 0x52537a083843dcL,0x6a283c82a768c7L,0x13aa6bf25227acL,
+        0x768d76ba8baf5eL,0x682977a6525808L,0x67ace52ac23b0bL,
+        0x2374b5a2ed612dL },
+      { 0x7139e60133c3a4L,0x715697a4f1d446L,0x4b018bf36677a0L,
+        0x1dd43837414d83L,0x505ec70730d4f6L,0x09ac100907fa79L,
+        0x21caad6e03217eL } },
+    /* 217 */
+    { { 0x0776d3999d4d49L,0x33bdd87e8bcff8L,0x1036b87f068fadL,
+        0x0a9b8ffde4c872L,0x7ab2533596b1eaL,0x305a88fb965378L,
+        0x3356d8fa4d65e5L },
+      { 0x3366fa77d1ff11L,0x1e0bdbdcd2075cL,0x46910cefc967caL,
+        0x7ce700737a1ff6L,0x1c5dc15409c9bdL,0x368436b9bdb595L,
+        0x3e7ccd6560b5efL } },
+    /* 218 */
+    { { 0x1443789422c792L,0x524792b1717f2bL,0x1f7c1d95048e7aL,
+        0x5cfe2a225b0d12L,0x245594d29ce85bL,0x20134d254ce168L,
+        0x1b83296803921aL },
+      { 0x79a78285b3beceL,0x3c738c3f3124d6L,0x6ab9d1fe0907cdL,
+        0x0652ceb7fc104cL,0x06b5f58c8ae3fdL,0x486959261c5328L,
+        0x0b3813ae677c90L } },
+    /* 219 */
+    { { 0x66b9941ac37b82L,0x651a4b609b0686L,0x046711edf3fc31L,
+        0x77f89f38faa89bL,0x2683ddbf2d5edbL,0x389ef1dfaa3c25L,
+        0x20b3616e66273eL },
+      { 0x3c6db6e0cb5d37L,0x5d7ae5dc342bc4L,0x74a1dc6c52062bL,
+        0x6f7c0bec109557L,0x5c51f7bc221d91L,0x0d7b5880745288L,
+        0x1c46c145c4b0ddL } },
+    /* 220 */
+    { { 0x59ed485ea99eccL,0x201b71956bc21dL,0x72d5c32f73de65L,
+        0x1aefd76547643eL,0x580a452cfb2c2dL,0x7cb1a63f5c4dc9L,
+        0x39a8df727737aaL },
+      { 0x365a341deca452L,0x714a1ad1689cbaL,0x16981d12c42697L,
+        0x5a124f4ac91c75L,0x1b2e3f2fedc0dbL,0x4a1c72b8e9d521L,
+        0x3855b4694e4e20L } },
+    /* 221 */
+    { { 0x16b3d047181ae9L,0x17508832f011afL,0x50d33cfeb2ebd1L,
+        0x1deae237349984L,0x147c641aa6adecL,0x24a9fb4ebb1ddbL,
+        0x2b367504a7a969L },
+      { 0x4c55a3d430301bL,0x379ef6a5d492cbL,0x3c56541fc0f269L,
+        0x73a546e91698ceL,0x2c2b62ee0b9b5dL,0x6284184d43d0efL,
+        0x0e1f5cf6a4b9f0L } },
+    /* 222 */
+    { { 0x44833e8cd3fdacL,0x28e6665cb71c27L,0x2f8bf87f4ddbf3L,
+        0x6cc6c767fb38daL,0x3bc114d734e8b5L,0x12963d5a78ca29L,
+        0x34532a161ece41L },
+      { 0x2443af5d2d37e9L,0x54e6008c8c452bL,0x2c55d54111cf1bL,
+        0x55ac7f7522575aL,0x00a6fba3f8575fL,0x3f92ef3b793b8dL,
+        0x387b97d69ecdf7L } },
+    /* 223 */
+    { { 0x0b464812d29f46L,0x36161daa626f9aL,0x5202fbdb264ca5L,
+        0x21245805ff1304L,0x7f9c4a65657885L,0x542d3887f9501cL,
+        0x086420deef8507L },
+      { 0x5e159aa1b26cfbL,0x3f0ef5ffd0a50eL,0x364b29663a432aL,
+        0x49c56888af32a8L,0x6f937e3e0945d1L,0x3cbdeec6d766cdL,
+        0x2d80d342ece61aL } },
+    /* 224 */
+    { { 0x255e3026d8356eL,0x4ddba628c4de9aL,0x074323b593e0d9L,
+        0x333bdb0a10eefbL,0x318b396e473c52L,0x6ebb5a95efd3d3L,
+        0x3f3bff52aa4e4fL },
+      { 0x3138a111c731d5L,0x674365e283b308L,0x5585edd9c416f2L,
+        0x466763d9070fd4L,0x1b568befce8128L,0x16eb040e7b921eL,
+        0x3d5c898687c157L } },
+    /* 225 */
+    { { 0x14827736973088L,0x4e110d53f301e6L,0x1f811b09870023L,
+        0x53b5e500dbcacaL,0x4ddf0df1e6a7dcL,0x1e9575fb10ce35L,
+        0x3fdc153644d936L },
+      { 0x763547e2260594L,0x26e5ae764efc59L,0x13be6f4d791a29L,
+        0x2021e61e3a0cf1L,0x339cd2b4a1c202L,0x5c7451e08f5121L,
+        0x3728b3a851be68L } },
+    /* 226 */
+    { { 0x78873653277538L,0x444b9ed2ee7156L,0x79ac8b8b069cd3L,
+        0x5f0e90933770e8L,0x307662c615389eL,0x40fe6d95a80057L,
+        0x04822170cf993cL },
+      { 0x677d5690fbfec2L,0x0355af4ae95cb3L,0x417411794fe79eL,
+        0x48daf87400a085L,0x33521d3b5f0aaaL,0x53567a3be00ff7L,
+        0x04712ccfb1cafbL } },
+    /* 227 */
+    { { 0x2b983283c3a7f3L,0x579f11b146a9a6L,0x1143d3b16a020eL,
+        0x20f1483ef58b20L,0x3f03e18d747f06L,0x3129d12f15de37L,
+        0x24c911f7222833L },
+      { 0x1e0febcf3d5897L,0x505e26c01cdaacL,0x4f45a9adcff0e9L,
+        0x14dfac063c5cebL,0x69e5ce713fededL,0x3481444a44611aL,
+        0x0ea49295c7fdffL } },
+    /* 228 */
+    { { 0x64554cb4093beeL,0x344b4b18dd81f6L,0x350f43b4de9b59L,
+        0x28a96a220934caL,0x4aa8da5689a515L,0x27171cbd518509L,
+        0x0cfc1753f47c95L },
+      { 0x7dfe091b615d6eL,0x7d1ee0aa0fb5c1L,0x145eef3200b7b5L,
+        0x33fe88feeab18fL,0x1d62d4f87453e2L,0x43b8db4e47fff1L,
+        0x1572f2b8b8f368L } },
+    /* 229 */
+    { { 0x6bc94e6b4e84f3L,0x60629dee586a66L,0x3bbad5fe65ca18L,
+        0x217670db6c2fefL,0x0320a7f4e3272aL,0x3ccff0d976a6deL,
+        0x3c26da8ae48cccL },
+      { 0x53ecf156778435L,0x7533064765a443L,0x6c5c12f03ca5deL,
+        0x44f8245350dabfL,0x342cdd777cf8b3L,0x2b539c42e9f58dL,
+        0x10138affc279b1L } },
+    /* 230 */
+    { { 0x1b135e204c5ddbL,0x40887dfeaa1d37L,0x7fb0ef83da76ffL,
+        0x521f2b79af55a5L,0x3f9b38b4c3f0d0L,0x20a9838cce61ceL,
+        0x24bb4e2f4b1e32L },
+      { 0x003f6aa386e27cL,0x68df59db0a0f8eL,0x21677d5192e713L,
+        0x14ab9757501276L,0x411944af961524L,0x3184f39abc5c3fL,
+        0x2a8dda80ca078dL } },
+    /* 231 */
+    { { 0x0592233cdbc95cL,0x54d5de5c66f40fL,0x351caa1512ab86L,
+        0x681bdbee020084L,0x6ee2480c853e68L,0x6a5a44262b918fL,
+        0x06574e15a3b91dL },
+      { 0x31ba03dacd7fbeL,0x0c3da7c18a57a9L,0x49aaaded492d6bL,
+        0x3071ff53469e02L,0x5efb4f0d7248c6L,0x6db5fb67f12628L,
+        0x29cff668e3d024L } },
+    /* 232 */
+    { { 0x1b9ef3bb1b17ceL,0x6ccf8c24fe6312L,0x34c15487f45008L,
+        0x1a84044095972cL,0x515073a47e449eL,0x2ddc93f9097feeL,
+        0x1008fdc894c434L },
+      { 0x08e5edb73399faL,0x65b1aa65547d4cL,0x3a117a1057c498L,
+        0x7e16c3089d13acL,0x502f2ae4b6f851L,0x57a70f3eb62673L,
+        0x111b48a9a03667L } },
+    /* 233 */
+    { { 0x5023024be164f1L,0x25ad117032401eL,0x46612b3bfe3427L,
+        0x2f4f406a8a02b7L,0x16a93a5c4ddf07L,0x7ee71968fcdbe9L,
+        0x2267875ace37daL },
+      { 0x687e88b59eb2a6L,0x3ac7368fe716d3L,0x28d953a554a036L,
+        0x34d52c0acca08fL,0x742a7cf8dd4fd9L,0x10bfeb8575ea60L,
+        0x290e454d868dccL } },
+    /* 234 */
+    { { 0x4e72a3a8a4bdd2L,0x1ba36d1dee04d5L,0x7a43136b63195bL,
+        0x6ca8e286a519f3L,0x568e64aece08a9L,0x571d5000b5c10bL,
+        0x3f75e9f5dbdd40L },
+      { 0x6fb0a698d6fa45L,0x0ce42209d7199cL,0x1f68275f708a3eL,
+        0x5749832e91ec3cL,0x6c3665521428b2L,0x14b2bf5747bd4aL,
+        0x3b6f940e42a22bL } },
+    /* 235 */
+    { { 0x4da0adbfb26c82L,0x16792a585f39acL,0x17df9dfda3975cL,
+        0x4796b4afaf479bL,0x67be67234e0020L,0x69df5f201dda25L,
+        0x09f71a4d12b3dcL },
+      { 0x64ff5ec260a46aL,0x579c5b86385101L,0x4f29a7d549f697L,
+        0x4e64261242e2ebL,0x54ecacdfb6b296L,0x46e0638b5fddadL,
+        0x31eefd3208891dL } },
+    /* 236 */
+    { { 0x5b72c749fe01b2L,0x230cf27523713aL,0x533d1810e0d1e1L,
+        0x5590db7d1dd1e2L,0x7b8ab73e8e43d3L,0x4c8a19bd1c17caL,
+        0x19222ce9f74810L },
+      { 0x6398b3dddc4582L,0x0352b7d88dfd53L,0x3c55b4e10c5a63L,
+        0x38194d13f8a237L,0x106683fd25dd87L,0x59e0b62443458eL,
+        0x196cb70aa9cbb9L } },
+    /* 237 */
+    { { 0x2885f7cd021d63L,0x162bfd4c3e1043L,0x77173dcf98fcd1L,
+        0x13d4591d6add36L,0x59311154d0d8f2L,0x74336e86e79b8aL,
+        0x13faadc5661883L },
+      { 0x18938e7d9ec924L,0x14bcda8fcaa0a1L,0x706d85d41a1355L,
+        0x0ac34520d168deL,0x5a92499fe17826L,0x36c2e3b4f00600L,
+        0x29c2fd7b5f63deL } },
+    /* 238 */
+    { { 0x41250dfe2216c5L,0x44a0ec0366a217L,0x575bc1adf8b0dfL,
+        0x5ff5cdbdb1800bL,0x7843d4dde8ca18L,0x5fa9e420865705L,
+        0x235c38be6c6b02L },
+      { 0x473b78aae91abbL,0x39470c6051e44bL,0x3f973cc2dc08c3L,
+        0x2837932c5c91f6L,0x25e39ed754ec25L,0x1371c837118e53L,
+        0x3b99f3b0aeafe2L } },
+    /* 239 */
+    { { 0x03acf51be46c65L,0x271fceacbaf5c3L,0x476589ed3a5e25L,
+        0x78ec8c3c3c399cL,0x1f5c8bf4ac4c19L,0x730bb733ec68d2L,
+        0x29a37e00dd287eL },
+      { 0x448ed1bf92b5faL,0x10827c17b86478L,0x55e6fc05b28263L,
+        0x0af1226c73a66aL,0x0b66e5df0d09c1L,0x26128315a02682L,
+        0x22d84932c5e808L } },
+    /* 240 */
+    { { 0x5ec3afc26e3392L,0x08e142e45c0084L,0x4388d5ad0f01feL,
+        0x0f7acd36e6140cL,0x028c14ed97dffbL,0x311845675a38c6L,
+        0x01c1c8f09a3062L },
+      { 0x5a302f4cf49e7dL,0x79267e254a44e1L,0x746165052317a1L,
+        0x53a09263a566e8L,0x7d478ad5f73abcL,0x187ce5c947dad3L,
+        0x18564e1a1ec45fL } },
+    /* 241 */
+    { { 0x7b9577a9aa0486L,0x766b40c7aaaef6L,0x1f6a411f5db907L,
+        0x4543dd4d80beaeL,0x0ad938c7482806L,0x451568bf4b9be1L,
+        0x3367ec85d30a22L },
+      { 0x5446425747843dL,0x18d94ac223c6b2L,0x052ff3a354d359L,
+        0x0b4933f89723f5L,0x03fb517740e056L,0x226b892871dddaL,
+        0x2768c2b753f0fdL } },
+    /* 242 */
+    { { 0x685282ccfa5200L,0x411ed433627b89L,0x77d5c9b8bc9c1dL,
+        0x4a13ef2ee5cd29L,0x5582a612407c9eL,0x2307cb42fc3aa9L,
+        0x2e661df79956b8L },
+      { 0x0e972b015254deL,0x5b63e14def8adeL,0x06995be2ca4a95L,
+        0x6cc0cc1e94bf27L,0x7ed8499fe0052aL,0x671a6ca5a5e0f9L,
+        0x31e10d4ba10f05L } },
+    /* 243 */
+    { { 0x690af07e9b2d8aL,0x6030af9e32c8ddL,0x45c7ca3bf2b235L,
+        0x40959077b76c81L,0x61eee7f70d5a96L,0x6b04f6aafe9e38L,
+        0x3c726f55f1898dL },
+      { 0x77d0142a1a6194L,0x1c1631215708b9L,0x403a4f0a9b7585L,
+        0x066c8e29f7cef0L,0x6fc32f98cf575eL,0x518a09d818c297L,
+        0x34144e99989e75L } },
+    /* 244 */
+    { { 0x6adbada859fb6aL,0x0dcfb6506ccd51L,0x68f88b8d573e0dL,
+        0x4b1ce35bd9af30L,0x241c8293ece2c9L,0x3b5f402c5c4adeL,
+        0x34b9b1ee6fde87L },
+      { 0x5e625340075e63L,0x54c3f3d9050da1L,0x2a3f9152509016L,
+        0x3274e46111bc18L,0x3a7504fd01ac73L,0x4169b387a43209L,
+        0x35626f852bc6d4L } },
+    /* 245 */
+    { { 0x576a4f4662e53bL,0x5ea3f20eecec26L,0x4e5f02be5cd7b0L,
+        0x72cc5ac3314be8L,0x0f604ed3201fe9L,0x2a29378ea54bceL,
+        0x2d52bd4d6ec4b6L },
+      { 0x6a4c2b212c1c76L,0x778fd64a1bfa6dL,0x326828691863d6L,
+        0x5616c8bd06a336L,0x5fab552564da4dL,0x46640cab3e91d2L,
+        0x1d21f06427299eL } },
+    /* 246 */
+    { { 0x2bfe37dde98e9cL,0x164c54822332ebL,0x5b736c7df266e4L,
+        0x59dab3a8da084cL,0x0ae1eab346f118L,0x182090a4327e3fL,
+        0x07b13489dae2e6L },
+      { 0x3bc92645452baaL,0x30b159894ae574L,0x5b947c5c78e1f4L,
+        0x18f0e004a3c77fL,0x48ca8f357077d9L,0x349ffdcef9bca9L,
+        0x3ed224bfd54772L } },
+    /* 247 */
+    { { 0x1bdad02db8dff8L,0x69fab4450b44b6L,0x3b6802d187518bL,
+        0x098368d8eb556cL,0x3fe1943fbefcf4L,0x008851d0de6d42L,
+        0x322cbc4605fe25L },
+      { 0x2528aaf0d51afbL,0x7d48a9363a0cecL,0x4ba8f77d9a8f8bL,
+        0x7dee903437d6c7L,0x1ff5a0d9ccc4b4L,0x34d9bd2fa99831L,
+        0x30d9e4f58667c6L } },
+    /* 248 */
+    { { 0x38909b51b85197L,0x7ba16992512bd4L,0x2c776cfcfffec5L,
+        0x2be7879075843cL,0x557e2b05d28ffcL,0x641b17bc5ce357L,
+        0x1fcaf8a3710306L },
+      { 0x54dca2299a2d48L,0x745d06ef305acaL,0x7c41c65c6944c2L,
+        0x679412ec431902L,0x48f2b15ee62827L,0x341a96d8afe06eL,
+        0x2a78fd3690c0e1L } },
+    /* 249 */
+    { { 0x6b7cec83fbc9c6L,0x238e8a82eefc67L,0x5d3c1d9ff0928cL,
+        0x55b816d6409bbfL,0x7969612adae364L,0x55b6ff96db654eL,
+        0x129beca10073a9L },
+      { 0x0b1d2acdfc73deL,0x5d1a3605fa64bdL,0x436076146743beL,
+        0x64044b89fcce0cL,0x7ae7b3c18f7fafL,0x7f083ee27cea36L,
+        0x0292cd0d7c1ff0L } },
+    /* 250 */
+    { { 0x5a3c4c019b7d2eL,0x1a35a9b89712fbL,0x38736cc4f18c72L,
+        0x603dd832a44e6bL,0x000d1d44aed104L,0x69b1f2fc274ebeL,
+        0x03a7b993f76977L },
+      { 0x299f3b3e346910L,0x5243f45295afd5L,0x34342cbfa588bdL,
+        0x72c40dd1155510L,0x718024fed2f991L,0x2f935e765ad82aL,
+        0x246799ea371fb8L } },
+    /* 251 */
+    { { 0x24fe4c76250533L,0x01cafb02fdf18eL,0x505cb25d462882L,
+        0x3e038175157d87L,0x7e3e99b10cdeb1L,0x38b7e72ebc7936L,
+        0x081845f7c73433L },
+      { 0x049e61be05ebd5L,0x6ab82d8f0581f6L,0x62adffb427ac2eL,
+        0x19431f809d198dL,0x36195f6c58b1d6L,0x22cc4c9dedc9a7L,
+        0x24b146d8e694fcL } },
+    /* 252 */
+    { { 0x7c7bc8288b364dL,0x5c10f683cb894aL,0x19a62a68452958L,
+        0x1fc24dcb4ce90eL,0x726baa4ed9581fL,0x1f34447dde73d6L,
+        0x04c56708f30a21L },
+      { 0x131e583a3f4963L,0x071215b4d502e7L,0x196aca542e5940L,
+        0x3afd5a91f7450eL,0x671b6eedf49497L,0x6aac7aca5c29e4L,
+        0x3fb512470f138bL } },
+    /* 253 */
+    { { 0x5eadc3f4eb453eL,0x16c795ba34b666L,0x5d7612a4697fddL,
+        0x24dd19bb499e86L,0x415b89ca3eeb9bL,0x7c83edf599d809L,
+        0x13bc64c9b70269L },
+      { 0x52d3243dca3233L,0x0b21444b3a96a7L,0x6d551bc0083b90L,
+        0x4f535b88c61176L,0x11e61924298010L,0x0a155b415bb61dL,
+        0x17f94fbd26658fL } },
+    /* 254 */
+    { { 0x2dd06b90c28c65L,0x48582339c8fa6eL,0x01ac8bf2085d94L,
+        0x053e660e020fdcL,0x1bece667edf07bL,0x4558f2b33ce24cL,
+        0x2f1a766e8673fcL },
+      { 0x1d77cd13c06819L,0x4d5dc5056f3a01L,0x18896c6fa18d69L,
+        0x120047ca76d625L,0x6af8457d4f4e45L,0x70ddc53358b60aL,
+        0x330e11130e82f0L } },
+    /* 255 */
+    { { 0x0643b1cd4c2356L,0x10a2ea0a8f7c92L,0x2752513011d029L,
+        0x4cd4c50321f579L,0x5fdf9ba5724792L,0x2f691653e2ddc0L,
+        0x0cfed3d84226cbL },
+      { 0x704902a950f955L,0x069bfdb87bbf0cL,0x5817eeda8a5f84L,
+        0x1914cdd9089905L,0x0e4a323d7b93f4L,0x1cc3fc340af0b2L,
+        0x23874161bd6303L } },
+};
+
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_base_7(sp_point_384* r, const sp_digit* k,
+        int map, void* heap)
+{
+    return sp_384_ecc_mulmod_stripe_7(r, &p384_base, p384_table,
+                                      k, map, heap);
+}
+
+#endif
+
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * km    Scalar to multiply by.
+ * r     Resulting point.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_base_384(mp_int* km, ecc_point* r, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[7];
+#endif
+    sp_point_384* point;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    err = sp_384_point_new_7(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+    if (err == MP_OKAY) {
+        sp_384_from_mp(k, 7, km);
+
+            err = sp_384_ecc_mulmod_base_7(point, k, map, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_to_ecc_point_7(point, r);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_7(point, 0, heap);
+
+    return err;
+}
+
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+                                                        defined(HAVE_ECC_VERIFY)
+/* Returns 1 if the number of zero.
+ * Implementation is constant time.
+ *
+ * a  Number to check.
+ * returns 1 if the number is zero and 0 otherwise.
+ */
+static int sp_384_iszero_7(const sp_digit* a)
+{
+    return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6]) == 0;
+}
+
+#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+/* Add 1 to a. (a = a + 1)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_384_add_one_7(sp_digit* a)
+{
+    a[0]++;
+    sp_384_norm_7(a);
+}
+
+/* Read big endian unsigned byte array into r.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
+ * a  Byte array.
+ * n  Number of bytes in array to read.
+ */
+static void sp_384_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = n-1; i >= 0; i--) {
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 47U) {
+            r[j] &= 0x7fffffffffffffL;
+            s = 55U - s;
+            if (j + 1 >= size) {
+                break;
+            }
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+}
+
+/* Generates a scalar that is in the range 1..order-1.
+ *
+ * rng  Random number generator.
+ * k    Scalar value.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+static int sp_384_ecc_gen_k_7(WC_RNG* rng, sp_digit* k)
+{
+    int err;
+    byte buf[48];
+
+    do {
+        err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf));
+        if (err == 0) {
+            sp_384_from_bin(k, 7, buf, (int)sizeof(buf));
+            if (sp_384_cmp_7(k, p384_order2) < 0) {
+                sp_384_add_one_7(k);
+                break;
+            }
+        }
+    }
+    while (err == 0);
+
+    return err;
+}
+
+/* Makes a random EC key pair.
+ *
+ * rng   Random number generator.
+ * priv  Generated private value.
+ * pub   Generated public point.
+ * heap  Heap to use for allocation.
+ * returns ECC_INF_E when the point does not have the correct order, RNG
+ * failures, MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_make_key_384(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[7];
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_384 inf;
+#endif
+#endif
+    sp_point_384* point;
+    sp_digit* k = NULL;
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_384* infinity;
+#endif
+    int err;
+
+    (void)heap;
+
+    err = sp_384_point_new_7(heap, p, point);
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_7(heap, inf, infinity);
+    }
+#endif
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+
+    if (err == MP_OKAY) {
+        err = sp_384_ecc_gen_k_7(rng, k);
+    }
+    if (err == MP_OKAY) {
+            err = sp_384_ecc_mulmod_base_7(point, k, 1, NULL);
+    }
+
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    if (err == MP_OKAY) {
+            err = sp_384_ecc_mulmod_7(infinity, point, p384_order, 1, NULL);
+    }
+    if (err == MP_OKAY) {
+        if ((sp_384_iszero_7(point->x) == 0) || (sp_384_iszero_7(point->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(k, priv);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_to_ecc_point_7(point, pub);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_384_point_free_7(infinity, 1, heap);
+#endif
+    sp_384_point_free_7(point, 1, heap);
+
+    return err;
+}
+
+#ifdef HAVE_ECC_DHE
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 48
+ *
+ * r  A single precision integer.
+ * a  Byte array.
+ */
+static void sp_384_to_bin(sp_digit* r, byte* a)
+{
+    int i, j, s = 0, b;
+
+    for (i=0; i<6; i++) {
+        r[i+1] += r[i] >> 55;
+        r[i] &= 0x7fffffffffffffL;
+    }
+    j = 384 / 8 - 1;
+    a[j] = 0;
+    for (i=0; i<7 && j>=0; i++) {
+        b = 0;
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
+            break;
+        }
+        while (b < 55) {
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
+                break;
+            }
+        }
+        s = 8 - (b - 55);
+        if (j >= 0) {
+            a[j] = 0;
+        }
+        if (s != 0) {
+            j++;
+        }
+    }
+}
+
+/* Multiply the point by the scalar and serialize the X ordinate.
+ * The number is 0 padded to maximum size on output.
+ *
+ * priv    Scalar to multiply the point by.
+ * pub     Point to multiply.
+ * out     Buffer to hold X ordinate.
+ * outLen  On entry, size of the buffer in bytes.
+ *         On exit, length of data in buffer in bytes.
+ * heap    Heap to use for allocation.
+ * returns BUFFER_E if the buffer is to small for output size,
+ * MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_secret_gen_384(mp_int* priv, ecc_point* pub, byte* out,
+                          word32* outLen, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[7];
+#endif
+    sp_point_384* point = NULL;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    if (*outLen < 48U) {
+        err = BUFFER_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_7(heap, p, point);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL)
+            err = MEMORY_E;
+    }
+#else
+    k = kd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_384_from_mp(k, 7, priv);
+        sp_384_point_from_ecc_point_7(point, pub);
+            err = sp_384_ecc_mulmod_7(point, point, k, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        sp_384_to_bin(point->x, out);
+        *outLen = 48;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_7(point, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_DHE */
+
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* Multiply a by scalar b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A scalar.
+ */
+SP_NOINLINE static void sp_384_mul_d_7(sp_digit* r, const sp_digit* a,
+    sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int128_t tb = b;
+    int128_t t = 0;
+    int i;
+
+    for (i = 0; i < 7; i++) {
+        t += tb * a[i];
+        r[i] = t & 0x7fffffffffffffL;
+        t >>= 55;
+    }
+    r[7] = (sp_digit)t;
+#else
+    int128_t tb = b;
+    int128_t t[7];
+
+    t[ 0] = tb * a[ 0];
+    t[ 1] = tb * a[ 1];
+    t[ 2] = tb * a[ 2];
+    t[ 3] = tb * a[ 3];
+    t[ 4] = tb * a[ 4];
+    t[ 5] = tb * a[ 5];
+    t[ 6] = tb * a[ 6];
+    r[ 0] =                           (t[ 0] & 0x7fffffffffffffL);
+    r[ 1] = (sp_digit)(t[ 0] >> 55) + (t[ 1] & 0x7fffffffffffffL);
+    r[ 2] = (sp_digit)(t[ 1] >> 55) + (t[ 2] & 0x7fffffffffffffL);
+    r[ 3] = (sp_digit)(t[ 2] >> 55) + (t[ 3] & 0x7fffffffffffffL);
+    r[ 4] = (sp_digit)(t[ 3] >> 55) + (t[ 4] & 0x7fffffffffffffL);
+    r[ 5] = (sp_digit)(t[ 4] >> 55) + (t[ 5] & 0x7fffffffffffffL);
+    r[ 6] = (sp_digit)(t[ 5] >> 55) + (t[ 6] & 0x7fffffffffffffL);
+    r[ 7] = (sp_digit)(t[ 6] >> 55);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#ifdef WOLFSSL_SP_DIV_64
+static WC_INLINE sp_digit sp_384_div_word_7(sp_digit d1, sp_digit d0,
+    sp_digit dv)
+{
+    sp_digit d, r, t;
+
+    /* All 55 bits from d1 and top 8 bits from d0. */
+    d = (d1 << 8) | (d0 >> 47);
+    r = d / dv;
+    d -= r * dv;
+    /* Up to 9 bits in r */
+    /* Next 8 bits from d0. */
+    r <<= 8;
+    d <<= 8;
+    d |= (d0 >> 39) & ((1 << 8) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 17 bits in r */
+    /* Next 8 bits from d0. */
+    r <<= 8;
+    d <<= 8;
+    d |= (d0 >> 31) & ((1 << 8) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 25 bits in r */
+    /* Next 8 bits from d0. */
+    r <<= 8;
+    d <<= 8;
+    d |= (d0 >> 23) & ((1 << 8) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 33 bits in r */
+    /* Next 8 bits from d0. */
+    r <<= 8;
+    d <<= 8;
+    d |= (d0 >> 15) & ((1 << 8) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 41 bits in r */
+    /* Next 8 bits from d0. */
+    r <<= 8;
+    d <<= 8;
+    d |= (d0 >> 7) & ((1 << 8) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 49 bits in r */
+    /* Remaining 7 bits from d0. */
+    r <<= 7;
+    d <<= 7;
+    d |= d0 & ((1 << 7) - 1);
+    t = d / dv;
+    r += t;
+
+    return r;
+}
+#endif /* WOLFSSL_SP_DIV_64 */
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Number to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_384_div_7(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    int i;
+#ifndef WOLFSSL_SP_DIV_64
+    int128_t d1;
+#endif
+    sp_digit dv, r1;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* td;
+#else
+    sp_digit t1d[14], t2d[7 + 1];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    int err = MP_OKAY;
+
+    (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (3 * 7 + 1), NULL,
+                                                       DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = td;
+        t2 = td + 2 * 7;
+#else
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
+        dv = d[6];
+        XMEMCPY(t1, a, sizeof(*t1) * 2U * 7U);
+        for (i=6; i>=0; i--) {
+            t1[7 + i] += t1[7 + i - 1] >> 55;
+            t1[7 + i - 1] &= 0x7fffffffffffffL;
+#ifndef WOLFSSL_SP_DIV_64
+            d1 = t1[7 + i];
+            d1 <<= 55;
+            d1 += t1[7 + i - 1];
+            r1 = (sp_digit)(d1 / dv);
+#else
+            r1 = sp_384_div_word_7(t1[7 + i], t1[7 + i - 1], dv);
+#endif
+
+            sp_384_mul_d_7(t2, d, r1);
+            (void)sp_384_sub_7(&t1[i], &t1[i], t2);
+            t1[7 + i] -= t2[7];
+            t1[7 + i] += t1[7 + i - 1] >> 55;
+            t1[7 + i - 1] &= 0x7fffffffffffffL;
+            r1 = (((-t1[7 + i]) << 55) - t1[7 + i - 1]) / dv;
+            r1++;
+            sp_384_mul_d_7(t2, d, r1);
+            (void)sp_384_add_7(&t1[i], &t1[i], t2);
+            t1[7 + i] += t1[7 + i - 1] >> 55;
+            t1[7 + i - 1] &= 0x7fffffffffffffL;
+        }
+        t1[7 - 1] += t1[7 - 2] >> 55;
+        t1[7 - 2] &= 0x7fffffffffffffL;
+        r1 = t1[7 - 1] / dv;
+
+        sp_384_mul_d_7(t2, d, r1);
+        (void)sp_384_sub_7(t1, t1, t2);
+        XMEMCPY(r, t1, sizeof(*r) * 2U * 7U);
+        for (i=0; i<5; i++) {
+            r[i+1] += r[i] >> 55;
+            r[i] &= 0x7fffffffffffffL;
+        }
+        sp_384_cond_add_7(r, r, d, 0 - ((r[6] < 0) ?
+                    (sp_digit)1 : (sp_digit)0));
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_384_mod_7(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_384_div_7(a, m, NULL, r);
+}
+
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
+/* Order-2 for the P384 curve. */
+static const uint64_t p384_order_minus_2[6] = {
+    0xecec196accc52971U,0x581a0db248b0a77aU,0xc7634d81f4372ddfU,
+    0xffffffffffffffffU,0xffffffffffffffffU,0xffffffffffffffffU
+};
+#else
+/* The low half of the order-2 of the P384 curve. */
+static const uint64_t p384_order_low[3] = {
+    0xecec196accc52971U,0x581a0db248b0a77aU,0xc7634d81f4372ddfU
+    
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Multiply two number mod the order of P384 curve. (r = a * b mod order)
+ *
+ * r  Result of the multiplication.
+ * a  First operand of the multiplication.
+ * b  Second operand of the multiplication.
+ */
+static void sp_384_mont_mul_order_7(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+    sp_384_mul_7(r, a, b);
+    sp_384_mont_reduce_order_7(r, p384_order, p384_mp_order);
+}
+
+/* Square number mod the order of P384 curve. (r = a * a mod order)
+ *
+ * r  Result of the squaring.
+ * a  Number to square.
+ */
+static void sp_384_mont_sqr_order_7(sp_digit* r, const sp_digit* a)
+{
+    sp_384_sqr_7(r, a);
+    sp_384_mont_reduce_order_7(r, p384_order, p384_mp_order);
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Square number mod the order of P384 curve a number of times.
+ * (r = a ^ n mod order)
+ *
+ * r  Result of the squaring.
+ * a  Number to square.
+ */
+static void sp_384_mont_sqr_n_order_7(sp_digit* r, const sp_digit* a, int n)
+{
+    int i;
+
+    sp_384_mont_sqr_order_7(r, a);
+    for (i=1; i<n; i++) {
+        sp_384_mont_sqr_order_7(r, r);
+    }
+}
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the order of the P384 curve.
+ * (r = 1 / a mod order)
+ *
+ * r   Inverse result.
+ * a   Number to invert.
+ * td  Temporary data.
+ */
+static void sp_384_mont_inv_order_7(sp_digit* r, const sp_digit* a,
+        sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    XMEMCPY(t, a, sizeof(sp_digit) * 7);
+    for (i=382; i>=0; i--) {
+        sp_384_mont_sqr_order_7(t, t);
+        if ((p384_order_minus_2[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
+            sp_384_mont_mul_order_7(t, t, a);
+        }
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 7U);
+#else
+    sp_digit* t = td;
+    sp_digit* t2 = td + 2 * 7;
+    sp_digit* t3 = td + 4 * 7;
+    int i;
+
+    /* t = a^2 */
+    sp_384_mont_sqr_order_7(t, a);
+    /* t = a^3 = t * a */
+    sp_384_mont_mul_order_7(t, t, a);
+    /* t2= a^c = t ^ 2 ^ 2 */
+    sp_384_mont_sqr_n_order_7(t2, t, 2);
+    /* t = a^f = t2 * t */
+    sp_384_mont_mul_order_7(t, t2, t);
+    /* t2= a^f0 = t ^ 2 ^ 4 */
+    sp_384_mont_sqr_n_order_7(t2, t, 4);
+    /* t = a^ff = t2 * t */
+    sp_384_mont_mul_order_7(t, t2, t);
+    /* t2= a^ff00 = t ^ 2 ^ 8 */
+    sp_384_mont_sqr_n_order_7(t2, t, 8);
+    /* t3= a^ffff = t2 * t */
+    sp_384_mont_mul_order_7(t3, t2, t);
+    /* t2= a^ffff0000 = t3 ^ 2 ^ 16 */
+    sp_384_mont_sqr_n_order_7(t2, t3, 16);
+    /* t = a^ffffffff = t2 * t3 */
+    sp_384_mont_mul_order_7(t, t2, t3);
+    /* t2= a^ffffffff0000 = t ^ 2 ^ 16  */
+    sp_384_mont_sqr_n_order_7(t2, t, 16);
+    /* t = a^ffffffffffff = t2 * t3 */
+    sp_384_mont_mul_order_7(t, t2, t3);
+    /* t2= a^ffffffffffff000000000000 = t ^ 2 ^ 48  */
+    sp_384_mont_sqr_n_order_7(t2, t, 48);
+    /* t= a^fffffffffffffffffffffffff = t2 * t */
+    sp_384_mont_mul_order_7(t, t2, t);
+    /* t2= a^ffffffffffffffffffffffff000000000000000000000000 */
+    sp_384_mont_sqr_n_order_7(t2, t, 96);
+    /* t2= a^ffffffffffffffffffffffffffffffffffffffffffffffff = t2 * t */
+    sp_384_mont_mul_order_7(t2, t2, t);
+    for (i=191; i>=1; i--) {
+        sp_384_mont_sqr_order_7(t2, t2);
+        if (((sp_digit)p384_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
+            sp_384_mont_mul_order_7(t2, t2, a);
+        }
+    }
+    sp_384_mont_sqr_order_7(t2, t2);
+    sp_384_mont_mul_order_7(r, t2, a);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+#ifdef HAVE_ECC_SIGN
+#ifndef SP_ECC_MAX_SIG_GEN
+#define SP_ECC_MAX_SIG_GEN  64
+#endif
+
+/* Sign the hash using the private key.
+ *   e = [hash, 384 bits] from binary
+ *   r = (k.G)->x mod order
+ *   s = (r * x + e) / k mod order
+ * The hash is truncated to the first 384 bits.
+ *
+ * hash     Hash to sign.
+ * hashLen  Length of the hash data.
+ * rng      Random number generator.
+ * priv     Private part of key - scalar.
+ * rm       First part of result as an mp_int.
+ * sm       Sirst part of result as an mp_int.
+ * heap     Heap to use for allocation.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
+                    mp_int* rm, mp_int* sm, mp_int* km, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit ed[2*7];
+    sp_digit xd[2*7];
+    sp_digit kd[2*7];
+    sp_digit rd[2*7];
+    sp_digit td[3 * 2*7];
+    sp_point_384 p;
+#endif
+    sp_digit* e = NULL;
+    sp_digit* x = NULL;
+    sp_digit* k = NULL;
+    sp_digit* r = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_384* point = NULL;
+    sp_digit carry;
+    sp_digit* s = NULL;
+    sp_digit* kInv = NULL;
+    int err = MP_OKAY;
+    int64_t c;
+    int i;
+
+    (void)heap;
+
+    err = sp_384_point_new_7(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 7, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        e = d + 0 * 7;
+        x = d + 2 * 7;
+        k = d + 4 * 7;
+        r = d + 6 * 7;
+        tmp = d + 8 * 7;
+#else
+        e = ed;
+        x = xd;
+        k = kd;
+        r = rd;
+        tmp = td;
+#endif
+        s = e;
+        kInv = k;
+
+        if (hashLen > 48U) {
+            hashLen = 48U;
+        }
+
+        sp_384_from_bin(e, 7, hash, (int)hashLen);
+    }
+
+    for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
+        sp_384_from_mp(x, 7, priv);
+
+        /* New random point. */
+        if (km == NULL || mp_iszero(km)) {
+            err = sp_384_ecc_gen_k_7(rng, k);
+        }
+        else {
+            sp_384_from_mp(k, 7, km);
+            mp_zero(km);
+        }
+        if (err == MP_OKAY) {
+                err = sp_384_ecc_mulmod_base_7(point, k, 1, NULL);
+        }
+
+        if (err == MP_OKAY) {
+            /* r = point->x mod order */
+            XMEMCPY(r, point->x, sizeof(sp_digit) * 7U);
+            sp_384_norm_7(r);
+            c = sp_384_cmp_7(r, p384_order);
+            sp_384_cond_sub_7(r, r, p384_order, 0L - (sp_digit)(c >= 0));
+            sp_384_norm_7(r);
+
+            /* Conv k to Montgomery form (mod order) */
+                sp_384_mul_7(k, k, p384_norm_order);
+            err = sp_384_mod_7(k, k, p384_order);
+        }
+        if (err == MP_OKAY) {
+            sp_384_norm_7(k);
+            /* kInv = 1/k mod order */
+                sp_384_mont_inv_order_7(kInv, k, tmp);
+            sp_384_norm_7(kInv);
+
+            /* s = r * x + e */
+                sp_384_mul_7(x, x, r);
+            err = sp_384_mod_7(x, x, p384_order);
+        }
+        if (err == MP_OKAY) {
+            sp_384_norm_7(x);
+            carry = sp_384_add_7(s, e, x);
+            sp_384_cond_sub_7(s, s, p384_order, 0 - carry);
+            sp_384_norm_7(s);
+            c = sp_384_cmp_7(s, p384_order);
+            sp_384_cond_sub_7(s, s, p384_order, 0L - (sp_digit)(c >= 0));
+            sp_384_norm_7(s);
+
+            /* s = s * k^-1 mod order */
+                sp_384_mont_mul_order_7(s, s, kInv);
+            sp_384_norm_7(s);
+
+            /* Check that signature is usable. */
+            if (sp_384_iszero_7(s) == 0) {
+                break;
+            }
+        }
+    }
+
+    if (i == 0) {
+        err = RNG_FAILURE_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(r, rm);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(s, sm);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XMEMSET(d, 0, sizeof(sp_digit) * 8 * 7);
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    XMEMSET(e, 0, sizeof(sp_digit) * 2U * 7U);
+    XMEMSET(x, 0, sizeof(sp_digit) * 2U * 7U);
+    XMEMSET(k, 0, sizeof(sp_digit) * 2U * 7U);
+    XMEMSET(r, 0, sizeof(sp_digit) * 2U * 7U);
+    XMEMSET(r, 0, sizeof(sp_digit) * 2U * 7U);
+    XMEMSET(tmp, 0, sizeof(sp_digit) * 3U * 2U * 7U);
+#endif
+    sp_384_point_free_7(point, 1, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_SIGN */
+
+#ifdef HAVE_ECC_VERIFY
+/* Verify the signature values with the hash and public key.
+ *   e = Truncate(hash, 384)
+ *   u1 = e/s mod order
+ *   u2 = r/s mod order
+ *   r == (u1.G + u2.Q)->x mod order
+ * Optimization: Leave point in projective form.
+ *   (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
+ *   (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
+ * The hash is truncated to the first 384 bits.
+ *
+ * hash     Hash to sign.
+ * hashLen  Length of the hash data.
+ * rng      Random number generator.
+ * priv     Private part of key - scalar.
+ * rm       First part of result as an mp_int.
+ * sm       Sirst part of result as an mp_int.
+ * heap     Heap to use for allocation.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+int sp_ecc_verify_384(const byte* hash, word32 hashLen, mp_int* pX,
+    mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit u1d[2*7];
+    sp_digit u2d[2*7];
+    sp_digit sd[2*7];
+    sp_digit tmpd[2*7 * 5];
+    sp_point_384 p1d;
+    sp_point_384 p2d;
+#endif
+    sp_digit* u1 = NULL;
+    sp_digit* u2 = NULL;
+    sp_digit* s = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_384* p1;
+    sp_point_384* p2 = NULL;
+    sp_digit carry;
+    int64_t c;
+    int err;
+
+    err = sp_384_point_new_7(heap, p1d, p1);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_7(heap, p2d, p2);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 7, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        u1  = d + 0 * 7;
+        u2  = d + 2 * 7;
+        s   = d + 4 * 7;
+        tmp = d + 6 * 7;
+#else
+        u1 = u1d;
+        u2 = u2d;
+        s  = sd;
+        tmp = tmpd;
+#endif
+
+        if (hashLen > 48U) {
+            hashLen = 48U;
+        }
+
+        sp_384_from_bin(u1, 7, hash, (int)hashLen);
+        sp_384_from_mp(u2, 7, r);
+        sp_384_from_mp(s, 7, sm);
+        sp_384_from_mp(p2->x, 7, pX);
+        sp_384_from_mp(p2->y, 7, pY);
+        sp_384_from_mp(p2->z, 7, pZ);
+
+        {
+            sp_384_mul_7(s, s, p384_norm_order);
+        }
+        err = sp_384_mod_7(s, s, p384_order);
+    }
+    if (err == MP_OKAY) {
+        sp_384_norm_7(s);
+        {
+            sp_384_mont_inv_order_7(s, s, tmp);
+            sp_384_mont_mul_order_7(u1, u1, s);
+            sp_384_mont_mul_order_7(u2, u2, s);
+        }
+
+            err = sp_384_ecc_mulmod_base_7(p1, u1, 0, heap);
+    }
+    if (err == MP_OKAY) {
+            err = sp_384_ecc_mulmod_7(p2, p2, u2, 0, heap);
+    }
+
+    if (err == MP_OKAY) {
+        {
+            sp_384_proj_point_add_7(p1, p1, p2, tmp);
+            if (sp_384_iszero_7(p1->z)) {
+                if (sp_384_iszero_7(p1->x) && sp_384_iszero_7(p1->y)) {
+                    sp_384_proj_point_dbl_7(p1, p2, tmp);
+                }
+                else {
+                    /* Y ordinate is not used from here - don't set. */
+                    p1->x[0] = 0;
+                    p1->x[1] = 0;
+                    p1->x[2] = 0;
+                    p1->x[3] = 0;
+                    p1->x[4] = 0;
+                    p1->x[5] = 0;
+                    p1->x[6] = 0;
+                    XMEMCPY(p1->z, p384_norm_mod, sizeof(p384_norm_mod));
+                }
+            }
+        }
+
+        /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
+        /* Reload r and convert to Montgomery form. */
+        sp_384_from_mp(u2, 7, r);
+        err = sp_384_mod_mul_norm_7(u2, u2, p384_mod);
+    }
+
+    if (err == MP_OKAY) {
+        /* u1 = r.z'.z' mod prime */
+        sp_384_mont_sqr_7(p1->z, p1->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_7(u1, u2, p1->z, p384_mod, p384_mp_mod);
+        *res = (int)(sp_384_cmp_7(p1->x, u1) == 0);
+        if (*res == 0) {
+            /* Reload r and add order. */
+            sp_384_from_mp(u2, 7, r);
+            carry = sp_384_add_7(u2, u2, p384_order);
+            /* Carry means result is greater than mod and is not valid. */
+            if (carry == 0) {
+                sp_384_norm_7(u2);
+
+                /* Compare with mod and if greater or equal then not valid. */
+                c = sp_384_cmp_7(u2, p384_mod);
+                if (c < 0) {
+                    /* Convert to Montogomery form */
+                    err = sp_384_mod_mul_norm_7(u2, u2, p384_mod);
+                    if (err == MP_OKAY) {
+                        /* u1 = (r + 1*order).z'.z' mod prime */
+                        sp_384_mont_mul_7(u1, u2, p1->z, p384_mod,
+                                                                  p384_mp_mod);
+                        *res = (int)(sp_384_cmp_7(p1->x, u1) == 0);
+                    }
+                }
+            }
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL)
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+#endif
+    sp_384_point_free_7(p1, 0, heap);
+    sp_384_point_free_7(p2, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_VERIFY */
+
+#ifdef HAVE_ECC_CHECK_KEY
+/* Check that the x and y oridinates are a valid point on the curve.
+ *
+ * point  EC point.
+ * heap   Heap to use if dynamically allocating.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+static int sp_384_ecc_is_point_7(sp_point_384* point, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit t1d[2*7];
+    sp_digit t2d[2*7];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 4, heap, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = d + 0 * 7;
+        t2 = d + 2 * 7;
+#else
+        (void)heap;
+
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
+        sp_384_sqr_7(t1, point->y);
+        (void)sp_384_mod_7(t1, t1, p384_mod);
+        sp_384_sqr_7(t2, point->x);
+        (void)sp_384_mod_7(t2, t2, p384_mod);
+        sp_384_mul_7(t2, t2, point->x);
+        (void)sp_384_mod_7(t2, t2, p384_mod);
+        (void)sp_384_sub_7(t2, p384_mod, t2);
+        sp_384_mont_add_7(t1, t1, t2, p384_mod);
+
+        sp_384_mont_add_7(t1, t1, point->x, p384_mod);
+        sp_384_mont_add_7(t1, t1, point->x, p384_mod);
+        sp_384_mont_add_7(t1, t1, point->x, p384_mod);
+
+        if (sp_384_cmp_7(t1, p384_b) != 0) {
+            err = MP_VAL;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+
+/* Check that the x and y oridinates are a valid point on the curve.
+ *
+ * pX  X ordinate of EC point.
+ * pY  Y ordinate of EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+int sp_ecc_is_point_384(mp_int* pX, mp_int* pY)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 pubd;
+#endif
+    sp_point_384* pub;
+    byte one[1] = { 1 };
+    int err;
+
+    err = sp_384_point_new_7(NULL, pubd, pub);
+    if (err == MP_OKAY) {
+        sp_384_from_mp(pub->x, 7, pX);
+        sp_384_from_mp(pub->y, 7, pY);
+        sp_384_from_bin(pub->z, 7, one, (int)sizeof(one));
+
+        err = sp_384_ecc_is_point_7(pub, NULL);
+    }
+
+    sp_384_point_free_7(pub, 0, NULL);
+
+    return err;
+}
+
+/* Check that the private scalar generates the EC point (px, py), the point is
+ * on the curve and the point has the correct order.
+ *
+ * pX     X ordinate of EC point.
+ * pY     Y ordinate of EC point.
+ * privm  Private scalar that generates EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve, ECC_INF_E if the point does not have the correct order,
+ * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and
+ * MP_OKAY otherwise.
+ */
+int sp_ecc_check_key_384(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit privd[7];
+    sp_point_384 pubd;
+    sp_point_384 pd;
+#endif
+    sp_digit* priv = NULL;
+    sp_point_384* pub;
+    sp_point_384* p = NULL;
+    byte one[1] = { 1 };
+    int err;
+
+    err = sp_384_point_new_7(heap, pubd, pub);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_7(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (priv == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+        priv = privd;
+#endif
+
+        sp_384_from_mp(pub->x, 7, pX);
+        sp_384_from_mp(pub->y, 7, pY);
+        sp_384_from_bin(pub->z, 7, one, (int)sizeof(one));
+        sp_384_from_mp(priv, 7, privm);
+
+        /* Check point at infinitiy. */
+        if ((sp_384_iszero_7(pub->x) != 0) &&
+            (sp_384_iszero_7(pub->y) != 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check range of X and Y */
+        if (sp_384_cmp_7(pub->x, p384_mod) >= 0 ||
+            sp_384_cmp_7(pub->y, p384_mod) >= 0) {
+            err = ECC_OUT_OF_RANGE_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check point is on curve */
+        err = sp_384_ecc_is_point_7(pub, heap);
+    }
+
+    if (err == MP_OKAY) {
+        /* Point * order = infinity */
+            err = sp_384_ecc_mulmod_7(p, pub, p384_order, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is infinity */
+        if ((sp_384_iszero_7(p->x) == 0) ||
+            (sp_384_iszero_7(p->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Base * private = point */
+            err = sp_384_ecc_mulmod_base_7(p, priv, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is public key */
+        if (sp_384_cmp_7(p->x, pub->x) != 0 ||
+            sp_384_cmp_7(p->y, pub->y) != 0) {
+            err = ECC_PRIV_KEY_E;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (priv != NULL) {
+        XFREE(priv, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_7(p, 0, heap);
+    sp_384_point_free_7(pub, 0, heap);
+
+    return err;
+}
+#endif
+#ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
+/* Add two projective EC points together.
+ * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ)
+ *
+ * pX   First EC point's X ordinate.
+ * pY   First EC point's Y ordinate.
+ * pZ   First EC point's Z ordinate.
+ * qX   Second EC point's X ordinate.
+ * qY   Second EC point's Y ordinate.
+ * qZ   Second EC point's Z ordinate.
+ * rX   Resultant EC point's X ordinate.
+ * rY   Resultant EC point's Y ordinate.
+ * rZ   Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_add_point_384(mp_int* pX, mp_int* pY, mp_int* pZ,
+                              mp_int* qX, mp_int* qY, mp_int* qZ,
+                              mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit tmpd[2 * 7 * 5];
+    sp_point_384 pd;
+    sp_point_384 qd;
+#endif
+    sp_digit* tmp;
+    sp_point_384* p;
+    sp_point_384* q = NULL;
+    int err;
+
+    err = sp_384_point_new_7(NULL, pd, p);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_7(NULL, qd, q);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 7 * 5, NULL,
+                                                              DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_384_from_mp(p->x, 7, pX);
+        sp_384_from_mp(p->y, 7, pY);
+        sp_384_from_mp(p->z, 7, pZ);
+        sp_384_from_mp(q->x, 7, qX);
+        sp_384_from_mp(q->y, 7, qY);
+        sp_384_from_mp(q->z, 7, qZ);
+
+            sp_384_proj_point_add_7(p, p, q, tmp);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->x, rX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->y, rY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->z, rZ);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_7(q, 0, NULL);
+    sp_384_point_free_7(p, 0, NULL);
+
+    return err;
+}
+
+/* Double a projective EC point.
+ * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ)
+ *
+ * pX   EC point's X ordinate.
+ * pY   EC point's Y ordinate.
+ * pZ   EC point's Z ordinate.
+ * rX   Resultant EC point's X ordinate.
+ * rY   Resultant EC point's Y ordinate.
+ * rZ   Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_dbl_point_384(mp_int* pX, mp_int* pY, mp_int* pZ,
+                              mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit tmpd[2 * 7 * 2];
+    sp_point_384 pd;
+#endif
+    sp_digit* tmp;
+    sp_point_384* p;
+    int err;
+
+    err = sp_384_point_new_7(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 7 * 2, NULL,
+                                                              DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_384_from_mp(p->x, 7, pX);
+        sp_384_from_mp(p->y, 7, pY);
+        sp_384_from_mp(p->z, 7, pZ);
+
+            sp_384_proj_point_dbl_7(p, p, tmp);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->x, rX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->y, rY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->z, rZ);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_7(p, 0, NULL);
+
+    return err;
+}
+
+/* Map a projective EC point to affine in place.
+ * pZ will be one.
+ *
+ * pX   EC point's X ordinate.
+ * pY   EC point's Y ordinate.
+ * pZ   EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_map_384(mp_int* pX, mp_int* pY, mp_int* pZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit tmpd[2 * 7 * 6];
+    sp_point_384 pd;
+#endif
+    sp_digit* tmp;
+    sp_point_384* p;
+    int err;
+
+    err = sp_384_point_new_7(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 7 * 6, NULL,
+                                                              DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+    if (err == MP_OKAY) {
+        sp_384_from_mp(p->x, 7, pX);
+        sp_384_from_mp(p->y, 7, pY);
+        sp_384_from_mp(p->z, 7, pZ);
+
+        sp_384_map_7(p, p, tmp);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->x, pX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->y, pY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->z, pZ);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_7(p, 0, NULL);
+
+    return err;
+}
+#endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */
+#ifdef HAVE_COMP_KEY
+/* Find the square root of a number mod the prime of the curve.
+ *
+ * y  The number to operate on and the result.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+static int sp_384_mont_sqrt_7(sp_digit* y)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d;
+#else
+    sp_digit t1d[2 * 7];
+    sp_digit t2d[2 * 7];
+    sp_digit t3d[2 * 7];
+    sp_digit t4d[2 * 7];
+    sp_digit t5d[2 * 7];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    sp_digit* t3;
+    sp_digit* t4;
+    sp_digit* t5;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5 * 2 * 7, NULL, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = d + 0 * 7;
+        t2 = d + 2 * 7;
+        t3 = d + 4 * 7;
+        t4 = d + 6 * 7;
+        t5 = d + 8 * 7;
+#else
+        t1 = t1d;
+        t2 = t2d;
+        t3 = t3d;
+        t4 = t4d;
+        t5 = t5d;
+#endif
+
+        {
+            /* t2 = y ^ 0x2 */
+            sp_384_mont_sqr_7(t2, y, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x3 */
+            sp_384_mont_mul_7(t1, t2, y, p384_mod, p384_mp_mod);
+            /* t5 = y ^ 0xc */
+            sp_384_mont_sqr_n_7(t5, t1, 2, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xf */
+            sp_384_mont_mul_7(t1, t1, t5, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x1e */
+            sp_384_mont_sqr_7(t2, t1, p384_mod, p384_mp_mod);
+            /* t3 = y ^ 0x1f */
+            sp_384_mont_mul_7(t3, t2, y, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3e0 */
+            sp_384_mont_sqr_n_7(t2, t3, 5, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x3ff */
+            sp_384_mont_mul_7(t1, t3, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x7fe0 */
+            sp_384_mont_sqr_n_7(t2, t1, 5, p384_mod, p384_mp_mod);
+            /* t3 = y ^ 0x7fff */
+            sp_384_mont_mul_7(t3, t3, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3fff800 */
+            sp_384_mont_sqr_n_7(t2, t3, 15, p384_mod, p384_mp_mod);
+            /* t4 = y ^ 0x3ffffff */
+            sp_384_mont_mul_7(t4, t3, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0xffffffc000000 */
+            sp_384_mont_sqr_n_7(t2, t4, 30, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xfffffffffffff */
+            sp_384_mont_mul_7(t1, t4, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0xfffffffffffffff000000000000000 */
+            sp_384_mont_sqr_n_7(t2, t1, 60, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xffffffffffffffffffffffffffffff */
+            sp_384_mont_mul_7(t1, t1, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */
+            sp_384_mont_sqr_n_7(t2, t1, 120, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+            sp_384_mont_mul_7(t1, t1, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */
+            sp_384_mont_sqr_n_7(t2, t1, 15, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+            sp_384_mont_mul_7(t1, t3, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff80000000 */
+            sp_384_mont_sqr_n_7(t2, t1, 31, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff */
+            sp_384_mont_mul_7(t1, t4, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff0 */
+            sp_384_mont_sqr_n_7(t2, t1, 4, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc */
+            sp_384_mont_mul_7(t1, t5, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000 */
+            sp_384_mont_sqr_n_7(t2, t1, 62, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000001 */
+            sp_384_mont_mul_7(t1, y, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc00000000000000040000000 */
+            sp_384_mont_sqr_n_7(y, t1, 30, p384_mod, p384_mp_mod);
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+
+
+/* Uncompress the point given the X ordinate.
+ *
+ * xm    X ordinate.
+ * odd   Whether the Y ordinate is odd.
+ * ym    Calculated Y ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_uncompress_384(mp_int* xm, int odd, mp_int* ym)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d;
+#else
+    sp_digit xd[2 * 7];
+    sp_digit yd[2 * 7];
+#endif
+    sp_digit* x = NULL;
+    sp_digit* y = NULL;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 7, NULL, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        x = d + 0 * 7;
+        y = d + 2 * 7;
+#else
+        x = xd;
+        y = yd;
+#endif
+
+        sp_384_from_mp(x, 7, xm);
+        err = sp_384_mod_mul_norm_7(x, x, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        /* y = x^3 */
+        {
+            sp_384_mont_sqr_7(y, x, p384_mod, p384_mp_mod);
+            sp_384_mont_mul_7(y, y, x, p384_mod, p384_mp_mod);
+        }
+        /* y = x^3 - 3x */
+        sp_384_mont_sub_7(y, y, x, p384_mod);
+        sp_384_mont_sub_7(y, y, x, p384_mod);
+        sp_384_mont_sub_7(y, y, x, p384_mod);
+        /* y = x^3 - 3x + b */
+        err = sp_384_mod_mul_norm_7(x, p384_b, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        sp_384_mont_add_7(y, y, x, p384_mod);
+        /* y = sqrt(x^3 - 3x + b) */
+        err = sp_384_mont_sqrt_7(y);
+    }
+    if (err == MP_OKAY) {
+        XMEMSET(y + 7, 0, 7U * sizeof(sp_digit));
+        sp_384_mont_reduce_7(y, p384_mod, p384_mp_mod);
+        if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) {
+            sp_384_mont_sub_7(y, p384_mod, y, p384_mod);
+        }
+
+        err = sp_384_to_mp(y, ym);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+#endif
+#endif /* WOLFSSL_SP_384 */
 #endif /* WOLFSSL_HAVE_SP_ECC */
 #endif /* SP_WORD_SIZE == 64 */
 #endif /* !WOLFSSL_SP_ASM */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wolfcrypt/src/sp_cortexm.c	Thu Jun 04 23:57:22 2020 +0000
@@ -0,0 +1,25688 @@
+/* sp.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/* Implementation by Sean Parkinson. */
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/cpuid.h>
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) || \
+                                    defined(WOLFSSL_HAVE_SP_ECC)
+
+#ifdef RSA_LOW_MEM
+#ifndef WOLFSSL_SP_SMALL
+#define WOLFSSL_SP_SMALL
+#endif
+#endif
+
+#include <wolfssl/wolfcrypt/sp.h>
+
+#ifdef __IAR_SYSTEMS_ICC__
+#define __asm__        asm
+#define __volatile__   volatile
+#endif /* __IAR_SYSTEMS_ICC__ */
+#ifdef __KEIL__
+#define __asm__        __asm
+#define __volatile__   volatile
+#endif
+
+#ifdef WOLFSSL_SP_ARM_CORTEX_M_ASM
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+#ifndef WOLFSSL_SP_NO_2048
+/* Read big endian unsigned byte array into r.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
+ * a  Byte array.
+ * n  Number of bytes in array to read.
+ */
+static void sp_2048_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = n-1; i >= 0; i--) {
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 24U) {
+            r[j] &= 0xffffffff;
+            s = 32U - s;
+            if (j + 1 >= size) {
+                break;
+            }
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
+ * a  A multi-precision integer.
+ */
+static void sp_2048_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 32
+    int j;
+
+    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+    for (j = a->used; j < size; j++) {
+        r[j] = 0;
+    }
+#elif DIGIT_BIT > 32
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
+        r[j] &= 0xffffffff;
+        s = 32U - s;
+        if (j + 1 >= size) {
+            break;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 32U) <= (word32)DIGIT_BIT) {
+            s += 32U;
+            r[j] &= 0xffffffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#else
+    int i, j = 0, s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i]) << s;
+        if (s + DIGIT_BIT >= 32) {
+            r[j] &= 0xffffffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            s = 32 - s;
+            if (s == DIGIT_BIT) {
+                r[++j] = 0;
+                s = 0;
+            }
+            else {
+                r[++j] = a->dp[i] >> s;
+                s = DIGIT_BIT - s;
+            }
+        }
+        else {
+            s += DIGIT_BIT;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#endif
+}
+
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 256
+ *
+ * r  A single precision integer.
+ * a  Byte array.
+ */
+static void sp_2048_to_bin(sp_digit* r, byte* a)
+{
+    int i, j, s = 0, b;
+
+    j = 2048 / 8 - 1;
+    a[j] = 0;
+    for (i=0; i<64 && j>=0; i++) {
+        b = 0;
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
+            break;
+        }
+        while (b < 32) {
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
+                break;
+            }
+        }
+        s = 8 - (b - 32);
+        if (j >= 0) {
+            a[j] = 0;
+        }
+        if (s != 0) {
+            j++;
+        }
+    }
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_mul_8(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit tmp[8];
+
+    __asm__ __volatile__ (
+        /* A[0] * B[0] */
+        "ldr	r6, [%[a], #0]\n\t"
+        "ldr	r8, [%[b], #0]\n\t"
+        "umull	r3, r4, r6, r8\n\t"
+        "mov	r5, #0\n\t"
+        "str	r3, [%[tmp], #0]\n\t"
+        "mov	r3, #0\n\t"
+        /* A[0] * B[1] */
+        "ldr	r8, [%[b], #4]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adc	r5, r5, r8\n\t"
+        /* A[1] * B[0] */
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r8, [%[b], #0]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs 	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        "str	r4, [%[tmp], #4]\n\t"
+        "mov	r4, #0\n\t"
+        /* A[0] * B[2] */
+        "ldr	r6, [%[a], #0]\n\t"
+        "ldr	r8, [%[b], #8]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        /* A[1] * B[1] */
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r8, [%[b], #4]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        /* A[2] * B[0] */
+        "ldr	r6, [%[a], #8]\n\t"
+        "ldr	r8, [%[b], #0]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        "str	r5, [%[tmp], #8]\n\t"
+        "mov	r5, #0\n\t"
+        /* A[0] * B[3] */
+        "ldr	r6, [%[a], #0]\n\t"
+        "ldr	r8, [%[b], #12]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        /* A[1] * B[2] */
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r8, [%[b], #8]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        /* A[2] * B[1] */
+        "ldr	r6, [%[a], #8]\n\t"
+        "ldr	r8, [%[b], #4]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        /* A[3] * B[0] */
+        "ldr	r6, [%[a], #12]\n\t"
+        "ldr	r8, [%[b], #0]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        "str	r3, [%[tmp], #12]\n\t"
+        "mov	r3, #0\n\t"
+        /* A[0] * B[4] */
+        "ldr	r6, [%[a], #0]\n\t"
+        "ldr	r8, [%[b], #16]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs 	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        /* A[1] * B[3] */
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r8, [%[b], #12]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs 	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        /* A[2] * B[2] */
+        "ldr	r6, [%[a], #8]\n\t"
+        "ldr	r8, [%[b], #8]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs 	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        /* A[3] * B[1] */
+        "ldr	r6, [%[a], #12]\n\t"
+        "ldr	r8, [%[b], #4]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs 	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        /* A[4] * B[0] */
+        "ldr	r6, [%[a], #16]\n\t"
+        "ldr	r8, [%[b], #0]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs 	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        "str	r4, [%[tmp], #16]\n\t"
+        "mov	r4, #0\n\t"
+        /* A[0] * B[5] */
+        "ldr	r6, [%[a], #0]\n\t"
+        "ldr	r8, [%[b], #20]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        /* A[1] * B[4] */
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r8, [%[b], #16]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        /* A[2] * B[3] */
+        "ldr	r6, [%[a], #8]\n\t"
+        "ldr	r8, [%[b], #12]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        /* A[3] * B[2] */
+        "ldr	r6, [%[a], #12]\n\t"
+        "ldr	r8, [%[b], #8]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        /* A[4] * B[1] */
+        "ldr	r6, [%[a], #16]\n\t"
+        "ldr	r8, [%[b], #4]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        /* A[5] * B[0] */
+        "ldr	r6, [%[a], #20]\n\t"
+        "ldr	r8, [%[b], #0]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        "str	r5, [%[tmp], #20]\n\t"
+        "mov	r5, #0\n\t"
+        /* A[0] * B[6] */
+        "ldr	r6, [%[a], #0]\n\t"
+        "ldr	r8, [%[b], #24]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        /* A[1] * B[5] */
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r8, [%[b], #20]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        /* A[2] * B[4] */
+        "ldr	r6, [%[a], #8]\n\t"
+        "ldr	r8, [%[b], #16]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        /* A[3] * B[3] */
+        "ldr	r6, [%[a], #12]\n\t"
+        "ldr	r8, [%[b], #12]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        /* A[4] * B[2] */
+        "ldr	r6, [%[a], #16]\n\t"
+        "ldr	r8, [%[b], #8]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        /* A[5] * B[1] */
+        "ldr	r6, [%[a], #20]\n\t"
+        "ldr	r8, [%[b], #4]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        /* A[6] * B[0] */
+        "ldr	r6, [%[a], #24]\n\t"
+        "ldr	r8, [%[b], #0]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        "str	r3, [%[tmp], #24]\n\t"
+        "mov	r3, #0\n\t"
+        /* A[0] * B[7] */
+        "ldr	r6, [%[a], #0]\n\t"
+        "ldr	r8, [%[b], #28]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs 	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        /* A[1] * B[6] */
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r8, [%[b], #24]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs 	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        /* A[2] * B[5] */
+        "ldr	r6, [%[a], #8]\n\t"
+        "ldr	r8, [%[b], #20]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs 	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        /* A[3] * B[4] */
+        "ldr	r6, [%[a], #12]\n\t"
+        "ldr	r8, [%[b], #16]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs 	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        /* A[4] * B[3] */
+        "ldr	r6, [%[a], #16]\n\t"
+        "ldr	r8, [%[b], #12]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs 	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        /* A[5] * B[2] */
+        "ldr	r6, [%[a], #20]\n\t"
+        "ldr	r8, [%[b], #8]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs 	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        /* A[6] * B[1] */
+        "ldr	r6, [%[a], #24]\n\t"
+        "ldr	r8, [%[b], #4]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs 	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        /* A[7] * B[0] */
+        "ldr	r6, [%[a], #28]\n\t"
+        "ldr	r8, [%[b], #0]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs 	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        "str	r4, [%[tmp], #28]\n\t"
+        "mov	r4, #0\n\t"
+        /* A[1] * B[7] */
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r8, [%[b], #28]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        /* A[2] * B[6] */
+        "ldr	r6, [%[a], #8]\n\t"
+        "ldr	r8, [%[b], #24]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        /* A[3] * B[5] */
+        "ldr	r6, [%[a], #12]\n\t"
+        "ldr	r8, [%[b], #20]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        /* A[4] * B[4] */
+        "ldr	r6, [%[a], #16]\n\t"
+        "ldr	r8, [%[b], #16]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        /* A[5] * B[3] */
+        "ldr	r6, [%[a], #20]\n\t"
+        "ldr	r8, [%[b], #12]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        /* A[6] * B[2] */
+        "ldr	r6, [%[a], #24]\n\t"
+        "ldr	r8, [%[b], #8]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        /* A[7] * B[1] */
+        "ldr	r6, [%[a], #28]\n\t"
+        "ldr	r8, [%[b], #4]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        "str	r5, [%[r], #32]\n\t"
+        "mov	r5, #0\n\t"
+        /* A[2] * B[7] */
+        "ldr	r6, [%[a], #8]\n\t"
+        "ldr	r8, [%[b], #28]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        /* A[3] * B[6] */
+        "ldr	r6, [%[a], #12]\n\t"
+        "ldr	r8, [%[b], #24]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        /* A[4] * B[5] */
+        "ldr	r6, [%[a], #16]\n\t"
+        "ldr	r8, [%[b], #20]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        /* A[5] * B[4] */
+        "ldr	r6, [%[a], #20]\n\t"
+        "ldr	r8, [%[b], #16]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        /* A[6] * B[3] */
+        "ldr	r6, [%[a], #24]\n\t"
+        "ldr	r8, [%[b], #12]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        /* A[7] * B[2] */
+        "ldr	r6, [%[a], #28]\n\t"
+        "ldr	r8, [%[b], #8]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        "str	r3, [%[r], #36]\n\t"
+        "mov	r3, #0\n\t"
+        /* A[3] * B[7] */
+        "ldr	r6, [%[a], #12]\n\t"
+        "ldr	r8, [%[b], #28]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs 	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        /* A[4] * B[6] */
+        "ldr	r6, [%[a], #16]\n\t"
+        "ldr	r8, [%[b], #24]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs 	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        /* A[5] * B[5] */
+        "ldr	r6, [%[a], #20]\n\t"
+        "ldr	r8, [%[b], #20]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs 	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        /* A[6] * B[4] */
+        "ldr	r6, [%[a], #24]\n\t"
+        "ldr	r8, [%[b], #16]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs 	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        /* A[7] * B[3] */
+        "ldr	r6, [%[a], #28]\n\t"
+        "ldr	r8, [%[b], #12]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs 	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        "str	r4, [%[r], #40]\n\t"
+        "mov	r4, #0\n\t"
+        /* A[4] * B[7] */
+        "ldr	r6, [%[a], #16]\n\t"
+        "ldr	r8, [%[b], #28]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        /* A[5] * B[6] */
+        "ldr	r6, [%[a], #20]\n\t"
+        "ldr	r8, [%[b], #24]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        /* A[6] * B[5] */
+        "ldr	r6, [%[a], #24]\n\t"
+        "ldr	r8, [%[b], #20]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        /* A[7] * B[4] */
+        "ldr	r6, [%[a], #28]\n\t"
+        "ldr	r8, [%[b], #16]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        "str	r5, [%[r], #44]\n\t"
+        "mov	r5, #0\n\t"
+        /* A[5] * B[7] */
+        "ldr	r6, [%[a], #20]\n\t"
+        "ldr	r8, [%[b], #28]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        /* A[6] * B[6] */
+        "ldr	r6, [%[a], #24]\n\t"
+        "ldr	r8, [%[b], #24]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        /* A[7] * B[5] */
+        "ldr	r6, [%[a], #28]\n\t"
+        "ldr	r8, [%[b], #20]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        "str	r3, [%[r], #48]\n\t"
+        "mov	r3, #0\n\t"
+        /* A[6] * B[7] */
+        "ldr	r6, [%[a], #24]\n\t"
+        "ldr	r8, [%[b], #28]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs 	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        /* A[7] * B[6] */
+        "ldr	r6, [%[a], #28]\n\t"
+        "ldr	r8, [%[b], #24]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs 	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        "str	r4, [%[r], #52]\n\t"
+        "mov	r4, #0\n\t"
+        /* A[7] * B[7] */
+        "ldr	r6, [%[a], #28]\n\t"
+        "ldr	r8, [%[b], #28]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adc	r3, r3, r8\n\t"
+        "str	r5, [%[r], #56]\n\t"
+        "str	r3, [%[r], #60]\n\t"
+        /* Transfer tmp to r */
+        "ldr	r3, [%[tmp], #0]\n\t"
+        "ldr	r4, [%[tmp], #4]\n\t"
+        "ldr	r5, [%[tmp], #8]\n\t"
+        "ldr	r6, [%[tmp], #12]\n\t"
+        "str	r3, [%[r], #0]\n\t"
+        "str	r4, [%[r], #4]\n\t"
+        "str	r5, [%[r], #8]\n\t"
+        "str	r6, [%[r], #12]\n\t"
+        "ldr	r3, [%[tmp], #16]\n\t"
+        "ldr	r4, [%[tmp], #20]\n\t"
+        "ldr	r5, [%[tmp], #24]\n\t"
+        "ldr	r6, [%[tmp], #28]\n\t"
+        "str	r3, [%[r], #16]\n\t"
+        "str	r4, [%[r], #20]\n\t"
+        "str	r5, [%[r], #24]\n\t"
+        "str	r6, [%[r], #28]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [tmp] "r" (tmp)
+        : "memory", "r3", "r4", "r5", "r6", "r8"
+    );
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a)
+{
+    sp_digit tmp[8];
+    __asm__ __volatile__ (
+        /* A[0] * A[0] */
+        "ldr	r6, [%[a], #0]\n\t"
+        "umull	r3, r4, r6, r6\n\t"
+        "mov	r5, #0\n\t"
+        "str	r3, [%[tmp], #0]\n\t"
+        "mov	r3, #0\n\t"
+        /* A[0] * A[1] */
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adc	r5, r5, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs 	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        "str	r4, [%[tmp], #4]\n\t"
+        "mov	r4, #0\n\t"
+        /* A[0] * A[2] */
+        "ldr	r6, [%[a], #0]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adc	r3, r3, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        /* A[1] * A[1] */
+        "ldr	r6, [%[a], #4]\n\t"
+        "umull	r6, r8, r6, r6\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        "str	r5, [%[tmp], #8]\n\t"
+        "mov	r5, #0\n\t"
+        /* A[0] * A[3] */
+        "ldr	r6, [%[a], #0]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r9, r10, r6, r8\n\t"
+        "mov	r11, #0\n\t"
+        /* A[1] * A[2] */
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adcs 	r10, r10, r8\n\t"
+        "adc	r11, r11, #0\n\t"
+        "adds	r9, r9, r9\n\t"
+        "adcs	r10, r10, r10\n\t"
+        "adc	r11, r11, r11\n\t"
+        "adds	r3, r3, r9\n\t"
+        "adcs	r4, r4, r10\n\t"
+        "adc	r5, r5, r11\n\t"
+        "str	r3, [%[tmp], #12]\n\t"
+        "mov	r3, #0\n\t"
+        /* A[0] * A[4] */
+        "ldr	r6, [%[a], #0]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r9, r10, r6, r8\n\t"
+        "mov	r11, #0\n\t"
+        /* A[1] * A[3] */
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adcs 	r10, r10, r8\n\t"
+        "adc	r11, r11, #0\n\t"
+        /* A[2] * A[2] */
+        "ldr	r6, [%[a], #8]\n\t"
+        "umull	r6, r8, r6, r6\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        "adds	r9, r9, r9\n\t"
+        "adcs	r10, r10, r10\n\t"
+        "adc	r11, r11, r11\n\t"
+        "adds	r4, r4, r9\n\t"
+        "adcs	r5, r5, r10\n\t"
+        "adc	r3, r3, r11\n\t"
+        "str	r4, [%[tmp], #16]\n\t"
+        "mov	r4, #0\n\t"
+        /* A[0] * A[5] */
+        "ldr	r6, [%[a], #0]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r9, r10, r6, r8\n\t"
+        "mov	r11, #0\n\t"
+        /* A[1] * A[4] */
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adcs 	r10, r10, r8\n\t"
+        "adc	r11, r11, #0\n\t"
+        /* A[2] * A[3] */
+        "ldr	r6, [%[a], #8]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adcs 	r10, r10, r8\n\t"
+        "adc	r11, r11, #0\n\t"
+        "adds	r9, r9, r9\n\t"
+        "adcs	r10, r10, r10\n\t"
+        "adc	r11, r11, r11\n\t"
+        "adds	r5, r5, r9\n\t"
+        "adcs	r3, r3, r10\n\t"
+        "adc	r4, r4, r11\n\t"
+        "str	r5, [%[tmp], #20]\n\t"
+        "mov	r5, #0\n\t"
+        /* A[0] * A[6] */
+        "ldr	r6, [%[a], #0]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r9, r10, r6, r8\n\t"
+        "mov	r11, #0\n\t"
+        /* A[1] * A[5] */
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adcs 	r10, r10, r8\n\t"
+        "adc	r11, r11, #0\n\t"
+        /* A[2] * A[4] */
+        "ldr	r6, [%[a], #8]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adcs 	r10, r10, r8\n\t"
+        "adc	r11, r11, #0\n\t"
+        /* A[3] * A[3] */
+        "ldr	r6, [%[a], #12]\n\t"
+        "umull	r6, r8, r6, r6\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        "adds	r9, r9, r9\n\t"
+        "adcs	r10, r10, r10\n\t"
+        "adc	r11, r11, r11\n\t"
+        "adds	r3, r3, r9\n\t"
+        "adcs	r4, r4, r10\n\t"
+        "adc	r5, r5, r11\n\t"
+        "str	r3, [%[tmp], #24]\n\t"
+        "mov	r3, #0\n\t"
+        /* A[0] * A[7] */
+        "ldr	r6, [%[a], #0]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r9, r10, r6, r8\n\t"
+        "mov	r11, #0\n\t"
+        /* A[1] * A[6] */
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adcs 	r10, r10, r8\n\t"
+        "adc	r11, r11, #0\n\t"
+        /* A[2] * A[5] */
+        "ldr	r6, [%[a], #8]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adcs 	r10, r10, r8\n\t"
+        "adc	r11, r11, #0\n\t"
+        /* A[3] * A[4] */
+        "ldr	r6, [%[a], #12]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adcs 	r10, r10, r8\n\t"
+        "adc	r11, r11, #0\n\t"
+        "adds	r9, r9, r9\n\t"
+        "adcs	r10, r10, r10\n\t"
+        "adc	r11, r11, r11\n\t"
+        "adds	r4, r4, r9\n\t"
+        "adcs	r5, r5, r10\n\t"
+        "adc	r3, r3, r11\n\t"
+        "str	r4, [%[tmp], #28]\n\t"
+        "mov	r4, #0\n\t"
+        /* A[1] * A[7] */
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r9, r10, r6, r8\n\t"
+        "mov	r11, #0\n\t"
+        /* A[2] * A[6] */
+        "ldr	r6, [%[a], #8]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adcs 	r10, r10, r8\n\t"
+        "adc	r11, r11, #0\n\t"
+        /* A[3] * A[5] */
+        "ldr	r6, [%[a], #12]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adcs 	r10, r10, r8\n\t"
+        "adc	r11, r11, #0\n\t"
+        /* A[4] * A[4] */
+        "ldr	r6, [%[a], #16]\n\t"
+        "umull	r6, r8, r6, r6\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        "adds	r9, r9, r9\n\t"
+        "adcs	r10, r10, r10\n\t"
+        "adc	r11, r11, r11\n\t"
+        "adds	r5, r5, r9\n\t"
+        "adcs	r3, r3, r10\n\t"
+        "adc	r4, r4, r11\n\t"
+        "str	r5, [%[r], #32]\n\t"
+        "mov	r5, #0\n\t"
+        /* A[2] * A[7] */
+        "ldr	r6, [%[a], #8]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r9, r10, r6, r8\n\t"
+        "mov	r11, #0\n\t"
+        /* A[3] * A[6] */
+        "ldr	r6, [%[a], #12]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adcs 	r10, r10, r8\n\t"
+        "adc	r11, r11, #0\n\t"
+        /* A[4] * A[5] */
+        "ldr	r6, [%[a], #16]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adcs 	r10, r10, r8\n\t"
+        "adc	r11, r11, #0\n\t"
+        "adds	r9, r9, r9\n\t"
+        "adcs	r10, r10, r10\n\t"
+        "adc	r11, r11, r11\n\t"
+        "adds	r3, r3, r9\n\t"
+        "adcs	r4, r4, r10\n\t"
+        "adc	r5, r5, r11\n\t"
+        "str	r3, [%[r], #36]\n\t"
+        "mov	r3, #0\n\t"
+        /* A[3] * A[7] */
+        "ldr	r6, [%[a], #12]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r9, r10, r6, r8\n\t"
+        "mov	r11, #0\n\t"
+        /* A[4] * A[6] */
+        "ldr	r6, [%[a], #16]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adcs 	r10, r10, r8\n\t"
+        "adc	r11, r11, #0\n\t"
+        /* A[5] * A[5] */
+        "ldr	r6, [%[a], #20]\n\t"
+        "umull	r6, r8, r6, r6\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        "adds	r9, r9, r9\n\t"
+        "adcs	r10, r10, r10\n\t"
+        "adc	r11, r11, r11\n\t"
+        "adds	r4, r4, r9\n\t"
+        "adcs	r5, r5, r10\n\t"
+        "adc	r3, r3, r11\n\t"
+        "str	r4, [%[r], #40]\n\t"
+        "mov	r4, #0\n\t"
+        /* A[4] * A[7] */
+        "ldr	r6, [%[a], #16]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        /* A[5] * A[6] */
+        "ldr	r6, [%[a], #20]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        "str	r5, [%[r], #44]\n\t"
+        "mov	r5, #0\n\t"
+        /* A[5] * A[7] */
+        "ldr	r6, [%[a], #20]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        /* A[6] * A[6] */
+        "ldr	r6, [%[a], #24]\n\t"
+        "umull	r6, r8, r6, r6\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        "str	r3, [%[r], #48]\n\t"
+        "mov	r3, #0\n\t"
+        /* A[6] * A[7] */
+        "ldr	r6, [%[a], #24]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs 	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs 	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        "str	r4, [%[r], #52]\n\t"
+        "mov	r4, #0\n\t"
+        /* A[7] * A[7] */
+        "ldr	r6, [%[a], #28]\n\t"
+        "umull	r6, r8, r6, r6\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adc	r3, r3, r8\n\t"
+        "str	r5, [%[r], #56]\n\t"
+        "str	r3, [%[r], #60]\n\t"
+        /* Transfer tmp to r */
+        "ldr	r3, [%[tmp], #0]\n\t"
+        "ldr	r4, [%[tmp], #4]\n\t"
+        "ldr	r5, [%[tmp], #8]\n\t"
+        "ldr	r6, [%[tmp], #12]\n\t"
+        "str	r3, [%[r], #0]\n\t"
+        "str	r4, [%[r], #4]\n\t"
+        "str	r5, [%[r], #8]\n\t"
+        "str	r6, [%[r], #12]\n\t"
+        "ldr	r3, [%[tmp], #16]\n\t"
+        "ldr	r4, [%[tmp], #20]\n\t"
+        "ldr	r5, [%[tmp], #24]\n\t"
+        "ldr	r6, [%[tmp], #28]\n\t"
+        "str	r3, [%[r], #16]\n\t"
+        "str	r4, [%[r], #20]\n\t"
+        "str	r5, [%[r], #24]\n\t"
+        "str	r6, [%[r], #28]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [tmp] "r" (tmp)
+        : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11"
+    );
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_2048_add_8(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c], %[c]\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r8"
+    );
+
+    return c;
+}
+
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_2048_sub_in_place_16(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "subs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "sbc	%[c], %[c], %[c]\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6"
+    );
+
+    return c;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_2048_add_16(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c], %[c]\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r8"
+    );
+
+    return c;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * m  Mask to AND against each digit.
+ */
+static void sp_2048_mask_8(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<8; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    r[0] = a[0] & m;
+    r[1] = a[1] & m;
+    r[2] = a[2] & m;
+    r[3] = a[3] & m;
+    r[4] = a[4] & m;
+    r[5] = a[5] & m;
+    r[6] = a[6] & m;
+    r[7] = a[7] & m;
+#endif
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_mul_16(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit* z0 = r;
+    sp_digit z1[16];
+    sp_digit a1[8];
+    sp_digit b1[8];
+    sp_digit z2[16];
+    sp_digit u, ca, cb;
+
+    ca = sp_2048_add_8(a1, a, &a[8]);
+    cb = sp_2048_add_8(b1, b, &b[8]);
+    u  = ca & cb;
+    sp_2048_mul_8(z1, a1, b1);
+    sp_2048_mul_8(z2, &a[8], &b[8]);
+    sp_2048_mul_8(z0, a, b);
+    sp_2048_mask_8(r + 16, a1, 0 - cb);
+    sp_2048_mask_8(b1, b1, 0 - ca);
+    u += sp_2048_add_8(r + 16, r + 16, b1);
+    u += sp_2048_sub_in_place_16(z1, z2);
+    u += sp_2048_sub_in_place_16(z1, z0);
+    u += sp_2048_add_16(r + 8, r + 8, z1);
+    r[24] = u;
+    XMEMSET(r + 24 + 1, 0, sizeof(sp_digit) * (8 - 1));
+    (void)sp_2048_add_16(r + 16, r + 16, z2);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_sqr_16(sp_digit* r, const sp_digit* a)
+{
+    sp_digit* z0 = r;
+    sp_digit z2[16];
+    sp_digit z1[16];
+    sp_digit a1[8];
+    sp_digit u;
+
+    u = sp_2048_add_8(a1, a, &a[8]);
+    sp_2048_sqr_8(z1, a1);
+    sp_2048_sqr_8(z2, &a[8]);
+    sp_2048_sqr_8(z0, a);
+    sp_2048_mask_8(r + 16, a1, 0 - u);
+    u += sp_2048_add_8(r + 16, r + 16, r + 16);
+    u += sp_2048_sub_in_place_16(z1, z2);
+    u += sp_2048_sub_in_place_16(z1, z0);
+    u += sp_2048_add_16(r + 8, r + 8, z1);
+    r[24] = u;
+    XMEMSET(r + 24 + 1, 0, sizeof(sp_digit) * (8 - 1));
+    (void)sp_2048_add_16(r + 16, r + 16, z2);
+}
+
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_2048_sub_in_place_32(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "subs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "sbc	%[c], %[c], %[c]\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6"
+    );
+
+    return c;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c], %[c]\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r8"
+    );
+
+    return c;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * m  Mask to AND against each digit.
+ */
+static void sp_2048_mask_16(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<16; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    int i;
+
+    for (i = 0; i < 16; i += 8) {
+        r[i+0] = a[i+0] & m;
+        r[i+1] = a[i+1] & m;
+        r[i+2] = a[i+2] & m;
+        r[i+3] = a[i+3] & m;
+        r[i+4] = a[i+4] & m;
+        r[i+5] = a[i+5] & m;
+        r[i+6] = a[i+6] & m;
+        r[i+7] = a[i+7] & m;
+    }
+#endif
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit* z0 = r;
+    sp_digit z1[32];
+    sp_digit a1[16];
+    sp_digit b1[16];
+    sp_digit z2[32];
+    sp_digit u, ca, cb;
+
+    ca = sp_2048_add_16(a1, a, &a[16]);
+    cb = sp_2048_add_16(b1, b, &b[16]);
+    u  = ca & cb;
+    sp_2048_mul_16(z1, a1, b1);
+    sp_2048_mul_16(z2, &a[16], &b[16]);
+    sp_2048_mul_16(z0, a, b);
+    sp_2048_mask_16(r + 32, a1, 0 - cb);
+    sp_2048_mask_16(b1, b1, 0 - ca);
+    u += sp_2048_add_16(r + 32, r + 32, b1);
+    u += sp_2048_sub_in_place_32(z1, z2);
+    u += sp_2048_sub_in_place_32(z1, z0);
+    u += sp_2048_add_32(r + 16, r + 16, z1);
+    r[48] = u;
+    XMEMSET(r + 48 + 1, 0, sizeof(sp_digit) * (16 - 1));
+    (void)sp_2048_add_32(r + 32, r + 32, z2);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a)
+{
+    sp_digit* z0 = r;
+    sp_digit z2[32];
+    sp_digit z1[32];
+    sp_digit a1[16];
+    sp_digit u;
+
+    u = sp_2048_add_16(a1, a, &a[16]);
+    sp_2048_sqr_16(z1, a1);
+    sp_2048_sqr_16(z2, &a[16]);
+    sp_2048_sqr_16(z0, a);
+    sp_2048_mask_16(r + 32, a1, 0 - u);
+    u += sp_2048_add_16(r + 32, r + 32, r + 32);
+    u += sp_2048_sub_in_place_32(z1, z2);
+    u += sp_2048_sub_in_place_32(z1, z0);
+    u += sp_2048_add_32(r + 16, r + 16, z1);
+    r[48] = u;
+    XMEMSET(r + 48 + 1, 0, sizeof(sp_digit) * (16 - 1));
+    (void)sp_2048_add_32(r + 32, r + 32, z2);
+}
+
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_2048_sub_in_place_64(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "subs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "sbc	%[c], %[c], %[c]\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6"
+    );
+
+    return c;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c], %[c]\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r8"
+    );
+
+    return c;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * m  Mask to AND against each digit.
+ */
+static void sp_2048_mask_32(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<32; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    int i;
+
+    for (i = 0; i < 32; i += 8) {
+        r[i+0] = a[i+0] & m;
+        r[i+1] = a[i+1] & m;
+        r[i+2] = a[i+2] & m;
+        r[i+3] = a[i+3] & m;
+        r[i+4] = a[i+4] & m;
+        r[i+5] = a[i+5] & m;
+        r[i+6] = a[i+6] & m;
+        r[i+7] = a[i+7] & m;
+    }
+#endif
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit* z0 = r;
+    sp_digit z1[64];
+    sp_digit a1[32];
+    sp_digit b1[32];
+    sp_digit z2[64];
+    sp_digit u, ca, cb;
+
+    ca = sp_2048_add_32(a1, a, &a[32]);
+    cb = sp_2048_add_32(b1, b, &b[32]);
+    u  = ca & cb;
+    sp_2048_mul_32(z1, a1, b1);
+    sp_2048_mul_32(z2, &a[32], &b[32]);
+    sp_2048_mul_32(z0, a, b);
+    sp_2048_mask_32(r + 64, a1, 0 - cb);
+    sp_2048_mask_32(b1, b1, 0 - ca);
+    u += sp_2048_add_32(r + 64, r + 64, b1);
+    u += sp_2048_sub_in_place_64(z1, z2);
+    u += sp_2048_sub_in_place_64(z1, z0);
+    u += sp_2048_add_64(r + 32, r + 32, z1);
+    r[96] = u;
+    XMEMSET(r + 96 + 1, 0, sizeof(sp_digit) * (32 - 1));
+    (void)sp_2048_add_64(r + 64, r + 64, z2);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a)
+{
+    sp_digit* z0 = r;
+    sp_digit z2[64];
+    sp_digit z1[64];
+    sp_digit a1[32];
+    sp_digit u;
+
+    u = sp_2048_add_32(a1, a, &a[32]);
+    sp_2048_sqr_32(z1, a1);
+    sp_2048_sqr_32(z2, &a[32]);
+    sp_2048_sqr_32(z0, a);
+    sp_2048_mask_32(r + 64, a1, 0 - u);
+    u += sp_2048_add_32(r + 64, r + 64, r + 64);
+    u += sp_2048_sub_in_place_64(z1, z2);
+    u += sp_2048_sub_in_place_64(z1, z0);
+    u += sp_2048_add_64(r + 32, r + 32, z1);
+    r[96] = u;
+    XMEMSET(r + 96 + 1, 0, sizeof(sp_digit) * (32 - 1));
+    (void)sp_2048_add_64(r + 64, r + 64, z2);
+}
+
+#endif /* !WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r6, %[a]\n\t"
+        "mov	r8, #0\n\t"
+        "add	r6, r6, #256\n\t"
+        "sub	r8, r8, #1\n\t"
+        "\n1:\n\t"
+        "adds	%[c], %[c], r8\n\t"
+        "ldr	r4, [%[a]]\n\t"
+        "ldr	r5, [%[b]]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "str	r4, [%[r]]\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c], %[c]\n\t"
+        "add	%[a], %[a], #4\n\t"
+        "add	%[b], %[b], #4\n\t"
+        "add	%[r], %[r], #4\n\t"
+        "cmp	%[a], r6\n\t"
+        "bne	1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r8"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into a. (a -= b)
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_2048_sub_in_place_64(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+    __asm__ __volatile__ (
+        "mov	r8, %[a]\n\t"
+        "add	r8, r8, #256\n\t"
+        "\n1:\n\t"
+        "mov	r5, #0\n\t"
+        "subs	r5, r5, %[c]\n\t"
+        "ldr	r3, [%[a]]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b]]\n\t"
+        "ldr	r6, [%[b], #4]\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "str	r3, [%[a]]\n\t"
+        "str	r4, [%[a], #4]\n\t"
+        "sbc	%[c], %[c], %[c]\n\t"
+        "add	%[a], %[a], #8\n\t"
+        "add	%[b], %[b], #8\n\t"
+        "cmp	%[a], r8\n\t"
+        "bne	1b\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6", "r8"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit tmp[64 * 2];
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r9, r3\n\t"
+        "mov	r12, %[r]\n\t"
+        "mov	r10, %[a]\n\t"
+        "mov	r11, %[b]\n\t"
+        "mov	r6, #1\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, r6, r10\n\t"
+        "mov	r14, r6\n\t"
+        "\n1:\n\t"
+        "mov	%[r], #0\n\t"
+        "mov	r5, #0\n\t"
+        "mov	r6, #252\n\t"
+        "mov	%[a], r9\n\t"
+        "subs	%[a], %[a], r6\n\t"
+        "sbc	r6, r6, r6\n\t"
+        "mvn	r6, r6\n\t"
+        "and	%[a], %[a], r6\n\t"
+        "mov	%[b], r9\n\t"
+        "sub	%[b], %[b], %[a]\n\t"
+        "add	%[a], %[a], r10\n\t"
+        "add	%[b], %[b], r11\n\t"
+        "\n2:\n\t"
+        /* Multiply Start */
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r8, [%[b]]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, %[r]\n\t"
+        /* Multiply Done */
+        "add	%[a], %[a], #4\n\t"
+        "sub	%[b], %[b], #4\n\t"
+        "cmp	%[a], r14\n\t"
+        "beq	3f\n\t"
+        "mov	r6, r9\n\t"
+        "add	r6, r6, r10\n\t"
+        "cmp	%[a], r6\n\t"
+        "ble	2b\n\t"
+        "\n3:\n\t"
+        "mov	%[r], r12\n\t"
+        "mov	r8, r9\n\t"
+        "str	r3, [%[r], r8]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "add	r8, r8, #4\n\t"
+        "mov	r9, r8\n\t"
+        "mov	r6, #1\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, r6, #248\n\t"
+        "cmp	r8, r6\n\t"
+        "ble	1b\n\t"
+        "str	r3, [%[r], r8]\n\t"
+        "mov	%[a], r10\n\t"
+        "mov	%[b], r11\n\t"
+        :
+        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
+    );
+
+    XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r5, #0\n\t"
+        "mov	r9, r3\n\t"
+        "mov	r12, %[r]\n\t"
+        "mov	r6, #2\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "neg	r6, r6\n\t"
+        "add	sp, sp, r6\n\t"
+        "mov	r11, sp\n\t"
+        "mov	r10, %[a]\n\t"
+        "\n1:\n\t"
+        "mov	%[r], #0\n\t"
+        "mov	r6, #252\n\t"
+        "mov	%[a], r9\n\t"
+        "subs	%[a], %[a], r6\n\t"
+        "sbc	r6, r6, r6\n\t"
+        "mvn	r6, r6\n\t"
+        "and	%[a], %[a], r6\n\t"
+        "mov	r2, r9\n\t"
+        "sub	r2, r2, %[a]\n\t"
+        "add	%[a], %[a], r10\n\t"
+        "add	r2, r2, r10\n\t"
+        "\n2:\n\t"
+        "cmp	r2, %[a]\n\t"
+        "beq	4f\n\t"
+        /* Multiply * 2: Start */
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r8, [r2]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, %[r]\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, %[r]\n\t"
+        /* Multiply * 2: Done */
+        "bal	5f\n\t"
+        "\n4:\n\t"
+        /* Square: Start */
+        "ldr	r6, [%[a]]\n\t"
+        "umull	r6, r8, r6, r6\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adc	r5, r5, %[r]\n\t"
+        /* Square: Done */
+        "\n5:\n\t"
+        "add	%[a], %[a], #4\n\t"
+        "sub	r2, r2, #4\n\t"
+        "mov	r6, #1\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, r6, r10\n\t"
+        "cmp	%[a], r6\n\t"
+        "beq	3f\n\t"
+        "cmp	%[a], r2\n\t"
+        "bgt	3f\n\t"
+        "mov	r8, r9\n\t"
+        "add	r8, r8, r10\n\t"
+        "cmp	%[a], r8\n\t"
+        "ble	2b\n\t"
+        "\n3:\n\t"
+        "mov	%[r], r11\n\t"
+        "mov	r8, r9\n\t"
+        "str	r3, [%[r], r8]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "mov	r5, #0\n\t"
+        "add	r8, r8, #4\n\t"
+        "mov	r9, r8\n\t"
+        "mov	r6, #1\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, r6, #248\n\t"
+        "cmp	r8, r6\n\t"
+        "ble	1b\n\t"
+        "mov	%[a], r10\n\t"
+        "str	r3, [%[r], r8]\n\t"
+        "mov	%[r], r12\n\t"
+        "mov	%[a], r11\n\t"
+        "mov	r3, #1\n\t"
+        "lsl	r3, r3, #8\n\t"
+        "add	r3, r3, #252\n\t"
+        "\n4:\n\t"
+        "ldr	r6, [%[a], r3]\n\t"
+        "str	r6, [%[r], r3]\n\t"
+        "subs	r3, r3, #4\n\t"
+        "bge	4b\n\t"
+        "mov	r6, #2\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	sp, sp, r6\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12"
+    );
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+#ifdef WOLFSSL_SP_SMALL
+/* AND m into each word of a and store in r.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * m  Mask to AND against each digit.
+ */
+static void sp_2048_mask_32(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+    int i;
+
+    for (i=0; i<32; i++) {
+        r[i] = a[i] & m;
+    }
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r6, %[a]\n\t"
+        "mov	r8, #0\n\t"
+        "add	r6, r6, #128\n\t"
+        "sub	r8, r8, #1\n\t"
+        "\n1:\n\t"
+        "adds	%[c], %[c], r8\n\t"
+        "ldr	r4, [%[a]]\n\t"
+        "ldr	r5, [%[b]]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "str	r4, [%[r]]\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c], %[c]\n\t"
+        "add	%[a], %[a], #4\n\t"
+        "add	%[b], %[b], #4\n\t"
+        "add	%[r], %[r], #4\n\t"
+        "cmp	%[a], r6\n\t"
+        "bne	1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r8"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into a. (a -= b)
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_2048_sub_in_place_32(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+    __asm__ __volatile__ (
+        "mov	r8, %[a]\n\t"
+        "add	r8, r8, #128\n\t"
+        "\n1:\n\t"
+        "mov	r5, #0\n\t"
+        "subs	r5, r5, %[c]\n\t"
+        "ldr	r3, [%[a]]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b]]\n\t"
+        "ldr	r6, [%[b], #4]\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "str	r3, [%[a]]\n\t"
+        "str	r4, [%[a], #4]\n\t"
+        "sbc	%[c], %[c], %[c]\n\t"
+        "add	%[a], %[a], #8\n\t"
+        "add	%[b], %[b], #8\n\t"
+        "cmp	%[a], r8\n\t"
+        "bne	1b\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6", "r8"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit tmp[32 * 2];
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r9, r3\n\t"
+        "mov	r12, %[r]\n\t"
+        "mov	r10, %[a]\n\t"
+        "mov	r11, %[b]\n\t"
+        "mov	r6, #128\n\t"
+        "add	r6, r6, r10\n\t"
+        "mov	r14, r6\n\t"
+        "\n1:\n\t"
+        "mov	%[r], #0\n\t"
+        "mov	r5, #0\n\t"
+        "mov	r6, #124\n\t"
+        "mov	%[a], r9\n\t"
+        "subs	%[a], %[a], r6\n\t"
+        "sbc	r6, r6, r6\n\t"
+        "mvn	r6, r6\n\t"
+        "and	%[a], %[a], r6\n\t"
+        "mov	%[b], r9\n\t"
+        "sub	%[b], %[b], %[a]\n\t"
+        "add	%[a], %[a], r10\n\t"
+        "add	%[b], %[b], r11\n\t"
+        "\n2:\n\t"
+        /* Multiply Start */
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r8, [%[b]]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, %[r]\n\t"
+        /* Multiply Done */
+        "add	%[a], %[a], #4\n\t"
+        "sub	%[b], %[b], #4\n\t"
+        "cmp	%[a], r14\n\t"
+        "beq	3f\n\t"
+        "mov	r6, r9\n\t"
+        "add	r6, r6, r10\n\t"
+        "cmp	%[a], r6\n\t"
+        "ble	2b\n\t"
+        "\n3:\n\t"
+        "mov	%[r], r12\n\t"
+        "mov	r8, r9\n\t"
+        "str	r3, [%[r], r8]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "add	r8, r8, #4\n\t"
+        "mov	r9, r8\n\t"
+        "mov	r6, #248\n\t"
+        "cmp	r8, r6\n\t"
+        "ble	1b\n\t"
+        "str	r3, [%[r], r8]\n\t"
+        "mov	%[a], r10\n\t"
+        "mov	%[b], r11\n\t"
+        :
+        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
+    );
+
+    XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r5, #0\n\t"
+        "mov	r9, r3\n\t"
+        "mov	r12, %[r]\n\t"
+        "mov	r6, #1\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "neg	r6, r6\n\t"
+        "add	sp, sp, r6\n\t"
+        "mov	r11, sp\n\t"
+        "mov	r10, %[a]\n\t"
+        "\n1:\n\t"
+        "mov	%[r], #0\n\t"
+        "mov	r6, #124\n\t"
+        "mov	%[a], r9\n\t"
+        "subs	%[a], %[a], r6\n\t"
+        "sbc	r6, r6, r6\n\t"
+        "mvn	r6, r6\n\t"
+        "and	%[a], %[a], r6\n\t"
+        "mov	r2, r9\n\t"
+        "sub	r2, r2, %[a]\n\t"
+        "add	%[a], %[a], r10\n\t"
+        "add	r2, r2, r10\n\t"
+        "\n2:\n\t"
+        "cmp	r2, %[a]\n\t"
+        "beq	4f\n\t"
+        /* Multiply * 2: Start */
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r8, [r2]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, %[r]\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, %[r]\n\t"
+        /* Multiply * 2: Done */
+        "bal	5f\n\t"
+        "\n4:\n\t"
+        /* Square: Start */
+        "ldr	r6, [%[a]]\n\t"
+        "umull	r6, r8, r6, r6\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adc	r5, r5, %[r]\n\t"
+        /* Square: Done */
+        "\n5:\n\t"
+        "add	%[a], %[a], #4\n\t"
+        "sub	r2, r2, #4\n\t"
+        "mov	r6, #128\n\t"
+        "add	r6, r6, r10\n\t"
+        "cmp	%[a], r6\n\t"
+        "beq	3f\n\t"
+        "cmp	%[a], r2\n\t"
+        "bgt	3f\n\t"
+        "mov	r8, r9\n\t"
+        "add	r8, r8, r10\n\t"
+        "cmp	%[a], r8\n\t"
+        "ble	2b\n\t"
+        "\n3:\n\t"
+        "mov	%[r], r11\n\t"
+        "mov	r8, r9\n\t"
+        "str	r3, [%[r], r8]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "mov	r5, #0\n\t"
+        "add	r8, r8, #4\n\t"
+        "mov	r9, r8\n\t"
+        "mov	r6, #248\n\t"
+        "cmp	r8, r6\n\t"
+        "ble	1b\n\t"
+        "mov	%[a], r10\n\t"
+        "str	r3, [%[r], r8]\n\t"
+        "mov	%[r], r12\n\t"
+        "mov	%[a], r11\n\t"
+        "mov	r3, #252\n\t"
+        "\n4:\n\t"
+        "ldr	r6, [%[a], r3]\n\t"
+        "str	r6, [%[r], r3]\n\t"
+        "subs	r3, r3, #4\n\t"
+        "bge	4b\n\t"
+        "mov	r6, #1\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	sp, sp, r6\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12"
+    );
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+/* Caclulate the bottom digit of -1/a mod 2^n.
+ *
+ * a    A single precision number.
+ * rho  Bottom word of inverse.
+ */
+static void sp_2048_mont_setup(const sp_digit* a, sp_digit* rho)
+{
+    sp_digit x, b;
+
+    b = a[0];
+    x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
+
+    /* rho = -1/m mod b */
+    *rho = -x;
+}
+
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision digit.
+ */
+SP_NOINLINE static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a,
+        sp_digit b)
+{
+    __asm__ __volatile__ (
+        "add	r9, %[a], #256\n\t"
+        /* A[0] * B */
+        "ldr	r6, [%[a]], #4\n\t"
+        "umull	r5, r3, r6, %[b]\n\t"
+        "mov	r4, #0\n\t"
+        "str	r5, [%[r]], #4\n\t"
+        /* A[0] * B - Done */
+        "\n1:\n\t"
+        "mov	r5, #0\n\t"
+        /* A[] * B */
+        "ldr	r6, [%[a]], #4\n\t"
+        "umull	r6, r8, r6, %[b]\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        /* A[] * B - Done */
+        "str	r3, [%[r]], #4\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "cmp	%[a], r9\n\t"
+        "blt	1b\n\t"
+        "str	r3, [%[r]]\n\t"
+        : [r] "+r" (r), [a] "+r" (a)
+        : [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r8", "r9"
+    );
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 2048 bits, just need to subtract.
+ *
+ * r  A single precision number.
+ * m  A single precision number.
+ */
+static void sp_2048_mont_norm_32(sp_digit* r, const sp_digit* m)
+{
+    XMEMSET(r, 0, sizeof(sp_digit) * 32);
+
+    /* r = 2^n mod m */
+    sp_2048_sub_in_place_32(r, m);
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * r  A single precision number representing condition subtract result.
+ * a  A single precision number to subtract from.
+ * b  A single precision number to subtract.
+ * m  Mask value to apply.
+ */
+SP_NOINLINE static sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, sp_digit m)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r5, #128\n\t"
+        "mov	r9, r5\n\t"
+        "mov	r8, #0\n\t"
+        "\n1:\n\t"
+        "ldr	r6, [%[b], r8]\n\t"
+        "and	r6, r6, %[m]\n\t"
+        "mov	r5, #0\n\t"
+        "subs	r5, r5, %[c]\n\t"
+        "ldr	r5, [%[a], r8]\n\t"
+        "sbcs	r5, r5, r6\n\t"
+        "sbcs	%[c], %[c], %[c]\n\t"
+        "str	r5, [%[r], r8]\n\t"
+        "add	r8, r8, #4\n\t"
+        "cmp	r8, r9\n\t"
+        "blt	1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r5", "r6", "r8", "r9"
+    );
+
+    return c;
+}
+
+/* Reduce the number back to 2048 bits using Montgomery reduction.
+ *
+ * a   A single precision number to reduce in place.
+ * m   The single precision number representing the modulus.
+ * mp  The digit representing the negative inverse of m mod 2^n.
+ */
+SP_NOINLINE static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_digit ca = 0;
+
+    __asm__ __volatile__ (
+        "mov	r9, %[mp]\n\t"
+        "mov	r12, %[m]\n\t"
+        "mov	r10, %[a]\n\t"
+        "mov	r4, #0\n\t"
+        "add	r11, r10, #128\n\t"
+        "\n1:\n\t"
+        /* mu = a[i] * mp */
+        "mov	%[mp], r9\n\t"
+        "ldr	%[a], [r10]\n\t"
+        "mul	%[mp], %[mp], %[a]\n\t"
+        "mov	%[m], r12\n\t"
+        "add	r14, r10, #120\n\t"
+        "\n2:\n\t"
+        /* a[i+j] += m[j] * mu */
+        "ldr	%[a], [r10]\n\t"
+        "mov	r5, #0\n\t"
+        /* Multiply m[j] and mu - Start */
+        "ldr	r8, [%[m]], #4\n\t"
+        "umull	r6, r8, %[mp], r8\n\t"
+        "adds	%[a], %[a], r6\n\t"
+        "adc	r5, r5, r8\n\t"
+        /* Multiply m[j] and mu - Done */
+        "adds	r4, r4, %[a]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "str	r4, [r10], #4\n\t"
+        /* a[i+j+1] += m[j+1] * mu */
+        "ldr	%[a], [r10]\n\t"
+        "mov	r4, #0\n\t"
+        /* Multiply m[j] and mu - Start */
+        "ldr	r8, [%[m]], #4\n\t"
+        "umull	r6, r8, %[mp], r8\n\t"
+        "adds	%[a], %[a], r6\n\t"
+        "adc	r4, r4, r8\n\t"
+        /* Multiply m[j] and mu - Done */
+        "adds	r5, r5, %[a]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "str	r5, [r10], #4\n\t"
+        "cmp	r10, r14\n\t"
+        "blt	2b\n\t"
+        /* a[i+30] += m[30] * mu */
+        "ldr	%[a], [r10]\n\t"
+        "mov	r5, #0\n\t"
+        /* Multiply m[j] and mu - Start */
+        "ldr	r8, [%[m]], #4\n\t"
+        "umull	r6, r8, %[mp], r8\n\t"
+        "adds	%[a], %[a], r6\n\t"
+        "adc	r5, r5, r8\n\t"
+        /* Multiply m[j] and mu - Done */
+        "adds	r4, r4, %[a]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "str	r4, [r10], #4\n\t"
+        /* a[i+31] += m[31] * mu */
+        "mov	r4, %[ca]\n\t"
+        "mov	%[ca], #0\n\t"
+        /* Multiply m[31] and mu - Start */
+        "ldr	r8, [%[m]]\n\t"
+        "umull	r6, r8, %[mp], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	%[ca], %[ca], #0\n\t"
+        /* Multiply m[31] and mu - Done */
+        "ldr	r6, [r10]\n\t"
+        "ldr	r8, [r10, #4]\n\t"
+        "adds	r6, r6, r5\n\t"
+        "adcs	r8, r8, r4\n\t"
+        "adc	%[ca], %[ca], #0\n\t"
+        "str	r6, [r10]\n\t"
+        "str	r8, [r10, #4]\n\t"
+        /* Next word in a */
+        "sub	r10, r10, #120\n\t"
+        "cmp	r10, r11\n\t"
+        "blt	1b\n\t"
+        "mov	%[a], r10\n\t"
+        "mov	%[m], r12\n\t"
+        : [ca] "+r" (ca), [a] "+r" (a)
+        : [m] "r" (m), [mp] "r" (mp)
+        : "memory", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
+    );
+
+    sp_2048_cond_sub_32(a - 32, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montogmery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r   Result of multiplication.
+ * a   First number to multiply in Montogmery form.
+ * b   Second number to multiply in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_2048_mont_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_2048_mul_32(r, a, b);
+    sp_2048_mont_reduce_32(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_2048_mont_sqr_32(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_2048_sqr_32(r, a);
+    sp_2048_mont_reduce_32(r, m, mp);
+}
+
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision digit.
+ */
+SP_NOINLINE static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a,
+        sp_digit b)
+{
+    __asm__ __volatile__ (
+        "add	r9, %[a], #128\n\t"
+        /* A[0] * B */
+        "ldr	r6, [%[a]], #4\n\t"
+        "umull	r5, r3, r6, %[b]\n\t"
+        "mov	r4, #0\n\t"
+        "str	r5, [%[r]], #4\n\t"
+        /* A[0] * B - Done */
+        "\n1:\n\t"
+        "mov	r5, #0\n\t"
+        /* A[] * B */
+        "ldr	r6, [%[a]], #4\n\t"
+        "umull	r6, r8, r6, %[b]\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        /* A[] * B - Done */
+        "str	r3, [%[r]], #4\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "cmp	%[a], r9\n\t"
+        "blt	1b\n\t"
+        "str	r3, [%[r]]\n\t"
+        : [r] "+r" (r), [a] "+r" (a)
+        : [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r8", "r9"
+    );
+}
+
+/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
+ *
+ * d1   The high order half of the number to divide.
+ * d0   The low order half of the number to divide.
+ * div  The dividend.
+ * returns the result of the division.
+ *
+ * Note that this is an approximate div. It may give an answer 1 larger.
+ */
+SP_NOINLINE static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0,
+        sp_digit div)
+{
+    sp_digit r = 0;
+
+    __asm__ __volatile__ (
+        "lsr	r6, %[div], #16\n\t"
+        "add	r6, r6, #1\n\t"
+        "udiv	r4, %[d1], r6\n\t"
+        "lsl	r8, r4, #16\n\t"
+        "umull	r4, r5, %[div], r8\n\t"
+        "subs	%[d0], %[d0], r4\n\t"
+        "sbc	%[d1], %[d1], r5\n\t"
+        "udiv	r5, %[d1], r6\n\t"
+        "lsl	r4, r5, #16\n\t"
+        "add	r8, r8, r4\n\t"
+        "umull	r4, r5, %[div], r4\n\t"
+        "subs	%[d0], %[d0], r4\n\t"
+        "sbc	%[d1], %[d1], r5\n\t"
+        "lsl	r4, %[d1], #16\n\t"
+        "orr	r4, r4, %[d0], lsr #16\n\t"
+        "udiv	r4, r4, r6\n\t"
+        "add	r8, r8, r4\n\t"
+        "umull	r4, r5, %[div], r4\n\t"
+        "subs	%[d0], %[d0], r4\n\t"
+        "sbc	%[d1], %[d1], r5\n\t"
+        "lsl	r4, %[d1], #16\n\t"
+        "orr	r4, r4, %[d0], lsr #16\n\t"
+        "udiv	r4, r4, r6\n\t"
+        "add	r8, r8, r4\n\t"
+        "umull	r4, r5, %[div], r4\n\t"
+        "subs	%[d0], %[d0], r4\n\t"
+        "sbc	%[d1], %[d1], r5\n\t"
+        "udiv	r4, %[d0], %[div]\n\t"
+        "add	r8, r8, r4\n\t"
+        "mov	%[r], r8\n\t"
+        : [r] "+r" (r)
+        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+        : "r4", "r5", "r6", "r8"
+    );
+    return r;
+}
+
+/* Compare a with b in constant time.
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+SP_NOINLINE static int32_t sp_2048_cmp_32(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+
+
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mvn	r3, r3\n\t"
+        "mov	r6, #124\n\t"
+        "\n1:\n\t"
+        "ldr	r8, [%[a], r6]\n\t"
+        "ldr	r5, [%[b], r6]\n\t"
+        "and	r8, r8, r3\n\t"
+        "and	r5, r5, r3\n\t"
+        "mov	r4, r8\n\t"
+        "subs	r8, r8, r5\n\t"
+        "sbc	r8, r8, r8\n\t"
+        "add	%[r], %[r], r8\n\t"
+        "mvn	r8, r8\n\t"
+        "and	r3, r3, r8\n\t"
+        "subs	r5, r5, r4\n\t"
+        "sbc	r8, r8, r8\n\t"
+        "sub	%[r], %[r], r8\n\t"
+        "mvn	r8, r8\n\t"
+        "and	r3, r3, r8\n\t"
+        "sub	r6, r6, #4\n\t"
+        "cmp	r6, #0\n\t"
+        "bge	1b\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
+        : "r3", "r4", "r5", "r6", "r8"
+    );
+
+    return r;
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Nmber to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_2048_div_32(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[64], t2[33];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    div = d[31];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 32);
+    for (i=31; i>=0; i--) {
+        r1 = div_2048_word_32(t1[32 + i], t1[32 + i - 1], div);
+
+        sp_2048_mul_d_32(t2, d, r1);
+        t1[32 + i] += sp_2048_sub_in_place_32(&t1[i], t2);
+        t1[32 + i] -= t2[32];
+        sp_2048_mask_32(t2, d, t1[32 + i]);
+        t1[32 + i] += sp_2048_add_32(&t1[i], &t1[i], t2);
+        sp_2048_mask_32(t2, d, t1[32 + i]);
+        t1[32 + i] += sp_2048_add_32(&t1[i], &t1[i], t2);
+    }
+
+    r1 = sp_2048_cmp_32(t1, d) >= 0;
+    sp_2048_cond_sub_32(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_2048_mod_32(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_2048_div_32(a, m, NULL, r);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * r     A single precision number that is the result of the operation.
+ * a     A single precision number being exponentiated.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[16][64];
+#else
+    sp_digit* t[16];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 64, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<16; i++) {
+            t[i] = td + i * 64;
+        }
+#endif
+        norm = t[0];
+
+        sp_2048_mont_setup(m, &mp);
+        sp_2048_mont_norm_32(norm, m);
+
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 32U);
+        if (reduceA != 0) {
+            err = sp_2048_mod_32(t[1] + 32, a, m);
+            if (err == MP_OKAY) {
+                err = sp_2048_mod_32(t[1], t[1], m);
+            }
+        }
+        else {
+            XMEMCPY(t[1] + 32, a, sizeof(sp_digit) * 32);
+            err = sp_2048_mod_32(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_mont_sqr_32(t[ 2], t[ 1], m, mp);
+        sp_2048_mont_mul_32(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_2048_mont_sqr_32(t[ 4], t[ 2], m, mp);
+        sp_2048_mont_mul_32(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_2048_mont_sqr_32(t[ 6], t[ 3], m, mp);
+        sp_2048_mont_mul_32(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_2048_mont_sqr_32(t[ 8], t[ 4], m, mp);
+        sp_2048_mont_mul_32(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_2048_mont_sqr_32(t[10], t[ 5], m, mp);
+        sp_2048_mont_mul_32(t[11], t[ 6], t[ 5], m, mp);
+        sp_2048_mont_sqr_32(t[12], t[ 6], m, mp);
+        sp_2048_mont_mul_32(t[13], t[ 7], t[ 6], m, mp);
+        sp_2048_mont_sqr_32(t[14], t[ 7], m, mp);
+        sp_2048_mont_mul_32(t[15], t[ 8], t[ 7], m, mp);
+
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 4;
+        if (c == 32) {
+            c = 28;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 32);
+        for (; i>=0 || c>=4; ) {
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 28;
+                n <<= 4;
+                c = 28;
+            }
+            else if (c < 4) {
+                y = n >> 28;
+                n = e[i--];
+                c = 4 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                y = (n >> 28) & 0xf;
+                n <<= 4;
+                c -= 4;
+            }
+
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+
+            sp_2048_mont_mul_32(r, r, t[y], m, mp);
+        }
+
+        XMEMSET(&r[32], 0, sizeof(sp_digit) * 32U);
+        sp_2048_mont_reduce_32(r, m, mp);
+
+        mask = 0 - (sp_2048_cmp_32(r, m) >= 0);
+        sp_2048_cond_sub_32(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#else
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * r     A single precision number that is the result of the operation.
+ * a     A single precision number being exponentiated.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[32][64];
+#else
+    sp_digit* t[32];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 64, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<32; i++) {
+            t[i] = td + i * 64;
+        }
+#endif
+        norm = t[0];
+
+        sp_2048_mont_setup(m, &mp);
+        sp_2048_mont_norm_32(norm, m);
+
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 32U);
+        if (reduceA != 0) {
+            err = sp_2048_mod_32(t[1] + 32, a, m);
+            if (err == MP_OKAY) {
+                err = sp_2048_mod_32(t[1], t[1], m);
+            }
+        }
+        else {
+            XMEMCPY(t[1] + 32, a, sizeof(sp_digit) * 32);
+            err = sp_2048_mod_32(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_mont_sqr_32(t[ 2], t[ 1], m, mp);
+        sp_2048_mont_mul_32(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_2048_mont_sqr_32(t[ 4], t[ 2], m, mp);
+        sp_2048_mont_mul_32(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_2048_mont_sqr_32(t[ 6], t[ 3], m, mp);
+        sp_2048_mont_mul_32(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_2048_mont_sqr_32(t[ 8], t[ 4], m, mp);
+        sp_2048_mont_mul_32(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_2048_mont_sqr_32(t[10], t[ 5], m, mp);
+        sp_2048_mont_mul_32(t[11], t[ 6], t[ 5], m, mp);
+        sp_2048_mont_sqr_32(t[12], t[ 6], m, mp);
+        sp_2048_mont_mul_32(t[13], t[ 7], t[ 6], m, mp);
+        sp_2048_mont_sqr_32(t[14], t[ 7], m, mp);
+        sp_2048_mont_mul_32(t[15], t[ 8], t[ 7], m, mp);
+        sp_2048_mont_sqr_32(t[16], t[ 8], m, mp);
+        sp_2048_mont_mul_32(t[17], t[ 9], t[ 8], m, mp);
+        sp_2048_mont_sqr_32(t[18], t[ 9], m, mp);
+        sp_2048_mont_mul_32(t[19], t[10], t[ 9], m, mp);
+        sp_2048_mont_sqr_32(t[20], t[10], m, mp);
+        sp_2048_mont_mul_32(t[21], t[11], t[10], m, mp);
+        sp_2048_mont_sqr_32(t[22], t[11], m, mp);
+        sp_2048_mont_mul_32(t[23], t[12], t[11], m, mp);
+        sp_2048_mont_sqr_32(t[24], t[12], m, mp);
+        sp_2048_mont_mul_32(t[25], t[13], t[12], m, mp);
+        sp_2048_mont_sqr_32(t[26], t[13], m, mp);
+        sp_2048_mont_mul_32(t[27], t[14], t[13], m, mp);
+        sp_2048_mont_sqr_32(t[28], t[14], m, mp);
+        sp_2048_mont_mul_32(t[29], t[15], t[14], m, mp);
+        sp_2048_mont_sqr_32(t[30], t[15], m, mp);
+        sp_2048_mont_mul_32(t[31], t[16], t[15], m, mp);
+
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 5;
+        if (c == 32) {
+            c = 27;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 32);
+        for (; i>=0 || c>=5; ) {
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 27;
+                n <<= 5;
+                c = 27;
+            }
+            else if (c < 5) {
+                y = n >> 27;
+                n = e[i--];
+                c = 5 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                y = (n >> 27) & 0x1f;
+                n <<= 5;
+                c -= 5;
+            }
+
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+
+            sp_2048_mont_mul_32(r, r, t[y], m, mp);
+        }
+
+        XMEMSET(&r[32], 0, sizeof(sp_digit) * 32U);
+        sp_2048_mont_reduce_32(r, m, mp);
+
+        mask = 0 - (sp_2048_cmp_32(r, m) >= 0);
+        sp_2048_cond_sub_32(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#endif /* WOLFSSL_SP_SMALL */
+
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 2048 bits, just need to subtract.
+ *
+ * r  A single precision number.
+ * m  A single precision number.
+ */
+static void sp_2048_mont_norm_64(sp_digit* r, const sp_digit* m)
+{
+    XMEMSET(r, 0, sizeof(sp_digit) * 64);
+
+    /* r = 2^n mod m */
+    sp_2048_sub_in_place_64(r, m);
+}
+
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * r  A single precision number representing condition subtract result.
+ * a  A single precision number to subtract from.
+ * b  A single precision number to subtract.
+ * m  Mask value to apply.
+ */
+SP_NOINLINE static sp_digit sp_2048_cond_sub_64(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, sp_digit m)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r5, #1\n\t"
+        "lsl	r5, r5, #8\n\t"
+        "mov	r9, r5\n\t"
+        "mov	r8, #0\n\t"
+        "\n1:\n\t"
+        "ldr	r6, [%[b], r8]\n\t"
+        "and	r6, r6, %[m]\n\t"
+        "mov	r5, #0\n\t"
+        "subs	r5, r5, %[c]\n\t"
+        "ldr	r5, [%[a], r8]\n\t"
+        "sbcs	r5, r5, r6\n\t"
+        "sbcs	%[c], %[c], %[c]\n\t"
+        "str	r5, [%[r], r8]\n\t"
+        "add	r8, r8, #4\n\t"
+        "cmp	r8, r9\n\t"
+        "blt	1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r5", "r6", "r8", "r9"
+    );
+
+    return c;
+}
+
+/* Reduce the number back to 2048 bits using Montgomery reduction.
+ *
+ * a   A single precision number to reduce in place.
+ * m   The single precision number representing the modulus.
+ * mp  The digit representing the negative inverse of m mod 2^n.
+ */
+SP_NOINLINE static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_digit ca = 0;
+
+    __asm__ __volatile__ (
+        "mov	r9, %[mp]\n\t"
+        "mov	r12, %[m]\n\t"
+        "mov	r10, %[a]\n\t"
+        "mov	r4, #0\n\t"
+        "add	r11, r10, #256\n\t"
+        "\n1:\n\t"
+        /* mu = a[i] * mp */
+        "mov	%[mp], r9\n\t"
+        "ldr	%[a], [r10]\n\t"
+        "mul	%[mp], %[mp], %[a]\n\t"
+        "mov	%[m], r12\n\t"
+        "add	r14, r10, #248\n\t"
+        "\n2:\n\t"
+        /* a[i+j] += m[j] * mu */
+        "ldr	%[a], [r10]\n\t"
+        "mov	r5, #0\n\t"
+        /* Multiply m[j] and mu - Start */
+        "ldr	r8, [%[m]], #4\n\t"
+        "umull	r6, r8, %[mp], r8\n\t"
+        "adds	%[a], %[a], r6\n\t"
+        "adc	r5, r5, r8\n\t"
+        /* Multiply m[j] and mu - Done */
+        "adds	r4, r4, %[a]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "str	r4, [r10], #4\n\t"
+        /* a[i+j+1] += m[j+1] * mu */
+        "ldr	%[a], [r10]\n\t"
+        "mov	r4, #0\n\t"
+        /* Multiply m[j] and mu - Start */
+        "ldr	r8, [%[m]], #4\n\t"
+        "umull	r6, r8, %[mp], r8\n\t"
+        "adds	%[a], %[a], r6\n\t"
+        "adc	r4, r4, r8\n\t"
+        /* Multiply m[j] and mu - Done */
+        "adds	r5, r5, %[a]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "str	r5, [r10], #4\n\t"
+        "cmp	r10, r14\n\t"
+        "blt	2b\n\t"
+        /* a[i+62] += m[62] * mu */
+        "ldr	%[a], [r10]\n\t"
+        "mov	r5, #0\n\t"
+        /* Multiply m[j] and mu - Start */
+        "ldr	r8, [%[m]], #4\n\t"
+        "umull	r6, r8, %[mp], r8\n\t"
+        "adds	%[a], %[a], r6\n\t"
+        "adc	r5, r5, r8\n\t"
+        /* Multiply m[j] and mu - Done */
+        "adds	r4, r4, %[a]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "str	r4, [r10], #4\n\t"
+        /* a[i+63] += m[63] * mu */
+        "mov	r4, %[ca]\n\t"
+        "mov	%[ca], #0\n\t"
+        /* Multiply m[63] and mu - Start */
+        "ldr	r8, [%[m]]\n\t"
+        "umull	r6, r8, %[mp], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	%[ca], %[ca], #0\n\t"
+        /* Multiply m[63] and mu - Done */
+        "ldr	r6, [r10]\n\t"
+        "ldr	r8, [r10, #4]\n\t"
+        "adds	r6, r6, r5\n\t"
+        "adcs	r8, r8, r4\n\t"
+        "adc	%[ca], %[ca], #0\n\t"
+        "str	r6, [r10]\n\t"
+        "str	r8, [r10, #4]\n\t"
+        /* Next word in a */
+        "sub	r10, r10, #248\n\t"
+        "cmp	r10, r11\n\t"
+        "blt	1b\n\t"
+        "mov	%[a], r10\n\t"
+        "mov	%[m], r12\n\t"
+        : [ca] "+r" (ca), [a] "+r" (a)
+        : [m] "r" (m), [mp] "r" (mp)
+        : "memory", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
+    );
+
+    sp_2048_cond_sub_64(a - 64, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montogmery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r   Result of multiplication.
+ * a   First number to multiply in Montogmery form.
+ * b   Second number to multiply in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_2048_mont_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_2048_mul_64(r, a, b);
+    sp_2048_mont_reduce_64(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_2048_mont_sqr_64(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_2048_sqr_64(r, a);
+    sp_2048_mont_reduce_64(r, m, mp);
+}
+
+/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
+ *
+ * d1   The high order half of the number to divide.
+ * d0   The low order half of the number to divide.
+ * div  The dividend.
+ * returns the result of the division.
+ *
+ * Note that this is an approximate div. It may give an answer 1 larger.
+ */
+SP_NOINLINE static sp_digit div_2048_word_64(sp_digit d1, sp_digit d0,
+        sp_digit div)
+{
+    sp_digit r = 0;
+
+    __asm__ __volatile__ (
+        "lsr	r6, %[div], #16\n\t"
+        "add	r6, r6, #1\n\t"
+        "udiv	r4, %[d1], r6\n\t"
+        "lsl	r8, r4, #16\n\t"
+        "umull	r4, r5, %[div], r8\n\t"
+        "subs	%[d0], %[d0], r4\n\t"
+        "sbc	%[d1], %[d1], r5\n\t"
+        "udiv	r5, %[d1], r6\n\t"
+        "lsl	r4, r5, #16\n\t"
+        "add	r8, r8, r4\n\t"
+        "umull	r4, r5, %[div], r4\n\t"
+        "subs	%[d0], %[d0], r4\n\t"
+        "sbc	%[d1], %[d1], r5\n\t"
+        "lsl	r4, %[d1], #16\n\t"
+        "orr	r4, r4, %[d0], lsr #16\n\t"
+        "udiv	r4, r4, r6\n\t"
+        "add	r8, r8, r4\n\t"
+        "umull	r4, r5, %[div], r4\n\t"
+        "subs	%[d0], %[d0], r4\n\t"
+        "sbc	%[d1], %[d1], r5\n\t"
+        "lsl	r4, %[d1], #16\n\t"
+        "orr	r4, r4, %[d0], lsr #16\n\t"
+        "udiv	r4, r4, r6\n\t"
+        "add	r8, r8, r4\n\t"
+        "umull	r4, r5, %[div], r4\n\t"
+        "subs	%[d0], %[d0], r4\n\t"
+        "sbc	%[d1], %[d1], r5\n\t"
+        "udiv	r4, %[d0], %[div]\n\t"
+        "add	r8, r8, r4\n\t"
+        "mov	%[r], r8\n\t"
+        : [r] "+r" (r)
+        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+        : "r4", "r5", "r6", "r8"
+    );
+    return r;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * m  Mask to AND against each digit.
+ */
+static void sp_2048_mask_64(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<64; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    int i;
+
+    for (i = 0; i < 64; i += 8) {
+        r[i+0] = a[i+0] & m;
+        r[i+1] = a[i+1] & m;
+        r[i+2] = a[i+2] & m;
+        r[i+3] = a[i+3] & m;
+        r[i+4] = a[i+4] & m;
+        r[i+5] = a[i+5] & m;
+        r[i+6] = a[i+6] & m;
+        r[i+7] = a[i+7] & m;
+    }
+#endif
+}
+
+/* Compare a with b in constant time.
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+SP_NOINLINE static int32_t sp_2048_cmp_64(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+
+
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mvn	r3, r3\n\t"
+        "mov	r6, #252\n\t"
+        "\n1:\n\t"
+        "ldr	r8, [%[a], r6]\n\t"
+        "ldr	r5, [%[b], r6]\n\t"
+        "and	r8, r8, r3\n\t"
+        "and	r5, r5, r3\n\t"
+        "mov	r4, r8\n\t"
+        "subs	r8, r8, r5\n\t"
+        "sbc	r8, r8, r8\n\t"
+        "add	%[r], %[r], r8\n\t"
+        "mvn	r8, r8\n\t"
+        "and	r3, r3, r8\n\t"
+        "subs	r5, r5, r4\n\t"
+        "sbc	r8, r8, r8\n\t"
+        "sub	%[r], %[r], r8\n\t"
+        "mvn	r8, r8\n\t"
+        "and	r3, r3, r8\n\t"
+        "sub	r6, r6, #4\n\t"
+        "cmp	r6, #0\n\t"
+        "bge	1b\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
+        : "r3", "r4", "r5", "r6", "r8"
+    );
+
+    return r;
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Nmber to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_2048_div_64(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[128], t2[65];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    div = d[63];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 64);
+    for (i=63; i>=0; i--) {
+        r1 = div_2048_word_64(t1[64 + i], t1[64 + i - 1], div);
+
+        sp_2048_mul_d_64(t2, d, r1);
+        t1[64 + i] += sp_2048_sub_in_place_64(&t1[i], t2);
+        t1[64 + i] -= t2[64];
+        sp_2048_mask_64(t2, d, t1[64 + i]);
+        t1[64 + i] += sp_2048_add_64(&t1[i], &t1[i], t2);
+        sp_2048_mask_64(t2, d, t1[64 + i]);
+        t1[64 + i] += sp_2048_add_64(&t1[i], &t1[i], t2);
+    }
+
+    r1 = sp_2048_cmp_64(t1, d) >= 0;
+    sp_2048_cond_sub_64(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_2048_mod_64(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_2048_div_64(a, m, NULL, r);
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Nmber to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_2048_div_64_cond(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[128], t2[65];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    div = d[63];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 64);
+    for (i=63; i>=0; i--) {
+        r1 = div_2048_word_64(t1[64 + i], t1[64 + i - 1], div);
+
+        sp_2048_mul_d_64(t2, d, r1);
+        t1[64 + i] += sp_2048_sub_in_place_64(&t1[i], t2);
+        t1[64 + i] -= t2[64];
+        if (t1[64 + i] != 0) {
+            t1[64 + i] += sp_2048_add_64(&t1[i], &t1[i], d);
+            if (t1[64 + i] != 0)
+                t1[64 + i] += sp_2048_add_64(&t1[i], &t1[i], d);
+        }
+    }
+
+    r1 = sp_2048_cmp_64(t1, d) >= 0;
+    sp_2048_cond_sub_64(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_2048_mod_64_cond(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_2048_div_64_cond(a, m, NULL, r);
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
+                                                     defined(WOLFSSL_HAVE_SP_DH)
+#ifdef WOLFSSL_SP_SMALL
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * r     A single precision number that is the result of the operation.
+ * a     A single precision number being exponentiated.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_2048_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[16][128];
+#else
+    sp_digit* t[16];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 128, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<16; i++) {
+            t[i] = td + i * 128;
+        }
+#endif
+        norm = t[0];
+
+        sp_2048_mont_setup(m, &mp);
+        sp_2048_mont_norm_64(norm, m);
+
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 64U);
+        if (reduceA != 0) {
+            err = sp_2048_mod_64(t[1] + 64, a, m);
+            if (err == MP_OKAY) {
+                err = sp_2048_mod_64(t[1], t[1], m);
+            }
+        }
+        else {
+            XMEMCPY(t[1] + 64, a, sizeof(sp_digit) * 64);
+            err = sp_2048_mod_64(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_mont_sqr_64(t[ 2], t[ 1], m, mp);
+        sp_2048_mont_mul_64(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_2048_mont_sqr_64(t[ 4], t[ 2], m, mp);
+        sp_2048_mont_mul_64(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_2048_mont_sqr_64(t[ 6], t[ 3], m, mp);
+        sp_2048_mont_mul_64(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_2048_mont_sqr_64(t[ 8], t[ 4], m, mp);
+        sp_2048_mont_mul_64(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_2048_mont_sqr_64(t[10], t[ 5], m, mp);
+        sp_2048_mont_mul_64(t[11], t[ 6], t[ 5], m, mp);
+        sp_2048_mont_sqr_64(t[12], t[ 6], m, mp);
+        sp_2048_mont_mul_64(t[13], t[ 7], t[ 6], m, mp);
+        sp_2048_mont_sqr_64(t[14], t[ 7], m, mp);
+        sp_2048_mont_mul_64(t[15], t[ 8], t[ 7], m, mp);
+
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 4;
+        if (c == 32) {
+            c = 28;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 64);
+        for (; i>=0 || c>=4; ) {
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 28;
+                n <<= 4;
+                c = 28;
+            }
+            else if (c < 4) {
+                y = n >> 28;
+                n = e[i--];
+                c = 4 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                y = (n >> 28) & 0xf;
+                n <<= 4;
+                c -= 4;
+            }
+
+            sp_2048_mont_sqr_64(r, r, m, mp);
+            sp_2048_mont_sqr_64(r, r, m, mp);
+            sp_2048_mont_sqr_64(r, r, m, mp);
+            sp_2048_mont_sqr_64(r, r, m, mp);
+
+            sp_2048_mont_mul_64(r, r, t[y], m, mp);
+        }
+
+        XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U);
+        sp_2048_mont_reduce_64(r, m, mp);
+
+        mask = 0 - (sp_2048_cmp_64(r, m) >= 0);
+        sp_2048_cond_sub_64(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#else
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * r     A single precision number that is the result of the operation.
+ * a     A single precision number being exponentiated.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_2048_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[32][128];
+#else
+    sp_digit* t[32];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 128, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<32; i++) {
+            t[i] = td + i * 128;
+        }
+#endif
+        norm = t[0];
+
+        sp_2048_mont_setup(m, &mp);
+        sp_2048_mont_norm_64(norm, m);
+
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 64U);
+        if (reduceA != 0) {
+            err = sp_2048_mod_64(t[1] + 64, a, m);
+            if (err == MP_OKAY) {
+                err = sp_2048_mod_64(t[1], t[1], m);
+            }
+        }
+        else {
+            XMEMCPY(t[1] + 64, a, sizeof(sp_digit) * 64);
+            err = sp_2048_mod_64(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_mont_sqr_64(t[ 2], t[ 1], m, mp);
+        sp_2048_mont_mul_64(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_2048_mont_sqr_64(t[ 4], t[ 2], m, mp);
+        sp_2048_mont_mul_64(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_2048_mont_sqr_64(t[ 6], t[ 3], m, mp);
+        sp_2048_mont_mul_64(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_2048_mont_sqr_64(t[ 8], t[ 4], m, mp);
+        sp_2048_mont_mul_64(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_2048_mont_sqr_64(t[10], t[ 5], m, mp);
+        sp_2048_mont_mul_64(t[11], t[ 6], t[ 5], m, mp);
+        sp_2048_mont_sqr_64(t[12], t[ 6], m, mp);
+        sp_2048_mont_mul_64(t[13], t[ 7], t[ 6], m, mp);
+        sp_2048_mont_sqr_64(t[14], t[ 7], m, mp);
+        sp_2048_mont_mul_64(t[15], t[ 8], t[ 7], m, mp);
+        sp_2048_mont_sqr_64(t[16], t[ 8], m, mp);
+        sp_2048_mont_mul_64(t[17], t[ 9], t[ 8], m, mp);
+        sp_2048_mont_sqr_64(t[18], t[ 9], m, mp);
+        sp_2048_mont_mul_64(t[19], t[10], t[ 9], m, mp);
+        sp_2048_mont_sqr_64(t[20], t[10], m, mp);
+        sp_2048_mont_mul_64(t[21], t[11], t[10], m, mp);
+        sp_2048_mont_sqr_64(t[22], t[11], m, mp);
+        sp_2048_mont_mul_64(t[23], t[12], t[11], m, mp);
+        sp_2048_mont_sqr_64(t[24], t[12], m, mp);
+        sp_2048_mont_mul_64(t[25], t[13], t[12], m, mp);
+        sp_2048_mont_sqr_64(t[26], t[13], m, mp);
+        sp_2048_mont_mul_64(t[27], t[14], t[13], m, mp);
+        sp_2048_mont_sqr_64(t[28], t[14], m, mp);
+        sp_2048_mont_mul_64(t[29], t[15], t[14], m, mp);
+        sp_2048_mont_sqr_64(t[30], t[15], m, mp);
+        sp_2048_mont_mul_64(t[31], t[16], t[15], m, mp);
+
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 5;
+        if (c == 32) {
+            c = 27;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 64);
+        for (; i>=0 || c>=5; ) {
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 27;
+                n <<= 5;
+                c = 27;
+            }
+            else if (c < 5) {
+                y = n >> 27;
+                n = e[i--];
+                c = 5 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                y = (n >> 27) & 0x1f;
+                n <<= 5;
+                c -= 5;
+            }
+
+            sp_2048_mont_sqr_64(r, r, m, mp);
+            sp_2048_mont_sqr_64(r, r, m, mp);
+            sp_2048_mont_sqr_64(r, r, m, mp);
+            sp_2048_mont_sqr_64(r, r, m, mp);
+            sp_2048_mont_sqr_64(r, r, m, mp);
+
+            sp_2048_mont_mul_64(r, r, t[y], m, mp);
+        }
+
+        XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U);
+        sp_2048_mont_reduce_64(r, m, mp);
+
+        mask = 0 - (sp_2048_cmp_64(r, m) >= 0);
+        sp_2048_cond_sub_64(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */
+
+#ifdef WOLFSSL_HAVE_SP_RSA
+/* RSA public key operation.
+ *
+ * in      Array of bytes representing the number to exponentiate, base.
+ * inLen   Number of bytes in base.
+ * em      Public exponent.
+ * mm      Modulus.
+ * out     Buffer to hold big-endian bytes of exponentiation result.
+ *         Must be at least 256 bytes long.
+ * outLen  Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPublic_2048(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
+    byte* out, word32* outLen)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit a[128], m[64], r[128];
+#else
+    sp_digit* d = NULL;
+    sp_digit* a;
+    sp_digit* m;
+    sp_digit* r;
+#endif
+    sp_digit *ah;
+    sp_digit e[1];
+    int err = MP_OKAY;
+
+    if (*outLen < 256)
+        err = MP_TO_E;
+    if (err == MP_OKAY && (mp_count_bits(em) > 32 || inLen > 256 ||
+                                                     mp_count_bits(mm) != 2048))
+        err = MP_READ_E;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 5, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (d == NULL)
+            err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        a = d;
+        r = a + 64 * 2;
+        m = r + 64 * 2;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        ah = a + 64;
+
+        sp_2048_from_bin(ah, 64, in, inLen);
+#if DIGIT_BIT >= 32
+        e[0] = em->dp[0];
+#else
+        e[0] = em->dp[0];
+        if (em->used > 1) {
+            e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
+        }
+#endif
+        if (e[0] == 0) {
+            err = MP_EXPTMOD_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        sp_2048_from_mp(m, 64, mm);
+
+        if (e[0] == 0x3) {
+            if (err == MP_OKAY) {
+                sp_2048_sqr_64(r, ah);
+                err = sp_2048_mod_64_cond(r, r, m);
+            }
+            if (err == MP_OKAY) {
+                sp_2048_mul_64(r, ah, r);
+                err = sp_2048_mod_64_cond(r, r, m);
+            }
+        }
+        else {
+            int i;
+            sp_digit mp;
+
+            sp_2048_mont_setup(m, &mp);
+
+            /* Convert to Montgomery form. */
+            XMEMSET(a, 0, sizeof(sp_digit) * 64);
+            err = sp_2048_mod_64_cond(a, a, m);
+
+            if (err == MP_OKAY) {
+                for (i = 31; i >= 0; i--) {
+                    if (e[0] >> i) {
+                        break;
+                    }
+                }
+
+                XMEMCPY(r, a, sizeof(sp_digit) * 64);
+                for (i--; i>=0; i--) {
+                    sp_2048_mont_sqr_64(r, r, m, mp);
+                    if (((e[0] >> i) & 1) == 1) {
+                        sp_2048_mont_mul_64(r, r, a, m, mp);
+                    }
+                }
+                XMEMSET(&r[64], 0, sizeof(sp_digit) * 64);
+                sp_2048_mont_reduce_64(r, m, mp);
+
+                for (i = 63; i > 0; i--) {
+                    if (r[i] != m[i]) {
+                        break;
+                    }
+                }
+                if (r[i] >= m[i]) {
+                    sp_2048_sub_in_place_64(r, m);
+                }
+            }
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_to_bin(r, out);
+        *outLen = 256;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+#endif
+
+    return err;
+}
+
+#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
+    sp_digit* a;
+    sp_digit* d = NULL;
+    sp_digit* m;
+    sp_digit* r;
+    int err = MP_OKAY;
+
+    (void)pm;
+    (void)qm;
+    (void)dpm;
+    (void)dqm;
+    (void)qim;
+
+    if (*outLen < 256U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(dm) > 2048) {
+           err = MP_READ_E;
+        }
+        if (inLen > 256) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 2048) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 4, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        a = d + 64;
+        m = a + 128;
+        r = a;
+
+        sp_2048_from_bin(a, 64, in, inLen);
+        sp_2048_from_mp(d, 64, dm);
+        sp_2048_from_mp(m, 64, mm);
+        err = sp_2048_mod_exp_64(r, a, d, 2048, m, 0);
+    }
+    if (err == MP_OKAY) {
+        sp_2048_to_bin(r, out);
+        *outLen = 256;
+    }
+
+    if (d != NULL) {
+        XMEMSET(d, 0, sizeof(sp_digit) * 64);
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+
+    return err;
+#else
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r  A single precision number representing conditional add result.
+ * a  A single precision number to add with.
+ * b  A single precision number to add.
+ * m  Mask value to apply.
+ */
+SP_NOINLINE static sp_digit sp_2048_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        sp_digit m)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r5, #128\n\t"
+        "mov	r9, r5\n\t"
+        "mov	r8, #0\n\t"
+        "\n1:\n\t"
+        "ldr	r6, [%[b], r8]\n\t"
+        "and	r6, r6, %[m]\n\t"
+        "adds	r5, %[c], #-1\n\t"
+        "ldr	r5, [%[a], r8]\n\t"
+        "adcs	r5, r5, r6\n\t"
+        "mov	%[c], #0\n\t"
+        "adcs	%[c], %[c], %[c]\n\t"
+        "str	r5, [%[r], r8]\n\t"
+        "add	r8, r8, #4\n\t"
+        "cmp	r8, r9\n\t"
+        "blt	1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r5", "r6", "r8", "r9"
+    );
+
+    return c;
+}
+
+/* RSA private key operation.
+ *
+ * in      Array of bytes representing the number to exponentiate, base.
+ * inLen   Number of bytes in base.
+ * dm      Private exponent.
+ * pm      First prime.
+ * qm      Second prime.
+ * dpm     First prime's CRT exponent.
+ * dqm     Second prime's CRT exponent.
+ * qim     Inverse of second prime mod p.
+ * mm      Modulus.
+ * out     Buffer to hold big-endian bytes of exponentiation result.
+ *         Must be at least 256 bytes long.
+ * outLen  Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm,
+    mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
+    byte* out, word32* outLen)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit a[64 * 2];
+    sp_digit p[32], q[32], dp[32];
+    sp_digit tmpa[64], tmpb[64];
+#else
+    sp_digit* t = NULL;
+    sp_digit* a;
+    sp_digit* p;
+    sp_digit* q;
+    sp_digit* dp;
+    sp_digit* tmpa;
+    sp_digit* tmpb;
+#endif
+    sp_digit* r;
+    sp_digit* qi;
+    sp_digit* dq;
+    sp_digit c;
+    int err = MP_OKAY;
+
+    (void)dm;
+    (void)mm;
+
+    if (*outLen < 256)
+        err = MP_TO_E;
+    if (err == MP_OKAY && (inLen > 256 || mp_count_bits(mm) != 2048))
+        err = MP_READ_E;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 11, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (t == NULL)
+            err = MEMORY_E;
+    }
+    if (err == MP_OKAY) {
+        a = t;
+        p = a + 64 * 2;
+        q = p + 32;
+        qi = dq = dp = q + 32;
+        tmpa = qi + 32;
+        tmpb = tmpa + 64;
+
+        r = t + 64;
+    }
+#else
+#endif
+
+    if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+        r = a;
+        qi = dq = dp;
+#endif
+        sp_2048_from_bin(a, 64, in, inLen);
+        sp_2048_from_mp(p, 32, pm);
+        sp_2048_from_mp(q, 32, qm);
+        sp_2048_from_mp(dp, 32, dpm);
+
+        err = sp_2048_mod_exp_32(tmpa, a, dp, 1024, p, 1);
+    }
+    if (err == MP_OKAY) {
+        sp_2048_from_mp(dq, 32, dqm);
+        err = sp_2048_mod_exp_32(tmpb, a, dq, 1024, q, 1);
+    }
+
+    if (err == MP_OKAY) {
+        c = sp_2048_sub_in_place_32(tmpa, tmpb);
+        c += sp_2048_cond_add_32(tmpa, tmpa, p, c);
+        sp_2048_cond_add_32(tmpa, tmpa, p, c);
+
+        sp_2048_from_mp(qi, 32, qim);
+        sp_2048_mul_32(tmpa, tmpa, qi);
+        err = sp_2048_mod_32(tmpa, tmpa, p);
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_mul_32(tmpa, q, tmpa);
+        XMEMSET(&tmpb[32], 0, sizeof(sp_digit) * 32);
+        sp_2048_add_64(r, tmpb, tmpa);
+
+        sp_2048_to_bin(r, out);
+        *outLen = 256;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        XMEMSET(t, 0, sizeof(sp_digit) * 32 * 11);
+        XFREE(t, NULL, DYNAMIC_TYPE_RSA);
+    }
+#else
+    XMEMSET(tmpa, 0, sizeof(tmpa));
+    XMEMSET(tmpb, 0, sizeof(tmpb));
+    XMEMSET(p,    0, sizeof(p));
+    XMEMSET(q,    0, sizeof(q));
+    XMEMSET(dp,   0, sizeof(dp));
+#endif
+
+    return err;
+}
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
+#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
+#endif /* WOLFSSL_HAVE_SP_RSA */
+#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
+                                              !defined(WOLFSSL_RSA_PUBLIC_ONLY))
+/* Convert an array of sp_digit to an mp_int.
+ *
+ * a  A single precision integer.
+ * r  A multi-precision integer.
+ */
+static int sp_2048_to_mp(const sp_digit* a, mp_int* r)
+{
+    int err;
+
+    err = mp_grow(r, (2048 + DIGIT_BIT - 1) / DIGIT_BIT);
+    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 32
+        XMEMCPY(r->dp, a, sizeof(sp_digit) * 64);
+        r->used = 64;
+        mp_clamp(r);
+#elif DIGIT_BIT < 32
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 64; i++) {
+            r->dp[j] |= (mp_digit)(a[i] << s);
+            r->dp[j] &= (1L << DIGIT_BIT) - 1;
+            s = DIGIT_BIT - s;
+            r->dp[++j] = (mp_digit)(a[i] >> s);
+            while (s + DIGIT_BIT <= 32) {
+                s += DIGIT_BIT;
+                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+                if (s == SP_WORD_SIZE) {
+                    r->dp[j] = 0;
+                }
+                else {
+                    r->dp[j] = (mp_digit)(a[i] >> s);
+                }
+            }
+            s = 32 - s;
+        }
+        r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#else
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 64; i++) {
+            r->dp[j] |= ((mp_digit)a[i]) << s;
+            if (s + 32 >= DIGIT_BIT) {
+    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+                r->dp[j] &= (1L << DIGIT_BIT) - 1;
+    #endif
+                s = DIGIT_BIT - s;
+                r->dp[++j] = a[i] >> s;
+                s = 32 - s;
+            }
+            else {
+                s += 32;
+            }
+        }
+        r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#endif
+    }
+
+    return err;
+}
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base  Base. MP integer.
+ * exp   Exponent. MP integer.
+ * mod   Modulus. MP integer.
+ * res   Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+    int err = MP_OKAY;
+    sp_digit b[128], e[64], m[64];
+    sp_digit* r = b;
+    int expBits = mp_count_bits(exp);
+
+    if (mp_count_bits(base) > 2048) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expBits > 2048) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 2048) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_from_mp(b, 64, base);
+        sp_2048_from_mp(e, 64, exp);
+        sp_2048_from_mp(m, 64, mod);
+
+        err = sp_2048_mod_exp_64(r, b, e, expBits, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_2048_to_mp(r, res);
+    }
+
+    XMEMSET(e, 0, sizeof(e));
+
+    return err;
+}
+
+#ifdef WOLFSSL_HAVE_SP_DH
+
+#ifdef HAVE_FFDHE_2048
+static void sp_2048_lshift_64(sp_digit* r, sp_digit* a, byte n)
+{
+    __asm__ __volatile__ (
+        "mov r6, #31\n\t"
+        "sub r6, r6, %[n]\n\t"
+        "add       %[a], %[a], #192\n\t"
+        "add       %[r], %[r], #192\n\t"
+        "ldr r3, [%[a], #60]\n\t"
+        "lsr r4, r3, #1\n\t"
+        "lsl r3, r3, %[n]\n\t"
+        "lsr r4, r4, r6\n\t"
+        "ldr       r2, [%[a], #56]\n\t"
+        "str       r4, [%[r], #64]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #52]\n\t"
+        "str       r3, [%[r], #60]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #48]\n\t"
+        "str       r2, [%[r], #56]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #44]\n\t"
+        "str       r4, [%[r], #52]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #40]\n\t"
+        "str       r3, [%[r], #48]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #36]\n\t"
+        "str       r2, [%[r], #44]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #32]\n\t"
+        "str       r4, [%[r], #40]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #28]\n\t"
+        "str       r3, [%[r], #36]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #24]\n\t"
+        "str       r2, [%[r], #32]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #20]\n\t"
+        "str       r4, [%[r], #28]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #16]\n\t"
+        "str       r3, [%[r], #24]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #12]\n\t"
+        "str       r2, [%[r], #20]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #8]\n\t"
+        "str       r4, [%[r], #16]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #4]\n\t"
+        "str       r3, [%[r], #12]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #0]\n\t"
+        "str       r2, [%[r], #8]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "sub     %[a], %[a], #64\n\t"
+        "sub     %[r], %[r], #64\n\t"
+        "ldr       r2, [%[a], #60]\n\t"
+        "str       r4, [%[r], #68]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #56]\n\t"
+        "str       r3, [%[r], #64]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #52]\n\t"
+        "str       r2, [%[r], #60]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #48]\n\t"
+        "str       r4, [%[r], #56]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #44]\n\t"
+        "str       r3, [%[r], #52]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #40]\n\t"
+        "str       r2, [%[r], #48]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #36]\n\t"
+        "str       r4, [%[r], #44]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #32]\n\t"
+        "str       r3, [%[r], #40]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #28]\n\t"
+        "str       r2, [%[r], #36]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #24]\n\t"
+        "str       r4, [%[r], #32]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #20]\n\t"
+        "str       r3, [%[r], #28]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #16]\n\t"
+        "str       r2, [%[r], #24]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #12]\n\t"
+        "str       r4, [%[r], #20]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #8]\n\t"
+        "str       r3, [%[r], #16]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #4]\n\t"
+        "str       r2, [%[r], #12]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #0]\n\t"
+        "str       r4, [%[r], #8]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "sub     %[a], %[a], #64\n\t"
+        "sub     %[r], %[r], #64\n\t"
+        "ldr       r4, [%[a], #60]\n\t"
+        "str       r3, [%[r], #68]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #56]\n\t"
+        "str       r2, [%[r], #64]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #52]\n\t"
+        "str       r4, [%[r], #60]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #48]\n\t"
+        "str       r3, [%[r], #56]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #44]\n\t"
+        "str       r2, [%[r], #52]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #40]\n\t"
+        "str       r4, [%[r], #48]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #36]\n\t"
+        "str       r3, [%[r], #44]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #32]\n\t"
+        "str       r2, [%[r], #40]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #28]\n\t"
+        "str       r4, [%[r], #36]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #24]\n\t"
+        "str       r3, [%[r], #32]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #20]\n\t"
+        "str       r2, [%[r], #28]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #16]\n\t"
+        "str       r4, [%[r], #24]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #12]\n\t"
+        "str       r3, [%[r], #20]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #8]\n\t"
+        "str       r2, [%[r], #16]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #4]\n\t"
+        "str       r4, [%[r], #12]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #0]\n\t"
+        "str       r3, [%[r], #8]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "sub     %[a], %[a], #64\n\t"
+        "sub     %[r], %[r], #64\n\t"
+        "ldr       r3, [%[a], #60]\n\t"
+        "str       r2, [%[r], #68]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #56]\n\t"
+        "str       r4, [%[r], #64]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #52]\n\t"
+        "str       r3, [%[r], #60]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #48]\n\t"
+        "str       r2, [%[r], #56]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #44]\n\t"
+        "str       r4, [%[r], #52]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #40]\n\t"
+        "str       r3, [%[r], #48]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #36]\n\t"
+        "str       r2, [%[r], #44]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #32]\n\t"
+        "str       r4, [%[r], #40]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #28]\n\t"
+        "str       r3, [%[r], #36]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #24]\n\t"
+        "str       r2, [%[r], #32]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #20]\n\t"
+        "str       r4, [%[r], #28]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #16]\n\t"
+        "str       r3, [%[r], #24]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #12]\n\t"
+        "str       r2, [%[r], #20]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #8]\n\t"
+        "str       r4, [%[r], #16]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #4]\n\t"
+        "str       r3, [%[r], #12]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #0]\n\t"
+        "str       r2, [%[r], #8]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "str r3, [%[r]]\n\t"
+        "str r4, [%[r], #4]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [n] "r" (n)
+        : "memory", "r2", "r3", "r4", "r5", "r6"
+    );
+}
+
+/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
+ *
+ * r     A single precision number that is the result of the operation.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_2048_mod_exp_2_64(sp_digit* r, const sp_digit* e, int bits,
+        const sp_digit* m)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit nd[128];
+    sp_digit td[65];
+#else
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit* tmp;
+    sp_digit mp = 1;
+    sp_digit n, o;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 193, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        norm = td;
+        tmp  = td + 128;
+#else
+        norm = nd;
+        tmp  = td;
+#endif
+
+        sp_2048_mont_setup(m, &mp);
+        sp_2048_mont_norm_64(norm, m);
+
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 5;
+        if (c == 32) {
+            c = 27;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
+        sp_2048_lshift_64(r, norm, y);
+        for (; i>=0 || c>=5; ) {
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 27;
+                n <<= 5;
+                c = 27;
+            }
+            else if (c < 5) {
+                y = n >> 27;
+                n = e[i--];
+                c = 5 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                y = (n >> 27) & 0x1f;
+                n <<= 5;
+                c -= 5;
+            }
+
+            sp_2048_mont_sqr_64(r, r, m, mp);
+            sp_2048_mont_sqr_64(r, r, m, mp);
+            sp_2048_mont_sqr_64(r, r, m, mp);
+            sp_2048_mont_sqr_64(r, r, m, mp);
+            sp_2048_mont_sqr_64(r, r, m, mp);
+
+            sp_2048_lshift_64(r, r, y);
+            sp_2048_mul_d_64(tmp, norm, r[64]);
+            r[64] = 0;
+            o = sp_2048_add_64(r, r, tmp);
+            sp_2048_cond_sub_64(r, r, m, (sp_digit)0 - o);
+        }
+
+        XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U);
+        sp_2048_mont_reduce_64(r, m, mp);
+
+        mask = 0 - (sp_2048_cmp_64(r, m) >= 0);
+        sp_2048_cond_sub_64(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#endif /* HAVE_FFDHE_2048 */
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base     Base.
+ * exp      Array of bytes that is the exponent.
+ * expLen   Length of data, in bytes, in exponent.
+ * mod      Modulus.
+ * out      Buffer to hold big-endian bytes of exponentiation result.
+ *          Must be at least 256 bytes long.
+ * outLen   Length, in bytes, of exponentiation result.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_DhExp_2048(mp_int* base, const byte* exp, word32 expLen,
+    mp_int* mod, byte* out, word32* outLen)
+{
+    int err = MP_OKAY;
+    sp_digit b[128], e[64], m[64];
+    sp_digit* r = b;
+    word32 i;
+
+    if (mp_count_bits(base) > 2048) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expLen > 256) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 2048) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_from_mp(b, 64, base);
+        sp_2048_from_bin(e, 64, exp, expLen);
+        sp_2048_from_mp(m, 64, mod);
+
+    #ifdef HAVE_FFDHE_2048
+        if (base->used == 1 && base->dp[0] == 2 && m[63] == (sp_digit)-1)
+            err = sp_2048_mod_exp_2_64(r, e, expLen * 8, m);
+        else
+    #endif
+            err = sp_2048_mod_exp_64(r, b, e, expLen * 8, m, 0);
+
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_to_bin(r, out);
+        *outLen = 256;
+        for (i=0; i<256 && out[i] == 0; i++) {
+        }
+        *outLen -= i;
+        XMEMMOVE(out, out + i, *outLen);
+
+    }
+
+    XMEMSET(e, 0, sizeof(e));
+
+    return err;
+}
+#endif /* WOLFSSL_HAVE_SP_DH */
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base  Base. MP integer.
+ * exp   Exponent. MP integer.
+ * mod   Modulus. MP integer.
+ * res   Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_1024(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+    int err = MP_OKAY;
+    sp_digit b[64], e[32], m[32];
+    sp_digit* r = b;
+    int expBits = mp_count_bits(exp);
+
+    if (mp_count_bits(base) > 1024) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expBits > 1024) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 1024) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_from_mp(b, 32, base);
+        sp_2048_from_mp(e, 32, exp);
+        sp_2048_from_mp(m, 32, mod);
+
+        err = sp_2048_mod_exp_32(r, b, e, expBits, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        XMEMSET(r + 32, 0, sizeof(*r) * 32U);
+        err = sp_2048_to_mp(r, res);
+        res->used = mod->used;
+        mp_clamp(res);
+    }
+
+    XMEMSET(e, 0, sizeof(e));
+
+    return err;
+}
+
+#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
+
+#endif /* !WOLFSSL_SP_NO_2048 */
+
+#ifndef WOLFSSL_SP_NO_3072
+/* Read big endian unsigned byte array into r.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
+ * a  Byte array.
+ * n  Number of bytes in array to read.
+ */
+static void sp_3072_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = n-1; i >= 0; i--) {
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 24U) {
+            r[j] &= 0xffffffff;
+            s = 32U - s;
+            if (j + 1 >= size) {
+                break;
+            }
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
+ * a  A multi-precision integer.
+ */
+static void sp_3072_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 32
+    int j;
+
+    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+    for (j = a->used; j < size; j++) {
+        r[j] = 0;
+    }
+#elif DIGIT_BIT > 32
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
+        r[j] &= 0xffffffff;
+        s = 32U - s;
+        if (j + 1 >= size) {
+            break;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 32U) <= (word32)DIGIT_BIT) {
+            s += 32U;
+            r[j] &= 0xffffffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#else
+    int i, j = 0, s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i]) << s;
+        if (s + DIGIT_BIT >= 32) {
+            r[j] &= 0xffffffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            s = 32 - s;
+            if (s == DIGIT_BIT) {
+                r[++j] = 0;
+                s = 0;
+            }
+            else {
+                r[++j] = a->dp[i] >> s;
+                s = DIGIT_BIT - s;
+            }
+        }
+        else {
+            s += DIGIT_BIT;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#endif
+}
+
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 384
+ *
+ * r  A single precision integer.
+ * a  Byte array.
+ */
+static void sp_3072_to_bin(sp_digit* r, byte* a)
+{
+    int i, j, s = 0, b;
+
+    j = 3072 / 8 - 1;
+    a[j] = 0;
+    for (i=0; i<96 && j>=0; i++) {
+        b = 0;
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
+            break;
+        }
+        while (b < 32) {
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
+                break;
+            }
+        }
+        s = 8 - (b - 32);
+        if (j >= 0) {
+            a[j] = 0;
+        }
+        if (s != 0) {
+            j++;
+        }
+    }
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_mul_12(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit tmp[12 * 2];
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r9, r3\n\t"
+        "mov	r12, %[r]\n\t"
+        "mov	r10, %[a]\n\t"
+        "mov	r11, %[b]\n\t"
+        "mov	r6, #48\n\t"
+        "add	r6, r6, r10\n\t"
+        "mov	r14, r6\n\t"
+        "\n1:\n\t"
+        "mov	%[r], #0\n\t"
+        "mov	r5, #0\n\t"
+        "mov	r6, #44\n\t"
+        "mov	%[a], r9\n\t"
+        "subs	%[a], %[a], r6\n\t"
+        "sbc	r6, r6, r6\n\t"
+        "mvn	r6, r6\n\t"
+        "and	%[a], %[a], r6\n\t"
+        "mov	%[b], r9\n\t"
+        "sub	%[b], %[b], %[a]\n\t"
+        "add	%[a], %[a], r10\n\t"
+        "add	%[b], %[b], r11\n\t"
+        "\n2:\n\t"
+        /* Multiply Start */
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r8, [%[b]]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, %[r]\n\t"
+        /* Multiply Done */
+        "add	%[a], %[a], #4\n\t"
+        "sub	%[b], %[b], #4\n\t"
+        "cmp	%[a], r14\n\t"
+        "beq	3f\n\t"
+        "mov	r6, r9\n\t"
+        "add	r6, r6, r10\n\t"
+        "cmp	%[a], r6\n\t"
+        "ble	2b\n\t"
+        "\n3:\n\t"
+        "mov	%[r], r12\n\t"
+        "mov	r8, r9\n\t"
+        "str	r3, [%[r], r8]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "add	r8, r8, #4\n\t"
+        "mov	r9, r8\n\t"
+        "mov	r6, #88\n\t"
+        "cmp	r8, r6\n\t"
+        "ble	1b\n\t"
+        "str	r3, [%[r], r8]\n\t"
+        "mov	%[a], r10\n\t"
+        "mov	%[b], r11\n\t"
+        :
+        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
+    );
+
+    XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r5, #0\n\t"
+        "mov	r9, r3\n\t"
+        "mov	r12, %[r]\n\t"
+        "mov	r6, #96\n\t"
+        "neg	r6, r6\n\t"
+        "add	sp, sp, r6\n\t"
+        "mov	r11, sp\n\t"
+        "mov	r10, %[a]\n\t"
+        "\n1:\n\t"
+        "mov	%[r], #0\n\t"
+        "mov	r6, #44\n\t"
+        "mov	%[a], r9\n\t"
+        "subs	%[a], %[a], r6\n\t"
+        "sbc	r6, r6, r6\n\t"
+        "mvn	r6, r6\n\t"
+        "and	%[a], %[a], r6\n\t"
+        "mov	r2, r9\n\t"
+        "sub	r2, r2, %[a]\n\t"
+        "add	%[a], %[a], r10\n\t"
+        "add	r2, r2, r10\n\t"
+        "\n2:\n\t"
+        "cmp	r2, %[a]\n\t"
+        "beq	4f\n\t"
+        /* Multiply * 2: Start */
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r8, [r2]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, %[r]\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, %[r]\n\t"
+        /* Multiply * 2: Done */
+        "bal	5f\n\t"
+        "\n4:\n\t"
+        /* Square: Start */
+        "ldr	r6, [%[a]]\n\t"
+        "umull	r6, r8, r6, r6\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adc	r5, r5, %[r]\n\t"
+        /* Square: Done */
+        "\n5:\n\t"
+        "add	%[a], %[a], #4\n\t"
+        "sub	r2, r2, #4\n\t"
+        "mov	r6, #48\n\t"
+        "add	r6, r6, r10\n\t"
+        "cmp	%[a], r6\n\t"
+        "beq	3f\n\t"
+        "cmp	%[a], r2\n\t"
+        "bgt	3f\n\t"
+        "mov	r8, r9\n\t"
+        "add	r8, r8, r10\n\t"
+        "cmp	%[a], r8\n\t"
+        "ble	2b\n\t"
+        "\n3:\n\t"
+        "mov	%[r], r11\n\t"
+        "mov	r8, r9\n\t"
+        "str	r3, [%[r], r8]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "mov	r5, #0\n\t"
+        "add	r8, r8, #4\n\t"
+        "mov	r9, r8\n\t"
+        "mov	r6, #88\n\t"
+        "cmp	r8, r6\n\t"
+        "ble	1b\n\t"
+        "mov	%[a], r10\n\t"
+        "str	r3, [%[r], r8]\n\t"
+        "mov	%[r], r12\n\t"
+        "mov	%[a], r11\n\t"
+        "mov	r3, #92\n\t"
+        "\n4:\n\t"
+        "ldr	r6, [%[a], r3]\n\t"
+        "str	r6, [%[r], r3]\n\t"
+        "subs	r3, r3, #4\n\t"
+        "bge	4b\n\t"
+        "mov	r6, #96\n\t"
+        "add	sp, sp, r6\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12"
+    );
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_3072_add_12(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c], %[c]\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r8"
+    );
+
+    return c;
+}
+
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_3072_sub_in_place_24(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "subs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "sbc	%[c], %[c], %[c]\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6"
+    );
+
+    return c;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_3072_add_24(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c], %[c]\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r8"
+    );
+
+    return c;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * m  Mask to AND against each digit.
+ */
+static void sp_3072_mask_12(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<12; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    r[0] = a[0] & m;
+    r[1] = a[1] & m;
+    r[2] = a[2] & m;
+    r[3] = a[3] & m;
+    r[4] = a[4] & m;
+    r[5] = a[5] & m;
+    r[6] = a[6] & m;
+    r[7] = a[7] & m;
+    r[8] = a[8] & m;
+    r[9] = a[9] & m;
+    r[10] = a[10] & m;
+    r[11] = a[11] & m;
+#endif
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_mul_24(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit* z0 = r;
+    sp_digit z1[24];
+    sp_digit a1[12];
+    sp_digit b1[12];
+    sp_digit z2[24];
+    sp_digit u, ca, cb;
+
+    ca = sp_3072_add_12(a1, a, &a[12]);
+    cb = sp_3072_add_12(b1, b, &b[12]);
+    u  = ca & cb;
+    sp_3072_mul_12(z1, a1, b1);
+    sp_3072_mul_12(z2, &a[12], &b[12]);
+    sp_3072_mul_12(z0, a, b);
+    sp_3072_mask_12(r + 24, a1, 0 - cb);
+    sp_3072_mask_12(b1, b1, 0 - ca);
+    u += sp_3072_add_12(r + 24, r + 24, b1);
+    u += sp_3072_sub_in_place_24(z1, z2);
+    u += sp_3072_sub_in_place_24(z1, z0);
+    u += sp_3072_add_24(r + 12, r + 12, z1);
+    r[36] = u;
+    XMEMSET(r + 36 + 1, 0, sizeof(sp_digit) * (12 - 1));
+    (void)sp_3072_add_24(r + 24, r + 24, z2);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_sqr_24(sp_digit* r, const sp_digit* a)
+{
+    sp_digit* z0 = r;
+    sp_digit z2[24];
+    sp_digit z1[24];
+    sp_digit a1[12];
+    sp_digit u;
+
+    u = sp_3072_add_12(a1, a, &a[12]);
+    sp_3072_sqr_12(z1, a1);
+    sp_3072_sqr_12(z2, &a[12]);
+    sp_3072_sqr_12(z0, a);
+    sp_3072_mask_12(r + 24, a1, 0 - u);
+    u += sp_3072_add_12(r + 24, r + 24, r + 24);
+    u += sp_3072_sub_in_place_24(z1, z2);
+    u += sp_3072_sub_in_place_24(z1, z0);
+    u += sp_3072_add_24(r + 12, r + 12, z1);
+    r[36] = u;
+    XMEMSET(r + 36 + 1, 0, sizeof(sp_digit) * (12 - 1));
+    (void)sp_3072_add_24(r + 24, r + 24, z2);
+}
+
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_3072_sub_in_place_48(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "subs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "sbc	%[c], %[c], %[c]\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6"
+    );
+
+    return c;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c], %[c]\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r8"
+    );
+
+    return c;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * m  Mask to AND against each digit.
+ */
+static void sp_3072_mask_24(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<24; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    int i;
+
+    for (i = 0; i < 24; i += 8) {
+        r[i+0] = a[i+0] & m;
+        r[i+1] = a[i+1] & m;
+        r[i+2] = a[i+2] & m;
+        r[i+3] = a[i+3] & m;
+        r[i+4] = a[i+4] & m;
+        r[i+5] = a[i+5] & m;
+        r[i+6] = a[i+6] & m;
+        r[i+7] = a[i+7] & m;
+    }
+#endif
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit* z0 = r;
+    sp_digit z1[48];
+    sp_digit a1[24];
+    sp_digit b1[24];
+    sp_digit z2[48];
+    sp_digit u, ca, cb;
+
+    ca = sp_3072_add_24(a1, a, &a[24]);
+    cb = sp_3072_add_24(b1, b, &b[24]);
+    u  = ca & cb;
+    sp_3072_mul_24(z1, a1, b1);
+    sp_3072_mul_24(z2, &a[24], &b[24]);
+    sp_3072_mul_24(z0, a, b);
+    sp_3072_mask_24(r + 48, a1, 0 - cb);
+    sp_3072_mask_24(b1, b1, 0 - ca);
+    u += sp_3072_add_24(r + 48, r + 48, b1);
+    u += sp_3072_sub_in_place_48(z1, z2);
+    u += sp_3072_sub_in_place_48(z1, z0);
+    u += sp_3072_add_48(r + 24, r + 24, z1);
+    r[72] = u;
+    XMEMSET(r + 72 + 1, 0, sizeof(sp_digit) * (24 - 1));
+    (void)sp_3072_add_48(r + 48, r + 48, z2);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a)
+{
+    sp_digit* z0 = r;
+    sp_digit z2[48];
+    sp_digit z1[48];
+    sp_digit a1[24];
+    sp_digit u;
+
+    u = sp_3072_add_24(a1, a, &a[24]);
+    sp_3072_sqr_24(z1, a1);
+    sp_3072_sqr_24(z2, &a[24]);
+    sp_3072_sqr_24(z0, a);
+    sp_3072_mask_24(r + 48, a1, 0 - u);
+    u += sp_3072_add_24(r + 48, r + 48, r + 48);
+    u += sp_3072_sub_in_place_48(z1, z2);
+    u += sp_3072_sub_in_place_48(z1, z0);
+    u += sp_3072_add_48(r + 24, r + 24, z1);
+    r[72] = u;
+    XMEMSET(r + 72 + 1, 0, sizeof(sp_digit) * (24 - 1));
+    (void)sp_3072_add_48(r + 48, r + 48, z2);
+}
+
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_3072_sub_in_place_96(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "subs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "sbc	%[c], %[c], %[c]\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6"
+    );
+
+    return c;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c], %[c]\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r8"
+    );
+
+    return c;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * m  Mask to AND against each digit.
+ */
+static void sp_3072_mask_48(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<48; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    int i;
+
+    for (i = 0; i < 48; i += 8) {
+        r[i+0] = a[i+0] & m;
+        r[i+1] = a[i+1] & m;
+        r[i+2] = a[i+2] & m;
+        r[i+3] = a[i+3] & m;
+        r[i+4] = a[i+4] & m;
+        r[i+5] = a[i+5] & m;
+        r[i+6] = a[i+6] & m;
+        r[i+7] = a[i+7] & m;
+    }
+#endif
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit* z0 = r;
+    sp_digit z1[96];
+    sp_digit a1[48];
+    sp_digit b1[48];
+    sp_digit z2[96];
+    sp_digit u, ca, cb;
+
+    ca = sp_3072_add_48(a1, a, &a[48]);
+    cb = sp_3072_add_48(b1, b, &b[48]);
+    u  = ca & cb;
+    sp_3072_mul_48(z1, a1, b1);
+    sp_3072_mul_48(z2, &a[48], &b[48]);
+    sp_3072_mul_48(z0, a, b);
+    sp_3072_mask_48(r + 96, a1, 0 - cb);
+    sp_3072_mask_48(b1, b1, 0 - ca);
+    u += sp_3072_add_48(r + 96, r + 96, b1);
+    u += sp_3072_sub_in_place_96(z1, z2);
+    u += sp_3072_sub_in_place_96(z1, z0);
+    u += sp_3072_add_96(r + 48, r + 48, z1);
+    r[144] = u;
+    XMEMSET(r + 144 + 1, 0, sizeof(sp_digit) * (48 - 1));
+    (void)sp_3072_add_96(r + 96, r + 96, z2);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a)
+{
+    sp_digit* z0 = r;
+    sp_digit z2[96];
+    sp_digit z1[96];
+    sp_digit a1[48];
+    sp_digit u;
+
+    u = sp_3072_add_48(a1, a, &a[48]);
+    sp_3072_sqr_48(z1, a1);
+    sp_3072_sqr_48(z2, &a[48]);
+    sp_3072_sqr_48(z0, a);
+    sp_3072_mask_48(r + 96, a1, 0 - u);
+    u += sp_3072_add_48(r + 96, r + 96, r + 96);
+    u += sp_3072_sub_in_place_96(z1, z2);
+    u += sp_3072_sub_in_place_96(z1, z0);
+    u += sp_3072_add_96(r + 48, r + 48, z1);
+    r[144] = u;
+    XMEMSET(r + 144 + 1, 0, sizeof(sp_digit) * (48 - 1));
+    (void)sp_3072_add_96(r + 96, r + 96, z2);
+}
+
+#endif /* !WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r6, %[a]\n\t"
+        "mov	r8, #0\n\t"
+        "add	r6, r6, #384\n\t"
+        "sub	r8, r8, #1\n\t"
+        "\n1:\n\t"
+        "adds	%[c], %[c], r8\n\t"
+        "ldr	r4, [%[a]]\n\t"
+        "ldr	r5, [%[b]]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "str	r4, [%[r]]\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c], %[c]\n\t"
+        "add	%[a], %[a], #4\n\t"
+        "add	%[b], %[b], #4\n\t"
+        "add	%[r], %[r], #4\n\t"
+        "cmp	%[a], r6\n\t"
+        "bne	1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r8"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into a. (a -= b)
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_3072_sub_in_place_96(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+    __asm__ __volatile__ (
+        "mov	r8, %[a]\n\t"
+        "add	r8, r8, #384\n\t"
+        "\n1:\n\t"
+        "mov	r5, #0\n\t"
+        "subs	r5, r5, %[c]\n\t"
+        "ldr	r3, [%[a]]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b]]\n\t"
+        "ldr	r6, [%[b], #4]\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "str	r3, [%[a]]\n\t"
+        "str	r4, [%[a], #4]\n\t"
+        "sbc	%[c], %[c], %[c]\n\t"
+        "add	%[a], %[a], #8\n\t"
+        "add	%[b], %[b], #8\n\t"
+        "cmp	%[a], r8\n\t"
+        "bne	1b\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6", "r8"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit tmp[96 * 2];
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r9, r3\n\t"
+        "mov	r12, %[r]\n\t"
+        "mov	r10, %[a]\n\t"
+        "mov	r11, %[b]\n\t"
+        "mov	r6, #1\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, r6, #128\n\t"
+        "add	r6, r6, r10\n\t"
+        "mov	r14, r6\n\t"
+        "\n1:\n\t"
+        "mov	%[r], #0\n\t"
+        "mov	r5, #0\n\t"
+        "mov	r6, #1\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, r6, #124\n\t"
+        "mov	%[a], r9\n\t"
+        "subs	%[a], %[a], r6\n\t"
+        "sbc	r6, r6, r6\n\t"
+        "mvn	r6, r6\n\t"
+        "and	%[a], %[a], r6\n\t"
+        "mov	%[b], r9\n\t"
+        "sub	%[b], %[b], %[a]\n\t"
+        "add	%[a], %[a], r10\n\t"
+        "add	%[b], %[b], r11\n\t"
+        "\n2:\n\t"
+        /* Multiply Start */
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r8, [%[b]]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, %[r]\n\t"
+        /* Multiply Done */
+        "add	%[a], %[a], #4\n\t"
+        "sub	%[b], %[b], #4\n\t"
+        "cmp	%[a], r14\n\t"
+        "beq	3f\n\t"
+        "mov	r6, r9\n\t"
+        "add	r6, r6, r10\n\t"
+        "cmp	%[a], r6\n\t"
+        "ble	2b\n\t"
+        "\n3:\n\t"
+        "mov	%[r], r12\n\t"
+        "mov	r8, r9\n\t"
+        "str	r3, [%[r], r8]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "add	r8, r8, #4\n\t"
+        "mov	r9, r8\n\t"
+        "mov	r6, #2\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, r6, #248\n\t"
+        "cmp	r8, r6\n\t"
+        "ble	1b\n\t"
+        "str	r3, [%[r], r8]\n\t"
+        "mov	%[a], r10\n\t"
+        "mov	%[b], r11\n\t"
+        :
+        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
+    );
+
+    XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r5, #0\n\t"
+        "mov	r9, r3\n\t"
+        "mov	r12, %[r]\n\t"
+        "mov	r6, #3\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "neg	r6, r6\n\t"
+        "add	sp, sp, r6\n\t"
+        "mov	r11, sp\n\t"
+        "mov	r10, %[a]\n\t"
+        "\n1:\n\t"
+        "mov	%[r], #0\n\t"
+        "mov	r6, #1\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, r6, #124\n\t"
+        "mov	%[a], r9\n\t"
+        "subs	%[a], %[a], r6\n\t"
+        "sbc	r6, r6, r6\n\t"
+        "mvn	r6, r6\n\t"
+        "and	%[a], %[a], r6\n\t"
+        "mov	r2, r9\n\t"
+        "sub	r2, r2, %[a]\n\t"
+        "add	%[a], %[a], r10\n\t"
+        "add	r2, r2, r10\n\t"
+        "\n2:\n\t"
+        "cmp	r2, %[a]\n\t"
+        "beq	4f\n\t"
+        /* Multiply * 2: Start */
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r8, [r2]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, %[r]\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, %[r]\n\t"
+        /* Multiply * 2: Done */
+        "bal	5f\n\t"
+        "\n4:\n\t"
+        /* Square: Start */
+        "ldr	r6, [%[a]]\n\t"
+        "umull	r6, r8, r6, r6\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adc	r5, r5, %[r]\n\t"
+        /* Square: Done */
+        "\n5:\n\t"
+        "add	%[a], %[a], #4\n\t"
+        "sub	r2, r2, #4\n\t"
+        "mov	r6, #1\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, r6, #128\n\t"
+        "add	r6, r6, r10\n\t"
+        "cmp	%[a], r6\n\t"
+        "beq	3f\n\t"
+        "cmp	%[a], r2\n\t"
+        "bgt	3f\n\t"
+        "mov	r8, r9\n\t"
+        "add	r8, r8, r10\n\t"
+        "cmp	%[a], r8\n\t"
+        "ble	2b\n\t"
+        "\n3:\n\t"
+        "mov	%[r], r11\n\t"
+        "mov	r8, r9\n\t"
+        "str	r3, [%[r], r8]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "mov	r5, #0\n\t"
+        "add	r8, r8, #4\n\t"
+        "mov	r9, r8\n\t"
+        "mov	r6, #2\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, r6, #248\n\t"
+        "cmp	r8, r6\n\t"
+        "ble	1b\n\t"
+        "mov	%[a], r10\n\t"
+        "str	r3, [%[r], r8]\n\t"
+        "mov	%[r], r12\n\t"
+        "mov	%[a], r11\n\t"
+        "mov	r3, #2\n\t"
+        "lsl	r3, r3, #8\n\t"
+        "add	r3, r3, #252\n\t"
+        "\n4:\n\t"
+        "ldr	r6, [%[a], r3]\n\t"
+        "str	r6, [%[r], r3]\n\t"
+        "subs	r3, r3, #4\n\t"
+        "bge	4b\n\t"
+        "mov	r6, #3\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	sp, sp, r6\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12"
+    );
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+#ifdef WOLFSSL_SP_SMALL
+/* AND m into each word of a and store in r.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * m  Mask to AND against each digit.
+ */
+static void sp_3072_mask_48(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+    int i;
+
+    for (i=0; i<48; i++) {
+        r[i] = a[i] & m;
+    }
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r6, %[a]\n\t"
+        "mov	r8, #0\n\t"
+        "add	r6, r6, #192\n\t"
+        "sub	r8, r8, #1\n\t"
+        "\n1:\n\t"
+        "adds	%[c], %[c], r8\n\t"
+        "ldr	r4, [%[a]]\n\t"
+        "ldr	r5, [%[b]]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "str	r4, [%[r]]\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c], %[c]\n\t"
+        "add	%[a], %[a], #4\n\t"
+        "add	%[b], %[b], #4\n\t"
+        "add	%[r], %[r], #4\n\t"
+        "cmp	%[a], r6\n\t"
+        "bne	1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r8"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into a. (a -= b)
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_3072_sub_in_place_48(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+    __asm__ __volatile__ (
+        "mov	r8, %[a]\n\t"
+        "add	r8, r8, #192\n\t"
+        "\n1:\n\t"
+        "mov	r5, #0\n\t"
+        "subs	r5, r5, %[c]\n\t"
+        "ldr	r3, [%[a]]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b]]\n\t"
+        "ldr	r6, [%[b], #4]\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "str	r3, [%[a]]\n\t"
+        "str	r4, [%[a], #4]\n\t"
+        "sbc	%[c], %[c], %[c]\n\t"
+        "add	%[a], %[a], #8\n\t"
+        "add	%[b], %[b], #8\n\t"
+        "cmp	%[a], r8\n\t"
+        "bne	1b\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6", "r8"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit tmp[48 * 2];
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r9, r3\n\t"
+        "mov	r12, %[r]\n\t"
+        "mov	r10, %[a]\n\t"
+        "mov	r11, %[b]\n\t"
+        "mov	r6, #192\n\t"
+        "add	r6, r6, r10\n\t"
+        "mov	r14, r6\n\t"
+        "\n1:\n\t"
+        "mov	%[r], #0\n\t"
+        "mov	r5, #0\n\t"
+        "mov	r6, #188\n\t"
+        "mov	%[a], r9\n\t"
+        "subs	%[a], %[a], r6\n\t"
+        "sbc	r6, r6, r6\n\t"
+        "mvn	r6, r6\n\t"
+        "and	%[a], %[a], r6\n\t"
+        "mov	%[b], r9\n\t"
+        "sub	%[b], %[b], %[a]\n\t"
+        "add	%[a], %[a], r10\n\t"
+        "add	%[b], %[b], r11\n\t"
+        "\n2:\n\t"
+        /* Multiply Start */
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r8, [%[b]]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, %[r]\n\t"
+        /* Multiply Done */
+        "add	%[a], %[a], #4\n\t"
+        "sub	%[b], %[b], #4\n\t"
+        "cmp	%[a], r14\n\t"
+        "beq	3f\n\t"
+        "mov	r6, r9\n\t"
+        "add	r6, r6, r10\n\t"
+        "cmp	%[a], r6\n\t"
+        "ble	2b\n\t"
+        "\n3:\n\t"
+        "mov	%[r], r12\n\t"
+        "mov	r8, r9\n\t"
+        "str	r3, [%[r], r8]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "add	r8, r8, #4\n\t"
+        "mov	r9, r8\n\t"
+        "mov	r6, #1\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, r6, #120\n\t"
+        "cmp	r8, r6\n\t"
+        "ble	1b\n\t"
+        "str	r3, [%[r], r8]\n\t"
+        "mov	%[a], r10\n\t"
+        "mov	%[b], r11\n\t"
+        :
+        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
+    );
+
+    XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r5, #0\n\t"
+        "mov	r9, r3\n\t"
+        "mov	r12, %[r]\n\t"
+        "mov	r6, #1\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, r6, #128\n\t"
+        "neg	r6, r6\n\t"
+        "add	sp, sp, r6\n\t"
+        "mov	r11, sp\n\t"
+        "mov	r10, %[a]\n\t"
+        "\n1:\n\t"
+        "mov	%[r], #0\n\t"
+        "mov	r6, #188\n\t"
+        "mov	%[a], r9\n\t"
+        "subs	%[a], %[a], r6\n\t"
+        "sbc	r6, r6, r6\n\t"
+        "mvn	r6, r6\n\t"
+        "and	%[a], %[a], r6\n\t"
+        "mov	r2, r9\n\t"
+        "sub	r2, r2, %[a]\n\t"
+        "add	%[a], %[a], r10\n\t"
+        "add	r2, r2, r10\n\t"
+        "\n2:\n\t"
+        "cmp	r2, %[a]\n\t"
+        "beq	4f\n\t"
+        /* Multiply * 2: Start */
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r8, [r2]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, %[r]\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, %[r]\n\t"
+        /* Multiply * 2: Done */
+        "bal	5f\n\t"
+        "\n4:\n\t"
+        /* Square: Start */
+        "ldr	r6, [%[a]]\n\t"
+        "umull	r6, r8, r6, r6\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adc	r5, r5, %[r]\n\t"
+        /* Square: Done */
+        "\n5:\n\t"
+        "add	%[a], %[a], #4\n\t"
+        "sub	r2, r2, #4\n\t"
+        "mov	r6, #192\n\t"
+        "add	r6, r6, r10\n\t"
+        "cmp	%[a], r6\n\t"
+        "beq	3f\n\t"
+        "cmp	%[a], r2\n\t"
+        "bgt	3f\n\t"
+        "mov	r8, r9\n\t"
+        "add	r8, r8, r10\n\t"
+        "cmp	%[a], r8\n\t"
+        "ble	2b\n\t"
+        "\n3:\n\t"
+        "mov	%[r], r11\n\t"
+        "mov	r8, r9\n\t"
+        "str	r3, [%[r], r8]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "mov	r5, #0\n\t"
+        "add	r8, r8, #4\n\t"
+        "mov	r9, r8\n\t"
+        "mov	r6, #1\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, r6, #120\n\t"
+        "cmp	r8, r6\n\t"
+        "ble	1b\n\t"
+        "mov	%[a], r10\n\t"
+        "str	r3, [%[r], r8]\n\t"
+        "mov	%[r], r12\n\t"
+        "mov	%[a], r11\n\t"
+        "mov	r3, #1\n\t"
+        "lsl	r3, r3, #8\n\t"
+        "add	r3, r3, #124\n\t"
+        "\n4:\n\t"
+        "ldr	r6, [%[a], r3]\n\t"
+        "str	r6, [%[r], r3]\n\t"
+        "subs	r3, r3, #4\n\t"
+        "bge	4b\n\t"
+        "mov	r6, #1\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, r6, #128\n\t"
+        "add	sp, sp, r6\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12"
+    );
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+/* Caclulate the bottom digit of -1/a mod 2^n.
+ *
+ * a    A single precision number.
+ * rho  Bottom word of inverse.
+ */
+static void sp_3072_mont_setup(const sp_digit* a, sp_digit* rho)
+{
+    sp_digit x, b;
+
+    b = a[0];
+    x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
+
+    /* rho = -1/m mod b */
+    *rho = -x;
+}
+
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision digit.
+ */
+SP_NOINLINE static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a,
+        sp_digit b)
+{
+    __asm__ __volatile__ (
+        "add	r9, %[a], #384\n\t"
+        /* A[0] * B */
+        "ldr	r6, [%[a]], #4\n\t"
+        "umull	r5, r3, r6, %[b]\n\t"
+        "mov	r4, #0\n\t"
+        "str	r5, [%[r]], #4\n\t"
+        /* A[0] * B - Done */
+        "\n1:\n\t"
+        "mov	r5, #0\n\t"
+        /* A[] * B */
+        "ldr	r6, [%[a]], #4\n\t"
+        "umull	r6, r8, r6, %[b]\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        /* A[] * B - Done */
+        "str	r3, [%[r]], #4\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "cmp	%[a], r9\n\t"
+        "blt	1b\n\t"
+        "str	r3, [%[r]]\n\t"
+        : [r] "+r" (r), [a] "+r" (a)
+        : [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r8", "r9"
+    );
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 3072 bits, just need to subtract.
+ *
+ * r  A single precision number.
+ * m  A single precision number.
+ */
+static void sp_3072_mont_norm_48(sp_digit* r, const sp_digit* m)
+{
+    XMEMSET(r, 0, sizeof(sp_digit) * 48);
+
+    /* r = 2^n mod m */
+    sp_3072_sub_in_place_48(r, m);
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * r  A single precision number representing condition subtract result.
+ * a  A single precision number to subtract from.
+ * b  A single precision number to subtract.
+ * m  Mask value to apply.
+ */
+SP_NOINLINE static sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, sp_digit m)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r5, #192\n\t"
+        "mov	r9, r5\n\t"
+        "mov	r8, #0\n\t"
+        "\n1:\n\t"
+        "ldr	r6, [%[b], r8]\n\t"
+        "and	r6, r6, %[m]\n\t"
+        "mov	r5, #0\n\t"
+        "subs	r5, r5, %[c]\n\t"
+        "ldr	r5, [%[a], r8]\n\t"
+        "sbcs	r5, r5, r6\n\t"
+        "sbcs	%[c], %[c], %[c]\n\t"
+        "str	r5, [%[r], r8]\n\t"
+        "add	r8, r8, #4\n\t"
+        "cmp	r8, r9\n\t"
+        "blt	1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r5", "r6", "r8", "r9"
+    );
+
+    return c;
+}
+
+/* Reduce the number back to 3072 bits using Montgomery reduction.
+ *
+ * a   A single precision number to reduce in place.
+ * m   The single precision number representing the modulus.
+ * mp  The digit representing the negative inverse of m mod 2^n.
+ */
+SP_NOINLINE static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_digit ca = 0;
+
+    __asm__ __volatile__ (
+        "mov	r9, %[mp]\n\t"
+        "mov	r12, %[m]\n\t"
+        "mov	r10, %[a]\n\t"
+        "mov	r4, #0\n\t"
+        "add	r11, r10, #192\n\t"
+        "\n1:\n\t"
+        /* mu = a[i] * mp */
+        "mov	%[mp], r9\n\t"
+        "ldr	%[a], [r10]\n\t"
+        "mul	%[mp], %[mp], %[a]\n\t"
+        "mov	%[m], r12\n\t"
+        "add	r14, r10, #184\n\t"
+        "\n2:\n\t"
+        /* a[i+j] += m[j] * mu */
+        "ldr	%[a], [r10]\n\t"
+        "mov	r5, #0\n\t"
+        /* Multiply m[j] and mu - Start */
+        "ldr	r8, [%[m]], #4\n\t"
+        "umull	r6, r8, %[mp], r8\n\t"
+        "adds	%[a], %[a], r6\n\t"
+        "adc	r5, r5, r8\n\t"
+        /* Multiply m[j] and mu - Done */
+        "adds	r4, r4, %[a]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "str	r4, [r10], #4\n\t"
+        /* a[i+j+1] += m[j+1] * mu */
+        "ldr	%[a], [r10]\n\t"
+        "mov	r4, #0\n\t"
+        /* Multiply m[j] and mu - Start */
+        "ldr	r8, [%[m]], #4\n\t"
+        "umull	r6, r8, %[mp], r8\n\t"
+        "adds	%[a], %[a], r6\n\t"
+        "adc	r4, r4, r8\n\t"
+        /* Multiply m[j] and mu - Done */
+        "adds	r5, r5, %[a]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "str	r5, [r10], #4\n\t"
+        "cmp	r10, r14\n\t"
+        "blt	2b\n\t"
+        /* a[i+46] += m[46] * mu */
+        "ldr	%[a], [r10]\n\t"
+        "mov	r5, #0\n\t"
+        /* Multiply m[j] and mu - Start */
+        "ldr	r8, [%[m]], #4\n\t"
+        "umull	r6, r8, %[mp], r8\n\t"
+        "adds	%[a], %[a], r6\n\t"
+        "adc	r5, r5, r8\n\t"
+        /* Multiply m[j] and mu - Done */
+        "adds	r4, r4, %[a]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "str	r4, [r10], #4\n\t"
+        /* a[i+47] += m[47] * mu */
+        "mov	r4, %[ca]\n\t"
+        "mov	%[ca], #0\n\t"
+        /* Multiply m[47] and mu - Start */
+        "ldr	r8, [%[m]]\n\t"
+        "umull	r6, r8, %[mp], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	%[ca], %[ca], #0\n\t"
+        /* Multiply m[47] and mu - Done */
+        "ldr	r6, [r10]\n\t"
+        "ldr	r8, [r10, #4]\n\t"
+        "adds	r6, r6, r5\n\t"
+        "adcs	r8, r8, r4\n\t"
+        "adc	%[ca], %[ca], #0\n\t"
+        "str	r6, [r10]\n\t"
+        "str	r8, [r10, #4]\n\t"
+        /* Next word in a */
+        "sub	r10, r10, #184\n\t"
+        "cmp	r10, r11\n\t"
+        "blt	1b\n\t"
+        "mov	%[a], r10\n\t"
+        "mov	%[m], r12\n\t"
+        : [ca] "+r" (ca), [a] "+r" (a)
+        : [m] "r" (m), [mp] "r" (mp)
+        : "memory", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
+    );
+
+    sp_3072_cond_sub_48(a - 48, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montogmery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r   Result of multiplication.
+ * a   First number to multiply in Montogmery form.
+ * b   Second number to multiply in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_3072_mont_mul_48(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_3072_mul_48(r, a, b);
+    sp_3072_mont_reduce_48(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_3072_mont_sqr_48(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_3072_sqr_48(r, a);
+    sp_3072_mont_reduce_48(r, m, mp);
+}
+
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision digit.
+ */
+SP_NOINLINE static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a,
+        sp_digit b)
+{
+    __asm__ __volatile__ (
+        "add	r9, %[a], #192\n\t"
+        /* A[0] * B */
+        "ldr	r6, [%[a]], #4\n\t"
+        "umull	r5, r3, r6, %[b]\n\t"
+        "mov	r4, #0\n\t"
+        "str	r5, [%[r]], #4\n\t"
+        /* A[0] * B - Done */
+        "\n1:\n\t"
+        "mov	r5, #0\n\t"
+        /* A[] * B */
+        "ldr	r6, [%[a]], #4\n\t"
+        "umull	r6, r8, r6, %[b]\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        /* A[] * B - Done */
+        "str	r3, [%[r]], #4\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "cmp	%[a], r9\n\t"
+        "blt	1b\n\t"
+        "str	r3, [%[r]]\n\t"
+        : [r] "+r" (r), [a] "+r" (a)
+        : [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r8", "r9"
+    );
+}
+
+/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
+ *
+ * d1   The high order half of the number to divide.
+ * d0   The low order half of the number to divide.
+ * div  The dividend.
+ * returns the result of the division.
+ *
+ * Note that this is an approximate div. It may give an answer 1 larger.
+ */
+SP_NOINLINE static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0,
+        sp_digit div)
+{
+    sp_digit r = 0;
+
+    __asm__ __volatile__ (
+        "lsr	r6, %[div], #16\n\t"
+        "add	r6, r6, #1\n\t"
+        "udiv	r4, %[d1], r6\n\t"
+        "lsl	r8, r4, #16\n\t"
+        "umull	r4, r5, %[div], r8\n\t"
+        "subs	%[d0], %[d0], r4\n\t"
+        "sbc	%[d1], %[d1], r5\n\t"
+        "udiv	r5, %[d1], r6\n\t"
+        "lsl	r4, r5, #16\n\t"
+        "add	r8, r8, r4\n\t"
+        "umull	r4, r5, %[div], r4\n\t"
+        "subs	%[d0], %[d0], r4\n\t"
+        "sbc	%[d1], %[d1], r5\n\t"
+        "lsl	r4, %[d1], #16\n\t"
+        "orr	r4, r4, %[d0], lsr #16\n\t"
+        "udiv	r4, r4, r6\n\t"
+        "add	r8, r8, r4\n\t"
+        "umull	r4, r5, %[div], r4\n\t"
+        "subs	%[d0], %[d0], r4\n\t"
+        "sbc	%[d1], %[d1], r5\n\t"
+        "lsl	r4, %[d1], #16\n\t"
+        "orr	r4, r4, %[d0], lsr #16\n\t"
+        "udiv	r4, r4, r6\n\t"
+        "add	r8, r8, r4\n\t"
+        "umull	r4, r5, %[div], r4\n\t"
+        "subs	%[d0], %[d0], r4\n\t"
+        "sbc	%[d1], %[d1], r5\n\t"
+        "udiv	r4, %[d0], %[div]\n\t"
+        "add	r8, r8, r4\n\t"
+        "mov	%[r], r8\n\t"
+        : [r] "+r" (r)
+        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+        : "r4", "r5", "r6", "r8"
+    );
+    return r;
+}
+
+/* Compare a with b in constant time.
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+SP_NOINLINE static int32_t sp_3072_cmp_48(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+
+
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mvn	r3, r3\n\t"
+        "mov	r6, #188\n\t"
+        "\n1:\n\t"
+        "ldr	r8, [%[a], r6]\n\t"
+        "ldr	r5, [%[b], r6]\n\t"
+        "and	r8, r8, r3\n\t"
+        "and	r5, r5, r3\n\t"
+        "mov	r4, r8\n\t"
+        "subs	r8, r8, r5\n\t"
+        "sbc	r8, r8, r8\n\t"
+        "add	%[r], %[r], r8\n\t"
+        "mvn	r8, r8\n\t"
+        "and	r3, r3, r8\n\t"
+        "subs	r5, r5, r4\n\t"
+        "sbc	r8, r8, r8\n\t"
+        "sub	%[r], %[r], r8\n\t"
+        "mvn	r8, r8\n\t"
+        "and	r3, r3, r8\n\t"
+        "sub	r6, r6, #4\n\t"
+        "cmp	r6, #0\n\t"
+        "bge	1b\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
+        : "r3", "r4", "r5", "r6", "r8"
+    );
+
+    return r;
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Nmber to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_div_48(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[96], t2[49];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    div = d[47];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 48);
+    for (i=47; i>=0; i--) {
+        r1 = div_3072_word_48(t1[48 + i], t1[48 + i - 1], div);
+
+        sp_3072_mul_d_48(t2, d, r1);
+        t1[48 + i] += sp_3072_sub_in_place_48(&t1[i], t2);
+        t1[48 + i] -= t2[48];
+        sp_3072_mask_48(t2, d, t1[48 + i]);
+        t1[48 + i] += sp_3072_add_48(&t1[i], &t1[i], t2);
+        sp_3072_mask_48(t2, d, t1[48 + i]);
+        t1[48 + i] += sp_3072_add_48(&t1[i], &t1[i], t2);
+    }
+
+    r1 = sp_3072_cmp_48(t1, d) >= 0;
+    sp_3072_cond_sub_48(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_mod_48(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_3072_div_48(a, m, NULL, r);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * r     A single precision number that is the result of the operation.
+ * a     A single precision number being exponentiated.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[16][96];
+#else
+    sp_digit* t[16];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 96, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<16; i++) {
+            t[i] = td + i * 96;
+        }
+#endif
+        norm = t[0];
+
+        sp_3072_mont_setup(m, &mp);
+        sp_3072_mont_norm_48(norm, m);
+
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 48U);
+        if (reduceA != 0) {
+            err = sp_3072_mod_48(t[1] + 48, a, m);
+            if (err == MP_OKAY) {
+                err = sp_3072_mod_48(t[1], t[1], m);
+            }
+        }
+        else {
+            XMEMCPY(t[1] + 48, a, sizeof(sp_digit) * 48);
+            err = sp_3072_mod_48(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_3072_mont_sqr_48(t[ 2], t[ 1], m, mp);
+        sp_3072_mont_mul_48(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_3072_mont_sqr_48(t[ 4], t[ 2], m, mp);
+        sp_3072_mont_mul_48(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_3072_mont_sqr_48(t[ 6], t[ 3], m, mp);
+        sp_3072_mont_mul_48(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_3072_mont_sqr_48(t[ 8], t[ 4], m, mp);
+        sp_3072_mont_mul_48(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_3072_mont_sqr_48(t[10], t[ 5], m, mp);
+        sp_3072_mont_mul_48(t[11], t[ 6], t[ 5], m, mp);
+        sp_3072_mont_sqr_48(t[12], t[ 6], m, mp);
+        sp_3072_mont_mul_48(t[13], t[ 7], t[ 6], m, mp);
+        sp_3072_mont_sqr_48(t[14], t[ 7], m, mp);
+        sp_3072_mont_mul_48(t[15], t[ 8], t[ 7], m, mp);
+
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 4;
+        if (c == 32) {
+            c = 28;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 48);
+        for (; i>=0 || c>=4; ) {
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 28;
+                n <<= 4;
+                c = 28;
+            }
+            else if (c < 4) {
+                y = n >> 28;
+                n = e[i--];
+                c = 4 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                y = (n >> 28) & 0xf;
+                n <<= 4;
+                c -= 4;
+            }
+
+            sp_3072_mont_sqr_48(r, r, m, mp);
+            sp_3072_mont_sqr_48(r, r, m, mp);
+            sp_3072_mont_sqr_48(r, r, m, mp);
+            sp_3072_mont_sqr_48(r, r, m, mp);
+
+            sp_3072_mont_mul_48(r, r, t[y], m, mp);
+        }
+
+        XMEMSET(&r[48], 0, sizeof(sp_digit) * 48U);
+        sp_3072_mont_reduce_48(r, m, mp);
+
+        mask = 0 - (sp_3072_cmp_48(r, m) >= 0);
+        sp_3072_cond_sub_48(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#else
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * r     A single precision number that is the result of the operation.
+ * a     A single precision number being exponentiated.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[32][96];
+#else
+    sp_digit* t[32];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 96, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<32; i++) {
+            t[i] = td + i * 96;
+        }
+#endif
+        norm = t[0];
+
+        sp_3072_mont_setup(m, &mp);
+        sp_3072_mont_norm_48(norm, m);
+
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 48U);
+        if (reduceA != 0) {
+            err = sp_3072_mod_48(t[1] + 48, a, m);
+            if (err == MP_OKAY) {
+                err = sp_3072_mod_48(t[1], t[1], m);
+            }
+        }
+        else {
+            XMEMCPY(t[1] + 48, a, sizeof(sp_digit) * 48);
+            err = sp_3072_mod_48(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_3072_mont_sqr_48(t[ 2], t[ 1], m, mp);
+        sp_3072_mont_mul_48(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_3072_mont_sqr_48(t[ 4], t[ 2], m, mp);
+        sp_3072_mont_mul_48(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_3072_mont_sqr_48(t[ 6], t[ 3], m, mp);
+        sp_3072_mont_mul_48(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_3072_mont_sqr_48(t[ 8], t[ 4], m, mp);
+        sp_3072_mont_mul_48(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_3072_mont_sqr_48(t[10], t[ 5], m, mp);
+        sp_3072_mont_mul_48(t[11], t[ 6], t[ 5], m, mp);
+        sp_3072_mont_sqr_48(t[12], t[ 6], m, mp);
+        sp_3072_mont_mul_48(t[13], t[ 7], t[ 6], m, mp);
+        sp_3072_mont_sqr_48(t[14], t[ 7], m, mp);
+        sp_3072_mont_mul_48(t[15], t[ 8], t[ 7], m, mp);
+        sp_3072_mont_sqr_48(t[16], t[ 8], m, mp);
+        sp_3072_mont_mul_48(t[17], t[ 9], t[ 8], m, mp);
+        sp_3072_mont_sqr_48(t[18], t[ 9], m, mp);
+        sp_3072_mont_mul_48(t[19], t[10], t[ 9], m, mp);
+        sp_3072_mont_sqr_48(t[20], t[10], m, mp);
+        sp_3072_mont_mul_48(t[21], t[11], t[10], m, mp);
+        sp_3072_mont_sqr_48(t[22], t[11], m, mp);
+        sp_3072_mont_mul_48(t[23], t[12], t[11], m, mp);
+        sp_3072_mont_sqr_48(t[24], t[12], m, mp);
+        sp_3072_mont_mul_48(t[25], t[13], t[12], m, mp);
+        sp_3072_mont_sqr_48(t[26], t[13], m, mp);
+        sp_3072_mont_mul_48(t[27], t[14], t[13], m, mp);
+        sp_3072_mont_sqr_48(t[28], t[14], m, mp);
+        sp_3072_mont_mul_48(t[29], t[15], t[14], m, mp);
+        sp_3072_mont_sqr_48(t[30], t[15], m, mp);
+        sp_3072_mont_mul_48(t[31], t[16], t[15], m, mp);
+
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 5;
+        if (c == 32) {
+            c = 27;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 48);
+        for (; i>=0 || c>=5; ) {
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 27;
+                n <<= 5;
+                c = 27;
+            }
+            else if (c < 5) {
+                y = n >> 27;
+                n = e[i--];
+                c = 5 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                y = (n >> 27) & 0x1f;
+                n <<= 5;
+                c -= 5;
+            }
+
+            sp_3072_mont_sqr_48(r, r, m, mp);
+            sp_3072_mont_sqr_48(r, r, m, mp);
+            sp_3072_mont_sqr_48(r, r, m, mp);
+            sp_3072_mont_sqr_48(r, r, m, mp);
+            sp_3072_mont_sqr_48(r, r, m, mp);
+
+            sp_3072_mont_mul_48(r, r, t[y], m, mp);
+        }
+
+        XMEMSET(&r[48], 0, sizeof(sp_digit) * 48U);
+        sp_3072_mont_reduce_48(r, m, mp);
+
+        mask = 0 - (sp_3072_cmp_48(r, m) >= 0);
+        sp_3072_cond_sub_48(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#endif /* WOLFSSL_SP_SMALL */
+
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 3072 bits, just need to subtract.
+ *
+ * r  A single precision number.
+ * m  A single precision number.
+ */
+static void sp_3072_mont_norm_96(sp_digit* r, const sp_digit* m)
+{
+    XMEMSET(r, 0, sizeof(sp_digit) * 96);
+
+    /* r = 2^n mod m */
+    sp_3072_sub_in_place_96(r, m);
+}
+
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * r  A single precision number representing condition subtract result.
+ * a  A single precision number to subtract from.
+ * b  A single precision number to subtract.
+ * m  Mask value to apply.
+ */
+SP_NOINLINE static sp_digit sp_3072_cond_sub_96(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, sp_digit m)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r5, #1\n\t"
+        "lsl	r5, r5, #8\n\t"
+        "add	r5, r5, #128\n\t"
+        "mov	r9, r5\n\t"
+        "mov	r8, #0\n\t"
+        "\n1:\n\t"
+        "ldr	r6, [%[b], r8]\n\t"
+        "and	r6, r6, %[m]\n\t"
+        "mov	r5, #0\n\t"
+        "subs	r5, r5, %[c]\n\t"
+        "ldr	r5, [%[a], r8]\n\t"
+        "sbcs	r5, r5, r6\n\t"
+        "sbcs	%[c], %[c], %[c]\n\t"
+        "str	r5, [%[r], r8]\n\t"
+        "add	r8, r8, #4\n\t"
+        "cmp	r8, r9\n\t"
+        "blt	1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r5", "r6", "r8", "r9"
+    );
+
+    return c;
+}
+
+/* Reduce the number back to 3072 bits using Montgomery reduction.
+ *
+ * a   A single precision number to reduce in place.
+ * m   The single precision number representing the modulus.
+ * mp  The digit representing the negative inverse of m mod 2^n.
+ */
+SP_NOINLINE static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_digit ca = 0;
+
+    __asm__ __volatile__ (
+        "mov	r9, %[mp]\n\t"
+        "mov	r12, %[m]\n\t"
+        "mov	r10, %[a]\n\t"
+        "mov	r4, #0\n\t"
+        "add	r11, r10, #384\n\t"
+        "\n1:\n\t"
+        /* mu = a[i] * mp */
+        "mov	%[mp], r9\n\t"
+        "ldr	%[a], [r10]\n\t"
+        "mul	%[mp], %[mp], %[a]\n\t"
+        "mov	%[m], r12\n\t"
+        "add	r14, r10, #376\n\t"
+        "\n2:\n\t"
+        /* a[i+j] += m[j] * mu */
+        "ldr	%[a], [r10]\n\t"
+        "mov	r5, #0\n\t"
+        /* Multiply m[j] and mu - Start */
+        "ldr	r8, [%[m]], #4\n\t"
+        "umull	r6, r8, %[mp], r8\n\t"
+        "adds	%[a], %[a], r6\n\t"
+        "adc	r5, r5, r8\n\t"
+        /* Multiply m[j] and mu - Done */
+        "adds	r4, r4, %[a]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "str	r4, [r10], #4\n\t"
+        /* a[i+j+1] += m[j+1] * mu */
+        "ldr	%[a], [r10]\n\t"
+        "mov	r4, #0\n\t"
+        /* Multiply m[j] and mu - Start */
+        "ldr	r8, [%[m]], #4\n\t"
+        "umull	r6, r8, %[mp], r8\n\t"
+        "adds	%[a], %[a], r6\n\t"
+        "adc	r4, r4, r8\n\t"
+        /* Multiply m[j] and mu - Done */
+        "adds	r5, r5, %[a]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "str	r5, [r10], #4\n\t"
+        "cmp	r10, r14\n\t"
+        "blt	2b\n\t"
+        /* a[i+94] += m[94] * mu */
+        "ldr	%[a], [r10]\n\t"
+        "mov	r5, #0\n\t"
+        /* Multiply m[j] and mu - Start */
+        "ldr	r8, [%[m]], #4\n\t"
+        "umull	r6, r8, %[mp], r8\n\t"
+        "adds	%[a], %[a], r6\n\t"
+        "adc	r5, r5, r8\n\t"
+        /* Multiply m[j] and mu - Done */
+        "adds	r4, r4, %[a]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "str	r4, [r10], #4\n\t"
+        /* a[i+95] += m[95] * mu */
+        "mov	r4, %[ca]\n\t"
+        "mov	%[ca], #0\n\t"
+        /* Multiply m[95] and mu - Start */
+        "ldr	r8, [%[m]]\n\t"
+        "umull	r6, r8, %[mp], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	%[ca], %[ca], #0\n\t"
+        /* Multiply m[95] and mu - Done */
+        "ldr	r6, [r10]\n\t"
+        "ldr	r8, [r10, #4]\n\t"
+        "adds	r6, r6, r5\n\t"
+        "adcs	r8, r8, r4\n\t"
+        "adc	%[ca], %[ca], #0\n\t"
+        "str	r6, [r10]\n\t"
+        "str	r8, [r10, #4]\n\t"
+        /* Next word in a */
+        "sub	r10, r10, #376\n\t"
+        "cmp	r10, r11\n\t"
+        "blt	1b\n\t"
+        "mov	%[a], r10\n\t"
+        "mov	%[m], r12\n\t"
+        : [ca] "+r" (ca), [a] "+r" (a)
+        : [m] "r" (m), [mp] "r" (mp)
+        : "memory", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
+    );
+
+    sp_3072_cond_sub_96(a - 96, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montogmery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r   Result of multiplication.
+ * a   First number to multiply in Montogmery form.
+ * b   Second number to multiply in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_3072_mont_mul_96(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_3072_mul_96(r, a, b);
+    sp_3072_mont_reduce_96(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_3072_mont_sqr_96(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_3072_sqr_96(r, a);
+    sp_3072_mont_reduce_96(r, m, mp);
+}
+
+/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
+ *
+ * d1   The high order half of the number to divide.
+ * d0   The low order half of the number to divide.
+ * div  The dividend.
+ * returns the result of the division.
+ *
+ * Note that this is an approximate div. It may give an answer 1 larger.
+ */
+SP_NOINLINE static sp_digit div_3072_word_96(sp_digit d1, sp_digit d0,
+        sp_digit div)
+{
+    sp_digit r = 0;
+
+    __asm__ __volatile__ (
+        "lsr	r6, %[div], #16\n\t"
+        "add	r6, r6, #1\n\t"
+        "udiv	r4, %[d1], r6\n\t"
+        "lsl	r8, r4, #16\n\t"
+        "umull	r4, r5, %[div], r8\n\t"
+        "subs	%[d0], %[d0], r4\n\t"
+        "sbc	%[d1], %[d1], r5\n\t"
+        "udiv	r5, %[d1], r6\n\t"
+        "lsl	r4, r5, #16\n\t"
+        "add	r8, r8, r4\n\t"
+        "umull	r4, r5, %[div], r4\n\t"
+        "subs	%[d0], %[d0], r4\n\t"
+        "sbc	%[d1], %[d1], r5\n\t"
+        "lsl	r4, %[d1], #16\n\t"
+        "orr	r4, r4, %[d0], lsr #16\n\t"
+        "udiv	r4, r4, r6\n\t"
+        "add	r8, r8, r4\n\t"
+        "umull	r4, r5, %[div], r4\n\t"
+        "subs	%[d0], %[d0], r4\n\t"
+        "sbc	%[d1], %[d1], r5\n\t"
+        "lsl	r4, %[d1], #16\n\t"
+        "orr	r4, r4, %[d0], lsr #16\n\t"
+        "udiv	r4, r4, r6\n\t"
+        "add	r8, r8, r4\n\t"
+        "umull	r4, r5, %[div], r4\n\t"
+        "subs	%[d0], %[d0], r4\n\t"
+        "sbc	%[d1], %[d1], r5\n\t"
+        "udiv	r4, %[d0], %[div]\n\t"
+        "add	r8, r8, r4\n\t"
+        "mov	%[r], r8\n\t"
+        : [r] "+r" (r)
+        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+        : "r4", "r5", "r6", "r8"
+    );
+    return r;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * m  Mask to AND against each digit.
+ */
+static void sp_3072_mask_96(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<96; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    int i;
+
+    for (i = 0; i < 96; i += 8) {
+        r[i+0] = a[i+0] & m;
+        r[i+1] = a[i+1] & m;
+        r[i+2] = a[i+2] & m;
+        r[i+3] = a[i+3] & m;
+        r[i+4] = a[i+4] & m;
+        r[i+5] = a[i+5] & m;
+        r[i+6] = a[i+6] & m;
+        r[i+7] = a[i+7] & m;
+    }
+#endif
+}
+
+/* Compare a with b in constant time.
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+SP_NOINLINE static int32_t sp_3072_cmp_96(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+
+
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mvn	r3, r3\n\t"
+        "mov	r6, #1\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, r6, #124\n\t"
+        "\n1:\n\t"
+        "ldr	r8, [%[a], r6]\n\t"
+        "ldr	r5, [%[b], r6]\n\t"
+        "and	r8, r8, r3\n\t"
+        "and	r5, r5, r3\n\t"
+        "mov	r4, r8\n\t"
+        "subs	r8, r8, r5\n\t"
+        "sbc	r8, r8, r8\n\t"
+        "add	%[r], %[r], r8\n\t"
+        "mvn	r8, r8\n\t"
+        "and	r3, r3, r8\n\t"
+        "subs	r5, r5, r4\n\t"
+        "sbc	r8, r8, r8\n\t"
+        "sub	%[r], %[r], r8\n\t"
+        "mvn	r8, r8\n\t"
+        "and	r3, r3, r8\n\t"
+        "sub	r6, r6, #4\n\t"
+        "cmp	r6, #0\n\t"
+        "bge	1b\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
+        : "r3", "r4", "r5", "r6", "r8"
+    );
+
+    return r;
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Nmber to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_div_96(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[192], t2[97];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    div = d[95];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 96);
+    for (i=95; i>=0; i--) {
+        r1 = div_3072_word_96(t1[96 + i], t1[96 + i - 1], div);
+
+        sp_3072_mul_d_96(t2, d, r1);
+        t1[96 + i] += sp_3072_sub_in_place_96(&t1[i], t2);
+        t1[96 + i] -= t2[96];
+        sp_3072_mask_96(t2, d, t1[96 + i]);
+        t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], t2);
+        sp_3072_mask_96(t2, d, t1[96 + i]);
+        t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], t2);
+    }
+
+    r1 = sp_3072_cmp_96(t1, d) >= 0;
+    sp_3072_cond_sub_96(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_mod_96(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_3072_div_96(a, m, NULL, r);
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Nmber to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_div_96_cond(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[192], t2[97];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    div = d[95];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 96);
+    for (i=95; i>=0; i--) {
+        r1 = div_3072_word_96(t1[96 + i], t1[96 + i - 1], div);
+
+        sp_3072_mul_d_96(t2, d, r1);
+        t1[96 + i] += sp_3072_sub_in_place_96(&t1[i], t2);
+        t1[96 + i] -= t2[96];
+        if (t1[96 + i] != 0) {
+            t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], d);
+            if (t1[96 + i] != 0)
+                t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], d);
+        }
+    }
+
+    r1 = sp_3072_cmp_96(t1, d) >= 0;
+    sp_3072_cond_sub_96(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_mod_96_cond(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_3072_div_96_cond(a, m, NULL, r);
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
+                                                     defined(WOLFSSL_HAVE_SP_DH)
+#ifdef WOLFSSL_SP_SMALL
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * r     A single precision number that is the result of the operation.
+ * a     A single precision number being exponentiated.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_3072_mod_exp_96(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[16][192];
+#else
+    sp_digit* t[16];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 192, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<16; i++) {
+            t[i] = td + i * 192;
+        }
+#endif
+        norm = t[0];
+
+        sp_3072_mont_setup(m, &mp);
+        sp_3072_mont_norm_96(norm, m);
+
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 96U);
+        if (reduceA != 0) {
+            err = sp_3072_mod_96(t[1] + 96, a, m);
+            if (err == MP_OKAY) {
+                err = sp_3072_mod_96(t[1], t[1], m);
+            }
+        }
+        else {
+            XMEMCPY(t[1] + 96, a, sizeof(sp_digit) * 96);
+            err = sp_3072_mod_96(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_3072_mont_sqr_96(t[ 2], t[ 1], m, mp);
+        sp_3072_mont_mul_96(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_3072_mont_sqr_96(t[ 4], t[ 2], m, mp);
+        sp_3072_mont_mul_96(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_3072_mont_sqr_96(t[ 6], t[ 3], m, mp);
+        sp_3072_mont_mul_96(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_3072_mont_sqr_96(t[ 8], t[ 4], m, mp);
+        sp_3072_mont_mul_96(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_3072_mont_sqr_96(t[10], t[ 5], m, mp);
+        sp_3072_mont_mul_96(t[11], t[ 6], t[ 5], m, mp);
+        sp_3072_mont_sqr_96(t[12], t[ 6], m, mp);
+        sp_3072_mont_mul_96(t[13], t[ 7], t[ 6], m, mp);
+        sp_3072_mont_sqr_96(t[14], t[ 7], m, mp);
+        sp_3072_mont_mul_96(t[15], t[ 8], t[ 7], m, mp);
+
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 4;
+        if (c == 32) {
+            c = 28;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 96);
+        for (; i>=0 || c>=4; ) {
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 28;
+                n <<= 4;
+                c = 28;
+            }
+            else if (c < 4) {
+                y = n >> 28;
+                n = e[i--];
+                c = 4 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                y = (n >> 28) & 0xf;
+                n <<= 4;
+                c -= 4;
+            }
+
+            sp_3072_mont_sqr_96(r, r, m, mp);
+            sp_3072_mont_sqr_96(r, r, m, mp);
+            sp_3072_mont_sqr_96(r, r, m, mp);
+            sp_3072_mont_sqr_96(r, r, m, mp);
+
+            sp_3072_mont_mul_96(r, r, t[y], m, mp);
+        }
+
+        XMEMSET(&r[96], 0, sizeof(sp_digit) * 96U);
+        sp_3072_mont_reduce_96(r, m, mp);
+
+        mask = 0 - (sp_3072_cmp_96(r, m) >= 0);
+        sp_3072_cond_sub_96(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#else
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * r     A single precision number that is the result of the operation.
+ * a     A single precision number being exponentiated.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_3072_mod_exp_96(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[32][192];
+#else
+    sp_digit* t[32];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 192, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<32; i++) {
+            t[i] = td + i * 192;
+        }
+#endif
+        norm = t[0];
+
+        sp_3072_mont_setup(m, &mp);
+        sp_3072_mont_norm_96(norm, m);
+
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 96U);
+        if (reduceA != 0) {
+            err = sp_3072_mod_96(t[1] + 96, a, m);
+            if (err == MP_OKAY) {
+                err = sp_3072_mod_96(t[1], t[1], m);
+            }
+        }
+        else {
+            XMEMCPY(t[1] + 96, a, sizeof(sp_digit) * 96);
+            err = sp_3072_mod_96(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_3072_mont_sqr_96(t[ 2], t[ 1], m, mp);
+        sp_3072_mont_mul_96(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_3072_mont_sqr_96(t[ 4], t[ 2], m, mp);
+        sp_3072_mont_mul_96(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_3072_mont_sqr_96(t[ 6], t[ 3], m, mp);
+        sp_3072_mont_mul_96(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_3072_mont_sqr_96(t[ 8], t[ 4], m, mp);
+        sp_3072_mont_mul_96(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_3072_mont_sqr_96(t[10], t[ 5], m, mp);
+        sp_3072_mont_mul_96(t[11], t[ 6], t[ 5], m, mp);
+        sp_3072_mont_sqr_96(t[12], t[ 6], m, mp);
+        sp_3072_mont_mul_96(t[13], t[ 7], t[ 6], m, mp);
+        sp_3072_mont_sqr_96(t[14], t[ 7], m, mp);
+        sp_3072_mont_mul_96(t[15], t[ 8], t[ 7], m, mp);
+        sp_3072_mont_sqr_96(t[16], t[ 8], m, mp);
+        sp_3072_mont_mul_96(t[17], t[ 9], t[ 8], m, mp);
+        sp_3072_mont_sqr_96(t[18], t[ 9], m, mp);
+        sp_3072_mont_mul_96(t[19], t[10], t[ 9], m, mp);
+        sp_3072_mont_sqr_96(t[20], t[10], m, mp);
+        sp_3072_mont_mul_96(t[21], t[11], t[10], m, mp);
+        sp_3072_mont_sqr_96(t[22], t[11], m, mp);
+        sp_3072_mont_mul_96(t[23], t[12], t[11], m, mp);
+        sp_3072_mont_sqr_96(t[24], t[12], m, mp);
+        sp_3072_mont_mul_96(t[25], t[13], t[12], m, mp);
+        sp_3072_mont_sqr_96(t[26], t[13], m, mp);
+        sp_3072_mont_mul_96(t[27], t[14], t[13], m, mp);
+        sp_3072_mont_sqr_96(t[28], t[14], m, mp);
+        sp_3072_mont_mul_96(t[29], t[15], t[14], m, mp);
+        sp_3072_mont_sqr_96(t[30], t[15], m, mp);
+        sp_3072_mont_mul_96(t[31], t[16], t[15], m, mp);
+
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 5;
+        if (c == 32) {
+            c = 27;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 96);
+        for (; i>=0 || c>=5; ) {
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 27;
+                n <<= 5;
+                c = 27;
+            }
+            else if (c < 5) {
+                y = n >> 27;
+                n = e[i--];
+                c = 5 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                y = (n >> 27) & 0x1f;
+                n <<= 5;
+                c -= 5;
+            }
+
+            sp_3072_mont_sqr_96(r, r, m, mp);
+            sp_3072_mont_sqr_96(r, r, m, mp);
+            sp_3072_mont_sqr_96(r, r, m, mp);
+            sp_3072_mont_sqr_96(r, r, m, mp);
+            sp_3072_mont_sqr_96(r, r, m, mp);
+
+            sp_3072_mont_mul_96(r, r, t[y], m, mp);
+        }
+
+        XMEMSET(&r[96], 0, sizeof(sp_digit) * 96U);
+        sp_3072_mont_reduce_96(r, m, mp);
+
+        mask = 0 - (sp_3072_cmp_96(r, m) >= 0);
+        sp_3072_cond_sub_96(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */
+
+#ifdef WOLFSSL_HAVE_SP_RSA
+/* RSA public key operation.
+ *
+ * in      Array of bytes representing the number to exponentiate, base.
+ * inLen   Number of bytes in base.
+ * em      Public exponent.
+ * mm      Modulus.
+ * out     Buffer to hold big-endian bytes of exponentiation result.
+ *         Must be at least 384 bytes long.
+ * outLen  Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
+    byte* out, word32* outLen)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit a[192], m[96], r[192];
+#else
+    sp_digit* d = NULL;
+    sp_digit* a;
+    sp_digit* m;
+    sp_digit* r;
+#endif
+    sp_digit *ah;
+    sp_digit e[1];
+    int err = MP_OKAY;
+
+    if (*outLen < 384)
+        err = MP_TO_E;
+    if (err == MP_OKAY && (mp_count_bits(em) > 32 || inLen > 384 ||
+                                                     mp_count_bits(mm) != 3072))
+        err = MP_READ_E;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 96 * 5, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (d == NULL)
+            err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        a = d;
+        r = a + 96 * 2;
+        m = r + 96 * 2;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        ah = a + 96;
+
+        sp_3072_from_bin(ah, 96, in, inLen);
+#if DIGIT_BIT >= 32
+        e[0] = em->dp[0];
+#else
+        e[0] = em->dp[0];
+        if (em->used > 1) {
+            e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
+        }
+#endif
+        if (e[0] == 0) {
+            err = MP_EXPTMOD_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        sp_3072_from_mp(m, 96, mm);
+
+        if (e[0] == 0x3) {
+            if (err == MP_OKAY) {
+                sp_3072_sqr_96(r, ah);
+                err = sp_3072_mod_96_cond(r, r, m);
+            }
+            if (err == MP_OKAY) {
+                sp_3072_mul_96(r, ah, r);
+                err = sp_3072_mod_96_cond(r, r, m);
+            }
+        }
+        else {
+            int i;
+            sp_digit mp;
+
+            sp_3072_mont_setup(m, &mp);
+
+            /* Convert to Montgomery form. */
+            XMEMSET(a, 0, sizeof(sp_digit) * 96);
+            err = sp_3072_mod_96_cond(a, a, m);
+
+            if (err == MP_OKAY) {
+                for (i = 31; i >= 0; i--) {
+                    if (e[0] >> i) {
+                        break;
+                    }
+                }
+
+                XMEMCPY(r, a, sizeof(sp_digit) * 96);
+                for (i--; i>=0; i--) {
+                    sp_3072_mont_sqr_96(r, r, m, mp);
+                    if (((e[0] >> i) & 1) == 1) {
+                        sp_3072_mont_mul_96(r, r, a, m, mp);
+                    }
+                }
+                XMEMSET(&r[96], 0, sizeof(sp_digit) * 96);
+                sp_3072_mont_reduce_96(r, m, mp);
+
+                for (i = 95; i > 0; i--) {
+                    if (r[i] != m[i]) {
+                        break;
+                    }
+                }
+                if (r[i] >= m[i]) {
+                    sp_3072_sub_in_place_96(r, m);
+                }
+            }
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_3072_to_bin(r, out);
+        *outLen = 384;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+#endif
+
+    return err;
+}
+
+#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
+    sp_digit* a;
+    sp_digit* d = NULL;
+    sp_digit* m;
+    sp_digit* r;
+    int err = MP_OKAY;
+
+    (void)pm;
+    (void)qm;
+    (void)dpm;
+    (void)dqm;
+    (void)qim;
+
+    if (*outLen < 384U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(dm) > 3072) {
+           err = MP_READ_E;
+        }
+        if (inLen > 384) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 3072) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 96 * 4, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        a = d + 96;
+        m = a + 192;
+        r = a;
+
+        sp_3072_from_bin(a, 96, in, inLen);
+        sp_3072_from_mp(d, 96, dm);
+        sp_3072_from_mp(m, 96, mm);
+        err = sp_3072_mod_exp_96(r, a, d, 3072, m, 0);
+    }
+    if (err == MP_OKAY) {
+        sp_3072_to_bin(r, out);
+        *outLen = 384;
+    }
+
+    if (d != NULL) {
+        XMEMSET(d, 0, sizeof(sp_digit) * 96);
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+
+    return err;
+#else
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r  A single precision number representing conditional add result.
+ * a  A single precision number to add with.
+ * b  A single precision number to add.
+ * m  Mask value to apply.
+ */
+SP_NOINLINE static sp_digit sp_3072_cond_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        sp_digit m)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r5, #192\n\t"
+        "mov	r9, r5\n\t"
+        "mov	r8, #0\n\t"
+        "\n1:\n\t"
+        "ldr	r6, [%[b], r8]\n\t"
+        "and	r6, r6, %[m]\n\t"
+        "adds	r5, %[c], #-1\n\t"
+        "ldr	r5, [%[a], r8]\n\t"
+        "adcs	r5, r5, r6\n\t"
+        "mov	%[c], #0\n\t"
+        "adcs	%[c], %[c], %[c]\n\t"
+        "str	r5, [%[r], r8]\n\t"
+        "add	r8, r8, #4\n\t"
+        "cmp	r8, r9\n\t"
+        "blt	1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r5", "r6", "r8", "r9"
+    );
+
+    return c;
+}
+
+/* RSA private key operation.
+ *
+ * in      Array of bytes representing the number to exponentiate, base.
+ * inLen   Number of bytes in base.
+ * dm      Private exponent.
+ * pm      First prime.
+ * qm      Second prime.
+ * dpm     First prime's CRT exponent.
+ * dqm     Second prime's CRT exponent.
+ * qim     Inverse of second prime mod p.
+ * mm      Modulus.
+ * out     Buffer to hold big-endian bytes of exponentiation result.
+ *         Must be at least 384 bytes long.
+ * outLen  Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm,
+    mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
+    byte* out, word32* outLen)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit a[96 * 2];
+    sp_digit p[48], q[48], dp[48];
+    sp_digit tmpa[96], tmpb[96];
+#else
+    sp_digit* t = NULL;
+    sp_digit* a;
+    sp_digit* p;
+    sp_digit* q;
+    sp_digit* dp;
+    sp_digit* tmpa;
+    sp_digit* tmpb;
+#endif
+    sp_digit* r;
+    sp_digit* qi;
+    sp_digit* dq;
+    sp_digit c;
+    int err = MP_OKAY;
+
+    (void)dm;
+    (void)mm;
+
+    if (*outLen < 384)
+        err = MP_TO_E;
+    if (err == MP_OKAY && (inLen > 384 || mp_count_bits(mm) != 3072))
+        err = MP_READ_E;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 48 * 11, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (t == NULL)
+            err = MEMORY_E;
+    }
+    if (err == MP_OKAY) {
+        a = t;
+        p = a + 96 * 2;
+        q = p + 48;
+        qi = dq = dp = q + 48;
+        tmpa = qi + 48;
+        tmpb = tmpa + 96;
+
+        r = t + 96;
+    }
+#else
+#endif
+
+    if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+        r = a;
+        qi = dq = dp;
+#endif
+        sp_3072_from_bin(a, 96, in, inLen);
+        sp_3072_from_mp(p, 48, pm);
+        sp_3072_from_mp(q, 48, qm);
+        sp_3072_from_mp(dp, 48, dpm);
+
+        err = sp_3072_mod_exp_48(tmpa, a, dp, 1536, p, 1);
+    }
+    if (err == MP_OKAY) {
+        sp_3072_from_mp(dq, 48, dqm);
+        err = sp_3072_mod_exp_48(tmpb, a, dq, 1536, q, 1);
+    }
+
+    if (err == MP_OKAY) {
+        c = sp_3072_sub_in_place_48(tmpa, tmpb);
+        c += sp_3072_cond_add_48(tmpa, tmpa, p, c);
+        sp_3072_cond_add_48(tmpa, tmpa, p, c);
+
+        sp_3072_from_mp(qi, 48, qim);
+        sp_3072_mul_48(tmpa, tmpa, qi);
+        err = sp_3072_mod_48(tmpa, tmpa, p);
+    }
+
+    if (err == MP_OKAY) {
+        sp_3072_mul_48(tmpa, q, tmpa);
+        XMEMSET(&tmpb[48], 0, sizeof(sp_digit) * 48);
+        sp_3072_add_96(r, tmpb, tmpa);
+
+        sp_3072_to_bin(r, out);
+        *outLen = 384;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        XMEMSET(t, 0, sizeof(sp_digit) * 48 * 11);
+        XFREE(t, NULL, DYNAMIC_TYPE_RSA);
+    }
+#else
+    XMEMSET(tmpa, 0, sizeof(tmpa));
+    XMEMSET(tmpb, 0, sizeof(tmpb));
+    XMEMSET(p,    0, sizeof(p));
+    XMEMSET(q,    0, sizeof(q));
+    XMEMSET(dp,   0, sizeof(dp));
+#endif
+
+    return err;
+}
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
+#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
+#endif /* WOLFSSL_HAVE_SP_RSA */
+#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
+                                              !defined(WOLFSSL_RSA_PUBLIC_ONLY))
+/* Convert an array of sp_digit to an mp_int.
+ *
+ * a  A single precision integer.
+ * r  A multi-precision integer.
+ */
+static int sp_3072_to_mp(const sp_digit* a, mp_int* r)
+{
+    int err;
+
+    err = mp_grow(r, (3072 + DIGIT_BIT - 1) / DIGIT_BIT);
+    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 32
+        XMEMCPY(r->dp, a, sizeof(sp_digit) * 96);
+        r->used = 96;
+        mp_clamp(r);
+#elif DIGIT_BIT < 32
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 96; i++) {
+            r->dp[j] |= (mp_digit)(a[i] << s);
+            r->dp[j] &= (1L << DIGIT_BIT) - 1;
+            s = DIGIT_BIT - s;
+            r->dp[++j] = (mp_digit)(a[i] >> s);
+            while (s + DIGIT_BIT <= 32) {
+                s += DIGIT_BIT;
+                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+                if (s == SP_WORD_SIZE) {
+                    r->dp[j] = 0;
+                }
+                else {
+                    r->dp[j] = (mp_digit)(a[i] >> s);
+                }
+            }
+            s = 32 - s;
+        }
+        r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#else
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 96; i++) {
+            r->dp[j] |= ((mp_digit)a[i]) << s;
+            if (s + 32 >= DIGIT_BIT) {
+    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+                r->dp[j] &= (1L << DIGIT_BIT) - 1;
+    #endif
+                s = DIGIT_BIT - s;
+                r->dp[++j] = a[i] >> s;
+                s = 32 - s;
+            }
+            else {
+                s += 32;
+            }
+        }
+        r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#endif
+    }
+
+    return err;
+}
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base  Base. MP integer.
+ * exp   Exponent. MP integer.
+ * mod   Modulus. MP integer.
+ * res   Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+    int err = MP_OKAY;
+    sp_digit b[192], e[96], m[96];
+    sp_digit* r = b;
+    int expBits = mp_count_bits(exp);
+
+    if (mp_count_bits(base) > 3072) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expBits > 3072) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 3072) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_3072_from_mp(b, 96, base);
+        sp_3072_from_mp(e, 96, exp);
+        sp_3072_from_mp(m, 96, mod);
+
+        err = sp_3072_mod_exp_96(r, b, e, expBits, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_3072_to_mp(r, res);
+    }
+
+    XMEMSET(e, 0, sizeof(e));
+
+    return err;
+}
+
+#ifdef WOLFSSL_HAVE_SP_DH
+
+#ifdef HAVE_FFDHE_3072
+static void sp_3072_lshift_96(sp_digit* r, sp_digit* a, byte n)
+{
+    __asm__ __volatile__ (
+        "mov r6, #31\n\t"
+        "sub r6, r6, %[n]\n\t"
+        "add       %[a], %[a], #320\n\t"
+        "add       %[r], %[r], #320\n\t"
+        "ldr r3, [%[a], #60]\n\t"
+        "lsr r4, r3, #1\n\t"
+        "lsl r3, r3, %[n]\n\t"
+        "lsr r4, r4, r6\n\t"
+        "ldr       r2, [%[a], #56]\n\t"
+        "str       r4, [%[r], #64]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #52]\n\t"
+        "str       r3, [%[r], #60]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #48]\n\t"
+        "str       r2, [%[r], #56]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #44]\n\t"
+        "str       r4, [%[r], #52]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #40]\n\t"
+        "str       r3, [%[r], #48]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #36]\n\t"
+        "str       r2, [%[r], #44]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #32]\n\t"
+        "str       r4, [%[r], #40]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #28]\n\t"
+        "str       r3, [%[r], #36]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #24]\n\t"
+        "str       r2, [%[r], #32]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #20]\n\t"
+        "str       r4, [%[r], #28]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #16]\n\t"
+        "str       r3, [%[r], #24]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #12]\n\t"
+        "str       r2, [%[r], #20]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #8]\n\t"
+        "str       r4, [%[r], #16]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #4]\n\t"
+        "str       r3, [%[r], #12]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #0]\n\t"
+        "str       r2, [%[r], #8]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "sub     %[a], %[a], #64\n\t"
+        "sub     %[r], %[r], #64\n\t"
+        "ldr       r2, [%[a], #60]\n\t"
+        "str       r4, [%[r], #68]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #56]\n\t"
+        "str       r3, [%[r], #64]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #52]\n\t"
+        "str       r2, [%[r], #60]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #48]\n\t"
+        "str       r4, [%[r], #56]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #44]\n\t"
+        "str       r3, [%[r], #52]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #40]\n\t"
+        "str       r2, [%[r], #48]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #36]\n\t"
+        "str       r4, [%[r], #44]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #32]\n\t"
+        "str       r3, [%[r], #40]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #28]\n\t"
+        "str       r2, [%[r], #36]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #24]\n\t"
+        "str       r4, [%[r], #32]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #20]\n\t"
+        "str       r3, [%[r], #28]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #16]\n\t"
+        "str       r2, [%[r], #24]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #12]\n\t"
+        "str       r4, [%[r], #20]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #8]\n\t"
+        "str       r3, [%[r], #16]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #4]\n\t"
+        "str       r2, [%[r], #12]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #0]\n\t"
+        "str       r4, [%[r], #8]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "sub     %[a], %[a], #64\n\t"
+        "sub     %[r], %[r], #64\n\t"
+        "ldr       r4, [%[a], #60]\n\t"
+        "str       r3, [%[r], #68]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #56]\n\t"
+        "str       r2, [%[r], #64]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #52]\n\t"
+        "str       r4, [%[r], #60]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #48]\n\t"
+        "str       r3, [%[r], #56]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #44]\n\t"
+        "str       r2, [%[r], #52]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #40]\n\t"
+        "str       r4, [%[r], #48]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #36]\n\t"
+        "str       r3, [%[r], #44]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #32]\n\t"
+        "str       r2, [%[r], #40]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #28]\n\t"
+        "str       r4, [%[r], #36]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #24]\n\t"
+        "str       r3, [%[r], #32]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #20]\n\t"
+        "str       r2, [%[r], #28]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #16]\n\t"
+        "str       r4, [%[r], #24]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #12]\n\t"
+        "str       r3, [%[r], #20]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #8]\n\t"
+        "str       r2, [%[r], #16]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #4]\n\t"
+        "str       r4, [%[r], #12]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #0]\n\t"
+        "str       r3, [%[r], #8]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "sub     %[a], %[a], #64\n\t"
+        "sub     %[r], %[r], #64\n\t"
+        "ldr       r3, [%[a], #60]\n\t"
+        "str       r2, [%[r], #68]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #56]\n\t"
+        "str       r4, [%[r], #64]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #52]\n\t"
+        "str       r3, [%[r], #60]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #48]\n\t"
+        "str       r2, [%[r], #56]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #44]\n\t"
+        "str       r4, [%[r], #52]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #40]\n\t"
+        "str       r3, [%[r], #48]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #36]\n\t"
+        "str       r2, [%[r], #44]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #32]\n\t"
+        "str       r4, [%[r], #40]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #28]\n\t"
+        "str       r3, [%[r], #36]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #24]\n\t"
+        "str       r2, [%[r], #32]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #20]\n\t"
+        "str       r4, [%[r], #28]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #16]\n\t"
+        "str       r3, [%[r], #24]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #12]\n\t"
+        "str       r2, [%[r], #20]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #8]\n\t"
+        "str       r4, [%[r], #16]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #4]\n\t"
+        "str       r3, [%[r], #12]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #0]\n\t"
+        "str       r2, [%[r], #8]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "sub     %[a], %[a], #64\n\t"
+        "sub     %[r], %[r], #64\n\t"
+        "ldr       r2, [%[a], #60]\n\t"
+        "str       r4, [%[r], #68]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #56]\n\t"
+        "str       r3, [%[r], #64]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #52]\n\t"
+        "str       r2, [%[r], #60]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #48]\n\t"
+        "str       r4, [%[r], #56]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #44]\n\t"
+        "str       r3, [%[r], #52]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #40]\n\t"
+        "str       r2, [%[r], #48]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #36]\n\t"
+        "str       r4, [%[r], #44]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #32]\n\t"
+        "str       r3, [%[r], #40]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #28]\n\t"
+        "str       r2, [%[r], #36]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #24]\n\t"
+        "str       r4, [%[r], #32]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #20]\n\t"
+        "str       r3, [%[r], #28]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #16]\n\t"
+        "str       r2, [%[r], #24]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #12]\n\t"
+        "str       r4, [%[r], #20]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #8]\n\t"
+        "str       r3, [%[r], #16]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #4]\n\t"
+        "str       r2, [%[r], #12]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #0]\n\t"
+        "str       r4, [%[r], #8]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "sub     %[a], %[a], #64\n\t"
+        "sub     %[r], %[r], #64\n\t"
+        "ldr       r4, [%[a], #60]\n\t"
+        "str       r3, [%[r], #68]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #56]\n\t"
+        "str       r2, [%[r], #64]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #52]\n\t"
+        "str       r4, [%[r], #60]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #48]\n\t"
+        "str       r3, [%[r], #56]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #44]\n\t"
+        "str       r2, [%[r], #52]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #40]\n\t"
+        "str       r4, [%[r], #48]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #36]\n\t"
+        "str       r3, [%[r], #44]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #32]\n\t"
+        "str       r2, [%[r], #40]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #28]\n\t"
+        "str       r4, [%[r], #36]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #24]\n\t"
+        "str       r3, [%[r], #32]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #20]\n\t"
+        "str       r2, [%[r], #28]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #16]\n\t"
+        "str       r4, [%[r], #24]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #12]\n\t"
+        "str       r3, [%[r], #20]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #8]\n\t"
+        "str       r2, [%[r], #16]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #4]\n\t"
+        "str       r4, [%[r], #12]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #0]\n\t"
+        "str       r3, [%[r], #8]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "str r4, [%[r]]\n\t"
+        "str r2, [%[r], #4]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [n] "r" (n)
+        : "memory", "r2", "r3", "r4", "r5", "r6"
+    );
+}
+
+/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
+ *
+ * r     A single precision number that is the result of the operation.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_3072_mod_exp_2_96(sp_digit* r, const sp_digit* e, int bits,
+        const sp_digit* m)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit nd[192];
+    sp_digit td[97];
+#else
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit* tmp;
+    sp_digit mp = 1;
+    sp_digit n, o;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 289, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        norm = td;
+        tmp  = td + 192;
+#else
+        norm = nd;
+        tmp  = td;
+#endif
+
+        sp_3072_mont_setup(m, &mp);
+        sp_3072_mont_norm_96(norm, m);
+
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 5;
+        if (c == 32) {
+            c = 27;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
+        sp_3072_lshift_96(r, norm, y);
+        for (; i>=0 || c>=5; ) {
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 27;
+                n <<= 5;
+                c = 27;
+            }
+            else if (c < 5) {
+                y = n >> 27;
+                n = e[i--];
+                c = 5 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                y = (n >> 27) & 0x1f;
+                n <<= 5;
+                c -= 5;
+            }
+
+            sp_3072_mont_sqr_96(r, r, m, mp);
+            sp_3072_mont_sqr_96(r, r, m, mp);
+            sp_3072_mont_sqr_96(r, r, m, mp);
+            sp_3072_mont_sqr_96(r, r, m, mp);
+            sp_3072_mont_sqr_96(r, r, m, mp);
+
+            sp_3072_lshift_96(r, r, y);
+            sp_3072_mul_d_96(tmp, norm, r[96]);
+            r[96] = 0;
+            o = sp_3072_add_96(r, r, tmp);
+            sp_3072_cond_sub_96(r, r, m, (sp_digit)0 - o);
+        }
+
+        XMEMSET(&r[96], 0, sizeof(sp_digit) * 96U);
+        sp_3072_mont_reduce_96(r, m, mp);
+
+        mask = 0 - (sp_3072_cmp_96(r, m) >= 0);
+        sp_3072_cond_sub_96(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#endif /* HAVE_FFDHE_3072 */
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base     Base.
+ * exp      Array of bytes that is the exponent.
+ * expLen   Length of data, in bytes, in exponent.
+ * mod      Modulus.
+ * out      Buffer to hold big-endian bytes of exponentiation result.
+ *          Must be at least 384 bytes long.
+ * outLen   Length, in bytes, of exponentiation result.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen,
+    mp_int* mod, byte* out, word32* outLen)
+{
+    int err = MP_OKAY;
+    sp_digit b[192], e[96], m[96];
+    sp_digit* r = b;
+    word32 i;
+
+    if (mp_count_bits(base) > 3072) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expLen > 384) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 3072) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_3072_from_mp(b, 96, base);
+        sp_3072_from_bin(e, 96, exp, expLen);
+        sp_3072_from_mp(m, 96, mod);
+
+    #ifdef HAVE_FFDHE_3072
+        if (base->used == 1 && base->dp[0] == 2 && m[95] == (sp_digit)-1)
+            err = sp_3072_mod_exp_2_96(r, e, expLen * 8, m);
+        else
+    #endif
+            err = sp_3072_mod_exp_96(r, b, e, expLen * 8, m, 0);
+
+    }
+
+    if (err == MP_OKAY) {
+        sp_3072_to_bin(r, out);
+        *outLen = 384;
+        for (i=0; i<384 && out[i] == 0; i++) {
+        }
+        *outLen -= i;
+        XMEMMOVE(out, out + i, *outLen);
+
+    }
+
+    XMEMSET(e, 0, sizeof(e));
+
+    return err;
+}
+#endif /* WOLFSSL_HAVE_SP_DH */
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base  Base. MP integer.
+ * exp   Exponent. MP integer.
+ * mod   Modulus. MP integer.
+ * res   Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+    int err = MP_OKAY;
+    sp_digit b[96], e[48], m[48];
+    sp_digit* r = b;
+    int expBits = mp_count_bits(exp);
+
+    if (mp_count_bits(base) > 1536) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expBits > 1536) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 1536) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_3072_from_mp(b, 48, base);
+        sp_3072_from_mp(e, 48, exp);
+        sp_3072_from_mp(m, 48, mod);
+
+        err = sp_3072_mod_exp_48(r, b, e, expBits, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        XMEMSET(r + 48, 0, sizeof(*r) * 48U);
+        err = sp_3072_to_mp(r, res);
+        res->used = mod->used;
+        mp_clamp(res);
+    }
+
+    XMEMSET(e, 0, sizeof(e));
+
+    return err;
+}
+
+#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
+
+#endif /* !WOLFSSL_SP_NO_3072 */
+
+#ifdef WOLFSSL_SP_4096
+/* Read big endian unsigned byte array into r.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
+ * a  Byte array.
+ * n  Number of bytes in array to read.
+ */
+static void sp_4096_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = n-1; i >= 0; i--) {
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 24U) {
+            r[j] &= 0xffffffff;
+            s = 32U - s;
+            if (j + 1 >= size) {
+                break;
+            }
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
+ * a  A multi-precision integer.
+ */
+static void sp_4096_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 32
+    int j;
+
+    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+    for (j = a->used; j < size; j++) {
+        r[j] = 0;
+    }
+#elif DIGIT_BIT > 32
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
+        r[j] &= 0xffffffff;
+        s = 32U - s;
+        if (j + 1 >= size) {
+            break;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 32U) <= (word32)DIGIT_BIT) {
+            s += 32U;
+            r[j] &= 0xffffffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#else
+    int i, j = 0, s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i]) << s;
+        if (s + DIGIT_BIT >= 32) {
+            r[j] &= 0xffffffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            s = 32 - s;
+            if (s == DIGIT_BIT) {
+                r[++j] = 0;
+                s = 0;
+            }
+            else {
+                r[++j] = a->dp[i] >> s;
+                s = DIGIT_BIT - s;
+            }
+        }
+        else {
+            s += DIGIT_BIT;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#endif
+}
+
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 512
+ *
+ * r  A single precision integer.
+ * a  Byte array.
+ */
+static void sp_4096_to_bin(sp_digit* r, byte* a)
+{
+    int i, j, s = 0, b;
+
+    j = 4096 / 8 - 1;
+    a[j] = 0;
+    for (i=0; i<128 && j>=0; i++) {
+        b = 0;
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
+            break;
+        }
+        while (b < 32) {
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
+                break;
+            }
+        }
+        s = 8 - (b - 32);
+        if (j >= 0) {
+            a[j] = 0;
+        }
+        if (s != 0) {
+            j++;
+        }
+    }
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_4096_add_64(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c], %[c]\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r8"
+    );
+
+    return c;
+}
+
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_4096_sub_in_place_128(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "subs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "sbc	%[c], %[c], %[c]\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6"
+    );
+
+    return c;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c], %[c]\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r8"
+    );
+
+    return c;
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_mul_64(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit tmp[64 * 2];
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r9, r3\n\t"
+        "mov	r12, %[r]\n\t"
+        "mov	r10, %[a]\n\t"
+        "mov	r11, %[b]\n\t"
+        "mov	r6, #1\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, r6, r10\n\t"
+        "mov	r14, r6\n\t"
+        "\n1:\n\t"
+        "mov	%[r], #0\n\t"
+        "mov	r5, #0\n\t"
+        "mov	r6, #252\n\t"
+        "mov	%[a], r9\n\t"
+        "subs	%[a], %[a], r6\n\t"
+        "sbc	r6, r6, r6\n\t"
+        "mvn	r6, r6\n\t"
+        "and	%[a], %[a], r6\n\t"
+        "mov	%[b], r9\n\t"
+        "sub	%[b], %[b], %[a]\n\t"
+        "add	%[a], %[a], r10\n\t"
+        "add	%[b], %[b], r11\n\t"
+        "\n2:\n\t"
+        /* Multiply Start */
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r8, [%[b]]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, %[r]\n\t"
+        /* Multiply Done */
+        "add	%[a], %[a], #4\n\t"
+        "sub	%[b], %[b], #4\n\t"
+        "cmp	%[a], r14\n\t"
+        "beq	3f\n\t"
+        "mov	r6, r9\n\t"
+        "add	r6, r6, r10\n\t"
+        "cmp	%[a], r6\n\t"
+        "ble	2b\n\t"
+        "\n3:\n\t"
+        "mov	%[r], r12\n\t"
+        "mov	r8, r9\n\t"
+        "str	r3, [%[r], r8]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "add	r8, r8, #4\n\t"
+        "mov	r9, r8\n\t"
+        "mov	r6, #1\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, r6, #248\n\t"
+        "cmp	r8, r6\n\t"
+        "ble	1b\n\t"
+        "str	r3, [%[r], r8]\n\t"
+        "mov	%[a], r10\n\t"
+        "mov	%[b], r11\n\t"
+        :
+        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
+    );
+
+    XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * m  Mask to AND against each digit.
+ */
+static void sp_4096_mask_64(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<64; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    int i;
+
+    for (i = 0; i < 64; i += 8) {
+        r[i+0] = a[i+0] & m;
+        r[i+1] = a[i+1] & m;
+        r[i+2] = a[i+2] & m;
+        r[i+3] = a[i+3] & m;
+        r[i+4] = a[i+4] & m;
+        r[i+5] = a[i+5] & m;
+        r[i+6] = a[i+6] & m;
+        r[i+7] = a[i+7] & m;
+    }
+#endif
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_mul_128(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit* z0 = r;
+    sp_digit z1[128];
+    sp_digit a1[64];
+    sp_digit b1[64];
+    sp_digit z2[128];
+    sp_digit u, ca, cb;
+
+    ca = sp_2048_add_64(a1, a, &a[64]);
+    cb = sp_2048_add_64(b1, b, &b[64]);
+    u  = ca & cb;
+    sp_2048_mul_64(z1, a1, b1);
+    sp_2048_mul_64(z2, &a[64], &b[64]);
+    sp_2048_mul_64(z0, a, b);
+    sp_2048_mask_64(r + 128, a1, 0 - cb);
+    sp_2048_mask_64(b1, b1, 0 - ca);
+    u += sp_2048_add_64(r + 128, r + 128, b1);
+    u += sp_4096_sub_in_place_128(z1, z2);
+    u += sp_4096_sub_in_place_128(z1, z0);
+    u += sp_4096_add_128(r + 64, r + 64, z1);
+    r[192] = u;
+    XMEMSET(r + 192 + 1, 0, sizeof(sp_digit) * (64 - 1));
+    (void)sp_4096_add_128(r + 128, r + 128, z2);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_sqr_64(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r5, #0\n\t"
+        "mov	r9, r3\n\t"
+        "mov	r12, %[r]\n\t"
+        "mov	r6, #2\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "neg	r6, r6\n\t"
+        "add	sp, sp, r6\n\t"
+        "mov	r11, sp\n\t"
+        "mov	r10, %[a]\n\t"
+        "\n1:\n\t"
+        "mov	%[r], #0\n\t"
+        "mov	r6, #252\n\t"
+        "mov	%[a], r9\n\t"
+        "subs	%[a], %[a], r6\n\t"
+        "sbc	r6, r6, r6\n\t"
+        "mvn	r6, r6\n\t"
+        "and	%[a], %[a], r6\n\t"
+        "mov	r2, r9\n\t"
+        "sub	r2, r2, %[a]\n\t"
+        "add	%[a], %[a], r10\n\t"
+        "add	r2, r2, r10\n\t"
+        "\n2:\n\t"
+        "cmp	r2, %[a]\n\t"
+        "beq	4f\n\t"
+        /* Multiply * 2: Start */
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r8, [r2]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, %[r]\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, %[r]\n\t"
+        /* Multiply * 2: Done */
+        "bal	5f\n\t"
+        "\n4:\n\t"
+        /* Square: Start */
+        "ldr	r6, [%[a]]\n\t"
+        "umull	r6, r8, r6, r6\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adc	r5, r5, %[r]\n\t"
+        /* Square: Done */
+        "\n5:\n\t"
+        "add	%[a], %[a], #4\n\t"
+        "sub	r2, r2, #4\n\t"
+        "mov	r6, #1\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, r6, r10\n\t"
+        "cmp	%[a], r6\n\t"
+        "beq	3f\n\t"
+        "cmp	%[a], r2\n\t"
+        "bgt	3f\n\t"
+        "mov	r8, r9\n\t"
+        "add	r8, r8, r10\n\t"
+        "cmp	%[a], r8\n\t"
+        "ble	2b\n\t"
+        "\n3:\n\t"
+        "mov	%[r], r11\n\t"
+        "mov	r8, r9\n\t"
+        "str	r3, [%[r], r8]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "mov	r5, #0\n\t"
+        "add	r8, r8, #4\n\t"
+        "mov	r9, r8\n\t"
+        "mov	r6, #1\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, r6, #248\n\t"
+        "cmp	r8, r6\n\t"
+        "ble	1b\n\t"
+        "mov	%[a], r10\n\t"
+        "str	r3, [%[r], r8]\n\t"
+        "mov	%[r], r12\n\t"
+        "mov	%[a], r11\n\t"
+        "mov	r3, #1\n\t"
+        "lsl	r3, r3, #8\n\t"
+        "add	r3, r3, #252\n\t"
+        "\n4:\n\t"
+        "ldr	r6, [%[a], r3]\n\t"
+        "str	r6, [%[r], r3]\n\t"
+        "subs	r3, r3, #4\n\t"
+        "bge	4b\n\t"
+        "mov	r6, #2\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	sp, sp, r6\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12"
+    );
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a)
+{
+    sp_digit* z0 = r;
+    sp_digit z2[128];
+    sp_digit z1[128];
+    sp_digit a1[64];
+    sp_digit u;
+
+    u = sp_2048_add_64(a1, a, &a[64]);
+    sp_2048_sqr_64(z1, a1);
+    sp_2048_sqr_64(z2, &a[64]);
+    sp_2048_sqr_64(z0, a);
+    sp_2048_mask_64(r + 128, a1, 0 - u);
+    u += sp_2048_add_64(r + 128, r + 128, r + 128);
+    u += sp_4096_sub_in_place_128(z1, z2);
+    u += sp_4096_sub_in_place_128(z1, z0);
+    u += sp_4096_add_128(r + 64, r + 64, z1);
+    r[192] = u;
+    XMEMSET(r + 192 + 1, 0, sizeof(sp_digit) * (64 - 1));
+    (void)sp_4096_add_128(r + 128, r + 128, z2);
+}
+
+#endif /* !WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r6, %[a]\n\t"
+        "mov	r8, #0\n\t"
+        "add	r6, r6, #512\n\t"
+        "sub	r8, r8, #1\n\t"
+        "\n1:\n\t"
+        "adds	%[c], %[c], r8\n\t"
+        "ldr	r4, [%[a]]\n\t"
+        "ldr	r5, [%[b]]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "str	r4, [%[r]]\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c], %[c]\n\t"
+        "add	%[a], %[a], #4\n\t"
+        "add	%[b], %[b], #4\n\t"
+        "add	%[r], %[r], #4\n\t"
+        "cmp	%[a], r6\n\t"
+        "bne	1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r8"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into a. (a -= b)
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_4096_sub_in_place_128(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+    __asm__ __volatile__ (
+        "mov	r8, %[a]\n\t"
+        "add	r8, r8, #512\n\t"
+        "\n1:\n\t"
+        "mov	r5, #0\n\t"
+        "subs	r5, r5, %[c]\n\t"
+        "ldr	r3, [%[a]]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b]]\n\t"
+        "ldr	r6, [%[b], #4]\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "str	r3, [%[a]]\n\t"
+        "str	r4, [%[a], #4]\n\t"
+        "sbc	%[c], %[c], %[c]\n\t"
+        "add	%[a], %[a], #8\n\t"
+        "add	%[b], %[b], #8\n\t"
+        "cmp	%[a], r8\n\t"
+        "bne	1b\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6", "r8"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_mul_128(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit tmp[128 * 2];
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r9, r3\n\t"
+        "mov	r12, %[r]\n\t"
+        "mov	r10, %[a]\n\t"
+        "mov	r11, %[b]\n\t"
+        "mov	r6, #2\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, r6, r10\n\t"
+        "mov	r14, r6\n\t"
+        "\n1:\n\t"
+        "mov	%[r], #0\n\t"
+        "mov	r5, #0\n\t"
+        "mov	r6, #1\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, r6, #252\n\t"
+        "mov	%[a], r9\n\t"
+        "subs	%[a], %[a], r6\n\t"
+        "sbc	r6, r6, r6\n\t"
+        "mvn	r6, r6\n\t"
+        "and	%[a], %[a], r6\n\t"
+        "mov	%[b], r9\n\t"
+        "sub	%[b], %[b], %[a]\n\t"
+        "add	%[a], %[a], r10\n\t"
+        "add	%[b], %[b], r11\n\t"
+        "\n2:\n\t"
+        /* Multiply Start */
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r8, [%[b]]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, %[r]\n\t"
+        /* Multiply Done */
+        "add	%[a], %[a], #4\n\t"
+        "sub	%[b], %[b], #4\n\t"
+        "cmp	%[a], r14\n\t"
+        "beq	3f\n\t"
+        "mov	r6, r9\n\t"
+        "add	r6, r6, r10\n\t"
+        "cmp	%[a], r6\n\t"
+        "ble	2b\n\t"
+        "\n3:\n\t"
+        "mov	%[r], r12\n\t"
+        "mov	r8, r9\n\t"
+        "str	r3, [%[r], r8]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "add	r8, r8, #4\n\t"
+        "mov	r9, r8\n\t"
+        "mov	r6, #3\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, r6, #248\n\t"
+        "cmp	r8, r6\n\t"
+        "ble	1b\n\t"
+        "str	r3, [%[r], r8]\n\t"
+        "mov	%[a], r10\n\t"
+        "mov	%[b], r11\n\t"
+        :
+        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
+    );
+
+    XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r5, #0\n\t"
+        "mov	r9, r3\n\t"
+        "mov	r12, %[r]\n\t"
+        "mov	r6, #4\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "neg	r6, r6\n\t"
+        "add	sp, sp, r6\n\t"
+        "mov	r11, sp\n\t"
+        "mov	r10, %[a]\n\t"
+        "\n1:\n\t"
+        "mov	%[r], #0\n\t"
+        "mov	r6, #1\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, r6, #252\n\t"
+        "mov	%[a], r9\n\t"
+        "subs	%[a], %[a], r6\n\t"
+        "sbc	r6, r6, r6\n\t"
+        "mvn	r6, r6\n\t"
+        "and	%[a], %[a], r6\n\t"
+        "mov	r2, r9\n\t"
+        "sub	r2, r2, %[a]\n\t"
+        "add	%[a], %[a], r10\n\t"
+        "add	r2, r2, r10\n\t"
+        "\n2:\n\t"
+        "cmp	r2, %[a]\n\t"
+        "beq	4f\n\t"
+        /* Multiply * 2: Start */
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r8, [r2]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, %[r]\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, %[r]\n\t"
+        /* Multiply * 2: Done */
+        "bal	5f\n\t"
+        "\n4:\n\t"
+        /* Square: Start */
+        "ldr	r6, [%[a]]\n\t"
+        "umull	r6, r8, r6, r6\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adc	r5, r5, %[r]\n\t"
+        /* Square: Done */
+        "\n5:\n\t"
+        "add	%[a], %[a], #4\n\t"
+        "sub	r2, r2, #4\n\t"
+        "mov	r6, #2\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, r6, r10\n\t"
+        "cmp	%[a], r6\n\t"
+        "beq	3f\n\t"
+        "cmp	%[a], r2\n\t"
+        "bgt	3f\n\t"
+        "mov	r8, r9\n\t"
+        "add	r8, r8, r10\n\t"
+        "cmp	%[a], r8\n\t"
+        "ble	2b\n\t"
+        "\n3:\n\t"
+        "mov	%[r], r11\n\t"
+        "mov	r8, r9\n\t"
+        "str	r3, [%[r], r8]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "mov	r5, #0\n\t"
+        "add	r8, r8, #4\n\t"
+        "mov	r9, r8\n\t"
+        "mov	r6, #3\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, r6, #248\n\t"
+        "cmp	r8, r6\n\t"
+        "ble	1b\n\t"
+        "mov	%[a], r10\n\t"
+        "str	r3, [%[r], r8]\n\t"
+        "mov	%[r], r12\n\t"
+        "mov	%[a], r11\n\t"
+        "mov	r3, #3\n\t"
+        "lsl	r3, r3, #8\n\t"
+        "add	r3, r3, #252\n\t"
+        "\n4:\n\t"
+        "ldr	r6, [%[a], r3]\n\t"
+        "str	r6, [%[r], r3]\n\t"
+        "subs	r3, r3, #4\n\t"
+        "bge	4b\n\t"
+        "mov	r6, #4\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	sp, sp, r6\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12"
+    );
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Caclulate the bottom digit of -1/a mod 2^n.
+ *
+ * a    A single precision number.
+ * rho  Bottom word of inverse.
+ */
+static void sp_4096_mont_setup(const sp_digit* a, sp_digit* rho)
+{
+    sp_digit x, b;
+
+    b = a[0];
+    x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
+
+    /* rho = -1/m mod b */
+    *rho = -x;
+}
+
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision digit.
+ */
+SP_NOINLINE static void sp_4096_mul_d_128(sp_digit* r, const sp_digit* a,
+        sp_digit b)
+{
+    __asm__ __volatile__ (
+        "add	r9, %[a], #512\n\t"
+        /* A[0] * B */
+        "ldr	r6, [%[a]], #4\n\t"
+        "umull	r5, r3, r6, %[b]\n\t"
+        "mov	r4, #0\n\t"
+        "str	r5, [%[r]], #4\n\t"
+        /* A[0] * B - Done */
+        "\n1:\n\t"
+        "mov	r5, #0\n\t"
+        /* A[] * B */
+        "ldr	r6, [%[a]], #4\n\t"
+        "umull	r6, r8, r6, %[b]\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        /* A[] * B - Done */
+        "str	r3, [%[r]], #4\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "cmp	%[a], r9\n\t"
+        "blt	1b\n\t"
+        "str	r3, [%[r]]\n\t"
+        : [r] "+r" (r), [a] "+r" (a)
+        : [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r8", "r9"
+    );
+}
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 4096 bits, just need to subtract.
+ *
+ * r  A single precision number.
+ * m  A single precision number.
+ */
+static void sp_4096_mont_norm_128(sp_digit* r, const sp_digit* m)
+{
+    XMEMSET(r, 0, sizeof(sp_digit) * 128);
+
+    /* r = 2^n mod m */
+    sp_4096_sub_in_place_128(r, m);
+}
+
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * r  A single precision number representing condition subtract result.
+ * a  A single precision number to subtract from.
+ * b  A single precision number to subtract.
+ * m  Mask value to apply.
+ */
+SP_NOINLINE static sp_digit sp_4096_cond_sub_128(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, sp_digit m)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r5, #2\n\t"
+        "lsl	r5, r5, #8\n\t"
+        "mov	r9, r5\n\t"
+        "mov	r8, #0\n\t"
+        "\n1:\n\t"
+        "ldr	r6, [%[b], r8]\n\t"
+        "and	r6, r6, %[m]\n\t"
+        "mov	r5, #0\n\t"
+        "subs	r5, r5, %[c]\n\t"
+        "ldr	r5, [%[a], r8]\n\t"
+        "sbcs	r5, r5, r6\n\t"
+        "sbcs	%[c], %[c], %[c]\n\t"
+        "str	r5, [%[r], r8]\n\t"
+        "add	r8, r8, #4\n\t"
+        "cmp	r8, r9\n\t"
+        "blt	1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r5", "r6", "r8", "r9"
+    );
+
+    return c;
+}
+
+/* Reduce the number back to 4096 bits using Montgomery reduction.
+ *
+ * a   A single precision number to reduce in place.
+ * m   The single precision number representing the modulus.
+ * mp  The digit representing the negative inverse of m mod 2^n.
+ */
+SP_NOINLINE static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_digit ca = 0;
+
+    __asm__ __volatile__ (
+        "mov	r9, %[mp]\n\t"
+        "mov	r12, %[m]\n\t"
+        "mov	r10, %[a]\n\t"
+        "mov	r4, #0\n\t"
+        "add	r11, r10, #512\n\t"
+        "\n1:\n\t"
+        /* mu = a[i] * mp */
+        "mov	%[mp], r9\n\t"
+        "ldr	%[a], [r10]\n\t"
+        "mul	%[mp], %[mp], %[a]\n\t"
+        "mov	%[m], r12\n\t"
+        "add	r14, r10, #504\n\t"
+        "\n2:\n\t"
+        /* a[i+j] += m[j] * mu */
+        "ldr	%[a], [r10]\n\t"
+        "mov	r5, #0\n\t"
+        /* Multiply m[j] and mu - Start */
+        "ldr	r8, [%[m]], #4\n\t"
+        "umull	r6, r8, %[mp], r8\n\t"
+        "adds	%[a], %[a], r6\n\t"
+        "adc	r5, r5, r8\n\t"
+        /* Multiply m[j] and mu - Done */
+        "adds	r4, r4, %[a]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "str	r4, [r10], #4\n\t"
+        /* a[i+j+1] += m[j+1] * mu */
+        "ldr	%[a], [r10]\n\t"
+        "mov	r4, #0\n\t"
+        /* Multiply m[j] and mu - Start */
+        "ldr	r8, [%[m]], #4\n\t"
+        "umull	r6, r8, %[mp], r8\n\t"
+        "adds	%[a], %[a], r6\n\t"
+        "adc	r4, r4, r8\n\t"
+        /* Multiply m[j] and mu - Done */
+        "adds	r5, r5, %[a]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "str	r5, [r10], #4\n\t"
+        "cmp	r10, r14\n\t"
+        "blt	2b\n\t"
+        /* a[i+126] += m[126] * mu */
+        "ldr	%[a], [r10]\n\t"
+        "mov	r5, #0\n\t"
+        /* Multiply m[j] and mu - Start */
+        "ldr	r8, [%[m]], #4\n\t"
+        "umull	r6, r8, %[mp], r8\n\t"
+        "adds	%[a], %[a], r6\n\t"
+        "adc	r5, r5, r8\n\t"
+        /* Multiply m[j] and mu - Done */
+        "adds	r4, r4, %[a]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "str	r4, [r10], #4\n\t"
+        /* a[i+127] += m[127] * mu */
+        "mov	r4, %[ca]\n\t"
+        "mov	%[ca], #0\n\t"
+        /* Multiply m[127] and mu - Start */
+        "ldr	r8, [%[m]]\n\t"
+        "umull	r6, r8, %[mp], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	%[ca], %[ca], #0\n\t"
+        /* Multiply m[127] and mu - Done */
+        "ldr	r6, [r10]\n\t"
+        "ldr	r8, [r10, #4]\n\t"
+        "adds	r6, r6, r5\n\t"
+        "adcs	r8, r8, r4\n\t"
+        "adc	%[ca], %[ca], #0\n\t"
+        "str	r6, [r10]\n\t"
+        "str	r8, [r10, #4]\n\t"
+        /* Next word in a */
+        "sub	r10, r10, #504\n\t"
+        "cmp	r10, r11\n\t"
+        "blt	1b\n\t"
+        "mov	%[a], r10\n\t"
+        "mov	%[m], r12\n\t"
+        : [ca] "+r" (ca), [a] "+r" (a)
+        : [m] "r" (m), [mp] "r" (mp)
+        : "memory", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
+    );
+
+    sp_4096_cond_sub_128(a - 128, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montogmery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r   Result of multiplication.
+ * a   First number to multiply in Montogmery form.
+ * b   Second number to multiply in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_4096_mont_mul_128(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_4096_mul_128(r, a, b);
+    sp_4096_mont_reduce_128(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_4096_mont_sqr_128(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_4096_sqr_128(r, a);
+    sp_4096_mont_reduce_128(r, m, mp);
+}
+
+/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
+ *
+ * d1   The high order half of the number to divide.
+ * d0   The low order half of the number to divide.
+ * div  The dividend.
+ * returns the result of the division.
+ *
+ * Note that this is an approximate div. It may give an answer 1 larger.
+ */
+SP_NOINLINE static sp_digit div_4096_word_128(sp_digit d1, sp_digit d0,
+        sp_digit div)
+{
+    sp_digit r = 0;
+
+    __asm__ __volatile__ (
+        "lsr	r6, %[div], #16\n\t"
+        "add	r6, r6, #1\n\t"
+        "udiv	r4, %[d1], r6\n\t"
+        "lsl	r8, r4, #16\n\t"
+        "umull	r4, r5, %[div], r8\n\t"
+        "subs	%[d0], %[d0], r4\n\t"
+        "sbc	%[d1], %[d1], r5\n\t"
+        "udiv	r5, %[d1], r6\n\t"
+        "lsl	r4, r5, #16\n\t"
+        "add	r8, r8, r4\n\t"
+        "umull	r4, r5, %[div], r4\n\t"
+        "subs	%[d0], %[d0], r4\n\t"
+        "sbc	%[d1], %[d1], r5\n\t"
+        "lsl	r4, %[d1], #16\n\t"
+        "orr	r4, r4, %[d0], lsr #16\n\t"
+        "udiv	r4, r4, r6\n\t"
+        "add	r8, r8, r4\n\t"
+        "umull	r4, r5, %[div], r4\n\t"
+        "subs	%[d0], %[d0], r4\n\t"
+        "sbc	%[d1], %[d1], r5\n\t"
+        "lsl	r4, %[d1], #16\n\t"
+        "orr	r4, r4, %[d0], lsr #16\n\t"
+        "udiv	r4, r4, r6\n\t"
+        "add	r8, r8, r4\n\t"
+        "umull	r4, r5, %[div], r4\n\t"
+        "subs	%[d0], %[d0], r4\n\t"
+        "sbc	%[d1], %[d1], r5\n\t"
+        "udiv	r4, %[d0], %[div]\n\t"
+        "add	r8, r8, r4\n\t"
+        "mov	%[r], r8\n\t"
+        : [r] "+r" (r)
+        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+        : "r4", "r5", "r6", "r8"
+    );
+    return r;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * m  Mask to AND against each digit.
+ */
+static void sp_4096_mask_128(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<128; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    int i;
+
+    for (i = 0; i < 128; i += 8) {
+        r[i+0] = a[i+0] & m;
+        r[i+1] = a[i+1] & m;
+        r[i+2] = a[i+2] & m;
+        r[i+3] = a[i+3] & m;
+        r[i+4] = a[i+4] & m;
+        r[i+5] = a[i+5] & m;
+        r[i+6] = a[i+6] & m;
+        r[i+7] = a[i+7] & m;
+    }
+#endif
+}
+
+/* Compare a with b in constant time.
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+SP_NOINLINE static int32_t sp_4096_cmp_128(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+
+
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mvn	r3, r3\n\t"
+        "mov	r6, #1\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, r6, #252\n\t"
+        "\n1:\n\t"
+        "ldr	r8, [%[a], r6]\n\t"
+        "ldr	r5, [%[b], r6]\n\t"
+        "and	r8, r8, r3\n\t"
+        "and	r5, r5, r3\n\t"
+        "mov	r4, r8\n\t"
+        "subs	r8, r8, r5\n\t"
+        "sbc	r8, r8, r8\n\t"
+        "add	%[r], %[r], r8\n\t"
+        "mvn	r8, r8\n\t"
+        "and	r3, r3, r8\n\t"
+        "subs	r5, r5, r4\n\t"
+        "sbc	r8, r8, r8\n\t"
+        "sub	%[r], %[r], r8\n\t"
+        "mvn	r8, r8\n\t"
+        "and	r3, r3, r8\n\t"
+        "sub	r6, r6, #4\n\t"
+        "cmp	r6, #0\n\t"
+        "bge	1b\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
+        : "r3", "r4", "r5", "r6", "r8"
+    );
+
+    return r;
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Nmber to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_4096_div_128(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[256], t2[129];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    div = d[127];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 128);
+    for (i=127; i>=0; i--) {
+        r1 = div_4096_word_128(t1[128 + i], t1[128 + i - 1], div);
+
+        sp_4096_mul_d_128(t2, d, r1);
+        t1[128 + i] += sp_4096_sub_in_place_128(&t1[i], t2);
+        t1[128 + i] -= t2[128];
+        sp_4096_mask_128(t2, d, t1[128 + i]);
+        t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], t2);
+        sp_4096_mask_128(t2, d, t1[128 + i]);
+        t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], t2);
+    }
+
+    r1 = sp_4096_cmp_128(t1, d) >= 0;
+    sp_4096_cond_sub_128(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_4096_mod_128(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_4096_div_128(a, m, NULL, r);
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Nmber to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_4096_div_128_cond(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[256], t2[129];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    div = d[127];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 128);
+    for (i=127; i>=0; i--) {
+        r1 = div_4096_word_128(t1[128 + i], t1[128 + i - 1], div);
+
+        sp_4096_mul_d_128(t2, d, r1);
+        t1[128 + i] += sp_4096_sub_in_place_128(&t1[i], t2);
+        t1[128 + i] -= t2[128];
+        if (t1[128 + i] != 0) {
+            t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], d);
+            if (t1[128 + i] != 0)
+                t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], d);
+        }
+    }
+
+    r1 = sp_4096_cmp_128(t1, d) >= 0;
+    sp_4096_cond_sub_128(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_4096_mod_128_cond(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_4096_div_128_cond(a, m, NULL, r);
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
+                                                     defined(WOLFSSL_HAVE_SP_DH)
+#ifdef WOLFSSL_SP_SMALL
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * r     A single precision number that is the result of the operation.
+ * a     A single precision number being exponentiated.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_4096_mod_exp_128(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[16][256];
+#else
+    sp_digit* t[16];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 256, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<16; i++) {
+            t[i] = td + i * 256;
+        }
+#endif
+        norm = t[0];
+
+        sp_4096_mont_setup(m, &mp);
+        sp_4096_mont_norm_128(norm, m);
+
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 128U);
+        if (reduceA != 0) {
+            err = sp_4096_mod_128(t[1] + 128, a, m);
+            if (err == MP_OKAY) {
+                err = sp_4096_mod_128(t[1], t[1], m);
+            }
+        }
+        else {
+            XMEMCPY(t[1] + 128, a, sizeof(sp_digit) * 128);
+            err = sp_4096_mod_128(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_mont_sqr_128(t[ 2], t[ 1], m, mp);
+        sp_4096_mont_mul_128(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_4096_mont_sqr_128(t[ 4], t[ 2], m, mp);
+        sp_4096_mont_mul_128(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_4096_mont_sqr_128(t[ 6], t[ 3], m, mp);
+        sp_4096_mont_mul_128(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_4096_mont_sqr_128(t[ 8], t[ 4], m, mp);
+        sp_4096_mont_mul_128(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_4096_mont_sqr_128(t[10], t[ 5], m, mp);
+        sp_4096_mont_mul_128(t[11], t[ 6], t[ 5], m, mp);
+        sp_4096_mont_sqr_128(t[12], t[ 6], m, mp);
+        sp_4096_mont_mul_128(t[13], t[ 7], t[ 6], m, mp);
+        sp_4096_mont_sqr_128(t[14], t[ 7], m, mp);
+        sp_4096_mont_mul_128(t[15], t[ 8], t[ 7], m, mp);
+
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 4;
+        if (c == 32) {
+            c = 28;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 128);
+        for (; i>=0 || c>=4; ) {
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 28;
+                n <<= 4;
+                c = 28;
+            }
+            else if (c < 4) {
+                y = n >> 28;
+                n = e[i--];
+                c = 4 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                y = (n >> 28) & 0xf;
+                n <<= 4;
+                c -= 4;
+            }
+
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+
+            sp_4096_mont_mul_128(r, r, t[y], m, mp);
+        }
+
+        XMEMSET(&r[128], 0, sizeof(sp_digit) * 128U);
+        sp_4096_mont_reduce_128(r, m, mp);
+
+        mask = 0 - (sp_4096_cmp_128(r, m) >= 0);
+        sp_4096_cond_sub_128(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#else
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * r     A single precision number that is the result of the operation.
+ * a     A single precision number being exponentiated.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_4096_mod_exp_128(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[32][256];
+#else
+    sp_digit* t[32];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 256, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<32; i++) {
+            t[i] = td + i * 256;
+        }
+#endif
+        norm = t[0];
+
+        sp_4096_mont_setup(m, &mp);
+        sp_4096_mont_norm_128(norm, m);
+
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 128U);
+        if (reduceA != 0) {
+            err = sp_4096_mod_128(t[1] + 128, a, m);
+            if (err == MP_OKAY) {
+                err = sp_4096_mod_128(t[1], t[1], m);
+            }
+        }
+        else {
+            XMEMCPY(t[1] + 128, a, sizeof(sp_digit) * 128);
+            err = sp_4096_mod_128(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_mont_sqr_128(t[ 2], t[ 1], m, mp);
+        sp_4096_mont_mul_128(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_4096_mont_sqr_128(t[ 4], t[ 2], m, mp);
+        sp_4096_mont_mul_128(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_4096_mont_sqr_128(t[ 6], t[ 3], m, mp);
+        sp_4096_mont_mul_128(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_4096_mont_sqr_128(t[ 8], t[ 4], m, mp);
+        sp_4096_mont_mul_128(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_4096_mont_sqr_128(t[10], t[ 5], m, mp);
+        sp_4096_mont_mul_128(t[11], t[ 6], t[ 5], m, mp);
+        sp_4096_mont_sqr_128(t[12], t[ 6], m, mp);
+        sp_4096_mont_mul_128(t[13], t[ 7], t[ 6], m, mp);
+        sp_4096_mont_sqr_128(t[14], t[ 7], m, mp);
+        sp_4096_mont_mul_128(t[15], t[ 8], t[ 7], m, mp);
+        sp_4096_mont_sqr_128(t[16], t[ 8], m, mp);
+        sp_4096_mont_mul_128(t[17], t[ 9], t[ 8], m, mp);
+        sp_4096_mont_sqr_128(t[18], t[ 9], m, mp);
+        sp_4096_mont_mul_128(t[19], t[10], t[ 9], m, mp);
+        sp_4096_mont_sqr_128(t[20], t[10], m, mp);
+        sp_4096_mont_mul_128(t[21], t[11], t[10], m, mp);
+        sp_4096_mont_sqr_128(t[22], t[11], m, mp);
+        sp_4096_mont_mul_128(t[23], t[12], t[11], m, mp);
+        sp_4096_mont_sqr_128(t[24], t[12], m, mp);
+        sp_4096_mont_mul_128(t[25], t[13], t[12], m, mp);
+        sp_4096_mont_sqr_128(t[26], t[13], m, mp);
+        sp_4096_mont_mul_128(t[27], t[14], t[13], m, mp);
+        sp_4096_mont_sqr_128(t[28], t[14], m, mp);
+        sp_4096_mont_mul_128(t[29], t[15], t[14], m, mp);
+        sp_4096_mont_sqr_128(t[30], t[15], m, mp);
+        sp_4096_mont_mul_128(t[31], t[16], t[15], m, mp);
+
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 5;
+        if (c == 32) {
+            c = 27;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 128);
+        for (; i>=0 || c>=5; ) {
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 27;
+                n <<= 5;
+                c = 27;
+            }
+            else if (c < 5) {
+                y = n >> 27;
+                n = e[i--];
+                c = 5 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                y = (n >> 27) & 0x1f;
+                n <<= 5;
+                c -= 5;
+            }
+
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+
+            sp_4096_mont_mul_128(r, r, t[y], m, mp);
+        }
+
+        XMEMSET(&r[128], 0, sizeof(sp_digit) * 128U);
+        sp_4096_mont_reduce_128(r, m, mp);
+
+        mask = 0 - (sp_4096_cmp_128(r, m) >= 0);
+        sp_4096_cond_sub_128(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */
+
+#ifdef WOLFSSL_HAVE_SP_RSA
+/* RSA public key operation.
+ *
+ * in      Array of bytes representing the number to exponentiate, base.
+ * inLen   Number of bytes in base.
+ * em      Public exponent.
+ * mm      Modulus.
+ * out     Buffer to hold big-endian bytes of exponentiation result.
+ *         Must be at least 512 bytes long.
+ * outLen  Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPublic_4096(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
+    byte* out, word32* outLen)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit a[256], m[128], r[256];
+#else
+    sp_digit* d = NULL;
+    sp_digit* a;
+    sp_digit* m;
+    sp_digit* r;
+#endif
+    sp_digit *ah;
+    sp_digit e[1];
+    int err = MP_OKAY;
+
+    if (*outLen < 512)
+        err = MP_TO_E;
+    if (err == MP_OKAY && (mp_count_bits(em) > 32 || inLen > 512 ||
+                                                     mp_count_bits(mm) != 4096))
+        err = MP_READ_E;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 128 * 5, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (d == NULL)
+            err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        a = d;
+        r = a + 128 * 2;
+        m = r + 128 * 2;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        ah = a + 128;
+
+        sp_4096_from_bin(ah, 128, in, inLen);
+#if DIGIT_BIT >= 32
+        e[0] = em->dp[0];
+#else
+        e[0] = em->dp[0];
+        if (em->used > 1) {
+            e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
+        }
+#endif
+        if (e[0] == 0) {
+            err = MP_EXPTMOD_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        sp_4096_from_mp(m, 128, mm);
+
+        if (e[0] == 0x3) {
+            if (err == MP_OKAY) {
+                sp_4096_sqr_128(r, ah);
+                err = sp_4096_mod_128_cond(r, r, m);
+            }
+            if (err == MP_OKAY) {
+                sp_4096_mul_128(r, ah, r);
+                err = sp_4096_mod_128_cond(r, r, m);
+            }
+        }
+        else {
+            int i;
+            sp_digit mp;
+
+            sp_4096_mont_setup(m, &mp);
+
+            /* Convert to Montgomery form. */
+            XMEMSET(a, 0, sizeof(sp_digit) * 128);
+            err = sp_4096_mod_128_cond(a, a, m);
+
+            if (err == MP_OKAY) {
+                for (i = 31; i >= 0; i--) {
+                    if (e[0] >> i) {
+                        break;
+                    }
+                }
+
+                XMEMCPY(r, a, sizeof(sp_digit) * 128);
+                for (i--; i>=0; i--) {
+                    sp_4096_mont_sqr_128(r, r, m, mp);
+                    if (((e[0] >> i) & 1) == 1) {
+                        sp_4096_mont_mul_128(r, r, a, m, mp);
+                    }
+                }
+                XMEMSET(&r[128], 0, sizeof(sp_digit) * 128);
+                sp_4096_mont_reduce_128(r, m, mp);
+
+                for (i = 127; i > 0; i--) {
+                    if (r[i] != m[i]) {
+                        break;
+                    }
+                }
+                if (r[i] >= m[i]) {
+                    sp_4096_sub_in_place_128(r, m);
+                }
+            }
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+#endif
+
+    return err;
+}
+
+#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
+    sp_digit* a;
+    sp_digit* d = NULL;
+    sp_digit* m;
+    sp_digit* r;
+    int err = MP_OKAY;
+
+    (void)pm;
+    (void)qm;
+    (void)dpm;
+    (void)dqm;
+    (void)qim;
+
+    if (*outLen < 512U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(dm) > 4096) {
+           err = MP_READ_E;
+        }
+        if (inLen > 512) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 128 * 4, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        a = d + 128;
+        m = a + 256;
+        r = a;
+
+        sp_4096_from_bin(a, 128, in, inLen);
+        sp_4096_from_mp(d, 128, dm);
+        sp_4096_from_mp(m, 128, mm);
+        err = sp_4096_mod_exp_128(r, a, d, 4096, m, 0);
+    }
+    if (err == MP_OKAY) {
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+    }
+
+    if (d != NULL) {
+        XMEMSET(d, 0, sizeof(sp_digit) * 128);
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+
+    return err;
+#else
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r  A single precision number representing conditional add result.
+ * a  A single precision number to add with.
+ * b  A single precision number to add.
+ * m  Mask value to apply.
+ */
+SP_NOINLINE static sp_digit sp_4096_cond_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        sp_digit m)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r5, #1\n\t"
+        "lsl	r5, r5, #8\n\t"
+        "mov	r9, r5\n\t"
+        "mov	r8, #0\n\t"
+        "\n1:\n\t"
+        "ldr	r6, [%[b], r8]\n\t"
+        "and	r6, r6, %[m]\n\t"
+        "adds	r5, %[c], #-1\n\t"
+        "ldr	r5, [%[a], r8]\n\t"
+        "adcs	r5, r5, r6\n\t"
+        "mov	%[c], #0\n\t"
+        "adcs	%[c], %[c], %[c]\n\t"
+        "str	r5, [%[r], r8]\n\t"
+        "add	r8, r8, #4\n\t"
+        "cmp	r8, r9\n\t"
+        "blt	1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r5", "r6", "r8", "r9"
+    );
+
+    return c;
+}
+
+/* RSA private key operation.
+ *
+ * in      Array of bytes representing the number to exponentiate, base.
+ * inLen   Number of bytes in base.
+ * dm      Private exponent.
+ * pm      First prime.
+ * qm      Second prime.
+ * dpm     First prime's CRT exponent.
+ * dqm     Second prime's CRT exponent.
+ * qim     Inverse of second prime mod p.
+ * mm      Modulus.
+ * out     Buffer to hold big-endian bytes of exponentiation result.
+ *         Must be at least 512 bytes long.
+ * outLen  Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm,
+    mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
+    byte* out, word32* outLen)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit a[128 * 2];
+    sp_digit p[64], q[64], dp[64];
+    sp_digit tmpa[128], tmpb[128];
+#else
+    sp_digit* t = NULL;
+    sp_digit* a;
+    sp_digit* p;
+    sp_digit* q;
+    sp_digit* dp;
+    sp_digit* tmpa;
+    sp_digit* tmpb;
+#endif
+    sp_digit* r;
+    sp_digit* qi;
+    sp_digit* dq;
+    sp_digit c;
+    int err = MP_OKAY;
+
+    (void)dm;
+    (void)mm;
+
+    if (*outLen < 512)
+        err = MP_TO_E;
+    if (err == MP_OKAY && (inLen > 512 || mp_count_bits(mm) != 4096))
+        err = MP_READ_E;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 11, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (t == NULL)
+            err = MEMORY_E;
+    }
+    if (err == MP_OKAY) {
+        a = t;
+        p = a + 128 * 2;
+        q = p + 64;
+        qi = dq = dp = q + 64;
+        tmpa = qi + 64;
+        tmpb = tmpa + 128;
+
+        r = t + 128;
+    }
+#else
+#endif
+
+    if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+        r = a;
+        qi = dq = dp;
+#endif
+        sp_4096_from_bin(a, 128, in, inLen);
+        sp_4096_from_mp(p, 64, pm);
+        sp_4096_from_mp(q, 64, qm);
+        sp_4096_from_mp(dp, 64, dpm);
+
+        err = sp_2048_mod_exp_64(tmpa, a, dp, 2048, p, 1);
+    }
+    if (err == MP_OKAY) {
+        sp_4096_from_mp(dq, 64, dqm);
+        err = sp_2048_mod_exp_64(tmpb, a, dq, 2048, q, 1);
+    }
+
+    if (err == MP_OKAY) {
+        c = sp_2048_sub_in_place_64(tmpa, tmpb);
+        c += sp_4096_cond_add_64(tmpa, tmpa, p, c);
+        sp_4096_cond_add_64(tmpa, tmpa, p, c);
+
+        sp_2048_from_mp(qi, 64, qim);
+        sp_2048_mul_64(tmpa, tmpa, qi);
+        err = sp_2048_mod_64(tmpa, tmpa, p);
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_mul_64(tmpa, q, tmpa);
+        XMEMSET(&tmpb[64], 0, sizeof(sp_digit) * 64);
+        sp_4096_add_128(r, tmpb, tmpa);
+
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        XMEMSET(t, 0, sizeof(sp_digit) * 64 * 11);
+        XFREE(t, NULL, DYNAMIC_TYPE_RSA);
+    }
+#else
+    XMEMSET(tmpa, 0, sizeof(tmpa));
+    XMEMSET(tmpb, 0, sizeof(tmpb));
+    XMEMSET(p,    0, sizeof(p));
+    XMEMSET(q,    0, sizeof(q));
+    XMEMSET(dp,   0, sizeof(dp));
+#endif
+
+    return err;
+}
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
+#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
+#endif /* WOLFSSL_HAVE_SP_RSA */
+#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
+                                              !defined(WOLFSSL_RSA_PUBLIC_ONLY))
+/* Convert an array of sp_digit to an mp_int.
+ *
+ * a  A single precision integer.
+ * r  A multi-precision integer.
+ */
+static int sp_4096_to_mp(const sp_digit* a, mp_int* r)
+{
+    int err;
+
+    err = mp_grow(r, (4096 + DIGIT_BIT - 1) / DIGIT_BIT);
+    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 32
+        XMEMCPY(r->dp, a, sizeof(sp_digit) * 128);
+        r->used = 128;
+        mp_clamp(r);
+#elif DIGIT_BIT < 32
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 128; i++) {
+            r->dp[j] |= (mp_digit)(a[i] << s);
+            r->dp[j] &= (1L << DIGIT_BIT) - 1;
+            s = DIGIT_BIT - s;
+            r->dp[++j] = (mp_digit)(a[i] >> s);
+            while (s + DIGIT_BIT <= 32) {
+                s += DIGIT_BIT;
+                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+                if (s == SP_WORD_SIZE) {
+                    r->dp[j] = 0;
+                }
+                else {
+                    r->dp[j] = (mp_digit)(a[i] >> s);
+                }
+            }
+            s = 32 - s;
+        }
+        r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#else
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 128; i++) {
+            r->dp[j] |= ((mp_digit)a[i]) << s;
+            if (s + 32 >= DIGIT_BIT) {
+    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+                r->dp[j] &= (1L << DIGIT_BIT) - 1;
+    #endif
+                s = DIGIT_BIT - s;
+                r->dp[++j] = a[i] >> s;
+                s = 32 - s;
+            }
+            else {
+                s += 32;
+            }
+        }
+        r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#endif
+    }
+
+    return err;
+}
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base  Base. MP integer.
+ * exp   Exponent. MP integer.
+ * mod   Modulus. MP integer.
+ * res   Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_4096(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+    int err = MP_OKAY;
+    sp_digit b[256], e[128], m[128];
+    sp_digit* r = b;
+    int expBits = mp_count_bits(exp);
+
+    if (mp_count_bits(base) > 4096) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expBits > 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_from_mp(b, 128, base);
+        sp_4096_from_mp(e, 128, exp);
+        sp_4096_from_mp(m, 128, mod);
+
+        err = sp_4096_mod_exp_128(r, b, e, expBits, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_4096_to_mp(r, res);
+    }
+
+    XMEMSET(e, 0, sizeof(e));
+
+    return err;
+}
+
+#ifdef WOLFSSL_HAVE_SP_DH
+
+#ifdef HAVE_FFDHE_4096
+static void sp_4096_lshift_128(sp_digit* r, sp_digit* a, byte n)
+{
+    __asm__ __volatile__ (
+        "mov r6, #31\n\t"
+        "sub r6, r6, %[n]\n\t"
+        "add       %[a], %[a], #448\n\t"
+        "add       %[r], %[r], #448\n\t"
+        "ldr r3, [%[a], #60]\n\t"
+        "lsr r4, r3, #1\n\t"
+        "lsl r3, r3, %[n]\n\t"
+        "lsr r4, r4, r6\n\t"
+        "ldr       r2, [%[a], #56]\n\t"
+        "str       r4, [%[r], #64]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #52]\n\t"
+        "str       r3, [%[r], #60]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #48]\n\t"
+        "str       r2, [%[r], #56]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #44]\n\t"
+        "str       r4, [%[r], #52]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #40]\n\t"
+        "str       r3, [%[r], #48]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #36]\n\t"
+        "str       r2, [%[r], #44]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #32]\n\t"
+        "str       r4, [%[r], #40]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #28]\n\t"
+        "str       r3, [%[r], #36]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #24]\n\t"
+        "str       r2, [%[r], #32]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #20]\n\t"
+        "str       r4, [%[r], #28]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #16]\n\t"
+        "str       r3, [%[r], #24]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #12]\n\t"
+        "str       r2, [%[r], #20]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #8]\n\t"
+        "str       r4, [%[r], #16]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #4]\n\t"
+        "str       r3, [%[r], #12]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #0]\n\t"
+        "str       r2, [%[r], #8]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "sub     %[a], %[a], #64\n\t"
+        "sub     %[r], %[r], #64\n\t"
+        "ldr       r2, [%[a], #60]\n\t"
+        "str       r4, [%[r], #68]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #56]\n\t"
+        "str       r3, [%[r], #64]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #52]\n\t"
+        "str       r2, [%[r], #60]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #48]\n\t"
+        "str       r4, [%[r], #56]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #44]\n\t"
+        "str       r3, [%[r], #52]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #40]\n\t"
+        "str       r2, [%[r], #48]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #36]\n\t"
+        "str       r4, [%[r], #44]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #32]\n\t"
+        "str       r3, [%[r], #40]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #28]\n\t"
+        "str       r2, [%[r], #36]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #24]\n\t"
+        "str       r4, [%[r], #32]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #20]\n\t"
+        "str       r3, [%[r], #28]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #16]\n\t"
+        "str       r2, [%[r], #24]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #12]\n\t"
+        "str       r4, [%[r], #20]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #8]\n\t"
+        "str       r3, [%[r], #16]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #4]\n\t"
+        "str       r2, [%[r], #12]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #0]\n\t"
+        "str       r4, [%[r], #8]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "sub     %[a], %[a], #64\n\t"
+        "sub     %[r], %[r], #64\n\t"
+        "ldr       r4, [%[a], #60]\n\t"
+        "str       r3, [%[r], #68]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #56]\n\t"
+        "str       r2, [%[r], #64]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #52]\n\t"
+        "str       r4, [%[r], #60]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #48]\n\t"
+        "str       r3, [%[r], #56]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #44]\n\t"
+        "str       r2, [%[r], #52]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #40]\n\t"
+        "str       r4, [%[r], #48]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #36]\n\t"
+        "str       r3, [%[r], #44]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #32]\n\t"
+        "str       r2, [%[r], #40]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #28]\n\t"
+        "str       r4, [%[r], #36]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #24]\n\t"
+        "str       r3, [%[r], #32]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #20]\n\t"
+        "str       r2, [%[r], #28]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #16]\n\t"
+        "str       r4, [%[r], #24]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #12]\n\t"
+        "str       r3, [%[r], #20]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #8]\n\t"
+        "str       r2, [%[r], #16]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #4]\n\t"
+        "str       r4, [%[r], #12]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #0]\n\t"
+        "str       r3, [%[r], #8]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "sub     %[a], %[a], #64\n\t"
+        "sub     %[r], %[r], #64\n\t"
+        "ldr       r3, [%[a], #60]\n\t"
+        "str       r2, [%[r], #68]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #56]\n\t"
+        "str       r4, [%[r], #64]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #52]\n\t"
+        "str       r3, [%[r], #60]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #48]\n\t"
+        "str       r2, [%[r], #56]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #44]\n\t"
+        "str       r4, [%[r], #52]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #40]\n\t"
+        "str       r3, [%[r], #48]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #36]\n\t"
+        "str       r2, [%[r], #44]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #32]\n\t"
+        "str       r4, [%[r], #40]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #28]\n\t"
+        "str       r3, [%[r], #36]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #24]\n\t"
+        "str       r2, [%[r], #32]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #20]\n\t"
+        "str       r4, [%[r], #28]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #16]\n\t"
+        "str       r3, [%[r], #24]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #12]\n\t"
+        "str       r2, [%[r], #20]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #8]\n\t"
+        "str       r4, [%[r], #16]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #4]\n\t"
+        "str       r3, [%[r], #12]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #0]\n\t"
+        "str       r2, [%[r], #8]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "sub     %[a], %[a], #64\n\t"
+        "sub     %[r], %[r], #64\n\t"
+        "ldr       r2, [%[a], #60]\n\t"
+        "str       r4, [%[r], #68]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #56]\n\t"
+        "str       r3, [%[r], #64]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #52]\n\t"
+        "str       r2, [%[r], #60]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #48]\n\t"
+        "str       r4, [%[r], #56]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #44]\n\t"
+        "str       r3, [%[r], #52]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #40]\n\t"
+        "str       r2, [%[r], #48]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #36]\n\t"
+        "str       r4, [%[r], #44]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #32]\n\t"
+        "str       r3, [%[r], #40]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #28]\n\t"
+        "str       r2, [%[r], #36]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #24]\n\t"
+        "str       r4, [%[r], #32]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #20]\n\t"
+        "str       r3, [%[r], #28]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #16]\n\t"
+        "str       r2, [%[r], #24]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #12]\n\t"
+        "str       r4, [%[r], #20]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #8]\n\t"
+        "str       r3, [%[r], #16]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #4]\n\t"
+        "str       r2, [%[r], #12]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #0]\n\t"
+        "str       r4, [%[r], #8]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "sub     %[a], %[a], #64\n\t"
+        "sub     %[r], %[r], #64\n\t"
+        "ldr       r4, [%[a], #60]\n\t"
+        "str       r3, [%[r], #68]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #56]\n\t"
+        "str       r2, [%[r], #64]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #52]\n\t"
+        "str       r4, [%[r], #60]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #48]\n\t"
+        "str       r3, [%[r], #56]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #44]\n\t"
+        "str       r2, [%[r], #52]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #40]\n\t"
+        "str       r4, [%[r], #48]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #36]\n\t"
+        "str       r3, [%[r], #44]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #32]\n\t"
+        "str       r2, [%[r], #40]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #28]\n\t"
+        "str       r4, [%[r], #36]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #24]\n\t"
+        "str       r3, [%[r], #32]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #20]\n\t"
+        "str       r2, [%[r], #28]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #16]\n\t"
+        "str       r4, [%[r], #24]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #12]\n\t"
+        "str       r3, [%[r], #20]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #8]\n\t"
+        "str       r2, [%[r], #16]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #4]\n\t"
+        "str       r4, [%[r], #12]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #0]\n\t"
+        "str       r3, [%[r], #8]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "sub     %[a], %[a], #64\n\t"
+        "sub     %[r], %[r], #64\n\t"
+        "ldr       r3, [%[a], #60]\n\t"
+        "str       r2, [%[r], #68]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #56]\n\t"
+        "str       r4, [%[r], #64]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #52]\n\t"
+        "str       r3, [%[r], #60]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #48]\n\t"
+        "str       r2, [%[r], #56]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #44]\n\t"
+        "str       r4, [%[r], #52]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #40]\n\t"
+        "str       r3, [%[r], #48]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #36]\n\t"
+        "str       r2, [%[r], #44]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #32]\n\t"
+        "str       r4, [%[r], #40]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #28]\n\t"
+        "str       r3, [%[r], #36]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #24]\n\t"
+        "str       r2, [%[r], #32]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #20]\n\t"
+        "str       r4, [%[r], #28]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #16]\n\t"
+        "str       r3, [%[r], #24]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #12]\n\t"
+        "str       r2, [%[r], #20]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #8]\n\t"
+        "str       r4, [%[r], #16]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #4]\n\t"
+        "str       r3, [%[r], #12]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #0]\n\t"
+        "str       r2, [%[r], #8]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "sub     %[a], %[a], #64\n\t"
+        "sub     %[r], %[r], #64\n\t"
+        "ldr       r2, [%[a], #60]\n\t"
+        "str       r4, [%[r], #68]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #56]\n\t"
+        "str       r3, [%[r], #64]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #52]\n\t"
+        "str       r2, [%[r], #60]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #48]\n\t"
+        "str       r4, [%[r], #56]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #44]\n\t"
+        "str       r3, [%[r], #52]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #40]\n\t"
+        "str       r2, [%[r], #48]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #36]\n\t"
+        "str       r4, [%[r], #44]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #32]\n\t"
+        "str       r3, [%[r], #40]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #28]\n\t"
+        "str       r2, [%[r], #36]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #24]\n\t"
+        "str       r4, [%[r], #32]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #20]\n\t"
+        "str       r3, [%[r], #28]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #16]\n\t"
+        "str       r2, [%[r], #24]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #12]\n\t"
+        "str       r4, [%[r], #20]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "ldr       r4, [%[a], #8]\n\t"
+        "str       r3, [%[r], #16]\n\t"
+        "lsr       r5, r4, #1\n\t"
+        "lsl       r4, r4, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r2, r2, r5\n\t"
+        "ldr       r3, [%[a], #4]\n\t"
+        "str       r2, [%[r], #12]\n\t"
+        "lsr       r5, r3, #1\n\t"
+        "lsl       r3, r3, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r4, r4, r5\n\t"
+        "ldr       r2, [%[a], #0]\n\t"
+        "str       r4, [%[r], #8]\n\t"
+        "lsr       r5, r2, #1\n\t"
+        "lsl       r2, r2, %[n]\n\t"
+        "lsr       r5, r5, r6\n\t"
+        "orr       r3, r3, r5\n\t"
+        "str r2, [%[r]]\n\t"
+        "str r3, [%[r], #4]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [n] "r" (n)
+        : "memory", "r2", "r3", "r4", "r5", "r6"
+    );
+}
+
+/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
+ *
+ * r     A single precision number that is the result of the operation.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_4096_mod_exp_2_128(sp_digit* r, const sp_digit* e, int bits,
+        const sp_digit* m)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit nd[256];
+    sp_digit td[129];
+#else
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit* tmp;
+    sp_digit mp = 1;
+    sp_digit n, o;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 385, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        norm = td;
+        tmp  = td + 256;
+#else
+        norm = nd;
+        tmp  = td;
+#endif
+
+        sp_4096_mont_setup(m, &mp);
+        sp_4096_mont_norm_128(norm, m);
+
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 5;
+        if (c == 32) {
+            c = 27;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
+        sp_4096_lshift_128(r, norm, y);
+        for (; i>=0 || c>=5; ) {
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 27;
+                n <<= 5;
+                c = 27;
+            }
+            else if (c < 5) {
+                y = n >> 27;
+                n = e[i--];
+                c = 5 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                y = (n >> 27) & 0x1f;
+                n <<= 5;
+                c -= 5;
+            }
+
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+
+            sp_4096_lshift_128(r, r, y);
+            sp_4096_mul_d_128(tmp, norm, r[128]);
+            r[128] = 0;
+            o = sp_4096_add_128(r, r, tmp);
+            sp_4096_cond_sub_128(r, r, m, (sp_digit)0 - o);
+        }
+
+        XMEMSET(&r[128], 0, sizeof(sp_digit) * 128U);
+        sp_4096_mont_reduce_128(r, m, mp);
+
+        mask = 0 - (sp_4096_cmp_128(r, m) >= 0);
+        sp_4096_cond_sub_128(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#endif /* HAVE_FFDHE_4096 */
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base     Base.
+ * exp      Array of bytes that is the exponent.
+ * expLen   Length of data, in bytes, in exponent.
+ * mod      Modulus.
+ * out      Buffer to hold big-endian bytes of exponentiation result.
+ *          Must be at least 512 bytes long.
+ * outLen   Length, in bytes, of exponentiation result.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_DhExp_4096(mp_int* base, const byte* exp, word32 expLen,
+    mp_int* mod, byte* out, word32* outLen)
+{
+    int err = MP_OKAY;
+    sp_digit b[256], e[128], m[128];
+    sp_digit* r = b;
+    word32 i;
+
+    if (mp_count_bits(base) > 4096) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expLen > 512) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_from_mp(b, 128, base);
+        sp_4096_from_bin(e, 128, exp, expLen);
+        sp_4096_from_mp(m, 128, mod);
+
+    #ifdef HAVE_FFDHE_4096
+        if (base->used == 1 && base->dp[0] == 2 && m[127] == (sp_digit)-1)
+            err = sp_4096_mod_exp_2_128(r, e, expLen * 8, m);
+        else
+    #endif
+            err = sp_4096_mod_exp_128(r, b, e, expLen * 8, m, 0);
+
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+        for (i=0; i<512 && out[i] == 0; i++) {
+        }
+        *outLen -= i;
+        XMEMMOVE(out, out + i, *outLen);
+
+    }
+
+    XMEMSET(e, 0, sizeof(e));
+
+    return err;
+}
+#endif /* WOLFSSL_HAVE_SP_DH */
+
+#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
+
+#endif /* WOLFSSL_SP_4096 */
+
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
+#ifdef WOLFSSL_HAVE_SP_ECC
+#ifndef WOLFSSL_SP_NO_256
+
+/* Point structure to use. */
+typedef struct sp_point_256 {
+    sp_digit x[2 * 8];
+    sp_digit y[2 * 8];
+    sp_digit z[2 * 8];
+    int infinity;
+} sp_point_256;
+
+/* The modulus (prime) of the curve P256. */
+static const sp_digit p256_mod[8] = {
+    0xffffffff,0xffffffff,0xffffffff,0x00000000,0x00000000,0x00000000,
+    0x00000001,0xffffffff
+};
+/* The Montogmery normalizer for modulus of the curve P256. */
+static const sp_digit p256_norm_mod[8] = {
+    0x00000001,0x00000000,0x00000000,0xffffffff,0xffffffff,0xffffffff,
+    0xfffffffe,0x00000000
+};
+/* The Montogmery multiplier for modulus of the curve P256. */
+static const sp_digit p256_mp_mod = 0x00000001;
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+                                            defined(HAVE_ECC_VERIFY)
+/* The order of the curve P256. */
+static const sp_digit p256_order[8] = {
+    0xfc632551,0xf3b9cac2,0xa7179e84,0xbce6faad,0xffffffff,0xffffffff,
+    0x00000000,0xffffffff
+};
+#endif
+/* The order of the curve P256 minus 2. */
+static const sp_digit p256_order2[8] = {
+    0xfc63254f,0xf3b9cac2,0xa7179e84,0xbce6faad,0xffffffff,0xffffffff,
+    0x00000000,0xffffffff
+};
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montogmery normalizer for order of the curve P256. */
+static const sp_digit p256_norm_order[8] = {
+    0x039cdaaf,0x0c46353d,0x58e8617b,0x43190552,0x00000000,0x00000000,
+    0xffffffff,0x00000000
+};
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montogmery multiplier for order of the curve P256. */
+static const sp_digit p256_mp_order = 0xee00bc4f;
+#endif
+/* The base point of curve P256. */
+static const sp_point_256 p256_base = {
+    /* X ordinate */
+    {
+        0xd898c296,0xf4a13945,0x2deb33a0,0x77037d81,0x63a440f2,0xf8bce6e5,
+        0xe12c4247,0x6b17d1f2,
+        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* Y ordinate */
+    {
+        0x37bf51f5,0xcbb64068,0x6b315ece,0x2bce3357,0x7c0f9e16,0x8ee7eb4a,
+        0xfe1a7f9b,0x4fe342e2,
+        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* Z ordinate */
+    {
+        0x00000001,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
+        0x00000000,0x00000000,
+        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* infinity */
+    0
+};
+#if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY)
+static const sp_digit p256_b[8] = {
+    0x27d2604b,0x3bce3c3e,0xcc53b0f6,0x651d06b0,0x769886bc,0xb3ebbd55,
+    0xaa3a93e7,0x5ac635d8
+};
+#endif
+
+static int sp_256_point_new_ex_8(void* heap, sp_point_256* sp, sp_point_256** p)
+{
+    int ret = MP_OKAY;
+    (void)heap;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    (void)sp;
+    *p = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, DYNAMIC_TYPE_ECC);
+#else
+    *p = sp;
+#endif
+    if (*p == NULL) {
+        ret = MEMORY_E;
+    }
+    return ret;
+}
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* Allocate memory for point and return error. */
+#define sp_256_point_new_8(heap, sp, p) sp_256_point_new_ex_8((heap), NULL, &(p))
+#else
+/* Set pointer to data and return no error. */
+#define sp_256_point_new_8(heap, sp, p) sp_256_point_new_ex_8((heap), &(sp), &(p))
+#endif
+
+
+static void sp_256_point_free_8(sp_point_256* p, int clear, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* If valid pointer then clear point data if requested and free data. */
+    if (p != NULL) {
+        if (clear != 0) {
+            XMEMSET(p, 0, sizeof(*p));
+        }
+        XFREE(p, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+/* Clear point data if requested. */
+    if (clear != 0) {
+        XMEMSET(p, 0, sizeof(*p));
+    }
+#endif
+    (void)heap;
+}
+
+/* Multiply a number by Montogmery normalizer mod modulus (prime).
+ *
+ * r  The resulting Montgomery form number.
+ * a  The number to convert.
+ * m  The modulus (prime).
+ */
+static int sp_256_mod_mul_norm_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    int64_t t[8];
+    int64_t a64[8];
+    int64_t o;
+
+    (void)m;
+
+    a64[0] = a[0];
+    a64[1] = a[1];
+    a64[2] = a[2];
+    a64[3] = a[3];
+    a64[4] = a[4];
+    a64[5] = a[5];
+    a64[6] = a[6];
+    a64[7] = a[7];
+
+    /*  1  1  0 -1 -1 -1 -1  0 */
+    t[0] = 0 + a64[0] + a64[1] - a64[3] - a64[4] - a64[5] - a64[6];
+    /*  0  1  1  0 -1 -1 -1 -1 */
+    t[1] = 0 + a64[1] + a64[2] - a64[4] - a64[5] - a64[6] - a64[7];
+    /*  0  0  1  1  0 -1 -1 -1 */
+    t[2] = 0 + a64[2] + a64[3] - a64[5] - a64[6] - a64[7];
+    /* -1 -1  0  2  2  1  0 -1 */
+    t[3] = 0 - a64[0] - a64[1] + 2 * a64[3] + 2 * a64[4] + a64[5] - a64[7];
+    /*  0 -1 -1  0  2  2  1  0 */
+    t[4] = 0 - a64[1] - a64[2] + 2 * a64[4] + 2 * a64[5] + a64[6];
+    /*  0  0 -1 -1  0  2  2  1 */
+    t[5] = 0 - a64[2] - a64[3] + 2 * a64[5] + 2 * a64[6] + a64[7];
+    /* -1 -1  0  0  0  1  3  2 */
+    t[6] = 0 - a64[0] - a64[1] + a64[5] + 3 * a64[6] + 2 * a64[7];
+    /*  1  0 -1 -1 -1 -1  0  3 */
+    t[7] = 0 + a64[0] - a64[2] - a64[3] - a64[4] - a64[5] + 3 * a64[7];
+
+    t[1] += t[0] >> 32; t[0] &= 0xffffffff;
+    t[2] += t[1] >> 32; t[1] &= 0xffffffff;
+    t[3] += t[2] >> 32; t[2] &= 0xffffffff;
+    t[4] += t[3] >> 32; t[3] &= 0xffffffff;
+    t[5] += t[4] >> 32; t[4] &= 0xffffffff;
+    t[6] += t[5] >> 32; t[5] &= 0xffffffff;
+    t[7] += t[6] >> 32; t[6] &= 0xffffffff;
+    o     = t[7] >> 32; t[7] &= 0xffffffff;
+    t[0] += o;
+    t[3] -= o;
+    t[6] -= o;
+    t[7] += o;
+    t[1] += t[0] >> 32; t[0] &= 0xffffffff;
+    t[2] += t[1] >> 32; t[1] &= 0xffffffff;
+    t[3] += t[2] >> 32; t[2] &= 0xffffffff;
+    t[4] += t[3] >> 32; t[3] &= 0xffffffff;
+    t[5] += t[4] >> 32; t[4] &= 0xffffffff;
+    t[6] += t[5] >> 32; t[5] &= 0xffffffff;
+    t[7] += t[6] >> 32; t[6] &= 0xffffffff;
+    r[0] = t[0];
+    r[1] = t[1];
+    r[2] = t[2];
+    r[3] = t[3];
+    r[4] = t[4];
+    r[5] = t[5];
+    r[6] = t[6];
+    r[7] = t[7];
+
+    return MP_OKAY;
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
+ * a  A multi-precision integer.
+ */
+static void sp_256_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 32
+    int j;
+
+    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+    for (j = a->used; j < size; j++) {
+        r[j] = 0;
+    }
+#elif DIGIT_BIT > 32
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
+        r[j] &= 0xffffffff;
+        s = 32U - s;
+        if (j + 1 >= size) {
+            break;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 32U) <= (word32)DIGIT_BIT) {
+            s += 32U;
+            r[j] &= 0xffffffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#else
+    int i, j = 0, s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i]) << s;
+        if (s + DIGIT_BIT >= 32) {
+            r[j] &= 0xffffffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            s = 32 - s;
+            if (s == DIGIT_BIT) {
+                r[++j] = 0;
+                s = 0;
+            }
+            else {
+                r[++j] = a->dp[i] >> s;
+                s = DIGIT_BIT - s;
+            }
+        }
+        else {
+            s += DIGIT_BIT;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#endif
+}
+
+/* Convert a point of type ecc_point to type sp_point_256.
+ *
+ * p   Point of type sp_point_256 (result).
+ * pm  Point of type ecc_point.
+ */
+static void sp_256_point_from_ecc_point_8(sp_point_256* p, const ecc_point* pm)
+{
+    XMEMSET(p->x, 0, sizeof(p->x));
+    XMEMSET(p->y, 0, sizeof(p->y));
+    XMEMSET(p->z, 0, sizeof(p->z));
+    sp_256_from_mp(p->x, 8, pm->x);
+    sp_256_from_mp(p->y, 8, pm->y);
+    sp_256_from_mp(p->z, 8, pm->z);
+    p->infinity = 0;
+}
+
+/* Convert an array of sp_digit to an mp_int.
+ *
+ * a  A single precision integer.
+ * r  A multi-precision integer.
+ */
+static int sp_256_to_mp(const sp_digit* a, mp_int* r)
+{
+    int err;
+
+    err = mp_grow(r, (256 + DIGIT_BIT - 1) / DIGIT_BIT);
+    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 32
+        XMEMCPY(r->dp, a, sizeof(sp_digit) * 8);
+        r->used = 8;
+        mp_clamp(r);
+#elif DIGIT_BIT < 32
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 8; i++) {
+            r->dp[j] |= (mp_digit)(a[i] << s);
+            r->dp[j] &= (1L << DIGIT_BIT) - 1;
+            s = DIGIT_BIT - s;
+            r->dp[++j] = (mp_digit)(a[i] >> s);
+            while (s + DIGIT_BIT <= 32) {
+                s += DIGIT_BIT;
+                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+                if (s == SP_WORD_SIZE) {
+                    r->dp[j] = 0;
+                }
+                else {
+                    r->dp[j] = (mp_digit)(a[i] >> s);
+                }
+            }
+            s = 32 - s;
+        }
+        r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#else
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 8; i++) {
+            r->dp[j] |= ((mp_digit)a[i]) << s;
+            if (s + 32 >= DIGIT_BIT) {
+    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+                r->dp[j] &= (1L << DIGIT_BIT) - 1;
+    #endif
+                s = DIGIT_BIT - s;
+                r->dp[++j] = a[i] >> s;
+                s = 32 - s;
+            }
+            else {
+                s += 32;
+            }
+        }
+        r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#endif
+    }
+
+    return err;
+}
+
+/* Convert a point of type sp_point_256 to type ecc_point.
+ *
+ * p   Point of type sp_point_256.
+ * pm  Point of type ecc_point (result).
+ * returns MEMORY_E when allocation of memory in ecc_point fails otherwise
+ * MP_OKAY.
+ */
+static int sp_256_point_to_ecc_point_8(const sp_point_256* p, ecc_point* pm)
+{
+    int err;
+
+    err = sp_256_to_mp(p->x, pm->x);
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->y, pm->y);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->z, pm->z);
+    }
+
+    return err;
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_256_mul_8(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit tmp[8];
+
+    __asm__ __volatile__ (
+        /* A[0] * B[0] */
+        "ldr	r6, [%[a], #0]\n\t"
+        "ldr	r8, [%[b], #0]\n\t"
+        "umull	r3, r4, r6, r8\n\t"
+        "mov	r5, #0\n\t"
+        "str	r3, [%[tmp], #0]\n\t"
+        "mov	r3, #0\n\t"
+        /* A[0] * B[1] */
+        "ldr	r8, [%[b], #4]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adc	r5, r5, r8\n\t"
+        /* A[1] * B[0] */
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r8, [%[b], #0]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs 	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        "str	r4, [%[tmp], #4]\n\t"
+        "mov	r4, #0\n\t"
+        /* A[0] * B[2] */
+        "ldr	r6, [%[a], #0]\n\t"
+        "ldr	r8, [%[b], #8]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        /* A[1] * B[1] */
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r8, [%[b], #4]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        /* A[2] * B[0] */
+        "ldr	r6, [%[a], #8]\n\t"
+        "ldr	r8, [%[b], #0]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        "str	r5, [%[tmp], #8]\n\t"
+        "mov	r5, #0\n\t"
+        /* A[0] * B[3] */
+        "ldr	r6, [%[a], #0]\n\t"
+        "ldr	r8, [%[b], #12]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        /* A[1] * B[2] */
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r8, [%[b], #8]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        /* A[2] * B[1] */
+        "ldr	r6, [%[a], #8]\n\t"
+        "ldr	r8, [%[b], #4]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        /* A[3] * B[0] */
+        "ldr	r6, [%[a], #12]\n\t"
+        "ldr	r8, [%[b], #0]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        "str	r3, [%[tmp], #12]\n\t"
+        "mov	r3, #0\n\t"
+        /* A[0] * B[4] */
+        "ldr	r6, [%[a], #0]\n\t"
+        "ldr	r8, [%[b], #16]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs 	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        /* A[1] * B[3] */
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r8, [%[b], #12]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs 	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        /* A[2] * B[2] */
+        "ldr	r6, [%[a], #8]\n\t"
+        "ldr	r8, [%[b], #8]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs 	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        /* A[3] * B[1] */
+        "ldr	r6, [%[a], #12]\n\t"
+        "ldr	r8, [%[b], #4]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs 	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        /* A[4] * B[0] */
+        "ldr	r6, [%[a], #16]\n\t"
+        "ldr	r8, [%[b], #0]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs 	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        "str	r4, [%[tmp], #16]\n\t"
+        "mov	r4, #0\n\t"
+        /* A[0] * B[5] */
+        "ldr	r6, [%[a], #0]\n\t"
+        "ldr	r8, [%[b], #20]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        /* A[1] * B[4] */
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r8, [%[b], #16]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        /* A[2] * B[3] */
+        "ldr	r6, [%[a], #8]\n\t"
+        "ldr	r8, [%[b], #12]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        /* A[3] * B[2] */
+        "ldr	r6, [%[a], #12]\n\t"
+        "ldr	r8, [%[b], #8]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        /* A[4] * B[1] */
+        "ldr	r6, [%[a], #16]\n\t"
+        "ldr	r8, [%[b], #4]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        /* A[5] * B[0] */
+        "ldr	r6, [%[a], #20]\n\t"
+        "ldr	r8, [%[b], #0]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        "str	r5, [%[tmp], #20]\n\t"
+        "mov	r5, #0\n\t"
+        /* A[0] * B[6] */
+        "ldr	r6, [%[a], #0]\n\t"
+        "ldr	r8, [%[b], #24]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        /* A[1] * B[5] */
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r8, [%[b], #20]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        /* A[2] * B[4] */
+        "ldr	r6, [%[a], #8]\n\t"
+        "ldr	r8, [%[b], #16]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        /* A[3] * B[3] */
+        "ldr	r6, [%[a], #12]\n\t"
+        "ldr	r8, [%[b], #12]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        /* A[4] * B[2] */
+        "ldr	r6, [%[a], #16]\n\t"
+        "ldr	r8, [%[b], #8]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        /* A[5] * B[1] */
+        "ldr	r6, [%[a], #20]\n\t"
+        "ldr	r8, [%[b], #4]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        /* A[6] * B[0] */
+        "ldr	r6, [%[a], #24]\n\t"
+        "ldr	r8, [%[b], #0]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        "str	r3, [%[tmp], #24]\n\t"
+        "mov	r3, #0\n\t"
+        /* A[0] * B[7] */
+        "ldr	r6, [%[a], #0]\n\t"
+        "ldr	r8, [%[b], #28]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs 	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        /* A[1] * B[6] */
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r8, [%[b], #24]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs 	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        /* A[2] * B[5] */
+        "ldr	r6, [%[a], #8]\n\t"
+        "ldr	r8, [%[b], #20]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs 	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        /* A[3] * B[4] */
+        "ldr	r6, [%[a], #12]\n\t"
+        "ldr	r8, [%[b], #16]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs 	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        /* A[4] * B[3] */
+        "ldr	r6, [%[a], #16]\n\t"
+        "ldr	r8, [%[b], #12]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs 	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        /* A[5] * B[2] */
+        "ldr	r6, [%[a], #20]\n\t"
+        "ldr	r8, [%[b], #8]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs 	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        /* A[6] * B[1] */
+        "ldr	r6, [%[a], #24]\n\t"
+        "ldr	r8, [%[b], #4]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs 	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        /* A[7] * B[0] */
+        "ldr	r6, [%[a], #28]\n\t"
+        "ldr	r8, [%[b], #0]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs 	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        "str	r4, [%[tmp], #28]\n\t"
+        "mov	r4, #0\n\t"
+        /* A[1] * B[7] */
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r8, [%[b], #28]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        /* A[2] * B[6] */
+        "ldr	r6, [%[a], #8]\n\t"
+        "ldr	r8, [%[b], #24]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        /* A[3] * B[5] */
+        "ldr	r6, [%[a], #12]\n\t"
+        "ldr	r8, [%[b], #20]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        /* A[4] * B[4] */
+        "ldr	r6, [%[a], #16]\n\t"
+        "ldr	r8, [%[b], #16]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        /* A[5] * B[3] */
+        "ldr	r6, [%[a], #20]\n\t"
+        "ldr	r8, [%[b], #12]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        /* A[6] * B[2] */
+        "ldr	r6, [%[a], #24]\n\t"
+        "ldr	r8, [%[b], #8]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        /* A[7] * B[1] */
+        "ldr	r6, [%[a], #28]\n\t"
+        "ldr	r8, [%[b], #4]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        "str	r5, [%[r], #32]\n\t"
+        "mov	r5, #0\n\t"
+        /* A[2] * B[7] */
+        "ldr	r6, [%[a], #8]\n\t"
+        "ldr	r8, [%[b], #28]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        /* A[3] * B[6] */
+        "ldr	r6, [%[a], #12]\n\t"
+        "ldr	r8, [%[b], #24]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        /* A[4] * B[5] */
+        "ldr	r6, [%[a], #16]\n\t"
+        "ldr	r8, [%[b], #20]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        /* A[5] * B[4] */
+        "ldr	r6, [%[a], #20]\n\t"
+        "ldr	r8, [%[b], #16]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        /* A[6] * B[3] */
+        "ldr	r6, [%[a], #24]\n\t"
+        "ldr	r8, [%[b], #12]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        /* A[7] * B[2] */
+        "ldr	r6, [%[a], #28]\n\t"
+        "ldr	r8, [%[b], #8]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        "str	r3, [%[r], #36]\n\t"
+        "mov	r3, #0\n\t"
+        /* A[3] * B[7] */
+        "ldr	r6, [%[a], #12]\n\t"
+        "ldr	r8, [%[b], #28]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs 	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        /* A[4] * B[6] */
+        "ldr	r6, [%[a], #16]\n\t"
+        "ldr	r8, [%[b], #24]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs 	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        /* A[5] * B[5] */
+        "ldr	r6, [%[a], #20]\n\t"
+        "ldr	r8, [%[b], #20]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs 	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        /* A[6] * B[4] */
+        "ldr	r6, [%[a], #24]\n\t"
+        "ldr	r8, [%[b], #16]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs 	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        /* A[7] * B[3] */
+        "ldr	r6, [%[a], #28]\n\t"
+        "ldr	r8, [%[b], #12]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs 	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        "str	r4, [%[r], #40]\n\t"
+        "mov	r4, #0\n\t"
+        /* A[4] * B[7] */
+        "ldr	r6, [%[a], #16]\n\t"
+        "ldr	r8, [%[b], #28]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        /* A[5] * B[6] */
+        "ldr	r6, [%[a], #20]\n\t"
+        "ldr	r8, [%[b], #24]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        /* A[6] * B[5] */
+        "ldr	r6, [%[a], #24]\n\t"
+        "ldr	r8, [%[b], #20]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        /* A[7] * B[4] */
+        "ldr	r6, [%[a], #28]\n\t"
+        "ldr	r8, [%[b], #16]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        "str	r5, [%[r], #44]\n\t"
+        "mov	r5, #0\n\t"
+        /* A[5] * B[7] */
+        "ldr	r6, [%[a], #20]\n\t"
+        "ldr	r8, [%[b], #28]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        /* A[6] * B[6] */
+        "ldr	r6, [%[a], #24]\n\t"
+        "ldr	r8, [%[b], #24]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        /* A[7] * B[5] */
+        "ldr	r6, [%[a], #28]\n\t"
+        "ldr	r8, [%[b], #20]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        "str	r3, [%[r], #48]\n\t"
+        "mov	r3, #0\n\t"
+        /* A[6] * B[7] */
+        "ldr	r6, [%[a], #24]\n\t"
+        "ldr	r8, [%[b], #28]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs 	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        /* A[7] * B[6] */
+        "ldr	r6, [%[a], #28]\n\t"
+        "ldr	r8, [%[b], #24]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs 	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        "str	r4, [%[r], #52]\n\t"
+        "mov	r4, #0\n\t"
+        /* A[7] * B[7] */
+        "ldr	r6, [%[a], #28]\n\t"
+        "ldr	r8, [%[b], #28]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adc	r3, r3, r8\n\t"
+        "str	r5, [%[r], #56]\n\t"
+        "str	r3, [%[r], #60]\n\t"
+        /* Transfer tmp to r */
+        "ldr	r3, [%[tmp], #0]\n\t"
+        "ldr	r4, [%[tmp], #4]\n\t"
+        "ldr	r5, [%[tmp], #8]\n\t"
+        "ldr	r6, [%[tmp], #12]\n\t"
+        "str	r3, [%[r], #0]\n\t"
+        "str	r4, [%[r], #4]\n\t"
+        "str	r5, [%[r], #8]\n\t"
+        "str	r6, [%[r], #12]\n\t"
+        "ldr	r3, [%[tmp], #16]\n\t"
+        "ldr	r4, [%[tmp], #20]\n\t"
+        "ldr	r5, [%[tmp], #24]\n\t"
+        "ldr	r6, [%[tmp], #28]\n\t"
+        "str	r3, [%[r], #16]\n\t"
+        "str	r4, [%[r], #20]\n\t"
+        "str	r5, [%[r], #24]\n\t"
+        "str	r6, [%[r], #28]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [tmp] "r" (tmp)
+        : "memory", "r3", "r4", "r5", "r6", "r8"
+    );
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * r  A single precision number representing condition subtract result.
+ * a  A single precision number to subtract from.
+ * b  A single precision number to subtract.
+ * m  Mask value to apply.
+ */
+SP_NOINLINE static sp_digit sp_256_cond_sub_8(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, sp_digit m)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r5, #32\n\t"
+        "mov	r9, r5\n\t"
+        "mov	r8, #0\n\t"
+        "\n1:\n\t"
+        "ldr	r6, [%[b], r8]\n\t"
+        "and	r6, r6, %[m]\n\t"
+        "mov	r5, #0\n\t"
+        "subs	r5, r5, %[c]\n\t"
+        "ldr	r5, [%[a], r8]\n\t"
+        "sbcs	r5, r5, r6\n\t"
+        "sbcs	%[c], %[c], %[c]\n\t"
+        "str	r5, [%[r], r8]\n\t"
+        "add	r8, r8, #4\n\t"
+        "cmp	r8, r9\n\t"
+        "blt	1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r5", "r6", "r8", "r9"
+    );
+
+    return c;
+}
+
+/* Reduce the number back to 256 bits using Montgomery reduction.
+ *
+ * a   A single precision number to reduce in place.
+ * m   The single precision number representing the modulus.
+ * mp  The digit representing the negative inverse of m mod 2^n.
+ */
+SP_NOINLINE static void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    (void)mp;
+    (void)m;
+
+    __asm__ __volatile__ (
+        "mov	r2, #0\n\t"
+        "mov	r1, #0\n\t"
+        /* i = 0 */
+        "mov	r9, r2\n\t"
+        "\n1:\n\t"
+        "mov	r4, #0\n\t"
+        /* mu = a[i] * 1 (mp) = a[i] */
+        "ldr	r3, [%[a]]\n\t"
+        /* a[i] += -1 * mu = -1 * a[i] => a[i] = 0 no carry */
+        /* a[i+1] += -1 * mu */
+        "ldr	r6, [%[a], #4]\n\t"
+        "mov	r5, #0\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adc	r5, r5, r2\n\t"
+        "str	r4, [%[a], #4]\n\t"
+        /* a[i+2] += -1 * mu */
+        "ldr	r6, [%[a], #8]\n\t"
+        "mov	r4, #0\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adc	r4, r4, r2\n\t"
+        "str	r5, [%[a], #8]\n\t"
+        /* a[i+3] += 0 * mu */
+        "ldr	r6, [%[a], #12]\n\t"
+        "mov	r5, #0\n\t"
+        "adds	r4, r4, r3\n\t"
+        "adc	r5, r5, r2\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adc	r5, r5, r2\n\t"
+        "str	r4, [%[a], #12]\n\t"
+        /* a[i+4] += 0 * mu */
+        "ldr	r6, [%[a], #16]\n\t"
+        "mov	r4, #0\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adc	r4, r4, r2\n\t"
+        "str	r5, [%[a], #16]\n\t"
+        /* a[i+5] += 0 * mu */
+        "ldr	r6, [%[a], #20]\n\t"
+        "mov	r5, #0\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adc	r5, r5, r2\n\t"
+        "str	r4, [%[a], #20]\n\t"
+        /* a[i+6] += 1 * mu */
+        "ldr	r6, [%[a], #24]\n\t"
+        "mov	r4, #0\n\t"
+        "adds	r5, r5, r3\n\t"
+        "adc	r4, r4, r2\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adc	r4, r4, r2\n\t"
+        "str	r5, [%[a], #24]\n\t"
+        /* a[i+7] += -1 * mu */
+        "ldr	r6, [%[a], #28]\n\t"
+        "ldr	r8, [%[a], #32]\n\t"
+        "adds	r5, r1, r3\n\t"
+        "mov	r1, #0\n\t"
+        "adc	r1, r1, r2\n\t"
+        "subs	r4, r4, r3\n\t"
+        "sbcs	r5, r5, r2\n\t"
+        "sbc	r1, r1, r2\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "adc	r1, r1, r2\n\t"
+        "str	r4, [%[a],  #28]\n\t"
+        "str	r5, [%[a], #32]\n\t"
+        /* i += 1 */
+        "add	r9, r9, #1\n\t"
+        "add	%[a], %[a], #4\n\t"
+        "mov	r6, #8\n\t"
+        "cmp	r9, r6\n\t"
+        "blt	1b\n\t"
+        "sub	%[a], %[a], #32\n\t"
+        "mov	r3, r1\n\t"
+        "sub	r1, r1, #1\n\t"
+        "mvn	r1, r1\n\t"
+        "ldr	r4, [%[a],#32]\n\t"
+        "ldr	r5, [%[a],#36]\n\t"
+        "ldr	r6, [%[a],#40]\n\t"
+        "ldr	r8, [%[a],#44]\n\t"
+        "subs	r4, r4, r1\n\t"
+        "sbcs	r5, r5, r1\n\t"
+        "sbcs	r6, r6, r1\n\t"
+        "sbcs	r8, r8, r2\n\t"
+        "str	r4, [%[a],#0]\n\t"
+        "str	r5, [%[a],#4]\n\t"
+        "str	r6, [%[a],#8]\n\t"
+        "str	r8, [%[a],#12]\n\t"
+        "ldr	r4, [%[a],#48]\n\t"
+        "ldr	r5, [%[a],#52]\n\t"
+        "ldr	r6, [%[a],#56]\n\t"
+        "ldr	r8, [%[a],#60]\n\t"
+        "sbcs	r4, r4, r2\n\t"
+        "sbcs	r5, r5, r2\n\t"
+        "sbcs	r6, r6, r3\n\t"
+        "sbc	r8, r8, r1\n\t"
+        "str	r4, [%[a],#16]\n\t"
+        "str	r5, [%[a],#20]\n\t"
+        "str	r6, [%[a],#24]\n\t"
+        "str	r8, [%[a],#28]\n\t"
+        : [a] "+r" (a)
+        :
+        : "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r8", "r9"
+    );
+
+
+    (void)m;
+    (void)mp;
+}
+
+/* Reduce the number back to 256 bits using Montgomery reduction.
+ *
+ * a   A single precision number to reduce in place.
+ * m   The single precision number representing the modulus.
+ * mp  The digit representing the negative inverse of m mod 2^n.
+ */
+SP_NOINLINE static void sp_256_mont_reduce_order_8(sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_digit ca = 0;
+
+    __asm__ __volatile__ (
+        "mov	r9, %[mp]\n\t"
+        "mov	r12, %[m]\n\t"
+        "mov	r10, %[a]\n\t"
+        "mov	r4, #0\n\t"
+        "add	r11, r10, #32\n\t"
+        "\n1:\n\t"
+        /* mu = a[i] * mp */
+        "mov	%[mp], r9\n\t"
+        "ldr	%[a], [r10]\n\t"
+        "mul	%[mp], %[mp], %[a]\n\t"
+        "mov	%[m], r12\n\t"
+        "add	r14, r10, #24\n\t"
+        "\n2:\n\t"
+        /* a[i+j] += m[j] * mu */
+        "ldr	%[a], [r10]\n\t"
+        "mov	r5, #0\n\t"
+        /* Multiply m[j] and mu - Start */
+        "ldr	r8, [%[m]], #4\n\t"
+        "umull	r6, r8, %[mp], r8\n\t"
+        "adds	%[a], %[a], r6\n\t"
+        "adc	r5, r5, r8\n\t"
+        /* Multiply m[j] and mu - Done */
+        "adds	r4, r4, %[a]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "str	r4, [r10], #4\n\t"
+        /* a[i+j+1] += m[j+1] * mu */
+        "ldr	%[a], [r10]\n\t"
+        "mov	r4, #0\n\t"
+        /* Multiply m[j] and mu - Start */
+        "ldr	r8, [%[m]], #4\n\t"
+        "umull	r6, r8, %[mp], r8\n\t"
+        "adds	%[a], %[a], r6\n\t"
+        "adc	r4, r4, r8\n\t"
+        /* Multiply m[j] and mu - Done */
+        "adds	r5, r5, %[a]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "str	r5, [r10], #4\n\t"
+        "cmp	r10, r14\n\t"
+        "blt	2b\n\t"
+        /* a[i+6] += m[6] * mu */
+        "ldr	%[a], [r10]\n\t"
+        "mov	r5, #0\n\t"
+        /* Multiply m[j] and mu - Start */
+        "ldr	r8, [%[m]], #4\n\t"
+        "umull	r6, r8, %[mp], r8\n\t"
+        "adds	%[a], %[a], r6\n\t"
+        "adc	r5, r5, r8\n\t"
+        /* Multiply m[j] and mu - Done */
+        "adds	r4, r4, %[a]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "str	r4, [r10], #4\n\t"
+        /* a[i+7] += m[7] * mu */
+        "mov	r4, %[ca]\n\t"
+        "mov	%[ca], #0\n\t"
+        /* Multiply m[7] and mu - Start */
+        "ldr	r8, [%[m]]\n\t"
+        "umull	r6, r8, %[mp], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	%[ca], %[ca], #0\n\t"
+        /* Multiply m[7] and mu - Done */
+        "ldr	r6, [r10]\n\t"
+        "ldr	r8, [r10, #4]\n\t"
+        "adds	r6, r6, r5\n\t"
+        "adcs	r8, r8, r4\n\t"
+        "adc	%[ca], %[ca], #0\n\t"
+        "str	r6, [r10]\n\t"
+        "str	r8, [r10, #4]\n\t"
+        /* Next word in a */
+        "sub	r10, r10, #24\n\t"
+        "cmp	r10, r11\n\t"
+        "blt	1b\n\t"
+        "mov	%[a], r10\n\t"
+        "mov	%[m], r12\n\t"
+        : [ca] "+r" (ca), [a] "+r" (a)
+        : [m] "r" (m), [mp] "r" (mp)
+        : "memory", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
+    );
+
+    sp_256_cond_sub_8(a - 8, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montogmery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r   Result of multiplication.
+ * a   First number to multiply in Montogmery form.
+ * b   Second number to multiply in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_256_mont_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_256_mul_8(r, a, b);
+    sp_256_mont_reduce_8(r, m, mp);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_256_sqr_8(sp_digit* r, const sp_digit* a)
+{
+    sp_digit tmp[8];
+    __asm__ __volatile__ (
+        /* A[0] * A[0] */
+        "ldr	r6, [%[a], #0]\n\t"
+        "umull	r3, r4, r6, r6\n\t"
+        "mov	r5, #0\n\t"
+        "str	r3, [%[tmp], #0]\n\t"
+        "mov	r3, #0\n\t"
+        /* A[0] * A[1] */
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adc	r5, r5, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs 	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        "str	r4, [%[tmp], #4]\n\t"
+        "mov	r4, #0\n\t"
+        /* A[0] * A[2] */
+        "ldr	r6, [%[a], #0]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adc	r3, r3, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        /* A[1] * A[1] */
+        "ldr	r6, [%[a], #4]\n\t"
+        "umull	r6, r8, r6, r6\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        "str	r5, [%[tmp], #8]\n\t"
+        "mov	r5, #0\n\t"
+        /* A[0] * A[3] */
+        "ldr	r6, [%[a], #0]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r9, r10, r6, r8\n\t"
+        "mov	r11, #0\n\t"
+        /* A[1] * A[2] */
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adcs 	r10, r10, r8\n\t"
+        "adc	r11, r11, #0\n\t"
+        "adds	r9, r9, r9\n\t"
+        "adcs	r10, r10, r10\n\t"
+        "adc	r11, r11, r11\n\t"
+        "adds	r3, r3, r9\n\t"
+        "adcs	r4, r4, r10\n\t"
+        "adc	r5, r5, r11\n\t"
+        "str	r3, [%[tmp], #12]\n\t"
+        "mov	r3, #0\n\t"
+        /* A[0] * A[4] */
+        "ldr	r6, [%[a], #0]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r9, r10, r6, r8\n\t"
+        "mov	r11, #0\n\t"
+        /* A[1] * A[3] */
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adcs 	r10, r10, r8\n\t"
+        "adc	r11, r11, #0\n\t"
+        /* A[2] * A[2] */
+        "ldr	r6, [%[a], #8]\n\t"
+        "umull	r6, r8, r6, r6\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        "adds	r9, r9, r9\n\t"
+        "adcs	r10, r10, r10\n\t"
+        "adc	r11, r11, r11\n\t"
+        "adds	r4, r4, r9\n\t"
+        "adcs	r5, r5, r10\n\t"
+        "adc	r3, r3, r11\n\t"
+        "str	r4, [%[tmp], #16]\n\t"
+        "mov	r4, #0\n\t"
+        /* A[0] * A[5] */
+        "ldr	r6, [%[a], #0]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r9, r10, r6, r8\n\t"
+        "mov	r11, #0\n\t"
+        /* A[1] * A[4] */
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adcs 	r10, r10, r8\n\t"
+        "adc	r11, r11, #0\n\t"
+        /* A[2] * A[3] */
+        "ldr	r6, [%[a], #8]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adcs 	r10, r10, r8\n\t"
+        "adc	r11, r11, #0\n\t"
+        "adds	r9, r9, r9\n\t"
+        "adcs	r10, r10, r10\n\t"
+        "adc	r11, r11, r11\n\t"
+        "adds	r5, r5, r9\n\t"
+        "adcs	r3, r3, r10\n\t"
+        "adc	r4, r4, r11\n\t"
+        "str	r5, [%[tmp], #20]\n\t"
+        "mov	r5, #0\n\t"
+        /* A[0] * A[6] */
+        "ldr	r6, [%[a], #0]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r9, r10, r6, r8\n\t"
+        "mov	r11, #0\n\t"
+        /* A[1] * A[5] */
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adcs 	r10, r10, r8\n\t"
+        "adc	r11, r11, #0\n\t"
+        /* A[2] * A[4] */
+        "ldr	r6, [%[a], #8]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adcs 	r10, r10, r8\n\t"
+        "adc	r11, r11, #0\n\t"
+        /* A[3] * A[3] */
+        "ldr	r6, [%[a], #12]\n\t"
+        "umull	r6, r8, r6, r6\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        "adds	r9, r9, r9\n\t"
+        "adcs	r10, r10, r10\n\t"
+        "adc	r11, r11, r11\n\t"
+        "adds	r3, r3, r9\n\t"
+        "adcs	r4, r4, r10\n\t"
+        "adc	r5, r5, r11\n\t"
+        "str	r3, [%[tmp], #24]\n\t"
+        "mov	r3, #0\n\t"
+        /* A[0] * A[7] */
+        "ldr	r6, [%[a], #0]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r9, r10, r6, r8\n\t"
+        "mov	r11, #0\n\t"
+        /* A[1] * A[6] */
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adcs 	r10, r10, r8\n\t"
+        "adc	r11, r11, #0\n\t"
+        /* A[2] * A[5] */
+        "ldr	r6, [%[a], #8]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adcs 	r10, r10, r8\n\t"
+        "adc	r11, r11, #0\n\t"
+        /* A[3] * A[4] */
+        "ldr	r6, [%[a], #12]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adcs 	r10, r10, r8\n\t"
+        "adc	r11, r11, #0\n\t"
+        "adds	r9, r9, r9\n\t"
+        "adcs	r10, r10, r10\n\t"
+        "adc	r11, r11, r11\n\t"
+        "adds	r4, r4, r9\n\t"
+        "adcs	r5, r5, r10\n\t"
+        "adc	r3, r3, r11\n\t"
+        "str	r4, [%[tmp], #28]\n\t"
+        "mov	r4, #0\n\t"
+        /* A[1] * A[7] */
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r9, r10, r6, r8\n\t"
+        "mov	r11, #0\n\t"
+        /* A[2] * A[6] */
+        "ldr	r6, [%[a], #8]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adcs 	r10, r10, r8\n\t"
+        "adc	r11, r11, #0\n\t"
+        /* A[3] * A[5] */
+        "ldr	r6, [%[a], #12]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adcs 	r10, r10, r8\n\t"
+        "adc	r11, r11, #0\n\t"
+        /* A[4] * A[4] */
+        "ldr	r6, [%[a], #16]\n\t"
+        "umull	r6, r8, r6, r6\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        "adds	r9, r9, r9\n\t"
+        "adcs	r10, r10, r10\n\t"
+        "adc	r11, r11, r11\n\t"
+        "adds	r5, r5, r9\n\t"
+        "adcs	r3, r3, r10\n\t"
+        "adc	r4, r4, r11\n\t"
+        "str	r5, [%[r], #32]\n\t"
+        "mov	r5, #0\n\t"
+        /* A[2] * A[7] */
+        "ldr	r6, [%[a], #8]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r9, r10, r6, r8\n\t"
+        "mov	r11, #0\n\t"
+        /* A[3] * A[6] */
+        "ldr	r6, [%[a], #12]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adcs 	r10, r10, r8\n\t"
+        "adc	r11, r11, #0\n\t"
+        /* A[4] * A[5] */
+        "ldr	r6, [%[a], #16]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adcs 	r10, r10, r8\n\t"
+        "adc	r11, r11, #0\n\t"
+        "adds	r9, r9, r9\n\t"
+        "adcs	r10, r10, r10\n\t"
+        "adc	r11, r11, r11\n\t"
+        "adds	r3, r3, r9\n\t"
+        "adcs	r4, r4, r10\n\t"
+        "adc	r5, r5, r11\n\t"
+        "str	r3, [%[r], #36]\n\t"
+        "mov	r3, #0\n\t"
+        /* A[3] * A[7] */
+        "ldr	r6, [%[a], #12]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r9, r10, r6, r8\n\t"
+        "mov	r11, #0\n\t"
+        /* A[4] * A[6] */
+        "ldr	r6, [%[a], #16]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adcs 	r10, r10, r8\n\t"
+        "adc	r11, r11, #0\n\t"
+        /* A[5] * A[5] */
+        "ldr	r6, [%[a], #20]\n\t"
+        "umull	r6, r8, r6, r6\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        "adds	r9, r9, r9\n\t"
+        "adcs	r10, r10, r10\n\t"
+        "adc	r11, r11, r11\n\t"
+        "adds	r4, r4, r9\n\t"
+        "adcs	r5, r5, r10\n\t"
+        "adc	r3, r3, r11\n\t"
+        "str	r4, [%[r], #40]\n\t"
+        "mov	r4, #0\n\t"
+        /* A[4] * A[7] */
+        "ldr	r6, [%[a], #16]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        /* A[5] * A[6] */
+        "ldr	r6, [%[a], #20]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r3, r3, r8\n\t"
+        "adc	r4, r4, #0\n\t"
+        "str	r5, [%[r], #44]\n\t"
+        "mov	r5, #0\n\t"
+        /* A[5] * A[7] */
+        "ldr	r6, [%[a], #20]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        /* A[6] * A[6] */
+        "ldr	r6, [%[a], #24]\n\t"
+        "umull	r6, r8, r6, r6\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        "str	r3, [%[r], #48]\n\t"
+        "mov	r3, #0\n\t"
+        /* A[6] * A[7] */
+        "ldr	r6, [%[a], #24]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs 	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs 	r5, r5, r8\n\t"
+        "adc	r3, r3, #0\n\t"
+        "str	r4, [%[r], #52]\n\t"
+        "mov	r4, #0\n\t"
+        /* A[7] * A[7] */
+        "ldr	r6, [%[a], #28]\n\t"
+        "umull	r6, r8, r6, r6\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adc	r3, r3, r8\n\t"
+        "str	r5, [%[r], #56]\n\t"
+        "str	r3, [%[r], #60]\n\t"
+        /* Transfer tmp to r */
+        "ldr	r3, [%[tmp], #0]\n\t"
+        "ldr	r4, [%[tmp], #4]\n\t"
+        "ldr	r5, [%[tmp], #8]\n\t"
+        "ldr	r6, [%[tmp], #12]\n\t"
+        "str	r3, [%[r], #0]\n\t"
+        "str	r4, [%[r], #4]\n\t"
+        "str	r5, [%[r], #8]\n\t"
+        "str	r6, [%[r], #12]\n\t"
+        "ldr	r3, [%[tmp], #16]\n\t"
+        "ldr	r4, [%[tmp], #20]\n\t"
+        "ldr	r5, [%[tmp], #24]\n\t"
+        "ldr	r6, [%[tmp], #28]\n\t"
+        "str	r3, [%[r], #16]\n\t"
+        "str	r4, [%[r], #20]\n\t"
+        "str	r5, [%[r], #24]\n\t"
+        "str	r6, [%[r], #28]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [tmp] "r" (tmp)
+        : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11"
+    );
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_256_sqr_8(r, a);
+    sp_256_mont_reduce_8(r, m, mp);
+}
+
+#if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY)
+/* Square the Montgomery form number a number of times. (r = a ^ n mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montogmery form.
+ * n   Number of times to square.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_256_mont_sqr_n_8(sp_digit* r, const sp_digit* a, int n,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_256_mont_sqr_8(r, a, m, mp);
+    for (; n > 1; n--) {
+        sp_256_mont_sqr_8(r, r, m, mp);
+    }
+}
+
+#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */
+#ifdef WOLFSSL_SP_SMALL
+/* Mod-2 for the P256 curve. */
+static const uint32_t p256_mod_minus_2[8] = {
+    0xfffffffdU,0xffffffffU,0xffffffffU,0x00000000U,0x00000000U,0x00000000U,
+    0x00000001U,0xffffffffU
+};
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the modulus (prime) of the
+ * P256 curve. (r = 1 / a mod m)
+ *
+ * r   Inverse result.
+ * a   Number to invert.
+ * td  Temporary data.
+ */
+static void sp_256_mont_inv_8(sp_digit* r, const sp_digit* a, sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    XMEMCPY(t, a, sizeof(sp_digit) * 8);
+    for (i=254; i>=0; i--) {
+        sp_256_mont_sqr_8(t, t, p256_mod, p256_mp_mod);
+        if (p256_mod_minus_2[i / 32] & ((sp_digit)1 << (i % 32)))
+            sp_256_mont_mul_8(t, t, a, p256_mod, p256_mp_mod);
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 8);
+#else
+    sp_digit* t1 = td;
+    sp_digit* t2 = td + 2 * 8;
+    sp_digit* t3 = td + 4 * 8;
+    /* 0x2 */
+    sp_256_mont_sqr_8(t1, a, p256_mod, p256_mp_mod);
+    /* 0x3 */
+    sp_256_mont_mul_8(t2, t1, a, p256_mod, p256_mp_mod);
+    /* 0xc */
+    sp_256_mont_sqr_n_8(t1, t2, 2, p256_mod, p256_mp_mod);
+    /* 0xd */
+    sp_256_mont_mul_8(t3, t1, a, p256_mod, p256_mp_mod);
+    /* 0xf */
+    sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xf0 */
+    sp_256_mont_sqr_n_8(t1, t2, 4, p256_mod, p256_mp_mod);
+    /* 0xfd */
+    sp_256_mont_mul_8(t3, t3, t1, p256_mod, p256_mp_mod);
+    /* 0xff */
+    sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xff00 */
+    sp_256_mont_sqr_n_8(t1, t2, 8, p256_mod, p256_mp_mod);
+    /* 0xfffd */
+    sp_256_mont_mul_8(t3, t3, t1, p256_mod, p256_mp_mod);
+    /* 0xffff */
+    sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xffff0000 */
+    sp_256_mont_sqr_n_8(t1, t2, 16, p256_mod, p256_mp_mod);
+    /* 0xfffffffd */
+    sp_256_mont_mul_8(t3, t3, t1, p256_mod, p256_mp_mod);
+    /* 0xffffffff */
+    sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000000 */
+    sp_256_mont_sqr_n_8(t1, t2, 32, p256_mod, p256_mp_mod);
+    /* 0xffffffffffffffff */
+    sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001 */
+    sp_256_mont_mul_8(r, t1, a, p256_mod, p256_mp_mod);
+    /* 0xffffffff000000010000000000000000000000000000000000000000 */
+    sp_256_mont_sqr_n_8(r, r, 160, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001000000000000000000000000ffffffffffffffff */
+    sp_256_mont_mul_8(r, r, t2, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001000000000000000000000000ffffffffffffffff00000000 */
+    sp_256_mont_sqr_n_8(r, r, 32, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001000000000000000000000000fffffffffffffffffffffffd */
+    sp_256_mont_mul_8(r, r, t3, p256_mod, p256_mp_mod);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Compare a with b in constant time.
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+SP_NOINLINE static int32_t sp_256_cmp_8(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+
+
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mvn	r3, r3\n\t"
+        "mov	r6, #28\n\t"
+        "\n1:\n\t"
+        "ldr	r8, [%[a], r6]\n\t"
+        "ldr	r5, [%[b], r6]\n\t"
+        "and	r8, r8, r3\n\t"
+        "and	r5, r5, r3\n\t"
+        "mov	r4, r8\n\t"
+        "subs	r8, r8, r5\n\t"
+        "sbc	r8, r8, r8\n\t"
+        "add	%[r], %[r], r8\n\t"
+        "mvn	r8, r8\n\t"
+        "and	r3, r3, r8\n\t"
+        "subs	r5, r5, r4\n\t"
+        "sbc	r8, r8, r8\n\t"
+        "sub	%[r], %[r], r8\n\t"
+        "mvn	r8, r8\n\t"
+        "and	r3, r3, r8\n\t"
+        "sub	r6, r6, #4\n\t"
+        "cmp	r6, #0\n\t"
+        "bge	1b\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
+        : "r3", "r4", "r5", "r6", "r8"
+    );
+
+    return r;
+}
+
+/* Normalize the values in each word to 32.
+ *
+ * a  Array of sp_digit to normalize.
+ */
+#define sp_256_norm_8(a)
+
+/* Map the Montgomery form projective coordinate point to an affine point.
+ *
+ * r  Resulting affine coordinate point.
+ * p  Montgomery form projective coordinate point.
+ * t  Temporary ordinate data.
+ */
+static void sp_256_map_8(sp_point_256* r, const sp_point_256* p, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*8;
+    int32_t n;
+
+    sp_256_mont_inv_8(t1, p->z, t + 2*8);
+
+    sp_256_mont_sqr_8(t2, t1, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_8(t1, t2, t1, p256_mod, p256_mp_mod);
+
+    /* x /= z^2 */
+    sp_256_mont_mul_8(r->x, p->x, t2, p256_mod, p256_mp_mod);
+    XMEMSET(r->x + 8, 0, sizeof(r->x) / 2U);
+    sp_256_mont_reduce_8(r->x, p256_mod, p256_mp_mod);
+    /* Reduce x to less than modulus */
+    n = sp_256_cmp_8(r->x, p256_mod);
+    sp_256_cond_sub_8(r->x, r->x, p256_mod, 0 - ((n >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_8(r->x);
+
+    /* y /= z^3 */
+    sp_256_mont_mul_8(r->y, p->y, t1, p256_mod, p256_mp_mod);
+    XMEMSET(r->y + 8, 0, sizeof(r->y) / 2U);
+    sp_256_mont_reduce_8(r->y, p256_mod, p256_mp_mod);
+    /* Reduce y to less than modulus */
+    n = sp_256_cmp_8(r->y, p256_mod);
+    sp_256_cond_sub_8(r->y, r->y, p256_mod, 0 - ((n >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_8(r->y);
+
+    XMEMSET(r->z, 0, sizeof(r->z));
+    r->z[0] = 1;
+
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r6, %[a]\n\t"
+        "mov	r8, #0\n\t"
+        "add	r6, r6, #32\n\t"
+        "sub	r8, r8, #1\n\t"
+        "\n1:\n\t"
+        "adds	%[c], %[c], r8\n\t"
+        "ldr	r4, [%[a]]\n\t"
+        "ldr	r5, [%[b]]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "str	r4, [%[r]]\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c], %[c]\n\t"
+        "add	%[a], %[a], #4\n\t"
+        "add	%[b], %[b], #4\n\t"
+        "add	%[r], %[r], #4\n\t"
+        "cmp	%[a], r6\n\t"
+        "bne	1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r8"
+    );
+
+    return c;
+}
+
+#else
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c], %[c]\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r8"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Add two Montgomery form numbers (r = a + b % m).
+ *
+ * r   Result of addition.
+ * a   First number to add in Montogmery form.
+ * b   Second number to add in Montogmery form.
+ * m   Modulus (prime).
+ */
+SP_NOINLINE static void sp_256_mont_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    (void)m;
+
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "ldr	r4, [%[a],#0]\n\t"
+        "ldr	r5, [%[a],#4]\n\t"
+        "ldr	r6, [%[b],#0]\n\t"
+        "ldr	r8, [%[b],#4]\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "str	r4, [%[r],#0]\n\t"
+        "str	r5, [%[r],#4]\n\t"
+        "ldr	r4, [%[a],#8]\n\t"
+        "ldr	r5, [%[a],#12]\n\t"
+        "ldr	r6, [%[b],#8]\n\t"
+        "ldr	r8, [%[b],#12]\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "str	r4, [%[r],#8]\n\t"
+        "str	r5, [%[r],#12]\n\t"
+        "ldr	r4, [%[a],#16]\n\t"
+        "ldr	r5, [%[a],#20]\n\t"
+        "ldr	r6, [%[b],#16]\n\t"
+        "ldr	r8, [%[b],#20]\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "mov	r9, r4\n\t"
+        "mov	r10, r5\n\t"
+        "ldr	r4, [%[a],#24]\n\t"
+        "ldr	r5, [%[a],#28]\n\t"
+        "ldr	r6, [%[b],#24]\n\t"
+        "ldr	r8, [%[b],#28]\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "mov	r11, r4\n\t"
+        "mov	r12, r5\n\t"
+        "adc	r3, r3, r3\n\t"
+        "mov	r6, r3\n\t"
+        "sub	r3, r3, #1\n\t"
+        "mvn	r3, r3\n\t"
+        "mov	r8, #0\n\t"
+        "ldr	r4, [%[r],#0]\n\t"
+        "ldr	r5, [%[r],#4]\n\t"
+        "subs	r4, r4, r3\n\t"
+        "sbcs	r5, r5, r3\n\t"
+        "str	r4, [%[r],#0]\n\t"
+        "str	r5, [%[r],#4]\n\t"
+        "ldr	r4, [%[r],#8]\n\t"
+        "ldr	r5, [%[r],#12]\n\t"
+        "sbcs	r4, r4, r3\n\t"
+        "sbcs	r5, r5, r8\n\t"
+        "str	r4, [%[r],#8]\n\t"
+        "str	r5, [%[r],#12]\n\t"
+        "mov	r4, r9\n\t"
+        "mov	r5, r10\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r8\n\t"
+        "str	r4, [%[r],#16]\n\t"
+        "str	r5, [%[r],#20]\n\t"
+        "mov	r4, r11\n\t"
+        "mov	r5, r12\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "sbc	r5, r5, r3\n\t"
+        "str	r4, [%[r],#24]\n\t"
+        "str	r5, [%[r],#28]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12"
+    );
+}
+
+/* Double a Montgomery form number (r = a + a % m).
+ *
+ * r   Result of doubling.
+ * a   Number to double in Montogmery form.
+ * m   Modulus (prime).
+ */
+SP_NOINLINE static void sp_256_mont_dbl_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    (void)m;
+
+    __asm__ __volatile__ (
+        "ldr	r4, [%[a],#0]\n\t"
+        "ldr	r5, [%[a],#4]\n\t"
+        "ldr	r6, [%[a],#8]\n\t"
+        "ldr	r8, [%[a],#12]\n\t"
+        "adds	r4, r4, r4\n\t"
+        "adcs	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adcs	r8, r8, r8\n\t"
+        "str	r4, [%[r],#0]\n\t"
+        "str	r5, [%[r],#4]\n\t"
+        "str	r6, [%[r],#8]\n\t"
+        "str	r8, [%[r],#12]\n\t"
+        "ldr	r4, [%[a],#16]\n\t"
+        "ldr	r5, [%[a],#20]\n\t"
+        "ldr	r6, [%[a],#24]\n\t"
+        "ldr	r8, [%[a],#28]\n\t"
+        "adcs	r4, r4, r4\n\t"
+        "adcs	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adcs	r8, r8, r8\n\t"
+        "mov	r9, r4\n\t"
+        "mov	r10, r5\n\t"
+        "mov	r11, r6\n\t"
+        "mov	r12, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r8, #0\n\t"
+        "adc	r3, r3, r3\n\t"
+        "mov	r2, r3\n\t"
+        "sub	r3, r3, #1\n\t"
+        "mvn	r3, r3\n\t"
+        "ldr	r4, [%[r],#0]\n\t"
+        "ldr	r5, [%[r],#4]\n\t"
+        "ldr	r6, [%[r],#8]\n\t"
+        "subs	r4, r4, r3\n\t"
+        "sbcs	r5, r5, r3\n\t"
+        "sbcs	r6, r6, r3\n\t"
+        "str	r4, [%[r],#0]\n\t"
+        "str	r5, [%[r],#4]\n\t"
+        "str	r6, [%[r],#8]\n\t"
+        "ldr	r4, [%[r],#12]\n\t"
+        "mov	r5, r9\n\t"
+        "mov	r6, r10\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r8\n\t"
+        "sbcs	r6, r6, r8\n\t"
+        "str	r4, [%[r],#12]\n\t"
+        "str	r5, [%[r],#16]\n\t"
+        "str	r6, [%[r],#20]\n\t"
+        "mov	r4, r11\n\t"
+        "mov	r5, r12\n\t"
+        "sbcs	r4, r4, r2\n\t"
+        "sbc	r5, r5, r3\n\t"
+        "str	r4, [%[r],#24]\n\t"
+        "str	r5, [%[r],#28]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r3", "r2", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12"
+    );
+}
+
+/* Triple a Montgomery form number (r = a + a + a % m).
+ *
+ * r   Result of Tripling.
+ * a   Number to triple in Montogmery form.
+ * m   Modulus (prime).
+ */
+SP_NOINLINE static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    (void)m;
+
+    __asm__ __volatile__ (
+        "ldr	r2, [%[a],#0]\n\t"
+        "ldr	r3, [%[a],#4]\n\t"
+        "ldr	r4, [%[a],#8]\n\t"
+        "ldr	r5, [%[a],#12]\n\t"
+        "ldr	r6, [%[a],#16]\n\t"
+        "ldr	r8, [%[a],#20]\n\t"
+        "ldr	r9, [%[a],#24]\n\t"
+        "ldr	r10, [%[a],#28]\n\t"
+        "adds	r2, r2, r2\n\t"
+        "adcs	r3, r3, r3\n\t"
+        "adcs	r4, r4, r4\n\t"
+        "adcs	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adcs	r8, r8, r8\n\t"
+        "adcs	r9, r9, r9\n\t"
+        "adcs	r10, r10, r10\n\t"
+        "mov	r11, #0\n\t"
+        "mov	r14, #0\n\t"
+        "adc	r11, r11, r11\n\t"
+        "mov	r12, r11\n\t"
+        "sub	r11, r11, #1\n\t"
+        "mvn	r11, r11\n\t"
+        "subs	r2, r2, r11\n\t"
+        "sbcs	r3, r3, r11\n\t"
+        "sbcs	r4, r4, r11\n\t"
+        "sbcs	r5, r5, r14\n\t"
+        "sbcs	r6, r6, r14\n\t"
+        "sbcs	r8, r8, r14\n\t"
+        "sbcs	r9, r9, r12\n\t"
+        "sbc	r10, r10, r11\n\t"
+        "ldr	r12, [%[a],#0]\n\t"
+        "ldr	r14, [%[a],#4]\n\t"
+        "adds	r2, r2, r12\n\t"
+        "adcs	r3, r3, r14\n\t"
+        "ldr	r12, [%[a],#8]\n\t"
+        "ldr	r14, [%[a],#12]\n\t"
+        "adcs	r4, r4, r12\n\t"
+        "adcs	r5, r5, r14\n\t"
+        "ldr	r12, [%[a],#16]\n\t"
+        "ldr	r14, [%[a],#20]\n\t"
+        "adcs	r6, r6, r12\n\t"
+        "adcs	r8, r8, r14\n\t"
+        "ldr	r12, [%[a],#24]\n\t"
+        "ldr	r14, [%[a],#28]\n\t"
+        "adcs	r9, r9, r12\n\t"
+        "adcs	r10, r10, r14\n\t"
+        "mov	r11, #0\n\t"
+        "mov	r14, #0\n\t"
+        "adc	r11, r11, r11\n\t"
+        "mov	r12, r11\n\t"
+        "sub	r11, r11, #1\n\t"
+        "mvn	r11, r11\n\t"
+        "subs	r2, r2, r11\n\t"
+        "str	r2, [%[r],#0]\n\t"
+        "sbcs	r3, r3, r11\n\t"
+        "str	r3, [%[r],#4]\n\t"
+        "sbcs	r4, r4, r11\n\t"
+        "str	r4, [%[r],#8]\n\t"
+        "sbcs	r5, r5, r14\n\t"
+        "str	r5, [%[r],#12]\n\t"
+        "sbcs	r6, r6, r14\n\t"
+        "str	r6, [%[r],#16]\n\t"
+        "sbcs	r8, r8, r14\n\t"
+        "str	r8, [%[r],#20]\n\t"
+        "sbcs	r9, r9, r12\n\t"
+        "str	r9, [%[r],#24]\n\t"
+        "sbc	r10, r10, r11\n\t"
+        "str	r10, [%[r],#28]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r11", "r12", "r14", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10"
+    );
+}
+
+/* Subtract two Montgomery form numbers (r = a - b % m).
+ *
+ * r   Result of subtration.
+ * a   Number to subtract from in Montogmery form.
+ * b   Number to subtract with in Montogmery form.
+ * m   Modulus (prime).
+ */
+SP_NOINLINE static void sp_256_mont_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    (void)m;
+
+    __asm__ __volatile__ (
+        "ldr	r4, [%[a],#0]\n\t"
+        "ldr	r5, [%[a],#4]\n\t"
+        "ldr	r6, [%[b],#0]\n\t"
+        "ldr	r8, [%[b],#4]\n\t"
+        "subs	r4, r4, r6\n\t"
+        "sbcs	r5, r5, r8\n\t"
+        "str	r4, [%[r],#0]\n\t"
+        "str	r5, [%[r],#4]\n\t"
+        "ldr	r4, [%[a],#8]\n\t"
+        "ldr	r5, [%[a],#12]\n\t"
+        "ldr	r6, [%[b],#8]\n\t"
+        "ldr	r8, [%[b],#12]\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "sbcs	r5, r5, r8\n\t"
+        "str	r4, [%[r],#8]\n\t"
+        "str	r5, [%[r],#12]\n\t"
+        "ldr	r4, [%[a],#16]\n\t"
+        "ldr	r5, [%[a],#20]\n\t"
+        "ldr	r6, [%[b],#16]\n\t"
+        "ldr	r8, [%[b],#20]\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "sbcs	r5, r5, r8\n\t"
+        "mov	r9, r4\n\t"
+        "mov	r10, r5\n\t"
+        "ldr	r4, [%[a],#24]\n\t"
+        "ldr	r5, [%[a],#28]\n\t"
+        "ldr	r6, [%[b],#24]\n\t"
+        "ldr	r8, [%[b],#28]\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "sbcs	r5, r5, r8\n\t"
+        "mov	r11, r4\n\t"
+        "mov	r12, r5\n\t"
+        "sbc   r3, r3, r3\n\t"
+        "lsr   r8, r3, #31\n\t"
+        "mov   r6, #0\n\t"
+        "ldr	r4, [%[r],#0]\n\t"
+        "ldr	r5, [%[r],#4]\n\t"
+        "adds	r4, r4, r3\n\t"
+        "adcs	r5, r5, r3\n\t"
+        "str	r4, [%[r],#0]\n\t"
+        "str	r5, [%[r],#4]\n\t"
+        "ldr	r4, [%[r],#8]\n\t"
+        "ldr	r5, [%[r],#12]\n\t"
+        "adcs	r4, r4, r3\n\t"
+        "adcs	r5, r5, r6\n\t"
+        "str	r4, [%[r],#8]\n\t"
+        "str	r5, [%[r],#12]\n\t"
+        "mov	r4, r9\n\t"
+        "mov	r5, r10\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r6\n\t"
+        "str	r4, [%[r],#16]\n\t"
+        "str	r5, [%[r],#20]\n\t"
+        "mov	r4, r11\n\t"
+        "mov	r5, r12\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adc	r5, r5, r3\n\t"
+        "str	r4, [%[r],#24]\n\t"
+        "str	r5, [%[r],#28]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12"
+    );
+}
+
+/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
+ *
+ * r  Result of division by 2.
+ * a  Number to divide.
+ * m  Modulus (prime).
+ */
+SP_NOINLINE static void sp_256_div2_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    __asm__ __volatile__ (
+        "ldr	r8, [%[a], #0]\n\t"
+        "lsl	r8, r8, #31\n\t"
+        "lsr	r8, r8, #31\n\t"
+        "mov	r5, #0\n\t"
+        "sub	r5, r5, r8\n\t"
+        "mov	r8, #0\n\t"
+        "lsl	r6, r5, #31\n\t"
+        "lsr	r6, r6, #31\n\t"
+        "ldr	r3, [%[a], #0]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "str	r3, [%[r], #0]\n\t"
+        "str	r4, [%[r], #4]\n\t"
+        "ldr	r3, [%[a], #8]\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "adcs	r3, r3, r5\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "str	r3, [%[r], #8]\n\t"
+        "str	r4, [%[r], #12]\n\t"
+        "ldr	r3, [%[a], #16]\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "adcs	r3, r3, r8\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "str	r3, [%[r], #16]\n\t"
+        "str	r4, [%[r], #20]\n\t"
+        "ldr	r3, [%[a], #24]\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "adcs	r3, r3, r6\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "adc	r8, r8, r8\n\t"
+        "lsl	r8, r8, #31\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, #31\n\t"
+        "lsr	r6, r4, #1\n\t"
+        "lsl	r4, r4, #31\n\t"
+        "orr	r5, r5, r4\n\t"
+        "orr	r6, r6, r8\n\t"
+        "mov	r8, r3\n\t"
+        "str	r5, [%[r], #24]\n\t"
+        "str	r6, [%[r], #28]\n\t"
+        "ldr	r3, [%[a], #16]\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, #31\n\t"
+        "lsr	r6, r4, #1\n\t"
+        "lsl	r4, r4, #31\n\t"
+        "orr	r5, r5, r4\n\t"
+        "orr	r6, r6, r8\n\t"
+        "mov	r8, r3\n\t"
+        "str	r5, [%[r], #16]\n\t"
+        "str	r6, [%[r], #20]\n\t"
+        "ldr	r3, [%[a], #8]\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsl	r3, r3, #31\n\t"
+        "lsr	r6, r4, #1\n\t"
+        "lsl	r4, r4, #31\n\t"
+        "orr	r5, r5, r4\n\t"
+        "orr	r6, r6, r8\n\t"
+        "mov	r8, r3\n\t"
+        "str	r5, [%[r], #8]\n\t"
+        "str	r6, [%[r], #12]\n\t"
+        "ldr	r3, [%[r], #0]\n\t"
+        "ldr	r4, [%[r], #4]\n\t"
+        "lsr	r5, r3, #1\n\t"
+        "lsr	r6, r4, #1\n\t"
+        "lsl	r4, r4, #31\n\t"
+        "orr	r5, r5, r4\n\t"
+        "orr	r6, r6, r8\n\t"
+        "str	r5, [%[r], #0]\n\t"
+        "str	r6, [%[r], #4]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [m] "r" (m)
+        : "memory", "r3", "r4", "r5", "r6", "r8"
+    );
+}
+
+/* Double the Montgomery form projective point p.
+ *
+ * r  Result of doubling point.
+ * p  Point to double.
+ * t  Temporary ordinate data.
+ */
+static void sp_256_proj_point_dbl_8(sp_point_256* r, const sp_point_256* p, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*8;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+
+    x = r->x;
+    y = r->y;
+    z = r->z;
+    /* Put infinity into result. */
+    if (r != p) {
+        r->infinity = p->infinity;
+    }
+
+    /* T1 = Z * Z */
+    sp_256_mont_sqr_8(t1, p->z, p256_mod, p256_mp_mod);
+    /* Z = Y * Z */
+    sp_256_mont_mul_8(z, p->y, p->z, p256_mod, p256_mp_mod);
+    /* Z = 2Z */
+    sp_256_mont_dbl_8(z, z, p256_mod);
+    /* T2 = X - T1 */
+    sp_256_mont_sub_8(t2, p->x, t1, p256_mod);
+    /* T1 = X + T1 */
+    sp_256_mont_add_8(t1, p->x, t1, p256_mod);
+    /* T2 = T1 * T2 */
+    sp_256_mont_mul_8(t2, t1, t2, p256_mod, p256_mp_mod);
+    /* T1 = 3T2 */
+    sp_256_mont_tpl_8(t1, t2, p256_mod);
+    /* Y = 2Y */
+    sp_256_mont_dbl_8(y, p->y, p256_mod);
+    /* Y = Y * Y */
+    sp_256_mont_sqr_8(y, y, p256_mod, p256_mp_mod);
+    /* T2 = Y * Y */
+    sp_256_mont_sqr_8(t2, y, p256_mod, p256_mp_mod);
+    /* T2 = T2/2 */
+    sp_256_div2_8(t2, t2, p256_mod);
+    /* Y = Y * X */
+    sp_256_mont_mul_8(y, y, p->x, p256_mod, p256_mp_mod);
+    /* X = T1 * T1 */
+    sp_256_mont_sqr_8(x, t1, p256_mod, p256_mp_mod);
+    /* X = X - Y */
+    sp_256_mont_sub_8(x, x, y, p256_mod);
+    /* X = X - Y */
+    sp_256_mont_sub_8(x, x, y, p256_mod);
+    /* Y = Y - X */
+    sp_256_mont_sub_8(y, y, x, p256_mod);
+    /* Y = Y * T1 */
+    sp_256_mont_mul_8(y, y, t1, p256_mod, p256_mp_mod);
+    /* Y = Y - T2 */
+    sp_256_mont_sub_8(y, y, t2, p256_mod);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r6, %[a]\n\t"
+        "add	r6, r6, #32\n\t"
+        "\n1:\n\t"
+        "mov	r5, #0\n\t"
+        "subs	r5, r5, %[c]\n\t"
+        "ldr	r4, [%[a]]\n\t"
+        "ldr	r5, [%[b]]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "str	r4, [%[r]]\n\t"
+        "sbc	%[c], %[c], %[c]\n\t"
+        "add	%[a], %[a], #4\n\t"
+        "add	%[b], %[b], #4\n\t"
+        "add	%[r], %[r], #4\n\t"
+        "cmp	%[a], r6\n\t"
+        "bne	1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6"
+    );
+
+    return c;
+}
+
+#else
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldr	r4, [%[a], #0]\n\t"
+        "ldr	r5, [%[a], #4]\n\t"
+        "ldr	r6, [%[b], #0]\n\t"
+        "ldr	r8, [%[b], #4]\n\t"
+        "subs	r4, r4, r6\n\t"
+        "sbcs	r5, r5, r8\n\t"
+        "str	r4, [%[r], #0]\n\t"
+        "str	r5, [%[r], #4]\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "ldr	r5, [%[a], #12]\n\t"
+        "ldr	r6, [%[b], #8]\n\t"
+        "ldr	r8, [%[b], #12]\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "sbcs	r5, r5, r8\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "str	r5, [%[r], #12]\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "ldr	r5, [%[a], #20]\n\t"
+        "ldr	r6, [%[b], #16]\n\t"
+        "ldr	r8, [%[b], #20]\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "sbcs	r5, r5, r8\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "str	r5, [%[r], #20]\n\t"
+        "ldr	r4, [%[a], #24]\n\t"
+        "ldr	r5, [%[a], #28]\n\t"
+        "ldr	r6, [%[b], #24]\n\t"
+        "ldr	r8, [%[b], #28]\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "sbcs	r5, r5, r8\n\t"
+        "str	r4, [%[r], #24]\n\t"
+        "str	r5, [%[r], #28]\n\t"
+        "sbc	%[c], %[c], %[c]\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r8"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Compare two numbers to determine if they are equal.
+ * Constant time implementation.
+ *
+ * a  First number to compare.
+ * b  Second number to compare.
+ * returns 1 when equal and 0 otherwise.
+ */
+static int sp_256_cmp_equal_8(const sp_digit* a, const sp_digit* b)
+{
+    return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3]) |
+            (a[4] ^ b[4]) | (a[5] ^ b[5]) | (a[6] ^ b[6]) | (a[7] ^ b[7])) == 0;
+}
+
+/* Add two Montgomery form projective points.
+ *
+ * r  Result of addition.
+ * p  First point to add.
+ * q  Second point to add.
+ * t  Temporary ordinate data.
+ */
+static void sp_256_proj_point_add_8(sp_point_256* r, const sp_point_256* p, const sp_point_256* q,
+        sp_digit* t)
+{
+    const sp_point_256* ap[2];
+    sp_point_256* rp[2];
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*8;
+    sp_digit* t3 = t + 4*8;
+    sp_digit* t4 = t + 6*8;
+    sp_digit* t5 = t + 8*8;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+    int i;
+
+    /* Ensure only the first point is the same as the result. */
+    if (q == r) {
+        const sp_point_256* a = p;
+        p = q;
+        q = a;
+    }
+
+    /* Check double */
+    (void)sp_256_sub_8(t1, p256_mod, q->y);
+    sp_256_norm_8(t1);
+    if ((sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) &
+        (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, t1))) != 0) {
+        sp_256_proj_point_dbl_8(r, p, t);
+    }
+    else {
+        rp[0] = r;
+
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point_256));
+        x = rp[p->infinity | q->infinity]->x;
+        y = rp[p->infinity | q->infinity]->y;
+        z = rp[p->infinity | q->infinity]->z;
+
+        ap[0] = p;
+        ap[1] = q;
+        for (i=0; i<8; i++) {
+            r->x[i] = ap[p->infinity]->x[i];
+        }
+        for (i=0; i<8; i++) {
+            r->y[i] = ap[p->infinity]->y[i];
+        }
+        for (i=0; i<8; i++) {
+            r->z[i] = ap[p->infinity]->z[i];
+        }
+        r->infinity = ap[p->infinity]->infinity;
+
+        /* U1 = X1*Z2^2 */
+        sp_256_mont_sqr_8(t1, q->z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(t3, t1, q->z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(t1, t1, x, p256_mod, p256_mp_mod);
+        /* U2 = X2*Z1^2 */
+        sp_256_mont_sqr_8(t2, z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(t4, t2, z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(t2, t2, q->x, p256_mod, p256_mp_mod);
+        /* S1 = Y1*Z2^3 */
+        sp_256_mont_mul_8(t3, t3, y, p256_mod, p256_mp_mod);
+        /* S2 = Y2*Z1^3 */
+        sp_256_mont_mul_8(t4, t4, q->y, p256_mod, p256_mp_mod);
+        /* H = U2 - U1 */
+        sp_256_mont_sub_8(t2, t2, t1, p256_mod);
+        /* R = S2 - S1 */
+        sp_256_mont_sub_8(t4, t4, t3, p256_mod);
+        /* Z3 = H*Z1*Z2 */
+        sp_256_mont_mul_8(z, z, q->z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(z, z, t2, p256_mod, p256_mp_mod);
+        /* X3 = R^2 - H^3 - 2*U1*H^2 */
+        sp_256_mont_sqr_8(x, t4, p256_mod, p256_mp_mod);
+        sp_256_mont_sqr_8(t5, t2, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(y, t1, t5, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(t5, t5, t2, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_8(x, x, t5, p256_mod);
+        sp_256_mont_dbl_8(t1, y, p256_mod);
+        sp_256_mont_sub_8(x, x, t1, p256_mod);
+        /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
+        sp_256_mont_sub_8(y, y, x, p256_mod);
+        sp_256_mont_mul_8(y, y, t4, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(t5, t5, t3, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_8(y, y, t5, p256_mod);
+    }
+}
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_fast_8(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
+        int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 td[16];
+    sp_point_256 rtd;
+    sp_digit tmpd[2 * 8 * 5];
+#endif
+    sp_point_256* t;
+    sp_point_256* rt;
+    sp_digit* tmp;
+    sp_digit n;
+    int i;
+    int c, y;
+    int err;
+
+    (void)heap;
+
+    err = sp_256_point_new_8(heap, rtd, rt);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 16, heap, DYNAMIC_TYPE_ECC);
+    if (t == NULL)
+        err = MEMORY_E;
+    tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, heap,
+                             DYNAMIC_TYPE_ECC);
+    if (tmp == NULL)
+        err = MEMORY_E;
+#else
+    t = td;
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        /* t[0] = {0, 0, 1} * norm */
+        XMEMSET(&t[0], 0, sizeof(t[0]));
+        t[0].infinity = 1;
+        /* t[1] = {g->x, g->y, g->z} * norm */
+        (void)sp_256_mod_mul_norm_8(t[1].x, g->x, p256_mod);
+        (void)sp_256_mod_mul_norm_8(t[1].y, g->y, p256_mod);
+        (void)sp_256_mod_mul_norm_8(t[1].z, g->z, p256_mod);
+        t[1].infinity = 0;
+        sp_256_proj_point_dbl_8(&t[ 2], &t[ 1], tmp);
+        t[ 2].infinity = 0;
+        sp_256_proj_point_add_8(&t[ 3], &t[ 2], &t[ 1], tmp);
+        t[ 3].infinity = 0;
+        sp_256_proj_point_dbl_8(&t[ 4], &t[ 2], tmp);
+        t[ 4].infinity = 0;
+        sp_256_proj_point_add_8(&t[ 5], &t[ 3], &t[ 2], tmp);
+        t[ 5].infinity = 0;
+        sp_256_proj_point_dbl_8(&t[ 6], &t[ 3], tmp);
+        t[ 6].infinity = 0;
+        sp_256_proj_point_add_8(&t[ 7], &t[ 4], &t[ 3], tmp);
+        t[ 7].infinity = 0;
+        sp_256_proj_point_dbl_8(&t[ 8], &t[ 4], tmp);
+        t[ 8].infinity = 0;
+        sp_256_proj_point_add_8(&t[ 9], &t[ 5], &t[ 4], tmp);
+        t[ 9].infinity = 0;
+        sp_256_proj_point_dbl_8(&t[10], &t[ 5], tmp);
+        t[10].infinity = 0;
+        sp_256_proj_point_add_8(&t[11], &t[ 6], &t[ 5], tmp);
+        t[11].infinity = 0;
+        sp_256_proj_point_dbl_8(&t[12], &t[ 6], tmp);
+        t[12].infinity = 0;
+        sp_256_proj_point_add_8(&t[13], &t[ 7], &t[ 6], tmp);
+        t[13].infinity = 0;
+        sp_256_proj_point_dbl_8(&t[14], &t[ 7], tmp);
+        t[14].infinity = 0;
+        sp_256_proj_point_add_8(&t[15], &t[ 8], &t[ 7], tmp);
+        t[15].infinity = 0;
+
+        i = 6;
+        n = k[i+1] << 0;
+        c = 28;
+        y = n >> 28;
+        XMEMCPY(rt, &t[y], sizeof(sp_point_256));
+        n <<= 4;
+        for (; i>=0 || c>=4; ) {
+            if (c < 4) {
+                n |= k[i--];
+                c += 32;
+            }
+            y = (n >> 28) & 0xf;
+            n <<= 4;
+            c -= 4;
+
+            sp_256_proj_point_dbl_8(rt, rt, tmp);
+            sp_256_proj_point_dbl_8(rt, rt, tmp);
+            sp_256_proj_point_dbl_8(rt, rt, tmp);
+            sp_256_proj_point_dbl_8(rt, rt, tmp);
+
+            sp_256_proj_point_add_8(rt, rt, &t[y], tmp);
+        }
+
+        if (map != 0) {
+            sp_256_map_8(r, rt, tmp);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_256));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 8 * 5);
+        XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+    }
+    if (t != NULL) {
+        XMEMSET(t, 0, sizeof(sp_point_256) * 16);
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    ForceZero(tmpd, sizeof(tmpd));
+    ForceZero(td, sizeof(td));
+#endif
+    sp_256_point_free_8(rt, 1, heap);
+
+    return err;
+}
+
+/* A table entry for pre-computed points. */
+typedef struct sp_table_entry_256 {
+    sp_digit x[8];
+    sp_digit y[8];
+} sp_table_entry_256;
+
+#ifdef FP_ECC
+/* Double the Montgomery form projective point p a number of times.
+ *
+ * r  Result of repeated doubling of point.
+ * p  Point to double.
+ * n  Number of times to double
+ * t  Temporary ordinate data.
+ */
+static void sp_256_proj_point_dbl_n_8(sp_point_256* p, int n, sp_digit* t)
+{
+    sp_digit* w = t;
+    sp_digit* a = t + 2*8;
+    sp_digit* b = t + 4*8;
+    sp_digit* t1 = t + 6*8;
+    sp_digit* t2 = t + 8*8;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+
+    x = p->x;
+    y = p->y;
+    z = p->z;
+
+    /* Y = 2*Y */
+    sp_256_mont_dbl_8(y, y, p256_mod);
+    /* W = Z^4 */
+    sp_256_mont_sqr_8(w, z, p256_mod, p256_mp_mod);
+    sp_256_mont_sqr_8(w, w, p256_mod, p256_mp_mod);
+
+#ifndef WOLFSSL_SP_SMALL
+    while (--n > 0)
+#else
+    while (--n >= 0)
+#endif
+    {
+        /* A = 3*(X^2 - W) */
+        sp_256_mont_sqr_8(t1, x, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_8(t1, t1, w, p256_mod);
+        sp_256_mont_tpl_8(a, t1, p256_mod);
+        /* B = X*Y^2 */
+        sp_256_mont_sqr_8(t1, y, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(b, t1, x, p256_mod, p256_mp_mod);
+        /* X = A^2 - 2B */
+        sp_256_mont_sqr_8(x, a, p256_mod, p256_mp_mod);
+        sp_256_mont_dbl_8(t2, b, p256_mod);
+        sp_256_mont_sub_8(x, x, t2, p256_mod);
+        /* Z = Z*Y */
+        sp_256_mont_mul_8(z, z, y, p256_mod, p256_mp_mod);
+        /* t2 = Y^4 */
+        sp_256_mont_sqr_8(t1, t1, p256_mod, p256_mp_mod);
+#ifdef WOLFSSL_SP_SMALL
+        if (n != 0)
+#endif
+        {
+            /* W = W*Y^4 */
+            sp_256_mont_mul_8(w, w, t1, p256_mod, p256_mp_mod);
+        }
+        /* y = 2*A*(B - X) - Y^4 */
+        sp_256_mont_sub_8(y, b, x, p256_mod);
+        sp_256_mont_mul_8(y, y, a, p256_mod, p256_mp_mod);
+        sp_256_mont_dbl_8(y, y, p256_mod);
+        sp_256_mont_sub_8(y, y, t1, p256_mod);
+    }
+#ifndef WOLFSSL_SP_SMALL
+    /* A = 3*(X^2 - W) */
+    sp_256_mont_sqr_8(t1, x, p256_mod, p256_mp_mod);
+    sp_256_mont_sub_8(t1, t1, w, p256_mod);
+    sp_256_mont_tpl_8(a, t1, p256_mod);
+    /* B = X*Y^2 */
+    sp_256_mont_sqr_8(t1, y, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_8(b, t1, x, p256_mod, p256_mp_mod);
+    /* X = A^2 - 2B */
+    sp_256_mont_sqr_8(x, a, p256_mod, p256_mp_mod);
+    sp_256_mont_dbl_8(t2, b, p256_mod);
+    sp_256_mont_sub_8(x, x, t2, p256_mod);
+    /* Z = Z*Y */
+    sp_256_mont_mul_8(z, z, y, p256_mod, p256_mp_mod);
+    /* t2 = Y^4 */
+    sp_256_mont_sqr_8(t1, t1, p256_mod, p256_mp_mod);
+    /* y = 2*A*(B - X) - Y^4 */
+    sp_256_mont_sub_8(y, b, x, p256_mod);
+    sp_256_mont_mul_8(y, y, a, p256_mod, p256_mp_mod);
+    sp_256_mont_dbl_8(y, y, p256_mod);
+    sp_256_mont_sub_8(y, y, t1, p256_mod);
+#endif
+    /* Y = Y/2 */
+    sp_256_div2_8(y, y, p256_mod);
+}
+
+/* Convert the projective point to affine.
+ * Ordinates are in Montgomery form.
+ *
+ * a  Point to convert.
+ * t  Temporary data.
+ */
+static void sp_256_proj_to_affine_8(sp_point_256* a, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2 * 8;
+    sp_digit* tmp = t + 4 * 8;
+
+    sp_256_mont_inv_8(t1, a->z, tmp);
+
+    sp_256_mont_sqr_8(t2, t1, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_8(t1, t2, t1, p256_mod, p256_mp_mod);
+
+    sp_256_mont_mul_8(a->x, a->x, t2, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_8(a->y, a->y, t1, p256_mod, p256_mp_mod);
+    XMEMCPY(a->z, p256_norm_mod, sizeof(p256_norm_mod));
+}
+
+#endif /* FP_ECC */
+/* Add two Montgomery form projective points. The second point has a q value of
+ * one.
+ * Only the first point can be the same pointer as the result point.
+ *
+ * r  Result of addition.
+ * p  First point to add.
+ * q  Second point to add.
+ * t  Temporary ordinate data.
+ */
+static void sp_256_proj_point_add_qz1_8(sp_point_256* r, const sp_point_256* p,
+        const sp_point_256* q, sp_digit* t)
+{
+    const sp_point_256* ap[2];
+    sp_point_256* rp[2];
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*8;
+    sp_digit* t3 = t + 4*8;
+    sp_digit* t4 = t + 6*8;
+    sp_digit* t5 = t + 8*8;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+    int i;
+
+    /* Check double */
+    (void)sp_256_sub_8(t1, p256_mod, q->y);
+    sp_256_norm_8(t1);
+    if ((sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) &
+        (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, t1))) != 0) {
+        sp_256_proj_point_dbl_8(r, p, t);
+    }
+    else {
+        rp[0] = r;
+
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point_256));
+        x = rp[p->infinity | q->infinity]->x;
+        y = rp[p->infinity | q->infinity]->y;
+        z = rp[p->infinity | q->infinity]->z;
+
+        ap[0] = p;
+        ap[1] = q;
+        for (i=0; i<8; i++) {
+            r->x[i] = ap[p->infinity]->x[i];
+        }
+        for (i=0; i<8; i++) {
+            r->y[i] = ap[p->infinity]->y[i];
+        }
+        for (i=0; i<8; i++) {
+            r->z[i] = ap[p->infinity]->z[i];
+        }
+        r->infinity = ap[p->infinity]->infinity;
+
+        /* U2 = X2*Z1^2 */
+        sp_256_mont_sqr_8(t2, z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(t4, t2, z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(t2, t2, q->x, p256_mod, p256_mp_mod);
+        /* S2 = Y2*Z1^3 */
+        sp_256_mont_mul_8(t4, t4, q->y, p256_mod, p256_mp_mod);
+        /* H = U2 - X1 */
+        sp_256_mont_sub_8(t2, t2, x, p256_mod);
+        /* R = S2 - Y1 */
+        sp_256_mont_sub_8(t4, t4, y, p256_mod);
+        /* Z3 = H*Z1 */
+        sp_256_mont_mul_8(z, z, t2, p256_mod, p256_mp_mod);
+        /* X3 = R^2 - H^3 - 2*X1*H^2 */
+        sp_256_mont_sqr_8(t1, t4, p256_mod, p256_mp_mod);
+        sp_256_mont_sqr_8(t5, t2, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(t3, x, t5, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(t5, t5, t2, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_8(x, t1, t5, p256_mod);
+        sp_256_mont_dbl_8(t1, t3, p256_mod);
+        sp_256_mont_sub_8(x, x, t1, p256_mod);
+        /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */
+        sp_256_mont_sub_8(t3, t3, x, p256_mod);
+        sp_256_mont_mul_8(t3, t3, t4, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(t5, t5, y, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_8(y, t3, t5, p256_mod);
+    }
+}
+
+#ifdef WOLFSSL_SP_SMALL
+#ifdef FP_ECC
+/* Generate the pre-computed table of points for the base point.
+ *
+ * a      The base point.
+ * table  Place to store generated point data.
+ * tmp    Temporary data.
+ * heap  Heap to use for allocation.
+ */
+static int sp_256_gen_stripe_table_8(const sp_point_256* a,
+        sp_table_entry_256* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 td, s1d, s2d;
+#endif
+    sp_point_256* t;
+    sp_point_256* s1 = NULL;
+    sp_point_256* s2 = NULL;
+    int i, j;
+    int err;
+
+    (void)heap;
+
+    err = sp_256_point_new_8(heap, td, t);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, s1d, s1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, s2d, s2);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_8(t->x, a->x, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_8(t->y, a->y, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_8(t->z, a->z, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        t->infinity = 0;
+        sp_256_proj_to_affine_8(t, tmp);
+
+        XMEMCPY(s1->z, p256_norm_mod, sizeof(p256_norm_mod));
+        s1->infinity = 0;
+        XMEMCPY(s2->z, p256_norm_mod, sizeof(p256_norm_mod));
+        s2->infinity = 0;
+
+        /* table[0] = {0, 0, infinity} */
+        XMEMSET(&table[0], 0, sizeof(sp_table_entry_256));
+        /* table[1] = Affine version of 'a' in Montgomery form */
+        XMEMCPY(table[1].x, t->x, sizeof(table->x));
+        XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+        for (i=1; i<4; i++) {
+            sp_256_proj_point_dbl_n_8(t, 64, tmp);
+            sp_256_proj_to_affine_8(t, tmp);
+            XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+            XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+        }
+
+        for (i=1; i<4; i++) {
+            XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+            XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+            for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+                XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+                XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+                sp_256_proj_point_add_qz1_8(t, s1, s2, tmp);
+                sp_256_proj_to_affine_8(t, tmp);
+                XMEMCPY(table[j].x, t->x, sizeof(table->x));
+                XMEMCPY(table[j].y, t->y, sizeof(table->y));
+            }
+        }
+    }
+
+    sp_256_point_free_8(s2, 0, heap);
+    sp_256_point_free_8(s1, 0, heap);
+    sp_256_point_free_8( t, 0, heap);
+
+    return err;
+}
+
+#endif /* FP_ECC */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_stripe_8(sp_point_256* r, const sp_point_256* g,
+        const sp_table_entry_256* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 rtd;
+    sp_point_256 pd;
+    sp_digit td[2 * 8 * 5];
+#endif
+    sp_point_256* rt;
+    sp_point_256* p = NULL;
+    sp_digit* t;
+    int i, j;
+    int y, x;
+    int err;
+
+    (void)g;
+    (void)heap;
+
+
+    err = sp_256_point_new_8(heap, rtd, rt);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, heap,
+                           DYNAMIC_TYPE_ECC);
+    if (t == NULL) {
+        err = MEMORY_E;
+    }
+#else
+    t = td;
+#endif
+
+    if (err == MP_OKAY) {
+        XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
+        XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
+
+        y = 0;
+        for (j=0,x=63; j<4; j++,x+=64) {
+            y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+        }
+        XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+        XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+        rt->infinity = !y;
+        for (i=62; i>=0; i--) {
+            y = 0;
+            for (j=0,x=i; j<4; j++,x+=64) {
+                y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+            }
+
+            sp_256_proj_point_dbl_8(rt, rt, t);
+            XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+            XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+            p->infinity = !y;
+            sp_256_proj_point_add_qz1_8(rt, rt, p, t);
+        }
+
+        if (map != 0) {
+            sp_256_map_8(r, rt, t);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_256));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_8(p, 0, heap);
+    sp_256_point_free_8(rt, 0, heap);
+
+    return err;
+}
+
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+    #define FP_ENTRIES 16
+#endif
+
+typedef struct sp_cache_256_t {
+    sp_digit x[8];
+    sp_digit y[8];
+    sp_table_entry_256 table[16];
+    uint32_t cnt;
+    int set;
+} sp_cache_256_t;
+
+static THREAD_LS_T sp_cache_256_t sp_cache_256[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_256_last = -1;
+static THREAD_LS_T int sp_cache_256_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+    static volatile int initCacheMutex_256 = 0;
+    static wolfSSL_Mutex sp_cache_256_lock;
+#endif
+
+static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache)
+{
+    int i, j;
+    uint32_t least;
+
+    if (sp_cache_256_inited == 0) {
+        for (i=0; i<FP_ENTRIES; i++) {
+            sp_cache_256[i].set = 0;
+        }
+        sp_cache_256_inited = 1;
+    }
+
+    /* Compare point with those in cache. */
+    for (i=0; i<FP_ENTRIES; i++) {
+        if (!sp_cache_256[i].set)
+            continue;
+
+        if (sp_256_cmp_equal_8(g->x, sp_cache_256[i].x) &
+                           sp_256_cmp_equal_8(g->y, sp_cache_256[i].y)) {
+            sp_cache_256[i].cnt++;
+            break;
+        }
+    }
+
+    /* No match. */
+    if (i == FP_ENTRIES) {
+        /* Find empty entry. */
+        i = (sp_cache_256_last + 1) % FP_ENTRIES;
+        for (; i != sp_cache_256_last; i=(i+1)%FP_ENTRIES) {
+            if (!sp_cache_256[i].set) {
+                break;
+            }
+        }
+
+        /* Evict least used. */
+        if (i == sp_cache_256_last) {
+            least = sp_cache_256[0].cnt;
+            for (j=1; j<FP_ENTRIES; j++) {
+                if (sp_cache_256[j].cnt < least) {
+                    i = j;
+                    least = sp_cache_256[i].cnt;
+                }
+            }
+        }
+
+        XMEMCPY(sp_cache_256[i].x, g->x, sizeof(sp_cache_256[i].x));
+        XMEMCPY(sp_cache_256[i].y, g->y, sizeof(sp_cache_256[i].y));
+        sp_cache_256[i].set = 1;
+        sp_cache_256[i].cnt = 1;
+    }
+
+    *cache = &sp_cache_256[i];
+    sp_cache_256_last = i;
+}
+#endif /* FP_ECC */
+
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_8(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
+        int map, void* heap)
+{
+#ifndef FP_ECC
+    return sp_256_ecc_mulmod_fast_8(r, g, k, map, heap);
+#else
+    sp_digit tmp[2 * 8 * 5];
+    sp_cache_256_t* cache;
+    int err = MP_OKAY;
+
+#ifndef HAVE_THREAD_LS
+    if (initCacheMutex_256 == 0) {
+         wc_InitMutex(&sp_cache_256_lock);
+         initCacheMutex_256 = 1;
+    }
+    if (wc_LockMutex(&sp_cache_256_lock) != 0)
+       err = BAD_MUTEX_E;
+#endif /* HAVE_THREAD_LS */
+
+    if (err == MP_OKAY) {
+        sp_ecc_get_cache_256(g, &cache);
+        if (cache->cnt == 2)
+            sp_256_gen_stripe_table_8(g, cache->table, tmp, heap);
+
+#ifndef HAVE_THREAD_LS
+        wc_UnLockMutex(&sp_cache_256_lock);
+#endif /* HAVE_THREAD_LS */
+
+        if (cache->cnt < 2) {
+            err = sp_256_ecc_mulmod_fast_8(r, g, k, map, heap);
+        }
+        else {
+            err = sp_256_ecc_mulmod_stripe_8(r, g, cache->table, k,
+                    map, heap);
+        }
+    }
+
+    return err;
+#endif
+}
+
+#else
+#ifdef FP_ECC
+/* Generate the pre-computed table of points for the base point.
+ *
+ * a      The base point.
+ * table  Place to store generated point data.
+ * tmp    Temporary data.
+ * heap  Heap to use for allocation.
+ */
+static int sp_256_gen_stripe_table_8(const sp_point_256* a,
+        sp_table_entry_256* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 td, s1d, s2d;
+#endif
+    sp_point_256* t;
+    sp_point_256* s1 = NULL;
+    sp_point_256* s2 = NULL;
+    int i, j;
+    int err;
+
+    (void)heap;
+
+    err = sp_256_point_new_8(heap, td, t);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, s1d, s1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, s2d, s2);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_8(t->x, a->x, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_8(t->y, a->y, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_8(t->z, a->z, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        t->infinity = 0;
+        sp_256_proj_to_affine_8(t, tmp);
+
+        XMEMCPY(s1->z, p256_norm_mod, sizeof(p256_norm_mod));
+        s1->infinity = 0;
+        XMEMCPY(s2->z, p256_norm_mod, sizeof(p256_norm_mod));
+        s2->infinity = 0;
+
+        /* table[0] = {0, 0, infinity} */
+        XMEMSET(&table[0], 0, sizeof(sp_table_entry_256));
+        /* table[1] = Affine version of 'a' in Montgomery form */
+        XMEMCPY(table[1].x, t->x, sizeof(table->x));
+        XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+        for (i=1; i<8; i++) {
+            sp_256_proj_point_dbl_n_8(t, 32, tmp);
+            sp_256_proj_to_affine_8(t, tmp);
+            XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+            XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+        }
+
+        for (i=1; i<8; i++) {
+            XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+            XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+            for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+                XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+                XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+                sp_256_proj_point_add_qz1_8(t, s1, s2, tmp);
+                sp_256_proj_to_affine_8(t, tmp);
+                XMEMCPY(table[j].x, t->x, sizeof(table->x));
+                XMEMCPY(table[j].y, t->y, sizeof(table->y));
+            }
+        }
+    }
+
+    sp_256_point_free_8(s2, 0, heap);
+    sp_256_point_free_8(s1, 0, heap);
+    sp_256_point_free_8( t, 0, heap);
+
+    return err;
+}
+
+#endif /* FP_ECC */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_stripe_8(sp_point_256* r, const sp_point_256* g,
+        const sp_table_entry_256* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 rtd;
+    sp_point_256 pd;
+    sp_digit td[2 * 8 * 5];
+#endif
+    sp_point_256* rt;
+    sp_point_256* p = NULL;
+    sp_digit* t;
+    int i, j;
+    int y, x;
+    int err;
+
+    (void)g;
+    (void)heap;
+
+
+    err = sp_256_point_new_8(heap, rtd, rt);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, heap,
+                           DYNAMIC_TYPE_ECC);
+    if (t == NULL) {
+        err = MEMORY_E;
+    }
+#else
+    t = td;
+#endif
+
+    if (err == MP_OKAY) {
+        XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
+        XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
+
+        y = 0;
+        for (j=0,x=31; j<8; j++,x+=32) {
+            y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+        }
+        XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+        XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+        rt->infinity = !y;
+        for (i=30; i>=0; i--) {
+            y = 0;
+            for (j=0,x=i; j<8; j++,x+=32) {
+                y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+            }
+
+            sp_256_proj_point_dbl_8(rt, rt, t);
+            XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+            XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+            p->infinity = !y;
+            sp_256_proj_point_add_qz1_8(rt, rt, p, t);
+        }
+
+        if (map != 0) {
+            sp_256_map_8(r, rt, t);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_256));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_8(p, 0, heap);
+    sp_256_point_free_8(rt, 0, heap);
+
+    return err;
+}
+
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+    #define FP_ENTRIES 16
+#endif
+
+typedef struct sp_cache_256_t {
+    sp_digit x[8];
+    sp_digit y[8];
+    sp_table_entry_256 table[256];
+    uint32_t cnt;
+    int set;
+} sp_cache_256_t;
+
+static THREAD_LS_T sp_cache_256_t sp_cache_256[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_256_last = -1;
+static THREAD_LS_T int sp_cache_256_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+    static volatile int initCacheMutex_256 = 0;
+    static wolfSSL_Mutex sp_cache_256_lock;
+#endif
+
+static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache)
+{
+    int i, j;
+    uint32_t least;
+
+    if (sp_cache_256_inited == 0) {
+        for (i=0; i<FP_ENTRIES; i++) {
+            sp_cache_256[i].set = 0;
+        }
+        sp_cache_256_inited = 1;
+    }
+
+    /* Compare point with those in cache. */
+    for (i=0; i<FP_ENTRIES; i++) {
+        if (!sp_cache_256[i].set)
+            continue;
+
+        if (sp_256_cmp_equal_8(g->x, sp_cache_256[i].x) &
+                           sp_256_cmp_equal_8(g->y, sp_cache_256[i].y)) {
+            sp_cache_256[i].cnt++;
+            break;
+        }
+    }
+
+    /* No match. */
+    if (i == FP_ENTRIES) {
+        /* Find empty entry. */
+        i = (sp_cache_256_last + 1) % FP_ENTRIES;
+        for (; i != sp_cache_256_last; i=(i+1)%FP_ENTRIES) {
+            if (!sp_cache_256[i].set) {
+                break;
+            }
+        }
+
+        /* Evict least used. */
+        if (i == sp_cache_256_last) {
+            least = sp_cache_256[0].cnt;
+            for (j=1; j<FP_ENTRIES; j++) {
+                if (sp_cache_256[j].cnt < least) {
+                    i = j;
+                    least = sp_cache_256[i].cnt;
+                }
+            }
+        }
+
+        XMEMCPY(sp_cache_256[i].x, g->x, sizeof(sp_cache_256[i].x));
+        XMEMCPY(sp_cache_256[i].y, g->y, sizeof(sp_cache_256[i].y));
+        sp_cache_256[i].set = 1;
+        sp_cache_256[i].cnt = 1;
+    }
+
+    *cache = &sp_cache_256[i];
+    sp_cache_256_last = i;
+}
+#endif /* FP_ECC */
+
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_8(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
+        int map, void* heap)
+{
+#ifndef FP_ECC
+    return sp_256_ecc_mulmod_fast_8(r, g, k, map, heap);
+#else
+    sp_digit tmp[2 * 8 * 5];
+    sp_cache_256_t* cache;
+    int err = MP_OKAY;
+
+#ifndef HAVE_THREAD_LS
+    if (initCacheMutex_256 == 0) {
+         wc_InitMutex(&sp_cache_256_lock);
+         initCacheMutex_256 = 1;
+    }
+    if (wc_LockMutex(&sp_cache_256_lock) != 0)
+       err = BAD_MUTEX_E;
+#endif /* HAVE_THREAD_LS */
+
+    if (err == MP_OKAY) {
+        sp_ecc_get_cache_256(g, &cache);
+        if (cache->cnt == 2)
+            sp_256_gen_stripe_table_8(g, cache->table, tmp, heap);
+
+#ifndef HAVE_THREAD_LS
+        wc_UnLockMutex(&sp_cache_256_lock);
+#endif /* HAVE_THREAD_LS */
+
+        if (cache->cnt < 2) {
+            err = sp_256_ecc_mulmod_fast_8(r, g, k, map, heap);
+        }
+        else {
+            err = sp_256_ecc_mulmod_stripe_8(r, g, cache->table, k,
+                    map, heap);
+        }
+    }
+
+    return err;
+#endif
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * km    Scalar to multiply by.
+ * p     Point to multiply.
+ * r     Resulting point.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_256(mp_int* km, ecc_point* gm, ecc_point* r, int map,
+        void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 p;
+    sp_digit kd[8];
+#endif
+    sp_point_256* point;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    err = sp_256_point_new_8(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL)
+            err = MEMORY_E;
+    }
+#else
+    k = kd;
+#endif
+    if (err == MP_OKAY) {
+        sp_256_from_mp(k, 8, km);
+        sp_256_point_from_ecc_point_8(point, gm);
+
+            err = sp_256_ecc_mulmod_8(point, point, k, map, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_point_to_ecc_point_8(point, r);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_8(point, 0, heap);
+
+    return err;
+}
+
+#ifdef WOLFSSL_SP_SMALL
+static const sp_table_entry_256 p256_table[16] = {
+    /* 0 */
+    { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 */
+    { { 0x18a9143c,0x79e730d4,0x5fedb601,0x75ba95fc,0x77622510,0x79fb732b,
+        0xa53755c6,0x18905f76 },
+      { 0xce95560a,0xddf25357,0xba19e45c,0x8b4ab8e4,0xdd21f325,0xd2e88688,
+        0x25885d85,0x8571ff18 } },
+    /* 2 */
+    { { 0x16a0d2bb,0x4f922fc5,0x1a623499,0x0d5cc16c,0x57c62c8b,0x9241cf3a,
+        0xfd1b667f,0x2f5e6961 },
+      { 0xf5a01797,0x5c15c70b,0x60956192,0x3d20b44d,0x071fdb52,0x04911b37,
+        0x8d6f0f7b,0xf648f916 } },
+    /* 3 */
+    { { 0xe137bbbc,0x9e566847,0x8a6a0bec,0xe434469e,0x79d73463,0xb1c42761,
+        0x133d0015,0x5abe0285 },
+      { 0xc04c7dab,0x92aa837c,0x43260c07,0x573d9f4c,0x78e6cc37,0x0c931562,
+        0x6b6f7383,0x94bb725b } },
+    /* 4 */
+    { { 0xbfe20925,0x62a8c244,0x8fdce867,0x91c19ac3,0xdd387063,0x5a96a5d5,
+        0x21d324f6,0x61d587d4 },
+      { 0xa37173ea,0xe87673a2,0x53778b65,0x23848008,0x05bab43e,0x10f8441e,
+        0x4621efbe,0xfa11fe12 } },
+    /* 5 */
+    { { 0x2cb19ffd,0x1c891f2b,0xb1923c23,0x01ba8d5b,0x8ac5ca8e,0xb6d03d67,
+        0x1f13bedc,0x586eb04c },
+      { 0x27e8ed09,0x0c35c6e5,0x1819ede2,0x1e81a33c,0x56c652fa,0x278fd6c0,
+        0x70864f11,0x19d5ac08 } },
+    /* 6 */
+    { { 0xd2b533d5,0x62577734,0xa1bdddc0,0x673b8af6,0xa79ec293,0x577e7c9a,
+        0xc3b266b1,0xbb6de651 },
+      { 0xb65259b3,0xe7e9303a,0xd03a7480,0xd6a0afd3,0x9b3cfc27,0xc5ac83d1,
+        0x5d18b99b,0x60b4619a } },
+    /* 7 */
+    { { 0x1ae5aa1c,0xbd6a38e1,0x49e73658,0xb8b7652b,0xee5f87ed,0x0b130014,
+        0xaeebffcd,0x9d0f27b2 },
+      { 0x7a730a55,0xca924631,0xddbbc83a,0x9c955b2f,0xac019a71,0x07c1dfe0,
+        0x356ec48d,0x244a566d } },
+    /* 8 */
+    { { 0xf4f8b16a,0x56f8410e,0xc47b266a,0x97241afe,0x6d9c87c1,0x0a406b8e,
+        0xcd42ab1b,0x803f3e02 },
+      { 0x04dbec69,0x7f0309a8,0x3bbad05f,0xa83b85f7,0xad8e197f,0xc6097273,
+        0x5067adc1,0xc097440e } },
+    /* 9 */
+    { { 0xc379ab34,0x846a56f2,0x841df8d1,0xa8ee068b,0x176c68ef,0x20314459,
+        0x915f1f30,0xf1af32d5 },
+      { 0x5d75bd50,0x99c37531,0xf72f67bc,0x837cffba,0x48d7723f,0x0613a418,
+        0xe2d41c8b,0x23d0f130 } },
+    /* 10 */
+    { { 0xd5be5a2b,0xed93e225,0x5934f3c6,0x6fe79983,0x22626ffc,0x43140926,
+        0x7990216a,0x50bbb4d9 },
+      { 0xe57ec63e,0x378191c6,0x181dcdb2,0x65422c40,0x0236e0f6,0x41a8099b,
+        0x01fe49c3,0x2b100118 } },
+    /* 11 */
+    { { 0x9b391593,0xfc68b5c5,0x598270fc,0xc385f5a2,0xd19adcbb,0x7144f3aa,
+        0x83fbae0c,0xdd558999 },
+      { 0x74b82ff4,0x93b88b8e,0x71e734c9,0xd2e03c40,0x43c0322a,0x9a7a9eaf,
+        0x149d6041,0xe6e4c551 } },
+    /* 12 */
+    { { 0x80ec21fe,0x5fe14bfe,0xc255be82,0xf6ce116a,0x2f4a5d67,0x98bc5a07,
+        0xdb7e63af,0xfad27148 },
+      { 0x29ab05b3,0x90c0b6ac,0x4e251ae6,0x37a9a83c,0xc2aade7d,0x0a7dc875,
+        0x9f0e1a84,0x77387de3 } },
+    /* 13 */
+    { { 0xa56c0dd7,0x1e9ecc49,0x46086c74,0xa5cffcd8,0xf505aece,0x8f7a1408,
+        0xbef0c47e,0xb37b85c0 },
+      { 0xcc0e6a8f,0x3596b6e4,0x6b388f23,0xfd6d4bbf,0xc39cef4e,0xaba453fa,
+        0xf9f628d5,0x9c135ac8 } },
+    /* 14 */
+    { { 0x95c8f8be,0x0a1c7294,0x3bf362bf,0x2961c480,0xdf63d4ac,0x9e418403,
+        0x91ece900,0xc109f9cb },
+      { 0x58945705,0xc2d095d0,0xddeb85c0,0xb9083d96,0x7a40449b,0x84692b8d,
+        0x2eee1ee1,0x9bc3344f } },
+    /* 15 */
+    { { 0x42913074,0x0d5ae356,0x48a542b1,0x55491b27,0xb310732a,0x469ca665,
+        0x5f1a4cc1,0x29591d52 },
+      { 0xb84f983f,0xe76f5b6b,0x9f5f84e1,0xbe7eef41,0x80baa189,0x1200d496,
+        0x18ef332c,0x6376551f } },
+};
+
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_base_8(sp_point_256* r, const sp_digit* k,
+        int map, void* heap)
+{
+    return sp_256_ecc_mulmod_stripe_8(r, &p256_base, p256_table,
+                                      k, map, heap);
+}
+
+#else
+static const sp_table_entry_256 p256_table[256] = {
+    /* 0 */
+    { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 */
+    { { 0x18a9143c,0x79e730d4,0x5fedb601,0x75ba95fc,0x77622510,0x79fb732b,
+        0xa53755c6,0x18905f76 },
+      { 0xce95560a,0xddf25357,0xba19e45c,0x8b4ab8e4,0xdd21f325,0xd2e88688,
+        0x25885d85,0x8571ff18 } },
+    /* 2 */
+    { { 0x4147519a,0x20288602,0x26b372f0,0xd0981eac,0xa785ebc8,0xa9d4a7ca,
+        0xdbdf58e9,0xd953c50d },
+      { 0xfd590f8f,0x9d6361cc,0x44e6c917,0x72e9626b,0x22eb64cf,0x7fd96110,
+        0x9eb288f3,0x863ebb7e } },
+    /* 3 */
+    { { 0x5cdb6485,0x7856b623,0x2f0a2f97,0x808f0ea2,0x4f7e300b,0x3e68d954,
+        0xb5ff80a0,0x00076055 },
+      { 0x838d2010,0x7634eb9b,0x3243708a,0x54014fbb,0x842a6606,0xe0e47d39,
+        0x34373ee0,0x83087761 } },
+    /* 4 */
+    { { 0x16a0d2bb,0x4f922fc5,0x1a623499,0x0d5cc16c,0x57c62c8b,0x9241cf3a,
+        0xfd1b667f,0x2f5e6961 },
+      { 0xf5a01797,0x5c15c70b,0x60956192,0x3d20b44d,0x071fdb52,0x04911b37,
+        0x8d6f0f7b,0xf648f916 } },
+    /* 5 */
+    { { 0xe137bbbc,0x9e566847,0x8a6a0bec,0xe434469e,0x79d73463,0xb1c42761,
+        0x133d0015,0x5abe0285 },
+      { 0xc04c7dab,0x92aa837c,0x43260c07,0x573d9f4c,0x78e6cc37,0x0c931562,
+        0x6b6f7383,0x94bb725b } },
+    /* 6 */
+    { { 0x720f141c,0xbbf9b48f,0x2df5bc74,0x6199b3cd,0x411045c4,0xdc3f6129,
+        0x2f7dc4ef,0xcdd6bbcb },
+      { 0xeaf436fd,0xcca6700b,0xb99326be,0x6f647f6d,0x014f2522,0x0c0fa792,
+        0x4bdae5f6,0xa361bebd } },
+    /* 7 */
+    { { 0x597c13c7,0x28aa2558,0x50b7c3e1,0xc38d635f,0xf3c09d1d,0x07039aec,
+        0xc4b5292c,0xba12ca09 },
+      { 0x59f91dfd,0x9e408fa4,0xceea07fb,0x3af43b66,0x9d780b29,0x1eceb089,
+        0x701fef4b,0x53ebb99d } },
+    /* 8 */
+    { { 0xb0e63d34,0x4fe7ee31,0xa9e54fab,0xf4600572,0xd5e7b5a4,0xc0493334,
+        0x06d54831,0x8589fb92 },
+      { 0x6583553a,0xaa70f5cc,0xe25649e5,0x0879094a,0x10044652,0xcc904507,
+        0x02541c4f,0xebb0696d } },
+    /* 9 */
+    { { 0xac1647c5,0x4616ca15,0xc4cf5799,0xb8127d47,0x764dfbac,0xdc666aa3,
+        0xd1b27da3,0xeb2820cb },
+      { 0x6a87e008,0x9406f8d8,0x922378f3,0xd87dfa9d,0x80ccecb2,0x56ed2e42,
+        0x55a7da1d,0x1f28289b } },
+    /* 10 */
+    { { 0x3b89da99,0xabbaa0c0,0xb8284022,0xa6f2d79e,0xb81c05e8,0x27847862,
+        0x05e54d63,0x337a4b59 },
+      { 0x21f7794a,0x3c67500d,0x7d6d7f61,0x207005b7,0x04cfd6e8,0x0a5a3781,
+        0xf4c2fbd6,0x0d65e0d5 } },
+    /* 11 */
+    { { 0xb5275d38,0xd9d09bbe,0x0be0a358,0x4268a745,0x973eb265,0xf0762ff4,
+        0x52f4a232,0xc23da242 },
+      { 0x0b94520c,0x5da1b84f,0xb05bd78e,0x09666763,0x94d29ea1,0x3a4dcb86,
+        0xc790cff1,0x19de3b8c } },
+    /* 12 */
+    { { 0x26c5fe04,0x183a716c,0x3bba1bdb,0x3b28de0b,0xa4cb712c,0x7432c586,
+        0x91fccbfd,0xe34dcbd4 },
+      { 0xaaa58403,0xb408d46b,0x82e97a53,0x9a697486,0x36aaa8af,0x9e390127,
+        0x7b4e0f7f,0xe7641f44 } },
+    /* 13 */
+    { { 0xdf64ba59,0x7d753941,0x0b0242fc,0xd33f10ec,0xa1581859,0x4f06dfc6,
+        0x052a57bf,0x4a12df57 },
+      { 0x9439dbd0,0xbfa6338f,0xbde53e1f,0xd3c24bd4,0x21f1b314,0xfd5e4ffa,
+        0xbb5bea46,0x6af5aa93 } },
+    /* 14 */
+    { { 0x10c91999,0xda10b699,0x2a580491,0x0a24b440,0xb8cc2090,0x3e0094b4,
+        0x66a44013,0x5fe3475a },
+      { 0xf93e7b4b,0xb0f8cabd,0x7c23f91a,0x292b501a,0xcd1e6263,0x42e889ae,
+        0xecfea916,0xb544e308 } },
+    /* 15 */
+    { { 0x16ddfdce,0x6478c6e9,0xf89179e6,0x2c329166,0x4d4e67e1,0x4e8d6e76,
+        0xa6b0c20b,0xe0b6b2bd },
+      { 0xbb7efb57,0x0d312df2,0x790c4007,0x1aac0dde,0x679bc944,0xf90336ad,
+        0x25a63774,0x71c023de } },
+    /* 16 */
+    { { 0xbfe20925,0x62a8c244,0x8fdce867,0x91c19ac3,0xdd387063,0x5a96a5d5,
+        0x21d324f6,0x61d587d4 },
+      { 0xa37173ea,0xe87673a2,0x53778b65,0x23848008,0x05bab43e,0x10f8441e,
+        0x4621efbe,0xfa11fe12 } },
+    /* 17 */
+    { { 0x2cb19ffd,0x1c891f2b,0xb1923c23,0x01ba8d5b,0x8ac5ca8e,0xb6d03d67,
+        0x1f13bedc,0x586eb04c },
+      { 0x27e8ed09,0x0c35c6e5,0x1819ede2,0x1e81a33c,0x56c652fa,0x278fd6c0,
+        0x70864f11,0x19d5ac08 } },
+    /* 18 */
+    { { 0x309a4e1f,0x1e99f581,0xe9270074,0xab7de71b,0xefd28d20,0x26a5ef0b,
+        0x7f9c563f,0xe7c0073f },
+      { 0x0ef59f76,0x1f6d663a,0x20fcb050,0x669b3b54,0x7a6602d4,0xc08c1f7a,
+        0xc65b3c0a,0xe08504fe } },
+    /* 19 */
+    { { 0xa031b3ca,0xf098f68d,0xe6da6d66,0x6d1cab9e,0x94f246e8,0x5bfd81fa,
+        0x5b0996b4,0x78f01882 },
+      { 0x3a25787f,0xb7eefde4,0x1dccac9b,0x8016f80d,0xb35bfc36,0x0cea4877,
+        0x7e94747a,0x43a773b8 } },
+    /* 20 */
+    { { 0xd2b533d5,0x62577734,0xa1bdddc0,0x673b8af6,0xa79ec293,0x577e7c9a,
+        0xc3b266b1,0xbb6de651 },
+      { 0xb65259b3,0xe7e9303a,0xd03a7480,0xd6a0afd3,0x9b3cfc27,0xc5ac83d1,
+        0x5d18b99b,0x60b4619a } },
+    /* 21 */
+    { { 0x1ae5aa1c,0xbd6a38e1,0x49e73658,0xb8b7652b,0xee5f87ed,0x0b130014,
+        0xaeebffcd,0x9d0f27b2 },
+      { 0x7a730a55,0xca924631,0xddbbc83a,0x9c955b2f,0xac019a71,0x07c1dfe0,
+        0x356ec48d,0x244a566d } },
+    /* 22 */
+    { { 0xeacf1f96,0x6db0394a,0x024c271c,0x9f2122a9,0x82cbd3b9,0x2626ac1b,
+        0x3581ef69,0x45e58c87 },
+      { 0xa38f9dbc,0xd3ff479d,0xe888a040,0xa8aaf146,0x46e0bed7,0x945adfb2,
+        0xc1e4b7a4,0xc040e21c } },
+    /* 23 */
+    { { 0x6f8117b6,0x847af000,0x73a35433,0x651969ff,0x1d9475eb,0x482b3576,
+        0x682c6ec7,0x1cdf5c97 },
+      { 0x11f04839,0x7db775b4,0x48de1698,0x7dbeacf4,0xb70b3219,0xb2921dd1,
+        0xa92dff3d,0x046755f8 } },
+    /* 24 */
+    { { 0xbce8ffcd,0xcc8ac5d2,0x2fe61a82,0x0d53c48b,0x7202d6c7,0xf6f16172,
+        0x3b83a5f3,0x046e5e11 },
+      { 0xd8007f01,0xe7b8ff64,0x5af43183,0x7fb1ef12,0x35e1a03c,0x045c5ea6,
+        0x303d005b,0x6e0106c3 } },
+    /* 25 */
+    { { 0x88dd73b1,0x48c73584,0x995ed0d9,0x7670708f,0xc56a2ab7,0x38385ea8,
+        0xe901cf1f,0x442594ed },
+      { 0x12d4b65b,0xf8faa2c9,0x96c90c37,0x94c2343b,0x5e978d1f,0xd326e4a1,
+        0x4c2ee68e,0xa796fa51 } },
+    /* 26 */
+    { { 0x823addd7,0x359fb604,0xe56693b3,0x9e2a6183,0x3cbf3c80,0xf885b78e,
+        0xc69766e9,0xe4ad2da9 },
+      { 0x8e048a61,0x357f7f42,0xc092d9a0,0x082d198c,0xc03ed8ef,0xfc3a1af4,
+        0xc37b5143,0xc5e94046 } },
+    /* 27 */
+    { { 0x2be75f9e,0x476a538c,0xcb123a78,0x6fd1a9e8,0xb109c04b,0xd85e4df0,
+        0xdb464747,0x63283daf },
+      { 0xbaf2df15,0xce728cf7,0x0ad9a7f4,0xe592c455,0xe834bcc3,0xfab226ad,
+        0x1981a938,0x68bd19ab } },
+    /* 28 */
+    { { 0x1887d659,0xc08ead51,0xb359305a,0x3374d5f4,0xcfe74fe3,0x96986981,
+        0x3c6fdfd6,0x495292f5 },
+      { 0x1acec896,0x4a878c9e,0xec5b4484,0xd964b210,0x664d60a7,0x6696f7e2,
+        0x26036837,0x0ec7530d } },
+    /* 29 */
+    { { 0xad2687bb,0x2da13a05,0xf32e21fa,0xa1f83b6a,0x1dd4607b,0x390f5ef5,
+        0x64863f0b,0x0f6207a6 },
+      { 0x0f138233,0xbd67e3bb,0x272aa718,0xdd66b96c,0x26ec88ae,0x8ed00407,
+        0x08ed6dcf,0xff0db072 } },
+    /* 30 */
+    { { 0x4c95d553,0x749fa101,0x5d680a8a,0xa44052fd,0xff3b566f,0x183b4317,
+        0x88740ea3,0x313b513c },
+      { 0x08d11549,0xb402e2ac,0xb4dee21c,0x071ee10b,0x47f2320e,0x26b987dd,
+        0x86f19f81,0x2d3abcf9 } },
+    /* 31 */
+    { { 0x815581a2,0x4c288501,0x632211af,0x9a0a6d56,0x0cab2e99,0x19ba7a0f,
+        0xded98cdf,0xc036fa10 },
+      { 0xc1fbd009,0x29ae08ba,0x06d15816,0x0b68b190,0x9b9e0d8f,0xc2eb3277,
+        0xb6d40194,0xa6b2a2c4 } },
+    /* 32 */
+    { { 0x6d3549cf,0xd433e50f,0xfacd665e,0x6f33696f,0xce11fcb4,0x695bfdac,
+        0xaf7c9860,0x810ee252 },
+      { 0x7159bb2c,0x65450fe1,0x758b357b,0xf7dfbebe,0xd69fea72,0x2b057e74,
+        0x92731745,0xd485717a } },
+    /* 33 */
+    { { 0xf0cb5a98,0x11741a8a,0x1f3110bf,0xd3da8f93,0xab382adf,0x1994e2cb,
+        0x2f9a604e,0x6a6045a7 },
+      { 0xa2b2411d,0x170c0d3f,0x510e96e0,0xbe0eb83e,0x8865b3cc,0x3bcc9f73,
+        0xf9e15790,0xd3e45cfa } },
+    /* 34 */
+    { { 0xe83f7669,0xce1f69bb,0x72877d6b,0x09f8ae82,0x3244278d,0x9548ae54,
+        0xe3c2c19c,0x207755de },
+      { 0x6fef1945,0x87bd61d9,0xb12d28c3,0x18813cef,0x72df64aa,0x9fbcd1d6,
+        0x7154b00d,0x48dc5ee5 } },
+    /* 35 */
+    { { 0xf7e5a199,0x123790bf,0x989ccbb7,0xe0efb8cf,0x0a519c79,0xc27a2bfe,
+        0xdff6f445,0xf2fb0aed },
+      { 0xf0b5025f,0x41c09575,0x40fa9f22,0x550543d7,0x380bfbd0,0x8fa3c8ad,
+        0xdb28d525,0xa13e9015 } },
+    /* 36 */
+    { { 0xa2b65cbc,0xf9f7a350,0x2a464226,0x0b04b972,0xe23f07a1,0x265ce241,
+        0x1497526f,0x2bf0d6b0 },
+      { 0x4b216fb7,0xd3d4dd3f,0xfbdda26a,0xf7d7b867,0x6708505c,0xaeb7b83f,
+        0x162fe89f,0x42a94a5a } },
+    /* 37 */
+    { { 0xeaadf191,0x5846ad0b,0x25a268d7,0x0f8a4890,0x494dc1f6,0xe8603050,
+        0xc65ede3d,0x2c2dd969 },
+      { 0x93849c17,0x6d02171d,0x1da250dd,0x460488ba,0x3c3a5485,0x4810c706,
+        0x42c56dbc,0xf437fa1f } },
+    /* 38 */
+    { { 0x4a0f7dab,0x6aa0d714,0x1776e9ac,0x0f049793,0xf5f39786,0x52c0a050,
+        0x54707aa8,0xaaf45b33 },
+      { 0xc18d364a,0x85e37c33,0x3e497165,0xd40b9b06,0x15ec5444,0xf4171681,
+        0xf4f272bc,0xcdf6310d } },
+    /* 39 */
+    { { 0x8ea8b7ef,0x7473c623,0x85bc2287,0x08e93518,0x2bda8e34,0x41956772,
+        0xda9e2ff2,0xf0d008ba },
+      { 0x2414d3b1,0x2912671d,0xb019ea76,0xb3754985,0x453bcbdb,0x5c61b96d,
+        0xca887b8b,0x5bd5c2f5 } },
+    /* 40 */
+    { { 0xf49a3154,0xef0f469e,0x6e2b2e9a,0x3e85a595,0xaa924a9c,0x45aaec1e,
+        0xa09e4719,0xaa12dfc8 },
+      { 0x4df69f1d,0x26f27227,0xa2ff5e73,0xe0e4c82c,0xb7a9dd44,0xb9d8ce73,
+        0xe48ca901,0x6c036e73 } },
+    /* 41 */
+    { { 0x0f6e3138,0x5cfae12a,0x25ad345a,0x6966ef00,0x45672bc5,0x8993c64b,
+        0x96afbe24,0x292ff658 },
+      { 0x5e213402,0xd5250d44,0x4392c9fe,0xf6580e27,0xda1c72e8,0x097b397f,
+        0x311b7276,0x644e0c90 } },
+    /* 42 */
+    { { 0xa47153f0,0xe1e421e1,0x920418c9,0xb86c3b79,0x705d7672,0x93bdce87,
+        0xcab79a77,0xf25ae793 },
+      { 0x6d869d0c,0x1f3194a3,0x4986c264,0x9d55c882,0x096e945e,0x49fb5ea3,
+        0x13db0a3e,0x39b8e653 } },
+    /* 43 */
+    { { 0xb6fd2e59,0x37754200,0x9255c98f,0x35e2c066,0x0e2a5739,0xd9dab21a,
+        0x0f19db06,0x39122f2f },
+      { 0x03cad53c,0xcfbce1e0,0xe65c17e3,0x225b2c0f,0x9aa13877,0x72baf1d2,
+        0xce80ff8d,0x8de80af8 } },
+    /* 44 */
+    { { 0x207bbb76,0xafbea8d9,0x21782758,0x921c7e7c,0x1c0436b1,0xdfa2b74b,
+        0x2e368c04,0x87194906 },
+      { 0xa3993df5,0xb5f928bb,0xf3b3d26a,0x639d75b5,0x85b55050,0x011aa78a,
+        0x5b74fde1,0xfc315e6a } },
+    /* 45 */
+    { { 0xe8d6ecfa,0x561fd41a,0x1aec7f86,0x5f8c44f6,0x4924741d,0x98452a7b,
+        0xee389088,0xe6d4a7ad },
+      { 0x4593c75d,0x60552ed1,0xdd271162,0x70a70da4,0x7ba2c7db,0xd2aede93,
+        0x9be2ae57,0x35dfaf9a } },
+    /* 46 */
+    { { 0xaa736636,0x6b956fcd,0xae2cab7e,0x09f51d97,0x0f349966,0xfb10bf41,
+        0x1c830d2b,0x1da5c7d7 },
+      { 0x3cce6825,0x5c41e483,0xf9573c3b,0x15ad118f,0xf23036b8,0xa28552c7,
+        0xdbf4b9d6,0x7077c0fd } },
+    /* 47 */
+    { { 0x46b9661c,0xbf63ff8d,0x0d2cfd71,0xa1dfd36b,0xa847f8f7,0x0373e140,
+        0xe50efe44,0x53a8632e },
+      { 0x696d8051,0x0976ff68,0xc74f468a,0xdaec0c95,0x5e4e26bd,0x62994dc3,
+        0x34e1fcc1,0x028ca76d } },
+    /* 48 */
+    { { 0xfc9877ee,0xd11d47dc,0x801d0002,0xc8b36210,0x54c260b6,0xd002c117,
+        0x6962f046,0x04c17cd8 },
+      { 0xb0daddf5,0x6d9bd094,0x24ce55c0,0xbea23575,0x72da03b5,0x663356e6,
+        0xfed97474,0xf7ba4de9 } },
+    /* 49 */
+    { { 0xebe1263f,0xd0dbfa34,0x71ae7ce6,0x55763735,0x82a6f523,0xd2440553,
+        0x52131c41,0xe31f9600 },
+      { 0xea6b6ec6,0xd1bb9216,0x73c2fc44,0x37a1d12e,0x89d0a294,0xc10e7eac,
+        0xce34d47b,0xaa3a6259 } },
+    /* 50 */
+    { { 0x36f3dcd3,0xfbcf9df5,0xd2bf7360,0x6ceded50,0xdf504f5b,0x491710fa,
+        0x7e79daee,0x2398dd62 },
+      { 0x6d09569e,0xcf4705a3,0x5149f769,0xea0619bb,0x35f6034c,0xff9c0377,
+        0x1c046210,0x5717f5b2 } },
+    /* 51 */
+    { { 0x21dd895e,0x9fe229c9,0x40c28451,0x8e518500,0x1d637ecd,0xfa13d239,
+        0x0e3c28de,0x660a2c56 },
+      { 0xd67fcbd0,0x9cca88ae,0x0ea9f096,0xc8472478,0x72e92b4d,0x32b2f481,
+        0x4f522453,0x624ee54c } },
+    /* 52 */
+    { { 0xd897eccc,0x09549ce4,0x3f9880aa,0x4d49d1d9,0x043a7c20,0x723c2423,
+        0x92bdfbc0,0x4f392afb },
+      { 0x7de44fd9,0x6969f8fa,0x57b32156,0xb66cfbe4,0x368ebc3c,0xdb2fa803,
+        0xccdb399c,0x8a3e7977 } },
+    /* 53 */
+    { { 0x06c4b125,0xdde1881f,0xf6e3ca8c,0xae34e300,0x5c7a13e9,0xef6999de,
+        0x70c24404,0x3888d023 },
+      { 0x44f91081,0x76280356,0x5f015504,0x3d9fcf61,0x632cd36e,0x1827edc8,
+        0x18102336,0xa5e62e47 } },
+    /* 54 */
+    { { 0x2facd6c8,0x1a825ee3,0x54bcbc66,0x699c6354,0x98df9931,0x0ce3edf7,
+        0x466a5adc,0x2c4768e6 },
+      { 0x90a64bc9,0xb346ff8c,0xe4779f5c,0x630a6020,0xbc05e884,0xd949d064,
+        0xf9e652a0,0x7b5e6441 } },
+    /* 55 */
+    { { 0x1d28444a,0x2169422c,0xbe136a39,0xe996c5d8,0xfb0c7fce,0x2387afe5,
+        0x0c8d744a,0xb8af73cb },
+      { 0x338b86fd,0x5fde83aa,0xa58a5cff,0xfee3f158,0x20ac9433,0xc9ee8f6f,
+        0x7f3f0895,0xa036395f } },
+    /* 56 */
+    { { 0xa10f7770,0x8c73c6bb,0xa12a0e24,0xa6f16d81,0x51bc2b9f,0x100df682,
+        0x875fb533,0x4be36b01 },
+      { 0x9fb56dbb,0x9226086e,0x07e7a4f8,0x306fef8b,0x66d52f20,0xeeaccc05,
+        0x1bdc00c0,0x8cbc9a87 } },
+    /* 57 */
+    { { 0xc0dac4ab,0xe131895c,0x712ff112,0xa874a440,0x6a1cee57,0x6332ae7c,
+        0x0c0835f8,0x44e7553e },
+      { 0x7734002d,0x6d503fff,0x0b34425c,0x9d35cb8b,0x0e8738b5,0x95f70276,
+        0x5eb8fc18,0x470a683a } },
+    /* 58 */
+    { { 0x90513482,0x81b761dc,0x01e9276a,0x0287202a,0x0ce73083,0xcda441ee,
+        0xc63dc6ef,0x16410690 },
+      { 0x6d06a2ed,0xf5034a06,0x189b100b,0xdd4d7745,0xab8218c9,0xd914ae72,
+        0x7abcbb4f,0xd73479fd } },
+    /* 59 */
+    { { 0x5ad4c6e5,0x7edefb16,0x5b06d04d,0x262cf08f,0x8575cb14,0x12ed5bb1,
+        0x0771666b,0x816469e3 },
+      { 0x561e291e,0xd7ab9d79,0xc1de1661,0xeb9daf22,0x135e0513,0xf49827eb,
+        0xf0dd3f9c,0x0a36dd23 } },
+    /* 60 */
+    { { 0x41d5533c,0x098d32c7,0x8684628f,0x7c5f5a9e,0xe349bd11,0x39a228ad,
+        0xfdbab118,0xe331dfd6 },
+      { 0x6bcc6ed8,0x5100ab68,0xef7a260e,0x7160c3bd,0xbce850d7,0x9063d9a7,
+        0x492e3389,0xd3b4782a } },
+    /* 61 */
+    { { 0xf3821f90,0xa149b6e8,0x66eb7aad,0x92edd9ed,0x1a013116,0x0bb66953,
+        0x4c86a5bd,0x7281275a },
+      { 0xd3ff47e5,0x503858f7,0x61016441,0x5e1616bc,0x7dfd9bb1,0x62b0f11a,
+        0xce145059,0x2c062e7e } },
+    /* 62 */
+    { { 0x0159ac2e,0xa76f996f,0xcbdb2713,0x281e7736,0x08e46047,0x2ad6d288,
+        0x2c4e7ef1,0x282a35f9 },
+      { 0xc0ce5cd2,0x9c354b1e,0x1379c229,0xcf99efc9,0x3e82c11e,0x992caf38,
+        0x554d2abd,0xc71cd513 } },
+    /* 63 */
+    { { 0x09b578f4,0x4885de9c,0xe3affa7a,0x1884e258,0x59182f1f,0x8f76b1b7,
+        0xcf47f3a3,0xc50f6740 },
+      { 0x374b68ea,0xa9c4adf3,0x69965fe2,0xa406f323,0x85a53050,0x2f86a222,
+        0x212958dc,0xb9ecb3a7 } },
+    /* 64 */
+    { { 0xf4f8b16a,0x56f8410e,0xc47b266a,0x97241afe,0x6d9c87c1,0x0a406b8e,
+        0xcd42ab1b,0x803f3e02 },
+      { 0x04dbec69,0x7f0309a8,0x3bbad05f,0xa83b85f7,0xad8e197f,0xc6097273,
+        0x5067adc1,0xc097440e } },
+    /* 65 */
+    { { 0xc379ab34,0x846a56f2,0x841df8d1,0xa8ee068b,0x176c68ef,0x20314459,
+        0x915f1f30,0xf1af32d5 },
+      { 0x5d75bd50,0x99c37531,0xf72f67bc,0x837cffba,0x48d7723f,0x0613a418,
+        0xe2d41c8b,0x23d0f130 } },
+    /* 66 */
+    { { 0xf41500d9,0x857ab6ed,0xfcbeada8,0x0d890ae5,0x89725951,0x52fe8648,
+        0xc0a3fadd,0xb0288dd6 },
+      { 0x650bcb08,0x85320f30,0x695d6e16,0x71af6313,0xb989aa76,0x31f520a7,
+        0xf408c8d2,0xffd3724f } },
+    /* 67 */
+    { { 0xb458e6cb,0x53968e64,0x317a5d28,0x992dad20,0x7aa75f56,0x3814ae0b,
+        0xd78c26df,0xf5590f4a },
+      { 0xcf0ba55a,0x0fc24bd3,0x0c778bae,0x0fc4724a,0x683b674a,0x1ce9864f,
+        0xf6f74a20,0x18d6da54 } },
+    /* 68 */
+    { { 0xd5be5a2b,0xed93e225,0x5934f3c6,0x6fe79983,0x22626ffc,0x43140926,
+        0x7990216a,0x50bbb4d9 },
+      { 0xe57ec63e,0x378191c6,0x181dcdb2,0x65422c40,0x0236e0f6,0x41a8099b,
+        0x01fe49c3,0x2b100118 } },
+    /* 69 */
+    { { 0x9b391593,0xfc68b5c5,0x598270fc,0xc385f5a2,0xd19adcbb,0x7144f3aa,
+        0x83fbae0c,0xdd558999 },
+      { 0x74b82ff4,0x93b88b8e,0x71e734c9,0xd2e03c40,0x43c0322a,0x9a7a9eaf,
+        0x149d6041,0xe6e4c551 } },
+    /* 70 */
+    { { 0x1e9af288,0x55f655bb,0xf7ada931,0x647e1a64,0xcb2820e5,0x43697e4b,
+        0x07ed56ff,0x51e00db1 },
+      { 0x771c327e,0x43d169b8,0x4a96c2ad,0x29cdb20b,0x3deb4779,0xc07d51f5,
+        0x49829177,0xe22f4241 } },
+    /* 71 */
+    { { 0x635f1abb,0xcd45e8f4,0x68538874,0x7edc0cb5,0xb5a8034d,0xc9472c1f,
+        0x52dc48c9,0xf709373d },
+      { 0xa8af30d6,0x401966bb,0xf137b69c,0x95bf5f4a,0x9361c47e,0x3966162a,
+        0xe7275b11,0xbd52d288 } },
+    /* 72 */
+    { { 0x9c5fa877,0xab155c7a,0x7d3a3d48,0x17dad672,0x73d189d8,0x43f43f9e,
+        0xc8aa77a6,0xa0d0f8e4 },
+      { 0xcc94f92d,0x0bbeafd8,0x0c4ddb3a,0xd818c8be,0xb82eba14,0x22cc65f8,
+        0x946d6a00,0xa56c78c7 } },
+    /* 73 */
+    { { 0x0dd09529,0x2962391b,0x3daddfcf,0x803e0ea6,0x5b5bf481,0x2c77351f,
+        0x731a367a,0xd8befdf8 },
+      { 0xfc0157f4,0xab919d42,0xfec8e650,0xf51caed7,0x02d48b0a,0xcdf9cb40,
+        0xce9f6478,0x854a68a5 } },
+    /* 74 */
+    { { 0x63506ea5,0xdc35f67b,0xa4fe0d66,0x9286c489,0xfe95cd4d,0x3f101d3b,
+        0x98846a95,0x5cacea0b },
+      { 0x9ceac44d,0xa90df60c,0x354d1c3a,0x3db29af4,0xad5dbabe,0x08dd3de8,
+        0x35e4efa9,0xe4982d12 } },
+    /* 75 */
+    { { 0xc34cd55e,0x23104a22,0x2680d132,0x58695bb3,0x1fa1d943,0xfb345afa,
+        0x16b20499,0x8046b7f6 },
+      { 0x38e7d098,0xb533581e,0xf46f0b70,0xd7f61e8d,0x44cb78c4,0x30dea9ea,
+        0x9082af55,0xeb17ca7b } },
+    /* 76 */
+    { { 0x76a145b9,0x1751b598,0xc1bc71ec,0xa5cf6b0f,0x392715bb,0xd3e03565,
+        0xfab5e131,0x097b00ba },
+      { 0x565f69e1,0xaa66c8e9,0xb5be5199,0x77e8f75a,0xda4fd984,0x6033ba11,
+        0xafdbcc9e,0xf95c747b } },
+    /* 77 */
+    { { 0xbebae45e,0x558f01d3,0xc4bc6955,0xa8ebe9f0,0xdbc64fc6,0xaeb705b1,
+        0x566ed837,0x3512601e },
+      { 0xfa1161cd,0x9336f1e1,0x4c65ef87,0x328ab8d5,0x724f21e5,0x4757eee2,
+        0x6068ab6b,0x0ef97123 } },
+    /* 78 */
+    { { 0x54ca4226,0x02598cf7,0xf8642c8e,0x5eede138,0x468e1790,0x48963f74,
+        0x3b4fbc95,0xfc16d933 },
+      { 0xe7c800ca,0xbe96fb31,0x2678adaa,0x13806331,0x6ff3e8b5,0x3d624497,
+        0xb95d7a17,0x14ca4af1 } },
+    /* 79 */
+    { { 0xbd2f81d5,0x7a4771ba,0x01f7d196,0x1a5f9d69,0xcad9c907,0xd898bef7,
+        0xf59c231d,0x4057b063 },
+      { 0x89c05c0a,0xbffd82fe,0x1dc0df85,0xe4911c6f,0xa35a16db,0x3befccae,
+        0xf1330b13,0x1c3b5d64 } },
+    /* 80 */
+    { { 0x80ec21fe,0x5fe14bfe,0xc255be82,0xf6ce116a,0x2f4a5d67,0x98bc5a07,
+        0xdb7e63af,0xfad27148 },
+      { 0x29ab05b3,0x90c0b6ac,0x4e251ae6,0x37a9a83c,0xc2aade7d,0x0a7dc875,
+        0x9f0e1a84,0x77387de3 } },
+    /* 81 */
+    { { 0xa56c0dd7,0x1e9ecc49,0x46086c74,0xa5cffcd8,0xf505aece,0x8f7a1408,
+        0xbef0c47e,0xb37b85c0 },
+      { 0xcc0e6a8f,0x3596b6e4,0x6b388f23,0xfd6d4bbf,0xc39cef4e,0xaba453fa,
+        0xf9f628d5,0x9c135ac8 } },
+    /* 82 */
+    { { 0x84e35743,0x32aa3202,0x85a3cdef,0x320d6ab1,0x1df19819,0xb821b176,
+        0xc433851f,0x5721361f },
+      { 0x71fc9168,0x1f0db36a,0x5e5c403c,0x5f98ba73,0x37bcd8f5,0xf64ca87e,
+        0xe6bb11bd,0xdcbac3c9 } },
+    /* 83 */
+    { { 0x4518cbe2,0xf01d9968,0x9c9eb04e,0xd242fc18,0xe47feebf,0x727663c7,
+        0x2d626862,0xb8c1c89e },
+      { 0xc8e1d569,0x51a58bdd,0xb7d88cd0,0x563809c8,0xf11f31eb,0x26c27fd9,
+        0x2f9422d4,0x5d23bbda } },
+    /* 84 */
+    { { 0x95c8f8be,0x0a1c7294,0x3bf362bf,0x2961c480,0xdf63d4ac,0x9e418403,
+        0x91ece900,0xc109f9cb },
+      { 0x58945705,0xc2d095d0,0xddeb85c0,0xb9083d96,0x7a40449b,0x84692b8d,
+        0x2eee1ee1,0x9bc3344f } },
+    /* 85 */
+    { { 0x42913074,0x0d5ae356,0x48a542b1,0x55491b27,0xb310732a,0x469ca665,
+        0x5f1a4cc1,0x29591d52 },
+      { 0xb84f983f,0xe76f5b6b,0x9f5f84e1,0xbe7eef41,0x80baa189,0x1200d496,
+        0x18ef332c,0x6376551f } },
+    /* 86 */
+    { { 0x562976cc,0xbda5f14e,0x0ef12c38,0x22bca3e6,0x6cca9852,0xbbfa3064,
+        0x08e2987a,0xbdb79dc8 },
+      { 0xcb06a772,0xfd2cb5c9,0xfe536dce,0x38f475aa,0x7c2b5db8,0xc2a3e022,
+        0xadd3c14a,0x8ee86001 } },
+    /* 87 */
+    { { 0xa4ade873,0xcbe96981,0xc4fba48c,0x7ee9aa4d,0x5a054ba5,0x2cee2899,
+        0x6f77aa4b,0x92e51d7a },
+      { 0x7190a34d,0x948bafa8,0xf6bd1ed1,0xd698f75b,0x0caf1144,0xd00ee6e3,
+        0x0a56aaaa,0x5182f86f } },
+    /* 88 */
+    { { 0x7a4cc99c,0xfba6212c,0x3e6d9ca1,0xff609b68,0x5ac98c5a,0x5dbb27cb,
+        0x4073a6f2,0x91dcab5d },
+      { 0x5f575a70,0x01b6cc3d,0x6f8d87fa,0x0cb36139,0x89981736,0x165d4e8c,
+        0x97974f2b,0x17a0cedb } },
+    /* 89 */
+    { { 0x076c8d3a,0x38861e2a,0x210f924b,0x701aad39,0x13a835d9,0x94d0eae4,
+        0x7f4cdf41,0x2e8ce36c },
+      { 0x037a862b,0x91273dab,0x60e4c8fa,0x01ba9bb7,0x33baf2dd,0xf9645388,
+        0x34f668f3,0xf4ccc6cb } },
+    /* 90 */
+    { { 0xf1f79687,0x44ef525c,0x92efa815,0x7c595495,0xa5c78d29,0xe1231741,
+        0x9a0df3c9,0xac0db488 },
+      { 0xdf01747f,0x86bfc711,0xef17df13,0x592b9358,0x5ccb6bb5,0xe5880e4f,
+        0x94c974a2,0x95a64a61 } },
+    /* 91 */
+    { { 0xc15a4c93,0x72c1efda,0x82585141,0x40269b73,0x16cb0bad,0x6a8dfb1c,
+        0x29210677,0x231e54ba },
+      { 0x8ae6d2dc,0xa70df917,0x39112918,0x4d6aa63f,0x5e5b7223,0xf627726b,
+        0xd8a731e1,0xab0be032 } },
+    /* 92 */
+    { { 0x8d131f2d,0x097ad0e9,0x3b04f101,0x637f09e3,0xd5e9a748,0x1ac86196,
+        0x2cf6a679,0xf1bcc880 },
+      { 0xe8daacb4,0x25c69140,0x60f65009,0x3c4e4055,0x477937a6,0x591cc8fc,
+        0x5aebb271,0x85169469 } },
+    /* 93 */
+    { { 0xf1dcf593,0xde35c143,0xb018be3b,0x78202b29,0x9bdd9d3d,0xe9cdadc2,
+        0xdaad55d8,0x8f67d9d2 },
+      { 0x7481ea5f,0x84111656,0xe34c590c,0xe7d2dde9,0x05053fa8,0xffdd43f4,
+        0xc0728b5d,0xf84572b9 } },
+    /* 94 */
+    { { 0x97af71c9,0x5e1a7a71,0x7a736565,0xa1449444,0x0e1d5063,0xa1b4ae07,
+        0x616b2c19,0xedee2710 },
+      { 0x11734121,0xb2f034f5,0x4a25e9f0,0x1cac6e55,0xa40c2ecf,0x8dc148f3,
+        0x44ebd7f4,0x9fd27e9b } },
+    /* 95 */
+    { { 0xf6e2cb16,0x3cc7658a,0xfe5919b6,0xe3eb7d2c,0x168d5583,0x5a8c5816,
+        0x958ff387,0xa40c2fb6 },
+      { 0xfedcc158,0x8c9ec560,0x55f23056,0x7ad804c6,0x9a307e12,0xd9396704,
+        0x7dc6decf,0x99bc9bb8 } },
+    /* 96 */
+    { { 0x927dafc6,0x84a9521d,0x5c09cd19,0x52c1fb69,0xf9366dde,0x9d9581a0,
+        0xa16d7e64,0x9abe210b },
+      { 0x48915220,0x480af84a,0x4dd816c6,0xfa73176a,0x1681ca5a,0xc7d53987,
+        0x87f344b0,0x7881c257 } },
+    /* 97 */
+    { { 0xe0bcf3ff,0x93399b51,0x127f74f6,0x0d02cbc5,0xdd01d968,0x8fb465a2,
+        0xa30e8940,0x15e6e319 },
+      { 0x3e0e05f4,0x646d6e0d,0x43588404,0xfad7bddc,0xc4f850d3,0xbe61c7d1,
+        0x191172ce,0x0e55facf } },
+    /* 98 */
+    { { 0xf8787564,0x7e9d9806,0x31e85ce6,0x1a331721,0xb819e8d6,0x6b0158ca,
+        0x6fe96577,0xd73d0976 },
+      { 0x1eb7206e,0x42483425,0xc618bb42,0xa519290f,0x5e30a520,0x5dcbb859,
+        0x8f15a50b,0x9250a374 } },
+    /* 99 */
+    { { 0xbe577410,0xcaff08f8,0x5077a8c6,0xfd408a03,0xec0a63a4,0xf1f63289,
+        0xc1cc8c0b,0x77414082 },
+      { 0xeb0991cd,0x05a40fa6,0x49fdc296,0xc1ca0866,0xb324fd40,0x3a68a3c7,
+        0x12eb20b9,0x8cb04f4d } },
+    /* 100 */
+    { { 0x6906171c,0xb1c2d055,0xb0240c3f,0x9073e9cd,0xd8906841,0xdb8e6b4f,
+        0x47123b51,0xe4e429ef },
+      { 0x38ec36f4,0x0b8dd53c,0xff4b6a27,0xf9d2dc01,0x879a9a48,0x5d066e07,
+        0x3c6e6552,0x37bca2ff } },
+    /* 101 */
+    { { 0xdf562470,0x4cd2e3c7,0xc0964ac9,0x44f272a2,0x80c793be,0x7c6d5df9,
+        0x3002b22a,0x59913edc },
+      { 0x5750592a,0x7a139a83,0xe783de02,0x99e01d80,0xea05d64f,0xcf8c0375,
+        0xb013e226,0x43786e4a } },
+    /* 102 */
+    { { 0x9e56b5a6,0xff32b0ed,0xd9fc68f9,0x0750d9a6,0x597846a7,0xec15e845,
+        0xb7e79e7a,0x8638ca98 },
+      { 0x0afc24b2,0x2f5ae096,0x4dace8f2,0x05398eaf,0xaecba78f,0x3b765dd0,
+        0x7b3aa6f0,0x1ecdd36a } },
+    /* 103 */
+    { { 0x6c5ff2f3,0x5d3acd62,0x2873a978,0xa2d516c0,0xd2110d54,0xad94c9fa,
+        0xd459f32d,0xd85d0f85 },
+      { 0x10b11da3,0x9f700b8d,0xa78318c4,0xd2c22c30,0x9208decd,0x556988f4,
+        0xb4ed3c62,0xa04f19c3 } },
+    /* 104 */
+    { { 0xed7f93bd,0x087924c8,0x392f51f6,0xcb64ac5d,0x821b71af,0x7cae330a,
+        0x5c0950b0,0x92b2eeea },
+      { 0x85b6e235,0x85ac4c94,0x2936c0f0,0xab2ca4a9,0xe0508891,0x80faa6b3,
+        0x5834276c,0x1ee78221 } },
+    /* 105 */
+    { { 0xe63e79f7,0xa60a2e00,0xf399d906,0xf590e7b2,0x6607c09d,0x9021054a,
+        0x57a6e150,0xf3f2ced8 },
+      { 0xf10d9b55,0x200510f3,0xd8642648,0x9d2fcfac,0xe8bd0e7c,0xe5631aa7,
+        0x3da3e210,0x0f56a454 } },
+    /* 106 */
+    { { 0x1043e0df,0x5b21bffa,0x9c007e6d,0x6c74b6cc,0xd4a8517a,0x1a656ec0,
+        0x1969e263,0xbd8f1741 },
+      { 0xbeb7494a,0x8a9bbb86,0x45f3b838,0x1567d46f,0xa4e5a79a,0xdf7a12a7,
+        0x30ccfa09,0x2d1a1c35 } },
+    /* 107 */
+    { { 0x506508da,0x192e3813,0xa1d795a7,0x336180c4,0x7a9944b3,0xcddb5949,
+        0xb91fba46,0xa107a65e },
+      { 0x0f94d639,0xe6d1d1c5,0x8a58b7d7,0x8b4af375,0xbd37ca1c,0x1a7c5584,
+        0xf87a9af2,0x183d760a } },
+    /* 108 */
+    { { 0x0dde59a4,0x29d69711,0x0e8bef87,0xf1ad8d07,0x4f2ebe78,0x229b4963,
+        0xc269d754,0x1d44179d },
+      { 0x8390d30e,0xb32dc0cf,0x0de8110c,0x0a3b2753,0x2bc0339a,0x31af1dc5,
+        0x9606d262,0x771f9cc2 } },
+    /* 109 */
+    { { 0x85040739,0x99993e77,0x8026a939,0x44539db9,0xf5f8fc26,0xcf40f6f2,
+        0x0362718e,0x64427a31 },
+      { 0x85428aa8,0x4f4f2d87,0xebfb49a8,0x7b7adc3f,0xf23d01ac,0x201b2c6d,
+        0x6ae90d6d,0x49d9b749 } },
+    /* 110 */
+    { { 0x435d1099,0xcc78d8bc,0x8e8d1a08,0x2adbcd4e,0x2cb68a41,0x02c2e2a0,
+        0x3f605445,0x9037d81b },
+      { 0x074c7b61,0x7cdbac27,0x57bfd72e,0xfe2031ab,0x596d5352,0x61ccec96,
+        0x7cc0639c,0x08c3de6a } },
+    /* 111 */
+    { { 0xf6d552ab,0x20fdd020,0x05cd81f1,0x56baff98,0x91351291,0x06fb7c3e,
+        0x45796b2f,0xc6909442 },
+      { 0x41231bd1,0x17b3ae9c,0x5cc58205,0x1eac6e87,0xf9d6a122,0x208837ab,
+        0xcafe3ac0,0x3fa3db02 } },
+    /* 112 */
+    { { 0x05058880,0xd75a3e65,0x643943f2,0x7da365ef,0xfab24925,0x4147861c,
+        0xfdb808ff,0xc5c4bdb0 },
+      { 0xb272b56b,0x73513e34,0x11b9043a,0xc8327e95,0xf8844969,0xfd8ce37d,
+        0x46c2b6b5,0x2d56db94 } },
+    /* 113 */
+    { { 0xff46ac6b,0x2461782f,0x07a2e425,0xd19f7926,0x09a48de1,0xfafea3c4,
+        0xe503ba42,0x0f56bd9d },
+      { 0x345cda49,0x137d4ed1,0x816f299d,0x821158fc,0xaeb43402,0xe7c6a54a,
+        0x1173b5f1,0x4003bb9d } },
+    /* 114 */
+    { { 0xa0803387,0x3b8e8189,0x39cbd404,0xece115f5,0xd2877f21,0x4297208d,
+        0xa07f2f9e,0x53765522 },
+      { 0xa8a4182d,0xa4980a21,0x3219df79,0xa2bbd07a,0x1a19a2d4,0x674d0a2e,
+        0x6c5d4549,0x7a056f58 } },
+    /* 115 */
+    { { 0x9d8a2a47,0x646b2558,0xc3df2773,0x5b582948,0xabf0d539,0x51ec000e,
+        0x7a1a2675,0x77d482f1 },
+      { 0x87853948,0xb8a1bd95,0x6cfbffee,0xa6f817bd,0x80681e47,0xab6ec057,
+        0x2b38b0e4,0x4115012b } },
+    /* 116 */
+    { { 0x6de28ced,0x3c73f0f4,0x9b13ec47,0x1d5da760,0x6e5c6392,0x61b8ce9e,
+        0xfbea0946,0xcdf04572 },
+      { 0x6c53c3b0,0x1cb3c58b,0x447b843c,0x97fe3c10,0x2cb9780e,0xfb2b8ae1,
+        0x97383109,0xee703dda } },
+    /* 117 */
+    { { 0xff57e43a,0x34515140,0xb1b811b8,0xd44660d3,0x8f42b986,0x2b3b5dff,
+        0xa162ce21,0x2a0ad89d },
+      { 0x6bc277ba,0x64e4a694,0xc141c276,0xc788c954,0xcabf6274,0x141aa64c,
+        0xac2b4659,0xd62d0b67 } },
+    /* 118 */
+    { { 0x2c054ac4,0x39c5d87b,0xf27df788,0x57005859,0xb18128d6,0xedf7cbf3,
+        0x991c2426,0xb39a23f2 },
+      { 0xf0b16ae5,0x95284a15,0xa136f51b,0x0c6a05b1,0xf2700783,0x1d63c137,
+        0xc0674cc5,0x04ed0092 } },
+    /* 119 */
+    { { 0x9ae90393,0x1f4185d1,0x4a3d64e6,0x3047b429,0x9854fc14,0xae0001a6,
+        0x0177c387,0xa0a91fc1 },
+      { 0xae2c831e,0xff0a3f01,0x2b727e16,0xbb76ae82,0x5a3075b4,0x8f12c8a1,
+        0x9ed20c41,0x084cf988 } },
+    /* 120 */
+    { { 0xfca6becf,0xd98509de,0x7dffb328,0x2fceae80,0x4778e8b9,0x5d8a15c4,
+        0x73abf77e,0xd57955b2 },
+      { 0x31b5d4f1,0x210da79e,0x3cfa7a1c,0xaa52f04b,0xdc27c20b,0xd4d12089,
+        0x02d141f1,0x8e14ea42 } },
+    /* 121 */
+    { { 0xf2897042,0xeed50345,0x43402c4a,0x8d05331f,0xc8bdfb21,0xc8d9c194,
+        0x2aa4d158,0x597e1a37 },
+      { 0xcf0bd68c,0x0327ec1a,0xab024945,0x6d4be0dc,0xc9fe3e84,0x5b9c8d7a,
+        0x199b4dea,0xca3f0236 } },
+    /* 122 */
+    { { 0x6170bd20,0x592a10b5,0x6d3f5de7,0x0ea897f1,0x44b2ade2,0xa3363ff1,
+        0x309c07e4,0xbde7fd7e },
+      { 0xb8f5432c,0x516bb6d2,0xe043444b,0x210dc1cb,0xf8f95b5a,0x3db01e6f,
+        0x0a7dd198,0xb623ad0e } },
+    /* 123 */
+    { { 0x60c7b65b,0xa75bd675,0x23a4a289,0xab8c5590,0xd7b26795,0xf8220fd0,
+        0x58ec137b,0xd6aa2e46 },
+      { 0x5138bb85,0x10abc00b,0xd833a95c,0x8c31d121,0x1702a32e,0xb24ff00b,
+        0x2dcc513a,0x111662e0 } },
+    /* 124 */
+    { { 0xefb42b87,0x78114015,0x1b6c4dff,0xbd9f5d70,0xa7d7c129,0x66ecccd7,
+        0x94b750f8,0xdb3ee1cb },
+      { 0xf34837cf,0xb26f3db0,0xb9578d4f,0xe7eed18b,0x7c56657d,0x5d2cdf93,
+        0x52206a59,0x886a6442 } },
+    /* 125 */
+    { { 0x65b569ea,0x3c234cfb,0xf72119c1,0x20011141,0xa15a619e,0x8badc85d,
+        0x018a17bc,0xa70cf4eb },
+      { 0x8c4a6a65,0x224f97ae,0x0134378f,0x36e5cf27,0x4f7e0960,0xbe3a609e,
+        0xd1747b77,0xaa4772ab } },
+    /* 126 */
+    { { 0x7aa60cc0,0x67676131,0x0368115f,0xc7916361,0xbbc1bb5a,0xded98bb4,
+        0x30faf974,0x611a6ddc },
+      { 0xc15ee47a,0x30e78cbc,0x4e0d96a5,0x2e896282,0x3dd9ed88,0x36f35adf,
+        0x16429c88,0x5cfffaf8 } },
+    /* 127 */
+    { { 0x9b7a99cd,0xc0d54cff,0x843c45a1,0x7bf3b99d,0x62c739e1,0x038a908f,
+        0x7dc1994c,0x6e5a6b23 },
+      { 0x0ba5db77,0xef8b454e,0xacf60d63,0xb7b8807f,0x76608378,0xe591c0c6,
+        0x242dabcc,0x481a238d } },
+    /* 128 */
+    { { 0x35d0b34a,0xe3417bc0,0x8327c0a7,0x440b386b,0xac0362d1,0x8fb7262d,
+        0xe0cdf943,0x2c41114c },
+      { 0xad95a0b1,0x2ba5cef1,0x67d54362,0xc09b37a8,0x01e486c9,0x26d6cdd2,
+        0x42ff9297,0x20477abf } },
+    /* 129 */
+    { { 0x18d65dbf,0x2f75173c,0x339edad8,0x77bf940e,0xdcf1001c,0x7022d26b,
+        0xc77396b6,0xac66409a },
+      { 0xc6261cc3,0x8b0bb36f,0x190e7e90,0x213f7bc9,0xa45e6c10,0x6541ceba,
+        0xcc122f85,0xce8e6975 } },
+    /* 130 */
+    { { 0xbc0a67d2,0x0f121b41,0x444d248a,0x62d4760a,0x659b4737,0x0e044f1d,
+        0x250bb4a8,0x08fde365 },
+      { 0x848bf287,0xaceec3da,0xd3369d6e,0xc2a62182,0x92449482,0x3582dfdc,
+        0x565d6cd7,0x2f7e2fd2 } },
+    /* 131 */
+    { { 0xc3770fa7,0xae4b92db,0x379043f9,0x095e8d5c,0x17761171,0x54f34e9d,
+        0x907702ae,0xc65be92e },
+      { 0xf6fd0a40,0x2758a303,0xbcce784b,0xe7d822e3,0x4f9767bf,0x7ae4f585,
+        0xd1193b3a,0x4bff8e47 } },
+    /* 132 */
+    { { 0x00ff1480,0xcd41d21f,0x0754db16,0x2ab8fb7d,0xbbe0f3ea,0xac81d2ef,
+        0x5772967d,0x3e4e4ae6 },
+      { 0x3c5303e6,0x7e18f36d,0x92262397,0x3bd9994b,0x1324c3c0,0x9ed70e26,
+        0x58ec6028,0x5388aefd } },
+    /* 133 */
+    { { 0x5e5d7713,0xad1317eb,0x75de49da,0x09b985ee,0xc74fb261,0x32f5bc4f,
+        0x4f75be0e,0x5cf908d1 },
+      { 0x8e657b12,0x76043510,0xb96ed9e6,0xbfd421a5,0x8970ccc2,0x0e29f51f,
+        0x60f00ce2,0xa698ba40 } },
+    /* 134 */
+    { { 0xef748fec,0x73db1686,0x7e9d2cf9,0xe6e755a2,0xce265eff,0x630b6544,
+        0x7aebad8d,0xb142ef8a },
+      { 0x17d5770a,0xad31af9f,0x2cb3412f,0x66af3b67,0xdf3359de,0x6bd60d1b,
+        0x58515075,0xd1896a96 } },
+    /* 135 */
+    { { 0x33c41c08,0xec5957ab,0x5468e2e1,0x87de94ac,0xac472f6c,0x18816b73,
+        0x7981da39,0x267b0e0b },
+      { 0x8e62b988,0x6e554e5d,0x116d21e7,0xd8ddc755,0x3d2a6f99,0x4610faf0,
+        0xa1119393,0xb54e287a } },
+    /* 136 */
+    { { 0x178a876b,0x0a0122b5,0x085104b4,0x51ff96ff,0x14f29f76,0x050b31ab,
+        0x5f87d4e6,0x84abb28b },
+      { 0x8270790a,0xd5ed439f,0x85e3f46b,0x2d6cb59d,0x6c1e2212,0x75f55c1b,
+        0x17655640,0xe5436f67 } },
+    /* 137 */
+    { { 0x2286e8d5,0x53f9025e,0x864453be,0x353c95b4,0xe408e3a0,0xd832f5bd,
+        0x5b9ce99e,0x0404f68b },
+      { 0xa781e8e5,0xcad33bde,0x163c2f5b,0x3cdf5018,0x0119caa3,0x57576960,
+        0x0ac1c701,0x3a4263df } },
+    /* 138 */
+    { { 0x9aeb596d,0xc2965ecc,0x023c92b4,0x01ea03e7,0x2e013961,0x4704b4b6,
+        0x905ea367,0x0ca8fd3f },
+      { 0x551b2b61,0x92523a42,0x390fcd06,0x1eb7a89c,0x0392a63e,0xe7f1d2be,
+        0x4ddb0c33,0x96dca264 } },
+    /* 139 */
+    { { 0x387510af,0x203bb43a,0xa9a36a01,0x846feaa8,0x2f950378,0xd23a5770,
+        0x3aad59dc,0x4363e212 },
+      { 0x40246a47,0xca43a1c7,0xe55dd24d,0xb362b8d2,0x5d8faf96,0xf9b08604,
+        0xd8bb98c4,0x840e115c } },
+    /* 140 */
+    { { 0x1023e8a7,0xf12205e2,0xd8dc7a0b,0xc808a8cd,0x163a5ddf,0xe292a272,
+        0x30ded6d4,0x5e0d6abd },
+      { 0x7cfc0f64,0x07a721c2,0x0e55ed88,0x42eec01d,0x1d1f9db2,0x26a7bef9,
+        0x2945a25a,0x7dea48f4 } },
+    /* 141 */
+    { { 0xe5060a81,0xabdf6f1c,0xf8f95615,0xe79f9c72,0x06ac268b,0xcfd36c54,
+        0xebfd16d1,0xabc2a2be },
+      { 0xd3e2eac7,0x8ac66f91,0xd2dd0466,0x6f10ba63,0x0282d31b,0x6790e377,
+        0x6c7eefc1,0x4ea35394 } },
+    /* 142 */
+    { { 0x5266309d,0xed8a2f8d,0x81945a3e,0x0a51c6c0,0x578c5dc1,0xcecaf45a,
+        0x1c94ffc3,0x3a76e689 },
+      { 0x7d7b0d0f,0x9aace8a4,0x8f584a5f,0x963ace96,0x4e697fbe,0x51a30c72,
+        0x465e6464,0x8212a10a } },
+    /* 143 */
+    { { 0xcfab8caa,0xef7c61c3,0x0e142390,0x18eb8e84,0x7e9733ca,0xcd1dff67,
+        0x599cb164,0xaa7cab71 },
+      { 0xbc837bd1,0x02fc9273,0xc36af5d7,0xc06407d0,0xf423da49,0x17621292,
+        0xfe0617c3,0x40e38073 } },
+    /* 144 */
+    { { 0xa7bf9b7c,0xf4f80824,0x3fbe30d0,0x365d2320,0x97cf9ce3,0xbfbe5320,
+        0xb3055526,0xe3604700 },
+      { 0x6cc6c2c7,0x4dcb9911,0xba4cbee6,0x72683708,0x637ad9ec,0xdcded434,
+        0xa3dee15f,0x6542d677 } },
+    /* 145 */
+    { { 0x7b6c377a,0x3f32b6d0,0x903448be,0x6cb03847,0x20da8af7,0xd6fdd3a8,
+        0x09bb6f21,0xa6534aee },
+      { 0x1035facf,0x30a1780d,0x9dcb47e6,0x35e55a33,0xc447f393,0x6ea50fe1,
+        0xdc9aef22,0xf3cb672f } },
+    /* 146 */
+    { { 0x3b55fd83,0xeb3719fe,0x875ddd10,0xe0d7a46c,0x05cea784,0x33ac9fa9,
+        0xaae870e7,0x7cafaa2e },
+      { 0x1d53b338,0x9b814d04,0xef87e6c6,0xe0acc0a0,0x11672b0f,0xfb93d108,
+        0xb9bd522e,0x0aab13c1 } },
+    /* 147 */
+    { { 0xd2681297,0xddcce278,0xb509546a,0xcb350eb1,0x7661aaf2,0x2dc43173,
+        0x847012e9,0x4b91a602 },
+      { 0x72f8ddcf,0xdcff1095,0x9a911af4,0x08ebf61e,0xc372430e,0x48f4360a,
+        0x72321cab,0x49534c53 } },
+    /* 148 */
+    { { 0xf07b7e9d,0x83df7d71,0x13cd516f,0xa478efa3,0x6c047ee3,0x78ef264b,
+        0xd65ac5ee,0xcaf46c4f },
+      { 0x92aa8266,0xa04d0c77,0x913684bb,0xedf45466,0xae4b16b0,0x56e65168,
+        0x04c6770f,0x14ce9e57 } },
+    /* 149 */
+    { { 0x965e8f91,0x99445e3e,0xcb0f2492,0xd3aca1ba,0x90c8a0a0,0xd31cc70f,
+        0x3e4c9a71,0x1bb708a5 },
+      { 0x558bdd7a,0xd5ca9e69,0x018a26b1,0x734a0508,0x4c9cf1ec,0xb093aa71,
+        0xda300102,0xf9d126f2 } },
+    /* 150 */
+    { { 0xaff9563e,0x749bca7a,0xb49914a0,0xdd077afe,0xbf5f1671,0xe27a0311,
+        0x729ecc69,0x807afcb9 },
+      { 0xc9b08b77,0x7f8a9337,0x443c7e38,0x86c3a785,0x476fd8ba,0x85fafa59,
+        0x6568cd8c,0x751adcd1 } },
+    /* 151 */
+    { { 0x10715c0d,0x8aea38b4,0x8f7697f7,0xd113ea71,0x93fbf06d,0x665eab14,
+        0x2537743f,0x29ec4468 },
+      { 0xb50bebbc,0x3d94719c,0xe4505422,0x399ee5bf,0x8d2dedb1,0x90cd5b3a,
+        0x92a4077d,0xff9370e3 } },
+    /* 152 */
+    { { 0xc6b75b65,0x59a2d69b,0x266651c5,0x4188f8d5,0x3de9d7d2,0x28a9f33e,
+        0xa2a9d01a,0x9776478b },
+      { 0x929af2c7,0x8852622d,0x4e690923,0x334f5d6d,0xa89a51e9,0xce6cc7e5,
+        0xac2f82fa,0x74a6313f } },
+    /* 153 */
+    { { 0xb75f079c,0xb2f4dfdd,0x18e36fbb,0x85b07c95,0xe7cd36dd,0x1b6cfcf0,
+        0x0ff4863d,0xab75be15 },
+      { 0x173fc9b7,0x81b367c0,0xd2594fd0,0xb90a7420,0xc4091236,0x15fdbf03,
+        0x0b4459f6,0x4ebeac2e } },
+    /* 154 */
+    { { 0x5c9f2c53,0xeb6c5fe7,0x8eae9411,0xd2522011,0xf95ac5d8,0xc8887633,
+        0x2c1baffc,0xdf99887b },
+      { 0x850aaecb,0xbb78eed2,0x01d6a272,0x9d49181b,0xb1cdbcac,0x978dd511,
+        0x779f4058,0x27b040a7 } },
+    /* 155 */
+    { { 0xf73b2eb2,0x90405db7,0x8e1b2118,0xe0df8508,0x5962327e,0x501b7152,
+        0xe4cfa3f5,0xb393dd37 },
+      { 0x3fd75165,0xa1230e7b,0xbcd33554,0xd66344c2,0x0f7b5022,0x6c36f1be,
+        0xd0463419,0x09588c12 } },
+    /* 156 */
+    { { 0x02601c3b,0xe086093f,0xcf5c335f,0xfb0252f8,0x894aff28,0x955cf280,
+        0xdb9f648b,0x81c879a9 },
+      { 0xc6f56c51,0x040e687c,0x3f17618c,0xfed47169,0x9059353b,0x44f88a41,
+        0x5fc11bc4,0xfa0d48f5 } },
+    /* 157 */
+    { { 0xe1608e4d,0xbc6e1c9d,0x3582822c,0x010dda11,0x157ec2d7,0xf6b7ddc1,
+        0xb6a367d6,0x8ea0e156 },
+      { 0x2383b3b4,0xa354e02f,0x3f01f53c,0x69966b94,0x2de03ca5,0x4ff6632b,
+        0xfa00b5ac,0x3f5ab924 } },
+    /* 158 */
+    { { 0x59739efb,0x337bb0d9,0xe7ebec0d,0xc751b0f4,0x411a67d1,0x2da52dd6,
+        0x2b74256e,0x8bc76887 },
+      { 0x82d3d253,0xa5be3b72,0xf58d779f,0xa9f679a1,0xe16767bb,0xa1cac168,
+        0x60fcf34f,0xb386f190 } },
+    /* 159 */
+    { { 0x2fedcfc2,0x31f3c135,0x62f8af0d,0x5396bf62,0xe57288c2,0x9a02b4ea,
+        0x1b069c4d,0x4cb460f7 },
+      { 0x5b8095ea,0xae67b4d3,0x6fc07603,0x92bbf859,0xb614a165,0xe1475f66,
+        0x95ef5223,0x52c0d508 } },
+    /* 160 */
+    { { 0x15339848,0x231c210e,0x70778c8d,0xe87a28e8,0x6956e170,0x9d1de661,
+        0x2bb09c0b,0x4ac3c938 },
+      { 0x6998987d,0x19be0551,0xae09f4d6,0x8b2376c4,0x1a3f933d,0x1de0b765,
+        0xe39705f4,0x380d94c7 } },
+    /* 161 */
+    { { 0x81542e75,0x01a355aa,0xee01b9b7,0x96c724a1,0x624d7087,0x6b3a2977,
+        0xde2637af,0x2ce3e171 },
+      { 0xf5d5bc1a,0xcfefeb49,0x2777e2b5,0xa655607e,0x9513756c,0x4feaac2f,
+        0x0b624e4d,0x2e6cd852 } },
+    /* 162 */
+    { { 0x8c31c31d,0x3685954b,0x5bf21a0c,0x68533d00,0x75c79ec9,0x0bd7626e,
+        0x42c69d54,0xca177547 },
+      { 0xf6d2dbb2,0xcc6edaff,0x174a9d18,0xfd0d8cbd,0xaa4578e8,0x875e8793,
+        0x9cab2ce6,0xa976a713 } },
+    /* 163 */
+    { { 0x93fb353d,0x0a651f1b,0x57fcfa72,0xd75cab8b,0x31b15281,0xaa88cfa7,
+        0x0a1f4999,0x8720a717 },
+      { 0x693e1b90,0x8c3e8d37,0x16f6dfc3,0xd345dc0b,0xb52a8742,0x8ea8d00a,
+        0xc769893c,0x9719ef29 } },
+    /* 164 */
+    { { 0x58e35909,0x820eed8d,0x33ddc116,0x9366d8dc,0x6e205026,0xd7f999d0,
+        0xe15704c1,0xa5072976 },
+      { 0xc4e70b2e,0x002a37ea,0x6890aa8a,0x84dcf657,0x645b2a5c,0xcd71bf18,
+        0xf7b77725,0x99389c9d } },
+    /* 165 */
+    { { 0x7ada7a4b,0x238c08f2,0xfd389366,0x3abe9d03,0x766f512c,0x6b672e89,
+        0x202c82e4,0xa88806aa },
+      { 0xd380184e,0x6602044a,0x126a8b85,0xa8cb78c4,0xad844f17,0x79d670c0,
+        0x4738dcfe,0x0043bffb } },
+    /* 166 */
+    { { 0x36d5192e,0x8d59b5dc,0x4590b2af,0xacf885d3,0x11601781,0x83566d0a,
+        0xba6c4866,0x52f3ef01 },
+      { 0x0edcb64d,0x3986732a,0x8068379f,0x0a482c23,0x7040f309,0x16cbe5fa,
+        0x9ef27e75,0x3296bd89 } },
+    /* 167 */
+    { { 0x454d81d7,0x476aba89,0x51eb9b3c,0x9eade7ef,0x81c57986,0x619a21cd,
+        0xaee571e9,0x3b90febf },
+      { 0x5496f7cb,0x9393023e,0x7fb51bc4,0x55be41d8,0x99beb5ce,0x03f1dd48,
+        0x9f810b18,0x6e88069d } },
+    /* 168 */
+    { { 0xb43ea1db,0xce37ab11,0x5259d292,0x0a7ff1a9,0x8f84f186,0x851b0221,
+        0xdefaad13,0xa7222bea },
+      { 0x2b0a9144,0xa2ac78ec,0xf2fa59c5,0x5a024051,0x6147ce38,0x91d1eca5,
+        0xbc2ac690,0xbe94d523 } },
+    /* 169 */
+    { { 0x0b226ce7,0x72f4945e,0x967e8b70,0xb8afd747,0x85a6c63e,0xedea46f1,
+        0x9be8c766,0x7782defe },
+      { 0x3db38626,0x760d2aa4,0x76f67ad1,0x460ae787,0x54499cdb,0x341b86fc,
+        0xa2892e4b,0x03838567 } },
+    /* 170 */
+    { { 0x79ec1a0f,0x2d8daefd,0xceb39c97,0x3bbcd6fd,0x58f61a95,0xf5575ffc,
+        0xadf7b420,0xdbd986c4 },
+      { 0x15f39eb7,0x81aa8814,0xb98d976c,0x6ee2fcf5,0xcf2f717d,0x5465475d,
+        0x6860bbd0,0x8e24d3c4 } },
+    /* 171 */
+    { { 0x9a587390,0x749d8e54,0x0cbec588,0x12bb194f,0xb25983c6,0x46e07da4,
+        0x407bafc8,0x541a99c4 },
+      { 0x624c8842,0xdb241692,0xd86c05ff,0x6044c12a,0x4f7fcf62,0xc59d14b4,
+        0xf57d35d1,0xc0092c49 } },
+    /* 172 */
+    { { 0xdf2e61ef,0xd3cc75c3,0x2e1b35ca,0x7e8841c8,0x909f29f4,0xc62d30d1,
+        0x7286944d,0x75e40634 },
+      { 0xbbc237d0,0xe7d41fc5,0xec4f01c9,0xc9537bf0,0x282bd534,0x91c51a16,
+        0xc7848586,0x5b7cb658 } },
+    /* 173 */
+    { { 0x8a28ead1,0x964a7084,0xfd3b47f6,0x802dc508,0x767e5b39,0x9ae4bfd1,
+        0x8df097a1,0x7ae13eba },
+      { 0xeadd384e,0xfd216ef8,0xb6b2ff06,0x0361a2d9,0x4bcdb5f3,0x204b9878,
+        0xe2a8e3fd,0x787d8074 } },
+    /* 174 */
+    { { 0x757fbb1c,0xc5e25d6b,0xca201deb,0xe47bddb2,0x6d2233ff,0x4a55e9a3,
+        0x9ef28484,0x5c222819 },
+      { 0x88315250,0x773d4a85,0x827097c1,0x21b21a2b,0xdef5d33f,0xab7c4ea1,
+        0xbaf0f2b0,0xe45d37ab } },
+    /* 175 */
+    { { 0x28511c8a,0xd2df1e34,0xbdca6cd3,0xebb229c8,0x627c39a7,0x578a71a7,
+        0x84dfb9d3,0xed7bc122 },
+      { 0x93dea561,0xcf22a6df,0xd48f0ed1,0x5443f18d,0x5bad23e8,0xd8b86140,
+        0x45ca6d27,0xaac97cc9 } },
+    /* 176 */
+    { { 0xa16bd00a,0xeb54ea74,0xf5c0bcc1,0xd839e9ad,0x1f9bfc06,0x092bb7f1,
+        0x1163dc4e,0x318f97b3 },
+      { 0xc30d7138,0xecc0c5be,0xabc30220,0x44e8df23,0xb0223606,0x2bb7972f,
+        0x9a84ff4d,0xfa41faa1 } },
+    /* 177 */
+    { { 0xa6642269,0x4402d974,0x9bb783bd,0xc81814ce,0x7941e60b,0x398d38e4,
+        0x1d26e9e2,0x38bb6b2c },
+      { 0x6a577f87,0xc64e4a25,0xdc11fe1c,0x8b52d253,0x62280728,0xff336abf,
+        0xce7601a5,0x94dd0905 } },
+    /* 178 */
+    { { 0xde93f92a,0x156cf7dc,0x89b5f315,0xa01333cb,0xc995e750,0x02404df9,
+        0xd25c2ae9,0x92077867 },
+      { 0x0bf39d44,0xe2471e01,0x96bb53d7,0x5f2c9020,0x5c9c3d8f,0x4c44b7b3,
+        0xd29beb51,0x81e8428b } },
+    /* 179 */
+    { { 0xc477199f,0x6dd9c2ba,0x6b5ecdd9,0x8cb8eeee,0xee40fd0e,0x8af7db3f,
+        0xdbbfa4b1,0x1b94ab62 },
+      { 0xce47f143,0x44f0d8b3,0x63f46163,0x51e623fc,0xcc599383,0xf18f270f,
+        0x055590ee,0x06a38e28 } },
+    /* 180 */
+    { { 0xb3355b49,0x2e5b0139,0xb4ebf99b,0x20e26560,0xd269f3dc,0xc08ffa6b,
+        0x83d9d4f8,0xa7b36c20 },
+      { 0x1b3e8830,0x64d15c3a,0xa89f9c0b,0xd5fceae1,0xe2d16930,0xcfeee4a2,
+        0xa2822a20,0xbe54c6b4 } },
+    /* 181 */
+    { { 0x8d91167c,0xd6cdb3df,0xe7a6625e,0x517c3f79,0x346ac7f4,0x7105648f,
+        0xeae022bb,0xbf30a5ab },
+      { 0x93828a68,0x8e7785be,0x7f3ef036,0x5161c332,0x592146b2,0xe11b5feb,
+        0x2732d13a,0xd1c820de } },
+    /* 182 */
+    { { 0x9038b363,0x043e1347,0x6b05e519,0x58c11f54,0x6026cad1,0x4fe57abe,
+        0x68a18da3,0xb7d17bed },
+      { 0xe29c2559,0x44ca5891,0x5bfffd84,0x4f7a0376,0x74e46948,0x498de4af,
+        0x6412cc64,0x3997fd5e } },
+    /* 183 */
+    { { 0x8bd61507,0xf2074682,0x34a64d2a,0x29e132d5,0x8a8a15e3,0xffeddfb0,
+        0x3c6c13e8,0x0eeb8929 },
+      { 0xa7e259f8,0xe9b69a3e,0xd13e7e67,0xce1db7e6,0xad1fa685,0x277318f6,
+        0xc922b6ef,0x228916f8 } },
+    /* 184 */
+    { { 0x0a12ab5b,0x959ae25b,0x957bc136,0xcc11171f,0xd16e2b0c,0x8058429e,
+        0x6e93097e,0xec05ad1d },
+      { 0xac3f3708,0x157ba5be,0x30b59d77,0x31baf935,0x118234e5,0x47b55237,
+        0x7ff11b37,0x7d314156 } },
+    /* 185 */
+    { { 0xf6dfefab,0x7bd9c05c,0xdcb37707,0xbe2f2268,0x3a38bb95,0xe53ead97,
+        0x9bc1d7a3,0xe9ce66fc },
+      { 0x6f6a02a1,0x75aa1576,0x60e600ed,0x38c087df,0x68cdc1b9,0xf8947f34,
+        0x72280651,0xd9650b01 } },
+    /* 186 */
+    { { 0x5a057e60,0x504b4c4a,0x8def25e4,0xcbccc3be,0x17c1ccbd,0xa6353208,
+        0x804eb7a2,0x14d6699a },
+      { 0xdb1f411a,0x2c8a8415,0xf80d769c,0x09fbaf0b,0x1c2f77ad,0xb4deef90,
+        0x0d43598a,0x6f4c6841 } },
+    /* 187 */
+    { { 0x96c24a96,0x8726df4e,0xfcbd99a3,0x534dbc85,0x8b2ae30a,0x3c466ef2,
+        0x61189abb,0x4c4350fd },
+      { 0xf855b8da,0x2967f716,0x463c38a1,0x41a42394,0xeae93343,0xc37e1413,
+        0x5a3118b5,0xa726d242 } },
+    /* 188 */
+    { { 0x948c1086,0xdae6b3ee,0xcbd3a2e1,0xf1de503d,0x03d022f3,0x3f35ed3f,
+        0xcc6cf392,0x13639e82 },
+      { 0xcdafaa86,0x9ac938fb,0x2654a258,0xf45bc5fb,0x45051329,0x1963b26e,
+        0xc1a335a3,0xca9365e1 } },
+    /* 189 */
+    { { 0x4c3b2d20,0x3615ac75,0x904e241b,0x742a5417,0xcc9d071d,0xb08521c4,
+        0x970b72a5,0x9ce29c34 },
+      { 0x6d3e0ad6,0x8cc81f73,0xf2f8434c,0x8060da9e,0x6ce862d9,0x35ed1d1a,
+        0xab42af98,0x48c4abd7 } },
+    /* 190 */
+    { { 0x40c7485a,0xd221b0cc,0xe5274dbf,0xead455bb,0x9263d2e8,0x493c7698,
+        0xf67b33cb,0x78017c32 },
+      { 0x930cb5ee,0xb9d35769,0x0c408ed2,0xc0d14e94,0x272f1a4d,0xf8b7bf55,
+        0xde5c1c04,0x53cd0454 } },
+    /* 191 */
+    { { 0x5d28ccac,0xbcd585fa,0x005b746e,0x5f823e56,0xcd0123aa,0x7c79f0a1,
+        0xd3d7fa8f,0xeea465c1 },
+      { 0x0551803b,0x7810659f,0x7ce6af70,0x6c0b599f,0x29288e70,0x4195a770,
+        0x7ae69193,0x1b6e42a4 } },
+    /* 192 */
+    { { 0xf67d04c3,0x2e80937c,0x89eeb811,0x1e312be2,0x92594d60,0x56b5d887,
+        0x187fbd3d,0x0224da14 },
+      { 0x0c5fe36f,0x87abb863,0x4ef51f5f,0x580f3c60,0xb3b429ec,0x964fb1bf,
+        0x42bfff33,0x60838ef0 } },
+    /* 193 */
+    { { 0x7e0bbe99,0x432cb2f2,0x04aa39ee,0x7bda44f3,0x9fa93903,0x5f497c7a,
+        0x2d331643,0x636eb202 },
+      { 0x93ae00aa,0xfcfd0e61,0x31ae6d2f,0x875a00fe,0x9f93901c,0xf43658a2,
+        0x39218bac,0x8844eeb6 } },
+    /* 194 */
+    { { 0x6b3bae58,0x114171d2,0x17e39f3e,0x7db3df71,0x81a8eada,0xcd37bc7f,
+        0x51fb789e,0x27ba83dc },
+      { 0xfbf54de5,0xa7df439f,0xb5fe1a71,0x7277030b,0xdb297a48,0x42ee8e35,
+        0x87f3a4ab,0xadb62d34 } },
+    /* 195 */
+    { { 0xa175df2a,0x9b1168a2,0x618c32e9,0x082aa04f,0x146b0916,0xc9e4f2e7,
+        0x75e7c8b2,0xb990fd76 },
+      { 0x4df37313,0x0829d96b,0xd0b40789,0x1c205579,0x78087711,0x66c9ae4a,
+        0x4d10d18d,0x81707ef9 } },
+    /* 196 */
+    { { 0x03d6ff96,0x97d7cab2,0x0d843360,0x5b851bfc,0xd042db4b,0x268823c4,
+        0xd5a8aa5c,0x3792daea },
+      { 0x941afa0b,0x52818865,0x42d83671,0xf3e9e741,0x5be4e0a7,0x17c82527,
+        0x94b001ba,0x5abd635e } },
+    /* 197 */
+    { { 0x0ac4927c,0x727fa84e,0xa7c8cf23,0xe3886035,0x4adca0df,0xa4bcd5ea,
+        0x846ab610,0x5995bf21 },
+      { 0x829dfa33,0xe90f860b,0x958fc18b,0xcaafe2ae,0x78630366,0x9b3baf44,
+        0xd483411e,0x44c32ca2 } },
+    /* 198 */
+    { { 0xe40ed80c,0xa74a97f1,0x31d2ca82,0x5f938cb1,0x7c2d6ad9,0x53f2124b,
+        0x8082a54c,0x1f2162fb },
+      { 0x720b173e,0x7e467cc5,0x085f12f9,0x40e8a666,0x4c9d65dc,0x8cebc20e,
+        0xc3e907c9,0x8f1d402b } },
+    /* 199 */
+    { { 0xfbc4058a,0x4f592f9c,0x292f5670,0xb15e14b6,0xbc1d8c57,0xc55cfe37,
+        0x926edbf9,0xb1980f43 },
+      { 0x32c76b09,0x98c33e09,0x33b07f78,0x1df5279d,0x863bb461,0x6f08ead4,
+        0x37448e45,0x2828ad9b } },
+    /* 200 */
+    { { 0xc4cf4ac5,0x696722c4,0xdde64afb,0xf5ac1a3f,0xe0890832,0x0551baa2,
+        0x5a14b390,0x4973f127 },
+      { 0x322eac5d,0xe59d8335,0x0bd9b568,0x5e07eef5,0xa2588393,0xab36720f,
+        0xdb168ac7,0x6dac8ed0 } },
+    /* 201 */
+    { { 0xeda835ef,0xf7b545ae,0x1d10ed51,0x4aa113d2,0x13741b09,0x035a65e0,
+        0x20b9de4c,0x4b23ef59 },
+      { 0x3c4c7341,0xe82bb680,0x3f58bc37,0xd457706d,0xa51e3ee8,0x73527863,
+        0xddf49a4e,0x4dd71534 } },
+    /* 202 */
+    { { 0x95476cd9,0xbf944672,0xe31a725b,0x648d072f,0xfc4b67e0,0x1441c8b8,
+        0x2f4a4dbb,0xfd317000 },
+      { 0x8995d0e1,0x1cb43ff4,0x0ef729aa,0x76e695d1,0x41798982,0xe0d5f976,
+        0x9569f365,0x14fac58c } },
+    /* 203 */
+    { { 0xf312ae18,0xad9a0065,0xfcc93fc9,0x51958dc0,0x8a7d2846,0xd9a14240,
+        0x36abda50,0xed7c7651 },
+      { 0x25d4abbc,0x46270f1a,0xf1a113ea,0x9b5dd8f3,0x5b51952f,0xc609b075,
+        0x4d2e9f53,0xfefcb7f7 } },
+    /* 204 */
+    { { 0xba119185,0xbd09497a,0xaac45ba4,0xd54e8c30,0xaa521179,0x492479de,
+        0x87e0d80b,0x1801a57e },
+      { 0xfcafffb0,0x073d3f8d,0xae255240,0x6cf33c0b,0x5b5fdfbc,0x781d763b,
+        0x1ead1064,0x9f8fc11e } },
+    /* 205 */
+    { { 0x5e69544c,0x1583a171,0xf04b7813,0x0eaf8567,0x278a4c32,0x1e22a8fd,
+        0x3d3a69a9,0xa9d3809d },
+      { 0x59a2da3b,0x936c2c2c,0x1895c847,0x38ccbcf6,0x63d50869,0x5e65244e,
+        0xe1178ef7,0x3006b9ae } },
+    /* 206 */
+    { { 0xc9eead28,0x0bb1f2b0,0x89f4dfbc,0x7eef635d,0xb2ce8939,0x074757fd,
+        0x45f8f761,0x0ab85fd7 },
+      { 0x3e5b4549,0xecda7c93,0x97922f21,0x4be2bb5c,0xb43b8040,0x261a1274,
+        0x11e942c2,0xb122d675 } },
+    /* 207 */
+    { { 0x66a5ae7a,0x3be607be,0x76adcbe3,0x01e703fa,0x4eb6e5c5,0xaf904301,
+        0x097dbaec,0x9f599dc1 },
+      { 0x0ff250ed,0x6d75b718,0x349a20dc,0x8eb91574,0x10b227a3,0x425605a4,
+        0x8a294b78,0x7d5528e0 } },
+    /* 208 */
+    { { 0x20c26def,0xf0f58f66,0x582b2d1e,0x025585ea,0x01ce3881,0xfbe7d79b,
+        0x303f1730,0x28ccea01 },
+      { 0x79644ba5,0xd1dabcd1,0x06fff0b8,0x1fc643e8,0x66b3e17b,0xa60a76fc,
+        0xa1d013bf,0xc18baf48 } },
+    /* 209 */
+    { { 0x5dc4216d,0x34e638c8,0x206142ac,0x00c01067,0x95f5064a,0xd453a171,
+        0xb7a9596b,0x9def809d },
+      { 0x67ab8d2c,0x41e8642e,0x6237a2b6,0xb4240433,0x64c4218b,0x7d506a6d,
+        0x68808ce5,0x0357f8b0 } },
+    /* 210 */
+    { { 0x4cd2cc88,0x8e9dbe64,0xf0b8f39d,0xcc61c28d,0xcd30a0c8,0x4a309874,
+        0x1b489887,0xe4a01add },
+      { 0xf57cd8f9,0x2ed1eeac,0xbd594c48,0x1b767d3e,0x7bd2f787,0xa7295c71,
+        0xce10cc30,0x466d7d79 } },
+    /* 211 */
+    { { 0x9dada2c7,0x47d31892,0x8f9aa27d,0x4fa0a6c3,0x820a59e1,0x90e4fd28,
+        0x451ead1a,0xc672a522 },
+      { 0x5d86b655,0x30607cc8,0xf9ad4af1,0xf0235d3b,0x571172a6,0x99a08680,
+        0xf2a67513,0x5e3d64fa } },
+    /* 212 */
+    { { 0x9b3b4416,0xaa6410c7,0xeab26d99,0xcd8fcf85,0xdb656a74,0x5ebff74a,
+        0xeb8e42fc,0x6c8a7a95 },
+      { 0xb02a63bd,0x10c60ba7,0x8b8f0047,0x6b2f2303,0x312d90b0,0x8c6c3738,
+        0xad82ca91,0x348ae422 } },
+    /* 213 */
+    { { 0x5ccda2fb,0x7f474663,0x8e0726d2,0x22accaa1,0x492b1f20,0x85adf782,
+        0xd9ef2d2e,0xc1074de0 },
+      { 0xae9a65b3,0xfcf3ce44,0x05d7151b,0xfd71e4ac,0xce6a9788,0xd4711f50,
+        0xc9e54ffc,0xfbadfbdb } },
+    /* 214 */
+    { { 0x20a99363,0x1713f1cd,0x6cf22775,0xb915658f,0x24d359b2,0x968175cd,
+        0x83716fcd,0xb7f976b4 },
+      { 0x5d6dbf74,0x5758e24d,0x71c3af36,0x8d23bafd,0x0243dfe3,0x48f47760,
+        0xcafcc805,0xf4d41b2e } },
+    /* 215 */
+    { { 0xfdabd48d,0x51f1cf28,0x32c078a4,0xce81be36,0x117146e9,0x6ace2974,
+        0xe0160f10,0x180824ea },
+      { 0x66e58358,0x0387698b,0xce6ca358,0x63568752,0x5e41e6c5,0x82380e34,
+        0x83cf6d25,0x67e5f639 } },
+    /* 216 */
+    { { 0xcf4899ef,0xf89ccb8d,0x9ebb44c0,0x949015f0,0xb2598ec9,0x546f9276,
+        0x04c11fc6,0x9fef789a },
+      { 0x53d2a071,0x6d367ecf,0xa4519b09,0xb10e1a7f,0x611e2eef,0xca6b3fb0,
+        0xa99c4e20,0xbc80c181 } },
+    /* 217 */
+    { { 0xe5eb82e6,0x972536f8,0xf56cb920,0x1a484fc7,0x50b5da5e,0xc78e2171,
+        0x9f8cdf10,0x49270e62 },
+      { 0xea6b50ad,0x1a39b7bb,0xa2388ffc,0x9a0284c1,0x8107197b,0x5403eb17,
+        0x61372f7f,0xd2ee52f9 } },
+    /* 218 */
+    { { 0x88e0362a,0xd37cd285,0x8fa5d94d,0x442fa8a7,0xa434a526,0xaff836e5,
+        0xe5abb733,0xdfb478be },
+      { 0x673eede6,0xa91f1ce7,0x2b5b2f04,0xa5390ad4,0x5530da2f,0x5e66f7bf,
+        0x08df473a,0xd9a140b4 } },
+    /* 219 */
+    { { 0x6e8ea498,0x0e0221b5,0x3563ee09,0x62347829,0x335d2ade,0xe06b8391,
+        0x623f4b1a,0x760c058d },
+      { 0xc198aa79,0x0b89b58c,0xf07aba7f,0xf74890d2,0xfde2556a,0x4e204110,
+        0x8f190409,0x7141982d } },
+    /* 220 */
+    { { 0x4d4b0f45,0x6f0a0e33,0x392a94e1,0xd9280b38,0xb3c61d5e,0x3af324c6,
+        0x89d54e47,0x3af9d1ce },
+      { 0x20930371,0xfd8f7981,0x21c17097,0xeda2664c,0xdc42309b,0x0e9545dc,
+        0x73957dd6,0xb1f815c3 } },
+    /* 221 */
+    { { 0x89fec44a,0x84faa78e,0x3caa4caf,0xc8c2ae47,0xc1b6a624,0x691c807d,
+        0x1543f052,0xa41aed14 },
+      { 0x7d5ffe04,0x42435399,0x625b6e20,0x8bacb2df,0x87817775,0x85d660be,
+        0x86fb60ef,0xd6e9c1dd } },
+    /* 222 */
+    { { 0xc6853264,0x3aa2e97e,0xe2304a0b,0x771533b7,0xb8eae9be,0x1b912bb7,
+        0xae9bf8c2,0x9c9c6e10 },
+      { 0xe030b74c,0xa2309a59,0x6a631e90,0x4ed7494d,0xa49b79f2,0x89f44b23,
+        0x40fa61b6,0x566bd596 } },
+    /* 223 */
+    { { 0xc18061f3,0x066c0118,0x7c83fc70,0x190b25d3,0x27273245,0xf05fc8e0,
+        0xf525345e,0xcf2c7390 },
+      { 0x10eb30cf,0xa09bceb4,0x0d77703a,0xcfd2ebba,0x150ff255,0xe842c43a,
+        0x8aa20979,0x02f51755 } },
+    /* 224 */
+    { { 0xaddb7d07,0x396ef794,0x24455500,0x0b4fc742,0xc78aa3ce,0xfaff8eac,
+        0xe8d4d97d,0x14e9ada5 },
+      { 0x2f7079e2,0xdaa480a1,0xe4b0800e,0x45baa3cd,0x7838157d,0x01765e2d,
+        0x8e9d9ae8,0xa0ad4fab } },
+    /* 225 */
+    { { 0x4a653618,0x0bfb7621,0x31eaaa5f,0x1872813c,0x44949d5e,0x1553e737,
+        0x6e56ed1e,0xbcd530b8 },
+      { 0x32e9c47b,0x169be853,0xb50059ab,0xdc2776fe,0x192bfbb4,0xcdba9761,
+        0x6979341d,0x909283cf } },
+    /* 226 */
+    { { 0x76e81a13,0x67b00324,0x62171239,0x9bee1a99,0xd32e19d6,0x08ed361b,
+        0xace1549a,0x35eeb7c9 },
+      { 0x7e4e5bdc,0x1280ae5a,0xb6ceec6e,0x2dcd2cd3,0x6e266bc1,0x52e4224c,
+        0x448ae864,0x9a8b2cf4 } },
+    /* 227 */
+    { { 0x09d03b59,0xf6471bf2,0xb65af2ab,0xc90e62a3,0xebd5eec9,0xff7ff168,
+        0xd4491379,0x6bdb60f4 },
+      { 0x8a55bc30,0xdadafebc,0x10097fe0,0xc79ead16,0x4c1e3bdd,0x42e19741,
+        0x94ba08a9,0x01ec3cfd } },
+    /* 228 */
+    { { 0xdc9485c2,0xba6277eb,0x22fb10c7,0x48cc9a79,0x70a28d8a,0x4f61d60f,
+        0x475464f6,0xd1acb1c0 },
+      { 0x26f36612,0xd26902b1,0xe0618d8b,0x59c3a44e,0x308357ee,0x4df8a813,
+        0x405626c2,0x7dcd079d } },
+    /* 229 */
+    { { 0xf05a4b48,0x5ce7d4d3,0x37230772,0xadcd2952,0x812a915a,0xd18f7971,
+        0x377d19b8,0x0bf53589 },
+      { 0x6c68ea73,0x35ecd95a,0x823a584d,0xc7f3bbca,0xf473a723,0x9fb674c6,
+        0xe16686fc,0xd28be4d9 } },
+    /* 230 */
+    { { 0x38fa8e4b,0x5d2b9906,0x893fd8fc,0x559f186e,0x436fb6fc,0x3a6de2aa,
+        0x510f88ce,0xd76007aa },
+      { 0x523a4988,0x2d10aab6,0x74dd0273,0xb455cf44,0xa3407278,0x7f467082,
+        0xb303bb01,0xf2b52f68 } },
+    /* 231 */
+    { { 0x9835b4ca,0x0d57eafa,0xbb669cbc,0x2d2232fc,0xc6643198,0x8eeeb680,
+        0xcc5aed3a,0xd8dbe98e },
+      { 0xc5a02709,0xcba9be3f,0xf5ba1fa8,0x30be68e5,0xf10ea852,0xfebd43cd,
+        0xee559705,0xe01593a3 } },
+    /* 232 */
+    { { 0xea75a0a6,0xd3e5af50,0x57858033,0x512226ac,0xd0176406,0x6fe6d50f,
+        0xaeb8ef06,0xafec07b1 },
+      { 0x80bb0a31,0x7fb99567,0x37309aae,0x6f1af3cc,0x01abf389,0x9153a15a,
+        0x6e2dbfdd,0xa71b9354 } },
+    /* 233 */
+    { { 0x18f593d2,0xbf8e12e0,0xa078122b,0xd1a90428,0x0ba4f2ad,0x150505db,
+        0x628523d9,0x53a2005c },
+      { 0xe7f2b935,0x07c8b639,0xc182961a,0x2bff975a,0x7518ca2c,0x86bceea7,
+        0x3d588e3d,0xbf47d19b } },
+    /* 234 */
+    { { 0xdd7665d5,0x672967a7,0x2f2f4de5,0x4e303057,0x80d4903f,0x144005ae,
+        0x39c9a1b6,0x001c2c7f },
+      { 0x69efc6d6,0x143a8014,0x7bc7a724,0xc810bdaa,0xa78150a4,0x5f65670b,
+        0x86ffb99b,0xfdadf8e7 } },
+    /* 235 */
+    { { 0xffc00785,0xfd38cb88,0x3b48eb67,0x77fa7591,0xbf368fbc,0x0454d055,
+        0x5aa43c94,0x3a838e4d },
+      { 0x3e97bb9a,0x56166329,0x441d94d9,0x9eb93363,0x0adb2a83,0x515591a6,
+        0x873e1da3,0x3cdb8257 } },
+    /* 236 */
+    { { 0x7de77eab,0x137140a9,0x41648109,0xf7e1c50d,0xceb1d0df,0x762dcad2,
+        0xf1f57fba,0x5a60cc89 },
+      { 0x40d45673,0x80b36382,0x5913c655,0x1b82be19,0xdd64b741,0x057284b8,
+        0xdbfd8fc0,0x922ff56f } },
+    /* 237 */
+    { { 0xc9a129a1,0x1b265dee,0xcc284e04,0xa5b1ce57,0xcebfbe3c,0x04380c46,
+        0xf6c5cd62,0x72919a7d },
+      { 0x8fb90f9a,0x298f453a,0x88e4031b,0xd719c00b,0x796f1856,0xe32c0e77,
+        0x3624089a,0x5e791780 } },
+    /* 238 */
+    { { 0x7f63cdfb,0x5c16ec55,0xf1cae4fd,0x8e6a3571,0x560597ca,0xfce26bea,
+        0xe24c2fab,0x4e0a5371 },
+      { 0xa5765357,0x276a40d3,0x0d73a2b4,0x3c89af44,0x41d11a32,0xb8f370ae,
+        0xd56604ee,0xf5ff7818 } },
+    /* 239 */
+    { { 0x1a09df21,0xfbf3e3fe,0xe66e8e47,0x26d5d28e,0x29c89015,0x2096bd0a,
+        0x533f5e64,0xe41df0e9 },
+      { 0xb3ba9e3f,0x305fda40,0x2604d895,0xf2340ceb,0x7f0367c7,0x0866e192,
+        0xac4f155f,0x8edd7d6e } },
+    /* 240 */
+    { { 0x0bfc8ff3,0xc9a1dc0e,0xe936f42f,0x14efd82b,0xcca381ef,0x67016f7c,
+        0xed8aee96,0x1432c1ca },
+      { 0x70b23c26,0xec684829,0x0735b273,0xa64fe873,0xeaef0f5a,0xe389f6e5,
+        0x5ac8d2c6,0xcaef480b } },
+    /* 241 */
+    { { 0x75315922,0x5245c978,0x3063cca5,0xd8295171,0xb64ef2cb,0xf3ce60d0,
+        0x8efae236,0xd0ba177e },
+      { 0xb1b3af60,0x53a9ae8f,0x3d2da20e,0x1a796ae5,0xdf9eef28,0x01d63605,
+        0x1c54ae16,0xf31c957c } },
+    /* 242 */
+    { { 0x49cc4597,0xc0f58d52,0xbae0a028,0xdc5015b0,0x734a814a,0xefc5fc55,
+        0x96e17c3a,0x013404cb },
+      { 0xc9a824bf,0xb29e2585,0x001eaed7,0xd593185e,0x61ef68ac,0x8d6ee682,
+        0x91933e6c,0x6f377c4b } },
+    /* 243 */
+    { { 0xa8333fd2,0x9f93bad1,0x5a2a95b8,0xa8930202,0xeaf75ace,0x211e5037,
+        0xd2d09506,0x6dba3e4e },
+      { 0xd04399cd,0xa48ef98c,0xe6b73ade,0x1811c66e,0xc17ecaf3,0x72f60752,
+        0x3becf4a7,0xf13cf342 } },
+    /* 244 */
+    { { 0xa919e2eb,0xceeb9ec0,0xf62c0f68,0x83a9a195,0x7aba2299,0xcfba3bb6,
+        0x274bbad3,0xc83fa9a9 },
+      { 0x62fa1ce0,0x0d7d1b0b,0x3418efbf,0xe58b60f5,0x52706f04,0xbfa8ef9e,
+        0x5d702683,0xb49d70f4 } },
+    /* 245 */
+    { { 0xfad5513b,0x914c7510,0xb1751e2d,0x05f32eec,0xd9fb9d59,0x6d850418,
+        0x0c30f1cf,0x59cfadbb },
+      { 0x55cb7fd6,0xe167ac23,0x820426a3,0x249367b8,0x90a78864,0xeaeec58c,
+        0x354a4b67,0x5babf362 } },
+    /* 246 */
+    { { 0xee424865,0x37c981d1,0xf2e5577f,0x8b002878,0xb9e0c058,0x702970f1,
+        0x9026c8f0,0x6188c6a7 },
+      { 0xd0f244da,0x06f9a19b,0xfb080873,0x1ecced5c,0x9f213637,0x35470f9b,
+        0xdf50b9d9,0x993fe475 } },
+    /* 247 */
+    { { 0x9b2c3609,0x68e31cdf,0x2c46d4ea,0x84eb19c0,0x9a775101,0x7ac9ec1a,
+        0x4c80616b,0x81f76466 },
+      { 0x75fbe978,0x1d7c2a5a,0xf183b356,0x6743fed3,0x501dd2bf,0x838d1f04,
+        0x5fe9060d,0x564a812a } },
+    /* 248 */
+    { { 0xfa817d1d,0x7a5a64f4,0xbea82e0f,0x55f96844,0xcd57f9aa,0xb5ff5a0f,
+        0x00e51d6c,0x226bf3cf },
+      { 0x2f2833cf,0xd6d1a9f9,0x4f4f89a8,0x20a0a35a,0x8f3f7f77,0x11536c49,
+        0xff257836,0x68779f47 } },
+    /* 249 */
+    { { 0x73043d08,0x79b0c1c1,0x1fc020fa,0xa5446774,0x9a6d26d0,0xd3767e28,
+        0xeb092e0b,0x97bcb0d1 },
+      { 0xf32ed3c3,0x2ab6eaa8,0xb281bc48,0xc8a4f151,0xbfa178f3,0x4d1bf4f3,
+        0x0a784655,0xa872ffe8 } },
+    /* 250 */
+    { { 0xa32b2086,0xb1ab7935,0x8160f486,0xe1eb710e,0x3b6ae6be,0x9bd0cd91,
+        0xb732a36a,0x02812bfc },
+      { 0xcf605318,0xa63fd7ca,0xfdfd6d1d,0x646e5d50,0x2102d619,0xa1d68398,
+        0xfe5396af,0x07391cc9 } },
+    /* 251 */
+    { { 0x8b80d02b,0xc50157f0,0x62877f7f,0x6b8333d1,0x78d542ae,0x7aca1af8,
+        0x7e6d2a08,0x355d2adc },
+      { 0x287386e1,0xb41f335a,0xf8e43275,0xfd272a94,0xe79989ea,0x286ca2cd,
+        0x7c2a3a79,0x3dc2b1e3 } },
+    /* 252 */
+    { { 0x04581352,0xd689d21c,0x376782be,0x0a00c825,0x9fed701f,0x203bd590,
+        0x3ccd846b,0xc4786910 },
+      { 0x24c768ed,0x5dba7708,0x6841f657,0x72feea02,0x6accce0e,0x73313ed5,
+        0xd5bb4d32,0xccc42968 } },
+    /* 253 */
+    { { 0x3d7620b9,0x94e50de1,0x5992a56a,0xd89a5c8a,0x675487c9,0xdc007640,
+        0xaa4871cf,0xe147eb42 },
+      { 0xacf3ae46,0x274ab4ee,0x50350fbe,0xfd4936fb,0x48c840ea,0xdf2afe47,
+        0x080e96e3,0x239ac047 } },
+    /* 254 */
+    { { 0x2bfee8d4,0x481d1f35,0xfa7b0fec,0xce80b5cf,0x2ce9af3c,0x105c4c9e,
+        0xf5f7e59d,0xc55fa1a3 },
+      { 0x8257c227,0x3186f14e,0x342be00b,0xc5b1653f,0xaa904fb2,0x09afc998,
+        0xd4f4b699,0x094cd99c } },
+    /* 255 */
+    { { 0xd703beba,0x8a981c84,0x32ceb291,0x8631d150,0xe3bd49ec,0xa445f2c9,
+        0x42abad33,0xb90a30b6 },
+      { 0xb4a5abf9,0xb465404f,0x75db7603,0x004750c3,0xca35d89f,0x6f9a42cc,
+        0x1b7924f7,0x019f8b9a } },
+};
+
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_base_8(sp_point_256* r, const sp_digit* k,
+        int map, void* heap)
+{
+    return sp_256_ecc_mulmod_stripe_8(r, &p256_base, p256_table,
+                                      k, map, heap);
+}
+
+#endif
+
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * km    Scalar to multiply by.
+ * r     Resulting point.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_base_256(mp_int* km, ecc_point* r, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 p;
+    sp_digit kd[8];
+#endif
+    sp_point_256* point;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    err = sp_256_point_new_8(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+    if (err == MP_OKAY) {
+        sp_256_from_mp(k, 8, km);
+
+            err = sp_256_ecc_mulmod_base_8(point, k, map, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_point_to_ecc_point_8(point, r);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_8(point, 0, heap);
+
+    return err;
+}
+
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+                                                        defined(HAVE_ECC_VERIFY)
+/* Returns 1 if the number of zero.
+ * Implementation is constant time.
+ *
+ * a  Number to check.
+ * returns 1 if the number is zero and 0 otherwise.
+ */
+static int sp_256_iszero_8(const sp_digit* a)
+{
+    return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7]) == 0;
+}
+
+#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+/* Add 1 to a. (a = a + 1)
+ *
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_256_add_one_8(sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "mov	r2, #1\n\t"
+        "ldr	r1, [%[a], #0]\n\t"
+        "adds	r1, r1, r2\n\t"
+        "mov	r2, #0\n\t"
+        "str	r1, [%[a], #0]\n\t"
+        "ldr	r1, [%[a], #4]\n\t"
+        "adcs	r1, r1, r2\n\t"
+        "str	r1, [%[a], #4]\n\t"
+        "ldr	r1, [%[a], #8]\n\t"
+        "adcs	r1, r1, r2\n\t"
+        "str	r1, [%[a], #8]\n\t"
+        "ldr	r1, [%[a], #12]\n\t"
+        "adcs	r1, r1, r2\n\t"
+        "str	r1, [%[a], #12]\n\t"
+        "ldr	r1, [%[a], #16]\n\t"
+        "adcs	r1, r1, r2\n\t"
+        "str	r1, [%[a], #16]\n\t"
+        "ldr	r1, [%[a], #20]\n\t"
+        "adcs	r1, r1, r2\n\t"
+        "str	r1, [%[a], #20]\n\t"
+        "ldr	r1, [%[a], #24]\n\t"
+        "adcs	r1, r1, r2\n\t"
+        "str	r1, [%[a], #24]\n\t"
+        "ldr	r1, [%[a], #28]\n\t"
+        "adcs	r1, r1, r2\n\t"
+        "str	r1, [%[a], #28]\n\t"
+        :
+        : [a] "r" (a)
+        : "memory", "r1", "r2"
+    );
+}
+
+/* Read big endian unsigned byte array into r.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
+ * a  Byte array.
+ * n  Number of bytes in array to read.
+ */
+static void sp_256_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = n-1; i >= 0; i--) {
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 24U) {
+            r[j] &= 0xffffffff;
+            s = 32U - s;
+            if (j + 1 >= size) {
+                break;
+            }
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+}
+
+/* Generates a scalar that is in the range 1..order-1.
+ *
+ * rng  Random number generator.
+ * k    Scalar value.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+static int sp_256_ecc_gen_k_8(WC_RNG* rng, sp_digit* k)
+{
+    int err;
+    byte buf[32];
+
+    do {
+        err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf));
+        if (err == 0) {
+            sp_256_from_bin(k, 8, buf, (int)sizeof(buf));
+            if (sp_256_cmp_8(k, p256_order2) < 0) {
+                sp_256_add_one_8(k);
+                break;
+            }
+        }
+    }
+    while (err == 0);
+
+    return err;
+}
+
+/* Makes a random EC key pair.
+ *
+ * rng   Random number generator.
+ * priv  Generated private value.
+ * pub   Generated public point.
+ * heap  Heap to use for allocation.
+ * returns ECC_INF_E when the point does not have the correct order, RNG
+ * failures, MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 p;
+    sp_digit kd[8];
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_256 inf;
+#endif
+#endif
+    sp_point_256* point;
+    sp_digit* k = NULL;
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_256* infinity;
+#endif
+    int err;
+
+    (void)heap;
+
+    err = sp_256_point_new_8(heap, p, point);
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, inf, infinity);
+    }
+#endif
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+
+    if (err == MP_OKAY) {
+        err = sp_256_ecc_gen_k_8(rng, k);
+    }
+    if (err == MP_OKAY) {
+            err = sp_256_ecc_mulmod_base_8(point, k, 1, NULL);
+    }
+
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    if (err == MP_OKAY) {
+            err = sp_256_ecc_mulmod_8(infinity, point, p256_order, 1, NULL);
+    }
+    if (err == MP_OKAY) {
+        if ((sp_256_iszero_8(point->x) == 0) || (sp_256_iszero_8(point->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(k, priv);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_point_to_ecc_point_8(point, pub);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_256_point_free_8(infinity, 1, heap);
+#endif
+    sp_256_point_free_8(point, 1, heap);
+
+    return err;
+}
+
+#ifdef HAVE_ECC_DHE
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 32
+ *
+ * r  A single precision integer.
+ * a  Byte array.
+ */
+static void sp_256_to_bin(sp_digit* r, byte* a)
+{
+    int i, j, s = 0, b;
+
+    j = 256 / 8 - 1;
+    a[j] = 0;
+    for (i=0; i<8 && j>=0; i++) {
+        b = 0;
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
+            break;
+        }
+        while (b < 32) {
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
+                break;
+            }
+        }
+        s = 8 - (b - 32);
+        if (j >= 0) {
+            a[j] = 0;
+        }
+        if (s != 0) {
+            j++;
+        }
+    }
+}
+
+/* Multiply the point by the scalar and serialize the X ordinate.
+ * The number is 0 padded to maximum size on output.
+ *
+ * priv    Scalar to multiply the point by.
+ * pub     Point to multiply.
+ * out     Buffer to hold X ordinate.
+ * outLen  On entry, size of the buffer in bytes.
+ *         On exit, length of data in buffer in bytes.
+ * heap    Heap to use for allocation.
+ * returns BUFFER_E if the buffer is to small for output size,
+ * MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_secret_gen_256(mp_int* priv, ecc_point* pub, byte* out,
+                          word32* outLen, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 p;
+    sp_digit kd[8];
+#endif
+    sp_point_256* point = NULL;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    if (*outLen < 32U) {
+        err = BUFFER_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, p, point);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL)
+            err = MEMORY_E;
+    }
+#else
+    k = kd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_256_from_mp(k, 8, priv);
+        sp_256_point_from_ecc_point_8(point, pub);
+            err = sp_256_ecc_mulmod_8(point, point, k, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        sp_256_to_bin(point->x, out);
+        *outLen = 32;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_8(point, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_DHE */
+
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into a. (a -= b)
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_256_sub_in_place_8(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+    __asm__ __volatile__ (
+        "mov	r8, %[a]\n\t"
+        "add	r8, r8, #32\n\t"
+        "\n1:\n\t"
+        "mov	r5, #0\n\t"
+        "subs	r5, r5, %[c]\n\t"
+        "ldr	r3, [%[a]]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b]]\n\t"
+        "ldr	r6, [%[b], #4]\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "str	r3, [%[a]]\n\t"
+        "str	r4, [%[a], #4]\n\t"
+        "sbc	%[c], %[c], %[c]\n\t"
+        "add	%[a], %[a], #8\n\t"
+        "add	%[b], %[b], #8\n\t"
+        "cmp	%[a], r8\n\t"
+        "bne	1b\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6", "r8"
+    );
+
+    return c;
+}
+
+#else
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_256_sub_in_place_8(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "subs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "sbc	%[c], %[c], %[c]\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision digit.
+ */
+SP_NOINLINE static void sp_256_mul_d_8(sp_digit* r, const sp_digit* a,
+        sp_digit b)
+{
+    __asm__ __volatile__ (
+        "add	r9, %[a], #32\n\t"
+        /* A[0] * B */
+        "ldr	r6, [%[a]], #4\n\t"
+        "umull	r5, r3, r6, %[b]\n\t"
+        "mov	r4, #0\n\t"
+        "str	r5, [%[r]], #4\n\t"
+        /* A[0] * B - Done */
+        "\n1:\n\t"
+        "mov	r5, #0\n\t"
+        /* A[] * B */
+        "ldr	r6, [%[a]], #4\n\t"
+        "umull	r6, r8, r6, %[b]\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        /* A[] * B - Done */
+        "str	r3, [%[r]], #4\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "cmp	%[a], r9\n\t"
+        "blt	1b\n\t"
+        "str	r3, [%[r]]\n\t"
+        : [r] "+r" (r), [a] "+r" (a)
+        : [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r8", "r9"
+    );
+}
+
+/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
+ *
+ * d1   The high order half of the number to divide.
+ * d0   The low order half of the number to divide.
+ * div  The dividend.
+ * returns the result of the division.
+ *
+ * Note that this is an approximate div. It may give an answer 1 larger.
+ */
+SP_NOINLINE static sp_digit div_256_word_8(sp_digit d1, sp_digit d0,
+        sp_digit div)
+{
+    sp_digit r = 0;
+
+    __asm__ __volatile__ (
+        "lsr	r6, %[div], #16\n\t"
+        "add	r6, r6, #1\n\t"
+        "udiv	r4, %[d1], r6\n\t"
+        "lsl	r8, r4, #16\n\t"
+        "umull	r4, r5, %[div], r8\n\t"
+        "subs	%[d0], %[d0], r4\n\t"
+        "sbc	%[d1], %[d1], r5\n\t"
+        "udiv	r5, %[d1], r6\n\t"
+        "lsl	r4, r5, #16\n\t"
+        "add	r8, r8, r4\n\t"
+        "umull	r4, r5, %[div], r4\n\t"
+        "subs	%[d0], %[d0], r4\n\t"
+        "sbc	%[d1], %[d1], r5\n\t"
+        "lsl	r4, %[d1], #16\n\t"
+        "orr	r4, r4, %[d0], lsr #16\n\t"
+        "udiv	r4, r4, r6\n\t"
+        "add	r8, r8, r4\n\t"
+        "umull	r4, r5, %[div], r4\n\t"
+        "subs	%[d0], %[d0], r4\n\t"
+        "sbc	%[d1], %[d1], r5\n\t"
+        "lsl	r4, %[d1], #16\n\t"
+        "orr	r4, r4, %[d0], lsr #16\n\t"
+        "udiv	r4, r4, r6\n\t"
+        "add	r8, r8, r4\n\t"
+        "umull	r4, r5, %[div], r4\n\t"
+        "subs	%[d0], %[d0], r4\n\t"
+        "sbc	%[d1], %[d1], r5\n\t"
+        "udiv	r4, %[d0], %[div]\n\t"
+        "add	r8, r8, r4\n\t"
+        "mov	%[r], r8\n\t"
+        : [r] "+r" (r)
+        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+        : "r4", "r5", "r6", "r8"
+    );
+    return r;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * m  Mask to AND against each digit.
+ */
+static void sp_256_mask_8(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<8; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    r[0] = a[0] & m;
+    r[1] = a[1] & m;
+    r[2] = a[2] & m;
+    r[3] = a[3] & m;
+    r[4] = a[4] & m;
+    r[5] = a[5] & m;
+    r[6] = a[6] & m;
+    r[7] = a[7] & m;
+#endif
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Nmber to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_256_div_8(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[16], t2[9];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    div = d[7];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 8);
+    for (i=7; i>=0; i--) {
+        r1 = div_256_word_8(t1[8 + i], t1[8 + i - 1], div);
+
+        sp_256_mul_d_8(t2, d, r1);
+        t1[8 + i] += sp_256_sub_in_place_8(&t1[i], t2);
+        t1[8 + i] -= t2[8];
+        sp_256_mask_8(t2, d, t1[8 + i]);
+        t1[8 + i] += sp_256_add_8(&t1[i], &t1[i], t2);
+        sp_256_mask_8(t2, d, t1[8 + i]);
+        t1[8 + i] += sp_256_add_8(&t1[i], &t1[i], t2);
+    }
+
+    r1 = sp_256_cmp_8(t1, d) >= 0;
+    sp_256_cond_sub_8(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_256_mod_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_256_div_8(a, m, NULL, r);
+}
+
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
+/* Order-2 for the P256 curve. */
+static const uint32_t p256_order_minus_2[8] = {
+    0xfc63254fU,0xf3b9cac2U,0xa7179e84U,0xbce6faadU,0xffffffffU,0xffffffffU,
+    0x00000000U,0xffffffffU
+};
+#else
+/* The low half of the order-2 of the P256 curve. */
+static const uint32_t p256_order_low[4] = {
+    0xfc63254fU,0xf3b9cac2U,0xa7179e84U,0xbce6faadU
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Multiply two number mod the order of P256 curve. (r = a * b mod order)
+ *
+ * r  Result of the multiplication.
+ * a  First operand of the multiplication.
+ * b  Second operand of the multiplication.
+ */
+static void sp_256_mont_mul_order_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+    sp_256_mul_8(r, a, b);
+    sp_256_mont_reduce_order_8(r, p256_order, p256_mp_order);
+}
+
+/* Square number mod the order of P256 curve. (r = a * a mod order)
+ *
+ * r  Result of the squaring.
+ * a  Number to square.
+ */
+static void sp_256_mont_sqr_order_8(sp_digit* r, const sp_digit* a)
+{
+    sp_256_sqr_8(r, a);
+    sp_256_mont_reduce_order_8(r, p256_order, p256_mp_order);
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Square number mod the order of P256 curve a number of times.
+ * (r = a ^ n mod order)
+ *
+ * r  Result of the squaring.
+ * a  Number to square.
+ */
+static void sp_256_mont_sqr_n_order_8(sp_digit* r, const sp_digit* a, int n)
+{
+    int i;
+
+    sp_256_mont_sqr_order_8(r, a);
+    for (i=1; i<n; i++) {
+        sp_256_mont_sqr_order_8(r, r);
+    }
+}
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the order of the P256 curve.
+ * (r = 1 / a mod order)
+ *
+ * r   Inverse result.
+ * a   Number to invert.
+ * td  Temporary data.
+ */
+static void sp_256_mont_inv_order_8(sp_digit* r, const sp_digit* a,
+        sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    XMEMCPY(t, a, sizeof(sp_digit) * 8);
+    for (i=254; i>=0; i--) {
+        sp_256_mont_sqr_order_8(t, t);
+        if ((p256_order_minus_2[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_256_mont_mul_order_8(t, t, a);
+        }
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 8U);
+#else
+    sp_digit* t = td;
+    sp_digit* t2 = td + 2 * 8;
+    sp_digit* t3 = td + 4 * 8;
+    int i;
+
+    /* t = a^2 */
+    sp_256_mont_sqr_order_8(t, a);
+    /* t = a^3 = t * a */
+    sp_256_mont_mul_order_8(t, t, a);
+    /* t2= a^c = t ^ 2 ^ 2 */
+    sp_256_mont_sqr_n_order_8(t2, t, 2);
+    /* t3= a^f = t2 * t */
+    sp_256_mont_mul_order_8(t3, t2, t);
+    /* t2= a^f0 = t3 ^ 2 ^ 4 */
+    sp_256_mont_sqr_n_order_8(t2, t3, 4);
+    /* t = a^ff = t2 * t3 */
+    sp_256_mont_mul_order_8(t, t2, t3);
+    /* t3= a^ff00 = t ^ 2 ^ 8 */
+    sp_256_mont_sqr_n_order_8(t2, t, 8);
+    /* t = a^ffff = t2 * t */
+    sp_256_mont_mul_order_8(t, t2, t);
+    /* t2= a^ffff0000 = t ^ 2 ^ 16 */
+    sp_256_mont_sqr_n_order_8(t2, t, 16);
+    /* t = a^ffffffff = t2 * t */
+    sp_256_mont_mul_order_8(t, t2, t);
+    /* t2= a^ffffffff0000000000000000 = t ^ 2 ^ 64  */
+    sp_256_mont_sqr_n_order_8(t2, t, 64);
+    /* t2= a^ffffffff00000000ffffffff = t2 * t */
+    sp_256_mont_mul_order_8(t2, t2, t);
+    /* t2= a^ffffffff00000000ffffffff00000000 = t2 ^ 2 ^ 32  */
+    sp_256_mont_sqr_n_order_8(t2, t2, 32);
+    /* t2= a^ffffffff00000000ffffffffffffffff = t2 * t */
+    sp_256_mont_mul_order_8(t2, t2, t);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6 */
+    for (i=127; i>=112; i--) {
+        sp_256_mont_sqr_order_8(t2, t2);
+        if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_256_mont_mul_order_8(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6f */
+    sp_256_mont_sqr_n_order_8(t2, t2, 4);
+    sp_256_mont_mul_order_8(t2, t2, t3);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84 */
+    for (i=107; i>=64; i--) {
+        sp_256_mont_sqr_order_8(t2, t2);
+        if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_256_mont_mul_order_8(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f */
+    sp_256_mont_sqr_n_order_8(t2, t2, 4);
+    sp_256_mont_mul_order_8(t2, t2, t3);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2 */
+    for (i=59; i>=32; i--) {
+        sp_256_mont_sqr_order_8(t2, t2);
+        if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_256_mont_mul_order_8(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2f */
+    sp_256_mont_sqr_n_order_8(t2, t2, 4);
+    sp_256_mont_mul_order_8(t2, t2, t3);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254 */
+    for (i=27; i>=0; i--) {
+        sp_256_mont_sqr_order_8(t2, t2);
+        if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_256_mont_mul_order_8(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632540 */
+    sp_256_mont_sqr_n_order_8(t2, t2, 4);
+    /* r = a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f */
+    sp_256_mont_mul_order_8(r, t2, t3);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+#ifdef HAVE_ECC_SIGN
+#ifndef SP_ECC_MAX_SIG_GEN
+#define SP_ECC_MAX_SIG_GEN  64
+#endif
+
+/* Sign the hash using the private key.
+ *   e = [hash, 256 bits] from binary
+ *   r = (k.G)->x mod order
+ *   s = (r * x + e) / k mod order
+ * The hash is truncated to the first 256 bits.
+ *
+ * hash     Hash to sign.
+ * hashLen  Length of the hash data.
+ * rng      Random number generator.
+ * priv     Private part of key - scalar.
+ * rm       First part of result as an mp_int.
+ * sm       Sirst part of result as an mp_int.
+ * heap     Heap to use for allocation.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
+                    mp_int* rm, mp_int* sm, mp_int* km, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit ed[2*8];
+    sp_digit xd[2*8];
+    sp_digit kd[2*8];
+    sp_digit rd[2*8];
+    sp_digit td[3 * 2*8];
+    sp_point_256 p;
+#endif
+    sp_digit* e = NULL;
+    sp_digit* x = NULL;
+    sp_digit* k = NULL;
+    sp_digit* r = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_256* point = NULL;
+    sp_digit carry;
+    sp_digit* s = NULL;
+    sp_digit* kInv = NULL;
+    int err = MP_OKAY;
+    int32_t c;
+    int i;
+
+    (void)heap;
+
+    err = sp_256_point_new_8(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 8, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        e = d + 0 * 8;
+        x = d + 2 * 8;
+        k = d + 4 * 8;
+        r = d + 6 * 8;
+        tmp = d + 8 * 8;
+#else
+        e = ed;
+        x = xd;
+        k = kd;
+        r = rd;
+        tmp = td;
+#endif
+        s = e;
+        kInv = k;
+
+        if (hashLen > 32U) {
+            hashLen = 32U;
+        }
+
+        sp_256_from_bin(e, 8, hash, (int)hashLen);
+    }
+
+    for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
+        sp_256_from_mp(x, 8, priv);
+
+        /* New random point. */
+        if (km == NULL || mp_iszero(km)) {
+            err = sp_256_ecc_gen_k_8(rng, k);
+        }
+        else {
+            sp_256_from_mp(k, 8, km);
+            mp_zero(km);
+        }
+        if (err == MP_OKAY) {
+                err = sp_256_ecc_mulmod_base_8(point, k, 1, NULL);
+        }
+
+        if (err == MP_OKAY) {
+            /* r = point->x mod order */
+            XMEMCPY(r, point->x, sizeof(sp_digit) * 8U);
+            sp_256_norm_8(r);
+            c = sp_256_cmp_8(r, p256_order);
+            sp_256_cond_sub_8(r, r, p256_order, 0L - (sp_digit)(c >= 0));
+            sp_256_norm_8(r);
+
+            /* Conv k to Montgomery form (mod order) */
+                sp_256_mul_8(k, k, p256_norm_order);
+            err = sp_256_mod_8(k, k, p256_order);
+        }
+        if (err == MP_OKAY) {
+            sp_256_norm_8(k);
+            /* kInv = 1/k mod order */
+                sp_256_mont_inv_order_8(kInv, k, tmp);
+            sp_256_norm_8(kInv);
+
+            /* s = r * x + e */
+                sp_256_mul_8(x, x, r);
+            err = sp_256_mod_8(x, x, p256_order);
+        }
+        if (err == MP_OKAY) {
+            sp_256_norm_8(x);
+            carry = sp_256_add_8(s, e, x);
+            sp_256_cond_sub_8(s, s, p256_order, 0 - carry);
+            sp_256_norm_8(s);
+            c = sp_256_cmp_8(s, p256_order);
+            sp_256_cond_sub_8(s, s, p256_order, 0L - (sp_digit)(c >= 0));
+            sp_256_norm_8(s);
+
+            /* s = s * k^-1 mod order */
+                sp_256_mont_mul_order_8(s, s, kInv);
+            sp_256_norm_8(s);
+
+            /* Check that signature is usable. */
+            if (sp_256_iszero_8(s) == 0) {
+                break;
+            }
+        }
+    }
+
+    if (i == 0) {
+        err = RNG_FAILURE_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(r, rm);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(s, sm);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XMEMSET(d, 0, sizeof(sp_digit) * 8 * 8);
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    XMEMSET(e, 0, sizeof(sp_digit) * 2U * 8U);
+    XMEMSET(x, 0, sizeof(sp_digit) * 2U * 8U);
+    XMEMSET(k, 0, sizeof(sp_digit) * 2U * 8U);
+    XMEMSET(r, 0, sizeof(sp_digit) * 2U * 8U);
+    XMEMSET(r, 0, sizeof(sp_digit) * 2U * 8U);
+    XMEMSET(tmp, 0, sizeof(sp_digit) * 3U * 2U * 8U);
+#endif
+    sp_256_point_free_8(point, 1, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_SIGN */
+
+#ifdef HAVE_ECC_VERIFY
+/* Verify the signature values with the hash and public key.
+ *   e = Truncate(hash, 256)
+ *   u1 = e/s mod order
+ *   u2 = r/s mod order
+ *   r == (u1.G + u2.Q)->x mod order
+ * Optimization: Leave point in projective form.
+ *   (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
+ *   (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
+ * The hash is truncated to the first 256 bits.
+ *
+ * hash     Hash to sign.
+ * hashLen  Length of the hash data.
+ * rng      Random number generator.
+ * priv     Private part of key - scalar.
+ * rm       First part of result as an mp_int.
+ * sm       Sirst part of result as an mp_int.
+ * heap     Heap to use for allocation.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+int sp_ecc_verify_256(const byte* hash, word32 hashLen, mp_int* pX,
+    mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit u1d[2*8];
+    sp_digit u2d[2*8];
+    sp_digit sd[2*8];
+    sp_digit tmpd[2*8 * 5];
+    sp_point_256 p1d;
+    sp_point_256 p2d;
+#endif
+    sp_digit* u1 = NULL;
+    sp_digit* u2 = NULL;
+    sp_digit* s = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_256* p1;
+    sp_point_256* p2 = NULL;
+    sp_digit carry;
+    int32_t c;
+    int err;
+
+    err = sp_256_point_new_8(heap, p1d, p1);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, p2d, p2);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 8, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        u1  = d + 0 * 8;
+        u2  = d + 2 * 8;
+        s   = d + 4 * 8;
+        tmp = d + 6 * 8;
+#else
+        u1 = u1d;
+        u2 = u2d;
+        s  = sd;
+        tmp = tmpd;
+#endif
+
+        if (hashLen > 32U) {
+            hashLen = 32U;
+        }
+
+        sp_256_from_bin(u1, 8, hash, (int)hashLen);
+        sp_256_from_mp(u2, 8, r);
+        sp_256_from_mp(s, 8, sm);
+        sp_256_from_mp(p2->x, 8, pX);
+        sp_256_from_mp(p2->y, 8, pY);
+        sp_256_from_mp(p2->z, 8, pZ);
+
+        {
+            sp_256_mul_8(s, s, p256_norm_order);
+        }
+        err = sp_256_mod_8(s, s, p256_order);
+    }
+    if (err == MP_OKAY) {
+        sp_256_norm_8(s);
+        {
+            sp_256_mont_inv_order_8(s, s, tmp);
+            sp_256_mont_mul_order_8(u1, u1, s);
+            sp_256_mont_mul_order_8(u2, u2, s);
+        }
+
+            err = sp_256_ecc_mulmod_base_8(p1, u1, 0, heap);
+    }
+    if (err == MP_OKAY) {
+            err = sp_256_ecc_mulmod_8(p2, p2, u2, 0, heap);
+    }
+
+    if (err == MP_OKAY) {
+        {
+            sp_256_proj_point_add_8(p1, p1, p2, tmp);
+            if (sp_256_iszero_8(p1->z)) {
+                if (sp_256_iszero_8(p1->x) && sp_256_iszero_8(p1->y)) {
+                    sp_256_proj_point_dbl_8(p1, p2, tmp);
+                }
+                else {
+                    /* Y ordinate is not used from here - don't set. */
+                    p1->x[0] = 0;
+                    p1->x[1] = 0;
+                    p1->x[2] = 0;
+                    p1->x[3] = 0;
+                    p1->x[4] = 0;
+                    p1->x[5] = 0;
+                    p1->x[6] = 0;
+                    p1->x[7] = 0;
+                    XMEMCPY(p1->z, p256_norm_mod, sizeof(p256_norm_mod));
+                }
+            }
+        }
+
+        /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
+        /* Reload r and convert to Montgomery form. */
+        sp_256_from_mp(u2, 8, r);
+        err = sp_256_mod_mul_norm_8(u2, u2, p256_mod);
+    }
+
+    if (err == MP_OKAY) {
+        /* u1 = r.z'.z' mod prime */
+        sp_256_mont_sqr_8(p1->z, p1->z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(u1, u2, p1->z, p256_mod, p256_mp_mod);
+        *res = (int)(sp_256_cmp_8(p1->x, u1) == 0);
+        if (*res == 0) {
+            /* Reload r and add order. */
+            sp_256_from_mp(u2, 8, r);
+            carry = sp_256_add_8(u2, u2, p256_order);
+            /* Carry means result is greater than mod and is not valid. */
+            if (carry == 0) {
+                sp_256_norm_8(u2);
+
+                /* Compare with mod and if greater or equal then not valid. */
+                c = sp_256_cmp_8(u2, p256_mod);
+                if (c < 0) {
+                    /* Convert to Montogomery form */
+                    err = sp_256_mod_mul_norm_8(u2, u2, p256_mod);
+                    if (err == MP_OKAY) {
+                        /* u1 = (r + 1*order).z'.z' mod prime */
+                        sp_256_mont_mul_8(u1, u2, p1->z, p256_mod,
+                                                                  p256_mp_mod);
+                        *res = (int)(sp_256_cmp_8(p1->x, u1) == 0);
+                    }
+                }
+            }
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL)
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+#endif
+    sp_256_point_free_8(p1, 0, heap);
+    sp_256_point_free_8(p2, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_VERIFY */
+
+#ifdef HAVE_ECC_CHECK_KEY
+/* Check that the x and y oridinates are a valid point on the curve.
+ *
+ * point  EC point.
+ * heap   Heap to use if dynamically allocating.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+static int sp_256_ecc_is_point_8(sp_point_256* point, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit t1d[2*8];
+    sp_digit t2d[2*8];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8 * 4, heap, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = d + 0 * 8;
+        t2 = d + 2 * 8;
+#else
+        (void)heap;
+
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
+        sp_256_sqr_8(t1, point->y);
+        (void)sp_256_mod_8(t1, t1, p256_mod);
+        sp_256_sqr_8(t2, point->x);
+        (void)sp_256_mod_8(t2, t2, p256_mod);
+        sp_256_mul_8(t2, t2, point->x);
+        (void)sp_256_mod_8(t2, t2, p256_mod);
+        (void)sp_256_sub_8(t2, p256_mod, t2);
+        sp_256_mont_add_8(t1, t1, t2, p256_mod);
+
+        sp_256_mont_add_8(t1, t1, point->x, p256_mod);
+        sp_256_mont_add_8(t1, t1, point->x, p256_mod);
+        sp_256_mont_add_8(t1, t1, point->x, p256_mod);
+
+        if (sp_256_cmp_8(t1, p256_b) != 0) {
+            err = MP_VAL;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+
+/* Check that the x and y oridinates are a valid point on the curve.
+ *
+ * pX  X ordinate of EC point.
+ * pY  Y ordinate of EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+int sp_ecc_is_point_256(mp_int* pX, mp_int* pY)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 pubd;
+#endif
+    sp_point_256* pub;
+    byte one[1] = { 1 };
+    int err;
+
+    err = sp_256_point_new_8(NULL, pubd, pub);
+    if (err == MP_OKAY) {
+        sp_256_from_mp(pub->x, 8, pX);
+        sp_256_from_mp(pub->y, 8, pY);
+        sp_256_from_bin(pub->z, 8, one, (int)sizeof(one));
+
+        err = sp_256_ecc_is_point_8(pub, NULL);
+    }
+
+    sp_256_point_free_8(pub, 0, NULL);
+
+    return err;
+}
+
+/* Check that the private scalar generates the EC point (px, py), the point is
+ * on the curve and the point has the correct order.
+ *
+ * pX     X ordinate of EC point.
+ * pY     Y ordinate of EC point.
+ * privm  Private scalar that generates EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve, ECC_INF_E if the point does not have the correct order,
+ * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and
+ * MP_OKAY otherwise.
+ */
+int sp_ecc_check_key_256(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit privd[8];
+    sp_point_256 pubd;
+    sp_point_256 pd;
+#endif
+    sp_digit* priv = NULL;
+    sp_point_256* pub;
+    sp_point_256* p = NULL;
+    byte one[1] = { 1 };
+    int err;
+
+    err = sp_256_point_new_8(heap, pubd, pub);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (priv == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+        priv = privd;
+#endif
+
+        sp_256_from_mp(pub->x, 8, pX);
+        sp_256_from_mp(pub->y, 8, pY);
+        sp_256_from_bin(pub->z, 8, one, (int)sizeof(one));
+        sp_256_from_mp(priv, 8, privm);
+
+        /* Check point at infinitiy. */
+        if ((sp_256_iszero_8(pub->x) != 0) &&
+            (sp_256_iszero_8(pub->y) != 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check range of X and Y */
+        if (sp_256_cmp_8(pub->x, p256_mod) >= 0 ||
+            sp_256_cmp_8(pub->y, p256_mod) >= 0) {
+            err = ECC_OUT_OF_RANGE_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check point is on curve */
+        err = sp_256_ecc_is_point_8(pub, heap);
+    }
+
+    if (err == MP_OKAY) {
+        /* Point * order = infinity */
+            err = sp_256_ecc_mulmod_8(p, pub, p256_order, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is infinity */
+        if ((sp_256_iszero_8(p->x) == 0) ||
+            (sp_256_iszero_8(p->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Base * private = point */
+            err = sp_256_ecc_mulmod_base_8(p, priv, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is public key */
+        if (sp_256_cmp_8(p->x, pub->x) != 0 ||
+            sp_256_cmp_8(p->y, pub->y) != 0) {
+            err = ECC_PRIV_KEY_E;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (priv != NULL) {
+        XFREE(priv, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_8(p, 0, heap);
+    sp_256_point_free_8(pub, 0, heap);
+
+    return err;
+}
+#endif
+#ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
+/* Add two projective EC points together.
+ * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ)
+ *
+ * pX   First EC point's X ordinate.
+ * pY   First EC point's Y ordinate.
+ * pZ   First EC point's Z ordinate.
+ * qX   Second EC point's X ordinate.
+ * qY   Second EC point's Y ordinate.
+ * qZ   Second EC point's Z ordinate.
+ * rX   Resultant EC point's X ordinate.
+ * rY   Resultant EC point's Y ordinate.
+ * rZ   Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
+                              mp_int* qX, mp_int* qY, mp_int* qZ,
+                              mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit tmpd[2 * 8 * 5];
+    sp_point_256 pd;
+    sp_point_256 qd;
+#endif
+    sp_digit* tmp;
+    sp_point_256* p;
+    sp_point_256* q = NULL;
+    int err;
+
+    err = sp_256_point_new_8(NULL, pd, p);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(NULL, qd, q);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, NULL,
+                                                              DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_256_from_mp(p->x, 8, pX);
+        sp_256_from_mp(p->y, 8, pY);
+        sp_256_from_mp(p->z, 8, pZ);
+        sp_256_from_mp(q->x, 8, qX);
+        sp_256_from_mp(q->y, 8, qY);
+        sp_256_from_mp(q->z, 8, qZ);
+
+            sp_256_proj_point_add_8(p, p, q, tmp);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->x, rX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->y, rY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->z, rZ);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_8(q, 0, NULL);
+    sp_256_point_free_8(p, 0, NULL);
+
+    return err;
+}
+
+/* Double a projective EC point.
+ * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ)
+ *
+ * pX   EC point's X ordinate.
+ * pY   EC point's Y ordinate.
+ * pZ   EC point's Z ordinate.
+ * rX   Resultant EC point's X ordinate.
+ * rY   Resultant EC point's Y ordinate.
+ * rZ   Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
+                              mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit tmpd[2 * 8 * 2];
+    sp_point_256 pd;
+#endif
+    sp_digit* tmp;
+    sp_point_256* p;
+    int err;
+
+    err = sp_256_point_new_8(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 2, NULL,
+                                                              DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_256_from_mp(p->x, 8, pX);
+        sp_256_from_mp(p->y, 8, pY);
+        sp_256_from_mp(p->z, 8, pZ);
+
+            sp_256_proj_point_dbl_8(p, p, tmp);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->x, rX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->y, rY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->z, rZ);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_8(p, 0, NULL);
+
+    return err;
+}
+
+/* Map a projective EC point to affine in place.
+ * pZ will be one.
+ *
+ * pX   EC point's X ordinate.
+ * pY   EC point's Y ordinate.
+ * pZ   EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit tmpd[2 * 8 * 4];
+    sp_point_256 pd;
+#endif
+    sp_digit* tmp;
+    sp_point_256* p;
+    int err;
+
+    err = sp_256_point_new_8(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 4, NULL,
+                                                              DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+    if (err == MP_OKAY) {
+        sp_256_from_mp(p->x, 8, pX);
+        sp_256_from_mp(p->y, 8, pY);
+        sp_256_from_mp(p->z, 8, pZ);
+
+        sp_256_map_8(p, p, tmp);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->x, pX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->y, pY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->z, pZ);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_8(p, 0, NULL);
+
+    return err;
+}
+#endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */
+#ifdef HAVE_COMP_KEY
+/* Find the square root of a number mod the prime of the curve.
+ *
+ * y  The number to operate on and the result.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+static int sp_256_mont_sqrt_8(sp_digit* y)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d;
+#else
+    sp_digit t1d[2 * 8];
+    sp_digit t2d[2 * 8];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 8, NULL, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = d + 0 * 8;
+        t2 = d + 2 * 8;
+#else
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
+        {
+            /* t2 = y ^ 0x2 */
+            sp_256_mont_sqr_8(t2, y, p256_mod, p256_mp_mod);
+            /* t1 = y ^ 0x3 */
+            sp_256_mont_mul_8(t1, t2, y, p256_mod, p256_mp_mod);
+            /* t2 = y ^ 0xc */
+            sp_256_mont_sqr_n_8(t2, t1, 2, p256_mod, p256_mp_mod);
+            /* t1 = y ^ 0xf */
+            sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod);
+            /* t2 = y ^ 0xf0 */
+            sp_256_mont_sqr_n_8(t2, t1, 4, p256_mod, p256_mp_mod);
+            /* t1 = y ^ 0xff */
+            sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod);
+            /* t2 = y ^ 0xff00 */
+            sp_256_mont_sqr_n_8(t2, t1, 8, p256_mod, p256_mp_mod);
+            /* t1 = y ^ 0xffff */
+            sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod);
+            /* t2 = y ^ 0xffff0000 */
+            sp_256_mont_sqr_n_8(t2, t1, 16, p256_mod, p256_mp_mod);
+            /* t1 = y ^ 0xffffffff */
+            sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod);
+            /* t1 = y ^ 0xffffffff00000000 */
+            sp_256_mont_sqr_n_8(t1, t1, 32, p256_mod, p256_mp_mod);
+            /* t1 = y ^ 0xffffffff00000001 */
+            sp_256_mont_mul_8(t1, t1, y, p256_mod, p256_mp_mod);
+            /* t1 = y ^ 0xffffffff00000001000000000000000000000000 */
+            sp_256_mont_sqr_n_8(t1, t1, 96, p256_mod, p256_mp_mod);
+            /* t1 = y ^ 0xffffffff00000001000000000000000000000001 */
+            sp_256_mont_mul_8(t1, t1, y, p256_mod, p256_mp_mod);
+            sp_256_mont_sqr_n_8(y, t1, 94, p256_mod, p256_mp_mod);
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+
+
+/* Uncompress the point given the X ordinate.
+ *
+ * xm    X ordinate.
+ * odd   Whether the Y ordinate is odd.
+ * ym    Calculated Y ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d;
+#else
+    sp_digit xd[2 * 8];
+    sp_digit yd[2 * 8];
+#endif
+    sp_digit* x = NULL;
+    sp_digit* y = NULL;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 8, NULL, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        x = d + 0 * 8;
+        y = d + 2 * 8;
+#else
+        x = xd;
+        y = yd;
+#endif
+
+        sp_256_from_mp(x, 8, xm);
+        err = sp_256_mod_mul_norm_8(x, x, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        /* y = x^3 */
+        {
+            sp_256_mont_sqr_8(y, x, p256_mod, p256_mp_mod);
+            sp_256_mont_mul_8(y, y, x, p256_mod, p256_mp_mod);
+        }
+        /* y = x^3 - 3x */
+        sp_256_mont_sub_8(y, y, x, p256_mod);
+        sp_256_mont_sub_8(y, y, x, p256_mod);
+        sp_256_mont_sub_8(y, y, x, p256_mod);
+        /* y = x^3 - 3x + b */
+        err = sp_256_mod_mul_norm_8(x, p256_b, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        sp_256_mont_add_8(y, y, x, p256_mod);
+        /* y = sqrt(x^3 - 3x + b) */
+        err = sp_256_mont_sqrt_8(y);
+    }
+    if (err == MP_OKAY) {
+        XMEMSET(y + 8, 0, 8U * sizeof(sp_digit));
+        sp_256_mont_reduce_8(y, p256_mod, p256_mp_mod);
+        if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) {
+            sp_256_mont_sub_8(y, p256_mod, y, p256_mod);
+        }
+
+        err = sp_256_to_mp(y, ym);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+#endif
+#endif /* !WOLFSSL_SP_NO_256 */
+#ifdef WOLFSSL_SP_384
+
+/* Point structure to use. */
+typedef struct sp_point_384 {
+    sp_digit x[2 * 12];
+    sp_digit y[2 * 12];
+    sp_digit z[2 * 12];
+    int infinity;
+} sp_point_384;
+
+/* The modulus (prime) of the curve P384. */
+static const sp_digit p384_mod[12] = {
+    0xffffffff,0x00000000,0x00000000,0xffffffff,0xfffffffe,0xffffffff,
+    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff
+};
+/* The Montogmery normalizer for modulus of the curve P384. */
+static const sp_digit p384_norm_mod[12] = {
+    0x00000001,0xffffffff,0xffffffff,0x00000000,0x00000001,0x00000000,
+    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000
+};
+/* The Montogmery multiplier for modulus of the curve P384. */
+static sp_digit p384_mp_mod = 0x00000001;
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+                                            defined(HAVE_ECC_VERIFY)
+/* The order of the curve P384. */
+static const sp_digit p384_order[12] = {
+    0xccc52973,0xecec196a,0x48b0a77a,0x581a0db2,0xf4372ddf,0xc7634d81,
+    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff
+};
+#endif
+/* The order of the curve P384 minus 2. */
+static const sp_digit p384_order2[12] = {
+    0xccc52971,0xecec196a,0x48b0a77a,0x581a0db2,0xf4372ddf,0xc7634d81,
+    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff
+};
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montogmery normalizer for order of the curve P384. */
+static const sp_digit p384_norm_order[12] = {
+    0x333ad68d,0x1313e695,0xb74f5885,0xa7e5f24d,0x0bc8d220,0x389cb27e,
+    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000
+};
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montogmery multiplier for order of the curve P384. */
+static sp_digit p384_mp_order = 0xe88fdc45;
+#endif
+/* The base point of curve P384. */
+static const sp_point_384 p384_base = {
+    /* X ordinate */
+    {
+        0x72760ab7,0x3a545e38,0xbf55296c,0x5502f25d,0x82542a38,0x59f741e0,
+        0x8ba79b98,0x6e1d3b62,0xf320ad74,0x8eb1c71e,0xbe8b0537,0xaa87ca22,
+        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* Y ordinate */
+    {
+        0x90ea0e5f,0x7a431d7c,0x1d7e819d,0x0a60b1ce,0xb5f0b8c0,0xe9da3113,
+        0x289a147c,0xf8f41dbd,0x9292dc29,0x5d9e98bf,0x96262c6f,0x3617de4a,
+        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* Z ordinate */
+    {
+        0x00000001,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
+        0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
+        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* infinity */
+    0
+};
+#if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY)
+static const sp_digit p384_b[12] = {
+    0xd3ec2aef,0x2a85c8ed,0x8a2ed19d,0xc656398d,0x5013875a,0x0314088f,
+    0xfe814112,0x181d9c6e,0xe3f82d19,0x988e056b,0xe23ee7e4,0xb3312fa7
+};
+#endif
+
+static int sp_384_point_new_ex_12(void* heap, sp_point_384* sp, sp_point_384** p)
+{
+    int ret = MP_OKAY;
+    (void)heap;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    (void)sp;
+    *p = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, DYNAMIC_TYPE_ECC);
+#else
+    *p = sp;
+#endif
+    if (*p == NULL) {
+        ret = MEMORY_E;
+    }
+    return ret;
+}
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* Allocate memory for point and return error. */
+#define sp_384_point_new_12(heap, sp, p) sp_384_point_new_ex_12((heap), NULL, &(p))
+#else
+/* Set pointer to data and return no error. */
+#define sp_384_point_new_12(heap, sp, p) sp_384_point_new_ex_12((heap), &(sp), &(p))
+#endif
+
+
+static void sp_384_point_free_12(sp_point_384* p, int clear, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* If valid pointer then clear point data if requested and free data. */
+    if (p != NULL) {
+        if (clear != 0) {
+            XMEMSET(p, 0, sizeof(*p));
+        }
+        XFREE(p, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+/* Clear point data if requested. */
+    if (clear != 0) {
+        XMEMSET(p, 0, sizeof(*p));
+    }
+#endif
+    (void)heap;
+}
+
+/* Multiply a number by Montogmery normalizer mod modulus (prime).
+ *
+ * r  The resulting Montgomery form number.
+ * a  The number to convert.
+ * m  The modulus (prime).
+ * returns MEMORY_E when memory allocation fails and MP_OKAY otherwise.
+ */
+static int sp_384_mod_mul_norm_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    int64_t* t;
+#else
+    int64_t t[12];
+#endif
+    int64_t o;
+    int err = MP_OKAY;
+
+    (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (int64_t*)XMALLOC(sizeof(int64_t) * 12, NULL, DYNAMIC_TYPE_ECC);
+    if (t == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        /*  1  0  0  0  0  0  0  0  1  1  0 -1 */
+        t[0] = 0 + (uint64_t)a[0] + (uint64_t)a[8] + (uint64_t)a[9] - (uint64_t)a[11];
+        /* -1  1  0  0  0  0  0  0 -1  0  1  1 */
+        t[1] = 0 - (uint64_t)a[0] + (uint64_t)a[1] - (uint64_t)a[8] + (uint64_t)a[10] + (uint64_t)a[11];
+        /*  0 -1  1  0  0  0  0  0  0 -1  0  1 */
+        t[2] = 0 - (uint64_t)a[1] + (uint64_t)a[2] - (uint64_t)a[9] + (uint64_t)a[11];
+        /*  1  0 -1  1  0  0  0  0  1  1 -1 -1 */
+        t[3] = 0 + (uint64_t)a[0] - (uint64_t)a[2] + (uint64_t)a[3] + (uint64_t)a[8] + (uint64_t)a[9] - (uint64_t)a[10] - (uint64_t)a[11];
+        /*  1  1  0 -1  1  0  0  0  1  2  1 -2 */
+        t[4] = 0 + (uint64_t)a[0] + (uint64_t)a[1] - (uint64_t)a[3] + (uint64_t)a[4] + (uint64_t)a[8] + 2 * (uint64_t)a[9] + (uint64_t)a[10] -  2 * (uint64_t)a[11];
+        /*  0  1  1  0 -1  1  0  0  0  1  2  1 */
+        t[5] = 0 + (uint64_t)a[1] + (uint64_t)a[2] - (uint64_t)a[4] + (uint64_t)a[5] + (uint64_t)a[9] + 2 * (uint64_t)a[10] + (uint64_t)a[11];
+        /*  0  0  1  1  0 -1  1  0  0  0  1  2 */
+        t[6] = 0 + (uint64_t)a[2] + (uint64_t)a[3] - (uint64_t)a[5] + (uint64_t)a[6] + (uint64_t)a[10] + 2 * (uint64_t)a[11];
+        /*  0  0  0  1  1  0 -1  1  0  0  0  1 */
+        t[7] = 0 + (uint64_t)a[3] + (uint64_t)a[4] - (uint64_t)a[6] + (uint64_t)a[7] + (uint64_t)a[11];
+        /*  0  0  0  0  1  1  0 -1  1  0  0  0 */
+        t[8] = 0 + (uint64_t)a[4] + (uint64_t)a[5] - (uint64_t)a[7] + (uint64_t)a[8];
+        /*  0  0  0  0  0  1  1  0 -1  1  0  0 */
+        t[9] = 0 + (uint64_t)a[5] + (uint64_t)a[6] - (uint64_t)a[8] + (uint64_t)a[9];
+        /*  0  0  0  0  0  0  1  1  0 -1  1  0 */
+        t[10] = 0 + (uint64_t)a[6] + (uint64_t)a[7] - (uint64_t)a[9] + (uint64_t)a[10];
+        /*  0  0  0  0  0  0  0  1  1  0 -1  1 */
+        t[11] = 0 + (uint64_t)a[7] + (uint64_t)a[8] - (uint64_t)a[10] + (uint64_t)a[11];
+
+        t[1] += t[0] >> 32; t[0] &= 0xffffffff;
+        t[2] += t[1] >> 32; t[1] &= 0xffffffff;
+        t[3] += t[2] >> 32; t[2] &= 0xffffffff;
+        t[4] += t[3] >> 32; t[3] &= 0xffffffff;
+        t[5] += t[4] >> 32; t[4] &= 0xffffffff;
+        t[6] += t[5] >> 32; t[5] &= 0xffffffff;
+        t[7] += t[6] >> 32; t[6] &= 0xffffffff;
+        t[8] += t[7] >> 32; t[7] &= 0xffffffff;
+        t[9] += t[8] >> 32; t[8] &= 0xffffffff;
+        t[10] += t[9] >> 32; t[9] &= 0xffffffff;
+        t[11] += t[10] >> 32; t[10] &= 0xffffffff;
+        o     = t[11] >> 32; t[11] &= 0xffffffff;
+        t[0] += o;
+        t[1] -= o;
+        t[3] += o;
+        t[4] += o;
+        t[1] += t[0] >> 32; t[0] &= 0xffffffff;
+        t[2] += t[1] >> 32; t[1] &= 0xffffffff;
+        t[3] += t[2] >> 32; t[2] &= 0xffffffff;
+        t[4] += t[3] >> 32; t[3] &= 0xffffffff;
+        t[5] += t[4] >> 32; t[4] &= 0xffffffff;
+        t[6] += t[5] >> 32; t[5] &= 0xffffffff;
+        t[7] += t[6] >> 32; t[6] &= 0xffffffff;
+        t[8] += t[7] >> 32; t[7] &= 0xffffffff;
+        t[9] += t[8] >> 32; t[8] &= 0xffffffff;
+        t[10] += t[9] >> 32; t[9] &= 0xffffffff;
+        t[11] += t[10] >> 32; t[10] &= 0xffffffff;
+
+        r[0] = t[0];
+        r[1] = t[1];
+        r[2] = t[2];
+        r[3] = t[3];
+        r[4] = t[4];
+        r[5] = t[5];
+        r[6] = t[6];
+        r[7] = t[7];
+        r[8] = t[8];
+        r[9] = t[9];
+        r[10] = t[10];
+        r[11] = t[11];
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL)
+        XFREE(t, NULL, DYNAMIC_TYPE_ECC);
+#endif
+
+    return err;
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
+ * a  A multi-precision integer.
+ */
+static void sp_384_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 32
+    int j;
+
+    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+    for (j = a->used; j < size; j++) {
+        r[j] = 0;
+    }
+#elif DIGIT_BIT > 32
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
+        r[j] &= 0xffffffff;
+        s = 32U - s;
+        if (j + 1 >= size) {
+            break;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 32U) <= (word32)DIGIT_BIT) {
+            s += 32U;
+            r[j] &= 0xffffffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#else
+    int i, j = 0, s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i]) << s;
+        if (s + DIGIT_BIT >= 32) {
+            r[j] &= 0xffffffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            s = 32 - s;
+            if (s == DIGIT_BIT) {
+                r[++j] = 0;
+                s = 0;
+            }
+            else {
+                r[++j] = a->dp[i] >> s;
+                s = DIGIT_BIT - s;
+            }
+        }
+        else {
+            s += DIGIT_BIT;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#endif
+}
+
+/* Convert a point of type ecc_point to type sp_point_384.
+ *
+ * p   Point of type sp_point_384 (result).
+ * pm  Point of type ecc_point.
+ */
+static void sp_384_point_from_ecc_point_12(sp_point_384* p, const ecc_point* pm)
+{
+    XMEMSET(p->x, 0, sizeof(p->x));
+    XMEMSET(p->y, 0, sizeof(p->y));
+    XMEMSET(p->z, 0, sizeof(p->z));
+    sp_384_from_mp(p->x, 12, pm->x);
+    sp_384_from_mp(p->y, 12, pm->y);
+    sp_384_from_mp(p->z, 12, pm->z);
+    p->infinity = 0;
+}
+
+/* Convert an array of sp_digit to an mp_int.
+ *
+ * a  A single precision integer.
+ * r  A multi-precision integer.
+ */
+static int sp_384_to_mp(const sp_digit* a, mp_int* r)
+{
+    int err;
+
+    err = mp_grow(r, (384 + DIGIT_BIT - 1) / DIGIT_BIT);
+    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 32
+        XMEMCPY(r->dp, a, sizeof(sp_digit) * 12);
+        r->used = 12;
+        mp_clamp(r);
+#elif DIGIT_BIT < 32
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 12; i++) {
+            r->dp[j] |= (mp_digit)(a[i] << s);
+            r->dp[j] &= (1L << DIGIT_BIT) - 1;
+            s = DIGIT_BIT - s;
+            r->dp[++j] = (mp_digit)(a[i] >> s);
+            while (s + DIGIT_BIT <= 32) {
+                s += DIGIT_BIT;
+                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+                if (s == SP_WORD_SIZE) {
+                    r->dp[j] = 0;
+                }
+                else {
+                    r->dp[j] = (mp_digit)(a[i] >> s);
+                }
+            }
+            s = 32 - s;
+        }
+        r->used = (384 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#else
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 12; i++) {
+            r->dp[j] |= ((mp_digit)a[i]) << s;
+            if (s + 32 >= DIGIT_BIT) {
+    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+                r->dp[j] &= (1L << DIGIT_BIT) - 1;
+    #endif
+                s = DIGIT_BIT - s;
+                r->dp[++j] = a[i] >> s;
+                s = 32 - s;
+            }
+            else {
+                s += 32;
+            }
+        }
+        r->used = (384 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#endif
+    }
+
+    return err;
+}
+
+/* Convert a point of type sp_point_384 to type ecc_point.
+ *
+ * p   Point of type sp_point_384.
+ * pm  Point of type ecc_point (result).
+ * returns MEMORY_E when allocation of memory in ecc_point fails otherwise
+ * MP_OKAY.
+ */
+static int sp_384_point_to_ecc_point_12(const sp_point_384* p, ecc_point* pm)
+{
+    int err;
+
+    err = sp_384_to_mp(p->x, pm->x);
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->y, pm->y);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->z, pm->z);
+    }
+
+    return err;
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_384_mul_12(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit tmp[12 * 2];
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r9, r3\n\t"
+        "mov	r12, %[r]\n\t"
+        "mov	r10, %[a]\n\t"
+        "mov	r11, %[b]\n\t"
+        "mov	r6, #48\n\t"
+        "add	r6, r6, r10\n\t"
+        "mov	r14, r6\n\t"
+        "\n1:\n\t"
+        "mov	%[r], #0\n\t"
+        "mov	r5, #0\n\t"
+        "mov	r6, #44\n\t"
+        "mov	%[a], r9\n\t"
+        "subs	%[a], %[a], r6\n\t"
+        "sbc	r6, r6, r6\n\t"
+        "mvn	r6, r6\n\t"
+        "and	%[a], %[a], r6\n\t"
+        "mov	%[b], r9\n\t"
+        "sub	%[b], %[b], %[a]\n\t"
+        "add	%[a], %[a], r10\n\t"
+        "add	%[b], %[b], r11\n\t"
+        "\n2:\n\t"
+        /* Multiply Start */
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r8, [%[b]]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, %[r]\n\t"
+        /* Multiply Done */
+        "add	%[a], %[a], #4\n\t"
+        "sub	%[b], %[b], #4\n\t"
+        "cmp	%[a], r14\n\t"
+        "beq	3f\n\t"
+        "mov	r6, r9\n\t"
+        "add	r6, r6, r10\n\t"
+        "cmp	%[a], r6\n\t"
+        "ble	2b\n\t"
+        "\n3:\n\t"
+        "mov	%[r], r12\n\t"
+        "mov	r8, r9\n\t"
+        "str	r3, [%[r], r8]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "add	r8, r8, #4\n\t"
+        "mov	r9, r8\n\t"
+        "mov	r6, #88\n\t"
+        "cmp	r8, r6\n\t"
+        "ble	1b\n\t"
+        "str	r3, [%[r], r8]\n\t"
+        "mov	%[a], r10\n\t"
+        "mov	%[b], r11\n\t"
+        :
+        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
+    );
+
+    XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * r  A single precision number representing condition subtract result.
+ * a  A single precision number to subtract from.
+ * b  A single precision number to subtract.
+ * m  Mask value to apply.
+ */
+SP_NOINLINE static sp_digit sp_384_cond_sub_12(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, sp_digit m)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r5, #48\n\t"
+        "mov	r9, r5\n\t"
+        "mov	r8, #0\n\t"
+        "\n1:\n\t"
+        "ldr	r6, [%[b], r8]\n\t"
+        "and	r6, r6, %[m]\n\t"
+        "mov	r5, #0\n\t"
+        "subs	r5, r5, %[c]\n\t"
+        "ldr	r5, [%[a], r8]\n\t"
+        "sbcs	r5, r5, r6\n\t"
+        "sbcs	%[c], %[c], %[c]\n\t"
+        "str	r5, [%[r], r8]\n\t"
+        "add	r8, r8, #4\n\t"
+        "cmp	r8, r9\n\t"
+        "blt	1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r5", "r6", "r8", "r9"
+    );
+
+    return c;
+}
+
+#define sp_384_mont_reduce_order_12   sp_384_mont_reduce_12
+
+/* Reduce the number back to 384 bits using Montgomery reduction.
+ *
+ * a   A single precision number to reduce in place.
+ * m   The single precision number representing the modulus.
+ * mp  The digit representing the negative inverse of m mod 2^n.
+ */
+SP_NOINLINE static void sp_384_mont_reduce_12(sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_digit ca = 0;
+
+    __asm__ __volatile__ (
+        "mov	r9, %[mp]\n\t"
+        "mov	r12, %[m]\n\t"
+        "mov	r10, %[a]\n\t"
+        "mov	r4, #0\n\t"
+        "add	r11, r10, #48\n\t"
+        "\n1:\n\t"
+        /* mu = a[i] * mp */
+        "mov	%[mp], r9\n\t"
+        "ldr	%[a], [r10]\n\t"
+        "mul	%[mp], %[mp], %[a]\n\t"
+        "mov	%[m], r12\n\t"
+        "add	r14, r10, #40\n\t"
+        "\n2:\n\t"
+        /* a[i+j] += m[j] * mu */
+        "ldr	%[a], [r10]\n\t"
+        "mov	r5, #0\n\t"
+        /* Multiply m[j] and mu - Start */
+        "ldr	r8, [%[m]], #4\n\t"
+        "umull	r6, r8, %[mp], r8\n\t"
+        "adds	%[a], %[a], r6\n\t"
+        "adc	r5, r5, r8\n\t"
+        /* Multiply m[j] and mu - Done */
+        "adds	r4, r4, %[a]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "str	r4, [r10], #4\n\t"
+        /* a[i+j+1] += m[j+1] * mu */
+        "ldr	%[a], [r10]\n\t"
+        "mov	r4, #0\n\t"
+        /* Multiply m[j] and mu - Start */
+        "ldr	r8, [%[m]], #4\n\t"
+        "umull	r6, r8, %[mp], r8\n\t"
+        "adds	%[a], %[a], r6\n\t"
+        "adc	r4, r4, r8\n\t"
+        /* Multiply m[j] and mu - Done */
+        "adds	r5, r5, %[a]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "str	r5, [r10], #4\n\t"
+        "cmp	r10, r14\n\t"
+        "blt	2b\n\t"
+        /* a[i+10] += m[10] * mu */
+        "ldr	%[a], [r10]\n\t"
+        "mov	r5, #0\n\t"
+        /* Multiply m[j] and mu - Start */
+        "ldr	r8, [%[m]], #4\n\t"
+        "umull	r6, r8, %[mp], r8\n\t"
+        "adds	%[a], %[a], r6\n\t"
+        "adc	r5, r5, r8\n\t"
+        /* Multiply m[j] and mu - Done */
+        "adds	r4, r4, %[a]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "str	r4, [r10], #4\n\t"
+        /* a[i+11] += m[11] * mu */
+        "mov	r4, %[ca]\n\t"
+        "mov	%[ca], #0\n\t"
+        /* Multiply m[11] and mu - Start */
+        "ldr	r8, [%[m]]\n\t"
+        "umull	r6, r8, %[mp], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	%[ca], %[ca], #0\n\t"
+        /* Multiply m[11] and mu - Done */
+        "ldr	r6, [r10]\n\t"
+        "ldr	r8, [r10, #4]\n\t"
+        "adds	r6, r6, r5\n\t"
+        "adcs	r8, r8, r4\n\t"
+        "adc	%[ca], %[ca], #0\n\t"
+        "str	r6, [r10]\n\t"
+        "str	r8, [r10, #4]\n\t"
+        /* Next word in a */
+        "sub	r10, r10, #40\n\t"
+        "cmp	r10, r11\n\t"
+        "blt	1b\n\t"
+        "mov	%[a], r10\n\t"
+        "mov	%[m], r12\n\t"
+        : [ca] "+r" (ca), [a] "+r" (a)
+        : [m] "r" (m), [mp] "r" (mp)
+        : "memory", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
+    );
+
+    sp_384_cond_sub_12(a - 12, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montogmery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r   Result of multiplication.
+ * a   First number to multiply in Montogmery form.
+ * b   Second number to multiply in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_384_mont_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_384_mul_12(r, a, b);
+    sp_384_mont_reduce_12(r, m, mp);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_384_sqr_12(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r5, #0\n\t"
+        "mov	r9, r3\n\t"
+        "mov	r12, %[r]\n\t"
+        "mov	r6, #96\n\t"
+        "neg	r6, r6\n\t"
+        "add	sp, sp, r6\n\t"
+        "mov	r11, sp\n\t"
+        "mov	r10, %[a]\n\t"
+        "\n1:\n\t"
+        "mov	%[r], #0\n\t"
+        "mov	r6, #44\n\t"
+        "mov	%[a], r9\n\t"
+        "subs	%[a], %[a], r6\n\t"
+        "sbc	r6, r6, r6\n\t"
+        "mvn	r6, r6\n\t"
+        "and	%[a], %[a], r6\n\t"
+        "mov	r2, r9\n\t"
+        "sub	r2, r2, %[a]\n\t"
+        "add	%[a], %[a], r10\n\t"
+        "add	r2, r2, r10\n\t"
+        "\n2:\n\t"
+        "cmp	r2, %[a]\n\t"
+        "beq	4f\n\t"
+        /* Multiply * 2: Start */
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r8, [r2]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, %[r]\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, %[r]\n\t"
+        /* Multiply * 2: Done */
+        "bal	5f\n\t"
+        "\n4:\n\t"
+        /* Square: Start */
+        "ldr	r6, [%[a]]\n\t"
+        "umull	r6, r8, r6, r6\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adc	r5, r5, %[r]\n\t"
+        /* Square: Done */
+        "\n5:\n\t"
+        "add	%[a], %[a], #4\n\t"
+        "sub	r2, r2, #4\n\t"
+        "mov	r6, #48\n\t"
+        "add	r6, r6, r10\n\t"
+        "cmp	%[a], r6\n\t"
+        "beq	3f\n\t"
+        "cmp	%[a], r2\n\t"
+        "bgt	3f\n\t"
+        "mov	r8, r9\n\t"
+        "add	r8, r8, r10\n\t"
+        "cmp	%[a], r8\n\t"
+        "ble	2b\n\t"
+        "\n3:\n\t"
+        "mov	%[r], r11\n\t"
+        "mov	r8, r9\n\t"
+        "str	r3, [%[r], r8]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "mov	r5, #0\n\t"
+        "add	r8, r8, #4\n\t"
+        "mov	r9, r8\n\t"
+        "mov	r6, #88\n\t"
+        "cmp	r8, r6\n\t"
+        "ble	1b\n\t"
+        "mov	%[a], r10\n\t"
+        "str	r3, [%[r], r8]\n\t"
+        "mov	%[r], r12\n\t"
+        "mov	%[a], r11\n\t"
+        "mov	r3, #92\n\t"
+        "\n4:\n\t"
+        "ldr	r6, [%[a], r3]\n\t"
+        "str	r6, [%[r], r3]\n\t"
+        "subs	r3, r3, #4\n\t"
+        "bge	4b\n\t"
+        "mov	r6, #96\n\t"
+        "add	sp, sp, r6\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12"
+    );
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_384_mont_sqr_12(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_384_sqr_12(r, a);
+    sp_384_mont_reduce_12(r, m, mp);
+}
+
+#if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY)
+/* Square the Montgomery form number a number of times. (r = a ^ n mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montogmery form.
+ * n   Number of times to square.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_384_mont_sqr_n_12(sp_digit* r, const sp_digit* a, int n,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_384_mont_sqr_12(r, a, m, mp);
+    for (; n > 1; n--) {
+        sp_384_mont_sqr_12(r, r, m, mp);
+    }
+}
+
+#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */
+#ifdef WOLFSSL_SP_SMALL
+/* Mod-2 for the P384 curve. */
+static const uint32_t p384_mod_minus_2[12] = {
+    0xfffffffdU,0x00000000U,0x00000000U,0xffffffffU,0xfffffffeU,0xffffffffU,
+    0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU
+};
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the modulus (prime) of the
+ * P384 curve. (r = 1 / a mod m)
+ *
+ * r   Inverse result.
+ * a   Number to invert.
+ * td  Temporary data.
+ */
+static void sp_384_mont_inv_12(sp_digit* r, const sp_digit* a, sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    XMEMCPY(t, a, sizeof(sp_digit) * 12);
+    for (i=382; i>=0; i--) {
+        sp_384_mont_sqr_12(t, t, p384_mod, p384_mp_mod);
+        if (p384_mod_minus_2[i / 32] & ((sp_digit)1 << (i % 32)))
+            sp_384_mont_mul_12(t, t, a, p384_mod, p384_mp_mod);
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 12);
+#else
+    sp_digit* t1 = td;
+    sp_digit* t2 = td + 2 * 12;
+    sp_digit* t3 = td + 4 * 12;
+    sp_digit* t4 = td + 6 * 12;
+    sp_digit* t5 = td + 8 * 12;
+
+    /* 0x2 */
+    sp_384_mont_sqr_12(t1, a, p384_mod, p384_mp_mod);
+    /* 0x3 */
+    sp_384_mont_mul_12(t5, t1, a, p384_mod, p384_mp_mod);
+    /* 0xc */
+    sp_384_mont_sqr_n_12(t1, t5, 2, p384_mod, p384_mp_mod);
+    /* 0xf */
+    sp_384_mont_mul_12(t2, t5, t1, p384_mod, p384_mp_mod);
+    /* 0x1e */
+    sp_384_mont_sqr_12(t1, t2, p384_mod, p384_mp_mod);
+    /* 0x1f */
+    sp_384_mont_mul_12(t4, t1, a, p384_mod, p384_mp_mod);
+    /* 0x3e0 */
+    sp_384_mont_sqr_n_12(t1, t4, 5, p384_mod, p384_mp_mod);
+    /* 0x3ff */
+    sp_384_mont_mul_12(t2, t4, t1, p384_mod, p384_mp_mod);
+    /* 0x7fe0 */
+    sp_384_mont_sqr_n_12(t1, t2, 5, p384_mod, p384_mp_mod);
+    /* 0x7fff */
+    sp_384_mont_mul_12(t4, t4, t1, p384_mod, p384_mp_mod);
+    /* 0x3fff8000 */
+    sp_384_mont_sqr_n_12(t1, t4, 15, p384_mod, p384_mp_mod);
+    /* 0x3fffffff */
+    sp_384_mont_mul_12(t2, t4, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffc */
+    sp_384_mont_sqr_n_12(t3, t2, 2, p384_mod, p384_mp_mod);
+    /* 0xfffffffd */
+    sp_384_mont_mul_12(r, t3, a, p384_mod, p384_mp_mod);
+    /* 0xffffffff */
+    sp_384_mont_mul_12(t3, t5, t3, p384_mod, p384_mp_mod);
+    /* 0xfffffffc0000000 */
+    sp_384_mont_sqr_n_12(t1, t2, 30, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffff */
+    sp_384_mont_mul_12(t2, t2, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffff000000000000000 */
+    sp_384_mont_sqr_n_12(t1, t2, 60, p384_mod, p384_mp_mod);
+    /* 0xffffffffffffffffffffffffffffff */
+    sp_384_mont_mul_12(t2, t2, t1, p384_mod, p384_mp_mod);
+    /* 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */
+    sp_384_mont_sqr_n_12(t1, t2, 120, p384_mod, p384_mp_mod);
+    /* 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+    sp_384_mont_mul_12(t2, t2, t1, p384_mod, p384_mp_mod);
+    /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */
+    sp_384_mont_sqr_n_12(t1, t2, 15, p384_mod, p384_mp_mod);
+    /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+    sp_384_mont_mul_12(t2, t4, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe00000000 */
+    sp_384_mont_sqr_n_12(t1, t2, 33, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff */
+    sp_384_mont_mul_12(t2, t3, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff000000000000000000000000 */
+    sp_384_mont_sqr_n_12(t1, t2, 96, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000fffffffd */
+    sp_384_mont_mul_12(r, r, t1, p384_mod, p384_mp_mod);
+
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Compare a with b in constant time.
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+SP_NOINLINE static int32_t sp_384_cmp_12(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+
+
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mvn	r3, r3\n\t"
+        "mov	r6, #44\n\t"
+        "\n1:\n\t"
+        "ldr	r8, [%[a], r6]\n\t"
+        "ldr	r5, [%[b], r6]\n\t"
+        "and	r8, r8, r3\n\t"
+        "and	r5, r5, r3\n\t"
+        "mov	r4, r8\n\t"
+        "subs	r8, r8, r5\n\t"
+        "sbc	r8, r8, r8\n\t"
+        "add	%[r], %[r], r8\n\t"
+        "mvn	r8, r8\n\t"
+        "and	r3, r3, r8\n\t"
+        "subs	r5, r5, r4\n\t"
+        "sbc	r8, r8, r8\n\t"
+        "sub	%[r], %[r], r8\n\t"
+        "mvn	r8, r8\n\t"
+        "and	r3, r3, r8\n\t"
+        "sub	r6, r6, #4\n\t"
+        "cmp	r6, #0\n\t"
+        "bge	1b\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
+        : "r3", "r4", "r5", "r6", "r8"
+    );
+
+    return r;
+}
+
+/* Normalize the values in each word to 32.
+ *
+ * a  Array of sp_digit to normalize.
+ */
+#define sp_384_norm_12(a)
+
+/* Map the Montgomery form projective coordinate point to an affine point.
+ *
+ * r  Resulting affine coordinate point.
+ * p  Montgomery form projective coordinate point.
+ * t  Temporary ordinate data.
+ */
+static void sp_384_map_12(sp_point_384* r, const sp_point_384* p, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*12;
+    int32_t n;
+
+    sp_384_mont_inv_12(t1, p->z, t + 2*12);
+
+    sp_384_mont_sqr_12(t2, t1, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_12(t1, t2, t1, p384_mod, p384_mp_mod);
+
+    /* x /= z^2 */
+    sp_384_mont_mul_12(r->x, p->x, t2, p384_mod, p384_mp_mod);
+    XMEMSET(r->x + 12, 0, sizeof(r->x) / 2U);
+    sp_384_mont_reduce_12(r->x, p384_mod, p384_mp_mod);
+    /* Reduce x to less than modulus */
+    n = sp_384_cmp_12(r->x, p384_mod);
+    sp_384_cond_sub_12(r->x, r->x, p384_mod, 0 - ((n >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_384_norm_12(r->x);
+
+    /* y /= z^3 */
+    sp_384_mont_mul_12(r->y, p->y, t1, p384_mod, p384_mp_mod);
+    XMEMSET(r->y + 12, 0, sizeof(r->y) / 2U);
+    sp_384_mont_reduce_12(r->y, p384_mod, p384_mp_mod);
+    /* Reduce y to less than modulus */
+    n = sp_384_cmp_12(r->y, p384_mod);
+    sp_384_cond_sub_12(r->y, r->y, p384_mod, 0 - ((n >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_384_norm_12(r->y);
+
+    XMEMSET(r->z, 0, sizeof(r->z));
+    r->z[0] = 1;
+
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r6, %[a]\n\t"
+        "mov	r8, #0\n\t"
+        "add	r6, r6, #48\n\t"
+        "sub	r8, r8, #1\n\t"
+        "\n1:\n\t"
+        "adds	%[c], %[c], r8\n\t"
+        "ldr	r4, [%[a]]\n\t"
+        "ldr	r5, [%[b]]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "str	r4, [%[r]]\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c], %[c]\n\t"
+        "add	%[a], %[a], #4\n\t"
+        "add	%[b], %[b], #4\n\t"
+        "add	%[r], %[r], #4\n\t"
+        "cmp	%[a], r6\n\t"
+        "bne	1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r8"
+    );
+
+    return c;
+}
+
+#else
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c], %[c]\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r8"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Add two Montgomery form numbers (r = a + b % m).
+ *
+ * r   Result of addition.
+ * a   First number to add in Montogmery form.
+ * b   Second number to add in Montogmery form.
+ * m   Modulus (prime).
+ */
+SP_NOINLINE static void sp_384_mont_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    sp_digit o;
+
+    o = sp_384_add_12(r, a, b);
+    sp_384_cond_sub_12(r, r, m, 0 - o);
+}
+
+/* Double a Montgomery form number (r = a + a % m).
+ *
+ * r   Result of doubling.
+ * a   Number to double in Montogmery form.
+ * m   Modulus (prime).
+ */
+SP_NOINLINE static void sp_384_mont_dbl_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    sp_digit o;
+
+    o = sp_384_add_12(r, a, a);
+    sp_384_cond_sub_12(r, r, m, 0 - o);
+}
+
+/* Triple a Montgomery form number (r = a + a + a % m).
+ *
+ * r   Result of Tripling.
+ * a   Number to triple in Montogmery form.
+ * m   Modulus (prime).
+ */
+SP_NOINLINE static void sp_384_mont_tpl_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    sp_digit o;
+
+    o = sp_384_add_12(r, a, a);
+    sp_384_cond_sub_12(r, r, m, 0 - o);
+    o = sp_384_add_12(r, r, a);
+    sp_384_cond_sub_12(r, r, m, 0 - o);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r6, %[a]\n\t"
+        "add	r6, r6, #48\n\t"
+        "\n1:\n\t"
+        "mov	r5, #0\n\t"
+        "subs	r5, r5, %[c]\n\t"
+        "ldr	r4, [%[a]]\n\t"
+        "ldr	r5, [%[b]]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "str	r4, [%[r]]\n\t"
+        "sbc	%[c], %[c], %[c]\n\t"
+        "add	%[a], %[a], #4\n\t"
+        "add	%[b], %[b], #4\n\t"
+        "add	%[r], %[r], #4\n\t"
+        "cmp	%[a], r6\n\t"
+        "bne	1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6"
+    );
+
+    return c;
+}
+
+#else
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldr	r4, [%[a], #0]\n\t"
+        "ldr	r5, [%[a], #4]\n\t"
+        "ldr	r6, [%[b], #0]\n\t"
+        "ldr	r8, [%[b], #4]\n\t"
+        "subs	r4, r4, r6\n\t"
+        "sbcs	r5, r5, r8\n\t"
+        "str	r4, [%[r], #0]\n\t"
+        "str	r5, [%[r], #4]\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "ldr	r5, [%[a], #12]\n\t"
+        "ldr	r6, [%[b], #8]\n\t"
+        "ldr	r8, [%[b], #12]\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "sbcs	r5, r5, r8\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "str	r5, [%[r], #12]\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "ldr	r5, [%[a], #20]\n\t"
+        "ldr	r6, [%[b], #16]\n\t"
+        "ldr	r8, [%[b], #20]\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "sbcs	r5, r5, r8\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "str	r5, [%[r], #20]\n\t"
+        "ldr	r4, [%[a], #24]\n\t"
+        "ldr	r5, [%[a], #28]\n\t"
+        "ldr	r6, [%[b], #24]\n\t"
+        "ldr	r8, [%[b], #28]\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "sbcs	r5, r5, r8\n\t"
+        "str	r4, [%[r], #24]\n\t"
+        "str	r5, [%[r], #28]\n\t"
+        "ldr	r4, [%[a], #32]\n\t"
+        "ldr	r5, [%[a], #36]\n\t"
+        "ldr	r6, [%[b], #32]\n\t"
+        "ldr	r8, [%[b], #36]\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "sbcs	r5, r5, r8\n\t"
+        "str	r4, [%[r], #32]\n\t"
+        "str	r5, [%[r], #36]\n\t"
+        "ldr	r4, [%[a], #40]\n\t"
+        "ldr	r5, [%[a], #44]\n\t"
+        "ldr	r6, [%[b], #40]\n\t"
+        "ldr	r8, [%[b], #44]\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "sbcs	r5, r5, r8\n\t"
+        "str	r4, [%[r], #40]\n\t"
+        "str	r5, [%[r], #44]\n\t"
+        "sbc	%[c], %[c], %[c]\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r8"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r  A single precision number representing conditional add result.
+ * a  A single precision number to add with.
+ * b  A single precision number to add.
+ * m  Mask value to apply.
+ */
+SP_NOINLINE static sp_digit sp_384_cond_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        sp_digit m)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r5, #48\n\t"
+        "mov	r9, r5\n\t"
+        "mov	r8, #0\n\t"
+        "\n1:\n\t"
+        "ldr	r6, [%[b], r8]\n\t"
+        "and	r6, r6, %[m]\n\t"
+        "adds	r5, %[c], #-1\n\t"
+        "ldr	r5, [%[a], r8]\n\t"
+        "adcs	r5, r5, r6\n\t"
+        "mov	%[c], #0\n\t"
+        "adcs	%[c], %[c], %[c]\n\t"
+        "str	r5, [%[r], r8]\n\t"
+        "add	r8, r8, #4\n\t"
+        "cmp	r8, r9\n\t"
+        "blt	1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r5", "r6", "r8", "r9"
+    );
+
+    return c;
+}
+
+/* Subtract two Montgomery form numbers (r = a - b % m).
+ *
+ * r   Result of subtration.
+ * a   Number to subtract from in Montogmery form.
+ * b   Number to subtract with in Montogmery form.
+ * m   Modulus (prime).
+ */
+SP_NOINLINE static void sp_384_mont_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    sp_digit o;
+
+    o = sp_384_sub_12(r, a, b);
+    sp_384_cond_add_12(r, r, m, o);
+}
+
+static void sp_384_rshift1_12(sp_digit* r, sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "ldr	r2, [%[a]]\n\t"
+        "ldr	r3, [%[a], #4]\n\t"
+        "lsr	r2, r2, #1\n\t"
+        "lsl	r5, r3, #31\n\t"
+        "lsr	r3, r3, #1\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "str	r2, [%[r], #0]\n\t"
+        "lsl	r5, r4, #31\n\t"
+        "lsr	r4, r4, #1\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r2, [%[a], #12]\n\t"
+        "str	r3, [%[r], #4]\n\t"
+        "lsl	r5, r2, #31\n\t"
+        "lsr	r2, r2, #1\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r3, [%[a], #16]\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "lsl	r5, r3, #31\n\t"
+        "lsr	r3, r3, #1\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "str	r2, [%[r], #12]\n\t"
+        "lsl	r5, r4, #31\n\t"
+        "lsr	r4, r4, #1\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r2, [%[a], #24]\n\t"
+        "str	r3, [%[r], #16]\n\t"
+        "lsl	r5, r2, #31\n\t"
+        "lsr	r2, r2, #1\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r3, [%[a], #28]\n\t"
+        "str	r4, [%[r], #20]\n\t"
+        "lsl	r5, r3, #31\n\t"
+        "lsr	r3, r3, #1\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r4, [%[a], #32]\n\t"
+        "str	r2, [%[r], #24]\n\t"
+        "lsl	r5, r4, #31\n\t"
+        "lsr	r4, r4, #1\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r2, [%[a], #36]\n\t"
+        "str	r3, [%[r], #28]\n\t"
+        "lsl	r5, r2, #31\n\t"
+        "lsr	r2, r2, #1\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r3, [%[a], #40]\n\t"
+        "str	r4, [%[r], #32]\n\t"
+        "lsl	r5, r3, #31\n\t"
+        "lsr	r3, r3, #1\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r4, [%[a], #44]\n\t"
+        "str	r2, [%[r], #36]\n\t"
+        "lsl	r5, r4, #31\n\t"
+        "lsr	r4, r4, #1\n\t"
+        "orr	r3, r3, r5\n\t"
+        "str	r3, [%[r], #40]\n\t"
+        "str	r4, [%[r], #44]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r2", "r3", "r4", "r5"
+    );
+}
+
+/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
+ *
+ * r  Result of division by 2.
+ * a  Number to divide.
+ * m  Modulus (prime).
+ */
+SP_NOINLINE static void sp_384_div2_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    sp_digit o;
+
+    o = sp_384_cond_add_12(r, a, m, 0 - (a[0] & 1));
+    sp_384_rshift1_12(r, r);
+    r[11] |= o << 31;
+}
+
+/* Double the Montgomery form projective point p.
+ *
+ * r  Result of doubling point.
+ * p  Point to double.
+ * t  Temporary ordinate data.
+ */
+static void sp_384_proj_point_dbl_12(sp_point_384* r, const sp_point_384* p, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*12;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+
+    x = r->x;
+    y = r->y;
+    z = r->z;
+    /* Put infinity into result. */
+    if (r != p) {
+        r->infinity = p->infinity;
+    }
+
+    /* T1 = Z * Z */
+    sp_384_mont_sqr_12(t1, p->z, p384_mod, p384_mp_mod);
+    /* Z = Y * Z */
+    sp_384_mont_mul_12(z, p->y, p->z, p384_mod, p384_mp_mod);
+    /* Z = 2Z */
+    sp_384_mont_dbl_12(z, z, p384_mod);
+    /* T2 = X - T1 */
+    sp_384_mont_sub_12(t2, p->x, t1, p384_mod);
+    /* T1 = X + T1 */
+    sp_384_mont_add_12(t1, p->x, t1, p384_mod);
+    /* T2 = T1 * T2 */
+    sp_384_mont_mul_12(t2, t1, t2, p384_mod, p384_mp_mod);
+    /* T1 = 3T2 */
+    sp_384_mont_tpl_12(t1, t2, p384_mod);
+    /* Y = 2Y */
+    sp_384_mont_dbl_12(y, p->y, p384_mod);
+    /* Y = Y * Y */
+    sp_384_mont_sqr_12(y, y, p384_mod, p384_mp_mod);
+    /* T2 = Y * Y */
+    sp_384_mont_sqr_12(t2, y, p384_mod, p384_mp_mod);
+    /* T2 = T2/2 */
+    sp_384_div2_12(t2, t2, p384_mod);
+    /* Y = Y * X */
+    sp_384_mont_mul_12(y, y, p->x, p384_mod, p384_mp_mod);
+    /* X = T1 * T1 */
+    sp_384_mont_sqr_12(x, t1, p384_mod, p384_mp_mod);
+    /* X = X - Y */
+    sp_384_mont_sub_12(x, x, y, p384_mod);
+    /* X = X - Y */
+    sp_384_mont_sub_12(x, x, y, p384_mod);
+    /* Y = Y - X */
+    sp_384_mont_sub_12(y, y, x, p384_mod);
+    /* Y = Y * T1 */
+    sp_384_mont_mul_12(y, y, t1, p384_mod, p384_mp_mod);
+    /* Y = Y - T2 */
+    sp_384_mont_sub_12(y, y, t2, p384_mod);
+}
+
+/* Compare two numbers to determine if they are equal.
+ * Constant time implementation.
+ *
+ * a  First number to compare.
+ * b  Second number to compare.
+ * returns 1 when equal and 0 otherwise.
+ */
+static int sp_384_cmp_equal_12(const sp_digit* a, const sp_digit* b)
+{
+    return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3]) |
+            (a[4] ^ b[4]) | (a[5] ^ b[5]) | (a[6] ^ b[6]) | (a[7] ^ b[7]) |
+            (a[8] ^ b[8]) | (a[9] ^ b[9]) | (a[10] ^ b[10]) | (a[11] ^ b[11])) == 0;
+}
+
+/* Add two Montgomery form projective points.
+ *
+ * r  Result of addition.
+ * p  First point to add.
+ * q  Second point to add.
+ * t  Temporary ordinate data.
+ */
+static void sp_384_proj_point_add_12(sp_point_384* r, const sp_point_384* p, const sp_point_384* q,
+        sp_digit* t)
+{
+    const sp_point_384* ap[2];
+    sp_point_384* rp[2];
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*12;
+    sp_digit* t3 = t + 4*12;
+    sp_digit* t4 = t + 6*12;
+    sp_digit* t5 = t + 8*12;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+    int i;
+
+    /* Ensure only the first point is the same as the result. */
+    if (q == r) {
+        const sp_point_384* a = p;
+        p = q;
+        q = a;
+    }
+
+    /* Check double */
+    (void)sp_384_sub_12(t1, p384_mod, q->y);
+    sp_384_norm_12(t1);
+    if ((sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) &
+        (sp_384_cmp_equal_12(p->y, q->y) | sp_384_cmp_equal_12(p->y, t1))) != 0) {
+        sp_384_proj_point_dbl_12(r, p, t);
+    }
+    else {
+        rp[0] = r;
+
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point_384));
+        x = rp[p->infinity | q->infinity]->x;
+        y = rp[p->infinity | q->infinity]->y;
+        z = rp[p->infinity | q->infinity]->z;
+
+        ap[0] = p;
+        ap[1] = q;
+        for (i=0; i<12; i++) {
+            r->x[i] = ap[p->infinity]->x[i];
+        }
+        for (i=0; i<12; i++) {
+            r->y[i] = ap[p->infinity]->y[i];
+        }
+        for (i=0; i<12; i++) {
+            r->z[i] = ap[p->infinity]->z[i];
+        }
+        r->infinity = ap[p->infinity]->infinity;
+
+        /* U1 = X1*Z2^2 */
+        sp_384_mont_sqr_12(t1, q->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t3, t1, q->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t1, t1, x, p384_mod, p384_mp_mod);
+        /* U2 = X2*Z1^2 */
+        sp_384_mont_sqr_12(t2, z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t4, t2, z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t2, t2, q->x, p384_mod, p384_mp_mod);
+        /* S1 = Y1*Z2^3 */
+        sp_384_mont_mul_12(t3, t3, y, p384_mod, p384_mp_mod);
+        /* S2 = Y2*Z1^3 */
+        sp_384_mont_mul_12(t4, t4, q->y, p384_mod, p384_mp_mod);
+        /* H = U2 - U1 */
+        sp_384_mont_sub_12(t2, t2, t1, p384_mod);
+        /* R = S2 - S1 */
+        sp_384_mont_sub_12(t4, t4, t3, p384_mod);
+        /* Z3 = H*Z1*Z2 */
+        sp_384_mont_mul_12(z, z, q->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(z, z, t2, p384_mod, p384_mp_mod);
+        /* X3 = R^2 - H^3 - 2*U1*H^2 */
+        sp_384_mont_sqr_12(x, t4, p384_mod, p384_mp_mod);
+        sp_384_mont_sqr_12(t5, t2, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(y, t1, t5, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t5, t5, t2, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_12(x, x, t5, p384_mod);
+        sp_384_mont_dbl_12(t1, y, p384_mod);
+        sp_384_mont_sub_12(x, x, t1, p384_mod);
+        /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
+        sp_384_mont_sub_12(y, y, x, p384_mod);
+        sp_384_mont_mul_12(y, y, t4, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t5, t5, t3, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_12(y, y, t5, p384_mod);
+    }
+}
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_fast_12(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
+        int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 td[16];
+    sp_point_384 rtd;
+    sp_digit tmpd[2 * 12 * 6];
+#endif
+    sp_point_384* t;
+    sp_point_384* rt;
+    sp_digit* tmp;
+    sp_digit n;
+    int i;
+    int c, y;
+    int err;
+
+    (void)heap;
+
+    err = sp_384_point_new_12(heap, rtd, rt);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 16, heap, DYNAMIC_TYPE_ECC);
+    if (t == NULL)
+        err = MEMORY_E;
+    tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, heap,
+                             DYNAMIC_TYPE_ECC);
+    if (tmp == NULL)
+        err = MEMORY_E;
+#else
+    t = td;
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        /* t[0] = {0, 0, 1} * norm */
+        XMEMSET(&t[0], 0, sizeof(t[0]));
+        t[0].infinity = 1;
+        /* t[1] = {g->x, g->y, g->z} * norm */
+        (void)sp_384_mod_mul_norm_12(t[1].x, g->x, p384_mod);
+        (void)sp_384_mod_mul_norm_12(t[1].y, g->y, p384_mod);
+        (void)sp_384_mod_mul_norm_12(t[1].z, g->z, p384_mod);
+        t[1].infinity = 0;
+        sp_384_proj_point_dbl_12(&t[ 2], &t[ 1], tmp);
+        t[ 2].infinity = 0;
+        sp_384_proj_point_add_12(&t[ 3], &t[ 2], &t[ 1], tmp);
+        t[ 3].infinity = 0;
+        sp_384_proj_point_dbl_12(&t[ 4], &t[ 2], tmp);
+        t[ 4].infinity = 0;
+        sp_384_proj_point_add_12(&t[ 5], &t[ 3], &t[ 2], tmp);
+        t[ 5].infinity = 0;
+        sp_384_proj_point_dbl_12(&t[ 6], &t[ 3], tmp);
+        t[ 6].infinity = 0;
+        sp_384_proj_point_add_12(&t[ 7], &t[ 4], &t[ 3], tmp);
+        t[ 7].infinity = 0;
+        sp_384_proj_point_dbl_12(&t[ 8], &t[ 4], tmp);
+        t[ 8].infinity = 0;
+        sp_384_proj_point_add_12(&t[ 9], &t[ 5], &t[ 4], tmp);
+        t[ 9].infinity = 0;
+        sp_384_proj_point_dbl_12(&t[10], &t[ 5], tmp);
+        t[10].infinity = 0;
+        sp_384_proj_point_add_12(&t[11], &t[ 6], &t[ 5], tmp);
+        t[11].infinity = 0;
+        sp_384_proj_point_dbl_12(&t[12], &t[ 6], tmp);
+        t[12].infinity = 0;
+        sp_384_proj_point_add_12(&t[13], &t[ 7], &t[ 6], tmp);
+        t[13].infinity = 0;
+        sp_384_proj_point_dbl_12(&t[14], &t[ 7], tmp);
+        t[14].infinity = 0;
+        sp_384_proj_point_add_12(&t[15], &t[ 8], &t[ 7], tmp);
+        t[15].infinity = 0;
+
+        i = 10;
+        n = k[i+1] << 0;
+        c = 28;
+        y = n >> 28;
+        XMEMCPY(rt, &t[y], sizeof(sp_point_384));
+        n <<= 4;
+        for (; i>=0 || c>=4; ) {
+            if (c < 4) {
+                n |= k[i--];
+                c += 32;
+            }
+            y = (n >> 28) & 0xf;
+            n <<= 4;
+            c -= 4;
+
+            sp_384_proj_point_dbl_12(rt, rt, tmp);
+            sp_384_proj_point_dbl_12(rt, rt, tmp);
+            sp_384_proj_point_dbl_12(rt, rt, tmp);
+            sp_384_proj_point_dbl_12(rt, rt, tmp);
+
+            sp_384_proj_point_add_12(rt, rt, &t[y], tmp);
+        }
+
+        if (map != 0) {
+            sp_384_map_12(r, rt, tmp);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_384));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 12 * 6);
+        XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+    }
+    if (t != NULL) {
+        XMEMSET(t, 0, sizeof(sp_point_384) * 16);
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    ForceZero(tmpd, sizeof(tmpd));
+    ForceZero(td, sizeof(td));
+#endif
+    sp_384_point_free_12(rt, 1, heap);
+
+    return err;
+}
+
+/* A table entry for pre-computed points. */
+typedef struct sp_table_entry_384 {
+    sp_digit x[12];
+    sp_digit y[12];
+} sp_table_entry_384;
+
+#ifdef FP_ECC
+/* Double the Montgomery form projective point p a number of times.
+ *
+ * r  Result of repeated doubling of point.
+ * p  Point to double.
+ * n  Number of times to double
+ * t  Temporary ordinate data.
+ */
+static void sp_384_proj_point_dbl_n_12(sp_point_384* p, int n, sp_digit* t)
+{
+    sp_digit* w = t;
+    sp_digit* a = t + 2*12;
+    sp_digit* b = t + 4*12;
+    sp_digit* t1 = t + 6*12;
+    sp_digit* t2 = t + 8*12;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+
+    x = p->x;
+    y = p->y;
+    z = p->z;
+
+    /* Y = 2*Y */
+    sp_384_mont_dbl_12(y, y, p384_mod);
+    /* W = Z^4 */
+    sp_384_mont_sqr_12(w, z, p384_mod, p384_mp_mod);
+    sp_384_mont_sqr_12(w, w, p384_mod, p384_mp_mod);
+
+#ifndef WOLFSSL_SP_SMALL
+    while (--n > 0)
+#else
+    while (--n >= 0)
+#endif
+    {
+        /* A = 3*(X^2 - W) */
+        sp_384_mont_sqr_12(t1, x, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_12(t1, t1, w, p384_mod);
+        sp_384_mont_tpl_12(a, t1, p384_mod);
+        /* B = X*Y^2 */
+        sp_384_mont_sqr_12(t1, y, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(b, t1, x, p384_mod, p384_mp_mod);
+        /* X = A^2 - 2B */
+        sp_384_mont_sqr_12(x, a, p384_mod, p384_mp_mod);
+        sp_384_mont_dbl_12(t2, b, p384_mod);
+        sp_384_mont_sub_12(x, x, t2, p384_mod);
+        /* Z = Z*Y */
+        sp_384_mont_mul_12(z, z, y, p384_mod, p384_mp_mod);
+        /* t2 = Y^4 */
+        sp_384_mont_sqr_12(t1, t1, p384_mod, p384_mp_mod);
+#ifdef WOLFSSL_SP_SMALL
+        if (n != 0)
+#endif
+        {
+            /* W = W*Y^4 */
+            sp_384_mont_mul_12(w, w, t1, p384_mod, p384_mp_mod);
+        }
+        /* y = 2*A*(B - X) - Y^4 */
+        sp_384_mont_sub_12(y, b, x, p384_mod);
+        sp_384_mont_mul_12(y, y, a, p384_mod, p384_mp_mod);
+        sp_384_mont_dbl_12(y, y, p384_mod);
+        sp_384_mont_sub_12(y, y, t1, p384_mod);
+    }
+#ifndef WOLFSSL_SP_SMALL
+    /* A = 3*(X^2 - W) */
+    sp_384_mont_sqr_12(t1, x, p384_mod, p384_mp_mod);
+    sp_384_mont_sub_12(t1, t1, w, p384_mod);
+    sp_384_mont_tpl_12(a, t1, p384_mod);
+    /* B = X*Y^2 */
+    sp_384_mont_sqr_12(t1, y, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_12(b, t1, x, p384_mod, p384_mp_mod);
+    /* X = A^2 - 2B */
+    sp_384_mont_sqr_12(x, a, p384_mod, p384_mp_mod);
+    sp_384_mont_dbl_12(t2, b, p384_mod);
+    sp_384_mont_sub_12(x, x, t2, p384_mod);
+    /* Z = Z*Y */
+    sp_384_mont_mul_12(z, z, y, p384_mod, p384_mp_mod);
+    /* t2 = Y^4 */
+    sp_384_mont_sqr_12(t1, t1, p384_mod, p384_mp_mod);
+    /* y = 2*A*(B - X) - Y^4 */
+    sp_384_mont_sub_12(y, b, x, p384_mod);
+    sp_384_mont_mul_12(y, y, a, p384_mod, p384_mp_mod);
+    sp_384_mont_dbl_12(y, y, p384_mod);
+    sp_384_mont_sub_12(y, y, t1, p384_mod);
+#endif
+    /* Y = Y/2 */
+    sp_384_div2_12(y, y, p384_mod);
+}
+
+/* Convert the projective point to affine.
+ * Ordinates are in Montgomery form.
+ *
+ * a  Point to convert.
+ * t  Temporary data.
+ */
+static void sp_384_proj_to_affine_12(sp_point_384* a, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2 * 12;
+    sp_digit* tmp = t + 4 * 12;
+
+    sp_384_mont_inv_12(t1, a->z, tmp);
+
+    sp_384_mont_sqr_12(t2, t1, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_12(t1, t2, t1, p384_mod, p384_mp_mod);
+
+    sp_384_mont_mul_12(a->x, a->x, t2, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_12(a->y, a->y, t1, p384_mod, p384_mp_mod);
+    XMEMCPY(a->z, p384_norm_mod, sizeof(p384_norm_mod));
+}
+
+#endif /* FP_ECC */
+/* Add two Montgomery form projective points. The second point has a q value of
+ * one.
+ * Only the first point can be the same pointer as the result point.
+ *
+ * r  Result of addition.
+ * p  First point to add.
+ * q  Second point to add.
+ * t  Temporary ordinate data.
+ */
+static void sp_384_proj_point_add_qz1_12(sp_point_384* r, const sp_point_384* p,
+        const sp_point_384* q, sp_digit* t)
+{
+    const sp_point_384* ap[2];
+    sp_point_384* rp[2];
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*12;
+    sp_digit* t3 = t + 4*12;
+    sp_digit* t4 = t + 6*12;
+    sp_digit* t5 = t + 8*12;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+    int i;
+
+    /* Check double */
+    (void)sp_384_sub_12(t1, p384_mod, q->y);
+    sp_384_norm_12(t1);
+    if ((sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) &
+        (sp_384_cmp_equal_12(p->y, q->y) | sp_384_cmp_equal_12(p->y, t1))) != 0) {
+        sp_384_proj_point_dbl_12(r, p, t);
+    }
+    else {
+        rp[0] = r;
+
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point_384));
+        x = rp[p->infinity | q->infinity]->x;
+        y = rp[p->infinity | q->infinity]->y;
+        z = rp[p->infinity | q->infinity]->z;
+
+        ap[0] = p;
+        ap[1] = q;
+        for (i=0; i<12; i++) {
+            r->x[i] = ap[p->infinity]->x[i];
+        }
+        for (i=0; i<12; i++) {
+            r->y[i] = ap[p->infinity]->y[i];
+        }
+        for (i=0; i<12; i++) {
+            r->z[i] = ap[p->infinity]->z[i];
+        }
+        r->infinity = ap[p->infinity]->infinity;
+
+        /* U2 = X2*Z1^2 */
+        sp_384_mont_sqr_12(t2, z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t4, t2, z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t2, t2, q->x, p384_mod, p384_mp_mod);
+        /* S2 = Y2*Z1^3 */
+        sp_384_mont_mul_12(t4, t4, q->y, p384_mod, p384_mp_mod);
+        /* H = U2 - X1 */
+        sp_384_mont_sub_12(t2, t2, x, p384_mod);
+        /* R = S2 - Y1 */
+        sp_384_mont_sub_12(t4, t4, y, p384_mod);
+        /* Z3 = H*Z1 */
+        sp_384_mont_mul_12(z, z, t2, p384_mod, p384_mp_mod);
+        /* X3 = R^2 - H^3 - 2*X1*H^2 */
+        sp_384_mont_sqr_12(t1, t4, p384_mod, p384_mp_mod);
+        sp_384_mont_sqr_12(t5, t2, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t3, x, t5, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t5, t5, t2, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_12(x, t1, t5, p384_mod);
+        sp_384_mont_dbl_12(t1, t3, p384_mod);
+        sp_384_mont_sub_12(x, x, t1, p384_mod);
+        /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */
+        sp_384_mont_sub_12(t3, t3, x, p384_mod);
+        sp_384_mont_mul_12(t3, t3, t4, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t5, t5, y, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_12(y, t3, t5, p384_mod);
+    }
+}
+
+#ifdef WOLFSSL_SP_SMALL
+#ifdef FP_ECC
+/* Generate the pre-computed table of points for the base point.
+ *
+ * a      The base point.
+ * table  Place to store generated point data.
+ * tmp    Temporary data.
+ * heap  Heap to use for allocation.
+ */
+static int sp_384_gen_stripe_table_12(const sp_point_384* a,
+        sp_table_entry_384* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 td, s1d, s2d;
+#endif
+    sp_point_384* t;
+    sp_point_384* s1 = NULL;
+    sp_point_384* s2 = NULL;
+    int i, j;
+    int err;
+
+    (void)heap;
+
+    err = sp_384_point_new_12(heap, td, t);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, s1d, s1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, s2d, s2);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_12(t->x, a->x, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_12(t->y, a->y, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_12(t->z, a->z, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        t->infinity = 0;
+        sp_384_proj_to_affine_12(t, tmp);
+
+        XMEMCPY(s1->z, p384_norm_mod, sizeof(p384_norm_mod));
+        s1->infinity = 0;
+        XMEMCPY(s2->z, p384_norm_mod, sizeof(p384_norm_mod));
+        s2->infinity = 0;
+
+        /* table[0] = {0, 0, infinity} */
+        XMEMSET(&table[0], 0, sizeof(sp_table_entry_384));
+        /* table[1] = Affine version of 'a' in Montgomery form */
+        XMEMCPY(table[1].x, t->x, sizeof(table->x));
+        XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+        for (i=1; i<4; i++) {
+            sp_384_proj_point_dbl_n_12(t, 96, tmp);
+            sp_384_proj_to_affine_12(t, tmp);
+            XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+            XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+        }
+
+        for (i=1; i<4; i++) {
+            XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+            XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+            for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+                XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+                XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+                sp_384_proj_point_add_qz1_12(t, s1, s2, tmp);
+                sp_384_proj_to_affine_12(t, tmp);
+                XMEMCPY(table[j].x, t->x, sizeof(table->x));
+                XMEMCPY(table[j].y, t->y, sizeof(table->y));
+            }
+        }
+    }
+
+    sp_384_point_free_12(s2, 0, heap);
+    sp_384_point_free_12(s1, 0, heap);
+    sp_384_point_free_12( t, 0, heap);
+
+    return err;
+}
+
+#endif /* FP_ECC */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_stripe_12(sp_point_384* r, const sp_point_384* g,
+        const sp_table_entry_384* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 rtd;
+    sp_point_384 pd;
+    sp_digit td[2 * 12 * 6];
+#endif
+    sp_point_384* rt;
+    sp_point_384* p = NULL;
+    sp_digit* t;
+    int i, j;
+    int y, x;
+    int err;
+
+    (void)g;
+    (void)heap;
+
+
+    err = sp_384_point_new_12(heap, rtd, rt);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, heap,
+                           DYNAMIC_TYPE_ECC);
+    if (t == NULL) {
+        err = MEMORY_E;
+    }
+#else
+    t = td;
+#endif
+
+    if (err == MP_OKAY) {
+        XMEMCPY(p->z, p384_norm_mod, sizeof(p384_norm_mod));
+        XMEMCPY(rt->z, p384_norm_mod, sizeof(p384_norm_mod));
+
+        y = 0;
+        for (j=0,x=95; j<4; j++,x+=96) {
+            y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+        }
+        XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+        XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+        rt->infinity = !y;
+        for (i=94; i>=0; i--) {
+            y = 0;
+            for (j=0,x=i; j<4; j++,x+=96) {
+                y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+            }
+
+            sp_384_proj_point_dbl_12(rt, rt, t);
+            XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+            XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+            p->infinity = !y;
+            sp_384_proj_point_add_qz1_12(rt, rt, p, t);
+        }
+
+        if (map != 0) {
+            sp_384_map_12(r, rt, t);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_384));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(p, 0, heap);
+    sp_384_point_free_12(rt, 0, heap);
+
+    return err;
+}
+
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+    #define FP_ENTRIES 16
+#endif
+
+typedef struct sp_cache_384_t {
+    sp_digit x[12];
+    sp_digit y[12];
+    sp_table_entry_384 table[16];
+    uint32_t cnt;
+    int set;
+} sp_cache_384_t;
+
+static THREAD_LS_T sp_cache_384_t sp_cache_384[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_384_last = -1;
+static THREAD_LS_T int sp_cache_384_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+    static volatile int initCacheMutex_384 = 0;
+    static wolfSSL_Mutex sp_cache_384_lock;
+#endif
+
+static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache)
+{
+    int i, j;
+    uint32_t least;
+
+    if (sp_cache_384_inited == 0) {
+        for (i=0; i<FP_ENTRIES; i++) {
+            sp_cache_384[i].set = 0;
+        }
+        sp_cache_384_inited = 1;
+    }
+
+    /* Compare point with those in cache. */
+    for (i=0; i<FP_ENTRIES; i++) {
+        if (!sp_cache_384[i].set)
+            continue;
+
+        if (sp_384_cmp_equal_12(g->x, sp_cache_384[i].x) &
+                           sp_384_cmp_equal_12(g->y, sp_cache_384[i].y)) {
+            sp_cache_384[i].cnt++;
+            break;
+        }
+    }
+
+    /* No match. */
+    if (i == FP_ENTRIES) {
+        /* Find empty entry. */
+        i = (sp_cache_384_last + 1) % FP_ENTRIES;
+        for (; i != sp_cache_384_last; i=(i+1)%FP_ENTRIES) {
+            if (!sp_cache_384[i].set) {
+                break;
+            }
+        }
+
+        /* Evict least used. */
+        if (i == sp_cache_384_last) {
+            least = sp_cache_384[0].cnt;
+            for (j=1; j<FP_ENTRIES; j++) {
+                if (sp_cache_384[j].cnt < least) {
+                    i = j;
+                    least = sp_cache_384[i].cnt;
+                }
+            }
+        }
+
+        XMEMCPY(sp_cache_384[i].x, g->x, sizeof(sp_cache_384[i].x));
+        XMEMCPY(sp_cache_384[i].y, g->y, sizeof(sp_cache_384[i].y));
+        sp_cache_384[i].set = 1;
+        sp_cache_384[i].cnt = 1;
+    }
+
+    *cache = &sp_cache_384[i];
+    sp_cache_384_last = i;
+}
+#endif /* FP_ECC */
+
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_12(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
+        int map, void* heap)
+{
+#ifndef FP_ECC
+    return sp_384_ecc_mulmod_fast_12(r, g, k, map, heap);
+#else
+    sp_digit tmp[2 * 12 * 7];
+    sp_cache_384_t* cache;
+    int err = MP_OKAY;
+
+#ifndef HAVE_THREAD_LS
+    if (initCacheMutex_384 == 0) {
+         wc_InitMutex(&sp_cache_384_lock);
+         initCacheMutex_384 = 1;
+    }
+    if (wc_LockMutex(&sp_cache_384_lock) != 0)
+       err = BAD_MUTEX_E;
+#endif /* HAVE_THREAD_LS */
+
+    if (err == MP_OKAY) {
+        sp_ecc_get_cache_384(g, &cache);
+        if (cache->cnt == 2)
+            sp_384_gen_stripe_table_12(g, cache->table, tmp, heap);
+
+#ifndef HAVE_THREAD_LS
+        wc_UnLockMutex(&sp_cache_384_lock);
+#endif /* HAVE_THREAD_LS */
+
+        if (cache->cnt < 2) {
+            err = sp_384_ecc_mulmod_fast_12(r, g, k, map, heap);
+        }
+        else {
+            err = sp_384_ecc_mulmod_stripe_12(r, g, cache->table, k,
+                    map, heap);
+        }
+    }
+
+    return err;
+#endif
+}
+
+#else
+#ifdef FP_ECC
+/* Generate the pre-computed table of points for the base point.
+ *
+ * a      The base point.
+ * table  Place to store generated point data.
+ * tmp    Temporary data.
+ * heap  Heap to use for allocation.
+ */
+static int sp_384_gen_stripe_table_12(const sp_point_384* a,
+        sp_table_entry_384* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 td, s1d, s2d;
+#endif
+    sp_point_384* t;
+    sp_point_384* s1 = NULL;
+    sp_point_384* s2 = NULL;
+    int i, j;
+    int err;
+
+    (void)heap;
+
+    err = sp_384_point_new_12(heap, td, t);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, s1d, s1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, s2d, s2);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_12(t->x, a->x, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_12(t->y, a->y, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_12(t->z, a->z, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        t->infinity = 0;
+        sp_384_proj_to_affine_12(t, tmp);
+
+        XMEMCPY(s1->z, p384_norm_mod, sizeof(p384_norm_mod));
+        s1->infinity = 0;
+        XMEMCPY(s2->z, p384_norm_mod, sizeof(p384_norm_mod));
+        s2->infinity = 0;
+
+        /* table[0] = {0, 0, infinity} */
+        XMEMSET(&table[0], 0, sizeof(sp_table_entry_384));
+        /* table[1] = Affine version of 'a' in Montgomery form */
+        XMEMCPY(table[1].x, t->x, sizeof(table->x));
+        XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+        for (i=1; i<8; i++) {
+            sp_384_proj_point_dbl_n_12(t, 48, tmp);
+            sp_384_proj_to_affine_12(t, tmp);
+            XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+            XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+        }
+
+        for (i=1; i<8; i++) {
+            XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+            XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+            for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+                XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+                XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+                sp_384_proj_point_add_qz1_12(t, s1, s2, tmp);
+                sp_384_proj_to_affine_12(t, tmp);
+                XMEMCPY(table[j].x, t->x, sizeof(table->x));
+                XMEMCPY(table[j].y, t->y, sizeof(table->y));
+            }
+        }
+    }
+
+    sp_384_point_free_12(s2, 0, heap);
+    sp_384_point_free_12(s1, 0, heap);
+    sp_384_point_free_12( t, 0, heap);
+
+    return err;
+}
+
+#endif /* FP_ECC */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_stripe_12(sp_point_384* r, const sp_point_384* g,
+        const sp_table_entry_384* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 rtd;
+    sp_point_384 pd;
+    sp_digit td[2 * 12 * 6];
+#endif
+    sp_point_384* rt;
+    sp_point_384* p = NULL;
+    sp_digit* t;
+    int i, j;
+    int y, x;
+    int err;
+
+    (void)g;
+    (void)heap;
+
+
+    err = sp_384_point_new_12(heap, rtd, rt);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, heap,
+                           DYNAMIC_TYPE_ECC);
+    if (t == NULL) {
+        err = MEMORY_E;
+    }
+#else
+    t = td;
+#endif
+
+    if (err == MP_OKAY) {
+        XMEMCPY(p->z, p384_norm_mod, sizeof(p384_norm_mod));
+        XMEMCPY(rt->z, p384_norm_mod, sizeof(p384_norm_mod));
+
+        y = 0;
+        for (j=0,x=47; j<8; j++,x+=48) {
+            y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+        }
+        XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+        XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+        rt->infinity = !y;
+        for (i=46; i>=0; i--) {
+            y = 0;
+            for (j=0,x=i; j<8; j++,x+=48) {
+                y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+            }
+
+            sp_384_proj_point_dbl_12(rt, rt, t);
+            XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+            XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+            p->infinity = !y;
+            sp_384_proj_point_add_qz1_12(rt, rt, p, t);
+        }
+
+        if (map != 0) {
+            sp_384_map_12(r, rt, t);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_384));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(p, 0, heap);
+    sp_384_point_free_12(rt, 0, heap);
+
+    return err;
+}
+
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+    #define FP_ENTRIES 16
+#endif
+
+typedef struct sp_cache_384_t {
+    sp_digit x[12];
+    sp_digit y[12];
+    sp_table_entry_384 table[256];
+    uint32_t cnt;
+    int set;
+} sp_cache_384_t;
+
+static THREAD_LS_T sp_cache_384_t sp_cache_384[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_384_last = -1;
+static THREAD_LS_T int sp_cache_384_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+    static volatile int initCacheMutex_384 = 0;
+    static wolfSSL_Mutex sp_cache_384_lock;
+#endif
+
+static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache)
+{
+    int i, j;
+    uint32_t least;
+
+    if (sp_cache_384_inited == 0) {
+        for (i=0; i<FP_ENTRIES; i++) {
+            sp_cache_384[i].set = 0;
+        }
+        sp_cache_384_inited = 1;
+    }
+
+    /* Compare point with those in cache. */
+    for (i=0; i<FP_ENTRIES; i++) {
+        if (!sp_cache_384[i].set)
+            continue;
+
+        if (sp_384_cmp_equal_12(g->x, sp_cache_384[i].x) &
+                           sp_384_cmp_equal_12(g->y, sp_cache_384[i].y)) {
+            sp_cache_384[i].cnt++;
+            break;
+        }
+    }
+
+    /* No match. */
+    if (i == FP_ENTRIES) {
+        /* Find empty entry. */
+        i = (sp_cache_384_last + 1) % FP_ENTRIES;
+        for (; i != sp_cache_384_last; i=(i+1)%FP_ENTRIES) {
+            if (!sp_cache_384[i].set) {
+                break;
+            }
+        }
+
+        /* Evict least used. */
+        if (i == sp_cache_384_last) {
+            least = sp_cache_384[0].cnt;
+            for (j=1; j<FP_ENTRIES; j++) {
+                if (sp_cache_384[j].cnt < least) {
+                    i = j;
+                    least = sp_cache_384[i].cnt;
+                }
+            }
+        }
+
+        XMEMCPY(sp_cache_384[i].x, g->x, sizeof(sp_cache_384[i].x));
+        XMEMCPY(sp_cache_384[i].y, g->y, sizeof(sp_cache_384[i].y));
+        sp_cache_384[i].set = 1;
+        sp_cache_384[i].cnt = 1;
+    }
+
+    *cache = &sp_cache_384[i];
+    sp_cache_384_last = i;
+}
+#endif /* FP_ECC */
+
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_12(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
+        int map, void* heap)
+{
+#ifndef FP_ECC
+    return sp_384_ecc_mulmod_fast_12(r, g, k, map, heap);
+#else
+    sp_digit tmp[2 * 12 * 7];
+    sp_cache_384_t* cache;
+    int err = MP_OKAY;
+
+#ifndef HAVE_THREAD_LS
+    if (initCacheMutex_384 == 0) {
+         wc_InitMutex(&sp_cache_384_lock);
+         initCacheMutex_384 = 1;
+    }
+    if (wc_LockMutex(&sp_cache_384_lock) != 0)
+       err = BAD_MUTEX_E;
+#endif /* HAVE_THREAD_LS */
+
+    if (err == MP_OKAY) {
+        sp_ecc_get_cache_384(g, &cache);
+        if (cache->cnt == 2)
+            sp_384_gen_stripe_table_12(g, cache->table, tmp, heap);
+
+#ifndef HAVE_THREAD_LS
+        wc_UnLockMutex(&sp_cache_384_lock);
+#endif /* HAVE_THREAD_LS */
+
+        if (cache->cnt < 2) {
+            err = sp_384_ecc_mulmod_fast_12(r, g, k, map, heap);
+        }
+        else {
+            err = sp_384_ecc_mulmod_stripe_12(r, g, cache->table, k,
+                    map, heap);
+        }
+    }
+
+    return err;
+#endif
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * km    Scalar to multiply by.
+ * p     Point to multiply.
+ * r     Resulting point.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_384(mp_int* km, ecc_point* gm, ecc_point* r, int map,
+        void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[12];
+#endif
+    sp_point_384* point;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    err = sp_384_point_new_12(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL)
+            err = MEMORY_E;
+    }
+#else
+    k = kd;
+#endif
+    if (err == MP_OKAY) {
+        sp_384_from_mp(k, 12, km);
+        sp_384_point_from_ecc_point_12(point, gm);
+
+            err = sp_384_ecc_mulmod_12(point, point, k, map, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_to_ecc_point_12(point, r);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(point, 0, heap);
+
+    return err;
+}
+
+#ifdef WOLFSSL_SP_SMALL
+static const sp_table_entry_384 p384_table[16] = {
+    /* 0 */
+    { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 */
+    { { 0x49c0b528,0x3dd07566,0xa0d6ce38,0x20e378e2,0x541b4d6e,0x879c3afc,
+        0x59a30eff,0x64548684,0x614ede2b,0x812ff723,0x299e1513,0x4d3aadc2 },
+      { 0x4b03a4fe,0x23043dad,0x7bb4a9ac,0xa1bfa8bf,0x2e83b050,0x8bade756,
+        0x68f4ffd9,0xc6c35219,0x3969a840,0xdd800226,0x5a15c5e9,0x2b78abc2 } },
+    /* 2 */
+    { { 0xf26feef9,0x24480c57,0x3a0e1240,0xc31a2694,0x273e2bc7,0x735002c3,
+        0x3ef1ed4c,0x8c42e9c5,0x7f4948e8,0x028babf6,0x8a978632,0x6a502f43 },
+      { 0xb74536fe,0xf5f13a46,0xd8a9f0eb,0x1d218bab,0x37232768,0x30f36bcc,
+        0x576e8c18,0xc5317b31,0x9bbcb766,0xef1d57a6,0xb3e3d4dc,0x917c4930 } },
+    /* 3 */
+    { { 0xe349ddd0,0x11426e2e,0x9b2fc250,0x9f117ef9,0xec0174a6,0xff36b480,
+        0x18458466,0x4f4bde76,0x05806049,0x2f2edb6d,0x19dfca92,0x8adc75d1 },
+      { 0xb7d5a7ce,0xa619d097,0xa34411e9,0x874275e5,0x0da4b4ef,0x5403e047,
+        0x77901d8f,0x2ebaafd9,0xa747170f,0x5e63ebce,0x7f9d8036,0x12a36944 } },
+    /* 4 */
+    { { 0x2f9fbe67,0x378205de,0x7f728e44,0xc4afcb83,0x682e00f1,0xdbcec06c,
+        0x114d5423,0xf2a145c3,0x7a52463e,0xa01d9874,0x7d717b0a,0xfc0935b1 },
+      { 0xd4d01f95,0x9653bc4f,0x9560ad34,0x9aa83ea8,0xaf8e3f3f,0xf77943dc,
+        0xe86fe16e,0x70774a10,0xbf9ffdcf,0x6b62e6f1,0x588745c9,0x8a72f39e } },
+    /* 5 */
+    { { 0x2341c342,0x73ade4da,0xea704422,0xdd326e54,0x3741cef3,0x336c7d98,
+        0x59e61549,0x1eafa00d,0xbd9a3efd,0xcd3ed892,0xc5c6c7e4,0x03faf26c },
+      { 0x3045f8ac,0x087e2fcf,0x174f1e73,0x14a65532,0xfe0af9a7,0x2cf84f28,
+        0x2cdc935b,0xddfd7a84,0x6929c895,0x4c0f117b,0x4c8bcfcc,0x356572d6 } },
+    /* 6 */
+    { { 0x3f3b236f,0xfab08607,0x81e221da,0x19e9d41d,0x3927b428,0xf3f6571e,
+        0x7550f1f6,0x4348a933,0xa85e62f0,0x7167b996,0x7f5452bf,0x62d43759 },
+      { 0xf2955926,0xd85feb9e,0x6df78353,0x440a561f,0x9ca36b59,0x389668ec,
+        0xa22da016,0x052bf1a1,0xf6093254,0xbdfbff72,0xe22209f3,0x94e50f28 } },
+    /* 7 */
+    { { 0x3062e8af,0x90b2e5b3,0xe8a3d369,0xa8572375,0x201db7b1,0x3fe1b00b,
+        0xee651aa2,0xe926def0,0xb9b10ad7,0x6542c9be,0xa2fcbe74,0x098e309b },
+      { 0xfff1d63f,0x779deeb3,0x20bfd374,0x23d0e80a,0x8768f797,0x8452bb3b,
+        0x1f952856,0xcf75bb4d,0x29ea3faa,0x8fe6b400,0x81373a53,0x12bd3e40 } },
+    /* 8 */
+    { { 0x16973cf4,0x070d34e1,0x7e4f34f7,0x20aee08b,0x5eb8ad29,0x269af9b9,
+        0xa6a45dda,0xdde0a036,0x63df41e0,0xa18b528e,0xa260df2a,0x03cc71b2 },
+      { 0xa06b1dd7,0x24a6770a,0x9d2675d3,0x5bfa9c11,0x96844432,0x73c1e2a1,
+        0x131a6cf0,0x3660558d,0x2ee79454,0xb0289c83,0xc6d8ddcd,0xa6aefb01 } },
+    /* 9 */
+    { { 0x01ab5245,0xba1464b4,0xc48d93ff,0x9b8d0b6d,0x93ad272c,0x939867dc,
+        0xae9fdc77,0xbebe085e,0x894ea8bd,0x73ae5103,0x39ac22e1,0x740fc89a },
+      { 0x28e23b23,0x5e28b0a3,0xe13104d0,0x2352722e,0xb0a2640d,0xf4667a18,
+        0x49bb37c3,0xac74a72e,0xe81e183a,0x79f734f0,0x3fd9c0eb,0xbffe5b6c } },
+    /* 10 */
+    { { 0x00623f3b,0x03cf2922,0x5f29ebff,0x095c7111,0x80aa6823,0x42d72247,
+        0x7458c0b0,0x044c7ba1,0x0959ec20,0xca62f7ef,0xf8ca929f,0x40ae2ab7 },
+      { 0xa927b102,0xb8c5377a,0xdc031771,0x398a86a0,0xc216a406,0x04908f9d,
+        0x918d3300,0xb423a73a,0xe0b94739,0x634b0ff1,0x2d69f697,0xe29de725 } },
+    /* 11 */
+    { { 0x8435af04,0x744d1400,0xfec192da,0x5f255b1d,0x336dc542,0x1f17dc12,
+        0x636a68a8,0x5c90c2a7,0x7704ca1e,0x960c9eb7,0x6fb3d65a,0x9de8cf1e },
+      { 0x511d3d06,0xc60fee0d,0xf9eb52c7,0x466e2313,0x206b0914,0x743c0f5f,
+        0x2191aa4d,0x42f55bac,0xffebdbc2,0xcefc7c8f,0xe6e8ed1c,0xd4fa6081 } },
+    /* 12 */
+    { { 0x98683186,0x867db639,0xddcc4ea9,0xfb5cf424,0xd4f0e7bd,0xcc9a7ffe,
+        0x7a779f7e,0x7c57f71c,0xd6b25ef2,0x90774079,0xb4081680,0x90eae903 },
+      { 0x0ee1fceb,0xdf2aae5e,0xe86c1a1f,0x3ff1da24,0xca193edf,0x80f587d6,
+        0xdc9b9d6a,0xa5695523,0x85920303,0x7b840900,0xba6dbdef,0x1efa4dfc } },
+    /* 13 */
+    { { 0xe0540015,0xfbd838f9,0xc39077dc,0x2c323946,0xad619124,0x8b1fb9e6,
+        0x0ca62ea8,0x9612440c,0x2dbe00ff,0x9ad9b52c,0xae197643,0xf52abaa1 },
+      { 0x2cac32ad,0xd0e89894,0x62a98f91,0xdfb79e42,0x276f55cb,0x65452ecf,
+        0x7ad23e12,0xdb1ac0d2,0xde4986f0,0xf68c5f6a,0x82ce327d,0x389ac37b } },
+    /* 14 */
+    { { 0xb8a9e8c9,0xcd96866d,0x5bb8091e,0xa11963b8,0x045b3cd2,0xc7f90d53,
+        0x80f36504,0x755a72b5,0x21d3751c,0x46f8b399,0x53c193de,0x4bffdc91 },
+      { 0xb89554e7,0xcd15c049,0xf7a26be6,0x353c6754,0xbd41d970,0x79602370,
+        0x12b176c0,0xde16470b,0x40c8809d,0x56ba1175,0xe435fb1e,0xe2db35c3 } },
+    /* 15 */
+    { { 0x6328e33f,0xd71e4aab,0xaf8136d1,0x5486782b,0x86d57231,0x07a4995f,
+        0x1651a968,0xf1f0a5bd,0x76803b6d,0xa5dc5b24,0x42dda935,0x5c587cbc },
+      { 0xbae8b4c0,0x2b6cdb32,0xb1331138,0x66d1598b,0x5d7e9614,0x4a23b2d2,
+        0x74a8c05d,0x93e402a6,0xda7ce82e,0x45ac94e6,0xe463d465,0xeb9f8281 } },
+};
+
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_base_12(sp_point_384* r, const sp_digit* k,
+        int map, void* heap)
+{
+    return sp_384_ecc_mulmod_stripe_12(r, &p384_base, p384_table,
+                                      k, map, heap);
+}
+
+#else
+static const sp_table_entry_384 p384_table[256] = {
+    /* 0 */
+    { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 */
+    { { 0x49c0b528,0x3dd07566,0xa0d6ce38,0x20e378e2,0x541b4d6e,0x879c3afc,
+        0x59a30eff,0x64548684,0x614ede2b,0x812ff723,0x299e1513,0x4d3aadc2 },
+      { 0x4b03a4fe,0x23043dad,0x7bb4a9ac,0xa1bfa8bf,0x2e83b050,0x8bade756,
+        0x68f4ffd9,0xc6c35219,0x3969a840,0xdd800226,0x5a15c5e9,0x2b78abc2 } },
+    /* 2 */
+    { { 0x2b0c535b,0x29864753,0x70506296,0x90dd6953,0x216ab9ac,0x038cd6b4,
+        0xbe12d76a,0x3df9b7b7,0x5f347bdb,0x13f4d978,0x13e94489,0x222c5c9c },
+      { 0x2680dc64,0x5f8e796f,0x58352417,0x120e7cb7,0xd10740b8,0x254b5d8a,
+        0x5337dee6,0xc38b8efb,0x94f02247,0xf688c2e1,0x6c25bc4c,0x7b5c75f3 } },
+    /* 3 */
+    { { 0x9edffea5,0xe26a3cc3,0x37d7e9fc,0x35bbfd1c,0x9bde3ef6,0xf0e7700d,
+        0x1a538f5a,0x0380eb47,0x05bf9eb3,0x2e9da8bb,0x1a460c3e,0xdbb93c73 },
+      { 0xf526b605,0x37dba260,0xfd785537,0x95d4978e,0xed72a04a,0x24ed793a,
+        0x76005b1a,0x26948377,0x9e681f82,0x99f557b9,0xd64954ef,0xae5f9557 } },
+    /* 4 */
+    { { 0xf26feef9,0x24480c57,0x3a0e1240,0xc31a2694,0x273e2bc7,0x735002c3,
+        0x3ef1ed4c,0x8c42e9c5,0x7f4948e8,0x028babf6,0x8a978632,0x6a502f43 },
+      { 0xb74536fe,0xf5f13a46,0xd8a9f0eb,0x1d218bab,0x37232768,0x30f36bcc,
+        0x576e8c18,0xc5317b31,0x9bbcb766,0xef1d57a6,0xb3e3d4dc,0x917c4930 } },
+    /* 5 */
+    { { 0xe349ddd0,0x11426e2e,0x9b2fc250,0x9f117ef9,0xec0174a6,0xff36b480,
+        0x18458466,0x4f4bde76,0x05806049,0x2f2edb6d,0x19dfca92,0x8adc75d1 },
+      { 0xb7d5a7ce,0xa619d097,0xa34411e9,0x874275e5,0x0da4b4ef,0x5403e047,
+        0x77901d8f,0x2ebaafd9,0xa747170f,0x5e63ebce,0x7f9d8036,0x12a36944 } },
+    /* 6 */
+    { { 0x4fc52870,0x28f9c07a,0x1a53a961,0xce0b3748,0x0e1828d9,0xd550fa18,
+        0x6adb225a,0xa24abaf7,0x6e58a348,0xd11ed0a5,0x948acb62,0xf3d811e6 },
+      { 0x4c61ed22,0x8618dd77,0x80b47c9d,0x0bb747f9,0xde6b8559,0x22bf796f,
+        0x680a21e9,0xfdfd1c6d,0x2af2c9dd,0xc0db1577,0xc1e90f3d,0xa09379e6 } },
+    /* 7 */
+    { { 0xe085c629,0x386c66ef,0x095bc89a,0x5fc2a461,0x203f4b41,0x1353d631,
+        0x7e4bd8f5,0x7ca1972b,0xa7df8ce9,0xb077380a,0xee7e4ea3,0xd8a90389 },
+      { 0xe7b14461,0x1bc74dc7,0x0c9c4f78,0xdc2cb014,0x84ef0a10,0x52b4b3a6,
+        0x20327fe2,0xbde6ea5d,0x660f9615,0xb71ec435,0xb8ad8173,0xeede5a04 } },
+    /* 8 */
+    { { 0x893b9a2d,0x5584cbb3,0x00850c5d,0x820c660b,0x7df2d43d,0x4126d826,
+        0x0109e801,0xdd5bbbf0,0x38172f1c,0x85b92ee3,0xf31430d9,0x609d4f93 },
+      { 0xeadaf9d6,0x1e059a07,0x0f125fb0,0x70e6536c,0x560f20e7,0xd6220751,
+        0x7aaf3a9a,0xa59489ae,0x64bae14e,0x7b70e2f6,0x76d08249,0x0dd03701 } },
+    /* 9 */
+    { { 0x8510521f,0x4cc13be8,0xf724cc17,0x87315ba9,0x353dc263,0xb49d83bb,
+        0x0c279257,0x8b677efe,0xc93c9537,0x510a1c1c,0xa4702c99,0x33e30cd8 },
+      { 0x2208353f,0xf0ffc89d,0xced42b2b,0x0170fa8d,0x26e2a5f5,0x090851ed,
+        0xecb52c96,0x81276455,0x7fe1adf4,0x0646c4e1,0xb0868eab,0x513f047e } },
+    /* 10 */
+    { { 0xdf5bdf53,0xc07611f4,0x58b11a6d,0x45d331a7,0x1c4ee394,0x58965daf,
+        0x5a5878d1,0xba8bebe7,0x82dd3025,0xaecc0a18,0xa923eb8b,0xcf2a3899 },
+      { 0xd24fd048,0xf98c9281,0x8bbb025d,0x841bfb59,0xc9ab9d53,0xb8ddf8ce,
+        0x7fef044e,0x538a4cb6,0x23236662,0x092ac21f,0x0b66f065,0xa919d385 } },
+    /* 11 */
+    { { 0x85d480d8,0x3db03b40,0x1b287a7d,0x8cd9f479,0x4a8f3bae,0x8f24dc75,
+        0x3db41892,0x482eb800,0x9c56e0f5,0x38bf9eb3,0x9a91dc6f,0x8b977320 },
+      { 0x7209cfc2,0xa31b05b2,0x05b2db70,0x4c49bf85,0xd619527b,0x56462498,
+        0x1fac51ba,0x3fe51039,0xab4b8342,0xfb04f55e,0x04c6eabf,0xc07c10dc } },
+    /* 12 */
+    { { 0xdb32f048,0xad22fe4c,0x475ed6df,0x5f23bf91,0xaa66b6cb,0xa50ce0c0,
+        0xf03405c0,0xdf627a89,0xf95e2d6a,0x3674837d,0xba42e64e,0x081c95b6 },
+      { 0xe71d6ceb,0xeba3e036,0x6c6b0271,0xb45bcccf,0x0684701d,0x67b47e63,
+        0xe712523f,0x60f8f942,0x5cd47adc,0x82423472,0x87649cbb,0x83027d79 } },
+    /* 13 */
+    { { 0x3615b0b8,0xb3929ea6,0xa54dac41,0xb41441fd,0xb5b6a368,0x8995d556,
+        0x167ef05e,0xa80d4529,0x6d25a27f,0xf6bcb4a1,0x7bd55b68,0x210d6a4c },
+      { 0x25351130,0xf3804abb,0x903e37eb,0x1d2df699,0x084c25c8,0x5f201efc,
+        0xa1c68e91,0x31a28c87,0x563f62a5,0x81dad253,0xd6c415d4,0x5dd6de70 } },
+    /* 14 */
+    { { 0x846612ce,0x29f470fd,0xda18d997,0x986f3eec,0x2f34af86,0x6b84c161,
+        0x46ddaf8b,0x5ef0a408,0xe49e795f,0x14405a00,0xaa2f7a37,0x5f491b16 },
+      { 0xdb41b38d,0xc7f07ae4,0x18fbfcaa,0xef7d119e,0x14443b19,0x3a18e076,
+        0x79a19926,0x4356841a,0xe2226fbe,0x91f4a91c,0x3cc88721,0xdc77248c } },
+    /* 15 */
+    { { 0xe4b1ec9d,0xd570ff1a,0xe7eef706,0x21d23e0e,0xca19e086,0x3cde40f4,
+        0xcd4bb270,0x7d6523c4,0xbf13aa6c,0x16c1f06c,0xd14c4b60,0x5aa7245a },
+      { 0x44b74de8,0x37f81467,0x620a934e,0x839e7a17,0xde8b1aa1,0xf74d14e8,
+        0xf30d75e2,0x8789fa51,0xc81c261e,0x09b24052,0x33c565ee,0x654e2678 } },
+    /* 16 */
+    { { 0x2f9fbe67,0x378205de,0x7f728e44,0xc4afcb83,0x682e00f1,0xdbcec06c,
+        0x114d5423,0xf2a145c3,0x7a52463e,0xa01d9874,0x7d717b0a,0xfc0935b1 },
+      { 0xd4d01f95,0x9653bc4f,0x9560ad34,0x9aa83ea8,0xaf8e3f3f,0xf77943dc,
+        0xe86fe16e,0x70774a10,0xbf9ffdcf,0x6b62e6f1,0x588745c9,0x8a72f39e } },
+    /* 17 */
+    { { 0x2341c342,0x73ade4da,0xea704422,0xdd326e54,0x3741cef3,0x336c7d98,
+        0x59e61549,0x1eafa00d,0xbd9a3efd,0xcd3ed892,0xc5c6c7e4,0x03faf26c },
+      { 0x3045f8ac,0x087e2fcf,0x174f1e73,0x14a65532,0xfe0af9a7,0x2cf84f28,
+        0x2cdc935b,0xddfd7a84,0x6929c895,0x4c0f117b,0x4c8bcfcc,0x356572d6 } },
+    /* 18 */
+    { { 0x7d8c1bba,0x7ecbac01,0x90b0f3d5,0x6058f9c3,0xf6197d0f,0xaee116e3,
+        0x4033b128,0xc4dd7068,0xc209b983,0xf084dba6,0x831dbc4a,0x97c7c2cf },
+      { 0xf96010e8,0x2f4e61dd,0x529faa17,0xd97e4e20,0x69d37f20,0x4ee66660,
+        0x3d366d72,0xccc139ed,0x13488e0f,0x690b6ee2,0xf3a6d533,0x7cad1dc5 } },
+    /* 19 */
+    { { 0xda57a41f,0x660a9a81,0xec0039b6,0xe74a0412,0x5e1dad15,0x42343c6b,
+        0x46681d4c,0x284f3ff5,0x63749e89,0xb51087f1,0x6f9f2f13,0x070f23cc },
+      { 0x5d186e14,0x542211da,0xfddb0dff,0x84748f37,0xdb1f4180,0x41a3aab4,
+        0xa6402d0e,0x25ed667b,0x02f58355,0x2f2924a9,0xfa44a689,0x5844ee7c } },
+    /* 20 */
+    { { 0x3f3b236f,0xfab08607,0x81e221da,0x19e9d41d,0x3927b428,0xf3f6571e,
+        0x7550f1f6,0x4348a933,0xa85e62f0,0x7167b996,0x7f5452bf,0x62d43759 },
+      { 0xf2955926,0xd85feb9e,0x6df78353,0x440a561f,0x9ca36b59,0x389668ec,
+        0xa22da016,0x052bf1a1,0xf6093254,0xbdfbff72,0xe22209f3,0x94e50f28 } },
+    /* 21 */
+    { { 0x3062e8af,0x90b2e5b3,0xe8a3d369,0xa8572375,0x201db7b1,0x3fe1b00b,
+        0xee651aa2,0xe926def0,0xb9b10ad7,0x6542c9be,0xa2fcbe74,0x098e309b },
+      { 0xfff1d63f,0x779deeb3,0x20bfd374,0x23d0e80a,0x8768f797,0x8452bb3b,
+        0x1f952856,0xcf75bb4d,0x29ea3faa,0x8fe6b400,0x81373a53,0x12bd3e40 } },
+    /* 22 */
+    { { 0x104cbba5,0xc023780d,0xfa35dd4c,0x6207e747,0x1ca9b6a3,0x35c23928,
+        0x97987b10,0x4ff19be8,0x8022eee8,0xb8476bbf,0xd3bbe74d,0xaa0a4a14 },
+      { 0x187d4543,0x20f94331,0x79f6e066,0x32153870,0xac7e82e1,0x83b0f74e,
+        0x828f06ab,0xa7748ba2,0xc26ef35f,0xc5f0298a,0x8e9a7dbd,0x0f0c5070 } },
+    /* 23 */
+    { { 0xdef029dd,0x0c5c244c,0x850661b8,0x3dabc687,0xfe11d981,0x9992b865,
+        0x6274dbad,0xe9801b8f,0x098da242,0xe54e6319,0x91a53d08,0x9929a91a },
+      { 0x35285887,0x37bffd72,0xf1418102,0xbc759425,0xfd2e6e20,0x9280cc35,
+        0xfbc42ee5,0x735c600c,0x8837619a,0xb7ad2864,0xa778c57b,0xa3627231 } },
+    /* 24 */
+    { { 0x91361ed8,0xae799b5c,0x6c63366c,0x47d71b75,0x1b265a6a,0x54cdd521,
+        0x98d77b74,0xe0215a59,0xbab29db0,0x4424d9b7,0x7fd9e536,0x8b0ffacc },
+      { 0x37b5d9ef,0x46d85d12,0xbfa91747,0x5b106d62,0x5f99ba2d,0xed0479f8,
+        0x1d104de4,0x0e6f3923,0x25e8983f,0x83a84c84,0xf8105a70,0xa9507e0a } },
+    /* 25 */
+    { { 0x14cf381c,0xf6c68a6e,0xc22e31cc,0xaf9d27bd,0xaa8a5ccb,0x23568d4d,
+        0xe338e4d2,0xe431eec0,0x8f52ad1f,0xf1a828fe,0xe86acd80,0xdb6a0579 },
+      { 0x4507832a,0x2885672e,0x887e5289,0x73fc275f,0x05610d08,0x65f80278,
+        0x075ff5b0,0x8d9b4554,0x09f712b5,0x3a8e8fb1,0x2ebe9cf2,0x39f0ac86 } },
+    /* 26 */
+    { { 0x4c52edf5,0xd8fabf78,0xa589ae53,0xdcd737e5,0xd791ab17,0x94918bf0,
+        0xbcff06c9,0xb5fbd956,0xdca46d45,0xf6d3032e,0x41a3e486,0x2cdff7e1 },
+      { 0x61f47ec8,0x6674b3ba,0xeef84608,0x8a882163,0x4c687f90,0xa257c705,
+        0xf6cdf227,0xe30cb2ed,0x7f6ea846,0x2c4c64ca,0xcc6bcd3c,0x186fa17c } },
+    /* 27 */
+    { { 0x1dfcb91e,0x48a3f536,0x646d358a,0x83595e13,0x91128798,0xbd15827b,
+        0x2187757a,0x3ce612b8,0x61bd7372,0x873150a1,0xb662f568,0xf4684530 },
+      { 0x401896f6,0x8833950b,0x77f3e090,0xe11cb89a,0x48e7f4a5,0xb2f12cac,
+        0xf606677e,0x313dd769,0x16579f93,0xfdcf08b3,0x46b8f22b,0x6429cec9 } },
+    /* 28 */
+    { { 0xbb75f9a4,0x4984dd54,0x29d3b570,0x4aef06b9,0x3d6e4c1e,0xb5f84ca2,
+        0xb083ef35,0x24c61c11,0x392ca9ff,0xce4a7392,0x6730a800,0x865d6517 },
+      { 0x722b4a2b,0xca3dfe76,0x7b083e0e,0x12c04bf9,0x1b86b8a5,0x803ce5b5,
+        0x6a7e3e0c,0x3fc7632d,0xc81adbe4,0xc89970c2,0x120e16b1,0x3cbcd3ad } },
+    /* 29 */
+    { { 0xec30ce93,0xfbfb4cc7,0xb72720a2,0x10ed6c7d,0x47b55500,0xec675bf7,
+        0x333ff7c3,0x90725903,0x5075bfc0,0xc7c3973e,0x07acf31b,0xb049ecb0 },
+      { 0x4f58839c,0xb4076eaf,0xa2b05e4f,0x101896da,0xab40c66e,0x3f6033b0,
+        0xc8d864ba,0x19ee9eeb,0x47bf6d2a,0xeb6cf155,0xf826477d,0x8e5a9663 } },
+    /* 30 */
+    { { 0xf7fbd5e1,0x69e62fdd,0x76912b1d,0x38ecfe54,0xd1da3bfb,0x845a3d56,
+        0x1c86f0d4,0x0494950e,0x3bc36ce8,0x83cadbf9,0x4fccc8d1,0x41fce572 },
+      { 0x8332c144,0x05f939c2,0x0871e46e,0xb17f248b,0x66e8aff6,0x3d8534e2,
+        0x3b85c629,0x1d06f1dc,0xa3131b73,0xdb06a32e,0x8b3f64e5,0xf295184d } },
+    /* 31 */
+    { { 0x36ddc103,0xd9653ff7,0x95ef606f,0x25f43e37,0xfe06dce8,0x09e301fc,
+        0x30b6eebf,0x85af2341,0x0ff56b20,0x79b12b53,0xfe9a3c6b,0x9b4fb499 },
+      { 0x51d27ac2,0x0154f892,0x56ca5389,0xd33167e3,0xafc065a6,0x7828ec1f,
+        0x7f746c9b,0x0959a258,0x0c44f837,0xb18f1be3,0xc4132fdb,0xa7946117 } },
+    /* 32 */
+    { { 0x5e3c647b,0xc0426b77,0x8cf05348,0xbfcbd939,0x172c0d3d,0x31d312e3,
+        0xee754737,0x5f49fde6,0x6da7ee61,0x895530f0,0xe8b3a5fb,0xcf281b0a },
+      { 0x41b8a543,0xfd149735,0x3080dd30,0x41a625a7,0x653908cf,0xe2baae07,
+        0xba02a278,0xc3d01436,0x7b21b8f8,0xa0d0222e,0xd7ec1297,0xfdc270e9 } },
+    /* 33 */
+    { { 0xbc7f41d6,0x00873c0c,0x1b7ad641,0xd976113e,0x238443fb,0x2a536ff4,
+        0x41e62e45,0x030d00e2,0x5f545fc6,0x532e9867,0x8e91208c,0xcd033108 },
+      { 0x9797612c,0xd1a04c99,0xeea674e2,0xd4393e02,0xe19742a1,0xd56fa69e,
+        0x85f0590e,0xdd2ab480,0x48a2243d,0xa5cefc52,0x54383f41,0x48cc67b6 } },
+    /* 34 */
+    { { 0xfc14ab48,0x4e50430e,0x26706a74,0x195b7f4f,0xcc881ff6,0x2fe8a228,
+        0xd945013d,0xb1b968e2,0x4b92162b,0x936aa579,0x364e754a,0x4fb766b7 },
+      { 0x31e1ff7f,0x13f93bca,0xce4f2691,0x696eb5ca,0xa2b09e02,0xff754bf8,
+        0xe58e3ff8,0x58f13c9c,0x1678c0b0,0xb757346f,0xa86692b3,0xd54200db } },
+    /* 35 */
+    { { 0x6dda1265,0x9a030bbd,0xe89718dd,0xf7b4f3fc,0x936065b8,0xa6a4931f,
+        0x5f72241c,0xbce72d87,0x65775857,0x6cbb51cb,0x4e993675,0xc7161815 },
+      { 0x2ee32189,0xe81a0f79,0x277dc0b2,0xef2fab26,0xb71f469f,0x9e64f6fe,
+        0xdfdaf859,0xb448ce33,0xbe6b5df1,0x3f5c1c4c,0x1de45f7b,0xfb8dfb00 } },
+    /* 36 */
+    { { 0x4d5bb921,0xc7345fa7,0x4d2b667e,0x5c7e04be,0x282d7a3e,0x47ed3a80,
+        0x7e47b2a4,0x5c2777f8,0x08488e2e,0x89b3b100,0xb2eb5b45,0x9aad77c2 },
+      { 0xdaac34ae,0xd681bca7,0x26afb326,0x2452e4e5,0x41a1ee14,0x0c887924,
+        0xc2407ade,0x743b04d4,0xfc17a2ac,0xcb5e999b,0x4a701a06,0x4dca2f82 } },
+    /* 37 */
+    { { 0x1127bc1a,0x68e31ca6,0x17ead3be,0xa3edd59b,0xe25f5a15,0x67b6b645,
+        0xa420e15e,0x76221794,0x4b1e872e,0x794fd83b,0xb2dece1b,0x7cab3f03 },
+      { 0xca9b3586,0x7119bf15,0x4d250bd7,0xa5545924,0xcc6bcf24,0x173633ea,
+        0xb1b6f884,0x9bd308c2,0x447d38c3,0x3bae06f5,0xf341fe1c,0x54dcc135 } },
+    /* 38 */
+    { { 0x943caf0d,0x56d3598d,0x225ff133,0xce044ea9,0x563fadea,0x9edf6a7c,
+        0x73e8dc27,0x632eb944,0x3190dcab,0x814b467e,0x6dbb1e31,0x2d4f4f31 },
+      { 0xa143b7ca,0x8d69811c,0xde7cf950,0x4ec1ac32,0x37b5fe82,0x223ab5fd,
+        0x9390f1d9,0xe82616e4,0x75804610,0xabff4b20,0x875b08f0,0x11b9be15 } },
+    /* 39 */
+    { { 0x3bbe682c,0x4ae31a3d,0x74eef2dd,0xbc7c5d26,0x3c47dd40,0x92afd10a,
+        0xc14ab9e1,0xec7e0a3b,0xb2e495e4,0x6a6c3dd1,0x309bcd85,0x085ee5e9 },
+      { 0x8c2e67fd,0xf381a908,0xe261eaf2,0x32083a80,0x96deee15,0x0fcd6a49,
+        0x5e524c79,0xe3b8fb03,0x1d5b08b9,0x8dc360d9,0x7f26719f,0x3a06e2c8 } },
+    /* 40 */
+    { { 0x7237cac0,0x5cd9f5a8,0x43586794,0x93f0b59d,0xe94f6c4e,0x4384a764,
+        0xb62782d3,0x8304ed2b,0xcde06015,0x0b8db8b3,0x5dbe190f,0x4336dd53 },
+      { 0x92ab473a,0x57443553,0xbe5ed046,0x031c7275,0x21909aa4,0x3e78678c,
+        0x99202ddb,0x4ab7e04f,0x6977e635,0x2648d206,0x093198be,0xd427d184 } },
+    /* 41 */
+    { { 0x0f9b5a31,0x822848f5,0xbaadb62a,0xbb003468,0x3357559c,0x233a0472,
+        0x79aee843,0x49ef6880,0xaeb9e1e3,0xa89867a0,0x1f6f9a55,0xc151931b },
+      { 0xad74251e,0xd264eb0b,0x4abf295e,0x37b9b263,0x04960d10,0xb600921b,
+        0x4da77dc0,0x0de53dbc,0xd2b18697,0x01d9bab3,0xf7156ddf,0xad54ec7a } },
+    /* 42 */
+    { { 0x79efdc58,0x8e74dc35,0x4ff68ddb,0x456bd369,0xd32096a5,0x724e74cc,
+        0x386783d0,0xe41cff42,0x7c70d8a4,0xa04c7f21,0xe61a19a2,0x41199d2f },
+      { 0x29c05dd2,0xd389a3e0,0xe7e3fda9,0x535f2a6b,0x7c2b4df8,0x26ecf72d,
+        0xfe745294,0x678275f4,0x9d23f519,0x6319c9cc,0x88048fc4,0x1e05a02d } },
+    /* 43 */
+    { { 0xd4d5ffe8,0x75cc8e2e,0xdbea17f2,0xf8bb4896,0xcee3cb4a,0x35059790,
+        0xa47c6165,0x4c06ee85,0x92935d2f,0xf98fff25,0x32ffd7c7,0x34c4a572 },
+      { 0xea0376a2,0xc4b14806,0x4f115e02,0x2ea5e750,0x1e55d7c0,0x532d76e2,
+        0xf31044da,0x68dc9411,0x71b77993,0x9272e465,0x93a8cfd5,0xadaa38bb } },
+    /* 44 */
+    { { 0x7d4ed72a,0x4bf0c712,0xba1f79a3,0xda0e9264,0xf4c39ea4,0x48c0258b,
+        0x2a715138,0xa5394ed8,0xbf06c660,0x4af511ce,0xec5c37cd,0xfcebceef },
+      { 0x779ae8c1,0xf23b75aa,0xad1e606e,0xdeff59cc,0x22755c82,0xf3f526fd,
+        0xbb32cefd,0x64c5ab44,0x915bdefd,0xa96e11a2,0x1143813e,0xab19746a } },
+    /* 45 */
+    { { 0xec837d7d,0x43c78585,0xb8ee0ba4,0xca5b6fbc,0xd5dbb5ee,0x34e924d9,
+        0xbb4f1ca5,0x3f4fa104,0x398640f7,0x15458b72,0xd7f407ea,0x4231faa9 },
+      { 0xf96e6896,0x53e0661e,0xd03b0f9d,0x554e4c69,0x9c7858d1,0xd4fcb07b,
+        0x52cb04fa,0x7e952793,0x8974e7f7,0x5f5f1574,0x6b6d57c8,0x2e3fa558 } },
+    /* 46 */
+    { { 0x6a9951a8,0x42cd4803,0x42792ad0,0xa8b15b88,0xabb29a73,0x18e8bcf9,
+        0x409933e8,0xbfd9a092,0xefb88dc4,0x760a3594,0x40724458,0x14418863 },
+      { 0x99caedc7,0x162a56ee,0x91d101c9,0x8fb12ecd,0x393202da,0xea671967,
+        0xa4ccd796,0x1aac8c4a,0x1cf185a8,0x7db05036,0x8cfd095a,0x0c9f86cd } },
+    /* 47 */
+    { { 0x10b2a556,0x9a728147,0x327b70b2,0x767ca964,0x5e3799b7,0x04ed9e12,
+        0x22a3eb2a,0x6781d2dc,0x0d9450ac,0x5bd116eb,0xa7ebe08a,0xeccac1fc },
+      { 0xdc2d6e94,0xde68444f,0x35ecf21b,0x3621f429,0x29e03a2c,0x14e2d543,
+        0x7d3e7f0a,0x53e42cd5,0x73ed00b9,0xbba26c09,0xc57d2272,0x00297c39 } },
+    /* 48 */
+    { { 0xb8243a7d,0x3aaaab10,0x8fa58c5b,0x6eeef93e,0x9ae7f764,0xf866fca3,
+        0x61ab04d3,0x64105a26,0x03945d66,0xa3578d8a,0x791b848c,0xb08cd3e4 },
+      { 0x756d2411,0x45edc5f8,0xa755128c,0xd4a790d9,0x49e5f6a0,0xc2cf0963,
+        0xf649beaa,0xc66d267d,0x8467039e,0x3ce6d968,0x42f7816f,0x50046c6b } },
+    /* 49 */
+    { { 0x66425043,0x92ae1602,0xf08db890,0x1ff66afd,0x8f162ce5,0x386f5a7f,
+        0xfcf5598f,0x18d2dea0,0x1a8ca18e,0x78372b3a,0x8cd0e6f7,0xdf0d20eb },
+      { 0x75bb4045,0x7edd5e1d,0xb96d94b7,0x252a47ce,0x2c626776,0xbdb29358,
+        0x40dd1031,0x853c3943,0x7d5f47fd,0x9dc9becf,0xbae4044a,0x27c2302f } },
+    /* 50 */
+    { { 0x8f2d49ce,0x2d1d208a,0x162df0a2,0x0d91aa02,0x09a07f65,0x9c5cce87,
+        0x84339012,0xdf07238b,0x419442cd,0x5028e2c8,0x72062aba,0x2dcbd358 },
+      { 0xe4680967,0xb5fbc3cb,0x9f92d72c,0x2a7bc645,0x116c369d,0x806c76e1,
+        0x3177e8d8,0x5c50677a,0x4569df57,0x753739eb,0x36c3f40b,0x2d481ef6 } },
+    /* 51 */
+    { { 0xfea1103e,0x1a2d39fd,0x95f81b17,0xeaae5592,0xf59b264a,0xdbd0aa18,
+        0xcb592ee0,0x90c39c1a,0x9750cca3,0xdf62f80d,0xdf97cc6c,0xda4d8283 },
+      { 0x1e201067,0x0a6dd346,0x69fb1f6b,0x1531f859,0x1d60121f,0x4895e552,
+        0x4c041c91,0x0b21aab0,0xbcc1ccf8,0x9d896c46,0x3141bde7,0xd24da3b3 } },
+    /* 52 */
+    { { 0x53b0a354,0x575a0537,0x0c6ddcd8,0x392ff2f4,0x56157b94,0x0b8e8cff,
+        0x3b1b80d1,0x073e57bd,0x3fedee15,0x2a75e0f0,0xaa8e6f19,0x752380e4 },
+      { 0x6558ffe9,0x1f4e227c,0x19ec5415,0x3a348618,0xf7997085,0xab382d5e,
+        0xddc46ac2,0x5e6deaff,0xfc8d094c,0xe5144078,0xf60e37c6,0xf674fe51 } },
+    /* 53 */
+    { { 0xaf63408f,0x6fb87ae5,0xcd75a737,0xa39c36a9,0xcf4c618d,0x7833313f,
+        0xf034c88d,0xfbcd4482,0x39b35288,0x4469a761,0x66b5d9c9,0x77a711c5 },
+      { 0x944f8d65,0x4a695dc7,0x161aaba8,0xe6da5f65,0x24601669,0x8654e9c3,
+        0x28ae7491,0xbc8b93f5,0x8f5580d8,0x5f1d1e83,0xcea32cc8,0x8ccf9a1a } },
+    /* 54 */
+    { { 0x7196fee2,0x28ab110c,0x874c8945,0x75799d63,0x29aedadd,0xa2629348,
+        0x2be88ff4,0x9714cc7b,0xd58d60d6,0xf71293cf,0x32a564e9,0xda6b6cb3 },
+      { 0x3dd821c2,0xf43fddb1,0x90dd323d,0xf2f2785f,0x048489f8,0x91246419,
+        0xd24c6749,0x61660f26,0xc803c15c,0x961d9e8c,0xfaadc4c9,0x631c6158 } },
+    /* 55 */
+    { { 0xfd752366,0xacf2ebe0,0x139be88b,0xb93c340e,0x0f20179e,0x98f66485,
+        0xff1da785,0x14820254,0x4f85c16e,0x5278e276,0x7aab1913,0xa246ee45 },
+      { 0x53763b33,0x43861eb4,0x45c0bc0d,0xc49f03fc,0xad6b1ea1,0xafff16bc,
+        0x6fd49c99,0xce33908b,0xf7fde8c3,0x5c51e9bf,0xff142c5e,0x076a7a39 } },
+    /* 56 */
+    { { 0x9e338d10,0x04639dfe,0xf42b411b,0x8ee6996f,0xa875cef2,0x960461d1,
+        0x95b4d0ba,0x1057b6d6,0xa906e0bc,0x27639252,0xe1c20f8a,0x2c19f09a },
+      { 0xeef4c43d,0x5b8fc3f0,0x07a84aa9,0xe2e1b1a8,0x835d2bdb,0x5f455528,
+        0x207132dd,0x0f4aee4d,0x3907f675,0xe9f8338c,0x0e0531f0,0x7a874dc9 } },
+    /* 57 */
+    { { 0x97c27050,0x84b22d45,0x59e70bf8,0xbd0b8df7,0x79738b9b,0xb4d67405,
+        0xcd917c4f,0x47f4d5f5,0x13ce6e33,0x9099c4ce,0x521d0f8b,0x942bfd39 },
+      { 0xa43b566d,0x5028f0f6,0x21bff7de,0xaf6e8669,0xc44232cd,0x83f6f856,
+        0xf915069a,0x65680579,0xecfecb85,0xd12095a2,0xdb01ba16,0xcf7f06ae } },
+    /* 58 */
+    { { 0x8ef96c80,0x0f56e3c4,0x3ddb609c,0xd521f2b3,0x7dc1450d,0x2be94102,
+        0x02a91fe2,0x2d21a071,0x1efa37de,0x2e6f74fa,0x156c28a1,0x9a9a90b8 },
+      { 0x9dc7dfcb,0xc54ea9ea,0x2c2c1d62,0xc74e66fc,0x49d3e067,0x9f23f967,
+        0x54dd38ad,0x1c7c3a46,0x5946cee3,0xc7005884,0x45cc045d,0x89856368 } },
+    /* 59 */
+    { { 0xfce73946,0x29da7cd4,0x23168563,0x8f697db5,0xcba92ec6,0x8e235e9c,
+        0x9f91d3ea,0x55d4655f,0xaa50a6cd,0xf3689f23,0x21e6a1a0,0xdcf21c26 },
+      { 0x61b818bf,0xcffbc82e,0xda47a243,0xc74a2f96,0x8bc1a0cf,0x234e980a,
+        0x7929cb6d,0xf35fd6b5,0xefe17d6c,0x81468e12,0x58b2dafb,0xddea6ae5 } },
+    /* 60 */
+    { { 0x7e787b2e,0x294de887,0x39a9310d,0x258acc1f,0xac14265d,0x92d9714a,
+        0x708b48a0,0x18b5591c,0xe1abbf71,0x27cc6bb0,0x568307b9,0xc0581fa3 },
+      { 0xf24d4d58,0x9e0f58a3,0xe0ce2327,0xfebe9bb8,0x9d1be702,0x91fd6a41,
+        0xfacac993,0x9a7d8a45,0x9e50d66d,0xabc0a08c,0x06498201,0x02c342f7 } },
+    /* 61 */
+    { { 0x157bdbc2,0xccd71407,0xad0e1605,0x72fa89c6,0xb92a015f,0xb1d3da2b,
+        0xa0a3fe56,0x8ad9e7cd,0x24f06737,0x160edcbd,0x61275be6,0x79d4db33 },
+      { 0x5f3497c4,0xd3d31fd9,0x04192fb0,0x8cafeaee,0x13a50af3,0xe13ca745,
+        0x8c85aae5,0x18826167,0x9eb556ff,0xce06cea8,0xbdb549f3,0x2eef1995 } },
+    /* 62 */
+    { { 0x50596edc,0x8ed7d3eb,0x905243a2,0xaa359362,0xa4b6d02b,0xa212c2c2,
+        0xc4fbec68,0x611fd727,0xb84f733d,0x8a0b8ff7,0x5f0daf0e,0xd85a6b90 },
+      { 0xd4091cf7,0x60e899f5,0x2eff2768,0x4fef2b67,0x10c33964,0xc1f195cb,
+        0x93626a8f,0x8275d369,0x0d6c840a,0xc77904f4,0x7a868acd,0x88d8b7fd } },
+    /* 63 */
+    { { 0x7bd98425,0x85f23723,0xc70b154e,0xd4463992,0x96687a2e,0xcbb00ee2,
+        0xc83214fd,0x905fdbf7,0x13593684,0x2019d293,0xef51218e,0x0428c393 },
+      { 0x981e909a,0x40c7623f,0x7be192da,0x92513385,0x4010907e,0x48fe480f,
+        0x3120b459,0xdd7a187c,0xa1fd8f3c,0xc9d7702d,0xe358efc5,0x66e4753b } },
+    /* 64 */
+    { { 0x16973cf4,0x070d34e1,0x7e4f34f7,0x20aee08b,0x5eb8ad29,0x269af9b9,
+        0xa6a45dda,0xdde0a036,0x63df41e0,0xa18b528e,0xa260df2a,0x03cc71b2 },
+      { 0xa06b1dd7,0x24a6770a,0x9d2675d3,0x5bfa9c11,0x96844432,0x73c1e2a1,
+        0x131a6cf0,0x3660558d,0x2ee79454,0xb0289c83,0xc6d8ddcd,0xa6aefb01 } },
+    /* 65 */
+    { { 0x01ab5245,0xba1464b4,0xc48d93ff,0x9b8d0b6d,0x93ad272c,0x939867dc,
+        0xae9fdc77,0xbebe085e,0x894ea8bd,0x73ae5103,0x39ac22e1,0x740fc89a },
+      { 0x28e23b23,0x5e28b0a3,0xe13104d0,0x2352722e,0xb0a2640d,0xf4667a18,
+        0x49bb37c3,0xac74a72e,0xe81e183a,0x79f734f0,0x3fd9c0eb,0xbffe5b6c } },
+    /* 66 */
+    { { 0xc6a2123f,0xb1a358f5,0xfe28df6d,0x927b2d95,0xf199d2f9,0x89702753,
+        0x1a3f82dc,0x0a73754c,0x777affe1,0x063d029d,0xdae6d34d,0x5439817e },
+      { 0x6b8b83c4,0xf7979eef,0x9d945682,0x615cb214,0xc5e57eae,0x8f0e4fac,
+        0x113047dd,0x042b89b8,0x93f36508,0x888356dc,0x5fd1f32f,0xbf008d18 } },
+    /* 67 */
+    { { 0x4e8068db,0x8012aa24,0xa5729a47,0xc72cc641,0x43f0691d,0x3c33df2c,
+        0x1d92145f,0xfa057347,0xb97f7946,0xaefc0f2f,0x2f8121bf,0x813d75cb },
+      { 0x4383bba6,0x05613c72,0xa4224b3f,0xa924ce70,0x5f2179a6,0xe59cecbe,
+        0x79f62b61,0x78e2e8aa,0x53ad8079,0x3ac2cc3b,0xd8f4fa96,0x55518d71 } },
+    /* 68 */
+    { { 0x00623f3b,0x03cf2922,0x5f29ebff,0x095c7111,0x80aa6823,0x42d72247,
+        0x7458c0b0,0x044c7ba1,0x0959ec20,0xca62f7ef,0xf8ca929f,0x40ae2ab7 },
+      { 0xa927b102,0xb8c5377a,0xdc031771,0x398a86a0,0xc216a406,0x04908f9d,
+        0x918d3300,0xb423a73a,0xe0b94739,0x634b0ff1,0x2d69f697,0xe29de725 } },
+    /* 69 */
+    { { 0x8435af04,0x744d1400,0xfec192da,0x5f255b1d,0x336dc542,0x1f17dc12,
+        0x636a68a8,0x5c90c2a7,0x7704ca1e,0x960c9eb7,0x6fb3d65a,0x9de8cf1e },
+      { 0x511d3d06,0xc60fee0d,0xf9eb52c7,0x466e2313,0x206b0914,0x743c0f5f,
+        0x2191aa4d,0x42f55bac,0xffebdbc2,0xcefc7c8f,0xe6e8ed1c,0xd4fa6081 } },
+    /* 70 */
+    { { 0xb0ab9645,0xb5e405d3,0xd5f1f711,0xaeec7f98,0x585c2a6e,0x8ad42311,
+        0x512c6944,0x045acb9e,0xa90db1c6,0xae106c4e,0x898e6563,0xb89f33d5 },
+      { 0x7fed2ce4,0x43b07cd9,0xdd815b20,0xf9934e17,0x0a81a349,0x6778d4d5,
+        0x52918061,0x9e616ade,0xd7e67112,0xfa06db06,0x88488091,0x1da23cf1 } },
+    /* 71 */
+    { { 0x42f2c4b5,0x821c46b3,0x66059e47,0x931513ef,0x66f50cd1,0x7030ae43,
+        0x43e7b127,0x43b536c9,0x5fca5360,0x006258cf,0x6b557abf,0xe4e3ee79 },
+      { 0x24c8b22f,0xbb6b3900,0xfcbf1054,0x2eb5e2c1,0x567492af,0x937b18c9,
+        0xacf53957,0xf09432e4,0x1dbf3a56,0x585f5a9d,0xbe0887cf,0xf86751fd } },
+    /* 72 */
+    { { 0x9d10e0b2,0x157399cb,0x60dc51b7,0x1c0d5956,0x1f583090,0x1d496b8a,
+        0x88590484,0x6658bc26,0x03213f28,0x88c08ab7,0x7ae58de4,0x8d2e0f73 },
+      { 0x486cfee6,0x9b79bc95,0xe9e5bc57,0x036a26c7,0xcd8ae97a,0x1ad03601,
+        0xff3a0494,0x06907f87,0x2c7eb584,0x078f4bbf,0x7e8d0a5a,0xe3731bf5 } },
+    /* 73 */
+    { { 0xe1cd0abe,0x72f2282b,0x87efefa2,0xd4f9015e,0x6c3834bd,0x9d189806,
+        0xb8a29ced,0x9c8cdcc1,0xfee82ebc,0x0601b9f4,0x7206a756,0x371052bc },
+      { 0x46f32562,0x76fa1092,0x17351bb4,0xdaad534c,0xb3636bb5,0xc3d64c37,
+        0x45d54e00,0x038a8c51,0x32c09e7c,0x301e6180,0x95735151,0x9764eae7 } },
+    /* 74 */
+    { { 0xcbd5256a,0x8791b19f,0x6ca13a3b,0x4007e0f2,0x4cf06904,0x03b79460,
+        0xb6c17589,0xb18a9c22,0x81d45908,0xa1cb7d7d,0x21bb68f1,0x6e13fa9d },
+      { 0xa71e6e16,0x47183c62,0xe18749ed,0x5cf0ef8e,0x2e5ed409,0x2c9c7f9b,
+        0xe6e117e1,0x042eeacc,0x13fb5a7f,0xb86d4816,0xc9e5feb1,0xea1cf0ed } },
+    /* 75 */
+    { { 0xcea4cc9b,0x6e6573c9,0xafcec8f3,0x5417961d,0xa438b6f6,0x804bf02a,
+        0xdcd4ea88,0xb894b03c,0x3799571f,0xd0f807e9,0x862156e8,0x3466a7f5 },
+      { 0x56515664,0x51e59acd,0xa3c5eb0b,0x55b0f93c,0x6a4279db,0x84a06b02,
+        0xc5fae08e,0x5c850579,0xa663a1a2,0xcf07b8db,0xf46ffc8d,0x49a36bbc } },
+    /* 76 */
+    { { 0x46d93106,0xe47f5acc,0xaa897c9c,0x65b7ade0,0x12d7e4be,0x37cf4c94,
+        0xd4b2caa9,0xa2ae9b80,0xe60357a3,0x5e7ce09c,0xc8ecd5f9,0x29f77667 },
+      { 0xa8a0b1c5,0xdf6868f5,0x62978ad8,0x240858cf,0xdc0002a1,0x0f7ac101,
+        0xffe9aa05,0x1d28a9d7,0x5b962c97,0x744984d6,0x3d28c8b2,0xa8a7c00b } },
+    /* 77 */
+    { { 0xae11a338,0x7c58a852,0xd1af96e7,0xa78613f1,0x5355cc73,0x7e9767d2,
+        0x792a2de6,0x6ba37009,0x124386b2,0x7d60f618,0x11157674,0xab09b531 },
+      { 0x98eb9dd0,0x95a04841,0x15070328,0xe6c17acc,0x489c6e49,0xafc6da45,
+        0xbb211530,0xab45a60a,0x7d7ea933,0xc58d6592,0x095642c6,0xa3ef3c65 } },
+    /* 78 */
+    { { 0xdf010879,0x89d420e9,0x39576179,0x9d25255d,0xe39513b6,0x9cdefd50,
+        0xd5d1c313,0xe4efe45b,0x3f7af771,0xc0149de7,0x340ab06b,0x55a6b4f4 },
+      { 0xebeaf771,0xf1325251,0x878d4288,0x2ab44128,0x18e05afe,0xfcd5832e,
+        0xcc1fb62b,0xef52a348,0xc1c4792a,0x2bd08274,0x877c6dc7,0x345c5846 } },
+    /* 79 */
+    { { 0xbea65e90,0xde15ceb0,0x2416d99c,0x0987f72b,0xfd863dec,0x44db578d,
+        0xac6a3578,0xf617b74b,0xdb48e999,0x9e62bd7a,0xeab1a1be,0x877cae61 },
+      { 0x3a358610,0x23adddaa,0x325e2b07,0x2fc4d6d1,0x1585754e,0x897198f5,
+        0xb392b584,0xf741852c,0xb55f7de1,0x9927804c,0x1aa8efae,0xe9e6c4ed } },
+    /* 80 */
+    { { 0x98683186,0x867db639,0xddcc4ea9,0xfb5cf424,0xd4f0e7bd,0xcc9a7ffe,
+        0x7a779f7e,0x7c57f71c,0xd6b25ef2,0x90774079,0xb4081680,0x90eae903 },
+      { 0x0ee1fceb,0xdf2aae5e,0xe86c1a1f,0x3ff1da24,0xca193edf,0x80f587d6,
+        0xdc9b9d6a,0xa5695523,0x85920303,0x7b840900,0xba6dbdef,0x1efa4dfc } },
+    /* 81 */
+    { { 0xe0540015,0xfbd838f9,0xc39077dc,0x2c323946,0xad619124,0x8b1fb9e6,
+        0x0ca62ea8,0x9612440c,0x2dbe00ff,0x9ad9b52c,0xae197643,0xf52abaa1 },
+      { 0x2cac32ad,0xd0e89894,0x62a98f91,0xdfb79e42,0x276f55cb,0x65452ecf,
+        0x7ad23e12,0xdb1ac0d2,0xde4986f0,0xf68c5f6a,0x82ce327d,0x389ac37b } },
+    /* 82 */
+    { { 0xf8e60f5b,0x511188b4,0x48aa2ada,0x7fe67015,0x381abca2,0xdb333cb8,
+        0xdaf3fc97,0xb15e6d9d,0x36aabc03,0x4b24f6eb,0x72a748b4,0xc59789df },
+      { 0x29cf5279,0x26fcb8a5,0x01ad9a6c,0x7a3c6bfc,0x4b8bac9b,0x866cf88d,
+        0x9c80d041,0xf4c89989,0x70add148,0xf0a04241,0x45d81a41,0x5a02f479 } },
+    /* 83 */
+    { { 0xc1c90202,0xfa5c877c,0xf8ac7570,0xd099d440,0xd17881f7,0x428a5b1b,
+        0x5b2501d7,0x61e267db,0xf2e4465b,0xf889bf04,0x76aa4cb8,0x4da3ae08 },
+      { 0xe3e66861,0x3ef0fe26,0x3318b86d,0x5e772953,0x747396df,0xc3c35fbc,
+        0x439ffd37,0x5115a29c,0xb2d70374,0xbfc4bd97,0x56246b9d,0x088630ea } },
+    /* 84 */
+    { { 0xb8a9e8c9,0xcd96866d,0x5bb8091e,0xa11963b8,0x045b3cd2,0xc7f90d53,
+        0x80f36504,0x755a72b5,0x21d3751c,0x46f8b399,0x53c193de,0x4bffdc91 },
+      { 0xb89554e7,0xcd15c049,0xf7a26be6,0x353c6754,0xbd41d970,0x79602370,
+        0x12b176c0,0xde16470b,0x40c8809d,0x56ba1175,0xe435fb1e,0xe2db35c3 } },
+    /* 85 */
+    { { 0x6328e33f,0xd71e4aab,0xaf8136d1,0x5486782b,0x86d57231,0x07a4995f,
+        0x1651a968,0xf1f0a5bd,0x76803b6d,0xa5dc5b24,0x42dda935,0x5c587cbc },
+      { 0xbae8b4c0,0x2b6cdb32,0xb1331138,0x66d1598b,0x5d7e9614,0x4a23b2d2,
+        0x74a8c05d,0x93e402a6,0xda7ce82e,0x45ac94e6,0xe463d465,0xeb9f8281 } },
+    /* 86 */
+    { { 0xfecf5b9b,0x34e0f9d1,0xf206966a,0xa115b12b,0x1eaa0534,0x5591cf3b,
+        0xfb1558f9,0x5f0293cb,0x1bc703a5,0x1c8507a4,0x862c1f81,0x92e6b81c },
+      { 0xcdaf24e3,0xcc9ebc66,0x72fcfc70,0x68917ecd,0x8157ba48,0x6dc9a930,
+        0xb06ab2b2,0x5d425c08,0x36e929c4,0x362f8ce7,0x62e89324,0x09f6f57c } },
+    /* 87 */
+    { { 0xd29375fb,0x1c7d6b78,0xe35d1157,0xfabd851e,0x4243ea47,0xf6f62dcd,
+        0x8fe30b0f,0x1dd92460,0xffc6e709,0x08166dfa,0x0881e6a7,0xc6c4c693 },
+      { 0xd6a53fb0,0x20368f87,0x9eb4d1f9,0x38718e9f,0xafd7e790,0x03f08acd,
+        0x72fe2a1c,0x0835eb44,0x88076e5d,0x7e050903,0xa638e731,0x538f765e } },
+    /* 88 */
+    { { 0xc2663b4b,0x0e0249d9,0x47cd38dd,0xe700ab5b,0x2c46559f,0xb192559d,
+        0x4bcde66d,0x8f9f74a8,0x3e2aced5,0xad161523,0x3dd03a5b,0xc155c047 },
+      { 0x3be454eb,0x346a8799,0x83b7dccd,0x66ee94db,0xab9d2abe,0x1f6d8378,
+        0x7733f355,0x4a396dd2,0xf53553c2,0x419bd40a,0x731dd943,0xd0ead98d } },
+    /* 89 */
+    { { 0xec142408,0x908e0b0e,0x4114b310,0x98943cb9,0x1742b1d7,0x03dbf7d8,
+        0x693412f4,0xd270df6b,0x8f69e20c,0xc5065494,0x697e43a1,0xa76a90c3 },
+      { 0x4624825a,0xe0fa3384,0x8acc34c2,0x82e48c0b,0xe9a14f2b,0x7b24bd14,
+        0x4db30803,0x4f5dd5e2,0x932da0a3,0x0c77a9e7,0x74c653dc,0x20db90f2 } },
+    /* 90 */
+    { { 0x0e6c5fd9,0x261179b7,0x6c982eea,0xf8bec123,0xd4957b7e,0x47683338,
+        0x0a72f66a,0xcc47e664,0x1bad9350,0xbd54bf6a,0xf454e95a,0xdfbf4c6a },
+      { 0x6907f4fa,0x3f7a7afa,0x865ca735,0x7311fae0,0x2a496ada,0x24737ab8,
+        0x15feb79b,0x13e425f1,0xa1b93c21,0xe9e97c50,0x4ddd3eb5,0xb26b6eac } },
+    /* 91 */
+    { { 0x2a2e5f2b,0x81cab9f5,0xbf385ac4,0xf93caf29,0xc909963a,0xf4bf35c3,
+        0x74c9143c,0x081e7300,0xc281b4c5,0x3ea57fa8,0x9b340741,0xe497905c },
+      { 0x55ab3cfb,0xf556dd8a,0x518db6ad,0xd444b96b,0x5ef4b955,0x34f5425a,
+        0xecd26aa3,0xdda7a3ac,0xda655e97,0xb57da11b,0xc2024c70,0x02da3eff } },
+    /* 92 */
+    { { 0x6481d0d9,0xe24b0036,0x818fdfe2,0x3740dbe5,0x190fda00,0xc1fc1f45,
+        0x3cf27fde,0x329c9280,0x6934f43e,0x7435cb53,0x7884e8fe,0x2b505a5d },
+      { 0x711adcc9,0x6cfcc6a6,0x531e21e1,0xf034325c,0x9b2a8a99,0xa2f4a967,
+        0x3c21bdff,0x9d5f3842,0x31b57d66,0xb25c7811,0x0b8093b9,0xdb5344d8 } },
+    /* 93 */
+    { { 0xae50a2f5,0x0d72e667,0xe4a861d1,0x9b7f8d8a,0x330df1cb,0xa129f70f,
+        0xe04fefc3,0xe90aa5d7,0xe72c3ae1,0xff561ecb,0xcdb955fa,0x0d8fb428 },
+      { 0xd7663784,0xd2235f73,0x7e2c456a,0xc05baec6,0x2adbfccc,0xe5c292e4,
+        0xefb110d5,0x4fd17988,0xd19d49f3,0x27e57734,0x84f679fe,0x188ac4ce } },
+    /* 94 */
+    { { 0xa796c53e,0x7ee344cf,0x0868009b,0xbbf6074d,0x474a1295,0x1f1594f7,
+        0xac11632d,0x66776edc,0x04e2fa5a,0x1862278b,0xc854a89a,0x52665cf2 },
+      { 0x8104ab58,0x7e376464,0x7204fd6d,0x16775913,0x44ea1199,0x86ca06a5,
+        0x1c9240dd,0xaa3f765b,0x24746149,0x5f8501a9,0xdcd251d7,0x7b982e30 } },
+    /* 95 */
+    { { 0xc15f3060,0xe44e9efc,0xa87ebbe6,0x5ad62f2e,0xc79500d4,0x36499d41,
+        0x336fa9d1,0xa66d6dc0,0x5afd3b1f,0xf8afc495,0xe5c9822b,0x1d8ccb24 },
+      { 0x79d7584b,0x4031422b,0xea3f20dd,0xc54a0580,0x958468c5,0x3f837c8f,
+        0xfbea7735,0x3d82f110,0x7dffe2fc,0x679a8778,0x20704803,0x48eba63b } },
+    /* 96 */
+    { { 0xdf46e2f6,0x89b10d41,0x19514367,0x13ab57f8,0x1d469c87,0x067372b9,
+        0x4f6c5798,0x0c195afa,0x272c9acf,0xea43a12a,0x678abdac,0x9dadd8cb },
+      { 0xe182579a,0xcce56c6b,0x2d26c2d8,0x86febadb,0x2a44745c,0x1c668ee1,
+        0x98dc047a,0x580acd86,0x51b9ec2d,0x5a2b79cc,0x4054f6a0,0x007da608 } },
+    /* 97 */
+    { { 0x17b00dd0,0x9e3ca352,0x0e81a7a6,0x046779cb,0xd482d871,0xb999fef3,
+        0xd9233fbc,0xe6f38134,0xf48cd0e0,0x112c3001,0x3c6c66ae,0x934e7576 },
+      { 0xd73234dc,0xb44d4fc3,0x864eafc1,0xfcae2062,0x26bef21a,0x843afe25,
+        0xf3b75fdf,0x61355107,0x794c2e6b,0x8367a5aa,0x8548a372,0x3d2629b1 } },
+    /* 98 */
+    { { 0x437cfaf8,0x6230618f,0x2032c299,0x5b8742cb,0x2293643a,0x949f7247,
+        0x09464f79,0xb8040f1a,0x4f254143,0x049462d2,0x366c7e76,0xabd6b522 },
+      { 0xd5338f55,0x119b392b,0x01495a0c,0x1a80a9ce,0xf8d7537e,0xf3118ca7,
+        0x6bf4b762,0xb715adc2,0xa8482b6c,0x24506165,0x96a7c84d,0xd958d7c6 } },
+    /* 99 */
+    { { 0xbdc21f31,0x9ad8aa87,0x8063e58c,0xadb3cab4,0xb07dd7b8,0xefd86283,
+        0x1be7c6b4,0xc7b9b762,0x015582de,0x2ef58741,0x299addf3,0xc970c52e },
+      { 0x22f24d66,0x78f02e2a,0x74cc100a,0xefec1d10,0x09316e1a,0xaf2a6a39,
+        0x5849dd49,0xce7c2205,0x96bffc4c,0x9c1fe75c,0x7ba06ec0,0xcad98fd2 } },
+    /* 100 */
+    { { 0xb648b73e,0xed76e2d0,0x1cfd285e,0xa9f92ce5,0x2ed13de1,0xa8c86c06,
+        0xa5191a93,0x1d3a574e,0x1ad1b8bf,0x385cdf8b,0x47d2cfe3,0xbbecc28a },
+      { 0x69cec548,0x98d326c0,0xf240a0b2,0x4f5bc1dd,0x29057236,0x241a7062,
+        0xc68294a4,0x0fc6e9c5,0xa319f17a,0x4d04838b,0x9ffc1c6f,0x8b612cf1 } },
+    /* 101 */
+    { { 0x4c3830eb,0x9bb0b501,0x8ee0d0c5,0x3d08f83c,0x79ba9389,0xa4a62642,
+        0x9cbc2914,0x5d5d4044,0x074c46f0,0xae9eb83e,0x74ead7d6,0x63bb758f },
+      { 0xc6bb29e0,0x1c40d2ea,0x4b02f41e,0x95aa2d87,0x53cb199a,0x92989175,
+        0x51584f6d,0xdd91bafe,0x31a1aaec,0x3715efb9,0x46780f9e,0xc1b6ae5b } },
+    /* 102 */
+    { { 0x42772f41,0xcded3e4b,0x3bcb79d1,0x3a700d5d,0x80feee60,0x4430d50e,
+        0xf5e5d4bb,0x444ef1fc,0xe6e358ff,0xc660194f,0x6a91b43c,0xe68a2f32 },
+      { 0x977fe4d2,0x5842775c,0x7e2a41eb,0x78fdef5c,0xff8df00e,0x5f3bec02,
+        0x5852525d,0xf4b840cd,0x4e6988bd,0x0870483a,0xcc64b837,0x39499e39 } },
+    /* 103 */
+    { { 0xb08df5fe,0xfc05de80,0x63ba0362,0x0c12957c,0xd5cf1428,0xea379414,
+        0x54ef6216,0xc559132a,0xb9e65cf8,0x33d5f12f,0x1695d663,0x09c60278 },
+      { 0x61f7a2fb,0x3ac1ced4,0xd4f5eeb8,0xdd838444,0x8318fcad,0x82a38c6c,
+        0xe9f1a864,0x315be2e5,0x442daf47,0x317b5771,0x95aa5f9e,0x81b5904a } },
+    /* 104 */
+    { { 0x8b21d232,0x6b6b1c50,0x8c2cba75,0x87f3dbc0,0xae9f0faf,0xa7e74b46,
+        0xbb7b8079,0x036a0985,0x8d974a25,0x4f185b90,0xd9af5ec9,0x5aa7cef0 },
+      { 0x57dcfffc,0xe0566a70,0xb8453225,0x6ea311da,0x23368aa9,0x72ea1a8d,
+        0x48cd552d,0xed9b2083,0xc80ea435,0xb987967c,0x6c104173,0xad735c75 } },
+    /* 105 */
+    { { 0xcee76ef4,0xaea85ab3,0xaf1d2b93,0x44997444,0xeacb923f,0x0851929b,
+        0x51e3bc0c,0xb080b590,0x59be68a2,0xc4ee1d86,0x64b26cda,0xf00de219 },
+      { 0xf2e90d4d,0x8d7fb5c0,0x77d9ec64,0x00e219a7,0x5d1c491c,0xc4e6febd,
+        0x1a8f4585,0x080e3754,0x48d2af9c,0x4a9b86c8,0xb6679851,0x2ed70db6 } },
+    /* 106 */
+    { { 0x586f25cb,0xaee44116,0xa0fcf70f,0xf7b6861f,0x18a350e8,0x55d2cd20,
+        0x92dc286f,0x861bf3e5,0x6226aba7,0x9ab18ffa,0xa9857b03,0xd15827be },
+      { 0x92e6acef,0x26c1f547,0xac1fbac3,0x422c63c8,0xfcbfd71d,0xa2d8760d,
+        0xb2511224,0x35f6a539,0x048d1a21,0xbaa88fa1,0xebf999db,0x49f1abe9 } },
+    /* 107 */
+    { { 0xf7492b73,0x16f9f4f4,0xcb392b1a,0xcf28ec1e,0x69ca6ffc,0x45b130d4,
+        0xb72efa58,0x28ba8d40,0x5ca066f5,0xace987c7,0x4ad022eb,0x3e399246 },
+      { 0x752555bb,0x63a2d84e,0x9c2ae394,0xaaa93b4a,0xc89539ca,0xcd80424e,
+        0xaa119a99,0x6d6b5a6d,0x379f2629,0xbd50334c,0xef3cc7d3,0x899e925e } },
+    /* 108 */
+    { { 0xbf825dc4,0xb7ff3651,0x40b9c462,0x0f741cc4,0x5cc4fb5b,0x771ff5a9,
+        0x47fd56fe,0xcb9e9c9b,0x5626c0d3,0xbdf053db,0xf7e14098,0xa97ce675 },
+      { 0x6c934f5e,0x68afe5a3,0xccefc46f,0x6cd5e148,0xd7a88586,0xc7758570,
+        0xdd558d40,0x49978f5e,0x64ae00c1,0xa1d5088a,0xf1d65bb2,0x58f2a720 } },
+    /* 109 */
+    { { 0x3e4daedb,0x66fdda4a,0x65d1b052,0x38318c12,0x4c4bbf5c,0x28d910a2,
+        0x78a9cd14,0x762fe5c4,0xd2cc0aee,0x08e5ebaa,0xca0c654c,0xd2cdf257 },
+      { 0x08b717d2,0x48f7c58b,0x386cd07a,0x3807184a,0xae7d0112,0x3240f626,
+        0xc43917b0,0x03e9361b,0x20aea018,0xf261a876,0x7e1e6372,0x53f556a4 } },
+    /* 110 */
+    { { 0x2f512a90,0xc84cee56,0x1b0ea9f1,0x24b3c004,0xe26cc1ea,0x0ee15d2d,
+        0xf0c9ef7d,0xd848762c,0xd5341435,0x1026e9c5,0xfdb16b31,0x8f5b73dc },
+      { 0xd2c75d95,0x1f69bef2,0xbe064dda,0x8d33d581,0x57ed35e6,0x8c024c12,
+        0xc309c281,0xf8d435f9,0xd6960193,0xfd295061,0xe9e49541,0x66618d78 } },
+    /* 111 */
+    { { 0x8ce382de,0x571cfd45,0xde900dde,0x175806ee,0x34aba3b5,0x61849965,
+        0xde7aec95,0xe899778a,0xff4aa97f,0xe8f00f6e,0x010b0c6d,0xae971cb5 },
+      { 0x3af788f1,0x1827eebc,0xe413fe2d,0xd46229ff,0x4741c9b4,0x8a15455b,
+        0xf8e424eb,0x5f02e690,0xdae87712,0x40a1202e,0x64944f6d,0x49b3bda2 } },
+    /* 112 */
+    { { 0x035b2d69,0xd63c6067,0x6bed91b0,0xb507150d,0x7afb39b2,0x1f35f82f,
+        0x16012b66,0xb9bd9c01,0xed0a5f50,0x00d97960,0x2716f7c9,0xed705451 },
+      { 0x127abdb4,0x1576eff4,0xf01e701c,0x6850d698,0x3fc87e2f,0x9fa7d749,
+        0xb0ce3e48,0x0b6bcc6f,0xf7d8c1c0,0xf4fbe1f5,0x02719cc6,0xcf75230e } },
+    /* 113 */
+    { { 0x722d94ed,0x6761d6c2,0x3718820e,0xd1ec3f21,0x25d0e7c6,0x65a40b70,
+        0xbaf3cf31,0xd67f830e,0xb93ea430,0x633b3807,0x0bc96c69,0x17faa0ea },
+      { 0xdf866b98,0xe6bf3482,0xa9db52d4,0x205c1ee9,0xff9ab869,0x51ef9bbd,
+        0x75eeb985,0x3863dad1,0xd3cf442a,0xef216c3b,0xf9c8e321,0x3fb228e3 } },
+    /* 114 */
+    { { 0x0760ac07,0x94f9b70c,0x9d79bf4d,0xf3c9ccae,0xc5ffc83d,0x73cea084,
+        0xdc49c38e,0xef50f943,0xbc9e7330,0xf467a2ae,0x44ea7fba,0x5ee534b6 },
+      { 0x03609e7f,0x20cb6272,0x62fdc9f0,0x09844355,0x0f1457f7,0xaf5c8e58,
+        0xb4b25941,0xd1f50a6c,0x2ec82395,0x77cb247c,0xda3dca33,0xa5f3e1e5 } },
+    /* 115 */
+    { { 0x7d85fa94,0x023489d6,0x2db9ce47,0x0ba40537,0xaed7aad1,0x0fdf7a1f,
+        0x9a4ccb40,0xa57b0d73,0x5b18967c,0x48fcec99,0xb7274d24,0xf30b5b6e },
+      { 0xc81c5338,0x7ccb4773,0xa3ed6bd0,0xb85639e6,0x1d56eada,0x7d9df95f,
+        0x0a1607ad,0xe256d57f,0x957574d6,0x6da7ffdc,0x01c7a8c4,0x65f84046 } },
+    /* 116 */
+    { { 0xcba1e7f1,0x8d45d0cb,0x02b55f64,0xef0a08c0,0x17e19892,0x771ca31b,
+        0x4885907e,0xe1843ecb,0x364ce16a,0x67797ebc,0x8df4b338,0x816d2b2d },
+      { 0x39aa8671,0xe870b0e5,0xc102b5f5,0x9f0db3e4,0x1720c697,0x34296659,
+        0x613c0d2a,0x0ad4c89e,0x418ddd61,0x1af900b2,0xd336e20e,0xe087ca72 } },
+    /* 117 */
+    { { 0xaba10079,0x222831ff,0x6d64fff2,0x0dc5f87b,0x3e8cb330,0x44547907,
+        0x702a33fb,0xe815aaa2,0x5fba3215,0x338d6b2e,0x79f549c8,0x0f7535cb },
+      { 0x2ee95923,0x471ecd97,0xc6d1c09f,0x1e868b37,0xc666ef4e,0x2bc7b8ec,
+        0x808a4bfc,0xf5416589,0x3fbc4d2e,0xf23e9ee2,0x2d75125b,0x4357236c } },
+    /* 118 */
+    { { 0xba9cdb1b,0xfe176d95,0x2f82791e,0x45a1ca01,0x4de4cca2,0x97654af2,
+        0x5cc4bcb9,0xbdbf9d0e,0xad97ac0a,0xf6a7df50,0x61359fd6,0xc52112b0 },
+      { 0x4f05eae3,0x696d9ce3,0xe943ac2b,0x903adc02,0x0848be17,0xa9075347,
+        0x2a3973e5,0x1e20f170,0x6feb67e9,0xe1aacc1c,0xe16bc6b9,0x2ca0ac32 } },
+    /* 119 */
+    { { 0xef871eb5,0xffea12e4,0xa8bf0a7a,0x94c2f25d,0x78134eaa,0x4d1e4c2a,
+        0x0360fb10,0x11ed16fb,0x85fc11be,0x4029b6db,0xf4d390fa,0x5e9f7ab7 },
+      { 0x30646612,0x5076d72f,0xdda1d0d8,0xa0afed1d,0x85a1d103,0x29022257,
+        0x4e276bcd,0xcb499e17,0x51246c3d,0x16d1da71,0x589a0443,0xc72d56d3 } },
+    /* 120 */
+    { { 0xdae5bb45,0xdf5ffc74,0x261bd6dc,0x99068c4a,0xaa98ec7b,0xdc0afa7a,
+        0xf121e96d,0xedd2ee00,0x1414045c,0x163cc7be,0x335af50e,0xb0b1bbce },
+      { 0x01a06293,0xd440d785,0x6552e644,0xcdebab7c,0x8c757e46,0x48cb8dbc,
+        0x3cabe3cb,0x81f9cf78,0xb123f59a,0xddd02611,0xeeb3784d,0x3dc7b88e } },
+    /* 121 */
+    { { 0xc4741456,0xe1b8d398,0x6032a121,0xa9dfa902,0x1263245b,0x1cbfc86d,
+        0x5244718c,0xf411c762,0x05b0fc54,0x96521d54,0xdbaa4985,0x1afab46e },
+      { 0x8674b4ad,0xa75902ba,0x5ad87d12,0x486b43ad,0x36e0d099,0x72b1c736,
+        0xbb6cd6d6,0x39890e07,0x59bace4e,0x8128999c,0x7b535e33,0xd8da430b } },
+    /* 122 */
+    { { 0xc6b75791,0x39f65642,0x21806bfb,0x050947a6,0x1362ef84,0x0ca3e370,
+        0x8c3d2391,0x9bc60aed,0x732e1ddc,0x9b488671,0xa98ee077,0x12d10d9e },
+      { 0x3651b7dc,0xb6f2822d,0x80abd138,0x6345a5ba,0x472d3c84,0x62033262,
+        0xacc57527,0xd54a1d40,0x424447cb,0x6ea46b3a,0x2fb1a496,0x5bc41057 } },
+    /* 123 */
+    { { 0xa751cd0e,0xe70c57a3,0xeba3c7d6,0x190d8419,0x9d47d55a,0xb1c3bee7,
+        0xf912c6d8,0xda941266,0x407a6ad6,0x12e9aacc,0x6e838911,0xd6ce5f11 },
+      { 0x70e1f2ce,0x063ca97b,0x8213d434,0xa3e47c72,0x84df810a,0xa016e241,
+        0xdfd881a4,0x688ad7b0,0xa89bf0ad,0xa37d99fc,0xa23c2d23,0xd8e3f339 } },
+    /* 124 */
+    { { 0x750bed6f,0xbdf53163,0x83e68b0a,0x808abc32,0x5bb08a33,0x85a36627,
+        0x6b0e4abe,0xf72a3a0f,0xfaf0c6ad,0xf7716d19,0x5379b25f,0x22dcc020 },
+      { 0xf9a56e11,0x7400bf8d,0x56a47f21,0x6cb8bad7,0x7a6eb644,0x7c97176f,
+        0xd1f5b646,0xe8fd84f7,0x44ddb054,0x98320a94,0x1dde86f5,0x07071ba3 } },
+    /* 125 */
+    { { 0x98f8fcb9,0x6fdfa0e5,0x94d0d70c,0x89cec8e0,0x106d20a8,0xa0899397,
+        0xba8acc9c,0x915bfb9a,0x5507e01c,0x1370c94b,0x8a821ffb,0x83246a60 },
+      { 0xbe3c378f,0xa8273a9f,0x35a25be9,0x7e544789,0x4dd929d7,0x6cfa4972,
+        0x365bd878,0x987fed9d,0x5c29a7ae,0x4982ac94,0x5ddd7ec5,0x4589a5d7 } },
+    /* 126 */
+    { { 0xa95540a9,0x9fabb174,0x0162c5b0,0x7cfb886f,0xea3dee18,0x17be766b,
+        0xe88e624c,0xff7da41f,0x8b919c38,0xad0b71eb,0xf31ff9a9,0x86a522e0 },
+      { 0x868bc259,0xbc8e6f72,0x3ccef9e4,0x6130c638,0x9a466555,0x09f1f454,
+        0x19b2bfb4,0x8e6c0f09,0x0ca7bb22,0x945c46c9,0x4dafb67b,0xacd87168 } },
+    /* 127 */
+    { { 0x10c53841,0x090c72ca,0x55a4fced,0xc20ae01b,0xe10234ad,0x03f7ebd5,
+        0x85892064,0xb3f42a6a,0xb4a14722,0xbdbc30c0,0x8ca124cc,0x971bc437 },
+      { 0x517ff2ff,0x6f79f46d,0xecba947b,0x6a9c96e2,0x62925122,0x5e79f2f4,
+        0x6a4e91f1,0x30a96bb1,0x2d4c72da,0x1147c923,0x5811e4df,0x65bc311f } },
+    /* 128 */
+    { { 0x139b3239,0x87c7dd7d,0x4d833bae,0x8b57824e,0x9fff0015,0xbcbc4878,
+        0x909eaf1a,0x8ffcef8b,0xf1443a78,0x9905f4ee,0xe15cbfed,0x020dd4a2 },
+      { 0xa306d695,0xca2969ec,0xb93caf60,0xdf940cad,0x87ea6e39,0x67f7fab7,
+        0xf98c4fe5,0x0d0ee10f,0xc19cb91e,0xc646879a,0x7d1d7ab4,0x4b4ea50c } },
+    /* 129 */
+    { { 0x7a0db57e,0x19e40945,0x9a8c9702,0xe6017cad,0x1be5cff9,0xdbf739e5,
+        0xa7a938a2,0x3646b3cd,0x68350dfc,0x04511085,0x56e098b5,0xad3bd6f3 },
+      { 0xee2e3e3e,0x935ebabf,0x473926cb,0xfbd01702,0x9e9fb5aa,0x7c735b02,
+        0x2e3feff0,0xc52a1b85,0x046b405a,0x9199abd3,0x39039971,0xe306fcec } },
+    /* 130 */
+    { { 0x23e4712c,0xd6d9aec8,0xc3c198ee,0x7ca8376c,0x31bebd8a,0xe6d83187,
+        0xd88bfef3,0xed57aff3,0xcf44edc7,0x72a645ee,0x5cbb1517,0xd4e63d0b },
+      { 0xceee0ecf,0x98ce7a1c,0x5383ee8e,0x8f012633,0xa6b455e8,0x3b879078,
+        0xc7658c06,0xcbcd3d96,0x0783336a,0x721d6fe7,0x5a677136,0xf21a7263 } },
+    /* 131 */
+    { { 0x9586ba11,0x19d8b3cd,0x8a5c0480,0xd9e0aeb2,0x2230ef5c,0xe4261dbf,
+        0x02e6bf09,0x095a9dee,0x80dc7784,0x8963723c,0x145157b1,0x5c97dbaf },
+      { 0x4bc4503e,0x97e74434,0x85a6b370,0x0fb1cb31,0xcd205d4b,0x3e8df2be,
+        0xf8f765da,0x497dd1bc,0x6c988a1a,0x92ef95c7,0x64dc4cfa,0x3f924baa } },
+    /* 132 */
+    { { 0x7268b448,0x6bf1b8dd,0xefd79b94,0xd4c28ba1,0xe4e3551f,0x2fa1f8c8,
+        0x5c9187a9,0x769e3ad4,0x40326c0d,0x28843b4d,0x50d5d669,0xfefc8094 },
+      { 0x90339366,0x30c85bfd,0x5ccf6c3a,0x4eeb56f1,0x28ccd1dc,0x0e72b149,
+        0xf2ce978e,0x73ee85b5,0x3165bb23,0xcdeb2bf3,0x4e410abf,0x8106c923 } },
+    /* 133 */
+    { { 0x7d02f4ee,0xc8df0161,0x18e21225,0x8a781547,0x6acf9e40,0x4ea895eb,
+        0x6e5a633d,0x8b000cb5,0x7e981ffb,0xf31d86d5,0x4475bc32,0xf5c8029c },
+      { 0x1b568973,0x764561ce,0xa62996ec,0x2f809b81,0xda085408,0x9e513d64,
+        0xe61ce309,0xc27d815d,0x272999e0,0x0da6ff99,0xfead73f7,0xbd284779 } },
+    /* 134 */
+    { { 0x9b1cdf2b,0x6033c2f9,0xbc5fa151,0x2a99cf06,0x12177b3b,0x7d27d259,
+        0xc4485483,0xb1f15273,0x102e2297,0x5fd57d81,0xc7f6acb7,0x3d43e017 },
+      { 0x3a70eb28,0x41a8bb0b,0x3e80b06b,0x67de2d8e,0x70c28de5,0x09245a41,
+        0xa7b26023,0xad7dbcb1,0x2cbc6c1e,0x70b08a35,0x9b33041f,0xb504fb66 } },
+    /* 135 */
+    { { 0xf97a27c2,0xa8e85ab5,0xc10a011b,0x6ac5ec8b,0xffbcf161,0x55745533,
+        0x65790a60,0x01780e85,0x99ee75b0,0xe451bf85,0x39c29881,0x8907a63b },
+      { 0x260189ed,0x76d46738,0x47bd35cb,0x284a4436,0x20cab61e,0xd74e8c40,
+        0x416cf20a,0x6264bf8c,0x5fd820ce,0xfa5a6c95,0xf24bb5fc,0xfa7154d0 } },
+    /* 136 */
+    { { 0x9b3f5034,0x18482cec,0xcd9e68fd,0x962d445a,0x95746f23,0x266fb1d6,
+        0x58c94a4b,0xc66ade5a,0xed68a5b6,0xdbbda826,0x7ab0d6ae,0x05664a4d },
+      { 0x025e32fc,0xbcd4fe51,0xa96df252,0x61a5aebf,0x31592a31,0xd88a07e2,
+        0x98905517,0x5d9d94de,0x5fd440e7,0x96bb4010,0xe807db4c,0x1b0c47a2 } },
+    /* 137 */
+    { { 0x08223878,0x5c2a6ac8,0xe65a5558,0xba08c269,0x9bbc27fd,0xd22b1b9b,
+        0x72b9607d,0x919171bf,0xe588dc58,0x9ab455f9,0x23662d93,0x6d54916e },
+      { 0x3b1de0c1,0x8da8e938,0x804f278f,0xa84d186a,0xd3461695,0xbf4988cc,
+        0xe10eb0cb,0xf5eae3be,0xbf2a66ed,0x1ff8b68f,0xc305b570,0xa68daf67 } },
+    /* 138 */
+    { { 0x44b2e045,0xc1004cff,0x4b1c05d4,0x91b5e136,0x88a48a07,0x53ae4090,
+        0xea11bb1a,0x73fb2995,0x3d93a4ea,0x32048570,0x3bfc8a5f,0xcce45de8 },
+      { 0xc2b3106e,0xaff4a97e,0xb6848b4f,0x9069c630,0xed76241c,0xeda837a6,
+        0x6cc3f6cf,0x8a0daf13,0x3da018a8,0x199d049d,0xd9093ba3,0xf867c6b1 } },
+    /* 139 */
+    { { 0x56527296,0xe4d42a56,0xce71178d,0xae26c73d,0x6c251664,0x70a0adac,
+        0x5dc0ae1d,0x813483ae,0xdaab2daf,0x7574eacd,0xc2d55f4f,0xc56b52dc },
+      { 0x95f32923,0x872bc167,0x5bdd2a89,0x4be17581,0xa7699f00,0x9b57f1e7,
+        0x3ac2de02,0x5fcd9c72,0x92377739,0x83af3ba1,0xfc50b97f,0xa64d4e2b } },
+    /* 140 */
+    { { 0x0e552b40,0x2172dae2,0xd34d52e8,0x62f49725,0x07958f98,0x7930ee40,
+        0x751fdd74,0x56da2a90,0xf53e48c3,0xf1192834,0x8e53c343,0x34d2ac26 },
+      { 0x13111286,0x1073c218,0xda9d9827,0x201dac14,0xee95d378,0xec2c29db,
+        0x1f3ee0b1,0x9316f119,0x544ce71c,0x7890c9f0,0x27612127,0xd77138af } },
+    /* 141 */
+    { { 0x3b4ad1cd,0x78045e6d,0x4aa49bc1,0xcd86b94e,0xfd677a16,0x57e51f1d,
+        0xfa613697,0xd9290935,0x34f4d893,0x7a3f9593,0x5d5fcf9b,0x8c9c248b },
+      { 0x6f70d4e9,0x9f23a482,0x63190ae9,0x17273454,0x5b081a48,0x4bdd7c13,
+        0x28d65271,0x1e2de389,0xe5841d1f,0x0bbaaa25,0x746772e5,0xc4c18a79 } },
+    /* 142 */
+    { { 0x593375ac,0x10ee2681,0x7dd5e113,0x4f3288be,0x240f3538,0x9a97b2fb,
+        0x1de6b1e2,0xfa11089f,0x1351bc58,0x516da562,0x2dfa85b5,0x573b6119 },
+      { 0x6cba7df5,0x89e96683,0x8c28ab40,0xf299be15,0xad43fcbf,0xe91c9348,
+        0x9a1cefb3,0xe9bbc7cc,0x738b2775,0xc8add876,0x775eaa01,0x6e3b1f2e } },
+    /* 143 */
+    { { 0xb677788b,0x0365a888,0x3fd6173c,0x634ae8c4,0x9e498dbe,0x30498761,
+        0xc8f779ab,0x08c43e6d,0x4c09aca9,0x068ae384,0x2018d170,0x2380c70b },
+      { 0xa297c5ec,0xcf77fbc3,0xca457948,0xdacbc853,0x336bec7e,0x3690de04,
+        0x14eec461,0x26bbac64,0x1f713abf,0xd1c23c7e,0xe6fd569e,0xf08bbfcd } },
+    /* 144 */
+    { { 0x84770ee3,0x5f8163f4,0x744a1706,0x0e0c7f94,0xe1b2d46d,0x9c8f05f7,
+        0xd01fd99a,0x417eafe7,0x11440e5b,0x2ba15df5,0x91a6fbcf,0xdc5c552a },
+      { 0xa270f721,0x86271d74,0xa004485b,0x32c0a075,0x8defa075,0x9d1a87e3,
+        0xbf0d20fe,0xb590a7ac,0x8feda1f5,0x430c41c2,0x58f6ec24,0x454d2879 } },
+    /* 145 */
+    { { 0x7c525435,0x52b7a635,0x37c4bdbc,0x3d9ef57f,0xdffcc475,0x2bb93e9e,
+        0x7710f3be,0xf7b8ba98,0x21b727de,0x42ee86da,0x2e490d01,0x55ac3f19 },
+      { 0xc0c1c390,0x487e3a6e,0x446cde7b,0x036fb345,0x496ae951,0x089eb276,
+        0x71ed1234,0xedfed4d9,0x900f0b46,0x661b0dd5,0x8582f0d3,0x11bd6f1b } },
+    /* 146 */
+    { { 0x076bc9d1,0x5cf9350f,0xcf3cd2c3,0x15d903be,0x25af031c,0x21cfc8c2,
+        0x8b1cc657,0xe0ad3248,0x70014e87,0xdd9fb963,0x297f1658,0xf0f3a5a1 },
+      { 0xf1f703aa,0xbb908fba,0x2f6760ba,0x2f9cc420,0x66a38b51,0x00ceec66,
+        0x05d645da,0x4deda330,0xf7de3394,0xb9cf5c72,0x1ad4c906,0xaeef6502 } },
+    /* 147 */
+    { { 0x7a19045d,0x0583c8b1,0xd052824c,0xae7c3102,0xff6cfa58,0x2a234979,
+        0x62c733c0,0xfe9dffc9,0x9c0c4b09,0x3a7fa250,0x4fe21805,0x516437bb },
+      { 0xc2a23ddb,0x9454e3d5,0x289c104e,0x0726d887,0x4fd15243,0x8977d918,
+        0x6d7790ba,0xc559e73f,0x465af85f,0x8fd3e87d,0x5feee46b,0xa2615c74 } },
+    /* 148 */
+    { { 0x4335167d,0xc8d607a8,0xe0f5c887,0x8b42d804,0x398d11f9,0x5f9f13df,
+        0x20740c67,0x5aaa5087,0xa3d9234b,0x83da9a6a,0x2a54bad1,0xbd3a5c4e },
+      { 0x2db0f658,0xdd13914c,0x5a3f373a,0x29dcb66e,0x5245a72b,0xbfd62df5,
+        0x91e40847,0x19d18023,0xb136b1ae,0xd9df74db,0x3f93bc5b,0x72a06b6b } },
+    /* 149 */
+    { { 0xad19d96f,0x6da19ec3,0xfb2a4099,0xb342daa4,0x662271ea,0x0e61633a,
+        0xce8c054b,0x3bcece81,0x8bd62dc6,0x7cc8e061,0xee578d8b,0xae189e19 },
+      { 0xdced1eed,0x73e7a25d,0x7875d3ab,0xc1257f0a,0x1cfef026,0x2cb2d5a2,
+        0xb1fdf61c,0xd98ef39b,0x24e83e6c,0xcd8e6f69,0xc7b7088b,0xd71e7076 } },
+    /* 150 */
+    { { 0x9d4245bf,0x33936830,0x2ac2953b,0x22d96217,0x56c3c3cd,0xb3bf5a82,
+        0x0d0699e8,0x50c9be91,0x8f366459,0xec094463,0x513b7c35,0x6c056dba },
+      { 0x045ab0e3,0x687a6a83,0x445c9295,0x8d40b57f,0xa16f5954,0x0f345048,
+        0x3d8f0a87,0x64b5c639,0x9f71c5e2,0x106353a2,0x874f0dd4,0xdd58b475 } },
+    /* 151 */
+    { { 0x62230c72,0x67ec084f,0x481385e3,0xf14f6cca,0x4cda7774,0xf58bb407,
+        0xaa2dbb6b,0xe15011b1,0x0c035ab1,0xd488369d,0x8245f2fd,0xef83c24a },
+      { 0x9fdc2538,0xfb57328f,0x191fe46a,0x79808293,0x32ede548,0xe28f5c44,
+        0xea1a022c,0x1b3cda99,0x3df2ec7f,0x39e639b7,0x760e9a18,0x77b6272b } },
+    /* 152 */
+    { { 0xa65d56d5,0x2b1d51bd,0x7ea696e0,0x3a9b71f9,0x9904f4c4,0x95250ecc,
+        0xe75774b7,0x8bc4d6eb,0xeaeeb9aa,0x0e343f8a,0x930e04cb,0xc473c1d1 },
+      { 0x064cd8ae,0x282321b1,0x5562221c,0xf4b4371e,0xd1bf1221,0xc1cc81ec,
+        0xe2c8082f,0xa52a07a9,0xba64a958,0x350d8e59,0x6fb32c9a,0x29e4f3de } },
+    /* 153 */
+    { { 0xba89aaa5,0x0aa9d56c,0xc4c6059e,0xf0208ac0,0xbd6ddca4,0x7400d9c6,
+        0xf2c2f74a,0xb384e475,0xb1562dd3,0x4c1061fc,0x2e153b8d,0x3924e248 },
+      { 0x849808ab,0xf38b8d98,0xa491aa36,0x29bf3260,0x88220ede,0x85159ada,
+        0xbe5bc422,0x8b47915b,0xd7300967,0xa934d72e,0x2e515d0d,0xc4f30398 } },
+    /* 154 */
+    { { 0x1b1de38b,0xe3e9ee42,0x42636760,0xa124e25a,0x90165b1a,0x90bf73c0,
+        0x146434c5,0x21802a34,0x2e1fa109,0x54aa83f2,0xed9c51e9,0x1d4bd03c },
+      { 0x798751e6,0xc2d96a38,0x8c3507f5,0xed27235f,0xc8c24f88,0xb5fb80e2,
+        0xd37f4f78,0xf873eefa,0xf224ba96,0x7229fd74,0x9edd7149,0x9dcd9199 } },
+    /* 155 */
+    { { 0x4e94f22a,0xee9f81a6,0xf71ec341,0xe5609892,0xa998284e,0x6c818ddd,
+        0x3b54b098,0x9fd47295,0x0e8a7cc9,0x47a6ac03,0xb207a382,0xde684e5e },
+      { 0x2b6b956b,0x4bdd1ecd,0xf01b3583,0x09084414,0x55233b14,0xe2f80b32,
+        0xef5ebc5e,0x5a0fec54,0xbf8b29a2,0x74cf25e6,0x7f29e014,0x1c757fa0 } },
+    /* 156 */
+    { { 0xeb0fdfe4,0x1bcb5c4a,0xf0899367,0xd7c649b3,0x05bc083b,0xaef68e3f,
+        0xa78aa607,0x57a06e46,0x21223a44,0xa2136ecc,0x52f5a50b,0x89bd6484 },
+      { 0x4455f15a,0x724411b9,0x08a9c0fd,0x23dfa970,0x6db63bef,0x7b0da4d1,
+        0xfb162443,0x6f8a7ec1,0xe98284fb,0xc1ac9cee,0x33566022,0x085a582b } },
+    /* 157 */
+    { { 0xec1f138a,0x15cb61f9,0x668f0c28,0x11c9a230,0xdf93f38f,0xac829729,
+        0x4048848d,0xcef25698,0x2bba8fbf,0x3f686da0,0x111c619a,0xed5fea78 },
+      { 0xd6d1c833,0x9b4f73bc,0x86e7bf80,0x50951606,0x042b1d51,0xa2a73508,
+        0x5fb89ec2,0x9ef6ea49,0x5ef8b892,0xf1008ce9,0x9ae8568b,0x78a7e684 } },
+    /* 158 */
+    { { 0x10470cd8,0x3fe83a7c,0xf86df000,0x92734682,0xda9409b5,0xb5dac06b,
+        0x94939c5f,0x1e7a9660,0x5cc116dc,0xdec6c150,0x66bac8cc,0x1a52b408 },
+      { 0x6e864045,0x5303a365,0x9139efc1,0x45eae72a,0x6f31d54f,0x83bec646,
+        0x6e958a6d,0x2fb4a86f,0x4ff44030,0x6760718e,0xe91ae0df,0x008117e3 } },
+    /* 159 */
+    { { 0x384310a2,0x5d5833ba,0x1fd6c9fc,0xbdfb4edc,0x849c4fb8,0xb9a4f102,
+        0x581c1e1f,0xe5fb239a,0xd0a9746d,0xba44b2e7,0x3bd942b9,0x78f7b768 },
+      { 0xc87607ae,0x076c8ca1,0xd5caaa7e,0x82b23c2e,0x2763e461,0x6a581f39,
+        0x3886df11,0xca8a5e4a,0x264e7f22,0xc87e90cf,0x215cfcfc,0x04f74870 } },
+    /* 160 */
+    { { 0x141d161c,0x5285d116,0x93c4ed17,0x67cd2e0e,0x7c36187e,0x12c62a64,
+        0xed2584ca,0xf5329539,0x42fbbd69,0xc4c777c4,0x1bdfc50a,0x107de776 },
+      { 0xe96beebd,0x9976dcc5,0xa865a151,0xbe2aff95,0x9d8872af,0x0e0a9da1,
+        0xa63c17cc,0x5e357a3d,0xe15cc67c,0xd31fdfd8,0x7970c6d8,0xc44bbefd } },
+    /* 161 */
+    { { 0x4c0c62f1,0x703f83e2,0x4e195572,0x9b1e28ee,0xfe26cced,0x6a82858b,
+        0xc43638fa,0xd381c84b,0xa5ba43d8,0x94f72867,0x10b82743,0x3b4a783d },
+      { 0x7576451e,0xee1ad7b5,0x14b6b5c8,0xc3d0b597,0xfcacc1b8,0x3dc30954,
+        0x472c9d7b,0x55df110e,0x02f8a328,0x97c86ed7,0x88dc098f,0xd0433413 } },
+    /* 162 */
+    { { 0x2ca8f2fe,0x1a60d152,0x491bd41f,0x61640948,0x58dfe035,0x6dae29a5,
+        0x278e4863,0x9a615bea,0x9ad7c8e5,0xbbdb4477,0x2ceac2fc,0x1c706630 },
+      { 0x99699b4b,0x5e2b54c6,0x239e17e8,0xb509ca6d,0xea063a82,0x728165fe,
+        0xb6a22e02,0x6b5e609d,0xb26ee1df,0x12813905,0x439491fa,0x07b9f722 } },
+    /* 163 */
+    { { 0x48ff4e49,0x1592ec14,0x6d644129,0x3e4e9f17,0x1156acc0,0x7acf8288,
+        0xbb092b0b,0x5aa34ba8,0x7d38393d,0xcd0f9022,0xea4f8187,0x416724dd },
+      { 0xc0139e73,0x3c4e641c,0x91e4d87d,0xe0fe46cf,0xcab61f8a,0xedb3c792,
+        0xd3868753,0x4cb46de4,0x20f1098a,0xe449c21d,0xf5b8ea6e,0x5e5fd059 } },
+    /* 164 */
+    { { 0x75856031,0x7fcadd46,0xeaf2fbd0,0x89c7a4cd,0x7a87c480,0x1af523ce,
+        0x61d9ae90,0xe5fc1095,0xbcdb95f5,0x3fb5864f,0xbb5b2c7d,0xbeb5188e },
+      { 0x3ae65825,0x3d1563c3,0x0e57d641,0x116854c4,0x1942ebd3,0x11f73d34,
+        0xc06955b3,0x24dc5904,0x995a0a62,0x8a0d4c83,0x5d577b7d,0xfb26b86d } },
+    /* 165 */
+    { { 0xc686ae17,0xc53108e7,0xd1c1da56,0x9090d739,0x9aec50ae,0x4583b013,
+        0xa49a6ab2,0xdd9a088b,0xf382f850,0x28192eea,0xf5fe910e,0xcc8df756 },
+      { 0x9cab7630,0x877823a3,0xfb8e7fc1,0x64984a9a,0x364bfc16,0x5448ef9c,
+        0xc44e2a9a,0xbbb4f871,0x435c95e9,0x901a41ab,0xaaa50a06,0xc6c23e5f } },
+    /* 166 */
+    { { 0x9034d8dd,0xb78016c1,0x0b13e79b,0x856bb44b,0xb3241a05,0x85c6409a,
+        0x2d78ed21,0x8d2fe19a,0x726eddf2,0xdcc7c26d,0x25104f04,0x3ccaff5f },
+      { 0x6b21f843,0x397d7edc,0xe975de4c,0xda88e4dd,0x4f5ab69e,0x5273d396,
+        0x9aae6cc0,0x537680e3,0x3e6f9461,0xf749cce5,0x957bffd3,0x021ddbd9 } },
+    /* 167 */
+    { { 0x777233cf,0x7b64585f,0x0942a6f0,0xfe6771f6,0xdfe6eef0,0x636aba7a,
+        0x86038029,0x63bbeb56,0xde8fcf36,0xacee5842,0xd4a20524,0x48d9aa99 },
+      { 0x0da5e57a,0xcff7a74c,0xe549d6c9,0xc232593c,0xf0f2287b,0x68504bcc,
+        0xbc8360b5,0x6d7d098d,0x5b402f41,0xeac5f149,0xb87d1bf1,0x61936f11 } },
+    /* 168 */
+    { { 0xb8153a9d,0xaa9da167,0x9e83ecf0,0xa49fe3ac,0x1b661384,0x14c18f8e,
+        0x38434de1,0x61c24dab,0x283dae96,0x3d973c3a,0x82754fc9,0xc99baa01 },
+      { 0x4c26b1e3,0x477d198f,0xa7516202,0x12e8e186,0x362addfa,0x386e52f6,
+        0xc3962853,0x31e8f695,0x6aaedb60,0xdec2af13,0x29cf74ac,0xfcfdb4c6 } },
+    /* 169 */
+    { { 0xcca40298,0x6b3ee958,0xf2f5d195,0xc3878153,0xed2eae5b,0x0c565630,
+        0x3a697cf2,0xd089b37e,0xad5029ea,0xc2ed2ac7,0x0f0dda6a,0x7e5cdfad },
+      { 0xd9b86202,0xf98426df,0x4335e054,0xed1960b1,0x3f14639e,0x1fdb0246,
+        0x0db6c670,0x17f709c3,0x773421e1,0xbfc687ae,0x26c1a8ac,0x13fefc4a } },
+    /* 170 */
+    { { 0x7ffa0a5f,0xe361a198,0xc63fe109,0xf4b26102,0x6c74e111,0x264acbc5,
+        0x77abebaf,0x4af445fa,0x24cddb75,0x448c4fdd,0x44506eea,0x0b13157d },
+      { 0x72e9993d,0x22a6b159,0x85e5ecbe,0x2c3c57e4,0xfd83e1a1,0xa673560b,
+        0xc3b8c83b,0x6be23f82,0x40bbe38e,0x40b13a96,0xad17399b,0x66eea033 } },
+    /* 171 */
+    { { 0xb4c6c693,0x49fc6e95,0x36af7d38,0xefc735de,0x35fe42fc,0xe053343d,
+        0x6a9ab7c3,0xf0aa427c,0x4a0fcb24,0xc79f0436,0x93ebbc50,0x16287243 },
+      { 0x16927e1e,0x5c3d6bd0,0x673b984c,0x40158ed2,0x4cd48b9a,0xa7f86fc8,
+        0x60ea282d,0x1643eda6,0xe2a1beed,0x45b393ea,0x19571a94,0x664c839e } },
+    /* 172 */
+    { { 0x27eeaf94,0x57745750,0xea99e1e7,0x2875c925,0x5086adea,0xc127e7ba,
+        0x86fe424f,0x765252a0,0x2b6c0281,0x1143cc6c,0xd671312d,0xc9bb2989 },
+      { 0x51acb0a5,0x880c337c,0xd3c60f78,0xa3710915,0x9262b6ed,0x496113c0,
+        0x9ce48182,0x5d25d9f8,0xb3813586,0x53b6ad72,0x4c0e159c,0x0ea3bebc } },
+    /* 173 */
+    { { 0xc5e49bea,0xcaba450a,0x7c05da59,0x684e5415,0xde7ac36c,0xa2e9cab9,
+        0x2e6f957b,0x4ca79b5f,0x09b817b1,0xef7b0247,0x7d89df0f,0xeb304990 },
+      { 0x46fe5096,0x508f7307,0x2e04eaaf,0x695810e8,0x3512f76c,0x88ef1bd9,
+        0x3ebca06b,0x77661351,0xccf158b7,0xf7d4863a,0x94ee57da,0xb2a81e44 } },
+    /* 174 */
+    { { 0x6d53e6ba,0xff288e5b,0x14484ea2,0xa90de1a9,0xed33c8ec,0x2fadb60c,
+        0x28b66a40,0x579d6ef3,0xec24372d,0x4f2dd6dd,0x1d66ec7d,0xe9e33fc9 },
+      { 0x039eab6e,0x110899d2,0x3e97bb5e,0xa31a667a,0xcfdce68e,0x6200166d,
+        0x5137d54b,0xbe83ebae,0x4800acdf,0x085f7d87,0x0c6f8c86,0xcf4ab133 } },
+    /* 175 */
+    { { 0x931e08fb,0x03f65845,0x1506e2c0,0x6438551e,0x9c36961f,0x5791f0dc,
+        0xe3dcc916,0x68107b29,0xf495d2ca,0x83242374,0x6ee5895b,0xd8cfb663 },
+      { 0xa0349b1b,0x525e0f16,0x4a0fab86,0x33cd2c6c,0x2af8dda9,0x46c12ee8,
+        0x71e97ad3,0x7cc424ba,0x37621eb0,0x69766ddf,0xa5f0d390,0x95565f56 } },
+    /* 176 */
+    { { 0x1a0f5e94,0xe0e7bbf2,0x1d82d327,0xf771e115,0xceb111fa,0x10033e3d,
+        0xd3426638,0xd269744d,0x00d01ef6,0xbdf2d9da,0xa049ceaf,0x1cb80c71 },
+      { 0x9e21c677,0x17f18328,0x19c8f98b,0x6452af05,0x80b67997,0x35b9c5f7,
+        0x40f8f3d4,0x5c2e1cbe,0x66d667ca,0x43f91656,0xcf9d6e79,0x9faaa059 } },
+    /* 177 */
+    { { 0x0a078fe6,0x8ad24618,0x464fd1dd,0xf6cc73e6,0xc3e37448,0x4d2ce34d,
+        0xe3271b5f,0x624950c5,0xefc5af72,0x62910f5e,0xaa132bc6,0x8b585bf8 },
+      { 0xa839327f,0x11723985,0x4aac252f,0x34e2d27d,0x6296cc4e,0x402f59ef,
+        0x47053de9,0x00ae055c,0x28b4f09b,0xfc22a972,0xfa0c180e,0xa9e86264 } },
+    /* 178 */
+    { { 0xbc310ecc,0x0b7b6224,0x67fa14ed,0x8a1a74f1,0x7214395c,0x87dd0960,
+        0xf5c91128,0xdf1b3d09,0x86b264a8,0x39ff23c6,0x3e58d4c5,0xdc2d49d0 },
+      { 0xa9d6f501,0x2152b7d3,0xc04094f7,0xf4c32e24,0xd938990f,0xc6366596,
+        0x94fb207f,0x084d078f,0x328594cb,0xfd99f1d7,0xcb2d96b3,0x36defa64 } },
+    /* 179 */
+    { { 0x13ed7cbe,0x4619b781,0x9784bd0e,0x95e50015,0x2c7705fe,0x2a32251c,
+        0x5f0dd083,0xa376af99,0x0361a45b,0x55425c6c,0x1f291e7b,0x812d2cef },
+      { 0x5fd94972,0xccf581a0,0xe56dc383,0x26e20e39,0x63dbfbf0,0x0093685d,
+        0x36b8c575,0x1fc164cc,0x390ef5e7,0xb9c5ab81,0x26908c66,0x40086beb } },
+    /* 180 */
+    { { 0x37e3c115,0xe5e54f79,0xc1445a8a,0x69b8ee8c,0xb7659709,0x79aedff2,
+        0x1b46fbe6,0xe288e163,0xd18d7bb7,0xdb4844f0,0x48aa6424,0xe0ea23d0 },
+      { 0xf3d80a73,0x714c0e4e,0x3bd64f98,0x87a0aa9e,0x2ec63080,0x8844b8a8,
+        0x255d81a3,0xe0ac9c30,0x455397fc,0x86151237,0x2f820155,0x0b979464 } },
+    /* 181 */
+    { { 0x4ae03080,0x127a255a,0x580a89fb,0x232306b4,0x6416f539,0x04e8cd6a,
+        0x13b02a0e,0xaeb70dee,0x4c09684a,0xa3038cf8,0x28e433ee,0xa710ec3c },
+      { 0x681b1f7d,0x77a72567,0x2fc28170,0x86fbce95,0xf5735ac8,0xd3408683,
+        0x6bd68e93,0x3a324e2a,0xc027d155,0x7ec74353,0xd4427177,0xab60354c } },
+    /* 182 */
+    { { 0xef4c209d,0x32a5342a,0x08d62704,0x2ba75274,0xc825d5fe,0x4bb4af6f,
+        0xd28e7ff1,0x1c3919ce,0xde0340f6,0x1dfc2fdc,0x29f33ba9,0xc6580baf },
+      { 0x41d442cb,0xae121e75,0x3a4724e4,0x4c7727fd,0x524f3474,0xe556d6a4,
+        0x785642a2,0x87e13cc7,0xa17845fd,0x182efbb1,0x4e144857,0xdcec0cf1 } },
+    /* 183 */
+    { { 0xe9539819,0x1cb89541,0x9d94dbf1,0xc8cb3b4f,0x417da578,0x1d353f63,
+        0x8053a09e,0xb7a697fb,0xc35d8b78,0x8d841731,0xb656a7a9,0x85748d6f },
+      { 0xc1859c5d,0x1fd03947,0x535d22a2,0x6ce965c1,0x0ca3aadc,0x1966a13e,
+        0x4fb14eff,0x9802e41d,0x76dd3fcd,0xa9048cbb,0xe9455bba,0x89b182b5 } },
+    /* 184 */
+    { { 0x43360710,0xd777ad6a,0x55e9936b,0x841287ef,0x04a21b24,0xbaf5c670,
+        0x35ad86f1,0xf2c0725f,0xc707e72e,0x338fa650,0xd8883e52,0x2bf8ed2e },
+      { 0xb56e0d6a,0xb0212cf4,0x6843290c,0x50537e12,0x98b3dc6f,0xd8b184a1,
+        0x0210b722,0xd2be9a35,0x559781ee,0x407406db,0x0bc18534,0x5a78d591 } },
+    /* 185 */
+    { { 0xd748b02c,0x4d57aa2a,0xa12b3b95,0xbe5b3451,0x64711258,0xadca7a45,
+        0x322153db,0x597e091a,0x32eb1eab,0xf3271006,0x2873f301,0xbd9adcba },
+      { 0x38543f7f,0xd1dc79d1,0x921b1fef,0x00022092,0x1e5df8ed,0x86db3ef5,
+        0x9e6b944a,0x888cae04,0x791a32b4,0x71bd29ec,0xa6d1c13e,0xd3516206 } },
+    /* 186 */
+    { { 0x55924f43,0x2ef6b952,0x4f9de8d5,0xd2f401ae,0xadc68042,0xfc73e8d7,
+        0x0d9d1bb4,0x627ea70c,0xbbf35679,0xc3bb3e3e,0xd882dee4,0x7e8a254a },
+      { 0xb5924407,0x08906f50,0xa1ad444a,0xf14a0e61,0x65f3738e,0xaa0efa21,
+        0xae71f161,0xd60c7dd6,0xf175894d,0x9e8390fa,0x149f4c00,0xd115cd20 } },
+    /* 187 */
+    { { 0xa52abf77,0x2f2e2c1d,0x54232568,0xc2a0dca5,0x54966dcc,0xed423ea2,
+        0xcd0dd039,0xe48c93c7,0x176405c7,0x1e54a225,0x70d58f2e,0x1efb5b16 },
+      { 0x94fb1471,0xa751f9d9,0x67d2941d,0xfdb31e1f,0x53733698,0xa6c74eb2,
+        0x89a0f64a,0xd3155d11,0xa4b8d2b6,0x4414cfe4,0xf7a8e9e3,0x8d5a4be8 } },
+    /* 188 */
+    { { 0x52669e98,0x5c96b4d4,0x8fd42a03,0x4547f922,0xd285174e,0xcf5c1319,
+        0x064bffa0,0x805cd1ae,0x246d27e7,0x50e8bc4f,0xd5781e11,0xf89ef98f },
+      { 0xdee0b63f,0xb4ff95f6,0x222663a4,0xad850047,0x4d23ce9c,0x02691860,
+        0x50019f59,0x3e5309ce,0x69a508ae,0x27e6f722,0x267ba52c,0xe9376652 } },
+    /* 189 */
+    { { 0xc0368708,0xa04d289c,0x5e306e1d,0xc458872f,0x33112fea,0x76fa23de,
+        0x6efde42e,0x718e3974,0x1d206091,0xf0c98cdc,0x14a71987,0x5fa3ca62 },
+      { 0xdcaa9f2a,0xeee8188b,0x589a860d,0x312cc732,0xc63aeb1f,0xf9808dd6,
+        0x4ea62b53,0x70fd43db,0x890b6e97,0x2c2bfe34,0xfa426aa6,0x105f863c } },
+    /* 190 */
+    { { 0xb38059ad,0x0b29795d,0x90647ea0,0x5686b77e,0xdb473a3e,0xeff0470e,
+        0xf9b6d1e2,0x278d2340,0xbd594ec7,0xebbff95b,0xd3a7f23d,0xf4b72334 },
+      { 0xa5a83f0b,0x2a285980,0x9716a8b3,0x0786c41a,0x22511812,0x138901bd,
+        0xe2fede6e,0xd1b55221,0xdf4eb590,0x0806e264,0x762e462e,0x6c4c897e } },
+    /* 191 */
+    { { 0xb4b41d9d,0xd10b905f,0x4523a65b,0x826ca466,0xb699fa37,0x535bbd13,
+        0x73bc8f90,0x5b9933d7,0xcd2118ad,0x9332d61f,0xd4a65fd0,0x158c693e },
+      { 0xe6806e63,0x4ddfb2a8,0xb5de651b,0xe31ed3ec,0x819bc69a,0xf9460e51,
+        0x2c76b1f8,0x6229c0d6,0x901970a3,0xbb78f231,0x9cee72b8,0x31f3820f } },
+    /* 192 */
+    { { 0xc09e1c72,0xe931caf2,0x12990cf4,0x0715f298,0x943262d8,0x33aad81d,
+        0x73048d3f,0x5d292b7a,0xdc7415f6,0xb152aaa4,0x0fd19587,0xc3d10fd9 },
+      { 0x75ddadd0,0xf76b35c5,0x1e7b694c,0x9f5f4a51,0xc0663025,0x2f1ab7eb,
+        0x920260b0,0x01c9cc87,0x05d39da6,0xc4b1f61a,0xeb4a9c4e,0x6dcd76c4 } },
+    /* 193 */
+    { { 0xfdc83f01,0x0ba0916f,0x9553e4f9,0x354c8b44,0xffc5e622,0xa6cc511a,
+        0xe95be787,0xb954726a,0x75b41a62,0xcb048115,0xebfde989,0xfa2ae6cd },
+      { 0x0f24659a,0x6376bbc7,0x4c289c43,0x13a999fd,0xec9abd8b,0xc7134184,
+        0xa789ab04,0x28c02bf6,0xd3e526ec,0xff841ebc,0x640893a8,0x442b191e } },
+    /* 194 */
+    { { 0xfa2b6e20,0x4cac6c62,0xf6d69861,0x97f29e9b,0xbc96d12d,0x228ab1db,
+        0x5e8e108d,0x6eb91327,0x40771245,0xd4b3d4d1,0xca8a803a,0x61b20623 },
+      { 0xa6a560b1,0x2c2f3b41,0x3859fcf4,0x879e1d40,0x024dbfc3,0x7cdb5145,
+        0x3bfa5315,0x55d08f15,0xaa93823a,0x2f57d773,0xc6a2c9a2,0xa97f259c } },
+    /* 195 */
+    { { 0xe58edbbb,0xc306317b,0x79dfdf13,0x25ade51c,0x16d83dd6,0x6b5beaf1,
+        0x1dd8f925,0xe8038a44,0xb2a87b6b,0x7f00143c,0xf5b438de,0xa885d00d },
+      { 0xcf9e48bd,0xe9f76790,0xa5162768,0xf0bdf9f0,0xad7b57cb,0x0436709f,
+        0xf7c15db7,0x7e151c12,0x5d90ee3b,0x3514f022,0x2c361a8d,0x2e84e803 } },
+    /* 196 */
+    { { 0x563ec8d8,0x2277607d,0xe3934cb7,0xa661811f,0xf58fd5de,0x3ca72e7a,
+        0x62294c6a,0x7989da04,0xf6bbefe9,0x88b3708b,0x53ed7c82,0x0d524cf7 },
+      { 0x2f30c073,0x69f699ca,0x9dc1dcf3,0xf0fa264b,0x05f0aaf6,0x44ca4568,
+        0xd19b9baf,0x0f5b23c7,0xeabd1107,0x39193f41,0x2a7c9b83,0x9e3e10ad } },
+    /* 197 */
+    { { 0xd4ae972f,0xa90824f0,0xc6e846e7,0x43eef02b,0x29d2160a,0x7e460612,
+        0xfe604e91,0x29a178ac,0x4eb184b2,0x23056f04,0xeb54cdf4,0x4fcad55f },
+      { 0xae728d15,0xa0ff96f3,0xc6a00331,0x8a2680c6,0x7ee52556,0x5f84cae0,
+        0xc5a65dad,0x5e462c3a,0xe2d23f4f,0x5d2b81df,0xc5b1eb07,0x6e47301b } },
+    /* 198 */
+    { { 0xaf8219b9,0x77411d68,0x51b1907a,0xcb883ce6,0x101383b5,0x25c87e57,
+        0x982f970d,0x9c7d9859,0x118305d2,0xaa6abca5,0x9013a5db,0x725fed2f },
+      { 0xababd109,0x487cdbaf,0x87586528,0xc0f8cf56,0x8ad58254,0xa02591e6,
+        0xdebbd526,0xc071b1d1,0x961e7e31,0x927dfe8b,0x9263dfe1,0x55f895f9 } },
+    /* 199 */
+    { { 0xb175645b,0xf899b00d,0xb65b4b92,0x51f3a627,0xb67399ef,0xa2f3ac8d,
+        0xe400bc20,0xe717867f,0x1967b952,0x42cc9020,0x3ecd1de1,0x3d596751 },
+      { 0xdb979775,0xd41ebcde,0x6a2e7e88,0x99ba61bc,0x321504f2,0x039149a5,
+        0x27ba2fad,0xe7dc2314,0xb57d8368,0x9f556308,0x57da80a7,0x2b6d16c9 } },
+    /* 200 */
+    { { 0x279ad982,0x84af5e76,0x9c8b81a6,0x9bb4c92d,0x0e698e67,0xd79ad44e,
+        0x265fc167,0xe8be9048,0x0c3a4ccc,0xf135f7e6,0xb8863a33,0xa0a10d38 },
+      { 0xd386efd9,0xe197247c,0xb52346c2,0x0eefd3f9,0x78607bc8,0xc22415f9,
+        0x508674ce,0xa2a8f862,0xc8c9d607,0xa72ad09e,0x50fa764f,0xcd9f0ede } },
+    /* 201 */
+    { { 0xd1a46d4d,0x063391c7,0x9eb01693,0x2df51c11,0x849e83de,0xc5849800,
+        0x8ad08382,0x48fd09aa,0xaa742736,0xa405d873,0xe1f9600c,0xee49e61e },
+      { 0x48c76f73,0xd76676be,0x01274b2a,0xd9c100f6,0x83f8718d,0x110bb67c,
+        0x02fc0d73,0xec85a420,0x744656ad,0xc0449e1e,0x37d9939b,0x28ce7376 } },
+    /* 202 */
+    { { 0x44544ac7,0x97e9af72,0xba010426,0xf2c658d5,0xfb3adfbd,0x732dec39,
+        0xa2df0b07,0xd12faf91,0x2171e208,0x8ac26725,0x5b24fa54,0xf820cdc8 },
+      { 0x94f4cf77,0x307a6eea,0x944a33c6,0x18c783d2,0x0b741ac5,0x4b939d4c,
+        0x3ffbb6e4,0x1d7acd15,0x7a255e44,0x06a24858,0xce336d50,0x14fbc494 } },
+    /* 203 */
+    { { 0x51584e3c,0x9b920c0c,0xf7e54027,0xc7733c59,0x88422bbe,0xe24ce139,
+        0x523bd6ab,0x11ada812,0xb88e6def,0xde068800,0xfe8c582d,0x7b872671 },
+      { 0x7de53510,0x4e746f28,0xf7971968,0x492f8b99,0x7d928ac2,0x1ec80bc7,
+        0x432eb1b5,0xb3913e48,0x32028f6e,0xad084866,0x8fc2f38b,0x122bb835 } },
+    /* 204 */
+    { { 0x3b0b29c3,0x0a9f3b1e,0x4fa44151,0x837b6432,0x17b28ea7,0xb9905c92,
+        0x98451750,0xf39bc937,0xce8b6da1,0xcd383c24,0x010620b2,0x299f57db },
+      { 0x58afdce3,0x7b6ac396,0x3d05ef47,0xa15206b3,0xb9bb02ff,0xa0ae37e2,
+        0x9db3964c,0x107760ab,0x67954bea,0xe29de9a0,0x431c3f82,0x446a1ad8 } },
+    /* 205 */
+    { { 0x5c6b8195,0xc6fecea0,0xf49e71b9,0xd744a7c5,0x177a7ae7,0xa8e96acc,
+        0x358773a7,0x1a05746c,0x37567369,0xa4162146,0x87d1c971,0xaa0217f7 },
+      { 0x77fd3226,0x61e9d158,0xe4f600be,0x0f6f2304,0x7a6dff07,0xa9c4cebc,
+        0x09f12a24,0xd15afa01,0x8c863ee9,0x2bbadb22,0xe5eb8c78,0xa28290e4 } },
+    /* 206 */
+    { { 0x3e9de330,0x55b87fa0,0x195c145b,0x12b26066,0xa920bef0,0xe08536e0,
+        0x4d195adc,0x7bff6f2c,0x945f4187,0x7f319e9d,0xf892ce47,0xf9848863 },
+      { 0x4fe37657,0xd0efc1d3,0x5cf0e45a,0x3c58de82,0x8b0ccbbe,0x626ad21a,
+        0xaf952fc5,0xd2a31208,0xeb437357,0x81791995,0x98e95d4f,0x5f19d30f } },
+    /* 207 */
+    { { 0x0e6865bb,0x72e83d9a,0xf63456a6,0x22f5af3b,0x463c8d9e,0x409e9c73,
+        0xdfe6970e,0x40e9e578,0x711b91ca,0x876b6efa,0x942625a3,0x895512cf },
+      { 0xcb4e462b,0x84c8eda8,0x4412e7c8,0x84c0154a,0xceb7b71f,0x04325db1,
+        0x66f70877,0x1537dde3,0x1992b9ac,0xf3a09399,0xd498ae77,0xa7316606 } },
+    /* 208 */
+    { { 0xcad260f5,0x13990d2f,0xeec0e8c0,0x76c3be29,0x0f7bd7d5,0x7dc5bee0,
+        0xefebda4b,0x9be167d2,0x9122b87e,0xcce3dde6,0x82b5415c,0x75a28b09 },
+      { 0xe84607a6,0xf6810bcd,0x6f4dbf0d,0xc6d58128,0x1b4dafeb,0xfead577d,
+        0x066b28eb,0x9bc440b2,0x8b17e84b,0x53f1da97,0xcda9a575,0x0459504b } },
+    /* 209 */
+    { { 0x329e5836,0x13e39a02,0xf717269d,0x2c9e7d51,0xf26c963b,0xc5ac58d6,
+        0x79967bf5,0x3b0c6c43,0x55908d9d,0x60bbea3f,0xf07c9ad1,0xd84811e7 },
+      { 0x5bd20e4a,0xfe7609a7,0x0a70baa8,0xe4325dd2,0xb3600386,0x3711f370,
+        0xd0924302,0x97f9562f,0x4acc4436,0x040dc0c3,0xde79cdd4,0xfd6d725c } },
+    /* 210 */
+    { { 0xcf13eafb,0xb3efd0e3,0x5aa0ae5f,0x21009cbb,0x79022279,0xe480c553,
+        0xb2fc9a6d,0x755cf334,0x07096ae7,0x8564a5bf,0xbd238139,0xddd649d0 },
+      { 0x8a045041,0xd0de10b1,0xc957d572,0x6e05b413,0x4e0fb25c,0x5c5ff806,
+        0x641162fb,0xd933179b,0xe57439f9,0x42d48485,0x8a8d72aa,0x70c5bd0a } },
+    /* 211 */
+    { { 0x97bdf646,0xa7671738,0xab329f7c,0xaa1485b4,0xf8f25fdf,0xce3e11d6,
+        0xc6221824,0x76a3fc7e,0xf3924740,0x045f281f,0x96d13a9a,0x24557d4e },
+      { 0xdd4c27cd,0x875c804b,0x0f5c7fea,0x11c5f0f4,0xdc55ff7e,0xac8c880b,
+        0x1103f101,0x2acddec5,0xf99faa89,0x38341a21,0xce9d6b57,0xc7b67a2c } },
+    /* 212 */
+    { { 0x8e357586,0x9a0d724f,0xdf648da0,0x1d7f4ff5,0xfdee62a5,0x9c3e6c9b,
+        0x0389b372,0x0499cef0,0x98eab879,0xe904050d,0x6c051617,0xe8eef1b6 },
+      { 0xc37e3ca9,0xebf5bfeb,0xa4e0b91d,0x7c5e946d,0x2c4bea28,0x79097314,
+        0xee67b2b7,0x81f6c109,0xdafc5ede,0xaf237d9b,0x2abb04c7,0xd2e60201 } },
+    /* 213 */
+    { { 0x8a4f57bf,0x6156060c,0xff11182a,0xf9758696,0x6296ef00,0x8336773c,
+        0xff666899,0x9c054bce,0x719cd11c,0xd6a11611,0xdbe1acfa,0x9824a641 },
+      { 0xba89fd01,0x0b7b7a5f,0x889f79d8,0xf8d3b809,0xf578285c,0xc5e1ea08,
+        0xae6d8288,0x7ac74536,0x7521ef5f,0x5d37a200,0xb260a25d,0x5ecc4184 } },
+    /* 214 */
+    { { 0xa708c8d3,0xddcebb19,0xc63f81ec,0xe63ed04f,0x11873f95,0xd045f5a0,
+        0x79f276d5,0x3b5ad544,0x425ae5b3,0x81272a3d,0x10ce1605,0x8bfeb501 },
+      { 0x888228bf,0x4233809c,0xb2aff7df,0x4bd82acf,0x0cbd4a7f,0x9c68f180,
+        0x6b44323d,0xfcd77124,0x891db957,0x60c0fcf6,0x04da8f7f,0xcfbb4d89 } },
+    /* 215 */
+    { { 0x3b26139a,0x9a6a5df9,0xb2cc7eb8,0x3e076a83,0x5a964bcd,0x47a8e82d,
+        0xb9278d6b,0x8a4e2a39,0xe4443549,0x93506c98,0xf1e0d566,0x06497a8f },
+      { 0x2b1efa05,0x3dee8d99,0x45393e33,0x2da63ca8,0xcf0579ad,0xa4af7277,
+        0x3236d8ea,0xaf4b4639,0x32b617f5,0x6ccad95b,0xb88bb124,0xce76d8b8 } },
+    /* 216 */
+    { { 0x083843dc,0x63d2537a,0x1e4153b4,0x89eb3514,0xea9afc94,0x5175ebc4,
+        0x8ed1aed7,0x7a652580,0xd85e8297,0x67295611,0xb584b73d,0x8dd2d68b },
+      { 0x0133c3a4,0x237139e6,0x4bd278ea,0x9de838ab,0xc062fcd9,0xe829b072,
+        0x63ba8706,0x70730d4f,0xd3cd05ec,0x6080483f,0x0c85f84d,0x872ab5b8 } },
+    /* 217 */
+    { { 0x999d4d49,0xfc0776d3,0xec3f45e7,0xa3eb59de,0x0dae1fc1,0xbc990e44,
+        0xa15371ff,0x33596b1e,0x9bc7ab25,0xd447dcb2,0x35979582,0xcd5b63e9 },
+      { 0x77d1ff11,0xae3366fa,0xedee6903,0x59f28f05,0xa4433bf2,0x6f43fed1,
+        0xdf9ce00e,0x15409c9b,0xaca9c5dc,0x21b5cded,0x82d7bdb4,0xf9f33595 } },
+    /* 218 */
+    { { 0x9422c792,0x95944378,0xc958b8bf,0x239ea923,0xdf076541,0x4b61a247,
+        0xbb9fc544,0x4d29ce85,0x0b424559,0x9a692a67,0x0e486900,0x6e0ca5a0 },
+      { 0x85b3bece,0x6b79a782,0xc61f9892,0x41f35e39,0xae747f82,0xff82099a,
+        0xd0ca59d6,0x58c8ae3f,0x99406b5f,0x4ac930e2,0x9df24243,0x2ce04eb9 } },
+    /* 219 */
+    { { 0x1ac37b82,0x4366b994,0x25b04d83,0xff0c728d,0x19c47b7c,0x1f551361,
+        0xbeff13e7,0xdbf2d5ed,0xe12a683d,0xf78efd51,0x989cf9c4,0x82cd85b9 },
+      { 0xe0cb5d37,0xe23c6db6,0x72ee1a15,0x818aeebd,0x28771b14,0x8212aafd,
+        0x1def817d,0x7bc221d9,0x9445c51f,0xdac403a2,0x12c3746b,0x711b0517 } },
+    /* 220 */
+    { { 0x5ea99ecc,0x0ed9ed48,0xb8cab5e1,0xf799500d,0xb570cbdc,0xa8ec87dc,
+        0xd35dfaec,0x52cfb2c2,0x6e4d80a4,0x8d31fae2,0xdcdeabe5,0xe6a37dc9 },
+      { 0x1deca452,0x5d365a34,0x0d68b44e,0x09a5f8a5,0xa60744b1,0x59238ea5,
+        0xbb4249e9,0xf2fedc0d,0xa909b2e3,0xe395c74e,0x39388250,0xe156d1a5 } },
+    /* 221 */
+    { { 0x47181ae9,0xd796b3d0,0x44197808,0xbaf44ba8,0x34cf3fac,0xe6933094,
+        0xc3bd5c46,0x41aa6ade,0xeed947c6,0x4fda75d8,0x9ea5a525,0xacd9d412 },
+      { 0xd430301b,0x65cc55a3,0x7b52ea49,0x3c9a5bcf,0x159507f0,0x22d319cf,
+        0xde74a8dd,0x2ee0b9b5,0x877ac2b6,0x20c26a1e,0x92e7c314,0x387d73da } },
+    /* 222 */
+    { { 0x8cd3fdac,0x13c4833e,0x332e5b8e,0x76fcd473,0xe2fe1fd3,0xff671b4b,
+        0x5d98d8ec,0x4d734e8b,0x514bbc11,0xb1ead3c6,0x7b390494,0xd14ca858 },
+      { 0x5d2d37e9,0x95a443af,0x00464622,0x73c6ea73,0x15755044,0xa44aeb4b,
+        0xfab58fee,0xba3f8575,0xdc680a6f,0x9779dbc9,0x7b37ddfc,0xe1ee5f5a } },
+    /* 223 */
+    { { 0x12d29f46,0xcd0b4648,0x0ed53137,0x93295b0b,0x80bef6c9,0xbfe26094,
+        0x54248b00,0xa6565788,0x80e7f9c4,0x69c43fca,0xbe141ea1,0x2190837b },
+      { 0xa1b26cfb,0x875e159a,0x7affe852,0x90ca9f87,0x92ca598e,0x15e6550d,
+        0x1938ad11,0xe3e0945d,0x366ef937,0xef7636bb,0xb39869e5,0xb6034d0b } },
+    /* 224 */
+    { { 0x26d8356e,0x4d255e30,0xd314626f,0xf83666ed,0xd0c8ed64,0x421ddf61,
+        0x26677b61,0x96e473c5,0x9e9b18b3,0xdad4af7e,0xa9393f75,0xfceffd4a },
+      { 0x11c731d5,0x843138a1,0xb2f141d9,0x05bcb3a1,0x617b7671,0x20e1fa95,
+        0x88ccec7b,0xbefce812,0x90f1b568,0x582073dc,0x1f055cb7,0xf572261a } },
+    /* 225 */
+    { { 0x36973088,0xf3148277,0x86a9f980,0xc008e708,0xe046c261,0x1b795947,
+        0xca76bca0,0xdf1e6a7d,0x71acddf0,0xabafd886,0x1364d8f4,0xff7054d9 },
+      { 0xe2260594,0x2cf63547,0xd73b277e,0x468a5372,0xef9bd35e,0xc7419e24,
+        0x24043cc3,0x2b4a1c20,0x890b39cd,0xa28f047a,0x46f9a2e3,0xdca2cea1 } },
+    /* 226 */
+    { { 0x53277538,0xab788736,0xcf697738,0xa734e225,0x6b22e2c1,0x66ee1d1e,
+        0xebe1d212,0x2c615389,0x02bb0766,0xf36cad40,0x3e64f207,0x120885c3 },
+      { 0x90fbfec2,0x59e77d56,0xd7a574ae,0xf9e781aa,0x5d045e53,0x801410b0,
+        0xa91b5f0e,0xd3b5f0aa,0x7fbb3521,0xb3d1df00,0xc72bee9a,0x11c4b33e } },
+    /* 227 */
+    { { 0x83c3a7f3,0xd32b9832,0x88d8a354,0x8083abcf,0x50f4ec5a,0xdeb16404,
+        0x641e2907,0x18d747f0,0xf1bbf03e,0x4e8978ae,0x88a0cd89,0x932447dc },
+      { 0xcf3d5897,0x561e0feb,0x13600e6d,0xfc3a682f,0xd16a6b73,0xc78b9d73,
+        0xd29bf580,0xe713fede,0x08d69e5c,0x0a225223,0x1ff7fda4,0x3a924a57 } },
+    /* 228 */
+    { { 0xb4093bee,0xfb64554c,0xa58c6ec0,0xa6d65a25,0x43d0ed37,0x4126994d,
+        0x55152d44,0xa5689a51,0x284caa8d,0xb8e5ea8c,0xd1f25538,0x33f05d4f },
+      { 0x1b615d6e,0xe0fdfe09,0x705507da,0x2ded7e8f,0x17bbcc80,0xdd5631e5,
+        0x267fd11f,0x4f87453e,0xff89d62d,0xc6da723f,0xe3cda21d,0x55cbcae2 } },
+    /* 229 */
+    { { 0x6b4e84f3,0x336bc94e,0x4ef72c35,0x72863031,0xeeb57f99,0x6d85fdee,
+        0xa42ece1b,0x7f4e3272,0x36f0320a,0x7f86cbb5,0x923331e6,0xf09b6a2b },
+      { 0x56778435,0x21d3ecf1,0x8323b2d2,0x2977ba99,0x1704bc0f,0x6a1b57fb,
+        0x389f048a,0xd777cf8b,0xac6b42cd,0x9ce2174f,0x09e6c55a,0x404e2bff } },
+    /* 230 */
+    { { 0x204c5ddb,0x9b9b135e,0x3eff550e,0x9dbfe044,0xec3be0f6,0x35eab4bf,
+        0x0a43e56f,0x8b4c3f0d,0x0e73f9b3,0x4c1c6673,0x2c78c905,0x92ed38bd },
+      { 0xa386e27c,0xc7003f6a,0xaced8507,0xb9c4f46f,0x59df5464,0xea024ec8,
+        0x429572ea,0x4af96152,0xe1fc1194,0x279cd5e2,0x281e358c,0xaa376a03 } },
+    /* 231 */
+    { { 0x3cdbc95c,0x07859223,0xef2e337a,0xaae1aa6a,0x472a8544,0xc040108d,
+        0x8d037b7d,0x80c853e6,0x8c7eee24,0xd221315c,0x8ee47752,0x195d3856 },
+      { 0xdacd7fbe,0xd4b1ba03,0xd3e0c52b,0x4b5ac61e,0x6aab7b52,0x68d3c052,
+        0x660e3fea,0xf0d7248c,0x3145efb4,0xafdb3f89,0x8f40936d,0xa73fd9a3 } },
+    /* 232 */
+    { { 0xbb1b17ce,0x891b9ef3,0xc6127f31,0x14023667,0x305521fd,0x12b2e58d,
+        0xe3508088,0x3a47e449,0xff751507,0xe49fc84b,0x5310d16e,0x4023f722 },
+      { 0xb73399fa,0xa608e5ed,0xd532aa3e,0xf12632d8,0x845e8415,0x13a2758e,
+        0x1fc2d861,0xae4b6f85,0x339d02f2,0x3879f5b1,0x80d99ebd,0x446d22a6 } },
+    /* 233 */
+    { { 0x4be164f1,0x0f502302,0x88b81920,0x8d09d2d6,0x984aceff,0x514056f1,
+        0x75e9e80d,0xa5c4ddf0,0xdf496a93,0x38cb47e6,0x38df6bf7,0x899e1d6b },
+      { 0xb59eb2a6,0x69e87e88,0x9b47f38b,0x280d9d63,0x3654e955,0x599411ea,
+        0x969aa581,0xcf8dd4fd,0x530742a7,0xff5c2baf,0x1a373085,0xa4391536 } },
+    /* 234 */
+    { { 0xa8a4bdd2,0x6ace72a3,0xb68ef702,0xc656cdd1,0x90c4dad8,0xd4a33e7e,
+        0x9d951c50,0x4aece08a,0x085d68e6,0xea8005ae,0x6f7502b8,0xfdd7a7d7 },
+      { 0x98d6fa45,0xce6fb0a6,0x1104eb8c,0x228f8672,0xda09d7dc,0xd23d8787,
+        0x2ae93065,0x5521428b,0xea56c366,0x95faba3d,0x0a88aca5,0xedbe5039 } },
+    /* 235 */
+    { { 0xbfb26c82,0xd64da0ad,0x952c2f9c,0xe5d70b3c,0xf7e77f68,0xf5e8f365,
+        0x08f2d695,0x7234e002,0xd12e7be6,0xfaf900ee,0x4acf734e,0x27dc6934 },
+      { 0xc260a46a,0x80e4ff5e,0x2dc31c28,0x7da5ebce,0xca69f552,0x485c5d73,
+        0x69cc84c2,0xcdfb6b29,0xed6d4eca,0x031c5afe,0x22247637,0xc7bbf4c8 } },
+    /* 236 */
+    { { 0x49fe01b2,0x9d5b72c7,0x793a91b8,0x34785186,0xcf460438,0xa3ba3c54,
+        0x3ab21b6f,0x73e8e43d,0xbe57b8ab,0x50cde8e0,0xdd204264,0x6488b3a7 },
+      { 0xdddc4582,0xa9e398b3,0x5bec46fe,0x1698c1a9,0x156d3843,0x7f1446ef,
+        0x770329a2,0x3fd25dd8,0x2c710668,0x05b1221a,0xa72ee6cf,0x65b2dc2a } },
+    /* 237 */
+    { { 0xcd021d63,0x21a885f7,0xfea61f08,0x3f344b15,0xc5cf73e6,0xad5ba6dd,
+        0x227a8b23,0x154d0d8f,0xdc559311,0x9b74373c,0x98620fa1,0x4feab715 },
+      { 0x7d9ec924,0x5098938e,0x6d47e550,0x84d54a5e,0x1b617506,0x1a2d1bdc,
+        0x615868a4,0x99fe1782,0x3005a924,0x171da780,0x7d8f79b6,0xa70bf5ed } },
+    /* 238 */
+    { { 0xfe2216c5,0x0bc1250d,0x7601b351,0x2c37e250,0xd6f06b7e,0xb6300175,
+        0x8bfeb9b7,0x4dde8ca1,0xb82f843d,0x4f210432,0xb1ac0afd,0x8d70e2f9 },
+      { 0xaae91abb,0x25c73b78,0x863028f2,0x0230dca3,0xe5cf30b7,0x8b923ecf,
+        0x5506f265,0xed754ec2,0x729a5e39,0x8e41b88c,0xbabf889b,0xee67cec2 } },
+    /* 239 */
+    { { 0x1be46c65,0xe183acf5,0xe7565d7a,0x9789538f,0xd9627b4e,0x87873391,
+        0x9f1d9187,0xbf4ac4c1,0x4691f5c8,0x5db99f63,0x74a1fb98,0xa68df803 },
+      { 0xbf92b5fa,0x3c448ed1,0x3e0bdc32,0xa098c841,0x79bf016c,0x8e74cd55,
+        0x115e244d,0x5df0d09c,0x3410b66e,0x9418ad01,0x17a02130,0x8b6124cb } },
+    /* 240 */
+    { { 0xc26e3392,0x425ec3af,0xa1722e00,0xc07f8470,0xe2356b43,0xdcc28190,
+        0xb1ef59a6,0x4ed97dff,0xc63028c1,0xc22b3ad1,0x68c18988,0x070723c2 },
+      { 0x4cf49e7d,0x70da302f,0x3f12a522,0xc5e87c93,0x18594148,0x74acdd1d,
+        0xca74124c,0xad5f73ab,0xd69fd478,0xe72e4a3e,0x7b117cc3,0x61593868 } },
+    /* 241 */
+    { { 0xa9aa0486,0x7b7b9577,0xa063d557,0x6e41fb35,0xda9047d7,0xb017d5c7,
+        0x68a87ba9,0x8c748280,0xdf08ad93,0xab45fa5c,0x4c288a28,0xcd9fb217 },
+      { 0x5747843d,0x59544642,0xa56111e3,0x34d64c6c,0x4bfce8d5,0x12e47ea1,
+        0x6169267f,0x17740e05,0xeed03fb5,0x5c49438e,0x4fc3f513,0x9da30add } },
+    /* 242 */
+    { { 0xccfa5200,0xc4e85282,0x6a19b13d,0x2707608f,0xf5726e2f,0xdcb9a53d,
+        0xe9427de5,0x612407c9,0xd54d582a,0x3e5a17e1,0x655ae118,0xb99877de },
+      { 0x015254de,0x6f0e972b,0xf0a6f7c5,0x92a56db1,0xa656f8b2,0xd297e4e1,
+        0xad981983,0x99fe0052,0x07cfed84,0xd3652d2f,0x843c1738,0xc784352e } },
+    /* 243 */
+    { { 0x7e9b2d8a,0x6ee90af0,0x57cf1964,0xac8d7018,0x71f28efc,0xf6ed9031,
+        0x6812b20e,0x7f70d5a9,0xf1c61eee,0x27b557f4,0xc6263758,0xf1c9bd57 },
+      { 0x2a1a6194,0x5cf7d014,0x1890ab84,0xdd614e0b,0x0e93c2a6,0x3ef9de10,
+        0xe0cd91c5,0xf98cf575,0x14befc32,0x504ec0c6,0x6279d68c,0xd0513a66 } },
+    /* 244 */
+    { { 0xa859fb6a,0xa8eadbad,0xdb283666,0xcf8346e7,0x3e22e355,0x7b35e61a,
+        0x99639c6b,0x293ece2c,0x56f241c8,0xfa0162e2,0xbf7a1dda,0xd2e6c7b9 },
+      { 0x40075e63,0xd0de6253,0xf9ec8286,0x2405aa61,0x8fe45494,0x2237830a,
+        0x364e9c8c,0x4fd01ac7,0x904ba750,0x4d9c3d21,0xaf1b520b,0xd589be14 } },
+    /* 245 */
+    { { 0x4662e53b,0x13576a4f,0xf9077676,0x35ec2f51,0x97c0af97,0x66297d13,
+        0x9e598b58,0xed3201fe,0x5e70f604,0x49bc752a,0xbb12d951,0xb54af535 },
+      { 0x212c1c76,0x36ea4c2b,0xeb250dfd,0x18f5bbc7,0x9a0a1a46,0xa0d466cc,
+        0xdac2d917,0x52564da4,0x8e95fab5,0x206559f4,0x9ca67a33,0x7487c190 } },
+    /* 246 */
+    { { 0xdde98e9c,0x75abfe37,0x2a411199,0x99b90b26,0xdcdb1f7c,0x1b410996,
+        0x8b3b5675,0xab346f11,0xf1f8ae1e,0x04852193,0x6b8b98c1,0x1ec4d227 },
+      { 0x45452baa,0xba3bc926,0xacc4a572,0x387d1858,0xe51f171e,0x9478eff6,
+        0x931e1c00,0xf357077d,0xe54c8ca8,0xffee77cd,0x551dc9a4,0xfb4892ff } },
+    /* 247 */
+    { { 0x2db8dff8,0x5b1bdad0,0x5a2285a2,0xd462f4fd,0xda00b461,0x1d6aad8e,
+        0x41306d1b,0x43fbefcf,0x6a13fe19,0x428e86f3,0x17f89404,0xc8b2f118 },
+      { 0xf0d51afb,0x762528aa,0x549b1d06,0xa3e2fea4,0xea3ddf66,0x86fad8f2,
+        0x4fbdd206,0x0d9ccc4b,0xc189ff5a,0xcde97d4c,0x199f19a6,0xc36793d6 } },
+    /* 248 */
+    { { 0x51b85197,0xea38909b,0xb4c92895,0xffb17dd0,0x1ddb3f3f,0x0eb0878b,
+        0xc57cf0f2,0xb05d28ff,0x1abd57e2,0xd8bde2e7,0xc40c1b20,0x7f2be28d },
+      { 0x299a2d48,0x6554dca2,0x8377982d,0x5130ba2e,0x1071971a,0x8863205f,
+        0x7cf2825d,0x15ee6282,0x03748f2b,0xd4b6c57f,0x430385a0,0xa9e3f4da } },
+    /* 249 */
+    { { 0x83fbc9c6,0x33eb7cec,0x4541777e,0x24a311c7,0x4f0767fc,0xc81377f7,
+        0x4ab702da,0x12adae36,0x2a779696,0xb7fcb6db,0x01cea6ad,0x4a6fb284 },
+      { 0xcdfc73de,0x5e8b1d2a,0x1b02fd32,0xd0efae8d,0xd81d8519,0x3f99c190,
+        0xfc808971,0x3c18f7fa,0x51b7ae7b,0x41f713e7,0xf07fc3f8,0x0a4b3435 } },
+    /* 250 */
+    { { 0x019b7d2e,0x7dda3c4c,0xd4dc4b89,0x631c8d1a,0x1cdb313c,0x5489cd6e,
+        0x4c07bb06,0xd44aed10,0x75f000d1,0x8f97e13a,0xdda5df4d,0x0e9ee64f },
+      { 0x3e346910,0xeaa99f3b,0xfa294ad7,0x622f6921,0x0d0b2fe9,0x22aaa20d,
+        0x1e5881ba,0x4fed2f99,0xc1571802,0x9af3b2d6,0xdc7ee17c,0x919e67a8 } },
+    /* 251 */
+    { { 0x76250533,0xc724fe4c,0x7d817ef8,0x8a2080e5,0x172c9751,0xa2afb0f4,
+        0x17c0702e,0x9b10cdeb,0xc9b7e3e9,0xbf3975e3,0x1cd0cdc5,0x206117df },
+      { 0xbe05ebd5,0xfb049e61,0x16c782c0,0xeb0bb55c,0xab7fed09,0x13a331b8,
+        0x632863f0,0xf6c58b1d,0x4d3b6195,0x6264ef6e,0x9a53f116,0x92c51b63 } },
+    /* 252 */
+    { { 0x288b364d,0xa57c7bc8,0x7b41e5c4,0x4a562e08,0x698a9a11,0x699d21c6,
+        0xf3f849b9,0xa4ed9581,0x9eb726ba,0xa223eef3,0xcc2884f9,0x13159c23 },
+      { 0x3a3f4963,0x73931e58,0x0ada6a81,0x96500389,0x5ab2950b,0x3ee8a1c6,
+        0x775fab52,0xeedf4949,0x4f2671b6,0x63d652e1,0x3c4e2f55,0xfed4491c } },
+    /* 253 */
+    { { 0xf4eb453e,0x335eadc3,0xcadd1a5b,0x5ff74b63,0x5d84a91a,0x6933d0d7,
+        0xb49ba337,0x9ca3eeb9,0xc04c15b8,0x1f6facce,0xdc09a7e4,0x4ef19326 },
+      { 0x3dca3233,0x53d2d324,0xa2259d4b,0x0ee40590,0x5546f002,0x18c22edb,
+        0x09ea6b71,0x92429801,0xb0e91e61,0xaada0add,0x99963c50,0x5fe53ef4 } },
+    /* 254 */
+    { { 0x90c28c65,0x372dd06b,0x119ce47d,0x1765242c,0x6b22fc82,0xc041fb80,
+        0xb0a7ccc1,0x667edf07,0x1261bece,0xc79599e7,0x19cff22a,0xbc69d9ba },
+      { 0x13c06819,0x009d77cd,0xe282b79d,0x635a66ae,0x225b1be8,0x4edac4a6,
+        0x524008f9,0x57d4f4e4,0xb056af84,0xee299ac5,0x3a0bc386,0xcc38444c } },
+    /* 255 */
+    { { 0xcd4c2356,0x490643b1,0x750547be,0x740a4851,0xd4944c04,0x643eaf29,
+        0x299a98a0,0xba572479,0xee05fdf9,0x48b29f16,0x089b2d7b,0x33fb4f61 },
+      { 0xa950f955,0x86704902,0xfedc3ddf,0x97e1034d,0x05fbb6a2,0x211320b6,
+        0x432299bb,0x23d7b93f,0x8590e4a3,0x1fe1a057,0xf58c0ce6,0x8e1d0586 } },
+};
+
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_base_12(sp_point_384* r, const sp_digit* k,
+        int map, void* heap)
+{
+    return sp_384_ecc_mulmod_stripe_12(r, &p384_base, p384_table,
+                                      k, map, heap);
+}
+
+#endif
+
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * km    Scalar to multiply by.
+ * r     Resulting point.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_base_384(mp_int* km, ecc_point* r, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[12];
+#endif
+    sp_point_384* point;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    err = sp_384_point_new_12(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+    if (err == MP_OKAY) {
+        sp_384_from_mp(k, 12, km);
+
+            err = sp_384_ecc_mulmod_base_12(point, k, map, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_to_ecc_point_12(point, r);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(point, 0, heap);
+
+    return err;
+}
+
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+                                                        defined(HAVE_ECC_VERIFY)
+/* Returns 1 if the number of zero.
+ * Implementation is constant time.
+ *
+ * a  Number to check.
+ * returns 1 if the number is zero and 0 otherwise.
+ */
+static int sp_384_iszero_12(const sp_digit* a)
+{
+    return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] |
+            a[8] | a[9] | a[10] | a[11]) == 0;
+}
+
+#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+/* Add 1 to a. (a = a + 1)
+ *
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_384_add_one_12(sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "mov	r2, #1\n\t"
+        "ldr	r1, [%[a], #0]\n\t"
+        "adds	r1, r1, r2\n\t"
+        "mov	r2, #0\n\t"
+        "str	r1, [%[a], #0]\n\t"
+        "ldr	r1, [%[a], #4]\n\t"
+        "adcs	r1, r1, r2\n\t"
+        "str	r1, [%[a], #4]\n\t"
+        "ldr	r1, [%[a], #8]\n\t"
+        "adcs	r1, r1, r2\n\t"
+        "str	r1, [%[a], #8]\n\t"
+        "ldr	r1, [%[a], #12]\n\t"
+        "adcs	r1, r1, r2\n\t"
+        "str	r1, [%[a], #12]\n\t"
+        "ldr	r1, [%[a], #16]\n\t"
+        "adcs	r1, r1, r2\n\t"
+        "str	r1, [%[a], #16]\n\t"
+        "ldr	r1, [%[a], #20]\n\t"
+        "adcs	r1, r1, r2\n\t"
+        "str	r1, [%[a], #20]\n\t"
+        "ldr	r1, [%[a], #24]\n\t"
+        "adcs	r1, r1, r2\n\t"
+        "str	r1, [%[a], #24]\n\t"
+        "ldr	r1, [%[a], #28]\n\t"
+        "adcs	r1, r1, r2\n\t"
+        "str	r1, [%[a], #28]\n\t"
+        "ldr	r1, [%[a], #32]\n\t"
+        "adcs	r1, r1, r2\n\t"
+        "str	r1, [%[a], #32]\n\t"
+        "ldr	r1, [%[a], #36]\n\t"
+        "adcs	r1, r1, r2\n\t"
+        "str	r1, [%[a], #36]\n\t"
+        "ldr	r1, [%[a], #40]\n\t"
+        "adcs	r1, r1, r2\n\t"
+        "str	r1, [%[a], #40]\n\t"
+        "ldr	r1, [%[a], #44]\n\t"
+        "adcs	r1, r1, r2\n\t"
+        "str	r1, [%[a], #44]\n\t"
+        :
+        : [a] "r" (a)
+        : "memory", "r1", "r2"
+    );
+}
+
+/* Read big endian unsigned byte array into r.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
+ * a  Byte array.
+ * n  Number of bytes in array to read.
+ */
+static void sp_384_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = n-1; i >= 0; i--) {
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 24U) {
+            r[j] &= 0xffffffff;
+            s = 32U - s;
+            if (j + 1 >= size) {
+                break;
+            }
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+}
+
+/* Generates a scalar that is in the range 1..order-1.
+ *
+ * rng  Random number generator.
+ * k    Scalar value.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+static int sp_384_ecc_gen_k_12(WC_RNG* rng, sp_digit* k)
+{
+    int err;
+    byte buf[48];
+
+    do {
+        err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf));
+        if (err == 0) {
+            sp_384_from_bin(k, 12, buf, (int)sizeof(buf));
+            if (sp_384_cmp_12(k, p384_order2) < 0) {
+                sp_384_add_one_12(k);
+                break;
+            }
+        }
+    }
+    while (err == 0);
+
+    return err;
+}
+
+/* Makes a random EC key pair.
+ *
+ * rng   Random number generator.
+ * priv  Generated private value.
+ * pub   Generated public point.
+ * heap  Heap to use for allocation.
+ * returns ECC_INF_E when the point does not have the correct order, RNG
+ * failures, MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_make_key_384(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[12];
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_384 inf;
+#endif
+#endif
+    sp_point_384* point;
+    sp_digit* k = NULL;
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_384* infinity;
+#endif
+    int err;
+
+    (void)heap;
+
+    err = sp_384_point_new_12(heap, p, point);
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, inf, infinity);
+    }
+#endif
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+
+    if (err == MP_OKAY) {
+        err = sp_384_ecc_gen_k_12(rng, k);
+    }
+    if (err == MP_OKAY) {
+            err = sp_384_ecc_mulmod_base_12(point, k, 1, NULL);
+    }
+
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    if (err == MP_OKAY) {
+            err = sp_384_ecc_mulmod_12(infinity, point, p384_order, 1, NULL);
+    }
+    if (err == MP_OKAY) {
+        if ((sp_384_iszero_12(point->x) == 0) || (sp_384_iszero_12(point->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(k, priv);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_to_ecc_point_12(point, pub);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_384_point_free_12(infinity, 1, heap);
+#endif
+    sp_384_point_free_12(point, 1, heap);
+
+    return err;
+}
+
+#ifdef HAVE_ECC_DHE
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 48
+ *
+ * r  A single precision integer.
+ * a  Byte array.
+ */
+static void sp_384_to_bin(sp_digit* r, byte* a)
+{
+    int i, j, s = 0, b;
+
+    j = 384 / 8 - 1;
+    a[j] = 0;
+    for (i=0; i<12 && j>=0; i++) {
+        b = 0;
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
+            break;
+        }
+        while (b < 32) {
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
+                break;
+            }
+        }
+        s = 8 - (b - 32);
+        if (j >= 0) {
+            a[j] = 0;
+        }
+        if (s != 0) {
+            j++;
+        }
+    }
+}
+
+/* Multiply the point by the scalar and serialize the X ordinate.
+ * The number is 0 padded to maximum size on output.
+ *
+ * priv    Scalar to multiply the point by.
+ * pub     Point to multiply.
+ * out     Buffer to hold X ordinate.
+ * outLen  On entry, size of the buffer in bytes.
+ *         On exit, length of data in buffer in bytes.
+ * heap    Heap to use for allocation.
+ * returns BUFFER_E if the buffer is to small for output size,
+ * MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_secret_gen_384(mp_int* priv, ecc_point* pub, byte* out,
+                          word32* outLen, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[12];
+#endif
+    sp_point_384* point = NULL;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    if (*outLen < 48U) {
+        err = BUFFER_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, p, point);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL)
+            err = MEMORY_E;
+    }
+#else
+    k = kd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_384_from_mp(k, 12, priv);
+        sp_384_point_from_ecc_point_12(point, pub);
+            err = sp_384_ecc_mulmod_12(point, point, k, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        sp_384_to_bin(point->x, out);
+        *outLen = 48;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(point, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_DHE */
+
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into a. (a -= b)
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_384_sub_in_place_12(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+    __asm__ __volatile__ (
+        "mov	r8, %[a]\n\t"
+        "add	r8, r8, #48\n\t"
+        "\n1:\n\t"
+        "mov	r5, #0\n\t"
+        "subs	r5, r5, %[c]\n\t"
+        "ldr	r3, [%[a]]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b]]\n\t"
+        "ldr	r6, [%[b], #4]\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "str	r3, [%[a]]\n\t"
+        "str	r4, [%[a], #4]\n\t"
+        "sbc	%[c], %[c], %[c]\n\t"
+        "add	%[a], %[a], #8\n\t"
+        "add	%[b], %[b], #8\n\t"
+        "cmp	%[a], r8\n\t"
+        "bne	1b\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6", "r8"
+    );
+
+    return c;
+}
+
+#else
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_384_sub_in_place_12(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "subs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "sbc	%[c], %[c], %[c]\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision digit.
+ */
+SP_NOINLINE static void sp_384_mul_d_12(sp_digit* r, const sp_digit* a,
+        sp_digit b)
+{
+    __asm__ __volatile__ (
+        "add	r9, %[a], #48\n\t"
+        /* A[0] * B */
+        "ldr	r6, [%[a]], #4\n\t"
+        "umull	r5, r3, r6, %[b]\n\t"
+        "mov	r4, #0\n\t"
+        "str	r5, [%[r]], #4\n\t"
+        /* A[0] * B - Done */
+        "\n1:\n\t"
+        "mov	r5, #0\n\t"
+        /* A[] * B */
+        "ldr	r6, [%[a]], #4\n\t"
+        "umull	r6, r8, r6, %[b]\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs 	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        /* A[] * B - Done */
+        "str	r3, [%[r]], #4\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "cmp	%[a], r9\n\t"
+        "blt	1b\n\t"
+        "str	r3, [%[r]]\n\t"
+        : [r] "+r" (r), [a] "+r" (a)
+        : [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r8", "r9"
+    );
+}
+
+/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
+ *
+ * d1   The high order half of the number to divide.
+ * d0   The low order half of the number to divide.
+ * div  The dividend.
+ * returns the result of the division.
+ *
+ * Note that this is an approximate div. It may give an answer 1 larger.
+ */
+SP_NOINLINE static sp_digit div_384_word_12(sp_digit d1, sp_digit d0,
+        sp_digit div)
+{
+    sp_digit r = 0;
+
+    __asm__ __volatile__ (
+        "lsr	r6, %[div], #16\n\t"
+        "add	r6, r6, #1\n\t"
+        "udiv	r4, %[d1], r6\n\t"
+        "lsl	r8, r4, #16\n\t"
+        "umull	r4, r5, %[div], r8\n\t"
+        "subs	%[d0], %[d0], r4\n\t"
+        "sbc	%[d1], %[d1], r5\n\t"
+        "udiv	r5, %[d1], r6\n\t"
+        "lsl	r4, r5, #16\n\t"
+        "add	r8, r8, r4\n\t"
+        "umull	r4, r5, %[div], r4\n\t"
+        "subs	%[d0], %[d0], r4\n\t"
+        "sbc	%[d1], %[d1], r5\n\t"
+        "lsl	r4, %[d1], #16\n\t"
+        "orr	r4, r4, %[d0], lsr #16\n\t"
+        "udiv	r4, r4, r6\n\t"
+        "add	r8, r8, r4\n\t"
+        "umull	r4, r5, %[div], r4\n\t"
+        "subs	%[d0], %[d0], r4\n\t"
+        "sbc	%[d1], %[d1], r5\n\t"
+        "lsl	r4, %[d1], #16\n\t"
+        "orr	r4, r4, %[d0], lsr #16\n\t"
+        "udiv	r4, r4, r6\n\t"
+        "add	r8, r8, r4\n\t"
+        "umull	r4, r5, %[div], r4\n\t"
+        "subs	%[d0], %[d0], r4\n\t"
+        "sbc	%[d1], %[d1], r5\n\t"
+        "udiv	r4, %[d0], %[div]\n\t"
+        "add	r8, r8, r4\n\t"
+        "mov	%[r], r8\n\t"
+        : [r] "+r" (r)
+        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+        : "r4", "r5", "r6", "r8"
+    );
+    return r;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * m  Mask to AND against each digit.
+ */
+static void sp_384_mask_12(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<12; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    r[0] = a[0] & m;
+    r[1] = a[1] & m;
+    r[2] = a[2] & m;
+    r[3] = a[3] & m;
+    r[4] = a[4] & m;
+    r[5] = a[5] & m;
+    r[6] = a[6] & m;
+    r[7] = a[7] & m;
+    r[8] = a[8] & m;
+    r[9] = a[9] & m;
+    r[10] = a[10] & m;
+    r[11] = a[11] & m;
+#endif
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Nmber to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_384_div_12(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[24], t2[13];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    div = d[11];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 12);
+    for (i=11; i>=0; i--) {
+        r1 = div_384_word_12(t1[12 + i], t1[12 + i - 1], div);
+
+        sp_384_mul_d_12(t2, d, r1);
+        t1[12 + i] += sp_384_sub_in_place_12(&t1[i], t2);
+        t1[12 + i] -= t2[12];
+        sp_384_mask_12(t2, d, t1[12 + i]);
+        t1[12 + i] += sp_384_add_12(&t1[i], &t1[i], t2);
+        sp_384_mask_12(t2, d, t1[12 + i]);
+        t1[12 + i] += sp_384_add_12(&t1[i], &t1[i], t2);
+    }
+
+    r1 = sp_384_cmp_12(t1, d) >= 0;
+    sp_384_cond_sub_12(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_384_mod_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_384_div_12(a, m, NULL, r);
+}
+
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
+/* Order-2 for the P384 curve. */
+static const uint32_t p384_order_minus_2[12] = {
+    0xccc52971U,0xecec196aU,0x48b0a77aU,0x581a0db2U,0xf4372ddfU,0xc7634d81U,
+    0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU
+};
+#else
+/* The low half of the order-2 of the P384 curve. */
+static const uint32_t p384_order_low[6] = {
+    0xccc52971U,0xecec196aU,0x48b0a77aU,0x581a0db2U,0xf4372ddfU,0xc7634d81U
+    
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Multiply two number mod the order of P384 curve. (r = a * b mod order)
+ *
+ * r  Result of the multiplication.
+ * a  First operand of the multiplication.
+ * b  Second operand of the multiplication.
+ */
+static void sp_384_mont_mul_order_12(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+    sp_384_mul_12(r, a, b);
+    sp_384_mont_reduce_order_12(r, p384_order, p384_mp_order);
+}
+
+/* Square number mod the order of P384 curve. (r = a * a mod order)
+ *
+ * r  Result of the squaring.
+ * a  Number to square.
+ */
+static void sp_384_mont_sqr_order_12(sp_digit* r, const sp_digit* a)
+{
+    sp_384_sqr_12(r, a);
+    sp_384_mont_reduce_order_12(r, p384_order, p384_mp_order);
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Square number mod the order of P384 curve a number of times.
+ * (r = a ^ n mod order)
+ *
+ * r  Result of the squaring.
+ * a  Number to square.
+ */
+static void sp_384_mont_sqr_n_order_12(sp_digit* r, const sp_digit* a, int n)
+{
+    int i;
+
+    sp_384_mont_sqr_order_12(r, a);
+    for (i=1; i<n; i++) {
+        sp_384_mont_sqr_order_12(r, r);
+    }
+}
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the order of the P384 curve.
+ * (r = 1 / a mod order)
+ *
+ * r   Inverse result.
+ * a   Number to invert.
+ * td  Temporary data.
+ */
+static void sp_384_mont_inv_order_12(sp_digit* r, const sp_digit* a,
+        sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    XMEMCPY(t, a, sizeof(sp_digit) * 12);
+    for (i=382; i>=0; i--) {
+        sp_384_mont_sqr_order_12(t, t);
+        if ((p384_order_minus_2[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_384_mont_mul_order_12(t, t, a);
+        }
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 12U);
+#else
+    sp_digit* t = td;
+    sp_digit* t2 = td + 2 * 12;
+    sp_digit* t3 = td + 4 * 12;
+    int i;
+
+    /* t = a^2 */
+    sp_384_mont_sqr_order_12(t, a);
+    /* t = a^3 = t * a */
+    sp_384_mont_mul_order_12(t, t, a);
+    /* t2= a^c = t ^ 2 ^ 2 */
+    sp_384_mont_sqr_n_order_12(t2, t, 2);
+    /* t = a^f = t2 * t */
+    sp_384_mont_mul_order_12(t, t2, t);
+    /* t2= a^f0 = t ^ 2 ^ 4 */
+    sp_384_mont_sqr_n_order_12(t2, t, 4);
+    /* t = a^ff = t2 * t */
+    sp_384_mont_mul_order_12(t, t2, t);
+    /* t2= a^ff00 = t ^ 2 ^ 8 */
+    sp_384_mont_sqr_n_order_12(t2, t, 8);
+    /* t3= a^ffff = t2 * t */
+    sp_384_mont_mul_order_12(t3, t2, t);
+    /* t2= a^ffff0000 = t3 ^ 2 ^ 16 */
+    sp_384_mont_sqr_n_order_12(t2, t3, 16);
+    /* t = a^ffffffff = t2 * t3 */
+    sp_384_mont_mul_order_12(t, t2, t3);
+    /* t2= a^ffffffff0000 = t ^ 2 ^ 16  */
+    sp_384_mont_sqr_n_order_12(t2, t, 16);
+    /* t = a^ffffffffffff = t2 * t3 */
+    sp_384_mont_mul_order_12(t, t2, t3);
+    /* t2= a^ffffffffffff000000000000 = t ^ 2 ^ 48  */
+    sp_384_mont_sqr_n_order_12(t2, t, 48);
+    /* t= a^fffffffffffffffffffffffff = t2 * t */
+    sp_384_mont_mul_order_12(t, t2, t);
+    /* t2= a^ffffffffffffffffffffffff000000000000000000000000 */
+    sp_384_mont_sqr_n_order_12(t2, t, 96);
+    /* t2= a^ffffffffffffffffffffffffffffffffffffffffffffffff = t2 * t */
+    sp_384_mont_mul_order_12(t2, t2, t);
+    for (i=191; i>=1; i--) {
+        sp_384_mont_sqr_order_12(t2, t2);
+        if (((sp_digit)p384_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_384_mont_mul_order_12(t2, t2, a);
+        }
+    }
+    sp_384_mont_sqr_order_12(t2, t2);
+    sp_384_mont_mul_order_12(r, t2, a);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+#ifdef HAVE_ECC_SIGN
+#ifndef SP_ECC_MAX_SIG_GEN
+#define SP_ECC_MAX_SIG_GEN  64
+#endif
+
+/* Sign the hash using the private key.
+ *   e = [hash, 384 bits] from binary
+ *   r = (k.G)->x mod order
+ *   s = (r * x + e) / k mod order
+ * The hash is truncated to the first 384 bits.
+ *
+ * hash     Hash to sign.
+ * hashLen  Length of the hash data.
+ * rng      Random number generator.
+ * priv     Private part of key - scalar.
+ * rm       First part of result as an mp_int.
+ * sm       Sirst part of result as an mp_int.
+ * heap     Heap to use for allocation.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
+                    mp_int* rm, mp_int* sm, mp_int* km, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit ed[2*12];
+    sp_digit xd[2*12];
+    sp_digit kd[2*12];
+    sp_digit rd[2*12];
+    sp_digit td[3 * 2*12];
+    sp_point_384 p;
+#endif
+    sp_digit* e = NULL;
+    sp_digit* x = NULL;
+    sp_digit* k = NULL;
+    sp_digit* r = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_384* point = NULL;
+    sp_digit carry;
+    sp_digit* s = NULL;
+    sp_digit* kInv = NULL;
+    int err = MP_OKAY;
+    int32_t c;
+    int i;
+
+    (void)heap;
+
+    err = sp_384_point_new_12(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 12, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        e = d + 0 * 12;
+        x = d + 2 * 12;
+        k = d + 4 * 12;
+        r = d + 6 * 12;
+        tmp = d + 8 * 12;
+#else
+        e = ed;
+        x = xd;
+        k = kd;
+        r = rd;
+        tmp = td;
+#endif
+        s = e;
+        kInv = k;
+
+        if (hashLen > 48U) {
+            hashLen = 48U;
+        }
+
+        sp_384_from_bin(e, 12, hash, (int)hashLen);
+    }
+
+    for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
+        sp_384_from_mp(x, 12, priv);
+
+        /* New random point. */
+        if (km == NULL || mp_iszero(km)) {
+            err = sp_384_ecc_gen_k_12(rng, k);
+        }
+        else {
+            sp_384_from_mp(k, 12, km);
+            mp_zero(km);
+        }
+        if (err == MP_OKAY) {
+                err = sp_384_ecc_mulmod_base_12(point, k, 1, NULL);
+        }
+
+        if (err == MP_OKAY) {
+            /* r = point->x mod order */
+            XMEMCPY(r, point->x, sizeof(sp_digit) * 12U);
+            sp_384_norm_12(r);
+            c = sp_384_cmp_12(r, p384_order);
+            sp_384_cond_sub_12(r, r, p384_order, 0L - (sp_digit)(c >= 0));
+            sp_384_norm_12(r);
+
+            /* Conv k to Montgomery form (mod order) */
+                sp_384_mul_12(k, k, p384_norm_order);
+            err = sp_384_mod_12(k, k, p384_order);
+        }
+        if (err == MP_OKAY) {
+            sp_384_norm_12(k);
+            /* kInv = 1/k mod order */
+                sp_384_mont_inv_order_12(kInv, k, tmp);
+            sp_384_norm_12(kInv);
+
+            /* s = r * x + e */
+                sp_384_mul_12(x, x, r);
+            err = sp_384_mod_12(x, x, p384_order);
+        }
+        if (err == MP_OKAY) {
+            sp_384_norm_12(x);
+            carry = sp_384_add_12(s, e, x);
+            sp_384_cond_sub_12(s, s, p384_order, 0 - carry);
+            sp_384_norm_12(s);
+            c = sp_384_cmp_12(s, p384_order);
+            sp_384_cond_sub_12(s, s, p384_order, 0L - (sp_digit)(c >= 0));
+            sp_384_norm_12(s);
+
+            /* s = s * k^-1 mod order */
+                sp_384_mont_mul_order_12(s, s, kInv);
+            sp_384_norm_12(s);
+
+            /* Check that signature is usable. */
+            if (sp_384_iszero_12(s) == 0) {
+                break;
+            }
+        }
+    }
+
+    if (i == 0) {
+        err = RNG_FAILURE_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(r, rm);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(s, sm);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XMEMSET(d, 0, sizeof(sp_digit) * 8 * 12);
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    XMEMSET(e, 0, sizeof(sp_digit) * 2U * 12U);
+    XMEMSET(x, 0, sizeof(sp_digit) * 2U * 12U);
+    XMEMSET(k, 0, sizeof(sp_digit) * 2U * 12U);
+    XMEMSET(r, 0, sizeof(sp_digit) * 2U * 12U);
+    XMEMSET(r, 0, sizeof(sp_digit) * 2U * 12U);
+    XMEMSET(tmp, 0, sizeof(sp_digit) * 3U * 2U * 12U);
+#endif
+    sp_384_point_free_12(point, 1, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_SIGN */
+
+#ifdef HAVE_ECC_VERIFY
+/* Verify the signature values with the hash and public key.
+ *   e = Truncate(hash, 384)
+ *   u1 = e/s mod order
+ *   u2 = r/s mod order
+ *   r == (u1.G + u2.Q)->x mod order
+ * Optimization: Leave point in projective form.
+ *   (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
+ *   (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
+ * The hash is truncated to the first 384 bits.
+ *
+ * hash     Hash to sign.
+ * hashLen  Length of the hash data.
+ * rng      Random number generator.
+ * priv     Private part of key - scalar.
+ * rm       First part of result as an mp_int.
+ * sm       Sirst part of result as an mp_int.
+ * heap     Heap to use for allocation.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+int sp_ecc_verify_384(const byte* hash, word32 hashLen, mp_int* pX,
+    mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit u1d[2*12];
+    sp_digit u2d[2*12];
+    sp_digit sd[2*12];
+    sp_digit tmpd[2*12 * 5];
+    sp_point_384 p1d;
+    sp_point_384 p2d;
+#endif
+    sp_digit* u1 = NULL;
+    sp_digit* u2 = NULL;
+    sp_digit* s = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_384* p1;
+    sp_point_384* p2 = NULL;
+    sp_digit carry;
+    int32_t c;
+    int err;
+
+    err = sp_384_point_new_12(heap, p1d, p1);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, p2d, p2);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 12, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        u1  = d + 0 * 12;
+        u2  = d + 2 * 12;
+        s   = d + 4 * 12;
+        tmp = d + 6 * 12;
+#else
+        u1 = u1d;
+        u2 = u2d;
+        s  = sd;
+        tmp = tmpd;
+#endif
+
+        if (hashLen > 48U) {
+            hashLen = 48U;
+        }
+
+        sp_384_from_bin(u1, 12, hash, (int)hashLen);
+        sp_384_from_mp(u2, 12, r);
+        sp_384_from_mp(s, 12, sm);
+        sp_384_from_mp(p2->x, 12, pX);
+        sp_384_from_mp(p2->y, 12, pY);
+        sp_384_from_mp(p2->z, 12, pZ);
+
+        {
+            sp_384_mul_12(s, s, p384_norm_order);
+        }
+        err = sp_384_mod_12(s, s, p384_order);
+    }
+    if (err == MP_OKAY) {
+        sp_384_norm_12(s);
+        {
+            sp_384_mont_inv_order_12(s, s, tmp);
+            sp_384_mont_mul_order_12(u1, u1, s);
+            sp_384_mont_mul_order_12(u2, u2, s);
+        }
+
+            err = sp_384_ecc_mulmod_base_12(p1, u1, 0, heap);
+    }
+    if (err == MP_OKAY) {
+            err = sp_384_ecc_mulmod_12(p2, p2, u2, 0, heap);
+    }
+
+    if (err == MP_OKAY) {
+        {
+            sp_384_proj_point_add_12(p1, p1, p2, tmp);
+            if (sp_384_iszero_12(p1->z)) {
+                if (sp_384_iszero_12(p1->x) && sp_384_iszero_12(p1->y)) {
+                    sp_384_proj_point_dbl_12(p1, p2, tmp);
+                }
+                else {
+                    /* Y ordinate is not used from here - don't set. */
+                    p1->x[0] = 0;
+                    p1->x[1] = 0;
+                    p1->x[2] = 0;
+                    p1->x[3] = 0;
+                    p1->x[4] = 0;
+                    p1->x[5] = 0;
+                    p1->x[6] = 0;
+                    p1->x[7] = 0;
+                    p1->x[8] = 0;
+                    p1->x[9] = 0;
+                    p1->x[10] = 0;
+                    p1->x[11] = 0;
+                    XMEMCPY(p1->z, p384_norm_mod, sizeof(p384_norm_mod));
+                }
+            }
+        }
+
+        /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
+        /* Reload r and convert to Montgomery form. */
+        sp_384_from_mp(u2, 12, r);
+        err = sp_384_mod_mul_norm_12(u2, u2, p384_mod);
+    }
+
+    if (err == MP_OKAY) {
+        /* u1 = r.z'.z' mod prime */
+        sp_384_mont_sqr_12(p1->z, p1->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(u1, u2, p1->z, p384_mod, p384_mp_mod);
+        *res = (int)(sp_384_cmp_12(p1->x, u1) == 0);
+        if (*res == 0) {
+            /* Reload r and add order. */
+            sp_384_from_mp(u2, 12, r);
+            carry = sp_384_add_12(u2, u2, p384_order);
+            /* Carry means result is greater than mod and is not valid. */
+            if (carry == 0) {
+                sp_384_norm_12(u2);
+
+                /* Compare with mod and if greater or equal then not valid. */
+                c = sp_384_cmp_12(u2, p384_mod);
+                if (c < 0) {
+                    /* Convert to Montogomery form */
+                    err = sp_384_mod_mul_norm_12(u2, u2, p384_mod);
+                    if (err == MP_OKAY) {
+                        /* u1 = (r + 1*order).z'.z' mod prime */
+                        sp_384_mont_mul_12(u1, u2, p1->z, p384_mod,
+                                                                  p384_mp_mod);
+                        *res = (int)(sp_384_cmp_12(p1->x, u1) == 0);
+                    }
+                }
+            }
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL)
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+#endif
+    sp_384_point_free_12(p1, 0, heap);
+    sp_384_point_free_12(p2, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_VERIFY */
+
+#ifdef HAVE_ECC_CHECK_KEY
+/* Check that the x and y oridinates are a valid point on the curve.
+ *
+ * point  EC point.
+ * heap   Heap to use if dynamically allocating.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+static int sp_384_ecc_is_point_12(sp_point_384* point, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit t1d[2*12];
+    sp_digit t2d[2*12];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12 * 4, heap, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = d + 0 * 12;
+        t2 = d + 2 * 12;
+#else
+        (void)heap;
+
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
+        sp_384_sqr_12(t1, point->y);
+        (void)sp_384_mod_12(t1, t1, p384_mod);
+        sp_384_sqr_12(t2, point->x);
+        (void)sp_384_mod_12(t2, t2, p384_mod);
+        sp_384_mul_12(t2, t2, point->x);
+        (void)sp_384_mod_12(t2, t2, p384_mod);
+        (void)sp_384_sub_12(t2, p384_mod, t2);
+        sp_384_mont_add_12(t1, t1, t2, p384_mod);
+
+        sp_384_mont_add_12(t1, t1, point->x, p384_mod);
+        sp_384_mont_add_12(t1, t1, point->x, p384_mod);
+        sp_384_mont_add_12(t1, t1, point->x, p384_mod);
+
+        if (sp_384_cmp_12(t1, p384_b) != 0) {
+            err = MP_VAL;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+
+/* Check that the x and y oridinates are a valid point on the curve.
+ *
+ * pX  X ordinate of EC point.
+ * pY  Y ordinate of EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+int sp_ecc_is_point_384(mp_int* pX, mp_int* pY)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 pubd;
+#endif
+    sp_point_384* pub;
+    byte one[1] = { 1 };
+    int err;
+
+    err = sp_384_point_new_12(NULL, pubd, pub);
+    if (err == MP_OKAY) {
+        sp_384_from_mp(pub->x, 12, pX);
+        sp_384_from_mp(pub->y, 12, pY);
+        sp_384_from_bin(pub->z, 12, one, (int)sizeof(one));
+
+        err = sp_384_ecc_is_point_12(pub, NULL);
+    }
+
+    sp_384_point_free_12(pub, 0, NULL);
+
+    return err;
+}
+
+/* Check that the private scalar generates the EC point (px, py), the point is
+ * on the curve and the point has the correct order.
+ *
+ * pX     X ordinate of EC point.
+ * pY     Y ordinate of EC point.
+ * privm  Private scalar that generates EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve, ECC_INF_E if the point does not have the correct order,
+ * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and
+ * MP_OKAY otherwise.
+ */
+int sp_ecc_check_key_384(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit privd[12];
+    sp_point_384 pubd;
+    sp_point_384 pd;
+#endif
+    sp_digit* priv = NULL;
+    sp_point_384* pub;
+    sp_point_384* p = NULL;
+    byte one[1] = { 1 };
+    int err;
+
+    err = sp_384_point_new_12(heap, pubd, pub);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (priv == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+        priv = privd;
+#endif
+
+        sp_384_from_mp(pub->x, 12, pX);
+        sp_384_from_mp(pub->y, 12, pY);
+        sp_384_from_bin(pub->z, 12, one, (int)sizeof(one));
+        sp_384_from_mp(priv, 12, privm);
+
+        /* Check point at infinitiy. */
+        if ((sp_384_iszero_12(pub->x) != 0) &&
+            (sp_384_iszero_12(pub->y) != 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check range of X and Y */
+        if (sp_384_cmp_12(pub->x, p384_mod) >= 0 ||
+            sp_384_cmp_12(pub->y, p384_mod) >= 0) {
+            err = ECC_OUT_OF_RANGE_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check point is on curve */
+        err = sp_384_ecc_is_point_12(pub, heap);
+    }
+
+    if (err == MP_OKAY) {
+        /* Point * order = infinity */
+            err = sp_384_ecc_mulmod_12(p, pub, p384_order, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is infinity */
+        if ((sp_384_iszero_12(p->x) == 0) ||
+            (sp_384_iszero_12(p->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Base * private = point */
+            err = sp_384_ecc_mulmod_base_12(p, priv, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is public key */
+        if (sp_384_cmp_12(p->x, pub->x) != 0 ||
+            sp_384_cmp_12(p->y, pub->y) != 0) {
+            err = ECC_PRIV_KEY_E;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (priv != NULL) {
+        XFREE(priv, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(p, 0, heap);
+    sp_384_point_free_12(pub, 0, heap);
+
+    return err;
+}
+#endif
+#ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
+/* Add two projective EC points together.
+ * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ)
+ *
+ * pX   First EC point's X ordinate.
+ * pY   First EC point's Y ordinate.
+ * pZ   First EC point's Z ordinate.
+ * qX   Second EC point's X ordinate.
+ * qY   Second EC point's Y ordinate.
+ * qZ   Second EC point's Z ordinate.
+ * rX   Resultant EC point's X ordinate.
+ * rY   Resultant EC point's Y ordinate.
+ * rZ   Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_add_point_384(mp_int* pX, mp_int* pY, mp_int* pZ,
+                              mp_int* qX, mp_int* qY, mp_int* qZ,
+                              mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit tmpd[2 * 12 * 5];
+    sp_point_384 pd;
+    sp_point_384 qd;
+#endif
+    sp_digit* tmp;
+    sp_point_384* p;
+    sp_point_384* q = NULL;
+    int err;
+
+    err = sp_384_point_new_12(NULL, pd, p);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(NULL, qd, q);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 5, NULL,
+                                                              DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_384_from_mp(p->x, 12, pX);
+        sp_384_from_mp(p->y, 12, pY);
+        sp_384_from_mp(p->z, 12, pZ);
+        sp_384_from_mp(q->x, 12, qX);
+        sp_384_from_mp(q->y, 12, qY);
+        sp_384_from_mp(q->z, 12, qZ);
+
+            sp_384_proj_point_add_12(p, p, q, tmp);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->x, rX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->y, rY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->z, rZ);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(q, 0, NULL);
+    sp_384_point_free_12(p, 0, NULL);
+
+    return err;
+}
+
+/* Double a projective EC point.
+ * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ)
+ *
+ * pX   EC point's X ordinate.
+ * pY   EC point's Y ordinate.
+ * pZ   EC point's Z ordinate.
+ * rX   Resultant EC point's X ordinate.
+ * rY   Resultant EC point's Y ordinate.
+ * rZ   Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_dbl_point_384(mp_int* pX, mp_int* pY, mp_int* pZ,
+                              mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit tmpd[2 * 12 * 2];
+    sp_point_384 pd;
+#endif
+    sp_digit* tmp;
+    sp_point_384* p;
+    int err;
+
+    err = sp_384_point_new_12(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 2, NULL,
+                                                              DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_384_from_mp(p->x, 12, pX);
+        sp_384_from_mp(p->y, 12, pY);
+        sp_384_from_mp(p->z, 12, pZ);
+
+            sp_384_proj_point_dbl_12(p, p, tmp);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->x, rX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->y, rY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->z, rZ);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(p, 0, NULL);
+
+    return err;
+}
+
+/* Map a projective EC point to affine in place.
+ * pZ will be one.
+ *
+ * pX   EC point's X ordinate.
+ * pY   EC point's Y ordinate.
+ * pZ   EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_map_384(mp_int* pX, mp_int* pY, mp_int* pZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit tmpd[2 * 12 * 6];
+    sp_point_384 pd;
+#endif
+    sp_digit* tmp;
+    sp_point_384* p;
+    int err;
+
+    err = sp_384_point_new_12(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, NULL,
+                                                              DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+    if (err == MP_OKAY) {
+        sp_384_from_mp(p->x, 12, pX);
+        sp_384_from_mp(p->y, 12, pY);
+        sp_384_from_mp(p->z, 12, pZ);
+
+        sp_384_map_12(p, p, tmp);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->x, pX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->y, pY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->z, pZ);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(p, 0, NULL);
+
+    return err;
+}
+#endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */
+#ifdef HAVE_COMP_KEY
+/* Find the square root of a number mod the prime of the curve.
+ *
+ * y  The number to operate on and the result.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+static int sp_384_mont_sqrt_12(sp_digit* y)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d;
+#else
+    sp_digit t1d[2 * 12];
+    sp_digit t2d[2 * 12];
+    sp_digit t3d[2 * 12];
+    sp_digit t4d[2 * 12];
+    sp_digit t5d[2 * 12];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    sp_digit* t3;
+    sp_digit* t4;
+    sp_digit* t5;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5 * 2 * 12, NULL, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = d + 0 * 12;
+        t2 = d + 2 * 12;
+        t3 = d + 4 * 12;
+        t4 = d + 6 * 12;
+        t5 = d + 8 * 12;
+#else
+        t1 = t1d;
+        t2 = t2d;
+        t3 = t3d;
+        t4 = t4d;
+        t5 = t5d;
+#endif
+
+        {
+            /* t2 = y ^ 0x2 */
+            sp_384_mont_sqr_12(t2, y, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x3 */
+            sp_384_mont_mul_12(t1, t2, y, p384_mod, p384_mp_mod);
+            /* t5 = y ^ 0xc */
+            sp_384_mont_sqr_n_12(t5, t1, 2, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xf */
+            sp_384_mont_mul_12(t1, t1, t5, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x1e */
+            sp_384_mont_sqr_12(t2, t1, p384_mod, p384_mp_mod);
+            /* t3 = y ^ 0x1f */
+            sp_384_mont_mul_12(t3, t2, y, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3e0 */
+            sp_384_mont_sqr_n_12(t2, t3, 5, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x3ff */
+            sp_384_mont_mul_12(t1, t3, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x7fe0 */
+            sp_384_mont_sqr_n_12(t2, t1, 5, p384_mod, p384_mp_mod);
+            /* t3 = y ^ 0x7fff */
+            sp_384_mont_mul_12(t3, t3, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3fff800 */
+            sp_384_mont_sqr_n_12(t2, t3, 15, p384_mod, p384_mp_mod);
+            /* t4 = y ^ 0x3ffffff */
+            sp_384_mont_mul_12(t4, t3, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0xffffffc000000 */
+            sp_384_mont_sqr_n_12(t2, t4, 30, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xfffffffffffff */
+            sp_384_mont_mul_12(t1, t4, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0xfffffffffffffff000000000000000 */
+            sp_384_mont_sqr_n_12(t2, t1, 60, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xffffffffffffffffffffffffffffff */
+            sp_384_mont_mul_12(t1, t1, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */
+            sp_384_mont_sqr_n_12(t2, t1, 120, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+            sp_384_mont_mul_12(t1, t1, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */
+            sp_384_mont_sqr_n_12(t2, t1, 15, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+            sp_384_mont_mul_12(t1, t3, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff80000000 */
+            sp_384_mont_sqr_n_12(t2, t1, 31, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff */
+            sp_384_mont_mul_12(t1, t4, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff0 */
+            sp_384_mont_sqr_n_12(t2, t1, 4, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc */
+            sp_384_mont_mul_12(t1, t5, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000 */
+            sp_384_mont_sqr_n_12(t2, t1, 62, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000001 */
+            sp_384_mont_mul_12(t1, y, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc00000000000000040000000 */
+            sp_384_mont_sqr_n_12(y, t1, 30, p384_mod, p384_mp_mod);
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+
+
+/* Uncompress the point given the X ordinate.
+ *
+ * xm    X ordinate.
+ * odd   Whether the Y ordinate is odd.
+ * ym    Calculated Y ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_uncompress_384(mp_int* xm, int odd, mp_int* ym)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d;
+#else
+    sp_digit xd[2 * 12];
+    sp_digit yd[2 * 12];
+#endif
+    sp_digit* x = NULL;
+    sp_digit* y = NULL;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 12, NULL, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        x = d + 0 * 12;
+        y = d + 2 * 12;
+#else
+        x = xd;
+        y = yd;
+#endif
+
+        sp_384_from_mp(x, 12, xm);
+        err = sp_384_mod_mul_norm_12(x, x, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        /* y = x^3 */
+        {
+            sp_384_mont_sqr_12(y, x, p384_mod, p384_mp_mod);
+            sp_384_mont_mul_12(y, y, x, p384_mod, p384_mp_mod);
+        }
+        /* y = x^3 - 3x */
+        sp_384_mont_sub_12(y, y, x, p384_mod);
+        sp_384_mont_sub_12(y, y, x, p384_mod);
+        sp_384_mont_sub_12(y, y, x, p384_mod);
+        /* y = x^3 - 3x + b */
+        err = sp_384_mod_mul_norm_12(x, p384_b, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        sp_384_mont_add_12(y, y, x, p384_mod);
+        /* y = sqrt(x^3 - 3x + b) */
+        err = sp_384_mont_sqrt_12(y);
+    }
+    if (err == MP_OKAY) {
+        XMEMSET(y + 12, 0, 12U * sizeof(sp_digit));
+        sp_384_mont_reduce_12(y, p384_mod, p384_mp_mod);
+        if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) {
+            sp_384_mont_sub_12(y, p384_mod, y, p384_mod);
+        }
+
+        err = sp_384_to_mp(y, ym);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+#endif
+#endif /* WOLFSSL_SP_384 */
+#endif /* WOLFSSL_HAVE_SP_ECC */
+#endif /* WOLFSSL_SP_ARM_CORTEX_M_ASM */
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH || WOLFSSL_HAVE_SP_ECC */
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wolfcrypt/src/sp_dsp32.c	Thu Jun 04 23:57:22 2020 +0000
@@ -0,0 +1,4909 @@
+/* sp_cdsp_signed.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/* from wolfcrypt/src/sp_c32.c */
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/cpuid.h>
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
+#if defined(WOLFSSL_HAVE_SP_ECC)
+#ifdef WOLFSSL_DSP
+
+#include <wolfssl/wolfcrypt/sp.h>
+#include "remote.h"
+#include "hexagon_protos.h"
+#include "hexagon_types.h"
+
+#if (defined(WOLFSSL_SP_CACHE_RESISTANT) || defined(WOLFSSL_SP_SMALL)) &&              (defined(WOLFSSL_HAVE_SP_ECC) || !defined(WOLFSSL_RSA_PUBLIC_ONLY))
+/* Mask for address to obfuscate which of the two address will be used. */
+static const size_t addr_mask[2] = { 0, (size_t)-1 };
+#endif
+
+#ifdef WOLFSSL_HAVE_SP_ECC
+#ifndef WOLFSSL_SP_NO_256
+
+/* Point structure to use. */
+typedef struct sp_point {
+    sp_digit x[2 * 10] __attribute__((aligned(128)));
+    sp_digit y[2 * 10] __attribute__((aligned(128)));
+    sp_digit z[2 * 10] __attribute__((aligned(128)));
+    int infinity;
+} sp_point;
+
+/* The modulus (prime) of the curve P256. */
+static const sp_digit p256_mod[10] __attribute__((aligned(128))) = {
+    0x3ffffff,0x3ffffff,0x3ffffff,0x003ffff,0x0000000,0x0000000,0x0000000,
+    0x0000400,0x3ff0000,0x03fffff
+};
+#ifndef WOLFSSL_SP_SMALL
+/* The Montogmery normalizer for modulus of the curve P256. */
+static const sp_digit p256_norm_mod[10] __attribute__((aligned(128))) = {
+    0x0000001,0x0000000,0x0000000,0x3fc0000,0x3ffffff,0x3ffffff,0x3ffffff,
+    0x3fffbff,0x000ffff,0x0000000
+};
+#endif /* WOLFSSL_SP_SMALL */
+/* The Montogmery multiplier for modulus of the curve P256. */
+static const sp_digit p256_mp_mod __attribute__((aligned(128))) = 0x000001;
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+                                            defined(HAVE_ECC_VERIFY)
+/* The order of the curve P256. */
+static const sp_digit p256_order[10] __attribute__((aligned(128))) = {
+    0x0632551,0x272b0bf,0x1e84f3b,0x2b69c5e,0x3bce6fa,0x3ffffff,0x3ffffff,
+    0x00003ff,0x3ff0000,0x03fffff
+};
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montogmery normalizer for order of the curve P256. */
+static const sp_digit p256_norm_order[10] __attribute__((aligned(128))) = {
+    0x39cdaaf,0x18d4f40,0x217b0c4,0x14963a1,0x0431905,0x0000000,0x0000000,
+    0x3fffc00,0x000ffff,0x0000000
+};
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montogmery multiplier for order of the curve P256. */
+static const sp_digit p256_mp_order __attribute__((aligned(128))) = 0x200bc4f;
+#endif
+/* The base point of curve P256. */
+static const sp_point p256_base __attribute__((aligned(128))) = {
+    /* X ordinate */
+    {
+        0x098c296,0x04e5176,0x33a0f4a,0x204b7ac,0x277037d,0x0e9103c,0x3ce6e56,
+        0x1091fe2,0x1f2e12c,0x01ac5f4, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* Y ordinate */
+    {
+        0x3bf51f5,0x1901a0d,0x1ececbb,0x15dacc5,0x22bce33,0x303e785,0x27eb4a7,
+        0x1fe6e3b,0x2e2fe1a,0x013f8d0, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* Z ordinate */
+    {
+        0x0000001,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,
+        0x0000000,0x0000000,0x0000000, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* infinity */
+    0
+};
+
+static int sp_ecc_point_new_ex(void* heap, sp_point* sp, sp_point** p)
+{
+    int ret = MP_OKAY;
+    (void)heap;
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    (void)sp;
+    *p = (sp_point*)XMALLOC(sizeof(sp_point), heap, DYNAMIC_TYPE_ECC);
+#else
+    *p = sp;
+#endif
+    if (p == NULL) {
+        ret = MEMORY_E;
+    }
+    return ret;
+}
+
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+/* Allocate memory for point and return error. */
+#define sp_ecc_point_new(heap, sp, p) sp_ecc_point_new_ex((heap), NULL, &(p))
+#else
+/* Set pointer to data and return no error. */
+#define sp_ecc_point_new(heap, sp, p) sp_ecc_point_new_ex((heap), &(sp), &(p))
+#endif
+
+
+static void sp_ecc_point_free(sp_point* p, int clear, void* heap)
+{
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+/* If valid pointer then clear point data if requested and free data. */
+    if (p != NULL) {
+        if (clear != 0) {
+            XMEMSET(p, 0, sizeof(*p));
+        }
+        XFREE(p, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+/* Clear point data if requested. */
+    if (clear != 0) {
+        XMEMSET(p, 0, sizeof(*p));
+    }
+#endif
+    (void)heap;
+}
+
+/* Multiply a number by Montogmery normalizer mod modulus (prime).
+ *
+ * r  The resulting Montgomery form number.
+ * a  The number to convert.
+ * m  The modulus (prime).
+ * returns MEMORY_E when memory allocation fails and MP_OKAY otherwise.
+ */
+static int sp_256_mod_mul_norm_10(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    int64_t* td;
+#else
+    int64_t td[8];
+    int64_t a32d[8];
+#endif
+    int64_t* t;
+    int64_t* a32;
+    int64_t o;
+    int err = MP_OKAY;
+
+    (void)m;
+
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    td = (int64_t*)XMALLOC(sizeof(int64_t) * 2 * 8, NULL, DYNAMIC_TYPE_ECC);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+        t = td;
+        a32 = td + 8;
+#else
+        t = td;
+        a32 = a32d;
+#endif
+
+        a32[0] = a[0];
+        a32[0] |= a[1] << 26U;
+        a32[0] &= 0xffffffffL;
+        a32[1] = (sp_digit)(a[1] >> 6);
+        a32[1] |= a[2] << 20U;
+        a32[1] &= 0xffffffffL;
+        a32[2] = (sp_digit)(a[2] >> 12);
+        a32[2] |= a[3] << 14U;
+        a32[2] &= 0xffffffffL;
+        a32[3] = (sp_digit)(a[3] >> 18);
+        a32[3] |= a[4] << 8U;
+        a32[3] &= 0xffffffffL;
+        a32[4] = (sp_digit)(a[4] >> 24);
+        a32[4] |= a[5] << 2U;
+        a32[4] |= a[6] << 28U;
+        a32[4] &= 0xffffffffL;
+        a32[5] = (sp_digit)(a[6] >> 4);
+        a32[5] |= a[7] << 22U;
+        a32[5] &= 0xffffffffL;
+        a32[6] = (sp_digit)(a[7] >> 10);
+        a32[6] |= a[8] << 16U;
+        a32[6] &= 0xffffffffL;
+        a32[7] = (sp_digit)(a[8] >> 16);
+        a32[7] |= a[9] << 10U;
+        a32[7] &= 0xffffffffL;
+
+        /*  1  1  0 -1 -1 -1 -1  0 */
+        t[0] = 0 + a32[0] + a32[1] - a32[3] - a32[4] - a32[5] - a32[6];
+        /*  0  1  1  0 -1 -1 -1 -1 */
+        t[1] = 0 + a32[1] + a32[2] - a32[4] - a32[5] - a32[6] - a32[7];
+        /*  0  0  1  1  0 -1 -1 -1 */
+        t[2] = 0 + a32[2] + a32[3] - a32[5] - a32[6] - a32[7];
+        /* -1 -1  0  2  2  1  0 -1 */
+        t[3] = 0 - a32[0] - a32[1] + 2 * a32[3] + 2 * a32[4] + a32[5] - a32[7];
+        /*  0 -1 -1  0  2  2  1  0 */
+        t[4] = 0 - a32[1] - a32[2] + 2 * a32[4] + 2 * a32[5] + a32[6];
+        /*  0  0 -1 -1  0  2  2  1 */
+        t[5] = 0 - a32[2] - a32[3] + 2 * a32[5] + 2 * a32[6] + a32[7];
+        /* -1 -1  0  0  0  1  3  2 */
+        t[6] = 0 - a32[0] - a32[1] + a32[5] + 3 * a32[6] + 2 * a32[7];
+        /*  1  0 -1 -1 -1 -1  0  3 */
+        t[7] = 0 + a32[0] - a32[2] - a32[3] - a32[4] - a32[5] + 3 * a32[7];
+
+        t[1] += t[0] >> 32U; t[0] &= 0xffffffffL;
+        t[2] += t[1] >> 32U; t[1] &= 0xffffffffL;
+        t[3] += t[2] >> 32U; t[2] &= 0xffffffffL;
+        t[4] += t[3] >> 32U; t[3] &= 0xffffffffL;
+        t[5] += t[4] >> 32U; t[4] &= 0xffffffffL;
+        t[6] += t[5] >> 32U; t[5] &= 0xffffffffL;
+        t[7] += t[6] >> 32U; t[6] &= 0xffffffffL;
+        o     = t[7] >> 32U; t[7] &= 0xffffffffL;
+        t[0] += o;
+        t[3] -= o;
+        t[6] -= o;
+        t[7] += o;
+        t[1] += t[0] >> 32U; t[0] &= 0xffffffffL;
+        t[2] += t[1] >> 32U; t[1] &= 0xffffffffL;
+        t[3] += t[2] >> 32U; t[2] &= 0xffffffffL;
+        t[4] += t[3] >> 32U; t[3] &= 0xffffffffL;
+        t[5] += t[4] >> 32U; t[4] &= 0xffffffffL;
+        t[6] += t[5] >> 32U; t[5] &= 0xffffffffL;
+        t[7] += t[6] >> 32U; t[6] &= 0xffffffffL;
+
+        r[0] = (sp_digit)(t[0]) & 0x3ffffffL;
+        r[1] = (sp_digit)(t[0] >> 26U);
+        r[1] |= t[1] << 6U;
+        r[1] &= 0x3ffffffL;
+        r[2] = (sp_digit)(t[1] >> 20U);
+        r[2] |= t[2] << 12U;
+        r[2] &= 0x3ffffffL;
+        r[3] = (sp_digit)(t[2] >> 14U);
+        r[3] |= t[3] << 18U;
+        r[3] &= 0x3ffffffL;
+        r[4] = (sp_digit)(t[3] >> 8U);
+        r[4] |= t[4] << 24U;
+        r[4] &= 0x3ffffffL;
+        r[5] = (sp_digit)(t[4] >> 2U) & 0x3ffffffL;
+        r[6] = (sp_digit)(t[4] >> 28U);
+        r[6] |= t[5] << 4U;
+        r[6] &= 0x3ffffffL;
+        r[7] = (sp_digit)(t[5] >> 22U);
+        r[7] |= t[6] << 10U;
+        r[7] &= 0x3ffffffL;
+        r[8] = (sp_digit)(t[6] >> 16U);
+        r[8] |= t[7] << 16U;
+        r[8] &= 0x3ffffffL;
+        r[9] = (sp_digit)(t[7] >> 10U);
+    }
+
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+
+
+/* Compare a with b in constant time.
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+static sp_digit sp_256_cmp_10(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=9; i>=0; i--) {
+        r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    }
+#else
+    r |= (a[ 9] - b[ 9]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 8] - b[ 8]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 7] - b[ 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 6] - b[ 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 5] - b[ 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 4] - b[ 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 3] - b[ 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 2] - b[ 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 1] - b[ 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 0] - b[ 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+#endif /* WOLFSSL_SP_SMALL */
+
+    return r;
+}
+
+/* Normalize the values in each word to 26.
+ *
+ * a  Array of sp_digit to normalize.
+ */
+static void sp_256_norm_10(sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    for (i = 0; i < 9; i++) {
+        a[i+1] += a[i] >> 26;
+        a[i] &= 0x3ffffff;
+    }
+#else
+    a[1] += a[0] >> 26; a[0] = Q6_R_and_RR(a[0], 0x3ffffff);
+    a[2] += a[1] >> 26; a[1] = Q6_R_and_RR(a[1], 0x3ffffff);
+    a[3] += a[2] >> 26; a[2] = Q6_R_and_RR(a[2], 0x3ffffff);
+    a[4] += a[3] >> 26; a[3] = Q6_R_and_RR(a[3], 0x3ffffff);
+    a[5] += a[4] >> 26; a[4] = Q6_R_and_RR(a[4], 0x3ffffff);
+    a[6] += a[5] >> 26; a[5] = Q6_R_and_RR(a[5], 0x3ffffff);
+    a[7] += a[6] >> 26; a[6] = Q6_R_and_RR(a[6], 0x3ffffff);
+    a[8] += a[7] >> 26; a[7] = Q6_R_and_RR(a[7], 0x3ffffff);
+    a[9] += a[8] >> 26; a[8] = Q6_R_and_RR(a[8], 0x3ffffff);
+#endif
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not.
+ *
+ * r  A single precision number representing condition subtract result.
+ * a  A single precision number to subtract from.
+ * b  A single precision number to subtract.
+ * m  Mask value to apply.
+ */
+static void sp_256_cond_sub_10(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i = 0; i < 10; i++) {
+        r[i] = a[i] - (b[i] & m);
+    }
+#else
+    r[ 0] = Q6_R_sub_RR(a[ 0], Q6_R_and_RR(b[ 0], m));
+    r[ 1] = Q6_R_sub_RR(a[ 1], Q6_R_and_RR(b[ 1], m));
+    r[ 2] = Q6_R_sub_RR(a[ 2], Q6_R_and_RR(b[ 2], m));
+    r[ 3] = Q6_R_sub_RR(a[ 3], Q6_R_and_RR(b[ 3], m));
+    r[ 4] = Q6_R_sub_RR(a[ 4], Q6_R_and_RR(b[ 4], m));
+    r[ 5] = Q6_R_sub_RR(a[ 5], Q6_R_and_RR(b[ 5], m));
+    r[ 6] = Q6_R_sub_RR(a[ 6], Q6_R_and_RR(b[ 6], m));
+    r[ 7] = Q6_R_sub_RR(a[ 7], Q6_R_and_RR(b[ 7], m));
+    r[ 8] = Q6_R_sub_RR(a[ 8], Q6_R_and_RR(b[ 8], m));
+    r[ 9] = Q6_R_sub_RR(a[ 9], Q6_R_and_RR(b[ 9], m));
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#define sp_256_mont_reduce_order_10         sp_256_mont_reduce_10
+
+/* Mul a by scalar b and add into r. (r += a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A scalar.
+ */
+SP_NOINLINE static void sp_256_mul_add_10(sp_digit* r, const sp_digit* a,
+        const sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int64_t tb = b;
+    int64_t t = 0;
+    int i;
+
+    for (i = 0; i < 10; i++) {
+        t += (tb * a[i]) + r[i];
+        r[i] = t & 0x3ffffff;
+        t >>= 26;
+    }
+    r[10] += t;
+#else
+    int64_t tb = b;
+    int64_t t[10];
+
+    t[ 0] = Q6_P_mpy_RR(tb, a[ 0]);
+    t[ 1] = Q6_P_mpy_RR(tb, a[ 1]);
+    t[ 2] = Q6_P_mpy_RR(tb, a[ 2]);
+    t[ 3] = Q6_P_mpy_RR(tb, a[ 3]);
+    t[ 4] = Q6_P_mpy_RR(tb, a[ 4]);
+    t[ 5] = Q6_P_mpy_RR(tb, a[ 5]);
+    t[ 6] = Q6_P_mpy_RR(tb, a[ 6]);
+    t[ 7] = Q6_P_mpy_RR(tb, a[ 7]);
+    t[ 8] = Q6_P_mpy_RR(tb, a[ 8]);
+    t[ 9] = Q6_P_mpy_RR(tb, a[ 9]);
+    r[ 0] +=                 (t[ 0] & 0x3ffffff);
+    r[ 1] += (t[ 0] >> 26) + (t[ 1] & 0x3ffffff);
+    r[ 2] += (t[ 1] >> 26) + (t[ 2] & 0x3ffffff);
+    r[ 3] += (t[ 2] >> 26) + (t[ 3] & 0x3ffffff);
+    r[ 4] += (t[ 3] >> 26) + (t[ 4] & 0x3ffffff);
+    r[ 5] += (t[ 4] >> 26) + (t[ 5] & 0x3ffffff);
+    r[ 6] += (t[ 5] >> 26) + (t[ 6] & 0x3ffffff);
+    r[ 7] += (t[ 6] >> 26) + (t[ 7] & 0x3ffffff);
+    r[ 8] += (t[ 7] >> 26) + (t[ 8] & 0x3ffffff);
+    r[ 9] += (t[ 8] >> 26) + (t[ 9] & 0x3ffffff);
+    r[10] +=  t[ 9] >> 26;
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Shift the result in the high 256 bits down to the bottom.
+ *
+ * r  A single precision number.
+ * a  A single precision number.
+ */
+static void sp_256_mont_shift_10(sp_digit* r, const sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    sp_digit n, s;
+
+    s = a[10];
+    n = a[9] >> 22;
+    for (i = 0; i < 9; i++) {
+        n += (s & 0x3ffffff) << 4;
+        r[i] = n & 0x3ffffff;
+        n >>= 26;
+        s = a[11 + i] + (s >> 26);
+    }
+    n += s << 4;
+    r[9] = n;
+#else
+    sp_digit n, s;
+
+    s = a[10]; n = a[9] >> 22;
+    n += (s & 0x3ffffff) << 4; r[ 0] = Q6_R_and_RR(n, 0x3ffffff);
+    n >>= 26; s = a[11] + (s >> 26);
+    n += (s & 0x3ffffff) << 4; r[ 1] = Q6_R_and_RR(n, 0x3ffffff);
+    n >>= 26; s = a[12] + (s >> 26);
+    n += (s & 0x3ffffff) << 4; r[ 2] = Q6_R_and_RR(n, 0x3ffffff);
+    n >>= 26; s = a[13] + (s >> 26);
+    n += (s & 0x3ffffff) << 4; r[ 3] = Q6_R_and_RR(n, 0x3ffffff);
+    n >>= 26; s = a[14] + (s >> 26);
+    n += (s & 0x3ffffff) << 4; r[ 4] = Q6_R_and_RR(n, 0x3ffffff);
+    n >>= 26; s = a[15] + (s >> 26);
+    n += (s & 0x3ffffff) << 4; r[ 5] = Q6_R_and_RR(n, 0x3ffffff);
+    n >>= 26; s = a[16] + (s >> 26);
+    n += (s & 0x3ffffff) << 4; r[ 6] = Q6_R_and_RR(n, 0x3ffffff);
+    n >>= 26; s = a[17] + (s >> 26);
+    n += (s & 0x3ffffff) << 4; r[ 7] = Q6_R_and_RR(n, 0x3ffffff);
+    n >>= 26; s = a[18] + (s >> 26);
+    n += (s & 0x3ffffff) << 4; r[ 8] = Q6_R_and_RR(n, 0x3ffffff);
+    n >>= 26; s = a[19] + (s >> 26);
+    n += s << 4;              r[ 9] = n;
+#endif /* WOLFSSL_SP_SMALL */
+    XMEMSET(&r[10], 0, sizeof(*r) * 10U);
+}
+
+
+/* Reduce the number back to 256 bits using Montgomery reduction.
+ *
+ * a   A single precision number to reduce in place.
+ * m   The single precision number representing the modulus.
+ * mp  The digit representing the negative inverse of m mod 2^n.
+ */
+static void sp_256_mont_reduce_10(sp_digit* a, const sp_digit* m, sp_digit mp)
+{
+    sp_digit mu;
+
+
+    /* unrolled for loops due to unexpected behavior with -O optimizations */
+    if (mp != 1) {
+        mu = Q6_P_mpy_RR(a[0], mp) & 0x3ffffff;
+        sp_256_mul_add_10(a+0, m, mu);
+        a[0+1] += a[0] >> 26;
+
+        mu = Q6_P_mpy_RR(a[1], mp) & 0x3ffffff;
+        sp_256_mul_add_10(a+1, m, mu);
+        a[1+1] += a[1] >> 26;
+
+        mu = Q6_P_mpy_RR(a[2], mp) & 0x3ffffff;
+        sp_256_mul_add_10(a+2, m, mu);
+        a[2+1] += a[2] >> 26;
+
+        mu = Q6_P_mpy_RR(a[3], mp) & 0x3ffffff;
+        sp_256_mul_add_10(a+3, m, mu);
+        a[3+1] += a[3] >> 26;
+
+        mu = Q6_P_mpy_RR(a[4], mp) & 0x3ffffff;
+        sp_256_mul_add_10(a+4, m, mu);
+        a[4+1] += a[4] >> 26;
+
+        mu = Q6_P_mpy_RR(a[5], mp) & 0x3ffffff;
+        sp_256_mul_add_10(a+5, m, mu);
+        a[5+1] += a[5] >> 26;
+
+        mu = Q6_P_mpy_RR(a[6], mp) & 0x3ffffff;
+        sp_256_mul_add_10(a+6, m, mu);
+        a[6+1] += a[6] >> 26;
+
+        mu = Q6_P_mpy_RR(a[7], mp) & 0x3ffffff;
+        sp_256_mul_add_10(a+7, m, mu);
+        a[7+1] += a[7] >> 26;
+
+        mu = Q6_P_mpy_RR(a[8], mp) & 0x3ffffff;
+        sp_256_mul_add_10(a+8, m, mu);
+        a[8+1] += a[8] >> 26;
+
+        mu = Q6_P_mpy_RR(a[9], mp) & 0x3fffffL;
+        sp_256_mul_add_10(a+9, m, mu);
+        a[9+1] += a[9] >> 26;
+        a[9] &= 0x3ffffff;
+    }
+    else {
+        mu = Q6_P_mpy_RR(a[0], mp) & 0x3ffffff;
+        sp_256_mul_add_10(a+0, p256_mod, mu);
+        a[0+1] += a[0] >> 26;
+
+        mu = Q6_P_mpy_RR(a[1], mp) & 0x3ffffff;
+        sp_256_mul_add_10(a+1, p256_mod, mu);
+        a[1+1] += a[1] >> 26;
+
+        mu = Q6_P_mpy_RR(a[2], mp) & 0x3ffffff;
+        sp_256_mul_add_10(a+2, p256_mod, mu);
+        a[2+1] += a[2] >> 26;
+
+        mu = Q6_P_mpy_RR(a[3], mp) & 0x3ffffff;
+        sp_256_mul_add_10(a+3, p256_mod, mu);
+        a[3+1] += a[3] >> 26;
+
+        mu = Q6_P_mpy_RR(a[4], mp) & 0x3ffffff;
+        sp_256_mul_add_10(a+4, p256_mod, mu);
+        a[4+1] += a[4] >> 26;
+
+        mu = Q6_P_mpy_RR(a[5], mp) & 0x3ffffff;
+        sp_256_mul_add_10(a+5, p256_mod, mu);
+        a[5+1] += a[5] >> 26;
+
+        mu = Q6_P_mpy_RR(a[6], mp) & 0x3ffffff;
+        sp_256_mul_add_10(a+6, p256_mod, mu);
+        a[6+1] += a[6] >> 26;
+
+        mu = Q6_P_mpy_RR(a[7], mp) & 0x3ffffff;
+        sp_256_mul_add_10(a+7, p256_mod, mu);
+        a[7+1] += a[7] >> 26;
+
+        mu = Q6_P_mpy_RR(a[8], mp) & 0x3ffffff;
+        sp_256_mul_add_10(a+8, p256_mod, mu);
+        a[8+1] += a[8] >> 26;
+
+        mu = Q6_P_mpy_RR(a[9], mp) & 0x3fffffL;
+        sp_256_mul_add_10(a+9, p256_mod, mu);
+        a[9+1] += a[9] >> 26;
+        a[9] &= 0x3ffffff;
+    }
+
+
+    sp_256_mont_shift_10(a, a);
+    sp_256_cond_sub_10(a, a, m, 0 - (((a[9] >> 22) > 0) ?
+            (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_10(a);
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_256_mul_10(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+#if 1
+    int64_t t0   = Q6_P_mpy_RR(a[0], b[0]);
+    int64_t t1   = Q6_P_mpy_RR(a[0], b[1])
+                 + Q6_P_mpy_RR(a[1], b[0]);
+    int64_t t2   = Q6_P_mpy_RR(a[0], b[2])
+                 + Q6_P_mpy_RR(a[1], b[1])
+                 + Q6_P_mpy_RR(a[2], b[0]);
+    int64_t t3   = Q6_P_mpy_RR(a[0], b[3])
+                 + Q6_P_mpy_RR(a[1], b[2])
+                 + Q6_P_mpy_RR(a[2], b[1])
+                 + Q6_P_mpy_RR(a[3], b[0]);
+    int64_t t4   = Q6_P_mpy_RR(a[0], b[4])
+                 + Q6_P_mpy_RR(a[1], b[3])
+                 + Q6_P_mpy_RR(a[2], b[2])
+                 + Q6_P_mpy_RR(a[3], b[1])
+                 + Q6_P_mpy_RR(a[4], b[0]);
+    int64_t t5   = Q6_P_mpy_RR(a[0], b[5])
+                 + Q6_P_mpy_RR(a[1], b[4])
+                 + Q6_P_mpy_RR(a[2], b[3])
+                 + Q6_P_mpy_RR(a[3], b[2])
+                 + Q6_P_mpy_RR(a[4], b[1])
+                 + Q6_P_mpy_RR(a[5], b[0]);
+    int64_t t6   = Q6_P_mpy_RR(a[0], b[6])
+                 + Q6_P_mpy_RR(a[1], b[5])
+                 + Q6_P_mpy_RR(a[2], b[4])
+                 + Q6_P_mpy_RR(a[3], b[3])
+                 + Q6_P_mpy_RR(a[4], b[2])
+                 + Q6_P_mpy_RR(a[5], b[1])
+                 + Q6_P_mpy_RR(a[6], b[0]);
+    int64_t t7   = Q6_P_mpy_RR(a[0], b[7])
+                 + Q6_P_mpy_RR(a[1], b[6])
+                 + Q6_P_mpy_RR(a[2], b[5])
+                 + Q6_P_mpy_RR(a[3], b[4])
+                 + Q6_P_mpy_RR(a[4], b[3])
+                 + Q6_P_mpy_RR(a[5], b[2])
+                 + Q6_P_mpy_RR(a[6], b[1])
+                 + Q6_P_mpy_RR(a[7], b[0]);
+    int64_t t8   = Q6_P_mpy_RR(a[0], b[8])
+                 + Q6_P_mpy_RR(a[1], b[7])
+                 + Q6_P_mpy_RR(a[2], b[6])
+                 + Q6_P_mpy_RR(a[3], b[5])
+                 + Q6_P_mpy_RR(a[4], b[4])
+                 + Q6_P_mpy_RR(a[5], b[3])
+                 + Q6_P_mpy_RR(a[6], b[2])
+                 + Q6_P_mpy_RR(a[7], b[1])
+                 + Q6_P_mpy_RR(a[8], b[0]);
+    int64_t t9   = Q6_P_mpy_RR(a[0], b[9])
+                 + Q6_P_mpy_RR(a[1], b[8])
+                 + Q6_P_mpy_RR(a[2], b[7])
+                 + Q6_P_mpy_RR(a[3], b[6])
+                 + Q6_P_mpy_RR(a[4], b[5])
+                 + Q6_P_mpy_RR(a[5], b[4])
+                 + Q6_P_mpy_RR(a[6], b[3])
+                 + Q6_P_mpy_RR(a[7], b[2])
+                 + Q6_P_mpy_RR(a[8], b[1])
+                 + Q6_P_mpy_RR(a[9], b[0]);
+    int64_t t10  = Q6_P_mpy_RR(a[1], b[9])
+                 + Q6_P_mpy_RR(a[2], b[8])
+                 + Q6_P_mpy_RR(a[3], b[7])
+                 + Q6_P_mpy_RR(a[4], b[6])
+                 + Q6_P_mpy_RR(a[5], b[5])
+                 + Q6_P_mpy_RR(a[6], b[4])
+                 + Q6_P_mpy_RR(a[7], b[3])
+                 + Q6_P_mpy_RR(a[8], b[2])
+                 + Q6_P_mpy_RR(a[9], b[1]);
+    int64_t t11  = Q6_P_mpy_RR(a[2], b[9])
+                 + Q6_P_mpy_RR(a[3], b[8])
+                 + Q6_P_mpy_RR(a[4], b[7])
+                 + Q6_P_mpy_RR(a[5], b[6])
+                 + Q6_P_mpy_RR(a[6], b[5])
+                 + Q6_P_mpy_RR(a[7], b[4])
+                 + Q6_P_mpy_RR(a[8], b[3])
+                 + Q6_P_mpy_RR(a[9], b[2]);
+    int64_t t12  = Q6_P_mpy_RR(a[3], b[9])
+                 + Q6_P_mpy_RR(a[4], b[8])
+                 + Q6_P_mpy_RR(a[5], b[7])
+                 + Q6_P_mpy_RR(a[6], b[6])
+                 + Q6_P_mpy_RR(a[7], b[5])
+                 + Q6_P_mpy_RR(a[8], b[4])
+                 + Q6_P_mpy_RR(a[9], b[3]);
+    int64_t t13  = Q6_P_mpy_RR(a[4], b[9])
+                 + Q6_P_mpy_RR(a[5], b[8])
+                 + Q6_P_mpy_RR(a[6], b[7])
+                 + Q6_P_mpy_RR(a[7], b[6])
+                 + Q6_P_mpy_RR(a[8], b[5])
+                 + Q6_P_mpy_RR(a[9], b[4]);
+    int64_t t14  = Q6_P_mpy_RR(a[5], b[9])
+                 + Q6_P_mpy_RR(a[6], b[8])
+                 + Q6_P_mpy_RR(a[7], b[7])
+                 + Q6_P_mpy_RR(a[8], b[6])
+                 + Q6_P_mpy_RR(a[9], b[5]);
+    int64_t t15  = Q6_P_mpy_RR(a[6], b[9])
+                 + Q6_P_mpy_RR(a[7], b[8])
+                 + Q6_P_mpy_RR(a[8], b[7])
+                 + Q6_P_mpy_RR(a[9], b[6]);
+    int64_t t16  = Q6_P_mpy_RR(a[7], b[9])
+                 + Q6_P_mpy_RR(a[8], b[8])
+                 + Q6_P_mpy_RR(a[9], b[7]);
+    int64_t t17  = Q6_P_mpy_RR(a[8], b[9])
+                 + Q6_P_mpy_RR(a[9], b[8]);
+    int64_t t18  = Q6_P_mpy_RR(a[9], b[9]);
+
+
+    t1   += t0  >> 26; r[ 0] = t0  & 0x3ffffff;
+    t2   += t1  >> 26; r[ 1] = t1  & 0x3ffffff;
+    t3   += t2  >> 26; r[ 2] = t2  & 0x3ffffff;
+    t4   += t3  >> 26; r[ 3] = t3  & 0x3ffffff;
+    t5   += t4  >> 26; r[ 4] = t4  & 0x3ffffff;
+    t6   += t5  >> 26; r[ 5] = t5  & 0x3ffffff;
+    t7   += t6  >> 26; r[ 6] = t6  & 0x3ffffff;
+    t8   += t7  >> 26; r[ 7] = t7  & 0x3ffffff;
+    t9   += t8  >> 26; r[ 8] = t8  & 0x3ffffff;
+    t10  += t9  >> 26; r[ 9] = t9  & 0x3ffffff;
+    t11  += t10 >> 26; r[10] = t10 & 0x3ffffff;
+    t12  += t11 >> 26; r[11] = t11 & 0x3ffffff;
+    t13  += t12 >> 26; r[12] = t12 & 0x3ffffff;
+    t14  += t13 >> 26; r[13] = t13 & 0x3ffffff;
+    t15  += t14 >> 26; r[14] = t14 & 0x3ffffff;
+    t16  += t15 >> 26; r[15] = t15 & 0x3ffffff;
+    t17  += t16 >> 26; r[16] = t16 & 0x3ffffff;
+    t18  += t17 >> 26; r[17] = t17 & 0x3ffffff;
+    r[19] = (sp_digit)(t18 >> 26);
+                       r[18] = t18 & 0x3ffffff;
+#endif
+#if 0
+    /* Testing speeds with using HVX_Vectors */
+    {
+    int64_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15, t16, t17, t18;
+    HVX_Vector av, splat;
+    HVX_Vector vlow, vhi;
+    
+    av = Q6_V_vzero();
+    vlow = Q6_V_vzero();
+    vhi = Q6_V_vzero();
+    
+    XMEMCPY((byte*)&av, (byte*)a, 40);
+    
+    splat = Q6_V_vsplat_R(b[0]);
+    vlow = Q6_Vw_vmpyieo_VhVh(av, splat);
+    vlow = Q6_Vw_vmpyieacc_VwVwVuh(vlow, av, splat);
+    
+    vhi = Q6_Vw_vmpye_VwVuh(av, splat);
+    vhi = Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(vhi, av, splat);
+    unsigned int* loi = (unsigned int*)&vlow;
+    int* hii = (int*)&vhi;
+    
+    /* a[0] * b[0] */
+    t0 = loi[0] | ((int64_t)hii[0] << 31);
+    
+    /* a[1] * b[0] */
+    t1 = loi[1] | ((int64_t)hii[1] << 31);
+    
+    /* a[2] * b[0] */
+    t2 = loi[2] | ((int64_t)hii[2] << 31);
+    
+    /* a[3] * b[0] */
+    t3 = loi[3] | ((int64_t)hii[3] << 31);
+    
+    /* a[4] * b[0] */
+    t4 = loi[4] | ((int64_t)hii[4] << 31);
+    
+    /* a[5] * b[0] */
+    t5 = loi[5] | ((int64_t)hii[5] << 31);
+    
+    /* a[6] * b[0] */
+    t6 = loi[6] | ((int64_t)hii[6] << 31);
+    
+    /* a[7] * b[0] */
+    t7 = loi[7] | ((int64_t)hii[7] << 31);
+    
+    /* a[8] * b[0] */
+    t8 = loi[8] | ((int64_t)hii[8] << 31);
+    
+    /* a[9] * b[0] */
+    t9 = loi[9] | ((int64_t)hii[9] << 31);
+    
+    /* a[*] * b[1] */
+    splat = Q6_V_vsplat_R(b[1]);
+    vlow = Q6_Vw_vmpyieo_VhVh(av, splat);
+    vlow = Q6_Vw_vmpyieacc_VwVwVuh(vlow, av, splat);
+    vhi = Q6_Vw_vmpye_VwVuh(av, splat);
+    vhi = Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(vhi, av, splat);
+    loi = (unsigned int*)&vlow;
+    hii = (int*)&vhi;
+    
+    /* a[0] * b[1] */
+    t1 += (loi[0] | ((int64_t)hii[0] << 31));
+    
+    /* a[1] * b[1] */
+    t2 += (loi[1] | ((int64_t)hii[1] << 31));
+    
+    /* a[2] * b[1] */
+    t3 += (loi[2] | ((int64_t)hii[2] << 31));
+    
+    /* a[3] * b[1] */
+    t4 += (loi[3] | ((int64_t)hii[3] << 31));
+    
+    /* a[4] * b[1] */
+    t5 += (loi[4] | ((int64_t)hii[4] << 31));
+    
+    /* a[5] * b[1] */
+    t6 += (loi[5] | ((int64_t)hii[5] << 31));
+    
+    /* a[6] * b[1] */
+    t7 += (loi[6] | ((int64_t)hii[6] << 31));
+    
+    /* a[7] * b[1] */
+    t8 += (loi[7] | ((int64_t)hii[7] << 31));
+    
+    /* a[8] * b[1] */
+    t9 += (loi[8] | ((int64_t)hii[8] << 31));
+    
+    /* a[9] * b[1] */
+    t10 = (loi[9] | ((int64_t)hii[9] << 31));
+    
+    /* a[*] * b[2] */
+    splat = Q6_V_vsplat_R(b[2]);
+    vlow = Q6_Vw_vmpyieo_VhVh(av, splat);
+    vlow = Q6_Vw_vmpyieacc_VwVwVuh(vlow, av, splat);
+    vhi = Q6_Vw_vmpye_VwVuh(av, splat);
+    vhi = Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(vhi, av, splat);
+    loi = (unsigned int*)&vlow;
+    hii = (int*)&vhi;
+    
+    
+    /* a[0] * b[2] */
+    t2 += (loi[0] | ((int64_t)hii[0] << 31));
+    
+    /* a[1] * b[2] */
+    t3 += (loi[1] | ((int64_t)hii[1] << 31));
+    
+    /* a[2] * b[2] */
+    t4 += (loi[2] | ((int64_t)hii[2] << 31));
+    
+    /* a[3] * b[2] */
+    t5 += (loi[3] | ((int64_t)hii[3] << 31));
+    
+    /* a[4] * b[2] */
+    t6 += (loi[4] | ((int64_t)hii[4] << 31));
+    
+    /* a[5] * b[2] */
+    t7 += (loi[5] | ((int64_t)hii[5] << 31));
+    
+    /* a[6] * b[2] */
+    t8 += (loi[6] | ((int64_t)hii[6] << 31));
+    
+    /* a[7] * b[2] */
+    t9 += (loi[7] | ((int64_t)hii[7] << 31));
+    
+    /* a[8] * b[2] */
+    t10 += (loi[8] | ((int64_t)hii[8] << 31));
+    
+    /* a[9] * b[2] */
+    t11 = (loi[9] | ((int64_t)hii[9] << 31));
+    
+    
+    /* a[*] * b[3] */
+    splat = Q6_V_vsplat_R(b[3]);
+    vlow = Q6_Vw_vmpyieo_VhVh(av, splat);
+    vlow = Q6_Vw_vmpyieacc_VwVwVuh(vlow, av, splat);
+    vhi = Q6_Vw_vmpye_VwVuh(av, splat);
+    vhi = Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(vhi, av, splat);
+    loi = (unsigned int*)&vlow;
+    hii = (int*)&vhi;
+    
+    
+    /* a[0] * b[3] */
+    t3 += (loi[0] | ((int64_t)hii[0] << 31));
+    
+    /* a[1] * b[3] */
+    t4 += (loi[1] | ((int64_t)hii[1] << 31));
+    
+    /* a[2] * b[3] */
+    t5 += (loi[2] | ((int64_t)hii[2] << 31));
+    
+    /* a[3] * b[3] */
+    t6 += (loi[3] | ((int64_t)hii[3] << 31));
+    
+    /* a[4] * b[3] */
+    t7 += (loi[4] | ((int64_t)hii[4] << 31));
+    
+    /* a[5] * b[3] */
+    t8 += (loi[5] | ((int64_t)hii[5] << 31));
+    
+    /* a[6] * b[3] */
+    t9 += (loi[6] | ((int64_t)hii[6] << 31));
+    
+    /* a[7] * b[3] */
+    t10 += (loi[7] | ((int64_t)hii[7] << 31));
+    
+    /* a[8] * b[3] */
+    t11 += (loi[8] | ((int64_t)hii[8] << 31));
+    
+    /* a[9] * b[3] */
+    t12 = (loi[9] | ((int64_t)hii[9] << 31));
+    
+    
+    /* a[*] * b[4] */
+    splat = Q6_V_vsplat_R(b[4]);
+    vlow = Q6_Vw_vmpyieo_VhVh(av, splat);
+    vlow = Q6_Vw_vmpyieacc_VwVwVuh(vlow, av, splat);
+    vhi = Q6_Vw_vmpye_VwVuh(av, splat);
+    vhi = Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(vhi, av, splat);
+    loi = (unsigned int*)&vlow;
+    hii = (int*)&vhi;
+    
+    
+    /* a[0] * b[4] */
+    t4 += (loi[0] | ((int64_t)hii[0] << 31));
+    
+    /* a[1] * b[4] */
+    t5 += (loi[1] | ((int64_t)hii[1] << 31));
+    
+    /* a[2] * b[4] */
+    t6 += (loi[2] | ((int64_t)hii[2] << 31));
+    
+    /* a[3] * b[4] */
+    t7 += (loi[3] | ((int64_t)hii[3] << 31));
+    
+    /* a[4] * b[4] */
+    t8 += (loi[4] | ((int64_t)hii[4] << 31));
+    
+    /* a[5] * b[4] */
+    t9 += (loi[5] | ((int64_t)hii[5] << 31));
+    
+    /* a[6] * b[4] */
+    t10 += (loi[6] | ((int64_t)hii[6] << 31));
+    
+    /* a[7] * b[4] */
+    t11 += (loi[7] | ((int64_t)hii[7] << 31));
+    
+    /* a[8] * b[4] */
+    t12 += (loi[8] | ((int64_t)hii[8] << 31));
+    
+    /* a[9] * b[4] */
+    t13 = (loi[9] | ((int64_t)hii[9] << 31));
+    
+    
+    /* a[*] * b[5] */
+    splat = Q6_V_vsplat_R(b[5]);
+    vlow = Q6_Vw_vmpyieo_VhVh(av, splat);
+    vlow = Q6_Vw_vmpyieacc_VwVwVuh(vlow, av, splat);
+    vhi = Q6_Vw_vmpye_VwVuh(av, splat);
+    vhi = Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(vhi, av, splat);
+    loi = (unsigned int*)&vlow;
+    hii = (int*)&vhi;
+    
+    
+    /* a[0] * b[5] */
+    t5 += (loi[0] | ((int64_t)hii[0] << 31));
+    
+    /* a[1] * b[5] */
+    t6 += (loi[1] | ((int64_t)hii[1] << 31));
+    
+    /* a[2] * b[5] */
+    t7 += (loi[2] | ((int64_t)hii[2] << 31));
+    
+    /* a[3] * b[5] */
+    t8 += (loi[3] | ((int64_t)hii[3] << 31));
+    
+    /* a[4] * b[5] */
+    t9 += (loi[4] | ((int64_t)hii[4] << 31));
+    
+    /* a[5] * b[5] */
+    t10 += (loi[5] | ((int64_t)hii[5] << 31));
+    
+    /* a[6] * b[5] */
+    t11 += (loi[6] | ((int64_t)hii[6] << 31));
+    
+    /* a[7] * b[5] */
+    t12 += (loi[7] | ((int64_t)hii[7] << 31));
+    
+    /* a[8] * b[5] */
+    t13 += (loi[8] | ((int64_t)hii[8] << 31));
+    
+    /* a[9] * b[5] */
+    t14 = (loi[9] | ((int64_t)hii[9] << 31));
+    
+    
+    /* a[*] * b[6] */
+    splat = Q6_V_vsplat_R(b[6]);
+    vlow = Q6_Vw_vmpyieo_VhVh(av, splat);
+    vlow = Q6_Vw_vmpyieacc_VwVwVuh(vlow, av, splat);
+    vhi = Q6_Vw_vmpye_VwVuh(av, splat);
+    vhi = Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(vhi, av, splat);
+    loi = (unsigned int*)&vlow;
+    hii = (int*)&vhi;
+    
+    
+    /* a[0] * b[6] */
+    t6 += (loi[0] | ((int64_t)hii[0] << 31));
+    
+    /* a[1] * b[6] */
+    t7 += (loi[1] | ((int64_t)hii[1] << 31));
+    
+    /* a[2] * b[6] */
+    t8 += (loi[2] | ((int64_t)hii[2] << 31));
+    
+    /* a[3] * b[6] */
+    t9 += (loi[3] | ((int64_t)hii[3] << 31));
+    
+    /* a[4] * b[6] */
+    t10 += (loi[4] | ((int64_t)hii[4] << 31));
+    
+    /* a[5] * b[6] */
+    t11 += (loi[5] | ((int64_t)hii[5] << 31));
+    
+    /* a[6] * b[6] */
+    t12 += (loi[6] | ((int64_t)hii[6] << 31));
+    
+    /* a[7] * b[6] */
+    t13 += (loi[7] | ((int64_t)hii[7] << 31));
+    
+    /* a[8] * b[6] */
+    t14 += (loi[8] | ((int64_t)hii[8] << 31));
+    
+    /* a[9] * b[6] */
+    t15 = (loi[9] | ((int64_t)hii[9] << 31));
+    
+    
+    
+    /* a[*] * b[7] */
+    splat = Q6_V_vsplat_R(b[7]);
+    vlow = Q6_Vw_vmpyieo_VhVh(av, splat);
+    vlow = Q6_Vw_vmpyieacc_VwVwVuh(vlow, av, splat);
+    vhi = Q6_Vw_vmpye_VwVuh(av, splat);
+    vhi = Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(vhi, av, splat);
+    loi = (unsigned int*)&vlow;
+    hii = (int*)&vhi;
+    
+    
+    /* a[0] * b[7] */
+    t7 += (loi[0] | ((int64_t)hii[0] << 31));
+    
+    /* a[1] * b[7] */
+    t8 += (loi[1] | ((int64_t)hii[1] << 31));
+    
+    /* a[2] * b[7] */
+    t9 += (loi[2] | ((int64_t)hii[2] << 31));
+    
+    /* a[3] * b[7] */
+    t10 += (loi[3] | ((int64_t)hii[3] << 31));
+    
+    /* a[4] * b[7] */
+    t11 += (loi[4] | ((int64_t)hii[4] << 31));
+    
+    /* a[5] * b[7] */
+    t12 += (loi[5] | ((int64_t)hii[5] << 31));
+    
+    /* a[6] * b[7] */
+    t13 += (loi[6] | ((int64_t)hii[6] << 31));
+    
+    /* a[7] * b[7] */
+    t14 += (loi[7] | ((int64_t)hii[7] << 31));
+    
+    /* a[8] * b[7] */
+    t15 += (loi[8] | ((int64_t)hii[8] << 31));
+    
+    /* a[9] * b[7] */
+    t16 = (loi[9] | ((int64_t)hii[9] << 31));
+    
+    
+    /* a[*] * b[8] */
+    splat = Q6_V_vsplat_R(b[8]);
+    vlow = Q6_Vw_vmpyieo_VhVh(av, splat);
+    vlow = Q6_Vw_vmpyieacc_VwVwVuh(vlow, av, splat);
+    vhi = Q6_Vw_vmpye_VwVuh(av, splat);
+    vhi = Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(vhi, av, splat);
+    loi = (unsigned int*)&vlow;
+    hii = (int*)&vhi;
+    
+    
+    /* a[0] * b[8] */
+    t8 += (loi[0] | ((int64_t)hii[0] << 31));
+    
+    /* a[1] * b[8] */
+    t9 += (loi[1] | ((int64_t)hii[1] << 31));
+    
+    /* a[2] * b[8] */
+    t10 += (loi[2] | ((int64_t)hii[2] << 31));
+    
+    /* a[3] * b[8] */
+    t11 += (loi[3] | ((int64_t)hii[3] << 31));
+    
+    /* a[4] * b[8] */
+    t12 += (loi[4] | ((int64_t)hii[4] << 31));
+    
+    /* a[5] * b[8] */
+    t13 += (loi[5] | ((int64_t)hii[5] << 31));
+    
+    /* a[6] * b[8] */
+    t14 += (loi[6] | ((int64_t)hii[6] << 31));
+    
+    /* a[7] * b[8] */
+    t15 += (loi[7] | ((int64_t)hii[7] << 31));
+    
+    /* a[8] * b[8] */
+    t16 += (loi[8] | ((int64_t)hii[8] << 31));
+    
+    /* a[9] * b[8] */
+    t17 = (loi[9] | ((int64_t)hii[9] << 31));
+    
+    
+    /* a[*] * b[9] */
+    splat = Q6_V_vsplat_R(b[9]);
+    vlow = Q6_Vw_vmpyieo_VhVh(av, splat);
+    vlow = Q6_Vw_vmpyieacc_VwVwVuh(vlow, av, splat);
+    vhi = Q6_Vw_vmpye_VwVuh(av, splat);
+    vhi = Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(vhi, av, splat);
+    loi = (unsigned int*)&vlow;
+    hii = (int*)&vhi;
+    
+    
+    /* a[0] * b[9] */
+    t9 += (loi[0] | ((int64_t)hii[0] << 31));
+    
+    /* a[1] * b[9] */
+    t10 += (loi[1] | ((int64_t)hii[1] << 31));
+    
+    /* a[2] * b[9] */
+    t11 += (loi[2] | ((int64_t)hii[2] << 31));
+    
+    /* a[3] * b[9] */
+    t12 += (loi[3] | ((int64_t)hii[3] << 31));
+    
+    /* a[4] * b[9] */
+    t13 += (loi[4] | ((int64_t)hii[4] << 31));
+    
+    /* a[5] * b[9] */
+    t14 += (loi[5] | ((int64_t)hii[5] << 31));
+    
+    /* a[6] * b[9] */
+    t15 += (loi[6] | ((int64_t)hii[6] << 31));
+    
+    /* a[7] * b[9] */
+    t16 += (loi[7] | ((int64_t)hii[7] << 31));
+    
+    /* a[8] * b[9] */
+    t17 += (loi[8] | ((int64_t)hii[8] << 31));
+    
+    /* a[9] * b[9] */
+    t18 = (loi[9] | ((int64_t)hii[9] << 31));
+    
+        t1   += t0  >> 26; r[ 0] = t0  & 0x3ffffff;
+        t2   += t1  >> 26; r[ 1] = t1  & 0x3ffffff;
+        t3   += t2  >> 26; r[ 2] = t2  & 0x3ffffff;
+        t4   += t3  >> 26; r[ 3] = t3  & 0x3ffffff;
+        t5   += t4  >> 26; r[ 4] = t4  & 0x3ffffff;
+        t6   += t5  >> 26; r[ 5] = t5  & 0x3ffffff;
+        t7   += t6  >> 26; r[ 6] = t6  & 0x3ffffff;
+        t8   += t7  >> 26; r[ 7] = t7  & 0x3ffffff;
+        t9   += t8  >> 26; r[ 8] = t8  & 0x3ffffff;
+        t10  += t9  >> 26; r[ 9] = t9  & 0x3ffffff;
+        t11  += t10 >> 26; r[10] = t10 & 0x3ffffff;
+        t12  += t11 >> 26; r[11] = t11 & 0x3ffffff;
+        t13  += t12 >> 26; r[12] = t12 & 0x3ffffff;
+        t14  += t13 >> 26; r[13] = t13 & 0x3ffffff;
+        t15  += t14 >> 26; r[14] = t14 & 0x3ffffff;
+        t16  += t15 >> 26; r[15] = t15 & 0x3ffffff;
+        t17  += t16 >> 26; r[16] = t16 & 0x3ffffff;
+        t18  += t17 >> 26; r[17] = t17 & 0x3ffffff;
+        r[19] = (sp_digit)(t18 >> 26);
+                           r[18] = t18 & 0x3ffffff;
+    }
+#endif
+}
+
+
+/* Multiply two Montogmery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r   Result of multiplication.
+ * a   First number to multiply in Montogmery form.
+ * b   Second number to multiply in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_256_mont_mul_10(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_256_mul_10(r, a, b);
+    sp_256_mont_reduce_10(r, m, mp);
+}
+
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_256_sqr_10(sp_digit* r, const sp_digit* a)
+{
+    int64_t t0   = Q6_P_mpy_RR(a[0], a[0]);
+    int64_t t1   = Q6_P_mpy_RR(a[0], a[1]) * 2;
+    int64_t t2   = Q6_P_mpy_RR(a[0], a[2]) * 2
+                 + Q6_P_mpy_RR(a[1], a[1]);
+    int64_t t3   = (Q6_P_mpy_RR(a[0], a[3])
+                 + Q6_P_mpy_RR(a[1], a[2])) * 2;
+    int64_t t4   = (Q6_P_mpy_RR(a[ 0], a[ 4])
+                 + Q6_P_mpy_RR(a[ 1], a[ 3])) * 2
+                 + Q6_P_mpy_RR(a[ 2], a[ 2]);
+    int64_t t5   = (Q6_P_mpy_RR(a[ 0], a[ 5])
+                 + Q6_P_mpy_RR(a[ 1], a[ 4])
+                 + Q6_P_mpy_RR(a[ 2], a[ 3])) * 2;
+    int64_t t6   = (Q6_P_mpy_RR(a[ 0], a[ 6])
+                 + Q6_P_mpy_RR(a[ 1], a[ 5])
+                 + Q6_P_mpy_RR(a[ 2], a[ 4])) * 2
+                 + Q6_P_mpy_RR(a[ 3], a[ 3]);
+    int64_t t7   = (Q6_P_mpy_RR(a[ 0], a[ 7])
+                 + Q6_P_mpy_RR(a[ 1], a[ 6])
+                 + Q6_P_mpy_RR(a[ 2], a[ 5])
+                 + Q6_P_mpy_RR(a[ 3], a[ 4])) * 2;
+    int64_t t8   = (Q6_P_mpy_RR(a[ 0], a[ 8])
+                 + Q6_P_mpy_RR(a[ 1], a[ 7])
+                 + Q6_P_mpy_RR(a[ 2], a[ 6])
+                 + Q6_P_mpy_RR(a[ 3], a[ 5])) * 2
+                 + Q6_P_mpy_RR(a[ 4], a[ 4]);
+    int64_t t9   = (Q6_P_mpy_RR(a[ 0], a[ 9])
+                 + Q6_P_mpy_RR(a[ 1], a[ 8])
+                 + Q6_P_mpy_RR(a[ 2], a[ 7])
+                 + Q6_P_mpy_RR(a[ 3], a[ 6])
+                 + Q6_P_mpy_RR(a[ 4], a[ 5])) * 2;
+    int64_t t10  = (Q6_P_mpy_RR(a[ 1], a[ 9])
+                 + Q6_P_mpy_RR(a[ 2], a[ 8])
+                 + Q6_P_mpy_RR(a[ 3], a[ 7])
+                 + Q6_P_mpy_RR(a[ 4], a[ 6])) * 2
+                 + Q6_P_mpy_RR(a[ 5], a[ 5]);
+    int64_t t11  = (Q6_P_mpy_RR(a[ 2], a[ 9])
+                 + Q6_P_mpy_RR(a[ 3], a[ 8])
+                 + Q6_P_mpy_RR(a[ 4], a[ 7])
+                 + Q6_P_mpy_RR(a[ 5], a[ 6])) * 2;
+    int64_t t12  = (Q6_P_mpy_RR(a[ 3], a[ 9])
+                 + Q6_P_mpy_RR(a[ 4], a[ 8])
+                 + Q6_P_mpy_RR(a[ 5], a[ 7])) * 2
+                 + Q6_P_mpy_RR(a[ 6], a[ 6]);
+    int64_t t13  = (Q6_P_mpy_RR(a[ 4], a[ 9])
+                 + Q6_P_mpy_RR(a[ 5], a[ 8])
+                 + Q6_P_mpy_RR(a[ 6], a[ 7])) * 2;
+    int64_t t14  = (Q6_P_mpy_RR(a[ 5], a[ 9])
+                 + Q6_P_mpy_RR(a[ 6], a[ 8])) * 2
+                 + Q6_P_mpy_RR(a[ 7], a[ 7]);
+    int64_t t15  =( Q6_P_mpy_RR(a[ 6], a[ 9])
+                 + Q6_P_mpy_RR(a[ 7], a[ 8])) * 2;
+    int64_t t16  = Q6_P_mpy_RR(a[ 7], a[ 9]) * 2
+                 + Q6_P_mpy_RR(a[ 8], a[ 8]);
+    int64_t t17  = Q6_P_mpy_RR(a[ 8], a[ 9]) * 2;
+    int64_t t18  = Q6_P_mpy_RR(a[ 9], a[ 9]);
+
+    t1   += t0  >> 26; r[ 0] = t0  & 0x3ffffff;
+    t2   += t1  >> 26; r[ 1] = t1  & 0x3ffffff;
+    t3   += t2  >> 26; r[ 2] = t2  & 0x3ffffff;
+    t4   += t3  >> 26; r[ 3] = t3  & 0x3ffffff;
+    t5   += t4  >> 26; r[ 4] = t4  & 0x3ffffff;
+    t6   += t5  >> 26; r[ 5] = t5  & 0x3ffffff;
+    t7   += t6  >> 26; r[ 6] = t6  & 0x3ffffff;
+    t8   += t7  >> 26; r[ 7] = t7  & 0x3ffffff;
+    t9   += t8  >> 26; r[ 8] = t8  & 0x3ffffff;
+    t10  += t9  >> 26; r[ 9] = t9  & 0x3ffffff;
+    t11  += t10 >> 26; r[10] = t10 & 0x3ffffff;
+    t12  += t11 >> 26; r[11] = t11 & 0x3ffffff;
+    t13  += t12 >> 26; r[12] = t12 & 0x3ffffff;
+    t14  += t13 >> 26; r[13] = t13 & 0x3ffffff;
+    t15  += t14 >> 26; r[14] = t14 & 0x3ffffff;
+    t16  += t15 >> 26; r[15] = t15 & 0x3ffffff;
+    t17  += t16 >> 26; r[16] = t16 & 0x3ffffff;
+    t18  += t17 >> 26; r[17] = t17 & 0x3ffffff;
+    r[19] = (sp_digit)(t18 >> 26);
+                       r[18] = t18 & 0x3ffffff;
+}
+
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montogmery form.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_256_mont_sqr_10(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_256_sqr_10(r, a);
+    sp_256_mont_reduce_10(r, m, mp);
+}
+
+#if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY)
+/* Square the Montgomery form number a number of times. (r = a ^ n mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montogmery form.
+ * n   Number of times to square.
+ * m   Modulus (prime).
+ * mp  Montogmery mulitplier.
+ */
+static void sp_256_mont_sqr_n_10(sp_digit* r, const sp_digit* a, int n,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_256_mont_sqr_10(r, a, m, mp);
+    for (; n > 1; n--) {
+        sp_256_mont_sqr_10(r, r, m, mp);
+    }
+}
+
+#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */
+#ifdef WOLFSSL_SP_SMALL
+/* Mod-2 for the P256 curve. */
+static const uint32_t p256_mod_2[8] = {
+    0xfffffffdU,0xffffffffU,0xffffffffU,0x00000000U,0x00000000U,0x00000000U,
+    0x00000001U,0xffffffffU
+};
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the modulus (prime) of the
+ * P256 curve. (r = 1 / a mod m)
+ *
+ * r   Inverse result.
+ * a   Number to invert.
+ * td  Temporary data.
+ */
+static void sp_256_mont_inv_10(sp_digit* r, const sp_digit* a, sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    XMEMCPY(t, a, sizeof(sp_digit) * 10);
+    for (i=254; i>=0; i--) {
+        sp_256_mont_sqr_10(t, t, p256_mod, p256_mp_mod);
+        if (p256_mod_2[i / 32] & ((sp_digit)1 << (i % 32)))
+            sp_256_mont_mul_10(t, t, a, p256_mod, p256_mp_mod);
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 10);
+#else
+    sp_digit* t = td;
+    sp_digit* t2 = td + Q6_P_mpy_RR(2, 10);
+    sp_digit* t3 = td + Q6_P_mpy_RR(4, 10);
+
+    /* t = a^2 */
+    sp_256_mont_sqr_10(t, a, p256_mod, p256_mp_mod);
+    /* t = a^3 = t * a */
+    sp_256_mont_mul_10(t, t, a, p256_mod, p256_mp_mod);
+    /* t2= a^c = t ^ 2 ^ 2 */
+    sp_256_mont_sqr_n_10(t2, t, 2, p256_mod, p256_mp_mod);
+    /* t3= a^d = t2 * a */
+    sp_256_mont_mul_10(t3, t2, a, p256_mod, p256_mp_mod);
+    /* t = a^f = t2 * t */
+    sp_256_mont_mul_10(t, t2, t, p256_mod, p256_mp_mod);
+    /* t2= a^f0 = t ^ 2 ^ 4 */
+    sp_256_mont_sqr_n_10(t2, t, 4, p256_mod, p256_mp_mod);
+    /* t3= a^fd = t2 * t3 */
+    sp_256_mont_mul_10(t3, t2, t3, p256_mod, p256_mp_mod);
+    /* t = a^ff = t2 * t */
+    sp_256_mont_mul_10(t, t2, t, p256_mod, p256_mp_mod);
+    /* t2= a^ff00 = t ^ 2 ^ 8 */
+    sp_256_mont_sqr_n_10(t2, t, 8, p256_mod, p256_mp_mod);
+    /* t3= a^fffd = t2 * t3 */
+    sp_256_mont_mul_10(t3, t2, t3, p256_mod, p256_mp_mod);
+    /* t = a^ffff = t2 * t */
+    sp_256_mont_mul_10(t, t2, t, p256_mod, p256_mp_mod);
+    /* t2= a^ffff0000 = t ^ 2 ^ 16 */
+    sp_256_mont_sqr_n_10(t2, t, 16, p256_mod, p256_mp_mod);
+    /* t3= a^fffffffd = t2 * t3 */
+    sp_256_mont_mul_10(t3, t2, t3, p256_mod, p256_mp_mod);
+    /* t = a^ffffffff = t2 * t */
+    sp_256_mont_mul_10(t, t2, t, p256_mod, p256_mp_mod);
+    /* t = a^ffffffff00000000 = t ^ 2 ^ 32  */
+    sp_256_mont_sqr_n_10(t2, t, 32, p256_mod, p256_mp_mod);
+    /* t2= a^ffffffffffffffff = t2 * t */
+    sp_256_mont_mul_10(t, t2, t, p256_mod, p256_mp_mod);
+    /* t2= a^ffffffff00000001 = t2 * a */
+    sp_256_mont_mul_10(t2, t2, a, p256_mod, p256_mp_mod);
+    /* t2= a^ffffffff000000010000000000000000000000000000000000000000
+     *   = t2 ^ 2 ^ 160 */
+    sp_256_mont_sqr_n_10(t2, t2, 160, p256_mod, p256_mp_mod);
+    /* t2= a^ffffffff00000001000000000000000000000000ffffffffffffffff
+     *   = t2 * t */
+    sp_256_mont_mul_10(t2, t2, t, p256_mod, p256_mp_mod);
+    /* t2= a^ffffffff00000001000000000000000000000000ffffffffffffffff00000000
+     *   = t2 ^ 2 ^ 32 */
+    sp_256_mont_sqr_n_10(t2, t2, 32, p256_mod, p256_mp_mod);
+    /* r = a^ffffffff00000001000000000000000000000000fffffffffffffffffffffffd
+     *   = t2 * t3 */
+    sp_256_mont_mul_10(r, t2, t3, p256_mod, p256_mp_mod);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+
+/* Map the Montgomery form projective co-ordinate point to an affine point.
+ *
+ * r  Resulting affine co-ordinate point.
+ * p  Montgomery form projective co-ordinate point.
+ * t  Temporary ordinate data.
+ */
+static void sp_256_map_10(sp_point* r, const sp_point* p, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + Q6_P_mpy_RR(2, 10);
+    int32_t n;
+
+    sp_256_mont_inv_10(t1, p->z, t + 2*10);
+
+    sp_256_mont_sqr_10(t2, t1, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_10(t1, t2, t1, p256_mod, p256_mp_mod);
+
+    /* x /= z^2 */
+    sp_256_mont_mul_10(r->x, p->x, t2, p256_mod, p256_mp_mod);
+    XMEMSET(r->x + 10, 0, sizeof(r->x) / 2U);
+    sp_256_mont_reduce_10(r->x, p256_mod, p256_mp_mod);
+    /* Reduce x to less than modulus */
+    n = sp_256_cmp_10(r->x, p256_mod);
+    sp_256_cond_sub_10(r->x, r->x, p256_mod, 0 - ((n >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_10(r->x);
+
+    /* y /= z^3 */
+    sp_256_mont_mul_10(r->y, p->y, t1, p256_mod, p256_mp_mod);
+    XMEMSET(r->y + 10, 0, sizeof(r->y) / 2U);
+    sp_256_mont_reduce_10(r->y, p256_mod, p256_mp_mod);
+    /* Reduce y to less than modulus */
+    n = sp_256_cmp_10(r->y, p256_mod);
+    sp_256_cond_sub_10(r->y, r->y, p256_mod, 0 - ((n >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_10(r->y);
+
+    XMEMSET(r->z, 0, sizeof(r->z));
+    r->z[0] = 1;
+
+}
+
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_256_add_10(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+#if 0
+    r[ 0] = Q6_R_add_RR(a[0], b[0]);
+    r[ 1] = Q6_R_add_RR(a[1], b[1]);
+    r[ 2] = Q6_R_add_RR(a[2], b[2]);
+    r[ 3] = Q6_R_add_RR(a[3], b[3]);
+    r[ 4] = Q6_R_add_RR(a[4], b[4]);
+    r[ 5] = Q6_R_add_RR(a[5], b[5]);
+    r[ 6] = Q6_R_add_RR(a[6], b[6]);
+    r[ 7] = Q6_R_add_RR(a[7], b[7]);
+    r[ 8] = Q6_R_add_RR(a[8], b[8]);
+    r[ 9] = Q6_R_add_RR(a[9], b[9]);
+#endif
+#if 1
+    __asm__ __volatile__ (
+        "{ r1 = memw(%[a]+#0)  \n"
+        "  r2 = memw(%[b]+#0) }\n"
+        "{ r3 = memw(%[a]+#4)  \n"
+        "  r19 = add(r1,r2)    \n"
+        "  r4 = memw(%[b]+#4) }\n"
+        "{ r5 = memw(%[a]+#8)  \n"
+        "  r20 = add(r3,r4)    \n"
+        "  r6 = memw(%[b]+#8) }\n"
+        "{ memw(%[r]+#0) = r19 }\n"
+        "{ r7 = memw(%[a]+#12)  \n"
+        "  r21 = add(r5,r6)    \n"
+        "  r8 = memw(%[b]+#12) }\n"
+        "{ memw(%[r]+#4) = r20 }\n"
+        "{ r9 = memw(%[a]+#16)  \n"
+        "  r22 = add(r7,r8)     \n"
+        "  r10 = memw(%[b]+#16) }\n"
+        "{ memw(%[r]+#8) = r21 }\n"
+        "{ r11 = memw(%[a]+#20)  \n"
+        "  r23 = add(r9,r10)     \n"
+        "  r12 = memw(%[b]+#20) }\n"
+        "{ memw(%[r]+#12) = r22 }\n"
+        "{ r13 = memw(%[a]+#24)  \n"
+        "  r24 = add(r11,r12)     \n"
+        "  r14 = memw(%[b]+#24) }\n"
+        "{ memw(%[r]+#16) = r23 }\n"
+        "{ r15 = memw(%[a]+#28)  \n"
+        "  r25 = add(r13,r14)     \n"
+        "  r16 = memw(%[b]+#28) }\n"
+        "{ memw(%[r]+#20) = r24 }\n"
+        "{ r17 = memw(%[a]+#32)  \n"
+        "  r26 = add(r15,r16)     \n"
+        "  r18 = memw(%[b]+#32) }\n"
+        "{ memw(%[r]+#24) = r25 }\n"
+        "{ r5 = memw(%[a]+#36)  \n"
+        "  r19 = add(r17,r18)    \n"
+        "  r6 = memw(%[b]+#36) }\n"
+        "{ memw(%[r]+#28) = r26 }\n"
+        "{ r20 = add(r5,r6)      \n"
+            "  memw(%[r]+#32) = r19 }\n"
+        "{ memw(%[r]+#36) = r20 }\n"
+        : [r] "+r" (r)
+        : [a] "r"(a), [b] "r"(b)
+        : "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", "r24", "r25", "r26"
+    );
+#endif
+    return 0;
+}
+
+
+/* Add two Montgomery form numbers (r = a + b % m).
+ *
+ * r   Result of addition.
+ * a   First number to add in Montogmery form.
+ * b   Second number to add in Montogmery form.
+ * m   Modulus (prime).
+ */
+static void sp_256_mont_add_10(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    (void)sp_256_add_10(r, a, b);
+    sp_256_norm_10(r);
+    sp_256_cond_sub_10(r, r, m, 0 - (((r[9] >> 22) > 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_10(r);
+}
+
+
+/* Double a Montgomery form number (r = a + a % m).
+ *
+ * r   Result of doubling.
+ * a   Number to double in Montogmery form.
+ * m   Modulus (prime).
+ */
+static void sp_256_mont_dbl_10(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    (void)sp_256_add_10(r, a, a);
+    sp_256_norm_10(r);
+    sp_256_cond_sub_10(r, r, m, 0 - (((r[9] >> 22) > 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_10(r);
+}
+
+
+/* Triple a Montgomery form number (r = a + a + a % m).
+ *
+ * r   Result of Tripling.
+ * a   Number to triple in Montogmery form.
+ * m   Modulus (prime).
+ */
+static void sp_256_mont_tpl_10(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    (void)sp_256_add_10(r, a, a);
+    sp_256_norm_10(r);
+    sp_256_cond_sub_10(r, r, m, 0 - (((r[9] >> 22) > 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_10(r);
+    (void)sp_256_add_10(r, r, a);
+    sp_256_norm_10(r);
+    sp_256_cond_sub_10(r, r, m, 0 - (((r[9] >> 22) > 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_10(r);
+}
+
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_256_sub_10(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+#if 0
+    r[ 0] = Q6_R_sub_RR(a[0], b[0]);
+    r[ 1] = Q6_R_sub_RR(a[1], b[1]);
+    r[ 2] = Q6_R_sub_RR(a[2], b[2]);
+    r[ 3] = Q6_R_sub_RR(a[3], b[3]);
+    r[ 4] = Q6_R_sub_RR(a[4], b[4]);
+    r[ 5] = Q6_R_sub_RR(a[5], b[5]);
+    r[ 6] = Q6_R_sub_RR(a[6], b[6]);
+    r[ 7] = Q6_R_sub_RR(a[7], b[7]);
+    r[ 8] = Q6_R_sub_RR(a[8], b[8]);
+    r[ 9] = Q6_R_sub_RR(a[9], b[9]);
+#endif
+#if 1
+    __asm__ __volatile__ (
+        "{ r1 = memw(%[a]+#0)  \n"
+        "  r2 = memw(%[b]+#0) }\n"
+        "{ r3 = memw(%[a]+#4)  \n"
+        "  r19 = sub(r1,r2)    \n"
+        "  r4 = memw(%[b]+#4) }\n"
+        "{ r5 = memw(%[a]+#8)  \n"
+        "  r20 = sub(r3,r4)    \n"
+        "  r6 = memw(%[b]+#8) }\n"
+        "{ memw(%[r]+#0) = r19 }\n"
+        "{ r7 = memw(%[a]+#12)  \n"
+        "  r21 = sub(r5,r6)    \n"
+        "  r8 = memw(%[b]+#12) }\n"
+        "{ memw(%[r]+#4) = r20 }\n"
+        "{ r9 = memw(%[a]+#16)  \n"
+        "  r22 = sub(r7,r8)     \n"
+        "  r10 = memw(%[b]+#16) }\n"
+        "{ memw(%[r]+#8) = r21 }\n"
+        "{ r11 = memw(%[a]+#20)  \n"
+        "  r23 = sub(r9,r10)     \n"
+        "  r12 = memw(%[b]+#20) }\n"
+        "{ memw(%[r]+#12) = r22 }\n"
+        "{ r13 = memw(%[a]+#24)  \n"
+        "  r24 = sub(r11,r12)     \n"
+        "  r14 = memw(%[b]+#24) }\n"
+        "{ memw(%[r]+#16) = r23 }\n"
+        "{ r15 = memw(%[a]+#28)  \n"
+        "  r25 = sub(r13,r14)     \n"
+        "  r16 = memw(%[b]+#28) }\n"
+        "{ memw(%[r]+#20) = r24 }\n"
+        "{ r17 = memw(%[a]+#32)  \n"
+        "  r26 = sub(r15,r16)     \n"
+        "  r18 = memw(%[b]+#32) }\n"
+        "{ memw(%[r]+#24) = r25 }\n"
+        "{ r5 = memw(%[a]+#36)  \n"
+        "  r19 = sub(r17,r18)    \n"
+        "  r6 = memw(%[b]+#36) }\n"
+        "{ memw(%[r]+#28) = r26 }\n"
+        "{ r20 = sub(r5,r6)      \n"
+            "  memw(%[r]+#32) = r19 }\n"
+        "{ memw(%[r]+#36) = r20 }\n"
+        : [r] "+r" (r)
+        : [a] "r"(a), [b] "r"(b)
+        : "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", "r24", "r25", "r26"
+    );
+#endif
+    return 0;
+}
+
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r  A single precision number representing conditional add result.
+ * a  A single precision number to add with.
+ * b  A single precision number to add.
+ * m  Mask value to apply.
+ */
+static void sp_256_cond_add_10(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i = 0; i < 10; i++) {
+        r[i] = a[i] + (b[i] & m);
+    }
+#else
+    r[ 0] = Q6_R_add_RR(a[ 0], Q6_R_and_RR(b[ 0], m));
+    r[ 1] = Q6_R_add_RR(a[ 1], Q6_R_and_RR(b[ 1], m));
+    r[ 2] = Q6_R_add_RR(a[ 2], Q6_R_and_RR(b[ 2], m));
+    r[ 3] = Q6_R_add_RR(a[ 3], Q6_R_and_RR(b[ 3], m));
+    r[ 4] = Q6_R_add_RR(a[ 4], Q6_R_and_RR(b[ 4], m));
+    r[ 5] = Q6_R_add_RR(a[ 5], Q6_R_and_RR(b[ 5], m));
+    r[ 6] = Q6_R_add_RR(a[ 6], Q6_R_and_RR(b[ 6], m));
+    r[ 7] = Q6_R_add_RR(a[ 7], Q6_R_and_RR(b[ 7], m));
+    r[ 8] = Q6_R_add_RR(a[ 8], Q6_R_and_RR(b[ 8], m));
+    r[ 9] = Q6_R_add_RR(a[ 9], Q6_R_and_RR(b[ 9], m));
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+
+/* Subtract two Montgomery form numbers (r = a - b % m).
+ *
+ * r   Result of subtration.
+ * a   Number to subtract from in Montogmery form.
+ * b   Number to subtract with in Montogmery form.
+ * m   Modulus (prime).
+ */
+static void sp_256_mont_sub_10(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    (void)sp_256_sub_10(r, a, b);
+    sp_256_cond_add_10(r, r, m, r[9] >> 22);
+    sp_256_norm_10(r);
+}
+
+
+/* Shift number left one bit.
+ * Bottom bit is lost.
+ *
+ * r  Result of shift.
+ * a  Number to shift.
+ */
+SP_NOINLINE static void sp_256_rshift1_10(sp_digit* r, sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<9; i++) {
+        r[i] = ((a[i] >> 1) | (a[i + 1] << 25)) & 0x3ffffff;
+    }
+#else
+    r[0] = ((a[0] >> 1) | Q6_R_and_RR((a[1] << 25), 0x3ffffff));
+    r[1] = ((a[1] >> 1) | Q6_R_and_RR((a[2] << 25), 0x3ffffff));
+    r[2] = ((a[2] >> 1) | Q6_R_and_RR((a[3] << 25), 0x3ffffff));
+    r[3] = ((a[3] >> 1) | Q6_R_and_RR((a[4] << 25), 0x3ffffff));
+    r[4] = ((a[4] >> 1) | Q6_R_and_RR((a[5] << 25), 0x3ffffff));
+    r[5] = ((a[5] >> 1) | Q6_R_and_RR((a[6] << 25), 0x3ffffff));
+    r[6] = ((a[6] >> 1) | Q6_R_and_RR((a[7] << 25), 0x3ffffff));
+    r[7] = ((a[7] >> 1) | Q6_R_and_RR((a[8] << 25), 0x3ffffff));
+    r[8] = ((a[8] >> 1) | Q6_R_and_RR((a[9] << 25), 0x3ffffff));
+#endif
+    r[9] = a[9] >> 1;
+}
+
+
+/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
+ *
+ * r  Result of division by 2.
+ * a  Number to divide.
+ * m  Modulus (prime).
+ */
+static void sp_256_div2_10(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    sp_256_cond_add_10(r, a, m, 0 - (a[0] & 1));
+    sp_256_norm_10(r);
+    sp_256_rshift1_10(r, r);
+}
+
+
+/* Double the Montgomery form projective point p.
+ *
+ * r  Result of doubling point.
+ * p  Point to double.
+ * t  Temporary ordinate data.
+ */
+static void sp_256_proj_point_dbl_10(sp_point* r, const sp_point* p, sp_digit* t)
+{
+    sp_point* rp[2];
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*10;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+    int i;
+
+    /* When infinity don't double point passed in - constant time. */
+    rp[0] = r;
+
+    /*lint allow cast to different type of pointer*/
+    rp[1] = (sp_point*)t; /*lint !e9087 !e740*/
+    XMEMSET(rp[1], 0, sizeof(sp_point));
+    x = rp[p->infinity]->x;
+    y = rp[p->infinity]->y;
+    z = rp[p->infinity]->z;
+    /* Put point to double into result - good for infinity. */
+    if (r != p) {
+        for (i=0; i<10; i++) {
+            r->x[i] = p->x[i];
+        }
+        for (i=0; i<10; i++) {
+            r->y[i] = p->y[i];
+        }
+        for (i=0; i<10; i++) {
+            r->z[i] = p->z[i];
+        }
+        r->infinity = p->infinity;
+    }
+
+    /* T1 = Z * Z */
+    sp_256_mont_sqr_10(t1, z, p256_mod, p256_mp_mod);
+    /* Z = Y * Z */
+    sp_256_mont_mul_10(z, y, z, p256_mod, p256_mp_mod);
+    /* Z = 2Z */
+    sp_256_mont_dbl_10(z, z, p256_mod);
+    /* T2 = X - T1 */
+    sp_256_mont_sub_10(t2, x, t1, p256_mod);
+    /* T1 = X + T1 */
+    sp_256_mont_add_10(t1, x, t1, p256_mod);
+    /* T2 = T1 * T2 */
+    sp_256_mont_mul_10(t2, t1, t2, p256_mod, p256_mp_mod);
+    /* T1 = 3T2 */
+    sp_256_mont_tpl_10(t1, t2, p256_mod);
+    /* Y = 2Y */
+    sp_256_mont_dbl_10(y, y, p256_mod);
+    /* Y = Y * Y */
+    sp_256_mont_sqr_10(y, y, p256_mod, p256_mp_mod);
+    /* T2 = Y * Y */
+    sp_256_mont_sqr_10(t2, y, p256_mod, p256_mp_mod);
+    /* T2 = T2/2 */
+    sp_256_div2_10(t2, t2, p256_mod);
+    /* Y = Y * X */
+    sp_256_mont_mul_10(y, y, x, p256_mod, p256_mp_mod);
+    /* X = T1 * T1 */
+    sp_256_mont_mul_10(x, t1, t1, p256_mod, p256_mp_mod);
+    /* X = X - Y */
+    sp_256_mont_sub_10(x, x, y, p256_mod);
+    /* X = X - Y */
+    sp_256_mont_sub_10(x, x, y, p256_mod);
+    /* Y = Y - X */
+    sp_256_mont_sub_10(y, y, x, p256_mod);
+    /* Y = Y * T1 */
+    sp_256_mont_mul_10(y, y, t1, p256_mod, p256_mp_mod);
+    /* Y = Y - T2 */
+    sp_256_mont_sub_10(y, y, t2, p256_mod);
+
+}
+
+
+/* Compare two numbers to determine if they are equal.
+ * Constant time implementation.
+ *
+ * a  First number to compare.
+ * b  Second number to compare.
+ * returns 1 when equal and 0 otherwise.
+ */
+static int sp_256_cmp_equal_10(const sp_digit* a, const sp_digit* b)
+{
+    return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3]) |
+            (a[4] ^ b[4]) | (a[5] ^ b[5]) | (a[6] ^ b[6]) | (a[7] ^ b[7]) |
+            (a[8] ^ b[8]) | (a[9] ^ b[9])) == 0;
+}
+
+/* Add two Montgomery form projective points.
+ *
+ * r  Result of addition.
+ * p  First point to add.
+ * q  Second point to add.
+ * t  Temporary ordinate data.
+ */
+static void sp_256_proj_point_add_10(sp_point* r, const sp_point* p, const sp_point* q,
+        sp_digit* t)
+{
+    const sp_point* ap[2];
+    sp_point* rp[2];
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*10;
+    sp_digit* t3 = t + 4*10;
+    sp_digit* t4 = t + 6*10;
+    sp_digit* t5 = t + 8*10;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+    int i;
+
+    /* Ensure only the first point is the same as the result. */
+    if (q == r) {
+        const sp_point* a = p;
+        p = q;
+        q = a;
+    }
+
+    /* Check double */
+    (void)sp_256_sub_10(t1, p256_mod, q->y);
+    sp_256_norm_10(t1);
+    if ((sp_256_cmp_equal_10(p->x, q->x) & sp_256_cmp_equal_10(p->z, q->z) &
+        (sp_256_cmp_equal_10(p->y, q->y) | sp_256_cmp_equal_10(p->y, t1))) != 0) {
+        sp_256_proj_point_dbl_10(r, p, t);
+    }
+    else {
+        rp[0] = r;
+
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point));
+        x = rp[p->infinity | q->infinity]->x;
+        y = rp[p->infinity | q->infinity]->y;
+        z = rp[p->infinity | q->infinity]->z;
+
+        ap[0] = p;
+        ap[1] = q;
+        for (i=0; i<10; i++) {
+            r->x[i] = ap[p->infinity]->x[i];
+        }
+        for (i=0; i<10; i++) {
+            r->y[i] = ap[p->infinity]->y[i];
+        }
+        for (i=0; i<10; i++) {
+            r->z[i] = ap[p->infinity]->z[i];
+        }
+        r->infinity = ap[p->infinity]->infinity;
+
+        /* U1 = X1*Z2^2 */
+        sp_256_mont_sqr_10(t1, q->z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_10(t3, t1, q->z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_10(t1, t1, x, p256_mod, p256_mp_mod);
+        /* U2 = X2*Z1^2 */
+        sp_256_mont_sqr_10(t2, z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_10(t4, t2, z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_10(t2, t2, q->x, p256_mod, p256_mp_mod);
+        /* S1 = Y1*Z2^3 */
+        sp_256_mont_mul_10(t3, t3, y, p256_mod, p256_mp_mod);
+        /* S2 = Y2*Z1^3 */
+        sp_256_mont_mul_10(t4, t4, q->y, p256_mod, p256_mp_mod);
+        /* H = U2 - U1 */
+        sp_256_mont_sub_10(t2, t2, t1, p256_mod);
+        /* R = S2 - S1 */
+        sp_256_mont_sub_10(t4, t4, t3, p256_mod);
+        /* Z3 = H*Z1*Z2 */
+        sp_256_mont_mul_10(z, z, q->z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_10(z, z, t2, p256_mod, p256_mp_mod);
+        /* X3 = R^2 - H^3 - 2*U1*H^2 */
+        sp_256_mont_sqr_10(x, t4, p256_mod, p256_mp_mod);
+        sp_256_mont_sqr_10(t5, t2, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_10(y, t1, t5, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_10(t5, t5, t2, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_10(x, x, t5, p256_mod);
+        sp_256_mont_dbl_10(t1, y, p256_mod);
+        sp_256_mont_sub_10(x, x, t1, p256_mod);
+        /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
+        sp_256_mont_sub_10(y, y, x, p256_mod);
+        sp_256_mont_mul_10(y, y, t4, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_10(t5, t5, t3, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_10(y, y, t5, p256_mod);
+    }
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine co-ordinates.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_10(sp_point* r, const sp_point* g, const sp_digit* k,
+        int map, void* heap)
+{
+    sp_point* td;
+    sp_point* t[3];
+    sp_digit* tmp;
+    sp_digit n;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+    (void)heap;
+
+    td = (sp_point*)XMALLOC(sizeof(sp_point) * 3, heap, DYNAMIC_TYPE_ECC);
+    if (td == NULL)
+        err = MEMORY_E;
+    tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 5, heap,
+                                                              DYNAMIC_TYPE_ECC);
+    if (tmp == NULL)
+        err = MEMORY_E;
+
+    if (err == MP_OKAY) {
+        XMEMSET(td, 0, sizeof(*td) * 3);
+
+        t[0] = &td[0];
+        t[1] = &td[1];
+        t[2] = &td[2];
+
+        /* t[0] = {0, 0, 1} * norm */
+        t[0]->infinity = 1;
+        /* t[1] = {g->x, g->y, g->z} * norm */
+        err = sp_256_mod_mul_norm_10(t[1]->x, g->x, p256_mod);
+    }
+    if (err == MP_OKAY)
+        err = sp_256_mod_mul_norm_10(t[1]->y, g->y, p256_mod);
+    if (err == MP_OKAY)
+        err = sp_256_mod_mul_norm_10(t[1]->z, g->z, p256_mod);
+
+    if (err == MP_OKAY) {
+        i = 9;
+        c = 22;
+        n = k[i--] << (26 - c);
+        for (; ; c--) {
+            if (c == 0) {
+                if (i == -1)
+                    break;
+
+                n = k[i--];
+                c = 26;
+            }
+
+            y = (n >> 25) & 1;
+            n <<= 1;
+
+            sp_256_proj_point_add_10(t[y^1], t[0], t[1], tmp);
+
+            XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
+                                  ((size_t)t[1] & addr_mask[y])),
+                    sizeof(sp_point));
+            sp_256_proj_point_dbl_10(t[2], t[2], tmp);
+            XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
+                            ((size_t)t[1] & addr_mask[y])), t[2],
+                    sizeof(sp_point));
+        }
+
+        if (map != 0) {
+            sp_256_map_10(r, t[0], tmp);
+        }
+        else {
+            XMEMCPY(r, t[0], sizeof(sp_point));
+        }
+    }
+
+    if (tmp != NULL) {
+        XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 10 * 5);
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+    if (td != NULL) {
+        XMEMSET(td, 0, sizeof(sp_point) * 3);
+        XFREE(td, NULL, DYNAMIC_TYPE_ECC);
+    }
+
+    return err;
+}
+
+#elif defined(WOLFSSL_SP_CACHE_RESISTANT)
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine co-ordinates.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_10(sp_point* r, const sp_point* g, const sp_digit* k,
+        int map, void* heap)
+{
+#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
+    sp_point td[3];
+    sp_digit tmpd[2 * 10 * 5];
+#endif
+    sp_point* t;
+    sp_digit* tmp;
+    sp_digit n;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+    (void)heap;
+
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    sp_point td[3];
+    t = (sp_point*)XMALLOC(sizeof(*td) * 3, heap, DYNAMIC_TYPE_ECC);
+    if (t == NULL)
+        err = MEMORY_E;
+    tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 5, heap,
+                             DYNAMIC_TYPE_ECC);
+    if (tmp == NULL)
+        err = MEMORY_E;
+#else
+    t = td;
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        t[0] = &td[0];
+        t[1] = &td[1];
+        t[2] = &td[2];
+
+        /* t[0] = {0, 0, 1} * norm */
+        XMEMSET(&t[0], 0, sizeof(t[0]));
+        t[0].infinity = 1;
+        /* t[1] = {g->x, g->y, g->z} * norm */
+        err = sp_256_mod_mul_norm_10(t[1].x, g->x, p256_mod);
+    }
+    if (err == MP_OKAY)
+        err = sp_256_mod_mul_norm_10(t[1].y, g->y, p256_mod);
+    if (err == MP_OKAY)
+        err = sp_256_mod_mul_norm_10(t[1].z, g->z, p256_mod);
+
+    if (err == MP_OKAY) {
+        i = 9;
+        c = 22;
+        n = k[i--] << (26 - c);
+        for (; ; c--) {
+            if (c == 0) {
+                if (i == -1)
+                    break;
+
+                n = k[i--];
+                c = 26;
+            }
+
+            y = (n >> 25) & 1;
+            n <<= 1;
+
+            sp_256_proj_point_add_10(&t[y^1], &t[0], &t[1], tmp);
+
+            XMEMCPY(&t[2], (void*)(((size_t)&t[0] & addr_mask[y^1]) +
+                                 ((size_t)&t[1] & addr_mask[y])), sizeof(t[2]));
+            sp_256_proj_point_dbl_10(&t[2], &t[2], tmp);
+            XMEMCPY((void*)(((size_t)&t[0] & addr_mask[y^1]) +
+                          ((size_t)&t[1] & addr_mask[y])), &t[2], sizeof(t[2]));
+        }
+
+        if (map != 0) {
+            sp_256_map_10(r, &t[0], tmp);
+        }
+        else {
+            XMEMCPY(r, &t[0], sizeof(sp_point));
+        }
+    }
+
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    if (tmp != NULL) {
+        XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 10 * 5);
+        XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+    }
+    if (t != NULL) {
+        XMEMSET(t, 0, sizeof(sp_point) * 3);
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    ForceZero(tmpd, sizeof(tmpd));
+    ForceZero(td, sizeof(td));
+#endif
+
+    return err;
+}
+
+#else
+/* A table entry for pre-computed points. */
+typedef struct sp_table_entry {
+    sp_digit x[10] __attribute__((aligned(128)));
+    sp_digit y[10] __attribute__((aligned(128)));
+} sp_table_entry;
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine co-ordinates.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_fast_10(sp_point* r, const sp_point* g, const sp_digit* k,
+        int map, void* heap)
+{
+#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
+    sp_point td[16];
+    sp_point rtd;
+    sp_digit tmpd[2 * 10 * 5];
+#endif
+    sp_point* t;
+    sp_point* rt;
+    sp_digit* tmp;
+    sp_digit n;
+    int i;
+    int c, y;
+    int err;
+
+    (void)heap;
+
+    err = sp_ecc_point_new(heap, rtd, rt);
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    t = (sp_point*)XMALLOC(sizeof(sp_point) * 16, heap, DYNAMIC_TYPE_ECC);
+    if (t == NULL)
+        err = MEMORY_E;
+    tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 5, heap,
+                             DYNAMIC_TYPE_ECC);
+    if (tmp == NULL)
+        err = MEMORY_E;
+#else
+    t = td;
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        /* t[0] = {0, 0, 1} * norm */
+        XMEMSET(&t[0], 0, sizeof(t[0]));
+        t[0].infinity = 1;
+        /* t[1] = {g->x, g->y, g->z} * norm */
+        (void)sp_256_mod_mul_norm_10(t[1].x, g->x, p256_mod);
+        (void)sp_256_mod_mul_norm_10(t[1].y, g->y, p256_mod);
+        (void)sp_256_mod_mul_norm_10(t[1].z, g->z, p256_mod);
+        t[1].infinity = 0;
+        sp_256_proj_point_dbl_10(&t[ 2], &t[ 1], tmp);
+        t[ 2].infinity = 0;
+        sp_256_proj_point_add_10(&t[ 3], &t[ 2], &t[ 1], tmp);
+        t[ 3].infinity = 0;
+        sp_256_proj_point_dbl_10(&t[ 4], &t[ 2], tmp);
+        t[ 4].infinity = 0;
+        sp_256_proj_point_add_10(&t[ 5], &t[ 3], &t[ 2], tmp);
+        t[ 5].infinity = 0;
+        sp_256_proj_point_dbl_10(&t[ 6], &t[ 3], tmp);
+        t[ 6].infinity = 0;
+        sp_256_proj_point_add_10(&t[ 7], &t[ 4], &t[ 3], tmp);
+        t[ 7].infinity = 0;
+        sp_256_proj_point_dbl_10(&t[ 8], &t[ 4], tmp);
+        t[ 8].infinity = 0;
+        sp_256_proj_point_add_10(&t[ 9], &t[ 5], &t[ 4], tmp);
+        t[ 9].infinity = 0;
+        sp_256_proj_point_dbl_10(&t[10], &t[ 5], tmp);
+        t[10].infinity = 0;
+        sp_256_proj_point_add_10(&t[11], &t[ 6], &t[ 5], tmp);
+        t[11].infinity = 0;
+        sp_256_proj_point_dbl_10(&t[12], &t[ 6], tmp);
+        t[12].infinity = 0;
+        sp_256_proj_point_add_10(&t[13], &t[ 7], &t[ 6], tmp);
+        t[13].infinity = 0;
+        sp_256_proj_point_dbl_10(&t[14], &t[ 7], tmp);
+        t[14].infinity = 0;
+        sp_256_proj_point_add_10(&t[15], &t[ 8], &t[ 7], tmp);
+        t[15].infinity = 0;
+
+        i = 8;
+        n = k[i+1] << 6;
+        c = 18;
+        y = n >> 24;
+        XMEMCPY(rt, &t[y], sizeof(sp_point));
+        n <<= 8;
+        for (; i>=0 || c>=4; ) {
+            if (c < 4) {
+                n |= k[i--] << (6 - c);
+                c += 26;
+            }
+            y = (n >> 28) & 0xf;
+            n <<= 4;
+            c -= 4;
+
+            sp_256_proj_point_dbl_10(rt, rt, tmp);
+            sp_256_proj_point_dbl_10(rt, rt, tmp);
+            sp_256_proj_point_dbl_10(rt, rt, tmp);
+            sp_256_proj_point_dbl_10(rt, rt, tmp);
+
+            sp_256_proj_point_add_10(rt, rt, &t[y], tmp);
+        }
+
+        if (map != 0) {
+            sp_256_map_10(r, rt, tmp);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point));
+        }
+    }
+
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    if (tmp != NULL) {
+        XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 10 * 5);
+        XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+    }
+    if (t != NULL) {
+        XMEMSET(t, 0, sizeof(sp_point) * 16);
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    ForceZero(tmpd, sizeof(tmpd));
+    ForceZero(td, sizeof(td));
+#endif
+    sp_ecc_point_free(rt, 1, heap);
+
+    return err;
+}
+
+#ifdef FP_ECC
+/* Double the Montgomery form projective point p a number of times.
+ *
+ * r  Result of repeated doubling of point.
+ * p  Point to double.
+ * n  Number of times to double
+ * t  Temporary ordinate data.
+ */
+static void sp_256_proj_point_dbl_n_10(sp_point* r, const sp_point* p, int n,
+        sp_digit* t)
+{
+    sp_point* rp[2];
+    sp_digit* w = t;
+    sp_digit* a = t + 2*10;
+    sp_digit* b = t + 4*10;
+    sp_digit* t1 = t + 6*10;
+    sp_digit* t2 = t + 8*10;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+    int i;
+
+    rp[0] = r;
+
+    /*lint allow cast to different type of pointer*/
+    rp[1] = (sp_point*)t; /*lint !e9087 !e740*/
+    XMEMSET(rp[1], 0, sizeof(sp_point));
+    x = rp[p->infinity]->x;
+    y = rp[p->infinity]->y;
+    z = rp[p->infinity]->z;
+    if (r != p) {
+        for (i=0; i<10; i++) {
+            r->x[i] = p->x[i];
+        }
+        for (i=0; i<10; i++) {
+            r->y[i] = p->y[i];
+        }
+        for (i=0; i<10; i++) {
+            r->z[i] = p->z[i];
+        }
+        r->infinity = p->infinity;
+    }
+
+    /* Y = 2*Y */
+    sp_256_mont_dbl_10(y, y, p256_mod);
+    /* W = Z^4 */
+    sp_256_mont_sqr_10(w, z, p256_mod, p256_mp_mod);
+    sp_256_mont_sqr_10(w, w, p256_mod, p256_mp_mod);
+    while (n-- > 0) {
+        /* A = 3*(X^2 - W) */
+        sp_256_mont_sqr_10(t1, x, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_10(t1, t1, w, p256_mod);
+        sp_256_mont_tpl_10(a, t1, p256_mod);
+        /* B = X*Y^2 */
+        sp_256_mont_sqr_10(t2, y, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_10(b, t2, x, p256_mod, p256_mp_mod);
+        /* X = A^2 - 2B */
+        sp_256_mont_sqr_10(x, a, p256_mod, p256_mp_mod);
+        sp_256_mont_dbl_10(t1, b, p256_mod);
+        sp_256_mont_sub_10(x, x, t1, p256_mod);
+        /* Z = Z*Y */
+        sp_256_mont_mul_10(z, z, y, p256_mod, p256_mp_mod);
+        /* t2 = Y^4 */
+        sp_256_mont_sqr_10(t2, t2, p256_mod, p256_mp_mod);
+        if (n != 0) {
+            /* W = W*Y^4 */
+            sp_256_mont_mul_10(w, w, t2, p256_mod, p256_mp_mod);
+        }
+        /* y = 2*A*(B - X) - Y^4 */
+        sp_256_mont_sub_10(y, b, x, p256_mod);
+        sp_256_mont_mul_10(y, y, a, p256_mod, p256_mp_mod);
+        sp_256_mont_dbl_10(y, y, p256_mod);
+        sp_256_mont_sub_10(y, y, t2, p256_mod);
+    }
+    /* Y = Y/2 */
+    sp_256_div2_10(y, y, p256_mod);
+}
+
+#endif /* FP_ECC */
+
+
+/* Add two Montgomery form projective points. The second point has a q value of
+ * one.
+ * Only the first point can be the same pointer as the result point.
+ *
+ * r  Result of addition.
+ * p  First point to add.
+ * q  Second point to add.
+ * t  Temporary ordinate data.
+ */
+static void sp_256_proj_point_add_qz1_10(sp_point* r, const sp_point* p,
+        const sp_point* q, sp_digit* t)
+{
+    const sp_point* ap[2];
+    sp_point* rp[2];
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*10;
+    sp_digit* t3 = t + 4*10;
+    sp_digit* t4 = t + 6*10;
+    sp_digit* t5 = t + 8*10;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+    int i;
+
+    /* Check double */
+    (void)sp_256_sub_10(t1, p256_mod, q->y);
+    sp_256_norm_10(t1);
+    if ((sp_256_cmp_equal_10(p->x, q->x) & sp_256_cmp_equal_10(p->z, q->z) &
+        (sp_256_cmp_equal_10(p->y, q->y) | sp_256_cmp_equal_10(p->y, t1))) != 0) {
+        sp_256_proj_point_dbl_10(r, p, t);
+    }
+    else {
+        rp[0] = r;
+
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point));
+        x = rp[p->infinity | q->infinity]->x;
+        y = rp[p->infinity | q->infinity]->y;
+        z = rp[p->infinity | q->infinity]->z;
+
+        ap[0] = p;
+        ap[1] = q;
+        for (i=0; i<10; i++) {
+            r->x[i] = ap[p->infinity]->x[i];
+        }
+        for (i=0; i<10; i++) {
+            r->y[i] = ap[p->infinity]->y[i];
+        }
+        for (i=0; i<10; i++) {
+            r->z[i] = ap[p->infinity]->z[i];
+        }
+        r->infinity = ap[p->infinity]->infinity;
+
+        /* U2 = X2*Z1^2 */
+        sp_256_mont_sqr_10(t2, z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_10(t4, t2, z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_10(t2, t2, q->x, p256_mod, p256_mp_mod);
+        /* S2 = Y2*Z1^3 */
+        sp_256_mont_mul_10(t4, t4, q->y, p256_mod, p256_mp_mod);
+        /* H = U2 - X1 */
+        sp_256_mont_sub_10(t2, t2, x, p256_mod);
+        /* R = S2 - Y1 */
+        sp_256_mont_sub_10(t4, t4, y, p256_mod);
+        /* Z3 = H*Z1 */
+        sp_256_mont_mul_10(z, z, t2, p256_mod, p256_mp_mod);
+        /* X3 = R^2 - H^3 - 2*X1*H^2 */
+        sp_256_mont_sqr_10(t1, t4, p256_mod, p256_mp_mod);
+        sp_256_mont_sqr_10(t5, t2, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_10(t3, x, t5, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_10(t5, t5, t2, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_10(x, t1, t5, p256_mod);
+        sp_256_mont_dbl_10(t1, t3, p256_mod);
+        sp_256_mont_sub_10(x, x, t1, p256_mod);
+        /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */
+        sp_256_mont_sub_10(t3, t3, x, p256_mod);
+        sp_256_mont_mul_10(t3, t3, t4, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_10(t5, t5, y, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_10(y, t3, t5, p256_mod);
+    }
+}
+
+#ifdef FP_ECC
+/* Convert the projective point to affine.
+ * Ordinates are in Montgomery form.
+ *
+ * a  Point to convert.
+ * t  Temporary data.
+ */
+static void sp_256_proj_to_affine_10(sp_point* a, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2 * 10;
+    sp_digit* tmp = t + 4 * 10;
+
+    sp_256_mont_inv_10(t1, a->z, tmp);
+
+    sp_256_mont_sqr_10(t2, t1, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_10(t1, t2, t1, p256_mod, p256_mp_mod);
+
+    sp_256_mont_mul_10(a->x, a->x, t2, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_10(a->y, a->y, t1, p256_mod, p256_mp_mod);
+    XMEMCPY(a->z, p256_norm_mod, sizeof(p256_norm_mod));
+}
+
+
+/* Generate the pre-computed table of points for the base point.
+ *
+ * a      The base point.
+ * table  Place to store generated point data.
+ * tmp    Temporary data.
+ * heap  Heap to use for allocation.
+ */
+static int sp_256_gen_stripe_table_10(const sp_point* a,
+        sp_table_entry* table, sp_digit* tmp, void* heap)
+{
+#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
+    sp_point td, s1d, s2d;
+#endif
+    sp_point* t;
+    sp_point* s1 = NULL;
+    sp_point* s2 = NULL;
+    int i, j;
+    int err;
+
+    (void)heap;
+
+    err = sp_ecc_point_new(heap, td, t);
+    if (err == MP_OKAY) {
+        err = sp_ecc_point_new(heap, s1d, s1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_ecc_point_new(heap, s2d, s2);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_10(t->x, a->x, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_10(t->y, a->y, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_10(t->z, a->z, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        t->infinity = 0;
+        sp_256_proj_to_affine_10(t, tmp);
+
+        XMEMCPY(s1->z, p256_norm_mod, sizeof(p256_norm_mod));
+        s1->infinity = 0;
+        XMEMCPY(s2->z, p256_norm_mod, sizeof(p256_norm_mod));
+        s2->infinity = 0;
+
+        /* table[0] = {0, 0, infinity} */
+        XMEMSET(&table[0], 0, sizeof(sp_table_entry));
+        /* table[1] = Affine version of 'a' in Montgomery form */
+        XMEMCPY(table[1].x, t->x, sizeof(table->x));
+        XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+        for (i=1; i<8; i++) {
+            sp_256_proj_point_dbl_n_10(t, t, 32, tmp);
+            sp_256_proj_to_affine_10(t, tmp);
+            XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+            XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+        }
+
+        for (i=1; i<8; i++) {
+            XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+            XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+            for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+                XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+                XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+                sp_256_proj_point_add_qz1_10(t, s1, s2, tmp);
+                sp_256_proj_to_affine_10(t, tmp);
+                XMEMCPY(table[j].x, t->x, sizeof(table->x));
+                XMEMCPY(table[j].y, t->y, sizeof(table->y));
+            }
+        }
+    }
+
+    sp_ecc_point_free(s2, 0, heap);
+    sp_ecc_point_free(s1, 0, heap);
+    sp_ecc_point_free( t, 0, heap);
+
+    return err;
+}
+
+#endif /* FP_ECC */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine co-ordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_stripe_10(sp_point* r, const sp_point* g,
+        const sp_table_entry* table, const sp_digit* k, int map, void* heap)
+{
+#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
+    sp_point rtd;
+    sp_point pd;
+    sp_digit td[2 * 10 * 5];
+#endif
+    sp_point* rt;
+    sp_point* p = NULL;
+    sp_digit* t;
+    int i, j;
+    int y, x;
+    int err;
+
+    (void)g;
+    (void)heap;
+
+    err = sp_ecc_point_new(heap, rtd, rt);
+    if (err == MP_OKAY) {
+        err = sp_ecc_point_new(heap, pd, p);
+    }
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 5, heap,
+                           DYNAMIC_TYPE_ECC);
+    if (t == NULL) {
+        err = MEMORY_E;
+    }
+#else
+    t = td;
+#endif
+
+    if (err == MP_OKAY) {
+        XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
+        XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
+
+        y = 0;
+        for (j=0,x=31; j<8; j++,x+=32) {
+            y |= ((k[x / 26] >> (x % 26)) & 1) << j;
+        }
+        XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+        XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+        rt->infinity = !y;
+        for (i=30; i>=0; i--) {
+            y = 0;
+            for (j=0,x=i; j<8; j++,x+=32) {
+                y |= ((k[x / 26] >> (x % 26)) & 1) << j;
+            }
+
+            sp_256_proj_point_dbl_10(rt, rt, t);
+            XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+            XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+            p->infinity = !y;
+            sp_256_proj_point_add_qz1_10(rt, rt, p, t);
+        }
+
+        if (map != 0) {
+            sp_256_map_10(r, rt, t);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point));
+        }
+    }
+
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    if (t != NULL) {
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_ecc_point_free(p, 0, heap);
+    sp_ecc_point_free(rt, 0, heap);
+
+    return err;
+}
+
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+    #define FP_ENTRIES 16
+#endif
+
+typedef struct sp_cache_t {
+    sp_digit x[10] __attribute__((aligned(128)));
+    sp_digit y[10] __attribute__((aligned(128)));
+    sp_table_entry table[256] __attribute__((aligned(128)));
+    uint32_t cnt;
+    int set;
+} sp_cache_t;
+
+static THREAD_LS_T sp_cache_t sp_cache[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_last = -1;
+static THREAD_LS_T int sp_cache_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+    static volatile int initCacheMutex = 0;
+    static wolfSSL_Mutex sp_cache_lock;
+#endif
+
+static void sp_ecc_get_cache(const sp_point* g, sp_cache_t** cache)
+{
+    int i, j;
+    uint32_t least;
+
+    if (sp_cache_inited == 0) {
+        for (i=0; i<FP_ENTRIES; i++) {
+            sp_cache[i].set = 0;
+        }
+        sp_cache_inited = 1;
+    }
+
+    /* Compare point with those in cache. */
+    for (i=0; i<FP_ENTRIES; i++) {
+        if (!sp_cache[i].set)
+            continue;
+
+        if (sp_256_cmp_equal_10(g->x, sp_cache[i].x) &
+                           sp_256_cmp_equal_10(g->y, sp_cache[i].y)) {
+            sp_cache[i].cnt++;
+            break;
+        }
+    }
+
+    /* No match. */
+    if (i == FP_ENTRIES) {
+        /* Find empty entry. */
+        i = (sp_cache_last + 1) % FP_ENTRIES;
+        for (; i != sp_cache_last; i=(i+1)%FP_ENTRIES) {
+            if (!sp_cache[i].set) {
+                break;
+            }
+        }
+
+        /* Evict least used. */
+        if (i == sp_cache_last) {
+            least = sp_cache[0].cnt;
+            for (j=1; j<FP_ENTRIES; j++) {
+                if (sp_cache[j].cnt < least) {
+                    i = j;
+                    least = sp_cache[i].cnt;
+                }
+            }
+        }
+
+        XMEMCPY(sp_cache[i].x, g->x, sizeof(sp_cache[i].x));
+        XMEMCPY(sp_cache[i].y, g->y, sizeof(sp_cache[i].y));
+        sp_cache[i].set = 1;
+        sp_cache[i].cnt = 1;
+    }
+
+    *cache = &sp_cache[i];
+    sp_cache_last = i;
+}
+#endif /* FP_ECC */
+
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert result to affine co-ordinates.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_10(sp_point* r, const sp_point* g, const sp_digit* k,
+        int map, void* heap)
+{
+#ifndef FP_ECC
+    return sp_256_ecc_mulmod_fast_10(r, g, k, map, heap);
+#else
+    sp_digit tmp[2 * 10 * 5];
+    sp_cache_t* cache;
+    int err = MP_OKAY;
+
+#ifndef HAVE_THREAD_LS
+    if (initCacheMutex == 0) {
+         wc_InitMutex(&sp_cache_lock);
+         initCacheMutex = 1;
+    }
+    if (wc_LockMutex(&sp_cache_lock) != 0)
+       err = BAD_MUTEX_E;
+#endif /* HAVE_THREAD_LS */
+
+    if (err == MP_OKAY) {
+        sp_ecc_get_cache(g, &cache);
+        if (cache->cnt == 2)
+            sp_256_gen_stripe_table_10(g, cache->table, tmp, heap);
+
+#ifndef HAVE_THREAD_LS
+        wc_UnLockMutex(&sp_cache_lock);
+#endif /* HAVE_THREAD_LS */
+
+        if (cache->cnt < 2) {
+            err = sp_256_ecc_mulmod_fast_10(r, g, k, map, heap);
+        }
+        else {
+            err = sp_256_ecc_mulmod_stripe_10(r, g, cache->table, k,
+                    map, heap);
+        }
+    }
+
+    return err;
+#endif
+}
+
+#endif
+
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert result to affine co-ordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_base_10(sp_point* r, const sp_digit* k,
+        int map, void* heap)
+{
+    /* No pre-computed values. */
+    return sp_256_ecc_mulmod_10(r, &p256_base, k, map, heap);
+}
+
+#else
+static const sp_table_entry p256_table[256] = {
+    /* 0 */
+    { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 */
+    { { 0x0a9143c,0x1cc3506,0x360179e,0x3f17fb6,0x075ba95,0x1d88944,
+        0x3b732b7,0x15719e7,0x376a537,0x0062417 },
+      { 0x295560a,0x094d5f3,0x245cddf,0x392e867,0x18b4ab8,0x3487cc9,
+        0x288688d,0x176174b,0x3182588,0x0215c7f } },
+    /* 2 */
+    { { 0x147519a,0x2218090,0x32f0202,0x2b09acd,0x0d0981e,0x1e17af2,
+        0x14a7caa,0x163a6a7,0x10ddbdf,0x03654f1 },
+      { 0x1590f8f,0x0d8733f,0x09179d6,0x1ad139b,0x372e962,0x0bad933,
+        0x1961102,0x223cdff,0x37e9eb2,0x0218fae } },
+    /* 3 */
+    { { 0x0db6485,0x1ad88d7,0x2f97785,0x288bc28,0x3808f0e,0x3df8c02,
+        0x28d9544,0x20280f9,0x055b5ff,0x00001d8 },
+      { 0x38d2010,0x13ae6e0,0x308a763,0x2ecc90d,0x254014f,0x10a9981,
+        0x247d398,0x0fb8383,0x3613437,0x020c21d } },
+    /* 4 */
+    { { 0x2a0d2bb,0x08bf145,0x34994f9,0x1b06988,0x30d5cc1,0x1f18b22,
+        0x01cf3a5,0x199fe49,0x161fd1b,0x00bd79a },
+      { 0x1a01797,0x171c2fd,0x21925c1,0x1358255,0x23d20b4,0x1c7f6d4,
+        0x111b370,0x03dec12,0x1168d6f,0x03d923e } },
+    /* 5 */
+    { { 0x137bbbc,0x19a11f8,0x0bec9e5,0x27a29a8,0x3e43446,0x275cd18,
+        0x0427617,0x00056c7,0x285133d,0x016af80 },
+      { 0x04c7dab,0x2a0df30,0x0c0792a,0x1310c98,0x3573d9f,0x239b30d,
+        0x1315627,0x1ce0c32,0x25b6b6f,0x0252edc } },
+    /* 6 */
+    { { 0x20f141c,0x26d23dc,0x3c74bbf,0x334b7d6,0x06199b3,0x0441171,
+        0x3f61294,0x313bf70,0x3cb2f7d,0x03375ae },
+      { 0x2f436fd,0x19c02fa,0x26becca,0x1b6e64c,0x26f647f,0x053c948,
+        0x0fa7920,0x397d830,0x2bd4bda,0x028d86f } },
+    /* 7 */
+    { { 0x17c13c7,0x2895616,0x03e128a,0x17d42df,0x1c38d63,0x0f02747,
+        0x039aecf,0x0a4b01c,0x209c4b5,0x02e84b2 },
+      { 0x1f91dfd,0x023e916,0x07fb9e4,0x19b3ba8,0x13af43b,0x35e02ca,
+        0x0eb0899,0x3bd2c7b,0x19d701f,0x014faee } },
+    /* 8 */
+    { { 0x0e63d34,0x1fb8c6c,0x0fab4fe,0x1caa795,0x0f46005,0x179ed69,
+        0x093334d,0x120c701,0x39206d5,0x021627e },
+      { 0x183553a,0x03d7319,0x09e5aa7,0x12b8959,0x2087909,0x0011194,
+        0x1045071,0x0713f32,0x16d0254,0x03aec1a } },
+    /* 9 */
+    { { 0x01647c5,0x1b2856b,0x1799461,0x11f133d,0x0b8127d,0x1937eeb,
+        0x266aa37,0x1f68f71,0x0cbd1b2,0x03aca08 },
+      { 0x287e008,0x1be361a,0x38f3940,0x276488d,0x2d87dfa,0x0333b2c,
+        0x2d2e428,0x368755b,0x09b55a7,0x007ca0a } },
+    /* 10 */
+    { { 0x389da99,0x2a8300e,0x0022abb,0x27ae0a1,0x0a6f2d7,0x207017a,
+        0x047862b,0x1358c9e,0x35905e5,0x00cde92 },
+      { 0x1f7794a,0x1d40348,0x3f613c6,0x2ddf5b5,0x0207005,0x133f5ba,
+        0x1a37810,0x3ef5829,0x0d5f4c2,0x0035978 } },
+    /* 11 */
+    { { 0x1275d38,0x026efad,0x2358d9d,0x1142f82,0x14268a7,0x1cfac99,
+        0x362ff49,0x288cbc1,0x24252f4,0x0308f68 },
+      { 0x394520c,0x06e13c2,0x178e5da,0x18ec16f,0x1096667,0x134a7a8,
+        0x0dcb869,0x33fc4e9,0x38cc790,0x006778e } },
+    /* 12 */
+    { { 0x2c5fe04,0x29c5b09,0x1bdb183,0x02ceee8,0x03b28de,0x132dc4b,
+        0x32c586a,0x32ff5d0,0x3d491fc,0x038d372 },
+      { 0x2a58403,0x2351aea,0x3a53b40,0x21a0ba5,0x39a6974,0x1aaaa2b,
+        0x3901273,0x03dfe78,0x3447b4e,0x039d907 } },
+    /* 13 */
+    { { 0x364ba59,0x14e5077,0x02fc7d7,0x3b02c09,0x1d33f10,0x0560616,
+        0x06dfc6a,0x15efd3c,0x357052a,0x01284b7 },
+      { 0x039dbd0,0x18ce3e5,0x3e1fbfa,0x352f794,0x0d3c24b,0x07c6cc5,
+        0x1e4ffa2,0x3a91bf5,0x293bb5b,0x01abd6a } },
+    /* 14 */
+    { { 0x0c91999,0x02da644,0x0491da1,0x100a960,0x00a24b4,0x2330824,
+        0x0094b4b,0x1004cf8,0x35a66a4,0x017f8d1 },
+      { 0x13e7b4b,0x232af7e,0x391ab0f,0x069f08f,0x3292b50,0x3479898,
+        0x2889aec,0x2a4590b,0x308ecfe,0x02d5138 } },
+    /* 15 */
+    { { 0x2ddfdce,0x231ba45,0x39e6647,0x19be245,0x12c3291,0x35399f8,
+        0x0d6e764,0x3082d3a,0x2bda6b0,0x0382dac },
+      { 0x37efb57,0x04b7cae,0x00070d3,0x379e431,0x01aac0d,0x1e6f251,
+        0x0336ad6,0x0ddd3e4,0x3de25a6,0x01c7008 } },
+    /* 16 */
+    { { 0x3e20925,0x230912f,0x286762a,0x30e3f73,0x391c19a,0x34e1c18,
+        0x16a5d5d,0x093d96a,0x3d421d3,0x0187561 },
+      { 0x37173ea,0x19ce8a8,0x0b65e87,0x0214dde,0x2238480,0x16ead0f,
+        0x38441e0,0x3bef843,0x2124621,0x03e847f } },
+    /* 17 */
+    { { 0x0b19ffd,0x247cacb,0x3c231c8,0x16ec648,0x201ba8d,0x2b172a3,
+        0x103d678,0x2fb72db,0x04c1f13,0x0161bac },
+      { 0x3e8ed09,0x171b949,0x2de20c3,0x0f06067,0x21e81a3,0x1b194be,
+        0x0fd6c05,0x13c449e,0x0087086,0x006756b } },
+    /* 18 */
+    { { 0x09a4e1f,0x27d604c,0x00741e9,0x06fa49c,0x0ab7de7,0x3f4a348,
+        0x25ef0be,0x158fc9a,0x33f7f9c,0x039f001 },
+      { 0x2f59f76,0x3598e83,0x30501f6,0x15083f2,0x0669b3b,0x29980b5,
+        0x0c1f7a7,0x0f02b02,0x0fec65b,0x0382141 } },
+    /* 19 */
+    { { 0x031b3ca,0x23da368,0x2d66f09,0x27b9b69,0x06d1cab,0x13c91ba,
+        0x3d81fa9,0x25ad16f,0x0825b09,0x01e3c06 },
+      { 0x225787f,0x3bf790e,0x2c9bb7e,0x0347732,0x28016f8,0x0d6ff0d,
+        0x2a4877b,0x1d1e833,0x3b87e94,0x010e9dc } },
+    /* 20 */
+    { { 0x2b533d5,0x1ddcd34,0x1dc0625,0x3da86f7,0x3673b8a,0x1e7b0a4,
+        0x3e7c9aa,0x19ac55d,0x251c3b2,0x02edb79 },
+      { 0x25259b3,0x24c0ead,0x3480e7e,0x34f40e9,0x3d6a0af,0x2cf3f09,
+        0x2c83d19,0x2e66f16,0x19a5d18,0x0182d18 } },
+    /* 21 */
+    { { 0x2e5aa1c,0x28e3846,0x3658bd6,0x0ad279c,0x1b8b765,0x397e1fb,
+        0x130014e,0x3ff342c,0x3b2aeeb,0x02743c9 },
+      { 0x2730a55,0x0918c5e,0x083aca9,0x0bf76ef,0x19c955b,0x300669c,
+        0x01dfe0a,0x312341f,0x26d356e,0x0091295 } },
+    /* 22 */
+    { { 0x2cf1f96,0x00e52ba,0x271c6db,0x2a40930,0x19f2122,0x0b2f4ee,
+        0x26ac1b8,0x3bda498,0x0873581,0x0117963 },
+      { 0x38f9dbc,0x3d1e768,0x2040d3f,0x11ba222,0x3a8aaf1,0x1b82fb5,
+        0x1adfb24,0x2de9251,0x21cc1e4,0x0301038 } },
+    /* 23 */
+    { { 0x38117b6,0x2bc001b,0x1433847,0x3fdce8d,0x3651969,0x3651d7a,
+        0x2b35761,0x1bb1d20,0x097682c,0x00737d7 },
+      { 0x1f04839,0x1dd6d04,0x16987db,0x3d12378,0x17dbeac,0x1c2cc86,
+        0x121dd1b,0x3fcf6ca,0x1f8a92d,0x00119d5 } },
+    /* 24 */
+    { { 0x0e8ffcd,0x2b174af,0x1a82cc8,0x22cbf98,0x30d53c4,0x080b5b1,
+        0x3161727,0x297cfdb,0x2113b83,0x0011b97 },
+      { 0x0007f01,0x23fd936,0x3183e7b,0x0496bd0,0x07fb1ef,0x178680f,
+        0x1c5ea63,0x0016c11,0x2c3303d,0x01b8041 } },
+    /* 25 */
+    { { 0x0dd73b1,0x1cd6122,0x10d948c,0x23e657b,0x3767070,0x15a8aad,
+        0x385ea8c,0x33c7ce0,0x0ede901,0x0110965 },
+      { 0x2d4b65b,0x2a8b244,0x0c37f8f,0x0ee5b24,0x394c234,0x3a5e347,
+        0x26e4a15,0x39a3b4c,0x2514c2e,0x029e5be } },
+    /* 26 */
+    { { 0x23addd7,0x3ed8120,0x13b3359,0x20f959a,0x09e2a61,0x32fcf20,
+        0x05b78e3,0x19ba7e2,0x1a9c697,0x0392b4b },
+      { 0x2048a61,0x3dfd0a3,0x19a0357,0x233024b,0x3082d19,0x00fb63b,
+        0x3a1af4c,0x1450ff0,0x046c37b,0x0317a50 } },
+    /* 27 */
+    { { 0x3e75f9e,0x294e30a,0x3a78476,0x3a32c48,0x36fd1a9,0x0427012,
+        0x1e4df0b,0x11d1f61,0x1afdb46,0x018ca0f },
+      { 0x2f2df15,0x0a33dee,0x27f4ce7,0x1542b66,0x3e592c4,0x20d2f30,
+        0x3226ade,0x2a4e3ea,0x1ab1981,0x01a2f46 } },
+    /* 28 */
+    { { 0x087d659,0x3ab5446,0x305ac08,0x3d2cd64,0x33374d5,0x3f9d3f8,
+        0x186981c,0x37f5a5a,0x2f53c6f,0x01254a4 },
+      { 0x2cec896,0x1e32786,0x04844a8,0x043b16d,0x3d964b2,0x1935829,
+        0x16f7e26,0x1a0dd9a,0x30d2603,0x003b1d4 } },
+    /* 29 */
+    { { 0x12687bb,0x04e816b,0x21fa2da,0x1abccb8,0x3a1f83b,0x375181e,
+        0x0f5ef51,0x0fc2ce4,0x3a66486,0x003d881 },
+      { 0x3138233,0x1f8eec3,0x2718bd6,0x1b09caa,0x2dd66b9,0x1bb222b,
+        0x1004072,0x1b73e3b,0x07208ed,0x03fc36c } },
+    /* 30 */
+    { { 0x095d553,0x3e84053,0x0a8a749,0x3f575a0,0x3a44052,0x3ced59b,
+        0x3b4317f,0x03a8c60,0x13c8874,0x00c4ed4 },
+      { 0x0d11549,0x0b8ab02,0x221cb40,0x02ed37b,0x2071ee1,0x1fc8c83,
+        0x3987dd4,0x27e049a,0x0f986f1,0x00b4eaf } },
+    /* 31 */
+    { { 0x15581a2,0x2214060,0x11af4c2,0x1598c88,0x19a0a6d,0x32acba6,
+        0x3a7a0f0,0x2337c66,0x210ded9,0x0300dbe },
+      { 0x1fbd009,0x3822eb0,0x181629a,0x2401b45,0x30b68b1,0x2e78363,
+        0x2b32779,0x006530b,0x2c4b6d4,0x029aca8 } },
+    /* 32 */
+    { { 0x13549cf,0x0f943db,0x265ed43,0x1bfeb35,0x06f3369,0x3847f2d,
+        0x1bfdacc,0x26181a5,0x252af7c,0x02043b8 },
+      { 0x159bb2c,0x143f85c,0x357b654,0x2f9d62c,0x2f7dfbe,0x1a7fa9c,
+        0x057e74d,0x05d14ac,0x17a9273,0x035215c } },
+    /* 33 */
+    { { 0x0cb5a98,0x106a2bc,0x10bf117,0x24c7cc4,0x3d3da8f,0x2ce0ab7,
+        0x14e2cba,0x1813866,0x1a72f9a,0x01a9811 },
+      { 0x2b2411d,0x3034fe8,0x16e0170,0x0f9443a,0x0be0eb8,0x2196cf3,
+        0x0c9f738,0x15e40ef,0x0faf9e1,0x034f917 } },
+    /* 34 */
+    { { 0x03f7669,0x3da6efa,0x3d6bce1,0x209ca1d,0x109f8ae,0x09109e3,
+        0x08ae543,0x3067255,0x1dee3c2,0x0081dd5 },
+      { 0x3ef1945,0x358765b,0x28c387b,0x3bec4b4,0x218813c,0x0b7d92a,
+        0x3cd1d67,0x2c0367e,0x2e57154,0x0123717 } },
+    /* 35 */
+    { { 0x3e5a199,0x1e42ffd,0x0bb7123,0x33e6273,0x1e0efb8,0x294671e,
+        0x3a2bfe0,0x3d11709,0x2eddff6,0x03cbec2 },
+      { 0x0b5025f,0x0255d7c,0x1f2241c,0x35d03ea,0x0550543,0x202fef4,
+        0x23c8ad3,0x354963e,0x015db28,0x0284fa4 } },
+    /* 36 */
+    { { 0x2b65cbc,0x1e8d428,0x0226f9f,0x1c8a919,0x10b04b9,0x08fc1e8,
+        0x1ce241e,0x149bc99,0x2b01497,0x00afc35 },
+      { 0x3216fb7,0x1374fd2,0x226ad3d,0x19fef76,0x0f7d7b8,0x1c21417,
+        0x37b83f6,0x3a27eba,0x25a162f,0x010aa52 } },
+    /* 37 */
+    { { 0x2adf191,0x1ab42fa,0x28d7584,0x2409689,0x20f8a48,0x253707d,
+        0x2030504,0x378f7a1,0x169c65e,0x00b0b76 },
+      { 0x3849c17,0x085c764,0x10dd6d0,0x2e87689,0x1460488,0x30e9521,
+        0x10c7063,0x1b6f120,0x21f42c5,0x03d0dfe } },
+    /* 38 */
+    { { 0x20f7dab,0x035c512,0x29ac6aa,0x24c5ddb,0x20f0497,0x17ce5e1,
+        0x00a050f,0x1eaa14b,0x3335470,0x02abd16 },
+      { 0x18d364a,0x0df0cf0,0x316585e,0x018f925,0x0d40b9b,0x17b1511,
+        0x1716811,0x1caf3d0,0x10df4f2,0x0337d8c } },
+    /* 39 */
+    { { 0x2a8b7ef,0x0f188e3,0x2287747,0x06216f0,0x008e935,0x2f6a38d,
+        0x1567722,0x0bfc906,0x0bada9e,0x03c3402 },
+      { 0x014d3b1,0x099c749,0x2a76291,0x216c067,0x3b37549,0x14ef2f6,
+        0x21b96d4,0x1ee2d71,0x2f5ca88,0x016f570 } },
+    /* 40 */
+    { { 0x09a3154,0x3d1a7bd,0x2e9aef0,0x255b8ac,0x03e85a5,0x2a492a7,
+        0x2aec1ea,0x11c6516,0x3c8a09e,0x02a84b7 },
+      { 0x1f69f1d,0x09c89d3,0x1e7326f,0x0b28bfd,0x0e0e4c8,0x1ea7751,
+        0x18ce73b,0x2a406e7,0x273e48c,0x01b00db } },
+    /* 41 */
+    { { 0x36e3138,0x2b84a83,0x345a5cf,0x00096b4,0x16966ef,0x159caf1,
+        0x13c64b4,0x2f89226,0x25896af,0x00a4bfd },
+      { 0x2213402,0x1435117,0x09fed52,0x09d0e4b,0x0f6580e,0x2871cba,
+        0x3b397fd,0x1c9d825,0x090311b,0x0191383 } },
+    /* 42 */
+    { { 0x07153f0,0x1087869,0x18c9e1e,0x1e64810,0x2b86c3b,0x0175d9c,
+        0x3dce877,0x269de4e,0x393cab7,0x03c96b9 },
+      { 0x1869d0c,0x06528db,0x02641f3,0x209261b,0x29d55c8,0x25ba517,
+        0x3b5ea30,0x028f927,0x25313db,0x00e6e39 } },
+    /* 43 */
+    { { 0x2fd2e59,0x150802d,0x098f377,0x19a4957,0x135e2c0,0x38a95ce,
+        0x1ab21a0,0x36c1b67,0x32f0f19,0x00e448b },
+      { 0x3cad53c,0x3387800,0x17e3cfb,0x03f9970,0x3225b2c,0x2a84e1d,
+        0x3af1d29,0x3fe35ca,0x2f8ce80,0x0237a02 } },
+    /* 44 */
+    { { 0x07bbb76,0x3aa3648,0x2758afb,0x1f085e0,0x1921c7e,0x3010dac,
+        0x22b74b1,0x230137e,0x1062e36,0x021c652 },
+      { 0x3993df5,0x24a2ee8,0x126ab5f,0x2d7cecf,0x0639d75,0x16d5414,
+        0x1aa78a8,0x3f78404,0x26a5b74,0x03f0c57 } },
+    /* 45 */
+    { { 0x0d6ecfa,0x3f506ba,0x3f86561,0x3d86bb1,0x15f8c44,0x2491d07,
+        0x052a7b4,0x2422261,0x3adee38,0x039b529 },
+      { 0x193c75d,0x14bb451,0x1162605,0x293749c,0x370a70d,0x2e8b1f6,
+        0x2ede937,0x2b95f4a,0x39a9be2,0x00d77eb } },
+    /* 46 */
+    { { 0x2736636,0x15bf36a,0x2b7e6b9,0x25eb8b2,0x209f51d,0x3cd2659,
+        0x10bf410,0x034afec,0x3d71c83,0x0076971 },
+      { 0x0ce6825,0x07920cf,0x3c3b5c4,0x23fe55c,0x015ad11,0x08c0dae,
+        0x0552c7f,0x2e75a8a,0x0fddbf4,0x01c1df0 } },
+    /* 47 */
+    { { 0x2b9661c,0x0ffe351,0x3d71bf6,0x1ac34b3,0x3a1dfd3,0x211fe3d,
+        0x33e140a,0x3f9100d,0x32ee50e,0x014ea18 },
+      { 0x16d8051,0x1bfda1a,0x068a097,0x2571d3d,0x1daec0c,0x39389af,
+        0x194dc35,0x3f3058a,0x36d34e1,0x000a329 } },
+    /* 48 */
+    { { 0x09877ee,0x351f73f,0x0002d11,0x0420074,0x2c8b362,0x130982d,
+        0x02c1175,0x3c11b40,0x0d86962,0x001305f },
+      { 0x0daddf5,0x2f4252c,0x15c06d9,0x1d49339,0x1bea235,0x0b680ed,
+        0x3356e67,0x1d1d198,0x1e9fed9,0x03dee93 } },
+    /* 49 */
+    { { 0x3e1263f,0x2fe8d3a,0x3ce6d0d,0x0d5c6b9,0x3557637,0x0a9bd48,
+        0x0405538,0x0710749,0x2005213,0x038c7e5 },
+      { 0x26b6ec6,0x2e485ba,0x3c44d1b,0x0b9cf0b,0x037a1d1,0x27428a5,
+        0x0e7eac8,0x351ef04,0x259ce34,0x02a8e98 } },
+    /* 50 */
+    { { 0x2f3dcd3,0x3e77d4d,0x3360fbc,0x1434afd,0x36ceded,0x3d413d6,
+        0x1710fad,0x36bb924,0x1627e79,0x008e637 },
+      { 0x109569e,0x1c168db,0x3769cf4,0x2ed4527,0x0ea0619,0x17d80d3,
+        0x1c03773,0x18843fe,0x1b21c04,0x015c5fd } },
+    /* 51 */
+    { { 0x1dd895e,0x08a7248,0x04519fe,0x001030a,0x18e5185,0x358dfb3,
+        0x13d2391,0x0a37be8,0x0560e3c,0x019828b },
+      { 0x27fcbd0,0x2a22bb5,0x30969cc,0x1e03aa7,0x1c84724,0x0ba4ad3,
+        0x32f4817,0x0914cca,0x14c4f52,0x01893b9 } },
+    /* 52 */
+    { { 0x097eccc,0x1273936,0x00aa095,0x364fe62,0x04d49d1,0x10e9f08,
+        0x3c24230,0x3ef01c8,0x2fb92bd,0x013ce4a },
+      { 0x1e44fd9,0x27e3e9f,0x2156696,0x3915ecc,0x0b66cfb,0x1a3af0f,
+        0x2fa8033,0x0e6736c,0x177ccdb,0x0228f9e } },
+    /* 53 */
+    { { 0x2c4b125,0x06207c1,0x0a8cdde,0x003db8f,0x1ae34e3,0x31e84fa,
+        0x2999de5,0x11013bd,0x02370c2,0x00e2234 },
+      { 0x0f91081,0x200d591,0x1504762,0x1857c05,0x23d9fcf,0x0cb34db,
+        0x27edc86,0x08cd860,0x2471810,0x029798b } },
+    /* 54 */
+    { { 0x3acd6c8,0x097b8cb,0x3c661a8,0x15152f2,0x1699c63,0x237e64c,
+        0x23edf79,0x16b7033,0x0e6466a,0x00b11da },
+      { 0x0a64bc9,0x1bfe324,0x1f5cb34,0x08391de,0x0630a60,0x3017a21,
+        0x09d064b,0x14a8365,0x041f9e6,0x01ed799 } },
+    /* 55 */
+    { { 0x128444a,0x2508b07,0x2a39216,0x362f84d,0x2e996c5,0x2c31ff3,
+        0x07afe5f,0x1d1288e,0x3cb0c8d,0x02e2bdc },
+      { 0x38b86fd,0x3a0ea8c,0x1cff5fd,0x1629629,0x3fee3f1,0x02b250c,
+        0x2e8f6f2,0x0225727,0x15f7f3f,0x0280d8e } },
+    /* 56 */
+    { { 0x10f7770,0x0f1aee8,0x0e248c7,0x20684a8,0x3a6f16d,0x06f0ae7,
+        0x0df6825,0x2d4cc40,0x301875f,0x012f8da },
+      { 0x3b56dbb,0x1821ba7,0x24f8922,0x22c1f9e,0x0306fef,0x1b54bc8,
+        0x2ccc056,0x00303ba,0x2871bdc,0x0232f26 } },
+    /* 57 */
+    { { 0x0dac4ab,0x0625730,0x3112e13,0x101c4bf,0x3a874a4,0x2873b95,
+        0x32ae7c6,0x0d7e18c,0x13e0c08,0x01139d5 },
+      { 0x334002d,0x00fffdd,0x025c6d5,0x22c2cd1,0x19d35cb,0x3a1ce2d,
+        0x3702760,0x3f06257,0x03a5eb8,0x011c29a } },
+    /* 58 */
+    { { 0x0513482,0x1d87724,0x276a81b,0x0a807a4,0x3028720,0x339cc20,
+        0x2441ee0,0x31bbf36,0x290c63d,0x0059041 },
+      { 0x106a2ed,0x0d2819b,0x100bf50,0x114626c,0x1dd4d77,0x2e08632,
+        0x14ae72a,0x2ed3f64,0x1fd7abc,0x035cd1e } },
+    /* 59 */
+    { { 0x2d4c6e5,0x3bec596,0x104d7ed,0x23d6c1b,0x0262cf0,0x15d72c5,
+        0x2d5bb18,0x199ac4b,0x1e30771,0x020591a },
+      { 0x21e291e,0x2e75e55,0x1661d7a,0x08b0778,0x3eb9daf,0x0d78144,
+        0x1827eb1,0x0fe73d2,0x123f0dd,0x0028db7 } },
+    /* 60 */
+    { { 0x1d5533c,0x34cb1d0,0x228f098,0x27a1a11,0x17c5f5a,0x0d26f44,
+        0x2228ade,0x2c460e6,0x3d6fdba,0x038cc77 },
+      { 0x3cc6ed8,0x02ada1a,0x260e510,0x2f7bde8,0x37160c3,0x33a1435,
+        0x23d9a7b,0x0ce2641,0x02a492e,0x034ed1e } },
+    /* 61 */
+    { { 0x3821f90,0x26dba3c,0x3aada14,0x3b59bad,0x292edd9,0x2804c45,
+        0x3669531,0x296f42e,0x35a4c86,0x01ca049 },
+      { 0x3ff47e5,0x2163df4,0x2441503,0x2f18405,0x15e1616,0x37f66ec,
+        0x30f11a7,0x141658a,0x27ece14,0x00b018b } },
+    /* 62 */
+    { { 0x159ac2e,0x3e65bc0,0x2713a76,0x0db2f6c,0x3281e77,0x2391811,
+        0x16d2880,0x1fbc4ab,0x1f92c4e,0x00a0a8d },
+      { 0x0ce5cd2,0x152c7b0,0x02299c3,0x3244de7,0x2cf99ef,0x3a0b047,
+        0x2caf383,0x0aaf664,0x113554d,0x031c735 } },
+    /* 63 */
+    { { 0x1b578f4,0x177a702,0x3a7a488,0x1638ebf,0x31884e2,0x2460bc7,
+        0x36b1b75,0x3ce8e3d,0x340cf47,0x03143d9 },
+      { 0x34b68ea,0x12b7ccd,0x1fe2a9c,0x08da659,0x0a406f3,0x1694c14,
+        0x06a2228,0x16370be,0x3a72129,0x02e7b2c } },
+    /* 64 */
+    { { 0x0f8b16a,0x21043bd,0x266a56f,0x3fb11ec,0x197241a,0x36721f0,
+        0x006b8e6,0x2ac6c29,0x202cd42,0x0200fcf },
+      { 0x0dbec69,0x0c26a01,0x105f7f0,0x3dceeeb,0x3a83b85,0x363865f,
+        0x097273a,0x2b70718,0x00e5067,0x03025d1 } },
+    /* 65 */
+    { { 0x379ab34,0x295bcb0,0x38d1846,0x22e1077,0x3a8ee06,0x1db1a3b,
+        0x3144591,0x07cc080,0x2d5915f,0x03c6bcc },
+      { 0x175bd50,0x0dd4c57,0x27bc99c,0x2ebdcbd,0x3837cff,0x235dc8f,
+        0x13a4184,0x0722c18,0x130e2d4,0x008f43c } },
+    /* 66 */
+    { { 0x01500d9,0x2adbb7d,0x2da8857,0x397f2fa,0x10d890a,0x25c9654,
+        0x3e86488,0x3eb754b,0x1d6c0a3,0x02c0a23 },
+      { 0x10bcb08,0x083cc19,0x2e16853,0x04da575,0x271af63,0x2626a9d,
+        0x3520a7b,0x32348c7,0x24ff408,0x03ff4dc } },
+    /* 67 */
+    { { 0x058e6cb,0x1a3992d,0x1d28539,0x080c5e9,0x2992dad,0x2a9d7d5,
+        0x14ae0b7,0x09b7ce0,0x34ad78c,0x03d5643 },
+      { 0x30ba55a,0x092f4f3,0x0bae0fc,0x12831de,0x20fc472,0x20ed9d2,
+        0x29864f6,0x1288073,0x254f6f7,0x00635b6 } },
+    /* 68 */
+    { { 0x1be5a2b,0x0f88975,0x33c6ed9,0x20d64d3,0x06fe799,0x0989bff,
+        0x1409262,0x085a90c,0x0d97990,0x0142eed },
+      { 0x17ec63e,0x06471b9,0x0db2378,0x1006077,0x265422c,0x08db83d,
+        0x28099b0,0x1270d06,0x11801fe,0x00ac400 } },
+    /* 69 */
+    { { 0x3391593,0x22d7166,0x30fcfc6,0x2896609,0x3c385f5,0x066b72e,
+        0x04f3aad,0x2b831c5,0x19983fb,0x0375562 },
+      { 0x0b82ff4,0x222e39d,0x34c993b,0x101c79c,0x2d2e03c,0x0f00c8a,
+        0x3a9eaf4,0x1810669,0x151149d,0x039b931 } },
+    /* 70 */
+    { { 0x29af288,0x1956ec7,0x293155f,0x193deb6,0x1647e1a,0x2ca0839,
+        0x297e4bc,0x15bfd0d,0x1b107ed,0x0147803 },
+      { 0x31c327e,0x05a6e1d,0x02ad43d,0x02d2a5b,0x129cdb2,0x37ad1de,
+        0x3d51f53,0x245df01,0x2414982,0x0388bd0 } },
+    /* 71 */
+    { { 0x35f1abb,0x17a3d18,0x0874cd4,0x2d5a14e,0x17edc0c,0x16a00d3,
+        0x072c1fb,0x1232725,0x33d52dc,0x03dc24d },
+      { 0x0af30d6,0x259aeea,0x369c401,0x12bc4de,0x295bf5f,0x0d8711f,
+        0x26162a9,0x16c44e5,0x288e727,0x02f54b4 } },
+    /* 72 */
+    { { 0x05fa877,0x1571ea7,0x3d48ab1,0x1c9f4e8,0x017dad6,0x0f46276,
+        0x343f9e7,0x1de990f,0x0e4c8aa,0x028343e },
+      { 0x094f92d,0x3abf633,0x1b3a0bb,0x2f83137,0x0d818c8,0x20bae85,
+        0x0c65f8b,0x1a8008b,0x0c7946d,0x0295b1e } },
+    /* 73 */
+    { { 0x1d09529,0x08e46c3,0x1fcf296,0x298f6b7,0x1803e0e,0x2d6fd20,
+        0x37351f5,0x0d9e8b1,0x1f8731a,0x0362fbf },
+      { 0x00157f4,0x06750bf,0x2650ab9,0x35ffb23,0x2f51cae,0x0b522c2,
+        0x39cb400,0x191e337,0x0a5ce9f,0x021529a } },
+    /* 74 */
+    { { 0x3506ea5,0x17d9ed8,0x0d66dc3,0x22693f8,0x19286c4,0x3a57353,
+        0x101d3bf,0x1aa54fc,0x20b9884,0x0172b3a },
+      { 0x0eac44d,0x37d8327,0x1c3aa90,0x3d0d534,0x23db29a,0x3576eaf,
+        0x1d3de8a,0x3bea423,0x11235e4,0x039260b } },
+    /* 75 */
+    { { 0x34cd55e,0x01288b0,0x1132231,0x2cc9a03,0x358695b,0x3e87650,
+        0x345afa1,0x01267ec,0x3f616b2,0x02011ad },
+      { 0x0e7d098,0x0d6078e,0x0b70b53,0x237d1bc,0x0d7f61e,0x132de31,
+        0x1ea9ea4,0x2bd54c3,0x27b9082,0x03ac5f2 } },
+    /* 76 */
+    { { 0x2a145b9,0x06d661d,0x31ec175,0x03f06f1,0x3a5cf6b,0x249c56e,
+        0x2035653,0x384c74f,0x0bafab5,0x0025ec0 },
+      { 0x25f69e1,0x1b23a55,0x1199aa6,0x16ad6f9,0x077e8f7,0x293f661,
+        0x33ba11d,0x3327980,0x07bafdb,0x03e571d } },
+    /* 77 */
+    { { 0x2bae45e,0x3c074ef,0x2955558,0x3c312f1,0x2a8ebe9,0x2f193f1,
+        0x3705b1d,0x360deba,0x01e566e,0x00d4498 },
+      { 0x21161cd,0x1bc787e,0x2f87933,0x3553197,0x1328ab8,0x093c879,
+        0x17eee27,0x2adad1d,0x1236068,0x003be5c } },
+    /* 78 */
+    { { 0x0ca4226,0x2633dd5,0x2c8e025,0x0e3e190,0x05eede1,0x1a385e4,
+        0x163f744,0x2f25522,0x1333b4f,0x03f05b6 },
+      { 0x3c800ca,0x1becc79,0x2daabe9,0x0c499e2,0x1138063,0x3fcfa2d,
+        0x2244976,0x1e85cf5,0x2f1b95d,0x0053292 } },
+    /* 79 */
+    { { 0x12f81d5,0x1dc6eaf,0x11967a4,0x1a407df,0x31a5f9d,0x2b67241,
+        0x18bef7c,0x08c7762,0x063f59c,0x01015ec },
+      { 0x1c05c0a,0x360bfa2,0x1f85bff,0x1bc7703,0x3e4911c,0x0d685b6,
+        0x2fccaea,0x02c4cef,0x164f133,0x0070ed7 } },
+    /* 80 */
+    { { 0x0ec21fe,0x052ffa0,0x3e825fe,0x1ab0956,0x3f6ce11,0x3d29759,
+        0x3c5a072,0x18ebe62,0x148db7e,0x03eb49c },
+      { 0x1ab05b3,0x02dab0a,0x1ae690c,0x0f13894,0x137a9a8,0x0aab79f,
+        0x3dc875c,0x06a1029,0x1e39f0e,0x01dce1f } },
+    /* 81 */
+    { { 0x16c0dd7,0x3b31269,0x2c741e9,0x3611821,0x2a5cffc,0x1416bb3,
+        0x3a1408f,0x311fa3d,0x1c0bef0,0x02cdee1 },
+      { 0x00e6a8f,0x1adb933,0x0f23359,0x2fdace2,0x2fd6d4b,0x0e73bd3,
+        0x2453fac,0x0a356ae,0x2c8f9f6,0x02704d6 } },
+    /* 82 */
+    { { 0x0e35743,0x28c80a1,0x0def32a,0x2c6168f,0x1320d6a,0x37c6606,
+        0x21b1761,0x2147ee0,0x21fc433,0x015c84d },
+      { 0x1fc9168,0x36cda9c,0x003c1f0,0x1cd7971,0x15f98ba,0x1ef363d,
+        0x0ca87e3,0x046f7d9,0x3c9e6bb,0x0372eb0 } },
+    /* 83 */
+    { { 0x118cbe2,0x3665a11,0x304ef01,0x062727a,0x3d242fc,0x11ffbaf,
+        0x3663c7e,0x1a189c9,0x09e2d62,0x02e3072 },
+      { 0x0e1d569,0x162f772,0x0cd051a,0x322df62,0x3563809,0x047cc7a,
+        0x027fd9f,0x08b509b,0x3da2f94,0x01748ee } },
+    /* 84 */
+    { { 0x1c8f8be,0x31ca525,0x22bf0a1,0x200efcd,0x02961c4,0x3d8f52b,
+        0x018403d,0x3a40279,0x1cb91ec,0x030427e },
+      { 0x0945705,0x0257416,0x05c0c2d,0x25b77ae,0x3b9083d,0x2901126,
+        0x292b8d7,0x07b8611,0x04f2eee,0x026f0cd } },
+    /* 85 */
+    { { 0x2913074,0x2b8d590,0x02b10d5,0x09d2295,0x255491b,0x0c41cca,
+        0x1ca665b,0x133051a,0x1525f1a,0x00a5647 },
+      { 0x04f983f,0x3d6daee,0x04e1e76,0x1067d7e,0x1be7eef,0x02ea862,
+        0x00d4968,0x0ccb048,0x11f18ef,0x018dd95 } },
+    /* 86 */
+    { { 0x22976cc,0x17c5395,0x2c38bda,0x3983bc4,0x222bca3,0x332a614,
+        0x3a30646,0x261eaef,0x1c808e2,0x02f6de7 },
+      { 0x306a772,0x32d7272,0x2dcefd2,0x2abf94d,0x038f475,0x30ad76e,
+        0x23e0227,0x3052b0a,0x001add3,0x023ba18 } },
+    /* 87 */
+    { { 0x0ade873,0x25a6069,0x248ccbe,0x13713ee,0x17ee9aa,0x28152e9,
+        0x2e28995,0x2a92cb3,0x17a6f77,0x024b947 },
+      { 0x190a34d,0x2ebea1c,0x1ed1948,0x16fdaf4,0x0d698f7,0x32bc451,
+        0x0ee6e30,0x2aaab40,0x06f0a56,0x01460be } },
+    /* 88 */
+    { { 0x24cc99c,0x1884b1e,0x1ca1fba,0x1a0f9b6,0x2ff609b,0x2b26316,
+        0x3b27cb5,0x29bc976,0x35d4073,0x024772a },
+      { 0x3575a70,0x1b30f57,0x07fa01b,0x0e5be36,0x20cb361,0x26605cd,
+        0x1d4e8c8,0x13cac59,0x2db9797,0x005e833 } },
+    /* 89 */
+    { { 0x36c8d3a,0x1878a81,0x124b388,0x0e4843e,0x1701aad,0x0ea0d76,
+        0x10eae41,0x37d0653,0x36c7f4c,0x00ba338 },
+      { 0x37a862b,0x1cf6ac0,0x08fa912,0x2dd8393,0x101ba9b,0x0eebcb7,
+        0x2453883,0x1a3cfe5,0x2cb34f6,0x03d3331 } },
+    /* 90 */
+    { { 0x1f79687,0x3d4973c,0x281544e,0x2564bbe,0x17c5954,0x171e34a,
+        0x231741a,0x3cf2784,0x0889a0d,0x02b036d },
+      { 0x301747f,0x3f1c477,0x1f1386b,0x163bc5f,0x1592b93,0x332daed,
+        0x080e4f5,0x1d28b96,0x26194c9,0x0256992 } },
+    /* 91 */
+    { { 0x15a4c93,0x07bf6b0,0x114172c,0x1ce0961,0x140269b,0x1b2c2eb,
+        0x0dfb1c1,0x019ddaa,0x0ba2921,0x008c795 },
+      { 0x2e6d2dc,0x37e45e2,0x2918a70,0x0fce444,0x34d6aa6,0x396dc88,
+        0x27726b5,0x0c787d8,0x032d8a7,0x02ac2f8 } },
+    /* 92 */
+    { { 0x1131f2d,0x2b43a63,0x3101097,0x38cec13,0x0637f09,0x17a69d2,
+        0x086196d,0x299e46b,0x0802cf6,0x03c6f32 },
+      { 0x0daacb4,0x1a4503a,0x100925c,0x15583d9,0x23c4e40,0x1de4de9,
+        0x1cc8fc4,0x2c9c564,0x0695aeb,0x02145a5 } },
+    /* 93 */
+    { { 0x1dcf593,0x17050fc,0x3e3bde3,0x0a6c062,0x178202b,0x2f7674f,
+        0x0dadc29,0x15763a7,0x1d2daad,0x023d9f6 },
+      { 0x081ea5f,0x045959d,0x190c841,0x3a78d31,0x0e7d2dd,0x1414fea,
+        0x1d43f40,0x22d77ff,0x2b9c072,0x03e115c } },
+    /* 94 */
+    { { 0x3af71c9,0x29e9c65,0x25655e1,0x111e9cd,0x3a14494,0x3875418,
+        0x34ae070,0x0b06686,0x310616b,0x03b7b89 },
+      { 0x1734121,0x00d3d44,0x29f0b2f,0x1552897,0x31cac6e,0x1030bb3,
+        0x0148f3a,0x35fd237,0x29b44eb,0x027f49f } },
+    /* 95 */
+    { { 0x2e2cb16,0x1d962bd,0x19b63cc,0x0b3f964,0x3e3eb7d,0x1a35560,
+        0x0c58161,0x3ce1d6a,0x3b6958f,0x029030b },
+      { 0x2dcc158,0x3b1583f,0x30568c9,0x31957c8,0x27ad804,0x28c1f84,
+        0x3967049,0x37b3f64,0x3b87dc6,0x0266f26 } },
+    /* 96 */
+    { { 0x27dafc6,0x2548764,0x0d1984a,0x1a57027,0x252c1fb,0x24d9b77,
+        0x1581a0f,0x1f99276,0x10ba16d,0x026af88 },
+      { 0x0915220,0x2be1292,0x16c6480,0x1a93760,0x2fa7317,0x1a07296,
+        0x1539871,0x112c31f,0x25787f3,0x01e2070 } },
+    /* 97 */
+    { { 0x0bcf3ff,0x266d478,0x34f6933,0x31449fd,0x00d02cb,0x340765a,
+        0x3465a2d,0x225023e,0x319a30e,0x00579b8 },
+      { 0x20e05f4,0x35b834f,0x0404646,0x3710d62,0x3fad7bd,0x13e1434,
+        0x21c7d1c,0x1cb3af9,0x2cf1911,0x003957e } },
+    /* 98 */
+    { { 0x0787564,0x36601be,0x1ce67e9,0x084c7a1,0x21a3317,0x2067a35,
+        0x0158cab,0x195ddac,0x1766fe9,0x035cf42 },
+      { 0x2b7206e,0x20d0947,0x3b42424,0x03f1862,0x0a51929,0x38c2948,
+        0x0bb8595,0x2942d77,0x3748f15,0x0249428 } },
+    /* 99 */
+    { { 0x2577410,0x3c23e2f,0x28c6caf,0x00d41de,0x0fd408a,0x30298e9,
+        0x363289e,0x2302fc7,0x082c1cc,0x01dd050 },
+      { 0x30991cd,0x103e9ba,0x029605a,0x19927f7,0x0c1ca08,0x0c93f50,
+        0x28a3c7b,0x082e4e9,0x34d12eb,0x0232c13 } },
+    /* 100 */
+    { { 0x106171c,0x0b4155a,0x0c3fb1c,0x336c090,0x19073e9,0x2241a10,
+        0x0e6b4fd,0x0ed476e,0x1ef4712,0x039390a },
+      { 0x0ec36f4,0x3754f0e,0x2a270b8,0x007fd2d,0x0f9d2dc,0x1e6a692,
+        0x066e078,0x1954974,0x2ff3c6e,0x00def28 } },
+    /* 101 */
+    { { 0x3562470,0x0b8f1f7,0x0ac94cd,0x28b0259,0x244f272,0x031e4ef,
+        0x2d5df98,0x2c8a9f1,0x2dc3002,0x016644f },
+      { 0x350592a,0x0e6a0d5,0x1e027a1,0x2039e0f,0x399e01d,0x2817593,
+        0x0c0375e,0x3889b3e,0x24ab013,0x010de1b } },
+    /* 102 */
+    { { 0x256b5a6,0x0ac3b67,0x28f9ff3,0x29b67f1,0x30750d9,0x25e11a9,
+        0x15e8455,0x279ebb0,0x298b7e7,0x0218e32 },
+      { 0x2fc24b2,0x2b82582,0x28f22f5,0x2bd36b3,0x305398e,0x3b2e9e3,
+        0x365dd0a,0x29bc0ed,0x36a7b3a,0x007b374 } },
+    /* 103 */
+    { { 0x05ff2f3,0x2b3589b,0x29785d3,0x300a1ce,0x0a2d516,0x0844355,
+        0x14c9fad,0x3ccb6b6,0x385d459,0x0361743 },
+      { 0x0b11da3,0x002e344,0x18c49f7,0x0c29e0c,0x1d2c22c,0x08237b3,
+        0x2988f49,0x0f18955,0x1c3b4ed,0x02813c6 } },
+    /* 104 */
+    { { 0x17f93bd,0x249323b,0x11f6087,0x174e4bd,0x3cb64ac,0x086dc6b,
+        0x2e330a8,0x142c1f2,0x2ea5c09,0x024acbb },
+      { 0x1b6e235,0x3132521,0x00f085a,0x2a4a4db,0x1ab2ca4,0x0142224,
+        0x3aa6b3e,0x09db203,0x2215834,0x007b9e0 } },
+    /* 105 */
+    { { 0x23e79f7,0x28b8039,0x1906a60,0x2cbce67,0x1f590e7,0x181f027,
+        0x21054a6,0x3854240,0x2d857a6,0x03cfcb3 },
+      { 0x10d9b55,0x1443cfc,0x2648200,0x2b36190,0x09d2fcf,0x22f439f,
+        0x231aa7e,0x3884395,0x0543da3,0x003d5a9 } },
+    /* 106 */
+    { { 0x043e0df,0x06ffe84,0x3e6d5b2,0x3327001,0x26c74b6,0x12a145e,
+        0x256ec0d,0x3898c69,0x3411969,0x02f63c5 },
+      { 0x2b7494a,0x2eee1af,0x38388a9,0x1bd17ce,0x21567d4,0x13969e6,
+        0x3a12a7a,0x3e8277d,0x03530cc,0x00b4687 } },
+    /* 107 */
+    { { 0x06508da,0x38e04d4,0x15a7192,0x312875e,0x3336180,0x2a6512c,
+        0x1b59497,0x2e91b37,0x25eb91f,0x02841e9 },
+      { 0x394d639,0x0747143,0x37d7e6d,0x1d62962,0x08b4af3,0x34df287,
+        0x3c5584b,0x26bc869,0x20af87a,0x0060f5d } },
+    /* 108 */
+    { { 0x1de59a4,0x1a5c443,0x2f8729d,0x01c3a2f,0x0f1ad8d,0x3cbaf9e,
+        0x1b49634,0x35d508a,0x39dc269,0x0075105 },
+      { 0x390d30e,0x37033e0,0x110cb32,0x14c37a0,0x20a3b27,0x2f00ce6,
+        0x2f1dc52,0x34988c6,0x0c29606,0x01dc7e7 } },
+    /* 109 */
+    { { 0x1040739,0x24f9de1,0x2939999,0x2e6009a,0x244539d,0x17e3f09,
+        0x00f6f2f,0x1c63b3d,0x2310362,0x019109e },
+      { 0x1428aa8,0x3cb61e1,0x09a84f4,0x0ffafed,0x07b7adc,0x08f406b,
+        0x1b2c6df,0x035b480,0x3496ae9,0x012766d } },
+    /* 110 */
+    { { 0x35d1099,0x2362f10,0x1a08cc7,0x13a3a34,0x12adbcd,0x32da290,
+        0x02e2a02,0x151140b,0x01b3f60,0x0240df6 },
+      { 0x34c7b61,0x2eb09c1,0x172e7cd,0x2ad5eff,0x2fe2031,0x25b54d4,
+        0x0cec965,0x18e7187,0x26a7cc0,0x00230f7 } },
+    /* 111 */
+    { { 0x2d552ab,0x374083d,0x01f120f,0x2601736,0x156baff,0x04d44a4,
+        0x3b7c3e9,0x1acbc1b,0x0424579,0x031a425 },
+      { 0x1231bd1,0x0eba710,0x020517b,0x21d7316,0x21eac6e,0x275a848,
+        0x0837abf,0x0eb0082,0x302cafe,0x00fe8f6 } },
+    /* 112 */
+    { { 0x1058880,0x28f9941,0x03f2d75,0x3bd90e5,0x17da365,0x2ac9249,
+        0x07861cf,0x023fd05,0x1b0fdb8,0x031712f },
+      { 0x272b56b,0x04f8d2c,0x043a735,0x25446e4,0x1c8327e,0x221125a,
+        0x0ce37df,0x2dad7f6,0x39446c2,0x00b55b6 } },
+    /* 113 */
+    { { 0x346ac6b,0x05e0bff,0x2425246,0x0981e8b,0x1d19f79,0x2692378,
+        0x3ea3c40,0x2e90beb,0x19de503,0x003d5af },
+      { 0x05cda49,0x353b44d,0x299d137,0x3f205bc,0x2821158,0x3ad0d00,
+        0x06a54aa,0x2d7c79f,0x39d1173,0x01000ee } },
+    /* 114 */
+    { { 0x0803387,0x3a06268,0x14043b8,0x3d4e72f,0x1ece115,0x0a1dfc8,
+        0x17208dd,0x0be790a,0x122a07f,0x014dd95 },
+      { 0x0a4182d,0x202886a,0x1f79a49,0x1e8c867,0x0a2bbd0,0x28668b5,
+        0x0d0a2e1,0x115259d,0x3586c5d,0x01e815b } },
+    /* 115 */
+    { { 0x18a2a47,0x2c95627,0x2773646,0x1230f7c,0x15b5829,0x2fc354e,
+        0x2c000ea,0x099d547,0x2f17a1a,0x01df520 },
+      { 0x3853948,0x06f6561,0x3feeb8a,0x2f5b3ef,0x3a6f817,0x01a0791,
+        0x2ec0578,0x2c392ad,0x12b2b38,0x0104540 } },
+    /* 116 */
+    { { 0x1e28ced,0x0fc3d1b,0x2c473c7,0x1826c4f,0x21d5da7,0x39718e4,
+        0x38ce9e6,0x0251986,0x172fbea,0x0337c11 },
+      { 0x053c3b0,0x0f162db,0x043c1cb,0x04111ee,0x297fe3c,0x32e5e03,
+        0x2b8ae12,0x0c427ec,0x1da9738,0x03b9c0f } },
+    /* 117 */
+    { { 0x357e43a,0x054503f,0x11b8345,0x34ec6e0,0x2d44660,0x3d0ae61,
+        0x3b5dff8,0x33884ac,0x09da162,0x00a82b6 },
+      { 0x3c277ba,0x129a51a,0x027664e,0x1530507,0x0c788c9,0x2afd89d,
+        0x1aa64cc,0x1196450,0x367ac2b,0x0358b42 } },
+    /* 118 */
+    { { 0x0054ac4,0x1761ecb,0x378839c,0x167c9f7,0x2570058,0x0604a35,
+        0x37cbf3b,0x0909bb7,0x3f2991c,0x02ce688 },
+      { 0x0b16ae5,0x212857c,0x351b952,0x2c684db,0x30c6a05,0x09c01e0,
+        0x23c137f,0x1331475,0x092c067,0x0013b40 } },
+    /* 119 */
+    { { 0x2e90393,0x0617466,0x24e61f4,0x0a528f5,0x03047b4,0x2153f05,
+        0x0001a69,0x30e1eb8,0x3c10177,0x0282a47 },
+      { 0x22c831e,0x28fc06b,0x3e16ff0,0x208adc9,0x0bb76ae,0x28c1d6d,
+        0x12c8a15,0x031063c,0x1889ed2,0x002133e } },
+    /* 120 */
+    { { 0x0a6becf,0x14277bf,0x3328d98,0x201f7fe,0x12fceae,0x1de3a2e,
+        0x0a15c44,0x3ddf976,0x1b273ab,0x0355e55 },
+      { 0x1b5d4f1,0x369e78c,0x3a1c210,0x12cf3e9,0x3aa52f0,0x309f082,
+        0x112089d,0x107c753,0x24202d1,0x023853a } },
+    /* 121 */
+    { { 0x2897042,0x140d17c,0x2c4aeed,0x07d0d00,0x18d0533,0x22f7ec8,
+        0x19c194c,0x3456323,0x2372aa4,0x0165f86 },
+      { 0x30bd68c,0x1fb06b3,0x0945032,0x372ac09,0x06d4be0,0x27f8fa1,
+        0x1c8d7ac,0x137a96e,0x236199b,0x0328fc0 } },
+    /* 122 */
+    { { 0x170bd20,0x2842d58,0x1de7592,0x3c5b4fd,0x20ea897,0x12cab78,
+        0x363ff14,0x01f928c,0x17e309c,0x02f79ff },
+      { 0x0f5432c,0x2edb4ae,0x044b516,0x32f810d,0x2210dc1,0x23e56d6,
+        0x301e6ff,0x34660f6,0x10e0a7d,0x02d88eb } },
+    /* 123 */
+    { { 0x0c7b65b,0x2f59d58,0x2289a75,0x2408e92,0x1ab8c55,0x1ec99e5,
+        0x220fd0d,0x04defe0,0x24658ec,0x035aa8b },
+      { 0x138bb85,0x2f002d4,0x295c10a,0x08760ce,0x28c31d1,0x1c0a8cb,
+        0x0ff00b1,0x144eac9,0x2e02dcc,0x0044598 } },
+    /* 124 */
+    { { 0x3b42b87,0x050057b,0x0dff781,0x1c06db1,0x1bd9f5d,0x1f5f04a,
+        0x2cccd7a,0x143e19b,0x1cb94b7,0x036cfb8 },
+      { 0x34837cf,0x3cf6c3c,0x0d4fb26,0x22ee55e,0x1e7eed1,0x315995f,
+        0x2cdf937,0x1a96574,0x0425220,0x0221a99 } },
+    /* 125 */
+    { { 0x1b569ea,0x0d33ed9,0x19c13c2,0x107dc84,0x2200111,0x0569867,
+        0x2dc85da,0x05ef22e,0x0eb018a,0x029c33d },
+      { 0x04a6a65,0x3e5eba3,0x378f224,0x09c04d0,0x036e5cf,0x3df8258,
+        0x3a609e4,0x1eddef8,0x2abd174,0x02a91dc } },
+    /* 126 */
+    { { 0x2a60cc0,0x1d84c5e,0x115f676,0x1840da0,0x2c79163,0x2f06ed6,
+        0x198bb4b,0x3e5d37b,0x1dc30fa,0x018469b },
+      { 0x15ee47a,0x1e32f30,0x16a530e,0x2093836,0x02e8962,0x3767b62,
+        0x335adf3,0x27220db,0x2f81642,0x0173ffe } },
+    /* 127 */
+    { { 0x37a99cd,0x1533fe6,0x05a1c0d,0x27610f1,0x17bf3b9,0x0b1ce78,
+        0x0a908f6,0x265300e,0x3237dc1,0x01b969a },
+      { 0x3a5db77,0x2d15382,0x0d63ef8,0x1feb3d8,0x0b7b880,0x19820de,
+        0x11c0c67,0x2af3396,0x38d242d,0x0120688 } },
+    /* 128 */
+    { { 0x1d0b34a,0x05ef00d,0x00a7e34,0x1ae0c9f,0x1440b38,0x300d8b4,
+        0x37262da,0x3e50e3e,0x14ce0cd,0x00b1044 },
+      { 0x195a0b1,0x173bc6b,0x03622ba,0x2a19f55,0x1c09b37,0x07921b2,
+        0x16cdd20,0x24a5c9b,0x2bf42ff,0x00811de } },
+    /* 129 */
+    { { 0x0d65dbf,0x145cf06,0x1ad82f7,0x038ce7b,0x077bf94,0x33c4007,
+        0x22d26bd,0x25ad9c0,0x09ac773,0x02b1990 },
+      { 0x2261cc3,0x2ecdbf1,0x3e908b0,0x3246439,0x0213f7b,0x1179b04,
+        0x01cebaa,0x0be1595,0x175cc12,0x033a39a } },
+    /* 130 */
+    { { 0x00a67d2,0x086d06f,0x248a0f1,0x0291134,0x362d476,0x166d1cd,
+        0x044f1d6,0x2d2a038,0x365250b,0x0023f78 },
+      { 0x08bf287,0x3b0f6a1,0x1d6eace,0x20b4cda,0x2c2a621,0x0912520,
+        0x02dfdc9,0x1b35cd6,0x3d2565d,0x00bdf8b } },
+    /* 131 */
+    { { 0x3770fa7,0x2e4b6f0,0x03f9ae4,0x170de41,0x1095e8d,0x1dd845c,
+        0x334e9d1,0x00ab953,0x12e9077,0x03196fa },
+      { 0x2fd0a40,0x228c0fd,0x384b275,0x38ef339,0x3e7d822,0x3e5d9ef,
+        0x24f5854,0x0ece9eb,0x247d119,0x012ffe3 } },
+    /* 132 */
+    { { 0x0ff1480,0x07487c0,0x1b16cd4,0x1f41d53,0x22ab8fb,0x2f83cfa,
+        0x01d2efb,0x259f6b2,0x2e65772,0x00f9392 },
+      { 0x05303e6,0x23cdb4f,0x23977e1,0x12e4898,0x03bd999,0x0c930f0,
+        0x170e261,0x180a27b,0x2fd58ec,0x014e22b } },
+    /* 133 */
+    { { 0x25d7713,0x0c5fad7,0x09daad1,0x3b9d779,0x109b985,0x1d3ec98,
+        0x35bc4fc,0x2f838cb,0x0d14f75,0x0173e42 },
+      { 0x2657b12,0x10d4423,0x19e6760,0x296e5bb,0x2bfd421,0x25c3330,
+        0x29f51f8,0x0338838,0x24060f0,0x029a62e } },
+    /* 134 */
+    { { 0x3748fec,0x2c5a1bb,0x2cf973d,0x289fa74,0x3e6e755,0x38997bf,
+        0x0b6544c,0x2b6358c,0x38a7aeb,0x02c50bb },
+      { 0x3d5770a,0x06be7c5,0x012fad3,0x19cb2cd,0x266af3b,0x3ccd677,
+        0x160d1bd,0x141d5af,0x2965851,0x034625a } },
+    /* 135 */
+    { { 0x3c41c08,0x255eacc,0x22e1ec5,0x2b151a3,0x087de94,0x311cbdb,
+        0x016b73a,0x368e462,0x20b7981,0x0099ec3 },
+      { 0x262b988,0x1539763,0x21e76e5,0x15445b4,0x1d8ddc7,0x34a9be6,
+        0x10faf03,0x24e4d18,0x07aa111,0x02d538a } },
+    /* 136 */
+    { { 0x38a876b,0x048ad45,0x04b40a0,0x3fc2144,0x251ff96,0x13ca7dd,
+        0x0b31ab1,0x3539814,0x28b5f87,0x0212aec },
+      { 0x270790a,0x350e7e0,0x346bd5e,0x276178f,0x22d6cb5,0x3078884,
+        0x355c1b6,0x15901d7,0x3671765,0x03950db } },
+    /* 137 */
+    { { 0x286e8d5,0x2409788,0x13be53f,0x2d21911,0x0353c95,0x10238e8,
+        0x32f5bde,0x3a67b60,0x28b5b9c,0x001013d },
+      { 0x381e8e5,0x0cef7a9,0x2f5bcad,0x06058f0,0x33cdf50,0x04672a8,
+        0x1769600,0x31c055d,0x3df0ac1,0x00e9098 } },
+    /* 138 */
+    { { 0x2eb596d,0x197b326,0x12b4c29,0x39c08f2,0x101ea03,0x3804e58,
+        0x04b4b62,0x28d9d1c,0x13f905e,0x0032a3f },
+      { 0x11b2b61,0x08e9095,0x0d06925,0x270e43f,0x21eb7a8,0x0e4a98f,
+        0x31d2be0,0x030cf9f,0x2644ddb,0x025b728 } },
+    /* 139 */
+    { { 0x07510af,0x2ed0e8e,0x2a01203,0x2a2a68d,0x0846fea,0x3e540de,
+        0x3a57702,0x1677348,0x2123aad,0x010d8f8 },
+      { 0x0246a47,0x0e871d0,0x124dca4,0x34b9577,0x2b362b8,0x363ebe5,
+        0x3086045,0x26313e6,0x15cd8bb,0x0210384 } },
+    /* 140 */
+    { { 0x023e8a7,0x0817884,0x3a0bf12,0x3376371,0x3c808a8,0x18e9777,
+        0x12a2721,0x35b538a,0x2bd30de,0x017835a },
+      { 0x0fc0f64,0x1c8709f,0x2d8807a,0x0743957,0x242eec0,0x347e76c,
+        0x27bef91,0x289689a,0x0f42945,0x01f7a92 } },
+    /* 141 */
+    { { 0x1060a81,0x3dbc739,0x1615abd,0x1cbe3e5,0x3e79f9c,0x1ab09a2,
+        0x136c540,0x05b473f,0x2beebfd,0x02af0a8 },
+      { 0x3e2eac7,0x19be474,0x04668ac,0x18f4b74,0x36f10ba,0x0a0b4c6,
+        0x10e3770,0x3bf059e,0x3946c7e,0x013a8d4 } },
+    /* 142 */
+    { { 0x266309d,0x28be354,0x1a3eed8,0x3020651,0x10a51c6,0x1e31770,
+        0x0af45a5,0x3ff0f3b,0x2891c94,0x00e9db9 },
+      { 0x17b0d0f,0x33a291f,0x0a5f9aa,0x25a3d61,0x2963ace,0x39a5fef,
+        0x230c724,0x1919146,0x10a465e,0x02084a8 } },
+    /* 143 */
+    { { 0x3ab8caa,0x31870f3,0x2390ef7,0x2103850,0x218eb8e,0x3a5ccf2,
+        0x1dff677,0x2c59334,0x371599c,0x02a9f2a },
+      { 0x0837bd1,0x3249cef,0x35d702f,0x3430dab,0x1c06407,0x108f692,
+        0x221292f,0x05f0c5d,0x073fe06,0x01038e0 } },
+    /* 144 */
+    { { 0x3bf9b7c,0x2020929,0x30d0f4f,0x080fef8,0x3365d23,0x1f3e738,
+        0x3e53209,0x1549afe,0x300b305,0x038d811 },
+      { 0x0c6c2c7,0x2e6445b,0x3ee64dc,0x022e932,0x0726837,0x0deb67b,
+        0x1ed4346,0x3857f73,0x277a3de,0x01950b5 } },
+    /* 145 */
+    { { 0x36c377a,0x0adb41e,0x08be3f3,0x11e40d1,0x36cb038,0x036a2bd,
+        0x3dd3a82,0x1bc875b,0x2ee09bb,0x02994d2 },
+      { 0x035facf,0x05e0344,0x07e630a,0x0ce772d,0x335e55a,0x111fce4,
+        0x250fe1c,0x3bc89ba,0x32fdc9a,0x03cf2d9 } },
+    /* 146 */
+    { { 0x355fd83,0x1c67f8e,0x1d10eb3,0x1b21d77,0x0e0d7a4,0x173a9e1,
+        0x2c9fa90,0x1c39cce,0x22eaae8,0x01f2bea },
+      { 0x153b338,0x0534107,0x26c69b8,0x283be1f,0x3e0acc0,0x059cac3,
+        0x13d1081,0x148bbee,0x3c1b9bd,0x002aac4 } },
+    /* 147 */
+    { { 0x2681297,0x3389e34,0x146addc,0x2c6d425,0x2cb350e,0x1986abc,
+        0x0431737,0x04ba4b7,0x2028470,0x012e469 },
+      { 0x2f8ddcf,0x3c4255c,0x1af4dcf,0x07a6a44,0x208ebf6,0x0dc90c3,
+        0x34360ac,0x072ad23,0x0537232,0x01254d3 } },
+    /* 148 */
+    { { 0x07b7e9d,0x3df5c7c,0x116f83d,0x28c4f35,0x3a478ef,0x3011fb8,
+        0x2f264b6,0x317b9e3,0x04fd65a,0x032bd1b },
+      { 0x2aa8266,0x3431de4,0x04bba04,0x19a44da,0x0edf454,0x392c5ac,
+        0x265168a,0x1dc3d5b,0x25704c6,0x00533a7 } },
+    /* 149 */
+    { { 0x25e8f91,0x1178fa5,0x2492994,0x2eb2c3c,0x0d3aca1,0x0322828,
+        0x1cc70f9,0x269c74c,0x0a53e4c,0x006edc2 },
+      { 0x18bdd7a,0x2a79a55,0x26b1d5c,0x0200628,0x0734a05,0x3273c7b,
+        0x13aa714,0x0040ac2,0x2f2da30,0x03e7449 } },
+    /* 150 */
+    { { 0x3f9563e,0x2f29eab,0x14a0749,0x3fad264,0x1dd077a,0x3d7c59c,
+        0x3a0311b,0x331a789,0x0b9729e,0x0201ebf },
+      { 0x1b08b77,0x2a4cdf2,0x3e387f8,0x21510f1,0x286c3a7,0x1dbf62e,
+        0x3afa594,0x3363217,0x0d16568,0x01d46b7 } },
+    /* 151 */
+    { { 0x0715c0d,0x28e2d04,0x17f78ae,0x1c63dda,0x1d113ea,0x0fefc1b,
+        0x1eab149,0x1d0fd99,0x0682537,0x00a7b11 },
+      { 0x10bebbc,0x11c672d,0x14223d9,0x2ff9141,0x1399ee5,0x34b7b6c,
+        0x0d5b3a8,0x01df643,0x0e392a4,0x03fe4dc } },
+    /* 152 */
+    { { 0x2b75b65,0x0b5a6f1,0x11c559a,0x3549999,0x24188f8,0x37a75f4,
+        0x29f33e3,0x34068a2,0x38ba2a9,0x025dd91 },
+      { 0x29af2c7,0x0988b64,0x0923885,0x1b539a4,0x1334f5d,0x226947a,
+        0x2cc7e5a,0x20beb39,0x13fac2f,0x01d298c } },
+    /* 153 */
+    { { 0x35f079c,0x137f76d,0x2fbbb2f,0x254638d,0x185b07c,0x1f34db7,
+        0x2cfcf0e,0x218f46d,0x2150ff4,0x02add6f },
+      { 0x33fc9b7,0x0d9f005,0x0fd081b,0x0834965,0x2b90a74,0x102448d,
+        0x3dbf03c,0x167d857,0x02e0b44,0x013afab } },
+    /* 154 */
+    { { 0x09f2c53,0x317f9d7,0x1411eb6,0x0463aba,0x0d25220,0x256b176,
+        0x087633f,0x2bff322,0x07b2c1b,0x037e662 },
+      { 0x10aaecb,0x23bb4a1,0x2272bb7,0x06c075a,0x09d4918,0x0736f2b,
+        0x0dd511b,0x101625e,0x0a7779f,0x009ec10 } },
+    /* 155 */
+    { { 0x33b2eb2,0x0176dfd,0x2118904,0x022386c,0x2e0df85,0x2588c9f,
+        0x1b71525,0x28fd540,0x137e4cf,0x02ce4f7 },
+      { 0x3d75165,0x0c39ecf,0x3554a12,0x30af34c,0x2d66344,0x3ded408,
+        0x36f1be0,0x0d065b0,0x012d046,0x0025623 } },
+    /* 156 */
+    { { 0x2601c3b,0x1824fc0,0x335fe08,0x3e33d70,0x0fb0252,0x252bfca,
+        0x1cf2808,0x1922e55,0x1a9db9f,0x020721e },
+      { 0x2f56c51,0x39a1f31,0x218c040,0x1a4fc5d,0x3fed471,0x0164d4e,
+        0x388a419,0x06f1113,0x0f55fc1,0x03e8352 } },
+    /* 157 */
+    { { 0x1608e4d,0x3872778,0x022cbc6,0x044d60a,0x3010dda,0x15fb0b5,
+        0x37ddc11,0x19f5bda,0x156b6a3,0x023a838 },
+      { 0x383b3b4,0x1380bc8,0x353ca35,0x250fc07,0x169966b,0x3780f29,
+        0x36632b2,0x2d6b13f,0x124fa00,0x00fd6ae } },
+    /* 158 */
+    { { 0x1739efb,0x2ec3656,0x2c0d337,0x3d39faf,0x1c751b0,0x04699f4,
+        0x252dd64,0x095b8b6,0x0872b74,0x022f1da },
+      { 0x2d3d253,0x38edca0,0x379fa5b,0x287d635,0x3a9f679,0x059d9ee,
+        0x0ac168e,0x3cd3e87,0x19060fc,0x02ce1bc } },
+    /* 159 */
+    { { 0x3edcfc2,0x0f04d4b,0x2f0d31f,0x1898be2,0x25396bf,0x15ca230,
+        0x02b4eae,0x2713668,0x0f71b06,0x0132d18 },
+      { 0x38095ea,0x1ed34d6,0x3603ae6,0x165bf01,0x192bbf8,0x1852859,
+        0x075f66b,0x1488f85,0x10895ef,0x014b035 } },
+    /* 160 */
+    { { 0x1339848,0x3084385,0x0c8d231,0x3a1c1de,0x0e87a28,0x255b85c,
+        0x1de6616,0x2702e74,0x1382bb0,0x012b0f2 },
+      { 0x198987d,0x381545a,0x34d619b,0x312b827,0x18b2376,0x28fe4cf,
+        0x20b7651,0x017d077,0x0c7e397,0x00e0365 } },
+    /* 161 */
+    { { 0x1542e75,0x0d56aa0,0x39b701a,0x287b806,0x396c724,0x0935c21,
+        0x3a29776,0x0debdac,0x171de26,0x00b38f8 },
+      { 0x1d5bc1a,0x3fad27d,0x22b5cfe,0x1f89ddf,0x0a65560,0x144dd5b,
+        0x2aac2f9,0x139353f,0x0520b62,0x00b9b36 } },
+    /* 162 */
+    { { 0x031c31d,0x16552e3,0x1a0c368,0x0016fc8,0x168533d,0x171e7b2,
+        0x17626e7,0x275502f,0x14742c6,0x03285dd },
+      { 0x2d2dbb2,0x3b6bffd,0x1d18cc6,0x2f45d2a,0x0fd0d8c,0x2915e3a,
+        0x1e8793a,0x0b39a1d,0x3139cab,0x02a5da9 } },
+    /* 163 */
+    { { 0x3fb353d,0x147c6e4,0x3a720a6,0x22d5ff3,0x1d75cab,0x06c54a0,
+        0x08cfa73,0x12666aa,0x3170a1f,0x021c829 },
+      { 0x13e1b90,0x3a34dda,0x1fc38c3,0x02c5bdb,0x2d345dc,0x14aa1d0,
+        0x28d00ab,0x224f23a,0x329c769,0x025c67b } },
+    /* 164 */
+    { { 0x0e35909,0x3bb6356,0x0116820,0x370cf77,0x29366d8,0x3881409,
+        0x3999d06,0x013075f,0x176e157,0x02941ca },
+      { 0x0e70b2e,0x28dfab1,0x2a8a002,0x15da242,0x084dcf6,0x116ca97,
+        0x31bf186,0x1dc9735,0x09df7b7,0x0264e27 } },
+    /* 165 */
+    { { 0x2da7a4b,0x3023c9e,0x1366238,0x00ff4e2,0x03abe9d,0x19bd44b,
+        0x272e897,0x20b91ad,0x2aa202c,0x02a2201 },
+      { 0x380184e,0x08112b4,0x0b85660,0x31049aa,0x3a8cb78,0x36113c5,
+        0x1670c0a,0x373f9e7,0x3fb4738,0x00010ef } },
+    /* 166 */
+    { { 0x2d5192e,0x26d770d,0x32af8d5,0x34d1642,0x1acf885,0x05805e0,
+        0x166d0a1,0x1219a0d,0x301ba6c,0x014bcfb },
+      { 0x2dcb64d,0x19cca83,0x379f398,0x08e01a0,0x10a482c,0x0103cc2,
+        0x0be5fa7,0x1f9d45b,0x1899ef2,0x00ca5af } },
+    /* 167 */
+    { { 0x14d81d7,0x2aea251,0x1b3c476,0x3bd47ae,0x29eade7,0x0715e61,
+        0x1a21cd8,0x1c7a586,0x2bfaee5,0x00ee43f },
+      { 0x096f7cb,0x0c08f95,0x1bc4939,0x361fed4,0x255be41,0x26fad73,
+        0x31dd489,0x02c600f,0x29d9f81,0x01ba201 } },
+    /* 168 */
+    { { 0x03ea1db,0x1eac46d,0x1292ce3,0x2a54967,0x20a7ff1,0x3e13c61,
+        0x1b02218,0x2b44e14,0x3eadefa,0x029c88a },
+      { 0x30a9144,0x31e3b0a,0x19c5a2a,0x147cbe9,0x05a0240,0x051f38e,
+        0x11eca56,0x31a4247,0x123bc2a,0x02fa535 } },
+    /* 169 */
+    { { 0x3226ce7,0x1251782,0x0b7072f,0x11e59fa,0x2b8afd7,0x169b18f,
+        0x2a46f18,0x31d9bb7,0x2fe9be8,0x01de0b7 },
+      { 0x1b38626,0x34aa90f,0x3ad1760,0x21ddbd9,0x3460ae7,0x1126736,
+        0x1b86fc5,0x0b92cd0,0x167a289,0x000e0e1 } },
+    /* 170 */
+    { { 0x1ec1a0f,0x36bbf5e,0x1c972d8,0x3f73ace,0x13bbcd6,0x23d86a5,
+        0x175ffc5,0x2d083d5,0x2c4adf7,0x036f661 },
+      { 0x1f39eb7,0x2a20505,0x176c81a,0x3d6e636,0x16ee2fc,0x3cbdc5f,
+        0x25475dc,0x2ef4151,0x3c46860,0x0238934 } },
+    /* 171 */
+    { { 0x2587390,0x3639526,0x0588749,0x13c32fb,0x212bb19,0x09660f1,
+        0x207da4b,0x2bf211b,0x1c4407b,0x01506a6 },
+      { 0x24c8842,0x105a498,0x05ffdb2,0x0ab61b0,0x26044c1,0x3dff3d8,
+        0x1d14b44,0x0d74716,0x049f57d,0x030024b } },
+    /* 172 */
+    { { 0x32e61ef,0x31d70f7,0x35cad3c,0x320b86c,0x07e8841,0x027ca7d,
+        0x2d30d19,0x2513718,0x2347286,0x01d7901 },
+      { 0x3c237d0,0x107f16e,0x01c9e7d,0x3c3b13c,0x0c9537b,0x20af54d,
+        0x051a162,0x2161a47,0x258c784,0x016df2d } },
+    /* 173 */
+    { { 0x228ead1,0x29c2122,0x07f6964,0x023f4ed,0x1802dc5,0x19f96ce,
+        0x24bfd17,0x25e866b,0x2ba8df0,0x01eb84f },
+      { 0x2dd384e,0x05bbe3a,0x3f06fd2,0x366dacb,0x30361a2,0x2f36d7c,
+        0x0b98784,0x38ff481,0x074e2a8,0x01e1f60 } },
+    /* 174 */
+    { { 0x17fbb1c,0x0975add,0x1debc5e,0x2cb2880,0x3e47bdd,0x3488cff,
+        0x15e9a36,0x2121129,0x0199ef2,0x017088a },
+      { 0x0315250,0x352a162,0x17c1773,0x0ae09c2,0x321b21a,0x3bd74cf,
+        0x3c4ea1d,0x3cac2ad,0x3abbaf0,0x039174d } },
+    /* 175 */
+    { { 0x0511c8a,0x3c78d0a,0x2cd3d2d,0x322f729,0x3ebb229,0x09f0e69,
+        0x0a71a76,0x2e74d5e,0x12284df,0x03b5ef0 },
+      { 0x3dea561,0x0a9b7e4,0x0ed1cf2,0x237523c,0x05443f1,0x2eb48fa,
+        0x3861405,0x1b49f62,0x0c945ca,0x02ab25f } },
+    /* 176 */
+    { { 0x16bd00a,0x13a9d28,0x3cc1eb5,0x2b7d702,0x2d839e9,0x3e6ff01,
+        0x2bb7f11,0x3713824,0x3b31163,0x00c63e5 },
+      { 0x30d7138,0x0316fb0,0x0220ecc,0x08eaf0c,0x244e8df,0x0088d81,
+        0x37972fb,0x3fd34ae,0x2a19a84,0x03e907e } },
+    /* 177 */
+    { { 0x2642269,0x0b65d29,0x03bd440,0x33a6ede,0x3c81814,0x2507982,
+        0x0d38e47,0x3a788e6,0x32c1d26,0x00e2eda },
+      { 0x2577f87,0x392895a,0x3e1cc64,0x14f7047,0x08b52d2,0x08a01ca,
+        0x336abf6,0x00697fc,0x105ce76,0x0253742 } },
+    /* 178 */
+    { { 0x293f92a,0x33df737,0x3315156,0x32e26d7,0x0a01333,0x26579d4,
+        0x004df9c,0x0aba409,0x067d25c,0x02481de },
+      { 0x3f39d44,0x1c78042,0x13d7e24,0x0825aed,0x35f2c90,0x3270f63,
+        0x04b7b35,0x3ad4531,0x28bd29b,0x0207a10 } },
+    /* 179 */
+    { { 0x077199f,0x270aeb1,0x0dd96dd,0x3b9ad7b,0x28cb8ee,0x3903f43,
+        0x37db3fe,0x292c62b,0x362dbbf,0x006e52a },
+      { 0x247f143,0x0362cf3,0x216344f,0x3f18fd1,0x351e623,0x31664e0,
+        0x0f270fc,0x243bbc6,0x2280555,0x001a8e3 } },
+    /* 180 */
+    { { 0x3355b49,0x2c04e6c,0x399b2e5,0x182d3af,0x020e265,0x09a7cf7,
+        0x0ffa6bd,0x353e302,0x02083d9,0x029ecdb },
+      { 0x33e8830,0x0570e86,0x1c0b64d,0x386a27e,0x0d5fcea,0x0b45a4c,
+        0x2ee4a2e,0x0a8833f,0x2b4a282,0x02f9531 } },
+    /* 181 */
+    { { 0x191167c,0x36cf7e3,0x225ed6c,0x1e79e99,0x0517c3f,0x11ab1fd,
+        0x05648f3,0x08aedc4,0x1abeae0,0x02fcc29 },
+      { 0x3828a68,0x1e16fa4,0x30368e7,0x0c9fcfb,0x25161c3,0x24851ac,
+        0x1b5feb5,0x344eb84,0x0de2732,0x0347208 } },
+    /* 182 */
+    { { 0x038b363,0x384d1e4,0x2519043,0x151ac17,0x158c11f,0x009b2b4,
+        0x257abe6,0x2368d3f,0x3ed68a1,0x02df45e },
+      { 0x29c2559,0x2962478,0x3d8444c,0x1d96fff,0x04f7a03,0x1391a52,
+        0x0de4af7,0x3319126,0x15e6412,0x00e65ff } },
+    /* 183 */
+    { { 0x3d61507,0x1d1a0a2,0x0d2af20,0x354d299,0x329e132,0x2a28578,
+        0x2ddfb08,0x04fa3ff,0x1293c6c,0x003bae2 },
+      { 0x3e259f8,0x1a68fa9,0x3e67e9b,0x39b44f9,0x1ce1db7,0x347e9a1,
+        0x3318f6a,0x2dbbc9d,0x2f8c922,0x008a245 } },
+    /* 184 */
+    { { 0x212ab5b,0x2b896c2,0x0136959,0x07e55ef,0x0cc1117,0x05b8ac3,
+        0x18429ed,0x025fa01,0x11d6e93,0x03b016b },
+      { 0x03f3708,0x2e96fab,0x1d77157,0x0d4c2d6,0x131baf9,0x0608d39,
+        0x3552371,0x06cdd1e,0x1567ff1,0x01f4c50 } },
+    /* 185 */
+    { { 0x2dfefab,0x270173d,0x37077bd,0x1a372cd,0x1be2f22,0x28e2ee5,
+        0x3ead973,0x35e8f94,0x2fc9bc1,0x03a7399 },
+      { 0x36a02a1,0x2855d9b,0x00ed75a,0x37d8398,0x138c087,0x233706e,
+        0x147f346,0x01947e2,0x3017228,0x0365942 } },
+    /* 186 */
+    { { 0x2057e60,0x2d31296,0x25e4504,0x2fa37bc,0x1cbccc3,0x1f0732f,
+        0x3532081,0x2de8a98,0x19a804e,0x005359a },
+      { 0x31f411a,0x2a10576,0x369c2c8,0x02fe035,0x109fbaf,0x30bddeb,
+        0x1eef901,0x1662ad3,0x0410d43,0x01bd31a } },
+    /* 187 */
+    { { 0x2c24a96,0x1b7d3a5,0x19a3872,0x217f2f6,0x2534dbc,0x2cab8c2,
+        0x066ef28,0x26aecf1,0x0fd6118,0x01310d4 },
+      { 0x055b8da,0x1fdc5be,0x38a1296,0x25118f0,0x341a423,0x2ba4cd0,
+        0x3e1413e,0x062d70d,0x2425a31,0x029c9b4 } },
+    /* 188 */
+    { { 0x08c1086,0x1acfba5,0x22e1dae,0x0f72f4e,0x3f1de50,0x0f408bc,
+        0x35ed3f0,0x3ce48fc,0x282cc6c,0x004d8e7 },
+      { 0x1afaa86,0x24e3ef3,0x22589ac,0x3ec9952,0x1f45bc5,0x14144ca,
+        0x23b26e4,0x0d68c65,0x1e1c1a3,0x032a4d9 } },
+    /* 189 */
+    { { 0x03b2d20,0x16b1d53,0x241b361,0x05e4138,0x1742a54,0x32741c7,
+        0x0521c4c,0x1ca96c2,0x034970b,0x02738a7 },
+      { 0x13e0ad6,0x207dcdb,0x034c8cc,0x27bcbe1,0x18060da,0x33a18b6,
+        0x2d1d1a6,0x2be60d7,0x3d7ab42,0x012312a } },
+    /* 190 */
+    { { 0x0c7485a,0x06c3310,0x0dbfd22,0x2ef949d,0x0ead455,0x098f4ba,
+        0x3c76989,0x0cf2d24,0x032f67b,0x01e005f },
+      { 0x30cb5ee,0x0d5da64,0x0ed2b9d,0x2503102,0x1c0d14e,0x1cbc693,
+        0x37bf552,0x07013e2,0x054de5c,0x014f341 } },
+    /* 191 */
+    { { 0x128ccac,0x1617e97,0x346ebcd,0x158016d,0x25f823e,0x34048ea,
+        0x39f0a1c,0x3ea3df1,0x1c1d3d7,0x03ba919 },
+      { 0x151803b,0x01967c1,0x2f70781,0x27df39a,0x06c0b59,0x24a239c,
+        0x15a7702,0x2464d06,0x2a47ae6,0x006db90 } },
+    /* 192 */
+    { { 0x27d04c3,0x024df3d,0x38112e8,0x38a27ba,0x01e312b,0x0965358,
+        0x35d8879,0x2f4f55a,0x214187f,0x0008936 },
+      { 0x05fe36f,0x2ee18c3,0x1f5f87a,0x1813bd4,0x0580f3c,0x0ed0a7b,
+        0x0fb1bfb,0x3fcce59,0x2f042bf,0x01820e3 } },
+    /* 193 */
+    { { 0x20bbe99,0x32cbc9f,0x39ee432,0x3cc12a8,0x37bda44,0x3ea4e40,
+        0x097c7a9,0x0590d7d,0x2022d33,0x018dbac },
+      { 0x3ae00aa,0x3439864,0x2d2ffcf,0x3f8c6b9,0x0875a00,0x3e4e407,
+        0x3658a29,0x22eb3d0,0x2b63921,0x022113b } },
+    /* 194 */
+    { { 0x33bae58,0x05c749a,0x1f3e114,0x1c45f8e,0x27db3df,0x06a3ab6,
+        0x37bc7f8,0x1e27b34,0x3dc51fb,0x009eea0 },
+      { 0x3f54de5,0x3d0e7fe,0x1a71a7d,0x02ed7f8,0x0727703,0x2ca5e92,
+        0x2e8e35d,0x292ad0b,0x13487f3,0x02b6d8b } },
+    /* 195 */
+    { { 0x175df2a,0x05a28a8,0x32e99b1,0x13d8630,0x2082aa0,0x11ac245,
+        0x24f2e71,0x322cb27,0x17675e7,0x02e643f },
+      { 0x1f37313,0x2765ad3,0x0789082,0x1e742d0,0x11c2055,0x2021dc4,
+        0x09ae4a7,0x346359b,0x2f94d10,0x0205c1f } },
+    /* 196 */
+    { { 0x3d6ff96,0x1f2ac80,0x336097d,0x3f03610,0x35b851b,0x010b6d2,
+        0x0823c4d,0x2a9709a,0x2ead5a8,0x00de4b6 },
+      { 0x01afa0b,0x0621965,0x3671528,0x1050b60,0x3f3e9e7,0x2f93829,
+        0x0825275,0x006e85f,0x35e94b0,0x016af58 } },
+    /* 197 */
+    { { 0x2c4927c,0x3ea1382,0x0f23727,0x0d69f23,0x3e38860,0x2b72837,
+        0x3cd5ea4,0x2d84292,0x321846a,0x016656f },
+      { 0x29dfa33,0x3e182e0,0x018be90,0x2ba563f,0x2caafe2,0x218c0d9,
+        0x3baf447,0x1047a6c,0x0a2d483,0x01130cb } },
+    /* 198 */
+    { { 0x00ed80c,0x2a5fc79,0x0a82a74,0x2c4c74b,0x15f938c,0x30b5ab6,
+        0x32124b7,0x295314f,0x2fb8082,0x007c858 },
+      { 0x20b173e,0x19f315c,0x12f97e4,0x198217c,0x040e8a6,0x3275977,
+        0x2bc20e4,0x01f2633,0x02bc3e9,0x023c750 } },
+    /* 199 */
+    { { 0x3c4058a,0x24be73e,0x16704f5,0x2d8a4bd,0x3b15e14,0x3076315,
+        0x1cfe37b,0x36fe715,0x343926e,0x02c6603 },
+      { 0x2c76b09,0x0cf824c,0x3f7898c,0x274cec1,0x11df527,0x18eed18,
+        0x08ead48,0x23915bc,0x19b3744,0x00a0a2b } },
+    /* 200 */
+    { { 0x0cf4ac5,0x1c8b131,0x0afb696,0x0ff7799,0x2f5ac1a,0x022420c,
+        0x11baa2e,0x2ce4015,0x1275a14,0x0125cfc },
+      { 0x22eac5d,0x360cd4c,0x3568e59,0x3d42f66,0x35e07ee,0x09620e4,
+        0x36720fa,0x22b1eac,0x2d0db16,0x01b6b23 } },
+    /* 201 */
+    { { 0x1a835ef,0x1516bbb,0x2d51f7b,0x3487443,0x14aa113,0x0dd06c2,
+        0x1a65e01,0x379300d,0x35920b9,0x012c8fb },
+      { 0x04c7341,0x2eda00f,0x3c37e82,0x1b4fd62,0x0d45770,0x1478fba,
+        0x127863a,0x26939cd,0x134ddf4,0x01375c5 } },
+    /* 202 */
+    { { 0x1476cd9,0x1119ca5,0x325bbf9,0x0bf8c69,0x0648d07,0x312d9f8,
+        0x01c8b8f,0x136ec51,0x0002f4a,0x03f4c5c },
+      { 0x195d0e1,0x10ffd22,0x29aa1cb,0x3443bdc,0x276e695,0x05e6260,
+        0x15f9764,0x3cd9783,0x18c9569,0x0053eb1 } },
+    /* 203 */
+    { { 0x312ae18,0x280197c,0x3fc9ad9,0x303f324,0x251958d,0x29f4a11,
+        0x2142408,0x3694366,0x25136ab,0x03b5f1d },
+      { 0x1d4abbc,0x1c3c689,0x13ea462,0x3cfc684,0x39b5dd8,0x2d4654b,
+        0x09b0755,0x27d4f18,0x3f74d2e,0x03fbf2d } },
+    /* 204 */
+    { { 0x2119185,0x2525eae,0x1ba4bd0,0x0c2ab11,0x1d54e8c,0x294845e,
+        0x2479dea,0x3602d24,0x17e87e0,0x0060069 },
+      { 0x0afffb0,0x34fe37f,0x1240073,0x02eb895,0x06cf33c,0x2d7f7ef,
+        0x1d763b5,0x04191e0,0x11e1ead,0x027e3f0 } },
+    /* 205 */
+    { { 0x269544c,0x0e85c57,0x3813158,0x19fc12d,0x20eaf85,0x1e2930c,
+        0x22a8fd2,0x1a6a478,0x09d3d3a,0x02a74e0 },
+      { 0x1a2da3b,0x30b0b16,0x0847936,0x3d86257,0x138ccbc,0x0f5421a,
+        0x25244e6,0x23bdd79,0x1aee117,0x00c01ae } },
+    /* 206 */
+    { { 0x1eead28,0x07cac32,0x1fbc0bb,0x17627d3,0x17eef63,0x0b3a24e,
+        0x0757fdb,0x3dd841d,0x3d745f8,0x002ae17 },
+      { 0x25b4549,0x29f24cf,0x2f21ecd,0x1725e48,0x04be2bb,0x10ee010,
+        0x1a1274b,0x10b0898,0x27511e9,0x02c48b5 } },
+    /* 207 */
+    { { 0x2a5ae7a,0x181ef99,0x0be33be,0x3e9dab7,0x101e703,0x3adb971,
+        0x1043014,0x2ebb2be,0x1c1097d,0x027d667 },
+      { 0x3f250ed,0x16dc603,0x20dc6d7,0x1d0d268,0x38eb915,0x02c89e8,
+        0x1605a41,0x12de109,0x0e08a29,0x01f554a } },
+    /* 208 */
+    { { 0x0c26def,0x163d988,0x2d1ef0f,0x3a960ac,0x1025585,0x0738e20,
+        0x27d79b0,0x05cc3ef,0x201303f,0x00a333a },
+      { 0x1644ba5,0x2af345e,0x30b8d1d,0x3a01bff,0x31fc643,0x1acf85e,
+        0x0a76fc6,0x04efe98,0x348a1d0,0x03062eb } },
+    /* 209 */
+    { { 0x1c4216d,0x18e3217,0x02ac34e,0x19c8185,0x200c010,0x17d4192,
+        0x13a1719,0x165af51,0x09db7a9,0x0277be0 },
+      { 0x3ab8d2c,0x2190b99,0x22b641e,0x0cd88de,0x3b42404,0x1310862,
+        0x106a6d6,0x23395f5,0x0b06880,0x000d5fe } },
+    /* 210 */
+    { { 0x0d2cc88,0x36f9913,0x339d8e9,0x237c2e3,0x0cc61c2,0x34c2832,
+        0x309874c,0x2621d28,0x2dd1b48,0x0392806 },
+      { 0x17cd8f9,0x07bab3d,0x0c482ed,0x0faf565,0x31b767d,0x2f4bde1,
+        0x295c717,0x330c29c,0x179ce10,0x0119b5f } },
+    /* 211 */
+    { { 0x1ada2c7,0x0c624a7,0x227d47d,0x30e3e6a,0x14fa0a6,0x0829678,
+        0x24fd288,0x2b46a43,0x122451e,0x0319ca9 },
+      { 0x186b655,0x01f3217,0x0af1306,0x0efe6b5,0x2f0235d,0x1c45ca9,
+        0x2086805,0x1d44e66,0x0faf2a6,0x0178f59 } },
+    /* 212 */
+    { { 0x33b4416,0x10431e6,0x2d99aa6,0x217aac9,0x0cd8fcf,0x2d95a9d,
+        0x3ff74ad,0x10bf17a,0x295eb8e,0x01b229e },
+      { 0x02a63bd,0x182e9ec,0x004710c,0x00e2e3c,0x06b2f23,0x04b642c,
+        0x2c37383,0x32a4631,0x022ad82,0x00d22b9 } },
+    /* 213 */
+    { { 0x0cda2fb,0x1d198d7,0x26d27f4,0x286381c,0x022acca,0x24ac7c8,
+        0x2df7824,0x0b4ba16,0x1e0d9ef,0x03041d3 },
+      { 0x29a65b3,0x0f3912b,0x151bfcf,0x2b0175c,0x0fd71e4,0x39aa5e2,
+        0x311f50c,0x13ff351,0x3dbc9e5,0x03eeb7e } },
+    /* 214 */
+    { { 0x0a99363,0x0fc7348,0x2775171,0x23db3c8,0x2b91565,0x134d66c,
+        0x0175cd2,0x1bf365a,0x2b48371,0x02dfe5d },
+      { 0x16dbf74,0x2389357,0x2f36575,0x3f5c70e,0x38d23ba,0x090f7f8,
+        0x3477600,0x3201523,0x32ecafc,0x03d3506 } },
+    /* 215 */
+    { { 0x1abd48d,0x073ca3f,0x38a451f,0x0d8cb01,0x1ce81be,0x05c51ba,
+        0x0e29741,0x03c41ab,0x0eae016,0x0060209 },
+      { 0x2e58358,0x1da62d9,0x2358038,0x14b39b2,0x1635687,0x39079b1,
+        0x380e345,0x1b49608,0x23983cf,0x019f97d } },
+    /* 216 */
+    { { 0x34899ef,0x332e373,0x04c0f89,0x3c27aed,0x1949015,0x09663b2,
+        0x2f9276b,0x07f1951,0x09a04c1,0x027fbde },
+      { 0x3d2a071,0x19fb3d4,0x1b096d3,0x1fe9146,0x3b10e1a,0x0478bbb,
+        0x2b3fb06,0x1388329,0x181a99c,0x02f2030 } },
+    /* 217 */
+    { { 0x1eb82e6,0x14dbe39,0x3920972,0x31fd5b2,0x21a484f,0x02d7697,
+        0x0e21715,0x37c431e,0x2629f8c,0x01249c3 },
+      { 0x26b50ad,0x26deefa,0x0ffc1a3,0x30688e2,0x39a0284,0x041c65e,
+        0x03eb178,0x0bdfd50,0x2f96137,0x034bb94 } },
+    /* 218 */
+    { { 0x0e0362a,0x334a162,0x194dd37,0x29e3e97,0x2442fa8,0x10d2949,
+        0x3836e5a,0x2dccebf,0x0bee5ab,0x037ed1e },
+      { 0x33eede6,0x3c739d9,0x2f04a91,0x350ad6c,0x3a5390a,0x14c368b,
+        0x26f7bf5,0x11ce979,0x0b408df,0x0366850 } },
+    /* 219 */
+    { { 0x28ea498,0x0886d5b,0x2e090e0,0x0a4d58f,0x2623478,0x0d74ab7,
+        0x2b83913,0x12c6b81,0x18d623f,0x01d8301 },
+      { 0x198aa79,0x26d6330,0x3a7f0b8,0x34bc1ea,0x2f74890,0x378955a,
+        0x204110f,0x0102538,0x02d8f19,0x01c5066 } },
+    /* 220 */
+    { { 0x14b0f45,0x2838cd3,0x14e16f0,0x0e0e4aa,0x2d9280b,0x0f18757,
+        0x3324c6b,0x1391ceb,0x1ce89d5,0x00ebe74 },
+      { 0x0930371,0x3de6048,0x3097fd8,0x1308705,0x3eda266,0x3108c26,
+        0x1545dcd,0x1f7583a,0x1c37395,0x02c7e05 } },
+    /* 221 */
+    { { 0x1fec44a,0x2a9e3a2,0x0caf84f,0x11cf2a9,0x0c8c2ae,0x06da989,
+        0x1c807dc,0x3c149a4,0x1141543,0x02906bb },
+      { 0x15ffe04,0x0d4e65f,0x2e20424,0x37d896d,0x18bacb2,0x1e05ddd,
+        0x1660be8,0x183be17,0x1dd86fb,0x035ba70 } },
+    /* 222 */
+    { { 0x2853264,0x0ba5fb1,0x0a0b3aa,0x2df88c1,0x2771533,0x23aba6f,
+        0x112bb7b,0x3e3086e,0x210ae9b,0x027271b },
+      { 0x030b74c,0x0269678,0x1e90a23,0x135a98c,0x24ed749,0x126de7c,
+        0x344b23a,0x186da27,0x19640fa,0x0159af5 } },
+    /* 223 */
+    { { 0x18061f3,0x3004630,0x3c70066,0x34df20f,0x1190b25,0x1c9cc91,
+        0x1fc8e02,0x0d17bc1,0x390f525,0x033cb1c },
+      { 0x0eb30cf,0x2f3ad04,0x303aa09,0x2e835dd,0x1cfd2eb,0x143fc95,
+        0x02c43a1,0x025e7a1,0x3558aa2,0x000bd45 } },
+    /* 224 */
+    { { 0x1db7d07,0x3bde52b,0x1500396,0x1089115,0x20b4fc7,0x1e2a8f3,
+        0x3f8eacc,0x365f7eb,0x1a5e8d4,0x0053a6b },
+      { 0x37079e2,0x120284b,0x000edaa,0x33792c2,0x145baa3,0x20e055f,
+        0x365e2d7,0x26ba005,0x3ab8e9d,0x0282b53 } },
+    /* 225 */
+    { { 0x2653618,0x2dd8852,0x2a5f0bf,0x0f0c7aa,0x2187281,0x1252757,
+        0x13e7374,0x3b47855,0x0b86e56,0x02f354c },
+      { 0x2e9c47b,0x2fa14cc,0x19ab169,0x3fad401,0x0dc2776,0x24afeed,
+        0x3a97611,0x0d07736,0x3cf6979,0x02424a0 } },
+    /* 226 */
+    { { 0x2e81a13,0x000c91d,0x123967b,0x265885c,0x29bee1a,0x0cb8675,
+        0x2d361bd,0x1526823,0x3c9ace1,0x00d7bad },
+      { 0x24e5bdc,0x02b969f,0x2c6e128,0x34edb3b,0x12dcd2c,0x3899af0,
+        0x24224c6,0x3a1914b,0x0f4448a,0x026a2cb } },
+    /* 227 */
+    { { 0x1d03b59,0x1c6fc82,0x32abf64,0x28ed96b,0x1c90e62,0x2f57bb2,
+        0x3ff168e,0x04de7fd,0x0f4d449,0x01af6d8 },
+      { 0x255bc30,0x2bfaf22,0x3fe0dad,0x0584025,0x1c79ead,0x3078ef7,
+        0x2197414,0x022a50b,0x0fd94ba,0x0007b0f } },
+    /* 228 */
+    { { 0x09485c2,0x09dfaf7,0x10c7ba6,0x1e48bec,0x248cc9a,0x028a362,
+        0x21d60f7,0x193d93d,0x1c04754,0x0346b2c },
+      { 0x2f36612,0x240ac49,0x0d8bd26,0x13b8186,0x259c3a4,0x020d5fb,
+        0x38a8133,0x09b0937,0x39d4056,0x01f7341 } },
+    /* 229 */
+    { { 0x05a4b48,0x1f534fc,0x07725ce,0x148dc8c,0x2adcd29,0x04aa456,
+        0x0f79718,0x066e346,0x189377d,0x002fd4d },
+      { 0x068ea73,0x336569b,0x184d35e,0x32a08e9,0x3c7f3bb,0x11ce9c8,
+        0x3674c6f,0x21bf27e,0x0d9e166,0x034a2f9 } },
+    /* 230 */
+    { { 0x0fa8e4b,0x2e6418e,0x18fc5d2,0x1ba24ff,0x0559f18,0x0dbedbf,
+        0x2de2aa4,0x22338e9,0x3aa510f,0x035d801 },
+      { 0x23a4988,0x02aad94,0x02732d1,0x111d374,0x0b455cf,0x0d01c9e,
+        0x067082a,0x2ec05fd,0x368b303,0x03cad4b } },
+    /* 231 */
+    { { 0x035b4ca,0x1fabea6,0x1cbc0d5,0x3f2ed9a,0x02d2232,0x1990c66,
+        0x2eb680c,0x3b4ea3b,0x18ecc5a,0x03636fa },
+      { 0x1a02709,0x26f8ff1,0x1fa8cba,0x397d6e8,0x230be68,0x043aa14,
+        0x3d43cdf,0x25c17fa,0x3a3ee55,0x0380564 } },
+    /* 232 */
+    { { 0x275a0a6,0x16bd43a,0x0033d3e,0x2b15e16,0x2512226,0x005d901,
+        0x26d50fd,0x3bc19bf,0x3b1aeb8,0x02bfb01 },
+      { 0x0bb0a31,0x26559e0,0x1aae7fb,0x330dcc2,0x16f1af3,0x06afce2,
+        0x13a15a0,0x2ff7645,0x3546e2d,0x029c6e4 } },
+    /* 233 */
+    { { 0x0f593d2,0x384b806,0x122bbf8,0x0a281e0,0x1d1a904,0x2e93cab,
+        0x0505db0,0x08f6454,0x05c6285,0x014e880 },
+      { 0x3f2b935,0x22d8e79,0x161a07c,0x16b060a,0x02bff97,0x146328b,
+        0x3ceea77,0x238f61a,0x19b3d58,0x02fd1f4 } },
+    /* 234 */
+    { { 0x17665d5,0x259e9f7,0x0de5672,0x15cbcbd,0x34e3030,0x035240f,
+        0x0005ae8,0x286d851,0x07f39c9,0x000070b },
+      { 0x1efc6d6,0x2a0051a,0x2724143,0x2a9ef1e,0x0c810bd,0x1e05429,
+        0x25670ba,0x2e66d7d,0x0e786ff,0x03f6b7e } },
+    /* 235 */
+    { { 0x3c00785,0x232e23f,0x2b67fd3,0x244ed23,0x077fa75,0x3cda3ef,
+        0x14d055b,0x0f25011,0x24d5aa4,0x00ea0e3 },
+      { 0x297bb9a,0x198ca4f,0x14d9561,0x18d1076,0x39eb933,0x2b6caa0,
+        0x1591a60,0x0768d45,0x257873e,0x00f36e0 } },
+    /* 236 */
+    { { 0x1e77eab,0x0502a5f,0x0109137,0x0350592,0x3f7e1c5,0x3ac7437,
+        0x2dcad2c,0x1fee9d8,0x089f1f5,0x0169833 },
+      { 0x0d45673,0x0d8e090,0x065580b,0x065644f,0x11b82be,0x3592dd0,
+        0x3284b8d,0x23f0015,0x16fdbfd,0x0248bfd } },
+    /* 237 */
+    { { 0x1a129a1,0x1977bb2,0x0e041b2,0x15f30a1,0x0a5b1ce,0x3afef8f,
+        0x380c46c,0x3358810,0x27df6c5,0x01ca466 },
+      { 0x3b90f9a,0x3d14ea3,0x031b298,0x02e2390,0x2d719c0,0x25bc615,
+        0x2c0e777,0x0226b8c,0x3803624,0x0179e45 } },
+    /* 238 */
+    { { 0x363cdfb,0x1bb155f,0x24fd5c1,0x1c7c72b,0x28e6a35,0x18165f2,
+        0x226bea5,0x0beaff3,0x371e24c,0x0138294 },
+      { 0x1765357,0x29034e9,0x22b4276,0x11035ce,0x23c89af,0x074468c,
+        0x3370ae4,0x013bae3,0x018d566,0x03d7fde } },
+    /* 239 */
+    { { 0x209df21,0x0f8ff86,0x0e47fbf,0x23b99ba,0x126d5d2,0x2722405,
+        0x16bd0a2,0x1799082,0x0e9533f,0x039077c },
+      { 0x3ba9e3f,0x3f6902c,0x1895305,0x3ac9813,0x3f2340c,0x3c0d9f1,
+        0x26e1927,0x0557c21,0x16eac4f,0x023b75f } },
+    /* 240 */
+    { { 0x3fc8ff3,0x0770382,0x342fc9a,0x0afa4db,0x314efd8,0x328e07b,
+        0x016f7cc,0x3ba599c,0x1caed8a,0x0050cb0 },
+      { 0x0b23c26,0x2120a5c,0x3273ec6,0x1cc1cd6,0x2a64fe8,0x2bbc3d6,
+        0x09f6e5e,0x34b1b8e,0x00b5ac8,0x032bbd2 } },
+    /* 241 */
+    { { 0x1315922,0x1725e1d,0x0ca5524,0x1c4c18f,0x3d82951,0x193bcb2,
+        0x0e60d0b,0x388dbcf,0x37e8efa,0x0342e85 },
+      { 0x1b3af60,0x26ba3ec,0x220e53a,0x394f4b6,0x01a796a,0x3e7bbca,
+        0x163605d,0x2b85807,0x17c1c54,0x03cc725 } },
+    /* 242 */
+    { { 0x1cc4597,0x1635492,0x2028c0f,0x2c2eb82,0x2dc5015,0x0d2a052,
+        0x05fc557,0x1f0ebbf,0x0cb96e1,0x0004d01 },
+      { 0x1a824bf,0x3896172,0x2ed7b29,0x178007a,0x0d59318,0x07bda2b,
+        0x2ee6826,0x0f9b235,0x04b9193,0x01bcddf } },
+    /* 243 */
+    { { 0x0333fd2,0x0eeb46a,0x15b89f9,0x00968aa,0x2a89302,0x2bdd6b3,
+        0x1e5037e,0x2541884,0x24ed2d0,0x01b6e8f },
+      { 0x04399cd,0x3be6334,0x3adea48,0x1bb9adc,0x31811c6,0x05fb2bc,
+        0x360752c,0x3d29dcb,0x3423bec,0x03c4f3c } },
+    /* 244 */
+    { { 0x119e2eb,0x2e7b02a,0x0f68cee,0x257d8b0,0x183a9a1,0x2ae88a6,
+        0x3a3bb67,0x2eb4f3e,0x1a9274b,0x0320fea },
+      { 0x2fa1ce0,0x346c2d8,0x2fbf0d7,0x3d4d063,0x0e58b60,0x09c1bc1,
+        0x28ef9e5,0x09a0efe,0x0f45d70,0x02d275c } },
+    /* 245 */
+    { { 0x2d5513b,0x31d443e,0x1e2d914,0x3b2c5d4,0x105f32e,0x27ee756,
+        0x050418d,0x3c73db6,0x1bb0c30,0x01673eb },
+      { 0x1cb7fd6,0x1eb08d5,0x26a3e16,0x2e20810,0x0249367,0x029e219,
+        0x2ec58c9,0x12d9fab,0x362354a,0x016eafc } },
+    /* 246 */
+    { { 0x2424865,0x260747b,0x177f37c,0x1e3cb95,0x08b0028,0x2783016,
+        0x2970f1b,0x323c1c0,0x2a79026,0x0186231 },
+      { 0x0f244da,0x26866f4,0x087306f,0x173ec20,0x31ecced,0x3c84d8d,
+        0x070f9b9,0x2e764d5,0x075df50,0x0264ff9 } },
+    /* 247 */
+    { { 0x32c3609,0x0c737e6,0x14ea68e,0x300b11b,0x184eb19,0x29dd440,
+        0x09ec1a9,0x185adeb,0x0664c80,0x0207dd9 },
+      { 0x1fbe978,0x30a969d,0x33561d7,0x34fc60e,0x36743fe,0x00774af,
+        0x0d1f045,0x018360e,0x12a5fe9,0x01592a0 } },
+    /* 248 */
+    { { 0x2817d1d,0x2993d3e,0x2e0f7a5,0x112faa0,0x255f968,0x355fe6a,
+        0x3f5a0fc,0x075b2d7,0x3cf00e5,0x0089afc },
+      { 0x32833cf,0x06a7e4b,0x09a8d6d,0x1693d3e,0x320a0a3,0x3cfdfdd,
+        0x136c498,0x1e0d845,0x347ff25,0x01a1de7 } },
+    /* 249 */
+    { { 0x3043d08,0x030705c,0x20fa79b,0x1d07f00,0x0a54467,0x29b49b4,
+        0x367e289,0x0b82f4d,0x0d1eb09,0x025ef2c },
+      { 0x32ed3c3,0x1baaa3c,0x3c482ab,0x146ca06,0x3c8a4f1,0x3e85e3c,
+        0x1bf4f3b,0x1195534,0x3e80a78,0x02a1cbf } },
+    /* 250 */
+    { { 0x32b2086,0x2de4d68,0x3486b1a,0x03a0583,0x2e1eb71,0x2dab9af,
+        0x10cd913,0x28daa6f,0x3fcb732,0x000a04a },
+      { 0x3605318,0x3f5f2b3,0x2d1da63,0x143f7f5,0x1646e5d,0x040b586,
+        0x1683982,0x25abe87,0x0c9fe53,0x001ce47 } },
+    /* 251 */
+    { { 0x380d02b,0x055fc22,0x3f7fc50,0x3458a1d,0x26b8333,0x23550ab,
+        0x0a1af87,0x0a821eb,0x2dc7e6d,0x00d574a },
+      { 0x07386e1,0x3ccd68a,0x3275b41,0x253e390,0x2fd272a,0x1e6627a,
+        0x2ca2cde,0x0e9e4a1,0x1e37c2a,0x00f70ac } },
+    /* 252 */
+    { { 0x0581352,0x2748701,0x02bed68,0x094dd9e,0x30a00c8,0x3fb5c07,
+        0x3bd5909,0x211ac80,0x1103ccd,0x0311e1a },
+      { 0x0c768ed,0x29dc209,0x36575db,0x009a107,0x272feea,0x2b33383,
+        0x313ed56,0x134c9cc,0x168d5bb,0x033310a } },
+    /* 253 */
+    { { 0x17620b9,0x143784f,0x256a94e,0x229664a,0x1d89a5c,0x1d521f2,
+        0x0076406,0x1c73f70,0x342aa48,0x03851fa },
+      { 0x0f3ae46,0x2ad3bab,0x0fbe274,0x3ed40d4,0x2fd4936,0x232103a,
+        0x2afe474,0x25b8f7c,0x047080e,0x008e6b0 } },
+    /* 254 */
+    { { 0x3fee8d4,0x347cd4a,0x0fec481,0x33fe9ec,0x0ce80b5,0x33a6bcf,
+        0x1c4c9e2,0x3967441,0x1a3f5f7,0x03157e8 },
+      { 0x257c227,0x1bc53a0,0x200b318,0x0fcd0af,0x2c5b165,0x2a413ec,
+        0x2fc998a,0x2da6426,0x19cd4f4,0x0025336 } },
+    /* 255 */
+    { { 0x303beba,0x2072135,0x32918a9,0x140cb3a,0x08631d1,0x0ef527b,
+        0x05f2c9e,0x2b4ce91,0x0b642ab,0x02e428c },
+      { 0x0a5abf9,0x15013ed,0x3603b46,0x30dd76d,0x3004750,0x28d7627,
+        0x1a42ccc,0x093ddbe,0x39a1b79,0x00067e2 } },
+};
+
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert result to affine co-ordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_base_10(sp_point* r, const sp_digit* k,
+        int map, void* heap)
+{
+    return sp_256_ecc_mulmod_stripe_10(r, &p256_base, p256_table,
+                                      k, map, heap);
+}
+
+#endif
+
+
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* Multiply a by scalar b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A scalar.
+ */
+SP_NOINLINE static void sp_256_mul_d_10(sp_digit* r, const sp_digit* a,
+    sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int64_t tb = b;
+    int64_t t = 0;
+    int i;
+
+    for (i = 0; i < 10; i++) {
+        t += tb * a[i];
+        r[i] = t & 0x3ffffff;
+        t >>= 26;
+    }
+    r[10] = (sp_digit)t;
+#else
+    int64_t tb = b;
+    int64_t t[10];
+
+    t[ 0] = Q6_P_mpy_RR(tb, a[0]);
+    t[ 1] = Q6_P_mpy_RR(tb, a[1]);
+    t[ 2] = Q6_P_mpy_RR(tb, a[2]);
+    t[ 3] = Q6_P_mpy_RR(tb, a[3]);
+    t[ 4] = Q6_P_mpy_RR(tb, a[4]);
+    t[ 5] = Q6_P_mpy_RR(tb, a[5]);
+    t[ 6] = Q6_P_mpy_RR(tb, a[6]);
+    t[ 7] = Q6_P_mpy_RR(tb, a[7]);
+    t[ 8] = Q6_P_mpy_RR(tb, a[8]);
+    t[ 9] = Q6_P_mpy_RR(tb, a[9]);
+    r[ 0] =                           Q6_R_and_RR(t[ 0], 0x3ffffff);
+    r[ 1] = (sp_digit)(t[ 0] >> 26) + Q6_R_and_RR(t[ 1], 0x3ffffff);
+    r[ 2] = (sp_digit)(t[ 1] >> 26) + Q6_R_and_RR(t[ 2], 0x3ffffff);
+    r[ 3] = (sp_digit)(t[ 2] >> 26) + Q6_R_and_RR(t[ 3], 0x3ffffff);
+    r[ 4] = (sp_digit)(t[ 3] >> 26) + Q6_R_and_RR(t[ 4], 0x3ffffff);
+    r[ 5] = (sp_digit)(t[ 4] >> 26) + Q6_R_and_RR(t[ 5], 0x3ffffff);
+    r[ 6] = (sp_digit)(t[ 5] >> 26) + Q6_R_and_RR(t[ 6], 0x3ffffff);
+    r[ 7] = (sp_digit)(t[ 6] >> 26) + Q6_R_and_RR(t[ 7], 0x3ffffff);
+    r[ 8] = (sp_digit)(t[ 7] >> 26) + Q6_R_and_RR(t[ 8], 0x3ffffff);
+    r[ 9] = (sp_digit)(t[ 8] >> 26) + Q6_R_and_RR(t[ 9], 0x3ffffff);
+    r[10] = (sp_digit)(t[ 9] >> 26);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#ifdef WOLFSSL_SP_DIV_32
+static WC_INLINE sp_digit sp_256_div_word_10(sp_digit d1, sp_digit d0,
+    sp_digit dv)
+{
+    sp_digit d, r, t, dv;
+    int64_t t0, t1;
+
+    /* dv has 14 bits. */
+    dv = (div >> 12) + 1;
+    /* All 26 bits from d1 and top 5 bits from d0. */
+    d = (d1 << 5) | (d0 >> 21);
+    r = d / dv;
+    d -= r * dv;
+    /* Up to 17 bits in r */
+    /* Next 9 bits from d0. */
+    d <<= 9;
+    r <<= 9;
+    d |= (d0 >> 12) & ((1 << 9) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 26 bits in r */
+
+    /* Handle rounding error with dv - top part */
+    t0 = ((int64_t)d1 << 26) + d0;
+    t1 = (int64_t)r * dv;
+    t1 = t0 - t1;
+    t = (sp_digit)(t1 >> 12) / dv;
+    r += t;
+
+    /* Handle rounding error with dv - bottom 32 bits */
+    t1 = (sp_digit)t0 - (r * dv);
+    t = (sp_digit)t1 / dv;
+    r += t;
+
+    return r;
+}
+#endif /* WOLFSSL_SP_DIV_32 */
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Number to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_256_div_10(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    int i;
+#ifndef WOLFSSL_SP_DIV_32
+    int64_t d1;
+#endif
+    sp_digit dv, r1;
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    sp_digit* td;
+#else
+    sp_digit t1d[20], t2d[10 + 1];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    int err = MP_OKAY;
+
+    (void)m;
+
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (3 * 10 + 1), NULL,
+                                                       DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+        t1 = td;
+        t2 = td + 2 * 10;
+#else
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
+        dv = d[9];
+        XMEMCPY(t1, a, sizeof(*t1) * 2U * 10U);
+        for (i=9; i>=0; i--) {
+            t1[10 + i] += t1[10 + i - 1] >> 26;
+            t1[10 + i - 1] = Q6_R_and_RR(t1[10 + i - 1], 0x3ffffff);
+#ifndef WOLFSSL_SP_DIV_32
+            d1 = t1[10 + i];
+            d1 <<= 26;
+            d1 += t1[10 + i - 1];
+            r1 = (sp_digit)(d1 / dv);
+#else
+            r1 = sp_256_div_word_10(t1[10 + i], t1[10 + i - 1], dv);
+#endif
+
+            sp_256_mul_d_10(t2, d, r1);
+            (void)sp_256_sub_10(&t1[i], &t1[i], t2);
+            t1[10 + i] -= t2[10];
+            t1[10 + i] += t1[10 + i - 1] >> 26;
+            t1[10 + i - 1] = Q6_R_and_RR(t1[10 + i - 1], 0x3ffffff);
+            r1 = (((-t1[10 + i]) << 26) - t1[10 + i - 1]) / dv;
+            r1++;
+            sp_256_mul_d_10(t2, d, r1);
+            (void)sp_256_add_10(&t1[i], &t1[i], t2);
+            t1[10 + i] += t1[10 + i - 1] >> 26;
+            t1[10 + i - 1] = Q6_R_and_RR(t1[10 + i - 1], 0x3ffffff);
+        }
+        t1[10 - 1] += t1[10 - 2] >> 26;
+        t1[10 - 2] &= 0x3ffffff;
+        d1 = t1[10 - 1];
+        r1 = (sp_digit)(d1 / dv);
+
+        sp_256_mul_d_10(t2, d, r1);
+        (void)sp_256_sub_10(t1, t1, t2);
+        XMEMCPY(r, t1, sizeof(*r) * 2U * 10U);
+        for (i=0; i<8; i++) {
+            r[i+1] += r[i] >> 26;
+            r[i] &= 0x3ffffff;
+        }
+        sp_256_cond_add_10(r, r, d, 0 - ((r[9] < 0) ?
+                    (sp_digit)1 : (sp_digit)0));
+    }
+
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_256_mod_10(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_256_div_10(a, m, NULL, r);
+}
+
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
+/* Order-2 for the P256 curve. */
+static const uint32_t p256_order_2[8] = {
+    0xfc63254fU,0xf3b9cac2U,0xa7179e84U,0xbce6faadU,0xffffffffU,0xffffffffU,
+    0x00000000U,0xffffffffU
+};
+#else
+/* The low half of the order-2 of the P256 curve. */
+static const uint32_t p256_order_low[4] = {
+    0xfc63254fU,0xf3b9cac2U,0xa7179e84U,0xbce6faadU
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Multiply two number mod the order of P256 curve. (r = a * b mod order)
+ *
+ * r  Result of the multiplication.
+ * a  First operand of the multiplication.
+ * b  Second operand of the multiplication.
+ */
+static void sp_256_mont_mul_order_10(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+    sp_256_mul_10(r, a, b);
+    sp_256_mont_reduce_order_10(r, p256_order, p256_mp_order);
+}
+
+/* Square number mod the order of P256 curve. (r = a * a mod order)
+ *
+ * r  Result of the squaring.
+ * a  Number to square.
+ */
+static void sp_256_mont_sqr_order_10(sp_digit* r, const sp_digit* a)
+{
+    sp_256_sqr_10(r, a);
+    sp_256_mont_reduce_order_10(r, p256_order, p256_mp_order);
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Square number mod the order of P256 curve a number of times.
+ * (r = a ^ n mod order)
+ *
+ * r  Result of the squaring.
+ * a  Number to square.
+ */
+static void sp_256_mont_sqr_n_order_10(sp_digit* r, const sp_digit* a, int n)
+{
+    int i;
+
+    sp_256_mont_sqr_order_10(r, a);
+    for (i=1; i<n; i++) {
+        sp_256_mont_sqr_order_10(r, r);
+    }
+}
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the order of the P256 curve.
+ * (r = 1 / a mod order)
+ *
+ * r   Inverse result.
+ * a   Number to invert.
+ * td  Temporary data.
+ */
+static void sp_256_mont_inv_order_10(sp_digit* r, const sp_digit* a,
+        sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    XMEMCPY(t, a, sizeof(sp_digit) * 10);
+    for (i=254; i>=0; i--) {
+        sp_256_mont_sqr_order_10(t, t);
+        if ((p256_order_2[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_256_mont_mul_order_10(t, t, a);
+        }
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 10U);
+#else
+    sp_digit* t = td;
+    sp_digit* t2 = td + 2 * 10;
+    sp_digit* t3 = td + 4 * 10;
+    int i;
+
+
+    /* t = a^2 */
+    sp_256_mont_sqr_order_10(t, a);
+    /* t = a^3 = t * a */
+    sp_256_mont_mul_order_10(t, t, a);
+    /* t2= a^c = t ^ 2 ^ 2 */
+    sp_256_mont_sqr_n_order_10(t2, t, 2);
+    /* t3= a^f = t2 * t */
+    sp_256_mont_mul_order_10(t3, t2, t);
+    /* t2= a^f0 = t3 ^ 2 ^ 4 */
+    sp_256_mont_sqr_n_order_10(t2, t3, 4);
+    /* t = a^ff = t2 * t3 */
+    sp_256_mont_mul_order_10(t, t2, t3);
+    /* t3= a^ff00 = t ^ 2 ^ 8 */
+    sp_256_mont_sqr_n_order_10(t2, t, 8);
+    /* t = a^ffff = t2 * t */
+    sp_256_mont_mul_order_10(t, t2, t);
+    /* t2= a^ffff0000 = t ^ 2 ^ 16 */
+    sp_256_mont_sqr_n_order_10(t2, t, 16);
+    /* t = a^ffffffff = t2 * t */
+    sp_256_mont_mul_order_10(t, t2, t);
+    /* t2= a^ffffffff0000000000000000 = t ^ 2 ^ 64  */
+    sp_256_mont_sqr_n_order_10(t2, t, 64);
+    /* t2= a^ffffffff00000000ffffffff = t2 * t */
+    sp_256_mont_mul_order_10(t2, t2, t);
+    /* t2= a^ffffffff00000000ffffffff00000000 = t2 ^ 2 ^ 32  */
+    sp_256_mont_sqr_n_order_10(t2, t2, 32);
+    /* t2= a^ffffffff00000000ffffffffffffffff = t2 * t */
+    sp_256_mont_mul_order_10(t2, t2, t);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6 */
+
+    for (i=127; i>=112; i--) {
+        sp_256_mont_sqr_order_10(t2, t2);
+        if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_256_mont_mul_order_10(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6f */
+    sp_256_mont_sqr_n_order_10(t2, t2, 4);
+    sp_256_mont_mul_order_10(t2, t2, t3);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84 */
+    for (i=107; i>=64; i--) {
+        sp_256_mont_sqr_order_10(t2, t2);
+        if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_256_mont_mul_order_10(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f */
+    sp_256_mont_sqr_n_order_10(t2, t2, 4);
+    sp_256_mont_mul_order_10(t2, t2, t3);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2 */
+    for (i=59; i>=32; i--) {
+        sp_256_mont_sqr_order_10(t2, t2);
+        if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_256_mont_mul_order_10(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2f */
+    sp_256_mont_sqr_n_order_10(t2, t2, 4);
+    sp_256_mont_mul_order_10(t2, t2, t3);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254 */
+    for (i=27; i>=0; i--) {
+        sp_256_mont_sqr_order_10(t2, t2);
+        if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_256_mont_mul_order_10(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632540 */
+    sp_256_mont_sqr_n_order_10(t2, t2, 4);
+    /* r = a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f */
+    sp_256_mont_mul_order_10(r, t2, t3);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+
+#ifdef HAVE_ECC_VERIFY
+
+
+/* Verify the signature values with the hash and public key.
+ *   e = Truncate(hash, 256)
+ *   u1 = e/s mod order
+ *   u2 = r/s mod order
+ *   r == (u1.G + u2.Q)->x mod order
+ * Optimization: Leave point in projective form.
+ *   (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
+ *   (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
+ * The hash is truncated to the first 256 bits.
+ *
+ * hash     Hash to sign.
+ * hashLen  Length of the hash data.
+ * rng      Random number generator.
+ * priv     Private part of key - scalar.
+ * rm       First part of result as an mp_int.
+ * sm       Sirst part of result as an mp_int.
+ * heap     Heap to use for allocation.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+int wolfSSL_DSP_ECC_Verify_256(remote_handle64 h, int32 *u1, int hashLen, int32* r, int rSz, int32* s, int sSz,
+	int32* x, int xSz, int32* y, int ySz, int32* z, int zSz, int* res)
+{
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    sp_digit* d = NULL;
+#else
+    sp_digit u2d[2*10] __attribute__((aligned(128)));
+    sp_digit tmpd[2*10 * 5] __attribute__((aligned(128)));
+    sp_point p1d;
+    sp_point p2d;
+#endif
+    sp_digit* u2 = NULL;
+    sp_digit* tmp = NULL;
+    sp_point* p1;
+    sp_point* p2 = NULL;
+    sp_digit carry;
+    int32_t c;
+    int err;
+    void* heap = NULL;
+
+    (void)h;
+    (void)hashLen;
+
+    err = sp_ecc_point_new(heap, p1d, p1);
+    if (err == MP_OKAY) {
+        err = sp_ecc_point_new(heap, p2d, p2);
+    }
+
+    if (err == MP_OKAY) {
+        u2 = u2d;
+        tmp = tmpd;
+
+	XMEMCPY(u2, r, 40);
+	XMEMCPY(p2->x, x, 40);
+	XMEMCPY(p2->y, y, 40);
+	XMEMCPY(p2->z, z, 40);
+
+            sp_256_mul_10(s, s, p256_norm_order);
+        err = sp_256_mod_10(s, s, p256_order);
+    }
+    if (err == MP_OKAY) {
+        sp_256_norm_10(s);
+        {
+
+            sp_256_mont_inv_order_10(s, s, tmp);
+            sp_256_mont_mul_order_10(u1, u1, s);
+            sp_256_mont_mul_order_10(u2, u2, s);
+        }
+
+            err = sp_256_ecc_mulmod_base_10(p1, u1, 0, heap);
+    }
+    if (err == MP_OKAY) {
+            err = sp_256_ecc_mulmod_10(p2, p2, u2, 0, heap);
+    }
+
+    if (err == MP_OKAY) {
+            sp_256_proj_point_add_10(p1, p1, p2, tmp);
+
+        /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
+        /* Reload r and convert to Montgomery form. */
+	XMEMCPY(u2, r, 40);
+        err = sp_256_mod_mul_norm_10(u2, u2, p256_mod);
+    }
+
+    if (err == MP_OKAY) {
+        /* u1 = r.z'.z' mod prime */
+        sp_256_mont_sqr_10(p1->z, p1->z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_10(u1, u2, p1->z, p256_mod, p256_mp_mod);
+        *res = (int)(sp_256_cmp_10(p1->x, u1) == 0);
+        if (*res == 0) {
+            /* Reload r and add order. */
+	    XMEMCPY(u2, r, 40);
+            carry = sp_256_add_10(u2, u2, p256_order);
+            /* Carry means result is greater than mod and is not valid. */
+            if (carry == 0) {
+                sp_256_norm_10(u2);
+
+                /* Compare with mod and if greater or equal then not valid. */
+                c = sp_256_cmp_10(u2, p256_mod);
+                if (c < 0) {
+                    /* Convert to Montogomery form */
+                    err = sp_256_mod_mul_norm_10(u2, u2, p256_mod);
+                    if (err == MP_OKAY) {
+                        /* u1 = (r + 1*order).z'.z' mod prime */
+                        sp_256_mont_mul_10(u1, u2, p1->z, p256_mod,
+                                                                  p256_mp_mod);
+                        *res = (int)(sp_256_cmp_10(p1->x, u2) == 0);
+                    }
+                }
+            }
+        }
+    }
+
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    if (d != NULL)
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+#endif
+    sp_ecc_point_free(p1, 0, heap);
+    sp_ecc_point_free(p2, 0, heap);
+
+    return err;
+}
+
+/** Free the Fixed Point cache */
+void wc_ecc_fp_free(void)
+{
+}
+
+
+AEEResult wolfSSL_open(const char *uri, remote_handle64 *handle) 
+{
+   void *tptr;
+  /* can be any value or ignored, rpc layer doesn't care
+   * also ok
+   * *handle = 0;
+   * *handle = 0xdeadc0de;
+   */
+   tptr = (void *)malloc(1);
+   *handle = (remote_handle64)tptr;
+   return 0;
+}
+
+AEEResult wolfSSL_close(remote_handle64 handle) 
+{
+   if (handle)
+      free((void*)handle);
+   return 0;
+}
+#endif /* HAVE_ECC_VERIFY */
+
+#ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
+/* Add two projective EC points together.
+ * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ)
+ *
+ * pX   First EC point's X ordinate.
+ * pY   First EC point's Y ordinate.
+ * pZ   First EC point's Z ordinate.
+ * qX   Second EC point's X ordinate.
+ * qY   Second EC point's Y ordinate.
+ * qZ   Second EC point's Z ordinate.
+ * rX   Resultant EC point's X ordinate.
+ * rY   Resultant EC point's Y ordinate.
+ * rZ   Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
+                              mp_int* qX, mp_int* qY, mp_int* qZ,
+                              mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
+    sp_digit tmpd[2 * 10 * 5];
+    sp_point pd;
+    sp_point qd;
+#endif
+    sp_digit* tmp;
+    sp_point* p;
+    sp_point* q = NULL;
+    int err;
+
+    err = sp_ecc_point_new(NULL, pd, p);
+    if (err == MP_OKAY) {
+        err = sp_ecc_point_new(NULL, qd, q);
+    }
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 5, NULL,
+                                                              DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_256_from_mp(p->x, 10, pX);
+        sp_256_from_mp(p->y, 10, pY);
+        sp_256_from_mp(p->z, 10, pZ);
+        sp_256_from_mp(q->x, 10, qX);
+        sp_256_from_mp(q->y, 10, qY);
+        sp_256_from_mp(q->z, 10, qZ);
+
+            sp_256_proj_point_add_10(p, p, q, tmp);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->x, rX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->y, rY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->z, rZ);
+    }
+
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_ecc_point_free(q, 0, NULL);
+    sp_ecc_point_free(p, 0, NULL);
+
+    return err;
+}
+
+
+/* Double a projective EC point.
+ * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ)
+ *
+ * pX   EC point's X ordinate.
+ * pY   EC point's Y ordinate.
+ * pZ   EC point's Z ordinate.
+ * rX   Resultant EC point's X ordinate.
+ * rY   Resultant EC point's Y ordinate.
+ * rZ   Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
+                              mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
+    sp_digit tmpd[2 * 10 * 2];
+    sp_point pd;
+#endif
+    sp_digit* tmp;
+    sp_point* p;
+    int err;
+
+    err = sp_ecc_point_new(NULL, pd, p);
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 2, NULL,
+                                                              DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_256_from_mp(p->x, 10, pX);
+        sp_256_from_mp(p->y, 10, pY);
+        sp_256_from_mp(p->z, 10, pZ);
+
+            sp_256_proj_point_dbl_10(p, p, tmp);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->x, rX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->y, rY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->z, rZ);
+    }
+
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_ecc_point_free(p, 0, NULL);
+
+    return err;
+}
+
+/* Map a projective EC point to affine in place.
+ * pZ will be one.
+ *
+ * pX   EC point's X ordinate.
+ * pY   EC point's Y ordinate.
+ * pZ   EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ)
+{
+#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
+    sp_digit tmpd[2 * 10 * 4];
+    sp_point pd;
+#endif
+    sp_digit* tmp;
+    sp_point* p;
+    int err;
+
+    err = sp_ecc_point_new(NULL, pd, p);
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 4, NULL,
+                                                              DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+    if (err == MP_OKAY) {
+        sp_256_from_mp(p->x, 10, pX);
+        sp_256_from_mp(p->y, 10, pY);
+        sp_256_from_mp(p->z, 10, pZ);
+
+        sp_256_map_10(p, p, tmp);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->x, pX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->y, pY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->z, pZ);
+    }
+
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_ecc_point_free(p, 0, NULL);
+
+    return err;
+}
+#endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */
+#ifdef HAVE_COMP_KEY
+/* Find the square root of a number mod the prime of the curve.
+ *
+ * y  The number to operate on and the result.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+static int sp_256_mont_sqrt_10(sp_digit* y)
+{
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    sp_digit* d;
+#else
+    sp_digit t1d[2 * 10];
+    sp_digit t2d[2 * 10];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    int err = MP_OKAY;
+
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 10, NULL, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+        t1 = d + 0 * 10;
+        t2 = d + 2 * 10;
+#else
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
+        {
+            /* t2 = y ^ 0x2 */
+            sp_256_mont_sqr_10(t2, y, p256_mod, p256_mp_mod);
+            /* t1 = y ^ 0x3 */
+            sp_256_mont_mul_10(t1, t2, y, p256_mod, p256_mp_mod);
+            /* t2 = y ^ 0xc */
+            sp_256_mont_sqr_n_10(t2, t1, 2, p256_mod, p256_mp_mod);
+            /* t1 = y ^ 0xf */
+            sp_256_mont_mul_10(t1, t1, t2, p256_mod, p256_mp_mod);
+            /* t2 = y ^ 0xf0 */
+            sp_256_mont_sqr_n_10(t2, t1, 4, p256_mod, p256_mp_mod);
+            /* t1 = y ^ 0xff */
+            sp_256_mont_mul_10(t1, t1, t2, p256_mod, p256_mp_mod);
+            /* t2 = y ^ 0xff00 */
+            sp_256_mont_sqr_n_10(t2, t1, 8, p256_mod, p256_mp_mod);
+            /* t1 = y ^ 0xffff */
+            sp_256_mont_mul_10(t1, t1, t2, p256_mod, p256_mp_mod);
+            /* t2 = y ^ 0xffff0000 */
+            sp_256_mont_sqr_n_10(t2, t1, 16, p256_mod, p256_mp_mod);
+            /* t1 = y ^ 0xffffffff */
+            sp_256_mont_mul_10(t1, t1, t2, p256_mod, p256_mp_mod);
+            /* t1 = y ^ 0xffffffff00000000 */
+            sp_256_mont_sqr_n_10(t1, t1, 32, p256_mod, p256_mp_mod);
+            /* t1 = y ^ 0xffffffff00000001 */
+            sp_256_mont_mul_10(t1, t1, y, p256_mod, p256_mp_mod);
+            /* t1 = y ^ 0xffffffff00000001000000000000000000000000 */
+            sp_256_mont_sqr_n_10(t1, t1, 96, p256_mod, p256_mp_mod);
+            /* t1 = y ^ 0xffffffff00000001000000000000000000000001 */
+            sp_256_mont_mul_10(t1, t1, y, p256_mod, p256_mp_mod);
+            sp_256_mont_sqr_n_10(y, t1, 94, p256_mod, p256_mp_mod);
+        }
+    }
+
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+
+/* Uncompress the point given the X ordinate.
+ *
+ * xm    X ordinate.
+ * odd   Whether the Y ordinate is odd.
+ * ym    Calculated Y ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym)
+{
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    sp_digit* d;
+#else
+    sp_digit xd[2 * 10];
+    sp_digit yd[2 * 10];
+#endif
+    sp_digit* x = NULL;
+    sp_digit* y = NULL;
+    int err = MP_OKAY;
+
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 10, NULL, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+        x = d + 0 * 10;
+        y = d + 2 * 10;
+#else
+        x = xd;
+        y = yd;
+#endif
+
+        sp_256_from_mp(x, 10, xm);
+        err = sp_256_mod_mul_norm_10(x, x, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        /* y = x^3 */
+        {
+            sp_256_mont_sqr_10(y, x, p256_mod, p256_mp_mod);
+            sp_256_mont_mul_10(y, y, x, p256_mod, p256_mp_mod);
+        }
+        /* y = x^3 - 3x */
+        sp_256_mont_sub_10(y, y, x, p256_mod);
+        sp_256_mont_sub_10(y, y, x, p256_mod);
+        sp_256_mont_sub_10(y, y, x, p256_mod);
+        /* y = x^3 - 3x + b */
+        err = sp_256_mod_mul_norm_10(x, p256_b, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        sp_256_mont_add_10(y, y, x, p256_mod);
+        /* y = sqrt(x^3 - 3x + b) */
+        err = sp_256_mont_sqrt_10(y);
+    }
+    if (err == MP_OKAY) {
+        XMEMSET(y + 10, 0, 10U * sizeof(sp_digit));
+        sp_256_mont_reduce_10(y, p256_mod, p256_mp_mod);
+        if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) {
+            sp_256_mont_sub_10(y, p256_mod, y, p256_mod);
+        }
+
+        err = sp_256_to_mp(y, ym);
+    }
+
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+#endif
+#endif /* !WOLFSSL_SP_NO_256 */
+#endif /* WOLFSSL_HAVE_SP_ECC */
+#endif /* WOLFSSL_DSP */
+#endif /* WOLFSSL_HAVE_SP_ECC */
+
+
--- a/wolfcrypt/src/sp_int.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/sp_int.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* sp_int.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -34,11 +34,56 @@
     #include <wolfcrypt/src/misc.c>
 #endif
 
+/* SP Build Options:
+ * WOLFSSL_HAVE_SP_RSA:         Enable SP RSA support
+ * WOLFSSL_HAVE_SP_DH:          Enable SP DH support
+ * WOLFSSL_HAVE_SP_ECC:         Enable SP ECC support
+ * WOLFSSL_SP_MATH:             Use only single precision math and algorithms it supports (no fastmath tfm.c or normal integer.c)
+ * WOLFSSL_SP_SMALL:            Use smaller version of code and avoid large stack variables
+ * WOLFSSL_SP_NO_MALLOC:        Always use stack, no heap XMALLOC/XFREE allowed
+ * WOLFSSL_SP_NO_3072:          Disable RSA/DH 3072-bit support
+ * WOLFSSL_SP_NO_2048:          Disable RSA/DH 2048-bit support
+ * WOLFSSL_SP_4096:             Enable RSA/RH 4096-bit support
+ * WOLFSSL_SP_384               Enable ECC 384-bit SECP384R1 support
+ * WOLFSSL_SP_NO_256            Disable ECC 256-bit SECP256R1 support
+ * WOLFSSL_SP_CACHE_RESISTANT   Enable cache resistantant code 
+ * WOLFSSL_SP_ASM               Enable assembly speedups (detect platform)
+ * WOLFSSL_SP_X86_64_ASM        Enable Intel x86 assembly speedups like AVX/AVX2
+ * WOLFSSL_SP_ARM32_ASM         Enable Aarch32 assembly speedups
+ * WOLFSSL_SP_ARM64_ASM         Enable Aarch64 assembly speedups
+ * WOLFSSL_SP_ARM_CORTEX_M_ASM  Enable Cortex-M assembly speedups
+ * WOLFSSL_SP_ARM_THUMB_ASM     Enable ARM Thumb assembly speedups (used with -mthumb)
+ */
 
 #ifdef WOLFSSL_SP_MATH
 
 #include <wolfssl/wolfcrypt/sp_int.h>
 
+#if defined(WOLFSSL_HAVE_SP_DH) || defined(WOLFSSL_HAVE_SP_RSA)
+
+WOLFSSL_LOCAL int sp_ModExp_1024(sp_int* base, sp_int* exp, sp_int* mod,
+    sp_int* res);
+WOLFSSL_LOCAL int sp_ModExp_1536(sp_int* base, sp_int* exp, sp_int* mod,
+    sp_int* res);
+WOLFSSL_LOCAL int sp_ModExp_2048(sp_int* base, sp_int* exp, sp_int* mod,
+    sp_int* res);
+WOLFSSL_LOCAL int sp_ModExp_3072(sp_int* base, sp_int* exp, sp_int* mod,
+    sp_int* res);
+WOLFSSL_LOCAL int sp_ModExp_4096(sp_int* base, sp_int* exp, sp_int* mod,
+    sp_int* res);
+
+#endif
+
+int sp_get_digit_count(sp_int *a)
+{
+    int ret;
+    if (!a)
+        ret = 0;
+    else
+        ret = a->used;
+    return ret;
+}
+
 /* Initialize the big number to be zero.
  *
  * a  SP integer.
@@ -52,6 +97,7 @@
     return MP_OKAY;
 }
 
+#if !defined(WOLFSSL_RSA_PUBLIC_ONLY) || (!defined(NO_DH) || defined(HAVE_ECC))
 /* Initialize up to six big numbers to be zero.
  *
  * a  SP integer.
@@ -92,6 +138,7 @@
 
     return MP_OKAY;
 }
+#endif
 
 /* Clear the data from the big number and set to zero.
  *
@@ -99,11 +146,13 @@
  */
 void sp_clear(sp_int* a)
 {
-    int i;
+    if (a != NULL) {
+        int i;
 
-    for (i=0; i<a->used; i++)
-        a->dp[i] = 0;
-    a->used = 0;
+        for (i=0; i<a->used; i++)
+            a->dp[i] = 0;
+        a->used = 0;
+    }
 }
 
 /* Calculate the number of 8-bit values required to represent the big number.
@@ -122,40 +171,47 @@
  * a     SP integer.
  * in    Array of bytes.
  * inSz  Number of data bytes in array.
- * returns MP_OKAY always.
+ * returns BAD_FUNC_ARG when the number is too big to fit in an SP and
+           MP_OKAY otherwise.
  */
-int sp_read_unsigned_bin(sp_int* a, const byte* in, word32 inSz)
+int sp_read_unsigned_bin(sp_int* a, const byte* in, int inSz)
 {
-    int i, j = 0, s = 0;
+    int err = MP_OKAY;
+    int i, j = 0, k;
 
-    a->dp[0] = 0;
-    for (i = inSz-1; i >= 0; i--) {
-        a->dp[j] |= ((sp_int_digit)in[i]) << s;
-        if (s == DIGIT_BIT - 8) {
-            a->dp[++j] = 0;
-            s = 0;
-        }
-        else if (s > DIGIT_BIT - 8) {
-            s = DIGIT_BIT - s;
-            if (j + 1 >= a->size)
-                break;
-            a->dp[++j] = in[i] >> s;
-            s = 8 - s;
-        }
-        else
-            s += 8;
+    if (inSz > SP_INT_DIGITS * (int)sizeof(a->dp[0])) {
+        err = MP_VAL;
     }
 
-    a->used = j + 1;
-    if (a->dp[j] == 0)
-        a->used--;
+    if (err == MP_OKAY) {
+        for (i = inSz-1; i >= (SP_WORD_SIZE/8); i -= (SP_WORD_SIZE/8), j++) {
+            a->dp[j]  = (((sp_int_digit)in[i-0]) << (0*8))
+                     |  (((sp_int_digit)in[i-1]) << (1*8))
+                     |  (((sp_int_digit)in[i-2]) << (2*8))
+                     |  (((sp_int_digit)in[i-3]) << (3*8));
+    #if SP_WORD_SIZE == 64
+            a->dp[j] |= (((sp_int_digit)in[i-4]) << (4*8))
+                     |  (((sp_int_digit)in[i-5]) << (5*8))
+                     |  (((sp_int_digit)in[i-6]) << (6*8))
+                     |  (((sp_int_digit)in[i-7]) << (7*8));
+    #endif
+        }
+        if (i >= 0) {
+            a->dp[j] = 0;
+            for (k = 0; k <= i; k++) {
+                a->dp[j] <<= 8;
+                a->dp[j] |= in[k];
+            }
+        }
+        a->used = j + 1;
+    }
 
-    for (j++; j < a->size; j++)
-        a->dp[j] = 0;
+    sp_clamp(a);
 
-    return MP_OKAY;
+    return err;
 }
 
+#ifdef HAVE_ECC
 /* Convert a number as string in big-endian format to a big number.
  * Only supports base-16 (hexadecimal).
  * Negative values not supported.
@@ -168,46 +224,59 @@
  */
 int sp_read_radix(sp_int* a, const char* in, int radix)
 {
-    int     i, j, k;
-    char    ch;
+    int  err = MP_OKAY;
+    int  i, j = 0, k = 0;
+    char ch;
 
-    if (radix != 16)
-        return BAD_FUNC_ARG;
+    if ((radix != 16) || (*in == '-')) {
+        err = BAD_FUNC_ARG;
+    }
 
-    if (*in == '-') {
-        return BAD_FUNC_ARG;
+    while (*in == '0') {
+        in++;
     }
 
-    j = 0;
-    k = 0;
-    a->dp[0] = 0;
-    for (i = (int)(XSTRLEN(in) - 1); i >= 0; i--) {
-        ch = in[i];
-        if (ch >= '0' && ch <= '9')
-            ch -= '0';
-        else if (ch >= 'A' && ch <= 'F')
-            ch -= 'A' - 10;
-        else if (ch >= 'a' && ch <= 'f')
-            ch -= 'a' - 10;
-        else
-            return MP_VAL;
+    if (err == MP_OKAY) {
+        a->dp[0] = 0;
+        for (i = (int)(XSTRLEN(in) - 1); i >= 0; i--) {
+            ch = in[i];
+            if (ch >= '0' && ch <= '9')
+                ch -= '0';
+            else if (ch >= 'A' && ch <= 'F')
+                ch -= 'A' - 10;
+            else if (ch >= 'a' && ch <= 'f')
+                ch -= 'a' - 10;
+            else {
+                err = MP_VAL;
+                break;
+            }
 
-        a->dp[k] |= ((sp_int_digit)ch) << j;
-        j += 4;
-        if (j == DIGIT_BIT && k < SP_INT_DIGITS)
-            a->dp[++k] = 0;
-        j &= DIGIT_BIT - 1;
+            a->dp[k] |= ((sp_int_digit)ch) << j;
+            j += 4;
+            if (k >= SP_INT_DIGITS - 1) {
+                err = MP_VAL;
+                break;
+            }
+            if (j == DIGIT_BIT)
+                a->dp[++k] = 0;
+            j &= SP_WORD_SIZE - 1;
+        }
     }
 
-    a->used = k + 1;
-    if (a->dp[k] == 0)
-        a->used--;
+    if (err == MP_OKAY) {
+        a->used = k + 1;
+        if (a->dp[k] == 0)
+            a->used--;
 
-    for (k++; k < a->size; k++)
-        a->dp[k] = 0;
+        for (k++; k < a->size; k++)
+            a->dp[k] = 0;
 
-    return MP_OKAY;
+        sp_clamp(a);
+    }
+
+    return err;
 }
+#endif
 
 /* Compare two big numbers.
  *
@@ -218,20 +287,26 @@
  */
 int sp_cmp(sp_int* a, sp_int* b)
 {
+    int ret = MP_EQ;
     int i;
 
     if (a->used > b->used)
-        return MP_GT;
+        ret = MP_GT;
     else if (a->used < b->used)
-        return MP_LT;
-
-    for (i = a->used - 1; i >= 0; i--) {
-        if (a->dp[i] > b->dp[i])
-            return MP_GT;
-        else if (a->dp[i] < b->dp[i])
-            return MP_LT;
+        ret = MP_LT;
+    else {
+        for (i = a->used - 1; i >= 0; i--) {
+            if (a->dp[i] > b->dp[i]) {
+                ret = MP_GT;
+                break;
+            }
+            else if (a->dp[i] < b->dp[i]) {
+                ret = MP_LT;
+                break;
+            }
+        }
     }
-    return MP_EQ;
+    return ret;
 }
 
 /* Count the number of bits in the big number.
@@ -251,10 +326,19 @@
         r = 0;
     else {
         d = a->dp[r];
-        r *= DIGIT_BIT;
-        while (d != 0) {
-            r++;
-            d >>= 1;
+        r *= SP_WORD_SIZE;
+        if (d >= (1L << (SP_WORD_SIZE / 2))) {
+            r += SP_WORD_SIZE;
+            while ((d & (1UL << (SP_WORD_SIZE - 1))) == 0) {
+                r--;
+                d <<= 1;
+            }
+        }
+        else {
+            while (d != 0) {
+                r++;
+                d >>= 1;
+            }
         }
     }
 
@@ -282,19 +366,53 @@
     return bit;
 }
 
+#if !defined(NO_DH) || defined(HAVE_ECC) || defined(WC_RSA_BLINDING) || \
+    !defined(WOLFSSL_RSA_VERIFY_ONLY)
 /* Convert the big number to an array of bytes in big-endian format.
  * The array must be large enough for encoded number - use mp_unsigned_bin_size
  * to calculate the number of bytes required.
  *
- * a  SP integer.
+ * a    SP integer.
+ * out  Array to put encoding into.
  * returns MP_OKAY always.
  */
 int sp_to_unsigned_bin(sp_int* a, byte* out)
 {
     int i, j, b;
+    sp_int_digit d;
 
     j = sp_unsigned_bin_size(a) - 1;
     for (i=0; j>=0; i++) {
+        d = a->dp[i];
+        for (b = 0; b < SP_WORD_SIZE / 8; b++) {
+            out[j] = d;
+            if (--j < 0) {
+                break;
+            }
+            d >>= 8;
+        }
+    }
+
+    return MP_OKAY;
+}
+#endif
+
+/* Convert the big number to an array of bytes in big-endian format.
+ * The array must be large enough for encoded number - use mp_unsigned_bin_size
+ * to calculate the number of bytes required.
+ * Front-pads the output array with zeros make number the size of the array.
+ *
+ * a      SP integer.
+ * out    Array to put encoding into.
+ * outSz  Size of the array.
+ * returns MP_OKAY always.
+ */
+int sp_to_unsigned_bin_len(sp_int* a, byte* out, int outSz)
+{
+    int i, j, b;
+
+    j = outSz - 1;
+    for (i=0; j>=0; i++) {
         for (b = 0; b < SP_WORD_SIZE; b += 8) {
             out[j--] = a->dp[i] >> b;
             if (j < 0)
@@ -305,6 +423,7 @@
     return MP_OKAY;
 }
 
+#if !defined(WOLFSSL_RSA_PUBLIC_ONLY) || (!defined(NO_DH) || defined(HAVE_ECC))
 /* Ensure the data in the big number is zeroed.
  *
  * a  SP integer.
@@ -314,22 +433,37 @@
     ForceZero(a->dp, a->used * sizeof(sp_int_digit));
     a->used = 0;
 }
+#endif
 
-/* Copy value of big number a into b.
+#if !defined(WOLFSSL_RSA_VERIFY_ONLY) || (!defined(NO_DH) || defined(HAVE_ECC))
+/* Copy value of big number a into r.
  *
  * a  SP integer.
- * b  SP integer.
+ * r  SP integer.
  * returns MP_OKAY always.
  */
-int sp_copy(sp_int* a, sp_int* b)
+int sp_copy(sp_int* a, sp_int* r)
 {
-    if (a != b) {
-        XMEMCPY(b->dp, a->dp, a->used * sizeof(sp_int_digit));
-        b->used = a->used;
+    if (a != r) {
+        XMEMCPY(r->dp, a->dp, a->used * sizeof(sp_int_digit));
+        r->used = a->used;
     }
     return MP_OKAY;
 }
 
+/* creates "a" then copies b into it */
+int sp_init_copy (sp_int * a, sp_int * b)
+{
+  int err;
+  if ((err = sp_init(a)) == MP_OKAY) {
+      if((err = sp_copy (b, a)) != MP_OKAY) {
+          sp_clear(a);
+      }
+  }
+  return err;
+}
+#endif
+
 /* Set the big number to be the value of the digit.
  *
  * a  SP integer.
@@ -338,21 +472,17 @@
  */
 int sp_set(sp_int* a, sp_int_digit d)
 {
-    a->dp[0] = d;
-    a->used = 1;
+    if (d == 0) {
+      a->dp[0] = d;
+      a->used = 0;
+    }
+    else {
+      a->dp[0] = d;
+      a->used = 1;
+    }
     return MP_OKAY;
 }
 
-/* Checks whether the value of the big number is zero.
- *
- * a  SP integer.
- * returns 1 when value is zero and 0 otherwise.
- */
-int sp_iszero(sp_int* a)
-{
-    return a->used == 0;
-}
-
 /* Recalculate the number of digits used.
  *
  * a  SP integer.
@@ -366,21 +496,23 @@
     a->used = i + 1;
 }
 
+#if !defined(WOLFSSL_RSA_VERIFY_ONLY) || (!defined(NO_DH) || defined(HAVE_ECC))
 /* Grow big number to be able to hold l digits.
  * This function does nothing as the number of digits is fixed.
  *
  * a  SP integer.
  * l  Number of digits.
- * retuns MP_MEM if the number of digits requested is more than available and
+ * returns MP_MEM if the number of digits requested is more than available and
  * MP_OKAY otherwise.
  */
 int sp_grow(sp_int* a, int l)
 {
+    int err = MP_OKAY;
+
     if (l > a->size)
-        return MP_MEM;
-    (void)a;
-    (void)l;
-    return MP_OKAY;
+        err = MP_MEM;
+
+    return err;
 }
 
 /* Sub a one digit number from the big number.
@@ -393,21 +525,27 @@
 int sp_sub_d(sp_int* a, sp_int_digit d, sp_int* r)
 {
     int i = 0;
+    sp_int_digit t;
 
     r->used = a->used;
-    r->dp[0] = a->dp[0] - d;
-    if (r->dp[i] > a->dp[i]) {
-        for (; i < a->used; i++) {
+    t = a->dp[0] - d;
+    if (t > a->dp[0]) {
+        for (++i; i < a->used; i++) {
             r->dp[i] = a->dp[i] - 1;
             if (r->dp[i] != (sp_int_digit)-1)
                break;
         }
     }
-    for (; i < a->used; i++)
-        r->dp[i] = a->dp[i];
+    r->dp[0] = t;
+    if (r != a) {
+        for (++i; i < a->used; i++)
+            r->dp[i] = a->dp[i];
+    }
+    sp_clamp(r);
 
     return MP_OKAY;
 }
+#endif
 
 /* Compare a one digit number with a big number.
  *
@@ -418,24 +556,29 @@
  */
 int sp_cmp_d(sp_int *a, sp_int_digit d)
 {
+    int ret = MP_EQ;
+
     /* special case for zero*/
     if (a->used == 0) {
         if (d == 0)
-            return MP_EQ;
+            ret = MP_EQ;
         else
-            return MP_LT;
+            ret = MP_LT;
     }
     else if (a->used > 1)
-        return MP_GT;
+        ret = MP_GT;
+    else {
+        /* compare the only digit of a to d */
+        if (a->dp[0] > d)
+            ret = MP_GT;
+        else if (a->dp[0] < d)
+            ret = MP_LT;
+    }
 
-    /* compare the only digit of a to d */
-    if (a->dp[0] > d)
-        return MP_GT;
-    else if (a->dp[0] < d)
-        return MP_LT;
-    return MP_EQ;
+    return ret;
 }
 
+#if !defined(NO_DH) || defined(HAVE_ECC) || !defined(WOLFSSL_RSA_VERIFY_ONLY)
 /* Left shift the number by number of bits.
  * Bits may be larger than the word size.
  *
@@ -452,16 +595,15 @@
         n %= SP_WORD_SIZE;
     }
 
-    if (n == 0)
-        return MP_OKAY;
-
-    a->dp[a->used] = 0;
-    for (i = a->used - 1; i >= 0; i--) {
-        a->dp[i+1] |= a->dp[i] >> (SP_WORD_SIZE - n);
-        a->dp[i] = a->dp[i] << n;
+    if (n != 0) {
+        a->dp[a->used] = 0;
+        for (i = a->used - 1; i >= 0; i--) {
+            a->dp[i+1] |= a->dp[i] >> (SP_WORD_SIZE - n);
+            a->dp[i] = a->dp[i] << n;
+        }
+        if (a->dp[a->used] != 0)
+            a->used++;
     }
-    if (a->dp[a->used] != 0)
-        a->used++;
 
     return MP_OKAY;
 }
@@ -474,7 +616,7 @@
  * r  SP integer.
  * returns MP_OKAY always.
  */
-static int sp_sub(sp_int* a, sp_int* b, sp_int* r)
+int sp_sub(sp_int* a, sp_int* b, sp_int* r)
 {
     int i;
     sp_int_digit c = 0;
@@ -490,7 +632,7 @@
     }
     for (; i < a->used; i++) {
         r->dp[i] = a->dp[i] - c;
-        c = r->dp[i] == (sp_int_digit)-1;
+        c &= (r->dp[i] == (sp_int_digit)-1);
     }
     r->used = i;
     sp_clamp(r);
@@ -498,50 +640,259 @@
     return MP_OKAY;
 }
 
-/* Calculate the r = a mod m.
+/* Shift a right by n bits into r: r = a >> n
+ *
+ * a    SP integer operand.
+ * n    Number of bits to shift.
+ * r    SP integer result.
+ */
+void sp_rshb(sp_int* a, int n, sp_int* r)
+{
+    int i;
+    int j;
+
+    for (i = n / SP_WORD_SIZE, j = 0; i < a->used-1; i++, j++)
+        r->dp[i] = (a->dp[j] >> n) | (a->dp[j+1] << (SP_WORD_SIZE - n));
+    r->dp[i] = a->dp[j] >> n;
+    r->used = j + 1;
+    sp_clamp(r);
+}
+
+/* Multiply a by digit n and put result into r shifting up o digits.
+ *   r = (a * n) << (o * SP_WORD_SIZE)
+ *
+ * a  SP integer to be multiplied.
+ * n  Number to multiply by.
+ * r  SP integer result.
+ * o  Number of digits to move result up by.
+ */
+static void _sp_mul_d(sp_int* a, sp_int_digit n, sp_int* r, int o)
+{
+    int i;
+    sp_int_word t = 0;
+
+    for (i = 0; i < o; i++)
+        r->dp[i] = 0;
+
+    for (i = 0; i < a->used; i++) {
+        t += (sp_int_word)n * a->dp[i];
+        r->dp[i + o] = (sp_int_digit)t;
+        t >>= SP_WORD_SIZE;
+    }
+
+    r->dp[i+o] = (sp_int_digit)t;
+    r->used = i+o+1;
+    sp_clamp(r);
+}
+
+/* Divide a by d and return the quotient in r and the remainder in rem.
+ *   r = a / d; rem = a % d
+ *
+ * a    SP integer to be divided.
+ * d    SP integer to divide by.
+ * r    SP integer of quotient.
+ * rem  SP integer of remainder.
+ * returns MP_VAL when d is 0, MP_MEM when dynamic memory allocation fails and
+ *         MP_OKAY otherwise.
+ */
+static int sp_div(sp_int* a, sp_int* d, sp_int* r, sp_int* rem)
+{
+    int err = MP_OKAY;
+    int ret;
+    int done = 0;
+    int i;
+    int s;
+#ifndef WOLFSSL_SP_DIV_32
+    sp_int_word w = 0;
+#endif
+    sp_int_digit dt;
+    sp_int_digit t;
+#ifdef WOLFSSL_SMALL_STACK
+    sp_int* sa = NULL;
+    sp_int* sd;
+    sp_int* tr;
+    sp_int* trial;
+#else
+    sp_int sa[1];
+    sp_int sd[1];
+    sp_int tr[1];
+    sp_int trial[1];
+#endif
+
+    if (sp_iszero(d))
+        err = MP_VAL;
+
+    ret = sp_cmp(a, d);
+    if (ret == MP_LT) {
+        if (rem != NULL) {
+            sp_copy(a, rem);
+        }
+        if (r != NULL) {
+            sp_set(r, 0);
+        }
+        done = 1;
+    }
+    else if (ret == MP_EQ) {
+        if (rem != NULL) {
+            sp_set(rem, 0);
+        }
+        if (r != NULL) {
+            sp_set(r, 1);
+        }
+        done = 1;
+    }
+    else if (sp_count_bits(a) == sp_count_bits(d)) {
+        /* a is greater than d but same bit length */
+        if (rem != NULL) {
+            sp_sub(a, d, rem);
+        }
+        if (r != NULL) {
+            sp_set(r, 1);
+        }
+        done = 1;
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (!done && err == MP_OKAY) {
+        sa = (sp_int*)XMALLOC(sizeof(sp_int) * 4, NULL, DYNAMIC_TYPE_BIGINT);
+        if (sa == NULL) {
+            err = MP_MEM;
+        }
+    }
+#endif
+
+    if (!done && err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        sd    = &sa[1];
+        tr    = &sa[2];
+        trial = &sa[3];
+#endif
+
+        sp_init(sa);
+        sp_init(sd);
+        sp_init(tr);
+        sp_init(trial);
+
+        s = sp_count_bits(d);
+        s = SP_WORD_SIZE - (s % SP_WORD_SIZE);
+        sp_copy(a, sa);
+        if (s != SP_WORD_SIZE) {
+            sp_lshb(sa, s);
+            sp_copy(d, sd);
+            sp_lshb(sd, s);
+            d = sd;
+        }
+
+        tr->used = sa->used - d->used + 1;
+        sp_clear(tr);
+        tr->used = sa->used - d->used + 1;
+        dt = d->dp[d->used-1];
+#ifndef WOLFSSL_SP_DIV_32
+        for (i = sa->used - 1; i >= d->used; ) {
+            if (sa->dp[i] > dt) {
+                t = (sp_int_digit)-1;
+            }
+            else {
+                w = ((sp_int_word)sa->dp[i] << SP_WORD_SIZE) | sa->dp[i-1];
+                w /= dt;
+                if (w > (sp_int_digit)-1) {
+                    t = (sp_int_digit)-1;
+                }
+                else {
+                    t = (sp_int_digit)w;
+                }
+            }
+
+            if (t > 0) {
+                _sp_mul_d(d, t, trial, i - d->used);
+                while (sp_cmp(trial, sa) == MP_GT) {
+                    t--;
+                    _sp_mul_d(d, t, trial, i - d->used);
+                }
+                sp_sub(sa, trial, sa);
+                tr->dp[i - d->used] += t;
+                if (tr->dp[i - d->used] < t)
+                    tr->dp[i + 1 - d->used]++;
+            }
+            i = sa->used - 1;
+        }
+#else
+     {
+        sp_int_digit div = (dt >> (SP_WORD_SIZE / 2)) + 1;
+        for (i = sa->used - 1; i >= d->used; ) {
+            t = sa->dp[i] / div;
+            if ((t > 0) && (t << (SP_WORD_SIZE / 2) == 0))
+                t = (sp_int_digit)-1;
+            t <<= SP_WORD_SIZE / 2;
+            if (t == 0) {
+               t = sa->dp[i] << (SP_WORD_SIZE / 2);
+               t += sa->dp[i-1] >> (SP_WORD_SIZE / 2);
+               t /= div;
+            }
+
+            if (t > 0) {
+                _sp_mul_d(d, t, trial, i - d->used);
+                while (sp_cmp(trial, sa) == MP_GT) {
+                    t--;
+                    _sp_mul_d(d, t, trial, i - d->used);
+                }
+                sp_sub(sa, trial, sa);
+                tr->dp[i - d->used] += t;
+                if (tr->dp[i - d->used] < t)
+                    tr->dp[i + 1 - d->used]++;
+            }
+            i = sa->used - 1;
+        }
+
+        while (sp_cmp(sa, d) != MP_LT) {
+            sp_sub(sa, d, sa);
+            sp_add_d(tr, 1, tr);
+        }
+    }
+#endif
+
+        sp_clamp(tr);
+
+        if (rem != NULL) {
+            if (s != SP_WORD_SIZE)
+                sp_rshb(sa, s, sa);
+            sp_copy(sa, rem);
+        }
+        if (r != NULL)
+            sp_copy(tr, r);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (sa != NULL)
+        XFREE(sa, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+
+    return err;
+}
+
+
+#ifndef FREESCALE_LTC_TFM
+/* Calculate the remainder of dividing a by m: r = a mod m.
  *
  * a  SP integer.
  * m  SP integer.
  * r  SP integer.
- * returns MP_OKAY always.
+ * returns MP_VAL when m is 0 and MP_OKAY otherwise.
  */
 int sp_mod(sp_int* a, sp_int* m, sp_int* r)
 {
-    sp_int t;
-    int mBits = sp_count_bits(m);
-    int rBits;
-
-    if (a != r)
-        sp_copy(a, r);
-    sp_init(&t);
-
-    rBits = sp_count_bits(r);
-    while (rBits > mBits) {
-        sp_copy(m, &t);
-        sp_lshb(&t, rBits - mBits);
+    return sp_div(a, m, NULL, r);
+}
+#endif
+#endif
 
-        if (sp_cmp(&t, r) == MP_GT) {
-            sp_copy(m, &t);
-            sp_lshb(&t, rBits - mBits - 1);
-        }
-        sp_sub(r, &t, r);
-
-        rBits = sp_count_bits(r);
-    }
-    if (sp_cmp(r, m) != MP_LT)
-        sp_sub(r, m, r);
-
-    return MP_OKAY;
-}
-
-#if defined(USE_FAST_MATH) || !defined(NO_BIG_INT)
 /* Clear all data in the big number and sets value to zero.
  *
  * a  SP integer.
  */
 void sp_zero(sp_int* a)
 {
-    XMEMSET(a->dp, 0, a->size);
+    XMEMSET(a->dp, 0, a->size * sizeof(*a->dp));
     a->used = 0;
 }
 
@@ -557,6 +908,9 @@
     int i = 0;
 
     r->used = a->used;
+    if (a->used == 0) {
+        r->used = 1;
+    }
     r->dp[0] = a->dp[0] + d;
     if (r->dp[i] < a->dp[i]) {
         for (; i < a->used; i++) {
@@ -576,6 +930,8 @@
     return MP_OKAY;
 }
 
+#if !defined(NO_DH) || defined(HAVE_ECC) || defined(WC_RSA_BLINDING) || \
+    !defined(WOLFSSL_RSA_VERIFY_ONLY)
 /* Left shift the big number by a number of digits.
  * WIll chop off digits overflowing maximum size.
  *
@@ -588,15 +944,16 @@
     if (a->used + s > a->size)
         a->used = a->size - s;
 
-    XMEMMOVE(a->dp + s, a->dp, a->used * SP_INT_DIGITS);
+    XMEMMOVE(a->dp + s, a->dp, a->used * sizeof(sp_int_digit));
     a->used += s;
     XMEMSET(a->dp, 0, s * sizeof(sp_int_digit));
+    sp_clamp(a);
 
     return MP_OKAY;
 }
 #endif
 
-#ifndef NO_PWDBASED
+#if !defined(NO_PWDBASED) || defined(WOLFSSL_KEY_GEN) || !defined(NO_DH)
 /* Add two large numbers into result: r = a + b
  *
  * a  SP integer.
@@ -607,8 +964,8 @@
 int sp_add(sp_int* a, sp_int* b, sp_int* r)
 {
     int i;
-    sp_digit c = 0;
-    sp_digit t;
+    sp_int_digit c = 0;
+    sp_int_digit t;
 
     for (i = 0; i < a->used && i < b->used; i++) {
         t = a->dp[i] + b->dp[i] + c;
@@ -620,18 +977,18 @@
     }
     for (; i < a->used; i++) {
         r->dp[i] = a->dp[i] + c;
-        c = r->dp[i] == 0;
+        c = (a->dp[i] != 0) && (r->dp[i] == 0);
     }
     for (; i < b->used; i++) {
         r->dp[i] = b->dp[i] + c;
-        c = r->dp[i] == 0;
+        c = (b->dp[i] != 0) && (r->dp[i] == 0);
     }
     r->dp[i] = c;
     r->used = (int)(i + c);
 
     return MP_OKAY;
 }
-#endif
+#endif /* !NO_PWDBASED || WOLFSSL_KEY_GEN || !NO_DH */
 
 #ifndef NO_RSA
 /* Set a number into the big number.
@@ -642,14 +999,1198 @@
  */
 int sp_set_int(sp_int* a, unsigned long b)
 {
-    a->used = 1;
-    a->dp[0] = b;
+    if (b == 0) {
+        a->used = 0;
+        a->dp[0] = 0;
+    }
+    else {
+        a->used = 1;
+        a->dp[0] = (sp_int_digit)b;
+    }
+
+    return MP_OKAY;
+}
+#endif /* !NO_RSA */
+
+#ifdef WC_MP_TO_RADIX
+/* Hex string characters. */
+static const char sp_hex_char[16] = {
+    '0', '1', '2', '3', '4', '5', '6', '7',
+    '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
+};
+
+/* Put the hex string version, big-endian, of a in str.
+ *
+ * a    SP integer.
+ * str  Hex string is stored here.
+ * returns MP_OKAY always.
+ */
+int sp_tohex(sp_int* a, char* str)
+{
+    int i, j;
+
+    /* quick out if its zero */
+    if (sp_iszero(a) == MP_YES) {
+        *str++ = '0';
+        *str = '\0';
+    }
+    else {
+        i = a->used - 1;
+        for (j = SP_WORD_SIZE - 4; j >= 0; j -= 4) {
+            if (((a->dp[i] >> j) & 0xf) != 0)
+                break;
+        }
+        for (; j >= 0; j -= 4)
+            *(str++) = sp_hex_char[(a->dp[i] >> j) & 0xf];
+        for (--i; i >= 0; i--) {
+            for (j = SP_WORD_SIZE - 4; j >= 0; j -= 4)
+                *(str++) = sp_hex_char[(a->dp[i] >> j) & 0xf];
+        }
+        *str = '\0';
+    }
+
+    return MP_OKAY;
+}
+#endif /* WC_MP_TO_RADIX */
+
+#if defined(WOLFSSL_KEY_GEN) || !defined(NO_DH) && !defined(WC_NO_RNG)
+/* Set a bit of a: a |= 1 << i
+ * The field 'used' is updated in a.
+ *
+ * a  SP integer to modify.
+ * i  Index of bit to set.
+ * returns MP_OKAY always.
+ */
+int sp_set_bit(sp_int* a, int i)
+{
+    int ret = MP_OKAY;
+
+    if ((a == NULL) ||  (i / SP_WORD_SIZE >= SP_INT_DIGITS)) {
+        ret = BAD_FUNC_ARG;
+    }
+    else {
+        a->dp[i/SP_WORD_SIZE] |= (sp_int_digit)1 << (i % SP_WORD_SIZE);
+        if (a->used <= i / SP_WORD_SIZE)
+            a->used = (i / SP_WORD_SIZE) + 1;
+    }
+    return ret;
+}
+
+/* Exponentiate 2 to the power of e: a = 2^e
+ * This is done by setting the 'e'th bit.
+ *
+ * a  SP integer.
+ * e  Exponent.
+ * returns MP_OKAY always.
+ */
+int sp_2expt(sp_int* a, int e)
+{
+    sp_zero(a);
+    return sp_set_bit(a, e);
+}
+
+/* Generate a random prime for RSA only.
+ *
+ * r     SP integer
+ * len   Number of bytes to prime.
+ * rng   Random number generator.
+ * heap  Unused
+ * returns MP_OKAY on success and MP_VAL when length is not supported or random
+ *         number genrator fails.
+ */
+int sp_rand_prime(sp_int* r, int len, WC_RNG* rng, void* heap)
+{
+    static const int USE_BBS = 1;
+    int   err = 0, type;
+    int   isPrime = MP_NO;
+
+    (void)heap;
+
+    /* get type */
+    if (len < 0) {
+        type = USE_BBS;
+        len = -len;
+    }
+    else {
+        type = 0;
+    }
+
+#if defined(WOLFSSL_HAVE_SP_DH) && defined(WOLFSSL_KEY_GEN)
+    if (len == 32) {
+    }
+    else
+#endif
+    /* Generate RSA primes that are half the modulus length. */
+#ifndef WOLFSSL_SP_NO_3072
+    if (len != 128 && len != 192)
+#else
+    if (len != 128)
+#endif
+    {
+        err = MP_VAL;
+    }
+
+    r->used = len / (SP_WORD_SIZE / 8);
+
+    /* Assume the candidate is probably prime and then test until
+     * it is proven composite. */
+    while (err == 0 && isPrime == MP_NO) {
+#ifdef SHOW_GEN
+        printf(".");
+        fflush(stdout);
+#endif
+        /* generate value */
+        err = wc_RNG_GenerateBlock(rng, (byte*)r->dp, len);
+        if (err != 0) {
+            err = MP_VAL;
+            break;
+        }
+
+        /* munge bits */
+        ((byte*)r->dp)[len-1] |= 0x80 | 0x40;
+        r->dp[0]              |= 0x01 | ((type & USE_BBS) ? 0x02 : 0x00);
+
+        /* test */
+        /* Running Miller-Rabin up to 3 times gives us a 2^{-80} chance
+         * of a 1024-bit candidate being a false positive, when it is our
+         * prime candidate. (Note 4.49 of Handbook of Applied Cryptography.)
+         * Using 8 because we've always used 8 */
+        sp_prime_is_prime_ex(r, 8, &isPrime, rng);
+    }
+
+    return err;
+}
+
+/* Multiply a by b and store in r: r = a * b
+ *
+ * a  SP integer to multiply.
+ * b  SP integer to multiply.
+ * r  SP integer result.
+ * returns MP_OKAY always.
+ */
+int sp_mul(sp_int* a, sp_int* b, sp_int* r)
+{
+    int err = MP_OKAY;
+    int i;
+#ifdef WOLFSSL_SMALL_STACK
+    sp_int* t = NULL;
+    sp_int* tr;
+#else
+    sp_int t[1];
+    sp_int tr[1];
+#endif
+
+    if (a->used + b->used > SP_INT_DIGITS)
+        err = MP_VAL;
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (err == MP_OKAY) {
+        t = (sp_int*)XMALLOC(sizeof(sp_int) * 2, NULL, DYNAMIC_TYPE_BIGINT);
+        if (t == NULL)
+            err = MP_MEM;
+        else
+            tr = &t[1];
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        sp_init(t);
+        sp_init(tr);
+
+        for (i = 0; i < b->used; i++) {
+            _sp_mul_d(a, b->dp[i], t, i);
+            sp_add(tr, t, tr);
+        }
+        sp_copy(tr, r);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (t != NULL)
+        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+
+    return err;
+}
+
+/* Square a mod m and store in r: r = (a * a) mod m
+ *
+ * a  SP integer to square.
+ * m  SP integer modulus.
+ * r  SP integer result.
+ * returns MP_VAL when m is 0, MP_MEM when dynamic memory allocation fails,
+ *         BAD_FUNC_ARG when a is to big and MP_OKAY otherwise.
+ */
+static int sp_sqrmod(sp_int* a, sp_int* m, sp_int* r)
+{
+    int err = MP_OKAY;
+
+    if (a->used * 2 > SP_INT_DIGITS)
+        err = MP_VAL;
+
+    if (err == MP_OKAY)
+        err = sp_mul(a, a, r);
+    if (err == MP_OKAY)
+        err = sp_mod(r, m, r);
+
+    return err;
+}
+
+#if defined(WOLFSSL_HAVE_SP_DH) || defined(WOLFSSL_KEY_GEN)
+/* Multiply a by b mod m and store in r: r = (a * b) mod m
+ *
+ * a  SP integer to multiply.
+ * b  SP integer to multiply.
+ * m  SP integer modulus.
+ * r  SP integer result.
+ * returns MP_VAL when m is 0, MP_MEM when dynamic memory allocation fails and
+ *         MP_OKAY otherwise.
+ */
+int sp_mulmod(sp_int* a, sp_int* b, sp_int* m, sp_int* r)
+{
+    int err = MP_OKAY;
+#ifdef WOLFSSL_SMALL_STACK
+    sp_int* t = NULL;
+#else
+    sp_int t[1];
+#endif
+
+    if (a->used + b->used > SP_INT_DIGITS)
+        err = MP_VAL;
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (err == MP_OKAY) {
+        t = (sp_int*)XMALLOC(sizeof(sp_int), NULL, DYNAMIC_TYPE_BIGINT);
+        if (t == NULL) {
+            err = MP_MEM;
+        }
+    }
+#endif
+    if (err == MP_OKAY) {
+        err = sp_mul(a, b, t);
+    }
+    if (err == MP_OKAY) {
+        err = sp_mod(t, m, r);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (t != NULL)
+        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+    return err;
+}
+#endif
+
+/* Calculate a modulo the digit d into r: r = a mod d
+ *
+ * a  SP integer to square.
+ * d  SP integer digit, modulus.
+ * r  SP integer digit, result.
+ * returns MP_VAL when d is 0 and MP_OKAY otherwise.
+ */
+static int sp_mod_d(sp_int* a, const sp_int_digit d, sp_int_digit* r)
+{
+    int err = MP_OKAY;
+    int i;
+    sp_int_word w = 0;
+    sp_int_digit t;
+
+    if (d == 0)
+        err = MP_VAL;
+
+    if (err == MP_OKAY) {
+        for (i = a->used - 1; i >= 0; i--) {
+            w = (w << SP_WORD_SIZE) | a->dp[i];
+            t = (sp_int_digit)(w / d);
+            w -= (sp_int_word)t * d;
+        }
+
+        *r = (sp_int_digit)w;
+    }
+
+    return err;
+}
+
+/* Calculates the Greatest Common Denominator (GCD) of a and b into r.
+ *
+ * a  SP integer operand.
+ * b  SP integer operand.
+ * r  SP integer result.
+ * returns MP_MEM when dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_gcd(sp_int* a, sp_int* b, sp_int* r)
+{
+    int err = MP_OKAY;
+#ifdef WOLFSSL_SMALL_STACK
+    sp_int* u = NULL;
+    sp_int* v;
+    sp_int* t;
+#else
+    sp_int u[1], v[1], t[1];
+#endif
+
+    if (sp_iszero(a))
+        sp_copy(b, r);
+    else if (sp_iszero(b))
+        sp_copy(a, r);
+    else {
+#ifdef WOLFSSL_SMALL_STACK
+        u = (sp_int*)XMALLOC(sizeof(sp_int) * 3, NULL, DYNAMIC_TYPE_BIGINT);
+        if (u == NULL)
+            err = MP_MEM;
+        else {
+            v = &u[1];
+            t = &u[2];
+        }
+#endif
+
+        if (err == MP_OKAY) {
+            sp_init(u);
+            sp_init(v);
+            sp_init(t);
+
+            if (sp_cmp(a, b) != MP_LT) {
+                sp_copy(b, u);
+                /* First iteration - u = a, v = b */
+                if (b->used == 1) {
+                    err = sp_mod_d(a, b->dp[0], &v->dp[0]);
+                    if (err == MP_OKAY)
+                        v->used = (v->dp[0] != 0);
+                }
+                else
+                    err = sp_mod(a, b, v);
+            }
+            else {
+                sp_copy(a, u);
+                /* First iteration - u = b, v = a */
+                if (a->used == 1) {
+                    err = sp_mod_d(b, a->dp[0], &v->dp[0]);
+                    if (err == MP_OKAY)
+                        v->used = (v->dp[0] != 0);
+                }
+                else
+                    err = sp_mod(b, a, v);
+            }
+        }
+
+        if (err == MP_OKAY) {
+            while (!sp_iszero(v)) {
+                if (v->used == 1) {
+                    sp_mod_d(u, v->dp[0], &t->dp[0]);
+                    t->used = (t->dp[0] != 0);
+                }
+                else
+                    sp_mod(u, v, t);
+                sp_copy(v, u);
+                sp_copy(t, v);
+            }
+            sp_copy(u, r);
+        }
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (u != NULL)
+        XFREE(u, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+
+    return err;
+}
+
+/* Divides a by 2 and stores in r: r = a >> 1
+ *
+ * a  SP integer to divide.
+ * r  SP integer result.
+ * returns MP_OKAY always.
+ */
+static int sp_div_2(sp_int* a, sp_int* r)
+{
+    int i;
+
+    for (i = 0; i < a->used-1; i++)
+        r->dp[i] = (a->dp[i] >> 1) | (a->dp[i+1] << (SP_WORD_SIZE - 1));
+    r->dp[i] = a->dp[i] >> 1;
+    r->used = i + 1;
+    sp_clamp(r);
+
+    return MP_OKAY;
+}
+
+
+/* Calculates the multiplicative inverse in the field.
+ *
+ * a  SP integer to invert.
+ * m  SP integer that is the modulus of the field.
+ * r  SP integer result.
+ * returns MP_VAL when a or m is 0, MP_MEM when dynamic memory allocation fails
+ *         and MP_OKAY otherwise.
+ */
+int sp_invmod(sp_int* a, sp_int* m, sp_int* r)
+{
+    int err = MP_OKAY;
+#ifdef WOLFSSL_SMALL_STACK
+    sp_int* u = NULL;
+    sp_int* v;
+    sp_int* b;
+    sp_int* c;
+#else
+    sp_int u[1], v[1], b[1], c[1];
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+    u = (sp_int*)XMALLOC(sizeof(sp_int) * 4, NULL, DYNAMIC_TYPE_BIGINT);
+    if (u == NULL) {
+        err = MP_MEM;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        v = &u[1];
+        b = &u[2];
+        c = &u[3];
+#endif
+        sp_init(v);
+
+        if (sp_cmp(a, m) != MP_LT) {
+            err = sp_mod(a, m, v);
+            a = v;
+        }
+    }
+
+    /* 0 != n*m + 1 (+ve m), r*a mod 0 is always 0 (never 1)  */
+    if ((err == MP_OKAY) && (sp_iszero(a) || sp_iszero(m))) {
+        err = MP_VAL;
+    }
+    /* r*2*x != n*2*y + 1  */
+    if ((err == MP_OKAY) && sp_iseven(a) && sp_iseven(m)) {
+        err = MP_VAL;
+    }
+
+    /* 1*1 = 0*m + 1  */
+    if ((err == MP_OKAY) && sp_isone(a)) {
+        sp_set(r, 1);
+    }
+    else if (err != MP_OKAY) {
+    }
+    else if (sp_iseven(m)) {
+        /* a^-1 mod m = m + (1 - m*(m^-1 % a)) / a
+         *            = m - (m*(m^-1 % a) - 1) / a
+         */
+        err = sp_invmod(m, a, r);
+        if (err == MP_OKAY) {
+            err = sp_mul(r, m, r);
+        }
+        if (err == MP_OKAY) {
+            sp_sub_d(r, 1, r);
+            sp_div(r, a, r, NULL);
+            sp_sub(m, r, r);
+        }
+    }
+    else {
+        if (err == MP_OKAY) {
+            sp_init(u);
+            sp_init(b);
+            sp_init(c);
+
+            sp_copy(m, u);
+            sp_copy(a, v);
+            sp_zero(b);
+            sp_set(c, 1);
+
+            while (!sp_isone(v) && !sp_iszero(u)) {
+                if (sp_iseven(u)) {
+                    sp_div_2(u, u);
+                    if (sp_isodd(b)) {
+                        sp_add(b, m, b);
+                    }
+                    sp_div_2(b, b);
+                }
+                else if (sp_iseven(v)) {
+                    sp_div_2(v, v);
+                    if (sp_isodd(c)) {
+                        sp_add(c, m, c);
+                    }
+                    sp_div_2(c, c);
+                }
+                else if (sp_cmp(u, v) != MP_LT) {
+                    sp_sub(u, v, u);
+                    if (sp_cmp(b, c) == MP_LT) {
+                        sp_add(b, m, b);
+                    }
+                    sp_sub(b, c, b);
+                }
+                else {
+                    sp_sub(v, u, v);
+                    if (sp_cmp(c, b) == MP_LT) {
+                        sp_add(c, m, c);
+                    }
+                    sp_sub(c, b, c);
+                }
+            }
+            if  (sp_iszero(u)) {
+                err = MP_VAL;
+            }
+            else {
+                sp_copy(c, r);
+            }
+        }
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (u != NULL) {
+        XFREE(u, NULL, DYNAMIC_TYPE_BIGINT);
+    }
+#endif
+
+    return err;
+}
+
+/* Calculates the Lowest Common Multiple (LCM) of a and b and stores in r.
+ *
+ * a  SP integer operand.
+ * b  SP integer operand.
+ * r  SP integer result.
+ * returns MP_MEM when dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_lcm(sp_int* a, sp_int* b, sp_int* r)
+{
+    int     err = MP_OKAY;
+#ifndef WOLFSSL_SMALL_STACK
+    sp_int  t[2];
+#else
+    sp_int  *t = NULL;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+    t = (sp_int*)XMALLOC(sizeof(sp_int) * 2, NULL, DYNAMIC_TYPE_BIGINT);
+    if (t == NULL) {
+        err = MP_MEM;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        sp_init(&t[0]);
+        sp_init(&t[1]);
+        err = sp_gcd(a, b, &t[0]);
+        if (err == MP_OKAY) {
+            if (sp_cmp(a, b) == MP_GT) {
+                err = sp_div(a, &t[0], &t[1], NULL);
+                if (err == MP_OKAY)
+                    err = sp_mul(b, &t[1], r);
+            }
+            else {
+                err = sp_div(b, &t[0], &t[1], NULL);
+                if (err == MP_OKAY)
+                    err = sp_mul(a, &t[1], r);
+            }
+        }
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (t != NULL)
+        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+    return err;
+}
 
+/* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
+ *
+ * b  SP integer base.
+ * e  SP integer exponent.
+ * m  SP integer modulus.
+ * r  SP integer result.
+ * returns MP_VAL when m is not 1024, 2048, 1536 or 3072 bits and otherwise
+ *         MP_OKAY.
+ */
+int sp_exptmod(sp_int* b, sp_int* e, sp_int* m, sp_int* r)
+{
+    int err = MP_OKAY;
+    int done = 0;
+    int mBits = sp_count_bits(m);
+    int bBits = sp_count_bits(b);
+    int eBits = sp_count_bits(e);
+
+    if (sp_iszero(m)) {
+        err = MP_VAL;
+    }
+    else if (sp_isone(m)) {
+        sp_set(r, 0);
+        done = 1;
+    }
+    else if (sp_iszero(e)) {
+        sp_set(r, 1);
+        done = 1;
+    }
+    else if (sp_iszero(b)) {
+        sp_set(r, 0);
+        done = 1;
+    }
+    else if (m->used * 2 > SP_INT_DIGITS) {
+        err = BAD_FUNC_ARG;
+    }
+
+    if (!done && (err == MP_OKAY)) {
+#ifndef WOLFSSL_SP_NO_2048
+        if ((mBits == 1024) && sp_isodd(m) && (bBits <= 1024) &&
+            (eBits <= 1024)) {
+            err = sp_ModExp_1024(b, e, m, r);
+            done = 1;
+        }
+        else if ((mBits == 2048) && sp_isodd(m) && (bBits <= 2048) &&
+                 (eBits <= 2048)) {
+            err = sp_ModExp_2048(b, e, m, r);
+            done = 1;
+        }
+        else
+#endif
+#ifndef WOLFSSL_SP_NO_3072
+        if ((mBits == 1536) && sp_isodd(m) && (bBits <= 1536) &&
+            (eBits <= 1536)) {
+            err = sp_ModExp_1536(b, e, m, r);
+            done = 1;
+        }
+        else if ((mBits == 3072) && sp_isodd(m) && (bBits <= 3072) &&
+                 (eBits <= 3072)) {
+            err = sp_ModExp_3072(b, e, m, r);
+            done = 1;
+        }
+        else
+#endif
+#ifdef WOLFSSL_SP_NO_4096
+        if ((mBits == 4096) && sp_isodd(m) && (bBits <= 4096) &&
+            (eBits <= 4096)) {
+            err = sp_ModExp_4096(b, e, m, r);
+            done = 1;
+        }
+        else
+#endif
+        {
+        }
+    }
+#if defined(WOLFSSL_HAVE_SP_DH) && defined(WOLFSSL_KEY_GEN)
+    if (!done && (err == MP_OKAY)) {
+        int i;
+
+    #ifdef WOLFSSL_SMALL_STACK
+        sp_int* t = NULL;
+    #else
+        sp_int t[1];
+    #endif
+
+    #ifdef WOLFSSL_SMALL_STACK
+        if (!done && (err == MP_OKAY)) {
+            t = (sp_int*)XMALLOC(sizeof(sp_int), NULL, DYNAMIC_TYPE_BIGINT);
+            if (t == NULL) {
+                err = MP_MEM;
+            }
+        }
+    #endif
+        if (!done && (err == MP_OKAY)) {
+            sp_init(t);
+
+            if (sp_cmp(b, m) != MP_LT) {
+                err = sp_mod(b, m, t);
+                if (err == MP_OKAY && sp_iszero(t)) {
+                    sp_set(r, 0);
+                    done = 1;
+                }
+            }
+            else {
+                sp_copy(b, t);
+            }
+
+            if (!done && (err == MP_OKAY)) {
+                for (i = eBits-2; err == MP_OKAY && i >= 0; i--) {
+                     err = sp_sqrmod(t, m, t);
+                     if (err == MP_OKAY && (e->dp[i / SP_WORD_SIZE] >>
+                                                      (i % SP_WORD_SIZE)) & 1) {
+                         err = sp_mulmod(t, b, m, t);
+                     }
+                 }
+             }
+        }
+        if (!done && (err == MP_OKAY)) {
+            sp_copy(t, r);
+        }
+
+    #ifdef WOLFSSL_SMALL_STACK
+        if (t != NULL) {
+            XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
+        }
+    #endif
+    }
+#else
+    if (!done && (err == MP_OKAY)) {
+        err = MP_VAL;
+    }
+#endif
+
+    (void)mBits;
+    (void)bBits;
+    (void)eBits;
+
+    return err;
+}
+
+
+/* Number of entries in array of number of least significant zero bits. */
+#define SP_LNZ_CNT      16
+/* Number of bits the array checks. */
+#define SP_LNZ_BITS     4
+/* Mask to apply to check with array. */
+#define SP_LNZ_MASK     0xf
+/* Number of least significant zero bits in first SP_LNZ_CNT numbers. */
+static const int lnz[SP_LNZ_CNT] = {
+   4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
+};
+
+/* Count the number of least significant zero bits.
+ *
+ * a  Number to check
+ * returns the count of least significant zero bits.
+ */
+static int sp_cnt_lsb(sp_int* a)
+{
+    int i, j;
+    int cnt = 0;
+    int bc = 0;
+
+    if (!sp_iszero(a)) {
+        for (i = 0; i < a->used && a->dp[i] == 0; i++, cnt += SP_WORD_SIZE) {
+        }
+
+        for (j = 0; j < SP_WORD_SIZE; j += SP_LNZ_BITS) {
+            bc = lnz[(a->dp[i] >> j) & SP_LNZ_MASK];
+            if (bc != 4) {
+                bc += cnt + j;
+                break;
+            }
+        }
+    }
+
+    return bc;
+}
+
+/* Miller-Rabin test of "a" to the base of "b" as described in
+ * HAC pp. 139 Algorithm 4.24
+ *
+ * Sets result to 0 if definitely composite or 1 if probably prime.
+ * Randomly the chance of error is no more than 1/4 and often
+ * very much lower.
+ *
+ * a       SP integer to check.
+ * b       SP integer small prime.
+ * result  Whether a is likely prime: MP_YES or MP_NO.
+ * n1      SP integer operand.
+ * y       SP integer operand.
+ * r       SP integer operand.
+ * returns MP_VAL when a is not 1024, 2048, 1536 or 3072 and MP_OKAY otherwise.
+ */
+static int sp_prime_miller_rabin_ex(sp_int * a, sp_int * b, int *result,
+  sp_int *n1, sp_int *y, sp_int *r)
+{
+    int s, j;
+    int err = MP_OKAY;
+
+    /* default */
+    *result = MP_NO;
+
+    /* ensure b > 1 */
+    if (sp_cmp_d(b, 1) == MP_GT) {
+        /* get n1 = a - 1 */
+        sp_copy(a, n1);
+        sp_sub_d(n1, 1, n1);
+        /* set 2**s * r = n1 */
+        sp_copy(n1, r);
+
+        /* count the number of least significant bits
+         * which are zero
+         */
+        s = sp_cnt_lsb(r);
+
+        /* now divide n - 1 by 2**s */
+        sp_rshb(r, s, r);
+
+        /* compute y = b**r mod a */
+        sp_zero(y);
+
+        err = sp_exptmod(b, r, a, y);
+
+        if (err == MP_OKAY) {
+            /* probably prime until shown otherwise */
+            *result = MP_YES;
+
+            /* if y != 1 and y != n1 do */
+            if (sp_cmp_d(y, 1) != MP_EQ && sp_cmp(y, n1) != MP_EQ) {
+                j = 1;
+                /* while j <= s-1 and y != n1 */
+                while ((j <= (s - 1)) && sp_cmp(y, n1) != MP_EQ) {
+                    sp_sqrmod(y, a, y);
+
+                    /* if y == 1 then composite */
+                    if (sp_cmp_d(y, 1) == MP_EQ) {
+                        *result = MP_NO;
+                        break;
+                    }
+                    ++j;
+                }
+
+                /* if y != n1 then composite */
+                if (*result == MP_YES && sp_cmp(y, n1) != MP_EQ)
+                    *result = MP_NO;
+            }
+        }
+    }
+
+    return err;
+}
+
+/* Miller-Rabin test of "a" to the base of "b" as described in
+ * HAC pp. 139 Algorithm 4.24
+ *
+ * Sets result to 0 if definitely composite or 1 if probably prime.
+ * Randomly the chance of error is no more than 1/4 and often
+ * very much lower.
+ *
+ * a       SP integer to check.
+ * b       SP integer small prime.
+ * result  Whether a is likely prime: MP_YES or MP_NO.
+ * returns MP_MEM when dynamic memory allocation fails, MP_VAL when a is not
+ *         1024, 2048, 1536 or 3072 and MP_OKAY otherwise.
+ */
+static int sp_prime_miller_rabin(sp_int * a, sp_int * b, int *result)
+{
+    int err = MP_OKAY;
+#ifndef WOLFSSL_SMALL_STACK
+    sp_int  n1[1], y[1], r[1];
+#else
+    sp_int *n1 = NULL, *y, *r;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+    n1 = (sp_int*)XMALLOC(sizeof(sp_int) * 3, NULL, DYNAMIC_TYPE_BIGINT);
+    if (n1 == NULL)
+        err = MP_MEM;
+    else {
+        y = &n1[1];
+        r = &n1[2];
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        sp_init(n1);
+        sp_init(y);
+        sp_init(r);
+
+        err = sp_prime_miller_rabin_ex(a, b, result, n1, y, r);
+
+        sp_clear(n1);
+        sp_clear(y);
+        sp_clear(r);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (n1 != NULL)
+        XFREE(n1, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+
+    return err;
+}
+
+/* Number of pre-computed primes. First n primes. */
+#define SP_PRIME_SIZE      256
+
+/* a few primes */
+static const sp_int_digit primes[SP_PRIME_SIZE] = {
+    0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
+    0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
+    0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
+    0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083,
+    0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
+    0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
+    0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
+    0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
+
+    0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
+    0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
+    0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
+    0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
+    0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
+    0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
+    0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
+    0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
+
+    0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
+    0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
+    0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
+    0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
+    0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
+    0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
+    0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
+    0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
+
+    0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
+    0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
+    0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
+    0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
+    0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
+    0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
+    0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
+    0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
+};
+
+
+/* Check whether a is prime.
+ * Checks against a number of small primes and does t iterations of
+ * Miller-Rabin.
+ *
+ * a       SP integer to check.
+ * t       Number of iterations of Muller-Rabin to perform.
+ * result  MP_YES when prime.
+ *         MP_NO when not prime.
+ * returns MP_VAL when t is out of range, MP_MEM when dynamic memory allocation
+ *         fails and otherwise MP_OKAY.
+ */
+int sp_prime_is_prime(sp_int *a, int t, int* result)
+{
+    int         err = MP_OKAY;
+    int         i;
+    int         haveRes = 0;
+#ifndef WOLFSSL_SMALL_STACK
+    sp_int      b[1];
+#else
+    sp_int      *b = NULL;
+#endif
+    sp_int_digit d;
+
+    if (t <= 0 || t > SP_PRIME_SIZE) {
+        *result = MP_NO;
+        err = MP_VAL;
+    }
+
+    if (sp_isone(a)) {
+        *result = MP_NO;
+        return MP_OKAY;
+    }
+
+    if (err == MP_OKAY && a->used == 1) {
+        /* check against primes table */
+        for (i = 0; i < SP_PRIME_SIZE; i++) {
+            if (sp_cmp_d(a, primes[i]) == MP_EQ) {
+                *result = MP_YES;
+                haveRes = 1;
+                break;
+            }
+        }
+    }
+
+    if (err == MP_OKAY && !haveRes) {
+        /* do trial division */
+        for (i = 0; i < SP_PRIME_SIZE; i++) {
+            err = sp_mod_d(a, primes[i], &d);
+            if (err != MP_OKAY || d == 0) {
+                *result = MP_NO;
+                haveRes = 1;
+                break;
+            }
+        }
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (err == MP_OKAY && !haveRes) {
+        b = (sp_int*)XMALLOC(sizeof(sp_int), NULL, DYNAMIC_TYPE_BIGINT);
+        if (b == NULL)
+            err = MP_MEM;
+    }
+#endif
+
+    if (err == MP_OKAY && !haveRes) {
+        /* now do 't' miller rabins */
+        sp_init(b);
+        for (i = 0; i < t; i++) {
+            sp_set(b, primes[i]);
+            err = sp_prime_miller_rabin(a, b, result);
+            if (err != MP_OKAY || *result == MP_NO)
+                break;
+        }
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+     if (b != NULL)
+         XFREE(b, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+
+     return err;
+}
+
+/* Check whether a is prime.
+ * Checks against a number of small primes and does t iterations of
+ * Miller-Rabin.
+ *
+ * a       SP integer to check.
+ * t       Number of iterations of Muller-Rabin to perform.
+ * result  MP_YES when prime.
+ *         MP_NO when not prime.
+ * rng     Random number generator.
+ * returns MP_VAL when t is out of range, MP_MEM when dynamic memory allocation
+ *         fails and otherwise MP_OKAY.
+ */
+int sp_prime_is_prime_ex(sp_int* a, int t, int* result, WC_RNG* rng)
+{
+    int err = MP_OKAY;
+    int ret = MP_YES;
+    int haveRes = 0;
+    int i;
+#ifndef WC_NO_RNG
+    #ifndef WOLFSSL_SMALL_STACK
+        sp_int b[1], c[1], n1[1], y[1], r[1];
+    #else
+        sp_int *b = NULL, *c = NULL, *n1 = NULL, *y = NULL, *r = NULL;
+    #endif
+    word32 baseSz;
+#endif
+
+    if (a == NULL || result == NULL || rng == NULL)
+        err = MP_VAL;
+
+    if (sp_isone(a)) {
+        *result = MP_NO;
+        return MP_OKAY;
+    }
+
+    if (err == MP_OKAY && a->used == 1) {
+        /* check against primes table */
+        for (i = 0; i < SP_PRIME_SIZE; i++) {
+            if (sp_cmp_d(a, primes[i]) == MP_EQ) {
+                ret = MP_YES;
+                haveRes = 1;
+                break;
+            }
+        }
+    }
+
+    if (err == MP_OKAY && !haveRes) {
+        sp_int_digit d;
+
+        /* do trial division */
+        for (i = 0; i < SP_PRIME_SIZE; i++) {
+            err = sp_mod_d(a, primes[i], &d);
+            if (err != MP_OKAY || d == 0) {
+                ret = MP_NO;
+                haveRes = 1;
+                break;
+            }
+        }
+    }
+
+#ifndef WC_NO_RNG
+    /* now do a miller rabin with up to t random numbers, this should
+     * give a (1/4)^t chance of a false prime. */
+    #ifdef WOLFSSL_SMALL_STACK
+    if (err == MP_OKAY && !haveRes) {
+        b = (sp_int*)XMALLOC(sizeof(sp_int) * 5, NULL, DYNAMIC_TYPE_BIGINT);
+        if (b == NULL) {
+            err = MP_MEM;
+        }
+        else {
+            c = &b[1]; n1 = &b[2]; y= &b[3]; r = &b[4];
+        }
+    }
+    #endif
+
+    if (err == MP_OKAY && !haveRes) {
+        sp_init(b);
+        sp_init(c);
+        sp_init(n1);
+        sp_init(y);
+        sp_init(r);
+
+        err = sp_sub_d(a, 2, c);
+    }
+
+    if (err == MP_OKAY && !haveRes) {
+        baseSz = (sp_count_bits(a) + 7) / 8;
+
+        while (t > 0) {
+            err = wc_RNG_GenerateBlock(rng, (byte*)b->dp, baseSz);
+            if (err != MP_OKAY)
+                break;
+            b->used = a->used;
+
+            if (sp_cmp_d(b, 2) != MP_GT || sp_cmp(b, c) != MP_LT)
+                continue;
+
+            err = sp_prime_miller_rabin_ex(a, b, &ret, n1, y, r);
+            if (err != MP_OKAY || ret == MP_NO)
+                break;
+
+            t--;
+        }
+
+        sp_clear(n1);
+        sp_clear(y);
+        sp_clear(r);
+        sp_clear(b);
+        sp_clear(c);
+    }
+
+    #ifdef WOLFSSL_SMALL_STACK
+    if (b != NULL)
+        XFREE(b, NULL, DYNAMIC_TYPE_BIGINT);
+    #endif
+#else
+    (void)t;
+#endif /* !WC_NO_RNG */
+
+    *result = ret;
+    return err;
+}
+
+#ifndef NO_DH
+int sp_exch(sp_int* a, sp_int* b)
+{
+    int err = MP_OKAY;
+#ifndef WOLFSSL_SMALL_STACK
+    sp_int  t[1];
+#else
+    sp_int *t = NULL;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+   t = (sp_int*)XMALLOC(sizeof(sp_int), NULL, DYNAMIC_TYPE_BIGINT);
+   if (t == NULL)
+       err = MP_MEM;
+#endif
+
+    if (err == MP_OKAY) {
+        *t = *a;
+        *a = *b;
+        *b = *t;
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (t != NULL)
+        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+    return MP_OKAY;
+}
+#endif
+#endif
+
+#if defined(WOLFSSL_KEY_GEN) && !defined(NO_RSA)
+/* Multiply a by digit n and put result into r. r = a * n
+ *
+ * a  SP integer to be multiplied.
+ * n  Number to multiply by.
+ * r  SP integer result.
+ * returns MP_OKAY always.
+ */
+int sp_mul_d(sp_int* a, sp_int_digit n, sp_int* r)
+{
+    _sp_mul_d(a, n, r, 0);
     return MP_OKAY;
 }
 #endif
 
-#if !defined(USE_FAST_MATH)
 /* Returns the run time settings.
  *
  * returns the settings value.
@@ -658,8 +2199,6 @@
 {
     return CTC_SETTINGS;
 }
-#endif
 
-#endif
-
+#endif /* WOLFSSL_SP_MATH */
 
--- a/wolfcrypt/src/sp_x86_64.c	Sat Aug 18 22:20:43 2018 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,46726 +0,0 @@
-/* sp.c
- *
- * Copyright (C) 2006-2018 wolfSSL Inc.
- *
- * This file is part of wolfSSL.
- *
- * wolfSSL is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * wolfSSL is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
- */
-
-/* Implementation by Sean Parkinson. */
-
-#ifdef HAVE_CONFIG_H
-    #include <config.h>
-#endif
-
-#include <wolfssl/wolfcrypt/settings.h>
-#include <wolfssl/wolfcrypt/error-crypt.h>
-#include <wolfssl/wolfcrypt/cpuid.h>
-#ifdef NO_INLINE
-    #include <wolfssl/wolfcrypt/misc.h>
-#else
-    #define WOLFSSL_MISC_INCLUDED
-    #include <wolfcrypt/src/misc.c>
-#endif
-
-#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) || \
-                                    defined(WOLFSSL_HAVE_SP_ECC)
-
-#ifdef RSA_LOW_MEM
-#define SP_RSA_PRIVATE_EXP_D
-
-#ifndef WOLFSSL_SP_SMALL
-#define WOLFSSL_SP_SMALL
-#endif
-#endif
-
-#include <wolfssl/wolfcrypt/sp.h>
-
-#ifdef WOLFSSL_SP_X86_64_ASM
-#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
-#ifndef WOLFSSL_SP_NO_2048
-/* Read big endian unsigned byte aray into r.
- *
- * r  A single precision integer.
- * a  Byte array.
- * n  Number of bytes in array to read.
- */
-static void sp_2048_from_bin(sp_digit* r, int max, const byte* a, int n)
-{
-    int i, j = 0, s = 0;
-
-    r[0] = 0;
-    for (i = n-1; i >= 0; i--) {
-        r[j] |= ((sp_digit)a[i]) << s;
-        if (s >= 56) {
-            r[j] &= 0xffffffffffffffffl;
-            s = 64 - s;
-            if (j + 1 >= max)
-                break;
-            r[++j] = a[i] >> s;
-            s = 8 - s;
-        }
-        else
-            s += 8;
-    }
-
-    for (j++; j < max; j++)
-        r[j] = 0;
-}
-
-/* Convert an mp_int to an array of sp_digit.
- *
- * r  A single precision integer.
- * a  A multi-precision integer.
- */
-static void sp_2048_from_mp(sp_digit* r, int max, mp_int* a)
-{
-#if DIGIT_BIT == 64
-    int j;
-
-    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
-
-    for (j = a->used; j < max; j++)
-        r[j] = 0;
-#elif DIGIT_BIT > 64
-    int i, j = 0, s = 0;
-
-    r[0] = 0;
-    for (i = 0; i < a->used && j < max; i++) {
-        r[j] |= a->dp[i] << s;
-        r[j] &= 0xffffffffffffffffl;
-        s = 64 - s;
-        if (j + 1 >= max)
-            break;
-        r[++j] = a->dp[i] >> s;
-        while (s + 64 <= DIGIT_BIT) {
-            s += 64;
-            r[j] &= 0xffffffffffffffffl;
-            if (j + 1 >= max)
-                break;
-            if (s < DIGIT_BIT)
-                r[++j] = a->dp[i] >> s;
-            else
-                r[++j] = 0;
-        }
-        s = DIGIT_BIT - s;
-    }
-
-    for (j++; j < max; j++)
-        r[j] = 0;
-#else
-    int i, j = 0, s = 0;
-
-    r[0] = 0;
-    for (i = 0; i < a->used && j < max; i++) {
-        r[j] |= ((sp_digit)a->dp[i]) << s;
-        if (s + DIGIT_BIT >= 64) {
-            r[j] &= 0xffffffffffffffffl;
-            if (j + 1 >= max)
-                break;
-            s = 64 - s;
-            if (s == DIGIT_BIT) {
-                r[++j] = 0;
-                s = 0;
-            }
-            else {
-                r[++j] = a->dp[i] >> s;
-                s = DIGIT_BIT - s;
-            }
-        }
-        else
-            s += DIGIT_BIT;
-    }
-
-    for (j++; j < max; j++)
-        r[j] = 0;
-#endif
-}
-
-/* Write r as big endian to byte aray.
- * Fixed length number of bytes written: 256
- *
- * r  A single precision integer.
- * a  Byte array.
- */
-static void sp_2048_to_bin(sp_digit* r, byte* a)
-{
-    int i, j, s = 0, b;
-
-    j = 2048 / 8 - 1;
-    a[j] = 0;
-    for (i=0; i<32 && j>=0; i++) {
-        b = 0;
-        a[j--] |= r[i] << s; b += 8 - s;
-        if (j < 0)
-            break;
-        while (b < 64) {
-            a[j--] = r[i] >> b; b += 8;
-            if (j < 0)
-                break;
-        }
-        s = 8 - (b - 64);
-        if (j >= 0)
-            a[j] = 0;
-        if (s != 0)
-            j++;
-    }
-}
-
-/* Multiply a and b into r. (r = a * b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A single precision integer.
- */
-SP_NOINLINE static void sp_2048_mul_16(sp_digit* r, const sp_digit* a,
-    const sp_digit* b)
-{
-    sp_digit tmp[16];
-
-    __asm__ __volatile__ (
-        "#  A[0] * B[0]\n\t"
-        "movq	0(%[b]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "movq	%%rax, (%[tmp])\n\t"
-        "movq	%%rdx, %%rcx\n\t"
-        "#  A[0] * B[1]\n\t"
-        "movq	8(%[b]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[1] * B[0]\n\t"
-        "movq	0(%[b]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "movq	%%rcx, 8(%[tmp])\n\t"
-        "#  A[0] * B[2]\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[1] * B[1]\n\t"
-        "movq	8(%[b]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[2] * B[0]\n\t"
-        "movq	0(%[b]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "movq	%%r8, 16(%[tmp])\n\t"
-        "#  A[0] * B[3]\n\t"
-        "movq	24(%[b]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[1] * B[2]\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[2] * B[1]\n\t"
-        "movq	8(%[b]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[3] * B[0]\n\t"
-        "movq	0(%[b]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "movq	%%rbx, 24(%[tmp])\n\t"
-        "#  A[0] * B[4]\n\t"
-        "movq	32(%[b]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[1] * B[3]\n\t"
-        "movq	24(%[b]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[2] * B[2]\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[3] * B[1]\n\t"
-        "movq	8(%[b]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[4] * B[0]\n\t"
-        "movq	0(%[b]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "movq	%%rcx, 32(%[tmp])\n\t"
-        "#  A[0] * B[5]\n\t"
-        "movq	40(%[b]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[1] * B[4]\n\t"
-        "movq	32(%[b]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[2] * B[3]\n\t"
-        "movq	24(%[b]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[3] * B[2]\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[4] * B[1]\n\t"
-        "movq	8(%[b]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[5] * B[0]\n\t"
-        "movq	0(%[b]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "movq	%%r8, 40(%[tmp])\n\t"
-        "#  A[0] * B[6]\n\t"
-        "movq	48(%[b]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[1] * B[5]\n\t"
-        "movq	40(%[b]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[2] * B[4]\n\t"
-        "movq	32(%[b]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[3] * B[3]\n\t"
-        "movq	24(%[b]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[4] * B[2]\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[5] * B[1]\n\t"
-        "movq	8(%[b]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[6] * B[0]\n\t"
-        "movq	0(%[b]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "movq	%%rbx, 48(%[tmp])\n\t"
-        "#  A[0] * B[7]\n\t"
-        "movq	56(%[b]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[1] * B[6]\n\t"
-        "movq	48(%[b]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[2] * B[5]\n\t"
-        "movq	40(%[b]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[3] * B[4]\n\t"
-        "movq	32(%[b]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[4] * B[3]\n\t"
-        "movq	24(%[b]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[5] * B[2]\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[6] * B[1]\n\t"
-        "movq	8(%[b]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[7] * B[0]\n\t"
-        "movq	0(%[b]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "movq	%%rcx, 56(%[tmp])\n\t"
-        "#  A[0] * B[8]\n\t"
-        "movq	64(%[b]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[1] * B[7]\n\t"
-        "movq	56(%[b]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[2] * B[6]\n\t"
-        "movq	48(%[b]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[3] * B[5]\n\t"
-        "movq	40(%[b]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[4] * B[4]\n\t"
-        "movq	32(%[b]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[5] * B[3]\n\t"
-        "movq	24(%[b]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[6] * B[2]\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[7] * B[1]\n\t"
-        "movq	8(%[b]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[8] * B[0]\n\t"
-        "movq	0(%[b]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "movq	%%r8, 64(%[tmp])\n\t"
-        "#  A[0] * B[9]\n\t"
-        "movq	72(%[b]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[1] * B[8]\n\t"
-        "movq	64(%[b]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[2] * B[7]\n\t"
-        "movq	56(%[b]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[3] * B[6]\n\t"
-        "movq	48(%[b]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[4] * B[5]\n\t"
-        "movq	40(%[b]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[5] * B[4]\n\t"
-        "movq	32(%[b]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[6] * B[3]\n\t"
-        "movq	24(%[b]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[7] * B[2]\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[8] * B[1]\n\t"
-        "movq	8(%[b]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[9] * B[0]\n\t"
-        "movq	0(%[b]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "movq	%%rbx, 72(%[tmp])\n\t"
-        "#  A[0] * B[10]\n\t"
-        "movq	80(%[b]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[1] * B[9]\n\t"
-        "movq	72(%[b]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[2] * B[8]\n\t"
-        "movq	64(%[b]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[3] * B[7]\n\t"
-        "movq	56(%[b]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[4] * B[6]\n\t"
-        "movq	48(%[b]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[5] * B[5]\n\t"
-        "movq	40(%[b]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[6] * B[4]\n\t"
-        "movq	32(%[b]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[7] * B[3]\n\t"
-        "movq	24(%[b]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[8] * B[2]\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[9] * B[1]\n\t"
-        "movq	8(%[b]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[10] * B[0]\n\t"
-        "movq	0(%[b]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "movq	%%rcx, 80(%[tmp])\n\t"
-        "#  A[0] * B[11]\n\t"
-        "movq	88(%[b]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[1] * B[10]\n\t"
-        "movq	80(%[b]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[2] * B[9]\n\t"
-        "movq	72(%[b]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[3] * B[8]\n\t"
-        "movq	64(%[b]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[4] * B[7]\n\t"
-        "movq	56(%[b]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[5] * B[6]\n\t"
-        "movq	48(%[b]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[6] * B[5]\n\t"
-        "movq	40(%[b]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[7] * B[4]\n\t"
-        "movq	32(%[b]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[8] * B[3]\n\t"
-        "movq	24(%[b]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[9] * B[2]\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[10] * B[1]\n\t"
-        "movq	8(%[b]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[11] * B[0]\n\t"
-        "movq	0(%[b]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "movq	%%r8, 88(%[tmp])\n\t"
-        "#  A[0] * B[12]\n\t"
-        "movq	96(%[b]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[1] * B[11]\n\t"
-        "movq	88(%[b]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[2] * B[10]\n\t"
-        "movq	80(%[b]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[3] * B[9]\n\t"
-        "movq	72(%[b]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[4] * B[8]\n\t"
-        "movq	64(%[b]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[5] * B[7]\n\t"
-        "movq	56(%[b]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[6] * B[6]\n\t"
-        "movq	48(%[b]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[7] * B[5]\n\t"
-        "movq	40(%[b]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[8] * B[4]\n\t"
-        "movq	32(%[b]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[9] * B[3]\n\t"
-        "movq	24(%[b]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[10] * B[2]\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[11] * B[1]\n\t"
-        "movq	8(%[b]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[12] * B[0]\n\t"
-        "movq	0(%[b]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "movq	%%rbx, 96(%[tmp])\n\t"
-        "#  A[0] * B[13]\n\t"
-        "movq	104(%[b]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[1] * B[12]\n\t"
-        "movq	96(%[b]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[2] * B[11]\n\t"
-        "movq	88(%[b]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[3] * B[10]\n\t"
-        "movq	80(%[b]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[4] * B[9]\n\t"
-        "movq	72(%[b]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[5] * B[8]\n\t"
-        "movq	64(%[b]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[6] * B[7]\n\t"
-        "movq	56(%[b]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[7] * B[6]\n\t"
-        "movq	48(%[b]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[8] * B[5]\n\t"
-        "movq	40(%[b]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[9] * B[4]\n\t"
-        "movq	32(%[b]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[10] * B[3]\n\t"
-        "movq	24(%[b]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[11] * B[2]\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[12] * B[1]\n\t"
-        "movq	8(%[b]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[13] * B[0]\n\t"
-        "movq	0(%[b]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "movq	%%rcx, 104(%[tmp])\n\t"
-        "#  A[0] * B[14]\n\t"
-        "movq	112(%[b]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[1] * B[13]\n\t"
-        "movq	104(%[b]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[2] * B[12]\n\t"
-        "movq	96(%[b]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[3] * B[11]\n\t"
-        "movq	88(%[b]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[4] * B[10]\n\t"
-        "movq	80(%[b]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[5] * B[9]\n\t"
-        "movq	72(%[b]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[6] * B[8]\n\t"
-        "movq	64(%[b]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[7] * B[7]\n\t"
-        "movq	56(%[b]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[8] * B[6]\n\t"
-        "movq	48(%[b]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[9] * B[5]\n\t"
-        "movq	40(%[b]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[10] * B[4]\n\t"
-        "movq	32(%[b]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[11] * B[3]\n\t"
-        "movq	24(%[b]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[12] * B[2]\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[13] * B[1]\n\t"
-        "movq	8(%[b]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[14] * B[0]\n\t"
-        "movq	0(%[b]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "movq	%%r8, 112(%[tmp])\n\t"
-        "#  A[0] * B[15]\n\t"
-        "movq	120(%[b]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[1] * B[14]\n\t"
-        "movq	112(%[b]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[2] * B[13]\n\t"
-        "movq	104(%[b]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[3] * B[12]\n\t"
-        "movq	96(%[b]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[4] * B[11]\n\t"
-        "movq	88(%[b]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[5] * B[10]\n\t"
-        "movq	80(%[b]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[6] * B[9]\n\t"
-        "movq	72(%[b]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[7] * B[8]\n\t"
-        "movq	64(%[b]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[8] * B[7]\n\t"
-        "movq	56(%[b]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[9] * B[6]\n\t"
-        "movq	48(%[b]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[10] * B[5]\n\t"
-        "movq	40(%[b]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[11] * B[4]\n\t"
-        "movq	32(%[b]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[12] * B[3]\n\t"
-        "movq	24(%[b]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[13] * B[2]\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[14] * B[1]\n\t"
-        "movq	8(%[b]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[15] * B[0]\n\t"
-        "movq	0(%[b]), %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "movq	%%rbx, 120(%[tmp])\n\t"
-        "#  A[1] * B[15]\n\t"
-        "movq	120(%[b]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[2] * B[14]\n\t"
-        "movq	112(%[b]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[3] * B[13]\n\t"
-        "movq	104(%[b]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[4] * B[12]\n\t"
-        "movq	96(%[b]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[5] * B[11]\n\t"
-        "movq	88(%[b]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[6] * B[10]\n\t"
-        "movq	80(%[b]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[7] * B[9]\n\t"
-        "movq	72(%[b]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[8] * B[8]\n\t"
-        "movq	64(%[b]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[9] * B[7]\n\t"
-        "movq	56(%[b]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[10] * B[6]\n\t"
-        "movq	48(%[b]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[11] * B[5]\n\t"
-        "movq	40(%[b]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[12] * B[4]\n\t"
-        "movq	32(%[b]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[13] * B[3]\n\t"
-        "movq	24(%[b]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[14] * B[2]\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[15] * B[1]\n\t"
-        "movq	8(%[b]), %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "movq	%%rcx, 128(%[r])\n\t"
-        "#  A[2] * B[15]\n\t"
-        "movq	120(%[b]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[3] * B[14]\n\t"
-        "movq	112(%[b]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[4] * B[13]\n\t"
-        "movq	104(%[b]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[5] * B[12]\n\t"
-        "movq	96(%[b]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[6] * B[11]\n\t"
-        "movq	88(%[b]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[7] * B[10]\n\t"
-        "movq	80(%[b]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[8] * B[9]\n\t"
-        "movq	72(%[b]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[9] * B[8]\n\t"
-        "movq	64(%[b]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[10] * B[7]\n\t"
-        "movq	56(%[b]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[11] * B[6]\n\t"
-        "movq	48(%[b]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[12] * B[5]\n\t"
-        "movq	40(%[b]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[13] * B[4]\n\t"
-        "movq	32(%[b]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[14] * B[3]\n\t"
-        "movq	24(%[b]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[15] * B[2]\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "movq	%%r8, 136(%[r])\n\t"
-        "#  A[3] * B[15]\n\t"
-        "movq	120(%[b]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[4] * B[14]\n\t"
-        "movq	112(%[b]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[5] * B[13]\n\t"
-        "movq	104(%[b]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[6] * B[12]\n\t"
-        "movq	96(%[b]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[7] * B[11]\n\t"
-        "movq	88(%[b]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[8] * B[10]\n\t"
-        "movq	80(%[b]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[9] * B[9]\n\t"
-        "movq	72(%[b]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[10] * B[8]\n\t"
-        "movq	64(%[b]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[11] * B[7]\n\t"
-        "movq	56(%[b]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[12] * B[6]\n\t"
-        "movq	48(%[b]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[13] * B[5]\n\t"
-        "movq	40(%[b]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[14] * B[4]\n\t"
-        "movq	32(%[b]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[15] * B[3]\n\t"
-        "movq	24(%[b]), %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "movq	%%rbx, 144(%[r])\n\t"
-        "#  A[4] * B[15]\n\t"
-        "movq	120(%[b]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[5] * B[14]\n\t"
-        "movq	112(%[b]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[6] * B[13]\n\t"
-        "movq	104(%[b]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[7] * B[12]\n\t"
-        "movq	96(%[b]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[8] * B[11]\n\t"
-        "movq	88(%[b]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[9] * B[10]\n\t"
-        "movq	80(%[b]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[10] * B[9]\n\t"
-        "movq	72(%[b]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[11] * B[8]\n\t"
-        "movq	64(%[b]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[12] * B[7]\n\t"
-        "movq	56(%[b]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[13] * B[6]\n\t"
-        "movq	48(%[b]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[14] * B[5]\n\t"
-        "movq	40(%[b]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[15] * B[4]\n\t"
-        "movq	32(%[b]), %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "movq	%%rcx, 152(%[r])\n\t"
-        "#  A[5] * B[15]\n\t"
-        "movq	120(%[b]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[6] * B[14]\n\t"
-        "movq	112(%[b]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[7] * B[13]\n\t"
-        "movq	104(%[b]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[8] * B[12]\n\t"
-        "movq	96(%[b]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[9] * B[11]\n\t"
-        "movq	88(%[b]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[10] * B[10]\n\t"
-        "movq	80(%[b]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[11] * B[9]\n\t"
-        "movq	72(%[b]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[12] * B[8]\n\t"
-        "movq	64(%[b]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[13] * B[7]\n\t"
-        "movq	56(%[b]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[14] * B[6]\n\t"
-        "movq	48(%[b]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[15] * B[5]\n\t"
-        "movq	40(%[b]), %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "movq	%%r8, 160(%[r])\n\t"
-        "#  A[6] * B[15]\n\t"
-        "movq	120(%[b]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[7] * B[14]\n\t"
-        "movq	112(%[b]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[8] * B[13]\n\t"
-        "movq	104(%[b]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[9] * B[12]\n\t"
-        "movq	96(%[b]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[10] * B[11]\n\t"
-        "movq	88(%[b]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[11] * B[10]\n\t"
-        "movq	80(%[b]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[12] * B[9]\n\t"
-        "movq	72(%[b]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[13] * B[8]\n\t"
-        "movq	64(%[b]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[14] * B[7]\n\t"
-        "movq	56(%[b]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[15] * B[6]\n\t"
-        "movq	48(%[b]), %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "movq	%%rbx, 168(%[r])\n\t"
-        "#  A[7] * B[15]\n\t"
-        "movq	120(%[b]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[8] * B[14]\n\t"
-        "movq	112(%[b]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[9] * B[13]\n\t"
-        "movq	104(%[b]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[10] * B[12]\n\t"
-        "movq	96(%[b]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[11] * B[11]\n\t"
-        "movq	88(%[b]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[12] * B[10]\n\t"
-        "movq	80(%[b]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[13] * B[9]\n\t"
-        "movq	72(%[b]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[14] * B[8]\n\t"
-        "movq	64(%[b]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[15] * B[7]\n\t"
-        "movq	56(%[b]), %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "movq	%%rcx, 176(%[r])\n\t"
-        "#  A[8] * B[15]\n\t"
-        "movq	120(%[b]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[9] * B[14]\n\t"
-        "movq	112(%[b]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[10] * B[13]\n\t"
-        "movq	104(%[b]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[11] * B[12]\n\t"
-        "movq	96(%[b]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[12] * B[11]\n\t"
-        "movq	88(%[b]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[13] * B[10]\n\t"
-        "movq	80(%[b]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[14] * B[9]\n\t"
-        "movq	72(%[b]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[15] * B[8]\n\t"
-        "movq	64(%[b]), %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "movq	%%r8, 184(%[r])\n\t"
-        "#  A[9] * B[15]\n\t"
-        "movq	120(%[b]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[10] * B[14]\n\t"
-        "movq	112(%[b]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[11] * B[13]\n\t"
-        "movq	104(%[b]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[12] * B[12]\n\t"
-        "movq	96(%[b]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[13] * B[11]\n\t"
-        "movq	88(%[b]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[14] * B[10]\n\t"
-        "movq	80(%[b]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[15] * B[9]\n\t"
-        "movq	72(%[b]), %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "movq	%%rbx, 192(%[r])\n\t"
-        "#  A[10] * B[15]\n\t"
-        "movq	120(%[b]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[11] * B[14]\n\t"
-        "movq	112(%[b]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[12] * B[13]\n\t"
-        "movq	104(%[b]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[13] * B[12]\n\t"
-        "movq	96(%[b]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[14] * B[11]\n\t"
-        "movq	88(%[b]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[15] * B[10]\n\t"
-        "movq	80(%[b]), %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "movq	%%rcx, 200(%[r])\n\t"
-        "#  A[11] * B[15]\n\t"
-        "movq	120(%[b]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[12] * B[14]\n\t"
-        "movq	112(%[b]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[13] * B[13]\n\t"
-        "movq	104(%[b]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[14] * B[12]\n\t"
-        "movq	96(%[b]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[15] * B[11]\n\t"
-        "movq	88(%[b]), %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "movq	%%r8, 208(%[r])\n\t"
-        "#  A[12] * B[15]\n\t"
-        "movq	120(%[b]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[13] * B[14]\n\t"
-        "movq	112(%[b]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[14] * B[13]\n\t"
-        "movq	104(%[b]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[15] * B[12]\n\t"
-        "movq	96(%[b]), %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "movq	%%rbx, 216(%[r])\n\t"
-        "#  A[13] * B[15]\n\t"
-        "movq	120(%[b]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[14] * B[14]\n\t"
-        "movq	112(%[b]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[15] * B[13]\n\t"
-        "movq	104(%[b]), %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "movq	%%rcx, 224(%[r])\n\t"
-        "#  A[14] * B[15]\n\t"
-        "movq	120(%[b]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[15] * B[14]\n\t"
-        "movq	112(%[b]), %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "movq	%%r8, 232(%[r])\n\t"
-        "#  A[15] * B[15]\n\t"
-        "movq	120(%[b]), %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "movq	%%rbx, 240(%[r])\n\t"
-        "movq	%%rcx, 248(%[r])\n\t"
-        :
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [tmp] "r" (tmp)
-        : "memory", "rax", "rdx", "rbx", "rcx", "r8"
-    );
-
-    XMEMCPY(r, tmp, sizeof(tmp));
-}
-
-/* Square a and put result in r. (r = a * a)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- */
-SP_NOINLINE static void sp_2048_sqr_16(sp_digit* r, const sp_digit* a)
-{
-    sp_digit tmp[16];
-
-    __asm__ __volatile__ (
-        "#  A[0] * A[0]\n\t"
-        "movq	0(%[a]), %%rax\n\t"
-        "mulq	%%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "movq	%%rax, (%[tmp])\n\t"
-        "movq	%%rdx, %%r8\n\t"
-        "#  A[0] * A[1]\n\t"
-        "movq	8(%[a]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "movq	%%r8, 8(%[tmp])\n\t"
-        "#  A[0] * A[2]\n\t"
-        "movq	16(%[a]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "addq	%%rax, %%r9\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "addq	%%rax, %%r9\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[1] * A[1]\n\t"
-        "movq	8(%[a]), %%rax\n\t"
-        "mulq	%%rax\n\t"
-        "addq	%%rax, %%r9\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "movq	%%r9, 16(%[tmp])\n\t"
-        "#  A[0] * A[3]\n\t"
-        "movq	24(%[a]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%r9\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%r9\n\t"
-        "#  A[1] * A[2]\n\t"
-        "movq	16(%[a]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%r9\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%r9\n\t"
-        "movq	%%rcx, 24(%[tmp])\n\t"
-        "#  A[0] * A[4]\n\t"
-        "movq	32(%[a]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[1] * A[3]\n\t"
-        "movq	24(%[a]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[2] * A[2]\n\t"
-        "movq	16(%[a]), %%rax\n\t"
-        "mulq	%%rax\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "movq	%%r8, 32(%[tmp])\n\t"
-        "#  A[0] * A[5]\n\t"
-        "movq	40(%[a]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[1] * A[4]\n\t"
-        "movq	32(%[a]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[2] * A[3]\n\t"
-        "movq	24(%[a]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%r10, %%r9\n\t"
-        "adcq	%%r11, %%rcx\n\t"
-        "adcq	%%r12, %%r8\n\t"
-        "movq	%%r9, 40(%[tmp])\n\t"
-        "#  A[0] * A[6]\n\t"
-        "movq	48(%[a]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[1] * A[5]\n\t"
-        "movq	40(%[a]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[2] * A[4]\n\t"
-        "movq	32(%[a]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[3] * A[3]\n\t"
-        "movq	24(%[a]), %%rax\n\t"
-        "mulq	%%rax\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%rcx\n\t"
-        "adcq	%%r11, %%r8\n\t"
-        "adcq	%%r12, %%r9\n\t"
-        "movq	%%rcx, 48(%[tmp])\n\t"
-        "#  A[0] * A[7]\n\t"
-        "movq	56(%[a]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[1] * A[6]\n\t"
-        "movq	48(%[a]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[2] * A[5]\n\t"
-        "movq	40(%[a]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[3] * A[4]\n\t"
-        "movq	32(%[a]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%r10, %%r8\n\t"
-        "adcq	%%r11, %%r9\n\t"
-        "adcq	%%r12, %%rcx\n\t"
-        "movq	%%r8, 56(%[tmp])\n\t"
-        "#  A[0] * A[8]\n\t"
-        "movq	64(%[a]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[1] * A[7]\n\t"
-        "movq	56(%[a]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[2] * A[6]\n\t"
-        "movq	48(%[a]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[3] * A[5]\n\t"
-        "movq	40(%[a]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[4] * A[4]\n\t"
-        "movq	32(%[a]), %%rax\n\t"
-        "mulq	%%rax\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%r9\n\t"
-        "adcq	%%r11, %%rcx\n\t"
-        "adcq	%%r12, %%r8\n\t"
-        "movq	%%r9, 64(%[tmp])\n\t"
-        "#  A[0] * A[9]\n\t"
-        "movq	72(%[a]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[1] * A[8]\n\t"
-        "movq	64(%[a]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[2] * A[7]\n\t"
-        "movq	56(%[a]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[3] * A[6]\n\t"
-        "movq	48(%[a]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[4] * A[5]\n\t"
-        "movq	40(%[a]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%r10, %%rcx\n\t"
-        "adcq	%%r11, %%r8\n\t"
-        "adcq	%%r12, %%r9\n\t"
-        "movq	%%rcx, 72(%[tmp])\n\t"
-        "#  A[0] * A[10]\n\t"
-        "movq	80(%[a]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[1] * A[9]\n\t"
-        "movq	72(%[a]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[2] * A[8]\n\t"
-        "movq	64(%[a]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[3] * A[7]\n\t"
-        "movq	56(%[a]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[4] * A[6]\n\t"
-        "movq	48(%[a]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[5] * A[5]\n\t"
-        "movq	40(%[a]), %%rax\n\t"
-        "mulq	%%rax\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%r8\n\t"
-        "adcq	%%r11, %%r9\n\t"
-        "adcq	%%r12, %%rcx\n\t"
-        "movq	%%r8, 80(%[tmp])\n\t"
-        "#  A[0] * A[11]\n\t"
-        "movq	88(%[a]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[1] * A[10]\n\t"
-        "movq	80(%[a]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[2] * A[9]\n\t"
-        "movq	72(%[a]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[3] * A[8]\n\t"
-        "movq	64(%[a]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[4] * A[7]\n\t"
-        "movq	56(%[a]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[5] * A[6]\n\t"
-        "movq	48(%[a]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%r10, %%r9\n\t"
-        "adcq	%%r11, %%rcx\n\t"
-        "adcq	%%r12, %%r8\n\t"
-        "movq	%%r9, 88(%[tmp])\n\t"
-        "#  A[0] * A[12]\n\t"
-        "movq	96(%[a]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[1] * A[11]\n\t"
-        "movq	88(%[a]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[2] * A[10]\n\t"
-        "movq	80(%[a]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[3] * A[9]\n\t"
-        "movq	72(%[a]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[4] * A[8]\n\t"
-        "movq	64(%[a]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[5] * A[7]\n\t"
-        "movq	56(%[a]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[6] * A[6]\n\t"
-        "movq	48(%[a]), %%rax\n\t"
-        "mulq	%%rax\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%rcx\n\t"
-        "adcq	%%r11, %%r8\n\t"
-        "adcq	%%r12, %%r9\n\t"
-        "movq	%%rcx, 96(%[tmp])\n\t"
-        "#  A[0] * A[13]\n\t"
-        "movq	104(%[a]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[1] * A[12]\n\t"
-        "movq	96(%[a]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[2] * A[11]\n\t"
-        "movq	88(%[a]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[3] * A[10]\n\t"
-        "movq	80(%[a]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[4] * A[9]\n\t"
-        "movq	72(%[a]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[5] * A[8]\n\t"
-        "movq	64(%[a]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[6] * A[7]\n\t"
-        "movq	56(%[a]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%r10, %%r8\n\t"
-        "adcq	%%r11, %%r9\n\t"
-        "adcq	%%r12, %%rcx\n\t"
-        "movq	%%r8, 104(%[tmp])\n\t"
-        "#  A[0] * A[14]\n\t"
-        "movq	112(%[a]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[1] * A[13]\n\t"
-        "movq	104(%[a]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[2] * A[12]\n\t"
-        "movq	96(%[a]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[3] * A[11]\n\t"
-        "movq	88(%[a]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[4] * A[10]\n\t"
-        "movq	80(%[a]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[5] * A[9]\n\t"
-        "movq	72(%[a]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[6] * A[8]\n\t"
-        "movq	64(%[a]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[7] * A[7]\n\t"
-        "movq	56(%[a]), %%rax\n\t"
-        "mulq	%%rax\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%r9\n\t"
-        "adcq	%%r11, %%rcx\n\t"
-        "adcq	%%r12, %%r8\n\t"
-        "movq	%%r9, 112(%[tmp])\n\t"
-        "#  A[0] * A[15]\n\t"
-        "movq	120(%[a]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[1] * A[14]\n\t"
-        "movq	112(%[a]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[2] * A[13]\n\t"
-        "movq	104(%[a]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[3] * A[12]\n\t"
-        "movq	96(%[a]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[4] * A[11]\n\t"
-        "movq	88(%[a]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[5] * A[10]\n\t"
-        "movq	80(%[a]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[6] * A[9]\n\t"
-        "movq	72(%[a]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[7] * A[8]\n\t"
-        "movq	64(%[a]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%r10, %%rcx\n\t"
-        "adcq	%%r11, %%r8\n\t"
-        "adcq	%%r12, %%r9\n\t"
-        "movq	%%rcx, 120(%[tmp])\n\t"
-        "#  A[1] * A[15]\n\t"
-        "movq	120(%[a]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[2] * A[14]\n\t"
-        "movq	112(%[a]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[3] * A[13]\n\t"
-        "movq	104(%[a]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[4] * A[12]\n\t"
-        "movq	96(%[a]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[5] * A[11]\n\t"
-        "movq	88(%[a]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[6] * A[10]\n\t"
-        "movq	80(%[a]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[7] * A[9]\n\t"
-        "movq	72(%[a]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[8] * A[8]\n\t"
-        "movq	64(%[a]), %%rax\n\t"
-        "mulq	%%rax\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%r8\n\t"
-        "adcq	%%r11, %%r9\n\t"
-        "adcq	%%r12, %%rcx\n\t"
-        "movq	%%r8, 128(%[r])\n\t"
-        "#  A[2] * A[15]\n\t"
-        "movq	120(%[a]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[3] * A[14]\n\t"
-        "movq	112(%[a]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[4] * A[13]\n\t"
-        "movq	104(%[a]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[5] * A[12]\n\t"
-        "movq	96(%[a]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[6] * A[11]\n\t"
-        "movq	88(%[a]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[7] * A[10]\n\t"
-        "movq	80(%[a]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[8] * A[9]\n\t"
-        "movq	72(%[a]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%r10, %%r9\n\t"
-        "adcq	%%r11, %%rcx\n\t"
-        "adcq	%%r12, %%r8\n\t"
-        "movq	%%r9, 136(%[r])\n\t"
-        "#  A[3] * A[15]\n\t"
-        "movq	120(%[a]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[4] * A[14]\n\t"
-        "movq	112(%[a]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[5] * A[13]\n\t"
-        "movq	104(%[a]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[6] * A[12]\n\t"
-        "movq	96(%[a]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[7] * A[11]\n\t"
-        "movq	88(%[a]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[8] * A[10]\n\t"
-        "movq	80(%[a]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[9] * A[9]\n\t"
-        "movq	72(%[a]), %%rax\n\t"
-        "mulq	%%rax\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%rcx\n\t"
-        "adcq	%%r11, %%r8\n\t"
-        "adcq	%%r12, %%r9\n\t"
-        "movq	%%rcx, 144(%[r])\n\t"
-        "#  A[4] * A[15]\n\t"
-        "movq	120(%[a]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[5] * A[14]\n\t"
-        "movq	112(%[a]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[6] * A[13]\n\t"
-        "movq	104(%[a]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[7] * A[12]\n\t"
-        "movq	96(%[a]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[8] * A[11]\n\t"
-        "movq	88(%[a]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[9] * A[10]\n\t"
-        "movq	80(%[a]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%r10, %%r8\n\t"
-        "adcq	%%r11, %%r9\n\t"
-        "adcq	%%r12, %%rcx\n\t"
-        "movq	%%r8, 152(%[r])\n\t"
-        "#  A[5] * A[15]\n\t"
-        "movq	120(%[a]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[6] * A[14]\n\t"
-        "movq	112(%[a]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[7] * A[13]\n\t"
-        "movq	104(%[a]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[8] * A[12]\n\t"
-        "movq	96(%[a]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[9] * A[11]\n\t"
-        "movq	88(%[a]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[10] * A[10]\n\t"
-        "movq	80(%[a]), %%rax\n\t"
-        "mulq	%%rax\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%r9\n\t"
-        "adcq	%%r11, %%rcx\n\t"
-        "adcq	%%r12, %%r8\n\t"
-        "movq	%%r9, 160(%[r])\n\t"
-        "#  A[6] * A[15]\n\t"
-        "movq	120(%[a]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[7] * A[14]\n\t"
-        "movq	112(%[a]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[8] * A[13]\n\t"
-        "movq	104(%[a]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[9] * A[12]\n\t"
-        "movq	96(%[a]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[10] * A[11]\n\t"
-        "movq	88(%[a]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%r10, %%rcx\n\t"
-        "adcq	%%r11, %%r8\n\t"
-        "adcq	%%r12, %%r9\n\t"
-        "movq	%%rcx, 168(%[r])\n\t"
-        "#  A[7] * A[15]\n\t"
-        "movq	120(%[a]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[8] * A[14]\n\t"
-        "movq	112(%[a]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[9] * A[13]\n\t"
-        "movq	104(%[a]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[10] * A[12]\n\t"
-        "movq	96(%[a]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[11] * A[11]\n\t"
-        "movq	88(%[a]), %%rax\n\t"
-        "mulq	%%rax\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%r8\n\t"
-        "adcq	%%r11, %%r9\n\t"
-        "adcq	%%r12, %%rcx\n\t"
-        "movq	%%r8, 176(%[r])\n\t"
-        "#  A[8] * A[15]\n\t"
-        "movq	120(%[a]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[9] * A[14]\n\t"
-        "movq	112(%[a]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[10] * A[13]\n\t"
-        "movq	104(%[a]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[11] * A[12]\n\t"
-        "movq	96(%[a]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%r10, %%r9\n\t"
-        "adcq	%%r11, %%rcx\n\t"
-        "adcq	%%r12, %%r8\n\t"
-        "movq	%%r9, 184(%[r])\n\t"
-        "#  A[9] * A[15]\n\t"
-        "movq	120(%[a]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[10] * A[14]\n\t"
-        "movq	112(%[a]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[11] * A[13]\n\t"
-        "movq	104(%[a]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[12] * A[12]\n\t"
-        "movq	96(%[a]), %%rax\n\t"
-        "mulq	%%rax\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%rcx\n\t"
-        "adcq	%%r11, %%r8\n\t"
-        "adcq	%%r12, %%r9\n\t"
-        "movq	%%rcx, 192(%[r])\n\t"
-        "#  A[10] * A[15]\n\t"
-        "movq	120(%[a]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[11] * A[14]\n\t"
-        "movq	112(%[a]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[12] * A[13]\n\t"
-        "movq	104(%[a]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%r10, %%r8\n\t"
-        "adcq	%%r11, %%r9\n\t"
-        "adcq	%%r12, %%rcx\n\t"
-        "movq	%%r8, 200(%[r])\n\t"
-        "#  A[11] * A[15]\n\t"
-        "movq	120(%[a]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "addq	%%rax, %%r9\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "addq	%%rax, %%r9\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[12] * A[14]\n\t"
-        "movq	112(%[a]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%r9\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "addq	%%rax, %%r9\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[13] * A[13]\n\t"
-        "movq	104(%[a]), %%rax\n\t"
-        "mulq	%%rax\n\t"
-        "addq	%%rax, %%r9\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "movq	%%r9, 208(%[r])\n\t"
-        "#  A[12] * A[15]\n\t"
-        "movq	120(%[a]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%r9\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%r9\n\t"
-        "#  A[13] * A[14]\n\t"
-        "movq	112(%[a]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%r9\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%r9\n\t"
-        "movq	%%rcx, 216(%[r])\n\t"
-        "#  A[13] * A[15]\n\t"
-        "movq	120(%[a]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[14] * A[14]\n\t"
-        "movq	112(%[a]), %%rax\n\t"
-        "mulq	%%rax\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "movq	%%r8, 224(%[r])\n\t"
-        "#  A[14] * A[15]\n\t"
-        "movq	120(%[a]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "addq	%%rax, %%r9\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "addq	%%rax, %%r9\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "movq	%%r9, 232(%[r])\n\t"
-        "#  A[15] * A[15]\n\t"
-        "movq	120(%[a]), %%rax\n\t"
-        "mulq	%%rax\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "movq	%%rcx, 240(%[r])\n\t"
-        "movq	%%r8, 248(%[r])\n\t"
-        :
-        : [r] "r" (r), [a] "r" (a), [tmp] "r" (tmp)
-        : "memory", "rax", "rdx", "rcx", "r8", "r9", "r10", "r11", "r12"
-    );
-
-    XMEMCPY(r, tmp, sizeof(tmp));
-}
-
-#ifdef HAVE_INTEL_AVX2
-/* Multiply a and b into r. (r = a * b)
- *
- * r   Result of multiplication.
- * a   First number to multiply.
- * b   Second number to multiply.
- */
-SP_NOINLINE static void sp_2048_mul_avx2_16(sp_digit* r, const sp_digit* a,
-        const sp_digit* b)
-{
-    sp_digit tmp[2*16];
-
-    __asm__ __volatile__ (
-        "movq	0(%[a]), %%rdx\n\t"
-        "xorq	%%r15, %%r15\n\t"
-        "# A[0] * B[0]\n\t"
-        "mulx	0(%[b]), %%r10, %%r11\n\t"
-        "# A[0] * B[1]\n\t"
-        "mulx	8(%[b]), %%r8, %%r12\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "# A[0] * B[2]\n\t"
-        "mulx	16(%[b]), %%r8, %%r13\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "# A[0] * B[3]\n\t"
-        "mulx	24(%[b]), %%r8, %%r14\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "movq	%%r10, 0(%[t])\n\t"
-        "movq	%%r11, 8(%[t])\n\t"
-        "movq	%%r12, 16(%[t])\n\t"
-        "movq	%%r13, 24(%[t])\n\t"
-        "# A[0] * B[4]\n\t"
-        "mulx	32(%[b]), %%r8, %%rax\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "# A[0] * B[5]\n\t"
-        "mulx	40(%[b]), %%r8, %%r10\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "# A[0] * B[6]\n\t"
-        "mulx	48(%[b]), %%r8, %%r11\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "# A[0] * B[7]\n\t"
-        "mulx	56(%[b]), %%r8, %%r12\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "movq	%%r14, 32(%[t])\n\t"
-        "movq	%%rax, 40(%[t])\n\t"
-        "movq	%%r10, 48(%[t])\n\t"
-        "movq	%%r11, 56(%[t])\n\t"
-        "# A[0] * B[8]\n\t"
-        "mulx	64(%[b]), %%r8, %%r13\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "# A[0] * B[9]\n\t"
-        "mulx	72(%[b]), %%r8, %%r14\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "# A[0] * B[10]\n\t"
-        "mulx	80(%[b]), %%r8, %%rax\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "# A[0] * B[11]\n\t"
-        "mulx	88(%[b]), %%r8, %%r10\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "movq	%%r12, 64(%[t])\n\t"
-        "movq	%%r13, 72(%[t])\n\t"
-        "movq	%%r14, 80(%[t])\n\t"
-        "movq	%%rax, 88(%[t])\n\t"
-        "# A[0] * B[12]\n\t"
-        "mulx	96(%[b]), %%r8, %%r11\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "# A[0] * B[13]\n\t"
-        "mulx	104(%[b]), %%r8, %%r12\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "# A[0] * B[14]\n\t"
-        "mulx	112(%[b]), %%r8, %%r13\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "# A[0] * B[15]\n\t"
-        "mulx	120(%[b]), %%r8, %%r14\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adcxq	%%r15, %%r14\n\t"
-        "movq	%%r15, %%rcx\n\t"
-        "adcxq	%%r15, %%rcx\n\t"
-        "movq	%%r10, 96(%[t])\n\t"
-        "movq	%%r11, 104(%[t])\n\t"
-        "movq	%%r12, 112(%[t])\n\t"
-        "movq	%%r13, 120(%[t])\n\t"
-        "movq	%%r14, 128(%[t])\n\t"
-        "movq	8(%[a]), %%rdx\n\t"
-        "xorq	%%r15, %%r15\n\t"
-        "movq	8(%[t]), %%r11\n\t"
-        "movq	16(%[t]), %%r12\n\t"
-        "movq	24(%[t]), %%r13\n\t"
-        "movq	32(%[t]), %%r14\n\t"
-        "movq	40(%[t]), %%rax\n\t"
-        "# A[1] * B[0]\n\t"
-        "mulx	0(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[1] * B[1]\n\t"
-        "mulx	8(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[1] * B[2]\n\t"
-        "mulx	16(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[1] * B[3]\n\t"
-        "mulx	24(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "movq	%%r11, 8(%[t])\n\t"
-        "movq	%%r12, 16(%[t])\n\t"
-        "movq	%%r13, 24(%[t])\n\t"
-        "movq	%%r14, 32(%[t])\n\t"
-        "movq	48(%[t]), %%r10\n\t"
-        "movq	56(%[t]), %%r11\n\t"
-        "movq	64(%[t]), %%r12\n\t"
-        "movq	72(%[t]), %%r13\n\t"
-        "# A[1] * B[4]\n\t"
-        "mulx	32(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[1] * B[5]\n\t"
-        "mulx	40(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[1] * B[6]\n\t"
-        "mulx	48(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[1] * B[7]\n\t"
-        "mulx	56(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "movq	%%rax, 40(%[t])\n\t"
-        "movq	%%r10, 48(%[t])\n\t"
-        "movq	%%r11, 56(%[t])\n\t"
-        "movq	%%r12, 64(%[t])\n\t"
-        "movq	80(%[t]), %%r14\n\t"
-        "movq	88(%[t]), %%rax\n\t"
-        "movq	96(%[t]), %%r10\n\t"
-        "movq	104(%[t]), %%r11\n\t"
-        "# A[1] * B[8]\n\t"
-        "mulx	64(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[1] * B[9]\n\t"
-        "mulx	72(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[1] * B[10]\n\t"
-        "mulx	80(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[1] * B[11]\n\t"
-        "mulx	88(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "movq	%%r13, 72(%[t])\n\t"
-        "movq	%%r14, 80(%[t])\n\t"
-        "movq	%%rax, 88(%[t])\n\t"
-        "movq	%%r10, 96(%[t])\n\t"
-        "movq	112(%[t]), %%r12\n\t"
-        "movq	120(%[t]), %%r13\n\t"
-        "movq	128(%[t]), %%r14\n\t"
-        "# A[1] * B[12]\n\t"
-        "mulx	96(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[1] * B[13]\n\t"
-        "mulx	104(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[1] * B[14]\n\t"
-        "mulx	112(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[1] * B[15]\n\t"
-        "mulx	120(%[b]), %%r8, %%r9\n\t"
-        "movq	%%r15, %%rax\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "adcxq	%%rcx, %%rax\n\t"
-        "movq	%%r15, %%rcx\n\t"
-        "adoxq	%%r15, %%rcx\n\t"
-        "adcxq	%%r15, %%rcx\n\t"
-        "movq	%%r11, 104(%[t])\n\t"
-        "movq	%%r12, 112(%[t])\n\t"
-        "movq	%%r13, 120(%[t])\n\t"
-        "movq	%%r14, 128(%[t])\n\t"
-        "movq	%%rax, 136(%[t])\n\t"
-        "movq	16(%[a]), %%rdx\n\t"
-        "xorq	%%r15, %%r15\n\t"
-        "movq	16(%[t]), %%r12\n\t"
-        "movq	24(%[t]), %%r13\n\t"
-        "movq	32(%[t]), %%r14\n\t"
-        "movq	40(%[t]), %%rax\n\t"
-        "movq	48(%[t]), %%r10\n\t"
-        "# A[2] * B[0]\n\t"
-        "mulx	0(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[2] * B[1]\n\t"
-        "mulx	8(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[2] * B[2]\n\t"
-        "mulx	16(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[2] * B[3]\n\t"
-        "mulx	24(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "movq	%%r12, 16(%[t])\n\t"
-        "movq	%%r13, 24(%[t])\n\t"
-        "movq	%%r14, 32(%[t])\n\t"
-        "movq	%%rax, 40(%[t])\n\t"
-        "movq	56(%[t]), %%r11\n\t"
-        "movq	64(%[t]), %%r12\n\t"
-        "movq	72(%[t]), %%r13\n\t"
-        "movq	80(%[t]), %%r14\n\t"
-        "# A[2] * B[4]\n\t"
-        "mulx	32(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[2] * B[5]\n\t"
-        "mulx	40(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[2] * B[6]\n\t"
-        "mulx	48(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[2] * B[7]\n\t"
-        "mulx	56(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "movq	%%r10, 48(%[t])\n\t"
-        "movq	%%r11, 56(%[t])\n\t"
-        "movq	%%r12, 64(%[t])\n\t"
-        "movq	%%r13, 72(%[t])\n\t"
-        "movq	88(%[t]), %%rax\n\t"
-        "movq	96(%[t]), %%r10\n\t"
-        "movq	104(%[t]), %%r11\n\t"
-        "movq	112(%[t]), %%r12\n\t"
-        "# A[2] * B[8]\n\t"
-        "mulx	64(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[2] * B[9]\n\t"
-        "mulx	72(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[2] * B[10]\n\t"
-        "mulx	80(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[2] * B[11]\n\t"
-        "mulx	88(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "movq	%%r14, 80(%[t])\n\t"
-        "movq	%%rax, 88(%[t])\n\t"
-        "movq	%%r10, 96(%[t])\n\t"
-        "movq	%%r11, 104(%[t])\n\t"
-        "movq	120(%[t]), %%r13\n\t"
-        "movq	128(%[t]), %%r14\n\t"
-        "movq	136(%[t]), %%rax\n\t"
-        "# A[2] * B[12]\n\t"
-        "mulx	96(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[2] * B[13]\n\t"
-        "mulx	104(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[2] * B[14]\n\t"
-        "mulx	112(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[2] * B[15]\n\t"
-        "mulx	120(%[b]), %%r8, %%r9\n\t"
-        "movq	%%r15, %%r10\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "adcxq	%%rcx, %%r10\n\t"
-        "movq	%%r15, %%rcx\n\t"
-        "adoxq	%%r15, %%rcx\n\t"
-        "adcxq	%%r15, %%rcx\n\t"
-        "movq	%%r12, 112(%[t])\n\t"
-        "movq	%%r13, 120(%[t])\n\t"
-        "movq	%%r14, 128(%[t])\n\t"
-        "movq	%%rax, 136(%[t])\n\t"
-        "movq	%%r10, 144(%[t])\n\t"
-        "movq	24(%[a]), %%rdx\n\t"
-        "xorq	%%r15, %%r15\n\t"
-        "movq	24(%[t]), %%r13\n\t"
-        "movq	32(%[t]), %%r14\n\t"
-        "movq	40(%[t]), %%rax\n\t"
-        "movq	48(%[t]), %%r10\n\t"
-        "movq	56(%[t]), %%r11\n\t"
-        "# A[3] * B[0]\n\t"
-        "mulx	0(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[3] * B[1]\n\t"
-        "mulx	8(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[3] * B[2]\n\t"
-        "mulx	16(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[3] * B[3]\n\t"
-        "mulx	24(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "movq	%%r13, 24(%[t])\n\t"
-        "movq	%%r14, 32(%[t])\n\t"
-        "movq	%%rax, 40(%[t])\n\t"
-        "movq	%%r10, 48(%[t])\n\t"
-        "movq	64(%[t]), %%r12\n\t"
-        "movq	72(%[t]), %%r13\n\t"
-        "movq	80(%[t]), %%r14\n\t"
-        "movq	88(%[t]), %%rax\n\t"
-        "# A[3] * B[4]\n\t"
-        "mulx	32(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[3] * B[5]\n\t"
-        "mulx	40(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[3] * B[6]\n\t"
-        "mulx	48(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[3] * B[7]\n\t"
-        "mulx	56(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "movq	%%r11, 56(%[t])\n\t"
-        "movq	%%r12, 64(%[t])\n\t"
-        "movq	%%r13, 72(%[t])\n\t"
-        "movq	%%r14, 80(%[t])\n\t"
-        "movq	96(%[t]), %%r10\n\t"
-        "movq	104(%[t]), %%r11\n\t"
-        "movq	112(%[t]), %%r12\n\t"
-        "movq	120(%[t]), %%r13\n\t"
-        "# A[3] * B[8]\n\t"
-        "mulx	64(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[3] * B[9]\n\t"
-        "mulx	72(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[3] * B[10]\n\t"
-        "mulx	80(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[3] * B[11]\n\t"
-        "mulx	88(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "movq	%%rax, 88(%[t])\n\t"
-        "movq	%%r10, 96(%[t])\n\t"
-        "movq	%%r11, 104(%[t])\n\t"
-        "movq	%%r12, 112(%[t])\n\t"
-        "movq	128(%[t]), %%r14\n\t"
-        "movq	136(%[t]), %%rax\n\t"
-        "movq	144(%[t]), %%r10\n\t"
-        "# A[3] * B[12]\n\t"
-        "mulx	96(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[3] * B[13]\n\t"
-        "mulx	104(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[3] * B[14]\n\t"
-        "mulx	112(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[3] * B[15]\n\t"
-        "mulx	120(%[b]), %%r8, %%r9\n\t"
-        "movq	%%r15, %%r11\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "adcxq	%%rcx, %%r11\n\t"
-        "movq	%%r15, %%rcx\n\t"
-        "adoxq	%%r15, %%rcx\n\t"
-        "adcxq	%%r15, %%rcx\n\t"
-        "movq	%%r13, 120(%[t])\n\t"
-        "movq	%%r14, 128(%[t])\n\t"
-        "movq	%%rax, 136(%[t])\n\t"
-        "movq	%%r10, 144(%[t])\n\t"
-        "movq	%%r11, 152(%[t])\n\t"
-        "movq	32(%[a]), %%rdx\n\t"
-        "xorq	%%r15, %%r15\n\t"
-        "movq	32(%[t]), %%r14\n\t"
-        "movq	40(%[t]), %%rax\n\t"
-        "movq	48(%[t]), %%r10\n\t"
-        "movq	56(%[t]), %%r11\n\t"
-        "movq	64(%[t]), %%r12\n\t"
-        "# A[4] * B[0]\n\t"
-        "mulx	0(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[4] * B[1]\n\t"
-        "mulx	8(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[4] * B[2]\n\t"
-        "mulx	16(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[4] * B[3]\n\t"
-        "mulx	24(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "movq	%%r14, 32(%[t])\n\t"
-        "movq	%%rax, 40(%[t])\n\t"
-        "movq	%%r10, 48(%[t])\n\t"
-        "movq	%%r11, 56(%[t])\n\t"
-        "movq	72(%[t]), %%r13\n\t"
-        "movq	80(%[t]), %%r14\n\t"
-        "movq	88(%[t]), %%rax\n\t"
-        "movq	96(%[t]), %%r10\n\t"
-        "# A[4] * B[4]\n\t"
-        "mulx	32(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[4] * B[5]\n\t"
-        "mulx	40(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[4] * B[6]\n\t"
-        "mulx	48(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[4] * B[7]\n\t"
-        "mulx	56(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "movq	%%r12, 64(%[t])\n\t"
-        "movq	%%r13, 72(%[t])\n\t"
-        "movq	%%r14, 80(%[t])\n\t"
-        "movq	%%rax, 88(%[t])\n\t"
-        "movq	104(%[t]), %%r11\n\t"
-        "movq	112(%[t]), %%r12\n\t"
-        "movq	120(%[t]), %%r13\n\t"
-        "movq	128(%[t]), %%r14\n\t"
-        "# A[4] * B[8]\n\t"
-        "mulx	64(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[4] * B[9]\n\t"
-        "mulx	72(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[4] * B[10]\n\t"
-        "mulx	80(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[4] * B[11]\n\t"
-        "mulx	88(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "movq	%%r10, 96(%[t])\n\t"
-        "movq	%%r11, 104(%[t])\n\t"
-        "movq	%%r12, 112(%[t])\n\t"
-        "movq	%%r13, 120(%[t])\n\t"
-        "movq	136(%[t]), %%rax\n\t"
-        "movq	144(%[t]), %%r10\n\t"
-        "movq	152(%[t]), %%r11\n\t"
-        "# A[4] * B[12]\n\t"
-        "mulx	96(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[4] * B[13]\n\t"
-        "mulx	104(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[4] * B[14]\n\t"
-        "mulx	112(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[4] * B[15]\n\t"
-        "mulx	120(%[b]), %%r8, %%r9\n\t"
-        "movq	%%r15, %%r12\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "adcxq	%%rcx, %%r12\n\t"
-        "movq	%%r15, %%rcx\n\t"
-        "adoxq	%%r15, %%rcx\n\t"
-        "adcxq	%%r15, %%rcx\n\t"
-        "movq	%%r14, 128(%[t])\n\t"
-        "movq	%%rax, 136(%[t])\n\t"
-        "movq	%%r10, 144(%[t])\n\t"
-        "movq	%%r11, 152(%[t])\n\t"
-        "movq	%%r12, 160(%[t])\n\t"
-        "movq	40(%[a]), %%rdx\n\t"
-        "xorq	%%r15, %%r15\n\t"
-        "movq	40(%[t]), %%rax\n\t"
-        "movq	48(%[t]), %%r10\n\t"
-        "movq	56(%[t]), %%r11\n\t"
-        "movq	64(%[t]), %%r12\n\t"
-        "movq	72(%[t]), %%r13\n\t"
-        "# A[5] * B[0]\n\t"
-        "mulx	0(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[5] * B[1]\n\t"
-        "mulx	8(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[5] * B[2]\n\t"
-        "mulx	16(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[5] * B[3]\n\t"
-        "mulx	24(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "movq	%%rax, 40(%[t])\n\t"
-        "movq	%%r10, 48(%[t])\n\t"
-        "movq	%%r11, 56(%[t])\n\t"
-        "movq	%%r12, 64(%[t])\n\t"
-        "movq	80(%[t]), %%r14\n\t"
-        "movq	88(%[t]), %%rax\n\t"
-        "movq	96(%[t]), %%r10\n\t"
-        "movq	104(%[t]), %%r11\n\t"
-        "# A[5] * B[4]\n\t"
-        "mulx	32(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[5] * B[5]\n\t"
-        "mulx	40(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[5] * B[6]\n\t"
-        "mulx	48(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[5] * B[7]\n\t"
-        "mulx	56(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "movq	%%r13, 72(%[t])\n\t"
-        "movq	%%r14, 80(%[t])\n\t"
-        "movq	%%rax, 88(%[t])\n\t"
-        "movq	%%r10, 96(%[t])\n\t"
-        "movq	112(%[t]), %%r12\n\t"
-        "movq	120(%[t]), %%r13\n\t"
-        "movq	128(%[t]), %%r14\n\t"
-        "movq	136(%[t]), %%rax\n\t"
-        "# A[5] * B[8]\n\t"
-        "mulx	64(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[5] * B[9]\n\t"
-        "mulx	72(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[5] * B[10]\n\t"
-        "mulx	80(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[5] * B[11]\n\t"
-        "mulx	88(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "movq	%%r11, 104(%[t])\n\t"
-        "movq	%%r12, 112(%[t])\n\t"
-        "movq	%%r13, 120(%[t])\n\t"
-        "movq	%%r14, 128(%[t])\n\t"
-        "movq	144(%[t]), %%r10\n\t"
-        "movq	152(%[t]), %%r11\n\t"
-        "movq	160(%[t]), %%r12\n\t"
-        "# A[5] * B[12]\n\t"
-        "mulx	96(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[5] * B[13]\n\t"
-        "mulx	104(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[5] * B[14]\n\t"
-        "mulx	112(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[5] * B[15]\n\t"
-        "mulx	120(%[b]), %%r8, %%r9\n\t"
-        "movq	%%r15, %%r13\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "adcxq	%%rcx, %%r13\n\t"
-        "movq	%%r15, %%rcx\n\t"
-        "adoxq	%%r15, %%rcx\n\t"
-        "adcxq	%%r15, %%rcx\n\t"
-        "movq	%%rax, 136(%[t])\n\t"
-        "movq	%%r10, 144(%[t])\n\t"
-        "movq	%%r11, 152(%[t])\n\t"
-        "movq	%%r12, 160(%[t])\n\t"
-        "movq	%%r13, 168(%[t])\n\t"
-        "movq	48(%[a]), %%rdx\n\t"
-        "xorq	%%r15, %%r15\n\t"
-        "movq	48(%[t]), %%r10\n\t"
-        "movq	56(%[t]), %%r11\n\t"
-        "movq	64(%[t]), %%r12\n\t"
-        "movq	72(%[t]), %%r13\n\t"
-        "movq	80(%[t]), %%r14\n\t"
-        "# A[6] * B[0]\n\t"
-        "mulx	0(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[6] * B[1]\n\t"
-        "mulx	8(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[6] * B[2]\n\t"
-        "mulx	16(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[6] * B[3]\n\t"
-        "mulx	24(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "movq	%%r10, 48(%[t])\n\t"
-        "movq	%%r11, 56(%[t])\n\t"
-        "movq	%%r12, 64(%[t])\n\t"
-        "movq	%%r13, 72(%[t])\n\t"
-        "movq	88(%[t]), %%rax\n\t"
-        "movq	96(%[t]), %%r10\n\t"
-        "movq	104(%[t]), %%r11\n\t"
-        "movq	112(%[t]), %%r12\n\t"
-        "# A[6] * B[4]\n\t"
-        "mulx	32(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[6] * B[5]\n\t"
-        "mulx	40(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[6] * B[6]\n\t"
-        "mulx	48(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[6] * B[7]\n\t"
-        "mulx	56(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "movq	%%r14, 80(%[t])\n\t"
-        "movq	%%rax, 88(%[t])\n\t"
-        "movq	%%r10, 96(%[t])\n\t"
-        "movq	%%r11, 104(%[t])\n\t"
-        "movq	120(%[t]), %%r13\n\t"
-        "movq	128(%[t]), %%r14\n\t"
-        "movq	136(%[t]), %%rax\n\t"
-        "movq	144(%[t]), %%r10\n\t"
-        "# A[6] * B[8]\n\t"
-        "mulx	64(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[6] * B[9]\n\t"
-        "mulx	72(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[6] * B[10]\n\t"
-        "mulx	80(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[6] * B[11]\n\t"
-        "mulx	88(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "movq	%%r12, 112(%[t])\n\t"
-        "movq	%%r13, 120(%[t])\n\t"
-        "movq	%%r14, 128(%[t])\n\t"
-        "movq	%%rax, 136(%[t])\n\t"
-        "movq	152(%[t]), %%r11\n\t"
-        "movq	160(%[t]), %%r12\n\t"
-        "movq	168(%[t]), %%r13\n\t"
-        "# A[6] * B[12]\n\t"
-        "mulx	96(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[6] * B[13]\n\t"
-        "mulx	104(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[6] * B[14]\n\t"
-        "mulx	112(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[6] * B[15]\n\t"
-        "mulx	120(%[b]), %%r8, %%r9\n\t"
-        "movq	%%r15, %%r14\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "adcxq	%%rcx, %%r14\n\t"
-        "movq	%%r15, %%rcx\n\t"
-        "adoxq	%%r15, %%rcx\n\t"
-        "adcxq	%%r15, %%rcx\n\t"
-        "movq	%%r10, 144(%[t])\n\t"
-        "movq	%%r11, 152(%[t])\n\t"
-        "movq	%%r12, 160(%[t])\n\t"
-        "movq	%%r13, 168(%[t])\n\t"
-        "movq	%%r14, 176(%[t])\n\t"
-        "movq	56(%[a]), %%rdx\n\t"
-        "xorq	%%r15, %%r15\n\t"
-        "movq	56(%[t]), %%r11\n\t"
-        "movq	64(%[t]), %%r12\n\t"
-        "movq	72(%[t]), %%r13\n\t"
-        "movq	80(%[t]), %%r14\n\t"
-        "movq	88(%[t]), %%rax\n\t"
-        "# A[7] * B[0]\n\t"
-        "mulx	0(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[7] * B[1]\n\t"
-        "mulx	8(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[7] * B[2]\n\t"
-        "mulx	16(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[7] * B[3]\n\t"
-        "mulx	24(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "movq	%%r11, 56(%[t])\n\t"
-        "movq	%%r12, 64(%[t])\n\t"
-        "movq	%%r13, 72(%[t])\n\t"
-        "movq	%%r14, 80(%[t])\n\t"
-        "movq	96(%[t]), %%r10\n\t"
-        "movq	104(%[t]), %%r11\n\t"
-        "movq	112(%[t]), %%r12\n\t"
-        "movq	120(%[t]), %%r13\n\t"
-        "# A[7] * B[4]\n\t"
-        "mulx	32(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[7] * B[5]\n\t"
-        "mulx	40(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[7] * B[6]\n\t"
-        "mulx	48(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[7] * B[7]\n\t"
-        "mulx	56(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "movq	%%rax, 88(%[t])\n\t"
-        "movq	%%r10, 96(%[t])\n\t"
-        "movq	%%r11, 104(%[t])\n\t"
-        "movq	%%r12, 112(%[t])\n\t"
-        "movq	128(%[t]), %%r14\n\t"
-        "movq	136(%[t]), %%rax\n\t"
-        "movq	144(%[t]), %%r10\n\t"
-        "movq	152(%[t]), %%r11\n\t"
-        "# A[7] * B[8]\n\t"
-        "mulx	64(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[7] * B[9]\n\t"
-        "mulx	72(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[7] * B[10]\n\t"
-        "mulx	80(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[7] * B[11]\n\t"
-        "mulx	88(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "movq	%%r13, 120(%[t])\n\t"
-        "movq	%%r14, 128(%[t])\n\t"
-        "movq	%%rax, 136(%[t])\n\t"
-        "movq	%%r10, 144(%[t])\n\t"
-        "movq	160(%[t]), %%r12\n\t"
-        "movq	168(%[t]), %%r13\n\t"
-        "movq	176(%[t]), %%r14\n\t"
-        "# A[7] * B[12]\n\t"
-        "mulx	96(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[7] * B[13]\n\t"
-        "mulx	104(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[7] * B[14]\n\t"
-        "mulx	112(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[7] * B[15]\n\t"
-        "mulx	120(%[b]), %%r8, %%r9\n\t"
-        "movq	%%r15, %%rax\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "adcxq	%%rcx, %%rax\n\t"
-        "movq	%%r15, %%rcx\n\t"
-        "adoxq	%%r15, %%rcx\n\t"
-        "adcxq	%%r15, %%rcx\n\t"
-        "movq	%%r11, 152(%[t])\n\t"
-        "movq	%%r12, 160(%[t])\n\t"
-        "movq	%%r13, 168(%[t])\n\t"
-        "movq	%%r14, 176(%[t])\n\t"
-        "movq	%%rax, 184(%[t])\n\t"
-        "movq	64(%[a]), %%rdx\n\t"
-        "xorq	%%r15, %%r15\n\t"
-        "movq	64(%[t]), %%r12\n\t"
-        "movq	72(%[t]), %%r13\n\t"
-        "movq	80(%[t]), %%r14\n\t"
-        "movq	88(%[t]), %%rax\n\t"
-        "movq	96(%[t]), %%r10\n\t"
-        "# A[8] * B[0]\n\t"
-        "mulx	0(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[8] * B[1]\n\t"
-        "mulx	8(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[8] * B[2]\n\t"
-        "mulx	16(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[8] * B[3]\n\t"
-        "mulx	24(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "movq	%%r12, 64(%[t])\n\t"
-        "movq	%%r13, 72(%[t])\n\t"
-        "movq	%%r14, 80(%[t])\n\t"
-        "movq	%%rax, 88(%[t])\n\t"
-        "movq	104(%[t]), %%r11\n\t"
-        "movq	112(%[t]), %%r12\n\t"
-        "movq	120(%[t]), %%r13\n\t"
-        "movq	128(%[t]), %%r14\n\t"
-        "# A[8] * B[4]\n\t"
-        "mulx	32(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[8] * B[5]\n\t"
-        "mulx	40(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[8] * B[6]\n\t"
-        "mulx	48(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[8] * B[7]\n\t"
-        "mulx	56(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "movq	%%r10, 96(%[t])\n\t"
-        "movq	%%r11, 104(%[t])\n\t"
-        "movq	%%r12, 112(%[t])\n\t"
-        "movq	%%r13, 120(%[t])\n\t"
-        "movq	136(%[t]), %%rax\n\t"
-        "movq	144(%[t]), %%r10\n\t"
-        "movq	152(%[t]), %%r11\n\t"
-        "movq	160(%[t]), %%r12\n\t"
-        "# A[8] * B[8]\n\t"
-        "mulx	64(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[8] * B[9]\n\t"
-        "mulx	72(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[8] * B[10]\n\t"
-        "mulx	80(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[8] * B[11]\n\t"
-        "mulx	88(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "movq	%%r14, 128(%[t])\n\t"
-        "movq	%%rax, 136(%[t])\n\t"
-        "movq	%%r10, 144(%[t])\n\t"
-        "movq	%%r11, 152(%[t])\n\t"
-        "movq	168(%[t]), %%r13\n\t"
-        "movq	176(%[t]), %%r14\n\t"
-        "movq	184(%[t]), %%rax\n\t"
-        "# A[8] * B[12]\n\t"
-        "mulx	96(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[8] * B[13]\n\t"
-        "mulx	104(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[8] * B[14]\n\t"
-        "mulx	112(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[8] * B[15]\n\t"
-        "mulx	120(%[b]), %%r8, %%r9\n\t"
-        "movq	%%r15, %%r10\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "adcxq	%%rcx, %%r10\n\t"
-        "movq	%%r15, %%rcx\n\t"
-        "adoxq	%%r15, %%rcx\n\t"
-        "adcxq	%%r15, %%rcx\n\t"
-        "movq	%%r12, 160(%[t])\n\t"
-        "movq	%%r13, 168(%[t])\n\t"
-        "movq	%%r14, 176(%[t])\n\t"
-        "movq	%%rax, 184(%[t])\n\t"
-        "movq	%%r10, 192(%[t])\n\t"
-        "movq	72(%[a]), %%rdx\n\t"
-        "xorq	%%r15, %%r15\n\t"
-        "movq	72(%[t]), %%r13\n\t"
-        "movq	80(%[t]), %%r14\n\t"
-        "movq	88(%[t]), %%rax\n\t"
-        "movq	96(%[t]), %%r10\n\t"
-        "movq	104(%[t]), %%r11\n\t"
-        "# A[9] * B[0]\n\t"
-        "mulx	0(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[9] * B[1]\n\t"
-        "mulx	8(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[9] * B[2]\n\t"
-        "mulx	16(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[9] * B[3]\n\t"
-        "mulx	24(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "movq	%%r13, 72(%[t])\n\t"
-        "movq	%%r14, 80(%[t])\n\t"
-        "movq	%%rax, 88(%[t])\n\t"
-        "movq	%%r10, 96(%[t])\n\t"
-        "movq	112(%[t]), %%r12\n\t"
-        "movq	120(%[t]), %%r13\n\t"
-        "movq	128(%[t]), %%r14\n\t"
-        "movq	136(%[t]), %%rax\n\t"
-        "# A[9] * B[4]\n\t"
-        "mulx	32(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[9] * B[5]\n\t"
-        "mulx	40(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[9] * B[6]\n\t"
-        "mulx	48(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[9] * B[7]\n\t"
-        "mulx	56(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "movq	%%r11, 104(%[t])\n\t"
-        "movq	%%r12, 112(%[t])\n\t"
-        "movq	%%r13, 120(%[t])\n\t"
-        "movq	%%r14, 128(%[t])\n\t"
-        "movq	144(%[t]), %%r10\n\t"
-        "movq	152(%[t]), %%r11\n\t"
-        "movq	160(%[t]), %%r12\n\t"
-        "movq	168(%[t]), %%r13\n\t"
-        "# A[9] * B[8]\n\t"
-        "mulx	64(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[9] * B[9]\n\t"
-        "mulx	72(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[9] * B[10]\n\t"
-        "mulx	80(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[9] * B[11]\n\t"
-        "mulx	88(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "movq	%%rax, 136(%[t])\n\t"
-        "movq	%%r10, 144(%[t])\n\t"
-        "movq	%%r11, 152(%[t])\n\t"
-        "movq	%%r12, 160(%[t])\n\t"
-        "movq	176(%[t]), %%r14\n\t"
-        "movq	184(%[t]), %%rax\n\t"
-        "movq	192(%[t]), %%r10\n\t"
-        "# A[9] * B[12]\n\t"
-        "mulx	96(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[9] * B[13]\n\t"
-        "mulx	104(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[9] * B[14]\n\t"
-        "mulx	112(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[9] * B[15]\n\t"
-        "mulx	120(%[b]), %%r8, %%r9\n\t"
-        "movq	%%r15, %%r11\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "adcxq	%%rcx, %%r11\n\t"
-        "movq	%%r15, %%rcx\n\t"
-        "adoxq	%%r15, %%rcx\n\t"
-        "adcxq	%%r15, %%rcx\n\t"
-        "movq	%%r13, 168(%[t])\n\t"
-        "movq	%%r14, 176(%[t])\n\t"
-        "movq	%%rax, 184(%[t])\n\t"
-        "movq	%%r10, 192(%[t])\n\t"
-        "movq	%%r11, 200(%[t])\n\t"
-        "movq	80(%[a]), %%rdx\n\t"
-        "xorq	%%r15, %%r15\n\t"
-        "movq	80(%[t]), %%r14\n\t"
-        "movq	88(%[t]), %%rax\n\t"
-        "movq	96(%[t]), %%r10\n\t"
-        "movq	104(%[t]), %%r11\n\t"
-        "movq	112(%[t]), %%r12\n\t"
-        "# A[10] * B[0]\n\t"
-        "mulx	0(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[10] * B[1]\n\t"
-        "mulx	8(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[10] * B[2]\n\t"
-        "mulx	16(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[10] * B[3]\n\t"
-        "mulx	24(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "movq	%%r14, 80(%[t])\n\t"
-        "movq	%%rax, 88(%[t])\n\t"
-        "movq	%%r10, 96(%[t])\n\t"
-        "movq	%%r11, 104(%[t])\n\t"
-        "movq	120(%[t]), %%r13\n\t"
-        "movq	128(%[t]), %%r14\n\t"
-        "movq	136(%[t]), %%rax\n\t"
-        "movq	144(%[t]), %%r10\n\t"
-        "# A[10] * B[4]\n\t"
-        "mulx	32(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[10] * B[5]\n\t"
-        "mulx	40(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[10] * B[6]\n\t"
-        "mulx	48(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[10] * B[7]\n\t"
-        "mulx	56(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "movq	%%r12, 112(%[t])\n\t"
-        "movq	%%r13, 120(%[t])\n\t"
-        "movq	%%r14, 128(%[t])\n\t"
-        "movq	%%rax, 136(%[t])\n\t"
-        "movq	152(%[t]), %%r11\n\t"
-        "movq	160(%[t]), %%r12\n\t"
-        "movq	168(%[t]), %%r13\n\t"
-        "movq	176(%[t]), %%r14\n\t"
-        "# A[10] * B[8]\n\t"
-        "mulx	64(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[10] * B[9]\n\t"
-        "mulx	72(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[10] * B[10]\n\t"
-        "mulx	80(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[10] * B[11]\n\t"
-        "mulx	88(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "movq	%%r10, 144(%[t])\n\t"
-        "movq	%%r11, 152(%[t])\n\t"
-        "movq	%%r12, 160(%[t])\n\t"
-        "movq	%%r13, 168(%[t])\n\t"
-        "movq	184(%[t]), %%rax\n\t"
-        "movq	192(%[t]), %%r10\n\t"
-        "movq	200(%[t]), %%r11\n\t"
-        "# A[10] * B[12]\n\t"
-        "mulx	96(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[10] * B[13]\n\t"
-        "mulx	104(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[10] * B[14]\n\t"
-        "mulx	112(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[10] * B[15]\n\t"
-        "mulx	120(%[b]), %%r8, %%r9\n\t"
-        "movq	%%r15, %%r12\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "adcxq	%%rcx, %%r12\n\t"
-        "movq	%%r15, %%rcx\n\t"
-        "adoxq	%%r15, %%rcx\n\t"
-        "adcxq	%%r15, %%rcx\n\t"
-        "movq	%%r14, 176(%[t])\n\t"
-        "movq	%%rax, 184(%[t])\n\t"
-        "movq	%%r10, 192(%[t])\n\t"
-        "movq	%%r11, 200(%[t])\n\t"
-        "movq	%%r12, 208(%[t])\n\t"
-        "movq	88(%[a]), %%rdx\n\t"
-        "xorq	%%r15, %%r15\n\t"
-        "movq	88(%[t]), %%rax\n\t"
-        "movq	96(%[t]), %%r10\n\t"
-        "movq	104(%[t]), %%r11\n\t"
-        "movq	112(%[t]), %%r12\n\t"
-        "movq	120(%[t]), %%r13\n\t"
-        "# A[11] * B[0]\n\t"
-        "mulx	0(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[11] * B[1]\n\t"
-        "mulx	8(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[11] * B[2]\n\t"
-        "mulx	16(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[11] * B[3]\n\t"
-        "mulx	24(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "movq	%%rax, 88(%[t])\n\t"
-        "movq	%%r10, 96(%[t])\n\t"
-        "movq	%%r11, 104(%[t])\n\t"
-        "movq	%%r12, 112(%[t])\n\t"
-        "movq	128(%[t]), %%r14\n\t"
-        "movq	136(%[t]), %%rax\n\t"
-        "movq	144(%[t]), %%r10\n\t"
-        "movq	152(%[t]), %%r11\n\t"
-        "# A[11] * B[4]\n\t"
-        "mulx	32(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[11] * B[5]\n\t"
-        "mulx	40(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[11] * B[6]\n\t"
-        "mulx	48(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[11] * B[7]\n\t"
-        "mulx	56(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "movq	%%r13, 120(%[t])\n\t"
-        "movq	%%r14, 128(%[t])\n\t"
-        "movq	%%rax, 136(%[t])\n\t"
-        "movq	%%r10, 144(%[t])\n\t"
-        "movq	160(%[t]), %%r12\n\t"
-        "movq	168(%[t]), %%r13\n\t"
-        "movq	176(%[t]), %%r14\n\t"
-        "movq	184(%[t]), %%rax\n\t"
-        "# A[11] * B[8]\n\t"
-        "mulx	64(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[11] * B[9]\n\t"
-        "mulx	72(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[11] * B[10]\n\t"
-        "mulx	80(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[11] * B[11]\n\t"
-        "mulx	88(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "movq	%%r11, 152(%[t])\n\t"
-        "movq	%%r12, 160(%[t])\n\t"
-        "movq	%%r13, 168(%[t])\n\t"
-        "movq	%%r14, 176(%[t])\n\t"
-        "movq	192(%[t]), %%r10\n\t"
-        "movq	200(%[t]), %%r11\n\t"
-        "movq	208(%[t]), %%r12\n\t"
-        "# A[11] * B[12]\n\t"
-        "mulx	96(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[11] * B[13]\n\t"
-        "mulx	104(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[11] * B[14]\n\t"
-        "mulx	112(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[11] * B[15]\n\t"
-        "mulx	120(%[b]), %%r8, %%r9\n\t"
-        "movq	%%r15, %%r13\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "adcxq	%%rcx, %%r13\n\t"
-        "movq	%%r15, %%rcx\n\t"
-        "adoxq	%%r15, %%rcx\n\t"
-        "adcxq	%%r15, %%rcx\n\t"
-        "movq	%%rax, 184(%[t])\n\t"
-        "movq	%%r10, 192(%[t])\n\t"
-        "movq	%%r11, 200(%[t])\n\t"
-        "movq	%%r12, 208(%[t])\n\t"
-        "movq	%%r13, 216(%[t])\n\t"
-        "movq	96(%[a]), %%rdx\n\t"
-        "xorq	%%r15, %%r15\n\t"
-        "movq	96(%[t]), %%r10\n\t"
-        "movq	104(%[t]), %%r11\n\t"
-        "movq	112(%[t]), %%r12\n\t"
-        "movq	120(%[t]), %%r13\n\t"
-        "movq	128(%[t]), %%r14\n\t"
-        "# A[12] * B[0]\n\t"
-        "mulx	0(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[12] * B[1]\n\t"
-        "mulx	8(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[12] * B[2]\n\t"
-        "mulx	16(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[12] * B[3]\n\t"
-        "mulx	24(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "movq	%%r10, 96(%[t])\n\t"
-        "movq	%%r11, 104(%[t])\n\t"
-        "movq	%%r12, 112(%[t])\n\t"
-        "movq	%%r13, 120(%[t])\n\t"
-        "movq	136(%[t]), %%rax\n\t"
-        "movq	144(%[t]), %%r10\n\t"
-        "movq	152(%[t]), %%r11\n\t"
-        "movq	160(%[t]), %%r12\n\t"
-        "# A[12] * B[4]\n\t"
-        "mulx	32(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[12] * B[5]\n\t"
-        "mulx	40(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[12] * B[6]\n\t"
-        "mulx	48(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[12] * B[7]\n\t"
-        "mulx	56(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "movq	%%r14, 128(%[t])\n\t"
-        "movq	%%rax, 136(%[t])\n\t"
-        "movq	%%r10, 144(%[t])\n\t"
-        "movq	%%r11, 152(%[t])\n\t"
-        "movq	168(%[t]), %%r13\n\t"
-        "movq	176(%[t]), %%r14\n\t"
-        "movq	184(%[t]), %%rax\n\t"
-        "movq	192(%[t]), %%r10\n\t"
-        "# A[12] * B[8]\n\t"
-        "mulx	64(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[12] * B[9]\n\t"
-        "mulx	72(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[12] * B[10]\n\t"
-        "mulx	80(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[12] * B[11]\n\t"
-        "mulx	88(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "movq	%%r12, 160(%[t])\n\t"
-        "movq	%%r13, 168(%[t])\n\t"
-        "movq	%%r14, 176(%[t])\n\t"
-        "movq	%%rax, 184(%[t])\n\t"
-        "movq	200(%[t]), %%r11\n\t"
-        "movq	208(%[t]), %%r12\n\t"
-        "movq	216(%[t]), %%r13\n\t"
-        "# A[12] * B[12]\n\t"
-        "mulx	96(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[12] * B[13]\n\t"
-        "mulx	104(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[12] * B[14]\n\t"
-        "mulx	112(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[12] * B[15]\n\t"
-        "mulx	120(%[b]), %%r8, %%r9\n\t"
-        "movq	%%r15, %%r14\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "adcxq	%%rcx, %%r14\n\t"
-        "movq	%%r15, %%rcx\n\t"
-        "adoxq	%%r15, %%rcx\n\t"
-        "adcxq	%%r15, %%rcx\n\t"
-        "movq	%%r10, 192(%[t])\n\t"
-        "movq	%%r11, 200(%[t])\n\t"
-        "movq	%%r12, 208(%[t])\n\t"
-        "movq	%%r13, 216(%[t])\n\t"
-        "movq	%%r14, 224(%[t])\n\t"
-        "movq	104(%[a]), %%rdx\n\t"
-        "xorq	%%r15, %%r15\n\t"
-        "movq	104(%[t]), %%r11\n\t"
-        "movq	112(%[t]), %%r12\n\t"
-        "movq	120(%[t]), %%r13\n\t"
-        "movq	128(%[t]), %%r14\n\t"
-        "movq	136(%[t]), %%rax\n\t"
-        "# A[13] * B[0]\n\t"
-        "mulx	0(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[13] * B[1]\n\t"
-        "mulx	8(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[13] * B[2]\n\t"
-        "mulx	16(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[13] * B[3]\n\t"
-        "mulx	24(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "movq	%%r11, 104(%[t])\n\t"
-        "movq	%%r12, 112(%[t])\n\t"
-        "movq	%%r13, 120(%[t])\n\t"
-        "movq	%%r14, 128(%[t])\n\t"
-        "movq	144(%[t]), %%r10\n\t"
-        "movq	152(%[t]), %%r11\n\t"
-        "movq	160(%[t]), %%r12\n\t"
-        "movq	168(%[t]), %%r13\n\t"
-        "# A[13] * B[4]\n\t"
-        "mulx	32(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[13] * B[5]\n\t"
-        "mulx	40(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[13] * B[6]\n\t"
-        "mulx	48(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[13] * B[7]\n\t"
-        "mulx	56(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "movq	%%rax, 136(%[t])\n\t"
-        "movq	%%r10, 144(%[t])\n\t"
-        "movq	%%r11, 152(%[t])\n\t"
-        "movq	%%r12, 160(%[t])\n\t"
-        "movq	176(%[t]), %%r14\n\t"
-        "movq	184(%[t]), %%rax\n\t"
-        "movq	192(%[t]), %%r10\n\t"
-        "movq	200(%[t]), %%r11\n\t"
-        "# A[13] * B[8]\n\t"
-        "mulx	64(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[13] * B[9]\n\t"
-        "mulx	72(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[13] * B[10]\n\t"
-        "mulx	80(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[13] * B[11]\n\t"
-        "mulx	88(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "movq	%%r13, 168(%[t])\n\t"
-        "movq	%%r14, 176(%[t])\n\t"
-        "movq	%%rax, 184(%[t])\n\t"
-        "movq	%%r10, 192(%[t])\n\t"
-        "movq	208(%[t]), %%r12\n\t"
-        "movq	216(%[t]), %%r13\n\t"
-        "movq	224(%[t]), %%r14\n\t"
-        "# A[13] * B[12]\n\t"
-        "mulx	96(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[13] * B[13]\n\t"
-        "mulx	104(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[13] * B[14]\n\t"
-        "mulx	112(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[13] * B[15]\n\t"
-        "mulx	120(%[b]), %%r8, %%r9\n\t"
-        "movq	%%r15, %%rax\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "adcxq	%%rcx, %%rax\n\t"
-        "movq	%%r15, %%rcx\n\t"
-        "adoxq	%%r15, %%rcx\n\t"
-        "adcxq	%%r15, %%rcx\n\t"
-        "movq	%%r11, 200(%[t])\n\t"
-        "movq	%%r12, 208(%[t])\n\t"
-        "movq	%%r13, 216(%[t])\n\t"
-        "movq	%%r14, 224(%[t])\n\t"
-        "movq	%%rax, 232(%[t])\n\t"
-        "movq	112(%[a]), %%rdx\n\t"
-        "xorq	%%r15, %%r15\n\t"
-        "movq	112(%[t]), %%r12\n\t"
-        "movq	120(%[t]), %%r13\n\t"
-        "movq	128(%[t]), %%r14\n\t"
-        "movq	136(%[t]), %%rax\n\t"
-        "movq	144(%[t]), %%r10\n\t"
-        "# A[14] * B[0]\n\t"
-        "mulx	0(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[14] * B[1]\n\t"
-        "mulx	8(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[14] * B[2]\n\t"
-        "mulx	16(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[14] * B[3]\n\t"
-        "mulx	24(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "movq	%%r12, 112(%[t])\n\t"
-        "movq	%%r13, 120(%[t])\n\t"
-        "movq	%%r14, 128(%[t])\n\t"
-        "movq	%%rax, 136(%[t])\n\t"
-        "movq	152(%[t]), %%r11\n\t"
-        "movq	160(%[t]), %%r12\n\t"
-        "movq	168(%[t]), %%r13\n\t"
-        "movq	176(%[t]), %%r14\n\t"
-        "# A[14] * B[4]\n\t"
-        "mulx	32(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[14] * B[5]\n\t"
-        "mulx	40(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[14] * B[6]\n\t"
-        "mulx	48(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[14] * B[7]\n\t"
-        "mulx	56(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "movq	%%r10, 144(%[t])\n\t"
-        "movq	%%r11, 152(%[t])\n\t"
-        "movq	%%r12, 160(%[t])\n\t"
-        "movq	%%r13, 168(%[t])\n\t"
-        "movq	184(%[t]), %%rax\n\t"
-        "movq	192(%[t]), %%r10\n\t"
-        "movq	200(%[t]), %%r11\n\t"
-        "movq	208(%[t]), %%r12\n\t"
-        "# A[14] * B[8]\n\t"
-        "mulx	64(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[14] * B[9]\n\t"
-        "mulx	72(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[14] * B[10]\n\t"
-        "mulx	80(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[14] * B[11]\n\t"
-        "mulx	88(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "movq	%%r14, 176(%[t])\n\t"
-        "movq	%%rax, 184(%[t])\n\t"
-        "movq	%%r10, 192(%[t])\n\t"
-        "movq	%%r11, 200(%[t])\n\t"
-        "movq	216(%[t]), %%r13\n\t"
-        "movq	224(%[t]), %%r14\n\t"
-        "movq	232(%[t]), %%rax\n\t"
-        "# A[14] * B[12]\n\t"
-        "mulx	96(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[14] * B[13]\n\t"
-        "mulx	104(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[14] * B[14]\n\t"
-        "mulx	112(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[14] * B[15]\n\t"
-        "mulx	120(%[b]), %%r8, %%r9\n\t"
-        "movq	%%r15, %%r10\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "adcxq	%%rcx, %%r10\n\t"
-        "movq	%%r15, %%rcx\n\t"
-        "adoxq	%%r15, %%rcx\n\t"
-        "adcxq	%%r15, %%rcx\n\t"
-        "movq	%%r12, 208(%[t])\n\t"
-        "movq	%%r13, 216(%[t])\n\t"
-        "movq	%%r14, 224(%[t])\n\t"
-        "movq	%%rax, 232(%[t])\n\t"
-        "movq	%%r10, 240(%[t])\n\t"
-        "movq	120(%[a]), %%rdx\n\t"
-        "xorq	%%r15, %%r15\n\t"
-        "movq	120(%[t]), %%r13\n\t"
-        "movq	128(%[t]), %%r14\n\t"
-        "movq	136(%[t]), %%rax\n\t"
-        "movq	144(%[t]), %%r10\n\t"
-        "movq	152(%[t]), %%r11\n\t"
-        "# A[15] * B[0]\n\t"
-        "mulx	0(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[15] * B[1]\n\t"
-        "mulx	8(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[15] * B[2]\n\t"
-        "mulx	16(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[15] * B[3]\n\t"
-        "mulx	24(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "movq	%%r13, 120(%[t])\n\t"
-        "movq	%%r14, 128(%[t])\n\t"
-        "movq	%%rax, 136(%[t])\n\t"
-        "movq	%%r10, 144(%[t])\n\t"
-        "movq	160(%[t]), %%r12\n\t"
-        "movq	168(%[t]), %%r13\n\t"
-        "movq	176(%[t]), %%r14\n\t"
-        "movq	184(%[t]), %%rax\n\t"
-        "# A[15] * B[4]\n\t"
-        "mulx	32(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[15] * B[5]\n\t"
-        "mulx	40(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[15] * B[6]\n\t"
-        "mulx	48(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[15] * B[7]\n\t"
-        "mulx	56(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "movq	%%r11, 152(%[t])\n\t"
-        "movq	%%r12, 160(%[t])\n\t"
-        "movq	%%r13, 168(%[t])\n\t"
-        "movq	%%r14, 176(%[t])\n\t"
-        "movq	192(%[t]), %%r10\n\t"
-        "movq	200(%[t]), %%r11\n\t"
-        "movq	208(%[t]), %%r12\n\t"
-        "movq	216(%[t]), %%r13\n\t"
-        "# A[15] * B[8]\n\t"
-        "mulx	64(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[15] * B[9]\n\t"
-        "mulx	72(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[15] * B[10]\n\t"
-        "mulx	80(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[15] * B[11]\n\t"
-        "mulx	88(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "movq	%%rax, 184(%[t])\n\t"
-        "movq	%%r10, 192(%[t])\n\t"
-        "movq	%%r11, 200(%[t])\n\t"
-        "movq	%%r12, 208(%[t])\n\t"
-        "movq	224(%[t]), %%r14\n\t"
-        "movq	232(%[t]), %%rax\n\t"
-        "movq	240(%[t]), %%r10\n\t"
-        "# A[15] * B[12]\n\t"
-        "mulx	96(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[15] * B[13]\n\t"
-        "mulx	104(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[15] * B[14]\n\t"
-        "mulx	112(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[15] * B[15]\n\t"
-        "mulx	120(%[b]), %%r8, %%r9\n\t"
-        "movq	%%r15, %%r11\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "adcxq	%%rcx, %%r11\n\t"
-        "movq	%%r13, 216(%[t])\n\t"
-        "movq	%%r14, 224(%[t])\n\t"
-        "movq	%%rax, 232(%[t])\n\t"
-        "movq	%%r10, 240(%[t])\n\t"
-        "movq	%%r11, 248(%[t])\n\t"
-        :
-        : [a] "r" (a), [b] "r" (b), [t] "r" (tmp)
-        : "memory", "rax", "rdx", "rcx",
-          "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
-    );
-
-    XMEMCPY(r, tmp, sizeof(tmp));
-}
-#endif /* HAVE_INTEL_AVX2 */
-
-#ifdef HAVE_INTEL_AVX2
-/* Square a and put result in r. (r = a * a)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- */
-SP_NOINLINE static void sp_2048_sqr_avx2_16(sp_digit* r, const sp_digit* a)
-{
-    sp_digit tmp[32];
-
-    __asm__ __volatile__ (
-        "xorq	%%r8, %%r8\n\t"
-        "# Diagonal 1\n\t"
-        "xorq	%%r10, %%r10\n\t"
-        "xorq	%%r11, %%r11\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "xorq	%%r13, %%r13\n\t"
-        "xorq	%%r14, %%r14\n\t"
-        "xorq	%%r15, %%r15\n\t"
-        "# A[1] x A[0]\n\t"
-        "movq	0(%[a]), %%rdx\n\t"
-        "mulxq	8(%[a]), %%r10, %%r11\n\t"
-        "# A[2] x A[0]\n\t"
-        "mulxq	16(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[3] x A[0]\n\t"
-        "mulxq	24(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[4] x A[0]\n\t"
-        "mulxq	32(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[5] x A[0]\n\t"
-        "mulxq	40(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "movq	%%r10, 8(%[tmp])\n\t"
-        "movq	%%r11, 16(%[tmp])\n\t"
-        "movq	%%r12, 24(%[tmp])\n\t"
-        "movq	%%r13, 32(%[tmp])\n\t"
-        "movq	%%r14, 40(%[tmp])\n\t"
-        "movq	%%r8, %%r10\n\t"
-        "movq	%%r8, %%r11\n\t"
-        "movq	%%r8, %%r12\n\t"
-        "movq	%%r8, %%r13\n\t"
-        "movq	%%r8, %%r14\n\t"
-        "# A[6] x A[0]\n\t"
-        "mulxq	48(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[7] x A[0]\n\t"
-        "mulxq	56(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[8] x A[0]\n\t"
-        "mulxq	64(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[9] x A[0]\n\t"
-        "mulxq	72(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[10] x A[0]\n\t"
-        "mulxq	80(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "movq	%%r15, 48(%[tmp])\n\t"
-        "movq	%%r10, 56(%[tmp])\n\t"
-        "movq	%%r11, 64(%[tmp])\n\t"
-        "movq	%%r12, 72(%[tmp])\n\t"
-        "movq	%%r13, 80(%[tmp])\n\t"
-        "movq	%%r8, %%r15\n\t"
-        "movq	%%r8, %%r10\n\t"
-        "movq	%%r8, %%r11\n\t"
-        "movq	%%r8, %%r12\n\t"
-        "movq	%%r8, %%r13\n\t"
-        "# A[11] x A[0]\n\t"
-        "mulxq	88(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[12] x A[0]\n\t"
-        "mulxq	96(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[13] x A[0]\n\t"
-        "mulxq	104(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[14] x A[0]\n\t"
-        "mulxq	112(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[15] x A[0]\n\t"
-        "mulxq	120(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "movq	%%r14, 88(%[tmp])\n\t"
-        "movq	%%r15, 96(%[tmp])\n\t"
-        "movq	%%r10, 104(%[tmp])\n\t"
-        "movq	%%r11, 112(%[tmp])\n\t"
-        "movq	%%r12, 120(%[tmp])\n\t"
-        "#  Carry\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "movq	%%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r9\n\t"
-        "adoxq	%%r8, %%r9\n\t"
-        "movq	%%r13, 128(%[tmp])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "# Diagonal 2\n\t"
-        "movq	24(%[tmp]), %%r13\n\t"
-        "movq	32(%[tmp]), %%r14\n\t"
-        "movq	40(%[tmp]), %%r15\n\t"
-        "movq	48(%[tmp]), %%r10\n\t"
-        "movq	56(%[tmp]), %%r11\n\t"
-        "movq	64(%[tmp]), %%r12\n\t"
-        "# A[2] x A[1]\n\t"
-        "movq	8(%[a]), %%rdx\n\t"
-        "mulxq	16(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[3] x A[1]\n\t"
-        "mulxq	24(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[4] x A[1]\n\t"
-        "mulxq	32(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[5] x A[1]\n\t"
-        "mulxq	40(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[6] x A[1]\n\t"
-        "mulxq	48(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "movq	%%r13, 24(%[tmp])\n\t"
-        "movq	%%r14, 32(%[tmp])\n\t"
-        "movq	%%r15, 40(%[tmp])\n\t"
-        "movq	%%r10, 48(%[tmp])\n\t"
-        "movq	%%r11, 56(%[tmp])\n\t"
-        "movq	72(%[tmp]), %%r13\n\t"
-        "movq	80(%[tmp]), %%r14\n\t"
-        "movq	88(%[tmp]), %%r15\n\t"
-        "movq	96(%[tmp]), %%r10\n\t"
-        "movq	104(%[tmp]), %%r11\n\t"
-        "# A[7] x A[1]\n\t"
-        "mulxq	56(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[8] x A[1]\n\t"
-        "mulxq	64(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[9] x A[1]\n\t"
-        "mulxq	72(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[10] x A[1]\n\t"
-        "mulxq	80(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[11] x A[1]\n\t"
-        "mulxq	88(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "movq	%%r12, 64(%[tmp])\n\t"
-        "movq	%%r13, 72(%[tmp])\n\t"
-        "movq	%%r14, 80(%[tmp])\n\t"
-        "movq	%%r15, 88(%[tmp])\n\t"
-        "movq	%%r10, 96(%[tmp])\n\t"
-        "movq	112(%[tmp]), %%r12\n\t"
-        "movq	120(%[tmp]), %%r13\n\t"
-        "movq	128(%[tmp]), %%r14\n\t"
-        "movq	%%r8, %%r15\n\t"
-        "movq	%%r8, %%r10\n\t"
-        "# A[12] x A[1]\n\t"
-        "mulxq	96(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[13] x A[1]\n\t"
-        "mulxq	104(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[14] x A[1]\n\t"
-        "mulxq	112(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[15] x A[1]\n\t"
-        "mulxq	120(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[15] x A[2]\n\t"
-        "movq	16(%[a]), %%rdx\n\t"
-        "mulxq	120(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "movq	%%r11, 104(%[tmp])\n\t"
-        "movq	%%r12, 112(%[tmp])\n\t"
-        "movq	%%r13, 120(%[tmp])\n\t"
-        "movq	%%r14, 128(%[tmp])\n\t"
-        "movq	%%r15, 136(%[tmp])\n\t"
-        "#  Carry\n\t"
-        "adcxq	%%r9, %%r10\n\t"
-        "movq	%%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r9\n\t"
-        "adoxq	%%r8, %%r9\n\t"
-        "movq	%%r10, 144(%[tmp])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "# Diagonal 3\n\t"
-        "movq	40(%[tmp]), %%r10\n\t"
-        "movq	48(%[tmp]), %%r11\n\t"
-        "movq	56(%[tmp]), %%r12\n\t"
-        "movq	64(%[tmp]), %%r13\n\t"
-        "movq	72(%[tmp]), %%r14\n\t"
-        "movq	80(%[tmp]), %%r15\n\t"
-        "# A[3] x A[2]\n\t"
-        "mulxq	24(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[4] x A[2]\n\t"
-        "mulxq	32(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[5] x A[2]\n\t"
-        "mulxq	40(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[6] x A[2]\n\t"
-        "mulxq	48(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[7] x A[2]\n\t"
-        "mulxq	56(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "movq	%%r10, 40(%[tmp])\n\t"
-        "movq	%%r11, 48(%[tmp])\n\t"
-        "movq	%%r12, 56(%[tmp])\n\t"
-        "movq	%%r13, 64(%[tmp])\n\t"
-        "movq	%%r14, 72(%[tmp])\n\t"
-        "movq	88(%[tmp]), %%r10\n\t"
-        "movq	96(%[tmp]), %%r11\n\t"
-        "movq	104(%[tmp]), %%r12\n\t"
-        "movq	112(%[tmp]), %%r13\n\t"
-        "movq	120(%[tmp]), %%r14\n\t"
-        "# A[8] x A[2]\n\t"
-        "mulxq	64(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[9] x A[2]\n\t"
-        "mulxq	72(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[10] x A[2]\n\t"
-        "mulxq	80(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[11] x A[2]\n\t"
-        "mulxq	88(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[12] x A[2]\n\t"
-        "mulxq	96(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "movq	%%r15, 80(%[tmp])\n\t"
-        "movq	%%r10, 88(%[tmp])\n\t"
-        "movq	%%r11, 96(%[tmp])\n\t"
-        "movq	%%r12, 104(%[tmp])\n\t"
-        "movq	%%r13, 112(%[tmp])\n\t"
-        "movq	128(%[tmp]), %%r15\n\t"
-        "movq	136(%[tmp]), %%r10\n\t"
-        "movq	144(%[tmp]), %%r11\n\t"
-        "movq	%%r8, %%r12\n\t"
-        "movq	%%r8, %%r13\n\t"
-        "# A[13] x A[2]\n\t"
-        "mulxq	104(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[14] x A[2]\n\t"
-        "mulxq	112(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[14] x A[3]\n\t"
-        "movq	112(%[a]), %%rdx\n\t"
-        "mulxq	24(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[14] x A[4]\n\t"
-        "mulxq	32(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[14] x A[5]\n\t"
-        "mulxq	40(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "movq	%%r14, 120(%[tmp])\n\t"
-        "movq	%%r15, 128(%[tmp])\n\t"
-        "movq	%%r10, 136(%[tmp])\n\t"
-        "movq	%%r11, 144(%[tmp])\n\t"
-        "movq	%%r12, 152(%[tmp])\n\t"
-        "#  Carry\n\t"
-        "adcxq	%%r9, %%r13\n\t"
-        "movq	%%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r9\n\t"
-        "adoxq	%%r8, %%r9\n\t"
-        "movq	%%r13, 160(%[tmp])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "# Diagonal 4\n\t"
-        "movq	56(%[tmp]), %%r13\n\t"
-        "movq	64(%[tmp]), %%r14\n\t"
-        "movq	72(%[tmp]), %%r15\n\t"
-        "movq	80(%[tmp]), %%r10\n\t"
-        "movq	88(%[tmp]), %%r11\n\t"
-        "movq	96(%[tmp]), %%r12\n\t"
-        "# A[4] x A[3]\n\t"
-        "movq	24(%[a]), %%rdx\n\t"
-        "mulxq	32(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[5] x A[3]\n\t"
-        "mulxq	40(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[6] x A[3]\n\t"
-        "mulxq	48(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[7] x A[3]\n\t"
-        "mulxq	56(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[8] x A[3]\n\t"
-        "mulxq	64(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "movq	%%r13, 56(%[tmp])\n\t"
-        "movq	%%r14, 64(%[tmp])\n\t"
-        "movq	%%r15, 72(%[tmp])\n\t"
-        "movq	%%r10, 80(%[tmp])\n\t"
-        "movq	%%r11, 88(%[tmp])\n\t"
-        "movq	104(%[tmp]), %%r13\n\t"
-        "movq	112(%[tmp]), %%r14\n\t"
-        "movq	120(%[tmp]), %%r15\n\t"
-        "movq	128(%[tmp]), %%r10\n\t"
-        "movq	136(%[tmp]), %%r11\n\t"
-        "# A[9] x A[3]\n\t"
-        "mulxq	72(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[10] x A[3]\n\t"
-        "mulxq	80(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[11] x A[3]\n\t"
-        "mulxq	88(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[12] x A[3]\n\t"
-        "mulxq	96(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[13] x A[3]\n\t"
-        "mulxq	104(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "movq	%%r12, 96(%[tmp])\n\t"
-        "movq	%%r13, 104(%[tmp])\n\t"
-        "movq	%%r14, 112(%[tmp])\n\t"
-        "movq	%%r15, 120(%[tmp])\n\t"
-        "movq	%%r10, 128(%[tmp])\n\t"
-        "movq	144(%[tmp]), %%r12\n\t"
-        "movq	152(%[tmp]), %%r13\n\t"
-        "movq	160(%[tmp]), %%r14\n\t"
-        "movq	%%r8, %%r15\n\t"
-        "movq	%%r8, %%r10\n\t"
-        "# A[13] x A[4]\n\t"
-        "movq	104(%[a]), %%rdx\n\t"
-        "mulxq	32(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[13] x A[5]\n\t"
-        "mulxq	40(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[13] x A[6]\n\t"
-        "mulxq	48(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[13] x A[7]\n\t"
-        "mulxq	56(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[13] x A[8]\n\t"
-        "mulxq	64(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "movq	%%r11, 136(%[tmp])\n\t"
-        "movq	%%r12, 144(%[tmp])\n\t"
-        "movq	%%r13, 152(%[tmp])\n\t"
-        "movq	%%r14, 160(%[tmp])\n\t"
-        "movq	%%r15, 168(%[tmp])\n\t"
-        "#  Carry\n\t"
-        "adcxq	%%r9, %%r10\n\t"
-        "movq	%%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r9\n\t"
-        "adoxq	%%r8, %%r9\n\t"
-        "movq	%%r10, 176(%[tmp])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "# Diagonal 5\n\t"
-        "movq	72(%[tmp]), %%r10\n\t"
-        "movq	80(%[tmp]), %%r11\n\t"
-        "movq	88(%[tmp]), %%r12\n\t"
-        "movq	96(%[tmp]), %%r13\n\t"
-        "movq	104(%[tmp]), %%r14\n\t"
-        "movq	112(%[tmp]), %%r15\n\t"
-        "# A[5] x A[4]\n\t"
-        "movq	32(%[a]), %%rdx\n\t"
-        "mulxq	40(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[6] x A[4]\n\t"
-        "mulxq	48(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[7] x A[4]\n\t"
-        "mulxq	56(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[8] x A[4]\n\t"
-        "mulxq	64(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[9] x A[4]\n\t"
-        "mulxq	72(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "movq	%%r10, 72(%[tmp])\n\t"
-        "movq	%%r11, 80(%[tmp])\n\t"
-        "movq	%%r12, 88(%[tmp])\n\t"
-        "movq	%%r13, 96(%[tmp])\n\t"
-        "movq	%%r14, 104(%[tmp])\n\t"
-        "movq	120(%[tmp]), %%r10\n\t"
-        "movq	128(%[tmp]), %%r11\n\t"
-        "movq	136(%[tmp]), %%r12\n\t"
-        "movq	144(%[tmp]), %%r13\n\t"
-        "movq	152(%[tmp]), %%r14\n\t"
-        "# A[10] x A[4]\n\t"
-        "mulxq	80(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[11] x A[4]\n\t"
-        "mulxq	88(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[12] x A[4]\n\t"
-        "mulxq	96(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[12] x A[5]\n\t"
-        "movq	96(%[a]), %%rdx\n\t"
-        "mulxq	40(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[12] x A[6]\n\t"
-        "mulxq	48(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "movq	%%r15, 112(%[tmp])\n\t"
-        "movq	%%r10, 120(%[tmp])\n\t"
-        "movq	%%r11, 128(%[tmp])\n\t"
-        "movq	%%r12, 136(%[tmp])\n\t"
-        "movq	%%r13, 144(%[tmp])\n\t"
-        "movq	160(%[tmp]), %%r15\n\t"
-        "movq	168(%[tmp]), %%r10\n\t"
-        "movq	176(%[tmp]), %%r11\n\t"
-        "movq	%%r8, %%r12\n\t"
-        "movq	%%r8, %%r13\n\t"
-        "# A[12] x A[7]\n\t"
-        "mulxq	56(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[12] x A[8]\n\t"
-        "mulxq	64(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[12] x A[9]\n\t"
-        "mulxq	72(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[12] x A[10]\n\t"
-        "mulxq	80(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[12] x A[11]\n\t"
-        "mulxq	88(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "movq	%%r14, 152(%[tmp])\n\t"
-        "movq	%%r15, 160(%[tmp])\n\t"
-        "movq	%%r10, 168(%[tmp])\n\t"
-        "movq	%%r11, 176(%[tmp])\n\t"
-        "movq	%%r12, 184(%[tmp])\n\t"
-        "#  Carry\n\t"
-        "adcxq	%%r9, %%r13\n\t"
-        "movq	%%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r9\n\t"
-        "adoxq	%%r8, %%r9\n\t"
-        "movq	%%r13, 192(%[tmp])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "# Diagonal 6\n\t"
-        "movq	88(%[tmp]), %%r13\n\t"
-        "movq	96(%[tmp]), %%r14\n\t"
-        "movq	104(%[tmp]), %%r15\n\t"
-        "movq	112(%[tmp]), %%r10\n\t"
-        "movq	120(%[tmp]), %%r11\n\t"
-        "movq	128(%[tmp]), %%r12\n\t"
-        "# A[6] x A[5]\n\t"
-        "movq	40(%[a]), %%rdx\n\t"
-        "mulxq	48(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[7] x A[5]\n\t"
-        "mulxq	56(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[8] x A[5]\n\t"
-        "mulxq	64(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[9] x A[5]\n\t"
-        "mulxq	72(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[10] x A[5]\n\t"
-        "mulxq	80(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "movq	%%r13, 88(%[tmp])\n\t"
-        "movq	%%r14, 96(%[tmp])\n\t"
-        "movq	%%r15, 104(%[tmp])\n\t"
-        "movq	%%r10, 112(%[tmp])\n\t"
-        "movq	%%r11, 120(%[tmp])\n\t"
-        "movq	136(%[tmp]), %%r13\n\t"
-        "movq	144(%[tmp]), %%r14\n\t"
-        "movq	152(%[tmp]), %%r15\n\t"
-        "movq	160(%[tmp]), %%r10\n\t"
-        "movq	168(%[tmp]), %%r11\n\t"
-        "# A[11] x A[5]\n\t"
-        "mulxq	88(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[11] x A[6]\n\t"
-        "movq	88(%[a]), %%rdx\n\t"
-        "mulxq	48(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[11] x A[7]\n\t"
-        "mulxq	56(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[11] x A[8]\n\t"
-        "mulxq	64(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[11] x A[9]\n\t"
-        "mulxq	72(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "movq	%%r12, 128(%[tmp])\n\t"
-        "movq	%%r13, 136(%[tmp])\n\t"
-        "movq	%%r14, 144(%[tmp])\n\t"
-        "movq	%%r15, 152(%[tmp])\n\t"
-        "movq	%%r10, 160(%[tmp])\n\t"
-        "movq	176(%[tmp]), %%r12\n\t"
-        "movq	184(%[tmp]), %%r13\n\t"
-        "movq	192(%[tmp]), %%r14\n\t"
-        "movq	%%r8, %%r15\n\t"
-        "movq	%%r8, %%r10\n\t"
-        "# A[11] x A[10]\n\t"
-        "mulxq	80(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[13] x A[9]\n\t"
-        "movq	104(%[a]), %%rdx\n\t"
-        "mulxq	72(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[13] x A[10]\n\t"
-        "mulxq	80(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[13] x A[11]\n\t"
-        "mulxq	88(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[13] x A[12]\n\t"
-        "mulxq	96(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "movq	%%r11, 168(%[tmp])\n\t"
-        "movq	%%r12, 176(%[tmp])\n\t"
-        "movq	%%r13, 184(%[tmp])\n\t"
-        "movq	%%r14, 192(%[tmp])\n\t"
-        "movq	%%r15, 200(%[tmp])\n\t"
-        "#  Carry\n\t"
-        "adcxq	%%r9, %%r10\n\t"
-        "movq	%%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r9\n\t"
-        "adoxq	%%r8, %%r9\n\t"
-        "movq	%%r10, 208(%[tmp])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "# Diagonal 7\n\t"
-        "movq	104(%[tmp]), %%r10\n\t"
-        "movq	112(%[tmp]), %%r11\n\t"
-        "movq	120(%[tmp]), %%r12\n\t"
-        "movq	128(%[tmp]), %%r13\n\t"
-        "movq	136(%[tmp]), %%r14\n\t"
-        "movq	144(%[tmp]), %%r15\n\t"
-        "# A[7] x A[6]\n\t"
-        "movq	48(%[a]), %%rdx\n\t"
-        "mulxq	56(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[8] x A[6]\n\t"
-        "mulxq	64(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[9] x A[6]\n\t"
-        "mulxq	72(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[10] x A[6]\n\t"
-        "mulxq	80(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[10] x A[7]\n\t"
-        "movq	80(%[a]), %%rdx\n\t"
-        "mulxq	56(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "movq	%%r10, 104(%[tmp])\n\t"
-        "movq	%%r11, 112(%[tmp])\n\t"
-        "movq	%%r12, 120(%[tmp])\n\t"
-        "movq	%%r13, 128(%[tmp])\n\t"
-        "movq	%%r14, 136(%[tmp])\n\t"
-        "movq	152(%[tmp]), %%r10\n\t"
-        "movq	160(%[tmp]), %%r11\n\t"
-        "movq	168(%[tmp]), %%r12\n\t"
-        "movq	176(%[tmp]), %%r13\n\t"
-        "movq	184(%[tmp]), %%r14\n\t"
-        "# A[10] x A[8]\n\t"
-        "mulxq	64(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[10] x A[9]\n\t"
-        "mulxq	72(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[14] x A[6]\n\t"
-        "movq	112(%[a]), %%rdx\n\t"
-        "mulxq	48(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[14] x A[7]\n\t"
-        "mulxq	56(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[14] x A[8]\n\t"
-        "mulxq	64(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "movq	%%r15, 144(%[tmp])\n\t"
-        "movq	%%r10, 152(%[tmp])\n\t"
-        "movq	%%r11, 160(%[tmp])\n\t"
-        "movq	%%r12, 168(%[tmp])\n\t"
-        "movq	%%r13, 176(%[tmp])\n\t"
-        "movq	192(%[tmp]), %%r15\n\t"
-        "movq	200(%[tmp]), %%r10\n\t"
-        "movq	208(%[tmp]), %%r11\n\t"
-        "movq	%%r8, %%r12\n\t"
-        "movq	%%r8, %%r13\n\t"
-        "# A[14] x A[9]\n\t"
-        "mulxq	72(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[14] x A[10]\n\t"
-        "mulxq	80(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[14] x A[11]\n\t"
-        "mulxq	88(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[14] x A[12]\n\t"
-        "mulxq	96(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[14] x A[13]\n\t"
-        "mulxq	104(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "movq	%%r14, 184(%[tmp])\n\t"
-        "movq	%%r15, 192(%[tmp])\n\t"
-        "movq	%%r10, 200(%[tmp])\n\t"
-        "movq	%%r11, 208(%[tmp])\n\t"
-        "movq	%%r12, 216(%[tmp])\n\t"
-        "#  Carry\n\t"
-        "adcxq	%%r9, %%r13\n\t"
-        "movq	%%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r9\n\t"
-        "adoxq	%%r8, %%r9\n\t"
-        "movq	%%r13, 224(%[tmp])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "# Diagonal 8\n\t"
-        "movq	120(%[tmp]), %%r13\n\t"
-        "movq	128(%[tmp]), %%r14\n\t"
-        "movq	136(%[tmp]), %%r15\n\t"
-        "movq	144(%[tmp]), %%r10\n\t"
-        "movq	152(%[tmp]), %%r11\n\t"
-        "movq	160(%[tmp]), %%r12\n\t"
-        "# A[8] x A[7]\n\t"
-        "movq	56(%[a]), %%rdx\n\t"
-        "mulxq	64(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[9] x A[7]\n\t"
-        "mulxq	72(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[9] x A[8]\n\t"
-        "movq	64(%[a]), %%rdx\n\t"
-        "mulxq	72(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[15] x A[3]\n\t"
-        "movq	120(%[a]), %%rdx\n\t"
-        "mulxq	24(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[15] x A[4]\n\t"
-        "mulxq	32(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "movq	%%r13, 120(%[tmp])\n\t"
-        "movq	%%r14, 128(%[tmp])\n\t"
-        "movq	%%r15, 136(%[tmp])\n\t"
-        "movq	%%r10, 144(%[tmp])\n\t"
-        "movq	%%r11, 152(%[tmp])\n\t"
-        "movq	168(%[tmp]), %%r13\n\t"
-        "movq	176(%[tmp]), %%r14\n\t"
-        "movq	184(%[tmp]), %%r15\n\t"
-        "movq	192(%[tmp]), %%r10\n\t"
-        "movq	200(%[tmp]), %%r11\n\t"
-        "# A[15] x A[5]\n\t"
-        "mulxq	40(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[15] x A[6]\n\t"
-        "mulxq	48(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[15] x A[7]\n\t"
-        "mulxq	56(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[15] x A[8]\n\t"
-        "mulxq	64(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[15] x A[9]\n\t"
-        "mulxq	72(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "movq	%%r12, 160(%[tmp])\n\t"
-        "movq	%%r13, 168(%[tmp])\n\t"
-        "movq	%%r14, 176(%[tmp])\n\t"
-        "movq	%%r15, 184(%[tmp])\n\t"
-        "movq	%%r10, 192(%[tmp])\n\t"
-        "movq	208(%[tmp]), %%r12\n\t"
-        "movq	216(%[tmp]), %%r13\n\t"
-        "movq	224(%[tmp]), %%r14\n\t"
-        "movq	%%r8, %%r15\n\t"
-        "movq	%%r8, %%r10\n\t"
-        "# A[15] x A[10]\n\t"
-        "mulxq	80(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[15] x A[11]\n\t"
-        "mulxq	88(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[15] x A[12]\n\t"
-        "mulxq	96(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[15] x A[13]\n\t"
-        "mulxq	104(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[15] x A[14]\n\t"
-        "mulxq	112(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "movq	%%r11, 200(%[tmp])\n\t"
-        "movq	%%r12, 208(%[tmp])\n\t"
-        "movq	%%r13, 216(%[tmp])\n\t"
-        "movq	%%r14, 224(%[tmp])\n\t"
-        "movq	%%r15, 232(%[tmp])\n\t"
-        "#  Carry\n\t"
-        "adcxq	%%r9, %%r10\n\t"
-        "movq	%%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r9\n\t"
-        "adoxq	%%r8, %%r9\n\t"
-        "movq	%%r10, 240(%[tmp])\n\t"
-        "movq	%%r9, 248(%[tmp])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "# Double and Add in A[i] x A[i]\n\t"
-        "movq	8(%[tmp]), %%r11\n\t"
-        "# A[0] x A[0]\n\t"
-        "movq	0(%[a]), %%rdx\n\t"
-        "mulxq	%%rdx, %%rax, %%rcx\n\t"
-        "movq	%%rax, 0(%[tmp])\n\t"
-        "adoxq	%%r11, %%r11\n\t"
-        "adcxq	%%rcx, %%r11\n\t"
-        "movq	%%r11, 8(%[tmp])\n\t"
-        "movq	16(%[tmp]), %%r10\n\t"
-        "movq	24(%[tmp]), %%r11\n\t"
-        "# A[1] x A[1]\n\t"
-        "movq	8(%[a]), %%rdx\n\t"
-        "mulxq	%%rdx, %%rax, %%rcx\n\t"
-        "adoxq	%%r10, %%r10\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r11, %%r11\n\t"
-        "adcxq	%%rcx, %%r11\n\t"
-        "movq	%%r10, 16(%[tmp])\n\t"
-        "movq	%%r11, 24(%[tmp])\n\t"
-        "movq	32(%[tmp]), %%r10\n\t"
-        "movq	40(%[tmp]), %%r11\n\t"
-        "# A[2] x A[2]\n\t"
-        "movq	16(%[a]), %%rdx\n\t"
-        "mulxq	%%rdx, %%rax, %%rcx\n\t"
-        "adoxq	%%r10, %%r10\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r11, %%r11\n\t"
-        "adcxq	%%rcx, %%r11\n\t"
-        "movq	%%r10, 32(%[tmp])\n\t"
-        "movq	%%r11, 40(%[tmp])\n\t"
-        "movq	48(%[tmp]), %%r10\n\t"
-        "movq	56(%[tmp]), %%r11\n\t"
-        "# A[3] x A[3]\n\t"
-        "movq	24(%[a]), %%rdx\n\t"
-        "mulxq	%%rdx, %%rax, %%rcx\n\t"
-        "adoxq	%%r10, %%r10\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r11, %%r11\n\t"
-        "adcxq	%%rcx, %%r11\n\t"
-        "movq	%%r10, 48(%[tmp])\n\t"
-        "movq	%%r11, 56(%[tmp])\n\t"
-        "movq	64(%[tmp]), %%r10\n\t"
-        "movq	72(%[tmp]), %%r11\n\t"
-        "# A[4] x A[4]\n\t"
-        "movq	32(%[a]), %%rdx\n\t"
-        "mulxq	%%rdx, %%rax, %%rcx\n\t"
-        "adoxq	%%r10, %%r10\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r11, %%r11\n\t"
-        "adcxq	%%rcx, %%r11\n\t"
-        "movq	%%r10, 64(%[tmp])\n\t"
-        "movq	%%r11, 72(%[tmp])\n\t"
-        "movq	80(%[tmp]), %%r10\n\t"
-        "movq	88(%[tmp]), %%r11\n\t"
-        "# A[5] x A[5]\n\t"
-        "movq	40(%[a]), %%rdx\n\t"
-        "mulxq	%%rdx, %%rax, %%rcx\n\t"
-        "adoxq	%%r10, %%r10\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r11, %%r11\n\t"
-        "adcxq	%%rcx, %%r11\n\t"
-        "movq	%%r10, 80(%[tmp])\n\t"
-        "movq	%%r11, 88(%[tmp])\n\t"
-        "movq	96(%[tmp]), %%r10\n\t"
-        "movq	104(%[tmp]), %%r11\n\t"
-        "# A[6] x A[6]\n\t"
-        "movq	48(%[a]), %%rdx\n\t"
-        "mulxq	%%rdx, %%rax, %%rcx\n\t"
-        "adoxq	%%r10, %%r10\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r11, %%r11\n\t"
-        "adcxq	%%rcx, %%r11\n\t"
-        "movq	%%r10, 96(%[tmp])\n\t"
-        "movq	%%r11, 104(%[tmp])\n\t"
-        "movq	112(%[tmp]), %%r10\n\t"
-        "movq	120(%[tmp]), %%r11\n\t"
-        "# A[7] x A[7]\n\t"
-        "movq	56(%[a]), %%rdx\n\t"
-        "mulxq	%%rdx, %%rax, %%rcx\n\t"
-        "adoxq	%%r10, %%r10\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r11, %%r11\n\t"
-        "adcxq	%%rcx, %%r11\n\t"
-        "movq	%%r10, 112(%[tmp])\n\t"
-        "movq	%%r11, 120(%[tmp])\n\t"
-        "movq	128(%[tmp]), %%r10\n\t"
-        "movq	136(%[tmp]), %%r11\n\t"
-        "# A[8] x A[8]\n\t"
-        "movq	64(%[a]), %%rdx\n\t"
-        "mulxq	%%rdx, %%rax, %%rcx\n\t"
-        "adoxq	%%r10, %%r10\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r11, %%r11\n\t"
-        "adcxq	%%rcx, %%r11\n\t"
-        "movq	%%r10, 128(%[r])\n\t"
-        "movq	%%r11, 136(%[r])\n\t"
-        "movq	144(%[tmp]), %%r10\n\t"
-        "movq	152(%[tmp]), %%r11\n\t"
-        "# A[9] x A[9]\n\t"
-        "movq	72(%[a]), %%rdx\n\t"
-        "mulxq	%%rdx, %%rax, %%rcx\n\t"
-        "adoxq	%%r10, %%r10\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r11, %%r11\n\t"
-        "adcxq	%%rcx, %%r11\n\t"
-        "movq	%%r10, 144(%[r])\n\t"
-        "movq	%%r11, 152(%[r])\n\t"
-        "movq	160(%[tmp]), %%r10\n\t"
-        "movq	168(%[tmp]), %%r11\n\t"
-        "# A[10] x A[10]\n\t"
-        "movq	80(%[a]), %%rdx\n\t"
-        "mulxq	%%rdx, %%rax, %%rcx\n\t"
-        "adoxq	%%r10, %%r10\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r11, %%r11\n\t"
-        "adcxq	%%rcx, %%r11\n\t"
-        "movq	%%r10, 160(%[r])\n\t"
-        "movq	%%r11, 168(%[r])\n\t"
-        "movq	176(%[tmp]), %%r10\n\t"
-        "movq	184(%[tmp]), %%r11\n\t"
-        "# A[11] x A[11]\n\t"
-        "movq	88(%[a]), %%rdx\n\t"
-        "mulxq	%%rdx, %%rax, %%rcx\n\t"
-        "adoxq	%%r10, %%r10\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r11, %%r11\n\t"
-        "adcxq	%%rcx, %%r11\n\t"
-        "movq	%%r10, 176(%[r])\n\t"
-        "movq	%%r11, 184(%[r])\n\t"
-        "movq	192(%[tmp]), %%r10\n\t"
-        "movq	200(%[tmp]), %%r11\n\t"
-        "# A[12] x A[12]\n\t"
-        "movq	96(%[a]), %%rdx\n\t"
-        "mulxq	%%rdx, %%rax, %%rcx\n\t"
-        "adoxq	%%r10, %%r10\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r11, %%r11\n\t"
-        "adcxq	%%rcx, %%r11\n\t"
-        "movq	%%r10, 192(%[r])\n\t"
-        "movq	%%r11, 200(%[r])\n\t"
-        "movq	208(%[tmp]), %%r10\n\t"
-        "movq	216(%[tmp]), %%r11\n\t"
-        "# A[13] x A[13]\n\t"
-        "movq	104(%[a]), %%rdx\n\t"
-        "mulxq	%%rdx, %%rax, %%rcx\n\t"
-        "adoxq	%%r10, %%r10\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r11, %%r11\n\t"
-        "adcxq	%%rcx, %%r11\n\t"
-        "movq	%%r10, 208(%[r])\n\t"
-        "movq	%%r11, 216(%[r])\n\t"
-        "movq	224(%[tmp]), %%r10\n\t"
-        "movq	232(%[tmp]), %%r11\n\t"
-        "# A[14] x A[14]\n\t"
-        "movq	112(%[a]), %%rdx\n\t"
-        "mulxq	%%rdx, %%rax, %%rcx\n\t"
-        "adoxq	%%r10, %%r10\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r11, %%r11\n\t"
-        "adcxq	%%rcx, %%r11\n\t"
-        "movq	%%r10, 224(%[r])\n\t"
-        "movq	%%r11, 232(%[r])\n\t"
-        "movq	240(%[tmp]), %%r10\n\t"
-        "movq	248(%[tmp]), %%r11\n\t"
-        "# A[15] x A[15]\n\t"
-        "movq	120(%[a]), %%rdx\n\t"
-        "mulxq	%%rdx, %%rax, %%rcx\n\t"
-        "adoxq	%%r10, %%r10\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r11, %%r11\n\t"
-        "adcxq	%%rcx, %%r11\n\t"
-        "movq	%%r10, 240(%[r])\n\t"
-        "movq	%%r11, 248(%[r])\n\t"
-        :
-        : [r] "r" (r), [a] "r" (a), [tmp] "r" (tmp)
-        : "memory", "rax", "rdx", "rcx", "r8", "r9", "r10", "r11",
-          "r12", "r13", "r14", "r15"
-    );
-
-    XMEMCPY(r, tmp, sizeof(tmp)/2);
-}
-#endif /* HAVE_INTEL_AVX2 */
-
-/* Add b to a into r. (r = a + b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A single precision integer.
- */
-SP_NOINLINE static sp_digit sp_2048_add_16(sp_digit* r, const sp_digit* a,
-        const sp_digit* b)
-{
-    sp_digit c = 0;
-
-    __asm__ __volatile__ (
-        "movq	(%[a]), %%rax\n\t"
-        "addq	(%[b]), %%rax\n\t"
-        "movq	%%rax, (%[r])\n\t"
-        "movq	8(%[a]), %%rax\n\t"
-        "adcq	8(%[b]), %%rax\n\t"
-        "movq	%%rax, 8(%[r])\n\t"
-        "movq	16(%[a]), %%rax\n\t"
-        "adcq	16(%[b]), %%rax\n\t"
-        "movq	%%rax, 16(%[r])\n\t"
-        "movq	24(%[a]), %%rax\n\t"
-        "adcq	24(%[b]), %%rax\n\t"
-        "movq	%%rax, 24(%[r])\n\t"
-        "movq	32(%[a]), %%rax\n\t"
-        "adcq	32(%[b]), %%rax\n\t"
-        "movq	%%rax, 32(%[r])\n\t"
-        "movq	40(%[a]), %%rax\n\t"
-        "adcq	40(%[b]), %%rax\n\t"
-        "movq	%%rax, 40(%[r])\n\t"
-        "movq	48(%[a]), %%rax\n\t"
-        "adcq	48(%[b]), %%rax\n\t"
-        "movq	%%rax, 48(%[r])\n\t"
-        "movq	56(%[a]), %%rax\n\t"
-        "adcq	56(%[b]), %%rax\n\t"
-        "movq	%%rax, 56(%[r])\n\t"
-        "movq	64(%[a]), %%rax\n\t"
-        "adcq	64(%[b]), %%rax\n\t"
-        "movq	%%rax, 64(%[r])\n\t"
-        "movq	72(%[a]), %%rax\n\t"
-        "adcq	72(%[b]), %%rax\n\t"
-        "movq	%%rax, 72(%[r])\n\t"
-        "movq	80(%[a]), %%rax\n\t"
-        "adcq	80(%[b]), %%rax\n\t"
-        "movq	%%rax, 80(%[r])\n\t"
-        "movq	88(%[a]), %%rax\n\t"
-        "adcq	88(%[b]), %%rax\n\t"
-        "movq	%%rax, 88(%[r])\n\t"
-        "movq	96(%[a]), %%rax\n\t"
-        "adcq	96(%[b]), %%rax\n\t"
-        "movq	%%rax, 96(%[r])\n\t"
-        "movq	104(%[a]), %%rax\n\t"
-        "adcq	104(%[b]), %%rax\n\t"
-        "movq	%%rax, 104(%[r])\n\t"
-        "movq	112(%[a]), %%rax\n\t"
-        "adcq	112(%[b]), %%rax\n\t"
-        "movq	%%rax, 112(%[r])\n\t"
-        "movq	120(%[a]), %%rax\n\t"
-        "adcq	120(%[b]), %%rax\n\t"
-        "movq	%%rax, 120(%[r])\n\t"
-        "adcq	$0, %[c]\n\t"
-        : [c] "+r" (c)
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
-        : "memory", "rax"
-    );
-
-    return c;
-}
-
-/* Sub b from a into a. (a -= b)
- *
- * a  A single precision integer and result.
- * b  A single precision integer.
- */
-SP_NOINLINE static sp_digit sp_2048_sub_in_place_32(sp_digit* a,
-    const sp_digit* b)
-{
-    sp_digit c = 0;
-
-    __asm__ __volatile__ (
-        "movq	0(%[a]), %%r8\n\t"
-        "movq	8(%[a]), %%r9\n\t"
-        "movq	0(%[b]), %%rdx\n\t"
-        "movq	8(%[b]), %%rcx\n\t"
-        "subq	%%rdx, %%r8\n\t"
-        "movq	16(%[b]), %%rdx\n\t"
-        "movq	%%r8, 0(%[a])\n\t"
-        "movq	16(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	24(%[b]), %%rcx\n\t"
-        "movq	%%r9, 8(%[a])\n\t"
-        "movq	24(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	32(%[b]), %%rdx\n\t"
-        "movq	%%r8, 16(%[a])\n\t"
-        "movq	32(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	40(%[b]), %%rcx\n\t"
-        "movq	%%r9, 24(%[a])\n\t"
-        "movq	40(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	48(%[b]), %%rdx\n\t"
-        "movq	%%r8, 32(%[a])\n\t"
-        "movq	48(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	56(%[b]), %%rcx\n\t"
-        "movq	%%r9, 40(%[a])\n\t"
-        "movq	56(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	64(%[b]), %%rdx\n\t"
-        "movq	%%r8, 48(%[a])\n\t"
-        "movq	64(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	72(%[b]), %%rcx\n\t"
-        "movq	%%r9, 56(%[a])\n\t"
-        "movq	72(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	80(%[b]), %%rdx\n\t"
-        "movq	%%r8, 64(%[a])\n\t"
-        "movq	80(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	88(%[b]), %%rcx\n\t"
-        "movq	%%r9, 72(%[a])\n\t"
-        "movq	88(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	96(%[b]), %%rdx\n\t"
-        "movq	%%r8, 80(%[a])\n\t"
-        "movq	96(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	104(%[b]), %%rcx\n\t"
-        "movq	%%r9, 88(%[a])\n\t"
-        "movq	104(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	112(%[b]), %%rdx\n\t"
-        "movq	%%r8, 96(%[a])\n\t"
-        "movq	112(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	120(%[b]), %%rcx\n\t"
-        "movq	%%r9, 104(%[a])\n\t"
-        "movq	120(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	128(%[b]), %%rdx\n\t"
-        "movq	%%r8, 112(%[a])\n\t"
-        "movq	128(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	136(%[b]), %%rcx\n\t"
-        "movq	%%r9, 120(%[a])\n\t"
-        "movq	136(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	144(%[b]), %%rdx\n\t"
-        "movq	%%r8, 128(%[a])\n\t"
-        "movq	144(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	152(%[b]), %%rcx\n\t"
-        "movq	%%r9, 136(%[a])\n\t"
-        "movq	152(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	160(%[b]), %%rdx\n\t"
-        "movq	%%r8, 144(%[a])\n\t"
-        "movq	160(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	168(%[b]), %%rcx\n\t"
-        "movq	%%r9, 152(%[a])\n\t"
-        "movq	168(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	176(%[b]), %%rdx\n\t"
-        "movq	%%r8, 160(%[a])\n\t"
-        "movq	176(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	184(%[b]), %%rcx\n\t"
-        "movq	%%r9, 168(%[a])\n\t"
-        "movq	184(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	192(%[b]), %%rdx\n\t"
-        "movq	%%r8, 176(%[a])\n\t"
-        "movq	192(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	200(%[b]), %%rcx\n\t"
-        "movq	%%r9, 184(%[a])\n\t"
-        "movq	200(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	208(%[b]), %%rdx\n\t"
-        "movq	%%r8, 192(%[a])\n\t"
-        "movq	208(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	216(%[b]), %%rcx\n\t"
-        "movq	%%r9, 200(%[a])\n\t"
-        "movq	216(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	224(%[b]), %%rdx\n\t"
-        "movq	%%r8, 208(%[a])\n\t"
-        "movq	224(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	232(%[b]), %%rcx\n\t"
-        "movq	%%r9, 216(%[a])\n\t"
-        "movq	232(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	240(%[b]), %%rdx\n\t"
-        "movq	%%r8, 224(%[a])\n\t"
-        "movq	240(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	248(%[b]), %%rcx\n\t"
-        "movq	%%r9, 232(%[a])\n\t"
-        "movq	248(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	%%r8, 240(%[a])\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	%%r9, 248(%[a])\n\t"
-        "sbbq	$0, %[c]\n\t"
-        : [c] "+r" (c)
-        : [a] "r" (a), [b] "r" (b)
-        : "memory", "rdx", "rcx", "r8", "r9"
-    );
-
-    return c;
-}
-
-/* Add b to a into r. (r = a + b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A single precision integer.
- */
-SP_NOINLINE static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a,
-        const sp_digit* b)
-{
-    sp_digit c = 0;
-
-    __asm__ __volatile__ (
-        "movq	(%[a]), %%rax\n\t"
-        "addq	(%[b]), %%rax\n\t"
-        "movq	%%rax, (%[r])\n\t"
-        "movq	8(%[a]), %%rax\n\t"
-        "adcq	8(%[b]), %%rax\n\t"
-        "movq	%%rax, 8(%[r])\n\t"
-        "movq	16(%[a]), %%rax\n\t"
-        "adcq	16(%[b]), %%rax\n\t"
-        "movq	%%rax, 16(%[r])\n\t"
-        "movq	24(%[a]), %%rax\n\t"
-        "adcq	24(%[b]), %%rax\n\t"
-        "movq	%%rax, 24(%[r])\n\t"
-        "movq	32(%[a]), %%rax\n\t"
-        "adcq	32(%[b]), %%rax\n\t"
-        "movq	%%rax, 32(%[r])\n\t"
-        "movq	40(%[a]), %%rax\n\t"
-        "adcq	40(%[b]), %%rax\n\t"
-        "movq	%%rax, 40(%[r])\n\t"
-        "movq	48(%[a]), %%rax\n\t"
-        "adcq	48(%[b]), %%rax\n\t"
-        "movq	%%rax, 48(%[r])\n\t"
-        "movq	56(%[a]), %%rax\n\t"
-        "adcq	56(%[b]), %%rax\n\t"
-        "movq	%%rax, 56(%[r])\n\t"
-        "movq	64(%[a]), %%rax\n\t"
-        "adcq	64(%[b]), %%rax\n\t"
-        "movq	%%rax, 64(%[r])\n\t"
-        "movq	72(%[a]), %%rax\n\t"
-        "adcq	72(%[b]), %%rax\n\t"
-        "movq	%%rax, 72(%[r])\n\t"
-        "movq	80(%[a]), %%rax\n\t"
-        "adcq	80(%[b]), %%rax\n\t"
-        "movq	%%rax, 80(%[r])\n\t"
-        "movq	88(%[a]), %%rax\n\t"
-        "adcq	88(%[b]), %%rax\n\t"
-        "movq	%%rax, 88(%[r])\n\t"
-        "movq	96(%[a]), %%rax\n\t"
-        "adcq	96(%[b]), %%rax\n\t"
-        "movq	%%rax, 96(%[r])\n\t"
-        "movq	104(%[a]), %%rax\n\t"
-        "adcq	104(%[b]), %%rax\n\t"
-        "movq	%%rax, 104(%[r])\n\t"
-        "movq	112(%[a]), %%rax\n\t"
-        "adcq	112(%[b]), %%rax\n\t"
-        "movq	%%rax, 112(%[r])\n\t"
-        "movq	120(%[a]), %%rax\n\t"
-        "adcq	120(%[b]), %%rax\n\t"
-        "movq	%%rax, 120(%[r])\n\t"
-        "movq	128(%[a]), %%rax\n\t"
-        "adcq	128(%[b]), %%rax\n\t"
-        "movq	%%rax, 128(%[r])\n\t"
-        "movq	136(%[a]), %%rax\n\t"
-        "adcq	136(%[b]), %%rax\n\t"
-        "movq	%%rax, 136(%[r])\n\t"
-        "movq	144(%[a]), %%rax\n\t"
-        "adcq	144(%[b]), %%rax\n\t"
-        "movq	%%rax, 144(%[r])\n\t"
-        "movq	152(%[a]), %%rax\n\t"
-        "adcq	152(%[b]), %%rax\n\t"
-        "movq	%%rax, 152(%[r])\n\t"
-        "movq	160(%[a]), %%rax\n\t"
-        "adcq	160(%[b]), %%rax\n\t"
-        "movq	%%rax, 160(%[r])\n\t"
-        "movq	168(%[a]), %%rax\n\t"
-        "adcq	168(%[b]), %%rax\n\t"
-        "movq	%%rax, 168(%[r])\n\t"
-        "movq	176(%[a]), %%rax\n\t"
-        "adcq	176(%[b]), %%rax\n\t"
-        "movq	%%rax, 176(%[r])\n\t"
-        "movq	184(%[a]), %%rax\n\t"
-        "adcq	184(%[b]), %%rax\n\t"
-        "movq	%%rax, 184(%[r])\n\t"
-        "movq	192(%[a]), %%rax\n\t"
-        "adcq	192(%[b]), %%rax\n\t"
-        "movq	%%rax, 192(%[r])\n\t"
-        "movq	200(%[a]), %%rax\n\t"
-        "adcq	200(%[b]), %%rax\n\t"
-        "movq	%%rax, 200(%[r])\n\t"
-        "movq	208(%[a]), %%rax\n\t"
-        "adcq	208(%[b]), %%rax\n\t"
-        "movq	%%rax, 208(%[r])\n\t"
-        "movq	216(%[a]), %%rax\n\t"
-        "adcq	216(%[b]), %%rax\n\t"
-        "movq	%%rax, 216(%[r])\n\t"
-        "movq	224(%[a]), %%rax\n\t"
-        "adcq	224(%[b]), %%rax\n\t"
-        "movq	%%rax, 224(%[r])\n\t"
-        "movq	232(%[a]), %%rax\n\t"
-        "adcq	232(%[b]), %%rax\n\t"
-        "movq	%%rax, 232(%[r])\n\t"
-        "movq	240(%[a]), %%rax\n\t"
-        "adcq	240(%[b]), %%rax\n\t"
-        "movq	%%rax, 240(%[r])\n\t"
-        "movq	248(%[a]), %%rax\n\t"
-        "adcq	248(%[b]), %%rax\n\t"
-        "movq	%%rax, 248(%[r])\n\t"
-        "adcq	$0, %[c]\n\t"
-        : [c] "+r" (c)
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
-        : "memory", "rax"
-    );
-
-    return c;
-}
-
-/* AND m into each word of a and store in r.
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * m  Mask to AND against each digit.
- */
-static void sp_2048_mask_16(sp_digit* r, sp_digit* a, sp_digit m)
-{
-#ifdef WOLFSSL_SP_SMALL
-    int i;
-
-    for (i=0; i<16; i++)
-        r[i] = a[i] & m;
-#else
-    int i;
-
-    for (i = 0; i < 16; i += 8) {
-        r[i+0] = a[i+0] & m;
-        r[i+1] = a[i+1] & m;
-        r[i+2] = a[i+2] & m;
-        r[i+3] = a[i+3] & m;
-        r[i+4] = a[i+4] & m;
-        r[i+5] = a[i+5] & m;
-        r[i+6] = a[i+6] & m;
-        r[i+7] = a[i+7] & m;
-    }
-#endif
-}
-
-/* Multiply a and b into r. (r = a * b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A single precision integer.
- */
-SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a,
-        const sp_digit* b)
-{
-    sp_digit* z0 = r;
-    sp_digit z1[32];
-    sp_digit a1[16];
-    sp_digit b1[16];
-    sp_digit z2[32];
-    sp_digit u, ca, cb;
-
-    ca = sp_2048_add_16(a1, a, &a[16]);
-    cb = sp_2048_add_16(b1, b, &b[16]);
-    u  = ca & cb;
-    sp_2048_mul_16(z1, a1, b1);
-    sp_2048_mul_16(z2, &a[16], &b[16]);
-    sp_2048_mul_16(z0, a, b);
-    sp_2048_mask_16(r + 32, a1, 0 - cb);
-    sp_2048_mask_16(b1, b1, 0 - ca);
-    u += sp_2048_add_16(r + 32, r + 32, b1);
-    u += sp_2048_sub_in_place_32(z1, z2);
-    u += sp_2048_sub_in_place_32(z1, z0);
-    u += sp_2048_add_32(r + 16, r + 16, z1);
-    r[48] = u;
-    XMEMSET(r + 48 + 1, 0, sizeof(sp_digit) * (16 - 1));
-    sp_2048_add_32(r + 32, r + 32, z2);
-}
-
-/* Square a and put result in r. (r = a * a)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- */
-SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a)
-{
-    sp_digit* z0 = r;
-    sp_digit z2[32];
-    sp_digit z1[32];
-    sp_digit a1[16];
-    sp_digit u;
-
-    u = sp_2048_add_16(a1, a, &a[16]);
-    sp_2048_sqr_16(z1, a1);
-    sp_2048_sqr_16(z2, &a[16]);
-    sp_2048_sqr_16(z0, a);
-    sp_2048_mask_16(r + 32, a1, 0 - u);
-    u += sp_2048_add_16(r + 32, r + 32, r + 32);
-    u += sp_2048_sub_in_place_32(z1, z2);
-    u += sp_2048_sub_in_place_32(z1, z0);
-    u += sp_2048_add_32(r + 16, r + 16, z1);
-    r[48] = u;
-    XMEMSET(r + 48 + 1, 0, sizeof(sp_digit) * (16 - 1));
-    sp_2048_add_32(r + 32, r + 32, z2);
-}
-
-#ifdef HAVE_INTEL_AVX2
-/* Multiply a and b into r. (r = a * b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A single precision integer.
- */
-SP_NOINLINE static void sp_2048_mul_avx2_32(sp_digit* r, const sp_digit* a,
-        const sp_digit* b)
-{
-    sp_digit* z0 = r;
-    sp_digit z1[32];
-    sp_digit a1[16];
-    sp_digit b1[16];
-    sp_digit z2[32];
-    sp_digit u, ca, cb;
-
-    ca = sp_2048_add_16(a1, a, &a[16]);
-    cb = sp_2048_add_16(b1, b, &b[16]);
-    u  = ca & cb;
-    sp_2048_mul_avx2_16(z1, a1, b1);
-    sp_2048_mul_avx2_16(z2, &a[16], &b[16]);
-    sp_2048_mul_avx2_16(z0, a, b);
-    sp_2048_mask_16(r + 32, a1, 0 - cb);
-    sp_2048_mask_16(b1, b1, 0 - ca);
-    u += sp_2048_add_16(r + 32, r + 32, b1);
-    u += sp_2048_sub_in_place_32(z1, z2);
-    u += sp_2048_sub_in_place_32(z1, z0);
-    u += sp_2048_add_32(r + 16, r + 16, z1);
-    r[48] = u;
-    XMEMSET(r + 48 + 1, 0, sizeof(sp_digit) * (16 - 1));
-    sp_2048_add_32(r + 32, r + 32, z2);
-}
-#endif /* HAVE_INTEL_AVX2 */
-
-#ifdef HAVE_INTEL_AVX2
-/* Square a and put result in r. (r = a * a)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- */
-SP_NOINLINE static void sp_2048_sqr_avx2_32(sp_digit* r, const sp_digit* a)
-{
-    sp_digit* z0 = r;
-    sp_digit z2[32];
-    sp_digit z1[32];
-    sp_digit a1[16];
-    sp_digit u;
-
-    u = sp_2048_add_16(a1, a, &a[16]);
-    sp_2048_sqr_avx2_16(z1, a1);
-    sp_2048_sqr_avx2_16(z2, &a[16]);
-    sp_2048_sqr_avx2_16(z0, a);
-    sp_2048_mask_16(r + 32, a1, 0 - u);
-    u += sp_2048_add_16(r + 32, r + 32, r + 32);
-    u += sp_2048_sub_in_place_32(z1, z2);
-    u += sp_2048_sub_in_place_32(z1, z0);
-    u += sp_2048_add_32(r + 16, r + 16, z1);
-    r[48] = u;
-    XMEMSET(r + 48 + 1, 0, sizeof(sp_digit) * (16 - 1));
-    sp_2048_add_32(r + 32, r + 32, z2);
-}
-#endif /* HAVE_INTEL_AVX2 */
-
-#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA)
-#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */
-
-/* Caclulate the bottom digit of -1/a mod 2^n.
- *
- * a    A single precision number.
- * rho  Bottom word of inverse.
- */
-static void sp_2048_mont_setup(sp_digit* a, sp_digit* rho)
-{
-    sp_digit x, b;
-
-    b = a[0];
-    x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
-    x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
-    x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
-    x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
-    x *= 2 - b * x;               /* here x*a==1 mod 2**64 */
-
-    /* rho = -1/m mod b */
-    *rho = -x;
-}
-
-#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA)
-/* Sub b from a into a. (a -= b)
- *
- * a  A single precision integer and result.
- * b  A single precision integer.
- */
-SP_NOINLINE static sp_digit sp_2048_sub_in_place_16(sp_digit* a,
-    const sp_digit* b)
-{
-    sp_digit c = 0;
-
-    __asm__ __volatile__ (
-        "movq	0(%[a]), %%r8\n\t"
-        "movq	8(%[a]), %%r9\n\t"
-        "movq	0(%[b]), %%rdx\n\t"
-        "movq	8(%[b]), %%rcx\n\t"
-        "subq	%%rdx, %%r8\n\t"
-        "movq	16(%[b]), %%rdx\n\t"
-        "movq	%%r8, 0(%[a])\n\t"
-        "movq	16(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	24(%[b]), %%rcx\n\t"
-        "movq	%%r9, 8(%[a])\n\t"
-        "movq	24(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	32(%[b]), %%rdx\n\t"
-        "movq	%%r8, 16(%[a])\n\t"
-        "movq	32(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	40(%[b]), %%rcx\n\t"
-        "movq	%%r9, 24(%[a])\n\t"
-        "movq	40(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	48(%[b]), %%rdx\n\t"
-        "movq	%%r8, 32(%[a])\n\t"
-        "movq	48(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	56(%[b]), %%rcx\n\t"
-        "movq	%%r9, 40(%[a])\n\t"
-        "movq	56(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	64(%[b]), %%rdx\n\t"
-        "movq	%%r8, 48(%[a])\n\t"
-        "movq	64(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	72(%[b]), %%rcx\n\t"
-        "movq	%%r9, 56(%[a])\n\t"
-        "movq	72(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	80(%[b]), %%rdx\n\t"
-        "movq	%%r8, 64(%[a])\n\t"
-        "movq	80(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	88(%[b]), %%rcx\n\t"
-        "movq	%%r9, 72(%[a])\n\t"
-        "movq	88(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	96(%[b]), %%rdx\n\t"
-        "movq	%%r8, 80(%[a])\n\t"
-        "movq	96(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	104(%[b]), %%rcx\n\t"
-        "movq	%%r9, 88(%[a])\n\t"
-        "movq	104(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	112(%[b]), %%rdx\n\t"
-        "movq	%%r8, 96(%[a])\n\t"
-        "movq	112(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	120(%[b]), %%rcx\n\t"
-        "movq	%%r9, 104(%[a])\n\t"
-        "movq	120(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	%%r8, 112(%[a])\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	%%r9, 120(%[a])\n\t"
-        "sbbq	$0, %[c]\n\t"
-        : [c] "+r" (c)
-        : [a] "r" (a), [b] "r" (b)
-        : "memory", "rdx", "rcx", "r8", "r9"
-    );
-
-    return c;
-}
-
-/* r = 2^n mod m where n is the number of bits to reduce by.
- * Given m must be 2048 bits, just need to subtract.
- *
- * r  A single precision number.
- * m  A signle precision number.
- */
-static void sp_2048_mont_norm_16(sp_digit* r, sp_digit* m)
-{
-    XMEMSET(r, 0, sizeof(sp_digit) * 16);
-
-    /* r = 2^n mod m */
-    sp_2048_sub_in_place_16(r, m);
-}
-
-/* Conditionally subtract b from a using the mask m.
- * m is -1 to subtract and 0 when not copying.
- *
- * r  A single precision number representing condition subtract result.
- * a  A single precision number to subtract from.
- * b  A single precision number to subtract.
- * m  Mask value to apply.
- */
-static sp_digit sp_2048_cond_sub_16(sp_digit* r, sp_digit* a, sp_digit* b,
-        sp_digit m)
-{
-    sp_digit t[16];
-    sp_digit c = 0;
-
-    __asm__ __volatile__ (
-        "movq	0(%[b]), %%rax\n\t"
-        "movq	8(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 0(%[t])\n\t"
-        "movq	%%rcx, 8(%[t])\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "movq	24(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 16(%[t])\n\t"
-        "movq	%%rcx, 24(%[t])\n\t"
-        "movq	32(%[b]), %%rax\n\t"
-        "movq	40(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 32(%[t])\n\t"
-        "movq	%%rcx, 40(%[t])\n\t"
-        "movq	48(%[b]), %%rax\n\t"
-        "movq	56(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 48(%[t])\n\t"
-        "movq	%%rcx, 56(%[t])\n\t"
-        "movq	64(%[b]), %%rax\n\t"
-        "movq	72(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 64(%[t])\n\t"
-        "movq	%%rcx, 72(%[t])\n\t"
-        "movq	80(%[b]), %%rax\n\t"
-        "movq	88(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 80(%[t])\n\t"
-        "movq	%%rcx, 88(%[t])\n\t"
-        "movq	96(%[b]), %%rax\n\t"
-        "movq	104(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 96(%[t])\n\t"
-        "movq	%%rcx, 104(%[t])\n\t"
-        "movq	112(%[b]), %%rax\n\t"
-        "movq	120(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 112(%[t])\n\t"
-        "movq	%%rcx, 120(%[t])\n\t"
-        "movq	(%[a]), %%rax\n\t"
-        "movq	(%[t]), %%rdx\n\t"
-        "subq	%%rdx,%%rax\n\t"
-        "movq	8(%[a]), %%rcx\n\t"
-        "movq	8(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 0(%[r])\n\t"
-        "movq	16(%[a]), %%rax\n\t"
-        "movq	16(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 8(%[r])\n\t"
-        "movq	24(%[a]), %%rcx\n\t"
-        "movq	24(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 16(%[r])\n\t"
-        "movq	32(%[a]), %%rax\n\t"
-        "movq	32(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 24(%[r])\n\t"
-        "movq	40(%[a]), %%rcx\n\t"
-        "movq	40(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 32(%[r])\n\t"
-        "movq	48(%[a]), %%rax\n\t"
-        "movq	48(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 40(%[r])\n\t"
-        "movq	56(%[a]), %%rcx\n\t"
-        "movq	56(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 48(%[r])\n\t"
-        "movq	64(%[a]), %%rax\n\t"
-        "movq	64(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 56(%[r])\n\t"
-        "movq	72(%[a]), %%rcx\n\t"
-        "movq	72(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 64(%[r])\n\t"
-        "movq	80(%[a]), %%rax\n\t"
-        "movq	80(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 72(%[r])\n\t"
-        "movq	88(%[a]), %%rcx\n\t"
-        "movq	88(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 80(%[r])\n\t"
-        "movq	96(%[a]), %%rax\n\t"
-        "movq	96(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 88(%[r])\n\t"
-        "movq	104(%[a]), %%rcx\n\t"
-        "movq	104(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 96(%[r])\n\t"
-        "movq	112(%[a]), %%rax\n\t"
-        "movq	112(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 104(%[r])\n\t"
-        "movq	120(%[a]), %%rcx\n\t"
-        "movq	120(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 112(%[r])\n\t"
-        "movq	%%rcx, 120(%[r])\n\t"
-        "sbbq	$0, %[c]\n\t"
-        : [c] "+r" (c)
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m), [t] "r" (t)
-        : "memory", "rax", "rcx", "rdx"
-    );
-
-    return c;
-}
-
-/* Reduce the number back to 2048 bits using Montgomery reduction.
- *
- * a   A single precision number to reduce in place.
- * m   The single precision number representing the modulus.
- * mp  The digit representing the negative inverse of m mod 2^n.
- */
-SP_NOINLINE static void sp_2048_mont_reduce_16(sp_digit* a, sp_digit* m,
-        sp_digit mp)
-{
-    sp_digit ca = 0;
-
-    __asm__ __volatile__ (
-        "# i = 0\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "movq	0(%[a]), %%r12\n\t"
-        "movq	8(%[a]), %%r13\n\t"
-        "\nL_mont_loop_16:\n\t"
-        "# mu = a[i] * mp\n\t"
-        "movq	%%r12, %%r10\n\t"
-        "imulq	%[mp], %%r10\n\t"
-        "# a[i+0] += m[0] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	0(%[m])\n\t"
-        "addq	%%rax, %%r12\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "# a[i+1] += m[1] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	8(%[m])\n\t"
-        "movq	%%r13, %%r12\n\t"
-        "addq	%%rax, %%r12\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r12\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+2] += m[2] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	16(%[m])\n\t"
-        "movq	16(%[a]), %%r13\n\t"
-        "addq	%%rax, %%r13\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r13\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+3] += m[3] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	24(%[m])\n\t"
-        "movq	24(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 24(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+4] += m[4] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	32(%[m])\n\t"
-        "movq	32(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 32(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+5] += m[5] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	40(%[m])\n\t"
-        "movq	40(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 40(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+6] += m[6] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	48(%[m])\n\t"
-        "movq	48(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 48(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+7] += m[7] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	56(%[m])\n\t"
-        "movq	56(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 56(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+8] += m[8] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	64(%[m])\n\t"
-        "movq	64(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 64(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+9] += m[9] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	72(%[m])\n\t"
-        "movq	72(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 72(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+10] += m[10] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	80(%[m])\n\t"
-        "movq	80(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 80(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+11] += m[11] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	88(%[m])\n\t"
-        "movq	88(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 88(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+12] += m[12] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	96(%[m])\n\t"
-        "movq	96(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 96(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+13] += m[13] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	104(%[m])\n\t"
-        "movq	104(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 104(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+14] += m[14] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	112(%[m])\n\t"
-        "movq	112(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 112(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+15] += m[15] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "mulq	120(%[m])\n\t"
-        "movq	120(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r9\n\t"
-        "adcq	%[ca], %%rdx\n\t"
-        "movq	$0, %[ca]\n\t"
-        "adcq	$0, %[ca]\n\t"
-        "addq	%%r9, %%r11\n\t"
-        "movq	%%r11, 120(%[a])\n\t"
-        "adcq	%%rdx, 128(%[a])\n\t"
-        "adcq	$0, %[ca]\n\t"
-        "# i += 1\n\t"
-        "addq	$8, %[a]\n\t"
-        "addq	$8, %%rcx\n\t"
-        "cmpq	$128, %%rcx\n\t"
-        "jl	L_mont_loop_16\n\t"
-        "movq	%%r12, 0(%[a])\n\t"
-        "movq	%%r13, 8(%[a])\n\t"
-        : [ca] "+r" (ca), [a] "+r" (a)
-        : [m] "r" (m), [mp] "r" (mp)
-        : "memory", "rax", "rdx", "rcx", "r8", "r9", "r10", "r11",
-          "r12", "r13"
-    );
-
-    sp_2048_cond_sub_16(a - 16, a, m, (sp_digit)0 - ca);
-}
-
-/* Multiply two Montogmery form numbers mod the modulus (prime).
- * (r = a * b mod m)
- *
- * r   Result of multiplication.
- * a   First number to multiply in Montogmery form.
- * b   Second number to multiply in Montogmery form.
- * m   Modulus (prime).
- * mp  Montogmery mulitplier.
- */
-static void sp_2048_mont_mul_16(sp_digit* r, sp_digit* a, sp_digit* b,
-        sp_digit* m, sp_digit mp)
-{
-    sp_2048_mul_16(r, a, b);
-    sp_2048_mont_reduce_16(r, m, mp);
-}
-
-/* Square the Montgomery form number. (r = a * a mod m)
- *
- * r   Result of squaring.
- * a   Number to square in Montogmery form.
- * m   Modulus (prime).
- * mp  Montogmery mulitplier.
- */
-static void sp_2048_mont_sqr_16(sp_digit* r, sp_digit* a, sp_digit* m,
-        sp_digit mp)
-{
-    sp_2048_sqr_16(r, a);
-    sp_2048_mont_reduce_16(r, m, mp);
-}
-
-/* Mul a by digit b into r. (r = a * b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A single precision digit.
- */
-SP_NOINLINE static void sp_2048_mul_d_16(sp_digit* r, const sp_digit* a,
-        const sp_digit b)
-{
-    __asm__ __volatile__ (
-        "# A[0] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	(%[a])\n\t"
-        "movq	%%rax, %%rbx\n\t"
-        "movq	%%rdx, %%rcx\n\t"
-        "movq	%%rbx, 0(%[r])\n\t"
-        "# A[1] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "movq	%%rcx, 8(%[r])\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "# A[2] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "movq	%%r8, 16(%[r])\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "# A[3] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "movq	%%rbx, 24(%[r])\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# A[4] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "movq	%%rcx, 32(%[r])\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "# A[5] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "movq	%%r8, 40(%[r])\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "# A[6] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "movq	%%rbx, 48(%[r])\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# A[7] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "movq	%%rcx, 56(%[r])\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "# A[8] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "movq	%%r8, 64(%[r])\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "# A[9] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "movq	%%rbx, 72(%[r])\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# A[10] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "movq	%%rcx, 80(%[r])\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "# A[11] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "movq	%%r8, 88(%[r])\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "# A[12] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "movq	%%rbx, 96(%[r])\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# A[13] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "movq	%%rcx, 104(%[r])\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "# A[14] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "movq	%%r8, 112(%[r])\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "# A[15] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "movq	%%rbx, 120(%[r])\n\t"
-        "movq	%%rcx, 128(%[r])\n\t"
-        :
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
-        : "memory", "rax", "rdx", "rbx", "rcx", "r8"
-    );
-}
-
-#ifdef HAVE_INTEL_AVX2
-/* Mul a by digit b into r. (r = a * b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A single precision digit.
- */
-SP_NOINLINE static void sp_2048_mul_d_avx2_16(sp_digit* r, const sp_digit* a,
-        const sp_digit b)
-{
-    __asm__ __volatile__ (
-        "# A[0] * B\n\t"
-        "movq	%[b], %%rdx\n\t"
-        "xorq	%%r10, %%r10\n\t"
-        "mulxq	(%[a]), %%r8, %%r9\n\t"
-        "movq	%%r8, 0(%[r])\n\t"
-        "# A[1] * B\n\t"
-        "mulxq	8(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 8(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[2] * B\n\t"
-        "mulxq	16(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 16(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[3] * B\n\t"
-        "mulxq	24(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 24(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[4] * B\n\t"
-        "mulxq	32(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 32(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[5] * B\n\t"
-        "mulxq	40(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 40(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[6] * B\n\t"
-        "mulxq	48(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 48(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[7] * B\n\t"
-        "mulxq	56(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 56(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[8] * B\n\t"
-        "mulxq	64(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 64(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[9] * B\n\t"
-        "mulxq	72(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 72(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[10] * B\n\t"
-        "mulxq	80(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 80(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[11] * B\n\t"
-        "mulxq	88(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 88(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[12] * B\n\t"
-        "mulxq	96(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 96(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[13] * B\n\t"
-        "mulxq	104(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 104(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[14] * B\n\t"
-        "mulxq	112(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 112(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[15] * B\n\t"
-        "mulxq	120(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "adcxq	%%r10, %%r8\n\t"
-        "movq	%%r9, 120(%[r])\n\t"
-        "movq	%%r8, 128(%[r])\n\t"
-        :
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
-        : "memory", "rax", "rdx", "rcx", "r8", "r9", "r10"
-    );
-}
-#endif /* HAVE_INTEL_AVX2 */
-
-/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
- *
- * d1   The high order half of the number to divide.
- * d0   The low order half of the number to divide.
- * div  The dividend.
- * returns the result of the division.
- */
-static sp_digit div_2048_word_16(sp_digit d1, sp_digit d0, sp_digit div)
-{
-    sp_digit r;
-
-    __asm__ __volatile__ (
-        "movq	%[d0], %%rax\n\t"
-        "movq	%[d1], %%rdx\n\t"
-        "divq	%[div]\n\t"
-        "movq	%%rax, %[r]\n\t"
-        : [r] "=r" (r)
-        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
-        : "rax", "rdx"
-    );
-
-    return r;
-}
-
-/* Compare a with b in constant time.
- *
- * a  A single precision integer.
- * b  A single precision integer.
- * return -ve, 0 or +ve if a is less than, equal to or greater than b
- * respectively.
- */
-static int64_t sp_2048_cmp_16(sp_digit* a, sp_digit* b)
-{
-    sp_digit r = -1;
-    sp_digit one = 1;
-
-    __asm__ __volatile__ (
-        "xorq	%%rcx, %%rcx\n\t"
-        "movq	$-1, %%rdx\n\t"
-        "movq	120(%[a]), %%rbx\n\t"
-        "movq	120(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	112(%[a]), %%rbx\n\t"
-        "movq	112(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	104(%[a]), %%rbx\n\t"
-        "movq	104(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	96(%[a]), %%rbx\n\t"
-        "movq	96(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	88(%[a]), %%rbx\n\t"
-        "movq	88(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	80(%[a]), %%rbx\n\t"
-        "movq	80(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	72(%[a]), %%rbx\n\t"
-        "movq	72(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	64(%[a]), %%rbx\n\t"
-        "movq	64(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	56(%[a]), %%rbx\n\t"
-        "movq	56(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	48(%[a]), %%rbx\n\t"
-        "movq	48(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	40(%[a]), %%rbx\n\t"
-        "movq	40(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	32(%[a]), %%rbx\n\t"
-        "movq	32(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	24(%[a]), %%rbx\n\t"
-        "movq	24(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	16(%[a]), %%rbx\n\t"
-        "movq	16(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	8(%[a]), %%rbx\n\t"
-        "movq	8(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	0(%[a]), %%rbx\n\t"
-        "movq	0(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "xorq	%%rdx, %[r]\n\t"
-        : [r] "+r" (r)
-        : [a] "r" (a), [b] "r" (b), [one] "r" (one)
-        : "rax", "rdx", "rcx", "rbx", "r8"
-    );
-
-    return r;
-}
-
-/* Divide d in a and put remainder into r (m*d + r = a)
- * m is not calculated as it is not needed at this time.
- *
- * a  Nmber to be divided.
- * d  Number to divide with.
- * m  Multiplier result.
- * r  Remainder from the division.
- * returns MP_OKAY indicating success.
- */
-static WC_INLINE int sp_2048_div_16(sp_digit* a, sp_digit* d, sp_digit* m,
-        sp_digit* r)
-{
-    sp_digit t1[32], t2[17];
-    sp_digit div, r1;
-    int i;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    (void)m;
-
-    div = d[15];
-    XMEMCPY(t1, a, sizeof(*t1) * 2 * 16);
-    for (i=15; i>=0; i--) {
-        r1 = div_2048_word_16(t1[16 + i], t1[16 + i - 1], div);
-
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            sp_2048_mul_d_avx2_16(t2, d, r1);
-        else
-#endif
-            sp_2048_mul_d_16(t2, d, r1);
-        t1[16 + i] += sp_2048_sub_in_place_16(&t1[i], t2);
-        t1[16 + i] -= t2[16];
-        sp_2048_mask_16(t2, d, t1[16 + i]);
-        t1[16 + i] += sp_2048_add_16(&t1[i], &t1[i], t2);
-        sp_2048_mask_16(t2, d, t1[16 + i]);
-        t1[16 + i] += sp_2048_add_16(&t1[i], &t1[i], t2);
-    }
-
-    r1 = sp_2048_cmp_16(t1, d) >= 0;
-    sp_2048_cond_sub_16(r, t1, t2, (sp_digit)0 - r1);
-
-    return MP_OKAY;
-}
-
-/* Reduce a modulo m into r. (r = a mod m)
- *
- * r  A single precision number that is the reduced result.
- * a  A single precision number that is to be reduced.
- * m  A single precision number that is the modulus to reduce with.
- * returns MP_OKAY indicating success.
- */
-static WC_INLINE int sp_2048_mod_16(sp_digit* r, sp_digit* a, sp_digit* m)
-{
-    return sp_2048_div_16(a, m, NULL, r);
-}
-
-/* Modular exponentiate a to the e mod m. (r = a^e mod m)
- *
- * r     A single precision number that is the result of the operation.
- * a     A single precision number being exponentiated.
- * e     A single precision number that is the exponent.
- * bits  The number of bits in the exponent.
- * m     A single precision number that is the modulus.
- * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
- */
-static int sp_2048_mod_exp_16(sp_digit* r, sp_digit* a, sp_digit* e,
-        int bits, sp_digit* m, int reduceA)
-{
-#ifndef WOLFSSL_SMALL_STACK
-    sp_digit t[32][32];
-#else
-    sp_digit* t[32];
-    sp_digit* td;
-#endif
-    sp_digit* norm;
-    sp_digit mp = 1;
-    sp_digit n;
-    sp_digit mask;
-    int i;
-    int c, y;
-    int err = MP_OKAY;
-
-#ifdef WOLFSSL_SMALL_STACK
-    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 32, NULL,
-                            DYNAMIC_TYPE_TMP_BUFFER);
-    if (td == NULL)
-        err = MEMORY_E;
-
-    if (err == MP_OKAY) {
-        for (i=0; i<32; i++)
-            t[i] = td + i * 32;
-        norm = t[0];
-    }
-#else
-    norm = t[0];
-#endif
-
-    if (err == MP_OKAY) {
-        sp_2048_mont_setup(m, &mp);
-        sp_2048_mont_norm_16(norm, m);
-
-        XMEMSET(t[1], 0, sizeof(sp_digit) * 16);
-        if (reduceA) {
-            err = sp_2048_mod_16(t[1] + 16, a, m);
-            if (err == MP_OKAY)
-                err = sp_2048_mod_16(t[1], t[1], m);
-        }
-        else {
-            XMEMCPY(t[1] + 16, a, sizeof(sp_digit) * 16);
-            err = sp_2048_mod_16(t[1], t[1], m);
-        }
-    }
-
-    if (err == MP_OKAY) {
-        sp_2048_mont_sqr_16(t[ 2], t[ 1], m, mp);
-        sp_2048_mont_mul_16(t[ 3], t[ 2], t[ 1], m, mp);
-        sp_2048_mont_sqr_16(t[ 4], t[ 2], m, mp);
-        sp_2048_mont_mul_16(t[ 5], t[ 3], t[ 2], m, mp);
-        sp_2048_mont_sqr_16(t[ 6], t[ 3], m, mp);
-        sp_2048_mont_mul_16(t[ 7], t[ 4], t[ 3], m, mp);
-        sp_2048_mont_sqr_16(t[ 8], t[ 4], m, mp);
-        sp_2048_mont_mul_16(t[ 9], t[ 5], t[ 4], m, mp);
-        sp_2048_mont_sqr_16(t[10], t[ 5], m, mp);
-        sp_2048_mont_mul_16(t[11], t[ 6], t[ 5], m, mp);
-        sp_2048_mont_sqr_16(t[12], t[ 6], m, mp);
-        sp_2048_mont_mul_16(t[13], t[ 7], t[ 6], m, mp);
-        sp_2048_mont_sqr_16(t[14], t[ 7], m, mp);
-        sp_2048_mont_mul_16(t[15], t[ 8], t[ 7], m, mp);
-        sp_2048_mont_sqr_16(t[16], t[ 8], m, mp);
-        sp_2048_mont_mul_16(t[17], t[ 9], t[ 8], m, mp);
-        sp_2048_mont_sqr_16(t[18], t[ 9], m, mp);
-        sp_2048_mont_mul_16(t[19], t[10], t[ 9], m, mp);
-        sp_2048_mont_sqr_16(t[20], t[10], m, mp);
-        sp_2048_mont_mul_16(t[21], t[11], t[10], m, mp);
-        sp_2048_mont_sqr_16(t[22], t[11], m, mp);
-        sp_2048_mont_mul_16(t[23], t[12], t[11], m, mp);
-        sp_2048_mont_sqr_16(t[24], t[12], m, mp);
-        sp_2048_mont_mul_16(t[25], t[13], t[12], m, mp);
-        sp_2048_mont_sqr_16(t[26], t[13], m, mp);
-        sp_2048_mont_mul_16(t[27], t[14], t[13], m, mp);
-        sp_2048_mont_sqr_16(t[28], t[14], m, mp);
-        sp_2048_mont_mul_16(t[29], t[15], t[14], m, mp);
-        sp_2048_mont_sqr_16(t[30], t[15], m, mp);
-        sp_2048_mont_mul_16(t[31], t[16], t[15], m, mp);
-
-        i = (bits - 1) / 64;
-        n = e[i--];
-        y = n >> 59;
-        n <<= 5;
-        c = 59;
-        XMEMCPY(r, t[y], sizeof(sp_digit) * 16);
-        for (; i>=0 || c>=5; ) {
-            if (c == 0) {
-                n = e[i--];
-                y = n >> 59;
-                n <<= 5;
-                c = 59;
-            }
-            else if (c < 5) {
-                y = n >> 59;
-                n = e[i--];
-                c = 5 - c;
-                y |= n >> (64 - c);
-                n <<= c;
-                c = 64 - c;
-            }
-            else {
-                y = (n >> 59) & 0x1f;
-                n <<= 5;
-                c -= 5;
-            }
-
-            sp_2048_mont_sqr_16(r, r, m, mp);
-            sp_2048_mont_sqr_16(r, r, m, mp);
-            sp_2048_mont_sqr_16(r, r, m, mp);
-            sp_2048_mont_sqr_16(r, r, m, mp);
-            sp_2048_mont_sqr_16(r, r, m, mp);
-
-            sp_2048_mont_mul_16(r, r, t[y], m, mp);
-        }
-        y = e[0] & ((1 << c) - 1);
-        for (; c > 0; c--)
-            sp_2048_mont_sqr_16(r, r, m, mp);
-        sp_2048_mont_mul_16(r, r, t[y], m, mp);
-
-        XMEMSET(&r[16], 0, sizeof(sp_digit) * 16);
-        sp_2048_mont_reduce_16(r, m, mp);
-
-        mask = 0 - (sp_2048_cmp_16(r, m) >= 0);
-        sp_2048_cond_sub_16(r, r, m, mask);
-    }
-
-#ifdef WOLFSSL_SMALL_STACK
-    if (td != NULL)
-        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return err;
-}
-
-#ifdef HAVE_INTEL_AVX2
-/* Reduce the number back to 2048 bits using Montgomery reduction.
- *
- * a   A single precision number to reduce in place.
- * m   The single precision number representing the modulus.
- * mp  The digit representing the negative inverse of m mod 2^n.
- */
-SP_NOINLINE static void sp_2048_mont_reduce_avx2_16(sp_digit* a, sp_digit* m,
-        sp_digit mp)
-{
-    sp_digit ca = 0;
-
-    __asm__ __volatile__ (
-        "# i = 0\n\t"
-        "movq	0(%[a]), %%r12\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "\nL_mont_loop_avx2_16:\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "movq	%%r12, %%r10\n\t"
-        "# mu = a[i] * mp\n\t"
-        "movq	%%r12, %%rdx\n\t"
-        "mulxq	%[mp], %%rdx, %%r8\n\t"
-        "# a[i+0] += m[0] * mu\n\t"
-        "mulxq	0(%[m]), %%rax, %%r8\n\t"
-        "movq	8(%[a]), %%r12\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r12\n\t"
-        "# a[i+1] += m[1] * mu\n\t"
-        "mulxq	8(%[m]), %%rax, %%r8\n\t"
-        "movq	16(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "# a[i+2] += m[2] * mu\n\t"
-        "mulxq	16(%[m]), %%rax, %%r8\n\t"
-        "movq	24(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 16(%[a])\n\t"
-        "# a[i+3] += m[3] * mu\n\t"
-        "mulxq	24(%[m]), %%rax, %%r8\n\t"
-        "movq	32(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 24(%[a])\n\t"
-        "# a[i+4] += m[4] * mu\n\t"
-        "mulxq	32(%[m]), %%rax, %%r8\n\t"
-        "movq	40(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 32(%[a])\n\t"
-        "# a[i+5] += m[5] * mu\n\t"
-        "mulxq	40(%[m]), %%rax, %%r8\n\t"
-        "movq	48(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 40(%[a])\n\t"
-        "# a[i+6] += m[6] * mu\n\t"
-        "mulxq	48(%[m]), %%rax, %%r8\n\t"
-        "movq	56(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 48(%[a])\n\t"
-        "# a[i+7] += m[7] * mu\n\t"
-        "mulxq	56(%[m]), %%rax, %%r8\n\t"
-        "movq	64(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 56(%[a])\n\t"
-        "# a[i+8] += m[8] * mu\n\t"
-        "mulxq	64(%[m]), %%rax, %%r8\n\t"
-        "movq	72(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 64(%[a])\n\t"
-        "# a[i+9] += m[9] * mu\n\t"
-        "mulxq	72(%[m]), %%rax, %%r8\n\t"
-        "movq	80(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 72(%[a])\n\t"
-        "# a[i+10] += m[10] * mu\n\t"
-        "mulxq	80(%[m]), %%rax, %%r8\n\t"
-        "movq	88(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 80(%[a])\n\t"
-        "# a[i+11] += m[11] * mu\n\t"
-        "mulxq	88(%[m]), %%rax, %%r8\n\t"
-        "movq	96(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 88(%[a])\n\t"
-        "# a[i+12] += m[12] * mu\n\t"
-        "mulxq	96(%[m]), %%rax, %%r8\n\t"
-        "movq	104(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 96(%[a])\n\t"
-        "# a[i+13] += m[13] * mu\n\t"
-        "mulxq	104(%[m]), %%rax, %%r8\n\t"
-        "movq	112(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 104(%[a])\n\t"
-        "# a[i+14] += m[14] * mu\n\t"
-        "mulxq	112(%[m]), %%rax, %%r8\n\t"
-        "movq	120(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 112(%[a])\n\t"
-        "# a[i+15] += m[15] * mu\n\t"
-        "mulxq	120(%[m]), %%rax, %%r8\n\t"
-        "movq	128(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 120(%[a])\n\t"
-        "adcxq	%[ca], %%r10\n\t"
-        "movq	%%r9, %[ca]\n\t"
-        "adoxq	%%r9, %[ca]\n\t"
-        "adcxq	%%r9, %[ca]\n\t"
-        "movq	%%r10, 128(%[a])\n\t"
-        "# i += 1\n\t"
-        "addq	$8, %[a]\n\t"
-        "addq	$1, %%rcx\n\t"
-        "cmpq	$16, %%rcx\n\t"
-        "jl	L_mont_loop_avx2_16\n\t"
-        "movq	%%r12, 0(%[a])\n\t"
-        : [ca] "+r" (ca), [a] "+r" (a)
-        : [m] "r" (m), [mp] "r" (mp)
-        : "memory", "rax", "rdx", "rcx", "r8", "r9", "r10", "r11", "r12"
-    );
-
-    sp_2048_cond_sub_16(a - 16, a, m, (sp_digit)0 - ca);
-}
-#endif /* HAVE_INTEL_AVX2 */
-
-#ifdef HAVE_INTEL_AVX2
-/* Multiply two Montogmery form numbers mod the modulus (prime).
- * (r = a * b mod m)
- *
- * r   Result of multiplication.
- * a   First number to multiply in Montogmery form.
- * b   Second number to multiply in Montogmery form.
- * m   Modulus (prime).
- * mp  Montogmery mulitplier.
- */
-static void sp_2048_mont_mul_avx2_16(sp_digit* r, sp_digit* a, sp_digit* b,
-        sp_digit* m, sp_digit mp)
-{
-    sp_2048_mul_avx2_16(r, a, b);
-    sp_2048_mont_reduce_avx2_16(r, m, mp);
-}
-
-#endif /* HAVE_INTEL_AVX2 */
-#ifdef HAVE_INTEL_AVX2
-/* Square the Montgomery form number. (r = a * a mod m)
- *
- * r   Result of squaring.
- * a   Number to square in Montogmery form.
- * m   Modulus (prime).
- * mp  Montogmery mulitplier.
- */
-static void sp_2048_mont_sqr_avx2_16(sp_digit* r, sp_digit* a, sp_digit* m,
-        sp_digit mp)
-{
-    sp_2048_sqr_avx2_16(r, a);
-    sp_2048_mont_reduce_avx2_16(r, m, mp);
-}
-
-#endif /* HAVE_INTEL_AVX2 */
-#ifdef HAVE_INTEL_AVX2
-/* Modular exponentiate a to the e mod m. (r = a^e mod m)
- *
- * r     A single precision number that is the result of the operation.
- * a     A single precision number being exponentiated.
- * e     A single precision number that is the exponent.
- * bits  The number of bits in the exponent.
- * m     A single precision number that is the modulus.
- * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
- */
-static int sp_2048_mod_exp_avx2_16(sp_digit* r, sp_digit* a, sp_digit* e,
-        int bits, sp_digit* m, int reduceA)
-{
-#ifndef WOLFSSL_SMALL_STACK
-    sp_digit t[32][32];
-#else
-    sp_digit* t[32];
-    sp_digit* td;
-#endif
-    sp_digit* norm;
-    sp_digit mp = 1;
-    sp_digit n;
-    sp_digit mask;
-    int i;
-    int c, y;
-    int err = MP_OKAY;
-
-#ifdef WOLFSSL_SMALL_STACK
-    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 32, NULL,
-                            DYNAMIC_TYPE_TMP_BUFFER);
-    if (td == NULL)
-        err = MEMORY_E;
-
-    if (err == MP_OKAY) {
-        for (i=0; i<32; i++)
-            t[i] = td + i * 32;
-        norm = t[0];
-    }
-#else
-    norm = t[0];
-#endif
-
-    if (err == MP_OKAY) {
-        sp_2048_mont_setup(m, &mp);
-        sp_2048_mont_norm_16(norm, m);
-
-        XMEMSET(t[1], 0, sizeof(sp_digit) * 16);
-        if (reduceA) {
-            err = sp_2048_mod_16(t[1] + 16, a, m);
-            if (err == MP_OKAY)
-                err = sp_2048_mod_16(t[1], t[1], m);
-        }
-        else {
-            XMEMCPY(t[1] + 16, a, sizeof(sp_digit) * 16);
-            err = sp_2048_mod_16(t[1], t[1], m);
-        }
-    }
-
-    if (err == MP_OKAY) {
-        sp_2048_mont_sqr_avx2_16(t[ 2], t[ 1], m, mp);
-        sp_2048_mont_mul_avx2_16(t[ 3], t[ 2], t[ 1], m, mp);
-        sp_2048_mont_sqr_avx2_16(t[ 4], t[ 2], m, mp);
-        sp_2048_mont_mul_avx2_16(t[ 5], t[ 3], t[ 2], m, mp);
-        sp_2048_mont_sqr_avx2_16(t[ 6], t[ 3], m, mp);
-        sp_2048_mont_mul_avx2_16(t[ 7], t[ 4], t[ 3], m, mp);
-        sp_2048_mont_sqr_avx2_16(t[ 8], t[ 4], m, mp);
-        sp_2048_mont_mul_avx2_16(t[ 9], t[ 5], t[ 4], m, mp);
-        sp_2048_mont_sqr_avx2_16(t[10], t[ 5], m, mp);
-        sp_2048_mont_mul_avx2_16(t[11], t[ 6], t[ 5], m, mp);
-        sp_2048_mont_sqr_avx2_16(t[12], t[ 6], m, mp);
-        sp_2048_mont_mul_avx2_16(t[13], t[ 7], t[ 6], m, mp);
-        sp_2048_mont_sqr_avx2_16(t[14], t[ 7], m, mp);
-        sp_2048_mont_mul_avx2_16(t[15], t[ 8], t[ 7], m, mp);
-        sp_2048_mont_sqr_avx2_16(t[16], t[ 8], m, mp);
-        sp_2048_mont_mul_avx2_16(t[17], t[ 9], t[ 8], m, mp);
-        sp_2048_mont_sqr_avx2_16(t[18], t[ 9], m, mp);
-        sp_2048_mont_mul_avx2_16(t[19], t[10], t[ 9], m, mp);
-        sp_2048_mont_sqr_avx2_16(t[20], t[10], m, mp);
-        sp_2048_mont_mul_avx2_16(t[21], t[11], t[10], m, mp);
-        sp_2048_mont_sqr_avx2_16(t[22], t[11], m, mp);
-        sp_2048_mont_mul_avx2_16(t[23], t[12], t[11], m, mp);
-        sp_2048_mont_sqr_avx2_16(t[24], t[12], m, mp);
-        sp_2048_mont_mul_avx2_16(t[25], t[13], t[12], m, mp);
-        sp_2048_mont_sqr_avx2_16(t[26], t[13], m, mp);
-        sp_2048_mont_mul_avx2_16(t[27], t[14], t[13], m, mp);
-        sp_2048_mont_sqr_avx2_16(t[28], t[14], m, mp);
-        sp_2048_mont_mul_avx2_16(t[29], t[15], t[14], m, mp);
-        sp_2048_mont_sqr_avx2_16(t[30], t[15], m, mp);
-        sp_2048_mont_mul_avx2_16(t[31], t[16], t[15], m, mp);
-
-        i = (bits - 1) / 64;
-        n = e[i--];
-        y = n >> 59;
-        n <<= 5;
-        c = 59;
-        XMEMCPY(r, t[y], sizeof(sp_digit) * 16);
-        for (; i>=0 || c>=5; ) {
-            if (c == 0) {
-                n = e[i--];
-                y = n >> 59;
-                n <<= 5;
-                c = 59;
-            }
-            else if (c < 5) {
-                y = n >> 59;
-                n = e[i--];
-                c = 5 - c;
-                y |= n >> (64 - c);
-                n <<= c;
-                c = 64 - c;
-            }
-            else {
-                y = (n >> 59) & 0x1f;
-                n <<= 5;
-                c -= 5;
-            }
-
-            sp_2048_mont_sqr_avx2_16(r, r, m, mp);
-            sp_2048_mont_sqr_avx2_16(r, r, m, mp);
-            sp_2048_mont_sqr_avx2_16(r, r, m, mp);
-            sp_2048_mont_sqr_avx2_16(r, r, m, mp);
-            sp_2048_mont_sqr_avx2_16(r, r, m, mp);
-
-            sp_2048_mont_mul_avx2_16(r, r, t[y], m, mp);
-        }
-        y = e[0] & ((1 << c) - 1);
-        for (; c > 0; c--)
-            sp_2048_mont_sqr_avx2_16(r, r, m, mp);
-        sp_2048_mont_mul_avx2_16(r, r, t[y], m, mp);
-
-        XMEMSET(&r[16], 0, sizeof(sp_digit) * 16);
-        sp_2048_mont_reduce_avx2_16(r, m, mp);
-
-        mask = 0 - (sp_2048_cmp_16(r, m) >= 0);
-        sp_2048_cond_sub_16(r, r, m, mask);
-    }
-
-#ifdef WOLFSSL_SMALL_STACK
-    if (td != NULL)
-        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return err;
-}
-#endif /* HAVE_INTEL_AVX2 */
-
-#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */
-
-/* r = 2^n mod m where n is the number of bits to reduce by.
- * Given m must be 2048 bits, just need to subtract.
- *
- * r  A single precision number.
- * m  A signle precision number.
- */
-static void sp_2048_mont_norm_32(sp_digit* r, sp_digit* m)
-{
-    XMEMSET(r, 0, sizeof(sp_digit) * 32);
-
-    /* r = 2^n mod m */
-    sp_2048_sub_in_place_32(r, m);
-}
-
-/* Conditionally subtract b from a using the mask m.
- * m is -1 to subtract and 0 when not copying.
- *
- * r  A single precision number representing condition subtract result.
- * a  A single precision number to subtract from.
- * b  A single precision number to subtract.
- * m  Mask value to apply.
- */
-static sp_digit sp_2048_cond_sub_32(sp_digit* r, sp_digit* a, sp_digit* b,
-        sp_digit m)
-{
-    sp_digit t[32];
-    sp_digit c = 0;
-
-    __asm__ __volatile__ (
-        "movq	0(%[b]), %%rax\n\t"
-        "movq	8(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 0(%[t])\n\t"
-        "movq	%%rcx, 8(%[t])\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "movq	24(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 16(%[t])\n\t"
-        "movq	%%rcx, 24(%[t])\n\t"
-        "movq	32(%[b]), %%rax\n\t"
-        "movq	40(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 32(%[t])\n\t"
-        "movq	%%rcx, 40(%[t])\n\t"
-        "movq	48(%[b]), %%rax\n\t"
-        "movq	56(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 48(%[t])\n\t"
-        "movq	%%rcx, 56(%[t])\n\t"
-        "movq	64(%[b]), %%rax\n\t"
-        "movq	72(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 64(%[t])\n\t"
-        "movq	%%rcx, 72(%[t])\n\t"
-        "movq	80(%[b]), %%rax\n\t"
-        "movq	88(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 80(%[t])\n\t"
-        "movq	%%rcx, 88(%[t])\n\t"
-        "movq	96(%[b]), %%rax\n\t"
-        "movq	104(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 96(%[t])\n\t"
-        "movq	%%rcx, 104(%[t])\n\t"
-        "movq	112(%[b]), %%rax\n\t"
-        "movq	120(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 112(%[t])\n\t"
-        "movq	%%rcx, 120(%[t])\n\t"
-        "movq	128(%[b]), %%rax\n\t"
-        "movq	136(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 128(%[t])\n\t"
-        "movq	%%rcx, 136(%[t])\n\t"
-        "movq	144(%[b]), %%rax\n\t"
-        "movq	152(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 144(%[t])\n\t"
-        "movq	%%rcx, 152(%[t])\n\t"
-        "movq	160(%[b]), %%rax\n\t"
-        "movq	168(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 160(%[t])\n\t"
-        "movq	%%rcx, 168(%[t])\n\t"
-        "movq	176(%[b]), %%rax\n\t"
-        "movq	184(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 176(%[t])\n\t"
-        "movq	%%rcx, 184(%[t])\n\t"
-        "movq	192(%[b]), %%rax\n\t"
-        "movq	200(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 192(%[t])\n\t"
-        "movq	%%rcx, 200(%[t])\n\t"
-        "movq	208(%[b]), %%rax\n\t"
-        "movq	216(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 208(%[t])\n\t"
-        "movq	%%rcx, 216(%[t])\n\t"
-        "movq	224(%[b]), %%rax\n\t"
-        "movq	232(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 224(%[t])\n\t"
-        "movq	%%rcx, 232(%[t])\n\t"
-        "movq	240(%[b]), %%rax\n\t"
-        "movq	248(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 240(%[t])\n\t"
-        "movq	%%rcx, 248(%[t])\n\t"
-        "movq	(%[a]), %%rax\n\t"
-        "movq	(%[t]), %%rdx\n\t"
-        "subq	%%rdx,%%rax\n\t"
-        "movq	8(%[a]), %%rcx\n\t"
-        "movq	8(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 0(%[r])\n\t"
-        "movq	16(%[a]), %%rax\n\t"
-        "movq	16(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 8(%[r])\n\t"
-        "movq	24(%[a]), %%rcx\n\t"
-        "movq	24(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 16(%[r])\n\t"
-        "movq	32(%[a]), %%rax\n\t"
-        "movq	32(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 24(%[r])\n\t"
-        "movq	40(%[a]), %%rcx\n\t"
-        "movq	40(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 32(%[r])\n\t"
-        "movq	48(%[a]), %%rax\n\t"
-        "movq	48(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 40(%[r])\n\t"
-        "movq	56(%[a]), %%rcx\n\t"
-        "movq	56(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 48(%[r])\n\t"
-        "movq	64(%[a]), %%rax\n\t"
-        "movq	64(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 56(%[r])\n\t"
-        "movq	72(%[a]), %%rcx\n\t"
-        "movq	72(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 64(%[r])\n\t"
-        "movq	80(%[a]), %%rax\n\t"
-        "movq	80(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 72(%[r])\n\t"
-        "movq	88(%[a]), %%rcx\n\t"
-        "movq	88(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 80(%[r])\n\t"
-        "movq	96(%[a]), %%rax\n\t"
-        "movq	96(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 88(%[r])\n\t"
-        "movq	104(%[a]), %%rcx\n\t"
-        "movq	104(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 96(%[r])\n\t"
-        "movq	112(%[a]), %%rax\n\t"
-        "movq	112(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 104(%[r])\n\t"
-        "movq	120(%[a]), %%rcx\n\t"
-        "movq	120(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 112(%[r])\n\t"
-        "movq	128(%[a]), %%rax\n\t"
-        "movq	128(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 120(%[r])\n\t"
-        "movq	136(%[a]), %%rcx\n\t"
-        "movq	136(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 128(%[r])\n\t"
-        "movq	144(%[a]), %%rax\n\t"
-        "movq	144(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 136(%[r])\n\t"
-        "movq	152(%[a]), %%rcx\n\t"
-        "movq	152(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 144(%[r])\n\t"
-        "movq	160(%[a]), %%rax\n\t"
-        "movq	160(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 152(%[r])\n\t"
-        "movq	168(%[a]), %%rcx\n\t"
-        "movq	168(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 160(%[r])\n\t"
-        "movq	176(%[a]), %%rax\n\t"
-        "movq	176(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 168(%[r])\n\t"
-        "movq	184(%[a]), %%rcx\n\t"
-        "movq	184(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 176(%[r])\n\t"
-        "movq	192(%[a]), %%rax\n\t"
-        "movq	192(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 184(%[r])\n\t"
-        "movq	200(%[a]), %%rcx\n\t"
-        "movq	200(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 192(%[r])\n\t"
-        "movq	208(%[a]), %%rax\n\t"
-        "movq	208(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 200(%[r])\n\t"
-        "movq	216(%[a]), %%rcx\n\t"
-        "movq	216(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 208(%[r])\n\t"
-        "movq	224(%[a]), %%rax\n\t"
-        "movq	224(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 216(%[r])\n\t"
-        "movq	232(%[a]), %%rcx\n\t"
-        "movq	232(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 224(%[r])\n\t"
-        "movq	240(%[a]), %%rax\n\t"
-        "movq	240(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 232(%[r])\n\t"
-        "movq	248(%[a]), %%rcx\n\t"
-        "movq	248(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 240(%[r])\n\t"
-        "movq	%%rcx, 248(%[r])\n\t"
-        "sbbq	$0, %[c]\n\t"
-        : [c] "+r" (c)
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m), [t] "r" (t)
-        : "memory", "rax", "rcx", "rdx"
-    );
-
-    return c;
-}
-
-/* Reduce the number back to 2048 bits using Montgomery reduction.
- *
- * a   A single precision number to reduce in place.
- * m   The single precision number representing the modulus.
- * mp  The digit representing the negative inverse of m mod 2^n.
- */
-SP_NOINLINE static void sp_2048_mont_reduce_32(sp_digit* a, sp_digit* m,
-        sp_digit mp)
-{
-    sp_digit ca = 0;
-
-    __asm__ __volatile__ (
-        "# i = 0\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "movq	0(%[a]), %%r12\n\t"
-        "movq	8(%[a]), %%r13\n\t"
-        "\nL_mont_loop_32:\n\t"
-        "# mu = a[i] * mp\n\t"
-        "movq	%%r12, %%r10\n\t"
-        "imulq	%[mp], %%r10\n\t"
-        "# a[i+0] += m[0] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	0(%[m])\n\t"
-        "addq	%%rax, %%r12\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "# a[i+1] += m[1] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	8(%[m])\n\t"
-        "movq	%%r13, %%r12\n\t"
-        "addq	%%rax, %%r12\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r12\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+2] += m[2] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	16(%[m])\n\t"
-        "movq	16(%[a]), %%r13\n\t"
-        "addq	%%rax, %%r13\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r13\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+3] += m[3] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	24(%[m])\n\t"
-        "movq	24(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 24(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+4] += m[4] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	32(%[m])\n\t"
-        "movq	32(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 32(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+5] += m[5] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	40(%[m])\n\t"
-        "movq	40(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 40(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+6] += m[6] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	48(%[m])\n\t"
-        "movq	48(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 48(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+7] += m[7] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	56(%[m])\n\t"
-        "movq	56(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 56(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+8] += m[8] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	64(%[m])\n\t"
-        "movq	64(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 64(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+9] += m[9] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	72(%[m])\n\t"
-        "movq	72(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 72(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+10] += m[10] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	80(%[m])\n\t"
-        "movq	80(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 80(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+11] += m[11] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	88(%[m])\n\t"
-        "movq	88(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 88(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+12] += m[12] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	96(%[m])\n\t"
-        "movq	96(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 96(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+13] += m[13] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	104(%[m])\n\t"
-        "movq	104(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 104(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+14] += m[14] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	112(%[m])\n\t"
-        "movq	112(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 112(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+15] += m[15] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	120(%[m])\n\t"
-        "movq	120(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 120(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+16] += m[16] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	128(%[m])\n\t"
-        "movq	128(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 128(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+17] += m[17] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	136(%[m])\n\t"
-        "movq	136(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 136(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+18] += m[18] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	144(%[m])\n\t"
-        "movq	144(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 144(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+19] += m[19] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	152(%[m])\n\t"
-        "movq	152(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 152(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+20] += m[20] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	160(%[m])\n\t"
-        "movq	160(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 160(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+21] += m[21] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	168(%[m])\n\t"
-        "movq	168(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 168(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+22] += m[22] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	176(%[m])\n\t"
-        "movq	176(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 176(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+23] += m[23] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	184(%[m])\n\t"
-        "movq	184(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 184(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+24] += m[24] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	192(%[m])\n\t"
-        "movq	192(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 192(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+25] += m[25] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	200(%[m])\n\t"
-        "movq	200(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 200(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+26] += m[26] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	208(%[m])\n\t"
-        "movq	208(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 208(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+27] += m[27] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	216(%[m])\n\t"
-        "movq	216(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 216(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+28] += m[28] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	224(%[m])\n\t"
-        "movq	224(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 224(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+29] += m[29] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	232(%[m])\n\t"
-        "movq	232(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 232(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+30] += m[30] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	240(%[m])\n\t"
-        "movq	240(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 240(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+31] += m[31] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "mulq	248(%[m])\n\t"
-        "movq	248(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r9\n\t"
-        "adcq	%[ca], %%rdx\n\t"
-        "movq	$0, %[ca]\n\t"
-        "adcq	$0, %[ca]\n\t"
-        "addq	%%r9, %%r11\n\t"
-        "movq	%%r11, 248(%[a])\n\t"
-        "adcq	%%rdx, 256(%[a])\n\t"
-        "adcq	$0, %[ca]\n\t"
-        "# i += 1\n\t"
-        "addq	$8, %[a]\n\t"
-        "addq	$8, %%rcx\n\t"
-        "cmpq	$256, %%rcx\n\t"
-        "jl	L_mont_loop_32\n\t"
-        "movq	%%r12, 0(%[a])\n\t"
-        "movq	%%r13, 8(%[a])\n\t"
-        : [ca] "+r" (ca), [a] "+r" (a)
-        : [m] "r" (m), [mp] "r" (mp)
-        : "memory", "rax", "rdx", "rcx", "r8", "r9", "r10", "r11",
-          "r12", "r13"
-    );
-
-    sp_2048_cond_sub_32(a - 32, a, m, (sp_digit)0 - ca);
-}
-
-/* Multiply two Montogmery form numbers mod the modulus (prime).
- * (r = a * b mod m)
- *
- * r   Result of multiplication.
- * a   First number to multiply in Montogmery form.
- * b   Second number to multiply in Montogmery form.
- * m   Modulus (prime).
- * mp  Montogmery mulitplier.
- */
-static void sp_2048_mont_mul_32(sp_digit* r, sp_digit* a, sp_digit* b,
-        sp_digit* m, sp_digit mp)
-{
-    sp_2048_mul_32(r, a, b);
-    sp_2048_mont_reduce_32(r, m, mp);
-}
-
-/* Square the Montgomery form number. (r = a * a mod m)
- *
- * r   Result of squaring.
- * a   Number to square in Montogmery form.
- * m   Modulus (prime).
- * mp  Montogmery mulitplier.
- */
-static void sp_2048_mont_sqr_32(sp_digit* r, sp_digit* a, sp_digit* m,
-        sp_digit mp)
-{
-    sp_2048_sqr_32(r, a);
-    sp_2048_mont_reduce_32(r, m, mp);
-}
-
-/* Mul a by digit b into r. (r = a * b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A single precision digit.
- */
-SP_NOINLINE static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a,
-        const sp_digit b)
-{
-    __asm__ __volatile__ (
-        "# A[0] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	(%[a])\n\t"
-        "movq	%%rax, %%rbx\n\t"
-        "movq	%%rdx, %%rcx\n\t"
-        "movq	%%rbx, 0(%[r])\n\t"
-        "# A[1] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "movq	%%rcx, 8(%[r])\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "# A[2] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "movq	%%r8, 16(%[r])\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "# A[3] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "movq	%%rbx, 24(%[r])\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# A[4] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "movq	%%rcx, 32(%[r])\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "# A[5] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "movq	%%r8, 40(%[r])\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "# A[6] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "movq	%%rbx, 48(%[r])\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# A[7] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "movq	%%rcx, 56(%[r])\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "# A[8] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "movq	%%r8, 64(%[r])\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "# A[9] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "movq	%%rbx, 72(%[r])\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# A[10] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "movq	%%rcx, 80(%[r])\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "# A[11] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "movq	%%r8, 88(%[r])\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "# A[12] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "movq	%%rbx, 96(%[r])\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# A[13] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "movq	%%rcx, 104(%[r])\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "# A[14] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "movq	%%r8, 112(%[r])\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "# A[15] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "movq	%%rbx, 120(%[r])\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# A[16] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "mulq	128(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "movq	%%rcx, 128(%[r])\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "# A[17] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "mulq	136(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "movq	%%r8, 136(%[r])\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "# A[18] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	144(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "movq	%%rbx, 144(%[r])\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# A[19] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "mulq	152(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "movq	%%rcx, 152(%[r])\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "# A[20] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "mulq	160(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "movq	%%r8, 160(%[r])\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "# A[21] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	168(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "movq	%%rbx, 168(%[r])\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# A[22] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "mulq	176(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "movq	%%rcx, 176(%[r])\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "# A[23] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "mulq	184(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "movq	%%r8, 184(%[r])\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "# A[24] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	192(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "movq	%%rbx, 192(%[r])\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# A[25] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "mulq	200(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "movq	%%rcx, 200(%[r])\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "# A[26] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "mulq	208(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "movq	%%r8, 208(%[r])\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "# A[27] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	216(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "movq	%%rbx, 216(%[r])\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# A[28] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "mulq	224(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "movq	%%rcx, 224(%[r])\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "# A[29] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "mulq	232(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "movq	%%r8, 232(%[r])\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "# A[30] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	240(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "movq	%%rbx, 240(%[r])\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# A[31] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "mulq	248(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "movq	%%rcx, 248(%[r])\n\t"
-        "movq	%%r8, 256(%[r])\n\t"
-        :
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
-        : "memory", "rax", "rdx", "rbx", "rcx", "r8"
-    );
-}
-
-#ifdef HAVE_INTEL_AVX2
-/* Mul a by digit b into r. (r = a * b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A single precision digit.
- */
-SP_NOINLINE static void sp_2048_mul_d_avx2_32(sp_digit* r, const sp_digit* a,
-        const sp_digit b)
-{
-    __asm__ __volatile__ (
-        "# A[0] * B\n\t"
-        "movq	%[b], %%rdx\n\t"
-        "xorq	%%r10, %%r10\n\t"
-        "mulxq	(%[a]), %%r8, %%r9\n\t"
-        "movq	%%r8, 0(%[r])\n\t"
-        "# A[1] * B\n\t"
-        "mulxq	8(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 8(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[2] * B\n\t"
-        "mulxq	16(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 16(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[3] * B\n\t"
-        "mulxq	24(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 24(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[4] * B\n\t"
-        "mulxq	32(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 32(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[5] * B\n\t"
-        "mulxq	40(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 40(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[6] * B\n\t"
-        "mulxq	48(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 48(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[7] * B\n\t"
-        "mulxq	56(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 56(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[8] * B\n\t"
-        "mulxq	64(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 64(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[9] * B\n\t"
-        "mulxq	72(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 72(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[10] * B\n\t"
-        "mulxq	80(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 80(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[11] * B\n\t"
-        "mulxq	88(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 88(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[12] * B\n\t"
-        "mulxq	96(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 96(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[13] * B\n\t"
-        "mulxq	104(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 104(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[14] * B\n\t"
-        "mulxq	112(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 112(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[15] * B\n\t"
-        "mulxq	120(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 120(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[16] * B\n\t"
-        "mulxq	128(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 128(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[17] * B\n\t"
-        "mulxq	136(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 136(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[18] * B\n\t"
-        "mulxq	144(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 144(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[19] * B\n\t"
-        "mulxq	152(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 152(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[20] * B\n\t"
-        "mulxq	160(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 160(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[21] * B\n\t"
-        "mulxq	168(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 168(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[22] * B\n\t"
-        "mulxq	176(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 176(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[23] * B\n\t"
-        "mulxq	184(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 184(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[24] * B\n\t"
-        "mulxq	192(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 192(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[25] * B\n\t"
-        "mulxq	200(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 200(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[26] * B\n\t"
-        "mulxq	208(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 208(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[27] * B\n\t"
-        "mulxq	216(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 216(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[28] * B\n\t"
-        "mulxq	224(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 224(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[29] * B\n\t"
-        "mulxq	232(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 232(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[30] * B\n\t"
-        "mulxq	240(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 240(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[31] * B\n\t"
-        "mulxq	248(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "adcxq	%%r10, %%r8\n\t"
-        "movq	%%r9, 248(%[r])\n\t"
-        "movq	%%r8, 256(%[r])\n\t"
-        :
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
-        : "memory", "rax", "rdx", "rcx", "r8", "r9", "r10"
-    );
-}
-#endif /* HAVE_INTEL_AVX2 */
-
-/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
- *
- * d1   The high order half of the number to divide.
- * d0   The low order half of the number to divide.
- * div  The dividend.
- * returns the result of the division.
- */
-static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, sp_digit div)
-{
-    sp_digit r;
-
-    __asm__ __volatile__ (
-        "movq	%[d0], %%rax\n\t"
-        "movq	%[d1], %%rdx\n\t"
-        "divq	%[div]\n\t"
-        "movq	%%rax, %[r]\n\t"
-        : [r] "=r" (r)
-        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
-        : "rax", "rdx"
-    );
-
-    return r;
-}
-
-/* AND m into each word of a and store in r.
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * m  Mask to AND against each digit.
- */
-static void sp_2048_mask_32(sp_digit* r, sp_digit* a, sp_digit m)
-{
-#ifdef WOLFSSL_SP_SMALL
-    int i;
-
-    for (i=0; i<32; i++)
-        r[i] = a[i] & m;
-#else
-    int i;
-
-    for (i = 0; i < 32; i += 8) {
-        r[i+0] = a[i+0] & m;
-        r[i+1] = a[i+1] & m;
-        r[i+2] = a[i+2] & m;
-        r[i+3] = a[i+3] & m;
-        r[i+4] = a[i+4] & m;
-        r[i+5] = a[i+5] & m;
-        r[i+6] = a[i+6] & m;
-        r[i+7] = a[i+7] & m;
-    }
-#endif
-}
-
-/* Compare a with b in constant time.
- *
- * a  A single precision integer.
- * b  A single precision integer.
- * return -ve, 0 or +ve if a is less than, equal to or greater than b
- * respectively.
- */
-static int64_t sp_2048_cmp_32(sp_digit* a, sp_digit* b)
-{
-    sp_digit r = -1;
-    sp_digit one = 1;
-
-    __asm__ __volatile__ (
-        "xorq	%%rcx, %%rcx\n\t"
-        "movq	$-1, %%rdx\n\t"
-        "movq	248(%[a]), %%rbx\n\t"
-        "movq	248(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	240(%[a]), %%rbx\n\t"
-        "movq	240(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	232(%[a]), %%rbx\n\t"
-        "movq	232(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	224(%[a]), %%rbx\n\t"
-        "movq	224(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	216(%[a]), %%rbx\n\t"
-        "movq	216(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	208(%[a]), %%rbx\n\t"
-        "movq	208(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	200(%[a]), %%rbx\n\t"
-        "movq	200(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	192(%[a]), %%rbx\n\t"
-        "movq	192(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	184(%[a]), %%rbx\n\t"
-        "movq	184(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	176(%[a]), %%rbx\n\t"
-        "movq	176(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	168(%[a]), %%rbx\n\t"
-        "movq	168(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	160(%[a]), %%rbx\n\t"
-        "movq	160(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	152(%[a]), %%rbx\n\t"
-        "movq	152(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	144(%[a]), %%rbx\n\t"
-        "movq	144(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	136(%[a]), %%rbx\n\t"
-        "movq	136(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	128(%[a]), %%rbx\n\t"
-        "movq	128(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	120(%[a]), %%rbx\n\t"
-        "movq	120(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	112(%[a]), %%rbx\n\t"
-        "movq	112(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	104(%[a]), %%rbx\n\t"
-        "movq	104(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	96(%[a]), %%rbx\n\t"
-        "movq	96(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	88(%[a]), %%rbx\n\t"
-        "movq	88(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	80(%[a]), %%rbx\n\t"
-        "movq	80(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	72(%[a]), %%rbx\n\t"
-        "movq	72(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	64(%[a]), %%rbx\n\t"
-        "movq	64(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	56(%[a]), %%rbx\n\t"
-        "movq	56(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	48(%[a]), %%rbx\n\t"
-        "movq	48(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	40(%[a]), %%rbx\n\t"
-        "movq	40(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	32(%[a]), %%rbx\n\t"
-        "movq	32(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	24(%[a]), %%rbx\n\t"
-        "movq	24(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	16(%[a]), %%rbx\n\t"
-        "movq	16(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	8(%[a]), %%rbx\n\t"
-        "movq	8(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	0(%[a]), %%rbx\n\t"
-        "movq	0(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "xorq	%%rdx, %[r]\n\t"
-        : [r] "+r" (r)
-        : [a] "r" (a), [b] "r" (b), [one] "r" (one)
-        : "rax", "rdx", "rcx", "rbx", "r8"
-    );
-
-    return r;
-}
-
-/* Divide d in a and put remainder into r (m*d + r = a)
- * m is not calculated as it is not needed at this time.
- *
- * a  Nmber to be divided.
- * d  Number to divide with.
- * m  Multiplier result.
- * r  Remainder from the division.
- * returns MP_OKAY indicating success.
- */
-static WC_INLINE int sp_2048_div_32(sp_digit* a, sp_digit* d, sp_digit* m,
-        sp_digit* r)
-{
-    sp_digit t1[64], t2[33];
-    sp_digit div, r1;
-    int i;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    (void)m;
-
-    div = d[31];
-    XMEMCPY(t1, a, sizeof(*t1) * 2 * 32);
-    for (i=31; i>=0; i--) {
-        r1 = div_2048_word_32(t1[32 + i], t1[32 + i - 1], div);
-
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            sp_2048_mul_d_avx2_32(t2, d, r1);
-        else
-#endif
-            sp_2048_mul_d_32(t2, d, r1);
-        t1[32 + i] += sp_2048_sub_in_place_32(&t1[i], t2);
-        t1[32 + i] -= t2[32];
-        sp_2048_mask_32(t2, d, t1[32 + i]);
-        t1[32 + i] += sp_2048_add_32(&t1[i], &t1[i], t2);
-        sp_2048_mask_32(t2, d, t1[32 + i]);
-        t1[32 + i] += sp_2048_add_32(&t1[i], &t1[i], t2);
-    }
-
-    r1 = sp_2048_cmp_32(t1, d) >= 0;
-    sp_2048_cond_sub_32(r, t1, t2, (sp_digit)0 - r1);
-
-    return MP_OKAY;
-}
-
-/* Reduce a modulo m into r. (r = a mod m)
- *
- * r  A single precision number that is the reduced result.
- * a  A single precision number that is to be reduced.
- * m  A single precision number that is the modulus to reduce with.
- * returns MP_OKAY indicating success.
- */
-static WC_INLINE int sp_2048_mod_32(sp_digit* r, sp_digit* a, sp_digit* m)
-{
-    return sp_2048_div_32(a, m, NULL, r);
-}
-
-/* Divide d in a and put remainder into r (m*d + r = a)
- * m is not calculated as it is not needed at this time.
- *
- * a  Nmber to be divided.
- * d  Number to divide with.
- * m  Multiplier result.
- * r  Remainder from the division.
- * returns MP_OKAY indicating success.
- */
-static WC_INLINE int sp_2048_div_32_cond(sp_digit* a, sp_digit* d, sp_digit* m,
-        sp_digit* r)
-{
-    sp_digit t1[64], t2[33];
-    sp_digit div, r1;
-    int i;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    (void)m;
-
-    div = d[31];
-    XMEMCPY(t1, a, sizeof(*t1) * 2 * 32);
-    for (i=31; i>=0; i--) {
-        r1 = div_2048_word_32(t1[32 + i], t1[32 + i - 1], div);
-
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            sp_2048_mul_d_avx2_32(t2, d, r1);
-        else
-#endif
-            sp_2048_mul_d_32(t2, d, r1);
-        t1[32 + i] += sp_2048_sub_in_place_32(&t1[i], t2);
-        t1[32 + i] -= t2[32];
-        if (t1[32 + i] != 0) {
-            t1[32 + i] += sp_2048_add_32(&t1[i], &t1[i], d);
-            if (t1[32 + i] != 0)
-                t1[32 + i] += sp_2048_add_32(&t1[i], &t1[i], d);
-        }
-    }
-
-    r1 = sp_2048_cmp_32(t1, d) >= 0;
-    sp_2048_cond_sub_32(r, t1, t2, (sp_digit)0 - r1);
-
-    return MP_OKAY;
-}
-
-/* Reduce a modulo m into r. (r = a mod m)
- *
- * r  A single precision number that is the reduced result.
- * a  A single precision number that is to be reduced.
- * m  A single precision number that is the modulus to reduce with.
- * returns MP_OKAY indicating success.
- */
-static WC_INLINE int sp_2048_mod_32_cond(sp_digit* r, sp_digit* a, sp_digit* m)
-{
-    return sp_2048_div_32_cond(a, m, NULL, r);
-}
-
-#if defined(SP_RSA_PRIVATE_EXP_D) || defined(WOLFSSL_HAVE_SP_DH)
-/* Modular exponentiate a to the e mod m. (r = a^e mod m)
- *
- * r     A single precision number that is the result of the operation.
- * a     A single precision number being exponentiated.
- * e     A single precision number that is the exponent.
- * bits  The number of bits in the exponent.
- * m     A single precision number that is the modulus.
- * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
- */
-static int sp_2048_mod_exp_32(sp_digit* r, sp_digit* a, sp_digit* e,
-        int bits, sp_digit* m, int reduceA)
-{
-#ifndef WOLFSSL_SMALL_STACK
-    sp_digit t[32][64];
-#else
-    sp_digit* t[32];
-    sp_digit* td;
-#endif
-    sp_digit* norm;
-    sp_digit mp = 1;
-    sp_digit n;
-    sp_digit mask;
-    int i;
-    int c, y;
-    int err = MP_OKAY;
-
-#ifdef WOLFSSL_SMALL_STACK
-    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 64, NULL,
-                            DYNAMIC_TYPE_TMP_BUFFER);
-    if (td == NULL)
-        err = MEMORY_E;
-
-    if (err == MP_OKAY) {
-        for (i=0; i<32; i++)
-            t[i] = td + i * 64;
-        norm = t[0];
-    }
-#else
-    norm = t[0];
-#endif
-
-    if (err == MP_OKAY) {
-        sp_2048_mont_setup(m, &mp);
-        sp_2048_mont_norm_32(norm, m);
-
-        XMEMSET(t[1], 0, sizeof(sp_digit) * 32);
-        if (reduceA) {
-            err = sp_2048_mod_32(t[1] + 32, a, m);
-            if (err == MP_OKAY)
-                err = sp_2048_mod_32(t[1], t[1], m);
-        }
-        else {
-            XMEMCPY(t[1] + 32, a, sizeof(sp_digit) * 32);
-            err = sp_2048_mod_32(t[1], t[1], m);
-        }
-    }
-
-    if (err == MP_OKAY) {
-        sp_2048_mont_sqr_32(t[ 2], t[ 1], m, mp);
-        sp_2048_mont_mul_32(t[ 3], t[ 2], t[ 1], m, mp);
-        sp_2048_mont_sqr_32(t[ 4], t[ 2], m, mp);
-        sp_2048_mont_mul_32(t[ 5], t[ 3], t[ 2], m, mp);
-        sp_2048_mont_sqr_32(t[ 6], t[ 3], m, mp);
-        sp_2048_mont_mul_32(t[ 7], t[ 4], t[ 3], m, mp);
-        sp_2048_mont_sqr_32(t[ 8], t[ 4], m, mp);
-        sp_2048_mont_mul_32(t[ 9], t[ 5], t[ 4], m, mp);
-        sp_2048_mont_sqr_32(t[10], t[ 5], m, mp);
-        sp_2048_mont_mul_32(t[11], t[ 6], t[ 5], m, mp);
-        sp_2048_mont_sqr_32(t[12], t[ 6], m, mp);
-        sp_2048_mont_mul_32(t[13], t[ 7], t[ 6], m, mp);
-        sp_2048_mont_sqr_32(t[14], t[ 7], m, mp);
-        sp_2048_mont_mul_32(t[15], t[ 8], t[ 7], m, mp);
-        sp_2048_mont_sqr_32(t[16], t[ 8], m, mp);
-        sp_2048_mont_mul_32(t[17], t[ 9], t[ 8], m, mp);
-        sp_2048_mont_sqr_32(t[18], t[ 9], m, mp);
-        sp_2048_mont_mul_32(t[19], t[10], t[ 9], m, mp);
-        sp_2048_mont_sqr_32(t[20], t[10], m, mp);
-        sp_2048_mont_mul_32(t[21], t[11], t[10], m, mp);
-        sp_2048_mont_sqr_32(t[22], t[11], m, mp);
-        sp_2048_mont_mul_32(t[23], t[12], t[11], m, mp);
-        sp_2048_mont_sqr_32(t[24], t[12], m, mp);
-        sp_2048_mont_mul_32(t[25], t[13], t[12], m, mp);
-        sp_2048_mont_sqr_32(t[26], t[13], m, mp);
-        sp_2048_mont_mul_32(t[27], t[14], t[13], m, mp);
-        sp_2048_mont_sqr_32(t[28], t[14], m, mp);
-        sp_2048_mont_mul_32(t[29], t[15], t[14], m, mp);
-        sp_2048_mont_sqr_32(t[30], t[15], m, mp);
-        sp_2048_mont_mul_32(t[31], t[16], t[15], m, mp);
-
-        i = (bits - 1) / 64;
-        n = e[i--];
-        y = n >> 59;
-        n <<= 5;
-        c = 59;
-        XMEMCPY(r, t[y], sizeof(sp_digit) * 32);
-        for (; i>=0 || c>=5; ) {
-            if (c == 0) {
-                n = e[i--];
-                y = n >> 59;
-                n <<= 5;
-                c = 59;
-            }
-            else if (c < 5) {
-                y = n >> 59;
-                n = e[i--];
-                c = 5 - c;
-                y |= n >> (64 - c);
-                n <<= c;
-                c = 64 - c;
-            }
-            else {
-                y = (n >> 59) & 0x1f;
-                n <<= 5;
-                c -= 5;
-            }
-
-            sp_2048_mont_sqr_32(r, r, m, mp);
-            sp_2048_mont_sqr_32(r, r, m, mp);
-            sp_2048_mont_sqr_32(r, r, m, mp);
-            sp_2048_mont_sqr_32(r, r, m, mp);
-            sp_2048_mont_sqr_32(r, r, m, mp);
-
-            sp_2048_mont_mul_32(r, r, t[y], m, mp);
-        }
-        y = e[0] & ((1 << c) - 1);
-        for (; c > 0; c--)
-            sp_2048_mont_sqr_32(r, r, m, mp);
-        sp_2048_mont_mul_32(r, r, t[y], m, mp);
-
-        XMEMSET(&r[32], 0, sizeof(sp_digit) * 32);
-        sp_2048_mont_reduce_32(r, m, mp);
-
-        mask = 0 - (sp_2048_cmp_32(r, m) >= 0);
-        sp_2048_cond_sub_32(r, r, m, mask);
-    }
-
-#ifdef WOLFSSL_SMALL_STACK
-    if (td != NULL)
-        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return err;
-}
-#endif /* SP_RSA_PRIVATE_EXP_D || WOLFSSL_HAVE_SP_DH */
-
-#ifdef HAVE_INTEL_AVX2
-/* Reduce the number back to 2048 bits using Montgomery reduction.
- *
- * a   A single precision number to reduce in place.
- * m   The single precision number representing the modulus.
- * mp  The digit representing the negative inverse of m mod 2^n.
- */
-SP_NOINLINE static void sp_2048_mont_reduce_avx2_32(sp_digit* a, sp_digit* m,
-        sp_digit mp)
-{
-    sp_digit ca = 0;
-
-    __asm__ __volatile__ (
-        "# i = 0\n\t"
-        "movq	0(%[a]), %%r12\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "\nL_mont_loop_avx2_32:\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "movq	%%r12, %%r10\n\t"
-        "# mu = a[i] * mp\n\t"
-        "movq	%%r12, %%rdx\n\t"
-        "mulxq	%[mp], %%rdx, %%r8\n\t"
-        "# a[i+0] += m[0] * mu\n\t"
-        "mulxq	0(%[m]), %%rax, %%r8\n\t"
-        "movq	8(%[a]), %%r12\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r12\n\t"
-        "# a[i+1] += m[1] * mu\n\t"
-        "mulxq	8(%[m]), %%rax, %%r8\n\t"
-        "movq	16(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "# a[i+2] += m[2] * mu\n\t"
-        "mulxq	16(%[m]), %%rax, %%r8\n\t"
-        "movq	24(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 16(%[a])\n\t"
-        "# a[i+3] += m[3] * mu\n\t"
-        "mulxq	24(%[m]), %%rax, %%r8\n\t"
-        "movq	32(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 24(%[a])\n\t"
-        "# a[i+4] += m[4] * mu\n\t"
-        "mulxq	32(%[m]), %%rax, %%r8\n\t"
-        "movq	40(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 32(%[a])\n\t"
-        "# a[i+5] += m[5] * mu\n\t"
-        "mulxq	40(%[m]), %%rax, %%r8\n\t"
-        "movq	48(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 40(%[a])\n\t"
-        "# a[i+6] += m[6] * mu\n\t"
-        "mulxq	48(%[m]), %%rax, %%r8\n\t"
-        "movq	56(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 48(%[a])\n\t"
-        "# a[i+7] += m[7] * mu\n\t"
-        "mulxq	56(%[m]), %%rax, %%r8\n\t"
-        "movq	64(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 56(%[a])\n\t"
-        "# a[i+8] += m[8] * mu\n\t"
-        "mulxq	64(%[m]), %%rax, %%r8\n\t"
-        "movq	72(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 64(%[a])\n\t"
-        "# a[i+9] += m[9] * mu\n\t"
-        "mulxq	72(%[m]), %%rax, %%r8\n\t"
-        "movq	80(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 72(%[a])\n\t"
-        "# a[i+10] += m[10] * mu\n\t"
-        "mulxq	80(%[m]), %%rax, %%r8\n\t"
-        "movq	88(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 80(%[a])\n\t"
-        "# a[i+11] += m[11] * mu\n\t"
-        "mulxq	88(%[m]), %%rax, %%r8\n\t"
-        "movq	96(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 88(%[a])\n\t"
-        "# a[i+12] += m[12] * mu\n\t"
-        "mulxq	96(%[m]), %%rax, %%r8\n\t"
-        "movq	104(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 96(%[a])\n\t"
-        "# a[i+13] += m[13] * mu\n\t"
-        "mulxq	104(%[m]), %%rax, %%r8\n\t"
-        "movq	112(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 104(%[a])\n\t"
-        "# a[i+14] += m[14] * mu\n\t"
-        "mulxq	112(%[m]), %%rax, %%r8\n\t"
-        "movq	120(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 112(%[a])\n\t"
-        "# a[i+15] += m[15] * mu\n\t"
-        "mulxq	120(%[m]), %%rax, %%r8\n\t"
-        "movq	128(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 120(%[a])\n\t"
-        "# a[i+16] += m[16] * mu\n\t"
-        "mulxq	128(%[m]), %%rax, %%r8\n\t"
-        "movq	136(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 128(%[a])\n\t"
-        "# a[i+17] += m[17] * mu\n\t"
-        "mulxq	136(%[m]), %%rax, %%r8\n\t"
-        "movq	144(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 136(%[a])\n\t"
-        "# a[i+18] += m[18] * mu\n\t"
-        "mulxq	144(%[m]), %%rax, %%r8\n\t"
-        "movq	152(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 144(%[a])\n\t"
-        "# a[i+19] += m[19] * mu\n\t"
-        "mulxq	152(%[m]), %%rax, %%r8\n\t"
-        "movq	160(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 152(%[a])\n\t"
-        "# a[i+20] += m[20] * mu\n\t"
-        "mulxq	160(%[m]), %%rax, %%r8\n\t"
-        "movq	168(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 160(%[a])\n\t"
-        "# a[i+21] += m[21] * mu\n\t"
-        "mulxq	168(%[m]), %%rax, %%r8\n\t"
-        "movq	176(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 168(%[a])\n\t"
-        "# a[i+22] += m[22] * mu\n\t"
-        "mulxq	176(%[m]), %%rax, %%r8\n\t"
-        "movq	184(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 176(%[a])\n\t"
-        "# a[i+23] += m[23] * mu\n\t"
-        "mulxq	184(%[m]), %%rax, %%r8\n\t"
-        "movq	192(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 184(%[a])\n\t"
-        "# a[i+24] += m[24] * mu\n\t"
-        "mulxq	192(%[m]), %%rax, %%r8\n\t"
-        "movq	200(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 192(%[a])\n\t"
-        "# a[i+25] += m[25] * mu\n\t"
-        "mulxq	200(%[m]), %%rax, %%r8\n\t"
-        "movq	208(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 200(%[a])\n\t"
-        "# a[i+26] += m[26] * mu\n\t"
-        "mulxq	208(%[m]), %%rax, %%r8\n\t"
-        "movq	216(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 208(%[a])\n\t"
-        "# a[i+27] += m[27] * mu\n\t"
-        "mulxq	216(%[m]), %%rax, %%r8\n\t"
-        "movq	224(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 216(%[a])\n\t"
-        "# a[i+28] += m[28] * mu\n\t"
-        "mulxq	224(%[m]), %%rax, %%r8\n\t"
-        "movq	232(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 224(%[a])\n\t"
-        "# a[i+29] += m[29] * mu\n\t"
-        "mulxq	232(%[m]), %%rax, %%r8\n\t"
-        "movq	240(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 232(%[a])\n\t"
-        "# a[i+30] += m[30] * mu\n\t"
-        "mulxq	240(%[m]), %%rax, %%r8\n\t"
-        "movq	248(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 240(%[a])\n\t"
-        "# a[i+31] += m[31] * mu\n\t"
-        "mulxq	248(%[m]), %%rax, %%r8\n\t"
-        "movq	256(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 248(%[a])\n\t"
-        "adcxq	%[ca], %%r10\n\t"
-        "movq	%%r9, %[ca]\n\t"
-        "adoxq	%%r9, %[ca]\n\t"
-        "adcxq	%%r9, %[ca]\n\t"
-        "movq	%%r10, 256(%[a])\n\t"
-        "# i += 1\n\t"
-        "addq	$8, %[a]\n\t"
-        "addq	$1, %%rcx\n\t"
-        "cmpq	$32, %%rcx\n\t"
-        "jl	L_mont_loop_avx2_32\n\t"
-        "movq	%%r12, 0(%[a])\n\t"
-        : [ca] "+r" (ca), [a] "+r" (a)
-        : [m] "r" (m), [mp] "r" (mp)
-        : "memory", "rax", "rdx", "rcx", "r8", "r9", "r10", "r11", "r12"
-    );
-
-    sp_2048_cond_sub_32(a - 32, a, m, (sp_digit)0 - ca);
-}
-#endif /* HAVE_INTEL_AVX2 */
-
-#ifdef HAVE_INTEL_AVX2
-/* Multiply two Montogmery form numbers mod the modulus (prime).
- * (r = a * b mod m)
- *
- * r   Result of multiplication.
- * a   First number to multiply in Montogmery form.
- * b   Second number to multiply in Montogmery form.
- * m   Modulus (prime).
- * mp  Montogmery mulitplier.
- */
-static void sp_2048_mont_mul_avx2_32(sp_digit* r, sp_digit* a, sp_digit* b,
-        sp_digit* m, sp_digit mp)
-{
-    sp_2048_mul_avx2_32(r, a, b);
-    sp_2048_mont_reduce_avx2_32(r, m, mp);
-}
-
-#endif /* HAVE_INTEL_AVX2 */
-#ifdef HAVE_INTEL_AVX2
-/* Square the Montgomery form number. (r = a * a mod m)
- *
- * r   Result of squaring.
- * a   Number to square in Montogmery form.
- * m   Modulus (prime).
- * mp  Montogmery mulitplier.
- */
-static void sp_2048_mont_sqr_avx2_32(sp_digit* r, sp_digit* a, sp_digit* m,
-        sp_digit mp)
-{
-    sp_2048_sqr_avx2_32(r, a);
-    sp_2048_mont_reduce_avx2_32(r, m, mp);
-}
-
-#endif /* HAVE_INTEL_AVX2 */
-#if defined(SP_RSA_PRIVATE_EXP_D) || defined(WOLFSSL_HAVE_SP_DH)
-#ifdef HAVE_INTEL_AVX2
-/* Modular exponentiate a to the e mod m. (r = a^e mod m)
- *
- * r     A single precision number that is the result of the operation.
- * a     A single precision number being exponentiated.
- * e     A single precision number that is the exponent.
- * bits  The number of bits in the exponent.
- * m     A single precision number that is the modulus.
- * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
- */
-static int sp_2048_mod_exp_avx2_32(sp_digit* r, sp_digit* a, sp_digit* e,
-        int bits, sp_digit* m, int reduceA)
-{
-#ifndef WOLFSSL_SMALL_STACK
-    sp_digit t[32][64];
-#else
-    sp_digit* t[32];
-    sp_digit* td;
-#endif
-    sp_digit* norm;
-    sp_digit mp = 1;
-    sp_digit n;
-    sp_digit mask;
-    int i;
-    int c, y;
-    int err = MP_OKAY;
-
-#ifdef WOLFSSL_SMALL_STACK
-    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 64, NULL,
-                            DYNAMIC_TYPE_TMP_BUFFER);
-    if (td == NULL)
-        err = MEMORY_E;
-
-    if (err == MP_OKAY) {
-        for (i=0; i<32; i++)
-            t[i] = td + i * 64;
-        norm = t[0];
-    }
-#else
-    norm = t[0];
-#endif
-
-    if (err == MP_OKAY) {
-        sp_2048_mont_setup(m, &mp);
-        sp_2048_mont_norm_32(norm, m);
-
-        XMEMSET(t[1], 0, sizeof(sp_digit) * 32);
-        if (reduceA) {
-            err = sp_2048_mod_32(t[1] + 32, a, m);
-            if (err == MP_OKAY)
-                err = sp_2048_mod_32(t[1], t[1], m);
-        }
-        else {
-            XMEMCPY(t[1] + 32, a, sizeof(sp_digit) * 32);
-            err = sp_2048_mod_32(t[1], t[1], m);
-        }
-    }
-
-    if (err == MP_OKAY) {
-        sp_2048_mont_sqr_avx2_32(t[ 2], t[ 1], m, mp);
-        sp_2048_mont_mul_avx2_32(t[ 3], t[ 2], t[ 1], m, mp);
-        sp_2048_mont_sqr_avx2_32(t[ 4], t[ 2], m, mp);
-        sp_2048_mont_mul_avx2_32(t[ 5], t[ 3], t[ 2], m, mp);
-        sp_2048_mont_sqr_avx2_32(t[ 6], t[ 3], m, mp);
-        sp_2048_mont_mul_avx2_32(t[ 7], t[ 4], t[ 3], m, mp);
-        sp_2048_mont_sqr_avx2_32(t[ 8], t[ 4], m, mp);
-        sp_2048_mont_mul_avx2_32(t[ 9], t[ 5], t[ 4], m, mp);
-        sp_2048_mont_sqr_avx2_32(t[10], t[ 5], m, mp);
-        sp_2048_mont_mul_avx2_32(t[11], t[ 6], t[ 5], m, mp);
-        sp_2048_mont_sqr_avx2_32(t[12], t[ 6], m, mp);
-        sp_2048_mont_mul_avx2_32(t[13], t[ 7], t[ 6], m, mp);
-        sp_2048_mont_sqr_avx2_32(t[14], t[ 7], m, mp);
-        sp_2048_mont_mul_avx2_32(t[15], t[ 8], t[ 7], m, mp);
-        sp_2048_mont_sqr_avx2_32(t[16], t[ 8], m, mp);
-        sp_2048_mont_mul_avx2_32(t[17], t[ 9], t[ 8], m, mp);
-        sp_2048_mont_sqr_avx2_32(t[18], t[ 9], m, mp);
-        sp_2048_mont_mul_avx2_32(t[19], t[10], t[ 9], m, mp);
-        sp_2048_mont_sqr_avx2_32(t[20], t[10], m, mp);
-        sp_2048_mont_mul_avx2_32(t[21], t[11], t[10], m, mp);
-        sp_2048_mont_sqr_avx2_32(t[22], t[11], m, mp);
-        sp_2048_mont_mul_avx2_32(t[23], t[12], t[11], m, mp);
-        sp_2048_mont_sqr_avx2_32(t[24], t[12], m, mp);
-        sp_2048_mont_mul_avx2_32(t[25], t[13], t[12], m, mp);
-        sp_2048_mont_sqr_avx2_32(t[26], t[13], m, mp);
-        sp_2048_mont_mul_avx2_32(t[27], t[14], t[13], m, mp);
-        sp_2048_mont_sqr_avx2_32(t[28], t[14], m, mp);
-        sp_2048_mont_mul_avx2_32(t[29], t[15], t[14], m, mp);
-        sp_2048_mont_sqr_avx2_32(t[30], t[15], m, mp);
-        sp_2048_mont_mul_avx2_32(t[31], t[16], t[15], m, mp);
-
-        i = (bits - 1) / 64;
-        n = e[i--];
-        y = n >> 59;
-        n <<= 5;
-        c = 59;
-        XMEMCPY(r, t[y], sizeof(sp_digit) * 32);
-        for (; i>=0 || c>=5; ) {
-            if (c == 0) {
-                n = e[i--];
-                y = n >> 59;
-                n <<= 5;
-                c = 59;
-            }
-            else if (c < 5) {
-                y = n >> 59;
-                n = e[i--];
-                c = 5 - c;
-                y |= n >> (64 - c);
-                n <<= c;
-                c = 64 - c;
-            }
-            else {
-                y = (n >> 59) & 0x1f;
-                n <<= 5;
-                c -= 5;
-            }
-
-            sp_2048_mont_sqr_avx2_32(r, r, m, mp);
-            sp_2048_mont_sqr_avx2_32(r, r, m, mp);
-            sp_2048_mont_sqr_avx2_32(r, r, m, mp);
-            sp_2048_mont_sqr_avx2_32(r, r, m, mp);
-            sp_2048_mont_sqr_avx2_32(r, r, m, mp);
-
-            sp_2048_mont_mul_avx2_32(r, r, t[y], m, mp);
-        }
-        y = e[0] & ((1 << c) - 1);
-        for (; c > 0; c--)
-            sp_2048_mont_sqr_avx2_32(r, r, m, mp);
-        sp_2048_mont_mul_avx2_32(r, r, t[y], m, mp);
-
-        XMEMSET(&r[32], 0, sizeof(sp_digit) * 32);
-        sp_2048_mont_reduce_avx2_32(r, m, mp);
-
-        mask = 0 - (sp_2048_cmp_32(r, m) >= 0);
-        sp_2048_cond_sub_32(r, r, m, mask);
-    }
-
-#ifdef WOLFSSL_SMALL_STACK
-    if (td != NULL)
-        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return err;
-}
-#endif /* HAVE_INTEL_AVX2 */
-#endif /* SP_RSA_PRIVATE_EXP_D || WOLFSSL_HAVE_SP_DH */
-
-#ifdef WOLFSSL_HAVE_SP_RSA
-/* RSA public key operation.
- *
- * in      Array of bytes representing the number to exponentiate, base.
- * inLen   Number of bytes in base.
- * em      Public exponent.
- * mm      Modulus.
- * out     Buffer to hold big-endian bytes of exponentiation result.
- *         Must be at least 256 bytes long.
- * outLen  Number of bytes in result.
- * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
- * an array is too long and MEMORY_E when dynamic memory allocation fails.
- */
-int sp_RsaPublic_2048(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
-    byte* out, word32* outLen)
-{
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_digit ad[64], md[32], rd[64];
-#else
-    sp_digit* d = NULL;
-#endif
-    sp_digit* a;
-    sp_digit *ah;
-    sp_digit* m;
-    sp_digit* r;
-    sp_digit e[1];
-    int err = MP_OKAY;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    if (*outLen < 256)
-        err = MP_TO_E;
-    if (err == MP_OKAY && (mp_count_bits(em) > 64 || inLen > 256 ||
-                                                     mp_count_bits(mm) != 2048))
-        err = MP_READ_E;
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 5, NULL,
-                               DYNAMIC_TYPE_TMP_BUFFER);
-        if (d == NULL)
-            err = MEMORY_E;
-    }
-
-    if (err == MP_OKAY) {
-        a = d;
-        r = a + 32 * 2;
-        m = r + 32 * 2;
-        ah = a + 32;
-    }
-#else
-    a = ad;
-    m = md;
-    r = rd;
-    ah = a + 32;
-#endif
-
-    if (err == MP_OKAY) {
-        sp_2048_from_bin(ah, 32, in, inLen);
-#if DIGIT_BIT >= 64
-        e[0] = em->dp[0];
-#else
-        e[0] = em->dp[0];
-        if (em->used > 1)
-            e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
-#endif
-        if (e[0] == 0)
-            err = MP_EXPTMOD_E;
-    }
-    if (err == MP_OKAY) {
-        sp_2048_from_mp(m, 32, mm);
-
-        if (e[0] == 0x3) {
-#ifdef HAVE_INTEL_AVX2
-            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
-                if (err == MP_OKAY) {
-                    sp_2048_sqr_avx2_32(r, ah);
-                    err = sp_2048_mod_32_cond(r, r, m);
-                }
-                if (err == MP_OKAY) {
-                    sp_2048_mul_avx2_32(r, ah, r);
-                    err = sp_2048_mod_32_cond(r, r, m);
-                }
-            }
-            else
-#endif
-            {
-                if (err == MP_OKAY) {
-                    sp_2048_sqr_32(r, ah);
-                    err = sp_2048_mod_32_cond(r, r, m);
-                }
-                if (err == MP_OKAY) {
-                    sp_2048_mul_32(r, ah, r);
-                    err = sp_2048_mod_32_cond(r, r, m);
-                }
-            }
-        }
-        else {
-            int i;
-            sp_digit mp;
-
-            sp_2048_mont_setup(m, &mp);
-
-            /* Convert to Montgomery form. */
-            XMEMSET(a, 0, sizeof(sp_digit) * 32);
-            err = sp_2048_mod_32_cond(a, a, m);
-
-            if (err == MP_OKAY) {
-                for (i=63; i>=0; i--)
-                    if (e[0] >> i)
-                        break;
-
-                XMEMCPY(r, a, sizeof(sp_digit) * 32);
-#ifdef HAVE_INTEL_AVX2
-                if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
-                    for (i--; i>=0; i--) {
-                        sp_2048_mont_sqr_avx2_32(r, r, m, mp);
-                        if (((e[0] >> i) & 1) == 1)
-                            sp_2048_mont_mul_avx2_32(r, r, a, m, mp);
-                    }
-                    XMEMSET(&r[32], 0, sizeof(sp_digit) * 32);
-                    sp_2048_mont_reduce_avx2_32(r, m, mp);
-                }
-                else
-#endif
-                {
-                    for (i--; i>=0; i--) {
-                        sp_2048_mont_sqr_32(r, r, m, mp);
-                        if (((e[0] >> i) & 1) == 1)
-                            sp_2048_mont_mul_32(r, r, a, m, mp);
-                    }
-                    XMEMSET(&r[32], 0, sizeof(sp_digit) * 32);
-                    sp_2048_mont_reduce_32(r, m, mp);
-                }
-
-                for (i = 31; i > 0; i--) {
-                    if (r[i] != m[i])
-                        break;
-                }
-                if (r[i] >= m[i])
-                    sp_2048_sub_in_place_32(r, m);
-            }
-        }
-    }
-
-    if (err == MP_OKAY) {
-        sp_2048_to_bin(r, out);
-        *outLen = 256;
-    }
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (d != NULL)
-        XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return err;
-}
-
-/* RSA private key operation.
- *
- * in      Array of bytes representing the number to exponentiate, base.
- * inLen   Number of bytes in base.
- * dm      Private exponent.
- * pm      First prime.
- * qm      Second prime.
- * dpm     First prime's CRT exponent.
- * dqm     Second prime's CRT exponent.
- * qim     Inverse of second prime mod p.
- * mm      Modulus.
- * out     Buffer to hold big-endian bytes of exponentiation result.
- *         Must be at least 256 bytes long.
- * outLen  Number of bytes in result.
- * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
- * an array is too long and MEMORY_E when dynamic memory allocation fails.
- */
-int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm,
-    mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
-    byte* out, word32* outLen)
-{
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_digit ad[32 * 2];
-    sp_digit pd[16], qd[16], dpd[16];
-    sp_digit tmpad[32], tmpbd[32];
-#else
-    sp_digit* t = NULL;
-#endif
-    sp_digit* a;
-    sp_digit* p;
-    sp_digit* q;
-    sp_digit* dp;
-    sp_digit* dq;
-    sp_digit* qi;
-    sp_digit* tmp;
-    sp_digit* tmpa;
-    sp_digit* tmpb;
-    sp_digit* r;
-    sp_digit c;
-    int err = MP_OKAY;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    (void)dm;
-    (void)mm;
-
-    if (*outLen < 256)
-        err = MP_TO_E;
-    if (err == MP_OKAY && (inLen > 256 || mp_count_bits(mm) != 2048))
-        err = MP_READ_E;
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 11, NULL,
-                               DYNAMIC_TYPE_TMP_BUFFER);
-        if (t == NULL)
-            err = MEMORY_E;
-    }
-    if (err == MP_OKAY) {
-        a = t;
-        p = a + 32 * 2;
-        q = p + 16;
-        qi = dq = dp = q + 16;
-        tmpa = qi + 16;
-        tmpb = tmpa + 32;
-
-        tmp = t;
-        r = tmp + 32;
-    }
-#else
-    r = a = ad;
-    p = pd;
-    q = qd;
-    qi = dq = dp = dpd;
-    tmpa = tmpad;
-    tmpb = tmpbd;
-    tmp = a + 32;
-#endif
-
-    if (err == MP_OKAY) {
-        sp_2048_from_bin(a, 32, in, inLen);
-        sp_2048_from_mp(p, 16, pm);
-        sp_2048_from_mp(q, 16, qm);
-        sp_2048_from_mp(dp, 16, dpm);
-
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_2048_mod_exp_avx2_16(tmpa, a, dp, 1024, p, 1);
-        else
-#endif
-            err = sp_2048_mod_exp_16(tmpa, a, dp, 1024, p, 1);
-    }
-    if (err == MP_OKAY) {
-        sp_2048_from_mp(dq, 16, dqm);
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_2048_mod_exp_avx2_16(tmpb, a, dq, 1024, q, 1);
-       else
-#endif
-            err = sp_2048_mod_exp_16(tmpb, a, dq, 1024, q, 1);
-    }
-
-    if (err == MP_OKAY) {
-        c = sp_2048_sub_in_place_16(tmpa, tmpb);
-        sp_2048_mask_16(tmp, p, c);
-        sp_2048_add_16(tmpa, tmpa, tmp);
-
-        sp_2048_from_mp(qi, 16, qim);
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            sp_2048_mul_avx2_16(tmpa, tmpa, qi);
-        else
-#endif
-            sp_2048_mul_16(tmpa, tmpa, qi);
-        err = sp_2048_mod_16(tmpa, tmpa, p);
-    }
-
-    if (err == MP_OKAY) {
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            sp_2048_mul_avx2_16(tmpa, q, tmpa);
-        else
-#endif
-            sp_2048_mul_16(tmpa, q, tmpa);
-        XMEMSET(&tmpb[16], 0, sizeof(sp_digit) * 16);
-        sp_2048_add_32(r, tmpb, tmpa);
-
-        sp_2048_to_bin(r, out);
-        *outLen = 256;
-    }
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (t != NULL) {
-        XMEMSET(t, 0, sizeof(sp_digit) * 16 * 11);
-        XFREE(t, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    }
-#else
-    XMEMSET(tmpad, 0, sizeof(tmpad));
-    XMEMSET(tmpbd, 0, sizeof(tmpbd));
-    XMEMSET(pd, 0, sizeof(pd));
-    XMEMSET(qd, 0, sizeof(qd));
-    XMEMSET(dpd, 0, sizeof(dpd));
-#endif
-
-    return err;
-}
-#endif /* WOLFSSL_HAVE_SP_RSA */
-#ifdef WOLFSSL_HAVE_SP_DH
-/* Convert an array of sp_digit to an mp_int.
- *
- * a  A single precision integer.
- * r  A multi-precision integer.
- */
-static int sp_2048_to_mp(sp_digit* a, mp_int* r)
-{
-    int err;
-
-    err = mp_grow(r, (2048 + DIGIT_BIT - 1) / DIGIT_BIT);
-    if (err == MP_OKAY) {
-#if DIGIT_BIT == 64
-        XMEMCPY(r->dp, a, sizeof(sp_digit) * 32);
-        r->used = 32;
-        mp_clamp(r);
-#elif DIGIT_BIT < 64
-        int i, j = 0, s = 0;
-
-        r->dp[0] = 0;
-        for (i = 0; i < 32; i++) {
-            r->dp[j] |= a[i] << s;
-            r->dp[j] &= (1l << DIGIT_BIT) - 1;
-            s = DIGIT_BIT - s;
-            r->dp[++j] = a[i] >> s;
-            while (s + DIGIT_BIT <= 64) {
-                s += DIGIT_BIT;
-                r->dp[j] &= (1l << DIGIT_BIT) - 1;
-                r->dp[++j] = a[i] >> s;
-            }
-            s = 64 - s;
-        }
-        r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT;
-        mp_clamp(r);
-#else
-        int i, j = 0, s = 0;
-
-        r->dp[0] = 0;
-        for (i = 0; i < 32; i++) {
-            r->dp[j] |= ((mp_digit)a[i]) << s;
-            if (s + 64 >= DIGIT_BIT) {
-    #if DIGIT_BIT < 64
-                r->dp[j] &= (1l << DIGIT_BIT) - 1;
-    #endif
-                s = DIGIT_BIT - s;
-                r->dp[++j] = a[i] >> s;
-                s = 64 - s;
-            }
-            else
-                s += 64;
-        }
-        r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT;
-        mp_clamp(r);
-#endif
-    }
-
-    return err;
-}
-
-/* Perform the modular exponentiation for Diffie-Hellman.
- *
- * base  Base. MP integer.
- * exp   Exponent. MP integer.
- * mod   Modulus. MP integer.
- * res   Result. MP integer.
- * returs 0 on success, MP_READ_E if there are too many bytes in an array
- * and MEMORY_E if memory allocation fails.
- */
-int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
-{
-    int err = MP_OKAY;
-    sp_digit b[64], e[32], m[32];
-    sp_digit* r = b;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-    int expBits = mp_count_bits(exp);
-
-    if (mp_count_bits(base) > 2048 || expBits > 2048 ||
-                                                   mp_count_bits(mod) != 2048) {
-        err = MP_READ_E;
-    }
-
-    if (err == MP_OKAY) {
-        sp_2048_from_mp(b, 32, base);
-        sp_2048_from_mp(e, 32, exp);
-        sp_2048_from_mp(m, 32, mod);
-
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_2048_mod_exp_avx2_32(r, b, e, expBits, m, 0);
-        else
-#endif
-            err = sp_2048_mod_exp_32(r, b, e, expBits, m, 0);
-    }
-
-    if (err == MP_OKAY) {
-        err = sp_2048_to_mp(r, res);
-    }
-
-    XMEMSET(e, 0, sizeof(e));
-
-    return err;
-}
-
-/* Perform the modular exponentiation for Diffie-Hellman.
- *
- * base     Base.
- * exp      Array of bytes that is the exponent.
- * expLen   Length of data, in bytes, in exponent.
- * mod      Modulus.
- * out      Buffer to hold big-endian bytes of exponentiation result.
- *          Must be at least 256 bytes long.
- * outLen   Length, in bytes, of exponentiation result.
- * returs 0 on success, MP_READ_E if there are too many bytes in an array
- * and MEMORY_E if memory allocation fails.
- */
-int sp_DhExp_2048(mp_int* base, const byte* exp, word32 expLen,
-    mp_int* mod, byte* out, word32* outLen)
-{
-    int err = MP_OKAY;
-    sp_digit b[64], e[32], m[32];
-    sp_digit* r = b;
-    word32 i;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    if (mp_count_bits(base) > 2048 || expLen > 256 ||
-                                                   mp_count_bits(mod) != 2048) {
-        err = MP_READ_E;
-    }
-
-    if (err == MP_OKAY) {
-        sp_2048_from_mp(b, 32, base);
-        sp_2048_from_bin(e, 32, exp, expLen);
-        sp_2048_from_mp(m, 32, mod);
-
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_2048_mod_exp_avx2_32(r, b, e, expLen * 8, m, 0);
-        else
-#endif
-            err = sp_2048_mod_exp_32(r, b, e, expLen * 8, m, 0);
-    }
-
-    if (err == MP_OKAY) {
-        sp_2048_to_bin(r, out);
-        *outLen = 256;
-        for (i=0; i<256 && out[i] == 0; i++) {
-        }
-        *outLen -= i;
-        XMEMMOVE(out, out + i, *outLen);
-    }
-
-    XMEMSET(e, 0, sizeof(e));
-
-    return err;
-}
-#endif /* WOLFSSL_HAVE_SP_DH */
-
-#endif /* WOLFSSL_SP_NO_2048 */
-
-#ifndef WOLFSSL_SP_NO_3072
-/* Read big endian unsigned byte aray into r.
- *
- * r  A single precision integer.
- * a  Byte array.
- * n  Number of bytes in array to read.
- */
-static void sp_3072_from_bin(sp_digit* r, int max, const byte* a, int n)
-{
-    int i, j = 0, s = 0;
-
-    r[0] = 0;
-    for (i = n-1; i >= 0; i--) {
-        r[j] |= ((sp_digit)a[i]) << s;
-        if (s >= 56) {
-            r[j] &= 0xffffffffffffffffl;
-            s = 64 - s;
-            if (j + 1 >= max)
-                break;
-            r[++j] = a[i] >> s;
-            s = 8 - s;
-        }
-        else
-            s += 8;
-    }
-
-    for (j++; j < max; j++)
-        r[j] = 0;
-}
-
-/* Convert an mp_int to an array of sp_digit.
- *
- * r  A single precision integer.
- * a  A multi-precision integer.
- */
-static void sp_3072_from_mp(sp_digit* r, int max, mp_int* a)
-{
-#if DIGIT_BIT == 64
-    int j;
-
-    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
-
-    for (j = a->used; j < max; j++)
-        r[j] = 0;
-#elif DIGIT_BIT > 64
-    int i, j = 0, s = 0;
-
-    r[0] = 0;
-    for (i = 0; i < a->used && j < max; i++) {
-        r[j] |= a->dp[i] << s;
-        r[j] &= 0xffffffffffffffffl;
-        s = 64 - s;
-        if (j + 1 >= max)
-            break;
-        r[++j] = a->dp[i] >> s;
-        while (s + 64 <= DIGIT_BIT) {
-            s += 64;
-            r[j] &= 0xffffffffffffffffl;
-            if (j + 1 >= max)
-                break;
-            if (s < DIGIT_BIT)
-                r[++j] = a->dp[i] >> s;
-            else
-                r[++j] = 0;
-        }
-        s = DIGIT_BIT - s;
-    }
-
-    for (j++; j < max; j++)
-        r[j] = 0;
-#else
-    int i, j = 0, s = 0;
-
-    r[0] = 0;
-    for (i = 0; i < a->used && j < max; i++) {
-        r[j] |= ((sp_digit)a->dp[i]) << s;
-        if (s + DIGIT_BIT >= 64) {
-            r[j] &= 0xffffffffffffffffl;
-            if (j + 1 >= max)
-                break;
-            s = 64 - s;
-            if (s == DIGIT_BIT) {
-                r[++j] = 0;
-                s = 0;
-            }
-            else {
-                r[++j] = a->dp[i] >> s;
-                s = DIGIT_BIT - s;
-            }
-        }
-        else
-            s += DIGIT_BIT;
-    }
-
-    for (j++; j < max; j++)
-        r[j] = 0;
-#endif
-}
-
-/* Write r as big endian to byte aray.
- * Fixed length number of bytes written: 384
- *
- * r  A single precision integer.
- * a  Byte array.
- */
-static void sp_3072_to_bin(sp_digit* r, byte* a)
-{
-    int i, j, s = 0, b;
-
-    j = 3072 / 8 - 1;
-    a[j] = 0;
-    for (i=0; i<48 && j>=0; i++) {
-        b = 0;
-        a[j--] |= r[i] << s; b += 8 - s;
-        if (j < 0)
-            break;
-        while (b < 64) {
-            a[j--] = r[i] >> b; b += 8;
-            if (j < 0)
-                break;
-        }
-        s = 8 - (b - 64);
-        if (j >= 0)
-            a[j] = 0;
-        if (s != 0)
-            j++;
-    }
-}
-
-/* Multiply a and b into r. (r = a * b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A single precision integer.
- */
-SP_NOINLINE static void sp_3072_mul_24(sp_digit* r, const sp_digit* a,
-    const sp_digit* b)
-{
-    sp_digit tmp[24];
-
-    __asm__ __volatile__ (
-        "#  A[0] * B[0]\n\t"
-        "movq	0(%[b]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "movq	%%rax, (%[tmp])\n\t"
-        "movq	%%rdx, %%rcx\n\t"
-        "#  A[0] * B[1]\n\t"
-        "movq	8(%[b]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[1] * B[0]\n\t"
-        "movq	0(%[b]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "movq	%%rcx, 8(%[tmp])\n\t"
-        "#  A[0] * B[2]\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[1] * B[1]\n\t"
-        "movq	8(%[b]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[2] * B[0]\n\t"
-        "movq	0(%[b]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "movq	%%r8, 16(%[tmp])\n\t"
-        "#  A[0] * B[3]\n\t"
-        "movq	24(%[b]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[1] * B[2]\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[2] * B[1]\n\t"
-        "movq	8(%[b]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[3] * B[0]\n\t"
-        "movq	0(%[b]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "movq	%%rbx, 24(%[tmp])\n\t"
-        "#  A[0] * B[4]\n\t"
-        "movq	32(%[b]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[1] * B[3]\n\t"
-        "movq	24(%[b]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[2] * B[2]\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[3] * B[1]\n\t"
-        "movq	8(%[b]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[4] * B[0]\n\t"
-        "movq	0(%[b]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "movq	%%rcx, 32(%[tmp])\n\t"
-        "#  A[0] * B[5]\n\t"
-        "movq	40(%[b]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[1] * B[4]\n\t"
-        "movq	32(%[b]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[2] * B[3]\n\t"
-        "movq	24(%[b]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[3] * B[2]\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[4] * B[1]\n\t"
-        "movq	8(%[b]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[5] * B[0]\n\t"
-        "movq	0(%[b]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "movq	%%r8, 40(%[tmp])\n\t"
-        "#  A[0] * B[6]\n\t"
-        "movq	48(%[b]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[1] * B[5]\n\t"
-        "movq	40(%[b]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[2] * B[4]\n\t"
-        "movq	32(%[b]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[3] * B[3]\n\t"
-        "movq	24(%[b]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[4] * B[2]\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[5] * B[1]\n\t"
-        "movq	8(%[b]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[6] * B[0]\n\t"
-        "movq	0(%[b]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "movq	%%rbx, 48(%[tmp])\n\t"
-        "#  A[0] * B[7]\n\t"
-        "movq	56(%[b]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[1] * B[6]\n\t"
-        "movq	48(%[b]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[2] * B[5]\n\t"
-        "movq	40(%[b]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[3] * B[4]\n\t"
-        "movq	32(%[b]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[4] * B[3]\n\t"
-        "movq	24(%[b]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[5] * B[2]\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[6] * B[1]\n\t"
-        "movq	8(%[b]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[7] * B[0]\n\t"
-        "movq	0(%[b]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "movq	%%rcx, 56(%[tmp])\n\t"
-        "#  A[0] * B[8]\n\t"
-        "movq	64(%[b]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[1] * B[7]\n\t"
-        "movq	56(%[b]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[2] * B[6]\n\t"
-        "movq	48(%[b]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[3] * B[5]\n\t"
-        "movq	40(%[b]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[4] * B[4]\n\t"
-        "movq	32(%[b]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[5] * B[3]\n\t"
-        "movq	24(%[b]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[6] * B[2]\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[7] * B[1]\n\t"
-        "movq	8(%[b]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[8] * B[0]\n\t"
-        "movq	0(%[b]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "movq	%%r8, 64(%[tmp])\n\t"
-        "#  A[0] * B[9]\n\t"
-        "movq	72(%[b]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[1] * B[8]\n\t"
-        "movq	64(%[b]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[2] * B[7]\n\t"
-        "movq	56(%[b]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[3] * B[6]\n\t"
-        "movq	48(%[b]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[4] * B[5]\n\t"
-        "movq	40(%[b]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[5] * B[4]\n\t"
-        "movq	32(%[b]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[6] * B[3]\n\t"
-        "movq	24(%[b]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[7] * B[2]\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[8] * B[1]\n\t"
-        "movq	8(%[b]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[9] * B[0]\n\t"
-        "movq	0(%[b]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "movq	%%rbx, 72(%[tmp])\n\t"
-        "#  A[0] * B[10]\n\t"
-        "movq	80(%[b]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[1] * B[9]\n\t"
-        "movq	72(%[b]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[2] * B[8]\n\t"
-        "movq	64(%[b]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[3] * B[7]\n\t"
-        "movq	56(%[b]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[4] * B[6]\n\t"
-        "movq	48(%[b]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[5] * B[5]\n\t"
-        "movq	40(%[b]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[6] * B[4]\n\t"
-        "movq	32(%[b]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[7] * B[3]\n\t"
-        "movq	24(%[b]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[8] * B[2]\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[9] * B[1]\n\t"
-        "movq	8(%[b]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[10] * B[0]\n\t"
-        "movq	0(%[b]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "movq	%%rcx, 80(%[tmp])\n\t"
-        "#  A[0] * B[11]\n\t"
-        "movq	88(%[b]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[1] * B[10]\n\t"
-        "movq	80(%[b]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[2] * B[9]\n\t"
-        "movq	72(%[b]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[3] * B[8]\n\t"
-        "movq	64(%[b]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[4] * B[7]\n\t"
-        "movq	56(%[b]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[5] * B[6]\n\t"
-        "movq	48(%[b]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[6] * B[5]\n\t"
-        "movq	40(%[b]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[7] * B[4]\n\t"
-        "movq	32(%[b]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[8] * B[3]\n\t"
-        "movq	24(%[b]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[9] * B[2]\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[10] * B[1]\n\t"
-        "movq	8(%[b]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[11] * B[0]\n\t"
-        "movq	0(%[b]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "movq	%%r8, 88(%[tmp])\n\t"
-        "#  A[0] * B[12]\n\t"
-        "movq	96(%[b]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[1] * B[11]\n\t"
-        "movq	88(%[b]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[2] * B[10]\n\t"
-        "movq	80(%[b]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[3] * B[9]\n\t"
-        "movq	72(%[b]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[4] * B[8]\n\t"
-        "movq	64(%[b]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[5] * B[7]\n\t"
-        "movq	56(%[b]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[6] * B[6]\n\t"
-        "movq	48(%[b]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[7] * B[5]\n\t"
-        "movq	40(%[b]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[8] * B[4]\n\t"
-        "movq	32(%[b]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[9] * B[3]\n\t"
-        "movq	24(%[b]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[10] * B[2]\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[11] * B[1]\n\t"
-        "movq	8(%[b]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[12] * B[0]\n\t"
-        "movq	0(%[b]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "movq	%%rbx, 96(%[tmp])\n\t"
-        "#  A[0] * B[13]\n\t"
-        "movq	104(%[b]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[1] * B[12]\n\t"
-        "movq	96(%[b]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[2] * B[11]\n\t"
-        "movq	88(%[b]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[3] * B[10]\n\t"
-        "movq	80(%[b]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[4] * B[9]\n\t"
-        "movq	72(%[b]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[5] * B[8]\n\t"
-        "movq	64(%[b]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[6] * B[7]\n\t"
-        "movq	56(%[b]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[7] * B[6]\n\t"
-        "movq	48(%[b]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[8] * B[5]\n\t"
-        "movq	40(%[b]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[9] * B[4]\n\t"
-        "movq	32(%[b]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[10] * B[3]\n\t"
-        "movq	24(%[b]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[11] * B[2]\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[12] * B[1]\n\t"
-        "movq	8(%[b]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[13] * B[0]\n\t"
-        "movq	0(%[b]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "movq	%%rcx, 104(%[tmp])\n\t"
-        "#  A[0] * B[14]\n\t"
-        "movq	112(%[b]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[1] * B[13]\n\t"
-        "movq	104(%[b]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[2] * B[12]\n\t"
-        "movq	96(%[b]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[3] * B[11]\n\t"
-        "movq	88(%[b]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[4] * B[10]\n\t"
-        "movq	80(%[b]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[5] * B[9]\n\t"
-        "movq	72(%[b]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[6] * B[8]\n\t"
-        "movq	64(%[b]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[7] * B[7]\n\t"
-        "movq	56(%[b]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[8] * B[6]\n\t"
-        "movq	48(%[b]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[9] * B[5]\n\t"
-        "movq	40(%[b]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[10] * B[4]\n\t"
-        "movq	32(%[b]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[11] * B[3]\n\t"
-        "movq	24(%[b]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[12] * B[2]\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[13] * B[1]\n\t"
-        "movq	8(%[b]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[14] * B[0]\n\t"
-        "movq	0(%[b]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "movq	%%r8, 112(%[tmp])\n\t"
-        "#  A[0] * B[15]\n\t"
-        "movq	120(%[b]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[1] * B[14]\n\t"
-        "movq	112(%[b]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[2] * B[13]\n\t"
-        "movq	104(%[b]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[3] * B[12]\n\t"
-        "movq	96(%[b]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[4] * B[11]\n\t"
-        "movq	88(%[b]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[5] * B[10]\n\t"
-        "movq	80(%[b]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[6] * B[9]\n\t"
-        "movq	72(%[b]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[7] * B[8]\n\t"
-        "movq	64(%[b]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[8] * B[7]\n\t"
-        "movq	56(%[b]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[9] * B[6]\n\t"
-        "movq	48(%[b]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[10] * B[5]\n\t"
-        "movq	40(%[b]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[11] * B[4]\n\t"
-        "movq	32(%[b]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[12] * B[3]\n\t"
-        "movq	24(%[b]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[13] * B[2]\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[14] * B[1]\n\t"
-        "movq	8(%[b]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[15] * B[0]\n\t"
-        "movq	0(%[b]), %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "movq	%%rbx, 120(%[tmp])\n\t"
-        "#  A[0] * B[16]\n\t"
-        "movq	128(%[b]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[1] * B[15]\n\t"
-        "movq	120(%[b]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[2] * B[14]\n\t"
-        "movq	112(%[b]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[3] * B[13]\n\t"
-        "movq	104(%[b]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[4] * B[12]\n\t"
-        "movq	96(%[b]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[5] * B[11]\n\t"
-        "movq	88(%[b]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[6] * B[10]\n\t"
-        "movq	80(%[b]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[7] * B[9]\n\t"
-        "movq	72(%[b]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[8] * B[8]\n\t"
-        "movq	64(%[b]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[9] * B[7]\n\t"
-        "movq	56(%[b]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[10] * B[6]\n\t"
-        "movq	48(%[b]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[11] * B[5]\n\t"
-        "movq	40(%[b]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[12] * B[4]\n\t"
-        "movq	32(%[b]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[13] * B[3]\n\t"
-        "movq	24(%[b]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[14] * B[2]\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[15] * B[1]\n\t"
-        "movq	8(%[b]), %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[16] * B[0]\n\t"
-        "movq	0(%[b]), %%rax\n\t"
-        "mulq	128(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "movq	%%rcx, 128(%[tmp])\n\t"
-        "#  A[0] * B[17]\n\t"
-        "movq	136(%[b]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[1] * B[16]\n\t"
-        "movq	128(%[b]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[2] * B[15]\n\t"
-        "movq	120(%[b]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[3] * B[14]\n\t"
-        "movq	112(%[b]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[4] * B[13]\n\t"
-        "movq	104(%[b]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[5] * B[12]\n\t"
-        "movq	96(%[b]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[6] * B[11]\n\t"
-        "movq	88(%[b]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[7] * B[10]\n\t"
-        "movq	80(%[b]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[8] * B[9]\n\t"
-        "movq	72(%[b]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[9] * B[8]\n\t"
-        "movq	64(%[b]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[10] * B[7]\n\t"
-        "movq	56(%[b]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[11] * B[6]\n\t"
-        "movq	48(%[b]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[12] * B[5]\n\t"
-        "movq	40(%[b]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[13] * B[4]\n\t"
-        "movq	32(%[b]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[14] * B[3]\n\t"
-        "movq	24(%[b]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[15] * B[2]\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[16] * B[1]\n\t"
-        "movq	8(%[b]), %%rax\n\t"
-        "mulq	128(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[17] * B[0]\n\t"
-        "movq	0(%[b]), %%rax\n\t"
-        "mulq	136(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "movq	%%r8, 136(%[tmp])\n\t"
-        "#  A[0] * B[18]\n\t"
-        "movq	144(%[b]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[1] * B[17]\n\t"
-        "movq	136(%[b]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[2] * B[16]\n\t"
-        "movq	128(%[b]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[3] * B[15]\n\t"
-        "movq	120(%[b]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[4] * B[14]\n\t"
-        "movq	112(%[b]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[5] * B[13]\n\t"
-        "movq	104(%[b]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[6] * B[12]\n\t"
-        "movq	96(%[b]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[7] * B[11]\n\t"
-        "movq	88(%[b]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[8] * B[10]\n\t"
-        "movq	80(%[b]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[9] * B[9]\n\t"
-        "movq	72(%[b]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[10] * B[8]\n\t"
-        "movq	64(%[b]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[11] * B[7]\n\t"
-        "movq	56(%[b]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[12] * B[6]\n\t"
-        "movq	48(%[b]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[13] * B[5]\n\t"
-        "movq	40(%[b]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[14] * B[4]\n\t"
-        "movq	32(%[b]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[15] * B[3]\n\t"
-        "movq	24(%[b]), %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[16] * B[2]\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "mulq	128(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[17] * B[1]\n\t"
-        "movq	8(%[b]), %%rax\n\t"
-        "mulq	136(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[18] * B[0]\n\t"
-        "movq	0(%[b]), %%rax\n\t"
-        "mulq	144(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "movq	%%rbx, 144(%[tmp])\n\t"
-        "#  A[0] * B[19]\n\t"
-        "movq	152(%[b]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[1] * B[18]\n\t"
-        "movq	144(%[b]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[2] * B[17]\n\t"
-        "movq	136(%[b]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[3] * B[16]\n\t"
-        "movq	128(%[b]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[4] * B[15]\n\t"
-        "movq	120(%[b]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[5] * B[14]\n\t"
-        "movq	112(%[b]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[6] * B[13]\n\t"
-        "movq	104(%[b]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[7] * B[12]\n\t"
-        "movq	96(%[b]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[8] * B[11]\n\t"
-        "movq	88(%[b]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[9] * B[10]\n\t"
-        "movq	80(%[b]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[10] * B[9]\n\t"
-        "movq	72(%[b]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[11] * B[8]\n\t"
-        "movq	64(%[b]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[12] * B[7]\n\t"
-        "movq	56(%[b]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[13] * B[6]\n\t"
-        "movq	48(%[b]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[14] * B[5]\n\t"
-        "movq	40(%[b]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[15] * B[4]\n\t"
-        "movq	32(%[b]), %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[16] * B[3]\n\t"
-        "movq	24(%[b]), %%rax\n\t"
-        "mulq	128(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[17] * B[2]\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "mulq	136(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[18] * B[1]\n\t"
-        "movq	8(%[b]), %%rax\n\t"
-        "mulq	144(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[19] * B[0]\n\t"
-        "movq	0(%[b]), %%rax\n\t"
-        "mulq	152(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "movq	%%rcx, 152(%[tmp])\n\t"
-        "#  A[0] * B[20]\n\t"
-        "movq	160(%[b]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[1] * B[19]\n\t"
-        "movq	152(%[b]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[2] * B[18]\n\t"
-        "movq	144(%[b]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[3] * B[17]\n\t"
-        "movq	136(%[b]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[4] * B[16]\n\t"
-        "movq	128(%[b]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[5] * B[15]\n\t"
-        "movq	120(%[b]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[6] * B[14]\n\t"
-        "movq	112(%[b]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[7] * B[13]\n\t"
-        "movq	104(%[b]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[8] * B[12]\n\t"
-        "movq	96(%[b]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[9] * B[11]\n\t"
-        "movq	88(%[b]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[10] * B[10]\n\t"
-        "movq	80(%[b]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[11] * B[9]\n\t"
-        "movq	72(%[b]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[12] * B[8]\n\t"
-        "movq	64(%[b]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[13] * B[7]\n\t"
-        "movq	56(%[b]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[14] * B[6]\n\t"
-        "movq	48(%[b]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[15] * B[5]\n\t"
-        "movq	40(%[b]), %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[16] * B[4]\n\t"
-        "movq	32(%[b]), %%rax\n\t"
-        "mulq	128(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[17] * B[3]\n\t"
-        "movq	24(%[b]), %%rax\n\t"
-        "mulq	136(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[18] * B[2]\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "mulq	144(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[19] * B[1]\n\t"
-        "movq	8(%[b]), %%rax\n\t"
-        "mulq	152(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[20] * B[0]\n\t"
-        "movq	0(%[b]), %%rax\n\t"
-        "mulq	160(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "movq	%%r8, 160(%[tmp])\n\t"
-        "#  A[0] * B[21]\n\t"
-        "movq	168(%[b]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[1] * B[20]\n\t"
-        "movq	160(%[b]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[2] * B[19]\n\t"
-        "movq	152(%[b]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[3] * B[18]\n\t"
-        "movq	144(%[b]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[4] * B[17]\n\t"
-        "movq	136(%[b]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[5] * B[16]\n\t"
-        "movq	128(%[b]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[6] * B[15]\n\t"
-        "movq	120(%[b]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[7] * B[14]\n\t"
-        "movq	112(%[b]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[8] * B[13]\n\t"
-        "movq	104(%[b]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[9] * B[12]\n\t"
-        "movq	96(%[b]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[10] * B[11]\n\t"
-        "movq	88(%[b]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[11] * B[10]\n\t"
-        "movq	80(%[b]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[12] * B[9]\n\t"
-        "movq	72(%[b]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[13] * B[8]\n\t"
-        "movq	64(%[b]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[14] * B[7]\n\t"
-        "movq	56(%[b]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[15] * B[6]\n\t"
-        "movq	48(%[b]), %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[16] * B[5]\n\t"
-        "movq	40(%[b]), %%rax\n\t"
-        "mulq	128(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[17] * B[4]\n\t"
-        "movq	32(%[b]), %%rax\n\t"
-        "mulq	136(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[18] * B[3]\n\t"
-        "movq	24(%[b]), %%rax\n\t"
-        "mulq	144(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[19] * B[2]\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "mulq	152(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[20] * B[1]\n\t"
-        "movq	8(%[b]), %%rax\n\t"
-        "mulq	160(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[21] * B[0]\n\t"
-        "movq	0(%[b]), %%rax\n\t"
-        "mulq	168(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "movq	%%rbx, 168(%[tmp])\n\t"
-        "#  A[0] * B[22]\n\t"
-        "movq	176(%[b]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[1] * B[21]\n\t"
-        "movq	168(%[b]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[2] * B[20]\n\t"
-        "movq	160(%[b]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[3] * B[19]\n\t"
-        "movq	152(%[b]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[4] * B[18]\n\t"
-        "movq	144(%[b]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[5] * B[17]\n\t"
-        "movq	136(%[b]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[6] * B[16]\n\t"
-        "movq	128(%[b]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[7] * B[15]\n\t"
-        "movq	120(%[b]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[8] * B[14]\n\t"
-        "movq	112(%[b]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[9] * B[13]\n\t"
-        "movq	104(%[b]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[10] * B[12]\n\t"
-        "movq	96(%[b]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[11] * B[11]\n\t"
-        "movq	88(%[b]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[12] * B[10]\n\t"
-        "movq	80(%[b]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[13] * B[9]\n\t"
-        "movq	72(%[b]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[14] * B[8]\n\t"
-        "movq	64(%[b]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[15] * B[7]\n\t"
-        "movq	56(%[b]), %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[16] * B[6]\n\t"
-        "movq	48(%[b]), %%rax\n\t"
-        "mulq	128(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[17] * B[5]\n\t"
-        "movq	40(%[b]), %%rax\n\t"
-        "mulq	136(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[18] * B[4]\n\t"
-        "movq	32(%[b]), %%rax\n\t"
-        "mulq	144(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[19] * B[3]\n\t"
-        "movq	24(%[b]), %%rax\n\t"
-        "mulq	152(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[20] * B[2]\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "mulq	160(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[21] * B[1]\n\t"
-        "movq	8(%[b]), %%rax\n\t"
-        "mulq	168(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[22] * B[0]\n\t"
-        "movq	0(%[b]), %%rax\n\t"
-        "mulq	176(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "movq	%%rcx, 176(%[tmp])\n\t"
-        "#  A[0] * B[23]\n\t"
-        "movq	184(%[b]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[1] * B[22]\n\t"
-        "movq	176(%[b]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[2] * B[21]\n\t"
-        "movq	168(%[b]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[3] * B[20]\n\t"
-        "movq	160(%[b]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[4] * B[19]\n\t"
-        "movq	152(%[b]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[5] * B[18]\n\t"
-        "movq	144(%[b]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[6] * B[17]\n\t"
-        "movq	136(%[b]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[7] * B[16]\n\t"
-        "movq	128(%[b]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[8] * B[15]\n\t"
-        "movq	120(%[b]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[9] * B[14]\n\t"
-        "movq	112(%[b]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[10] * B[13]\n\t"
-        "movq	104(%[b]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[11] * B[12]\n\t"
-        "movq	96(%[b]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[12] * B[11]\n\t"
-        "movq	88(%[b]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[13] * B[10]\n\t"
-        "movq	80(%[b]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[14] * B[9]\n\t"
-        "movq	72(%[b]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[15] * B[8]\n\t"
-        "movq	64(%[b]), %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[16] * B[7]\n\t"
-        "movq	56(%[b]), %%rax\n\t"
-        "mulq	128(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[17] * B[6]\n\t"
-        "movq	48(%[b]), %%rax\n\t"
-        "mulq	136(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[18] * B[5]\n\t"
-        "movq	40(%[b]), %%rax\n\t"
-        "mulq	144(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[19] * B[4]\n\t"
-        "movq	32(%[b]), %%rax\n\t"
-        "mulq	152(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[20] * B[3]\n\t"
-        "movq	24(%[b]), %%rax\n\t"
-        "mulq	160(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[21] * B[2]\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "mulq	168(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[22] * B[1]\n\t"
-        "movq	8(%[b]), %%rax\n\t"
-        "mulq	176(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[23] * B[0]\n\t"
-        "movq	0(%[b]), %%rax\n\t"
-        "mulq	184(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "movq	%%r8, 184(%[tmp])\n\t"
-        "#  A[1] * B[23]\n\t"
-        "movq	184(%[b]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[2] * B[22]\n\t"
-        "movq	176(%[b]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[3] * B[21]\n\t"
-        "movq	168(%[b]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[4] * B[20]\n\t"
-        "movq	160(%[b]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[5] * B[19]\n\t"
-        "movq	152(%[b]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[6] * B[18]\n\t"
-        "movq	144(%[b]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[7] * B[17]\n\t"
-        "movq	136(%[b]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[8] * B[16]\n\t"
-        "movq	128(%[b]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[9] * B[15]\n\t"
-        "movq	120(%[b]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[10] * B[14]\n\t"
-        "movq	112(%[b]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[11] * B[13]\n\t"
-        "movq	104(%[b]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[12] * B[12]\n\t"
-        "movq	96(%[b]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[13] * B[11]\n\t"
-        "movq	88(%[b]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[14] * B[10]\n\t"
-        "movq	80(%[b]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[15] * B[9]\n\t"
-        "movq	72(%[b]), %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[16] * B[8]\n\t"
-        "movq	64(%[b]), %%rax\n\t"
-        "mulq	128(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[17] * B[7]\n\t"
-        "movq	56(%[b]), %%rax\n\t"
-        "mulq	136(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[18] * B[6]\n\t"
-        "movq	48(%[b]), %%rax\n\t"
-        "mulq	144(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[19] * B[5]\n\t"
-        "movq	40(%[b]), %%rax\n\t"
-        "mulq	152(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[20] * B[4]\n\t"
-        "movq	32(%[b]), %%rax\n\t"
-        "mulq	160(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[21] * B[3]\n\t"
-        "movq	24(%[b]), %%rax\n\t"
-        "mulq	168(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[22] * B[2]\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "mulq	176(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[23] * B[1]\n\t"
-        "movq	8(%[b]), %%rax\n\t"
-        "mulq	184(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "movq	%%rbx, 192(%[r])\n\t"
-        "#  A[2] * B[23]\n\t"
-        "movq	184(%[b]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[3] * B[22]\n\t"
-        "movq	176(%[b]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[4] * B[21]\n\t"
-        "movq	168(%[b]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[5] * B[20]\n\t"
-        "movq	160(%[b]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[6] * B[19]\n\t"
-        "movq	152(%[b]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[7] * B[18]\n\t"
-        "movq	144(%[b]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[8] * B[17]\n\t"
-        "movq	136(%[b]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[9] * B[16]\n\t"
-        "movq	128(%[b]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[10] * B[15]\n\t"
-        "movq	120(%[b]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[11] * B[14]\n\t"
-        "movq	112(%[b]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[12] * B[13]\n\t"
-        "movq	104(%[b]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[13] * B[12]\n\t"
-        "movq	96(%[b]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[14] * B[11]\n\t"
-        "movq	88(%[b]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[15] * B[10]\n\t"
-        "movq	80(%[b]), %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[16] * B[9]\n\t"
-        "movq	72(%[b]), %%rax\n\t"
-        "mulq	128(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[17] * B[8]\n\t"
-        "movq	64(%[b]), %%rax\n\t"
-        "mulq	136(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[18] * B[7]\n\t"
-        "movq	56(%[b]), %%rax\n\t"
-        "mulq	144(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[19] * B[6]\n\t"
-        "movq	48(%[b]), %%rax\n\t"
-        "mulq	152(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[20] * B[5]\n\t"
-        "movq	40(%[b]), %%rax\n\t"
-        "mulq	160(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[21] * B[4]\n\t"
-        "movq	32(%[b]), %%rax\n\t"
-        "mulq	168(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[22] * B[3]\n\t"
-        "movq	24(%[b]), %%rax\n\t"
-        "mulq	176(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[23] * B[2]\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "mulq	184(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "movq	%%rcx, 200(%[r])\n\t"
-        "#  A[3] * B[23]\n\t"
-        "movq	184(%[b]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[4] * B[22]\n\t"
-        "movq	176(%[b]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[5] * B[21]\n\t"
-        "movq	168(%[b]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[6] * B[20]\n\t"
-        "movq	160(%[b]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[7] * B[19]\n\t"
-        "movq	152(%[b]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[8] * B[18]\n\t"
-        "movq	144(%[b]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[9] * B[17]\n\t"
-        "movq	136(%[b]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[10] * B[16]\n\t"
-        "movq	128(%[b]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[11] * B[15]\n\t"
-        "movq	120(%[b]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[12] * B[14]\n\t"
-        "movq	112(%[b]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[13] * B[13]\n\t"
-        "movq	104(%[b]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[14] * B[12]\n\t"
-        "movq	96(%[b]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[15] * B[11]\n\t"
-        "movq	88(%[b]), %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[16] * B[10]\n\t"
-        "movq	80(%[b]), %%rax\n\t"
-        "mulq	128(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[17] * B[9]\n\t"
-        "movq	72(%[b]), %%rax\n\t"
-        "mulq	136(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[18] * B[8]\n\t"
-        "movq	64(%[b]), %%rax\n\t"
-        "mulq	144(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[19] * B[7]\n\t"
-        "movq	56(%[b]), %%rax\n\t"
-        "mulq	152(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[20] * B[6]\n\t"
-        "movq	48(%[b]), %%rax\n\t"
-        "mulq	160(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[21] * B[5]\n\t"
-        "movq	40(%[b]), %%rax\n\t"
-        "mulq	168(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[22] * B[4]\n\t"
-        "movq	32(%[b]), %%rax\n\t"
-        "mulq	176(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[23] * B[3]\n\t"
-        "movq	24(%[b]), %%rax\n\t"
-        "mulq	184(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "movq	%%r8, 208(%[r])\n\t"
-        "#  A[4] * B[23]\n\t"
-        "movq	184(%[b]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[5] * B[22]\n\t"
-        "movq	176(%[b]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[6] * B[21]\n\t"
-        "movq	168(%[b]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[7] * B[20]\n\t"
-        "movq	160(%[b]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[8] * B[19]\n\t"
-        "movq	152(%[b]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[9] * B[18]\n\t"
-        "movq	144(%[b]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[10] * B[17]\n\t"
-        "movq	136(%[b]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[11] * B[16]\n\t"
-        "movq	128(%[b]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[12] * B[15]\n\t"
-        "movq	120(%[b]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[13] * B[14]\n\t"
-        "movq	112(%[b]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[14] * B[13]\n\t"
-        "movq	104(%[b]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[15] * B[12]\n\t"
-        "movq	96(%[b]), %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[16] * B[11]\n\t"
-        "movq	88(%[b]), %%rax\n\t"
-        "mulq	128(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[17] * B[10]\n\t"
-        "movq	80(%[b]), %%rax\n\t"
-        "mulq	136(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[18] * B[9]\n\t"
-        "movq	72(%[b]), %%rax\n\t"
-        "mulq	144(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[19] * B[8]\n\t"
-        "movq	64(%[b]), %%rax\n\t"
-        "mulq	152(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[20] * B[7]\n\t"
-        "movq	56(%[b]), %%rax\n\t"
-        "mulq	160(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[21] * B[6]\n\t"
-        "movq	48(%[b]), %%rax\n\t"
-        "mulq	168(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[22] * B[5]\n\t"
-        "movq	40(%[b]), %%rax\n\t"
-        "mulq	176(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[23] * B[4]\n\t"
-        "movq	32(%[b]), %%rax\n\t"
-        "mulq	184(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "movq	%%rbx, 216(%[r])\n\t"
-        "#  A[5] * B[23]\n\t"
-        "movq	184(%[b]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[6] * B[22]\n\t"
-        "movq	176(%[b]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[7] * B[21]\n\t"
-        "movq	168(%[b]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[8] * B[20]\n\t"
-        "movq	160(%[b]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[9] * B[19]\n\t"
-        "movq	152(%[b]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[10] * B[18]\n\t"
-        "movq	144(%[b]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[11] * B[17]\n\t"
-        "movq	136(%[b]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[12] * B[16]\n\t"
-        "movq	128(%[b]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[13] * B[15]\n\t"
-        "movq	120(%[b]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[14] * B[14]\n\t"
-        "movq	112(%[b]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[15] * B[13]\n\t"
-        "movq	104(%[b]), %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[16] * B[12]\n\t"
-        "movq	96(%[b]), %%rax\n\t"
-        "mulq	128(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[17] * B[11]\n\t"
-        "movq	88(%[b]), %%rax\n\t"
-        "mulq	136(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[18] * B[10]\n\t"
-        "movq	80(%[b]), %%rax\n\t"
-        "mulq	144(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[19] * B[9]\n\t"
-        "movq	72(%[b]), %%rax\n\t"
-        "mulq	152(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[20] * B[8]\n\t"
-        "movq	64(%[b]), %%rax\n\t"
-        "mulq	160(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[21] * B[7]\n\t"
-        "movq	56(%[b]), %%rax\n\t"
-        "mulq	168(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[22] * B[6]\n\t"
-        "movq	48(%[b]), %%rax\n\t"
-        "mulq	176(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[23] * B[5]\n\t"
-        "movq	40(%[b]), %%rax\n\t"
-        "mulq	184(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "movq	%%rcx, 224(%[r])\n\t"
-        "#  A[6] * B[23]\n\t"
-        "movq	184(%[b]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[7] * B[22]\n\t"
-        "movq	176(%[b]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[8] * B[21]\n\t"
-        "movq	168(%[b]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[9] * B[20]\n\t"
-        "movq	160(%[b]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[10] * B[19]\n\t"
-        "movq	152(%[b]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[11] * B[18]\n\t"
-        "movq	144(%[b]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[12] * B[17]\n\t"
-        "movq	136(%[b]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[13] * B[16]\n\t"
-        "movq	128(%[b]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[14] * B[15]\n\t"
-        "movq	120(%[b]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[15] * B[14]\n\t"
-        "movq	112(%[b]), %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[16] * B[13]\n\t"
-        "movq	104(%[b]), %%rax\n\t"
-        "mulq	128(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[17] * B[12]\n\t"
-        "movq	96(%[b]), %%rax\n\t"
-        "mulq	136(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[18] * B[11]\n\t"
-        "movq	88(%[b]), %%rax\n\t"
-        "mulq	144(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[19] * B[10]\n\t"
-        "movq	80(%[b]), %%rax\n\t"
-        "mulq	152(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[20] * B[9]\n\t"
-        "movq	72(%[b]), %%rax\n\t"
-        "mulq	160(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[21] * B[8]\n\t"
-        "movq	64(%[b]), %%rax\n\t"
-        "mulq	168(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[22] * B[7]\n\t"
-        "movq	56(%[b]), %%rax\n\t"
-        "mulq	176(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[23] * B[6]\n\t"
-        "movq	48(%[b]), %%rax\n\t"
-        "mulq	184(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "movq	%%r8, 232(%[r])\n\t"
-        "#  A[7] * B[23]\n\t"
-        "movq	184(%[b]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[8] * B[22]\n\t"
-        "movq	176(%[b]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[9] * B[21]\n\t"
-        "movq	168(%[b]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[10] * B[20]\n\t"
-        "movq	160(%[b]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[11] * B[19]\n\t"
-        "movq	152(%[b]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[12] * B[18]\n\t"
-        "movq	144(%[b]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[13] * B[17]\n\t"
-        "movq	136(%[b]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[14] * B[16]\n\t"
-        "movq	128(%[b]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[15] * B[15]\n\t"
-        "movq	120(%[b]), %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[16] * B[14]\n\t"
-        "movq	112(%[b]), %%rax\n\t"
-        "mulq	128(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[17] * B[13]\n\t"
-        "movq	104(%[b]), %%rax\n\t"
-        "mulq	136(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[18] * B[12]\n\t"
-        "movq	96(%[b]), %%rax\n\t"
-        "mulq	144(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[19] * B[11]\n\t"
-        "movq	88(%[b]), %%rax\n\t"
-        "mulq	152(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[20] * B[10]\n\t"
-        "movq	80(%[b]), %%rax\n\t"
-        "mulq	160(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[21] * B[9]\n\t"
-        "movq	72(%[b]), %%rax\n\t"
-        "mulq	168(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[22] * B[8]\n\t"
-        "movq	64(%[b]), %%rax\n\t"
-        "mulq	176(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[23] * B[7]\n\t"
-        "movq	56(%[b]), %%rax\n\t"
-        "mulq	184(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "movq	%%rbx, 240(%[r])\n\t"
-        "#  A[8] * B[23]\n\t"
-        "movq	184(%[b]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[9] * B[22]\n\t"
-        "movq	176(%[b]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[10] * B[21]\n\t"
-        "movq	168(%[b]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[11] * B[20]\n\t"
-        "movq	160(%[b]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[12] * B[19]\n\t"
-        "movq	152(%[b]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[13] * B[18]\n\t"
-        "movq	144(%[b]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[14] * B[17]\n\t"
-        "movq	136(%[b]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[15] * B[16]\n\t"
-        "movq	128(%[b]), %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[16] * B[15]\n\t"
-        "movq	120(%[b]), %%rax\n\t"
-        "mulq	128(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[17] * B[14]\n\t"
-        "movq	112(%[b]), %%rax\n\t"
-        "mulq	136(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[18] * B[13]\n\t"
-        "movq	104(%[b]), %%rax\n\t"
-        "mulq	144(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[19] * B[12]\n\t"
-        "movq	96(%[b]), %%rax\n\t"
-        "mulq	152(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[20] * B[11]\n\t"
-        "movq	88(%[b]), %%rax\n\t"
-        "mulq	160(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[21] * B[10]\n\t"
-        "movq	80(%[b]), %%rax\n\t"
-        "mulq	168(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[22] * B[9]\n\t"
-        "movq	72(%[b]), %%rax\n\t"
-        "mulq	176(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[23] * B[8]\n\t"
-        "movq	64(%[b]), %%rax\n\t"
-        "mulq	184(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "movq	%%rcx, 248(%[r])\n\t"
-        "#  A[9] * B[23]\n\t"
-        "movq	184(%[b]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[10] * B[22]\n\t"
-        "movq	176(%[b]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[11] * B[21]\n\t"
-        "movq	168(%[b]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[12] * B[20]\n\t"
-        "movq	160(%[b]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[13] * B[19]\n\t"
-        "movq	152(%[b]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[14] * B[18]\n\t"
-        "movq	144(%[b]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[15] * B[17]\n\t"
-        "movq	136(%[b]), %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[16] * B[16]\n\t"
-        "movq	128(%[b]), %%rax\n\t"
-        "mulq	128(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[17] * B[15]\n\t"
-        "movq	120(%[b]), %%rax\n\t"
-        "mulq	136(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[18] * B[14]\n\t"
-        "movq	112(%[b]), %%rax\n\t"
-        "mulq	144(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[19] * B[13]\n\t"
-        "movq	104(%[b]), %%rax\n\t"
-        "mulq	152(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[20] * B[12]\n\t"
-        "movq	96(%[b]), %%rax\n\t"
-        "mulq	160(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[21] * B[11]\n\t"
-        "movq	88(%[b]), %%rax\n\t"
-        "mulq	168(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[22] * B[10]\n\t"
-        "movq	80(%[b]), %%rax\n\t"
-        "mulq	176(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[23] * B[9]\n\t"
-        "movq	72(%[b]), %%rax\n\t"
-        "mulq	184(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "movq	%%r8, 256(%[r])\n\t"
-        "#  A[10] * B[23]\n\t"
-        "movq	184(%[b]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[11] * B[22]\n\t"
-        "movq	176(%[b]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[12] * B[21]\n\t"
-        "movq	168(%[b]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[13] * B[20]\n\t"
-        "movq	160(%[b]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[14] * B[19]\n\t"
-        "movq	152(%[b]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[15] * B[18]\n\t"
-        "movq	144(%[b]), %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[16] * B[17]\n\t"
-        "movq	136(%[b]), %%rax\n\t"
-        "mulq	128(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[17] * B[16]\n\t"
-        "movq	128(%[b]), %%rax\n\t"
-        "mulq	136(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[18] * B[15]\n\t"
-        "movq	120(%[b]), %%rax\n\t"
-        "mulq	144(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[19] * B[14]\n\t"
-        "movq	112(%[b]), %%rax\n\t"
-        "mulq	152(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[20] * B[13]\n\t"
-        "movq	104(%[b]), %%rax\n\t"
-        "mulq	160(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[21] * B[12]\n\t"
-        "movq	96(%[b]), %%rax\n\t"
-        "mulq	168(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[22] * B[11]\n\t"
-        "movq	88(%[b]), %%rax\n\t"
-        "mulq	176(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[23] * B[10]\n\t"
-        "movq	80(%[b]), %%rax\n\t"
-        "mulq	184(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "movq	%%rbx, 264(%[r])\n\t"
-        "#  A[11] * B[23]\n\t"
-        "movq	184(%[b]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[12] * B[22]\n\t"
-        "movq	176(%[b]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[13] * B[21]\n\t"
-        "movq	168(%[b]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[14] * B[20]\n\t"
-        "movq	160(%[b]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[15] * B[19]\n\t"
-        "movq	152(%[b]), %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[16] * B[18]\n\t"
-        "movq	144(%[b]), %%rax\n\t"
-        "mulq	128(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[17] * B[17]\n\t"
-        "movq	136(%[b]), %%rax\n\t"
-        "mulq	136(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[18] * B[16]\n\t"
-        "movq	128(%[b]), %%rax\n\t"
-        "mulq	144(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[19] * B[15]\n\t"
-        "movq	120(%[b]), %%rax\n\t"
-        "mulq	152(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[20] * B[14]\n\t"
-        "movq	112(%[b]), %%rax\n\t"
-        "mulq	160(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[21] * B[13]\n\t"
-        "movq	104(%[b]), %%rax\n\t"
-        "mulq	168(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[22] * B[12]\n\t"
-        "movq	96(%[b]), %%rax\n\t"
-        "mulq	176(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[23] * B[11]\n\t"
-        "movq	88(%[b]), %%rax\n\t"
-        "mulq	184(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "movq	%%rcx, 272(%[r])\n\t"
-        "#  A[12] * B[23]\n\t"
-        "movq	184(%[b]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[13] * B[22]\n\t"
-        "movq	176(%[b]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[14] * B[21]\n\t"
-        "movq	168(%[b]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[15] * B[20]\n\t"
-        "movq	160(%[b]), %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[16] * B[19]\n\t"
-        "movq	152(%[b]), %%rax\n\t"
-        "mulq	128(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[17] * B[18]\n\t"
-        "movq	144(%[b]), %%rax\n\t"
-        "mulq	136(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[18] * B[17]\n\t"
-        "movq	136(%[b]), %%rax\n\t"
-        "mulq	144(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[19] * B[16]\n\t"
-        "movq	128(%[b]), %%rax\n\t"
-        "mulq	152(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[20] * B[15]\n\t"
-        "movq	120(%[b]), %%rax\n\t"
-        "mulq	160(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[21] * B[14]\n\t"
-        "movq	112(%[b]), %%rax\n\t"
-        "mulq	168(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[22] * B[13]\n\t"
-        "movq	104(%[b]), %%rax\n\t"
-        "mulq	176(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[23] * B[12]\n\t"
-        "movq	96(%[b]), %%rax\n\t"
-        "mulq	184(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "movq	%%r8, 280(%[r])\n\t"
-        "#  A[13] * B[23]\n\t"
-        "movq	184(%[b]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[14] * B[22]\n\t"
-        "movq	176(%[b]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[15] * B[21]\n\t"
-        "movq	168(%[b]), %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[16] * B[20]\n\t"
-        "movq	160(%[b]), %%rax\n\t"
-        "mulq	128(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[17] * B[19]\n\t"
-        "movq	152(%[b]), %%rax\n\t"
-        "mulq	136(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[18] * B[18]\n\t"
-        "movq	144(%[b]), %%rax\n\t"
-        "mulq	144(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[19] * B[17]\n\t"
-        "movq	136(%[b]), %%rax\n\t"
-        "mulq	152(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[20] * B[16]\n\t"
-        "movq	128(%[b]), %%rax\n\t"
-        "mulq	160(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[21] * B[15]\n\t"
-        "movq	120(%[b]), %%rax\n\t"
-        "mulq	168(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[22] * B[14]\n\t"
-        "movq	112(%[b]), %%rax\n\t"
-        "mulq	176(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[23] * B[13]\n\t"
-        "movq	104(%[b]), %%rax\n\t"
-        "mulq	184(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "movq	%%rbx, 288(%[r])\n\t"
-        "#  A[14] * B[23]\n\t"
-        "movq	184(%[b]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[15] * B[22]\n\t"
-        "movq	176(%[b]), %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[16] * B[21]\n\t"
-        "movq	168(%[b]), %%rax\n\t"
-        "mulq	128(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[17] * B[20]\n\t"
-        "movq	160(%[b]), %%rax\n\t"
-        "mulq	136(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[18] * B[19]\n\t"
-        "movq	152(%[b]), %%rax\n\t"
-        "mulq	144(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[19] * B[18]\n\t"
-        "movq	144(%[b]), %%rax\n\t"
-        "mulq	152(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[20] * B[17]\n\t"
-        "movq	136(%[b]), %%rax\n\t"
-        "mulq	160(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[21] * B[16]\n\t"
-        "movq	128(%[b]), %%rax\n\t"
-        "mulq	168(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[22] * B[15]\n\t"
-        "movq	120(%[b]), %%rax\n\t"
-        "mulq	176(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[23] * B[14]\n\t"
-        "movq	112(%[b]), %%rax\n\t"
-        "mulq	184(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "movq	%%rcx, 296(%[r])\n\t"
-        "#  A[15] * B[23]\n\t"
-        "movq	184(%[b]), %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[16] * B[22]\n\t"
-        "movq	176(%[b]), %%rax\n\t"
-        "mulq	128(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[17] * B[21]\n\t"
-        "movq	168(%[b]), %%rax\n\t"
-        "mulq	136(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[18] * B[20]\n\t"
-        "movq	160(%[b]), %%rax\n\t"
-        "mulq	144(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[19] * B[19]\n\t"
-        "movq	152(%[b]), %%rax\n\t"
-        "mulq	152(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[20] * B[18]\n\t"
-        "movq	144(%[b]), %%rax\n\t"
-        "mulq	160(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[21] * B[17]\n\t"
-        "movq	136(%[b]), %%rax\n\t"
-        "mulq	168(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[22] * B[16]\n\t"
-        "movq	128(%[b]), %%rax\n\t"
-        "mulq	176(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[23] * B[15]\n\t"
-        "movq	120(%[b]), %%rax\n\t"
-        "mulq	184(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "movq	%%r8, 304(%[r])\n\t"
-        "#  A[16] * B[23]\n\t"
-        "movq	184(%[b]), %%rax\n\t"
-        "mulq	128(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[17] * B[22]\n\t"
-        "movq	176(%[b]), %%rax\n\t"
-        "mulq	136(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[18] * B[21]\n\t"
-        "movq	168(%[b]), %%rax\n\t"
-        "mulq	144(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[19] * B[20]\n\t"
-        "movq	160(%[b]), %%rax\n\t"
-        "mulq	152(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[20] * B[19]\n\t"
-        "movq	152(%[b]), %%rax\n\t"
-        "mulq	160(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[21] * B[18]\n\t"
-        "movq	144(%[b]), %%rax\n\t"
-        "mulq	168(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[22] * B[17]\n\t"
-        "movq	136(%[b]), %%rax\n\t"
-        "mulq	176(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[23] * B[16]\n\t"
-        "movq	128(%[b]), %%rax\n\t"
-        "mulq	184(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "movq	%%rbx, 312(%[r])\n\t"
-        "#  A[17] * B[23]\n\t"
-        "movq	184(%[b]), %%rax\n\t"
-        "mulq	136(%[a])\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[18] * B[22]\n\t"
-        "movq	176(%[b]), %%rax\n\t"
-        "mulq	144(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[19] * B[21]\n\t"
-        "movq	168(%[b]), %%rax\n\t"
-        "mulq	152(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[20] * B[20]\n\t"
-        "movq	160(%[b]), %%rax\n\t"
-        "mulq	160(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[21] * B[19]\n\t"
-        "movq	152(%[b]), %%rax\n\t"
-        "mulq	168(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[22] * B[18]\n\t"
-        "movq	144(%[b]), %%rax\n\t"
-        "mulq	176(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[23] * B[17]\n\t"
-        "movq	136(%[b]), %%rax\n\t"
-        "mulq	184(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "movq	%%rcx, 320(%[r])\n\t"
-        "#  A[18] * B[23]\n\t"
-        "movq	184(%[b]), %%rax\n\t"
-        "mulq	144(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[19] * B[22]\n\t"
-        "movq	176(%[b]), %%rax\n\t"
-        "mulq	152(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[20] * B[21]\n\t"
-        "movq	168(%[b]), %%rax\n\t"
-        "mulq	160(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[21] * B[20]\n\t"
-        "movq	160(%[b]), %%rax\n\t"
-        "mulq	168(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[22] * B[19]\n\t"
-        "movq	152(%[b]), %%rax\n\t"
-        "mulq	176(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[23] * B[18]\n\t"
-        "movq	144(%[b]), %%rax\n\t"
-        "mulq	184(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "movq	%%r8, 328(%[r])\n\t"
-        "#  A[19] * B[23]\n\t"
-        "movq	184(%[b]), %%rax\n\t"
-        "mulq	152(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[20] * B[22]\n\t"
-        "movq	176(%[b]), %%rax\n\t"
-        "mulq	160(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[21] * B[21]\n\t"
-        "movq	168(%[b]), %%rax\n\t"
-        "mulq	168(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[22] * B[20]\n\t"
-        "movq	160(%[b]), %%rax\n\t"
-        "mulq	176(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[23] * B[19]\n\t"
-        "movq	152(%[b]), %%rax\n\t"
-        "mulq	184(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "movq	%%rbx, 336(%[r])\n\t"
-        "#  A[20] * B[23]\n\t"
-        "movq	184(%[b]), %%rax\n\t"
-        "mulq	160(%[a])\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[21] * B[22]\n\t"
-        "movq	176(%[b]), %%rax\n\t"
-        "mulq	168(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[22] * B[21]\n\t"
-        "movq	168(%[b]), %%rax\n\t"
-        "mulq	176(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[23] * B[20]\n\t"
-        "movq	160(%[b]), %%rax\n\t"
-        "mulq	184(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "movq	%%rcx, 344(%[r])\n\t"
-        "#  A[21] * B[23]\n\t"
-        "movq	184(%[b]), %%rax\n\t"
-        "mulq	168(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[22] * B[22]\n\t"
-        "movq	176(%[b]), %%rax\n\t"
-        "mulq	176(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[23] * B[21]\n\t"
-        "movq	168(%[b]), %%rax\n\t"
-        "mulq	184(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "movq	%%r8, 352(%[r])\n\t"
-        "#  A[22] * B[23]\n\t"
-        "movq	184(%[b]), %%rax\n\t"
-        "mulq	176(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[23] * B[22]\n\t"
-        "movq	176(%[b]), %%rax\n\t"
-        "mulq	184(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "movq	%%rbx, 360(%[r])\n\t"
-        "#  A[23] * B[23]\n\t"
-        "movq	184(%[b]), %%rax\n\t"
-        "mulq	184(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "movq	%%rcx, 368(%[r])\n\t"
-        "movq	%%r8, 376(%[r])\n\t"
-        :
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [tmp] "r" (tmp)
-        : "memory", "rax", "rdx", "rbx", "rcx", "r8"
-    );
-
-    XMEMCPY(r, tmp, sizeof(tmp));
-}
-
-/* Square a and put result in r. (r = a * a)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- */
-SP_NOINLINE static void sp_3072_sqr_24(sp_digit* r, const sp_digit* a)
-{
-    sp_digit tmp[24];
-
-    __asm__ __volatile__ (
-        "#  A[0] * A[0]\n\t"
-        "movq	0(%[a]), %%rax\n\t"
-        "mulq	%%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "movq	%%rax, (%[tmp])\n\t"
-        "movq	%%rdx, %%r8\n\t"
-        "#  A[0] * A[1]\n\t"
-        "movq	8(%[a]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "movq	%%r8, 8(%[tmp])\n\t"
-        "#  A[0] * A[2]\n\t"
-        "movq	16(%[a]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "addq	%%rax, %%r9\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "addq	%%rax, %%r9\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[1] * A[1]\n\t"
-        "movq	8(%[a]), %%rax\n\t"
-        "mulq	%%rax\n\t"
-        "addq	%%rax, %%r9\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "movq	%%r9, 16(%[tmp])\n\t"
-        "#  A[0] * A[3]\n\t"
-        "movq	24(%[a]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%r9\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%r9\n\t"
-        "#  A[1] * A[2]\n\t"
-        "movq	16(%[a]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%r9\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%r9\n\t"
-        "movq	%%rcx, 24(%[tmp])\n\t"
-        "#  A[0] * A[4]\n\t"
-        "movq	32(%[a]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[1] * A[3]\n\t"
-        "movq	24(%[a]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[2] * A[2]\n\t"
-        "movq	16(%[a]), %%rax\n\t"
-        "mulq	%%rax\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "movq	%%r8, 32(%[tmp])\n\t"
-        "#  A[0] * A[5]\n\t"
-        "movq	40(%[a]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[1] * A[4]\n\t"
-        "movq	32(%[a]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[2] * A[3]\n\t"
-        "movq	24(%[a]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%r10, %%r9\n\t"
-        "adcq	%%r11, %%rcx\n\t"
-        "adcq	%%r12, %%r8\n\t"
-        "movq	%%r9, 40(%[tmp])\n\t"
-        "#  A[0] * A[6]\n\t"
-        "movq	48(%[a]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[1] * A[5]\n\t"
-        "movq	40(%[a]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[2] * A[4]\n\t"
-        "movq	32(%[a]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[3] * A[3]\n\t"
-        "movq	24(%[a]), %%rax\n\t"
-        "mulq	%%rax\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%rcx\n\t"
-        "adcq	%%r11, %%r8\n\t"
-        "adcq	%%r12, %%r9\n\t"
-        "movq	%%rcx, 48(%[tmp])\n\t"
-        "#  A[0] * A[7]\n\t"
-        "movq	56(%[a]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[1] * A[6]\n\t"
-        "movq	48(%[a]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[2] * A[5]\n\t"
-        "movq	40(%[a]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[3] * A[4]\n\t"
-        "movq	32(%[a]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%r10, %%r8\n\t"
-        "adcq	%%r11, %%r9\n\t"
-        "adcq	%%r12, %%rcx\n\t"
-        "movq	%%r8, 56(%[tmp])\n\t"
-        "#  A[0] * A[8]\n\t"
-        "movq	64(%[a]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[1] * A[7]\n\t"
-        "movq	56(%[a]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[2] * A[6]\n\t"
-        "movq	48(%[a]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[3] * A[5]\n\t"
-        "movq	40(%[a]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[4] * A[4]\n\t"
-        "movq	32(%[a]), %%rax\n\t"
-        "mulq	%%rax\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%r9\n\t"
-        "adcq	%%r11, %%rcx\n\t"
-        "adcq	%%r12, %%r8\n\t"
-        "movq	%%r9, 64(%[tmp])\n\t"
-        "#  A[0] * A[9]\n\t"
-        "movq	72(%[a]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[1] * A[8]\n\t"
-        "movq	64(%[a]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[2] * A[7]\n\t"
-        "movq	56(%[a]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[3] * A[6]\n\t"
-        "movq	48(%[a]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[4] * A[5]\n\t"
-        "movq	40(%[a]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%r10, %%rcx\n\t"
-        "adcq	%%r11, %%r8\n\t"
-        "adcq	%%r12, %%r9\n\t"
-        "movq	%%rcx, 72(%[tmp])\n\t"
-        "#  A[0] * A[10]\n\t"
-        "movq	80(%[a]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[1] * A[9]\n\t"
-        "movq	72(%[a]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[2] * A[8]\n\t"
-        "movq	64(%[a]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[3] * A[7]\n\t"
-        "movq	56(%[a]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[4] * A[6]\n\t"
-        "movq	48(%[a]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[5] * A[5]\n\t"
-        "movq	40(%[a]), %%rax\n\t"
-        "mulq	%%rax\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%r8\n\t"
-        "adcq	%%r11, %%r9\n\t"
-        "adcq	%%r12, %%rcx\n\t"
-        "movq	%%r8, 80(%[tmp])\n\t"
-        "#  A[0] * A[11]\n\t"
-        "movq	88(%[a]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[1] * A[10]\n\t"
-        "movq	80(%[a]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[2] * A[9]\n\t"
-        "movq	72(%[a]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[3] * A[8]\n\t"
-        "movq	64(%[a]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[4] * A[7]\n\t"
-        "movq	56(%[a]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[5] * A[6]\n\t"
-        "movq	48(%[a]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%r10, %%r9\n\t"
-        "adcq	%%r11, %%rcx\n\t"
-        "adcq	%%r12, %%r8\n\t"
-        "movq	%%r9, 88(%[tmp])\n\t"
-        "#  A[0] * A[12]\n\t"
-        "movq	96(%[a]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[1] * A[11]\n\t"
-        "movq	88(%[a]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[2] * A[10]\n\t"
-        "movq	80(%[a]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[3] * A[9]\n\t"
-        "movq	72(%[a]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[4] * A[8]\n\t"
-        "movq	64(%[a]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[5] * A[7]\n\t"
-        "movq	56(%[a]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[6] * A[6]\n\t"
-        "movq	48(%[a]), %%rax\n\t"
-        "mulq	%%rax\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%rcx\n\t"
-        "adcq	%%r11, %%r8\n\t"
-        "adcq	%%r12, %%r9\n\t"
-        "movq	%%rcx, 96(%[tmp])\n\t"
-        "#  A[0] * A[13]\n\t"
-        "movq	104(%[a]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[1] * A[12]\n\t"
-        "movq	96(%[a]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[2] * A[11]\n\t"
-        "movq	88(%[a]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[3] * A[10]\n\t"
-        "movq	80(%[a]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[4] * A[9]\n\t"
-        "movq	72(%[a]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[5] * A[8]\n\t"
-        "movq	64(%[a]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[6] * A[7]\n\t"
-        "movq	56(%[a]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%r10, %%r8\n\t"
-        "adcq	%%r11, %%r9\n\t"
-        "adcq	%%r12, %%rcx\n\t"
-        "movq	%%r8, 104(%[tmp])\n\t"
-        "#  A[0] * A[14]\n\t"
-        "movq	112(%[a]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[1] * A[13]\n\t"
-        "movq	104(%[a]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[2] * A[12]\n\t"
-        "movq	96(%[a]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[3] * A[11]\n\t"
-        "movq	88(%[a]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[4] * A[10]\n\t"
-        "movq	80(%[a]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[5] * A[9]\n\t"
-        "movq	72(%[a]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[6] * A[8]\n\t"
-        "movq	64(%[a]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[7] * A[7]\n\t"
-        "movq	56(%[a]), %%rax\n\t"
-        "mulq	%%rax\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%r9\n\t"
-        "adcq	%%r11, %%rcx\n\t"
-        "adcq	%%r12, %%r8\n\t"
-        "movq	%%r9, 112(%[tmp])\n\t"
-        "#  A[0] * A[15]\n\t"
-        "movq	120(%[a]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[1] * A[14]\n\t"
-        "movq	112(%[a]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[2] * A[13]\n\t"
-        "movq	104(%[a]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[3] * A[12]\n\t"
-        "movq	96(%[a]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[4] * A[11]\n\t"
-        "movq	88(%[a]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[5] * A[10]\n\t"
-        "movq	80(%[a]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[6] * A[9]\n\t"
-        "movq	72(%[a]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[7] * A[8]\n\t"
-        "movq	64(%[a]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%r10, %%rcx\n\t"
-        "adcq	%%r11, %%r8\n\t"
-        "adcq	%%r12, %%r9\n\t"
-        "movq	%%rcx, 120(%[tmp])\n\t"
-        "#  A[0] * A[16]\n\t"
-        "movq	128(%[a]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[1] * A[15]\n\t"
-        "movq	120(%[a]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[2] * A[14]\n\t"
-        "movq	112(%[a]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[3] * A[13]\n\t"
-        "movq	104(%[a]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[4] * A[12]\n\t"
-        "movq	96(%[a]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[5] * A[11]\n\t"
-        "movq	88(%[a]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[6] * A[10]\n\t"
-        "movq	80(%[a]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[7] * A[9]\n\t"
-        "movq	72(%[a]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[8] * A[8]\n\t"
-        "movq	64(%[a]), %%rax\n\t"
-        "mulq	%%rax\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%r8\n\t"
-        "adcq	%%r11, %%r9\n\t"
-        "adcq	%%r12, %%rcx\n\t"
-        "movq	%%r8, 128(%[tmp])\n\t"
-        "#  A[0] * A[17]\n\t"
-        "movq	136(%[a]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[1] * A[16]\n\t"
-        "movq	128(%[a]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[2] * A[15]\n\t"
-        "movq	120(%[a]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[3] * A[14]\n\t"
-        "movq	112(%[a]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[4] * A[13]\n\t"
-        "movq	104(%[a]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[5] * A[12]\n\t"
-        "movq	96(%[a]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[6] * A[11]\n\t"
-        "movq	88(%[a]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[7] * A[10]\n\t"
-        "movq	80(%[a]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[8] * A[9]\n\t"
-        "movq	72(%[a]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%r10, %%r9\n\t"
-        "adcq	%%r11, %%rcx\n\t"
-        "adcq	%%r12, %%r8\n\t"
-        "movq	%%r9, 136(%[tmp])\n\t"
-        "#  A[0] * A[18]\n\t"
-        "movq	144(%[a]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[1] * A[17]\n\t"
-        "movq	136(%[a]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[2] * A[16]\n\t"
-        "movq	128(%[a]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[3] * A[15]\n\t"
-        "movq	120(%[a]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[4] * A[14]\n\t"
-        "movq	112(%[a]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[5] * A[13]\n\t"
-        "movq	104(%[a]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[6] * A[12]\n\t"
-        "movq	96(%[a]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[7] * A[11]\n\t"
-        "movq	88(%[a]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[8] * A[10]\n\t"
-        "movq	80(%[a]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[9] * A[9]\n\t"
-        "movq	72(%[a]), %%rax\n\t"
-        "mulq	%%rax\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%rcx\n\t"
-        "adcq	%%r11, %%r8\n\t"
-        "adcq	%%r12, %%r9\n\t"
-        "movq	%%rcx, 144(%[tmp])\n\t"
-        "#  A[0] * A[19]\n\t"
-        "movq	152(%[a]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[1] * A[18]\n\t"
-        "movq	144(%[a]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[2] * A[17]\n\t"
-        "movq	136(%[a]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[3] * A[16]\n\t"
-        "movq	128(%[a]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[4] * A[15]\n\t"
-        "movq	120(%[a]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[5] * A[14]\n\t"
-        "movq	112(%[a]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[6] * A[13]\n\t"
-        "movq	104(%[a]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[7] * A[12]\n\t"
-        "movq	96(%[a]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[8] * A[11]\n\t"
-        "movq	88(%[a]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[9] * A[10]\n\t"
-        "movq	80(%[a]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%r10, %%r8\n\t"
-        "adcq	%%r11, %%r9\n\t"
-        "adcq	%%r12, %%rcx\n\t"
-        "movq	%%r8, 152(%[tmp])\n\t"
-        "#  A[0] * A[20]\n\t"
-        "movq	160(%[a]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[1] * A[19]\n\t"
-        "movq	152(%[a]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[2] * A[18]\n\t"
-        "movq	144(%[a]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[3] * A[17]\n\t"
-        "movq	136(%[a]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[4] * A[16]\n\t"
-        "movq	128(%[a]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[5] * A[15]\n\t"
-        "movq	120(%[a]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[6] * A[14]\n\t"
-        "movq	112(%[a]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[7] * A[13]\n\t"
-        "movq	104(%[a]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[8] * A[12]\n\t"
-        "movq	96(%[a]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[9] * A[11]\n\t"
-        "movq	88(%[a]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[10] * A[10]\n\t"
-        "movq	80(%[a]), %%rax\n\t"
-        "mulq	%%rax\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%r9\n\t"
-        "adcq	%%r11, %%rcx\n\t"
-        "adcq	%%r12, %%r8\n\t"
-        "movq	%%r9, 160(%[tmp])\n\t"
-        "#  A[0] * A[21]\n\t"
-        "movq	168(%[a]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[1] * A[20]\n\t"
-        "movq	160(%[a]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[2] * A[19]\n\t"
-        "movq	152(%[a]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[3] * A[18]\n\t"
-        "movq	144(%[a]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[4] * A[17]\n\t"
-        "movq	136(%[a]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[5] * A[16]\n\t"
-        "movq	128(%[a]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[6] * A[15]\n\t"
-        "movq	120(%[a]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[7] * A[14]\n\t"
-        "movq	112(%[a]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[8] * A[13]\n\t"
-        "movq	104(%[a]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[9] * A[12]\n\t"
-        "movq	96(%[a]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[10] * A[11]\n\t"
-        "movq	88(%[a]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%r10, %%rcx\n\t"
-        "adcq	%%r11, %%r8\n\t"
-        "adcq	%%r12, %%r9\n\t"
-        "movq	%%rcx, 168(%[tmp])\n\t"
-        "#  A[0] * A[22]\n\t"
-        "movq	176(%[a]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[1] * A[21]\n\t"
-        "movq	168(%[a]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[2] * A[20]\n\t"
-        "movq	160(%[a]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[3] * A[19]\n\t"
-        "movq	152(%[a]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[4] * A[18]\n\t"
-        "movq	144(%[a]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[5] * A[17]\n\t"
-        "movq	136(%[a]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[6] * A[16]\n\t"
-        "movq	128(%[a]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[7] * A[15]\n\t"
-        "movq	120(%[a]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[8] * A[14]\n\t"
-        "movq	112(%[a]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[9] * A[13]\n\t"
-        "movq	104(%[a]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[10] * A[12]\n\t"
-        "movq	96(%[a]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[11] * A[11]\n\t"
-        "movq	88(%[a]), %%rax\n\t"
-        "mulq	%%rax\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%r8\n\t"
-        "adcq	%%r11, %%r9\n\t"
-        "adcq	%%r12, %%rcx\n\t"
-        "movq	%%r8, 176(%[tmp])\n\t"
-        "#  A[0] * A[23]\n\t"
-        "movq	184(%[a]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[1] * A[22]\n\t"
-        "movq	176(%[a]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[2] * A[21]\n\t"
-        "movq	168(%[a]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[3] * A[20]\n\t"
-        "movq	160(%[a]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[4] * A[19]\n\t"
-        "movq	152(%[a]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[5] * A[18]\n\t"
-        "movq	144(%[a]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[6] * A[17]\n\t"
-        "movq	136(%[a]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[7] * A[16]\n\t"
-        "movq	128(%[a]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[8] * A[15]\n\t"
-        "movq	120(%[a]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[9] * A[14]\n\t"
-        "movq	112(%[a]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[10] * A[13]\n\t"
-        "movq	104(%[a]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[11] * A[12]\n\t"
-        "movq	96(%[a]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%r10, %%r9\n\t"
-        "adcq	%%r11, %%rcx\n\t"
-        "adcq	%%r12, %%r8\n\t"
-        "movq	%%r9, 184(%[tmp])\n\t"
-        "#  A[1] * A[23]\n\t"
-        "movq	184(%[a]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[2] * A[22]\n\t"
-        "movq	176(%[a]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[3] * A[21]\n\t"
-        "movq	168(%[a]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[4] * A[20]\n\t"
-        "movq	160(%[a]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[5] * A[19]\n\t"
-        "movq	152(%[a]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[6] * A[18]\n\t"
-        "movq	144(%[a]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[7] * A[17]\n\t"
-        "movq	136(%[a]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[8] * A[16]\n\t"
-        "movq	128(%[a]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[9] * A[15]\n\t"
-        "movq	120(%[a]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[10] * A[14]\n\t"
-        "movq	112(%[a]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[11] * A[13]\n\t"
-        "movq	104(%[a]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[12] * A[12]\n\t"
-        "movq	96(%[a]), %%rax\n\t"
-        "mulq	%%rax\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%rcx\n\t"
-        "adcq	%%r11, %%r8\n\t"
-        "adcq	%%r12, %%r9\n\t"
-        "movq	%%rcx, 192(%[r])\n\t"
-        "#  A[2] * A[23]\n\t"
-        "movq	184(%[a]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[3] * A[22]\n\t"
-        "movq	176(%[a]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[4] * A[21]\n\t"
-        "movq	168(%[a]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[5] * A[20]\n\t"
-        "movq	160(%[a]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[6] * A[19]\n\t"
-        "movq	152(%[a]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[7] * A[18]\n\t"
-        "movq	144(%[a]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[8] * A[17]\n\t"
-        "movq	136(%[a]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[9] * A[16]\n\t"
-        "movq	128(%[a]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[10] * A[15]\n\t"
-        "movq	120(%[a]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[11] * A[14]\n\t"
-        "movq	112(%[a]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[12] * A[13]\n\t"
-        "movq	104(%[a]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%r10, %%r8\n\t"
-        "adcq	%%r11, %%r9\n\t"
-        "adcq	%%r12, %%rcx\n\t"
-        "movq	%%r8, 200(%[r])\n\t"
-        "#  A[3] * A[23]\n\t"
-        "movq	184(%[a]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[4] * A[22]\n\t"
-        "movq	176(%[a]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[5] * A[21]\n\t"
-        "movq	168(%[a]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[6] * A[20]\n\t"
-        "movq	160(%[a]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[7] * A[19]\n\t"
-        "movq	152(%[a]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[8] * A[18]\n\t"
-        "movq	144(%[a]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[9] * A[17]\n\t"
-        "movq	136(%[a]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[10] * A[16]\n\t"
-        "movq	128(%[a]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[11] * A[15]\n\t"
-        "movq	120(%[a]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[12] * A[14]\n\t"
-        "movq	112(%[a]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[13] * A[13]\n\t"
-        "movq	104(%[a]), %%rax\n\t"
-        "mulq	%%rax\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%r9\n\t"
-        "adcq	%%r11, %%rcx\n\t"
-        "adcq	%%r12, %%r8\n\t"
-        "movq	%%r9, 208(%[r])\n\t"
-        "#  A[4] * A[23]\n\t"
-        "movq	184(%[a]), %%rax\n\t"
-        "mulq	32(%[a])\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[5] * A[22]\n\t"
-        "movq	176(%[a]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[6] * A[21]\n\t"
-        "movq	168(%[a]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[7] * A[20]\n\t"
-        "movq	160(%[a]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[8] * A[19]\n\t"
-        "movq	152(%[a]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[9] * A[18]\n\t"
-        "movq	144(%[a]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[10] * A[17]\n\t"
-        "movq	136(%[a]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[11] * A[16]\n\t"
-        "movq	128(%[a]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[12] * A[15]\n\t"
-        "movq	120(%[a]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[13] * A[14]\n\t"
-        "movq	112(%[a]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%r10, %%rcx\n\t"
-        "adcq	%%r11, %%r8\n\t"
-        "adcq	%%r12, %%r9\n\t"
-        "movq	%%rcx, 216(%[r])\n\t"
-        "#  A[5] * A[23]\n\t"
-        "movq	184(%[a]), %%rax\n\t"
-        "mulq	40(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[6] * A[22]\n\t"
-        "movq	176(%[a]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[7] * A[21]\n\t"
-        "movq	168(%[a]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[8] * A[20]\n\t"
-        "movq	160(%[a]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[9] * A[19]\n\t"
-        "movq	152(%[a]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[10] * A[18]\n\t"
-        "movq	144(%[a]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[11] * A[17]\n\t"
-        "movq	136(%[a]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[12] * A[16]\n\t"
-        "movq	128(%[a]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[13] * A[15]\n\t"
-        "movq	120(%[a]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[14] * A[14]\n\t"
-        "movq	112(%[a]), %%rax\n\t"
-        "mulq	%%rax\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%r8\n\t"
-        "adcq	%%r11, %%r9\n\t"
-        "adcq	%%r12, %%rcx\n\t"
-        "movq	%%r8, 224(%[r])\n\t"
-        "#  A[6] * A[23]\n\t"
-        "movq	184(%[a]), %%rax\n\t"
-        "mulq	48(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[7] * A[22]\n\t"
-        "movq	176(%[a]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[8] * A[21]\n\t"
-        "movq	168(%[a]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[9] * A[20]\n\t"
-        "movq	160(%[a]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[10] * A[19]\n\t"
-        "movq	152(%[a]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[11] * A[18]\n\t"
-        "movq	144(%[a]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[12] * A[17]\n\t"
-        "movq	136(%[a]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[13] * A[16]\n\t"
-        "movq	128(%[a]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[14] * A[15]\n\t"
-        "movq	120(%[a]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%r10, %%r9\n\t"
-        "adcq	%%r11, %%rcx\n\t"
-        "adcq	%%r12, %%r8\n\t"
-        "movq	%%r9, 232(%[r])\n\t"
-        "#  A[7] * A[23]\n\t"
-        "movq	184(%[a]), %%rax\n\t"
-        "mulq	56(%[a])\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[8] * A[22]\n\t"
-        "movq	176(%[a]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[9] * A[21]\n\t"
-        "movq	168(%[a]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[10] * A[20]\n\t"
-        "movq	160(%[a]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[11] * A[19]\n\t"
-        "movq	152(%[a]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[12] * A[18]\n\t"
-        "movq	144(%[a]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[13] * A[17]\n\t"
-        "movq	136(%[a]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[14] * A[16]\n\t"
-        "movq	128(%[a]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[15] * A[15]\n\t"
-        "movq	120(%[a]), %%rax\n\t"
-        "mulq	%%rax\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%rcx\n\t"
-        "adcq	%%r11, %%r8\n\t"
-        "adcq	%%r12, %%r9\n\t"
-        "movq	%%rcx, 240(%[r])\n\t"
-        "#  A[8] * A[23]\n\t"
-        "movq	184(%[a]), %%rax\n\t"
-        "mulq	64(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[9] * A[22]\n\t"
-        "movq	176(%[a]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[10] * A[21]\n\t"
-        "movq	168(%[a]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[11] * A[20]\n\t"
-        "movq	160(%[a]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[12] * A[19]\n\t"
-        "movq	152(%[a]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[13] * A[18]\n\t"
-        "movq	144(%[a]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[14] * A[17]\n\t"
-        "movq	136(%[a]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[15] * A[16]\n\t"
-        "movq	128(%[a]), %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%r10, %%r8\n\t"
-        "adcq	%%r11, %%r9\n\t"
-        "adcq	%%r12, %%rcx\n\t"
-        "movq	%%r8, 248(%[r])\n\t"
-        "#  A[9] * A[23]\n\t"
-        "movq	184(%[a]), %%rax\n\t"
-        "mulq	72(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[10] * A[22]\n\t"
-        "movq	176(%[a]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[11] * A[21]\n\t"
-        "movq	168(%[a]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[12] * A[20]\n\t"
-        "movq	160(%[a]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[13] * A[19]\n\t"
-        "movq	152(%[a]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[14] * A[18]\n\t"
-        "movq	144(%[a]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[15] * A[17]\n\t"
-        "movq	136(%[a]), %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[16] * A[16]\n\t"
-        "movq	128(%[a]), %%rax\n\t"
-        "mulq	%%rax\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%r9\n\t"
-        "adcq	%%r11, %%rcx\n\t"
-        "adcq	%%r12, %%r8\n\t"
-        "movq	%%r9, 256(%[r])\n\t"
-        "#  A[10] * A[23]\n\t"
-        "movq	184(%[a]), %%rax\n\t"
-        "mulq	80(%[a])\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[11] * A[22]\n\t"
-        "movq	176(%[a]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[12] * A[21]\n\t"
-        "movq	168(%[a]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[13] * A[20]\n\t"
-        "movq	160(%[a]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[14] * A[19]\n\t"
-        "movq	152(%[a]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[15] * A[18]\n\t"
-        "movq	144(%[a]), %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[16] * A[17]\n\t"
-        "movq	136(%[a]), %%rax\n\t"
-        "mulq	128(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%r10, %%rcx\n\t"
-        "adcq	%%r11, %%r8\n\t"
-        "adcq	%%r12, %%r9\n\t"
-        "movq	%%rcx, 264(%[r])\n\t"
-        "#  A[11] * A[23]\n\t"
-        "movq	184(%[a]), %%rax\n\t"
-        "mulq	88(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[12] * A[22]\n\t"
-        "movq	176(%[a]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[13] * A[21]\n\t"
-        "movq	168(%[a]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[14] * A[20]\n\t"
-        "movq	160(%[a]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[15] * A[19]\n\t"
-        "movq	152(%[a]), %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[16] * A[18]\n\t"
-        "movq	144(%[a]), %%rax\n\t"
-        "mulq	128(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[17] * A[17]\n\t"
-        "movq	136(%[a]), %%rax\n\t"
-        "mulq	%%rax\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%r8\n\t"
-        "adcq	%%r11, %%r9\n\t"
-        "adcq	%%r12, %%rcx\n\t"
-        "movq	%%r8, 272(%[r])\n\t"
-        "#  A[12] * A[23]\n\t"
-        "movq	184(%[a]), %%rax\n\t"
-        "mulq	96(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[13] * A[22]\n\t"
-        "movq	176(%[a]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[14] * A[21]\n\t"
-        "movq	168(%[a]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[15] * A[20]\n\t"
-        "movq	160(%[a]), %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[16] * A[19]\n\t"
-        "movq	152(%[a]), %%rax\n\t"
-        "mulq	128(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[17] * A[18]\n\t"
-        "movq	144(%[a]), %%rax\n\t"
-        "mulq	136(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%r10, %%r9\n\t"
-        "adcq	%%r11, %%rcx\n\t"
-        "adcq	%%r12, %%r8\n\t"
-        "movq	%%r9, 280(%[r])\n\t"
-        "#  A[13] * A[23]\n\t"
-        "movq	184(%[a]), %%rax\n\t"
-        "mulq	104(%[a])\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[14] * A[22]\n\t"
-        "movq	176(%[a]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[15] * A[21]\n\t"
-        "movq	168(%[a]), %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[16] * A[20]\n\t"
-        "movq	160(%[a]), %%rax\n\t"
-        "mulq	128(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[17] * A[19]\n\t"
-        "movq	152(%[a]), %%rax\n\t"
-        "mulq	136(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[18] * A[18]\n\t"
-        "movq	144(%[a]), %%rax\n\t"
-        "mulq	%%rax\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%rcx\n\t"
-        "adcq	%%r11, %%r8\n\t"
-        "adcq	%%r12, %%r9\n\t"
-        "movq	%%rcx, 288(%[r])\n\t"
-        "#  A[14] * A[23]\n\t"
-        "movq	184(%[a]), %%rax\n\t"
-        "mulq	112(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[15] * A[22]\n\t"
-        "movq	176(%[a]), %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[16] * A[21]\n\t"
-        "movq	168(%[a]), %%rax\n\t"
-        "mulq	128(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[17] * A[20]\n\t"
-        "movq	160(%[a]), %%rax\n\t"
-        "mulq	136(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[18] * A[19]\n\t"
-        "movq	152(%[a]), %%rax\n\t"
-        "mulq	144(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%r10, %%r8\n\t"
-        "adcq	%%r11, %%r9\n\t"
-        "adcq	%%r12, %%rcx\n\t"
-        "movq	%%r8, 296(%[r])\n\t"
-        "#  A[15] * A[23]\n\t"
-        "movq	184(%[a]), %%rax\n\t"
-        "mulq	120(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[16] * A[22]\n\t"
-        "movq	176(%[a]), %%rax\n\t"
-        "mulq	128(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[17] * A[21]\n\t"
-        "movq	168(%[a]), %%rax\n\t"
-        "mulq	136(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[18] * A[20]\n\t"
-        "movq	160(%[a]), %%rax\n\t"
-        "mulq	144(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[19] * A[19]\n\t"
-        "movq	152(%[a]), %%rax\n\t"
-        "mulq	%%rax\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%r9\n\t"
-        "adcq	%%r11, %%rcx\n\t"
-        "adcq	%%r12, %%r8\n\t"
-        "movq	%%r9, 304(%[r])\n\t"
-        "#  A[16] * A[23]\n\t"
-        "movq	184(%[a]), %%rax\n\t"
-        "mulq	128(%[a])\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[17] * A[22]\n\t"
-        "movq	176(%[a]), %%rax\n\t"
-        "mulq	136(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[18] * A[21]\n\t"
-        "movq	168(%[a]), %%rax\n\t"
-        "mulq	144(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[19] * A[20]\n\t"
-        "movq	160(%[a]), %%rax\n\t"
-        "mulq	152(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%r10, %%rcx\n\t"
-        "adcq	%%r11, %%r8\n\t"
-        "adcq	%%r12, %%r9\n\t"
-        "movq	%%rcx, 312(%[r])\n\t"
-        "#  A[17] * A[23]\n\t"
-        "movq	184(%[a]), %%rax\n\t"
-        "mulq	136(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[18] * A[22]\n\t"
-        "movq	176(%[a]), %%rax\n\t"
-        "mulq	144(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[19] * A[21]\n\t"
-        "movq	168(%[a]), %%rax\n\t"
-        "mulq	152(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[20] * A[20]\n\t"
-        "movq	160(%[a]), %%rax\n\t"
-        "mulq	%%rax\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%r8\n\t"
-        "adcq	%%r11, %%r9\n\t"
-        "adcq	%%r12, %%rcx\n\t"
-        "movq	%%r8, 320(%[r])\n\t"
-        "#  A[18] * A[23]\n\t"
-        "movq	184(%[a]), %%rax\n\t"
-        "mulq	144(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r10\n\t"
-        "movq	%%rdx, %%r11\n\t"
-        "#  A[19] * A[22]\n\t"
-        "movq	176(%[a]), %%rax\n\t"
-        "mulq	152(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[20] * A[21]\n\t"
-        "movq	168(%[a]), %%rax\n\t"
-        "mulq	160(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "addq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "addq	%%r10, %%r9\n\t"
-        "adcq	%%r11, %%rcx\n\t"
-        "adcq	%%r12, %%r8\n\t"
-        "movq	%%r9, 328(%[r])\n\t"
-        "#  A[19] * A[23]\n\t"
-        "movq	184(%[a]), %%rax\n\t"
-        "mulq	152(%[a])\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%r9\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%r9\n\t"
-        "#  A[20] * A[22]\n\t"
-        "movq	176(%[a]), %%rax\n\t"
-        "mulq	160(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%r9\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%r9\n\t"
-        "#  A[21] * A[21]\n\t"
-        "movq	168(%[a]), %%rax\n\t"
-        "mulq	%%rax\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%r9\n\t"
-        "movq	%%rcx, 336(%[r])\n\t"
-        "#  A[20] * A[23]\n\t"
-        "movq	184(%[a]), %%rax\n\t"
-        "mulq	160(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[21] * A[22]\n\t"
-        "movq	176(%[a]), %%rax\n\t"
-        "mulq	168(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "movq	%%r8, 344(%[r])\n\t"
-        "#  A[21] * A[23]\n\t"
-        "movq	184(%[a]), %%rax\n\t"
-        "mulq	168(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "addq	%%rax, %%r9\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "addq	%%rax, %%r9\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[22] * A[22]\n\t"
-        "movq	176(%[a]), %%rax\n\t"
-        "mulq	%%rax\n\t"
-        "addq	%%rax, %%r9\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "movq	%%r9, 352(%[r])\n\t"
-        "#  A[22] * A[23]\n\t"
-        "movq	184(%[a]), %%rax\n\t"
-        "mulq	176(%[a])\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%r9\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%r9\n\t"
-        "movq	%%rcx, 360(%[r])\n\t"
-        "#  A[23] * A[23]\n\t"
-        "movq	184(%[a]), %%rax\n\t"
-        "mulq	%%rax\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "movq	%%r8, 368(%[r])\n\t"
-        "movq	%%r9, 376(%[r])\n\t"
-        :
-        : [r] "r" (r), [a] "r" (a), [tmp] "r" (tmp)
-        : "memory", "rax", "rdx", "rcx", "r8", "r9", "r10", "r11", "r12"
-    );
-
-    XMEMCPY(r, tmp, sizeof(tmp));
-}
-
-#ifdef HAVE_INTEL_AVX2
-/* Multiply a and b into r. (r = a * b)
- *
- * r   Result of multiplication.
- * a   First number to multiply.
- * b   Second number to multiply.
- */
-SP_NOINLINE static void sp_3072_mul_avx2_24(sp_digit* r, const sp_digit* a,
-        const sp_digit* b)
-{
-    sp_digit tmp[2*24];
-
-    __asm__ __volatile__ (
-        "movq	0(%[a]), %%rdx\n\t"
-        "xorq	%%r15, %%r15\n\t"
-        "# A[0] * B[0]\n\t"
-        "mulx	0(%[b]), %%r10, %%r11\n\t"
-        "# A[0] * B[1]\n\t"
-        "mulx	8(%[b]), %%r8, %%r12\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "# A[0] * B[2]\n\t"
-        "mulx	16(%[b]), %%r8, %%r13\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "# A[0] * B[3]\n\t"
-        "mulx	24(%[b]), %%r8, %%r14\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "movq	%%r10, 0(%[t])\n\t"
-        "movq	%%r11, 8(%[t])\n\t"
-        "movq	%%r12, 16(%[t])\n\t"
-        "movq	%%r13, 24(%[t])\n\t"
-        "# A[0] * B[4]\n\t"
-        "mulx	32(%[b]), %%r8, %%rax\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "# A[0] * B[5]\n\t"
-        "mulx	40(%[b]), %%r8, %%r10\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "# A[0] * B[6]\n\t"
-        "mulx	48(%[b]), %%r8, %%r11\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "# A[0] * B[7]\n\t"
-        "mulx	56(%[b]), %%r8, %%r12\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "movq	%%r14, 32(%[t])\n\t"
-        "movq	%%rax, 40(%[t])\n\t"
-        "movq	%%r10, 48(%[t])\n\t"
-        "movq	%%r11, 56(%[t])\n\t"
-        "# A[0] * B[8]\n\t"
-        "mulx	64(%[b]), %%r8, %%r13\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "# A[0] * B[9]\n\t"
-        "mulx	72(%[b]), %%r8, %%r14\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "# A[0] * B[10]\n\t"
-        "mulx	80(%[b]), %%r8, %%rax\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "# A[0] * B[11]\n\t"
-        "mulx	88(%[b]), %%r8, %%r10\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "movq	%%r12, 64(%[t])\n\t"
-        "movq	%%r13, 72(%[t])\n\t"
-        "movq	%%r14, 80(%[t])\n\t"
-        "movq	%%rax, 88(%[t])\n\t"
-        "# A[0] * B[12]\n\t"
-        "mulx	96(%[b]), %%r8, %%r11\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "# A[0] * B[13]\n\t"
-        "mulx	104(%[b]), %%r8, %%r12\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "# A[0] * B[14]\n\t"
-        "mulx	112(%[b]), %%r8, %%r13\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "# A[0] * B[15]\n\t"
-        "mulx	120(%[b]), %%r8, %%r14\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "movq	%%r10, 96(%[t])\n\t"
-        "movq	%%r11, 104(%[t])\n\t"
-        "movq	%%r12, 112(%[t])\n\t"
-        "movq	%%r13, 120(%[t])\n\t"
-        "# A[0] * B[16]\n\t"
-        "mulx	128(%[b]), %%r8, %%rax\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "# A[0] * B[17]\n\t"
-        "mulx	136(%[b]), %%r8, %%r10\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "# A[0] * B[18]\n\t"
-        "mulx	144(%[b]), %%r8, %%r11\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "# A[0] * B[19]\n\t"
-        "mulx	152(%[b]), %%r8, %%r12\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "movq	%%r14, 128(%[t])\n\t"
-        "movq	%%rax, 136(%[t])\n\t"
-        "movq	%%r10, 144(%[t])\n\t"
-        "movq	%%r11, 152(%[t])\n\t"
-        "# A[0] * B[20]\n\t"
-        "mulx	160(%[b]), %%r8, %%r13\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "# A[0] * B[21]\n\t"
-        "mulx	168(%[b]), %%r8, %%r14\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "# A[0] * B[22]\n\t"
-        "mulx	176(%[b]), %%r8, %%rax\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "# A[0] * B[23]\n\t"
-        "mulx	184(%[b]), %%r8, %%r10\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adcxq	%%r15, %%r10\n\t"
-        "movq	%%r15, %%rcx\n\t"
-        "adcxq	%%r15, %%rcx\n\t"
-        "movq	%%r12, 160(%[t])\n\t"
-        "movq	%%r13, 168(%[t])\n\t"
-        "movq	%%r14, 176(%[t])\n\t"
-        "movq	%%rax, 184(%[t])\n\t"
-        "movq	%%r10, 192(%[t])\n\t"
-        "movq	8(%[a]), %%rdx\n\t"
-        "xorq	%%r15, %%r15\n\t"
-        "movq	8(%[t]), %%r11\n\t"
-        "movq	16(%[t]), %%r12\n\t"
-        "movq	24(%[t]), %%r13\n\t"
-        "movq	32(%[t]), %%r14\n\t"
-        "movq	40(%[t]), %%rax\n\t"
-        "# A[1] * B[0]\n\t"
-        "mulx	0(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[1] * B[1]\n\t"
-        "mulx	8(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[1] * B[2]\n\t"
-        "mulx	16(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[1] * B[3]\n\t"
-        "mulx	24(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "movq	%%r11, 8(%[t])\n\t"
-        "movq	%%r12, 16(%[t])\n\t"
-        "movq	%%r13, 24(%[t])\n\t"
-        "movq	%%r14, 32(%[t])\n\t"
-        "movq	48(%[t]), %%r10\n\t"
-        "movq	56(%[t]), %%r11\n\t"
-        "movq	64(%[t]), %%r12\n\t"
-        "movq	72(%[t]), %%r13\n\t"
-        "# A[1] * B[4]\n\t"
-        "mulx	32(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[1] * B[5]\n\t"
-        "mulx	40(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[1] * B[6]\n\t"
-        "mulx	48(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[1] * B[7]\n\t"
-        "mulx	56(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "movq	%%rax, 40(%[t])\n\t"
-        "movq	%%r10, 48(%[t])\n\t"
-        "movq	%%r11, 56(%[t])\n\t"
-        "movq	%%r12, 64(%[t])\n\t"
-        "movq	80(%[t]), %%r14\n\t"
-        "movq	88(%[t]), %%rax\n\t"
-        "movq	96(%[t]), %%r10\n\t"
-        "movq	104(%[t]), %%r11\n\t"
-        "# A[1] * B[8]\n\t"
-        "mulx	64(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[1] * B[9]\n\t"
-        "mulx	72(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[1] * B[10]\n\t"
-        "mulx	80(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[1] * B[11]\n\t"
-        "mulx	88(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "movq	%%r13, 72(%[t])\n\t"
-        "movq	%%r14, 80(%[t])\n\t"
-        "movq	%%rax, 88(%[t])\n\t"
-        "movq	%%r10, 96(%[t])\n\t"
-        "movq	112(%[t]), %%r12\n\t"
-        "movq	120(%[t]), %%r13\n\t"
-        "movq	128(%[t]), %%r14\n\t"
-        "movq	136(%[t]), %%rax\n\t"
-        "# A[1] * B[12]\n\t"
-        "mulx	96(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[1] * B[13]\n\t"
-        "mulx	104(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[1] * B[14]\n\t"
-        "mulx	112(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[1] * B[15]\n\t"
-        "mulx	120(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "movq	%%r11, 104(%[t])\n\t"
-        "movq	%%r12, 112(%[t])\n\t"
-        "movq	%%r13, 120(%[t])\n\t"
-        "movq	%%r14, 128(%[t])\n\t"
-        "movq	144(%[t]), %%r10\n\t"
-        "movq	152(%[t]), %%r11\n\t"
-        "movq	160(%[t]), %%r12\n\t"
-        "movq	168(%[t]), %%r13\n\t"
-        "# A[1] * B[16]\n\t"
-        "mulx	128(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[1] * B[17]\n\t"
-        "mulx	136(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[1] * B[18]\n\t"
-        "mulx	144(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[1] * B[19]\n\t"
-        "mulx	152(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "movq	%%rax, 136(%[t])\n\t"
-        "movq	%%r10, 144(%[t])\n\t"
-        "movq	%%r11, 152(%[t])\n\t"
-        "movq	%%r12, 160(%[t])\n\t"
-        "movq	176(%[t]), %%r14\n\t"
-        "movq	184(%[t]), %%rax\n\t"
-        "movq	192(%[t]), %%r10\n\t"
-        "# A[1] * B[20]\n\t"
-        "mulx	160(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[1] * B[21]\n\t"
-        "mulx	168(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[1] * B[22]\n\t"
-        "mulx	176(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[1] * B[23]\n\t"
-        "mulx	184(%[b]), %%r8, %%r9\n\t"
-        "movq	%%r15, %%r11\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "adcxq	%%rcx, %%r11\n\t"
-        "movq	%%r15, %%rcx\n\t"
-        "adoxq	%%r15, %%rcx\n\t"
-        "adcxq	%%r15, %%rcx\n\t"
-        "movq	%%r13, 168(%[t])\n\t"
-        "movq	%%r14, 176(%[t])\n\t"
-        "movq	%%rax, 184(%[t])\n\t"
-        "movq	%%r10, 192(%[t])\n\t"
-        "movq	%%r11, 200(%[t])\n\t"
-        "movq	16(%[a]), %%rdx\n\t"
-        "xorq	%%r15, %%r15\n\t"
-        "movq	16(%[t]), %%r12\n\t"
-        "movq	24(%[t]), %%r13\n\t"
-        "movq	32(%[t]), %%r14\n\t"
-        "movq	40(%[t]), %%rax\n\t"
-        "movq	48(%[t]), %%r10\n\t"
-        "# A[2] * B[0]\n\t"
-        "mulx	0(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[2] * B[1]\n\t"
-        "mulx	8(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[2] * B[2]\n\t"
-        "mulx	16(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[2] * B[3]\n\t"
-        "mulx	24(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "movq	%%r12, 16(%[t])\n\t"
-        "movq	%%r13, 24(%[t])\n\t"
-        "movq	%%r14, 32(%[t])\n\t"
-        "movq	%%rax, 40(%[t])\n\t"
-        "movq	56(%[t]), %%r11\n\t"
-        "movq	64(%[t]), %%r12\n\t"
-        "movq	72(%[t]), %%r13\n\t"
-        "movq	80(%[t]), %%r14\n\t"
-        "# A[2] * B[4]\n\t"
-        "mulx	32(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[2] * B[5]\n\t"
-        "mulx	40(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[2] * B[6]\n\t"
-        "mulx	48(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[2] * B[7]\n\t"
-        "mulx	56(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "movq	%%r10, 48(%[t])\n\t"
-        "movq	%%r11, 56(%[t])\n\t"
-        "movq	%%r12, 64(%[t])\n\t"
-        "movq	%%r13, 72(%[t])\n\t"
-        "movq	88(%[t]), %%rax\n\t"
-        "movq	96(%[t]), %%r10\n\t"
-        "movq	104(%[t]), %%r11\n\t"
-        "movq	112(%[t]), %%r12\n\t"
-        "# A[2] * B[8]\n\t"
-        "mulx	64(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[2] * B[9]\n\t"
-        "mulx	72(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[2] * B[10]\n\t"
-        "mulx	80(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[2] * B[11]\n\t"
-        "mulx	88(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "movq	%%r14, 80(%[t])\n\t"
-        "movq	%%rax, 88(%[t])\n\t"
-        "movq	%%r10, 96(%[t])\n\t"
-        "movq	%%r11, 104(%[t])\n\t"
-        "movq	120(%[t]), %%r13\n\t"
-        "movq	128(%[t]), %%r14\n\t"
-        "movq	136(%[t]), %%rax\n\t"
-        "movq	144(%[t]), %%r10\n\t"
-        "# A[2] * B[12]\n\t"
-        "mulx	96(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[2] * B[13]\n\t"
-        "mulx	104(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[2] * B[14]\n\t"
-        "mulx	112(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[2] * B[15]\n\t"
-        "mulx	120(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "movq	%%r12, 112(%[t])\n\t"
-        "movq	%%r13, 120(%[t])\n\t"
-        "movq	%%r14, 128(%[t])\n\t"
-        "movq	%%rax, 136(%[t])\n\t"
-        "movq	152(%[t]), %%r11\n\t"
-        "movq	160(%[t]), %%r12\n\t"
-        "movq	168(%[t]), %%r13\n\t"
-        "movq	176(%[t]), %%r14\n\t"
-        "# A[2] * B[16]\n\t"
-        "mulx	128(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[2] * B[17]\n\t"
-        "mulx	136(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[2] * B[18]\n\t"
-        "mulx	144(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[2] * B[19]\n\t"
-        "mulx	152(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "movq	%%r10, 144(%[t])\n\t"
-        "movq	%%r11, 152(%[t])\n\t"
-        "movq	%%r12, 160(%[t])\n\t"
-        "movq	%%r13, 168(%[t])\n\t"
-        "movq	184(%[t]), %%rax\n\t"
-        "movq	192(%[t]), %%r10\n\t"
-        "movq	200(%[t]), %%r11\n\t"
-        "# A[2] * B[20]\n\t"
-        "mulx	160(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[2] * B[21]\n\t"
-        "mulx	168(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[2] * B[22]\n\t"
-        "mulx	176(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[2] * B[23]\n\t"
-        "mulx	184(%[b]), %%r8, %%r9\n\t"
-        "movq	%%r15, %%r12\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "adcxq	%%rcx, %%r12\n\t"
-        "movq	%%r15, %%rcx\n\t"
-        "adoxq	%%r15, %%rcx\n\t"
-        "adcxq	%%r15, %%rcx\n\t"
-        "movq	%%r14, 176(%[t])\n\t"
-        "movq	%%rax, 184(%[t])\n\t"
-        "movq	%%r10, 192(%[t])\n\t"
-        "movq	%%r11, 200(%[t])\n\t"
-        "movq	%%r12, 208(%[t])\n\t"
-        "movq	24(%[a]), %%rdx\n\t"
-        "xorq	%%r15, %%r15\n\t"
-        "movq	24(%[t]), %%r13\n\t"
-        "movq	32(%[t]), %%r14\n\t"
-        "movq	40(%[t]), %%rax\n\t"
-        "movq	48(%[t]), %%r10\n\t"
-        "movq	56(%[t]), %%r11\n\t"
-        "# A[3] * B[0]\n\t"
-        "mulx	0(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[3] * B[1]\n\t"
-        "mulx	8(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[3] * B[2]\n\t"
-        "mulx	16(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[3] * B[3]\n\t"
-        "mulx	24(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "movq	%%r13, 24(%[t])\n\t"
-        "movq	%%r14, 32(%[t])\n\t"
-        "movq	%%rax, 40(%[t])\n\t"
-        "movq	%%r10, 48(%[t])\n\t"
-        "movq	64(%[t]), %%r12\n\t"
-        "movq	72(%[t]), %%r13\n\t"
-        "movq	80(%[t]), %%r14\n\t"
-        "movq	88(%[t]), %%rax\n\t"
-        "# A[3] * B[4]\n\t"
-        "mulx	32(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[3] * B[5]\n\t"
-        "mulx	40(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[3] * B[6]\n\t"
-        "mulx	48(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[3] * B[7]\n\t"
-        "mulx	56(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "movq	%%r11, 56(%[t])\n\t"
-        "movq	%%r12, 64(%[t])\n\t"
-        "movq	%%r13, 72(%[t])\n\t"
-        "movq	%%r14, 80(%[t])\n\t"
-        "movq	96(%[t]), %%r10\n\t"
-        "movq	104(%[t]), %%r11\n\t"
-        "movq	112(%[t]), %%r12\n\t"
-        "movq	120(%[t]), %%r13\n\t"
-        "# A[3] * B[8]\n\t"
-        "mulx	64(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[3] * B[9]\n\t"
-        "mulx	72(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[3] * B[10]\n\t"
-        "mulx	80(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[3] * B[11]\n\t"
-        "mulx	88(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "movq	%%rax, 88(%[t])\n\t"
-        "movq	%%r10, 96(%[t])\n\t"
-        "movq	%%r11, 104(%[t])\n\t"
-        "movq	%%r12, 112(%[t])\n\t"
-        "movq	128(%[t]), %%r14\n\t"
-        "movq	136(%[t]), %%rax\n\t"
-        "movq	144(%[t]), %%r10\n\t"
-        "movq	152(%[t]), %%r11\n\t"
-        "# A[3] * B[12]\n\t"
-        "mulx	96(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[3] * B[13]\n\t"
-        "mulx	104(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[3] * B[14]\n\t"
-        "mulx	112(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[3] * B[15]\n\t"
-        "mulx	120(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "movq	%%r13, 120(%[t])\n\t"
-        "movq	%%r14, 128(%[t])\n\t"
-        "movq	%%rax, 136(%[t])\n\t"
-        "movq	%%r10, 144(%[t])\n\t"
-        "movq	160(%[t]), %%r12\n\t"
-        "movq	168(%[t]), %%r13\n\t"
-        "movq	176(%[t]), %%r14\n\t"
-        "movq	184(%[t]), %%rax\n\t"
-        "# A[3] * B[16]\n\t"
-        "mulx	128(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[3] * B[17]\n\t"
-        "mulx	136(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[3] * B[18]\n\t"
-        "mulx	144(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[3] * B[19]\n\t"
-        "mulx	152(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "movq	%%r11, 152(%[t])\n\t"
-        "movq	%%r12, 160(%[t])\n\t"
-        "movq	%%r13, 168(%[t])\n\t"
-        "movq	%%r14, 176(%[t])\n\t"
-        "movq	192(%[t]), %%r10\n\t"
-        "movq	200(%[t]), %%r11\n\t"
-        "movq	208(%[t]), %%r12\n\t"
-        "# A[3] * B[20]\n\t"
-        "mulx	160(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[3] * B[21]\n\t"
-        "mulx	168(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[3] * B[22]\n\t"
-        "mulx	176(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[3] * B[23]\n\t"
-        "mulx	184(%[b]), %%r8, %%r9\n\t"
-        "movq	%%r15, %%r13\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "adcxq	%%rcx, %%r13\n\t"
-        "movq	%%r15, %%rcx\n\t"
-        "adoxq	%%r15, %%rcx\n\t"
-        "adcxq	%%r15, %%rcx\n\t"
-        "movq	%%rax, 184(%[t])\n\t"
-        "movq	%%r10, 192(%[t])\n\t"
-        "movq	%%r11, 200(%[t])\n\t"
-        "movq	%%r12, 208(%[t])\n\t"
-        "movq	%%r13, 216(%[t])\n\t"
-        "movq	32(%[a]), %%rdx\n\t"
-        "xorq	%%r15, %%r15\n\t"
-        "movq	32(%[t]), %%r14\n\t"
-        "movq	40(%[t]), %%rax\n\t"
-        "movq	48(%[t]), %%r10\n\t"
-        "movq	56(%[t]), %%r11\n\t"
-        "movq	64(%[t]), %%r12\n\t"
-        "# A[4] * B[0]\n\t"
-        "mulx	0(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[4] * B[1]\n\t"
-        "mulx	8(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[4] * B[2]\n\t"
-        "mulx	16(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[4] * B[3]\n\t"
-        "mulx	24(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "movq	%%r14, 32(%[t])\n\t"
-        "movq	%%rax, 40(%[t])\n\t"
-        "movq	%%r10, 48(%[t])\n\t"
-        "movq	%%r11, 56(%[t])\n\t"
-        "movq	72(%[t]), %%r13\n\t"
-        "movq	80(%[t]), %%r14\n\t"
-        "movq	88(%[t]), %%rax\n\t"
-        "movq	96(%[t]), %%r10\n\t"
-        "# A[4] * B[4]\n\t"
-        "mulx	32(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[4] * B[5]\n\t"
-        "mulx	40(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[4] * B[6]\n\t"
-        "mulx	48(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[4] * B[7]\n\t"
-        "mulx	56(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "movq	%%r12, 64(%[t])\n\t"
-        "movq	%%r13, 72(%[t])\n\t"
-        "movq	%%r14, 80(%[t])\n\t"
-        "movq	%%rax, 88(%[t])\n\t"
-        "movq	104(%[t]), %%r11\n\t"
-        "movq	112(%[t]), %%r12\n\t"
-        "movq	120(%[t]), %%r13\n\t"
-        "movq	128(%[t]), %%r14\n\t"
-        "# A[4] * B[8]\n\t"
-        "mulx	64(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[4] * B[9]\n\t"
-        "mulx	72(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[4] * B[10]\n\t"
-        "mulx	80(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[4] * B[11]\n\t"
-        "mulx	88(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "movq	%%r10, 96(%[t])\n\t"
-        "movq	%%r11, 104(%[t])\n\t"
-        "movq	%%r12, 112(%[t])\n\t"
-        "movq	%%r13, 120(%[t])\n\t"
-        "movq	136(%[t]), %%rax\n\t"
-        "movq	144(%[t]), %%r10\n\t"
-        "movq	152(%[t]), %%r11\n\t"
-        "movq	160(%[t]), %%r12\n\t"
-        "# A[4] * B[12]\n\t"
-        "mulx	96(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[4] * B[13]\n\t"
-        "mulx	104(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[4] * B[14]\n\t"
-        "mulx	112(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[4] * B[15]\n\t"
-        "mulx	120(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "movq	%%r14, 128(%[t])\n\t"
-        "movq	%%rax, 136(%[t])\n\t"
-        "movq	%%r10, 144(%[t])\n\t"
-        "movq	%%r11, 152(%[t])\n\t"
-        "movq	168(%[t]), %%r13\n\t"
-        "movq	176(%[t]), %%r14\n\t"
-        "movq	184(%[t]), %%rax\n\t"
-        "movq	192(%[t]), %%r10\n\t"
-        "# A[4] * B[16]\n\t"
-        "mulx	128(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[4] * B[17]\n\t"
-        "mulx	136(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[4] * B[18]\n\t"
-        "mulx	144(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[4] * B[19]\n\t"
-        "mulx	152(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "movq	%%r12, 160(%[t])\n\t"
-        "movq	%%r13, 168(%[t])\n\t"
-        "movq	%%r14, 176(%[t])\n\t"
-        "movq	%%rax, 184(%[t])\n\t"
-        "movq	200(%[t]), %%r11\n\t"
-        "movq	208(%[t]), %%r12\n\t"
-        "movq	216(%[t]), %%r13\n\t"
-        "# A[4] * B[20]\n\t"
-        "mulx	160(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[4] * B[21]\n\t"
-        "mulx	168(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[4] * B[22]\n\t"
-        "mulx	176(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[4] * B[23]\n\t"
-        "mulx	184(%[b]), %%r8, %%r9\n\t"
-        "movq	%%r15, %%r14\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "adcxq	%%rcx, %%r14\n\t"
-        "movq	%%r15, %%rcx\n\t"
-        "adoxq	%%r15, %%rcx\n\t"
-        "adcxq	%%r15, %%rcx\n\t"
-        "movq	%%r10, 192(%[t])\n\t"
-        "movq	%%r11, 200(%[t])\n\t"
-        "movq	%%r12, 208(%[t])\n\t"
-        "movq	%%r13, 216(%[t])\n\t"
-        "movq	%%r14, 224(%[t])\n\t"
-        "movq	40(%[a]), %%rdx\n\t"
-        "xorq	%%r15, %%r15\n\t"
-        "movq	40(%[t]), %%rax\n\t"
-        "movq	48(%[t]), %%r10\n\t"
-        "movq	56(%[t]), %%r11\n\t"
-        "movq	64(%[t]), %%r12\n\t"
-        "movq	72(%[t]), %%r13\n\t"
-        "# A[5] * B[0]\n\t"
-        "mulx	0(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[5] * B[1]\n\t"
-        "mulx	8(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[5] * B[2]\n\t"
-        "mulx	16(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[5] * B[3]\n\t"
-        "mulx	24(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "movq	%%rax, 40(%[t])\n\t"
-        "movq	%%r10, 48(%[t])\n\t"
-        "movq	%%r11, 56(%[t])\n\t"
-        "movq	%%r12, 64(%[t])\n\t"
-        "movq	80(%[t]), %%r14\n\t"
-        "movq	88(%[t]), %%rax\n\t"
-        "movq	96(%[t]), %%r10\n\t"
-        "movq	104(%[t]), %%r11\n\t"
-        "# A[5] * B[4]\n\t"
-        "mulx	32(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[5] * B[5]\n\t"
-        "mulx	40(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[5] * B[6]\n\t"
-        "mulx	48(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[5] * B[7]\n\t"
-        "mulx	56(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "movq	%%r13, 72(%[t])\n\t"
-        "movq	%%r14, 80(%[t])\n\t"
-        "movq	%%rax, 88(%[t])\n\t"
-        "movq	%%r10, 96(%[t])\n\t"
-        "movq	112(%[t]), %%r12\n\t"
-        "movq	120(%[t]), %%r13\n\t"
-        "movq	128(%[t]), %%r14\n\t"
-        "movq	136(%[t]), %%rax\n\t"
-        "# A[5] * B[8]\n\t"
-        "mulx	64(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[5] * B[9]\n\t"
-        "mulx	72(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[5] * B[10]\n\t"
-        "mulx	80(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[5] * B[11]\n\t"
-        "mulx	88(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "movq	%%r11, 104(%[t])\n\t"
-        "movq	%%r12, 112(%[t])\n\t"
-        "movq	%%r13, 120(%[t])\n\t"
-        "movq	%%r14, 128(%[t])\n\t"
-        "movq	144(%[t]), %%r10\n\t"
-        "movq	152(%[t]), %%r11\n\t"
-        "movq	160(%[t]), %%r12\n\t"
-        "movq	168(%[t]), %%r13\n\t"
-        "# A[5] * B[12]\n\t"
-        "mulx	96(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[5] * B[13]\n\t"
-        "mulx	104(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[5] * B[14]\n\t"
-        "mulx	112(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[5] * B[15]\n\t"
-        "mulx	120(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "movq	%%rax, 136(%[t])\n\t"
-        "movq	%%r10, 144(%[t])\n\t"
-        "movq	%%r11, 152(%[t])\n\t"
-        "movq	%%r12, 160(%[t])\n\t"
-        "movq	176(%[t]), %%r14\n\t"
-        "movq	184(%[t]), %%rax\n\t"
-        "movq	192(%[t]), %%r10\n\t"
-        "movq	200(%[t]), %%r11\n\t"
-        "# A[5] * B[16]\n\t"
-        "mulx	128(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[5] * B[17]\n\t"
-        "mulx	136(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[5] * B[18]\n\t"
-        "mulx	144(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[5] * B[19]\n\t"
-        "mulx	152(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "movq	%%r13, 168(%[t])\n\t"
-        "movq	%%r14, 176(%[t])\n\t"
-        "movq	%%rax, 184(%[t])\n\t"
-        "movq	%%r10, 192(%[t])\n\t"
-        "movq	208(%[t]), %%r12\n\t"
-        "movq	216(%[t]), %%r13\n\t"
-        "movq	224(%[t]), %%r14\n\t"
-        "# A[5] * B[20]\n\t"
-        "mulx	160(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[5] * B[21]\n\t"
-        "mulx	168(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[5] * B[22]\n\t"
-        "mulx	176(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[5] * B[23]\n\t"
-        "mulx	184(%[b]), %%r8, %%r9\n\t"
-        "movq	%%r15, %%rax\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "adcxq	%%rcx, %%rax\n\t"
-        "movq	%%r15, %%rcx\n\t"
-        "adoxq	%%r15, %%rcx\n\t"
-        "adcxq	%%r15, %%rcx\n\t"
-        "movq	%%r11, 200(%[t])\n\t"
-        "movq	%%r12, 208(%[t])\n\t"
-        "movq	%%r13, 216(%[t])\n\t"
-        "movq	%%r14, 224(%[t])\n\t"
-        "movq	%%rax, 232(%[t])\n\t"
-        "movq	48(%[a]), %%rdx\n\t"
-        "xorq	%%r15, %%r15\n\t"
-        "movq	48(%[t]), %%r10\n\t"
-        "movq	56(%[t]), %%r11\n\t"
-        "movq	64(%[t]), %%r12\n\t"
-        "movq	72(%[t]), %%r13\n\t"
-        "movq	80(%[t]), %%r14\n\t"
-        "# A[6] * B[0]\n\t"
-        "mulx	0(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[6] * B[1]\n\t"
-        "mulx	8(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[6] * B[2]\n\t"
-        "mulx	16(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[6] * B[3]\n\t"
-        "mulx	24(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "movq	%%r10, 48(%[t])\n\t"
-        "movq	%%r11, 56(%[t])\n\t"
-        "movq	%%r12, 64(%[t])\n\t"
-        "movq	%%r13, 72(%[t])\n\t"
-        "movq	88(%[t]), %%rax\n\t"
-        "movq	96(%[t]), %%r10\n\t"
-        "movq	104(%[t]), %%r11\n\t"
-        "movq	112(%[t]), %%r12\n\t"
-        "# A[6] * B[4]\n\t"
-        "mulx	32(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[6] * B[5]\n\t"
-        "mulx	40(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[6] * B[6]\n\t"
-        "mulx	48(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[6] * B[7]\n\t"
-        "mulx	56(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "movq	%%r14, 80(%[t])\n\t"
-        "movq	%%rax, 88(%[t])\n\t"
-        "movq	%%r10, 96(%[t])\n\t"
-        "movq	%%r11, 104(%[t])\n\t"
-        "movq	120(%[t]), %%r13\n\t"
-        "movq	128(%[t]), %%r14\n\t"
-        "movq	136(%[t]), %%rax\n\t"
-        "movq	144(%[t]), %%r10\n\t"
-        "# A[6] * B[8]\n\t"
-        "mulx	64(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[6] * B[9]\n\t"
-        "mulx	72(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[6] * B[10]\n\t"
-        "mulx	80(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[6] * B[11]\n\t"
-        "mulx	88(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "movq	%%r12, 112(%[t])\n\t"
-        "movq	%%r13, 120(%[t])\n\t"
-        "movq	%%r14, 128(%[t])\n\t"
-        "movq	%%rax, 136(%[t])\n\t"
-        "movq	152(%[t]), %%r11\n\t"
-        "movq	160(%[t]), %%r12\n\t"
-        "movq	168(%[t]), %%r13\n\t"
-        "movq	176(%[t]), %%r14\n\t"
-        "# A[6] * B[12]\n\t"
-        "mulx	96(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[6] * B[13]\n\t"
-        "mulx	104(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[6] * B[14]\n\t"
-        "mulx	112(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[6] * B[15]\n\t"
-        "mulx	120(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "movq	%%r10, 144(%[t])\n\t"
-        "movq	%%r11, 152(%[t])\n\t"
-        "movq	%%r12, 160(%[t])\n\t"
-        "movq	%%r13, 168(%[t])\n\t"
-        "movq	184(%[t]), %%rax\n\t"
-        "movq	192(%[t]), %%r10\n\t"
-        "movq	200(%[t]), %%r11\n\t"
-        "movq	208(%[t]), %%r12\n\t"
-        "# A[6] * B[16]\n\t"
-        "mulx	128(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[6] * B[17]\n\t"
-        "mulx	136(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[6] * B[18]\n\t"
-        "mulx	144(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[6] * B[19]\n\t"
-        "mulx	152(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "movq	%%r14, 176(%[t])\n\t"
-        "movq	%%rax, 184(%[t])\n\t"
-        "movq	%%r10, 192(%[t])\n\t"
-        "movq	%%r11, 200(%[t])\n\t"
-        "movq	216(%[t]), %%r13\n\t"
-        "movq	224(%[t]), %%r14\n\t"
-        "movq	232(%[t]), %%rax\n\t"
-        "# A[6] * B[20]\n\t"
-        "mulx	160(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[6] * B[21]\n\t"
-        "mulx	168(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[6] * B[22]\n\t"
-        "mulx	176(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[6] * B[23]\n\t"
-        "mulx	184(%[b]), %%r8, %%r9\n\t"
-        "movq	%%r15, %%r10\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "adcxq	%%rcx, %%r10\n\t"
-        "movq	%%r15, %%rcx\n\t"
-        "adoxq	%%r15, %%rcx\n\t"
-        "adcxq	%%r15, %%rcx\n\t"
-        "movq	%%r12, 208(%[t])\n\t"
-        "movq	%%r13, 216(%[t])\n\t"
-        "movq	%%r14, 224(%[t])\n\t"
-        "movq	%%rax, 232(%[t])\n\t"
-        "movq	%%r10, 240(%[t])\n\t"
-        "movq	56(%[a]), %%rdx\n\t"
-        "xorq	%%r15, %%r15\n\t"
-        "movq	56(%[t]), %%r11\n\t"
-        "movq	64(%[t]), %%r12\n\t"
-        "movq	72(%[t]), %%r13\n\t"
-        "movq	80(%[t]), %%r14\n\t"
-        "movq	88(%[t]), %%rax\n\t"
-        "# A[7] * B[0]\n\t"
-        "mulx	0(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[7] * B[1]\n\t"
-        "mulx	8(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[7] * B[2]\n\t"
-        "mulx	16(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[7] * B[3]\n\t"
-        "mulx	24(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "movq	%%r11, 56(%[t])\n\t"
-        "movq	%%r12, 64(%[t])\n\t"
-        "movq	%%r13, 72(%[t])\n\t"
-        "movq	%%r14, 80(%[t])\n\t"
-        "movq	96(%[t]), %%r10\n\t"
-        "movq	104(%[t]), %%r11\n\t"
-        "movq	112(%[t]), %%r12\n\t"
-        "movq	120(%[t]), %%r13\n\t"
-        "# A[7] * B[4]\n\t"
-        "mulx	32(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[7] * B[5]\n\t"
-        "mulx	40(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[7] * B[6]\n\t"
-        "mulx	48(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[7] * B[7]\n\t"
-        "mulx	56(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "movq	%%rax, 88(%[t])\n\t"
-        "movq	%%r10, 96(%[t])\n\t"
-        "movq	%%r11, 104(%[t])\n\t"
-        "movq	%%r12, 112(%[t])\n\t"
-        "movq	128(%[t]), %%r14\n\t"
-        "movq	136(%[t]), %%rax\n\t"
-        "movq	144(%[t]), %%r10\n\t"
-        "movq	152(%[t]), %%r11\n\t"
-        "# A[7] * B[8]\n\t"
-        "mulx	64(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[7] * B[9]\n\t"
-        "mulx	72(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[7] * B[10]\n\t"
-        "mulx	80(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[7] * B[11]\n\t"
-        "mulx	88(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "movq	%%r13, 120(%[t])\n\t"
-        "movq	%%r14, 128(%[t])\n\t"
-        "movq	%%rax, 136(%[t])\n\t"
-        "movq	%%r10, 144(%[t])\n\t"
-        "movq	160(%[t]), %%r12\n\t"
-        "movq	168(%[t]), %%r13\n\t"
-        "movq	176(%[t]), %%r14\n\t"
-        "movq	184(%[t]), %%rax\n\t"
-        "# A[7] * B[12]\n\t"
-        "mulx	96(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[7] * B[13]\n\t"
-        "mulx	104(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[7] * B[14]\n\t"
-        "mulx	112(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[7] * B[15]\n\t"
-        "mulx	120(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "movq	%%r11, 152(%[t])\n\t"
-        "movq	%%r12, 160(%[t])\n\t"
-        "movq	%%r13, 168(%[t])\n\t"
-        "movq	%%r14, 176(%[t])\n\t"
-        "movq	192(%[t]), %%r10\n\t"
-        "movq	200(%[t]), %%r11\n\t"
-        "movq	208(%[t]), %%r12\n\t"
-        "movq	216(%[t]), %%r13\n\t"
-        "# A[7] * B[16]\n\t"
-        "mulx	128(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[7] * B[17]\n\t"
-        "mulx	136(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[7] * B[18]\n\t"
-        "mulx	144(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[7] * B[19]\n\t"
-        "mulx	152(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "movq	%%rax, 184(%[t])\n\t"
-        "movq	%%r10, 192(%[t])\n\t"
-        "movq	%%r11, 200(%[t])\n\t"
-        "movq	%%r12, 208(%[t])\n\t"
-        "movq	224(%[t]), %%r14\n\t"
-        "movq	232(%[t]), %%rax\n\t"
-        "movq	240(%[t]), %%r10\n\t"
-        "# A[7] * B[20]\n\t"
-        "mulx	160(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[7] * B[21]\n\t"
-        "mulx	168(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[7] * B[22]\n\t"
-        "mulx	176(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[7] * B[23]\n\t"
-        "mulx	184(%[b]), %%r8, %%r9\n\t"
-        "movq	%%r15, %%r11\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "adcxq	%%rcx, %%r11\n\t"
-        "movq	%%r15, %%rcx\n\t"
-        "adoxq	%%r15, %%rcx\n\t"
-        "adcxq	%%r15, %%rcx\n\t"
-        "movq	%%r13, 216(%[t])\n\t"
-        "movq	%%r14, 224(%[t])\n\t"
-        "movq	%%rax, 232(%[t])\n\t"
-        "movq	%%r10, 240(%[t])\n\t"
-        "movq	%%r11, 248(%[t])\n\t"
-        "movq	64(%[a]), %%rdx\n\t"
-        "xorq	%%r15, %%r15\n\t"
-        "movq	64(%[t]), %%r12\n\t"
-        "movq	72(%[t]), %%r13\n\t"
-        "movq	80(%[t]), %%r14\n\t"
-        "movq	88(%[t]), %%rax\n\t"
-        "movq	96(%[t]), %%r10\n\t"
-        "# A[8] * B[0]\n\t"
-        "mulx	0(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[8] * B[1]\n\t"
-        "mulx	8(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[8] * B[2]\n\t"
-        "mulx	16(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[8] * B[3]\n\t"
-        "mulx	24(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "movq	%%r12, 64(%[t])\n\t"
-        "movq	%%r13, 72(%[t])\n\t"
-        "movq	%%r14, 80(%[t])\n\t"
-        "movq	%%rax, 88(%[t])\n\t"
-        "movq	104(%[t]), %%r11\n\t"
-        "movq	112(%[t]), %%r12\n\t"
-        "movq	120(%[t]), %%r13\n\t"
-        "movq	128(%[t]), %%r14\n\t"
-        "# A[8] * B[4]\n\t"
-        "mulx	32(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[8] * B[5]\n\t"
-        "mulx	40(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[8] * B[6]\n\t"
-        "mulx	48(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[8] * B[7]\n\t"
-        "mulx	56(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "movq	%%r10, 96(%[t])\n\t"
-        "movq	%%r11, 104(%[t])\n\t"
-        "movq	%%r12, 112(%[t])\n\t"
-        "movq	%%r13, 120(%[t])\n\t"
-        "movq	136(%[t]), %%rax\n\t"
-        "movq	144(%[t]), %%r10\n\t"
-        "movq	152(%[t]), %%r11\n\t"
-        "movq	160(%[t]), %%r12\n\t"
-        "# A[8] * B[8]\n\t"
-        "mulx	64(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[8] * B[9]\n\t"
-        "mulx	72(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[8] * B[10]\n\t"
-        "mulx	80(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[8] * B[11]\n\t"
-        "mulx	88(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "movq	%%r14, 128(%[t])\n\t"
-        "movq	%%rax, 136(%[t])\n\t"
-        "movq	%%r10, 144(%[t])\n\t"
-        "movq	%%r11, 152(%[t])\n\t"
-        "movq	168(%[t]), %%r13\n\t"
-        "movq	176(%[t]), %%r14\n\t"
-        "movq	184(%[t]), %%rax\n\t"
-        "movq	192(%[t]), %%r10\n\t"
-        "# A[8] * B[12]\n\t"
-        "mulx	96(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[8] * B[13]\n\t"
-        "mulx	104(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[8] * B[14]\n\t"
-        "mulx	112(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[8] * B[15]\n\t"
-        "mulx	120(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "movq	%%r12, 160(%[t])\n\t"
-        "movq	%%r13, 168(%[t])\n\t"
-        "movq	%%r14, 176(%[t])\n\t"
-        "movq	%%rax, 184(%[t])\n\t"
-        "movq	200(%[t]), %%r11\n\t"
-        "movq	208(%[t]), %%r12\n\t"
-        "movq	216(%[t]), %%r13\n\t"
-        "movq	224(%[t]), %%r14\n\t"
-        "# A[8] * B[16]\n\t"
-        "mulx	128(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[8] * B[17]\n\t"
-        "mulx	136(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[8] * B[18]\n\t"
-        "mulx	144(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[8] * B[19]\n\t"
-        "mulx	152(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "movq	%%r10, 192(%[t])\n\t"
-        "movq	%%r11, 200(%[t])\n\t"
-        "movq	%%r12, 208(%[t])\n\t"
-        "movq	%%r13, 216(%[t])\n\t"
-        "movq	232(%[t]), %%rax\n\t"
-        "movq	240(%[t]), %%r10\n\t"
-        "movq	248(%[t]), %%r11\n\t"
-        "# A[8] * B[20]\n\t"
-        "mulx	160(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[8] * B[21]\n\t"
-        "mulx	168(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[8] * B[22]\n\t"
-        "mulx	176(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[8] * B[23]\n\t"
-        "mulx	184(%[b]), %%r8, %%r9\n\t"
-        "movq	%%r15, %%r12\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "adcxq	%%rcx, %%r12\n\t"
-        "movq	%%r15, %%rcx\n\t"
-        "adoxq	%%r15, %%rcx\n\t"
-        "adcxq	%%r15, %%rcx\n\t"
-        "movq	%%r14, 224(%[t])\n\t"
-        "movq	%%rax, 232(%[t])\n\t"
-        "movq	%%r10, 240(%[t])\n\t"
-        "movq	%%r11, 248(%[t])\n\t"
-        "movq	%%r12, 256(%[t])\n\t"
-        "movq	72(%[a]), %%rdx\n\t"
-        "xorq	%%r15, %%r15\n\t"
-        "movq	72(%[t]), %%r13\n\t"
-        "movq	80(%[t]), %%r14\n\t"
-        "movq	88(%[t]), %%rax\n\t"
-        "movq	96(%[t]), %%r10\n\t"
-        "movq	104(%[t]), %%r11\n\t"
-        "# A[9] * B[0]\n\t"
-        "mulx	0(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[9] * B[1]\n\t"
-        "mulx	8(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[9] * B[2]\n\t"
-        "mulx	16(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[9] * B[3]\n\t"
-        "mulx	24(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "movq	%%r13, 72(%[t])\n\t"
-        "movq	%%r14, 80(%[t])\n\t"
-        "movq	%%rax, 88(%[t])\n\t"
-        "movq	%%r10, 96(%[t])\n\t"
-        "movq	112(%[t]), %%r12\n\t"
-        "movq	120(%[t]), %%r13\n\t"
-        "movq	128(%[t]), %%r14\n\t"
-        "movq	136(%[t]), %%rax\n\t"
-        "# A[9] * B[4]\n\t"
-        "mulx	32(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[9] * B[5]\n\t"
-        "mulx	40(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[9] * B[6]\n\t"
-        "mulx	48(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[9] * B[7]\n\t"
-        "mulx	56(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "movq	%%r11, 104(%[t])\n\t"
-        "movq	%%r12, 112(%[t])\n\t"
-        "movq	%%r13, 120(%[t])\n\t"
-        "movq	%%r14, 128(%[t])\n\t"
-        "movq	144(%[t]), %%r10\n\t"
-        "movq	152(%[t]), %%r11\n\t"
-        "movq	160(%[t]), %%r12\n\t"
-        "movq	168(%[t]), %%r13\n\t"
-        "# A[9] * B[8]\n\t"
-        "mulx	64(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[9] * B[9]\n\t"
-        "mulx	72(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[9] * B[10]\n\t"
-        "mulx	80(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[9] * B[11]\n\t"
-        "mulx	88(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "movq	%%rax, 136(%[t])\n\t"
-        "movq	%%r10, 144(%[t])\n\t"
-        "movq	%%r11, 152(%[t])\n\t"
-        "movq	%%r12, 160(%[t])\n\t"
-        "movq	176(%[t]), %%r14\n\t"
-        "movq	184(%[t]), %%rax\n\t"
-        "movq	192(%[t]), %%r10\n\t"
-        "movq	200(%[t]), %%r11\n\t"
-        "# A[9] * B[12]\n\t"
-        "mulx	96(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[9] * B[13]\n\t"
-        "mulx	104(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[9] * B[14]\n\t"
-        "mulx	112(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[9] * B[15]\n\t"
-        "mulx	120(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "movq	%%r13, 168(%[t])\n\t"
-        "movq	%%r14, 176(%[t])\n\t"
-        "movq	%%rax, 184(%[t])\n\t"
-        "movq	%%r10, 192(%[t])\n\t"
-        "movq	208(%[t]), %%r12\n\t"
-        "movq	216(%[t]), %%r13\n\t"
-        "movq	224(%[t]), %%r14\n\t"
-        "movq	232(%[t]), %%rax\n\t"
-        "# A[9] * B[16]\n\t"
-        "mulx	128(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[9] * B[17]\n\t"
-        "mulx	136(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[9] * B[18]\n\t"
-        "mulx	144(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[9] * B[19]\n\t"
-        "mulx	152(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "movq	%%r11, 200(%[t])\n\t"
-        "movq	%%r12, 208(%[t])\n\t"
-        "movq	%%r13, 216(%[t])\n\t"
-        "movq	%%r14, 224(%[t])\n\t"
-        "movq	240(%[t]), %%r10\n\t"
-        "movq	248(%[t]), %%r11\n\t"
-        "movq	256(%[t]), %%r12\n\t"
-        "# A[9] * B[20]\n\t"
-        "mulx	160(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[9] * B[21]\n\t"
-        "mulx	168(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[9] * B[22]\n\t"
-        "mulx	176(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[9] * B[23]\n\t"
-        "mulx	184(%[b]), %%r8, %%r9\n\t"
-        "movq	%%r15, %%r13\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "adcxq	%%rcx, %%r13\n\t"
-        "movq	%%r15, %%rcx\n\t"
-        "adoxq	%%r15, %%rcx\n\t"
-        "adcxq	%%r15, %%rcx\n\t"
-        "movq	%%rax, 232(%[t])\n\t"
-        "movq	%%r10, 240(%[t])\n\t"
-        "movq	%%r11, 248(%[t])\n\t"
-        "movq	%%r12, 256(%[t])\n\t"
-        "movq	%%r13, 264(%[t])\n\t"
-        "movq	80(%[a]), %%rdx\n\t"
-        "xorq	%%r15, %%r15\n\t"
-        "movq	80(%[t]), %%r14\n\t"
-        "movq	88(%[t]), %%rax\n\t"
-        "movq	96(%[t]), %%r10\n\t"
-        "movq	104(%[t]), %%r11\n\t"
-        "movq	112(%[t]), %%r12\n\t"
-        "# A[10] * B[0]\n\t"
-        "mulx	0(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[10] * B[1]\n\t"
-        "mulx	8(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[10] * B[2]\n\t"
-        "mulx	16(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[10] * B[3]\n\t"
-        "mulx	24(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "movq	%%r14, 80(%[t])\n\t"
-        "movq	%%rax, 88(%[t])\n\t"
-        "movq	%%r10, 96(%[t])\n\t"
-        "movq	%%r11, 104(%[t])\n\t"
-        "movq	120(%[t]), %%r13\n\t"
-        "movq	128(%[t]), %%r14\n\t"
-        "movq	136(%[t]), %%rax\n\t"
-        "movq	144(%[t]), %%r10\n\t"
-        "# A[10] * B[4]\n\t"
-        "mulx	32(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[10] * B[5]\n\t"
-        "mulx	40(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[10] * B[6]\n\t"
-        "mulx	48(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[10] * B[7]\n\t"
-        "mulx	56(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "movq	%%r12, 112(%[t])\n\t"
-        "movq	%%r13, 120(%[t])\n\t"
-        "movq	%%r14, 128(%[t])\n\t"
-        "movq	%%rax, 136(%[t])\n\t"
-        "movq	152(%[t]), %%r11\n\t"
-        "movq	160(%[t]), %%r12\n\t"
-        "movq	168(%[t]), %%r13\n\t"
-        "movq	176(%[t]), %%r14\n\t"
-        "# A[10] * B[8]\n\t"
-        "mulx	64(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[10] * B[9]\n\t"
-        "mulx	72(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[10] * B[10]\n\t"
-        "mulx	80(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[10] * B[11]\n\t"
-        "mulx	88(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "movq	%%r10, 144(%[t])\n\t"
-        "movq	%%r11, 152(%[t])\n\t"
-        "movq	%%r12, 160(%[t])\n\t"
-        "movq	%%r13, 168(%[t])\n\t"
-        "movq	184(%[t]), %%rax\n\t"
-        "movq	192(%[t]), %%r10\n\t"
-        "movq	200(%[t]), %%r11\n\t"
-        "movq	208(%[t]), %%r12\n\t"
-        "# A[10] * B[12]\n\t"
-        "mulx	96(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[10] * B[13]\n\t"
-        "mulx	104(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[10] * B[14]\n\t"
-        "mulx	112(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[10] * B[15]\n\t"
-        "mulx	120(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "movq	%%r14, 176(%[t])\n\t"
-        "movq	%%rax, 184(%[t])\n\t"
-        "movq	%%r10, 192(%[t])\n\t"
-        "movq	%%r11, 200(%[t])\n\t"
-        "movq	216(%[t]), %%r13\n\t"
-        "movq	224(%[t]), %%r14\n\t"
-        "movq	232(%[t]), %%rax\n\t"
-        "movq	240(%[t]), %%r10\n\t"
-        "# A[10] * B[16]\n\t"
-        "mulx	128(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[10] * B[17]\n\t"
-        "mulx	136(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[10] * B[18]\n\t"
-        "mulx	144(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[10] * B[19]\n\t"
-        "mulx	152(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "movq	%%r12, 208(%[t])\n\t"
-        "movq	%%r13, 216(%[t])\n\t"
-        "movq	%%r14, 224(%[t])\n\t"
-        "movq	%%rax, 232(%[t])\n\t"
-        "movq	248(%[t]), %%r11\n\t"
-        "movq	256(%[t]), %%r12\n\t"
-        "movq	264(%[t]), %%r13\n\t"
-        "# A[10] * B[20]\n\t"
-        "mulx	160(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[10] * B[21]\n\t"
-        "mulx	168(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[10] * B[22]\n\t"
-        "mulx	176(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[10] * B[23]\n\t"
-        "mulx	184(%[b]), %%r8, %%r9\n\t"
-        "movq	%%r15, %%r14\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "adcxq	%%rcx, %%r14\n\t"
-        "movq	%%r15, %%rcx\n\t"
-        "adoxq	%%r15, %%rcx\n\t"
-        "adcxq	%%r15, %%rcx\n\t"
-        "movq	%%r10, 240(%[t])\n\t"
-        "movq	%%r11, 248(%[t])\n\t"
-        "movq	%%r12, 256(%[t])\n\t"
-        "movq	%%r13, 264(%[t])\n\t"
-        "movq	%%r14, 272(%[t])\n\t"
-        "movq	88(%[a]), %%rdx\n\t"
-        "xorq	%%r15, %%r15\n\t"
-        "movq	88(%[t]), %%rax\n\t"
-        "movq	96(%[t]), %%r10\n\t"
-        "movq	104(%[t]), %%r11\n\t"
-        "movq	112(%[t]), %%r12\n\t"
-        "movq	120(%[t]), %%r13\n\t"
-        "# A[11] * B[0]\n\t"
-        "mulx	0(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[11] * B[1]\n\t"
-        "mulx	8(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[11] * B[2]\n\t"
-        "mulx	16(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[11] * B[3]\n\t"
-        "mulx	24(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "movq	%%rax, 88(%[t])\n\t"
-        "movq	%%r10, 96(%[t])\n\t"
-        "movq	%%r11, 104(%[t])\n\t"
-        "movq	%%r12, 112(%[t])\n\t"
-        "movq	128(%[t]), %%r14\n\t"
-        "movq	136(%[t]), %%rax\n\t"
-        "movq	144(%[t]), %%r10\n\t"
-        "movq	152(%[t]), %%r11\n\t"
-        "# A[11] * B[4]\n\t"
-        "mulx	32(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[11] * B[5]\n\t"
-        "mulx	40(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[11] * B[6]\n\t"
-        "mulx	48(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[11] * B[7]\n\t"
-        "mulx	56(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "movq	%%r13, 120(%[t])\n\t"
-        "movq	%%r14, 128(%[t])\n\t"
-        "movq	%%rax, 136(%[t])\n\t"
-        "movq	%%r10, 144(%[t])\n\t"
-        "movq	160(%[t]), %%r12\n\t"
-        "movq	168(%[t]), %%r13\n\t"
-        "movq	176(%[t]), %%r14\n\t"
-        "movq	184(%[t]), %%rax\n\t"
-        "# A[11] * B[8]\n\t"
-        "mulx	64(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[11] * B[9]\n\t"
-        "mulx	72(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[11] * B[10]\n\t"
-        "mulx	80(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[11] * B[11]\n\t"
-        "mulx	88(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "movq	%%r11, 152(%[t])\n\t"
-        "movq	%%r12, 160(%[t])\n\t"
-        "movq	%%r13, 168(%[t])\n\t"
-        "movq	%%r14, 176(%[t])\n\t"
-        "movq	192(%[t]), %%r10\n\t"
-        "movq	200(%[t]), %%r11\n\t"
-        "movq	208(%[t]), %%r12\n\t"
-        "movq	216(%[t]), %%r13\n\t"
-        "# A[11] * B[12]\n\t"
-        "mulx	96(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[11] * B[13]\n\t"
-        "mulx	104(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[11] * B[14]\n\t"
-        "mulx	112(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[11] * B[15]\n\t"
-        "mulx	120(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "movq	%%rax, 184(%[t])\n\t"
-        "movq	%%r10, 192(%[t])\n\t"
-        "movq	%%r11, 200(%[t])\n\t"
-        "movq	%%r12, 208(%[t])\n\t"
-        "movq	224(%[t]), %%r14\n\t"
-        "movq	232(%[t]), %%rax\n\t"
-        "movq	240(%[t]), %%r10\n\t"
-        "movq	248(%[t]), %%r11\n\t"
-        "# A[11] * B[16]\n\t"
-        "mulx	128(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[11] * B[17]\n\t"
-        "mulx	136(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[11] * B[18]\n\t"
-        "mulx	144(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[11] * B[19]\n\t"
-        "mulx	152(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "movq	%%r13, 216(%[t])\n\t"
-        "movq	%%r14, 224(%[t])\n\t"
-        "movq	%%rax, 232(%[t])\n\t"
-        "movq	%%r10, 240(%[t])\n\t"
-        "movq	256(%[t]), %%r12\n\t"
-        "movq	264(%[t]), %%r13\n\t"
-        "movq	272(%[t]), %%r14\n\t"
-        "# A[11] * B[20]\n\t"
-        "mulx	160(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[11] * B[21]\n\t"
-        "mulx	168(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[11] * B[22]\n\t"
-        "mulx	176(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[11] * B[23]\n\t"
-        "mulx	184(%[b]), %%r8, %%r9\n\t"
-        "movq	%%r15, %%rax\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "adcxq	%%rcx, %%rax\n\t"
-        "movq	%%r15, %%rcx\n\t"
-        "adoxq	%%r15, %%rcx\n\t"
-        "adcxq	%%r15, %%rcx\n\t"
-        "movq	%%r11, 248(%[t])\n\t"
-        "movq	%%r12, 256(%[t])\n\t"
-        "movq	%%r13, 264(%[t])\n\t"
-        "movq	%%r14, 272(%[t])\n\t"
-        "movq	%%rax, 280(%[t])\n\t"
-        "movq	96(%[a]), %%rdx\n\t"
-        "xorq	%%r15, %%r15\n\t"
-        "movq	96(%[t]), %%r10\n\t"
-        "movq	104(%[t]), %%r11\n\t"
-        "movq	112(%[t]), %%r12\n\t"
-        "movq	120(%[t]), %%r13\n\t"
-        "movq	128(%[t]), %%r14\n\t"
-        "# A[12] * B[0]\n\t"
-        "mulx	0(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[12] * B[1]\n\t"
-        "mulx	8(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[12] * B[2]\n\t"
-        "mulx	16(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[12] * B[3]\n\t"
-        "mulx	24(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "movq	%%r10, 96(%[t])\n\t"
-        "movq	%%r11, 104(%[t])\n\t"
-        "movq	%%r12, 112(%[t])\n\t"
-        "movq	%%r13, 120(%[t])\n\t"
-        "movq	136(%[t]), %%rax\n\t"
-        "movq	144(%[t]), %%r10\n\t"
-        "movq	152(%[t]), %%r11\n\t"
-        "movq	160(%[t]), %%r12\n\t"
-        "# A[12] * B[4]\n\t"
-        "mulx	32(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[12] * B[5]\n\t"
-        "mulx	40(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[12] * B[6]\n\t"
-        "mulx	48(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[12] * B[7]\n\t"
-        "mulx	56(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "movq	%%r14, 128(%[t])\n\t"
-        "movq	%%rax, 136(%[t])\n\t"
-        "movq	%%r10, 144(%[t])\n\t"
-        "movq	%%r11, 152(%[t])\n\t"
-        "movq	168(%[t]), %%r13\n\t"
-        "movq	176(%[t]), %%r14\n\t"
-        "movq	184(%[t]), %%rax\n\t"
-        "movq	192(%[t]), %%r10\n\t"
-        "# A[12] * B[8]\n\t"
-        "mulx	64(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[12] * B[9]\n\t"
-        "mulx	72(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[12] * B[10]\n\t"
-        "mulx	80(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[12] * B[11]\n\t"
-        "mulx	88(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "movq	%%r12, 160(%[t])\n\t"
-        "movq	%%r13, 168(%[t])\n\t"
-        "movq	%%r14, 176(%[t])\n\t"
-        "movq	%%rax, 184(%[t])\n\t"
-        "movq	200(%[t]), %%r11\n\t"
-        "movq	208(%[t]), %%r12\n\t"
-        "movq	216(%[t]), %%r13\n\t"
-        "movq	224(%[t]), %%r14\n\t"
-        "# A[12] * B[12]\n\t"
-        "mulx	96(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[12] * B[13]\n\t"
-        "mulx	104(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[12] * B[14]\n\t"
-        "mulx	112(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[12] * B[15]\n\t"
-        "mulx	120(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "movq	%%r10, 192(%[t])\n\t"
-        "movq	%%r11, 200(%[t])\n\t"
-        "movq	%%r12, 208(%[t])\n\t"
-        "movq	%%r13, 216(%[t])\n\t"
-        "movq	232(%[t]), %%rax\n\t"
-        "movq	240(%[t]), %%r10\n\t"
-        "movq	248(%[t]), %%r11\n\t"
-        "movq	256(%[t]), %%r12\n\t"
-        "# A[12] * B[16]\n\t"
-        "mulx	128(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[12] * B[17]\n\t"
-        "mulx	136(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[12] * B[18]\n\t"
-        "mulx	144(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[12] * B[19]\n\t"
-        "mulx	152(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "movq	%%r14, 224(%[t])\n\t"
-        "movq	%%rax, 232(%[t])\n\t"
-        "movq	%%r10, 240(%[t])\n\t"
-        "movq	%%r11, 248(%[t])\n\t"
-        "movq	264(%[t]), %%r13\n\t"
-        "movq	272(%[t]), %%r14\n\t"
-        "movq	280(%[t]), %%rax\n\t"
-        "# A[12] * B[20]\n\t"
-        "mulx	160(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[12] * B[21]\n\t"
-        "mulx	168(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[12] * B[22]\n\t"
-        "mulx	176(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[12] * B[23]\n\t"
-        "mulx	184(%[b]), %%r8, %%r9\n\t"
-        "movq	%%r15, %%r10\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "adcxq	%%rcx, %%r10\n\t"
-        "movq	%%r15, %%rcx\n\t"
-        "adoxq	%%r15, %%rcx\n\t"
-        "adcxq	%%r15, %%rcx\n\t"
-        "movq	%%r12, 256(%[t])\n\t"
-        "movq	%%r13, 264(%[t])\n\t"
-        "movq	%%r14, 272(%[t])\n\t"
-        "movq	%%rax, 280(%[t])\n\t"
-        "movq	%%r10, 288(%[t])\n\t"
-        "movq	104(%[a]), %%rdx\n\t"
-        "xorq	%%r15, %%r15\n\t"
-        "movq	104(%[t]), %%r11\n\t"
-        "movq	112(%[t]), %%r12\n\t"
-        "movq	120(%[t]), %%r13\n\t"
-        "movq	128(%[t]), %%r14\n\t"
-        "movq	136(%[t]), %%rax\n\t"
-        "# A[13] * B[0]\n\t"
-        "mulx	0(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[13] * B[1]\n\t"
-        "mulx	8(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[13] * B[2]\n\t"
-        "mulx	16(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[13] * B[3]\n\t"
-        "mulx	24(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "movq	%%r11, 104(%[t])\n\t"
-        "movq	%%r12, 112(%[t])\n\t"
-        "movq	%%r13, 120(%[t])\n\t"
-        "movq	%%r14, 128(%[t])\n\t"
-        "movq	144(%[t]), %%r10\n\t"
-        "movq	152(%[t]), %%r11\n\t"
-        "movq	160(%[t]), %%r12\n\t"
-        "movq	168(%[t]), %%r13\n\t"
-        "# A[13] * B[4]\n\t"
-        "mulx	32(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[13] * B[5]\n\t"
-        "mulx	40(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[13] * B[6]\n\t"
-        "mulx	48(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[13] * B[7]\n\t"
-        "mulx	56(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "movq	%%rax, 136(%[t])\n\t"
-        "movq	%%r10, 144(%[t])\n\t"
-        "movq	%%r11, 152(%[t])\n\t"
-        "movq	%%r12, 160(%[t])\n\t"
-        "movq	176(%[t]), %%r14\n\t"
-        "movq	184(%[t]), %%rax\n\t"
-        "movq	192(%[t]), %%r10\n\t"
-        "movq	200(%[t]), %%r11\n\t"
-        "# A[13] * B[8]\n\t"
-        "mulx	64(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[13] * B[9]\n\t"
-        "mulx	72(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[13] * B[10]\n\t"
-        "mulx	80(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[13] * B[11]\n\t"
-        "mulx	88(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "movq	%%r13, 168(%[t])\n\t"
-        "movq	%%r14, 176(%[t])\n\t"
-        "movq	%%rax, 184(%[t])\n\t"
-        "movq	%%r10, 192(%[t])\n\t"
-        "movq	208(%[t]), %%r12\n\t"
-        "movq	216(%[t]), %%r13\n\t"
-        "movq	224(%[t]), %%r14\n\t"
-        "movq	232(%[t]), %%rax\n\t"
-        "# A[13] * B[12]\n\t"
-        "mulx	96(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[13] * B[13]\n\t"
-        "mulx	104(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[13] * B[14]\n\t"
-        "mulx	112(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[13] * B[15]\n\t"
-        "mulx	120(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "movq	%%r11, 200(%[t])\n\t"
-        "movq	%%r12, 208(%[t])\n\t"
-        "movq	%%r13, 216(%[t])\n\t"
-        "movq	%%r14, 224(%[t])\n\t"
-        "movq	240(%[t]), %%r10\n\t"
-        "movq	248(%[t]), %%r11\n\t"
-        "movq	256(%[t]), %%r12\n\t"
-        "movq	264(%[t]), %%r13\n\t"
-        "# A[13] * B[16]\n\t"
-        "mulx	128(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[13] * B[17]\n\t"
-        "mulx	136(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[13] * B[18]\n\t"
-        "mulx	144(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[13] * B[19]\n\t"
-        "mulx	152(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "movq	%%rax, 232(%[t])\n\t"
-        "movq	%%r10, 240(%[t])\n\t"
-        "movq	%%r11, 248(%[t])\n\t"
-        "movq	%%r12, 256(%[t])\n\t"
-        "movq	272(%[t]), %%r14\n\t"
-        "movq	280(%[t]), %%rax\n\t"
-        "movq	288(%[t]), %%r10\n\t"
-        "# A[13] * B[20]\n\t"
-        "mulx	160(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[13] * B[21]\n\t"
-        "mulx	168(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[13] * B[22]\n\t"
-        "mulx	176(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[13] * B[23]\n\t"
-        "mulx	184(%[b]), %%r8, %%r9\n\t"
-        "movq	%%r15, %%r11\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "adcxq	%%rcx, %%r11\n\t"
-        "movq	%%r15, %%rcx\n\t"
-        "adoxq	%%r15, %%rcx\n\t"
-        "adcxq	%%r15, %%rcx\n\t"
-        "movq	%%r13, 264(%[t])\n\t"
-        "movq	%%r14, 272(%[t])\n\t"
-        "movq	%%rax, 280(%[t])\n\t"
-        "movq	%%r10, 288(%[t])\n\t"
-        "movq	%%r11, 296(%[t])\n\t"
-        "movq	112(%[a]), %%rdx\n\t"
-        "xorq	%%r15, %%r15\n\t"
-        "movq	112(%[t]), %%r12\n\t"
-        "movq	120(%[t]), %%r13\n\t"
-        "movq	128(%[t]), %%r14\n\t"
-        "movq	136(%[t]), %%rax\n\t"
-        "movq	144(%[t]), %%r10\n\t"
-        "# A[14] * B[0]\n\t"
-        "mulx	0(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[14] * B[1]\n\t"
-        "mulx	8(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[14] * B[2]\n\t"
-        "mulx	16(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[14] * B[3]\n\t"
-        "mulx	24(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "movq	%%r12, 112(%[t])\n\t"
-        "movq	%%r13, 120(%[t])\n\t"
-        "movq	%%r14, 128(%[t])\n\t"
-        "movq	%%rax, 136(%[t])\n\t"
-        "movq	152(%[t]), %%r11\n\t"
-        "movq	160(%[t]), %%r12\n\t"
-        "movq	168(%[t]), %%r13\n\t"
-        "movq	176(%[t]), %%r14\n\t"
-        "# A[14] * B[4]\n\t"
-        "mulx	32(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[14] * B[5]\n\t"
-        "mulx	40(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[14] * B[6]\n\t"
-        "mulx	48(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[14] * B[7]\n\t"
-        "mulx	56(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "movq	%%r10, 144(%[t])\n\t"
-        "movq	%%r11, 152(%[t])\n\t"
-        "movq	%%r12, 160(%[t])\n\t"
-        "movq	%%r13, 168(%[t])\n\t"
-        "movq	184(%[t]), %%rax\n\t"
-        "movq	192(%[t]), %%r10\n\t"
-        "movq	200(%[t]), %%r11\n\t"
-        "movq	208(%[t]), %%r12\n\t"
-        "# A[14] * B[8]\n\t"
-        "mulx	64(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[14] * B[9]\n\t"
-        "mulx	72(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[14] * B[10]\n\t"
-        "mulx	80(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[14] * B[11]\n\t"
-        "mulx	88(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "movq	%%r14, 176(%[t])\n\t"
-        "movq	%%rax, 184(%[t])\n\t"
-        "movq	%%r10, 192(%[t])\n\t"
-        "movq	%%r11, 200(%[t])\n\t"
-        "movq	216(%[t]), %%r13\n\t"
-        "movq	224(%[t]), %%r14\n\t"
-        "movq	232(%[t]), %%rax\n\t"
-        "movq	240(%[t]), %%r10\n\t"
-        "# A[14] * B[12]\n\t"
-        "mulx	96(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[14] * B[13]\n\t"
-        "mulx	104(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[14] * B[14]\n\t"
-        "mulx	112(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[14] * B[15]\n\t"
-        "mulx	120(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "movq	%%r12, 208(%[t])\n\t"
-        "movq	%%r13, 216(%[t])\n\t"
-        "movq	%%r14, 224(%[t])\n\t"
-        "movq	%%rax, 232(%[t])\n\t"
-        "movq	248(%[t]), %%r11\n\t"
-        "movq	256(%[t]), %%r12\n\t"
-        "movq	264(%[t]), %%r13\n\t"
-        "movq	272(%[t]), %%r14\n\t"
-        "# A[14] * B[16]\n\t"
-        "mulx	128(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[14] * B[17]\n\t"
-        "mulx	136(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[14] * B[18]\n\t"
-        "mulx	144(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[14] * B[19]\n\t"
-        "mulx	152(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "movq	%%r10, 240(%[t])\n\t"
-        "movq	%%r11, 248(%[t])\n\t"
-        "movq	%%r12, 256(%[t])\n\t"
-        "movq	%%r13, 264(%[t])\n\t"
-        "movq	280(%[t]), %%rax\n\t"
-        "movq	288(%[t]), %%r10\n\t"
-        "movq	296(%[t]), %%r11\n\t"
-        "# A[14] * B[20]\n\t"
-        "mulx	160(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[14] * B[21]\n\t"
-        "mulx	168(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[14] * B[22]\n\t"
-        "mulx	176(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[14] * B[23]\n\t"
-        "mulx	184(%[b]), %%r8, %%r9\n\t"
-        "movq	%%r15, %%r12\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "adcxq	%%rcx, %%r12\n\t"
-        "movq	%%r15, %%rcx\n\t"
-        "adoxq	%%r15, %%rcx\n\t"
-        "adcxq	%%r15, %%rcx\n\t"
-        "movq	%%r14, 272(%[t])\n\t"
-        "movq	%%rax, 280(%[t])\n\t"
-        "movq	%%r10, 288(%[t])\n\t"
-        "movq	%%r11, 296(%[t])\n\t"
-        "movq	%%r12, 304(%[t])\n\t"
-        "movq	120(%[a]), %%rdx\n\t"
-        "xorq	%%r15, %%r15\n\t"
-        "movq	120(%[t]), %%r13\n\t"
-        "movq	128(%[t]), %%r14\n\t"
-        "movq	136(%[t]), %%rax\n\t"
-        "movq	144(%[t]), %%r10\n\t"
-        "movq	152(%[t]), %%r11\n\t"
-        "# A[15] * B[0]\n\t"
-        "mulx	0(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[15] * B[1]\n\t"
-        "mulx	8(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[15] * B[2]\n\t"
-        "mulx	16(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[15] * B[3]\n\t"
-        "mulx	24(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "movq	%%r13, 120(%[t])\n\t"
-        "movq	%%r14, 128(%[t])\n\t"
-        "movq	%%rax, 136(%[t])\n\t"
-        "movq	%%r10, 144(%[t])\n\t"
-        "movq	160(%[t]), %%r12\n\t"
-        "movq	168(%[t]), %%r13\n\t"
-        "movq	176(%[t]), %%r14\n\t"
-        "movq	184(%[t]), %%rax\n\t"
-        "# A[15] * B[4]\n\t"
-        "mulx	32(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[15] * B[5]\n\t"
-        "mulx	40(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[15] * B[6]\n\t"
-        "mulx	48(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[15] * B[7]\n\t"
-        "mulx	56(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "movq	%%r11, 152(%[t])\n\t"
-        "movq	%%r12, 160(%[t])\n\t"
-        "movq	%%r13, 168(%[t])\n\t"
-        "movq	%%r14, 176(%[t])\n\t"
-        "movq	192(%[t]), %%r10\n\t"
-        "movq	200(%[t]), %%r11\n\t"
-        "movq	208(%[t]), %%r12\n\t"
-        "movq	216(%[t]), %%r13\n\t"
-        "# A[15] * B[8]\n\t"
-        "mulx	64(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[15] * B[9]\n\t"
-        "mulx	72(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[15] * B[10]\n\t"
-        "mulx	80(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[15] * B[11]\n\t"
-        "mulx	88(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "movq	%%rax, 184(%[t])\n\t"
-        "movq	%%r10, 192(%[t])\n\t"
-        "movq	%%r11, 200(%[t])\n\t"
-        "movq	%%r12, 208(%[t])\n\t"
-        "movq	224(%[t]), %%r14\n\t"
-        "movq	232(%[t]), %%rax\n\t"
-        "movq	240(%[t]), %%r10\n\t"
-        "movq	248(%[t]), %%r11\n\t"
-        "# A[15] * B[12]\n\t"
-        "mulx	96(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[15] * B[13]\n\t"
-        "mulx	104(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[15] * B[14]\n\t"
-        "mulx	112(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[15] * B[15]\n\t"
-        "mulx	120(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "movq	%%r13, 216(%[t])\n\t"
-        "movq	%%r14, 224(%[t])\n\t"
-        "movq	%%rax, 232(%[t])\n\t"
-        "movq	%%r10, 240(%[t])\n\t"
-        "movq	256(%[t]), %%r12\n\t"
-        "movq	264(%[t]), %%r13\n\t"
-        "movq	272(%[t]), %%r14\n\t"
-        "movq	280(%[t]), %%rax\n\t"
-        "# A[15] * B[16]\n\t"
-        "mulx	128(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[15] * B[17]\n\t"
-        "mulx	136(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[15] * B[18]\n\t"
-        "mulx	144(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[15] * B[19]\n\t"
-        "mulx	152(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "movq	%%r11, 248(%[t])\n\t"
-        "movq	%%r12, 256(%[t])\n\t"
-        "movq	%%r13, 264(%[t])\n\t"
-        "movq	%%r14, 272(%[t])\n\t"
-        "movq	288(%[t]), %%r10\n\t"
-        "movq	296(%[t]), %%r11\n\t"
-        "movq	304(%[t]), %%r12\n\t"
-        "# A[15] * B[20]\n\t"
-        "mulx	160(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[15] * B[21]\n\t"
-        "mulx	168(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[15] * B[22]\n\t"
-        "mulx	176(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[15] * B[23]\n\t"
-        "mulx	184(%[b]), %%r8, %%r9\n\t"
-        "movq	%%r15, %%r13\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "adcxq	%%rcx, %%r13\n\t"
-        "movq	%%r15, %%rcx\n\t"
-        "adoxq	%%r15, %%rcx\n\t"
-        "adcxq	%%r15, %%rcx\n\t"
-        "movq	%%rax, 280(%[t])\n\t"
-        "movq	%%r10, 288(%[t])\n\t"
-        "movq	%%r11, 296(%[t])\n\t"
-        "movq	%%r12, 304(%[t])\n\t"
-        "movq	%%r13, 312(%[t])\n\t"
-        "movq	128(%[a]), %%rdx\n\t"
-        "xorq	%%r15, %%r15\n\t"
-        "movq	128(%[t]), %%r14\n\t"
-        "movq	136(%[t]), %%rax\n\t"
-        "movq	144(%[t]), %%r10\n\t"
-        "movq	152(%[t]), %%r11\n\t"
-        "movq	160(%[t]), %%r12\n\t"
-        "# A[16] * B[0]\n\t"
-        "mulx	0(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[16] * B[1]\n\t"
-        "mulx	8(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[16] * B[2]\n\t"
-        "mulx	16(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[16] * B[3]\n\t"
-        "mulx	24(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "movq	%%r14, 128(%[t])\n\t"
-        "movq	%%rax, 136(%[t])\n\t"
-        "movq	%%r10, 144(%[t])\n\t"
-        "movq	%%r11, 152(%[t])\n\t"
-        "movq	168(%[t]), %%r13\n\t"
-        "movq	176(%[t]), %%r14\n\t"
-        "movq	184(%[t]), %%rax\n\t"
-        "movq	192(%[t]), %%r10\n\t"
-        "# A[16] * B[4]\n\t"
-        "mulx	32(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[16] * B[5]\n\t"
-        "mulx	40(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[16] * B[6]\n\t"
-        "mulx	48(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[16] * B[7]\n\t"
-        "mulx	56(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "movq	%%r12, 160(%[t])\n\t"
-        "movq	%%r13, 168(%[t])\n\t"
-        "movq	%%r14, 176(%[t])\n\t"
-        "movq	%%rax, 184(%[t])\n\t"
-        "movq	200(%[t]), %%r11\n\t"
-        "movq	208(%[t]), %%r12\n\t"
-        "movq	216(%[t]), %%r13\n\t"
-        "movq	224(%[t]), %%r14\n\t"
-        "# A[16] * B[8]\n\t"
-        "mulx	64(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[16] * B[9]\n\t"
-        "mulx	72(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[16] * B[10]\n\t"
-        "mulx	80(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[16] * B[11]\n\t"
-        "mulx	88(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "movq	%%r10, 192(%[t])\n\t"
-        "movq	%%r11, 200(%[t])\n\t"
-        "movq	%%r12, 208(%[t])\n\t"
-        "movq	%%r13, 216(%[t])\n\t"
-        "movq	232(%[t]), %%rax\n\t"
-        "movq	240(%[t]), %%r10\n\t"
-        "movq	248(%[t]), %%r11\n\t"
-        "movq	256(%[t]), %%r12\n\t"
-        "# A[16] * B[12]\n\t"
-        "mulx	96(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[16] * B[13]\n\t"
-        "mulx	104(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[16] * B[14]\n\t"
-        "mulx	112(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[16] * B[15]\n\t"
-        "mulx	120(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "movq	%%r14, 224(%[t])\n\t"
-        "movq	%%rax, 232(%[t])\n\t"
-        "movq	%%r10, 240(%[t])\n\t"
-        "movq	%%r11, 248(%[t])\n\t"
-        "movq	264(%[t]), %%r13\n\t"
-        "movq	272(%[t]), %%r14\n\t"
-        "movq	280(%[t]), %%rax\n\t"
-        "movq	288(%[t]), %%r10\n\t"
-        "# A[16] * B[16]\n\t"
-        "mulx	128(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[16] * B[17]\n\t"
-        "mulx	136(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[16] * B[18]\n\t"
-        "mulx	144(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[16] * B[19]\n\t"
-        "mulx	152(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "movq	%%r12, 256(%[t])\n\t"
-        "movq	%%r13, 264(%[t])\n\t"
-        "movq	%%r14, 272(%[t])\n\t"
-        "movq	%%rax, 280(%[t])\n\t"
-        "movq	296(%[t]), %%r11\n\t"
-        "movq	304(%[t]), %%r12\n\t"
-        "movq	312(%[t]), %%r13\n\t"
-        "# A[16] * B[20]\n\t"
-        "mulx	160(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[16] * B[21]\n\t"
-        "mulx	168(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[16] * B[22]\n\t"
-        "mulx	176(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[16] * B[23]\n\t"
-        "mulx	184(%[b]), %%r8, %%r9\n\t"
-        "movq	%%r15, %%r14\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "adcxq	%%rcx, %%r14\n\t"
-        "movq	%%r15, %%rcx\n\t"
-        "adoxq	%%r15, %%rcx\n\t"
-        "adcxq	%%r15, %%rcx\n\t"
-        "movq	%%r10, 288(%[t])\n\t"
-        "movq	%%r11, 296(%[t])\n\t"
-        "movq	%%r12, 304(%[t])\n\t"
-        "movq	%%r13, 312(%[t])\n\t"
-        "movq	%%r14, 320(%[t])\n\t"
-        "movq	136(%[a]), %%rdx\n\t"
-        "xorq	%%r15, %%r15\n\t"
-        "movq	136(%[t]), %%rax\n\t"
-        "movq	144(%[t]), %%r10\n\t"
-        "movq	152(%[t]), %%r11\n\t"
-        "movq	160(%[t]), %%r12\n\t"
-        "movq	168(%[t]), %%r13\n\t"
-        "# A[17] * B[0]\n\t"
-        "mulx	0(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[17] * B[1]\n\t"
-        "mulx	8(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[17] * B[2]\n\t"
-        "mulx	16(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[17] * B[3]\n\t"
-        "mulx	24(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "movq	%%rax, 136(%[t])\n\t"
-        "movq	%%r10, 144(%[t])\n\t"
-        "movq	%%r11, 152(%[t])\n\t"
-        "movq	%%r12, 160(%[t])\n\t"
-        "movq	176(%[t]), %%r14\n\t"
-        "movq	184(%[t]), %%rax\n\t"
-        "movq	192(%[t]), %%r10\n\t"
-        "movq	200(%[t]), %%r11\n\t"
-        "# A[17] * B[4]\n\t"
-        "mulx	32(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[17] * B[5]\n\t"
-        "mulx	40(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[17] * B[6]\n\t"
-        "mulx	48(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[17] * B[7]\n\t"
-        "mulx	56(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "movq	%%r13, 168(%[t])\n\t"
-        "movq	%%r14, 176(%[t])\n\t"
-        "movq	%%rax, 184(%[t])\n\t"
-        "movq	%%r10, 192(%[t])\n\t"
-        "movq	208(%[t]), %%r12\n\t"
-        "movq	216(%[t]), %%r13\n\t"
-        "movq	224(%[t]), %%r14\n\t"
-        "movq	232(%[t]), %%rax\n\t"
-        "# A[17] * B[8]\n\t"
-        "mulx	64(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[17] * B[9]\n\t"
-        "mulx	72(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[17] * B[10]\n\t"
-        "mulx	80(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[17] * B[11]\n\t"
-        "mulx	88(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "movq	%%r11, 200(%[t])\n\t"
-        "movq	%%r12, 208(%[t])\n\t"
-        "movq	%%r13, 216(%[t])\n\t"
-        "movq	%%r14, 224(%[t])\n\t"
-        "movq	240(%[t]), %%r10\n\t"
-        "movq	248(%[t]), %%r11\n\t"
-        "movq	256(%[t]), %%r12\n\t"
-        "movq	264(%[t]), %%r13\n\t"
-        "# A[17] * B[12]\n\t"
-        "mulx	96(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[17] * B[13]\n\t"
-        "mulx	104(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[17] * B[14]\n\t"
-        "mulx	112(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[17] * B[15]\n\t"
-        "mulx	120(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "movq	%%rax, 232(%[t])\n\t"
-        "movq	%%r10, 240(%[t])\n\t"
-        "movq	%%r11, 248(%[t])\n\t"
-        "movq	%%r12, 256(%[t])\n\t"
-        "movq	272(%[t]), %%r14\n\t"
-        "movq	280(%[t]), %%rax\n\t"
-        "movq	288(%[t]), %%r10\n\t"
-        "movq	296(%[t]), %%r11\n\t"
-        "# A[17] * B[16]\n\t"
-        "mulx	128(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[17] * B[17]\n\t"
-        "mulx	136(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[17] * B[18]\n\t"
-        "mulx	144(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[17] * B[19]\n\t"
-        "mulx	152(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "movq	%%r13, 264(%[t])\n\t"
-        "movq	%%r14, 272(%[t])\n\t"
-        "movq	%%rax, 280(%[t])\n\t"
-        "movq	%%r10, 288(%[t])\n\t"
-        "movq	304(%[t]), %%r12\n\t"
-        "movq	312(%[t]), %%r13\n\t"
-        "movq	320(%[t]), %%r14\n\t"
-        "# A[17] * B[20]\n\t"
-        "mulx	160(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[17] * B[21]\n\t"
-        "mulx	168(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[17] * B[22]\n\t"
-        "mulx	176(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[17] * B[23]\n\t"
-        "mulx	184(%[b]), %%r8, %%r9\n\t"
-        "movq	%%r15, %%rax\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "adcxq	%%rcx, %%rax\n\t"
-        "movq	%%r15, %%rcx\n\t"
-        "adoxq	%%r15, %%rcx\n\t"
-        "adcxq	%%r15, %%rcx\n\t"
-        "movq	%%r11, 296(%[t])\n\t"
-        "movq	%%r12, 304(%[t])\n\t"
-        "movq	%%r13, 312(%[t])\n\t"
-        "movq	%%r14, 320(%[t])\n\t"
-        "movq	%%rax, 328(%[t])\n\t"
-        "movq	144(%[a]), %%rdx\n\t"
-        "xorq	%%r15, %%r15\n\t"
-        "movq	144(%[t]), %%r10\n\t"
-        "movq	152(%[t]), %%r11\n\t"
-        "movq	160(%[t]), %%r12\n\t"
-        "movq	168(%[t]), %%r13\n\t"
-        "movq	176(%[t]), %%r14\n\t"
-        "# A[18] * B[0]\n\t"
-        "mulx	0(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[18] * B[1]\n\t"
-        "mulx	8(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[18] * B[2]\n\t"
-        "mulx	16(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[18] * B[3]\n\t"
-        "mulx	24(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "movq	%%r10, 144(%[t])\n\t"
-        "movq	%%r11, 152(%[t])\n\t"
-        "movq	%%r12, 160(%[t])\n\t"
-        "movq	%%r13, 168(%[t])\n\t"
-        "movq	184(%[t]), %%rax\n\t"
-        "movq	192(%[t]), %%r10\n\t"
-        "movq	200(%[t]), %%r11\n\t"
-        "movq	208(%[t]), %%r12\n\t"
-        "# A[18] * B[4]\n\t"
-        "mulx	32(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[18] * B[5]\n\t"
-        "mulx	40(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[18] * B[6]\n\t"
-        "mulx	48(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[18] * B[7]\n\t"
-        "mulx	56(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "movq	%%r14, 176(%[t])\n\t"
-        "movq	%%rax, 184(%[t])\n\t"
-        "movq	%%r10, 192(%[t])\n\t"
-        "movq	%%r11, 200(%[t])\n\t"
-        "movq	216(%[t]), %%r13\n\t"
-        "movq	224(%[t]), %%r14\n\t"
-        "movq	232(%[t]), %%rax\n\t"
-        "movq	240(%[t]), %%r10\n\t"
-        "# A[18] * B[8]\n\t"
-        "mulx	64(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[18] * B[9]\n\t"
-        "mulx	72(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[18] * B[10]\n\t"
-        "mulx	80(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[18] * B[11]\n\t"
-        "mulx	88(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "movq	%%r12, 208(%[t])\n\t"
-        "movq	%%r13, 216(%[t])\n\t"
-        "movq	%%r14, 224(%[t])\n\t"
-        "movq	%%rax, 232(%[t])\n\t"
-        "movq	248(%[t]), %%r11\n\t"
-        "movq	256(%[t]), %%r12\n\t"
-        "movq	264(%[t]), %%r13\n\t"
-        "movq	272(%[t]), %%r14\n\t"
-        "# A[18] * B[12]\n\t"
-        "mulx	96(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[18] * B[13]\n\t"
-        "mulx	104(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[18] * B[14]\n\t"
-        "mulx	112(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[18] * B[15]\n\t"
-        "mulx	120(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "movq	%%r10, 240(%[t])\n\t"
-        "movq	%%r11, 248(%[t])\n\t"
-        "movq	%%r12, 256(%[t])\n\t"
-        "movq	%%r13, 264(%[t])\n\t"
-        "movq	280(%[t]), %%rax\n\t"
-        "movq	288(%[t]), %%r10\n\t"
-        "movq	296(%[t]), %%r11\n\t"
-        "movq	304(%[t]), %%r12\n\t"
-        "# A[18] * B[16]\n\t"
-        "mulx	128(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[18] * B[17]\n\t"
-        "mulx	136(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[18] * B[18]\n\t"
-        "mulx	144(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[18] * B[19]\n\t"
-        "mulx	152(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "movq	%%r14, 272(%[t])\n\t"
-        "movq	%%rax, 280(%[t])\n\t"
-        "movq	%%r10, 288(%[t])\n\t"
-        "movq	%%r11, 296(%[t])\n\t"
-        "movq	312(%[t]), %%r13\n\t"
-        "movq	320(%[t]), %%r14\n\t"
-        "movq	328(%[t]), %%rax\n\t"
-        "# A[18] * B[20]\n\t"
-        "mulx	160(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[18] * B[21]\n\t"
-        "mulx	168(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[18] * B[22]\n\t"
-        "mulx	176(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[18] * B[23]\n\t"
-        "mulx	184(%[b]), %%r8, %%r9\n\t"
-        "movq	%%r15, %%r10\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "adcxq	%%rcx, %%r10\n\t"
-        "movq	%%r15, %%rcx\n\t"
-        "adoxq	%%r15, %%rcx\n\t"
-        "adcxq	%%r15, %%rcx\n\t"
-        "movq	%%r12, 304(%[t])\n\t"
-        "movq	%%r13, 312(%[t])\n\t"
-        "movq	%%r14, 320(%[t])\n\t"
-        "movq	%%rax, 328(%[t])\n\t"
-        "movq	%%r10, 336(%[t])\n\t"
-        "movq	152(%[a]), %%rdx\n\t"
-        "xorq	%%r15, %%r15\n\t"
-        "movq	152(%[t]), %%r11\n\t"
-        "movq	160(%[t]), %%r12\n\t"
-        "movq	168(%[t]), %%r13\n\t"
-        "movq	176(%[t]), %%r14\n\t"
-        "movq	184(%[t]), %%rax\n\t"
-        "# A[19] * B[0]\n\t"
-        "mulx	0(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[19] * B[1]\n\t"
-        "mulx	8(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[19] * B[2]\n\t"
-        "mulx	16(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[19] * B[3]\n\t"
-        "mulx	24(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "movq	%%r11, 152(%[t])\n\t"
-        "movq	%%r12, 160(%[t])\n\t"
-        "movq	%%r13, 168(%[t])\n\t"
-        "movq	%%r14, 176(%[t])\n\t"
-        "movq	192(%[t]), %%r10\n\t"
-        "movq	200(%[t]), %%r11\n\t"
-        "movq	208(%[t]), %%r12\n\t"
-        "movq	216(%[t]), %%r13\n\t"
-        "# A[19] * B[4]\n\t"
-        "mulx	32(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[19] * B[5]\n\t"
-        "mulx	40(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[19] * B[6]\n\t"
-        "mulx	48(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[19] * B[7]\n\t"
-        "mulx	56(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "movq	%%rax, 184(%[t])\n\t"
-        "movq	%%r10, 192(%[t])\n\t"
-        "movq	%%r11, 200(%[t])\n\t"
-        "movq	%%r12, 208(%[t])\n\t"
-        "movq	224(%[t]), %%r14\n\t"
-        "movq	232(%[t]), %%rax\n\t"
-        "movq	240(%[t]), %%r10\n\t"
-        "movq	248(%[t]), %%r11\n\t"
-        "# A[19] * B[8]\n\t"
-        "mulx	64(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[19] * B[9]\n\t"
-        "mulx	72(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[19] * B[10]\n\t"
-        "mulx	80(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[19] * B[11]\n\t"
-        "mulx	88(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "movq	%%r13, 216(%[t])\n\t"
-        "movq	%%r14, 224(%[t])\n\t"
-        "movq	%%rax, 232(%[t])\n\t"
-        "movq	%%r10, 240(%[t])\n\t"
-        "movq	256(%[t]), %%r12\n\t"
-        "movq	264(%[t]), %%r13\n\t"
-        "movq	272(%[t]), %%r14\n\t"
-        "movq	280(%[t]), %%rax\n\t"
-        "# A[19] * B[12]\n\t"
-        "mulx	96(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[19] * B[13]\n\t"
-        "mulx	104(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[19] * B[14]\n\t"
-        "mulx	112(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[19] * B[15]\n\t"
-        "mulx	120(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "movq	%%r11, 248(%[t])\n\t"
-        "movq	%%r12, 256(%[t])\n\t"
-        "movq	%%r13, 264(%[t])\n\t"
-        "movq	%%r14, 272(%[t])\n\t"
-        "movq	288(%[t]), %%r10\n\t"
-        "movq	296(%[t]), %%r11\n\t"
-        "movq	304(%[t]), %%r12\n\t"
-        "movq	312(%[t]), %%r13\n\t"
-        "# A[19] * B[16]\n\t"
-        "mulx	128(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[19] * B[17]\n\t"
-        "mulx	136(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[19] * B[18]\n\t"
-        "mulx	144(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[19] * B[19]\n\t"
-        "mulx	152(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "movq	%%rax, 280(%[t])\n\t"
-        "movq	%%r10, 288(%[t])\n\t"
-        "movq	%%r11, 296(%[t])\n\t"
-        "movq	%%r12, 304(%[t])\n\t"
-        "movq	320(%[t]), %%r14\n\t"
-        "movq	328(%[t]), %%rax\n\t"
-        "movq	336(%[t]), %%r10\n\t"
-        "# A[19] * B[20]\n\t"
-        "mulx	160(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[19] * B[21]\n\t"
-        "mulx	168(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[19] * B[22]\n\t"
-        "mulx	176(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[19] * B[23]\n\t"
-        "mulx	184(%[b]), %%r8, %%r9\n\t"
-        "movq	%%r15, %%r11\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "adcxq	%%rcx, %%r11\n\t"
-        "movq	%%r15, %%rcx\n\t"
-        "adoxq	%%r15, %%rcx\n\t"
-        "adcxq	%%r15, %%rcx\n\t"
-        "movq	%%r13, 312(%[t])\n\t"
-        "movq	%%r14, 320(%[t])\n\t"
-        "movq	%%rax, 328(%[t])\n\t"
-        "movq	%%r10, 336(%[t])\n\t"
-        "movq	%%r11, 344(%[t])\n\t"
-        "movq	160(%[a]), %%rdx\n\t"
-        "xorq	%%r15, %%r15\n\t"
-        "movq	160(%[t]), %%r12\n\t"
-        "movq	168(%[t]), %%r13\n\t"
-        "movq	176(%[t]), %%r14\n\t"
-        "movq	184(%[t]), %%rax\n\t"
-        "movq	192(%[t]), %%r10\n\t"
-        "# A[20] * B[0]\n\t"
-        "mulx	0(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[20] * B[1]\n\t"
-        "mulx	8(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[20] * B[2]\n\t"
-        "mulx	16(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[20] * B[3]\n\t"
-        "mulx	24(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "movq	%%r12, 160(%[t])\n\t"
-        "movq	%%r13, 168(%[t])\n\t"
-        "movq	%%r14, 176(%[t])\n\t"
-        "movq	%%rax, 184(%[t])\n\t"
-        "movq	200(%[t]), %%r11\n\t"
-        "movq	208(%[t]), %%r12\n\t"
-        "movq	216(%[t]), %%r13\n\t"
-        "movq	224(%[t]), %%r14\n\t"
-        "# A[20] * B[4]\n\t"
-        "mulx	32(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[20] * B[5]\n\t"
-        "mulx	40(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[20] * B[6]\n\t"
-        "mulx	48(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[20] * B[7]\n\t"
-        "mulx	56(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "movq	%%r10, 192(%[t])\n\t"
-        "movq	%%r11, 200(%[t])\n\t"
-        "movq	%%r12, 208(%[t])\n\t"
-        "movq	%%r13, 216(%[t])\n\t"
-        "movq	232(%[t]), %%rax\n\t"
-        "movq	240(%[t]), %%r10\n\t"
-        "movq	248(%[t]), %%r11\n\t"
-        "movq	256(%[t]), %%r12\n\t"
-        "# A[20] * B[8]\n\t"
-        "mulx	64(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[20] * B[9]\n\t"
-        "mulx	72(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[20] * B[10]\n\t"
-        "mulx	80(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[20] * B[11]\n\t"
-        "mulx	88(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "movq	%%r14, 224(%[t])\n\t"
-        "movq	%%rax, 232(%[t])\n\t"
-        "movq	%%r10, 240(%[t])\n\t"
-        "movq	%%r11, 248(%[t])\n\t"
-        "movq	264(%[t]), %%r13\n\t"
-        "movq	272(%[t]), %%r14\n\t"
-        "movq	280(%[t]), %%rax\n\t"
-        "movq	288(%[t]), %%r10\n\t"
-        "# A[20] * B[12]\n\t"
-        "mulx	96(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[20] * B[13]\n\t"
-        "mulx	104(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[20] * B[14]\n\t"
-        "mulx	112(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[20] * B[15]\n\t"
-        "mulx	120(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "movq	%%r12, 256(%[t])\n\t"
-        "movq	%%r13, 264(%[t])\n\t"
-        "movq	%%r14, 272(%[t])\n\t"
-        "movq	%%rax, 280(%[t])\n\t"
-        "movq	296(%[t]), %%r11\n\t"
-        "movq	304(%[t]), %%r12\n\t"
-        "movq	312(%[t]), %%r13\n\t"
-        "movq	320(%[t]), %%r14\n\t"
-        "# A[20] * B[16]\n\t"
-        "mulx	128(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[20] * B[17]\n\t"
-        "mulx	136(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[20] * B[18]\n\t"
-        "mulx	144(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[20] * B[19]\n\t"
-        "mulx	152(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "movq	%%r10, 288(%[t])\n\t"
-        "movq	%%r11, 296(%[t])\n\t"
-        "movq	%%r12, 304(%[t])\n\t"
-        "movq	%%r13, 312(%[t])\n\t"
-        "movq	328(%[t]), %%rax\n\t"
-        "movq	336(%[t]), %%r10\n\t"
-        "movq	344(%[t]), %%r11\n\t"
-        "# A[20] * B[20]\n\t"
-        "mulx	160(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[20] * B[21]\n\t"
-        "mulx	168(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[20] * B[22]\n\t"
-        "mulx	176(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[20] * B[23]\n\t"
-        "mulx	184(%[b]), %%r8, %%r9\n\t"
-        "movq	%%r15, %%r12\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "adcxq	%%rcx, %%r12\n\t"
-        "movq	%%r15, %%rcx\n\t"
-        "adoxq	%%r15, %%rcx\n\t"
-        "adcxq	%%r15, %%rcx\n\t"
-        "movq	%%r14, 320(%[t])\n\t"
-        "movq	%%rax, 328(%[t])\n\t"
-        "movq	%%r10, 336(%[t])\n\t"
-        "movq	%%r11, 344(%[t])\n\t"
-        "movq	%%r12, 352(%[t])\n\t"
-        "movq	168(%[a]), %%rdx\n\t"
-        "xorq	%%r15, %%r15\n\t"
-        "movq	168(%[t]), %%r13\n\t"
-        "movq	176(%[t]), %%r14\n\t"
-        "movq	184(%[t]), %%rax\n\t"
-        "movq	192(%[t]), %%r10\n\t"
-        "movq	200(%[t]), %%r11\n\t"
-        "# A[21] * B[0]\n\t"
-        "mulx	0(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[21] * B[1]\n\t"
-        "mulx	8(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[21] * B[2]\n\t"
-        "mulx	16(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[21] * B[3]\n\t"
-        "mulx	24(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "movq	%%r13, 168(%[t])\n\t"
-        "movq	%%r14, 176(%[t])\n\t"
-        "movq	%%rax, 184(%[t])\n\t"
-        "movq	%%r10, 192(%[t])\n\t"
-        "movq	208(%[t]), %%r12\n\t"
-        "movq	216(%[t]), %%r13\n\t"
-        "movq	224(%[t]), %%r14\n\t"
-        "movq	232(%[t]), %%rax\n\t"
-        "# A[21] * B[4]\n\t"
-        "mulx	32(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[21] * B[5]\n\t"
-        "mulx	40(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[21] * B[6]\n\t"
-        "mulx	48(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[21] * B[7]\n\t"
-        "mulx	56(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "movq	%%r11, 200(%[t])\n\t"
-        "movq	%%r12, 208(%[t])\n\t"
-        "movq	%%r13, 216(%[t])\n\t"
-        "movq	%%r14, 224(%[t])\n\t"
-        "movq	240(%[t]), %%r10\n\t"
-        "movq	248(%[t]), %%r11\n\t"
-        "movq	256(%[t]), %%r12\n\t"
-        "movq	264(%[t]), %%r13\n\t"
-        "# A[21] * B[8]\n\t"
-        "mulx	64(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[21] * B[9]\n\t"
-        "mulx	72(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[21] * B[10]\n\t"
-        "mulx	80(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[21] * B[11]\n\t"
-        "mulx	88(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "movq	%%rax, 232(%[t])\n\t"
-        "movq	%%r10, 240(%[t])\n\t"
-        "movq	%%r11, 248(%[t])\n\t"
-        "movq	%%r12, 256(%[t])\n\t"
-        "movq	272(%[t]), %%r14\n\t"
-        "movq	280(%[t]), %%rax\n\t"
-        "movq	288(%[t]), %%r10\n\t"
-        "movq	296(%[t]), %%r11\n\t"
-        "# A[21] * B[12]\n\t"
-        "mulx	96(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[21] * B[13]\n\t"
-        "mulx	104(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[21] * B[14]\n\t"
-        "mulx	112(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[21] * B[15]\n\t"
-        "mulx	120(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "movq	%%r13, 264(%[t])\n\t"
-        "movq	%%r14, 272(%[t])\n\t"
-        "movq	%%rax, 280(%[t])\n\t"
-        "movq	%%r10, 288(%[t])\n\t"
-        "movq	304(%[t]), %%r12\n\t"
-        "movq	312(%[t]), %%r13\n\t"
-        "movq	320(%[t]), %%r14\n\t"
-        "movq	328(%[t]), %%rax\n\t"
-        "# A[21] * B[16]\n\t"
-        "mulx	128(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[21] * B[17]\n\t"
-        "mulx	136(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[21] * B[18]\n\t"
-        "mulx	144(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[21] * B[19]\n\t"
-        "mulx	152(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "movq	%%r11, 296(%[t])\n\t"
-        "movq	%%r12, 304(%[t])\n\t"
-        "movq	%%r13, 312(%[t])\n\t"
-        "movq	%%r14, 320(%[t])\n\t"
-        "movq	336(%[t]), %%r10\n\t"
-        "movq	344(%[t]), %%r11\n\t"
-        "movq	352(%[t]), %%r12\n\t"
-        "# A[21] * B[20]\n\t"
-        "mulx	160(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[21] * B[21]\n\t"
-        "mulx	168(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[21] * B[22]\n\t"
-        "mulx	176(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[21] * B[23]\n\t"
-        "mulx	184(%[b]), %%r8, %%r9\n\t"
-        "movq	%%r15, %%r13\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "adcxq	%%rcx, %%r13\n\t"
-        "movq	%%r15, %%rcx\n\t"
-        "adoxq	%%r15, %%rcx\n\t"
-        "adcxq	%%r15, %%rcx\n\t"
-        "movq	%%rax, 328(%[t])\n\t"
-        "movq	%%r10, 336(%[t])\n\t"
-        "movq	%%r11, 344(%[t])\n\t"
-        "movq	%%r12, 352(%[t])\n\t"
-        "movq	%%r13, 360(%[t])\n\t"
-        "movq	176(%[a]), %%rdx\n\t"
-        "xorq	%%r15, %%r15\n\t"
-        "movq	176(%[t]), %%r14\n\t"
-        "movq	184(%[t]), %%rax\n\t"
-        "movq	192(%[t]), %%r10\n\t"
-        "movq	200(%[t]), %%r11\n\t"
-        "movq	208(%[t]), %%r12\n\t"
-        "# A[22] * B[0]\n\t"
-        "mulx	0(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[22] * B[1]\n\t"
-        "mulx	8(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[22] * B[2]\n\t"
-        "mulx	16(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[22] * B[3]\n\t"
-        "mulx	24(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "movq	%%r14, 176(%[t])\n\t"
-        "movq	%%rax, 184(%[t])\n\t"
-        "movq	%%r10, 192(%[t])\n\t"
-        "movq	%%r11, 200(%[t])\n\t"
-        "movq	216(%[t]), %%r13\n\t"
-        "movq	224(%[t]), %%r14\n\t"
-        "movq	232(%[t]), %%rax\n\t"
-        "movq	240(%[t]), %%r10\n\t"
-        "# A[22] * B[4]\n\t"
-        "mulx	32(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[22] * B[5]\n\t"
-        "mulx	40(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[22] * B[6]\n\t"
-        "mulx	48(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[22] * B[7]\n\t"
-        "mulx	56(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "movq	%%r12, 208(%[t])\n\t"
-        "movq	%%r13, 216(%[t])\n\t"
-        "movq	%%r14, 224(%[t])\n\t"
-        "movq	%%rax, 232(%[t])\n\t"
-        "movq	248(%[t]), %%r11\n\t"
-        "movq	256(%[t]), %%r12\n\t"
-        "movq	264(%[t]), %%r13\n\t"
-        "movq	272(%[t]), %%r14\n\t"
-        "# A[22] * B[8]\n\t"
-        "mulx	64(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[22] * B[9]\n\t"
-        "mulx	72(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[22] * B[10]\n\t"
-        "mulx	80(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[22] * B[11]\n\t"
-        "mulx	88(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "movq	%%r10, 240(%[t])\n\t"
-        "movq	%%r11, 248(%[t])\n\t"
-        "movq	%%r12, 256(%[t])\n\t"
-        "movq	%%r13, 264(%[t])\n\t"
-        "movq	280(%[t]), %%rax\n\t"
-        "movq	288(%[t]), %%r10\n\t"
-        "movq	296(%[t]), %%r11\n\t"
-        "movq	304(%[t]), %%r12\n\t"
-        "# A[22] * B[12]\n\t"
-        "mulx	96(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[22] * B[13]\n\t"
-        "mulx	104(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[22] * B[14]\n\t"
-        "mulx	112(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[22] * B[15]\n\t"
-        "mulx	120(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "movq	%%r14, 272(%[t])\n\t"
-        "movq	%%rax, 280(%[t])\n\t"
-        "movq	%%r10, 288(%[t])\n\t"
-        "movq	%%r11, 296(%[t])\n\t"
-        "movq	312(%[t]), %%r13\n\t"
-        "movq	320(%[t]), %%r14\n\t"
-        "movq	328(%[t]), %%rax\n\t"
-        "movq	336(%[t]), %%r10\n\t"
-        "# A[22] * B[16]\n\t"
-        "mulx	128(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[22] * B[17]\n\t"
-        "mulx	136(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[22] * B[18]\n\t"
-        "mulx	144(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[22] * B[19]\n\t"
-        "mulx	152(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "movq	%%r12, 304(%[t])\n\t"
-        "movq	%%r13, 312(%[t])\n\t"
-        "movq	%%r14, 320(%[t])\n\t"
-        "movq	%%rax, 328(%[t])\n\t"
-        "movq	344(%[t]), %%r11\n\t"
-        "movq	352(%[t]), %%r12\n\t"
-        "movq	360(%[t]), %%r13\n\t"
-        "# A[22] * B[20]\n\t"
-        "mulx	160(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[22] * B[21]\n\t"
-        "mulx	168(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[22] * B[22]\n\t"
-        "mulx	176(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[22] * B[23]\n\t"
-        "mulx	184(%[b]), %%r8, %%r9\n\t"
-        "movq	%%r15, %%r14\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "adcxq	%%rcx, %%r14\n\t"
-        "movq	%%r15, %%rcx\n\t"
-        "adoxq	%%r15, %%rcx\n\t"
-        "adcxq	%%r15, %%rcx\n\t"
-        "movq	%%r10, 336(%[t])\n\t"
-        "movq	%%r11, 344(%[t])\n\t"
-        "movq	%%r12, 352(%[t])\n\t"
-        "movq	%%r13, 360(%[t])\n\t"
-        "movq	%%r14, 368(%[t])\n\t"
-        "movq	184(%[a]), %%rdx\n\t"
-        "xorq	%%r15, %%r15\n\t"
-        "movq	184(%[t]), %%rax\n\t"
-        "movq	192(%[t]), %%r10\n\t"
-        "movq	200(%[t]), %%r11\n\t"
-        "movq	208(%[t]), %%r12\n\t"
-        "movq	216(%[t]), %%r13\n\t"
-        "# A[23] * B[0]\n\t"
-        "mulx	0(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[23] * B[1]\n\t"
-        "mulx	8(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[23] * B[2]\n\t"
-        "mulx	16(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[23] * B[3]\n\t"
-        "mulx	24(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "movq	%%rax, 184(%[t])\n\t"
-        "movq	%%r10, 192(%[t])\n\t"
-        "movq	%%r11, 200(%[t])\n\t"
-        "movq	%%r12, 208(%[t])\n\t"
-        "movq	224(%[t]), %%r14\n\t"
-        "movq	232(%[t]), %%rax\n\t"
-        "movq	240(%[t]), %%r10\n\t"
-        "movq	248(%[t]), %%r11\n\t"
-        "# A[23] * B[4]\n\t"
-        "mulx	32(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[23] * B[5]\n\t"
-        "mulx	40(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[23] * B[6]\n\t"
-        "mulx	48(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[23] * B[7]\n\t"
-        "mulx	56(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "movq	%%r13, 216(%[t])\n\t"
-        "movq	%%r14, 224(%[t])\n\t"
-        "movq	%%rax, 232(%[t])\n\t"
-        "movq	%%r10, 240(%[t])\n\t"
-        "movq	256(%[t]), %%r12\n\t"
-        "movq	264(%[t]), %%r13\n\t"
-        "movq	272(%[t]), %%r14\n\t"
-        "movq	280(%[t]), %%rax\n\t"
-        "# A[23] * B[8]\n\t"
-        "mulx	64(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[23] * B[9]\n\t"
-        "mulx	72(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[23] * B[10]\n\t"
-        "mulx	80(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[23] * B[11]\n\t"
-        "mulx	88(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "movq	%%r11, 248(%[t])\n\t"
-        "movq	%%r12, 256(%[t])\n\t"
-        "movq	%%r13, 264(%[t])\n\t"
-        "movq	%%r14, 272(%[t])\n\t"
-        "movq	288(%[t]), %%r10\n\t"
-        "movq	296(%[t]), %%r11\n\t"
-        "movq	304(%[t]), %%r12\n\t"
-        "movq	312(%[t]), %%r13\n\t"
-        "# A[23] * B[12]\n\t"
-        "mulx	96(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[23] * B[13]\n\t"
-        "mulx	104(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "# A[23] * B[14]\n\t"
-        "mulx	112(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[23] * B[15]\n\t"
-        "mulx	120(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "movq	%%rax, 280(%[t])\n\t"
-        "movq	%%r10, 288(%[t])\n\t"
-        "movq	%%r11, 296(%[t])\n\t"
-        "movq	%%r12, 304(%[t])\n\t"
-        "movq	320(%[t]), %%r14\n\t"
-        "movq	328(%[t]), %%rax\n\t"
-        "movq	336(%[t]), %%r10\n\t"
-        "movq	344(%[t]), %%r11\n\t"
-        "# A[23] * B[16]\n\t"
-        "mulx	128(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[23] * B[17]\n\t"
-        "mulx	136(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "# A[23] * B[18]\n\t"
-        "mulx	144(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%r9, %%r10\n\t"
-        "# A[23] * B[19]\n\t"
-        "mulx	152(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r10\n\t"
-        "adoxq	%%r9, %%r11\n\t"
-        "movq	%%r13, 312(%[t])\n\t"
-        "movq	%%r14, 320(%[t])\n\t"
-        "movq	%%rax, 328(%[t])\n\t"
-        "movq	%%r10, 336(%[t])\n\t"
-        "movq	352(%[t]), %%r12\n\t"
-        "movq	360(%[t]), %%r13\n\t"
-        "movq	368(%[t]), %%r14\n\t"
-        "# A[23] * B[20]\n\t"
-        "mulx	160(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r11\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "# A[23] * B[21]\n\t"
-        "mulx	168(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "# A[23] * B[22]\n\t"
-        "mulx	176(%[b]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "# A[23] * B[23]\n\t"
-        "mulx	184(%[b]), %%r8, %%r9\n\t"
-        "movq	%%r15, %%rax\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "adcxq	%%rcx, %%rax\n\t"
-        "movq	%%r11, 344(%[t])\n\t"
-        "movq	%%r12, 352(%[t])\n\t"
-        "movq	%%r13, 360(%[t])\n\t"
-        "movq	%%r14, 368(%[t])\n\t"
-        "movq	%%rax, 376(%[t])\n\t"
-        :
-        : [a] "r" (a), [b] "r" (b), [t] "r" (tmp)
-        : "memory", "rax", "rdx", "rcx",
-          "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
-    );
-
-    XMEMCPY(r, tmp, sizeof(tmp));
-}
-#endif /* HAVE_INTEL_AVX2 */
-
-#ifdef HAVE_INTEL_AVX2
-/* Square a and put result in r. (r = a * a)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- */
-SP_NOINLINE static void sp_3072_sqr_avx2_24(sp_digit* r, const sp_digit* a)
-{
-    sp_digit tmp[48];
-
-    __asm__ __volatile__ (
-        "xorq	%%r8, %%r8\n\t"
-        "# Diagonal 1\n\t"
-        "xorq	%%r10, %%r10\n\t"
-        "xorq	%%r11, %%r11\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "xorq	%%r13, %%r13\n\t"
-        "xorq	%%r14, %%r14\n\t"
-        "xorq	%%r15, %%r15\n\t"
-        "# A[1] x A[0]\n\t"
-        "movq	0(%[a]), %%rdx\n\t"
-        "mulxq	8(%[a]), %%r10, %%r11\n\t"
-        "# A[2] x A[0]\n\t"
-        "mulxq	16(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[3] x A[0]\n\t"
-        "mulxq	24(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[4] x A[0]\n\t"
-        "mulxq	32(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[5] x A[0]\n\t"
-        "mulxq	40(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "movq	%%r10, 8(%[tmp])\n\t"
-        "movq	%%r11, 16(%[tmp])\n\t"
-        "movq	%%r12, 24(%[tmp])\n\t"
-        "movq	%%r13, 32(%[tmp])\n\t"
-        "movq	%%r14, 40(%[tmp])\n\t"
-        "movq	%%r8, %%r10\n\t"
-        "movq	%%r8, %%r11\n\t"
-        "movq	%%r8, %%r12\n\t"
-        "movq	%%r8, %%r13\n\t"
-        "movq	%%r8, %%r14\n\t"
-        "# A[6] x A[0]\n\t"
-        "mulxq	48(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[7] x A[0]\n\t"
-        "mulxq	56(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[8] x A[0]\n\t"
-        "mulxq	64(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[9] x A[0]\n\t"
-        "mulxq	72(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[10] x A[0]\n\t"
-        "mulxq	80(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "movq	%%r15, 48(%[tmp])\n\t"
-        "movq	%%r10, 56(%[tmp])\n\t"
-        "movq	%%r11, 64(%[tmp])\n\t"
-        "movq	%%r12, 72(%[tmp])\n\t"
-        "movq	%%r13, 80(%[tmp])\n\t"
-        "movq	%%r8, %%r15\n\t"
-        "movq	%%r8, %%r10\n\t"
-        "movq	%%r8, %%r11\n\t"
-        "movq	%%r8, %%r12\n\t"
-        "movq	%%r8, %%r13\n\t"
-        "# A[11] x A[0]\n\t"
-        "mulxq	88(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[12] x A[0]\n\t"
-        "mulxq	96(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[13] x A[0]\n\t"
-        "mulxq	104(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[14] x A[0]\n\t"
-        "mulxq	112(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[15] x A[0]\n\t"
-        "mulxq	120(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "movq	%%r14, 88(%[tmp])\n\t"
-        "movq	%%r15, 96(%[tmp])\n\t"
-        "movq	%%r10, 104(%[tmp])\n\t"
-        "movq	%%r11, 112(%[tmp])\n\t"
-        "movq	%%r12, 120(%[tmp])\n\t"
-        "movq	%%r8, %%r14\n\t"
-        "movq	%%r8, %%r15\n\t"
-        "movq	%%r8, %%r10\n\t"
-        "movq	%%r8, %%r11\n\t"
-        "movq	%%r8, %%r12\n\t"
-        "# A[16] x A[0]\n\t"
-        "mulxq	128(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[17] x A[0]\n\t"
-        "mulxq	136(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[18] x A[0]\n\t"
-        "mulxq	144(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[19] x A[0]\n\t"
-        "mulxq	152(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[20] x A[0]\n\t"
-        "mulxq	160(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "movq	%%r13, 128(%[tmp])\n\t"
-        "movq	%%r14, 136(%[tmp])\n\t"
-        "movq	%%r15, 144(%[tmp])\n\t"
-        "movq	%%r10, 152(%[tmp])\n\t"
-        "movq	%%r11, 160(%[tmp])\n\t"
-        "movq	%%r8, %%r13\n\t"
-        "movq	%%r8, %%r14\n\t"
-        "movq	%%r8, %%r15\n\t"
-        "# A[21] x A[0]\n\t"
-        "mulxq	168(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[22] x A[0]\n\t"
-        "mulxq	176(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[23] x A[0]\n\t"
-        "mulxq	184(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "movq	%%r12, 168(%[tmp])\n\t"
-        "movq	%%r13, 176(%[tmp])\n\t"
-        "movq	%%r14, 184(%[tmp])\n\t"
-        "#  Carry\n\t"
-        "adcxq	%%r8, %%r15\n\t"
-        "movq	%%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r9\n\t"
-        "adoxq	%%r8, %%r9\n\t"
-        "movq	%%r15, 192(%[tmp])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "# Diagonal 2\n\t"
-        "movq	24(%[tmp]), %%r15\n\t"
-        "movq	32(%[tmp]), %%r10\n\t"
-        "movq	40(%[tmp]), %%r11\n\t"
-        "movq	48(%[tmp]), %%r12\n\t"
-        "movq	56(%[tmp]), %%r13\n\t"
-        "movq	64(%[tmp]), %%r14\n\t"
-        "# A[2] x A[1]\n\t"
-        "movq	8(%[a]), %%rdx\n\t"
-        "mulxq	16(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[3] x A[1]\n\t"
-        "mulxq	24(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[4] x A[1]\n\t"
-        "mulxq	32(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[5] x A[1]\n\t"
-        "mulxq	40(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[6] x A[1]\n\t"
-        "mulxq	48(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "movq	%%r15, 24(%[tmp])\n\t"
-        "movq	%%r10, 32(%[tmp])\n\t"
-        "movq	%%r11, 40(%[tmp])\n\t"
-        "movq	%%r12, 48(%[tmp])\n\t"
-        "movq	%%r13, 56(%[tmp])\n\t"
-        "movq	72(%[tmp]), %%r15\n\t"
-        "movq	80(%[tmp]), %%r10\n\t"
-        "movq	88(%[tmp]), %%r11\n\t"
-        "movq	96(%[tmp]), %%r12\n\t"
-        "movq	104(%[tmp]), %%r13\n\t"
-        "# A[7] x A[1]\n\t"
-        "mulxq	56(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[8] x A[1]\n\t"
-        "mulxq	64(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[9] x A[1]\n\t"
-        "mulxq	72(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[10] x A[1]\n\t"
-        "mulxq	80(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[11] x A[1]\n\t"
-        "mulxq	88(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "movq	%%r14, 64(%[tmp])\n\t"
-        "movq	%%r15, 72(%[tmp])\n\t"
-        "movq	%%r10, 80(%[tmp])\n\t"
-        "movq	%%r11, 88(%[tmp])\n\t"
-        "movq	%%r12, 96(%[tmp])\n\t"
-        "movq	112(%[tmp]), %%r14\n\t"
-        "movq	120(%[tmp]), %%r15\n\t"
-        "movq	128(%[tmp]), %%r10\n\t"
-        "movq	136(%[tmp]), %%r11\n\t"
-        "movq	144(%[tmp]), %%r12\n\t"
-        "# A[12] x A[1]\n\t"
-        "mulxq	96(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[13] x A[1]\n\t"
-        "mulxq	104(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[14] x A[1]\n\t"
-        "mulxq	112(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[15] x A[1]\n\t"
-        "mulxq	120(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[16] x A[1]\n\t"
-        "mulxq	128(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "movq	%%r13, 104(%[tmp])\n\t"
-        "movq	%%r14, 112(%[tmp])\n\t"
-        "movq	%%r15, 120(%[tmp])\n\t"
-        "movq	%%r10, 128(%[tmp])\n\t"
-        "movq	%%r11, 136(%[tmp])\n\t"
-        "movq	152(%[tmp]), %%r13\n\t"
-        "movq	160(%[tmp]), %%r14\n\t"
-        "movq	168(%[tmp]), %%r15\n\t"
-        "movq	176(%[tmp]), %%r10\n\t"
-        "movq	184(%[tmp]), %%r11\n\t"
-        "# A[17] x A[1]\n\t"
-        "mulxq	136(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[18] x A[1]\n\t"
-        "mulxq	144(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[19] x A[1]\n\t"
-        "mulxq	152(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[20] x A[1]\n\t"
-        "mulxq	160(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[21] x A[1]\n\t"
-        "mulxq	168(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "movq	%%r12, 144(%[tmp])\n\t"
-        "movq	%%r13, 152(%[tmp])\n\t"
-        "movq	%%r14, 160(%[tmp])\n\t"
-        "movq	%%r15, 168(%[tmp])\n\t"
-        "movq	%%r10, 176(%[tmp])\n\t"
-        "movq	192(%[tmp]), %%r12\n\t"
-        "movq	%%r8, %%r13\n\t"
-        "movq	%%r8, %%r14\n\t"
-        "# A[22] x A[1]\n\t"
-        "mulxq	176(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[23] x A[1]\n\t"
-        "mulxq	184(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[23] x A[2]\n\t"
-        "movq	16(%[a]), %%rdx\n\t"
-        "mulxq	184(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "movq	%%r11, 184(%[tmp])\n\t"
-        "movq	%%r12, 192(%[tmp])\n\t"
-        "movq	%%r13, 200(%[tmp])\n\t"
-        "#  Carry\n\t"
-        "adcxq	%%r9, %%r14\n\t"
-        "movq	%%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r9\n\t"
-        "adoxq	%%r8, %%r9\n\t"
-        "movq	%%r14, 208(%[tmp])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "# Diagonal 3\n\t"
-        "movq	40(%[tmp]), %%r14\n\t"
-        "movq	48(%[tmp]), %%r15\n\t"
-        "movq	56(%[tmp]), %%r10\n\t"
-        "movq	64(%[tmp]), %%r11\n\t"
-        "movq	72(%[tmp]), %%r12\n\t"
-        "movq	80(%[tmp]), %%r13\n\t"
-        "# A[3] x A[2]\n\t"
-        "mulxq	24(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[4] x A[2]\n\t"
-        "mulxq	32(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[5] x A[2]\n\t"
-        "mulxq	40(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[6] x A[2]\n\t"
-        "mulxq	48(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[7] x A[2]\n\t"
-        "mulxq	56(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "movq	%%r14, 40(%[tmp])\n\t"
-        "movq	%%r15, 48(%[tmp])\n\t"
-        "movq	%%r10, 56(%[tmp])\n\t"
-        "movq	%%r11, 64(%[tmp])\n\t"
-        "movq	%%r12, 72(%[tmp])\n\t"
-        "movq	88(%[tmp]), %%r14\n\t"
-        "movq	96(%[tmp]), %%r15\n\t"
-        "movq	104(%[tmp]), %%r10\n\t"
-        "movq	112(%[tmp]), %%r11\n\t"
-        "movq	120(%[tmp]), %%r12\n\t"
-        "# A[8] x A[2]\n\t"
-        "mulxq	64(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[9] x A[2]\n\t"
-        "mulxq	72(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[10] x A[2]\n\t"
-        "mulxq	80(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[11] x A[2]\n\t"
-        "mulxq	88(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[12] x A[2]\n\t"
-        "mulxq	96(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "movq	%%r13, 80(%[tmp])\n\t"
-        "movq	%%r14, 88(%[tmp])\n\t"
-        "movq	%%r15, 96(%[tmp])\n\t"
-        "movq	%%r10, 104(%[tmp])\n\t"
-        "movq	%%r11, 112(%[tmp])\n\t"
-        "movq	128(%[tmp]), %%r13\n\t"
-        "movq	136(%[tmp]), %%r14\n\t"
-        "movq	144(%[tmp]), %%r15\n\t"
-        "movq	152(%[tmp]), %%r10\n\t"
-        "movq	160(%[tmp]), %%r11\n\t"
-        "# A[13] x A[2]\n\t"
-        "mulxq	104(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[14] x A[2]\n\t"
-        "mulxq	112(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[15] x A[2]\n\t"
-        "mulxq	120(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[16] x A[2]\n\t"
-        "mulxq	128(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[17] x A[2]\n\t"
-        "mulxq	136(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "movq	%%r12, 120(%[tmp])\n\t"
-        "movq	%%r13, 128(%[tmp])\n\t"
-        "movq	%%r14, 136(%[tmp])\n\t"
-        "movq	%%r15, 144(%[tmp])\n\t"
-        "movq	%%r10, 152(%[tmp])\n\t"
-        "movq	168(%[tmp]), %%r12\n\t"
-        "movq	176(%[tmp]), %%r13\n\t"
-        "movq	184(%[tmp]), %%r14\n\t"
-        "movq	192(%[tmp]), %%r15\n\t"
-        "movq	200(%[tmp]), %%r10\n\t"
-        "# A[18] x A[2]\n\t"
-        "mulxq	144(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[19] x A[2]\n\t"
-        "mulxq	152(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[20] x A[2]\n\t"
-        "mulxq	160(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[21] x A[2]\n\t"
-        "mulxq	168(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[22] x A[2]\n\t"
-        "mulxq	176(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "movq	%%r11, 160(%[tmp])\n\t"
-        "movq	%%r12, 168(%[tmp])\n\t"
-        "movq	%%r13, 176(%[tmp])\n\t"
-        "movq	%%r14, 184(%[tmp])\n\t"
-        "movq	%%r15, 192(%[tmp])\n\t"
-        "movq	208(%[tmp]), %%r11\n\t"
-        "movq	%%r8, %%r12\n\t"
-        "movq	%%r8, %%r13\n\t"
-        "# A[22] x A[3]\n\t"
-        "movq	176(%[a]), %%rdx\n\t"
-        "mulxq	24(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[22] x A[4]\n\t"
-        "mulxq	32(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[22] x A[5]\n\t"
-        "mulxq	40(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "movq	%%r10, 200(%[tmp])\n\t"
-        "movq	%%r11, 208(%[tmp])\n\t"
-        "movq	%%r12, 216(%[tmp])\n\t"
-        "#  Carry\n\t"
-        "adcxq	%%r9, %%r13\n\t"
-        "movq	%%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r9\n\t"
-        "adoxq	%%r8, %%r9\n\t"
-        "movq	%%r13, 224(%[tmp])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "# Diagonal 4\n\t"
-        "movq	56(%[tmp]), %%r13\n\t"
-        "movq	64(%[tmp]), %%r14\n\t"
-        "movq	72(%[tmp]), %%r15\n\t"
-        "movq	80(%[tmp]), %%r10\n\t"
-        "movq	88(%[tmp]), %%r11\n\t"
-        "movq	96(%[tmp]), %%r12\n\t"
-        "# A[4] x A[3]\n\t"
-        "movq	24(%[a]), %%rdx\n\t"
-        "mulxq	32(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[5] x A[3]\n\t"
-        "mulxq	40(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[6] x A[3]\n\t"
-        "mulxq	48(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[7] x A[3]\n\t"
-        "mulxq	56(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[8] x A[3]\n\t"
-        "mulxq	64(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "movq	%%r13, 56(%[tmp])\n\t"
-        "movq	%%r14, 64(%[tmp])\n\t"
-        "movq	%%r15, 72(%[tmp])\n\t"
-        "movq	%%r10, 80(%[tmp])\n\t"
-        "movq	%%r11, 88(%[tmp])\n\t"
-        "movq	104(%[tmp]), %%r13\n\t"
-        "movq	112(%[tmp]), %%r14\n\t"
-        "movq	120(%[tmp]), %%r15\n\t"
-        "movq	128(%[tmp]), %%r10\n\t"
-        "movq	136(%[tmp]), %%r11\n\t"
-        "# A[9] x A[3]\n\t"
-        "mulxq	72(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[10] x A[3]\n\t"
-        "mulxq	80(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[11] x A[3]\n\t"
-        "mulxq	88(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[12] x A[3]\n\t"
-        "mulxq	96(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[13] x A[3]\n\t"
-        "mulxq	104(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "movq	%%r12, 96(%[tmp])\n\t"
-        "movq	%%r13, 104(%[tmp])\n\t"
-        "movq	%%r14, 112(%[tmp])\n\t"
-        "movq	%%r15, 120(%[tmp])\n\t"
-        "movq	%%r10, 128(%[tmp])\n\t"
-        "movq	144(%[tmp]), %%r12\n\t"
-        "movq	152(%[tmp]), %%r13\n\t"
-        "movq	160(%[tmp]), %%r14\n\t"
-        "movq	168(%[tmp]), %%r15\n\t"
-        "movq	176(%[tmp]), %%r10\n\t"
-        "# A[14] x A[3]\n\t"
-        "mulxq	112(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[15] x A[3]\n\t"
-        "mulxq	120(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[16] x A[3]\n\t"
-        "mulxq	128(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[17] x A[3]\n\t"
-        "mulxq	136(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[18] x A[3]\n\t"
-        "mulxq	144(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "movq	%%r11, 136(%[tmp])\n\t"
-        "movq	%%r12, 144(%[tmp])\n\t"
-        "movq	%%r13, 152(%[tmp])\n\t"
-        "movq	%%r14, 160(%[tmp])\n\t"
-        "movq	%%r15, 168(%[tmp])\n\t"
-        "movq	184(%[tmp]), %%r11\n\t"
-        "movq	192(%[tmp]), %%r12\n\t"
-        "movq	200(%[tmp]), %%r13\n\t"
-        "movq	208(%[tmp]), %%r14\n\t"
-        "movq	216(%[tmp]), %%r15\n\t"
-        "# A[19] x A[3]\n\t"
-        "mulxq	152(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[20] x A[3]\n\t"
-        "mulxq	160(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[21] x A[3]\n\t"
-        "mulxq	168(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[21] x A[4]\n\t"
-        "movq	168(%[a]), %%rdx\n\t"
-        "mulxq	32(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[21] x A[5]\n\t"
-        "mulxq	40(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "movq	%%r10, 176(%[tmp])\n\t"
-        "movq	%%r11, 184(%[tmp])\n\t"
-        "movq	%%r12, 192(%[tmp])\n\t"
-        "movq	%%r13, 200(%[tmp])\n\t"
-        "movq	%%r14, 208(%[tmp])\n\t"
-        "movq	224(%[tmp]), %%r10\n\t"
-        "movq	%%r8, %%r11\n\t"
-        "movq	%%r8, %%r12\n\t"
-        "# A[21] x A[6]\n\t"
-        "mulxq	48(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[21] x A[7]\n\t"
-        "mulxq	56(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[21] x A[8]\n\t"
-        "mulxq	64(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "movq	%%r15, 216(%[tmp])\n\t"
-        "movq	%%r10, 224(%[tmp])\n\t"
-        "movq	%%r11, 232(%[tmp])\n\t"
-        "#  Carry\n\t"
-        "adcxq	%%r9, %%r12\n\t"
-        "movq	%%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r9\n\t"
-        "adoxq	%%r8, %%r9\n\t"
-        "movq	%%r12, 240(%[tmp])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "# Diagonal 5\n\t"
-        "movq	72(%[tmp]), %%r12\n\t"
-        "movq	80(%[tmp]), %%r13\n\t"
-        "movq	88(%[tmp]), %%r14\n\t"
-        "movq	96(%[tmp]), %%r15\n\t"
-        "movq	104(%[tmp]), %%r10\n\t"
-        "movq	112(%[tmp]), %%r11\n\t"
-        "# A[5] x A[4]\n\t"
-        "movq	32(%[a]), %%rdx\n\t"
-        "mulxq	40(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[6] x A[4]\n\t"
-        "mulxq	48(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[7] x A[4]\n\t"
-        "mulxq	56(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[8] x A[4]\n\t"
-        "mulxq	64(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[9] x A[4]\n\t"
-        "mulxq	72(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "movq	%%r12, 72(%[tmp])\n\t"
-        "movq	%%r13, 80(%[tmp])\n\t"
-        "movq	%%r14, 88(%[tmp])\n\t"
-        "movq	%%r15, 96(%[tmp])\n\t"
-        "movq	%%r10, 104(%[tmp])\n\t"
-        "movq	120(%[tmp]), %%r12\n\t"
-        "movq	128(%[tmp]), %%r13\n\t"
-        "movq	136(%[tmp]), %%r14\n\t"
-        "movq	144(%[tmp]), %%r15\n\t"
-        "movq	152(%[tmp]), %%r10\n\t"
-        "# A[10] x A[4]\n\t"
-        "mulxq	80(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[11] x A[4]\n\t"
-        "mulxq	88(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[12] x A[4]\n\t"
-        "mulxq	96(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[13] x A[4]\n\t"
-        "mulxq	104(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[14] x A[4]\n\t"
-        "mulxq	112(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "movq	%%r11, 112(%[tmp])\n\t"
-        "movq	%%r12, 120(%[tmp])\n\t"
-        "movq	%%r13, 128(%[tmp])\n\t"
-        "movq	%%r14, 136(%[tmp])\n\t"
-        "movq	%%r15, 144(%[tmp])\n\t"
-        "movq	160(%[tmp]), %%r11\n\t"
-        "movq	168(%[tmp]), %%r12\n\t"
-        "movq	176(%[tmp]), %%r13\n\t"
-        "movq	184(%[tmp]), %%r14\n\t"
-        "movq	192(%[tmp]), %%r15\n\t"
-        "# A[15] x A[4]\n\t"
-        "mulxq	120(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[16] x A[4]\n\t"
-        "mulxq	128(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[17] x A[4]\n\t"
-        "mulxq	136(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[18] x A[4]\n\t"
-        "mulxq	144(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[19] x A[4]\n\t"
-        "mulxq	152(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "movq	%%r10, 152(%[tmp])\n\t"
-        "movq	%%r11, 160(%[tmp])\n\t"
-        "movq	%%r12, 168(%[tmp])\n\t"
-        "movq	%%r13, 176(%[tmp])\n\t"
-        "movq	%%r14, 184(%[tmp])\n\t"
-        "movq	200(%[tmp]), %%r10\n\t"
-        "movq	208(%[tmp]), %%r11\n\t"
-        "movq	216(%[tmp]), %%r12\n\t"
-        "movq	224(%[tmp]), %%r13\n\t"
-        "movq	232(%[tmp]), %%r14\n\t"
-        "# A[20] x A[4]\n\t"
-        "mulxq	160(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[20] x A[5]\n\t"
-        "movq	160(%[a]), %%rdx\n\t"
-        "mulxq	40(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[20] x A[6]\n\t"
-        "mulxq	48(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[20] x A[7]\n\t"
-        "mulxq	56(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[20] x A[8]\n\t"
-        "mulxq	64(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "movq	%%r15, 192(%[tmp])\n\t"
-        "movq	%%r10, 200(%[tmp])\n\t"
-        "movq	%%r11, 208(%[tmp])\n\t"
-        "movq	%%r12, 216(%[tmp])\n\t"
-        "movq	%%r13, 224(%[tmp])\n\t"
-        "movq	240(%[tmp]), %%r15\n\t"
-        "movq	%%r8, %%r10\n\t"
-        "movq	%%r8, %%r11\n\t"
-        "# A[20] x A[9]\n\t"
-        "mulxq	72(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[20] x A[10]\n\t"
-        "mulxq	80(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[20] x A[11]\n\t"
-        "mulxq	88(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "movq	%%r14, 232(%[tmp])\n\t"
-        "movq	%%r15, 240(%[tmp])\n\t"
-        "movq	%%r10, 248(%[tmp])\n\t"
-        "#  Carry\n\t"
-        "adcxq	%%r9, %%r11\n\t"
-        "movq	%%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r9\n\t"
-        "adoxq	%%r8, %%r9\n\t"
-        "movq	%%r11, 256(%[tmp])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "# Diagonal 6\n\t"
-        "movq	88(%[tmp]), %%r11\n\t"
-        "movq	96(%[tmp]), %%r12\n\t"
-        "movq	104(%[tmp]), %%r13\n\t"
-        "movq	112(%[tmp]), %%r14\n\t"
-        "movq	120(%[tmp]), %%r15\n\t"
-        "movq	128(%[tmp]), %%r10\n\t"
-        "# A[6] x A[5]\n\t"
-        "movq	40(%[a]), %%rdx\n\t"
-        "mulxq	48(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[7] x A[5]\n\t"
-        "mulxq	56(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[8] x A[5]\n\t"
-        "mulxq	64(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[9] x A[5]\n\t"
-        "mulxq	72(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[10] x A[5]\n\t"
-        "mulxq	80(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "movq	%%r11, 88(%[tmp])\n\t"
-        "movq	%%r12, 96(%[tmp])\n\t"
-        "movq	%%r13, 104(%[tmp])\n\t"
-        "movq	%%r14, 112(%[tmp])\n\t"
-        "movq	%%r15, 120(%[tmp])\n\t"
-        "movq	136(%[tmp]), %%r11\n\t"
-        "movq	144(%[tmp]), %%r12\n\t"
-        "movq	152(%[tmp]), %%r13\n\t"
-        "movq	160(%[tmp]), %%r14\n\t"
-        "movq	168(%[tmp]), %%r15\n\t"
-        "# A[11] x A[5]\n\t"
-        "mulxq	88(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[12] x A[5]\n\t"
-        "mulxq	96(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[13] x A[5]\n\t"
-        "mulxq	104(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[14] x A[5]\n\t"
-        "mulxq	112(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[15] x A[5]\n\t"
-        "mulxq	120(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "movq	%%r10, 128(%[tmp])\n\t"
-        "movq	%%r11, 136(%[tmp])\n\t"
-        "movq	%%r12, 144(%[tmp])\n\t"
-        "movq	%%r13, 152(%[tmp])\n\t"
-        "movq	%%r14, 160(%[tmp])\n\t"
-        "movq	176(%[tmp]), %%r10\n\t"
-        "movq	184(%[tmp]), %%r11\n\t"
-        "movq	192(%[tmp]), %%r12\n\t"
-        "movq	200(%[tmp]), %%r13\n\t"
-        "movq	208(%[tmp]), %%r14\n\t"
-        "# A[16] x A[5]\n\t"
-        "mulxq	128(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[17] x A[5]\n\t"
-        "mulxq	136(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[18] x A[5]\n\t"
-        "mulxq	144(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[19] x A[5]\n\t"
-        "mulxq	152(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[19] x A[6]\n\t"
-        "movq	152(%[a]), %%rdx\n\t"
-        "mulxq	48(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "movq	%%r15, 168(%[tmp])\n\t"
-        "movq	%%r10, 176(%[tmp])\n\t"
-        "movq	%%r11, 184(%[tmp])\n\t"
-        "movq	%%r12, 192(%[tmp])\n\t"
-        "movq	%%r13, 200(%[tmp])\n\t"
-        "movq	216(%[tmp]), %%r15\n\t"
-        "movq	224(%[tmp]), %%r10\n\t"
-        "movq	232(%[tmp]), %%r11\n\t"
-        "movq	240(%[tmp]), %%r12\n\t"
-        "movq	248(%[tmp]), %%r13\n\t"
-        "# A[19] x A[7]\n\t"
-        "mulxq	56(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[19] x A[8]\n\t"
-        "mulxq	64(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[19] x A[9]\n\t"
-        "mulxq	72(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[19] x A[10]\n\t"
-        "mulxq	80(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[19] x A[11]\n\t"
-        "mulxq	88(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "movq	%%r14, 208(%[tmp])\n\t"
-        "movq	%%r15, 216(%[tmp])\n\t"
-        "movq	%%r10, 224(%[tmp])\n\t"
-        "movq	%%r11, 232(%[tmp])\n\t"
-        "movq	%%r12, 240(%[tmp])\n\t"
-        "movq	256(%[tmp]), %%r14\n\t"
-        "movq	%%r8, %%r15\n\t"
-        "movq	%%r8, %%r10\n\t"
-        "# A[19] x A[12]\n\t"
-        "mulxq	96(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[19] x A[13]\n\t"
-        "mulxq	104(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[19] x A[14]\n\t"
-        "mulxq	112(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "movq	%%r13, 248(%[tmp])\n\t"
-        "movq	%%r14, 256(%[tmp])\n\t"
-        "movq	%%r15, 264(%[tmp])\n\t"
-        "#  Carry\n\t"
-        "adcxq	%%r9, %%r10\n\t"
-        "movq	%%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r9\n\t"
-        "adoxq	%%r8, %%r9\n\t"
-        "movq	%%r10, 272(%[tmp])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "# Diagonal 7\n\t"
-        "movq	104(%[tmp]), %%r10\n\t"
-        "movq	112(%[tmp]), %%r11\n\t"
-        "movq	120(%[tmp]), %%r12\n\t"
-        "movq	128(%[tmp]), %%r13\n\t"
-        "movq	136(%[tmp]), %%r14\n\t"
-        "movq	144(%[tmp]), %%r15\n\t"
-        "# A[7] x A[6]\n\t"
-        "movq	48(%[a]), %%rdx\n\t"
-        "mulxq	56(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[8] x A[6]\n\t"
-        "mulxq	64(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[9] x A[6]\n\t"
-        "mulxq	72(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[10] x A[6]\n\t"
-        "mulxq	80(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[11] x A[6]\n\t"
-        "mulxq	88(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "movq	%%r10, 104(%[tmp])\n\t"
-        "movq	%%r11, 112(%[tmp])\n\t"
-        "movq	%%r12, 120(%[tmp])\n\t"
-        "movq	%%r13, 128(%[tmp])\n\t"
-        "movq	%%r14, 136(%[tmp])\n\t"
-        "movq	152(%[tmp]), %%r10\n\t"
-        "movq	160(%[tmp]), %%r11\n\t"
-        "movq	168(%[tmp]), %%r12\n\t"
-        "movq	176(%[tmp]), %%r13\n\t"
-        "movq	184(%[tmp]), %%r14\n\t"
-        "# A[12] x A[6]\n\t"
-        "mulxq	96(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[13] x A[6]\n\t"
-        "mulxq	104(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[14] x A[6]\n\t"
-        "mulxq	112(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[15] x A[6]\n\t"
-        "mulxq	120(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[16] x A[6]\n\t"
-        "mulxq	128(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "movq	%%r15, 144(%[tmp])\n\t"
-        "movq	%%r10, 152(%[tmp])\n\t"
-        "movq	%%r11, 160(%[tmp])\n\t"
-        "movq	%%r12, 168(%[tmp])\n\t"
-        "movq	%%r13, 176(%[tmp])\n\t"
-        "movq	192(%[tmp]), %%r15\n\t"
-        "movq	200(%[tmp]), %%r10\n\t"
-        "movq	208(%[tmp]), %%r11\n\t"
-        "movq	216(%[tmp]), %%r12\n\t"
-        "movq	224(%[tmp]), %%r13\n\t"
-        "# A[17] x A[6]\n\t"
-        "mulxq	136(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[18] x A[6]\n\t"
-        "mulxq	144(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[18] x A[7]\n\t"
-        "movq	144(%[a]), %%rdx\n\t"
-        "mulxq	56(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[18] x A[8]\n\t"
-        "mulxq	64(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[18] x A[9]\n\t"
-        "mulxq	72(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "movq	%%r14, 184(%[tmp])\n\t"
-        "movq	%%r15, 192(%[tmp])\n\t"
-        "movq	%%r10, 200(%[tmp])\n\t"
-        "movq	%%r11, 208(%[tmp])\n\t"
-        "movq	%%r12, 216(%[tmp])\n\t"
-        "movq	232(%[tmp]), %%r14\n\t"
-        "movq	240(%[tmp]), %%r15\n\t"
-        "movq	248(%[tmp]), %%r10\n\t"
-        "movq	256(%[tmp]), %%r11\n\t"
-        "movq	264(%[tmp]), %%r12\n\t"
-        "# A[18] x A[10]\n\t"
-        "mulxq	80(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[18] x A[11]\n\t"
-        "mulxq	88(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[18] x A[12]\n\t"
-        "mulxq	96(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[18] x A[13]\n\t"
-        "mulxq	104(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[18] x A[14]\n\t"
-        "mulxq	112(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "movq	%%r13, 224(%[tmp])\n\t"
-        "movq	%%r14, 232(%[tmp])\n\t"
-        "movq	%%r15, 240(%[tmp])\n\t"
-        "movq	%%r10, 248(%[tmp])\n\t"
-        "movq	%%r11, 256(%[tmp])\n\t"
-        "movq	272(%[tmp]), %%r13\n\t"
-        "movq	%%r8, %%r14\n\t"
-        "movq	%%r8, %%r15\n\t"
-        "# A[18] x A[15]\n\t"
-        "mulxq	120(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[18] x A[16]\n\t"
-        "mulxq	128(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[18] x A[17]\n\t"
-        "mulxq	136(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "movq	%%r12, 264(%[tmp])\n\t"
-        "movq	%%r13, 272(%[tmp])\n\t"
-        "movq	%%r14, 280(%[tmp])\n\t"
-        "#  Carry\n\t"
-        "adcxq	%%r9, %%r15\n\t"
-        "movq	%%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r9\n\t"
-        "adoxq	%%r8, %%r9\n\t"
-        "movq	%%r15, 288(%[tmp])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "# Diagonal 8\n\t"
-        "movq	120(%[tmp]), %%r15\n\t"
-        "movq	128(%[tmp]), %%r10\n\t"
-        "movq	136(%[tmp]), %%r11\n\t"
-        "movq	144(%[tmp]), %%r12\n\t"
-        "movq	152(%[tmp]), %%r13\n\t"
-        "movq	160(%[tmp]), %%r14\n\t"
-        "# A[8] x A[7]\n\t"
-        "movq	56(%[a]), %%rdx\n\t"
-        "mulxq	64(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[9] x A[7]\n\t"
-        "mulxq	72(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[10] x A[7]\n\t"
-        "mulxq	80(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[11] x A[7]\n\t"
-        "mulxq	88(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[12] x A[7]\n\t"
-        "mulxq	96(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "movq	%%r15, 120(%[tmp])\n\t"
-        "movq	%%r10, 128(%[tmp])\n\t"
-        "movq	%%r11, 136(%[tmp])\n\t"
-        "movq	%%r12, 144(%[tmp])\n\t"
-        "movq	%%r13, 152(%[tmp])\n\t"
-        "movq	168(%[tmp]), %%r15\n\t"
-        "movq	176(%[tmp]), %%r10\n\t"
-        "movq	184(%[tmp]), %%r11\n\t"
-        "movq	192(%[tmp]), %%r12\n\t"
-        "movq	200(%[tmp]), %%r13\n\t"
-        "# A[13] x A[7]\n\t"
-        "mulxq	104(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[14] x A[7]\n\t"
-        "mulxq	112(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[15] x A[7]\n\t"
-        "mulxq	120(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[16] x A[7]\n\t"
-        "mulxq	128(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[17] x A[7]\n\t"
-        "mulxq	136(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "movq	%%r14, 160(%[tmp])\n\t"
-        "movq	%%r15, 168(%[tmp])\n\t"
-        "movq	%%r10, 176(%[tmp])\n\t"
-        "movq	%%r11, 184(%[tmp])\n\t"
-        "movq	%%r12, 192(%[tmp])\n\t"
-        "movq	208(%[tmp]), %%r14\n\t"
-        "movq	216(%[tmp]), %%r15\n\t"
-        "movq	224(%[tmp]), %%r10\n\t"
-        "movq	232(%[tmp]), %%r11\n\t"
-        "movq	240(%[tmp]), %%r12\n\t"
-        "# A[17] x A[8]\n\t"
-        "movq	136(%[a]), %%rdx\n\t"
-        "mulxq	64(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[17] x A[9]\n\t"
-        "mulxq	72(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[17] x A[10]\n\t"
-        "mulxq	80(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[17] x A[11]\n\t"
-        "mulxq	88(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[17] x A[12]\n\t"
-        "mulxq	96(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "movq	%%r13, 200(%[tmp])\n\t"
-        "movq	%%r14, 208(%[tmp])\n\t"
-        "movq	%%r15, 216(%[tmp])\n\t"
-        "movq	%%r10, 224(%[tmp])\n\t"
-        "movq	%%r11, 232(%[tmp])\n\t"
-        "movq	248(%[tmp]), %%r13\n\t"
-        "movq	256(%[tmp]), %%r14\n\t"
-        "movq	264(%[tmp]), %%r15\n\t"
-        "movq	272(%[tmp]), %%r10\n\t"
-        "movq	280(%[tmp]), %%r11\n\t"
-        "# A[17] x A[13]\n\t"
-        "mulxq	104(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[17] x A[14]\n\t"
-        "mulxq	112(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[17] x A[15]\n\t"
-        "mulxq	120(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[17] x A[16]\n\t"
-        "mulxq	128(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[19] x A[15]\n\t"
-        "movq	152(%[a]), %%rdx\n\t"
-        "mulxq	120(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "movq	%%r12, 240(%[tmp])\n\t"
-        "movq	%%r13, 248(%[tmp])\n\t"
-        "movq	%%r14, 256(%[tmp])\n\t"
-        "movq	%%r15, 264(%[tmp])\n\t"
-        "movq	%%r10, 272(%[tmp])\n\t"
-        "movq	288(%[tmp]), %%r12\n\t"
-        "movq	%%r8, %%r13\n\t"
-        "movq	%%r8, %%r14\n\t"
-        "# A[19] x A[16]\n\t"
-        "mulxq	128(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[19] x A[17]\n\t"
-        "mulxq	136(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[19] x A[18]\n\t"
-        "mulxq	144(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "movq	%%r11, 280(%[tmp])\n\t"
-        "movq	%%r12, 288(%[tmp])\n\t"
-        "movq	%%r13, 296(%[tmp])\n\t"
-        "#  Carry\n\t"
-        "adcxq	%%r9, %%r14\n\t"
-        "movq	%%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r9\n\t"
-        "adoxq	%%r8, %%r9\n\t"
-        "movq	%%r14, 304(%[tmp])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "# Diagonal 9\n\t"
-        "movq	136(%[tmp]), %%r14\n\t"
-        "movq	144(%[tmp]), %%r15\n\t"
-        "movq	152(%[tmp]), %%r10\n\t"
-        "movq	160(%[tmp]), %%r11\n\t"
-        "movq	168(%[tmp]), %%r12\n\t"
-        "movq	176(%[tmp]), %%r13\n\t"
-        "# A[9] x A[8]\n\t"
-        "movq	64(%[a]), %%rdx\n\t"
-        "mulxq	72(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[10] x A[8]\n\t"
-        "mulxq	80(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[11] x A[8]\n\t"
-        "mulxq	88(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[12] x A[8]\n\t"
-        "mulxq	96(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[13] x A[8]\n\t"
-        "mulxq	104(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "movq	%%r14, 136(%[tmp])\n\t"
-        "movq	%%r15, 144(%[tmp])\n\t"
-        "movq	%%r10, 152(%[tmp])\n\t"
-        "movq	%%r11, 160(%[tmp])\n\t"
-        "movq	%%r12, 168(%[tmp])\n\t"
-        "movq	184(%[tmp]), %%r14\n\t"
-        "movq	192(%[tmp]), %%r15\n\t"
-        "movq	200(%[tmp]), %%r10\n\t"
-        "movq	208(%[tmp]), %%r11\n\t"
-        "movq	216(%[tmp]), %%r12\n\t"
-        "# A[14] x A[8]\n\t"
-        "mulxq	112(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[15] x A[8]\n\t"
-        "mulxq	120(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[16] x A[8]\n\t"
-        "mulxq	128(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[16] x A[9]\n\t"
-        "movq	128(%[a]), %%rdx\n\t"
-        "mulxq	72(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[16] x A[10]\n\t"
-        "mulxq	80(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "movq	%%r13, 176(%[tmp])\n\t"
-        "movq	%%r14, 184(%[tmp])\n\t"
-        "movq	%%r15, 192(%[tmp])\n\t"
-        "movq	%%r10, 200(%[tmp])\n\t"
-        "movq	%%r11, 208(%[tmp])\n\t"
-        "movq	224(%[tmp]), %%r13\n\t"
-        "movq	232(%[tmp]), %%r14\n\t"
-        "movq	240(%[tmp]), %%r15\n\t"
-        "movq	248(%[tmp]), %%r10\n\t"
-        "movq	256(%[tmp]), %%r11\n\t"
-        "# A[16] x A[11]\n\t"
-        "mulxq	88(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[16] x A[12]\n\t"
-        "mulxq	96(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[16] x A[13]\n\t"
-        "mulxq	104(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[16] x A[14]\n\t"
-        "mulxq	112(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[16] x A[15]\n\t"
-        "mulxq	120(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "movq	%%r12, 216(%[tmp])\n\t"
-        "movq	%%r13, 224(%[tmp])\n\t"
-        "movq	%%r14, 232(%[tmp])\n\t"
-        "movq	%%r15, 240(%[tmp])\n\t"
-        "movq	%%r10, 248(%[tmp])\n\t"
-        "movq	264(%[tmp]), %%r12\n\t"
-        "movq	272(%[tmp]), %%r13\n\t"
-        "movq	280(%[tmp]), %%r14\n\t"
-        "movq	288(%[tmp]), %%r15\n\t"
-        "movq	296(%[tmp]), %%r10\n\t"
-        "# A[20] x A[12]\n\t"
-        "movq	160(%[a]), %%rdx\n\t"
-        "mulxq	96(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[20] x A[13]\n\t"
-        "mulxq	104(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[20] x A[14]\n\t"
-        "mulxq	112(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[20] x A[15]\n\t"
-        "mulxq	120(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[20] x A[16]\n\t"
-        "mulxq	128(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "movq	%%r11, 256(%[tmp])\n\t"
-        "movq	%%r12, 264(%[tmp])\n\t"
-        "movq	%%r13, 272(%[tmp])\n\t"
-        "movq	%%r14, 280(%[tmp])\n\t"
-        "movq	%%r15, 288(%[tmp])\n\t"
-        "movq	304(%[tmp]), %%r11\n\t"
-        "movq	%%r8, %%r12\n\t"
-        "movq	%%r8, %%r13\n\t"
-        "# A[20] x A[17]\n\t"
-        "mulxq	136(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[20] x A[18]\n\t"
-        "mulxq	144(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[20] x A[19]\n\t"
-        "mulxq	152(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "movq	%%r10, 296(%[tmp])\n\t"
-        "movq	%%r11, 304(%[tmp])\n\t"
-        "movq	%%r12, 312(%[tmp])\n\t"
-        "#  Carry\n\t"
-        "adcxq	%%r9, %%r13\n\t"
-        "movq	%%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r9\n\t"
-        "adoxq	%%r8, %%r9\n\t"
-        "movq	%%r13, 320(%[tmp])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "# Diagonal 10\n\t"
-        "movq	152(%[tmp]), %%r13\n\t"
-        "movq	160(%[tmp]), %%r14\n\t"
-        "movq	168(%[tmp]), %%r15\n\t"
-        "movq	176(%[tmp]), %%r10\n\t"
-        "movq	184(%[tmp]), %%r11\n\t"
-        "movq	192(%[tmp]), %%r12\n\t"
-        "# A[10] x A[9]\n\t"
-        "movq	72(%[a]), %%rdx\n\t"
-        "mulxq	80(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[11] x A[9]\n\t"
-        "mulxq	88(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[12] x A[9]\n\t"
-        "mulxq	96(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[13] x A[9]\n\t"
-        "mulxq	104(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[14] x A[9]\n\t"
-        "mulxq	112(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "movq	%%r13, 152(%[tmp])\n\t"
-        "movq	%%r14, 160(%[tmp])\n\t"
-        "movq	%%r15, 168(%[tmp])\n\t"
-        "movq	%%r10, 176(%[tmp])\n\t"
-        "movq	%%r11, 184(%[tmp])\n\t"
-        "movq	200(%[tmp]), %%r13\n\t"
-        "movq	208(%[tmp]), %%r14\n\t"
-        "movq	216(%[tmp]), %%r15\n\t"
-        "movq	224(%[tmp]), %%r10\n\t"
-        "movq	232(%[tmp]), %%r11\n\t"
-        "# A[15] x A[9]\n\t"
-        "mulxq	120(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[15] x A[10]\n\t"
-        "movq	120(%[a]), %%rdx\n\t"
-        "mulxq	80(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[15] x A[11]\n\t"
-        "mulxq	88(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[15] x A[12]\n\t"
-        "mulxq	96(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[15] x A[13]\n\t"
-        "mulxq	104(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "movq	%%r12, 192(%[tmp])\n\t"
-        "movq	%%r13, 200(%[tmp])\n\t"
-        "movq	%%r14, 208(%[tmp])\n\t"
-        "movq	%%r15, 216(%[tmp])\n\t"
-        "movq	%%r10, 224(%[tmp])\n\t"
-        "movq	240(%[tmp]), %%r12\n\t"
-        "movq	248(%[tmp]), %%r13\n\t"
-        "movq	256(%[tmp]), %%r14\n\t"
-        "movq	264(%[tmp]), %%r15\n\t"
-        "movq	272(%[tmp]), %%r10\n\t"
-        "# A[15] x A[14]\n\t"
-        "mulxq	112(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[21] x A[9]\n\t"
-        "movq	168(%[a]), %%rdx\n\t"
-        "mulxq	72(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[21] x A[10]\n\t"
-        "mulxq	80(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[21] x A[11]\n\t"
-        "mulxq	88(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[21] x A[12]\n\t"
-        "mulxq	96(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "movq	%%r11, 232(%[tmp])\n\t"
-        "movq	%%r12, 240(%[tmp])\n\t"
-        "movq	%%r13, 248(%[tmp])\n\t"
-        "movq	%%r14, 256(%[tmp])\n\t"
-        "movq	%%r15, 264(%[tmp])\n\t"
-        "movq	280(%[tmp]), %%r11\n\t"
-        "movq	288(%[tmp]), %%r12\n\t"
-        "movq	296(%[tmp]), %%r13\n\t"
-        "movq	304(%[tmp]), %%r14\n\t"
-        "movq	312(%[tmp]), %%r15\n\t"
-        "# A[21] x A[13]\n\t"
-        "mulxq	104(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[21] x A[14]\n\t"
-        "mulxq	112(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[21] x A[15]\n\t"
-        "mulxq	120(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[21] x A[16]\n\t"
-        "mulxq	128(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[21] x A[17]\n\t"
-        "mulxq	136(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "movq	%%r10, 272(%[tmp])\n\t"
-        "movq	%%r11, 280(%[tmp])\n\t"
-        "movq	%%r12, 288(%[tmp])\n\t"
-        "movq	%%r13, 296(%[tmp])\n\t"
-        "movq	%%r14, 304(%[tmp])\n\t"
-        "movq	320(%[tmp]), %%r10\n\t"
-        "movq	%%r8, %%r11\n\t"
-        "movq	%%r8, %%r12\n\t"
-        "# A[21] x A[18]\n\t"
-        "mulxq	144(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[21] x A[19]\n\t"
-        "mulxq	152(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[21] x A[20]\n\t"
-        "mulxq	160(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "movq	%%r15, 312(%[tmp])\n\t"
-        "movq	%%r10, 320(%[tmp])\n\t"
-        "movq	%%r11, 328(%[tmp])\n\t"
-        "#  Carry\n\t"
-        "adcxq	%%r9, %%r12\n\t"
-        "movq	%%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r9\n\t"
-        "adoxq	%%r8, %%r9\n\t"
-        "movq	%%r12, 336(%[tmp])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "# Diagonal 11\n\t"
-        "movq	168(%[tmp]), %%r12\n\t"
-        "movq	176(%[tmp]), %%r13\n\t"
-        "movq	184(%[tmp]), %%r14\n\t"
-        "movq	192(%[tmp]), %%r15\n\t"
-        "movq	200(%[tmp]), %%r10\n\t"
-        "movq	208(%[tmp]), %%r11\n\t"
-        "# A[11] x A[10]\n\t"
-        "movq	80(%[a]), %%rdx\n\t"
-        "mulxq	88(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[12] x A[10]\n\t"
-        "mulxq	96(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[13] x A[10]\n\t"
-        "mulxq	104(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[14] x A[10]\n\t"
-        "mulxq	112(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[14] x A[11]\n\t"
-        "movq	112(%[a]), %%rdx\n\t"
-        "mulxq	88(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "movq	%%r12, 168(%[tmp])\n\t"
-        "movq	%%r13, 176(%[tmp])\n\t"
-        "movq	%%r14, 184(%[tmp])\n\t"
-        "movq	%%r15, 192(%[tmp])\n\t"
-        "movq	%%r10, 200(%[tmp])\n\t"
-        "movq	216(%[tmp]), %%r12\n\t"
-        "movq	224(%[tmp]), %%r13\n\t"
-        "movq	232(%[tmp]), %%r14\n\t"
-        "movq	240(%[tmp]), %%r15\n\t"
-        "movq	248(%[tmp]), %%r10\n\t"
-        "# A[14] x A[12]\n\t"
-        "mulxq	96(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[14] x A[13]\n\t"
-        "mulxq	104(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[22] x A[6]\n\t"
-        "movq	176(%[a]), %%rdx\n\t"
-        "mulxq	48(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[22] x A[7]\n\t"
-        "mulxq	56(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[22] x A[8]\n\t"
-        "mulxq	64(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "movq	%%r11, 208(%[tmp])\n\t"
-        "movq	%%r12, 216(%[tmp])\n\t"
-        "movq	%%r13, 224(%[tmp])\n\t"
-        "movq	%%r14, 232(%[tmp])\n\t"
-        "movq	%%r15, 240(%[tmp])\n\t"
-        "movq	256(%[tmp]), %%r11\n\t"
-        "movq	264(%[tmp]), %%r12\n\t"
-        "movq	272(%[tmp]), %%r13\n\t"
-        "movq	280(%[tmp]), %%r14\n\t"
-        "movq	288(%[tmp]), %%r15\n\t"
-        "# A[22] x A[9]\n\t"
-        "mulxq	72(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[22] x A[10]\n\t"
-        "mulxq	80(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[22] x A[11]\n\t"
-        "mulxq	88(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[22] x A[12]\n\t"
-        "mulxq	96(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[22] x A[13]\n\t"
-        "mulxq	104(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "movq	%%r10, 248(%[tmp])\n\t"
-        "movq	%%r11, 256(%[tmp])\n\t"
-        "movq	%%r12, 264(%[tmp])\n\t"
-        "movq	%%r13, 272(%[tmp])\n\t"
-        "movq	%%r14, 280(%[tmp])\n\t"
-        "movq	296(%[tmp]), %%r10\n\t"
-        "movq	304(%[tmp]), %%r11\n\t"
-        "movq	312(%[tmp]), %%r12\n\t"
-        "movq	320(%[tmp]), %%r13\n\t"
-        "movq	328(%[tmp]), %%r14\n\t"
-        "# A[22] x A[14]\n\t"
-        "mulxq	112(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[22] x A[15]\n\t"
-        "mulxq	120(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[22] x A[16]\n\t"
-        "mulxq	128(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[22] x A[17]\n\t"
-        "mulxq	136(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[22] x A[18]\n\t"
-        "mulxq	144(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "movq	%%r15, 288(%[tmp])\n\t"
-        "movq	%%r10, 296(%[tmp])\n\t"
-        "movq	%%r11, 304(%[tmp])\n\t"
-        "movq	%%r12, 312(%[tmp])\n\t"
-        "movq	%%r13, 320(%[tmp])\n\t"
-        "movq	336(%[tmp]), %%r15\n\t"
-        "movq	%%r8, %%r10\n\t"
-        "movq	%%r8, %%r11\n\t"
-        "# A[22] x A[19]\n\t"
-        "mulxq	152(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[22] x A[20]\n\t"
-        "mulxq	160(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[22] x A[21]\n\t"
-        "mulxq	168(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "movq	%%r14, 328(%[tmp])\n\t"
-        "movq	%%r15, 336(%[tmp])\n\t"
-        "movq	%%r10, 344(%[tmp])\n\t"
-        "#  Carry\n\t"
-        "adcxq	%%r9, %%r11\n\t"
-        "movq	%%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r9\n\t"
-        "adoxq	%%r8, %%r9\n\t"
-        "movq	%%r11, 352(%[tmp])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "# Diagonal 12\n\t"
-        "movq	184(%[tmp]), %%r11\n\t"
-        "movq	192(%[tmp]), %%r12\n\t"
-        "movq	200(%[tmp]), %%r13\n\t"
-        "movq	208(%[tmp]), %%r14\n\t"
-        "movq	216(%[tmp]), %%r15\n\t"
-        "movq	224(%[tmp]), %%r10\n\t"
-        "# A[12] x A[11]\n\t"
-        "movq	88(%[a]), %%rdx\n\t"
-        "mulxq	96(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[13] x A[11]\n\t"
-        "mulxq	104(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[13] x A[12]\n\t"
-        "movq	96(%[a]), %%rdx\n\t"
-        "mulxq	104(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[23] x A[3]\n\t"
-        "movq	184(%[a]), %%rdx\n\t"
-        "mulxq	24(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[23] x A[4]\n\t"
-        "mulxq	32(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "movq	%%r11, 184(%[tmp])\n\t"
-        "movq	%%r12, 192(%[tmp])\n\t"
-        "movq	%%r13, 200(%[tmp])\n\t"
-        "movq	%%r14, 208(%[tmp])\n\t"
-        "movq	%%r15, 216(%[tmp])\n\t"
-        "movq	232(%[tmp]), %%r11\n\t"
-        "movq	240(%[tmp]), %%r12\n\t"
-        "movq	248(%[tmp]), %%r13\n\t"
-        "movq	256(%[tmp]), %%r14\n\t"
-        "movq	264(%[tmp]), %%r15\n\t"
-        "# A[23] x A[5]\n\t"
-        "mulxq	40(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[23] x A[6]\n\t"
-        "mulxq	48(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[23] x A[7]\n\t"
-        "mulxq	56(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[23] x A[8]\n\t"
-        "mulxq	64(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[23] x A[9]\n\t"
-        "mulxq	72(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "movq	%%r10, 224(%[tmp])\n\t"
-        "movq	%%r11, 232(%[tmp])\n\t"
-        "movq	%%r12, 240(%[tmp])\n\t"
-        "movq	%%r13, 248(%[tmp])\n\t"
-        "movq	%%r14, 256(%[tmp])\n\t"
-        "movq	272(%[tmp]), %%r10\n\t"
-        "movq	280(%[tmp]), %%r11\n\t"
-        "movq	288(%[tmp]), %%r12\n\t"
-        "movq	296(%[tmp]), %%r13\n\t"
-        "movq	304(%[tmp]), %%r14\n\t"
-        "# A[23] x A[10]\n\t"
-        "mulxq	80(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[23] x A[11]\n\t"
-        "mulxq	88(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[23] x A[12]\n\t"
-        "mulxq	96(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[23] x A[13]\n\t"
-        "mulxq	104(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "# A[23] x A[14]\n\t"
-        "mulxq	112(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "movq	%%r15, 264(%[tmp])\n\t"
-        "movq	%%r10, 272(%[tmp])\n\t"
-        "movq	%%r11, 280(%[tmp])\n\t"
-        "movq	%%r12, 288(%[tmp])\n\t"
-        "movq	%%r13, 296(%[tmp])\n\t"
-        "movq	312(%[tmp]), %%r15\n\t"
-        "movq	320(%[tmp]), %%r10\n\t"
-        "movq	328(%[tmp]), %%r11\n\t"
-        "movq	336(%[tmp]), %%r12\n\t"
-        "movq	344(%[tmp]), %%r13\n\t"
-        "# A[23] x A[15]\n\t"
-        "mulxq	120(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[23] x A[16]\n\t"
-        "mulxq	128(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "# A[23] x A[17]\n\t"
-        "mulxq	136(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%rcx, %%r11\n\t"
-        "# A[23] x A[18]\n\t"
-        "mulxq	144(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "# A[23] x A[19]\n\t"
-        "mulxq	152(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "movq	%%r14, 304(%[tmp])\n\t"
-        "movq	%%r15, 312(%[tmp])\n\t"
-        "movq	%%r10, 320(%[tmp])\n\t"
-        "movq	%%r11, 328(%[tmp])\n\t"
-        "movq	%%r12, 336(%[tmp])\n\t"
-        "movq	352(%[tmp]), %%r14\n\t"
-        "movq	%%r8, %%r15\n\t"
-        "movq	%%r8, %%r10\n\t"
-        "# A[23] x A[20]\n\t"
-        "mulxq	160(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "# A[23] x A[21]\n\t"
-        "mulxq	168(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r14\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "# A[23] x A[22]\n\t"
-        "mulxq	176(%[a]), %%rax, %%rcx\n\t"
-        "adcxq	%%rax, %%r15\n\t"
-        "adoxq	%%rcx, %%r10\n\t"
-        "movq	%%r13, 344(%[tmp])\n\t"
-        "movq	%%r14, 352(%[tmp])\n\t"
-        "movq	%%r15, 360(%[tmp])\n\t"
-        "#  Carry\n\t"
-        "adcxq	%%r9, %%r10\n\t"
-        "movq	%%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r9\n\t"
-        "adoxq	%%r8, %%r9\n\t"
-        "movq	%%r10, 368(%[tmp])\n\t"
-        "movq	%%r9, 376(%[tmp])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "# Double and Add in A[i] x A[i]\n\t"
-        "movq	8(%[tmp]), %%r11\n\t"
-        "# A[0] x A[0]\n\t"
-        "movq	0(%[a]), %%rdx\n\t"
-        "mulxq	%%rdx, %%rax, %%rcx\n\t"
-        "movq	%%rax, 0(%[tmp])\n\t"
-        "adoxq	%%r11, %%r11\n\t"
-        "adcxq	%%rcx, %%r11\n\t"
-        "movq	%%r11, 8(%[tmp])\n\t"
-        "movq	16(%[tmp]), %%r10\n\t"
-        "movq	24(%[tmp]), %%r11\n\t"
-        "# A[1] x A[1]\n\t"
-        "movq	8(%[a]), %%rdx\n\t"
-        "mulxq	%%rdx, %%rax, %%rcx\n\t"
-        "adoxq	%%r10, %%r10\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r11, %%r11\n\t"
-        "adcxq	%%rcx, %%r11\n\t"
-        "movq	%%r10, 16(%[tmp])\n\t"
-        "movq	%%r11, 24(%[tmp])\n\t"
-        "movq	32(%[tmp]), %%r10\n\t"
-        "movq	40(%[tmp]), %%r11\n\t"
-        "# A[2] x A[2]\n\t"
-        "movq	16(%[a]), %%rdx\n\t"
-        "mulxq	%%rdx, %%rax, %%rcx\n\t"
-        "adoxq	%%r10, %%r10\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r11, %%r11\n\t"
-        "adcxq	%%rcx, %%r11\n\t"
-        "movq	%%r10, 32(%[tmp])\n\t"
-        "movq	%%r11, 40(%[tmp])\n\t"
-        "movq	48(%[tmp]), %%r10\n\t"
-        "movq	56(%[tmp]), %%r11\n\t"
-        "# A[3] x A[3]\n\t"
-        "movq	24(%[a]), %%rdx\n\t"
-        "mulxq	%%rdx, %%rax, %%rcx\n\t"
-        "adoxq	%%r10, %%r10\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r11, %%r11\n\t"
-        "adcxq	%%rcx, %%r11\n\t"
-        "movq	%%r10, 48(%[tmp])\n\t"
-        "movq	%%r11, 56(%[tmp])\n\t"
-        "movq	64(%[tmp]), %%r10\n\t"
-        "movq	72(%[tmp]), %%r11\n\t"
-        "# A[4] x A[4]\n\t"
-        "movq	32(%[a]), %%rdx\n\t"
-        "mulxq	%%rdx, %%rax, %%rcx\n\t"
-        "adoxq	%%r10, %%r10\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r11, %%r11\n\t"
-        "adcxq	%%rcx, %%r11\n\t"
-        "movq	%%r10, 64(%[tmp])\n\t"
-        "movq	%%r11, 72(%[tmp])\n\t"
-        "movq	80(%[tmp]), %%r10\n\t"
-        "movq	88(%[tmp]), %%r11\n\t"
-        "# A[5] x A[5]\n\t"
-        "movq	40(%[a]), %%rdx\n\t"
-        "mulxq	%%rdx, %%rax, %%rcx\n\t"
-        "adoxq	%%r10, %%r10\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r11, %%r11\n\t"
-        "adcxq	%%rcx, %%r11\n\t"
-        "movq	%%r10, 80(%[tmp])\n\t"
-        "movq	%%r11, 88(%[tmp])\n\t"
-        "movq	96(%[tmp]), %%r10\n\t"
-        "movq	104(%[tmp]), %%r11\n\t"
-        "# A[6] x A[6]\n\t"
-        "movq	48(%[a]), %%rdx\n\t"
-        "mulxq	%%rdx, %%rax, %%rcx\n\t"
-        "adoxq	%%r10, %%r10\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r11, %%r11\n\t"
-        "adcxq	%%rcx, %%r11\n\t"
-        "movq	%%r10, 96(%[tmp])\n\t"
-        "movq	%%r11, 104(%[tmp])\n\t"
-        "movq	112(%[tmp]), %%r10\n\t"
-        "movq	120(%[tmp]), %%r11\n\t"
-        "# A[7] x A[7]\n\t"
-        "movq	56(%[a]), %%rdx\n\t"
-        "mulxq	%%rdx, %%rax, %%rcx\n\t"
-        "adoxq	%%r10, %%r10\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r11, %%r11\n\t"
-        "adcxq	%%rcx, %%r11\n\t"
-        "movq	%%r10, 112(%[tmp])\n\t"
-        "movq	%%r11, 120(%[tmp])\n\t"
-        "movq	128(%[tmp]), %%r10\n\t"
-        "movq	136(%[tmp]), %%r11\n\t"
-        "# A[8] x A[8]\n\t"
-        "movq	64(%[a]), %%rdx\n\t"
-        "mulxq	%%rdx, %%rax, %%rcx\n\t"
-        "adoxq	%%r10, %%r10\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r11, %%r11\n\t"
-        "adcxq	%%rcx, %%r11\n\t"
-        "movq	%%r10, 128(%[tmp])\n\t"
-        "movq	%%r11, 136(%[tmp])\n\t"
-        "movq	144(%[tmp]), %%r10\n\t"
-        "movq	152(%[tmp]), %%r11\n\t"
-        "# A[9] x A[9]\n\t"
-        "movq	72(%[a]), %%rdx\n\t"
-        "mulxq	%%rdx, %%rax, %%rcx\n\t"
-        "adoxq	%%r10, %%r10\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r11, %%r11\n\t"
-        "adcxq	%%rcx, %%r11\n\t"
-        "movq	%%r10, 144(%[tmp])\n\t"
-        "movq	%%r11, 152(%[tmp])\n\t"
-        "movq	160(%[tmp]), %%r10\n\t"
-        "movq	168(%[tmp]), %%r11\n\t"
-        "# A[10] x A[10]\n\t"
-        "movq	80(%[a]), %%rdx\n\t"
-        "mulxq	%%rdx, %%rax, %%rcx\n\t"
-        "adoxq	%%r10, %%r10\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r11, %%r11\n\t"
-        "adcxq	%%rcx, %%r11\n\t"
-        "movq	%%r10, 160(%[tmp])\n\t"
-        "movq	%%r11, 168(%[tmp])\n\t"
-        "movq	176(%[tmp]), %%r10\n\t"
-        "movq	184(%[tmp]), %%r11\n\t"
-        "# A[11] x A[11]\n\t"
-        "movq	88(%[a]), %%rdx\n\t"
-        "mulxq	%%rdx, %%rax, %%rcx\n\t"
-        "adoxq	%%r10, %%r10\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r11, %%r11\n\t"
-        "adcxq	%%rcx, %%r11\n\t"
-        "movq	%%r10, 176(%[tmp])\n\t"
-        "movq	%%r11, 184(%[tmp])\n\t"
-        "movq	192(%[tmp]), %%r10\n\t"
-        "movq	200(%[tmp]), %%r11\n\t"
-        "# A[12] x A[12]\n\t"
-        "movq	96(%[a]), %%rdx\n\t"
-        "mulxq	%%rdx, %%rax, %%rcx\n\t"
-        "adoxq	%%r10, %%r10\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r11, %%r11\n\t"
-        "adcxq	%%rcx, %%r11\n\t"
-        "movq	%%r10, 192(%[r])\n\t"
-        "movq	%%r11, 200(%[r])\n\t"
-        "movq	208(%[tmp]), %%r10\n\t"
-        "movq	216(%[tmp]), %%r11\n\t"
-        "# A[13] x A[13]\n\t"
-        "movq	104(%[a]), %%rdx\n\t"
-        "mulxq	%%rdx, %%rax, %%rcx\n\t"
-        "adoxq	%%r10, %%r10\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r11, %%r11\n\t"
-        "adcxq	%%rcx, %%r11\n\t"
-        "movq	%%r10, 208(%[r])\n\t"
-        "movq	%%r11, 216(%[r])\n\t"
-        "movq	224(%[tmp]), %%r10\n\t"
-        "movq	232(%[tmp]), %%r11\n\t"
-        "# A[14] x A[14]\n\t"
-        "movq	112(%[a]), %%rdx\n\t"
-        "mulxq	%%rdx, %%rax, %%rcx\n\t"
-        "adoxq	%%r10, %%r10\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r11, %%r11\n\t"
-        "adcxq	%%rcx, %%r11\n\t"
-        "movq	%%r10, 224(%[r])\n\t"
-        "movq	%%r11, 232(%[r])\n\t"
-        "movq	240(%[tmp]), %%r10\n\t"
-        "movq	248(%[tmp]), %%r11\n\t"
-        "# A[15] x A[15]\n\t"
-        "movq	120(%[a]), %%rdx\n\t"
-        "mulxq	%%rdx, %%rax, %%rcx\n\t"
-        "adoxq	%%r10, %%r10\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r11, %%r11\n\t"
-        "adcxq	%%rcx, %%r11\n\t"
-        "movq	%%r10, 240(%[r])\n\t"
-        "movq	%%r11, 248(%[r])\n\t"
-        "movq	256(%[tmp]), %%r10\n\t"
-        "movq	264(%[tmp]), %%r11\n\t"
-        "# A[16] x A[16]\n\t"
-        "movq	128(%[a]), %%rdx\n\t"
-        "mulxq	%%rdx, %%rax, %%rcx\n\t"
-        "adoxq	%%r10, %%r10\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r11, %%r11\n\t"
-        "adcxq	%%rcx, %%r11\n\t"
-        "movq	%%r10, 256(%[r])\n\t"
-        "movq	%%r11, 264(%[r])\n\t"
-        "movq	272(%[tmp]), %%r10\n\t"
-        "movq	280(%[tmp]), %%r11\n\t"
-        "# A[17] x A[17]\n\t"
-        "movq	136(%[a]), %%rdx\n\t"
-        "mulxq	%%rdx, %%rax, %%rcx\n\t"
-        "adoxq	%%r10, %%r10\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r11, %%r11\n\t"
-        "adcxq	%%rcx, %%r11\n\t"
-        "movq	%%r10, 272(%[r])\n\t"
-        "movq	%%r11, 280(%[r])\n\t"
-        "movq	288(%[tmp]), %%r10\n\t"
-        "movq	296(%[tmp]), %%r11\n\t"
-        "# A[18] x A[18]\n\t"
-        "movq	144(%[a]), %%rdx\n\t"
-        "mulxq	%%rdx, %%rax, %%rcx\n\t"
-        "adoxq	%%r10, %%r10\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r11, %%r11\n\t"
-        "adcxq	%%rcx, %%r11\n\t"
-        "movq	%%r10, 288(%[r])\n\t"
-        "movq	%%r11, 296(%[r])\n\t"
-        "movq	304(%[tmp]), %%r10\n\t"
-        "movq	312(%[tmp]), %%r11\n\t"
-        "# A[19] x A[19]\n\t"
-        "movq	152(%[a]), %%rdx\n\t"
-        "mulxq	%%rdx, %%rax, %%rcx\n\t"
-        "adoxq	%%r10, %%r10\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r11, %%r11\n\t"
-        "adcxq	%%rcx, %%r11\n\t"
-        "movq	%%r10, 304(%[r])\n\t"
-        "movq	%%r11, 312(%[r])\n\t"
-        "movq	320(%[tmp]), %%r10\n\t"
-        "movq	328(%[tmp]), %%r11\n\t"
-        "# A[20] x A[20]\n\t"
-        "movq	160(%[a]), %%rdx\n\t"
-        "mulxq	%%rdx, %%rax, %%rcx\n\t"
-        "adoxq	%%r10, %%r10\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r11, %%r11\n\t"
-        "adcxq	%%rcx, %%r11\n\t"
-        "movq	%%r10, 320(%[r])\n\t"
-        "movq	%%r11, 328(%[r])\n\t"
-        "movq	336(%[tmp]), %%r10\n\t"
-        "movq	344(%[tmp]), %%r11\n\t"
-        "# A[21] x A[21]\n\t"
-        "movq	168(%[a]), %%rdx\n\t"
-        "mulxq	%%rdx, %%rax, %%rcx\n\t"
-        "adoxq	%%r10, %%r10\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r11, %%r11\n\t"
-        "adcxq	%%rcx, %%r11\n\t"
-        "movq	%%r10, 336(%[r])\n\t"
-        "movq	%%r11, 344(%[r])\n\t"
-        "movq	352(%[tmp]), %%r10\n\t"
-        "movq	360(%[tmp]), %%r11\n\t"
-        "# A[22] x A[22]\n\t"
-        "movq	176(%[a]), %%rdx\n\t"
-        "mulxq	%%rdx, %%rax, %%rcx\n\t"
-        "adoxq	%%r10, %%r10\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r11, %%r11\n\t"
-        "adcxq	%%rcx, %%r11\n\t"
-        "movq	%%r10, 352(%[r])\n\t"
-        "movq	%%r11, 360(%[r])\n\t"
-        "movq	368(%[tmp]), %%r10\n\t"
-        "movq	376(%[tmp]), %%r11\n\t"
-        "# A[23] x A[23]\n\t"
-        "movq	184(%[a]), %%rdx\n\t"
-        "mulxq	%%rdx, %%rax, %%rcx\n\t"
-        "adoxq	%%r10, %%r10\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r11, %%r11\n\t"
-        "adcxq	%%rcx, %%r11\n\t"
-        "movq	%%r10, 368(%[r])\n\t"
-        "movq	%%r11, 376(%[r])\n\t"
-        :
-        : [r] "r" (r), [a] "r" (a), [tmp] "r" (tmp)
-        : "memory", "rax", "rdx", "rcx", "r8", "r9", "r10", "r11",
-          "r12", "r13", "r14", "r15"
-    );
-
-    XMEMCPY(r, tmp, sizeof(tmp)/2);
-}
-#endif /* HAVE_INTEL_AVX2 */
-
-/* Add b to a into r. (r = a + b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A single precision integer.
- */
-SP_NOINLINE static sp_digit sp_3072_add_24(sp_digit* r, const sp_digit* a,
-        const sp_digit* b)
-{
-    sp_digit c = 0;
-
-    __asm__ __volatile__ (
-        "movq	(%[a]), %%rax\n\t"
-        "addq	(%[b]), %%rax\n\t"
-        "movq	%%rax, (%[r])\n\t"
-        "movq	8(%[a]), %%rax\n\t"
-        "adcq	8(%[b]), %%rax\n\t"
-        "movq	%%rax, 8(%[r])\n\t"
-        "movq	16(%[a]), %%rax\n\t"
-        "adcq	16(%[b]), %%rax\n\t"
-        "movq	%%rax, 16(%[r])\n\t"
-        "movq	24(%[a]), %%rax\n\t"
-        "adcq	24(%[b]), %%rax\n\t"
-        "movq	%%rax, 24(%[r])\n\t"
-        "movq	32(%[a]), %%rax\n\t"
-        "adcq	32(%[b]), %%rax\n\t"
-        "movq	%%rax, 32(%[r])\n\t"
-        "movq	40(%[a]), %%rax\n\t"
-        "adcq	40(%[b]), %%rax\n\t"
-        "movq	%%rax, 40(%[r])\n\t"
-        "movq	48(%[a]), %%rax\n\t"
-        "adcq	48(%[b]), %%rax\n\t"
-        "movq	%%rax, 48(%[r])\n\t"
-        "movq	56(%[a]), %%rax\n\t"
-        "adcq	56(%[b]), %%rax\n\t"
-        "movq	%%rax, 56(%[r])\n\t"
-        "movq	64(%[a]), %%rax\n\t"
-        "adcq	64(%[b]), %%rax\n\t"
-        "movq	%%rax, 64(%[r])\n\t"
-        "movq	72(%[a]), %%rax\n\t"
-        "adcq	72(%[b]), %%rax\n\t"
-        "movq	%%rax, 72(%[r])\n\t"
-        "movq	80(%[a]), %%rax\n\t"
-        "adcq	80(%[b]), %%rax\n\t"
-        "movq	%%rax, 80(%[r])\n\t"
-        "movq	88(%[a]), %%rax\n\t"
-        "adcq	88(%[b]), %%rax\n\t"
-        "movq	%%rax, 88(%[r])\n\t"
-        "movq	96(%[a]), %%rax\n\t"
-        "adcq	96(%[b]), %%rax\n\t"
-        "movq	%%rax, 96(%[r])\n\t"
-        "movq	104(%[a]), %%rax\n\t"
-        "adcq	104(%[b]), %%rax\n\t"
-        "movq	%%rax, 104(%[r])\n\t"
-        "movq	112(%[a]), %%rax\n\t"
-        "adcq	112(%[b]), %%rax\n\t"
-        "movq	%%rax, 112(%[r])\n\t"
-        "movq	120(%[a]), %%rax\n\t"
-        "adcq	120(%[b]), %%rax\n\t"
-        "movq	%%rax, 120(%[r])\n\t"
-        "movq	128(%[a]), %%rax\n\t"
-        "adcq	128(%[b]), %%rax\n\t"
-        "movq	%%rax, 128(%[r])\n\t"
-        "movq	136(%[a]), %%rax\n\t"
-        "adcq	136(%[b]), %%rax\n\t"
-        "movq	%%rax, 136(%[r])\n\t"
-        "movq	144(%[a]), %%rax\n\t"
-        "adcq	144(%[b]), %%rax\n\t"
-        "movq	%%rax, 144(%[r])\n\t"
-        "movq	152(%[a]), %%rax\n\t"
-        "adcq	152(%[b]), %%rax\n\t"
-        "movq	%%rax, 152(%[r])\n\t"
-        "movq	160(%[a]), %%rax\n\t"
-        "adcq	160(%[b]), %%rax\n\t"
-        "movq	%%rax, 160(%[r])\n\t"
-        "movq	168(%[a]), %%rax\n\t"
-        "adcq	168(%[b]), %%rax\n\t"
-        "movq	%%rax, 168(%[r])\n\t"
-        "movq	176(%[a]), %%rax\n\t"
-        "adcq	176(%[b]), %%rax\n\t"
-        "movq	%%rax, 176(%[r])\n\t"
-        "movq	184(%[a]), %%rax\n\t"
-        "adcq	184(%[b]), %%rax\n\t"
-        "movq	%%rax, 184(%[r])\n\t"
-        "adcq	$0, %[c]\n\t"
-        : [c] "+r" (c)
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
-        : "memory", "rax"
-    );
-
-    return c;
-}
-
-/* Sub b from a into a. (a -= b)
- *
- * a  A single precision integer and result.
- * b  A single precision integer.
- */
-SP_NOINLINE static sp_digit sp_3072_sub_in_place_48(sp_digit* a,
-    const sp_digit* b)
-{
-    sp_digit c = 0;
-
-    __asm__ __volatile__ (
-        "movq	0(%[a]), %%r8\n\t"
-        "movq	8(%[a]), %%r9\n\t"
-        "movq	0(%[b]), %%rdx\n\t"
-        "movq	8(%[b]), %%rcx\n\t"
-        "subq	%%rdx, %%r8\n\t"
-        "movq	16(%[b]), %%rdx\n\t"
-        "movq	%%r8, 0(%[a])\n\t"
-        "movq	16(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	24(%[b]), %%rcx\n\t"
-        "movq	%%r9, 8(%[a])\n\t"
-        "movq	24(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	32(%[b]), %%rdx\n\t"
-        "movq	%%r8, 16(%[a])\n\t"
-        "movq	32(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	40(%[b]), %%rcx\n\t"
-        "movq	%%r9, 24(%[a])\n\t"
-        "movq	40(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	48(%[b]), %%rdx\n\t"
-        "movq	%%r8, 32(%[a])\n\t"
-        "movq	48(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	56(%[b]), %%rcx\n\t"
-        "movq	%%r9, 40(%[a])\n\t"
-        "movq	56(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	64(%[b]), %%rdx\n\t"
-        "movq	%%r8, 48(%[a])\n\t"
-        "movq	64(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	72(%[b]), %%rcx\n\t"
-        "movq	%%r9, 56(%[a])\n\t"
-        "movq	72(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	80(%[b]), %%rdx\n\t"
-        "movq	%%r8, 64(%[a])\n\t"
-        "movq	80(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	88(%[b]), %%rcx\n\t"
-        "movq	%%r9, 72(%[a])\n\t"
-        "movq	88(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	96(%[b]), %%rdx\n\t"
-        "movq	%%r8, 80(%[a])\n\t"
-        "movq	96(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	104(%[b]), %%rcx\n\t"
-        "movq	%%r9, 88(%[a])\n\t"
-        "movq	104(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	112(%[b]), %%rdx\n\t"
-        "movq	%%r8, 96(%[a])\n\t"
-        "movq	112(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	120(%[b]), %%rcx\n\t"
-        "movq	%%r9, 104(%[a])\n\t"
-        "movq	120(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	128(%[b]), %%rdx\n\t"
-        "movq	%%r8, 112(%[a])\n\t"
-        "movq	128(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	136(%[b]), %%rcx\n\t"
-        "movq	%%r9, 120(%[a])\n\t"
-        "movq	136(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	144(%[b]), %%rdx\n\t"
-        "movq	%%r8, 128(%[a])\n\t"
-        "movq	144(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	152(%[b]), %%rcx\n\t"
-        "movq	%%r9, 136(%[a])\n\t"
-        "movq	152(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	160(%[b]), %%rdx\n\t"
-        "movq	%%r8, 144(%[a])\n\t"
-        "movq	160(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	168(%[b]), %%rcx\n\t"
-        "movq	%%r9, 152(%[a])\n\t"
-        "movq	168(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	176(%[b]), %%rdx\n\t"
-        "movq	%%r8, 160(%[a])\n\t"
-        "movq	176(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	184(%[b]), %%rcx\n\t"
-        "movq	%%r9, 168(%[a])\n\t"
-        "movq	184(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	192(%[b]), %%rdx\n\t"
-        "movq	%%r8, 176(%[a])\n\t"
-        "movq	192(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	200(%[b]), %%rcx\n\t"
-        "movq	%%r9, 184(%[a])\n\t"
-        "movq	200(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	208(%[b]), %%rdx\n\t"
-        "movq	%%r8, 192(%[a])\n\t"
-        "movq	208(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	216(%[b]), %%rcx\n\t"
-        "movq	%%r9, 200(%[a])\n\t"
-        "movq	216(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	224(%[b]), %%rdx\n\t"
-        "movq	%%r8, 208(%[a])\n\t"
-        "movq	224(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	232(%[b]), %%rcx\n\t"
-        "movq	%%r9, 216(%[a])\n\t"
-        "movq	232(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	240(%[b]), %%rdx\n\t"
-        "movq	%%r8, 224(%[a])\n\t"
-        "movq	240(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	248(%[b]), %%rcx\n\t"
-        "movq	%%r9, 232(%[a])\n\t"
-        "movq	248(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	256(%[b]), %%rdx\n\t"
-        "movq	%%r8, 240(%[a])\n\t"
-        "movq	256(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	264(%[b]), %%rcx\n\t"
-        "movq	%%r9, 248(%[a])\n\t"
-        "movq	264(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	272(%[b]), %%rdx\n\t"
-        "movq	%%r8, 256(%[a])\n\t"
-        "movq	272(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	280(%[b]), %%rcx\n\t"
-        "movq	%%r9, 264(%[a])\n\t"
-        "movq	280(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	288(%[b]), %%rdx\n\t"
-        "movq	%%r8, 272(%[a])\n\t"
-        "movq	288(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	296(%[b]), %%rcx\n\t"
-        "movq	%%r9, 280(%[a])\n\t"
-        "movq	296(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	304(%[b]), %%rdx\n\t"
-        "movq	%%r8, 288(%[a])\n\t"
-        "movq	304(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	312(%[b]), %%rcx\n\t"
-        "movq	%%r9, 296(%[a])\n\t"
-        "movq	312(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	320(%[b]), %%rdx\n\t"
-        "movq	%%r8, 304(%[a])\n\t"
-        "movq	320(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	328(%[b]), %%rcx\n\t"
-        "movq	%%r9, 312(%[a])\n\t"
-        "movq	328(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	336(%[b]), %%rdx\n\t"
-        "movq	%%r8, 320(%[a])\n\t"
-        "movq	336(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	344(%[b]), %%rcx\n\t"
-        "movq	%%r9, 328(%[a])\n\t"
-        "movq	344(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	352(%[b]), %%rdx\n\t"
-        "movq	%%r8, 336(%[a])\n\t"
-        "movq	352(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	360(%[b]), %%rcx\n\t"
-        "movq	%%r9, 344(%[a])\n\t"
-        "movq	360(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	368(%[b]), %%rdx\n\t"
-        "movq	%%r8, 352(%[a])\n\t"
-        "movq	368(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	376(%[b]), %%rcx\n\t"
-        "movq	%%r9, 360(%[a])\n\t"
-        "movq	376(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	%%r8, 368(%[a])\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	%%r9, 376(%[a])\n\t"
-        "sbbq	$0, %[c]\n\t"
-        : [c] "+r" (c)
-        : [a] "r" (a), [b] "r" (b)
-        : "memory", "rdx", "rcx", "r8", "r9"
-    );
-
-    return c;
-}
-
-/* Add b to a into r. (r = a + b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A single precision integer.
- */
-SP_NOINLINE static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a,
-        const sp_digit* b)
-{
-    sp_digit c = 0;
-
-    __asm__ __volatile__ (
-        "movq	(%[a]), %%rax\n\t"
-        "addq	(%[b]), %%rax\n\t"
-        "movq	%%rax, (%[r])\n\t"
-        "movq	8(%[a]), %%rax\n\t"
-        "adcq	8(%[b]), %%rax\n\t"
-        "movq	%%rax, 8(%[r])\n\t"
-        "movq	16(%[a]), %%rax\n\t"
-        "adcq	16(%[b]), %%rax\n\t"
-        "movq	%%rax, 16(%[r])\n\t"
-        "movq	24(%[a]), %%rax\n\t"
-        "adcq	24(%[b]), %%rax\n\t"
-        "movq	%%rax, 24(%[r])\n\t"
-        "movq	32(%[a]), %%rax\n\t"
-        "adcq	32(%[b]), %%rax\n\t"
-        "movq	%%rax, 32(%[r])\n\t"
-        "movq	40(%[a]), %%rax\n\t"
-        "adcq	40(%[b]), %%rax\n\t"
-        "movq	%%rax, 40(%[r])\n\t"
-        "movq	48(%[a]), %%rax\n\t"
-        "adcq	48(%[b]), %%rax\n\t"
-        "movq	%%rax, 48(%[r])\n\t"
-        "movq	56(%[a]), %%rax\n\t"
-        "adcq	56(%[b]), %%rax\n\t"
-        "movq	%%rax, 56(%[r])\n\t"
-        "movq	64(%[a]), %%rax\n\t"
-        "adcq	64(%[b]), %%rax\n\t"
-        "movq	%%rax, 64(%[r])\n\t"
-        "movq	72(%[a]), %%rax\n\t"
-        "adcq	72(%[b]), %%rax\n\t"
-        "movq	%%rax, 72(%[r])\n\t"
-        "movq	80(%[a]), %%rax\n\t"
-        "adcq	80(%[b]), %%rax\n\t"
-        "movq	%%rax, 80(%[r])\n\t"
-        "movq	88(%[a]), %%rax\n\t"
-        "adcq	88(%[b]), %%rax\n\t"
-        "movq	%%rax, 88(%[r])\n\t"
-        "movq	96(%[a]), %%rax\n\t"
-        "adcq	96(%[b]), %%rax\n\t"
-        "movq	%%rax, 96(%[r])\n\t"
-        "movq	104(%[a]), %%rax\n\t"
-        "adcq	104(%[b]), %%rax\n\t"
-        "movq	%%rax, 104(%[r])\n\t"
-        "movq	112(%[a]), %%rax\n\t"
-        "adcq	112(%[b]), %%rax\n\t"
-        "movq	%%rax, 112(%[r])\n\t"
-        "movq	120(%[a]), %%rax\n\t"
-        "adcq	120(%[b]), %%rax\n\t"
-        "movq	%%rax, 120(%[r])\n\t"
-        "movq	128(%[a]), %%rax\n\t"
-        "adcq	128(%[b]), %%rax\n\t"
-        "movq	%%rax, 128(%[r])\n\t"
-        "movq	136(%[a]), %%rax\n\t"
-        "adcq	136(%[b]), %%rax\n\t"
-        "movq	%%rax, 136(%[r])\n\t"
-        "movq	144(%[a]), %%rax\n\t"
-        "adcq	144(%[b]), %%rax\n\t"
-        "movq	%%rax, 144(%[r])\n\t"
-        "movq	152(%[a]), %%rax\n\t"
-        "adcq	152(%[b]), %%rax\n\t"
-        "movq	%%rax, 152(%[r])\n\t"
-        "movq	160(%[a]), %%rax\n\t"
-        "adcq	160(%[b]), %%rax\n\t"
-        "movq	%%rax, 160(%[r])\n\t"
-        "movq	168(%[a]), %%rax\n\t"
-        "adcq	168(%[b]), %%rax\n\t"
-        "movq	%%rax, 168(%[r])\n\t"
-        "movq	176(%[a]), %%rax\n\t"
-        "adcq	176(%[b]), %%rax\n\t"
-        "movq	%%rax, 176(%[r])\n\t"
-        "movq	184(%[a]), %%rax\n\t"
-        "adcq	184(%[b]), %%rax\n\t"
-        "movq	%%rax, 184(%[r])\n\t"
-        "movq	192(%[a]), %%rax\n\t"
-        "adcq	192(%[b]), %%rax\n\t"
-        "movq	%%rax, 192(%[r])\n\t"
-        "movq	200(%[a]), %%rax\n\t"
-        "adcq	200(%[b]), %%rax\n\t"
-        "movq	%%rax, 200(%[r])\n\t"
-        "movq	208(%[a]), %%rax\n\t"
-        "adcq	208(%[b]), %%rax\n\t"
-        "movq	%%rax, 208(%[r])\n\t"
-        "movq	216(%[a]), %%rax\n\t"
-        "adcq	216(%[b]), %%rax\n\t"
-        "movq	%%rax, 216(%[r])\n\t"
-        "movq	224(%[a]), %%rax\n\t"
-        "adcq	224(%[b]), %%rax\n\t"
-        "movq	%%rax, 224(%[r])\n\t"
-        "movq	232(%[a]), %%rax\n\t"
-        "adcq	232(%[b]), %%rax\n\t"
-        "movq	%%rax, 232(%[r])\n\t"
-        "movq	240(%[a]), %%rax\n\t"
-        "adcq	240(%[b]), %%rax\n\t"
-        "movq	%%rax, 240(%[r])\n\t"
-        "movq	248(%[a]), %%rax\n\t"
-        "adcq	248(%[b]), %%rax\n\t"
-        "movq	%%rax, 248(%[r])\n\t"
-        "movq	256(%[a]), %%rax\n\t"
-        "adcq	256(%[b]), %%rax\n\t"
-        "movq	%%rax, 256(%[r])\n\t"
-        "movq	264(%[a]), %%rax\n\t"
-        "adcq	264(%[b]), %%rax\n\t"
-        "movq	%%rax, 264(%[r])\n\t"
-        "movq	272(%[a]), %%rax\n\t"
-        "adcq	272(%[b]), %%rax\n\t"
-        "movq	%%rax, 272(%[r])\n\t"
-        "movq	280(%[a]), %%rax\n\t"
-        "adcq	280(%[b]), %%rax\n\t"
-        "movq	%%rax, 280(%[r])\n\t"
-        "movq	288(%[a]), %%rax\n\t"
-        "adcq	288(%[b]), %%rax\n\t"
-        "movq	%%rax, 288(%[r])\n\t"
-        "movq	296(%[a]), %%rax\n\t"
-        "adcq	296(%[b]), %%rax\n\t"
-        "movq	%%rax, 296(%[r])\n\t"
-        "movq	304(%[a]), %%rax\n\t"
-        "adcq	304(%[b]), %%rax\n\t"
-        "movq	%%rax, 304(%[r])\n\t"
-        "movq	312(%[a]), %%rax\n\t"
-        "adcq	312(%[b]), %%rax\n\t"
-        "movq	%%rax, 312(%[r])\n\t"
-        "movq	320(%[a]), %%rax\n\t"
-        "adcq	320(%[b]), %%rax\n\t"
-        "movq	%%rax, 320(%[r])\n\t"
-        "movq	328(%[a]), %%rax\n\t"
-        "adcq	328(%[b]), %%rax\n\t"
-        "movq	%%rax, 328(%[r])\n\t"
-        "movq	336(%[a]), %%rax\n\t"
-        "adcq	336(%[b]), %%rax\n\t"
-        "movq	%%rax, 336(%[r])\n\t"
-        "movq	344(%[a]), %%rax\n\t"
-        "adcq	344(%[b]), %%rax\n\t"
-        "movq	%%rax, 344(%[r])\n\t"
-        "movq	352(%[a]), %%rax\n\t"
-        "adcq	352(%[b]), %%rax\n\t"
-        "movq	%%rax, 352(%[r])\n\t"
-        "movq	360(%[a]), %%rax\n\t"
-        "adcq	360(%[b]), %%rax\n\t"
-        "movq	%%rax, 360(%[r])\n\t"
-        "movq	368(%[a]), %%rax\n\t"
-        "adcq	368(%[b]), %%rax\n\t"
-        "movq	%%rax, 368(%[r])\n\t"
-        "movq	376(%[a]), %%rax\n\t"
-        "adcq	376(%[b]), %%rax\n\t"
-        "movq	%%rax, 376(%[r])\n\t"
-        "adcq	$0, %[c]\n\t"
-        : [c] "+r" (c)
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
-        : "memory", "rax"
-    );
-
-    return c;
-}
-
-/* AND m into each word of a and store in r.
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * m  Mask to AND against each digit.
- */
-static void sp_3072_mask_24(sp_digit* r, sp_digit* a, sp_digit m)
-{
-#ifdef WOLFSSL_SP_SMALL
-    int i;
-
-    for (i=0; i<24; i++)
-        r[i] = a[i] & m;
-#else
-    int i;
-
-    for (i = 0; i < 24; i += 8) {
-        r[i+0] = a[i+0] & m;
-        r[i+1] = a[i+1] & m;
-        r[i+2] = a[i+2] & m;
-        r[i+3] = a[i+3] & m;
-        r[i+4] = a[i+4] & m;
-        r[i+5] = a[i+5] & m;
-        r[i+6] = a[i+6] & m;
-        r[i+7] = a[i+7] & m;
-    }
-#endif
-}
-
-/* Multiply a and b into r. (r = a * b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A single precision integer.
- */
-SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a,
-        const sp_digit* b)
-{
-    sp_digit* z0 = r;
-    sp_digit z1[48];
-    sp_digit a1[24];
-    sp_digit b1[24];
-    sp_digit z2[48];
-    sp_digit u, ca, cb;
-
-    ca = sp_3072_add_24(a1, a, &a[24]);
-    cb = sp_3072_add_24(b1, b, &b[24]);
-    u  = ca & cb;
-    sp_3072_mul_24(z1, a1, b1);
-    sp_3072_mul_24(z2, &a[24], &b[24]);
-    sp_3072_mul_24(z0, a, b);
-    sp_3072_mask_24(r + 48, a1, 0 - cb);
-    sp_3072_mask_24(b1, b1, 0 - ca);
-    u += sp_3072_add_24(r + 48, r + 48, b1);
-    u += sp_3072_sub_in_place_48(z1, z2);
-    u += sp_3072_sub_in_place_48(z1, z0);
-    u += sp_3072_add_48(r + 24, r + 24, z1);
-    r[72] = u;
-    XMEMSET(r + 72 + 1, 0, sizeof(sp_digit) * (24 - 1));
-    sp_3072_add_48(r + 48, r + 48, z2);
-}
-
-/* Square a and put result in r. (r = a * a)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- */
-SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a)
-{
-    sp_digit* z0 = r;
-    sp_digit z2[48];
-    sp_digit z1[48];
-    sp_digit a1[24];
-    sp_digit u;
-
-    u = sp_3072_add_24(a1, a, &a[24]);
-    sp_3072_sqr_24(z1, a1);
-    sp_3072_sqr_24(z2, &a[24]);
-    sp_3072_sqr_24(z0, a);
-    sp_3072_mask_24(r + 48, a1, 0 - u);
-    u += sp_3072_add_24(r + 48, r + 48, r + 48);
-    u += sp_3072_sub_in_place_48(z1, z2);
-    u += sp_3072_sub_in_place_48(z1, z0);
-    u += sp_3072_add_48(r + 24, r + 24, z1);
-    r[72] = u;
-    XMEMSET(r + 72 + 1, 0, sizeof(sp_digit) * (24 - 1));
-    sp_3072_add_48(r + 48, r + 48, z2);
-}
-
-#ifdef HAVE_INTEL_AVX2
-/* Multiply a and b into r. (r = a * b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A single precision integer.
- */
-SP_NOINLINE static void sp_3072_mul_avx2_48(sp_digit* r, const sp_digit* a,
-        const sp_digit* b)
-{
-    sp_digit* z0 = r;
-    sp_digit z1[48];
-    sp_digit a1[24];
-    sp_digit b1[24];
-    sp_digit z2[48];
-    sp_digit u, ca, cb;
-
-    ca = sp_3072_add_24(a1, a, &a[24]);
-    cb = sp_3072_add_24(b1, b, &b[24]);
-    u  = ca & cb;
-    sp_3072_mul_avx2_24(z1, a1, b1);
-    sp_3072_mul_avx2_24(z2, &a[24], &b[24]);
-    sp_3072_mul_avx2_24(z0, a, b);
-    sp_3072_mask_24(r + 48, a1, 0 - cb);
-    sp_3072_mask_24(b1, b1, 0 - ca);
-    u += sp_3072_add_24(r + 48, r + 48, b1);
-    u += sp_3072_sub_in_place_48(z1, z2);
-    u += sp_3072_sub_in_place_48(z1, z0);
-    u += sp_3072_add_48(r + 24, r + 24, z1);
-    r[72] = u;
-    XMEMSET(r + 72 + 1, 0, sizeof(sp_digit) * (24 - 1));
-    sp_3072_add_48(r + 48, r + 48, z2);
-}
-#endif /* HAVE_INTEL_AVX2 */
-
-#ifdef HAVE_INTEL_AVX2
-/* Square a and put result in r. (r = a * a)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- */
-SP_NOINLINE static void sp_3072_sqr_avx2_48(sp_digit* r, const sp_digit* a)
-{
-    sp_digit* z0 = r;
-    sp_digit z2[48];
-    sp_digit z1[48];
-    sp_digit a1[24];
-    sp_digit u;
-
-    u = sp_3072_add_24(a1, a, &a[24]);
-    sp_3072_sqr_avx2_24(z1, a1);
-    sp_3072_sqr_avx2_24(z2, &a[24]);
-    sp_3072_sqr_avx2_24(z0, a);
-    sp_3072_mask_24(r + 48, a1, 0 - u);
-    u += sp_3072_add_24(r + 48, r + 48, r + 48);
-    u += sp_3072_sub_in_place_48(z1, z2);
-    u += sp_3072_sub_in_place_48(z1, z0);
-    u += sp_3072_add_48(r + 24, r + 24, z1);
-    r[72] = u;
-    XMEMSET(r + 72 + 1, 0, sizeof(sp_digit) * (24 - 1));
-    sp_3072_add_48(r + 48, r + 48, z2);
-}
-#endif /* HAVE_INTEL_AVX2 */
-
-#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA)
-#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */
-
-/* Caclulate the bottom digit of -1/a mod 2^n.
- *
- * a    A single precision number.
- * rho  Bottom word of inverse.
- */
-static void sp_3072_mont_setup(sp_digit* a, sp_digit* rho)
-{
-    sp_digit x, b;
-
-    b = a[0];
-    x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
-    x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
-    x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
-    x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
-    x *= 2 - b * x;               /* here x*a==1 mod 2**64 */
-
-    /* rho = -1/m mod b */
-    *rho = -x;
-}
-
-#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA)
-/* Sub b from a into a. (a -= b)
- *
- * a  A single precision integer and result.
- * b  A single precision integer.
- */
-SP_NOINLINE static sp_digit sp_3072_sub_in_place_24(sp_digit* a,
-    const sp_digit* b)
-{
-    sp_digit c = 0;
-
-    __asm__ __volatile__ (
-        "movq	0(%[a]), %%r8\n\t"
-        "movq	8(%[a]), %%r9\n\t"
-        "movq	0(%[b]), %%rdx\n\t"
-        "movq	8(%[b]), %%rcx\n\t"
-        "subq	%%rdx, %%r8\n\t"
-        "movq	16(%[b]), %%rdx\n\t"
-        "movq	%%r8, 0(%[a])\n\t"
-        "movq	16(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	24(%[b]), %%rcx\n\t"
-        "movq	%%r9, 8(%[a])\n\t"
-        "movq	24(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	32(%[b]), %%rdx\n\t"
-        "movq	%%r8, 16(%[a])\n\t"
-        "movq	32(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	40(%[b]), %%rcx\n\t"
-        "movq	%%r9, 24(%[a])\n\t"
-        "movq	40(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	48(%[b]), %%rdx\n\t"
-        "movq	%%r8, 32(%[a])\n\t"
-        "movq	48(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	56(%[b]), %%rcx\n\t"
-        "movq	%%r9, 40(%[a])\n\t"
-        "movq	56(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	64(%[b]), %%rdx\n\t"
-        "movq	%%r8, 48(%[a])\n\t"
-        "movq	64(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	72(%[b]), %%rcx\n\t"
-        "movq	%%r9, 56(%[a])\n\t"
-        "movq	72(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	80(%[b]), %%rdx\n\t"
-        "movq	%%r8, 64(%[a])\n\t"
-        "movq	80(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	88(%[b]), %%rcx\n\t"
-        "movq	%%r9, 72(%[a])\n\t"
-        "movq	88(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	96(%[b]), %%rdx\n\t"
-        "movq	%%r8, 80(%[a])\n\t"
-        "movq	96(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	104(%[b]), %%rcx\n\t"
-        "movq	%%r9, 88(%[a])\n\t"
-        "movq	104(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	112(%[b]), %%rdx\n\t"
-        "movq	%%r8, 96(%[a])\n\t"
-        "movq	112(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	120(%[b]), %%rcx\n\t"
-        "movq	%%r9, 104(%[a])\n\t"
-        "movq	120(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	128(%[b]), %%rdx\n\t"
-        "movq	%%r8, 112(%[a])\n\t"
-        "movq	128(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	136(%[b]), %%rcx\n\t"
-        "movq	%%r9, 120(%[a])\n\t"
-        "movq	136(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	144(%[b]), %%rdx\n\t"
-        "movq	%%r8, 128(%[a])\n\t"
-        "movq	144(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	152(%[b]), %%rcx\n\t"
-        "movq	%%r9, 136(%[a])\n\t"
-        "movq	152(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	160(%[b]), %%rdx\n\t"
-        "movq	%%r8, 144(%[a])\n\t"
-        "movq	160(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	168(%[b]), %%rcx\n\t"
-        "movq	%%r9, 152(%[a])\n\t"
-        "movq	168(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	176(%[b]), %%rdx\n\t"
-        "movq	%%r8, 160(%[a])\n\t"
-        "movq	176(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	184(%[b]), %%rcx\n\t"
-        "movq	%%r9, 168(%[a])\n\t"
-        "movq	184(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	%%r8, 176(%[a])\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	%%r9, 184(%[a])\n\t"
-        "sbbq	$0, %[c]\n\t"
-        : [c] "+r" (c)
-        : [a] "r" (a), [b] "r" (b)
-        : "memory", "rdx", "rcx", "r8", "r9"
-    );
-
-    return c;
-}
-
-/* r = 2^n mod m where n is the number of bits to reduce by.
- * Given m must be 3072 bits, just need to subtract.
- *
- * r  A single precision number.
- * m  A signle precision number.
- */
-static void sp_3072_mont_norm_24(sp_digit* r, sp_digit* m)
-{
-    XMEMSET(r, 0, sizeof(sp_digit) * 24);
-
-    /* r = 2^n mod m */
-    sp_3072_sub_in_place_24(r, m);
-}
-
-/* Conditionally subtract b from a using the mask m.
- * m is -1 to subtract and 0 when not copying.
- *
- * r  A single precision number representing condition subtract result.
- * a  A single precision number to subtract from.
- * b  A single precision number to subtract.
- * m  Mask value to apply.
- */
-static sp_digit sp_3072_cond_sub_24(sp_digit* r, sp_digit* a, sp_digit* b,
-        sp_digit m)
-{
-    sp_digit t[24];
-    sp_digit c = 0;
-
-    __asm__ __volatile__ (
-        "movq	0(%[b]), %%rax\n\t"
-        "movq	8(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 0(%[t])\n\t"
-        "movq	%%rcx, 8(%[t])\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "movq	24(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 16(%[t])\n\t"
-        "movq	%%rcx, 24(%[t])\n\t"
-        "movq	32(%[b]), %%rax\n\t"
-        "movq	40(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 32(%[t])\n\t"
-        "movq	%%rcx, 40(%[t])\n\t"
-        "movq	48(%[b]), %%rax\n\t"
-        "movq	56(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 48(%[t])\n\t"
-        "movq	%%rcx, 56(%[t])\n\t"
-        "movq	64(%[b]), %%rax\n\t"
-        "movq	72(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 64(%[t])\n\t"
-        "movq	%%rcx, 72(%[t])\n\t"
-        "movq	80(%[b]), %%rax\n\t"
-        "movq	88(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 80(%[t])\n\t"
-        "movq	%%rcx, 88(%[t])\n\t"
-        "movq	96(%[b]), %%rax\n\t"
-        "movq	104(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 96(%[t])\n\t"
-        "movq	%%rcx, 104(%[t])\n\t"
-        "movq	112(%[b]), %%rax\n\t"
-        "movq	120(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 112(%[t])\n\t"
-        "movq	%%rcx, 120(%[t])\n\t"
-        "movq	128(%[b]), %%rax\n\t"
-        "movq	136(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 128(%[t])\n\t"
-        "movq	%%rcx, 136(%[t])\n\t"
-        "movq	144(%[b]), %%rax\n\t"
-        "movq	152(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 144(%[t])\n\t"
-        "movq	%%rcx, 152(%[t])\n\t"
-        "movq	160(%[b]), %%rax\n\t"
-        "movq	168(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 160(%[t])\n\t"
-        "movq	%%rcx, 168(%[t])\n\t"
-        "movq	176(%[b]), %%rax\n\t"
-        "movq	184(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 176(%[t])\n\t"
-        "movq	%%rcx, 184(%[t])\n\t"
-        "movq	(%[a]), %%rax\n\t"
-        "movq	(%[t]), %%rdx\n\t"
-        "subq	%%rdx,%%rax\n\t"
-        "movq	8(%[a]), %%rcx\n\t"
-        "movq	8(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 0(%[r])\n\t"
-        "movq	16(%[a]), %%rax\n\t"
-        "movq	16(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 8(%[r])\n\t"
-        "movq	24(%[a]), %%rcx\n\t"
-        "movq	24(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 16(%[r])\n\t"
-        "movq	32(%[a]), %%rax\n\t"
-        "movq	32(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 24(%[r])\n\t"
-        "movq	40(%[a]), %%rcx\n\t"
-        "movq	40(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 32(%[r])\n\t"
-        "movq	48(%[a]), %%rax\n\t"
-        "movq	48(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 40(%[r])\n\t"
-        "movq	56(%[a]), %%rcx\n\t"
-        "movq	56(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 48(%[r])\n\t"
-        "movq	64(%[a]), %%rax\n\t"
-        "movq	64(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 56(%[r])\n\t"
-        "movq	72(%[a]), %%rcx\n\t"
-        "movq	72(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 64(%[r])\n\t"
-        "movq	80(%[a]), %%rax\n\t"
-        "movq	80(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 72(%[r])\n\t"
-        "movq	88(%[a]), %%rcx\n\t"
-        "movq	88(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 80(%[r])\n\t"
-        "movq	96(%[a]), %%rax\n\t"
-        "movq	96(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 88(%[r])\n\t"
-        "movq	104(%[a]), %%rcx\n\t"
-        "movq	104(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 96(%[r])\n\t"
-        "movq	112(%[a]), %%rax\n\t"
-        "movq	112(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 104(%[r])\n\t"
-        "movq	120(%[a]), %%rcx\n\t"
-        "movq	120(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 112(%[r])\n\t"
-        "movq	128(%[a]), %%rax\n\t"
-        "movq	128(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 120(%[r])\n\t"
-        "movq	136(%[a]), %%rcx\n\t"
-        "movq	136(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 128(%[r])\n\t"
-        "movq	144(%[a]), %%rax\n\t"
-        "movq	144(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 136(%[r])\n\t"
-        "movq	152(%[a]), %%rcx\n\t"
-        "movq	152(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 144(%[r])\n\t"
-        "movq	160(%[a]), %%rax\n\t"
-        "movq	160(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 152(%[r])\n\t"
-        "movq	168(%[a]), %%rcx\n\t"
-        "movq	168(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 160(%[r])\n\t"
-        "movq	176(%[a]), %%rax\n\t"
-        "movq	176(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 168(%[r])\n\t"
-        "movq	184(%[a]), %%rcx\n\t"
-        "movq	184(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 176(%[r])\n\t"
-        "movq	%%rcx, 184(%[r])\n\t"
-        "sbbq	$0, %[c]\n\t"
-        : [c] "+r" (c)
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m), [t] "r" (t)
-        : "memory", "rax", "rcx", "rdx"
-    );
-
-    return c;
-}
-
-/* Reduce the number back to 3072 bits using Montgomery reduction.
- *
- * a   A single precision number to reduce in place.
- * m   The single precision number representing the modulus.
- * mp  The digit representing the negative inverse of m mod 2^n.
- */
-SP_NOINLINE static void sp_3072_mont_reduce_24(sp_digit* a, sp_digit* m,
-        sp_digit mp)
-{
-    sp_digit ca = 0;
-
-    __asm__ __volatile__ (
-        "# i = 0\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "movq	0(%[a]), %%r12\n\t"
-        "movq	8(%[a]), %%r13\n\t"
-        "\nL_mont_loop_24:\n\t"
-        "# mu = a[i] * mp\n\t"
-        "movq	%%r12, %%r10\n\t"
-        "imulq	%[mp], %%r10\n\t"
-        "# a[i+0] += m[0] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	0(%[m])\n\t"
-        "addq	%%rax, %%r12\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "# a[i+1] += m[1] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	8(%[m])\n\t"
-        "movq	%%r13, %%r12\n\t"
-        "addq	%%rax, %%r12\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r12\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+2] += m[2] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	16(%[m])\n\t"
-        "movq	16(%[a]), %%r13\n\t"
-        "addq	%%rax, %%r13\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r13\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+3] += m[3] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	24(%[m])\n\t"
-        "movq	24(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 24(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+4] += m[4] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	32(%[m])\n\t"
-        "movq	32(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 32(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+5] += m[5] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	40(%[m])\n\t"
-        "movq	40(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 40(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+6] += m[6] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	48(%[m])\n\t"
-        "movq	48(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 48(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+7] += m[7] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	56(%[m])\n\t"
-        "movq	56(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 56(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+8] += m[8] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	64(%[m])\n\t"
-        "movq	64(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 64(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+9] += m[9] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	72(%[m])\n\t"
-        "movq	72(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 72(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+10] += m[10] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	80(%[m])\n\t"
-        "movq	80(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 80(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+11] += m[11] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	88(%[m])\n\t"
-        "movq	88(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 88(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+12] += m[12] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	96(%[m])\n\t"
-        "movq	96(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 96(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+13] += m[13] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	104(%[m])\n\t"
-        "movq	104(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 104(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+14] += m[14] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	112(%[m])\n\t"
-        "movq	112(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 112(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+15] += m[15] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	120(%[m])\n\t"
-        "movq	120(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 120(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+16] += m[16] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	128(%[m])\n\t"
-        "movq	128(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 128(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+17] += m[17] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	136(%[m])\n\t"
-        "movq	136(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 136(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+18] += m[18] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	144(%[m])\n\t"
-        "movq	144(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 144(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+19] += m[19] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	152(%[m])\n\t"
-        "movq	152(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 152(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+20] += m[20] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	160(%[m])\n\t"
-        "movq	160(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 160(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+21] += m[21] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	168(%[m])\n\t"
-        "movq	168(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 168(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+22] += m[22] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	176(%[m])\n\t"
-        "movq	176(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 176(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+23] += m[23] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "mulq	184(%[m])\n\t"
-        "movq	184(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r9\n\t"
-        "adcq	%[ca], %%rdx\n\t"
-        "movq	$0, %[ca]\n\t"
-        "adcq	$0, %[ca]\n\t"
-        "addq	%%r9, %%r11\n\t"
-        "movq	%%r11, 184(%[a])\n\t"
-        "adcq	%%rdx, 192(%[a])\n\t"
-        "adcq	$0, %[ca]\n\t"
-        "# i += 1\n\t"
-        "addq	$8, %[a]\n\t"
-        "addq	$8, %%rcx\n\t"
-        "cmpq	$192, %%rcx\n\t"
-        "jl	L_mont_loop_24\n\t"
-        "movq	%%r12, 0(%[a])\n\t"
-        "movq	%%r13, 8(%[a])\n\t"
-        : [ca] "+r" (ca), [a] "+r" (a)
-        : [m] "r" (m), [mp] "r" (mp)
-        : "memory", "rax", "rdx", "rcx", "r8", "r9", "r10", "r11",
-          "r12", "r13"
-    );
-
-    sp_3072_cond_sub_24(a - 24, a, m, (sp_digit)0 - ca);
-}
-
-/* Multiply two Montogmery form numbers mod the modulus (prime).
- * (r = a * b mod m)
- *
- * r   Result of multiplication.
- * a   First number to multiply in Montogmery form.
- * b   Second number to multiply in Montogmery form.
- * m   Modulus (prime).
- * mp  Montogmery mulitplier.
- */
-static void sp_3072_mont_mul_24(sp_digit* r, sp_digit* a, sp_digit* b,
-        sp_digit* m, sp_digit mp)
-{
-    sp_3072_mul_24(r, a, b);
-    sp_3072_mont_reduce_24(r, m, mp);
-}
-
-/* Square the Montgomery form number. (r = a * a mod m)
- *
- * r   Result of squaring.
- * a   Number to square in Montogmery form.
- * m   Modulus (prime).
- * mp  Montogmery mulitplier.
- */
-static void sp_3072_mont_sqr_24(sp_digit* r, sp_digit* a, sp_digit* m,
-        sp_digit mp)
-{
-    sp_3072_sqr_24(r, a);
-    sp_3072_mont_reduce_24(r, m, mp);
-}
-
-/* Mul a by digit b into r. (r = a * b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A single precision digit.
- */
-SP_NOINLINE static void sp_3072_mul_d_24(sp_digit* r, const sp_digit* a,
-        const sp_digit b)
-{
-    __asm__ __volatile__ (
-        "# A[0] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	(%[a])\n\t"
-        "movq	%%rax, %%rbx\n\t"
-        "movq	%%rdx, %%rcx\n\t"
-        "movq	%%rbx, 0(%[r])\n\t"
-        "# A[1] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "movq	%%rcx, 8(%[r])\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "# A[2] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "movq	%%r8, 16(%[r])\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "# A[3] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "movq	%%rbx, 24(%[r])\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# A[4] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "movq	%%rcx, 32(%[r])\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "# A[5] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "movq	%%r8, 40(%[r])\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "# A[6] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "movq	%%rbx, 48(%[r])\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# A[7] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "movq	%%rcx, 56(%[r])\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "# A[8] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "movq	%%r8, 64(%[r])\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "# A[9] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "movq	%%rbx, 72(%[r])\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# A[10] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "movq	%%rcx, 80(%[r])\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "# A[11] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "movq	%%r8, 88(%[r])\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "# A[12] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "movq	%%rbx, 96(%[r])\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# A[13] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "movq	%%rcx, 104(%[r])\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "# A[14] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "movq	%%r8, 112(%[r])\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "# A[15] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "movq	%%rbx, 120(%[r])\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# A[16] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "mulq	128(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "movq	%%rcx, 128(%[r])\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "# A[17] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "mulq	136(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "movq	%%r8, 136(%[r])\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "# A[18] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	144(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "movq	%%rbx, 144(%[r])\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# A[19] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "mulq	152(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "movq	%%rcx, 152(%[r])\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "# A[20] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "mulq	160(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "movq	%%r8, 160(%[r])\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "# A[21] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	168(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "movq	%%rbx, 168(%[r])\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# A[22] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "mulq	176(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "movq	%%rcx, 176(%[r])\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "# A[23] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "mulq	184(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "movq	%%r8, 184(%[r])\n\t"
-        "movq	%%rbx, 192(%[r])\n\t"
-        :
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
-        : "memory", "rax", "rdx", "rbx", "rcx", "r8"
-    );
-}
-
-#ifdef HAVE_INTEL_AVX2
-/* Mul a by digit b into r. (r = a * b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A single precision digit.
- */
-SP_NOINLINE static void sp_3072_mul_d_avx2_24(sp_digit* r, const sp_digit* a,
-        const sp_digit b)
-{
-    __asm__ __volatile__ (
-        "# A[0] * B\n\t"
-        "movq	%[b], %%rdx\n\t"
-        "xorq	%%r10, %%r10\n\t"
-        "mulxq	(%[a]), %%r8, %%r9\n\t"
-        "movq	%%r8, 0(%[r])\n\t"
-        "# A[1] * B\n\t"
-        "mulxq	8(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 8(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[2] * B\n\t"
-        "mulxq	16(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 16(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[3] * B\n\t"
-        "mulxq	24(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 24(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[4] * B\n\t"
-        "mulxq	32(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 32(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[5] * B\n\t"
-        "mulxq	40(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 40(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[6] * B\n\t"
-        "mulxq	48(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 48(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[7] * B\n\t"
-        "mulxq	56(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 56(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[8] * B\n\t"
-        "mulxq	64(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 64(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[9] * B\n\t"
-        "mulxq	72(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 72(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[10] * B\n\t"
-        "mulxq	80(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 80(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[11] * B\n\t"
-        "mulxq	88(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 88(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[12] * B\n\t"
-        "mulxq	96(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 96(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[13] * B\n\t"
-        "mulxq	104(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 104(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[14] * B\n\t"
-        "mulxq	112(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 112(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[15] * B\n\t"
-        "mulxq	120(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 120(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[16] * B\n\t"
-        "mulxq	128(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 128(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[17] * B\n\t"
-        "mulxq	136(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 136(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[18] * B\n\t"
-        "mulxq	144(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 144(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[19] * B\n\t"
-        "mulxq	152(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 152(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[20] * B\n\t"
-        "mulxq	160(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 160(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[21] * B\n\t"
-        "mulxq	168(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 168(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[22] * B\n\t"
-        "mulxq	176(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 176(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[23] * B\n\t"
-        "mulxq	184(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "adcxq	%%r10, %%r8\n\t"
-        "movq	%%r9, 184(%[r])\n\t"
-        "movq	%%r8, 192(%[r])\n\t"
-        :
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
-        : "memory", "rax", "rdx", "rcx", "r8", "r9", "r10"
-    );
-}
-#endif /* HAVE_INTEL_AVX2 */
-
-/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
- *
- * d1   The high order half of the number to divide.
- * d0   The low order half of the number to divide.
- * div  The dividend.
- * returns the result of the division.
- */
-static sp_digit div_3072_word_24(sp_digit d1, sp_digit d0, sp_digit div)
-{
-    sp_digit r;
-
-    __asm__ __volatile__ (
-        "movq	%[d0], %%rax\n\t"
-        "movq	%[d1], %%rdx\n\t"
-        "divq	%[div]\n\t"
-        "movq	%%rax, %[r]\n\t"
-        : [r] "=r" (r)
-        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
-        : "rax", "rdx"
-    );
-
-    return r;
-}
-
-/* Compare a with b in constant time.
- *
- * a  A single precision integer.
- * b  A single precision integer.
- * return -ve, 0 or +ve if a is less than, equal to or greater than b
- * respectively.
- */
-static int64_t sp_3072_cmp_24(sp_digit* a, sp_digit* b)
-{
-    sp_digit r = -1;
-    sp_digit one = 1;
-
-    __asm__ __volatile__ (
-        "xorq	%%rcx, %%rcx\n\t"
-        "movq	$-1, %%rdx\n\t"
-        "movq	184(%[a]), %%rbx\n\t"
-        "movq	184(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	176(%[a]), %%rbx\n\t"
-        "movq	176(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	168(%[a]), %%rbx\n\t"
-        "movq	168(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	160(%[a]), %%rbx\n\t"
-        "movq	160(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	152(%[a]), %%rbx\n\t"
-        "movq	152(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	144(%[a]), %%rbx\n\t"
-        "movq	144(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	136(%[a]), %%rbx\n\t"
-        "movq	136(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	128(%[a]), %%rbx\n\t"
-        "movq	128(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	120(%[a]), %%rbx\n\t"
-        "movq	120(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	112(%[a]), %%rbx\n\t"
-        "movq	112(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	104(%[a]), %%rbx\n\t"
-        "movq	104(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	96(%[a]), %%rbx\n\t"
-        "movq	96(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	88(%[a]), %%rbx\n\t"
-        "movq	88(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	80(%[a]), %%rbx\n\t"
-        "movq	80(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	72(%[a]), %%rbx\n\t"
-        "movq	72(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	64(%[a]), %%rbx\n\t"
-        "movq	64(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	56(%[a]), %%rbx\n\t"
-        "movq	56(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	48(%[a]), %%rbx\n\t"
-        "movq	48(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	40(%[a]), %%rbx\n\t"
-        "movq	40(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	32(%[a]), %%rbx\n\t"
-        "movq	32(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	24(%[a]), %%rbx\n\t"
-        "movq	24(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	16(%[a]), %%rbx\n\t"
-        "movq	16(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	8(%[a]), %%rbx\n\t"
-        "movq	8(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	0(%[a]), %%rbx\n\t"
-        "movq	0(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "xorq	%%rdx, %[r]\n\t"
-        : [r] "+r" (r)
-        : [a] "r" (a), [b] "r" (b), [one] "r" (one)
-        : "rax", "rdx", "rcx", "rbx", "r8"
-    );
-
-    return r;
-}
-
-/* Divide d in a and put remainder into r (m*d + r = a)
- * m is not calculated as it is not needed at this time.
- *
- * a  Nmber to be divided.
- * d  Number to divide with.
- * m  Multiplier result.
- * r  Remainder from the division.
- * returns MP_OKAY indicating success.
- */
-static WC_INLINE int sp_3072_div_24(sp_digit* a, sp_digit* d, sp_digit* m,
-        sp_digit* r)
-{
-    sp_digit t1[48], t2[25];
-    sp_digit div, r1;
-    int i;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    (void)m;
-
-    div = d[23];
-    XMEMCPY(t1, a, sizeof(*t1) * 2 * 24);
-    for (i=23; i>=0; i--) {
-        r1 = div_3072_word_24(t1[24 + i], t1[24 + i - 1], div);
-
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            sp_3072_mul_d_avx2_24(t2, d, r1);
-        else
-#endif
-            sp_3072_mul_d_24(t2, d, r1);
-        t1[24 + i] += sp_3072_sub_in_place_24(&t1[i], t2);
-        t1[24 + i] -= t2[24];
-        sp_3072_mask_24(t2, d, t1[24 + i]);
-        t1[24 + i] += sp_3072_add_24(&t1[i], &t1[i], t2);
-        sp_3072_mask_24(t2, d, t1[24 + i]);
-        t1[24 + i] += sp_3072_add_24(&t1[i], &t1[i], t2);
-    }
-
-    r1 = sp_3072_cmp_24(t1, d) >= 0;
-    sp_3072_cond_sub_24(r, t1, t2, (sp_digit)0 - r1);
-
-    return MP_OKAY;
-}
-
-/* Reduce a modulo m into r. (r = a mod m)
- *
- * r  A single precision number that is the reduced result.
- * a  A single precision number that is to be reduced.
- * m  A single precision number that is the modulus to reduce with.
- * returns MP_OKAY indicating success.
- */
-static WC_INLINE int sp_3072_mod_24(sp_digit* r, sp_digit* a, sp_digit* m)
-{
-    return sp_3072_div_24(a, m, NULL, r);
-}
-
-/* Modular exponentiate a to the e mod m. (r = a^e mod m)
- *
- * r     A single precision number that is the result of the operation.
- * a     A single precision number being exponentiated.
- * e     A single precision number that is the exponent.
- * bits  The number of bits in the exponent.
- * m     A single precision number that is the modulus.
- * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
- */
-static int sp_3072_mod_exp_24(sp_digit* r, sp_digit* a, sp_digit* e,
-        int bits, sp_digit* m, int reduceA)
-{
-#ifndef WOLFSSL_SMALL_STACK
-    sp_digit t[32][48];
-#else
-    sp_digit* t[32];
-    sp_digit* td;
-#endif
-    sp_digit* norm;
-    sp_digit mp = 1;
-    sp_digit n;
-    sp_digit mask;
-    int i;
-    int c, y;
-    int err = MP_OKAY;
-
-#ifdef WOLFSSL_SMALL_STACK
-    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 48, NULL,
-                            DYNAMIC_TYPE_TMP_BUFFER);
-    if (td == NULL)
-        err = MEMORY_E;
-
-    if (err == MP_OKAY) {
-        for (i=0; i<32; i++)
-            t[i] = td + i * 48;
-        norm = t[0];
-    }
-#else
-    norm = t[0];
-#endif
-
-    if (err == MP_OKAY) {
-        sp_3072_mont_setup(m, &mp);
-        sp_3072_mont_norm_24(norm, m);
-
-        XMEMSET(t[1], 0, sizeof(sp_digit) * 24);
-        if (reduceA) {
-            err = sp_3072_mod_24(t[1] + 24, a, m);
-            if (err == MP_OKAY)
-                err = sp_3072_mod_24(t[1], t[1], m);
-        }
-        else {
-            XMEMCPY(t[1] + 24, a, sizeof(sp_digit) * 24);
-            err = sp_3072_mod_24(t[1], t[1], m);
-        }
-    }
-
-    if (err == MP_OKAY) {
-        sp_3072_mont_sqr_24(t[ 2], t[ 1], m, mp);
-        sp_3072_mont_mul_24(t[ 3], t[ 2], t[ 1], m, mp);
-        sp_3072_mont_sqr_24(t[ 4], t[ 2], m, mp);
-        sp_3072_mont_mul_24(t[ 5], t[ 3], t[ 2], m, mp);
-        sp_3072_mont_sqr_24(t[ 6], t[ 3], m, mp);
-        sp_3072_mont_mul_24(t[ 7], t[ 4], t[ 3], m, mp);
-        sp_3072_mont_sqr_24(t[ 8], t[ 4], m, mp);
-        sp_3072_mont_mul_24(t[ 9], t[ 5], t[ 4], m, mp);
-        sp_3072_mont_sqr_24(t[10], t[ 5], m, mp);
-        sp_3072_mont_mul_24(t[11], t[ 6], t[ 5], m, mp);
-        sp_3072_mont_sqr_24(t[12], t[ 6], m, mp);
-        sp_3072_mont_mul_24(t[13], t[ 7], t[ 6], m, mp);
-        sp_3072_mont_sqr_24(t[14], t[ 7], m, mp);
-        sp_3072_mont_mul_24(t[15], t[ 8], t[ 7], m, mp);
-        sp_3072_mont_sqr_24(t[16], t[ 8], m, mp);
-        sp_3072_mont_mul_24(t[17], t[ 9], t[ 8], m, mp);
-        sp_3072_mont_sqr_24(t[18], t[ 9], m, mp);
-        sp_3072_mont_mul_24(t[19], t[10], t[ 9], m, mp);
-        sp_3072_mont_sqr_24(t[20], t[10], m, mp);
-        sp_3072_mont_mul_24(t[21], t[11], t[10], m, mp);
-        sp_3072_mont_sqr_24(t[22], t[11], m, mp);
-        sp_3072_mont_mul_24(t[23], t[12], t[11], m, mp);
-        sp_3072_mont_sqr_24(t[24], t[12], m, mp);
-        sp_3072_mont_mul_24(t[25], t[13], t[12], m, mp);
-        sp_3072_mont_sqr_24(t[26], t[13], m, mp);
-        sp_3072_mont_mul_24(t[27], t[14], t[13], m, mp);
-        sp_3072_mont_sqr_24(t[28], t[14], m, mp);
-        sp_3072_mont_mul_24(t[29], t[15], t[14], m, mp);
-        sp_3072_mont_sqr_24(t[30], t[15], m, mp);
-        sp_3072_mont_mul_24(t[31], t[16], t[15], m, mp);
-
-        i = (bits - 1) / 64;
-        n = e[i--];
-        y = n >> 59;
-        n <<= 5;
-        c = 59;
-        XMEMCPY(r, t[y], sizeof(sp_digit) * 24);
-        for (; i>=0 || c>=5; ) {
-            if (c == 0) {
-                n = e[i--];
-                y = n >> 59;
-                n <<= 5;
-                c = 59;
-            }
-            else if (c < 5) {
-                y = n >> 59;
-                n = e[i--];
-                c = 5 - c;
-                y |= n >> (64 - c);
-                n <<= c;
-                c = 64 - c;
-            }
-            else {
-                y = (n >> 59) & 0x1f;
-                n <<= 5;
-                c -= 5;
-            }
-
-            sp_3072_mont_sqr_24(r, r, m, mp);
-            sp_3072_mont_sqr_24(r, r, m, mp);
-            sp_3072_mont_sqr_24(r, r, m, mp);
-            sp_3072_mont_sqr_24(r, r, m, mp);
-            sp_3072_mont_sqr_24(r, r, m, mp);
-
-            sp_3072_mont_mul_24(r, r, t[y], m, mp);
-        }
-        y = e[0] & ((1 << c) - 1);
-        for (; c > 0; c--)
-            sp_3072_mont_sqr_24(r, r, m, mp);
-        sp_3072_mont_mul_24(r, r, t[y], m, mp);
-
-        XMEMSET(&r[24], 0, sizeof(sp_digit) * 24);
-        sp_3072_mont_reduce_24(r, m, mp);
-
-        mask = 0 - (sp_3072_cmp_24(r, m) >= 0);
-        sp_3072_cond_sub_24(r, r, m, mask);
-    }
-
-#ifdef WOLFSSL_SMALL_STACK
-    if (td != NULL)
-        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return err;
-}
-
-#ifdef HAVE_INTEL_AVX2
-/* Reduce the number back to 3072 bits using Montgomery reduction.
- *
- * a   A single precision number to reduce in place.
- * m   The single precision number representing the modulus.
- * mp  The digit representing the negative inverse of m mod 2^n.
- */
-SP_NOINLINE static void sp_3072_mont_reduce_avx2_24(sp_digit* a, sp_digit* m,
-        sp_digit mp)
-{
-    sp_digit ca = 0;
-
-    __asm__ __volatile__ (
-        "# i = 0\n\t"
-        "movq	0(%[a]), %%r12\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "\nL_mont_loop_avx2_24:\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "movq	%%r12, %%r10\n\t"
-        "# mu = a[i] * mp\n\t"
-        "movq	%%r12, %%rdx\n\t"
-        "mulxq	%[mp], %%rdx, %%r8\n\t"
-        "# a[i+0] += m[0] * mu\n\t"
-        "mulxq	0(%[m]), %%rax, %%r8\n\t"
-        "movq	8(%[a]), %%r12\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r12\n\t"
-        "# a[i+1] += m[1] * mu\n\t"
-        "mulxq	8(%[m]), %%rax, %%r8\n\t"
-        "movq	16(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "# a[i+2] += m[2] * mu\n\t"
-        "mulxq	16(%[m]), %%rax, %%r8\n\t"
-        "movq	24(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 16(%[a])\n\t"
-        "# a[i+3] += m[3] * mu\n\t"
-        "mulxq	24(%[m]), %%rax, %%r8\n\t"
-        "movq	32(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 24(%[a])\n\t"
-        "# a[i+4] += m[4] * mu\n\t"
-        "mulxq	32(%[m]), %%rax, %%r8\n\t"
-        "movq	40(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 32(%[a])\n\t"
-        "# a[i+5] += m[5] * mu\n\t"
-        "mulxq	40(%[m]), %%rax, %%r8\n\t"
-        "movq	48(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 40(%[a])\n\t"
-        "# a[i+6] += m[6] * mu\n\t"
-        "mulxq	48(%[m]), %%rax, %%r8\n\t"
-        "movq	56(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 48(%[a])\n\t"
-        "# a[i+7] += m[7] * mu\n\t"
-        "mulxq	56(%[m]), %%rax, %%r8\n\t"
-        "movq	64(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 56(%[a])\n\t"
-        "# a[i+8] += m[8] * mu\n\t"
-        "mulxq	64(%[m]), %%rax, %%r8\n\t"
-        "movq	72(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 64(%[a])\n\t"
-        "# a[i+9] += m[9] * mu\n\t"
-        "mulxq	72(%[m]), %%rax, %%r8\n\t"
-        "movq	80(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 72(%[a])\n\t"
-        "# a[i+10] += m[10] * mu\n\t"
-        "mulxq	80(%[m]), %%rax, %%r8\n\t"
-        "movq	88(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 80(%[a])\n\t"
-        "# a[i+11] += m[11] * mu\n\t"
-        "mulxq	88(%[m]), %%rax, %%r8\n\t"
-        "movq	96(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 88(%[a])\n\t"
-        "# a[i+12] += m[12] * mu\n\t"
-        "mulxq	96(%[m]), %%rax, %%r8\n\t"
-        "movq	104(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 96(%[a])\n\t"
-        "# a[i+13] += m[13] * mu\n\t"
-        "mulxq	104(%[m]), %%rax, %%r8\n\t"
-        "movq	112(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 104(%[a])\n\t"
-        "# a[i+14] += m[14] * mu\n\t"
-        "mulxq	112(%[m]), %%rax, %%r8\n\t"
-        "movq	120(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 112(%[a])\n\t"
-        "# a[i+15] += m[15] * mu\n\t"
-        "mulxq	120(%[m]), %%rax, %%r8\n\t"
-        "movq	128(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 120(%[a])\n\t"
-        "# a[i+16] += m[16] * mu\n\t"
-        "mulxq	128(%[m]), %%rax, %%r8\n\t"
-        "movq	136(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 128(%[a])\n\t"
-        "# a[i+17] += m[17] * mu\n\t"
-        "mulxq	136(%[m]), %%rax, %%r8\n\t"
-        "movq	144(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 136(%[a])\n\t"
-        "# a[i+18] += m[18] * mu\n\t"
-        "mulxq	144(%[m]), %%rax, %%r8\n\t"
-        "movq	152(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 144(%[a])\n\t"
-        "# a[i+19] += m[19] * mu\n\t"
-        "mulxq	152(%[m]), %%rax, %%r8\n\t"
-        "movq	160(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 152(%[a])\n\t"
-        "# a[i+20] += m[20] * mu\n\t"
-        "mulxq	160(%[m]), %%rax, %%r8\n\t"
-        "movq	168(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 160(%[a])\n\t"
-        "# a[i+21] += m[21] * mu\n\t"
-        "mulxq	168(%[m]), %%rax, %%r8\n\t"
-        "movq	176(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 168(%[a])\n\t"
-        "# a[i+22] += m[22] * mu\n\t"
-        "mulxq	176(%[m]), %%rax, %%r8\n\t"
-        "movq	184(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 176(%[a])\n\t"
-        "# a[i+23] += m[23] * mu\n\t"
-        "mulxq	184(%[m]), %%rax, %%r8\n\t"
-        "movq	192(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 184(%[a])\n\t"
-        "adcxq	%[ca], %%r10\n\t"
-        "movq	%%r9, %[ca]\n\t"
-        "adoxq	%%r9, %[ca]\n\t"
-        "adcxq	%%r9, %[ca]\n\t"
-        "movq	%%r10, 192(%[a])\n\t"
-        "# i += 1\n\t"
-        "addq	$8, %[a]\n\t"
-        "addq	$1, %%rcx\n\t"
-        "cmpq	$24, %%rcx\n\t"
-        "jl	L_mont_loop_avx2_24\n\t"
-        "movq	%%r12, 0(%[a])\n\t"
-        : [ca] "+r" (ca), [a] "+r" (a)
-        : [m] "r" (m), [mp] "r" (mp)
-        : "memory", "rax", "rdx", "rcx", "r8", "r9", "r10", "r11", "r12"
-    );
-
-    sp_3072_cond_sub_24(a - 24, a, m, (sp_digit)0 - ca);
-}
-#endif /* HAVE_INTEL_AVX2 */
-
-#ifdef HAVE_INTEL_AVX2
-/* Multiply two Montogmery form numbers mod the modulus (prime).
- * (r = a * b mod m)
- *
- * r   Result of multiplication.
- * a   First number to multiply in Montogmery form.
- * b   Second number to multiply in Montogmery form.
- * m   Modulus (prime).
- * mp  Montogmery mulitplier.
- */
-static void sp_3072_mont_mul_avx2_24(sp_digit* r, sp_digit* a, sp_digit* b,
-        sp_digit* m, sp_digit mp)
-{
-    sp_3072_mul_avx2_24(r, a, b);
-    sp_3072_mont_reduce_avx2_24(r, m, mp);
-}
-
-#endif /* HAVE_INTEL_AVX2 */
-#ifdef HAVE_INTEL_AVX2
-/* Square the Montgomery form number. (r = a * a mod m)
- *
- * r   Result of squaring.
- * a   Number to square in Montogmery form.
- * m   Modulus (prime).
- * mp  Montogmery mulitplier.
- */
-static void sp_3072_mont_sqr_avx2_24(sp_digit* r, sp_digit* a, sp_digit* m,
-        sp_digit mp)
-{
-    sp_3072_sqr_avx2_24(r, a);
-    sp_3072_mont_reduce_avx2_24(r, m, mp);
-}
-
-#endif /* HAVE_INTEL_AVX2 */
-#ifdef HAVE_INTEL_AVX2
-/* Modular exponentiate a to the e mod m. (r = a^e mod m)
- *
- * r     A single precision number that is the result of the operation.
- * a     A single precision number being exponentiated.
- * e     A single precision number that is the exponent.
- * bits  The number of bits in the exponent.
- * m     A single precision number that is the modulus.
- * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
- */
-static int sp_3072_mod_exp_avx2_24(sp_digit* r, sp_digit* a, sp_digit* e,
-        int bits, sp_digit* m, int reduceA)
-{
-#ifndef WOLFSSL_SMALL_STACK
-    sp_digit t[32][48];
-#else
-    sp_digit* t[32];
-    sp_digit* td;
-#endif
-    sp_digit* norm;
-    sp_digit mp = 1;
-    sp_digit n;
-    sp_digit mask;
-    int i;
-    int c, y;
-    int err = MP_OKAY;
-
-#ifdef WOLFSSL_SMALL_STACK
-    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 48, NULL,
-                            DYNAMIC_TYPE_TMP_BUFFER);
-    if (td == NULL)
-        err = MEMORY_E;
-
-    if (err == MP_OKAY) {
-        for (i=0; i<32; i++)
-            t[i] = td + i * 48;
-        norm = t[0];
-    }
-#else
-    norm = t[0];
-#endif
-
-    if (err == MP_OKAY) {
-        sp_3072_mont_setup(m, &mp);
-        sp_3072_mont_norm_24(norm, m);
-
-        XMEMSET(t[1], 0, sizeof(sp_digit) * 24);
-        if (reduceA) {
-            err = sp_3072_mod_24(t[1] + 24, a, m);
-            if (err == MP_OKAY)
-                err = sp_3072_mod_24(t[1], t[1], m);
-        }
-        else {
-            XMEMCPY(t[1] + 24, a, sizeof(sp_digit) * 24);
-            err = sp_3072_mod_24(t[1], t[1], m);
-        }
-    }
-
-    if (err == MP_OKAY) {
-        sp_3072_mont_sqr_avx2_24(t[ 2], t[ 1], m, mp);
-        sp_3072_mont_mul_avx2_24(t[ 3], t[ 2], t[ 1], m, mp);
-        sp_3072_mont_sqr_avx2_24(t[ 4], t[ 2], m, mp);
-        sp_3072_mont_mul_avx2_24(t[ 5], t[ 3], t[ 2], m, mp);
-        sp_3072_mont_sqr_avx2_24(t[ 6], t[ 3], m, mp);
-        sp_3072_mont_mul_avx2_24(t[ 7], t[ 4], t[ 3], m, mp);
-        sp_3072_mont_sqr_avx2_24(t[ 8], t[ 4], m, mp);
-        sp_3072_mont_mul_avx2_24(t[ 9], t[ 5], t[ 4], m, mp);
-        sp_3072_mont_sqr_avx2_24(t[10], t[ 5], m, mp);
-        sp_3072_mont_mul_avx2_24(t[11], t[ 6], t[ 5], m, mp);
-        sp_3072_mont_sqr_avx2_24(t[12], t[ 6], m, mp);
-        sp_3072_mont_mul_avx2_24(t[13], t[ 7], t[ 6], m, mp);
-        sp_3072_mont_sqr_avx2_24(t[14], t[ 7], m, mp);
-        sp_3072_mont_mul_avx2_24(t[15], t[ 8], t[ 7], m, mp);
-        sp_3072_mont_sqr_avx2_24(t[16], t[ 8], m, mp);
-        sp_3072_mont_mul_avx2_24(t[17], t[ 9], t[ 8], m, mp);
-        sp_3072_mont_sqr_avx2_24(t[18], t[ 9], m, mp);
-        sp_3072_mont_mul_avx2_24(t[19], t[10], t[ 9], m, mp);
-        sp_3072_mont_sqr_avx2_24(t[20], t[10], m, mp);
-        sp_3072_mont_mul_avx2_24(t[21], t[11], t[10], m, mp);
-        sp_3072_mont_sqr_avx2_24(t[22], t[11], m, mp);
-        sp_3072_mont_mul_avx2_24(t[23], t[12], t[11], m, mp);
-        sp_3072_mont_sqr_avx2_24(t[24], t[12], m, mp);
-        sp_3072_mont_mul_avx2_24(t[25], t[13], t[12], m, mp);
-        sp_3072_mont_sqr_avx2_24(t[26], t[13], m, mp);
-        sp_3072_mont_mul_avx2_24(t[27], t[14], t[13], m, mp);
-        sp_3072_mont_sqr_avx2_24(t[28], t[14], m, mp);
-        sp_3072_mont_mul_avx2_24(t[29], t[15], t[14], m, mp);
-        sp_3072_mont_sqr_avx2_24(t[30], t[15], m, mp);
-        sp_3072_mont_mul_avx2_24(t[31], t[16], t[15], m, mp);
-
-        i = (bits - 1) / 64;
-        n = e[i--];
-        y = n >> 59;
-        n <<= 5;
-        c = 59;
-        XMEMCPY(r, t[y], sizeof(sp_digit) * 24);
-        for (; i>=0 || c>=5; ) {
-            if (c == 0) {
-                n = e[i--];
-                y = n >> 59;
-                n <<= 5;
-                c = 59;
-            }
-            else if (c < 5) {
-                y = n >> 59;
-                n = e[i--];
-                c = 5 - c;
-                y |= n >> (64 - c);
-                n <<= c;
-                c = 64 - c;
-            }
-            else {
-                y = (n >> 59) & 0x1f;
-                n <<= 5;
-                c -= 5;
-            }
-
-            sp_3072_mont_sqr_avx2_24(r, r, m, mp);
-            sp_3072_mont_sqr_avx2_24(r, r, m, mp);
-            sp_3072_mont_sqr_avx2_24(r, r, m, mp);
-            sp_3072_mont_sqr_avx2_24(r, r, m, mp);
-            sp_3072_mont_sqr_avx2_24(r, r, m, mp);
-
-            sp_3072_mont_mul_avx2_24(r, r, t[y], m, mp);
-        }
-        y = e[0] & ((1 << c) - 1);
-        for (; c > 0; c--)
-            sp_3072_mont_sqr_avx2_24(r, r, m, mp);
-        sp_3072_mont_mul_avx2_24(r, r, t[y], m, mp);
-
-        XMEMSET(&r[24], 0, sizeof(sp_digit) * 24);
-        sp_3072_mont_reduce_avx2_24(r, m, mp);
-
-        mask = 0 - (sp_3072_cmp_24(r, m) >= 0);
-        sp_3072_cond_sub_24(r, r, m, mask);
-    }
-
-#ifdef WOLFSSL_SMALL_STACK
-    if (td != NULL)
-        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return err;
-}
-#endif /* HAVE_INTEL_AVX2 */
-
-#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */
-
-/* r = 2^n mod m where n is the number of bits to reduce by.
- * Given m must be 3072 bits, just need to subtract.
- *
- * r  A single precision number.
- * m  A signle precision number.
- */
-static void sp_3072_mont_norm_48(sp_digit* r, sp_digit* m)
-{
-    XMEMSET(r, 0, sizeof(sp_digit) * 48);
-
-    /* r = 2^n mod m */
-    sp_3072_sub_in_place_48(r, m);
-}
-
-/* Conditionally subtract b from a using the mask m.
- * m is -1 to subtract and 0 when not copying.
- *
- * r  A single precision number representing condition subtract result.
- * a  A single precision number to subtract from.
- * b  A single precision number to subtract.
- * m  Mask value to apply.
- */
-static sp_digit sp_3072_cond_sub_48(sp_digit* r, sp_digit* a, sp_digit* b,
-        sp_digit m)
-{
-    sp_digit t[48];
-    sp_digit c = 0;
-
-    __asm__ __volatile__ (
-        "movq	0(%[b]), %%rax\n\t"
-        "movq	8(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 0(%[t])\n\t"
-        "movq	%%rcx, 8(%[t])\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "movq	24(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 16(%[t])\n\t"
-        "movq	%%rcx, 24(%[t])\n\t"
-        "movq	32(%[b]), %%rax\n\t"
-        "movq	40(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 32(%[t])\n\t"
-        "movq	%%rcx, 40(%[t])\n\t"
-        "movq	48(%[b]), %%rax\n\t"
-        "movq	56(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 48(%[t])\n\t"
-        "movq	%%rcx, 56(%[t])\n\t"
-        "movq	64(%[b]), %%rax\n\t"
-        "movq	72(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 64(%[t])\n\t"
-        "movq	%%rcx, 72(%[t])\n\t"
-        "movq	80(%[b]), %%rax\n\t"
-        "movq	88(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 80(%[t])\n\t"
-        "movq	%%rcx, 88(%[t])\n\t"
-        "movq	96(%[b]), %%rax\n\t"
-        "movq	104(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 96(%[t])\n\t"
-        "movq	%%rcx, 104(%[t])\n\t"
-        "movq	112(%[b]), %%rax\n\t"
-        "movq	120(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 112(%[t])\n\t"
-        "movq	%%rcx, 120(%[t])\n\t"
-        "movq	128(%[b]), %%rax\n\t"
-        "movq	136(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 128(%[t])\n\t"
-        "movq	%%rcx, 136(%[t])\n\t"
-        "movq	144(%[b]), %%rax\n\t"
-        "movq	152(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 144(%[t])\n\t"
-        "movq	%%rcx, 152(%[t])\n\t"
-        "movq	160(%[b]), %%rax\n\t"
-        "movq	168(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 160(%[t])\n\t"
-        "movq	%%rcx, 168(%[t])\n\t"
-        "movq	176(%[b]), %%rax\n\t"
-        "movq	184(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 176(%[t])\n\t"
-        "movq	%%rcx, 184(%[t])\n\t"
-        "movq	192(%[b]), %%rax\n\t"
-        "movq	200(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 192(%[t])\n\t"
-        "movq	%%rcx, 200(%[t])\n\t"
-        "movq	208(%[b]), %%rax\n\t"
-        "movq	216(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 208(%[t])\n\t"
-        "movq	%%rcx, 216(%[t])\n\t"
-        "movq	224(%[b]), %%rax\n\t"
-        "movq	232(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 224(%[t])\n\t"
-        "movq	%%rcx, 232(%[t])\n\t"
-        "movq	240(%[b]), %%rax\n\t"
-        "movq	248(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 240(%[t])\n\t"
-        "movq	%%rcx, 248(%[t])\n\t"
-        "movq	256(%[b]), %%rax\n\t"
-        "movq	264(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 256(%[t])\n\t"
-        "movq	%%rcx, 264(%[t])\n\t"
-        "movq	272(%[b]), %%rax\n\t"
-        "movq	280(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 272(%[t])\n\t"
-        "movq	%%rcx, 280(%[t])\n\t"
-        "movq	288(%[b]), %%rax\n\t"
-        "movq	296(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 288(%[t])\n\t"
-        "movq	%%rcx, 296(%[t])\n\t"
-        "movq	304(%[b]), %%rax\n\t"
-        "movq	312(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 304(%[t])\n\t"
-        "movq	%%rcx, 312(%[t])\n\t"
-        "movq	320(%[b]), %%rax\n\t"
-        "movq	328(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 320(%[t])\n\t"
-        "movq	%%rcx, 328(%[t])\n\t"
-        "movq	336(%[b]), %%rax\n\t"
-        "movq	344(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 336(%[t])\n\t"
-        "movq	%%rcx, 344(%[t])\n\t"
-        "movq	352(%[b]), %%rax\n\t"
-        "movq	360(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 352(%[t])\n\t"
-        "movq	%%rcx, 360(%[t])\n\t"
-        "movq	368(%[b]), %%rax\n\t"
-        "movq	376(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 368(%[t])\n\t"
-        "movq	%%rcx, 376(%[t])\n\t"
-        "movq	(%[a]), %%rax\n\t"
-        "movq	(%[t]), %%rdx\n\t"
-        "subq	%%rdx,%%rax\n\t"
-        "movq	8(%[a]), %%rcx\n\t"
-        "movq	8(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 0(%[r])\n\t"
-        "movq	16(%[a]), %%rax\n\t"
-        "movq	16(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 8(%[r])\n\t"
-        "movq	24(%[a]), %%rcx\n\t"
-        "movq	24(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 16(%[r])\n\t"
-        "movq	32(%[a]), %%rax\n\t"
-        "movq	32(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 24(%[r])\n\t"
-        "movq	40(%[a]), %%rcx\n\t"
-        "movq	40(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 32(%[r])\n\t"
-        "movq	48(%[a]), %%rax\n\t"
-        "movq	48(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 40(%[r])\n\t"
-        "movq	56(%[a]), %%rcx\n\t"
-        "movq	56(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 48(%[r])\n\t"
-        "movq	64(%[a]), %%rax\n\t"
-        "movq	64(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 56(%[r])\n\t"
-        "movq	72(%[a]), %%rcx\n\t"
-        "movq	72(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 64(%[r])\n\t"
-        "movq	80(%[a]), %%rax\n\t"
-        "movq	80(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 72(%[r])\n\t"
-        "movq	88(%[a]), %%rcx\n\t"
-        "movq	88(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 80(%[r])\n\t"
-        "movq	96(%[a]), %%rax\n\t"
-        "movq	96(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 88(%[r])\n\t"
-        "movq	104(%[a]), %%rcx\n\t"
-        "movq	104(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 96(%[r])\n\t"
-        "movq	112(%[a]), %%rax\n\t"
-        "movq	112(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 104(%[r])\n\t"
-        "movq	120(%[a]), %%rcx\n\t"
-        "movq	120(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 112(%[r])\n\t"
-        "movq	128(%[a]), %%rax\n\t"
-        "movq	128(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 120(%[r])\n\t"
-        "movq	136(%[a]), %%rcx\n\t"
-        "movq	136(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 128(%[r])\n\t"
-        "movq	144(%[a]), %%rax\n\t"
-        "movq	144(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 136(%[r])\n\t"
-        "movq	152(%[a]), %%rcx\n\t"
-        "movq	152(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 144(%[r])\n\t"
-        "movq	160(%[a]), %%rax\n\t"
-        "movq	160(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 152(%[r])\n\t"
-        "movq	168(%[a]), %%rcx\n\t"
-        "movq	168(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 160(%[r])\n\t"
-        "movq	176(%[a]), %%rax\n\t"
-        "movq	176(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 168(%[r])\n\t"
-        "movq	184(%[a]), %%rcx\n\t"
-        "movq	184(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 176(%[r])\n\t"
-        "movq	192(%[a]), %%rax\n\t"
-        "movq	192(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 184(%[r])\n\t"
-        "movq	200(%[a]), %%rcx\n\t"
-        "movq	200(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 192(%[r])\n\t"
-        "movq	208(%[a]), %%rax\n\t"
-        "movq	208(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 200(%[r])\n\t"
-        "movq	216(%[a]), %%rcx\n\t"
-        "movq	216(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 208(%[r])\n\t"
-        "movq	224(%[a]), %%rax\n\t"
-        "movq	224(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 216(%[r])\n\t"
-        "movq	232(%[a]), %%rcx\n\t"
-        "movq	232(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 224(%[r])\n\t"
-        "movq	240(%[a]), %%rax\n\t"
-        "movq	240(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 232(%[r])\n\t"
-        "movq	248(%[a]), %%rcx\n\t"
-        "movq	248(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 240(%[r])\n\t"
-        "movq	256(%[a]), %%rax\n\t"
-        "movq	256(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 248(%[r])\n\t"
-        "movq	264(%[a]), %%rcx\n\t"
-        "movq	264(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 256(%[r])\n\t"
-        "movq	272(%[a]), %%rax\n\t"
-        "movq	272(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 264(%[r])\n\t"
-        "movq	280(%[a]), %%rcx\n\t"
-        "movq	280(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 272(%[r])\n\t"
-        "movq	288(%[a]), %%rax\n\t"
-        "movq	288(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 280(%[r])\n\t"
-        "movq	296(%[a]), %%rcx\n\t"
-        "movq	296(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 288(%[r])\n\t"
-        "movq	304(%[a]), %%rax\n\t"
-        "movq	304(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 296(%[r])\n\t"
-        "movq	312(%[a]), %%rcx\n\t"
-        "movq	312(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 304(%[r])\n\t"
-        "movq	320(%[a]), %%rax\n\t"
-        "movq	320(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 312(%[r])\n\t"
-        "movq	328(%[a]), %%rcx\n\t"
-        "movq	328(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 320(%[r])\n\t"
-        "movq	336(%[a]), %%rax\n\t"
-        "movq	336(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 328(%[r])\n\t"
-        "movq	344(%[a]), %%rcx\n\t"
-        "movq	344(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 336(%[r])\n\t"
-        "movq	352(%[a]), %%rax\n\t"
-        "movq	352(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 344(%[r])\n\t"
-        "movq	360(%[a]), %%rcx\n\t"
-        "movq	360(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 352(%[r])\n\t"
-        "movq	368(%[a]), %%rax\n\t"
-        "movq	368(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 360(%[r])\n\t"
-        "movq	376(%[a]), %%rcx\n\t"
-        "movq	376(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 368(%[r])\n\t"
-        "movq	%%rcx, 376(%[r])\n\t"
-        "sbbq	$0, %[c]\n\t"
-        : [c] "+r" (c)
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m), [t] "r" (t)
-        : "memory", "rax", "rcx", "rdx"
-    );
-
-    return c;
-}
-
-/* Reduce the number back to 3072 bits using Montgomery reduction.
- *
- * a   A single precision number to reduce in place.
- * m   The single precision number representing the modulus.
- * mp  The digit representing the negative inverse of m mod 2^n.
- */
-SP_NOINLINE static void sp_3072_mont_reduce_48(sp_digit* a, sp_digit* m,
-        sp_digit mp)
-{
-    sp_digit ca = 0;
-
-    __asm__ __volatile__ (
-        "# i = 0\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "movq	0(%[a]), %%r12\n\t"
-        "movq	8(%[a]), %%r13\n\t"
-        "\nL_mont_loop_48:\n\t"
-        "# mu = a[i] * mp\n\t"
-        "movq	%%r12, %%r10\n\t"
-        "imulq	%[mp], %%r10\n\t"
-        "# a[i+0] += m[0] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	0(%[m])\n\t"
-        "addq	%%rax, %%r12\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "# a[i+1] += m[1] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	8(%[m])\n\t"
-        "movq	%%r13, %%r12\n\t"
-        "addq	%%rax, %%r12\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r12\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+2] += m[2] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	16(%[m])\n\t"
-        "movq	16(%[a]), %%r13\n\t"
-        "addq	%%rax, %%r13\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r13\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+3] += m[3] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	24(%[m])\n\t"
-        "movq	24(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 24(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+4] += m[4] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	32(%[m])\n\t"
-        "movq	32(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 32(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+5] += m[5] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	40(%[m])\n\t"
-        "movq	40(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 40(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+6] += m[6] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	48(%[m])\n\t"
-        "movq	48(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 48(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+7] += m[7] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	56(%[m])\n\t"
-        "movq	56(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 56(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+8] += m[8] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	64(%[m])\n\t"
-        "movq	64(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 64(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+9] += m[9] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	72(%[m])\n\t"
-        "movq	72(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 72(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+10] += m[10] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	80(%[m])\n\t"
-        "movq	80(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 80(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+11] += m[11] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	88(%[m])\n\t"
-        "movq	88(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 88(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+12] += m[12] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	96(%[m])\n\t"
-        "movq	96(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 96(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+13] += m[13] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	104(%[m])\n\t"
-        "movq	104(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 104(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+14] += m[14] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	112(%[m])\n\t"
-        "movq	112(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 112(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+15] += m[15] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	120(%[m])\n\t"
-        "movq	120(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 120(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+16] += m[16] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	128(%[m])\n\t"
-        "movq	128(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 128(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+17] += m[17] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	136(%[m])\n\t"
-        "movq	136(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 136(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+18] += m[18] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	144(%[m])\n\t"
-        "movq	144(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 144(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+19] += m[19] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	152(%[m])\n\t"
-        "movq	152(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 152(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+20] += m[20] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	160(%[m])\n\t"
-        "movq	160(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 160(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+21] += m[21] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	168(%[m])\n\t"
-        "movq	168(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 168(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+22] += m[22] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	176(%[m])\n\t"
-        "movq	176(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 176(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+23] += m[23] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	184(%[m])\n\t"
-        "movq	184(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 184(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+24] += m[24] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	192(%[m])\n\t"
-        "movq	192(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 192(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+25] += m[25] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	200(%[m])\n\t"
-        "movq	200(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 200(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+26] += m[26] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	208(%[m])\n\t"
-        "movq	208(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 208(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+27] += m[27] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	216(%[m])\n\t"
-        "movq	216(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 216(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+28] += m[28] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	224(%[m])\n\t"
-        "movq	224(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 224(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+29] += m[29] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	232(%[m])\n\t"
-        "movq	232(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 232(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+30] += m[30] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	240(%[m])\n\t"
-        "movq	240(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 240(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+31] += m[31] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	248(%[m])\n\t"
-        "movq	248(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 248(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+32] += m[32] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	256(%[m])\n\t"
-        "movq	256(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 256(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+33] += m[33] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	264(%[m])\n\t"
-        "movq	264(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 264(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+34] += m[34] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	272(%[m])\n\t"
-        "movq	272(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 272(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+35] += m[35] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	280(%[m])\n\t"
-        "movq	280(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 280(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+36] += m[36] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	288(%[m])\n\t"
-        "movq	288(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 288(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+37] += m[37] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	296(%[m])\n\t"
-        "movq	296(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 296(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+38] += m[38] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	304(%[m])\n\t"
-        "movq	304(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 304(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+39] += m[39] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	312(%[m])\n\t"
-        "movq	312(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 312(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+40] += m[40] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	320(%[m])\n\t"
-        "movq	320(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 320(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+41] += m[41] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	328(%[m])\n\t"
-        "movq	328(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 328(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+42] += m[42] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	336(%[m])\n\t"
-        "movq	336(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 336(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+43] += m[43] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	344(%[m])\n\t"
-        "movq	344(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 344(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+44] += m[44] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	352(%[m])\n\t"
-        "movq	352(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 352(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+45] += m[45] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	360(%[m])\n\t"
-        "movq	360(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "addq	%%r9,  %%r11\n\t"
-        "movq	%%r11, 360(%[a])\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+46] += m[46] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "mulq	368(%[m])\n\t"
-        "movq	368(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "addq	%%r8,  %%r11\n\t"
-        "movq	%%r11, 368(%[a])\n\t"
-        "adcq	$0, %%r9\n\t"
-        "# a[i+47] += m[47] * mu\n\t"
-        "movq	%%r10, %%rax\n\t"
-        "mulq	376(%[m])\n\t"
-        "movq	376(%[a]), %%r11\n\t"
-        "addq	%%rax, %%r9\n\t"
-        "adcq	%[ca], %%rdx\n\t"
-        "movq	$0, %[ca]\n\t"
-        "adcq	$0, %[ca]\n\t"
-        "addq	%%r9, %%r11\n\t"
-        "movq	%%r11, 376(%[a])\n\t"
-        "adcq	%%rdx, 384(%[a])\n\t"
-        "adcq	$0, %[ca]\n\t"
-        "# i += 1\n\t"
-        "addq	$8, %[a]\n\t"
-        "addq	$8, %%rcx\n\t"
-        "cmpq	$384, %%rcx\n\t"
-        "jl	L_mont_loop_48\n\t"
-        "movq	%%r12, 0(%[a])\n\t"
-        "movq	%%r13, 8(%[a])\n\t"
-        : [ca] "+r" (ca), [a] "+r" (a)
-        : [m] "r" (m), [mp] "r" (mp)
-        : "memory", "rax", "rdx", "rcx", "r8", "r9", "r10", "r11",
-          "r12", "r13"
-    );
-
-    sp_3072_cond_sub_48(a - 48, a, m, (sp_digit)0 - ca);
-}
-
-/* Multiply two Montogmery form numbers mod the modulus (prime).
- * (r = a * b mod m)
- *
- * r   Result of multiplication.
- * a   First number to multiply in Montogmery form.
- * b   Second number to multiply in Montogmery form.
- * m   Modulus (prime).
- * mp  Montogmery mulitplier.
- */
-static void sp_3072_mont_mul_48(sp_digit* r, sp_digit* a, sp_digit* b,
-        sp_digit* m, sp_digit mp)
-{
-    sp_3072_mul_48(r, a, b);
-    sp_3072_mont_reduce_48(r, m, mp);
-}
-
-/* Square the Montgomery form number. (r = a * a mod m)
- *
- * r   Result of squaring.
- * a   Number to square in Montogmery form.
- * m   Modulus (prime).
- * mp  Montogmery mulitplier.
- */
-static void sp_3072_mont_sqr_48(sp_digit* r, sp_digit* a, sp_digit* m,
-        sp_digit mp)
-{
-    sp_3072_sqr_48(r, a);
-    sp_3072_mont_reduce_48(r, m, mp);
-}
-
-/* Mul a by digit b into r. (r = a * b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A single precision digit.
- */
-SP_NOINLINE static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a,
-        const sp_digit b)
-{
-    __asm__ __volatile__ (
-        "# A[0] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	(%[a])\n\t"
-        "movq	%%rax, %%rbx\n\t"
-        "movq	%%rdx, %%rcx\n\t"
-        "movq	%%rbx, 0(%[r])\n\t"
-        "# A[1] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "movq	%%rcx, 8(%[r])\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "# A[2] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "movq	%%r8, 16(%[r])\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "# A[3] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "movq	%%rbx, 24(%[r])\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# A[4] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "mulq	32(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "movq	%%rcx, 32(%[r])\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "# A[5] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "mulq	40(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "movq	%%r8, 40(%[r])\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "# A[6] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	48(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "movq	%%rbx, 48(%[r])\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# A[7] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "mulq	56(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "movq	%%rcx, 56(%[r])\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "# A[8] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "mulq	64(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "movq	%%r8, 64(%[r])\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "# A[9] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	72(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "movq	%%rbx, 72(%[r])\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# A[10] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "mulq	80(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "movq	%%rcx, 80(%[r])\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "# A[11] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "mulq	88(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "movq	%%r8, 88(%[r])\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "# A[12] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	96(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "movq	%%rbx, 96(%[r])\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# A[13] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "mulq	104(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "movq	%%rcx, 104(%[r])\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "# A[14] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "mulq	112(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "movq	%%r8, 112(%[r])\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "# A[15] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	120(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "movq	%%rbx, 120(%[r])\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# A[16] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "mulq	128(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "movq	%%rcx, 128(%[r])\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "# A[17] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "mulq	136(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "movq	%%r8, 136(%[r])\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "# A[18] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	144(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "movq	%%rbx, 144(%[r])\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# A[19] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "mulq	152(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "movq	%%rcx, 152(%[r])\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "# A[20] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "mulq	160(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "movq	%%r8, 160(%[r])\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "# A[21] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	168(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "movq	%%rbx, 168(%[r])\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# A[22] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "mulq	176(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "movq	%%rcx, 176(%[r])\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "# A[23] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "mulq	184(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "movq	%%r8, 184(%[r])\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "# A[24] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	192(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "movq	%%rbx, 192(%[r])\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# A[25] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "mulq	200(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "movq	%%rcx, 200(%[r])\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "# A[26] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "mulq	208(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "movq	%%r8, 208(%[r])\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "# A[27] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	216(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "movq	%%rbx, 216(%[r])\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# A[28] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "mulq	224(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "movq	%%rcx, 224(%[r])\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "# A[29] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "mulq	232(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "movq	%%r8, 232(%[r])\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "# A[30] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	240(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "movq	%%rbx, 240(%[r])\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# A[31] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "mulq	248(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "movq	%%rcx, 248(%[r])\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "# A[32] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "mulq	256(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "movq	%%r8, 256(%[r])\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "# A[33] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	264(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "movq	%%rbx, 264(%[r])\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# A[34] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "mulq	272(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "movq	%%rcx, 272(%[r])\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "# A[35] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "mulq	280(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "movq	%%r8, 280(%[r])\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "# A[36] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	288(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "movq	%%rbx, 288(%[r])\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# A[37] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "mulq	296(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "movq	%%rcx, 296(%[r])\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "# A[38] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "mulq	304(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "movq	%%r8, 304(%[r])\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "# A[39] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	312(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "movq	%%rbx, 312(%[r])\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# A[40] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "mulq	320(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "movq	%%rcx, 320(%[r])\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "# A[41] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "mulq	328(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "movq	%%r8, 328(%[r])\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "# A[42] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	336(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "movq	%%rbx, 336(%[r])\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# A[43] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "mulq	344(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "movq	%%rcx, 344(%[r])\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "# A[44] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "mulq	352(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "movq	%%r8, 352(%[r])\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "# A[45] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	360(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "movq	%%rbx, 360(%[r])\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# A[46] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "mulq	368(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "movq	%%rcx, 368(%[r])\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "# A[47] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "mulq	376(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "movq	%%r8, 376(%[r])\n\t"
-        "movq	%%rbx, 384(%[r])\n\t"
-        :
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
-        : "memory", "rax", "rdx", "rbx", "rcx", "r8"
-    );
-}
-
-#ifdef HAVE_INTEL_AVX2
-/* Mul a by digit b into r. (r = a * b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A single precision digit.
- */
-SP_NOINLINE static void sp_3072_mul_d_avx2_48(sp_digit* r, const sp_digit* a,
-        const sp_digit b)
-{
-    __asm__ __volatile__ (
-        "# A[0] * B\n\t"
-        "movq	%[b], %%rdx\n\t"
-        "xorq	%%r10, %%r10\n\t"
-        "mulxq	(%[a]), %%r8, %%r9\n\t"
-        "movq	%%r8, 0(%[r])\n\t"
-        "# A[1] * B\n\t"
-        "mulxq	8(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 8(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[2] * B\n\t"
-        "mulxq	16(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 16(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[3] * B\n\t"
-        "mulxq	24(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 24(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[4] * B\n\t"
-        "mulxq	32(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 32(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[5] * B\n\t"
-        "mulxq	40(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 40(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[6] * B\n\t"
-        "mulxq	48(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 48(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[7] * B\n\t"
-        "mulxq	56(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 56(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[8] * B\n\t"
-        "mulxq	64(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 64(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[9] * B\n\t"
-        "mulxq	72(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 72(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[10] * B\n\t"
-        "mulxq	80(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 80(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[11] * B\n\t"
-        "mulxq	88(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 88(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[12] * B\n\t"
-        "mulxq	96(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 96(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[13] * B\n\t"
-        "mulxq	104(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 104(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[14] * B\n\t"
-        "mulxq	112(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 112(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[15] * B\n\t"
-        "mulxq	120(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 120(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[16] * B\n\t"
-        "mulxq	128(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 128(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[17] * B\n\t"
-        "mulxq	136(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 136(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[18] * B\n\t"
-        "mulxq	144(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 144(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[19] * B\n\t"
-        "mulxq	152(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 152(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[20] * B\n\t"
-        "mulxq	160(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 160(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[21] * B\n\t"
-        "mulxq	168(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 168(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[22] * B\n\t"
-        "mulxq	176(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 176(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[23] * B\n\t"
-        "mulxq	184(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 184(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[24] * B\n\t"
-        "mulxq	192(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 192(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[25] * B\n\t"
-        "mulxq	200(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 200(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[26] * B\n\t"
-        "mulxq	208(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 208(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[27] * B\n\t"
-        "mulxq	216(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 216(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[28] * B\n\t"
-        "mulxq	224(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 224(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[29] * B\n\t"
-        "mulxq	232(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 232(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[30] * B\n\t"
-        "mulxq	240(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 240(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[31] * B\n\t"
-        "mulxq	248(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 248(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[32] * B\n\t"
-        "mulxq	256(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 256(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[33] * B\n\t"
-        "mulxq	264(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 264(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[34] * B\n\t"
-        "mulxq	272(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 272(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[35] * B\n\t"
-        "mulxq	280(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 280(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[36] * B\n\t"
-        "mulxq	288(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 288(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[37] * B\n\t"
-        "mulxq	296(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 296(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[38] * B\n\t"
-        "mulxq	304(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 304(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[39] * B\n\t"
-        "mulxq	312(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 312(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[40] * B\n\t"
-        "mulxq	320(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 320(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[41] * B\n\t"
-        "mulxq	328(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 328(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[42] * B\n\t"
-        "mulxq	336(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 336(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[43] * B\n\t"
-        "mulxq	344(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 344(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[44] * B\n\t"
-        "mulxq	352(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 352(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[45] * B\n\t"
-        "mulxq	360(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 360(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[46] * B\n\t"
-        "mulxq	368(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 368(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[47] * B\n\t"
-        "mulxq	376(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "adcxq	%%r10, %%r8\n\t"
-        "movq	%%r9, 376(%[r])\n\t"
-        "movq	%%r8, 384(%[r])\n\t"
-        :
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
-        : "memory", "rax", "rdx", "rcx", "r8", "r9", "r10"
-    );
-}
-#endif /* HAVE_INTEL_AVX2 */
-
-/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
- *
- * d1   The high order half of the number to divide.
- * d0   The low order half of the number to divide.
- * div  The dividend.
- * returns the result of the division.
- */
-static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, sp_digit div)
-{
-    sp_digit r;
-
-    __asm__ __volatile__ (
-        "movq	%[d0], %%rax\n\t"
-        "movq	%[d1], %%rdx\n\t"
-        "divq	%[div]\n\t"
-        "movq	%%rax, %[r]\n\t"
-        : [r] "=r" (r)
-        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
-        : "rax", "rdx"
-    );
-
-    return r;
-}
-
-/* AND m into each word of a and store in r.
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * m  Mask to AND against each digit.
- */
-static void sp_3072_mask_48(sp_digit* r, sp_digit* a, sp_digit m)
-{
-#ifdef WOLFSSL_SP_SMALL
-    int i;
-
-    for (i=0; i<48; i++)
-        r[i] = a[i] & m;
-#else
-    int i;
-
-    for (i = 0; i < 48; i += 8) {
-        r[i+0] = a[i+0] & m;
-        r[i+1] = a[i+1] & m;
-        r[i+2] = a[i+2] & m;
-        r[i+3] = a[i+3] & m;
-        r[i+4] = a[i+4] & m;
-        r[i+5] = a[i+5] & m;
-        r[i+6] = a[i+6] & m;
-        r[i+7] = a[i+7] & m;
-    }
-#endif
-}
-
-/* Compare a with b in constant time.
- *
- * a  A single precision integer.
- * b  A single precision integer.
- * return -ve, 0 or +ve if a is less than, equal to or greater than b
- * respectively.
- */
-static int64_t sp_3072_cmp_48(sp_digit* a, sp_digit* b)
-{
-    sp_digit r = -1;
-    sp_digit one = 1;
-
-    __asm__ __volatile__ (
-        "xorq	%%rcx, %%rcx\n\t"
-        "movq	$-1, %%rdx\n\t"
-        "movq	376(%[a]), %%rbx\n\t"
-        "movq	376(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	368(%[a]), %%rbx\n\t"
-        "movq	368(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	360(%[a]), %%rbx\n\t"
-        "movq	360(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	352(%[a]), %%rbx\n\t"
-        "movq	352(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	344(%[a]), %%rbx\n\t"
-        "movq	344(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	336(%[a]), %%rbx\n\t"
-        "movq	336(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	328(%[a]), %%rbx\n\t"
-        "movq	328(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	320(%[a]), %%rbx\n\t"
-        "movq	320(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	312(%[a]), %%rbx\n\t"
-        "movq	312(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	304(%[a]), %%rbx\n\t"
-        "movq	304(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	296(%[a]), %%rbx\n\t"
-        "movq	296(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	288(%[a]), %%rbx\n\t"
-        "movq	288(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	280(%[a]), %%rbx\n\t"
-        "movq	280(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	272(%[a]), %%rbx\n\t"
-        "movq	272(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	264(%[a]), %%rbx\n\t"
-        "movq	264(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	256(%[a]), %%rbx\n\t"
-        "movq	256(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	248(%[a]), %%rbx\n\t"
-        "movq	248(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	240(%[a]), %%rbx\n\t"
-        "movq	240(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	232(%[a]), %%rbx\n\t"
-        "movq	232(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	224(%[a]), %%rbx\n\t"
-        "movq	224(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	216(%[a]), %%rbx\n\t"
-        "movq	216(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	208(%[a]), %%rbx\n\t"
-        "movq	208(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	200(%[a]), %%rbx\n\t"
-        "movq	200(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	192(%[a]), %%rbx\n\t"
-        "movq	192(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	184(%[a]), %%rbx\n\t"
-        "movq	184(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	176(%[a]), %%rbx\n\t"
-        "movq	176(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	168(%[a]), %%rbx\n\t"
-        "movq	168(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	160(%[a]), %%rbx\n\t"
-        "movq	160(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	152(%[a]), %%rbx\n\t"
-        "movq	152(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	144(%[a]), %%rbx\n\t"
-        "movq	144(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	136(%[a]), %%rbx\n\t"
-        "movq	136(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	128(%[a]), %%rbx\n\t"
-        "movq	128(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	120(%[a]), %%rbx\n\t"
-        "movq	120(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	112(%[a]), %%rbx\n\t"
-        "movq	112(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	104(%[a]), %%rbx\n\t"
-        "movq	104(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	96(%[a]), %%rbx\n\t"
-        "movq	96(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	88(%[a]), %%rbx\n\t"
-        "movq	88(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	80(%[a]), %%rbx\n\t"
-        "movq	80(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	72(%[a]), %%rbx\n\t"
-        "movq	72(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	64(%[a]), %%rbx\n\t"
-        "movq	64(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	56(%[a]), %%rbx\n\t"
-        "movq	56(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	48(%[a]), %%rbx\n\t"
-        "movq	48(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	40(%[a]), %%rbx\n\t"
-        "movq	40(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	32(%[a]), %%rbx\n\t"
-        "movq	32(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	24(%[a]), %%rbx\n\t"
-        "movq	24(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	16(%[a]), %%rbx\n\t"
-        "movq	16(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	8(%[a]), %%rbx\n\t"
-        "movq	8(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	0(%[a]), %%rbx\n\t"
-        "movq	0(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "xorq	%%rdx, %[r]\n\t"
-        : [r] "+r" (r)
-        : [a] "r" (a), [b] "r" (b), [one] "r" (one)
-        : "rax", "rdx", "rcx", "rbx", "r8"
-    );
-
-    return r;
-}
-
-/* Divide d in a and put remainder into r (m*d + r = a)
- * m is not calculated as it is not needed at this time.
- *
- * a  Nmber to be divided.
- * d  Number to divide with.
- * m  Multiplier result.
- * r  Remainder from the division.
- * returns MP_OKAY indicating success.
- */
-static WC_INLINE int sp_3072_div_48(sp_digit* a, sp_digit* d, sp_digit* m,
-        sp_digit* r)
-{
-    sp_digit t1[96], t2[49];
-    sp_digit div, r1;
-    int i;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    (void)m;
-
-    div = d[47];
-    XMEMCPY(t1, a, sizeof(*t1) * 2 * 48);
-    for (i=47; i>=0; i--) {
-        r1 = div_3072_word_48(t1[48 + i], t1[48 + i - 1], div);
-
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            sp_3072_mul_d_avx2_48(t2, d, r1);
-        else
-#endif
-            sp_3072_mul_d_48(t2, d, r1);
-        t1[48 + i] += sp_3072_sub_in_place_48(&t1[i], t2);
-        t1[48 + i] -= t2[48];
-        sp_3072_mask_48(t2, d, t1[48 + i]);
-        t1[48 + i] += sp_3072_add_48(&t1[i], &t1[i], t2);
-        sp_3072_mask_48(t2, d, t1[48 + i]);
-        t1[48 + i] += sp_3072_add_48(&t1[i], &t1[i], t2);
-    }
-
-    r1 = sp_3072_cmp_48(t1, d) >= 0;
-    sp_3072_cond_sub_48(r, t1, t2, (sp_digit)0 - r1);
-
-    return MP_OKAY;
-}
-
-/* Reduce a modulo m into r. (r = a mod m)
- *
- * r  A single precision number that is the reduced result.
- * a  A single precision number that is to be reduced.
- * m  A single precision number that is the modulus to reduce with.
- * returns MP_OKAY indicating success.
- */
-static WC_INLINE int sp_3072_mod_48(sp_digit* r, sp_digit* a, sp_digit* m)
-{
-    return sp_3072_div_48(a, m, NULL, r);
-}
-
-/* Divide d in a and put remainder into r (m*d + r = a)
- * m is not calculated as it is not needed at this time.
- *
- * a  Nmber to be divided.
- * d  Number to divide with.
- * m  Multiplier result.
- * r  Remainder from the division.
- * returns MP_OKAY indicating success.
- */
-static WC_INLINE int sp_3072_div_48_cond(sp_digit* a, sp_digit* d, sp_digit* m,
-        sp_digit* r)
-{
-    sp_digit t1[96], t2[49];
-    sp_digit div, r1;
-    int i;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    (void)m;
-
-    div = d[47];
-    XMEMCPY(t1, a, sizeof(*t1) * 2 * 48);
-    for (i=47; i>=0; i--) {
-        r1 = div_3072_word_48(t1[48 + i], t1[48 + i - 1], div);
-
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            sp_3072_mul_d_avx2_48(t2, d, r1);
-        else
-#endif
-            sp_3072_mul_d_48(t2, d, r1);
-        t1[48 + i] += sp_3072_sub_in_place_48(&t1[i], t2);
-        t1[48 + i] -= t2[48];
-        if (t1[48 + i] != 0) {
-            t1[48 + i] += sp_3072_add_48(&t1[i], &t1[i], d);
-            if (t1[48 + i] != 0)
-                t1[48 + i] += sp_3072_add_48(&t1[i], &t1[i], d);
-        }
-    }
-
-    r1 = sp_3072_cmp_48(t1, d) >= 0;
-    sp_3072_cond_sub_48(r, t1, t2, (sp_digit)0 - r1);
-
-    return MP_OKAY;
-}
-
-/* Reduce a modulo m into r. (r = a mod m)
- *
- * r  A single precision number that is the reduced result.
- * a  A single precision number that is to be reduced.
- * m  A single precision number that is the modulus to reduce with.
- * returns MP_OKAY indicating success.
- */
-static WC_INLINE int sp_3072_mod_48_cond(sp_digit* r, sp_digit* a, sp_digit* m)
-{
-    return sp_3072_div_48_cond(a, m, NULL, r);
-}
-
-#if defined(SP_RSA_PRIVATE_EXP_D) || defined(WOLFSSL_HAVE_SP_DH)
-/* Modular exponentiate a to the e mod m. (r = a^e mod m)
- *
- * r     A single precision number that is the result of the operation.
- * a     A single precision number being exponentiated.
- * e     A single precision number that is the exponent.
- * bits  The number of bits in the exponent.
- * m     A single precision number that is the modulus.
- * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
- */
-static int sp_3072_mod_exp_48(sp_digit* r, sp_digit* a, sp_digit* e,
-        int bits, sp_digit* m, int reduceA)
-{
-#ifndef WOLFSSL_SMALL_STACK
-    sp_digit t[32][96];
-#else
-    sp_digit* t[32];
-    sp_digit* td;
-#endif
-    sp_digit* norm;
-    sp_digit mp = 1;
-    sp_digit n;
-    sp_digit mask;
-    int i;
-    int c, y;
-    int err = MP_OKAY;
-
-#ifdef WOLFSSL_SMALL_STACK
-    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 96, NULL,
-                            DYNAMIC_TYPE_TMP_BUFFER);
-    if (td == NULL)
-        err = MEMORY_E;
-
-    if (err == MP_OKAY) {
-        for (i=0; i<32; i++)
-            t[i] = td + i * 96;
-        norm = t[0];
-    }
-#else
-    norm = t[0];
-#endif
-
-    if (err == MP_OKAY) {
-        sp_3072_mont_setup(m, &mp);
-        sp_3072_mont_norm_48(norm, m);
-
-        XMEMSET(t[1], 0, sizeof(sp_digit) * 48);
-        if (reduceA) {
-            err = sp_3072_mod_48(t[1] + 48, a, m);
-            if (err == MP_OKAY)
-                err = sp_3072_mod_48(t[1], t[1], m);
-        }
-        else {
-            XMEMCPY(t[1] + 48, a, sizeof(sp_digit) * 48);
-            err = sp_3072_mod_48(t[1], t[1], m);
-        }
-    }
-
-    if (err == MP_OKAY) {
-        sp_3072_mont_sqr_48(t[ 2], t[ 1], m, mp);
-        sp_3072_mont_mul_48(t[ 3], t[ 2], t[ 1], m, mp);
-        sp_3072_mont_sqr_48(t[ 4], t[ 2], m, mp);
-        sp_3072_mont_mul_48(t[ 5], t[ 3], t[ 2], m, mp);
-        sp_3072_mont_sqr_48(t[ 6], t[ 3], m, mp);
-        sp_3072_mont_mul_48(t[ 7], t[ 4], t[ 3], m, mp);
-        sp_3072_mont_sqr_48(t[ 8], t[ 4], m, mp);
-        sp_3072_mont_mul_48(t[ 9], t[ 5], t[ 4], m, mp);
-        sp_3072_mont_sqr_48(t[10], t[ 5], m, mp);
-        sp_3072_mont_mul_48(t[11], t[ 6], t[ 5], m, mp);
-        sp_3072_mont_sqr_48(t[12], t[ 6], m, mp);
-        sp_3072_mont_mul_48(t[13], t[ 7], t[ 6], m, mp);
-        sp_3072_mont_sqr_48(t[14], t[ 7], m, mp);
-        sp_3072_mont_mul_48(t[15], t[ 8], t[ 7], m, mp);
-        sp_3072_mont_sqr_48(t[16], t[ 8], m, mp);
-        sp_3072_mont_mul_48(t[17], t[ 9], t[ 8], m, mp);
-        sp_3072_mont_sqr_48(t[18], t[ 9], m, mp);
-        sp_3072_mont_mul_48(t[19], t[10], t[ 9], m, mp);
-        sp_3072_mont_sqr_48(t[20], t[10], m, mp);
-        sp_3072_mont_mul_48(t[21], t[11], t[10], m, mp);
-        sp_3072_mont_sqr_48(t[22], t[11], m, mp);
-        sp_3072_mont_mul_48(t[23], t[12], t[11], m, mp);
-        sp_3072_mont_sqr_48(t[24], t[12], m, mp);
-        sp_3072_mont_mul_48(t[25], t[13], t[12], m, mp);
-        sp_3072_mont_sqr_48(t[26], t[13], m, mp);
-        sp_3072_mont_mul_48(t[27], t[14], t[13], m, mp);
-        sp_3072_mont_sqr_48(t[28], t[14], m, mp);
-        sp_3072_mont_mul_48(t[29], t[15], t[14], m, mp);
-        sp_3072_mont_sqr_48(t[30], t[15], m, mp);
-        sp_3072_mont_mul_48(t[31], t[16], t[15], m, mp);
-
-        i = (bits - 1) / 64;
-        n = e[i--];
-        y = n >> 59;
-        n <<= 5;
-        c = 59;
-        XMEMCPY(r, t[y], sizeof(sp_digit) * 48);
-        for (; i>=0 || c>=5; ) {
-            if (c == 0) {
-                n = e[i--];
-                y = n >> 59;
-                n <<= 5;
-                c = 59;
-            }
-            else if (c < 5) {
-                y = n >> 59;
-                n = e[i--];
-                c = 5 - c;
-                y |= n >> (64 - c);
-                n <<= c;
-                c = 64 - c;
-            }
-            else {
-                y = (n >> 59) & 0x1f;
-                n <<= 5;
-                c -= 5;
-            }
-
-            sp_3072_mont_sqr_48(r, r, m, mp);
-            sp_3072_mont_sqr_48(r, r, m, mp);
-            sp_3072_mont_sqr_48(r, r, m, mp);
-            sp_3072_mont_sqr_48(r, r, m, mp);
-            sp_3072_mont_sqr_48(r, r, m, mp);
-
-            sp_3072_mont_mul_48(r, r, t[y], m, mp);
-        }
-        y = e[0] & ((1 << c) - 1);
-        for (; c > 0; c--)
-            sp_3072_mont_sqr_48(r, r, m, mp);
-        sp_3072_mont_mul_48(r, r, t[y], m, mp);
-
-        XMEMSET(&r[48], 0, sizeof(sp_digit) * 48);
-        sp_3072_mont_reduce_48(r, m, mp);
-
-        mask = 0 - (sp_3072_cmp_48(r, m) >= 0);
-        sp_3072_cond_sub_48(r, r, m, mask);
-    }
-
-#ifdef WOLFSSL_SMALL_STACK
-    if (td != NULL)
-        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return err;
-}
-#endif /* SP_RSA_PRIVATE_EXP_D || WOLFSSL_HAVE_SP_DH */
-
-#ifdef HAVE_INTEL_AVX2
-/* Reduce the number back to 3072 bits using Montgomery reduction.
- *
- * a   A single precision number to reduce in place.
- * m   The single precision number representing the modulus.
- * mp  The digit representing the negative inverse of m mod 2^n.
- */
-SP_NOINLINE static void sp_3072_mont_reduce_avx2_48(sp_digit* a, sp_digit* m,
-        sp_digit mp)
-{
-    sp_digit ca = 0;
-
-    __asm__ __volatile__ (
-        "# i = 0\n\t"
-        "movq	0(%[a]), %%r12\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "\nL_mont_loop_avx2_48:\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "movq	%%r12, %%r10\n\t"
-        "# mu = a[i] * mp\n\t"
-        "movq	%%r12, %%rdx\n\t"
-        "mulxq	%[mp], %%rdx, %%r8\n\t"
-        "# a[i+0] += m[0] * mu\n\t"
-        "mulxq	0(%[m]), %%rax, %%r8\n\t"
-        "movq	8(%[a]), %%r12\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r12\n\t"
-        "# a[i+1] += m[1] * mu\n\t"
-        "mulxq	8(%[m]), %%rax, %%r8\n\t"
-        "movq	16(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r12\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "# a[i+2] += m[2] * mu\n\t"
-        "mulxq	16(%[m]), %%rax, %%r8\n\t"
-        "movq	24(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 16(%[a])\n\t"
-        "# a[i+3] += m[3] * mu\n\t"
-        "mulxq	24(%[m]), %%rax, %%r8\n\t"
-        "movq	32(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 24(%[a])\n\t"
-        "# a[i+4] += m[4] * mu\n\t"
-        "mulxq	32(%[m]), %%rax, %%r8\n\t"
-        "movq	40(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 32(%[a])\n\t"
-        "# a[i+5] += m[5] * mu\n\t"
-        "mulxq	40(%[m]), %%rax, %%r8\n\t"
-        "movq	48(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 40(%[a])\n\t"
-        "# a[i+6] += m[6] * mu\n\t"
-        "mulxq	48(%[m]), %%rax, %%r8\n\t"
-        "movq	56(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 48(%[a])\n\t"
-        "# a[i+7] += m[7] * mu\n\t"
-        "mulxq	56(%[m]), %%rax, %%r8\n\t"
-        "movq	64(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 56(%[a])\n\t"
-        "# a[i+8] += m[8] * mu\n\t"
-        "mulxq	64(%[m]), %%rax, %%r8\n\t"
-        "movq	72(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 64(%[a])\n\t"
-        "# a[i+9] += m[9] * mu\n\t"
-        "mulxq	72(%[m]), %%rax, %%r8\n\t"
-        "movq	80(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 72(%[a])\n\t"
-        "# a[i+10] += m[10] * mu\n\t"
-        "mulxq	80(%[m]), %%rax, %%r8\n\t"
-        "movq	88(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 80(%[a])\n\t"
-        "# a[i+11] += m[11] * mu\n\t"
-        "mulxq	88(%[m]), %%rax, %%r8\n\t"
-        "movq	96(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 88(%[a])\n\t"
-        "# a[i+12] += m[12] * mu\n\t"
-        "mulxq	96(%[m]), %%rax, %%r8\n\t"
-        "movq	104(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 96(%[a])\n\t"
-        "# a[i+13] += m[13] * mu\n\t"
-        "mulxq	104(%[m]), %%rax, %%r8\n\t"
-        "movq	112(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 104(%[a])\n\t"
-        "# a[i+14] += m[14] * mu\n\t"
-        "mulxq	112(%[m]), %%rax, %%r8\n\t"
-        "movq	120(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 112(%[a])\n\t"
-        "# a[i+15] += m[15] * mu\n\t"
-        "mulxq	120(%[m]), %%rax, %%r8\n\t"
-        "movq	128(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 120(%[a])\n\t"
-        "# a[i+16] += m[16] * mu\n\t"
-        "mulxq	128(%[m]), %%rax, %%r8\n\t"
-        "movq	136(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 128(%[a])\n\t"
-        "# a[i+17] += m[17] * mu\n\t"
-        "mulxq	136(%[m]), %%rax, %%r8\n\t"
-        "movq	144(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 136(%[a])\n\t"
-        "# a[i+18] += m[18] * mu\n\t"
-        "mulxq	144(%[m]), %%rax, %%r8\n\t"
-        "movq	152(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 144(%[a])\n\t"
-        "# a[i+19] += m[19] * mu\n\t"
-        "mulxq	152(%[m]), %%rax, %%r8\n\t"
-        "movq	160(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 152(%[a])\n\t"
-        "# a[i+20] += m[20] * mu\n\t"
-        "mulxq	160(%[m]), %%rax, %%r8\n\t"
-        "movq	168(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 160(%[a])\n\t"
-        "# a[i+21] += m[21] * mu\n\t"
-        "mulxq	168(%[m]), %%rax, %%r8\n\t"
-        "movq	176(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 168(%[a])\n\t"
-        "# a[i+22] += m[22] * mu\n\t"
-        "mulxq	176(%[m]), %%rax, %%r8\n\t"
-        "movq	184(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 176(%[a])\n\t"
-        "# a[i+23] += m[23] * mu\n\t"
-        "mulxq	184(%[m]), %%rax, %%r8\n\t"
-        "movq	192(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 184(%[a])\n\t"
-        "# a[i+24] += m[24] * mu\n\t"
-        "mulxq	192(%[m]), %%rax, %%r8\n\t"
-        "movq	200(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 192(%[a])\n\t"
-        "# a[i+25] += m[25] * mu\n\t"
-        "mulxq	200(%[m]), %%rax, %%r8\n\t"
-        "movq	208(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 200(%[a])\n\t"
-        "# a[i+26] += m[26] * mu\n\t"
-        "mulxq	208(%[m]), %%rax, %%r8\n\t"
-        "movq	216(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 208(%[a])\n\t"
-        "# a[i+27] += m[27] * mu\n\t"
-        "mulxq	216(%[m]), %%rax, %%r8\n\t"
-        "movq	224(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 216(%[a])\n\t"
-        "# a[i+28] += m[28] * mu\n\t"
-        "mulxq	224(%[m]), %%rax, %%r8\n\t"
-        "movq	232(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 224(%[a])\n\t"
-        "# a[i+29] += m[29] * mu\n\t"
-        "mulxq	232(%[m]), %%rax, %%r8\n\t"
-        "movq	240(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 232(%[a])\n\t"
-        "# a[i+30] += m[30] * mu\n\t"
-        "mulxq	240(%[m]), %%rax, %%r8\n\t"
-        "movq	248(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 240(%[a])\n\t"
-        "# a[i+31] += m[31] * mu\n\t"
-        "mulxq	248(%[m]), %%rax, %%r8\n\t"
-        "movq	256(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 248(%[a])\n\t"
-        "# a[i+32] += m[32] * mu\n\t"
-        "mulxq	256(%[m]), %%rax, %%r8\n\t"
-        "movq	264(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 256(%[a])\n\t"
-        "# a[i+33] += m[33] * mu\n\t"
-        "mulxq	264(%[m]), %%rax, %%r8\n\t"
-        "movq	272(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 264(%[a])\n\t"
-        "# a[i+34] += m[34] * mu\n\t"
-        "mulxq	272(%[m]), %%rax, %%r8\n\t"
-        "movq	280(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 272(%[a])\n\t"
-        "# a[i+35] += m[35] * mu\n\t"
-        "mulxq	280(%[m]), %%rax, %%r8\n\t"
-        "movq	288(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 280(%[a])\n\t"
-        "# a[i+36] += m[36] * mu\n\t"
-        "mulxq	288(%[m]), %%rax, %%r8\n\t"
-        "movq	296(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 288(%[a])\n\t"
-        "# a[i+37] += m[37] * mu\n\t"
-        "mulxq	296(%[m]), %%rax, %%r8\n\t"
-        "movq	304(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 296(%[a])\n\t"
-        "# a[i+38] += m[38] * mu\n\t"
-        "mulxq	304(%[m]), %%rax, %%r8\n\t"
-        "movq	312(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 304(%[a])\n\t"
-        "# a[i+39] += m[39] * mu\n\t"
-        "mulxq	312(%[m]), %%rax, %%r8\n\t"
-        "movq	320(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 312(%[a])\n\t"
-        "# a[i+40] += m[40] * mu\n\t"
-        "mulxq	320(%[m]), %%rax, %%r8\n\t"
-        "movq	328(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 320(%[a])\n\t"
-        "# a[i+41] += m[41] * mu\n\t"
-        "mulxq	328(%[m]), %%rax, %%r8\n\t"
-        "movq	336(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 328(%[a])\n\t"
-        "# a[i+42] += m[42] * mu\n\t"
-        "mulxq	336(%[m]), %%rax, %%r8\n\t"
-        "movq	344(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 336(%[a])\n\t"
-        "# a[i+43] += m[43] * mu\n\t"
-        "mulxq	344(%[m]), %%rax, %%r8\n\t"
-        "movq	352(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 344(%[a])\n\t"
-        "# a[i+44] += m[44] * mu\n\t"
-        "mulxq	352(%[m]), %%rax, %%r8\n\t"
-        "movq	360(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 352(%[a])\n\t"
-        "# a[i+45] += m[45] * mu\n\t"
-        "mulxq	360(%[m]), %%rax, %%r8\n\t"
-        "movq	368(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 360(%[a])\n\t"
-        "# a[i+46] += m[46] * mu\n\t"
-        "mulxq	368(%[m]), %%rax, %%r8\n\t"
-        "movq	376(%[a]), %%r11\n\t"
-        "adcxq	%%rax, %%r10\n\t"
-        "adoxq	%%r8, %%r11\n\t"
-        "movq	%%r10, 368(%[a])\n\t"
-        "# a[i+47] += m[47] * mu\n\t"
-        "mulxq	376(%[m]), %%rax, %%r8\n\t"
-        "movq	384(%[a]), %%r10\n\t"
-        "adcxq	%%rax, %%r11\n\t"
-        "adoxq	%%r8, %%r10\n\t"
-        "movq	%%r11, 376(%[a])\n\t"
-        "adcxq	%[ca], %%r10\n\t"
-        "movq	%%r9, %[ca]\n\t"
-        "adoxq	%%r9, %[ca]\n\t"
-        "adcxq	%%r9, %[ca]\n\t"
-        "movq	%%r10, 384(%[a])\n\t"
-        "# i += 1\n\t"
-        "addq	$8, %[a]\n\t"
-        "addq	$1, %%rcx\n\t"
-        "cmpq	$48, %%rcx\n\t"
-        "jl	L_mont_loop_avx2_48\n\t"
-        "movq	%%r12, 0(%[a])\n\t"
-        : [ca] "+r" (ca), [a] "+r" (a)
-        : [m] "r" (m), [mp] "r" (mp)
-        : "memory", "rax", "rdx", "rcx", "r8", "r9", "r10", "r11", "r12"
-    );
-
-    sp_3072_cond_sub_48(a - 48, a, m, (sp_digit)0 - ca);
-}
-#endif /* HAVE_INTEL_AVX2 */
-
-#ifdef HAVE_INTEL_AVX2
-/* Multiply two Montogmery form numbers mod the modulus (prime).
- * (r = a * b mod m)
- *
- * r   Result of multiplication.
- * a   First number to multiply in Montogmery form.
- * b   Second number to multiply in Montogmery form.
- * m   Modulus (prime).
- * mp  Montogmery mulitplier.
- */
-static void sp_3072_mont_mul_avx2_48(sp_digit* r, sp_digit* a, sp_digit* b,
-        sp_digit* m, sp_digit mp)
-{
-    sp_3072_mul_avx2_48(r, a, b);
-    sp_3072_mont_reduce_avx2_48(r, m, mp);
-}
-
-#endif /* HAVE_INTEL_AVX2 */
-#ifdef HAVE_INTEL_AVX2
-/* Square the Montgomery form number. (r = a * a mod m)
- *
- * r   Result of squaring.
- * a   Number to square in Montogmery form.
- * m   Modulus (prime).
- * mp  Montogmery mulitplier.
- */
-static void sp_3072_mont_sqr_avx2_48(sp_digit* r, sp_digit* a, sp_digit* m,
-        sp_digit mp)
-{
-    sp_3072_sqr_avx2_48(r, a);
-    sp_3072_mont_reduce_avx2_48(r, m, mp);
-}
-
-#endif /* HAVE_INTEL_AVX2 */
-#if defined(SP_RSA_PRIVATE_EXP_D) || defined(WOLFSSL_HAVE_SP_DH)
-#ifdef HAVE_INTEL_AVX2
-/* Modular exponentiate a to the e mod m. (r = a^e mod m)
- *
- * r     A single precision number that is the result of the operation.
- * a     A single precision number being exponentiated.
- * e     A single precision number that is the exponent.
- * bits  The number of bits in the exponent.
- * m     A single precision number that is the modulus.
- * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
- */
-static int sp_3072_mod_exp_avx2_48(sp_digit* r, sp_digit* a, sp_digit* e,
-        int bits, sp_digit* m, int reduceA)
-{
-#ifndef WOLFSSL_SMALL_STACK
-    sp_digit t[32][96];
-#else
-    sp_digit* t[32];
-    sp_digit* td;
-#endif
-    sp_digit* norm;
-    sp_digit mp = 1;
-    sp_digit n;
-    sp_digit mask;
-    int i;
-    int c, y;
-    int err = MP_OKAY;
-
-#ifdef WOLFSSL_SMALL_STACK
-    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 96, NULL,
-                            DYNAMIC_TYPE_TMP_BUFFER);
-    if (td == NULL)
-        err = MEMORY_E;
-
-    if (err == MP_OKAY) {
-        for (i=0; i<32; i++)
-            t[i] = td + i * 96;
-        norm = t[0];
-    }
-#else
-    norm = t[0];
-#endif
-
-    if (err == MP_OKAY) {
-        sp_3072_mont_setup(m, &mp);
-        sp_3072_mont_norm_48(norm, m);
-
-        XMEMSET(t[1], 0, sizeof(sp_digit) * 48);
-        if (reduceA) {
-            err = sp_3072_mod_48(t[1] + 48, a, m);
-            if (err == MP_OKAY)
-                err = sp_3072_mod_48(t[1], t[1], m);
-        }
-        else {
-            XMEMCPY(t[1] + 48, a, sizeof(sp_digit) * 48);
-            err = sp_3072_mod_48(t[1], t[1], m);
-        }
-    }
-
-    if (err == MP_OKAY) {
-        sp_3072_mont_sqr_avx2_48(t[ 2], t[ 1], m, mp);
-        sp_3072_mont_mul_avx2_48(t[ 3], t[ 2], t[ 1], m, mp);
-        sp_3072_mont_sqr_avx2_48(t[ 4], t[ 2], m, mp);
-        sp_3072_mont_mul_avx2_48(t[ 5], t[ 3], t[ 2], m, mp);
-        sp_3072_mont_sqr_avx2_48(t[ 6], t[ 3], m, mp);
-        sp_3072_mont_mul_avx2_48(t[ 7], t[ 4], t[ 3], m, mp);
-        sp_3072_mont_sqr_avx2_48(t[ 8], t[ 4], m, mp);
-        sp_3072_mont_mul_avx2_48(t[ 9], t[ 5], t[ 4], m, mp);
-        sp_3072_mont_sqr_avx2_48(t[10], t[ 5], m, mp);
-        sp_3072_mont_mul_avx2_48(t[11], t[ 6], t[ 5], m, mp);
-        sp_3072_mont_sqr_avx2_48(t[12], t[ 6], m, mp);
-        sp_3072_mont_mul_avx2_48(t[13], t[ 7], t[ 6], m, mp);
-        sp_3072_mont_sqr_avx2_48(t[14], t[ 7], m, mp);
-        sp_3072_mont_mul_avx2_48(t[15], t[ 8], t[ 7], m, mp);
-        sp_3072_mont_sqr_avx2_48(t[16], t[ 8], m, mp);
-        sp_3072_mont_mul_avx2_48(t[17], t[ 9], t[ 8], m, mp);
-        sp_3072_mont_sqr_avx2_48(t[18], t[ 9], m, mp);
-        sp_3072_mont_mul_avx2_48(t[19], t[10], t[ 9], m, mp);
-        sp_3072_mont_sqr_avx2_48(t[20], t[10], m, mp);
-        sp_3072_mont_mul_avx2_48(t[21], t[11], t[10], m, mp);
-        sp_3072_mont_sqr_avx2_48(t[22], t[11], m, mp);
-        sp_3072_mont_mul_avx2_48(t[23], t[12], t[11], m, mp);
-        sp_3072_mont_sqr_avx2_48(t[24], t[12], m, mp);
-        sp_3072_mont_mul_avx2_48(t[25], t[13], t[12], m, mp);
-        sp_3072_mont_sqr_avx2_48(t[26], t[13], m, mp);
-        sp_3072_mont_mul_avx2_48(t[27], t[14], t[13], m, mp);
-        sp_3072_mont_sqr_avx2_48(t[28], t[14], m, mp);
-        sp_3072_mont_mul_avx2_48(t[29], t[15], t[14], m, mp);
-        sp_3072_mont_sqr_avx2_48(t[30], t[15], m, mp);
-        sp_3072_mont_mul_avx2_48(t[31], t[16], t[15], m, mp);
-
-        i = (bits - 1) / 64;
-        n = e[i--];
-        y = n >> 59;
-        n <<= 5;
-        c = 59;
-        XMEMCPY(r, t[y], sizeof(sp_digit) * 48);
-        for (; i>=0 || c>=5; ) {
-            if (c == 0) {
-                n = e[i--];
-                y = n >> 59;
-                n <<= 5;
-                c = 59;
-            }
-            else if (c < 5) {
-                y = n >> 59;
-                n = e[i--];
-                c = 5 - c;
-                y |= n >> (64 - c);
-                n <<= c;
-                c = 64 - c;
-            }
-            else {
-                y = (n >> 59) & 0x1f;
-                n <<= 5;
-                c -= 5;
-            }
-
-            sp_3072_mont_sqr_avx2_48(r, r, m, mp);
-            sp_3072_mont_sqr_avx2_48(r, r, m, mp);
-            sp_3072_mont_sqr_avx2_48(r, r, m, mp);
-            sp_3072_mont_sqr_avx2_48(r, r, m, mp);
-            sp_3072_mont_sqr_avx2_48(r, r, m, mp);
-
-            sp_3072_mont_mul_avx2_48(r, r, t[y], m, mp);
-        }
-        y = e[0] & ((1 << c) - 1);
-        for (; c > 0; c--)
-            sp_3072_mont_sqr_avx2_48(r, r, m, mp);
-        sp_3072_mont_mul_avx2_48(r, r, t[y], m, mp);
-
-        XMEMSET(&r[48], 0, sizeof(sp_digit) * 48);
-        sp_3072_mont_reduce_avx2_48(r, m, mp);
-
-        mask = 0 - (sp_3072_cmp_48(r, m) >= 0);
-        sp_3072_cond_sub_48(r, r, m, mask);
-    }
-
-#ifdef WOLFSSL_SMALL_STACK
-    if (td != NULL)
-        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return err;
-}
-#endif /* HAVE_INTEL_AVX2 */
-#endif /* SP_RSA_PRIVATE_EXP_D || WOLFSSL_HAVE_SP_DH */
-
-#ifdef WOLFSSL_HAVE_SP_RSA
-/* RSA public key operation.
- *
- * in      Array of bytes representing the number to exponentiate, base.
- * inLen   Number of bytes in base.
- * em      Public exponent.
- * mm      Modulus.
- * out     Buffer to hold big-endian bytes of exponentiation result.
- *         Must be at least 384 bytes long.
- * outLen  Number of bytes in result.
- * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
- * an array is too long and MEMORY_E when dynamic memory allocation fails.
- */
-int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
-    byte* out, word32* outLen)
-{
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_digit ad[96], md[48], rd[96];
-#else
-    sp_digit* d = NULL;
-#endif
-    sp_digit* a;
-    sp_digit *ah;
-    sp_digit* m;
-    sp_digit* r;
-    sp_digit e[1];
-    int err = MP_OKAY;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    if (*outLen < 384)
-        err = MP_TO_E;
-    if (err == MP_OKAY && (mp_count_bits(em) > 64 || inLen > 384 ||
-                                                     mp_count_bits(mm) != 3072))
-        err = MP_READ_E;
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 48 * 5, NULL,
-                               DYNAMIC_TYPE_TMP_BUFFER);
-        if (d == NULL)
-            err = MEMORY_E;
-    }
-
-    if (err == MP_OKAY) {
-        a = d;
-        r = a + 48 * 2;
-        m = r + 48 * 2;
-        ah = a + 48;
-    }
-#else
-    a = ad;
-    m = md;
-    r = rd;
-    ah = a + 48;
-#endif
-
-    if (err == MP_OKAY) {
-        sp_3072_from_bin(ah, 48, in, inLen);
-#if DIGIT_BIT >= 64
-        e[0] = em->dp[0];
-#else
-        e[0] = em->dp[0];
-        if (em->used > 1)
-            e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
-#endif
-        if (e[0] == 0)
-            err = MP_EXPTMOD_E;
-    }
-    if (err == MP_OKAY) {
-        sp_3072_from_mp(m, 48, mm);
-
-        if (e[0] == 0x3) {
-#ifdef HAVE_INTEL_AVX2
-            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
-                if (err == MP_OKAY) {
-                    sp_3072_sqr_avx2_48(r, ah);
-                    err = sp_3072_mod_48_cond(r, r, m);
-                }
-                if (err == MP_OKAY) {
-                    sp_3072_mul_avx2_48(r, ah, r);
-                    err = sp_3072_mod_48_cond(r, r, m);
-                }
-            }
-            else
-#endif
-            {
-                if (err == MP_OKAY) {
-                    sp_3072_sqr_48(r, ah);
-                    err = sp_3072_mod_48_cond(r, r, m);
-                }
-                if (err == MP_OKAY) {
-                    sp_3072_mul_48(r, ah, r);
-                    err = sp_3072_mod_48_cond(r, r, m);
-                }
-            }
-        }
-        else {
-            int i;
-            sp_digit mp;
-
-            sp_3072_mont_setup(m, &mp);
-
-            /* Convert to Montgomery form. */
-            XMEMSET(a, 0, sizeof(sp_digit) * 48);
-            err = sp_3072_mod_48_cond(a, a, m);
-
-            if (err == MP_OKAY) {
-                for (i=63; i>=0; i--)
-                    if (e[0] >> i)
-                        break;
-
-                XMEMCPY(r, a, sizeof(sp_digit) * 48);
-#ifdef HAVE_INTEL_AVX2
-                if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
-                    for (i--; i>=0; i--) {
-                        sp_3072_mont_sqr_avx2_48(r, r, m, mp);
-                        if (((e[0] >> i) & 1) == 1)
-                            sp_3072_mont_mul_avx2_48(r, r, a, m, mp);
-                    }
-                    XMEMSET(&r[48], 0, sizeof(sp_digit) * 48);
-                    sp_3072_mont_reduce_avx2_48(r, m, mp);
-                }
-                else
-#endif
-                {
-                    for (i--; i>=0; i--) {
-                        sp_3072_mont_sqr_48(r, r, m, mp);
-                        if (((e[0] >> i) & 1) == 1)
-                            sp_3072_mont_mul_48(r, r, a, m, mp);
-                    }
-                    XMEMSET(&r[48], 0, sizeof(sp_digit) * 48);
-                    sp_3072_mont_reduce_48(r, m, mp);
-                }
-
-                for (i = 47; i > 0; i--) {
-                    if (r[i] != m[i])
-                        break;
-                }
-                if (r[i] >= m[i])
-                    sp_3072_sub_in_place_48(r, m);
-            }
-        }
-    }
-
-    if (err == MP_OKAY) {
-        sp_3072_to_bin(r, out);
-        *outLen = 384;
-    }
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (d != NULL)
-        XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return err;
-}
-
-/* RSA private key operation.
- *
- * in      Array of bytes representing the number to exponentiate, base.
- * inLen   Number of bytes in base.
- * dm      Private exponent.
- * pm      First prime.
- * qm      Second prime.
- * dpm     First prime's CRT exponent.
- * dqm     Second prime's CRT exponent.
- * qim     Inverse of second prime mod p.
- * mm      Modulus.
- * out     Buffer to hold big-endian bytes of exponentiation result.
- *         Must be at least 384 bytes long.
- * outLen  Number of bytes in result.
- * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
- * an array is too long and MEMORY_E when dynamic memory allocation fails.
- */
-int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm,
-    mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
-    byte* out, word32* outLen)
-{
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_digit ad[48 * 2];
-    sp_digit pd[24], qd[24], dpd[24];
-    sp_digit tmpad[48], tmpbd[48];
-#else
-    sp_digit* t = NULL;
-#endif
-    sp_digit* a;
-    sp_digit* p;
-    sp_digit* q;
-    sp_digit* dp;
-    sp_digit* dq;
-    sp_digit* qi;
-    sp_digit* tmp;
-    sp_digit* tmpa;
-    sp_digit* tmpb;
-    sp_digit* r;
-    sp_digit c;
-    int err = MP_OKAY;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    (void)dm;
-    (void)mm;
-
-    if (*outLen < 384)
-        err = MP_TO_E;
-    if (err == MP_OKAY && (inLen > 384 || mp_count_bits(mm) != 3072))
-        err = MP_READ_E;
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 24 * 11, NULL,
-                               DYNAMIC_TYPE_TMP_BUFFER);
-        if (t == NULL)
-            err = MEMORY_E;
-    }
-    if (err == MP_OKAY) {
-        a = t;
-        p = a + 48 * 2;
-        q = p + 24;
-        qi = dq = dp = q + 24;
-        tmpa = qi + 24;
-        tmpb = tmpa + 48;
-
-        tmp = t;
-        r = tmp + 48;
-    }
-#else
-    r = a = ad;
-    p = pd;
-    q = qd;
-    qi = dq = dp = dpd;
-    tmpa = tmpad;
-    tmpb = tmpbd;
-    tmp = a + 48;
-#endif
-
-    if (err == MP_OKAY) {
-        sp_3072_from_bin(a, 48, in, inLen);
-        sp_3072_from_mp(p, 24, pm);
-        sp_3072_from_mp(q, 24, qm);
-        sp_3072_from_mp(dp, 24, dpm);
-
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_3072_mod_exp_avx2_24(tmpa, a, dp, 1536, p, 1);
-        else
-#endif
-            err = sp_3072_mod_exp_24(tmpa, a, dp, 1536, p, 1);
-    }
-    if (err == MP_OKAY) {
-        sp_3072_from_mp(dq, 24, dqm);
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_3072_mod_exp_avx2_24(tmpb, a, dq, 1536, q, 1);
-       else
-#endif
-            err = sp_3072_mod_exp_24(tmpb, a, dq, 1536, q, 1);
-    }
-
-    if (err == MP_OKAY) {
-        c = sp_3072_sub_in_place_24(tmpa, tmpb);
-        sp_3072_mask_24(tmp, p, c);
-        sp_3072_add_24(tmpa, tmpa, tmp);
-
-        sp_3072_from_mp(qi, 24, qim);
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            sp_3072_mul_avx2_24(tmpa, tmpa, qi);
-        else
-#endif
-            sp_3072_mul_24(tmpa, tmpa, qi);
-        err = sp_3072_mod_24(tmpa, tmpa, p);
-    }
-
-    if (err == MP_OKAY) {
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            sp_3072_mul_avx2_24(tmpa, q, tmpa);
-        else
-#endif
-            sp_3072_mul_24(tmpa, q, tmpa);
-        XMEMSET(&tmpb[24], 0, sizeof(sp_digit) * 24);
-        sp_3072_add_48(r, tmpb, tmpa);
-
-        sp_3072_to_bin(r, out);
-        *outLen = 384;
-    }
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (t != NULL) {
-        XMEMSET(t, 0, sizeof(sp_digit) * 24 * 11);
-        XFREE(t, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    }
-#else
-    XMEMSET(tmpad, 0, sizeof(tmpad));
-    XMEMSET(tmpbd, 0, sizeof(tmpbd));
-    XMEMSET(pd, 0, sizeof(pd));
-    XMEMSET(qd, 0, sizeof(qd));
-    XMEMSET(dpd, 0, sizeof(dpd));
-#endif
-
-    return err;
-}
-#endif /* WOLFSSL_HAVE_SP_RSA */
-#ifdef WOLFSSL_HAVE_SP_DH
-/* Convert an array of sp_digit to an mp_int.
- *
- * a  A single precision integer.
- * r  A multi-precision integer.
- */
-static int sp_3072_to_mp(sp_digit* a, mp_int* r)
-{
-    int err;
-
-    err = mp_grow(r, (3072 + DIGIT_BIT - 1) / DIGIT_BIT);
-    if (err == MP_OKAY) {
-#if DIGIT_BIT == 64
-        XMEMCPY(r->dp, a, sizeof(sp_digit) * 48);
-        r->used = 48;
-        mp_clamp(r);
-#elif DIGIT_BIT < 64
-        int i, j = 0, s = 0;
-
-        r->dp[0] = 0;
-        for (i = 0; i < 48; i++) {
-            r->dp[j] |= a[i] << s;
-            r->dp[j] &= (1l << DIGIT_BIT) - 1;
-            s = DIGIT_BIT - s;
-            r->dp[++j] = a[i] >> s;
-            while (s + DIGIT_BIT <= 64) {
-                s += DIGIT_BIT;
-                r->dp[j] &= (1l << DIGIT_BIT) - 1;
-                r->dp[++j] = a[i] >> s;
-            }
-            s = 64 - s;
-        }
-        r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT;
-        mp_clamp(r);
-#else
-        int i, j = 0, s = 0;
-
-        r->dp[0] = 0;
-        for (i = 0; i < 48; i++) {
-            r->dp[j] |= ((mp_digit)a[i]) << s;
-            if (s + 64 >= DIGIT_BIT) {
-    #if DIGIT_BIT < 64
-                r->dp[j] &= (1l << DIGIT_BIT) - 1;
-    #endif
-                s = DIGIT_BIT - s;
-                r->dp[++j] = a[i] >> s;
-                s = 64 - s;
-            }
-            else
-                s += 64;
-        }
-        r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT;
-        mp_clamp(r);
-#endif
-    }
-
-    return err;
-}
-
-/* Perform the modular exponentiation for Diffie-Hellman.
- *
- * base  Base. MP integer.
- * exp   Exponent. MP integer.
- * mod   Modulus. MP integer.
- * res   Result. MP integer.
- * returs 0 on success, MP_READ_E if there are too many bytes in an array
- * and MEMORY_E if memory allocation fails.
- */
-int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
-{
-    int err = MP_OKAY;
-    sp_digit b[96], e[48], m[48];
-    sp_digit* r = b;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-    int expBits = mp_count_bits(exp);
-
-    if (mp_count_bits(base) > 3072 || expBits > 3072 ||
-                                                   mp_count_bits(mod) != 3072) {
-        err = MP_READ_E;
-    }
-
-    if (err == MP_OKAY) {
-        sp_3072_from_mp(b, 48, base);
-        sp_3072_from_mp(e, 48, exp);
-        sp_3072_from_mp(m, 48, mod);
-
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_3072_mod_exp_avx2_48(r, b, e, expBits, m, 0);
-        else
-#endif
-            err = sp_3072_mod_exp_48(r, b, e, expBits, m, 0);
-    }
-
-    if (err == MP_OKAY) {
-        err = sp_3072_to_mp(r, res);
-    }
-
-    XMEMSET(e, 0, sizeof(e));
-
-    return err;
-}
-
-/* Perform the modular exponentiation for Diffie-Hellman.
- *
- * base     Base.
- * exp      Array of bytes that is the exponent.
- * expLen   Length of data, in bytes, in exponent.
- * mod      Modulus.
- * out      Buffer to hold big-endian bytes of exponentiation result.
- *          Must be at least 384 bytes long.
- * outLen   Length, in bytes, of exponentiation result.
- * returs 0 on success, MP_READ_E if there are too many bytes in an array
- * and MEMORY_E if memory allocation fails.
- */
-int sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen,
-    mp_int* mod, byte* out, word32* outLen)
-{
-    int err = MP_OKAY;
-    sp_digit b[96], e[48], m[48];
-    sp_digit* r = b;
-    word32 i;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    if (mp_count_bits(base) > 3072 || expLen > 384 ||
-                                                   mp_count_bits(mod) != 3072) {
-        err = MP_READ_E;
-    }
-
-    if (err == MP_OKAY) {
-        sp_3072_from_mp(b, 48, base);
-        sp_3072_from_bin(e, 48, exp, expLen);
-        sp_3072_from_mp(m, 48, mod);
-
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_3072_mod_exp_avx2_48(r, b, e, expLen * 8, m, 0);
-        else
-#endif
-            err = sp_3072_mod_exp_48(r, b, e, expLen * 8, m, 0);
-    }
-
-    if (err == MP_OKAY) {
-        sp_3072_to_bin(r, out);
-        *outLen = 384;
-        for (i=0; i<384 && out[i] == 0; i++) {
-        }
-        *outLen -= i;
-        XMEMMOVE(out, out + i, *outLen);
-    }
-
-    XMEMSET(e, 0, sizeof(e));
-
-    return err;
-}
-#endif /* WOLFSSL_HAVE_SP_DH */
-
-#endif /* WOLFSSL_SP_NO_3072 */
-
-#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
-#ifdef WOLFSSL_HAVE_SP_ECC
-#ifndef WOLFSSL_SP_NO_256
-
-/* Point structure to use. */
-typedef struct sp_point {
-    sp_digit x[2 * 4];
-    sp_digit y[2 * 4];
-    sp_digit z[2 * 4];
-    int infinity;
-} sp_point;
-
-/* The modulus (prime) of the curve P256. */
-static sp_digit p256_mod[4] = {
-    0xffffffffffffffffl,0x00000000ffffffffl,0x0000000000000000l,
-    0xffffffff00000001l
-};
-/* The Montogmery normalizer for modulus of the curve P256. */
-static sp_digit p256_norm_mod[4] = {
-    0x0000000000000001l,0xffffffff00000000l,0xffffffffffffffffl,
-    0x00000000fffffffel
-};
-/* The Montogmery multiplier for modulus of the curve P256. */
-static sp_digit p256_mp_mod = 0x0000000000000001;
-#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
-                                            defined(HAVE_ECC_VERIFY)
-/* The order of the curve P256. */
-static sp_digit p256_order[4] = {
-    0xf3b9cac2fc632551l,0xbce6faada7179e84l,0xffffffffffffffffl,
-    0xffffffff00000000l
-};
-#endif
-/* The order of the curve P256 minus 2. */
-static sp_digit p256_order2[4] = {
-    0xf3b9cac2fc63254fl,0xbce6faada7179e84l,0xffffffffffffffffl,
-    0xffffffff00000000l
-};
-#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
-/* The Montogmery normalizer for order of the curve P256. */
-static sp_digit p256_norm_order[4] = {
-    0x0c46353d039cdaafl,0x4319055258e8617bl,0x0000000000000000l,
-    0x00000000ffffffffl
-};
-#endif
-#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
-/* The Montogmery multiplier for order of the curve P256. */
-static sp_digit p256_mp_order = 0xccd1c8aaee00bc4fl;
-#endif
-#ifdef WOLFSSL_SP_SMALL
-/* The base point of curve P256. */
-static sp_point p256_base = {
-    /* X ordinate */
-    {
-        0xf4a13945d898c296l,0x77037d812deb33a0l,0xf8bce6e563a440f2l,
-        0x6b17d1f2e12c4247l
-    },
-    /* Y ordinate */
-    {
-        0xcbb6406837bf51f5l,0x2bce33576b315ecel,0x8ee7eb4a7c0f9e16l,
-        0x4fe342e2fe1a7f9bl
-    },
-    /* Z ordinate */
-    {
-        0x0000000000000001l,0x0000000000000000l,0x0000000000000000l,
-        0x0000000000000000l
-    },
-    /* infinity */
-    0
-};
-#endif /* WOLFSSL_SP_SMALL */
-#if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY)
-static sp_digit p256_b[4] = {
-    0x3bce3c3e27d2604bl,0x651d06b0cc53b0f6l,0xb3ebbd55769886bcl,
-    0x5ac635d8aa3a93e7l
-};
-#endif
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-/* Allocate memory for point and return error. */
-#define sp_ecc_point_new(heap, sp, p)                                   \
-    ((p = XMALLOC(sizeof(sp_point), heap, DYNAMIC_TYPE_ECC)) == NULL) ? \
-        MEMORY_E : MP_OKAY
-#else
-/* Set pointer to data and return no error. */
-#define sp_ecc_point_new(heap, sp, p)   ((p = &sp) == NULL) ? MEMORY_E : MP_OKAY
-#endif
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-/* If valid pointer then clear point data if requested and free data. */
-#define sp_ecc_point_free(p, clear, heap)     \
-    do {                                      \
-        if (p != NULL) {                      \
-            if (clear)                        \
-                XMEMSET(p, 0, sizeof(*p));    \
-            XFREE(p, heap, DYNAMIC_TYPE_ECC); \
-        }                                     \
-    }                                         \
-    while (0)
-#else
-/* Clear point data if requested. */
-#define sp_ecc_point_free(p, clear, heap) \
-    do {                                  \
-        if (clear)                        \
-            XMEMSET(p, 0, sizeof(*p));    \
-    }                                     \
-    while (0)
-#endif
-
-/* Multiply a number by Montogmery normalizer mod modulus (prime).
- *
- * r  The resulting Montgomery form number.
- * a  The number to convert.
- * m  The modulus (prime).
- */
-static int sp_256_mod_mul_norm_4(sp_digit* r, sp_digit* a, sp_digit* m)
-{
-    int64_t t[8];
-    int64_t a32[8];
-    int64_t o;
-
-    (void)m;
-
-    a32[0] = a[0] & 0xffffffff;
-    a32[1] = a[0] >> 32;
-    a32[2] = a[1] & 0xffffffff;
-    a32[3] = a[1] >> 32;
-    a32[4] = a[2] & 0xffffffff;
-    a32[5] = a[2] >> 32;
-    a32[6] = a[3] & 0xffffffff;
-    a32[7] = a[3] >> 32;
-
-    /*  1  1  0 -1 -1 -1 -1  0 */
-    t[0] = 0 + a32[0] + a32[1] - a32[3] - a32[4] - a32[5] - a32[6];
-    /*  0  1  1  0 -1 -1 -1 -1 */
-    t[1] = 0 + a32[1] + a32[2] - a32[4] - a32[5] - a32[6] - a32[7];
-    /*  0  0  1  1  0 -1 -1 -1 */
-    t[2] = 0 + a32[2] + a32[3] - a32[5] - a32[6] - a32[7];
-    /* -1 -1  0  2  2  1  0 -1 */
-    t[3] = 0 - a32[0] - a32[1] + 2 * a32[3] + 2 * a32[4] + a32[5] - a32[7];
-    /*  0 -1 -1  0  2  2  1  0 */
-    t[4] = 0 - a32[1] - a32[2] + 2 * a32[4] + 2 * a32[5] + a32[6];
-    /*  0  0 -1 -1  0  2  2  1 */
-    t[5] = 0 - a32[2] - a32[3] + 2 * a32[5] + 2 * a32[6] + a32[7];
-    /* -1 -1  0  0  0  1  3  2 */
-    t[6] = 0 - a32[0] - a32[1] + a32[5] + 3 * a32[6] + 2 * a32[7];
-    /*  1  0 -1 -1 -1 -1  0  3 */
-    t[7] = 0 + a32[0] - a32[2] - a32[3] - a32[4] - a32[5] + 3 * a32[7];
-
-    t[1] += t[0] >> 32; t[0] &= 0xffffffff;
-    t[2] += t[1] >> 32; t[1] &= 0xffffffff;
-    t[3] += t[2] >> 32; t[2] &= 0xffffffff;
-    t[4] += t[3] >> 32; t[3] &= 0xffffffff;
-    t[5] += t[4] >> 32; t[4] &= 0xffffffff;
-    t[6] += t[5] >> 32; t[5] &= 0xffffffff;
-    t[7] += t[6] >> 32; t[6] &= 0xffffffff;
-    o     = t[7] >> 32; t[7] &= 0xffffffff;
-    t[0] += o;
-    t[3] -= o;
-    t[6] -= o;
-    t[7] += o;
-    t[1] += t[0] >> 32; t[0] &= 0xffffffff;
-    t[2] += t[1] >> 32; t[1] &= 0xffffffff;
-    t[3] += t[2] >> 32; t[2] &= 0xffffffff;
-    t[4] += t[3] >> 32; t[3] &= 0xffffffff;
-    t[5] += t[4] >> 32; t[4] &= 0xffffffff;
-    t[6] += t[5] >> 32; t[5] &= 0xffffffff;
-    t[7] += t[6] >> 32; t[6] &= 0xffffffff;
-    r[0] = (t[1] << 32) | t[0];
-    r[1] = (t[3] << 32) | t[2];
-    r[2] = (t[5] << 32) | t[4];
-    r[3] = (t[7] << 32) | t[6];
-
-    return MP_OKAY;
-}
-
-/* Convert an mp_int to an array of sp_digit.
- *
- * r  A single precision integer.
- * a  A multi-precision integer.
- */
-static void sp_256_from_mp(sp_digit* r, int max, mp_int* a)
-{
-#if DIGIT_BIT == 64
-    int j;
-
-    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
-
-    for (j = a->used; j < max; j++)
-        r[j] = 0;
-#elif DIGIT_BIT > 64
-    int i, j = 0, s = 0;
-
-    r[0] = 0;
-    for (i = 0; i < a->used && j < max; i++) {
-        r[j] |= a->dp[i] << s;
-        r[j] &= 0xffffffffffffffffl;
-        s = 64 - s;
-        if (j + 1 >= max)
-            break;
-        r[++j] = a->dp[i] >> s;
-        while (s + 64 <= DIGIT_BIT) {
-            s += 64;
-            r[j] &= 0xffffffffffffffffl;
-            if (j + 1 >= max)
-                break;
-            if (s < DIGIT_BIT)
-                r[++j] = a->dp[i] >> s;
-            else
-                r[++j] = 0;
-        }
-        s = DIGIT_BIT - s;
-    }
-
-    for (j++; j < max; j++)
-        r[j] = 0;
-#else
-    int i, j = 0, s = 0;
-
-    r[0] = 0;
-    for (i = 0; i < a->used && j < max; i++) {
-        r[j] |= ((sp_digit)a->dp[i]) << s;
-        if (s + DIGIT_BIT >= 64) {
-            r[j] &= 0xffffffffffffffffl;
-            if (j + 1 >= max)
-                break;
-            s = 64 - s;
-            if (s == DIGIT_BIT) {
-                r[++j] = 0;
-                s = 0;
-            }
-            else {
-                r[++j] = a->dp[i] >> s;
-                s = DIGIT_BIT - s;
-            }
-        }
-        else
-            s += DIGIT_BIT;
-    }
-
-    for (j++; j < max; j++)
-        r[j] = 0;
-#endif
-}
-
-/* Convert a point of type ecc_point to type sp_point.
- *
- * p   Point of type sp_point (result).
- * pm  Point of type ecc_point.
- */
-static void sp_256_point_from_ecc_point_4(sp_point* p, ecc_point* pm)
-{
-    XMEMSET(p->x, 0, sizeof(p->x));
-    XMEMSET(p->y, 0, sizeof(p->y));
-    XMEMSET(p->z, 0, sizeof(p->z));
-    sp_256_from_mp(p->x, 4, pm->x);
-    sp_256_from_mp(p->y, 4, pm->y);
-    sp_256_from_mp(p->z, 4, pm->z);
-    p->infinity = 0;
-}
-
-/* Convert an array of sp_digit to an mp_int.
- *
- * a  A single precision integer.
- * r  A multi-precision integer.
- */
-static int sp_256_to_mp(sp_digit* a, mp_int* r)
-{
-    int err;
-
-    err = mp_grow(r, (256 + DIGIT_BIT - 1) / DIGIT_BIT);
-    if (err == MP_OKAY) {
-#if DIGIT_BIT == 64
-        XMEMCPY(r->dp, a, sizeof(sp_digit) * 4);
-        r->used = 4;
-        mp_clamp(r);
-#elif DIGIT_BIT < 64
-        int i, j = 0, s = 0;
-
-        r->dp[0] = 0;
-        for (i = 0; i < 4; i++) {
-            r->dp[j] |= a[i] << s;
-            r->dp[j] &= (1l << DIGIT_BIT) - 1;
-            s = DIGIT_BIT - s;
-            r->dp[++j] = a[i] >> s;
-            while (s + DIGIT_BIT <= 64) {
-                s += DIGIT_BIT;
-                r->dp[j] &= (1l << DIGIT_BIT) - 1;
-                r->dp[++j] = a[i] >> s;
-            }
-            s = 64 - s;
-        }
-        r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT;
-        mp_clamp(r);
-#else
-        int i, j = 0, s = 0;
-
-        r->dp[0] = 0;
-        for (i = 0; i < 4; i++) {
-            r->dp[j] |= ((mp_digit)a[i]) << s;
-            if (s + 64 >= DIGIT_BIT) {
-    #if DIGIT_BIT < 64
-                r->dp[j] &= (1l << DIGIT_BIT) - 1;
-    #endif
-                s = DIGIT_BIT - s;
-                r->dp[++j] = a[i] >> s;
-                s = 64 - s;
-            }
-            else
-                s += 64;
-        }
-        r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT;
-        mp_clamp(r);
-#endif
-    }
-
-    return err;
-}
-
-/* Convert a point of type sp_point to type ecc_point.
- *
- * p   Point of type sp_point.
- * pm  Point of type ecc_point (result).
- * returns MEMORY_E when allocation of memory in ecc_point fails otherwise
- * MP_OKAY.
- */
-static int sp_256_point_to_ecc_point_4(sp_point* p, ecc_point* pm)
-{
-    int err;
-
-    err = sp_256_to_mp(p->x, pm->x);
-    if (err == MP_OKAY)
-        err = sp_256_to_mp(p->y, pm->y);
-    if (err == MP_OKAY)
-        err = sp_256_to_mp(p->z, pm->z);
-
-    return err;
-}
-
-/* Conditionally copy a into r using the mask m.
- * m is -1 to copy and 0 when not.
- *
- * r  A single precision number to copy over.
- * a  A single precision number to copy.
- * m  Mask value to apply.
- */
-static void sp_256_cond_copy_4(sp_digit* r, const sp_digit* a, const sp_digit m)
-{
-    sp_digit t[4];
-    __asm__ __volatile__ (
-        "movq	(%[r]), %%rax\n\t"
-        "movq	8(%[r]), %%rcx\n\t"
-        "movq	16(%[r]), %%rdx\n\t"
-        "movq	24(%[r]), %%r8\n\t"
-        "xorq	(%[a]), %%rax\n\t"
-        "xorq	8(%[a]), %%rcx\n\t"
-        "xorq	16(%[a]), %%rdx\n\t"
-        "xorq	24(%[a]), %%r8\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "andq	%[m], %%rdx\n\t"
-        "andq	%[m], %%r8\n\t"
-        "xorq	%%rax, (%[r])\n\t"
-        "xorq	%%rcx, 8(%[r])\n\t"
-        "xorq	%%rdx, 16(%[r])\n\t"
-        "xorq	%%r8, 24(%[r])\n\t"
-        :
-        : [r] "r" (r), [a] "r" (a), [m] "r" (m), [t] "r" (t)
-        : "memory", "rax", "rcx", "rdx", "r8"
-    );
-}
-
-/* Compare a with b in constant time.
- *
- * a  A single precision integer.
- * b  A single precision integer.
- * return -ve, 0 or +ve if a is less than, equal to or greater than b
- * respectively.
- */
-static int64_t sp_256_cmp_4(sp_digit* a, sp_digit* b)
-{
-    sp_digit r = -1;
-    sp_digit one = 1;
-
-    __asm__ __volatile__ (
-        "xorq	%%rcx, %%rcx\n\t"
-        "movq	$-1, %%rdx\n\t"
-        "movq	24(%[a]), %%rbx\n\t"
-        "movq	24(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	16(%[a]), %%rbx\n\t"
-        "movq	16(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	8(%[a]), %%rbx\n\t"
-        "movq	8(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "movq	0(%[a]), %%rbx\n\t"
-        "movq	0(%[b]), %%r8\n\t"
-        "andq	%%rdx, %%rbx\n\t"
-        "andq	%%rdx, %%r8\n\t"
-        "subq	%%r8, %%rbx\n\t"
-        "cmova	%[one], %[r]\n\t"
-        "cmovc	%%rdx, %[r]\n\t"
-        "cmovnz	%%rcx, %%rdx\n\t"
-        "xorq	%%rdx, %[r]\n\t"
-        : [r] "+r" (r)
-        : [a] "r" (a), [b] "r" (b), [one] "r" (one)
-        : "rax", "rdx", "rcx", "rbx", "r8"
-    );
-
-    return r;
-}
-
-/* Normalize the values in each word to 64.
- *
- * a  Array of sp_digit to normalize.
- */
-#define sp_256_norm_4(a)
-
-/* Conditionally subtract b from a using the mask m.
- * m is -1 to subtract and 0 when not copying.
- *
- * r  A single precision number representing condition subtract result.
- * a  A single precision number to subtract from.
- * b  A single precision number to subtract.
- * m  Mask value to apply.
- */
-static sp_digit sp_256_cond_sub_4(sp_digit* r, sp_digit* a, sp_digit* b,
-        sp_digit m)
-{
-    sp_digit t[4];
-    sp_digit c = 0;
-
-    __asm__ __volatile__ (
-        "movq	0(%[b]), %%rax\n\t"
-        "movq	8(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 0(%[t])\n\t"
-        "movq	%%rcx, 8(%[t])\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "movq	24(%[b]), %%rcx\n\t"
-        "andq	%[m], %%rax\n\t"
-        "andq	%[m], %%rcx\n\t"
-        "movq	%%rax, 16(%[t])\n\t"
-        "movq	%%rcx, 24(%[t])\n\t"
-        "movq	(%[a]), %%rax\n\t"
-        "movq	(%[t]), %%rdx\n\t"
-        "subq	%%rdx,%%rax\n\t"
-        "movq	8(%[a]), %%rcx\n\t"
-        "movq	8(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 0(%[r])\n\t"
-        "movq	16(%[a]), %%rax\n\t"
-        "movq	16(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 8(%[r])\n\t"
-        "movq	24(%[a]), %%rcx\n\t"
-        "movq	24(%[t]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 16(%[r])\n\t"
-        "movq	%%rcx, 24(%[r])\n\t"
-        "sbbq	$0, %[c]\n\t"
-        : [c] "+r" (c)
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m), [t] "r" (t)
-        : "memory", "rax", "rcx", "rdx"
-    );
-
-    return c;
-}
-
-/* Sub b from a into r. (r = a - b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A single precision integer.
- */
-SP_NOINLINE static sp_digit sp_256_sub_4(sp_digit* r, const sp_digit* a,
-        const sp_digit* b)
-{
-    sp_digit c = 0;
-
-    __asm__ __volatile__ (
-        "movq	(%[a]), %%rax\n\t"
-        "movq	(%[b]), %%rdx\n\t"
-        "subq	%%rdx, %%rax\n\t"
-        "movq	8(%[a]), %%rcx\n\t"
-        "movq	8(%[b]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 0(%[r])\n\t"
-        "movq	16(%[a]), %%rax\n\t"
-        "movq	16(%[b]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rax\n\t"
-        "movq	%%rcx, 8(%[r])\n\t"
-        "movq	24(%[a]), %%rcx\n\t"
-        "movq	24(%[b]), %%rdx\n\t"
-        "sbbq	%%rdx, %%rcx\n\t"
-        "movq	%%rax, 16(%[r])\n\t"
-        "movq	%%rcx, 24(%[r])\n\t"
-        "sbbq	$0, %[c]\n\t"
-        : [c] "+r" (c)
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
-        : "memory", "rax", "rcx", "rdx"
-    );
-
-    return c;
-}
-
-/* Reduce the number back to 256 bits using Montgomery reduction.
- *
- * a   A single precision number to reduce in place.
- * m   The single precision number representing the modulus.
- * mp  The digit representing the negative inverse of m mod 2^n.
- */
-SP_NOINLINE static void sp_256_mont_reduce_4(sp_digit* a, sp_digit* m,
-        sp_digit mp)
-{
-    __asm__ __volatile__ (
-        "# i = 0\n\t"
-        "xorq	%%r13, %%r13\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "movq	%[a], %%r12\n\t"
-        "\nL_mont_loop_4:\n\t"
-        "# mu = a[i] * mp\n\t"
-        "movq	0(%%r12), %%r11\n\t"
-        "imulq	%[mp], %%r11\n\t"
-        "# a[i+0] += m[0] * mu\n\t"
-        "movq	0(%[m]), %%rax\n\t"
-        "movq	8(%[m]), %%r9\n\t"
-        "mulq	%%r11\n\t"
-        "movq	0(%%r12), %%rbx\n\t"
-        "addq	%%rax,  %%rbx\n\t"
-        "movq	%%rdx, %%r8\n\t"
-        "movq	%%rbx, 0(%%r12)\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+1] += m[1] * mu\n\t"
-        "movq	%%r9, %%rax\n\t"
-        "mulq	%%r11\n\t"
-        "movq	16(%[m]), %%r9\n\t"
-        "movq	8(%%r12), %%rbx\n\t"
-        "addq	%%r8, %%rax\n\t"
-        "movq	%%rdx, %%r10\n\t"
-        "adcq	$0, %%r10\n\t"
-        "addq	%%rax,  %%rbx\n\t"
-        "movq	%%rbx, 8(%%r12)\n\t"
-        "adcq	$0, %%r10\n\t"
-        "# a[i+2] += m[2] * mu\n\t"
-        "movq	%%r9, %%rax\n\t"
-        "mulq	%%r11\n\t"
-        "movq	24(%[m]), %%r9\n\t"
-        "movq	16(%%r12), %%rbx\n\t"
-        "addq	%%r10, %%rax\n\t"
-        "movq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%r8\n\t"
-        "addq	%%rax,  %%rbx\n\t"
-        "movq	%%rbx, 16(%%r12)\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# a[i+3] += m[3] * mu\n\t"
-        "movq	%%r9, %%rax\n\t"
-        "mulq	%%r11\n\t"
-        "movq	24(%%r12), %%rbx\n\t"
-        "addq	%%r8, %%rax\n\t"
-        "adcq	%%r13, %%rdx\n\t"
-        "movq	$0, %%r13\n\t"
-        "adcq	$0, %%r13\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "movq	%%rbx, 24(%%r12)\n\t"
-        "adcq	%%rdx, 32(%%r12)\n\t"
-        "adcq	$0, %%r13\n\t"
-        "# i += 1\n\t"
-        "addq	$8, %%r12\n\t"
-        "addq	$8, %%rcx\n\t"
-        "cmpq	$32, %%rcx\n\t"
-        "jl	L_mont_loop_4\n\t"
-        "xorq	%%rax, %%rax\n\t"
-        "movq	32(%[a]), %%rdx\n\t"
-        "movq	40(%[a]), %%rcx\n\t"
-        "movq	48(%[a]), %%rbx\n\t"
-        "movq	56(%[a]), %%r8\n\t"
-        "subq	%%r13, %%rax\n\t"
-        "movq	0(%[m]), %%r9\n\t"
-        "movq	8(%[m]), %%r10\n\t"
-        "movq	16(%[m]), %%r11\n\t"
-        "movq	24(%[m]), %%r12\n\t"
-        "andq	%%rax, %%r9\n\t"
-        "andq	%%rax, %%r10\n\t"
-        "andq	%%rax, %%r11\n\t"
-        "andq	%%rax, %%r12\n\t"
-        "subq	%%r9, %%rdx\n\t"
-        "sbbq	%%r10, %%rcx\n\t"
-        "sbbq	%%r11, %%rbx\n\t"
-        "sbbq	%%r12, %%r8\n\t"
-        "movq	%%rdx,   (%[a])\n\t"
-        "movq	%%rcx,  8(%[a])\n\t"
-        "movq	%%rbx, 16(%[a])\n\t"
-        "movq	%%r8, 24(%[a])\n\t"
-        :
-        : [a] "r" (a), [m] "r" (m), [mp] "r" (mp)
-        : "memory", "rax", "rbx", "rdx", "rcx", "r8", "r9", "r10", "r11",
-          "r12", "r13"
-    );
-}
-
-/* Multiply two Montogmery form numbers mod the modulus (prime).
- * (r = a * b mod m)
- *
- * r   Result of multiplication.
- * a   First number to multiply in Montogmery form.
- * b   Second number to multiply in Montogmery form.
- * m   Modulus (prime).
- * mp  Montogmery mulitplier.
- */
-SP_NOINLINE static void sp_256_mont_mul_4(sp_digit* r, sp_digit* a, sp_digit* b,
-        sp_digit* m, sp_digit mp)
-{
-    (void)mp;
-
-    __asm__ __volatile__ (
-        "#  A[0] * B[0]\n\t"
-        "movq	0(%[b]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "movq	%%rax, %%r8\n\t"
-        "movq	%%rdx, %%r9\n\t"
-        "#  A[0] * B[1]\n\t"
-        "movq	8(%[b]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%r10, %%r10\n\t"
-        "addq	%%rax, %%r9\n\t"
-        "adcq	%%rdx, %%r10\n\t"
-        "#  A[1] * B[0]\n\t"
-        "movq	0(%[b]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "xorq	%%r11, %%r11\n\t"
-        "addq	%%rax, %%r9\n\t"
-        "adcq	%%rdx, %%r10\n\t"
-        "adcq	$0, %%r11\n\t"
-        "#  A[0] * B[2]\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "#  A[1] * B[1]\n\t"
-        "movq	8(%[b]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[2] * B[0]\n\t"
-        "movq	0(%[b]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "adcq	$0, %%r12\n\t"
-        "#  A[0] * B[3]\n\t"
-        "movq	24(%[b]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%r13, %%r13\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r12\n\t"
-        "adcq	$0, %%r13\n\t"
-        "#  A[1] * B[2]\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r12\n\t"
-        "adcq	$0, %%r13\n\t"
-        "#  A[2] * B[1]\n\t"
-        "movq	8(%[b]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r12\n\t"
-        "adcq	$0, %%r13\n\t"
-        "#  A[3] * B[0]\n\t"
-        "movq	0(%[b]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r12\n\t"
-        "adcq	$0, %%r13\n\t"
-        "#  A[1] * B[3]\n\t"
-        "movq	24(%[b]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "xorq	%%r14, %%r14\n\t"
-        "addq	%%rax, %%r12\n\t"
-        "adcq	%%rdx, %%r13\n\t"
-        "adcq	$0, %%r14\n\t"
-        "#  A[2] * B[2]\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r12\n\t"
-        "adcq	%%rdx, %%r13\n\t"
-        "adcq	$0, %%r14\n\t"
-        "#  A[3] * B[1]\n\t"
-        "movq	8(%[b]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%r12\n\t"
-        "adcq	%%rdx, %%r13\n\t"
-        "adcq	$0, %%r14\n\t"
-        "#  A[2] * B[3]\n\t"
-        "movq	24(%[b]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "xorq	%%r15, %%r15\n\t"
-        "addq	%%rax, %%r13\n\t"
-        "adcq	%%rdx, %%r14\n\t"
-        "adcq	$0, %%r15\n\t"
-        "#  A[3] * B[2]\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%r13\n\t"
-        "adcq	%%rdx, %%r14\n\t"
-        "adcq	$0, %%r15\n\t"
-        "#  A[3] * B[3]\n\t"
-        "movq	24(%[b]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%r14\n\t"
-        "adcq	%%rdx, %%r15\n\t"
-        "# Start Reduction\n\t"
-        "movq	%%r8, %%rax\n\t"
-        "movq	%%r9, %[a]\n\t"
-        "movq	%%r10, %[b]\n\t"
-        "movq	%%r11, %%rdx\n\t"
-        "# mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192\n\t"
-        "#    - a[0] << 32 << 192\n\t"
-        "#   + (a[0] * 2) << 192\n\t"
-        "addq	%%r8, %%rdx\n\t"
-        "addq	%%r8, %%rdx\n\t"
-        "#   a[0]-a[2] << 32\n\t"
-        "shlq	$32, %%r8\n\t"
-        "shldq	$32, %[a], %%r10\n\t"
-        "shldq	$32, %%rax, %%r9\n\t"
-        "#   - a[0] << 32 << 192\n\t"
-        "subq	%%r8, %%rdx\n\t"
-        "#   + a[0]-a[2] << 32 << 64\n\t"
-        "addq	%%r8, %[a]\n\t"
-        "adcq	%%r9, %[b]\n\t"
-        "adcq	%%r10, %%rdx\n\t"
-        "# a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu\n\t"
-        "#   a += mu << 256\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "addq	%%rax, %%r12\n\t"
-        "adcq	%[a], %%r13\n\t"
-        "adcq	%[b], %%r14\n\t"
-        "adcq	%%rdx, %%r15\n\t"
-        "sbbq	$0, %%r8\n\t"
-        "#   a += mu << 192\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%[a], %%r12\n\t"
-        "adcq	%[b], %%r13\n\t"
-        "adcq	%%rdx, %%r14\n\t"
-        "adcq	$0, %%r15\n\t"
-        "sbbq	$0, %%r8\n\t"
-        "# mu <<= 32\n\t"
-        "movq	%%rdx, %[m]\n\t"
-        "shldq	$32, %[b], %%rdx\n\t"
-        "shldq	$32, %[a], %[b]\n\t"
-        "shldq	$32, %%rax, %[a]\n\t"
-        "shlq	$32, %%rax\n\t"
-        "shrq	$32, %[m]\n\t"
-        "#   a += (mu << 32) << 64\n\t"
-        "addq	%[b], %%r11\n\t"
-        "adcq	%%rdx, %%r12\n\t"
-        "adcq	%[m], %%r13\n\t"
-        "adcq	$0, %%r14\n\t"
-        "adcq	$0, %%r15\n\t"
-        "sbbq	$0, %%r8\n\t"
-        "#   a -= (mu << 32) << 192\n\t"
-        "subq	%%rax, %%r11\n\t"
-        "movq	$0xffffffff, %%rax\n\t"
-        "sbbq	%[a], %%r12\n\t"
-        "movq	$0xffffffff00000001, %[a]\n\t"
-        "sbbq	%[b], %%r13\n\t"
-        "sbbq	%%rdx, %%r14\n\t"
-        "sbbq	%[m], %%r15\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# mask m and sub from result if overflow\n\t"
-        "#  m[0] = -1 & mask = mask\n\t"
-        "andq	%%r8, %%rax\n\t"
-        "#  m[2] =  0 & mask = 0\n\t"
-        "andq	%%r8, %[a]\n\t"
-        "subq	%%r8, %%r12\n\t"
-        "sbbq	%%rax, %%r13\n\t"
-        "sbbq	$0, %%r14\n\t"
-        "sbbq	%[a], %%r15\n\t"
-        "movq	%%r12, 0(%[r])\n\t"
-        "movq	%%r13, 8(%[r])\n\t"
-        "movq	%%r14, 16(%[r])\n\t"
-        "movq	%%r15, 24(%[r])\n\t"
-        : [m] "+r" (m), [a] "+r" (a), [b] "+r" (b)
-        : [r] "r" (r)
-        : "memory", "rax", "rdx", "r8", "r9", "r10", "r11",
-          "r12", "r13", "r14", "r15"
-    );
-}
-
-/* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m)
- *
- * r   Result of squaring.
- * a   Number to square in Montogmery form.
- * m   Modulus (prime).
- * mp  Montogmery mulitplier.
- */
-SP_NOINLINE static void sp_256_mont_sqr_4(sp_digit* r, sp_digit* a, sp_digit* m,
-        sp_digit mp)
-{
-    __asm__ __volatile__ (
-        "#  A[0] * A[1]\n\t"
-        "movq	0(%[a]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "movq	%%rax, %%r9\n\t"
-        "movq	%%rdx, %%r10\n\t"
-        "#  A[0] * A[2]\n\t"
-        "movq	0(%[a]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "xorq	%%r11, %%r11\n\t"
-        "addq	%%rax, %%r10\n\t"
-        "adcq	%%rdx, %%r11\n\t"
-        "#  A[0] * A[3]\n\t"
-        "movq	0(%[a]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r12\n\t"
-        "#  A[1] * A[2]\n\t"
-        "movq	8(%[a]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "xorq	%%r13, %%r13\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%%rdx, %%r12\n\t"
-        "adcq	$0, %%r13\n\t"
-        "#  A[1] * A[3]\n\t"
-        "movq	8(%[a]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%r12\n\t"
-        "adcq	%%rdx, %%r13\n\t"
-        "#  A[2] * A[3]\n\t"
-        "movq	16(%[a]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "xorq	%%r14, %%r14\n\t"
-        "addq	%%rax, %%r13\n\t"
-        "adcq	%%rdx, %%r14\n\t"
-        "# Double\n\t"
-        "xorq	%%r15, %%r15\n\t"
-        "addq	%%r9, %%r9\n\t"
-        "adcq	%%r10, %%r10\n\t"
-        "adcq	%%r11, %%r11\n\t"
-        "adcq	%%r12, %%r12\n\t"
-        "adcq	%%r13, %%r13\n\t"
-        "adcq	%%r14, %%r14\n\t"
-        "adcq	$0, %%r15\n\t"
-        "#  A[0] * A[0]\n\t"
-        "movq	0(%[a]), %%rax\n\t"
-        "mulq	%%rax\n\t"
-        "movq	%%rax, %%r8\n\t"
-        "movq	%%rdx, %[mp]\n\t"
-        "#  A[1] * A[1]\n\t"
-        "movq	8(%[a]), %%rax\n\t"
-        "mulq	%%rax\n\t"
-        "addq	%[mp], %%r9\n\t"
-        "adcq	%%rax, %%r10\n\t"
-        "adcq	$0, %%rdx\n\t"
-        "movq	%%rdx, %[mp]\n\t"
-        "#  A[2] * A[2]\n\t"
-        "movq	16(%[a]), %%rax\n\t"
-        "mulq	%%rax\n\t"
-        "addq	%[mp], %%r11\n\t"
-        "adcq	%%rax, %%r12\n\t"
-        "adcq	$0, %%rdx\n\t"
-        "movq	%%rdx, %[mp]\n\t"
-        "#  A[3] * A[3]\n\t"
-        "movq	24(%[a]), %%rax\n\t"
-        "mulq	%%rax\n\t"
-        "addq	%%rax, %%r14\n\t"
-        "adcq	%%rdx, %%r15\n\t"
-        "addq	%[mp], %%r13\n\t"
-        "adcq	$0, %%r14\n\t"
-        "adcq	$0, %%r15\n\t"
-        "# Start Reduction\n\t"
-        "movq	%%r8, %%rax\n\t"
-        "movq	%%r9, %[a]\n\t"
-        "movq	%%r10, %[mp]\n\t"
-        "movq	%%r11, %%rdx\n\t"
-        "# mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192\n\t"
-        "#    - a[0] << 32 << 192\n\t"
-        "#   + (a[0] * 2) << 192\n\t"
-        "addq	%%r8, %%rdx\n\t"
-        "addq	%%r8, %%rdx\n\t"
-        "#   a[0]-a[2] << 32\n\t"
-        "shlq	$32, %%r8\n\t"
-        "shldq	$32, %[a], %%r10\n\t"
-        "shldq	$32, %%rax, %%r9\n\t"
-        "#   - a[0] << 32 << 192\n\t"
-        "subq	%%r8, %%rdx\n\t"
-        "#   + a[0]-a[2] << 32 << 64\n\t"
-        "addq	%%r8, %[a]\n\t"
-        "adcq	%%r9, %[mp]\n\t"
-        "adcq	%%r10, %%rdx\n\t"
-        "# a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu\n\t"
-        "#   a += mu << 256\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "addq	%%rax, %%r12\n\t"
-        "adcq	%[a], %%r13\n\t"
-        "adcq	%[mp], %%r14\n\t"
-        "adcq	%%rdx, %%r15\n\t"
-        "sbbq	$0, %%r8\n\t"
-        "#   a += mu << 192\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%[a], %%r12\n\t"
-        "adcq	%[mp], %%r13\n\t"
-        "adcq	%%rdx, %%r14\n\t"
-        "adcq	$0, %%r15\n\t"
-        "sbbq	$0, %%r8\n\t"
-        "# mu <<= 32\n\t"
-        "movq	%%rdx, %[m]\n\t"
-        "shldq	$32, %[mp], %%rdx\n\t"
-        "shldq	$32, %[a], %[mp]\n\t"
-        "shldq	$32, %%rax, %[a]\n\t"
-        "shlq	$32, %%rax\n\t"
-        "shrq	$32, %[m]\n\t"
-        "#   a += (mu << 32) << 64\n\t"
-        "addq	%[mp], %%r11\n\t"
-        "adcq	%%rdx, %%r12\n\t"
-        "adcq	%[m], %%r13\n\t"
-        "adcq	$0, %%r14\n\t"
-        "adcq	$0, %%r15\n\t"
-        "sbbq	$0, %%r8\n\t"
-        "#   a -= (mu << 32) << 192\n\t"
-        "subq	%%rax, %%r11\n\t"
-        "movq	$0xffffffff, %%rax\n\t"
-        "sbbq	%[a], %%r12\n\t"
-        "movq	$0xffffffff00000001, %[a]\n\t"
-        "sbbq	%[mp], %%r13\n\t"
-        "sbbq	%%rdx, %%r14\n\t"
-        "sbbq	%[m], %%r15\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# mask m and sub from result if overflow\n\t"
-        "#  m[0] = -1 & mask = mask\n\t"
-        "andq	%%r8, %%rax\n\t"
-        "#  m[2] =  0 & mask = 0\n\t"
-        "andq	%%r8, %[a]\n\t"
-        "subq	%%r8, %%r12\n\t"
-        "sbbq	%%rax, %%r13\n\t"
-        "sbbq	$0, %%r14\n\t"
-        "sbbq	%[a], %%r15\n\t"
-        "movq	%%r12, 0(%[r])\n\t"
-        "movq	%%r13, 8(%[r])\n\t"
-        "movq	%%r14, 16(%[r])\n\t"
-        "movq	%%r15, 24(%[r])\n\t"
-        : [m] "+r" (m), [a] "+r" (a), [mp] "+r" (mp)
-        : [r] "r" (r)
-        : "memory", "rax", "rdx", "r8", "r9", "r10", "r11",
-          "r12", "r13", "r14", "r15"
-    );
-}
-
-#ifndef WOLFSSL_SP_SMALL
-/* Square the Montgomery form number a number of times. (r = a ^ n mod m)
- *
- * r   Result of squaring.
- * a   Number to square in Montogmery form.
- * n   Number of times to square.
- * m   Modulus (prime).
- * mp  Montogmery mulitplier.
- */
-static void sp_256_mont_sqr_n_4(sp_digit* r, sp_digit* a, int n,
-        sp_digit* m, sp_digit mp)
-{
-    sp_256_mont_sqr_4(r, a, m, mp);
-    for (; n > 1; n--)
-        sp_256_mont_sqr_4(r, r, m, mp);
-}
-
-#else
-/* Mod-2 for the P256 curve. */
-static const uint64_t p256_mod_2[4] = {
-    0xfffffffffffffffd,0x00000000ffffffff,0x0000000000000000,
-    0xffffffff00000001
-};
-#endif /* !WOLFSSL_SP_SMALL */
-
-/* Invert the number, in Montgomery form, modulo the modulus (prime) of the
- * P256 curve. (r = 1 / a mod m)
- *
- * r   Inverse result.
- * a   Number to invert.
- * td  Temporary data.
- */
-static void sp_256_mont_inv_4(sp_digit* r, sp_digit* a, sp_digit* td)
-{
-#ifdef WOLFSSL_SP_SMALL
-    sp_digit* t = td;
-    int i;
-
-    XMEMCPY(t, a, sizeof(sp_digit) * 4);
-    for (i=254; i>=0; i--) {
-        sp_256_mont_sqr_4(t, t, p256_mod, p256_mp_mod);
-        if (p256_mod_2[i / 64] & ((sp_digit)1 << (i % 64)))
-            sp_256_mont_mul_4(t, t, a, p256_mod, p256_mp_mod);
-    }
-    XMEMCPY(r, t, sizeof(sp_digit) * 4);
-#else
-    sp_digit* t = td;
-    sp_digit* t2 = td + 2 * 4;
-    sp_digit* t3 = td + 4 * 4;
-
-    /* t = a^2 */
-    sp_256_mont_sqr_4(t, a, p256_mod, p256_mp_mod);
-    /* t = a^3 = t * a */
-    sp_256_mont_mul_4(t, t, a, p256_mod, p256_mp_mod);
-    /* t2= a^c = t ^ 2 ^ 2 */
-    sp_256_mont_sqr_n_4(t2, t, 2, p256_mod, p256_mp_mod);
-    /* t3= a^d = t2 * a */
-    sp_256_mont_mul_4(t3, t2, a, p256_mod, p256_mp_mod);
-    /* t = a^f = t2 * t */
-    sp_256_mont_mul_4(t, t2, t, p256_mod, p256_mp_mod);
-    /* t2= a^f0 = t ^ 2 ^ 4 */
-    sp_256_mont_sqr_n_4(t2, t, 4, p256_mod, p256_mp_mod);
-    /* t3= a^fd = t2 * t3 */
-    sp_256_mont_mul_4(t3, t2, t3, p256_mod, p256_mp_mod);
-    /* t = a^ff = t2 * t */
-    sp_256_mont_mul_4(t, t2, t, p256_mod, p256_mp_mod);
-    /* t2= a^ff00 = t ^ 2 ^ 8 */
-    sp_256_mont_sqr_n_4(t2, t, 8, p256_mod, p256_mp_mod);
-    /* t3= a^fffd = t2 * t3 */
-    sp_256_mont_mul_4(t3, t2, t3, p256_mod, p256_mp_mod);
-    /* t = a^ffff = t2 * t */
-    sp_256_mont_mul_4(t, t2, t, p256_mod, p256_mp_mod);
-    /* t2= a^ffff0000 = t ^ 2 ^ 16 */
-    sp_256_mont_sqr_n_4(t2, t, 16, p256_mod, p256_mp_mod);
-    /* t3= a^fffffffd = t2 * t3 */
-    sp_256_mont_mul_4(t3, t2, t3, p256_mod, p256_mp_mod);
-    /* t = a^ffffffff = t2 * t */
-    sp_256_mont_mul_4(t, t2, t, p256_mod, p256_mp_mod);
-    /* t = a^ffffffff00000000 = t ^ 2 ^ 32  */
-    sp_256_mont_sqr_n_4(t2, t, 32, p256_mod, p256_mp_mod);
-    /* t2= a^ffffffffffffffff = t2 * t */
-    sp_256_mont_mul_4(t, t2, t, p256_mod, p256_mp_mod);
-    /* t2= a^ffffffff00000001 = t2 * a */
-    sp_256_mont_mul_4(t2, t2, a, p256_mod, p256_mp_mod);
-    /* t2= a^ffffffff000000010000000000000000000000000000000000000000
-     *   = t2 ^ 2 ^ 160 */
-    sp_256_mont_sqr_n_4(t2, t2, 160, p256_mod, p256_mp_mod);
-    /* t2= a^ffffffff00000001000000000000000000000000ffffffffffffffff
-     *   = t2 * t */
-    sp_256_mont_mul_4(t2, t2, t, p256_mod, p256_mp_mod);
-    /* t2= a^ffffffff00000001000000000000000000000000ffffffffffffffff00000000
-     *   = t2 ^ 2 ^ 32 */
-    sp_256_mont_sqr_n_4(t2, t2, 32, p256_mod, p256_mp_mod);
-    /* r = a^ffffffff00000001000000000000000000000000fffffffffffffffffffffffd
-     *   = t2 * t3 */
-    sp_256_mont_mul_4(r, t2, t3, p256_mod, p256_mp_mod);
-#endif /* WOLFSSL_SP_SMALL */
-}
-
-/* Map the Montgomery form projective co-ordinate point to an affine point.
- *
- * r  Resulting affine co-ordinate point.
- * p  Montgomery form projective co-ordinate point.
- * t  Temporary ordinate data.
- */
-static void sp_256_map_4(sp_point* r, sp_point* p, sp_digit* t)
-{
-    sp_digit* t1 = t;
-    sp_digit* t2 = t + 2*4;
-    int64_t n;
-
-    sp_256_mont_inv_4(t1, p->z, t + 2*4);
-
-    sp_256_mont_sqr_4(t2, t1, p256_mod, p256_mp_mod);
-    sp_256_mont_mul_4(t1, t2, t1, p256_mod, p256_mp_mod);
-
-    /* x /= z^2 */
-    sp_256_mont_mul_4(r->x, p->x, t2, p256_mod, p256_mp_mod);
-    XMEMSET(r->x + 4, 0, sizeof(r->x) / 2);
-    sp_256_mont_reduce_4(r->x, p256_mod, p256_mp_mod);
-    /* Reduce x to less than modulus */
-    n = sp_256_cmp_4(r->x, p256_mod);
-    sp_256_cond_sub_4(r->x, r->x, p256_mod, 0 - (n >= 0));
-    sp_256_norm_4(r->x);
-
-    /* y /= z^3 */
-    sp_256_mont_mul_4(r->y, p->y, t1, p256_mod, p256_mp_mod);
-    XMEMSET(r->y + 4, 0, sizeof(r->y) / 2);
-    sp_256_mont_reduce_4(r->y, p256_mod, p256_mp_mod);
-    /* Reduce y to less than modulus */
-    n = sp_256_cmp_4(r->y, p256_mod);
-    sp_256_cond_sub_4(r->y, r->y, p256_mod, 0 - (n >= 0));
-    sp_256_norm_4(r->y);
-
-    XMEMSET(r->z, 0, sizeof(r->z));
-    r->z[0] = 1;
-
-}
-
-/* Add two Montgomery form numbers (r = a + b % m).
- *
- * r   Result of addition.
- * a   First number to add in Montogmery form.
- * b   Second number to add in Montogmery form.
- * m   Modulus (prime).
- */
-static void sp_256_mont_add_4(sp_digit* r, sp_digit* a, sp_digit* b,
-        sp_digit* m)
-{
-    __asm__ __volatile__ (
-        "movq	0(%[a]), %%rax\n\t"
-        "movq	8(%[a]), %%rcx\n\t"
-        "movq	16(%[a]), %%rdx\n\t"
-        "movq	24(%[a]), %%r10\n\t"
-        "xorq	%%r11, %%r11\n\t"
-        "addq	0(%[b]), %%rax\n\t"
-        "adcq	8(%[b]), %%rcx\n\t"
-        "movq	$0xffffffff, %%r8\n\t"
-        "adcq	16(%[b]), %%rdx\n\t"
-        "adcq	24(%[b]), %%r10\n\t"
-        "movq	$0xffffffff00000001, %%r9\n\t"
-        "sbbq	$0, %%r11\n\t"
-        "andq	%%r11, %%r8\n\t"
-        "andq	%%r11, %%r9\n\t"
-        "subq	%%r11, %%rax\n\t"
-        "sbbq	%%r8, %%rcx\n\t"
-        "movq	%%rax, 0(%[r])\n\t"
-        "sbbq	$0, %%rdx\n\t"
-        "movq	%%rcx, 8(%[r])\n\t"
-        "sbbq	%%r9, %%r10\n\t"
-        "movq	%%rdx, 16(%[r])\n\t"
-        "movq	%%r10, 24(%[r])\n\t"
-        :
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
-        : "memory", "rax", "rcx", "rdx", "r8", "r9", "r10", "r11"
-    );
-}
-
-/* Double a Montgomery form number (r = a + a % m).
- *
- * r   Result of doubling.
- * a   Number to double in Montogmery form.
- * m   Modulus (prime).
- */
-static void sp_256_mont_dbl_4(sp_digit* r, sp_digit* a, sp_digit* m)
-{
-    __asm__ __volatile__ (
-        "movq	(%[a]), %%rax\n\t"
-        "movq	8(%[a]), %%rcx\n\t"
-        "movq	16(%[a]), %%rdx\n\t"
-        "movq	24(%[a]), %%r10\n\t"
-        "xorq	%%r11, %%r11\n\t"
-        "addq	%%rax, %%rax\n\t"
-        "adcq	%%rcx, %%rcx\n\t"
-        "movq	$0xffffffff, %%r8\n\t"
-        "adcq	%%rdx, %%rdx\n\t"
-        "movq	$0xffffffff00000001, %%r9\n\t"
-        "adcq	%%r10, %%r10\n\t"
-        "sbbq	$0, %%r11\n\t"
-        "andq	%%r11, %%r8\n\t"
-        "andq	%%r11, %%r9\n\t"
-        "subq	%%r11, %%rax\n\t"
-        "sbbq	%%r8, %%rcx\n\t"
-        "movq	%%rax, 0(%[r])\n\t"
-        "sbbq	$0, %%rdx\n\t"
-        "movq	%%rcx, 8(%[r])\n\t"
-        "sbbq	%%r9, %%r10\n\t"
-        "movq	%%rdx, 16(%[r])\n\t"
-        "movq	%%r10, 24(%[r])\n\t"
-        :
-        : [r] "r" (r), [a] "r" (a)
-        : "memory", "rax", "rcx", "rdx", "r8", "r9", "r10", "r11"
-    );
-
-    (void)m;
-}
-
-/* Triple a Montgomery form number (r = a + a + a % m).
- *
- * r   Result of Tripling.
- * a   Number to triple in Montogmery form.
- * m   Modulus (prime).
- */
-static void sp_256_mont_tpl_4(sp_digit* r, sp_digit* a, sp_digit* m)
-{
-    __asm__ __volatile__ (
-        "movq	(%[a]), %%rax\n\t"
-        "movq	8(%[a]), %%rcx\n\t"
-        "movq	16(%[a]), %%rdx\n\t"
-        "movq	24(%[a]), %%r10\n\t"
-        "xorq	%%r11, %%r11\n\t"
-        "addq	%%rax, %%rax\n\t"
-        "adcq	%%rcx, %%rcx\n\t"
-        "movq	$0xffffffff, %%r8\n\t"
-        "adcq	%%rdx, %%rdx\n\t"
-        "adcq	%%r10, %%r10\n\t"
-        "movq	$0xffffffff00000001, %%r9\n\t"
-        "sbbq	$0, %%r11\n\t"
-        "andq	%%r11, %%r8\n\t"
-        "andq	%%r11, %%r9\n\t"
-        "subq	%%r11, %%rax\n\t"
-        "sbbq	%%r8, %%rcx\n\t"
-        "sbbq	$0, %%rdx\n\t"
-        "sbbq	%%r9, %%r10\n\t"
-        "xorq	%%r11, %%r11\n\t"
-        "addq	(%[a]), %%rax\n\t"
-        "adcq	8(%[a]), %%rcx\n\t"
-        "movq	$0xffffffff, %%r8\n\t"
-        "adcq	16(%[a]), %%rdx\n\t"
-        "adcq	24(%[a]), %%r10\n\t"
-        "movq	$0xffffffff00000001, %%r9\n\t"
-        "sbbq	$0, %%r11\n\t"
-        "andq	%%r11, %%r8\n\t"
-        "andq	%%r11, %%r9\n\t"
-        "subq	%%r11, %%rax\n\t"
-        "sbbq	%%r8, %%rcx\n\t"
-        "sbbq	$0, %%rdx\n\t"
-        "sbbq	%%r9, %%r10\n\t"
-        "movq	%%rax, 0(%[r])\n\t"
-        "movq	%%rcx, 8(%[r])\n\t"
-        "movq	%%rdx, 16(%[r])\n\t"
-        "movq	%%r10, 24(%[r])\n\t"
-        :
-        : [r] "r" (r), [a] "r" (a)
-        : "memory", "rax", "rcx", "rdx", "r8", "r9", "r10", "r11"
-    );
-
-    (void)m;
-}
-
-/* Subtract two Montgomery form numbers (r = a - b % m).
- *
- * r   Result of subtration.
- * a   Number to subtract from in Montogmery form.
- * b   Number to subtract with in Montogmery form.
- * m   Modulus (prime).
- */
-static void sp_256_mont_sub_4(sp_digit* r, sp_digit* a, sp_digit* b,
-        sp_digit* m)
-{
-    __asm__ __volatile__ (
-        "movq	0(%[a]), %%rax\n\t"
-        "movq	8(%[a]), %%rcx\n\t"
-        "movq	16(%[a]), %%rdx\n\t"
-        "movq	24(%[a]), %%r10\n\t"
-        "xorq	%%r11, %%r11\n\t"
-        "subq	0(%[b]), %%rax\n\t"
-        "sbbq	8(%[b]), %%rcx\n\t"
-        "movq	$0xffffffff, %%r8\n\t"
-        "sbbq	16(%[b]), %%rdx\n\t"
-        "sbbq	24(%[b]), %%r10\n\t"
-        "movq	$0xffffffff00000001, %%r9\n\t"
-        "sbbq	$0, %%r11\n\t"
-        "andq	%%r11, %%r8\n\t"
-        "andq	%%r11, %%r9\n\t"
-        "addq	%%r11, %%rax\n\t"
-        "adcq	%%r8, %%rcx\n\t"
-        "movq	%%rax, 0(%[r])\n\t"
-        "adcq	$0, %%rdx\n\t"
-        "movq	%%rcx, 8(%[r])\n\t"
-        "adcq	%%r9, %%r10\n\t"
-        "movq	%%rdx, 16(%[r])\n\t"
-        "movq	%%r10, 24(%[r])\n\t"
-        :
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
-        : "memory", "rax", "rcx", "rdx", "r8", "r9", "r10", "r11"
-    );
-}
-
-/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
- *
- * r  Result of division by 2.
- * a  Number to divide.
- * m  Modulus (prime).
- */
-SP_NOINLINE static void sp_256_div2_4(sp_digit* r, sp_digit* a, sp_digit* m)
-{
-    __asm__ __volatile__ (
-        "movq	0(%[a]), %%rax\n\t"
-        "movq	8(%[a]), %%rdx\n\t"
-        "movq	16(%[a]), %%rcx\n\t"
-        "movq	24(%[a]), %%r10\n\t"
-        "movq	$0xffffffff, %%r8\n\t"
-        "movq	$0xffffffff00000001, %%r9\n\t"
-        "xorq	%%r12, %%r12\n\t"
-        "movq	%%rax, %%r11\n\t"
-        "andq	$1, %%r11\n\t"
-        "subq	%%r11, %%r12\n\t"
-        "andq	%%r12, %%r8\n\t"
-        "andq	%%r12, %%r9\n\t"
-        "xorq	%%r11, %%r11\n\t"
-        "addq	%%r12, %%rax\n\t"
-        "adcq	%%r8, %%rdx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "adcq	%%r9, %%r10\n\t"
-        "adcq	$0, %%r11\n\t"
-        "shrdq	$1, %%rdx, %%rax\n\t"
-        "shrdq	$1, %%rcx, %%rdx\n\t"
-        "shrdq	$1, %%r10, %%rcx\n\t"
-        "shrdq	$1, %%r11, %%r10\n\t"
-        "movq	%%rax, 0(%[r])\n\t"
-        "movq	%%rdx, 8(%[r])\n\t"
-        "movq	%%rcx, 16(%[r])\n\t"
-        "movq	%%r10, 24(%[r])\n\t"
-        :
-        : [r] "r" (r), [a] "r" (a), [m] "r" (m)
-        : "memory", "rax", "rdx", "rcx", "r8", "r9", "r10", "r11", "r12"
-    );
-
-}
-
-/* Double the Montgomery form projective point p.
- *
- * r  Result of doubling point.
- * p  Point to double.
- * t  Temporary ordinate data.
- */
-static void sp_256_proj_point_dbl_4(sp_point* r, sp_point* p, sp_digit* t)
-{
-    sp_point *rp[2];
-    sp_point tp;
-    sp_digit* t1 = t;
-    sp_digit* t2 = t + 2*4;
-    sp_digit* x;
-    sp_digit* y;
-    sp_digit* z;
-    int i;
-
-    /* When infinity don't double point passed in - constant time. */
-    rp[0] = r;
-    rp[1] = &tp;
-    x = rp[p->infinity]->x;
-    y = rp[p->infinity]->y;
-    z = rp[p->infinity]->z;
-    /* Put point to double into result - good for infinty. */
-    if (r != p) {
-        for (i=0; i<4; i++)
-            r->x[i] = p->x[i];
-        for (i=0; i<4; i++)
-            r->y[i] = p->y[i];
-        for (i=0; i<4; i++)
-            r->z[i] = p->z[i];
-        r->infinity = p->infinity;
-    }
-
-    /* T1 = Z * Z */
-    sp_256_mont_sqr_4(t1, z, p256_mod, p256_mp_mod);
-    /* Z = Y * Z */
-    sp_256_mont_mul_4(z, y, z, p256_mod, p256_mp_mod);
-    /* Z = 2Z */
-    sp_256_mont_dbl_4(z, z, p256_mod);
-    /* T2 = X - T1 */
-    sp_256_mont_sub_4(t2, x, t1, p256_mod);
-    /* T1 = X + T1 */
-    sp_256_mont_add_4(t1, x, t1, p256_mod);
-    /* T2 = T1 * T2 */
-    sp_256_mont_mul_4(t2, t1, t2, p256_mod, p256_mp_mod);
-    /* T1 = 3T2 */
-    sp_256_mont_tpl_4(t1, t2, p256_mod);
-    /* Y = 2Y */
-    sp_256_mont_dbl_4(y, y, p256_mod);
-    /* Y = Y * Y */
-    sp_256_mont_sqr_4(y, y, p256_mod, p256_mp_mod);
-    /* T2 = Y * Y */
-    sp_256_mont_sqr_4(t2, y, p256_mod, p256_mp_mod);
-    /* T2 = T2/2 */
-    sp_256_div2_4(t2, t2, p256_mod);
-    /* Y = Y * X */
-    sp_256_mont_mul_4(y, y, x, p256_mod, p256_mp_mod);
-    /* X = T1 * T1 */
-    sp_256_mont_mul_4(x, t1, t1, p256_mod, p256_mp_mod);
-    /* X = X - Y */
-    sp_256_mont_sub_4(x, x, y, p256_mod);
-    /* X = X - Y */
-    sp_256_mont_sub_4(x, x, y, p256_mod);
-    /* Y = Y - X */
-    sp_256_mont_sub_4(y, y, x, p256_mod);
-    /* Y = Y * T1 */
-    sp_256_mont_mul_4(y, y, t1, p256_mod, p256_mp_mod);
-    /* Y = Y - T2 */
-    sp_256_mont_sub_4(y, y, t2, p256_mod);
-
-}
-
-/* Double the Montgomery form projective point p a number of times.
- *
- * r  Result of repeated doubling of point.
- * p  Point to double.
- * n  Number of times to double
- * t  Temporary ordinate data.
- */
-static void sp_256_proj_point_dbl_n_4(sp_point* r, sp_point* p, int n,
-        sp_digit* t)
-{
-    sp_point *rp[2];
-    sp_point tp;
-    sp_digit* w = t;
-    sp_digit* a = t + 2*4;
-    sp_digit* b = t + 4*4;
-    sp_digit* t1 = t + 6*4;
-    sp_digit* t2 = t + 8*4;
-    sp_digit* x;
-    sp_digit* y;
-    sp_digit* z;
-    int i;
-
-    rp[0] = r;
-    rp[1] = &tp;
-    x = rp[p->infinity]->x;
-    y = rp[p->infinity]->y;
-    z = rp[p->infinity]->z;
-    if (r != p) {
-        for (i=0; i<4; i++)
-            r->x[i] = p->x[i];
-        for (i=0; i<4; i++)
-            r->y[i] = p->y[i];
-        for (i=0; i<4; i++)
-            r->z[i] = p->z[i];
-        r->infinity = p->infinity;
-    }
-
-    /* Y = 2*Y */
-    sp_256_mont_dbl_4(y, y, p256_mod);
-    /* W = Z^4 */
-    sp_256_mont_sqr_4(w, z, p256_mod, p256_mp_mod);
-    sp_256_mont_sqr_4(w, w, p256_mod, p256_mp_mod);
-    while (n--) {
-        /* A = 3*(X^2 - W) */
-        sp_256_mont_sqr_4(t1, x, p256_mod, p256_mp_mod);
-        sp_256_mont_sub_4(t1, t1, w, p256_mod);
-        sp_256_mont_tpl_4(a, t1, p256_mod);
-        /* B = X*Y^2 */
-        sp_256_mont_sqr_4(t2, y, p256_mod, p256_mp_mod);
-        sp_256_mont_mul_4(b, t2, x, p256_mod, p256_mp_mod);
-        /* X = A^2 - 2B */
-        sp_256_mont_sqr_4(x, a, p256_mod, p256_mp_mod);
-        sp_256_mont_dbl_4(t1, b, p256_mod);
-        sp_256_mont_sub_4(x, x, t1, p256_mod);
-        /* Z = Z*Y */
-        sp_256_mont_mul_4(z, z, y, p256_mod, p256_mp_mod);
-        /* t2 = Y^4 */
-        sp_256_mont_sqr_4(t2, t2, p256_mod, p256_mp_mod);
-        if (n) {
-            /* W = W*Y^4 */
-            sp_256_mont_mul_4(w, w, t2, p256_mod, p256_mp_mod);
-        }
-        /* y = 2*A*(B - X) - Y^4 */
-        sp_256_mont_sub_4(y, b, x, p256_mod);
-        sp_256_mont_mul_4(y, y, a, p256_mod, p256_mp_mod);
-        sp_256_mont_dbl_4(y, y, p256_mod);
-        sp_256_mont_sub_4(y, y, t2, p256_mod);
-    }
-    /* Y = Y/2 */
-    sp_256_div2_4(y, y, p256_mod);
-}
-
-/* Compare two numbers to determine if they are equal.
- * Constant time implementation.
- *
- * a  First number to compare.
- * b  Second number to compare.
- * returns 1 when equal and 0 otherwise.
- */
-static int sp_256_cmp_equal_4(const sp_digit* a, const sp_digit* b)
-{
-    return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3])) == 0;
-}
-
-/* Add two Montgomery form projective points.
- *
- * r  Result of addition.
- * p  Frist point to add.
- * q  Second point to add.
- * t  Temporary ordinate data.
- */
-static void sp_256_proj_point_add_4(sp_point* r, sp_point* p, sp_point* q,
-        sp_digit* t)
-{
-    sp_point *ap[2];
-    sp_point *rp[2];
-    sp_point tp;
-    sp_digit* t1 = t;
-    sp_digit* t2 = t + 2*4;
-    sp_digit* t3 = t + 4*4;
-    sp_digit* t4 = t + 6*4;
-    sp_digit* t5 = t + 8*4;
-    sp_digit* x;
-    sp_digit* y;
-    sp_digit* z;
-    int i;
-
-    /* Ensure only the first point is the same as the result. */
-    if (q == r) {
-        sp_point* a = p;
-        p = q;
-        q = a;
-    }
-
-    /* Check double */
-    sp_256_sub_4(t1, p256_mod, q->y);
-    sp_256_norm_4(t1);
-    if (sp_256_cmp_equal_4(p->x, q->x) & sp_256_cmp_equal_4(p->z, q->z) &
-        (sp_256_cmp_equal_4(p->y, q->y) | sp_256_cmp_equal_4(p->y, t1))) {
-        sp_256_proj_point_dbl_4(r, p, t);
-    }
-    else {
-        rp[0] = r;
-        rp[1] = &tp;
-        XMEMSET(&tp, 0, sizeof(tp));
-        x = rp[p->infinity | q->infinity]->x;
-        y = rp[p->infinity | q->infinity]->y;
-        z = rp[p->infinity | q->infinity]->z;
-
-        ap[0] = p;
-        ap[1] = q;
-        for (i=0; i<4; i++)
-            r->x[i] = ap[p->infinity]->x[i];
-        for (i=0; i<4; i++)
-            r->y[i] = ap[p->infinity]->y[i];
-        for (i=0; i<4; i++)
-            r->z[i] = ap[p->infinity]->z[i];
-        r->infinity = ap[p->infinity]->infinity;
-
-        /* U1 = X1*Z2^2 */
-        sp_256_mont_sqr_4(t1, q->z, p256_mod, p256_mp_mod);
-        sp_256_mont_mul_4(t3, t1, q->z, p256_mod, p256_mp_mod);
-        sp_256_mont_mul_4(t1, t1, x, p256_mod, p256_mp_mod);
-        /* U2 = X2*Z1^2 */
-        sp_256_mont_sqr_4(t2, z, p256_mod, p256_mp_mod);
-        sp_256_mont_mul_4(t4, t2, z, p256_mod, p256_mp_mod);
-        sp_256_mont_mul_4(t2, t2, q->x, p256_mod, p256_mp_mod);
-        /* S1 = Y1*Z2^3 */
-        sp_256_mont_mul_4(t3, t3, y, p256_mod, p256_mp_mod);
-        /* S2 = Y2*Z1^3 */
-        sp_256_mont_mul_4(t4, t4, q->y, p256_mod, p256_mp_mod);
-        /* H = U2 - U1 */
-        sp_256_mont_sub_4(t2, t2, t1, p256_mod);
-        /* R = S2 - S1 */
-        sp_256_mont_sub_4(t4, t4, t3, p256_mod);
-        /* Z3 = H*Z1*Z2 */
-        sp_256_mont_mul_4(z, z, q->z, p256_mod, p256_mp_mod);
-        sp_256_mont_mul_4(z, z, t2, p256_mod, p256_mp_mod);
-        /* X3 = R^2 - H^3 - 2*U1*H^2 */
-        sp_256_mont_sqr_4(x, t4, p256_mod, p256_mp_mod);
-        sp_256_mont_sqr_4(t5, t2, p256_mod, p256_mp_mod);
-        sp_256_mont_mul_4(y, t1, t5, p256_mod, p256_mp_mod);
-        sp_256_mont_mul_4(t5, t5, t2, p256_mod, p256_mp_mod);
-        sp_256_mont_sub_4(x, x, t5, p256_mod);
-        sp_256_mont_dbl_4(t1, y, p256_mod);
-        sp_256_mont_sub_4(x, x, t1, p256_mod);
-        /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
-        sp_256_mont_sub_4(y, y, x, p256_mod);
-        sp_256_mont_mul_4(y, y, t4, p256_mod, p256_mp_mod);
-        sp_256_mont_mul_4(t5, t5, t3, p256_mod, p256_mp_mod);
-        sp_256_mont_sub_4(y, y, t5, p256_mod);
-    }
-}
-
-/* Double the Montgomery form projective point p a number of times.
- *
- * r  Result of repeated doubling of point.
- * p  Point to double.
- * n  Number of times to double
- * t  Temporary ordinate data.
- */
-static void sp_256_proj_point_dbl_n_store_4(sp_point* r, sp_point* p,
-        int n, int m, sp_digit* t)
-{
-    sp_digit* w = t;
-    sp_digit* a = t + 2*4;
-    sp_digit* b = t + 4*4;
-    sp_digit* t1 = t + 6*4;
-    sp_digit* t2 = t + 8*4;
-    sp_digit* x = r[2*m].x;
-    sp_digit* y = r[(1<<n)*m].y;
-    sp_digit* z = r[2*m].z;
-    int i;
-
-    for (i=0; i<4; i++)
-        x[i] = p->x[i];
-    for (i=0; i<4; i++)
-        y[i] = p->y[i];
-    for (i=0; i<4; i++)
-        z[i] = p->z[i];
-
-    /* Y = 2*Y */
-    sp_256_mont_dbl_4(y, y, p256_mod);
-    /* W = Z^4 */
-    sp_256_mont_sqr_4(w, z, p256_mod, p256_mp_mod);
-    sp_256_mont_sqr_4(w, w, p256_mod, p256_mp_mod);
-    for (i=1; i<=n; i++) {
-        /* A = 3*(X^2 - W) */
-        sp_256_mont_sqr_4(t1, x, p256_mod, p256_mp_mod);
-        sp_256_mont_sub_4(t1, t1, w, p256_mod);
-        sp_256_mont_tpl_4(a, t1, p256_mod);
-        /* B = X*Y^2 */
-        sp_256_mont_sqr_4(t2, y, p256_mod, p256_mp_mod);
-        sp_256_mont_mul_4(b, t2, x, p256_mod, p256_mp_mod);
-        x = r[(1<<i)*m].x;
-        /* X = A^2 - 2B */
-        sp_256_mont_sqr_4(x, a, p256_mod, p256_mp_mod);
-        sp_256_mont_dbl_4(t1, b, p256_mod);
-        sp_256_mont_sub_4(x, x, t1, p256_mod);
-        /* Z = Z*Y */
-        sp_256_mont_mul_4(r[(1<<i)*m].z, z, y, p256_mod, p256_mp_mod);
-        z = r[(1<<i)*m].z;
-        /* t2 = Y^4 */
-        sp_256_mont_sqr_4(t2, t2, p256_mod, p256_mp_mod);
-        if (i != n) {
-            /* W = W*Y^4 */
-            sp_256_mont_mul_4(w, w, t2, p256_mod, p256_mp_mod);
-        }
-        /* y = 2*A*(B - X) - Y^4 */
-        sp_256_mont_sub_4(y, b, x, p256_mod);
-        sp_256_mont_mul_4(y, y, a, p256_mod, p256_mp_mod);
-        sp_256_mont_dbl_4(y, y, p256_mod);
-        sp_256_mont_sub_4(y, y, t2, p256_mod);
-
-        /* Y = Y/2 */
-        sp_256_div2_4(r[(1<<i)*m].y, y, p256_mod);
-        r[(1<<i)*m].infinity = 0;
-    }
-}
-
-/* Add two Montgomery form projective points.
- *
- * ra  Result of addition.
- * rs  Result of subtraction.
- * p   Frist point to add.
- * q   Second point to add.
- * t   Temporary ordinate data.
- */
-static void sp_256_proj_point_add_sub_4(sp_point* ra, sp_point* rs,
-        sp_point* p, sp_point* q, sp_digit* t)
-{
-    sp_digit* t1 = t;
-    sp_digit* t2 = t + 2*4;
-    sp_digit* t3 = t + 4*4;
-    sp_digit* t4 = t + 6*4;
-    sp_digit* t5 = t + 8*4;
-    sp_digit* t6 = t + 10*4;
-    sp_digit* x = ra->x;
-    sp_digit* y = ra->y;
-    sp_digit* z = ra->z;
-    sp_digit* xs = rs->x;
-    sp_digit* ys = rs->y;
-    sp_digit* zs = rs->z;
-
-
-    XMEMCPY(x, p->x, sizeof(p->x) / 2);
-    XMEMCPY(y, p->y, sizeof(p->y) / 2);
-    XMEMCPY(z, p->z, sizeof(p->z) / 2);
-    ra->infinity = 0;
-    rs->infinity = 0;
-
-    /* U1 = X1*Z2^2 */
-    sp_256_mont_sqr_4(t1, q->z, p256_mod, p256_mp_mod);
-    sp_256_mont_mul_4(t3, t1, q->z, p256_mod, p256_mp_mod);
-    sp_256_mont_mul_4(t1, t1, x, p256_mod, p256_mp_mod);
-    /* U2 = X2*Z1^2 */
-    sp_256_mont_sqr_4(t2, z, p256_mod, p256_mp_mod);
-    sp_256_mont_mul_4(t4, t2, z, p256_mod, p256_mp_mod);
-    sp_256_mont_mul_4(t2, t2, q->x, p256_mod, p256_mp_mod);
-    /* S1 = Y1*Z2^3 */
-    sp_256_mont_mul_4(t3, t3, y, p256_mod, p256_mp_mod);
-    /* S2 = Y2*Z1^3 */
-    sp_256_mont_mul_4(t4, t4, q->y, p256_mod, p256_mp_mod);
-    /* H = U2 - U1 */
-    sp_256_mont_sub_4(t2, t2, t1, p256_mod);
-    /* RS = S2 + S1 */
-    sp_256_mont_add_4(t6, t4, t3, p256_mod);
-    /* R = S2 - S1 */
-    sp_256_mont_sub_4(t4, t4, t3, p256_mod);
-    /* Z3 = H*Z1*Z2 */
-    /* ZS = H*Z1*Z2 */
-    sp_256_mont_mul_4(z, z, q->z, p256_mod, p256_mp_mod);
-    sp_256_mont_mul_4(z, z, t2, p256_mod, p256_mp_mod);
-    XMEMCPY(zs, z, sizeof(p->z)/2);
-    /* X3 = R^2 - H^3 - 2*U1*H^2 */
-    /* XS = RS^2 - H^3 - 2*U1*H^2 */
-    sp_256_mont_sqr_4(x, t4, p256_mod, p256_mp_mod);
-    sp_256_mont_sqr_4(xs, t6, p256_mod, p256_mp_mod);
-    sp_256_mont_sqr_4(t5, t2, p256_mod, p256_mp_mod);
-    sp_256_mont_mul_4(y, t1, t5, p256_mod, p256_mp_mod);
-    sp_256_mont_mul_4(t5, t5, t2, p256_mod, p256_mp_mod);
-    sp_256_mont_sub_4(x, x, t5, p256_mod);
-    sp_256_mont_sub_4(xs, xs, t5, p256_mod);
-    sp_256_mont_dbl_4(t1, y, p256_mod);
-    sp_256_mont_sub_4(x, x, t1, p256_mod);
-    sp_256_mont_sub_4(xs, xs, t1, p256_mod);
-    /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
-    /* YS = -RS*(U1*H^2 - XS) - S1*H^3 */
-    sp_256_mont_sub_4(ys, y, xs, p256_mod);
-    sp_256_mont_sub_4(y, y, x, p256_mod);
-    sp_256_mont_mul_4(y, y, t4, p256_mod, p256_mp_mod);
-    sp_256_sub_4(t6, p256_mod, t6);
-    sp_256_mont_mul_4(ys, ys, t6, p256_mod, p256_mp_mod);
-    sp_256_mont_mul_4(t5, t5, t3, p256_mod, p256_mp_mod);
-    sp_256_mont_sub_4(y, y, t5, p256_mod);
-    sp_256_mont_sub_4(ys, ys, t5, p256_mod);
-}
-
-/* Structure used to describe recoding of scalar multiplication. */
-typedef struct ecc_recode {
-    /* Index into pre-computation table. */
-    uint8_t i;
-    /* Use the negative of the point. */
-    uint8_t neg;
-} ecc_recode;
-
-/* The index into pre-computation table to use. */
-static uint8_t recode_index_4_6[66] = {
-     0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
-    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
-    32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
-    16, 15, 14, 13, 12, 11, 10,  9,  8,  7,  6,  5,  4,  3,  2,  1,
-     0,  1,
-};
-
-/* Whether to negate y-ordinate. */
-static uint8_t recode_neg_4_6[66] = {
-     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
-     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
-     0,  0,
-};
-
-/* Recode the scalar for multiplication using pre-computed values and
- * subtraction.
- *
- * k  Scalar to multiply by.
- * v  Vector of operations to peform.
- */
-static void sp_256_ecc_recode_6_4(sp_digit* k, ecc_recode* v)
-{
-    int i, j;
-    uint8_t y;
-    int carry = 0;
-    int o;
-    sp_digit n;
-
-    j = 0;
-    n = k[j];
-    o = 0;
-    for (i=0; i<43; i++) {
-        y = n;
-        if (o + 6 < 64) {
-            y &= 0x3f;
-            n >>= 6;
-            o += 6;
-        }
-        else if (o + 6 == 64) {
-            n >>= 6;
-            if (++j < 4)
-                n = k[j];
-            o = 0;
-        }
-        else if (++j < 4) {
-            n = k[j];
-            y |= (n << (64 - o)) & 0x3f;
-            o -= 58;
-            n >>= o;
-        }
-
-        y += carry;
-        v[i].i = recode_index_4_6[y];
-        v[i].neg = recode_neg_4_6[y];
-        carry = (y >> 6) + v[i].neg;
-    }
-}
-
-/* Multiply the point by the scalar and return the result.
- * If map is true then convert result to affine co-ordinates.
- *
- * r     Resulting point.
- * g     Point to multiply.
- * k     Scalar to multiply by.
- * map   Indicates whether to convert result to affine.
- * heap  Heap to use for allocation.
- * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
- */
-static int sp_256_ecc_mulmod_win_add_sub_4(sp_point* r, sp_point* g,
-        sp_digit* k, int map, void* heap)
-{
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point td[33];
-    sp_point rtd, pd;
-    sp_digit tmpd[2 * 4 * 6];
-#endif
-    sp_point* t;
-    sp_point* rt;
-    sp_point* p = NULL;
-    sp_digit* tmp;
-    sp_digit* negy;
-    int i;
-    ecc_recode v[43];
-    int err;
-
-    (void)heap;
-
-    err = sp_ecc_point_new(heap, rtd, rt);
-    if (err == MP_OKAY)
-        err = sp_ecc_point_new(heap, pd, p);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    t = (sp_point*)XMALLOC(sizeof(sp_point) * 33, heap, DYNAMIC_TYPE_ECC);
-    if (t == NULL)
-        err = MEMORY_E;
-    tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 4 * 6, heap,
-                             DYNAMIC_TYPE_ECC);
-    if (tmp == NULL)
-        err = MEMORY_E;
-#else
-    t = td;
-    tmp = tmpd;
-#endif
-
-
-    if (err == MP_OKAY) {
-        /* t[0] = {0, 0, 1} * norm */
-        XMEMSET(&t[0], 0, sizeof(t[0]));
-        t[0].infinity = 1;
-        /* t[1] = {g->x, g->y, g->z} * norm */
-        err = sp_256_mod_mul_norm_4(t[1].x, g->x, p256_mod);
-    }
-    if (err == MP_OKAY)
-        err = sp_256_mod_mul_norm_4(t[1].y, g->y, p256_mod);
-    if (err == MP_OKAY)
-        err = sp_256_mod_mul_norm_4(t[1].z, g->z, p256_mod);
-
-    if (err == MP_OKAY) {
-        t[1].infinity = 0;
-        /* t[2] ... t[32]  */
-    sp_256_proj_point_dbl_n_store_4(t, &t[ 1], 5, 1, tmp);
-    sp_256_proj_point_add_4(&t[ 3], &t[ 2], &t[ 1], tmp);
-    sp_256_proj_point_dbl_4(&t[ 6], &t[ 3], tmp);
-    sp_256_proj_point_add_sub_4(&t[ 7], &t[ 5], &t[ 6], &t[ 1], tmp);
-    sp_256_proj_point_dbl_4(&t[10], &t[ 5], tmp);
-    sp_256_proj_point_add_sub_4(&t[11], &t[ 9], &t[10], &t[ 1], tmp);
-    sp_256_proj_point_dbl_4(&t[12], &t[ 6], tmp);
-    sp_256_proj_point_dbl_4(&t[14], &t[ 7], tmp);
-    sp_256_proj_point_add_sub_4(&t[15], &t[13], &t[14], &t[ 1], tmp);
-    sp_256_proj_point_dbl_4(&t[18], &t[ 9], tmp);
-    sp_256_proj_point_add_sub_4(&t[19], &t[17], &t[18], &t[ 1], tmp);
-    sp_256_proj_point_dbl_4(&t[20], &t[10], tmp);
-    sp_256_proj_point_dbl_4(&t[22], &t[11], tmp);
-    sp_256_proj_point_add_sub_4(&t[23], &t[21], &t[22], &t[ 1], tmp);
-    sp_256_proj_point_dbl_4(&t[24], &t[12], tmp);
-    sp_256_proj_point_dbl_4(&t[26], &t[13], tmp);
-    sp_256_proj_point_add_sub_4(&t[27], &t[25], &t[26], &t[ 1], tmp);
-    sp_256_proj_point_dbl_4(&t[28], &t[14], tmp);
-    sp_256_proj_point_dbl_4(&t[30], &t[15], tmp);
-    sp_256_proj_point_add_sub_4(&t[31], &t[29], &t[30], &t[ 1], tmp);
-
-        negy = t[0].y;
-
-        sp_256_ecc_recode_6_4(k, v);
-
-        i = 42;
-        XMEMCPY(rt, &t[v[i].i], sizeof(sp_point));
-        for (--i; i>=0; i--) {
-            sp_256_proj_point_dbl_n_4(rt, rt, 6, tmp);
-
-            XMEMCPY(p, &t[v[i].i], sizeof(sp_point));
-            sp_256_sub_4(negy, p256_mod, p->y);
-            sp_256_cond_copy_4(p->y, negy, (sp_digit)0 - v[i].neg);
-            sp_256_proj_point_add_4(rt, rt, p, tmp);
-        }
-
-        if (map)
-            sp_256_map_4(r, rt, tmp);
-        else
-            XMEMCPY(r, rt, sizeof(sp_point));
-    }
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (t != NULL)
-        XFREE(t, heap, DYNAMIC_TYPE_ECC);
-    if (tmp != NULL)
-        XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
-#endif
-    sp_ecc_point_free(p, 0, heap);
-    sp_ecc_point_free(rt, 0, heap);
-
-    return err;
-}
-
-#ifdef HAVE_INTEL_AVX2
-/* Multiply two Montogmery form numbers mod the modulus (prime).
- * (r = a * b mod m)
- *
- * r   Result of multiplication.
- * a   First number to multiply in Montogmery form.
- * b   Second number to multiply in Montogmery form.
- * m   Modulus (prime).
- * mp  Montogmery mulitplier.
- */
-SP_NOINLINE static void sp_256_mont_mul_avx2_4(sp_digit* r, sp_digit* a,
-        sp_digit* b, sp_digit* m, sp_digit mp)
-{
-    (void)mp;
-
-    __asm__ __volatile__ (
-        "#  A[0] * B[0]\n\t"
-        "movq   0(%[b]), %%rdx\n\t"
-        "mulxq  0(%[a]), %%r8, %%r9\n\t"
-        "#  A[2] * B[0]\n\t"
-        "mulxq  16(%[a]), %%r10, %%r11\n\t"
-        "#  A[1] * B[0]\n\t"
-        "mulxq  8(%[a]), %%rax, %[m]\n\t"
-        "xorq   %%r15, %%r15\n\t"
-        "adcxq  %%rax, %%r9\n\t"
-        "#  A[1] * B[3]\n\t"
-        "movq   24(%[b]), %%rdx\n\t"
-        "mulxq  8(%[a]), %%r12, %%r13\n\t"
-        "adcxq  %[m], %%r10\n\t"
-        "#  A[0] * B[1]\n\t"
-        "movq   8(%[b]), %%rdx\n\t"
-        "mulxq  0(%[a]), %%rax, %[m]\n\t"
-        "adoxq  %%rax, %%r9\n\t"
-        "#  A[2] * B[1]\n\t"
-        "mulxq  16(%[a]), %%rax, %%r14\n\t"
-        "adoxq  %[m], %%r10\n\t"
-        "adcxq  %%rax, %%r11\n\t"
-        "#  A[1] * B[2]\n\t"
-        "movq   16(%[b]), %%rdx\n\t"
-        "mulxq  8(%[a]), %%rax, %[m]\n\t"
-        "adcxq  %%r14, %%r12\n\t"
-        "adoxq  %%rax, %%r11\n\t"
-        "adcxq  %%r15, %%r13\n\t"
-        "adoxq  %[m], %%r12\n\t"
-        "#  A[0] * B[2]\n\t"
-        "mulxq  0(%[a]), %%rax, %[m]\n\t"
-        "adoxq  %%r15, %%r13\n\t"
-        "xorq   %%r14, %%r14\n\t"
-        "adcxq  %%rax, %%r10\n\t"
-        "#  A[1] * B[1]\n\t"
-        "movq   8(%[b]), %%rdx\n\t"
-        "mulxq  8(%[a]), %%rdx, %%rax\n\t"
-        "adcxq  %[m], %%r11\n\t"
-        "adoxq  %%rdx, %%r10\n\t"
-        "#  A[3] * B[1]\n\t"
-        "movq   8(%[b]), %%rdx\n\t"
-        "adoxq  %%rax, %%r11\n\t"
-        "mulxq  24(%[a]), %%rax, %[m]\n\t"
-        "adcxq  %%rax, %%r12\n\t"
-        "#  A[2] * B[2]\n\t"
-        "movq   16(%[b]), %%rdx\n\t"
-        "mulxq  16(%[a]), %%rdx, %%rax\n\t"
-        "adcxq  %[m], %%r13\n\t"
-        "adoxq  %%rdx, %%r12\n\t"
-        "#  A[3] * B[3]\n\t"
-        "movq   24(%[b]), %%rdx\n\t"
-        "adoxq  %%rax, %%r13\n\t"
-        "mulxq  24(%[a]), %%rax, %[m]\n\t"
-        "adoxq  %%r15, %%r14\n\t"
-        "adcxq  %%rax, %%r14\n\t"
-        "#  A[0] * B[3]\n\t"
-        "mulxq  0(%[a]), %%rdx, %%rax\n\t"
-        "adcxq  %[m], %%r15\n\t"
-        "xorq   %[m], %[m]\n\t"
-        "adcxq  %%rdx, %%r11\n\t"
-        "#  A[3] * B[0]\n\t"
-        "movq   0(%[b]), %%rdx\n\t"
-        "adcxq  %%rax, %%r12\n\t"
-        "mulxq  24(%[a]), %%rdx, %%rax\n\t"
-        "adoxq  %%rdx, %%r11\n\t"
-        "adoxq  %%rax, %%r12\n\t"
-        "#  A[2] * B[3]\n\t"
-        "movq   24(%[b]), %%rdx\n\t"
-        "mulxq  16(%[a]), %%rdx, %%rax\n\t"
-        "adcxq  %%rdx, %%r13\n\t"
-        "#  A[3] * B[2]\n\t"
-        "movq   16(%[b]), %%rdx\n\t"
-        "adcxq  %%rax, %%r14\n\t"
-        "mulxq  24(%[a]), %%rax, %%rdx\n\t"
-        "adcxq  %[m], %%r15\n\t"
-        "adoxq  %%rax, %%r13\n\t"
-        "adoxq  %%rdx, %%r14\n\t"
-        "adoxq  %[m], %%r15\n\t"
-        "# Start Reduction\n\t"
-        "movq	%%r8, %%rax\n\t"
-        "movq	%%r9, %[a]\n\t"
-        "movq	%%r10, %[b]\n\t"
-        "movq	%%r11, %%rdx\n\t"
-        "# mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192\n\t"
-        "#    - a[0] << 32 << 192\n\t"
-        "#   + (a[0] * 2) << 192\n\t"
-        "addq	%%r8, %%rdx\n\t"
-        "addq	%%r8, %%rdx\n\t"
-        "#   a[0]-a[2] << 32\n\t"
-        "shlq	$32, %%r8\n\t"
-        "shldq	$32, %[a], %%r10\n\t"
-        "shldq	$32, %%rax, %%r9\n\t"
-        "#   - a[0] << 32 << 192\n\t"
-        "subq	%%r8, %%rdx\n\t"
-        "#   + a[0]-a[2] << 32 << 64\n\t"
-        "addq	%%r8, %[a]\n\t"
-        "adcq	%%r9, %[b]\n\t"
-        "adcq	%%r10, %%rdx\n\t"
-        "# a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu\n\t"
-        "#   a += mu << 256\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "addq	%%rax, %%r12\n\t"
-        "adcq	%[a], %%r13\n\t"
-        "adcq	%[b], %%r14\n\t"
-        "adcq	%%rdx, %%r15\n\t"
-        "sbbq	$0, %%r8\n\t"
-        "#   a += mu << 192\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%[a], %%r12\n\t"
-        "adcq	%[b], %%r13\n\t"
-        "adcq	%%rdx, %%r14\n\t"
-        "adcq	$0, %%r15\n\t"
-        "sbbq	$0, %%r8\n\t"
-        "# mu <<= 32\n\t"
-        "movq	%%rdx, %[m]\n\t"
-        "shldq	$32, %[b], %%rdx\n\t"
-        "shldq	$32, %[a], %[b]\n\t"
-        "shldq	$32, %%rax, %[a]\n\t"
-        "shlq	$32, %%rax\n\t"
-        "shrq	$32, %[m]\n\t"
-        "#   a += (mu << 32) << 64\n\t"
-        "addq	%[b], %%r11\n\t"
-        "adcq	%%rdx, %%r12\n\t"
-        "adcq	%[m], %%r13\n\t"
-        "adcq	$0, %%r14\n\t"
-        "adcq	$0, %%r15\n\t"
-        "sbbq	$0, %%r8\n\t"
-        "#   a -= (mu << 32) << 192\n\t"
-        "subq	%%rax, %%r11\n\t"
-        "movq	$0xffffffff, %%rax\n\t"
-        "sbbq	%[a], %%r12\n\t"
-        "movq	$0xffffffff00000001, %[a]\n\t"
-        "sbbq	%[b], %%r13\n\t"
-        "sbbq	%%rdx, %%r14\n\t"
-        "sbbq	%[m], %%r15\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# mask m and sub from result if overflow\n\t"
-        "#  m[0] = -1 & mask = mask\n\t"
-        "andq	%%r8, %%rax\n\t"
-        "#  m[2] =  0 & mask = 0\n\t"
-        "andq	%%r8, %[a]\n\t"
-        "subq	%%r8, %%r12\n\t"
-        "sbbq	%%rax, %%r13\n\t"
-        "sbbq	$0, %%r14\n\t"
-        "sbbq	%[a], %%r15\n\t"
-        "movq	%%r12, 0(%[r])\n\t"
-        "movq	%%r13, 8(%[r])\n\t"
-        "movq	%%r14, 16(%[r])\n\t"
-        "movq	%%r15, 24(%[r])\n\t"
-        : [m] "+r" (m), [a] "+r" (a), [b] "+r" (b)
-        : [r] "r" (r)
-        : "memory", "rax", "rdx", "r8", "r9", "r10", "r11",
-          "r12", "r13", "r14", "r15"
-    );
-}
-
-/* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m)
- *
- * r   Result of squaring.
- * a   Number to square in Montogmery form.
- * m   Modulus (prime).
- * mp  Montogmery mulitplier.
- */
-SP_NOINLINE static void sp_256_mont_sqr_avx2_4(sp_digit* r, sp_digit* a,
-        sp_digit* m, sp_digit mp)
-{
-    __asm__ __volatile__ (
-        "# A[0] * A[1]\n\t"
-        "movq   0(%[a]), %%rdx\n\t"
-        "mulxq  8(%[a]), %%r9, %%r10\n\t"
-        "# A[0] * A[3]\n\t"
-        "mulxq  24(%[a]), %%r11, %%r12\n\t"
-        "# A[2] * A[1]\n\t"
-        "movq   16(%[a]), %%rdx\n\t"
-        "mulxq  8(%[a]), %[mp], %[m]\n\t"
-        "xorq   %%r15, %%r15\n\t"
-        "adoxq  %[mp], %%r11\n\t"
-        "# A[2] * A[3]\n\t"
-        "mulxq  24(%[a]), %%r13, %%r14\n\t"
-        "adoxq  %[m], %%r12\n\t"
-        "# A[2] * A[0]\n\t"
-        "mulxq  0(%[a]), %[mp], %[m]\n\t"
-        "adoxq  %%r15, %%r13\n\t"
-        "adcxq  %[mp], %%r10\n\t"
-        "adoxq  %%r15, %%r14\n\t"
-        "# A[1] * A[3]\n\t"
-        "movq   8(%[a]), %%rdx\n\t"
-        "mulxq  24(%[a]), %%rax, %%r8\n\t"
-        "adcxq  %[m], %%r11\n\t"
-        "adcxq  %%rax, %%r12\n\t"
-        "adcxq  %%r8, %%r13\n\t"
-        "adcxq  %%r15, %%r14\n\t"
-        "# Double with Carry Flag\n\t"
-        "xorq   %%r15, %%r15\n\t"
-        "# A[0] * A[0]\n\t"
-        "movq   0(%[a]), %%rdx\n\t"
-        "mulxq  %%rdx, %%r8, %%rax\n\t"
-        "adcxq  %%r9, %%r9\n\t"
-        "# A[1] * A[1]\n\t"
-        "movq   8(%[a]), %%rdx\n\t"
-        "mulxq  %%rdx, %[mp], %[m]\n\t"
-        "adcxq  %%r10, %%r10\n\t"
-        "adoxq  %%rax, %%r9\n\t"
-        "adcxq  %%r11, %%r11\n\t"
-        "adoxq  %[mp], %%r10\n\t"
-        "# A[2] * A[2]\n\t"
-        "movq   16(%[a]), %%rdx\n\t"
-        "mulxq  %%rdx, %%rax, %[mp]\n\t"
-        "adcxq  %%r12, %%r12\n\t"
-        "adoxq  %[m], %%r11\n\t"
-        "adcxq  %%r13, %%r13\n\t"
-        "adoxq  %%rax, %%r12\n\t"
-        "# A[3] * A[3]\n\t"
-        "movq   24(%[a]), %%rdx\n\t"
-        "mulxq  %%rdx, %%rax, %[m]\n\t"
-        "adcxq  %%r14, %%r14\n\t"
-        "adoxq  %[mp], %%r13\n\t"
-        "adcxq  %%r15, %%r15\n\t"
-        "adoxq  %%rax, %%r14\n\t"
-        "adoxq  %[m], %%r15\n\t"
-        "# Start Reduction\n\t"
-        "movq	%%r8, %%rax\n\t"
-        "movq	%%r9, %[a]\n\t"
-        "movq	%%r10, %[mp]\n\t"
-        "movq	%%r11, %%rdx\n\t"
-        "# mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192\n\t"
-        "#    - a[0] << 32 << 192\n\t"
-        "#   + (a[0] * 2) << 192\n\t"
-        "addq	%%r8, %%rdx\n\t"
-        "addq	%%r8, %%rdx\n\t"
-        "#   a[0]-a[2] << 32\n\t"
-        "shlq	$32, %%r8\n\t"
-        "shldq	$32, %[a], %%r10\n\t"
-        "shldq	$32, %%rax, %%r9\n\t"
-        "#   - a[0] << 32 << 192\n\t"
-        "subq	%%r8, %%rdx\n\t"
-        "#   + a[0]-a[2] << 32 << 64\n\t"
-        "addq	%%r8, %[a]\n\t"
-        "adcq	%%r9, %[mp]\n\t"
-        "adcq	%%r10, %%rdx\n\t"
-        "# a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu\n\t"
-        "#   a += mu << 256\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "addq	%%rax, %%r12\n\t"
-        "adcq	%[a], %%r13\n\t"
-        "adcq	%[mp], %%r14\n\t"
-        "adcq	%%rdx, %%r15\n\t"
-        "sbbq	$0, %%r8\n\t"
-        "#   a += mu << 192\n\t"
-        "addq	%%rax, %%r11\n\t"
-        "adcq	%[a], %%r12\n\t"
-        "adcq	%[mp], %%r13\n\t"
-        "adcq	%%rdx, %%r14\n\t"
-        "adcq	$0, %%r15\n\t"
-        "sbbq	$0, %%r8\n\t"
-        "# mu <<= 32\n\t"
-        "movq	%%rdx, %[m]\n\t"
-        "shldq	$32, %[mp], %%rdx\n\t"
-        "shldq	$32, %[a], %[mp]\n\t"
-        "shldq	$32, %%rax, %[a]\n\t"
-        "shlq	$32, %%rax\n\t"
-        "shrq	$32, %[m]\n\t"
-        "#   a += (mu << 32) << 64\n\t"
-        "addq	%[mp], %%r11\n\t"
-        "adcq	%%rdx, %%r12\n\t"
-        "adcq	%[m], %%r13\n\t"
-        "adcq	$0, %%r14\n\t"
-        "adcq	$0, %%r15\n\t"
-        "sbbq	$0, %%r8\n\t"
-        "#   a -= (mu << 32) << 192\n\t"
-        "subq	%%rax, %%r11\n\t"
-        "movq	$0xffffffff, %%rax\n\t"
-        "sbbq	%[a], %%r12\n\t"
-        "movq	$0xffffffff00000001, %[a]\n\t"
-        "sbbq	%[mp], %%r13\n\t"
-        "sbbq	%%rdx, %%r14\n\t"
-        "sbbq	%[m], %%r15\n\t"
-        "adcq	$0, %%r8\n\t"
-        "# mask m and sub from result if overflow\n\t"
-        "#  m[0] = -1 & mask = mask\n\t"
-        "andq	%%r8, %%rax\n\t"
-        "#  m[2] =  0 & mask = 0\n\t"
-        "andq	%%r8, %[a]\n\t"
-        "subq	%%r8, %%r12\n\t"
-        "sbbq	%%rax, %%r13\n\t"
-        "sbbq	$0, %%r14\n\t"
-        "sbbq	%[a], %%r15\n\t"
-        "movq	%%r12, 0(%[r])\n\t"
-        "movq	%%r13, 8(%[r])\n\t"
-        "movq	%%r14, 16(%[r])\n\t"
-        "movq	%%r15, 24(%[r])\n\t"
-        : [m] "+r" (m), [a] "+r" (a), [mp] "+r" (mp)
-        : [r] "r" (r)
-        : "memory", "rax", "rdx", "r8", "r9", "r10", "r11",
-          "r12", "r13", "r14", "r15"
-    );
-}
-
-#ifndef WOLFSSL_SP_SMALL
-/* Square the Montgomery form number a number of times. (r = a ^ n mod m)
- *
- * r   Result of squaring.
- * a   Number to square in Montogmery form.
- * n   Number of times to square.
- * m   Modulus (prime).
- * mp  Montogmery mulitplier.
- */
-static void sp_256_mont_sqr_n_avx2_4(sp_digit* r, sp_digit* a, int n,
-        sp_digit* m, sp_digit mp)
-{
-    sp_256_mont_sqr_avx2_4(r, a, m, mp);
-    for (; n > 1; n--)
-        sp_256_mont_sqr_avx2_4(r, r, m, mp);
-}
-
-#endif /* !WOLFSSL_SP_SMALL */
-
-/* Invert the number, in Montgomery form, modulo the modulus (prime) of the
- * P256 curve. (r = 1 / a mod m)
- *
- * r   Inverse result.
- * a   Number to invert.
- * td  Temporary data.
- */
-static void sp_256_mont_inv_avx2_4(sp_digit* r, sp_digit* a, sp_digit* td)
-{
-#ifdef WOLFSSL_SP_SMALL
-    sp_digit* t = td;
-    int i;
-
-    XMEMCPY(t, a, sizeof(sp_digit) * 4);
-    for (i=254; i>=0; i--) {
-        sp_256_mont_sqr_avx2_4(t, t, p256_mod, p256_mp_mod);
-        if (p256_mod_2[i / 64] & ((sp_digit)1 << (i % 64)))
-            sp_256_mont_mul_avx2_4(t, t, a, p256_mod, p256_mp_mod);
-    }
-    XMEMCPY(r, t, sizeof(sp_digit) * 4);
-#else
-    sp_digit* t = td;
-    sp_digit* t2 = td + 2 * 4;
-    sp_digit* t3 = td + 4 * 4;
-
-    /* t = a^2 */
-    sp_256_mont_sqr_avx2_4(t, a, p256_mod, p256_mp_mod);
-    /* t = a^3 = t * a */
-    sp_256_mont_mul_avx2_4(t, t, a, p256_mod, p256_mp_mod);
-    /* t2= a^c = t ^ 2 ^ 2 */
-    sp_256_mont_sqr_n_avx2_4(t2, t, 2, p256_mod, p256_mp_mod);
-    /* t3= a^d = t2 * a */
-    sp_256_mont_mul_avx2_4(t3, t2, a, p256_mod, p256_mp_mod);
-    /* t = a^f = t2 * t */
-    sp_256_mont_mul_avx2_4(t, t2, t, p256_mod, p256_mp_mod);
-    /* t2= a^f0 = t ^ 2 ^ 4 */
-    sp_256_mont_sqr_n_avx2_4(t2, t, 4, p256_mod, p256_mp_mod);
-    /* t3= a^fd = t2 * t3 */
-    sp_256_mont_mul_avx2_4(t3, t2, t3, p256_mod, p256_mp_mod);
-    /* t = a^ff = t2 * t */
-    sp_256_mont_mul_avx2_4(t, t2, t, p256_mod, p256_mp_mod);
-    /* t2= a^ff00 = t ^ 2 ^ 8 */
-    sp_256_mont_sqr_n_avx2_4(t2, t, 8, p256_mod, p256_mp_mod);
-    /* t3= a^fffd = t2 * t3 */
-    sp_256_mont_mul_avx2_4(t3, t2, t3, p256_mod, p256_mp_mod);
-    /* t = a^ffff = t2 * t */
-    sp_256_mont_mul_avx2_4(t, t2, t, p256_mod, p256_mp_mod);
-    /* t2= a^ffff0000 = t ^ 2 ^ 16 */
-    sp_256_mont_sqr_n_avx2_4(t2, t, 16, p256_mod, p256_mp_mod);
-    /* t3= a^fffffffd = t2 * t3 */
-    sp_256_mont_mul_avx2_4(t3, t2, t3, p256_mod, p256_mp_mod);
-    /* t = a^ffffffff = t2 * t */
-    sp_256_mont_mul_avx2_4(t, t2, t, p256_mod, p256_mp_mod);
-    /* t = a^ffffffff00000000 = t ^ 2 ^ 32  */
-    sp_256_mont_sqr_n_avx2_4(t2, t, 32, p256_mod, p256_mp_mod);
-    /* t2= a^ffffffffffffffff = t2 * t */
-    sp_256_mont_mul_avx2_4(t, t2, t, p256_mod, p256_mp_mod);
-    /* t2= a^ffffffff00000001 = t2 * a */
-    sp_256_mont_mul_avx2_4(t2, t2, a, p256_mod, p256_mp_mod);
-    /* t2= a^ffffffff000000010000000000000000000000000000000000000000
-     *   = t2 ^ 2 ^ 160 */
-    sp_256_mont_sqr_n_avx2_4(t2, t2, 160, p256_mod, p256_mp_mod);
-    /* t2= a^ffffffff00000001000000000000000000000000ffffffffffffffff
-     *   = t2 * t */
-    sp_256_mont_mul_avx2_4(t2, t2, t, p256_mod, p256_mp_mod);
-    /* t2= a^ffffffff00000001000000000000000000000000ffffffffffffffff00000000
-     *   = t2 ^ 2 ^ 32 */
-    sp_256_mont_sqr_n_avx2_4(t2, t2, 32, p256_mod, p256_mp_mod);
-    /* r = a^ffffffff00000001000000000000000000000000fffffffffffffffffffffffd
-     *   = t2 * t3 */
-    sp_256_mont_mul_avx2_4(r, t2, t3, p256_mod, p256_mp_mod);
-#endif /* WOLFSSL_SP_SMALL */
-}
-
-/* Map the Montgomery form projective co-ordinate point to an affine point.
- *
- * r  Resulting affine co-ordinate point.
- * p  Montgomery form projective co-ordinate point.
- * t  Temporary ordinate data.
- */
-static void sp_256_map_avx2_4(sp_point* r, sp_point* p, sp_digit* t)
-{
-    sp_digit* t1 = t;
-    sp_digit* t2 = t + 2*4;
-    int64_t n;
-
-    sp_256_mont_inv_avx2_4(t1, p->z, t + 2*4);
-
-    sp_256_mont_sqr_avx2_4(t2, t1, p256_mod, p256_mp_mod);
-    sp_256_mont_mul_avx2_4(t1, t2, t1, p256_mod, p256_mp_mod);
-
-    /* x /= z^2 */
-    sp_256_mont_mul_avx2_4(r->x, p->x, t2, p256_mod, p256_mp_mod);
-    XMEMSET(r->x + 4, 0, sizeof(r->x) / 2);
-    sp_256_mont_reduce_4(r->x, p256_mod, p256_mp_mod);
-    /* Reduce x to less than modulus */
-    n = sp_256_cmp_4(r->x, p256_mod);
-    sp_256_cond_sub_4(r->x, r->x, p256_mod, 0 - (n >= 0));
-    sp_256_norm_4(r->x);
-
-    /* y /= z^3 */
-    sp_256_mont_mul_avx2_4(r->y, p->y, t1, p256_mod, p256_mp_mod);
-    XMEMSET(r->y + 4, 0, sizeof(r->y) / 2);
-    sp_256_mont_reduce_4(r->y, p256_mod, p256_mp_mod);
-    /* Reduce y to less than modulus */
-    n = sp_256_cmp_4(r->y, p256_mod);
-    sp_256_cond_sub_4(r->y, r->y, p256_mod, 0 - (n >= 0));
-    sp_256_norm_4(r->y);
-
-    XMEMSET(r->z, 0, sizeof(r->z));
-    r->z[0] = 1;
-
-}
-
-/* Double the Montgomery form projective point p.
- *
- * r  Result of doubling point.
- * p  Point to double.
- * t  Temporary ordinate data.
- */
-static void sp_256_proj_point_dbl_avx2_4(sp_point* r, sp_point* p, sp_digit* t)
-{
-    sp_point *rp[2];
-    sp_point tp;
-    sp_digit* t1 = t;
-    sp_digit* t2 = t + 2*4;
-    sp_digit* x;
-    sp_digit* y;
-    sp_digit* z;
-    int i;
-
-    /* When infinity don't double point passed in - constant time. */
-    rp[0] = r;
-    rp[1] = &tp;
-    x = rp[p->infinity]->x;
-    y = rp[p->infinity]->y;
-    z = rp[p->infinity]->z;
-    /* Put point to double into result - good for infinty. */
-    if (r != p) {
-        for (i=0; i<4; i++)
-            r->x[i] = p->x[i];
-        for (i=0; i<4; i++)
-            r->y[i] = p->y[i];
-        for (i=0; i<4; i++)
-            r->z[i] = p->z[i];
-        r->infinity = p->infinity;
-    }
-
-    /* T1 = Z * Z */
-    sp_256_mont_sqr_avx2_4(t1, z, p256_mod, p256_mp_mod);
-    /* Z = Y * Z */
-    sp_256_mont_mul_avx2_4(z, y, z, p256_mod, p256_mp_mod);
-    /* Z = 2Z */
-    sp_256_mont_dbl_4(z, z, p256_mod);
-    /* T2 = X - T1 */
-    sp_256_mont_sub_4(t2, x, t1, p256_mod);
-    /* T1 = X + T1 */
-    sp_256_mont_add_4(t1, x, t1, p256_mod);
-    /* T2 = T1 * T2 */
-    sp_256_mont_mul_avx2_4(t2, t1, t2, p256_mod, p256_mp_mod);
-    /* T1 = 3T2 */
-    sp_256_mont_tpl_4(t1, t2, p256_mod);
-    /* Y = 2Y */
-    sp_256_mont_dbl_4(y, y, p256_mod);
-    /* Y = Y * Y */
-    sp_256_mont_sqr_avx2_4(y, y, p256_mod, p256_mp_mod);
-    /* T2 = Y * Y */
-    sp_256_mont_sqr_avx2_4(t2, y, p256_mod, p256_mp_mod);
-    /* T2 = T2/2 */
-    sp_256_div2_4(t2, t2, p256_mod);
-    /* Y = Y * X */
-    sp_256_mont_mul_avx2_4(y, y, x, p256_mod, p256_mp_mod);
-    /* X = T1 * T1 */
-    sp_256_mont_mul_avx2_4(x, t1, t1, p256_mod, p256_mp_mod);
-    /* X = X - Y */
-    sp_256_mont_sub_4(x, x, y, p256_mod);
-    /* X = X - Y */
-    sp_256_mont_sub_4(x, x, y, p256_mod);
-    /* Y = Y - X */
-    sp_256_mont_sub_4(y, y, x, p256_mod);
-    /* Y = Y * T1 */
-    sp_256_mont_mul_avx2_4(y, y, t1, p256_mod, p256_mp_mod);
-    /* Y = Y - T2 */
-    sp_256_mont_sub_4(y, y, t2, p256_mod);
-
-}
-
-/* Double the Montgomery form projective point p a number of times.
- *
- * r  Result of repeated doubling of point.
- * p  Point to double.
- * n  Number of times to double
- * t  Temporary ordinate data.
- */
-static void sp_256_proj_point_dbl_n_avx2_4(sp_point* r, sp_point* p, int n,
-        sp_digit* t)
-{
-    sp_point *rp[2];
-    sp_point tp;
-    sp_digit* w = t;
-    sp_digit* a = t + 2*4;
-    sp_digit* b = t + 4*4;
-    sp_digit* t1 = t + 6*4;
-    sp_digit* t2 = t + 8*4;
-    sp_digit* x;
-    sp_digit* y;
-    sp_digit* z;
-    int i;
-
-    rp[0] = r;
-    rp[1] = &tp;
-    x = rp[p->infinity]->x;
-    y = rp[p->infinity]->y;
-    z = rp[p->infinity]->z;
-    if (r != p) {
-        for (i=0; i<4; i++)
-            r->x[i] = p->x[i];
-        for (i=0; i<4; i++)
-            r->y[i] = p->y[i];
-        for (i=0; i<4; i++)
-            r->z[i] = p->z[i];
-        r->infinity = p->infinity;
-    }
-
-    /* Y = 2*Y */
-    sp_256_mont_dbl_4(y, y, p256_mod);
-    /* W = Z^4 */
-    sp_256_mont_sqr_avx2_4(w, z, p256_mod, p256_mp_mod);
-    sp_256_mont_sqr_avx2_4(w, w, p256_mod, p256_mp_mod);
-    while (n--) {
-        /* A = 3*(X^2 - W) */
-        sp_256_mont_sqr_avx2_4(t1, x, p256_mod, p256_mp_mod);
-        sp_256_mont_sub_4(t1, t1, w, p256_mod);
-        sp_256_mont_tpl_4(a, t1, p256_mod);
-        /* B = X*Y^2 */
-        sp_256_mont_sqr_avx2_4(t2, y, p256_mod, p256_mp_mod);
-        sp_256_mont_mul_avx2_4(b, t2, x, p256_mod, p256_mp_mod);
-        /* X = A^2 - 2B */
-        sp_256_mont_sqr_avx2_4(x, a, p256_mod, p256_mp_mod);
-        sp_256_mont_dbl_4(t1, b, p256_mod);
-        sp_256_mont_sub_4(x, x, t1, p256_mod);
-        /* Z = Z*Y */
-        sp_256_mont_mul_avx2_4(z, z, y, p256_mod, p256_mp_mod);
-        /* t2 = Y^4 */
-        sp_256_mont_sqr_avx2_4(t2, t2, p256_mod, p256_mp_mod);
-        if (n) {
-            /* W = W*Y^4 */
-            sp_256_mont_mul_avx2_4(w, w, t2, p256_mod, p256_mp_mod);
-        }
-        /* y = 2*A*(B - X) - Y^4 */
-        sp_256_mont_sub_4(y, b, x, p256_mod);
-        sp_256_mont_mul_avx2_4(y, y, a, p256_mod, p256_mp_mod);
-        sp_256_mont_dbl_4(y, y, p256_mod);
-        sp_256_mont_sub_4(y, y, t2, p256_mod);
-    }
-    /* Y = Y/2 */
-    sp_256_div2_4(y, y, p256_mod);
-}
-
-/* Add two Montgomery form projective points.
- *
- * r  Result of addition.
- * p  Frist point to add.
- * q  Second point to add.
- * t  Temporary ordinate data.
- */
-static void sp_256_proj_point_add_avx2_4(sp_point* r, sp_point* p, sp_point* q,
-        sp_digit* t)
-{
-    sp_point *ap[2];
-    sp_point *rp[2];
-    sp_point tp;
-    sp_digit* t1 = t;
-    sp_digit* t2 = t + 2*4;
-    sp_digit* t3 = t + 4*4;
-    sp_digit* t4 = t + 6*4;
-    sp_digit* t5 = t + 8*4;
-    sp_digit* x;
-    sp_digit* y;
-    sp_digit* z;
-    int i;
-
-    /* Ensure only the first point is the same as the result. */
-    if (q == r) {
-        sp_point* a = p;
-        p = q;
-        q = a;
-    }
-
-    /* Check double */
-    sp_256_sub_4(t1, p256_mod, q->y);
-    sp_256_norm_4(t1);
-    if (sp_256_cmp_equal_4(p->x, q->x) & sp_256_cmp_equal_4(p->z, q->z) &
-        (sp_256_cmp_equal_4(p->y, q->y) | sp_256_cmp_equal_4(p->y, t1))) {
-        sp_256_proj_point_dbl_4(r, p, t);
-    }
-    else {
-        rp[0] = r;
-        rp[1] = &tp;
-        XMEMSET(&tp, 0, sizeof(tp));
-        x = rp[p->infinity | q->infinity]->x;
-        y = rp[p->infinity | q->infinity]->y;
-        z = rp[p->infinity | q->infinity]->z;
-
-        ap[0] = p;
-        ap[1] = q;
-        for (i=0; i<4; i++)
-            r->x[i] = ap[p->infinity]->x[i];
-        for (i=0; i<4; i++)
-            r->y[i] = ap[p->infinity]->y[i];
-        for (i=0; i<4; i++)
-            r->z[i] = ap[p->infinity]->z[i];
-        r->infinity = ap[p->infinity]->infinity;
-
-        /* U1 = X1*Z2^2 */
-        sp_256_mont_sqr_avx2_4(t1, q->z, p256_mod, p256_mp_mod);
-        sp_256_mont_mul_avx2_4(t3, t1, q->z, p256_mod, p256_mp_mod);
-        sp_256_mont_mul_avx2_4(t1, t1, x, p256_mod, p256_mp_mod);
-        /* U2 = X2*Z1^2 */
-        sp_256_mont_sqr_avx2_4(t2, z, p256_mod, p256_mp_mod);
-        sp_256_mont_mul_avx2_4(t4, t2, z, p256_mod, p256_mp_mod);
-        sp_256_mont_mul_avx2_4(t2, t2, q->x, p256_mod, p256_mp_mod);
-        /* S1 = Y1*Z2^3 */
-        sp_256_mont_mul_avx2_4(t3, t3, y, p256_mod, p256_mp_mod);
-        /* S2 = Y2*Z1^3 */
-        sp_256_mont_mul_avx2_4(t4, t4, q->y, p256_mod, p256_mp_mod);
-        /* H = U2 - U1 */
-        sp_256_mont_sub_4(t2, t2, t1, p256_mod);
-        /* R = S2 - S1 */
-        sp_256_mont_sub_4(t4, t4, t3, p256_mod);
-        /* Z3 = H*Z1*Z2 */
-        sp_256_mont_mul_avx2_4(z, z, q->z, p256_mod, p256_mp_mod);
-        sp_256_mont_mul_avx2_4(z, z, t2, p256_mod, p256_mp_mod);
-        /* X3 = R^2 - H^3 - 2*U1*H^2 */
-        sp_256_mont_sqr_avx2_4(x, t4, p256_mod, p256_mp_mod);
-        sp_256_mont_sqr_avx2_4(t5, t2, p256_mod, p256_mp_mod);
-        sp_256_mont_mul_avx2_4(y, t1, t5, p256_mod, p256_mp_mod);
-        sp_256_mont_mul_avx2_4(t5, t5, t2, p256_mod, p256_mp_mod);
-        sp_256_mont_sub_4(x, x, t5, p256_mod);
-        sp_256_mont_dbl_4(t1, y, p256_mod);
-        sp_256_mont_sub_4(x, x, t1, p256_mod);
-        /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
-        sp_256_mont_sub_4(y, y, x, p256_mod);
-        sp_256_mont_mul_avx2_4(y, y, t4, p256_mod, p256_mp_mod);
-        sp_256_mont_mul_avx2_4(t5, t5, t3, p256_mod, p256_mp_mod);
-        sp_256_mont_sub_4(y, y, t5, p256_mod);
-    }
-}
-
-/* Double the Montgomery form projective point p a number of times.
- *
- * r  Result of repeated doubling of point.
- * p  Point to double.
- * n  Number of times to double
- * t  Temporary ordinate data.
- */
-static void sp_256_proj_point_dbl_n_store_avx2_4(sp_point* r, sp_point* p,
-        int n, int m, sp_digit* t)
-{
-    sp_digit* w = t;
-    sp_digit* a = t + 2*4;
-    sp_digit* b = t + 4*4;
-    sp_digit* t1 = t + 6*4;
-    sp_digit* t2 = t + 8*4;
-    sp_digit* x = r[2*m].x;
-    sp_digit* y = r[(1<<n)*m].y;
-    sp_digit* z = r[2*m].z;
-    int i;
-
-    for (i=0; i<4; i++)
-        x[i] = p->x[i];
-    for (i=0; i<4; i++)
-        y[i] = p->y[i];
-    for (i=0; i<4; i++)
-        z[i] = p->z[i];
-
-    /* Y = 2*Y */
-    sp_256_mont_dbl_4(y, y, p256_mod);
-    /* W = Z^4 */
-    sp_256_mont_sqr_avx2_4(w, z, p256_mod, p256_mp_mod);
-    sp_256_mont_sqr_avx2_4(w, w, p256_mod, p256_mp_mod);
-    for (i=1; i<=n; i++) {
-        /* A = 3*(X^2 - W) */
-        sp_256_mont_sqr_avx2_4(t1, x, p256_mod, p256_mp_mod);
-        sp_256_mont_sub_4(t1, t1, w, p256_mod);
-        sp_256_mont_tpl_4(a, t1, p256_mod);
-        /* B = X*Y^2 */
-        sp_256_mont_sqr_avx2_4(t2, y, p256_mod, p256_mp_mod);
-        sp_256_mont_mul_avx2_4(b, t2, x, p256_mod, p256_mp_mod);
-        x = r[(1<<i)*m].x;
-        /* X = A^2 - 2B */
-        sp_256_mont_sqr_avx2_4(x, a, p256_mod, p256_mp_mod);
-        sp_256_mont_dbl_4(t1, b, p256_mod);
-        sp_256_mont_sub_4(x, x, t1, p256_mod);
-        /* Z = Z*Y */
-        sp_256_mont_mul_avx2_4(r[(1<<i)*m].z, z, y, p256_mod, p256_mp_mod);
-        z = r[(1<<i)*m].z;
-        /* t2 = Y^4 */
-        sp_256_mont_sqr_avx2_4(t2, t2, p256_mod, p256_mp_mod);
-        if (i != n) {
-            /* W = W*Y^4 */
-            sp_256_mont_mul_avx2_4(w, w, t2, p256_mod, p256_mp_mod);
-        }
-        /* y = 2*A*(B - X) - Y^4 */
-        sp_256_mont_sub_4(y, b, x, p256_mod);
-        sp_256_mont_mul_avx2_4(y, y, a, p256_mod, p256_mp_mod);
-        sp_256_mont_dbl_4(y, y, p256_mod);
-        sp_256_mont_sub_4(y, y, t2, p256_mod);
-
-        /* Y = Y/2 */
-        sp_256_div2_4(r[(1<<i)*m].y, y, p256_mod);
-        r[(1<<i)*m].infinity = 0;
-    }
-}
-
-/* Add two Montgomery form projective points.
- *
- * ra  Result of addition.
- * rs  Result of subtraction.
- * p   Frist point to add.
- * q   Second point to add.
- * t   Temporary ordinate data.
- */
-static void sp_256_proj_point_add_sub_avx2_4(sp_point* ra, sp_point* rs,
-        sp_point* p, sp_point* q, sp_digit* t)
-{
-    sp_digit* t1 = t;
-    sp_digit* t2 = t + 2*4;
-    sp_digit* t3 = t + 4*4;
-    sp_digit* t4 = t + 6*4;
-    sp_digit* t5 = t + 8*4;
-    sp_digit* t6 = t + 10*4;
-    sp_digit* x = ra->x;
-    sp_digit* y = ra->y;
-    sp_digit* z = ra->z;
-    sp_digit* xs = rs->x;
-    sp_digit* ys = rs->y;
-    sp_digit* zs = rs->z;
-
-
-    XMEMCPY(x, p->x, sizeof(p->x) / 2);
-    XMEMCPY(y, p->y, sizeof(p->y) / 2);
-    XMEMCPY(z, p->z, sizeof(p->z) / 2);
-    ra->infinity = 0;
-    rs->infinity = 0;
-
-    /* U1 = X1*Z2^2 */
-    sp_256_mont_sqr_avx2_4(t1, q->z, p256_mod, p256_mp_mod);
-    sp_256_mont_mul_avx2_4(t3, t1, q->z, p256_mod, p256_mp_mod);
-    sp_256_mont_mul_avx2_4(t1, t1, x, p256_mod, p256_mp_mod);
-    /* U2 = X2*Z1^2 */
-    sp_256_mont_sqr_avx2_4(t2, z, p256_mod, p256_mp_mod);
-    sp_256_mont_mul_avx2_4(t4, t2, z, p256_mod, p256_mp_mod);
-    sp_256_mont_mul_avx2_4(t2, t2, q->x, p256_mod, p256_mp_mod);
-    /* S1 = Y1*Z2^3 */
-    sp_256_mont_mul_avx2_4(t3, t3, y, p256_mod, p256_mp_mod);
-    /* S2 = Y2*Z1^3 */
-    sp_256_mont_mul_avx2_4(t4, t4, q->y, p256_mod, p256_mp_mod);
-    /* H = U2 - U1 */
-    sp_256_mont_sub_4(t2, t2, t1, p256_mod);
-    /* RS = S2 + S1 */
-    sp_256_mont_add_4(t6, t4, t3, p256_mod);
-    /* R = S2 - S1 */
-    sp_256_mont_sub_4(t4, t4, t3, p256_mod);
-    /* Z3 = H*Z1*Z2 */
-    /* ZS = H*Z1*Z2 */
-    sp_256_mont_mul_avx2_4(z, z, q->z, p256_mod, p256_mp_mod);
-    sp_256_mont_mul_avx2_4(z, z, t2, p256_mod, p256_mp_mod);
-    XMEMCPY(zs, z, sizeof(p->z)/2);
-    /* X3 = R^2 - H^3 - 2*U1*H^2 */
-    /* XS = RS^2 - H^3 - 2*U1*H^2 */
-    sp_256_mont_sqr_avx2_4(x, t4, p256_mod, p256_mp_mod);
-    sp_256_mont_sqr_avx2_4(xs, t6, p256_mod, p256_mp_mod);
-    sp_256_mont_sqr_avx2_4(t5, t2, p256_mod, p256_mp_mod);
-    sp_256_mont_mul_avx2_4(y, t1, t5, p256_mod, p256_mp_mod);
-    sp_256_mont_mul_avx2_4(t5, t5, t2, p256_mod, p256_mp_mod);
-    sp_256_mont_sub_4(x, x, t5, p256_mod);
-    sp_256_mont_sub_4(xs, xs, t5, p256_mod);
-    sp_256_mont_dbl_4(t1, y, p256_mod);
-    sp_256_mont_sub_4(x, x, t1, p256_mod);
-    sp_256_mont_sub_4(xs, xs, t1, p256_mod);
-    /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
-    /* YS = -RS*(U1*H^2 - XS) - S1*H^3 */
-    sp_256_mont_sub_4(ys, y, xs, p256_mod);
-    sp_256_mont_sub_4(y, y, x, p256_mod);
-    sp_256_mont_mul_avx2_4(y, y, t4, p256_mod, p256_mp_mod);
-    sp_256_sub_4(t6, p256_mod, t6);
-    sp_256_mont_mul_avx2_4(ys, ys, t6, p256_mod, p256_mp_mod);
-    sp_256_mont_mul_avx2_4(t5, t5, t3, p256_mod, p256_mp_mod);
-    sp_256_mont_sub_4(y, y, t5, p256_mod);
-    sp_256_mont_sub_4(ys, ys, t5, p256_mod);
-}
-
-/* Multiply the point by the scalar and return the result.
- * If map is true then convert result to affine co-ordinates.
- *
- * r     Resulting point.
- * g     Point to multiply.
- * k     Scalar to multiply by.
- * map   Indicates whether to convert result to affine.
- * heap  Heap to use for allocation.
- * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
- */
-static int sp_256_ecc_mulmod_win_add_sub_avx2_4(sp_point* r, sp_point* g,
-        sp_digit* k, int map, void* heap)
-{
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point td[33];
-    sp_point rtd, pd;
-    sp_digit tmpd[2 * 4 * 6];
-#endif
-    sp_point* t;
-    sp_point* rt;
-    sp_point* p = NULL;
-    sp_digit* tmp;
-    sp_digit* negy;
-    int i;
-    ecc_recode v[43];
-    int err;
-
-    (void)heap;
-
-    err = sp_ecc_point_new(heap, rtd, rt);
-    if (err == MP_OKAY)
-        err = sp_ecc_point_new(heap, pd, p);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    t = (sp_point*)XMALLOC(sizeof(sp_point) * 33, heap, DYNAMIC_TYPE_ECC);
-    if (t == NULL)
-        err = MEMORY_E;
-    tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 4 * 6, heap,
-                             DYNAMIC_TYPE_ECC);
-    if (tmp == NULL)
-        err = MEMORY_E;
-#else
-    t = td;
-    tmp = tmpd;
-#endif
-
-
-    if (err == MP_OKAY) {
-        /* t[0] = {0, 0, 1} * norm */
-        XMEMSET(&t[0], 0, sizeof(t[0]));
-        t[0].infinity = 1;
-        /* t[1] = {g->x, g->y, g->z} * norm */
-        err = sp_256_mod_mul_norm_4(t[1].x, g->x, p256_mod);
-    }
-    if (err == MP_OKAY)
-        err = sp_256_mod_mul_norm_4(t[1].y, g->y, p256_mod);
-    if (err == MP_OKAY)
-        err = sp_256_mod_mul_norm_4(t[1].z, g->z, p256_mod);
-
-    if (err == MP_OKAY) {
-        t[1].infinity = 0;
-        /* t[2] ... t[32]  */
-    sp_256_proj_point_dbl_n_store_avx2_4(t, &t[ 1], 5, 1, tmp);
-    sp_256_proj_point_add_avx2_4(&t[ 3], &t[ 2], &t[ 1], tmp);
-    sp_256_proj_point_dbl_avx2_4(&t[ 6], &t[ 3], tmp);
-    sp_256_proj_point_add_sub_avx2_4(&t[ 7], &t[ 5], &t[ 6], &t[ 1], tmp);
-    sp_256_proj_point_dbl_avx2_4(&t[10], &t[ 5], tmp);
-    sp_256_proj_point_add_sub_avx2_4(&t[11], &t[ 9], &t[10], &t[ 1], tmp);
-    sp_256_proj_point_dbl_avx2_4(&t[12], &t[ 6], tmp);
-    sp_256_proj_point_dbl_avx2_4(&t[14], &t[ 7], tmp);
-    sp_256_proj_point_add_sub_avx2_4(&t[15], &t[13], &t[14], &t[ 1], tmp);
-    sp_256_proj_point_dbl_avx2_4(&t[18], &t[ 9], tmp);
-    sp_256_proj_point_add_sub_avx2_4(&t[19], &t[17], &t[18], &t[ 1], tmp);
-    sp_256_proj_point_dbl_avx2_4(&t[20], &t[10], tmp);
-    sp_256_proj_point_dbl_avx2_4(&t[22], &t[11], tmp);
-    sp_256_proj_point_add_sub_avx2_4(&t[23], &t[21], &t[22], &t[ 1], tmp);
-    sp_256_proj_point_dbl_avx2_4(&t[24], &t[12], tmp);
-    sp_256_proj_point_dbl_avx2_4(&t[26], &t[13], tmp);
-    sp_256_proj_point_add_sub_avx2_4(&t[27], &t[25], &t[26], &t[ 1], tmp);
-    sp_256_proj_point_dbl_avx2_4(&t[28], &t[14], tmp);
-    sp_256_proj_point_dbl_avx2_4(&t[30], &t[15], tmp);
-    sp_256_proj_point_add_sub_avx2_4(&t[31], &t[29], &t[30], &t[ 1], tmp);
-
-        negy = t[0].y;
-
-        sp_256_ecc_recode_6_4(k, v);
-
-        i = 42;
-        XMEMCPY(rt, &t[v[i].i], sizeof(sp_point));
-        for (--i; i>=0; i--) {
-            sp_256_proj_point_dbl_n_avx2_4(rt, rt, 6, tmp);
-
-            XMEMCPY(p, &t[v[i].i], sizeof(sp_point));
-            sp_256_sub_4(negy, p256_mod, p->y);
-            sp_256_cond_copy_4(p->y, negy, (sp_digit)0 - v[i].neg);
-            sp_256_proj_point_add_avx2_4(rt, rt, p, tmp);
-        }
-
-        if (map)
-            sp_256_map_avx2_4(r, rt, tmp);
-        else
-            XMEMCPY(r, rt, sizeof(sp_point));
-    }
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (t != NULL)
-        XFREE(t, heap, DYNAMIC_TYPE_ECC);
-    if (tmp != NULL)
-        XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
-#endif
-    sp_ecc_point_free(p, 0, heap);
-    sp_ecc_point_free(rt, 0, heap);
-
-    return err;
-}
-
-#endif /* HAVE_INTEL_AVX2 */
-/* A table entry for pre-computed points. */
-typedef struct sp_table_entry {
-    sp_digit x[4];
-    sp_digit y[4];
-    byte infinity;
-} sp_table_entry;
-
-#if defined(FP_ECC) || defined(WOLFSSL_SP_SMALL)
-#endif /* FP_ECC || WOLFSSL_SP_SMALL */
-/* Add two Montgomery form projective points. The second point has a q value of
- * one.
- * Only the first point can be the same pointer as the result point.
- *
- * r  Result of addition.
- * p  Frist point to add.
- * q  Second point to add.
- * t  Temporary ordinate data.
- */
-static void sp_256_proj_point_add_qz1_4(sp_point* r, sp_point* p,
-        sp_point* q, sp_digit* t)
-{
-    sp_point *ap[2];
-    sp_point *rp[2];
-    sp_point tp;
-    sp_digit* t1 = t;
-    sp_digit* t2 = t + 2*4;
-    sp_digit* t3 = t + 4*4;
-    sp_digit* t4 = t + 6*4;
-    sp_digit* t5 = t + 8*4;
-    sp_digit* x;
-    sp_digit* y;
-    sp_digit* z;
-    int i;
-
-    /* Check double */
-    sp_256_sub_4(t1, p256_mod, q->y);
-    sp_256_norm_4(t1);
-    if (sp_256_cmp_equal_4(p->x, q->x) & sp_256_cmp_equal_4(p->z, q->z) &
-        (sp_256_cmp_equal_4(p->y, q->y) | sp_256_cmp_equal_4(p->y, t1))) {
-        sp_256_proj_point_dbl_4(r, p, t);
-    }
-    else {
-        rp[0] = r;
-        rp[1] = &tp;
-        XMEMSET(&tp, 0, sizeof(tp));
-        x = rp[p->infinity | q->infinity]->x;
-        y = rp[p->infinity | q->infinity]->y;
-        z = rp[p->infinity | q->infinity]->z;
-
-        ap[0] = p;
-        ap[1] = q;
-        for (i=0; i<4; i++)
-            r->x[i] = ap[p->infinity]->x[i];
-        for (i=0; i<4; i++)
-            r->y[i] = ap[p->infinity]->y[i];
-        for (i=0; i<4; i++)
-            r->z[i] = ap[p->infinity]->z[i];
-        r->infinity = ap[p->infinity]->infinity;
-
-        /* U2 = X2*Z1^2 */
-        sp_256_mont_sqr_4(t2, z, p256_mod, p256_mp_mod);
-        sp_256_mont_mul_4(t4, t2, z, p256_mod, p256_mp_mod);
-        sp_256_mont_mul_4(t2, t2, q->x, p256_mod, p256_mp_mod);
-        /* S2 = Y2*Z1^3 */
-        sp_256_mont_mul_4(t4, t4, q->y, p256_mod, p256_mp_mod);
-        /* H = U2 - X1 */
-        sp_256_mont_sub_4(t2, t2, x, p256_mod);
-        /* R = S2 - Y1 */
-        sp_256_mont_sub_4(t4, t4, y, p256_mod);
-        /* Z3 = H*Z1 */
-        sp_256_mont_mul_4(z, z, t2, p256_mod, p256_mp_mod);
-        /* X3 = R^2 - H^3 - 2*X1*H^2 */
-        sp_256_mont_sqr_4(t1, t4, p256_mod, p256_mp_mod);
-        sp_256_mont_sqr_4(t5, t2, p256_mod, p256_mp_mod);
-        sp_256_mont_mul_4(t3, x, t5, p256_mod, p256_mp_mod);
-        sp_256_mont_mul_4(t5, t5, t2, p256_mod, p256_mp_mod);
-        sp_256_mont_sub_4(x, t1, t5, p256_mod);
-        sp_256_mont_dbl_4(t1, t3, p256_mod);
-        sp_256_mont_sub_4(x, x, t1, p256_mod);
-        /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */
-        sp_256_mont_sub_4(t3, t3, x, p256_mod);
-        sp_256_mont_mul_4(t3, t3, t4, p256_mod, p256_mp_mod);
-        sp_256_mont_mul_4(t5, t5, y, p256_mod, p256_mp_mod);
-        sp_256_mont_sub_4(y, t3, t5, p256_mod);
-    }
-}
-
-#ifdef FP_ECC
-/* Convert the projective point to affine.
- * Ordinates are in Montgomery form.
- *
- * a  Point to convert.
- * t  Temprorary data.
- */
-static void sp_256_proj_to_affine_4(sp_point* a, sp_digit* t)
-{
-    sp_digit* t1 = t;
-    sp_digit* t2 = t + 2 * 4;
-    sp_digit* tmp = t + 4 * 4;
-
-    sp_256_mont_inv_4(t1, a->z, tmp);
-
-    sp_256_mont_sqr_4(t2, t1, p256_mod, p256_mp_mod);
-    sp_256_mont_mul_4(t1, t2, t1, p256_mod, p256_mp_mod);
-
-    sp_256_mont_mul_4(a->x, a->x, t2, p256_mod, p256_mp_mod);
-    sp_256_mont_mul_4(a->y, a->y, t1, p256_mod, p256_mp_mod);
-    XMEMCPY(a->z, p256_norm_mod, sizeof(p256_norm_mod));
-}
-
-/* Generate the pre-computed table of points for the base point.
- *
- * a      The base point.
- * table  Place to store generated point data.
- * tmp    Temprorary data.
- * heap  Heap to use for allocation.
- */
-static int sp_256_gen_stripe_table_4(sp_point* a,
-        sp_table_entry* table, sp_digit* tmp, void* heap)
-{
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point td, s1d, s2d;
-#endif
-    sp_point* t;
-    sp_point* s1 = NULL;
-    sp_point* s2 = NULL;
-    int i, j;
-    int err;
-
-    (void)heap;
-
-    err = sp_ecc_point_new(heap, td, t);
-    if (err == MP_OKAY)
-        err = sp_ecc_point_new(heap, s1d, s1);
-    if (err == MP_OKAY)
-        err = sp_ecc_point_new(heap, s2d, s2);
-
-    if (err == MP_OKAY)
-        err = sp_256_mod_mul_norm_4(t->x, a->x, p256_mod);
-    if (err == MP_OKAY)
-        err = sp_256_mod_mul_norm_4(t->y, a->y, p256_mod);
-    if (err == MP_OKAY)
-        err = sp_256_mod_mul_norm_4(t->z, a->z, p256_mod);
-    if (err == MP_OKAY) {
-        t->infinity = 0;
-        sp_256_proj_to_affine_4(t, tmp);
-
-        XMEMCPY(s1->z, p256_norm_mod, sizeof(p256_norm_mod));
-        s1->infinity = 0;
-        XMEMCPY(s2->z, p256_norm_mod, sizeof(p256_norm_mod));
-        s2->infinity = 0;
-
-        /* table[0] = {0, 0, infinity} */
-        XMEMSET(&table[0], 0, sizeof(sp_table_entry));
-        table[0].infinity = 1;
-        /* table[1] = Affine version of 'a' in Montgomery form */
-        XMEMCPY(table[1].x, t->x, sizeof(table->x));
-        XMEMCPY(table[1].y, t->y, sizeof(table->y));
-        table[1].infinity = 0;
-
-        for (i=1; i<8; i++) {
-            sp_256_proj_point_dbl_n_4(t, t, 32, tmp);
-            sp_256_proj_to_affine_4(t, tmp);
-            XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
-            XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
-            table[1<<i].infinity = 0;
-        }
-
-        for (i=1; i<8; i++) {
-            XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
-            XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
-            for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
-                XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
-                XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
-                sp_256_proj_point_add_qz1_4(t, s1, s2, tmp);
-                sp_256_proj_to_affine_4(t, tmp);
-                XMEMCPY(table[j].x, t->x, sizeof(table->x));
-                XMEMCPY(table[j].y, t->y, sizeof(table->y));
-                table[j].infinity = 0;
-            }
-        }
-    }
-
-    sp_ecc_point_free(s2, 0, heap);
-    sp_ecc_point_free(s1, 0, heap);
-    sp_ecc_point_free( t, 0, heap);
-
-    return err;
-}
-
-#endif /* FP_ECC */
-#if defined(FP_ECC) || defined(WOLFSSL_SP_SMALL)
-/* Multiply the point by the scalar and return the result.
- * If map is true then convert result to affine co-ordinates.
- *
- * r     Resulting point.
- * k     Scalar to multiply by.
- * map   Indicates whether to convert result to affine.
- * heap  Heap to use for allocation.
- * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
- */
-static int sp_256_ecc_mulmod_stripe_4(sp_point* r, sp_point* g,
-        sp_table_entry* table, sp_digit* k, int map, void* heap)
-{
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point rtd;
-    sp_point pd;
-    sp_digit td[2 * 4 * 5];
-#endif
-    sp_point* rt;
-    sp_point* p = NULL;
-    sp_digit* t;
-    int i, j;
-    int y, x;
-    int err;
-
-    (void)g;
-    (void)heap;
-
-    err = sp_ecc_point_new(heap, rtd, rt);
-    if (err == MP_OKAY)
-        err = sp_ecc_point_new(heap, pd, p);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 4 * 5, heap,
-                           DYNAMIC_TYPE_ECC);
-    if (t == NULL)
-        err = MEMORY_E;
-#else
-    t = td;
-#endif
-
-    if (err == MP_OKAY) {
-        XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
-        XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
-
-        y = 0;
-        for (j=0,x=31; j<8; j++,x+=32)
-            y |= ((k[x / 64] >> (x % 64)) & 1) << j;
-        XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
-        XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
-        rt->infinity = table[y].infinity;
-        for (i=30; i>=0; i--) {
-            y = 0;
-            for (j=0,x=i; j<8; j++,x+=32)
-                y |= ((k[x / 64] >> (x % 64)) & 1) << j;
-
-            sp_256_proj_point_dbl_4(rt, rt, t);
-            XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
-            XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
-            p->infinity = table[y].infinity;
-            sp_256_proj_point_add_qz1_4(rt, rt, p, t);
-        }
-
-        if (map)
-            sp_256_map_4(r, rt, t);
-        else
-            XMEMCPY(r, rt, sizeof(sp_point));
-    }
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (t != NULL)
-        XFREE(t, heap, DYNAMIC_TYPE_ECC);
-#endif
-    sp_ecc_point_free(p, 0, heap);
-    sp_ecc_point_free(rt, 0, heap);
-
-    return err;
-}
-
-#endif /* FP_ECC || WOLFSSL_SP_SMALL */
-#ifdef FP_ECC
-#ifndef FP_ENTRIES
-    #define FP_ENTRIES 16
-#endif
-
-typedef struct sp_cache_t {
-    sp_digit x[4];
-    sp_digit y[4];
-    sp_table_entry table[256];
-    uint32_t cnt;
-    int set;
-} sp_cache_t;
-
-static THREAD_LS_T sp_cache_t sp_cache[FP_ENTRIES];
-static THREAD_LS_T int sp_cache_last = -1;
-static THREAD_LS_T int sp_cache_inited = 0;
-
-#ifndef HAVE_THREAD_LS
-    static volatile int initCacheMutex = 0;
-    static wolfSSL_Mutex sp_cache_lock;
-#endif
-
-static void sp_ecc_get_cache(sp_point* g, sp_cache_t** cache)
-{
-    int i, j;
-    uint32_t least;
-
-    if (sp_cache_inited == 0) {
-        for (i=0; i<FP_ENTRIES; i++) {
-            sp_cache[i].set = 0;
-        }
-        sp_cache_inited = 1;
-    }
-
-    /* Compare point with those in cache. */
-    for (i=0; i<FP_ENTRIES; i++) {
-        if (!sp_cache[i].set)
-            continue;
-
-        if (sp_256_cmp_equal_4(g->x, sp_cache[i].x) & 
-                           sp_256_cmp_equal_4(g->y, sp_cache[i].y)) {
-            sp_cache[i].cnt++;
-            break;
-        }
-    }
-
-    /* No match. */
-    if (i == FP_ENTRIES) {
-        /* Find empty entry. */
-        i = (sp_cache_last + 1) % FP_ENTRIES;
-        for (; i != sp_cache_last; i=(i+1)%FP_ENTRIES) {
-            if (!sp_cache[i].set) {
-                break;
-            }
-        }
-
-        /* Evict least used. */
-        if (i == sp_cache_last) {
-            least = sp_cache[0].cnt;
-            for (j=1; j<FP_ENTRIES; j++) {
-                if (sp_cache[j].cnt < least) {
-                    i = j;
-                    least = sp_cache[i].cnt;
-                }
-            }
-        }
-
-        XMEMCPY(sp_cache[i].x, g->x, sizeof(sp_cache[i].x));
-        XMEMCPY(sp_cache[i].y, g->y, sizeof(sp_cache[i].y));
-        sp_cache[i].set = 1;
-        sp_cache[i].cnt = 1;
-    }
-
-    *cache = &sp_cache[i];
-    sp_cache_last = i;
-}
-#endif /* FP_ECC */
-
-/* Multiply the base point of P256 by the scalar and return the result.
- * If map is true then convert result to affine co-ordinates.
- *
- * r     Resulting point.
- * g     Point to multiply.
- * k     Scalar to multiply by.
- * map   Indicates whether to convert result to affine.
- * heap  Heap to use for allocation.
- * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
- */
-static int sp_256_ecc_mulmod_4(sp_point* r, sp_point* g, sp_digit* k,
-        int map, void* heap)
-{
-#ifndef FP_ECC
-    return sp_256_ecc_mulmod_win_add_sub_4(r, g, k, map, heap);
-#else
-    sp_digit tmp[2 * 4 * 5];
-    sp_cache_t* cache;
-    int err = MP_OKAY;
-
-#ifndef HAVE_THREAD_LS
-    if (initCacheMutex == 0) {
-         wc_InitMutex(&sp_cache_lock);
-         initCacheMutex = 1;
-    }
-    if (wc_LockMutex(&sp_cache_lock) != 0)
-       err = BAD_MUTEX_E;
-#endif /* HAVE_THREAD_LS */
-
-    if (err == MP_OKAY) {
-        sp_ecc_get_cache(g, &cache);
-        if (cache->cnt == 2)
-            sp_256_gen_stripe_table_4(g, cache->table, tmp, heap);
-
-#ifndef HAVE_THREAD_LS
-        wc_UnLockMutex(&sp_cache_lock);
-#endif /* HAVE_THREAD_LS */
-
-        if (cache->cnt < 2) {
-            err = sp_256_ecc_mulmod_win_add_sub_4(r, g, k, map, heap);
-        }
-        else {
-            err = sp_256_ecc_mulmod_stripe_4(r, g, cache->table, k,
-                    map, heap);
-        }
-    }
-
-    return err;
-#endif
-}
-
-#ifdef HAVE_INTEL_AVX2
-#if defined(FP_ECC) || defined(WOLFSSL_SP_SMALL)
-#endif /* FP_ECC || WOLFSSL_SP_SMALL */
-/* Add two Montgomery form projective points. The second point has a q value of
- * one.
- * Only the first point can be the same pointer as the result point.
- *
- * r  Result of addition.
- * p  Frist point to add.
- * q  Second point to add.
- * t  Temporary ordinate data.
- */
-static void sp_256_proj_point_add_qz1_avx2_4(sp_point* r, sp_point* p,
-        sp_point* q, sp_digit* t)
-{
-    sp_point *ap[2];
-    sp_point *rp[2];
-    sp_point tp;
-    sp_digit* t1 = t;
-    sp_digit* t2 = t + 2*4;
-    sp_digit* t3 = t + 4*4;
-    sp_digit* t4 = t + 6*4;
-    sp_digit* t5 = t + 8*4;
-    sp_digit* x;
-    sp_digit* y;
-    sp_digit* z;
-    int i;
-
-    /* Check double */
-    sp_256_sub_4(t1, p256_mod, q->y);
-    sp_256_norm_4(t1);
-    if (sp_256_cmp_equal_4(p->x, q->x) & sp_256_cmp_equal_4(p->z, q->z) &
-        (sp_256_cmp_equal_4(p->y, q->y) | sp_256_cmp_equal_4(p->y, t1))) {
-        sp_256_proj_point_dbl_4(r, p, t);
-    }
-    else {
-        rp[0] = r;
-        rp[1] = &tp;
-        XMEMSET(&tp, 0, sizeof(tp));
-        x = rp[p->infinity | q->infinity]->x;
-        y = rp[p->infinity | q->infinity]->y;
-        z = rp[p->infinity | q->infinity]->z;
-
-        ap[0] = p;
-        ap[1] = q;
-        for (i=0; i<4; i++)
-            r->x[i] = ap[p->infinity]->x[i];
-        for (i=0; i<4; i++)
-            r->y[i] = ap[p->infinity]->y[i];
-        for (i=0; i<4; i++)
-            r->z[i] = ap[p->infinity]->z[i];
-        r->infinity = ap[p->infinity]->infinity;
-
-        /* U2 = X2*Z1^2 */
-        sp_256_mont_sqr_avx2_4(t2, z, p256_mod, p256_mp_mod);
-        sp_256_mont_mul_avx2_4(t4, t2, z, p256_mod, p256_mp_mod);
-        sp_256_mont_mul_avx2_4(t2, t2, q->x, p256_mod, p256_mp_mod);
-        /* S2 = Y2*Z1^3 */
-        sp_256_mont_mul_avx2_4(t4, t4, q->y, p256_mod, p256_mp_mod);
-        /* H = U2 - X1 */
-        sp_256_mont_sub_4(t2, t2, x, p256_mod);
-        /* R = S2 - Y1 */
-        sp_256_mont_sub_4(t4, t4, y, p256_mod);
-        /* Z3 = H*Z1 */
-        sp_256_mont_mul_avx2_4(z, z, t2, p256_mod, p256_mp_mod);
-        /* X3 = R^2 - H^3 - 2*X1*H^2 */
-        sp_256_mont_sqr_avx2_4(t1, t4, p256_mod, p256_mp_mod);
-        sp_256_mont_sqr_avx2_4(t5, t2, p256_mod, p256_mp_mod);
-        sp_256_mont_mul_avx2_4(t3, x, t5, p256_mod, p256_mp_mod);
-        sp_256_mont_mul_avx2_4(t5, t5, t2, p256_mod, p256_mp_mod);
-        sp_256_mont_sub_4(x, t1, t5, p256_mod);
-        sp_256_mont_dbl_4(t1, t3, p256_mod);
-        sp_256_mont_sub_4(x, x, t1, p256_mod);
-        /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */
-        sp_256_mont_sub_4(t3, t3, x, p256_mod);
-        sp_256_mont_mul_avx2_4(t3, t3, t4, p256_mod, p256_mp_mod);
-        sp_256_mont_mul_avx2_4(t5, t5, y, p256_mod, p256_mp_mod);
-        sp_256_mont_sub_4(y, t3, t5, p256_mod);
-    }
-}
-
-#ifdef FP_ECC
-/* Convert the projective point to affine.
- * Ordinates are in Montgomery form.
- *
- * a  Point to convert.
- * t  Temprorary data.
- */
-static void sp_256_proj_to_affine_avx2_4(sp_point* a, sp_digit* t)
-{
-    sp_digit* t1 = t;
-    sp_digit* t2 = t + 2 * 4;
-    sp_digit* tmp = t + 4 * 4;
-
-    sp_256_mont_inv_avx2_4(t1, a->z, tmp);
-
-    sp_256_mont_sqr_avx2_4(t2, t1, p256_mod, p256_mp_mod);
-    sp_256_mont_mul_avx2_4(t1, t2, t1, p256_mod, p256_mp_mod);
-
-    sp_256_mont_mul_avx2_4(a->x, a->x, t2, p256_mod, p256_mp_mod);
-    sp_256_mont_mul_avx2_4(a->y, a->y, t1, p256_mod, p256_mp_mod);
-    XMEMCPY(a->z, p256_norm_mod, sizeof(p256_norm_mod));
-}
-
-/* Generate the pre-computed table of points for the base point.
- *
- * a      The base point.
- * table  Place to store generated point data.
- * tmp    Temprorary data.
- * heap  Heap to use for allocation.
- */
-static int sp_256_gen_stripe_table_avx2_4(sp_point* a,
-        sp_table_entry* table, sp_digit* tmp, void* heap)
-{
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point td, s1d, s2d;
-#endif
-    sp_point* t;
-    sp_point* s1 = NULL;
-    sp_point* s2 = NULL;
-    int i, j;
-    int err;
-
-    (void)heap;
-
-    err = sp_ecc_point_new(heap, td, t);
-    if (err == MP_OKAY)
-        err = sp_ecc_point_new(heap, s1d, s1);
-    if (err == MP_OKAY)
-        err = sp_ecc_point_new(heap, s2d, s2);
-
-    if (err == MP_OKAY)
-        err = sp_256_mod_mul_norm_4(t->x, a->x, p256_mod);
-    if (err == MP_OKAY)
-        err = sp_256_mod_mul_norm_4(t->y, a->y, p256_mod);
-    if (err == MP_OKAY)
-        err = sp_256_mod_mul_norm_4(t->z, a->z, p256_mod);
-    if (err == MP_OKAY) {
-        t->infinity = 0;
-        sp_256_proj_to_affine_avx2_4(t, tmp);
-
-        XMEMCPY(s1->z, p256_norm_mod, sizeof(p256_norm_mod));
-        s1->infinity = 0;
-        XMEMCPY(s2->z, p256_norm_mod, sizeof(p256_norm_mod));
-        s2->infinity = 0;
-
-        /* table[0] = {0, 0, infinity} */
-        XMEMSET(&table[0], 0, sizeof(sp_table_entry));
-        table[0].infinity = 1;
-        /* table[1] = Affine version of 'a' in Montgomery form */
-        XMEMCPY(table[1].x, t->x, sizeof(table->x));
-        XMEMCPY(table[1].y, t->y, sizeof(table->y));
-        table[1].infinity = 0;
-
-        for (i=1; i<8; i++) {
-            sp_256_proj_point_dbl_n_avx2_4(t, t, 32, tmp);
-            sp_256_proj_to_affine_avx2_4(t, tmp);
-            XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
-            XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
-            table[1<<i].infinity = 0;
-        }
-
-        for (i=1; i<8; i++) {
-            XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
-            XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
-            for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
-                XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
-                XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
-                sp_256_proj_point_add_qz1_avx2_4(t, s1, s2, tmp);
-                sp_256_proj_to_affine_avx2_4(t, tmp);
-                XMEMCPY(table[j].x, t->x, sizeof(table->x));
-                XMEMCPY(table[j].y, t->y, sizeof(table->y));
-                table[j].infinity = 0;
-            }
-        }
-    }
-
-    sp_ecc_point_free(s2, 0, heap);
-    sp_ecc_point_free(s1, 0, heap);
-    sp_ecc_point_free( t, 0, heap);
-
-    return err;
-}
-
-#endif /* FP_ECC */
-#if defined(FP_ECC) || defined(WOLFSSL_SP_SMALL)
-/* Multiply the point by the scalar and return the result.
- * If map is true then convert result to affine co-ordinates.
- *
- * r     Resulting point.
- * k     Scalar to multiply by.
- * map   Indicates whether to convert result to affine.
- * heap  Heap to use for allocation.
- * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
- */
-static int sp_256_ecc_mulmod_stripe_avx2_4(sp_point* r, sp_point* g,
-        sp_table_entry* table, sp_digit* k, int map, void* heap)
-{
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point rtd;
-    sp_point pd;
-    sp_digit td[2 * 4 * 5];
-#endif
-    sp_point* rt;
-    sp_point* p = NULL;
-    sp_digit* t;
-    int i, j;
-    int y, x;
-    int err;
-
-    (void)g;
-    (void)heap;
-
-    err = sp_ecc_point_new(heap, rtd, rt);
-    if (err == MP_OKAY)
-        err = sp_ecc_point_new(heap, pd, p);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 4 * 5, heap,
-                           DYNAMIC_TYPE_ECC);
-    if (t == NULL)
-        err = MEMORY_E;
-#else
-    t = td;
-#endif
-
-    if (err == MP_OKAY) {
-        XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
-        XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
-
-        y = 0;
-        for (j=0,x=31; j<8; j++,x+=32)
-            y |= ((k[x / 64] >> (x % 64)) & 1) << j;
-        XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
-        XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
-        rt->infinity = table[y].infinity;
-        for (i=30; i>=0; i--) {
-            y = 0;
-            for (j=0,x=i; j<8; j++,x+=32)
-                y |= ((k[x / 64] >> (x % 64)) & 1) << j;
-
-            sp_256_proj_point_dbl_avx2_4(rt, rt, t);
-            XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
-            XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
-            p->infinity = table[y].infinity;
-            sp_256_proj_point_add_qz1_avx2_4(rt, rt, p, t);
-        }
-
-        if (map)
-            sp_256_map_avx2_4(r, rt, t);
-        else
-            XMEMCPY(r, rt, sizeof(sp_point));
-    }
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (t != NULL)
-        XFREE(t, heap, DYNAMIC_TYPE_ECC);
-#endif
-    sp_ecc_point_free(p, 0, heap);
-    sp_ecc_point_free(rt, 0, heap);
-
-    return err;
-}
-
-#endif /* FP_ECC || WOLFSSL_SP_SMALL */
-/* Multiply the base point of P256 by the scalar and return the result.
- * If map is true then convert result to affine co-ordinates.
- *
- * r     Resulting point.
- * g     Point to multiply.
- * k     Scalar to multiply by.
- * map   Indicates whether to convert result to affine.
- * heap  Heap to use for allocation.
- * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
- */
-static int sp_256_ecc_mulmod_avx2_4(sp_point* r, sp_point* g, sp_digit* k,
-        int map, void* heap)
-{
-#ifndef FP_ECC
-    return sp_256_ecc_mulmod_win_add_sub_avx2_4(r, g, k, map, heap);
-#else
-    sp_digit tmp[2 * 4 * 5];
-    sp_cache_t* cache;
-    int err = MP_OKAY;
-
-#ifndef HAVE_THREAD_LS
-    if (initCacheMutex == 0) {
-         wc_InitMutex(&sp_cache_lock);
-         initCacheMutex = 1;
-    }
-    if (wc_LockMutex(&sp_cache_lock) != 0)
-       err = BAD_MUTEX_E;
-#endif /* HAVE_THREAD_LS */
-
-    if (err == MP_OKAY) {
-        sp_ecc_get_cache(g, &cache);
-        if (cache->cnt == 2)
-            sp_256_gen_stripe_table_avx2_4(g, cache->table, tmp, heap);
-
-#ifndef HAVE_THREAD_LS
-        wc_UnLockMutex(&sp_cache_lock);
-#endif /* HAVE_THREAD_LS */
-
-        if (cache->cnt < 2) {
-            err = sp_256_ecc_mulmod_win_add_sub_avx2_4(r, g, k, map, heap);
-        }
-        else {
-            err = sp_256_ecc_mulmod_stripe_avx2_4(r, g, cache->table, k,
-                    map, heap);
-        }
-    }
-
-    return err;
-#endif
-}
-
-#endif /* HAVE_INTEL_AVX2 */
-/* Multiply the point by the scalar and return the result.
- * If map is true then convert result to affine co-ordinates.
- *
- * km    Scalar to multiply by.
- * p     Point to multiply.
- * r     Resulting point.
- * map   Indicates whether to convert result to affine.
- * heap  Heap to use for allocation.
- * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
- */
-int sp_ecc_mulmod_256(mp_int* km, ecc_point* gm, ecc_point* r, int map,
-        void* heap)
-{
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point p;
-    sp_digit kd[4];
-#endif
-    sp_point* point;
-    sp_digit* k = NULL;
-    int err = MP_OKAY;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    err = sp_ecc_point_new(heap, p, point);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        k = XMALLOC(sizeof(sp_digit) * 4, heap, DYNAMIC_TYPE_ECC);
-        if (k == NULL)
-            err = MEMORY_E;
-    }
-#else
-    k = kd;
-#endif
-    if (err == MP_OKAY) {
-        sp_256_from_mp(k, 4, km);
-        sp_256_point_from_ecc_point_4(point, gm);
-
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_256_ecc_mulmod_avx2_4(point, point, k, map, heap);
-        else
-#endif
-            err = sp_256_ecc_mulmod_4(point, point, k, map, heap);
-    }
-    if (err == MP_OKAY)
-        err = sp_256_point_to_ecc_point_4(point, r);
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (k != NULL)
-        XFREE(k, heap, DYNAMIC_TYPE_ECC);
-#endif
-    sp_ecc_point_free(point, 0, heap);
-
-    return err;
-}
-
-#ifdef WOLFSSL_SP_SMALL
-static sp_table_entry p256_table[256] = {
-    /* 0 */
-    { { 0x00, 0x00, 0x00, 0x00 },
-      { 0x00, 0x00, 0x00, 0x00 },
-      1 },
-    /* 1 */
-    { { 0x79e730d418a9143cl,0x75ba95fc5fedb601l,0x79fb732b77622510l,
-        0x18905f76a53755c6l },
-      { 0xddf25357ce95560al,0x8b4ab8e4ba19e45cl,0xd2e88688dd21f325l,
-        0x8571ff1825885d85l },
-      0 },
-    /* 2 */
-    { { 0x202886024147519al,0xd0981eac26b372f0l,0xa9d4a7caa785ebc8l,
-        0xd953c50ddbdf58e9l },
-      { 0x9d6361ccfd590f8fl,0x72e9626b44e6c917l,0x7fd9611022eb64cfl,
-        0x863ebb7e9eb288f3l },
-      0 },
-    /* 3 */
-    { { 0x7856b6235cdb6485l,0x808f0ea22f0a2f97l,0x3e68d9544f7e300bl,
-        0x00076055b5ff80a0l },
-      { 0x7634eb9b838d2010l,0x54014fbb3243708al,0xe0e47d39842a6606l,
-        0x8308776134373ee0l },
-      0 },
-    /* 4 */
-    { { 0x4f922fc516a0d2bbl,0x0d5cc16c1a623499l,0x9241cf3a57c62c8bl,
-        0x2f5e6961fd1b667fl },
-      { 0x5c15c70bf5a01797l,0x3d20b44d60956192l,0x04911b37071fdb52l,
-        0xf648f9168d6f0f7bl },
-      0 },
-    /* 5 */
-    { { 0x9e566847e137bbbcl,0xe434469e8a6a0becl,0xb1c4276179d73463l,
-        0x5abe0285133d0015l },
-      { 0x92aa837cc04c7dabl,0x573d9f4c43260c07l,0x0c93156278e6cc37l,
-        0x94bb725b6b6f7383l },
-      0 },
-    /* 6 */
-    { { 0xbbf9b48f720f141cl,0x6199b3cd2df5bc74l,0xdc3f6129411045c4l,
-        0xcdd6bbcb2f7dc4efl },
-      { 0xcca6700beaf436fdl,0x6f647f6db99326bel,0x0c0fa792014f2522l,
-        0xa361bebd4bdae5f6l },
-      0 },
-    /* 7 */
-    { { 0x28aa2558597c13c7l,0xc38d635f50b7c3e1l,0x07039aecf3c09d1dl,
-        0xba12ca09c4b5292cl },
-      { 0x9e408fa459f91dfdl,0x3af43b66ceea07fbl,0x1eceb0899d780b29l,
-        0x53ebb99d701fef4bl },
-      0 },
-    /* 8 */
-    { { 0x4fe7ee31b0e63d34l,0xf4600572a9e54fabl,0xc0493334d5e7b5a4l,
-        0x8589fb9206d54831l },
-      { 0xaa70f5cc6583553al,0x0879094ae25649e5l,0xcc90450710044652l,
-        0xebb0696d02541c4fl },
-      0 },
-    /* 9 */
-    { { 0x4616ca15ac1647c5l,0xb8127d47c4cf5799l,0xdc666aa3764dfbacl,
-        0xeb2820cbd1b27da3l },
-      { 0x9406f8d86a87e008l,0xd87dfa9d922378f3l,0x56ed2e4280ccecb2l,
-        0x1f28289b55a7da1dl },
-      0 },
-    /* 10 */
-    { { 0xabbaa0c03b89da99l,0xa6f2d79eb8284022l,0x27847862b81c05e8l,
-        0x337a4b5905e54d63l },
-      { 0x3c67500d21f7794al,0x207005b77d6d7f61l,0x0a5a378104cfd6e8l,
-        0x0d65e0d5f4c2fbd6l },
-      0 },
-    /* 11 */
-    { { 0xd9d09bbeb5275d38l,0x4268a7450be0a358l,0xf0762ff4973eb265l,
-        0xc23da24252f4a232l },
-      { 0x5da1b84f0b94520cl,0x09666763b05bd78el,0x3a4dcb8694d29ea1l,
-        0x19de3b8cc790cff1l },
-      0 },
-    /* 12 */
-    { { 0x183a716c26c5fe04l,0x3b28de0b3bba1bdbl,0x7432c586a4cb712cl,
-        0xe34dcbd491fccbfdl },
-      { 0xb408d46baaa58403l,0x9a69748682e97a53l,0x9e39012736aaa8afl,
-        0xe7641f447b4e0f7fl },
-      0 },
-    /* 13 */
-    { { 0x7d753941df64ba59l,0xd33f10ec0b0242fcl,0x4f06dfc6a1581859l,
-        0x4a12df57052a57bfl },
-      { 0xbfa6338f9439dbd0l,0xd3c24bd4bde53e1fl,0xfd5e4ffa21f1b314l,
-        0x6af5aa93bb5bea46l },
-      0 },
-    /* 14 */
-    { { 0xda10b69910c91999l,0x0a24b4402a580491l,0x3e0094b4b8cc2090l,
-        0x5fe3475a66a44013l },
-      { 0xb0f8cabdf93e7b4bl,0x292b501a7c23f91al,0x42e889aecd1e6263l,
-        0xb544e308ecfea916l },
-      0 },
-    /* 15 */
-    { { 0x6478c6e916ddfdcel,0x2c329166f89179e6l,0x4e8d6e764d4e67e1l,
-        0xe0b6b2bda6b0c20bl },
-      { 0x0d312df2bb7efb57l,0x1aac0dde790c4007l,0xf90336ad679bc944l,
-        0x71c023de25a63774l },
-      0 },
-    /* 16 */
-    { { 0x62a8c244bfe20925l,0x91c19ac38fdce867l,0x5a96a5d5dd387063l,
-        0x61d587d421d324f6l },
-      { 0xe87673a2a37173eal,0x2384800853778b65l,0x10f8441e05bab43el,
-        0xfa11fe124621efbel },
-      0 },
-    /* 17 */
-    { { 0x1c891f2b2cb19ffdl,0x01ba8d5bb1923c23l,0xb6d03d678ac5ca8el,
-        0x586eb04c1f13bedcl },
-      { 0x0c35c6e527e8ed09l,0x1e81a33c1819ede2l,0x278fd6c056c652fal,
-        0x19d5ac0870864f11l },
-      0 },
-    /* 18 */
-    { { 0x1e99f581309a4e1fl,0xab7de71be9270074l,0x26a5ef0befd28d20l,
-        0xe7c0073f7f9c563fl },
-      { 0x1f6d663a0ef59f76l,0x669b3b5420fcb050l,0xc08c1f7a7a6602d4l,
-        0xe08504fec65b3c0al },
-      0 },
-    /* 19 */
-    { { 0xf098f68da031b3cal,0x6d1cab9ee6da6d66l,0x5bfd81fa94f246e8l,
-        0x78f018825b0996b4l },
-      { 0xb7eefde43a25787fl,0x8016f80d1dccac9bl,0x0cea4877b35bfc36l,
-        0x43a773b87e94747al },
-      0 },
-    /* 20 */
-    { { 0x62577734d2b533d5l,0x673b8af6a1bdddc0l,0x577e7c9aa79ec293l,
-        0xbb6de651c3b266b1l },
-      { 0xe7e9303ab65259b3l,0xd6a0afd3d03a7480l,0xc5ac83d19b3cfc27l,
-        0x60b4619a5d18b99bl },
-      0 },
-    /* 21 */
-    { { 0xbd6a38e11ae5aa1cl,0xb8b7652b49e73658l,0x0b130014ee5f87edl,
-        0x9d0f27b2aeebffcdl },
-      { 0xca9246317a730a55l,0x9c955b2fddbbc83al,0x07c1dfe0ac019a71l,
-        0x244a566d356ec48dl },
-      0 },
-    /* 22 */
-    { { 0x6db0394aeacf1f96l,0x9f2122a9024c271cl,0x2626ac1b82cbd3b9l,
-        0x45e58c873581ef69l },
-      { 0xd3ff479da38f9dbcl,0xa8aaf146e888a040l,0x945adfb246e0bed7l,
-        0xc040e21cc1e4b7a4l },
-      0 },
-    /* 23 */
-    { { 0x847af0006f8117b6l,0x651969ff73a35433l,0x482b35761d9475ebl,
-        0x1cdf5c97682c6ec7l },
-      { 0x7db775b411f04839l,0x7dbeacf448de1698l,0xb2921dd1b70b3219l,
-        0x046755f8a92dff3dl },
-      0 },
-    /* 24 */
-    { { 0xcc8ac5d2bce8ffcdl,0x0d53c48b2fe61a82l,0xf6f161727202d6c7l,
-        0x046e5e113b83a5f3l },
-      { 0xe7b8ff64d8007f01l,0x7fb1ef125af43183l,0x045c5ea635e1a03cl,
-        0x6e0106c3303d005bl },
-      0 },
-    /* 25 */
-    { { 0x48c7358488dd73b1l,0x7670708f995ed0d9l,0x38385ea8c56a2ab7l,
-        0x442594ede901cf1fl },
-      { 0xf8faa2c912d4b65bl,0x94c2343b96c90c37l,0xd326e4a15e978d1fl,
-        0xa796fa514c2ee68el },
-      0 },
-    /* 26 */
-    { { 0x359fb604823addd7l,0x9e2a6183e56693b3l,0xf885b78e3cbf3c80l,
-        0xe4ad2da9c69766e9l },
-      { 0x357f7f428e048a61l,0x082d198cc092d9a0l,0xfc3a1af4c03ed8efl,
-        0xc5e94046c37b5143l },
-      0 },
-    /* 27 */
-    { { 0x476a538c2be75f9el,0x6fd1a9e8cb123a78l,0xd85e4df0b109c04bl,
-        0x63283dafdb464747l },
-      { 0xce728cf7baf2df15l,0xe592c4550ad9a7f4l,0xfab226ade834bcc3l,
-        0x68bd19ab1981a938l },
-      0 },
-    /* 28 */
-    { { 0xc08ead511887d659l,0x3374d5f4b359305al,0x96986981cfe74fe3l,
-        0x495292f53c6fdfd6l },
-      { 0x4a878c9e1acec896l,0xd964b210ec5b4484l,0x6696f7e2664d60a7l,
-        0x0ec7530d26036837l },
-      0 },
-    /* 29 */
-    { { 0x2da13a05ad2687bbl,0xa1f83b6af32e21fal,0x390f5ef51dd4607bl,
-        0x0f6207a664863f0bl },
-      { 0xbd67e3bb0f138233l,0xdd66b96c272aa718l,0x8ed0040726ec88ael,
-        0xff0db07208ed6dcfl },
-      0 },
-    /* 30 */
-    { { 0x749fa1014c95d553l,0xa44052fd5d680a8al,0x183b4317ff3b566fl,
-        0x313b513c88740ea3l },
-      { 0xb402e2ac08d11549l,0x071ee10bb4dee21cl,0x26b987dd47f2320el,
-        0x2d3abcf986f19f81l },
-      0 },
-    /* 31 */
-    { { 0x4c288501815581a2l,0x9a0a6d56632211afl,0x19ba7a0f0cab2e99l,
-        0xc036fa10ded98cdfl },
-      { 0x29ae08bac1fbd009l,0x0b68b19006d15816l,0xc2eb32779b9e0d8fl,
-        0xa6b2a2c4b6d40194l },
-      0 },
-    /* 32 */
-    { { 0xd433e50f6d3549cfl,0x6f33696ffacd665el,0x695bfdacce11fcb4l,
-        0x810ee252af7c9860l },
-      { 0x65450fe17159bb2cl,0xf7dfbebe758b357bl,0x2b057e74d69fea72l,
-        0xd485717a92731745l },
-      0 },
-    /* 33 */
-    { { 0x11741a8af0cb5a98l,0xd3da8f931f3110bfl,0x1994e2cbab382adfl,
-        0x6a6045a72f9a604el },
-      { 0x170c0d3fa2b2411dl,0xbe0eb83e510e96e0l,0x3bcc9f738865b3ccl,
-        0xd3e45cfaf9e15790l },
-      0 },
-    /* 34 */
-    { { 0xce1f69bbe83f7669l,0x09f8ae8272877d6bl,0x9548ae543244278dl,
-        0x207755dee3c2c19cl },
-      { 0x87bd61d96fef1945l,0x18813cefb12d28c3l,0x9fbcd1d672df64aal,
-        0x48dc5ee57154b00dl },
-      0 },
-    /* 35 */
-    { { 0x123790bff7e5a199l,0xe0efb8cf989ccbb7l,0xc27a2bfe0a519c79l,
-        0xf2fb0aeddff6f445l },
-      { 0x41c09575f0b5025fl,0x550543d740fa9f22l,0x8fa3c8ad380bfbd0l,
-        0xa13e9015db28d525l },
-      0 },
-    /* 36 */
-    { { 0xf9f7a350a2b65cbcl,0x0b04b9722a464226l,0x265ce241e23f07a1l,
-        0x2bf0d6b01497526fl },
-      { 0xd3d4dd3f4b216fb7l,0xf7d7b867fbdda26al,0xaeb7b83f6708505cl,
-        0x42a94a5a162fe89fl },
-      0 },
-    /* 37 */
-    { { 0x5846ad0beaadf191l,0x0f8a489025a268d7l,0xe8603050494dc1f6l,
-        0x2c2dd969c65ede3dl },
-      { 0x6d02171d93849c17l,0x460488ba1da250ddl,0x4810c7063c3a5485l,
-        0xf437fa1f42c56dbcl },
-      0 },
-    /* 38 */
-    { { 0x6aa0d7144a0f7dabl,0x0f0497931776e9acl,0x52c0a050f5f39786l,
-        0xaaf45b3354707aa8l },
-      { 0x85e37c33c18d364al,0xd40b9b063e497165l,0xf417168115ec5444l,
-        0xcdf6310df4f272bcl },
-      0 },
-    /* 39 */
-    { { 0x7473c6238ea8b7efl,0x08e9351885bc2287l,0x419567722bda8e34l,
-        0xf0d008bada9e2ff2l },
-      { 0x2912671d2414d3b1l,0xb3754985b019ea76l,0x5c61b96d453bcbdbl,
-        0x5bd5c2f5ca887b8bl },
-      0 },
-    /* 40 */
-    { { 0xef0f469ef49a3154l,0x3e85a5956e2b2e9al,0x45aaec1eaa924a9cl,
-        0xaa12dfc8a09e4719l },
-      { 0x26f272274df69f1dl,0xe0e4c82ca2ff5e73l,0xb9d8ce73b7a9dd44l,
-        0x6c036e73e48ca901l },
-      0 },
-    /* 41 */
-    { { 0x5cfae12a0f6e3138l,0x6966ef0025ad345al,0x8993c64b45672bc5l,
-        0x292ff65896afbe24l },
-      { 0xd5250d445e213402l,0xf6580e274392c9fel,0x097b397fda1c72e8l,
-        0x644e0c90311b7276l },
-      0 },
-    /* 42 */
-    { { 0xe1e421e1a47153f0l,0xb86c3b79920418c9l,0x93bdce87705d7672l,
-        0xf25ae793cab79a77l },
-      { 0x1f3194a36d869d0cl,0x9d55c8824986c264l,0x49fb5ea3096e945el,
-        0x39b8e65313db0a3el },
-      0 },
-    /* 43 */
-    { { 0x37754200b6fd2e59l,0x35e2c0669255c98fl,0xd9dab21a0e2a5739l,
-        0x39122f2f0f19db06l },
-      { 0xcfbce1e003cad53cl,0x225b2c0fe65c17e3l,0x72baf1d29aa13877l,
-        0x8de80af8ce80ff8dl },
-      0 },
-    /* 44 */
-    { { 0xafbea8d9207bbb76l,0x921c7e7c21782758l,0xdfa2b74b1c0436b1l,
-        0x871949062e368c04l },
-      { 0xb5f928bba3993df5l,0x639d75b5f3b3d26al,0x011aa78a85b55050l,
-        0xfc315e6a5b74fde1l },
-      0 },
-    /* 45 */
-    { { 0x561fd41ae8d6ecfal,0x5f8c44f61aec7f86l,0x98452a7b4924741dl,
-        0xe6d4a7adee389088l },
-      { 0x60552ed14593c75dl,0x70a70da4dd271162l,0xd2aede937ba2c7dbl,
-        0x35dfaf9a9be2ae57l },
-      0 },
-    /* 46 */
-    { { 0x6b956fcdaa736636l,0x09f51d97ae2cab7el,0xfb10bf410f349966l,
-        0x1da5c7d71c830d2bl },
-      { 0x5c41e4833cce6825l,0x15ad118ff9573c3bl,0xa28552c7f23036b8l,
-        0x7077c0fddbf4b9d6l },
-      0 },
-    /* 47 */
-    { { 0xbf63ff8d46b9661cl,0xa1dfd36b0d2cfd71l,0x0373e140a847f8f7l,
-        0x53a8632ee50efe44l },
-      { 0x0976ff68696d8051l,0xdaec0c95c74f468al,0x62994dc35e4e26bdl,
-        0x028ca76d34e1fcc1l },
-      0 },
-    /* 48 */
-    { { 0xd11d47dcfc9877eel,0xc8b36210801d0002l,0xd002c11754c260b6l,
-        0x04c17cd86962f046l },
-      { 0x6d9bd094b0daddf5l,0xbea2357524ce55c0l,0x663356e672da03b5l,
-        0xf7ba4de9fed97474l },
-      0 },
-    /* 49 */
-    { { 0xd0dbfa34ebe1263fl,0x5576373571ae7ce6l,0xd244055382a6f523l,
-        0xe31f960052131c41l },
-      { 0xd1bb9216ea6b6ec6l,0x37a1d12e73c2fc44l,0xc10e7eac89d0a294l,
-        0xaa3a6259ce34d47bl },
-      0 },
-    /* 50 */
-    { { 0xfbcf9df536f3dcd3l,0x6ceded50d2bf7360l,0x491710fadf504f5bl,
-        0x2398dd627e79daeel },
-      { 0xcf4705a36d09569el,0xea0619bb5149f769l,0xff9c037735f6034cl,
-        0x5717f5b21c046210l },
-      0 },
-    /* 51 */
-    { { 0x9fe229c921dd895el,0x8e51850040c28451l,0xfa13d2391d637ecdl,
-        0x660a2c560e3c28del },
-      { 0x9cca88aed67fcbd0l,0xc84724780ea9f096l,0x32b2f48172e92b4dl,
-        0x624ee54c4f522453l },
-      0 },
-    /* 52 */
-    { { 0x09549ce4d897ecccl,0x4d49d1d93f9880aal,0x723c2423043a7c20l,
-        0x4f392afb92bdfbc0l },
-      { 0x6969f8fa7de44fd9l,0xb66cfbe457b32156l,0xdb2fa803368ebc3cl,
-        0x8a3e7977ccdb399cl },
-      0 },
-    /* 53 */
-    { { 0xdde1881f06c4b125l,0xae34e300f6e3ca8cl,0xef6999de5c7a13e9l,
-        0x3888d02370c24404l },
-      { 0x7628035644f91081l,0x3d9fcf615f015504l,0x1827edc8632cd36el,
-        0xa5e62e4718102336l },
-      0 },
-    /* 54 */
-    { { 0x1a825ee32facd6c8l,0x699c635454bcbc66l,0x0ce3edf798df9931l,
-        0x2c4768e6466a5adcl },
-      { 0xb346ff8c90a64bc9l,0x630a6020e4779f5cl,0xd949d064bc05e884l,
-        0x7b5e6441f9e652a0l },
-      0 },
-    /* 55 */
-    { { 0x2169422c1d28444al,0xe996c5d8be136a39l,0x2387afe5fb0c7fcel,
-        0xb8af73cb0c8d744al },
-      { 0x5fde83aa338b86fdl,0xfee3f158a58a5cffl,0xc9ee8f6f20ac9433l,
-        0xa036395f7f3f0895l },
-      0 },
-    /* 56 */
-    { { 0x8c73c6bba10f7770l,0xa6f16d81a12a0e24l,0x100df68251bc2b9fl,
-        0x4be36b01875fb533l },
-      { 0x9226086e9fb56dbbl,0x306fef8b07e7a4f8l,0xeeaccc0566d52f20l,
-        0x8cbc9a871bdc00c0l },
-      0 },
-    /* 57 */
-    { { 0xe131895cc0dac4abl,0xa874a440712ff112l,0x6332ae7c6a1cee57l,
-        0x44e7553e0c0835f8l },
-      { 0x6d503fff7734002dl,0x9d35cb8b0b34425cl,0x95f702760e8738b5l,
-        0x470a683a5eb8fc18l },
-      0 },
-    /* 58 */
-    { { 0x81b761dc90513482l,0x0287202a01e9276al,0xcda441ee0ce73083l,
-        0x16410690c63dc6efl },
-      { 0xf5034a066d06a2edl,0xdd4d7745189b100bl,0xd914ae72ab8218c9l,
-        0xd73479fd7abcbb4fl },
-      0 },
-    /* 59 */
-    { { 0x7edefb165ad4c6e5l,0x262cf08f5b06d04dl,0x12ed5bb18575cb14l,
-        0x816469e30771666bl },
-      { 0xd7ab9d79561e291el,0xeb9daf22c1de1661l,0xf49827eb135e0513l,
-        0x0a36dd23f0dd3f9cl },
-      0 },
-    /* 60 */
-    { { 0x098d32c741d5533cl,0x7c5f5a9e8684628fl,0x39a228ade349bd11l,
-        0xe331dfd6fdbab118l },
-      { 0x5100ab686bcc6ed8l,0x7160c3bdef7a260el,0x9063d9a7bce850d7l,
-        0xd3b4782a492e3389l },
-      0 },
-    /* 61 */
-    { { 0xa149b6e8f3821f90l,0x92edd9ed66eb7aadl,0x0bb669531a013116l,
-        0x7281275a4c86a5bdl },
-      { 0x503858f7d3ff47e5l,0x5e1616bc61016441l,0x62b0f11a7dfd9bb1l,
-        0x2c062e7ece145059l },
-      0 },
-    /* 62 */
-    { { 0xa76f996f0159ac2el,0x281e7736cbdb2713l,0x2ad6d28808e46047l,
-        0x282a35f92c4e7ef1l },
-      { 0x9c354b1ec0ce5cd2l,0xcf99efc91379c229l,0x992caf383e82c11el,
-        0xc71cd513554d2abdl },
-      0 },
-    /* 63 */
-    { { 0x4885de9c09b578f4l,0x1884e258e3affa7al,0x8f76b1b759182f1fl,
-        0xc50f6740cf47f3a3l },
-      { 0xa9c4adf3374b68eal,0xa406f32369965fe2l,0x2f86a22285a53050l,
-        0xb9ecb3a7212958dcl },
-      0 },
-    /* 64 */
-    { { 0x56f8410ef4f8b16al,0x97241afec47b266al,0x0a406b8e6d9c87c1l,
-        0x803f3e02cd42ab1bl },
-      { 0x7f0309a804dbec69l,0xa83b85f73bbad05fl,0xc6097273ad8e197fl,
-        0xc097440e5067adc1l },
-      0 },
-    /* 65 */
-    { { 0x846a56f2c379ab34l,0xa8ee068b841df8d1l,0x20314459176c68efl,
-        0xf1af32d5915f1f30l },
-      { 0x99c375315d75bd50l,0x837cffbaf72f67bcl,0x0613a41848d7723fl,
-        0x23d0f130e2d41c8bl },
-      0 },
-    /* 66 */
-    { { 0x857ab6edf41500d9l,0x0d890ae5fcbeada8l,0x52fe864889725951l,
-        0xb0288dd6c0a3faddl },
-      { 0x85320f30650bcb08l,0x71af6313695d6e16l,0x31f520a7b989aa76l,
-        0xffd3724ff408c8d2l },
-      0 },
-    /* 67 */
-    { { 0x53968e64b458e6cbl,0x992dad20317a5d28l,0x3814ae0b7aa75f56l,
-        0xf5590f4ad78c26dfl },
-      { 0x0fc24bd3cf0ba55al,0x0fc4724a0c778bael,0x1ce9864f683b674al,
-        0x18d6da54f6f74a20l },
-      0 },
-    /* 68 */
-    { { 0xed93e225d5be5a2bl,0x6fe799835934f3c6l,0x4314092622626ffcl,
-        0x50bbb4d97990216al },
-      { 0x378191c6e57ec63el,0x65422c40181dcdb2l,0x41a8099b0236e0f6l,
-        0x2b10011801fe49c3l },
-      0 },
-    /* 69 */
-    { { 0xfc68b5c59b391593l,0xc385f5a2598270fcl,0x7144f3aad19adcbbl,
-        0xdd55899983fbae0cl },
-      { 0x93b88b8e74b82ff4l,0xd2e03c4071e734c9l,0x9a7a9eaf43c0322al,
-        0xe6e4c551149d6041l },
-      0 },
-    /* 70 */
-    { { 0x55f655bb1e9af288l,0x647e1a64f7ada931l,0x43697e4bcb2820e5l,
-        0x51e00db107ed56ffl },
-      { 0x43d169b8771c327el,0x29cdb20b4a96c2adl,0xc07d51f53deb4779l,
-        0xe22f424149829177l },
-      0 },
-    /* 71 */
-    { { 0xcd45e8f4635f1abbl,0x7edc0cb568538874l,0xc9472c1fb5a8034dl,
-        0xf709373d52dc48c9l },
-      { 0x401966bba8af30d6l,0x95bf5f4af137b69cl,0x3966162a9361c47el,
-        0xbd52d288e7275b11l },
-      0 },
-    /* 72 */
-    { { 0xab155c7a9c5fa877l,0x17dad6727d3a3d48l,0x43f43f9e73d189d8l,
-        0xa0d0f8e4c8aa77a6l },
-      { 0x0bbeafd8cc94f92dl,0xd818c8be0c4ddb3al,0x22cc65f8b82eba14l,
-        0xa56c78c7946d6a00l },
-      0 },
-    /* 73 */
-    { { 0x2962391b0dd09529l,0x803e0ea63daddfcfl,0x2c77351f5b5bf481l,
-        0xd8befdf8731a367al },
-      { 0xab919d42fc0157f4l,0xf51caed7fec8e650l,0xcdf9cb4002d48b0al,
-        0x854a68a5ce9f6478l },
-      0 },
-    /* 74 */
-    { { 0xdc35f67b63506ea5l,0x9286c489a4fe0d66l,0x3f101d3bfe95cd4dl,
-        0x5cacea0b98846a95l },
-      { 0xa90df60c9ceac44dl,0x3db29af4354d1c3al,0x08dd3de8ad5dbabel,
-        0xe4982d1235e4efa9l },
-      0 },
-    /* 75 */
-    { { 0x23104a22c34cd55el,0x58695bb32680d132l,0xfb345afa1fa1d943l,
-        0x8046b7f616b20499l },
-      { 0xb533581e38e7d098l,0xd7f61e8df46f0b70l,0x30dea9ea44cb78c4l,
-        0xeb17ca7b9082af55l },
-      0 },
-    /* 76 */
-    { { 0x1751b59876a145b9l,0xa5cf6b0fc1bc71ecl,0xd3e03565392715bbl,
-        0x097b00bafab5e131l },
-      { 0xaa66c8e9565f69e1l,0x77e8f75ab5be5199l,0x6033ba11da4fd984l,
-        0xf95c747bafdbcc9el },
-      0 },
-    /* 77 */
-    { { 0x558f01d3bebae45el,0xa8ebe9f0c4bc6955l,0xaeb705b1dbc64fc6l,
-        0x3512601e566ed837l },
-      { 0x9336f1e1fa1161cdl,0x328ab8d54c65ef87l,0x4757eee2724f21e5l,
-        0x0ef971236068ab6bl },
-      0 },
-    /* 78 */
-    { { 0x02598cf754ca4226l,0x5eede138f8642c8el,0x48963f74468e1790l,
-        0xfc16d9333b4fbc95l },
-      { 0xbe96fb31e7c800cal,0x138063312678adaal,0x3d6244976ff3e8b5l,
-        0x14ca4af1b95d7a17l },
-      0 },
-    /* 79 */
-    { { 0x7a4771babd2f81d5l,0x1a5f9d6901f7d196l,0xd898bef7cad9c907l,
-        0x4057b063f59c231dl },
-      { 0xbffd82fe89c05c0al,0xe4911c6f1dc0df85l,0x3befccaea35a16dbl,
-        0x1c3b5d64f1330b13l },
-      0 },
-    /* 80 */
-    { { 0x5fe14bfe80ec21fel,0xf6ce116ac255be82l,0x98bc5a072f4a5d67l,
-        0xfad27148db7e63afl },
-      { 0x90c0b6ac29ab05b3l,0x37a9a83c4e251ae6l,0x0a7dc875c2aade7dl,
-        0x77387de39f0e1a84l },
-      0 },
-    /* 81 */
-    { { 0x1e9ecc49a56c0dd7l,0xa5cffcd846086c74l,0x8f7a1408f505aecel,
-        0xb37b85c0bef0c47el },
-      { 0x3596b6e4cc0e6a8fl,0xfd6d4bbf6b388f23l,0xaba453fac39cef4el,
-        0x9c135ac8f9f628d5l },
-      0 },
-    /* 82 */
-    { { 0x32aa320284e35743l,0x320d6ab185a3cdefl,0xb821b1761df19819l,
-        0x5721361fc433851fl },
-      { 0x1f0db36a71fc9168l,0x5f98ba735e5c403cl,0xf64ca87e37bcd8f5l,
-        0xdcbac3c9e6bb11bdl },
-      0 },
-    /* 83 */
-    { { 0xf01d99684518cbe2l,0xd242fc189c9eb04el,0x727663c7e47feebfl,
-        0xb8c1c89e2d626862l },
-      { 0x51a58bddc8e1d569l,0x563809c8b7d88cd0l,0x26c27fd9f11f31ebl,
-        0x5d23bbda2f9422d4l },
-      0 },
-    /* 84 */
-    { { 0x0a1c729495c8f8bel,0x2961c4803bf362bfl,0x9e418403df63d4acl,
-        0xc109f9cb91ece900l },
-      { 0xc2d095d058945705l,0xb9083d96ddeb85c0l,0x84692b8d7a40449bl,
-        0x9bc3344f2eee1ee1l },
-      0 },
-    /* 85 */
-    { { 0x0d5ae35642913074l,0x55491b2748a542b1l,0x469ca665b310732al,
-        0x29591d525f1a4cc1l },
-      { 0xe76f5b6bb84f983fl,0xbe7eef419f5f84e1l,0x1200d49680baa189l,
-        0x6376551f18ef332cl },
-      0 },
-    /* 86 */
-    { { 0xbda5f14e562976ccl,0x22bca3e60ef12c38l,0xbbfa30646cca9852l,
-        0xbdb79dc808e2987al },
-      { 0xfd2cb5c9cb06a772l,0x38f475aafe536dcel,0xc2a3e0227c2b5db8l,
-        0x8ee86001add3c14al },
-      0 },
-    /* 87 */
-    { { 0xcbe96981a4ade873l,0x7ee9aa4dc4fba48cl,0x2cee28995a054ba5l,
-        0x92e51d7a6f77aa4bl },
-      { 0x948bafa87190a34dl,0xd698f75bf6bd1ed1l,0xd00ee6e30caf1144l,
-        0x5182f86f0a56aaaal },
-      0 },
-    /* 88 */
-    { { 0xfba6212c7a4cc99cl,0xff609b683e6d9ca1l,0x5dbb27cb5ac98c5al,
-        0x91dcab5d4073a6f2l },
-      { 0x01b6cc3d5f575a70l,0x0cb361396f8d87fal,0x165d4e8c89981736l,
-        0x17a0cedb97974f2bl },
-      0 },
-    /* 89 */
-    { { 0x38861e2a076c8d3al,0x701aad39210f924bl,0x94d0eae413a835d9l,
-        0x2e8ce36c7f4cdf41l },
-      { 0x91273dab037a862bl,0x01ba9bb760e4c8fal,0xf964538833baf2ddl,
-        0xf4ccc6cb34f668f3l },
-      0 },
-    /* 90 */
-    { { 0x44ef525cf1f79687l,0x7c59549592efa815l,0xe1231741a5c78d29l,
-        0xac0db4889a0df3c9l },
-      { 0x86bfc711df01747fl,0x592b9358ef17df13l,0xe5880e4f5ccb6bb5l,
-        0x95a64a6194c974a2l },
-      0 },
-    /* 91 */
-    { { 0x72c1efdac15a4c93l,0x40269b7382585141l,0x6a8dfb1c16cb0badl,
-        0x231e54ba29210677l },
-      { 0xa70df9178ae6d2dcl,0x4d6aa63f39112918l,0xf627726b5e5b7223l,
-        0xab0be032d8a731e1l },
-      0 },
-    /* 92 */
-    { { 0x097ad0e98d131f2dl,0x637f09e33b04f101l,0x1ac86196d5e9a748l,
-        0xf1bcc8802cf6a679l },
-      { 0x25c69140e8daacb4l,0x3c4e405560f65009l,0x591cc8fc477937a6l,
-        0x851694695aebb271l },
-      0 },
-    /* 93 */
-    { { 0xde35c143f1dcf593l,0x78202b29b018be3bl,0xe9cdadc29bdd9d3dl,
-        0x8f67d9d2daad55d8l },
-      { 0x841116567481ea5fl,0xe7d2dde9e34c590cl,0xffdd43f405053fa8l,
-        0xf84572b9c0728b5dl },
-      0 },
-    /* 94 */
-    { { 0x5e1a7a7197af71c9l,0xa14494447a736565l,0xa1b4ae070e1d5063l,
-        0xedee2710616b2c19l },
-      { 0xb2f034f511734121l,0x1cac6e554a25e9f0l,0x8dc148f3a40c2ecfl,
-        0x9fd27e9b44ebd7f4l },
-      0 },
-    /* 95 */
-    { { 0x3cc7658af6e2cb16l,0xe3eb7d2cfe5919b6l,0x5a8c5816168d5583l,
-        0xa40c2fb6958ff387l },
-      { 0x8c9ec560fedcc158l,0x7ad804c655f23056l,0xd93967049a307e12l,
-        0x99bc9bb87dc6decfl },
-      0 },
-    /* 96 */
-    { { 0x84a9521d927dafc6l,0x52c1fb695c09cd19l,0x9d9581a0f9366ddel,
-        0x9abe210ba16d7e64l },
-      { 0x480af84a48915220l,0xfa73176a4dd816c6l,0xc7d539871681ca5al,
-        0x7881c25787f344b0l },
-      0 },
-    /* 97 */
-    { { 0x93399b51e0bcf3ffl,0x0d02cbc5127f74f6l,0x8fb465a2dd01d968l,
-        0x15e6e319a30e8940l },
-      { 0x646d6e0d3e0e05f4l,0xfad7bddc43588404l,0xbe61c7d1c4f850d3l,
-        0x0e55facf191172cel },
-      0 },
-    /* 98 */
-    { { 0x7e9d9806f8787564l,0x1a33172131e85ce6l,0x6b0158cab819e8d6l,
-        0xd73d09766fe96577l },
-      { 0x424834251eb7206el,0xa519290fc618bb42l,0x5dcbb8595e30a520l,
-        0x9250a3748f15a50bl },
-      0 },
-    /* 99 */
-    { { 0xcaff08f8be577410l,0xfd408a035077a8c6l,0xf1f63289ec0a63a4l,
-        0x77414082c1cc8c0bl },
-      { 0x05a40fa6eb0991cdl,0xc1ca086649fdc296l,0x3a68a3c7b324fd40l,
-        0x8cb04f4d12eb20b9l },
-      0 },
-    /* 100 */
-    { { 0xb1c2d0556906171cl,0x9073e9cdb0240c3fl,0xdb8e6b4fd8906841l,
-        0xe4e429ef47123b51l },
-      { 0x0b8dd53c38ec36f4l,0xf9d2dc01ff4b6a27l,0x5d066e07879a9a48l,
-        0x37bca2ff3c6e6552l },
-      0 },
-    /* 101 */
-    { { 0x4cd2e3c7df562470l,0x44f272a2c0964ac9l,0x7c6d5df980c793bel,
-        0x59913edc3002b22al },
-      { 0x7a139a835750592al,0x99e01d80e783de02l,0xcf8c0375ea05d64fl,
-        0x43786e4ab013e226l },
-      0 },
-    /* 102 */
-    { { 0xff32b0ed9e56b5a6l,0x0750d9a6d9fc68f9l,0xec15e845597846a7l,
-        0x8638ca98b7e79e7al },
-      { 0x2f5ae0960afc24b2l,0x05398eaf4dace8f2l,0x3b765dd0aecba78fl,
-        0x1ecdd36a7b3aa6f0l },
-      0 },
-    /* 103 */
-    { { 0x5d3acd626c5ff2f3l,0xa2d516c02873a978l,0xad94c9fad2110d54l,
-        0xd85d0f85d459f32dl },
-      { 0x9f700b8d10b11da3l,0xd2c22c30a78318c4l,0x556988f49208decdl,
-        0xa04f19c3b4ed3c62l },
-      0 },
-    /* 104 */
-    { { 0x087924c8ed7f93bdl,0xcb64ac5d392f51f6l,0x7cae330a821b71afl,
-        0x92b2eeea5c0950b0l },
-      { 0x85ac4c9485b6e235l,0xab2ca4a92936c0f0l,0x80faa6b3e0508891l,
-        0x1ee782215834276cl },
-      0 },
-    /* 105 */
-    { { 0xa60a2e00e63e79f7l,0xf590e7b2f399d906l,0x9021054a6607c09dl,
-        0xf3f2ced857a6e150l },
-      { 0x200510f3f10d9b55l,0x9d2fcfacd8642648l,0xe5631aa7e8bd0e7cl,
-        0x0f56a4543da3e210l },
-      0 },
-    /* 106 */
-    { { 0x5b21bffa1043e0dfl,0x6c74b6cc9c007e6dl,0x1a656ec0d4a8517al,
-        0xbd8f17411969e263l },
-      { 0x8a9bbb86beb7494al,0x1567d46f45f3b838l,0xdf7a12a7a4e5a79al,
-        0x2d1a1c3530ccfa09l },
-      0 },
-    /* 107 */
-    { { 0x192e3813506508dal,0x336180c4a1d795a7l,0xcddb59497a9944b3l,
-        0xa107a65eb91fba46l },
-      { 0xe6d1d1c50f94d639l,0x8b4af3758a58b7d7l,0x1a7c5584bd37ca1cl,
-        0x183d760af87a9af2l },
-      0 },
-    /* 108 */
-    { { 0x29d697110dde59a4l,0xf1ad8d070e8bef87l,0x229b49634f2ebe78l,
-        0x1d44179dc269d754l },
-      { 0xb32dc0cf8390d30el,0x0a3b27530de8110cl,0x31af1dc52bc0339al,
-        0x771f9cc29606d262l },
-      0 },
-    /* 109 */
-    { { 0x99993e7785040739l,0x44539db98026a939l,0xcf40f6f2f5f8fc26l,
-        0x64427a310362718el },
-      { 0x4f4f2d8785428aa8l,0x7b7adc3febfb49a8l,0x201b2c6df23d01acl,
-        0x49d9b7496ae90d6dl },
-      0 },
-    /* 110 */
-    { { 0xcc78d8bc435d1099l,0x2adbcd4e8e8d1a08l,0x02c2e2a02cb68a41l,
-        0x9037d81b3f605445l },
-      { 0x7cdbac27074c7b61l,0xfe2031ab57bfd72el,0x61ccec96596d5352l,
-        0x08c3de6a7cc0639cl },
-      0 },
-    /* 111 */
-    { { 0x20fdd020f6d552abl,0x56baff9805cd81f1l,0x06fb7c3e91351291l,
-        0xc690944245796b2fl },
-      { 0x17b3ae9c41231bd1l,0x1eac6e875cc58205l,0x208837abf9d6a122l,
-        0x3fa3db02cafe3ac0l },
-      0 },
-    /* 112 */
-    { { 0xd75a3e6505058880l,0x7da365ef643943f2l,0x4147861cfab24925l,
-        0xc5c4bdb0fdb808ffl },
-      { 0x73513e34b272b56bl,0xc8327e9511b9043al,0xfd8ce37df8844969l,
-        0x2d56db9446c2b6b5l },
-      0 },
-    /* 113 */
-    { { 0x2461782fff46ac6bl,0xd19f792607a2e425l,0xfafea3c409a48de1l,
-        0x0f56bd9de503ba42l },
-      { 0x137d4ed1345cda49l,0x821158fc816f299dl,0xe7c6a54aaeb43402l,
-        0x4003bb9d1173b5f1l },
-      0 },
-    /* 114 */
-    { { 0x3b8e8189a0803387l,0xece115f539cbd404l,0x4297208dd2877f21l,
-        0x53765522a07f2f9el },
-      { 0xa4980a21a8a4182dl,0xa2bbd07a3219df79l,0x674d0a2e1a19a2d4l,
-        0x7a056f586c5d4549l },
-      0 },
-    /* 115 */
-    { { 0x646b25589d8a2a47l,0x5b582948c3df2773l,0x51ec000eabf0d539l,
-        0x77d482f17a1a2675l },
-      { 0xb8a1bd9587853948l,0xa6f817bd6cfbffeel,0xab6ec05780681e47l,
-        0x4115012b2b38b0e4l },
-      0 },
-    /* 116 */
-    { { 0x3c73f0f46de28cedl,0x1d5da7609b13ec47l,0x61b8ce9e6e5c6392l,
-        0xcdf04572fbea0946l },
-      { 0x1cb3c58b6c53c3b0l,0x97fe3c10447b843cl,0xfb2b8ae12cb9780el,
-        0xee703dda97383109l },
-      0 },
-    /* 117 */
-    { { 0x34515140ff57e43al,0xd44660d3b1b811b8l,0x2b3b5dff8f42b986l,
-        0x2a0ad89da162ce21l },
-      { 0x64e4a6946bc277bal,0xc788c954c141c276l,0x141aa64ccabf6274l,
-        0xd62d0b67ac2b4659l },
-      0 },
-    /* 118 */
-    { { 0x39c5d87b2c054ac4l,0x57005859f27df788l,0xedf7cbf3b18128d6l,
-        0xb39a23f2991c2426l },
-      { 0x95284a15f0b16ae5l,0x0c6a05b1a136f51bl,0x1d63c137f2700783l,
-        0x04ed0092c0674cc5l },
-      0 },
-    /* 119 */
-    { { 0x1f4185d19ae90393l,0x3047b4294a3d64e6l,0xae0001a69854fc14l,
-        0xa0a91fc10177c387l },
-      { 0xff0a3f01ae2c831el,0xbb76ae822b727e16l,0x8f12c8a15a3075b4l,
-        0x084cf9889ed20c41l },
-      0 },
-    /* 120 */
-    { { 0xd98509defca6becfl,0x2fceae807dffb328l,0x5d8a15c44778e8b9l,
-        0xd57955b273abf77el },
-      { 0x210da79e31b5d4f1l,0xaa52f04b3cfa7a1cl,0xd4d12089dc27c20bl,
-        0x8e14ea4202d141f1l },
-      0 },
-    /* 121 */
-    { { 0xeed50345f2897042l,0x8d05331f43402c4al,0xc8d9c194c8bdfb21l,
-        0x597e1a372aa4d158l },
-      { 0x0327ec1acf0bd68cl,0x6d4be0dcab024945l,0x5b9c8d7ac9fe3e84l,
-        0xca3f0236199b4deal },
-      0 },
-    /* 122 */
-    { { 0x592a10b56170bd20l,0x0ea897f16d3f5de7l,0xa3363ff144b2ade2l,
-        0xbde7fd7e309c07e4l },
-      { 0x516bb6d2b8f5432cl,0x210dc1cbe043444bl,0x3db01e6ff8f95b5al,
-        0xb623ad0e0a7dd198l },
-      0 },
-    /* 123 */
-    { { 0xa75bd67560c7b65bl,0xab8c559023a4a289l,0xf8220fd0d7b26795l,
-        0xd6aa2e4658ec137bl },
-      { 0x10abc00b5138bb85l,0x8c31d121d833a95cl,0xb24ff00b1702a32el,
-        0x111662e02dcc513al },
-      0 },
-    /* 124 */
-    { { 0x78114015efb42b87l,0xbd9f5d701b6c4dffl,0x66ecccd7a7d7c129l,
-        0xdb3ee1cb94b750f8l },
-      { 0xb26f3db0f34837cfl,0xe7eed18bb9578d4fl,0x5d2cdf937c56657dl,
-        0x886a644252206a59l },
-      0 },
-    /* 125 */
-    { { 0x3c234cfb65b569eal,0x20011141f72119c1l,0x8badc85da15a619el,
-        0xa70cf4eb018a17bcl },
-      { 0x224f97ae8c4a6a65l,0x36e5cf270134378fl,0xbe3a609e4f7e0960l,
-        0xaa4772abd1747b77l },
-      0 },
-    /* 126 */
-    { { 0x676761317aa60cc0l,0xc79163610368115fl,0xded98bb4bbc1bb5al,
-        0x611a6ddc30faf974l },
-      { 0x30e78cbcc15ee47al,0x2e8962824e0d96a5l,0x36f35adf3dd9ed88l,
-        0x5cfffaf816429c88l },
-      0 },
-    /* 127 */
-    { { 0xc0d54cff9b7a99cdl,0x7bf3b99d843c45a1l,0x038a908f62c739e1l,
-        0x6e5a6b237dc1994cl },
-      { 0xef8b454e0ba5db77l,0xb7b8807facf60d63l,0xe591c0c676608378l,
-        0x481a238d242dabccl },
-      0 },
-    /* 128 */
-    { { 0xe3417bc035d0b34al,0x440b386b8327c0a7l,0x8fb7262dac0362d1l,
-        0x2c41114ce0cdf943l },
-      { 0x2ba5cef1ad95a0b1l,0xc09b37a867d54362l,0x26d6cdd201e486c9l,
-        0x20477abf42ff9297l },
-      0 },
-    /* 129 */
-    { { 0x2f75173c18d65dbfl,0x77bf940e339edad8l,0x7022d26bdcf1001cl,
-        0xac66409ac77396b6l },
-      { 0x8b0bb36fc6261cc3l,0x213f7bc9190e7e90l,0x6541cebaa45e6c10l,
-        0xce8e6975cc122f85l },
-      0 },
-    /* 130 */
-    { { 0x0f121b41bc0a67d2l,0x62d4760a444d248al,0x0e044f1d659b4737l,
-        0x08fde365250bb4a8l },
-      { 0xaceec3da848bf287l,0xc2a62182d3369d6el,0x3582dfdc92449482l,
-        0x2f7e2fd2565d6cd7l },
-      0 },
-    /* 131 */
-    { { 0xae4b92dbc3770fa7l,0x095e8d5c379043f9l,0x54f34e9d17761171l,
-        0xc65be92e907702ael },
-      { 0x2758a303f6fd0a40l,0xe7d822e3bcce784bl,0x7ae4f5854f9767bfl,
-        0x4bff8e47d1193b3al },
-      0 },
-    /* 132 */
-    { { 0xcd41d21f00ff1480l,0x2ab8fb7d0754db16l,0xac81d2efbbe0f3eal,
-        0x3e4e4ae65772967dl },
-      { 0x7e18f36d3c5303e6l,0x3bd9994b92262397l,0x9ed70e261324c3c0l,
-        0x5388aefd58ec6028l },
-      0 },
-    /* 133 */
-    { { 0xad1317eb5e5d7713l,0x09b985ee75de49dal,0x32f5bc4fc74fb261l,
-        0x5cf908d14f75be0el },
-      { 0x760435108e657b12l,0xbfd421a5b96ed9e6l,0x0e29f51f8970ccc2l,
-        0xa698ba4060f00ce2l },
-      0 },
-    /* 134 */
-    { { 0x73db1686ef748fecl,0xe6e755a27e9d2cf9l,0x630b6544ce265effl,
-        0xb142ef8a7aebad8dl },
-      { 0xad31af9f17d5770al,0x66af3b672cb3412fl,0x6bd60d1bdf3359del,
-        0xd1896a9658515075l },
-      0 },
-    /* 135 */
-    { { 0xec5957ab33c41c08l,0x87de94ac5468e2e1l,0x18816b73ac472f6cl,
-        0x267b0e0b7981da39l },
-      { 0x6e554e5d8e62b988l,0xd8ddc755116d21e7l,0x4610faf03d2a6f99l,
-        0xb54e287aa1119393l },
-      0 },
-    /* 136 */
-    { { 0x0a0122b5178a876bl,0x51ff96ff085104b4l,0x050b31ab14f29f76l,
-        0x84abb28b5f87d4e6l },
-      { 0xd5ed439f8270790al,0x2d6cb59d85e3f46bl,0x75f55c1b6c1e2212l,
-        0xe5436f6717655640l },
-      0 },
-    /* 137 */
-    { { 0x53f9025e2286e8d5l,0x353c95b4864453bel,0xd832f5bde408e3a0l,
-        0x0404f68b5b9ce99el },
-      { 0xcad33bdea781e8e5l,0x3cdf5018163c2f5bl,0x575769600119caa3l,
-        0x3a4263df0ac1c701l },
-      0 },
-    /* 138 */
-    { { 0xc2965ecc9aeb596dl,0x01ea03e7023c92b4l,0x4704b4b62e013961l,
-        0x0ca8fd3f905ea367l },
-      { 0x92523a42551b2b61l,0x1eb7a89c390fcd06l,0xe7f1d2be0392a63el,
-        0x96dca2644ddb0c33l },
-      0 },
-    /* 139 */
-    { { 0x203bb43a387510afl,0x846feaa8a9a36a01l,0xd23a57702f950378l,
-        0x4363e2123aad59dcl },
-      { 0xca43a1c740246a47l,0xb362b8d2e55dd24dl,0xf9b086045d8faf96l,
-        0x840e115cd8bb98c4l },
-      0 },
-    /* 140 */
-    { { 0xf12205e21023e8a7l,0xc808a8cdd8dc7a0bl,0xe292a272163a5ddfl,
-        0x5e0d6abd30ded6d4l },
-      { 0x07a721c27cfc0f64l,0x42eec01d0e55ed88l,0x26a7bef91d1f9db2l,
-        0x7dea48f42945a25al },
-      0 },
-    /* 141 */
-    { { 0xabdf6f1ce5060a81l,0xe79f9c72f8f95615l,0xcfd36c5406ac268bl,
-        0xabc2a2beebfd16d1l },
-      { 0x8ac66f91d3e2eac7l,0x6f10ba63d2dd0466l,0x6790e3770282d31bl,
-        0x4ea353946c7eefc1l },
-      0 },
-    /* 142 */
-    { { 0xed8a2f8d5266309dl,0x0a51c6c081945a3el,0xcecaf45a578c5dc1l,
-        0x3a76e6891c94ffc3l },
-      { 0x9aace8a47d7b0d0fl,0x963ace968f584a5fl,0x51a30c724e697fbel,
-        0x8212a10a465e6464l },
-      0 },
-    /* 143 */
-    { { 0xef7c61c3cfab8caal,0x18eb8e840e142390l,0xcd1dff677e9733cal,
-        0xaa7cab71599cb164l },
-      { 0x02fc9273bc837bd1l,0xc06407d0c36af5d7l,0x17621292f423da49l,
-        0x40e38073fe0617c3l },
-      0 },
-    /* 144 */
-    { { 0xf4f80824a7bf9b7cl,0x365d23203fbe30d0l,0xbfbe532097cf9ce3l,
-        0xe3604700b3055526l },
-      { 0x4dcb99116cc6c2c7l,0x72683708ba4cbee6l,0xdcded434637ad9ecl,
-        0x6542d677a3dee15fl },
-      0 },
-    /* 145 */
-    { { 0x3f32b6d07b6c377al,0x6cb03847903448bel,0xd6fdd3a820da8af7l,
-        0xa6534aee09bb6f21l },
-      { 0x30a1780d1035facfl,0x35e55a339dcb47e6l,0x6ea50fe1c447f393l,
-        0xf3cb672fdc9aef22l },
-      0 },
-    /* 146 */
-    { { 0xeb3719fe3b55fd83l,0xe0d7a46c875ddd10l,0x33ac9fa905cea784l,
-        0x7cafaa2eaae870e7l },
-      { 0x9b814d041d53b338l,0xe0acc0a0ef87e6c6l,0xfb93d10811672b0fl,
-        0x0aab13c1b9bd522el },
-      0 },
-    /* 147 */
-    { { 0xddcce278d2681297l,0xcb350eb1b509546al,0x2dc431737661aaf2l,
-        0x4b91a602847012e9l },
-      { 0xdcff109572f8ddcfl,0x08ebf61e9a911af4l,0x48f4360ac372430el,
-        0x49534c5372321cabl },
-      0 },
-    /* 148 */
-    { { 0x83df7d71f07b7e9dl,0xa478efa313cd516fl,0x78ef264b6c047ee3l,
-        0xcaf46c4fd65ac5eel },
-      { 0xa04d0c7792aa8266l,0xedf45466913684bbl,0x56e65168ae4b16b0l,
-        0x14ce9e5704c6770fl },
-      0 },
-    /* 149 */
-    { { 0x99445e3e965e8f91l,0xd3aca1bacb0f2492l,0xd31cc70f90c8a0a0l,
-        0x1bb708a53e4c9a71l },
-      { 0xd5ca9e69558bdd7al,0x734a0508018a26b1l,0xb093aa714c9cf1ecl,
-        0xf9d126f2da300102l },
-      0 },
-    /* 150 */
-    { { 0x749bca7aaff9563el,0xdd077afeb49914a0l,0xe27a0311bf5f1671l,
-        0x807afcb9729ecc69l },
-      { 0x7f8a9337c9b08b77l,0x86c3a785443c7e38l,0x85fafa59476fd8bal,
-        0x751adcd16568cd8cl },
-      0 },
-    /* 151 */
-    { { 0x8aea38b410715c0dl,0xd113ea718f7697f7l,0x665eab1493fbf06dl,
-        0x29ec44682537743fl },
-      { 0x3d94719cb50bebbcl,0x399ee5bfe4505422l,0x90cd5b3a8d2dedb1l,
-        0xff9370e392a4077dl },
-      0 },
-    /* 152 */
-    { { 0x59a2d69bc6b75b65l,0x4188f8d5266651c5l,0x28a9f33e3de9d7d2l,
-        0x9776478ba2a9d01al },
-      { 0x8852622d929af2c7l,0x334f5d6d4e690923l,0xce6cc7e5a89a51e9l,
-        0x74a6313fac2f82fal },
-      0 },
-    /* 153 */
-    { { 0xb2f4dfddb75f079cl,0x85b07c9518e36fbbl,0x1b6cfcf0e7cd36ddl,
-        0xab75be150ff4863dl },
-      { 0x81b367c0173fc9b7l,0xb90a7420d2594fd0l,0x15fdbf03c4091236l,
-        0x4ebeac2e0b4459f6l },
-      0 },
-    /* 154 */
-    { { 0xeb6c5fe75c9f2c53l,0xd25220118eae9411l,0xc8887633f95ac5d8l,
-        0xdf99887b2c1baffcl },
-      { 0xbb78eed2850aaecbl,0x9d49181b01d6a272l,0x978dd511b1cdbcacl,
-        0x27b040a7779f4058l },
-      0 },
-    /* 155 */
-    { { 0x90405db7f73b2eb2l,0xe0df85088e1b2118l,0x501b71525962327el,
-        0xb393dd37e4cfa3f5l },
-      { 0xa1230e7b3fd75165l,0xd66344c2bcd33554l,0x6c36f1be0f7b5022l,
-        0x09588c12d0463419l },
-      0 },
-    /* 156 */
-    { { 0xe086093f02601c3bl,0xfb0252f8cf5c335fl,0x955cf280894aff28l,
-        0x81c879a9db9f648bl },
-      { 0x040e687cc6f56c51l,0xfed471693f17618cl,0x44f88a419059353bl,
-        0xfa0d48f55fc11bc4l },
-      0 },
-    /* 157 */
-    { { 0xbc6e1c9de1608e4dl,0x010dda113582822cl,0xf6b7ddc1157ec2d7l,
-        0x8ea0e156b6a367d6l },
-      { 0xa354e02f2383b3b4l,0x69966b943f01f53cl,0x4ff6632b2de03ca5l,
-        0x3f5ab924fa00b5acl },
-      0 },
-    /* 158 */
-    { { 0x337bb0d959739efbl,0xc751b0f4e7ebec0dl,0x2da52dd6411a67d1l,
-        0x8bc768872b74256el },
-      { 0xa5be3b7282d3d253l,0xa9f679a1f58d779fl,0xa1cac168e16767bbl,
-        0xb386f19060fcf34fl },
-      0 },
-    /* 159 */
-    { { 0x31f3c1352fedcfc2l,0x5396bf6262f8af0dl,0x9a02b4eae57288c2l,
-        0x4cb460f71b069c4dl },
-      { 0xae67b4d35b8095eal,0x92bbf8596fc07603l,0xe1475f66b614a165l,
-        0x52c0d50895ef5223l },
-      0 },
-    /* 160 */
-    { { 0x231c210e15339848l,0xe87a28e870778c8dl,0x9d1de6616956e170l,
-        0x4ac3c9382bb09c0bl },
-      { 0x19be05516998987dl,0x8b2376c4ae09f4d6l,0x1de0b7651a3f933dl,
-        0x380d94c7e39705f4l },
-      0 },
-    /* 161 */
-    { { 0x01a355aa81542e75l,0x96c724a1ee01b9b7l,0x6b3a2977624d7087l,
-        0x2ce3e171de2637afl },
-      { 0xcfefeb49f5d5bc1al,0xa655607e2777e2b5l,0x4feaac2f9513756cl,
-        0x2e6cd8520b624e4dl },
-      0 },
-    /* 162 */
-    { { 0x3685954b8c31c31dl,0x68533d005bf21a0cl,0x0bd7626e75c79ec9l,
-        0xca17754742c69d54l },
-      { 0xcc6edafff6d2dbb2l,0xfd0d8cbd174a9d18l,0x875e8793aa4578e8l,
-        0xa976a7139cab2ce6l },
-      0 },
-    /* 163 */
-    { { 0x0a651f1b93fb353dl,0xd75cab8b57fcfa72l,0xaa88cfa731b15281l,
-        0x8720a7170a1f4999l },
-      { 0x8c3e8d37693e1b90l,0xd345dc0b16f6dfc3l,0x8ea8d00ab52a8742l,
-        0x9719ef29c769893cl },
-      0 },
-    /* 164 */
-    { { 0x820eed8d58e35909l,0x9366d8dc33ddc116l,0xd7f999d06e205026l,
-        0xa5072976e15704c1l },
-      { 0x002a37eac4e70b2el,0x84dcf6576890aa8al,0xcd71bf18645b2a5cl,
-        0x99389c9df7b77725l },
-      0 },
-    /* 165 */
-    { { 0x238c08f27ada7a4bl,0x3abe9d03fd389366l,0x6b672e89766f512cl,
-        0xa88806aa202c82e4l },
-      { 0x6602044ad380184el,0xa8cb78c4126a8b85l,0x79d670c0ad844f17l,
-        0x0043bffb4738dcfel },
-      0 },
-    /* 166 */
-    { { 0x8d59b5dc36d5192el,0xacf885d34590b2afl,0x83566d0a11601781l,
-        0x52f3ef01ba6c4866l },
-      { 0x3986732a0edcb64dl,0x0a482c238068379fl,0x16cbe5fa7040f309l,
-        0x3296bd899ef27e75l },
-      0 },
-    /* 167 */
-    { { 0x476aba89454d81d7l,0x9eade7ef51eb9b3cl,0x619a21cd81c57986l,
-        0x3b90febfaee571e9l },
-      { 0x9393023e5496f7cbl,0x55be41d87fb51bc4l,0x03f1dd4899beb5cel,
-        0x6e88069d9f810b18l },
-      0 },
-    /* 168 */
-    { { 0xce37ab11b43ea1dbl,0x0a7ff1a95259d292l,0x851b02218f84f186l,
-        0xa7222beadefaad13l },
-      { 0xa2ac78ec2b0a9144l,0x5a024051f2fa59c5l,0x91d1eca56147ce38l,
-        0xbe94d523bc2ac690l },
-      0 },
-    /* 169 */
-    { { 0x72f4945e0b226ce7l,0xb8afd747967e8b70l,0xedea46f185a6c63el,
-        0x7782defe9be8c766l },
-      { 0x760d2aa43db38626l,0x460ae78776f67ad1l,0x341b86fc54499cdbl,
-        0x03838567a2892e4bl },
-      0 },
-    /* 170 */
-    { { 0x2d8daefd79ec1a0fl,0x3bbcd6fdceb39c97l,0xf5575ffc58f61a95l,
-        0xdbd986c4adf7b420l },
-      { 0x81aa881415f39eb7l,0x6ee2fcf5b98d976cl,0x5465475dcf2f717dl,
-        0x8e24d3c46860bbd0l },
-      0 },
-    /* 171 */
-    { { 0x749d8e549a587390l,0x12bb194f0cbec588l,0x46e07da4b25983c6l,
-        0x541a99c4407bafc8l },
-      { 0xdb241692624c8842l,0x6044c12ad86c05ffl,0xc59d14b44f7fcf62l,
-        0xc0092c49f57d35d1l },
-      0 },
-    /* 172 */
-    { { 0xd3cc75c3df2e61efl,0x7e8841c82e1b35cal,0xc62d30d1909f29f4l,
-        0x75e406347286944dl },
-      { 0xe7d41fc5bbc237d0l,0xc9537bf0ec4f01c9l,0x91c51a16282bd534l,
-        0x5b7cb658c7848586l },
-      0 },
-    /* 173 */
-    { { 0x964a70848a28ead1l,0x802dc508fd3b47f6l,0x9ae4bfd1767e5b39l,
-        0x7ae13eba8df097a1l },
-      { 0xfd216ef8eadd384el,0x0361a2d9b6b2ff06l,0x204b98784bcdb5f3l,
-        0x787d8074e2a8e3fdl },
-      0 },
-    /* 174 */
-    { { 0xc5e25d6b757fbb1cl,0xe47bddb2ca201debl,0x4a55e9a36d2233ffl,
-        0x5c2228199ef28484l },
-      { 0x773d4a8588315250l,0x21b21a2b827097c1l,0xab7c4ea1def5d33fl,
-        0xe45d37abbaf0f2b0l },
-      0 },
-    /* 175 */
-    { { 0xd2df1e3428511c8al,0xebb229c8bdca6cd3l,0x578a71a7627c39a7l,
-        0xed7bc12284dfb9d3l },
-      { 0xcf22a6df93dea561l,0x5443f18dd48f0ed1l,0xd8b861405bad23e8l,
-        0xaac97cc945ca6d27l },
-      0 },
-    /* 176 */
-    { { 0xeb54ea74a16bd00al,0xd839e9adf5c0bcc1l,0x092bb7f11f9bfc06l,
-        0x318f97b31163dc4el },
-      { 0xecc0c5bec30d7138l,0x44e8df23abc30220l,0x2bb7972fb0223606l,
-        0xfa41faa19a84ff4dl },
-      0 },
-    /* 177 */
-    { { 0x4402d974a6642269l,0xc81814ce9bb783bdl,0x398d38e47941e60bl,
-        0x38bb6b2c1d26e9e2l },
-      { 0xc64e4a256a577f87l,0x8b52d253dc11fe1cl,0xff336abf62280728l,
-        0x94dd0905ce7601a5l },
-      0 },
-    /* 178 */
-    { { 0x156cf7dcde93f92al,0xa01333cb89b5f315l,0x02404df9c995e750l,
-        0x92077867d25c2ae9l },
-      { 0xe2471e010bf39d44l,0x5f2c902096bb53d7l,0x4c44b7b35c9c3d8fl,
-        0x81e8428bd29beb51l },
-      0 },
-    /* 179 */
-    { { 0x6dd9c2bac477199fl,0x8cb8eeee6b5ecdd9l,0x8af7db3fee40fd0el,
-        0x1b94ab62dbbfa4b1l },
-      { 0x44f0d8b3ce47f143l,0x51e623fc63f46163l,0xf18f270fcc599383l,
-        0x06a38e28055590eel },
-      0 },
-    /* 180 */
-    { { 0x2e5b0139b3355b49l,0x20e26560b4ebf99bl,0xc08ffa6bd269f3dcl,
-        0xa7b36c2083d9d4f8l },
-      { 0x64d15c3a1b3e8830l,0xd5fceae1a89f9c0bl,0xcfeee4a2e2d16930l,
-        0xbe54c6b4a2822a20l },
-      0 },
-    /* 181 */
-    { { 0xd6cdb3df8d91167cl,0x517c3f79e7a6625el,0x7105648f346ac7f4l,
-        0xbf30a5abeae022bbl },
-      { 0x8e7785be93828a68l,0x5161c3327f3ef036l,0xe11b5feb592146b2l,
-        0xd1c820de2732d13al },
-      0 },
-    /* 182 */
-    { { 0x043e13479038b363l,0x58c11f546b05e519l,0x4fe57abe6026cad1l,
-        0xb7d17bed68a18da3l },
-      { 0x44ca5891e29c2559l,0x4f7a03765bfffd84l,0x498de4af74e46948l,
-        0x3997fd5e6412cc64l },
-      0 },
-    /* 183 */
-    { { 0xf20746828bd61507l,0x29e132d534a64d2al,0xffeddfb08a8a15e3l,
-        0x0eeb89293c6c13e8l },
-      { 0xe9b69a3ea7e259f8l,0xce1db7e6d13e7e67l,0x277318f6ad1fa685l,
-        0x228916f8c922b6efl },
-      0 },
-    /* 184 */
-    { { 0x959ae25b0a12ab5bl,0xcc11171f957bc136l,0x8058429ed16e2b0cl,
-        0xec05ad1d6e93097el },
-      { 0x157ba5beac3f3708l,0x31baf93530b59d77l,0x47b55237118234e5l,
-        0x7d3141567ff11b37l },
-      0 },
-    /* 185 */
-    { { 0x7bd9c05cf6dfefabl,0xbe2f2268dcb37707l,0xe53ead973a38bb95l,
-        0xe9ce66fc9bc1d7a3l },
-      { 0x75aa15766f6a02a1l,0x38c087df60e600edl,0xf8947f3468cdc1b9l,
-        0xd9650b0172280651l },
-      0 },
-    /* 186 */
-    { { 0x504b4c4a5a057e60l,0xcbccc3be8def25e4l,0xa635320817c1ccbdl,
-        0x14d6699a804eb7a2l },
-      { 0x2c8a8415db1f411al,0x09fbaf0bf80d769cl,0xb4deef901c2f77adl,
-        0x6f4c68410d43598al },
-      0 },
-    /* 187 */
-    { { 0x8726df4e96c24a96l,0x534dbc85fcbd99a3l,0x3c466ef28b2ae30al,
-        0x4c4350fd61189abbl },
-      { 0x2967f716f855b8dal,0x41a42394463c38a1l,0xc37e1413eae93343l,
-        0xa726d2425a3118b5l },
-      0 },
-    /* 188 */
-    { { 0xdae6b3ee948c1086l,0xf1de503dcbd3a2e1l,0x3f35ed3f03d022f3l,
-        0x13639e82cc6cf392l },
-      { 0x9ac938fbcdafaa86l,0xf45bc5fb2654a258l,0x1963b26e45051329l,
-        0xca9365e1c1a335a3l },
-      0 },
-    /* 189 */
-    { { 0x3615ac754c3b2d20l,0x742a5417904e241bl,0xb08521c4cc9d071dl,
-        0x9ce29c34970b72a5l },
-      { 0x8cc81f736d3e0ad6l,0x8060da9ef2f8434cl,0x35ed1d1a6ce862d9l,
-        0x48c4abd7ab42af98l },
-      0 },
-    /* 190 */
-    { { 0xd221b0cc40c7485al,0xead455bbe5274dbfl,0x493c76989263d2e8l,
-        0x78017c32f67b33cbl },
-      { 0xb9d35769930cb5eel,0xc0d14e940c408ed2l,0xf8b7bf55272f1a4dl,
-        0x53cd0454de5c1c04l },
-      0 },
-    /* 191 */
-    { { 0xbcd585fa5d28ccacl,0x5f823e56005b746el,0x7c79f0a1cd0123aal,
-        0xeea465c1d3d7fa8fl },
-      { 0x7810659f0551803bl,0x6c0b599f7ce6af70l,0x4195a77029288e70l,
-        0x1b6e42a47ae69193l },
-      0 },
-    /* 192 */
-    { { 0x2e80937cf67d04c3l,0x1e312be289eeb811l,0x56b5d88792594d60l,
-        0x0224da14187fbd3dl },
-      { 0x87abb8630c5fe36fl,0x580f3c604ef51f5fl,0x964fb1bfb3b429ecl,
-        0x60838ef042bfff33l },
-      0 },
-    /* 193 */
-    { { 0x432cb2f27e0bbe99l,0x7bda44f304aa39eel,0x5f497c7a9fa93903l,
-        0x636eb2022d331643l },
-      { 0xfcfd0e6193ae00aal,0x875a00fe31ae6d2fl,0xf43658a29f93901cl,
-        0x8844eeb639218bacl },
-      0 },
-    /* 194 */
-    { { 0x114171d26b3bae58l,0x7db3df7117e39f3el,0xcd37bc7f81a8eadal,
-        0x27ba83dc51fb789el },
-      { 0xa7df439ffbf54de5l,0x7277030bb5fe1a71l,0x42ee8e35db297a48l,
-        0xadb62d3487f3a4abl },
-      0 },
-    /* 195 */
-    { { 0x9b1168a2a175df2al,0x082aa04f618c32e9l,0xc9e4f2e7146b0916l,
-        0xb990fd7675e7c8b2l },
-      { 0x0829d96b4df37313l,0x1c205579d0b40789l,0x66c9ae4a78087711l,
-        0x81707ef94d10d18dl },
-      0 },
-    /* 196 */
-    { { 0x97d7cab203d6ff96l,0x5b851bfc0d843360l,0x268823c4d042db4bl,
-        0x3792daead5a8aa5cl },
-      { 0x52818865941afa0bl,0xf3e9e74142d83671l,0x17c825275be4e0a7l,
-        0x5abd635e94b001bal },
-      0 },
-    /* 197 */
-    { { 0x727fa84e0ac4927cl,0xe3886035a7c8cf23l,0xa4bcd5ea4adca0dfl,
-        0x5995bf21846ab610l },
-      { 0xe90f860b829dfa33l,0xcaafe2ae958fc18bl,0x9b3baf4478630366l,
-        0x44c32ca2d483411el },
-      0 },
-    /* 198 */
-    { { 0xa74a97f1e40ed80cl,0x5f938cb131d2ca82l,0x53f2124b7c2d6ad9l,
-        0x1f2162fb8082a54cl },
-      { 0x7e467cc5720b173el,0x40e8a666085f12f9l,0x8cebc20e4c9d65dcl,
-        0x8f1d402bc3e907c9l },
-      0 },
-    /* 199 */
-    { { 0x4f592f9cfbc4058al,0xb15e14b6292f5670l,0xc55cfe37bc1d8c57l,
-        0xb1980f43926edbf9l },
-      { 0x98c33e0932c76b09l,0x1df5279d33b07f78l,0x6f08ead4863bb461l,
-        0x2828ad9b37448e45l },
-      0 },
-    /* 200 */
-    { { 0x696722c4c4cf4ac5l,0xf5ac1a3fdde64afbl,0x0551baa2e0890832l,
-        0x4973f1275a14b390l },
-      { 0xe59d8335322eac5dl,0x5e07eef50bd9b568l,0xab36720fa2588393l,
-        0x6dac8ed0db168ac7l },
-      0 },
-    /* 201 */
-    { { 0xf7b545aeeda835efl,0x4aa113d21d10ed51l,0x035a65e013741b09l,
-        0x4b23ef5920b9de4cl },
-      { 0xe82bb6803c4c7341l,0xd457706d3f58bc37l,0x73527863a51e3ee8l,
-        0x4dd71534ddf49a4el },
-      0 },
-    /* 202 */
-    { { 0xbf94467295476cd9l,0x648d072fe31a725bl,0x1441c8b8fc4b67e0l,
-        0xfd3170002f4a4dbbl },
-      { 0x1cb43ff48995d0e1l,0x76e695d10ef729aal,0xe0d5f97641798982l,
-        0x14fac58c9569f365l },
-      0 },
-    /* 203 */
-    { { 0xad9a0065f312ae18l,0x51958dc0fcc93fc9l,0xd9a142408a7d2846l,
-        0xed7c765136abda50l },
-      { 0x46270f1a25d4abbcl,0x9b5dd8f3f1a113eal,0xc609b0755b51952fl,
-        0xfefcb7f74d2e9f53l },
-      0 },
-    /* 204 */
-    { { 0xbd09497aba119185l,0xd54e8c30aac45ba4l,0x492479deaa521179l,
-        0x1801a57e87e0d80bl },
-      { 0x073d3f8dfcafffb0l,0x6cf33c0bae255240l,0x781d763b5b5fdfbcl,
-        0x9f8fc11e1ead1064l },
-      0 },
-    /* 205 */
-    { { 0x1583a1715e69544cl,0x0eaf8567f04b7813l,0x1e22a8fd278a4c32l,
-        0xa9d3809d3d3a69a9l },
-      { 0x936c2c2c59a2da3bl,0x38ccbcf61895c847l,0x5e65244e63d50869l,
-        0x3006b9aee1178ef7l },
-      0 },
-    /* 206 */
-    { { 0x0bb1f2b0c9eead28l,0x7eef635d89f4dfbcl,0x074757fdb2ce8939l,
-        0x0ab85fd745f8f761l },
-      { 0xecda7c933e5b4549l,0x4be2bb5c97922f21l,0x261a1274b43b8040l,
-        0xb122d67511e942c2l },
-      0 },
-    /* 207 */
-    { { 0x3be607be66a5ae7al,0x01e703fa76adcbe3l,0xaf9043014eb6e5c5l,
-        0x9f599dc1097dbaecl },
-      { 0x6d75b7180ff250edl,0x8eb91574349a20dcl,0x425605a410b227a3l,
-        0x7d5528e08a294b78l },
-      0 },
-    /* 208 */
-    { { 0xf0f58f6620c26defl,0x025585ea582b2d1el,0xfbe7d79b01ce3881l,
-        0x28ccea01303f1730l },
-      { 0xd1dabcd179644ba5l,0x1fc643e806fff0b8l,0xa60a76fc66b3e17bl,
-        0xc18baf48a1d013bfl },
-      0 },
-    /* 209 */
-    { { 0x34e638c85dc4216dl,0x00c01067206142acl,0xd453a17195f5064al,
-        0x9def809db7a9596bl },
-      { 0x41e8642e67ab8d2cl,0xb42404336237a2b6l,0x7d506a6d64c4218bl,
-        0x0357f8b068808ce5l },
-      0 },
-    /* 210 */
-    { { 0x8e9dbe644cd2cc88l,0xcc61c28df0b8f39dl,0x4a309874cd30a0c8l,
-        0xe4a01add1b489887l },
-      { 0x2ed1eeacf57cd8f9l,0x1b767d3ebd594c48l,0xa7295c717bd2f787l,
-        0x466d7d79ce10cc30l },
-      0 },
-    /* 211 */
-    { { 0x47d318929dada2c7l,0x4fa0a6c38f9aa27dl,0x90e4fd28820a59e1l,
-        0xc672a522451ead1al },
-      { 0x30607cc85d86b655l,0xf0235d3bf9ad4af1l,0x99a08680571172a6l,
-        0x5e3d64faf2a67513l },
-      0 },
-    /* 212 */
-    { { 0xaa6410c79b3b4416l,0xcd8fcf85eab26d99l,0x5ebff74adb656a74l,
-        0x6c8a7a95eb8e42fcl },
-      { 0x10c60ba7b02a63bdl,0x6b2f23038b8f0047l,0x8c6c3738312d90b0l,
-        0x348ae422ad82ca91l },
-      0 },
-    /* 213 */
-    { { 0x7f4746635ccda2fbl,0x22accaa18e0726d2l,0x85adf782492b1f20l,
-        0xc1074de0d9ef2d2el },
-      { 0xfcf3ce44ae9a65b3l,0xfd71e4ac05d7151bl,0xd4711f50ce6a9788l,
-        0xfbadfbdbc9e54ffcl },
-      0 },
-    /* 214 */
-    { { 0x1713f1cd20a99363l,0xb915658f6cf22775l,0x968175cd24d359b2l,
-        0xb7f976b483716fcdl },
-      { 0x5758e24d5d6dbf74l,0x8d23bafd71c3af36l,0x48f477600243dfe3l,
-        0xf4d41b2ecafcc805l },
-      0 },
-    /* 215 */
-    { { 0x51f1cf28fdabd48dl,0xce81be3632c078a4l,0x6ace2974117146e9l,
-        0x180824eae0160f10l },
-      { 0x0387698b66e58358l,0x63568752ce6ca358l,0x82380e345e41e6c5l,
-        0x67e5f63983cf6d25l },
-      0 },
-    /* 216 */
-    { { 0xf89ccb8dcf4899efl,0x949015f09ebb44c0l,0x546f9276b2598ec9l,
-        0x9fef789a04c11fc6l },
-      { 0x6d367ecf53d2a071l,0xb10e1a7fa4519b09l,0xca6b3fb0611e2eefl,
-        0xbc80c181a99c4e20l },
-      0 },
-    /* 217 */
-    { { 0x972536f8e5eb82e6l,0x1a484fc7f56cb920l,0xc78e217150b5da5el,
-        0x49270e629f8cdf10l },
-      { 0x1a39b7bbea6b50adl,0x9a0284c1a2388ffcl,0x5403eb178107197bl,
-        0xd2ee52f961372f7fl },
-      0 },
-    /* 218 */
-    { { 0xd37cd28588e0362al,0x442fa8a78fa5d94dl,0xaff836e5a434a526l,
-        0xdfb478bee5abb733l },
-      { 0xa91f1ce7673eede6l,0xa5390ad42b5b2f04l,0x5e66f7bf5530da2fl,
-        0xd9a140b408df473al },
-      0 },
-    /* 219 */
-    { { 0x0e0221b56e8ea498l,0x623478293563ee09l,0xe06b8391335d2adel,
-        0x760c058d623f4b1al },
-      { 0x0b89b58cc198aa79l,0xf74890d2f07aba7fl,0x4e204110fde2556al,
-        0x7141982d8f190409l },
-      0 },
-    /* 220 */
-    { { 0x6f0a0e334d4b0f45l,0xd9280b38392a94e1l,0x3af324c6b3c61d5el,
-        0x3af9d1ce89d54e47l },
-      { 0xfd8f798120930371l,0xeda2664c21c17097l,0x0e9545dcdc42309bl,
-        0xb1f815c373957dd6l },
-      0 },
-    /* 221 */
-    { { 0x84faa78e89fec44al,0xc8c2ae473caa4cafl,0x691c807dc1b6a624l,
-        0xa41aed141543f052l },
-      { 0x424353997d5ffe04l,0x8bacb2df625b6e20l,0x85d660be87817775l,
-        0xd6e9c1dd86fb60efl },
-      0 },
-    /* 222 */
-    { { 0x3aa2e97ec6853264l,0x771533b7e2304a0bl,0x1b912bb7b8eae9bel,
-        0x9c9c6e10ae9bf8c2l },
-      { 0xa2309a59e030b74cl,0x4ed7494d6a631e90l,0x89f44b23a49b79f2l,
-        0x566bd59640fa61b6l },
-      0 },
-    /* 223 */
-    { { 0x066c0118c18061f3l,0x190b25d37c83fc70l,0xf05fc8e027273245l,
-        0xcf2c7390f525345el },
-      { 0xa09bceb410eb30cfl,0xcfd2ebba0d77703al,0xe842c43a150ff255l,
-        0x02f517558aa20979l },
-      0 },
-    /* 224 */
-    { { 0x396ef794addb7d07l,0x0b4fc74224455500l,0xfaff8eacc78aa3cel,
-        0x14e9ada5e8d4d97dl },
-      { 0xdaa480a12f7079e2l,0x45baa3cde4b0800el,0x01765e2d7838157dl,
-        0xa0ad4fab8e9d9ae8l },
-      0 },
-    /* 225 */
-    { { 0x0bfb76214a653618l,0x1872813c31eaaa5fl,0x1553e73744949d5el,
-        0xbcd530b86e56ed1el },
-      { 0x169be85332e9c47bl,0xdc2776feb50059abl,0xcdba9761192bfbb4l,
-        0x909283cf6979341dl },
-      0 },
-    /* 226 */
-    { { 0x67b0032476e81a13l,0x9bee1a9962171239l,0x08ed361bd32e19d6l,
-        0x35eeb7c9ace1549al },
-      { 0x1280ae5a7e4e5bdcl,0x2dcd2cd3b6ceec6el,0x52e4224c6e266bc1l,
-        0x9a8b2cf4448ae864l },
-      0 },
-    /* 227 */
-    { { 0xf6471bf209d03b59l,0xc90e62a3b65af2abl,0xff7ff168ebd5eec9l,
-        0x6bdb60f4d4491379l },
-      { 0xdadafebc8a55bc30l,0xc79ead1610097fe0l,0x42e197414c1e3bddl,
-        0x01ec3cfd94ba08a9l },
-      0 },
-    /* 228 */
-    { { 0xba6277ebdc9485c2l,0x48cc9a7922fb10c7l,0x4f61d60f70a28d8al,
-        0xd1acb1c0475464f6l },
-      { 0xd26902b126f36612l,0x59c3a44ee0618d8bl,0x4df8a813308357eel,
-        0x7dcd079d405626c2l },
-      0 },
-    /* 229 */
-    { { 0x5ce7d4d3f05a4b48l,0xadcd295237230772l,0xd18f7971812a915al,
-        0x0bf53589377d19b8l },
-      { 0x35ecd95a6c68ea73l,0xc7f3bbca823a584dl,0x9fb674c6f473a723l,
-        0xd28be4d9e16686fcl },
-      0 },
-    /* 230 */
-    { { 0x5d2b990638fa8e4bl,0x559f186e893fd8fcl,0x3a6de2aa436fb6fcl,
-        0xd76007aa510f88cel },
-      { 0x2d10aab6523a4988l,0xb455cf4474dd0273l,0x7f467082a3407278l,
-        0xf2b52f68b303bb01l },
-      0 },
-    /* 231 */
-    { { 0x0d57eafa9835b4cal,0x2d2232fcbb669cbcl,0x8eeeb680c6643198l,
-        0xd8dbe98ecc5aed3al },
-      { 0xcba9be3fc5a02709l,0x30be68e5f5ba1fa8l,0xfebd43cdf10ea852l,
-        0xe01593a3ee559705l },
-      0 },
-    /* 232 */
-    { { 0xd3e5af50ea75a0a6l,0x512226ac57858033l,0x6fe6d50fd0176406l,
-        0xafec07b1aeb8ef06l },
-      { 0x7fb9956780bb0a31l,0x6f1af3cc37309aael,0x9153a15a01abf389l,
-        0xa71b93546e2dbfddl },
-      0 },
-    /* 233 */
-    { { 0xbf8e12e018f593d2l,0xd1a90428a078122bl,0x150505db0ba4f2adl,
-        0x53a2005c628523d9l },
-      { 0x07c8b639e7f2b935l,0x2bff975ac182961al,0x86bceea77518ca2cl,
-        0xbf47d19b3d588e3dl },
-      0 },
-    /* 234 */
-    { { 0x672967a7dd7665d5l,0x4e3030572f2f4de5l,0x144005ae80d4903fl,
-        0x001c2c7f39c9a1b6l },
-      { 0x143a801469efc6d6l,0xc810bdaa7bc7a724l,0x5f65670ba78150a4l,
-        0xfdadf8e786ffb99bl },
-      0 },
-    /* 235 */
-    { { 0xfd38cb88ffc00785l,0x77fa75913b48eb67l,0x0454d055bf368fbcl,
-        0x3a838e4d5aa43c94l },
-      { 0x561663293e97bb9al,0x9eb93363441d94d9l,0x515591a60adb2a83l,
-        0x3cdb8257873e1da3l },
-      0 },
-    /* 236 */
-    { { 0x137140a97de77eabl,0xf7e1c50d41648109l,0x762dcad2ceb1d0dfl,
-        0x5a60cc89f1f57fbal },
-      { 0x80b3638240d45673l,0x1b82be195913c655l,0x057284b8dd64b741l,
-        0x922ff56fdbfd8fc0l },
-      0 },
-    /* 237 */
-    { { 0x1b265deec9a129a1l,0xa5b1ce57cc284e04l,0x04380c46cebfbe3cl,
-        0x72919a7df6c5cd62l },
-      { 0x298f453a8fb90f9al,0xd719c00b88e4031bl,0xe32c0e77796f1856l,
-        0x5e7917803624089al },
-      0 },
-    /* 238 */
-    { { 0x5c16ec557f63cdfbl,0x8e6a3571f1cae4fdl,0xfce26bea560597cal,
-        0x4e0a5371e24c2fabl },
-      { 0x276a40d3a5765357l,0x3c89af440d73a2b4l,0xb8f370ae41d11a32l,
-        0xf5ff7818d56604eel },
-      0 },
-    /* 239 */
-    { { 0xfbf3e3fe1a09df21l,0x26d5d28ee66e8e47l,0x2096bd0a29c89015l,
-        0xe41df0e9533f5e64l },
-      { 0x305fda40b3ba9e3fl,0xf2340ceb2604d895l,0x0866e1927f0367c7l,
-        0x8edd7d6eac4f155fl },
-      0 },
-    /* 240 */
-    { { 0xc9a1dc0e0bfc8ff3l,0x14efd82be936f42fl,0x67016f7ccca381efl,
-        0x1432c1caed8aee96l },
-      { 0xec68482970b23c26l,0xa64fe8730735b273l,0xe389f6e5eaef0f5al,
-        0xcaef480b5ac8d2c6l },
-      0 },
-    /* 241 */
-    { { 0x5245c97875315922l,0xd82951713063cca5l,0xf3ce60d0b64ef2cbl,
-        0xd0ba177e8efae236l },
-      { 0x53a9ae8fb1b3af60l,0x1a796ae53d2da20el,0x01d63605df9eef28l,
-        0xf31c957c1c54ae16l },
-      0 },
-    /* 242 */
-    { { 0xc0f58d5249cc4597l,0xdc5015b0bae0a028l,0xefc5fc55734a814al,
-        0x013404cb96e17c3al },
-      { 0xb29e2585c9a824bfl,0xd593185e001eaed7l,0x8d6ee68261ef68acl,
-        0x6f377c4b91933e6cl },
-      0 },
-    /* 243 */
-    { { 0x9f93bad1a8333fd2l,0xa89302025a2a95b8l,0x211e5037eaf75acel,
-        0x6dba3e4ed2d09506l },
-      { 0xa48ef98cd04399cdl,0x1811c66ee6b73adel,0x72f60752c17ecaf3l,
-        0xf13cf3423becf4a7l },
-      0 },
-    /* 244 */
-    { { 0xceeb9ec0a919e2ebl,0x83a9a195f62c0f68l,0xcfba3bb67aba2299l,
-        0xc83fa9a9274bbad3l },
-      { 0x0d7d1b0b62fa1ce0l,0xe58b60f53418efbfl,0xbfa8ef9e52706f04l,
-        0xb49d70f45d702683l },
-      0 },
-    /* 245 */
-    { { 0x914c7510fad5513bl,0x05f32eecb1751e2dl,0x6d850418d9fb9d59l,
-        0x59cfadbb0c30f1cfl },
-      { 0xe167ac2355cb7fd6l,0x249367b8820426a3l,0xeaeec58c90a78864l,
-        0x5babf362354a4b67l },
-      0 },
-    /* 246 */
-    { { 0x37c981d1ee424865l,0x8b002878f2e5577fl,0x702970f1b9e0c058l,
-        0x6188c6a79026c8f0l },
-      { 0x06f9a19bd0f244dal,0x1ecced5cfb080873l,0x35470f9b9f213637l,
-        0x993fe475df50b9d9l },
-      0 },
-    /* 247 */
-    { { 0x68e31cdf9b2c3609l,0x84eb19c02c46d4eal,0x7ac9ec1a9a775101l,
-        0x81f764664c80616bl },
-      { 0x1d7c2a5a75fbe978l,0x6743fed3f183b356l,0x838d1f04501dd2bfl,
-        0x564a812a5fe9060dl },
-      0 },
-    /* 248 */
-    { { 0x7a5a64f4fa817d1dl,0x55f96844bea82e0fl,0xb5ff5a0fcd57f9aal,
-        0x226bf3cf00e51d6cl },
-      { 0xd6d1a9f92f2833cfl,0x20a0a35a4f4f89a8l,0x11536c498f3f7f77l,
-        0x68779f47ff257836l },
-      0 },
-    /* 249 */
-    { { 0x79b0c1c173043d08l,0xa54467741fc020fal,0xd3767e289a6d26d0l,
-        0x97bcb0d1eb092e0bl },
-      { 0x2ab6eaa8f32ed3c3l,0xc8a4f151b281bc48l,0x4d1bf4f3bfa178f3l,
-        0xa872ffe80a784655l },
-      0 },
-    /* 250 */
-    { { 0xb1ab7935a32b2086l,0xe1eb710e8160f486l,0x9bd0cd913b6ae6bel,
-        0x02812bfcb732a36al },
-      { 0xa63fd7cacf605318l,0x646e5d50fdfd6d1dl,0xa1d683982102d619l,
-        0x07391cc9fe5396afl },
-      0 },
-    /* 251 */
-    { { 0xc50157f08b80d02bl,0x6b8333d162877f7fl,0x7aca1af878d542ael,
-        0x355d2adc7e6d2a08l },
-      { 0xb41f335a287386e1l,0xfd272a94f8e43275l,0x286ca2cde79989eal,
-        0x3dc2b1e37c2a3a79l },
-      0 },
-    /* 252 */
-    { { 0xd689d21c04581352l,0x0a00c825376782bel,0x203bd5909fed701fl,
-        0xc47869103ccd846bl },
-      { 0x5dba770824c768edl,0x72feea026841f657l,0x73313ed56accce0el,
-        0xccc42968d5bb4d32l },
-      0 },
-    /* 253 */
-    { { 0x94e50de13d7620b9l,0xd89a5c8a5992a56al,0xdc007640675487c9l,
-        0xe147eb42aa4871cfl },
-      { 0x274ab4eeacf3ae46l,0xfd4936fb50350fbel,0xdf2afe4748c840eal,
-        0x239ac047080e96e3l },
-      0 },
-    /* 254 */
-    { { 0x481d1f352bfee8d4l,0xce80b5cffa7b0fecl,0x105c4c9e2ce9af3cl,
-        0xc55fa1a3f5f7e59dl },
-      { 0x3186f14e8257c227l,0xc5b1653f342be00bl,0x09afc998aa904fb2l,
-        0x094cd99cd4f4b699l },
-      0 },
-    /* 255 */
-    { { 0x8a981c84d703bebal,0x8631d15032ceb291l,0xa445f2c9e3bd49ecl,
-        0xb90a30b642abad33l },
-      { 0xb465404fb4a5abf9l,0x004750c375db7603l,0x6f9a42ccca35d89fl,
-        0x019f8b9a1b7924f7l },
-      0 },
-};
-
-/* Multiply the base point of P256 by the scalar and return the result.
- * If map is true then convert result to affine co-ordinates.
- *
- * r     Resulting point.
- * k     Scalar to multiply by.
- * map   Indicates whether to convert result to affine.
- * heap  Heap to use for allocation.
- * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
- */
-static int sp_256_ecc_mulmod_base_4(sp_point* r, sp_digit* k,
-        int map, void* heap)
-{
-    return sp_256_ecc_mulmod_stripe_4(r, &p256_base, p256_table,
-                                      k, map, heap);
-}
-
-#ifdef HAVE_INTEL_AVX2
-/* Multiply the base point of P256 by the scalar and return the result.
- * If map is true then convert result to affine co-ordinates.
- *
- * r     Resulting point.
- * k     Scalar to multiply by.
- * map   Indicates whether to convert result to affine.
- * heap  Heap to use for allocation.
- * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
- */
-static int sp_256_ecc_mulmod_base_avx2_4(sp_point* r, sp_digit* k,
-        int map, void* heap)
-{
-    return sp_256_ecc_mulmod_stripe_avx2_4(r, &p256_base, p256_table,
-                                      k, map, heap);
-}
-
-#endif /* HAVE_INTEL_AVX2 */
-#else
-/* A table entry for pre-computed points. */
-typedef struct sp_table_entry_sum {
-    sp_digit x[4];
-    sp_digit y[4];
-    byte infinity;
-} sp_table_entry_sum;
-
-/* Table of pre-computed values for P256 with 3 multiples and width of 8 bits.
- */
-static sp_table_entry_sum p256_table[33][58] = {
-    {
-        /* 0 << 0 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 0 */
-        { { 0x79e730d418a9143cl,0x75ba95fc5fedb601l,0x79fb732b77622510l,
-            0x18905f76a53755c6l },
-          { 0xddf25357ce95560al,0x8b4ab8e4ba19e45cl,0xd2e88688dd21f325l,
-            0x8571ff1825885d85l },
-          0 },
-        /* 3 << 0 */
-        { { 0xffac3f904eebc127l,0xb027f84a087d81fbl,0x66ad77dd87cbbc98l,
-            0x26936a3fb6ff747el },
-          { 0xb04c5c1fc983a7ebl,0x583e47ad0861fe1al,0x788208311a2ee98el,
-            0xd5f06a29e587cc07l },
-          0 },
-        /* 4 << 0 */
-        { { 0x74b0b50d46918dccl,0x4650a6edc623c173l,0x0cdaacace8100af2l,
-            0x577362f541b0176bl },
-          { 0x2d96f24ce4cbaba6l,0x17628471fad6f447l,0x6b6c36dee5ddd22el,
-            0x84b14c394c5ab863l },
-          0 },
-        /* 5 << 0 */
-        { { 0xbe1b8aaec45c61f5l,0x90ec649a94b9537dl,0x941cb5aad076c20cl,
-            0xc9079605890523c8l },
-          { 0xeb309b4ae7ba4f10l,0x73c568efe5eb882bl,0x3540a9877e7a1f68l,
-            0x73a076bb2dd1e916l },
-          0 },
-        /* 7 << 0 */
-        { { 0x0746354ea0173b4fl,0x2bd20213d23c00f7l,0xf43eaab50c23bb08l,
-            0x13ba5119c3123e03l },
-          { 0x2847d0303f5b9d4dl,0x6742f2f25da67bddl,0xef933bdc77c94195l,
-            0xeaedd9156e240867l },
-          0 },
-        /* 9 << 0 */
-        { { 0x75c96e8f264e20e8l,0xabe6bfed59a7a841l,0x2cc09c0444c8eb00l,
-            0xe05b3080f0c4e16bl },
-          { 0x1eb7777aa45f3314l,0x56af7bedce5d45e3l,0x2b6e019a88b12f1al,
-            0x086659cdfd835f9bl },
-          0 },
-        /* 10 << 0 */
-        { { 0x2c18dbd19dc21ec8l,0x98f9868a0fcf8139l,0x737d2cd648250b49l,
-            0xcc61c94724b3428fl },
-          { 0x0c2b407880dd9e76l,0xc43a8991383fbe08l,0x5f7d2d65779be5d2l,
-            0x78719a54eb3b4ab5l },
-          0 },
-        /* 11 << 0 */
-        { { 0xea7d260a6245e404l,0x9de407956e7fdfe0l,0x1ff3a4158dac1ab5l,
-            0x3e7090f1649c9073l },
-          { 0x1a7685612b944e88l,0x250f939ee57f61c8l,0x0c0daa891ead643dl,
-            0x68930023e125b88el },
-          0 },
-        /* 13 << 0 */
-        { { 0xccc425634b2ed709l,0x0e356769856fd30dl,0xbcbcd43f559e9811l,
-            0x738477ac5395b759l },
-          { 0x35752b90c00ee17fl,0x68748390742ed2e3l,0x7cd06422bd1f5bc1l,
-            0xfbc08769c9e7b797l },
-          0 },
-        /* 15 << 0 */
-        { { 0x72bcd8b7bc60055bl,0x03cc23ee56e27e4bl,0xee337424e4819370l,
-            0xe2aa0e430ad3da09l },
-          { 0x40b8524f6383c45dl,0xd766355442a41b25l,0x64efa6de778a4797l,
-            0x2042170a7079adf4l },
-          0 },
-        /* 16 << 0 */
-        { { 0x808b0b650bc6fb80l,0x5882e0753ffe2e6bl,0xd5ef2f7c2c83f549l,
-            0x54d63c809103b723l },
-          { 0xf2f11bd652a23f9bl,0x3670c3194b0b6587l,0x55c4623bb1580e9el,
-            0x64edf7b201efe220l },
-          0 },
-        /* 17 << 0 */
-        { { 0x97091dcbd53c5c9dl,0xf17624b6ac0a177bl,0xb0f139752cfe2dffl,
-            0xc1a35c0a6c7a574el },
-          { 0x227d314693e79987l,0x0575bf30e89cb80el,0x2f4e247f0d1883bbl,
-            0xebd512263274c3d0l },
-          0 },
-        /* 19 << 0 */
-        { { 0xfea912baa5659ae8l,0x68363aba25e1a16el,0xb8842277752c41acl,
-            0xfe545c282897c3fcl },
-          { 0x2d36e9e7dc4c696bl,0x5806244afba977c5l,0x85665e9be39508c1l,
-            0xf720ee256d12597bl },
-          0 },
-        /* 21 << 0 */
-        { { 0x562e4cecc135b208l,0x74e1b2654783f47dl,0x6d2a506c5a3f3b30l,
-            0xecead9f4c16762fcl },
-          { 0xf29dd4b2e286e5b9l,0x1b0fadc083bb3c61l,0x7a75023e7fac29a4l,
-            0xc086d5f1c9477fa3l },
-          0 },
-        /* 23 << 0 */
-        { { 0xf4f876532de45068l,0x37c7a7e89e2e1f6el,0xd0825fa2a3584069l,
-            0xaf2cea7c1727bf42l },
-          { 0x0360a4fb9e4785a9l,0xe5fda49c27299f4al,0x48068e1371ac2f71l,
-            0x83d0687b9077666fl },
-          0 },
-        /* 25 << 0 */
-        { { 0xa4a319acd837879fl,0x6fc1b49eed6b67b0l,0xe395993332f1f3afl,
-            0x966742eb65432a2el },
-          { 0x4b8dc9feb4966228l,0x96cc631243f43950l,0x12068859c9b731eel,
-            0x7b948dc356f79968l },
-          0 },
-        /* 27 << 0 */
-        { { 0x042c2af497e2feb4l,0xd36a42d7aebf7313l,0x49d2c9eb084ffdd7l,
-            0x9f8aa54b2ef7c76al },
-          { 0x9200b7ba09895e70l,0x3bd0c66fddb7fb58l,0x2d97d10878eb4cbbl,
-            0x2d431068d84bde31l },
-          0 },
-        /* 28 << 0 */
-        { { 0x4b523eb7172ccd1fl,0x7323cb2830a6a892l,0x97082ec0cfe153ebl,
-            0xe97f6b6af2aadb97l },
-          { 0x1d3d393ed1a83da1l,0xa6a7f9c7804b2a68l,0x4a688b482d0cb71el,
-            0xa9b4cc5f40585278l },
-          0 },
-        /* 29 << 0 */
-        { { 0x5e5db46acb66e132l,0xf1be963a0d925880l,0x944a70270317b9e2l,
-            0xe266f95948603d48l },
-          { 0x98db66735c208899l,0x90472447a2fb18a3l,0x8a966939777c619fl,
-            0x3798142a2a3be21bl },
-          0 },
-        /* 31 << 0 */
-        { { 0xe2f73c696755ff89l,0xdd3cf7e7473017e6l,0x8ef5689d3cf7600dl,
-            0x948dc4f8b1fc87b4l },
-          { 0xd9e9fe814ea53299l,0x2d921ca298eb6028l,0xfaecedfd0c9803fcl,
-            0xf38ae8914d7b4745l },
-          0 },
-        /* 33 << 0 */
-        { { 0x871514560f664534l,0x85ceae7c4b68f103l,0xac09c4ae65578ab9l,
-            0x33ec6868f044b10cl },
-          { 0x6ac4832b3a8ec1f1l,0x5509d1285847d5efl,0xf909604f763f1574l,
-            0xb16c4303c32f63c4l },
-          0 },
-        /* 34 << 0 */
-        { { 0xb6ab20147ca23cd3l,0xcaa7a5c6a391849dl,0x5b0673a375678d94l,
-            0xc982ddd4dd303e64l },
-          { 0xfd7b000b5db6f971l,0xbba2cb1f6f876f92l,0xc77332a33c569426l,
-            0xa159100c570d74f8l },
-          0 },
-        /* 35 << 0 */
-        { { 0xfd16847fdec67ef5l,0x742ee464233e76b7l,0x0b8e4134efc2b4c8l,
-            0xca640b8642a3e521l },
-          { 0x653a01908ceb6aa9l,0x313c300c547852d5l,0x24e4ab126b237af7l,
-            0x2ba901628bb47af8l },
-          0 },
-        /* 36 << 0 */
-        { { 0x3d5e58d6a8219bb7l,0xc691d0bd1b06c57fl,0x0ae4cb10d257576el,
-            0x3569656cd54a3dc3l },
-          { 0xe5ebaebd94cda03al,0x934e82d3162bfe13l,0x450ac0bae251a0c6l,
-            0x480b9e11dd6da526l },
-          0 },
-        /* 37 << 0 */
-        { { 0x00467bc58cce08b5l,0xb636458c7f178d55l,0xc5748baea677d806l,
-            0x2763a387dfa394ebl },
-          { 0xa12b448a7d3cebb6l,0xe7adda3e6f20d850l,0xf63ebce51558462cl,
-            0x58b36143620088a8l },
-          0 },
-        /* 39 << 0 */
-        { { 0xa9d89488a059c142l,0x6f5ae714ff0b9346l,0x068f237d16fb3664l,
-            0x5853e4c4363186acl },
-          { 0xe2d87d2363c52f98l,0x2ec4a76681828876l,0x47b864fae14e7b1cl,
-            0x0c0bc0e569192408l },
-          0 },
-        /* 40 << 0 */
-        { { 0xe4d7681db82e9f3el,0x83200f0bdf25e13cl,0x8909984c66f27280l,
-            0x462d7b0075f73227l },
-          { 0xd90ba188f2651798l,0x74c6e18c36ab1c34l,0xab256ea35ef54359l,
-            0x03466612d1aa702fl },
-          0 },
-        /* 41 << 0 */
-        { { 0x624d60492ed22e91l,0x6fdfe0b56f072822l,0xeeca111539ce2271l,
-            0x98100a4fdb01614fl },
-          { 0xb6b0daa2a35c628fl,0xb6f94d2ec87e9a47l,0xc67732591d57d9cel,
-            0xf70bfeec03884a7bl },
-          0 },
-        /* 43 << 0 */
-        { { 0x4ff23ffd248a7d06l,0x80c5bfb4878873fal,0xb7d9ad9005745981l,
-            0x179c85db3db01994l },
-          { 0xba41b06261a6966cl,0x4d82d052eadce5a8l,0x9e91cd3ba5e6a318l,
-            0x47795f4f95b2dda0l },
-          0 },
-        /* 44 << 0 */
-        { { 0xecfd7c1fd55a897cl,0x009194abb29110fbl,0x5f0e2046e381d3b0l,
-            0x5f3425f6a98dd291l },
-          { 0xbfa06687730d50dal,0x0423446c4b083b7fl,0x397a247dd69d3417l,
-            0xeb629f90387ba42al },
-          0 },
-        /* 45 << 0 */
-        { { 0x1ee426ccd5cd79bfl,0x0032940b946c6e18l,0x1b1e8ae057477f58l,
-            0xe94f7d346d823278l },
-          { 0xc747cb96782ba21al,0xc5254469f72b33a5l,0x772ef6dec7f80c81l,
-            0xd73acbfe2cd9e6b5l },
-          0 },
-        /* 46 << 0 */
-        { { 0x4075b5b149ee90d9l,0x785c339aa06e9ebal,0xa1030d5babf825e0l,
-            0xcec684c3a42931dcl },
-          { 0x42ab62c9c1586e63l,0x45431d665ab43f2bl,0x57c8b2c055f7835dl,
-            0x033da338c1b7f865l },
-          0 },
-        /* 47 << 0 */
-        { { 0x283c7513caa76097l,0x0a624fa936c83906l,0x6b20afec715af2c7l,
-            0x4b969974eba78bfdl },
-          { 0x220755ccd921d60el,0x9b944e107baeca13l,0x04819d515ded93d4l,
-            0x9bbff86e6dddfd27l },
-          0 },
-        /* 48 << 0 */
-        { { 0x6b34413077adc612l,0xa7496529bbd803a0l,0x1a1baaa76d8805bdl,
-            0xc8403902470343adl },
-          { 0x39f59f66175adff1l,0x0b26d7fbb7d8c5b7l,0xa875f5ce529d75e3l,
-            0x85efc7e941325cc2l },
-          0 },
-        /* 49 << 0 */
-        { { 0x21950b421ff6acd3l,0xffe7048453dc6909l,0xff4cd0b228766127l,
-            0xabdbe6084fb7db2bl },
-          { 0x837c92285e1109e8l,0x26147d27f4645b5al,0x4d78f592f7818ed8l,
-            0xd394077ef247fa36l },
-          0 },
-        /* 51 << 0 */
-        { { 0x508cec1c3b3f64c9l,0xe20bc0ba1e5edf3fl,0xda1deb852f4318d4l,
-            0xd20ebe0d5c3fa443l },
-          { 0x370b4ea773241ea3l,0x61f1511c5e1a5f65l,0x99a5e23d82681c62l,
-            0xd731e383a2f54c2dl },
-          0 },
-        /* 52 << 0 */
-        { { 0x2692f36e83445904l,0x2e0ec469af45f9c0l,0x905a3201c67528b7l,
-            0x88f77f34d0e5e542l },
-          { 0xf67a8d295864687cl,0x23b92eae22df3562l,0x5c27014b9bbec39el,
-            0x7ef2f2269c0f0f8dl },
-          0 },
-        /* 53 << 0 */
-        { { 0x97359638546c4d8dl,0x5f9c3fc492f24679l,0x912e8beda8c8acd9l,
-            0xec3a318d306634b0l },
-          { 0x80167f41c31cb264l,0x3db82f6f522113f2l,0xb155bcd2dcafe197l,
-            0xfba1da5943465283l },
-          0 },
-        /* 55 << 0 */
-        { { 0x258bbbf9e7305683l,0x31eea5bf07ef5be6l,0x0deb0e4a46c814c1l,
-            0x5cee8449a7b730ddl },
-          { 0xeab495c5a0182bdel,0xee759f879e27a6b4l,0xc2cf6a6880e518cal,
-            0x25e8013ff14cf3f4l },
-          0 },
-        /* 57 << 0 */
-        { { 0x3ec832e77acaca28l,0x1bfeea57c7385b29l,0x068212e3fd1eaf38l,
-            0xc13298306acf8cccl },
-          { 0xb909f2db2aac9e59l,0x5748060db661782al,0xc5ab2632c79b7a01l,
-            0xda44c6c600017626l },
-          0 },
-        /* 59 << 0 */
-        { { 0x69d44ed65c46aa8el,0x2100d5d3a8d063d1l,0xcb9727eaa2d17c36l,
-            0x4c2bab1b8add53b7l },
-          { 0xa084e90c15426704l,0x778afcd3a837ebeal,0x6651f7017ce477f8l,
-            0xa062499846fb7a8bl },
-          0 },
-        /* 60 << 0 */
-        { { 0xdc1e6828ed8a6e19l,0x33fc23364189d9c7l,0x026f8fe2671c39bcl,
-            0xd40c4ccdbc6f9915l },
-          { 0xafa135bbf80e75cal,0x12c651a022adff2cl,0xc40a04bd4f51ad96l,
-            0x04820109bbe4e832l },
-          0 },
-        /* 61 << 0 */
-        { { 0x3667eb1a7f4c04ccl,0x59556621a9404f84l,0x71cdf6537eceb50al,
-            0x994a44a69b8335fal },
-          { 0xd7faf819dbeb9b69l,0x473c5680eed4350dl,0xb6658466da44bba2l,
-            0x0d1bc780872bdbf3l },
-          0 },
-        /* 63 << 0 */
-        { { 0xb8d3d9319ff91fe5l,0x039c4800f0518eedl,0x95c376329182cb26l,
-            0x0763a43482fc568dl },
-          { 0x707c04d5383e76bal,0xac98b930824e8197l,0x92bf7c8f91230de0l,
-            0x90876a0140959b70l },
-          0 },
-        /* 64 << 0 */
-        { { 0xdb6d96f305968b80l,0x380a0913089f73b9l,0x7da70b83c2c61e01l,
-            0x95fb8394569b38c7l },
-          { 0x9a3c651280edfe2fl,0x8f726bb98faeaf82l,0x8010a4a078424bf8l,
-            0x296720440e844970l },
-          0 },
-        /* 65 << 0 */
-        { { 0xdc2306ebfcdbb2b2l,0x79527db7ba66f4b9l,0xbf639ed67765765el,
-            0x01628c4706b6090al },
-          { 0x66eb62f1b957b4a1l,0x33cb7691ba659f46l,0x2c90d98cf3e055d6l,
-            0x7d096ac42f174750l },
-          0 },
-        /* 71 << 0 */
-        { { 0xf19f382e92aa7864l,0x49c7cb94fc05804bl,0xf94aa89b40750d01l,
-            0xdd421b5d4a210364l },
-          { 0x56cd001e39df3672l,0x030a119fdd4af1ecl,0x11f947e696cd0572l,
-            0x574cc7b293786791l },
-          0 },
-        /* 77 << 0 */
-        { { 0x0a2193bfc266f85cl,0x719a87be5a0ec9cel,0x9c30c6422b2f9c49l,
-            0xdb15e4963d5baeb1l },
-          { 0x83c3139be0d37321l,0x4788522b2e9fdbb2l,0x2b4f0c7877eb94eal,
-            0x854dc9d595105f9el },
-          0 },
-        /* 83 << 0 */
-        { { 0x2c9ee62dc3363a22l,0x125d4714ec67199al,0xf87abebf2ab80485l,
-            0xcf3086e87a243ca4l },
-          { 0x5c52b051c64e09ddl,0x5e9b16125625aad7l,0x0536a39db19c6126l,
-            0x97f0013247b64be5l },
-          0 },
-        /* 89 << 0 */
-        { { 0xc1ee6264a7eabe67l,0x62d51e29fd54487dl,0x3ea123446310eb5al,
-            0xbd88aca74765b805l },
-          { 0xb7b284be14fb691al,0x640388f83b9fffefl,0x7ab49dd209f98f9al,
-            0x7150f87e7211e445l },
-          0 },
-        /* 95 << 0 */
-        { { 0x263e039bb308cc40l,0x6684ad762b346fd2l,0x9a127f2bcaa12d0dl,
-            0x76a8f9fea974291fl },
-          { 0xc802049b68aa19e4l,0x65499c990c5dbba0l,0xee1b1cb5344455a1l,
-            0x3f293fda2cd6f439l },
-          0 },
-        /* 101 << 0 */
-        { { 0xb7a96e0a4ea6fdf7l,0xbbe914d3b99cd026l,0x6a610374c569a602l,
-            0xe9b1c23914da499el },
-          { 0xb5f6f0feadc19a99l,0x731251826f21687cl,0x5a8a14644be77793l,
-            0x94ce9e0adba8bfc7l },
-          0 },
-        /* 107 << 0 */
-        { { 0x2ca0ba9c3796f4c7l,0x3571e4d1592ce334l,0x28f9cdebe9f6e877l,
-            0xee206023efce1a70l },
-          { 0xb2159e08b76369dcl,0x2754e4260a7f687cl,0xe008039e02de2ff1l,
-            0xccd7e9418ea700c1l },
-          0 },
-        /* 113 << 0 */
-        { { 0xa125e6c1b7ebcb88l,0x3289e86e10ec0d40l,0xcc3a5ecb98353869l,
-            0x734e0d078a2b0d3al },
-          { 0xe0d92e9a51933360l,0xfa6bcdb1786076b9l,0xd13cca90747f19ecl,
-            0x61d8209d49f3a53dl },
-          0 },
-        /* 116 << 0 */
-        { { 0x87f9793bc9826344l,0x4b3de89bb2f5f79cl,0xc9f08a5659cb1b6el,
-            0xd8f1fc5f6a92b9aal },
-          { 0x86357f9eb412595el,0x53c30bbe65b80f16l,0xf06c2c8c70549a57l,
-            0xa9c8a4b42b9157dal },
-          0 },
-        /* 119 << 0 */
-        { { 0x87af199e6cc47305l,0x062afb7c1e314ddel,0x2be22ba0f3a49fb4l,
-            0x6ed0b988157b7f56l },
-          { 0x8162cf502d653fd9l,0x17d29c64877b7497l,0xd7e814380f67b514l,
-            0xfedf1014fe6ee703l },
-          0 },
-        /* 125 << 0 */
-        { { 0xaab54cfc93740130l,0xf72dab6d225733fal,0x04b76d2d1ed32559l,
-            0xa9fe2396bb85b9cbl },
-          { 0x128b0d24bf2219f0l,0x2292393b579f3ce2l,0x51dc5fac145ff0d5l,
-            0xb16d6af8c3febbc1l },
-          0 },
-    },
-    {
-        /* 0 << 8 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 8 */
-        { { 0x486d8ffa696946fcl,0x50fbc6d8b9cba56dl,0x7e3d423e90f35a15l,
-            0x7c3da195c0dd962cl },
-          { 0xe673fdb03cfd5d8bl,0x0704b7c2889dfca5l,0xf6ce581ff52305aal,
-            0x399d49eb914d5e53l },
-          0 },
-        /* 3 << 8 */
-        { { 0x35d6a53eed4c3717l,0x9f8240cf3d0ed2a3l,0x8c0d4d05e5543aa5l,
-            0x45d5bbfbdd33b4b4l },
-          { 0xfa04cc73137fd28el,0x862ac6efc73b3ffdl,0x403ff9f531f51ef2l,
-            0x34d5e0fcbc73f5a2l },
-          0 },
-        /* 4 << 8 */
-        { { 0x4f7081e144cc3addl,0xd5ffa1d687be82cfl,0x89890b6c0edd6472l,
-            0xada26e1a3ed17863l },
-          { 0x276f271563483caal,0xe6924cd92f6077fdl,0x05a7fe980a466e3cl,
-            0xf1c794b0b1902d1fl },
-          0 },
-        /* 5 << 8 */
-        { { 0x33b2385c08369a90l,0x2990c59b190eb4f8l,0x819a6145c68eac80l,
-            0x7a786d622ec4a014l },
-          { 0x33faadbe20ac3a8dl,0x31a217815aba2d30l,0x209d2742dba4f565l,
-            0xdb2ce9e355aa0fbbl },
-          0 },
-        /* 7 << 8 */
-        { { 0x0c4a58d474a86108l,0xf8048a8fee4c5d90l,0xe3c7c924e86d4c80l,
-            0x28c889de056a1e60l },
-          { 0x57e2662eb214a040l,0xe8c48e9837e10347l,0x8774286280ac748al,
-            0xf1c24022186b06f2l },
-          0 },
-        /* 9 << 8 */
-        { { 0xe8cbf1e5d5923359l,0xdb0cea9d539b9fb0l,0x0c5b34cf49859b98l,
-            0x5e583c56a4403cc6l },
-          { 0x11fc1a2dd48185b7l,0xc93fbc7e6e521787l,0x47e7a05805105b8bl,
-            0x7b4d4d58db8260c8l },
-          0 },
-        /* 10 << 8 */
-        { { 0xb31bd6136339c083l,0x39ff8155dfb64701l,0x7c3388d2e29604abl,
-            0x1e19084ba6b10442l },
-          { 0x17cf54c0eccd47efl,0x896933854a5dfb30l,0x69d023fb47daf9f6l,
-            0x9222840b7d91d959l },
-          0 },
-        /* 11 << 8 */
-        { { 0xc510610939842194l,0xb7e2353e49d05295l,0xfc8c1d5cefb42ee0l,
-            0xe04884eb08ce811cl },
-          { 0xf1f75d817419f40el,0x5b0ac162a995c241l,0x120921bbc4c55646l,
-            0x713520c28d33cf97l },
-          0 },
-        /* 13 << 8 */
-        { { 0x41d04ee21726931al,0x0bbbb2c83660ecfdl,0xa6ef6de524818e18l,
-            0xe421cc51e7d57887l },
-          { 0xf127d208bea87be6l,0x16a475d3b1cdd682l,0x9db1b684439b63f7l,
-            0x5359b3dbf0f113b6l },
-          0 },
-        /* 15 << 8 */
-        { { 0x3a5c752edcc18770l,0x4baf1f2f8825c3a5l,0xebd63f7421b153edl,
-            0xa2383e47b2f64723l },
-          { 0xe7bf620a2646d19al,0x56cb44ec03c83ffdl,0xaf7267c94f6be9f1l,
-            0x8b2dfd7bc06bb5e9l },
-          0 },
-        /* 16 << 8 */
-        { { 0x6772b0e5ab4b35a2l,0x1d8b6001f5eeaacfl,0x728f7ce4795b9580l,
-            0x4a20ed2a41fb81dal },
-          { 0x9f685cd44fec01e6l,0x3ed7ddcca7ff50adl,0x460fd2640c2d97fdl,
-            0x3a241426eb82f4f9l },
-          0 },
-        /* 17 << 8 */
-        { { 0xc503cd33bccd9617l,0x365dede4ba7730a3l,0x798c63555ddb0786l,
-            0xa6c3200efc9cd3bcl },
-          { 0x060ffb2ce5e35efdl,0x99a4e25b5555a1c1l,0x11d95375f70b3751l,
-            0x0a57354a160e1bf6l },
-          0 },
-        /* 19 << 8 */
-        { { 0xc033bdc719803511l,0xa9f97b3b8888c3bel,0x3d68aebc85c6d05el,
-            0xc3b88a9d193919ebl },
-          { 0x2d300748c48b0ee3l,0x7506bc7c07a746c1l,0xfc48437c6e6d57f3l,
-            0x5bd71587cfeaa91al },
-          0 },
-        /* 21 << 8 */
-        { { 0xe40736d3df61bc76l,0x13a619c03f778cdbl,0x6dd921a4c56ea28fl,
-            0x76a524332fa647b4l },
-          { 0x23591891ac5bdc5dl,0xff4a1a72bac7dc01l,0x9905e26162df8453l,
-            0x3ac045dfe63b265fl },
-          0 },
-        /* 23 << 8 */
-        { { 0x8435bd6994b03ed1l,0xd9ad1de3634cc546l,0x2cf423fc00e420cal,
-            0xeed26d80a03096ddl },
-          { 0xd7f60be7a4db09d2l,0xf47f569d960622f7l,0xe5925fd77296c729l,
-            0xeff2db2626ca2715l },
-          0 },
-        /* 25 << 8 */
-        { { 0x5dfee80f83774bddl,0x6313160285734485l,0xa1b524ae914a69a9l,
-            0xebc2ffafd4e300d7l },
-          { 0x52c93db77cfa46a5l,0x71e6161f21653b50l,0x3574fc57a4bc580al,
-            0xc09015dde1bc1253l },
-          0 },
-        /* 27 << 8 */
-        { { 0x9c38ddcceb5b76c1l,0x746f528526fc0ab4l,0x52a63a50d62c269fl,
-            0x60049c5599458621l },
-          { 0xe7f48f823c2f7c9el,0x6bd99043917d5cf3l,0xeb1317a88701f469l,
-            0xbd3fe2ed9a449fe0l },
-          0 },
-        /* 28 << 8 */
-        { { 0xe652533b3cef0d7dl,0xd94f7b182bbb4381l,0x838752be0e80f500l,
-            0x8e6e24889e9c9bfbl },
-          { 0xc975169716caca6al,0x866c49d838531ad9l,0xc917e2397151ade1l,
-            0x2d016ec16037c407l },
-          0 },
-        /* 29 << 8 */
-        { { 0x202f6a9c31c71f7bl,0x01f95aa3296ffe5cl,0x5fc0601453cec3a3l,
-            0xeb9912375f498a45l },
-          { 0xae9a935e5d91ba87l,0xc6ac62810b564a19l,0x8a8fe81c3bd44e69l,
-            0x7c8b467f9dd11d45l },
-          0 },
-        /* 31 << 8 */
-        { { 0x21d3634d39eedbbal,0x35cd2e680455a46dl,0xc8cafe65f9d7eb0cl,
-            0xbda3ce9e00cefb3el },
-          { 0xddc17a602c9cf7a4l,0x01572ee47bcb8773l,0xa92b2b018c7548dfl,
-            0x732fd309a84600e3l },
-          0 },
-        /* 33 << 8 */
-        { { 0x65cf89a2e0600afal,0xcf51482f753c5ceal,0x4f2b2d25a5c2bfc5l,
-            0x9381f57187098256l },
-          { 0x89210f676e976e4bl,0xe2cf12f489f47a7bl,0xc21a1658e8484050l,
-            0xa224dbf82f0fff01l },
-          0 },
-        /* 34 << 8 */
-        { { 0xc28961087282513dl,0x9a78c4296a3f8fb8l,0xddfa56f9a31e24b7l,
-            0xb1e14f84fb72611fl },
-          { 0x1d0f70ab45078d65l,0xb247aef3819924d8l,0x8d519f9dbb9877c1l,
-            0x495c2ece8368c7c9l },
-          0 },
-        /* 35 << 8 */
-        { { 0xca9129a0bdb69d12l,0xbe3e319978f39adfl,0xa88506df5fe49438l,
-            0x17ddb7a7aafe894cl },
-          { 0x28d1456f6d1d742fl,0xeec09651917d1268l,0xdecb1c700fd5b4c0l,
-            0x32d14f6acf2861dbl },
-          0 },
-        /* 36 << 8 */
-        { { 0x903f6e3960e913afl,0xb2b58bee98bf140dl,0x9deff025354890b8l,
-            0x155810068d2e924el },
-          { 0xb5755db493c95e5bl,0x3fac42f0dae20eb8l,0x9377c8c109b6d8e0l,
-            0xa43e2b46ab47ceffl },
-          0 },
-        /* 37 << 8 */
-        { { 0x6c3f5a51cb61e7e7l,0x264aebc80d9c73b2l,0xc404b2114a0d9288l,
-            0x5178d3cf8b3a79e9l },
-          { 0x4080be5372a420d7l,0xa39396adef026429l,0x22fbb92e8dde4728l,
-            0x19e42d8874d949fcl },
-          0 },
-        /* 39 << 8 */
-        { { 0xde352d78387f5557l,0x6770149969367413l,0x255bb8c00b0cc102l,
-            0x63cad1be1f4d262el },
-          { 0xf34f9a8a3f8f4fb6l,0x32bc13aae03a969fl,0xb29d4336218371cdl,
-            0x799d76ab285bd210l },
-          0 },
-        /* 40 << 8 */
-        { { 0x5f57b2fbfacfa459l,0x874b1498c1b5aa6bl,0xb9e89acac4db2092l,
-            0x1362bf8ddf4381dal },
-          { 0x25d76830b76328a0l,0x38188b7098572ae4l,0xb43e941429132f7dl,
-            0x7895a29f22dd42c9l },
-          0 },
-        /* 41 << 8 */
-        { { 0x85bded619e808c05l,0x6e0fc2bcc7ef83bbl,0xed70e0b499bedf77l,
-            0x300e777dc1aaffc0l },
-          { 0xe2da2359c43e6d2cl,0xacf6d60a275226e0l,0x18ca38f7f82558bdl,
-            0xd7b017d475ae2591l },
-          0 },
-        /* 43 << 8 */
-        { { 0xed299e2d7cd92ee2l,0x2c08eb37ad847153l,0x7b372aa712acfd81l,
-            0x574d27f5fabda29cl },
-          { 0xbd8247f0f2ee6ebcl,0x8bf76710d06be261l,0x26e95b4bcb186d4cl,
-            0x4fa3ac1d1ebb4a46l },
-          0 },
-        /* 44 << 8 */
-        { { 0xcbde78dd5e22cbb2l,0xf449c85b76bb4391l,0x4289f357b6a4273bl,
-            0x9fce23fd48e84a19l },
-          { 0xcfc32730939eb3b4l,0x8b3d982c16c32280l,0x5ac234bad5f1346cl,
-            0x781954b470769fc9l },
-          0 },
-        /* 45 << 8 */
-        { { 0xff0d4d30062c7dbdl,0x2c483081e6f9fcf0l,0x22f96316d67e070fl,
-            0xdd9be459c0e68c44l },
-          { 0xb9c1edffce2edd4dl,0x1a54782021fc538cl,0x93849be49979aee1l,
-            0x3f313629a590949el },
-          0 },
-        /* 46 << 8 */
-        { { 0x160b836b266be332l,0x49de38215f340575l,0x782e8f6701edce66l,
-            0x83ae008b5df1a93el },
-          { 0x85d33a263ed9ffebl,0xae2f9f961e79db97l,0xf64f209b95ae9e34l,
-            0x2b6b03455e957d49l },
-          0 },
-        /* 47 << 8 */
-        { { 0x7a24a21a331d6bdal,0xfdba302f6328f742l,0x37a36dd47744dca4l,
-            0xda2832ce6fef500fl },
-          { 0x23da304a7b49d73al,0xeede2cebc6ad834fl,0xf21a81248dec3c78l,
-            0x4bc9469b19b721e3l },
-          0 },
-        /* 48 << 8 */
-        { { 0x6faf68feaae6ee70l,0x78f4cc155602b0c9l,0x7e3321a86e94052al,
-            0x2fb3a0d6734d5d80l },
-          { 0xf3b98f3bb25a43bal,0x30bf803119ee2951l,0x7ffee43321b0612al,
-            0x12f775e42eb821d0l },
-          0 },
-        /* 49 << 8 */
-        { { 0x31cc342913e5c1d6l,0x05deaa3cee54e334l,0x21ea2b61cd5087d8l,
-            0x73a1841e70d1b8bcl },
-          { 0xd44e2b41b078bf14l,0xc295732fcea2a30el,0x30cdab42954939f7l,
-            0xc1b4e43a2dba0b7cl },
-          0 },
-        /* 51 << 8 */
-        { { 0x5f33f618b6a20132l,0xc8d73e3cfbbf3022l,0xf3b9844d47ed4320l,
-            0xab5868aa927f00cal },
-          { 0x06cb1113077f6e1cl,0x1417b43a5c94faaal,0x7666cb90cf4cd1e9l,
-            0x99e009f210900566l },
-          0 },
-        /* 52 << 8 */
-        { { 0x4fdff805f57209b5l,0x9bd65ac3f952ac8dl,0x02a3abd3c7969a6fl,
-            0x1359927ef523775fl },
-          { 0xe09b463f88d2e861l,0x661d2199623287c3l,0x821e64495a70eb7al,
-            0x0afbbb1dd67dc684l },
-          0 },
-        /* 53 << 8 */
-        { { 0x2c5a2b2d55750eb2l,0x54d756c29dc28d9fl,0x798c8d113af97f71l,
-            0x54e21ee21f6d1853l },
-          { 0x34e0c8bceffc3f8al,0xed3cc4dda96f193fl,0x86436a84fad97110l,
-            0x8530ca522c97205el },
-          0 },
-        /* 55 << 8 */
-        { { 0x9b6c8452f7236867l,0x21cf260c777b44fdl,0x659fc99dceb00c52l,
-            0xda97098e2439e8dbl },
-          { 0x647efe510ed6e14fl,0x37c8ca122a6600f3l,0x53e89b0badf6f4a7l,
-            0xd9fc8c716645618al },
-          0 },
-        /* 57 << 8 */
-        { { 0x9cecfb8eee6ebd31l,0x4603994b1ff25529l,0x707bc80af4b141c4l,
-            0x3a83d56c07524d3al },
-          { 0x7035c746613a3020l,0x7aa766b286626a1cl,0x3af656095ac76c78l,
-            0x4039c655171e47d6l },
-          0 },
-        /* 59 << 8 */
-        { { 0x79cb147f0ce33b63l,0xa1328a622d160c61l,0xf99538f3cf7eb87el,
-            0x0334d4958e2241d5l },
-          { 0x3ad97e02f3e49e48l,0xdcfcc754037c3679l,0x76078ba61a8ff67cl,
-            0x8054aa55c2a64964l },
-          0 },
-        /* 60 << 8 */
-        { { 0x5852104b87453b28l,0x073e8128b387344dl,0x300e78e4817cfc08l,
-            0x3a82ed4799362088l },
-          { 0xe222304c88de46a4l,0x666c94fd57fadf4al,0x40b2d08ea0c8e108l,
-            0x4b2955b909e050fal },
-          0 },
-        /* 61 << 8 */
-        { { 0x656078565f814881l,0x0fc3d1ce58466117l,0x0ae377d3c6c1e68al,
-            0xe3dd8d5cba566c48l },
-          { 0x9404849ec4b63be6l,0x1e22b03ba5be9c92l,0x08145122a8b03e63l,
-            0x71248243771fe153l },
-          0 },
-        /* 63 << 8 */
-        { { 0xa80a0e83b41ac541l,0xa77570ea533e5f9bl,0x416a14c0216dc452l,
-            0x2a8d728a19f7ee59l },
-          { 0x58494c8cd6552eaal,0x4d635acd60145722l,0xa8e9b127327b1cbcl,
-            0xb429a62e9f8235f0l },
-          0 },
-        /* 64 << 8 */
-        { { 0xf8d112e76e6485b3l,0x4d3e24db771c52f8l,0x48e3ee41684a2f6dl,
-            0x7161957d21d95551l },
-          { 0x19631283cdb12a6cl,0xbf3fa8822e50e164l,0xf6254b633166cc73l,
-            0x3aefa7aeaee8cc38l },
-          0 },
-        /* 65 << 8 */
-        { { 0xd52d2cb746ef1c7el,0xebd4f7c4d8fb6e07l,0x16f77a48cf6dd2b4l,
-            0x6e8f0431e77e4d51l },
-          { 0x59d94cc4e9177bf2l,0xb58a578f7a7181a1l,0xeefbc4cde8f6d330l,
-            0xa66c85560fe05490l },
-          0 },
-        /* 71 << 8 */
-        { { 0x0e6db7a35d9649dal,0x4d2f25193be3d362l,0xcd891fd5a6b137b5l,
-            0xa4b7e4ddacd377a9l },
-          { 0x20ccd6f24355f258l,0x842c08673aafb413l,0xdd55db99d6873b88l,
-            0x04d15f4fea5a2a55l },
-          0 },
-        /* 77 << 8 */
-        { { 0x679cd93dfae289c2l,0x84cadd61ff92ba1bl,0x548b5a6f2cd734aal,
-            0x1827507db8267082l },
-          { 0xa903a6010c6d5b4cl,0xde0d96befdfb952bl,0x2fc9419c6a2e24f9l,
-            0x27333e3936bb3203l },
-          0 },
-        /* 83 << 8 */
-        { { 0x3eb7f062dde4aa6al,0x40effae07f354cc0l,0xe9a14bc2a066c05el,
-            0x7817b11356afc543l },
-          { 0x5f0ed1f28bdda262l,0x001e23d2e007ec13l,0x435878a59c57de6al,
-            0x84d0e20895ac263cl },
-          0 },
-        /* 89 << 8 */
-        { { 0xedf24aec97a66678l,0xd1f93cf8ccf55671l,0x4ed2ce8a9379a49dl,
-            0x64991862c39b0ac9l },
-          { 0xc15b24e31ff67e04l,0x4ee8fc76c3c084fel,0x262012b4f64bcd46l,
-            0x3b5086732425c622l },
-          0 },
-        /* 95 << 8 */
-        { { 0xaa3e451fe65002f7l,0xf5ff2617eb46d253l,0x918d146e572afca2l,
-            0x0a9333b7e56a8553l },
-          { 0x9b7e232d94127dc0l,0xcd0687d6831014e6l,0x725ce5baf08e1c71l,
-            0x56e26f48cde0e4edl },
-          0 },
-        /* 101 << 8 */
-        { { 0xae78dde8db833460l,0xaf1736fe762cb78al,0x5cd85742eae5ac60l,
-            0x7b6c52fe955e981al },
-          { 0x9f823e8555599f97l,0xb9ce70d21a4b46b3l,0xb6076175d7d09829l,
-            0x21e77d22abf390a4l },
-          0 },
-        /* 107 << 8 */
-        { { 0xf704f09da142ad7el,0xb60ec2e1bab9f5d2l,0x4180314681e54d0dl,
-            0x0de50506309335e6l },
-          { 0x4135374e05aec64fl,0xb5d31041b556808al,0x0092eb86049033a8l,
-            0x5b7a2fa0bde0d737l },
-          0 },
-        /* 113 << 8 */
-        { { 0xc0dfa6bbefb40cfal,0x86a6fe279c5037f3l,0xf153cd37f71155f4l,
-            0xf16d6029767664f9l },
-          { 0x7441aa54c635aa57l,0x547f82e9e8186b2el,0x330b464bfbf7c7fel,
-            0xb5556770a1f6fddel },
-          0 },
-        /* 116 << 8 */
-        { { 0xa0a9c5d1e8f9edf1l,0x9814c26b6946cea3l,0xcbb47a37d8e6a08dl,
-            0x517a3d9b2cba11b1l },
-          { 0x94edc73dab43c540l,0x4fd0b82a753e552cl,0x419aab8bd14ae853l,
-            0x94955f9ca68abad8l },
-          0 },
-        /* 119 << 8 */
-        { { 0x3a162e06ed169150l,0x8c9683a6ba1194a8l,0x53fead66ccc28d04l,
-            0xdbb2a85bef09809al },
-          { 0x58e677439d3ab018l,0xff9a2046b6e56bd0l,0xf4b8215eb28061e9l,
-            0xcf16d9f7b10e358fl },
-          0 },
-        /* 125 << 8 */
-        { { 0x265ceae9a55abe39l,0x9e3783f796a98f84l,0xb799628af0757d99l,
-            0xebb5f12665472fb3l },
-          { 0xd83619f52ba517d8l,0x5672105f50382bdfl,0x32c5681c4a12ee9fl,
-            0x31e6f60d834a9fedl },
-          0 },
-    },
-    {
-        /* 0 << 16 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 16 */
-        { { 0x0f0165fce3779ee3l,0xe00e7f9dbd495d9el,0x1fa4efa220284e7al,
-            0x4564bade47ac6219l },
-          { 0x90e6312ac4708e8el,0x4f5725fba71e9adfl,0xe95f55ae3d684b9fl,
-            0x47f7ccb11e94b415l },
-          0 },
-        /* 3 << 16 */
-        { { 0xbd9b8b1dbe7a2af3l,0xec51caa94fb74a72l,0xb9937a4b63879697l,
-            0x7c9a9d20ec2687d5l },
-          { 0x1773e44f6ef5f014l,0x8abcf412e90c6900l,0x387bd0228142161el,
-            0x50393755fcb6ff2al },
-          0 },
-        /* 4 << 16 */
-        { { 0xfabf770977f7195al,0x8ec86167adeb838fl,0xea1285a8bb4f012dl,
-            0xd68835039a3eab3fl },
-          { 0xee5d24f8309004c2l,0xa96e4b7613ffe95el,0x0cdffe12bd223ea4l,
-            0x8f5c2ee5b6739a53l },
-          0 },
-        /* 5 << 16 */
-        { { 0x3d61333959145a65l,0xcd9bc368fa406337l,0x82d11be32d8a52a0l,
-            0xf6877b2797a1c590l },
-          { 0x837a819bf5cbdb25l,0x2a4fd1d8de090249l,0x622a7de774990e5fl,
-            0x840fa5a07945511bl },
-          0 },
-        /* 7 << 16 */
-        { { 0x26e08c07e3533d77l,0xd7222e6a2e341c99l,0x9d60ec3d8d2dc4edl,
-            0xbdfe0d8f7c476cf8l },
-          { 0x1fe59ab61d056605l,0xa9ea9df686a8551fl,0x8489941e47fb8d8cl,
-            0xfeb874eb4a7f1b10l },
-          0 },
-        /* 9 << 16 */
-        { { 0x9164088d977eab40l,0x51f4c5b62760b390l,0xd238238f340dd553l,
-            0x358566c3db1d31c9l },
-          { 0x3a5ad69e5068f5ffl,0xf31435fcdaff6b06l,0xae549a5bd6debff0l,
-            0x59e5f0b775e01331l },
-          0 },
-        /* 10 << 16 */
-        { { 0x2cc5226138634818l,0x501814f4b44c2e0bl,0xf7e181aa54dfdba3l,
-            0xcfd58ff0e759718cl },
-          { 0xf90cdb14d3b507a8l,0x57bd478ec50bdad8l,0x29c197e250e5f9aal,
-            0x4db6eef8e40bc855l },
-          0 },
-        /* 11 << 16 */
-        { { 0xd5d5cdd35958cd79l,0x3580a1b51d373114l,0xa36e4c91fa935726l,
-            0xa38c534def20d760l },
-          { 0x7088e40a2ff5845bl,0xe5bb40bdbd78177fl,0x4f06a7a8857f9920l,
-            0xe3cc3e50e968f05dl },
-          0 },
-        /* 13 << 16 */
-        { { 0x10595b5696a71cbal,0x944938b2fdcadeb7l,0xa282da4cfccd8471l,
-            0x98ec05f30d37bfe1l },
-          { 0xe171ce1b0698304al,0x2d69144421bdf79bl,0xd0cd3b741b21dec1l,
-            0x712ecd8b16a15f71l },
-          0 },
-        /* 15 << 16 */
-        { { 0xe89f48c85963a46el,0x658ab875a99e61c7l,0x6e296f874b8517b4l,
-            0x36c4fcdcfc1bc656l },
-          { 0xde5227a1a3906defl,0x9fe95f5762418945l,0x20c91e81fdd96cdel,
-            0x5adbe47eda4480del },
-          0 },
-        /* 16 << 16 */
-        { { 0xa7a8746a584c5e20l,0x267e4ea1b9dc7035l,0x593a15cfb9548c9bl,
-            0x5e6e21354bd012f3l },
-          { 0xdf31cc6a8c8f936el,0x8af84d04b5c241dcl,0x63990a6f345efb86l,
-            0x6fef4e61b9b962cbl },
-          0 },
-        /* 17 << 16 */
-        { { 0xaa35809ddfe6e2a0l,0xebb4d7d4356a2222l,0x7d500a6a319f33b7l,
-            0x4895a47d4ac99011l },
-          { 0x300ab40bdf3812b2l,0xd0764ec88aec8b9fl,0x86b61d95e591b2a7l,
-            0xc1b2a0b72ed74603l },
-          0 },
-        /* 19 << 16 */
-        { { 0x6001bf5d3849c680l,0xd7a1a4e4c1d3faccl,0xa0f2776418c5e351l,
-            0x0849c0736c29c623l },
-          { 0x3317e143ac751c0cl,0x9bcb1f3eda06200bl,0x40a63a75541419b5l,
-            0x8fad9c983f62c513l },
-          0 },
-        /* 21 << 16 */
-        { { 0xacff0828d03b2242l,0x5a9375c43abb7389l,0x41b1a318d0192baal,
-            0x105bd3100458e97bl },
-          { 0x71582dc7ed496315l,0x8ab2884a4d4bda18l,0xb8b638b494bc5bb8l,
-            0xb42ed1309500bb04l },
-          0 },
-        /* 23 << 16 */
-        { { 0x73e04f02ad1ed952l,0x680051cadfa5bdb7l,0xbe0bef3c0c7437b9l,
-            0x45d6f3a40e65e627l },
-          { 0x5295e060c9436a75l,0xbe84ba78d289ba9el,0x350887fd69c09364l,
-            0xf27bfd17671c64a7l },
-          0 },
-        /* 25 << 16 */
-        { { 0xc8afbdc3adf6ffc5l,0x4a4fb35876385891l,0xc7fa86424d41453fl,
-            0x19490b7672eedd06l },
-          { 0xc883e45337d22d6al,0x8e6e38e4a9009f96l,0x44e2811eb1c560c6l,
-            0x8a0021bf4439cfcfl },
-          0 },
-        /* 27 << 16 */
-        { { 0xba768f8b7615a327l,0x6c8b320d7b15bbe7l,0x5d8d5bcbaaa9ca64l,
-            0x19a2b99f3d13cdfdl },
-          { 0x858288a26f172e10l,0x2412a4da37a00f94l,0xfc67fd2edaa7f6c6l,
-            0x4aea0eadafa2a5c5l },
-          0 },
-        /* 28 << 16 */
-        { { 0x5c80ccef6cd77b30l,0x49978299ec99b6d0l,0x6bf4485eb939d335l,
-            0xc53e61ab86d7c147l },
-          { 0xdd948052fb601dddl,0x34c5eb393511dd48l,0x91f5c67600e6f61cl,
-            0x33f1b525b1e71f34l },
-          0 },
-        /* 29 << 16 */
-        { { 0xb4cb4a151d2dad36l,0x709a61631e60b60dl,0x2f18f3bd932ece4fl,
-            0x70f495a8e92368bel },
-          { 0x6e88be2bb7aeaa6fl,0x4efebd9ae1bf1d6el,0x49925e6e44e94993l,
-            0x33b7aba0ef0517dcl },
-          0 },
-        /* 31 << 16 */
-        { { 0x69ce1f207afe6c37l,0xe1148ba984f68db5l,0x32668bdc2c594a8al,
-            0x2cb60d3063ac4fb3l },
-          { 0x5e6efe1dd9e036f8l,0x917cb2a27db4739fl,0x70ea601ded4e0b5el,
-            0x5928f068ae7ac8a6l },
-          0 },
-        /* 33 << 16 */
-        { { 0x9e4ad0073f2d96abl,0x51a9697f2d058c03l,0xcd5c0a7522d1e795l,
-            0xaa1a121c2ac4f019l },
-          { 0xa837c14c3e3631f4l,0x6a997381236a5576l,0xb305e7db2753782bl,
-            0xae561b0237243afbl },
-          0 },
-        /* 34 << 16 */
-        { { 0x20176baca787897bl,0x057b8b979a9f67d9l,0xe7d5c4f761e14e09l,
-            0x8e4856901e6cd6d0l },
-          { 0x3eeffbba9b925d52l,0xe651a5383046927bl,0x02326d1fe92d4352l,
-            0xad2d6493d697369fl },
-          0 },
-        /* 35 << 16 */
-        { { 0xe9de299c548c4ca5l,0x66f64ef54be3bde3l,0xcf6d39ebf2d5ebc9l,
-            0x665ca727898953e1l },
-          { 0x521ec435e33ac1b4l,0x8418fa7534ab2b82l,0x94d6c0c4771a3a87l,
-            0x21feb6054859ee22l },
-          0 },
-        /* 36 << 16 */
-        { { 0xde7153f8eed9dd1dl,0xba09ad1152ebcb2el,0xaa41b015e1843fb6l,
-            0xf933a2abdd4ce6f0l },
-          { 0x777f834313f6b83fl,0x28df7da4db113a75l,0x6d7d1b3c72a5d143l,
-            0x6f789698966c6ddfl },
-          0 },
-        /* 37 << 16 */
-        { { 0x57d11ed7a95e704el,0x7d5ac6dc380ad582l,0xb175421d5ab6e377l,
-            0x4e383b0ba760dd4dl },
-          { 0xde07b81a352b6cb3l,0x342abe825c2e1704l,0x90988de20dd48537l,
-            0x4a7fec0544821591l },
-          0 },
-        /* 39 << 16 */
-        { { 0xb0e4d17c90a94eb7l,0x27555067aceb0176l,0x587576e15c38c4e2l,
-            0xe647d9dd445f2880l },
-          { 0x00beb2f5ca502f83l,0x4e89e638c44767c7l,0xbef361da154a5757l,
-            0x2dc632a2dc0675f2l },
-          0 },
-        /* 40 << 16 */
-        { { 0xed439a33a72ba054l,0xa3170a15ead265bal,0xcf7eb903fe99a58el,
-            0xcf6db0c633d80c26l },
-          { 0xd031255ef613e71al,0x12ccbe5718ca255cl,0xdd21d0537808c40dl,
-            0xf5488ebc3af2be6bl },
-          0 },
-        /* 41 << 16 */
-        { { 0x589a125ac10f8157l,0x3c8a15bde1353e49l,0x7d9bbd0c22ce2dd0l,
-            0xdfcd019211ac7bb1l },
-          { 0x0e1d67151193c5b1l,0xd4de115ab0e8c285l,0x0b3e94c2272c29fel,
-            0xea640843c8213581l },
-          0 },
-        /* 43 << 16 */
-        { { 0x7a01aeed6aca2231l,0x8135cf2ace80abbel,0xdc1a41b2ae5fdec9l,
-            0xde34ea4da0174364l },
-          { 0xa5104e453cf8b845l,0x4b6fd986675ba557l,0x4bc750af29c8cb4al,
-            0x8bebb266583f9391l },
-          0 },
-        /* 44 << 16 */
-        { { 0x47110d7c1be3f9c5l,0x12b9e4485eadb4ddl,0x6e8c09870b713d41l,
-            0xe1e20356733d56ael },
-          { 0xe68d6bab445ea727l,0x9ef4f6eac934a1a4l,0xe0155547f8cef1c3l,
-            0xdb5c3909159bdcbfl },
-          0 },
-        /* 45 << 16 */
-        { { 0xef0449cb32fa8a37l,0x95071f5dcd246405l,0x1c56ad776c598891l,
-            0x981781de0fa9cd42l },
-          { 0x0f93d456d29c0500l,0x43aa7bc1483f52c4l,0xd7c8736666c8abadl,
-            0x47552530ea5050efl },
-          0 },
-        /* 46 << 16 */
-        { { 0x40dd9ca9fa9b8d3dl,0xf27b7bc056da41d9l,0x87967f4b66db8845l,
-            0xf6918c9444de6bc7l },
-          { 0x4d76d51135568d4dl,0x7ab18f9a40e7fa5al,0x069a44bba5bbbdc6l,
-            0x19e6c04bb4c8f808l },
-          0 },
-        /* 47 << 16 */
-        { { 0x5fd2501108b2b6c7l,0xcce85a3ec41cad21l,0x90857daffdd70387l,
-            0x7a679062c63789f4l },
-          { 0x9c462134ef8666e2l,0xcb7dba108c8505bdl,0x7c4a7e2fc610f2e7l,
-            0x22906f65d68315f9l },
-          0 },
-        /* 48 << 16 */
-        { { 0xf2efe23d442a8ad1l,0xc3816a7d06b9c164l,0xa9df2d8bdc0aa5e5l,
-            0x191ae46f120a8e65l },
-          { 0x83667f8700611c5bl,0x83171ed7ff109948l,0x33a2ecf8ca695952l,
-            0xfa4a73eef48d1a13l },
-          0 },
-        /* 49 << 16 */
-        { { 0x41dd38c1118de9a0l,0x3485cb3be2d8f6f5l,0xd4bac751b1dcc577l,
-            0x2148d93fed12ea6bl },
-          { 0xde3504729da8cb18l,0x6046daf89eb85925l,0xddbc357b942b1044l,
-            0x248e7afe815b8b7cl },
-          0 },
-        /* 51 << 16 */
-        { { 0xd4bb77b3acb21004l,0xe9f236cf83392035l,0xa9894c5c52133743l,
-            0x4d6112749a7b054al },
-          { 0xa61675ea4ba2a553l,0x59c199681da6aa78l,0x3988c36590f474del,
-            0x73e751bbd001be43l },
-          0 },
-        /* 52 << 16 */
-        { { 0x97cacf846604007dl,0x1e92b4b22d47a9f1l,0x858ae0d6374ed165l,
-            0x4c973e6f307aefb8l },
-          { 0x6f524a238a10eb72l,0x7b4a92a9eb2849d6l,0x3678bda42fe91eddl,
-            0x56092acd7c0fc35cl },
-          0 },
-        /* 53 << 16 */
-        { { 0x93bea99b1b9b43c4l,0x2f6af6f3e145fda2l,0x862f0607278adf0dl,
-            0x647be08398456ccal },
-          { 0xce79ba1487250c28l,0x1c1c4fc8efedab42l,0x966f612af90caa8dl,
-            0xb1a2cf6e72c440f8l },
-          0 },
-        /* 55 << 16 */
-        { { 0x2fca1be45b3b7dd5l,0x453c19853c211bcal,0x313cb21969a46484l,
-            0x66082837414bd5dfl },
-          { 0xab7a97bf2ac1cdf7l,0x45cd1792676d778fl,0x42fb6c4f6a5b560al,
-            0x45747fe30b8f17e9l },
-          0 },
-        /* 57 << 16 */
-        { { 0x38b6db6235db6218l,0xa10cdfe1bb54bacal,0x56fd4a1d610f7f6bl,
-            0xc4bea78b76d183d7l },
-          { 0xc0e6ca9fbf730d26l,0x1b1e271aed6cf535l,0x6fef275faadbe375l,
-            0xfa2e8da903e489bal },
-          0 },
-        /* 59 << 16 */
-        { { 0x6f79d25c7c4626ecl,0xfe27690232d55d6cl,0x3f5c5768afa19ce3l,
-            0xa1373777f8834739l },
-          { 0x761d67a8a4ce960al,0xb34de1ea459e656al,0x8725b0f09db6f269l,
-            0x75316f250dbfe22el },
-          0 },
-        /* 60 << 16 */
-        { { 0x091d5b631a093b40l,0xb85c1c075862f24al,0xc5d74eb53e8f85bfl,
-            0xf51c7746cab22456l },
-          { 0xc25cb8d9e761da89l,0x2670ec2fc0f028b5l,0x873fd30d2db9af5cl,
-            0x3d0f1ea18262565el },
-          0 },
-        /* 61 << 16 */
-        { { 0x8f9492c261c23b3cl,0xd366baeb631688a4l,0x55e759e78093bb07l,
-            0xf6d0eaf47218f765l },
-          { 0xb8a174ff54ca583bl,0x790f10e0b23d14cel,0xfebe7333be83cbbal,
-            0xfeb6dcc5eed67536l },
-          0 },
-        /* 63 << 16 */
-        { { 0x175b3bacce027e5bl,0xe0728a99c48252c4l,0x0be25d4507a39c7cl,
-            0xcb9c2d3aba8e8c72l },
-          { 0x6185a48d1abd459al,0x27207feadff9a27bl,0xfd92e8231d34393fl,
-            0x738511534351d965l },
-          0 },
-        /* 64 << 16 */
-        { { 0xfcde7cc8f43a730fl,0xe89b6f3c33ab590el,0xc823f529ad03240bl,
-            0x82b79afe98bea5dbl },
-          { 0x568f2856962fe5del,0x0c590adb60c591f3l,0x1fc74a144a28a858l,
-            0x3b662498b3203f4cl },
-          0 },
-        /* 65 << 16 */
-        { { 0x8ede0fcdc11682eel,0x41e3faa1b2ab5664l,0x58b2a7dc26a35ff5l,
-            0x939bcd6b701b89e9l },
-          { 0x55f66fd188e0838fl,0x99d1a77b4ff1f975l,0x103abbf72e060cc5l,
-            0x91c77beb6bc4bdbbl },
-          0 },
-        /* 71 << 16 */
-        { { 0xcd048abca380cc72l,0x91cab1bbd0e13662l,0x68115b18686de4cel,
-            0x484724e63deccbf5l },
-          { 0xf164ba54f176137el,0x5189793662ab2728l,0x6afdecf9b60a5458l,
-            0xca40472d0aabafd2l },
-          0 },
-        /* 77 << 16 */
-        { { 0x7a9439183b98d725l,0x1c1763e8ece1ea3cl,0x45c44ef639840476l,
-            0x689271e69c009133l },
-          { 0xa017405f56a51fe1l,0xd54cc7253e0d0970l,0x212ad075cfe09e8bl,
-            0x999f21c37af7bf30l },
-          0 },
-        /* 83 << 16 */
-        { { 0xdc2a2af12bf95f73l,0xb88b4ca76de82cbel,0xa31a21aaecb8e84el,
-            0x86d19a601b74f5bel },
-          { 0xc68bf64406008019l,0xe52ab50e9431c694l,0x6375463d627ab11cl,
-            0xdd3eeaa03c0ef241l },
-          0 },
-        /* 89 << 16 */
-        { { 0x608d9cb323f1caf8l,0x95069450b1700741l,0xe3132bd2bc2fa7aal,
-            0xc4f363e7f64e4f06l },
-          { 0xb059c4191ca888c2l,0x1004cb1f8d17bf5dl,0x6b6ba6f934ea5711l,
-            0x071d94abd79b2c8al },
-          0 },
-        /* 95 << 16 */
-        { { 0xc7ef9b42d147a39dl,0x36dd5d770a10cd5bl,0x3bf6cc77d0eea34bl,
-            0x60c84591197479c7l },
-          { 0xf95860ac50ba50edl,0xe1c94a8dc4cdc8fal,0x780818d685e24a23l,
-            0x1950e3c0c8abbd27l },
-          0 },
-        /* 101 << 16 */
-        { { 0x9908c694ae04778el,0x2e37a6790a0d36ffl,0x212a340f52b067bdl,
-            0xec89c9fad080b914l },
-          { 0x920dc2f005ab8a23l,0xecff5c78655e8984l,0x80eedd34f66211acl,
-            0xa7a56366ef58d4d8l },
-          0 },
-        /* 107 << 16 */
-        { { 0x4f95debe2bca42f0l,0xf0346307844334d2l,0x7003a60521d600aal,
-            0x1eb98c6365c5248al },
-          { 0x6757b3822fa202cal,0x32765d399fb12f36l,0xe851b476d7b44c9al,
-            0x27cd7d1b4e0bab4cl },
-          0 },
-        /* 113 << 16 */
-        { { 0xd0c1f7c9c43ea1a3l,0x73d944f49f42907dl,0xd113f34619352c92l,
-            0x86a1ad53b149cdc1l },
-          { 0x32c34e8f848d1be4l,0xba8afda7c3d9360bl,0x17e8bc32eea8bf96l,
-            0x3174cae499c87febl },
-          0 },
-        /* 116 << 16 */
-        { { 0x4b215f016671b47el,0xb67633ca4a8dae2al,0x2915120f79fd3cdbl,
-            0xc1f8a06fb064e6del },
-          { 0xf4d5368cc1d57420l,0x6ada51a8e18de475l,0xa0f0d47cc749d4b0l,
-            0xabfa2c0074526aa5l },
-          0 },
-        /* 119 << 16 */
-        { { 0xf752f6659e5ce44fl,0x7b97ebfa189d35ecl,0x9540cbb90fc609abl,
-            0x19c1dc6999632cc8l },
-          { 0x0a957700e08ca9a8l,0xb0cd0ab7a3246a4el,0xca687cfcc8d6a544l,
-            0xb6281f0035f82a77l },
-          0 },
-        /* 125 << 16 */
-        { { 0x547027012b818036l,0xf72315f729c8f14cl,0x95f1bc15230e74bel,
-            0x2e7c492f1abe20d4l },
-          { 0xe1ea8b1cd7e78ab1l,0xc3f6ba59043585adl,0xac404ea9477ac053l,
-            0xaa6872914ec6d0e3l },
-          0 },
-    },
-    {
-        /* 0 << 24 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 24 */
-        { { 0xd9d0c8c4868af75dl,0xd7325cff45c8c7eal,0xab471996cc81ecb0l,
-            0xff5d55f3611824edl },
-          { 0xbe3145411977a0eel,0x5085c4c5722038c6l,0x2d5335bff94bb495l,
-            0x894ad8a6c8e2a082l },
-          0 },
-        /* 3 << 24 */
-        { { 0xd1e059b21994ef20l,0x2a653b69638ae318l,0x70d5eb582f699010l,
-            0x279739f709f5f84al },
-          { 0x5da4663c8b799336l,0xfdfdf14d203c37ebl,0x32d8a9dca1dbfb2dl,
-            0xab40cff077d48f9bl },
-          0 },
-        /* 4 << 24 */
-        { { 0xf2369f0b879fbbedl,0x0ff0ae86da9d1869l,0x5251d75956766f45l,
-            0x4984d8c02be8d0fcl },
-          { 0x7ecc95a6d21008f0l,0x29bd54a03a1a1c49l,0xab9828c5d26c50f3l,
-            0x32c0087c51d0d251l },
-          0 },
-        /* 5 << 24 */
-        { { 0xf61790abfbaf50a5l,0xdf55e76b684e0750l,0xec516da7f176b005l,
-            0x575553bb7a2dddc7l },
-          { 0x37c87ca3553afa73l,0x315f3ffc4d55c251l,0xe846442aaf3e5d35l,
-            0x61b911496495ff28l },
-          0 },
-        /* 7 << 24 */
-        { { 0x4bdf3a4956f90823l,0xba0f5080741d777bl,0x091d71c3f38bf760l,
-            0x9633d50f9b625b02l },
-          { 0x03ecb743b8c9de61l,0xb47512545de74720l,0x9f9defc974ce1cb2l,
-            0x774a4f6a00bd32efl },
-          0 },
-        /* 9 << 24 */
-        { { 0x327bc002b0131e5bl,0x1739e6d5cb2514d9l,0xc8cbdafe55a81543l,
-            0x5bb1a36ce1137243l },
-          { 0x205da3c517325327l,0xc35c1a36515a057el,0xf00f64c942925f9bl,
-            0xbd14633cb7d59f7al },
-          0 },
-        /* 10 << 24 */
-        { { 0xae2ad171656e8c3al,0xc0e2a4631acd0705l,0x006f6a8aa0b6055cl,
-            0xaf4513d72b65a26el },
-          { 0x3f549e14d616d5bcl,0x64ee395571253b1fl,0xe8b10bc1b8ce243al,
-            0xbcbeace5913a4e77l },
-          0 },
-        /* 11 << 24 */
-        { { 0x47c1004341f37dbdl,0x96eccae36168ecf6l,0x65bde59d1ca46aa3l,
-            0x38a7027ab8698ffal },
-          { 0xa2b89dc86dc34437l,0x5a0a118d43a4153fl,0x9e330a861ce22fd8l,
-            0x28382af6b3bbd3bcl },
-          0 },
-        /* 13 << 24 */
-        { { 0x0b2e27c0d81e0271l,0xa67a7596117a317cl,0x17f08928a6723d99l,
-            0x71a75681485310a3l },
-          { 0x90465462afb66ca9l,0x185e97ccfbbe229dl,0x6a1a606addad8fc2l,
-            0x2431f316b3c797cfl },
-          0 },
-        /* 15 << 24 */
-        { { 0x4703401193529432l,0x1f106bdd30743462l,0xabfb9964cd66d8cal,
-            0x934d9d5ae9bdadd5l },
-          { 0x5976d815908e3d22l,0x344a362f28e057bdl,0xf92cdadc5443dfb3l,
-            0x001297adf089603bl },
-          0 },
-        /* 16 << 24 */
-        { { 0x7f99824f20151427l,0x206828b692430206l,0xaa9097d7e1112357l,
-            0xacf9a2f209e414ecl },
-          { 0xdbdac9da27915356l,0x7e0734b7001efee3l,0x54fab5bbd2b288e2l,
-            0x4c630fc4f62dd09cl },
-          0 },
-        /* 17 << 24 */
-        { { 0x4a2fce605044066bl,0x904a019cfa3a47f4l,0xba81ea9c0c5c0a60l,
-            0xd7e4ea0d96c098bdl },
-          { 0xefe700419cd50a02l,0xc0c839d42d7f048cl,0xe2daf264e09b561fl,
-            0x0cbc13185034b18bl },
-          0 },
-        /* 19 << 24 */
-        { { 0x11e5f2e388323f7al,0xe07a74c2927584cdl,0x1e774b3495613d2dl,
-            0x9c9b52c52c787488l },
-          { 0x3cdd3c3ebe421f08l,0x5ff7819e223e3d5fl,0xba8739b2c1da09b9l,
-            0x6b7263164e8b491bl },
-          0 },
-        /* 21 << 24 */
-        { { 0xb5afd13ca0943befl,0xd651772957abb1ccl,0x9d5a52dc9b61b5bcl,
-            0x85cefaa6806e31cdl },
-          { 0xab84257a720a1deal,0x6a60261bced70d35l,0xc023f94db9d6da61l,
-            0x947f7eec54a0ae0el },
-          0 },
-        /* 23 << 24 */
-        { { 0xc3b787569f83b787l,0xd6d249263694ddd7l,0x58d248945d70a02el,
-            0xac16670e8c278c6al },
-          { 0x71a94d58e370b6e6l,0xe4d763840253db05l,0x99b1c98814b32cfel,
-            0x4e6bd870cc78cc95l },
-          0 },
-        /* 25 << 24 */
-        { { 0xf5f7ca79c8b63614l,0xf3bfb2158af4903cl,0x2bdb9f5496d47bd3l,
-            0xd6e715300e8a63bal },
-          { 0x67e90a497a93bec4l,0x8613478b8c1e63eel,0xe36bd9c8f2dde561l,
-            0x681486518a768689l },
-          0 },
-        /* 27 << 24 */
-        { { 0xef617a9494aa531cl,0x9ac35e2fd6f4ad87l,0xbcd2a047122468fbl,
-            0xbd7a423fef7c5ca6l },
-          { 0xab58cb52064c8040l,0x93ef4ed54a644716l,0xf7d17097c32cd48dl,
-            0xb249a173d17fcf42l },
-          0 },
-        /* 28 << 24 */
-        { { 0x66fe0fffe298cdf5l,0x3f61bea47b2e51b6l,0x7d372117bad3afa4l,
-            0x6521a09cef656e2fl },
-          { 0xb3b8c966e8a58fe7l,0x25203a115a47ebc7l,0xfe81588d5c4be573l,
-            0x6132e2f31f49a03cl },
-          0 },
-        /* 29 << 24 */
-        { { 0xbbe5c108b7a7ecc4l,0x62a5a78ebfd22e4cl,0xb7974033df188bd2l,
-            0xcf11deea4df7d1ael },
-          { 0x99cc774a53ace3eal,0xe0373a71105cc1f6l,0xd751987f133d7a20l,
-            0xab86ee04ae215871l },
-          0 },
-        /* 31 << 24 */
-        { { 0x2094f9a280cd10e6l,0x045232aa7b8a0da7l,0x969a81b69c03244el,
-            0x1293b4ca7e98d955l },
-          { 0x1631421dd68f3ab0l,0xa0106422c3738c82l,0xc5f43845f82c4ff9l,
-            0xb479acbe1aa0f58fl },
-          0 },
-        /* 33 << 24 */
-        { { 0xf1db0267f67683cfl,0xa6b13c9e44ce009dl,0x04b4eed505884a69l,
-            0xf2ff9c16d9087a0bl },
-          { 0x2c53699b3e35b4a6l,0x5020c0142369afb8l,0xf83bfe0095be37f1l,
-            0xd300d8c553b29d80l },
-          0 },
-        /* 34 << 24 */
-        { { 0x16893055811cf4bbl,0x580dd1e55aeb5027l,0xcaf47fba5ae3c71cl,
-            0xde79698129ebbb07l },
-          { 0xbed1db33d262cdd3l,0x78315e3748c7313bl,0xfc9561f02fe1368dl,
-            0xe0209698ccacacc7l },
-          0 },
-        /* 35 << 24 */
-        { { 0xd61af89a781ece24l,0xf3b90626008f41e9l,0xd715dbf7c5693191l,
-            0x8d6c05de6f299edel },
-          { 0xf18d62637ca50aacl,0x7987bf5cb0dd5fdcl,0x424136bd2cfa702bl,
-            0xaa7e237ded859db2l },
-          0 },
-        /* 36 << 24 */
-        { { 0xde7169e4e5d41796l,0x6700333e33c0a380l,0xe20b95780343a994l,
-            0xa745455e1fb3a1c3l },
-          { 0x97e0ff88ce029a7fl,0x3b3481c976e384bcl,0x028b339dddad5951l,
-            0xa1fdcdbae4b95cfcl },
-          0 },
-        /* 37 << 24 */
-        { { 0xcc9221baed20c6adl,0xf2619a51fa9c73aal,0xfc2cff847d7f55a5l,
-            0xd56c23d65f01d4dal },
-          { 0x6d20f88cb3d84d5fl,0x048825f75dcc615dl,0x73634d3f85631a6el,
-            0xa57a02e3ad7b2e2dl },
-          0 },
-        /* 39 << 24 */
-        { { 0x067a8dcf08aa81ffl,0x62948258c23f3d16l,0xb61bd04316f2fe7bl,
-            0xf250f769b6a766b1l },
-          { 0x32df97246d0b241el,0xb736e4bb714e5f88l,0x50da15022c1d40d7l,
-            0x013e0edebdd285a4l },
-          0 },
-        /* 40 << 24 */
-        { { 0x1b92c3a0181a5d8fl,0x6429531d9adb77c7l,0x629152b53af710eel,
-            0x4e3f27370bd5647el },
-          { 0xfb7c392b77553c7dl,0xa930abacefe78c87l,0xf80c8cd6a05a6991l,
-            0x751469b71be5f6f5l },
-          0 },
-        /* 41 << 24 */
-        { { 0xf89f2b0b3e2f2af0l,0x52f634099eefc39al,0x505005c679906cb6l,
-            0x820c2216b2de0b1el },
-          { 0x96f0f2831f20ad7al,0xcd33125c718ffcb0l,0xf6130ef278f0c578l,
-            0x4cda2471d0b76b95l },
-          0 },
-        /* 43 << 24 */
-        { { 0x611dd83f39485581l,0x96c47051803e1b20l,0xefacc736830f44c7l,
-            0x5588d8ce688b12bal },
-          { 0x44f4edf3eee70fadl,0x1026dfd8869539f7l,0xa4c146ee8ddb0e00l,
-            0x9f4f55816efb41c8l },
-          0 },
-        /* 44 << 24 */
-        { { 0x6036ed0236cbace7l,0x5a70e4abada837ddl,0xf06918aff10b2fefl,
-            0x08a8a9f69fd31590l },
-          { 0x6c4a1ba6916af88dl,0x4868bc1466016037l,0x06d345af164228a9l,
-            0x2c1961d19b550dd9l },
-          0 },
-        /* 45 << 24 */
-        { { 0x8b72775c6851f0acl,0x7827242bd70f5975l,0x2de91f1e34db4a6fl,
-            0x586bf3d58538f5eel },
-          { 0xf0a15aed25d9a09bl,0x43018e56f74deb46l,0xc2af1ad0f50e0e67l,
-            0x49cc9528b10cff6fl },
-          0 },
-        /* 46 << 24 */
-        { { 0x05eb146c9d55c425l,0xe2b557ccbc62261fl,0x2a716301bd077089l,
-            0x83a63c81e0527d02l },
-          { 0x055ff7f8a0d9203bl,0x05d09f0525bf5a04l,0x2e44545fb3eb0b30l,
-            0xed7c57c4d279a1adl },
-          0 },
-        /* 47 << 24 */
-        { { 0x6928f6e45e0ebdd5l,0xd7e44ddf092d233bl,0xe7148066d1b7026fl,
-            0xf645a2e53d5f25c3l },
-          { 0x6eeb25ee58ff9eb4l,0x60f1fcf737f87ebfl,0x9eaaf1e5c4679c70l,
-            0x4609fb13b7b7dc7el },
-          0 },
-        /* 48 << 24 */
-        { { 0xae915f5d5fa067d1l,0x4134b57f9668960cl,0xbd3656d6a48edaacl,
-            0xdac1e3e4fc1d7436l },
-          { 0x674ff869d81fbb26l,0x449ed3ecb26c33d4l,0x85138705d94203e8l,
-            0xccde538bbeeb6f4al },
-          0 },
-        /* 49 << 24 */
-        { { 0x27f317af2b33987fl,0xd2d3cf5d51e59588l,0x333999bd031f27c9l,
-            0x6ddfa3f22e0a3306l },
-          { 0x23e0e651990041b0l,0xf028aba1585837acl,0x1c6ad72b25226f53l,
-            0xf243c991d1fca64al },
-          0 },
-        /* 51 << 24 */
-        { { 0x72b8a13272cbae1fl,0xfe0b1c4fbfdbd64al,0x98bc7876c5e76921l,
-            0x51c726bfdb1f5af7l },
-          { 0x97e88a842c186e8bl,0x9ed99516ed8eb7b4l,0x3e54a17dafc818ebl,
-            0xfcfbf25a1e8f77d8l },
-          0 },
-        /* 52 << 24 */
-        { { 0x7780d7d68f7d5c6el,0x6725b49a454101e6l,0xceddc26586b0770cl,
-            0xc26624615666f504l },
-          { 0x16b77477ce040f75l,0x13f9113c293f8b45l,0xff0cfa07e2dcc91el,
-            0x1948d8bd41c202f5l },
-          0 },
-        /* 53 << 24 */
-        { { 0x4c6ae39a1dfbe13al,0xafb1e5c46be9c200l,0x39e728d168bb08c3l,
-            0xc794b905acc9166fl },
-          { 0x1cb0dec2d9c7c3e4l,0xc4c3053289f14d65l,0x4af80801a6a9d609l,
-            0x79d7e82de0d6ab24l },
-          0 },
-        /* 55 << 24 */
-        { { 0xb905c6af8ad4cf6el,0x785590b0f6d1be13l,0x78f402c2a0ef76bel,
-            0x739b22ea5c19a40bl },
-          { 0xd4d3262553d596b6l,0x01598eb4d571666bl,0xf8dc150b8173486al,
-            0xd8aa43af15e94f09l },
-          0 },
-        /* 57 << 24 */
-        { { 0xcfa387cd984393b5l,0x1645659e21a1bf92l,0xb4ab3966dd46c7eel,
-            0xcf8c296d89482623l },
-          { 0x72e4d01cf976b4c0l,0x44ad07e8fa0fa5ebl,0xd6c82681b486fdd2l,
-            0x2d9074f89b8845b4l },
-          0 },
-        /* 59 << 24 */
-        { { 0x96e4fc08d96862dbl,0xf9e29bb6c50c14b2l,0xfedaad64f8f9be75l,
-            0xab6b2d79ae9e1274l },
-          { 0x033e3eb58d84dec0l,0xc136904ccbd113e7l,0xb82b0aed6061f289l,
-            0x3476d9247b699e25l },
-          0 },
-        /* 60 << 24 */
-        { { 0x8fb5ceeb969231dcl,0xaed13be1686ff6cdl,0x71d7c67bdd69db87l,
-            0x49613e08fb53f33al },
-          { 0x2899729ead8e802fl,0x83bfde49d1982a1dl,0x675c45ea878239d2l,
-            0xb7bf59cd0d8240d3l },
-          0 },
-        /* 61 << 24 */
-        { { 0x853d8cd1baf53b8bl,0x9c73d04cff95fc18l,0xae8a94412d1d6aacl,
-            0xd8a15ce901500b70l },
-          { 0xaef813499aacba59l,0x2cd2ba0ac493cd8dl,0x01c37ee1f398f034l,
-            0xed72d51d0f7299fcl },
-          0 },
-        /* 63 << 24 */
-        { { 0x2c204940e7592fb1l,0xcc1bb19b49366f08l,0x31855e8a7c927935l,
-            0x16f7e9a2c590b81dl },
-          { 0xa5fbb7c1ed8df240l,0x7b5204122de2d7f5l,0x7eb1eb989a637588l,
-            0x5ef4eca89540d2e8l },
-          0 },
-        /* 64 << 24 */
-        { { 0x55d5c68da61a76fal,0x598b441dca1554dcl,0xd39923b9773b279cl,
-            0x33331d3c36bf9efcl },
-          { 0x2d4c848e298de399l,0xcfdb8e77a1a27f56l,0x94c855ea57b8ab70l,
-            0xdcdb9dae6f7879bal },
-          0 },
-        /* 65 << 24 */
-        { { 0x811e14dd9594afb8l,0xaf6c1b10d349124al,0x8488021b6528a642l,
-            0xecf6834341cf1447l },
-          { 0x7a40acb756924446l,0xd9c11bbed98ec4cfl,0x0cef00bfb2bff163l,
-            0xfaaad8015432803bl },
-          0 },
-        /* 71 << 24 */
-        { { 0x5a217d5e6b075cbel,0x7ef88d1dc89b513bl,0xb6d015da0531c93bl,
-            0x477b502a6333834al },
-          { 0x4655e48b2fb458d5l,0x93f21a7cb7674ca8l,0xa0616786502d1f3al,
-            0x82d16d17f26bb6ccl },
-          0 },
-        /* 77 << 24 */
-        { { 0x3d995aa9183c1688l,0xa125906c3766d2e8l,0x23ed7871c5f10d5bl,
-            0xdfe1e1cc6df80368l },
-          { 0x8bfcb54271eaae2cl,0xe94e6f910945a7bbl,0xd543ef90862f650al,
-            0x0dc043b803eed66bl },
-          0 },
-        /* 83 << 24 */
-        { { 0x0c6a5620060d2ccdl,0xcd8200e37a8a03a4l,0x6018d304793867e6l,
-            0xad23dd61a74d054dl },
-          { 0x5a856faeebc21eb4l,0x66be16714b5cd7dbl,0xe0d0441ec75f8c9dl,
-            0xb80ca9ecf90dbc6dl },
-          0 },
-        /* 89 << 24 */
-        { { 0xbd6902ccd24692cbl,0xbcce6bbc21920408l,0x40f120ca55dec4c5l,
-            0xd9f1f5ef5361c8b3l },
-          { 0x535d368226935dffl,0x9635447b01a9998al,0x8c4ec40d99e36d12l,
-            0xbaeef8912b793369l },
-          0 },
-        /* 95 << 24 */
-        { { 0xded3a51c1cd887ebl,0xd43225568376515cl,0xdaf3a2271ca7c097l,
-            0x089156fdecd4d90cl },
-          { 0x2b354810ca0727c9l,0xb7257c1966c19d8cl,0x5e68a379432d5072l,
-            0x75c04c2443e585c7l },
-          0 },
-        /* 101 << 24 */
-        { { 0xb5ba2a8fe5e0952fl,0x2c2d086811040b4el,0x27448bd5f818e253l,
-            0x720f677987a92c85l },
-          { 0x2c9b2367b9d035fal,0xf18ad8ce16c15ab9l,0xd65a360841bd57eel,
-            0xeb4b07c9ff6ae897l },
-          0 },
-        /* 107 << 24 */
-        { { 0xcffb6d71d38589acl,0x812372920fa509d3l,0x94db5ba6e54725e8l,
-            0x1ad2b4206cfbb825l },
-          { 0x8592c1f238cfb9f2l,0xbe8e917e0eec6a27l,0x53921bfe9d93d42fl,
-            0x1aa95e6269454a35l },
-          0 },
-        /* 113 << 24 */
-        { { 0xc25e8934d898049dl,0xeeaf4e6d3bb3d459l,0xc3ac44447d29ad10l,
-            0xccdf9fcbcef8fa04l },
-          { 0x1d995a3fb9679cb9l,0x3d6c5eab46fabc14l,0xd3849ff066385d4dl,
-            0xc0eb21bacff08be2l },
-          0 },
-        /* 116 << 24 */
-        { { 0x8213c71e90d13fd6l,0x114321149bb6b733l,0xaaf8037880ac4902l,
-            0xb24e046b555f7557l },
-          { 0x5f6ed2881db79832l,0xd493a758ac760e5dl,0xbc30a2a7a1c0f570l,
-            0xa5009807161174e3l },
-          0 },
-        /* 119 << 24 */
-        { { 0x9e9b864a6889e952l,0xee908932f352f31al,0xe421f2423166b932l,
-            0x6dd4aa3b7ddbdb35l },
-          { 0x553cc5639e8b88a4l,0x05457f171f04704dl,0x1dcc3004c9554e6bl,
-            0x3a4a3a253f1b61e7l },
-          0 },
-        /* 125 << 24 */
-        { { 0x7ac0a5e7c56e303al,0x7c7bab64037b0a19l,0x11f103fcc8d29a2bl,
-            0x7d99dc46cf0b1340l },
-          { 0x0481588ceffba92el,0x8a817356b04e77bcl,0x19edf4dbce1b708dl,
-            0xa2a1f7a6e6f9d52cl },
-          0 },
-    },
-    {
-        /* 0 << 32 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 32 */
-        { { 0x202886024147519al,0xd0981eac26b372f0l,0xa9d4a7caa785ebc8l,
-            0xd953c50ddbdf58e9l },
-          { 0x9d6361ccfd590f8fl,0x72e9626b44e6c917l,0x7fd9611022eb64cfl,
-            0x863ebb7e9eb288f3l },
-          0 },
-        /* 3 << 32 */
-        { { 0xa18f07e0e90fb21el,0x00fd2b80bba7fca1l,0x20387f2795cd67b5l,
-            0x5b89a4e7d39707f7l },
-          { 0x8f83ad3f894407cel,0xa0025b946c226132l,0xc79563c7f906c13bl,
-            0x5f548f314e7bb025l },
-          0 },
-        /* 4 << 32 */
-        { { 0x0ee6d3a7c35d8794l,0x042e65580356bae5l,0x9f59698d643322fdl,
-            0x9379ae1550a61967l },
-          { 0x64b9ae62fcc9981el,0xaed3d6316d2934c6l,0x2454b3025e4e65ebl,
-            0xab09f647f9950428l },
-          0 },
-        /* 5 << 32 */
-        { { 0xc1b3d3d331b85f09l,0x0f45354aa88ae64al,0xa8b626d32fec50fdl,
-            0x1bdcfbd4e828834fl },
-          { 0xe45a2866cd522539l,0xfa9d4732810f7ab3l,0xd8c1d6b4c905f293l,
-            0x10ac80473461b597l },
-          0 },
-        /* 7 << 32 */
-        { { 0xbbb175146fc627e2l,0xa0569bc591573a51l,0xa7016d9e358243d5l,
-            0x0dac0c56ac1d6692l },
-          { 0x993833b5da590d5fl,0xa8067803de817491l,0x65b4f2124dbf75d0l,
-            0xcc960232ccf80cfbl },
-          0 },
-        /* 9 << 32 */
-        { { 0x35d742806cf3d65bl,0x4b7c790678b28dd9l,0xc4fcdd2f95e1f85fl,
-            0xcf6fb7ba591350b6l },
-          { 0x9f8e3287edfc26afl,0xe2dd9e73c2d0ed9al,0xeab5d67f24cbb703l,
-            0x60c293999a759a5al },
-          0 },
-        /* 10 << 32 */
-        { { 0xcf8625d7708f97cdl,0xfb6c5119ea419de4l,0xe8cb234dc03f9b06l,
-            0x5a7822c335e23972l },
-          { 0x9b876319a284ff10l,0xefcc49977093fdcel,0xdddfd62a878fe39al,
-            0x44bfbe53910aa059l },
-          0 },
-        /* 11 << 32 */
-        { { 0xfb93ca3d7ca53d5fl,0x432649f004379cbfl,0xf506113acba2ff75l,
-            0x4594ae2103718b35l },
-          { 0x1aa6cee50d044627l,0xc0e0d2b7f5c94aa2l,0x0bf33d3dee4dd3f5l,
-            0xaca96e288477c97al },
-          0 },
-        /* 13 << 32 */
-        { { 0x995c068e6861a713l,0xa9ba339463de88dcl,0xab954344689a964fl,
-            0x58195aec0f5a0d6cl },
-          { 0xc5f207d5c98f8b50l,0x6600cd280c98ccf6l,0x1a680fe339c3e6c2l,
-            0xa23f3931660e87c0l },
-          0 },
-        /* 15 << 32 */
-        { { 0x43bc1b42c78440a1l,0x9a07e22632ac6c3fl,0xaf3d7ba10f4bcd15l,
-            0x3ad43c9da36814c6l },
-          { 0xca11f742a0c9c162l,0xd3e06fc6c90b96ecl,0xeace6e766bf2d03fl,
-            0x8bcd98e8f8032795l },
-          0 },
-        /* 16 << 32 */
-        { { 0xe27a6dbe305406ddl,0x8eb7dc7fdd5d1957l,0xf54a6876387d4d8fl,
-            0x9c479409c7762de4l },
-          { 0xbe4d5b5d99b30778l,0x25380c566e793682l,0x602d37f3dac740e3l,
-            0x140deabe1566e4ael },
-          0 },
-        /* 17 << 32 */
-        { { 0x7be3ddb77099ae96l,0x83d6157306e0da6al,0x31bcac5f74bf9870l,
-            0x7f7aa3b422b256f1l },
-          { 0xff84d63caa212e20l,0x7d636556decdc8b5l,0x8fed824dbf909d62l,
-            0x62d70186e5fb1445l },
-          0 },
-        /* 19 << 32 */
-        { { 0x8796989f67d8ab8al,0xa46282253700b772l,0xa353cadf05f799abl,
-            0x7a8be2741eeb06bbl },
-          { 0xf74a367e4653b134l,0x4e43449660c70340l,0xc99b6d6b72e10b18l,
-            0xcf1adf0f1ba636e1l },
-          0 },
-        /* 21 << 32 */
-        { { 0xb0260fb57c6a0958l,0xae791b9c2fc2731el,0xb339f2bf8ce6e575l,
-            0x769214a816e2639fl },
-          { 0xbaf422e1346da10el,0xc7805fdf7a56f463l,0xf47b6b766f845428l,
-            0x8f21369e38492948l },
-          0 },
-        /* 23 << 32 */
-        { { 0x2bac716a17931a90l,0x42a5e27cc8267236l,0xfd4b367c0bafeb78l,
-            0x5856e69c6173db02l },
-          { 0xfaac7358973d73c4l,0xbfbffcc36768d285l,0x05444ff2be3eb243l,
-            0x9f8d3692f3c323fel },
-          0 },
-        /* 25 << 32 */
-        { { 0xac296863221c31a9l,0x46f3a24ef1ca99a9l,0xd927648a7535a864l,
-            0xd7e3c47d5848e497l },
-          { 0xc19595b782a98ac7l,0x9a9bf627273ff554l,0xe29aa48fb62298a1l,
-            0xed3f068ee797e9e3l },
-          0 },
-        /* 27 << 32 */
-        { { 0x8d16a1660eb9227bl,0xe04c6bc58c37c74bl,0xd1be9585cc1ef78cl,
-            0xa5cfe1962e929d9bl },
-          { 0xc9b0ea21417c1cc6l,0x316352d345b79599l,0xc1502c4dc2d54af7l,
-            0xe7f4412990f83445l },
-          0 },
-        /* 28 << 32 */
-        { { 0x0f6704abd95917e8l,0x168dafaeaec6e899l,0xd2833e8cde710027l,
-            0x34ea277e68ee3c59l },
-          { 0x3689e2350054d4e5l,0x6f3a568d11013943l,0xb5ce1ff69bc2b144l,
-            0x705bfe7e72b33a59l },
-          0 },
-        /* 29 << 32 */
-        { { 0x1baa4f02c8e93284l,0xec6b93ea3c97d3e8l,0xb656c149034f8b32l,
-            0x3cab9063cd4cc69fl },
-          { 0xd8de5989d61031ccl,0xcf85329fc1b1de1dl,0xf18b78b323d8cb9al,
-            0x6dc04bc61a6b69eal },
-          0 },
-        /* 31 << 32 */
-        { { 0x79cf86314a1d4f8fl,0xda5ba331aa47394el,0x36f9c0be8ff20527l,
-            0xccdc719bbc7097f6l },
-          { 0x2304a3ba5cb052bbl,0xab80cdea392f0ab5l,0x0ac1858bf38de03bl,
-            0xd6e2119878a8f55dl },
-          0 },
-        /* 33 << 32 */
-        { { 0x6bdebc26584bc618l,0x499f0f1894591499l,0xd35ed50bf4a573dal,
-            0x5a622e73ff2792d0l },
-          { 0x8510cbce68d41a3bl,0x6610f43c94e919afl,0x4527373dc163c8a1l,
-            0x50afb46f280a8a7dl },
-          0 },
-        /* 34 << 32 */
-        { { 0x33e779cd8de7707al,0xf94bbd94438f535bl,0x61159864be144878l,
-            0xb6623235f098ce4al },
-          { 0x6813b71ba65568d8l,0x6603dd4c2f796451l,0x9a97d88c8b9ee5b2l,
-            0xaaa4593549d5926cl },
-          0 },
-        /* 35 << 32 */
-        { { 0x2e01fc75ebe75bf2l,0x8270318d6cbdd09cl,0x534e4f21d3f1a196l,
-            0x6c9eaeca9459173el },
-          { 0xda454fe0b642a1d4l,0xe45b69bfc4664c4al,0x4724bd423e078dc8l,
-            0x39ac8fe603336b81l },
-          0 },
-        /* 36 << 32 */
-        { { 0x0a2e53dd302e9485l,0x75882a19deaa9ff4l,0xe283242eac8de4ddl,
-            0x2742105cc678dba7l },
-          { 0x9f6f0a88cdb3a8a2l,0x5c9d3338f722e894l,0xf1fa3143c38c31c1l,
-            0x22137e2db18c77acl },
-          0 },
-        /* 37 << 32 */
-        { { 0xd821665e368d7835l,0x3300c012b596c6ecl,0xb60da7353557b2ddl,
-            0x6c3d9db6fb8cf9ael },
-          { 0x092d8b0b8b4b0d34l,0x900a0bf4b3d4107dl,0x75371a245e813ec3l,
-            0x91125a17f2ad56d5l },
-          0 },
-        /* 39 << 32 */
-        { { 0x5e6594e2fe0073e6l,0x908a93778be13cb7l,0xa2c3d5c8ac26617cl,
-            0xa0bab085c317c6b9l },
-          { 0x0bdc183b83664109l,0x6bbba2b468f9dcd9l,0x697a50785814be41l,
-            0x12a59b183a5e5f98l },
-          0 },
-        /* 40 << 32 */
-        { { 0xbd9802e6c30fa92bl,0x5a70d96d9a552784l,0x9085c4ea3f83169bl,
-            0xfa9423bb06908228l },
-          { 0x2ffebe12fe97a5b9l,0x85da604971b99118l,0x9cbc2f7f63178846l,
-            0xfd96bc709153218el },
-          0 },
-        /* 41 << 32 */
-        { { 0xb5a85c61bfa70ca6l,0x4edc7f2d4c1f745fl,0x05aea9aa3ded1eb5l,
-            0x750385efb82e5918l },
-          { 0xdcbc53221fdc5164l,0x32a5721f6794184el,0x5c5b2269ff09c90bl,
-            0x96d009115323ca42l },
-          0 },
-        /* 43 << 32 */
-        { { 0x12c73403f43f1440l,0xc94813eb66cc1f50l,0x04d5957b9b035151l,
-            0x76011bca4bfaafa8l },
-          { 0x56806c13574f1f0al,0x98f63a4697652a62l,0x17c63ef4a3178de9l,
-            0xf7ce961a65009a52l },
-          0 },
-        /* 44 << 32 */
-        { { 0x58f92aebe4173516l,0xdc37d99275e42d44l,0x76dcec5b4d48e1bal,
-            0x07e0608e25676448l },
-          { 0xa1877bcd1d4af36al,0x38b62b3c5a8ccf0cl,0x60522e88aeab7f75l,
-            0xbef213ed5e03547al },
-          0 },
-        /* 45 << 32 */
-        { { 0x8acd5ba4e6ed0282l,0x792328f06a04531dl,0xe95de8aa80297e50l,
-            0x79d33ce07d60e05cl },
-          { 0xcb84646dd827d602l,0xd3421521302a608cl,0x867970a4524f9751l,
-            0x05e2f7e347a75734l },
-          0 },
-        /* 46 << 32 */
-        { { 0x64e4de4a01c66263l,0xbcfe16a4d0033d4cl,0x359e23d4817de1dcl,
-            0xb01e812ec259449cl },
-          { 0x90c9ade2df53499fl,0xabbeaa27288c6862l,0x5a655db4cd1b896fl,
-            0x416f10a5a022a3d6l },
-          0 },
-        /* 47 << 32 */
-        { { 0x0d17e1ef98601fd5l,0x9a3f85e0eab76a6fl,0x0b9eaed1510b80a1l,
-            0x3282fd747ec30422l },
-          { 0xaca5815a70a4a402l,0xfad3121cf2439cb2l,0xba251af81fccabd6l,
-            0xb382843fa5c127d5l },
-          0 },
-        /* 48 << 32 */
-        { { 0x958381db1782269bl,0xae34bf792597e550l,0xbb5c60645f385153l,
-            0x6f0e96afe3088048l },
-          { 0xbf6a021577884456l,0xb3b5688c69310ea7l,0x17c9429504fad2del,
-            0xe020f0e517896d4dl },
-          0 },
-        /* 49 << 32 */
-        { { 0x442fdfe920cd1ebel,0xa8317dfa6a250d62l,0x5214576d082d5a2dl,
-            0xc1a5d31930803c33l },
-          { 0x33eee5b25e4a2cd0l,0x7df181b3b4db8011l,0x249285145b5c6b0bl,
-            0x464c1c5828bf8837l },
-          0 },
-        /* 51 << 32 */
-        { { 0x5464da65d55babd1l,0x50eaad2a0048d80fl,0x782ca3dd2b9bce90l,
-            0x41107164ab526844l },
-          { 0xad3f0602d56e0a5fl,0xc1f0248018455114l,0xe05d8dcab1527931l,
-            0x87818cf5bb1295d7l },
-          0 },
-        /* 52 << 32 */
-        { { 0x95aeb5bd483e333al,0x003af31effeaededl,0xfc5532e87efb1e4fl,
-            0xb37e0fb52dfa24a5l },
-          { 0x485d4cecdc140b08l,0xb81a0d23983bd787l,0xd19928dae8d489fdl,
-            0x3fa0312c177b9dbdl },
-          0 },
-        /* 53 << 32 */
-        { { 0xade391470c6d7e88l,0x4fd1e8cd47072c45l,0x145760fed5a65c56l,
-            0x198960c7be4887del },
-          { 0xfe7974a82640257al,0xf838a19b774febefl,0xb2aecad11b6e988el,
-            0x643f44fa448e4a8fl },
-          0 },
-        /* 55 << 32 */
-        { { 0xc35ceffdee756e71l,0x2c1364d88ea932c4l,0xbd594d8d837d2d9fl,
-            0x5b334bdac9d74d48l },
-          { 0x72dc3e03b8fac08bl,0x38f01de006fdf70fl,0x4bde74b31d298ba4l,
-            0x2598d183ad5f42a9l },
-          0 },
-        /* 57 << 32 */
-        { { 0x02c6ba15f62befa2l,0x6399ceb55c8ccee9l,0x3638bd6e08d3473el,
-            0xb8f1f13d2f8f4a9cl },
-          { 0x50d7560655827a74l,0x8d6e65f33fb4f32cl,0x40a5d21189ee621al,
-            0x6d3f9e11c4474716l },
-          0 },
-        /* 59 << 32 */
-        { { 0xcb633a4ce9b2bb8fl,0x0475703f8c529253l,0x61e007b5a8878873l,
-            0x342d77ba14504159l },
-          { 0x2925175c313578dfl,0x4e631897b6b097f1l,0xe64d138929350e41l,
-            0x2fb20608ec7adccdl },
-          0 },
-        /* 60 << 32 */
-        { { 0xa560c234d5c0f5d1l,0x74f84bf62bdef0efl,0x61ed00005cbd3d0bl,
-            0xc74262d087fb408bl },
-          { 0xad30a6496cc64128l,0x708e3a31a4a8b154l,0xaf21ce2637f82074l,
-            0x31d33b38204c9a74l },
-          0 },
-        /* 61 << 32 */
-        { { 0x8f609fe04cc2f575l,0xe44f9784b35488c4l,0x0d464bb6180fa375l,
-            0x4f44d5d2de2247b8l },
-          { 0xf538eb38141ef077l,0x781f8f6e8fa456a4l,0x67e9a46429b4f39dl,
-            0x245d21e8b704c3e9l },
-          0 },
-        /* 63 << 32 */
-        { { 0x45a94ee858ffa7cdl,0x4d38bc6818053549l,0x0b4bc65a499d79f3l,
-            0xa81e3ab09159cab7l },
-          { 0xf13716efb47898cel,0xb7ee597c2e2d9044l,0x09396b90e6158276l,
-            0x5c644dc36a533fcel },
-          0 },
-        /* 64 << 32 */
-        { { 0xcca4428dbbe5a1a9l,0x8187fd5f3126bd67l,0x0036973a48105826l,
-            0xa39b6663b8bd61a0l },
-          { 0x6d42deef2d65a808l,0x4969044f94636b19l,0xf611ee47dd5d564cl,
-            0x7b2f3a49d2873077l },
-          0 },
-        /* 65 << 32 */
-        { { 0xbe4c16c3bf429668l,0xd32f56f0ef35db3bl,0xae8355de9ea4e3f1l,
-            0x8f66c4a2a450944el },
-          { 0xafab94c8b798fbe2l,0x18c57baff7f3d5cfl,0x692d191c5cfa5c7dl,
-            0xc0c25f69a689daebl },
-          0 },
-        /* 71 << 32 */
-        { { 0x15fb3ae398340d4cl,0xa8b9233a7de82134l,0x44971a545fc0dbc6l,
-            0xb2b4f0f3a1d3f094l },
-          { 0x8d9eaba1b6242bd4l,0xd8aad777787cc557l,0xb1ab8b7870d1a2bbl,
-            0x5d20f48cead3bfe3l },
-          0 },
-        /* 77 << 32 */
-        { { 0x4dacbf09a2bf9772l,0x969a4c4357aa8457l,0xadbe673b273ebfc5l,
-            0xb85582bb927778c9l },
-          { 0x748371855c03752cl,0xc337bc6bc2f60d11l,0x2c3838e4ad456a09l,
-            0xaf479c897e381842l },
-          0 },
-        /* 83 << 32 */
-        { { 0x8530ae751b1aea77l,0xf43b923ba8310cb9l,0x9c1a60c6bf4dd6c5l,
-            0x11885b863e3aaaa5l },
-          { 0x594a8fa90f69821el,0x1eece3d66bc37998l,0x1fd718f518df32bfl,
-            0x1c00c7d461d84082l },
-          0 },
-        /* 89 << 32 */
-        { { 0xd67ee3a4c763c3cfl,0x760b128305969234l,0x1a5ff331ec17f2d1l,
-            0x25f0392a84fecfefl },
-          { 0xb1bc004a3a80d47el,0xf450bf08182fee3bl,0xf11117681e19751el,
-            0x5b4127dae28ed23fl },
-          0 },
-        /* 95 << 32 */
-        { { 0x91e00defdaf08f09l,0x7ef41724f4738a07l,0x990fbbceaf1263fcl,
-            0x779121e3e6eeb5aal },
-          { 0x3e162c7a5a3ecf52l,0x73ae568a51be5faal,0x8bea1bfa451be8a9l,
-            0x3e8cd5db90e11097l },
-          0 },
-        /* 101 << 32 */
-        { { 0x90390f7224d27159l,0x685c139efd07e5d4l,0x4e21e44a3bc234a8l,
-            0x61b50f34eeb14dacl },
-          { 0x7beb0aa087555d58l,0x781326bcc806f0d2l,0xc289537a1eb7199fl,
-            0x44a31a037b42766el },
-          0 },
-        /* 107 << 32 */
-        { { 0x7d778206edde4b40l,0x34539fa18eb92fcdl,0x5a0bdd79bf52a552l,
-            0x066d3672fdcca75el },
-          { 0xd73fa893e28b5a5bl,0xb495135876c38698l,0x44469b0114ae16cfl,
-            0xb428c763691d6618l },
-          0 },
-        /* 113 << 32 */
-        { { 0x9022db8b69196353l,0x152ebb7dd7a4afd0l,0xea36fae57fcf1765l,
-            0xa8fc00ba0decea8al },
-          { 0x1047206a0c0b0414l,0x6607d8ade076df28l,0xf343e19966b8aba1l,
-            0x7f03c1ad311e208dl },
-          0 },
-        /* 116 << 32 */
-        { { 0xe6b4c96e888f3870l,0xa21bb618fe544042l,0x7122ee88bd817699l,
-            0xcb38ecebfa66e173l },
-          { 0x6ed5b3482c9cc05fl,0x591affc84ae0fd9el,0x7cf325ac6e7aaac0l,
-            0x2397c053d05e5be0l },
-          0 },
-        /* 119 << 32 */
-        { { 0x95363f61eaa96552l,0xe03bc6b38fb15b73l,0xa5c5808f2c389053l,
-            0xcd021e6c11b2030cl },
-          { 0x349ca9bdc038e30al,0x0a3368d4165afa2cl,0x043630debbfa1cc6l,
-            0xb8c4456ba7cdbf69l },
-          0 },
-        /* 125 << 32 */
-        { { 0x63aa3315fd7d2983l,0xaf4c96afa6a04bedl,0x3a5c0b5410814a74l,
-            0x9906f5e30f9b0770l },
-          { 0x622be6523676986fl,0x09ac5bc0173e7cb5l,0x1c40e56a502c8b3cl,
-            0xabb9a0f7253ce8f6l },
-          0 },
-    },
-    {
-        /* 0 << 40 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 40 */
-        { { 0x889f6d65533ef217l,0x7158c7e4c3ca2e87l,0xfb670dfbdc2b4167l,
-            0x75910a01844c257fl },
-          { 0xf336bf07cf88577dl,0x22245250e45e2acel,0x2ed92e8d7ca23d85l,
-            0x29f8be4c2b812f58l },
-          0 },
-        /* 3 << 40 */
-        { { 0xc51e414351facc61l,0xbaf2647de68a25bcl,0x8f5271a00ff872edl,
-            0x8f32ef993d2d9659l },
-          { 0xca12488c7593cbd4l,0xed266c5d02b82fabl,0x0a2f78ad14eb3f16l,
-            0xc34049484d47afe3l },
-          0 },
-        /* 4 << 40 */
-        { { 0xa6f3d574c005979dl,0xc2072b426a40e350l,0xfca5c1568de2ecf9l,
-            0xa8c8bf5ba515344el },
-          { 0x97aee555114df14al,0xd4374a4dfdc5ec6bl,0x754cc28f2ca85418l,
-            0x71cb9e27d3c41f78l },
-          0 },
-        /* 5 << 40 */
-        { { 0x09c1670209470496l,0xa489a5edebd23815l,0xc4dde4648edd4398l,
-            0x3ca7b94a80111696l },
-          { 0x3c385d682ad636a4l,0x6702702508dc5f1el,0x0c1965deafa21943l,
-            0x18666e16610be69el },
-          0 },
-        /* 7 << 40 */
-        { { 0x45beb4ca2a604b3bl,0x56f651843a616762l,0xf52f5a70978b806el,
-            0x7aa3978711dc4480l },
-          { 0xe13fac2a0e01fabcl,0x7c6ee8a5237d99f9l,0x251384ee05211ffel,
-            0x4ff6976d1bc9d3ebl },
-          0 },
-        /* 9 << 40 */
-        { { 0xdde0492316e043a2l,0x98a452611dd3d209l,0xeaf9f61bd431ebe8l,
-            0x00919f4dbaf56abdl },
-          { 0xe42417db6d8774b1l,0x5fc5279c58e0e309l,0x64aa40613adf81eal,
-            0xef419edabc627c7fl },
-          0 },
-        /* 10 << 40 */
-        { { 0x3919759239ef620fl,0x9d47284074fa29c4l,0x4e428fa39d416d83l,
-            0xd1a7c25129f30269l },
-          { 0x46076e1cd746218fl,0xf3ad6ee8110d967el,0xfbb5f434a00ae61fl,
-            0x3cd2c01980d4c929l },
-          0 },
-        /* 11 << 40 */
-        { { 0xfa24d0537a4af00fl,0x3f938926ca294614l,0x0d700c183982182el,
-            0x801334434cc59947l },
-          { 0xf0397106ec87c925l,0x62bd59fc0ed6665cl,0xe8414348c7cca8b5l,
-            0x574c76209f9f0a30l },
-          0 },
-        /* 13 << 40 */
-        { { 0x95be42e2bb8b6a07l,0x64be74eeca23f86al,0xa73d74fd154ce470l,
-            0x1c2d2857d8dc076al },
-          { 0xb1fa1c575a887868l,0x38df8e0b3de64818l,0xd88e52f9c34e8967l,
-            0x274b4f018b4cc76cl },
-          0 },
-        /* 15 << 40 */
-        { { 0x3f5c05b4f8b7559dl,0x0be4c7acfae29200l,0xdd6d3ef756532accl,
-            0xf6c3ed87eea7a285l },
-          { 0xe463b0a8f46ec59bl,0x531d9b14ecea6c83l,0x3d6bdbafc2dc836bl,
-            0x3ee501e92ab27f0bl },
-          0 },
-        /* 16 << 40 */
-        { { 0x8df275455922ac1cl,0xa7b3ef5ca52b3f63l,0x8e77b21471de57c4l,
-            0x31682c10834c008bl },
-          { 0xc76824f04bd55d31l,0xb6d1c08617b61c71l,0x31db0903c2a5089dl,
-            0x9c092172184e5d3fl },
-          0 },
-        /* 17 << 40 */
-        { { 0x7b1a921ea6b3340bl,0x6d7c4d7d7438a53el,0x2b9ef73c5bf71d8fl,
-            0xb5f6e0182b167a7cl },
-          { 0x5ada98ab0ce536a3l,0xee0f16f9e1fea850l,0xf6424e9d74f1c0c5l,
-            0x4d00de0cd3d10b41l },
-          0 },
-        /* 19 << 40 */
-        { { 0xd542f522a6533610l,0xfdde15a734ec439al,0x696560fedc87dd0dl,
-            0x69eab421e01fd05fl },
-          { 0xca4febdc95cc5988l,0x839be396c44d92fbl,0x7bedff6daffe543bl,
-            0xd2bb97296f6da43al },
-          0 },
-        /* 21 << 40 */
-        { { 0x5bc6dea80b8d0077l,0xb2adf5d1ea9c49efl,0x7104c20eaafe8659l,
-            0x1e3604f37866ee7el },
-          { 0x0cfc7e7b3075c8c5l,0x5281d9bb639c5a2bl,0xcbdf42494bc44ee3l,
-            0x835ab066655e9209l },
-          0 },
-        /* 23 << 40 */
-        { { 0x78fbda4b90b94ffal,0x447e52eb7beb993cl,0x920011bc92620d15l,
-            0x7bad6ecf481fd396l },
-          { 0xad3bd28ba989a09el,0x20491784a3e62b78l,0xcdcd7096b07bd9efl,
-            0x9bf5bb7337d780adl },
-          0 },
-        /* 25 << 40 */
-        { { 0xbe911a71a976c8d4l,0xba0346743fdd778el,0x2359e7434cf87ea1l,
-            0x8dccf65f07ebb691l },
-          { 0x6c2c18eb09746d87l,0x6a19945fd2ecc8fal,0xc67121ff2ffa0339l,
-            0x408c95ba9bd9fc31l },
-          0 },
-        /* 27 << 40 */
-        { { 0xa317204bcaa5da39l,0xd390df7468bf53d7l,0x56de18b2dbd71c0dl,
-            0xcb4d3bee75184779l },
-          { 0x815a219499d920a5l,0x9e10fb4ecf3d3a64l,0x7fd4901dfe92e1eel,
-            0x5d86d10d3ab87b2el },
-          0 },
-        /* 28 << 40 */
-        { { 0x24f2a692840bb336l,0x7c353bdca669fa7bl,0xda20d6fcdec9c300l,
-            0x625fbe2fa13a4f17l },
-          { 0xa2b1b61adbc17328l,0x008965bfa9515621l,0x49690939c620ff46l,
-            0x182dd27d8717e91cl },
-          0 },
-        /* 29 << 40 */
-        { { 0x98e9136c878303e4l,0x2769e74fd1e65efdl,0x6154c545809da56el,
-            0x8c5d50a04301638cl },
-          { 0x10f3d2068214b763l,0x2da9a2fc44df0644l,0xca912bab588a6fcdl,
-            0xe9e82d9b227e1932l },
-          0 },
-        /* 31 << 40 */
-        { { 0xcbdc4d66d080e55bl,0xad3f11e5b8f98d6bl,0x31bea68e18a32480l,
-            0xdf1c6fd52c1bcf6el },
-          { 0xadcda7ee118a3f39l,0xbd02f857ac060d5fl,0xd2d0265d86631997l,
-            0xb866a7d33818f2d4l },
-          0 },
-        /* 33 << 40 */
-        { { 0xfbcce2d31892d98dl,0x2e34bc9507de73dcl,0x3a48d1a94891eec1l,
-            0xe64499c24d31060bl },
-          { 0xe9674b7149745520l,0xf126ccaca6594a2cl,0x33e5c1a079945342l,
-            0x02aa0629066e061fl },
-          0 },
-        /* 34 << 40 */
-        { { 0xdfd7c0ae7af3191el,0x923ec111d68c70d9l,0xb6f1380bb675f013l,
-            0x9192a224f23d45bal },
-          { 0xbe7890f9524891e3l,0x45b24c47eba996bbl,0x59331e48320447e9l,
-            0x0e4d8753ac9afad4l },
-          0 },
-        /* 35 << 40 */
-        { { 0x49e49c38c9f5a6c3l,0x3f5eea44d8ee2a65l,0x02bf3e761c74bbb4l,
-            0x50d291cdef565571l },
-          { 0xf4edc290a36dd5fal,0x3015df9556dd6b85l,0x4494926aa5549a16l,
-            0x5de6c59390399e4al },
-          0 },
-        /* 36 << 40 */
-        { { 0x29be11c6ce800998l,0x72bb1752b90360d9l,0x2c1931975a4ad590l,
-            0x2ba2f5489fc1dbc0l },
-          { 0x7fe4eebbe490ebe0l,0x12a0a4cd7fae11c0l,0x7197cf81e903ba37l,
-            0xcf7d4aa8de1c6dd8l },
-          0 },
-        /* 37 << 40 */
-        { { 0x961fa6317e249e7bl,0x5c4f707796caed50l,0x6b176e62d7e50885l,
-            0x4dd5de72f390cbecl },
-          { 0x91fa29954b2bd762l,0x80427e6395b8dadel,0xd565bf1de2c34743l,
-            0x911da39d16e6c841l },
-          0 },
-        /* 39 << 40 */
-        { { 0x48365465802ff016l,0x6d2a561f71beece6l,0xdd299ce6f9707052l,
-            0x62a32698a23407bbl },
-          { 0x1d55bdb147004afbl,0xfadec124369b1084l,0x1ce78adf291c89f7l,
-            0x9f2eaf03278bc529l },
-          0 },
-        /* 40 << 40 */
-        { { 0x92af6bf43fd5684cl,0x2b26eecf80360aa1l,0xbd960f3000546a82l,
-            0x407b3c43f59ad8fel },
-          { 0x86cae5fe249c82bal,0x9e0faec72463744cl,0x87f551e894916272l,
-            0x033f93446ceb0615l },
-          0 },
-        /* 41 << 40 */
-        { { 0x04658ad212dba0cel,0x9e600624068822f0l,0x84661f11b26d368bl,
-            0xbca867d894ebb87al },
-          { 0x79506dc42f1bad89l,0x1a8322d3ebcbe7a1l,0xb4f1e102ac197178l,
-            0x29a950b779f7198cl },
-          0 },
-        /* 43 << 40 */
-        { { 0x19a6fb0984a3d1d5l,0x6c75c3a2ba5f5307l,0x7983485bf9698447l,
-            0x689f41b88b1cdc1el },
-          { 0x18f6fbd74c1979d0l,0x3e6be9a27a0b6708l,0x06acb615f63d5a8al,
-            0x8a817c098d0f64b1l },
-          0 },
-        /* 44 << 40 */
-        { { 0x1e5eb0d18be82e84l,0x89967f0e7a582fefl,0xbcf687d5a6e921fal,
-            0xdfee4cf3d37a09bal },
-          { 0x94f06965b493c465l,0x638b9a1c7635c030l,0x7666786466f05e9fl,
-            0xccaf6808c04da725l },
-          0 },
-        /* 45 << 40 */
-        { { 0xa9b3479b1b53a173l,0xc041eda3392eddc0l,0xdb8f804755edd7eel,
-            0xaf1f7a37ab60683cl },
-          { 0x9318603a72c0accbl,0xab1bb9fe401cbf3cl,0xc40e991e88afe245l,
-            0x9298a4580d06ac35l },
-          0 },
-        /* 46 << 40 */
-        { { 0x58e127d5036c2fe7l,0x5fe5020555b93361l,0xc1373d850f74a045l,
-            0x28cd79dbe8228e4bl },
-          { 0x0ae82320c2018d9al,0xf6d0049c78f8016al,0x381b6fe2149b31fbl,
-            0x33a0e8adec3cfbcfl },
-          0 },
-        /* 47 << 40 */
-        { { 0x23a6612e9eab5da7l,0xb645fe29d94d6431l,0xe3d74594ca1210c4l,
-            0xdc1376bceeca0674l },
-          { 0xfd40dfef657f0154l,0x7952a548d52cbac5l,0x0ee189583685ad28l,
-            0xd13639409ba9ca46l },
-          0 },
-        /* 48 << 40 */
-        { { 0xca2eb690768fccfcl,0xf402d37db835b362l,0x0efac0d0e2fdfccel,
-            0xefc9cdefb638d990l },
-          { 0x2af12b72d1669a8bl,0x33c536bc5774ccbdl,0x30b21909fb34870el,
-            0xc38fa2f77df25acal },
-          0 },
-        /* 49 << 40 */
-        { { 0x1337902f1c982cd6l,0x222e08fe14ec53eal,0x6c8abd0d330ef3e5l,
-            0xeb59e01531f6fd9dl },
-          { 0xd74ae554a8532df4l,0xbc010db1ab44c83el,0xe98016561b8f9285l,
-            0x65a9612783acc546l },
-          0 },
-        /* 51 << 40 */
-        { { 0x36a8b0a76770cfb1l,0x3338d52f9bb578fcl,0x5136c785f5ed12a4l,
-            0x652d47ed87bf129el },
-          { 0x9c6c827e6067c2d0l,0x61fc2f410345533al,0x2d7fb182130cea19l,
-            0x71a0186330b3ef85l },
-          0 },
-        /* 52 << 40 */
-        { { 0x74c5f02bbf81f3f5l,0x0525a5aeaf7e4581l,0x88d2aaba433c54ael,
-            0xed9775db806a56c5l },
-          { 0xd320738ac0edb37dl,0x25fdb6ee66cc1f51l,0xac661d1710600d76l,
-            0x931ec1f3bdd1ed76l },
-          0 },
-        /* 53 << 40 */
-        { { 0xb81e239161faa569l,0xb379f759bb40eebfl,0x9f2fd1b2a2c54549l,
-            0x0a968f4b0d6ba0ael },
-          { 0xaa869e6eedfe8c75l,0x0e36b298645ab173l,0x5a76282b0bcdefd7l,
-            0x9e949331d05293f2l },
-          0 },
-        /* 55 << 40 */
-        { { 0xc1cfa9a1c59fac6el,0x2648bffcb72747cel,0x5f8a39805f2e2637l,
-            0x8bd3a8eb73e65758l },
-          { 0xd9c43f1df14381a7l,0xecc1c3b0d6a86c10l,0xffcf4fa8a4a6dc74l,
-            0x7304fa834cea0a46l },
-          0 },
-        /* 57 << 40 */
-        { { 0x4460760c34dca952l,0xeac9cf2444c70444l,0xb879297b8493c87el,
-            0x295941a54b2dccb7l },
-          { 0x1e5cecede58721cdl,0xc8b58db74ca0d12bl,0x1927965c6da1d034l,
-            0x7220b02839ed1369l },
-          0 },
-        /* 59 << 40 */
-        { { 0xc38746c83c2e34b6l,0x9f27362e38a51042l,0x26febec02067afebl,
-            0xd9c4e15544e7371fl },
-          { 0x6035f469f92930d1l,0xe6ed7c08b4431b8bl,0xa25bf5903e16410dl,
-            0x147d83368adf4c18l },
-          0 },
-        /* 60 << 40 */
-        { { 0x7f01c9ecaa80ba59l,0x3083411a68538e51l,0x970370f1e88128afl,
-            0x625cc3db91dec14bl },
-          { 0xfef9666c01ac3107l,0xb2a8d577d5057ac3l,0xb0f2629992be5df7l,
-            0xf579c8e500353924l },
-          0 },
-        /* 61 << 40 */
-        { { 0xbd9398d6ca02669fl,0x896e053bf9ad11a1l,0xe024b699a3556f9fl,
-            0x23b4b96ad53cbca3l },
-          { 0x549d2d6c89733dd6l,0x3dae193f394f3179l,0x8bf7ec1cdfeda825l,
-            0xf6a1db7a8a4844b4l },
-          0 },
-        /* 63 << 40 */
-        { { 0x3b5403d56437a027l,0xda32bbd233ed30aal,0xd2ad3baa906de0cal,
-            0x3b6df514533f736el },
-          { 0x986f1cab5df9b9c4l,0x41cd2088970d330el,0xaae7c2238c20a923l,
-            0x52760a6e1e951dc0l },
-          0 },
-        /* 64 << 40 */
-        { { 0xb8fa3d931341ed7al,0x4223272ca7b59d49l,0x3dcb194783b8c4a4l,
-            0x4e413c01ed1302e4l },
-          { 0x6d999127e17e44cel,0xee86bf7533b3adfbl,0xf6902fe625aa96cal,
-            0xb73540e4e5aae47dl },
-          0 },
-        /* 65 << 40 */
-        { { 0x55318a525e34036cl,0xc3acafaaf9884e3fl,0xe5ba15cea042ba04l,
-            0x56a1d8960ada550el },
-          { 0xa5198cae87b76764l,0xd079d1f0b6fd84fbl,0xb22b637bcbe363edl,
-            0xbe8ab7d64499deaal },
-          0 },
-        /* 71 << 40 */
-        { { 0xbe8eba5eb4925f25l,0x00f8bf582e3159d6l,0xb1aa24fa18856070l,
-            0x22ea8b74e4c30b22l },
-          { 0x512f633e55bbe4e8l,0x82ba62318678aee9l,0xea05da90fdf72b7el,
-            0x616b9bc7a4fc65eel },
-          0 },
-        /* 77 << 40 */
-        { { 0xe31ee3b3b7c221e7l,0x10353824e353fa43l,0x9d2f3df69dd2a86fl,
-            0x8a12ab9322ccffecl },
-          { 0x25c8e326d666f9e5l,0x33ea98a0598da7fbl,0x2fc1de0917f74e17l,
-            0x0d0b6c7a35efb211l },
-          0 },
-        /* 83 << 40 */
-        { { 0x22a82c6c804e6ecel,0x824a170b1d8fce9el,0x621802becee65ed0l,
-            0x4a4e9e7895ec4285l },
-          { 0x8da0988fa8940b7al,0xaff89c5b86445aa5l,0x386fdbdad689cde9l,
-            0x3aeaae7d9f5caaccl },
-          0 },
-        /* 89 << 40 */
-        { { 0xe9cb9e68a7b62f4cl,0x515cae0ec3b7092el,0xb8abec354b491f52l,
-            0x672673fd01eeabc1l },
-          { 0x65e5739f7ad6e8a1l,0xc2da8e003d91b2f9l,0xcc43229cced84319l,
-            0x0f8cbf9574ccf2d1l },
-          0 },
-        /* 95 << 40 */
-        { { 0xb03d1cfb1b2f872al,0x88aef4670872b6f7l,0xaafe55e48ea9170cl,
-            0xd5cc4875f24aa689l },
-          { 0x7e5732908458ce84l,0xef4e143d58bfc16dl,0xc58626efaa222836l,
-            0x01c60ec0ca5e0cb8l },
-          0 },
-        /* 101 << 40 */
-        { { 0x123901aa36337c09l,0x1697acadd2f5e675l,0xc0a1ddd022fe2bael,
-            0xf68ea88cff0210ddl },
-          { 0x665d11e014168709l,0x912a575f45f25321l,0x7e7ed38070c78934l,
-            0x663d692cb0a46322l },
-          0 },
-        /* 107 << 40 */
-        { { 0x912ab8bd8642cba4l,0x97fab1a3b6b50b73l,0x76666b3cb86ef354l,
-            0x16d41330fa5ecce9l },
-          { 0x77c7c138c7da404bl,0xc6508cb78c983fb0l,0xe5881733f9004984l,
-            0x76dea7794182c7abl },
-          0 },
-        /* 113 << 40 */
-        { { 0x16db18583556b765l,0x39c18c200263755al,0x7b6691f591c15201l,
-            0x4e4c17b168514ea9l },
-          { 0xacbe449e06f5f20al,0xeb9119d2541ddfb6l,0x2f6e687bf2eac86fl,
-            0xb161471ec14ac508l },
-          0 },
-        /* 116 << 40 */
-        { { 0x58846d32c4744733l,0x40517c71379f9e34l,0x2f65655f130ef6cal,
-            0x526e4488f1f3503fl },
-          { 0x8467bd177ee4a976l,0x1d9dc913921363d1l,0xd8d24c33b069e041l,
-            0x5eb5da0a2cdf7f51l },
-          0 },
-        /* 119 << 40 */
-        { { 0x81c2cc32951ab3e7l,0xc86d9a109b0c7e87l,0x0b7a18bd606ef408l,
-            0x099b5bbfe6c2251el },
-          { 0x46d627d0bfce880fl,0xbfaddcbbe1c6865al,0xa9ab6183d2bb9a00l,
-            0x23cb9a2720ad9789l },
-          0 },
-        /* 125 << 40 */
-        { { 0x1592d0630c25fbebl,0x13869ec24995a3fal,0x6413f494861d0a73l,
-            0xa3b782342f9f1b89l },
-          { 0x113689e2b6cad351l,0x53be2014a873dcc1l,0xccf405e0c6bb1be7l,
-            0x4fff7b4ca9061ca9l },
-          0 },
-    },
-    {
-        /* 0 << 48 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 48 */
-        { { 0xcc7a64880a750c0fl,0x39bacfe34e548e83l,0x3d418c760c110f05l,
-            0x3e4daa4cb1f11588l },
-          { 0x2733e7b55ffc69ffl,0x46f147bc92053127l,0x885b2434d722df94l,
-            0x6a444f65e6fc6b7cl },
-          0 },
-        /* 3 << 48 */
-        { { 0x6d0b16f4bdaedfbdl,0x23fd326086746cedl,0x8bfb1d2fff4b3e17l,
-            0xc7f2ec2d019c14c8l },
-          { 0x3e0832f245104b0dl,0x5f00dafbadea2b7el,0x29e5cf6699fbfb0fl,
-            0x264f972361827cdal },
-          0 },
-        /* 4 << 48 */
-        { { 0x97b14f7ea90567e6l,0x513257b7b6ae5cb7l,0x85454a3c9f10903dl,
-            0xd8d2c9ad69bc3724l },
-          { 0x38da93246b29cb44l,0xb540a21d77c8cbacl,0x9bbfe43501918e42l,
-            0xfffa707a56c3614el },
-          0 },
-        /* 5 << 48 */
-        { { 0x6eb1a2f3e30bc27fl,0xe5f0c05ab0836511l,0x4d741bbf4965ab0el,
-            0xfeec41ca83464bbdl },
-          { 0x1aca705f99d0b09fl,0xc5d6cc56f42da5fal,0x49964eddcc52b931l,
-            0x8ae59615c884d8d8l },
-          0 },
-        /* 7 << 48 */
-        { { 0xf634b57b39f8868al,0xe27f4fd475cc69afl,0xa47e58cbd0d5496el,
-            0x8a26793fd323e07fl },
-          { 0xc61a9b72fa30f349l,0x94c9d9c9b696d134l,0x792beca85880a6d1l,
-            0xbdcc4645af039995l },
-          0 },
-        /* 9 << 48 */
-        { { 0xce7ef8e58c796c3cl,0x9adaae84dd66e57al,0x784ae13e45227f33l,
-            0xb046c5b82a85e757l },
-          { 0xb7aa50aeec37631fl,0xbedc4fca3b300758l,0x0f82567e0ac9700bl,
-            0x1071d9d44ff5f8d2l },
-          0 },
-        /* 10 << 48 */
-        { { 0x61360ee99e240d18l,0x057cdcacb4b94466l,0xe7667cd12fe5325cl,
-            0x1fa297b521974e3bl },
-          { 0xfa4081e7db083d76l,0x31993be6f206bd15l,0x8949269b14c19f8cl,
-            0x21468d72a9d92357l },
-          0 },
-        /* 11 << 48 */
-        { { 0xd09ef6c4e51a2811l,0x39f6862bb8fb66b9l,0x64e77f8d22dfaa99l,
-            0x7b10504461b08aacl },
-          { 0x71704e4c4a7df332l,0xd09734342ffe015bl,0xab0eaf4408d3020el,
-            0x28b1909eed63b97al },
-          0 },
-        /* 13 << 48 */
-        { { 0x2f3fa882cdadcd4fl,0xa4ef68595f631995l,0xe52ca2f9e531766fl,
-            0x20af5c3057e2c1d3l },
-          { 0x1e4828f6e51e94b8l,0xf900a1751a2f5d4fl,0xe831adb3392c58a0l,
-            0x4c5a90ca1b6e5866l },
-          0 },
-        /* 15 << 48 */
-        { { 0x5f3dcba86182827cl,0xd1a448ddbd7e7252l,0x2d8f96fcf493b815l,
-            0xba0a4c263b0aa95fl },
-          { 0x88a1514063a0007fl,0x9564c25e6a9c5846l,0x5a4d7b0fdc0fcbcal,
-            0x2275daa33f8a740el },
-          0 },
-        /* 16 << 48 */
-        { { 0x83f49167ceca9754l,0x426d2cf64b7939a0l,0x2555e355723fd0bfl,
-            0xa96e6d06c4f144e2l },
-          { 0x4768a8dd87880e61l,0x15543815e508e4d5l,0x09d7e772b1b65e15l,
-            0x63439dd6ac302fa0l },
-          0 },
-        /* 17 << 48 */
-        { { 0x159591cc0461086bl,0xb695aa9495e66e51l,0x2d4c946779ded531l,
-            0xbd2482ba89c2be79l },
-          { 0x8ee2658aa20bbf19l,0xc000528a32247917l,0xd924be4affeae845l,
-            0x51312bebed992c8bl },
-          0 },
-        /* 19 << 48 */
-        { { 0x3a01b958dc752bd9l,0x2babdbc20c215d45l,0xe689d79a131641c1l,
-            0x48e8f0da80e05ed4l },
-          { 0x4b505feb77bb70c4l,0xefbd3e2bb6057ef7l,0x7583e22dce603ca5l,
-            0xfbe3b1f22c5c70c7l },
-          0 },
-        /* 21 << 48 */
-        { { 0x8ec1ecf029e5e35al,0x2f3168e58645c2b3l,0xe9297362c7f94cb2l,
-            0x4fbf1466d1c90b39l },
-          { 0x3e4f7656920bae2al,0x805d04b9f1beb172l,0x729a7208dbdbd4b4l,
-            0x1aade45687aeca53l },
-          0 },
-        /* 23 << 48 */
-        { { 0xb0ff1f541934a508l,0x19e1397604bbf31al,0xb2a8e6033717a6b4l,
-            0xd601e45d0ef12cb9l },
-          { 0x563f0af5b515e98el,0x9b129db633984f9bl,0xe34aba2fa47e4a65l,
-            0xb56f82d19e3f9d82l },
-          0 },
-        /* 25 << 48 */
-        { { 0x0203effdb1209b86l,0x21f063edb19d6cbfl,0x59f53476980f275bl,
-            0x202456d7b7ac5e80l },
-          { 0xe5a8c05f4900edc9l,0x04c08eb470f01e86l,0xf74ac2241dcd98cel,
-            0x7e77cc0ce2e830dbl },
-          0 },
-        /* 27 << 48 */
-        { { 0x74e37234a9747edel,0x4fc9fbb1361b1013l,0xe7b533733cf357efl,
-            0x6aa2dd2c991c4193l },
-          { 0x7887e4d2a770917al,0xdd1809b4c20d24cbl,0x004cd7c38e9c2d3el,
-            0xc77c5baba9970abel },
-          0 },
-        /* 28 << 48 */
-        { { 0x20ac0351d598d710l,0x272c4166cb3a4da4l,0xdb82fe1aca71de1fl,
-            0x746e79f2d8f54b0fl },
-          { 0x6e7fc7364b573e9bl,0x75d03f46fd4b5040l,0x5c1cc36d0b98d87bl,
-            0x513ba3f11f472da1l },
-          0 },
-        /* 29 << 48 */
-        { { 0x52927eaac3af237fl,0xfaa06065d7398767l,0x042e72b497c6ce0bl,
-            0xdaed0cc40a9f2361l },
-          { 0xddc2e11c2fc1bb4al,0x631da5770c1a9ef8l,0x8a4cfe44680272bfl,
-            0xc76b9f7262fb5cc3l },
-          0 },
-        /* 31 << 48 */
-        { { 0x248f814538b3aae3l,0xb5345864bc204334l,0x66d6b5bc1d127524l,
-            0xe312080d14f572d3l },
-          { 0x13ed15a716abafebl,0x6f18ce27dba967bel,0x96c9e826ef08552dl,
-            0x2c191b06be2b63e0l },
-          0 },
-        /* 33 << 48 */
-        { { 0xde4be45dc115ca51l,0xa028cafe934dabd6l,0x7e875663d1c0f8c5l,
-            0xa8e32ab063d17473l },
-          { 0x33f55bd5543199aal,0x79d2c937a2071d6el,0xa6a6758ceff16f28l,
-            0x9c5f93ef87d85201l },
-          0 },
-        /* 34 << 48 */
-        { { 0x7f2e440381e9ede3l,0x243c3894caf6df0al,0x7c605bb11c073b11l,
-            0xcd06a541ba6a4a62l },
-          { 0x2916894949d4e2e5l,0x33649d074af66880l,0xbfc0c885e9a85035l,
-            0xb4e52113fc410f4bl },
-          0 },
-        /* 35 << 48 */
-        { { 0xe86f21bc3ad4c81el,0x53b408403a37dcebl,0xaa606087383402cdl,
-            0xc248caf185452b1dl },
-          { 0x38853772576b57cdl,0xe2798e5441b7a6edl,0x7c2f1eed95ef4a33l,
-            0xccd7e776adb1873cl },
-          0 },
-        /* 36 << 48 */
-        { { 0xdca3b70678a6513bl,0x92ea4a2a9edb1943l,0x02642216db6e2dd8l,
-            0x9b45d0b49fd57894l },
-          { 0x114e70dbc69d11ael,0x1477dd194c57595fl,0xbc2208b4ec77c272l,
-            0x95c5b4d7db68f59cl },
-          0 },
-        /* 37 << 48 */
-        { { 0xd978bb791c61030al,0xa47325d2218222f3l,0x65ad4d4832e67d97l,
-            0x31e4ed632e0d162al },
-          { 0x7308ea317f76da37l,0xcfdffe87d93f35d8l,0xf4b2d60ee6f96cc4l,
-            0x8028f3bd0117c421l },
-          0 },
-        /* 39 << 48 */
-        { { 0x7df80cbb9543edb6l,0xa07a54df40b0b3bcl,0xacbd067cc1888488l,
-            0x61ad61318a00c721l },
-          { 0x67e7599ebe2e6fe6l,0x8349d568f7270e06l,0x5630aabc307bc0c7l,
-            0x97210b3f71af442fl },
-          0 },
-        /* 40 << 48 */
-        { { 0xfe541fa47ea67c77l,0x952bd2afe3ea810cl,0x791fef568d01d374l,
-            0xa3a1c6210f11336el },
-          { 0x5ad0d5a9c7ec6d79l,0xff7038af3225c342l,0x003c6689bc69601bl,
-            0x25059bc745e8747dl },
-          0 },
-        /* 41 << 48 */
-        { { 0x58bdabb7ef701b5fl,0x64f987aee00c3a96l,0x533b391e2d585679l,
-            0x30ad79d97a862e03l },
-          { 0xd941471e8177b261l,0x33f65cb856a9018el,0x985ce9f607759fc4l,
-            0x9b085f33aefdbd9el },
-          0 },
-        /* 43 << 48 */
-        { { 0xab2fa51a9c43ee15l,0x457f338263f30575l,0xce8dcd863e75a6e0l,
-            0x67a03ab86e70421al },
-          { 0xe72c37893e174230l,0x45ffff6c066f4816l,0x3a3dd84879a2d4a7l,
-            0xefa4b7e68b76c24cl },
-          0 },
-        /* 44 << 48 */
-        { { 0x9a75c80676cb2566l,0x8f76acb1b24892d9l,0x7ae7b9cc1f08fe45l,
-            0x19ef73296a4907d8l },
-          { 0x2db4ab715f228bf0l,0xf3cdea39817032d7l,0x0b1f482edcabe3c0l,
-            0x3baf76b4bb86325cl },
-          0 },
-        /* 45 << 48 */
-        { { 0xd6be8f00e39e056al,0xb58f87a6232fa3bcl,0xd5cb09dc6b18c772l,
-            0x3177256da8e7e17bl },
-          { 0x1877fd34230bf92cl,0x6f9031175a36f632l,0x526a288728e2c9d9l,
-            0xc373fc94415ec45cl },
-          0 },
-        /* 46 << 48 */
-        { { 0xd49065e010089465l,0x3bab5d298e77c596l,0x7636c3a6193dbd95l,
-            0xdef5d294b246e499l },
-          { 0xb22c58b9286b2475l,0xa0b93939cd80862bl,0x3002c83af0992388l,
-            0x6de01f9beacbe14cl },
-          0 },
-        /* 47 << 48 */
-        { { 0x70fa6e2a2bf5e373l,0x501691739271694cl,0xd6ebb98c5d2ed9f1l,
-            0x11fd0b3f225bf92dl },
-          { 0x51ffbcea1e3d5520l,0xa7c549875513ad47l,0xe9689750b431d46dl,
-            0x6e69fecbb620cb9al },
-          0 },
-        /* 48 << 48 */
-        { { 0x6aac688eadd70482l,0x708de92a7b4a4e8al,0x75b6dd73758a6eefl,
-            0xea4bf352725b3c43l },
-          { 0x10041f2c87912868l,0xb1b1be95ef09297al,0x19ae23c5a9f3860al,
-            0xc4f0f839515dcf4bl },
-          0 },
-        /* 49 << 48 */
-        { { 0xf3c22398e04b5734l,0x4fba59b275f2579dl,0xbf95182d691901b3l,
-            0x4c139534eb599496l },
-          { 0xf3f821de33b77e8bl,0x66e580743785d42fl,0xe3ba3d5abdc89c2dl,
-            0x7ee988bdd19f37b9l },
-          0 },
-        /* 51 << 48 */
-        { { 0xe9ba62ca2ee53eb0l,0x64295ae23401d7dal,0x70ed8be24e493580l,
-            0x702caa624502732fl },
-          { 0xb1f4e21278d0cedfl,0x130b114bdc97057bl,0x9c5d0bd3c38c77b5l,
-            0xd9d641e18bad68e7l },
-          0 },
-        /* 52 << 48 */
-        { { 0xc71e27bf8538a5c6l,0x195c63dd89abff17l,0xfd3152851b71e3dal,
-            0x9cbdfda7fa680fa0l },
-          { 0x9db876ca849d7eabl,0xebe2764b3c273271l,0x663357e3f208dceal,
-            0x8c5bd833565b1b70l },
-          0 },
-        /* 53 << 48 */
-        { { 0x7c2dea1d122aebd4l,0x090bee4a138c1e4dl,0x94a9ffe59e4aca6cl,
-            0x8f3212ba5d405c7fl },
-          { 0x6618185f180b5e85l,0x76298d46f455ab9fl,0x0c804076476b2d88l,
-            0x45ea9d03d5a40b39l },
-          0 },
-        /* 55 << 48 */
-        { { 0xdf325ac76a2ed772l,0x35da47ccb0da2765l,0x94ce6f460bc9b166l,
-            0xe0fc82fb5f7f3628l },
-          { 0x2b26d588c055f576l,0xb9d37c97ec2bae98l,0xffbbead856908806l,
-            0xa8c2df87437f4c84l },
-          0 },
-        /* 57 << 48 */
-        { { 0x47d11c3528430994l,0x0183df71cf13d9d3l,0x98604c89aa138fe5l,
-            0xb1432e1c32c09aa1l },
-          { 0xf19bc45d99bd5e34l,0xb198be72108e9b89l,0xee500ae9dacde648l,
-            0x5936cf98746870a9l },
-          0 },
-        /* 59 << 48 */
-        { { 0x6d8efb98ed1d5a9bl,0x2e0b08e697f778fal,0xda728454dc5e0835l,
-            0x2c28a45f8e3651c4l },
-          { 0x667fab6f7ee77088l,0xd94429c8f29a94b4l,0xd83d594d9deea5b2l,
-            0x2dc08ccbbea58080l },
-          0 },
-        /* 60 << 48 */
-        { { 0xba5514df3fd165e8l,0x499fd6a9061f8811l,0x72cd1fe0bfef9f00l,
-            0x120a4bb979ad7e8al },
-          { 0xf2ffd0955f4a5ac5l,0xcfd174f195a7a2f0l,0xd42301ba9d17baf1l,
-            0xd2fa487a77f22089l },
-          0 },
-        /* 61 << 48 */
-        { { 0xfb5f53ba20a9a01el,0x3adb174fd20d6a9cl,0x6db8bb6d80e0f64fl,
-            0x596e428df6a26f76l },
-          { 0xbab1f846e6a4e362l,0x8bdb22af9b1becbdl,0x62b48335f31352adl,
-            0xd72c26409634f727l },
-          0 },
-        /* 63 << 48 */
-        { { 0xaaa61cb22b1ec1c3l,0x3b5156722cb6f00el,0x67d1be0a8bf83f60l,
-            0x88f1627aa4b804bcl },
-          { 0xc52b11a7cdade2abl,0xa6a8b71a606a4e9dl,0x04e0e6697b900551l,
-            0x35cfa33c8d5ad0d2l },
-          0 },
-        /* 64 << 48 */
-        { { 0xb93452381d531696l,0x57201c0088cdde69l,0xdde922519a86afc7l,
-            0xe3043895bd35cea8l },
-          { 0x7608c1e18555970dl,0x8267dfa92535935el,0xd4c60a57322ea38bl,
-            0xe0bf7977804ef8b5l },
-          0 },
-        /* 65 << 48 */
-        { { 0x375ca189b60f0d5al,0xc9458cf949a78362l,0x61c1c5024262c03al,
-            0x299353db4363d5bel },
-          { 0xe3565124dac407fel,0x16ea66cd5b93c532l,0xe5c6aec2749df8e3l,
-            0x59181317ce3ee4bfl },
-          0 },
-        /* 71 << 48 */
-        { { 0xd46ea34af41c2a3cl,0x9936184916545c98l,0xd7cb800ccf2498b4l,
-            0xe71d088d9353fe87l },
-          { 0x43443cbeae2e172cl,0x77131656ca905cb3l,0x76471fd1dce63594l,
-            0x346b1d1738f5e264l },
-          0 },
-        /* 77 << 48 */
-        { { 0x22b1e639f6d0a419l,0x8bbb1fad7cea278cl,0xf07f6c01370cc86al,
-            0x661bd027d39b837fl },
-          { 0x042c7a69de606098l,0x93433b154e44eb12l,0x20f44ada88d8bfe8l,
-            0xb44f66e64ccbfab6l },
-          0 },
-        /* 83 << 48 */
-        { { 0x1cc32158583d9745l,0x9306223cad1c2201l,0x76aa8d0995748039l,
-            0x29425391707e9b59l },
-          { 0x8501c0d4487cdf9el,0xbe08e89c205c5611l,0xa950400b04ccc48bl,
-            0xb614b69b637e966bl },
-          0 },
-        /* 89 << 48 */
-        { { 0xd9c3c1238ffa5c4bl,0xc65765f7f3593988l,0x9a7e5d2728242119l,
-            0x0ad27b5097ad7620l },
-          { 0x154cc5eb413a8b23l,0xae93d8de7afa8254l,0x9ce5116cab9907b5l,
-            0x9a163d78063103b9l },
-          0 },
-        /* 95 << 48 */
-        { { 0x5c4c299291086d2al,0x42c6ca9de8e2d951l,0xe67ecf93dd353f30l,
-            0xba54557fe7167c2el },
-          { 0x04a7eb2db734c779l,0x8f345605e300711al,0x4811c1ad67b27de6l,
-            0xb7ac8e842731d5f0l },
-          0 },
-        /* 101 << 48 */
-        { { 0xee33a1d8e449ac46l,0x2500ba0aaaebfa2dl,0x8fb914ebc424eff4l,
-            0x3a36545d3989255el },
-          { 0xd24f2484761235e6l,0x2fc5d5ddd9b2c04bl,0x73660f86070ab0dbl,
-            0x2e266d0479d20c7bl },
-          0 },
-        /* 107 << 48 */
-        { { 0x143752d5316d19a3l,0x56a55e01915497b8l,0x44ba4b2609a5fd15l,
-            0xe4fc3e7fd9bee4eel },
-          { 0x6f9d8609878a9f26l,0xdf36b5bd2ede7a20l,0x8e03e712a9a3e435l,
-            0x4ced555b56546d33l },
-          0 },
-        /* 113 << 48 */
-        { { 0x89a6aaab0882717el,0x56a9736b43fa5153l,0xdb07dcc9d0e1fb1al,
-            0xe7c986d34145e227l },
-          { 0x57be66abb10dad51l,0xa47b964e4aa01ea7l,0xd851d9f36bb837cbl,
-            0x9851ab3d652e13f7l },
-          0 },
-        /* 116 << 48 */
-        { { 0x22b88a805616ee30l,0xfb09548fe7ab1083l,0x8ad6ab0d511270cdl,
-            0x61f6c57a6924d9abl },
-          { 0xa0f7bf7290aecb08l,0x849f87c90df784a4l,0x27c79c15cfaf1d03l,
-            0xbbf9f675c463facel },
-          0 },
-        /* 119 << 48 */
-        { { 0x65512fb716dd6ce1l,0xfa76ebc960d53b35l,0x31e5322e19ada3bel,
-            0x7e259b75d0ccc3cdl },
-          { 0xd36d03f0e025fd69l,0xbefab782eea9e5f3l,0x1569969dd09ce6a7l,
-            0x2df5396178c385b0l },
-          0 },
-        /* 125 << 48 */
-        { { 0x4201652fce0ccac7l,0x12f8e93df1d29d2dl,0x6c2ac9b2220f00c1l,
-            0x4ee6a685a850baa9l },
-          { 0x2c2371f163ee8829l,0xddff16488f464433l,0xeab6cd8869a2c413l,
-            0xcae34beb85e4c2a8l },
-          0 },
-    },
-    {
-        /* 0 << 56 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 56 */
-        { { 0xc7913e91991724f3l,0x5eda799c39cbd686l,0xddb595c763d4fc1el,
-            0x6b63b80bac4fed54l },
-          { 0x6ea0fc697e5fb516l,0x737708bad0f1c964l,0x9628745f11a92ca5l,
-            0x61f379589a86967al },
-          0 },
-        /* 3 << 56 */
-        { { 0x46a8c4180d738dedl,0x6f1a5bb0e0de5729l,0xf10230b98ba81675l,
-            0x32c6f30c112b33d4l },
-          { 0x7559129dd8fffb62l,0x6a281b47b459bf05l,0x77c1bd3afa3b6776l,
-            0x0709b3807829973al },
-          0 },
-        /* 4 << 56 */
-        { { 0x8c26b232a3326505l,0x38d69272ee1d41bfl,0x0459453effe32afal,
-            0xce8143ad7cb3ea87l },
-          { 0x932ec1fa7e6ab666l,0x6cd2d23022286264l,0x459a46fe6736f8edl,
-            0x50bf0d009eca85bbl },
-          0 },
-        /* 5 << 56 */
-        { { 0x0b825852877a21ecl,0x300414a70f537a94l,0x3f1cba4021a9a6a2l,
-            0x50824eee76943c00l },
-          { 0xa0dbfcecf83cba5dl,0xf953814893b4f3c0l,0x6174416248f24dd7l,
-            0x5322d64de4fb09ddl },
-          0 },
-        /* 7 << 56 */
-        { { 0xa337c447f1f0ced1l,0x800cc7939492dd2bl,0x4b93151dbea08efal,
-            0x820cf3f8de0a741el },
-          { 0xff1982dc1c0f7d13l,0xef92196084dde6cal,0x1ad7d97245f96ee3l,
-            0x319c8dbe29dea0c7l },
-          0 },
-        /* 9 << 56 */
-        { { 0x0ae1d63b0eb919b0l,0xd74ee51da74b9620l,0x395458d0a674290cl,
-            0x324c930f4620a510l },
-          { 0x2d1f4d19fbac27d4l,0x4086e8ca9bedeeacl,0x0cdd211b9b679ab8l,
-            0x5970167d7090fec4l },
-          0 },
-        /* 10 << 56 */
-        { { 0x3420f2c9faf1fc63l,0x616d333a328c8bb4l,0x7d65364c57f1fe4al,
-            0x9343e87755e5c73al },
-          { 0x5795176be970e78cl,0xa36ccebf60533627l,0xfc7c738009cdfc1bl,
-            0xb39a2afeb3fec326l },
-          0 },
-        /* 11 << 56 */
-        { { 0xb7ff1ba16224408al,0xcc856e92247cfc5el,0x01f102e7c18bc493l,
-            0x4613ab742091c727l },
-          { 0xaa25e89cc420bf2bl,0x00a5317690337ec2l,0xd2be9f437d025fc7l,
-            0x3316fb856e6fe3dcl },
-          0 },
-        /* 13 << 56 */
-        { { 0x67332cfc2064cfd1l,0x339c31deb0651934l,0x719b28d52a3bcbeal,
-            0xee74c82b9d6ae5c6l },
-          { 0x0927d05ebaf28ee6l,0x82cecf2c9d719028l,0x0b0d353eddb30289l,
-            0xfe4bb977fddb2e29l },
-          0 },
-        /* 15 << 56 */
-        { { 0xe10b2ab817a91cael,0xb89aab6508e27f63l,0x7b3074a7dba3ddf9l,
-            0x1c20ce09330c2972l },
-          { 0x6b9917b45fcf7e33l,0xe6793743945ceb42l,0x18fc22155c633d19l,
-            0xad1adb3cc7485474l },
-          0 },
-        /* 16 << 56 */
-        { { 0x646f96796424c49bl,0xf888dfe867c241c9l,0xe12d4b9324f68b49l,
-            0x9a6b62d8a571df20l },
-          { 0x81b4b26d179483cbl,0x666f96329511fae2l,0xd281b3e4d53aa51fl,
-            0x7f96a7657f3dbd16l },
-          0 },
-        /* 17 << 56 */
-        { { 0xa7f8b5bf074a30cel,0xd7f52107005a32e6l,0x6f9e090750237ed4l,
-            0x2f21da478096fa2bl },
-          { 0xf3e19cb4eec863a0l,0xd18f77fd9527620al,0x9505c81c407c1cf8l,
-            0x9998db4e1b6ec284l },
-          0 },
-        /* 19 << 56 */
-        { { 0x794e2d5984ac066cl,0xf5954a92e68c69a0l,0x28c524584fd99dccl,
-            0x60e639fcb1012517l },
-          { 0xc2e601257de79248l,0xe9ef6404f12fc6d7l,0x4c4f28082a3b5d32l,
-            0x865ad32ec768eb8al },
-          0 },
-        /* 21 << 56 */
-        { { 0x4f4ddf91b2f1ac7al,0xf99eaabb760fee27l,0x57f4008a49c228e5l,
-            0x090be4401cf713bbl },
-          { 0xac91fbe45004f022l,0xd838c2c2569e1af6l,0xd6c7d20b0f1daaa5l,
-            0xaa063ac11bbb02c0l },
-          0 },
-        /* 23 << 56 */
-        { { 0x54935fcb81d73c9el,0x6d07e9790a5e97abl,0x4dc7b30acf3a6babl,
-            0x147ab1f3170bee11l },
-          { 0x0aaf8e3d9fafdee4l,0xfab3dbcb538a8b95l,0x405df4b36ef13871l,
-            0xf1f4e9cb088d5a49l },
-          0 },
-        /* 25 << 56 */
-        { { 0x43c01b87459afccdl,0x6bd45143b7432652l,0x8473453055b5d78el,
-            0x81088fdb1554ba7dl },
-          { 0xada0a52c1e269375l,0xf9f037c42dc5ec10l,0xc066060794bfbc11l,
-            0xc0a630bbc9c40d2fl },
-          0 },
-        /* 27 << 56 */
-        { { 0x9a730ed44763eb50l,0x24a0e221c1ab0d66l,0x643b6393648748f3l,
-            0x1982daa16d3c6291l },
-          { 0x6f00a9f78bbc5549l,0x7a1783e17f36384el,0xe8346323de977f50l,
-            0x91ab688db245502al },
-          0 },
-        /* 28 << 56 */
-        { { 0x331ab6b56d0bdd66l,0x0a6ef32e64b71229l,0x1028150efe7c352fl,
-            0x27e04350ce7b39d3l },
-          { 0x2a3c8acdc1070c82l,0xfb2034d380c9feefl,0x2d729621709f3729l,
-            0x8df290bf62cb4549l },
-          0 },
-        /* 29 << 56 */
-        { { 0x02f99f33fc2e4326l,0x3b30076d5eddf032l,0xbb21f8cf0c652fb5l,
-            0x314fb49eed91cf7bl },
-          { 0xa013eca52f700750l,0x2b9e3c23712a4575l,0xe5355557af30fbb0l,
-            0x1ada35167c77e771l },
-          0 },
-        /* 31 << 56 */
-        { { 0xdc9f46fc609e4a74l,0x2a44a143ba667f91l,0xbc3d8b95b4d83436l,
-            0xa01e4bd0c7bd2958l },
-          { 0x7b18293273483c90l,0xa79c6aa1a7c7b598l,0xbf3983c6eaaac07el,
-            0x8f18181e96e0d4e6l },
-          0 },
-        /* 33 << 56 */
-        { { 0x0bfc27eeacee5043l,0xae419e732eb10f02l,0x19c028d18943fb05l,
-            0x71f01cf7ff13aa2al },
-          { 0x7790737e8887a132l,0x6751330966318410l,0x9819e8a37ddb795el,
-            0xfecb8ef5dad100b2l },
-          0 },
-        /* 34 << 56 */
-        { { 0x59f74a223021926al,0xb7c28a496f9b4c1cl,0xed1a733f912ad0abl,
-            0x42a910af01a5659cl },
-          { 0x3842c6e07bd68cabl,0x2b57fa3876d70ac8l,0x8a6707a83c53aaebl,
-            0x62c1c51065b4db18l },
-          0 },
-        /* 35 << 56 */
-        { { 0x8de2c1fbb2d09dc7l,0xc3dfed12266bd23bl,0x927d039bd5b27db6l,
-            0x2fb2f0f1103243dal },
-          { 0xf855a07b80be7399l,0xed9327ce1f9f27a8l,0xa0bd99c7729bdef7l,
-            0x2b67125e28250d88l },
-          0 },
-        /* 36 << 56 */
-        { { 0x784b26e88670ced7l,0xe3dfe41fc31bd3b4l,0x9e353a06bcc85cbcl,
-            0x302e290960178a9dl },
-          { 0x860abf11a6eac16el,0x76447000aa2b3aacl,0x46ff9d19850afdabl,
-            0x35bdd6a5fdb2d4c1l },
-          0 },
-        /* 37 << 56 */
-        { { 0xe82594b07e5c9ce9l,0x0f379e5320af346el,0x608b31e3bc65ad4al,
-            0x710c6b12267c4826l },
-          { 0x51c966f971954cf1l,0xb1cec7930d0aa215l,0x1f15598986bd23a8l,
-            0xae2ff99cf9452e86l },
-          0 },
-        /* 39 << 56 */
-        { { 0xb5a741a76b2515cfl,0x71c416019585c749l,0x78350d4fe683de97l,
-            0x31d6152463d0b5f5l },
-          { 0x7a0cc5e1fbce090bl,0xaac927edfbcb2a5bl,0xe920de4920d84c35l,
-            0x8c06a0b622b4de26l },
-          0 },
-        /* 40 << 56 */
-        { { 0xd34dd58bafe7ddf3l,0x55851fedc1e6e55bl,0xd1395616960696e7l,
-            0x940304b25f22705fl },
-          { 0x6f43f861b0a2a860l,0xcf1212820e7cc981l,0x121862120ab64a96l,
-            0x09215b9ab789383cl },
-          0 },
-        /* 41 << 56 */
-        { { 0x311eb30537387c09l,0xc5832fcef03ee760l,0x30358f5832f7ea19l,
-            0xe01d3c3491d53551l },
-          { 0x1ca5ee41da48ea80l,0x34e71e8ecf4fa4c1l,0x312abd257af1e1c7l,
-            0xe3afcdeb2153f4a5l },
-          0 },
-        /* 43 << 56 */
-        { { 0x2a17747fa6d74081l,0x60ea4c0555a26214l,0x53514bb41f88c5fel,
-            0xedd645677e83426cl },
-          { 0xd5d6cbec96460b25l,0xa12fd0ce68dc115el,0xc5bc3ed2697840eal,
-            0x969876a8a6331e31l },
-          0 },
-        /* 44 << 56 */
-        { { 0x60c36217472ff580l,0xf42297054ad41393l,0x4bd99ef0a03b8b92l,
-            0x501c7317c144f4f6l },
-          { 0x159009b318464945l,0x6d5e594c74c5c6bel,0x2d587011321a3660l,
-            0xd1e184b13898d022l },
-          0 },
-        /* 45 << 56 */
-        { { 0x5ba047524c6a7e04l,0x47fa1e2b45550b65l,0x9419daf048c0a9a5l,
-            0x663629537c243236l },
-          { 0xcd0744b15cb12a88l,0x561b6f9a2b646188l,0x599415a566c2c0c0l,
-            0xbe3f08590f83f09al },
-          0 },
-        /* 46 << 56 */
-        { { 0x9141c5beb92041b8l,0x01ae38c726477d0dl,0xca8b71f3d12c7a94l,
-            0xfab5b31f765c70dbl },
-          { 0x76ae7492487443e9l,0x8595a310990d1349l,0xf8dbeda87d460a37l,
-            0x7f7ad0821e45a38fl },
-          0 },
-        /* 47 << 56 */
-        { { 0xed1d4db61059705al,0xa3dd492ae6b9c697l,0x4b92ee3a6eb38bd5l,
-            0xbab2609d67cc0bb7l },
-          { 0x7fc4fe896e70ee82l,0xeff2c56e13e6b7e3l,0x9b18959e34d26fcal,
-            0x2517ab66889d6b45l },
-          0 },
-        /* 48 << 56 */
-        { { 0xf167b4e0bdefdd4fl,0x69958465f366e401l,0x5aa368aba73bbec0l,
-            0x121487097b240c21l },
-          { 0x378c323318969006l,0xcb4d73cee1fe53d1l,0x5f50a80e130c4361l,
-            0xd67f59517ef5212bl },
-          0 },
-        /* 49 << 56 */
-        { { 0xf145e21e9e70c72el,0xb2e52e295566d2fbl,0x44eaba4a032397f5l,
-            0x5e56937b7e31a7del },
-          { 0x68dcf517456c61e1l,0xbc2e954aa8b0a388l,0xe3552fa760a8b755l,
-            0x03442dae73ad0cdel },
-          0 },
-        /* 51 << 56 */
-        { { 0x3fcbdbce478e2135l,0x7547b5cfbda35342l,0xa97e81f18a677af6l,
-            0xc8c2bf8328817987l },
-          { 0xdf07eaaf45580985l,0xc68d1f05c93b45cbl,0x106aa2fec77b4cacl,
-            0x4c1d8afc04a7ae86l },
-          0 },
-        /* 52 << 56 */
-        { { 0xdb41c3fd9eb45ab2l,0x5b234b5bd4b22e74l,0xda253decf215958al,
-            0x67e0606ea04edfa0l },
-          { 0xabbbf070ef751b11l,0xf352f175f6f06dcel,0xdfc4b6af6839f6b4l,
-            0x53ddf9a89959848el },
-          0 },
-        /* 53 << 56 */
-        { { 0xda49c379c21520b0l,0x90864ff0dbd5d1b6l,0x2f055d235f49c7f7l,
-            0xe51e4e6aa796b2d8l },
-          { 0xc361a67f5c9dc340l,0x5ad53c37bca7c620l,0xda1d658832c756d0l,
-            0xad60d9118bb67e13l },
-          0 },
-        /* 55 << 56 */
-        { { 0xd1183316fd6f7140l,0xf9fadb5bbd8e81f7l,0x701d5e0c5a02d962l,
-            0xfdee4dbf1b601324l },
-          { 0xbed1740735d7620el,0x04e3c2c3f48c0012l,0x9ee29da73455449al,
-            0x562cdef491a836c4l },
-          0 },
-        /* 57 << 56 */
-        { { 0x147ebf01fad097a5l,0x49883ea8610e815dl,0xe44d60ba8a11de56l,
-            0xa970de6e827a7a6dl },
-          { 0x2be414245e17fc19l,0xd833c65701214057l,0x1375813b363e723fl,
-            0x6820bb88e6a52e9bl },
-          0 },
-        /* 59 << 56 */
-        { { 0xe1b6f60c08191224l,0xc4126ebbde4ec091l,0xe1dff4dc4ae38d84l,
-            0xde3f57db4f2ef985l },
-          { 0x34964337d446a1ddl,0x7bf217a0859e77f6l,0x8ff105278e1d13f5l,
-            0xa304ef0374eeae27l },
-          0 },
-        /* 60 << 56 */
-        { { 0xfc6f5e47d19dfa5al,0xdb007de37fad982bl,0x28205ad1613715f5l,
-            0x251e67297889529el },
-          { 0x727051841ae98e78l,0xf818537d271cac32l,0xc8a15b7eb7f410f5l,
-            0xc474356f81f62393l },
-          0 },
-        /* 61 << 56 */
-        { { 0x92dbdc5ac242316bl,0xabe060acdbf4aff5l,0x6e8c38fe909a8ec6l,
-            0x43e514e56116cb94l },
-          { 0x2078fa3807d784f9l,0x1161a880f4b5b357l,0x5283ce7913adea3dl,
-            0x0756c3e6cc6a910bl },
-          0 },
-        /* 63 << 56 */
-        { { 0xa573a4966d17fbc7l,0x0cd1a70a73d2b24el,0x34e2c5cab2676937l,
-            0xe7050b06bf669f21l },
-          { 0xfbe948b61ede9046l,0xa053005197662659l,0x58cbd4edf10124c5l,
-            0xde2646e4dd6c06c8l },
-          0 },
-        /* 64 << 56 */
-        { { 0x332f81088cad38c0l,0x471b7e906bd68ae2l,0x56ac3fb20d8e27a3l,
-            0xb54660db136b4b0dl },
-          { 0x123a1e11a6fd8de4l,0x44dbffeaa37799efl,0x4540b977ce6ac17cl,
-            0x495173a8af60acefl },
-          0 },
-        /* 65 << 56 */
-        { { 0xc48b1478db447d0bl,0xe1b85f5d46104fbbl,0x4ab31e7d991c60b9l,
-            0xaa674a9258a0cfd0l },
-          { 0x179fc2cd316f4297l,0x90c18642dcccbc82l,0x65d4309e56a4c163l,
-            0xf211a9c7145a33ecl },
-          0 },
-        /* 71 << 56 */
-        { { 0x9669170cdc32717fl,0x52d69b5138133e34l,0xaed24e5fb079c3b2l,
-            0xaba44a91a21ea3d2l },
-          { 0xd6814f1938d40105l,0x38289fe463462e7al,0x1793eefa3a80cbf5l,
-            0x05816a0795f29bacl },
-          0 },
-        /* 77 << 56 */
-        { { 0xdca88ad98f850641l,0x8c1152c447999b0dl,0x509f654e654aff33l,
-            0x2228550f08a12f14l },
-          { 0x60fe99dbb6a0ccdbl,0x80d6829bfc2cddccl,0x190f454dd5617aa4l,
-            0x0aea05fe36295d2dl },
-          0 },
-        /* 83 << 56 */
-        { { 0x1de06c8af9bef9a5l,0xe24d85d3fb2d3164l,0x3dbe455e8d203d3el,
-            0x439bee4735ea47a9l },
-          { 0xcc143432784893d7l,0x9b71073bd9bebd00l,0x6c106b343aa2fe88l,
-            0x9df2a42734746f7al },
-          0 },
-        /* 89 << 56 */
-        { { 0x1ad0b3725a8c2168l,0x64e52d6d143f0402l,0xd933c783e320f31fl,
-            0x1ccf90a80ff14f52l },
-          { 0xd3a3133ee1e6d0c0l,0xfd75a2d5b4acc8cal,0x62659b8e5559d171l,
-            0x5087d6e9f13ad52al },
-          0 },
-        /* 95 << 56 */
-        { { 0xb4d647a5deef31a4l,0x95bf4ab180975ea9l,0x2f92d15adf57b03el,
-            0x5ee808ab746b26d6l },
-          { 0x4341597c1082f261l,0x027795eb40c45e95l,0xcb77744b3b690c30l,
-            0xdd87c084af3f88d1l },
-          0 },
-        /* 101 << 56 */
-        { { 0x469f177572109785l,0xf365e55123f84d6cl,0x8006a9c28a046dbbl,
-            0x1b9fbe892fa09f52l },
-          { 0xac18a88016075e9el,0x4a3069bc1e3fd628l,0x20c61eaa60c61c14l,
-            0x315b59daf61f004bl },
-          0 },
-        /* 107 << 56 */
-        { { 0x0a94387f26d04857l,0x952a4ebc43d6de95l,0xb422e15cf14abdfal,
-            0x5b7a0153324ef90cl },
-          { 0x6aefa20e9826ec5bl,0x0e529886ad2fe161l,0xb710a74ec0d416e8l,
-            0x6cf4b0a5fb6c90bcl },
-          0 },
-        /* 113 << 56 */
-        { { 0x822aea4031979d3bl,0xb504eafde215a109l,0xa8761ead84bf2377l,
-            0xb55c1e55efb3d942l },
-          { 0xd01f9b0212b7f17bl,0x41b62c2a891bfbbfl,0x50800e6b08938149l,
-            0x527b50a9b0a55d82l },
-          0 },
-        /* 116 << 56 */
-        { { 0x6bc84d8d1d9ce3c4l,0x53b465072a308df0l,0x6c3da9bfca79c88al,
-            0x9636ad9c36372acfl },
-          { 0x8840e92c425ef14cl,0x863191f96af3225bl,0xd56d82d0d369b857l,
-            0x2053a2527a4c41f9l },
-          0 },
-        /* 119 << 56 */
-        { { 0x20aecd6609ca8805l,0x945d9b31dc818ee6l,0x1424647c2119b44bl,
-            0xbe934d7e5a6641f9l },
-          { 0xe91d53184559e55el,0xc2fb8e0b4dfbc3d4l,0x9e92e20676cb937fl,
-            0x0f5582e4f2932429l },
-          0 },
-        /* 125 << 56 */
-        { { 0xb5fc22a42d31809fl,0x6d582d2b0e35b7b4l,0x5fac415158c5f576l,
-            0xdff239371e4cd7c9l },
-          { 0x0f62b329ed4d1925l,0x00994a2e6010fb16l,0xb4b91076bd754837l,
-            0xfde219463345103al },
-          0 },
-    },
-    {
-        /* 0 << 64 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 64 */
-        { { 0x4f922fc516a0d2bbl,0x0d5cc16c1a623499l,0x9241cf3a57c62c8bl,
-            0x2f5e6961fd1b667fl },
-          { 0x5c15c70bf5a01797l,0x3d20b44d60956192l,0x04911b37071fdb52l,
-            0xf648f9168d6f0f7bl },
-          0 },
-        /* 3 << 64 */
-        { { 0x4090914bb5def996l,0x1cb69c83233dd1e7l,0xc1e9c1d39b3d5e76l,
-            0x1f3338edfccf6012l },
-          { 0xb1e95d0d2f5378a8l,0xacf4c2c72f00cd21l,0x6e984240eb5fe290l,
-            0xd66c038d248088ael },
-          0 },
-        /* 4 << 64 */
-        { { 0x9ad5462bb4d8bc50l,0x181c0b16a9195770l,0xebd4fe1c78412a68l,
-            0xae0341bcc0dff48cl },
-          { 0xb6bc45cf7003e866l,0xf11a6dea8a24a41bl,0x5407151ad04c24c2l,
-            0x62c9d27dda5b7b68l },
-          0 },
-        /* 5 << 64 */
-        { { 0xd4992b30614c0900l,0xda98d121bd00c24bl,0x7f534dc87ec4bfa1l,
-            0x4a5ff67437dc34bcl },
-          { 0x68c196b81d7ea1d7l,0x38cf289380a6d208l,0xfd56cd09e3cbbd6el,
-            0xec72e27e4205a5b6l },
-          0 },
-        /* 7 << 64 */
-        { { 0xe8b97932b88756ddl,0xed4e8652f17e3e61l,0xc2dd14993ee1c4a4l,
-            0xc0aaee17597f8c0el },
-          { 0x15c4edb96c168af3l,0x6563c7bfb39ae875l,0xadfadb6f20adb436l,
-            0xad55e8c99a042ac0l },
-          0 },
-        /* 9 << 64 */
-        { { 0x65c29219909523c8l,0xa62f648fa3a1c741l,0x88598d4f60c9e55al,
-            0xbce9141b0e4f347al },
-          { 0x9af97d8435f9b988l,0x0210da62320475b6l,0x3c076e229191476cl,
-            0x7520dbd944fc7834l },
-          0 },
-        /* 10 << 64 */
-        { { 0x87a7ebd1e0a1b12al,0x1e4ef88d770ba95fl,0x8c33345cdc2ae9cbl,
-            0xcecf127601cc8403l },
-          { 0x687c012e1b39b80fl,0xfd90d0ad35c33ba4l,0xa3ef5a675c9661c2l,
-            0x368fc88ee017429el },
-          0 },
-        /* 11 << 64 */
-        { { 0x664300b07850ec06l,0xac5a38b97d3a10cfl,0x9233188de34ab39dl,
-            0xe77057e45072cbb9l },
-          { 0xbcf0c042b59e78dfl,0x4cfc91e81d97de52l,0x4661a26c3ee0ca4al,
-            0x5620a4c1fb8507bcl },
-          0 },
-        /* 13 << 64 */
-        { { 0x84b9ca1504b6c5a0l,0x35216f3918f0e3a3l,0x3ec2d2bcbd986c00l,
-            0x8bf546d9d19228fel },
-          { 0xd1c655a44cd623c3l,0x366ce718502b8e5al,0x2cfc84b4eea0bfe7l,
-            0xe01d5ceecf443e8el },
-          0 },
-        /* 15 << 64 */
-        { { 0xa75feacabe063f64l,0x9b392f43bce47a09l,0xd42415091ad07acal,
-            0x4b0c591b8d26cd0fl },
-          { 0x2d42ddfd92f1169al,0x63aeb1ac4cbf2392l,0x1de9e8770691a2afl,
-            0xebe79af7d98021dal },
-          0 },
-        /* 16 << 64 */
-        { { 0x58af2010f5b343bcl,0x0f2e400af2f142fel,0x3483bfdea85f4bdfl,
-            0xf0b1d09303bfeaa9l },
-          { 0x2ea01b95c7081603l,0xe943e4c93dba1097l,0x47be92adb438f3a6l,
-            0x00bb7742e5bf6636l },
-          0 },
-        /* 17 << 64 */
-        { { 0x66917ce63b5f1cc4l,0x37ae52eace872e62l,0xbb087b722905f244l,
-            0x120770861e6af74fl },
-          { 0x4b644e491058edeal,0x827510e3b638ca1dl,0x8cf2b7046038591cl,
-            0xffc8b47afe635063l },
-          0 },
-        /* 19 << 64 */
-        { { 0x7677408d6dfafed3l,0x33a0165339661588l,0x3c9c15ec0b726fa0l,
-            0x090cfd936c9b56dal },
-          { 0xe34f4baea3c40af5l,0x3469eadbd21129f1l,0xcc51674a1e207ce8l,
-            0x1e293b24c83b1ef9l },
-          0 },
-        /* 21 << 64 */
-        { { 0x796d3a85825808bdl,0x51dc3cb73fd6e902l,0x643c768a916219d1l,
-            0x36cd7685a2ad7d32l },
-          { 0xe3db9d05b22922a4l,0x6494c87edba29660l,0xf0ac91dfbcd2ebc7l,
-            0x4deb57a045107f8dl },
-          0 },
-        /* 23 << 64 */
-        { { 0xb6c69ac82094cec3l,0x9976fb88403b770cl,0x1dea026c4859590dl,
-            0xb6acbb468562d1fdl },
-          { 0x7cd6c46144569d85l,0xc3190a3697f0891dl,0xc6f5319548d5a17dl,
-            0x7d919966d749abc8l },
-          0 },
-        /* 25 << 64 */
-        { { 0xb53b7de561906373l,0x858dbadeeb999595l,0x8cbb47b2a59e5c36l,
-            0x660318b3dcf4e842l },
-          { 0xbd161ccd12ba4b7al,0xf399daabf8c8282al,0x1587633aeeb2130dl,
-            0xa465311ada38dd7dl },
-          0 },
-        /* 27 << 64 */
-        { { 0x2dae9082be7cf3a6l,0xcc86ba92bc967274l,0xf28a2ce8aea0a8a9l,
-            0x404ca6d96ee988b3l },
-          { 0xfd7e9c5d005921b8l,0xf56297f144e79bf9l,0xa163b4600d75ddc2l,
-            0x30b23616a1f2be87l },
-          0 },
-        /* 28 << 64 */
-        { { 0x19e6125dec3f1decl,0x07b1f040911178dal,0xd93ededa904a6738l,
-            0x55187a5a0bebedcdl },
-          { 0xf7d04722eb329d41l,0xf449099ef170b391l,0xfd317a69ca99f828l,
-            0x50c3db2b34a4976dl },
-          0 },
-        /* 29 << 64 */
-        { { 0x0064d8585499fb32l,0x7b67bad977a8aeb7l,0x1d3eb9772d08eec5l,
-            0x5fc047a6cbabae1dl },
-          { 0x0577d159e54a64bbl,0x8862201bc43497e4l,0xad6b4e282ce0608dl,
-            0x8b687b7d0b167aacl },
-          0 },
-        /* 31 << 64 */
-        { { 0xe9f9669cda94951el,0x4b6af58d66b8d418l,0xfa32107417d426a4l,
-            0xc78e66a99dde6027l },
-          { 0x0516c0834a53b964l,0xfc659d38ff602330l,0x0ab55e5c58c5c897l,
-            0x985099b2838bc5dfl },
-          0 },
-        /* 33 << 64 */
-        { { 0xe7a935fa1684cb3bl,0x571650b5a7d7e69dl,0x6ba9ffa40328c168l,
-            0xac43f6bc7e46f358l },
-          { 0x54f75e567cb6a779l,0x4e4e2cc8c61320del,0xb94258bc2b8903d0l,
-            0xc7f32d57ceecabe0l },
-          0 },
-        /* 34 << 64 */
-        { { 0x34739f16cd7d9d89l,0x6daab4267ca080b5l,0x772086ff40e19f45l,
-            0x43caa56118c61b42l },
-          { 0x0ba3d4a8dbf365f1l,0xa0db435ee760ad97l,0xfd6f30d56916c59bl,
-            0xab34cb5dafe12f5dl },
-          0 },
-        /* 35 << 64 */
-        { { 0x445b86ea02a3260al,0x8c51d6428d689babl,0x183334d65588904cl,
-            0xf8a3b84d479d6422l },
-          { 0x581acfa0f0833d00l,0xc50827bc3b567d2dl,0x2c935e6daddcf73el,
-            0x2a645f7704dd19f2l },
-          0 },
-        /* 36 << 64 */
-        { { 0x78d2e8dfcb564473l,0x4349a97357d5621al,0x9d835d89218f8b24l,
-            0x01fe7bc5079b6ee2l },
-          { 0xe57f2a2b5b3b5dcel,0x5a8637b75fe55565l,0x83ff34aea41dbae7l,
-            0xfce1199c950a7a8fl },
-          0 },
-        /* 37 << 64 */
-        { { 0x0ca5d25bf8e71ce2l,0x204edc4a062685dal,0x06fe407d87678ec2l,
-            0xd16936a07defa39al },
-          { 0x3b108d84af3d16d0l,0xf2e9616d0305cad0l,0xbc9537e6f27bed97l,
-            0x71c2d699ebc9f45cl },
-          0 },
-        /* 39 << 64 */
-        { { 0x203bdd84cdcd3a85l,0x1107b901ade3ccfal,0xa7da89e95533159dl,
-            0x8d834005860e8c64l },
-          { 0x914bc0eb2a7638f7l,0xc66ce0a6620e8606l,0x11ef98c2e6c12dc0l,
-            0x25666b1d7780fc0el },
-          0 },
-        /* 40 << 64 */
-        { { 0x374f541f3e707706l,0x9a4d3638a831d0cfl,0x4ab4f4831518ca04l,
-            0x54e3ee5dfe38c318l },
-          { 0x383ae36403c8819bl,0xa9d1daa12e17864cl,0x245a97b350eeaa5bl,
-            0x5362d00999bf4e83l },
-          0 },
-        /* 41 << 64 */
-        { { 0x6667e89f4ded8a4fl,0xa59161abc36a7795l,0x1c96f6f9331ccf94l,
-            0xf2727e879a686d49l },
-          { 0x0f94894bb841295fl,0xb0fe8f744a0503d1l,0x60c581c7ef407926l,
-            0x1980c8e13edb7e1cl },
-          0 },
-        /* 43 << 64 */
-        { { 0x47948c84c5de1a41l,0xd595d14a48959688l,0x3bfca4be86ff21c9l,
-            0xb5ff59b86a4191cal },
-          { 0xced1dd1d65094c86l,0xd57b86559dc9d001l,0xbcac6fa3486e51d7l,
-            0x8e97e2637b774c1bl },
-          0 },
-        /* 44 << 64 */
-        { { 0xfc0313c29bd43980l,0x9c954b70f172db29l,0x679bdcb7f954a21al,
-            0x6b48170954e2e4fcl },
-          { 0x318af5f530baf1d0l,0x26ea8a3ccbf92060l,0xc3c69d7ccd5ae258l,
-            0xa73ba0470ead07c9l },
-          0 },
-        /* 45 << 64 */
-        { { 0xe82eb003e35dca85l,0xfd0000fa31e39180l,0xbca90f746735f378l,
-            0xe6aa783158c943edl },
-          { 0x0e94ecd5b6a438d7l,0xc02b60faf9a5f114l,0x4063568b8b1611ebl,
-            0x1398bdc1272509ecl },
-          0 },
-        /* 46 << 64 */
-        { { 0xc2ef6a01be3e92d1l,0x1bce9c27282bd5ddl,0xf7e488f3adda0568l,
-            0xd4f15fdb1af9bb8bl },
-          { 0x8c490ade4da846efl,0x76229da17f0b825el,0xc8b812082a6711c6l,
-            0x511f5e23b4c523aal },
-          0 },
-        /* 47 << 64 */
-        { { 0xbdf4e7049970f46el,0x70e220288dadbd1al,0x2b86c97fb1223d26l,
-            0x042ad22ecf62f51al },
-          { 0x72944339ba2ed2e9l,0x0ba0d10ef94fa61dl,0x3f86164194e68f15l,
-            0x1312a74acb86c545l },
-          0 },
-        /* 48 << 64 */
-        { { 0x3a63c39731815e69l,0x6df9cbd6dcdd2802l,0x4c47ed4a15b4f6afl,
-            0x62009d826ac0f978l },
-          { 0x664d80d28b898fc7l,0x72f1eeda2c17c91fl,0x9e84d3bc7aae6609l,
-            0x58c7c19528376895l },
-          0 },
-        /* 49 << 64 */
-        { { 0x640ebf5d5b8d354al,0xa5f3a8fdb396ff64l,0xd53f041d8378ed81l,
-            0x1969d61bc1234ad2l },
-          { 0x16d7acffeb68bde2l,0x63767a68f23e9368l,0x937a533c38928d95l,
-            0xee2190bbbeb0f1f2l },
-          0 },
-        /* 51 << 64 */
-        { { 0xb6860c9a73a4aafbl,0xb2f996290488870dl,0x16ef6232572d9e25l,
-            0x5b9eb1bad1383389l },
-          { 0xabf713a7ed8d77f8l,0xd2b4a2e9e2b69e64l,0xa1a22cfd6d6f17c2l,
-            0x4bfd6f992d604511l },
-          0 },
-        /* 52 << 64 */
-        { { 0xdcff7630d9294f07l,0x89b765d68dba8fd0l,0x553e55de8dbcaccdl,
-            0x9b4a009eed702bf8l },
-          { 0xf6e534dd27b8ca0dl,0xc4496b346177fd52l,0x378ce6f6c87bb7b7l,
-            0x68633d4844cc19f0l },
-          0 },
-        /* 53 << 64 */
-        { { 0xfe550021bc84c625l,0x8d7169986d45e4a3l,0xa09c6ded4c0c66b7l,
-            0xe32313aeb9e1d547l },
-          { 0x8ce775b4d1e8e0b9l,0xa899f9102654dd15l,0x7c38aa066cc8b2a9l,
-            0xe6ebb291d6ce6cc0l },
-          0 },
-        /* 55 << 64 */
-        { { 0x5963df62a6991216l,0x4c17f72246996010l,0x131dc2b840477722l,
-            0x78bf50b0d1765a75l },
-          { 0x360afd587ceaca12l,0xebc55dbb139cd470l,0x9083e27e4c05541cl,
-            0xc10057a3b873d757l },
-          0 },
-        /* 57 << 64 */
-        { { 0x440009c3deed7769l,0xde2fa58a14fd8a44l,0x509e7df35b627596l,
-            0x3d76a87cc3bb07a7l },
-          { 0x8018fee5b8ef000al,0x71ce33e9823fd4b6l,0x3a1cac37469c0bb1l,
-            0x92fe7aeaf3eec8eel },
-          0 },
-        /* 59 << 64 */
-        { { 0x37ad0eb8de64e568l,0x4ac669bca1e3e20el,0x240d0ac22ce944edl,
-            0xd532039a3c1b28fbl },
-          { 0xa2bb899a23acba6cl,0xd472af671af937e1l,0x04478f7b8851e753l,
-            0x74030eef5ea05307l },
-          0 },
-        /* 60 << 64 */
-        { { 0x3559e7b67dc17874l,0xd0caf0ef8195cc2al,0x07c067880cd24dd9l,
-            0x01a99ea002857c41l },
-          { 0xd86579e490f82f63l,0xb1e0658ae41c9237l,0x075ffafd93fd1e79l,
-            0x6e70403547f60b8fl },
-          0 },
-        /* 61 << 64 */
-        { { 0x2246ad76c1d68c31l,0x9126202b0d5c4677l,0x5f40de81638882dcl,
-            0xb131988ca3253a7fl },
-          { 0x766f1897ba9ae0a8l,0xf0e01dd41d8b5fefl,0x03e28ce3ed7b12c8l,
-            0x44b3a2be1fd20e1el },
-          0 },
-        /* 63 << 64 */
-        { { 0xd4c8e8e5f2a5f247l,0x42ffd816c2c7c979l,0x89e1485211093d1al,
-            0x98f44a4613871ebbl },
-          { 0x374849964b032e2dl,0x28a430f445995a61l,0xf2f9acbad5be16b6l,
-            0xac98a5402d8e02aal },
-          0 },
-        /* 64 << 64 */
-        { { 0x0d53f5c7a3e6fcedl,0xe8cbbdd5f45fbdebl,0xf85c01df13339a70l,
-            0x0ff71880142ceb81l },
-          { 0x4c4e8774bd70437al,0x5fb32891ba0bda6al,0x1cdbebd2f18bd26el,
-            0x2f9526f103a9d522l },
-          0 },
-        /* 65 << 64 */
-        { { 0x48334fdcc20b8d30l,0x25f887d749414fddl,0x9ccd513311a2cf0dl,
-            0x7e7799e4d08975a4l },
-          { 0xb5993a53729b951cl,0x0cf14a5a62dbc6a8l,0xb39ed36efe4d16eel,
-            0xb75f3fb681bda63al },
-          0 },
-        /* 71 << 64 */
-        { { 0xac7db8706d4f68b5l,0x819a13c7be49b3a4l,0x646ae2b1418bf1e9l,
-            0x25b53a5f69b3a5ccl },
-          { 0xd23d94d37de26578l,0x8bb581caecdd138al,0x9e053f67f857b0dal,
-            0xe679cc7a255ff474l },
-          0 },
-        /* 77 << 64 */
-        { { 0x4a4b8d990df097f9l,0x0ae1227a0b4173cal,0x0d401778adb72178l,
-            0xd29848b43f421e0cl },
-          { 0xc5eec6096eb0722dl,0x527d72877e12c028l,0xed12a9e71b5dcc0cl,
-            0x26b27344dcf4b4dal },
-          0 },
-        /* 83 << 64 */
-        { { 0x695c502565e4408al,0x2d23768fcbce94e6l,0x1505fa1e5080b88dl,
-            0x5c8fbab6855f7cc1l },
-          { 0x70d876f275fb125dl,0x456421330a252007l,0xfe99249a8ee05be1l,
-            0x0893b620f4bf5490l },
-          0 },
-        /* 89 << 64 */
-        { { 0x2a59df1ed9fe6bdfl,0x96a9c791785e057fl,0x4b0d795f86a1d751l,
-            0x196c8e0aec642886l },
-          { 0x6df67899bc0e055cl,0x4173204a63007433l,0xb5ee4efec21c9245l,
-            0x2f7d4c75c1451bael },
-          0 },
-        /* 95 << 64 */
-        { { 0x2ad7f836b1047b7fl,0x368d431a71f6bfe1l,0xfcd933b103db4667l,
-            0xfff77ed3ecb81330l },
-          { 0x3677935b44958bd4l,0xa6cfcda8a1d5a9e7l,0xb2b73bc699ff9fael,
-            0x1c2cd628f866d3c4l },
-          0 },
-        /* 101 << 64 */
-        { { 0x2756873495031ceel,0xebed373d51091c1bl,0x398fef0819aa2f27l,
-            0x2f26174e2c0a9feal },
-          { 0xedca72b6b219be3fl,0x001a8fdc80503df8l,0x9a2fadbb6b93f643l,
-            0xd48e552cd44cebc3l },
-          0 },
-        /* 107 << 64 */
-        { { 0x6c0dbb68667a7ab6l,0x00490ce757630e91l,0x04976cd57eb2f382l,
-            0x9ee486b655dda4a3l },
-          { 0x4ea5c9c9cca0d01cl,0xa6e054b639f69c6dl,0xb3b7ac992ecab239l,
-            0x80c9f6d17597512el },
-          0 },
-        /* 113 << 64 */
-        { { 0x64dfdd68b942fad9l,0xe7d8e88da5eb3d14l,0xb7281dc2382f6301l,
-            0xcfa2ee6dbfe00a7fl },
-          { 0x6e617657dc7be39fl,0x22d58dd6591c6e3al,0xd3a4003918318c13l,
-            0xcac6c830981b6b72l },
-          0 },
-        /* 116 << 64 */
-        { { 0x009690ffb4fbfaa0l,0x8bbbdab73619c6dbl,0xc6d44273728356e8l,
-            0xfd76f0d8e453ec35l },
-          { 0x775c2554aac28a29l,0x28f7af9d5c55e4f0l,0xbacf54a688e8ad4dl,
-            0x85b018e80aa76ddfl },
-          0 },
-        /* 119 << 64 */
-        { { 0x27893f7983ce88e4l,0x9556c9977785f13dl,0x83d3c38d3a35831el,
-            0x3856c829d12f0a1dl },
-          { 0xb308d84c93259c1al,0x4ef87ab4691ffd28l,0x76a18d5321a88c58l,
-            0xf13cd5d53503cb4dl },
-          0 },
-        /* 125 << 64 */
-        { { 0x669d93dba8cc0db3l,0x403cb9200dfcfcf4l,0x5def4a03e77c3979l,
-            0x2a05c9423e2e2522l },
-          { 0xd86dca52b5f48bf0l,0x174766de5828a135l,0x116290b40d3a96d0l,
-            0xe1999457aeea1193l },
-          0 },
-    },
-    {
-        /* 0 << 72 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 72 */
-        { { 0x0db2fb5ed005832al,0x5f5efd3b91042e4fl,0x8c4ffdc6ed70f8cal,
-            0xe4645d0bb52da9ccl },
-          { 0x9596f58bc9001d1fl,0x52c8f0bc4e117205l,0xfd4aa0d2e398a084l,
-            0x815bfe3a104f49del },
-          0 },
-        /* 3 << 72 */
-        { { 0x524d226ad7ab9a2dl,0x9c00090d7dfae958l,0x0ba5f5398751d8c2l,
-            0x8afcbcdd3ab8262dl },
-          { 0x57392729e99d043bl,0xef51263baebc943al,0x9feace9320862935l,
-            0x639efc03b06c817bl },
-          0 },
-        /* 4 << 72 */
-        { { 0xe839be7d341d81dcl,0xcddb688932148379l,0xda6211a1f7026eadl,
-            0xf3b2575ff4d1cc5el },
-          { 0x40cfc8f6a7a73ae6l,0x83879a5e61d5b483l,0xc5acb1ed41a50ebcl,
-            0x59a60cc83c07d8fal },
-          0 },
-        /* 5 << 72 */
-        { { 0xdec98d4ac3b81990l,0x1cb837229e0cc8fel,0xfe0b0491d2b427b9l,
-            0x0f2386ace983a66cl },
-          { 0x930c4d1eb3291213l,0xa2f82b2e59a62ae4l,0x77233853f93e89e3l,
-            0x7f8063ac11777c7fl },
-          0 },
-        /* 7 << 72 */
-        { { 0x36e607cf02ff6072l,0xa47d2ca98ad98cdcl,0xbf471d1ef5f56609l,
-            0xbcf86623f264ada0l },
-          { 0xb70c0687aa9e5cb6l,0xc98124f217401c6cl,0x8189635fd4a61435l,
-            0xd28fb8afa9d98ea6l },
-          0 },
-        /* 9 << 72 */
-        { { 0x3d4da8c3017025f3l,0xefcf628cfb9579b4l,0x5c4d00161f3716ecl,
-            0x9c27ebc46801116el },
-          { 0x5eba0ea11da1767el,0xfe15145247004c57l,0x3ace6df68c2373b7l,
-            0x75c3dffe5dbc37acl },
-          0 },
-        /* 10 << 72 */
-        { { 0xa2a147dba28a0749l,0x246c20d6ee519165l,0x5068d1b1d3810715l,
-            0xb1e7018c748160b9l },
-          { 0x03f5b1faf380ff62l,0xef7fb1ddf3cb2c1el,0xeab539a8fc91a7dal,
-            0x83ddb707f3f9b561l },
-          0 },
-        /* 11 << 72 */
-        { { 0xb57276d980101b98l,0x760883fdb82f0f66l,0x89d7de754bc3eff3l,
-            0x03b606435dc2ab40l },
-          { 0xcd6e53dfe05beeacl,0xf2f1e862bc3325cdl,0xdd0f7921774f03c3l,
-            0x97ca72214552cc1bl },
-          0 },
-        /* 13 << 72 */
-        { { 0x760cb3b5e224c5d7l,0xfa3baf8c68616919l,0x9fbca1138d142552l,
-            0x1ab18bf17669ebf5l },
-          { 0x55e6f53e9bdf25ddl,0x04cc0bf3cb6cd154l,0x595bef4995e89080l,
-            0xfe9459a8104a9ac1l },
-          0 },
-        /* 15 << 72 */
-        { { 0x694b64c5abb020e8l,0x3d18c18419c4eec7l,0x9c4673ef1c4793e5l,
-            0xc7b8aeb5056092e6l },
-          { 0x3aa1ca43f0f8c16bl,0x224ed5ecd679b2f6l,0x0d56eeaf55a205c9l,
-            0xbfe115ba4b8e028bl },
-          0 },
-        /* 16 << 72 */
-        { { 0x3e22a7b397acf4ecl,0x0426c4005ea8b640l,0x5e3295a64e969285l,
-            0x22aabc59a6a45670l },
-          { 0xb929714c5f5942bcl,0x9a6168bdfa3182edl,0x2216a665104152bal,
-            0x46908d03b6926368l },
-          0 },
-        /* 17 << 72 */
-        { { 0x9b8be0247fcba850l,0x81eb5797820a181el,0xa0f2812230a01211l,
-            0x7e9cdc3cae7b8821l },
-          { 0x202332cc72ce15e7l,0xcd3cb2bbcb8238d7l,0xe4ab63dfc6e82c43l,
-            0x58bd00283183d717l },
-          0 },
-        /* 19 << 72 */
-        { { 0x02d57b7e717ed7b5l,0xd22e5b244dbce1a2l,0x174bd7712a4cdcf5l,
-            0xa6fdb801408205bbl },
-          { 0x67b4b0695e1387e9l,0x332b19a10591a442l,0x24edd916ccacf366l,
-            0xbe34cc4534958a50l },
-          0 },
-        /* 21 << 72 */
-        { { 0xa3f46e1e3e66d391l,0xb4a732cd7d6369b2l,0x99c3b85d402c1022l,
-            0x7dccfcbe2b54932el },
-          { 0xa6ddaa7b56b1dfe2l,0x31dc78a5e34a82c9l,0x8abeb3da704f3941l,
-            0xdf11a36cca55fa98l },
-          0 },
-        /* 23 << 72 */
-        { { 0x6c01f77a16e00c1bl,0x82515490839eaaacl,0x62f3a4ef3470d334l,
-            0x5a29a6491c1dcd6cl },
-          { 0x46b6782ece997a25l,0x9978fb35d3579953l,0x98f5a9df0960e0cel,
-            0x547dc8391f527a4cl },
-          0 },
-        /* 25 << 72 */
-        { { 0x395b15835d9dc24fl,0xa4256932c73ae680l,0x0542960efaa2c8e9l,
-            0x2bb3adee71068c6al },
-          { 0xa706099b570b4554l,0x85d12bb5f4e278d6l,0xd78af6f664296843l,
-            0xc7d3b3888428c633l },
-          0 },
-        /* 27 << 72 */
-        { { 0x34d44f9343b7e597l,0xdde440a7c2530f42l,0x7270a0817856bdb9l,
-            0x86a945eb5353032fl },
-          { 0x6c2f8e9966d39810l,0x0642a31b9b8b4b6bl,0x51679e62d1509d82l,
-            0x0120001c90f8ff16l },
-          0 },
-        /* 28 << 72 */
-        { { 0x50a1c1062e36e34al,0x74e8f58ce024ed1al,0x3f0f1dfa1300d726l,
-            0x6680df267b4a2d18l },
-          { 0x12b5979d8235b3b7l,0x1d2fafcb8a611493l,0x73ebda968848ece5l,
-            0xe996c275a413e399l },
-          0 },
-        /* 29 << 72 */
-        { { 0x46b7d7c7495ff000l,0xe60ed097baed95d1l,0xaa8804ac6e38f9c0l,
-            0x92990c0645c6f9bbl },
-          { 0xcae6a439c0919851l,0x713dff151bf5e1f2l,0x5d262c302eb38cdbl,
-            0xb73d505190df31dfl },
-          0 },
-        /* 31 << 72 */
-        { { 0x921e7b1c32d9268cl,0x34db2b964276fad4l,0x0ec56d34cc44e730l,
-            0x59be3a46096545b7l },
-          { 0xe9fdbc9766cf3a6al,0x7b2f83edd04e9b53l,0x6d99b3cc8fbae3e7l,
-            0x8eb5646c7ada3a40l },
-          0 },
-        /* 33 << 72 */
-        { { 0xa69ab906fc3302bfl,0x49ae6ba7d0872e90l,0xc9e2d6d1f3a1bfc3l,
-            0x11dfe85f1a033500l },
-          { 0x45189c2998666dbdl,0xba6aab88bbfd13cel,0xcf9c8b43dbd38cd4l,
-            0xa0cb581b68009236l },
-          0 },
-        /* 34 << 72 */
-        { { 0xff18c42a16288a7al,0x6363ace430699163l,0x8546d6332a2ce353l,
-            0x5e0379ef7b6b3418l },
-          { 0x2df2bb463e941bb2l,0xae7c091888e1aacel,0x6bc0982d83f5a37al,
-            0x8521bd02676d09e0l },
-          0 },
-        /* 35 << 72 */
-        { { 0x6531dff33d361aacl,0x59b954477c8cac2el,0xcc104df6c5cb7363l,
-            0x68b571c519364acdl },
-          { 0x7521e962979c3bc0l,0xbe0544c9c4aa1f92l,0x59127fe92a31eabbl,
-            0x760ac28593d8b55bl },
-          0 },
-        /* 36 << 72 */
-        { { 0x62ed534c6115164bl,0xaebe9e4cdce84ceal,0xd81c91a1c83f64c3l,
-            0x325a8ca8ecacd09al },
-          { 0x7ea57ad968b45df1l,0xa555636fd530c5d2l,0x23aff510591cfe32l,
-            0x46ff147637bedab9l },
-          0 },
-        /* 37 << 72 */
-        { { 0xa5a7e81ecb2edb3bl,0x9b0dc5f4f8fbe238l,0xc6f258087c66dd34l,
-            0xb4a57503a3f8f38al },
-          { 0x195b433513571b5bl,0xa32840763ccbc30bl,0x64ae1ffccf99ddd5l,
-            0x0dfc8772aa844e76l },
-          0 },
-        /* 39 << 72 */
-        { { 0x8b471afbfb22341dl,0xbf448b43397afdd2l,0x4cb08409682c37edl,
-            0xc3acfae6a948f1f6l },
-          { 0xf58462549e634707l,0x50161a78bd949f52l,0xf0529e752fe73566l,
-            0xe7e3fdef6fda53e0l },
-          0 },
-        /* 40 << 72 */
-        { { 0x56dab1c8321a518cl,0xfd4439a68bce226fl,0xe0b30d194facb9fal,
-            0xb5052f307583571bl },
-          { 0x1442641012afd476l,0xd02e417203fe624al,0xfc394f65531c92e6l,
-            0x16d4bf5ad4bc0b52l },
-          0 },
-        /* 41 << 72 */
-        { { 0xa38ac25eb4ec4f0fl,0x5399c024de72b27dl,0x08318aafd81a3d65l,
-            0x1af227a70c20e5d9l },
-          { 0x6389cc9a26c54e25l,0x438298bba47dc27fl,0x75386cca1a63fa0el,
-            0xc941e84cdf7bc1b0l },
-          0 },
-        /* 43 << 72 */
-        { { 0x81cad748fdfe3faal,0x752107b453ff1988l,0x8d8bb7001a8fd829l,
-            0x69838e15ca821d8el },
-          { 0x24371ede3b9f6b34l,0x19b4bb24d91e1495l,0x90899ca1e598ded1l,
-            0xbbb78b167c14e9e3l },
-          0 },
-        /* 44 << 72 */
-        { { 0xa577e84cbef239aal,0x656d2b6f8904b4d4l,0x2f6defe6ca4007edl,
-            0xca6e517737770796l },
-          { 0x4c62fcba298b6448l,0x046849660f62e00dl,0x806c2f0390b07d82l,
-            0x730855795e8d1e60l },
-          0 },
-        /* 45 << 72 */
-        { { 0x24488802f4703b78l,0x6c9323bee9eaa1e0l,0x242990e2aa94c170l,
-            0x3292bc42a15b5886l },
-          { 0x60ccb5bc908af203l,0x8fd63583713b09bdl,0x40791ecad693fa28l,
-            0xea80abf2941af8a1l },
-          0 },
-        /* 46 << 72 */
-        { { 0xf9c0315071145fe3l,0x80a71b55d7873a7dl,0xd134244b5e10bac7l,
-            0x303f7e12ded3a4b4l },
-          { 0x58e6f17e803b7a3bl,0xcd6f64130b1ca6b4l,0x25e744ce2ce65aa2l,
-            0xf2bbc66b952efa51l },
-          0 },
-        /* 47 << 72 */
-        { { 0xc8b212e75913e1f3l,0xf018ab208d416886l,0x28249e15b617cac4l,
-            0x837fcba1693ed09al },
-          { 0x9c457e511c15a1bcl,0x9354758756c7f3f1l,0x1afd80348be18306l,
-            0xa43d56982256ab14l },
-          0 },
-        /* 48 << 72 */
-        { { 0xce06b88210395755l,0x117ce6345ec1df80l,0xfefae513eff55e96l,
-            0xcf36cba6fd7fed1el },
-          { 0x7340eca9a40ebf88l,0xe6ec1bcfb3d37e12l,0xca51b64e86bbf9ffl,
-            0x4e0dbb588b40e05el },
-          0 },
-        /* 49 << 72 */
-        { { 0xf9c063f62f2be34bl,0x9ca32fa99c20f16bl,0xe02e350d0125a01al,
-            0x62d66c54e6516c25l },
-          { 0x21b154ad5120bedbl,0xb1077f4e8d6ff9d8l,0xd01a46c300bb4941l,
-            0x9d381847d1460588l },
-          0 },
-        /* 51 << 72 */
-        { { 0xf3a9b311581cb57bl,0x65fb3fb649727d13l,0xb8496e3d35131142l,
-            0xf7642f554d0cdab9l },
-          { 0xe2f66f0e9f6d7e45l,0xbae14cedaa22fcd4l,0x1f769f0e49b2e05al,
-            0x08c4d7784ac5191el },
-          0 },
-        /* 52 << 72 */
-        { { 0x86f9108ece4aa825l,0xbe5b2f317e5a5fbfl,0x2772c1b49254bb78l,
-            0xae6cdf5f4ff8ac5cl },
-          { 0x106cd94bf6b7a12el,0xbe0915d6d1c7a1a5l,0x8bf6bc8d3b40ac5el,
-            0xbb89180423ee3acal },
-          0 },
-        /* 53 << 72 */
-        { { 0x76f15eaa618b5ea1l,0xec1ea62e6d4ad0c8l,0x301b60c8168d57fal,
-            0x454d5f771edbfb05l },
-          { 0xea888e29a936031al,0x01303d3f0174dd17l,0x8b5e06b4244254e7l,
-            0x00ebf03509724acfl },
-          0 },
-        /* 55 << 72 */
-        { { 0x66ce3ded8e66d509l,0x368e38d05a488586l,0x7b9ae220c7eedf5el,
-            0x67e9ea52bfbf9d62l },
-          { 0xe9cbf53d99b7ecb3l,0xfde3e8c0908bf072l,0x288400ab1107e21fl,
-            0x24c8856256532667l },
-          0 },
-        /* 57 << 72 */
-        { { 0x0d5f9955ca9d3ad1l,0x545feba13a1daec0l,0xd22972016cb30f23l,
-            0x9660175ccef6cf6el },
-          { 0xbf3e341a395738dcl,0x74a5efbc80f7cca4l,0xc4f9a07bbebc6a60l,
-            0x2f1e3dad4b1f915al },
-          0 },
-        /* 59 << 72 */
-        { { 0xada4423f0d5e2e34l,0x2d31f4920b372358l,0xd7f469370e2d6a8cl,
-            0xf5e7ccfe0028e4ael },
-          { 0x20fcb1f3928854b2l,0x2a8973c507271bf6l,0xe87de33e5fa88fe1l,
-            0xe9af2dce7bd3c2a6l },
-          0 },
-        /* 60 << 72 */
-        { { 0x185a19d959d097b2l,0xb1c72a3a0dea2875l,0x3b371628f9021f08l,
-            0x45f1255bfa9d6ac1l },
-          { 0x9ff36a90cfd72c0dl,0x8c7315db24fe2376l,0x9aebcde04b34d42cl,
-            0x2129ab16923025f3l },
-          0 },
-        /* 61 << 72 */
-        { { 0x341b9dd714b4cf50l,0x7c6e4634d619d00el,0x571d6e2fdf2165ael,
-            0xdedf9cd18dbe9db5l },
-          { 0x52a152777c5f3dc3l,0x7d27c97ef2901cf7l,0x5e098b54d02a85dfl,
-            0x6fce3e13088e3640l },
-          0 },
-        /* 63 << 72 */
-        { { 0xfa95be147a939904l,0xdfcf5b9bb56365ccl,0xdbb546bdd2d66922l,
-            0xf26a8b9cda03ca7fl },
-          { 0x96a8042d16821c0cl,0xe6729970e88ede60l,0xd028130d1285e303l,
-            0x1678b01688b7de75l },
-          0 },
-        /* 64 << 72 */
-        { { 0x96649933aed1d1f7l,0x566eaff350563090l,0x345057f0ad2e39cfl,
-            0x148ff65b1f832124l },
-          { 0x042e89d4cf94cf0dl,0x319bec84520c58b3l,0x2a2676265361aa0dl,
-            0xc86fa3028fbc87adl },
-          0 },
-        /* 65 << 72 */
-        { { 0x5db4884124627d04l,0xf92740766f7e3febl,0xd09eb11773496240l,
-            0xd48e51419a6b9ec9l },
-          { 0xcbb2ac97b7336e27l,0xe794fb760640bf6cl,0xc0b7f78dc7c7fa3fl,
-            0x1355d071fd2edbb9l },
-          0 },
-        /* 71 << 72 */
-        { { 0x575d9724e84e25a3l,0x068690a13d4d8708l,0x8a7b1c6c54dd62d0l,
-            0x8c45e1b37f88e231l },
-          { 0x38c665466d85afe2l,0x65231642e1d69f1bl,0xb71c53a090687ec1l,
-            0xdf8469d777fb5981l },
-          0 },
-        /* 77 << 72 */
-        { { 0xb920b503144fe6bcl,0x54b0f0593914c130l,0x63188d5a8269b650l,
-            0x8d7780962fc7064dl },
-          { 0xbf7b0eec5e50839al,0xaf8a7ddbe242cd06l,0x93df850809cecdb9l,
-            0x4db58a72410659e9l },
-          0 },
-        /* 83 << 72 */
-        { { 0x460d9b383baba3cdl,0x52386e4d2cf860b8l,0xd224fe5da3924b9al,
-            0xe4a4be7bcf14d813l },
-          { 0xb0759e82ed3774fdl,0x57c064b38d9b6c59l,0x301ab902aee183d0l,
-            0xf1c873495ba207c3l },
-          0 },
-        /* 89 << 72 */
-        { { 0xe8245b0a6dd58696l,0x0714eedb61091043l,0x7d9874459101129bl,
-            0x4a7f1f03a0b27a21l },
-          { 0x282e5cff71ee2045l,0x25c694a3da5c6b41l,0xb3d8e21f5542ca55l,
-            0x57d64170e3601af0l },
-          0 },
-        /* 95 << 72 */
-        { { 0x9c8e86c6c6c4fee6l,0x70194db5a596119bl,0xfc6271d30e06050cl,
-            0x17d94c89b15f18d2l },
-          { 0x76c9e9bd49817224l,0x42621638b989c5bcl,0x1e9c4cbeb769d70cl,
-            0x85e227c3b87f2783l },
-          0 },
-        /* 101 << 72 */
-        { { 0x146185d2117e73c5l,0xbf6214696dc38116l,0x9af9d9b5459e72cbl,
-            0x7512882fb3930b85l },
-          { 0xfe935379d36583b8l,0xb83ad35e7c7fdcdel,0x093ca0ab2658ae4bl,
-            0xc9b16d60a756681bl },
-          0 },
-        /* 107 << 72 */
-        { { 0x12c24d9195d3519bl,0x1fc6db1bdb43fd06l,0x1ae49fed25bbde51l,
-            0x27072e0b76d2827bl },
-          { 0xdcb92e05aeb8c47fl,0x601d414056145f67l,0xcb7002652a39e8f7l,
-            0x6ce9facc35620d8cl },
-          0 },
-        /* 113 << 72 */
-        { { 0x5c428a5ebd702c22l,0xcb6863291616129dl,0xe6278994eabcb9a1l,
-            0xb409a10b9327e540l },
-          { 0x6899f7cb66cf96aal,0xa9225f051c64b545l,0x00c5522ee3feec21l,
-            0x35503728e083315cl },
-          0 },
-        /* 116 << 72 */
-        { { 0x1916d88cf1600077l,0x1ac9c238e3a58b2bl,0x3080df8535f3508dl,
-            0x86cc18712744912bl },
-          { 0x56aec9d5ccd15044l,0x8dd9061a5db0ab17l,0x84d6bc4e2c84171dl,
-            0xd569c7d70989a5bdl },
-          0 },
-        /* 119 << 72 */
-        { { 0x24446b2702af35abl,0x071710478eea4565l,0xba4989db728306e6l,
-            0x2cd692a85954a558l },
-          { 0x644e02763576b32el,0x7efdb65c1f9fe65dl,0x04b2828e8796c048l,
-            0xcfd22481187b979bl },
-          0 },
-        /* 125 << 72 */
-        { { 0xa10d104084ea9701l,0x27dd0dcb415e187dl,0xf667c5e939bfe45cl,
-            0x3995e4ae55b67506l },
-          { 0xb25117d9b5a14801l,0xeee58525fe142e92l,0x100b856a6dbae9f1l,
-            0xada7057629586658l },
-          0 },
-    },
-    {
-        /* 0 << 80 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 80 */
-        { { 0xe4050f1cf1c367cal,0x9bc85a9bc90fbc7dl,0xa373c4a2e1a11032l,
-            0xb64232b7ad0393a9l },
-          { 0xf5577eb0167dad29l,0x1604f30194b78ab2l,0x0baa94afe829348bl,
-            0x77fbd8dd41654342l },
-          0 },
-        /* 3 << 80 */
-        { { 0xa2f7932c68af43eel,0x5502468e703d00bdl,0xe5dc978f2fb061f5l,
-            0xc9a1904a28c815adl },
-          { 0xd3af538d470c56a4l,0x159abc5f193d8cedl,0x2a37245f20108ef3l,
-            0xfa17081e223f7178l },
-          0 },
-        /* 4 << 80 */
-        { { 0x1fe2a9b2b4b4b67cl,0xc1d10df0e8020604l,0x9d64abfcbc8058d8l,
-            0x8943b9b2712a0fbbl },
-          { 0x90eed9143b3def04l,0x85ab3aa24ce775ffl,0x605fd4ca7bbc9040l,
-            0x8b34a564e2c75dfbl },
-          0 },
-        /* 5 << 80 */
-        { { 0x5c18acf88e2f7d90l,0xfdbf33d777be32cdl,0x0a085cd7d2eb5ee9l,
-            0x2d702cfbb3201115l },
-          { 0xb6e0ebdb85c88ce8l,0x23a3ce3c1e01d617l,0x3041618e567333acl,
-            0x9dd0fd8f157edb6bl },
-          0 },
-        /* 7 << 80 */
-        { { 0x516ff3a36fa6110cl,0x74fb1eb1fb93561fl,0x6c0c90478457522bl,
-            0xcfd321046bb8bdc6l },
-          { 0x2d6884a2cc80ad57l,0x7c27fc3586a9b637l,0x3461baedadf4e8cdl,
-            0x1d56251a617242f0l },
-          0 },
-        /* 9 << 80 */
-        { { 0x892c81a321175ec1l,0x9159a505ee018109l,0xc70130532d8be316l,
-            0x76060c21426fa2e5l },
-          { 0x074d2dfc6b6f0f22l,0x9725fc64ca01a671l,0x3f6679b92770bd8el,
-            0x8fe6604fd7c9b3fel },
-          0 },
-        /* 10 << 80 */
-        { { 0xce711154b6e00a84l,0xd9fe7e4224890e60l,0xd10bc6c34560988fl,
-            0xbdc2ef526859b004l },
-          { 0xdcf0d868d5c890eel,0x893115e6119c47dcl,0xe97966fbee714567l,
-            0x117813355c85aa53l },
-          0 },
-        /* 11 << 80 */
-        { { 0x71d530cc73204349l,0xc9df473d94a0679cl,0xc572f0014261e031l,
-            0x9786b71f22f135fel },
-          { 0xed6505fa6b64e56fl,0xe2fb48e905219c46l,0x0dbec45bedf53d71l,
-            0xd7d782f2c589f406l },
-          0 },
-        /* 13 << 80 */
-        { { 0x06513c8a446cd7f4l,0x158c423b906d52a6l,0x71503261c423866cl,
-            0x4b96f57093c148eel },
-          { 0x5daf9cc7239a8523l,0x611b597695ac4b8bl,0xde3981db724bf7f6l,
-            0x7e7d0f7867afc443l },
-          0 },
-        /* 15 << 80 */
-        { { 0x3d1ab80c8ce59954l,0x742c5a9478222ac0l,0x3ddacbf894f878ddl,
-            0xfc085117e7d54a99l },
-          { 0xfb0f1dfa21e38ec2l,0x1c7b59cb16f4ff7fl,0x988752397ea888fel,
-            0x705d270cb10dc889l },
-          0 },
-        /* 16 << 80 */
-        { { 0xe5aa692a87dec0e1l,0x010ded8df7b39d00l,0x7b1b80c854cfa0b5l,
-            0x66beb876a0f8ea28l },
-          { 0x50d7f5313476cd0el,0xa63d0e65b08d3949l,0x1a09eea953479fc6l,
-            0x82ae9891f499e742l },
-          0 },
-        /* 17 << 80 */
-        { { 0xd7c89ba1e7d1cefdl,0xcb33553a9a91e03dl,0xa01caaff59f01e54l,
-            0x4a71c141de07def7l },
-          { 0xe1616a4034d467d1l,0x6f395ab2e8ba8817l,0xf781ea64e45869abl,
-            0x8b9513bb7134f484l },
-          0 },
-        /* 19 << 80 */
-        { { 0x0b0ec9035948c135l,0xaee219539a990127l,0x9d15ba0eb185dda1l,
-            0xd87bc2fb2c7d6802l },
-          { 0x05a480307a82d7f8l,0x7b591ce4e7e11ec3l,0x14d4cc22a0e15fdbl,
-            0xf2d4213576def955l },
-          0 },
-        /* 21 << 80 */
-        { { 0xd56d69e4117a5f59l,0xcae6008a01286e97l,0x716a0a282dab13b0l,
-            0xc821da99b3a8d2d0l },
-          { 0x6898b66239c305e6l,0xe42d3394c8b61142l,0x54c1d2b253b16712l,
-            0x3cec3953a01f4be6l },
-          0 },
-        /* 23 << 80 */
-        { { 0x5bd1e3036951b85el,0x1a73f1fb164d79a4l,0x6e77abd39fb22bc3l,
-            0x8ae4c181b3d18dfdl },
-          { 0xdd4226f5a6a14ed1l,0x620e111feb4e1d92l,0xffce6e59edca4fe8l,
-            0x39f5fc053d0a717dl },
-          0 },
-        /* 25 << 80 */
-        { { 0xef8fa78cd91aff44l,0x6f3f9749bdc03be7l,0x171545f8b8596075l,
-            0xbe31a73e2af132cel },
-          { 0x5b4e174123884e1dl,0x4373357ea9fa75f0l,0x8dba2731bc06f49el,
-            0xa09aebc877fa6de8l },
-          0 },
-        /* 27 << 80 */
-        { { 0xd4974e518293e18cl,0x1e4cfc5331ec0e8fl,0x80b4258325d40b1el,
-            0x5cfb73a2a85f7588l },
-          { 0xe553efd204c0e00bl,0xdaa6750e9a48ac39l,0xf20936b00abda06al,
-            0xbfd3c7e4bf85771cl },
-          0 },
-        /* 28 << 80 */
-        { { 0x72669c3c7292495cl,0xa627e2dd82786572l,0xbdbfce5cd39c3e3dl,
-            0xba6164927feed3d6l },
-          { 0x4eb5f513e77b7318l,0x133f2e834337c2e0l,0xdea20f07f408bec6l,
-            0x848a8396e3c87655l },
-          0 },
-        /* 29 << 80 */
-        { { 0x3086643551138f2bl,0x1176d8e6108a36bal,0xd78b3b400d4d4b66l,
-            0x99ddd9bd956dbff1l },
-          { 0x91dfe72822f08e5fl,0x7fd8cfe6a081ac4el,0x8ebb278ed75285c2l,
-            0x2335fe00ef457ac0l },
-          0 },
-        /* 31 << 80 */
-        { { 0xe9d79c50f058191al,0x6749c3b05d3183f8l,0x5edc2708dbfeb1ecl,
-            0x2c18f93621275986l },
-          { 0x3a093e1f0703389fl,0xdf065e4a3ef60f44l,0x6860e4df87e7c458l,
-            0xdb22d96e8bfe4c7dl },
-          0 },
-        /* 33 << 80 */
-        { { 0xb7193811b48dad42l,0x23b9dca320ad0f0cl,0x55511ffb54efb61bl,
-            0xac8ed94626f9ce42l },
-          { 0xa42b4bc73fc4cbd9l,0x2a4670905c6f8e39l,0xb50040f87eb592del,
-            0x6633f81bdc2541f3l },
-          0 },
-        /* 34 << 80 */
-        { { 0xc104e02ed2d6d9c2l,0xa4876e870302517al,0x0263c9b2912f5005l,
-            0x902f364a3d89d268l },
-          { 0x76070565bb20a5a8l,0xa3a8977452109e98l,0x51fbffec463aa476l,
-            0xfa8519625daa1503l },
-          0 },
-        /* 35 << 80 */
-        { { 0xe449dd8f82a9a4f3l,0xa1a2f405797e6b36l,0x76913537787785e8l,
-            0x0315a3cfe064481el },
-          { 0xc02291ee83df11e2l,0x5b59a0e9bcd178f0l,0xd5e8d10ce6b4c63al,
-            0x9eee599f3fc60a82l },
-          0 },
-        /* 36 << 80 */
-        { { 0x051e589759621468l,0xb92c06327293621el,0xee17ea647762e4f2l,
-            0x412107a771abd28cl },
-          { 0xa083d87bf02d65ebl,0xbd4a3f165594395el,0x1d5694337c8882f3l,
-            0xc5eb10c55f9c63cfl },
-          0 },
-        /* 37 << 80 */
-        { { 0x4b196728c8e62c4el,0x03dbd04cb74a757cl,0xe960a65b8520f044l,
-            0x9eda0f33f7937337l },
-          { 0x06ff0b86b6dc7dfbl,0x3bd276c11fc1ac35l,0x0e67055b1b255c27l,
-            0xe43ae552eff899f8l },
-          0 },
-        /* 39 << 80 */
-        { { 0xc64c914d3b156d76l,0x784c1f61d794345dl,0xcda0c77c365d7a50l,
-            0xcc5a1e205b32dbd0l },
-          { 0x2f4e78bff90b6ac0l,0xbead62f9a2d4862dl,0xa8f67e7dcc346b53l,
-            0xa38d7ae947e59dbdl },
-          0 },
-        /* 40 << 80 */
-        { { 0x7dc1605d480aca4dl,0x08c37750ef263aabl,0xd5c6b7c93f166725l,
-            0xf99982f30ff2853bl },
-          { 0xc61b9583a8ecb64al,0x041211a91b771741l,0x50ba64154e156f97l,
-            0xb6595ea871b8954el },
-          0 },
-        /* 41 << 80 */
-        { { 0x4ae760845eb3b4eel,0xcafefdc6c62ed274l,0x4eabeacf113f790bl,
-            0x10c2cc88a5ff64c9l },
-          { 0xe7b59f8a49965d80l,0xd04884b50df07712l,0x6316ac5ba5f7bab1l,
-            0x388111d99e78a075l },
-          0 },
-        /* 43 << 80 */
-        { { 0x8d437128f24804efl,0x12a687dd7b71dd53l,0x8b8f71d96139a60el,
-            0xb047fed42a095ec7l },
-          { 0xef238041fba59ee8l,0x61b17fac64045514l,0x45b1cf4857afa184l,
-            0x8592c50a4bff5fc5l },
-          0 },
-        /* 44 << 80 */
-        { { 0x2830592394b745dcl,0x53e9ec16b09cb993l,0x59d0b57f9a134ed1l,
-            0x89d7b439c56ee0ebl },
-          { 0xc3656539991e22a2l,0xd27a89372a345043l,0x55dd5341064038eel,
-            0xc9ee3f0348cb42efl },
-          0 },
-        /* 45 << 80 */
-        { { 0x08518c631d56c1cbl,0x5650f79f31235521l,0x33fc08d648911017l,
-            0xbb8b58538a0a33c8l },
-          { 0xb54554f2f869a62al,0x67f8cf48222457e5l,0x46e13911f276cc0dl,
-            0x4b3a2ad6943b389el },
-          0 },
-        /* 46 << 80 */
-        { { 0x0e72b816b11a4c9dl,0x919b2738e9028fa4l,0xab80e1117698a5d6l,
-            0xcd7950f56cd49adal },
-          { 0x0db75c908dfb13a5l,0x2178578770f12cebl,0xfab72d5243486ff6l,
-            0x66d55d726a0673ebl },
-          0 },
-        /* 47 << 80 */
-        { { 0xe98014b922667519l,0x7fcab2b3a95da9c0l,0x9bdbccd8438d5060l,
-            0xa72fff5455a726b6l },
-          { 0x7ae032943a5e769bl,0xf7291e9b559a0734l,0x18ae4f182ce18eeel,
-            0x88e49f7328b7b4f0l },
-          0 },
-        /* 48 << 80 */
-        { { 0x90fe7a1d214aeb18l,0x1506af3c741432f7l,0xbb5565f9e591a0c4l,
-            0x10d41a77b44f1bc3l },
-          { 0xa09d65e4a84bde96l,0x42f060d8f20a6a1cl,0x652a3bfdf27f9ce7l,
-            0xb6bdb65c3b3d739fl },
-          0 },
-        /* 49 << 80 */
-        { { 0xc6a2923e60ef9d87l,0xac66cdd8c3a64f1cl,0x069292d26e0bb0ccl,
-            0x9e491414451e52a0l },
-          { 0x2e76cedf0e0d35b3l,0x311b7ae9af682b84l,0xaa1017a02f90b176l,
-            0xac0b43a794feb6e8l },
-          0 },
-        /* 51 << 80 */
-        { { 0x7ddb42f9214e82f5l,0x91c88566f67269d7l,0x1763ed8cdd0ff422l,
-            0x045dd690ad284ddfl },
-          { 0x5713bbb141e48fe7l,0xdc5bef28f8eb580fl,0x4bd0b288ed2992c2l,
-            0x436587faaf5ef2b3l },
-          0 },
-        /* 52 << 80 */
-        { { 0xbbc1a48d6e5822c4l,0x16c3135daacebd02l,0xd0c6c543b56157dfl,
-            0xae249a0ef49f44a1l },
-          { 0x1f2c23ce72c47341l,0x8f52dc2a25974313l,0x2c99bc0a958e0e6bl,
-            0xe57eab6b950cd492l },
-          0 },
-        /* 53 << 80 */
-        { { 0xea66db638934efc0l,0x7bfe479193c6f7c7l,0x78438d535ef90d99l,
-            0xe63b87c9c665736dl },
-          { 0x6de32d82db49e1bbl,0xbfa877dcd0ad1648l,0xdb2e85de1197806dl,
-            0x74e9dbd3cfee7854l },
-          0 },
-        /* 55 << 80 */
-        { { 0xd2c26e2edb6d7e0al,0x9103119a531009cdl,0xb5dc49869a8b9d54l,
-            0x4781b83bb408b427l },
-          { 0x70d98b2ccb4ba2f7l,0x112ed5d7fa8a36b8l,0x97257bc6fdde1675l,
-            0xd2a9c711db211cb7l },
-          0 },
-        /* 57 << 80 */
-        { { 0xe4aa6a06ee79fe8cl,0x06e210233dff8a54l,0x63e11ac5bf50731al,
-            0xb8b9944f544125b8l },
-          { 0xcba92c41d359aeb0l,0xd201c893249bca36l,0xfe79bd77cb501216l,
-            0x694b21488d525ba4l },
-          0 },
-        /* 59 << 80 */
-        { { 0x60c90e11ee3dde2al,0x7df08e17bb36c4a2l,0xb6c3210dcc5b3c17l,
-            0xa814180955cec91cl },
-          { 0xf4ecbc05a8193dffl,0xf43cdef8da5744fal,0x4895a6c6f12f8a2el,
-            0x44282692eb7b910al },
-          0 },
-        /* 60 << 80 */
-        { { 0x1a405e1886d6e13al,0x6a18c91827a7c67cl,0xc34877ebe127bfd7l,
-            0x3c9fab08c098e692l },
-          { 0xfe2dc65bc2066586l,0xb107603a8f68a0a9l,0x74ef0ef8127cd340l,
-            0xfe577b5b86788d87l },
-          0 },
-        /* 61 << 80 */
-        { { 0xdc7ff83c71234c81l,0xee48d9c6d868c82fl,0xb80bac5e37e4f365l,
-            0x2bfbe94efcb951c2l },
-          { 0x55829049a374d0b0l,0x2a502cada87a5fb4l,0x0742ac9d9ee840bal,
-            0x7689bf53eecd05b1l },
-          0 },
-        /* 63 << 80 */
-        { { 0x0e7f459320059c22l,0x47c273e0e49368a2l,0x5ccb960ac6946ee2l,
-            0xd8209ec48b3271b6l },
-          { 0x7fd5142cdfb9e947l,0x46a89c83ff737ab1l,0xa45f6b0282d875ecl,
-            0x19a16e0e34c296d6l },
-          0 },
-        /* 64 << 80 */
-        { { 0xeb5ddcb6ec7fae9fl,0x995f2714efb66e5al,0xdee95d8e69445d52l,
-            0x1b6c2d4609e27620l },
-          { 0x32621c318129d716l,0xb03909f10958c1aal,0x8c468ef91af4af63l,
-            0x162c429ffba5cdf6l },
-          0 },
-        /* 65 << 80 */
-        { { 0x65c93be33607927bl,0x86feaaecdae5411dl,0x4a1686c6dd2e2c3dl,
-            0xf78200068acdf51dl },
-          { 0xf82c4d0239ed3e50l,0x5ac04047b4c3a4a4l,0xbdd14d7ec34b07a7l,
-            0x9911d7027cc12db5l },
-          0 },
-        /* 71 << 80 */
-        { { 0x4ed5dbbd1751abc9l,0xaf374229a23cc54al,0x9b5fa66ea4ed3f9al,
-            0xc56dd9613d380643l },
-          { 0x7d77897144b38021l,0xdf4712d0d3584508l,0x0018e2eecd7ab168l,
-            0xc8a3a166293d29a7l },
-          0 },
-        /* 77 << 80 */
-        { { 0x34681bdb3a5a0214l,0xe188d6f1f718797el,0xaa751de7db761c5fl,
-            0x347c50324959a5cel },
-          { 0x108705fc338be49cl,0x1dc5eada95abf7a8l,0xb863808f0fc3f0b7l,
-            0x529c27c1a05c4d43l },
-          0 },
-        /* 83 << 80 */
-        { { 0xa75f90677f699f79l,0xd01cf9c866356f99l,0xf90f9b73fdfbaae7l,
-            0xe0b5f4412c304d2fl },
-          { 0x17cbfb11807f3f57l,0xe902d542af8a9eb4l,0x3335285461f89b4al,
-            0x3a51c54d3628c0ael },
-          0 },
-        /* 89 << 80 */
-        { { 0xae5fd487c704212dl,0x82dd07a565e2e32cl,0x46d4c9646c19c199l,
-            0xe7f428593778eedcl },
-          { 0x084a4e9b6dcc5ec9l,0x757e04ba2d0538b7l,0x4ec0a573a3fba4cdl,
-            0x2432a4e5c627c2fcl },
-          0 },
-        /* 95 << 80 */
-        { { 0xfde00b3094c8a424l,0x20a57d8cd224c232l,0xd6ace1a170019992l,
-            0x1a648d40697e67a3l },
-          { 0xed1fb10691338d84l,0x828004a08372bfc8l,0xb93030fefad3bfedl,
-            0x883dea23f27369ecl },
-          0 },
-        /* 101 << 80 */
-        { { 0xfbbf36a62a710d73l,0x8db834024b3cc6bbl,0xa60c47cf16d7b1fcl,
-            0xf9778fa6cd16ce8fl },
-          { 0xd77023086d14a1a6l,0x01f139cb06e8247cl,0xd89af2979770b9c1l,
-            0x94bf1ca97d9fb550l },
-          0 },
-        /* 107 << 80 */
-        { { 0xe17e2e6dc2d45f34l,0x5969d8ee26efc6cbl,0x6f175231b9219cfbl,
-            0x027f333c189f1175l },
-          { 0x5bc60fad54f6da49l,0xc52e09af8ae5c3f3l,0x6c0e3927ed07f46dl,
-            0xbfd9e598f39cf16bl },
-          0 },
-        /* 113 << 80 */
-        { { 0x9dffd95b090aefb9l,0x26db7b73637224fel,0xb78a679e92e2aa0cl,
-            0xfc7c824ffc8f895dl },
-          { 0xdc8287e8e636b3a8l,0x6b3ccc0f28b7a639l,0x38e6e2cc653de56al,
-            0x998cf6985392c3cal },
-          0 },
-        /* 116 << 80 */
-        { { 0xe68de79e57f0d6fal,0xe707b252ff9c06f7l,0x5613698a4a061697l,
-            0xd83d6453b5390352l },
-          { 0x59b007599867c708l,0xcfe24fd7b41ea7adl,0x4692abf3da5b7de6l,
-            0xd99a6f3bf0c54e8fl },
-          0 },
-        /* 119 << 80 */
-        { { 0xe8ee870dea4addc3l,0x0d1fb29559841f3el,0xdc05b5581dba2f14l,
-            0xb8bf38324e3f4600l },
-          { 0x1a909e66fd57c48al,0xb65ca4c24e2d76dfl,0x0b27755ae7c60d89l,
-            0x9fcfa75acb9003f6l },
-          0 },
-        /* 125 << 80 */
-        { { 0xbbbdf4c49e5325aal,0x6879fe11d0d1f281l,0x7a400f890633002el,
-            0xc3633c779bb79ac9l },
-          { 0x15a4cfae93ab9bc3l,0x379bbdea42594603l,0x7c61dfa257d2af3fl,
-            0x20190537b51bfb62l },
-          0 },
-    },
-    {
-        /* 0 << 88 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 88 */
-        { { 0xa80d1db6f79588c0l,0xfa52fc69b55768ccl,0x0b4df1ae7f54438al,
-            0x0cadd1a7f9b46a4fl },
-          { 0xb40ea6b31803dd6fl,0x488e4fa555eaae35l,0x9f047d55382e4e16l,
-            0xc9b5b7e02f6e0c98l },
-          0 },
-        /* 3 << 88 */
-        { { 0x4b7d0e0683a7337bl,0x1e3416d4ffecf249l,0x24840eff66a2b71fl,
-            0xd0d9a50ab37cc26dl },
-          { 0xe21981506fe28ef7l,0x3cc5ef1623324c7fl,0x220f3455769b5263l,
-            0xe2ade2f1a10bf475l },
-          0 },
-        /* 4 << 88 */
-        { { 0x9894344f3a29467al,0xde81e949c51eba6dl,0xdaea066ba5e5c2f2l,
-            0x3fc8a61408c8c7b3l },
-          { 0x7adff88f06d0de9fl,0xbbc11cf53b75ce0al,0x9fbb7accfbbc87d5l,
-            0xa1458e267badfde2l },
-          0 },
-        /* 5 << 88 */
-        { { 0x03b6c8c7dacddb7dl,0x92ed50047e1edcadl,0xa0e46c2f54080633l,
-            0xcd37663d46dec1cel },
-          { 0x396984c5f365b7ccl,0x294e3a2ae79bb95dl,0x9aa17d7727b1d3c1l,
-            0x3ffd3cfae49440f5l },
-          0 },
-        /* 7 << 88 */
-        { { 0x26679d11399f9cf3l,0x78e7a48e1e3c4394l,0x08722dea0d98daf1l,
-            0x37e7ed5880030ea3l },
-          { 0xf3731ad43c8aae72l,0x7878be95ac729695l,0x6a643affbbc28352l,
-            0xef8b801b78759b61l },
-          0 },
-        /* 9 << 88 */
-        { { 0xdcdd3709b63afe75l,0xad9d7f0b3f1af8ffl,0xdd6a8045194f4beel,
-            0x867724cc2f7d998cl },
-          { 0xd51d0aa5837751bel,0x21d6754a959a0658l,0xd2212611695f7e58l,
-            0xec4b93c2297363efl },
-          0 },
-        /* 10 << 88 */
-        { { 0x0ac1c5fab6ef26cfl,0xcd8ba0c5a39de8eel,0x11ba7537dd7796e0l,
-            0x1215933476d58d6dl },
-          { 0xf51eb76f529fda4cl,0x2fd9209ddedaa8a3l,0x555a675615efac65l,
-            0xb784c9ca7fd42fe9l },
-          0 },
-        /* 11 << 88 */
-        { { 0x8165ec11b9d1a70fl,0x01347efc384f6cael,0xe95c01a0ab7aeca9l,
-            0x459ba1c5c6c99530l },
-          { 0x38967a635cf3416bl,0x5c3761fd1e5457e2l,0x43e6077af03e9df6l,
-            0xb15d34628bd1c7f6l },
-          0 },
-        /* 13 << 88 */
-        { { 0xad87d3db35a75c49l,0xc69d800961af03c5l,0x31aef61a3a6a6c4cl,
-            0xb3292640aa10a993l },
-          { 0x959aae80aaee340fl,0xf900528e7f381a3bl,0x44ecf76e853691a3l,
-            0xa081663ce749e68el },
-          0 },
-        /* 15 << 88 */
-        { { 0x4f2782136283e34al,0x6f9fcf60fbfa315fl,0x224a2ab99b701364l,
-            0xb4b1b418f9fecadcl },
-          { 0xbf7280fe50ba1b9al,0x7e68259c33f36db9l,0x8ccb754e154c9fb0l,
-            0xf281adb1db2328f1l },
-          0 },
-        /* 16 << 88 */
-        { { 0xf92dda31be24319al,0x03f7d28be095a8e7l,0xa52fe84098782185l,
-            0x276ddafe29c24dbcl },
-          { 0x80cd54961d7a64ebl,0xe43608897f1dbe42l,0x2f81a8778438d2d5l,
-            0x7e4d52a885169036l },
-          0 },
-        /* 17 << 88 */
-        { { 0xc2a950ad2d6608bel,0xab415e2a51c3c2b6l,0xffbd2a65f5c803e7l,
-            0x3f81dc3eca908532l },
-          { 0x0ec47397c28c04f4l,0xf6c632e8153f58e8l,0xccac35f8efb4a6d8l,
-            0x22a1b677ee6d7407l },
-          0 },
-        /* 19 << 88 */
-        { { 0x276662435243c119l,0x79cb8580e707363el,0x5bf5ebf4d01682d6l,
-            0x8a980173762811e0l },
-          { 0xe2f2be1fc7547d77l,0x21a50fffb925fec6l,0x5e6cf2ef40115509l,
-            0xb69beae18faa0fc0l },
-          0 },
-        /* 21 << 88 */
-        { { 0xfa147da8cec36e75l,0xba184e5a42860484l,0xe8ec25df222fb1e6l,
-            0xce91dcb18ff8403cl },
-          { 0xf1b0e27ead7faa32l,0x097d881d42a3a205l,0xa8865dd43f8f56d4l,
-            0x624d7a451aef929dl },
-          0 },
-        /* 23 << 88 */
-        { { 0x3db0238ad01698e8l,0xbb7186dc00306082l,0x542f4377250f830el,
-            0x34b8a67dae438c50l },
-          { 0xada528a0858d8048l,0x561aa3336b57afc1l,0x8d9188e0fda35f7al,
-            0x5838d1211dcad0c5l },
-          0 },
-        /* 25 << 88 */
-        { { 0x4f97d1529f17511dl,0x8b9f012776fdb9ebl,0x53a0a72d4056e6a7l,
-            0x5ff937d64e262eeel },
-          { 0xaa64a8dc489fbe6dl,0xc19947dfea02bc69l,0x76f0bbb91492c9bel,
-            0xe53881098d89cd01l },
-          0 },
-        /* 27 << 88 */
-        { { 0x16083309456057b7l,0x2810c08040a331f6l,0x0561656c3c166929l,
-            0x16f0d8d6ed1c3999l },
-          { 0x37b6da7294697927l,0xd821c2cc23ca6c9cl,0x42ef1bdb8ca4351cl,
-            0x7ca32bad5edfa682l },
-          0 },
-        /* 28 << 88 */
-        { { 0xdc1de17d98119f10l,0x74353c5d488c36a6l,0x14aaf33a3d8e23dfl,
-            0x31e075c078baf593l },
-          { 0x0f7ca03a46d1ca3cl,0x99c5e3ac47b660c7l,0x70d0241388fe2e59l,
-            0x2e9a6be12a7ec005l },
-          0 },
-        /* 29 << 88 */
-        { { 0x4d1f087f184252b1l,0xfd3ace273f5b49c6l,0x6e874447bbb04da2l,
-            0x2347e3a1b3767ff0l },
-          { 0x990d4010f868966al,0x35320090dd658b5el,0x1105bfb974fe972al,
-            0x3961f7dc8e7ad2c6l },
-          0 },
-        /* 31 << 88 */
-        { { 0x100d8b54741e3286l,0x65d9108ef3abc7afl,0x172b450620ef8fbcl,
-            0x11bd7db2d81b8a2el },
-          { 0xf89210e1e8e41de5l,0x910613f3d98a868bl,0xbfc85241849aa909l,
-            0x68a43e21c7d3a7cal },
-          0 },
-        /* 33 << 88 */
-        { { 0x68f891479a4f8293l,0x48262328a5eb9101l,0x7eca2a178fe218b5l,
-            0xde6c22dbc733f768l },
-          { 0xde7171d108d6084dl,0xd153827a0f0f8092l,0xc7b52d8f85a9252fl,
-            0xfa29ca3a5708b31fl },
-          0 },
-        /* 34 << 88 */
-        { { 0x20518ddf9e0ad7e7l,0x33d5d079e8d28b9bl,0x1149b393d13058b0l,
-            0x708cc65586d4651dl },
-          { 0xd7fefaa694207435l,0xce882c0d96312f8fl,0x2fd5cb2059d091a7l,
-            0x4533a88a0e1ece94l },
-          0 },
-        /* 35 << 88 */
-        { { 0xceddd9b5a59c28bcl,0xaa4808f9572e2a5dl,0x38bc191999014a1el,
-            0x1aacefdaa6d85686l },
-          { 0xa59283d42a573fddl,0x84359db29c387594l,0x79994773dca3acc8l,
-            0xe4323e7654cf7653l },
-          0 },
-        /* 36 << 88 */
-        { { 0xac449695241fbd6fl,0x67c9b170081c1223l,0x16868f21b56aac6fl,
-            0x34bd8fa3f8bcb721l },
-          { 0x06b6bd33b6691c76l,0x6c924766381a7973l,0x6a12444ca54078dbl,
-            0xd02e91a96d1051ccl },
-          0 },
-        /* 37 << 88 */
-        { { 0x512f5fb35f30b344l,0xb13ade169d516885l,0x18812e9b2b468802l,
-            0xf15d730e6b28979al },
-          { 0x5015616f6889348bl,0xe0b02a0a96af0401l,0x3b02007b61204c89l,
-            0x9ece2aa7432742a4l },
-          0 },
-        /* 39 << 88 */
-        { { 0xd5f7e09c7c1cc4a1l,0x313ac04218b2d854l,0xbc4fe2a04c253b10l,
-            0x25a696a3c7080b5cl },
-          { 0x6de3cb6aef811877l,0x4d242fecd15f9644l,0xb9bfa2480ee6a136l,
-            0x8122679e9c8d181el },
-          0 },
-        /* 40 << 88 */
-        { { 0x37e5684744ddfa35l,0x9ccfc5c5dab3f747l,0x9ac1df3f1ee96cf4l,
-            0x0c0571a13b480b8fl },
-          { 0x2fbeb3d54b3a7b3cl,0x35c036695dcdbb99l,0x52a0f5dcb2415b3al,
-            0xd57759b44413ed9al },
-          0 },
-        /* 41 << 88 */
-        { { 0xc2c7daec96a8d727l,0x8a11631a17f3abf9l,0x06aba65c0ae8940al,
-            0xfca280c7873d3635l },
-          { 0x57496889ddb72b87l,0xaa9a3359320793d4l,0x11b6864d43120741l,
-            0x1877cd4e51527639l },
-          0 },
-        /* 43 << 88 */
-        { { 0x8b35ce4e6f43dfc6l,0x4114b2fe9a19f3bfl,0x8c4af8024ffa45cal,
-            0xa3ab5f869328b847l },
-          { 0x0986de3e555f30f0l,0xaae6e3eac8cb84c4l,0x2a7dcdbaa4ba01f7l,
-            0xfa32efa729f5dc6cl },
-          0 },
-        /* 44 << 88 */
-        { { 0x077379c00b33d3f8l,0x421883c67064e409l,0x2d0873d76c29c8f6l,
-            0xbfa433a3d274c0c8l },
-          { 0x56dc778f23a5891el,0xd663bf6535e2de04l,0x488fdb485db517cel,
-            0x00bba55e19b226c2l },
-          0 },
-        /* 45 << 88 */
-        { { 0x879b30ead7260d78l,0x04954ba2eac5201fl,0x3210c0e3ff2529d1l,
-            0x0743823488b470b3l },
-          { 0x8b618de48854cc0dl,0x98270d5e35b795eel,0x0e47d651aa33ca37l,
-            0x77d75fda1e87d0cfl },
-          0 },
-        /* 46 << 88 */
-        { { 0x789dbe987803fbf9l,0x940589aa17ede316l,0x032902bd85a1988cl,
-            0x43cbc0031c47f7f0l },
-          { 0xc6ff73714709148fl,0x769957122d9b8a5el,0xb4520e462597b70el,
-            0x00d19f39f67ff3b8l },
-          0 },
-        /* 47 << 88 */
-        { { 0xe2dfcef9b159f403l,0xe8e9e8d8855644afl,0x2796247163fa1068l,
-            0x400e992a968a5400l },
-          { 0xe2b9d29f56e563c1l,0xed66759c2885fabfl,0x788b6263750abdffl,
-            0x30adb00d6cbbdcacl },
-          0 },
-        /* 48 << 88 */
-        { { 0x1fe647d83d30a2c5l,0x0857f77ef78a81dcl,0x11d5a334131a4a9bl,
-            0xc0a94af929d393f5l },
-          { 0xbc3a5c0bdaa6ec1al,0xba9fe49388d2d7edl,0xbb4335b4bb614797l,
-            0x991c4d6872f83533l },
-          0 },
-        /* 49 << 88 */
-        { { 0x5548d3423fa17b28l,0x38587952823ee731l,0x8ee9b90a0a28bcd1l,
-            0xcfc029bf6676917el },
-          { 0x7e08306d2a212358l,0x66a9488dc88a66bcl,0x7a09db327d7c9e65l,
-            0x20eaf4e72cbc1790l },
-          0 },
-        /* 51 << 88 */
-        { { 0xb3095b491f2a9605l,0x7cfc4205f72691c7l,0x1544bf964d889b90l,
-            0xdc44d20ba0bbae7al },
-          { 0xee369b670b1f0b23l,0xf3ec25e818a7bdcbl,0xf614ab5df47ecf65l,
-            0x4869762f80a4a09dl },
-          0 },
-        /* 52 << 88 */
-        { { 0xedbbeee78a058fb6l,0xb9d19ddcfb09121al,0xa41bb45bd34dddcel,
-            0x2dbc80b900964bc4l },
-          { 0x4ed9137d1d6cb654l,0x1b9016db483d01c5l,0x5fc501bc6528e22el,
-            0xb2d2f8816cad646bl },
-          0 },
-        /* 53 << 88 */
-        { { 0xb57aa72a89043e56l,0x8fbca2435c5319fdl,0xe66aef43b13ce900l,
-            0x2c7c3927c3382934l },
-          { 0x434d9104a835fdf5l,0x419470b81b3b85bel,0xeaec374abeb4d448l,
-            0x26a53b51f33cda51l },
-          0 },
-        /* 55 << 88 */
-        { { 0x421f1725bb1db793l,0x20214d4f558c94a9l,0x3371233b7696092cl,
-            0x774d3fcb1902ab0el },
-          { 0x4ce223ded149aecel,0x174b260e33057bc7l,0xdf70cfa3f6effee4l,
-            0x3d8cd01f80880678l },
-          0 },
-        /* 57 << 88 */
-        { { 0x32db21862e59985cl,0x448865abaa1b39e1l,0x250ce79cd89fe98dl,
-            0x962710e763e3fb10l },
-          { 0xa8fc70561ac10e3el,0x9eed208fa3b132fbl,0xf499d638937051f5l,
-            0x27acf7ec21a9f78fl },
-          0 },
-        /* 59 << 88 */
-        { { 0x148e572a4c7b445el,0xdc10a0214dc95a4fl,0xe60e9c2e02237869l,
-            0xbfdfcb3aa393c3a4l },
-          { 0x8b799db211a64cf0l,0x1ca865ea2e16f59fl,0x865441fbd3a17e46l,
-            0x23315b9753409692l },
-          0 },
-        /* 60 << 88 */
-        { { 0x5e76fb2f286bad39l,0xbad9efe39dcad1e2l,0x60e75190edc7e904l,
-            0x6a6f063e0fecb5a5l },
-          { 0x5150ed85aed8acc3l,0xb56ccfbc6d20af6cl,0x7e0d1e982c69dbfal,
-            0xabf5628a7c7e10a9l },
-          0 },
-        /* 61 << 88 */
-        { { 0xb84af2c00df6d61fl,0x02c651c52acbaf4bl,0xfb605754afaaa0bfl,
-            0xa03f5257dff61017l },
-          { 0x9e3ffb1672762093l,0x4f9a5da0c4f40bd3l,0x37dce5220d26f8e1l,
-            0x260f736fc06a1a07l },
-          0 },
-        /* 63 << 88 */
-        { { 0xb92aba79b1077d55l,0xc52f81081a42f5f5l,0x9913f04f86e5aa99l,
-            0x6814b0b1f3c7f504l },
-          { 0xb7d61fd34d354bdal,0xf27926e39581d25el,0x97724001c2dc21adl,
-            0x835778231d5c4788l },
-          0 },
-        /* 64 << 88 */
-        { { 0x77b868cee978a1d3l,0xe3a68b337ab92d04l,0x5102979487a5b862l,
-            0x5f0606c33a61d41dl },
-          { 0x2814be276f9326f1l,0x2f521c14c6fe3c2el,0x17464d7dacdf7351l,
-            0x10f5f9d3777f7e44l },
-          0 },
-        /* 65 << 88 */
-        { { 0x53857462ff9727a2l,0xe6870e7dc68488e7l,0x276da72808c79656l,
-            0x1308eb61d86c24ebl },
-          { 0x34c43a84db0a3e56l,0x03961b5525335a59l,0xf9bc2d5805689d86l,
-            0xfa4d3c01eb29d6d6l },
-          0 },
-        /* 71 << 88 */
-        { { 0xd07dac3037d10ffal,0xb2b0a0fd8bef0a79l,0xa2e804510ec02505l,
-            0xf256c18962f55f5fl },
-          { 0x0ca3f9b10b39f4f0l,0x7bf4e1cf3bb7c8e9l,0x7a8a43f8ee11f227l,
-            0x2ad8431a3e4056ebl },
-          0 },
-        /* 77 << 88 */
-        { { 0xb8cf71ed031c1871l,0x702431806f703102l,0x9a87e1c24ec6f1b0l,
-            0xf7e6e5b4664f275dl },
-          { 0xc70a8b4e8c76b505l,0x6ba69bf2a002e9cfl,0x33ed74f7a0d8c9bfl,
-            0x17f5f4b18d9989del },
-          0 },
-        /* 83 << 88 */
-        { { 0xcd116dcb1b13a4a1l,0x591adb831c369877l,0x697be1aca6b8e80bl,
-            0xb2d4baa1b975d781l },
-          { 0xd4a9a496b16b48e7l,0x64de2d7af293997dl,0x039ae039af09a492l,
-            0x66e31a2665f3a485l },
-          0 },
-        /* 89 << 88 */
-        { { 0x110a8a42fec01a53l,0x1f5fcc1b38affab8l,0x757310ca9941a19el,
-            0x11ef95f76c29d6cbl },
-          { 0x0756bdb22dd427bal,0x8de8d44af3e16c33l,0xf9d28355e25aec52l,
-            0xeb761efc02f36465l },
-          0 },
-        /* 95 << 88 */
-        { { 0xfc83bf7454bfcd7al,0x51d861794837b6bel,0x8165b3f9801a324dl,
-            0x3a5972bc634cfd61l },
-          { 0xeecfe6d825258ed6l,0x51d968df1451ced0l,0x3010cdb8316aa0ael,
-            0xc295b8522900eaf2l },
-          0 },
-        /* 101 << 88 */
-        { { 0x5ad434a3890cc798l,0x4c17ff5e1531bce4l,0x825b5b5a5ea8e26fl,
-            0xacca9d5dd66fd7b3l },
-          { 0xb647dbde37ae6f92l,0xa5594868f3600416l,0x7b90ac53ab0c5d63l,
-            0x4b66ad7ceb43e1d0l },
-          0 },
-        /* 107 << 88 */
-        { { 0x04a211fac09ccbffl,0x9c96ad9ee873d898l,0x9eb1deb69c481f86l,
-            0xb3616ce8b2d70298l },
-          { 0x67a6fe9b9073726dl,0x5b8aa37d4c9bf744l,0xf558603ebb6aa0efl,
-            0x72767f5103d304fbl },
-          0 },
-        /* 113 << 88 */
-        { { 0x787cb8b8d6e9b7e3l,0x8bb30222e079fc68l,0x651a2ea6e3145a0bl,
-            0x0254c5da9ab18fa8l },
-          { 0x83722ffc12e1611fl,0xb0ddf1ffa7cc61bel,0x7c9c7e10ac0ac8d7l,
-            0x8241a8191da12218l },
-          0 },
-        /* 116 << 88 */
-        { { 0x70bb7719bc407e6el,0x231328efd84ceb41l,0x8bca6a1fc104bb20l,
-            0xd6f4e425280b9071l },
-          { 0xb41b95a292896a82l,0x735cf435fa34df67l,0xbc331a08d9d6d769l,
-            0x579786052682747el },
-          0 },
-        /* 119 << 88 */
-        { { 0x048ba499eb3af9a9l,0x43a8c367d50b82cel,0xedf9e2b21e0724d9l,
-            0x3098aab3d607140bl },
-          { 0xd1f18f1e5ed49eb9l,0xf9c6bb6ae0bb02a2l,0x204f96aa0cd245ddl,
-            0xdaadaf4afb011ed5l },
-          0 },
-        /* 125 << 88 */
-        { { 0xb298ce2de50404b1l,0x04dd38c45bf9b581l,0x229deabdfada51e8l,
-            0x74bd233f8788a132l },
-          { 0x951ba5ecf03e6c30l,0x9da2f5aa45bf1a41l,0x6bec7fea7e52b860l,
-            0x76e3778964b0a9ddl },
-          0 },
-    },
-    {
-        /* 0 << 96 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 96 */
-        { { 0x4fe7ee31b0e63d34l,0xf4600572a9e54fabl,0xc0493334d5e7b5a4l,
-            0x8589fb9206d54831l },
-          { 0xaa70f5cc6583553al,0x0879094ae25649e5l,0xcc90450710044652l,
-            0xebb0696d02541c4fl },
-          0 },
-        /* 3 << 96 */
-        { { 0xb99f0e0399375235l,0x7614c847b9917970l,0xfec93ce9524ec067l,
-            0xe40e7bf89b122520l },
-          { 0xb5670631ee4c4774l,0x6f03847a3b04914cl,0xc96e9429dc9dd226l,
-            0x43489b6c8c57c1f8l },
-          0 },
-        /* 4 << 96 */
-        { { 0x0e299d23fe67ba66l,0x9145076093cf2f34l,0xf45b5ea997fcf913l,
-            0x5be008438bd7dddal },
-          { 0x358c3e05d53ff04dl,0xbf7ccdc35de91ef7l,0xad684dbfb69ec1a0l,
-            0x367e7cf2801fd997l },
-          0 },
-        /* 5 << 96 */
-        { { 0x46ffd227cc2338fbl,0x89ff6fa990e26153l,0xbe570779331a0076l,
-            0x43d241c506e1f3afl },
-          { 0xfdcdb97dde9b62a3l,0x6a06e984a0ae30eal,0xc9bf16804fbddf7dl,
-            0x170471a2d36163c4l },
-          0 },
-        /* 7 << 96 */
-        { { 0x361619e455950cc3l,0xc71d665c56b66bb8l,0xea034b34afac6d84l,
-            0xa987f832e5e4c7e3l },
-          { 0xa07427727a79a6a7l,0x56e5d017e26d6c23l,0x7e50b97638167e10l,
-            0xaa6c81efe88aa84el },
-          0 },
-        /* 9 << 96 */
-        { { 0x473959d74d325bbfl,0x2a61beec8d6114b9l,0x25672a94924be2eel,
-            0xa48595dbf2c23d0cl },
-          { 0xe476848b6a221838l,0xe743e69a35c1b673l,0x2ab42499d8468503l,
-            0x62aa0054e9e90ba7l },
-          0 },
-        /* 10 << 96 */
-        { { 0x358d13f1bc482911l,0x685d1971b7fa7f26l,0x3e67a51d2be1aee4l,
-            0xe041850998d114a9l },
-          { 0x59639f604e052561l,0x32075c49155d0818l,0x2aa2343b67b64b1cl,
-            0x1b445e2967f53e6al },
-          0 },
-        /* 11 << 96 */
-        { { 0xbdfb271773a904e0l,0x7ce1e40b28888d73l,0x2e7e35f6eaa97d1bl,
-            0xd061772aa9afa097l },
-          { 0x434ac7c47a1f7c59l,0x6e21124ae79b7b9al,0x055acff3bb22ecc7l,
-            0x8bfd7ac984c858d3l },
-          0 },
-        /* 13 << 96 */
-        { { 0x2fd57df59f1f68adl,0x5ddcc6dbb06470c8l,0x801b6451a9b47307l,
-            0x6b51c8e376551bf4l },
-          { 0xef0bd1f7d44e1da9l,0x714bcb1d4d4e600cl,0xc57bb9e40c6540c7l,
-            0x71bd1ec2327cc644l },
-          0 },
-        /* 15 << 96 */
-        { { 0x9a52cf7e7f4dd81fl,0xa0132be15e69c05el,0x90dab7472a0f4d72l,
-            0xc142f911312d6706l },
-          { 0xe8d3631f8261998bl,0xf0f42fae615c1c94l,0x2f4e948caec3fa5dl,
-            0x242ae7a8a374101el },
-          0 },
-        /* 16 << 96 */
-        { { 0x0f893a5dc8de610bl,0xe8c515fb67e223cel,0x7774bfa64ead6dc5l,
-            0x89d20f95925c728fl },
-          { 0x7a1e0966098583cel,0xa2eedb9493f2a7d7l,0x1b2820974c304d4al,
-            0x0842e3dac077282dl },
-          0 },
-        /* 17 << 96 */
-        { { 0x1fa878cad088be52l,0x89c2cb07a9e1e656l,0x385bc5c3219d62dbl,
-            0xd82b676b5fda2752l },
-          { 0x2449dc9ee304eafcl,0x1e9e7991632f4ea2l,0x3036e061cdd5e0b9l,
-            0x75a6f6ff830825bcl },
-          0 },
-        /* 19 << 96 */
-        { { 0xb10fcddc449dedb4l,0x2c890042d1244acfl,0x9b3072cac7fc7017l,
-            0x1acda6859ce8063fl },
-          { 0xd243313c7f51e2f5l,0x52a3f1a4d73d9578l,0xda785b7a64f0ce6el,
-            0x2e766315442a4c2dl },
-          0 },
-        /* 21 << 96 */
-        { { 0x94f9b004151f111al,0xc7a5035b07dbc5fal,0x53958ea7609e49d7l,
-            0x0526b4d79013f4c0l },
-          { 0x66de5ebb593e2fbdl,0x6e7cf8b44c2e0c37l,0x6f72fc8b8c983e78l,
-            0x6fab9b632348f9d7l },
-          0 },
-        /* 23 << 96 */
-        { { 0xc748a3526a3d8468l,0x3fab479927e38032l,0x91ad3629fa430ce7l,
-            0xc5af0b2c71614c44l },
-          { 0xcede3fa50c211611l,0x6e6889ba02338083l,0xee0a195977f0fe32l,
-            0x01ea905d0f4bbc5al },
-          0 },
-        /* 25 << 96 */
-        { { 0x12cfb25e8193db48l,0xddb4ae633bea708cl,0xdaae102ef181f821l,
-            0x9d9d923024a089d9l },
-          { 0x71c4122da0876aeal,0x1a63ea3bbbe19c09l,0x3b898076016f8d0cl,
-            0xa5cccc5daea6b713l },
-          0 },
-        /* 27 << 96 */
-        { { 0xc3f22baf4a8e2f61l,0x77d29ede176da6a6l,0x40a55f211607da63l,
-            0x858b38561452e391l },
-          { 0x0dd3c267fe1b3c56l,0x66c04bdd7d55227al,0xfbd2fe55e6404e09l,
-            0x5981cf49ea9cfcbcl },
-          0 },
-        /* 28 << 96 */
-        { { 0xe549237f78890732l,0xc443bef953fcb4d9l,0x9884d8a6eb3480d6l,
-            0x8a35b6a13048b186l },
-          { 0xb4e4471665e9a90al,0x45bf380d653006c0l,0x8f3f820d4fe9ae3bl,
-            0x244a35a0979a3b71l },
-          0 },
-        /* 29 << 96 */
-        { { 0xae46a902aea870afl,0xa9b9fcf57cbedc99l,0x74f2ca3f79b7e793l,
-            0xadb8f2231dbeeb28l },
-          { 0x6302060e6764df85l,0x363320d257ebd554l,0xd9fd573e798d22e1l,
-            0x285f85f5ebb67dedl },
-          0 },
-        /* 31 << 96 */
-        { { 0xd86b329211caa2b5l,0x2a26258e39337bd1l,0x4dc5a9b579c8c291l,
-            0x16443d87741942e6l },
-          { 0x6bc9a2f8f811400cl,0x819c69359eeb4e0el,0xe1be7273ce0c214bl,
-            0x429afb8184b61581l },
-          0 },
-        /* 33 << 96 */
-        { { 0xb37e188756af5812l,0xd662bdb485aff83el,0xc89742d07bc63de7l,
-            0xea103f9d0279f487l },
-          { 0x4d26916a3a6cc639l,0x4eea3a3c7c743b94l,0x6a3e0dc7007376d9l,
-            0xdb6ef3cf573f904el },
-          0 },
-        /* 34 << 96 */
-        { { 0x9b1058ecb0b0fb53l,0x8955f5f75f8a9a9fl,0xf5f92e7f9f6f9e6dl,
-            0x03f5df6c50ec198bl },
-          { 0x6c8741f2b8aedbcel,0x8f4e60cfed8018f7l,0x6ca5297c9fa01f89l,
-            0x8591cf7a864995dbl },
-          0 },
-        /* 35 << 96 */
-        { { 0xa126147eb0a11b9bl,0xeedcc9e198900232l,0x15d94f8c2bead119l,
-            0x042423cfefc38691l },
-          { 0x6ce86fbe77165d91l,0xa07732126b3fd565l,0x8cdc409150b1f9c7l,
-            0x7f5ad1af064595acl },
-          0 },
-        /* 36 << 96 */
-        { { 0xed374a6658926dddl,0x138b2d49908015b8l,0x886c6579de1f7ab8l,
-            0x888b9aa0c3020b7al },
-          { 0xd3ec034e3a96e355l,0xba65b0b8f30fbe9al,0x064c8e50ff21367al,
-            0x1f508ea40b04b46el },
-          0 },
-        /* 37 << 96 */
-        { { 0x73644c158f8402a0l,0x0d9b5354f4730eb9l,0x78542af4e94cc278l,
-            0xf4dbede3e395f33al },
-          { 0x8fe8cbc590c70b00l,0x9c35bb2d7db197f6l,0x229b4973e6599746l,
-            0x0817d04e1a84b986l },
-          0 },
-        /* 39 << 96 */
-        { { 0x8ffe34e95ecd09b3l,0x6a7c3de4153b7cael,0xf02713e4a81044b7l,
-            0x85ca6158c70545c8l },
-          { 0xd3ff392845d88bffl,0x3a251a07f0bafe89l,0x61290e1287cea7f4l,
-            0xa360a17efa4808adl },
-          0 },
-        /* 40 << 96 */
-        { { 0x98561a49747c866cl,0xbbb1e5fe0518a062l,0x20ff4e8becdc3608l,
-            0x7f55cded20184027l },
-          { 0x8d73ec95f38c85f0l,0x5b589fdf8bc3b8c3l,0xbe95dd980f12b66fl,
-            0xf5bd1a090e338e01l },
-          0 },
-        /* 41 << 96 */
-        { { 0x2d1751083edf4e2bl,0x30e6e90fa29c10d0l,0xfee1eb14c9c6ccd2l,
-            0x244670c756a81453l },
-          { 0x90b33eefc5185c22l,0xd77ae4b63db82d28l,0xce5ee034f228f940l,
-            0x5d7660847bb47be5l },
-          0 },
-        /* 43 << 96 */
-        { { 0x88b7eec499b9a8c6l,0x56048d9e14e8ef0cl,0xa18f93215c89cf78l,
-            0xbd2087616d327e66l },
-          { 0x5b187225d9e53e27l,0xa57ca6c7bf4d0317l,0x187731d2e9557736l,
-            0xd4ce2f78a874982el },
-          0 },
-        /* 44 << 96 */
-        { { 0x65163ae55e915918l,0x6158d6d986f8a46bl,0x8466b538eeebf99cl,
-            0xca8761f6bca477efl },
-          { 0xaf3449c29ebbc601l,0xef3b0f41e0c3ae2fl,0xaa6c577d5de63752l,
-            0xe916660164682a51l },
-          0 },
-        /* 45 << 96 */
-        { { 0xf5b602bb29f47deal,0x42853c9659ddd679l,0x5c25be4041d7c001l,
-            0x8e069399d4a3b307l },
-          { 0x1782152e736ce467l,0x2e264109c9cb4f08l,0xf900cb11ab124698l,
-            0x1bbed1d02d6e05b1l },
-          0 },
-        /* 46 << 96 */
-        { { 0x9cc3fedc7da08b1fl,0x0f44949361d5ed38l,0xc8cbc4209b991b6bl,
-            0xee62a342891c42e1l },
-          { 0x11c496bb1a179139l,0x94ece2892eac4d8el,0x35f303a5a98d5570l,
-            0x69d4340514a31552l },
-          0 },
-        /* 47 << 96 */
-        { { 0x29d45e50892dfcbal,0x653e613e5c30cee3l,0x7b8c1ae61868a348l,
-            0x40ab51654f2c612al },
-          { 0x56e977f9891cdc8cl,0xee1ca12a34ca7cd1l,0xa4e283ee17b5ddf8l,
-            0x4e36f2fb6f536205l },
-          0 },
-        /* 48 << 96 */
-        { { 0x5a3097befc15aa1el,0x40d12548b54b0745l,0x5bad4706519a5f12l,
-            0xed03f717a439dee6l },
-          { 0x0794bb6c4a02c499l,0xf725083dcffe71d2l,0x2cad75190f3adcafl,
-            0x7f68ea1c43729310l },
-          0 },
-        /* 49 << 96 */
-        { { 0xa3834d85e89ea13fl,0x2ca00f942db803bbl,0x0f378681400ed3dal,
-            0x1028af6b54854da3l },
-          { 0x3928c2da06400c7fl,0x21119785d82aac92l,0x06618c17724e4af0l,
-            0x22b42b161470736bl },
-          0 },
-        /* 51 << 96 */
-        { { 0x7d0cfd48f7f2ac65l,0x46e1ac705f641b60l,0x0ab9566a0fcf0137l,
-            0xbd4380e0db460fb8l },
-          { 0x4550efbf6db99b55l,0x33846e669764b744l,0xacffa0cae34ca007l,
-            0xce642d6a077e646cl },
-          0 },
-        /* 52 << 96 */
-        { { 0xe747c8c7b7ffd977l,0xec104c3580761a22l,0x8395ebaf5a3ffb83l,
-            0xfb3261f4e4b63db7l },
-          { 0x53544960d883e544l,0x13520d708cc2eeb8l,0x08f6337bd3d65f99l,
-            0x83997db2781cf95bl },
-          0 },
-        /* 53 << 96 */
-        { { 0xd89112c47d8037a3l,0xcba48ad3464c2025l,0x3afea8399814a09dl,
-            0x69e52260269030b5l },
-          { 0x5b7067365c674805l,0x8c3fd33d87343f56l,0xc572c858b1c61edfl,
-            0x43d8f4ded06749cbl },
-          0 },
-        /* 55 << 96 */
-        { { 0x04da1f06b4066003l,0xf7d4e52f372749e8l,0x56cd667114b38747l,
-            0x1943a22a22eb6d9el },
-          { 0xc2c5391990714b0al,0xb6e3abb7d13cf3ael,0xfcd8d671676115cbl,
-            0x178ce1a0c06a0d3al },
-          0 },
-        /* 57 << 96 */
-        { { 0x94485b36913508f8l,0x92f87fe36de83b42l,0xedd476f0ed77e666l,
-            0xee90fbc68da2cf53l },
-          { 0x6f4afc53fc6cf3d9l,0x231bceb9f21f6ecfl,0x6504a11d494c6e9cl,
-            0xd3728f032c211461l },
-          0 },
-        /* 59 << 96 */
-        { { 0x09a9b93799562ca2l,0xb7d5c5cf6a5a5aa8l,0x52f5d7b9987b219dl,
-            0x33849f9ec38014d4l },
-          { 0x299adaf628f23880l,0x738ecc8874875588l,0x39d707adca2af665l,
-            0xc8c11f688f4c5f73l },
-          0 },
-        /* 60 << 96 */
-        { { 0x68e4f15e9afdfb3cl,0x49a561435bdfb6dfl,0xa9bc1bd45f823d97l,
-            0xbceb5970ea111c2al },
-          { 0x366b455fb269bbc4l,0x7cd85e1ee9bc5d62l,0xc743c41c4f18b086l,
-            0xa4b4099095294fb9l },
-          0 },
-        /* 61 << 96 */
-        { { 0x2ae046d66aa34757l,0x34db1addaa6d7e9dl,0x2b4b7e017ccf432bl,
-            0xfbe0bfa590d319c6l },
-          { 0xfb2981687ec7a7f2l,0x346cc46004f5132el,0x782b2e53b40aceddl,
-            0x402e1d64e3f0b8b9l },
-          0 },
-        /* 63 << 96 */
-        { { 0x2aa3b21d25a56088l,0xae6ee57543d08962l,0x669e42bff1e22297l,
-            0x7b4c635732e3a47al },
-          { 0x22b16260ea464a25l,0xad8ca59072d5cd7al,0x7c244266104eb96al,
-            0x1def95e28e7c11d2l },
-          0 },
-        /* 64 << 96 */
-        { { 0x9c7c581d26ee8382l,0xcf17dcc5359d638el,0xee8273abb728ae3dl,
-            0x1d112926f821f047l },
-          { 0x1149847750491a74l,0x687fa761fde0dfb9l,0x2c2580227ea435abl,
-            0x6b8bdb9491ce7e3fl },
-          0 },
-        /* 65 << 96 */
-        { { 0x1f04524cdc27e1f7l,0xa0c74f61572eab14l,0xdd5d0cfced272074l,
-            0x95533c1d5bfe4f65l },
-          { 0x3039d57ecce817cal,0x029967d73b822082l,0x9fca43866c4a10d3l,
-            0xf8b2a7f0bb4968ebl },
-          0 },
-        /* 71 << 96 */
-        { { 0x933cd6dcbfbf6407l,0xd08f21504be673f8l,0x0e1c4d0db1140a2el,
-            0x0502a092431b270al },
-          { 0x5d99f9508768c00al,0xda3ce5079b3ff3c7l,0x1c648b75031c11abl,
-            0x5e3de47bf2776305l },
-          0 },
-        /* 77 << 96 */
-        { { 0xe22af9274d2b9de4l,0xf3690f55a69609ecl,0x20260a6e453fbe18l,
-            0x8edcb46b42d0b085l },
-          { 0xd4ef250b7d9c7f58l,0x5e8578dfc83c3433l,0x9751d9b9e46e320al,
-            0xb02bd03cf3c58af6l },
-          0 },
-        /* 83 << 96 */
-        { { 0x0ab299ede1b4d1ccl,0x22e7301cec4d18d2l,0xf2380f2a7b86d4ffl,
-            0xca19ef9e40753713l },
-          { 0x52bb0d24678c38a1l,0xcc9d6fd499001c02l,0xa2dd6b00bc5876e4l,
-            0xfe04b402409fe2b3l },
-          0 },
-        /* 89 << 96 */
-        { { 0x7db986b1ff69f8d3l,0x648865e59d6266b9l,0x7ccfe96183f7dae5l,
-            0x0f59a8bd6828379bl },
-          { 0xad97e5ef0ac7c4e8l,0xa75914be784e9c18l,0x053e015bb18c1bb8l,
-            0x18f6cefcb347043el },
-          0 },
-        /* 95 << 96 */
-        { { 0xb4d641bdf257c38al,0xadcea4d0c1372574l,0x7f8d20be71c8f0d0l,
-            0x14a1d24c41dc6344l },
-          { 0xe446054e41f35526l,0x4664213823c952ddl,0xfbde483401f6b0acl,
-            0xc89eee66d75b6318l },
-          0 },
-        /* 101 << 96 */
-        { { 0x700242937a087392l,0xd42bd3aad5da04del,0xee64cb5b1f803414l,
-            0xd6341ecbbab52988l },
-          { 0x7ad522f343170a74l,0x5fba22536d61d9del,0x230304c1e845a6e5l,
-            0xd69feabfbc9e326bl },
-          0 },
-        /* 107 << 96 */
-        { { 0xef7e49412e8a11d7l,0x4cb8963662c8bae1l,0xecc741198aad5816l,
-            0x13490782c7af5175l },
-          { 0x10c701f73e91a604l,0xcb8c6c7124cc30c1l,0xce0d479c071eb382l,
-            0xa3dc71fb058087d4l },
-          0 },
-        /* 113 << 96 */
-        { { 0xec368492541eb6d1l,0x567735d6e09a94abl,0xb8039ec172350329l,
-            0x3bd83a8f4894ddafl },
-          { 0x740ef2a39c07063dl,0xba25e72277da7b59l,0xb09e248e3bf42e82l,
-            0x7ff36da0b017d037l },
-          0 },
-        /* 116 << 96 */
-        { { 0xca80416651b8d9a3l,0x42531bc90ffb0db1l,0x72ce4718aa82e7cel,
-            0x6e199913df574741l },
-          { 0xd5f1b13dd5d36946l,0x8255dc65f68f0194l,0xdc9df4cd8710d230l,
-            0x3453c20f138c1988l },
-          0 },
-        /* 119 << 96 */
-        { { 0x913f23b9ed08ac04l,0x18e336643590d098l,0xd3f72934e67536dcl,
-            0xf949a757ec7ecde9l },
-          { 0x37fc6583cf9cbd37l,0xcbe62cc043b1228el,0x777124948a743274l,
-            0x3ea3668c716ce6f1l },
-          0 },
-        /* 125 << 96 */
-        { { 0xc89ce010a90d375bl,0x39ac669340503fe3l,0x9036f782d33ecb0el,
-            0x5190656841fdc7d1l },
-          { 0xbefd136e917d94cdl,0x05fea2f22a511b24l,0x80e62d76f9076e0cl,
-            0x8c57635e418ba653l },
-          0 },
-    },
-    {
-        /* 0 << 104 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 104 */
-        { { 0x20d3c982cf7d62d2l,0x1f36e29d23ba8150l,0x48ae0bf092763f9el,
-            0x7a527e6b1d3a7007l },
-          { 0xb4a89097581a85e3l,0x1f1a520fdc158be5l,0xf98db37d167d726el,
-            0x8802786e1113e862l },
-          0 },
-        /* 3 << 104 */
-        { { 0xf6e894d1f4c6b6ecl,0x526b082718b3cd9bl,0x73f952a812117fbfl,
-            0x2be864b011945bf5l },
-          { 0x86f18ea542099b64l,0x2770b28a07548ce2l,0x97390f28295c1c9cl,
-            0x672e6a43cb5206c3l },
-          0 },
-        /* 4 << 104 */
-        { { 0xc37c7dd0c55c4496l,0xa6a9635725bbabd2l,0x5b7e63f2add7f363l,
-            0x9dce37822e73f1dfl },
-          { 0xe1e5a16ab2b91f71l,0xe44898235ba0163cl,0xf2759c32f6e515adl,
-            0xa5e2f1f88615eecfl },
-          0 },
-        /* 5 << 104 */
-        { { 0xcacce2c847c64367l,0x6a496b9f45af4ec0l,0x2a0836f36034042cl,
-            0x14a1f3900b6c62eal },
-          { 0xe7fa93633ef1f540l,0xd323b30a72a76d93l,0xffeec8b50feae451l,
-            0x4eafc172bd04ef87l },
-          0 },
-        /* 7 << 104 */
-        { { 0xe4435a51b3e59b89l,0x136139554133a1c9l,0x87f46973440bee59l,
-            0x714710f800c401e4l },
-          { 0xc0cf4bced6c446c9l,0xe0aa7fd66c4d5368l,0xde5d811afc68fc37l,
-            0x61febd72b7c2a057l },
-          0 },
-        /* 9 << 104 */
-        { { 0x27375fe665f837e2l,0x93f8c68bd882179fl,0x584feadc59b16187l,
-            0xe5b50be9483bc162l },
-          { 0x7ad9d6f1a2776625l,0xe9d1008004ff457bl,0x5b56d322677618a6l,
-            0x036694eae3e68673l },
-          0 },
-        /* 10 << 104 */
-        { { 0x6ca4f87e822e37bel,0x73f237b4253bda4el,0xf747f3a241190aebl,
-            0xf06fa36f804cf284l },
-          { 0x0a6bbb6efc621c12l,0x5d624b6440b80ec6l,0x4b0724257ba556f3l,
-            0x7fa0c3543e2d20a8l },
-          0 },
-        /* 11 << 104 */
-        { { 0x6feaffc51d8a4fd1l,0x59663b205f1ad208l,0xefc93cef24acb46al,
-            0x54929de05967118cl },
-          { 0x885708009acffb1cl,0x492bbf2b145639ecl,0x71f495a638f0018el,
-            0xe24365dbc2792847l },
-          0 },
-        /* 13 << 104 */
-        { { 0x4bedae86a6f29002l,0x7abedb56e034457al,0x8bf3eec6179bff2al,
-            0x9d626d57390f4e6bl },
-          { 0x653fe0e914dd6ea3l,0x7483715989bd6d08l,0x85fb05b4ebd9b03dl,
-            0x7dc3f2214a768bbcl },
-          0 },
-        /* 15 << 104 */
-        { { 0xaacc63f132b0ed8fl,0x041237242bafefd2l,0x0df9a7987e2d2a13l,
-            0x09bd13cf9c27591fl },
-          { 0xaa5f5e476e1afb50l,0xcd146a42b66eb646l,0x3f07561d1442ec3cl,
-            0x7e5471738ae8ec47l },
-          0 },
-        /* 16 << 104 */
-        { { 0x8de2b7bc453cadd6l,0x203900a7bc0bc1f8l,0xbcd86e47a6abd3afl,
-            0x911cac128502effbl },
-          { 0x2d550242ec965469l,0x0e9f769229e0017el,0x633f078f65979885l,
-            0xfb87d4494cf751efl },
-          0 },
-        /* 17 << 104 */
-        { { 0x2c3e61196c0c6cd5l,0x5e01a49a99f4aac8l,0xfa518fc92ef1565el,
-            0xf64ff8714f772366l },
-          { 0x52fcbc2b726420d0l,0x30fbf6eb76cfa9eel,0x0bd17139fa618268l,
-            0x23ed6e122087535dl },
-          0 },
-        /* 19 << 104 */
-        { { 0x76098e38bb4ccb2cl,0x44e88aeeafbad6d1l,0x5c4d286771928778l,
-            0xb1df868138534c94l },
-          { 0x67eb8f4d77ce9debl,0x2a86d0461a77c55dl,0xc327181e46a6a3e7l,
-            0x68fd611b8710e206l },
-          0 },
-        /* 21 << 104 */
-        { { 0xc093f3fc0c82bdf1l,0x21db25894f76c4a6l,0xf3dcb22ee410a7ael,
-            0x1db37114f3c22ffel },
-          { 0x9bd0a1fb58f6801dl,0x2cab103bd1b55cc8l,0x2ae1a7f5077ba4b2l,
-            0x82b46642ce5ab2b3l },
-          0 },
-        /* 23 << 104 */
-        { { 0xc8477ec52546684cl,0xe3f9387702ff02b5l,0xefb72133ae5d04cdl,
-            0x644905c339f10d02l },
-          { 0x1750c87c13d8d356l,0x0e9b8063b41e7640l,0xc7ece04f5647b05bl,
-            0x89a43da7ca9df9c4l },
-          0 },
-        /* 25 << 104 */
-        { { 0x02610ef1920eb7d9l,0x34bd2fc2e1ea1dc0l,0xcb89da255170b890l,
-            0xaaa2796461cff827l },
-          { 0xc308c9d37103ed6al,0xe82d63d5a467564al,0x94c897c4a0fa7732l,
-            0x75eb52fa64c7aa5fl },
-          0 },
-        /* 27 << 104 */
-        { { 0x52582f9cb985fcb6l,0xaaef8d9f8508a691l,0x494c2c346e505131l,
-            0x6d062362d55f30f6l },
-          { 0x70059e9122e1e32fl,0x1507c3fe9e51abb0l,0xd8aba31b2b7bda72l,
-            0x5acbc5f77b753f13l },
-          0 },
-        /* 28 << 104 */
-        { { 0x15bfb8bf5116f937l,0x7c64a586c1268943l,0x71e25cc38419a2c8l,
-            0x9fd6b0c48335f463l },
-          { 0x4bf0ba3ce8ee0e0el,0x6f6fba60298c21fal,0x57d57b39ae66bee0l,
-            0x292d513022672544l },
-          0 },
-        /* 29 << 104 */
-        { { 0x075dc81953952ff6l,0xd4d9eeda20b7384dl,0x8a81c1bfd2d6c6a5l,
-            0x319368a0db050f3bl },
-          { 0x91f476de31f1cee2l,0x1b38604500d0e17fl,0xed2081889a820384l,
-            0x8d00c411a0f1a637l },
-          0 },
-        /* 31 << 104 */
-        { { 0xb029b687a47fd8f0l,0xa531360696371a05l,0x7b84e88c5ab09140l,
-            0x87dad7c85eeb1d14l },
-          { 0xef0749b9d0edf6f3l,0x29fc7310e2ef198bl,0x01e05df5069ed399l,
-            0x121db4ecdf4e2fcal },
-          0 },
-        /* 33 << 104 */
-        { { 0xe730f3f62826bee0l,0xb9bdbe3fce332a8fl,0x1ecad11766ec00aal,
-            0x7503d835617a62d1l },
-          { 0x9f34e161b862b139l,0xde42194cf30f6a67l,0x5037a953c1e879fel,
-            0x62f321f89bda45dbl },
-          0 },
-        /* 34 << 104 */
-        { { 0xe87771d8033f2876l,0xb0186ec67d5cc3dbl,0x58e8bb803bc9bc1dl,
-            0x4d1395cc6f6ef60el },
-          { 0xa73c62d6186244a0l,0x918e5f23110a5b53l,0xed4878ca741b7eabl,
-            0x3038d71adbe03e51l },
-          0 },
-        /* 35 << 104 */
-        { { 0xcbdba27c40234d55l,0x24352b6cb3eb56c9l,0xae681b85a8e9295al,
-            0x2a6cfba1f1171664l },
-          { 0x49f045838ca40c3cl,0xe56da25c6eb0f8eal,0x8e62f86fc4341a4el,
-            0x7f68bdc64c3f947fl },
-          0 },
-        /* 36 << 104 */
-        { { 0x840204b7a93c3246l,0x21ab6069a0b9b4cdl,0xf5fa6e2bb1d64218l,
-            0x1de6ad0ef3d56191l },
-          { 0x570aaa88ff1929c7l,0xc6df4c6b640e87b5l,0xde8a74f2c65f0cccl,
-            0x8b972fd5e6f6cc01l },
-          0 },
-        /* 37 << 104 */
-        { { 0x862013c00bf22173l,0xfd004c834acd8e23l,0x50e422ca310b1649l,
-            0xe6d04de65bbe1854l },
-          { 0x651f646385761ef3l,0x3b17d38652cf85c9l,0xbdce284a5f54ecc7l,
-            0x72efcd3ec7c2106cl },
-          0 },
-        /* 39 << 104 */
-        { { 0x34324b182ff07e3el,0x29938f38f50bcb71l,0xd0e3d7b977e2bcc3l,
-            0x8e78f007c0a3292bl },
-          { 0xfa28c530005c2c00l,0x6f9c21d51faa0c5al,0x3df01abd7b9c78f3l,
-            0x0e5618c1ccaaeb7el },
-          0 },
-        /* 40 << 104 */
-        { { 0xaa6778fce7560b90l,0xb4073e61a7e824cel,0xff0d693cd642eba8l,
-            0x7ce2e57a5dccef38l },
-          { 0x89c2c7891df1ad46l,0x83a06922098346fdl,0x2d715d72da2fc177l,
-            0x7b6dd71d85b6cf1dl },
-          0 },
-        /* 41 << 104 */
-        { { 0x4601a6a492ad3889l,0xdc8e3364d9a0709fl,0x0c687f2b2c260327l,
-            0xe882af62e1a79573l },
-          { 0x0cfd00ab945d9017l,0xe6df7505d0e3c188l,0xb389a66dbde825a2l,
-            0x126d77b6bcd8e14fl },
-          0 },
-        /* 43 << 104 */
-        { { 0xc800acc7db18ec73l,0x0ebecc78d86e99efl,0x675796cdbd05bc5fl,
-            0x254498126afd7c7fl },
-          { 0x96293b695969b165l,0xd8514d83c162c8dal,0xe174f8b674a15a5cl,
-            0x880d687389a2f73cl },
-          0 },
-        /* 44 << 104 */
-        { { 0x53703a328300129fl,0x1f63766268c43bfdl,0xbcbd191300e54051l,
-            0x812fcc627bf5a8c5l },
-          { 0x3f969d5f29fb85dal,0x72f4e00a694759e8l,0x426b6e52790726b7l,
-            0x617bbc873bdbb209l },
-          0 },
-        /* 45 << 104 */
-        { { 0xf536f07cad1deb2el,0x2a13a11ea87a710el,0x0ce2ccab64f4dc96l,
-            0x16178694f5a55464l },
-          { 0x1496168da2cb3986l,0xb079a5b9d56a93a9l,0x97005e99092893d3l,
-            0x55df5ed6e8fcc6c3l },
-          0 },
-        /* 46 << 104 */
-        { { 0x511f8bb997aee317l,0x812a4096e81536a8l,0x137dfe593ac09b9bl,
-            0x0682238fba8c9a7al },
-          { 0x7072ead6aeccb4bdl,0x6a34e9aa692ba633l,0xc82eaec26fff9d33l,
-            0xfb7535121d4d2b62l },
-          0 },
-        /* 47 << 104 */
-        { { 0x821dca8bbf328b1cl,0x24596ddd5a3d6830l,0x061c4c15635b5b4cl,
-            0x0e2b3bef4fa3560al },
-          { 0xffced37498906c43l,0x10ebd174e26b3784l,0x7cd068c470039bb5l,
-            0xc47dda0f88404e59l },
-          0 },
-        /* 48 << 104 */
-        { { 0x1a0445ff1d7aadabl,0x65d38260d5f6a67cl,0x6e62fb0891cfb26fl,
-            0xef1e0fa55c7d91d6l },
-          { 0x47e7c7ba33db72cdl,0x017cbc09fa7c74b2l,0x3c931590f50a503cl,
-            0xcac54f60616baa42l },
-          0 },
-        /* 49 << 104 */
-        { { 0x7ad7d13569185235l,0x19771949fb69e030l,0xd4de9717bc45fb4fl,
-            0x5657b076167e5739l },
-          { 0x9503a71fdd27449el,0xfa2fabf73cc01347l,0xf8ecef24c83fb301l,
-            0x527012bd5a8d5078l },
-          0 },
-        /* 51 << 104 */
-        { { 0x70a550d7e6fc3a32l,0x8e5875841951fe57l,0x5e6d43eaaab9788bl,
-            0x1e406fed80599794l },
-          { 0xd8164ace9ed2557cl,0xf9648f30ff593e10l,0x53af2fd80c2ff879l,
-            0x6705993cc9409bf4l },
-          0 },
-        /* 52 << 104 */
-        { { 0x04b005b6c6458293l,0x36bb5276e8d10af7l,0xacf2dc138ee617b8l,
-            0x470d2d35b004b3d4l },
-          { 0x06790832feeb1b77l,0x2bb75c3985657f9cl,0xd70bd4edc0f60004l,
-            0xfe797ecc219b018bl },
-          0 },
-        /* 53 << 104 */
-        { { 0xeca02ebf0ef19ceel,0xac691fbe2de090a4l,0x1f3866641b374547l,
-            0xbd8018c6a12ee85fl },
-          { 0x3e851318ee63e0f1l,0x45b0c37a161987d3l,0x67fe36056eb567c4l,
-            0x07c291b563200c5bl },
-          0 },
-        /* 55 << 104 */
-        { { 0xc85535ac1a956a8al,0x7bf4d70bc0ade321l,0xaf2efc48237bc56fl,
-            0xf9bfe13e31ba97e7l },
-          { 0x2ca5fac4cf7c6c65l,0xc23b14ff03ec3e35l,0xc5109923217bcfd2l,
-            0xf02f96a1c58f32f3l },
-          0 },
-        /* 57 << 104 */
-        { { 0x3b1f715b0d0aeff4l,0xbe406d62f0d44536l,0xe413843d567bcb38l,
-            0x75b7fb43791e705al },
-          { 0x5b831d4b224f85e5l,0x3fea6659d9a35eael,0xd6f8bd097c85480bl,
-            0x2a9561a34a959267l },
-          0 },
-        /* 59 << 104 */
-        { { 0x4a96a3535a303c10l,0x9aa3ad71c37c8d7el,0x4e2d077fde52014fl,
-            0x4d8bec5df8e3964dl },
-          { 0xda88ab94e865e142l,0x52df506d10a88091l,0x9aebff0092fc38a2l,
-            0xdfc034395608b0a2l },
-          0 },
-        /* 60 << 104 */
-        { { 0xee23fa819966e7eel,0x64ec4aa805b7920dl,0x2d44462d2d90aad4l,
-            0xf44dd195df277ad5l },
-          { 0x8d6471f1bb46b6a1l,0x1e65d313fd885090l,0x33a800f513a977b4l,
-            0xaca9d7210797e1efl },
-          0 },
-        /* 61 << 104 */
-        { { 0xb1557be2a4ea787el,0x59324973019f667fl,0x262ceced5595367cl,
-            0x8a676897ec598640l },
-          { 0x2df6cebfc7f06f4fl,0xb255723138078f9al,0xad553c46524a0dd1l,
-            0xe20bb20a5a68d62al },
-          0 },
-        /* 63 << 104 */
-        { { 0x6f47e3779589e263l,0x7cb83e3d35106bb8l,0x2642d87bcc632fc2l,
-            0x4d18f34d8b77eb36l },
-          { 0x7de6bf6d19ca4d1cl,0x438e8f02f7e926aal,0xb539021250ac930al,
-            0xe34ddfc15b219a9fl },
-          0 },
-        /* 64 << 104 */
-        { { 0x98857ceb1bf4581cl,0xe635e186aca7b166l,0x278ddd22659722acl,
-            0xa0903c4c1db68007l },
-          { 0x366e458948f21402l,0x31b49c14b96abda2l,0x329c4b09e0403190l,
-            0x97197ca3d29f43fel },
-          0 },
-        /* 65 << 104 */
-        { { 0xfe4de13781479db4l,0x307331f012f08ea5l,0x7f59a64758c04c13l,
-            0x6b41189abdc9b3c9l },
-          { 0xb10f11e5a6f8c5edl,0x757fb7a3f5b0579el,0x456d0a873c90d027l,
-            0x7e8bb6bf32361796l },
-          0 },
-        /* 71 << 104 */
-        { { 0x6aa1dc6c9e689d8dl,0xaa5fa015479cdd09l,0x7eb4dbb582fc000al,
-            0x4a57b689eff4e701l },
-          { 0x7bfe8d2a8e15cd8cl,0xab109b1cc9074e1al,0x5716715fee1619a5l,
-            0xf29a51eccdcb40bcl },
-          0 },
-        /* 77 << 104 */
-        { { 0x14c76234ddf03c6el,0xdfb5d388baeb2eddl,0x4bd85da26d413d2dl,
-            0x5b0dd9be3ae38469l },
-          { 0xe4d8a9d89ab3ae61l,0xb9e37b880ee63951l,0x17f08e9b21a7f30fl,
-            0x173db1e8119af788l },
-          0 },
-        /* 83 << 104 */
-        { { 0x2352ad4a170d43f6l,0x098d74f65a0ae4b0l,0x290f5236c3a46c2al,
-            0xea9266102dd87e7fl },
-          { 0xd7ee90f6848e6911l,0xebe8f4cce0d8886fl,0xa2038320558ff6a0l,
-            0x1f716534f37c38cfl },
-          0 },
-        /* 89 << 104 */
-        { { 0x9754209439a4a159l,0xe6135412fed24278l,0xbba62254d70e2cabl,
-            0x4ac6a8ac85895130l },
-          { 0xc01614fee1a45363l,0x720ad3f8b67294f2l,0x724ea95cb420ea51l,
-            0x1f40ab2d712b856cl },
-          0 },
-        /* 95 << 104 */
-        { { 0x708e1c7975f3d30cl,0x423f1535e2172da3l,0x7a29be342a06a0b1l,
-            0x9de5c9eb32c68ba2l },
-          { 0x70217b0232d48793l,0x3cf3855bac1471cfl,0x6762d03f8321e179l,
-            0x06ee12ea236fa7cfl },
-          0 },
-        /* 101 << 104 */
-        { { 0x1718e7428779109bl,0x6188008d0aca350bl,0xbbe227e00594bc15l,
-            0x4a7b6423ddbdea35l },
-          { 0x06ad632dfa44e1bfl,0xaf9c163d1e97b409l,0x64dafec3c61f2b2fl,
-            0xc6759d905525c0c9l },
-          0 },
-        /* 107 << 104 */
-        { { 0x76d6294787517149l,0x2bda339baa77d325l,0x04b1bec067ad1fd1l,
-            0x49f63fcc0aec7c73l },
-          { 0x005cb459ec1bf494l,0x8fa99c1b1ec6f8bbl,0x70a4e6d78b59dd43l,
-            0xfd70bcb313d6594dl },
-          0 },
-        /* 113 << 104 */
-        { { 0x2987a7cb13966c11l,0x74ad0a26a783f283l,0xf011200ae54d27f0l,
-            0xbd8632963fb38396l },
-          { 0x7ec7fe8c9b86d059l,0xfa94ca76d0cd33a7l,0xf6ad741cdc646993l,
-            0x83054a427ebc34e9l },
-          0 },
-        /* 116 << 104 */
-        { { 0xadef8c5a192ef710l,0x88afbd4b3b7431f9l,0x7e1f740764250c9el,
-            0x6e31318db58bec07l },
-          { 0xfd4fc4b824f89b4el,0x65a5dd8848c36a2al,0x4f1eccfff024baa7l,
-            0x22a21cf2cba94650l },
-          0 },
-        /* 119 << 104 */
-        { { 0x7b45865478f39754l,0xcbb8b96c4564e003l,0xb492d2bf69b35752l,
-            0x4e6287e065ee5ad3l },
-          { 0x07906c14eb1ffe62l,0xf350390c681fcdf8l,0xc351386f6be3eec3l,
-            0x8480d00ee5df919dl },
-          0 },
-        /* 125 << 104 */
-        { { 0x399861ecf8a2d5aal,0xb179adeb046f78cbl,0x056a6cd88792f647l,
-            0xd3dfc91c3d411820l },
-          { 0x4ccf92d179693be1l,0x12ecd9a3f65cb250l,0x58e5d2102538b9e7l,
-            0x4e655882ff977ccal },
-          0 },
-    },
-    {
-        /* 0 << 112 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 112 */
-        { { 0x8ce9b6bfc360e25al,0xe6425195075a1a78l,0x9dc756a8481732f4l,
-            0x83c0440f5432b57al },
-          { 0xc670b3f1d720281fl,0x2205910ed135e051l,0xded14b0edb052be7l,
-            0x697b3d27c568ea39l },
-          0 },
-        /* 3 << 112 */
-        { { 0x0b89de9314092ebbl,0xf17256bd428e240cl,0xcf89a7f393d2f064l,
-            0x4f57841ee1ed3b14l },
-          { 0x4ee14405e708d855l,0x856aae7203f1c3d0l,0xc8e5424fbdd7eed5l,
-            0x3333e4ef73ab4270l },
-          0 },
-        /* 4 << 112 */
-        { { 0x3bc77adedda492f8l,0xc11a3aea78297205l,0x5e89a3e734931b4cl,
-            0x17512e2e9f5694bbl },
-          { 0x5dc349f3177bf8b6l,0x232ea4ba08c7ff3el,0x9c4f9d16f511145dl,
-            0xccf109a333b379c3l },
-          0 },
-        /* 5 << 112 */
-        { { 0xe75e7a88a1f25897l,0x7ac6961fa1b5d4d8l,0xe3e1077308f3ed5cl,
-            0x208a54ec0a892dfbl },
-          { 0xbe826e1978660710l,0x0cf70a97237df2c8l,0x418a7340ed704da5l,
-            0xa3eeb9a908ca33fdl },
-          0 },
-        /* 7 << 112 */
-        { { 0xb4323d588434a920l,0xc0af8e93622103c5l,0x667518ef938dbf9al,
-            0xa184307383a9cdf2l },
-          { 0x350a94aa5447ab80l,0xe5e5a325c75a3d61l,0x74ba507f68411a9el,
-            0x10581fc1594f70c5l },
-          0 },
-        /* 9 << 112 */
-        { { 0x5aaa98a7cb0c9c8cl,0x75105f3081c4375cl,0xceee50575ef1c90fl,
-            0xb31e065fc23a17bfl },
-          { 0x5364d275d4b6d45al,0xd363f3ad62ec8996l,0xb5d212394391c65bl,
-            0x84564765ebb41b47l },
-          0 },
-        /* 10 << 112 */
-        { { 0x20d18ecc37107c78l,0xacff3b6b570c2a66l,0x22f975d99bd0d845l,
-            0xef0a0c46ba178fa0l },
-          { 0x1a41965176b6028el,0xc49ec674248612d4l,0x5b6ac4f27338af55l,
-            0x06145e627bee5a36l },
-          0 },
-        /* 11 << 112 */
-        { { 0x33e95d07e75746b5l,0x1c1e1f6dc40c78bel,0x967833ef222ff8e2l,
-            0x4bedcf6ab49180adl },
-          { 0x6b37e9c13d7a4c8al,0x2748887c6ddfe760l,0xf7055123aa3a5bbcl,
-            0x954ff2257bbb8e74l },
-          0 },
-        /* 13 << 112 */
-        { { 0x4e23ca446d3fea55l,0xb4ae9c86f4810568l,0x47bfb91b2a62f27dl,
-            0x60deb4c9d9bac28cl },
-          { 0xa892d8947de6c34cl,0x4ee682594494587dl,0x914ee14e1a3f8a5bl,
-            0xbb113eaa28700385l },
-          0 },
-        /* 15 << 112 */
-        { { 0xef9dc899a7b56eafl,0x00c0e52c34ef7316l,0x5b1e4e24fe818a86l,
-            0x9d31e20dc538be47l },
-          { 0x22eb932d3ed68974l,0xe44bbc087c4e87c4l,0x4121086e0dde9aefl,
-            0x8e6b9cff134f4345l },
-          0 },
-        /* 16 << 112 */
-        { { 0x96892c1f711b0eb9l,0xb905f2c8780ab954l,0xace26309a20792dbl,
-            0xec8ac9b30684e126l },
-          { 0x486ad8b6b40a2447l,0x60121fc19fe3fb24l,0x5626fccf1a8e3b3fl,
-            0x4e5686226ad1f394l },
-          0 },
-        /* 17 << 112 */
-        { { 0xda7aae0d196aa5a1l,0xe0df8c771041b5fbl,0x451465d926b318b7l,
-            0xc29b6e557ab136e9l },
-          { 0x2c2ab48b71148463l,0xb5738de364454a76l,0x54ccf9a05a03abe4l,
-            0x377c02960427d58el },
-          0 },
-        /* 19 << 112 */
-        { { 0x90e4f7c92d7d1413l,0x67e2d6b59834f597l,0x4fd4f4f9a808c3e8l,
-            0xaf8237e0d5281ec1l },
-          { 0x25ab5fdc84687ceel,0xc5ded6b1a5b26c09l,0x8e4a5aecc8ea7650l,
-            0x23b73e5c14cc417fl },
-          0 },
-        /* 21 << 112 */
-        { { 0xb4293fdcf50225f9l,0xc52e175cb0e12b03l,0xf649c3bad0a8bf64l,
-            0x745a8fefeb8ae3c6l },
-          { 0x30d7e5a358321bc3l,0xb1732be70bc4df48l,0x1f217993e9ea5058l,
-            0xf7a71cde3e4fd745l },
-          0 },
-        /* 23 << 112 */
-        { { 0xa188b2502d0f39aal,0x622118bb15a85947l,0x2ebf520ffde0f4fal,
-            0xa40e9f294860e539l },
-          { 0x7b6a51eb22b57f0fl,0x849a33b97e80644al,0x50e5d16f1cf095fel,
-            0xd754b54eec55f002l },
-          0 },
-        /* 25 << 112 */
-        { { 0xcd821dfb988baf01l,0xe6331a7ddbb16647l,0x1eb8ad33094cb960l,
-            0x593cca38c91bbca5l },
-          { 0x384aac8d26567456l,0x40fa0309c04b6490l,0x97834cd6dab6c8f6l,
-            0x68a7318d3f91e55fl },
-          0 },
-        /* 27 << 112 */
-        { { 0xc7bfd486605daaa6l,0x46fd72b7bb9a6c9el,0xe4847fb1a124fb89l,
-            0x75959cbda2d8ffbcl },
-          { 0x42579f65c8a588eel,0x368c92e6b80b499dl,0xea4ef6cd999a5df1l,
-            0xaa73bb7f936fe604l },
-          0 },
-        /* 28 << 112 */
-        { { 0xf347a70d6457d188l,0x86eda86b8b7a388bl,0xb7cdff060ccd6013l,
-            0xbeb1b6c7d0053fb2l },
-          { 0x0b02238799240a9fl,0x1bbb384f776189b2l,0x8695e71e9066193al,
-            0x2eb5009706ffac7el },
-          0 },
-        /* 29 << 112 */
-        { { 0x0654a9c04a7d2caal,0x6f3fb3d1a5aaa290l,0x835db041ff476e8fl,
-            0x540b8b0bc42295e4l },
-          { 0xa5c73ac905e214f5l,0x9a74075a56a0b638l,0x2e4b1090ce9e680bl,
-            0x57a5b4796b8d9afal },
-          0 },
-        /* 31 << 112 */
-        { { 0x2a2bfa7f650006f0l,0xdfd7dad350c0fbb2l,0x92452495ccf9ad96l,
-            0x183bf494d95635f9l },
-          { 0x02d5df434a7bd989l,0x505385cca5431095l,0xdd98e67dfd43f53el,
-            0xd61e1a6c500c34a9l },
-          0 },
-        /* 33 << 112 */
-        { { 0x41d85ea1ef74c45bl,0x2cfbfa66ae328506l,0x98b078f53ada7da9l,
-            0xd985fe37ec752fbbl },
-          { 0xeece68fe5a0148b4l,0x6f9a55c72d78136dl,0x232dccc4d2b729cel,
-            0xa27e0dfd90aafbc4l },
-          0 },
-        /* 34 << 112 */
-        { { 0x9647445212b4603el,0xa876c5516b706d14l,0xdf145fcf69a9d412l,
-            0xe2ab75b72d479c34l },
-          { 0x12df9a761a23ff97l,0xc61389925d359d10l,0x6e51c7aefa835f22l,
-            0x69a79cb1c0fcc4d9l },
-          0 },
-        /* 35 << 112 */
-        { { 0xf57f350d594cc7e1l,0x3079ca633350ab79l,0x226fb6149aff594al,
-            0x35afec026d59a62bl },
-          { 0x9bee46f406ed2c6el,0x58da17357d939a57l,0x44c504028fd1797el,
-            0xd8853e7c5ccea6cal },
-          0 },
-        /* 36 << 112 */
-        { { 0x4065508da35fcd5fl,0x8965df8c495ccaebl,0x0f2da85012e1a962l,
-            0xee471b94c1cf1cc4l },
-          { 0xcef19bc80a08fb75l,0x704958f581de3591l,0x2867f8b23aef4f88l,
-            0x8d749384ea9f9a5fl },
-          0 },
-        /* 37 << 112 */
-        { { 0x1b3855378c9049f4l,0x5be948f37b92d8b6l,0xd96f725db6e2bd6bl,
-            0x37a222bc958c454dl },
-          { 0xe7c61abb8809bf61l,0x46f07fbc1346f18dl,0xfb567a7ae87c0d1cl,
-            0x84a461c87ef3d07al },
-          0 },
-        /* 39 << 112 */
-        { { 0x3ab3d5afbd76e195l,0x478dd1ad6938a810l,0x6ffab3936ee3d5cbl,
-            0xdfb693db22b361e4l },
-          { 0xf969449651dbf1a7l,0xcab4b4ef08a2e762l,0xe8c92f25d39bba9al,
-            0x850e61bcf1464d96l },
-          0 },
-        /* 40 << 112 */
-        { { 0xb7e830e3dc09508bl,0xfaf6d2cf74317655l,0x72606cebdf690355l,
-            0x48bb92b3d0c3ded6l },
-          { 0x65b754845c7cf892l,0xf6cd7ac9d5d5f01fl,0xc2c30a5996401d69l,
-            0x91268650ed921878l },
-          0 },
-        /* 41 << 112 */
-        { { 0x380bf913b78c558fl,0x43c0baebc8afdaa9l,0x377f61d554f169d3l,
-            0xf8da07e3ae5ff20bl },
-          { 0xb676c49da8a90ea8l,0x81c1ff2b83a29b21l,0x383297ac2ad8d276l,
-            0x3001122fba89f982l },
-          0 },
-        /* 43 << 112 */
-        { { 0xbbe1e6a6c93f72d6l,0xd5f75d12cad800eal,0xfa40a09fe7acf117l,
-            0x32c8cdd57581a355l },
-          { 0x742219927023c499l,0xa8afe5d738ec3901l,0x5691afcba90e83f0l,
-            0x41bcaa030b8f8eacl },
-          0 },
-        /* 44 << 112 */
-        { { 0xe38b5ff98d2668d5l,0x0715281a7ad81965l,0x1bc8fc7c03c6ce11l,
-            0xcbbee6e28b650436l },
-          { 0x06b00fe80cdb9808l,0x17d6e066fe3ed315l,0x2e9d38c64d0b5018l,
-            0xab8bfd56844dcaefl },
-          0 },
-        /* 45 << 112 */
-        { { 0x42894a59513aed8bl,0xf77f3b6d314bd07al,0xbbdecb8f8e42b582l,
-            0xf10e2fa8d2390fe6l },
-          { 0xefb9502262a2f201l,0x4d59ea5050ee32b0l,0xd87f77286da789a8l,
-            0xcf98a2cff79492c4l },
-          0 },
-        /* 46 << 112 */
-        { { 0xf9577239720943c2l,0xba044cf53990b9d0l,0x5aa8e82395f2884al,
-            0x834de6ed0278a0afl },
-          { 0xc8e1ee9a5f25bd12l,0x9259ceaa6f7ab271l,0x7e6d97a277d00b76l,
-            0x5c0c6eeaa437832al },
-          0 },
-        /* 47 << 112 */
-        { { 0x5232c20f5606b81dl,0xabd7b3750d991ee5l,0x4d2bfe358632d951l,
-            0x78f8514698ed9364l },
-          { 0x951873f0f30c3282l,0x0da8ac80a789230bl,0x3ac7789c5398967fl,
-            0xa69b8f7fbdda0fb5l },
-          0 },
-        /* 48 << 112 */
-        { { 0xe5db77176add8545l,0x1b71cb6672c49b66l,0xd856073968421d77l,
-            0x03840fe883e3afeal },
-          { 0xb391dad51ec69977l,0xae243fb9307f6726l,0xc88ac87be8ca160cl,
-            0x5174cced4ce355f4l },
-          0 },
-        /* 49 << 112 */
-        { { 0x98a35966e58ba37dl,0xfdcc8da27817335dl,0x5b75283083fbc7bfl,
-            0x68e419d4d9c96984l },
-          { 0x409a39f402a40380l,0x88940faf1fe977bcl,0xc640a94b8f8edea6l,
-            0x1e22cd17ed11547dl },
-          0 },
-        /* 51 << 112 */
-        { { 0x17ba93b1a20ef103l,0xad8591306ba6577bl,0x65c91cf66fa214a0l,
-            0xd7d49c6c27990da5l },
-          { 0xecd9ec8d20bb569dl,0xbd4b2502eeffbc33l,0x2056ca5a6bed0467l,
-            0x7916a1f75b63728cl },
-          0 },
-        /* 52 << 112 */
-        { { 0xd4f9497d53a4f566l,0x8973466497b56810l,0xf8e1da740494a621l,
-            0x82546a938d011c68l },
-          { 0x1f3acb19c61ac162l,0x52f8fa9cabad0d3el,0x15356523b4b7ea43l,
-            0x5a16ad61ae608125l },
-          0 },
-        /* 53 << 112 */
-        { { 0xb0bcb87f4faed184l,0x5f236b1d5029f45fl,0xd42c76070bc6b1fcl,
-            0xc644324e68aefce3l },
-          { 0x8e191d595c5d8446l,0xc020807713ae1979l,0xadcaee553ba59cc7l,
-            0x20ed6d6ba2cb81bal },
-          0 },
-        /* 55 << 112 */
-        { { 0x7392b41a530ccbbdl,0x87c82146ea823525l,0xa52f984c05d98d0cl,
-            0x2ae57d735ef6974cl },
-          { 0x9377f7bf3042a6ddl,0xb1a007c019647a64l,0xfaa9079a0cca9767l,
-            0x3d81a25bf68f72d5l },
-          0 },
-        /* 57 << 112 */
-        { { 0xc110d830b0f2ac95l,0x48d0995aab20e64el,0x0f3e00e17729cd9al,
-            0x2a570c20dd556946l },
-          { 0x912dbcfd4e86214dl,0x2d014ee2cf615498l,0x55e2b1e63530d76el,
-            0xc5135ae4fd0fd6d1l },
-          0 },
-        /* 59 << 112 */
-        { { 0x1854daa5061f1658l,0xc0016df1df0cd2b3l,0xc2a3f23e833d50del,
-            0x73b681d2bbbd3017l },
-          { 0x2f046dc43ac343c0l,0x9c847e7d85716421l,0xe1e13c910917eed4l,
-            0x3fc9eebd63a1b9c6l },
-          0 },
-        /* 60 << 112 */
-        { { 0x0f816a727fe02299l,0x6335ccc2294f3319l,0x3820179f4745c5bel,
-            0xe647b782922f066el },
-          { 0xc22e49de02cafb8al,0x299bc2fffcc2ecccl,0x9a8feea26e0e8282l,
-            0xa627278bfe893205l },
-          0 },
-        /* 61 << 112 */
-        { { 0xa7e197337933e47bl,0xf4ff6b132e766402l,0xa4d8be0a98440d9fl,
-            0x658f5c2f38938808l },
-          { 0x90b75677c95b3b3el,0xfa0442693137b6ffl,0x077b039b43c47c29l,
-            0xcca95dd38a6445b2l },
-          0 },
-        /* 63 << 112 */
-        { { 0x583f3703f9374ab6l,0x864f91956e564145l,0x33bc3f4822526d50l,
-            0x9f323c801262a496l },
-          { 0xaa97a7ae3f046a9al,0x70da183edf8a039al,0x5b68f71c52aa0ba6l,
-            0x9be0fe5121459c2dl },
-          0 },
-        /* 64 << 112 */
-        { { 0xc1e17eb6cbc613e5l,0x33131d55497ea61cl,0x2f69d39eaf7eded5l,
-            0x73c2f434de6af11bl },
-          { 0x4ca52493a4a375fal,0x5f06787cb833c5c2l,0x814e091f3e6e71cfl,
-            0x76451f578b746666l },
-          0 },
-        /* 65 << 112 */
-        { { 0xa700767eabd0cc76l,0xa14ae98015889273l,0x5acf2cc466ea6380l,
-            0xb942cc40d08d18b9l },
-          { 0x9b5daa763ae45782l,0x61a25e0fb72f0ce0l,0xf94c0e80435fefe3l,
-            0x73d552cf1620e1c9l },
-          0 },
-        /* 71 << 112 */
-        { { 0x57130582727185c1l,0x8f2b8ebc163897ecl,0x4a059cc7a04e4a6bl,
-            0x4b1de9fe0908a366l },
-          { 0xa4f7738688d0fef0l,0x55e3bb1d9ebfc138l,0x9022bbef005ae362l,
-            0xf5669edc8741d349l },
-          0 },
-        /* 77 << 112 */
-        { { 0xf192c0f7ede937a4l,0xd2e91d62810c1b1el,0xf2b40b64dcc39c69l,
-            0xe125fbd028f03b0el },
-          { 0x52966dd78da708f9l,0x92d400a3cc0e7f32l,0x4e35aae36b0842b8l,
-            0x0b4fe66ded3ad3cfl },
-          0 },
-        /* 83 << 112 */
-        { { 0x14b81d951f1ff6b5l,0x1d82f132ed9b03b8l,0x52f6f029b4fa4047l,
-            0xea653682601e5913l },
-          { 0x4e900375edeee046l,0xd22ed267f9428714l,0xb004fb3b1753e873l,
-            0xfef061ba245b2c09l },
-          0 },
-        /* 89 << 112 */
-        { { 0x5e2376eaf9deba2bl,0x1ed1e9e5269a18cfl,0x8dffd66dcb1cada8l,
-            0xb13239f068369c77l },
-          { 0x2fede3a67f25426fl,0xc885cf0c6f90a2a6l,0xd950162d4eeac543l,
-            0x53011aa09abc201bl },
-          0 },
-        /* 95 << 112 */
-        { { 0x7a63925d432b798al,0x92e762cfc9bd6da9l,0xf22fb9706a190382l,
-            0x19919b847b18a9b3l },
-          { 0x16793b803adfde86l,0xf9ce15ace8b1d44cl,0x4bf74144c0a140b8l,
-            0x680468616f853f6cl },
-          0 },
-        /* 101 << 112 */
-        { { 0xd4e0d8460db84ba2l,0x9a162a3a360b68bbl,0x7297f3939233146cl,
-            0xbc93c2f4ec77412dl },
-          { 0x13ddf0a7e07e1065l,0x000a8d45fb5e5131l,0xb4373078cf61d467l,
-            0xa4a1fd67bf3bb6f9l },
-          0 },
-        /* 107 << 112 */
-        { { 0x6f2473f9d7585098l,0x45a29448d4f23c1al,0x47fe40f1c22bdc25l,
-            0x4e46ed1f31347673l },
-          { 0x5e43a8624148898cl,0x4a02ededa993954el,0x83d830b52f8a1847l,
-            0x007e3156a7f6a378l },
-          0 },
-        /* 113 << 112 */
-        { { 0x01a39fe7e847ca18l,0xaf2722418fed2772l,0x3104ef891fbb1748l,
-            0x5b55331b2b9dd5ffl },
-          { 0xe7806e31cec6a787l,0x9f49ed881e9c0af2l,0xf5a66373a3905b36l,
-            0x77b5bca9efab75f3l },
-          0 },
-        /* 116 << 112 */
-        { { 0xd4d75f4bf0831932l,0x5e770ac477fe8cc9l,0x52b5e748862e72a2l,
-            0xe9a45482501d35fel },
-          { 0x8a93e7424a9ab187l,0x5a72506de88ca017l,0xe680dcb201eb2defl,
-            0xdc5aa4e6ba68209dl },
-          0 },
-        /* 119 << 112 */
-        { { 0x2defa3dc3d01a344l,0x11fd939b162e459al,0x928453b97313d720l,
-            0x08696dc053184a65l },
-          { 0xd9f8a69c721f7415l,0x304eb0e079539019l,0xc9b0ca6dbb0c6313l,
-            0xa10133eba93dc74el },
-          0 },
-        /* 125 << 112 */
-        { { 0xee0b164004393f1el,0x511547dfe1301979l,0xc00dfc3516d26d87l,
-            0x06227c8aab847494l },
-          { 0x178ca86748b2fdc7l,0xb51296f01a8ba1dcl,0xf252787731e1dd14l,
-            0x7ecb5456c0ba2a1fl },
-          0 },
-    },
-    {
-        /* 0 << 120 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 120 */
-        { { 0x3e0e5c9dd111f8ecl,0xbcc33f8db7c4e760l,0x702f9a91bd392a51l,
-            0x7da4a795c132e92dl },
-          { 0x1a0b0ae30bb1151bl,0x54febac802e32251l,0xea3a5082694e9e78l,
-            0xe58ffec1e4fe40b8l },
-          0 },
-        /* 3 << 120 */
-        { { 0x7b23c513516e19e4l,0x56e2e847c5c4d593l,0x9f727d735ce71ef6l,
-            0x5b6304a6f79a44c5l },
-          { 0x6638a7363ab7e433l,0x1adea470fe742f83l,0xe054b8545b7fc19fl,
-            0xf935381aba1d0698l },
-          0 },
-        /* 4 << 120 */
-        { { 0xb5504f9d918e4936l,0x65035ef6b2513982l,0x0553a0c26f4d9cb9l,
-            0x6cb10d56bea85509l },
-          { 0x48d957b7a242da11l,0x16a4d3dd672b7268l,0x3d7e637c8502a96bl,
-            0x27c7032b730d463bl },
-          0 },
-        /* 5 << 120 */
-        { { 0x55366b7d5846426fl,0xe7d09e89247d441dl,0x510b404d736fbf48l,
-            0x7fa003d0e784bd7dl },
-          { 0x25f7614f17fd9596l,0x49e0e0a135cb98dbl,0x2c65957b2e83a76al,
-            0x5d40da8dcddbe0f8l },
-          0 },
-        /* 7 << 120 */
-        { { 0x9fb3bba354530bb2l,0xbde3ef77cb0869eal,0x89bc90460b431163l,
-            0x4d03d7d2e4819a35l },
-          { 0x33ae4f9e43b6a782l,0x216db3079c88a686l,0x91dd88e000ffedd9l,
-            0xb280da9f12bd4840l },
-          0 },
-        /* 9 << 120 */
-        { { 0xa37f3573f37f5937l,0xeb0f6c7dd1e4fca5l,0x2965a554ac8ab0fcl,
-            0x17fbf56c274676acl },
-          { 0x2e2f6bd9acf7d720l,0x41fc8f8810224766l,0x517a14b385d53befl,
-            0xdae327a57d76a7d1l },
-          0 },
-        /* 10 << 120 */
-        { { 0x515d5c891f5f82dcl,0x9a7f67d76361079el,0xa8da81e311a35330l,
-            0xe44990c44b18be1bl },
-          { 0xc7d5ed95af103e59l,0xece8aba78dac9261l,0xbe82b0999394b8d3l,
-            0x6830f09a16adfe83l },
-          0 },
-        /* 11 << 120 */
-        { { 0x43c41ac194d7d9b1l,0x5bafdd82c82e7f17l,0xdf0614c15fda0fcal,
-            0x74b043a7a8ae37adl },
-          { 0x3ba6afa19e71734cl,0x15d5437e9c450f2el,0x4a5883fe67e242b1l,
-            0x5143bdc22c1953c2l },
-          0 },
-        /* 13 << 120 */
-        { { 0xc676d7f2b1f3390bl,0x9f7a1b8ca5b61272l,0x4ebebfc9c2e127a9l,
-            0x4602500c5dd997bfl },
-          { 0x7f09771c4711230fl,0x058eb37c020f09c1l,0xab693d4bfee5e38bl,
-            0x9289eb1f4653cbc0l },
-          0 },
-        /* 15 << 120 */
-        { { 0x54da9dc7ab952578l,0xb5423df226e84d0bl,0xa8b64eeb9b872042l,
-            0xac2057825990f6dfl },
-          { 0x4ff696eb21f4c77al,0x1a79c3e4aab273afl,0x29bc922e9436b3f1l,
-            0xff807ef8d6d9a27al },
-          0 },
-        /* 16 << 120 */
-        { { 0xc7f3a8f833f6746cl,0x21e46f65fea990cal,0x915fd5c5caddb0a9l,
-            0xbd41f01678614555l },
-          { 0x346f4434426ffb58l,0x8055943614dbc204l,0xf3dd20fe5a969b7fl,
-            0x9d59e956e899a39al },
-          0 },
-        /* 17 << 120 */
-        { { 0xe4ca688fd06f56c0l,0xa48af70ddf027972l,0x691f0f045e9a609dl,
-            0xa9dd82cdee61270el },
-          { 0x8903ca63a0ef18d3l,0x9fb7ee353d6ca3bdl,0xa7b4a09cabf47d03l,
-            0x4cdada011c67de8el },
-          0 },
-        /* 19 << 120 */
-        { { 0xac127dc1e038a675l,0x729deff38c5c6320l,0xb7df8fd4a90d2c53l,
-            0x9b74b0ec681e7cd3l },
-          { 0x5cb5a623dab407e5l,0xcdbd361576b340c6l,0xa184415a7d28392cl,
-            0xc184c1d8e96f7830l },
-          0 },
-        /* 21 << 120 */
-        { { 0x86a9303b2f7e85c3l,0x5fce462171988f9bl,0x5b935bf6c138acb5l,
-            0x30ea7d6725661212l },
-          { 0xef1eb5f4e51ab9a2l,0x0587c98aae067c78l,0xb3ce1b3c77ca9ca6l,
-            0x2a553d4d54b5f057l },
-          0 },
-        /* 23 << 120 */
-        { { 0x2c7156e10b1894a0l,0x92034001d81c68c0l,0xed225d00c8b115b5l,
-            0x237f9c2283b907f2l },
-          { 0x0ea2f32f4470e2c0l,0xb725f7c158be4e95l,0x0f1dcafab1ae5463l,
-            0x59ed51871ba2fc04l },
-          0 },
-        /* 25 << 120 */
-        { { 0xd1b0ccdec9520711l,0x55a9e4ed3c8b84bfl,0x9426bd39a1fef314l,
-            0x4f5f638e6eb93f2bl },
-          { 0xba2a1ed32bf9341bl,0xd63c13214d42d5a9l,0xd2964a89316dc7c5l,
-            0xd1759606ca511851l },
-          0 },
-        /* 27 << 120 */
-        { { 0xedf69feaf8c51187l,0x05bb67ec741e4da7l,0x47df0f3208114345l,
-            0x56facb07bb9792b1l },
-          { 0xf3e007e98f6229e4l,0x62d103f4526fba0fl,0x4f33bef7b0339d79l,
-            0x9841357bb59bfec1l },
-          0 },
-        /* 28 << 120 */
-        { { 0xae1e0b67e28ef5bal,0x2c9a4699cb18e169l,0x0ecd0e331e6bbd20l,
-            0x571b360eaf5e81d2l },
-          { 0xcd9fea58101c1d45l,0x6651788e18880452l,0xa99726351f8dd446l,
-            0x44bed022e37281d0l },
-          0 },
-        /* 29 << 120 */
-        { { 0x830e6eea60dbac1fl,0x23d8c484da06a2f7l,0x896714b050ca535bl,
-            0xdc8d3644ebd97a9bl },
-          { 0x106ef9fab12177b4l,0xf79bf464534d5d9cl,0x2537a349a6ab360bl,
-            0xc7c54253a00c744fl },
-          0 },
-        /* 31 << 120 */
-        { { 0x24d661d168754ab0l,0x801fce1d6f429a76l,0xc068a85fa58ce769l,
-            0xedc35c545d5eca2bl },
-          { 0xea31276fa3f660d1l,0xa0184ebeb8fc7167l,0x0f20f21a1d8db0ael,
-            0xd96d095f56c35e12l },
-          0 },
-        /* 33 << 120 */
-        { { 0x57d2046b59da06ebl,0x3c076d5fa49f6d74l,0x6b4c96e616f82ea0l,
-            0xaf7b0f1f90536c0bl },
-          { 0x7999f86d204a9b2dl,0x7e420264126c9f87l,0x4c967a1f262ac4e5l,
-            0xe8174a09900e79adl },
-          0 },
-        /* 34 << 120 */
-        { { 0xd51687f2cb82516bl,0x8a440cfc040e4670l,0xeafd2bcfe7738d32l,
-            0x7071e9162a1e911al },
-          { 0xbd3abd44cfea57bbl,0x9c3add16085b19e2l,0xb194c01d6baa5aa6l,
-            0x6f3d3faf92f85c64l },
-          0 },
-        /* 35 << 120 */
-        { { 0xe23e0769488a280el,0x8e55a728e63a5904l,0x01690716ab84cccfl,
-            0xfe796130b78b3c98l },
-          { 0x15cc475b9117f211l,0xbdc178761d1b9d56l,0x8df5594a3e37b9b9l,
-            0x97747e341e37e494l },
-          0 },
-        /* 36 << 120 */
-        { { 0xf2a6370ed2f896e1l,0x27100e63802987afl,0xb4db1cff4678ebc7l,
-            0x6e5f28d937b4b263l },
-          { 0xd29030009711ebc4l,0xf14dcb9ff8712484l,0x7a46ec3eea449146l,
-            0x200155e9c1c51179l },
-          0 },
-        /* 37 << 120 */
-        { { 0x8130f007f1968d55l,0x18823e7097ed9803l,0xdc9fec559402762dl,
-            0x9e0bd57e278f5abbl },
-          { 0xaa41b913c9ebf303l,0x1105ec43a76b9353l,0xf8e4ee4cf4e6c6b5l,
-            0x3a630972bd7be696l },
-          0 },
-        /* 39 << 120 */
-        { { 0x5c7da7e16356b3eel,0x951bfe458ccf9b48l,0x6f2c6e91d0555d0cl,
-            0x47d7f7b58efd38eel },
-          { 0x957256c8af6fd630l,0xa690c65bdc01774cl,0xad52b27c7c8dafdal,
-            0x81fbc16af44a145fl },
-          0 },
-        /* 40 << 120 */
-        { { 0x497c3a3481b0493al,0x2b3ab20d71bc8408l,0x0c60226aa03769d1l,
-            0x4ac89c7ad10708b0l },
-          { 0x62398ea5092f7e6al,0x7f408f54de96d526l,0x025bde6f85bf102cl,
-            0xcc2f85120a4aa72el },
-          0 },
-        /* 41 << 120 */
-        { { 0x8a65e0386884a9c3l,0xd2e6ac047bf8c794l,0xc9c5d3d3f7bcdfb9l,
-            0x0000ce42a33f2c12l },
-          { 0xea1c0a9a7dd13b2bl,0xbfd97d7f0c35c3b1l,0x0ba75cf3347fcefel,
-            0xc3c5f28f1333460dl },
-          0 },
-        /* 43 << 120 */
-        { { 0x7810ebf575baa708l,0xe7fa7a0dd7440549l,0x25b813baf0667e4al,
-            0x30a46740d15838a8l },
-          { 0x13207b1ad04b22f7l,0x09e601ffd1419699l,0xb1038fc77f687b27l,
-            0xa4547dc9a127f95bl },
-          0 },
-        /* 44 << 120 */
-        { { 0x83b2e3b3056ecd2cl,0xd17dcdaaf03dfd36l,0xee24a5f81dcef956l,
-            0xb6746cd0b7239f16l },
-          { 0xed6cb311c8458c48l,0xe8c0fc9805d27da4l,0x4610e9a0a1bf0970l,
-            0x1947f01d9906c19el },
-          0 },
-        /* 45 << 120 */
-        { { 0x8b979126217c7cd7l,0x65c57a378050067el,0x6a50c6383f34838cl,
-            0x3de617c29b7bc81fl },
-          { 0x58488d24253a0ac7l,0x3fe53ec75520ba0bl,0x9156dca763f0607el,
-            0xdd08c5705d1fe134l },
-          0 },
-        /* 46 << 120 */
-        { { 0xbfb1d9e1e33ba77fl,0x0985311ccaef6c01l,0xc8b59e9accca8948l,
-            0x1256280945416f25l },
-          { 0xc90edbc257f53218l,0xcaa08c05125d8fb5l,0x33ea3fd49a1aad3bl,
-            0x2aa8bd83d005e8bel },
-          0 },
-        /* 47 << 120 */
-        { { 0xcbd2f1a3c2b22963l,0x0f7bd29c0c8ac2b3l,0xddb932432d405bfdl,
-            0xeabd4805328413b5l },
-          { 0xcc79d31748ebb6b9l,0x09604f831f521aael,0xd3487fdf4c7d188cl,
-            0xd219c318d1552ea9l },
-          0 },
-        /* 48 << 120 */
-        { { 0xef4f115c775d6ecel,0x69d2e3bbe8c0e78dl,0xb0264ef1145cfc81l,
-            0x0a41e9fa1b69788bl },
-          { 0x0d9233be909a1f0bl,0x150a84520ae76b30l,0xea3375370632bb69l,
-            0x15f7b3cfaa25584al },
-          0 },
-        /* 49 << 120 */
-        { { 0xfc4c623e321f7b11l,0xd36c1066f9cbc693l,0x8165235835dc0c0al,
-            0xa3ce2e18c824e97el },
-          { 0x59ea7cbcc6ff405el,0xced5a94a1e56a1e2l,0x88d744c53ab64b39l,
-            0x8963d029073a36e7l },
-          0 },
-        /* 51 << 120 */
-        { { 0x97aa902cb19f3edbl,0x8e605ff9bbf2975bl,0x0536fa8ba6eb299bl,
-            0xfd96da4f7cd03ac0l },
-          { 0x29c5b5b578f9a265l,0x1f025a6d5fd0bc1bl,0x440486ee58e0f8e1l,
-            0x8f191f7d593e49e9l },
-          0 },
-        /* 52 << 120 */
-        { { 0xbddf656baea9c13fl,0x083c5d514c678b37l,0x975431b630878ed4l,
-            0x6de13d4608d9cf1cl },
-          { 0xfbb639cc02427c45l,0x6190ca0c5a6cd989l,0x35a6aa26c53f11b7l,
-            0x73f9e17dddfd86f6l },
-          0 },
-        /* 53 << 120 */
-        { { 0xd30478a317be7689l,0x6fc3f634e358f7a7l,0x4057ece515688d9fl,
-            0xb5397495d3d91eefl },
-          { 0x62fac49e2f49bde4l,0xeb4a3e1860125c73l,0x15f38be8dabdac55l,
-            0x18bf29f7d334d52al },
-          0 },
-        /* 55 << 120 */
-        { { 0xf684162b68777538l,0x3e2a770bbb3381f4l,0x1b7562c1b374577cl,
-            0x9eec22dc5cf21688l },
-          { 0xc35014b1d472be2cl,0xafe2317035f086fbl,0xb9c9c4c9a1491ce1l,
-            0x2df1e669b56792ddl },
-          0 },
-        /* 57 << 120 */
-        { { 0xcf7d36fe1830f624l,0x176c3c12ed0474bdl,0x25b802c8f82b493dl,
-            0x683c2a744c78147el },
-          { 0x0db99444f8f3e446l,0x437bcac6800a56c7l,0xb4e592264d08b25fl,
-            0xcaf1b4142e691ca7l },
-          0 },
-        /* 59 << 120 */
-        { { 0x378bd47b9d231cafl,0xde3aa2f01f4db832l,0xf609d16ab29bd7d5l,
-            0x13feab54bdfb54dfl },
-          { 0x274abbbc22fc1a12l,0x267febb47d30ef1bl,0xeffa996d80717cd8l,
-            0x065a86d1118d0812l },
-          0 },
-        /* 60 << 120 */
-        { { 0xc681a8656a3cb3afl,0x528f25a981751414l,0x6669f07cc7eac946l,
-            0x9fb3a53f3cc6cc6bl },
-          { 0x2919d92a11ae224al,0xa59141110b170a19l,0xdc16c611e2042f16l,
-            0x58ace12decd4180bl },
-          0 },
-        /* 61 << 120 */
-        { { 0x689bb1ec107bb59fl,0x8129702adad2b385l,0x10bd3baeb1630603l,
-            0xaadec5d15f23e7cfl },
-          { 0x572f234f4586f7fbl,0x13abdec95ec11b32l,0xa462a7ec6191c26al,
-            0x4a7d92a06685c8d3l },
-          0 },
-        /* 63 << 120 */
-        { { 0xdd4e2b63b16628eal,0xdf0c8fc8eefa5e86l,0xb0ec710205662720l,
-            0x3f4c6956fe81e9dal },
-          { 0x5732ad8f52e356f7l,0x045a103968a658f0l,0x9c40b0b6506ba33al,
-            0x0a426010cb54258dl },
-          0 },
-        /* 64 << 120 */
-        { { 0x09891641d4c5105fl,0x1ae80f8e6d7fbd65l,0x9d67225fbee6bdb0l,
-            0x3b433b597fc4d860l },
-          { 0x44e66db693e85638l,0xf7b59252e3e9862fl,0xdb785157665c32ecl,
-            0x702fefd7ae362f50l },
-          0 },
-        /* 65 << 120 */
-        { { 0x3902ab14c3254641l,0xa63cfd9fd8c001c8l,0x597d155c52d0af3cl,
-            0xc5a2cbc4a0dbe688l },
-          { 0xac8a841b249195aal,0xc98f01aaed14426fl,0xeb4a8ce8353905f1l,
-            0x4d6668171ecee1b7l },
-          0 },
-        /* 71 << 120 */
-        { { 0xbd66e7d9a94da8cdl,0x7bc04735801ef314l,0x90f3eba1c5cc2904l,
-            0x3c7dfed6f71bb36dl },
-          { 0x89a50c8da75e3086l,0x88b8b4746f8e3418l,0x26fe17f4a44a5dbdl,
-            0x98bf74c16a1e24fel },
-          0 },
-        /* 77 << 120 */
-        { { 0xca7b470679e0db85l,0x7f46c7716fc897fdl,0x9537e7918edfc0f3l,
-            0xa46d4b4405e91ddfl },
-          { 0x97d21061ee5575e7l,0x1f4f32da59650429l,0x2d1d6af878995129l,
-            0x41d6fc228a0e4260l },
-          0 },
-        /* 83 << 120 */
-        { { 0xb30a1a89107d2282l,0x5433d7673a5e1323l,0xb9eeab822abdfeafl,
-            0x9579cb46df3e0dbfl },
-          { 0x6fc3ff2c7e088e79l,0x94b32360d7314326l,0xd2e82b59e5ad82e4l,
-            0x7372dc4a55bc24e3l },
-          0 },
-        /* 89 << 120 */
-        { { 0x355697215f3c03cbl,0x4150adf2a146edcdl,0x16ec1a421a252e1cl,
-            0xdf4d0f94424984eal },
-          { 0x15142b5f5fabe961l,0xe6a73c29567ec13al,0xe6d370795d12070al,
-            0x437743d0206fd7c6l },
-          0 },
-        /* 95 << 120 */
-        { { 0x483b7a95d66bc594l,0xf6a7064e8a6113bbl,0x373ce20f4ed34f72l,
-            0x6aa876ab24f429b2l },
-          { 0x378d5c25412c3102l,0xe4219a97b493199cl,0x01c7cafaa0b37332l,
-            0x9305cc85f7633f7dl },
-          0 },
-        /* 101 << 120 */
-        { { 0x0259b43aaadf2273l,0x869c5bd3cf9dc1c2l,0x4f18a6e4068d6628l,
-            0xd110637fec2d4547l },
-          { 0x1ae88a791e94aaddl,0xe8b4be39de64f5f9l,0x85cbd9b24dc6b2bbl,
-            0xb65091fa1bc352b2l },
-          0 },
-        /* 107 << 120 */
-        { { 0x7c5cea5d20f6a354l,0xe936ff1582f3ed39l,0x54e7a775b779368el,
-            0x8ca8a46e3cb17c9el },
-          { 0x753ca1fa0138974dl,0x9ce311eba72902ffl,0xcb727e56973f72b6l,
-            0xde72538d91685710l },
-          0 },
-        /* 113 << 120 */
-        { { 0xf423569f1bec8f85l,0x23376da5ca844ac4l,0xce7b407a111523f4l,
-            0x736fb92dde7aa46dl },
-          { 0xd9139edcc7662640l,0x520fbf0656a85e24l,0x14e3b5857e5284b5l,
-            0xcbae4e8321d56ef3l },
-          0 },
-        /* 116 << 120 */
-        { { 0x69830a05564470a1l,0x1a1e26cf5b702e8el,0xe5fdf7d9d8fae645l,
-            0xe4774f74a9950c66l },
-          { 0x18bdda7cd1466825l,0xe6ab4ce6d115218al,0xfcb8c50064528629l,
-            0xd705f429e70deed9l },
-          0 },
-        /* 119 << 120 */
-        { { 0x3f992d7ba99df096l,0x08993b4125e78725l,0x79eaad13117c4cafl,
-            0x7230594c9fa87285l },
-          { 0xac23d7edf2673e27l,0xc9d76fb53b9eb111l,0x7a0a036a9e9db78al,
-            0x7c6ec39df9565cffl },
-          0 },
-        /* 125 << 120 */
-        { { 0x956ad1441fd4f7a1l,0x6c511ffecb7546cal,0x11becdaef5ae6ddbl,
-            0x67587741946168b2l },
-          { 0x99cd45edf54379a7l,0x687f8462e2748decl,0x2b2be1e1837bd066l,
-            0x3862659c0c45a5a9l },
-          0 },
-    },
-    {
-        /* 0 << 128 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 128 */
-        { { 0x62a8c244bfe20925l,0x91c19ac38fdce867l,0x5a96a5d5dd387063l,
-            0x61d587d421d324f6l },
-          { 0xe87673a2a37173eal,0x2384800853778b65l,0x10f8441e05bab43el,
-            0xfa11fe124621efbel },
-          0 },
-        /* 3 << 128 */
-        { { 0xc0f734a3b2335834l,0x9526205a90ef6860l,0xcb8be71704e2bb0dl,
-            0x2418871e02f383fal },
-          { 0xd71776814082c157l,0xcc914ad029c20073l,0xf186c1ebe587e728l,
-            0x6fdb3c2261bcd5fdl },
-          0 },
-        /* 4 << 128 */
-        { { 0xb4480f0441c23fa3l,0xb4712eb0c1989a2el,0x3ccbba0f93a29ca7l,
-            0x6e205c14d619428cl },
-          { 0x90db7957b3641686l,0x0432691d45ac8b4el,0x07a759acf64e0350l,
-            0x0514d89c9c972517l },
-          0 },
-        /* 5 << 128 */
-        { { 0xcc7c4c1c2cf9d7c1l,0x1320886aee95e5abl,0xbb7b9056beae170cl,
-            0xc8a5b250dbc0d662l },
-          { 0x4ed81432c11d2303l,0x7da669121f03769fl,0x3ac7a5fd84539828l,
-            0x14dada943bccdd02l },
-          0 },
-        /* 7 << 128 */
-        { { 0x51b90651cbae2f70l,0xefc4bc0593aaa8ebl,0x8ecd8689dd1df499l,
-            0x1aee99a822f367a5l },
-          { 0x95d485b9ae8274c5l,0x6c14d4457d30b39cl,0xbafea90bbcc1ef81l,
-            0x7c5f317aa459a2edl },
-          0 },
-        /* 9 << 128 */
-        { { 0x410dc6a90deeaf52l,0xb003fb024c641c15l,0x1384978c5bc504c4l,
-            0x37640487864a6a77l },
-          { 0x05991bc6222a77dal,0x62260a575e47eb11l,0xc7af6613f21b432cl,
-            0x22f3acc9ab4953e9l },
-          0 },
-        /* 10 << 128 */
-        { { 0x27c8919240be34e8l,0xc7162b3791907f35l,0x90188ec1a956702bl,
-            0xca132f7ddf93769cl },
-          { 0x3ece44f90e2025b4l,0x67aaec690c62f14cl,0xad74141822e3cc11l,
-            0xcf9b75c37ff9a50el },
-          0 },
-        /* 11 << 128 */
-        { { 0x0d0942770c24efc8l,0x0349fd04bef737a4l,0x6d1c9dd2514cdd28l,
-            0x29c135ff30da9521l },
-          { 0xea6e4508f78b0b6fl,0x176f5dd2678c143cl,0x081484184be21e65l,
-            0x27f7525ce7df38c4l },
-          0 },
-        /* 13 << 128 */
-        { { 0x9faaccf5e4652f1dl,0xbd6fdd2ad56157b2l,0xa4f4fb1f6261ec50l,
-            0x244e55ad476bcd52l },
-          { 0x881c9305047d320bl,0x1ca983d56181263fl,0x354e9a44278fb8eel,
-            0xad2dbc0f396e4964l },
-          0 },
-        /* 15 << 128 */
-        { { 0xfce0176788a2ffe4l,0xdc506a3528e169a5l,0x0ea108617af9c93al,
-            0x1ed2436103fa0e08l },
-          { 0x96eaaa92a3d694e7l,0xc0f43b4def50bc74l,0xce6aa58c64114db4l,
-            0x8218e8ea7c000fd4l },
-          0 },
-        /* 16 << 128 */
-        { { 0x6a7091c2e48fb889l,0x26882c137b8a9d06l,0xa24986631b82a0e2l,
-            0x844ed7363518152dl },
-          { 0x282f476fd86e27c7l,0xa04edaca04afefdcl,0x8b256ebc6119e34dl,
-            0x56a413e90787d78bl },
-          0 },
-        /* 17 << 128 */
-        { { 0xd1ffd160deb58b9bl,0x78492428c007273cl,0x47c908048ef06073l,
-            0x746cd0dfe48c659el },
-          { 0xbd7e8e109d47055bl,0xe070967e39711c04l,0x3d8869c99c9444f6l,
-            0x6c67ccc834ac85fcl },
-          0 },
-        /* 19 << 128 */
-        { { 0x8a42d8b087b05be1l,0xef00df8d3e4e1456l,0x148cc8e8fbfc8cd2l,
-            0x0288ae4c4878804fl },
-          { 0x44e669a73b4f6872l,0xa4a8dbd4aab53c5bl,0x843fa963c9660052l,
-            0x128e2d2571c05dd2l },
-          0 },
-        /* 21 << 128 */
-        { { 0x3ea86174a9f1b59bl,0xc747ea076a9a8845l,0x733710b5ab242123l,
-            0x6381b546d386a60cl },
-          { 0xba0e286366a44904l,0x770f618de9db556cl,0x39e567f828fb198dl,
-            0xb5f1bef040147ee8l },
-          0 },
-        /* 23 << 128 */
-        { { 0x1adee1d516391617l,0x962d9184a3315fd9l,0x91c229750c805d59l,
-            0x4575eaf2cd9a1877l },
-          { 0x83fef163451831b9l,0x829d6bdd6f09e30fl,0x9379272dcc6b4e6al,
-            0xd7a049bd95fbee4al },
-          0 },
-        /* 25 << 128 */
-        { { 0x695f70da44ae09c6l,0x79793892bb99de1dl,0xde269352f696b429l,
-            0xe37ea97f8104c825l },
-          { 0x3166cac6b0e72e63l,0xa82e633ca03ba670l,0x1106e3843e505667l,
-            0xc2994f3dffb788b6l },
-          0 },
-        /* 27 << 128 */
-        { { 0xd36a5ab37c53073bl,0xc44a9940ebdc7e35l,0x7dd86c8bf3ded136l,
-            0x9fe9879fd5a0eb14l },
-          { 0xa210726c9b99bf9cl,0x3faf4456861036afl,0x1661f1c9615d091al,
-            0x2c63f630911551bcl },
-          0 },
-        /* 28 << 128 */
-        { { 0x1554d46da670ff1dl,0x24833d88cb97a1ccl,0x8fa6ab3cded97493l,
-            0x215e037189926498l },
-          { 0x549bd592e56d74ffl,0x58a8caf543b5e1ecl,0x3c6087a323e93cb9l,
-            0x8b0549875648b83cl },
-          0 },
-        /* 29 << 128 */
-        { { 0x232974230554f94fl,0x4f445a380f3a7618l,0xb9fb40bee4abefd6l,
-            0xfbf3eaf9c15eb07cl },
-          { 0xed469c23aca0c8b3l,0xc5209f68846e3f8fl,0x33d51d13d75da468l,
-            0x9406e10a3d5c6e29l },
-          0 },
-        /* 31 << 128 */
-        { { 0xb9a44b1f5c6cad21l,0xaa9947751ee60a83l,0xc89af3858c390401l,
-            0xef1e450b8dd51056l },
-          { 0x5f5f069879ac84d1l,0x68d82982ef57b1afl,0x31f1d90f50849555l,
-            0xff9577e57d9fc8f6l },
-          0 },
-        /* 33 << 128 */
-        { { 0xaeebc5c0b430d6a1l,0x39b87a13dc3a9c04l,0xf0c445252db4a631l,
-            0xe32d95482c66fcf6l },
-          { 0x16f11bafb17849c4l,0xdd1c76615eca71f7l,0x4389ad2e32e6c944l,
-            0x727c11a5889a06bbl },
-          0 },
-        /* 34 << 128 */
-        { { 0x38dd1ac021e5781al,0x578318dbfd019ee2l,0x096b677d5f88e574l,
-            0xdbec82b216ad9f4fl },
-          { 0x348debe23260e8d9l,0x9334126064dfcda1l,0xdc5fb34cefc8faael,
-            0x5fa048beb4a6fc25l },
-          0 },
-        /* 35 << 128 */
-        { { 0xe18806fd60b3258cl,0xb7d2926b1364df47l,0xe208300fa107ce99l,
-            0x8d2f29fe7918df0el },
-          { 0x0b012d77a1244f4cl,0xf01076f4213a11cfl,0x8e623223181c559dl,
-            0x9df196ee995a281dl },
-          0 },
-        /* 36 << 128 */
-        { { 0xc431a238013ff83bl,0x7c0018b2fad69d08l,0x99aeb52a4c9589eal,
-            0x121f41ab9b1cf19fl },
-          { 0x0cfbbcbaef0f5958l,0x8deb3aeb7be8fbdcl,0x12b954081f15aa31l,
-            0x5acc09b34c0c06fdl },
-          0 },
-        /* 37 << 128 */
-        { { 0xfaa821383a721940l,0xdd70f54dd0008b83l,0x00decb507d32a52dl,
-            0x04563529cdd87deal },
-          { 0xb0e7e2a2db81643dl,0x445f4c383a6fef50l,0x5c0ef211df694ae1l,
-            0xa5a8fead923d0f1cl },
-          0 },
-        /* 39 << 128 */
-        { { 0xbc0e08b0325b2601l,0xae9e4c6105815b7al,0x07f664faf944a4a1l,
-            0x0ad19d29288f83b3l },
-          { 0x8615cd677232c458l,0x98edff6e9038e7d1l,0x082e0c4395a4dfccl,
-            0x336267afeceee00el },
-          0 },
-        /* 40 << 128 */
-        { { 0x775cbfa86d518ffbl,0xdecee1f6930f124bl,0x9a402804f5e81d0fl,
-            0x0e8225c52a0eeb2fl },
-          { 0x884a5d39fee9e867l,0x9540428ffb505454l,0xb2bf2e20107a70d1l,
-            0xd9917c3ba010b2aal },
-          0 },
-        /* 41 << 128 */
-        { { 0xc88ad4452a29bfdel,0x3072ebfa998368b7l,0xa754cbf7f5384692l,
-            0x85f7e16906b13146l },
-          { 0x42a7095f6a549fbel,0xef44edf91f7f1f42l,0xbea2989737b0c863l,
-            0x13b096d87a1e7fc3l },
-          0 },
-        /* 43 << 128 */
-        { { 0x51add77ce2a3a251l,0x840ca1384d8476adl,0x08d01d26f6096478l,
-            0x10d501a532f1662bl },
-          { 0xc8d63f811165a955l,0x587aa2e34095046al,0x759506c617af9000l,
-            0xd6201fe4a32ab8d2l },
-          0 },
-        /* 44 << 128 */
-        { { 0xa98f42fa3d843d53l,0x33777cc613ef927al,0xc440cdbecb84ca74l,
-            0x8c22f9631dc7c5ddl },
-          { 0x4bc82b70c8d94708l,0x7e0b43fcc814364fl,0x286d4e2486f59b7el,
-            0x1abc895e4d6bf4c4l },
-          0 },
-        /* 45 << 128 */
-        { { 0x7c52500cfc8c9bbdl,0x635563381534d9f7l,0xf55f38cbfd52c990l,
-            0xc585ae85058f52e7l },
-          { 0xb710a28bf9f19a01l,0x891861bdf0273ca4l,0x38a7aa2b034b0b7cl,
-            0xa2ecead52a809fb1l },
-          0 },
-        /* 46 << 128 */
-        { { 0x3df614f1ec3ca8eal,0x6bb24e9f9505bc08l,0x23ba1afbf37ace22l,
-            0x2e51b03b3463c261l },
-          { 0x59a0fca9c39e6558l,0x819f271ca342ccd9l,0x0c913d54df7ac033l,
-            0xba0f83de573257d3l },
-          0 },
-        /* 47 << 128 */
-        { { 0xdf62817ab3b32fbcl,0x616d74b0964670d4l,0xa37bc6270e26020bl,
-            0xda46d655b7d40bdal },
-          { 0x2840f155b5773f84l,0xbb633777897774b6l,0x59ff1df79a1ed3fal,
-            0xf7011ee2bac571f9l },
-          0 },
-        /* 48 << 128 */
-        { { 0x38151e274d559d96l,0x4f18c0d3b8db6c01l,0x49a3aa836f9921afl,
-            0xdbeab27b8c046029l },
-          { 0x242b9eaa7040bf3bl,0x39c479e51614b091l,0x338ede2b0e4baf5dl,
-            0x5bb192b7f0a53945l },
-          0 },
-        /* 49 << 128 */
-        { { 0xd612951861535bb0l,0xbf14364016f6a954l,0x3e0931eedde18024l,
-            0x79d791c8139441c0l },
-          { 0xba4fe7ecb67b8269l,0x7f30d848224b96c1l,0xa7e0a6abf0341068l,
-            0x78db42c37198ea2dl },
-          0 },
-        /* 51 << 128 */
-        { { 0x13354044185ce776l,0x109a6e059ff0100cl,0xafa3b61b03144cb1l,
-            0x4e4c814585265586l },
-          { 0xa8dafd33edb35364l,0x6691781bfd2606bel,0x2e06a9786182f5ccl,
-            0x588784ebe77faeecl },
-          0 },
-        /* 52 << 128 */
-        { { 0x896d572337e440d7l,0x685c5fd9ade23f68l,0xb5b1a26dc2c64918l,
-            0xb9390e30dad6580cl },
-          { 0x87911c4e7dee5b9bl,0xb90c5053deb04f6el,0x37b942a18f065aa6l,
-            0x34acdf2a1ca0928dl },
-          0 },
-        /* 53 << 128 */
-        { { 0xc773f525606f8f04l,0x75ae4a4b41b0a5bbl,0xb2aa058eaf7df93cl,
-            0xf15bea4feafed676l },
-          { 0xd2967b236a3c4fd7l,0xa698628090e30e7fl,0xf1b5166d316418bdl,
-            0x5748682e1c13cb29l },
-          0 },
-        /* 55 << 128 */
-        { { 0xe7b11babfff3605bl,0xdbce1b74cbac080fl,0xa0be39bd6535f082l,
-            0x2b6501805f826684l },
-          { 0xf90cea2400f5244fl,0xe279f2fadd244a1cl,0xd3fca77c9421c3ael,
-            0xe66bc7ee81a5210al },
-          0 },
-        /* 57 << 128 */
-        { { 0x114085dac40c6461l,0xaf78cb47f47d41b8l,0x7a9ae851755b0adbl,
-            0x8d2e8c66a0600b6dl },
-          { 0x5fb19045389758c0l,0xfa6e2cdabe7c91b2l,0x6472a432663983a2l,
-            0xc9370829e0e19363l },
-          0 },
-        /* 59 << 128 */
-        { { 0xd335856ec50bf2ffl,0x89b42295dfa708c2l,0x5dfb42241b201b4el,
-            0x6c94d6b94eecbf9cl },
-          { 0xabe5a47a7a634097l,0xf3d53b1643febecfl,0xff18619faca9846el,
-            0x80ad8629a4066177l },
-          0 },
-        /* 60 << 128 */
-        { { 0x7872e34b3390ff23l,0x968ce4abde7d18efl,0x9b4a745e627fe7b1l,
-            0x9607b0a0caff3e2al },
-          { 0x1b05818eeb40e3a5l,0x6ac62204c0fa8d7al,0xb5b9058571ed4809l,
-            0xb2432ef0f7cb65f2l },
-          0 },
-        /* 61 << 128 */
-        { { 0xc1203418f8a144b7l,0xb3413f808378f901l,0xf6badea161857095l,
-            0xcd2816c2b2e93efel },
-          { 0x6a8303ea174a0ee6l,0x98b62f29150b28b6l,0x68071bbc9c2a05b6l,
-            0xcfcf41a39f00e36el },
-          0 },
-        /* 63 << 128 */
-        { { 0xcaf564f234d6bc29l,0x9e9a6507f3c8edb0l,0x2fb889edd4e5502el,
-            0xb70d4ceb6cc9d8edl },
-          { 0x0de25356b020f740l,0xa68d9263d11fe5e6l,0xe86400679d85dd77l,
-            0xa95dfa7dec2c8c8dl },
-          0 },
-        /* 64 << 128 */
-        { { 0x715c9f973112795fl,0xe8244437984e6ee1l,0x55cb4858ecb66bcdl,
-            0x7c136735abaffbeel },
-          { 0x546615955dbec38el,0x51c0782c388ad153l,0x9ba4c53ac6e0952fl,
-            0x27e6782a1b21dfa8l },
-          0 },
-        /* 65 << 128 */
-        { { 0x3f9bc63ece59397dl,0x3f0f98a93eaa6104l,0x2f82c37c002d9271l,
-            0x6ac0495d4985353cl },
-          { 0xbde52f629191527bl,0xa3a13fce475aa640l,0x1d71ae17ce673f89l,
-            0x2b5cc61529120ec1l },
-          0 },
-        /* 71 << 128 */
-        { { 0xa0ab0f9924318c1cl,0x0cc5ca7da80ca60bl,0x24e27598abb965bal,
-            0xc4863198b44d1351l },
-          { 0x4d913783a28f04bel,0x404e78088cce8960l,0x2973b4e46286873el,
-            0x7b6e0f3219f42b50l },
-          0 },
-        /* 77 << 128 */
-        { { 0x0091a786306a6349l,0x4640ceab2098622dl,0x9928022be8182233l,
-            0xf261bee4514d0bedl },
-          { 0x70cdcc44c5f64fedl,0x4e19fec4f9eb2dfel,0xd05bdc09058b0b69l,
-            0x16f3007ed3bc6190l },
-          0 },
-        /* 83 << 128 */
-        { { 0x8f7f16957f136df1l,0x6d7547019b4f4215l,0xfb22d55eb4cc46a6l,
-            0x0b53ef53a8563034l },
-          { 0x8b105acc42bc9353l,0xe44c0a396079d59dl,0x78441fee35ee38ddl,
-            0x87ad93e43dcc0119l },
-          0 },
-        /* 89 << 128 */
-        { { 0x98a1c55358d9f73al,0xaa0843f0540e2b91l,0x701f8831d0647459l,
-            0xc4ae9d0484673005l },
-          { 0x9c37bc9f30b3ea20l,0x24cb4e2dbcbfb2b2l,0x8513e6f313cbf070l,
-            0x0c4db4334e76c79el },
-          0 },
-        /* 95 << 128 */
-        { { 0x882a2b9cbc8320b8l,0x16e9c11e3ad9e222l,0x24399ac19b23cb1dl,
-            0x334c5496799a89c7l },
-          { 0x72b6f9b8df3d774cl,0x42955bcbb11b6704l,0x3c4d6021ad2d4eafl,
-            0x5416b309afe2b671l },
-          0 },
-        /* 101 << 128 */
-        { { 0x1bbe9e662bf7c2a6l,0x22a3a10ca4acfddbl,0x2424eaab46bae581l,
-            0xebec1bbf40d6bdadl },
-          { 0xd7e3fa1a5b012aedl,0xc0f82c23f1dc6204l,0x42787c82e319477dl,
-            0xca1ae7a14cf57573l },
-          0 },
-        /* 107 << 128 */
-        { { 0x44b7d589d51bbde9l,0x15de755fd6a4cc98l,0x9b6ea8e582fb8e2el,
-            0x9d9294f04332bc22l },
-          { 0x53c6b2b7d1fa239al,0x286bf536693ca4f1l,0xc3fa754603c00f65l,
-            0xc046713af49cdb48l },
-          0 },
-        /* 113 << 128 */
-        { { 0xe356f5f11d82d5d6l,0xa0346a73d035ca0cl,0x14c76adee1884448l,
-            0xd8369bdd1c23dde9l },
-          { 0x13017862fe025eafl,0x6b5ac5e9a76be1d7l,0x52d621a94933bb6el,
-            0xb045b53baa8c1d3fl },
-          0 },
-        /* 116 << 128 */
-        { { 0x242da39e4e40466al,0xc03cb184ac322b07l,0x776b744f9aaa10bfl,
-            0xb80d9f14fe7d4beal },
-          { 0x75cd14308f9c4908l,0xa4e59ce9087b3d7al,0x3bbdce598cdca614l,
-            0x58c57113bc1a5df1l },
-          0 },
-        /* 119 << 128 */
-        { { 0x2a70af1abd79d467l,0x68dc4f23f63e2b73l,0x4345572f1f67b23dl,
-            0xc012b08f3a340718l },
-          { 0x9458585cc963dbe2l,0x21d84032223a495cl,0x0d54a4ea0dc28159l,
-            0xd9549e2c9b927dafl },
-          0 },
-        /* 125 << 128 */
-        { { 0xcd54ebd2d43c8cd2l,0x5ff4ded6a817b9f9l,0x6f59bc31245386d3l,
-            0x65b67cb0a2077821l },
-          { 0x36407956405ffa07l,0x723e0252d589f27al,0x052004b888e1239el,
-            0x8e6d188d69fdf94dl },
-          0 },
-    },
-    {
-        /* 0 << 136 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 136 */
-        { { 0xc16c236e846e364fl,0x7f33527cdea50ca0l,0xc48107750926b86dl,
-            0x6c2a36090598e70cl },
-          { 0xa6755e52f024e924l,0xe0fa07a49db4afcal,0x15c3ce7d66831790l,
-            0x5b4ef350a6cbb0d6l },
-          0 },
-        /* 3 << 136 */
-        { { 0xe2a37598a9d82abfl,0x5f188ccbe6c170f5l,0x816822005066b087l,
-            0xda22c212c7155adal },
-          { 0x151e5d3afbddb479l,0x4b606b846d715b99l,0x4a73b54bf997cb2el,
-            0x9a1bfe433ecd8b66l },
-          0 },
-        /* 4 << 136 */
-        { { 0xe13122f3dbfb894el,0xbe9b79f6ce274b18l,0x85a49de5ca58aadfl,
-            0x2495775811487351l },
-          { 0x111def61bb939099l,0x1d6a974a26d13694l,0x4474b4ced3fc253bl,
-            0x3a1485e64c5db15el },
-          0 },
-        /* 5 << 136 */
-        { { 0x5afddab61430c9abl,0x0bdd41d32238e997l,0xf0947430418042ael,
-            0x71f9addacdddc4cbl },
-          { 0x7090c016c52dd907l,0xd9bdf44d29e2047fl,0xe6f1fe801b1011a6l,
-            0xb63accbcd9acdc78l },
-          0 },
-        /* 7 << 136 */
-        { { 0x0ad7337ac0b7eff3l,0x8552225ec5e48b3cl,0xe6f78b0c73f13a5fl,
-            0x5e70062e82349cbel },
-          { 0x6b8d5048e7073969l,0x392d2a29c33cb3d2l,0xee4f727c4ecaa20fl,
-            0xa068c99e2ccde707l },
-          0 },
-        /* 9 << 136 */
-        { { 0x5b826fcb1b3ec67bl,0xece1b4b041356616l,0x7d5ce77e56a3ab4fl,
-            0xf6087f13aa212da0l },
-          { 0xe63015054db92129l,0xb8ae4c9940407d11l,0x2b6de222dfab8385l,
-            0x9b323022b7d6c3b4l },
-          0 },
-        /* 10 << 136 */
-        { { 0x057ef17a5ae6ad84l,0x9feae00b293a6ae0l,0xd18bb6c154266408l,
-            0xd3d3e1209c8e8e48l },
-          { 0xba8d4ca80e94fc8fl,0x80262ffc8a8ea0fel,0xac5b2855f71655fdl,
-            0xa348f8fae9aced89l },
-          0 },
-        /* 11 << 136 */
-        { { 0x60684b69a5660af3l,0x69aad23b9066d14bl,0x4d9f9b49fa4d020al,
-            0xafb54ec1b5cd6a4al },
-          { 0x2b25fe1832fd864dl,0xee6945062b6b64d0l,0x954a2a515001d8aal,
-            0x5e1008557082b5b3l },
-          0 },
-        /* 13 << 136 */
-        { { 0x20ecf71cbc90eb1bl,0x4234facf651c1df4l,0xc720fce9e681f678l,
-            0x680becdda7c007f4l },
-          { 0x7c08dc063181afeal,0x75c1b050a34eca91l,0x7d3479d54b9e2333l,
-            0xed16640af3951aa3l },
-          0 },
-        /* 15 << 136 */
-        { { 0x911b596264723e54l,0x34384f8c004b327cl,0x06ca5c61b85435f2l,
-            0x12e0cd25e2c1075cl },
-          { 0xa4b84cb8ac727394l,0x50bd720492b352c1l,0xe85524a49cbd0fb4l,
-            0x10b9274be7876024l },
-          0 },
-        /* 16 << 136 */
-        { { 0xef0a3fecfa181e69l,0x9ea02f8130d69a98l,0xb2e9cf8e66eab95dl,
-            0x520f2beb24720021l },
-          { 0x621c540a1df84361l,0x1203772171fa6d5dl,0x6e3c7b510ff5f6ffl,
-            0x817a069babb2bef3l },
-          0 },
-        /* 17 << 136 */
-        { { 0xb7cf93c3aace2c6al,0x017a96e658ff1bbfl,0x3b401301624a8250l,
-            0xf5ef158529266518l },
-          { 0x3c968bef7585838dl,0x8e97d023853191abl,0x175022e4f6823389l,
-            0xb6a3bfc2f6a9b4c1l },
-          0 },
-        /* 19 << 136 */
-        { { 0x515acf174591d77el,0xb393c89e3c3b25b6l,0x291e068e9c95abd7l,
-            0x256b72c046c02544l },
-          { 0x8172af03915ea92fl,0xc1b324ae4fcd0f03l,0x8abc779215108993l,
-            0xe05fe6867ab815ael },
-          0 },
-        /* 21 << 136 */
-        { { 0xca08d4095bc42740l,0xdd2c19d3e26e2e60l,0x27afdeded7c091fal,
-            0x3b943b0faf25cb22l },
-          { 0x400af8be026047e9l,0x3149b35f772b8ff9l,0x3ddb2c06f17229d9l,
-            0xcd604aeadac152fcl },
-          0 },
-        /* 23 << 136 */
-        { { 0xea2275311c0f6803l,0x9ae82d5ea394cc08l,0xc107a2cfbe32080cl,
-            0x550f35a76429f6d7l },
-          { 0x483c94dacfb70c0cl,0xf26f8e5d90190c94l,0x8574b3cf86bf2620l,
-            0xe7258e45df9f482fl },
-          0 },
-        /* 25 << 136 */
-        { { 0x8f8dc582da46f1cfl,0x61d76cf91e1e7427l,0x8aceb48b306c84aal,
-            0xecaa142f28ebff98l },
-          { 0xac5bd940401d80fel,0x0caacb8fe800cf9el,0x99068da9b3359af5l,
-            0x92fdd5795225b8c0l },
-          0 },
-        /* 27 << 136 */
-        { { 0x5a29d1c5ab56a3fbl,0x4e46ffc0a9aab4afl,0xa210472624d83080l,
-            0xb5820998007f08b6l },
-          { 0x9ce1188e4bc07b3el,0xbf6d0dbe32a19898l,0x5d5c68ea5b2350bal,
-            0xd6c794eb3aa20b45l },
-          0 },
-        /* 28 << 136 */
-        { { 0x3de605ba9ec598cfl,0x1933d3ae4d3029ael,0x6bf2fabd9b140516l,
-            0x712dfc5559a7d01cl },
-          { 0xff3eaae0d2576366l,0x36e407f948701cf8l,0xede21d89b41f4bd4l,
-            0xc5292f5c666eefa9l },
-          0 },
-        /* 29 << 136 */
-        { { 0x30045782c3ebcd77l,0xaa0cf3c73fdbe72el,0x719ec58ef8f43b39l,
-            0x9716fb9972574d3al },
-          { 0x300afc2b0d03ccd6l,0xb60016a34f3fac41l,0x8898910ea3a439f6l,
-            0xdc00a99707ca11f5l },
-          0 },
-        /* 31 << 136 */
-        { { 0x291b15ee8ed34662l,0xb780d54b2ee422a7l,0x5b9e3788fcfe4ccbl,
-            0x4554cb8cbe8b7c3al },
-          { 0xfdaccc2209a85a7fl,0x51f4a8ec555497edl,0x07dc69037da33505l,
-            0xa3bc8bfcbc1fc1dbl },
-          0 },
-        /* 33 << 136 */
-        { { 0x661638c151e25257l,0x0a6fd99c53304974l,0x29d8ae165078eec6l,
-            0xed7512ad447b73del },
-          { 0x0e21de607a4d0e9bl,0x842abd422462be01l,0x3be82afa5cddc709l,
-            0x25bb9da99b52797dl },
-          0 },
-        /* 34 << 136 */
-        { { 0x80613af28adc986al,0x4602284935776a41l,0x17d33e0f4665d03cl,
-            0xeb12eb6c0df12b50l },
-          { 0x0f0effa0ee41527fl,0x8ca2edb680531563l,0x4c354679f28c52c3l,
-            0x67f1ba5c2f6df66dl },
-          0 },
-        /* 35 << 136 */
-        { { 0x9c27207a2479fb3fl,0xef6e0f13515fb902l,0x3f7ad9e9d0d9436el,
-            0x36eb4ea5893bbcf5l },
-          { 0x5c53a2ac02b316b7l,0x10c75ee1f54f7585l,0x29e5879c3c7a4c1bl,
-            0x77da3c82f29c67d6l },
-          0 },
-        /* 36 << 136 */
-        { { 0xf2b75d21ef78a852l,0xba38cd34dd31a900l,0x72b3a68658ffe18al,
-            0x7464190cbfd95745l },
-          { 0x406e532177ed6e81l,0x1af0975bde535eabl,0x66ba22c760c54c82l,
-            0x88e3b1ceb00a2fe0l },
-          0 },
-        /* 37 << 136 */
-        { { 0xb6099b7df7e5c69bl,0x84aa1e26ba34ee2fl,0x5952600405c338bbl,
-            0xe9a134374951a539l },
-          { 0xb12276526ec196bdl,0x26a7be264b6dce36l,0x052e10a4e2a68458l,
-            0x475fc74c1f38898bl },
-          0 },
-        /* 39 << 136 */
-        { { 0x120167fc0a3eb4e1l,0xaa94bc70c0c21204l,0x313cd835e1243b75l,
-            0x3bb63fb20bfd6a4al },
-          { 0xa615dcae21ef05cfl,0x63774c2ec23c3ee5l,0x39365b1fed0dfd65l,
-            0xb610e6ff5d2a2d7dl },
-          0 },
-        /* 40 << 136 */
-        { { 0x55b7f977f0337b15l,0x3bc872a30e94973al,0x624ad983770deea0l,
-            0xcaab336413a5efdbl },
-          { 0x391dd0027a0d4247l,0x39590d5df312aed5l,0x532802c9351365acl,
-            0xdd2e824578a2e22al },
-          0 },
-        /* 41 << 136 */
-        { { 0x81b0d7be7f774fb8l,0x62f32bb3aa412425l,0xbe7afe26bbcd2162l,
-            0xa6ce167c53c7fa7dl },
-          { 0x8deca64fc5c4fc5bl,0x70e546aba6efd2fel,0xf2d8495987ff672al,
-            0x2ca551f249c3059el },
-          0 },
-        /* 43 << 136 */
-        { { 0x40b62d528eb99155l,0xe6b048947420a7e0l,0x9ebecb2bc685e58al,
-            0x3ea642d8d3c8d2cbl },
-          { 0x5340ac6ed489d0dfl,0xf3846d08c2b7588el,0x4cecd8a0611c289bl,
-            0xdddc39c50dd71421l },
-          0 },
-        /* 44 << 136 */
-        { { 0x98c6a6a52ebee687l,0xcdf65bfa56c1c731l,0x48e8132772def210l,
-            0x4ea119418083b5a5l },
-          { 0x3fdcea4fffebb525l,0x55aaea19fb50bf72l,0x5fbedc0a2a85b40cl,
-            0x0d6fd954bf44f29fl },
-          0 },
-        /* 45 << 136 */
-        { { 0x83a8302a9db4071el,0x52f104436f8ae934l,0x96de829d175b800al,
-            0x20ff5035373e97cel },
-          { 0xf58660185f65356al,0x992c15054c8cd782l,0x0b962c8eb57d727fl,
-            0xe8a9abc92bba8bc7l },
-          0 },
-        /* 46 << 136 */
-        { { 0x81a85ddd7cf2b565l,0x5e51e6afc34a0305l,0xa8d94ccefbc89faal,
-            0x2bfd97c1e68cd288l },
-          { 0x16d79c21af2958b8l,0x5e5d989defda7df8l,0x6d2f0ca6ff734c8al,
-            0xfa5b8dd32cc9bafel },
-          0 },
-        /* 47 << 136 */
-        { { 0x5787a9934e6ed688l,0x6815f3b5aab42f46l,0x7960f45b093c6c66l,
-            0xb2b9829728be10cfl },
-          { 0x1d4c7790296568cdl,0xa279a877f048e194l,0xcf7c20f4c6a58b4el,
-            0xf0c717afa1f9c00fl },
-          0 },
-        /* 48 << 136 */
-        { { 0x8a10b53189e800cal,0x50fe0c17145208fdl,0x9e43c0d3b714ba37l,
-            0x427d200e34189accl },
-          { 0x05dee24fe616e2c0l,0x9c25f4c8ee1854c1l,0x4d3222a58f342a73l,
-            0x0807804fa027c952l },
-          0 },
-        /* 49 << 136 */
-        { { 0x79730084ba196afcl,0x17d38e98054bd539l,0xc5cfff3918583239l,
-            0x4b0db5a2d9adbee6l },
-          { 0x9bc9f1e3c2a304e8l,0xbaa61de7de406fa8l,0x8e921ca9e4bec498l,
-            0xd9f4e5ae6604ab02l },
-          0 },
-        /* 51 << 136 */
-        { { 0xdf6b97b5b37f2097l,0x7576c3f9b4a5d2b9l,0x6eb697ed3588cabbl,
-            0x4d75b38622598d8fl },
-          { 0x4e6d93b522ff55e8l,0x4620ec635b8f7edal,0xd5006209f97b7749l,
-            0x9e22e3a84da8b464l },
-          0 },
-        /* 52 << 136 */
-        { { 0xbabfb7f82e8f326fl,0xed9cac225625a519l,0xf1109c1a0edae0a9l,
-            0x45f80a9858521259l },
-          { 0x37a44b075ab71f44l,0x21699eb64a21161bl,0xb523fddf56fe67eel,
-            0x9f5c3a2120b9f72el },
-          0 },
-        /* 53 << 136 */
-        { { 0x12c1131508b75673l,0xfa20121823b096d6l,0x839f01aeeacd6537l,
-            0x0e592be787df32cal },
-          { 0xfe3f65ff8b7dd0fcl,0xed09b4875c1d9a80l,0x8c09dd97b79786d8l,
-            0x74eba2806c5bc983l },
-          0 },
-        /* 55 << 136 */
-        { { 0xf917704862987b50l,0xcc84cdc6bc4ac456l,0x8bd2c922ae08fe12l,
-            0x09d5f661fc2d06c7l },
-          { 0xd10ac6dd9457d47fl,0x65aa30a23668060cl,0x33cddac6745161fcl,
-            0xf4c18b5ea51e540fl },
-          0 },
-        /* 57 << 136 */
-        { { 0x591c064ede723c1fl,0x92e5d4e601a4adael,0x3d7ee8a3145716ecl,
-            0x0ef4c62061727816l },
-          { 0x0e17c576f1bf6d6el,0x173104015ae18045l,0xdad620aae9589b75l,
-            0xb10c7e2d0eda4905l },
-          0 },
-        /* 59 << 136 */
-        { { 0xb8020f16aa08df6fl,0x03cf58ffd67054e9l,0x302e003c11fe3d1al,
-            0x9c194bc1c638a3ecl },
-          { 0x8ed3cb3adefd3f1el,0xc4115e079bf39de4l,0x8dece48bdf46fdf6l,
-            0xebd1dbcf30eafeafl },
-          0 },
-        /* 60 << 136 */
-        { { 0x058eb276fba319c5l,0xd33a91127f7fa54al,0xf060c1b4932a2dabl,
-            0xce3a224e79c7d9bfl },
-          { 0x6fb0388c0ba92823l,0x8d31738a69787881l,0x2d86eb0203cd00b7l,
-            0x4e6e44512b69911bl },
-          0 },
-        /* 61 << 136 */
-        { { 0xff2efe1cfdcca1cfl,0x08f22c69b5bb71e3l,0xc63f4a9f7023076el,
-            0x88fb2aa0ce0c490el },
-          { 0xcc7c97f91f77783cl,0x360026d942ab36b7l,0x547c34ecefd68f70l,
-            0xebe7f99efbabfdabl },
-          0 },
-        /* 63 << 136 */
-        { { 0xe7c1c1788613e87al,0xb035d65e60b82654l,0x055a82d03583a254l,
-            0x27ce1ffc9b3b22fal },
-          { 0x0cf904917ec83cd5l,0xfc6c21805604aa40l,0x1330604099357428l,
-            0x9b0982f9ad4818b7l },
-          0 },
-        /* 64 << 136 */
-        { { 0xc222653a4f0d56f3l,0x961e4047ca28b805l,0x2c03f8b04a73434bl,
-            0x4c966787ab712a19l },
-          { 0xcc196c42864fee42l,0xc1be93da5b0ece5cl,0xa87d9f22c131c159l,
-            0x2bb6d593dce45655l },
-          0 },
-        /* 65 << 136 */
-        { { 0x3a6080d9fb56bc3al,0xf1552dcad6212d7el,0x977ac5b59420f4f6l,
-            0xef914d370e3cd97fl },
-          { 0x807bd6e69c04f768l,0x743a7b552bb803f6l,0x7f5c20804215f4b0l,
-            0x41e331288fc6ce42l },
-          0 },
-        /* 71 << 136 */
-        { { 0x5a31c9ac61e6a460l,0x55102e4093e7eeddl,0x969fe0612da6adcel,
-            0xe8cddc2f3ffea1d9l },
-          { 0xaa26c6b1f0f327c5l,0x9e5b63743544f5e1l,0x5159fa1ddbaa685bl,
-            0x9892d03aa7f44b99l },
-          0 },
-        /* 77 << 136 */
-        { { 0x4dfcbf12e2c6fc1fl,0x703f2f5b7535ac29l,0x78f8617e82f7dc0fl,
-            0x54b835ff853e792dl },
-          { 0x3cc7f000df9f7353l,0x0d7ffd68db5a157al,0x2c1c33691672b21cl,
-            0x694b4904ac970ef8l },
-          0 },
-        /* 83 << 136 */
-        { { 0xd655bc42c1d2c45cl,0x572f603cbd22b05fl,0xa7fbf09388e4531al,
-            0x8d38bbd91fdde98dl },
-          { 0x16cc2aaa73b0fa01l,0x515019a25e8ffb04l,0xb075990611e792ccl,
-            0x89df06f399112c90l },
-          0 },
-        /* 89 << 136 */
-        { { 0x26d435c2481b46dal,0x73ab7e96266e9b3al,0x22d5b1db3c613c40l,
-            0x9de4021c6727e399l },
-          { 0x451ebba56051f8c9l,0xa37f6ec52c281a58l,0x3d7a28fe0e9f4cc5l,
-            0x0f45bcd655b64df7l },
-          0 },
-        /* 95 << 136 */
-        { { 0xba2a718c66616fbel,0x4b27810b3369a9acl,0x50b8391a2b426d5fl,
-            0x420c88efa626fa05l },
-          { 0xe39cef97b9c39a30l,0xcae7cde85e67e5d0l,0x3821f8319a58e521l,
-            0xbf474d1941479509l },
-          0 },
-        /* 101 << 136 */
-        { { 0x401bbab58fb15118l,0xb0376892dbf38b39l,0x10e4b9dd3a3ca42al,
-            0xa69c2693f8063ffel },
-          { 0xe10facdde07cb761l,0x96f4dde831d7759al,0xd702fdecc2cc7f9fl,
-            0x9e87e46e1ac0162cl },
-          0 },
-        /* 107 << 136 */
-        { { 0xb6cd60518479ca8fl,0xcca345e60968f6c7l,0x7b57248a64a9afe7l,
-            0x5552e3511d0d4db9l },
-          { 0x8f749b199dc68aabl,0x0fb86f06db1f7819l,0x23b300963143ac09l,
-            0x61c166d8abfbcb9bl },
-          0 },
-        /* 113 << 136 */
-        { { 0x4c96e85a43101165l,0x393a882fcf39bd19l,0xef9e1d42c2df6f33l,
-            0xe1775c990278f088l },
-          { 0xb1581929a9250d4al,0x582b0608c4168873l,0x0b3ffba3a1e68cd8l,
-            0x3f78147ef9490897l },
-          0 },
-        /* 116 << 136 */
-        { { 0x277b5177eb18ff20l,0x48002e9828f06d62l,0xece8d6c30e506d8dl,
-            0x5cde0a58cd9ff963l },
-          { 0x3b97cdb74e3baa0el,0x50560c0b631238f9l,0xe1c31b35cf79793dl,
-            0x95d12f14355e2178l },
-          0 },
-        /* 119 << 136 */
-        { { 0x0143f695bcc31b77l,0x3627aed14c49b65al,0x6e4f7a9ce441c183l,
-            0xb708c79de1bfa0a3l },
-          { 0xdbf0fc313a0726b8l,0xe04d82a8852d78bbl,0xb859001e3be5d398l,
-            0x92dcc20c8e89bd11l },
-          0 },
-        /* 125 << 136 */
-        { { 0x5f2416a3df9026b4l,0xffc01f3afcb29a1bl,0x18d02c9f1d94b20fl,
-            0xd93b0f2f81cfdef3l },
-          { 0xe6b0fd4713adf5f2l,0xcc9067b7ba06dff3l,0xb48c0cbb2256f842l,
-            0xc2ae741dfd34df2fl },
-          0 },
-    },
-    {
-        /* 0 << 144 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 144 */
-        { { 0x80531fe1c63c4962l,0x50541e89981fdb25l,0xdc1291a1fd4c2b6bl,
-            0xc0693a17a6df4fcal },
-          { 0xb2c4604e0117f203l,0x245f19630a99b8d0l,0xaedc20aac6212c44l,
-            0xb1ed4e56520f52a8l },
-          0 },
-        /* 3 << 144 */
-        { { 0x18f37a9c6bdf22dal,0xefbc432f90dc82dfl,0xc52cef8e5d703651l,
-            0x82887ba0d99881a5l },
-          { 0x7cec9ddab920ec1dl,0xd0d7e8c3ec3e8d3bl,0x445bc3954ca88747l,
-            0xedeaa2e09fd53535l },
-          0 },
-        /* 4 << 144 */
-        { { 0xa12b384ece53c2d0l,0x779d897d5e4606dal,0xa53e47b073ec12b0l,
-            0x462dbbba5756f1adl },
-          { 0x69fe09f2cafe37b6l,0x273d1ebfecce2e17l,0x8ac1d5383cf607fdl,
-            0x8035f7ff12e10c25l },
-          0 },
-        /* 5 << 144 */
-        { { 0xb7d4cc0f296c9005l,0x4b9094fa7b0aebdbl,0xe1bf10f1c00ec8d4l,
-            0xd807b1c4d667c101l },
-          { 0xa9412cdfbe713383l,0x435e063e81142ba1l,0x984c15ecaf0a6bdcl,
-            0x592c246092a3dab9l },
-          0 },
-        /* 7 << 144 */
-        { { 0x9365690016e23e9dl,0xcb220c6ba7cc41e1l,0xb36b20c369d6245cl,
-            0x2d63c348b62e9a6al },
-          { 0xa3473e19cdc0bcb5l,0x70f18b3f8f601b98l,0x8ad7a2c7cde346e4l,
-            0xae9f6ec3bd3aaa64l },
-          0 },
-        /* 9 << 144 */
-        { { 0x030223503274c7e1l,0x61ee8c934c4b6c26l,0x3c4397e3199389cel,
-            0xe0082600488757cel },
-          { 0xaac3a2df06b4dafbl,0x45af0700ddff5b6al,0x0a5974248c1d9fa0l,
-            0x1640087d391fc68bl },
-          0 },
-        /* 10 << 144 */
-        { { 0x26a43e41d07fa53dl,0x3154a78a74e35bc5l,0x7b768924e0da2f8cl,
-            0xba964a2b23613f9al },
-          { 0x5a548d35ba1d16c4l,0x2e1bfed1fb54d057l,0xff992136bc640205l,
-            0xf39cb9148156df29l },
-          0 },
-        /* 11 << 144 */
-        { { 0xf4873fcf4e5548bdl,0x8725da3f03ce57f0l,0xd82f5c95ca953258l,
-            0xac647f127cf0747el },
-          { 0xff2038b02d570bd5l,0xb0c2a767a13ae03fl,0xebaa27cde9932d16l,
-            0xa686e3fc1234e901l },
-          0 },
-        /* 13 << 144 */
-        { { 0x9f80435e63261eccl,0x6302a62e4337d6c9l,0x91916a49ca4958a0l,
-            0x554958993149d5d3l },
-          { 0x378d020b9f91de3cl,0x47b839a34dd25170l,0x2825854138b7f258l,
-            0xea5b14f7437e7decl },
-          0 },
-        /* 15 << 144 */
-        { { 0x74f08736b0018f44l,0xf4a03417b446d0f5l,0x66a4aa2fa40ca6b2l,
-            0x215679f0badb60edl },
-          { 0x3871195a323e4eefl,0x8f0940c320952b16l,0xfe8dac62879d5f7dl,
-            0x649cb623c1a6e875l },
-          0 },
-        /* 16 << 144 */
-        { { 0xecaff541338d6e43l,0x56f7dd734541d5ccl,0xb5d426de96bc88cal,
-            0x48d94f6b9ed3a2c3l },
-          { 0x6354a3bb2ef8279cl,0xd575465b0b1867f2l,0xef99b0ff95225151l,
-            0xf3e19d88f94500d8l },
-          0 },
-        /* 17 << 144 */
-        { { 0xa26a9087133ec108l,0x5dc5699f2712bdc0l,0x96903f4dd14224a9l,
-            0x3da5992429e47b80l },
-          { 0xb717712ff9dbba5al,0x9e52004b756391c9l,0xe669a11dcc9d219cl,
-            0x3b6e6b84d1d6c07dl },
-          0 },
-        /* 19 << 144 */
-        { { 0x5feec06a676feadbl,0xfc449bc59d69f322l,0x1d8d7b5e7cda8895l,
-            0x5ed54dc11a3314a7l },
-          { 0x1a11d2ae6de889c0l,0xb2a979724ced2bd9l,0x6ecf6989306a5ef6l,
-            0x1611d57b8cc8a249l },
-          0 },
-        /* 21 << 144 */
-        { { 0x2d9942ba007cbf87l,0x4e62bce6df3fc926l,0xe7eee5b0e4560affl,
-            0xe51963bb7cb009b7l },
-          { 0xaa5118cee29b37ddl,0x5cd84a4747263903l,0x3050caa6620055d8l,
-            0x7ef576a76c4b1e3dl },
-          0 },
-        /* 23 << 144 */
-        { { 0x9026a4dde6008ff1l,0x49e995ad1c8cd96cl,0x80722e73503e589bl,
-            0x05bcbce184c2bc26l },
-          { 0x255f9abbd4682c2cl,0xc42bcfc2f084d456l,0xa0eae9b0641c0767l,
-            0x1b45632d864c9a2dl },
-          0 },
-        /* 25 << 144 */
-        { { 0xcf25793b6ae024e0l,0x1b6607b484b5c4b0l,0x9579fa903f1624c8l,
-            0x37fb65be68bd57e8l },
-          { 0xd693a55efc39c203l,0x4e267ac4c87252e9l,0xb8d78bb09f899413l,
-            0xe4c014070b3b8508l },
-          0 },
-        /* 27 << 144 */
-        { { 0x662906e5bc3f3553l,0xde38d53531459684l,0x8f46a8c634f7280dl,
-            0xaaf91b873d24198el },
-          { 0xecd5ee115f9b117el,0xce00ffbe50ae8ddal,0x263a3d4e7710a9ael,
-            0x0ff3f721f26ba74fl },
-          0 },
-        /* 28 << 144 */
-        { { 0x4a8a4f47f0cefa69l,0xdc8e4cbaa4546866l,0x359ba69b23f603c1l,
-            0xdab4d601187b7ac5l },
-          { 0xa6ca4337c1ebc8d9l,0x9fa6585452b4074bl,0x1a4b4f81902fb733l,
-            0xd2bb5d7aa525deaal },
-          0 },
-        /* 29 << 144 */
-        { { 0xcc287ac2e6b3577al,0xd7528ca7f612003bl,0x8afdb6f12c1400b8l,
-            0x103a2ed346a2dd8dl },
-          { 0xc8f8c54d2ee21339l,0x8f011b92355a2d20l,0x81c6fc9f1346f2acl,
-            0xdb6042f005a6d24bl },
-          0 },
-        /* 31 << 144 */
-        { { 0xfc90e3630da4f996l,0x8ceca49daa6d6fe4l,0x1084affdbdfc619bl,
-            0x2029f672c1140b04l },
-          { 0x606ec25f136f3e5el,0x6d24149b02224c4al,0xabb0f142cfdfcf4cl,
-            0xe40d0419fab1a0edl },
-          0 },
-        /* 33 << 144 */
-        { { 0xcfdd08265cbccb84l,0x2258a16e88ad93c4l,0xb3ac365e728c5ad3l,
-            0x0bbf97808560df1fl },
-          { 0x42d08a39bad8c7b8l,0x1e3960106d3e8b91l,0xc332b39910274f58l,
-            0xe0a84dacce2ea778l },
-          0 },
-        /* 34 << 144 */
-        { { 0x113e1189ff432945l,0x4a0d2c3d04e1106cl,0xcde487744f3597b1l,
-            0x853b029174fa26eal },
-          { 0x2149e0ff02662e26l,0xb3181eaa5e6a030fl,0x086fc2159b006340l,
-            0xa1df84a694a4e0bbl },
-          0 },
-        /* 35 << 144 */
-        { { 0xc2cbd80ac99f8d3dl,0xe24b9d8f50ecf4f4l,0xf18d34728ecb126al,
-            0x83966662e1670aael },
-          { 0x1cece80fda5f594el,0x545e94ae65f391e0l,0xf3286dff93f98bb7l,
-            0xf945e6cdf5abf176l },
-          0 },
-        /* 36 << 144 */
-        { { 0x00ba5995dd95ac33l,0xa4957a40738f3bf4l,0x073539f599438a85l,
-            0xcc9c43acc2eb1411l },
-          { 0xe27501b5be2ec3d2l,0xa88d4ed057a85458l,0x870ae236755c8777l,
-            0x0933c5af89216cbal },
-          0 },
-        /* 37 << 144 */
-        { { 0xb5feea219e40e37fl,0x8c5ccb159e20fd60l,0xaeddc502ce8209a1l,
-            0xbdf873cc11e793b3l },
-          { 0xbc938103f0de8db5l,0x619fb72fb0e9d3d5l,0x800147cb588ed2adl,
-            0x260f92bb7901ced8l },
-          0 },
-        /* 39 << 144 */
-        { { 0x72dd9b089848c699l,0xc6086381185dacc1l,0x9489f11ff7d5a4c8l,
-            0xedb41d5628dee90fl },
-          { 0x1091db6b09af693cl,0xc7587551ae4b6413l,0x806aefb0768227adl,
-            0x4214b83eafb3c88el },
-          0 },
-        /* 40 << 144 */
-        { { 0xddfb02c4c753c45fl,0x18ca81b6f9c840fel,0x846fd09ab0f8a3e6l,
-            0xb1162adde7733dbcl },
-          { 0x7070ad20236e3ab6l,0xf88cdaf5b2a56326l,0x05fc8719997cbc7al,
-            0x442cd4524b665272l },
-          0 },
-        /* 41 << 144 */
-        { { 0x748819f9aa9c0ef5l,0xd7227d8ba458ad48l,0x8d67399f27aef626l,
-            0xc6241a1859bf0a4cl },
-          { 0xed9b0bfcc31cb9bbl,0x591254f896142555l,0x80e4bab461134151l,
-            0x7c5e680243efbd83l },
-          0 },
-        /* 43 << 144 */
-        { { 0x7f3f5a1706b9b7ddl,0x392132e75faeb417l,0x508ac4788fae38a2l,
-            0x2b854ead0d3499c3l },
-          { 0x26a687d8ef18bf0fl,0x62ff0c4a8ae00b61l,0x84111011f48578f2l,
-            0xa879f383cd0fcd3al },
-          0 },
-        /* 44 << 144 */
-        { { 0xeb7615aa202992f0l,0xde0562b38361d0b3l,0x789a302862027ee0l,
-            0xe3e3e9921048f899l },
-          { 0x07945c246deadab4l,0xeb06a15ec77d894el,0xb825af36bab1416bl,
-            0x99083c4df4b4e04fl },
-          0 },
-        /* 45 << 144 */
-        { { 0x4684a8f27b3ad6c3l,0x58238dbd928d9b6bl,0x31865b998da2c495l,
-            0xc1ca784fb8e7cda1l },
-          { 0xc9605dc71e081572l,0x8f560bcdef8ed104l,0x51f73981bd3feaedl,
-            0xc778aa4e4251c88dl },
-          0 },
-        /* 46 << 144 */
-        { { 0x9c0daa63aa502800l,0x73c7959a1e15b9bdl,0xd0447bcb7ab10f6cl,
-            0x05b8fbc8b8311bdel },
-          { 0xa8a74be1915d5c4el,0x38d41c1e0b7c0351l,0x5bb2d49ff52d6568l,
-            0x6c48d8eed5e43593l },
-          0 },
-        /* 47 << 144 */
-        { { 0x387b26d554159498l,0x92e92fad1ec34eb4l,0x0f88705e7a51b635l,
-            0x66bcbf4dedca735fl },
-          { 0x0a4c6112dcb896ccl,0x148e1dfe6fc72ad9l,0x3de977fd2b4c9585l,
-            0x0cd6e65f741e62cal },
-          0 },
-        /* 48 << 144 */
-        { { 0x7807f364b71698f5l,0x6ba418d29f7b605el,0xfd20b00fa03b2cbbl,
-            0x883eca37da54386fl },
-          { 0xff0be43ff3437f24l,0xe910b432a48bb33cl,0x4963a128329df765l,
-            0xac1dd556be2fe6f7l },
-          0 },
-        /* 49 << 144 */
-        { { 0x98ae40d53ce533bal,0x10342e1931fdd9c2l,0x54a255c8abf8b2bfl,
-            0x8facc41b15f6fef7l },
-          { 0x2e195565bc65b38bl,0xb9f3abaaeaea63cbl,0xede2ab9bf2b7518bl,
-            0x5e84102ce9ea3d81l },
-          0 },
-        /* 51 << 144 */
-        { { 0x162abc35113bc262l,0x8012f06829eb3fd4l,0x0e2727eb2c1ccf9cl,
-            0x89561ff44b455b20l },
-          { 0xc48db835ee3b1fd4l,0x4075ca86095bbfa7l,0x0c498d7d98745182l,
-            0x828fb93c5dfb5205l },
-          0 },
-        /* 52 << 144 */
-        { { 0xf95c7a5f0a76333bl,0x07603929cd607927l,0xabde328591028d3el,
-            0x55765e8fa032a400l },
-          { 0x3041f2cabed17cd7l,0x018a5b7b9a9e5923l,0xca4867975bb9bae3l,
-            0x741c802ecc382cb5l },
-          0 },
-        /* 53 << 144 */
-        { { 0x182a10311e5a3d8el,0xc352b8c8986c4d10l,0x7c50a172434c02ebl,
-            0x121d728c4420c41cl },
-          { 0x0f8eca2a8a51812fl,0xdb6c4a4ea5158430l,0x67944e0b8d8f4144l,
-            0x387cc2052405c77al },
-          0 },
-        /* 55 << 144 */
-        { { 0x98b36eb47e95ad76l,0x1973fa7d5f7e5ff7l,0xc4827abc6cc8a25cl,
-            0x4263a0d3ec822ae4l },
-          { 0x49f113f35217a6f4l,0xf27cc9bb81748aa6l,0x9cb81d97d822e08el,
-            0x698d2826b5c360bcl },
-          0 },
-        /* 57 << 144 */
-        { { 0x895f81514eb6d0b8l,0x32ef71df9f786536l,0x032a449430379a79l,
-            0xa8c1076218bdb83fl },
-          { 0x7a3b0b8fe53a4064l,0x0e724a54e2ce89b7l,0x565baeba7a31f6bcl,
-            0x12b9fa6387d18a7bl },
-          0 },
-        /* 59 << 144 */
-        { { 0x027231a3585bcfbdl,0x8690e977dca24269l,0x229c021afc6f1422l,
-            0xd98050d044084cabl },
-          { 0x6add95d79d4fd09al,0x12484c68c15b24ddl,0xa79a8f4facf4f551l,
-            0xf53204e27a83cbecl },
-          0 },
-        /* 60 << 144 */
-        { { 0xbc006413a906f7aal,0x9c8cd648bbeaf464l,0xaf5c7c64fb78cdf2l,
-            0xe45839eafabc2375l },
-          { 0x1eb89bd150012172l,0x9d0d76194488518cl,0xd55a7238bd534d32l,
-            0x48f35d5e95b4fe55l },
-          0 },
-        /* 61 << 144 */
-        { { 0xa6c5574f3e70a35al,0x35c11b5a8df97d97l,0x8f629f6cda85dd27l,
-            0x94dab294c218452el },
-          { 0xa2e1882e8916c731l,0xc02ce77c8929e350l,0xa7ed351fe4eff8afl,
-            0xeb76ef0654c3e1c1l },
-          0 },
-        /* 63 << 144 */
-        { { 0xc31d7cf87e3f5be5l,0x1472af0d3ce7f3a0l,0x226414f8f962e1afl,
-            0xd318e3df16f54295l },
-          { 0x9a3f6aaf41477cd3l,0x7034172f66ec6b2el,0xbea54eb537413a62l,
-            0x79f81262dc515e73l },
-          0 },
-        /* 64 << 144 */
-        { { 0x994f523a626332d5l,0x7bc388335561bb44l,0x005ed4b03d845ea2l,
-            0xd39d3ee1c2a1f08al },
-          { 0x6561fdd3e7676b0dl,0x620e35fffb706017l,0x36ce424ff264f9a8l,
-            0xc4c3419fda2681f7l },
-          0 },
-        /* 65 << 144 */
-        { { 0xb71a52b8b6bf8719l,0x0c7701f73196db36l,0xff1b936f53141cf4l,
-            0x684d8a3c1b94a31cl },
-          { 0xe555633ab52386e1l,0x9353a2af91450578l,0xc53db6fab99b14bcl,
-            0x1f2d42adcf619d36l },
-          0 },
-        /* 71 << 144 */
-        { { 0xbeb535ef3851c573l,0x3105fff585589843l,0xbe9f62a1d47aaf06l,
-            0x6bb2ee5d107e1131l },
-          { 0x82530247a4a7699fl,0x3fb475e144872afbl,0x8ad43fd73c4c49f2l,
-            0x3f7632882e045fc4l },
-          0 },
-        /* 77 << 144 */
-        { { 0x48440beb2924d7b2l,0x234163809c88fc57l,0xdc1d23d54ab08c2bl,
-            0x576400b6e70feab0l },
-          { 0x3b8afb8ba66da779l,0x7a7e3bf445468f16l,0x1976ddf3231f79dfl,
-            0xbe61c170b8531a9el },
-          0 },
-        /* 83 << 144 */
-        { { 0xf8d2dc768bf191b2l,0x3269e68813a39eb9l,0x104bb84be755eccfl,
-            0xb8d1330f2868f807l },
-          { 0x2b29c74cb06c6059l,0x3648baa1a6440a26l,0x5dfae323f1e6b2c9l,
-            0x9d0319b79330ac0al },
-          0 },
-        /* 89 << 144 */
-        { { 0x526ba3770e708bb2l,0x95c21ba327565dd9l,0x7071f46d48a0a873l,
-            0xe4b9959efed6cc74l },
-          { 0x1b16bfd1e08a5afal,0xc87fec98d1789782l,0x200186e946cfd068l,
-            0x88ea35a7280bf3ebl },
-          0 },
-        /* 95 << 144 */
-        { { 0x9e31943d42ac0e6cl,0xe61374cf1db8e40fl,0xbe27ea35a27db609l,
-            0x7c5b91d67bf192e9l },
-          { 0xc2af846defd0a24bl,0x1b2efc37669b647al,0xbfc3c38e5e58ef8al,
-            0xb6afb167e13ab5a2l },
-          0 },
-        /* 101 << 144 */
-        { { 0x08612d29b9f2aad4l,0x43c41330ad09dd17l,0xa45cb84a9f740519l,
-            0x0a9ea9a7512ec031l },
-          { 0x6e90dccaee747f35l,0xe4388bd1f0a1479bl,0x966140c4e20a9029l,
-            0x1bb1f65d7dd956abl },
-          0 },
-        /* 107 << 144 */
-        { { 0x066d206ea8f12bb3l,0xc9023b1b4325ec13l,0x1f56c72c96ead8ddl,
-            0x454050fd8003e4c2l },
-          { 0x9ca258a58917aa9dl,0xfe24b282d94593cfl,0xea66c203752741cfl,
-            0x5714268c295a895el },
-          0 },
-        /* 113 << 144 */
-        { { 0x72a9fbecc177d694l,0x38bb9387d68454d3l,0xa3d347bf590bc7d2l,
-            0xcb6e292605ccc234l },
-          { 0x588abfcf0d393c01l,0xf053dadf539e5568l,0xad7480fef2a8b157l,
-            0xff28c8bb018cac8fl },
-          0 },
-        /* 116 << 144 */
-        { { 0x12f1a00e7f5b8821l,0x0afa44e489b4b0cel,0x2dcaad8f6006338el,
-            0x79c022cdba41242bl },
-          { 0x7f6ef7e17871d350l,0x946c2a91674253adl,0xf686d137a9cbbdd9l,
-            0xa47ce2eaf7d4f9f2l },
-          0 },
-        /* 119 << 144 */
-        { { 0x1824991b205d40d6l,0x49cca1c085046a90l,0x7e23c1acd005e3c2l,
-            0x093a9ae6d102c8ffl },
-          { 0xf4791082d2f40843l,0xe456021811645483l,0x8a59c3b0fd3a6b39l,
-            0x39130e7f820de158l },
-          0 },
-        /* 125 << 144 */
-        { { 0xf7eef88d83b90783l,0xff60762af336d581l,0xf64f2d5dd801f5a0l,
-            0x672b6ee7d6b3b8b9l },
-          { 0xa2a2dceb08034d69l,0x3eca27f635638218l,0xe7065986fa17fefdl,
-            0xf1b74445f5803af1l },
-          0 },
-    },
-    {
-        /* 0 << 152 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 152 */
-        { { 0x32670d2f7189e71fl,0xc64387485ecf91e7l,0x15758e57db757a21l,
-            0x427d09f8290a9ce5l },
-          { 0x846a308f38384a7al,0xaac3acb4b0732b99l,0x9e94100917845819l,
-            0x95cba111a7ce5e03l },
-          0 },
-        /* 3 << 152 */
-        { { 0x37a01e48a105fc8el,0x769d754a289ba48cl,0xc08c6fe1d51c2180l,
-            0xb032dd33b7bd1387l },
-          { 0x953826db020b0aa6l,0x05137e800664c73cl,0xc66302c4660cf95dl,
-            0x99004e11b2cef28al },
-          0 },
-        /* 4 << 152 */
-        { { 0x214bc9a7d298c241l,0xe3b697ba56807cfdl,0xef1c78024564eadbl,
-            0xdde8cdcfb48149c5l },
-          { 0x946bf0a75a4d2604l,0x27154d7f6c1538afl,0x95cc9230de5b1fccl,
-            0xd88519e966864f82l },
-          0 },
-        /* 5 << 152 */
-        { { 0x1013e4f796ea6ca1l,0x567cdc2a1f792871l,0xadb728705c658d45l,
-            0xf7c1ff4ace600e98l },
-          { 0xa1ba86574b6cad39l,0x3d58d634ba20b428l,0xc0011cdea2e6fdfbl,
-            0xa832367a7b18960dl },
-          0 },
-        /* 7 << 152 */
-        { { 0x1ecc032af416448dl,0x4a7e8c10ec76d971l,0x854f9805b90b6eael,
-            0xfd0b15324bed0594l },
-          { 0x89f71848d98b5ca3l,0xd01fe5fcf039b3efl,0x4481332e627bda2el,
-            0xe67cecd7a5073e41l },
-          0 },
-        /* 9 << 152 */
-        { { 0x2ab0bce94595a859l,0x4d8c2da082084ee7l,0x21ff8be5acca3d3cl,
-            0xd8b805337827f633l },
-          { 0xf74e8c026becabbfl,0x9fae4dbefede4828l,0xd3885a5b3cc46bcfl,
-            0x2d535e2b6e6ad144l },
-          0 },
-        /* 10 << 152 */
-        { { 0x63d3444507d9e240l,0x6fbadf4338cff7e6l,0x8717624a959c9461l,
-            0xd7d951c411fb775bl },
-          { 0x4049161af6fc3a2bl,0x0dfa2547a1a8e98dl,0xeca780d439c2139cl,
-            0xd8c2d8cbd73ea8efl },
-          0 },
-        /* 11 << 152 */
-        { { 0x3aa1974f07605b28l,0x4f3d82a71e296255l,0xbbe5ea03b4e23f16l,
-            0x8f5c6c6b4e654193l },
-          { 0x27181182d3e8ab01l,0xc68bb231f3ba6bc2l,0x90a244d820af1fd7l,
-            0x605abc055b713f4fl },
-          0 },
-        /* 13 << 152 */
-        { { 0xca5fe19bd221991al,0x271ff066f05f400el,0x9d46ec4c9cf09896l,
-            0xdcaa8dfdec4febc3l },
-          { 0xaa3995a0adf19d04l,0xc98634239da573a6l,0x378058b2f2465b2bl,
-            0x20d389f9b4c31612l },
-          0 },
-        /* 15 << 152 */
-        { { 0xd7d199c7b7631c9dl,0x1322c2b8bb123942l,0xe662b68fbe8b6848l,
-            0xc970faf2cde99b14l },
-          { 0x61b27134b06655e5l,0xadcef8f781365d89l,0x917b5ab521b851aal,
-            0x4f4472121cf694a7l },
-          0 },
-        /* 16 << 152 */
-        { { 0x488f1185ca8d9d1al,0xadf2c77dd987ded2l,0x5f3039f060c46124l,
-            0xe5d70b7571e095f4l },
-          { 0x82d586506260e70fl,0x39d75ea7f750d105l,0x8cf3d0b175bac364l,
-            0xf3a7564d21d01329l },
-          0 },
-        /* 17 << 152 */
-        { { 0x241e3907fe44e547l,0x42d464c36b992187l,0xeaa8fa989ba72f28l,
-            0x965a8b8f6afbb81fl },
-          { 0x69356a7a8b375ea5l,0x22501ec741bdcc83l,0xf80f4e1445fb180cl,
-            0xc0b12e95f5e1b822l },
-          0 },
-        /* 19 << 152 */
-        { { 0x977234e05483dc02l,0x0167430c13d8dcb2l,0xa9971278049912edl,
-            0xab044b18ca40fa39l },
-          { 0xac9587449ff3896cl,0x75bb32eb860d1240l,0xf807071f6b958654l,
-            0x67d2d3dc7121b4b6l },
-          0 },
-        /* 21 << 152 */
-        { { 0x3b61e67722f9f017l,0x9c593eb1a8541696l,0xbeba950050eda653l,
-            0x07b5a48f5e673f6al },
-          { 0x748dca0013257aa3l,0x6bbddf9a7372e942l,0xc012f4badde83977l,
-            0x6e59b327392ddb53l },
-          0 },
-        /* 23 << 152 */
-        { { 0xb2f3fff641356603l,0x50e63537545f042bl,0x55e5149770eb530dl,
-            0x5a7383c310860c3bl },
-          { 0x7be30382ea669a09l,0xfdf735d289cc1c7fl,0x6e51ed844e0607cfl,
-            0xdab566df4893795el },
-          0 },
-        /* 25 << 152 */
-        { { 0x20e3be0f8920690dl,0x98db80eaac279c05l,0x4cd5c60a44b8a4f8l,
-            0xeda7e91c7b0335f4l },
-          { 0x45c1302a41ee5713l,0x1f6455fe588508d0l,0x82cb7311163d2fc3l,
-            0xe866b90322f10b71l },
-          0 },
-        /* 27 << 152 */
-        { { 0xc217a2e259b4041el,0x85b96ce274526cbfl,0xcbfc4f5473f12687l,
-            0x097caa5fd40225e7l },
-          { 0x0871ad406e91293fl,0x5f2ea207033b98ecl,0x0b3b8fac1f27d37al,
-            0x7d72dd4c7f03876cl },
-          0 },
-        /* 28 << 152 */
-        { { 0xb51a40a51e6a75c1l,0x24327c760ea7d817l,0x0663018207774597l,
-            0xd6fdbec397fa7164l },
-          { 0x20c99dfb13c90f48l,0xd6ac5273686ef263l,0xc6a50bdcfef64eebl,
-            0xcd87b28186fdfc32l },
-          0 },
-        /* 29 << 152 */
-        { { 0x2f0c49ac95861439l,0xcdcb051b2e36e38al,0x459474080ae20c0cl,
-            0x374baad2dddf0aabl },
-          { 0x291abc85d5d104a4l,0x0758001958a0657cl,0xd0f428e1a905ea13l,
-            0x12599ddcf7241dbfl },
-          0 },
-        /* 31 << 152 */
-        { { 0x16222ce81bc3c403l,0xbacc1508fc13ca02l,0xfa98db4d920ee8e9l,
-            0xe5fc39c4df12a359l },
-          { 0x4e8c9b90188733e8l,0x04283dd81394936cl,0x93b3db51cd130432l,
-            0x33bfe3163c93ce31l },
-          0 },
-        /* 33 << 152 */
-        { { 0xb48591e9840b1724l,0x1009559f5885ec6fl,0x45ee51121b077620l,
-            0x848f9800f1f4cc8al },
-          { 0x6ec1e0f74e97bceal,0x953bc23a98e80642l,0x9f0d1e8194ce7181l,
-            0xeb3e6b9700eec596l },
-          0 },
-        /* 34 << 152 */
-        { { 0x6d34b39bff7514dal,0x29ffe49825be3634l,0x63e56598f28c8b82l,
-            0x78b99133aab41bcel },
-          { 0x11febd5a52563180l,0xa3be94c5c356a8c0l,0x5e9b422e0d61f864l,
-            0x2bf4ca1278fd259el },
-          0 },
-        /* 35 << 152 */
-        { { 0x8f60e40266914514l,0x6d9e280fef178167l,0x2ff7aec9e2949a48l,
-            0x422389ce72d37511l },
-          { 0xe9b156f3307ac1d2l,0x1cb581a78518e79fl,0x56d43f302185cf82l,
-            0x8d46c5aade59562cl },
-          0 },
-        /* 36 << 152 */
-        { { 0x50fc0711745edc11l,0x9dd9ad7d3dc87558l,0xce6931fbb49d1e64l,
-            0x6c77a0a2c98bd0f9l },
-          { 0x62b9a6296baf7cb1l,0xcf065f91ccf72d22l,0x7203cce979639071l,
-            0x09ae4885f9cb732fl },
-          0 },
-        /* 37 << 152 */
-        { { 0xd007d682e4b35428l,0x80c162315bcdc0d6l,0xe55a86bd36fce9b2l,
-            0x16772edb969a87cfl },
-          { 0xff323a2d3f370c94l,0x8d3c8028bf3c1afcl,0x4e1591e73b0c3fafl,
-            0xfbd6475cb981ce83l },
-          0 },
-        /* 39 << 152 */
-        { { 0xcf414ae3315b2471l,0xf54abf8033168de6l,0x6883efc5df5cdb24l,
-            0x3eca788c8efe81acl },
-          { 0xdb58c6c778eeccadl,0x3c77939082fecfb7l,0x5736cdd9c9b513f3l,
-            0xab7e6ea57b02aaf2l },
-          0 },
-        /* 40 << 152 */
-        { { 0x5e7c3becee8314f3l,0x1c068aeddbea298fl,0x08d381f17c80acecl,
-            0x03b56be8e330495bl },
-          { 0xaeffb8f29222882dl,0x95ff38f6c4af8bf7l,0x50e32d351fc57d8cl,
-            0x6635be5217b444f0l },
-          0 },
-        /* 41 << 152 */
-        { { 0x2cec7ba64805d895l,0x4c8399870ac78e7cl,0x031ad6c7f79416c5l,
-            0x1b2f2621f1838d2fl },
-          { 0x60835eac91447f90l,0x59147af1f9bab5d9l,0x7a3005d6f393f175l,
-            0x8cf3c468c4120ba2l },
-          0 },
-        /* 43 << 152 */
-        { { 0xeccffc7d8a2c1f08l,0x308916d37e384bd4l,0x6b8c2ff55e366384l,
-            0xf4b2850d03e4747cl },
-          { 0xe839c569e96c1488l,0xa46ff7f956c9cb10l,0xd968c74c362fd172l,
-            0x2aa7fe4cad6bb601l },
-          0 },
-        /* 44 << 152 */
-        { { 0x04d15276a5177900l,0x4e1dbb47f6858752l,0x5b475622c615796cl,
-            0xa6fa0387691867bfl },
-          { 0xed7f5d562844c6d0l,0xc633cf9b03a2477dl,0xf6be5c402d3721d6l,
-            0xaf312eb7e9fd68e6l },
-          0 },
-        /* 45 << 152 */
-        { { 0xf3b8164eec04c847l,0xa305ca93fe65816cl,0xa65f9963c7e2ce52l,
-            0xc448005198882cfcl },
-          { 0x46a998df05c165bbl,0xc38f4edf9dfe1e98l,0xb96ec43f8739f77al,
-            0x10a23af9313b40bfl },
-          0 },
-        /* 46 << 152 */
-        { { 0xe476c3e3ee668e0cl,0xcec6a984478197c2l,0xc9fa1d68897147c1l,
-            0x4e6aec0ea6465793l },
-          { 0xedca9db76b219c3bl,0xa2cd57942e508d3bl,0x38b384663936e02al,
-            0x0b8d3b4ca54ce90fl },
-          0 },
-        /* 47 << 152 */
-        { { 0x66e06537af08e0fcl,0x70fe0f2a907f1a93l,0x8c25245285ec1647l,
-            0x0b8b2964d5560eddl },
-          { 0xda45a326f3ef8e14l,0xf3adf9a6abc3494bl,0xbbdd93c11eda0d92l,
-            0x1b5e12c609912773l },
-          0 },
-        /* 48 << 152 */
-        { { 0x242792d2e7417ce1l,0xff42bc71970ee7f5l,0x1ff4dc6d5c67a41el,
-            0x77709b7b20882a58l },
-          { 0x3554731dbe217f2cl,0x2af2a8cd5bb72177l,0x58eee769591dd059l,
-            0xbb2930c94bba6477l },
-          0 },
-        /* 49 << 152 */
-        { { 0x5d9d507551d01848l,0x53dadb405b600d1el,0x7ba5b4dc5cb0a9a3l,
-            0xdb85b04c6795e547l },
-          { 0x480e7443f0354843l,0xc7efe6e813012322l,0x479b674a2aeee1e6l,
-            0xf5481f19704f4ea3l },
-          0 },
-        /* 51 << 152 */
-        { { 0x76a38d6978c7816el,0xe020c87df84ec554l,0x99af2f78f9818010l,
-            0x31cf103d988136eal },
-          { 0x6b095a114816a5aal,0x5a4cd2a4eff0a4afl,0x543041a5892e5e04l,
-            0x460f94c30aab9ee1l },
-          0 },
-        /* 52 << 152 */
-        { { 0x863ee0477d930cfcl,0x4c262ad1396fd1f4l,0xf4765bc8039af7e1l,
-            0x2519834b5ba104f6l },
-          { 0x7cd61b4cd105f961l,0xa5415da5d63bca54l,0x778280a088a1f17cl,
-            0xc49689492329512cl },
-          0 },
-        /* 53 << 152 */
-        { { 0x282d92b48cd3948al,0x95d219dfe168205bl,0xf6111a6f87bf3abcl,
-            0x910f8ce655fee9f2l },
-          { 0xb6c806f74f71ac89l,0xd0cc300fb7235f73l,0xfe37ccb47d0d45bbl,
-            0x5b2445f6952f0eaal },
-          0 },
-        /* 55 << 152 */
-        { { 0x03870be447141962l,0x8b79033f4a2b3f7fl,0xb6983b5ed2e5e274l,
-            0x2a2f8018501ed99cl },
-          { 0x07a92eb9feb49656l,0x063f0a9e482e2972l,0x413be27a57435832l,
-            0x56363c5f6f9d3de1l },
-          0 },
-        /* 57 << 152 */
-        { { 0xd247153163b50214l,0x32b435eeb2b897del,0xc49f0b01b05df4del,
-            0x97b6aa40b7df9b91l },
-          { 0x58ff34ec8ec39d78l,0xab0889005e0114a3l,0x6872b4de4822b7b8l,
-            0x7614c0d0ab239073l },
-          0 },
-        /* 59 << 152 */
-        { { 0x81891d378aa5d80al,0xf48ca24292e45f2cl,0xba711b6c0d04904cl,
-            0x5992cda349f16ed6l },
-          { 0x18b9a739790593eel,0x8b98e84dc4ba16d1l,0xac55701cb7b81615l,
-            0xadb4533b15822291l },
-          0 },
-        /* 60 << 152 */
-        { { 0x6210db7181236c97l,0x74f7685b3ee0781fl,0x4df7da7ba3e41372l,
-            0x2aae38b1b1a1553el },
-          { 0x1688e222f6dd9d1bl,0x576954485b8b6487l,0x478d21274b2edeaal,
-            0xb2818fa51e85956al },
-          0 },
-        /* 61 << 152 */
-        { { 0xc0677533f255ba8el,0x2bdae2a1efa2aabel,0xf7aebbd4b086c8a6l,
-            0x148455d992cb1147l },
-          { 0xa084e8d715402565l,0x33f111a8fa41bf23l,0x4bc990d627ac189bl,
-            0x48dbe6569d505f76l },
-          0 },
-        /* 63 << 152 */
-        { { 0x59df7fab596766f3l,0x4cadcbfe604f26e4l,0x0cf199338a6af592l,
-            0x3af1ace287b826c1l },
-          { 0xf09a5b38ee60684el,0xa04cbeda4ed7c711l,0xdb28c42eb1731040l,
-            0x75fcc0ec2e6e6523l },
-          0 },
-        /* 64 << 152 */
-        { { 0x1e6adddaf176f2c0l,0x01ca4604e2572658l,0x0a404ded85342ffbl,
-            0x8cf60f96441838d6l },
-          { 0x9bbc691cc9071c4al,0xfd58874434442803l,0x97101c85809c0d81l,
-            0xa7fb754c8c456f7fl },
-          0 },
-        /* 65 << 152 */
-        { { 0x4374020072196f30l,0x59ed0dc0dcd6c935l,0x17d4ed8e5034161bl,
-            0x8abe3e13009e7170l },
-          { 0xe51c41c96c791456l,0xc671807704d72bb6l,0xd4309cf56bba424al,
-            0x6122b951d0ca4ceal },
-          0 },
-        /* 71 << 152 */
-        { { 0xdfdb2e9c4278982bl,0xf3a282b32d6a2a61l,0x5611650cd2f2b03cl,
-            0xa62c177f43f7f83al },
-          { 0x372310ab4c593d32l,0x2bb6903a2b570f9cl,0x2930da3df43af904l,
-            0x2bbd04aa2c8a5a7dl },
-          0 },
-        /* 77 << 152 */
-        { { 0x10c324c007e536del,0xc456836d377be1b4l,0x9a627d75d785af3fl,
-            0xde74559118b58b31l },
-          { 0xeac83ea60c47239al,0x35da24abbc02f670l,0x2d4abde0c3af6e63l,
-            0xac53acba5a7ebf1bl },
-          0 },
-        /* 83 << 152 */
-        { { 0x2b03ec2efd9a9f3el,0xc967cd2b9d898a09l,0xb24bcba8039dc4f6l,
-            0x0ea1d297061ada1el },
-          { 0x3a7a25fbc134b8bcl,0x846282d6f61cd312l,0xfa1de0d2e0d778d9l,
-            0xf75fad4ef09be264l },
-          0 },
-        /* 89 << 152 */
-        { { 0x7d35695bcf74afb3l,0x34d43d9f15bb36fbl,0x15f0b43960b45fbel,
-            0xb15db8d84f38ec06l },
-          { 0x93ce7d50f7da1406l,0x2db97edd9f076aaal,0x27ebb9aa354429dcl,
-            0xf97eb5c446ace469l },
-          0 },
-        /* 95 << 152 */
-        { { 0x758fa2312dcf498fl,0xaa8c14d15cf3853al,0x416f5dab097d786al,
-            0xceec00ef38f242a0l },
-          { 0x2f8b10b9d8b75ef2l,0xee64912b2281be6al,0xa883481aa382a51el,
-            0x9442300f61b16b8al },
-          0 },
-        /* 101 << 152 */
-        { { 0x80e7fbc4f4b171e1l,0xdd2246f5661564a4l,0xcf08d73cd00d4e54l,
-            0xf725f5389fca9a30l },
-          { 0xd9607358af20debel,0xa97c81e16f7d1cf2l,0x72794ae70dedfb2al,
-            0xc328cb93159ff29dl },
-          0 },
-        /* 107 << 152 */
-        { { 0xaf9491d6252f6d59l,0x6744d7518feda60dl,0xa485f8aa34c5c048l,
-            0x2ed794b4b50ea53bl },
-          { 0x0da82650db26c289l,0xed3ab4c50904af55l,0x425eda1176544463l,
-            0x917be5f48939b29bl },
-          0 },
-        /* 113 << 152 */
-        { { 0xa2e72d0f8e208e5dl,0x5a5e4344234a5fedl,0x6dcc56535005bee8l,
-            0x09d0c254854e2e04l },
-          { 0xade4bcdba82f0789l,0x5a3e3cd4ec460a91l,0x6b1a867be76695b2l,
-            0xd1eb9df0a28b9331l },
-          0 },
-        /* 116 << 152 */
-        { { 0x3f5cf5f678e62ddcl,0x2267c45407fd752bl,0x5e361b6b5e437bbel,
-            0x95c595018354e075l },
-          { 0xec725f85f2b254d9l,0x844b617d2cb52b4el,0xed8554f5cf425fb5l,
-            0xab67703e2af9f312l },
-          0 },
-        /* 119 << 152 */
-        { { 0x8dcc920005fb96bbl,0x29d2442470f84705l,0x540bb6e63f09628fl,
-            0x07f8b4de2a9c2359l },
-          { 0xb8e002d1957e41dcl,0x9a0fe82b9e683a3fl,0x996b1a5250e633fdl,
-            0x748a11e500c669cal },
-          0 },
-        /* 125 << 152 */
-        { { 0x0593a788581dfd6el,0x99f1164f64e1b329l,0x1142c44b1defddbbl,
-            0xbc95c9c7660b9036l },
-          { 0xf24b5a47079179ccl,0x6175b52c21f7033bl,0x8b5d84183bc2eec0l,
-            0xc1332c8272d12670l },
-          0 },
-    },
-    {
-        /* 0 << 160 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 160 */
-        { { 0xd433e50f6d3549cfl,0x6f33696ffacd665el,0x695bfdacce11fcb4l,
-            0x810ee252af7c9860l },
-          { 0x65450fe17159bb2cl,0xf7dfbebe758b357bl,0x2b057e74d69fea72l,
-            0xd485717a92731745l },
-          0 },
-        /* 3 << 160 */
-        { { 0x6c8d0aa9b898fd52l,0x2fb38a57be9af1a7l,0xe1f2b9a93b4f03f8l,
-            0x2b1aad44c3f0cc6fl },
-          { 0x58b5332e7cf2c084l,0x1c57d96f0367d26dl,0x2297eabdfa6e4a8dl,
-            0x65a947ee4a0e2b6al },
-          0 },
-        /* 4 << 160 */
-        { { 0xaaafafb0285b9491l,0x01a0be881e4c705el,0xff1d4f5d2ad9caabl,
-            0x6e349a4ac37a233fl },
-          { 0xcf1c12464a1c6a16l,0xd99e6b6629383260l,0xea3d43665f6d5471l,
-            0x36974d04ff8cc89bl },
-          0 },
-        /* 5 << 160 */
-        { { 0xf535b616fdd5b854l,0x592549c85728719fl,0xe231468606921cadl,
-            0x98c8ce34311b1ef8l },
-          { 0x28b937e7e9090b36l,0x67fc3ab90bf7bbb7l,0x12337097a9d87974l,
-            0x3e5adca1f970e3fel },
-          0 },
-        /* 7 << 160 */
-        { { 0xcdcc68a7b3f85ff0l,0xacd21cdd1a888044l,0xb6719b2e05dbe894l,
-            0xfae1d3d88b8260d4l },
-          { 0xedfedece8a1c5d92l,0xbca01a94dc52077el,0xc085549c16dd13edl,
-            0xdc5c3bae495ebaadl },
-          0 },
-        /* 9 << 160 */
-        { { 0xcc17063fbe7b643al,0x7872e1c846085760l,0x86b0fffbb4214c9el,
-            0xb18bbc0e72bf3638l },
-          { 0x8b17de0c722591c9l,0x1edeab1948c29e0cl,0x9fbfd98ef4304f20l,
-            0x2d1dbb6b9c77ffb6l },
-          0 },
-        /* 10 << 160 */
-        { { 0xf53f2c658ead09f7l,0x1335e1d59780d14dl,0x69cc20e0cd1b66bcl,
-            0x9b670a37bbe0bfc8l },
-          { 0xce53dc8128efbeedl,0x0c74e77c8326a6e5l,0x3604e0d2b88e9a63l,
-            0xbab38fca13dc2248l },
-          0 },
-        /* 11 << 160 */
-        { { 0x255616d3c7141771l,0xa86691ab2f226b66l,0xda19fea4b3ca63a9l,
-            0xfc05dc42ae672f2bl },
-          { 0xa9c6e786718ba28fl,0x07b7995b9c66b984l,0x0f434f551b3702f2l,
-            0xd6f6212fda84eeffl },
-          0 },
-        /* 13 << 160 */
-        { { 0x4b0e7987b5b41d78l,0xea7df9074bf0c4f8l,0xb4d03560fab80ecdl,
-            0x6cf306f6fb1db7e5l },
-          { 0x0d59fb5689fd4773l,0xab254f4000f9be33l,0x18a09a9277352da4l,
-            0xf81862f5641ea3efl },
-          0 },
-        /* 15 << 160 */
-        { { 0xb59b01579f759d01l,0xa2923d2f7eae4fdel,0x18327757690ba8c0l,
-            0x4bf7e38b44f51443l },
-          { 0xb6812563b413fc26l,0xedb7d36379e53b36l,0x4fa585c4c389f66dl,
-            0x8e1adc3154bd3416l },
-          0 },
-        /* 16 << 160 */
-        { { 0xd3b3a13f1402b9d0l,0x573441c32c7bc863l,0x4b301ec4578c3e6el,
-            0xc26fc9c40adaf57el },
-          { 0x96e71bfd7493cea3l,0xd05d4b3f1af81456l,0xdaca2a8a6a8c608fl,
-            0x53ef07f60725b276l },
-          0 },
-        /* 17 << 160 */
-        { { 0x971e9eedd5098497l,0x97692be63077d8a7l,0xb57e02ad79625a8al,
-            0x5e3d20f6a688ecd5l },
-          { 0xa4431a28188f964dl,0xd4eb23bd5a11c1dbl,0xfcda853eadc7446fl,
-            0x9e2e98b593c94046l },
-          0 },
-        /* 19 << 160 */
-        { { 0x4a649b66eddaa4f1l,0x35a04f185e690c50l,0x1639bdcff908bc53l,
-            0xce6d525c121726e8l },
-          { 0x70f34948902b402cl,0x3a40c6950e290579l,0x7b0ed90f469a0085l,
-            0xecb979c60189c501l },
-          0 },
-        /* 21 << 160 */
-        { { 0x847e2bde5cee8d07l,0x1bed198cd3340037l,0x439ffb3ce41586e3l,
-            0x594980f1856f15b0l },
-          { 0x22c3b86c6e9307c6l,0xf8b3ee08876382dbl,0x850c628e628f3f30l,
-            0x22ec0acb51ee3659l },
-          0 },
-        /* 23 << 160 */
-        { { 0xa4052591efcef5a0l,0x82692a47106d55afl,0xdac3ea88e6ead453l,
-            0xaa1368fcf3dfd875l },
-          { 0x87bc688aa0c539eal,0x905e206040b1de3el,0x072240b8f1d52452l,
-            0x3ebf0644d57b6580l },
-          0 },
-        /* 25 << 160 */
-        { { 0x12109bcc07a0b2f8l,0x336f87d2ca23f14cl,0xb39ae282452a2ea2l,
-            0x8e085f5bab59a500l },
-          { 0xf7daeb69b63f015cl,0x44c555bcacb47b38l,0x96190454b623910al,
-            0x4b666e2255b41b70l },
-          0 },
-        /* 27 << 160 */
-        { { 0xf146914eb53419fdl,0xd2109b07493e88bfl,0x30bf9cbccc54bcd5l,
-            0xcf9ea59750e34a1fl },
-          { 0x70ade8a59588591dl,0xf668be676b41c269l,0x3497c58f78df2e6bl,
-            0x0fad05cc71042b56l },
-          0 },
-        /* 28 << 160 */
-        { { 0x27f536e049ce89e7l,0x18908539cc890cb5l,0x308909abd83c2aa1l,
-            0xecd3142b1ab73bd3l },
-          { 0x6a85bf59b3f5ab84l,0x3c320a68f2bea4c6l,0xad8dc5386da4541fl,
-            0xeaf34eb0b7c41186l },
-          0 },
-        /* 29 << 160 */
-        { { 0x709da836093aa5f6l,0x567a9becb4644edel,0xae02a46044466b0cl,
-            0xc80b237a407f1b3bl },
-          { 0x451df45ab4168a98l,0xdc9b40ef24a3f7c9l,0x23593ef32671341dl,
-            0x40f4533190b90faal },
-          0 },
-        /* 31 << 160 */
-        { { 0x7f97768e922f36e3l,0x936943f8491034a2l,0x72f6c17f21483753l,
-            0x5489fa0cb2918619l },
-          { 0x55b31aa59cc21a46l,0xde4cc71a8e54ab14l,0x942cb8be9eaff8b0l,
-            0xe38f6116d1755231l },
-          0 },
-        /* 33 << 160 */
-        { { 0xf0c0606a395b39abl,0x0efcbc699b5166a5l,0x85995e6895453d85l,
-            0xadc9a2920806ee5cl },
-          { 0xc3662e804928fe09l,0x2a2ddcc6969c87e7l,0xa02d7947111d319dl,
-            0xde23bcf12d20f66dl },
-          0 },
-        /* 34 << 160 */
-        { { 0xc47cb3395f6d4a09l,0x6b4f355cee52b826l,0x3d100f5df51b930al,
-            0xf4512fac9f668f69l },
-          { 0x546781d5206c4c74l,0xd021d4d4cb4d2e48l,0x494a54c2ca085c2dl,
-            0xf1dbaca4520850a8l },
-          0 },
-        /* 35 << 160 */
-        { { 0xb2d15b14a911cc2bl,0xab2dfaf7643e28eal,0xfccc9ed1f52c4c2dl,
-            0xfb4b1d4a09d8faa3l },
-          { 0x6fd72a9b7f5ce767l,0x0233c856a287e2b5l,0xd42135e05775ebb9l,
-            0xb3c9dada7376568bl },
-          0 },
-        /* 36 << 160 */
-        { { 0x63c79326490a1acal,0xcb64dd9c41526b02l,0xbb772591a2979258l,
-            0x3f58297048d97846l },
-          { 0xd66b70d17c213ba7l,0xc28febb5e8a0ced4l,0x6b911831c10338c1l,
-            0x0d54e389bf0126f3l },
-          0 },
-        /* 37 << 160 */
-        { { 0x5952996b5306af1bl,0x99f444f4354b67bel,0x6f670181633a2928l,
-            0x289023f0e9bdc4a6l },
-          { 0xcbed12148f7455a2l,0x501ace2f659a4858l,0x83ee678d5f8e1784l,
-            0x95c984587335c5bdl },
-          0 },
-        /* 39 << 160 */
-        { { 0x2e25a1f3e0233000l,0xed0028cd44fe8ba9l,0x447501a6021d43b3l,
-            0x4ec203906b4dffccl },
-          { 0x50642f9ad0169740l,0x9360003373cc58adl,0x825f1a82fe9cf9acl,
-            0x456194c653242bd6l },
-          0 },
-        /* 40 << 160 */
-        { { 0x40242efeb483689bl,0x2575d3f6513ac262l,0xf30037c80ca6db72l,
-            0xc9fcce8298864be2l },
-          { 0x84a112ff0149362dl,0x95e575821c4ae971l,0x1fa4b1a8945cf86cl,
-            0x4525a7340b024a2fl },
-          0 },
-        /* 41 << 160 */
-        { { 0x83205e8f5db5e2b1l,0x94e7a2621e311c12l,0xe1cac7333e37068fl,
-            0xe3f43f6d39965acfl },
-          { 0xd28db9e854d905bal,0x686f372a101f2162l,0x409cfe5d3d1b46d4l,
-            0x17648f1cbd0bb63al },
-          0 },
-        /* 43 << 160 */
-        { { 0xef83315b821f4ee4l,0xb90766998ba78b4dl,0xee6a15880fce5260l,
-            0x828f4a72d754affbl },
-          { 0x4650ec7daaae54d2l,0x3174301f1057efe9l,0x174e0683eb7704cel,
-            0xb7e6aeb357eb0b14l },
-          0 },
-        /* 44 << 160 */
-        { { 0xcaead1c2c905d85fl,0xe9d7f7900733ae57l,0x24c9a65cf07cdd94l,
-            0x7389359ca4b55931l },
-          { 0xf58709b7367e45f7l,0x1f203067cb7e7adcl,0x82444bffc7b72818l,
-            0x07303b35baac8033l },
-          0 },
-        /* 45 << 160 */
-        { { 0xd59528fb38a0dc96l,0x8179dc9088d0e857l,0x55e9ba039ed4b1afl,
-            0x8a2c0dc787b74cacl },
-          { 0xe8ca91aeef1c0006l,0x67f59ab2de0e15d4l,0xba0cddf86e6634d2l,
-            0x352803657b7ba591l },
-          0 },
-        /* 46 << 160 */
-        { { 0x1e1ee4e4d13b7ea1l,0xe6489b24e0e74180l,0xa5f2c6107e70ef70l,
-            0xa1655412bdd10894l },
-          { 0x555ebefb7af4194el,0x533c1c3c8e89bd9cl,0x735b9b5789895856l,
-            0x15fb3cd2567f5c15l },
-          0 },
-        /* 47 << 160 */
-        { { 0xef07bfedfb0986c7l,0xde138afe47c1659al,0x8b79c159a555e907l,
-            0x21d572f1125518bbl },
-          { 0x2005999ad320410cl,0x4167dc469484414bl,0x0cd965c34c6aaefdl,
-            0x2a1abc9a0e1d5e9dl },
-          0 },
-        /* 48 << 160 */
-        { { 0x057fed45526f09fdl,0xe8a4f10c8128240al,0x9332efc4ff2bfd8dl,
-            0x214e77a0bd35aa31l },
-          { 0x32896d7314faa40el,0x767867ec01e5f186l,0xc9adf8f117a1813el,
-            0xcb6cda7854741795l },
-          0 },
-        /* 49 << 160 */
-        { { 0xadfaf39b888dedf1l,0x4f8b178aab1750b9l,0x26418617ffe6b0eal,
-            0x01d1be82af04a59fl },
-          { 0x41584147e652db64l,0xf7775ac5727f9ea7l,0x58052a20e72ad8bbl,
-            0x5badf0dc6021160el },
-          0 },
-        /* 51 << 160 */
-        { { 0x8490ea99183de59dl,0xc95f72146f5c6f8cl,0x89b55d15df00c334l,
-            0x84386ad8a0ec36f7l },
-          { 0x24dadaefe4dc1ed1l,0xc606ba4c1e717227l,0x7e4756c0bbfa62eal,
-            0x3916cf14afc29cf3l },
-          0 },
-        /* 52 << 160 */
-        { { 0xb7b4d00101dae185l,0x45434e0b9b7a94bcl,0xf54339affbd8cb0bl,
-            0xdcc4569ee98ef49el },
-          { 0x7789318a09a51299l,0x81b4d206b2b025d8l,0xf64aa418fae85792l,
-            0x3e50258facd7baf7l },
-          0 },
-        /* 53 << 160 */
-        { { 0x4152c508492d91f3l,0x59d6cf9c678f9db4l,0xb0a8c966404608d1l,
-            0xdced55d0e3fed558l },
-          { 0x0914a3cb33a76188l,0x79df212423d35d46l,0x2322507fca13b364l,
-            0x0aed41d60078ab93l },
-          0 },
-        /* 55 << 160 */
-        { { 0x7acdaa7f6b2ebfc2l,0xb5ab1a9a80d9f67fl,0x53ba8173ff8aa8b0l,
-            0x9cd85cf874ca56a6l },
-          { 0xabac57f49c4fad81l,0x2325bb8521078995l,0xbac5e3a1b928a054l,
-            0x7219047a2394cc2al },
-          0 },
-        /* 57 << 160 */
-        { { 0xa33410d2aa75fd37l,0x821093affc0f1192l,0xe45e85ed155e39a9l,
-            0xd0e87cd12de67188l },
-          { 0xdeca97d965d43d87l,0x8c73826f9d2c99ecl,0x1bfe111e33237ddbl,
-            0xda32e865587bfb28l },
-          0 },
-        /* 59 << 160 */
-        { { 0xde456d92c89e9e4el,0xe45688a98e47f3cdl,0x3deacfca3bacbde0l,
-            0xdf9b32efc9683a70l },
-          { 0x749bc007e1691106l,0x788a05342a5154d7l,0x1a06baecf7c7b70dl,
-            0xb5b608eeae6ffc4cl },
-          0 },
-        /* 60 << 160 */
-        { { 0x4cd296df5579bea4l,0x10e35ac85ceedaf1l,0x04c4c5fde3bcc5b1l,
-            0x95f9ee8a89412cf9l },
-          { 0x2c9459ee82b6eb0fl,0x2e84576595c2aaddl,0x774a84aed327fcfel,
-            0xd8c937220368d476l },
-          0 },
-        /* 61 << 160 */
-        { { 0x39ebf947ccd25abbl,0x74e7a868cb49ebael,0x576ea108332e6147l,
-            0xcf3ba166150c1e5dl },
-          { 0xb5411fc3515c0e93l,0x51b15761f15c8a34l,0x362a4a3a0d213f38l,
-            0xf6f63c2e24e93aeal },
-          0 },
-        /* 63 << 160 */
-        { { 0x0cb3a2dcb78528d5l,0xa1888c18d585bb41l,0x210cca40de402a6el,
-            0x10c6339d9ed7c381l },
-          { 0xcd3558d561fe2a0cl,0xc97db05dad5140b1l,0x3366b028b21f8d11l,
-            0x878b09033e38be13l },
-          0 },
-        /* 64 << 160 */
-        { { 0x211cde10296c36efl,0x7ee8967282c4da77l,0xb617d270a57836dal,
-            0xf0cd9c319cb7560bl },
-          { 0x01fdcbf7e455fe90l,0x3fb53cbb7e7334f3l,0x781e2ea44e7de4ecl,
-            0x8adab3ad0b384fd0l },
-          0 },
-        /* 65 << 160 */
-        { { 0x081e505aa353ba05l,0x244ab34a288b86b1l,0x1155f06214e3a829l,
-            0x383300daf2118a6bl },
-          { 0xe8fc17cef27032b9l,0xed7f05c9c7bd2389l,0x78f70d14202f8a88l,
-            0x8a8310c0647b3f20l },
-          0 },
-        /* 71 << 160 */
-        { { 0xc80786e1a3633369l,0x496d55de9073f5b9l,0x10deeb6a89ae93cel,
-            0x6a2dd5c8b12e00c6l },
-          { 0xc25cd2f90c68e26dl,0x29d7ad8b53f0bb64l,0x2dd0d027d7fc9b00l,
-            0xad21e1f7ca9c4d5dl },
-          0 },
-        /* 77 << 160 */
-        { { 0xd45cb932d83465f3l,0x95830c0faf22fdbdl,0x41d830e007cd2a0al,
-            0x4a08500e3616e716l },
-          { 0x5931fc9f277755a5l,0x7d11680731006764l,0xa409a0ad1b3999aal,
-            0xec70368c9939d566l },
-          0 },
-        /* 83 << 160 */
-        { { 0x3905cb59f2030370l,0x7e9bdee56dcc8fd7l,0xb1b7b04e9806e06fl,
-            0xfbdadce22c73eb57l },
-          { 0xfb1ab2e98d5b2eb3l,0x58fbf2df7699338bl,0x81b1c54a63b5a032l,
-            0xefd1a1896a5d7ff4l },
-          0 },
-        /* 89 << 160 */
-        { { 0x0265189da1f769eal,0x22fa0bbbfdb5a502l,0xf69f0d1b21027534l,
-            0x64302b81f6066b99l },
-          { 0xdef85fc98a717e80l,0xe066166386879a3bl,0xe5489b347f95b22cl,
-            0x106dca9aa054a563l },
-          0 },
-        /* 95 << 160 */
-        { { 0xd624b4f4b4be9a77l,0x21a11ed77d50acb1l,0x707181f43d406e11l,
-            0x3f324d203ef158bcl },
-          { 0xb29a2a34aa8cc8del,0x482f4a15315db969l,0x42ce4fc7d9af272el,
-            0x784665b1f8f4cdc4l },
-          0 },
-        /* 101 << 160 */
-        { { 0x66ff7f73ab43a863l,0xa90be2cba77fd07el,0x84843997f76e5288l,
-            0x288c197f3cee129bl },
-          { 0x39acc080c0a060a6l,0x4c8e574bd24e27cal,0x1dd6170ffcd3d5e9l,
-            0x9736bb51f75e5150l },
-          0 },
-        /* 107 << 160 */
-        { { 0x2133810e6ba75716l,0x4debf728712886a8l,0x351e46a1f527d1f3l,
-            0x29709ae8e9591564l },
-          { 0x696163d3a3dc1780l,0xd5b7825ae02aadf3l,0x23579d7cd565ae68l,
-            0x105380124fa42cecl },
-          0 },
-        /* 113 << 160 */
-        { { 0x04eb554d13ffa704l,0x7441a62f2ed33d20l,0xaa926fa0b5b81324l,
-            0xb981bcb829836f61l },
-          { 0x313a78d4cc9a7a15l,0xff1242d11b3921d2l,0xc0053fd36a209d4dl,
-            0x95ac85caf7e92ca9l },
-          0 },
-        /* 116 << 160 */
-        { { 0x6d2a483d6f73c51el,0xa4cb2412ea0dc2ddl,0x50663c411eb917ffl,
-            0x3d3a74cfeade299el },
-          { 0x29b3990f4a7a9202l,0xa9bccf59a7b15c3dl,0x66a3ccdca5df9208l,
-            0x48027c1443f2f929l },
-          0 },
-        /* 119 << 160 */
-        { { 0xdf8a6f9673c3f6fbl,0xe4b1f0d98cc03220l,0x5ddacd618350480cl,
-            0x485c4fababdfb016l },
-          { 0xdc840628b4d424b7l,0x07d3a99c215b2359l,0xad3dc5af56dff52el,
-            0x5a3a6754973b6825l },
-          0 },
-        /* 125 << 160 */
-        { { 0xcfe231b83539a06dl,0xb36d1f72f46770ddl,0x126049747bb900d6l,
-            0x8d0990973fc31661l },
-          { 0x03b2749c920bc39el,0xf933d510b0486e23l,0x09cc958f0e9b0bb5l,
-            0x0b254dd1aa1e23abl },
-          0 },
-    },
-    {
-        /* 0 << 168 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 168 */
-        { { 0x263a2cfb9db3b381l,0x9c3a2deed4df0a4bl,0x728d06e97d04e61fl,
-            0x8b1adfbc42449325l },
-          { 0x6ec1d9397e053a1bl,0xee2be5c766daf707l,0x80ba1e14810ac7abl,
-            0xdd2ae778f530f174l },
-          0 },
-        /* 3 << 168 */
-        { { 0xadbaeb79b6828f36l,0x9d7a025801bd5b9el,0xeda01e0d1e844b0cl,
-            0x4b625175887edfc9l },
-          { 0x14109fdd9669b621l,0x88a2ca56f6f87b98l,0xfe2eb788170df6bcl,
-            0x0cea06f4ffa473f9l },
-          0 },
-        /* 4 << 168 */
-        { { 0x43ed81b5c4e83d33l,0xd9f358795efd488bl,0x164a620f9deb4d0fl,
-            0xc6927bdbac6a7394l },
-          { 0x45c28df79f9e0f03l,0x2868661efcd7e1a9l,0x7cf4e8d0ffa348f1l,
-            0x6bd4c284398538e0l },
-          0 },
-        /* 5 << 168 */
-        { { 0x2618a091289a8619l,0xef796e606671b173l,0x664e46e59090c632l,
-            0xa38062d41e66f8fbl },
-          { 0x6c744a200573274el,0xd07b67e4a9271394l,0x391223b26bdc0e20l,
-            0xbe2d93f1eb0a05a7l },
-          0 },
-        /* 7 << 168 */
-        { { 0x7efa14b84444896bl,0x64974d2ff94027fbl,0xefdcd0e8de84487dl,
-            0x8c45b2602b48989bl },
-          { 0xa8fcbbc2d8463487l,0xd1b2b3f73fbc476cl,0x21d005b7c8f443c0l,
-            0x518f2e6740c0139cl },
-          0 },
-        /* 9 << 168 */
-        { { 0xae51dca2a91f6791l,0x2abe41909baa9efcl,0xd9d2e2f4559c7ac1l,
-            0xe82f4b51fc9f773al },
-          { 0xa77130274073e81cl,0xc0276facfbb596fcl,0x1d819fc9a684f70cl,
-            0x29b47fddc9f7b1e0l },
-          0 },
-        /* 10 << 168 */
-        { { 0x358de103459b1940l,0xec881c595b013e93l,0x51574c9349532ad3l,
-            0x2db1d445b37b46del },
-          { 0xc6445b87df239fd8l,0xc718af75151d24eel,0xaea1c4a4f43c6259l,
-            0x40c0e5d770be02f7l },
-          0 },
-        /* 11 << 168 */
-        { { 0x6a4590f4721b33f2l,0x2124f1fbfedf04eal,0xf8e53cde9745efe7l,
-            0xe7e1043265f046d9l },
-          { 0xc3fca28ee4d0c7e6l,0x847e339a87253b1bl,0x9b5953483743e643l,
-            0xcb6a0a0b4fd12fc5l },
-          0 },
-        /* 13 << 168 */
-        { { 0xec1214eda714181dl,0x609ac13b6067b341l,0xff4b4c97a545df1fl,
-            0xa124050134d2076bl },
-          { 0x6efa0c231409ca97l,0x254cc1a820638c43l,0xd4e363afdcfb46cdl,
-            0x62c2adc303942a27l },
-          0 },
-        /* 15 << 168 */
-        { { 0x27b6a8ab3fd40e09l,0xe455842e77313ea9l,0x8b51d1e21f55988bl,
-            0x5716dd73062bbbfcl },
-          { 0x633c11e54e8bf3del,0x9a0e77b61b85be3bl,0x565107290911cca6l,
-            0x27e76495efa6590fl },
-          0 },
-        /* 16 << 168 */
-        { { 0xe4ac8b33070d3aabl,0x2643672b9a2cd5e5l,0x52eff79b1cfc9173l,
-            0x665ca49b90a7c13fl },
-          { 0x5a8dda59b3efb998l,0x8a5b922d052f1341l,0xae9ebbab3cf9a530l,
-            0x35986e7bf56da4d7l },
-          0 },
-        /* 17 << 168 */
-        { { 0x3a636b5cff3513ccl,0xbb0cf8ba3198f7ddl,0xb8d4052241f16f86l,
-            0x760575d8de13a7bfl },
-          { 0x36f74e169f7aa181l,0x163a3ecff509ed1cl,0x6aead61f3c40a491l,
-            0x158c95fcdfe8fcaal },
-          0 },
-        /* 19 << 168 */
-        { { 0x6b47accdd9eee96cl,0x0ca277fbe58cec37l,0x113fe413e702c42al,
-            0xdd1764eec47cbe51l },
-          { 0x041e7cde7b3ed739l,0x50cb74595ce9e1c0l,0x355685132925b212l,
-            0x7cff95c4001b081cl },
-          0 },
-        /* 21 << 168 */
-        { { 0x726f0973da50c991l,0x48afcd5b822d6ee2l,0xe5fc718b20fd7771l,
-            0xb9e8e77dfd0807a1l },
-          { 0x7f5e0f4499a7703dl,0x6972930e618e36f3l,0x2b7c77b823807bbel,
-            0xe5b82405cb27ff50l },
-          0 },
-        /* 23 << 168 */
-        { { 0x98cb1ae9255c0980l,0x4bd863812b4a739fl,0x5a5c31e11e4a45a1l,
-            0x1e5d55fe9cb0db2fl },
-          { 0x74661b068ff5cc29l,0x026b389f0eb8a4f4l,0x536b21a458848c24l,
-            0x2e5bf8ec81dc72b0l },
-          0 },
-        /* 25 << 168 */
-        { { 0x9f0af483d309cbe6l,0x5b020d8ae0bced4fl,0x606e986db38023e3l,
-            0xad8f2c9d1abc6933l },
-          { 0x19292e1de7400e93l,0xfe3e18a952be5e4dl,0xe8e9771d2e0680bfl,
-            0x8c5bec98c54db063l },
-          0 },
-        /* 27 << 168 */
-        { { 0x4c23f62a2c160dcdl,0x34e6c5e38f90eaefl,0x35865519a9a65d5al,
-            0x07c48aae8fd38a3dl },
-          { 0xb7e7aeda50068527l,0x2c09ef231c90936al,0x31ecfeb6e879324cl,
-            0xa0871f6bfb0ec938l },
-          0 },
-        /* 28 << 168 */
-        { { 0xb1f0fb68d84d835dl,0xc90caf39861dc1e6l,0x12e5b0467594f8d7l,
-            0x26897ae265012b92l },
-          { 0xbcf68a08a4d6755dl,0x403ee41c0991fbdal,0x733e343e3bbf17e8l,
-            0xd2c7980d679b3d65l },
-          0 },
-        /* 29 << 168 */
-        { { 0x33056232d2e11305l,0x966be492f3c07a6fl,0x6a8878ffbb15509dl,
-            0xff2211010a9b59a4l },
-          { 0x6c9f564aabe30129l,0xc6f2c940336e64cfl,0x0fe752628b0c8022l,
-            0xbe0267e96ae8db87l },
-          0 },
-        /* 31 << 168 */
-        { { 0x9d031369a5e829e5l,0xcbb4c6fc1607aa41l,0x75ac59a6241d84c1l,
-            0xc043f2bf8829e0eel },
-          { 0x82a38f758ea5e185l,0x8bda40b9d87cbd9fl,0x9e65e75e2d8fc601l,
-            0x3d515f74a35690b3l },
-          0 },
-        /* 33 << 168 */
-        { { 0xf6b5b2d0bc8fa5bcl,0x8a5ead67500c277bl,0x214625e6dfa08a5dl,
-            0x51fdfedc959cf047l },
-          { 0x6bc9430b289fca32l,0xe36ff0cf9d9bdc3fl,0x2fe187cb58ea0edel,
-            0xed66af205a900b3fl },
-          0 },
-        /* 34 << 168 */
-        { { 0x00e0968b5fa9f4d6l,0x2d4066ce37a362e7l,0xa99a9748bd07e772l,
-            0x710989c006a4f1d0l },
-          { 0xd5dedf35ce40cbd8l,0xab55c5f01743293dl,0x766f11448aa24e2cl,
-            0x94d874f8605fbcb4l },
-          0 },
-        /* 35 << 168 */
-        { { 0xa365f0e8a518001bl,0xee605eb69d04ef0fl,0x5a3915cdba8d4d25l,
-            0x44c0e1b8b5113472l },
-          { 0xcbb024e88b6740dcl,0x89087a53ee1d4f0cl,0xa88fa05c1fc4e372l,
-            0x8bf395cbaf8b3af2l },
-          0 },
-        /* 36 << 168 */
-        { { 0x1e71c9a1deb8568bl,0xa35daea080fb3d32l,0xe8b6f2662cf8fb81l,
-            0x6d51afe89490696al },
-          { 0x81beac6e51803a19l,0xe3d24b7f86219080l,0x727cfd9ddf6f463cl,
-            0x8c6865ca72284ee8l },
-          0 },
-        /* 37 << 168 */
-        { { 0x32c88b7db743f4efl,0x3793909be7d11dcel,0xd398f9222ff2ebe8l,
-            0x2c70ca44e5e49796l },
-          { 0xdf4d9929cb1131b1l,0x7826f29825888e79l,0x4d3a112cf1d8740al,
-            0x00384cb6270afa8bl },
-          0 },
-        /* 39 << 168 */
-        { { 0xbe7e990ff0d796a0l,0x5fc62478df0e8b02l,0x8aae8bf4030c00adl,
-            0x3d2db93b9004ba0fl },
-          { 0xe48c8a79d85d5ddcl,0xe907caa76bb07f34l,0x58db343aa39eaed5l,
-            0x0ea6e007adaf5724l },
-          0 },
-        /* 40 << 168 */
-        { { 0xe00df169d23233f3l,0x3e32279677cb637fl,0x1f897c0e1da0cf6cl,
-            0xa651f5d831d6bbddl },
-          { 0xdd61af191a230c76l,0xbd527272cdaa5e4al,0xca753636d0abcd7el,
-            0x78bdd37c370bd8dcl },
-          0 },
-        /* 41 << 168 */
-        { { 0xc23916c217cd93fel,0x65b97a4ddadce6e2l,0xe04ed4eb174e42f8l,
-            0x1491ccaabb21480al },
-          { 0x145a828023196332l,0x3c3862d7587b479al,0x9f4a88a301dcd0edl,
-            0x4da2b7ef3ea12f1fl },
-          0 },
-        /* 43 << 168 */
-        { { 0x71965cbfc3dd9b4dl,0xce23edbffc068a87l,0xb78d4725745b029bl,
-            0x74610713cefdd9bdl },
-          { 0x7116f75f1266bf52l,0x0204672218e49bb6l,0xdf43df9f3d6f19e3l,
-            0xef1bc7d0e685cb2fl },
-          0 },
-        /* 44 << 168 */
-        { { 0xcddb27c17078c432l,0xe1961b9cb77fedb7l,0x1edc2f5cc2290570l,
-            0x2c3fefca19cbd886l },
-          { 0xcf880a36c2af389al,0x96c610fdbda71ceal,0xf03977a932aa8463l,
-            0x8eb7763f8586d90al },
-          0 },
-        /* 45 << 168 */
-        { { 0x3f3424542a296e77l,0xc871868342837a35l,0x7dc710906a09c731l,
-            0x54778ffb51b816dbl },
-          { 0x6b33bfecaf06defdl,0xfe3c105f8592b70bl,0xf937fda461da6114l,
-            0x3c13e6514c266ad7l },
-          0 },
-        /* 46 << 168 */
-        { { 0xe363a829855938e8l,0x2eeb5d9e9de54b72l,0xbeb93b0e20ccfab9l,
-            0x3dffbb5f25e61a25l },
-          { 0x7f655e431acc093dl,0x0cb6cc3d3964ce61l,0x6ab283a1e5e9b460l,
-            0x55d787c5a1c7e72dl },
-          0 },
-        /* 47 << 168 */
-        { { 0x4d2efd47deadbf02l,0x11e80219ac459068l,0x810c762671f311f0l,
-            0xfa17ef8d4ab6ef53l },
-          { 0xaf47fd2593e43bffl,0x5cb5ff3f0be40632l,0x546871068ee61da3l,
-            0x7764196eb08afd0fl },
-          0 },
-        /* 48 << 168 */
-        { { 0x831ab3edf0290a8fl,0xcae81966cb47c387l,0xaad7dece184efb4fl,
-            0xdcfc53b34749110el },
-          { 0x6698f23c4cb632f9l,0xc42a1ad6b91f8067l,0xb116a81d6284180al,
-            0xebedf5f8e901326fl },
-          0 },
-        /* 49 << 168 */
-        { { 0xf2274c9f97e3e044l,0x4201852011d09fc9l,0x56a65f17d18e6e23l,
-            0x2ea61e2a352b683cl },
-          { 0x27d291bc575eaa94l,0x9e7bc721b8ff522dl,0x5f7268bfa7f04d6fl,
-            0x5868c73faba41748l },
-          0 },
-        /* 51 << 168 */
-        { { 0x1c52e63596e78cc4l,0x5385c8b20c06b4a8l,0xd84ddfdbb0e87d03l,
-            0xc49dfb66934bafadl },
-          { 0x7071e17059f70772l,0x3a073a843a1db56bl,0x034949033b8af190l,
-            0x7d882de3d32920f0l },
-          0 },
-        /* 52 << 168 */
-        { { 0x91633f0ab2cf8940l,0x72b0b1786f948f51l,0x2d28dc30782653c8l,
-            0x88829849db903a05l },
-          { 0xb8095d0c6a19d2bbl,0x4b9e7f0c86f782cbl,0x7af739882d907064l,
-            0xd12be0fe8b32643cl },
-          0 },
-        /* 53 << 168 */
-        { { 0x358ed23d0e165dc3l,0x3d47ce624e2378cel,0x7e2bb0b9feb8a087l,
-            0x3246e8aee29e10b9l },
-          { 0x459f4ec703ce2b4dl,0xe9b4ca1bbbc077cfl,0x2613b4f20e9940c1l,
-            0xfc598bb9047d1eb1l },
-          0 },
-        /* 55 << 168 */
-        { { 0x52fb0c9d7fc63668l,0x6886c9dd0c039cdel,0x602bd59955b22351l,
-            0xb00cab02360c7c13l },
-          { 0x8cb616bc81b69442l,0x41486700b55c3ceel,0x71093281f49ba278l,
-            0xad956d9c64a50710l },
-          0 },
-        /* 57 << 168 */
-        { { 0xbaca6591d4b66947l,0xb452ce9804460a8cl,0x6830d24643768f55l,
-            0xf4197ed87dff12dfl },
-          { 0x6521b472400dd0f7l,0x59f5ca8f4b1e7093l,0x6feff11b080338ael,
-            0x0ada31f6a29ca3c6l },
-          0 },
-        /* 59 << 168 */
-        { { 0x04e5dfe0d809c7bdl,0xd7b2580c8f1050abl,0x6d91ad78d8a4176fl,
-            0x0af556ee4e2e897cl },
-          { 0x162a8b73921de0acl,0x52ac9c227ea78400l,0xee2a4eeaefce2174l,
-            0xbe61844e6d637f79l },
-          0 },
-        /* 60 << 168 */
-        { { 0x0491f1bc789a283bl,0x72d3ac3d880836f4l,0xaa1c5ea388e5402dl,
-            0x1b192421d5cc473dl },
-          { 0x5c0b99989dc84cacl,0xb0a8482d9c6e75b8l,0x639961d03a191ce2l,
-            0xda3bc8656d837930l },
-          0 },
-        /* 61 << 168 */
-        { { 0xca990653056e6f8fl,0x84861c4164d133a7l,0x8b403276746abe40l,
-            0xb7b4d51aebf8e303l },
-          { 0x05b43211220a255dl,0xc997152c02419e6el,0x76ff47b6630c2feal,
-            0x50518677281fdadel },
-          0 },
-        /* 63 << 168 */
-        { { 0x6d2d99b7ea7b979bl,0xcd78cd74e6fb3bcdl,0x11e45a9e86cffbfel,
-            0x78a61cf4637024f6l },
-          { 0xd06bc8723d502295l,0xf1376854458cb288l,0xb9db26a1342f8586l,
-            0xf33effcf4beee09el },
-          0 },
-        /* 64 << 168 */
-        { { 0xd7e0c4cdb30cfb3al,0x6d09b8c16c9db4c8l,0x40ba1a4207c8d9dfl,
-            0x6fd495f71c52c66dl },
-          { 0xfb0e169f275264dal,0x80c2b746e57d8362l,0xedd987f749ad7222l,
-            0xfdc229af4398ec7bl },
-          0 },
-        /* 65 << 168 */
-        { { 0xfe81af4609418a51l,0xdbb60b836f18e3a5l,0x5e7a86ea4566ec9cl,
-            0xb76ff40f25093925l },
-          { 0x5fe6662c429c5554l,0xfc9ec35384e478cfl,0x73dbb5f3e8cfa761l,
-            0x031e506592f82709l },
-          0 },
-        /* 71 << 168 */
-        { { 0x108c736abd49f2e0l,0xe230f2417487dcc8l,0x073fc4f8f74d939cl,
-            0x98532487e9745bbel },
-          { 0x5208eb981714b10bl,0xec35d0510458725dl,0x35dbb60bf203f4b6l,
-            0x064299b27781ab38l },
-          0 },
-        /* 77 << 168 */
-        { { 0x43cc7bbc02d26929l,0xeb00a683162d9607l,0x2af152b8ed9fa224l,
-            0xf24e8bee12257f0cl },
-          { 0xdf065dd5d004b1cbl,0x6aa20bcf9f9908c6l,0x8e5e86b6941c593dl,
-            0x0e0034b398969717l },
-          0 },
-        /* 83 << 168 */
-        { { 0x5be62e155c43b8fcl,0xd9e0adfc3c445636l,0xc5141df0e0d78f48l,
-            0xd134bbed2c277716l },
-          { 0x79033a84598fe069l,0x6c704367b081614cl,0x55c45d66bf5bf772l,
-            0xf08744c57a444730l },
-          0 },
-        /* 89 << 168 */
-        { { 0x866752091422b528l,0xdb297411c3e028eel,0x1f5575b040e1c3ccl,
-            0x85367b84d333b04fl },
-          { 0x57864c86e9804aa9l,0xf13fa8e3439156dfl,0xa3b337e0464e0aecl,
-            0x0018dfd7f2ae382bl },
-          0 },
-        /* 95 << 168 */
-        { { 0xe93cece9cea132fcl,0x985542d8f74e867al,0x2a3d18a5cc8fcf87l,
-            0xa0561055479d0039l },
-          { 0x3513c7eaac4b3f9dl,0xc095967256477606l,0xa63960f330df8ad6l,
-            0x59ca8d53cc9ddcb3l },
-          0 },
-        /* 101 << 168 */
-        { { 0x6d8e942b2f208191l,0xd49a6d9453fe5457l,0x2b55e391003010bal,
-            0x3dd1fd9fdf4605ebl },
-          { 0xdc006a3358682886l,0x60a5e86c1bd9ac88l,0xc4bd320ed0cab8f2l,
-            0x7281e7cb7751855bl },
-          0 },
-        /* 107 << 168 */
-        { { 0x7d564222e1881e7al,0x59061a89db0673c2l,0x1f9d607213f27313l,
-            0x5b3b29368ff3aeb7l },
-          { 0x6cf2304ccf969f43l,0x8eff4a25e7f69ae5l,0xbaeb6411d17da4ffl,
-            0x666af0af9eea17ecl },
-          0 },
-        /* 113 << 168 */
-        { { 0x6c0b811697f4cd0bl,0xcd7825d40e4ea852l,0x80158fb0677fef3dl,
-            0x5bb1a3aaa10ee693l },
-          { 0xc5df66678066fc9bl,0x3200dc11f404d4a6l,0x58868950a8686d8el,
-            0xbdaaffb53770fabal },
-          0 },
-        /* 116 << 168 */
-        { { 0xba6a9f84660326f5l,0x61c1e44161bc3e88l,0xfbf992a0bde85cf8l,
-            0xe704dd1e6f8c8f5fl },
-          { 0x231caa0ab1d7d486l,0xd10616d8891cd571l,0x2ddada75c008833cl,
-            0x44337d6dad514c94l },
-          0 },
-        /* 119 << 168 */
-        { { 0xd48678b8f6933cf0l,0x7b4d623e0b739471l,0x4ad620287b216238l,
-            0xb4d4918959c4fabel },
-          { 0x8c2a1bdc296d42d5l,0x9235d0ec2fd3eb96l,0xfe271972f81c135bl,
-            0x82b5181741471e16l },
-          0 },
-        /* 125 << 168 */
-        { { 0xe9aa8ce4051f8e81l,0x14484af67cd1391fl,0x53a361dcafb1656el,
-            0x6ad8ba02f4d9d0cbl },
-          { 0xfb4385466c50a722l,0x2f1c5bbc7edb37f4l,0x8dc90ccb16e4b795l,
-            0xbcb32e1508127094l },
-          0 },
-    },
-    {
-        /* 0 << 176 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 176 */
-        { { 0xb81d783e979f3925l,0x1efd130aaf4c89a7l,0x525c2144fd1bf7fal,
-            0x4b2969041b265a9el },
-          { 0xed8e9634b9db65b6l,0x35c82e3203599d8al,0xdaa7a54f403563f3l,
-            0x9df088ad022c38abl },
-          0 },
-        /* 3 << 176 */
-        { { 0x9e93ba24f111661el,0xedced484b105eb04l,0x96dc9ba1f424b578l,
-            0xbf8f66b7e83e9069l },
-          { 0x872d4df4d7ed8216l,0xbf07f3778e2cbecfl,0x4281d89998e73754l,
-            0xfec85fbb8aab8708l },
-          0 },
-        /* 4 << 176 */
-        { { 0x13b5bf22765fa7d0l,0x59805bf01d6a5370l,0x67a5e29d4280db98l,
-            0x4f53916f776b1ce3l },
-          { 0x714ff61f33ddf626l,0x4206238ea085d103l,0x1c50d4b7e5809ee3l,
-            0x999f450d85f8eb1dl },
-          0 },
-        /* 5 << 176 */
-        { { 0x82eebe731a3a93bcl,0x42bbf465a21adc1al,0xc10b6fa4ef030efdl,
-            0x247aa4c787b097bbl },
-          { 0x8b8dc632f60c77dal,0x6ffbc26ac223523el,0xa4f6ff11344579cfl,
-            0x5825653c980250f6l },
-          0 },
-        /* 7 << 176 */
-        { { 0xeda6c595d314e7bcl,0x2ee7464b467899edl,0x1cef423c0a1ed5d3l,
-            0x217e76ea69cc7613l },
-          { 0x27ccce1fe7cda917l,0x12d8016b8a893f16l,0xbcd6de849fc74f6bl,
-            0xfa5817e2f3144e61l },
-          0 },
-        /* 9 << 176 */
-        { { 0xc0b48d4e49ccd6d7l,0xff8fb02c88bd5580l,0xc75235e907d473b2l,
-            0x4fab1ac5a2188af3l },
-          { 0x030fa3bc97576ec0l,0xe8c946e80b7e7d2fl,0x40a5c9cc70305600l,
-            0x6d8260a9c8b013b4l },
-          0 },
-        /* 10 << 176 */
-        { { 0xe6c51073615cd9e4l,0x498ec047f1243c06l,0x3e5a8809b17b3d8cl,
-            0x5cd99e610cc565f1l },
-          { 0x81e312df7851dafel,0xf156f5baa79061e2l,0x80d62b71880c590el,
-            0xbec9746f0a39faa1l },
-          0 },
-        /* 11 << 176 */
-        { { 0x2b09d2c3cfdcf7ddl,0x41a9fce3723fcab4l,0x73d905f707f57ca3l,
-            0x080f9fb1ac8e1555l },
-          { 0x7c088e849ba7a531l,0x07d35586ed9a147fl,0x602846abaf48c336l,
-            0x7320fd320ccf0e79l },
-          0 },
-        /* 13 << 176 */
-        { { 0x92eb40907f8f875dl,0x9c9d754e56c26bbfl,0x158cea618110bbe7l,
-            0x62a6b802745f91eal },
-          { 0xa79c41aac6e7394bl,0x445b6a83ad57ef10l,0x0c5277eb6ea6f40cl,
-            0x319fe96b88633365l },
-          0 },
-        /* 15 << 176 */
-        { { 0x77f84203d39b8c34l,0xed8b1be63125eddbl,0x5bbf2441f6e39dc5l,
-            0xb00f6ee66a5d678al },
-          { 0xba456ecf57d0ea99l,0xdcae0f5817e06c43l,0x01643de40f5b4baal,
-            0x2c324341d161b9bel },
-          0 },
-        /* 16 << 176 */
-        { { 0x949c9976e1337c26l,0x6faadebdd73d68e5l,0x9e158614f1b768d9l,
-            0x22dfa5579cc4f069l },
-          { 0xccd6da17be93c6d6l,0x24866c61a504f5b9l,0x2121353c8d694da1l,
-            0x1c6ca5800140b8c6l },
-          0 },
-        /* 17 << 176 */
-        { { 0x4e77c5575b45afb4l,0xe9ded649efb8912dl,0x7ec9bbf542f6e557l,
-            0x2570dfff62671f00l },
-          { 0x2b3bfb7888e084bdl,0xa024b238f37fe5b4l,0x44e7dc0495649aeel,
-            0x498ca2555e7ec1d8l },
-          0 },
-        /* 19 << 176 */
-        { { 0x2e44d22526a1fc90l,0x0d6d10d24d70705dl,0xd94b6b10d70c45f4l,
-            0x0f201022b216c079l },
-          { 0xcec966c5658fde41l,0xa8d2bc7d7e27601dl,0xbfcce3e1ff230be7l,
-            0x3394ff6b0033ffb5l },
-          0 },
-        /* 21 << 176 */
-        { { 0x05d99be8b9c20cdal,0x89f7aad5d5cd0c98l,0x7ef936fe5bb94183l,
-            0x92ca0753b05cd7f2l },
-          { 0x9d65db1174a1e035l,0x02628cc813eaea92l,0xf2d9e24249e4fbf2l,
-            0x94fdfd9be384f8b7l },
-          0 },
-        /* 23 << 176 */
-        { { 0x29882d7c98379d44l,0xd000bdfb509edc8al,0xc6f95979e66fe464l,
-            0x504a6115fa61bde0l },
-          { 0x56b3b871effea31al,0x2d3de26df0c21a54l,0x21dbff31834753bfl,
-            0xe67ecf4969269d86l },
-          0 },
-        /* 25 << 176 */
-        { { 0xed29a56da16d4b34l,0x7fba9d09dca21c4fl,0x66d7ac006d8de486l,
-            0x6006198773a2a5e1l },
-          { 0x8b400f869da28ff0l,0x3133f70843c4599cl,0x9911c9b8ee28cb0dl,
-            0xcd7e28748e0af61dl },
-          0 },
-        /* 27 << 176 */
-        { { 0x6a7bb6a93b5bdb83l,0x08da65c0a4a72318l,0xc58d22aa63eb065fl,
-            0x1717596c1b15d685l },
-          { 0x112df0d0b266d88bl,0xf688ae975941945al,0x487386e37c292cacl,
-            0x42f3b50d57d6985cl },
-          0 },
-        /* 28 << 176 */
-        { { 0x69e3be0427596893l,0xb6bb02a645bf452bl,0x0875c11af4c698c8l,
-            0x6652b5c7bece3794l },
-          { 0x7b3755fd4f5c0499l,0x6ea16558b5532b38l,0xd1c69889a2e96ef7l,
-            0x9c773c3a61ed8f48l },
-          0 },
-        /* 29 << 176 */
-        { { 0x5a304ada8545d185l,0x82ae44ea738bb8cbl,0x628a35e3df87e10el,
-            0xd3624f3da15b9fe3l },
-          { 0xcc44209b14be4254l,0x7d0efcbcbdbc2ea5l,0x1f60336204c37bbel,
-            0x21f363f556a5852cl },
-          0 },
-        /* 31 << 176 */
-        { { 0x81262e4225346689l,0x716da290b07c7004l,0x35f911eab7950ee3l,
-            0x6fd72969261d21b5l },
-          { 0x5238980308b640d3l,0x5b0026ee887f12a1l,0x20e21660742e9311l,
-            0x0ef6d5415ff77ff7l },
-          0 },
-        /* 33 << 176 */
-        { { 0x64aa0874925dd0b0l,0x5ffd503851c474c6l,0x4478c72c8ebd4157l,
-            0xb98694cb8c8375e2l },
-          { 0xeda4edeecd8e208cl,0xf98a053d2c0670a6l,0x564bd3057f346b9dl,
-            0xafbbf3e94c318fddl },
-          0 },
-        /* 34 << 176 */
-        { { 0x8a03410aa96c4685l,0xef1b6b16a978a31bl,0x44738a3b629df6cfl,
-            0xa1dc65da807713e9l },
-          { 0x569cc7884c373442l,0x1f30a2464965fb52l,0x56822f1677ff5e2el,
-            0x63f18812e303748bl },
-          0 },
-        /* 35 << 176 */
-        { { 0x2abdc403dd0983ecl,0xec0c08c7f365c6f5l,0xe555083fbdb66b8bl,
-            0x593685bc4e8973ffl },
-          { 0x737df3f920e9c705l,0x00c7bcc309c31a5al,0x5f1d23e2efdcb34dl,
-            0x79d9b382470f7949l },
-          0 },
-        /* 36 << 176 */
-        { { 0x44a315645fd2eb1dl,0x4e7397263fdd1356l,0x9b96735463200efel,
-            0xcb70402e520bbb6al },
-          { 0xcbc90d7e693d2642l,0x6fb00064bc9b4002l,0x95f2eab3d96f7150l,
-            0xb1619e3fe035f47al },
-          0 },
-        /* 37 << 176 */
-        { { 0xd22d6073d1561bb7l,0x40666e4ba9928683l,0x90654dab8ab3f9b1l,
-            0x7625c507b8773421l },
-          { 0x288f28220ca88cd2l,0xbb88114ed8d005c1l,0xbeec2b0af603a11bl,
-            0x8fdda60325f7949el },
-          0 },
-        /* 39 << 176 */
-        { { 0x6503632d6ee4f1d0l,0xd5449747ea394840l,0xd696167a8abe13a1l,
-            0xc080f76e609ebaa9l },
-          { 0x181acf0c10aa70d6l,0x70614461291e5e50l,0x7ade8e84b9f0c0a3l,
-            0xef1de9f2cb11b41el },
-          0 },
-        /* 40 << 176 */
-        { { 0x2d5c3c848e592413l,0x727022961832ba2cl,0x22979b51596c6321l,
-            0x738f31cb5a04db64l },
-          { 0x0bdaa6ca98f84ee5l,0x4e9e827c15e21eeel,0x4c59dbcc3ea632e0l,
-            0xed3404db5bc6f027l },
-          0 },
-        /* 41 << 176 */
-        { { 0x2841f05cfbaf8b26l,0xac9830db5b243770l,0xde3ab1707787f324l,
-            0x1ee12efe079209bcl },
-          { 0x2d3fd62d5bcf6e3cl,0x8a680655d60b0582l,0xdafc5061bc2b64a1l,
-            0xe0d91e7526a88788l },
-          0 },
-        /* 43 << 176 */
-        { { 0x2d49c685426b1b1el,0x6c2149caeabb02f7l,0xa4697d7fde11984fl,
-            0xa0e32fb3ed3c8707l },
-          { 0xb783e825f4ca12dal,0xb2666e2448770a50l,0x82d47f478660e923l,
-            0x6e36cd71fb4a984fl },
-          0 },
-        /* 44 << 176 */
-        { { 0x3295a8ea43c66b92l,0x99387af6ac5d19d4l,0x545f9b1b8e9d2090l,
-            0x138b1c4c2660f530l },
-          { 0xbfb05fd2ff872627l,0xb6614b0f4c3bc45cl,0x13defece62ca0fb0l,
-            0x82ddae134fededd8l },
-          0 },
-        /* 45 << 176 */
-        { { 0x5a34499b871c4cbbl,0x3ab0e69a2eb6084bl,0xa8d0160025ef7755l,
-            0x5db8f611d9e70f5dl },
-          { 0x63f9eb9a7afa95d7l,0x328b97f9706d7964l,0x8bcf9a0f4b71dfcal,
-            0x53d4c3042a5c7934l },
-          0 },
-        /* 46 << 176 */
-        { { 0x0c87dd3a8768d9aal,0x201ce5a082f6a55fl,0xa3de6f3049ca4602l,
-            0x36f421422aeb5f17l },
-          { 0x5c9962399817b77al,0x2584a10ae8d165acl,0x80f683d0c726f4aal,
-            0x524307502dcdfa48l },
-          0 },
-        /* 47 << 176 */
-        { { 0x0c04399f94683df2l,0x0978e9d4e954838dl,0x01faa5e8cf4a7a7bl,
-            0x92f6e6a90dae61cfl },
-          { 0x0c0f1293373dc957l,0x8320178fd8cc6b67l,0x4af977ed4b6444f2l,
-            0xd8c9a401ad8e5f84l },
-          0 },
-        /* 48 << 176 */
-        { { 0xbd5660ed9aed9f40l,0x70ca6ad1532a8c99l,0xc4978bfb95c371eal,
-            0xe5464d0d7003109dl },
-          { 0x1af32fdfd9e535efl,0xabf57ea798c9185bl,0xed7a741712b42488l,
-            0x8e0296a7e97286fal },
-          0 },
-        /* 49 << 176 */
-        { { 0x79ee35ac16fca804l,0x8f16e6165f59782el,0x8fbef1011737694el,
-            0xb34b7625462be08bl },
-          { 0x7e63e1b016e75c91l,0xb6a18edd2d23728dl,0xcf761a1e7f299ab6l,
-            0x796dcdebf16c770el },
-          0 },
-        /* 51 << 176 */
-        { { 0x47354f22308ee4afl,0x96959a538ecd6f4bl,0xf60b5f104055cbd2l,
-            0x04b1c9599bd86095l },
-          { 0x26accd8486008564l,0x46b2fe0478f31ea7l,0x5500dbf72dd76f23l,
-            0x36bcdf584c496c6fl },
-          0 },
-        /* 52 << 176 */
-        { { 0x8836cd431527d7cel,0x1f236623187a50eal,0x6470c0ae847221f0l,
-            0xc61f86b47e449110l },
-          { 0x7cc9cc20fa9fcec1l,0xa394903019134349l,0xafe5a08ff53ab467l,
-            0x9caba02301ed2919l },
-          0 },
-        /* 53 << 176 */
-        { { 0xffecbdce406abf1el,0x0ef4bcd73ae340d4l,0x7e37bae0e19d5613l,
-            0xe191669be4c6e97al },
-          { 0x9fafe59797292db7l,0xab7ef3713172d716l,0x9f0fff330ce3b533l,
-            0xca94ff8f932dd8cfl },
-          0 },
-        /* 55 << 176 */
-        { { 0x659c8b5d78aea69el,0xdde7ab46476a8fb9l,0x26bfe303bd01b5e6l,
-            0xf3dfb08a726a937cl },
-          { 0xe7a591fa0a263670l,0xe872c3f8f97434a0l,0x4881a82e2e0f2c21l,
-            0x17624e48788ef958l },
-          0 },
-        /* 57 << 176 */
-        { { 0xd526d66da7222e5bl,0xd33bb78efeb00e25l,0x9a7d670b932c8d08l,
-            0xea31e5273cee093fl },
-          { 0x55cc091bd04b7a43l,0x12b08d6dd01a123dl,0x1d98a6467fb0e7bal,
-            0xdabb09483535fd0dl },
-          0 },
-        /* 59 << 176 */
-        { { 0x2862314d08b69b19l,0x9cf302e191effcfal,0x43bdc8462ead917al,
-            0x21b238bbf94b3d8fl },
-          { 0xa3736160e2f465d3l,0x4d7fb6818541e255l,0x46fa089a23551edcl,
-            0xf7c41d17c1fefa8cl },
-          0 },
-        /* 60 << 176 */
-        { { 0x8ed0807fed113000l,0x8e1672d04c691484l,0x33a13ab31ee86ca0l,
-            0x9df0d9573bcaee4fl },
-          { 0x0cf0c638ef0dfb71l,0x1e0fe22ac2c9510al,0x43f506716fcc6a21l,
-            0xccb58404cec03a94l },
-          0 },
-        /* 61 << 176 */
-        { { 0x59547e37fd0936c1l,0x81e0517df45140b1l,0xcc6ccd89ed49e3fcl,
-            0xc2fa23eff3b897del },
-          { 0x149511ef2050c80al,0xf66bea6b3140b833l,0xbbe1401e2786d723l,
-            0x0aeb549c887509bcl },
-          0 },
-        /* 63 << 176 */
-        { { 0xf938e85060f5867al,0x806e1fff72429adcl,0x5ff7962a45f43b52l,
-            0xd8375ab6b2bbb403l },
-          { 0x00d5819b21b287fcl,0x15c7190ebae37d58l,0x075ce5ce05fcfb07l,
-            0x76368d06dbc003cbl },
-          0 },
-        /* 64 << 176 */
-        { { 0x01079383171b445fl,0x9bcf21e38131ad4cl,0x8cdfe205c93987e8l,
-            0xe63f4152c92e8c8fl },
-          { 0x729462a930add43dl,0x62ebb143c980f05al,0x4f3954e53b06e968l,
-            0xfe1d75ad242cf6b1l },
-          0 },
-        /* 65 << 176 */
-        { { 0x1cf508197630655el,0x9b4685c408d417f5l,0x6ea942619b049259l,
-            0x31c29b54fe73b755l },
-          { 0x3d2872a1f1f2af17l,0xbcd1139956bcbc4bl,0x4d14f59890d7a85cl,
-            0xd2c46040dbcbe998l },
-          0 },
-        /* 71 << 176 */
-        { { 0x3c8a06ca9792c42al,0x92535628602460ddl,0xa95e13f2ddd4c676l,
-            0xe823841d3b20d463l },
-          { 0x0248605bbfad6051l,0x82985dd61af51233l,0x3d243a5cdef7d742l,
-            0x0a88ce55ff6aa911l },
-          0 },
-        /* 77 << 176 */
-        { { 0xcf5b5962449aec98l,0x40322a6531a41389l,0xcd15606fd72c0527l,
-            0xfe91eac7b90d65a0l },
-          { 0xcd32415487636360l,0x82f2c7bdfc653a6fl,0xd04d138ae315ce7cl,
-            0x40ebfd5e78118dbcl },
-          0 },
-        /* 83 << 176 */
-        { { 0x0f9ea6ae4144660fl,0x02345c6513279b25l,0x139497b65c7671cbl,
-            0x7259f14b2ebed1d5l },
-          { 0xa1e5d98ce9b29988l,0xaed0efcd8df73ac8l,0x88339f073b81a77cl,
-            0x28f2bbca7109c8a6l },
-          0 },
-        /* 89 << 176 */
-        { { 0xa264f99d811472ddl,0x0e7eae0afc07a80cl,0x77f264d4a683cdc6l,
-            0x0512df49d053c668l },
-          { 0x2b4dfbade61dea15l,0x83de61acfd74890al,0xd2552bab32d41182l,
-            0x1fb9411435924e6al },
-          0 },
-        /* 95 << 176 */
-        { { 0x85efe53ade23c988l,0x89d41dbbf897f91bl,0x1357f91e7873fa8dl,
-            0x7a6ec2e3718d911cl },
-          { 0xf9e4f92e8f209a01l,0x4ffb96a70fdd67f3l,0x4c81a787f83dde1cl,
-            0x0d68fce15e163b60l },
-          0 },
-        /* 101 << 176 */
-        { { 0xbc79b4b26ab6da9dl,0xb4be5c278bb005f1l,0x63624530cd3b280bl,
-            0x543142f04e880026l },
-          { 0xbf7fb14cad90ddbfl,0xfe456e8a3966732dl,0x85499fb987ce35e9l,
-            0x8af09e6b24f1305dl },
-          0 },
-        /* 107 << 176 */
-        { { 0x5fc563ec16dc2b4bl,0xfe5631b25d0e535fl,0xbf4c489f9a93e36cl,
-            0x56badff1da2a07c4l },
-          { 0x72ac6b77fb7c5595l,0x4b25b9428e6645d9l,0xeeae127251f0657el,
-            0x30779ca51abeb76bl },
-          0 },
-        /* 113 << 176 */
-        { { 0x3d602ef5d909f43dl,0x2b2951a6bb347c79l,0x44903bfaa0d88896l,
-            0xd4ab20e8684c104fl },
-          { 0x55f70b4dd9b7e626l,0x084b3ee646a5f9ecl,0x1799cbe3da4ae81al,
-            0xc7cfac937fd6b80fl },
-          0 },
-        /* 116 << 176 */
-        { { 0x45647911ca20c525l,0x78f83186004706abl,0x5596377d97510538l,
-            0x047863defe041f8cl },
-          { 0xaea784896ec82367l,0x9d4eac2601eee8fcl,0xb32728f19b57d9dbl,
-            0x60a158f5313c0f65l },
-          0 },
-        /* 119 << 176 */
-        { { 0xf78caf129754377bl,0xa7fce16b6966f0c4l,0xfea937555a54a2b7l,
-            0x52d7f79b7cdfe951l },
-          { 0x3e14b92e94b1dac0l,0x363f2e5af168b73bl,0xcc0e9dcb6436a8c2l,
-            0x2dbece4bb52cbd27l },
-          0 },
-        /* 125 << 176 */
-        { { 0x7e7907ed8df38ffel,0xa68ec827e24e8a24l,0x5093a97e5f168732l,
-            0xa9ffea2f39ebb6dbl },
-          { 0x89e02c12284276d4l,0xc1179e3b3f9502d6l,0x01becb51d8f69eb6l,
-            0x86eee2935eb1c73cl },
-          0 },
-    },
-    {
-        /* 0 << 184 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 184 */
-        { { 0xf3b7963f4c830320l,0x842c7aa0903203e3l,0xaf22ca0ae7327afbl,
-            0x38e13092967609b6l },
-          { 0x73b8fb62757558f1l,0x3cc3e831f7eca8c1l,0xe4174474f6331627l,
-            0xa77989cac3c40234l },
-          0 },
-        /* 3 << 184 */
-        { { 0xb32cb8b0b796d219l,0xc3e95f4f34741dd9l,0x8721212568edf6f5l,
-            0x7a03aee4a2b9cb8el },
-          { 0x0cd3c376f53a89aal,0x0d8af9b1948a28dcl,0xcf86a3f4902ab04fl,
-            0x8aacb62a7f42002dl },
-          0 },
-        /* 4 << 184 */
-        { { 0xfd8e139f8f5fcda8l,0xf3e558c4bdee5bfdl,0xd76cbaf4e33f9f77l,
-            0x3a4c97a471771969l },
-          { 0xda27e84bf6dce6a7l,0xff373d9613e6c2d1l,0xf115193cd759a6e9l,
-            0x3f9b702563d2262cl },
-          0 },
-        /* 5 << 184 */
-        { { 0x9cb0ae6c252bd479l,0x05e0f88a12b5848fl,0x78f6d2b2a5c97663l,
-            0x6f6e149bc162225cl },
-          { 0xe602235cde601a89l,0xd17bbe98f373be1fl,0xcaf49a5ba8471827l,
-            0x7e1a0a8518aaa116l },
-          0 },
-        /* 7 << 184 */
-        { { 0x8b1e572235e6fc06l,0x3477728f0b3e13d5l,0x150c294daa8a7372l,
-            0xc0291d433bfa528al },
-          { 0xc6c8bc67cec5a196l,0xdeeb31e45c2e8a7cl,0xba93e244fb6e1c51l,
-            0xb9f8b71b2e28e156l },
-          0 },
-        /* 9 << 184 */
-        { { 0x343ac0a3ee9523f0l,0xbb75eab2975ea978l,0x1bccf332107387f4l,
-            0x790f92599ab0062el },
-          { 0xf1a363ad1e4f6a5fl,0x06e08b8462519a50l,0x609151877265f1eel,
-            0x6a80ca3493ae985el },
-          0 },
-        /* 10 << 184 */
-        { { 0xa3f4f521e447f2c4l,0x81b8da7a604291f0l,0xd680bc467d5926del,
-            0x84f21fd534a1202fl },
-          { 0x1d1e31814e9df3d8l,0x1ca4861a39ab8d34l,0x809ddeec5b19aa4al,
-            0x59f72f7e4d329366l },
-          0 },
-        /* 11 << 184 */
-        { { 0x2dfb9e08be0f4492l,0x3ff0da03e9d5e517l,0x03dbe9a1f79466a8l,
-            0x0b87bcd015ea9932l },
-          { 0xeb64fc83ab1f58abl,0x6d9598da817edc8al,0x699cff661d3b67e5l,
-            0x645c0f2992635853l },
-          0 },
-        /* 13 << 184 */
-        { { 0xd50e57c7d7fe71f3l,0x15342190bc97ce38l,0x51bda2de4df07b63l,
-            0xba12aeae200eb87dl },
-          { 0xabe135d2a9b4f8f6l,0x04619d65fad6d99cl,0x4a6683a77994937cl,
-            0x7a778c8b6f94f09al },
-          0 },
-        /* 15 << 184 */
-        { { 0x8dd1fb83425c6559l,0x7fc00ee60af06fdal,0xe98c922533d956dfl,
-            0x0f1ef3354fbdc8a2l },
-          { 0x2abb5145b79b8ea2l,0x40fd2945bdbff288l,0x6a814ac4d7185db7l,
-            0xc4329d6fc084609al },
-          0 },
-        /* 16 << 184 */
-        { { 0x511053e453544774l,0x834d0ecc3adba2bcl,0x4215d7f7bae371f5l,
-            0xfcfd57bf6c8663bcl },
-          { 0xded2383dd6901b1dl,0x3b49fbb4b5587dc3l,0xfd44a08d07625f62l,
-            0x3ee4d65b9de9b762l },
-          0 },
-        /* 17 << 184 */
-        { { 0x55ef9d3dcc26e8b0l,0xf869c827729b707al,0xdbbf450d8c47e00cl,
-            0x73d546ea60972ed7l },
-          { 0x9563e11f0dcd6821l,0xe48e1af57d80de7fl,0xbe7139b49057838dl,
-            0xf3f0ad4d7e5ca535l },
-          0 },
-        /* 19 << 184 */
-        { { 0xac66d1d49f8f8cc2l,0x43fe5c154ef18941l,0xbae77b6ddc30fcbfl,
-            0xdb95ea7d945723b7l },
-          { 0x43298e2bda8097e2l,0x8004167baf22ea9bl,0x9cf5974196a83d57l,
-            0xb35c9aba3cf67d5el },
-          0 },
-        /* 21 << 184 */
-        { { 0x0569a48df766f793l,0x6b4c7b16706b3442l,0xcc97754416ff41e0l,
-            0x800c56e31fee2e86l },
-          { 0xce0c3d0fcdf93450l,0x6ec3703582f35916l,0x902520d5bbc11e68l,
-            0x7e2b988505078223l },
-          0 },
-        /* 23 << 184 */
-        { { 0xb30d1769101da00bl,0xb26872d5113cfdb6l,0x7b0491da44e48db5l,
-            0x810e73bb2013f8c9l },
-          { 0xc86e579a570f0b59l,0xf34107e37a918f34l,0x49286d00277473f1l,
-            0x74423f5abc85905dl },
-          0 },
-        /* 25 << 184 */
-        { { 0x90d7417879de6b48l,0xe762caf0d14fa75bl,0xa309dcf3bd91ec5dl,
-            0x7aafe1ddf526d04fl },
-          { 0x76911342d39e36ffl,0xe28994d2fabb34b8l,0xac23a92c863110cbl,
-            0x9f0f69673aabd166l },
-          0 },
-        /* 27 << 184 */
-        { { 0x7436bdf47e333f98l,0x879cf31f2455af64l,0x07933a9cf6cfde92l,
-            0xfcac38a5b6e3203fl },
-          { 0xa39b6a8098e5a6e0l,0x1d600b5da4837528l,0x54718de7c32d412bl,
-            0x02870f46317937ccl },
-          0 },
-        /* 28 << 184 */
-        { { 0x1f13756db1761ec8l,0xe53c8b98a4b97e55l,0xb2aee3f84096cc28l,
-            0x48c361a0920f1a8dl },
-          { 0xa98b672d8c31190al,0x7bc1e7d1001855d4l,0x242cfb07bf3f4b2al,
-            0x9bf44a3f32a28bc4l },
-          0 },
-        /* 29 << 184 */
-        { { 0x96d4b271e36eeccdl,0x2d8c01b859237e23l,0x24f7a6eb8adf2653l,
-            0xc08ac4ab41183d80l },
-          { 0xc35e5bb7036367c3l,0xd8c97cbc0ba59f61l,0x296b1f4c5aafe986l,
-            0xa519c7a17d179c37l },
-          0 },
-        /* 31 << 184 */
-        { { 0x4043490790ae5f49l,0x8ac8f73649556b81l,0xb57a89b0f4e77a16l,
-            0xe1a1565d071020eal },
-          { 0x4a27f34d3dda8450l,0x65af18b9bc395814l,0xaf21939f9ff49991l,
-            0x47e00639b4af7691l },
-          0 },
-        /* 33 << 184 */
-        { { 0x4b3e263246b1f9b2l,0x6457d838efde99d3l,0x77d5142325e56171l,
-            0xb45de3df7d54996cl },
-          { 0x1ee2dd3194098d98l,0x986896141f3ebdc5l,0x2704a107997efb47l,
-            0x96b502eecb11e520l },
-          0 },
-        /* 34 << 184 */
-        { { 0x58c8039ec19f866el,0xc84c053e386c2644l,0xb3708ab049435704l,
-            0x1b70c3c86fc47b24l },
-          { 0x235582a27f095649l,0x0d344b66673c9a9el,0x777c9e71e2b00efdl,
-            0x91691d6e5b877856l },
-          0 },
-        /* 35 << 184 */
-        { { 0x11c663c49cd31e22l,0x46ae0bd95fb943d7l,0x6e36bca6a392fc01l,
-            0x4f8cc3a77948716fl },
-          { 0x10ae9d6b3aa4bbb0l,0xcc9b6cb5d8001a86l,0x012c8e3aa0a4ceedl,
-            0xe462971e52274942l },
-          0 },
-        /* 36 << 184 */
-        { { 0x9982e2ac42e176a5l,0x324eba46e2782b64l,0x3d8caaafe18350f5l,
-            0xf3d82af2f5d674cal },
-          { 0xc2090fed56600d1el,0x4548e0ef5950de07l,0xb2f0023f765a4febl,
-            0xb303103339f16790l },
-          0 },
-        /* 37 << 184 */
-        { { 0xb94095dc7bdacf7al,0x0e73db39509b310al,0x76e99a6b41b5f772l,
-            0xef40e9c596f3dbd7l },
-          { 0xd0d644f980f2179el,0xe0db831d5a89807el,0xa0188493c2a2d6c6l,
-            0xf2d9a85e5ba9faa9l },
-          0 },
-        /* 39 << 184 */
-        { { 0x598b7876cdd95b93l,0x5f7cc827336966e8l,0x01887109e797f102l,
-            0x665671c446c7c296l },
-          { 0xb314793c6e019c72l,0x5a6c81580e0329acl,0x4faf2f1b44281b98l,
-            0x825884072e1fc97el },
-          0 },
-        /* 40 << 184 */
-        { { 0xa692781d61a3c8b3l,0x08bc385432876d0el,0xbecf05fb28027b03l,
-            0x636c687da4b1e12fl },
-          { 0x00e3003d07217c58l,0x613ba9375e01b2a3l,0xa58c8405881de16el,
-            0xc653c43014f8f48bl },
-          0 },
-        /* 41 << 184 */
-        { { 0x68e53c7c89c0c7c2l,0xf2e680b23c423272l,0xacd47fae60f50133l,
-            0x4c484c6534f05605l },
-          { 0x663bdcf9ebffbb7dl,0xb49cff3be42421c6l,0x0549f7b13f53f261l,
-            0xc516aeda7c374766l },
-          0 },
-        /* 43 << 184 */
-        { { 0xa515fe0f76a0ec26l,0xf727c0797b0b8b21l,0xaeed4c671993651el,
-            0x1465a7f828ac7c87l },
-          { 0x776bd5131f0ef90bl,0x57515d2cd9773e61l,0x235455e95564c50bl,
-            0xf44daef80bf06a24l },
-          0 },
-        /* 44 << 184 */
-        { { 0xbc1c6897d6a0d0f9l,0xd8e0ea0e3b0d7f55l,0xb35baa92b85b7aadl,
-            0x2becd1b7674e48f4l },
-          { 0xe2d7f78d6d7a9ac2l,0xf5074262f99c95d0l,0x4852470a89f611e9l,
-            0xf7aa911992869decl },
-          0 },
-        /* 45 << 184 */
-        { { 0x0bd1755b0ac4840fl,0x0f4c6c2aa22eef10l,0x3f72fe2d78d16dd9l,
-            0xb2d49200ff7096a4l },
-          { 0xa5dead555ffca031l,0x1d013c320b65f4cfl,0x67e498582a23f441l,
-            0x55bae166d02412c0l },
-          0 },
-        /* 46 << 184 */
-        { { 0x546dd4545739a62al,0x353dc1422a30b836l,0x1462449d99cbd704l,
-            0xda02d0772da69411l },
-          { 0xcb115fe565b1a1adl,0x395235f501230a22l,0x8ae630eed164d970l,
-            0x60b679f0074e3a7el },
-          0 },
-        /* 47 << 184 */
-        { { 0x2e64695245d231e1l,0xc96663ac00d8a0fbl,0xc1fbaa0cd07e1f41l,
-            0x4b31484488758781l },
-          { 0xd6971a835183e72el,0xd1d01f174cbe99b7l,0xe90b438c5a2f7512l,
-            0xf858fa452957c620l },
-          0 },
-        /* 48 << 184 */
-        { { 0xed7f2e774e6daae2l,0x7b3ae0e39e0a19bcl,0xd3293f8a91ae677el,
-            0xd363b0cb45c8611fl },
-          { 0xbe1d1ccf309ae93bl,0xa3f80be73920cae1l,0xaaacba74498edf01l,
-            0x1e6d2a4ab2f5ac90l },
-          0 },
-        /* 49 << 184 */
-        { { 0xb5c5bb67b972a778l,0xc2423a4a190f9b5al,0x4e693cf365247948l,
-            0xc37d129ea94a65a3l },
-          { 0xbea4736b6e9cd47bl,0xf3d1bd212338f524l,0xa2a0278e067a45dal,
-            0xc86d631b5b5dce9bl },
-          0 },
-        /* 51 << 184 */
-        { { 0xc2d75f46116952cel,0xd2b66269b75e40dal,0x024f670f921c4111l,
-            0x37ffd854c91fd490l },
-          { 0x6be44d0385b2f613l,0x040cd7d9ba11c4f9l,0x04c1cb762c0efb1fl,
-            0xd905ff4f505e4698l },
-          0 },
-        /* 52 << 184 */
-        { { 0x60c5f03f233550f1l,0xd4d09411925afd2el,0xa95b65c3d258e5a6l,
-            0x1a19cfb59f902c6al },
-          { 0xb486013af5ad5c68l,0xa2506776979638f3l,0x1232b4d0a38e0b28l,
-            0xa64784b8d36a7b4fl },
-          0 },
-        /* 53 << 184 */
-        { { 0x22c75830a13dcb47l,0xd6e81258efd7a08fl,0x6db703b6e4fc49b8l,
-            0x8a5ac636f01817e9l },
-          { 0x8d27b6e1b3f24514l,0x40edc3bc708c51d7l,0x9a1eec7765bb086dl,
-            0x812ccb42b10800f8l },
-          0 },
-        /* 55 << 184 */
-        { { 0x1a39c6acd4338453l,0x3d93822954b1295dl,0x7bf0bf45e0d81165l,
-            0x83d58ca5972804d2l },
-          { 0x105d3ddb00524b94l,0x65d516e7920378ecl,0x1d28f5f1aea33926l,
-            0xa0b354313901c906l },
-          0 },
-        /* 57 << 184 */
-        { { 0x000442a1e4f354del,0x165b44d9d1d112f5l,0x67fd9ced0d05c0a9l,
-            0xd6ce074360bd5d60l },
-          { 0x9ac80c931522af2al,0x8232d522fa07d449l,0x287b5534c3fdb652l,
-            0x9f0548b3abd2ab98l },
-          0 },
-        /* 59 << 184 */
-        { { 0xde8d7086b9aea1d4l,0x692180d98a7dc3fcl,0xd64ffb53bad3e6f3l,
-            0x84628acf36ce3f91l },
-          { 0xf76e470b6d498ac5l,0xa16945547abad602l,0x5b8fd6a5a255c1f6l,
-            0xffe24e4a8576ae2al },
-          0 },
-        /* 60 << 184 */
-        { { 0x5655179de7d70e03l,0x3e780c5c72a84570l,0xc102b4cb1d50029cl,
-            0x3e71bdd5f075e839l },
-          { 0x6460f4f0b498b822l,0x2682e06c6d4b8da5l,0x4eae53c996a740d4l,
-            0xc19d8bef6389702cl },
-          0 },
-        /* 61 << 184 */
-        { { 0x711be2081025fe1dl,0x2e562c89f0bc6a99l,0xcfd2be3a28bf4150l,
-            0x33037b4a38e5bc91l },
-          { 0x10c6da9df52fea02l,0x511f62444f0ea410l,0x19d37ca81a294c3fl,
-            0x7e40f444618e6fd3l },
-          0 },
-        /* 63 << 184 */
-        { { 0x4095f5ddbedb8734l,0x9c16027c4432f51al,0xced8179d873d0f11l,
-            0x70c2bc9f6ebe6e61l },
-          { 0x5c31035d616cf2f4l,0xf92e0fbd00a4af3dl,0xe6048a03511893c4l,
-            0x639a804b52e2f462l },
-          0 },
-        /* 64 << 184 */
-        { { 0x8735728dc2c6ff70l,0x79d6122fc5dc2235l,0x23f5d00319e277f9l,
-            0x7ee84e25dded8cc7l },
-          { 0x91a8afb063cd880al,0x3f3ea7c63574af60l,0x0cfcdc8402de7f42l,
-            0x62d0792fb31aa152l },
-          0 },
-        /* 65 << 184 */
-        { { 0x0f4bcefd9da373e4l,0x7278f44d119271a3l,0xb2dff94449e111c0l,
-            0xb0a3abf8e5d2b2d4l },
-          { 0x01baabb48ea80631l,0x27517ed3da305f85l,0x0a1ca6fc3f56aa86l,
-            0x183d9c7694c22839l },
-          0 },
-        /* 71 << 184 */
-        { { 0xe9a0dfbf22e238d7l,0x8690dfd97e8d8d31l,0xb3cb2a0d4006c59cl,
-            0xe4d297caa1850d74l },
-          { 0x066f10517842d14cl,0x68dd32737d43602bl,0x1f9f5cf931345f39l,
-            0x44f18c2b10593890l },
-          0 },
-        /* 77 << 184 */
-        { { 0x8d8c0233a7c3f60bl,0xfb59fe2d2bcbbd4cl,0xfa311680dc3e5b44l,
-            0xb3cba9f3fbea5eedl },
-          { 0xcb353b2f61e0e690l,0x06edf0c1b6e0efe0l,0xa29578cb1d0c02a2l,
-            0xaeb2d677937fec07l },
-          0 },
-        /* 83 << 184 */
-        { { 0xa19a81c5cdd0cac9l,0x5c10b942ec9cf85bl,0x0843ef4639e8c298l,
-            0xcfd45d0e6c043258l },
-          { 0x1011bcb9fb7e4b58l,0xae6362a544402bbdl,0x9ecc8c68ec15d751l,
-            0xbc05998869d1a00bl },
-          0 },
-        /* 89 << 184 */
-        { { 0xe9a43619460147e3l,0x881a6af423067448l,0x94f93ae6cee17a6bl,
-            0x469e692f10782558l },
-          { 0x01e244a1289bdb32l,0x240645779dddf970l,0x664cbd92d8f521ecl,
-            0xadaf8ffb600222d0l },
-          0 },
-        /* 95 << 184 */
-        { { 0x68314c740dbec437l,0x2095e1295ec75e2cl,0x8e88a3ddf0e6c606l,
-            0x40ac647d1230f6b2l },
-          { 0x09d124aaa2e6b991l,0xa22f9e2bcc81037cl,0xc842b64d15c3a1c2l,
-            0x4d822becce808c65l },
-          0 },
-        /* 101 << 184 */
-        { { 0xb02204d06ffb396bl,0x82eb6ecc881bead6l,0xf58432cebd6896c8l,
-            0xc243468da38f4b9dl },
-          { 0x8486402df8e628bdl,0x5dd338a1a4df2401l,0x748a41ab0daac953l,
-            0xaa121d13e51e6235l },
-          0 },
-        /* 107 << 184 */
-        { { 0x6daa0a4e50abc6aal,0x99fcc5bdeafb7cf2l,0xc705f64c4b8dbd2al,
-            0x7deff836e7b51e90l },
-          { 0xd92f42b859a8180fl,0x3bb298f8618d24acl,0x2433aa7357a56438l,
-            0xcf29895b48a6a238l },
-          0 },
-        /* 113 << 184 */
-        { { 0x74079dc59ed25aafl,0x7988245c023d5143l,0x7edfc6a6feb79c24l,
-            0x7ed03c50a6baa70fl },
-          { 0x71d3413596a753b4l,0x59efbafcef976246l,0xed050260a4a6947fl,
-            0xabbc1f8066254247l },
-          0 },
-        /* 116 << 184 */
-        { { 0x1f804e00caa4646fl,0x8643dc8870944924l,0xa37f1ca273f86de9l,
-            0xa3199f9228889898l },
-          { 0xc273ba580c1e4adfl,0x0f0d38af65bc82f0l,0xd8b28ab5f8a6cd3bl,
-            0xeea6e08575894d8el },
-          0 },
-        /* 119 << 184 */
-        { { 0x398f39132c1620f7l,0x9046d2dea921f3a3l,0x40a25a2785b50bb0l,
-            0xb9adeca0d32e95f3l },
-          { 0xa4199b1bdede5cbfl,0x9068aee084f5410bl,0x6665e4f5730f0397l,
-            0x2e9ba18c8ae20659l },
-          0 },
-        /* 125 << 184 */
-        { { 0xd76e9b2351835897l,0x72a0e000012deda6l,0x5bf08922bfec23e4l,
-            0x8c2fcf1385cf2b7bl },
-          { 0x6c42f935c63332c6l,0x8736c58395eccce9l,0x2d2abbb10721afc8l,
-            0x1f7a76cc42d4e029l },
-          0 },
-    },
-    {
-        /* 0 << 192 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 192 */
-        { { 0x56f8410ef4f8b16al,0x97241afec47b266al,0x0a406b8e6d9c87c1l,
-            0x803f3e02cd42ab1bl },
-          { 0x7f0309a804dbec69l,0xa83b85f73bbad05fl,0xc6097273ad8e197fl,
-            0xc097440e5067adc1l },
-          0 },
-        /* 3 << 192 */
-        { { 0x266344a43794f8dcl,0xdcca923a483c5c36l,0x2d6b6bbf3f9d10a0l,
-            0xb320c5ca81d9bdf3l },
-          { 0x620e28ff47b50a95l,0x933e3b01cef03371l,0xf081bf8599100153l,
-            0x183be9a0c3a8c8d6l },
-          0 },
-        /* 4 << 192 */
-        { { 0xb6c185c341dca566l,0x7de7fedad8622aa3l,0x99e84d92901b6dfbl,
-            0x30a02b0e7c4ad288l },
-          { 0xc7c81daa2fd3cf36l,0xd1319547df89e59fl,0xb2be8184cd496733l,
-            0xd5f449eb93d3412bl },
-          0 },
-        /* 5 << 192 */
-        { { 0x25470fabe085116bl,0x04a4337587285310l,0x4e39187ee2bfd52fl,
-            0x36166b447d9ebc74l },
-          { 0x92ad433cfd4b322cl,0x726aa817ba79ab51l,0xf96eacd8c1db15ebl,
-            0xfaf71e910476be63l },
-          0 },
-        /* 7 << 192 */
-        { { 0x72cfd2e949dee168l,0x1ae052233e2af239l,0x009e75be1d94066al,
-            0x6cca31c738abf413l },
-          { 0xb50bd61d9bc49908l,0x4a9b4a8cf5e2bc1el,0xeb6cc5f7946f83acl,
-            0x27da93fcebffab28l },
-          0 },
-        /* 9 << 192 */
-        { { 0x3ce519ef76257c51l,0x6f5818d318d477e7l,0xab022e037963edc0l,
-            0xf0403a898bd1f5f3l },
-          { 0xe43b8da0496033cal,0x0994e10ea1cfdd72l,0xb1ec6d20ba73c0e2l,
-            0x0329c9ecb6bcfad1l },
-          0 },
-        /* 10 << 192 */
-        { { 0xf1ff42a12c84bd9dl,0x751f3ec4390c674al,0x27bb36f701e5e0cal,
-            0x65dfff515caf6692l },
-          { 0x5df579c4cd7bbd3fl,0xef8fb29785591205l,0x1ded7203e47ac732l,
-            0xa93dc45ccd1c331al },
-          0 },
-        /* 11 << 192 */
-        { { 0xbdec338e3318d2d4l,0x733dd7bbbe8de963l,0x61bcc3baa2c47ebdl,
-            0xa821ad1935efcbdel },
-          { 0x91ac668c024cdd5cl,0x7ba558e4c1cdfa49l,0x491d4ce0908fb4dal,
-            0x7ba869f9f685bde8l },
-          0 },
-        /* 13 << 192 */
-        { { 0xed1b5ec279f464bal,0x2d65e42c47d72e26l,0x8198e5749e67f926l,
-            0x4106673834747e44l },
-          { 0x4637acc1e37e5447l,0x02cbc9ecf3e15822l,0x58a8e98e805aa83cl,
-            0x73facd6e5595e800l },
-          0 },
-        /* 15 << 192 */
-        { { 0x468ff80338330507l,0x06f34ddf4037a53el,0x70cd1a408d6993a4l,
-            0xf85a159743e5c022l },
-          { 0x396fc9c2c125a67dl,0x03b7bebf1064bfcbl,0x7c444592a9806dcbl,
-            0x1b02614b4487cd54l },
-          0 },
-        /* 16 << 192 */
-        { { 0x8303604f692ac542l,0xf079ffe1227b91d3l,0x19f63e6315aaf9bdl,
-            0xf99ee565f1f344fbl },
-          { 0x8a1d661fd6219199l,0x8c883bc6d48ce41cl,0x1065118f3c74d904l,
-            0x713889ee0faf8b1bl },
-          0 },
-        /* 17 << 192 */
-        { { 0xb47b60f70de21bb6l,0x64acae4fdcd836cal,0x3375ea6dc744ce63l,
-            0xb764265fb047955bl },
-          { 0xc68a5d4c9841c2c3l,0x60e98fd7cf454f60l,0xc701fbe2756aea0cl,
-            0x09c8885eaab21c79l },
-          0 },
-        /* 19 << 192 */
-        { { 0x45bb810869d2d46cl,0xe47c8b3968c8365al,0xf3b87663267551bdl,
-            0x1590768f5b67547al },
-          { 0x371c1db2fb2ed3ffl,0xe316691917a59440l,0x03c0d178df242c14l,
-            0x40c93fceed862ac1l },
-          0 },
-        /* 21 << 192 */
-        { { 0x1286da692bc982d6l,0x5f6d80f27bdae7e3l,0x3d9c5647a6f064fbl,
-            0xfdc8e6a1d74c1540l },
-          { 0x97da48c6d68b135al,0xc2097979d66dbfffl,0x0296adb9ea20531dl,
-            0xa333730d4ab2c8f0l },
-          0 },
-        /* 23 << 192 */
-        { { 0x0eb3565429847fedl,0xfdc142860a673dd0l,0x721b36278b62dd0bl,
-            0x105a293e711a5771l },
-          { 0xdf001cce7f761927l,0xf7b681b011d04c7dl,0x16dff792a3ac1996l,
-            0x580c120b0fc4ae30l },
-          0 },
-        /* 25 << 192 */
-        { { 0x31ea3d4f7ee8d0bcl,0x3832f22a0f42c3dcl,0xc661061a1a87a2f4l,
-            0x0978c9f64b45576bl },
-          { 0xb7abac3c6dfb5fd2l,0x27f36a00b7e01b90l,0x68f733cde9429e36l,
-            0x953a4681dcbfe8cbl },
-          0 },
-        /* 27 << 192 */
-        { { 0xbfb7c41067fe1eafl,0xa2073c6a6929a785l,0x6f2536f4a75fdb79l,
-            0x859ad26d809bca69l },
-          { 0x06f2c0693b197e7bl,0x656ad9f48ec0a573l,0xe7c7901f9a4d0262l,
-            0xbec29443b938602bl },
-          0 },
-        /* 28 << 192 */
-        { { 0xd00397fc0f0073a4l,0x5b668fa46f8d675fl,0x14374ac91522108cl,
-            0x92efa7d10283e42el },
-          { 0x673e6df90b6d024al,0x05f914d457581f26l,0xf5c8516267df8c12l,
-            0x1197f1b4e06c2462l },
-          0 },
-        /* 29 << 192 */
-        { { 0x6e2d1cb3dd9c90c1l,0x28f82d5a7990579el,0x90e189cd06226195l,
-            0xbd2939df19b0dc74l },
-          { 0x18b18505c0917177l,0xeed5470d3117d9c4l,0x39ef92eb6c893ca0l,
-            0x4533ef8244a41940l },
-          0 },
-        /* 31 << 192 */
-        { { 0xcaee9dec34943ddal,0x8e50e98e8b4b6782l,0x24358ea591ea3a1fl,
-            0x71c4c827a9e1c194l },
-          { 0xa38baa5d09bb7a94l,0xfb4ab4c057b58f9cl,0x4a01065e24e0ee19l,
-            0xb9cf805107b877bfl },
-          0 },
-        /* 33 << 192 */
-        { { 0xd38c1ce0a2980d5el,0x8b84cca4541face7l,0x93298136dbd8d05dl,
-            0x582708d03f85c85al },
-          { 0x6545eec7282960e4l,0x92e184aebaadec07l,0x05452564fd27a20fl,
-            0x79d4668abddce6ebl },
-          0 },
-        /* 34 << 192 */
-        { { 0xf5cc5cccf5191707l,0xe800328bd5d01f67l,0x0572012ebd9b1599l,
-            0xf5be11a6863d0125l },
-          { 0x4da7ca876ea441e0l,0x47dbf83b321b134al,0x5cbadcdac1acfb4al,
-            0x19ac798a734f8e25l },
-          0 },
-        /* 35 << 192 */
-        { { 0xe312623a7002114fl,0xb888b637e047686bl,0x23b2c270cbac91bdl,
-            0xb50b31884dbfe02dl },
-          { 0x8335ce43de97eef6l,0x6a4e65502bac193al,0xf2b35aac3101f720l,
-            0x5b2c88d5379a2015l },
-          0 },
-        /* 36 << 192 */
-        { { 0xf445e77131547128l,0x22761665e27811cal,0x9b944e91a37c6681l,
-            0xc0aa06a536899860l },
-          { 0x8c2b5816cfcd557el,0xf2734a19945aa357l,0x536ca07ca55a0049l,
-            0x8328fdccc636d967l },
-          0 },
-        /* 37 << 192 */
-        { { 0x52b513616aca06bdl,0x8d19b893cdf16560l,0x06b28179c3b438cdl,
-            0xde1ef747cd1819e4l },
-          { 0xbc6cc43b5f557985l,0xa277e11f61e0142al,0x58890f1e429cc392l,
-            0x28d17dbfe5fc8f5el },
-          0 },
-        /* 39 << 192 */
-        { { 0x556df61a29a8f7cbl,0x5cf554dfd14ab27al,0x243f933ba755b886l,
-            0xa4d0b06ff2d4ce87l },
-          { 0xa745eb8d2c0f1d39l,0xc228747aea3047a5l,0xced774c41d2cecc0l,
-            0x54a55c3a774fb01al },
-          0 },
-        /* 40 << 192 */
-        { { 0xa691398a4a9eb3f0l,0x56c1dbff3b99a48fl,0x9a87e1b91b4b5b32l,
-            0xad6396145378b5fel },
-          { 0x437a243ec26b5302l,0x0275878c3ccb4c10l,0x0e81e4a21de07015l,
-            0x0c6265c9850df3c0l },
-          0 },
-        /* 41 << 192 */
-        { { 0x182c3f0e6be95db0l,0x8c5ab38cae065c62l,0xcce8294ebe23abacl,
-            0xed5b65c47d0add6dl },
-          { 0xbce57d78cc9494cal,0x76f75c717f435877l,0xb3084b2eb06560a9l,
-            0x67216bc850b55981l },
-          0 },
-        /* 43 << 192 */
-        { { 0x49c9fd92557de68bl,0x357aa44fc3151b7al,0xd36286d11e4aebd0l,
-            0x84562cd736a51203l },
-          { 0x42a57e7c3cacc002l,0x794a47751b1e25a3l,0x2c2ab68cac0d4356l,
-            0xececb6addb31afdcl },
-          0 },
-        /* 44 << 192 */
-        { { 0x47a5f010b4c21bfel,0x45c5610f0ac3dc20l,0x20e689fcea3bf4dcl,
-            0xf244ea49fb5f46e4l },
-          { 0xd918e59e8ca38e45l,0x7d6c601d96189a6fl,0x1a40f03854138471l,
-            0xfe867d7308a9d034l },
-          0 },
-        /* 45 << 192 */
-        { { 0x3b49e489100c0410l,0x8831d3992adc2b29l,0xb6726cd1247a8116l,
-            0x83a71a59d1d56d8el },
-          { 0x82ade2fe5cd333e9l,0x3b087ef83ea11f1al,0x17b96ca66ce879cel,
-            0xc2f74a971871dc43l },
-          0 },
-        /* 46 << 192 */
-        { { 0xa11a1e3680b576cel,0xf91278bbce2683e8l,0xc3bab95fbae8bc5bl,
-            0x642ca26397351715l },
-          { 0x5ffc14726fecbbc1l,0x2465e996a23f36d4l,0x06fc53bf5187d428l,
-            0x54b4014351fbce91l },
-          0 },
-        /* 47 << 192 */
-        { { 0x081ca6f0eafc7b2cl,0x1ba047a38c48703fl,0xe84865046663accfl,
-            0xde1f97568d43689cl },
-          { 0xf5373e1d5bc19f75l,0x4e48c493d64b0a54l,0x0c43f4e25807dbf6l,
-            0x73bef15167778c36l },
-          0 },
-        /* 48 << 192 */
-        { { 0xca6c0937b1b76ba6l,0x1a2eab854d2026dcl,0xb1715e1519d9ae0al,
-            0xf1ad9199bac4a026l },
-          { 0x35b3dfb807ea7b0el,0xedf5496f3ed9eb89l,0x8932e5ff2d6d08abl,
-            0xf314874e25bd2731l },
-          0 },
-        /* 49 << 192 */
-        { { 0x9d5322e89e9bba53l,0xdd7c9ceb989ff350l,0xd76147eadab0d7b3l,
-            0x8e45b1c6d7a9a9a1l },
-          { 0x8f896a91d4f10c10l,0x999a73c54068de06l,0x84a9d0839cf0a779l,
-            0x4d7cc7689f608ab2l },
-          0 },
-        /* 51 << 192 */
-        { { 0x1833ccddaee93c82l,0x6a05ef7b9f35f20fl,0xc538dac9ae413bc2l,
-            0x1e74f4658b4784bdl },
-          { 0xccb2bc4a49ffd544l,0x9b88183d2b17ae88l,0x96037a136e43824fl,
-            0xbbb61441480bf3dfl },
-          0 },
-        /* 52 << 192 */
-        { { 0x13319d20e090ad42l,0x4ff3186e12cbb719l,0xf38e504913fc0a46l,
-            0x83185a1254e60378l },
-          { 0x08c4057797ea8935l,0x7b2212a946b614f9l,0xedcdfa520634cfb3l,
-            0xdbc60eed9e7d5726l },
-          0 },
-        /* 53 << 192 */
-        { { 0x9b0785c6c7e1070fl,0xec112f53cbf561e5l,0xc93511e37fab3464l,
-            0x9e6dc4da9de8e0c2l },
-          { 0x7733c425e206b4eel,0xb8b254ef50cedf29l,0xfaee4bbbd50ad285l,
-            0x216e76d58c4eb6cfl },
-          0 },
-        /* 55 << 192 */
-        { { 0x9d6a28641d51f254l,0x26c5062a0c2822c3l,0xd74ebba8334bf4eel,
-            0x6e5446eb0b8f7305l },
-          { 0x5988ae8eb629beccl,0x71e576d0a1de7d1dl,0x15e39592a8873970l,
-            0x2b1f9a9342ecc74el },
-          0 },
-        /* 57 << 192 */
-        { { 0xcbdb70727c519bf9l,0x112986bbcaaf48e6l,0x64d4c6d1a13baf3cl,
-            0x85ccf6f7a065e77el },
-          { 0x183be337749beaedl,0xb3703096cba6c9b1l,0x1edf81f0e42b8afel,
-            0xf04ed594ccb73ad7l },
-          0 },
-        /* 59 << 192 */
-        { { 0xfa954ebc38491e9fl,0xf75a5808d32f0b03l,0x196d4a828083b9d3l,
-            0x92d5a0be5e8dc9fel },
-          { 0x4a507ae9aea628bal,0xeea5861e11a02fb5l,0xa033b84fd23ec8f7l,
-            0x1a68c36ec60f11d5l },
-          0 },
-        /* 60 << 192 */
-        { { 0x3dfb55bdab920ef2l,0xe0090971e6244484l,0xdc39fd08f7c6e1a3l,
-            0x1ca765356ee79e72l },
-          { 0x472c8985287d590cl,0x67635e35ad6daeb4l,0x06ec4e7980f9fee3l,
-            0x0aceb39921dc5fdbl },
-          0 },
-        /* 61 << 192 */
-        { { 0xdb2478fd9410a756l,0xd106aefe3a53a1e6l,0x1f4c940d14286333l,
-            0x6a98659d04950958l },
-          { 0x3232a1c6a6bbe060l,0x19ad132ca5e7ca9bl,0x3c9c13ef800fae29l,
-            0x9b0d9068b8660f49l },
-          0 },
-        /* 63 << 192 */
-        { { 0x1e7f043795c53027l,0x5221e5c0da9a3806l,0xf297d8e379d9385fl,
-            0x4d69e95f78ba697el },
-          { 0xdda936cee76d13c1l,0xd9a5790a485b12f5l,0xeab84add51efbfd0l,
-            0xc9a3ee9ca9f44aa4l },
-          0 },
-        /* 64 << 192 */
-        { { 0xefb26a753f73f449l,0x1d1c94f88d44fc79l,0x49f0fbc53bc0dc4dl,
-            0xb747ea0b3698a0d0l },
-          { 0x5218c3fe228d291el,0x35b804b543c129d6l,0xfac859b8d1acc516l,
-            0x6c10697d95d6e668l },
-          0 },
-        /* 65 << 192 */
-        { { 0x8c12e87a15454db4l,0xbc1fc546908e8fbcl,0xc35d83c7e4cf1636l,
-            0xcb2f5ac820641524l },
-          { 0x2400aae2e644ecd0l,0x9b01e2d14be37119l,0x6cffd52831b54857l,
-            0xb3fd5d864b5cbf81l },
-          0 },
-        /* 71 << 192 */
-        { { 0x2e999a4739709fb9l,0x4cb4bbdb62c2b30fl,0x4c7259ac09de0c92l,
-            0x73c1e34f8c59a0ffl },
-          { 0x0a9e5f2e48cb0a12l,0x5e07449fcf499bb0l,0x0527a8b4b02c4a54l,
-            0x7381287159da01e4l },
-          0 },
-        /* 77 << 192 */
-        { { 0xe0b876ca0548ff87l,0x74b5a9b25e03bae3l,0xd5564cc5dd0642d2l,
-            0x29ed211b668c4977l },
-          { 0xf29d3b7aa7422b11l,0x17f2d3586d29b8bal,0x2e35cdda2bb887del,
-            0x650f148078e4444bl },
-          0 },
-        /* 83 << 192 */
-        { { 0x8c75532fb47435ebl,0x2234e2c5a113f905l,0x27b75fea31508ae9l,
-            0x09733e40d489ad0bl },
-          { 0x73b38464a1b06da1l,0x0aed522dc5b7ccf2l,0xcc04783e78d7e5afl,
-            0xa81c8a8ff23eaab7l },
-          0 },
-        /* 89 << 192 */
-        { { 0x6bb5eca73c149ffal,0x4593d851c536487al,0x3675daaad85eb9edl,
-            0xbf65d0f9b8a58ffbl },
-          { 0x1dc6ddddc22e83eel,0xb673397ee10d3c17l,0x6bdc20600ca62c93l,
-            0x260389c30b821f6dl },
-          0 },
-        /* 95 << 192 */
-        { { 0x45f5cf07b417be10l,0x0acb1a44e5d561d8l,0x54b7baeafb1dfbe9l,
-            0x0e6e66219044672el },
-          { 0xa9b6db6d9a793601l,0xd70eadb8a4a0ba4al,0xaedace846098b89el,
-            0x970f2c23ac39d40fl },
-          0 },
-        /* 101 << 192 */
-        { { 0x9dff8d289c7eaaa8l,0x38bcd076db0cc361l,0x25760147cdea9db8l,
-            0x44c89dd40163f343l },
-          { 0x18815d7544db8365l,0xa186d57b37f3e4b3l,0xa71de7806e84a7fal,
-            0xf1c08989e56646b3l },
-          0 },
-        /* 107 << 192 */
-        { { 0xad73e1448fb56a43l,0x078c14fb715543c9l,0xa57770fd64b92d54l,
-            0xf0420a9277e9b919l },
-          { 0xc660d0cb588ccc1dl,0x069baa1471415c2el,0x747438dc32982740l,
-            0x4782ce08767381eel },
-          0 },
-        /* 113 << 192 */
-        { { 0xc2a1ee5fdb3b6b5dl,0x08ce544820e1339fl,0x3cb954b77073955fl,
-            0xb9ed2ee7f32d0832l },
-          { 0xc0a998b1b4aac98el,0x4912273dbca4bac7l,0xac0f5014c3f92c4al,
-            0xbf3dc27f9e916e78l },
-          0 },
-        /* 116 << 192 */
-        { { 0x222c7bae28833944l,0xbb78a867f5e3cf67l,0x590cbd96faf6cfd6l,
-            0x1c50aecb3b0d842el },
-          { 0x8f2c5df1dbade9a5l,0x60923fb7e3840cecl,0xe8f2db6b03a67512l,
-            0x90af187be0d7c628l },
-          0 },
-        /* 119 << 192 */
-        { { 0xb4162b615fee3ccbl,0xe9786e7d7327e651l,0x6c85bd938812d9c1l,
-            0xfe4905083dc9e838l },
-          { 0xe66f25178a6765dfl,0x72fd294edeee184cl,0x07608bd27b6ec227l,
-            0x9df7b664dfdaa5e6l },
-          0 },
-        /* 125 << 192 */
-        { { 0x4aea16602d53a155l,0x7285069a32ab07fdl,0xf6f3000d8b6fcd19l,
-            0x010b1f246e98953fl },
-          { 0xe180bc559f9aa221l,0x7717ee383cba4534l,0x5997f3aa36cbda06l,
-            0x54c6090064a04b05l },
-          0 },
-    },
-    {
-        /* 0 << 200 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 200 */
-        { { 0x25914f7881fdad90l,0xcf638f560d2cf6abl,0xb90bc03fcc054de5l,
-            0x932811a718b06350l },
-          { 0x2f00b3309bbd11ffl,0x76108a6fb4044974l,0x801bb9e0a851d266l,
-            0x0dd099bebf8990c1l },
-          0 },
-        /* 3 << 200 */
-        { { 0xebd6a6777b0ac93dl,0xa6e37b0d78f5e0d7l,0x2516c09676f5492bl,
-            0x1e4bf8889ac05f3al },
-          { 0xcdb42ce04df0ba2bl,0x935d5cfd5062341bl,0x8a30333382acac20l,
-            0x429438c45198b00el },
-          0 },
-        /* 4 << 200 */
-        { { 0xfb2838be67e573e0l,0x05891db94084c44bl,0x9131137396c1c2c5l,
-            0x6aebfa3fd958444bl },
-          { 0xac9cdce9e56e55c1l,0x7148ced32caa46d0l,0x2e10c7efb61fe8ebl,
-            0x9fd835daff97cf4dl },
-          0 },
-        /* 5 << 200 */
-        { { 0x6c626f56c1770616l,0x5351909e09da9a2dl,0xe58e6825a3730e45l,
-            0x9d8c8bc003ef0a79l },
-          { 0x543f78b6056becfdl,0x33f13253a090b36dl,0x82ad4997794432f9l,
-            0x1386493c4721f502l },
-          0 },
-        /* 7 << 200 */
-        { { 0xe566f400b008733al,0xcba0697d512e1f57l,0x9537c2b240509cd0l,
-            0x5f989c6957353d8cl },
-          { 0x7dbec9724c3c2b2fl,0x90e02fa8ff031fa8l,0xf4d15c53cfd5d11fl,
-            0xb3404fae48314dfcl },
-          0 },
-        /* 9 << 200 */
-        { { 0xf02cc3a9f327a07fl,0xefb27a9b4490937dl,0x81451e96b1b3afa5l,
-            0x67e24de891883be4l },
-          { 0x1ad65d4770869e54l,0xd36291a464a3856al,0x070a1abf7132e880l,
-            0x9511d0a30e28dfdfl },
-          0 },
-        /* 10 << 200 */
-        { { 0xfdeed650f8d1cac4l,0xeb99194b6d16bda5l,0xb53b19f71cabbe46l,
-            0x5f45af5039b9276cl },
-          { 0xd0784c6126ee9d77l,0xf7a1558b0c02ca5dl,0xb61d6c59f032e720l,
-            0xae3ffb95470cf3f7l },
-          0 },
-        /* 11 << 200 */
-        { { 0x9b185facc72a4be5l,0xf66de2364d848089l,0xba14d07c717afea9l,
-            0x25bfbfc02d551c1cl },
-          { 0x2cef0ecd4cdf3d88l,0x8cee2aa3647f73c4l,0xc10a7d3d722d67f7l,
-            0x090037a294564a21l },
-          0 },
-        /* 13 << 200 */
-        { { 0x6ac07bb84f3815c4l,0xddb9f6241aa9017el,0x31e30228ca85720al,
-            0xe59d63f57cb75838l },
-          { 0x69e18e777baad2d0l,0x2cfdb784d42f5d73l,0x025dd53df5774983l,
-            0x2f80e7cee042cd52l },
-          0 },
-        /* 15 << 200 */
-        { { 0x43f18d7f4d6ee4abl,0xd3ac8cde9570c3dcl,0x527e49070b8c9b2al,
-            0x716709a7c5a4c0f1l },
-          { 0x930852b0916a26b1l,0x3cc17fcf4e071177l,0x34f5e3d459694868l,
-            0xee0341aba28f655dl },
-          0 },
-        /* 16 << 200 */
-        { { 0xf431f462060b5f61l,0xa56f46b47bd057c2l,0x348dca6c47e1bf65l,
-            0x9a38783e41bcf1ffl },
-          { 0x7a5d33a9da710718l,0x5a7799872e0aeaf6l,0xca87314d2d29d187l,
-            0xfa0edc3ec687d733l },
-          0 },
-        /* 17 << 200 */
-        { { 0x4b764317aa365220l,0x7a24affe68cc0355l,0x76732ed0ceb3df5el,
-            0x2ce1332aae096ed0l },
-          { 0x89ce70a7b8adac9dl,0xfdddcf05b3fc85c8l,0xbd7b29c6f2ee8bfel,
-            0xa1effcb9457d50f3l },
-          0 },
-        /* 19 << 200 */
-        { { 0x6053972dac953207l,0xc2ca9a8408ad12f6l,0x9ed6cd386ba36190l,
-            0xa5b50a48539d18a4l },
-          { 0xd9491347dbf18c2al,0x2cdce4662e9697cfl,0x4e97db5ca9e31819l,
-            0x0fb02e2d4c044b74l },
-          0 },
-        /* 21 << 200 */
-        { { 0x66a4dd414aa5e9ddl,0x6ec7576e64f6aeb9l,0x3f08ce06c7e980b5l,
-            0x52fe9fd6c1a2aa7el },
-          { 0xfe46e6d95074326al,0xd570ed734c126c1dl,0x86c7ec257217d55al,
-            0x3cb434057c3de2b2l },
-          0 },
-        /* 23 << 200 */
-        { { 0x48e0295dcc9e79bfl,0x2419485693eb403dl,0x9386fb7709dd8194l,
-            0xb6e89bb101a242f6l },
-          { 0xc7994f3924d308d7l,0xf0fbc392de673d88l,0x43eed52ea11abb62l,
-            0xc900f9d0c83e7fbel },
-          0 },
-        /* 25 << 200 */
-        { { 0x214a10dca8152891l,0xe6787b4c64f1abb2l,0x276333d9fa1a10edl,
-            0xc0e1c88e47dbccbcl },
-          { 0x8a3c37c4849dd12el,0x2144a8c8d86e109fl,0xbb6891f7286c140cl,
-            0xb0b8c5e29cce5e6fl },
-          0 },
-        /* 27 << 200 */
-        { { 0x3f9e0e3499753288l,0x6b26f1ebe559d93al,0x647fe21d9841faf1l,
-            0x48a4b6efa786ea02l },
-          { 0x6e09cd22665a882dl,0x95390d81b63ccda6l,0x5b014db4b026a44al,
-            0x5b96efb22ad30ff1l },
-          0 },
-        /* 28 << 200 */
-        { { 0x64c50c8b4a3b99e9l,0x2489a675d0a26f4fl,0xe2aacaeed85bc6fdl,
-            0x556882038a6019bal },
-          { 0x7ceb9da645cfac07l,0xe1ad3d25652dbd09l,0x086adf348d3b5d2bl,
-            0xf9256d8aec3654a0l },
-          0 },
-        /* 29 << 200 */
-        { { 0x571c246bf009a690l,0x8fe54231ccd90d3al,0x8adde6adfe173b79l,
-            0x75d9a392b05a5e3bl },
-          { 0x607f47b0d1bb3a84l,0xe4e3b472058e691al,0xfc0f793bf3d956e3l,
-            0x6a6730b605de54dal },
-          0 },
-        /* 31 << 200 */
-        { { 0x4daf7f540d80aaa1l,0xc571d04c229c4574l,0x469e2da5fffca53dl,
-            0x9fffe29513ff7f59l },
-          { 0x2075da5a33a254f7l,0x769f33acd35e575dl,0x7b940d2c3d35001al,
-            0x2d606b57e34c95b7l },
-          0 },
-        /* 33 << 200 */
-        { { 0xc7e4f8b899365f86l,0x8f6f959faae69527l,0x749ffedffdfaeeeal,
-            0x2b91f0221b54c2a0l },
-          { 0xe75c2352addbdf83l,0xe7329922fff2694cl,0xbb65ae06badadeacl,
-            0x16cbb9d1f56be3b5l },
-          0 },
-        /* 34 << 200 */
-        { { 0xb100a4c67a07bd70l,0x222fee7634787efel,0xa4dafc14f1e79d1bl,
-            0x0d3a82dad18b8be4l },
-          { 0xe0181445fc06922fl,0x0873d99b714a90b6l,0xdf43082fa5087a0el,
-            0x195e49367399e0dbl },
-          0 },
-        /* 35 << 200 */
-        { { 0x7e83545aae6fcc9cl,0x1a24fce819e15ce2l,0x4a3465c536d8c6a8l,
-            0xd1e5f24109436ae0l },
-          { 0xed334bfc6be463d5l,0xc46a600b934fbdcfl,0xbd2fd65b920321ffl,
-            0x953fa91767fa154el },
-          0 },
-        /* 36 << 200 */
-        { { 0x5dca4995f93ddad1l,0x061efcabf72470c2l,0xad78d54d5e7e0741l,
-            0xa91f4e839c4e0ab4l },
-          { 0xdd4403af5c75aa0dl,0x4308c8ee13c69113l,0x3a3b66f51ebc36adl,
-            0xc07cc3f0f4bf777al },
-          0 },
-        /* 37 << 200 */
-        { { 0x3fd1963e37a86b32l,0x22e236d60bd0880el,0xb87467cf89f0fa5cl,
-            0x85b9c6c0310e0265l },
-          { 0x82979a96783459ael,0xd19b0919bd529ed3l,0xa21f771808434f94l,
-            0x3dd130a9195369c6l },
-          0 },
-        /* 39 << 200 */
-        { { 0xc61e62767915d157l,0xc48244279e07fb0el,0x8980c1cc8420ea49l,
-            0x10d82e4a588d4e2bl },
-          { 0xdddecd52b17eff2dl,0xe44c7b2ded8492a4l,0x96ca89ebb9bea6afl,
-            0x724166fe1b03ed03l },
-          0 },
-        /* 40 << 200 */
-        { { 0xfc87975f8fb54738l,0x3516078827c3ead3l,0x834116d2b74a085al,
-            0x53c99a73a62fe996l },
-          { 0x87585be05b81c51bl,0x925bafa8be0852b7l,0x76a4fafda84d19a7l,
-            0x39a45982585206d4l },
-          0 },
-        /* 41 << 200 */
-        { { 0x8bbc484ed551f3e1l,0x6e058a90b7eb06d2l,0xfaccd9a0e5cd281al,
-            0xe7661b78d5b44900l },
-          { 0x03afe115725fde22l,0xbe929230c7229fd1l,0x5cd0d16a0000035el,
-            0x1f6a9df0c8f5a910l },
-          0 },
-        /* 43 << 200 */
-        { { 0xe54bbcfd535dfc82l,0x89be0b89a9012196l,0xa67831ee71011beal,
-            0x2ea7a8292db43878l },
-          { 0xff7c144378ffe871l,0xa67dc3d4c63f65eal,0xbbfc7fc2a1527419l,
-            0x6440380bf6c36b8fl },
-          0 },
-        /* 44 << 200 */
-        { { 0x71ab9f69d812d7e6l,0x2847c5516e142126l,0x9e27755bb31e7753l,
-            0xb89533e2943b8c7fl },
-          { 0xbe7f0c6e14fa7dc6l,0x782a06388cee1f7al,0x7069292938e13a6bl,
-            0x1e1221f0c63f4d28l },
-          0 },
-        /* 45 << 200 */
-        { { 0x9030aa9a63a431f4l,0x0fa7b5d45039a318l,0x6a0cf40af083687dl,
-            0x46689cec659fa752l },
-          { 0x8259727a456fa97el,0x4f618a355b08d7fcl,0x2c44217b72028d15l,
-            0x8083b09935111e32l },
-          0 },
-        /* 46 << 200 */
-        { { 0xaa5976523b5b29f1l,0xb07f10ab37432a54l,0x16e3e2236e36556fl,
-            0xf1c7c9bd47cd4586l },
-          { 0xa4eef99d3f87216dl,0x4e54d3c52e1eaa79l,0x534c5901d2540d91l,
-            0x718df7c9b6f0fcfcl },
-          0 },
-        /* 47 << 200 */
-        { { 0x99497f8a2eb0ee3bl,0x87e550c1caeb3a20l,0xd23e053dfb91627cl,
-            0xb971c043873124e6l },
-          { 0x3581ab853b16e467l,0x24541c926145187bl,0x4423ec5c010c2527l,
-            0x775f13029fa82a68l },
-          0 },
-        /* 48 << 200 */
-        { { 0x499b6ab65eb03c0el,0xf19b795472bc3fdel,0xa86b5b9c6e3a80d2l,
-            0xe43775086d42819fl },
-          { 0xc1663650bb3ee8a3l,0x75eb14fcb132075fl,0xa8ccc9067ad834f6l,
-            0xea6a2474e6e92ffdl },
-          0 },
-        /* 49 << 200 */
-        { { 0xbaebdd8a0c40aec4l,0x5eccafb563e8cfd0l,0x1c204c0eb5159938l,
-            0x607109d34b996aa9l },
-          { 0x024c6c4b9cef59fel,0xbc846e216ed4b6f1l,0xf6a50ff3ff652c0al,
-            0x368af2c72d95220cl },
-          0 },
-        /* 51 << 200 */
-        { { 0xec9c2e35cbd3ccafl,0xb9eeff3ddcda8f30l,0x82012e191062d02el,
-            0xed964cc94efc6b6el },
-          { 0x8853ea0a6bf54c22l,0xea40fcc0f3cbe264l,0x21f9c01ddecf114el,
-            0x05e754c63da71e59l },
-          0 },
-        /* 52 << 200 */
-        { { 0xe6a26d38046dfc72l,0x70409579c2175175l,0x2a575ac5d44e0c1dl,
-            0xb35395e01479ab5al },
-          { 0x1550a5d4f7bfbd8el,0x01daeb680778807bl,0xe0aa940321294dbal,
-            0x84bcdc8c5b5a93b7l },
-          0 },
-        /* 53 << 200 */
-        { { 0x876cc4d2520f04abl,0x6e320f5da85ff6a8l,0x7c504720ce17bc80l,
-            0xe7907079a62089f9l },
-          { 0xa45c4ac7bca45feel,0xd8f3facd5bd54b0cl,0xc0b036277b3e4a24l,
-            0xaabe96dfe4cd4b57l },
-          0 },
-        /* 55 << 200 */
-        { { 0xdc85a54773862ce4l,0x169051a3cc6f5d85l,0x8e3d3be0355f4df7l,
-            0xa139d6fac72bac76l },
-          { 0xddc95d0dfeb0a6f0l,0xd53f70e545cd6955l,0x18eede5e47e54112l,
-            0x4a135dc9cbc6a52el },
-          0 },
-        /* 57 << 200 */
-        { { 0x705a08ba90a58fb4l,0x10eef880fb3f8a64l,0x4ced9ba2f8e585ffl,
-            0xb4f0f955fc6ebef5l },
-          { 0x152c1a338d8b739el,0xb2be701db495bee5l,0xd27141a8d3540a74l,
-            0x20c8a00247f9e9d7l },
-          0 },
-        /* 59 << 200 */
-        { { 0x6d5ae921f5adcb3fl,0xaed1047003a3b610l,0x7c75e36f22256df9l,
-            0xe664b36fb97dae99l },
-          { 0x138b5eca91e746ael,0xb3e01ef5648674a7l,0xa3f256da9e375c74l,
-            0xa00e82bc6a82d6f3l },
-          0 },
-        /* 60 << 200 */
-        { { 0xe7a01eae6e28b4a8l,0xb3bf8224782166c9l,0x0b7ba2a06a244510l,
-            0x9751a69c2abbb4dbl },
-          { 0xb611adc1b3f9fcbcl,0x1d08eb3b436c4675l,0x1c71e98a20f96a64l,
-            0x33d9b58c7ffd3f08l },
-          0 },
-        /* 61 << 200 */
-        { { 0x7c7b03c1affa2d6cl,0x5f189bb9aec6e624l,0xe77a1eedadeff5e7l,
-            0xfc58b90f4280b467l },
-          { 0x561e5d579b71cb4el,0x8ed767aa36d6a17el,0x38d8671e8aa9e188l,
-            0x7bc68f07a95350c0l },
-          0 },
-        /* 63 << 200 */
-        { { 0xe0cd38cf98c01384l,0xc6741123a4226d9fl,0xdd1d42dbf877a0b8l,
-            0xc5986ef0110b3cbal },
-          { 0xeba949f809c8cebel,0x96b47bc4bd39f1dcl,0xbad140b6e07a2a3cl,
-            0x2a8d80999ac5ca8al },
-          0 },
-        /* 64 << 200 */
-        { { 0x39d934abd3c095f1l,0x04b261bee4b76d71l,0x1d2e6970e73e6984l,
-            0x879fb23b5e5fcb11l },
-          { 0x11506c72dfd75490l,0x3a97d08561bcf1c1l,0x43201d82bf5e7007l,
-            0x7f0ac52f798232a7l },
-          0 },
-        /* 65 << 200 */
-        { { 0x8cf27618590ca850l,0x58134f6f44bb94f2l,0x0a147562b78b4eecl,
-            0x2e5986e39f1ed647l },
-          { 0x9becf893348393b0l,0xaea21b92c31c2a86l,0x3d69859e5ff1b9a6l,
-            0x6fcd19f4cd805691l },
-          0 },
-        /* 71 << 200 */
-        { { 0x81619bd4841f43c3l,0x3a3325538e5c61f0l,0x2b68921eda862151l,
-            0x97f5c8a741a491f8l },
-          { 0x8b452094d3b9afa0l,0x93b2b7b4f2124dbcl,0x53285e7d26e0e26dl,
-            0x3f003fc5c8a24edel },
-          0 },
-        /* 77 << 200 */
-        { { 0x4cdabb586c025824l,0x5935ad1586bfcd7dl,0x8ce2c3101b7c5533l,
-            0x761c9fe96cae8808l },
-          { 0x8a0723f5d9e66d70l,0xb640b323dcced11dl,0x5768528051ae548cl,
-            0x83576f75d53f3f2cl },
-          0 },
-        /* 83 << 200 */
-        { { 0xc715edc47b532ec3l,0x159765e6c4a6e14bl,0x4a74f15228cd2d45l,
-            0xbfd309edae8c753bl },
-          { 0xf56bb5315d6d5245l,0x2c89c21833b30a55l,0xe436141acd4ed5fal,
-            0x7eb7a5c707868ee6l },
-          0 },
-        /* 89 << 200 */
-        { { 0x9a3ad3ffb0c7c48cl,0x25e8d977738e3638l,0xbb6c6c9d1c024074l,
-            0xeda1ac0f8cfdf416l },
-          { 0x93059ba538de49e2l,0xdb199cfc1b9ce741l,0x49b05e9446f3b494l,
-            0x717cafc606480902l },
-          0 },
-        /* 95 << 200 */
-        { { 0x8d27421052885708l,0x9d2297fd74e5b9b5l,0xe7cb6a68dc4d7318l,
-            0x0b60b0d276357b31l },
-          { 0x57301994532c2095l,0xfbae2ba203373452l,0xe8020b20ba700583l,
-            0x1ca7772c2988919cl },
-          0 },
-        /* 101 << 200 */
-        { { 0x723296eb918f3eecl,0x358c9ff0b79901c6l,0x64a1934c8d5e814cl,
-            0x7e5a9afced165177l },
-          { 0xd783840168733e7al,0xfcf3c0b6f61ede6dl,0x94ec0bf08434e804l,
-            0xa5a70153c192c1cdl },
-          0 },
-        /* 107 << 200 */
-        { { 0x03cdf976c23e49d4l,0x51e5cfa5a2ae72d5l,0x7716faa3100f7a51l,
-            0xc53153a2c14dc015l },
-          { 0xe7c69b052b47ec18l,0xff4756907ea93b01l,0x55fde3c540a2f205l,
-            0x0263d0b12f85aed6l },
-          0 },
-        /* 113 << 200 */
-        { { 0x668c56619686fe30l,0x382a8ccd8f73a476l,0xda012cbfb40a85e7l,
-            0x55ea1e72e9e88b91l },
-          { 0x8312556088cc5afcl,0x44ae54cbc45b19c7l,0xc91fffa8f86a02cdl,
-            0xc79f573752d7e89bl },
-          0 },
-        /* 116 << 200 */
-        { { 0x652b50523e357579l,0x08ce7d3a2afe5746l,0x9dc1cca6f71a12efl,
-            0x80a221c24f6c4196l },
-          { 0xdde40eff0f49f508l,0x7995bb46913b0dc3l,0x4adbdeb385e44f6el,
-            0x6816bb3ab222e4bbl },
-          0 },
-        /* 119 << 200 */
-        { { 0xce1ee518579a1a4dl,0x5d86e8912bc3870al,0x230878d18da907c4l,
-            0xc648392777ae7ea8l },
-          { 0x64319653016c0ad7l,0x7cbfa0b0b71f20dal,0xbf087dc3395ed4d8l,
-            0x59512add307d218dl },
-          0 },
-        /* 125 << 200 */
-        { { 0x7378a969d8ae335el,0x11c69965506d3a42l,0x212539769949468al,
-            0x570cf87e64995050l },
-          { 0xf300ad2e30b94e22l,0xbc159cf8f36dad32l,0xdff3b3767ca8aa6al,
-            0xa5de93b5627fb9e7l },
-          0 },
-    },
-    {
-        /* 0 << 208 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 208 */
-        { { 0x75d9bc15adf7cccfl,0x81a3e5d6dfa1e1b0l,0x8c39e444249bc17el,
-            0xf37dccb28ea7fd43l },
-          { 0xda654873907fba12l,0x35daa6da4a372904l,0x0564cfc66283a6c5l,
-            0xd09fa4f64a9395bfl },
-          0 },
-        /* 3 << 208 */
-        { { 0xc51aa29e5cfe5c48l,0x82c020ae815ee096l,0x7848ad827549a68al,
-            0x7933d48960471355l },
-          { 0x04998d2e67c51e57l,0x0f64020ad9944afcl,0x7a299fe1a7fadac6l,
-            0x40c73ff45aefe92cl },
-          0 },
-        /* 4 << 208 */
-        { { 0xe5f649be9d8e68fdl,0xdb0f05331b044320l,0xf6fde9b3e0c33398l,
-            0x92f4209b66c8cfael },
-          { 0xe9d1afcc1a739d4bl,0x09aea75fa28ab8del,0x14375fb5eac6f1d0l,
-            0x6420b560708f7aa5l },
-          0 },
-        /* 5 << 208 */
-        { { 0xbf44ffc75488771al,0xcb76e3f17f2f2191l,0x4197bde394f86a42l,
-            0x45c25bb970641d9al },
-          { 0xd8a29e31f88ce6dcl,0xbe2becfd4bb7ac7dl,0x13094214b5670cc7l,
-            0xe90a8fd560af8433l },
-          0 },
-        /* 7 << 208 */
-        { { 0x0ecf9b8b4ebd3f02l,0xa47acd9d86b770eal,0x93b84a6a2da213cel,
-            0xd760871b53e7c8cfl },
-          { 0x7a5f58e536e530d7l,0x7abc52a51912ad51l,0x7ad43db02ea0252al,
-            0x498b00ecc176b742l },
-          0 },
-        /* 9 << 208 */
-        { { 0x9ff713ef888ae17fl,0x6007f68fb34b7bebl,0x5d2b18983b653d64l,
-            0xcbf73e91d3ca4b1bl },
-          { 0x4b050ad56cdfb3a1l,0x41bd3ec3d1f833a4l,0x78d7e2ee719d7bf5l,
-            0xea4604672a27412el },
-          0 },
-        /* 10 << 208 */
-        { { 0x7dad6d1b42cd7900l,0xb6e6b439e058f8a4l,0x8836f1e662aa3bbcl,
-            0xd45bf2c811142b0al },
-          { 0xae324bac3c045ed1l,0x372be24d270a8333l,0xeeda7a3a6b7c73b6l,
-            0xf6675402db49562al },
-          0 },
-        /* 11 << 208 */
-        { { 0xc312ba68441e760dl,0x84d0d061a50e512el,0xfe764f4e4bbdd849l,
-            0xa924adcf9dadd5c0l },
-          { 0x08685961debfe976l,0xd3d846c529fba601l,0x43bf8227dc3f4040l,
-            0x05e767b8a49e9ff5l },
-          0 },
-        /* 13 << 208 */
-        { { 0xc4689c309953e453l,0x5e355a2e1712dca5l,0x1ff83c81f1cd96f7l,
-            0xb06b89fb44cf56dbl },
-          { 0x1827705365f16e0dl,0x6403b91de5618672l,0xba3f9475be384bc6l,
-            0x7f691cbe303ce5f3l },
-          0 },
-        /* 15 << 208 */
-        { { 0x4589ba03210f4045l,0xd5e7366301e8012al,0x1c26052d74462ffal,
-            0xe78f600c4f989519l },
-          { 0xc63ca0c97cee0b2fl,0xbe588573af760b5fl,0x05906fc4593773cdl,
-            0xd5970fb0e322d5afl },
-          0 },
-        /* 16 << 208 */
-        { { 0x103c46e60ebcf726l,0x4482b8316231470el,0x6f6dfaca487c2109l,
-            0x2e0ace9762e666efl },
-          { 0x3246a9d31f8d1f42l,0x1b1e83f1574944d2l,0x13dfa63aa57f334bl,
-            0x0cf8daed9f025d81l },
-          0 },
-        /* 17 << 208 */
-        { { 0xf67c098aae0690aal,0x1a4656422b7bc62bl,0xaffc6b917220dea2l,
-            0xd97ac543d2552deel },
-          { 0x1f84514a7e816b8el,0xe9887e81a8f38552l,0x2e6358e6847ad46bl,
-            0x1f67871e6bc9895el },
-          0 },
-        /* 19 << 208 */
-        { { 0x2462b6e0d47f43fal,0x71db3610d8a245e5l,0x0c26b0e734208974l,
-            0x0cd6d49d2029bd2el },
-          { 0xf207c9f6091922b8l,0x0c476c5c7f0fbf66l,0x6de7efb2295d6da8l,
-            0xea054ee10ced6cfel },
-          0 },
-        /* 21 << 208 */
-        { { 0xd21496e3e9bd795cl,0xf293f617c6a557del,0x9d041b7239a45642l,
-            0xe8353dab4ac87f80l },
-          { 0x21e9f35620d8d019l,0x1f4adca9d2fb2668l,0xe5f68227dfecd64al,
-            0x10d71b79d7f09ec0l },
-          0 },
-        /* 23 << 208 */
-        { { 0xca3f068999f87118l,0x99a933911b2417f0l,0xa383481a3d1f70e5l,
-            0x7a31a6c833b14414l },
-          { 0x9d60f4368b2a9931l,0xd4c97ded80588534l,0x7cb29e82ab6a8bdal,
-            0x3799bdad97b4c45al },
-          0 },
-        /* 25 << 208 */
-        { { 0x51da0ff629011af3l,0xcbb03c809a4f0855l,0xea3536725555b10bl,
-            0x4bf94e025c7da97el },
-          { 0x384352f5ff713300l,0xb2c2b675192d41e6l,0x4ff66861625ca046l,
-            0xf0f5e472013dddc4l },
-          0 },
-        /* 27 << 208 */
-        { { 0x38c44cdc59987914l,0xad7f2829757fb853l,0x9aabf1c8688e3342l,
-            0xbe0f1e4ef534c850l },
-          { 0x732cac652ec24ecal,0x9328b657933bb5e4l,0xe2747ff60bb31033l,
-            0xdbaab72cfcdc36acl },
-          0 },
-        /* 28 << 208 */
-        { { 0x0e5e3049a639fc6bl,0xe75c35d986003625l,0x0cf35bd85dcc1646l,
-            0x8bcaced26c26273al },
-          { 0xe22ecf1db5536742l,0x013dd8971a9e068bl,0x17f411cb8a7909c5l,
-            0x5757ac98861dd506l },
-          0 },
-        /* 29 << 208 */
-        { { 0xaf410d5aac66a3e8l,0x39fcbffb2031f658l,0xd29e58c947ce11fbl,
-            0x7f0b874965f73e49l },
-          { 0xedc30f4b27fea6c6l,0xe03b9103d2baa340l,0xa7bb3f17ae680612l,
-            0xe06656a8197af6f0l },
-          0 },
-        /* 31 << 208 */
-        { { 0x84562095bff86165l,0x994194e916bc7589l,0xb1320c7ec14c6710l,
-            0x508a8d7f766e978fl },
-          { 0xd04adc9ec7e1f6fel,0x7bafaff68398cecfl,0x906df2fccef3b934l,
-            0xc65afe18f3008c38l },
-          0 },
-        /* 33 << 208 */
-        { { 0x477ffeeeab983130l,0x5426363a96e83d55l,0xcf0370a15204af42l,
-            0x99834414b5a6ea8fl },
-          { 0xf475ba711ab4ee8al,0x8486da5d0102d8f2l,0x55082e713839c821l,
-            0xa57e58395b65defal },
-          0 },
-        /* 34 << 208 */
-        { { 0x34b2185bbbb33a76l,0x189038b7d48158c2l,0xfa32eb90e9e90217l,
-            0x79271771730e74dfl },
-          { 0x315ed8c2a5d01ffdl,0x9799dae723e6a95el,0x40070aa016f5715al,
-            0x40e6c0ca5ea51f8cl },
-          0 },
-        /* 35 << 208 */
-        { { 0x099c0570d8132163l,0xcd5508a3023dbbf3l,0x18162ff526bfe6a6l,
-            0xf39e071144bbb455l },
-          { 0x49664996eaa3cf96l,0x1c6442d5e2649be9l,0x6199f740c01d269dl,
-            0x4be605ee37542c11l },
-          0 },
-        /* 36 << 208 */
-        { { 0xc7313e9cf36658f0l,0xc433ef1c71f8057el,0x853262461b6a835al,
-            0xc8f053987c86394cl },
-          { 0xff398cdfe983c4a1l,0xbf5e816203b7b931l,0x93193c46b7b9045bl,
-            0x1e4ebf5da4a6e46bl },
-          0 },
-        /* 37 << 208 */
-        { { 0xd032fbfd0dbf82b4l,0x707181f668e58969l,0xef434381e7be2d5el,
-            0x290669176f2c64ddl },
-          { 0xf66cffc3772769abl,0x68d8a76a17aad01cl,0xdd3991c590f6e078l,
-            0xdb74db06ea4ac7dcl },
-          0 },
-        /* 39 << 208 */
-        { { 0x9f34a7c11c78be71l,0x7bf2f2d149ca6987l,0xb528a514dcd34afcl,
-            0x4dddb3f1183a68b1l },
-          { 0x54d2626660b83883l,0x9073e4e0e0cd8dadl,0xbd2b837d9eb818b2l,
-            0x5fa5f9086ae2e32dl },
-          0 },
-        /* 40 << 208 */
-        { { 0xf9942a6043a24fe7l,0x29c1191effb3492bl,0x9f662449902fde05l,
-            0xc792a7ac6713c32dl },
-          { 0x2fd88ad8b737982cl,0x7e3a0319a21e60e3l,0x09b0de447383591al,
-            0x6df141ee8310a456l },
-          0 },
-        /* 41 << 208 */
-        { { 0xcd02ba1e0df98a64l,0x301b6bfa03f5676el,0x41e1a8d4a2fe4090l,
-            0x489c1cbf47f0e1dcl },
-          { 0x4171a98c20760847l,0xdcb21cee77af4796l,0x5fb0f0c9d0b7e981l,
-            0x4c2791dff33b9f8dl },
-          0 },
-        /* 43 << 208 */
-        { { 0x95d7ec0c50420a50l,0x5794665c2a6756d5l,0x73558c6e9101e7f5l,
-            0xa3fa0f8c1642af0el },
-          { 0xa11b309b4ee43551l,0x3939de30cb8fc712l,0x9710f2320fde8921l,
-            0x2a4db2d5cae8b41cl },
-          0 },
-        /* 44 << 208 */
-        { { 0xaec1a039e6d6f471l,0x14b2ba0f1198d12el,0xebc1a1603aeee5acl,
-            0x401f4836e0b964cel },
-          { 0x2ee437964fd03f66l,0x3fdb4e49dd8f3f12l,0x6ef267f629380f18l,
-            0x3e8e96708da64d16l },
-          0 },
-        /* 45 << 208 */
-        { { 0xdf6cdac0bc4c78adl,0xbe9e32182e97376el,0xa37f9d8b1a139274l,
-            0x7640c3982807128el },
-          { 0xe9735166c05b5f85l,0xbccd3675100e5716l,0x51376a293e5c9682l,
-            0x95efe088848f6aeal },
-          0 },
-        /* 46 << 208 */
-        { { 0xfac2d7dd23d14105l,0xdda17149a9136f52l,0xb9f3a9c672d1a99bl,
-            0x2fcf532a142c3b20l },
-          { 0xc2731f1e61190c1bl,0x26dbe810a76509e4l,0xc96cc431908bb92fl,
-            0x5661a84d80e3e694l },
-          0 },
-        /* 47 << 208 */
-        { { 0x5194d144150ba121l,0x8de57c48b6b11561l,0x803228da96c156d9l,
-            0x2112e4250a8f6376l },
-          { 0x15436294643449ffl,0xfc3880add4118cd0l,0x16ed90731e3f7413l,
-            0xa400699901d38d6dl },
-          0 },
-        /* 48 << 208 */
-        { { 0xbc19180c207674f1l,0x112e09a733ae8fdbl,0x996675546aaeb71el,
-            0x79432af1e101b1c7l },
-          { 0xd5eb558fde2ddec6l,0x81392d1f5357753fl,0xa7a76b973ae1158al,
-            0x416fbbff4a899991l },
-          0 },
-        /* 49 << 208 */
-        { { 0xf84c9147c52d7384l,0x86391accec01efa6l,0xffd68616f9c6f3f4l,
-            0xc7536461b17c2de6l },
-          { 0xa81f4ba10121abdfl,0xa068a2e26f6eae27l,0xe0ee90350eb159f0l,
-            0x4c48f761fd8c4b9cl },
-          0 },
-        /* 51 << 208 */
-        { { 0x4b6d71e87790000cl,0xced195744ce9293el,0xc25626a3747585e8l,
-            0xb8307d22d7044270l },
-          { 0xf08e7ef6117c24cbl,0xae6403162f660d04l,0xbc3ffdcff224a2fdl,
-            0x1ebc0328d0586c7el },
-          0 },
-        /* 52 << 208 */
-        { { 0x9e65fdfd0d4a9dcfl,0x7bc29e48944ddf12l,0xbc1a92d93c856866l,
-            0x273c69056e98dfe2l },
-          { 0x69fce418cdfaa6b8l,0x606bd8235061c69fl,0x42d495a06af75e27l,
-            0x8ed3d5056d873a1fl },
-          0 },
-        /* 53 << 208 */
-        { { 0x46b160e5a6022278l,0x86b1d50cc30a51fcl,0xe898ac0e684b81b7l,
-            0x04d591e277b93597l },
-          { 0xd20cac347626e18al,0xb49c941f0a968733l,0x054e6e7e21631627l,
-            0xd6d33db9d4c716b1l },
-          0 },
-        /* 55 << 208 */
-        { { 0xaa79ab4bf91e9b75l,0x7df3235bd34d961dl,0x9f3954e6534a40e1l,
-            0x80f88d2c790b4456l },
-          { 0x98f7711b21e9fb2al,0x0a04c318877d27e6l,0x499b7c2412338848l,
-            0x0b1dbe9ccd5e7ec3l },
-          0 },
-        /* 57 << 208 */
-        { { 0xb430ff44e04715ffl,0x671358d565d076d0l,0x3946d38f22c3aa06l,
-            0x80919ea363b2d627l },
-          { 0x14ffa219e8790922l,0xfe1d895ae8d89c48l,0x717e9e51748e806el,
-            0xb91e1ddf550d711dl },
-          0 },
-        /* 59 << 208 */
-        { { 0x8aac26225f540127l,0x57cd5d7cba25f742l,0x87006a6b1df7a0fcl,
-            0x88e9ab863ecbf26cl },
-          { 0xe1b8155f9143b314l,0xc00196130b679bddl,0x819e7b61a1871d07l,
-            0xc36e7892cc2c9cc9l },
-          0 },
-        /* 60 << 208 */
-        { { 0x4b03c55b8e33787fl,0xef42f975a6384673l,0xff7304f75051b9f0l,
-            0x18aca1dc741c87c2l },
-          { 0x56f120a72d4bfe80l,0xfd823b3d053e732cl,0x11bccfe47537ca16l,
-            0xdf6c9c741b5a996bl },
-          0 },
-        /* 61 << 208 */
-        { { 0x65729b05301ee370l,0x3ed09a2a24c2824cl,0x781ef66a33481977l,
-            0xf2ccdeec193506d0l },
-          { 0x92b4f70d703422d6l,0x7f004a43f80a1b99l,0x47db23607a856445l,
-            0x783a8dd1ce5b0622l },
-          0 },
-        /* 63 << 208 */
-        { { 0x7febefd34e9aac5al,0x601c89e2bdd6173el,0x79b08930c257431el,
-            0x915d601d399ee099l },
-          { 0xfa48347eca02acd2l,0xc33249baeeb7ccedl,0xd76e408755704722l,
-            0xd3709c600dcf4878l },
-          0 },
-        /* 64 << 208 */
-        { { 0xee7332c7904fc3fal,0x14a23f45c7e3636al,0xc38659c3f091d9aal,
-            0x4a995e5db12d8540l },
-          { 0x20a53becf3a5598al,0x56534b17b1eaa995l,0x9ed3dca4bf04e03cl,
-            0x716c563ad8d56268l },
-          0 },
-        /* 65 << 208 */
-        { { 0x963353201580f3adl,0x6c495304b0cd50d4l,0xd035cdc7555ff981l,
-            0xe65cd063c6b6bdfbl },
-          { 0x7deb3cbb437e749cl,0xa9de9f3db5dc24a1l,0xe2e76a2b35c29ffal,
-            0x4d35e261323ba650l },
-          0 },
-        /* 71 << 208 */
-        { { 0x52c46fc8c89e2766l,0x7330b02bb945e5f2l,0xc77ef75c2673ebbcl,
-            0x1740e72657c33783l },
-          { 0xf0312d29623565fbl,0xff9f707af0ca1ed9l,0xb98609ca5ea51a4al,
-            0xde86b9a87b5cc91fl },
-          0 },
-        /* 77 << 208 */
-        { { 0x0dece4badca158b7l,0x5e39baf6a3e9f837l,0xcf14e6dc4d57b640l,
-            0x0548aaa4b67bcbe7l },
-          { 0xb6cf5b393c90e434l,0xf8b3c5645006f3abl,0xa74e92859bf04bd9l,
-            0xf59a3a6bf99c8977l },
-          0 },
-        /* 83 << 208 */
-        { { 0x652ca66ac5b072d5l,0x2102b55993ad4928l,0x1b5f192d88210f9bl,
-            0xb18710144c6ad7e5l },
-          { 0x3979fde3bc0abf13l,0xb5cb4c7dac3fd631l,0x4aedffa6c200ec7bl,
-            0x8aed81ceaddf3610l },
-          0 },
-        /* 89 << 208 */
-        { { 0x72b48105abeefbael,0x0e9e6e41827bb22bl,0xf45ada151e52a848l,
-            0xb8e94579534867a2l },
-          { 0x3a08773b7adb0fdcl,0xe7133a28b83316dfl,0xc8b7b08c5bb41470l,
-            0x28719eb4aaf140c7l },
-          0 },
-        /* 95 << 208 */
-        { { 0x398996cd430007cel,0x20d8c0e07642d616l,0x81566639a7eb2397l,
-            0x74aa0b692e133732l },
-          { 0x326745907ba80aa7l,0x56a491c39bd69d64l,0xc8c8b040e54dcce0l,
-            0x3f991872d571d037l },
-          0 },
-        /* 101 << 208 */
-        { { 0x70e681fa4fb595c9l,0xf0635d6386b4d97bl,0xfc029284c1347081l,
-            0x5a4e9cbe4fee0303l },
-          { 0xd43da8609c31094fl,0x0412cfed6515b4aal,0x10fc06da8d53be86l,
-            0x4b7b380b4bccc94dl },
-          0 },
-        /* 107 << 208 */
-        { { 0x560d57408e7d6738l,0xa82268a8937f12a2l,0x87787b2d3d95b463l,
-            0xb36539b2030e23bfl },
-          { 0x60d16b8fd61e761dl,0x96ba2949fe8efccdl,0x8c170eda667fa7ebl,
-            0xc880d74cf800d7c3l },
-          0 },
-        /* 113 << 208 */
-        { { 0x7c05d6c1efcbfea0l,0xae7ba3291a2f6dd8l,0x521598ed5bd42ecfl,
-            0x58e07842ef0ab40cl },
-          { 0xae65105f66c752a5l,0x4910fba45f99d499l,0xbfdaf5fce9e44357l,
-            0x6aaf4053796ee5b6l },
-          0 },
-        /* 116 << 208 */
-        { { 0xf58fecb16f640f62l,0xe274b92b39f51946l,0x7f4dfc046288af44l,
-            0x0a91f32aeac329e5l },
-          { 0x43ad274bd6aaba31l,0x719a16400f6884f9l,0x685d29f6daf91e20l,
-            0x5ec1cc3327e49d52l },
-          0 },
-        /* 119 << 208 */
-        { { 0x615ac02527ba93edl,0x0d43915d3556ef47l,0x8c739fd1cb0cda89l,
-            0xa2318169625f7a16l },
-          { 0x17d486113e0479cel,0x814beb6038ee541el,0x09c9807fb98ef355l,
-            0x4ad3668752d07af6l },
-          0 },
-        /* 125 << 208 */
-        { { 0x5c1f42e444f3f568l,0xd743b7c078fb409bl,0xe09edccb6224362cl,
-            0x7f13d140c5fe872cl },
-          { 0x85e8cb88f403c0ebl,0x918a231b688d20a0l,0xc65b7ab9f246c73fl,
-            0xda743fbf76dbd6adl },
-          0 },
-    },
-    {
-        /* 0 << 216 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 216 */
-        { { 0xa0158eeae457a477l,0xd19857dbee6ddc05l,0xb326522418c41671l,
-            0x3ffdfc7e3c2c0d58l },
-          { 0x3a3a525426ee7cdal,0x341b0869df02c3a8l,0xa023bf42723bbfc8l,
-            0x3d15002a14452691l },
-          0 },
-        /* 3 << 216 */
-        { { 0xf3cae7e9262a3539l,0x78a49d1d6670d59el,0x37de0f63c1c5e1b9l,
-            0x3072c30c69cb7c1cl },
-          { 0x1d278a5277c850e6l,0x84f15f8f1f6a3de6l,0x46a8bb45592ca7adl,
-            0x1912e3eee4d424b8l },
-          0 },
-        /* 4 << 216 */
-        { { 0x6ba7a92079e5fb67l,0xe1331feb70aa725el,0x5080ccf57df5d837l,
-            0xe4cae01d7ff72e21l },
-          { 0xd9243ee60412a77dl,0x06ff7cacdf449025l,0xbe75f7cd23ef5a31l,
-            0xbc9578220ddef7a8l },
-          0 },
-        /* 5 << 216 */
-        { { 0xdc988086365e668bl,0xada8dcdaaabda5fbl,0xbc146b4c255f1fbel,
-            0x9cfcde29cf34cfc3l },
-          { 0xacbb453e7e85d1e4l,0x9ca09679f92358b5l,0x15fc2d96240823ffl,
-            0x8d65adf70c11d11el },
-          0 },
-        /* 7 << 216 */
-        { { 0x775557f10296f4fdl,0x1dca76a3ea51b436l,0xf3e98f60fb950805l,
-            0x31ff32ea831cf7f1l },
-          { 0x643e7bf18d2c714bl,0x64b5c3392e9d2acal,0xa9fd9ccc6adc2d23l,
-            0xfc2397eccc721b9bl },
-          0 },
-        /* 9 << 216 */
-        { { 0xf031182db48ec57dl,0x515d32f804b233b9l,0x06bbb1d4093aad26l,
-            0x88a142fe0d83d1ecl },
-          { 0x3b95c099245c73f8l,0xb126d4af52edcd32l,0xf8022c1e8fcb52e6l,
-            0x5a51ac4c0106d339l },
-          0 },
-        /* 10 << 216 */
-        { { 0xc589e1ce44ace150l,0xe0f8d3d94381e97cl,0x59e99b1162c5a4b8l,
-            0x90d262f7fd0ec9f9l },
-          { 0xfbc854c9283e13c9l,0x2d04fde7aedc7085l,0x057d776547dcbecbl,
-            0x8dbdf5919a76fa5fl },
-          0 },
-        /* 11 << 216 */
-        { { 0xb7f70a1a7c64a054l,0x0dc1c0df9db43e79l,0x6d0a4ae251fe63d6l,
-            0xe0d5e3327f0c8abfl },
-          { 0xff5500362b7ecee8l,0x3ea0e6f75d055008l,0x30deb62ff24ac84fl,
-            0x936969fd5d7116b7l },
-          0 },
-        /* 13 << 216 */
-        { { 0x02da76122617cf7fl,0xd6e25d4eeee35260l,0xb2fa5b0afd3533e9l,
-            0xe76bb7b0b9126f88l },
-          { 0x692e6a9988856866l,0x3fdf394f49db65cal,0x2529699122d8d606l,
-            0xe815bfbf3dd7c4cfl },
-          0 },
-        /* 15 << 216 */
-        { { 0x69c984ed4d844e7fl,0xd354b2174a2e8a82l,0x25bd4addfb2c4136l,
-            0xf72df4de144b26e1l },
-          { 0xd0aa9db0e6101afdl,0x4445efaae49bd1b8l,0x5dc54eee331593b2l,
-            0xfa35e3b9094bf10bl },
-          0 },
-        /* 16 << 216 */
-        { { 0xdb567d6ac42bd6d2l,0x6df86468bb1f96ael,0x0efe5b1a4843b28el,
-            0x961bbb056379b240l },
-          { 0xb6caf5f070a6a26bl,0x70686c0d328e6e39l,0x80da06cf895fc8d3l,
-            0x804d8810b363fdc9l },
-          0 },
-        /* 17 << 216 */
-        { { 0x660a0f893ea089c3l,0xa25823aac9009b09l,0xb2262d7ba681f5e5l,
-            0x4fc30c8c3413863al },
-          { 0x691544b7c32059f7l,0xf65cf276b21c6134l,0xe3a96b2a5104dabal,
-            0xbb08d109a43ee42fl },
-          0 },
-        /* 19 << 216 */
-        { { 0x85a52d69f9916861l,0x595469a4da4fa813l,0x1dd7786e3338502fl,
-            0x34b8ef2853963ac5l },
-          { 0xc0f019f81a891b25l,0xb619970c4f4bd775l,0x8c2a5af3be19f681l,
-            0x9463db0498ec1728l },
-          0 },
-        /* 21 << 216 */
-        { { 0xeb62c27801f39eabl,0x27de39340ab3a4aal,0xfbd17520a982ca8dl,
-            0x58817ec2e4bdc6edl },
-          { 0x312d78de31c6ac13l,0x9483bf7609202ea6l,0xf64ab8b622c6d8e1l,
-            0xdddf589ce580de74l },
-          0 },
-        /* 23 << 216 */
-        { { 0xe0fa3336ee98a92al,0x7d80eeef66a4d745l,0xb612531bba0119d3l,
-            0x86e770c1b351fe15l },
-          { 0xafbad6f882d5a397l,0x1e5f1cb80dbf0110l,0x25138ac09f79063dl,
-            0x089ed22f2746a156l },
-          0 },
-        /* 25 << 216 */
-        { { 0x198d1b5d7d8b8ddel,0xf32c11078dab37fbl,0xf15fcb6d42b93874l,
-            0x91ddb74f41f94f84l },
-          { 0x6a64540a271524b2l,0x950a0c12758b5a64l,0xf9f237933dce9580l,
-            0xc8edd0ab2cf8ce32l },
-          0 },
-        /* 27 << 216 */
-        { { 0xefc6357eae1046b7l,0xe6704929612932e4l,0xa20305d4b1355b17l,
-            0x88a9136a58b4a156l },
-          { 0xbc379985b4d275ecl,0x718b91316eaf338bl,0x61229a7ad152a509l,
-            0x1109f7c445157ae9l },
-          0 },
-        /* 28 << 216 */
-        { { 0xcf197ca7fb8088fal,0x014272474ddc96c5l,0xa2d2550a30777176l,
-            0x534698984d0cf71dl },
-          { 0x6ce937b83a2aaac6l,0xe9f91dc35af38d9bl,0x2598ad83c8bf2899l,
-            0x8e706ac9b5536c16l },
-          0 },
-        /* 29 << 216 */
-        { { 0x2bde42140df85c2cl,0x4fb839f4058a7a63l,0x7c10572a47f51231l,
-            0x878826231989824el },
-          { 0xa8293d2016e1564al,0xcb11c0f818c04576l,0x83b91e7d9740c631l,
-            0xbdcb23d0cbffcea0l },
-          0 },
-        /* 31 << 216 */
-        { { 0x64bdfd2a9094bfc8l,0x8558acc60fc54d1el,0x3992848faf27721el,
-            0x7a8fcbdaa14cd009l },
-          { 0x6de6120900a4b9c2l,0xbd192b1b20cf8f28l,0x2356b90168d9be83l,
-            0xce1e7a944a49a48al },
-          0 },
-        /* 33 << 216 */
-        { { 0x7630103b6ac189b9l,0x15d35edc6f1f5549l,0x9051799d31cb58edl,
-            0xb4f32694a7a8579el },
-          { 0x6f037435f2abe306l,0xf0595696410fb2f7l,0x2a0d347a5cc98f59l,
-            0x9c19a9a87e3bbd69l },
-          0 },
-        /* 34 << 216 */
-        { { 0x87f8df7c0e58d493l,0xb1ae5ed058b73f12l,0xc368f784dea0c34dl,
-            0x9bd0a120859a91a0l },
-          { 0xb00d88b7cc863c68l,0x3a1cc11e3d1f4d65l,0xea38e0e70aa85593l,
-            0x37f13e987dc4aee8l },
-          0 },
-        /* 35 << 216 */
-        { { 0x91dbe00e49430cd2l,0xcc67c0b17aa8ef6bl,0x769985b8a273f1a5l,
-            0x358371dc360e5dafl },
-          { 0xbf9b9127d6d8b5e8l,0x748ae12cb45588c1l,0x9c609eb556076c58l,
-            0xf287489109733e89l },
-          0 },
-        /* 36 << 216 */
-        { { 0x10d38667bc947badl,0x738e07ce2a36ee2el,0xc93470cdc577fcacl,
-            0xdee1b6162782470dl },
-          { 0x36a25e672e793d12l,0xd6aa6caee0f186dal,0x474d0fd980e07af7l,
-            0xf7cdc47dba8a5cd4l },
-          0 },
-        /* 37 << 216 */
-        { { 0xceb6aa80f8a08fddl,0xd98fc56f46fead7bl,0xe26bd3f8b07b3f1fl,
-            0x3547e9b99d361c3el },
-          { 0x1a89f802e94b8eccl,0x2210a590c0a40ef2l,0xe7e5b965afc01bf2l,
-            0xca3d57fe234b936bl },
-          0 },
-        /* 39 << 216 */
-        { { 0x9230a70db9f9e8cdl,0xa63cebfcb81ba2ecl,0x8482ca87a8f664d6l,
-            0xa8ae78e00b137064l },
-          { 0xb787bd558384c687l,0xfde1d1bdb29ae830l,0xc4a9b2e39f0b7535l,
-            0x7e6c9a15efde2d01l },
-          0 },
-        /* 40 << 216 */
-        { { 0x7d2e5c054f7269b1l,0xfcf30777e287c385l,0x10edc84ff2a46f21l,
-            0x354417574f43fa36l },
-          { 0xf1327899fd703431l,0xa438d7a616dd587al,0x65c34c57e9c8352dl,
-            0xa728edab5cc5a24el },
-          0 },
-        /* 41 << 216 */
-        { { 0xcd6e6db872896d4fl,0x324afa99896c4640l,0x37d18c3d33a292bdl,
-            0x98dba3b44143421fl },
-          { 0x2406f3c949c61b84l,0x402d974754899588l,0xc73b7fd634a485e5l,
-            0x75c9bae08587f0c3l },
-          0 },
-        /* 43 << 216 */
-        { { 0x6c32fa8cb0b4a04dl,0xeb58d0d875fda587l,0x61d8a157c4b86563l,
-            0x92191bf01006b8afl },
-          { 0xd04d3eff32d3478bl,0x3cc52eab2a684fc8l,0xb19a0f1625de54ccl,
-            0x5c5295973620db2dl },
-          0 },
-        /* 44 << 216 */
-        { { 0xa97b51265c3427b0l,0x6401405cd282c9bdl,0x3629f8d7222c5c45l,
-            0xb1c02c16e8d50aedl },
-          { 0xbea2ed75d9635bc9l,0x226790c76e24552fl,0x3c33f2a365f1d066l,
-            0x2a43463e6dfccc2el },
-          0 },
-        /* 45 << 216 */
-        { { 0x09b2e0d3b8da1e01l,0xa3a1a8fee9c0eb04l,0x59af5afe8bf653bal,
-            0xba979f8bd0a54836l },
-          { 0xa0d8194b51ee6ffbl,0x451c29e2f4b0586cl,0x7eb5fddb7471ee3dl,
-            0x84b627d4bcb3afd8l },
-          0 },
-        /* 46 << 216 */
-        { { 0x8cc3453adb483761l,0xe7cc608565d5672bl,0x277ed6cbde3efc87l,
-            0x19f2f36869234eafl },
-          { 0x9aaf43175c0b800bl,0x1f1e7c898b6da6e2l,0x6cfb4715b94ec75el,
-            0xd590dd5f453118c2l },
-          0 },
-        /* 47 << 216 */
-        { { 0xa70e9b0afb54e812l,0x092a0d7d8d86819bl,0x5421ff042e669090l,
-            0x8af770c6b133c952l },
-          { 0xc8e8dd596c8b1426l,0x1c92eb0e9523b483l,0x5a7c88f2cf3d40edl,
-            0x4cc0c04bf5dd98f8l },
-          0 },
-        /* 48 << 216 */
-        { { 0x14e49da11f17a34cl,0x5420ab39235a1456l,0xb76372412f50363bl,
-            0x7b15d623c3fabb6el },
-          { 0xa0ef40b1e274e49cl,0x5cf5074496b1860al,0xd6583fbf66afe5a4l,
-            0x44240510f47e3e9al },
-          0 },
-        /* 49 << 216 */
-        { { 0xb3939a8ffd617288l,0x3d37e5c2d68c2636l,0x4a595fac9d666c0el,
-            0xfebcad9edb3a4978l },
-          { 0x6d284a49c125016fl,0x05a7b9c80ee246a2l,0xe8b351739436c6e9l,
-            0xffb89032d4be40b7l },
-          0 },
-        /* 51 << 216 */
-        { { 0xba1387a5436ebf33l,0xc351a400e8d05267l,0x18645dde4259dbe8l,
-            0x5fc32895c10fd676l },
-          { 0x1ef7a944807f040el,0x9486b5c625738e5fl,0xc9e56cf4a7e3e96cl,
-            0x34c7dc87a20be832l },
-          0 },
-        /* 52 << 216 */
-        { { 0xe10d49996fe8393fl,0x0f809a3fe91f3a32l,0x61096d1c802f63c8l,
-            0x289e146257750d3dl },
-          { 0xed06167e9889feeal,0xd5c9c0e2e0993909l,0x46fca0d856508ac6l,
-            0x918260474f1b8e83l },
-          0 },
-        /* 53 << 216 */
-        { { 0x1d5f2ad7a9bf79cbl,0x228fb24fca9c2f98l,0x5f7c3883701c4b71l,
-            0x18cf76c4ec42d686l },
-          { 0x3680d2e94dcdec8dl,0x6d58e87ba0d60cb6l,0x72fbf086a0e513cfl,
-            0xb922d3c5346ed99al },
-          0 },
-        /* 55 << 216 */
-        { { 0x1678d658c2b9b874l,0x0e0b2c47f6360d4dl,0x01a45c02a0c9b9acl,
-            0x05e82e9d0da69afbl },
-          { 0x50be4001f28b8018l,0x503d967b667d8241l,0x6cd816534981da04l,
-            0x9b18c3117f09c35fl },
-          0 },
-        /* 57 << 216 */
-        { { 0xdfdfd5b409d22331l,0xf445126817f0c6a2l,0xe51d1aa8a5cde27bl,
-            0xb61a12a37aaf9513l },
-          { 0xe43a241d3b3ea114l,0x5c62b624366ae28dl,0x085a530db5f237eal,
-            0x7c4ed375651205afl },
-          0 },
-        /* 59 << 216 */
-        { { 0xf9de879dce842decl,0xe505320a94cedb89l,0xee55dae7f05ad888l,
-            0x44ffbfa7f028b4efl },
-          { 0xa3c1b32e63b2cd31l,0x201a058910c5ab29l,0x20f930afcd4085d6l,
-            0xda79ed169f6ff24bl },
-          0 },
-        /* 60 << 216 */
-        { { 0x7e8cfbcf704e23c6l,0xc71b7d2228aaa65bl,0xa041b2bd245e3c83l,
-            0x69b98834d21854ffl },
-          { 0x89d227a3963bfeecl,0x99947aaade7da7cbl,0x1d9ee9dbee68a9b1l,
-            0x0a08f003698ec368l },
-          0 },
-        /* 61 << 216 */
-        { { 0x04c64f33b0959be5l,0x182332ba396a7fe2l,0x4c5401e302e15b97l,
-            0x92880f9877db104bl },
-          { 0x0bf0b9cc21726a33l,0x780264741acc7b6dl,0x9721f621a26f08e3l,
-            0xe3935b434197fed1l },
-          0 },
-        /* 63 << 216 */
-        { { 0x0bffae503652be69l,0x395a9c6afb3fd5d8l,0x17f66adaa4fadfbfl,
-            0x1ee92a35f9268f8cl },
-          { 0x40ded34d6827781al,0xcd36224e34e63dccl,0xec90cf571cd1ef7al,
-            0xf6067d578f72a3bfl },
-          0 },
-        /* 64 << 216 */
-        { { 0x142b55021a93507al,0xb4cd11878d3c06cfl,0xdf70e76a91ec3f40l,
-            0x484e81ad4e7553c2l },
-          { 0x830f87b5272e9d6el,0xea1c93e5c6ff514al,0x67cc2adcc4192a8el,
-            0xc77e27e242f4535al },
-          0 },
-        /* 65 << 216 */
-        { { 0x537388d299e2f9d2l,0x15ead88612cd6d08l,0x33dfe3a769082d86l,
-            0x0ef25f4266d79d40l },
-          { 0x8035b4e546ba5cf1l,0x4e48f53711eec591l,0x40b56cda122a7aael,
-            0x78e270211dbb79a7l },
-          0 },
-        /* 71 << 216 */
-        { { 0x520b655355b4a5b1l,0xeee835cafb4f5fdel,0xb2ae86e59a823d7fl,
-            0x24325f4fc084497fl },
-          { 0x542bed4e6f0eefa4l,0x2909233b141792fdl,0x74bfc3bfc847a946l,
-            0x8ec1d009e212cb44l },
-          0 },
-        /* 77 << 216 */
-        { { 0xc2082b6d5cedd516l,0xaf148eadeafa3a10l,0x104cd5855ad63aa6l,
-            0xe3fdbf8c78c11e1el },
-          { 0x78651c493c25c24el,0x8064c4f37b7cce0el,0xa55441d4a6d8a928l,
-            0x4525c40eb0db3adcl },
-          0 },
-        /* 83 << 216 */
-        { { 0x5f69e49cfde6001el,0xc61e753aee59b47el,0xd0d4559971b0db5bl,
-            0x7f76f7b45ad4acc3l },
-          { 0xb0318a9c39830897l,0x2b15da22feef3822l,0x34049400acfb0753l,
-            0x16f4fb51a5114ed4l },
-          0 },
-        /* 89 << 216 */
-        { { 0x0b5c76928defbf10l,0xb9f1795cb79cdb6el,0xba17e7759a90317cl,
-            0x3cb69cf950cf514bl },
-          { 0x076cc4c1e5b892ffl,0x75724e8fb548b73cl,0x2ebcdb33248ff2e6l,
-            0x1f12967be109b08fl },
-          0 },
-        /* 95 << 216 */
-        { { 0x3f514c63461b7bb3l,0x3bdca5aa70afbad7l,0x368ce251eab3e38bl,
-            0xdc0fb3300d101049l },
-          { 0x7ce09abdff5013eel,0x926dd7dd7d10729dl,0xe6fe47ab6f486197l,
-            0xd23964eaa6eb6903l },
-          0 },
-        /* 101 << 216 */
-        { { 0x537ceb74eca30797l,0xf171bba557b0f338l,0x220a31fee831f1f8l,
-            0xabbc2c7c5ae6bbbcl },
-          { 0xaf7609f27eadfb60l,0x22cff1d58f28b51bl,0x63c3d76d6d1863bdl,
-            0x3a6a2fb489e8a4c8l },
-          0 },
-        /* 107 << 216 */
-        { { 0x9e74f8beb26e38f0l,0xc4c73fc4ea8bd55bl,0x086f688e1429e1fcl,
-            0x91438ff40f78159fl },
-          { 0x3571ae5f20810acbl,0x305edafe7451eb00l,0x8443c96d5704385cl,
-            0xc03b234e542605b5l },
-          0 },
-        /* 113 << 216 */
-        { { 0x2e5ff4fed85567c2l,0x136f49c7e4abd0c6l,0x5a68730cfb8a62d1l,
-            0x101ebfd030bcb848l },
-          { 0x634b0618fee950bbl,0xfa748d21c8aa65bal,0xc1d67c3e699f5560l,
-            0x6fb0546cb22889d2l },
-          0 },
-        /* 116 << 216 */
-        { { 0xa9784ebd9c95f0f9l,0x5ed9deb224640771l,0x31244af7035561c4l,
-            0x87332f3a7ee857del },
-          { 0x09e16e9e2b9e0d88l,0x52d910f456a06049l,0x507ed477a9592f48l,
-            0x85cb917b2365d678l },
-          0 },
-        /* 119 << 216 */
-        { { 0x6108f2b458a9d40dl,0xb036034838e15a52l,0xcc5610a3fd5625d6l,
-            0x79825dd083b0418el },
-          { 0xf83a95fc6324b6e5l,0x2463114deedfc4ebl,0x58b177e32250707fl,
-            0x778dcd454af8d942l },
-          0 },
-        /* 125 << 216 */
-        { { 0x1ecf2670eb816bf8l,0xa2d6e73aaa6d59c6l,0xf9a11434156852ebl,
-            0x9bc9bb70f6f82c83l },
-          { 0xd23a018d9c874836l,0xd26bf8bc6db5a8b5l,0x1d648846bec0c624l,
-            0x39f15d97ef90302fl },
-          0 },
-    },
-    {
-        /* 0 << 224 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 224 */
-        { { 0xe3417bc035d0b34al,0x440b386b8327c0a7l,0x8fb7262dac0362d1l,
-            0x2c41114ce0cdf943l },
-          { 0x2ba5cef1ad95a0b1l,0xc09b37a867d54362l,0x26d6cdd201e486c9l,
-            0x20477abf42ff9297l },
-          0 },
-        /* 3 << 224 */
-        { { 0x126f35b51e706ad9l,0xb99cebb4c3a9ebdfl,0xa75389afbf608d90l,
-            0x76113c4fc6c89858l },
-          { 0x80de8eb097e2b5aal,0x7e1022cc63b91304l,0x3bdab6056ccc066cl,
-            0x33cbb144b2edf900l },
-          0 },
-        /* 4 << 224 */
-        { { 0xc41764717af715d2l,0xe2f7f594d0134a96l,0x2c1873efa41ec956l,
-            0xe4e7b4f677821304l },
-          { 0xe5c8ff9788d5374al,0x2b915e6380823d5bl,0xea6bc755b2ee8fe2l,
-            0x6657624ce7112651l },
-          0 },
-        /* 5 << 224 */
-        { { 0x157af101dace5acal,0xc4fdbcf211a6a267l,0xdaddf340c49c8609l,
-            0x97e49f52e9604a65l },
-          { 0x9be8e790937e2ad5l,0x846e2508326e17f1l,0x3f38007a0bbbc0dcl,
-            0xcf03603fb11e16d6l },
-          0 },
-        /* 7 << 224 */
-        { { 0x5ed0c007f8ae7c38l,0x6db07a5c3d740192l,0xbe5e9c2a5fe36db3l,
-            0xd5b9d57a76e95046l },
-          { 0x54ac32e78eba20f2l,0xef11ca8f71b9a352l,0x305e373eff98a658l,
-            0xffe5a100823eb667l },
-          0 },
-        /* 9 << 224 */
-        { { 0x5c8ed8d5da64309dl,0x61a6de5691b30704l,0xd6b52f6a2f9b5808l,
-            0x0eee419498c958a7l },
-          { 0xcddd9aab771e4caal,0x83965dfd78bc21bel,0x02affce3b3b504f5l,
-            0x30847a21561c8291l },
-          0 },
-        /* 10 << 224 */
-        { { 0xd2eb2cf152bfda05l,0xe0e4c4e96197b98cl,0x1d35076cf8a1726fl,
-            0x6c06085b2db11e3dl },
-          { 0x15c0c4d74463ba14l,0x9d292f830030238cl,0x1311ee8b3727536dl,
-            0xfeea86efbeaedc1el },
-          0 },
-        /* 11 << 224 */
-        { { 0xb9d18cd366131e2el,0xf31d974f80fe2682l,0xb6e49e0fe4160289l,
-            0x7c48ec0b08e92799l },
-          { 0x818111d8d1989aa7l,0xb34fa0aaebf926f9l,0xdb5fe2f5a245474al,
-            0xf80a6ebb3c7ca756l },
-          0 },
-        /* 13 << 224 */
-        { { 0x8ea610593de9abe3l,0x404348819cdc03bel,0x9b261245cfedce8cl,
-            0x78c318b4cf5234a1l },
-          { 0x510bcf16fde24c99l,0x2a77cb75a2c2ff5dl,0x9c895c2b27960fb4l,
-            0xd30ce975b0eda42bl },
-          0 },
-        /* 15 << 224 */
-        { { 0x09521177ff57d051l,0x2ff38037fb6a1961l,0xfc0aba74a3d76ad4l,
-            0x7c76480325a7ec17l },
-          { 0x7532d75f48879bc8l,0xea7eacc058ce6bc1l,0xc82176b48e896c16l,
-            0x9a30e0b22c750fedl },
-          0 },
-        /* 16 << 224 */
-        { { 0xc37e2c2e421d3aa4l,0xf926407ce84fa840l,0x18abc03d1454e41cl,
-            0x26605ecd3f7af644l },
-          { 0x242341a6d6a5eabfl,0x1edb84f4216b668el,0xd836edb804010102l,
-            0x5b337ce7945e1d8cl },
-          0 },
-        /* 17 << 224 */
-        { { 0xd2075c77c055dc14l,0x2a0ffa2581d89cdfl,0x8ce815ea6ffdcbafl,
-            0xa3428878fb648867l },
-          { 0x277699cf884655fbl,0xfa5b5bd6364d3e41l,0x01f680c6441e1cb7l,
-            0x3fd61e66b70a7d67l },
-          0 },
-        /* 19 << 224 */
-        { { 0xfd5bb657b1fa70fbl,0xfa07f50fd8073a00l,0xf72e3aa7bca02500l,
-            0xf68f895d9975740dl },
-          { 0x301120605cae2a6al,0x01bd721802874842l,0x3d4238917ce47bd3l,
-            0xa66663c1789544f6l },
-          0 },
-        /* 21 << 224 */
-        { { 0xb4b9a39b36194d40l,0xe857a7c577612601l,0xf4209dd24ecf2f58l,
-            0x82b9e66d5a033487l },
-          { 0xc1e36934e4e8b9ddl,0xd2372c9da42377d7l,0x51dc94c70e3ae43bl,
-            0x4c57761e04474f6fl },
-          0 },
-        /* 23 << 224 */
-        { { 0xa39114e24415503bl,0xc08ff7c64cbb17e9l,0x1eff674dd7dec966l,
-            0x6d4690af53376f63l },
-          { 0xff6fe32eea74237bl,0xc436d17ecd57508el,0x15aa28e1edcc40fel,
-            0x0d769c04581bbb44l },
-          0 },
-        /* 25 << 224 */
-        { { 0xfe51d0296ae55043l,0x8931e98f44a87de1l,0xe57f1cc609e4fee2l,
-            0x0d063b674e072d92l },
-          { 0x70a998b9ed0e4316l,0xe74a736b306aca46l,0xecf0fbf24fda97c7l,
-            0xa40f65cb3e178d93l },
-          0 },
-        /* 27 << 224 */
-        { { 0x8667e981c27253c9l,0x05a6aefb92b36a45l,0xa62c4b369cb7bb46l,
-            0x8394f37511f7027bl },
-          { 0x747bc79c5f109d0fl,0xcad88a765b8cc60al,0x80c5a66b58f09e68l,
-            0xe753d451f6127eacl },
-          0 },
-        /* 28 << 224 */
-        { { 0xc44b74a15b0ec6f5l,0x47989fe45289b2b8l,0x745f848458d6fc73l,
-            0xec362a6ff61c70abl },
-          { 0x070c98a7b3a8ad41l,0x73a20fc07b63db51l,0xed2c2173f44c35f4l,
-            0x8a56149d9acc9dcal },
-          0 },
-        /* 29 << 224 */
-        { { 0x98f178819ac6e0f4l,0x360fdeafa413b5edl,0x0625b8f4a300b0fdl,
-            0xf1f4d76a5b3222d3l },
-          { 0x9d6f5109587f76b8l,0x8b4ee08d2317fdb5l,0x88089bb78c68b095l,
-            0x95570e9a5808d9b9l },
-          0 },
-        /* 31 << 224 */
-        { { 0x2e1284943fb42622l,0x3b2700ac500907d5l,0xf370fb091a95ec63l,
-            0xf8f30be231b6dfbdl },
-          { 0xf2b2f8d269e55f15l,0x1fead851cc1323e9l,0xfa366010d9e5eef6l,
-            0x64d487b0e316107el },
-          0 },
-        /* 33 << 224 */
-        { { 0xc9a9513929607745l,0x0ca07420a26f2b28l,0xcb2790e74bc6f9ddl,
-            0x345bbb58adcaffc0l },
-          { 0xc65ea38cbe0f27a2l,0x67c24d7c641fcb56l,0x2c25f0a7a9e2c757l,
-            0x93f5cdb016f16c49l },
-          0 },
-        /* 34 << 224 */
-        { { 0x2ca5a9d7c5ee30a1l,0xd1593635b909b729l,0x804ce9f3dadeff48l,
-            0xec464751b07c30c3l },
-          { 0x89d65ff39e49af6al,0xf2d6238a6f3d01bcl,0x1095561e0bced843l,
-            0x51789e12c8a13fd8l },
-          0 },
-        /* 35 << 224 */
-        { { 0xd633f929763231dfl,0x46df9f7de7cbddefl,0x01c889c0cb265da8l,
-            0xfce1ad10af4336d2l },
-          { 0x8d110df6fc6a0a7el,0xdd431b986da425dcl,0xcdc4aeab1834aabel,
-            0x84deb1248439b7fcl },
-          0 },
-        /* 36 << 224 */
-        { { 0x8796f1693c2a5998l,0x9b9247b47947190dl,0x55b9d9a511597014l,
-            0x7e9dd70d7b1566eel },
-          { 0x94ad78f7cbcd5e64l,0x0359ac179bd4c032l,0x3b11baaf7cc222ael,
-            0xa6a6e284ba78e812l },
-          0 },
-        /* 37 << 224 */
-        { { 0x8392053f24cea1a0l,0xc97bce4a33621491l,0x7eb1db3435399ee9l,
-            0x473f78efece81ad1l },
-          { 0x41d72fe0f63d3d0dl,0xe620b880afab62fcl,0x92096bc993158383l,
-            0x41a213578f896f6cl },
-          0 },
-        /* 39 << 224 */
-        { { 0x6fb4d4e42bad4d5fl,0xfa4c3590fef0059bl,0x6a10218af5122294l,
-            0x9a78a81aa85751d1l },
-          { 0x04f20579a98e84e7l,0xfe1242c04997e5b5l,0xe77a273bca21e1e4l,
-            0xfcc8b1ef9411939dl },
-          0 },
-        /* 40 << 224 */
-        { { 0xe20ea30292d0487al,0x1442dbec294b91fel,0x1f7a4afebb6b0e8fl,
-            0x1700ef746889c318l },
-          { 0xf5bbffc370f1fc62l,0x3b31d4b669c79ccal,0xe8bc2aaba7f6340dl,
-            0xb0b08ab4a725e10al },
-          0 },
-        /* 41 << 224 */
-        { { 0x44f05701ae340050l,0xba4b30161cf0c569l,0x5aa29f83fbe19a51l,
-            0x1b9ed428b71d752el },
-          { 0x1666e54eeb4819f5l,0x616cdfed9e18b75bl,0x112ed5be3ee27b0bl,
-            0xfbf2831944c7de4dl },
-          0 },
-        /* 43 << 224 */
-        { { 0x722eb104e2b4e075l,0x49987295437c4926l,0xb1e4c0e446a9b82dl,
-            0xd0cb319757a006f5l },
-          { 0xf3de0f7dd7808c56l,0xb5c54d8f51f89772l,0x500a114aadbd31aal,
-            0x9afaaaa6295f6cabl },
-          0 },
-        /* 44 << 224 */
-        { { 0x94705e2104cf667al,0xfc2a811b9d3935d7l,0x560b02806d09267cl,
-            0xf19ed119f780e53bl },
-          { 0xf0227c09067b6269l,0x967b85335caef599l,0x155b924368efeebcl,
-            0xcd6d34f5c497bae6l },
-          0 },
-        /* 45 << 224 */
-        { { 0x1dd8d5d36cceb370l,0x2aeac579a78d7bf9l,0x5d65017d70b67a62l,
-            0x70c8e44f17c53f67l },
-          { 0xd1fc095086a34d09l,0xe0fca256e7134907l,0xe24fa29c80fdd315l,
-            0x2c4acd03d87499adl },
-          0 },
-        /* 46 << 224 */
-        { { 0xbaaf75173b5a9ba6l,0xb9cbe1f612e51a51l,0xd88edae35e154897l,
-            0xe4309c3c77b66ca0l },
-          { 0xf5555805f67f3746l,0x85fc37baa36401ffl,0xdf86e2cad9499a53l,
-            0x6270b2a3ecbc955bl },
-          0 },
-        /* 47 << 224 */
-        { { 0xafae64f5974ad33bl,0x04d85977fe7b2df1l,0x2a3db3ff4ab03f73l,
-            0x0b87878a8702740al },
-          { 0x6d263f015a061732l,0xc25430cea32a1901l,0xf7ebab3ddb155018l,
-            0x3a86f69363a9b78el },
-          0 },
-        /* 48 << 224 */
-        { { 0x349ae368da9f3804l,0x470f07fea164349cl,0xd52f4cc98562baa5l,
-            0xc74a9e862b290df3l },
-          { 0xd3a1aa3543471a24l,0x239446beb8194511l,0xbec2dd0081dcd44dl,
-            0xca3d7f0fc42ac82dl },
-          0 },
-        /* 49 << 224 */
-        { { 0x1f3db085fdaf4520l,0xbb6d3e804549daf2l,0xf5969d8a19ad5c42l,
-            0x7052b13ddbfd1511l },
-          { 0x11890d1b682b9060l,0xa71d3883ac34452cl,0xa438055b783805b4l,
-            0x432412774725b23el },
-          0 },
-        /* 51 << 224 */
-        { { 0x40b08f7443b30ca8l,0xe10b5bbad9934583l,0xe8a546d6b51110adl,
-            0x1dd50e6628e0b6c5l },
-          { 0x292e9d54cff2b821l,0x3882555d47281760l,0x134838f83724d6e3l,
-            0xf2c679e022ddcda1l },
-          0 },
-        /* 52 << 224 */
-        { { 0x40ee88156d2a5768l,0x7f227bd21c1e7e2dl,0x487ba134d04ff443l,
-            0x76e2ff3dc614e54bl },
-          { 0x36b88d6fa3177ec7l,0xbf731d512328fff5l,0x758caea249ba158el,
-            0x5ab8ff4c02938188l },
-          0 },
-        /* 53 << 224 */
-        { { 0x33e1605635edc56dl,0x5a69d3497e940d79l,0x6c4fd00103866dcbl,
-            0x20a38f574893cdefl },
-          { 0xfbf3e790fac3a15bl,0x6ed7ea2e7a4f8e6bl,0xa663eb4fbc3aca86l,
-            0x22061ea5080d53f7l },
-          0 },
-        /* 55 << 224 */
-        { { 0x635a8e5ec3a0ee43l,0x70aaebca679898ffl,0x9ee9f5475dc63d56l,
-            0xce987966ffb34d00l },
-          { 0xf9f86b195e26310al,0x9e435484382a8ca8l,0x253bcb81c2352fe4l,
-            0xa4eac8b04474b571l },
-          0 },
-        /* 57 << 224 */
-        { { 0x2617f91c93aa96b8l,0x0fc8716b7fca2e13l,0xa7106f5e95328723l,
-            0xd1c9c40b262e6522l },
-          { 0xb9bafe8642b7c094l,0x1873439d1543c021l,0xe1baa5de5cbefd5dl,
-            0xa363fc5e521e8affl },
-          0 },
-        /* 59 << 224 */
-        { { 0xbc00fc2f2f8ba2c7l,0x0966eb2f7c67aa28l,0x13f7b5165a786972l,
-            0x3bfb75578a2fbba0l },
-          { 0x131c4f235a2b9620l,0xbff3ed276faf46bel,0x9b4473d17e172323l,
-            0x421e8878339f6246l },
-          0 },
-        /* 60 << 224 */
-        { { 0x0fa8587a25a41632l,0xc0814124a35b6c93l,0x2b18a9f559ebb8dbl,
-            0x264e335776edb29cl },
-          { 0xaf245ccdc87c51e2l,0x16b3015b501e6214l,0xbb31c5600a3882cel,
-            0x6961bb94fec11e04l },
-          0 },
-        /* 61 << 224 */
-        { { 0x3b825b8deff7a3a0l,0xbec33738b1df7326l,0x68ad747c99604a1fl,
-            0xd154c9349a3bd499l },
-          { 0xac33506f1cc7a906l,0x73bb53926c560e8fl,0x6428fcbe263e3944l,
-            0xc11828d51c387434l },
-          0 },
-        /* 63 << 224 */
-        { { 0x659b17c8d8ceb147l,0x9b649eeeb70a5554l,0x6b7fa0b5ac6bc634l,
-            0xd99fe2c71d6e732fl },
-          { 0x30e6e7628d3abba2l,0x18fee6e7a797b799l,0x5c9d360dc696464dl,
-            0xe3baeb4827bfde12l },
-          0 },
-        /* 64 << 224 */
-        { { 0x2bf5db47f23206d5l,0x2f6d34201d260152l,0x17b876533f8ff89al,
-            0x5157c30c378fa458l },
-          { 0x7517c5c52d4fb936l,0xef22f7ace6518cdcl,0xdeb483e6bf847a64l,
-            0xf508455892e0fa89l },
-          0 },
-        /* 65 << 224 */
-        { { 0xf77bb113a74ed3bel,0x89e4eb8f074f2637l,0x7fbfa84df7ce2aebl,
-            0xe7c6ecd5baaefe4cl },
-          { 0x176bba7df6319542l,0x70098120f6080799l,0x2e2118339054d9aal,
-            0x1be4c6a78295a912l },
-          0 },
-        /* 71 << 224 */
-        { { 0x6bb4d8c35df1455fl,0xb839f08f0384b033l,0x718868af11f95d50l,
-            0xae256a92e07a8801l },
-          { 0xa5bafaf24d71a273l,0x18ff04ea2a30e68fl,0x364c193287ba727el,
-            0x4bb8cf99befcaf73l },
-          0 },
-        /* 77 << 224 */
-        { { 0xc79f5b1f4e9fb3d7l,0x52854970a51cccddl,0xa4e27e97f00054a3l,
-            0x26a79792240e1232l },
-          { 0xb15579fecb5ff465l,0x6ef54c3bd1722a84l,0xee211bfa5239a4d8l,
-            0x36c7db27270b7059l },
-          0 },
-        /* 83 << 224 */
-        { { 0x5e7da0a9f9858cd3l,0x67459de5b633de49l,0x2db0d54b2e73892el,
-            0x37f50877adae399al },
-          { 0x83c28b83b65e6179l,0xae5a915ca39faf17l,0x6ab8f3fbe841b53cl,
-            0x7c30997b0df7d004l },
-          0 },
-        /* 89 << 224 */
-        { { 0x87904ca7b3b862bdl,0x7593db93cf9ea671l,0x8a2670f8739aa783l,
-            0x3921d779f5154ca6l },
-          { 0xe81ca56468f65ebbl,0x0c600603bc4e64d4l,0xdf170049cb83b2d1l,
-            0x373893b863487064l },
-          0 },
-        /* 95 << 224 */
-        { { 0x7c3c52b9c0c4e88el,0x0f0484d06f0c2446l,0xeb876827000fe87bl,
-            0xa749b3136d20f94al },
-          { 0x0876dae9d55abda6l,0xe6e4367620726911l,0xf85e8a8c4a2676b4l,
-            0x4e8c97f1b4a890ebl },
-          0 },
-        /* 101 << 224 */
-        { { 0xa992f482a3c0a4f4l,0xe1536f3f7a8d961al,0x26fc79ae000752b0l,
-            0xdbfb706b76ad8508l },
-          { 0x2642b2ed6f4cf9e4l,0xa013db54557fa7e2l,0x2ef711821d326116l,
-            0x8dc3f5bcbafc83ecl },
-          0 },
-        /* 107 << 224 */
-        { { 0x9671258578e5a201l,0xc71aca1de9125569l,0x360c45c0e2231379l,
-            0x2d71783512e82369l },
-          { 0x392432d3d84b2153l,0x502fd3f6d6939ffel,0x33c440ae6e766cacl,
-            0x99f1fbee28062416l },
-          0 },
-        /* 113 << 224 */
-        { { 0xe51ad841861604cbl,0x1ec9c54f630283a7l,0xcc42cad582a39473l,
-            0xa2eb053709929c4al },
-          { 0xe374459767f655a3l,0x9f54c2451d7f2674l,0xd85e9163fbc8aba5l,
-            0x12fd0b55866bc892l },
-          0 },
-        /* 116 << 224 */
-        { { 0x4f2c3063d7bd4661l,0xe533798d57a974ccl,0x44860d503ea02d85l,
-            0xf2a7f4e5acaa0521l },
-          { 0x05593061abb108f0l,0x56d1056044528309l,0x1f674df9c88b6d1el,
-            0x19fdc4cbd8744c4dl },
-          0 },
-        /* 119 << 224 */
-        { { 0xfd1488ec00f2f1d5l,0x24fcc67b44a825ddl,0xc7bfae2ea925a0f4l,
-            0x5e03249cad59cf48l },
-          { 0x1dc5a8e11af4844cl,0x89b2fbc58a598c20l,0xb0f56afff2078121l,
-            0x8194012d4878bb0dl },
-          0 },
-        /* 125 << 224 */
-        { { 0xc1cbe9d3a5ae1031l,0x38da74435706b987l,0x01844b55b353f188l,
-            0x390c59ca87a807c5l },
-          { 0x55ac7b1fb13b780cl,0x060970bff375c1cbl,0x8dd1f378c7ab4e5cl,
-            0xcca782e5cf726645l },
-          0 },
-    },
-    {
-        /* 0 << 232 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 232 */
-        { { 0x91213462f23f2d92l,0x6cab71bd60b94078l,0x6bdd0a63176cde20l,
-            0x54c9b20cee4d54bcl },
-          { 0x3cd2d8aa9f2ac02fl,0x03f8e617206eedb0l,0xc7f68e1693086434l,
-            0x831469c592dd3db9l },
-          0 },
-        /* 3 << 232 */
-        { { 0x4a9090cde36d0757l,0xf722d7b1d9a29382l,0xfb7fb04c04b48ddfl,
-            0x628ad2a7ebe16f43l },
-          { 0xcd3fbfb520226040l,0x6c34ecb15104b6c4l,0x30c0754ec903c188l,
-            0xec336b082d23cab0l },
-          0 },
-        /* 4 << 232 */
-        { { 0x9f51439e558df019l,0x230da4baac712b27l,0x518919e355185a24l,
-            0x4dcefcdd84b78f50l },
-          { 0xa7d90fb2a47d4c5al,0x55ac9abfb30e009el,0xfd2fc35974eed273l,
-            0xb72d824cdbea8fafl },
-          0 },
-        /* 5 << 232 */
-        { { 0xd213f923cbb13d1bl,0x98799f425bfb9bfel,0x1ae8ddc9701144a9l,
-            0x0b8b3bb64c5595eel },
-          { 0x0ea9ef2e3ecebb21l,0x17cb6c4b3671f9a7l,0x47ef464f726f1d1fl,
-            0x171b94846943a276l },
-          0 },
-        /* 7 << 232 */
-        { { 0xc9941109a607419dl,0xfaa71e62bb6bca80l,0x34158c1307c431f3l,
-            0x594abebc992bc47al },
-          { 0x6dfea691eb78399fl,0x48aafb353f42cba4l,0xedcd65af077c04f0l,
-            0x1a29a366e884491al },
-          0 },
-        /* 9 << 232 */
-        { { 0x7bf6a5c1f7ea25aal,0xd165e6bffbb07d5fl,0xe353936189e78671l,
-            0xa3fcac892bac4219l },
-          { 0xdfab6fd4f0baa8abl,0x5a4adac1e2c1c2e5l,0x6cd75e3140d85849l,
-            0xce263fea19b39181l },
-          0 },
-        /* 10 << 232 */
-        { { 0xb8d804a3315980cdl,0x693bc492fa3bebf7l,0x3578aeee2253c504l,
-            0x158de498cd2474a2l },
-          { 0x1331f5c7cfda8368l,0xd2d7bbb378d7177el,0xdf61133af3c1e46el,
-            0x5836ce7dd30e7be8l },
-          0 },
-        /* 11 << 232 */
-        { { 0xe042ece59a29a5c5l,0xb19b3c073b6c8402l,0xc97667c719d92684l,
-            0xb5624622ebc66372l },
-          { 0x0cb96e653c04fa02l,0x83a7176c8eaa39aal,0x2033561deaa1633fl,
-            0x45a9d0864533df73l },
-          0 },
-        /* 13 << 232 */
-        { { 0xa29ae9df5ece6e7cl,0x0603ac8f0facfb55l,0xcfe85b7adda233a5l,
-            0xe618919fbd75f0b8l },
-          { 0xf555a3d299bf1603l,0x1f43afc9f184255al,0xdcdaf341319a3e02l,
-            0xd3b117ef03903a39l },
-          0 },
-        /* 15 << 232 */
-        { { 0xb6b82fa74d82f4c2l,0x90725a606804efb3l,0xbc82ec46adc3425el,
-            0xb7b805812787843el },
-          { 0xdf46d91cdd1fc74cl,0xdc1c62cbe783a6c4l,0x59d1b9f31a04cbbal,
-            0xd87f6f7295e40764l },
-          0 },
-        /* 16 << 232 */
-        { { 0x196860411e84e0e5l,0xa5db84d3aea34c93l,0xf9d5bb197073a732l,
-            0xb8d2fe566bcfd7c0l },
-          { 0x45775f36f3eb82fal,0x8cb20cccfdff8b58l,0x1659b65f8374c110l,
-            0xb8b4a422330c789al },
-          0 },
-        /* 17 << 232 */
-        { { 0xa6312c9e8977d99bl,0xbe94433183f531e7l,0x8232c0c218d3b1d4l,
-            0x617aae8be1247b73l },
-          { 0x40153fc4282aec3bl,0xc6063d2ff7b8f823l,0x68f10e583304f94cl,
-            0x31efae74ee676346l },
-          0 },
-        /* 19 << 232 */
-        { { 0xd98bf2a43734e520l,0x5e3abbe3209bdcbal,0x77c76553bc945b35l,
-            0x5331c093c6ef14aal },
-          { 0x518ffe2976b60c80l,0x2285593b7ace16f8l,0xab1f64ccbe2b9784l,
-            0xe8f2c0d9ab2421b6l },
-          0 },
-        /* 21 << 232 */
-        { { 0x481dae5fd5ecfefcl,0x07084fd8c2bff8fcl,0x8040a01aea324596l,
-            0x4c646980d4de4036l },
-          { 0x9eb8ab4ed65abfc3l,0xe01cb91f13541ec7l,0x8f029adbfd695012l,
-            0x9ae284833c7569ecl },
-          0 },
-        /* 23 << 232 */
-        { { 0xc83605f6f10ff927l,0xd387145123739fc6l,0x6d163450cac1c2ccl,
-            0x6b521296a2ec1ac5l },
-          { 0x0606c4f96e3cb4a5l,0xe47d3f41778abff7l,0x425a8d5ebe8e3a45l,
-            0x53ea9e97a6102160l },
-          0 },
-        /* 25 << 232 */
-        { { 0x6b72fab526bc2797l,0x13670d1699f16771l,0x001700521e3e48d1l,
-            0x978fe401b7adf678l },
-          { 0x55ecfb92d41c5dd4l,0x5ff8e247c7b27da5l,0xe7518272013fb606l,
-            0x5768d7e52f547a3cl },
-          0 },
-        /* 27 << 232 */
-        { { 0x0e966e64c73b2383l,0x49eb3447d17d8762l,0xde1078218da05dabl,
-            0x443d8baa016b7236l },
-          { 0x163b63a5ea7610d6l,0xe47e4185ce1ca979l,0xae648b6580baa132l,
-            0xebf53de20e0d5b64l },
-          0 },
-        /* 28 << 232 */
-        { { 0x6ba535da9a85788bl,0xd21f03aebd0626d4l,0x099f8c47e873dc64l,
-            0xcda8564d018ec97el },
-          { 0x3e8d7a5cde92c68cl,0x78e035a173323cc4l,0x3ef26275f880ff7cl,
-            0xa4ee3dff273eedaal },
-          0 },
-        /* 29 << 232 */
-        { { 0x8bbaec49571d92acl,0x569e85fe4692517fl,0x8333b014a14ea4afl,
-            0x32f2a62f12e5c5adl },
-          { 0x98c2ce3a06d89b85l,0xb90741aa2ff77a08l,0x2530defc01f795a2l,
-            0xd6e5ba0b84b3c199l },
-          0 },
-        /* 31 << 232 */
-        { { 0x3d1b24cb28c682c6l,0x27f252288612575bl,0xb587c779e8e66e98l,
-            0x7b0c03e9405eb1fel },
-          { 0xfdf0d03015b548e7l,0xa8be76e038b36af7l,0x4cdab04a4f310c40l,
-            0x6287223ef47ecaecl },
-          0 },
-        /* 33 << 232 */
-        { { 0x0a4c6f3670ad54aal,0xc24cfd0d2a543909l,0xe1b0bc5b745c1a97l,
-            0xb8431cfd68f0ddbfl },
-          { 0x326357989ed8cb06l,0xa00a80ff759d2b7dl,0x81f335c190570e02l,
-            0xbfccd89849c4e4d9l },
-          0 },
-        /* 34 << 232 */
-        { { 0x4dcb646bfd16d8c4l,0x76a6b640e38ba57bl,0xd92de1f79d8ae7e2l,
-            0x126f48f13f77f23bl },
-          { 0xb7b53ca977e8abc2l,0x3faa17112c0787ffl,0xf8f9308c8e5762f8l,
-            0x600a8a7f6b83aea8l },
-          0 },
-        /* 35 << 232 */
-        { { 0xa2aed4a799aa03c0l,0x1f93b93da18b79c5l,0x7b4550b7314192c3l,
-            0x9da00676272bb08el },
-          { 0xe42f0d7e23e072edl,0x7ce76494888b5783l,0x4c7900203680b63bl,
-            0x6040c83f662a8718l },
-          0 },
-        /* 36 << 232 */
-        { { 0xba9e5c88a56d73edl,0x6c24f7712ca054d3l,0x4a37c235083beae1l,
-            0x04a883b26483e9fdl },
-          { 0x0c63f3aee27c2c5dl,0x0e1da88dae4671f1l,0xa577e8e25995e1dbl,
-            0xbfc4b1b16ed6066al },
-          0 },
-        /* 37 << 232 */
-        { { 0x8b398541f53d9e63l,0x4ab045bb019395cbl,0x69a1b90371dd70c7l,
-            0xdedf284b38aaa431l },
-          { 0xb45e245aaed3efe7l,0x49460905079f2facl,0xde4dee470845bd78l,
-            0x0540524039d02ec3l },
-          0 },
-        /* 39 << 232 */
-        { { 0x300cf051675cc986l,0x758afea99324219fl,0xf524c3fad5a93b5fl,
-            0xb73385abc3864a8al },
-          { 0xbde19289f6be9050l,0xbb9018558205a3f3l,0x99a9d14d229f6b89l,
-            0x4c3a802f4336e68fl },
-          0 },
-        /* 40 << 232 */
-        { { 0xdd4a12d8e12b31f8l,0x577e29bc177736e6l,0x2353722ba88935e8l,
-            0xca1d3729015f286dl },
-          { 0x86c7b6a239a3e035l,0x6e5250bfd3b03a9fl,0x79d98930fd0d536el,
-            0x8c4cbbabfa0c3832l },
-          0 },
-        /* 41 << 232 */
-        { { 0x92ecff374f8e6163l,0x171cc8830f35faeal,0xc5434242bcd36142l,
-            0x707049adb28b63bbl },
-          { 0xa1f4d1dbf6443da9l,0x002bb062dabc108bl,0x17287f171a272b08l,
-            0x2a3aac8c884cf6bbl },
-          0 },
-        /* 43 << 232 */
-        { { 0x55524645651c0a5al,0x14624a9703cf0d12l,0xca9315a8f884a9e2l,
-            0x9840c6e2df7c9d59l },
-          { 0xd96bd10a7438e8d5l,0x12be73d2b2f887del,0x5e47445dca2493efl,
-            0x85aef555e9fff03el },
-          0 },
-        /* 44 << 232 */
-        { { 0x169b38c9a43b2339l,0x884308d91732bfabl,0xe4b593a28ff202ddl,
-            0xaf51d11f1e65376cl },
-          { 0x6ec648de741525ffl,0xf93cbd369ff4c628l,0xc76df9efb1129c79l,
-            0x31a5f2e2b7a67294l },
-          0 },
-        /* 45 << 232 */
-        { { 0x0661bc02801d0e38l,0x4a37dc0e71fc46b7l,0x0b224cfc80c3e311l,
-            0x2dd3d2779646a957l },
-          { 0xfa45aa18ef524012l,0x5d2a2d0916185a09l,0x34d5c630b5313dcel,
-            0xd9581ed151e4cf84l },
-          0 },
-        /* 46 << 232 */
-        { { 0x5845aa4a8ebd2af8l,0x141404ecd3df43ccl,0xff3fc7681ffd48d9l,
-            0x8a096e72e0cefb65l },
-          { 0xc9c81cfdffc3a5cdl,0x7550aa3029b27cf9l,0x34dca72b65fa0380l,
-            0xe8c5f6059ddd032bl },
-          0 },
-        /* 47 << 232 */
-        { { 0xe53da8a46bfbadb3l,0x4a9dfa55afaeeb5el,0x076245ea6644b1d4l,
-            0xc19be4012307bbcbl },
-          { 0x097774c19d77318bl,0xacc8a1519cfd51c4l,0x736ef6b3ecaa7b08l,
-            0x107479132d643a80l },
-          0 },
-        /* 48 << 232 */
-        { { 0x2d500910cab91f1el,0xbedd9e444d1cd216l,0xd634b74fedd02252l,
-            0xbd60f8e11258617al },
-          { 0xd8c7537b9e05614al,0xfd26c766e7af5fc5l,0x0660b581582bd926l,
-            0x87019244acf07fc8l },
-          0 },
-        /* 49 << 232 */
-        { { 0xd4889fdf6220ae8el,0x745d67ec1abf1549l,0x957b2e3d2fb89c36l,
-            0x9768c90edc62ada9l },
-          { 0x90332fd748e6c46el,0x5aa5a4e54e90ef0dl,0x58838fd3ddcc8571l,
-            0xd12f6c6f9a721126l },
-          0 },
-        /* 51 << 232 */
-        { { 0x2f0fd0b2cec757bal,0x46a7a9c63032cd1dl,0x9af3a600547d7a77l,
-            0x828e16eca43da1bal },
-          { 0x0b303a66092a8d92l,0x78ba0389c23d08bal,0x52aed08d4616bd29l,
-            0x4c0ff1210539c9fal },
-          0 },
-        /* 52 << 232 */
-        { { 0x2c3b7322badcfe8el,0x6e0616fac5e25a04l,0x0a3c12753da6e4a2l,
-            0xe46c957e077bca01l },
-          { 0xb46ca4e3da4be64bl,0xa59bda668e75ee78l,0x41835184a4de98f2l,
-            0x6efb1f924ed6a568l },
-          0 },
-        /* 53 << 232 */
-        { { 0xbb8cdc094af1dd72l,0x93c0aa38a2460633l,0xf66f5d238a7ebc93l,
-            0x43ecda843e8e37a6l },
-          { 0x399da8265fd5139el,0x8b39930fd446f38el,0x114414135d2b68efl,
-            0x8be163b8d1637c38l },
-          0 },
-        /* 55 << 232 */
-        { { 0x488e2a35b70ddbd3l,0xb4aa5f718da50077l,0xb38b74b1d8752bbdl,
-            0x7007f328416106a3l },
-          { 0xe6a62e4fcec4ea68l,0x9fdfb79741ef920bl,0x1a19d7dfe3c337a6l,
-            0x08f643558be0f586l },
-          0 },
-        /* 57 << 232 */
-        { { 0x91a5d8ff60343a1fl,0x921e442173ef8cdfl,0x4358f27b975138cdl,
-            0x36fd8577a4992b08l },
-          { 0xc07c8ca1f8d044c6l,0xcf42903687747b6bl,0x0932ffb0867c8632l,
-            0x7e565213250e5a89l },
-          0 },
-        /* 59 << 232 */
-        { { 0xae7c3b9b06255feal,0x2eb1d9a78a6fe229l,0xf81548e77601e6f8l,
-            0x777394eb7bd96d6cl },
-          { 0x54734187000a3509l,0xaeec146492d43c04l,0xc9b7f0d7c428b4acl,
-            0x9d4bcedccd7f7018l },
-          0 },
-        /* 60 << 232 */
-        { { 0x4741b9b311370605l,0x47fa72f75d09b355l,0x391a71ac7a144c6al,
-            0x0808c0f498b6e3cal },
-          { 0x7eaed9ef7fe53900l,0xf157a2a5e5a830bal,0xd13ec09127974afcl,
-            0x78d710a70b87997dl },
-          0 },
-        /* 61 << 232 */
-        { { 0xcbb96ecb4e263f81l,0x093e0d1509084351l,0x7af3232629220a81l,
-            0xd721b415c60f36dcl },
-          { 0xe3340a87fe9387a1l,0x6088bf482ff2b126l,0xd31028f1d2bc982cl,
-            0x9794e106630d52cbl },
-          0 },
-        /* 63 << 232 */
-        { { 0x1dac76780b11e972l,0x46e814c62698dafel,0x553f7370c37640d6l,
-            0xdcf588cc51cede93l },
-          { 0x4d6b56d3c3f6215bl,0x07edc6621b8f8f03l,0xdfef9d60b9a5dfbcl,
-            0x377edf4d10af7a5bl },
-          0 },
-        /* 64 << 232 */
-        { { 0x8928e99aeeaf8c49l,0xee7aa73d6e24d728l,0x4c5007c2e72b156cl,
-            0x5fcf57c5ed408a1dl },
-          { 0x9f719e39b6057604l,0x7d343c01c2868bbfl,0x2cca254b7e103e2dl,
-            0xe6eb38a9f131bea2l },
-          0 },
-        /* 65 << 232 */
-        { { 0x26ae28bede7a4b7el,0xd2f07569d2664163l,0x798690d4ff69266al,
-            0x77093d356ef3695dl },
-          { 0xaca9903d567dd3dfl,0x259c59a3a274c67bl,0x9f34bc0bfc1198b0l,
-            0x51a7726290b1521cl },
-          0 },
-        /* 71 << 232 */
-        { { 0xa20644bc80ca5391l,0xf9cdb4f7e5b36ea3l,0xe7936c0641426e22l,
-            0x39bc23033eef8a52l },
-          { 0x31253f43e5d8f896l,0xb0e5a588dc3df499l,0x1d03519a2d7e66d5l,
-            0x923de91f6d7da5e3l },
-          0 },
-        /* 77 << 232 */
-        { { 0x17a833ffedf861e4l,0x0ee3d0af4ebec965l,0xd0fac1c1ea66870el,
-            0x325756d0ae810cf4l },
-          { 0x4ed78d2c78e9a415l,0x6cc65685192046e4l,0x03e4243d8498a91el,
-            0x56a02dd25ab97794l },
-          0 },
-        /* 83 << 232 */
-        { { 0xc2fd373748e2b156l,0x259e9a98139645bel,0xe90106fb9877b4f1l,
-            0x49e5bac5889ce002l },
-          { 0x936a7dd18cf14e0bl,0x70bf6d304e3a8a01l,0x99d3e8bfeb748b62l,
-            0xa52a27c99b31c55cl },
-          0 },
-        /* 89 << 232 */
-        { { 0x9db1d41d300637d5l,0xe38744397c2dd836l,0x36179baf0d04ceb3l,
-            0xe9ccd17b251b3f2dl },
-          { 0xd8228073442b6d1dl,0x59a038363eed2971l,0xb443732046979f5cl,
-            0x54ad4113ae63937cl },
-          0 },
-        /* 95 << 232 */
-        { { 0x092c34e6d9246e9fl,0xb4b3b63d3eeb18a7l,0x8b3778beed9d1383l,
-            0xe4cb7be9d70d5d80l },
-          { 0xcff12e9b3d059203l,0x277af117ba86699fl,0x9bd4e8e363603585l,
-            0x0750b0f28e89c8d5l },
-          0 },
-        /* 101 << 232 */
-        { { 0x38b77e5958f7187bl,0x31c7068de0cb618el,0xa0f8e0d6c11ebe62l,
-            0x07adc8010473d7ebl },
-          { 0x36161a2c5c3e9510l,0xb2ec90d64ad04815l,0x01e2dd1f917d8166l,
-            0x549bcbdd6aa0f794l },
-          0 },
-        /* 107 << 232 */
-        { { 0x4ab27c3a8e4e45e5l,0xf6bd9d82f2bb99e7l,0xcab48c735e9da59fl,
-            0xdeb09eb2b9727353l },
-          { 0xc4a7954bafb8fa3el,0x34af2a49abf6803dl,0xc1ee1416d63e13bbl,
-            0xd49bf42d7a949193l },
-          0 },
-        /* 113 << 232 */
-        { { 0x504823ea9c9c07c6l,0x9dbec902bee2288cl,0x018d7875f0ceb6bbl,
-            0x678b997304f7022cl },
-          { 0x74d658238c5fb369l,0x7d4e1f114ca89ee8l,0x148316399905abc0l,
-            0xc107324e2c4deff4l },
-          0 },
-        /* 116 << 232 */
-        { { 0x1bc4fa8bdadc4404l,0x0edb9534daa12ee3l,0x084481b6a5f7289cl,
-            0x7f42461d9d8fb3d2l },
-          { 0xf93f1d3212293c70l,0xc14706596bb73ea3l,0xf80834afde339cadl,
-            0x99dcfc0081f22953l },
-          0 },
-        /* 119 << 232 */
-        { { 0x497e544f9fca737el,0x7f6342210e91e1afl,0x638e500c78d7b20bl,
-            0xb1ffed3f7ebaa947l },
-          { 0x751aa54871086f83l,0x8100bb703cf97848l,0xc32f91ace19ad68fl,
-            0x7dffb6851fb9157el },
-          0 },
-        /* 125 << 232 */
-        { { 0x5108589778e25060l,0x33e3cb7316cfe6cbl,0x0884cb8d410c0822l,
-            0xaa806ecc0be3fc94l },
-          { 0x9f9121f5f692353el,0xb9ab0310f8ee3349l,0x390032ce2561973el,
-            0xc07b6c6c8856b766l },
-          0 },
-    },
-    {
-        /* 0 << 240 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 240 */
-        { { 0x1083e2ea1f095615l,0x0a28ad7714e68c33l,0x6bfc02523d8818bel,
-            0xb585113af35850cdl },
-          { 0x7d935f0b30df8aa1l,0xaddda07c4ab7e3acl,0x92c34299552f00cbl,
-            0xc33ed1de2909df6cl },
-          0 },
-        /* 3 << 240 */
-        { { 0xabe7905a83cdd60el,0x50602fb5a1170184l,0x689886cdb023642al,
-            0xd568d090a6e1fb00l },
-          { 0x5b1922c70259217fl,0x93831cd9c43141e4l,0xdfca35870c95f86el,
-            0xdec2057a568ae828l },
-          0 },
-        /* 4 << 240 */
-        { { 0x568f8925913cc16dl,0x18bc5b6de1a26f5al,0xdfa413bef5f499ael,
-            0xf8835decc3f0ae84l },
-          { 0xb6e60bd865a40ab0l,0x65596439194b377el,0xbcd8562592084a69l,
-            0x5ce433b94f23ede0l },
-          0 },
-        /* 5 << 240 */
-        { { 0x860d523d42e06189l,0xbf0779414e3aff13l,0x0b616dcac1b20650l,
-            0xe66dd6d12131300dl },
-          { 0xd4a0fd67ff99abdel,0xc9903550c7aac50dl,0x022ecf8b7c46b2d7l,
-            0x3333b1e83abf92afl },
-          0 },
-        /* 7 << 240 */
-        { { 0xefecdef7be42a582l,0xd3fc608065046be6l,0xc9af13c809e8dba9l,
-            0x1e6c9847641491ffl },
-          { 0x3b574925d30c31f7l,0xb7eb72baac2a2122l,0x776a0dacef0859e7l,
-            0x06fec31421900942l },
-          0 },
-        /* 9 << 240 */
-        { { 0x7ec62fbbf4737f21l,0xd8dba5ab6209f5acl,0x24b5d7a9a5f9adbel,
-            0x707d28f7a61dc768l },
-          { 0x7711460bcaa999eal,0xba7b174d1c92e4ccl,0x3c4bab6618d4bf2dl,
-            0xb8f0c980eb8bd279l },
-          0 },
-        /* 10 << 240 */
-        { { 0x9d658932790691bfl,0xed61058906b736ael,0x712c2f04c0d63b6el,
-            0x5cf06fd5c63d488fl },
-          { 0x97363facd9588e41l,0x1f9bf7622b93257el,0xa9d1ffc4667acacel,
-            0x1cf4a1aa0a061ecfl },
-          0 },
-        /* 11 << 240 */
-        { { 0x28d675b2c0519a23l,0x9ebf94fe4f6952e3l,0xf28bb767a2294a8al,
-            0x85512b4dfe0af3f5l },
-          { 0x18958ba899b16a0dl,0x95c2430cba7548a7l,0xb30d1b10a16be615l,
-            0xe3ebbb9785bfb74cl },
-          0 },
-        /* 13 << 240 */
-        { { 0x81eeb865d2fdca23l,0x5a15ee08cc8ef895l,0x768fa10a01905614l,
-            0xeff5b8ef880ee19bl },
-          { 0xf0c0cabbcb1c8a0el,0x2e1ee9cdb8c838f9l,0x0587d8b88a4a14c0l,
-            0xf6f278962ff698e5l },
-          0 },
-        /* 15 << 240 */
-        { { 0x9c4b646e9e2fce99l,0x68a210811e80857fl,0x06d54e443643b52al,
-            0xde8d6d630d8eb843l },
-          { 0x7032156342146a0al,0x8ba826f25eaa3622l,0x227a58bd86138787l,
-            0x43b6c03c10281d37l },
-          0 },
-        /* 16 << 240 */
-        { { 0x02b37a952f41deffl,0x0e44a59ae63b89b7l,0x673257dc143ff951l,
-            0x19c02205d752baf4l },
-          { 0x46c23069c4b7d692l,0x2e6392c3fd1502acl,0x6057b1a21b220846l,
-            0xe51ff9460c1b5b63l },
-          0 },
-        /* 17 << 240 */
-        { { 0x7aca2632f02fc0f0l,0xb92b337dc7f01c86l,0x624bc4bf5afbdc7dl,
-            0x812b07bc4de21a5el },
-          { 0x29d137240b2090ccl,0x0403c5095a1b2132l,0x1dca34d50e35e015l,
-            0xf085ed7d3bbbb66fl },
-          0 },
-        /* 19 << 240 */
-        { { 0xc27b98f9f781e865l,0x51e1f692994e1345l,0x0807d516e19361eel,
-            0x13885ceffb998aefl },
-          { 0xd223d5e92f0f8a17l,0x48672010e8d20280l,0x6f02fd60237eac98l,
-            0xcc51bfad9ada7ee7l },
-          0 },
-        /* 21 << 240 */
-        { { 0x2756bcdd1e09701dl,0x94e31db990d45c80l,0xb9e856a98566e584l,
-            0x4f87d9deab10e3f3l },
-          { 0x166ecb373ded9cb2l,0xfd14c7073f653d3el,0x105d049b92aec425l,
-            0x7f657e4909a42e11l },
-          0 },
-        /* 23 << 240 */
-        { { 0xea6490076a159594l,0x3e424d6b1f97ce52l,0xac6df30a185e8ccbl,
-            0xad56ec80517747bfl },
-          { 0xf0935ccf4391fe93l,0x866b260f03811d40l,0x792047b99f7b9abel,
-            0xb1600bc88ee42d84l },
-          0 },
-        /* 25 << 240 */
-        { { 0x2d97b3db7768a85fl,0x2b78f6334287e038l,0x86c947676f892bb1l,
-            0x920bfb1ac0a9c200l },
-          { 0x4292f6ec332041b2l,0xa30bb937c9989d54l,0x39f941ebc6d5879el,
-            0x76a450fcdfdbb187l },
-          0 },
-        /* 27 << 240 */
-        { { 0x31256089ee430db6l,0xaece9bd8f6836f56l,0x484cfc4bfb85a046l,
-            0xee1e3e2c1599b2b9l },
-          { 0x7e3c38903d122eafl,0xaa940ce0c770556cl,0x4802d6631b08fae8l,
-            0xb08a85807f69f8bal },
-          0 },
-        /* 28 << 240 */
-        { { 0x70ed0a0405411eael,0x60deb08f16494c66l,0x8cf20fc6133797bbl,
-            0x3e30f4f50c6bc310l },
-          { 0x1a677c29749c46c7l,0xfe1d93f4f11e981cl,0x937303d82e3e688bl,
-            0x01aef5a7a6aa9e85l },
-          0 },
-        /* 29 << 240 */
-        { { 0x4902f495b959b920l,0x13b0fdbdfca2d885l,0x41cbd9e7b6a2f0fal,
-            0xf9bdf11056430b87l },
-          { 0xd705a223954d19b9l,0x74d0fc5c972a4fdel,0xcbcbfed6912977eal,
-            0x870611fdcc59a5afl },
-          0 },
-        /* 31 << 240 */
-        { { 0xf4f19bd04089236al,0x3b206c12313d0e0bl,0x73e70df303feaeb2l,
-            0x09dba0eb9bd1efe0l },
-          { 0x4c7fd532fc4e5305l,0xd792ffede93d787al,0xc72dc4e2e4245010l,
-            0xe7e0d47d0466bbbdl },
-          0 },
-        /* 33 << 240 */
-        { { 0x549c861983e4f8bbl,0xf70133fbd8e06829l,0xc962b8e28c64e849l,
-            0xad87f5b1901e4c25l },
-          { 0xd005bde568a1cab5l,0x6a591acf0d2a95bal,0x728f14ce30ebcae4l,
-            0x303cec99a3459b0fl },
-          0 },
-        /* 34 << 240 */
-        { { 0x62e62f258350e6bcl,0x5a5ea94d96adba1fl,0x36c2a2844a23c7b3l,
-            0x32f50a72992f5c8bl },
-          { 0x55d685204136c6afl,0x1aafd32992794f20l,0x69f5d820b59aa9bfl,
-            0x218966a8570e209al },
-          0 },
-        /* 35 << 240 */
-        { { 0xf3204feb2f9a31fcl,0x77f33a360429f463l,0xfb9f3a5a59a1d6a7l,
-            0x4445a2e93b1a78e0l },
-          { 0xc77a9b6fd58e32d3l,0xa44e23c8302e6390l,0x7d8e00b4c0f7bcb0l,
-            0xd2e2237b0ffa46f4l },
-          0 },
-        /* 36 << 240 */
-        { { 0xb3046cb13c8ea6d3l,0xf0151b5efce2f445l,0xa968e60b55e5715el,
-            0x39e52662587dce61l },
-          { 0xfde176e0b7de2862l,0x298d83e68e8db497l,0x1042136773641bfbl,
-            0xd72ac78d36e0bb0dl },
-          0 },
-        /* 37 << 240 */
-        { { 0x2cabb94fff6b8340l,0xf425a35a21771acbl,0x564fec3d12c4a758l,
-            0x57a61af39ba8f281l },
-          { 0x5807e78c97e9a71dl,0x991d9be75b8314e6l,0x1cd90b16ec4133b9l,
-            0xff043efa0f1ac621l },
-          0 },
-        /* 39 << 240 */
-        { { 0xea6e5527d7e58321l,0xfb95c13c04056ff1l,0x9447361f2fc4e732l,
-            0x63cbc655786d0154l },
-          { 0x302c0d668610fb71l,0xbf692d6920d06613l,0x8465b74b4be8355al,
-            0xcc883c95c31356b7l },
-          0 },
-        /* 40 << 240 */
-        { { 0x4ab6e919b33eabcal,0xb58f0998a1acacbfl,0xa747e5782ddbc28fl,
-            0xf9dd04ca59866cbcl },
-          { 0x084c062ff7a0073fl,0x6d22acdfb577fc38l,0x0870ee08eacd907cl,
-            0x710b4b266c9fcf95l },
-          0 },
-        /* 41 << 240 */
-        { { 0xa99546faf1c835a7l,0x1514a5a30d59f933l,0x1f6ad0f81bedd730l,
-            0x24de76287b528aaal },
-          { 0x4d9e7845c02fff87l,0xba74f8a942c79e67l,0x5bf5015f476e285bl,
-            0x0b1a5d8b1b93b364l },
-          0 },
-        /* 43 << 240 */
-        { { 0x8c7c0d7ff839819fl,0xc82b819827a95965l,0xce7294d377270519l,
-            0xfb508d6cad47aff7l },
-          { 0xf6de15431035076al,0x697d60ac5dd465c6l,0x88d771b8a76dcd26l,
-            0x8c7ce11ab10c9c44l },
-          0 },
-        /* 44 << 240 */
-        { { 0x215ea44a08216060l,0xccfa18a187996cf6l,0xccfb2483f7eccdd2l,
-            0x07aa601ad453c66al },
-          { 0xd43cf263cffee9e2l,0x230bc099718f69bfl,0xc43de21300c193e8l,
-            0x94cf251799c8746fl },
-          0 },
-        /* 45 << 240 */
-        { { 0x4785d7f87d1320c5l,0x84bed8c3d0771dcbl,0xff28044d22254edbl,
-            0x2e5992a445f71504l },
-          { 0xcb92695b72bbf5cdl,0x9bcbde35c42422e5l,0x856594fd1d07ed86l,
-            0x3aaf0b717716b4ffl },
-          0 },
-        /* 46 << 240 */
-        { { 0x3edf24f9eebed405l,0x9e3141360eccb503l,0xf7704c25b85c2bc2l,
-            0x4cb7c1de9a3247eel },
-          { 0x798ac8f2f0b507c5l,0x6e6217206851bbf1l,0xc0b89398c0d9ed16l,
-            0xf7d5d2a09f20728fl },
-          0 },
-        /* 47 << 240 */
-        { { 0x7358a94a19f0ededl,0x5e08c4c3e32ccfbbl,0x84a8eeeb0089f071l,
-            0xdaf0514c41fc436el },
-          { 0x30fe216f310309afl,0xe72f77bd564e6fc9l,0xe7ef3bddfdc59fd5l,
-            0xd199b1c9a8e1169cl },
-          0 },
-        /* 48 << 240 */
-        { { 0xb9dc857c5b0f7bd4l,0x6990c2c9108ea1cdl,0x84730b83b984c7a9l,
-            0x552723d2eab18a78l },
-          { 0x9752c2e2919ba0f9l,0x075a3bd94bf40890l,0x71e52a04a6d98212l,
-            0x3fb6607a9f18a4c8l },
-          0 },
-        /* 49 << 240 */
-        { { 0xa0305d01e8c3214dl,0x025b3cae8d51cea3l,0xeeaf7ab239923274l,
-            0x51179407c876b72cl },
-          { 0xcf0241c7d4549a68l,0xffae7f4c793dab3dl,0xdfb5917b4bdf2280l,
-            0xcf25c870a652e391l },
-          0 },
-        /* 51 << 240 */
-        { { 0xb1345466b922e1c8l,0xae42f46ab5bf8a34l,0x1e1ab6053310e604l,
-            0x64093cd9b4d7a658l },
-          { 0x5d3b385ab3d9242cl,0x2225b99ae56f8ec7l,0x19a8cbfc9a916e11l,
-            0x11c5df831f957c03l },
-          0 },
-        /* 52 << 240 */
-        { { 0x09f1d04af381147bl,0x7be13628b26b345fl,0xd8371966d1c60b78l,
-            0xf1743c2c5d91808fl },
-          { 0x8a2966acafc71cc3l,0x0ba9702efdfc24c3l,0x60c80158e6fbb539l,
-            0x58eaee49812c32f4l },
-          0 },
-        /* 53 << 240 */
-        { { 0x31af7f5ee89d0b84l,0xa776dada6caa110bl,0xd67b7891df6d54ddl,
-            0x831613cab82b8a5cl },
-          { 0x7a4eb86ef020af6dl,0x2914fd11bd795a7bl,0xc038a273fcb54a17l,
-            0x6b2dc8e18219cc75l },
-          0 },
-        /* 55 << 240 */
-        { { 0x031fc875464ba9b5l,0xe268cf45bd812dd3l,0x443f57defbfb664al,
-            0xfd1a38544e28c2fal },
-          { 0xb8799782cb96515bl,0xa12d3e3f1138c95dl,0x0cc5ee117748ee57l,
-            0x6ab167cf955a7dfcl },
-          0 },
-        /* 57 << 240 */
-        { { 0x0d54aaca4dc1c74fl,0x74af1807bf2e0d61l,0x151254f87aebe0f1l,
-            0x4072f38bf6376095l },
-          { 0x31ebe17a26646abfl,0xdc8cb6b40ecc1282l,0x4f6326bbbc095a66l,
-            0x37dad65a0363636dl },
-          0 },
-        /* 59 << 240 */
-        { { 0xc851860a70f8c15al,0xb2d4555488368381l,0xbfd46e197019c7b6l,
-            0xa1a9b12f6bb6f33bl },
-          { 0xecfd5fe6f170c82bl,0x6d58bb52d601afc3l,0xb8b3de15fe6eb102l,
-            0xad07336886a47964l },
-          0 },
-        /* 60 << 240 */
-        { { 0x89f514c91911840fl,0xc9fa6b504cc106bcl,0x70a97f0dfe55b4f1l,
-            0xada6306be5888609l },
-          { 0xa9437881c6dc8d15l,0x0fc0f5368411f3dfl,0xd26162087a913dd2l,
-            0x4fe1c7c4e92848cdl },
-          0 },
-        /* 61 << 240 */
-        { { 0xaa18eb262e07383dl,0xb948c35c34e90f3dl,0x95e97f81d3653565l,
-            0x4a821a2687b5b75dl },
-          { 0x87b4d81c892db882l,0xa69e65d689f3bfadl,0xe475f532eb371cacl,
-            0xd8cc23fa17194d5dl },
-          0 },
-        /* 63 << 240 */
-        { { 0x3fc0052ad789d484l,0xe8c67aac29324323l,0x133fd07cf54c43d3l,
-            0xd4a0848fb91d4faal },
-          { 0xf683ce065ea5098fl,0xe84348f9887c8a76l,0x38f8c2cf79b224b6l,
-            0x327e4c534a818cb1l },
-          0 },
-        /* 64 << 240 */
-        { { 0xb6d92a7f3e5f9f11l,0x9afe153ad6cb3b8el,0x4d1a6dd7ddf800bdl,
-            0xf6c13cc0caf17e19l },
-          { 0x15f6c58e325fc3eel,0x71095400a31dc3b2l,0x168e7c07afa3d3e7l,
-            0x3f8417a194c7ae2dl },
-          0 },
-        /* 65 << 240 */
-        { { 0x0c9e9237d5f812bcl,0xdae5b7e9595f02e5l,0x5ec1dece42b1e9a8l,
-            0x506a6ef8e527a685l },
-          { 0xe3049290236af251l,0x6322dd1bf81970acl,0x1459d39c516d5e61l,
-            0x672f502d9455b694l },
-          0 },
-        /* 71 << 240 */
-        { { 0xf83788e06b228af2l,0xaafc823911f596fal,0x6d47fa592f0fcb13l,
-            0x0b7af65f1c99c5d4l },
-          { 0xbc4c185dca961e6fl,0xec02b09f158481a4l,0x4bbfd9f31423fdd4l,
-            0x0ff44a53b619644bl },
-          0 },
-        /* 77 << 240 */
-        { { 0x23e255a3ea3f59d8l,0x1f4a47a8261ac30bl,0x346bf409c8faf0b3l,
-            0xd13e73fbc03a226bl },
-          { 0x670ddc792fe8a79bl,0x335fa172f1aac412l,0xe2347de1a5ceff20l,
-            0x66e02c73381130f2l },
-          0 },
-        /* 83 << 240 */
-        { { 0xa6b874c51db717cdl,0x027d318ab00f160bl,0x578f89f49be791afl,
-            0x659ef2f01f3b5e9bl },
-          { 0xa0c593033835d84cl,0xb71e261fdb6f9a60l,0x65837c7f44b7813fl,
-            0xea776163ea4bcc96l },
-          0 },
-        /* 89 << 240 */
-        { { 0x208234118df3f15fl,0xe0514d4694f341acl,0xdc66282d6486d704l,
-            0xd5fb354ad2548389l },
-          { 0xf3e98d72df273295l,0x27ded7fa50cd09fcl,0x4f486af3c5c1c169l,
-            0xe51044150aa41ba3l },
-          0 },
-        /* 95 << 240 */
-        { { 0x66b14d296fce0aecl,0x35fe5e60c8915ceal,0x06a023b736c5da39l,
-            0x0977c9f0404e932fl },
-          { 0x1dd6f95db54866del,0xe5ec79359387430cl,0x98dee57b5ef42e67l,
-            0x1707f01912ed3ad0l },
-          0 },
-        /* 101 << 240 */
-        { { 0xeb3abdedeec82495l,0x587a696e764a41c7l,0x13fdcce2add1a6a3l,
-            0x299a0d43286b2162l },
-          { 0x2c4e71e18131f1b4l,0x48f0e806ada3d04fl,0x91d2de80c57491b2l,
-            0x1b1266236cc355cbl },
-          0 },
-        /* 107 << 240 */
-        { { 0xdc28afe5a6d44444l,0xb5ad8d3cfe0b947bl,0x50c6126c96ce9fb9l,
-            0x5384a998d1fc7d39l },
-          { 0xa43ff8898788f51cl,0x30359593a6bc7b87l,0x3e1691dccc0d019al,
-            0xda0ef5ad7943abcdl },
-          0 },
-        /* 113 << 240 */
-        { { 0x5bc58b6f020b5cd7l,0x9098e202e103ff4el,0xc1f1a3d9f6fce7c7l,
-            0xf9dc32a856090ccel },
-          { 0x4c7d2520a9cc3b09l,0x98d47b5dd8c4dfcel,0xdcee788297e689b4l,
-            0xe5eec71815f982b9l },
-          0 },
-        /* 116 << 240 */
-        { { 0xff154bb8a1e1538cl,0xb9883276f7dcfae9l,0x1ac0a4d2c1c8cba4l,
-            0x511a54cc76e6b284l },
-          { 0xe2da436f00011f6dl,0x4d357a190f43a8adl,0xf36899c95458655bl,
-            0xe5f75c768d613ed9l },
-          0 },
-        /* 119 << 240 */
-        { { 0x15b4af1d93f12ef8l,0x3f4c5868fd032f88l,0x39f67a08f27d86bdl,
-            0x2f551820da32db6bl },
-          { 0x72fe295ac2c16214l,0x39927c381a2cf9afl,0x8dda23d6b1dc1ae7l,
-            0x1209ff3ed32071d4l },
-          0 },
-        /* 125 << 240 */
-        { { 0x861fdceb9a3c6c6fl,0x76d7a01386778453l,0xbf8d147cd5e422cbl,
-            0xd16f532e51772d19l },
-          { 0x72025ee2570d02cdl,0xe8e7737be80c7664l,0x81b7d56c334a8d8fl,
-            0x42477a0ff1b79308l },
-          0 },
-    },
-    {
-        /* 0 << 248 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 248 */
-        { { 0xf306a3c8ee3c76cbl,0x3cf11623d32a1f6el,0xe6d5ab646863e956l,
-            0x3b8a4cbe5c005c26l },
-          { 0xdcd529a59ce6bb27l,0xc4afaa5204d4b16fl,0xb0624a267923798dl,
-            0x85e56df66b307fabl },
-          0 },
-        /* 3 << 248 */
-        { { 0x896895959884aaf7l,0xb1959be307b348a6l,0x96250e573c147c87l,
-            0xae0efb3add0c61f8l },
-          { 0xed00745eca8c325el,0x3c911696ecff3f70l,0x73acbc65319ad41dl,
-            0x7b01a020f0b1c7efl },
-          0 },
-        /* 4 << 248 */
-        { { 0x9910ba6b23a5d896l,0x1fe19e357fe4364el,0x6e1da8c39a33c677l,
-            0x15b4488b29fd9fd0l },
-          { 0x1f4392541a1f22bfl,0x920a8a70ab8163e8l,0x3fd1b24907e5658el,
-            0xf2c4f79cb6ec839bl },
-          0 },
-        /* 5 << 248 */
-        { { 0x262143b5224c08dcl,0x2bbb09b481b50c91l,0xc16ed709aca8c84fl,
-            0xa6210d9db2850ca8l },
-          { 0x6d8df67a09cb54d6l,0x91eef6e0500919a4l,0x90f613810f132857l,
-            0x9acede47f8d5028bl },
-          0 },
-        /* 7 << 248 */
-        { { 0x45e21446de673629l,0x57f7aa1e703c2d21l,0xa0e99b7f98c868c7l,
-            0x4e42f66d8b641676l },
-          { 0x602884dc91077896l,0xa0d690cfc2c9885bl,0xfeb4da333b9a5187l,
-            0x5f789598153c87eel },
-          0 },
-        /* 9 << 248 */
-        { { 0xb19b1c4fca66eca8l,0xf04a20b55663de54l,0x42a29a33c223b617l,
-            0x86c68d0d44827e11l },
-          { 0x71f90ddeadba1206l,0xeeffb4167a6ceeeal,0x9e302fbac543e8afl,
-            0xcf07f7471aa77b96l },
-          0 },
-        /* 10 << 248 */
-        { { 0xcf57fca29849e95bl,0x96e9793ed510053cl,0x89fa443d07d3e75el,
-            0xfe2bc235e52800a0l },
-          { 0x1c208b8c0ac7e740l,0xb5852a49e7222263l,0x217e4005e541e592l,
-            0xee52747dc960b0e1l },
-          0 },
-        /* 11 << 248 */
-        { { 0x5fd7cafb475952afl,0x23a6d71954a43337l,0xa83a7523b1617941l,
-            0x0b7f35d412b37dd4l },
-          { 0x81ec51292ae27eafl,0x7ca92fb3318169dfl,0xc01bfd6078d0875al,
-            0xcc6074e3c99c436el },
-          0 },
-        /* 13 << 248 */
-        { { 0x4ca6bdebf57912b8l,0x9a17577e98507b5al,0x8ed4ab7759e51dfcl,
-            0x103b7b2a470f5a36l },
-          { 0x0c8545ac12553321l,0xab5861a760482817l,0xf4b5f602b9b856cfl,
-            0x609955787adf2e5fl },
-          0 },
-        /* 15 << 248 */
-        { { 0x60ce25b1ee5cb44fl,0xddcc7d182c2d7598l,0x1765a1b301847b5cl,
-            0xf5d9c3635d0d23b7l },
-          { 0x42ff1ba7928b65d0l,0x587ac69d6148e043l,0x3099be0dd320390bl,
-            0xa7b88dfc4278329fl },
-          0 },
-        /* 16 << 248 */
-        { { 0x80802dc91ec34f9el,0xd8772d3533810603l,0x3f06d66c530cb4f3l,
-            0x7be5ed0dc475c129l },
-          { 0xcb9e3c1931e82b10l,0xc63d2857c9ff6b4cl,0xb92118c692a1b45el,
-            0x0aec44147285bbcal },
-          0 },
-        /* 17 << 248 */
-        { { 0x7685bb9e0ba4e0b7l,0x330a7ebc5e58c29bl,0xbc1d9173e8a3797al,
-            0x7c506a16ea60f86cl },
-          { 0x9defb9248c099445l,0xcf1ddcc0256df210l,0x4844ce293d07e990l,
-            0x92318e37e2628503l },
-          0 },
-        /* 19 << 248 */
-        { { 0x61acd597fdf968d7l,0x7321a8b26598c381l,0xcb86a2809f448a0cl,
-            0x38534a01855df66al },
-          { 0xc119ec141e29037fl,0xe23c20ad0b42ba67l,0xefb1c4e033fb4f22l,
-            0xf088358f445a5032l },
-          0 },
-        /* 21 << 248 */
-        { { 0x2d73f5d1b8475744l,0xcc297e0a9d399b06l,0xa8c61d4038d3df06l,
-            0xacc6e8651a2d27a0l },
-          { 0x63dd6f6230153bf2l,0x6b23ad7bd73b83b7l,0x25382bf767ff7dcdl,
-            0x7e268c8fcf7ce2d1l },
-          0 },
-        /* 23 << 248 */
-        { { 0x4b9161c3cb2ebef1l,0x6009716b669ed801l,0x97c65219aacefe44l,
-            0xde13597d71aae4b5l },
-          { 0x3a077a816141d651l,0xe1b4e80129f876eal,0x729aed6d5c00c96cl,
-            0x0c6f404374cc645el },
-          0 },
-        /* 25 << 248 */
-        { { 0x22c51812df5a66e1l,0x1c8069c9ae7dedeal,0xcff9d86f0eea5180l,
-            0x676dbd6f44235ddal },
-          { 0xa53f01383db1ad42l,0xd079e571bcf19029l,0x1e37b9ecfab0cf82l,
-            0x93ae35ed4844e9c4l },
-          0 },
-        /* 27 << 248 */
-        { { 0xdaee55a543756358l,0x0ace18d41b2d3f89l,0x3391fa36824dd7d4l,
-            0x7b9963d1770e5f3fl },
-          { 0xc1fb9a78c94f724dl,0x94ff86fe76c4da6bl,0xb5d928c64170609bl,
-            0xc9372becfb015a9fl },
-          0 },
-        /* 28 << 248 */
-        { { 0x9c34b650e16e05e9l,0x965a774094e74640l,0xa3fd22fbcea3f029l,
-            0x1eb6a9688f95277cl },
-          { 0x2520a63d7bad84f6l,0xad917201f58f2feel,0xea92c1669b840d48l,
-            0x12109c4aacef5cbdl },
-          0 },
-        /* 29 << 248 */
-        { { 0xd85850d0d407a252l,0x6fa3b14de63909d4l,0x2ff9f6593e0fba69l,
-            0x7f9fd2a2d1b2cd0bl },
-          { 0x611233d745ad896al,0xfe4211648df850f9l,0x7808832399e32983l,
-            0x4b040859dee6741dl },
-          0 },
-        /* 31 << 248 */
-        { { 0x7dd2afd456e1ed5cl,0xd48429ec41ba4992l,0x97a02188968bab27l,
-            0x09ecf813e63c4168l },
-          { 0xf4ac65e77288b10cl,0x10630ab2afac7410l,0x4e3e59c3bb049e56l,
-            0x25972fff40fea0b1l },
-          0 },
-        /* 33 << 248 */
-        { { 0xfd8363da98365c18l,0x8aa57b1a8d47bf91l,0x423dce57695f4dd6l,
-            0xfccf54d4cc17f034l },
-          { 0x8fdba27c3610ea51l,0xcc0a06d654306b06l,0xb97a121c389b9dfdl,
-            0x7dbb90eb1ed0ca42l },
-          0 },
-        /* 34 << 248 */
-        { { 0xd32d7cec0094e84cl,0x862ae25e2ece8f72l,0x8644ef1cdfceb8abl,
-            0x68a9969c8e225628l },
-          { 0xdf209e27b3117876l,0x308a6e1882ba242bl,0xcbd09a659bf0cdb6l,
-            0x79f2826cc85b9705l },
-          0 },
-        /* 35 << 248 */
-        { { 0x3b36b6bf8f011496l,0xea6acc1a9bcf6ef8l,0x6db132263b101f12l,
-            0x4fc4e35e3b7585c3l },
-          { 0x641de27556eb64c6l,0x9b2834d3f3b08519l,0xebb76a2ba1f44b40l,
-            0x1b545ccd3cd31677l },
-          0 },
-        /* 36 << 248 */
-        { { 0xab293027aad991c1l,0x598d0bf8849be4b7l,0x8c94a21ab972da90l,
-            0xada4cfdd7ecfa840l },
-          { 0x93d4b9c0fbcec63al,0x7ca617a203219a34l,0x900424eb6a652a55l,
-            0xaf9346e9eb8562e0l },
-          0 },
-        /* 37 << 248 */
-        { { 0x9681a73d2d8bc904l,0x8b5f9b317b1553bel,0xfb03b874f6bc852fl,
-            0x8e658fb8cbbec8b0l },
-          { 0x9b2ff17bb9e9f9d1l,0xf46e9bf3e8679854l,0x7fbb1323618ed3aal,
-            0x064a1c5d714ebc3dl },
-          0 },
-        /* 39 << 248 */
-        { { 0xac0bdfc39f0e69dcl,0x71957386ae12f132l,0xa263ef2e6aa90b5bl,
-            0xa94b152390d42976l },
-          { 0xfb2d17741bcdbf7bl,0xba77b77c3a04f72fl,0xa6818ed8ec3e25a1l,
-            0x2e0e01743733e251l },
-          0 },
-        /* 40 << 248 */
-        { { 0xc3e04d7902381461l,0xb1643ab5911bc478l,0xc92becfa390b3ef2l,
-            0x54476778acd2f1b6l },
-          { 0x8daa0c4d66bf3aafl,0x2bc1287b2c21c65al,0xee182910b5a13ac3l,
-            0xbb04730090b0790al },
-          0 },
-        /* 41 << 248 */
-        { { 0x8bdd6f35a8540489l,0x788c03e5ee390d4el,0x203323c18f653017l,
-            0x39953308c4bc0094l },
-          { 0x6ee0857118308d0bl,0x70e9f90b450b0002l,0x191662aa8139f145l,
-            0xd7c5415b62d71124l },
-          0 },
-        /* 43 << 248 */
-        { { 0x41b37d72b927231cl,0xca17b5429e4de13al,0x7bc03469cded2ce3l,
-            0x961b0ecb4f4560f9l },
-          { 0x7c5bd41b43d31fa1l,0x3ed047f643f44dc3l,0x5b02083efe1a4d14l,
-            0xcc2c66ac18b330bcl },
-          0 },
-        /* 44 << 248 */
-        { { 0x83766947d17d4e0bl,0xc5772beefdc3a47bl,0x765a50db1a6fd0ffl,
-            0x17f904ba45b0995el },
-          { 0xcee643832883487el,0xf56db7f3c270aaedl,0x6738d94f46cb1fd9l,
-            0xc8fa426a142fd4d5l },
-          0 },
-        /* 45 << 248 */
-        { { 0xc85bef5b5a78efcel,0xaf380c6b0580e41el,0x6c093256a43b8d9bl,
-            0xed9d07bbea670933l },
-          { 0xfdb9a295f1682c6el,0x4cc29a63532b6bb7l,0x21a918f9f8e42dd0l,
-            0x9ac935ce0edacca0l },
-          0 },
-        /* 46 << 248 */
-        { { 0xbfe48a8ff43daf9dl,0xd7799b31b313c052l,0x46d480d77119c60el,
-            0x5090d91f0b80bcb9l },
-          { 0xc94c4c1e873bd7bfl,0x16e69b4f9915aa0al,0x769be02bb1d5928cl,
-            0x3fdaf62162e1d85al },
-          0 },
-        /* 47 << 248 */
-        { { 0x03497a57371c1b5cl,0x11e4c0b3552ab6abl,0xf857061f0a169ee7l,
-            0xc21c6c43e6d1bc66l },
-          { 0x706283a82832be7al,0xd35b143299aba62cl,0x7f4da83de9aef62dl,
-            0x2b7e5fc8723fa4e5l },
-          0 },
-        /* 48 << 248 */
-        { { 0xae485bb72b724759l,0x945353e1b2d4c63al,0x82159d07de7d6f2cl,
-            0x389caef34ec5b109l },
-          { 0x4a8ebb53db65ef14l,0x2dc2cb7edd99de43l,0x816fa3ed83f2405fl,
-            0x73429bb9c14208a3l },
-          0 },
-        /* 49 << 248 */
-        { { 0xc086e737eb4cfa54l,0x9400e1ad3c44aad9l,0x210bba94336959b4l,
-            0x08621a809106f0cal },
-          { 0x2ae66096c510ee9cl,0x2ba21617fc76a895l,0xc0707f8b0c186f1el,
-            0x1fe170a3ed0bfe25l },
-          0 },
-        /* 51 << 248 */
-        { { 0x3780fe2084759c5cl,0x716ec626b7050aa7l,0x6a43fb8b84b63bd1l,
-            0xb01098a039bc449fl },
-          { 0x96b3ff8ebb7daa4dl,0x2d146882654a7f01l,0x2500f701dcae6143l,
-            0xc13d51d01626fd3bl },
-          0 },
-        /* 52 << 248 */
-        { { 0x08ed8febd56daf06l,0x8d98277b4a837f69l,0x9947c636a9b6e05al,
-            0x58c8a77ac0d58abdl },
-          { 0xf45496a45f121e4fl,0x16cd67c71076d3d3l,0xecbd1958e3fb0c5dl,
-            0xfbe185ec38e1eb47l },
-          0 },
-        /* 53 << 248 */
-        { { 0x65b067eb740216e3l,0x1e19a71479db8760l,0x8d30dca18878de5al,
-            0x627d03e8aa47c005l },
-          { 0x096d58c0d2536c96l,0x232e6a4d69b12c2al,0x850eb8c0e7044bcel,
-            0xd9cf923bef2ee9a1l },
-          0 },
-        /* 55 << 248 */
-        { { 0x8b301094c8eaee90l,0x9a96950b8330928fl,0x472ba105faccc3bal,
-            0x00f8620e9153172al },
-          { 0x019b8164303fcdf5l,0x614d5c3c41fb4c73l,0x632d98f2c5992f89l,
-            0xfbeb29d790e2dea5l },
-          0 },
-        /* 57 << 248 */
-        { { 0xefd48b577f91d6e0l,0x8575605595bcf5d4l,0x7677b4a7bb9d891bl,
-            0xdc9931e9685912c9l },
-          { 0x69bca306f31a07c8l,0x3dd729534962a7f0l,0xdcea49cc9d366c2al,
-            0xce664ba7dc79a57dl },
-          0 },
-        /* 59 << 248 */
-        { { 0x7842d547013ec3b5l,0xa2785ceb433cf990l,0x9d667e5f700ab14al,
-            0x4b46f362a0f46d55l },
-          { 0x152c0e80cc7a3487l,0x7f3a88cef86f5e68l,0x6f950a73f1b2a75fl,
-            0x9be5b1aa51d24f3bl },
-          0 },
-        /* 60 << 248 */
-        { { 0xaea68626dc4ad4f4l,0x5dc516824ddbc0b6l,0xa76697bd602e9065l,
-            0xbeeb3ea58c37888el },
-          { 0x1ec4a2f214569113l,0xe48b820ca35f4484l,0x9fb560949ae44df2l,
-            0x6ca1346292cc09fdl },
-          0 },
-        /* 61 << 248 */
-        { { 0x887e0b87bcdc3a36l,0x6b0d617d503dee65l,0x96bda1f6cebcb893l,
-            0xdc0dd17341e20b3el },
-          { 0x812fbacfa6657c11l,0x32492fcbc94a6f4bl,0x854a0bcb6a772123l,
-            0x1ed573f65d463f31l },
-          0 },
-        /* 63 << 248 */
-        { { 0x22c7ef7bd022cc4dl,0xeec383d61e63b4bcl,0x52e0aaa06502b46fl,
-            0x9224187ded5e41bfl },
-          { 0x3a01f53dd26faf1cl,0x9bc4ee2e4e591d10l,0x10b7a98eea7e4c88l,
-            0xe521c150e2c1beccl },
-          0 },
-        /* 64 << 248 */
-        { { 0xb618d590b01e6e27l,0x047e2ccde180b2dcl,0xd1b299b504aea4a9l,
-            0x412c9e1e9fa403a4l },
-          { 0x88d28a3679407552l,0x49c50136f332b8e3l,0x3a1b6fcce668de19l,
-            0x178851bc75122b97l },
-          0 },
-        /* 65 << 248 */
-        { { 0x26f9b9322ed53a71l,0x0bac7348c72ef2e0l,0x7e96001da5c6faf1l,
-            0x5d43f76dea00eb2dl },
-          { 0x1327370f44f1c478l,0x1c83a9ac6bb964c8l,0xa3a9769f76ffbd25l,
-            0xdf045fb6b04f1bddl },
-          0 },
-        /* 71 << 248 */
-        { { 0x4283898d556b975el,0x6e2301ffe3880361l,0xc6d3b2bbe9198077l,
-            0xc4799578d21cac02l },
-          { 0x11448ff8f784eb7cl,0xb775973fbb81898dl,0x4e51f061519c76b9l,
-            0xaba1f3ef3cad0393l },
-          0 },
-        /* 77 << 248 */
-        { { 0x59d60c1c9b339830l,0x5af60a44ac32746dl,0x5ac006bc9dea8d80l,
-            0x4a2a56d97f2b1180l },
-          { 0x2032845a46946fc4l,0xe25b911226a3b503l,0xfed89db9a28827d3l,
-            0xdd2d7e90c6b74593l },
-          0 },
-        /* 83 << 248 */
-        { { 0x9b047a26cda38ecfl,0x6889284f5f6cb442l,0x4d128bcb14753820l,
-            0x8f9937c160eedd78l },
-          { 0xe333bad751ab9127l,0xd31b01c67ace3b19l,0x0732de39d7c0b4bel,
-            0xad04fa4c649e2b9bl },
-          0 },
-        /* 89 << 248 */
-        { { 0x02e042689d1495bal,0x95dca5a85591b5f8l,0xb10488d856f46c71l,
-            0x97829baf3590000al },
-          { 0xaeda5cb378c9e78al,0x3615873a7ba1c71cl,0x7c9f9f4d4333aa12l,
-            0x893fab42cea8e6d3l },
-          0 },
-        /* 95 << 248 */
-        { { 0x9eb09fff69aaa09fl,0xf36678a926731322l,0x8be61ee1cafcabafl,
-            0x77a172f558ddb763l },
-          { 0x7e09dfc66471130el,0x7f8909791039771el,0x0e44071d37800b9bl,
-            0x09123d27fe762d10l },
-          0 },
-        /* 101 << 248 */
-        { { 0xffd455a7a1b7fdd6l,0xb6162cb4dabdffael,0xf859519ec89c0e56l,
-            0x07406c1b421f2846l },
-          { 0x42db24ed9e96ddbbl,0x03bcae092dc5da85l,0x75099cd217aa7493l,
-            0x8cd1aa4266b8740dl },
-          0 },
-        /* 107 << 248 */
-        { { 0xe94333d5dde7fec3l,0x894fd673745a9be3l,0xaf3d97c725683748l,
-            0xeaa469a2c9ec165fl },
-          { 0xc9a18decdc7abd3bl,0xf059008082717b02l,0x9816374a4fdf4300l,
-            0x449d3eb74fb5a6cel },
-          0 },
-        /* 113 << 248 */
-        { { 0x7fc983ebd28001a6l,0xeabf5276dae74b6bl,0x50adb67d742ed0a5l,
-            0x1d2ad363650e1446l },
-          { 0x5a564253d122f5d0l,0x7e5aefc7e30471del,0xdc64cbb3e5dc2f2cl,
-            0xe645b9fa9437be4el },
-          0 },
-        /* 116 << 248 */
-        { { 0x0f58cec54e27d357l,0x08dcf2b70004539el,0xb1ead64104f96709l,
-            0x350fed185a914c72l },
-          { 0x44f43523c5147854l,0x45f8b46f46d04ac7l,0x62c306869a449d51l,
-            0xaacc0f0d9e66d9a3l },
-          0 },
-        /* 119 << 248 */
-        { { 0x94cb62e5bdd61b63l,0xe6ce5b5104a0ec57l,0x0461cb95f0bda8a4l,
-            0xca2d6220cbadfe8fl },
-          { 0x6c19bdf03c1ad65el,0x774a49bae04239d5l,0xf78cb7404a2fd59dl,
-            0xaebf90ed66a09130l },
-          0 },
-        /* 125 << 248 */
-        { { 0x10e4074857cc8d54l,0x29985831918e3cf9l,0x3d87def9f2e344eel,
-            0x8899992c68977860l },
-          { 0xbdc8d73b210f3c50l,0x98aa042fa9857f46l,0x76a34daf6c71357fl,
-            0x086289d3200bcb6dl },
-          0 },
-    },
-    {
-        /* 0 << 256 */
-        { { 0x00, 0x00, 0x00, 0x00 },
-          { 0x00, 0x00, 0x00, 0x00 },
-          1 },
-        /* 1 << 256 */
-        { { 0xb4e370af3aeac968l,0xe4f7fee9c4b63266l,0xb4acd4c2e3ac5664l,
-            0xf8910bd2ceb38cbfl },
-          { 0x1c3ae50cc9c0726el,0x15309569d97b40bfl,0x70884b7ffd5a5a1bl,
-            0x3890896aef8314cdl },
-          0 },
-        /* 3 << 256 */
-        { { 0x996884f5903fa271l,0xe6da0fd2b9da921el,0xa6f2f2695db01e54l,
-            0x1ee3e9bd6876214el },
-          { 0xa26e181ce27a9497l,0x36d254e48e215e04l,0x42f32a6c252cabcal,
-            0x9948148780b57614l },
-          0 },
-        /* 4 << 256 */
-        { { 0xab41b43a43228d83l,0x24ae1c304ad63f99l,0x8e525f1a46a51229l,
-            0x14af860fcd26d2b4l },
-          { 0xd6baef613f714aa1l,0xf51865adeb78795el,0xd3e21fcee6a9d694l,
-            0x82ceb1dd8a37b527l },
-          0 },
-        /* 5 << 256 */
-        { { 0x4a665bfd2f9fd51al,0x7f2f1fe2481b97f7l,0xcad05d69ad36ce50l,
-            0x314fc2a4844f4dedl },
-          { 0xd5593d8cb55fc5c6l,0xe3510ce8bfb1e23dl,0xf9b7be6937453ccel,
-            0xd3541b7969fae631l },
-          0 },
-        /* 7 << 256 */
-        { { 0x711b8a4176a9f05dl,0x06ca4e4b9011d488l,0x543bc62ba248a65el,
-            0x017535ffc9290894l },
-          { 0x840b84ce406851d7l,0xafa3acdf90e960b4l,0xac3394af7128fd34l,
-            0x54eb4d5b2ac0f92cl },
-          0 },
-        /* 9 << 256 */
-        { { 0x3549a0f14df48fecl,0x6ae7b1eec239f83al,0x001dcf253eb90ff3l,
-            0x02ff0f02581e90edl },
-          { 0x72921d8ca103dcefl,0x2c513c3c5876293el,0xc07064ca6b68875el,
-            0x7198d44653b9537cl },
-          0 },
-        /* 10 << 256 */
-        { { 0x58349b77685e089bl,0x1c678441219b7b8cl,0xba8da91f61e2e20dl,
-            0xf9c50b8c309fd4e6l },
-          { 0x99b0164996d0ef64l,0xac334ded60cdb63al,0x6b9ada19fb0bce4fl,
-            0x39dc9375c7896377l },
-          0 },
-        /* 11 << 256 */
-        { { 0x068dda8b7e1bc126l,0x77c7c58176243a21l,0xcc8ba55c875f9dael,
-            0xdde7afe2ce469f95l },
-          { 0xde2a15f5e9523b85l,0x447512c6d85674ael,0x5691f89e12c6c20cl,
-            0xd64ef40e0fae4513l },
-          0 },
-        /* 13 << 256 */
-        { { 0x10db2041c4d9eb40l,0x420eccb724f03f8al,0x64470fd17d29080el,
-            0xf66c5b4416e52414l },
-          { 0xa32cc70e4ca94031l,0xa67931592c8401bal,0x34f2dc29abfcc58dl,
-            0x6f340f9a07325d7dl },
-          0 },
-        /* 15 << 256 */
-        { { 0xf55d446b060a52bbl,0x2f33cb9f02939f24l,0x0f27a01bc8953718l,
-            0x362882917fcd3932l },
-          { 0x7485613488ed4436l,0xcfe69e27195f089el,0xd6ab040a8ff10bd8l,
-            0x9741c5472e4a1623l },
-          0 },
-        /* 16 << 256 */
-        { { 0xc52d8d8b6d55d6a4l,0xc4130fb3be58e8f9l,0x5f55c345e1275627l,
-            0xb881a03c117042d3l },
-          { 0x00a970a53238d301l,0x40d7cf2412a2c4f1l,0xace4a2f5d770ea74l,
-            0x36a2e587e96940b2l },
-          0 },
-        /* 17 << 256 */
-        { { 0x84793d9fef12d4c8l,0x04b89b152d8a163cl,0x0fdb566fb4a87740l,
-            0xf7e6e5cf9e595680l },
-          { 0xbcb973e41c5cd74el,0xafcb439fe4ed49d8l,0xd5c0820aebbae8eel,
-            0x23483d836f56e2a2l },
-          0 },
-        /* 19 << 256 */
-        { { 0x91f9b8be5e8ad115l,0xf1fd6a2e225db496l,0xf362d2cf4a444085l,
-            0x033d9201eea043ebl },
-          { 0x1e50c0989951a150l,0x4814fca5cfcf1f94l,0xaf3e8ef41bf82de5l,
-            0xba0e2991038cff53l },
-          0 },
-        /* 21 << 256 */
-        { { 0x904a41ae5fc373fal,0x235556d61a6a3fc4l,0xe44eb3ea36eeb570l,
-            0xa4e1b34a26ba5ca6l },
-          { 0x210e7c9131180257l,0x2c28669622158b0cl,0xc78b69c783ddd341l,
-            0xfc05941b294e1750l },
-          0 },
-        /* 23 << 256 */
-        { { 0x70666f51fc167dedl,0x47e9e289fe75b8d1l,0x8a5f59739605a03el,
-            0x19876a58dd579094l },
-          { 0x69a5c8cca964e426l,0xed74a652ccf20306l,0x5c93ae3cf06d31d5l,
-            0x51922fa2127a8a12l },
-          0 },
-        /* 25 << 256 */
-        { { 0xa18e26f99e3d509el,0xbc296dd2c10814fal,0x5dadd6eeaa24e147l,
-            0xdba2121a8340f12el },
-          { 0xd348e7f3e245ca21l,0x1e45a42978e3eb5bl,0x252bf89c169677bbl,
-            0xfb33a2564021ac55l },
-          0 },
-        /* 27 << 256 */
-        { { 0x30dc46586e7d72b8l,0x38df46fb0d81c3d6l,0x901bab6e10e84162l,
-            0x25d7303ff7932801l },
-          { 0xe781d5f37500be42l,0x9a7104c3380ff208l,0xfa801181652121a1l,
-            0xef89f4f18d3bed43l },
-          0 },
-        /* 28 << 256 */
-        { { 0xbe4ae5683594917al,0xef7c1c47a04bf81el,0xa1dc3612046d91a0l,
-            0x3eee37affb11b338l },
-          { 0x7e90278fd03d8f51l,0x3045a6da4fa183c6l,0xb39e573391cd16a9l,
-            0xc748a504e54e9411l },
-          0 },
-        /* 29 << 256 */
-        { { 0x07804331a1c6ec56l,0x25358e795b347123l,0x1ab9b39acf9432a4l,
-            0x9628501d0a7881cel },
-          { 0x749d58988a46d98el,0x01ea43346a17c321l,0xe2b197f9b1f9160fl,
-            0x2052c7c07815f2a2l },
-          0 },
-        /* 31 << 256 */
-        { { 0xaa691bfbc57a1a6dl,0x06cae127d737d525l,0x5be04b2f963c7c98l,
-            0x936b1f5bfc00bc4al },
-          { 0x3fed4ac77eda6a34l,0xba6ca7aa2500a438l,0x1e979fa6786c2a75l,
-            0xa3db26bec13f37d4l },
-          0 },
-        /* 33 << 256 */
-        { { 0x20afae333d7006d1l,0xdcbca6fbbda467d1l,0x2714b3827df4006cl,
-            0x9abc0510c8e94549l },
-          { 0x5b30a6d464c14915l,0xba91d0c35752b44fl,0x7ad9b19bbb389f1fl,
-            0xe4c7aa04ef7c6e13l },
-          0 },
-        /* 34 << 256 */
-        { { 0x1e24a3f23d12e2b6l,0xf99df403febd6db3l,0x61e580a6b0c8e12fl,
-            0x819341b7c2bfe085l },
-          { 0xd53002d640828921l,0x31e1eb65cea010efl,0xc48d0cfe85b3279fl,
-            0xb90de69089f35fa5l },
-          0 },
-        /* 35 << 256 */
-        { { 0xa3f6fd3c88ed748fl,0x6d72613af48127b9l,0xe85ed703d1e6f7e5l,
-            0xbb563db449636f40l },
-          { 0x23bae3c9708497bal,0x89dbff163aa65cf4l,0x70861847e6c0850al,
-            0x5ef19d5d48b2e90cl },
-          0 },
-        /* 36 << 256 */
-        { { 0xab6a1e13107f7bacl,0x83a8bc57972091f5l,0x3c65b454f6dcba41l,
-            0xd7606ff96abc431dl },
-          { 0xa3af9c189bd09971l,0x6ddd3bbf276bad63l,0xd2aba9beab4f0816l,
-            0x8f13063c151581edl },
-          0 },
-        /* 37 << 256 */
-        { { 0xf9c02364f5761b15l,0x3cfa250afd478139l,0x67d51e7416e26191l,
-            0x0281bbf65eda396cl },
-          { 0xbd38d4d70d1f4510l,0x2032a930edff593el,0x0ab74a0cf2ea4ad7l,
-            0xb95aa9c3302498d6l },
-          0 },
-        /* 39 << 256 */
-        { { 0x2995495dd7da3c7cl,0x28d579d0a0bb703el,0xabec6afec8288837l,
-            0x93c34dfd05ab989bl },
-          { 0xcc94f05dde5ea3dfl,0xc3e3d4ef90f436e6l,0x32b3dee1cf59dc4el,
-            0x5eab01635447d9d9l },
-          0 },
-        /* 40 << 256 */
-        { { 0xd31c5e8e2c23464el,0x5bcc382f50cfbde7l,0x6cee3d8da93c3d9bl,
-            0xbee2948909ee62acl },
-          { 0x4848d59c10742b84l,0x2486796fe35e9c84l,0x1a1d9570cd8f391al,
-            0x839aa0913eedb743l },
-          0 },
-        /* 41 << 256 */
-        { { 0xae02a7ce0f83f369l,0x3b67c56097994835l,0x715def441ae4bbeal,
-            0x11e764ee59f6b9eel },
-          { 0x70c775051c962c3al,0x42811507d937a258l,0x06dbdceed03e6e86l,
-            0x39a3a7ed48cae79el },
-          0 },
-        /* 43 << 256 */
-        { { 0xa32e729fb220eef8l,0x12d876baf37ac5d7l,0x9376ab45105a7f34l,
-            0xb422331a4deb7275l },
-          { 0x6ea07fb7686dea5el,0xba67ed3e1d8e32c9l,0x5ae52632bbc6bb9cl,
-            0xdca55b86d1397575l },
-          0 },
-        /* 44 << 256 */
-        { { 0xd9183f74378200b1l,0xe5ea1645762f5605l,0x78b42e2f7bd6290fl,
-            0xa0bdfccc07fa0899l },
-          { 0x2f92ea52dacda629l,0x810b4e6c48de27e2l,0x013d8587d9d1250dl,
-            0xc153d519dd5141d5l },
-          0 },
-        /* 45 << 256 */
-        { { 0x8f1f6cb5b8f1d719l,0xa9abc27b04e15a4el,0xc0d944a92ad42296l,
-            0x69ecc877f3d2b0e5l },
-          { 0xec60dbea16a5581al,0x2a0ead5fb85130d6l,0x7b3d2ebb6fddac23l,
-            0x06213269ac448663l },
-          0 },
-        /* 46 << 256 */
-        { { 0xe1074008ac11e180l,0xdff3339c14b8f830l,0x136e22be636504f3l,
-            0xb07ae98aa09c5c4cl },
-          { 0x9b0a0517192168e9l,0x39e09fac86ad0865l,0x24f90705adb08d41l,
-            0x9c699cc759d3be24l },
-          0 },
-        /* 47 << 256 */
-        { { 0xd9e16551907e36b0l,0x57f24b6caf91cb5al,0xbdb7dfdb062edae4l,
-            0x99e3bffe4b85f424l },
-          { 0x250774f4b2961ba7l,0xe7c0f2386d993c51l,0xcd0aae29f559b4bdl,
-            0x3b12893a09a6859bl },
-          0 },
-        /* 48 << 256 */
-        { { 0xac177eb985ae12c3l,0x8e6cb5cc6cf76537l,0x134abb19f265f9e3l,
-            0xc37309b71ba3f55dl },
-          { 0x570833b4392d564bl,0xaa273a27d8c22f00l,0x9ba6b6276006773al,
-            0x2156c94f0a16c092l },
-          0 },
-        /* 49 << 256 */
-        { { 0x2be0436b408e1258l,0xb179a2e34f47f121l,0x140b948fa42d3cfcl,
-            0x96649c6700d2b4e6l },
-          { 0x2bf934c7d08a4b34l,0x371c770136b472ddl,0x36297876e06adc73l,
-            0x59e0d8251c3e6558l },
-          0 },
-        /* 51 << 256 */
-        { { 0x9368cfd304a8bc81l,0x145249d4c49e58c7l,0x8c7ac1891392be01l,
-            0x58cbcb5fbc7b0903l },
-          { 0x502218a1a0377b0al,0x5c17eb8afb625836l,0x845c09ef349f4d26l,
-            0x15fdeb2554ddce85l },
-          0 },
-        /* 52 << 256 */
-        { { 0xf773535a64e8344dl,0xb8486a33d0dbabe6l,0x43c2df99b578862dl,
-            0xcead29a11a39820el },
-          { 0x3e5466fe63134d63l,0xc37ea88fdf43a104l,0x3b34ac34bbaacb5al,
-            0x8281c240bc20be5al },
-          0 },
-        /* 53 << 256 */
-        { { 0x55113d5e0f8dec77l,0xdfe59f251d7e1543l,0x3b2837e0a63a849al,
-            0xdfbdb8b67a5691afl },
-          { 0x8dd6faf0bd4cf444l,0x28b2bdfaab128b6cl,0x44af3ee24b1098ebl,
-            0xbbf328ebe50b2d02l },
-          0 },
-        /* 55 << 256 */
-        { { 0xf231b1f4e4e6151al,0x6ac7130413258c6al,0x6f9cb1c1a09b9f86l,
-            0xbfc9291ee52ed880l },
-          { 0x2a7d8230bea258a2l,0xd52a0da6baf386acl,0x5166764b3af00b7el,
-            0x84792b043c985be2l },
-          0 },
-        /* 57 << 256 */
-        { { 0x914ca588a906d9e4l,0xb4e4e86abc27a876l,0x97e6ed27724324f2l,
-            0xda7e9aa5c0b87d2cl },
-          { 0xafccbe6b33a56f84l,0x69e8fd4ac892d90al,0xb47512910bb5457fl,
-            0xad65e4d05cb136fal },
-          0 },
-        /* 59 << 256 */
-        { { 0xb09974d2fd679a1bl,0x17abc2a54578faf0l,0xe7da92828c830388l,
-            0x7e455d8b0edf6146l },
-          { 0xdff3b2f0c324bdb6l,0xe7a1718769f4a4f9l,0xfb4e0b3129c500a4l,
-            0x1ed50799a09c5a07l },
-          0 },
-        /* 60 << 256 */
-        { { 0x6b669496c679d9f9l,0x3b741f36e78f0830l,0xf99d4857eb3f9e53l,
-            0x41be594276f7d4ael },
-          { 0x75f44d57c09a112bl,0xa5139fd68475eeb7l,0xa4560cd5c6bc9df6l,
-            0x8ce2c4cf50845434l },
-          0 },
-        /* 61 << 256 */
-        { { 0x96b515c32b3cb0a6l,0x65836de3930d5344l,0xfb032d5b00e6d403l,
-            0x2648301843c93bd1l },
-          { 0xfc4525dd4b572363l,0x12b7923e7b28ab5cl,0xf376b633e22ac5e6l,
-            0xd6ff6582e30b4707l },
-          0 },
-        /* 63 << 256 */
-        { { 0x8bdce75c83b09e07l,0x64228b19227717c4l,0xeae8f8a2dc6a1f02l,
-            0x1081031be72f3b6dl },
-          { 0xba0f876072c3f736l,0xde38a0c5246a28adl,0x0b116fe08596c412l,
-            0xb9e37be3fa135d11l },
-          0 },
-        /* 64 << 256 */
-        { { 0x09800dc1b48d4168l,0xa740b282bfee87a2l,0x80c6b75dc94a547al,
-            0x8cb622f0099c1985l },
-          { 0xe6c789631467e05dl,0x027b658822fd3064l,0xe14735e2c2fdb68cl,
-            0xfd2869947d853158l },
-          0 },
-        /* 65 << 256 */
-        { { 0x301916a5bbd7caf1l,0xef563fda4e2076c2l,0xccbc56088467f279l,
-            0xd7de3088b8d0f1bfl },
-          { 0x3d9adcce8586910dl,0x3fa3b8b9d775e0e9l,0x4b7a4a1d88136503l,
-            0xc748656de4994fcel },
-          0 },
-        /* 71 << 256 */
-        { { 0x18cc605c2d9f8646l,0x3764f1c29e441b64l,0xb0ea7f7fc4b64ee3l,
-            0xb5c22d0c042f8678l },
-          { 0x3761f7f89b3057fdl,0xc85b8de64a207ce4l,0x11da715bc5c04cf7l,
-            0x0cb1fa77c8e99c1fl },
-          0 },
-        /* 77 << 256 */
-        { { 0x35f9cfc8045dab4el,0x08a65c6771a7d720l,0xf076767b8eef1351l,
-            0x5351dbff8638fbe5l },
-          { 0x5aead6f7772ad54cl,0x5f6b441fafe93e69l,0xb7b83d1aeeb876b5l,
-            0xbe1ba4a7cdc094d9l },
-          0 },
-        /* 83 << 256 */
-        { { 0x005d8f04ec0377bal,0x036b8e1ace58f05dl,0xdd6ffc6f1b28cf58l,
-            0xc3d95a58e206189fl },
-          { 0xcb2873c1f52e8b8cl,0xcffdb18d80142af1l,0x7cf88eb64c77ed78l,
-            0xb3a3141981ef2c12l },
-          0 },
-        /* 89 << 256 */
-        { { 0xbb17e6f957c175b1l,0xf33abc63260a6f6dl,0x9435f2de620ddd6bl,
-            0x90bdde59ff3e99eal },
-          { 0x3d7875e0567b520fl,0xdd6954aa813b4978l,0x1af3dc24de7b631cl,
-            0x82ddcd08934d3c97l },
-          0 },
-        /* 95 << 256 */
-        { { 0x7a9d60affc5ce598l,0xc6f507597c37abfdl,0xaa1b32f3a79355d0l,
-            0xac581b94d7e4fcf3l },
-          { 0x2669cefd139f6466l,0x560a98bb26f97570l,0x32e1c1db2837b908l,
-            0x7823d7922d252781l },
-          0 },
-        /* 101 << 256 */
-        { { 0xea018b4cdedf9af0l,0x4b64c0a380c1d2f9l,0x527a0b1c36992c44l,
-            0x72a2408142b7adffl },
-          { 0x0023d10f97a502eel,0xc0f9ed067b401ac4l,0xabd1bd03d6d3b516l,
-            0xc320e3e478c5d0bel },
-          0 },
-        /* 107 << 256 */
-        { { 0x9f5d2a6a37dd009cl,0x88c0f42ac2c3cbacl,0x3155636977552a1el,
-            0xe78ec89d02f8098fl },
-          { 0x276c2ad71b6eeff9l,0xf4c49a28f7f91856l,0x698a2368dc795124l,
-            0x5502810de92a6c0fl },
-          0 },
-        /* 113 << 256 */
-        { { 0x82a5042e9f5e5192l,0x64da65fac0965a88l,0xf4c80dd56668399el,
-            0x635323757e33c233l },
-          { 0x5e5339b1a0048616l,0x4a17b1931c91741fl,0x65fdc7c213dcf3d0l,
-            0x230181426d10c410l },
-          0 },
-        /* 116 << 256 */
-        { { 0x090a04220f46c635l,0xc7eac842a04de3f5l,0x45b69d4c8990d4b2l,
-            0x032aeb50b8e0cdc6l },
-          { 0x02ce332a4ee3f307l,0x3c80c1545043980fl,0xc774838bcbd5287cl,
-            0x052661074a37d0ael },
-          0 },
-        /* 119 << 256 */
-        { { 0xc401b9c0f4d70fbfl,0xf82bbfde98ee47fel,0x94965118c84d91afl,
-            0xdd9a67c4d3b6ad1dl },
-          { 0x85c9cf1eb66a3ad4l,0x05580a0fbf5f514cl,0xf3ef0fd00218536el,
-            0x1dc2cf2bd14a7ca9l },
-          0 },
-        /* 125 << 256 */
-        { { 0x18c83e337c1e24d4l,0x30911165563657c6l,0xf9be1af679e53083l,
-            0x9b058059637753cel },
-          { 0x6a37fa24e54522b9l,0xc11d38b426dbf4c4l,0xbc6738655ebd4d9al,
-            0x2b40e9427fd4e2ecl },
-          0 },
-    },
-};
-
-/* Structure used to describe recoding of scalar multiplication. */
-typedef struct ecc_recode_sum {
-    /* Index into pre-computation table. */
-    uint8_t i;
-    /* Multiplier to add point into. */
-    uint8_t mul;
-    /* Use the negative of the point. */
-    uint8_t neg;
-} ecc_recode_sum;
-
-/* The index into pre-computation table to use. */
-static uint8_t recode_index_4_8[258] = {
-     0,  1,  1,  1,  3,  4,  2,  5,  3,  2,  4,  8,  3,  9,  5,  4,
-    11, 12,  6, 13,  7,  5,  8, 15, 55, 16,  9,  6, 18, 19,  7, 20,
-    11,  8, 12, 23, 24, 25, 13,  9, 27, 28, 14, 29, 30, 10, 15, 33,
-    11, 35, 16, 12, 37, 38, 17, 39, 18, 13, 19, 41, 42, 43, 20, 14,
-    45, 46, 21, 44, 22, 15, 23, 47, 24, 43, 25, 16, 42, 48, 26, 41,
-    27, 17, 28, 49, 18, 40, 29, 19, 30, 50, 31, 39, 32, 20, 33, 51,
-    34, 38, 35, 21, 37, 52, 22, 36, 37, 23, 38, 53, 24, 35, 39, 25,
-    34, 54, 40, 33, 55, 26, 32, 56, 27, 31, 43, 28, 30, 57, 44, 29,
-    45, 29, 44, 57, 30, 28, 43, 31, 27, 56, 32, 26, 55, 33, 40, 54,
-    34, 25, 39, 35, 24, 53, 38, 23, 37, 36, 22, 52, 37, 21, 35, 38,
-    34, 51, 33, 20, 32, 39, 31, 50, 30, 19, 29, 40, 18, 49, 28, 17,
-    27, 41, 26, 48, 42, 16, 25, 43, 24, 47, 23, 15, 22, 44, 21, 46,
-    45, 14, 20, 43, 42, 41, 19, 13, 18, 39, 17, 38, 37, 12, 16, 35,
-    11, 33, 15, 10, 30, 29, 14, 28, 27,  9, 13, 25, 24, 23, 12,  8,
-    11, 20,  7, 19, 18,  6,  9, 16, 55, 15,  8,  5,  7, 13,  6, 12,
-    11,  4,  5,  9,  3,  8,  4,  2,  3,  5,  2,  4,  3,  1,  1,  1,
-     0,  1,
-};
-
-/* Multiple to add point into. */
-static uint8_t recode_mul_4_8[258] = {
-     0,  1,  2,  3,  1,  1,  2,  1,  2,  3,  2,  1,  3,  1,  2,  3,
-     1,  1,  2,  1,  2,  3,  2,  1,  2,  1,  2,  3,  1,  1,  3,  1,
-     2,  3,  2,  1,  1,  1,  2,  3,  1,  1,  2,  1,  1,  3,  2,  1,
-     3,  1,  2,  3,  1,  1,  2,  1,  2,  3,  2,  1,  1,  1,  2,  3,
-     1,  1,  2,  3,  2,  3,  2,  1,  2,  3,  2,  3,  3,  1,  2,  3,
-     2,  3,  2,  1,  3,  3,  2,  3,  2,  1,  2,  3,  2,  3,  2,  1,
-     2,  3,  2,  3,  3,  1,  3,  3,  2,  3,  2,  1,  3,  3,  2,  3,
-     3,  1,  2,  3,  1,  3,  3,  1,  3,  3,  2,  3,  3,  1,  2,  3,
-     2,  3,  2,  1,  3,  3,  2,  3,  3,  1,  3,  3,  1,  3,  2,  1,
-     3,  3,  2,  3,  3,  1,  2,  3,  2,  3,  3,  1,  3,  3,  2,  3,
-     2,  1,  2,  3,  2,  3,  2,  1,  2,  3,  2,  3,  3,  1,  2,  3,
-     2,  3,  2,  1,  3,  3,  2,  3,  2,  1,  2,  3,  2,  3,  2,  1,
-     1,  3,  2,  1,  1,  1,  2,  3,  2,  1,  2,  1,  1,  3,  2,  1,
-     3,  1,  2,  3,  1,  1,  2,  1,  1,  3,  2,  1,  1,  1,  2,  3,
-     2,  1,  3,  1,  1,  3,  2,  1,  2,  1,  2,  3,  2,  1,  2,  1,
-     1,  3,  2,  1,  3,  1,  2,  3,  2,  1,  2,  1,  1,  3,  2,  1,
-     0,  1,
-};
-
-/* Whether to negate y-ordinate. */
-static uint8_t recode_neg_4_8[258] = {
-     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  1,  0,  0,  0,  0,  0,  1,  0,  0,  1,  0,  0,  1,
-     0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,
-     0,  1,  0,  0,  1,  0,  0,  1,  0,  0,  0,  0,  0,  1,  0,  0,
-     1,  0,  0,  1,  0,  0,  1,  0,  0,  1,  0,  0,  1,  0,  0,  1,
-     0,  0,  1,  1,  0,  1,  1,  0,  1,  1,  0,  1,  1,  0,  1,  1,
-     0,  1,  1,  0,  1,  1,  1,  1,  1,  0,  1,  1,  0,  1,  1,  0,
-     1,  1,  1,  1,  1,  0,  1,  1,  1,  1,  1,  0,  1,  1,  1,  1,
-     1,  0,  1,  1,  0,  1,  1,  0,  1,  1,  1,  1,  1,  0,  1,  1,
-     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
-     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
-     1,  1,  1,  1,  1,  1,  1,  1,  0,  1,  1,  1,  1,  1,  1,  1,
-     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
-     0,  0,
-};
-
-/* Recode the scalar for multiplication using pre-computed values, multipliers
- * and subtraction.
- *
- * k  Scalar to multiply by.
- * v  Vector of operations to peform.
- */
-static void sp_256_ecc_recode_sum_8_4(sp_digit* k, ecc_recode_sum* v)
-{
-    int i, j;
-    uint16_t y;
-    int carry = 0;
-    int o;
-    sp_digit n;
-
-    j = 0;
-    n = k[j];
-    o = 0;
-    for (i=0; i<33; i++) {
-        y = n;
-        if (o + 8 < 64) {
-            y &= 0xff;
-            n >>= 8;
-            o += 8;
-        }
-        else if (o + 8 == 64) {
-            n >>= 8;
-            if (++j < 4)
-                n = k[j];
-            o = 0;
-        }
-        else if (++j < 4) {
-            n = k[j];
-            y |= (n << (64 - o)) & 0xff;
-            o -= 56;
-            n >>= o;
-        }
-
-        y += carry;
-        v[i].i = recode_index_4_8[y];
-        v[i].mul = recode_mul_4_8[y];
-        v[i].neg = recode_neg_4_8[y];
-        carry = (y >> 8) + v[i].neg;
-    }
-}
-
-/* Multiply the base point of P256 by the scalar and return the result.
- * If map is true then convert result to affine co-ordinates.
- *
- * r     Resulting point.
- * k     Scalar to multiply by.
- * map   Indicates whether to convert result to affine.
- * heap  Heap to use for allocation.
- * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
- */
-static int sp_256_ecc_mulmod_base_4(sp_point* r, sp_digit* k, int map,
-        void* heap)
-{
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point td[4];
-    sp_point pd;
-    sp_digit tmpd[2 * 4 * 5];
-#endif
-    sp_point* t;
-    sp_point* p;
-    sp_digit* tmp;
-    sp_digit* negy;
-    int i;
-    ecc_recode_sum v[33];
-    int err;
-
-    (void)heap;
-
-    err = sp_ecc_point_new(heap, pd, p);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    t = (sp_point*)XMALLOC(sizeof(sp_point) * 4, heap, DYNAMIC_TYPE_ECC);
-    if (t == NULL)
-        err = MEMORY_E;
-    tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 4 * 5, heap,
-                             DYNAMIC_TYPE_ECC);
-    if (tmp == NULL)
-        err = MEMORY_E;
-#else
-    t = td;
-    tmp = tmpd;
-#endif
-    negy = tmp;
-
-    if (err == MP_OKAY) {
-        sp_256_ecc_recode_sum_8_4(k, v);
-
-        XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
-        XMEMSET(t, 0, sizeof(sp_point) * 4);
-        for (i=0; i<4; i++) {
-            XMEMCPY(t[i].z, p256_norm_mod, sizeof(p256_norm_mod));
-            t[i].infinity = 1;
-        }
-
-        i = 32;
-        XMEMCPY(t[v[i].mul].x, p256_table[i][v[i].i].x, sizeof(p256_table[i]->x));
-        XMEMCPY(t[v[i].mul].y, p256_table[i][v[i].i].y, sizeof(p256_table[i]->y));
-        t[v[i].mul].infinity = p256_table[i][v[i].i].infinity;
-        for (--i; i>=0; i--) {
-            XMEMCPY(p->x, p256_table[i][v[i].i].x, sizeof(p256_table[i]->x));
-            XMEMCPY(p->y, p256_table[i][v[i].i].y, sizeof(p256_table[i]->y));
-            p->infinity = p256_table[i][v[i].i].infinity;
-            sp_256_sub_4(negy, p256_mod, p->y);
-            sp_256_cond_copy_4(p->y, negy, (sp_digit)0 - v[i].neg);
-            sp_256_proj_point_add_qz1_4(&t[v[i].mul], &t[v[i].mul], p, tmp);
-        }
-        sp_256_proj_point_add_4(&t[2], &t[2], &t[3], tmp);
-        sp_256_proj_point_add_4(&t[1], &t[1], &t[3], tmp);
-        sp_256_proj_point_dbl_4(&t[2], &t[2], tmp);
-        sp_256_proj_point_add_4(&t[1], &t[1], &t[2], tmp);
-
-        if (map)
-            sp_256_map_4(r, &t[1], tmp);
-        else
-            XMEMCPY(r, &t[1], sizeof(sp_point));
-    }
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (t != NULL) {
-        XMEMSET(t, 0, sizeof(sp_point) * 4);
-        XFREE(t, heap, DYNAMIC_TYPE_ECC);
-    }
-    if (tmp != NULL) {
-        XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 4 * 5);
-        XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
-    }
-#else
-    ForceZero(tmpd, sizeof(tmpd));
-    ForceZero(td, sizeof(td));
-#endif
-    sp_ecc_point_free(p, 0, heap);
-
-    return MP_OKAY;
-}
-
-#ifdef HAVE_INTEL_AVX2
-/* Multiply the base point of P256 by the scalar and return the result.
- * If map is true then convert result to affine co-ordinates.
- *
- * r     Resulting point.
- * k     Scalar to multiply by.
- * map   Indicates whether to convert result to affine.
- * heap  Heap to use for allocation.
- * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
- */
-static int sp_256_ecc_mulmod_base_avx2_4(sp_point* r, sp_digit* k, int map,
-        void* heap)
-{
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point td[4];
-    sp_point pd;
-    sp_digit tmpd[2 * 4 * 5];
-#endif
-    sp_point* t;
-    sp_point* p;
-    sp_digit* tmp;
-    sp_digit* negy;
-    int i;
-    ecc_recode_sum v[33];
-    int err;
-
-    (void)heap;
-
-    err = sp_ecc_point_new(heap, pd, p);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    t = (sp_point*)XMALLOC(sizeof(sp_point) * 4, heap, DYNAMIC_TYPE_ECC);
-    if (t == NULL)
-        err = MEMORY_E;
-    tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 4 * 5, heap,
-                             DYNAMIC_TYPE_ECC);
-    if (tmp == NULL)
-        err = MEMORY_E;
-#else
-    t = td;
-    tmp = tmpd;
-#endif
-    negy = tmp;
-
-    if (err == MP_OKAY) {
-        sp_256_ecc_recode_sum_8_4(k, v);
-
-        XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
-        XMEMSET(t, 0, sizeof(sp_point) * 4);
-        for (i=0; i<4; i++) {
-            XMEMCPY(t[i].z, p256_norm_mod, sizeof(p256_norm_mod));
-            t[i].infinity = 1;
-        }
-
-        i = 32;
-        XMEMCPY(t[v[i].mul].x, p256_table[i][v[i].i].x, sizeof(p256_table[i]->x));
-        XMEMCPY(t[v[i].mul].y, p256_table[i][v[i].i].y, sizeof(p256_table[i]->y));
-        t[v[i].mul].infinity = p256_table[i][v[i].i].infinity;
-        for (--i; i>=0; i--) {
-            XMEMCPY(p->x, p256_table[i][v[i].i].x, sizeof(p256_table[i]->x));
-            XMEMCPY(p->y, p256_table[i][v[i].i].y, sizeof(p256_table[i]->y));
-            p->infinity = p256_table[i][v[i].i].infinity;
-            sp_256_sub_4(negy, p256_mod, p->y);
-            sp_256_cond_copy_4(p->y, negy, (sp_digit)0 - v[i].neg);
-            sp_256_proj_point_add_qz1_avx2_4(&t[v[i].mul], &t[v[i].mul], p, tmp);
-        }
-        sp_256_proj_point_add_avx2_4(&t[2], &t[2], &t[3], tmp);
-        sp_256_proj_point_add_avx2_4(&t[1], &t[1], &t[3], tmp);
-        sp_256_proj_point_dbl_avx2_4(&t[2], &t[2], tmp);
-        sp_256_proj_point_add_avx2_4(&t[1], &t[1], &t[2], tmp);
-
-        if (map)
-            sp_256_map_avx2_4(r, &t[1], tmp);
-        else
-            XMEMCPY(r, &t[1], sizeof(sp_point));
-    }
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (t != NULL) {
-        XMEMSET(t, 0, sizeof(sp_point) * 4);
-        XFREE(t, heap, DYNAMIC_TYPE_ECC);
-    }
-    if (tmp != NULL) {
-        XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 4 * 5);
-        XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
-    }
-#else
-    ForceZero(tmpd, sizeof(tmpd));
-    ForceZero(td, sizeof(td));
-#endif
-    sp_ecc_point_free(p, 0, heap);
-
-    return MP_OKAY;
-}
-
-#endif /* HAVE_INTEL_AVX2 */
-#endif /* WOLFSSL_SP_SMALL */
-/* Multiply the base point of P256 by the scalar and return the result.
- * If map is true then convert result to affine co-ordinates.
- *
- * km    Scalar to multiply by.
- * r     Resulting point.
- * map   Indicates whether to convert result to affine.
- * heap  Heap to use for allocation.
- * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
- */
-int sp_ecc_mulmod_base_256(mp_int* km, ecc_point* r, int map, void* heap)
-{
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point p;
-    sp_digit kd[4];
-#endif
-    sp_point* point;
-    sp_digit* k = NULL;
-    int err = MP_OKAY;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    err = sp_ecc_point_new(heap, p, point);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        k = XMALLOC(sizeof(sp_digit) * 4, heap, DYNAMIC_TYPE_ECC);
-        if (k == NULL)
-            err = MEMORY_E;
-    }
-#else
-    k = kd;
-#endif
-    if (err == MP_OKAY) {
-        sp_256_from_mp(k, 4, km);
-
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_256_ecc_mulmod_base_avx2_4(point, k, map, heap);
-        else
-#endif
-            err = sp_256_ecc_mulmod_base_4(point, k, map, heap);
-    }
-    if (err == MP_OKAY)
-        err = sp_256_point_to_ecc_point_4(point, r);
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (k != NULL)
-        XFREE(k, heap, DYNAMIC_TYPE_ECC);
-#endif
-    sp_ecc_point_free(point, 0, heap);
-
-    return err;
-}
-
-#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN)
-/* Returns 1 if the number of zero.
- * Implementation is constant time.
- *
- * a  Number to check.
- * returns 1 if the number is zero and 0 otherwise.
- */
-static int sp_256_iszero_4(const sp_digit* a)
-{
-    return (a[0] | a[1] | a[2] | a[3]) == 0;
-}
-
-#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN */
-/* Add 1 to a. (a = a + 1)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- */
-static void sp_256_add_one_4(sp_digit* a)
-{
-    __asm__ __volatile__ (
-        "addq	$1, (%[a])\n\t"
-        "adcq	$0, 8(%[a])\n\t"
-        "adcq	$0, 16(%[a])\n\t"
-        "adcq	$0, 24(%[a])\n\t"
-        :
-        : [a] "r" (a)
-        : "memory"
-    );
-}
-
-/* Read big endian unsigned byte aray into r.
- *
- * r  A single precision integer.
- * a  Byte array.
- * n  Number of bytes in array to read.
- */
-static void sp_256_from_bin(sp_digit* r, int max, const byte* a, int n)
-{
-    int i, j = 0, s = 0;
-
-    r[0] = 0;
-    for (i = n-1; i >= 0; i--) {
-        r[j] |= ((sp_digit)a[i]) << s;
-        if (s >= 56) {
-            r[j] &= 0xffffffffffffffffl;
-            s = 64 - s;
-            if (j + 1 >= max)
-                break;
-            r[++j] = a[i] >> s;
-            s = 8 - s;
-        }
-        else
-            s += 8;
-    }
-
-    for (j++; j < max; j++)
-        r[j] = 0;
-}
-
-/* Generates a scalar that is in the range 1..order-1.
- *
- * rng  Random number generator.
- * k    Scalar value.
- * returns RNG failures, MEMORY_E when memory allocation fails and
- * MP_OKAY on success.
- */
-static int sp_256_ecc_gen_k_4(WC_RNG* rng, sp_digit* k)
-{
-    int err;
-    byte buf[32];
-
-    do {
-        err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf));
-        if (err == 0) {
-            sp_256_from_bin(k, 4, buf, sizeof(buf));
-            if (sp_256_cmp_4(k, p256_order2) < 0) {
-                sp_256_add_one_4(k);
-                break;
-            }
-        }
-    }
-    while (err == 0);
-
-    return err;
-}
-
-/* Makes a random EC key pair.
- *
- * rng   Random number generator.
- * priv  Generated private value.
- * pub   Generated public point.
- * heap  Heap to use for allocation.
- * returns ECC_INF_E when the point does not have the correct order, RNG
- * failures, MEMORY_E when memory allocation fails and MP_OKAY on success.
- */
-int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
-{
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point p;
-    sp_digit kd[4];
-#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
-    sp_point inf;
-#endif
-#endif
-    sp_point* point;
-    sp_digit* k = NULL;
-#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
-    sp_point* infinity;
-#endif
-    int err;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    (void)heap;
-
-    err = sp_ecc_point_new(heap, p, point);
-#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
-    if (err == MP_OKAY)
-        err = sp_ecc_point_new(heap, inf, infinity);
-#endif
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        k = XMALLOC(sizeof(sp_digit) * 4, heap, DYNAMIC_TYPE_ECC);
-        if (k == NULL)
-            err = MEMORY_E;
-    }
-#else
-    k = kd;
-#endif
-
-    if (err == MP_OKAY)
-        err = sp_256_ecc_gen_k_4(rng, k);
-    if (err == MP_OKAY) {
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_256_ecc_mulmod_base_avx2_4(point, k, 1, NULL);
-        else
-#endif
-            err = sp_256_ecc_mulmod_base_4(point, k, 1, NULL);
-    }
-
-#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
-    if (err == MP_OKAY) {
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
-            err = sp_256_ecc_mulmod_avx2_4(infinity, point, p256_order, 1,
-                                                                          NULL);
-        }
-        else
-#endif
-            err = sp_256_ecc_mulmod_4(infinity, point, p256_order, 1, NULL);
-    }
-    if (err == MP_OKAY) {
-        if (!sp_256_iszero_4(point->x) || !sp_256_iszero_4(point->y))
-            err = ECC_INF_E;
-    }
-#endif
-
-    if (err == MP_OKAY)
-        err = sp_256_to_mp(k, priv);
-    if (err == MP_OKAY)
-        err = sp_256_point_to_ecc_point_4(point, pub);
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (k != NULL)
-        XFREE(k, heap, DYNAMIC_TYPE_ECC);
-#endif
-#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
-    sp_ecc_point_free(infinity, 1, heap);
-#endif
-    sp_ecc_point_free(point, 1, heap);
-
-    return err;
-}
-
-#ifdef HAVE_ECC_DHE
-/* Write r as big endian to byte aray.
- * Fixed length number of bytes written: 32
- *
- * r  A single precision integer.
- * a  Byte array.
- */
-static void sp_256_to_bin(sp_digit* r, byte* a)
-{
-    int i, j, s = 0, b;
-
-    j = 256 / 8 - 1;
-    a[j] = 0;
-    for (i=0; i<4 && j>=0; i++) {
-        b = 0;
-        a[j--] |= r[i] << s; b += 8 - s;
-        if (j < 0)
-            break;
-        while (b < 64) {
-            a[j--] = r[i] >> b; b += 8;
-            if (j < 0)
-                break;
-        }
-        s = 8 - (b - 64);
-        if (j >= 0)
-            a[j] = 0;
-        if (s != 0)
-            j++;
-    }
-}
-
-/* Multiply the point by the scalar and serialize the X ordinate.
- * The number is 0 padded to maximum size on output.
- *
- * priv    Scalar to multiply the point by.
- * pub     Point to multiply.
- * out     Buffer to hold X ordinate.
- * outLen  On entry, size of the buffer in bytes.
- *         On exit, length of data in buffer in bytes.
- * heap    Heap to use for allocation.
- * returns BUFFER_E if the buffer is to small for output size,
- * MEMORY_E when memory allocation fails and MP_OKAY on success.
- */
-int sp_ecc_secret_gen_256(mp_int* priv, ecc_point* pub, byte* out,
-                          word32* outLen, void* heap)
-{
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point p;
-    sp_digit kd[4];
-#endif
-    sp_point* point = NULL;
-    sp_digit* k = NULL;
-    int err = MP_OKAY;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    if (*outLen < 32)
-        err = BUFFER_E;
-
-    if (err == MP_OKAY)
-        err = sp_ecc_point_new(heap, p, point);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        k = XMALLOC(sizeof(sp_digit) * 4, heap, DYNAMIC_TYPE_ECC);
-        if (k == NULL)
-            err = MEMORY_E;
-    }
-#else
-    k = kd;
-#endif
-
-    if (err == MP_OKAY) {
-        sp_256_from_mp(k, 4, priv);
-        sp_256_point_from_ecc_point_4(point, pub);
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_256_ecc_mulmod_avx2_4(point, point, k, 1, heap);
-        else
-#endif
-            err = sp_256_ecc_mulmod_4(point, point, k, 1, heap);
-    }
-    if (err == MP_OKAY) {
-        sp_256_to_bin(point->x, out);
-        *outLen = 32;
-    }
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (k != NULL)
-        XFREE(k, heap, DYNAMIC_TYPE_ECC);
-#endif
-    sp_ecc_point_free(point, 0, heap);
-
-    return err;
-}
-#endif /* HAVE_ECC_DHE */
-
-#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
-/* Add b to a into r. (r = a + b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A single precision integer.
- */
-SP_NOINLINE static sp_digit sp_256_add_4(sp_digit* r, const sp_digit* a,
-        const sp_digit* b)
-{
-    sp_digit c = 0;
-
-    __asm__ __volatile__ (
-        "movq	(%[a]), %%rax\n\t"
-        "addq	(%[b]), %%rax\n\t"
-        "movq	%%rax, (%[r])\n\t"
-        "movq	8(%[a]), %%rax\n\t"
-        "adcq	8(%[b]), %%rax\n\t"
-        "movq	%%rax, 8(%[r])\n\t"
-        "movq	16(%[a]), %%rax\n\t"
-        "adcq	16(%[b]), %%rax\n\t"
-        "movq	%%rax, 16(%[r])\n\t"
-        "movq	24(%[a]), %%rax\n\t"
-        "adcq	24(%[b]), %%rax\n\t"
-        "movq	%%rax, 24(%[r])\n\t"
-        "adcq	$0, %[c]\n\t"
-        : [c] "+r" (c)
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
-        : "memory", "rax"
-    );
-
-    return c;
-}
-
-#endif
-#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
-/* Multiply a and b into r. (r = a * b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A single precision integer.
- */
-SP_NOINLINE static void sp_256_mul_4(sp_digit* r, const sp_digit* a,
-    const sp_digit* b)
-{
-    sp_digit tmp[4];
-
-    __asm__ __volatile__ (
-        "#  A[0] * B[0]\n\t"
-        "movq	0(%[b]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "movq	%%rax, (%[tmp])\n\t"
-        "movq	%%rdx, %%rcx\n\t"
-        "#  A[0] * B[1]\n\t"
-        "movq	8(%[b]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[1] * B[0]\n\t"
-        "movq	0(%[b]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "movq	%%rcx, 8(%[tmp])\n\t"
-        "#  A[0] * B[2]\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[1] * B[1]\n\t"
-        "movq	8(%[b]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[2] * B[0]\n\t"
-        "movq	0(%[b]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "movq	%%r8, 16(%[tmp])\n\t"
-        "#  A[0] * B[3]\n\t"
-        "movq	24(%[b]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[1] * B[2]\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[2] * B[1]\n\t"
-        "movq	8(%[b]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[3] * B[0]\n\t"
-        "movq	0(%[b]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "movq	%%rbx, 24(%[tmp])\n\t"
-        "#  A[1] * B[3]\n\t"
-        "movq	24(%[b]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[2] * B[2]\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "#  A[3] * B[1]\n\t"
-        "movq	8(%[b]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "movq	%%rcx, 32(%[r])\n\t"
-        "#  A[2] * B[3]\n\t"
-        "movq	24(%[b]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[3] * B[2]\n\t"
-        "movq	16(%[b]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "movq	%%r8, 40(%[r])\n\t"
-        "#  A[3] * B[3]\n\t"
-        "movq	24(%[b]), %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "movq	%%rbx, 48(%[r])\n\t"
-        "movq	%%rcx, 56(%[r])\n\t"
-        :
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [tmp] "r" (tmp)
-        : "memory", "rax", "rdx", "rbx", "rcx", "r8"
-    );
-
-    XMEMCPY(r, tmp, sizeof(tmp));
-}
-
-#ifdef HAVE_INTEL_AVX2
-/* Multiply a and b into r. (r = a * b)
- *
- * r   Result of multiplication.
- * a   First number to multiply.
- * b   Second number to multiply.
- */
-SP_NOINLINE static void sp_256_mul_avx2_4(sp_digit* r, const sp_digit* a,
-        const sp_digit* b)
-{
-    __asm__ __volatile__ (
-        "#  A[0] * B[0]\n\t"
-        "movq   0(%[b]), %%rdx\n\t"
-        "mulxq  0(%[a]), %%r8, %%r9\n\t"
-        "#  A[2] * B[0]\n\t"
-        "mulxq  16(%[a]), %%r10, %%r11\n\t"
-        "#  A[1] * B[0]\n\t"
-        "mulxq  8(%[a]), %%rax, %%rcx\n\t"
-        "xorq   %%r15, %%r15\n\t"
-        "adcxq  %%rax, %%r9\n\t"
-        "#  A[1] * B[3]\n\t"
-        "movq   24(%[b]), %%rdx\n\t"
-        "mulxq  8(%[a]), %%r12, %%r13\n\t"
-        "adcxq  %%rcx, %%r10\n\t"
-        "#  A[0] * B[1]\n\t"
-        "movq   8(%[b]), %%rdx\n\t"
-        "mulxq  0(%[a]), %%rax, %%rcx\n\t"
-        "adoxq  %%rax, %%r9\n\t"
-        "#  A[2] * B[1]\n\t"
-        "mulxq  16(%[a]), %%rax, %%r14\n\t"
-        "adoxq  %%rcx, %%r10\n\t"
-        "adcxq  %%rax, %%r11\n\t"
-        "#  A[1] * B[2]\n\t"
-        "movq   16(%[b]), %%rdx\n\t"
-        "mulxq  8(%[a]), %%rax, %%rcx\n\t"
-        "adcxq  %%r14, %%r12\n\t"
-        "adoxq  %%rax, %%r11\n\t"
-        "adcxq  %%r15, %%r13\n\t"
-        "adoxq  %%rcx, %%r12\n\t"
-        "#  A[0] * B[2]\n\t"
-        "mulxq  0(%[a]), %%rax, %%rcx\n\t"
-        "adoxq  %%r15, %%r13\n\t"
-        "xorq   %%r14, %%r14\n\t"
-        "adcxq  %%rax, %%r10\n\t"
-        "#  A[1] * B[1]\n\t"
-        "movq   8(%[b]), %%rdx\n\t"
-        "mulxq  8(%[a]), %%rdx, %%rax\n\t"
-        "adcxq  %%rcx, %%r11\n\t"
-        "adoxq  %%rdx, %%r10\n\t"
-        "#  A[3] * B[1]\n\t"
-        "movq   8(%[b]), %%rdx\n\t"
-        "adoxq  %%rax, %%r11\n\t"
-        "mulxq  24(%[a]), %%rax, %%rcx\n\t"
-        "adcxq  %%rax, %%r12\n\t"
-        "#  A[2] * B[2]\n\t"
-        "movq   16(%[b]), %%rdx\n\t"
-        "mulxq  16(%[a]), %%rdx, %%rax\n\t"
-        "adcxq  %%rcx, %%r13\n\t"
-        "adoxq  %%rdx, %%r12\n\t"
-        "#  A[3] * B[3]\n\t"
-        "movq   24(%[b]), %%rdx\n\t"
-        "adoxq  %%rax, %%r13\n\t"
-        "mulxq  24(%[a]), %%rax, %%rcx\n\t"
-        "adoxq  %%r15, %%r14\n\t"
-        "adcxq  %%rax, %%r14\n\t"
-        "#  A[0] * B[3]\n\t"
-        "mulxq  0(%[a]), %%rdx, %%rax\n\t"
-        "adcxq  %%rcx, %%r15\n\t"
-        "xorq   %%rcx, %%rcx\n\t"
-        "adcxq  %%rdx, %%r11\n\t"
-        "#  A[3] * B[0]\n\t"
-        "movq   0(%[b]), %%rdx\n\t"
-        "adcxq  %%rax, %%r12\n\t"
-        "mulxq  24(%[a]), %%rdx, %%rax\n\t"
-        "adoxq  %%rdx, %%r11\n\t"
-        "adoxq  %%rax, %%r12\n\t"
-        "#  A[2] * B[3]\n\t"
-        "movq   24(%[b]), %%rdx\n\t"
-        "mulxq  16(%[a]), %%rdx, %%rax\n\t"
-        "adcxq  %%rdx, %%r13\n\t"
-        "#  A[3] * B[2]\n\t"
-        "movq   16(%[b]), %%rdx\n\t"
-        "adcxq  %%rax, %%r14\n\t"
-        "mulxq  24(%[a]), %%rax, %%rdx\n\t"
-        "adcxq  %%rcx, %%r15\n\t"
-        "adoxq  %%rax, %%r13\n\t"
-        "adoxq  %%rdx, %%r14\n\t"
-        "adoxq  %%rcx, %%r15\n\t"
-        "movq	%%r8, 0(%[r])\n\t"
-        "movq	%%r9, 8(%[r])\n\t"
-        "movq	%%r10, 16(%[r])\n\t"
-        "movq	%%r11, 24(%[r])\n\t"
-        "movq	%%r12, 32(%[r])\n\t"
-        "movq	%%r13, 40(%[r])\n\t"
-        "movq	%%r14, 48(%[r])\n\t"
-        "movq	%%r15, 56(%[r])\n\t"
-        :
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
-        : "memory", "rax", "rdx", "rcx", "r8", "r9", "r10", "r11",
-          "r12", "r13", "r14", "r15"
-    );
-}
-
-#endif /* HAVE_INTEL_AVX2 */
-#endif
-#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
-/* Sub b from a into a. (a -= b)
- *
- * a  A single precision integer and result.
- * b  A single precision integer.
- */
-SP_NOINLINE static sp_digit sp_256_sub_in_place_4(sp_digit* a,
-    const sp_digit* b)
-{
-    sp_digit c = 0;
-
-    __asm__ __volatile__ (
-        "movq	0(%[a]), %%r8\n\t"
-        "movq	8(%[a]), %%r9\n\t"
-        "movq	0(%[b]), %%rdx\n\t"
-        "movq	8(%[b]), %%rcx\n\t"
-        "subq	%%rdx, %%r8\n\t"
-        "movq	16(%[b]), %%rdx\n\t"
-        "movq	%%r8, 0(%[a])\n\t"
-        "movq	16(%[a]), %%r8\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	24(%[b]), %%rcx\n\t"
-        "movq	%%r9, 8(%[a])\n\t"
-        "movq	24(%[a]), %%r9\n\t"
-        "sbbq	%%rdx, %%r8\n\t"
-        "movq	%%r8, 16(%[a])\n\t"
-        "sbbq	%%rcx, %%r9\n\t"
-        "movq	%%r9, 24(%[a])\n\t"
-        "sbbq	$0, %[c]\n\t"
-        : [c] "+r" (c)
-        : [a] "r" (a), [b] "r" (b)
-        : "memory", "rdx", "rcx", "r8", "r9"
-    );
-
-    return c;
-}
-
-/* Mul a by digit b into r. (r = a * b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A single precision digit.
- */
-SP_NOINLINE static void sp_256_mul_d_4(sp_digit* r, const sp_digit* a,
-        const sp_digit b)
-{
-    __asm__ __volatile__ (
-        "# A[0] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "mulq	(%[a])\n\t"
-        "movq	%%rax, %%rbx\n\t"
-        "movq	%%rdx, %%rcx\n\t"
-        "movq	%%rbx, 0(%[r])\n\t"
-        "# A[1] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rbx, %%rbx\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "movq	%%rcx, 8(%[r])\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%rbx\n\t"
-        "# A[2] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "mulq	16(%[a])\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "movq	%%r8, 16(%[r])\n\t"
-        "adcq	%%rdx, %%rbx\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "# A[3] * B\n\t"
-        "movq	%[b], %%rax\n\t"
-        "mulq	24(%[a])\n\t"
-        "addq	%%rax, %%rbx\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "movq	%%rbx, 24(%[r])\n\t"
-        "movq	%%rcx, 32(%[r])\n\t"
-        :
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
-        : "memory", "rax", "rdx", "rbx", "rcx", "r8"
-    );
-}
-
-#ifdef HAVE_INTEL_AVX2
-/* Mul a by digit b into r. (r = a * b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A single precision digit.
- */
-SP_NOINLINE static void sp_256_mul_d_avx2_4(sp_digit* r, const sp_digit* a,
-        const sp_digit b)
-{
-    __asm__ __volatile__ (
-        "# A[0] * B\n\t"
-        "movq	%[b], %%rdx\n\t"
-        "xorq	%%r10, %%r10\n\t"
-        "mulxq	(%[a]), %%r8, %%r9\n\t"
-        "movq	%%r8, 0(%[r])\n\t"
-        "# A[1] * B\n\t"
-        "mulxq	8(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "movq	%%r9, 8(%[r])\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "# A[2] * B\n\t"
-        "mulxq	16(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r9\n\t"
-        "adcxq	%%rax, %%r8\n\t"
-        "movq	%%r8, 16(%[r])\n\t"
-        "adoxq	%%rcx, %%r9\n\t"
-        "# A[3] * B\n\t"
-        "mulxq	24(%[a]), %%rax, %%rcx\n\t"
-        "movq	%%r10, %%r8\n\t"
-        "adcxq	%%rax, %%r9\n\t"
-        "adoxq	%%rcx, %%r8\n\t"
-        "adcxq	%%r10, %%r8\n\t"
-        "movq	%%r9, 24(%[r])\n\t"
-        "movq	%%r8, 32(%[r])\n\t"
-        :
-        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
-        : "memory", "rax", "rdx", "rcx", "r8", "r9", "r10"
-    );
-}
-#endif /* HAVE_INTEL_AVX2 */
-
-/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
- *
- * d1   The high order half of the number to divide.
- * d0   The low order half of the number to divide.
- * div  The dividend.
- * returns the result of the division.
- */
-static sp_digit div_256_word_4(sp_digit d1, sp_digit d0, sp_digit div)
-{
-    sp_digit r;
-
-    __asm__ __volatile__ (
-        "movq	%[d0], %%rax\n\t"
-        "movq	%[d1], %%rdx\n\t"
-        "divq	%[div]\n\t"
-        "movq	%%rax, %[r]\n\t"
-        : [r] "=r" (r)
-        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
-        : "rax", "rdx"
-    );
-
-    return r;
-}
-
-/* AND m into each word of a and store in r.
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * m  Mask to AND against each digit.
- */
-static void sp_256_mask_4(sp_digit* r, sp_digit* a, sp_digit m)
-{
-#ifdef WOLFSSL_SP_SMALL
-    int i;
-
-    for (i=0; i<4; i++)
-        r[i] = a[i] & m;
-#else
-    r[0] = a[0] & m;
-    r[1] = a[1] & m;
-    r[2] = a[2] & m;
-    r[3] = a[3] & m;
-#endif
-}
-
-/* Divide d in a and put remainder into r (m*d + r = a)
- * m is not calculated as it is not needed at this time.
- *
- * a  Nmber to be divided.
- * d  Number to divide with.
- * m  Multiplier result.
- * r  Remainder from the division.
- * returns MP_OKAY indicating success.
- */
-static WC_INLINE int sp_256_div_4(sp_digit* a, sp_digit* d, sp_digit* m,
-        sp_digit* r)
-{
-    sp_digit t1[8], t2[5];
-    sp_digit div, r1;
-    int i;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    (void)m;
-
-    div = d[3];
-    XMEMCPY(t1, a, sizeof(*t1) * 2 * 4);
-    for (i=3; i>=0; i--) {
-        r1 = div_256_word_4(t1[4 + i], t1[4 + i - 1], div);
-
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            sp_256_mul_d_avx2_4(t2, d, r1);
-        else
-#endif
-            sp_256_mul_d_4(t2, d, r1);
-        t1[4 + i] += sp_256_sub_in_place_4(&t1[i], t2);
-        t1[4 + i] -= t2[4];
-        sp_256_mask_4(t2, d, t1[4 + i]);
-        t1[4 + i] += sp_256_add_4(&t1[i], &t1[i], t2);
-        sp_256_mask_4(t2, d, t1[4 + i]);
-        t1[4 + i] += sp_256_add_4(&t1[i], &t1[i], t2);
-    }
-
-    r1 = sp_256_cmp_4(t1, d) >= 0;
-    sp_256_cond_sub_4(r, t1, t2, (sp_digit)0 - r1);
-
-    return MP_OKAY;
-}
-
-/* Reduce a modulo m into r. (r = a mod m)
- *
- * r  A single precision number that is the reduced result.
- * a  A single precision number that is to be reduced.
- * m  A single precision number that is the modulus to reduce with.
- * returns MP_OKAY indicating success.
- */
-static WC_INLINE int sp_256_mod_4(sp_digit* r, sp_digit* a, sp_digit* m)
-{
-    return sp_256_div_4(a, m, NULL, r);
-}
-
-#endif
-#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
-/* Square a and put result in r. (r = a * a)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- */
-SP_NOINLINE static void sp_256_sqr_4(sp_digit* r, const sp_digit* a)
-{
-    sp_digit tmp[4];
-
-    __asm__ __volatile__ (
-        "#  A[0] * A[0]\n\t"
-        "movq	0(%[a]), %%rax\n\t"
-        "mulq	%%rax\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "movq	%%rax, (%[tmp])\n\t"
-        "movq	%%rdx, %%r8\n\t"
-        "#  A[0] * A[1]\n\t"
-        "movq	8(%[a]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "movq	%%r8, 8(%[tmp])\n\t"
-        "#  A[0] * A[2]\n\t"
-        "movq	16(%[a]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "addq	%%rax, %%r9\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "addq	%%rax, %%r9\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "#  A[1] * A[1]\n\t"
-        "movq	8(%[a]), %%rax\n\t"
-        "mulq	%%rax\n\t"
-        "addq	%%rax, %%r9\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "movq	%%r9, 16(%[tmp])\n\t"
-        "#  A[0] * A[3]\n\t"
-        "movq	24(%[a]), %%rax\n\t"
-        "mulq	0(%[a])\n\t"
-        "xorq	%%r9, %%r9\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%r9\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%r9\n\t"
-        "#  A[1] * A[2]\n\t"
-        "movq	16(%[a]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%r9\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "adcq	$0, %%r9\n\t"
-        "movq	%%rcx, 24(%[tmp])\n\t"
-        "#  A[1] * A[3]\n\t"
-        "movq	24(%[a]), %%rax\n\t"
-        "mulq	8(%[a])\n\t"
-        "xorq	%%rcx, %%rcx\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "#  A[2] * A[2]\n\t"
-        "movq	16(%[a]), %%rax\n\t"
-        "mulq	%%rax\n\t"
-        "addq	%%rax, %%r8\n\t"
-        "adcq	%%rdx, %%r9\n\t"
-        "adcq	$0, %%rcx\n\t"
-        "movq	%%r8, 32(%[r])\n\t"
-        "#  A[2] * A[3]\n\t"
-        "movq	24(%[a]), %%rax\n\t"
-        "mulq	16(%[a])\n\t"
-        "xorq	%%r8, %%r8\n\t"
-        "addq	%%rax, %%r9\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "addq	%%rax, %%r9\n\t"
-        "adcq	%%rdx, %%rcx\n\t"
-        "adcq	$0, %%r8\n\t"
-        "movq	%%r9, 40(%[r])\n\t"
-        "#  A[3] * A[3]\n\t"
-        "movq	24(%[a]), %%rax\n\t"
-        "mulq	%%rax\n\t"
-        "addq	%%rax, %%rcx\n\t"
-        "adcq	%%rdx, %%r8\n\t"
-        "movq	%%rcx, 48(%[r])\n\t"
-        "movq	%%r8, 56(%[r])\n\t"
-        :
-        : [r] "r" (r), [a] "r" (a), [tmp] "r" (tmp)
-        : "memory", "rax", "rdx", "rcx", "r8", "r9", "r10", "r11", "r12"
-    );
-
-    XMEMCPY(r, tmp, sizeof(tmp));
-}
-
-#ifdef WOLFSSL_SP_SMALL
-/* Order-2 for the P256 curve. */
-static const uint64_t p256_order_2[4] = {
-    0xf3b9cac2fc63254f,0xbce6faada7179e84,0xffffffffffffffff,
-    0xffffffff00000000
-};
-#else
-/* The low half of the order-2 of the P256 curve. */
-static const uint64_t p256_order_low[2] = {
-    0xf3b9cac2fc63254f,0xbce6faada7179e84
-};
-#endif /* WOLFSSL_SP_SMALL */
-
-/* Multiply two number mod the order of P256 curve. (r = a * b mod order)
- *
- * r  Result of the multiplication.
- * a  First operand of the multiplication.
- * b  Second operand of the multiplication.
- */
-static void sp_256_mont_mul_order_4(sp_digit* r, sp_digit* a, sp_digit* b)
-{
-    sp_256_mul_4(r, a, b);
-    sp_256_mont_reduce_4(r, p256_order, p256_mp_order);
-}
-
-/* Square number mod the order of P256 curve. (r = a * a mod order)
- *
- * r  Result of the squaring.
- * a  Number to square.
- */
-static void sp_256_mont_sqr_order_4(sp_digit* r, sp_digit* a)
-{
-    sp_256_sqr_4(r, a);
-    sp_256_mont_reduce_4(r, p256_order, p256_mp_order);
-}
-
-#ifndef WOLFSSL_SP_SMALL
-/* Square number mod the order of P256 curve a number of times.
- * (r = a ^ n mod order)
- *
- * r  Result of the squaring.
- * a  Number to square.
- */
-static void sp_256_mont_sqr_n_order_4(sp_digit* r, sp_digit* a, int n)
-{
-    int i;
-
-    sp_256_mont_sqr_order_4(r, a);
-    for (i=1; i<n; i++)
-        sp_256_mont_sqr_order_4(r, r);
-}
-#endif /* !WOLFSSL_SP_SMALL */
-
-/* Invert the number, in Montgomery form, modulo the order of the P256 curve.
- * (r = 1 / a mod order)
- *
- * r   Inverse result.
- * a   Number to invert.
- * td  Temporary data.
- */
-static void sp_256_mont_inv_order_4(sp_digit* r, sp_digit* a,
-        sp_digit* td)
-{
-#ifdef WOLFSSL_SP_SMALL
-    sp_digit* t = td;
-    int i;
-
-    XMEMCPY(t, a, sizeof(sp_digit) * 4);
-    for (i=254; i>=0; i--) {
-        sp_256_mont_sqr_order_4(t, t);
-        if (p256_order_2[i / 64] & ((sp_digit)1 << (i % 64)))
-            sp_256_mont_mul_order_4(t, t, a);
-    }
-    XMEMCPY(r, t, sizeof(sp_digit) * 4);
-#else
-    sp_digit* t = td;
-    sp_digit* t2 = td + 2 * 4;
-    sp_digit* t3 = td + 4 * 4;
-    int i;
-
-    /* t = a^2 */
-    sp_256_mont_sqr_order_4(t, a);
-    /* t = a^3 = t * a */
-    sp_256_mont_mul_order_4(t, t, a);
-    /* t2= a^c = t ^ 2 ^ 2 */
-    sp_256_mont_sqr_n_order_4(t2, t, 2);
-    /* t3= a^f = t2 * t */
-    sp_256_mont_mul_order_4(t3, t2, t);
-    /* t2= a^f0 = t3 ^ 2 ^ 4 */
-    sp_256_mont_sqr_n_order_4(t2, t3, 4);
-    /* t = a^ff = t2 * t3 */
-    sp_256_mont_mul_order_4(t, t2, t3);
-    /* t3= a^ff00 = t ^ 2 ^ 8 */
-    sp_256_mont_sqr_n_order_4(t2, t, 8);
-    /* t = a^ffff = t2 * t */
-    sp_256_mont_mul_order_4(t, t2, t);
-    /* t2= a^ffff0000 = t ^ 2 ^ 16 */
-    sp_256_mont_sqr_n_order_4(t2, t, 16);
-    /* t = a^ffffffff = t2 * t */
-    sp_256_mont_mul_order_4(t, t2, t);
-    /* t2= a^ffffffff0000000000000000 = t ^ 2 ^ 64  */
-    sp_256_mont_sqr_n_order_4(t2, t, 64);
-    /* t2= a^ffffffff00000000ffffffff = t2 * t */
-    sp_256_mont_mul_order_4(t2, t2, t);
-    /* t2= a^ffffffff00000000ffffffff00000000 = t2 ^ 2 ^ 32  */
-    sp_256_mont_sqr_n_order_4(t2, t2, 32);
-    /* t2= a^ffffffff00000000ffffffffffffffff = t2 * t */
-    sp_256_mont_mul_order_4(t2, t2, t);
-    /* t2= a^ffffffff00000000ffffffffffffffffbce6 */
-    for (i=127; i>=112; i--) {
-        sp_256_mont_sqr_order_4(t2, t2);
-        if (p256_order_low[i / 64] & ((sp_digit)1 << (i % 64)))
-            sp_256_mont_mul_order_4(t2, t2, a);
-    }
-    /* t2= a^ffffffff00000000ffffffffffffffffbce6f */
-    sp_256_mont_sqr_n_order_4(t2, t2, 4);
-    sp_256_mont_mul_order_4(t2, t2, t3);
-    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84 */
-    for (i=107; i>=64; i--) {
-        sp_256_mont_sqr_order_4(t2, t2);
-        if (p256_order_low[i / 64] & ((sp_digit)1 << (i % 64)))
-            sp_256_mont_mul_order_4(t2, t2, a);
-    }
-    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f */
-    sp_256_mont_sqr_n_order_4(t2, t2, 4);
-    sp_256_mont_mul_order_4(t2, t2, t3);
-    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2 */
-    for (i=59; i>=32; i--) {
-        sp_256_mont_sqr_order_4(t2, t2);
-        if (p256_order_low[i / 64] & ((sp_digit)1 << (i % 64)))
-            sp_256_mont_mul_order_4(t2, t2, a);
-    }
-    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2f */
-    sp_256_mont_sqr_n_order_4(t2, t2, 4);
-    sp_256_mont_mul_order_4(t2, t2, t3);
-    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254 */
-    for (i=27; i>=0; i--) {
-        sp_256_mont_sqr_order_4(t2, t2);
-        if (p256_order_low[i / 64] & ((sp_digit)1 << (i % 64)))
-            sp_256_mont_mul_order_4(t2, t2, a);
-    }
-    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632540 */
-    sp_256_mont_sqr_n_order_4(t2, t2, 4);
-    /* r = a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f */
-    sp_256_mont_mul_order_4(r, t2, t3);
-#endif /* WOLFSSL_SP_SMALL */
-}
-
-#ifdef HAVE_INTEL_AVX2
-/* Square a and put result in r. (r = a * a)
- *
- * r   Result of squaring.
- * a   Number to square in Montogmery form.
- */
-SP_NOINLINE static void sp_256_sqr_avx2_4(sp_digit* r, const sp_digit* a)
-{
-    __asm__ __volatile__ (
-        "# A[0] * A[1]\n\t"
-        "movq   0(%[a]), %%rdx\n\t"
-        "mulxq  8(%[a]), %%r9, %%r10\n\t"
-        "# A[0] * A[3]\n\t"
-        "mulxq  24(%[a]), %%r11, %%r12\n\t"
-        "# A[2] * A[1]\n\t"
-        "movq   16(%[a]), %%rdx\n\t"
-        "mulxq  8(%[a]), %%rcx, %%rbx\n\t"
-        "xorq   %%r15, %%r15\n\t"
-        "adoxq  %%rcx, %%r11\n\t"
-        "# A[2] * A[3]\n\t"
-        "mulxq  24(%[a]), %%r13, %%r14\n\t"
-        "adoxq  %%rbx, %%r12\n\t"
-        "# A[2] * A[0]\n\t"
-        "mulxq  0(%[a]), %%rcx, %%rbx\n\t"
-        "adoxq  %%r15, %%r13\n\t"
-        "adcxq  %%rcx, %%r10\n\t"
-        "adoxq  %%r15, %%r14\n\t"
-        "# A[1] * A[3]\n\t"
-        "movq   8(%[a]), %%rdx\n\t"
-        "mulxq  24(%[a]), %%rax, %%r8\n\t"
-        "adcxq  %%rbx, %%r11\n\t"
-        "adcxq  %%rax, %%r12\n\t"
-        "adcxq  %%r8, %%r13\n\t"
-        "adcxq  %%r15, %%r14\n\t"
-        "# Double with Carry Flag\n\t"
-        "xorq   %%r15, %%r15\n\t"
-        "# A[0] * A[0]\n\t"
-        "movq   0(%[a]), %%rdx\n\t"
-        "mulxq  %%rdx, %%r8, %%rax\n\t"
-        "adcxq  %%r9, %%r9\n\t"
-        "# A[1] * A[1]\n\t"
-        "movq   8(%[a]), %%rdx\n\t"
-        "mulxq  %%rdx, %%rcx, %%rbx\n\t"
-        "adcxq  %%r10, %%r10\n\t"
-        "adoxq  %%rax, %%r9\n\t"
-        "adcxq  %%r11, %%r11\n\t"
-        "adoxq  %%rcx, %%r10\n\t"
-        "# A[2] * A[2]\n\t"
-        "movq   16(%[a]), %%rdx\n\t"
-        "mulxq  %%rdx, %%rax, %%rcx\n\t"
-        "adcxq  %%r12, %%r12\n\t"
-        "adoxq  %%rbx, %%r11\n\t"
-        "adcxq  %%r13, %%r13\n\t"
-        "adoxq  %%rax, %%r12\n\t"
-        "# A[3] * A[3]\n\t"
-        "movq   24(%[a]), %%rdx\n\t"
-        "mulxq  %%rdx, %%rax, %%rbx\n\t"
-        "adcxq  %%r14, %%r14\n\t"
-        "adoxq  %%rcx, %%r13\n\t"
-        "adcxq  %%r15, %%r15\n\t"
-        "adoxq  %%rax, %%r14\n\t"
-        "adoxq  %%rbx, %%r15\n\t"
-        "movq	%%r8, 0(%[r])\n\t"
-        "movq	%%r9, 8(%[r])\n\t"
-        "movq	%%r10, 16(%[r])\n\t"
-        "movq	%%r11, 24(%[r])\n\t"
-        "movq	%%r12, 32(%[r])\n\t"
-        "movq	%%r13, 40(%[r])\n\t"
-        "movq	%%r14, 48(%[r])\n\t"
-        "movq	%%r15, 56(%[r])\n\t"
-        :
-        : [r] "r" (r), [a] "r" (a)
-        : "memory", "rax", "rdx", "rcx", "rbx", "r8", "r9", "r10", "r11",
-          "r12", "r13", "r14", "r15"
-    );
-}
-
-/* Reduce the number back to 256 bits using Montgomery reduction.
- *
- * a   A single precision number to reduce in place.
- * m   The single precision number representing the modulus.
- * mp  The digit representing the negative inverse of m mod 2^n.
- */
-SP_NOINLINE static void sp_256_mont_reduce_avx2_4(sp_digit* a, sp_digit* m,
-        sp_digit mp)
-{
-    __asm__ __volatile__ (
-        "movq	0(%[a]), %%r12\n\t"
-        "movq	8(%[a]), %%r13\n\t"
-        "movq	16(%[a]), %%r14\n\t"
-        "movq	24(%[a]), %%r15\n\t"
-        "xorq	%%r10, %%r10\n\t"
-        "xorq	%%r11, %%r11\n\t"
-        "# a[0-4] += m[0-3] * mu = m[0-3] * (a[0] * mp)\n\t"
-        "movq	32(%[a]), %%rax\n\t"
-        "#   mu = a[0] * mp\n\t"
-        "movq	%%r12, %%rdx\n\t"
-        "mulxq	%[mp], %%rdx, %%rcx\n\t"
-        "#   a[0] += m[0] * mu\n\t"
-        "mulx	0(%[m]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "#   a[1] += m[1] * mu\n\t"
-        "mulx	8(%[m]), %%r8, %%rcx\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "#   a[2] += m[2] * mu\n\t"
-        "mulx	16(%[m]), %%r8, %%r9\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "#   a[3] += m[3] * mu\n\t"
-        "mulx	24(%[m]), %%r8, %%rcx\n\t"
-        "adoxq	%%r9, %%r15\n\t"
-        "adcxq	%%r8, %%r15\n\t"
-        "adoxq	%%rcx, %%rax\n\t"
-        "adcxq	%%r11, %%rax\n\t"
-        "adoxq	%%r11, %%r10\n\t"
-        "adcxq	%%r11, %%r10\n\t"
-        "# a[1-5] += m[0-3] * mu = m[0-3] * (a[1] * mp)\n\t"
-        "movq	40(%[a]), %%r12\n\t"
-        "#   mu = a[1] * mp\n\t"
-        "movq	%%r13, %%rdx\n\t"
-        "mulxq	%[mp], %%rdx, %%rcx\n\t"
-        "#   a[1] += m[0] * mu\n\t"
-        "mulx	0(%[m]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "#   a[2] += m[1] * mu\n\t"
-        "mulx	8(%[m]), %%r8, %%rcx\n\t"
-        "adoxq	%%r9, %%r14\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "#   a[3] += m[2] * mu\n\t"
-        "mulx	16(%[m]), %%r8, %%r9\n\t"
-        "adoxq	%%rcx, %%r15\n\t"
-        "adcxq	%%r8, %%r15\n\t"
-        "#   a[4] += m[3] * mu\n\t"
-        "mulx	24(%[m]), %%r8, %%rcx\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "adcxq	%%r10, %%r12\n\t"
-        "movq	%%r11, %%r10\n\t"
-        "adoxq	%%r11, %%r10\n\t"
-        "adcxq	%%r11, %%r10\n\t"
-        "# a[2-6] += m[0-3] * mu = m[0-3] * (a[2] * mp)\n\t"
-        "movq	48(%[a]), %%r13\n\t"
-        "#   mu = a[2] * mp\n\t"
-        "movq	%%r14, %%rdx\n\t"
-        "mulxq	%[mp], %%rdx, %%rcx\n\t"
-        "#   a[2] += m[0] * mu\n\t"
-        "mulx	0(%[m]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r14\n\t"
-        "#   a[3] += m[1] * mu\n\t"
-        "mulx	8(%[m]), %%r8, %%rcx\n\t"
-        "adoxq	%%r9, %%r15\n\t"
-        "adcxq	%%r8, %%r15\n\t"
-        "#   a[4] += m[2] * mu\n\t"
-        "mulx	16(%[m]), %%r8, %%r9\n\t"
-        "adoxq	%%rcx, %%rax\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "#   a[5] += m[3] * mu\n\t"
-        "mulx	24(%[m]), %%r8, %%rcx\n\t"
-        "adoxq	%%r9, %%r12\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "adoxq	%%rcx, %%r13\n\t"
-        "adcxq	%%r10, %%r13\n\t"
-        "movq	%%r11, %%r10\n\t"
-        "adoxq	%%r11, %%r10\n\t"
-        "adcxq	%%r11, %%r10\n\t"
-        "# a[3-7] += m[0-3] * mu = m[0-3] * (a[3] * mp)\n\t"
-        "movq	56(%[a]), %%r14\n\t"
-        "#   mu = a[3] * mp\n\t"
-        "movq	%%r15, %%rdx\n\t"
-        "mulxq	%[mp], %%rdx, %%rcx\n\t"
-        "#   a[3] += m[0] * mu\n\t"
-        "mulx	0(%[m]), %%r8, %%r9\n\t"
-        "adcxq	%%r8, %%r15\n\t"
-        "#   a[4] += m[1] * mu\n\t"
-        "mulx	8(%[m]), %%r8, %%rcx\n\t"
-        "adoxq	%%r9, %%rax\n\t"
-        "adcxq	%%r8, %%rax\n\t"
-        "#   a[5] += m[2] * mu\n\t"
-        "mulx	16(%[m]), %%r8, %%r9\n\t"
-        "adoxq	%%rcx, %%r12\n\t"
-        "adcxq	%%r8, %%r12\n\t"
-        "#   a[6] += m[3] * mu\n\t"
-        "mulx	24(%[m]), %%r8, %%rcx\n\t"
-        "adoxq	%%r9, %%r13\n\t"
-        "adcxq	%%r8, %%r13\n\t"
-        "adoxq	%%rcx, %%r14\n\t"
-        "adcxq	%%r10, %%r14\n\t"
-        "movq	%%r11, %%r10\n\t"
-        "adoxq	%%r11, %%r10\n\t"
-        "adcxq	%%r11, %%r10\n\t"
-        "# Subtract mod if carry\n\t"
-        "subq	%%r10, %%r11\n\t"
-        "movq	0(%[m]), %%r8\n\t"
-        "movq	8(%[m]), %%r9\n\t"
-        "movq	16(%[m]), %%r10\n\t"
-        "movq	24(%[m]), %%rdx\n\t"
-        "andq	%%r11, %%r8\n\t"
-        "andq	%%r11, %%r9\n\t"
-        "andq	%%r11, %%r10\n\t"
-        "andq	%%r11, %%rdx\n\t"
-        "subq	%%r8, %%rax\n\t"
-        "sbbq	%%r9, %%r12\n\t"
-        "sbbq	%%r10, %%r13\n\t"
-        "sbbq	%%rdx, %%r14\n\t"
-        "movq	%%rax,   (%[a])\n\t"
-        "movq	%%r12,  8(%[a])\n\t"
-        "movq	%%r13, 16(%[a])\n\t"
-        "movq	%%r14, 24(%[a])\n\t"
-        :
-        : [a] "r" (a), [m] "r" (m), [mp] "r" (mp)
-        : "memory", "rax", "rcx", "rdx", "r8", "r9", "r10", "r11",
-          "r12", "r13", "r14", "r15"
-    );
-}
-
-/* Multiply two number mod the order of P256 curve. (r = a * b mod order)
- *
- * r  Result of the multiplication.
- * a  First operand of the multiplication.
- * b  Second operand of the multiplication.
- */
-static void sp_256_mont_mul_order_avx2_4(sp_digit* r, sp_digit* a, sp_digit* b)
-{
-    sp_256_mul_avx2_4(r, a, b);
-    sp_256_mont_reduce_avx2_4(r, p256_order, p256_mp_order);
-}
-
-/* Square number mod the order of P256 curve. (r = a * a mod order)
- *
- * r  Result of the squaring.
- * a  Number to square.
- */
-static void sp_256_mont_sqr_order_avx2_4(sp_digit* r, sp_digit* a)
-{
-    sp_256_sqr_avx2_4(r, a);
-    sp_256_mont_reduce_avx2_4(r, p256_order, p256_mp_order);
-}
-
-#ifndef WOLFSSL_SP_SMALL
-/* Square number mod the order of P256 curve a number of times.
- * (r = a ^ n mod order)
- *
- * r  Result of the squaring.
- * a  Number to square.
- */
-static void sp_256_mont_sqr_n_order_avx2_4(sp_digit* r, sp_digit* a, int n)
-{
-    int i;
-
-    sp_256_mont_sqr_order_avx2_4(r, a);
-    for (i=1; i<n; i++)
-        sp_256_mont_sqr_order_avx2_4(r, r);
-}
-#endif /* !WOLFSSL_SP_SMALL */
-
-/* Invert the number, in Montgomery form, modulo the order of the P256 curve.
- * (r = 1 / a mod order)
- *
- * r   Inverse result.
- * a   Number to invert.
- * td  Temporary data.
- */
-static void sp_256_mont_inv_order_avx2_4(sp_digit* r, sp_digit* a,
-        sp_digit* td)
-{
-#ifdef WOLFSSL_SP_SMALL
-    sp_digit* t = td;
-    int i;
-
-    XMEMCPY(t, a, sizeof(sp_digit) * 4);
-    for (i=254; i>=0; i--) {
-        sp_256_mont_sqr_order_avx2_4(t, t);
-        if (p256_order_2[i / 64] & ((sp_digit)1 << (i % 64)))
-            sp_256_mont_mul_order_avx2_4(t, t, a);
-    }
-    XMEMCPY(r, t, sizeof(sp_digit) * 4);
-#else
-    sp_digit* t = td;
-    sp_digit* t2 = td + 2 * 4;
-    sp_digit* t3 = td + 4 * 4;
-    int i;
-
-    /* t = a^2 */
-    sp_256_mont_sqr_order_avx2_4(t, a);
-    /* t = a^3 = t * a */
-    sp_256_mont_mul_order_avx2_4(t, t, a);
-    /* t2= a^c = t ^ 2 ^ 2 */
-    sp_256_mont_sqr_n_order_avx2_4(t2, t, 2);
-    /* t3= a^f = t2 * t */
-    sp_256_mont_mul_order_avx2_4(t3, t2, t);
-    /* t2= a^f0 = t3 ^ 2 ^ 4 */
-    sp_256_mont_sqr_n_order_avx2_4(t2, t3, 4);
-    /* t = a^ff = t2 * t3 */
-    sp_256_mont_mul_order_avx2_4(t, t2, t3);
-    /* t3= a^ff00 = t ^ 2 ^ 8 */
-    sp_256_mont_sqr_n_order_avx2_4(t2, t, 8);
-    /* t = a^ffff = t2 * t */
-    sp_256_mont_mul_order_avx2_4(t, t2, t);
-    /* t2= a^ffff0000 = t ^ 2 ^ 16 */
-    sp_256_mont_sqr_n_order_avx2_4(t2, t, 16);
-    /* t = a^ffffffff = t2 * t */
-    sp_256_mont_mul_order_avx2_4(t, t2, t);
-    /* t2= a^ffffffff0000000000000000 = t ^ 2 ^ 64  */
-    sp_256_mont_sqr_n_order_avx2_4(t2, t, 64);
-    /* t2= a^ffffffff00000000ffffffff = t2 * t */
-    sp_256_mont_mul_order_avx2_4(t2, t2, t);
-    /* t2= a^ffffffff00000000ffffffff00000000 = t2 ^ 2 ^ 32  */
-    sp_256_mont_sqr_n_order_avx2_4(t2, t2, 32);
-    /* t2= a^ffffffff00000000ffffffffffffffff = t2 * t */
-    sp_256_mont_mul_order_avx2_4(t2, t2, t);
-    /* t2= a^ffffffff00000000ffffffffffffffffbce6 */
-    for (i=127; i>=112; i--) {
-        sp_256_mont_sqr_order_avx2_4(t2, t2);
-        if (p256_order_low[i / 64] & ((sp_digit)1 << (i % 64)))
-            sp_256_mont_mul_order_avx2_4(t2, t2, a);
-    }
-    /* t2= a^ffffffff00000000ffffffffffffffffbce6f */
-    sp_256_mont_sqr_n_order_avx2_4(t2, t2, 4);
-    sp_256_mont_mul_order_avx2_4(t2, t2, t3);
-    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84 */
-    for (i=107; i>=64; i--) {
-        sp_256_mont_sqr_order_avx2_4(t2, t2);
-        if (p256_order_low[i / 64] & ((sp_digit)1 << (i % 64)))
-            sp_256_mont_mul_order_avx2_4(t2, t2, a);
-    }
-    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f */
-    sp_256_mont_sqr_n_order_avx2_4(t2, t2, 4);
-    sp_256_mont_mul_order_avx2_4(t2, t2, t3);
-    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2 */
-    for (i=59; i>=32; i--) {
-        sp_256_mont_sqr_order_avx2_4(t2, t2);
-        if (p256_order_low[i / 64] & ((sp_digit)1 << (i % 64)))
-            sp_256_mont_mul_order_avx2_4(t2, t2, a);
-    }
-    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2f */
-    sp_256_mont_sqr_n_order_avx2_4(t2, t2, 4);
-    sp_256_mont_mul_order_avx2_4(t2, t2, t3);
-    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254 */
-    for (i=27; i>=0; i--) {
-        sp_256_mont_sqr_order_avx2_4(t2, t2);
-        if (p256_order_low[i / 64] & ((sp_digit)1 << (i % 64)))
-            sp_256_mont_mul_order_avx2_4(t2, t2, a);
-    }
-    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632540 */
-    sp_256_mont_sqr_n_order_avx2_4(t2, t2, 4);
-    /* r = a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f */
-    sp_256_mont_mul_order_avx2_4(r, t2, t3);
-#endif /* WOLFSSL_SP_SMALL */
-}
-
-#endif /* HAVE_INTEL_AVX2 */
-#endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
-#ifdef HAVE_ECC_SIGN
-#ifndef SP_ECC_MAX_SIG_GEN
-#define SP_ECC_MAX_SIG_GEN  64
-#endif
-
-/* Sign the hash using the private key.
- *   e = [hash, 256 bits] from binary
- *   r = (k.G)->x mod order
- *   s = (r * x + e) / k mod order
- * The hash is truncated to the first 256 bits.
- *
- * hash     Hash to sign.
- * hashLen  Length of the hash data.
- * rng      Random number generator.
- * priv     Private part of key - scalar.
- * rm       First part of result as an mp_int.
- * sm       Sirst part of result as an mp_int.
- * heap     Heap to use for allocation.
- * returns RNG failures, MEMORY_E when memory allocation fails and
- * MP_OKAY on success.
- */
-int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
-                    mp_int* rm, mp_int* sm, void* heap)
-{
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    sp_digit* d;
-#else
-    sp_digit ed[2*4];
-    sp_digit xd[2*4];
-    sp_digit kd[2*4];
-    sp_digit rd[2*4];
-    sp_digit td[3 * 2*4];
-    sp_point p;
-#endif
-    sp_digit* e = NULL;
-    sp_digit* x = NULL;
-    sp_digit* k = NULL;
-    sp_digit* r = NULL;
-    sp_digit* tmp = NULL;
-    sp_point* point = NULL;
-    sp_digit carry;
-    sp_digit* s;
-    sp_digit* kInv;
-    int err = MP_OKAY;
-    int64_t c;
-    int i;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    (void)heap;
-
-    err = sp_ecc_point_new(heap, p, point);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        d = XMALLOC(sizeof(sp_digit) * 7 * 2 * 4, heap, DYNAMIC_TYPE_ECC);
-        if (d != NULL) {
-            e = d + 0 * 4;
-            x = d + 2 * 4;
-            k = d + 4 * 4;
-            r = d + 6 * 4;
-            tmp = d + 8 * 4;
-        }
-        else
-            err = MEMORY_E;
-    }
-#else
-    e = ed;
-    x = xd;
-    k = kd;
-    r = rd;
-    tmp = td;
-#endif
-    s = e;
-    kInv = k;
-
-    if (err == MP_OKAY) {
-        if (hashLen > 32)
-            hashLen = 32;
-
-        sp_256_from_bin(e, 4, hash, hashLen);
-        sp_256_from_mp(x, 4, priv);
-    }
-
-    for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
-        /* New random point. */
-        err = sp_256_ecc_gen_k_4(rng, k);
-        if (err == MP_OKAY) {
-#ifdef HAVE_INTEL_AVX2
-            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-                err = sp_256_ecc_mulmod_base_avx2_4(point, k, 1, heap);
-            else
-#endif
-                err = sp_256_ecc_mulmod_base_4(point, k, 1, NULL);
-        }
-
-        if (err == MP_OKAY) {
-            /* r = point->x mod order */
-            XMEMCPY(r, point->x, sizeof(sp_digit) * 4);
-            sp_256_norm_4(r);
-            c = sp_256_cmp_4(r, p256_order);
-            sp_256_cond_sub_4(r, r, p256_order, 0 - (c >= 0));
-            sp_256_norm_4(r);
-
-            /* Conv k to Montgomery form (mod order) */
-#ifdef HAVE_INTEL_AVX2
-            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-                sp_256_mul_avx2_4(k, k, p256_norm_order);
-            else
-#endif
-                sp_256_mul_4(k, k, p256_norm_order);
-            err = sp_256_mod_4(k, k, p256_order);
-        }
-        if (err == MP_OKAY) {
-            sp_256_norm_4(k);
-            /* kInv = 1/k mod order */
-#ifdef HAVE_INTEL_AVX2
-            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-                sp_256_mont_inv_order_avx2_4(kInv, k, tmp);
-            else
-#endif
-                sp_256_mont_inv_order_4(kInv, k, tmp);
-            sp_256_norm_4(kInv);
-
-            /* s = r * x + e */
-#ifdef HAVE_INTEL_AVX2
-            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-                sp_256_mul_avx2_4(x, x, r);
-            else
-#endif
-                sp_256_mul_4(x, x, r);
-            err = sp_256_mod_4(x, x, p256_order);
-        }
-        if (err == MP_OKAY) {
-            sp_256_norm_4(x);
-            carry = sp_256_add_4(s, e, x);
-            sp_256_cond_sub_4(s, s, p256_order, 0 - carry);
-            sp_256_norm_4(s);
-            c = sp_256_cmp_4(s, p256_order);
-            sp_256_cond_sub_4(s, s, p256_order, 0 - (c >= 0));
-            sp_256_norm_4(s);
-
-            /* s = s * k^-1 mod order */
-#ifdef HAVE_INTEL_AVX2
-            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-                sp_256_mont_mul_order_avx2_4(s, s, kInv);
-            else
-#endif
-                sp_256_mont_mul_order_4(s, s, kInv);
-            sp_256_norm_4(s);
-
-            /* Check that signature is usable. */
-            if (!sp_256_iszero_4(s))
-                break;
-        }
-    }
-
-    if (i == 0)
-        err = RNG_FAILURE_E;
-
-    if (err == MP_OKAY)
-        err = sp_256_to_mp(r, rm);
-    if (err == MP_OKAY)
-        err = sp_256_to_mp(s, sm);
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (d != NULL) {
-        XMEMSET(d, 0, sizeof(sp_digit) * 8 * 4);
-        XFREE(d, heap, DYNAMIC_TYPE_ECC);
-    }
-#else
-    XMEMSET(e, 0, sizeof(sp_digit) * 2 * 4);
-    XMEMSET(x, 0, sizeof(sp_digit) * 2 * 4);
-    XMEMSET(k, 0, sizeof(sp_digit) * 2 * 4);
-    XMEMSET(r, 0, sizeof(sp_digit) * 2 * 4);
-    XMEMSET(r, 0, sizeof(sp_digit) * 2 * 4);
-    XMEMSET(tmp, 0, sizeof(sp_digit) * 3 * 2*4);
-#endif
-    sp_ecc_point_free(point, 1, heap);
-
-    return err;
-}
-#endif /* HAVE_ECC_SIGN */
-
-#ifdef HAVE_ECC_VERIFY
-/* Verify the signature values with the hash and public key.
- *   e = Truncate(hash, 256)
- *   u1 = e/s mod order
- *   u2 = r/s mod order
- *   r == (u1.G + u2.Q)->x mod order
- * Optimization: Leave point in projective form.
- *   (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
- *   (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
- * The hash is truncated to the first 256 bits.
- *
- * hash     Hash to sign.
- * hashLen  Length of the hash data.
- * rng      Random number generator.
- * priv     Private part of key - scalar.
- * rm       First part of result as an mp_int.
- * sm       Sirst part of result as an mp_int.
- * heap     Heap to use for allocation.
- * returns RNG failures, MEMORY_E when memory allocation fails and
- * MP_OKAY on success.
- */
-int sp_ecc_verify_256(const byte* hash, word32 hashLen, mp_int* pX,
-    mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap)
-{
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    sp_digit* d = NULL;
-#else
-    sp_digit u1d[2*4];
-    sp_digit u2d[2*4];
-    sp_digit sd[2*4];
-    sp_digit tmpd[2*4 * 5];
-    sp_point p1d;
-    sp_point p2d;
-#endif
-    sp_digit* u1;
-    sp_digit* u2;
-    sp_digit* s;
-    sp_digit* tmp;
-    sp_point* p1;
-    sp_point* p2 = NULL;
-    sp_digit carry;
-    int64_t c;
-    int err;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    err = sp_ecc_point_new(heap, p1d, p1);
-    if (err == MP_OKAY)
-        err = sp_ecc_point_new(heap, p2d, p2);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        d = XMALLOC(sizeof(sp_digit) * 16 * 4, heap, DYNAMIC_TYPE_ECC);
-        if (d != NULL) {
-            u1  = d + 0 * 4;
-            u2  = d + 2 * 4;
-            s   = d + 4 * 4;
-            tmp = d + 6 * 4;
-        }
-        else
-            err = MEMORY_E;
-    }
-#else
-    u1 = u1d;
-    u2 = u2d;
-    s  = sd;
-    tmp = tmpd;
-#endif
-
-    if (err == MP_OKAY) {
-        if (hashLen > 32)
-            hashLen = 32;
-
-        sp_256_from_bin(u1, 4, hash, hashLen);
-        sp_256_from_mp(u2, 4, r);
-        sp_256_from_mp(s, 4, sm);
-        sp_256_from_mp(p2->x, 4, pX);
-        sp_256_from_mp(p2->y, 4, pY);
-        sp_256_from_mp(p2->z, 4, pZ);
-
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            sp_256_mul_avx2_4(s, s, p256_norm_order);
-        else
-#endif
-            sp_256_mul_4(s, s, p256_norm_order);
-        err = sp_256_mod_4(s, s, p256_order);
-    }
-    if (err == MP_OKAY) {
-        sp_256_norm_4(s);
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
-            sp_256_mont_inv_order_avx2_4(s, s, tmp);
-            sp_256_mont_mul_order_avx2_4(u1, u1, s);
-            sp_256_mont_mul_order_avx2_4(u2, u2, s);
-        }
-        else
-#endif
-        {
-            sp_256_mont_inv_order_4(s, s, tmp);
-            sp_256_mont_mul_order_4(u1, u1, s);
-            sp_256_mont_mul_order_4(u2, u2, s);
-        }
-
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_256_ecc_mulmod_base_avx2_4(p1, u1, 0, heap);
-        else
-#endif
-            err = sp_256_ecc_mulmod_base_4(p1, u1, 0, heap);
-    }
-    if (err == MP_OKAY) {
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_256_ecc_mulmod_avx2_4(p2, p2, u2, 0, heap);
-        else
-#endif
-            err = sp_256_ecc_mulmod_4(p2, p2, u2, 0, heap);
-    }
-
-    if (err == MP_OKAY) {
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            sp_256_proj_point_add_avx2_4(p1, p1, p2, tmp);
-        else
-#endif
-            sp_256_proj_point_add_4(p1, p1, p2, tmp);
-
-        /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
-        /* Reload r and convert to Montgomery form. */
-        sp_256_from_mp(u2, 4, r);
-        err = sp_256_mod_mul_norm_4(u2, u2, p256_mod);
-    }
-
-    if (err == MP_OKAY) {
-        /* u1 = r.z'.z' mod prime */
-        sp_256_mont_sqr_4(p1->z, p1->z, p256_mod, p256_mp_mod);
-        sp_256_mont_mul_4(u1, u2, p1->z, p256_mod, p256_mp_mod);
-        *res = sp_256_cmp_4(p1->x, u1) == 0;
-        if (*res == 0) {
-            /* Reload r and add order. */
-            sp_256_from_mp(u2, 4, r);
-            carry = sp_256_add_4(u2, u2, p256_order);
-            /* Carry means result is greater than mod and is not valid. */
-            if (!carry) {
-                sp_256_norm_4(u2);
-
-                /* Compare with mod and if greater or equal then not valid. */
-                c = sp_256_cmp_4(u2, p256_mod);
-                if (c < 0) {
-                    /* Convert to Montogomery form */
-                    err = sp_256_mod_mul_norm_4(u2, u2, p256_mod);
-                    if (err == MP_OKAY) {
-                        /* u1 = (r + 1*order).z'.z' mod prime */
-                        sp_256_mont_mul_4(u1, u2, p1->z, p256_mod,
-                                                                  p256_mp_mod);
-                        *res = sp_256_cmp_4(p1->x, u2) == 0;
-                    }
-                }
-            }
-        }
-    }
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (d != NULL)
-        XFREE(d, heap, DYNAMIC_TYPE_ECC);
-#endif
-    sp_ecc_point_free(p1, 0, heap);
-    sp_ecc_point_free(p2, 0, heap);
-
-    return err;
-}
-#endif /* HAVE_ECC_VERIFY */
-
-#ifdef HAVE_ECC_CHECK_KEY
-/* Check that the x and y oridinates are a valid point on the curve.
- *
- * point  EC point.
- * heap   Heap to use if dynamically allocating.
- * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
- * not on the curve and MP_OKAY otherwise.
- */
-static int sp_256_ecc_is_point_4(sp_point* point, void* heap)
-{
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    sp_digit* d = NULL;
-#else
-    sp_digit t1d[2*4];
-    sp_digit t2d[2*4];
-#endif
-    sp_digit* t1;
-    sp_digit* t2;
-    int err = MP_OKAY;
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    d = XMALLOC(sizeof(sp_digit) * 4 * 4, heap, DYNAMIC_TYPE_ECC);
-    if (d != NULL) {
-        t1 = d + 0 * 4;
-        t2 = d + 2 * 4;
-    }
-    else
-        err = MEMORY_E;
-#else
-    (void)heap;
-
-    t1 = t1d;
-    t2 = t2d;
-#endif
-
-    if (err == MP_OKAY) {
-        sp_256_sqr_4(t1, point->y);
-        sp_256_mod_4(t1, t1, p256_mod);
-        sp_256_sqr_4(t2, point->x);
-        sp_256_mod_4(t2, t2, p256_mod);
-        sp_256_mul_4(t2, t2, point->x);
-        sp_256_mod_4(t2, t2, p256_mod);
-	sp_256_sub_4(t2, p256_mod, t2);
-        sp_256_mont_add_4(t1, t1, t2, p256_mod);
-
-        sp_256_mont_add_4(t1, t1, point->x, p256_mod);
-        sp_256_mont_add_4(t1, t1, point->x, p256_mod);
-        sp_256_mont_add_4(t1, t1, point->x, p256_mod);
-
-        if (sp_256_cmp_4(t1, p256_b) != 0)
-            err = MP_VAL;
-    }
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (d != NULL)
-        XFREE(d, heap, DYNAMIC_TYPE_ECC);
-#endif
-
-    return err;
-}
-
-/* Check that the x and y oridinates are a valid point on the curve.
- *
- * pX  X ordinate of EC point.
- * pY  Y ordinate of EC point.
- * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
- * not on the curve and MP_OKAY otherwise.
- */
-int sp_ecc_is_point_256(mp_int* pX, mp_int* pY)
-{
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point pubd;
-#endif
-    sp_point* pub;
-    byte one[1] = { 1 };
-    int err;
-
-    err = sp_ecc_point_new(NULL, pubd, pub);
-    if (err == MP_OKAY) {
-        sp_256_from_mp(pub->x, 4, pX);
-        sp_256_from_mp(pub->y, 4, pY);
-        sp_256_from_bin(pub->z, 4, one, sizeof(one));
-
-        err = sp_256_ecc_is_point_4(pub, NULL);
-    }
-
-    sp_ecc_point_free(pub, 0, NULL);
-
-    return err;
-}
-
-/* Check that the private scalar generates the EC point (px, py), the point is
- * on the curve and the point has the correct order.
- *
- * pX     X ordinate of EC point.
- * pY     Y ordinate of EC point.
- * privm  Private scalar that generates EC point.
- * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
- * not on the curve, ECC_INF_E if the point does not have the correct order,
- * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and
- * MP_OKAY otherwise.
- */
-int sp_ecc_check_key_256(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
-{
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_digit privd[4];
-    sp_point pubd;
-    sp_point pd;
-#endif
-    sp_digit* priv = NULL;
-    sp_point* pub;
-    sp_point* p = NULL;
-    byte one[1] = { 1 };
-    int err;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    err = sp_ecc_point_new(heap, pubd, pub);
-    if (err == MP_OKAY)
-        err = sp_ecc_point_new(heap, pd, p);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        priv = XMALLOC(sizeof(sp_digit) * 4, heap, DYNAMIC_TYPE_ECC);
-        if (priv == NULL)
-            err = MEMORY_E;
-    }
-#else
-    priv = privd;
-#endif
-
-    if (err == MP_OKAY) {
-        sp_256_from_mp(pub->x, 4, pX);
-        sp_256_from_mp(pub->y, 4, pY);
-        sp_256_from_bin(pub->z, 4, one, sizeof(one));
-        sp_256_from_mp(priv, 4, privm);
-
-        /* Check point at infinitiy. */
-        if (sp_256_iszero_4(pub->x) &&
-            sp_256_iszero_4(pub->y))
-            err = ECC_INF_E;
-    }
-
-    if (err == MP_OKAY) {
-        /* Check range of X and Y */
-        if (sp_256_cmp_4(pub->x, p256_mod) >= 0 ||
-            sp_256_cmp_4(pub->y, p256_mod) >= 0)
-            err = ECC_OUT_OF_RANGE_E;
-    }
-
-    if (err == MP_OKAY) {
-        /* Check point is on curve */
-        err = sp_256_ecc_is_point_4(pub, heap);
-    }
-
-    if (err == MP_OKAY) {
-        /* Point * order = infinity */
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_256_ecc_mulmod_avx2_4(p, pub, p256_order, 1, heap);
-        else
-#endif
-            err = sp_256_ecc_mulmod_4(p, pub, p256_order, 1, heap);
-    }
-    if (err == MP_OKAY) {
-        /* Check result is infinity */
-        if (!sp_256_iszero_4(p->x) ||
-            !sp_256_iszero_4(p->y)) {
-            err = ECC_INF_E;
-        }
-    }
-
-    if (err == MP_OKAY) {
-        /* Base * private = point */
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_256_ecc_mulmod_base_avx2_4(p, priv, 1, heap);
-        else
-#endif
-            err = sp_256_ecc_mulmod_base_4(p, priv, 1, heap);
-    }
-    if (err == MP_OKAY) {
-        /* Check result is public key */
-        if (sp_256_cmp_4(p->x, pub->x) != 0 ||
-            sp_256_cmp_4(p->y, pub->y) != 0) {
-            err = ECC_PRIV_KEY_E;
-        }
-    }
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (priv != NULL)
-        XFREE(priv, heap, DYNAMIC_TYPE_ECC);
-#endif
-    sp_ecc_point_free(p, 0, heap);
-    sp_ecc_point_free(pub, 0, heap);
-
-    return err;
-}
-#endif
-#ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
-/* Add two projective EC points together.
- * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ)
- *
- * pX   First EC point's X ordinate.
- * pY   First EC point's Y ordinate.
- * pZ   First EC point's Z ordinate.
- * qX   Second EC point's X ordinate.
- * qY   Second EC point's Y ordinate.
- * qZ   Second EC point's Z ordinate.
- * rX   Resultant EC point's X ordinate.
- * rY   Resultant EC point's Y ordinate.
- * rZ   Resultant EC point's Z ordinate.
- * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
- */
-int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
-                              mp_int* qX, mp_int* qY, mp_int* qZ,
-                              mp_int* rX, mp_int* rY, mp_int* rZ)
-{
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_digit tmpd[2 * 4 * 5];
-    sp_point pd;
-    sp_point qd;
-#endif
-    sp_digit* tmp;
-    sp_point* p;
-    sp_point* q = NULL;
-    int err;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    err = sp_ecc_point_new(NULL, pd, p);
-    if (err == MP_OKAY)
-        err = sp_ecc_point_new(NULL, qd, q);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        tmp = XMALLOC(sizeof(sp_digit) * 2 * 4 * 5, NULL, DYNAMIC_TYPE_ECC);
-        if (tmp == NULL)
-            err = MEMORY_E;
-    }
-#else
-    tmp = tmpd;
-#endif
-
-    if (err == MP_OKAY) {
-        sp_256_from_mp(p->x, 4, pX);
-        sp_256_from_mp(p->y, 4, pY);
-        sp_256_from_mp(p->z, 4, pZ);
-        sp_256_from_mp(q->x, 4, qX);
-        sp_256_from_mp(q->y, 4, qY);
-        sp_256_from_mp(q->z, 4, qZ);
-
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            sp_256_proj_point_add_avx2_4(p, p, q, tmp);
-        else
-#endif
-            sp_256_proj_point_add_4(p, p, q, tmp);
-    }
-
-    if (err == MP_OKAY)
-        err = sp_256_to_mp(p->x, rX);
-    if (err == MP_OKAY)
-        err = sp_256_to_mp(p->y, rY);
-    if (err == MP_OKAY)
-        err = sp_256_to_mp(p->z, rZ);
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (tmp != NULL)
-        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
-#endif
-    sp_ecc_point_free(q, 0, NULL);
-    sp_ecc_point_free(p, 0, NULL);
-
-    return err;
-}
-
-/* Double a projective EC point.
- * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ)
- *
- * pX   EC point's X ordinate.
- * pY   EC point's Y ordinate.
- * pZ   EC point's Z ordinate.
- * rX   Resultant EC point's X ordinate.
- * rY   Resultant EC point's Y ordinate.
- * rZ   Resultant EC point's Z ordinate.
- * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
- */
-int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
-                              mp_int* rX, mp_int* rY, mp_int* rZ)
-{
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_digit tmpd[2 * 4 * 2];
-    sp_point pd;
-#endif
-    sp_digit* tmp;
-    sp_point* p;
-    int err;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-    err = sp_ecc_point_new(NULL, pd, p);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        tmp = XMALLOC(sizeof(sp_digit) * 2 * 4 * 2, NULL, DYNAMIC_TYPE_ECC);
-        if (tmp == NULL)
-            err = MEMORY_E;
-    }
-#else
-    tmp = tmpd;
-#endif
-
-    if (err == MP_OKAY) {
-        sp_256_from_mp(p->x, 4, pX);
-        sp_256_from_mp(p->y, 4, pY);
-        sp_256_from_mp(p->z, 4, pZ);
-
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            sp_256_proj_point_dbl_avx2_4(p, p, tmp);
-        else
-#endif
-            sp_256_proj_point_dbl_4(p, p, tmp);
-    }
-
-    if (err == MP_OKAY)
-        err = sp_256_to_mp(p->x, rX);
-    if (err == MP_OKAY)
-        err = sp_256_to_mp(p->y, rY);
-    if (err == MP_OKAY)
-        err = sp_256_to_mp(p->z, rZ);
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (tmp != NULL)
-        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
-#endif
-    sp_ecc_point_free(p, 0, NULL);
-
-    return err;
-}
-
-/* Map a projective EC point to affine in place.
- * pZ will be one.
- *
- * pX   EC point's X ordinate.
- * pY   EC point's Y ordinate.
- * pZ   EC point's Z ordinate.
- * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
- */
-int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ)
-{
-#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_digit tmpd[2 * 4 * 4];
-    sp_point pd;
-#endif
-    sp_digit* tmp;
-    sp_point* p;
-    int err;
-
-    err = sp_ecc_point_new(NULL, pd, p);
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (err == MP_OKAY) {
-        tmp = XMALLOC(sizeof(sp_digit) * 2 * 4 * 4, NULL, DYNAMIC_TYPE_ECC);
-        if (tmp == NULL)
-            err = MEMORY_E;
-    }
-#else
-    tmp = tmpd;
-#endif
-    if (err == MP_OKAY) {
-        sp_256_from_mp(p->x, 4, pX);
-        sp_256_from_mp(p->y, 4, pY);
-        sp_256_from_mp(p->z, 4, pZ);
-
-        sp_256_map_4(p, p, tmp);
-    }
-
-    if (err == MP_OKAY)
-        err = sp_256_to_mp(p->x, pX);
-    if (err == MP_OKAY)
-        err = sp_256_to_mp(p->y, pY);
-    if (err == MP_OKAY)
-        err = sp_256_to_mp(p->z, pZ);
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (tmp != NULL)
-        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
-#endif
-    sp_ecc_point_free(p, 0, NULL);
-
-    return err;
-}
-#endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */
-#ifdef HAVE_COMP_KEY
-/* Find the square root of a number mod the prime of the curve.
- *
- * y  The number to operate on and the result.
- * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
- */
-static int sp_256_mont_sqrt_4(sp_digit* y)
-{
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    sp_digit* d;
-#else
-    sp_digit t1d[2 * 4];
-    sp_digit t2d[2 * 4];
-#endif
-    sp_digit* t1;
-    sp_digit* t2;
-    int err = MP_OKAY;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    d = XMALLOC(sizeof(sp_digit) * 4 * 4, NULL, DYNAMIC_TYPE_ECC);
-    if (d != NULL) {
-        t1 = d + 0 * 4;
-        t2 = d + 2 * 4;
-    }
-    else
-        err = MEMORY_E;
-#else
-    t1 = t1d;
-    t2 = t2d;
-#endif
-
-    if (err == MP_OKAY) {
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
-            /* t2 = y ^ 0x2 */
-            sp_256_mont_sqr_avx2_4(t2, y, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0x3 */
-            sp_256_mont_mul_avx2_4(t1, t2, y, p256_mod, p256_mp_mod);
-            /* t2 = y ^ 0xc */
-            sp_256_mont_sqr_n_avx2_4(t2, t1, 2, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0xf */
-            sp_256_mont_mul_avx2_4(t1, t1, t2, p256_mod, p256_mp_mod);
-            /* t2 = y ^ 0xf0 */
-            sp_256_mont_sqr_n_avx2_4(t2, t1, 4, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0xff */
-            sp_256_mont_mul_avx2_4(t1, t1, t2, p256_mod, p256_mp_mod);
-            /* t2 = y ^ 0xff00 */
-            sp_256_mont_sqr_n_avx2_4(t2, t1, 8, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0xffff */
-            sp_256_mont_mul_avx2_4(t1, t1, t2, p256_mod, p256_mp_mod);
-            /* t2 = y ^ 0xffff0000 */
-            sp_256_mont_sqr_n_avx2_4(t2, t1, 16, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0xffffffff */
-            sp_256_mont_mul_avx2_4(t1, t1, t2, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0xffffffff00000000 */
-            sp_256_mont_sqr_n_avx2_4(t1, t1, 32, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0xffffffff00000001 */
-            sp_256_mont_mul_avx2_4(t1, t1, y, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0xffffffff00000001000000000000000000000000 */
-            sp_256_mont_sqr_n_avx2_4(t1, t1, 96, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0xffffffff00000001000000000000000000000001 */
-            sp_256_mont_mul_avx2_4(t1, t1, y, p256_mod, p256_mp_mod);
-            sp_256_mont_sqr_n_avx2_4(y, t1, 94, p256_mod, p256_mp_mod);
-        }
-        else
-#endif
-        {
-            /* t2 = y ^ 0x2 */
-            sp_256_mont_sqr_4(t2, y, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0x3 */
-            sp_256_mont_mul_4(t1, t2, y, p256_mod, p256_mp_mod);
-            /* t2 = y ^ 0xc */
-            sp_256_mont_sqr_n_4(t2, t1, 2, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0xf */
-            sp_256_mont_mul_4(t1, t1, t2, p256_mod, p256_mp_mod);
-            /* t2 = y ^ 0xf0 */
-            sp_256_mont_sqr_n_4(t2, t1, 4, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0xff */
-            sp_256_mont_mul_4(t1, t1, t2, p256_mod, p256_mp_mod);
-            /* t2 = y ^ 0xff00 */
-            sp_256_mont_sqr_n_4(t2, t1, 8, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0xffff */
-            sp_256_mont_mul_4(t1, t1, t2, p256_mod, p256_mp_mod);
-            /* t2 = y ^ 0xffff0000 */
-            sp_256_mont_sqr_n_4(t2, t1, 16, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0xffffffff */
-            sp_256_mont_mul_4(t1, t1, t2, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0xffffffff00000000 */
-            sp_256_mont_sqr_n_4(t1, t1, 32, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0xffffffff00000001 */
-            sp_256_mont_mul_4(t1, t1, y, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0xffffffff00000001000000000000000000000000 */
-            sp_256_mont_sqr_n_4(t1, t1, 96, p256_mod, p256_mp_mod);
-            /* t1 = y ^ 0xffffffff00000001000000000000000000000001 */
-            sp_256_mont_mul_4(t1, t1, y, p256_mod, p256_mp_mod);
-            sp_256_mont_sqr_n_4(y, t1, 94, p256_mod, p256_mp_mod);
-        }
-    }
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (d != NULL)
-        XFREE(d, NULL, DYNAMIC_TYPE_ECC);
-#endif
-
-    return err;
-}
-
-/* Uncompress the point given the X ordinate.
- *
- * xm    X ordinate.
- * odd   Whether the Y ordinate is odd.
- * ym    Calculated Y ordinate.
- * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
- */
-int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym)
-{
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    sp_digit* d;
-#else
-    sp_digit xd[2 * 4];
-    sp_digit yd[2 * 4];
-#endif
-    sp_digit* x;
-    sp_digit* y;
-    int err = MP_OKAY;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    d = XMALLOC(sizeof(sp_digit) * 4 * 4, NULL, DYNAMIC_TYPE_ECC);
-    if (d != NULL) {
-        x = d + 0 * 4;
-        y = d + 2 * 4;
-    }
-    else
-        err = MEMORY_E;
-#else
-    x = xd;
-    y = yd;
-#endif
-
-    if (err == MP_OKAY) {
-        sp_256_from_mp(x, 4, xm);
-
-        err = sp_256_mod_mul_norm_4(x, x, p256_mod);
-    }
-
-    if (err == MP_OKAY) {
-        /* y = x^3 */
-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
-            sp_256_mont_sqr_avx2_4(y, x, p256_mod, p256_mp_mod);
-            sp_256_mont_mul_avx2_4(y, y, x, p256_mod, p256_mp_mod);
-        }
-        else
-#endif
-        {
-            sp_256_mont_sqr_4(y, x, p256_mod, p256_mp_mod);
-            sp_256_mont_mul_4(y, y, x, p256_mod, p256_mp_mod);
-        }
-        /* y = x^3 - 3x */
-        sp_256_mont_sub_4(y, y, x, p256_mod);
-        sp_256_mont_sub_4(y, y, x, p256_mod);
-        sp_256_mont_sub_4(y, y, x, p256_mod);
-        /* y = x^3 - 3x + b */
-        err = sp_256_mod_mul_norm_4(x, p256_b, p256_mod);
-    }
-    if (err == MP_OKAY) {
-        sp_256_mont_add_4(y, y, x, p256_mod);
-        /* y = sqrt(x^3 - 3x + b) */
-        err = sp_256_mont_sqrt_4(y);
-    }
-    if (err == MP_OKAY) {
-        XMEMSET(y + 4, 0, 4 * sizeof(sp_digit));
-        sp_256_mont_reduce_4(y, p256_mod, p256_mp_mod);
-        if (((y[0] ^ odd) & 1) != 0)
-            sp_256_mont_sub_4(y, p256_mod, y, p256_mod);
-
-        err = sp_256_to_mp(y, ym);
-    }
-
-#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    if (d != NULL)
-        XFREE(d, NULL, DYNAMIC_TYPE_ECC);
-#endif
-
-    return err;
-}
-#endif
-#endif /* WOLFSSL_SP_NO_256 */
-#endif /* WOLFSSL_HAVE_SP_ECC */
-#endif /* WOLFSSL_SP_X86_64_ASM */
-#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH || WOLFSSL_HAVE_SP_ECC */
-
--- a/wolfcrypt/src/srp.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/srp.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* srp.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
--- a/wolfcrypt/src/tfm.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/tfm.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* tfm.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -58,13 +58,41 @@
     #include <stdio.h>
 #endif
 
-
+#ifdef USE_WINDOWS_API
+    #pragma warning(disable:4127)
+    /* Disables the warning:
+     *   4127: conditional expression is constant
+     * in this file.
+     */
+#endif
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+#ifdef __cplusplus
+    extern "C" {
+#endif
+WOLFSSL_LOCAL int sp_ModExp_1024(mp_int* base, mp_int* exp, mp_int* mod,
+    mp_int* res);
+WOLFSSL_LOCAL int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod,
+    mp_int* res);
+WOLFSSL_LOCAL int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod,
+    mp_int* res);
+WOLFSSL_LOCAL int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod,
+    mp_int* res);
+WOLFSSL_LOCAL int sp_ModExp_4096(mp_int* base, mp_int* exp, mp_int* mod,
+    mp_int* res);
+#ifdef __cplusplus
+    } /* extern "C" */
+#endif
+#endif
+
+
+#ifndef WOLFSSL_SP_MATH
 /* math settings check */
 word32 CheckRunTimeSettings(void)
 {
     return CTC_SETTINGS;
 }
-
+#endif
 
 /* math settings size check */
 word32 CheckRunTimeFastMath(void)
@@ -198,10 +226,17 @@
 }
 
 /* c = a * b */
-void fp_mul(fp_int *A, fp_int *B, fp_int *C)
+int fp_mul(fp_int *A, fp_int *B, fp_int *C)
 {
+    int   ret = 0;
     int   y, yy, oldused;
 
+#if defined(WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) && \
+   !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI)
+  ret = esp_mp_mul(A, B, C);
+  if(ret != -2) return ret;
+#endif
+
     oldused = C->used;
 
     y  = MAX(A->used, B->used);
@@ -209,7 +244,7 @@
 
     /* call generic if we're out of range */
     if (y + yy > FP_SIZE) {
-       fp_mul_comba(A, B, C);
+       ret = fp_mul_comba(A, B, C);
        goto clean;
     }
 
@@ -221,102 +256,104 @@
 
 #if defined(TFM_MUL3) && FP_SIZE >= 6
         if (y <= 3) {
-           fp_mul_comba3(A,B,C);
+           ret = fp_mul_comba3(A,B,C);
            goto clean;
         }
 #endif
 #if defined(TFM_MUL4) && FP_SIZE >= 8
         if (y == 4) {
-           fp_mul_comba4(A,B,C);
+           ret = fp_mul_comba4(A,B,C);
            goto clean;
         }
 #endif
 #if defined(TFM_MUL6) && FP_SIZE >= 12
         if (y <= 6) {
-           fp_mul_comba6(A,B,C);
+           ret = fp_mul_comba6(A,B,C);
            goto clean;
         }
 #endif
 #if defined(TFM_MUL7) && FP_SIZE >= 14
         if (y == 7) {
-           fp_mul_comba7(A,B,C);
+           ret = fp_mul_comba7(A,B,C);
            goto clean;
         }
 #endif
 #if defined(TFM_MUL8) && FP_SIZE >= 16
         if (y == 8) {
-           fp_mul_comba8(A,B,C);
+           ret = fp_mul_comba8(A,B,C);
            goto clean;
         }
 #endif
 #if defined(TFM_MUL9) && FP_SIZE >= 18
         if (y == 9) {
-           fp_mul_comba9(A,B,C);
+           ret = fp_mul_comba9(A,B,C);
            goto clean;
         }
 #endif
 #if defined(TFM_MUL12) && FP_SIZE >= 24
         if (y <= 12) {
-           fp_mul_comba12(A,B,C);
+           ret = fp_mul_comba12(A,B,C);
            goto clean;
         }
 #endif
 #if defined(TFM_MUL17) && FP_SIZE >= 34
         if (y <= 17) {
-           fp_mul_comba17(A,B,C);
+           ret = fp_mul_comba17(A,B,C);
            goto clean;
         }
 #endif
 
 #if defined(TFM_SMALL_SET) && FP_SIZE >= 32
         if (y <= 16) {
-           fp_mul_comba_small(A,B,C);
+           ret = fp_mul_comba_small(A,B,C);
            goto clean;
         }
 #endif
 #if defined(TFM_MUL20) && FP_SIZE >= 40
         if (y <= 20) {
-           fp_mul_comba20(A,B,C);
+           ret = fp_mul_comba20(A,B,C);
            goto clean;
         }
 #endif
 #if defined(TFM_MUL24) && FP_SIZE >= 48
         if (yy >= 16 && y <= 24) {
-           fp_mul_comba24(A,B,C);
+           ret = fp_mul_comba24(A,B,C);
            goto clean;
         }
 #endif
 #if defined(TFM_MUL28) && FP_SIZE >= 56
         if (yy >= 20 && y <= 28) {
-           fp_mul_comba28(A,B,C);
+           ret = fp_mul_comba28(A,B,C);
            goto clean;
         }
 #endif
 #if defined(TFM_MUL32) && FP_SIZE >= 64
         if (yy >= 24 && y <= 32) {
-           fp_mul_comba32(A,B,C);
+           ret = fp_mul_comba32(A,B,C);
            goto clean;
         }
 #endif
 #if defined(TFM_MUL48) && FP_SIZE >= 96
         if (yy >= 40 && y <= 48) {
-          fp_mul_comba48(A,B,C);
+          ret = fp_mul_comba48(A,B,C);
           goto clean;
         }
 #endif
 #if defined(TFM_MUL64) && FP_SIZE >= 128
         if (yy >= 56 && y <= 64) {
-           fp_mul_comba64(A,B,C);
+           ret = fp_mul_comba64(A,B,C);
            goto clean;
         }
 #endif
-        fp_mul_comba(A,B,C);
+        ret = fp_mul_comba(A,B,C);
 
 clean:
     /* zero any excess digits on the destination that we didn't write to */
     for (y = C->used; y >= 0 && y < oldused; y++) {
         C->dp[y] = 0;
     }
+
+    return ret;
 }
 
 void fp_mul_2(fp_int * a, fp_int * b)
@@ -390,7 +427,8 @@
    }
 
    /* zero any excess digits on the destination that we didn't write to */
-   for (; x < oldused; x++) {
+   /* also checking FP_SIZE here for static analysis */
+   for (; x < oldused && x < FP_SIZE; x++) {
       c->dp[x] = 0;
    }
    fp_clamp(c);
@@ -431,11 +469,25 @@
 /* generic PxQ multiplier */
 #if defined(HAVE_INTEL_MULX)
 
-WC_INLINE static void fp_mul_comba_mulx(fp_int *A, fp_int *B, fp_int *C)
+WC_INLINE static int fp_mul_comba_mulx(fp_int *A, fp_int *B, fp_int *C)
 
 {
    int       ix, iy, iz, pa;
-   fp_int    tmp, *dst;
+   fp_int    *dst;
+#ifndef WOLFSSL_SMALL_STACK
+   fp_int    tmp[1];
+#else
+   fp_int    *tmp;
+#endif
+ 
+   /* Variables used but not seen by cppcheck. */
+   (void)ix; (void)iy; (void)iz;
+
+#ifdef WOLFSSL_SMALL_STACK
+   tmp = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
+   if (tmp == NULL)
+       return FP_MEM;
+#endif
 
    /* get size of output and trim */
    pa = A->used + B->used;
@@ -446,8 +498,8 @@
    /* Always take branch to use tmp variable. This avoids a cache attack for
     * determining if C equals A */
    if (1) {
-      fp_init(&tmp);
-      dst = &tmp;
+      fp_init(tmp);
+      dst = tmp;
    }
 
    TFM_INTEL_MUL_COMBA(A, B, dst) ;
@@ -456,16 +508,34 @@
   dst->sign = A->sign ^ B->sign;
   fp_clamp(dst);
   fp_copy(dst, C);
+
+#ifdef WOLFSSL_SMALL_STACK
+  XFREE(tmp, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+
+  return FP_OKAY;
 }
 #endif
 
-void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C)
+int fp_mul_comba(fp_int *A, fp_int *B, fp_int *C)
 {
+   int       ret = 0;
    int       ix, iy, iz, tx, ty, pa;
    fp_digit  c0, c1, c2, *tmpx, *tmpy;
-   fp_int    tmp, *dst;
-
-   IF_HAVE_INTEL_MULX(fp_mul_comba_mulx(A, B, C), return) ;
+   fp_int    *dst;
+#ifndef WOLFSSL_SMALL_STACK
+   fp_int    tmp[1];
+#else
+   fp_int    *tmp;
+#endif
+
+   IF_HAVE_INTEL_MULX(ret = fp_mul_comba_mulx(A, B, C), return ret) ;
+
+#ifdef WOLFSSL_SMALL_STACK
+   tmp = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
+   if (tmp == NULL)
+       return FP_MEM;
+#endif
 
    COMBA_START;
    COMBA_CLEAR;
@@ -479,8 +549,8 @@
    /* Always take branch to use tmp variable. This avoids a cache attack for
     * determining if C equals A */
    if (1) {
-      fp_init(&tmp);
-      dst = &tmp;
+      fp_init(tmp);
+      dst = tmp;
    }
 
    for (ix = 0; ix < pa; ix++) {
@@ -514,13 +584,25 @@
   dst->sign = A->sign ^ B->sign;
   fp_clamp(dst);
   fp_copy(dst, C);
+  
+  /* Variables used but not seen by cppcheck. */
+  (void)c0; (void)c1; (void)c2;
+
+#ifdef WOLFSSL_SMALL_STACK
+  XFREE(tmp, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+  return ret;
 }
 
 /* a/b => cb + d == a */
 int fp_div(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
 {
-  fp_int  q, x, y, t1, t2;
   int     n, t, i, norm, neg;
+#ifndef WOLFSSL_SMALL_STACK
+  fp_int  q[1], x[1], y[1], t1[1], t2[1];
+#else
+  fp_int  *q, *x, *y, *t1, *t2;
+#endif
 
   /* is divisor zero ? */
   if (fp_iszero (b) == FP_YES) {
@@ -538,59 +620,67 @@
     return FP_OKAY;
   }
 
-  fp_init(&q);
-  q.used = a->used + 2;
-
-  fp_init(&t1);
-  fp_init(&t2);
-  fp_init_copy(&x, a);
-  fp_init_copy(&y, b);
+#ifdef WOLFSSL_SMALL_STACK
+  q = (fp_int*)XMALLOC(sizeof(fp_int) * 5, NULL, DYNAMIC_TYPE_BIGINT);
+  if (q == NULL) {
+      return FP_MEM;
+  }
+  x = &q[1]; y = &q[2]; t1 = &q[3]; t2 = &q[4];
+#endif
+
+  fp_init(q);
+  q->used = a->used + 2;
+
+  fp_init(t1);
+  fp_init(t2);
+  fp_init_copy(x, a);
+  fp_init_copy(y, b);
 
   /* fix the sign */
   neg = (a->sign == b->sign) ? FP_ZPOS : FP_NEG;
-  x.sign = y.sign = FP_ZPOS;
+  x->sign = y->sign = FP_ZPOS;
 
   /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */
-  norm = fp_count_bits(&y) % DIGIT_BIT;
+  norm = fp_count_bits(y) % DIGIT_BIT;
   if (norm < (int)(DIGIT_BIT-1)) {
      norm = (DIGIT_BIT-1) - norm;
-     fp_mul_2d (&x, norm, &x);
-     fp_mul_2d (&y, norm, &y);
+     fp_mul_2d (x, norm, x);
+     fp_mul_2d (y, norm, y);
   } else {
      norm = 0;
   }
 
   /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */
-  n = x.used - 1;
-  t = y.used - 1;
+  n = x->used - 1;
+  t = y->used - 1;
 
   /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */
-  fp_lshd (&y, n - t); /* y = y*b**{n-t} */
-
-  while (fp_cmp (&x, &y) != FP_LT) {
-    ++(q.dp[n - t]);
-    fp_sub (&x, &y, &x);
+  fp_lshd (y, n - t); /* y = y*b**{n-t} */
+
+  while (fp_cmp (x, y) != FP_LT) {
+    ++(q->dp[n - t]);
+    fp_sub (x, y, x);
   }
 
   /* reset y by shifting it back down */
-  fp_rshd (&y, n - t);
+  fp_rshd (y, n - t);
 
   /* step 3. for i from n down to (t + 1) */
   for (i = n; i >= (t + 1); i--) {
-    if (i > x.used) {
+    if (i > x->used) {
       continue;
     }
 
     /* step 3.1 if xi == yt then set q{i-t-1} to b-1,
      * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */
-    if (x.dp[i] == y.dp[t]) {
-      q.dp[i - t - 1] = (fp_digit) ((((fp_word)1) << DIGIT_BIT) - 1);
+    if (x->dp[i] == y->dp[t]) {
+      q->dp[i - t - 1] = (fp_digit) ((((fp_word)1) << DIGIT_BIT) - 1);
     } else {
       fp_word tmp;
-      tmp = ((fp_word) x.dp[i]) << ((fp_word) DIGIT_BIT);
-      tmp |= ((fp_word) x.dp[i - 1]);
-      tmp /= ((fp_word)y.dp[t]);
-      q.dp[i - t - 1] = (fp_digit) (tmp);
+      tmp = ((fp_word) x->dp[i]) << ((fp_word) DIGIT_BIT);
+      tmp |= ((fp_word) x->dp[i - 1]);
+      tmp /= ((fp_word)y->dp[t]);
+      q->dp[i - t - 1] = (fp_digit) (tmp);
     }
 
     /* while (q{i-t-1} * (yt * b + y{t-1})) >
@@ -598,35 +688,35 @@
 
        do q{i-t-1} -= 1;
     */
-    q.dp[i - t - 1] = (q.dp[i - t - 1] + 1);
+    q->dp[i - t - 1] = (q->dp[i - t - 1] + 1);
     do {
-      q.dp[i - t - 1] = (q.dp[i - t - 1] - 1);
+      q->dp[i - t - 1] = (q->dp[i - t - 1] - 1);
 
       /* find left hand */
-      fp_zero (&t1);
-      t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1];
-      t1.dp[1] = y.dp[t];
-      t1.used = 2;
-      fp_mul_d (&t1, q.dp[i - t - 1], &t1);
+      fp_zero (t1);
+      t1->dp[0] = (t - 1 < 0) ? 0 : y->dp[t - 1];
+      t1->dp[1] = y->dp[t];
+      t1->used = 2;
+      fp_mul_d (t1, q->dp[i - t - 1], t1);
 
       /* find right hand */
-      t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2];
-      t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1];
-      t2.dp[2] = x.dp[i];
-      t2.used = 3;
-    } while (fp_cmp_mag(&t1, &t2) == FP_GT);
+      t2->dp[0] = (i - 2 < 0) ? 0 : x->dp[i - 2];
+      t2->dp[1] = (i - 1 < 0) ? 0 : x->dp[i - 1];
+      t2->dp[2] = x->dp[i];
+      t2->used = 3;
+    } while (fp_cmp_mag(t1, t2) == FP_GT);
 
     /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */
-    fp_mul_d (&y, q.dp[i - t - 1], &t1);
-    fp_lshd  (&t1, i - t - 1);
-    fp_sub   (&x, &t1, &x);
+    fp_mul_d (y, q->dp[i - t - 1], t1);
+    fp_lshd  (t1, i - t - 1);
+    fp_sub   (x, t1, x);
 
     /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */
-    if (x.sign == FP_NEG) {
-      fp_copy (&y, &t1);
-      fp_lshd (&t1, i - t - 1);
-      fp_add (&x, &t1, &x);
-      q.dp[i - t - 1] = q.dp[i - t - 1] - 1;
+    if (x->sign == FP_NEG) {
+      fp_copy (y, t1);
+      fp_lshd (t1, i - t - 1);
+      fp_add (x, t1, x);
+      q->dp[i - t - 1] = q->dp[i - t - 1] - 1;
     }
   }
 
@@ -635,25 +725,28 @@
    */
 
   /* get sign before writing to c */
-  x.sign = x.used == 0 ? FP_ZPOS : a->sign;
+  x->sign = x->used == 0 ? FP_ZPOS : a->sign;
 
   if (c != NULL) {
-    fp_clamp (&q);
-    fp_copy (&q, c);
+    fp_clamp (q);
+    fp_copy (q, c);
     c->sign = neg;
   }
 
   if (d != NULL) {
-    fp_div_2d (&x, norm, &x, NULL);
+    fp_div_2d (x, norm, x, NULL);
 
     /* zero any excess digits on the destination that we didn't write to */
-    for (i = b->used; i < x.used; i++) {
-        x.dp[i] = 0;
+    for (i = b->used; i < x->used; i++) {
+        x->dp[i] = 0;
     }
-    fp_clamp(&x);
-    fp_copy (&x, d);
+    fp_clamp(x);
+    fp_copy (x, d);
   }
 
+#ifdef WOLFSSL_SMALL_STACK
+  XFREE(q, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
   return FP_OKAY;
 }
 
@@ -700,7 +793,6 @@
 void fp_div_2d(fp_int *a, int b, fp_int *c, fp_int *d)
 {
   int      D;
-  fp_int   t;
 
   /* if the shift count is <= 0 then we do no work */
   if (b <= 0) {
@@ -711,11 +803,9 @@
     return;
   }
 
-  fp_init(&t);
-
-  /* get the remainder */
-  if (d != NULL) {
-    fp_mod_2d (a, b, &t);
+  /* get the remainder before a is changed in calculating c */
+  if (a == c && d != NULL) {
+    fp_mod_2d (a, b, d);
   }
 
   /* copy */
@@ -731,28 +821,45 @@
   if (D != 0) {
     fp_rshb(c, D);
   }
+
+  /* get the remainder if a is not changed in calculating c */
+  if (a != c && d != NULL) {
+    fp_mod_2d (a, b, d);
+  }
+
   fp_clamp (c);
-  if (d != NULL) {
-    fp_copy (&t, d);
-  }
 }
 
 /* c = a mod b, 0 <= c < b  */
 int fp_mod(fp_int *a, fp_int *b, fp_int *c)
 {
-   fp_int t;
+#ifndef WOLFSSL_SMALL_STACK
+   fp_int t[1];
+#else
+   fp_int *t;
+#endif
    int    err;
 
-   fp_init(&t);
-   if ((err = fp_div(a, b, NULL, &t)) != FP_OKAY) {
-      return err;
-   }
-   if (t.sign != b->sign) {
-      fp_add(&t, b, c);
-   } else {
-      fp_copy(&t, c);
+#ifdef WOLFSSL_SMALL_STACK
+   t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
+   if (t == NULL)
+       return FP_MEM;
+#endif
+
+   fp_init(t);
+   err = fp_div(a, b, NULL, t);
+   if (err == FP_OKAY) {
+      if (t->sign != b->sign) {
+         fp_add(t, b, c);
+      } else {
+         fp_copy(t, c);
+     }
   }
-  return FP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+  XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+  return err;
 }
 
 /* c = a mod 2**d */
@@ -785,198 +892,340 @@
 
 static int fp_invmod_slow (fp_int * a, fp_int * b, fp_int * c)
 {
-  fp_int  x, y, u, v, A, B, C, D;
-  int     res;
+#ifndef WOLFSSL_SMALL_STACK
+  fp_int  x[1], y[1], u[1], v[1], A[1], B[1], C[1], D[1];
+#else
+  fp_int  *x, *y, *u, *v, *A, *B, *C, *D;
+#endif
+  int     err;
 
   /* b cannot be negative */
   if (b->sign == FP_NEG || fp_iszero(b) == FP_YES) {
     return FP_VAL;
   }
+  if (fp_iszero(a) == FP_YES) {
+    return FP_VAL;
+  }
+
+#ifdef WOLFSSL_SMALL_STACK
+  x = (fp_int*)XMALLOC(sizeof(fp_int) * 8, NULL, DYNAMIC_TYPE_BIGINT);
+  if (x == NULL) {
+      return FP_MEM;
+  }
+  y = &x[1]; u = &x[2]; v = &x[3]; A = &x[4]; B = &x[5]; C = &x[6]; D = &x[7];
+#endif
 
   /* init temps */
-  fp_init(&x);    fp_init(&y);
-  fp_init(&u);    fp_init(&v);
-  fp_init(&A);    fp_init(&B);
-  fp_init(&C);    fp_init(&D);
+  fp_init(x);    fp_init(y);
+  fp_init(u);    fp_init(v);
+  fp_init(A);    fp_init(B);
+  fp_init(C);    fp_init(D);
 
   /* x = a, y = b */
-  if ((res = fp_mod(a, b, &x)) != FP_OKAY) {
-      return res;
+  if ((err = fp_mod(a, b, x)) != FP_OKAY) {
+  #ifdef WOLFSSL_SMALL_STACK
+    XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
+  #endif
+    return err;
   }
-  fp_copy(b, &y);
+  fp_copy(b, y);
 
   /* 2. [modified] if x,y are both even then return an error! */
-  if (fp_iseven (&x) == FP_YES && fp_iseven (&y) == FP_YES) {
+  if (fp_iseven(x) == FP_YES && fp_iseven(y) == FP_YES) {
+  #ifdef WOLFSSL_SMALL_STACK
+    XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
+  #endif
     return FP_VAL;
   }
 
   /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
-  fp_copy (&x, &u);
-  fp_copy (&y, &v);
-  fp_set (&A, 1);
-  fp_set (&D, 1);
+  fp_copy (x, u);
+  fp_copy (y, v);
+  fp_set (A, 1);
+  fp_set (D, 1);
 
 top:
   /* 4.  while u is even do */
-  while (fp_iseven (&u) == FP_YES) {
+  while (fp_iseven (u) == FP_YES) {
     /* 4.1 u = u/2 */
-    fp_div_2 (&u, &u);
+    fp_div_2 (u, u);
 
     /* 4.2 if A or B is odd then */
-    if (fp_isodd (&A) == FP_YES || fp_isodd (&B) == FP_YES) {
+    if (fp_isodd (A) == FP_YES || fp_isodd (B) == FP_YES) {
       /* A = (A+y)/2, B = (B-x)/2 */
-      fp_add (&A, &y, &A);
-      fp_sub (&B, &x, &B);
+      fp_add (A, y, A);
+      fp_sub (B, x, B);
     }
     /* A = A/2, B = B/2 */
-    fp_div_2 (&A, &A);
-    fp_div_2 (&B, &B);
+    fp_div_2 (A, A);
+    fp_div_2 (B, B);
   }
 
   /* 5.  while v is even do */
-  while (fp_iseven (&v) == FP_YES) {
+  while (fp_iseven (v) == FP_YES) {
     /* 5.1 v = v/2 */
-    fp_div_2 (&v, &v);
+    fp_div_2 (v, v);
 
     /* 5.2 if C or D is odd then */
-    if (fp_isodd (&C) == FP_YES || fp_isodd (&D) == FP_YES) {
+    if (fp_isodd (C) == FP_YES || fp_isodd (D) == FP_YES) {
       /* C = (C+y)/2, D = (D-x)/2 */
-      fp_add (&C, &y, &C);
-      fp_sub (&D, &x, &D);
+      fp_add (C, y, C);
+      fp_sub (D, x, D);
     }
     /* C = C/2, D = D/2 */
-    fp_div_2 (&C, &C);
-    fp_div_2 (&D, &D);
+    fp_div_2 (C, C);
+    fp_div_2 (D, D);
   }
 
   /* 6.  if u >= v then */
-  if (fp_cmp (&u, &v) != FP_LT) {
+  if (fp_cmp (u, v) != FP_LT) {
     /* u = u - v, A = A - C, B = B - D */
-    fp_sub (&u, &v, &u);
-    fp_sub (&A, &C, &A);
-    fp_sub (&B, &D, &B);
+    fp_sub (u, v, u);
+    fp_sub (A, C, A);
+    fp_sub (B, D, B);
   } else {
     /* v - v - u, C = C - A, D = D - B */
-    fp_sub (&v, &u, &v);
-    fp_sub (&C, &A, &C);
-    fp_sub (&D, &B, &D);
+    fp_sub (v, u, v);
+    fp_sub (C, A, C);
+    fp_sub (D, B, D);
   }
 
   /* if not zero goto step 4 */
-  if (fp_iszero (&u) == FP_NO)
+  if (fp_iszero (u) == FP_NO)
     goto top;
 
   /* now a = C, b = D, gcd == g*v */
 
   /* if v != 1 then there is no inverse */
-  if (fp_cmp_d (&v, 1) != FP_EQ) {
+  if (fp_cmp_d (v, 1) != FP_EQ) {
+  #ifdef WOLFSSL_SMALL_STACK
+    XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
+  #endif
     return FP_VAL;
   }
 
   /* if its too low */
-  while (fp_cmp_d(&C, 0) == FP_LT) {
-      fp_add(&C, b, &C);
+  while (fp_cmp_d(C, 0) == FP_LT) {
+      fp_add(C, b, C);
   }
 
   /* too big */
-  while (fp_cmp_mag(&C, b) != FP_LT) {
-      fp_sub(&C, b, &C);
+  while (fp_cmp_mag(C, b) != FP_LT) {
+      fp_sub(C, b, C);
   }
 
   /* C is now the inverse */
-  fp_copy(&C, c);
+  fp_copy(C, c);
+#ifdef WOLFSSL_SMALL_STACK
+  XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
   return FP_OKAY;
 }
 
 /* c = 1/a (mod b) for odd b only */
 int fp_invmod(fp_int *a, fp_int *b, fp_int *c)
 {
-  fp_int  x, y, u, v, B, D;
+#ifndef WOLFSSL_SMALL_STACK
+  fp_int  x[1], y[1], u[1], v[1], B[1], D[1];
+#else
+  fp_int  *x, *y, *u, *v, *B, *D;
+#endif
   int     neg;
+  int     err;
+
+  if (b->sign == FP_NEG || fp_iszero(b) == FP_YES) {
+    return FP_VAL;
+  }
+
+  /* [modified] sanity check on "a" */
+  if (fp_iszero(a) == FP_YES) {
+    return FP_VAL; /* can not divide by 0 here */
+  }
 
   /* 2. [modified] b must be odd   */
-  if (fp_iseven (b) == FP_YES) {
+  if (fp_iseven(b) == FP_YES) {
     return fp_invmod_slow(a,b,c);
   }
 
+#ifdef WOLFSSL_SMALL_STACK
+  x = (fp_int*)XMALLOC(sizeof(fp_int) * 6, NULL, DYNAMIC_TYPE_BIGINT);
+  if (x == NULL) {
+      return FP_MEM;
+  }
+  y = &x[1]; u = &x[2]; v = &x[3]; B = &x[4]; D = &x[5];
+#endif
+
   /* init all our temps */
-  fp_init(&x);  fp_init(&y);
-  fp_init(&u);  fp_init(&v);
-  fp_init(&B);  fp_init(&D);
+  fp_init(x);  fp_init(y);
+  fp_init(u);  fp_init(v);
+  fp_init(B);  fp_init(D);
+
+  if (fp_cmp(a, b) != MP_LT) {
+    err = mp_mod(a, b, y);
+    if (err != FP_OKAY) {
+    #ifdef WOLFSSL_SMALL_STACK
+      XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
+    #endif
+      return err;
+    }
+    a = y;
+  }
+
+  if (fp_iszero(a) == FP_YES) {
+  #ifdef WOLFSSL_SMALL_STACK
+    XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
+  #endif
+    return FP_VAL;
+  }
 
   /* x == modulus, y == value to invert */
-  fp_copy(b, &x);
+  fp_copy(b, x);
 
   /* we need y = |a| */
-  fp_abs(a, &y);
+  fp_abs(a, y);
 
   /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
-  fp_copy(&x, &u);
-  fp_copy(&y, &v);
-  fp_set (&D, 1);
+  fp_copy(x, u);
+  fp_copy(y, v);
+  fp_set (D, 1);
 
 top:
   /* 4.  while u is even do */
-  while (fp_iseven (&u) == FP_YES) {
+  while (fp_iseven (u) == FP_YES) {
     /* 4.1 u = u/2 */
-    fp_div_2 (&u, &u);
+    fp_div_2 (u, u);
 
     /* 4.2 if B is odd then */
-    if (fp_isodd (&B) == FP_YES) {
-      fp_sub (&B, &x, &B);
+    if (fp_isodd (B) == FP_YES) {
+      fp_sub (B, x, B);
     }
     /* B = B/2 */
-    fp_div_2 (&B, &B);
+    fp_div_2 (B, B);
   }
 
   /* 5.  while v is even do */
-  while (fp_iseven (&v) == FP_YES) {
+  while (fp_iseven (v) == FP_YES) {
     /* 5.1 v = v/2 */
-    fp_div_2 (&v, &v);
+    fp_div_2 (v, v);
 
     /* 5.2 if D is odd then */
-    if (fp_isodd (&D) == FP_YES) {
+    if (fp_isodd (D) == FP_YES) {
       /* D = (D-x)/2 */
-      fp_sub (&D, &x, &D);
+      fp_sub (D, x, D);
     }
     /* D = D/2 */
-    fp_div_2 (&D, &D);
+    fp_div_2 (D, D);
   }
 
   /* 6.  if u >= v then */
-  if (fp_cmp (&u, &v) != FP_LT) {
+  if (fp_cmp (u, v) != FP_LT) {
     /* u = u - v, B = B - D */
-    fp_sub (&u, &v, &u);
-    fp_sub (&B, &D, &B);
+    fp_sub (u, v, u);
+    fp_sub (B, D, B);
   } else {
     /* v - v - u, D = D - B */
-    fp_sub (&v, &u, &v);
-    fp_sub (&D, &B, &D);
+    fp_sub (v, u, v);
+    fp_sub (D, B, D);
   }
 
   /* if not zero goto step 4 */
-  if (fp_iszero (&u) == FP_NO) {
+  if (fp_iszero (u) == FP_NO) {
     goto top;
   }
 
   /* now a = C, b = D, gcd == g*v */
 
   /* if v != 1 then there is no inverse */
-  if (fp_cmp_d (&v, 1) != FP_EQ) {
+  if (fp_cmp_d (v, 1) != FP_EQ) {
+  #ifdef WOLFSSL_SMALL_STACK
+    XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
+  #endif
     return FP_VAL;
   }
 
   /* b is now the inverse */
   neg = a->sign;
-  while (D.sign == FP_NEG) {
-    fp_add (&D, b, &D);
+  while (D->sign == FP_NEG) {
+    fp_add (D, b, D);
   }
   /* too big */
-  while (fp_cmp_mag(&D, b) != FP_LT) {
-    fp_sub(&D, b, &D);
+  while (fp_cmp_mag(D, b) != FP_LT) {
+    fp_sub(D, b, D);
   }
-  fp_copy (&D, c);
+  fp_copy (D, c);
   c->sign = neg;
+#ifdef WOLFSSL_SMALL_STACK
+  XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+  return FP_OKAY;
+}
+
+#define CT_INV_MOD_PRE_CNT      8
+
+/* modulus (b) must be greater than 2 and a prime */
+int fp_invmod_mont_ct(fp_int *a, fp_int *b, fp_int *c, fp_digit mp)
+{
+  int i, j;
+#ifndef WOLFSSL_SMALL_STACK
+  fp_int t[1], e[1];
+  fp_int pre[CT_INV_MOD_PRE_CNT];
+#else
+  fp_int* t;
+  fp_int* e;
+  fp_int* pre;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+  t = (fp_int*)XMALLOC(sizeof(fp_int) * (2 + CT_INV_MOD_PRE_CNT), NULL,
+                                                           DYNAMIC_TYPE_BIGINT);
+  if (t == NULL)
+    return FP_MEM;
+  e = t + 1;
+  pre = t + 2;
+#endif
+
+  fp_init(t);
+  fp_init(e);
+
+  fp_init(&pre[0]);
+  fp_copy(a, &pre[0]);
+  for (i = 1; i < CT_INV_MOD_PRE_CNT; i++) {
+    fp_init(&pre[i]);
+    fp_sqr(&pre[i-1], &pre[i]);
+    fp_montgomery_reduce(&pre[i], b, mp);
+    fp_mul(&pre[i], a, &pre[i]);
+    fp_montgomery_reduce(&pre[i], b, mp);
+  }
+
+  fp_sub_d(b, 2, e);
+  /* Highest bit is always set. */
+  for (i = fp_count_bits(e)-2, j = 1; i >= 0; i--, j++) {
+      if (!fp_is_bit_set(e, i) || j == CT_INV_MOD_PRE_CNT)
+          break;
+  }
+  fp_copy(&pre[j-1], t);
+  for (j = 0; i >= 0; i--) {
+    int set = fp_is_bit_set(e, i);
+
+    if ((j == CT_INV_MOD_PRE_CNT) || (!set && j > 0)) {
+      fp_mul(t, &pre[j-1], t);
+      fp_montgomery_reduce(t, b, mp);
+      j = 0;
+    }
+    fp_sqr(t, t);
+    fp_montgomery_reduce(t, b, mp);
+    j += set;
+  }
+  if (j > 0) {
+    fp_mul(t, &pre[j-1], c);
+    fp_montgomery_reduce(c, b, mp);
+  }
+  else 
+    fp_copy(t, c);
+
+#ifdef WOLFSSL_SMALL_STACK
+  XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
   return FP_OKAY;
 }
 
@@ -984,20 +1233,35 @@
 int fp_mulmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
 {
   int err;
-  fp_int t;
-
-  fp_init(&t);
-  fp_mul(a, b, &t);
-#if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
-  if (d->size < FP_SIZE) {
-    err = fp_mod(&t, c, &t);
-    fp_copy(&t, d);
-  } else
+#ifndef WOLFSSL_SMALL_STACK
+   fp_int t[1];
+#else
+   fp_int *t;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+   t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
+   if (t == NULL)
+       return FP_MEM;
 #endif
-  {
-    err = fp_mod(&t, c, d);
+
+  fp_init(t);
+  err = fp_mul(a, b, t);
+  if (err == FP_OKAY) {
+  #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
+    if (d->size < FP_SIZE) {
+      err = fp_mod(t, c, t);
+      fp_copy(t, d);
+    } else
+  #endif
+    {
+      err = fp_mod(t, c, d);
+    }
   }
 
+#ifdef WOLFSSL_SMALL_STACK
+  XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
   return err;
 }
 
@@ -1005,20 +1269,33 @@
 int fp_submod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
 {
   int err;
-  fp_int t;
-
-  fp_init(&t);
-  fp_sub(a, b, &t);
+#ifndef WOLFSSL_SMALL_STACK
+   fp_int t[1];
+#else
+   fp_int *t;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+   t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
+   if (t == NULL)
+       return FP_MEM;
+#endif
+
+  fp_init(t);
+  fp_sub(a, b, t);
 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
   if (d->size < FP_SIZE) {
-    err = fp_mod(&t, c, &t);
-    fp_copy(&t, d);
+    err = fp_mod(t, c, t);
+    fp_copy(t, d);
   } else
 #endif
   {
-    err = fp_mod(&t, c, d);
+    err = fp_mod(t, c, d);
   }
 
+#ifdef WOLFSSL_SMALL_STACK
+  XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
   return err;
 }
 
@@ -1026,36 +1303,279 @@
 int fp_addmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
 {
   int err;
-  fp_int t;
-
-  fp_init(&t);
-  fp_add(a, b, &t);
+#ifndef WOLFSSL_SMALL_STACK
+   fp_int t[1];
+#else
+   fp_int *t;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+   t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
+   if (t == NULL)
+       return FP_MEM;
+#endif
+
+  fp_init(t);
+  fp_add(a, b, t);
 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
   if (d->size < FP_SIZE) {
-    err = fp_mod(&t, c, &t);
-    fp_copy(&t, d);
+    err = fp_mod(t, c, t);
+    fp_copy(t, d);
   } else
 #endif
   {
-    err = fp_mod(&t, c, d);
+    err = fp_mod(t, c, d);
   }
 
+#ifdef WOLFSSL_SMALL_STACK
+  XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
   return err;
 }
 
 #ifdef TFM_TIMING_RESISTANT
 
+#ifdef WC_RSA_NONBLOCK
+
+#ifdef WC_RSA_NONBLOCK_TIME
+  /* User can override the check-time at build-time using the
+   * FP_EXPTMOD_NB_CHECKTIME macro to define your own function */
+  #ifndef FP_EXPTMOD_NB_CHECKTIME
+    /* instruction count for each type of operation */
+    /* array lookup is using TFM_EXPTMOD_NB_* states */
+    static const word32 exptModNbInst[TFM_EXPTMOD_NB_COUNT] = {
+    #ifdef TFM_PPC32
+      #ifdef _DEBUG
+        11098, 8701, 3971, 178394, 858093, 1040, 822, 178056, 181574, 90883, 184339, 236813
+      #else
+        7050,  2554, 3187, 43178,  200422, 384,  275, 43024,  43550,  30450, 46270,  61376
+      #endif
+    #elif defined(TFM_X86_64)
+      #ifdef _DEBUG
+        954, 2377, 858, 19027, 90840, 287, 407, 20140, 7874,  11385, 8005,  6151
+      #else
+        765, 1007, 771, 5216,  34993, 248, 193, 4975,  4201,  3947,  4275,  3811
+      #endif
+    #else /* software only fast math */
+      #ifdef _DEBUG
+        798, 2245, 802, 16657, 66920, 352, 186, 16997, 16145, 12789, 16742, 15006
+      #else
+        775, 1084, 783, 4692,  37510, 207, 183, 4374,  4392,  3097,  4442,  4079
+      #endif
+    #endif
+    };
+
+    static int fp_exptmod_nb_checktime(exptModNb_t* nb)
+    {
+      word32 totalInst;
+
+      /* if no max time has been set then stop (do not block) */
+      if (nb->maxBlockInst == 0 || nb->state >= TFM_EXPTMOD_NB_COUNT) {
+        return TFM_EXPTMOD_NB_STOP;
+      }
+
+      /* if instruction table not set then use maxBlockInst as simple counter */
+      if (exptModNbInst[nb->state] == 0) {
+        if (++nb->totalInst < nb->maxBlockInst)
+          return TFM_EXPTMOD_NB_CONTINUE;
+
+        nb->totalInst = 0; /* reset counter */
+        return TFM_EXPTMOD_NB_STOP;
+      }
+
+      /* get total instruction count including next operation */
+      totalInst = nb->totalInst + exptModNbInst[nb->state];
+      /* if the next operation can completed within the maximum then continue */
+      if (totalInst <= nb->maxBlockInst) {
+        return TFM_EXPTMOD_NB_CONTINUE;
+      }
+
+      return TFM_EXPTMOD_NB_STOP;
+    }
+    #define FP_EXPTMOD_NB_CHECKTIME(nb) fp_exptmod_nb_checktime((nb))
+  #endif /* !FP_EXPTMOD_NB_CHECKTIME */
+#endif /* WC_RSA_NONBLOCK_TIME */
+
+/* non-blocking version of timing resistant fp_exptmod function */
+/* supports cache resistance */
+int fp_exptmod_nb(exptModNb_t* nb, fp_int* G, fp_int* X, fp_int* P, fp_int* Y)
+{
+  int err, ret = FP_WOULDBLOCK;
+
+  if (nb == NULL)
+    return FP_VAL;
+
+#ifdef WC_RSA_NONBLOCK_TIME
+  nb->totalInst = 0;
+  do {
+    nb->totalInst += exptModNbInst[nb->state];
+#endif
+
+  switch (nb->state) {
+  case TFM_EXPTMOD_NB_INIT:
+    /* now setup montgomery */
+    if ((err = fp_montgomery_setup(P, &nb->mp)) != FP_OKAY) {
+      nb->state = TFM_EXPTMOD_NB_INIT;
+      return err;
+    }
+
+    /* init ints */
+    fp_init(&nb->R[0]);
+    fp_init(&nb->R[1]);
+  #ifndef WC_NO_CACHE_RESISTANT
+    fp_init(&nb->R[2]);
+  #endif
+    nb->state = TFM_EXPTMOD_NB_MONT;
+    break;
+
+  case TFM_EXPTMOD_NB_MONT:
+    /* mod m -> R[0] */
+    fp_montgomery_calc_normalization(&nb->R[0], P);
+
+    nb->state = TFM_EXPTMOD_NB_MONT_RED;
+    break;
+
+  case TFM_EXPTMOD_NB_MONT_RED:
+    /* reduce G -> R[1] */
+    if (fp_cmp_mag(P, G) != FP_GT) {
+       /* G > P so we reduce it first */
+       fp_mod(G, P, &nb->R[1]);
+    } else {
+       fp_copy(G, &nb->R[1]);
+    }
+
+    nb->state = TFM_EXPTMOD_NB_MONT_MUL;
+    break;
+
+  case TFM_EXPTMOD_NB_MONT_MUL:
+    /* G (R[1]) * m (R[0]) */
+    err = fp_mul(&nb->R[1], &nb->R[0], &nb->R[1]);
+    if (err != FP_OKAY) {
+      nb->state = TFM_EXPTMOD_NB_INIT;
+      return err;
+    }
+
+    nb->state = TFM_EXPTMOD_NB_MONT_MOD;
+    break;
+
+  case TFM_EXPTMOD_NB_MONT_MOD:
+    /* mod m */
+    err = fp_div(&nb->R[1], P, NULL, &nb->R[1]);
+    if (err != FP_OKAY) {
+      nb->state = TFM_EXPTMOD_NB_INIT;
+      return err;
+    }
+
+    nb->state = TFM_EXPTMOD_NB_MONT_MODCHK;
+    break;
+
+  case TFM_EXPTMOD_NB_MONT_MODCHK:
+    /* m matches sign of (G * R mod m) */
+    if (nb->R[1].sign != P->sign) {
+       fp_add(&nb->R[1], P, &nb->R[1]);
+    }
+
+    /* set initial mode and bit cnt */
+    nb->bitcnt = 1;
+    nb->buf    = 0;
+    nb->digidx = X->used - 1;
+
+    nb->state = TFM_EXPTMOD_NB_NEXT;
+    break;
+
+  case TFM_EXPTMOD_NB_NEXT:
+    /* grab next digit as required */
+    if (--nb->bitcnt == 0) {
+      /* if nb->digidx == -1 we are out of digits so break */
+      if (nb->digidx == -1) {
+        nb->state = TFM_EXPTMOD_NB_RED;
+        break;
+      }
+      /* read next digit and reset nb->bitcnt */
+      nb->buf    = X->dp[nb->digidx--];
+      nb->bitcnt = (int)DIGIT_BIT;
+    }
+
+    /* grab the next msb from the exponent */
+    nb->y     = (int)(nb->buf >> (DIGIT_BIT - 1)) & 1;
+    nb->buf <<= (fp_digit)1;
+    nb->state = TFM_EXPTMOD_NB_MUL;
+    FALL_THROUGH;
+
+  case TFM_EXPTMOD_NB_MUL:
+    fp_mul(&nb->R[0], &nb->R[1], &nb->R[nb->y^1]);
+    nb->state = TFM_EXPTMOD_NB_MUL_RED;
+    break;
+
+  case TFM_EXPTMOD_NB_MUL_RED:
+    fp_montgomery_reduce(&nb->R[nb->y^1], P, nb->mp);
+    nb->state = TFM_EXPTMOD_NB_SQR;
+    break;
+
+  case TFM_EXPTMOD_NB_SQR:
+  #ifdef WC_NO_CACHE_RESISTANT
+    fp_sqr(&nb->R[nb->y], &nb->R[nb->y]);
+  #else
+    fp_copy((fp_int*) ( ((wolfssl_word)&nb->R[0] & wc_off_on_addr[nb->y^1]) +
+                        ((wolfssl_word)&nb->R[1] & wc_off_on_addr[nb->y]) ),
+            &nb->R[2]);
+    fp_sqr(&nb->R[2], &nb->R[2]);
+  #endif /* WC_NO_CACHE_RESISTANT */
+
+    nb->state = TFM_EXPTMOD_NB_SQR_RED;
+    break;
+
+  case TFM_EXPTMOD_NB_SQR_RED:
+  #ifdef WC_NO_CACHE_RESISTANT
+    fp_montgomery_reduce(&nb->R[nb->y], P, nb->mp);
+  #else
+    fp_montgomery_reduce(&nb->R[2], P, nb->mp);
+    fp_copy(&nb->R[2],
+            (fp_int*) ( ((wolfssl_word)&nb->R[0] & wc_off_on_addr[nb->y^1]) +
+                        ((wolfssl_word)&nb->R[1] & wc_off_on_addr[nb->y]) ) );
+  #endif /* WC_NO_CACHE_RESISTANT */
+
+    nb->state = TFM_EXPTMOD_NB_NEXT;
+    break;
+
+  case TFM_EXPTMOD_NB_RED:
+    /* final reduce */
+    fp_montgomery_reduce(&nb->R[0], P, nb->mp);
+    fp_copy(&nb->R[0], Y);
+
+    nb->state = TFM_EXPTMOD_NB_INIT;
+    ret = FP_OKAY;
+    break;
+  } /* switch */
+
+#ifdef WC_RSA_NONBLOCK_TIME
+  /* determine if maximum blocking time has been reached */
+  } while (ret == FP_WOULDBLOCK &&
+    FP_EXPTMOD_NB_CHECKTIME(nb) == TFM_EXPTMOD_NB_CONTINUE);
+#endif
+
+  return ret;
+}
+
+#endif /* WC_RSA_NONBLOCK */
+
+
 /* timing resistant montgomery ladder based exptmod
    Based on work by Marc Joye, Sung-Ming Yen, "The Montgomery Powering Ladder",
    Cryptographic Hardware and Embedded Systems, CHES 2002
 */
-static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
+static int _fp_exptmod_ct(fp_int * G, fp_int * X, int digits, fp_int * P,
+                          fp_int * Y)
 {
+#ifndef WOLFSSL_SMALL_STACK
 #ifdef WC_NO_CACHE_RESISTANT
   fp_int   R[2];
 #else
   fp_int   R[3];   /* need a temp for cache resistance */
 #endif
+#else
+   fp_int  *R;
+#endif
   fp_digit buf, mp;
   int      err, bitcnt, digidx, y;
 
@@ -1064,6 +1584,15 @@
      return err;
   }
 
+#ifdef WOLFSSL_SMALL_STACK
+#ifndef WC_NO_CACHE_RESISTANT
+   R = (fp_int*)XMALLOC(sizeof(fp_int) * 3, NULL, DYNAMIC_TYPE_BIGINT);
+#else
+   R = (fp_int*)XMALLOC(sizeof(fp_int) * 2, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+   if (R == NULL)
+       return FP_MEM;
+#endif
   fp_init(&R[0]);
   fp_init(&R[1]);
 #ifndef WC_NO_CACHE_RESISTANT
@@ -1089,7 +1618,7 @@
   /* set initial mode and bit cnt */
   bitcnt = 1;
   buf    = 0;
-  digidx = X->used - 1;
+  digidx = digits - 1;
 
   for (;;) {
     /* grab next digit as required */
@@ -1108,10 +1637,36 @@
     buf <<= (fp_digit)1;
 
     /* do ops */
-    fp_mul(&R[0], &R[1], &R[y^1]); fp_montgomery_reduce(&R[y^1], P, mp);
+    err = fp_mul(&R[0], &R[1], &R[y^1]);
+    if (err != FP_OKAY) {
+    #ifdef WOLFSSL_SMALL_STACK
+      XFREE(R, NULL, DYNAMIC_TYPE_BIGINT);
+    #endif
+      return err;
+    }
+    err = fp_montgomery_reduce(&R[y^1], P, mp);
+    if (err != FP_OKAY) {
+    #ifdef WOLFSSL_SMALL_STACK
+      XFREE(R, NULL, DYNAMIC_TYPE_BIGINT);
+    #endif
+      return err;
+    }
 
 #ifdef WC_NO_CACHE_RESISTANT
-    fp_sqr(&R[y], &R[y]);          fp_montgomery_reduce(&R[y], P, mp);
+    err = fp_sqr(&R[y], &R[y]);
+    if (err != FP_OKAY) {
+    #ifdef WOLFSSL_SMALL_STACK
+      XFREE(R, NULL, DYNAMIC_TYPE_BIGINT);
+    #endif
+      return err;
+    }
+    err = fp_montgomery_reduce(&R[y], P, mp);
+    if (err != FP_OKAY) {
+    #ifdef WOLFSSL_SMALL_STACK
+      XFREE(R, NULL, DYNAMIC_TYPE_BIGINT);
+    #endif
+      return err;
+    }
 #else
     /* instead of using R[y] for sqr, which leaks key bit to cache monitor,
      * use R[2] as temp, make sure address calc is constant, keep
@@ -1119,33 +1674,45 @@
     fp_copy((fp_int*) ( ((wolfssl_word)&R[0] & wc_off_on_addr[y^1]) +
                         ((wolfssl_word)&R[1] & wc_off_on_addr[y]) ),
             &R[2]);
-    fp_sqr(&R[2], &R[2]);          fp_montgomery_reduce(&R[2], P, mp);
+    err = fp_sqr(&R[2], &R[2]);
+    if (err != FP_OKAY) {
+    #ifdef WOLFSSL_SMALL_STACK
+      XFREE(R, NULL, DYNAMIC_TYPE_BIGINT);
+    #endif
+      return err;
+    }
+    err = fp_montgomery_reduce(&R[2], P, mp);
+    if (err != FP_OKAY) {
+    #ifdef WOLFSSL_SMALL_STACK
+      XFREE(R, NULL, DYNAMIC_TYPE_BIGINT);
+    #endif
+      return err;
+    }
     fp_copy(&R[2],
             (fp_int*) ( ((wolfssl_word)&R[0] & wc_off_on_addr[y^1]) +
                         ((wolfssl_word)&R[1] & wc_off_on_addr[y]) ) );
 #endif /* WC_NO_CACHE_RESISTANT */
   }
 
-   fp_montgomery_reduce(&R[0], P, mp);
+   err = fp_montgomery_reduce(&R[0], P, mp);
    fp_copy(&R[0], Y);
-   return FP_OKAY;
+#ifdef WOLFSSL_SMALL_STACK
+   XFREE(R, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+   return err;
 }
 
-#else /* TFM_TIMING_RESISTANT */
+#endif /* TFM_TIMING_RESISTANT */
 
 /* y = g**x (mod b)
  * Some restrictions... x must be positive and < b
  */
-static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
+static int _fp_exptmod_nct(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
 {
-  fp_int   res;
+  fp_int  *res;
+  fp_int  *M;
   fp_digit buf, mp;
   int      err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
-#ifdef WOLFSSL_SMALL_STACK
-  fp_int  *M;
-#else
-  fp_int   M[64];
-#endif
 
   /* find window size */
   x = fp_count_bits (X);
@@ -1166,20 +1733,20 @@
      return err;
   }
 
-#ifdef WOLFSSL_SMALL_STACK
-  /* only allocate space for what's needed */
-  M = (fp_int*)XMALLOC(sizeof(fp_int)*(1 << winsize), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+  /* only allocate space for what's needed for window plus res */
+  M = (fp_int*)XMALLOC(sizeof(fp_int)*((1 << winsize) + 1), NULL,
+                                                           DYNAMIC_TYPE_BIGINT);
   if (M == NULL) {
      return FP_MEM;
   }
-#endif
+  res = &M[1 << winsize];
 
   /* init M array */
   for(x = 0; x < (1 << winsize); x++)
     fp_init(&M[x]);
 
   /* setup result */
-  fp_init(&res);
+  fp_init(res);
 
   /* create M table
    *
@@ -1189,7 +1756,7 @@
    */
 
    /* now we need R mod m */
-   fp_montgomery_calc_normalization (&res, P);
+   fp_montgomery_calc_normalization (res, P);
 
    /* now set M[1] to G * R mod m */
    if (fp_cmp_mag(P, G) != FP_GT) {
@@ -1198,25 +1765,37 @@
    } else {
       fp_copy(G, &M[1]);
    }
-   fp_mulmod (&M[1], &res, P, &M[1]);
+   fp_mulmod (&M[1], res, P, &M[1]);
 
   /* compute the value at M[1<<(winsize-1)] by
    * squaring M[1] (winsize-1) times */
   fp_copy (&M[1], &M[1 << (winsize - 1)]);
   for (x = 0; x < (winsize - 1); x++) {
     fp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)]);
-    fp_montgomery_reduce (&M[1 << (winsize - 1)], P, mp);
+    err = fp_montgomery_reduce (&M[1 << (winsize - 1)], P, mp);
+    if (err != FP_OKAY) {
+      XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
+      return err;
+    }
   }
 
   /* create upper table */
   for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
-    fp_mul(&M[x - 1], &M[1], &M[x]);
-    fp_montgomery_reduce(&M[x], P, mp);
+    err = fp_mul(&M[x - 1], &M[1], &M[x]);
+    if (err != FP_OKAY) {
+      XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
+      return err;
+    }
+    err = fp_montgomery_reduce(&M[x], P, mp);
+    if (err != FP_OKAY) {
+      XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
+      return err;
+    }
   }
 
   /* set initial mode and bit cnt */
   mode   = 0;
-  bitcnt = 1;
+  bitcnt = (x % DIGIT_BIT) + 1;
   buf    = 0;
   digidx = X->used - 1;
   bitcpy = 0;
@@ -1249,8 +1828,16 @@
 
     /* if the bit is zero and mode == 1 then we square */
     if (mode == 1 && y == 0) {
-      fp_sqr(&res, &res);
-      fp_montgomery_reduce(&res, P, mp);
+      err = fp_sqr(res, res);
+      if (err != FP_OKAY) {
+        XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
+        return err;
+      }
+      fp_montgomery_reduce(res, P, mp);
+      if (err != FP_OKAY) {
+        XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
+        return err;
+      }
       continue;
     }
 
@@ -1262,13 +1849,29 @@
       /* ok window is filled so square as required and multiply  */
       /* square first */
       for (x = 0; x < winsize; x++) {
-        fp_sqr(&res, &res);
-        fp_montgomery_reduce(&res, P, mp);
+        err = fp_sqr(res, res);
+        if (err != FP_OKAY) {
+          XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
+          return err;
+        }
+        err = fp_montgomery_reduce(res, P, mp);
+        if (err != FP_OKAY) {
+          XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
+          return err;
+        }
       }
 
       /* then multiply */
-      fp_mul(&res, &M[bitbuf], &res);
-      fp_montgomery_reduce(&res, P, mp);
+      err = fp_mul(res, &M[bitbuf], res);
+      if (err != FP_OKAY) {
+        XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
+        return err;
+      }
+      err = fp_montgomery_reduce(res, P, mp);
+      if (err != FP_OKAY) {
+        XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
+        return err;
+      }
 
       /* empty window and reset */
       bitcpy = 0;
@@ -1281,15 +1884,31 @@
   if (mode == 2 && bitcpy > 0) {
     /* square then multiply if the bit is set */
     for (x = 0; x < bitcpy; x++) {
-      fp_sqr(&res, &res);
-      fp_montgomery_reduce(&res, P, mp);
+      err = fp_sqr(res, res);
+      if (err != FP_OKAY) {
+        XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
+        return err;
+      }
+      err = fp_montgomery_reduce(res, P, mp);
+      if (err != FP_OKAY) {
+        XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
+        return err;
+      }
 
       /* get next bit of the window */
       bitbuf <<= 1;
       if ((bitbuf & (1 << winsize)) != 0) {
         /* then multiply */
-        fp_mul(&res, &M[1], &res);
-        fp_montgomery_reduce(&res, P, mp);
+        err = fp_mul(res, &M[1], res);
+        if (err != FP_OKAY) {
+          XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
+          return err;
+        }
+        err = fp_montgomery_reduce(res, P, mp);
+        if (err != FP_OKAY) {
+          XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
+          return err;
+        }
       }
     }
   }
@@ -1300,42 +1919,473 @@
    * to reduce one more time to cancel out the factor
    * of R.
    */
-  fp_montgomery_reduce(&res, P, mp);
+  err = fp_montgomery_reduce(res, P, mp);
 
   /* swap res with Y */
-  fp_copy (&res, Y);
+  fp_copy (res, Y);
+
+  XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
+  return err;
+}
+
+
+#ifdef TFM_TIMING_RESISTANT
+#if DIGIT_BIT <= 16
+    #define WINSIZE    2
+#elif DIGIT_BIT <= 32
+    #define WINSIZE    3
+#elif DIGIT_BIT <= 64
+    #define WINSIZE    4
+#elif DIGIT_BIT <= 128
+    #define WINSIZE    5
+#endif
+
+/* y = 2**x (mod b)
+ * Some restrictions... x must be positive and < b
+ */
+static int _fp_exptmod_base_2(fp_int * X, int digits, fp_int * P,
+                              fp_int * Y)
+{
+  fp_digit buf, mp;
+  int      err, bitbuf, bitcpy, bitcnt, digidx, x, y;
+#ifdef WOLFSSL_SMALL_STACK
+  fp_int  *res;
+  fp_int  *tmp;
+#else
+  fp_int   res[1];
+  fp_int   tmp[1];
+#endif
 
 #ifdef WOLFSSL_SMALL_STACK
-  XFREE(M, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+  res = (fp_int*)XMALLOC(2*sizeof(fp_int), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+  if (res == NULL) {
+     return FP_MEM;
+  }
+  tmp = &res[1];
+#endif
+
+  /* now setup montgomery  */
+  if ((err = fp_montgomery_setup(P, &mp)) != FP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+     XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+     return err;
+  }
+
+  /* setup result */
+  fp_init(res);
+  fp_init(tmp);
+
+  fp_mul_2d(P, 1 << WINSIZE, tmp);
+
+  /* now we need R mod m */
+  fp_montgomery_calc_normalization(res, P);
+
+  /* Get the top bits left over after taking WINSIZE bits starting at the
+   * least-significant.
+   */
+  digidx = digits - 1;
+  bitcpy = (digits * DIGIT_BIT) % WINSIZE;
+  if (bitcpy > 0) {
+      bitcnt = (int)DIGIT_BIT - bitcpy;
+      buf    = X->dp[digidx--];
+      bitbuf = (int)(buf >> bitcnt);
+      /* Multiply montgomery representation of 1 by 2 ^ top */
+      fp_mul_2d(res, bitbuf, res);
+      fp_add(res, tmp, res);
+      err = fp_mod(res, P, res);
+      if (err != FP_OKAY) {
+      #ifdef WOLFSSL_SMALL_STACK
+        XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+      #endif
+        return err;
+      }
+      /* Move out bits used */
+      buf  <<= bitcpy;
+      bitcnt++;
+  }
+  else {
+      bitcnt = 1;
+      buf    = 0;
+  }
+
+  /* empty window and reset  */
+  bitbuf = 0;
+  bitcpy = 0;
+
+  for (;;) {
+    /* grab next digit as required */
+    if (--bitcnt == 0) {
+      /* if digidx == -1 we are out of digits so break */
+      if (digidx == -1) {
+        break;
+      }
+      /* read next digit and reset bitcnt */
+      buf    = X->dp[digidx--];
+      bitcnt = (int)DIGIT_BIT;
+    }
+
+    /* grab the next msb from the exponent */
+    y       = (int)(buf >> (DIGIT_BIT - 1)) & 1;
+    buf   <<= (fp_digit)1;
+    /* add bit to the window */
+    bitbuf |= (y << (WINSIZE - ++bitcpy));
+
+    if (bitcpy == WINSIZE) {
+      /* ok window is filled so square as required and multiply  */
+      /* square first */
+      for (x = 0; x < WINSIZE; x++) {
+        err = fp_sqr(res, res);
+        if (err != FP_OKAY) {
+        #ifdef WOLFSSL_SMALL_STACK
+          XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        #endif
+          return err;
+        }
+        err = fp_montgomery_reduce(res, P, mp);
+        if (err != FP_OKAY) {
+        #ifdef WOLFSSL_SMALL_STACK
+          XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        #endif
+          return err;
+        }
+      }
+
+      /* then multiply by 2^bitbuf */
+      fp_mul_2d(res, bitbuf, res);
+      /* Add in value to make mod operation take same time */
+      fp_add(res, tmp, res);
+      err = fp_mod(res, P, res);
+      if (err != FP_OKAY) {
+      #ifdef WOLFSSL_SMALL_STACK
+        XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+      #endif
+        return err;
+      }
+
+      /* empty window and reset */
+      bitcpy = 0;
+      bitbuf = 0;
+    }
+  }
+
+  /* fixup result if Montgomery reduction is used
+   * recall that any value in a Montgomery system is
+   * actually multiplied by R mod n.  So we have
+   * to reduce one more time to cancel out the factor
+   * of R.
+   */
+  err = fp_montgomery_reduce(res, P, mp);
+
+  /* swap res with Y */
+  fp_copy(res, Y);
+
+#ifdef WOLFSSL_SMALL_STACK
+  XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
 #endif
-
-  return FP_OKAY;
+  return err;
 }
 
-#endif /* TFM_TIMING_RESISTANT */
+#undef WINSIZE
+#else
+#if DIGIT_BIT < 16
+    #define WINSIZE    3
+#elif DIGIT_BIT < 32
+    #define WINSIZE    4
+#elif DIGIT_BIT < 64
+    #define WINSIZE    5
+#elif DIGIT_BIT < 128
+    #define WINSIZE    6
+#elif DIGIT_BIT == 128
+    #define WINSIZE    7
+#endif
+
+/* y = 2**x (mod b)
+ * Some restrictions... x must be positive and < b
+ */
+static int _fp_exptmod_base_2(fp_int * X, int digits, fp_int * P,
+                              fp_int * Y)
+{
+  fp_digit buf, mp;
+  int      err, bitbuf, bitcpy, bitcnt, digidx, x, y;
+#ifdef WOLFSSL_SMALL_STACK
+  fp_int  *res;
+#else
+  fp_int   res[1];
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+  res = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+  if (res == NULL) {
+     return FP_MEM;
+  }
+#endif
+
+  /* now setup montgomery  */
+  if ((err = fp_montgomery_setup(P, &mp)) != FP_OKAY) {
+     return err;
+  }
+
+  /* setup result */
+  fp_init(res);
+
+  /* now we need R mod m */
+  fp_montgomery_calc_normalization(res, P);
+
+  /* Get the top bits left over after taking WINSIZE bits starting at the
+   * least-significant.
+   */
+  digidx = digits - 1;
+  bitcpy = (digits * DIGIT_BIT) % WINSIZE;
+  if (bitcpy > 0) {
+      bitcnt = (int)DIGIT_BIT - bitcpy;
+      buf    = X->dp[digidx--];
+      bitbuf = (int)(buf >> bitcnt);
+      /* Multiply montgomery representation of 1 by 2 ^ top */
+      fp_mul_2d(res, bitbuf, res);
+      err = fp_mod(res, P, res);
+      if (err != FP_OKAY) {
+      #ifdef WOLFSSL_SMALL_STACK
+        XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+      #endif
+        return err;
+      }
+      /* Move out bits used */
+      buf  <<= bitcpy;
+      bitcnt++;
+  }
+  else {
+      bitcnt = 1;
+      buf    = 0;
+  }
+
+  /* empty window and reset  */
+  bitbuf = 0;
+  bitcpy = 0;
+
+  for (;;) {
+    /* grab next digit as required */
+    if (--bitcnt == 0) {
+      /* if digidx == -1 we are out of digits so break */
+      if (digidx == -1) {
+        break;
+      }
+      /* read next digit and reset bitcnt */
+      buf    = X->dp[digidx--];
+      bitcnt = (int)DIGIT_BIT;
+    }
+
+    /* grab the next msb from the exponent */
+    y       = (int)(buf >> (DIGIT_BIT - 1)) & 1;
+    buf   <<= (fp_digit)1;
+    /* add bit to the window */
+    bitbuf |= (y << (WINSIZE - ++bitcpy));
+
+    if (bitcpy == WINSIZE) {
+      /* ok window is filled so square as required and multiply  */
+      /* square first */
+      for (x = 0; x < WINSIZE; x++) {
+        err = fp_sqr(res, res);
+        if (err != FP_OKAY) {
+        #ifdef WOLFSSL_SMALL_STACK
+          XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        #endif
+          return err;
+        }
+        err = fp_montgomery_reduce(res, P, mp);
+        if (err != FP_OKAY) {
+        #ifdef WOLFSSL_SMALL_STACK
+          XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        #endif
+          return err;
+        }
+      }
+
+      /* then multiply by 2^bitbuf */
+      fp_mul_2d(res, bitbuf, res);
+      err = fp_mod(res, P, res);
+      if (err != FP_OKAY) {
+      #ifdef WOLFSSL_SMALL_STACK
+        XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+      #endif
+        return err;
+      }
+
+      /* empty window and reset */
+      bitcpy = 0;
+      bitbuf = 0;
+    }
+  }
+
+  /* fixup result if Montgomery reduction is used
+   * recall that any value in a Montgomery system is
+   * actually multiplied by R mod n.  So we have
+   * to reduce one more time to cancel out the factor
+   * of R.
+   */
+  err = fp_montgomery_reduce(res, P, mp);
+
+  /* swap res with Y */
+  fp_copy(res, Y);
+
+#ifdef WOLFSSL_SMALL_STACK
+  XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+  return err;
+}
+
+#undef WINSIZE
+#endif
+
 
 int fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
 {
+
+#if defined(WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) && \
+   !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI)
+   int x = fp_count_bits (X);
+#endif
+
+   /* handle modulus of zero and prevent overflows */
+   if (fp_iszero(P) || (P->used > (FP_SIZE/2))) {
+      return FP_VAL;
+   }
+   if (fp_isone(P)) {
+      fp_set(Y, 0);
+      return FP_OKAY;
+   }
+   if (fp_iszero(X)) {
+      fp_set(Y, 1);
+      return FP_OKAY;
+   }
+   if (fp_iszero(G)) {
+      fp_set(Y, 0);
+      return FP_OKAY;
+   }
+
+#if defined(WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) && \
+   !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI)
+   if(x > EPS_RSA_EXPT_XBTIS) {
+      return esp_mp_exptmod(G, X, x, P, Y);
+   }
+#endif
+
+   if (X->sign == FP_NEG) {
+#ifndef POSITIVE_EXP_ONLY  /* reduce stack if assume no negatives */
+      int    err;
+   #ifndef WOLFSSL_SMALL_STACK
+      fp_int tmp[2];
+   #else
+      fp_int *tmp;
+   #endif
+
+   #ifdef WOLFSSL_SMALL_STACK
+      tmp = (fp_int*)XMALLOC(sizeof(fp_int) * 2, NULL, DYNAMIC_TYPE_BIGINT);
+      if (tmp == NULL)
+          return FP_MEM;
+   #endif
+
+      /* yes, copy G and invmod it */
+      fp_init_copy(&tmp[0], G);
+      fp_init_copy(&tmp[1], P);
+      tmp[1].sign = FP_ZPOS;
+      err = fp_invmod(&tmp[0], &tmp[1], &tmp[0]);
+      if (err == FP_OKAY) {
+         fp_copy(X, &tmp[1]);
+         tmp[1].sign = FP_ZPOS;
+#ifdef TFM_TIMING_RESISTANT
+         err =  _fp_exptmod_ct(&tmp[0], &tmp[1], tmp[1].used, P, Y);
+#else
+         err =  _fp_exptmod_nct(&tmp[0], &tmp[1], P, Y);
+#endif
+         if (P->sign == FP_NEG) {
+            fp_add(Y, P, Y);
+         }
+      }
+   #ifdef WOLFSSL_SMALL_STACK
+      XFREE(tmp, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+   #endif
+      return err;
+#else
+      return FP_VAL;
+#endif
+   }
+   else if (G->used == 1 && G->dp[0] == 2) {
+      return _fp_exptmod_base_2(X, X->used, P, Y);
+   }
+   else {
+      /* Positive exponent so just exptmod */
+#ifdef TFM_TIMING_RESISTANT
+      return _fp_exptmod_ct(G, X, X->used, P, Y);
+#else
+      return _fp_exptmod_nct(G, X, P, Y);
+#endif
+   }
+}
+
+int fp_exptmod_ex(fp_int * G, fp_int * X, int digits, fp_int * P, fp_int * Y)
+{
+
+#if defined(WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) && \
+   !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI)
+   int x = fp_count_bits (X);
+#endif
+
+   if (fp_iszero(G)) {
+      fp_set(G, 0);
+      return FP_OKAY;
+   }
+
    /* prevent overflows */
    if (P->used > (FP_SIZE/2)) {
       return FP_VAL;
    }
 
+#if defined(WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) && \
+   !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI)
+   if(x > EPS_RSA_EXPT_XBTIS) {
+      return esp_mp_exptmod(G, X, x, P, Y);
+   }
+#endif
+
    if (X->sign == FP_NEG) {
 #ifndef POSITIVE_EXP_ONLY  /* reduce stack if assume no negatives */
       int    err;
-      fp_int tmp;
+   #ifndef WOLFSSL_SMALL_STACK
+      fp_int tmp[2];
+   #else
+      fp_int *tmp;
+   #endif
+
+   #ifdef WOLFSSL_SMALL_STACK
+      tmp = (fp_int*)XMALLOC(sizeof(fp_int) * 2, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+      if (tmp == NULL)
+          return FP_MEM;
+   #endif
 
       /* yes, copy G and invmod it */
-      fp_init_copy(&tmp, G);
-      if ((err = fp_invmod(&tmp, P, &tmp)) != FP_OKAY) {
-         return err;
+      fp_init_copy(&tmp[0], G);
+      fp_init_copy(&tmp[1], P);
+      tmp[1].sign = FP_ZPOS;
+      err = fp_invmod(&tmp[0], &tmp[1], &tmp[0]);
+      if (err == FP_OKAY) {
+         X->sign = FP_ZPOS;
+#ifdef TFM_TIMING_RESISTANT
+         err =  _fp_exptmod_ct(&tmp[0], X, digits, P, Y);
+#else
+         err =  _fp_exptmod_nct(&tmp[0], X, P, Y);
+         (void)digits;
+#endif
+         if (X != Y) {
+            X->sign = FP_NEG;
+         }
+         if (P->sign == FP_NEG) {
+            fp_add(Y, P, Y);
+         }
       }
-      X->sign = FP_ZPOS;
-      err =  _fp_exptmod(&tmp, X, P, Y);
-      if (X != Y) {
-         X->sign = FP_NEG;
-      }
+   #ifdef WOLFSSL_SMALL_STACK
+      XFREE(tmp, NULL, DYNAMIC_TYPE_BIGINT);
+   #endif
       return err;
 #else
       return FP_VAL;
@@ -1343,7 +2393,79 @@
    }
    else {
       /* Positive exponent so just exptmod */
-      return _fp_exptmod(G, X, P, Y);
+#ifdef TFM_TIMING_RESISTANT
+      return _fp_exptmod_ct(G, X, digits, P, Y);
+#else
+      return  _fp_exptmod_nct(G, X, P, Y);
+#endif
+   }
+}
+
+int fp_exptmod_nct(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
+{
+#if defined(WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) && \
+   !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI)
+   int x = fp_count_bits (X);
+#endif
+
+   if (fp_iszero(G)) {
+      fp_set(G, 0);
+      return FP_OKAY;
+   }
+
+   /* prevent overflows */
+   if (P->used > (FP_SIZE/2)) {
+      return FP_VAL;
+   }
+
+#if defined(WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) && \
+   !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI)
+   if(x > EPS_RSA_EXPT_XBTIS) {
+      return esp_mp_exptmod(G, X, x, P, Y);
+   }
+#endif
+
+   if (X->sign == FP_NEG) {
+#ifndef POSITIVE_EXP_ONLY  /* reduce stack if assume no negatives */
+      int    err;
+   #ifndef WOLFSSL_SMALL_STACK
+      fp_int tmp[2];
+   #else
+      fp_int *tmp;
+   #endif
+
+   #ifdef WOLFSSL_SMALL_STACK
+      tmp = (fp_int*)XMALLOC(sizeof(fp_int) * 2, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+      if (tmp == NULL)
+          return FP_MEM;
+   #endif
+
+      /* yes, copy G and invmod it */
+      fp_init_copy(&tmp[0], G);
+      fp_init_copy(&tmp[1], P);
+      tmp[1].sign = FP_ZPOS;
+      err = fp_invmod(&tmp[0], &tmp[1], &tmp[0]);
+      if (err == FP_OKAY) {
+         X->sign = FP_ZPOS;
+         err =  _fp_exptmod_nct(&tmp[0], X, P, Y);
+         if (X != Y) {
+            X->sign = FP_NEG;
+         }
+         if (P->sign == FP_NEG) {
+            fp_add(Y, P, Y);
+         }
+      }
+   #ifdef WOLFSSL_SMALL_STACK
+      XFREE(tmp, NULL, DYNAMIC_TYPE_BIGINT);
+   #endif
+      return err;
+#else
+      return FP_VAL;
+#endif
+   }
+   else {
+      /* Positive exponent so just exptmod */
+      return  _fp_exptmod_nct(G, X, P, Y);
    }
 }
 
@@ -1372,8 +2494,9 @@
 }
 
 /* b = a*a  */
-void fp_sqr(fp_int *A, fp_int *B)
+int fp_sqr(fp_int *A, fp_int *B)
 {
+    int err;
     int y, oldused;
 
     oldused = B->used;
@@ -1381,118 +2504,131 @@
 
     /* call generic if we're out of range */
     if (y + y > FP_SIZE) {
-       fp_sqr_comba(A, B);
+       err = fp_sqr_comba(A, B);
        goto clean;
     }
 
 #if defined(TFM_SQR3) && FP_SIZE >= 6
         if (y <= 3) {
-           fp_sqr_comba3(A,B);
+           err = fp_sqr_comba3(A,B);
            goto clean;
         }
 #endif
 #if defined(TFM_SQR4) && FP_SIZE >= 8
         if (y == 4) {
-           fp_sqr_comba4(A,B);
+           err = fp_sqr_comba4(A,B);
            goto clean;
         }
 #endif
 #if defined(TFM_SQR6) && FP_SIZE >= 12
         if (y <= 6) {
-           fp_sqr_comba6(A,B);
+           err = fp_sqr_comba6(A,B);
            goto clean;
         }
 #endif
 #if defined(TFM_SQR7) && FP_SIZE >= 14
         if (y == 7) {
-           fp_sqr_comba7(A,B);
+           err = fp_sqr_comba7(A,B);
            goto clean;
         }
 #endif
 #if defined(TFM_SQR8) && FP_SIZE >= 16
         if (y == 8) {
-           fp_sqr_comba8(A,B);
+           err = fp_sqr_comba8(A,B);
            goto clean;
         }
 #endif
 #if defined(TFM_SQR9) && FP_SIZE >= 18
         if (y == 9) {
-           fp_sqr_comba9(A,B);
+           err = fp_sqr_comba9(A,B);
            goto clean;
         }
 #endif
 #if defined(TFM_SQR12) && FP_SIZE >= 24
         if (y <= 12) {
-           fp_sqr_comba12(A,B);
+           err = fp_sqr_comba12(A,B);
            goto clean;
         }
 #endif
 #if defined(TFM_SQR17) && FP_SIZE >= 34
         if (y <= 17) {
-           fp_sqr_comba17(A,B);
+           err = fp_sqr_comba17(A,B);
            goto clean;
         }
 #endif
 #if defined(TFM_SMALL_SET)
         if (y <= 16) {
-           fp_sqr_comba_small(A,B);
+           err = fp_sqr_comba_small(A,B);
            goto clean;
         }
 #endif
 #if defined(TFM_SQR20) && FP_SIZE >= 40
         if (y <= 20) {
-           fp_sqr_comba20(A,B);
+           err = fp_sqr_comba20(A,B);
            goto clean;
         }
 #endif
 #if defined(TFM_SQR24) && FP_SIZE >= 48
         if (y <= 24) {
-           fp_sqr_comba24(A,B);
+           err = fp_sqr_comba24(A,B);
            goto clean;
         }
 #endif
 #if defined(TFM_SQR28) && FP_SIZE >= 56
         if (y <= 28) {
-           fp_sqr_comba28(A,B);
+           err = fp_sqr_comba28(A,B);
            goto clean;
         }
 #endif
 #if defined(TFM_SQR32) && FP_SIZE >= 64
         if (y <= 32) {
-           fp_sqr_comba32(A,B);
+           err = fp_sqr_comba32(A,B);
            goto clean;
         }
 #endif
 #if defined(TFM_SQR48) && FP_SIZE >= 96
         if (y <= 48) {
-           fp_sqr_comba48(A,B);
+           err = fp_sqr_comba48(A,B);
            goto clean;
         }
 #endif
 #if defined(TFM_SQR64) && FP_SIZE >= 128
         if (y <= 64) {
-           fp_sqr_comba64(A,B);
+           err = fp_sqr_comba64(A,B);
            goto clean;
         }
 #endif
-       fp_sqr_comba(A, B);
+       err = fp_sqr_comba(A, B);
 
 clean:
   /* zero any excess digits on the destination that we didn't write to */
   for (y = B->used; y >= 0 && y < oldused; y++) {
     B->dp[y] = 0;
   }
+
+  return err;
 }
 
 /* generic comba squarer */
-void fp_sqr_comba(fp_int *A, fp_int *B)
+int fp_sqr_comba(fp_int *A, fp_int *B)
 {
   int       pa, ix, iz;
   fp_digit  c0, c1, c2;
-  fp_int    tmp, *dst;
 #ifdef TFM_ISO
   fp_word   tt;
 #endif
+   fp_int    *dst;
+#ifndef WOLFSSL_SMALL_STACK
+   fp_int    tmp[1];
+#else
+   fp_int    *tmp;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+   tmp = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
+   if (tmp == NULL)
+       return FP_MEM;
+#endif
 
   /* get size of output and trim */
   pa = A->used + A->used;
@@ -1505,8 +2641,8 @@
   COMBA_CLEAR;
 
   if (A == B) {
-     fp_init(&tmp);
-     dst = &tmp;
+     fp_init(tmp);
+     dst = tmp;
   } else {
      fp_zero(B);
      dst = B;
@@ -1562,6 +2698,17 @@
   if (dst != B) {
      fp_copy(dst, B);
   }
+
+  /* Variables used but not seen by cppcheck. */
+  (void)c0; (void)c1; (void)c2;
+#ifdef TFM_ISO
+  (void)tt;
+#endif
+   
+#ifdef WOLFSSL_SMALL_STACK
+  XFREE(tmp, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+  return FP_OKAY;
 }
 
 int fp_cmp(fp_int *a, fp_int *b)
@@ -1704,27 +2851,38 @@
 }
 
 /* computes x/R == x (mod N) via Montgomery Reduction */
-static void fp_montgomery_reduce_mulx(fp_int *a, fp_int *m, fp_digit mp)
+static int fp_montgomery_reduce_mulx(fp_int *a, fp_int *m, fp_digit mp)
 {
-   fp_digit c[FP_SIZE+1], *_c, *tmpm, mu = 0;
+#ifndef WOLFSSL_SMALL_STACK
+   fp_digit c[FP_SIZE+1];
+#else
+   fp_digit *c;
+#endif
+   fp_digit *_c, *tmpm, mu = 0;
    int      oldused, x, y, pa;
 
    /* bail if too large */
    if (m->used > (FP_SIZE/2)) {
       (void)mu;                     /* shut up compiler */
-      return;
+      return FP_OKAY;
    }
 
 #ifdef TFM_SMALL_MONT_SET
    if (m->used <= 16) {
-      fp_montgomery_reduce_small(a, m, mp);
-      return;
+      return fp_montgomery_reduce_small(a, m, mp);
    }
 #endif
 
+#ifdef WOLFSSL_SMALL_STACK
+   /* only allocate space for what's needed for window plus res */
+   c = (fp_digit*)XMALLOC(sizeof(fp_digit)*(FP_SIZE + 1), NULL, DYNAMIC_TYPE_BIGINT);
+   if (c == NULL) {
+      return FP_MEM;
+   }
+#endif
 
    /* now zero the buff */
-   XMEMSET(c, 0, sizeof(c));
+   XMEMSET(c, 0, sizeof(fp_digit)*(FP_SIZE + 1));
    pa = m->used;
 
    /* copy the input */
@@ -1778,33 +2936,50 @@
   if (fp_cmp_mag (a, m) != FP_LT) {
     s_fp_sub (a, m, a);
   }
+
+#ifdef WOLFSSL_SMALL_STACK
+  XFREE(c, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+  return FP_OKAY;
 }
 #endif
 
 /* computes x/R == x (mod N) via Montgomery Reduction */
-void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
+int fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
 {
-   fp_digit c[FP_SIZE+1], *_c, *tmpm, mu = 0;
-   int      oldused, x, y, pa;
-
-   IF_HAVE_INTEL_MULX(fp_montgomery_reduce_mulx(a, m, mp), return) ;
+#ifndef WOLFSSL_SMALL_STACK
+   fp_digit c[FP_SIZE+1];
+#else
+   fp_digit *c;
+#endif
+   fp_digit *_c, *tmpm, mu = 0;
+   int      oldused, x, y, pa, err = 0;
+
+   IF_HAVE_INTEL_MULX(err = fp_montgomery_reduce_mulx(a, m, mp), return err) ;
+   (void)err;
 
    /* bail if too large */
    if (m->used > (FP_SIZE/2)) {
       (void)mu;                     /* shut up compiler */
-      return;
+      return FP_OKAY;
    }
 
 #ifdef TFM_SMALL_MONT_SET
    if (m->used <= 16) {
-      fp_montgomery_reduce_small(a, m, mp);
-      return;
+      return fp_montgomery_reduce_small(a, m, mp);
    }
 #endif
 
+#ifdef WOLFSSL_SMALL_STACK
+   /* only allocate space for what's needed for window plus res */
+   c = (fp_digit*)XMALLOC(sizeof(fp_digit)*(FP_SIZE + 1), NULL, DYNAMIC_TYPE_BIGINT);
+   if (c == NULL) {
+      return FP_MEM;
+   }
+#endif
 
    /* now zero the buff */
-   XMEMSET(c, 0, sizeof(c));
+   XMEMSET(c, 0, sizeof(fp_digit)*(FP_SIZE + 1));
    pa = m->used;
 
    /* copy the input */
@@ -1860,6 +3035,11 @@
   if (fp_cmp_mag (a, m) != FP_LT) {
     s_fp_sub (a, m, a);
   }
+
+#ifdef WOLFSSL_SMALL_STACK
+  XFREE(c, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+  return FP_OKAY;
 }
 
 void fp_read_unsigned_bin(fp_int *a, const unsigned char *b, int c)
@@ -1898,10 +3078,10 @@
        /* Use Duff's device to unroll the loop. */
        int idx = (c - 1) & ~3;
        switch (c % 4) {
-       case 0:    do { pd[idx+0] = *b++;
-       case 3:         pd[idx+1] = *b++;
-       case 2:         pd[idx+2] = *b++;
-       case 1:         pd[idx+3] = *b++;
+       case 0:    do { pd[idx+0] = *b++; // fallthrough
+       case 3:         pd[idx+1] = *b++; // fallthrough
+       case 2:         pd[idx+2] = *b++; // fallthrough
+       case 1:         pd[idx+3] = *b++; // fallthrough
                      idx -= 4;
                  } while ((c -= 4) > 0);
        }
@@ -1953,15 +3133,72 @@
 #endif
 }
 
-void fp_to_unsigned_bin(fp_int *a, unsigned char *b)
+int fp_to_unsigned_bin(fp_int *a, unsigned char *b)
 {
   int     x;
-  fp_int  t;
-
-  fp_init_copy(&t, a);
-
-  x = fp_to_unsigned_bin_at_pos(0, &t, b);
+#ifndef WOLFSSL_SMALL_STACK
+   fp_int t[1];
+#else
+   fp_int *t;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+   t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
+   if (t == NULL)
+       return FP_MEM;
+#endif
+
+  fp_init_copy(t, a);
+
+  x = fp_to_unsigned_bin_at_pos(0, t, b);
   fp_reverse (b, x);
+
+#ifdef WOLFSSL_SMALL_STACK
+  XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+  return FP_OKAY;
+}
+
+int fp_to_unsigned_bin_len(fp_int *a, unsigned char *b, int c)
+{
+#if DIGIT_BIT == 64 || DIGIT_BIT == 32
+  int i, j, x;
+
+  for (x=c-1,j=0,i=0; x >= 0; x--) {
+     b[x] = (unsigned char)(a->dp[i] >> j);
+     j += 8;
+     i += j == DIGIT_BIT;
+     j &= DIGIT_BIT - 1;
+  }
+
+  return FP_OKAY;
+#else
+  int     x;
+#ifndef WOLFSSL_SMALL_STACK
+   fp_int t[1];
+#else
+   fp_int *t;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+   t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
+   if (t == NULL)
+       return FP_MEM;
+#endif
+
+  fp_init_copy(t, a);
+
+  for (x = 0; x < c; x++) {
+      b[x] = (unsigned char) (t->dp[0] & 255);
+      fp_div_2d (t, 8, t, NULL);
+  }
+  fp_reverse (b, x);
+
+#ifdef WOLFSSL_SMALL_STACK
+  XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+  return FP_OKAY;
+#endif
 }
 
 int fp_unsigned_bin_size(fp_int *a)
@@ -2122,6 +3359,8 @@
     fp_digit r, rr;
     fp_digit D = x;
 
+    if (fp_iszero(c)) return;
+
     /* mask */
     mask = (((fp_digit)1) << D) - 1;
 
@@ -2194,20 +3433,36 @@
 
 
 /* c = a - b */
-void fp_sub_d(fp_int *a, fp_digit b, fp_int *c)
+int fp_sub_d(fp_int *a, fp_digit b, fp_int *c)
 {
-   fp_int tmp;
-   fp_init(&tmp);
-   fp_set(&tmp, b);
+#ifndef WOLFSSL_SMALL_STACK
+   fp_int    tmp[1];
+#else
+   fp_int    *tmp;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+   tmp = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
+   if (tmp == NULL)
+       return FP_MEM;
+#endif
+
+   fp_init(tmp);
+   fp_set(tmp, b);
 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
    if (c->size < FP_SIZE) {
-     fp_sub(a, &tmp, &tmp);
-     fp_copy(&tmp, c);
+     fp_sub(a, tmp, tmp);
+     fp_copy(tmp, c);
    } else
 #endif
    {
-     fp_sub(a, &tmp, c);
+     fp_sub(a, tmp, c);
    }
+
+#ifdef WOLFSSL_SMALL_STACK
+   XFREE(tmp, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+   return FP_OKAY;
 }
 
 
@@ -2234,22 +3489,26 @@
 
 void fp_zero(fp_int *a)
 {
-    int size = FP_SIZE;
+    int size;
     a->used = 0;
     a->sign = FP_ZPOS;
 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
     size = a->size;
+#else
+    size = FP_SIZE;
 #endif
     XMEMSET(a->dp, 0, size * sizeof(fp_digit));
 }
 
 void fp_clear(fp_int *a)
 {
-    int size = FP_SIZE;
+    int size;
     a->used = 0;
     a->sign = FP_ZPOS;
 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
     size = a->size;
+#else
+    size = FP_SIZE;
 #endif
     XMEMSET(a->dp, 0, size * sizeof(fp_digit));
     fp_free(a);
@@ -2257,11 +3516,13 @@
 
 void fp_forcezero (mp_int * a)
 {
-    int size = FP_SIZE;
+    int size;
     a->used = 0;
     a->sign = FP_ZPOS;
 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
     size = a->size;
+#else
+    size = FP_SIZE;
 #endif
     ForceZero(a->dp, size * sizeof(fp_digit));
 #ifdef HAVE_WOLF_BIGINT
@@ -2339,8 +3600,7 @@
 int mp_mul (mp_int * a, mp_int * b, mp_int * c)
 #endif
 {
-  fp_mul(a, b, c);
-  return MP_OKAY;
+  return fp_mul(a, b, c);
 }
 
 int mp_mul_d (mp_int * a, mp_digit b, mp_int * c)
@@ -2356,7 +3616,16 @@
 int mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
 #endif
 {
-  return fp_mulmod(a, b, c, d);
+ #if defined(WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) && \
+    !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI)
+    int A = fp_count_bits (a);
+    int B = fp_count_bits (b);
+
+    if( A >= ESP_RSA_MULM_BITS && B >= ESP_RSA_MULM_BITS)
+        return esp_mp_mulmod(a, b, c, d);
+    else
+ #endif
+   return fp_mulmod(a, b, c, d);
 }
 
 /* d = a - b (mod c) */
@@ -2391,9 +3660,15 @@
   return fp_invmod(a, b, c);
 }
 
+/* hac 14.61, pp608 */
+int mp_invmod_mont_ct (mp_int * a, mp_int * b, mp_int * c, mp_digit mp)
+{
+  return fp_invmod_mont_ct(a, b, c, mp);
+}
+
 /* this is a shell function that calls either the normal or Montgomery
  * exptmod functions.  Originally the call to the montgomery code was
- * embedded in the normal function but that wasted alot of stack space
+ * embedded in the normal function but that wasted a lot of stack space
  * for nothing (since 99% of the time the Montgomery code would be called)
  */
 #if defined(FREESCALE_LTC_TFM)
@@ -2405,6 +3680,17 @@
   return fp_exptmod(G, X, P, Y);
 }
 
+int mp_exptmod_ex (mp_int * G, mp_int * X, int digits, mp_int * P, mp_int * Y)
+{
+  return fp_exptmod_ex(G, X, digits, P, Y);
+}
+
+int mp_exptmod_nct (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
+{
+  return fp_exptmod_nct(G, X, P, Y);
+}
+
+
 /* compare two ints (signed)*/
 int mp_cmp (mp_int * a, mp_int * b)
 {
@@ -2431,10 +3717,13 @@
 /* store in unsigned [big endian] format */
 int mp_to_unsigned_bin (mp_int * a, unsigned char *b)
 {
-  fp_to_unsigned_bin(a,b);
-  return MP_OKAY;
+  return fp_to_unsigned_bin(a,b);
 }
 
+int mp_to_unsigned_bin_len(mp_int * a, unsigned char *b, int c)
+{
+  return fp_to_unsigned_bin_len(a, b, c);
+}
 /* reads a unsigned char array, assumes the msb is stored first [big endian] */
 int mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c)
 {
@@ -2445,8 +3734,7 @@
 
 int mp_sub_d(fp_int *a, fp_digit b, fp_int *c)
 {
-    fp_sub_d(a, b, c);
-    return MP_OKAY;
+    return fp_sub_d(a, b, c);
 }
 
 int mp_mul_2d(fp_int *a, int b, fp_int *c)
@@ -2564,28 +3852,43 @@
     return fp_set_bit(a, b);
 }
 
-#if defined(WOLFSSL_KEY_GEN) || defined (HAVE_ECC)
+#if defined(WOLFSSL_KEY_GEN) || defined (HAVE_ECC) || !defined(NO_DH) || \
+    !defined(NO_DSA) || !defined(NO_RSA)
 
 /* c = a * a (mod b) */
 int fp_sqrmod(fp_int *a, fp_int *b, fp_int *c)
 {
   int err;
-  fp_int t;
-
-  fp_init(&t);
-  fp_sqr(a, &t);
-
-#if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
-  if (c->size < FP_SIZE) {
-    err = fp_mod(&t, b, &t);
-    fp_copy(&t, c);
+#ifndef WOLFSSL_SMALL_STACK
+   fp_int t[1];
+#else
+   fp_int *t;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+   t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
+   if (t == NULL)
+       return FP_MEM;
+#endif
+
+  fp_init(t);
+  err = fp_sqr(a, t);
+  if (err == FP_OKAY) {
+  #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
+    if (c->size < FP_SIZE) {
+      err = fp_mod(t, b, t);
+      fp_copy(t, c);
+    }
+    else
+  #endif
+    {
+      err = fp_mod(t, b, c);
+    }
   }
-  else
+
+#ifdef WOLFSSL_SMALL_STACK
+  XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
 #endif
-  {
-    err = fp_mod(&t, b, c);
-  }
-
   return err;
 }
 
@@ -2605,21 +3908,35 @@
 #endif /* WOLFSSL_KEYGEN || HAVE_ECC */
 
 
-#if defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || \
-    defined(WOLFSSL_DEBUG_MATH) || defined(DEBUG_WOLFSSL) || \
-    defined(WOLFSSL_PUBLIC_MP)
+#if defined(WC_MP_TO_RADIX) || !defined(NO_DH) || !defined(NO_DSA) || \
+    !defined(NO_RSA)
 
 #ifdef WOLFSSL_KEY_GEN
 /* swap the elements of two integers, for cases where you can't simply swap the
  * mp_int pointers around
  */
-static void fp_exch (fp_int * a, fp_int * b)
+static int fp_exch (fp_int * a, fp_int * b)
 {
-    fp_int  t;
-
-    t  = *a;
+#ifndef WOLFSSL_SMALL_STACK
+    fp_int  t[1];
+#else
+    fp_int *t;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+   t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
+   if (t == NULL)
+       return FP_MEM;
+#endif
+
+    *t = *a;
     *a = *b;
-    *b = t;
+    *b = *t;
+
+#ifdef WOLFSSL_SMALL_STACK
+    XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+    return FP_OKAY;
 }
 #endif
 
@@ -2676,13 +3993,15 @@
 /* a/b => cb + d == a */
 static int fp_div_d(fp_int *a, fp_digit b, fp_int *c, fp_digit *d)
 {
-  fp_int   q;
+#ifndef WOLFSSL_SMALL_STACK
+  fp_int   q[1];
+#else
+  fp_int   *q;
+#endif
   fp_word  w;
   fp_digit t;
   int      ix;
 
-  fp_init(&q);
-
   /* cannot divide by zero */
   if (b == 0) {
      return FP_VAL;
@@ -2710,9 +4029,17 @@
      return FP_OKAY;
   }
 
+#ifdef WOLFSSL_SMALL_STACK
+  q = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
+  if (q == NULL)
+      return FP_MEM;
+#endif
+
+  fp_init(q);
+
   if (c != NULL) {
-    q.used = a->used;
-    q.sign = a->sign;
+    q->used = a->used;
+    q->sign = a->sign;
   }
 
   w = 0;
@@ -2726,7 +4053,7 @@
         t = 0;
       }
       if (c != NULL)
-        q.dp[ix] = (fp_digit)t;
+        q->dp[ix] = (fp_digit)t;
   }
 
   if (d != NULL) {
@@ -2734,10 +4061,13 @@
   }
 
   if (c != NULL) {
-     fp_clamp(&q);
-     fp_copy(&q, c);
+     fp_clamp(q);
+     fp_copy(q, c);
   }
 
+#ifdef WOLFSSL_SMALL_STACK
+  XFREE(q, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
   return FP_OKAY;
 }
 
@@ -2753,58 +4083,18 @@
    return fp_mod_d(a, b, c);
 }
 
-#endif /* defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || defined(WOLFSSL_DEBUG_MATH) */
-
-#ifdef WOLFSSL_KEY_GEN
-
-static void fp_gcd(fp_int *a, fp_int *b, fp_int *c);
-static void fp_lcm(fp_int *a, fp_int *b, fp_int *c);
-static int  fp_isprime_ex(fp_int *a, int t);
-static int  fp_isprime(fp_int *a);
-static int  fp_randprime(fp_int* N, int len, WC_RNG* rng, void* heap);
-
-int mp_gcd(fp_int *a, fp_int *b, fp_int *c)
-{
-    fp_gcd(a, b, c);
-    return MP_OKAY;
-}
-
-
-int mp_lcm(fp_int *a, fp_int *b, fp_int *c)
-{
-    fp_lcm(a, b, c);
-    return MP_OKAY;
-}
+#endif /* WC_MP_TO_RADIX || !NO_DH || !NO_DSA || !NO_RSA */
+
+
+#if !defined(NO_DH) || !defined(NO_DSA) || !defined(NO_RSA) || \
+    defined(WOLFSSL_KEY_GEN)
+
+static int  fp_isprime_ex(fp_int *a, int t, int* result);
 
 
 int mp_prime_is_prime(mp_int* a, int t, int* result)
 {
-    (void)t;
-    *result = fp_isprime(a);
-    return MP_OKAY;
-}
-
-int mp_rand_prime(mp_int* N, int len, WC_RNG* rng, void* heap)
-{
-    int err;
-
-    err = fp_randprime(N, len, rng, heap);
-    switch(err) {
-        case FP_VAL:
-            return MP_VAL;
-        case FP_MEM:
-            return MP_MEM;
-        default:
-            break;
-    }
-
-    return MP_OKAY;
-}
-
-int mp_exch (mp_int * a, mp_int * b)
-{
-    fp_exch(a, b);
-    return MP_OKAY;
+    return fp_isprime_ex(a, t, result);
 }
 
 /* Miller-Rabin test of "a" to the base of "b" as described in
@@ -2814,60 +4104,122 @@
  * Randomly the chance of error is no more than 1/4 and often
  * very much lower.
  */
-static void fp_prime_miller_rabin (fp_int * a, fp_int * b, int *result)
+static int fp_prime_miller_rabin_ex(fp_int * a, fp_int * b, int *result,
+  fp_int *n1, fp_int *y, fp_int *r)
 {
-  fp_int  n1, y, r;
-  int     s, j;
+  int s, j;
+  int err;
 
   /* default */
   *result = FP_NO;
 
   /* ensure b > 1 */
   if (fp_cmp_d(b, 1) != FP_GT) {
-     return;
+     return FP_OKAY;
   }
 
   /* get n1 = a - 1 */
-  fp_init_copy(&n1, a);
-  fp_sub_d(&n1, 1, &n1);
+  fp_copy(a, n1);
+  err = fp_sub_d(n1, 1, n1);
+  if (err != FP_OKAY) {
+     return err;
+  }
 
   /* set 2**s * r = n1 */
-  fp_init_copy(&r, &n1);
+  fp_copy(n1, r);
 
   /* count the number of least significant bits
    * which are zero
    */
-  s = fp_cnt_lsb(&r);
+  s = fp_cnt_lsb(r);
 
   /* now divide n - 1 by 2**s */
-  fp_div_2d (&r, s, &r, NULL);
+  fp_div_2d (r, s, r, NULL);
 
   /* compute y = b**r mod a */
-  fp_init(&y);
-  fp_exptmod(b, &r, a, &y);
+  fp_zero(y);
+#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
+                                                     defined(WOLFSSL_HAVE_SP_DH)
+#ifndef WOLFSSL_SP_NO_2048
+  if (fp_count_bits(a) == 1024)
+      sp_ModExp_1024(b, r, a, y);
+  else if (fp_count_bits(a) == 2048)
+      sp_ModExp_2048(b, r, a, y);
+  else
+#endif
+#ifndef WOLFSSL_SP_NO_3072
+  if (fp_count_bits(a) == 1536)
+      sp_ModExp_1536(b, r, a, y);
+  else if (fp_count_bits(a) == 3072)
+      sp_ModExp_3072(b, r, a, y);
+  else
+#endif
+#ifdef WOLFSSL_SP_4096
+  if (fp_count_bits(a) == 4096)
+      sp_ModExp_4096(b, r, a, y);
+  else
+#endif
+#endif
+      fp_exptmod(b, r, a, y);
 
   /* if y != 1 and y != n1 do */
-  if (fp_cmp_d (&y, 1) != FP_EQ && fp_cmp (&y, &n1) != FP_EQ) {
+  if (fp_cmp_d (y, 1) != FP_EQ && fp_cmp (y, n1) != FP_EQ) {
     j = 1;
     /* while j <= s-1 and y != n1 */
-    while ((j <= (s - 1)) && fp_cmp (&y, &n1) != FP_EQ) {
-      fp_sqrmod (&y, a, &y);
+    while ((j <= (s - 1)) && fp_cmp (y, n1) != FP_EQ) {
+      fp_sqrmod (y, a, y);
 
       /* if y == 1 then composite */
-      if (fp_cmp_d (&y, 1) == FP_EQ) {
-         return;
+      if (fp_cmp_d (y, 1) == FP_EQ) {
+         return FP_OKAY;
       }
       ++j;
     }
 
     /* if y != n1 then composite */
-    if (fp_cmp (&y, &n1) != FP_EQ) {
-       return;
+    if (fp_cmp (y, n1) != FP_EQ) {
+       return FP_OKAY;
     }
   }
 
   /* probably prime now */
   *result = FP_YES;
+
+  return FP_OKAY;
+}
+
+static int fp_prime_miller_rabin(fp_int * a, fp_int * b, int *result)
+{
+  int err;
+#ifndef WOLFSSL_SMALL_STACK
+  fp_int  n1[1], y[1], r[1];
+#else
+  fp_int *n1, *y, *r;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+  n1 = (fp_int*)XMALLOC(sizeof(fp_int) * 3, NULL, DYNAMIC_TYPE_BIGINT);
+  if (n1 == NULL) {
+      return FP_MEM;
+  }
+  y = &n1[1]; r = &n1[2];
+#endif
+
+  fp_init(n1);
+  fp_init(y);
+  fp_init(r);
+
+  err = fp_prime_miller_rabin_ex(a, b, result, n1, y, r);
+
+  fp_clear(n1);
+  fp_clear(y);
+  fp_clear(r);
+
+#ifdef WOLFSSL_SMALL_STACK
+  XFREE(n1, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+
+  return err;
 }
 
 
@@ -2910,47 +4262,244 @@
   0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
 };
 
-int fp_isprime_ex(fp_int *a, int t)
+int fp_isprime_ex(fp_int *a, int t, int* result)
 {
-   fp_int   b;
+#ifndef WOLFSSL_SMALL_STACK
+   fp_int   b[1];
+#else
+   fp_int   *b;
+#endif
    fp_digit d;
    int      r, res;
 
    if (t <= 0 || t > FP_PRIME_SIZE) {
-     return FP_NO;
+     *result = FP_NO;
+     return FP_VAL;
+   }
+
+   if (fp_isone(a)) {
+       *result = FP_NO;
+       return FP_OKAY;
+   }
+
+   /* check against primes table */
+   for (r = 0; r < FP_PRIME_SIZE; r++) {
+       if (fp_cmp_d(a, primes[r]) == FP_EQ) {
+           *result = FP_YES;
+           return FP_OKAY;
+       }
    }
 
    /* do trial division */
    for (r = 0; r < FP_PRIME_SIZE; r++) {
        res = fp_mod_d(a, primes[r], &d);
        if (res != MP_OKAY || d == 0) {
-           return FP_NO;
+           *result = FP_NO;
+           return FP_OKAY;
+       }
+   }
+
+#ifdef WOLFSSL_SMALL_STACK
+  b = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
+  if (b == NULL)
+      return FP_MEM;
+#endif
+   /* now do 't' miller rabins */
+   fp_init(b);
+   for (r = 0; r < t; r++) {
+       fp_set(b, primes[r]);
+       fp_prime_miller_rabin(a, b, &res);
+       if (res == FP_NO) {
+          *result = FP_NO;
+       #ifdef WOLFSSL_SMALL_STACK
+          XFREE(b, NULL, DYNAMIC_TYPE_BIGINT);
+       #endif
+          return FP_OKAY;
        }
    }
-
-   /* now do 't' miller rabins */
-   fp_init(&b);
-   for (r = 0; r < t; r++) {
-       fp_set(&b, primes[r]);
-       fp_prime_miller_rabin(a, &b, &res);
-       if (res == FP_NO) {
-          return FP_NO;
-       }
-   }
-   return FP_YES;
+   *result = FP_YES;
+#ifdef WOLFSSL_SMALL_STACK
+   XFREE(b, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+   return FP_OKAY;
 }
 
-int fp_isprime(fp_int *a)
+
+int mp_prime_is_prime_ex(mp_int* a, int t, int* result, WC_RNG* rng)
 {
-  return fp_isprime_ex(a, 8);
+    int ret = FP_YES;
+    fp_digit d;
+    int i;
+
+    if (a == NULL || result == NULL || rng == NULL)
+        return FP_VAL;
+
+    if (fp_isone(a)) {
+        *result = FP_NO;
+        return FP_OKAY;
+    }
+
+    /* check against primes table */
+    for (i = 0; i < FP_PRIME_SIZE; i++) {
+        if (fp_cmp_d(a, primes[i]) == FP_EQ) {
+            *result = FP_YES;
+            return FP_OKAY;
+        }
+    }
+
+    /* do trial division */
+    for (i = 0; i < FP_PRIME_SIZE; i++) {
+        if (fp_mod_d(a, primes[i], &d) == MP_OKAY) {
+            if (d == 0) {
+                *result = FP_NO;
+                return FP_OKAY;
+            }
+        }
+        else
+            return FP_VAL;
+    }
+
+#ifndef WC_NO_RNG
+    /* now do a miller rabin with up to t random numbers, this should
+     * give a (1/4)^t chance of a false prime. */
+    {
+    #ifndef WOLFSSL_SMALL_STACK
+        fp_int b[1], c[1], n1[1], y[1], r[1];
+        byte   base[FP_MAX_PRIME_SIZE];
+    #else
+        fp_int *b, *c, *n1, *y, *r;
+        byte*  base;
+    #endif
+        word32 baseSz;
+        int    err;
+
+        baseSz = fp_count_bits(a);
+        /* The base size is the number of bits / 8. One is added if the number
+         * of bits isn't an even 8. */
+        baseSz = (baseSz / 8) + ((baseSz % 8) ? 1 : 0);
+
+    #ifndef WOLFSSL_SMALL_STACK
+        if (baseSz > sizeof(base))
+            return FP_MEM;
+    #else
+        base = (byte*)XMALLOC(baseSz, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        if (base == NULL)
+            return FP_MEM;
+
+        b = (fp_int*)XMALLOC(sizeof(fp_int) * 5, NULL, DYNAMIC_TYPE_BIGINT);
+        if (b == NULL) {
+            return FP_MEM;
+        }
+        c = &b[1]; n1 = &b[2]; y= &b[3]; r = &b[4];
+    #endif
+
+        fp_init(b);
+        fp_init(c);
+        fp_init(n1);
+        fp_init(y);
+        fp_init(r);
+
+        err = fp_sub_d(a, 2, c);
+        if (err != FP_OKAY) {
+        #ifdef WOLFSSL_SMALL_STACK
+           XFREE(b, NULL, DYNAMIC_TYPE_BIGINT);
+           XFREE(base, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        #endif
+           return err;
+        }
+        while (t > 0) {
+            if ((err = wc_RNG_GenerateBlock(rng, base, baseSz)) != 0) {
+            #ifdef WOLFSSL_SMALL_STACK
+               XFREE(b, NULL, DYNAMIC_TYPE_BIGINT);
+               XFREE(base, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+            #endif
+               return err;
+            }
+
+            fp_read_unsigned_bin(b, base, baseSz);
+            if (fp_cmp_d(b, 2) != FP_GT || fp_cmp(b, c) != FP_LT) {
+                continue;
+            }
+
+            fp_prime_miller_rabin_ex(a, b, &ret, n1, y, r);
+            if (ret == FP_NO)
+                break;
+            fp_zero(b);
+            t--;
+        }
+
+        fp_clear(n1);
+        fp_clear(y);
+        fp_clear(r);
+        fp_clear(b);
+        fp_clear(c);
+     #ifdef WOLFSSL_SMALL_STACK
+        XFREE(b, NULL, DYNAMIC_TYPE_BIGINT);
+        XFREE(base, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+     #endif
+    }
+#else
+    (void)t;
+#endif /* !WC_NO_RNG */
+
+    *result = ret;
+    return FP_OKAY;
 }
+#endif /* !NO_RSA || !NO_DSA || !NO_DH || WOLFSSL_KEY_GEN */
+
+
+#ifdef WOLFSSL_KEY_GEN
+
+static int  fp_gcd(fp_int *a, fp_int *b, fp_int *c);
+static int  fp_lcm(fp_int *a, fp_int *b, fp_int *c);
+static int  fp_randprime(fp_int* N, int len, WC_RNG* rng, void* heap);
+
+int mp_gcd(fp_int *a, fp_int *b, fp_int *c)
+{
+    return fp_gcd(a, b, c);
+}
+
+
+int mp_lcm(fp_int *a, fp_int *b, fp_int *c)
+{
+    return fp_lcm(a, b, c);
+}
+
+int mp_rand_prime(mp_int* N, int len, WC_RNG* rng, void* heap)
+{
+    int err;
+
+    err = fp_randprime(N, len, rng, heap);
+    switch(err) {
+        case FP_VAL:
+            return MP_VAL;
+        case FP_MEM:
+            return MP_MEM;
+        default:
+            break;
+    }
+
+    return MP_OKAY;
+}
+
+int mp_exch (mp_int * a, mp_int * b)
+{
+    return fp_exch(a, b);
+}
+
+
 
 int fp_randprime(fp_int* N, int len, WC_RNG* rng, void* heap)
 {
     static const int USE_BBS = 1;
     int   err, type;
+    int   isPrime = FP_YES;
+        /* Assume the candidate is probably prime and then test until
+         * it is proven composite. */
     byte* buf;
 
+    (void)heap;
+
     /* get type */
     if (len < 0) {
         type = USE_BBS;
@@ -2991,7 +4540,12 @@
         fp_read_unsigned_bin(N, buf, len);
 
         /* test */
-    } while (fp_isprime(N) == FP_NO);
+        /* Running Miller-Rabin up to 3 times gives us a 2^{-80} chance
+         * of a 1024-bit candidate being a false positive, when it is our
+         * prime candidate. (Note 4.49 of Handbook of Applied Cryptography.)
+         * Using 8 because we've always used 8 */
+        mp_prime_is_prime_ex(N, 8, &isPrime, rng);
+    } while (isPrime == FP_NO);
 
     XMEMSET(buf, 0, len);
     XFREE(buf, heap, DYNAMIC_TYPE_TMP_BUFFER);
@@ -3000,37 +4554,62 @@
 }
 
 /* c = [a, b] */
-void fp_lcm(fp_int *a, fp_int *b, fp_int *c)
+int fp_lcm(fp_int *a, fp_int *b, fp_int *c)
 {
-   fp_int  t1, t2;
-
-   fp_init(&t1);
-   fp_init(&t2);
-   fp_gcd(a, b, &t1);
-   if (fp_cmp_mag(a, b) == FP_GT) {
-      fp_div(a, &t1, &t2, NULL);
-      fp_mul(b, &t2, c);
-   } else {
-      fp_div(b, &t1, &t2, NULL);
-      fp_mul(a, &t2, c);
+   int     err;
+#ifndef WOLFSSL_SMALL_STACK
+   fp_int  t[2];
+#else
+   fp_int  *t;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+   t = (fp_int*)XMALLOC(sizeof(fp_int) * 2, NULL, DYNAMIC_TYPE_BIGINT);
+   if (t == NULL) {
+       return FP_MEM;
    }
+#endif
+
+   fp_init(&t[0]);
+   fp_init(&t[1]);
+   err = fp_gcd(a, b, &t[0]);
+   if (err == FP_OKAY) {
+      if (fp_cmp_mag(a, b) == FP_GT) {
+        err = fp_div(a, &t[0], &t[1], NULL);
+        if (err == FP_OKAY)
+          err = fp_mul(b, &t[1], c);
+     } else {
+        err = fp_div(b, &t[0], &t[1], NULL);
+        if (err == FP_OKAY)
+          err = fp_mul(a, &t[1], c);
+     }
+   }
+
+#ifdef WOLFSSL_SMALL_STACK
+   XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+   return err;
 }
 
 
 
 /* c = (a, b) */
-void fp_gcd(fp_int *a, fp_int *b, fp_int *c)
+int fp_gcd(fp_int *a, fp_int *b, fp_int *c)
 {
-   fp_int u, v, r;
+#ifndef WOLFSSL_SMALL_STACK
+   fp_int u[1], v[1], r[1];
+#else
+   fp_int *u, *v, *r;
+#endif
 
    /* either zero than gcd is the largest */
    if (fp_iszero (a) == FP_YES && fp_iszero (b) == FP_NO) {
      fp_abs (b, c);
-     return;
+     return FP_OKAY;
    }
    if (fp_iszero (a) == FP_NO && fp_iszero (b) == FP_YES) {
      fp_abs (a, c);
-     return;
+     return FP_OKAY;
    }
 
    /* optimized.  At this point if a == 0 then
@@ -3038,39 +4617,72 @@
     */
    if (fp_iszero (a) == FP_YES) {
      fp_zero(c);
-     return;
+     return FP_OKAY;
    }
 
+#ifdef WOLFSSL_SMALL_STACK
+   u = (fp_int*)XMALLOC(sizeof(fp_int) * 3, NULL, DYNAMIC_TYPE_BIGINT);
+   if (u == NULL) {
+       return FP_MEM;
+   }
+   v = &u[1]; r = &u[2];
+#endif
+
    /* sort inputs */
    if (fp_cmp_mag(a, b) != FP_LT) {
-      fp_init_copy(&u, a);
-      fp_init_copy(&v, b);
+      fp_init_copy(u, a);
+      fp_init_copy(v, b);
    } else {
-      fp_init_copy(&u, b);
-      fp_init_copy(&v, a);
+      fp_init_copy(u, b);
+      fp_init_copy(v, a);
    }
 
-   fp_init(&r);
-   while (fp_iszero(&v) == FP_NO) {
-      fp_mod(&u, &v, &r);
-      fp_copy(&v, &u);
-      fp_copy(&r, &v);
+   u->sign = FP_ZPOS;
+   v->sign = FP_ZPOS;
+
+   fp_init(r);
+   while (fp_iszero(v) == FP_NO) {
+      fp_mod(u, v, r);
+      fp_copy(v, u);
+      fp_copy(r, v);
    }
-   fp_copy(&u, c);
+   fp_copy(u, c);
+
+#ifdef WOLFSSL_SMALL_STACK
+   XFREE(u, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+   return FP_OKAY;
 }
 
 #endif /* WOLFSSL_KEY_GEN */
 
 
 #if defined(HAVE_ECC) || !defined(NO_PWDBASED) || defined(OPENSSL_EXTRA) || \
-    defined(WC_RSA_BLINDING)
+    defined(WC_RSA_BLINDING) || !defined(NO_DSA) || \
+    (!defined(NO_RSA) && !defined(NO_RSA_BOUNDS_CHECK))
 /* c = a + b */
 void fp_add_d(fp_int *a, fp_digit b, fp_int *c)
 {
+#ifndef WOLFSSL_SMALL_STACK
    fp_int tmp;
    fp_init(&tmp);
    fp_set(&tmp, b);
    fp_add(a, &tmp, c);
+#else
+   int i;
+   fp_word t = b;
+
+   fp_copy(a, c);
+   for (i = 0; t != 0 && i < FP_SIZE && i < c->used; i++) {
+     t += c->dp[i];
+     c->dp[i] = (fp_digit)t;
+     t >>= DIGIT_BIT;
+   }
+   if (i == c->used && i < FP_SIZE && t != 0) {
+       c->dp[i] = t;
+       c->used++;
+   }
+#endif
 }
 
 /* external compatibility */
@@ -3080,15 +4692,16 @@
     return MP_OKAY;
 }
 
-#endif  /* HAVE_ECC || !NO_PWDBASED */
+#endif  /* HAVE_ECC || !NO_PWDBASED || OPENSSL_EXTRA || WC_RSA_BLINDING ||
+  !NO_DSA || (!NO_RSA && !NO_RSA_BOUNDS_CHECK) */
 
 
 #if !defined(NO_DSA) || defined(HAVE_ECC) || defined(WOLFSSL_KEY_GEN) || \
     defined(HAVE_COMP_KEY) || defined(WOLFSSL_DEBUG_MATH) || \
-    defined(DEBUG_WOLFSSL)
+    defined(DEBUG_WOLFSSL) || defined(OPENSSL_EXTRA) || defined(WC_MP_TO_RADIX)
 
 /* chars used in radix conversions */
-static const char* const fp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+static wcchar fp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                                      "abcdefghijklmnopqrstuvwxyz+/";
 #endif
 
@@ -3212,15 +4825,13 @@
 /* fast math conversion */
 int mp_sqr(fp_int *A, fp_int *B)
 {
-    fp_sqr(A, B);
-    return MP_OKAY;
+    return fp_sqr(A, B);
 }
 
 /* fast math conversion */
 int mp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
 {
-    fp_montgomery_reduce(a, m, mp);
-    return MP_OKAY;
+    return fp_montgomery_reduce(a, m, mp);
 }
 
 
@@ -3254,7 +4865,8 @@
 
 #endif /* HAVE_ECC */
 
-#if defined(HAVE_ECC) || !defined(NO_RSA) || !defined(NO_DSA)
+#if defined(HAVE_ECC) || !defined(NO_RSA) || !defined(NO_DSA) || \
+    defined(WOLFSSL_KEY_GEN)
 /* fast math conversion */
 int mp_set(fp_int *a, fp_digit b)
 {
@@ -3263,16 +4875,18 @@
 }
 #endif
 
-#if defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || \
-    defined(WOLFSSL_DEBUG_MATH) || defined(DEBUG_WOLFSSL) || \
-    defined(WOLFSSL_PUBLIC_MP)
+#ifdef WC_MP_TO_RADIX
 
 /* returns size of ASCII representation */
 int mp_radix_size (mp_int *a, int radix, int *size)
 {
-    int     res, digs;
-    fp_int  t;
+    int      res, digs;
     fp_digit d;
+#ifndef WOLFSSL_SMALL_STACK
+    fp_int   t[1];
+#else
+    fp_int   *t;
+#endif
 
     *size = 0;
 
@@ -3300,34 +4914,50 @@
         ++digs;
     }
 
+#ifdef WOLFSSL_SMALL_STACK
+    t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
+    if (t == NULL)
+        return FP_MEM;
+#endif
+
     /* init a copy of the input */
-    fp_init_copy (&t, a);
+    fp_init_copy (t, a);
 
     /* force temp to positive */
-    t.sign = FP_ZPOS;
+    t->sign = FP_ZPOS;
 
     /* fetch out all of the digits */
-    while (fp_iszero (&t) == FP_NO) {
-        if ((res = fp_div_d (&t, (mp_digit) radix, &t, &d)) != FP_OKAY) {
-            fp_zero (&t);
+    while (fp_iszero (t) == FP_NO) {
+        if ((res = fp_div_d (t, (mp_digit) radix, t, &d)) != FP_OKAY) {
+            fp_zero (t);
+        #ifdef WOLFSSL_SMALL_STACK
+            XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
+        #endif
             return res;
         }
         ++digs;
     }
-    fp_zero (&t);
+    fp_zero (t);
 
     /* return digs + 1, the 1 is for the NULL byte that would be required. */
     *size = digs + 1;
+#ifdef WOLFSSL_SMALL_STACK
+    XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
     return FP_OKAY;
 }
 
 /* stores a bignum as a ASCII string in a given radix (2..64) */
 int mp_toradix (mp_int *a, char *str, int radix)
 {
-    int     res, digs;
-    fp_int  t;
+    int      res, digs;
     fp_digit d;
-    char   *_s = str;
+    char     *_s = str;
+#ifndef WOLFSSL_SMALL_STACK
+    fp_int   t[1];
+#else
+    fp_int   *t;
+#endif
 
     /* check range of the radix */
     if (radix < 2 || radix > 64) {
@@ -3338,29 +4968,44 @@
     if (fp_iszero(a) == FP_YES) {
         *str++ = '0';
         *str = '\0';
-        return FP_YES;
+        return FP_OKAY;
     }
 
+#ifdef WOLFSSL_SMALL_STACK
+    t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
+    if (t == NULL)
+        return FP_MEM;
+#endif
+
     /* init a copy of the input */
-    fp_init_copy (&t, a);
+    fp_init_copy (t, a);
 
     /* if it is negative output a - */
-    if (t.sign == FP_NEG) {
+    if (t->sign == FP_NEG) {
         ++_s;
         *str++ = '-';
-        t.sign = FP_ZPOS;
+        t->sign = FP_ZPOS;
     }
 
     digs = 0;
-    while (fp_iszero (&t) == FP_NO) {
-        if ((res = fp_div_d (&t, (fp_digit) radix, &t, &d)) != FP_OKAY) {
-            fp_zero (&t);
+    while (fp_iszero (t) == FP_NO) {
+        if ((res = fp_div_d (t, (fp_digit) radix, t, &d)) != FP_OKAY) {
+            fp_zero (t);
+        #ifdef WOLFSSL_SMALL_STACK
+            XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
+        #endif
             return res;
         }
         *str++ = fp_s_rmap[d];
         ++digs;
     }
-
+#ifndef WC_DISABLE_RADIX_ZERO_PAD
+    /* For hexadecimal output, add zero padding when number of digits is odd */
+    if ((digs & 1) && (radix == 16)) {
+        *str++ = fp_s_rmap[0];
+        ++digs;
+    }
+#endif
     /* reverse the digits of the string.  In this case _s points
      * to the first digit [excluding the sign] of the number]
      */
@@ -3369,7 +5014,10 @@
     /* append a NULL so the string is properly terminated */
     *str = '\0';
 
-    fp_zero (&t);
+    fp_zero (t);
+#ifdef WOLFSSL_SMALL_STACK
+    XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
     return FP_OKAY;
 }
 
@@ -3377,10 +5025,12 @@
 void mp_dump(const char* desc, mp_int* a, byte verbose)
 {
   char buffer[FP_SIZE * sizeof(fp_digit) * 2];
-  int size = FP_SIZE;
+  int size;
 
 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
   size = a->size;
+#else
+  size = FP_SIZE;
 #endif
 
   printf("%s: ptr=%p, used=%d, sign=%d, size=%d, fpd=%d\n",
@@ -3399,7 +5049,7 @@
 }
 #endif /* WOLFSSL_DEBUG_MATH */
 
-#endif /* defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || defined(WOLFSSL_DEBUG_MATH) */
+#endif /* WC_MP_TO_RADIX */
 
 
 int mp_abs(mp_int* a, mp_int* b)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wolfcrypt/src/wc_dsp.c	Thu Jun 04 23:57:22 2020 +0000
@@ -0,0 +1,328 @@
+/* wc_dsp.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/cpuid.h>
+#include <wolfssl/wolfcrypt/logging.h>
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
+#if defined(WOLFSSL_DSP)
+#include "remote.h"
+#include "rpcmem.h"
+static wolfSSL_DSP_Handle_cb handle_function = NULL;
+static remote_handle64 defaultHandle;
+static wolfSSL_Mutex handle_mutex; /* mutex for access to single default handle */
+
+#define WOLFSSL_HANDLE_DONE 1
+#define WOLFSSL_HANDLE_GET 0
+
+/* callback function for setting the default handle in single threaded
+ * use cases */
+static int default_handle_cb(remote_handle64 *handle, int finished, void *ctx)
+{
+    (void)ctx;
+    if (finished == WOLFSSL_HANDLE_DONE) {
+        if (wc_UnLockMutex(&handle_mutex) != 0) {
+            WOLFSSL_MSG("Unlock handle mutex failed");
+            return -1;
+        }
+    }
+    else {
+        if (wc_LockMutex(&handle_mutex) != 0) {
+            WOLFSSL_MSG("Lock handle mutex failed");
+            return -1;
+        }
+        *handle = defaultHandle;
+    }
+    return 0;
+}
+
+
+/* Set global callback for getting handle to use
+ * return 0 on success */
+int wolfSSL_SetHandleCb(wolfSSL_DSP_Handle_cb in)
+{
+    handle_function = in;
+    return 0;
+}
+
+
+/* returns 1 if global handle callback is set and 0 if not */
+int wolfSSL_GetHandleCbSet()
+{
+    return (handle_function != NULL)? 1: 0;
+}
+
+
+/* Local function for setting up default handle
+ * returns 0 on success */
+int wolfSSL_InitHandle()
+{
+    char *sp_URI_value;
+    int ret;
+
+    sp_URI_value = wolfSSL_URI "&_dom=adsp";
+    ret = wolfSSL_open(sp_URI_value, &defaultHandle);
+    if (ret != 0) {
+        WOLFSSL_MSG("Unable to open aDSP?");
+        return -1;
+    }
+    wolfSSL_SetHandleCb(default_handle_cb);
+    ret = wc_InitMutex(&handle_mutex);
+    if (ret != 0) {
+        WOLFSSL_MSG("Unable to init handle mutex");
+        return -1;
+    }
+    return 0;
+}
+
+
+/* internal function that closes default handle and frees mutex */
+void wolfSSL_CleanupHandle()
+{
+    wolfSSL_close(defaultHandle);
+    wc_FreeMutex(&handle_mutex);
+}
+#if defined(WOLFSSL_HAVE_SP_ECC)
+
+/* ecc conversion from sp_c32.c */
+#include <wolfssl/wolfcrypt/sp.h>
+
+
+#ifndef WOLFSSL_SP_NO_256
+
+#ifdef HAVE_ECC_VERIFY
+/* Read big endian unsigned byte array into r.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
+ * a  Byte array.
+ * n  Number of bytes in array to read.
+ */
+static void int_256_from_bin(int32* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = n-1; i >= 0; i--) {
+        r[j] |= (((int32)a[i]) << s);
+        if (s >= 18U) {
+            r[j] &= 0x3ffffff;
+            s = 26U - s;
+            if (j + 1 >= size) {
+                break;
+            }
+            r[++j] = (int32)a[i] >> s;
+            s = 8U - s;
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
+ * a  A multi-precision integer.
+ */
+static void int_256_from_mp(int32* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 26
+    int j;
+
+    XMEMCPY(r, a->dp, sizeof(int32) * a->used);
+
+    for (j = a->used; j < size; j++) {
+        r[j] = 0;
+    }
+#elif DIGIT_BIT > 26
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((int32)a->dp[i] << s);
+        r[j] &= 0x3ffffff;
+        s = 26U - s;
+        if (j + 1 >= size) {
+            break;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (int32)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 26U) <= (word32)DIGIT_BIT) {
+            s += 26U;
+            r[j] &= 0x3ffffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (int32)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#else
+    int i, j = 0, s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((int32)a->dp[i]) << s;
+        if (s + DIGIT_BIT >= 26) {
+            r[j] &= 0x3ffffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            s = 26 - s;
+            if (s == DIGIT_BIT) {
+                r[++j] = 0;
+                s = 0;
+            }
+            else {
+                r[++j] = a->dp[i] >> s;
+                s = DIGIT_BIT - s;
+            }
+        }
+        else {
+            s += DIGIT_BIT;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#endif
+}
+
+/* Verify the signature values with the hash and public key.
+ *   e = Truncate(hash, 256)
+ *   u1 = e/s mod order
+ *   u2 = r/s mod order
+ *   r == (u1.G + u2.Q)->x mod order
+ * Optimization: Leave point in projective form.
+ *   (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
+ *   (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
+ * The hash is truncated to the first 256 bits.
+ *
+ * hash     Hash to sign.
+ * hashLen  Length of the hash data.
+ * rng      Random number generator.
+ * priv     Private part of key - scalar.
+ * rm       First part of result as an mp_int.
+ * sm       Sirst part of result as an mp_int.
+ * heap     Heap to use for allocation.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+int sp_dsp_ecc_verify_256(remote_handle64 handleIn, const byte* hash, word32 hashLen, mp_int* pX,
+    mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap)
+{
+    int ret;
+    remote_handle64 handle = handleIn;
+
+#if 0
+    /* calling to alloc memory on the ION using these settings slowed the performance down slightly */
+    int32 *x = (int32*)rpcmem_alloc(RPCMEM_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS, 10*sizeof(int));
+    int32 *y = (int32*)rpcmem_alloc(RPCMEM_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS, 10*sizeof(int));
+    int32 *z = (int32*)rpcmem_alloc(RPCMEM_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS, 10*sizeof(int));
+    int32 *s = (int32*)rpcmem_alloc(RPCMEM_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS, 10*sizeof(int));
+    int32 *u1 = (int32*)rpcmem_alloc(RPCMEM_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS, 10*sizeof(int));
+    int32 *u2 = (int32*)rpcmem_alloc(RPCMEM_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS, 10*sizeof(int));
+#endif
+    int32 x[10] __attribute__((aligned(128)));
+    int32 y[10] __attribute__((aligned(128)));
+    int32 z[10] __attribute__((aligned(128)));
+    int32 s[10] __attribute__((aligned(128)));
+    int32 u1[10] __attribute__((aligned(128)));
+    int32 u2[10] __attribute__((aligned(128)));
+
+    if (hashLen > 32U) {
+        hashLen = 32U;
+    }
+
+    int_256_from_bin(u1, 10, hash, (int)hashLen);
+    int_256_from_mp(u2, 10, r);
+    int_256_from_mp(s, 10, sm);
+    int_256_from_mp(x, 10, pX);
+    int_256_from_mp(y, 10, pY);
+    int_256_from_mp(z, 10, pZ);
+
+    if (handle_function != NULL) {
+        handle_function(&handle, WOLFSSL_HANDLE_GET, NULL);
+    }
+
+    *res = 0;
+    ret = wolfSSL_DSP_ECC_Verify_256(handle, u1, 10, u2, 10, s, 10, x, 10, y, 10, z, 10, res);
+
+    if (handle_function != NULL) {
+        handle_function(&handle, WOLFSSL_HANDLE_DONE, NULL);
+    }
+#if 0
+    rpcmem_free(x);
+    rpcmem_free(y);
+    rpcmem_free(z);
+    rpcmem_free(s);
+    rpcmem_free(u1);
+    rpcmem_free(u2);
+#endif
+    return ret;
+}
+
+
+/* Used to assign a handle to an ecc_key structure.
+ * returns 0 on success */
+int wc_ecc_set_handle(ecc_key* key, remote_handle64 handle)
+{
+    if (key == NULL) {
+        return BAD_FUNC_ARG;
+    }
+    key->handle = handle;
+    return 0;
+}
+#endif /* HAVE_ECC_VERIFY */
+#endif /* !WOLFSSL_SP_NO_256 */
+#endif /* WOLFSSL_HAVE_SP_ECC */
+#endif /* WOLFSSL_DSP */
+
--- a/wolfcrypt/src/wc_encrypt.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/wc_encrypt.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* wc_encrypt.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -87,7 +87,7 @@
 {
     int  ret = 0;
 #ifdef WOLFSSL_SMALL_STACK
-    Aes* aes = NULL;
+    Aes* aes;
 #else
     Aes  aes[1];
 #endif
@@ -116,13 +116,13 @@
 #endif /* !NO_AES && HAVE_AES_CBC */
 
 
-#ifndef NO_DES3
+#if !defined(NO_DES3) && !defined(WOLFSSL_TI_CRYPT)
 int wc_Des_CbcEncryptWithKey(byte* out, const byte* in, word32 sz,
                              const byte* key, const byte* iv)
 {
     int ret  = 0;
 #ifdef WOLFSSL_SMALL_STACK
-    Des* des = NULL;
+    Des* des;
 #else
     Des  des[1];
 #endif
@@ -149,7 +149,7 @@
 {
     int ret  = 0;
 #ifdef WOLFSSL_SMALL_STACK
-    Des* des = NULL;
+    Des* des;
 #else
     Des  des[1];
 #endif
@@ -177,7 +177,7 @@
 {
     int ret    = 0;
 #ifdef WOLFSSL_SMALL_STACK
-    Des3* des3 = NULL;
+    Des3* des3;
 #else
     Des3  des3[1];
 #endif
@@ -209,7 +209,7 @@
 {
     int ret    = 0;
 #ifdef WOLFSSL_SMALL_STACK
-    Des3* des3 = NULL;
+    Des3* des3;
 #else
     Des3  des3[1];
 #endif
@@ -266,17 +266,19 @@
         return BUFFER_E;
 
 #ifdef WOLFSSL_SMALL_STACK
-    key = (byte*)XMALLOC(WC_MAX_SYM_KEY_SIZE, NULL, DYNAMIC_TYPE_SYMETRIC_KEY);
+    key = (byte*)XMALLOC(WC_MAX_SYM_KEY_SIZE, NULL, DYNAMIC_TYPE_SYMMETRIC_KEY);
     if (key == NULL) {
         return MEMORY_E;
     }
 #endif
 
+    (void)XMEMSET(key, 0, WC_MAX_SYM_KEY_SIZE);
+
 #ifndef NO_PWDBASED
     if ((ret = wc_PBKDF1(key, password, passwordSz, info->iv, PKCS5_SALT_SZ, 1,
                                         info->keySz, hashType)) != 0) {
 #ifdef WOLFSSL_SMALL_STACK
-        XFREE(key, NULL, DYNAMIC_TYPE_SYMETRIC_KEY);
+        XFREE(key, NULL, DYNAMIC_TYPE_SYMMETRIC_KEY);
 #endif
         return ret;
     }
@@ -295,7 +297,7 @@
 #endif /* !NO_AES && HAVE_AES_CBC && HAVE_AES_DECRYPT */
 
 #ifdef WOLFSSL_SMALL_STACK
-    XFREE(key, NULL, DYNAMIC_TYPE_SYMETRIC_KEY);
+    XFREE(key, NULL, DYNAMIC_TYPE_SYMMETRIC_KEY);
 #endif
 
     return ret;
@@ -321,17 +323,19 @@
     }
 
 #ifdef WOLFSSL_SMALL_STACK
-    key = (byte*)XMALLOC(WC_MAX_SYM_KEY_SIZE, NULL, DYNAMIC_TYPE_SYMETRIC_KEY);
+    key = (byte*)XMALLOC(WC_MAX_SYM_KEY_SIZE, NULL, DYNAMIC_TYPE_SYMMETRIC_KEY);
     if (key == NULL) {
         return MEMORY_E;
     }
 #endif /* WOLFSSL_SMALL_STACK */
 
+    (void)XMEMSET(key, 0, WC_MAX_SYM_KEY_SIZE);
+
 #ifndef NO_PWDBASED
     if ((ret = wc_PBKDF1(key, password, passwordSz, info->iv, PKCS5_SALT_SZ, 1,
                                         info->keySz, hashType)) != 0) {
 #ifdef WOLFSSL_SMALL_STACK
-        XFREE(key, NULL, DYNAMIC_TYPE_SYMETRIC_KEY);
+        XFREE(key, NULL, DYNAMIC_TYPE_SYMMETRIC_KEY);
 #endif
         return ret;
     }
@@ -343,14 +347,14 @@
     if (info->cipherType == WC_CIPHER_DES3)
         ret = wc_Des3_CbcEncryptWithKey(der, der, derSz, key, info->iv);
 #endif /* NO_DES3 */
-#ifndef NO_AES
+#if !defined(NO_AES) && defined(HAVE_AES_CBC)
     if (info->cipherType == WC_CIPHER_AES_CBC)
         ret = wc_AesCbcEncryptWithKey(der, der, derSz, key, info->keySz,
             info->iv);
-#endif /* NO_AES */
+#endif /* !NO_AES && HAVE_AES_CBC */
 
 #ifdef WOLFSSL_SMALL_STACK
-    XFREE(key, NULL, DYNAMIC_TYPE_SYMETRIC_KEY);
+    XFREE(key, NULL, DYNAMIC_TYPE_SYMMETRIC_KEY);
 #endif
 
     return ret;
@@ -359,18 +363,19 @@
 #endif /* WOLFSSL_ENCRYPTED_KEYS */
 
 
-#ifndef NO_PWDBASED
+#if !defined(NO_PWDBASED) && !defined(NO_ASN)
 
+#if defined(HAVE_PKCS8) || defined(HAVE_PKCS12)
 /* Decrypt/Encrypt input in place from parameters based on id
  *
  * returns a negative value on fail case
  */
 int wc_CryptKey(const char* password, int passwordSz, byte* salt,
                       int saltSz, int iterations, int id, byte* input,
-                      int length, int version, byte* cbcIv, int enc)
+                      int length, int version, byte* cbcIv, int enc, int shaOid)
 {
     int typeH;
-    int derivedLen;
+    int derivedLen = 0;
     int ret = 0;
 #ifdef WOLFSSL_SMALL_STACK
     byte* key;
@@ -399,9 +404,17 @@
             break;
 
         case PBE_SHA1_DES3:
-            typeH = WC_SHA;
-            derivedLen = 32;           /* may need iv for v1.5 */
-            break;
+            switch(shaOid) {
+                case HMAC_SHA256_OID:
+                    typeH = WC_SHA256;
+                    derivedLen = 32;
+                    break;
+                default:
+                    typeH = WC_SHA;
+                    derivedLen = 32;           /* may need iv for v1.5 */
+                    break;
+            }
+        break;
         #endif /* !NO_SHA */
     #endif /* !NO_DES3 */
     #if !defined(NO_SHA) && !defined(NO_RC4)
@@ -410,14 +423,41 @@
             derivedLen = 16;
             break;
     #endif
-    #ifdef WOLFSSL_AES_256
+    #if defined(WOLFSSL_AES_256)
         case PBE_AES256_CBC:
-            typeH = WC_SHA256;
-            derivedLen = 32;
+            switch(shaOid) {
+                case HMAC_SHA256_OID:
+                    typeH = WC_SHA256;
+                    derivedLen = 32;
+                    break;
+            #ifndef NO_SHA
+                default:
+                    typeH = WC_SHA;
+                    derivedLen = 32;
+                    break;
+            #endif
+            }
             break;
-    #endif
+    #endif /* WOLFSSL_AES_256 && !NO_SHA */
+    #if defined(WOLFSSL_AES_128)
+        case PBE_AES128_CBC:
+            switch(shaOid) {
+                case HMAC_SHA256_OID:
+                    typeH = WC_SHA256;
+                    derivedLen = 16;
+                    break;
+            #ifndef NO_SHA
+                default:
+                    typeH = WC_SHA;
+                    derivedLen = 16;
+                    break;
+            #endif
+            }
+            break;
+    #endif /* WOLFSSL_AES_128 && !NO_SHA */
         default:
             WOLFSSL_MSG("Unknown/Unsupported encrypt/decrypt id");
+            (void)shaOid;
             return ALGO_ID_E;
     }
 
@@ -435,6 +475,7 @@
         ret = wc_PBKDF1(key, (byte*)password, passwordSz,
                         salt, saltSz, iterations, derivedLen, typeH);
 #endif
+#ifdef HAVE_PKCS12
     else if (version == PKCS12v1) {
         int  i, idx = 0;
         byte unicodePasswd[MAX_UNICODE_SZ];
@@ -460,6 +501,7 @@
             ret += wc_PKCS12_PBKDF(cbcIv, unicodePasswd, idx, salt, saltSz,
                                 iterations, 8, typeH, 2);
     }
+#endif /* HAVE_PKCS12 */
     else {
 #ifdef WOLFSSL_SMALL_STACK
         XFREE(key, NULL, DYNAMIC_TYPE_TMP_BUFFER);
@@ -564,28 +606,40 @@
             break;
         }
 #endif
-#ifndef NO_AES
+#if !defined(NO_AES) && defined(HAVE_AES_CBC)
     #ifdef WOLFSSL_AES_256
         case PBE_AES256_CBC:
+        case PBE_AES128_CBC:
         {
-            Aes dec;
-            ret = wc_AesInit(&dec, NULL, INVALID_DEVID);
-            if (ret == 0)
-                ret = wc_AesSetKey(&dec, key, derivedLen,
-                                   cbcIv, AES_DECRYPTION);
-            if (ret == 0)
-                ret = wc_AesCbcDecrypt(&dec, input, input, length);
+            Aes aes;
+            ret = wc_AesInit(&aes, NULL, INVALID_DEVID);
+            if (ret == 0) {
+                if (enc) {
+                    ret = wc_AesSetKey(&aes, key, derivedLen, cbcIv,
+                                                                AES_ENCRYPTION);
+                }
+                else {
+                    ret = wc_AesSetKey(&aes, key, derivedLen, cbcIv,
+                                                                AES_DECRYPTION);
+                }
+            }
+            if (ret == 0) {
+                if (enc)
+                    ret = wc_AesCbcEncrypt(&aes, input, input, length);
+                else
+                    ret = wc_AesCbcDecrypt(&aes, input, input, length);
+            }
             if (ret != 0) {
 #ifdef WOLFSSL_SMALL_STACK
                 XFREE(key, NULL, DYNAMIC_TYPE_TMP_BUFFER);
 #endif
                 return ret;
             }
-            ForceZero(&dec, sizeof(Aes));
+            ForceZero(&aes, sizeof(Aes));
             break;
         }
     #endif /* WOLFSSL_AES_256 */
-#endif
+#endif /* !NO_AES && HAVE_AES_CBC */
 
         default:
 #ifdef WOLFSSL_SMALL_STACK
@@ -602,5 +656,6 @@
     return ret;
 }
 
+#endif /* HAVE_PKCS8 || HAVE_PKCS12 */
 #endif /* !NO_PWDBASED */
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wolfcrypt/src/wc_pkcs11.c	Thu Jun 04 23:57:22 2020 +0000
@@ -0,0 +1,2547 @@
+/* wc_pkcs11.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#ifdef HAVE_PKCS11
+
+#include <dlfcn.h>
+
+#include <wolfssl/wolfcrypt/wc_pkcs11.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/asn.h>
+#include <wolfssl/wolfcrypt/logging.h>
+#ifndef NO_RSA
+    #include <wolfssl/wolfcrypt/rsa.h>
+#endif
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
+#define MAX_EC_PARAM_LEN   16
+
+#if defined(NO_PKCS11_RSA) && !defined(NO_RSA)
+    #define NO_RSA
+#endif
+#if defined(NO_PKCS11_ECC) && defined(HAVE_ECC)
+    #undef HAVE_ECC
+#endif
+#if defined(NO_PKCS11_AES) && !defined(NO_AES)
+    #define NO_AES
+#endif
+#if defined(NO_PKCS11_AESGCM) && defined(HAVE_AESGCM)
+    #undef HAVE_AESGCM
+#endif
+#if defined(NO_PKCS11_AESCBC) && defined(HAVE_AES_CBC)
+    #undef HAVE_AES_CBC
+#endif
+#if defined(NO_PKCS11_HMAC) && !defined(NO_HMAC)
+    #define NO_HMAC
+#endif
+#if defined(NO_PKCS11_RNG) && !defined(WC_NO_RNG)
+    #define WC_NO_RNG
+#endif
+
+
+#if defined(HAVE_ECC) && !defined(NO_PKCS11_ECDH)
+static CK_BBOOL ckFalse = CK_FALSE;
+#endif
+#if !defined(NO_RSA) || defined(HAVE_ECC) || (!defined(NO_AES) && \
+           (defined(HAVE_AESGCM) || defined(HAVE_AES_CBC))) || !defined(NO_HMAC)
+static CK_BBOOL ckTrue  = CK_TRUE;
+#endif
+
+#ifndef NO_RSA
+static CK_KEY_TYPE rsaKeyType  = CKK_RSA;
+#endif
+#ifdef HAVE_ECC
+static CK_KEY_TYPE ecKeyType   = CKK_EC;
+#endif
+#if !defined(NO_RSA) || defined(HAVE_ECC)
+static CK_OBJECT_CLASS pubKeyClass     = CKO_PUBLIC_KEY;
+static CK_OBJECT_CLASS privKeyClass    = CKO_PRIVATE_KEY;
+#endif
+#if (!defined(NO_AES) && (defined(HAVE_AESGCM) || defined(HAVE_AES_CBC))) || \
+            !defined(NO_HMAC) || (defined(HAVE_ECC) && !defined(NO_PKCS11_ECDH))
+static CK_OBJECT_CLASS secretKeyClass  = CKO_SECRET_KEY;
+#endif
+
+/**
+ * Load library, get function list and initialize PKCS#11.
+ *
+ * @param  dev     [in]  Device object.
+ * @param  library [in]  Library name including path.
+ * @return  BAD_FUNC_ARG when dev or library are NULL pointers.
+ *          BAD_PATH_ERROR when dynamic library cannot be opened.
+ *          WC_INIT_E when the initialization PKCS#11 fails.
+ *          WC_HW_E when unable to get PKCS#11 function list.
+ *          0 on success.
+ */
+int wc_Pkcs11_Initialize(Pkcs11Dev* dev, const char* library, void* heap)
+{
+    int                  ret = 0;
+    void*                func;
+    CK_C_INITIALIZE_ARGS args;
+
+    if (dev == NULL || library == NULL)
+        ret = BAD_FUNC_ARG;
+
+    if (ret == 0) {
+        dev->heap = heap;
+        dev->dlHandle = dlopen(library, RTLD_NOW | RTLD_LOCAL);
+        if (dev->dlHandle == NULL) {
+            WOLFSSL_MSG(dlerror());
+            ret = BAD_PATH_ERROR;
+        }
+    }
+
+    if (ret == 0) {
+        dev->func = NULL;
+        func = dlsym(dev->dlHandle, "C_GetFunctionList");
+        if (func == NULL)
+            ret = WC_HW_E;
+    }
+    if (ret == 0) {
+        if (((CK_C_GetFunctionList)func)(&dev->func) != CKR_OK)
+            ret = WC_HW_E;
+    }
+
+    if (ret == 0) {
+        XMEMSET(&args, 0x00, sizeof(args));
+        args.flags = CKF_OS_LOCKING_OK;
+        if (dev->func->C_Initialize(&args) != CKR_OK)
+            ret = WC_INIT_E;
+    }
+
+    if (ret != 0)
+        wc_Pkcs11_Finalize(dev);
+
+    return ret;
+}
+
+/**
+ * Close the Pkcs#11 library.
+ *
+ * @param  dev  [in]  Device object.
+ */
+void wc_Pkcs11_Finalize(Pkcs11Dev* dev)
+{
+    if (dev != NULL && dev->dlHandle != NULL) {
+        if (dev->func != NULL) {
+            dev->func->C_Finalize(NULL);
+            dev->func = NULL;
+        }
+        dlclose(dev->dlHandle);
+        dev->dlHandle = NULL;
+    }
+}
+
+/**
+ * Set up a token for use.
+ *
+ * @param  token      [in]  Token object.
+ * @param  dev        [in]  PKCS#11 device object.
+ * @param  slotId     [in]  Slot number of the token.<br>
+ *                          Passing -1 uses the first available slot.
+ * @param  tokenName  [in]  Name of token to initialize.
+ * @param  userPin    [in]  PIN to use to login as user.
+ * @param  userPinSz  [in]  Number of bytes in PIN.
+ * @return  BAD_FUNC_ARG when token, dev and/or tokenName is NULL.
+ *          WC_INIT_E when initializing token fails.
+ *          WC_HW_E when another PKCS#11 library call fails.
+ *          -1 when no slot available.
+ *          0 on success.
+ */
+int wc_Pkcs11Token_Init(Pkcs11Token* token, Pkcs11Dev* dev, int slotId,
+    const char* tokenName, const unsigned char* userPin, int userPinSz)
+{
+    int         ret = 0;
+    CK_RV       rv;
+    CK_SLOT_ID* slot = NULL;
+    CK_ULONG    slotCnt = 0;
+
+    if (token == NULL || dev == NULL || tokenName == NULL)
+        ret = BAD_FUNC_ARG;
+
+    if (ret == 0) {
+        if (slotId < 0) {
+            /* Use first available slot with a token. */
+            rv = dev->func->C_GetSlotList(CK_TRUE, NULL, &slotCnt);
+            if (rv != CKR_OK)
+                ret = WC_HW_E;
+            if (ret == 0) {
+                slot = (CK_SLOT_ID*)XMALLOC(slotCnt * sizeof(*slot), dev->heap,
+                                                       DYNAMIC_TYPE_TMP_BUFFER);
+                if (slot == NULL)
+                    ret = MEMORY_E;
+            }
+            if (ret == 0) {
+                rv = dev->func->C_GetSlotList(CK_TRUE, slot, &slotCnt);
+                if (rv != CKR_OK)
+                    ret = WC_HW_E;
+            }
+            if (ret == 0) {
+                if (slotCnt > 0)
+                    slotId = (int)slot[0];
+                else
+                    ret = WC_HW_E;
+            }
+        }
+    }
+    if (ret == 0) {
+        token->func = dev->func;
+        token->slotId = (CK_SLOT_ID)slotId;
+        token->handle = NULL_PTR;
+        token->userPin = (CK_UTF8CHAR_PTR)userPin;
+        token->userPinSz = (CK_ULONG)userPinSz;
+    }
+
+    if (slot != NULL)
+        XFREE(slot, dev->heap, DYNAMIC_TYPE_TMP_BUFFER);
+
+    return ret;
+}
+
+/**
+ * Finalize token.
+ * Closes all sessions on token.
+ *
+ * @param  token  [in]  Token object.
+ */
+void wc_Pkcs11Token_Final(Pkcs11Token* token)
+{
+    if (token != NULL && token->func != NULL) {
+        token->func->C_CloseAllSessions(token->slotId);
+        token->handle = NULL_PTR;
+        ForceZero(token->userPin, (word32)token->userPinSz);
+    }
+}
+
+/**
+ * Open a session on a token.
+ *
+ * @param  token      [in]  Token object.
+ * @param  session    [in]  Session object.
+ * @param  readWrite  [in]  Boolean indicating to open session for Read/Write.
+ * @return  BAD_FUNC_ARG when token or session is NULL.
+ *          WC_HW_E when opening the session fails.
+ *          0 on success.
+ */
+static int Pkcs11OpenSession(Pkcs11Token* token, Pkcs11Session* session,
+                             int readWrite)
+{
+    int   ret = 0;
+    CK_RV rv;
+
+    if (token == NULL || session == NULL)
+        ret = BAD_FUNC_ARG;
+
+    if (ret == 0) {
+        if (token->handle != NULL_PTR)
+            session->handle = token->handle;
+        else {
+            /* Create a new session. */
+            CK_FLAGS flags = CKF_SERIAL_SESSION;
+
+            if (readWrite)
+                flags |= CKF_RW_SESSION;
+
+            rv = token->func->C_OpenSession(token->slotId, flags,
+                                            (CK_VOID_PTR)NULL, (CK_NOTIFY)NULL,
+                                            &session->handle);
+            if (rv != CKR_OK)
+                ret = WC_HW_E;
+            if (ret == 0 && token->userPin != NULL) {
+                rv = token->func->C_Login(session->handle, CKU_USER,
+                                              token->userPin, token->userPinSz);
+                if (rv != CKR_OK)
+                    ret = WC_HW_E;
+            }
+        }
+    }
+    if (ret == 0) {
+        session->func = token->func;
+        session->slotId = token->slotId;
+    }
+
+    return ret;
+}
+
+/**
+ * Close a session on a token.
+ * Won't close a session created externally.
+ *
+ * @param  token    [in]  Token object.
+ * @param  session  [in]  Session object.
+ */
+static void Pkcs11CloseSession(Pkcs11Token* token, Pkcs11Session* session)
+{
+    if (token != NULL && session != NULL && token->handle != session->handle) {
+        if (token->userPin != NULL)
+            session->func->C_Logout(session->handle);
+        session->func->C_CloseSession(session->handle);
+    }
+}
+
+/**
+ * Open a session on the token to be used for all operations.
+ *
+ * @param  token      [in]  Token object.
+ * @param  readWrite  [in]  Boolean indicating to open session for Read/Write.
+ * @return  BAD_FUNC_ARG when token is NULL.
+ *          WC_HW_E when opening the session fails.
+ *          0 on success.
+ */
+int wc_Pkcs11Token_Open(Pkcs11Token* token, int readWrite)
+{
+    int ret = 0;
+    Pkcs11Session session;
+
+    if (token == NULL)
+        ret = BAD_FUNC_ARG;
+
+    if (ret == 0) {
+        ret = Pkcs11OpenSession(token, &session, readWrite);
+        token->handle = session.handle;
+    }
+
+    return ret;
+}
+
+/**
+ * Close the token's session.
+ * All object, like keys, will be destroyed.
+ *
+ * @param  token    [in]  Token object.
+ */
+void wc_Pkcs11Token_Close(Pkcs11Token* token)
+{
+    Pkcs11Session session;
+
+    if (token != NULL) {
+        session.func = token->func;
+        session.handle = token->handle;
+        token->handle = NULL_PTR;
+        Pkcs11CloseSession(token, &session);
+    }
+}
+
+
+#if (!defined(NO_AES) && (defined(HAVE_AESGCM) || defined(HAVE_AES_CBC))) || \
+                                                               !defined(NO_HMAC)
+static int Pkcs11CreateSecretKey(CK_OBJECT_HANDLE* key, Pkcs11Session* session,
+                                 CK_KEY_TYPE keyType, unsigned char* data,
+                                 int len, unsigned char* id, int idLen)
+{
+    int              ret = 0;
+    CK_RV            rv;
+    CK_ATTRIBUTE     keyTemplate[] = {
+        { CKA_CLASS,    &secretKeyClass, sizeof(secretKeyClass) },
+        { CKA_KEY_TYPE, &keyType,        sizeof(keyType)        },
+        { CKA_ENCRYPT,  &ckTrue,         sizeof(ckTrue)         },
+        { CKA_VALUE,    NULL,            0                      },
+        { CKA_ID,       id,              (CK_ULONG)idLen        }
+    };
+    int              keyTmplCnt = 4;
+
+    WOLFSSL_MSG("PKCS#11: Create Secret Key");
+
+    /* Set the modulus and public exponent data. */
+    keyTemplate[3].pValue     = data;
+    keyTemplate[3].ulValueLen = (CK_ULONG)len;
+
+    if (idLen > 0)
+        keyTmplCnt++;
+
+    /* Create an object containing key data for device to use. */
+    rv = session->func->C_CreateObject(session->handle, keyTemplate, keyTmplCnt,
+                                                                           key);
+    if (rv != CKR_OK)
+        ret = WC_HW_E;
+
+    return ret;
+}
+#endif
+
+#ifndef NO_RSA
+/**
+ * Create a PKCS#11 object containing the RSA private key data.
+ *
+ * @param  privateKey [out]  Henadle to private key object.
+ * @param  session    [in]   Session object.
+ * @param  rsaKey     [in]   RSA key with private key data.
+ * @return  WC_HW_E when a PKCS#11 library call fails.
+ *          0 on success.
+ */
+static int Pkcs11CreateRsaPrivateKey(CK_OBJECT_HANDLE* privateKey,
+                                     Pkcs11Session* session,
+                                     RsaKey* rsaKey)
+{
+    int             ret = 0;
+    CK_RV           rv;
+    CK_ATTRIBUTE    keyTemplate[] = {
+        { CKA_CLASS,            &privKeyClass, sizeof(privKeyClass) },
+        { CKA_KEY_TYPE,         &rsaKeyType,   sizeof(rsaKeyType)   },
+        { CKA_DECRYPT,          &ckTrue,       sizeof(ckTrue)       },
+        { CKA_MODULUS,          NULL,          0                    },
+        { CKA_PRIVATE_EXPONENT, NULL,          0                    },
+        { CKA_PRIME_1,          NULL,          0                    },
+        { CKA_PRIME_2,          NULL,          0                    },
+        { CKA_EXPONENT_1,       NULL,          0                    },
+        { CKA_EXPONENT_2,       NULL,          0                    },
+        { CKA_COEFFICIENT,      NULL,          0                    },
+        { CKA_PUBLIC_EXPONENT,  NULL,          0                    }
+    };
+    CK_ULONG        keyTmplCnt = sizeof(keyTemplate) / sizeof(*keyTemplate);
+
+    /* Set the modulus and private key data. */
+    keyTemplate[ 3].pValue     = rsaKey->n.raw.buf;
+    keyTemplate[ 3].ulValueLen = rsaKey->n.raw.len;
+    keyTemplate[ 4].pValue     = rsaKey->d.raw.buf;
+    keyTemplate[ 4].ulValueLen = rsaKey->d.raw.len;
+    keyTemplate[ 5].pValue     = rsaKey->p.raw.buf;
+    keyTemplate[ 5].ulValueLen = rsaKey->p.raw.len;
+    keyTemplate[ 6].pValue     = rsaKey->q.raw.buf;
+    keyTemplate[ 6].ulValueLen = rsaKey->q.raw.len;
+    keyTemplate[ 7].pValue     = rsaKey->dP.raw.buf;
+    keyTemplate[ 7].ulValueLen = rsaKey->dP.raw.len;
+    keyTemplate[ 8].pValue     = rsaKey->dQ.raw.buf;
+    keyTemplate[ 8].ulValueLen = rsaKey->dQ.raw.len;
+    keyTemplate[ 9].pValue     = rsaKey->u.raw.buf;
+    keyTemplate[ 9].ulValueLen = rsaKey->u.raw.len;
+    keyTemplate[10].pValue     = rsaKey->e.raw.buf;
+    keyTemplate[10].ulValueLen = rsaKey->e.raw.len;
+
+    rv = session->func->C_CreateObject(session->handle, keyTemplate, keyTmplCnt,
+                                                                    privateKey);
+    if (rv != CKR_OK)
+        ret = WC_HW_E;
+
+    return ret;
+}
+#endif
+
+#ifdef HAVE_ECC
+/**
+ * Set the ECC parameters into the template.
+ *
+ * @param  key   [in]  ECC key.
+ * @param  tmpl  [in]  PKCS#11 template.
+ * @param  idx   [in]  Index of template to put parameters into.
+ * @return NOT_COMPILE_IN when the EC parameters are not known.
+ *         0 on success.
+ */
+static int Pkcs11EccSetParams(ecc_key* key, CK_ATTRIBUTE* tmpl, int idx)
+{
+    int ret = 0;
+
+    if (key->dp != NULL && key->dp->oid != NULL) {
+        unsigned char* derParams = tmpl[idx].pValue;
+        /* ASN.1 encoding: OBJ + ecc parameters OID */
+        tmpl[idx].ulValueLen = key->dp->oidSz + 2;
+        derParams[0] = ASN_OBJECT_ID;
+        derParams[1] = key->dp->oidSz;
+        XMEMCPY(derParams + 2, key->dp->oid, key->dp->oidSz);
+    }
+    else
+        ret = NOT_COMPILED_IN;
+
+    return ret;
+}
+
+/**
+ * Create a PKCS#11 object containing the ECC private key data.
+ *
+ * @param  privateKey   [out]  Henadle to private key object.
+ * @param  session      [in]   Session object.
+ * @param  private_key  [in]   ECC private key.
+ * @param  operation    [in]   Cryptographic operation key is to be used for.
+ * @return  WC_HW_E when a PKCS#11 library call fails.
+ *          0 on success.
+ */
+static int Pkcs11CreateEccPrivateKey(CK_OBJECT_HANDLE* privateKey,
+                                     Pkcs11Session* session,
+                                     ecc_key* private_key,
+                                     CK_ATTRIBUTE_TYPE operation)
+{
+    int             ret = 0;
+    CK_RV           rv;
+    CK_UTF8CHAR     params[MAX_EC_PARAM_LEN];
+    CK_ATTRIBUTE    keyTemplate[] = {
+        { CKA_CLASS,     &privKeyClass, sizeof(privKeyClass) },
+        { CKA_KEY_TYPE,  &ecKeyType,    sizeof(ecKeyType)    },
+        { operation,     &ckTrue,       sizeof(ckTrue)       },
+        { CKA_EC_PARAMS, params,        0                    },
+        { CKA_VALUE,     NULL,          0                    }
+    };
+    CK_ULONG        keyTmplCnt = sizeof(keyTemplate) / sizeof(*keyTemplate);
+
+    ret = Pkcs11EccSetParams(private_key, keyTemplate, 3);
+    if (ret == 0) {
+        keyTemplate[4].pValue     = private_key->k.raw.buf;
+        keyTemplate[4].ulValueLen = private_key->k.raw.len;
+
+        rv = session->func->C_CreateObject(session->handle, keyTemplate,
+                                                        keyTmplCnt, privateKey);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+
+    return ret;
+}
+#endif
+
+#if !defined(NO_RSA) || defined(HAVE_ECC) || (!defined(NO_AES) && \
+           (defined(HAVE_AESGCM) || defined(HAVE_AES_CBC))) || !defined(NO_HMAC)
+/**
+ * Check if mechanism is available in session on token.
+ *
+ * @param  session  [in]  Session object.
+ * @param  mech     [in]  Mechanism to look for.
+ * @return  NOT_COMPILED_IN when mechanism not available.
+ *          0 when mechanism is available.
+ */
+static int Pkcs11MechAvail(Pkcs11Session* session, CK_MECHANISM_TYPE mech)
+{
+    int               ret = 0;
+    CK_RV             rv;
+    CK_MECHANISM_INFO mechInfo;
+
+    rv = session->func->C_GetMechanismInfo(session->slotId, mech, &mechInfo);
+    if (rv != CKR_OK)
+        ret = NOT_COMPILED_IN;
+
+    return ret;
+}
+#endif
+
+#ifndef NO_HMAC
+/**
+ * Return the mechanism type and key type for the digest type when using HMAC.
+ *
+ * @param  macType   [in]  Digest type - e.g. WC_SHA256.
+ * @param  mechType  [in]  Mechanism type - e.g. CKM_SHA256_HMAC.
+ * @param  keyType   [in]  Key type - e.g. CKK_SHA256_HMAC.
+ * @return  NOT_COMPILED_IN if the digest algorithm isn't recognised.
+ *          0 otherwise.
+ */
+static int Pkcs11HmacTypes(int macType, int* mechType, int* keyType)
+{
+    int ret = 0;
+
+    switch (macType)
+    {
+    #ifndef NO_MD5
+        case WC_MD5:
+            *mechType = CKM_MD5_HMAC;
+            *keyType = CKK_MD5_HMAC;
+            break;
+    #endif
+    #ifndef NO_SHA
+        case WC_SHA:
+            *mechType = CKM_SHA_1_HMAC;
+            *keyType = CKK_SHA_1_HMAC;
+            break;
+    #endif
+    #ifdef WOLFSSL_SHA224
+        case WC_SHA224:
+            *mechType = CKM_SHA224_HMAC;
+            *keyType = CKK_SHA224_HMAC;
+            break;
+    #endif
+    #ifndef NO_SHA256
+        case WC_SHA256:
+            *mechType = CKM_SHA256_HMAC;
+            *keyType = CKK_SHA256_HMAC;
+            break;
+    #endif
+    #ifdef WOLFSSL_SHA384
+        case WC_SHA384:
+            *mechType = CKM_SHA384_HMAC;
+            *keyType = CKK_SHA384_HMAC;
+            break;
+    #endif
+    #ifdef WOLFSSL_SHA512
+        case WC_SHA512:
+            *mechType = CKM_SHA512_HMAC;
+            *keyType = CKK_SHA512_HMAC;
+            break;
+    #endif
+        default:
+            ret = NOT_COMPILED_IN;
+            break;
+    }
+
+    return ret;
+}
+#endif
+
+/**
+ * Store the private key on the token in the session.
+ *
+ * @param  token  [in]  Token to store private key on.
+ * @param  type   [in]  Key type.
+ * @param  clear  [in]  Clear out the private data from software key.
+ * @param  key    [in]  Key type specific object.
+ * @return  NOT_COMPILED_IN when mechanism not available.
+ *          0 on success.
+ */
+int wc_Pkcs11StoreKey(Pkcs11Token* token, int type, int clear, void* key)
+{
+    int               ret = 0;
+    Pkcs11Session     session;
+    CK_OBJECT_HANDLE  privKey = NULL_PTR;
+
+    ret = Pkcs11OpenSession(token, &session, 1);
+    if (ret == 0) {
+        switch (type) {
+    #if !defined(NO_AES) && defined(HAVE_AESGCM)
+            case PKCS11_KEY_TYPE_AES_GCM: {
+                Aes* aes = (Aes*)key;
+
+                ret = Pkcs11MechAvail(&session, CKM_AES_GCM);
+                if (ret == 0) {
+                    ret = Pkcs11CreateSecretKey(&privKey, &session, CKK_AES,
+                                                (unsigned char*)aes->devKey,
+                                                aes->keylen,
+                                                (unsigned char*)aes->id,
+                                                aes->idLen);
+                }
+                if (ret == 0 && clear)
+                    ForceZero(aes->devKey, aes->keylen);
+                break;
+            }
+    #endif
+    #if !defined(NO_AES) && defined(HAVE_AES_CBC)
+            case PKCS11_KEY_TYPE_AES_CBC: {
+                Aes* aes = (Aes*)key;
+
+                ret = Pkcs11MechAvail(&session, CKM_AES_CBC);
+                if (ret == 0) {
+                    ret = Pkcs11CreateSecretKey(&privKey, &session, CKK_AES,
+                                                (unsigned char*)aes->devKey,
+                                                aes->keylen,
+                                                (unsigned char*)aes->id,
+                                                aes->idLen);
+                }
+                if (ret == 0 && clear)
+                    ForceZero(aes->devKey, aes->keylen);
+                break;
+            }
+    #endif
+    #ifndef NO_HMAC
+            case PKCS11_KEY_TYPE_HMAC: {
+                Hmac* hmac = (Hmac*)key;
+                int mechType;
+                int keyType;
+
+                ret = Pkcs11HmacTypes(hmac->macType, &mechType, &keyType);
+                if (ret == NOT_COMPILED_IN)
+                    break;
+
+                if (ret == 0)
+                    ret = Pkcs11MechAvail(&session, mechType);
+                if (ret == 0) {
+                    ret = Pkcs11CreateSecretKey(&privKey, &session, keyType,
+                                                (unsigned char*)hmac->keyRaw,
+                                                hmac->keyLen,
+                                                (unsigned char*)hmac->id,
+                                                hmac->idLen);
+                    if (ret == WC_HW_E) {
+                        ret = Pkcs11CreateSecretKey(&privKey, &session,
+                                                   CKK_GENERIC_SECRET,
+                                                   (unsigned char*)hmac->keyRaw,
+                                                   hmac->keyLen,
+                                                   (unsigned char*)hmac->id,
+                                                   hmac->idLen);
+                    }
+                }
+                break;
+            }
+    #endif
+    #ifndef NO_RSA
+            case PKCS11_KEY_TYPE_RSA: {
+                RsaKey* rsaKey = (RsaKey*)key;
+
+                ret = Pkcs11MechAvail(&session, CKM_RSA_X_509);
+                if (ret == 0)
+                    ret = Pkcs11CreateRsaPrivateKey(&privKey, &session, rsaKey);
+                if (ret == 0 && clear) {
+                    mp_forcezero(&rsaKey->u);
+                    mp_forcezero(&rsaKey->dQ);
+                    mp_forcezero(&rsaKey->dP);
+                    mp_forcezero(&rsaKey->q);
+                    mp_forcezero(&rsaKey->p);
+                    mp_forcezero(&rsaKey->d);
+                }
+                break;
+            }
+    #endif
+    #ifdef HAVE_ECC
+            case PKCS11_KEY_TYPE_EC: {
+                ecc_key* eccKey = (ecc_key*)key;
+                int      ret2 = NOT_COMPILED_IN;
+
+        #ifndef NO_PKCS11_ECDH
+                /* Try ECDH mechanism first. */
+                ret = Pkcs11MechAvail(&session, CKM_ECDH1_DERIVE);
+                if (ret == 0) {
+                    ret = Pkcs11CreateEccPrivateKey(&privKey, &session, eccKey,
+                                                                    CKA_DERIVE);
+                }
+         #endif
+                if (ret == 0 || ret == NOT_COMPILED_IN) {
+                    /* Try ECDSA mechanism next. */
+                    ret2 = Pkcs11MechAvail(&session, CKM_ECDSA);
+                    if (ret2 == 0) {
+                        ret2 = Pkcs11CreateEccPrivateKey(&privKey, &session,
+                                                              eccKey, CKA_SIGN);
+                    }
+                    /* OK for this to fail if set for ECDH. */
+                    if (ret == NOT_COMPILED_IN)
+                        ret = ret2;
+                }
+                if (ret == 0 && clear)
+                    mp_forcezero(&eccKey->k);
+                break;
+            }
+    #endif
+            default:
+                ret = NOT_COMPILED_IN;
+                break;
+        }
+
+        Pkcs11CloseSession(token, &session);
+    }
+
+    (void)privKey;
+    (void)clear;
+    (void)key;
+
+    return ret;
+}
+
+#if !defined(NO_RSA) || defined(HAVE_ECC) || (!defined(NO_AES) && \
+           (defined(HAVE_AESGCM) || defined(HAVE_AES_CBC))) || !defined(NO_HMAC)
+/**
+ * Find the PKCS#11 object containing the RSA public or private key data with
+ * the modulus specified.
+ *
+ * @param  key       [out]  Henadle to key object.
+ * @param  keyClass  [in]   Public or private key class.
+ * @param  keyType   [in]   Type of key.
+ * @param  session   [in]   Session object.
+ * @param  id        [in]   Identifier set against a key.
+ * @param  idLen     [in]   Length of identifier.
+ * @return  WC_HW_E when a PKCS#11 library call fails.
+ *          0 on success.
+ */
+static int Pkcs11FindKeyById(CK_OBJECT_HANDLE* key, CK_OBJECT_CLASS keyClass,
+                             CK_KEY_TYPE keyType, Pkcs11Session* session,
+                             byte* id, int idLen)
+{
+    int             ret = 0;
+    CK_RV           rv;
+    CK_ULONG        count;
+    CK_ATTRIBUTE    keyTemplate[] = {
+        { CKA_CLASS,           &keyClass, sizeof(keyClass) },
+        { CKA_KEY_TYPE,        &keyType,  sizeof(keyType)  },
+        { CKA_ID,              id,        (CK_ULONG)idLen  }
+    };
+    CK_ULONG        keyTmplCnt = sizeof(keyTemplate) / sizeof(*keyTemplate);
+
+    WOLFSSL_MSG("PKCS#11: Find Key By Id");
+
+    rv = session->func->C_FindObjectsInit(session->handle, keyTemplate,
+                                                                    keyTmplCnt);
+    if (rv != CKR_OK)
+        ret = WC_HW_E;
+    if (ret == 0) {
+        rv = session->func->C_FindObjects(session->handle, key, 1, &count);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+        rv = session->func->C_FindObjectsFinal(session->handle);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+    if (ret == 0 && count == 0)
+        ret = WC_HW_E;
+
+    return ret;
+}
+#endif
+
+#ifndef NO_RSA
+/**
+ * Find the PKCS#11 object containing the RSA public or private key data with
+ * the modulus specified.
+ *
+ * @param  key       [out]  Henadle to key object.
+ * @param  keyClass  [in]   Public or private key class.
+ * @param  session   [in]   Session object.
+ * @param  rsaKey    [in]   RSA key with modulus to search on.
+ * @return  WC_HW_E when a PKCS#11 library call fails.
+ *          0 on success.
+ */
+static int Pkcs11FindRsaKey(CK_OBJECT_HANDLE* key, CK_OBJECT_CLASS keyClass,
+                            Pkcs11Session* session, RsaKey* rsaKey)
+{
+    int             ret = 0;
+    CK_RV           rv;
+    CK_ULONG        count;
+    CK_ATTRIBUTE    keyTemplate[] = {
+        { CKA_CLASS,           &keyClass,   sizeof(keyClass)   },
+        { CKA_KEY_TYPE,        &rsaKeyType, sizeof(rsaKeyType) },
+        { CKA_MODULUS,         NULL,        0                  },
+    };
+    CK_ULONG        keyTmplCnt = sizeof(keyTemplate) / sizeof(*keyTemplate);
+
+    /* Set the modulus. */
+    keyTemplate[2].pValue     = rsaKey->n.raw.buf;
+    keyTemplate[2].ulValueLen = rsaKey->n.raw.len;
+
+    rv = session->func->C_FindObjectsInit(session->handle, keyTemplate,
+                                                                    keyTmplCnt);
+    if (rv != CKR_OK)
+        ret = WC_HW_E;
+    if (ret == 0) {
+        rv = session->func->C_FindObjects(session->handle, key, 1, &count);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+        rv = session->func->C_FindObjectsFinal(session->handle);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+
+    return ret;
+}
+
+/**
+ * Exponentiate the input with the public part of the RSA key.
+ * Used in public encrypt and decrypt.
+ *
+ * @param  session  [in]  Session object.
+ * @param  info     [in]  Cryptographic operation data.
+ * @return  WC_HW_E when a PKCS#11 library call fails.
+ *          0 on success.
+ */
+static int Pkcs11RsaPublic(Pkcs11Session* session, wc_CryptoInfo* info)
+{
+    int              ret = 0;
+    CK_RV            rv;
+    CK_MECHANISM     mech;
+    CK_ULONG         outLen;
+    CK_OBJECT_HANDLE publicKey = NULL_PTR;
+    int              sessionKey = 0;
+    CK_ATTRIBUTE     keyTemplate[] = {
+        { CKA_CLASS,           &pubKeyClass, sizeof(pubKeyClass) },
+        { CKA_KEY_TYPE,        &rsaKeyType,  sizeof(rsaKeyType)  },
+        { CKA_ENCRYPT,         &ckTrue,      sizeof(ckTrue)      },
+        { CKA_MODULUS,         NULL,         0                   },
+        { CKA_PUBLIC_EXPONENT, NULL,         0                   }
+    };
+    CK_ULONG        keyTmplCnt = sizeof(keyTemplate) / sizeof(*keyTemplate);
+
+    WOLFSSL_MSG("PKCS#11: RSA Public Key Operation");
+
+    if (info->pk.rsa.outLen == NULL) {
+        ret = BAD_FUNC_ARG;
+    }
+
+    if (ret == 0) {
+        if ((sessionKey = !mp_iszero(&info->pk.rsa.key->e))) {
+            /* Set the modulus and public exponent data. */
+            keyTemplate[3].pValue     = info->pk.rsa.key->n.raw.buf;
+            keyTemplate[3].ulValueLen = info->pk.rsa.key->n.raw.len;
+            keyTemplate[4].pValue     = info->pk.rsa.key->e.raw.buf;
+            keyTemplate[4].ulValueLen = info->pk.rsa.key->e.raw.len;
+
+            /* Create an object containing public key data for device to use. */
+            rv = session->func->C_CreateObject(session->handle, keyTemplate,
+                                                        keyTmplCnt, &publicKey);
+            if (rv != CKR_OK)
+                ret = WC_HW_E;
+        }
+        else {
+            ret = Pkcs11FindKeyById(&publicKey, CKO_PUBLIC_KEY, CKK_RSA,
+                                    session, info->pk.rsa.key->id,
+                                    info->pk.rsa.key->idLen);
+        }
+    }
+
+    if (ret == 0) {
+        /* Raw RSA encrypt/decrypt operation. */
+        mech.mechanism      = CKM_RSA_X_509;
+        mech.ulParameterLen = 0;
+        mech.pParameter     = NULL;
+
+        rv = session->func->C_EncryptInit(session->handle, &mech, publicKey);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+    if (ret == 0) {
+        outLen = (CK_ULONG)*info->pk.rsa.outLen;
+        rv = session->func->C_Encrypt(session->handle,
+                (CK_BYTE_PTR)info->pk.rsa.in, info->pk.rsa.inLen,
+                info->pk.rsa.out, &outLen);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+    if (ret == 0)
+        *info->pk.rsa.outLen = (word32)outLen;
+
+    if (sessionKey)
+        session->func->C_DestroyObject(session->handle, publicKey);
+
+    return ret;
+}
+
+/**
+ * Exponentiate the input with the private part of the RSA key.
+ * Used in private encrypt and decrypt.
+ *
+ * @param  session  [in]  Session object.
+ * @param  info     [in]  Cryptographic operation data.
+ * @return  WC_HW_E when a PKCS#11 library call fails.
+ *          0 on success.
+ */
+static int Pkcs11RsaPrivate(Pkcs11Session* session, wc_CryptoInfo* info)
+{
+    int              ret = 0;
+    CK_RV            rv;
+    CK_MECHANISM     mech;
+    CK_ULONG         outLen;
+    CK_OBJECT_HANDLE privateKey = NULL_PTR;
+    int              sessionKey = 0;
+
+    WOLFSSL_MSG("PKCS#11: RSA Private Key Operation");
+
+    if (info->pk.rsa.outLen == NULL) {
+        ret = BAD_FUNC_ARG;
+    }
+
+    if (ret == 0) {
+        if ((sessionKey = !mp_iszero(&info->pk.rsa.key->d))) {
+            ret = Pkcs11CreateRsaPrivateKey(&privateKey, session,
+                                                              info->pk.rsa.key);
+        }
+        else if (info->pk.rsa.key->idLen > 0) {
+            ret = Pkcs11FindKeyById(&privateKey, CKO_PRIVATE_KEY, CKK_RSA,
+                                    session, info->pk.rsa.key->id,
+                                    info->pk.rsa.key->idLen);
+        }
+        else {
+            ret = Pkcs11FindRsaKey(&privateKey, CKO_PRIVATE_KEY, session,
+                                                              info->pk.rsa.key);
+        }
+    }
+
+    if (ret == 0) {
+        /* Raw RSA encrypt/decrypt operation. */
+        mech.mechanism      = CKM_RSA_X_509;
+        mech.ulParameterLen = 0;
+        mech.pParameter     = NULL;
+
+        rv = session->func->C_DecryptInit(session->handle, &mech, privateKey);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+    if (ret == 0) {
+        outLen = (CK_ULONG)*info->pk.rsa.outLen;
+        rv = session->func->C_Decrypt(session->handle,
+                (CK_BYTE_PTR)info->pk.rsa.in, info->pk.rsa.inLen,
+                info->pk.rsa.out, &outLen);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+    if (ret == 0)
+        *info->pk.rsa.outLen = (word32)outLen;
+
+    if (sessionKey)
+        session->func->C_DestroyObject(session->handle, privateKey);
+
+    return ret;
+}
+
+/**
+ * Perform an RSA operation.
+ *
+ * @param  session  [in]  Session object.
+ * @param  info     [in]  Cryptographic operation data.
+ * @return  WC_HW_E when a PKCS#11 library call fails.
+ *          0 on success.
+ */
+static int Pkcs11Rsa(Pkcs11Session* session, wc_CryptoInfo* info)
+{
+    int               ret = 0;
+    CK_RV             rv;
+    CK_MECHANISM_INFO mechInfo;
+
+    /* Check operation is supported. */
+    rv = session->func->C_GetMechanismInfo(session->slotId, CKM_RSA_X_509,
+                                                                     &mechInfo);
+    if (rv != CKR_OK)
+        ret = NOT_COMPILED_IN;
+
+    if (ret == 0) {
+        if (info->pk.rsa.type == RSA_PUBLIC_ENCRYPT ||
+                                      info->pk.rsa.type == RSA_PUBLIC_DECRYPT) {
+            if ((mechInfo.flags & CKF_ENCRYPT) == 0)
+                ret = NOT_COMPILED_IN;
+            else
+                ret = Pkcs11RsaPublic(session, info);
+        }
+        else if (info->pk.rsa.type == RSA_PRIVATE_ENCRYPT ||
+                                     info->pk.rsa.type == RSA_PRIVATE_DECRYPT) {
+            if ((mechInfo.flags & CKF_DECRYPT) == 0)
+                ret = NOT_COMPILED_IN;
+            else
+                ret = Pkcs11RsaPrivate(session, info);
+        }
+        else
+            ret = NOT_COMPILED_IN;
+    }
+
+    return ret;
+}
+
+#ifdef WOLFSSL_KEY_GEN
+/**
+ * Get the RSA public key data from the PKCS#11 object.
+ *
+ * @param  key      [in]  RSA key to put the data into.
+ * @param  session  [in]  Session object.
+ * @param  pubkey   [in]  Public key object.
+ * @return  WC_HW_E when a PKCS#11 library call fails.
+ *          MEMORY_E when a memory allocation fails.
+ *          0 on success.
+ */
+static int Pkcs11GetRsaPublicKey(RsaKey* key, Pkcs11Session* session,
+                                 CK_OBJECT_HANDLE pubKey)
+{
+    int            ret = 0;
+    unsigned char* mod = NULL;
+    unsigned char* exp = NULL;
+    int            modSz, expSz;
+    CK_ATTRIBUTE   tmpl[] = {
+        { CKA_MODULUS,         NULL_PTR, 0 },
+        { CKA_PUBLIC_EXPONENT, NULL_PTR, 0 }
+    };
+    CK_ULONG       tmplCnt = sizeof(tmpl) / sizeof(*tmpl);
+    CK_RV rv;
+
+    rv = session->func->C_GetAttributeValue(session->handle, pubKey, tmpl,
+                                                                       tmplCnt);
+    if (rv != CKR_OK)
+        ret = WC_HW_E;
+
+    if (ret == 0) {
+        modSz = tmpl[0].ulValueLen;
+        expSz = tmpl[1].ulValueLen;
+        mod = (unsigned char*)XMALLOC(modSz, key->heap,
+                                                       DYNAMIC_TYPE_TMP_BUFFER);
+        if (mod == NULL)
+            ret = MEMORY_E;
+    }
+    if (ret == 0) {
+        exp = (unsigned char*)XMALLOC(expSz, key->heap,
+                                                       DYNAMIC_TYPE_TMP_BUFFER);
+        if (exp == NULL)
+            ret = MEMORY_E;
+    }
+    if (ret == 0) {
+        tmpl[0].pValue = mod;
+        tmpl[1].pValue = exp;
+
+        rv = session->func->C_GetAttributeValue(session->handle, pubKey,
+                                                                 tmpl, tmplCnt);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+    if (ret == 0)
+        ret = wc_RsaPublicKeyDecodeRaw(mod, modSz, exp, expSz, key);
+
+    if (exp != NULL)
+        XFREE(exp, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (mod != NULL)
+        XFREE(mod, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+
+    return ret;
+}
+
+/**
+ * Perform an RSA key generation operation.
+ * The private key data stays on the device.
+ *
+ * @param  session  [in]  Session object.
+ * @param  info     [in]  Cryptographic operation data.
+ * @return  WC_HW_E when a PKCS#11 library call fails.
+ *          0 on success.
+ */
+static int Pkcs11RsaKeyGen(Pkcs11Session* session, wc_CryptoInfo* info)
+{
+    int               ret = 0;
+    RsaKey*           key = info->pk.rsakg.key;
+    CK_RV             rv;
+    CK_ULONG          bits = info->pk.rsakg.size;
+    CK_OBJECT_HANDLE  pubKey = NULL_PTR, privKey = NULL_PTR;
+    CK_MECHANISM      mech;
+    static CK_BYTE    pub_exp[] = { 0x01, 0x00, 0x01, 0x00 };
+    CK_ATTRIBUTE      pubKeyTmpl[] = {
+        { CKA_MODULUS_BITS,    &bits,    sizeof(bits)    },
+        { CKA_ENCRYPT,         &ckTrue,  sizeof(ckTrue)  },
+        { CKA_VERIFY,          &ckTrue,  sizeof(ckTrue)  },
+        { CKA_PUBLIC_EXPONENT, &pub_exp, sizeof(pub_exp) }
+    };
+    CK_ULONG          pubTmplCnt = sizeof(pubKeyTmpl)/sizeof(*pubKeyTmpl);
+    CK_ATTRIBUTE      privKeyTmpl[] = {
+        {CKA_DECRYPT,  &ckTrue, sizeof(ckTrue) },
+        {CKA_SIGN,     &ckTrue, sizeof(ckTrue) },
+        {CKA_ID,       NULL,    0              }
+    };
+    int               privTmplCnt = 2;
+    int               i;
+
+    ret = Pkcs11MechAvail(session, CKM_RSA_PKCS_KEY_PAIR_GEN);
+    if (ret == 0) {
+        WOLFSSL_MSG("PKCS#11: RSA Key Generation Operation");
+
+        /* Most commonly used public exponent value (array initialized). */
+        if (info->pk.rsakg.e != WC_RSA_EXPONENT) {
+            for (i = 0; i < (int)sizeof(pub_exp); i++)
+                pub_exp[i] = (info->pk.rsakg.e >> (8 * i)) & 0xff;
+        }
+        for (i = (int)sizeof(pub_exp) - 1; pub_exp[i] == 0; i--) {
+        }
+        pubKeyTmpl[3].ulValueLen = i + 1;
+
+        if (key->idLen != 0) {
+            privKeyTmpl[privTmplCnt].pValue     = key->id;
+            privKeyTmpl[privTmplCnt].ulValueLen = key->idLen;
+            privTmplCnt++;
+        }
+
+        mech.mechanism      = CKM_RSA_PKCS_KEY_PAIR_GEN;
+        mech.ulParameterLen = 0;
+        mech.pParameter     = NULL;
+
+        rv = session->func->C_GenerateKeyPair(session->handle, &mech,
+                                                       pubKeyTmpl, pubTmplCnt,
+                                                       privKeyTmpl, privTmplCnt,
+                                                       &pubKey, &privKey);
+        if (rv != CKR_OK)
+            ret = -1;
+    }
+
+    if (ret == 0)
+        ret = Pkcs11GetRsaPublicKey(key, session, pubKey);
+
+    if (pubKey != NULL_PTR)
+        ret = session->func->C_DestroyObject(session->handle, pubKey);
+    if (ret != 0 && privKey != NULL_PTR)
+        ret = session->func->C_DestroyObject(session->handle, privKey);
+
+    return ret;
+}
+#endif /* WOLFSSL_KEY_GEN */
+#endif /* !NO_RSA */
+
+#ifdef HAVE_ECC
+/**
+ * Find the PKCS#11 object containing the ECC public or private key data with
+ * the modulus specified.
+ *
+ * @param  key       [out]  Henadle to key object.
+ * @param  keyClass  [in]   Public or private key class.
+ * @param  session   [in]   Session object.
+ * @param  eccKey    [in]   ECC key with parameters.
+ * @return  WC_HW_E when a PKCS#11 library call fails.
+ *          MEMORY_E when a memory allocation fails.
+ *          0 on success.
+ */
+static int Pkcs11FindEccKey(CK_OBJECT_HANDLE* key, CK_OBJECT_CLASS keyClass,
+                            Pkcs11Session* session, ecc_key* eccKey)
+{
+    int             ret = 0;
+    int             i;
+    unsigned char*  ecPoint = NULL;
+    word32          len = 0;
+    CK_RV           rv;
+    CK_ULONG        count;
+    CK_UTF8CHAR     params[MAX_EC_PARAM_LEN];
+    CK_ATTRIBUTE    keyTemplate[] = {
+        { CKA_CLASS,           &keyClass,  sizeof(keyClass)  },
+        { CKA_KEY_TYPE,        &ecKeyType, sizeof(ecKeyType) },
+        { CKA_EC_PARAMS,       params,     0                 },
+        { CKA_EC_POINT,        NULL,       0                 },
+    };
+    CK_ULONG        attrCnt = 3;
+
+    ret = Pkcs11EccSetParams(eccKey, keyTemplate, 2);
+    if (ret == 0 && keyClass == CKO_PUBLIC_KEY) {
+        /* ASN1 encoded: OCT + uncompressed point */
+        len = 3 + 1 + 2 * eccKey->dp->size;
+        ecPoint = (unsigned char*)XMALLOC(len, eccKey->heap, DYNAMIC_TYPE_ECC);
+        if (ecPoint == NULL)
+            ret = MEMORY_E;
+    }
+    if (ret == 0 && keyClass == CKO_PUBLIC_KEY) {
+        len -= 3;
+        i = 0;
+        ecPoint[i++] = ASN_OCTET_STRING;
+        if (len >= ASN_LONG_LENGTH)
+            ecPoint[i++] = (ASN_LONG_LENGTH | 1);
+        ecPoint[i++] = len;
+        ret = wc_ecc_export_x963(eccKey, ecPoint + i, &len);
+    }
+    if (ret == 0 && keyClass == CKO_PUBLIC_KEY) {
+        keyTemplate[3].pValue     = ecPoint;
+        keyTemplate[3].ulValueLen = len + i;
+        attrCnt++;
+    }
+    if (ret == 0) {
+        rv = session->func->C_FindObjectsInit(session->handle, keyTemplate,
+                                                                       attrCnt);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+    if (ret == 0) {
+        rv = session->func->C_FindObjects(session->handle, key, 1, &count);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+        rv = session->func->C_FindObjectsFinal(session->handle);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+
+    if (ecPoint != NULL)
+        XFREE(ecPoint, eccKey->heap, DYNAMIC_TYPE_ECC);
+
+    return ret;
+}
+
+/**
+ * Create a PKCS#11 object containing the ECC public key data.
+ * Encode the public key as an OCTET_STRING of the encoded point.
+ *
+ * @param  publicKey    [out]  Henadle to public key object.
+ * @param  session      [in]   Session object.
+ * @param  public_key   [in]   ECC public key.
+ * @param  operation    [in]   Cryptographic operation key is to be used for.
+ * @return  WC_HW_E when a PKCS#11 library call fails.
+ *          MEMORY_E when a memory allocation fails.
+ *          0 on success.
+ */
+static int Pkcs11CreateEccPublicKey(CK_OBJECT_HANDLE* publicKey,
+                                    Pkcs11Session* session,
+                                    ecc_key* public_key,
+                                    CK_ATTRIBUTE_TYPE operation)
+{
+    int             ret = 0;
+    int             i;
+    unsigned char*  ecPoint = NULL;
+    word32          len;
+    CK_RV           rv;
+    CK_UTF8CHAR     params[MAX_EC_PARAM_LEN];
+    CK_ATTRIBUTE    keyTemplate[] = {
+        { CKA_CLASS,     &pubKeyClass, sizeof(pubKeyClass) },
+        { CKA_KEY_TYPE,  &ecKeyType,   sizeof(ecKeyType)   },
+        { operation,     &ckTrue,      sizeof(ckTrue)      },
+        { CKA_EC_PARAMS, params,       0                   },
+        { CKA_EC_POINT,  NULL,         0                   }
+    };
+    CK_ULONG        keyTmplCnt = sizeof(keyTemplate) / sizeof(*keyTemplate);
+
+    ret = Pkcs11EccSetParams(public_key, keyTemplate, 3);
+    if (ret == 0) {
+        /* ASN1 encoded: OCT + uncompressed point */
+        len = 3 + 1 + 2 * public_key->dp->size;
+        ecPoint = (unsigned char*)XMALLOC(len, public_key->heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (ecPoint == NULL)
+            ret = MEMORY_E;
+    }
+    if (ret == 0) {
+        len -= 3;
+        i = 0;
+        ecPoint[i++] = ASN_OCTET_STRING;
+        if (len >= ASN_LONG_LENGTH)
+            ecPoint[i++] = ASN_LONG_LENGTH | 1;
+        ecPoint[i++] = len;
+        ret = wc_ecc_export_x963(public_key, ecPoint + i, &len);
+    }
+    if (ret == 0) {
+        keyTemplate[4].pValue     = ecPoint;
+        keyTemplate[4].ulValueLen = len + i;
+
+        rv = session->func->C_CreateObject(session->handle, keyTemplate,
+                                                         keyTmplCnt, publicKey);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+
+    if (ecPoint != NULL)
+        XFREE(ecPoint, public_key->heap, DYNAMIC_TYPE_ECC);
+
+    return ret;
+}
+
+#ifndef NO_PKCS11_EC_KEYGEN
+/**
+ * Gets the public key data from the PKCS#11 object and puts into the ECC key.
+ *
+ * @param  key      [in]  ECC public key.
+ * @param  session  [in]  Session object.
+ * @param  pubKey   [in]  ECC public key PKCS#11 object.
+ * @return  WC_HW_E when a PKCS#11 library call fails.
+ *          MEMORY_E when a memory allocation fails.
+ *          0 on success.
+ */
+static int Pkcs11GetEccPublicKey(ecc_key* key, Pkcs11Session* session,
+                                 CK_OBJECT_HANDLE pubKey)
+{
+    int            ret = 0;
+    word32         i = 0;
+    int            curveIdx;
+    unsigned char* point = NULL;
+    int            pointSz;
+    byte           tag;
+    CK_RV          rv;
+    CK_ATTRIBUTE   tmpl[] = {
+        { CKA_EC_POINT,  NULL_PTR, 0 },
+    };
+    CK_ULONG       tmplCnt = sizeof(tmpl) / sizeof(*tmpl);
+
+    rv = session->func->C_GetAttributeValue(session->handle, pubKey, tmpl,
+                                                                       tmplCnt);
+    if (rv != CKR_OK)
+        ret = WC_HW_E;
+
+    if (ret == 0) {
+        pointSz = (int)tmpl[0].ulValueLen;
+        point = (unsigned char*)XMALLOC(pointSz, key->heap, DYNAMIC_TYPE_ECC);
+        if (point == NULL)
+            ret = MEMORY_E;
+    }
+    if (ret == 0) {
+        tmpl[0].pValue = point;
+
+        rv = session->func->C_GetAttributeValue(session->handle, pubKey,
+                                                                 tmpl, tmplCnt);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+
+    /* Make sure the data is big enough for ASN.1: OCT + uncompressed point */
+    if (ret == 0 && pointSz < key->dp->size * 2 + 1 + 2)
+        ret = ASN_PARSE_E;
+    /* Step over the OCTET_STRING wrapper. */
+    if (ret == 0 && GetASNTag(point, &i, &tag, pointSz) != 0)
+        ret = ASN_PARSE_E;
+    if (ret == 0 && tag != ASN_OCTET_STRING)
+        ret = ASN_PARSE_E;
+    if (ret == 0 && point[i] >= ASN_LONG_LENGTH) {
+        if (point[i++] != (ASN_LONG_LENGTH | 1))
+            ret = ASN_PARSE_E;
+        else if (pointSz < key->dp->size * 2 + 1 + 3)
+            ret = ASN_PARSE_E;
+    }
+    if (ret == 0 && point[i++] != key->dp->size * 2 + 1)
+        ret = ASN_PARSE_E;
+
+    if (ret == 0) {
+        curveIdx = wc_ecc_get_curve_idx(key->dp->id);
+        ret = wc_ecc_import_point_der(point + i, pointSz - i, curveIdx,
+                                                                  &key->pubkey);
+    }
+
+    if (point != NULL)
+        XFREE(point, key->heap, DYNAMIC_TYPE_ECC);
+
+    return ret;
+}
+
+/**
+ * Perform an ECC key generation operation.
+ * The private key data stays on the device.
+ *
+ * @param  session  [in]  Session object.
+ * @param  info     [in]  Cryptographic operation data.
+ * @return  WC_HW_E when a PKCS#11 library call fails.
+ *          0 on success.
+ */
+static int Pkcs11EcKeyGen(Pkcs11Session* session, wc_CryptoInfo* info)
+{
+    int               ret = 0;
+    ecc_key*          key = info->pk.eckg.key;
+    CK_RV             rv;
+    CK_OBJECT_HANDLE  pubKey = NULL_PTR, privKey = NULL_PTR;
+    CK_MECHANISM      mech;
+    CK_UTF8CHAR       params[MAX_EC_PARAM_LEN];
+    CK_ATTRIBUTE      pubKeyTmpl[] = {
+        { CKA_EC_PARAMS,       params,   0               },
+        { CKA_ENCRYPT,         &ckTrue,  sizeof(ckTrue)  },
+        { CKA_VERIFY,          &ckTrue,  sizeof(ckTrue)  },
+    };
+    int               pubTmplCnt = sizeof(pubKeyTmpl)/sizeof(*pubKeyTmpl);
+    CK_ATTRIBUTE      privKeyTmpl[] = {
+        { CKA_DECRYPT,  &ckTrue, sizeof(ckTrue) },
+        { CKA_SIGN,     &ckTrue, sizeof(ckTrue) },
+        { CKA_DERIVE,   &ckTrue, sizeof(ckTrue) },
+        { CKA_ID,       NULL,    0              },
+    };
+    int               privTmplCnt = 3;
+
+    ret = Pkcs11MechAvail(session, CKM_EC_KEY_PAIR_GEN);
+    if (ret == 0) {
+        WOLFSSL_MSG("PKCS#11: EC Key Generation Operation");
+
+        ret = Pkcs11EccSetParams(key, pubKeyTmpl, 0);
+    }
+    if (ret == 0) {
+        if (key->idLen != 0) {
+            privKeyTmpl[privTmplCnt].pValue     = key->id;
+            privKeyTmpl[privTmplCnt].ulValueLen = key->idLen;
+            privTmplCnt++;
+        }
+
+        mech.mechanism      = CKM_EC_KEY_PAIR_GEN;
+        mech.ulParameterLen = 0;
+        mech.pParameter     = NULL;
+
+        rv = session->func->C_GenerateKeyPair(session->handle, &mech,
+                                                       pubKeyTmpl, pubTmplCnt,
+                                                       privKeyTmpl, privTmplCnt,
+                                                       &pubKey, &privKey);
+        if (rv != CKR_OK)
+            ret = -1;
+    }
+
+    if (ret == 0)
+        ret = Pkcs11GetEccPublicKey(key, session, pubKey);
+
+    if (pubKey != NULL_PTR)
+        session->func->C_DestroyObject(session->handle, pubKey);
+    if (ret != 0 && privKey != NULL_PTR)
+        session->func->C_DestroyObject(session->handle, privKey);
+
+    return ret;
+}
+#endif
+
+#ifndef NO_PKCS11_ECDH
+/**
+ * Extracts the secret key data from the PKCS#11 object.
+ *
+ * @param  session  [in]      Session object.
+ * @param  secret   [in]      PKCS#11 object with the secret key data.
+ * @param  out      [in]      Buffer to hold secret data.
+ * @param  outLen   [in,out]  On in, length of buffer.
+ *                            On out, the length of data in buffer.
+ * @return  WC_HW_E when a PKCS#11 library call fails.
+ *          0 on success.
+ */
+static int Pkcs11ExtractSecret(Pkcs11Session* session, CK_OBJECT_HANDLE secret,
+    byte* out, word32* outLen)
+{
+    int ret = 0;
+    CK_ATTRIBUTE tmpl[] = {
+      {CKA_VALUE, NULL_PTR, 0}
+    };
+    CK_ULONG     tmplCnt = sizeof(tmpl) / sizeof(*tmpl);
+    CK_RV rv;
+
+    rv = session->func->C_GetAttributeValue(session->handle, secret, tmpl,
+                                                                       tmplCnt);
+    if (rv != CKR_OK)
+        ret = WC_HW_E;
+    if (ret == 0) {
+        if (tmpl[0].ulValueLen > *outLen)
+            ret = BUFFER_E;
+    }
+    if (ret == 0) {
+        tmpl[0].pValue = out;
+        rv = session->func->C_GetAttributeValue(session->handle, secret,
+                                                                 tmpl, tmplCnt);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+        *outLen = (word32)tmpl[0].ulValueLen;
+    }
+
+    return ret;
+}
+
+/**
+ * Performs the ECDH secret generation operation.
+ *
+ * @param  session  [in]  Session object.
+ * @param  info     [in]  Cryptographic operation data.
+ * @return  WC_HW_E when a PKCS#11 library call fails.
+ *          0 on success.
+ */
+static int Pkcs11ECDH(Pkcs11Session* session, wc_CryptoInfo* info)
+{
+    int                    ret = 0;
+    int                    sessionKey = 0;
+    unsigned char*         point = NULL;
+    word32                 pointLen;
+    CK_RV                  rv;
+    CK_KEY_TYPE            keyType = CKK_GENERIC_SECRET;
+    CK_MECHANISM           mech;
+    CK_ECDH1_DERIVE_PARAMS params;
+    CK_OBJECT_HANDLE       privateKey = NULL_PTR;
+    CK_OBJECT_HANDLE       secret = CK_INVALID_HANDLE;
+    CK_ULONG               secSz;
+    CK_ATTRIBUTE           tmpl[] = {
+        { CKA_CLASS,       &secretKeyClass, sizeof(secretKeyClass) },
+        { CKA_KEY_TYPE,    &keyType,        sizeof(keyType)        },
+        { CKA_PRIVATE,     &ckFalse,        sizeof(ckFalse)        },
+        { CKA_SENSITIVE,   &ckFalse,        sizeof(ckFalse)        },
+        { CKA_EXTRACTABLE, &ckTrue,         sizeof(ckTrue)         },
+        { CKA_VALUE_LEN,   &secSz,          sizeof(secSz)          }
+    };
+    CK_ULONG               tmplCnt = sizeof(tmpl) / sizeof(*tmpl);
+
+    ret = Pkcs11MechAvail(session, CKM_ECDH1_DERIVE);
+    if (ret == 0 && info->pk.ecdh.outlen == NULL) {
+        ret = BAD_FUNC_ARG;
+    }
+    if (ret == 0) {
+        WOLFSSL_MSG("PKCS#11: EC Key Derivation Operation");
+
+
+        if ((sessionKey = !mp_iszero(&info->pk.ecdh.private_key->k)))
+            ret = Pkcs11CreateEccPrivateKey(&privateKey, session,
+                                         info->pk.ecdh.private_key, CKA_DERIVE);
+        else if (info->pk.ecdh.private_key->idLen > 0) {
+            ret = Pkcs11FindKeyById(&privateKey, CKO_PRIVATE_KEY, CKK_EC,
+                                    session, info->pk.ecdh.private_key->id,
+                                    info->pk.ecdh.private_key->idLen);
+        }
+        else {
+            ret = Pkcs11FindEccKey(&privateKey, CKO_PRIVATE_KEY, session,
+                                                      info->pk.ecdh.public_key);
+        }
+    }
+    if (ret == 0) {
+        ret = wc_ecc_export_x963(info->pk.ecdh.public_key, NULL, &pointLen);
+        if (ret == LENGTH_ONLY_E) {
+            point = (unsigned char*)XMALLOC(pointLen,
+                                                 info->pk.ecdh.public_key->heap,
+                                                       DYNAMIC_TYPE_ECC_BUFFER);
+            ret = wc_ecc_export_x963(info->pk.ecdh.public_key, point,
+                                                                     &pointLen);
+        }
+    }
+
+    if (ret == 0) {
+        secSz = *info->pk.ecdh.outlen;
+        if (secSz > (CK_ULONG)info->pk.ecdh.private_key->dp->size)
+            secSz = info->pk.ecdh.private_key->dp->size;
+
+        params.kdf             = CKD_NULL;
+        params.pSharedData     = NULL;
+        params.ulSharedDataLen = 0;
+        params.pPublicData     = point;
+        params.ulPublicDataLen = pointLen;
+
+        mech.mechanism      = CKM_ECDH1_DERIVE;
+        mech.ulParameterLen = sizeof(params);
+        mech.pParameter     = &params;
+
+        rv = session->func->C_DeriveKey(session->handle, &mech, privateKey,
+                                                        tmpl, tmplCnt, &secret);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+
+    if (ret == 0) {
+        ret = Pkcs11ExtractSecret(session, secret, info->pk.ecdh.out,
+                                                          info->pk.ecdh.outlen);
+    }
+
+    if (sessionKey)
+        session->func->C_DestroyObject(session->handle, privateKey);
+
+    if (point != NULL)
+        XFREE(point, info->pk.ecdh.public_key->heap, DYNAMIC_TYPE_ECC_BUFFER);
+
+    return ret;
+}
+#endif
+
+/**
+ * Encode, in place, the ECDSA signature.
+ * Two fixed width values into ASN.1 DER encoded SEQ { INT, INT }
+ *
+ * @param  sig  [in,out]  Signature data.
+ * @param  sz   [in]      Size of original signature data.
+ * @return  Length of the ASN.1 DER enencoded signature.
+ */
+static word32 Pkcs11ECDSASig_Encode(byte* sig, word32 sz)
+{
+    word32 rHigh, sHigh, seqLen;
+    word32 rStart = 0, sStart = 0;
+    word32 sigSz, rSz, rLen, sSz, sLen;
+    word32 i;
+
+    /* Find first byte of data in r and s. */
+    while (rStart < sz - 1 && sig[rStart] == 0x00)
+        rStart++;
+    while (sStart < sz - 1 && sig[sz + sStart] == 0x00)
+        sStart++;
+    /* Check if 0 needs to be prepended to make integer a positive number. */
+    rHigh = sig[rStart] >> 7;
+    sHigh = sig[sz + sStart] >> 7;
+    /* Calculate length of integer to put into ASN.1 encoding. */
+    rLen = sz - rStart;
+    sLen = sz - sStart;
+    /* r and s: INT (2 bytes) + [ 0x00 ] + integer */
+    rSz = 2 + rHigh + rLen;
+    sSz = 2 + sHigh + sLen;
+    /* Calculate the complete ASN.1 DER encoded size. */
+    sigSz = rSz + sSz;
+    if (sigSz >= ASN_LONG_LENGTH)
+        seqLen = 3;
+    else
+        seqLen = 2;
+
+    /* Move s and then r integers into their final places. */
+    XMEMMOVE(sig + seqLen + rSz + (sSz - sLen), sig + sz + sStart, sLen);
+    XMEMMOVE(sig + seqLen       + (rSz - rLen), sig      + rStart, rLen);
+
+    /* Put the ASN.1 DER encoding around data. */
+    i = 0;
+    sig[i++] = ASN_CONSTRUCTED | ASN_SEQUENCE;
+    if (seqLen == 3)
+        sig[i++] = ASN_LONG_LENGTH | 0x01;
+    sig[i++] = sigSz;
+    sig[i++] = ASN_INTEGER;
+    sig[i++] = rHigh + (sz - rStart);
+    if (rHigh)
+        sig[i++] = 0x00;
+    i += sz - rStart;
+    sig[i++] = ASN_INTEGER;
+    sig[i++] = sHigh + (sz - sStart);
+    if (sHigh)
+        sig[i] = 0x00;
+
+    return seqLen + sigSz;
+}
+
+/**
+ * Decode the ECDSA signature.
+ * ASN.1 DER encode SEQ { INT, INT } converted to two fixed with values.
+ *
+ * @param  in    [in]  ASN.1 DER encoded signature.
+ * @param  inSz  [in]  Size of ASN.1 signature.
+ * @param  sig   [in]  Output buffer.
+ * @param  sz    [in]  Size of output buffer.
+ * @return  ASN_PARSE_E when the ASN.1 encoding is invalid.
+ *          0 on success.
+ */
+static int Pkcs11ECDSASig_Decode(const byte* in, word32 inSz, byte* sig,
+                                 word32 sz)
+{
+    int ret = 0;
+    word32 i = 0;
+    byte   tag;
+    int len, seqLen = 2;
+
+    /* Make sure zeros in place when decoding short integers. */
+    XMEMSET(sig, 0, sz * 2);
+
+    /* Check min data for: SEQ + INT. */
+    if (inSz < 5)
+        ret = ASN_PARSE_E;
+    /* Check SEQ */
+    if (ret == 0 && in[i++] != (ASN_CONSTRUCTED | ASN_SEQUENCE))
+        ret = ASN_PARSE_E;
+    if (ret == 0 && in[i] >= ASN_LONG_LENGTH) {
+        if (in[i] != (ASN_LONG_LENGTH | 0x01))
+            ret = ASN_PARSE_E;
+        else {
+            i++;
+            seqLen++;
+        }
+    }
+    if (ret == 0 && in[i++] != inSz - seqLen)
+        ret = ASN_PARSE_E;
+
+    /* Check INT */
+    if (ret == 0 && GetASNTag(in, &i, &tag, inSz) != 0)
+        ret = ASN_PARSE_E;
+    if (ret == 0 && tag != ASN_INTEGER)
+        ret = ASN_PARSE_E;
+    if (ret == 0 && (len = in[i++]) > sz + 1)
+        ret = ASN_PARSE_E;
+    /* Check there is space for INT data */
+    if (ret == 0 && i + len > inSz)
+        ret = ASN_PARSE_E;
+    if (ret == 0) {
+        /* Skip leading zero */
+        if (in[i] == 0x00) {
+            i++;
+            len--;
+        }
+        /* Copy r into sig. */
+        XMEMCPY(sig + sz - len, in + i, len);
+        i += len;
+    }
+
+    /* Check min data for: INT. */
+    if (ret == 0 && i + 2 > inSz)
+        ret = ASN_PARSE_E;
+    /* Check INT */
+    if (ret == 0 && GetASNTag(in, &i, &tag, inSz) != 0)
+        ret = ASN_PARSE_E;
+    if (ret == 0 && tag != ASN_INTEGER)
+        ret = ASN_PARSE_E;
+    if (ret == 0 && (len = in[i++]) > sz + 1)
+        ret = ASN_PARSE_E;
+    /* Check there is space for INT data */
+    if (ret == 0 && i + len > inSz)
+        ret = ASN_PARSE_E;
+    if (ret == 0) {
+        /* Skip leading zero */
+        if (in[i] == 0x00) {
+            i++;
+            len--;
+        }
+        /* Copy s into sig. */
+        XMEMCPY(sig + sz + sz - len, in + i, len);
+    }
+
+    return ret;
+}
+
+/**
+ * Get the parameters from the private key on the device.
+ *
+ * @param  session  [in]  Session object.
+ * @param  privKey  [in]  PKCS #11 object handle of private key..
+ * @param  key      [in]  Ecc key to set parameters against.
+ * @return  WC_HW_E when a PKCS#11 library call fails.
+ *          0 on success.
+ */
+static int Pkcs11GetEccParams(Pkcs11Session* session, CK_OBJECT_HANDLE privKey,
+                              ecc_key* key)
+{
+    int          ret = 0;
+    int          curveId;
+    CK_RV        rv;
+    byte         oid[16];
+    CK_ATTRIBUTE template[] = {
+        { CKA_EC_PARAMS, (CK_VOID_PTR)oid, sizeof(oid) }
+    };
+
+    rv = session->func->C_GetAttributeValue(session->handle, privKey, template,
+                                                                             1);
+    if (rv != CKR_OK)
+        ret = WC_HW_E;
+    if (ret == 0) {
+        /* PKCS #11 wraps the OID in ASN.1 */
+        curveId = wc_ecc_get_curve_id_from_oid(oid + 2,
+                                            (word32)template[0].ulValueLen - 2);
+        if (curveId == ECC_CURVE_INVALID)
+            ret = WC_HW_E;
+    }
+    if (ret == 0)
+        ret = wc_ecc_set_curve(key, 0, curveId);
+
+    return ret;
+}
+
+/**
+ * Performs the ECDSA signing operation.
+ *
+ * @param  session  [in]  Session object.
+ * @param  info     [in]  Cryptographic operation data.
+ * @return  WC_HW_E when a PKCS#11 library call fails.
+ *          0 on success.
+ */
+static int Pkcs11ECDSA_Sign(Pkcs11Session* session, wc_CryptoInfo* info)
+{
+    int                    ret = 0;
+    int                    sessionKey = 0;
+    word32                 sz;
+    CK_RV                  rv;
+    CK_ULONG               outLen;
+    CK_MECHANISM           mech;
+    CK_MECHANISM_INFO      mechInfo;
+    CK_OBJECT_HANDLE       privateKey = NULL_PTR;
+
+    /* Check operation is supported. */
+    rv = session->func->C_GetMechanismInfo(session->slotId, CKM_ECDSA,
+                                                                     &mechInfo);
+    if (rv != CKR_OK || (mechInfo.flags & CKF_SIGN) == 0)
+        ret = NOT_COMPILED_IN;
+
+    if (ret == 0 && info->pk.eccsign.outlen == NULL) {
+        ret = BAD_FUNC_ARG;
+    }
+    if (ret == 0) {
+        WOLFSSL_MSG("PKCS#11: EC Signing Operation");
+
+        if ((sessionKey = !mp_iszero(&info->pk.eccsign.key->k)))
+            ret = Pkcs11CreateEccPrivateKey(&privateKey, session,
+                                                info->pk.eccsign.key, CKA_SIGN);
+        else if (info->pk.eccsign.key->idLen > 0) {
+            ret = Pkcs11FindKeyById(&privateKey, CKO_PRIVATE_KEY, CKK_EC,
+                                    session, info->pk.eccsign.key->id,
+                                    info->pk.eccsign.key->idLen);
+            if (ret == 0 && info->pk.eccsign.key->dp == NULL) {
+                ret = Pkcs11GetEccParams(session, privateKey,
+                                                          info->pk.eccsign.key);
+            }
+        }
+        else {
+            ret = Pkcs11FindEccKey(&privateKey, CKO_PRIVATE_KEY, session,
+                                                          info->pk.eccsign.key);
+        }
+    }
+
+    if (ret == 0) {
+        sz = info->pk.eccsign.key->dp->size;
+        /* Maximum encoded size is two ordinates + 8 bytes of ASN.1. */
+        if (*info->pk.eccsign.outlen < (word32)wc_ecc_sig_size_calc(sz))
+            ret = BUFFER_E;
+    }
+
+    if (ret == 0) {
+        mech.mechanism      = CKM_ECDSA;
+        mech.ulParameterLen = 0;
+        mech.pParameter     = NULL;
+
+        rv = session->func->C_SignInit(session->handle, &mech, privateKey);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+
+    if (ret == 0) {
+        outLen = *info->pk.eccsign.outlen;
+        rv = session->func->C_Sign(session->handle,
+                                   (CK_BYTE_PTR)info->pk.eccsign.in,
+                                   info->pk.eccsign.inlen, info->pk.eccsign.out,
+                                   &outLen);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+
+    if (ret == 0) {
+        *info->pk.eccsign.outlen = Pkcs11ECDSASig_Encode(info->pk.eccsign.out,
+                                                         sz);
+    }
+
+    if (sessionKey)
+        session->func->C_DestroyObject(session->handle, privateKey);
+
+    return ret;
+}
+
+/**
+ * Performs the ECDSA verification operation.
+ *
+ * @param  session  [in]  Session object.
+ * @param  info     [in]  Cryptographic operation data.
+ * @return  WC_HW_E when a PKCS#11 library call fails.
+ *          MEMORY_E when a memory allocation fails.
+ *          0 on success.
+ */
+static int Pkcs11ECDSA_Verify(Pkcs11Session* session, wc_CryptoInfo* info)
+{
+    int                    ret = 0;
+    CK_RV                  rv;
+    CK_MECHANISM           mech;
+    CK_MECHANISM_INFO      mechInfo;
+    CK_OBJECT_HANDLE       publicKey = NULL_PTR;
+    unsigned char*         sig = NULL;
+    word32                 sz = info->pk.eccverify.key->dp->size;
+
+    /* Check operation is supported. */
+    rv = session->func->C_GetMechanismInfo(session->slotId, CKM_ECDSA,
+                                                                     &mechInfo);
+    if (rv != CKR_OK || (mechInfo.flags & CKF_VERIFY) == 0)
+        ret = NOT_COMPILED_IN;
+
+    if (ret == 0 && info->pk.eccverify.res == NULL) {
+        ret = BAD_FUNC_ARG;
+    }
+
+    if (ret == 0) {
+        WOLFSSL_MSG("PKCS#11: EC Verification Operation");
+
+        ret = Pkcs11CreateEccPublicKey(&publicKey, session,
+                                            info->pk.eccverify.key, CKA_VERIFY);
+    }
+
+    if (ret == 0) {
+        sig = XMALLOC(sz * 2, info->pk.eccverify.key->heap,
+                                                       DYNAMIC_TYPE_TMP_BUFFER);
+        if (sig == NULL)
+            ret = MEMORY_E;
+    }
+
+    if (ret == 0) {
+        ret = Pkcs11ECDSASig_Decode(info->pk.eccverify.sig,
+                                    info->pk.eccverify.siglen, sig, sz);
+    }
+    if (ret == 0) {
+        mech.mechanism      = CKM_ECDSA;
+        mech.ulParameterLen = 0;
+        mech.pParameter     = NULL;
+
+        rv = session->func->C_VerifyInit(session->handle, &mech, publicKey);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+
+    if (ret == 0) {
+        *info->pk.eccverify.res = 0;
+        rv = session->func->C_Verify(session->handle,
+                                     (CK_BYTE_PTR)info->pk.eccverify.hash,
+                                     info->pk.eccverify.hashlen,
+                                     (CK_BYTE_PTR)sig, sz * 2);
+        if (rv == CKR_SIGNATURE_INVALID) {
+        }
+        else if (rv != CKR_OK)
+            ret = WC_HW_E;
+        else
+            *info->pk.eccverify.res = 1;
+    }
+
+    if (publicKey != NULL_PTR)
+        session->func->C_DestroyObject(session->handle, publicKey);
+
+    if (sig != NULL)
+        XFREE(sig, info->pk.eccverify.key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+
+    return ret;
+}
+#endif
+
+#if !defined(NO_AES) && defined(HAVE_AESGCM)
+/**
+ * Performs the AES-GCM encryption operation.
+ *
+ * @param  session  [in]  Session object.
+ * @param  info     [in]  Cryptographic operation data.
+ * @return  WC_HW_E when a PKCS#11 library call fails.
+ *          MEMORY_E when a memory allocation fails.
+ *          0 on success.
+ */
+static int Pkcs11AesGcmEncrypt(Pkcs11Session* session, wc_CryptoInfo* info)
+{
+    int                ret = 0;
+    CK_RV              rv;
+    Aes*               aes = info->cipher.aesgcm_enc.aes;
+    CK_GCM_PARAMS      params;
+    CK_MECHANISM_INFO  mechInfo;
+    CK_OBJECT_HANDLE   key = NULL_PTR;
+    CK_MECHANISM       mech;
+    CK_ULONG           outLen;
+
+    /* Check operation is supported. */
+    rv = session->func->C_GetMechanismInfo(session->slotId, CKM_AES_GCM,
+                                                                     &mechInfo);
+    if (rv != CKR_OK || (mechInfo.flags & CKF_ENCRYPT) == 0)
+        ret = NOT_COMPILED_IN;
+
+    if (ret == 0) {
+        WOLFSSL_MSG("PKCS#11: AES-GCM Encryption Operation");
+    }
+
+    /* Create a private key object or find by id. */
+    if (ret == 0 && aes->idLen == 0) {
+        ret = Pkcs11CreateSecretKey(&key, session, CKK_AES,
+                                    (unsigned char*)aes->devKey, aes->keylen,
+                                    NULL, 0);
+
+    }
+    else if (ret == 0) {
+        ret = Pkcs11FindKeyById(&key, CKO_SECRET_KEY, CKK_AES, session, aes->id,
+                                                                    aes->idLen);
+    }
+
+    if (ret == 0) {
+        params.pIv       = (CK_BYTE_PTR)info->cipher.aesgcm_enc.iv;
+        params.ulIvLen   = info->cipher.aesgcm_enc.ivSz;
+        params.pAAD      = (CK_BYTE_PTR)info->cipher.aesgcm_enc.authIn;
+        params.ulAADLen  = info->cipher.aesgcm_enc.authInSz;
+        params.ulTagBits = info->cipher.aesgcm_enc.authTagSz * 8;
+
+        mech.mechanism      = CKM_AES_GCM;
+        mech.ulParameterLen = sizeof(params);
+        mech.pParameter     = &params;
+
+        rv = session->func->C_EncryptInit(session->handle, &mech, key);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+    if (ret == 0) {
+        outLen = info->cipher.aesgcm_enc.sz;
+        rv = session->func->C_EncryptUpdate(session->handle,
+                                        (CK_BYTE_PTR)info->cipher.aesgcm_enc.in,
+                                        info->cipher.aesgcm_enc.sz,
+                                        info->cipher.aesgcm_enc.out,
+                                        &outLen);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+    if (ret == 0) {
+        /* Authentication tag comes out in final block. */
+        outLen = info->cipher.aesgcm_enc.authTagSz;
+        rv = session->func->C_EncryptFinal(session->handle,
+                                           info->cipher.aesgcm_enc.authTag,
+                                           &outLen);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+
+    if (aes->idLen == 0 && key != NULL_PTR)
+        session->func->C_DestroyObject(session->handle, key);
+
+    return ret;
+}
+
+/**
+ * Performs the AES-GCM decryption operation.
+ *
+ * @param  session  [in]  Session object.
+ * @param  info     [in]  Cryptographic operation data.
+ * @return  WC_HW_E when a PKCS#11 library call fails.
+ *          MEMORY_E when a memory allocation fails.
+ *          0 on success.
+ */
+static int Pkcs11AesGcmDecrypt(Pkcs11Session* session, wc_CryptoInfo* info)
+{
+    int                ret = 0;
+    CK_RV              rv;
+    Aes*               aes = info->cipher.aesgcm_enc.aes;
+    CK_GCM_PARAMS      params;
+    CK_MECHANISM_INFO  mechInfo;
+    CK_OBJECT_HANDLE   key = NULL_PTR;
+    CK_MECHANISM       mech;
+    CK_ULONG           outLen;
+    word32             len;
+
+    /* Check operation is supported. */
+    rv = session->func->C_GetMechanismInfo(session->slotId, CKM_AES_GCM,
+                                                                     &mechInfo);
+    if (rv != CKR_OK || (mechInfo.flags & CKF_DECRYPT) == 0)
+        ret = NOT_COMPILED_IN;
+
+    if (ret == 0) {
+        WOLFSSL_MSG("PKCS#11: AES-GCM Decryption Operation");
+    }
+
+    /* Create a private key object or find by id. */
+    if (ret == 0 && aes->idLen == 0) {
+        ret = Pkcs11CreateSecretKey(&key, session, CKK_AES,
+                                    (unsigned char*)aes->devKey, aes->keylen,
+                                    NULL, 0);
+    }
+    else if (ret == 0) {
+        ret = Pkcs11FindKeyById(&key, CKO_SECRET_KEY, CKK_AES, session, aes->id,
+                                                                    aes->idLen);
+    }
+
+    if (ret == 0) {
+        params.pIv       = (CK_BYTE_PTR)info->cipher.aesgcm_dec.iv;
+        params.ulIvLen   = info->cipher.aesgcm_dec.ivSz;
+        params.pAAD      = (CK_BYTE_PTR)info->cipher.aesgcm_dec.authIn;
+        params.ulAADLen  = info->cipher.aesgcm_dec.authInSz;
+        params.ulTagBits = info->cipher.aesgcm_dec.authTagSz * 8;
+
+        mech.mechanism      = CKM_AES_GCM;
+        mech.ulParameterLen = sizeof(params);
+        mech.pParameter     = &params;
+
+        rv = session->func->C_DecryptInit(session->handle, &mech, key);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+    if (ret == 0) {
+        outLen = len = info->cipher.aesgcm_dec.sz;
+        rv = session->func->C_DecryptUpdate(session->handle,
+                                        (CK_BYTE_PTR)info->cipher.aesgcm_dec.in,
+                                        info->cipher.aesgcm_dec.sz,
+                                        info->cipher.aesgcm_dec.out,
+                                        &outLen);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+    if (ret == 0) {
+        /* Put authentication tag in as encrypted data. */
+        outLen = len = (len + info->cipher.aesgcm_dec.authTagSz -
+                                                                (word32)outLen);
+        rv = session->func->C_DecryptUpdate(session->handle,
+                                   (CK_BYTE_PTR)info->cipher.aesgcm_dec.authTag,
+                                   info->cipher.aesgcm_dec.authTagSz,
+                                   info->cipher.aesgcm_dec.out,
+                                   &outLen);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+    if (ret == 0) {
+        outLen = len = (len - (word32)outLen);
+        /* Decrypted data comes out now. */
+        rv = session->func->C_DecryptFinal(session->handle,
+                                           info->cipher.aesgcm_dec.out,
+                                           &outLen);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+
+    if (aes->idLen == 0 && key != NULL_PTR)
+        session->func->C_DestroyObject(session->handle, key);
+
+    return ret;
+}
+#endif
+
+#if !defined(NO_AES) && defined(HAVE_AES_CBC)
+/**
+ * Performs the AES-CBC encryption operation.
+ *
+ * @param  session  [in]  Session object.
+ * @param  info     [in]  Cryptographic operation data.
+ * @return  WC_HW_E when a PKCS#11 library call fails.
+ *          MEMORY_E when a memory allocation fails.
+ *          0 on success.
+ */
+static int Pkcs11AesCbcEncrypt(Pkcs11Session* session, wc_CryptoInfo* info)
+{
+    int                ret = 0;
+    CK_RV              rv;
+    Aes*               aes = info->cipher.aescbc.aes;
+    CK_MECHANISM_INFO  mechInfo;
+    CK_OBJECT_HANDLE   key = NULL_PTR;
+    CK_MECHANISM       mech;
+    CK_ULONG           outLen;
+
+    /* Check operation is supported. */
+    rv = session->func->C_GetMechanismInfo(session->slotId, CKM_AES_CBC,
+                                                                     &mechInfo);
+    if (rv != CKR_OK || (mechInfo.flags & CKF_ENCRYPT) == 0)
+        ret = NOT_COMPILED_IN;
+
+    if (ret == 0) {
+        WOLFSSL_MSG("PKCS#11: AES-CBC Encryption Operation");
+    }
+
+    /* Create a private key object or find by id. */
+    if (ret == 0 && aes->idLen == 0) {
+        ret = Pkcs11CreateSecretKey(&key, session, CKK_AES,
+                                    (unsigned char*)aes->devKey, aes->keylen,
+                                    NULL, 0);
+
+    }
+    else if (ret == 0) {
+        ret = Pkcs11FindKeyById(&key, CKO_SECRET_KEY, CKK_AES, session, aes->id,
+                                                                    aes->idLen);
+    }
+
+    if (ret == 0) {
+        mech.mechanism      = CKM_AES_CBC;
+        mech.ulParameterLen = AES_BLOCK_SIZE;
+        mech.pParameter     = (CK_BYTE_PTR)info->cipher.aescbc.aes->reg;
+
+        rv = session->func->C_EncryptInit(session->handle, &mech, key);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+    if (ret == 0) {
+        outLen = info->cipher.aescbc.sz;
+        rv = session->func->C_Encrypt(session->handle,
+                                      (CK_BYTE_PTR)info->cipher.aescbc.in,
+                                      info->cipher.aescbc.sz,
+                                      info->cipher.aescbc.out,
+                                      &outLen);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+
+    if (aes->idLen == 0 && key != NULL_PTR)
+        session->func->C_DestroyObject(session->handle, key);
+
+    return ret;
+}
+
+/**
+ * Performs the AES-CBC decryption operation.
+ *
+ * @param  session  [in]  Session object.
+ * @param  info     [in]  Cryptographic operation data.
+ * @return  WC_HW_E when a PKCS#11 library call fails.
+ *          MEMORY_E when a memory allocation fails.
+ *          0 on success.
+ */
+static int Pkcs11AesCbcDecrypt(Pkcs11Session* session, wc_CryptoInfo* info)
+{
+    int                ret = 0;
+    CK_RV              rv;
+    Aes*               aes = info->cipher.aescbc.aes;
+    CK_MECHANISM_INFO  mechInfo;
+    CK_OBJECT_HANDLE   key = NULL_PTR;
+    CK_MECHANISM       mech;
+    CK_ULONG           outLen;
+
+    /* Check operation is supported. */
+    rv = session->func->C_GetMechanismInfo(session->slotId, CKM_AES_CBC,
+                                                                     &mechInfo);
+    if (rv != CKR_OK || (mechInfo.flags & CKF_DECRYPT) == 0)
+        ret = NOT_COMPILED_IN;
+
+    if (ret == 0) {
+        WOLFSSL_MSG("PKCS#11: AES-CBC Decryption Operation");
+    }
+
+    /* Create a private key object or find by id. */
+    if (ret == 0 && aes->idLen == 0) {
+        ret = Pkcs11CreateSecretKey(&key, session, CKK_AES,
+                                    (unsigned char*)aes->devKey, aes->keylen,
+                                    NULL, 0);
+    }
+    else if (ret == 0) {
+        ret = Pkcs11FindKeyById(&key, CKO_SECRET_KEY, CKK_AES, session, aes->id,
+                                                                    aes->idLen);
+    }
+
+    if (ret == 0) {
+        mech.mechanism      = CKM_AES_CBC;
+        mech.ulParameterLen = AES_BLOCK_SIZE;
+        mech.pParameter     = (CK_BYTE_PTR)info->cipher.aescbc.aes->reg;
+
+        rv = session->func->C_DecryptInit(session->handle, &mech, key);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+    if (ret == 0) {
+        outLen = info->cipher.aescbc.sz;
+        rv = session->func->C_DecryptUpdate(session->handle,
+                                        (CK_BYTE_PTR)info->cipher.aescbc.in,
+                                        info->cipher.aescbc.sz,
+                                        info->cipher.aescbc.out,
+                                        &outLen);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+
+    if (aes->idLen == 0 && key != NULL_PTR)
+        session->func->C_DestroyObject(session->handle, key);
+
+    return ret;
+}
+#endif
+
+#ifndef NO_HMAC
+/**
+ * Updates or calculates the HMAC of the data.
+ *
+ * @param  session  [in]  Session object.
+ * @param  info     [in]  Cryptographic operation data.
+ * @return  WC_HW_E when a PKCS#11 library call fails.
+ *          0 on success.
+ */
+static int Pkcs11Hmac(Pkcs11Session* session, wc_CryptoInfo* info)
+{
+    int                ret = 0;
+    CK_RV              rv;
+    Hmac*              hmac = info->hmac.hmac;
+    CK_MECHANISM_INFO  mechInfo;
+    CK_OBJECT_HANDLE   key = NULL_PTR;
+    CK_MECHANISM       mech;
+    CK_ULONG           outLen;
+    int                mechType;
+    int                keyType;
+
+    if (hmac->innerHashKeyed == WC_HMAC_INNER_HASH_KEYED_SW)
+        ret = NOT_COMPILED_IN;
+
+    if (ret == 0)
+        ret = Pkcs11HmacTypes(info->hmac.macType, &mechType, &keyType);
+    if (ret == 0) {
+        /* Check operation is supported. */
+        rv = session->func->C_GetMechanismInfo(session->slotId, mechType,
+                                                                     &mechInfo);
+        if (rv != CKR_OK || (mechInfo.flags & CKF_SIGN) == 0)
+            ret = NOT_COMPILED_IN;
+    }
+
+    /* Check whether key been used to initialized. */
+    if (ret == 0 && !hmac->innerHashKeyed) {
+        WOLFSSL_MSG("PKCS#11: HMAC Init");
+
+        /* Check device supports key length. */
+        if (mechInfo.ulMaxKeySize > 0 &&
+                                       (hmac->keyLen < mechInfo.ulMinKeySize ||
+                                        hmac->keyLen > mechInfo.ulMaxKeySize)) {
+            WOLFSSL_MSG("PKCS#11: Key Length not supported");
+            ret = NOT_COMPILED_IN;
+        }
+
+        /* Create a private key object or find by id. */
+        if (ret == 0 && hmac->idLen == 0) {
+            ret = Pkcs11CreateSecretKey(&key, session, keyType,
+                                    (unsigned char*)hmac->keyRaw, hmac->keyLen,
+                                    NULL, 0);
+            if (ret == WC_HW_E) {
+                ret = Pkcs11CreateSecretKey(&key, session, CKK_GENERIC_SECRET,
+                                    (unsigned char*)hmac->keyRaw, hmac->keyLen,
+                                    NULL, 0);
+            }
+
+        }
+        else if (ret == 0) {
+            ret = Pkcs11FindKeyById(&key, CKO_SECRET_KEY, keyType, session,
+                                                         hmac->id, hmac->idLen);
+            if (ret == WC_HW_E) {
+                ret = Pkcs11FindKeyById(&key, CKO_SECRET_KEY,
+                                          CKK_GENERIC_SECRET, session, hmac->id,
+                                          hmac->idLen);
+            }
+        }
+
+        /* Initialize HMAC operation */
+        if (ret == 0) {
+            mech.mechanism      = mechType;
+            mech.ulParameterLen = 0;
+            mech.pParameter     = NULL;
+
+            rv = session->func->C_SignInit(session->handle, &mech, key);
+            if (rv != CKR_OK)
+                ret = WC_HW_E;
+        }
+
+        /* Don't imitialize HMAC again if this succeeded */
+        if (ret == 0)
+            hmac->innerHashKeyed = WC_HMAC_INNER_HASH_KEYED_DEV;
+    }
+    /* Update the HMAC if input data passed in. */
+    if (ret == 0 && info->hmac.inSz > 0) {
+        WOLFSSL_MSG("PKCS#11: HMAC Update");
+
+        rv = session->func->C_SignUpdate(session->handle,
+                                         (CK_BYTE_PTR)info->hmac.in,
+                                         info->hmac.inSz);
+        /* Some algorithm implementations only support C_Sign. */
+        if (rv == CKR_MECHANISM_INVALID) {
+            WOLFSSL_MSG("PKCS#11: HMAC Update/Final not supported");
+            ret = NOT_COMPILED_IN;
+            /* Allow software implementation to set key. */
+            hmac->innerHashKeyed = 0;
+        }
+        else if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+    /* Calculate the HMAC result if output buffer specified. */
+    if (ret == 0 && info->hmac.digest != NULL) {
+        WOLFSSL_MSG("PKCS#11: HMAC Final");
+
+        outLen = WC_MAX_DIGEST_SIZE;
+        rv = session->func->C_SignFinal(session->handle,
+                                        (CK_BYTE_PTR)info->hmac.digest,
+                                        &outLen);
+        /* Some algorithm implementations only support C_Sign. */
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+        else
+            hmac->innerHashKeyed = 0;
+    }
+
+    if (hmac->idLen == 0 && key != NULL_PTR)
+        session->func->C_DestroyObject(session->handle, key);
+
+    return ret;
+}
+#endif
+
+#ifndef WC_NO_RNG
+#ifndef HAVE_HASHDRBG
+/**
+ * Performs random number generation.
+ *
+ * @param  session  [in]  Session object.
+ * @param  info     [in]  Cryptographic operation data.
+ * @return  WC_HW_E when a PKCS#11 library call fails.
+ *          0 on success.
+ */
+static int Pkcs11RandomBlock(Pkcs11Session* session, wc_CryptoInfo* info)
+{
+    int                ret = 0;
+    CK_RV              rv;
+
+    rv = session->func->C_GenerateRandom(session->handle, info->rng.out,
+                                                                  info->rng.sz);
+    if (rv != CKR_OK)
+        ret = WC_HW_E;
+    return ret;
+}
+#endif
+
+/**
+ * Generates entropy (seed) data.
+ *
+ * @param  session  [in]  Session object.
+ * @param  info     [in]  Cryptographic operation data.
+ * @return  WC_HW_E when a PKCS#11 library call fails.
+ *          0 on success.
+ */
+static int Pkcs11RandomSeed(Pkcs11Session* session, wc_CryptoInfo* info)
+{
+    int                ret = 0;
+    CK_RV              rv;
+
+    rv = session->func->C_GenerateRandom(session->handle, info->seed.seed,
+                                                                 info->seed.sz);
+    if (rv != CKR_OK)
+        ret = WC_HW_E;
+    return ret;
+}
+#endif
+
+/**
+ * Perform a cryptographic operation using PKCS#11 device.
+ *
+ * @param  devId  [in]  Device identifier.
+ * @param  info   [in]  Cryptographic operation data.
+ * @param  ctx    [in]  Context data for device - the token object.
+ * @return  WC_HW_E when a PKCS#11 library call fails.
+ *          0 on success.
+ */
+int wc_Pkcs11_CryptoDevCb(int devId, wc_CryptoInfo* info, void* ctx)
+{
+    int ret = 0;
+    Pkcs11Token* token = (Pkcs11Token*)ctx;
+    Pkcs11Session session;
+    int readWrite = 0;
+
+    if (devId <= INVALID_DEVID || info == NULL || ctx == NULL)
+        ret = BAD_FUNC_ARG;
+
+    if (ret == 0) {
+        ret = Pkcs11OpenSession(token, &session, readWrite);
+        if (ret == 0) {
+            if (info->algo_type == WC_ALGO_TYPE_PK) {
+#if !defined(NO_RSA) || defined(HAVE_ECC)
+                switch (info->pk.type) {
+    #ifndef NO_RSA
+                    case WC_PK_TYPE_RSA:
+                        ret = Pkcs11Rsa(&session, info);
+                        break;
+        #ifdef WOLFSSL_KEY_GEN
+                    case WC_PK_TYPE_RSA_KEYGEN:
+                        ret = Pkcs11RsaKeyGen(&session, info);
+                        break;
+        #endif
+    #endif
+    #ifdef HAVE_ECC
+        #ifndef NO_PKCS11_EC_KEYGEN
+                    case WC_PK_TYPE_EC_KEYGEN:
+                        ret = Pkcs11EcKeyGen(&session, info);
+                        break;
+        #endif
+        #ifndef NO_PKCS11_ECDH
+                    case WC_PK_TYPE_ECDH:
+                        ret = Pkcs11ECDH(&session, info);
+                        break;
+        #endif
+                    case WC_PK_TYPE_ECDSA_SIGN:
+                        ret = Pkcs11ECDSA_Sign(&session, info);
+                        break;
+                    case WC_PK_TYPE_ECDSA_VERIFY:
+                        ret = Pkcs11ECDSA_Verify(&session, info);
+                        break;
+    #endif
+                    default:
+                        ret = NOT_COMPILED_IN;
+                        break;
+                }
+#else
+                ret = NOT_COMPILED_IN;
+#endif /* !NO_RSA || HAVE_ECC */
+            }
+            else if (info->algo_type == WC_ALGO_TYPE_CIPHER) {
+    #ifndef NO_AES
+                switch (info->cipher.type) {
+        #ifdef HAVE_AESGCM
+                    case WC_CIPHER_AES_GCM:
+                        if (info->cipher.enc)
+                            ret = Pkcs11AesGcmEncrypt(&session, info);
+                        else
+                            ret = Pkcs11AesGcmDecrypt(&session, info);
+                        break;
+        #endif
+        #ifdef HAVE_AES_CBC
+                    case WC_CIPHER_AES_CBC:
+                        if (info->cipher.enc)
+                            ret = Pkcs11AesCbcEncrypt(&session, info);
+                        else
+                            ret = Pkcs11AesCbcDecrypt(&session, info);
+                        break;
+        #endif
+                }
+    #else
+                ret = NOT_COMPILED_IN;
+    #endif
+            }
+            else if (info->algo_type == WC_ALGO_TYPE_HMAC) {
+    #ifndef NO_HMAC
+                ret = Pkcs11Hmac(&session, info);
+    #else
+                ret = NOT_COMPILED_IN;
+    #endif
+            }
+            else if (info->algo_type == WC_ALGO_TYPE_RNG) {
+    #if !defined(WC_NO_RNG) && !defined(HAVE_HASHDRBG)
+                ret = Pkcs11RandomBlock(&session, info);
+    #else
+                ret = NOT_COMPILED_IN;
+    #endif
+            }
+            else if (info->algo_type == WC_ALGO_TYPE_SEED) {
+    #ifndef WC_NO_RNG
+                ret = Pkcs11RandomSeed(&session, info);
+    #else
+                ret = NOT_COMPILED_IN;
+    #endif
+            }
+            else
+                ret = NOT_COMPILED_IN;
+
+            Pkcs11CloseSession(token, &session);
+        }
+    }
+
+    return ret;
+}
+
+#endif /* HAVE_PKCS11 */
+
+
--- a/wolfcrypt/src/wc_port.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/wc_port.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* port.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -49,6 +49,12 @@
 #if defined(WOLFSSL_ATMEL) || defined(WOLFSSL_ATECC508A)
     #include <wolfssl/wolfcrypt/port/atmel/atmel.h>
 #endif
+#if defined(WOLFSSL_RENESAS_TSIP)
+    #include <wolfssl/wolfcrypt/port/Renesas/renesas-tsip-crypt.h>
+#endif
+#if defined(WOLFSSL_STSAFEA100)
+    #include <wolfssl/wolfcrypt/port/st/stsafe.h>
+#endif
 
 #if defined(OPENSSL_EXTRA) || defined(HAVE_WEBSERVER)
     #include <wolfssl/openssl/evp.h>
@@ -64,8 +70,24 @@
     #include <wolfssl/wolfcrypt/port/caam/wolfcaam.h>
 #endif
 
-#ifdef WOLF_CRYPTO_DEV
-    #include <wolfssl/wolfcrypt/cryptodev.h>
+#ifdef WOLF_CRYPTO_CB
+    #include <wolfssl/wolfcrypt/cryptocb.h>
+#endif
+
+#ifdef HAVE_INTEL_QA_SYNC
+    #include <wolfssl/wolfcrypt/port/intel/quickassist_sync.h>
+#endif
+
+#ifdef HAVE_CAVIUM_OCTEON_SYNC
+    #include <wolfssl/wolfcrypt/port/cavium/cavium_octeon_sync.h>
+#endif
+
+#ifdef WOLFSSL_SCE
+    #include "hal_data.h"
+#endif
+
+#if defined(WOLFSSL_DSP) && !defined(WOLFSSL_DSP_BUILD)
+    #include "rpcmem.h"
 #endif
 
 #ifdef _MSC_VER
@@ -86,8 +108,19 @@
     if (initRefCount == 0) {
         WOLFSSL_ENTER("wolfCrypt_Init");
 
-    #ifdef WOLF_CRYPTO_DEV
-        wc_CryptoDev_Init();
+    #ifdef WOLFSSL_FORCE_MALLOC_FAIL_TEST
+        {
+            word32 rngMallocFail;
+            time_t seed = time(NULL);
+            srand((word32)seed);
+            rngMallocFail = rand() % 2000; /* max 2000 */
+            printf("\n--- RNG MALLOC FAIL AT %d---\n", rngMallocFail);
+            wolfSSL_SetMemFailCount(rngMallocFail);
+        }
+    #endif
+
+    #ifdef WOLF_CRYPTO_CB
+        wc_CryptoCb_Init();
     #endif
 
     #ifdef WOLFSSL_ASYNC_CRYPT
@@ -98,6 +131,16 @@
         }
     #endif
 
+    #if defined(WOLFSSL_RENESAS_TSIP_CRYPT)
+        ret = tsip_Open( );
+        if( ret != TSIP_SUCCESS ) {
+            WOLFSSL_MSG("RENESAS TSIP Open failed");
+            /* not return 1 since WOLFSSL_SUCCESS=1*/
+            ret = -1;/* FATAL ERROR */
+            return ret;
+        }
+    #endif
+
     #if defined(WOLFSSL_TRACK_MEMORY) && !defined(WOLFSSL_STATIC_MEMORY)
         ret = InitMemoryTracker();
         if (ret != 0) {
@@ -138,15 +181,33 @@
     #endif
 
     #if defined(WOLFSSL_ATMEL) || defined(WOLFSSL_ATECC508A)
-        atmel_init();
+        ret = atmel_init();
+        if (ret != 0) {
+            WOLFSSL_MSG("CryptoAuthLib init failed");
+            return ret;
+        }
+    #endif
+    #if defined(WOLFSSL_CRYPTOCELL)
+        /* enable and initialize the ARM CryptoCell 3xx runtime library */
+        ret = cc310_Init();
+        if (ret != 0) {
+            WOLFSSL_MSG("CRYPTOCELL init failed");
+            return ret;
+        }
+    #endif
+    #if defined(WOLFSSL_STSAFEA100)
+        stsafe_interface_init();
     #endif
 
     #ifdef WOLFSSL_ARMASM
         WOLFSSL_MSG("Using ARM hardware acceleration");
     #endif
 
-    #if !defined(WOLFCRYPT_ONLY) && \
-        ( defined(OPENSSL_EXTRA) || defined(HAVE_WEBSERVER) )
+    #ifdef WOLFSSL_AFALG
+	WOLFSSL_MSG("Using AF_ALG for crypto acceleration");
+    #endif
+
+    #if !defined(WOLFCRYPT_ONLY) && defined(OPENSSL_EXTRA)
         wolfSSL_EVP_init();
     #endif
 
@@ -166,6 +227,19 @@
     #endif
 #endif
 
+#ifdef WOLFSSL_SCE
+        ret = (int)WOLFSSL_SCE_GSCE_HANDLE.p_api->open(
+                WOLFSSL_SCE_GSCE_HANDLE.p_ctrl, WOLFSSL_SCE_GSCE_HANDLE.p_cfg);
+        if (ret == SSP_ERR_CRYPTO_SCE_ALREADY_OPEN) {
+            WOLFSSL_MSG("SCE already open");
+            ret = 0;
+        }
+        if (ret != SSP_SUCCESS) {
+            WOLFSSL_MSG("Error opening SCE");
+            return -1; /* FATAL_ERROR */
+        }
+#endif
+
 #if defined(WOLFSSL_IMX6_CAAM) || defined(WOLFSSL_IMX6_CAAM_RNG) || \
     defined(WOLFSSL_IMX6_CAAM_BLOB)
         if ((ret = wc_caamInit()) != 0) {
@@ -173,8 +247,14 @@
         }
 #endif
 
-        initRefCount = 1;
+#if defined(WOLFSSL_DSP) && !defined(WOLFSSL_DSP_BUILD)
+	if ((ret = wolfSSL_InitHandle()) != 0) {
+            return ret;
+        }
+        rpcmem_init();
+#endif
     }
+    initRefCount++;
 
     return ret;
 }
@@ -185,7 +265,11 @@
 {
     int ret = 0;
 
-    if (initRefCount == 1) {
+    initRefCount--;
+    if (initRefCount < 0)
+        initRefCount = 0;
+
+    if (initRefCount == 0) {
         WOLFSSL_ENTER("wolfCrypt_Cleanup");
 
 #ifdef HAVE_ECC
@@ -208,13 +292,23 @@
     #ifdef WOLFSSL_ASYNC_CRYPT
         wolfAsync_HardwareStop();
     #endif
-
+    #ifdef WOLFSSL_SCE
+        WOLFSSL_SCE_GSCE_HANDLE.p_api->close(WOLFSSL_SCE_GSCE_HANDLE.p_ctrl);
+    #endif
     #if defined(WOLFSSL_IMX6_CAAM) || defined(WOLFSSL_IMX6_CAAM_RNG) || \
         defined(WOLFSSL_IMX6_CAAM_BLOB)
         wc_caamFree();
     #endif
-
-        initRefCount = 0; /* allow re-init */
+    #if defined(WOLFSSL_CRYPTOCELL)
+        cc310_Free();
+    #endif
+    #if defined(WOLFSSL_RENESAS_TSIP_CRYPT)
+        tsip_Close();
+    #endif
+    #if defined(WOLFSSL_DSP) && !defined(WOLFSSL_DSP_BUILD)
+        rpcmem_deinit();
+        wolfSSL_CleanupHandle();
+    #endif
     }
 
     return ret;
@@ -224,10 +318,12 @@
 	!defined(WOLFSSL_NUCLEUS) && !defined(WOLFSSL_NUCLEUS_1_2)
 
 /* File Handling Helpers */
-/* returns 0 if file found, -1 if no files or negative error */
+/* returns 0 if file found, WC_READDIR_NOFILE if no files or negative error */
 int wc_ReadDirFirst(ReadDirCtx* ctx, const char* path, char** name)
 {
-    int ret = -1; /* default to no files found */
+    int ret = WC_READDIR_NOFILE; /* default to no files found */
+    int pathLen = 0;
+    int dnameLen = 0;
 
     if (name)
         *name = NULL;
@@ -237,10 +333,14 @@
     }
 
     XMEMSET(ctx->name, 0, MAX_FILENAME_SZ);
+    pathLen = (int)XSTRLEN(path);
 
 #ifdef USE_WINDOWS_API
-    XSTRNCPY(ctx->name, path, MAX_FILENAME_SZ - 4);
-    XSTRNCAT(ctx->name, "\\*", 3);
+    if (pathLen > MAX_FILENAME_SZ - 3)
+        return BAD_PATH_ERROR;
+
+    XSTRNCPY(ctx->name, path, MAX_FILENAME_SZ - 3);
+    XSTRNCPY(ctx->name + pathLen, "\\*", MAX_FILENAME_SZ - pathLen);
 
     ctx->hFind = FindFirstFileA(ctx->name, &ctx->FindFileData);
     if (ctx->hFind == INVALID_HANDLE_VALUE) {
@@ -249,15 +349,86 @@
     }
 
     do {
-        if (ctx->FindFileData.dwFileAttributes != FILE_ATTRIBUTE_DIRECTORY) {
-            XSTRNCPY(ctx->name, path, MAX_FILENAME_SZ/2 - 3);
-            XSTRNCAT(ctx->name, "\\", 2);
-            XSTRNCAT(ctx->name, ctx->FindFileData.cFileName, MAX_FILENAME_SZ/2);
+        if (!(ctx->FindFileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) {
+            dnameLen = (int)XSTRLEN(ctx->FindFileData.cFileName);
+
+            if (pathLen + dnameLen + 2 > MAX_FILENAME_SZ) {
+                return BAD_PATH_ERROR;
+            }
+            XSTRNCPY(ctx->name, path, pathLen + 1);
+            ctx->name[pathLen] = '\\';
+            XSTRNCPY(ctx->name + pathLen + 1,
+                     ctx->FindFileData.cFileName,
+                     MAX_FILENAME_SZ - pathLen - 1);
             if (name)
                 *name = ctx->name;
             return 0;
         }
     } while (FindNextFileA(ctx->hFind, &ctx->FindFileData));
+#elif defined(WOLFSSL_ZEPHYR)
+    if (fs_opendir(&ctx->dir, path) != 0) {
+        WOLFSSL_MSG("opendir path verify locations failed");
+        return BAD_PATH_ERROR;
+    }
+    ctx->dirp = &ctx->dir;
+
+    while ((fs_readdir(&ctx->dir, &ctx->entry)) != 0) {
+        dnameLen = (int)XSTRLEN(ctx->entry.name);
+
+        if (pathLen + dnameLen + 2 >= MAX_FILENAME_SZ) {
+            ret = BAD_PATH_ERROR;
+            break;
+        }
+        XSTRNCPY(ctx->name, path, pathLen + 1);
+        ctx->name[pathLen] = '/';
+
+        /* Use dnameLen + 1 for GCC 8 warnings of truncating d_name. Because
+         * of earlier check it is known that dnameLen is less than
+         * MAX_FILENAME_SZ - (pathLen + 2)  so dnameLen +1 will fit */
+        XSTRNCPY(ctx->name + pathLen + 1, ctx->entry.name, dnameLen + 1);
+        if (fs_stat(ctx->name, &ctx->s) != 0) {
+            WOLFSSL_MSG("stat on name failed");
+            ret = BAD_PATH_ERROR;
+            break;
+        } else if (ctx->s.type == FS_DIR_ENTRY_FILE) {
+            if (name)
+                *name = ctx->name;
+            return 0;
+        }
+    }
+#elif defined(WOLFSSL_TELIT_M2MB)
+    ctx->dir = m2mb_fs_opendir((const CHAR*)path);
+    if (ctx->dir == NULL) {
+        WOLFSSL_MSG("opendir path verify locations failed");
+        return BAD_PATH_ERROR;
+    }
+
+    while ((ctx->entry = m2mb_fs_readdir(ctx->dir)) != NULL) {
+        dnameLen = (int)XSTRLEN(ctx->entry->d_name);
+
+        if (pathLen + dnameLen + 2 >= MAX_FILENAME_SZ) {
+            ret = BAD_PATH_ERROR;
+            break;
+        }
+        XSTRNCPY(ctx->name, path, pathLen + 1);
+        ctx->name[pathLen] = '/';
+
+        /* Use dnameLen + 1 for GCC 8 warnings of truncating d_name. Because
+         * of earlier check it is known that dnameLen is less than
+         * MAX_FILENAME_SZ - (pathLen + 2)  so dnameLen +1 will fit */
+        XSTRNCPY(ctx->name + pathLen + 1, ctx->entry->d_name, dnameLen + 1);
+
+        if (m2mb_fs_stat(ctx->name, &ctx->s) != 0) {
+            WOLFSSL_MSG("stat on name failed");
+            ret = BAD_PATH_ERROR;
+            break;
+        }
+        else if (ctx->s.st_mode & M2MB_S_IFREG) {
+            if (name)
+                *name = ctx->name;
+            return 0;
+        }
+    }
 #else
     ctx->dir = opendir(path);
     if (ctx->dir == NULL) {
@@ -266,10 +437,19 @@
     }
 
     while ((ctx->entry = readdir(ctx->dir)) != NULL) {
-        XSTRNCPY(ctx->name, path, MAX_FILENAME_SZ/2 - 2);
-        XSTRNCAT(ctx->name, "/", 1);
-        XSTRNCAT(ctx->name, ctx->entry->d_name, MAX_FILENAME_SZ/2);
+        dnameLen = (int)XSTRLEN(ctx->entry->d_name);
 
+        if (pathLen + dnameLen + 2 >= MAX_FILENAME_SZ) {
+            ret = BAD_PATH_ERROR;
+            break;
+        }
+        XSTRNCPY(ctx->name, path, pathLen + 1);
+        ctx->name[pathLen] = '/';
+
+        /* Use dnameLen + 1 for GCC 8 warnings of truncating d_name. Because
+         * of earlier check it is known that dnameLen is less than
+         * MAX_FILENAME_SZ - (pathLen + 2)  so dnameLen +1 will fit */
+        XSTRNCPY(ctx->name + pathLen + 1, ctx->entry->d_name, dnameLen + 1);
         if (stat(ctx->name, &ctx->s) != 0) {
             WOLFSSL_MSG("stat on name failed");
             ret = BAD_PATH_ERROR;
@@ -286,10 +466,12 @@
     return ret;
 }
 
-/* returns 0 if file found, -1 if no more files */
+/* returns 0 if file found, WC_READDIR_NOFILE if no more files */
 int wc_ReadDirNext(ReadDirCtx* ctx, const char* path, char** name)
 {
-    int ret = -1; /* default to no file found */
+    int ret = WC_READDIR_NOFILE; /* default to no file found */
+    int pathLen = 0;
+    int dnameLen = 0;
 
     if (name)
         *name = NULL;
@@ -299,13 +481,73 @@
     }
 
     XMEMSET(ctx->name, 0, MAX_FILENAME_SZ);
+    pathLen = (int)XSTRLEN(path);
 
 #ifdef USE_WINDOWS_API
     while (FindNextFileA(ctx->hFind, &ctx->FindFileData)) {
-        if (ctx->FindFileData.dwFileAttributes != FILE_ATTRIBUTE_DIRECTORY) {
-            XSTRNCPY(ctx->name, path, MAX_FILENAME_SZ/2 - 3);
-            XSTRNCAT(ctx->name, "\\", 2);
-            XSTRNCAT(ctx->name, ctx->FindFileData.cFileName, MAX_FILENAME_SZ/2);
+        if (!(ctx->FindFileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) {
+            dnameLen = (int)XSTRLEN(ctx->FindFileData.cFileName);
+
+            if (pathLen + dnameLen + 2 > MAX_FILENAME_SZ) {
+                return BAD_PATH_ERROR;
+            }
+            XSTRNCPY(ctx->name, path, pathLen + 1);
+            ctx->name[pathLen] = '\\';
+            XSTRNCPY(ctx->name + pathLen + 1,
+                     ctx->FindFileData.cFileName,
+                     MAX_FILENAME_SZ - pathLen - 1);
+            if (name)
+                *name = ctx->name;
+            return 0;
+        }
+    }
+#elif defined(WOLFSSL_ZEPHYR)
+    while ((fs_readdir(&ctx->dir, &ctx->entry)) != 0) {
+        dnameLen = (int)XSTRLEN(ctx->entry.name);
+
+        if (pathLen + dnameLen + 2 >= MAX_FILENAME_SZ) {
+            ret = BAD_PATH_ERROR;
+            break;
+        }
+        XSTRNCPY(ctx->name, path, pathLen + 1);
+        ctx->name[pathLen] = '/';
+        /* Use dnameLen + 1 for GCC 8 warnings of truncating d_name. Because
+         * of earlier check it is known that dnameLen is less than
+         * MAX_FILENAME_SZ - (pathLen + 2) so that dnameLen +1 will fit */
+        XSTRNCPY(ctx->name + pathLen + 1, ctx->entry.name, dnameLen + 1);
+
+        if (fs_stat(ctx->name, &ctx->s) != 0) {
+            WOLFSSL_MSG("stat on name failed");
+            ret = BAD_PATH_ERROR;
+            break;
+        } else if (ctx->s.type == FS_DIR_ENTRY_FILE) {
+            if (name)
+                *name = ctx->name;
+            return 0;
+        }
+    }
+#elif defined(WOLFSSL_TELIT_M2MB)
+    while ((ctx->entry = m2mb_fs_readdir(ctx->dir)) != NULL) {
+        dnameLen = (int)XSTRLEN(ctx->entry->d_name);
+
+        if (pathLen + dnameLen + 2 >= MAX_FILENAME_SZ) {
+            ret = BAD_PATH_ERROR;
+            break;
+        }
+        XSTRNCPY(ctx->name, path, pathLen + 1);
+        ctx->name[pathLen] = '/';
+
+        /* Use dnameLen + 1 for GCC 8 warnings of truncating d_name. Because
+         * of earlier check it is known that dnameLen is less than
+         * MAX_FILENAME_SZ - (pathLen + 2)  so dnameLen +1 will fit */
+        XSTRNCPY(ctx->name + pathLen + 1, ctx->entry->d_name, dnameLen + 1);
+
+        if (m2mb_fs_stat(ctx->name, &ctx->s) != 0) {
+            WOLFSSL_MSG("stat on name failed");
+            ret = BAD_PATH_ERROR;
+            break;
+        }
+        else if (ctx->s.st_mode & M2MB_S_IFREG) {
             if (name)
                 *name = ctx->name;
             return 0;
@@ -313,9 +555,18 @@
     }
 #else
     while ((ctx->entry = readdir(ctx->dir)) != NULL) {
-        XSTRNCPY(ctx->name, path, MAX_FILENAME_SZ/2 - 2);
-        XSTRNCAT(ctx->name, "/", 1);
-        XSTRNCAT(ctx->name, ctx->entry->d_name, MAX_FILENAME_SZ/2);
+        dnameLen = (int)XSTRLEN(ctx->entry->d_name);
+
+        if (pathLen + dnameLen + 2 >= MAX_FILENAME_SZ) {
+            ret = BAD_PATH_ERROR;
+            break;
+        }
+        XSTRNCPY(ctx->name, path, pathLen + 1);
+        ctx->name[pathLen] = '/';
+        /* Use dnameLen + 1 for GCC 8 warnings of truncating d_name. Because
+         * of earlier check it is known that dnameLen is less than
+         * MAX_FILENAME_SZ - (pathLen + 2) so that dnameLen +1 will fit */
+        XSTRNCPY(ctx->name + pathLen + 1, ctx->entry->d_name, dnameLen + 1);
 
         if (stat(ctx->name, &ctx->s) != 0) {
             WOLFSSL_MSG("stat on name failed");
@@ -345,6 +596,16 @@
         FindClose(ctx->hFind);
         ctx->hFind = INVALID_HANDLE_VALUE;
     }
+#elif defined(WOLFSSL_ZEPHYR)
+    if (ctx->dirp) {
+        fs_closedir(ctx->dirp);
+        ctx->dirp = NULL;
+    }
+#elif defined(WOLFSSL_TELIT_M2MB)
+    if (ctx->dir) {
+        m2mb_fs_closedir(ctx->dir);
+        ctx->dir = NULL;
+    }
 #else
     if (ctx->dir) {
         closedir(ctx->dir);
@@ -355,6 +616,37 @@
 
 #endif /* !NO_FILESYSTEM && !NO_WOLFSSL_DIR */
 
+#if !defined(NO_FILESYSTEM) && defined(WOLFSSL_ZEPHYR)
+XFILE z_fs_open(const char* filename, const char* perm)
+{
+    XFILE file;
+
+    file = XMALLOC(sizeof(*file), NULL, DYNAMIC_TYPE_FILE);
+    if (file != NULL) {
+        if (fs_open(file, filename) != 0) {
+            XFREE(file, NULL, DYNAMIC_TYPE_FILE);
+            file = NULL;
+        }
+    }
+
+    return file;
+}
+
+int z_fs_close(XFILE file)
+{
+    int ret;
+
+    if (file == NULL)
+        return -1;
+    ret = (fs_close(file) == 0) ? 0 : -1;
+
+    XFREE(file, NULL, DYNAMIC_TYPE_FILE);
+
+    return ret;
+}
+
+#endif /* !NO_FILESYSTEM && !WOLFSSL_ZEPHYR */
+
 
 wolfSSL_Mutex* wc_InitAndAllocMutex(void)
 {
@@ -689,6 +981,47 @@
             return BAD_MUTEX_E;
     }
 
+#elif defined(WOLFSSL_VXWORKS)
+
+    int wc_InitMutex(wolfSSL_Mutex* m)
+    {
+        if (m) {
+            if ((*m = semMCreate(0)) != SEM_ID_NULL)
+                return 0;
+        }
+        return BAD_MUTEX_E;
+    }
+
+
+    int wc_FreeMutex(wolfSSL_Mutex* m)
+    {
+        if (m) {
+            if (semDelete(*m) == OK)
+                return 0;
+        }
+        return BAD_MUTEX_E;
+    }
+
+
+    int wc_LockMutex(wolfSSL_Mutex* m)
+    {
+        if (m) {
+            if (semTake(*m, WAIT_FOREVER) == OK)
+                return 0;
+        }
+        return BAD_MUTEX_E;
+    }
+
+
+    int wc_UnLockMutex(wolfSSL_Mutex* m)
+    {
+        if (m) {
+            if (semGive(*m) == OK)
+                return 0;
+        }
+        return BAD_MUTEX_E;
+    }
+
 #elif defined(THREADX)
 
     int wc_InitMutex(wolfSSL_Mutex* m)
@@ -725,6 +1058,72 @@
             return BAD_MUTEX_E;
     }
 
+#elif defined(WOLFSSL_DEOS)
+
+    int wc_InitMutex(wolfSSL_Mutex* m)
+    {
+        mutexStatus mutStat;
+        /*
+        The empty string "" denotes an anonymous mutex, so objects do not cause name collisions.
+        `protectWolfSSLTemp` in an XML configuration element template describing a mutex.
+        */
+        if (m) {
+            mutStat = createMutex("", "protectWolfSSLTemp", m);
+            if (mutStat == mutexSuccess)
+                return 0;
+            else{
+                WOLFSSL_MSG("wc_InitMutex failed");
+                return mutStat;
+            }
+        }
+        return BAD_MUTEX_E;
+    }
+
+    int wc_FreeMutex(wolfSSL_Mutex* m)
+    {
+        mutexStatus mutStat;
+        if (m) {
+            mutStat = deleteMutex(*m);
+            if (mutStat == mutexSuccess)
+                return 0;
+            else{
+                WOLFSSL_MSG("wc_FreeMutex failed");
+                return mutStat;
+            }
+        }
+        return BAD_MUTEX_E;
+    }
+
+    int wc_LockMutex(wolfSSL_Mutex* m)
+    {
+        mutexStatus mutStat;
+        if (m) {
+            mutStat = lockMutex(*m);
+            if (mutStat == mutexSuccess)
+                return 0;
+            else{
+                WOLFSSL_MSG("wc_LockMutex failed");
+                return mutStat;
+            }
+        }
+        return BAD_MUTEX_E;
+    }
+
+    int wc_UnLockMutex(wolfSSL_Mutex* m)
+    {
+        mutexStatus mutStat;
+        if (m) {
+            mutStat = unlockMutex(*m);
+            if (mutStat== mutexSuccess)
+                return 0;
+            else{
+                WOLFSSL_MSG("wc_UnLockMutex failed");
+                return mutStat;
+            }
+        }
+        return BAD_MUTEX_E;
+    }
+
 #elif defined(MICRIUM)
 
     int wc_InitMutex(wolfSSL_Mutex* m)
@@ -809,6 +1208,19 @@
         return 0;
     }
 
+    int ebsnet_fseek(int a, long b, int c)
+    {
+        int retval;
+
+        retval = vf_lseek(a, b, c);
+        if (retval > 0)
+            retval = 0;
+        else
+            retval =  -1;
+
+        return(retval);
+    }
+
 #elif defined(FREESCALE_MQX) || defined(FREESCALE_KSDK_MQX)
 
     int wc_InitMutex(wolfSSL_Mutex* m)
@@ -941,7 +1353,7 @@
 
     void *uITRON4_malloc(size_t sz) {
         ER ercd;
-        void *p;
+        void *p = NULL;
         ercd = get_mpl(ID_wolfssl_MPOOL, sz, (VP)&p);
         if (ercd == E_OK) {
             return p;
@@ -1035,7 +1447,7 @@
 
     void *uTKernel_malloc(unsigned int sz) {
         ER ercd;
-        void *p;
+        void *p = NULL;
         ercd = tk_get_mpl(ID_wolfssl_MPOOL, sz, (VP)&p, TMO_FEVR);
         if (ercd == E_OK) {
             return p;
@@ -1152,6 +1564,43 @@
         return 0;
     }
 
+#elif defined(WOLFSSL_CMSIS_RTOSv2)
+    int wc_InitMutex(wolfSSL_Mutex *m)
+    {
+        static const osMutexAttr_t attr = {
+            "wolfSSL_mutex", osMutexRecursive, NULL, 0};
+
+        if ((*m = osMutexNew(&attr)) != NULL)
+            return 0;
+        else
+            return BAD_MUTEX_E;
+    }
+
+    int wc_FreeMutex(wolfSSL_Mutex *m)
+    {
+        if (osMutexDelete(*m) == osOK)
+            return 0;
+        else
+            return BAD_MUTEX_E;
+    }
+
+
+    int wc_LockMutex(wolfSSL_Mutex *m)
+    {
+        if (osMutexAcquire(*m, osWaitForever) == osOK)
+            return 0;
+        else
+            return BAD_MUTEX_E;
+    }
+
+    int wc_UnLockMutex(wolfSSL_Mutex *m)
+    {
+        if (osMutexRelease(*m) == osOK)
+            return 0;
+        else
+            return BAD_MUTEX_E;
+    }
+
 #elif defined(WOLFSSL_MDK_ARM)
 
     int wc_InitMutex(wolfSSL_Mutex* m)
@@ -1293,6 +1742,109 @@
         return BAD_MUTEX_E;
     }
 
+#elif defined(WOLFSSL_ZEPHYR)
+
+    int wc_InitMutex(wolfSSL_Mutex* m)
+    {
+        k_mutex_init(m);
+
+        return 0;
+    }
+
+    int wc_FreeMutex(wolfSSL_Mutex* m)
+    {
+        return 0;
+    }
+
+    int wc_LockMutex(wolfSSL_Mutex* m)
+    {
+        int ret = 0;
+
+        if (k_mutex_lock(m, K_FOREVER) != 0)
+            ret = BAD_MUTEX_E;
+
+        return ret;
+    }
+
+    int wc_UnLockMutex(wolfSSL_Mutex* m)
+    {
+        k_mutex_unlock(m);
+
+        return 0;
+    }
+
+#elif defined(WOLFSSL_TELIT_M2MB)
+
+    int wc_InitMutex(wolfSSL_Mutex* m)
+    {
+        M2MB_OS_RESULT_E        osRes;
+        M2MB_OS_MTX_ATTR_HANDLE mtxAttrHandle;
+        UINT32                  inheritVal = 1;
+
+        osRes = m2mb_os_mtx_setAttrItem(&mtxAttrHandle,
+                                    CMDS_ARGS(
+                                      M2MB_OS_MTX_SEL_CMD_CREATE_ATTR, NULL,
+                                      M2MB_OS_MTX_SEL_CMD_NAME, "wolfMtx",
+                                      M2MB_OS_MTX_SEL_CMD_INHERIT, inheritVal
+                                    )
+                                );
+        if (osRes != M2MB_OS_SUCCESS) {
+            return BAD_MUTEX_E;
+        }
+
+        osRes = m2mb_os_mtx_init(m, &mtxAttrHandle);
+        if (osRes != M2MB_OS_SUCCESS) {
+            return BAD_MUTEX_E;
+        }
+
+        return 0;
+    }
+
+    int wc_FreeMutex(wolfSSL_Mutex* m)
+    {
+        M2MB_OS_RESULT_E osRes;
+
+        if (m == NULL)
+            return BAD_MUTEX_E;
+
+        osRes = m2mb_os_mtx_deinit(*m);
+        if (osRes != M2MB_OS_SUCCESS) {
+            return BAD_MUTEX_E;
+        }
+
+        return 0;
+    }
+
+    int wc_LockMutex(wolfSSL_Mutex* m)
+    {
+        M2MB_OS_RESULT_E osRes;
+
+        if (m == NULL)
+            return BAD_MUTEX_E;
+
+        osRes = m2mb_os_mtx_get(*m, M2MB_OS_WAIT_FOREVER);
+        if (osRes != M2MB_OS_SUCCESS) {
+            return BAD_MUTEX_E;
+        }
+
+        return 0;
+    }
+
+    int wc_UnLockMutex(wolfSSL_Mutex* m)
+    {
+        M2MB_OS_RESULT_E osRes;
+
+        if (m == NULL)
+            return BAD_MUTEX_E;
+
+        osRes = m2mb_os_mtx_put(*m);
+        if (osRes != M2MB_OS_SUCCESS) {
+            return BAD_MUTEX_E;
+        }
+
+        return 0;
+    }
+
 #else
     #warning No mutex handling defined
 
@@ -1324,6 +1876,22 @@
 }
 #endif /*  _WIN32_WCE */
 
+#if defined(WOLFSSL_APACHE_MYNEWT)
+#include "os/os_time.h"
+
+time_t mynewt_time(time_t* timer)
+{
+    time_t now;
+    struct os_timeval tv;
+    os_gettimeofday(&tv, NULL);
+    now = (time_t)tv.tv_sec;
+    if(timer != NULL) {
+        *timer = now;
+    }
+    return now;
+}
+#endif /* WOLFSSL_APACHE_MYNEWT */
+
 #if defined(WOLFSSL_GMTIME)
 struct tm* gmtime(const time_t* timer)
 {
@@ -1429,6 +1997,25 @@
 
 #endif /* MICROCHIP_TCPIP || MICROCHIP_TCPIP_V5 */
 
+#if defined(WOLFSSL_DEOS)
+
+time_t deos_time(time_t* timer)
+{
+    const uint32_t systemTickTimeInHz = 1000000 / systemTickInMicroseconds();
+    uint32_t *systemTickPtr = systemTickPointer();
+
+    if (timer != NULL)
+        *timer = *systemTickPtr/systemTickTimeInHz;
+
+    #if defined(CURRENT_UNIX_TIMESTAMP)
+        /* CURRENT_UNIX_TIMESTAMP is seconds since Jan 01 1970. (UTC) */
+        return (time_t) *systemTickPtr/systemTickTimeInHz + CURRENT_UNIX_TIMESTAMP;
+    #else
+        return (time_t) *systemTickPtr/systemTickTimeInHz;
+    #endif
+}
+#endif /* WOLFSSL_DEOS */
+
 #if defined(MICRIUM)
 
 time_t micrium_time(time_t* timer)
@@ -1437,6 +2024,9 @@
 
     Clk_GetTS_Unix(&sec);
 
+    if (timer != NULL)
+        *timer = sec;
+
     return (time_t) sec;
 }
 
@@ -1461,7 +2051,7 @@
 #endif /* FREESCALE_MQX || FREESCALE_KSDK_MQX */
 
 
-#if defined(WOLFSSL_TIRTOS)
+#if defined(WOLFSSL_TIRTOS) && defined(USER_TIME)
 
 time_t XTIME(time_t * timer)
 {
@@ -1503,6 +2093,93 @@
 }
 
 #endif /* WOLFSSL_XILINX */
+
+#if defined(WOLFSSL_ZEPHYR)
+
+time_t z_time(time_t * timer)
+{
+    struct timespec ts;
+
+    if (clock_gettime(CLOCK_REALTIME, &ts) == 0)
+        if (timer != NULL)
+            *timer = ts.tv_sec;
+
+    return ts.tv_sec;
+}
+
+#endif /* WOLFSSL_ZEPHYR */
+
+
+#if defined(WOLFSSL_WICED)
+    #ifndef WOLFSSL_WICED_PSEUDO_UNIX_EPOCH_TIME
+        #error Please define WOLFSSL_WICED_PSEUDO_UNIX_EPOCH_TIME at build time.
+    #endif /* WOLFSSL_WICED_PSEUDO_UNIX_EPOCH_TIME */
+
+time_t wiced_pseudo_unix_epoch_time(time_t * timer)
+{
+    time_t epoch_time;
+    /* The time() function return uptime on WICED platform. */
+    epoch_time = time(NULL) + WOLFSSL_WICED_PSEUDO_UNIX_EPOCH_TIME;
+
+    if (timer != NULL) {
+        *timer = epoch_time;
+    }
+    return epoch_time;
+}
+#endif /* WOLFSSL_WICED */
+
+#ifdef WOLFSSL_TELIT_M2MB
+    time_t m2mb_xtime(time_t * timer)
+    {
+        time_t myTime = 0;
+        INT32 fd = m2mb_rtc_open("/dev/rtc0", 0);
+        if (fd != -1) {
+            M2MB_RTC_TIMEVAL_T timeval;
+
+            m2mb_rtc_ioctl(fd, M2MB_RTC_IOCTL_GET_TIMEVAL, &timeval);
+
+            myTime = timeval.sec;
+
+            m2mb_rtc_close(fd);
+        }
+        return myTime;
+    }
+    #ifdef WOLFSSL_TLS13
+    time_t m2mb_xtime_ms(time_t * timer)
+    {
+        time_t myTime = 0;
+        INT32 fd = m2mb_rtc_open("/dev/rtc0", 0);
+        if (fd != -1) {
+            M2MB_RTC_TIMEVAL_T timeval;
+
+            m2mb_rtc_ioctl(fd, M2MB_RTC_IOCTL_GET_TIMEVAL, &timeval);
+
+            myTime = timeval.sec + timeval.msec;
+
+            m2mb_rtc_close(fd);
+        }
+        return myTime;
+    }
+    #endif /* WOLFSSL_TLS13 */
+    #ifndef NO_CRYPT_BENCHMARK
+    double m2mb_xtime_bench(int reset)
+    {
+        double myTime = 0;
+        INT32 fd = m2mb_rtc_open("/dev/rtc0", 0);
+        if (fd != -1) {
+            M2MB_RTC_TIMEVAL_T timeval;
+
+            m2mb_rtc_ioctl(fd, M2MB_RTC_IOCTL_GET_TIMEVAL, &timeval);
+
+            myTime = (double)timeval.sec + ((double)timeval.msec / 1000);
+
+            m2mb_rtc_close(fd);
+        }
+        return myTime;
+    }
+    #endif /* !NO_CRYPT_BENCHMARK */
+#endif /* WOLFSSL_TELIT_M2MB */
+
 #endif /* !NO_ASN_TIME */
 
 #ifndef WOLFSSL_LEANPSK
@@ -1547,7 +2224,6 @@
 
     void* nucleus_realloc(void* ptr, unsigned long size, void* heap, int type)
     {
-        STATUS     status;
         DM_HEADER* old_header;
         word32     old_size, copy_size;
         void*      new_mem;
@@ -1589,4 +2265,13 @@
     #include <wolfcrypt/src/port/ti/ti-ccm.c>  /* initialize and Mutex for TI Crypt Engine */
     #include <wolfcrypt/src/port/ti/ti-hash.c> /* md5, sha1, sha224, sha256 */
 #endif
+
+#if defined(WOLFSSL_CRYPTOCELL)
+    #define WOLFSSL_CRYPTOCELL_C
+    #include <wolfcrypt/src/port/arm/cryptoCell.c> /* CC310, RTC and RNG */
+    #if !defined(NO_SHA256)
+        #define WOLFSSL_CRYPTOCELL_HASH_C
+        #include <wolfcrypt/src/port/arm/cryptoCellHash.c> /* sha256 */
+    #endif
+#endif
 
--- a/wolfcrypt/src/wolfevent.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/wolfevent.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* wolfevent.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
--- a/wolfcrypt/src/wolfmath.c	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfcrypt/src/wolfmath.c	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* wolfmath.c
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -29,11 +29,7 @@
 /* in case user set USE_FAST_MATH there */
 #include <wolfssl/wolfcrypt/settings.h>
 
-#ifdef USE_FAST_MATH
-    #include <wolfssl/wolfcrypt/tfm.h>
-#else
-    #include <wolfssl/wolfcrypt/integer.h>
-#endif
+#include <wolfssl/wolfcrypt/integer.h>
 
 #include <wolfssl/wolfcrypt/error-crypt.h>
 #include <wolfssl/wolfcrypt/logging.h>
@@ -75,6 +71,7 @@
 #endif
 
 
+#if !defined(WOLFSSL_SP_MATH)
 int get_digit_count(mp_int* a)
 {
     if (a == NULL)
@@ -82,6 +79,7 @@
 
     return a->used;
 }
+#endif
 
 mp_digit get_digit(mp_int* a, int n)
 {
@@ -91,6 +89,46 @@
     return (n >= a->used || n < 0) ? 0 : a->dp[n];
 }
 
+/* Conditionally copy a into b. Performed in constant time.
+ *
+ * a     MP integer to copy.
+ * copy  On 1, copy a into b. on 0 leave b unchanged.
+ * b     MP integer to copy into.
+ * returns BAD_FUNC_ARG when a or b is NULL, MEMORY_E when growing b fails and
+ *         MP_OKAY otherwise.
+ */
+int mp_cond_copy(mp_int* a, int copy, mp_int* b)
+{
+    int err = MP_OKAY;
+    int i;
+    mp_digit mask = (mp_digit)0 - copy;
+
+    if (a == NULL || b == NULL)
+        err = BAD_FUNC_ARG;
+
+    /* Ensure b has enough space to copy a into */
+    if (err == MP_OKAY)
+        err = mp_grow(b, a->used + 1);
+    if (err == MP_OKAY) {
+        /* When mask 0, b is unchanged2
+         * When mask all set, b ^ b ^ a = a
+         */
+        /* Conditionaly copy all digits and then number of used diigits.
+         * get_digit() returns 0 when index greater than available digit.
+         */
+        for (i = 0; i < a->used; i++) {
+            b->dp[i] ^= (get_digit(a, i) ^ get_digit(b, i)) & mask;
+        }
+        for (; i < b->used; i++) {
+            b->dp[i] ^= (get_digit(a, i) ^ get_digit(b, i)) & mask;
+        }
+        b->used ^= (a->used ^ b->used) & (int)mask;
+    }
+
+    return err;
+}
+
+#ifndef WC_NO_RNG
 int get_rand_digit(WC_RNG* rng, mp_digit* d)
 {
     return wc_RNG_GenerateBlock(rng, (byte*)d, sizeof(mp_digit));
@@ -100,55 +138,94 @@
 int mp_rand(mp_int* a, int digits, WC_RNG* rng)
 {
     int ret = 0;
-    DECLARE_VAR(d, mp_digit, 1, rng->heap);
+    int cnt = digits * sizeof(mp_digit);
+#if !defined(USE_FAST_MATH) && !defined(WOLFSSL_SP_MATH)
+    int i;
+#endif
 
     if (rng == NULL) {
-        ret = MISSING_RNG_E; goto exit;
+        ret = MISSING_RNG_E;
     }
-
-    if (a == NULL
-    #ifdef WOLFSSL_ASYNC_CRYPT
-        || d == NULL
-    #endif
-    ) {
-        ret = BAD_FUNC_ARG; goto exit;
-    }
-
-    mp_zero(a);
-    if (digits <= 0) {
-        ret = MP_OKAY; goto exit;
+    else if (a == NULL) {
+        ret = BAD_FUNC_ARG;
     }
 
-    /* first place a random non-zero digit */
-    do {
-        ret = get_rand_digit(rng, d);
-        if (ret != 0) {
-            goto exit;
-        }
-    } while (*d == 0);
-
-    if ((ret = mp_add_d(a, *d, a)) != MP_OKAY) {
-        goto exit;
+#if !defined(USE_FAST_MATH) && !defined(WOLFSSL_SP_MATH)
+    /* allocate space for digits */
+    if (ret == MP_OKAY) {
+        ret = mp_set_bit(a, digits * DIGIT_BIT - 1);
+    }
+#else
+#if defined(WOLFSSL_SP_MATH)
+    if ((ret == MP_OKAY) && (digits > SP_INT_DIGITS))
+#else
+    if ((ret == MP_OKAY) && (digits > FP_SIZE))
+#endif
+    {
+        ret = BAD_FUNC_ARG;
+    }
+    if (ret == MP_OKAY) {
+        a->used = digits;
     }
-
-    while (--digits > 0) {
-        if ((ret = mp_lshd(a, 1)) != MP_OKAY) {
-            goto exit;
+#endif
+    /* fill the data with random bytes */
+    if (ret == MP_OKAY) {
+        ret = wc_RNG_GenerateBlock(rng, (byte*)a->dp, cnt);
+    }
+    if (ret == MP_OKAY) {
+#if !defined(USE_FAST_MATH) && !defined(WOLFSSL_SP_MATH)
+        /* Mask down each digit to only bits used */
+        for (i = 0; i < a->used; i++) {
+            a->dp[i] &= MP_MASK;
         }
-        if ((ret = get_rand_digit(rng, d)) != 0) {
-            goto exit;
-        }
-        if ((ret = mp_add_d(a, *d, a)) != MP_OKAY) {
-            goto exit;
+#endif
+        /* ensure top digit is not zero */
+        while ((ret == MP_OKAY) && (a->dp[a->used - 1] == 0)) {
+            ret = get_rand_digit(rng, &a->dp[a->used - 1]);
+#if !defined(USE_FAST_MATH) && !defined(WOLFSSL_SP_MATH)
+            a->dp[a->used - 1] &= MP_MASK;
+#endif
         }
     }
 
-exit:
-    FREE_VAR(d, rng->heap);
-
     return ret;
 }
 #endif /* WC_RSA_BLINDING */
+#endif
+
+/* export an mp_int as unsigned char or hex string
+ * encType is WC_TYPE_UNSIGNED_BIN or WC_TYPE_HEX_STR
+ * return MP_OKAY on success */
+int wc_export_int(mp_int* mp, byte* buf, word32* len, word32 keySz,
+    int encType)
+{
+    int err;
+
+    if (mp == NULL)
+        return BAD_FUNC_ARG;
+
+    /* check buffer size */
+    if (*len < keySz) {
+        *len = keySz;
+        return BUFFER_E;
+    }
+
+    *len = keySz;
+    XMEMSET(buf, 0, *len);
+
+    if (encType == WC_TYPE_HEX_STR) {
+    #ifdef WC_MP_TO_RADIX
+        err = mp_tohex(mp, (char*)buf);
+    #else
+        err = NOT_COMPILED_IN;
+    #endif
+    }
+    else {
+        err = mp_to_unsigned_bin(mp, buf + (keySz - mp_unsigned_bin_size(mp)));
+    }
+
+    return err;
+}
 
 
 #ifdef HAVE_WOLF_BIGINT
@@ -174,9 +251,9 @@
         }
         if (a->buf == NULL) {
             a->buf = (byte*)XMALLOC(sz, a->heap, DYNAMIC_TYPE_WOLF_BIGINT);
-        }
-        if (a->buf == NULL) {
-            err = MP_MEM;
+            if (a->buf == NULL) {
+                err = MP_MEM;
+            }
         }
         else {
             XMEMSET(a->buf, 0, sz);
@@ -299,7 +376,6 @@
 
     return err;
 }
-
 #endif /* HAVE_WOLF_BIGINT */
 
 #endif /* USE_FAST_MATH || !NO_BIG_INT */
--- a/wolfssl/callbacks.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/callbacks.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* callbacks.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -31,7 +31,7 @@
 #endif
 
 
-enum { /* CALLBACK CONTSTANTS */
+enum { /* CALLBACK CONSTANTS */
     MAX_PACKETNAME_SZ     =  24,
     MAX_CIPHERNAME_SZ     =  24,
     MAX_TIMEOUT_NAME_SZ   =  24,
@@ -52,12 +52,22 @@
 } HandShakeInfo;
 
 
-typedef struct timeval Timeval;
-
+#if defined(HAVE_SYS_TIME_H) && !defined(NO_TIMEVAL)
+    typedef struct timeval WOLFSSL_TIMEVAL;
+#else /* HAVE_SYS_TIME_H */
+    /* Define the timeval explicitly. */
+    typedef struct {
+        long tv_sec;  /* Seconds. */
+        long tv_usec; /* Microseconds. */
+    } WOLFSSL_TIMEVAL;
+#endif /* HAVE_SYS_TIME_H */
+#if !defined(NO_OLD_TIMEVAL_NAME)
+    #define Timeval WOLFSSL_TIMEVAL
+#endif
 
 typedef struct packetInfo_st {
     char           packetName[MAX_PACKETNAME_SZ + 1]; /* SSL packet name */
-    Timeval        timestamp;                       /* when it occurred    */
+    WOLFSSL_TIMEVAL timestamp;                       /* when it occurred    */
     unsigned char  value[MAX_VALUE_SZ];             /* if fits, it's here */
     unsigned char* bufferValue;                     /* otherwise here (non 0) */
     int            valueSz;                         /* sz of value or buffer */
@@ -69,7 +79,7 @@
     int        flags;                              /* for future use */
     int        numberPackets;                      /* actual # of packets */
     PacketInfo packets[MAX_PACKETS_HANDSHAKE];     /* list of all packets  */
-    Timeval    timeoutValue;                       /* timer that caused it */
+    WOLFSSL_TIMEVAL timeoutValue;                  /* timer that caused it */
 } TimeoutInfo;
 
 
--- a/wolfssl/certs_test.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/certs_test.h	Thu Jun 04 23:57:22 2020 +0000
@@ -8,398 +8,398 @@
 /* ./certs/1024/client-key.der, 1024-bit */
 static const unsigned char client_key_der_1024[] =
 {
-	0x30, 0x82, 0x02, 0x5C, 0x02, 0x01, 0x00, 0x02, 0x81, 0x81, 
-	0x00, 0xBC, 0x73, 0x0E, 0xA8, 0x49, 0xF3, 0x74, 0xA2, 0xA9, 
-	0xEF, 0x18, 0xA5, 0xDA, 0x55, 0x99, 0x21, 0xF9, 0xC8, 0xEC, 
-	0xB3, 0x6D, 0x48, 0xE5, 0x35, 0x35, 0x75, 0x77, 0x37, 0xEC, 
-	0xD1, 0x61, 0x90, 0x5F, 0x3E, 0xD9, 0xE4, 0xD5, 0xDF, 0x94, 
-	0xCA, 0xC1, 0xA9, 0xD7, 0x19, 0xDA, 0x86, 0xC9, 0xE8, 0x4D, 
-	0xC4, 0x61, 0x36, 0x82, 0xFE, 0xAB, 0xAD, 0x7E, 0x77, 0x25, 
-	0xBB, 0x8D, 0x11, 0xA5, 0xBC, 0x62, 0x3A, 0xA8, 0x38, 0xCC, 
-	0x39, 0xA2, 0x04, 0x66, 0xB4, 0xF7, 0xF7, 0xF3, 0xAA, 0xDA, 
-	0x4D, 0x02, 0x0E, 0xBB, 0x5E, 0x8D, 0x69, 0x48, 0xDC, 0x77, 
-	0xC9, 0x28, 0x0E, 0x22, 0xE9, 0x6B, 0xA4, 0x26, 0xBA, 0x4C, 
-	0xE8, 0xC1, 0xFD, 0x4A, 0x6F, 0x2B, 0x1F, 0xEF, 0x8A, 0xAE, 
-	0xF6, 0x90, 0x62, 0xE5, 0x64, 0x1E, 0xEB, 0x2B, 0x3C, 0x67, 
-	0xC8, 0xDC, 0x27, 0x00, 0xF6, 0x91, 0x68, 0x65, 0xA9, 0x02, 
-	0x03, 0x01, 0x00, 0x01, 0x02, 0x81, 0x80, 0x13, 0x97, 0xEA, 
-	0xE8, 0x38, 0x78, 0x25, 0xA2, 0x5C, 0x04, 0xCE, 0x0D, 0x40, 
-	0x7C, 0x31, 0xE5, 0xC4, 0x70, 0xCD, 0x9B, 0x82, 0x3B, 0x58, 
-	0x09, 0x86, 0x3B, 0x66, 0x5F, 0xDC, 0x31, 0x90, 0xF1, 0x4F, 
-	0xD5, 0xDB, 0x15, 0xDD, 0xDE, 0xD7, 0x3B, 0x95, 0x93, 0x31, 
-	0x18, 0x31, 0x0E, 0x5E, 0xA3, 0xD6, 0xA2, 0x1A, 0x71, 0x6E, 
-	0x81, 0x48, 0x1C, 0x4B, 0xCF, 0xDB, 0x8E, 0x7A, 0x86, 0x61, 
-	0x32, 0xDC, 0xFB, 0x55, 0xC1, 0x16, 0x6D, 0x27, 0x92, 0x24, 
-	0x45, 0x8B, 0xF1, 0xB8, 0x48, 0xB1, 0x4B, 0x1D, 0xAC, 0xDE, 
-	0xDA, 0xDD, 0x8E, 0x2F, 0xC2, 0x91, 0xFB, 0xA5, 0xA9, 0x6E, 
-	0xF8, 0x3A, 0x6A, 0xF1, 0xFD, 0x50, 0x18, 0xEF, 0x9F, 0xE7, 
-	0xC3, 0xCA, 0x78, 0xEA, 0x56, 0xD3, 0xD3, 0x72, 0x5B, 0x96, 
-	0xDD, 0x4E, 0x06, 0x4E, 0x3A, 0xC3, 0xD9, 0xBE, 0x72, 0xB6, 
-	0x65, 0x07, 0x07, 0x4C, 0x01, 0x02, 0x41, 0x00, 0xFA, 0x47, 
-	0xD4, 0x7A, 0x7C, 0x92, 0x3C, 0x55, 0xEF, 0x81, 0xF0, 0x41, 
-	0x30, 0x2D, 0xA3, 0xCF, 0x8F, 0x1C, 0xE6, 0x87, 0x27, 0x05, 
-	0x70, 0x0D, 0xDF, 0x98, 0x35, 0xD6, 0xF1, 0x8B, 0x38, 0x2F, 
-	0x24, 0xB5, 0xD0, 0x84, 0xB6, 0x79, 0x4F, 0x71, 0x29, 0x94, 
-	0x5A, 0xF0, 0x64, 0x6A, 0xAC, 0xE7, 0x72, 0xC6, 0xED, 0x4D, 
-	0x59, 0x98, 0x3E, 0x67, 0x3A, 0xF3, 0x74, 0x2C, 0xF9, 0x61, 
-	0x17, 0x69, 0x02, 0x41, 0x00, 0xC0, 0xC1, 0x82, 0x0D, 0x0C, 
-	0xEB, 0xC6, 0x2F, 0xDC, 0x92, 0xF9, 0x9D, 0x82, 0x1A, 0x31, 
-	0xE9, 0xE9, 0xF7, 0x4B, 0xF2, 0x82, 0x87, 0x1C, 0xEE, 0x16, 
-	0x6A, 0xD1, 0x1D, 0x18, 0x82, 0x70, 0xF3, 0xC0, 0xB6, 0x2F, 
-	0xF6, 0xF3, 0xF7, 0x1D, 0xF1, 0x86, 0x23, 0xC8, 0x4E, 0xEB, 
-	0x8F, 0x56, 0x8E, 0x8F, 0xF5, 0xBF, 0xF1, 0xF7, 0x2B, 0xB5, 
-	0xCC, 0x3D, 0xC6, 0x57, 0x39, 0x0C, 0x1B, 0x54, 0x41, 0x02, 
-	0x41, 0x00, 0x9D, 0x7E, 0x05, 0xDE, 0xED, 0xF4, 0xB7, 0xB2, 
-	0xFB, 0xFC, 0x30, 0x4B, 0x55, 0x1D, 0xE3, 0x2F, 0x01, 0x47, 
-	0x96, 0x69, 0x05, 0xCD, 0x0E, 0x2E, 0x2C, 0xBD, 0x83, 0x63, 
-	0xB6, 0xAB, 0x7C, 0xB7, 0x6D, 0xCA, 0x5B, 0x64, 0xA7, 0xCE, 
-	0xBE, 0x86, 0xDF, 0x3B, 0x53, 0xDE, 0x61, 0xD2, 0x1E, 0xEB, 
-	0xA5, 0xF6, 0x37, 0xED, 0xAC, 0xAB, 0x78, 0xD9, 0x4C, 0xE7, 
-	0x55, 0xFB, 0xD7, 0x11, 0x99, 0xC1, 0x02, 0x40, 0x18, 0x98, 
-	0x18, 0x29, 0xE6, 0x1E, 0x27, 0x39, 0x70, 0x21, 0x68, 0xAC, 
-	0x0A, 0x2F, 0xA1, 0x72, 0xC1, 0x21, 0x86, 0x95, 0x38, 0xC6, 
-	0x58, 0x90, 0xA0, 0x57, 0x9C, 0xBA, 0xE3, 0xA7, 0xB1, 0x15, 
-	0xC8, 0xDE, 0xF6, 0x1B, 0xC2, 0x61, 0x23, 0x76, 0xEF, 0xB0, 
-	0x9D, 0x1C, 0x44, 0xBE, 0x13, 0x43, 0x39, 0x67, 0x17, 0xC8, 
-	0x9D, 0xCA, 0xFB, 0xF5, 0x45, 0x64, 0x8B, 0x38, 0x82, 0x2C, 
-	0xF2, 0x81, 0x02, 0x40, 0x39, 0x89, 0xE5, 0x9C, 0x19, 0x55, 
-	0x30, 0xBA, 0xB7, 0x48, 0x8C, 0x48, 0x14, 0x0E, 0xF4, 0x9F, 
-	0x7E, 0x77, 0x97, 0x43, 0xE1, 0xB4, 0x19, 0x35, 0x31, 0x23, 
-	0x75, 0x9C, 0x3B, 0x44, 0xAD, 0x69, 0x12, 0x56, 0xEE, 0x00, 
-	0x61, 0x64, 0x16, 0x66, 0xD3, 0x7C, 0x74, 0x2B, 0x15, 0xB4, 
-	0xA2, 0xFE, 0xBF, 0x08, 0x6B, 0x1A, 0x5D, 0x3F, 0x90, 0x12, 
-	0xB1, 0x05, 0x86, 0x31, 0x29, 0xDB, 0xD9, 0xE2
+        0x30, 0x82, 0x02, 0x5C, 0x02, 0x01, 0x00, 0x02, 0x81, 0x81,
+        0x00, 0xBC, 0x73, 0x0E, 0xA8, 0x49, 0xF3, 0x74, 0xA2, 0xA9,
+        0xEF, 0x18, 0xA5, 0xDA, 0x55, 0x99, 0x21, 0xF9, 0xC8, 0xEC,
+        0xB3, 0x6D, 0x48, 0xE5, 0x35, 0x35, 0x75, 0x77, 0x37, 0xEC,
+        0xD1, 0x61, 0x90, 0x5F, 0x3E, 0xD9, 0xE4, 0xD5, 0xDF, 0x94,
+        0xCA, 0xC1, 0xA9, 0xD7, 0x19, 0xDA, 0x86, 0xC9, 0xE8, 0x4D,
+        0xC4, 0x61, 0x36, 0x82, 0xFE, 0xAB, 0xAD, 0x7E, 0x77, 0x25,
+        0xBB, 0x8D, 0x11, 0xA5, 0xBC, 0x62, 0x3A, 0xA8, 0x38, 0xCC,
+        0x39, 0xA2, 0x04, 0x66, 0xB4, 0xF7, 0xF7, 0xF3, 0xAA, 0xDA,
+        0x4D, 0x02, 0x0E, 0xBB, 0x5E, 0x8D, 0x69, 0x48, 0xDC, 0x77,
+        0xC9, 0x28, 0x0E, 0x22, 0xE9, 0x6B, 0xA4, 0x26, 0xBA, 0x4C,
+        0xE8, 0xC1, 0xFD, 0x4A, 0x6F, 0x2B, 0x1F, 0xEF, 0x8A, 0xAE,
+        0xF6, 0x90, 0x62, 0xE5, 0x64, 0x1E, 0xEB, 0x2B, 0x3C, 0x67,
+        0xC8, 0xDC, 0x27, 0x00, 0xF6, 0x91, 0x68, 0x65, 0xA9, 0x02,
+        0x03, 0x01, 0x00, 0x01, 0x02, 0x81, 0x80, 0x13, 0x97, 0xEA,
+        0xE8, 0x38, 0x78, 0x25, 0xA2, 0x5C, 0x04, 0xCE, 0x0D, 0x40,
+        0x7C, 0x31, 0xE5, 0xC4, 0x70, 0xCD, 0x9B, 0x82, 0x3B, 0x58,
+        0x09, 0x86, 0x3B, 0x66, 0x5F, 0xDC, 0x31, 0x90, 0xF1, 0x4F,
+        0xD5, 0xDB, 0x15, 0xDD, 0xDE, 0xD7, 0x3B, 0x95, 0x93, 0x31,
+        0x18, 0x31, 0x0E, 0x5E, 0xA3, 0xD6, 0xA2, 0x1A, 0x71, 0x6E,
+        0x81, 0x48, 0x1C, 0x4B, 0xCF, 0xDB, 0x8E, 0x7A, 0x86, 0x61,
+        0x32, 0xDC, 0xFB, 0x55, 0xC1, 0x16, 0x6D, 0x27, 0x92, 0x24,
+        0x45, 0x8B, 0xF1, 0xB8, 0x48, 0xB1, 0x4B, 0x1D, 0xAC, 0xDE,
+        0xDA, 0xDD, 0x8E, 0x2F, 0xC2, 0x91, 0xFB, 0xA5, 0xA9, 0x6E,
+        0xF8, 0x3A, 0x6A, 0xF1, 0xFD, 0x50, 0x18, 0xEF, 0x9F, 0xE7,
+        0xC3, 0xCA, 0x78, 0xEA, 0x56, 0xD3, 0xD3, 0x72, 0x5B, 0x96,
+        0xDD, 0x4E, 0x06, 0x4E, 0x3A, 0xC3, 0xD9, 0xBE, 0x72, 0xB6,
+        0x65, 0x07, 0x07, 0x4C, 0x01, 0x02, 0x41, 0x00, 0xFA, 0x47,
+        0xD4, 0x7A, 0x7C, 0x92, 0x3C, 0x55, 0xEF, 0x81, 0xF0, 0x41,
+        0x30, 0x2D, 0xA3, 0xCF, 0x8F, 0x1C, 0xE6, 0x87, 0x27, 0x05,
+        0x70, 0x0D, 0xDF, 0x98, 0x35, 0xD6, 0xF1, 0x8B, 0x38, 0x2F,
+        0x24, 0xB5, 0xD0, 0x84, 0xB6, 0x79, 0x4F, 0x71, 0x29, 0x94,
+        0x5A, 0xF0, 0x64, 0x6A, 0xAC, 0xE7, 0x72, 0xC6, 0xED, 0x4D,
+        0x59, 0x98, 0x3E, 0x67, 0x3A, 0xF3, 0x74, 0x2C, 0xF9, 0x61,
+        0x17, 0x69, 0x02, 0x41, 0x00, 0xC0, 0xC1, 0x82, 0x0D, 0x0C,
+        0xEB, 0xC6, 0x2F, 0xDC, 0x92, 0xF9, 0x9D, 0x82, 0x1A, 0x31,
+        0xE9, 0xE9, 0xF7, 0x4B, 0xF2, 0x82, 0x87, 0x1C, 0xEE, 0x16,
+        0x6A, 0xD1, 0x1D, 0x18, 0x82, 0x70, 0xF3, 0xC0, 0xB6, 0x2F,
+        0xF6, 0xF3, 0xF7, 0x1D, 0xF1, 0x86, 0x23, 0xC8, 0x4E, 0xEB,
+        0x8F, 0x56, 0x8E, 0x8F, 0xF5, 0xBF, 0xF1, 0xF7, 0x2B, 0xB5,
+        0xCC, 0x3D, 0xC6, 0x57, 0x39, 0x0C, 0x1B, 0x54, 0x41, 0x02,
+        0x41, 0x00, 0x9D, 0x7E, 0x05, 0xDE, 0xED, 0xF4, 0xB7, 0xB2,
+        0xFB, 0xFC, 0x30, 0x4B, 0x55, 0x1D, 0xE3, 0x2F, 0x01, 0x47,
+        0x96, 0x69, 0x05, 0xCD, 0x0E, 0x2E, 0x2C, 0xBD, 0x83, 0x63,
+        0xB6, 0xAB, 0x7C, 0xB7, 0x6D, 0xCA, 0x5B, 0x64, 0xA7, 0xCE,
+        0xBE, 0x86, 0xDF, 0x3B, 0x53, 0xDE, 0x61, 0xD2, 0x1E, 0xEB,
+        0xA5, 0xF6, 0x37, 0xED, 0xAC, 0xAB, 0x78, 0xD9, 0x4C, 0xE7,
+        0x55, 0xFB, 0xD7, 0x11, 0x99, 0xC1, 0x02, 0x40, 0x18, 0x98,
+        0x18, 0x29, 0xE6, 0x1E, 0x27, 0x39, 0x70, 0x21, 0x68, 0xAC,
+        0x0A, 0x2F, 0xA1, 0x72, 0xC1, 0x21, 0x86, 0x95, 0x38, 0xC6,
+        0x58, 0x90, 0xA0, 0x57, 0x9C, 0xBA, 0xE3, 0xA7, 0xB1, 0x15,
+        0xC8, 0xDE, 0xF6, 0x1B, 0xC2, 0x61, 0x23, 0x76, 0xEF, 0xB0,
+        0x9D, 0x1C, 0x44, 0xBE, 0x13, 0x43, 0x39, 0x67, 0x17, 0xC8,
+        0x9D, 0xCA, 0xFB, 0xF5, 0x45, 0x64, 0x8B, 0x38, 0x82, 0x2C,
+        0xF2, 0x81, 0x02, 0x40, 0x39, 0x89, 0xE5, 0x9C, 0x19, 0x55,
+        0x30, 0xBA, 0xB7, 0x48, 0x8C, 0x48, 0x14, 0x0E, 0xF4, 0x9F,
+        0x7E, 0x77, 0x97, 0x43, 0xE1, 0xB4, 0x19, 0x35, 0x31, 0x23,
+        0x75, 0x9C, 0x3B, 0x44, 0xAD, 0x69, 0x12, 0x56, 0xEE, 0x00,
+        0x61, 0x64, 0x16, 0x66, 0xD3, 0x7C, 0x74, 0x2B, 0x15, 0xB4,
+        0xA2, 0xFE, 0xBF, 0x08, 0x6B, 0x1A, 0x5D, 0x3F, 0x90, 0x12,
+        0xB1, 0x05, 0x86, 0x31, 0x29, 0xDB, 0xD9, 0xE2
 };
 static const int sizeof_client_key_der_1024 = sizeof(client_key_der_1024);
 
 /* ./certs/1024/client-keyPub.der, 1024-bit */
 static const unsigned char client_keypub_der_1024[] =
 {
-	0x30, 0x81, 0x9F, 0x30, 0x0D, 0x06, 0x09, 0x2A, 0x86, 0x48, 
-	0x86, 0xF7, 0x0D, 0x01, 0x01, 0x01, 0x05, 0x00, 0x03, 0x81, 
-	0x8D, 0x00, 0x30, 0x81, 0x89, 0x02, 0x81, 0x81, 0x00, 0xBC, 
-	0x73, 0x0E, 0xA8, 0x49, 0xF3, 0x74, 0xA2, 0xA9, 0xEF, 0x18, 
-	0xA5, 0xDA, 0x55, 0x99, 0x21, 0xF9, 0xC8, 0xEC, 0xB3, 0x6D, 
-	0x48, 0xE5, 0x35, 0x35, 0x75, 0x77, 0x37, 0xEC, 0xD1, 0x61, 
-	0x90, 0x5F, 0x3E, 0xD9, 0xE4, 0xD5, 0xDF, 0x94, 0xCA, 0xC1, 
-	0xA9, 0xD7, 0x19, 0xDA, 0x86, 0xC9, 0xE8, 0x4D, 0xC4, 0x61, 
-	0x36, 0x82, 0xFE, 0xAB, 0xAD, 0x7E, 0x77, 0x25, 0xBB, 0x8D, 
-	0x11, 0xA5, 0xBC, 0x62, 0x3A, 0xA8, 0x38, 0xCC, 0x39, 0xA2, 
-	0x04, 0x66, 0xB4, 0xF7, 0xF7, 0xF3, 0xAA, 0xDA, 0x4D, 0x02, 
-	0x0E, 0xBB, 0x5E, 0x8D, 0x69, 0x48, 0xDC, 0x77, 0xC9, 0x28, 
-	0x0E, 0x22, 0xE9, 0x6B, 0xA4, 0x26, 0xBA, 0x4C, 0xE8, 0xC1, 
-	0xFD, 0x4A, 0x6F, 0x2B, 0x1F, 0xEF, 0x8A, 0xAE, 0xF6, 0x90, 
-	0x62, 0xE5, 0x64, 0x1E, 0xEB, 0x2B, 0x3C, 0x67, 0xC8, 0xDC, 
-	0x27, 0x00, 0xF6, 0x91, 0x68, 0x65, 0xA9, 0x02, 0x03, 0x01, 
-	0x00, 0x01
+        0x30, 0x81, 0x9F, 0x30, 0x0D, 0x06, 0x09, 0x2A, 0x86, 0x48,
+        0x86, 0xF7, 0x0D, 0x01, 0x01, 0x01, 0x05, 0x00, 0x03, 0x81,
+        0x8D, 0x00, 0x30, 0x81, 0x89, 0x02, 0x81, 0x81, 0x00, 0xBC,
+        0x73, 0x0E, 0xA8, 0x49, 0xF3, 0x74, 0xA2, 0xA9, 0xEF, 0x18,
+        0xA5, 0xDA, 0x55, 0x99, 0x21, 0xF9, 0xC8, 0xEC, 0xB3, 0x6D,
+        0x48, 0xE5, 0x35, 0x35, 0x75, 0x77, 0x37, 0xEC, 0xD1, 0x61,
+        0x90, 0x5F, 0x3E, 0xD9, 0xE4, 0xD5, 0xDF, 0x94, 0xCA, 0xC1,
+        0xA9, 0xD7, 0x19, 0xDA, 0x86, 0xC9, 0xE8, 0x4D, 0xC4, 0x61,
+        0x36, 0x82, 0xFE, 0xAB, 0xAD, 0x7E, 0x77, 0x25, 0xBB, 0x8D,
+        0x11, 0xA5, 0xBC, 0x62, 0x3A, 0xA8, 0x38, 0xCC, 0x39, 0xA2,
+        0x04, 0x66, 0xB4, 0xF7, 0xF7, 0xF3, 0xAA, 0xDA, 0x4D, 0x02,
+        0x0E, 0xBB, 0x5E, 0x8D, 0x69, 0x48, 0xDC, 0x77, 0xC9, 0x28,
+        0x0E, 0x22, 0xE9, 0x6B, 0xA4, 0x26, 0xBA, 0x4C, 0xE8, 0xC1,
+        0xFD, 0x4A, 0x6F, 0x2B, 0x1F, 0xEF, 0x8A, 0xAE, 0xF6, 0x90,
+        0x62, 0xE5, 0x64, 0x1E, 0xEB, 0x2B, 0x3C, 0x67, 0xC8, 0xDC,
+        0x27, 0x00, 0xF6, 0x91, 0x68, 0x65, 0xA9, 0x02, 0x03, 0x01,
+        0x00, 0x01
 };
 static const int sizeof_client_keypub_der_1024 = sizeof(client_keypub_der_1024);
 
 /* ./certs/1024/client-cert.der, 1024-bit */
 static const unsigned char client_cert_der_1024[] =
 {
-	0x30, 0x82, 0x03, 0xC5, 0x30, 0x82, 0x03, 0x2E, 0xA0, 0x03, 
-	0x02, 0x01, 0x02, 0x02, 0x09, 0x00, 0xBB, 0xD3, 0x10, 0x03, 
-	0xE6, 0x9D, 0x28, 0x03, 0x30, 0x0D, 0x06, 0x09, 0x2A, 0x86, 
-	0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0B, 0x05, 0x00, 0x30, 
-	0x81, 0x9E, 0x31, 0x0B, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 
-	0x06, 0x13, 0x02, 0x55, 0x53, 0x31, 0x10, 0x30, 0x0E, 0x06, 
-	0x03, 0x55, 0x04, 0x08, 0x0C, 0x07, 0x4D, 0x6F, 0x6E, 0x74, 
-	0x61, 0x6E, 0x61, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 
-	0x04, 0x07, 0x0C, 0x07, 0x42, 0x6F, 0x7A, 0x65, 0x6D, 0x61, 
-	0x6E, 0x31, 0x15, 0x30, 0x13, 0x06, 0x03, 0x55, 0x04, 0x0A, 
-	0x0C, 0x0C, 0x77, 0x6F, 0x6C, 0x66, 0x53, 0x53, 0x4C, 0x5F, 
-	0x31, 0x30, 0x32, 0x34, 0x31, 0x19, 0x30, 0x17, 0x06, 0x03, 
-	0x55, 0x04, 0x0B, 0x0C, 0x10, 0x50, 0x72, 0x6F, 0x67, 0x72, 
-	0x61, 0x6D, 0x6D, 0x69, 0x6E, 0x67, 0x2D, 0x31, 0x30, 0x32, 
-	0x34, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 0x03, 
-	0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77, 0x6F, 0x6C, 0x66, 
-	0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x31, 0x1F, 0x30, 
-	0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 
-	0x09, 0x01, 0x16, 0x10, 0x69, 0x6E, 0x66, 0x6F, 0x40, 0x77, 
-	0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 
-	0x30, 0x1E, 0x17, 0x0D, 0x31, 0x38, 0x30, 0x34, 0x31, 0x33, 
-	0x31, 0x35, 0x32, 0x33, 0x30, 0x39, 0x5A, 0x17, 0x0D, 0x32, 
-	0x31, 0x30, 0x31, 0x30, 0x37, 0x31, 0x35, 0x32, 0x33, 0x30, 
-	0x39, 0x5A, 0x30, 0x81, 0x9E, 0x31, 0x0B, 0x30, 0x09, 0x06, 
-	0x03, 0x55, 0x04, 0x06, 0x13, 0x02, 0x55, 0x53, 0x31, 0x10, 
-	0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x08, 0x0C, 0x07, 0x4D, 
-	0x6F, 0x6E, 0x74, 0x61, 0x6E, 0x61, 0x31, 0x10, 0x30, 0x0E, 
-	0x06, 0x03, 0x55, 0x04, 0x07, 0x0C, 0x07, 0x42, 0x6F, 0x7A, 
-	0x65, 0x6D, 0x61, 0x6E, 0x31, 0x15, 0x30, 0x13, 0x06, 0x03, 
-	0x55, 0x04, 0x0A, 0x0C, 0x0C, 0x77, 0x6F, 0x6C, 0x66, 0x53, 
-	0x53, 0x4C, 0x5F, 0x31, 0x30, 0x32, 0x34, 0x31, 0x19, 0x30, 
-	0x17, 0x06, 0x03, 0x55, 0x04, 0x0B, 0x0C, 0x10, 0x50, 0x72, 
-	0x6F, 0x67, 0x72, 0x61, 0x6D, 0x6D, 0x69, 0x6E, 0x67, 0x2D, 
-	0x31, 0x30, 0x32, 0x34, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 
-	0x55, 0x04, 0x03, 0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77, 
-	0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 
-	0x31, 0x1F, 0x30, 0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 
-	0xF7, 0x0D, 0x01, 0x09, 0x01, 0x16, 0x10, 0x69, 0x6E, 0x66, 
-	0x6F, 0x40, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 
-	0x63, 0x6F, 0x6D, 0x30, 0x81, 0x9F, 0x30, 0x0D, 0x06, 0x09, 
-	0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x01, 0x05, 
-	0x00, 0x03, 0x81, 0x8D, 0x00, 0x30, 0x81, 0x89, 0x02, 0x81, 
-	0x81, 0x00, 0xBC, 0x73, 0x0E, 0xA8, 0x49, 0xF3, 0x74, 0xA2, 
-	0xA9, 0xEF, 0x18, 0xA5, 0xDA, 0x55, 0x99, 0x21, 0xF9, 0xC8, 
-	0xEC, 0xB3, 0x6D, 0x48, 0xE5, 0x35, 0x35, 0x75, 0x77, 0x37, 
-	0xEC, 0xD1, 0x61, 0x90, 0x5F, 0x3E, 0xD9, 0xE4, 0xD5, 0xDF, 
-	0x94, 0xCA, 0xC1, 0xA9, 0xD7, 0x19, 0xDA, 0x86, 0xC9, 0xE8, 
-	0x4D, 0xC4, 0x61, 0x36, 0x82, 0xFE, 0xAB, 0xAD, 0x7E, 0x77, 
-	0x25, 0xBB, 0x8D, 0x11, 0xA5, 0xBC, 0x62, 0x3A, 0xA8, 0x38, 
-	0xCC, 0x39, 0xA2, 0x04, 0x66, 0xB4, 0xF7, 0xF7, 0xF3, 0xAA, 
-	0xDA, 0x4D, 0x02, 0x0E, 0xBB, 0x5E, 0x8D, 0x69, 0x48, 0xDC, 
-	0x77, 0xC9, 0x28, 0x0E, 0x22, 0xE9, 0x6B, 0xA4, 0x26, 0xBA, 
-	0x4C, 0xE8, 0xC1, 0xFD, 0x4A, 0x6F, 0x2B, 0x1F, 0xEF, 0x8A, 
-	0xAE, 0xF6, 0x90, 0x62, 0xE5, 0x64, 0x1E, 0xEB, 0x2B, 0x3C, 
-	0x67, 0xC8, 0xDC, 0x27, 0x00, 0xF6, 0x91, 0x68, 0x65, 0xA9, 
-	0x02, 0x03, 0x01, 0x00, 0x01, 0xA3, 0x82, 0x01, 0x07, 0x30, 
-	0x82, 0x01, 0x03, 0x30, 0x1D, 0x06, 0x03, 0x55, 0x1D, 0x0E, 
-	0x04, 0x16, 0x04, 0x14, 0x81, 0x69, 0x0F, 0xF8, 0xDF, 0xDD, 
-	0xCF, 0x34, 0x29, 0xD5, 0x67, 0x75, 0x71, 0x85, 0xC7, 0x75, 
-	0x10, 0x69, 0x59, 0xEC, 0x30, 0x81, 0xD3, 0x06, 0x03, 0x55, 
-	0x1D, 0x23, 0x04, 0x81, 0xCB, 0x30, 0x81, 0xC8, 0x80, 0x14, 
-	0x81, 0x69, 0x0F, 0xF8, 0xDF, 0xDD, 0xCF, 0x34, 0x29, 0xD5, 
-	0x67, 0x75, 0x71, 0x85, 0xC7, 0x75, 0x10, 0x69, 0x59, 0xEC, 
-	0xA1, 0x81, 0xA4, 0xA4, 0x81, 0xA1, 0x30, 0x81, 0x9E, 0x31, 
-	0x0B, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 0x06, 0x13, 0x02, 
-	0x55, 0x53, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 
-	0x08, 0x0C, 0x07, 0x4D, 0x6F, 0x6E, 0x74, 0x61, 0x6E, 0x61, 
-	0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x07, 0x0C, 
-	0x07, 0x42, 0x6F, 0x7A, 0x65, 0x6D, 0x61, 0x6E, 0x31, 0x15, 
-	0x30, 0x13, 0x06, 0x03, 0x55, 0x04, 0x0A, 0x0C, 0x0C, 0x77, 
-	0x6F, 0x6C, 0x66, 0x53, 0x53, 0x4C, 0x5F, 0x31, 0x30, 0x32, 
-	0x34, 0x31, 0x19, 0x30, 0x17, 0x06, 0x03, 0x55, 0x04, 0x0B, 
-	0x0C, 0x10, 0x50, 0x72, 0x6F, 0x67, 0x72, 0x61, 0x6D, 0x6D, 
-	0x69, 0x6E, 0x67, 0x2D, 0x31, 0x30, 0x32, 0x34, 0x31, 0x18, 
-	0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 0x03, 0x0C, 0x0F, 0x77, 
-	0x77, 0x77, 0x2E, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 
-	0x2E, 0x63, 0x6F, 0x6D, 0x31, 0x1F, 0x30, 0x1D, 0x06, 0x09, 
-	0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x09, 0x01, 0x16, 
-	0x10, 0x69, 0x6E, 0x66, 0x6F, 0x40, 0x77, 0x6F, 0x6C, 0x66, 
-	0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x82, 0x09, 0x00, 
-	0xBB, 0xD3, 0x10, 0x03, 0xE6, 0x9D, 0x28, 0x03, 0x30, 0x0C, 
-	0x06, 0x03, 0x55, 0x1D, 0x13, 0x04, 0x05, 0x30, 0x03, 0x01, 
-	0x01, 0xFF, 0x30, 0x0D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 
-	0xF7, 0x0D, 0x01, 0x01, 0x0B, 0x05, 0x00, 0x03, 0x81, 0x81, 
-	0x00, 0x84, 0x99, 0xD9, 0xE5, 0x37, 0xC4, 0x44, 0x7D, 0xCE, 
-	0x29, 0xB8, 0xB6, 0x80, 0x0E, 0xEA, 0xA3, 0xE2, 0xFA, 0xA2, 
-	0x2F, 0x5C, 0xD2, 0x4A, 0x85, 0x67, 0xB9, 0x8B, 0xFA, 0x9F, 
-	0x7D, 0xDA, 0x6D, 0x85, 0x2A, 0xC2, 0x20, 0xF3, 0x18, 0xC8, 
-	0xD4, 0x6B, 0x26, 0xB2, 0x7A, 0x68, 0xE7, 0x82, 0x52, 0x87, 
-	0xE7, 0x0C, 0x5B, 0x08, 0x47, 0x7A, 0x55, 0xA5, 0x0D, 0xFA, 
-	0x72, 0xCE, 0x6B, 0xA1, 0xB2, 0xAE, 0x5A, 0xA1, 0x63, 0xFF, 
-	0x68, 0xDB, 0xE5, 0x49, 0xEF, 0xF1, 0x0E, 0x98, 0x96, 0x09, 
-	0xB5, 0x04, 0x5F, 0xD4, 0x0A, 0x9B, 0x8A, 0xAF, 0xD2, 0x31, 
-	0x1F, 0x95, 0xE5, 0x0F, 0xA8, 0xCD, 0xBB, 0xA1, 0x2D, 0x64, 
-	0xB0, 0xB7, 0xEE, 0x47, 0xA7, 0x58, 0xD9, 0xC7, 0xDB, 0xB0, 
-	0x92, 0xBB, 0xAA, 0xCF, 0xB8, 0x8A, 0x04, 0x5B, 0x0F, 0x9F, 
-	0x3E, 0xE0, 0xD2, 0x42, 0x52, 0xBD, 0x5D, 0xA7, 0x48
+        0x30, 0x82, 0x03, 0xC5, 0x30, 0x82, 0x03, 0x2E, 0xA0, 0x03,
+        0x02, 0x01, 0x02, 0x02, 0x09, 0x00, 0xBB, 0xD3, 0x10, 0x03,
+        0xE6, 0x9D, 0x28, 0x03, 0x30, 0x0D, 0x06, 0x09, 0x2A, 0x86,
+        0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0B, 0x05, 0x00, 0x30,
+        0x81, 0x9E, 0x31, 0x0B, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04,
+        0x06, 0x13, 0x02, 0x55, 0x53, 0x31, 0x10, 0x30, 0x0E, 0x06,
+        0x03, 0x55, 0x04, 0x08, 0x0C, 0x07, 0x4D, 0x6F, 0x6E, 0x74,
+        0x61, 0x6E, 0x61, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55,
+        0x04, 0x07, 0x0C, 0x07, 0x42, 0x6F, 0x7A, 0x65, 0x6D, 0x61,
+        0x6E, 0x31, 0x15, 0x30, 0x13, 0x06, 0x03, 0x55, 0x04, 0x0A,
+        0x0C, 0x0C, 0x77, 0x6F, 0x6C, 0x66, 0x53, 0x53, 0x4C, 0x5F,
+        0x31, 0x30, 0x32, 0x34, 0x31, 0x19, 0x30, 0x17, 0x06, 0x03,
+        0x55, 0x04, 0x0B, 0x0C, 0x10, 0x50, 0x72, 0x6F, 0x67, 0x72,
+        0x61, 0x6D, 0x6D, 0x69, 0x6E, 0x67, 0x2D, 0x31, 0x30, 0x32,
+        0x34, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 0x03,
+        0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77, 0x6F, 0x6C, 0x66,
+        0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x31, 0x1F, 0x30,
+        0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01,
+        0x09, 0x01, 0x16, 0x10, 0x69, 0x6E, 0x66, 0x6F, 0x40, 0x77,
+        0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D,
+        0x30, 0x1E, 0x17, 0x0D, 0x31, 0x38, 0x30, 0x34, 0x31, 0x33,
+        0x31, 0x35, 0x32, 0x33, 0x30, 0x39, 0x5A, 0x17, 0x0D, 0x32,
+        0x31, 0x30, 0x31, 0x30, 0x37, 0x31, 0x35, 0x32, 0x33, 0x30,
+        0x39, 0x5A, 0x30, 0x81, 0x9E, 0x31, 0x0B, 0x30, 0x09, 0x06,
+        0x03, 0x55, 0x04, 0x06, 0x13, 0x02, 0x55, 0x53, 0x31, 0x10,
+        0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x08, 0x0C, 0x07, 0x4D,
+        0x6F, 0x6E, 0x74, 0x61, 0x6E, 0x61, 0x31, 0x10, 0x30, 0x0E,
+        0x06, 0x03, 0x55, 0x04, 0x07, 0x0C, 0x07, 0x42, 0x6F, 0x7A,
+        0x65, 0x6D, 0x61, 0x6E, 0x31, 0x15, 0x30, 0x13, 0x06, 0x03,
+        0x55, 0x04, 0x0A, 0x0C, 0x0C, 0x77, 0x6F, 0x6C, 0x66, 0x53,
+        0x53, 0x4C, 0x5F, 0x31, 0x30, 0x32, 0x34, 0x31, 0x19, 0x30,
+        0x17, 0x06, 0x03, 0x55, 0x04, 0x0B, 0x0C, 0x10, 0x50, 0x72,
+        0x6F, 0x67, 0x72, 0x61, 0x6D, 0x6D, 0x69, 0x6E, 0x67, 0x2D,
+        0x31, 0x30, 0x32, 0x34, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03,
+        0x55, 0x04, 0x03, 0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77,
+        0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D,
+        0x31, 0x1F, 0x30, 0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86,
+        0xF7, 0x0D, 0x01, 0x09, 0x01, 0x16, 0x10, 0x69, 0x6E, 0x66,
+        0x6F, 0x40, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E,
+        0x63, 0x6F, 0x6D, 0x30, 0x81, 0x9F, 0x30, 0x0D, 0x06, 0x09,
+        0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x01, 0x05,
+        0x00, 0x03, 0x81, 0x8D, 0x00, 0x30, 0x81, 0x89, 0x02, 0x81,
+        0x81, 0x00, 0xBC, 0x73, 0x0E, 0xA8, 0x49, 0xF3, 0x74, 0xA2,
+        0xA9, 0xEF, 0x18, 0xA5, 0xDA, 0x55, 0x99, 0x21, 0xF9, 0xC8,
+        0xEC, 0xB3, 0x6D, 0x48, 0xE5, 0x35, 0x35, 0x75, 0x77, 0x37,
+        0xEC, 0xD1, 0x61, 0x90, 0x5F, 0x3E, 0xD9, 0xE4, 0xD5, 0xDF,
+        0x94, 0xCA, 0xC1, 0xA9, 0xD7, 0x19, 0xDA, 0x86, 0xC9, 0xE8,
+        0x4D, 0xC4, 0x61, 0x36, 0x82, 0xFE, 0xAB, 0xAD, 0x7E, 0x77,
+        0x25, 0xBB, 0x8D, 0x11, 0xA5, 0xBC, 0x62, 0x3A, 0xA8, 0x38,
+        0xCC, 0x39, 0xA2, 0x04, 0x66, 0xB4, 0xF7, 0xF7, 0xF3, 0xAA,
+        0xDA, 0x4D, 0x02, 0x0E, 0xBB, 0x5E, 0x8D, 0x69, 0x48, 0xDC,
+        0x77, 0xC9, 0x28, 0x0E, 0x22, 0xE9, 0x6B, 0xA4, 0x26, 0xBA,
+        0x4C, 0xE8, 0xC1, 0xFD, 0x4A, 0x6F, 0x2B, 0x1F, 0xEF, 0x8A,
+        0xAE, 0xF6, 0x90, 0x62, 0xE5, 0x64, 0x1E, 0xEB, 0x2B, 0x3C,
+        0x67, 0xC8, 0xDC, 0x27, 0x00, 0xF6, 0x91, 0x68, 0x65, 0xA9,
+        0x02, 0x03, 0x01, 0x00, 0x01, 0xA3, 0x82, 0x01, 0x07, 0x30,
+        0x82, 0x01, 0x03, 0x30, 0x1D, 0x06, 0x03, 0x55, 0x1D, 0x0E,
+        0x04, 0x16, 0x04, 0x14, 0x81, 0x69, 0x0F, 0xF8, 0xDF, 0xDD,
+        0xCF, 0x34, 0x29, 0xD5, 0x67, 0x75, 0x71, 0x85, 0xC7, 0x75,
+        0x10, 0x69, 0x59, 0xEC, 0x30, 0x81, 0xD3, 0x06, 0x03, 0x55,
+        0x1D, 0x23, 0x04, 0x81, 0xCB, 0x30, 0x81, 0xC8, 0x80, 0x14,
+        0x81, 0x69, 0x0F, 0xF8, 0xDF, 0xDD, 0xCF, 0x34, 0x29, 0xD5,
+        0x67, 0x75, 0x71, 0x85, 0xC7, 0x75, 0x10, 0x69, 0x59, 0xEC,
+        0xA1, 0x81, 0xA4, 0xA4, 0x81, 0xA1, 0x30, 0x81, 0x9E, 0x31,
+        0x0B, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 0x06, 0x13, 0x02,
+        0x55, 0x53, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04,
+        0x08, 0x0C, 0x07, 0x4D, 0x6F, 0x6E, 0x74, 0x61, 0x6E, 0x61,
+        0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x07, 0x0C,
+        0x07, 0x42, 0x6F, 0x7A, 0x65, 0x6D, 0x61, 0x6E, 0x31, 0x15,
+        0x30, 0x13, 0x06, 0x03, 0x55, 0x04, 0x0A, 0x0C, 0x0C, 0x77,
+        0x6F, 0x6C, 0x66, 0x53, 0x53, 0x4C, 0x5F, 0x31, 0x30, 0x32,
+        0x34, 0x31, 0x19, 0x30, 0x17, 0x06, 0x03, 0x55, 0x04, 0x0B,
+        0x0C, 0x10, 0x50, 0x72, 0x6F, 0x67, 0x72, 0x61, 0x6D, 0x6D,
+        0x69, 0x6E, 0x67, 0x2D, 0x31, 0x30, 0x32, 0x34, 0x31, 0x18,
+        0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 0x03, 0x0C, 0x0F, 0x77,
+        0x77, 0x77, 0x2E, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C,
+        0x2E, 0x63, 0x6F, 0x6D, 0x31, 0x1F, 0x30, 0x1D, 0x06, 0x09,
+        0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x09, 0x01, 0x16,
+        0x10, 0x69, 0x6E, 0x66, 0x6F, 0x40, 0x77, 0x6F, 0x6C, 0x66,
+        0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x82, 0x09, 0x00,
+        0xBB, 0xD3, 0x10, 0x03, 0xE6, 0x9D, 0x28, 0x03, 0x30, 0x0C,
+        0x06, 0x03, 0x55, 0x1D, 0x13, 0x04, 0x05, 0x30, 0x03, 0x01,
+        0x01, 0xFF, 0x30, 0x0D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86,
+        0xF7, 0x0D, 0x01, 0x01, 0x0B, 0x05, 0x00, 0x03, 0x81, 0x81,
+        0x00, 0x84, 0x99, 0xD9, 0xE5, 0x37, 0xC4, 0x44, 0x7D, 0xCE,
+        0x29, 0xB8, 0xB6, 0x80, 0x0E, 0xEA, 0xA3, 0xE2, 0xFA, 0xA2,
+        0x2F, 0x5C, 0xD2, 0x4A, 0x85, 0x67, 0xB9, 0x8B, 0xFA, 0x9F,
+        0x7D, 0xDA, 0x6D, 0x85, 0x2A, 0xC2, 0x20, 0xF3, 0x18, 0xC8,
+        0xD4, 0x6B, 0x26, 0xB2, 0x7A, 0x68, 0xE7, 0x82, 0x52, 0x87,
+        0xE7, 0x0C, 0x5B, 0x08, 0x47, 0x7A, 0x55, 0xA5, 0x0D, 0xFA,
+        0x72, 0xCE, 0x6B, 0xA1, 0xB2, 0xAE, 0x5A, 0xA1, 0x63, 0xFF,
+        0x68, 0xDB, 0xE5, 0x49, 0xEF, 0xF1, 0x0E, 0x98, 0x96, 0x09,
+        0xB5, 0x04, 0x5F, 0xD4, 0x0A, 0x9B, 0x8A, 0xAF, 0xD2, 0x31,
+        0x1F, 0x95, 0xE5, 0x0F, 0xA8, 0xCD, 0xBB, 0xA1, 0x2D, 0x64,
+        0xB0, 0xB7, 0xEE, 0x47, 0xA7, 0x58, 0xD9, 0xC7, 0xDB, 0xB0,
+        0x92, 0xBB, 0xAA, 0xCF, 0xB8, 0x8A, 0x04, 0x5B, 0x0F, 0x9F,
+        0x3E, 0xE0, 0xD2, 0x42, 0x52, 0xBD, 0x5D, 0xA7, 0x48
 };
 static const int sizeof_client_cert_der_1024 = sizeof(client_cert_der_1024);
 
 /* ./certs/1024/dh1024.der, 1024-bit */
 static const unsigned char dh_key_der_1024[] =
 {
-	0x30, 0x81, 0x87, 0x02, 0x81, 0x81, 0x00, 0xA4, 0xD2, 0xB8, 
-	0x6E, 0x78, 0xF5, 0xD9, 0xED, 0x2D, 0x7C, 0xDD, 0xB6, 0x16, 
-	0x86, 0x5A, 0x4B, 0x05, 0x76, 0x90, 0xDD, 0x66, 0x61, 0xB9, 
-	0x6D, 0x52, 0xA7, 0x1C, 0xAF, 0x62, 0xC6, 0x69, 0x47, 0x7B, 
-	0x39, 0xF2, 0xFB, 0x94, 0xEC, 0xBC, 0x79, 0xFF, 0x24, 0x5E, 
-	0xEF, 0x79, 0xBB, 0x59, 0xB2, 0xFC, 0xCA, 0x07, 0xD6, 0xF4, 
-	0xE9, 0x34, 0xF7, 0xE8, 0x38, 0xE7, 0xD7, 0x33, 0x44, 0x1D, 
-	0xA3, 0x64, 0x76, 0x1A, 0x84, 0x97, 0x54, 0x74, 0x40, 0x84, 
-	0x1F, 0x15, 0xFE, 0x7C, 0x25, 0x2A, 0x2B, 0x25, 0xFD, 0x9E, 
-	0xC1, 0x89, 0x33, 0x8C, 0x39, 0x25, 0x2B, 0x40, 0xE6, 0xCD, 
-	0xF8, 0xA8, 0xA1, 0x8A, 0x53, 0xC6, 0x47, 0xB2, 0xA0, 0xD7, 
-	0x8F, 0xEB, 0x2E, 0x60, 0x0A, 0x0D, 0x4B, 0xF8, 0xB4, 0x94, 
-	0x8C, 0x63, 0x0A, 0xAD, 0xC7, 0x10, 0xEA, 0xC7, 0xA1, 0xB9, 
-	0x9D, 0xF2, 0xA8, 0x37, 0x73, 0x02, 0x01, 0x02
+        0x30, 0x81, 0x87, 0x02, 0x81, 0x81, 0x00, 0xA4, 0xD2, 0xB8,
+        0x6E, 0x78, 0xF5, 0xD9, 0xED, 0x2D, 0x7C, 0xDD, 0xB6, 0x16,
+        0x86, 0x5A, 0x4B, 0x05, 0x76, 0x90, 0xDD, 0x66, 0x61, 0xB9,
+        0x6D, 0x52, 0xA7, 0x1C, 0xAF, 0x62, 0xC6, 0x69, 0x47, 0x7B,
+        0x39, 0xF2, 0xFB, 0x94, 0xEC, 0xBC, 0x79, 0xFF, 0x24, 0x5E,
+        0xEF, 0x79, 0xBB, 0x59, 0xB2, 0xFC, 0xCA, 0x07, 0xD6, 0xF4,
+        0xE9, 0x34, 0xF7, 0xE8, 0x38, 0xE7, 0xD7, 0x33, 0x44, 0x1D,
+        0xA3, 0x64, 0x76, 0x1A, 0x84, 0x97, 0x54, 0x74, 0x40, 0x84,
+        0x1F, 0x15, 0xFE, 0x7C, 0x25, 0x2A, 0x2B, 0x25, 0xFD, 0x9E,
+        0xC1, 0x89, 0x33, 0x8C, 0x39, 0x25, 0x2B, 0x40, 0xE6, 0xCD,
+        0xF8, 0xA8, 0xA1, 0x8A, 0x53, 0xC6, 0x47, 0xB2, 0xA0, 0xD7,
+        0x8F, 0xEB, 0x2E, 0x60, 0x0A, 0x0D, 0x4B, 0xF8, 0xB4, 0x94,
+        0x8C, 0x63, 0x0A, 0xAD, 0xC7, 0x10, 0xEA, 0xC7, 0xA1, 0xB9,
+        0x9D, 0xF2, 0xA8, 0x37, 0x73, 0x02, 0x01, 0x02
 };
 static const int sizeof_dh_key_der_1024 = sizeof(dh_key_der_1024);
 
 /* ./certs/1024/dsa1024.der, 1024-bit */
 static const unsigned char dsa_key_der_1024[] =
 {
-	0x30, 0x82, 0x01, 0xBC, 0x02, 0x01, 0x00, 0x02, 0x81, 0x81, 
-	0x00, 0xF7, 0x4B, 0xF9, 0xBB, 0x15, 0x98, 0xEB, 0xDD, 0xDE, 
-	0x1E, 0x4E, 0x71, 0x88, 0x85, 0xF2, 0xB7, 0xBA, 0xE2, 0x4A, 
-	0xDA, 0x76, 0x40, 0xCD, 0x69, 0x48, 0x9E, 0x83, 0x7C, 0x11, 
-	0xF7, 0x65, 0x31, 0x78, 0xF5, 0x25, 0x2D, 0xF7, 0xB7, 0xF8, 
-	0x52, 0x3F, 0xBE, 0xD8, 0xB6, 0xC5, 0xFE, 0x18, 0x15, 0x5B, 
-	0xB9, 0xD5, 0x92, 0x86, 0xBC, 0xB2, 0x17, 0x7C, 0xD8, 0xB0, 
-	0xBE, 0xA0, 0x7C, 0xF2, 0xD5, 0x73, 0x7A, 0x58, 0x8F, 0x8D, 
-	0xE5, 0x4A, 0x00, 0x99, 0x83, 0x4A, 0xC0, 0x9E, 0x16, 0x09, 
-	0xA1, 0x10, 0x34, 0xD5, 0x19, 0xBB, 0x63, 0xE3, 0xDD, 0x83, 
-	0x74, 0x7F, 0x10, 0xCA, 0x73, 0x75, 0xEE, 0x31, 0x4A, 0xDD, 
-	0x9F, 0xE0, 0x02, 0x6A, 0x9D, 0xEE, 0xB2, 0x4B, 0xA7, 0x6B, 
-	0x2A, 0x6C, 0xC7, 0x86, 0x77, 0xE8, 0x04, 0x15, 0xDC, 0x92, 
-	0xB4, 0x7A, 0x29, 0x1F, 0x4E, 0x83, 0x63, 0x85, 0x55, 0x02, 
-	0x15, 0x00, 0xD2, 0x05, 0xE4, 0x73, 0xFB, 0xC1, 0x99, 0xC5, 
-	0xDC, 0x68, 0xA4, 0x8D, 0x92, 0x27, 0x3D, 0xE2, 0x52, 0x5F, 
-	0x89, 0x8B, 0x02, 0x81, 0x81, 0x00, 0xAA, 0x21, 0x02, 0x09, 
-	0x43, 0x6E, 0xFB, 0xA2, 0x54, 0x14, 0x85, 0x0A, 0xF4, 0x28, 
-	0x7C, 0xCB, 0xCC, 0xDB, 0xF5, 0x1E, 0xA2, 0x18, 0xA9, 0x21, 
-	0xDE, 0x88, 0x88, 0x33, 0x8C, 0x2E, 0xEB, 0x8D, 0xA3, 0xF0, 
-	0x1D, 0xC8, 0x8F, 0xF6, 0x7E, 0xF8, 0xCF, 0x12, 0xF5, 0xB4, 
-	0xA1, 0x11, 0x6F, 0x0C, 0xD4, 0xF0, 0x06, 0xAD, 0xC4, 0xFC, 
-	0x14, 0x45, 0xC7, 0x94, 0x15, 0xBC, 0x19, 0x4B, 0xAE, 0xEF, 
-	0x93, 0x6A, 0x4F, 0xCC, 0x14, 0xD8, 0x47, 0x8B, 0x39, 0x66, 
-	0x87, 0x02, 0xD4, 0x28, 0x0A, 0xB8, 0xEE, 0x09, 0x37, 0xF4, 
-	0x00, 0xA0, 0x04, 0xA7, 0x79, 0xA7, 0xD2, 0x3C, 0xF7, 0x34, 
-	0x43, 0x56, 0x8E, 0xD0, 0x7C, 0xC2, 0xD8, 0x4D, 0x0F, 0x89, 
-	0xED, 0x14, 0xC1, 0x2C, 0x9C, 0x4C, 0x19, 0x9B, 0x9E, 0xDC, 
-	0x53, 0x09, 0x9F, 0xDF, 0x2D, 0xF0, 0x0C, 0x27, 0x54, 0x3A, 
-	0x77, 0x14, 0x2D, 0xDE, 0x02, 0x81, 0x81, 0x00, 0xE8, 0x1F, 
-	0x7C, 0xB7, 0xC0, 0x54, 0x51, 0xA7, 0x28, 0x2D, 0x58, 0x7C, 
-	0xDE, 0xD4, 0x5C, 0xDD, 0xD5, 0x76, 0x84, 0x3C, 0x36, 0x20, 
-	0xC0, 0xC3, 0x25, 0xD7, 0x3A, 0x38, 0xE1, 0x54, 0xC8, 0xFD, 
-	0x40, 0x68, 0x1A, 0x21, 0x54, 0x26, 0x39, 0x14, 0xBF, 0xF6, 
-	0xA3, 0x9C, 0x5E, 0xD9, 0x2B, 0xF7, 0xC9, 0x25, 0xBA, 0x00, 
-	0x09, 0xCB, 0x7F, 0x0C, 0x4A, 0x24, 0xFD, 0x15, 0x16, 0x15, 
-	0x48, 0xCD, 0x0B, 0x52, 0x44, 0x40, 0x7B, 0x90, 0x63, 0x2B, 
-	0x90, 0x22, 0xC5, 0x18, 0x05, 0x80, 0x53, 0xAF, 0x83, 0x1F, 
-	0x54, 0xE2, 0xB0, 0xA2, 0x0B, 0x5A, 0x92, 0x24, 0xE1, 0x62, 
-	0x28, 0x3F, 0xB7, 0xCA, 0xB9, 0x89, 0xD6, 0xA0, 0xB7, 0xAD, 
-	0xAE, 0x05, 0xE1, 0xC1, 0x59, 0x40, 0xED, 0x4A, 0x1B, 0x68, 
-	0xA7, 0x7B, 0xFB, 0xC3, 0x20, 0x81, 0xEF, 0x4B, 0xF3, 0x69, 
-	0x91, 0xB0, 0xCE, 0x3A, 0xB0, 0x38, 0x02, 0x14, 0x25, 0x38, 
-	0x3B, 0xA1, 0x19, 0x75, 0xDF, 0x9B, 0xF5, 0x72, 0x53, 0x4F, 
-	0x39, 0xE1, 0x1C, 0xEC, 0x13, 0x84, 0x82, 0x18
+        0x30, 0x82, 0x01, 0xBC, 0x02, 0x01, 0x00, 0x02, 0x81, 0x81,
+        0x00, 0xF7, 0x4B, 0xF9, 0xBB, 0x15, 0x98, 0xEB, 0xDD, 0xDE,
+        0x1E, 0x4E, 0x71, 0x88, 0x85, 0xF2, 0xB7, 0xBA, 0xE2, 0x4A,
+        0xDA, 0x76, 0x40, 0xCD, 0x69, 0x48, 0x9E, 0x83, 0x7C, 0x11,
+        0xF7, 0x65, 0x31, 0x78, 0xF5, 0x25, 0x2D, 0xF7, 0xB7, 0xF8,
+        0x52, 0x3F, 0xBE, 0xD8, 0xB6, 0xC5, 0xFE, 0x18, 0x15, 0x5B,
+        0xB9, 0xD5, 0x92, 0x86, 0xBC, 0xB2, 0x17, 0x7C, 0xD8, 0xB0,
+        0xBE, 0xA0, 0x7C, 0xF2, 0xD5, 0x73, 0x7A, 0x58, 0x8F, 0x8D,
+        0xE5, 0x4A, 0x00, 0x99, 0x83, 0x4A, 0xC0, 0x9E, 0x16, 0x09,
+        0xA1, 0x10, 0x34, 0xD5, 0x19, 0xBB, 0x63, 0xE3, 0xDD, 0x83,
+        0x74, 0x7F, 0x10, 0xCA, 0x73, 0x75, 0xEE, 0x31, 0x4A, 0xDD,
+        0x9F, 0xE0, 0x02, 0x6A, 0x9D, 0xEE, 0xB2, 0x4B, 0xA7, 0x6B,
+        0x2A, 0x6C, 0xC7, 0x86, 0x77, 0xE8, 0x04, 0x15, 0xDC, 0x92,
+        0xB4, 0x7A, 0x29, 0x1F, 0x4E, 0x83, 0x63, 0x85, 0x55, 0x02,
+        0x15, 0x00, 0xD2, 0x05, 0xE4, 0x73, 0xFB, 0xC1, 0x99, 0xC5,
+        0xDC, 0x68, 0xA4, 0x8D, 0x92, 0x27, 0x3D, 0xE2, 0x52, 0x5F,
+        0x89, 0x8B, 0x02, 0x81, 0x81, 0x00, 0xAA, 0x21, 0x02, 0x09,
+        0x43, 0x6E, 0xFB, 0xA2, 0x54, 0x14, 0x85, 0x0A, 0xF4, 0x28,
+        0x7C, 0xCB, 0xCC, 0xDB, 0xF5, 0x1E, 0xA2, 0x18, 0xA9, 0x21,
+        0xDE, 0x88, 0x88, 0x33, 0x8C, 0x2E, 0xEB, 0x8D, 0xA3, 0xF0,
+        0x1D, 0xC8, 0x8F, 0xF6, 0x7E, 0xF8, 0xCF, 0x12, 0xF5, 0xB4,
+        0xA1, 0x11, 0x6F, 0x0C, 0xD4, 0xF0, 0x06, 0xAD, 0xC4, 0xFC,
+        0x14, 0x45, 0xC7, 0x94, 0x15, 0xBC, 0x19, 0x4B, 0xAE, 0xEF,
+        0x93, 0x6A, 0x4F, 0xCC, 0x14, 0xD8, 0x47, 0x8B, 0x39, 0x66,
+        0x87, 0x02, 0xD4, 0x28, 0x0A, 0xB8, 0xEE, 0x09, 0x37, 0xF4,
+        0x00, 0xA0, 0x04, 0xA7, 0x79, 0xA7, 0xD2, 0x3C, 0xF7, 0x34,
+        0x43, 0x56, 0x8E, 0xD0, 0x7C, 0xC2, 0xD8, 0x4D, 0x0F, 0x89,
+        0xED, 0x14, 0xC1, 0x2C, 0x9C, 0x4C, 0x19, 0x9B, 0x9E, 0xDC,
+        0x53, 0x09, 0x9F, 0xDF, 0x2D, 0xF0, 0x0C, 0x27, 0x54, 0x3A,
+        0x77, 0x14, 0x2D, 0xDE, 0x02, 0x81, 0x81, 0x00, 0xE8, 0x1F,
+        0x7C, 0xB7, 0xC0, 0x54, 0x51, 0xA7, 0x28, 0x2D, 0x58, 0x7C,
+        0xDE, 0xD4, 0x5C, 0xDD, 0xD5, 0x76, 0x84, 0x3C, 0x36, 0x20,
+        0xC0, 0xC3, 0x25, 0xD7, 0x3A, 0x38, 0xE1, 0x54, 0xC8, 0xFD,
+        0x40, 0x68, 0x1A, 0x21, 0x54, 0x26, 0x39, 0x14, 0xBF, 0xF6,
+        0xA3, 0x9C, 0x5E, 0xD9, 0x2B, 0xF7, 0xC9, 0x25, 0xBA, 0x00,
+        0x09, 0xCB, 0x7F, 0x0C, 0x4A, 0x24, 0xFD, 0x15, 0x16, 0x15,
+        0x48, 0xCD, 0x0B, 0x52, 0x44, 0x40, 0x7B, 0x90, 0x63, 0x2B,
+        0x90, 0x22, 0xC5, 0x18, 0x05, 0x80, 0x53, 0xAF, 0x83, 0x1F,
+        0x54, 0xE2, 0xB0, 0xA2, 0x0B, 0x5A, 0x92, 0x24, 0xE1, 0x62,
+        0x28, 0x3F, 0xB7, 0xCA, 0xB9, 0x89, 0xD6, 0xA0, 0xB7, 0xAD,
+        0xAE, 0x05, 0xE1, 0xC1, 0x59, 0x40, 0xED, 0x4A, 0x1B, 0x68,
+        0xA7, 0x7B, 0xFB, 0xC3, 0x20, 0x81, 0xEF, 0x4B, 0xF3, 0x69,
+        0x91, 0xB0, 0xCE, 0x3A, 0xB0, 0x38, 0x02, 0x14, 0x25, 0x38,
+        0x3B, 0xA1, 0x19, 0x75, 0xDF, 0x9B, 0xF5, 0x72, 0x53, 0x4F,
+        0x39, 0xE1, 0x1C, 0xEC, 0x13, 0x84, 0x82, 0x18
 };
 static const int sizeof_dsa_key_der_1024 = sizeof(dsa_key_der_1024);
 
 /* ./certs/1024/rsa1024.der, 1024-bit */
 static const unsigned char rsa_key_der_1024[] =
 {
-	0x30, 0x82, 0x02, 0x5D, 0x02, 0x01, 0x00, 0x02, 0x81, 0x81, 
-	0x00, 0xBE, 0x70, 0x70, 0xB8, 0x04, 0x18, 0xE5, 0x28, 0xFE, 
-	0x66, 0xD8, 0x90, 0x88, 0xE0, 0xF1, 0xB7, 0xC3, 0xD0, 0xD2, 
-	0x3E, 0xE6, 0x4B, 0x94, 0x74, 0xB0, 0xFF, 0xB0, 0xF7, 0x63, 
-	0xA5, 0xAB, 0x7E, 0xAF, 0xB6, 0x2B, 0xB7, 0x38, 0x16, 0x1A, 
-	0x50, 0xBF, 0xF1, 0xCA, 0x87, 0x3A, 0xD5, 0xB0, 0xDA, 0xF8, 
-	0x43, 0x7A, 0x15, 0xB9, 0x7E, 0xEA, 0x2A, 0x80, 0xD2, 0x51, 
-	0xB0, 0x35, 0xAF, 0x07, 0xF3, 0xF2, 0x5D, 0x24, 0x3A, 0x4B, 
-	0x87, 0x56, 0x48, 0x1B, 0x3C, 0x24, 0x9A, 0xDA, 0x70, 0x80, 
-	0xBD, 0x3C, 0x8B, 0x03, 0x4A, 0x0C, 0x83, 0x71, 0xDE, 0xE3, 
-	0x03, 0x70, 0xA2, 0xB7, 0x60, 0x09, 0x1B, 0x5E, 0xC7, 0x3D, 
-	0xA0, 0x64, 0x60, 0xE3, 0xA9, 0x06, 0x8D, 0xD3, 0xFF, 0x42, 
-	0xBB, 0x0A, 0x94, 0x27, 0x2D, 0x57, 0x42, 0x0D, 0xB0, 0x2D, 
-	0xE0, 0xBA, 0x18, 0x25, 0x60, 0x92, 0x11, 0x92, 0xF3, 0x02, 
-	0x03, 0x01, 0x00, 0x01, 0x02, 0x81, 0x80, 0x0E, 0xEE, 0x1D, 
-	0xC8, 0x2F, 0x7A, 0x0C, 0x2D, 0x44, 0x94, 0xA7, 0x91, 0xDD, 
-	0x49, 0x55, 0x6A, 0x04, 0xCE, 0x10, 0x4D, 0xA2, 0x1C, 0x76, 
-	0xCD, 0x17, 0x3B, 0x54, 0x92, 0x70, 0x9B, 0x82, 0x70, 0x72, 
-	0x32, 0x24, 0x07, 0x3F, 0x3C, 0x6C, 0x5F, 0xBC, 0x4C, 0xA6, 
-	0x86, 0x27, 0x94, 0xAD, 0x42, 0xDD, 0x87, 0xDC, 0xC0, 0x6B, 
-	0x44, 0x89, 0xF3, 0x3F, 0x1A, 0x3E, 0x11, 0x44, 0x84, 0x2E, 
-	0x69, 0x4C, 0xBB, 0x4A, 0x71, 0x1A, 0xBB, 0x9A, 0x52, 0x3C, 
-	0x6B, 0xDE, 0xBC, 0xB2, 0x7C, 0x51, 0xEF, 0x4F, 0x8F, 0x3A, 
-	0xDC, 0x50, 0x04, 0x4E, 0xB6, 0x31, 0x66, 0xA8, 0x8E, 0x06, 
-	0x3B, 0x51, 0xA9, 0xC1, 0x8A, 0xCB, 0xC4, 0x81, 0xCA, 0x2D, 
-	0x69, 0xEC, 0x88, 0xFC, 0x33, 0x88, 0xD1, 0xD4, 0x29, 0x47, 
-	0x87, 0x37, 0xF9, 0x6A, 0x22, 0x69, 0xB9, 0xC9, 0xFE, 0xEB, 
-	0x8C, 0xC5, 0x21, 0x41, 0x71, 0x02, 0x41, 0x00, 0xFD, 0x17, 
-	0x98, 0x42, 0x54, 0x1C, 0x23, 0xF8, 0xD7, 0x5D, 0xEF, 0x49, 
-	0x4F, 0xAF, 0xD9, 0x35, 0x6F, 0x08, 0xC6, 0xC7, 0x40, 0x5C, 
-	0x7E, 0x58, 0x86, 0xC2, 0xB2, 0x16, 0x39, 0x24, 0xC5, 0x06, 
-	0xB0, 0x3D, 0xAF, 0x02, 0xD2, 0x87, 0x77, 0xD2, 0x76, 0xBA, 
-	0xE3, 0x59, 0x60, 0x42, 0xF1, 0x16, 0xEF, 0x33, 0x0B, 0xF2, 
-	0x0B, 0xBA, 0x99, 0xCC, 0xB6, 0x4C, 0x46, 0x3F, 0x33, 0xE4, 
-	0xD4, 0x67, 0x02, 0x41, 0x00, 0xC0, 0xA0, 0x91, 0x6D, 0xFE, 
-	0x28, 0xE0, 0x81, 0x5A, 0x15, 0xA7, 0xC9, 0xA8, 0x98, 0xC6, 
-	0x0A, 0xAB, 0x00, 0xC5, 0x40, 0xC9, 0x21, 0xBB, 0xB2, 0x33, 
-	0x5A, 0xA7, 0xCB, 0x6E, 0xB8, 0x08, 0x56, 0x4A, 0x76, 0x28, 
-	0xE8, 0x6D, 0xBD, 0xF5, 0x26, 0x7B, 0xBF, 0xC5, 0x46, 0x45, 
-	0x0D, 0xEC, 0x7D, 0xEE, 0x82, 0xD6, 0xCA, 0x5F, 0x3D, 0x6E, 
-	0xCC, 0x94, 0x73, 0xCD, 0xCE, 0x86, 0x6E, 0x95, 0x95, 0x02, 
-	0x40, 0x38, 0xFD, 0x28, 0x1E, 0xBF, 0x5B, 0xBA, 0xC9, 0xDC, 
-	0x8C, 0xDD, 0x45, 0xAF, 0xB8, 0xD3, 0xFB, 0x11, 0x2E, 0x73, 
-	0xBC, 0x08, 0x05, 0x0B, 0xBA, 0x19, 0x56, 0x1B, 0xCD, 0x9F, 
-	0x3E, 0x65, 0x53, 0x15, 0x3A, 0x3E, 0x7F, 0x2F, 0x32, 0xAB, 
-	0xCB, 0x6B, 0x4A, 0xB7, 0xC8, 0xB7, 0x41, 0x3B, 0x92, 0x43, 
-	0x78, 0x46, 0x17, 0x51, 0x86, 0xC9, 0xFC, 0xEB, 0x8B, 0x8F, 
-	0x41, 0xCA, 0x08, 0x9B, 0xBF, 0x02, 0x41, 0x00, 0xAD, 0x9B, 
-	0x89, 0xB6, 0xF2, 0x8C, 0x70, 0xDA, 0xE4, 0x10, 0x04, 0x6B, 
-	0x11, 0x92, 0xAF, 0x5A, 0xCA, 0x08, 0x25, 0xBF, 0x60, 0x07, 
-	0x11, 0x1D, 0x68, 0x7F, 0x5A, 0x1F, 0x55, 0x28, 0x74, 0x0B, 
-	0x21, 0x8D, 0x21, 0x0D, 0x6A, 0x6A, 0xFB, 0xD9, 0xB5, 0x4A, 
-	0x7F, 0x47, 0xF7, 0xD0, 0xB6, 0xC6, 0x41, 0x02, 0x97, 0x07, 
-	0x49, 0x93, 0x1A, 0x9B, 0x33, 0x68, 0xB3, 0xA2, 0x61, 0x32, 
-	0xA5, 0x89, 0x02, 0x41, 0x00, 0x8F, 0xEF, 0xAD, 0xB5, 0xB0, 
-	0xB0, 0x7E, 0x86, 0x03, 0x43, 0x93, 0x6E, 0xDD, 0x3C, 0x2D, 
-	0x9B, 0x6A, 0x55, 0xFF, 0x6F, 0x3E, 0x70, 0x2A, 0xD4, 0xBF, 
-	0x1F, 0x8C, 0x93, 0x60, 0x9E, 0x6D, 0x2F, 0x18, 0x6C, 0x11, 
-	0x36, 0x98, 0x3F, 0x10, 0x78, 0xE8, 0x3E, 0x8F, 0xFE, 0x55, 
-	0xB9, 0x9E, 0xD5, 0x5B, 0x2E, 0x87, 0x1C, 0x58, 0xD0, 0x37, 
-	0x89, 0x96, 0xEC, 0x48, 0x54, 0xF5, 0x9F, 0x0F, 0xB3
+        0x30, 0x82, 0x02, 0x5D, 0x02, 0x01, 0x00, 0x02, 0x81, 0x81,
+        0x00, 0xBE, 0x70, 0x70, 0xB8, 0x04, 0x18, 0xE5, 0x28, 0xFE,
+        0x66, 0xD8, 0x90, 0x88, 0xE0, 0xF1, 0xB7, 0xC3, 0xD0, 0xD2,
+        0x3E, 0xE6, 0x4B, 0x94, 0x74, 0xB0, 0xFF, 0xB0, 0xF7, 0x63,
+        0xA5, 0xAB, 0x7E, 0xAF, 0xB6, 0x2B, 0xB7, 0x38, 0x16, 0x1A,
+        0x50, 0xBF, 0xF1, 0xCA, 0x87, 0x3A, 0xD5, 0xB0, 0xDA, 0xF8,
+        0x43, 0x7A, 0x15, 0xB9, 0x7E, 0xEA, 0x2A, 0x80, 0xD2, 0x51,
+        0xB0, 0x35, 0xAF, 0x07, 0xF3, 0xF2, 0x5D, 0x24, 0x3A, 0x4B,
+        0x87, 0x56, 0x48, 0x1B, 0x3C, 0x24, 0x9A, 0xDA, 0x70, 0x80,
+        0xBD, 0x3C, 0x8B, 0x03, 0x4A, 0x0C, 0x83, 0x71, 0xDE, 0xE3,
+        0x03, 0x70, 0xA2, 0xB7, 0x60, 0x09, 0x1B, 0x5E, 0xC7, 0x3D,
+        0xA0, 0x64, 0x60, 0xE3, 0xA9, 0x06, 0x8D, 0xD3, 0xFF, 0x42,
+        0xBB, 0x0A, 0x94, 0x27, 0x2D, 0x57, 0x42, 0x0D, 0xB0, 0x2D,
+        0xE0, 0xBA, 0x18, 0x25, 0x60, 0x92, 0x11, 0x92, 0xF3, 0x02,
+        0x03, 0x01, 0x00, 0x01, 0x02, 0x81, 0x80, 0x0E, 0xEE, 0x1D,
+        0xC8, 0x2F, 0x7A, 0x0C, 0x2D, 0x44, 0x94, 0xA7, 0x91, 0xDD,
+        0x49, 0x55, 0x6A, 0x04, 0xCE, 0x10, 0x4D, 0xA2, 0x1C, 0x76,
+        0xCD, 0x17, 0x3B, 0x54, 0x92, 0x70, 0x9B, 0x82, 0x70, 0x72,
+        0x32, 0x24, 0x07, 0x3F, 0x3C, 0x6C, 0x5F, 0xBC, 0x4C, 0xA6,
+        0x86, 0x27, 0x94, 0xAD, 0x42, 0xDD, 0x87, 0xDC, 0xC0, 0x6B,
+        0x44, 0x89, 0xF3, 0x3F, 0x1A, 0x3E, 0x11, 0x44, 0x84, 0x2E,
+        0x69, 0x4C, 0xBB, 0x4A, 0x71, 0x1A, 0xBB, 0x9A, 0x52, 0x3C,
+        0x6B, 0xDE, 0xBC, 0xB2, 0x7C, 0x51, 0xEF, 0x4F, 0x8F, 0x3A,
+        0xDC, 0x50, 0x04, 0x4E, 0xB6, 0x31, 0x66, 0xA8, 0x8E, 0x06,
+        0x3B, 0x51, 0xA9, 0xC1, 0x8A, 0xCB, 0xC4, 0x81, 0xCA, 0x2D,
+        0x69, 0xEC, 0x88, 0xFC, 0x33, 0x88, 0xD1, 0xD4, 0x29, 0x47,
+        0x87, 0x37, 0xF9, 0x6A, 0x22, 0x69, 0xB9, 0xC9, 0xFE, 0xEB,
+        0x8C, 0xC5, 0x21, 0x41, 0x71, 0x02, 0x41, 0x00, 0xFD, 0x17,
+        0x98, 0x42, 0x54, 0x1C, 0x23, 0xF8, 0xD7, 0x5D, 0xEF, 0x49,
+        0x4F, 0xAF, 0xD9, 0x35, 0x6F, 0x08, 0xC6, 0xC7, 0x40, 0x5C,
+        0x7E, 0x58, 0x86, 0xC2, 0xB2, 0x16, 0x39, 0x24, 0xC5, 0x06,
+        0xB0, 0x3D, 0xAF, 0x02, 0xD2, 0x87, 0x77, 0xD2, 0x76, 0xBA,
+        0xE3, 0x59, 0x60, 0x42, 0xF1, 0x16, 0xEF, 0x33, 0x0B, 0xF2,
+        0x0B, 0xBA, 0x99, 0xCC, 0xB6, 0x4C, 0x46, 0x3F, 0x33, 0xE4,
+        0xD4, 0x67, 0x02, 0x41, 0x00, 0xC0, 0xA0, 0x91, 0x6D, 0xFE,
+        0x28, 0xE0, 0x81, 0x5A, 0x15, 0xA7, 0xC9, 0xA8, 0x98, 0xC6,
+        0x0A, 0xAB, 0x00, 0xC5, 0x40, 0xC9, 0x21, 0xBB, 0xB2, 0x33,
+        0x5A, 0xA7, 0xCB, 0x6E, 0xB8, 0x08, 0x56, 0x4A, 0x76, 0x28,
+        0xE8, 0x6D, 0xBD, 0xF5, 0x26, 0x7B, 0xBF, 0xC5, 0x46, 0x45,
+        0x0D, 0xEC, 0x7D, 0xEE, 0x82, 0xD6, 0xCA, 0x5F, 0x3D, 0x6E,
+        0xCC, 0x94, 0x73, 0xCD, 0xCE, 0x86, 0x6E, 0x95, 0x95, 0x02,
+        0x40, 0x38, 0xFD, 0x28, 0x1E, 0xBF, 0x5B, 0xBA, 0xC9, 0xDC,
+        0x8C, 0xDD, 0x45, 0xAF, 0xB8, 0xD3, 0xFB, 0x11, 0x2E, 0x73,
+        0xBC, 0x08, 0x05, 0x0B, 0xBA, 0x19, 0x56, 0x1B, 0xCD, 0x9F,
+        0x3E, 0x65, 0x53, 0x15, 0x3A, 0x3E, 0x7F, 0x2F, 0x32, 0xAB,
+        0xCB, 0x6B, 0x4A, 0xB7, 0xC8, 0xB7, 0x41, 0x3B, 0x92, 0x43,
+        0x78, 0x46, 0x17, 0x51, 0x86, 0xC9, 0xFC, 0xEB, 0x8B, 0x8F,
+        0x41, 0xCA, 0x08, 0x9B, 0xBF, 0x02, 0x41, 0x00, 0xAD, 0x9B,
+        0x89, 0xB6, 0xF2, 0x8C, 0x70, 0xDA, 0xE4, 0x10, 0x04, 0x6B,
+        0x11, 0x92, 0xAF, 0x5A, 0xCA, 0x08, 0x25, 0xBF, 0x60, 0x07,
+        0x11, 0x1D, 0x68, 0x7F, 0x5A, 0x1F, 0x55, 0x28, 0x74, 0x0B,
+        0x21, 0x8D, 0x21, 0x0D, 0x6A, 0x6A, 0xFB, 0xD9, 0xB5, 0x4A,
+        0x7F, 0x47, 0xF7, 0xD0, 0xB6, 0xC6, 0x41, 0x02, 0x97, 0x07,
+        0x49, 0x93, 0x1A, 0x9B, 0x33, 0x68, 0xB3, 0xA2, 0x61, 0x32,
+        0xA5, 0x89, 0x02, 0x41, 0x00, 0x8F, 0xEF, 0xAD, 0xB5, 0xB0,
+        0xB0, 0x7E, 0x86, 0x03, 0x43, 0x93, 0x6E, 0xDD, 0x3C, 0x2D,
+        0x9B, 0x6A, 0x55, 0xFF, 0x6F, 0x3E, 0x70, 0x2A, 0xD4, 0xBF,
+        0x1F, 0x8C, 0x93, 0x60, 0x9E, 0x6D, 0x2F, 0x18, 0x6C, 0x11,
+        0x36, 0x98, 0x3F, 0x10, 0x78, 0xE8, 0x3E, 0x8F, 0xFE, 0x55,
+        0xB9, 0x9E, 0xD5, 0x5B, 0x2E, 0x87, 0x1C, 0x58, 0xD0, 0x37,
+        0x89, 0x96, 0xEC, 0x48, 0x54, 0xF5, 0x9F, 0x0F, 0xB3
 };
 static const int sizeof_rsa_key_der_1024 = sizeof(rsa_key_der_1024);
 
 /* ./certs/1024/ca-key.der, 1024-bit */
 static const unsigned char ca_key_der_1024[] =
 {
-	0x30, 0x82, 0x02, 0x5E, 0x02, 0x01, 0x00, 0x02, 0x81, 0x81, 
-	0x00, 0xCD, 0xAC, 0xDD, 0x47, 0xEC, 0xBE, 0xB7, 0x24, 0xC3, 
-	0x63, 0x1B, 0x54, 0x98, 0x79, 0xE1, 0xC7, 0x31, 0x16, 0x59, 
-	0xD6, 0x9D, 0x77, 0x9D, 0x8D, 0xE2, 0x8B, 0xED, 0x04, 0x17, 
-	0xB2, 0xC6, 0xEB, 0xE4, 0x9B, 0x91, 0xBE, 0x31, 0x50, 0x62, 
-	0x97, 0x58, 0xB5, 0x7F, 0x29, 0xDE, 0xB3, 0x71, 0x24, 0x0B, 
-	0xBF, 0x97, 0x09, 0x7F, 0x26, 0xDC, 0x2D, 0xEC, 0xA8, 0x2E, 
-	0xB2, 0x64, 0x2B, 0x7A, 0x2B, 0x35, 0x19, 0x2D, 0xA2, 0x80, 
-	0xCB, 0x99, 0xFD, 0x94, 0x71, 0x1B, 0x23, 0x8D, 0x54, 0xDB, 
-	0x2E, 0x62, 0x8D, 0x81, 0x08, 0x2D, 0xF4, 0x24, 0x72, 0x27, 
-	0x6C, 0xF9, 0xC9, 0x8E, 0xDB, 0x4C, 0x75, 0xBA, 0x9B, 0x01, 
-	0xF8, 0x3F, 0x18, 0xF4, 0xE6, 0x7F, 0xFB, 0x57, 0x94, 0x92, 
-	0xCC, 0x88, 0xC4, 0xB4, 0x00, 0xC2, 0xAA, 0xD4, 0xE5, 0x88, 
-	0x18, 0xB3, 0x11, 0x2F, 0x73, 0xC0, 0xD6, 0x29, 0x09, 0x02, 
-	0x03, 0x01, 0x00, 0x01, 0x02, 0x81, 0x80, 0x52, 0x35, 0x3D, 
-	0x01, 0x29, 0xA4, 0x95, 0x29, 0x71, 0x9B, 0x64, 0x6A, 0x2C, 
-	0xC3, 0xD2, 0xB5, 0xBE, 0x6E, 0x13, 0x9C, 0x8F, 0xB6, 0x26, 
-	0xD8, 0x76, 0x6B, 0xBD, 0x61, 0xBC, 0x63, 0x2D, 0xD5, 0x4D, 
-	0xBB, 0xCC, 0xC6, 0x3B, 0x89, 0xC8, 0xCE, 0x7B, 0x9B, 0x97, 
-	0xE7, 0x51, 0x67, 0x61, 0xDA, 0xA9, 0x83, 0x7B, 0xC8, 0x44, 
-	0xF5, 0x70, 0x5E, 0x3E, 0xD0, 0x7E, 0x51, 0xB9, 0x6E, 0x13, 
-	0x57, 0x08, 0x5C, 0xE1, 0x67, 0x4F, 0x61, 0x5E, 0xA5, 0x09, 
-	0xEC, 0x11, 0xDD, 0xE4, 0xB8, 0xB4, 0xF4, 0xE0, 0x63, 0x34, 
-	0x4C, 0xDA, 0x32, 0x20, 0x1F, 0x85, 0x41, 0x5D, 0xBC, 0xDB, 
-	0x24, 0xC5, 0xAF, 0xBE, 0x02, 0x5F, 0x22, 0xF1, 0x7C, 0xCC, 
-	0x05, 0x56, 0xA6, 0xA6, 0x37, 0x9A, 0xEB, 0xFF, 0x52, 0x2D, 
-	0xBF, 0x30, 0x4B, 0x9A, 0x1D, 0xEE, 0xAB, 0x9C, 0x2C, 0xE2, 
-	0xC1, 0xB8, 0x9D, 0xC9, 0x31, 0x02, 0x41, 0x00, 0xE9, 0x89, 
-	0x16, 0xCD, 0xAC, 0x2E, 0xF2, 0x4D, 0x66, 0x17, 0xBD, 0x78, 
-	0x12, 0x12, 0x8D, 0x8E, 0x84, 0x24, 0xDE, 0x2D, 0x50, 0x41, 
-	0x85, 0x8C, 0x34, 0x09, 0xFA, 0xFB, 0x6D, 0x87, 0x51, 0x4C, 
-	0x13, 0x28, 0xF0, 0x60, 0x11, 0x86, 0x3D, 0xC2, 0xA4, 0xCF, 
-	0x5E, 0xC5, 0x6F, 0x5B, 0x11, 0x32, 0x0A, 0xB5, 0x28, 0xD0, 
-	0x82, 0x47, 0x44, 0x26, 0x92, 0xE2, 0x78, 0x59, 0xB4, 0x08, 
-	0xB3, 0xFD, 0x02, 0x41, 0x00, 0xE1, 0x75, 0xB4, 0x6A, 0xB5, 
-	0x8C, 0x11, 0xFB, 0xCC, 0x42, 0x02, 0xC5, 0xDA, 0x48, 0xCE, 
-	0x29, 0x43, 0x14, 0x01, 0x9A, 0x2C, 0xB3, 0xA4, 0xCB, 0x73, 
-	0xEB, 0xA1, 0x35, 0x57, 0xAD, 0xB5, 0x16, 0x17, 0x80, 0x03, 
-	0x5F, 0x32, 0x37, 0xBE, 0xA2, 0x6F, 0xF9, 0x31, 0x84, 0xBF, 
-	0x00, 0x6E, 0x8D, 0x03, 0x0E, 0x30, 0x1C, 0xD0, 0x2F, 0x37, 
-	0xF0, 0x7E, 0xC2, 0x64, 0xBF, 0xEE, 0x4B, 0xE8, 0xFD, 0x02, 
-	0x41, 0x00, 0xE1, 0x99, 0x8B, 0x2B, 0xD8, 0x9F, 0xE9, 0x76, 
-	0x97, 0x9F, 0x6B, 0x6B, 0x28, 0x9A, 0x3F, 0xA1, 0x63, 0x4A, 
-	0x72, 0x4E, 0xF7, 0xEE, 0xB3, 0xE2, 0x43, 0x0B, 0x39, 0x27, 
-	0xD6, 0x21, 0x18, 0x8A, 0x13, 0x20, 0x43, 0x45, 0xAA, 0xE8, 
-	0x31, 0x95, 0x6C, 0xBC, 0xDE, 0xE2, 0x7F, 0xB6, 0x4B, 0xA0, 
-	0x39, 0xF3, 0xD3, 0x9F, 0xC9, 0x9A, 0xAA, 0xDD, 0x50, 0x9B, 
-	0xF2, 0x83, 0x45, 0x85, 0xFA, 0xC9, 0x02, 0x41, 0x00, 0xAF, 
-	0xB0, 0xC7, 0x7C, 0xF8, 0x28, 0x44, 0xC3, 0x50, 0xF2, 0x87, 
-	0xB2, 0xA2, 0x5D, 0x65, 0xBA, 0x25, 0xB9, 0x6B, 0x5E, 0x37, 
-	0x43, 0x6E, 0x41, 0xD4, 0xFD, 0x63, 0x4C, 0x6C, 0x1C, 0xC3, 
-	0x26, 0x89, 0xFD, 0x89, 0xA3, 0x1F, 0x40, 0xED, 0x5F, 0x2B, 
-	0x9E, 0xA6, 0x85, 0xE9, 0x49, 0x6E, 0xDC, 0x97, 0xEA, 0xF0, 
-	0x77, 0x23, 0x8C, 0x08, 0x2D, 0x72, 0xBA, 0x0D, 0x44, 0xBB, 
-	0x6F, 0x90, 0x09, 0x02, 0x41, 0x00, 0x91, 0xE4, 0x2E, 0xCA, 
-	0x8C, 0x0A, 0x69, 0x2F, 0x62, 0xE2, 0x62, 0x3B, 0xA5, 0x8D, 
-	0x5A, 0x2C, 0x56, 0x3E, 0x7F, 0x67, 0x42, 0x92, 0x12, 0x92, 
-	0x5F, 0xF3, 0x97, 0xDD, 0xE1, 0xA9, 0x7F, 0xAD, 0x2E, 0x2D, 
-	0xF4, 0x4A, 0x57, 0xB3, 0x7A, 0x10, 0xBD, 0xD7, 0xE4, 0xEC, 
-	0x6A, 0x08, 0x21, 0xE9, 0xF2, 0x46, 0x49, 0xD2, 0x69, 0x47, 
-	0x8A, 0x20, 0x4B, 0xF2, 0xB1, 0x52, 0x83, 0xAB, 0x6F, 0x10
+        0x30, 0x82, 0x02, 0x5E, 0x02, 0x01, 0x00, 0x02, 0x81, 0x81,
+        0x00, 0xCD, 0xAC, 0xDD, 0x47, 0xEC, 0xBE, 0xB7, 0x24, 0xC3,
+        0x63, 0x1B, 0x54, 0x98, 0x79, 0xE1, 0xC7, 0x31, 0x16, 0x59,
+        0xD6, 0x9D, 0x77, 0x9D, 0x8D, 0xE2, 0x8B, 0xED, 0x04, 0x17,
+        0xB2, 0xC6, 0xEB, 0xE4, 0x9B, 0x91, 0xBE, 0x31, 0x50, 0x62,
+        0x97, 0x58, 0xB5, 0x7F, 0x29, 0xDE, 0xB3, 0x71, 0x24, 0x0B,
+        0xBF, 0x97, 0x09, 0x7F, 0x26, 0xDC, 0x2D, 0xEC, 0xA8, 0x2E,
+        0xB2, 0x64, 0x2B, 0x7A, 0x2B, 0x35, 0x19, 0x2D, 0xA2, 0x80,
+        0xCB, 0x99, 0xFD, 0x94, 0x71, 0x1B, 0x23, 0x8D, 0x54, 0xDB,
+        0x2E, 0x62, 0x8D, 0x81, 0x08, 0x2D, 0xF4, 0x24, 0x72, 0x27,
+        0x6C, 0xF9, 0xC9, 0x8E, 0xDB, 0x4C, 0x75, 0xBA, 0x9B, 0x01,
+        0xF8, 0x3F, 0x18, 0xF4, 0xE6, 0x7F, 0xFB, 0x57, 0x94, 0x92,
+        0xCC, 0x88, 0xC4, 0xB4, 0x00, 0xC2, 0xAA, 0xD4, 0xE5, 0x88,
+        0x18, 0xB3, 0x11, 0x2F, 0x73, 0xC0, 0xD6, 0x29, 0x09, 0x02,
+        0x03, 0x01, 0x00, 0x01, 0x02, 0x81, 0x80, 0x52, 0x35, 0x3D,
+        0x01, 0x29, 0xA4, 0x95, 0x29, 0x71, 0x9B, 0x64, 0x6A, 0x2C,
+        0xC3, 0xD2, 0xB5, 0xBE, 0x6E, 0x13, 0x9C, 0x8F, 0xB6, 0x26,
+        0xD8, 0x76, 0x6B, 0xBD, 0x61, 0xBC, 0x63, 0x2D, 0xD5, 0x4D,
+        0xBB, 0xCC, 0xC6, 0x3B, 0x89, 0xC8, 0xCE, 0x7B, 0x9B, 0x97,
+        0xE7, 0x51, 0x67, 0x61, 0xDA, 0xA9, 0x83, 0x7B, 0xC8, 0x44,
+        0xF5, 0x70, 0x5E, 0x3E, 0xD0, 0x7E, 0x51, 0xB9, 0x6E, 0x13,
+        0x57, 0x08, 0x5C, 0xE1, 0x67, 0x4F, 0x61, 0x5E, 0xA5, 0x09,
+        0xEC, 0x11, 0xDD, 0xE4, 0xB8, 0xB4, 0xF4, 0xE0, 0x63, 0x34,
+        0x4C, 0xDA, 0x32, 0x20, 0x1F, 0x85, 0x41, 0x5D, 0xBC, 0xDB,
+        0x24, 0xC5, 0xAF, 0xBE, 0x02, 0x5F, 0x22, 0xF1, 0x7C, 0xCC,
+        0x05, 0x56, 0xA6, 0xA6, 0x37, 0x9A, 0xEB, 0xFF, 0x52, 0x2D,
+        0xBF, 0x30, 0x4B, 0x9A, 0x1D, 0xEE, 0xAB, 0x9C, 0x2C, 0xE2,
+        0xC1, 0xB8, 0x9D, 0xC9, 0x31, 0x02, 0x41, 0x00, 0xE9, 0x89,
+        0x16, 0xCD, 0xAC, 0x2E, 0xF2, 0x4D, 0x66, 0x17, 0xBD, 0x78,
+        0x12, 0x12, 0x8D, 0x8E, 0x84, 0x24, 0xDE, 0x2D, 0x50, 0x41,
+        0x85, 0x8C, 0x34, 0x09, 0xFA, 0xFB, 0x6D, 0x87, 0x51, 0x4C,
+        0x13, 0x28, 0xF0, 0x60, 0x11, 0x86, 0x3D, 0xC2, 0xA4, 0xCF,
+        0x5E, 0xC5, 0x6F, 0x5B, 0x11, 0x32, 0x0A, 0xB5, 0x28, 0xD0,
+        0x82, 0x47, 0x44, 0x26, 0x92, 0xE2, 0x78, 0x59, 0xB4, 0x08,
+        0xB3, 0xFD, 0x02, 0x41, 0x00, 0xE1, 0x75, 0xB4, 0x6A, 0xB5,
+        0x8C, 0x11, 0xFB, 0xCC, 0x42, 0x02, 0xC5, 0xDA, 0x48, 0xCE,
+        0x29, 0x43, 0x14, 0x01, 0x9A, 0x2C, 0xB3, 0xA4, 0xCB, 0x73,
+        0xEB, 0xA1, 0x35, 0x57, 0xAD, 0xB5, 0x16, 0x17, 0x80, 0x03,
+        0x5F, 0x32, 0x37, 0xBE, 0xA2, 0x6F, 0xF9, 0x31, 0x84, 0xBF,
+        0x00, 0x6E, 0x8D, 0x03, 0x0E, 0x30, 0x1C, 0xD0, 0x2F, 0x37,
+        0xF0, 0x7E, 0xC2, 0x64, 0xBF, 0xEE, 0x4B, 0xE8, 0xFD, 0x02,
+        0x41, 0x00, 0xE1, 0x99, 0x8B, 0x2B, 0xD8, 0x9F, 0xE9, 0x76,
+        0x97, 0x9F, 0x6B, 0x6B, 0x28, 0x9A, 0x3F, 0xA1, 0x63, 0x4A,
+        0x72, 0x4E, 0xF7, 0xEE, 0xB3, 0xE2, 0x43, 0x0B, 0x39, 0x27,
+        0xD6, 0x21, 0x18, 0x8A, 0x13, 0x20, 0x43, 0x45, 0xAA, 0xE8,
+        0x31, 0x95, 0x6C, 0xBC, 0xDE, 0xE2, 0x7F, 0xB6, 0x4B, 0xA0,
+        0x39, 0xF3, 0xD3, 0x9F, 0xC9, 0x9A, 0xAA, 0xDD, 0x50, 0x9B,
+        0xF2, 0x83, 0x45, 0x85, 0xFA, 0xC9, 0x02, 0x41, 0x00, 0xAF,
+        0xB0, 0xC7, 0x7C, 0xF8, 0x28, 0x44, 0xC3, 0x50, 0xF2, 0x87,
+        0xB2, 0xA2, 0x5D, 0x65, 0xBA, 0x25, 0xB9, 0x6B, 0x5E, 0x37,
+        0x43, 0x6E, 0x41, 0xD4, 0xFD, 0x63, 0x4C, 0x6C, 0x1C, 0xC3,
+        0x26, 0x89, 0xFD, 0x89, 0xA3, 0x1F, 0x40, 0xED, 0x5F, 0x2B,
+        0x9E, 0xA6, 0x85, 0xE9, 0x49, 0x6E, 0xDC, 0x97, 0xEA, 0xF0,
+        0x77, 0x23, 0x8C, 0x08, 0x2D, 0x72, 0xBA, 0x0D, 0x44, 0xBB,
+        0x6F, 0x90, 0x09, 0x02, 0x41, 0x00, 0x91, 0xE4, 0x2E, 0xCA,
+        0x8C, 0x0A, 0x69, 0x2F, 0x62, 0xE2, 0x62, 0x3B, 0xA5, 0x8D,
+        0x5A, 0x2C, 0x56, 0x3E, 0x7F, 0x67, 0x42, 0x92, 0x12, 0x92,
+        0x5F, 0xF3, 0x97, 0xDD, 0xE1, 0xA9, 0x7F, 0xAD, 0x2E, 0x2D,
+        0xF4, 0x4A, 0x57, 0xB3, 0x7A, 0x10, 0xBD, 0xD7, 0xE4, 0xEC,
+        0x6A, 0x08, 0x21, 0xE9, 0xF2, 0x46, 0x49, 0xD2, 0x69, 0x47,
+        0x8A, 0x20, 0x4B, 0xF2, 0xB1, 0x52, 0x83, 0xAB, 0x6F, 0x10
 
 };
 static const int sizeof_ca_key_der_1024 = sizeof(ca_key_der_1024);
@@ -407,270 +407,270 @@
 /* ./certs/1024/ca-cert.der, 1024-bit */
 static const unsigned char ca_cert_der_1024[] =
 {
-	0x30, 0x82, 0x03, 0xB5, 0x30, 0x82, 0x03, 0x1E, 0xA0, 0x03, 
-	0x02, 0x01, 0x02, 0x02, 0x09, 0x00, 0xDA, 0xFB, 0x6A, 0x0D, 
-	0xFE, 0xCF, 0x9B, 0x47, 0x30, 0x0D, 0x06, 0x09, 0x2A, 0x86, 
-	0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0B, 0x05, 0x00, 0x30, 
-	0x81, 0x99, 0x31, 0x0B, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 
-	0x06, 0x13, 0x02, 0x55, 0x53, 0x31, 0x10, 0x30, 0x0E, 0x06, 
-	0x03, 0x55, 0x04, 0x08, 0x0C, 0x07, 0x4D, 0x6F, 0x6E, 0x74, 
-	0x61, 0x6E, 0x61, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 
-	0x04, 0x07, 0x0C, 0x07, 0x42, 0x6F, 0x7A, 0x65, 0x6D, 0x61, 
-	0x6E, 0x31, 0x11, 0x30, 0x0F, 0x06, 0x03, 0x55, 0x04, 0x0A, 
-	0x0C, 0x08, 0x53, 0x61, 0x77, 0x74, 0x6F, 0x6F, 0x74, 0x68, 
-	0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 0x0B, 0x0C, 
-	0x0F, 0x43, 0x6F, 0x6E, 0x73, 0x75, 0x6C, 0x74, 0x69, 0x6E, 
-	0x67, 0x5F, 0x31, 0x30, 0x32, 0x34, 0x31, 0x18, 0x30, 0x16, 
-	0x06, 0x03, 0x55, 0x04, 0x03, 0x0C, 0x0F, 0x77, 0x77, 0x77, 
-	0x2E, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 
-	0x6F, 0x6D, 0x31, 0x1F, 0x30, 0x1D, 0x06, 0x09, 0x2A, 0x86, 
-	0x48, 0x86, 0xF7, 0x0D, 0x01, 0x09, 0x01, 0x16, 0x10, 0x69, 
-	0x6E, 0x66, 0x6F, 0x40, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 
-	0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x30, 0x1E, 0x17, 0x0D, 0x31, 
-	0x38, 0x30, 0x34, 0x31, 0x33, 0x31, 0x35, 0x32, 0x33, 0x31, 
-	0x30, 0x5A, 0x17, 0x0D, 0x32, 0x31, 0x30, 0x31, 0x30, 0x37, 
-	0x31, 0x35, 0x32, 0x33, 0x31, 0x30, 0x5A, 0x30, 0x81, 0x99, 
-	0x31, 0x0B, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 0x06, 0x13, 
-	0x02, 0x55, 0x53, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 
-	0x04, 0x08, 0x0C, 0x07, 0x4D, 0x6F, 0x6E, 0x74, 0x61, 0x6E, 
-	0x61, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x07, 
-	0x0C, 0x07, 0x42, 0x6F, 0x7A, 0x65, 0x6D, 0x61, 0x6E, 0x31, 
-	0x11, 0x30, 0x0F, 0x06, 0x03, 0x55, 0x04, 0x0A, 0x0C, 0x08, 
-	0x53, 0x61, 0x77, 0x74, 0x6F, 0x6F, 0x74, 0x68, 0x31, 0x18, 
-	0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 0x0B, 0x0C, 0x0F, 0x43, 
-	0x6F, 0x6E, 0x73, 0x75, 0x6C, 0x74, 0x69, 0x6E, 0x67, 0x5F, 
-	0x31, 0x30, 0x32, 0x34, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 
-	0x55, 0x04, 0x03, 0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77, 
-	0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 
-	0x31, 0x1F, 0x30, 0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 
-	0xF7, 0x0D, 0x01, 0x09, 0x01, 0x16, 0x10, 0x69, 0x6E, 0x66, 
-	0x6F, 0x40, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 
-	0x63, 0x6F, 0x6D, 0x30, 0x81, 0x9F, 0x30, 0x0D, 0x06, 0x09, 
-	0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x01, 0x05, 
-	0x00, 0x03, 0x81, 0x8D, 0x00, 0x30, 0x81, 0x89, 0x02, 0x81, 
-	0x81, 0x00, 0xCD, 0xAC, 0xDD, 0x47, 0xEC, 0xBE, 0xB7, 0x24, 
-	0xC3, 0x63, 0x1B, 0x54, 0x98, 0x79, 0xE1, 0xC7, 0x31, 0x16, 
-	0x59, 0xD6, 0x9D, 0x77, 0x9D, 0x8D, 0xE2, 0x8B, 0xED, 0x04, 
-	0x17, 0xB2, 0xC6, 0xEB, 0xE4, 0x9B, 0x91, 0xBE, 0x31, 0x50, 
-	0x62, 0x97, 0x58, 0xB5, 0x7F, 0x29, 0xDE, 0xB3, 0x71, 0x24, 
-	0x0B, 0xBF, 0x97, 0x09, 0x7F, 0x26, 0xDC, 0x2D, 0xEC, 0xA8, 
-	0x2E, 0xB2, 0x64, 0x2B, 0x7A, 0x2B, 0x35, 0x19, 0x2D, 0xA2, 
-	0x80, 0xCB, 0x99, 0xFD, 0x94, 0x71, 0x1B, 0x23, 0x8D, 0x54, 
-	0xDB, 0x2E, 0x62, 0x8D, 0x81, 0x08, 0x2D, 0xF4, 0x24, 0x72, 
-	0x27, 0x6C, 0xF9, 0xC9, 0x8E, 0xDB, 0x4C, 0x75, 0xBA, 0x9B, 
-	0x01, 0xF8, 0x3F, 0x18, 0xF4, 0xE6, 0x7F, 0xFB, 0x57, 0x94, 
-	0x92, 0xCC, 0x88, 0xC4, 0xB4, 0x00, 0xC2, 0xAA, 0xD4, 0xE5, 
-	0x88, 0x18, 0xB3, 0x11, 0x2F, 0x73, 0xC0, 0xD6, 0x29, 0x09, 
-	0x02, 0x03, 0x01, 0x00, 0x01, 0xA3, 0x82, 0x01, 0x01, 0x30, 
-	0x81, 0xFE, 0x30, 0x1D, 0x06, 0x03, 0x55, 0x1D, 0x0E, 0x04, 
-	0x16, 0x04, 0x14, 0xD3, 0x22, 0x8F, 0x28, 0x2C, 0xE0, 0x05, 
-	0xEE, 0xD3, 0xED, 0xC3, 0x71, 0x3D, 0xC9, 0xB2, 0x36, 0x3A, 
-	0x1D, 0xBF, 0xA8, 0x30, 0x81, 0xCE, 0x06, 0x03, 0x55, 0x1D, 
-	0x23, 0x04, 0x81, 0xC6, 0x30, 0x81, 0xC3, 0x80, 0x14, 0xD3, 
-	0x22, 0x8F, 0x28, 0x2C, 0xE0, 0x05, 0xEE, 0xD3, 0xED, 0xC3, 
-	0x71, 0x3D, 0xC9, 0xB2, 0x36, 0x3A, 0x1D, 0xBF, 0xA8, 0xA1, 
-	0x81, 0x9F, 0xA4, 0x81, 0x9C, 0x30, 0x81, 0x99, 0x31, 0x0B, 
-	0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 0x06, 0x13, 0x02, 0x55, 
-	0x53, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x08, 
-	0x0C, 0x07, 0x4D, 0x6F, 0x6E, 0x74, 0x61, 0x6E, 0x61, 0x31, 
-	0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x07, 0x0C, 0x07, 
-	0x42, 0x6F, 0x7A, 0x65, 0x6D, 0x61, 0x6E, 0x31, 0x11, 0x30, 
-	0x0F, 0x06, 0x03, 0x55, 0x04, 0x0A, 0x0C, 0x08, 0x53, 0x61, 
-	0x77, 0x74, 0x6F, 0x6F, 0x74, 0x68, 0x31, 0x18, 0x30, 0x16, 
-	0x06, 0x03, 0x55, 0x04, 0x0B, 0x0C, 0x0F, 0x43, 0x6F, 0x6E, 
-	0x73, 0x75, 0x6C, 0x74, 0x69, 0x6E, 0x67, 0x5F, 0x31, 0x30, 
-	0x32, 0x34, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 
-	0x03, 0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77, 0x6F, 0x6C, 
-	0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x31, 0x1F, 
-	0x30, 0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 
-	0x01, 0x09, 0x01, 0x16, 0x10, 0x69, 0x6E, 0x66, 0x6F, 0x40, 
-	0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 
-	0x6D, 0x82, 0x09, 0x00, 0xDA, 0xFB, 0x6A, 0x0D, 0xFE, 0xCF, 
-	0x9B, 0x47, 0x30, 0x0C, 0x06, 0x03, 0x55, 0x1D, 0x13, 0x04, 
-	0x05, 0x30, 0x03, 0x01, 0x01, 0xFF, 0x30, 0x0D, 0x06, 0x09, 
-	0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0B, 0x05, 
-	0x00, 0x03, 0x81, 0x81, 0x00, 0x1D, 0x48, 0xF6, 0x40, 0x41, 
-	0x04, 0x06, 0xF2, 0xE4, 0x72, 0x2F, 0xEA, 0xFF, 0xC1, 0x67, 
-	0x6B, 0x15, 0xBB, 0x0A, 0x28, 0x23, 0x28, 0x07, 0xC6, 0xD7, 
-	0x13, 0x2C, 0xBE, 0x00, 0x00, 0xAC, 0x1D, 0xF7, 0xF4, 0x92, 
-	0xD3, 0x2B, 0xAF, 0x23, 0xEB, 0x9F, 0x1A, 0xE2, 0x11, 0x3C, 
-	0x2D, 0x97, 0xF2, 0x0F, 0xAC, 0xAE, 0x97, 0x86, 0x0A, 0xFB, 
-	0xA8, 0x4F, 0x74, 0x1B, 0xDE, 0x19, 0x51, 0xDB, 0xCD, 0xE2, 
-	0x11, 0x38, 0xC1, 0xA4, 0x9D, 0x56, 0xAB, 0x47, 0x5C, 0xDE, 
-	0xBA, 0xEB, 0x27, 0xDF, 0x6D, 0xC8, 0x7E, 0x3A, 0xBD, 0x2E, 
-	0x9B, 0x2A, 0xAD, 0x22, 0x3B, 0x95, 0xA9, 0xF2, 0x28, 0x03, 
-	0xBC, 0xE5, 0xEC, 0xCC, 0xF2, 0x08, 0xD4, 0xC8, 0x2F, 0xDB, 
-	0xEA, 0xFB, 0x2E, 0x52, 0x16, 0x8C, 0x42, 0x02, 0xA4, 0x59, 
-	0x6D, 0x4C, 0x33, 0xB4, 0x9A, 0xD2, 0x73, 0x4A, 0x1E, 0x9F, 
-	0xD9, 0xC8, 0x83
+        0x30, 0x82, 0x03, 0xB5, 0x30, 0x82, 0x03, 0x1E, 0xA0, 0x03,
+        0x02, 0x01, 0x02, 0x02, 0x09, 0x00, 0xDA, 0xFB, 0x6A, 0x0D,
+        0xFE, 0xCF, 0x9B, 0x47, 0x30, 0x0D, 0x06, 0x09, 0x2A, 0x86,
+        0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0B, 0x05, 0x00, 0x30,
+        0x81, 0x99, 0x31, 0x0B, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04,
+        0x06, 0x13, 0x02, 0x55, 0x53, 0x31, 0x10, 0x30, 0x0E, 0x06,
+        0x03, 0x55, 0x04, 0x08, 0x0C, 0x07, 0x4D, 0x6F, 0x6E, 0x74,
+        0x61, 0x6E, 0x61, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55,
+        0x04, 0x07, 0x0C, 0x07, 0x42, 0x6F, 0x7A, 0x65, 0x6D, 0x61,
+        0x6E, 0x31, 0x11, 0x30, 0x0F, 0x06, 0x03, 0x55, 0x04, 0x0A,
+        0x0C, 0x08, 0x53, 0x61, 0x77, 0x74, 0x6F, 0x6F, 0x74, 0x68,
+        0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 0x0B, 0x0C,
+        0x0F, 0x43, 0x6F, 0x6E, 0x73, 0x75, 0x6C, 0x74, 0x69, 0x6E,
+        0x67, 0x5F, 0x31, 0x30, 0x32, 0x34, 0x31, 0x18, 0x30, 0x16,
+        0x06, 0x03, 0x55, 0x04, 0x03, 0x0C, 0x0F, 0x77, 0x77, 0x77,
+        0x2E, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63,
+        0x6F, 0x6D, 0x31, 0x1F, 0x30, 0x1D, 0x06, 0x09, 0x2A, 0x86,
+        0x48, 0x86, 0xF7, 0x0D, 0x01, 0x09, 0x01, 0x16, 0x10, 0x69,
+        0x6E, 0x66, 0x6F, 0x40, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73,
+        0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x30, 0x1E, 0x17, 0x0D, 0x31,
+        0x38, 0x30, 0x34, 0x31, 0x33, 0x31, 0x35, 0x32, 0x33, 0x31,
+        0x30, 0x5A, 0x17, 0x0D, 0x32, 0x31, 0x30, 0x31, 0x30, 0x37,
+        0x31, 0x35, 0x32, 0x33, 0x31, 0x30, 0x5A, 0x30, 0x81, 0x99,
+        0x31, 0x0B, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 0x06, 0x13,
+        0x02, 0x55, 0x53, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55,
+        0x04, 0x08, 0x0C, 0x07, 0x4D, 0x6F, 0x6E, 0x74, 0x61, 0x6E,
+        0x61, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x07,
+        0x0C, 0x07, 0x42, 0x6F, 0x7A, 0x65, 0x6D, 0x61, 0x6E, 0x31,
+        0x11, 0x30, 0x0F, 0x06, 0x03, 0x55, 0x04, 0x0A, 0x0C, 0x08,
+        0x53, 0x61, 0x77, 0x74, 0x6F, 0x6F, 0x74, 0x68, 0x31, 0x18,
+        0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 0x0B, 0x0C, 0x0F, 0x43,
+        0x6F, 0x6E, 0x73, 0x75, 0x6C, 0x74, 0x69, 0x6E, 0x67, 0x5F,
+        0x31, 0x30, 0x32, 0x34, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03,
+        0x55, 0x04, 0x03, 0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77,
+        0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D,
+        0x31, 0x1F, 0x30, 0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86,
+        0xF7, 0x0D, 0x01, 0x09, 0x01, 0x16, 0x10, 0x69, 0x6E, 0x66,
+        0x6F, 0x40, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E,
+        0x63, 0x6F, 0x6D, 0x30, 0x81, 0x9F, 0x30, 0x0D, 0x06, 0x09,
+        0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x01, 0x05,
+        0x00, 0x03, 0x81, 0x8D, 0x00, 0x30, 0x81, 0x89, 0x02, 0x81,
+        0x81, 0x00, 0xCD, 0xAC, 0xDD, 0x47, 0xEC, 0xBE, 0xB7, 0x24,
+        0xC3, 0x63, 0x1B, 0x54, 0x98, 0x79, 0xE1, 0xC7, 0x31, 0x16,
+        0x59, 0xD6, 0x9D, 0x77, 0x9D, 0x8D, 0xE2, 0x8B, 0xED, 0x04,
+        0x17, 0xB2, 0xC6, 0xEB, 0xE4, 0x9B, 0x91, 0xBE, 0x31, 0x50,
+        0x62, 0x97, 0x58, 0xB5, 0x7F, 0x29, 0xDE, 0xB3, 0x71, 0x24,
+        0x0B, 0xBF, 0x97, 0x09, 0x7F, 0x26, 0xDC, 0x2D, 0xEC, 0xA8,
+        0x2E, 0xB2, 0x64, 0x2B, 0x7A, 0x2B, 0x35, 0x19, 0x2D, 0xA2,
+        0x80, 0xCB, 0x99, 0xFD, 0x94, 0x71, 0x1B, 0x23, 0x8D, 0x54,
+        0xDB, 0x2E, 0x62, 0x8D, 0x81, 0x08, 0x2D, 0xF4, 0x24, 0x72,
+        0x27, 0x6C, 0xF9, 0xC9, 0x8E, 0xDB, 0x4C, 0x75, 0xBA, 0x9B,
+        0x01, 0xF8, 0x3F, 0x18, 0xF4, 0xE6, 0x7F, 0xFB, 0x57, 0x94,
+        0x92, 0xCC, 0x88, 0xC4, 0xB4, 0x00, 0xC2, 0xAA, 0xD4, 0xE5,
+        0x88, 0x18, 0xB3, 0x11, 0x2F, 0x73, 0xC0, 0xD6, 0x29, 0x09,
+        0x02, 0x03, 0x01, 0x00, 0x01, 0xA3, 0x82, 0x01, 0x01, 0x30,
+        0x81, 0xFE, 0x30, 0x1D, 0x06, 0x03, 0x55, 0x1D, 0x0E, 0x04,
+        0x16, 0x04, 0x14, 0xD3, 0x22, 0x8F, 0x28, 0x2C, 0xE0, 0x05,
+        0xEE, 0xD3, 0xED, 0xC3, 0x71, 0x3D, 0xC9, 0xB2, 0x36, 0x3A,
+        0x1D, 0xBF, 0xA8, 0x30, 0x81, 0xCE, 0x06, 0x03, 0x55, 0x1D,
+        0x23, 0x04, 0x81, 0xC6, 0x30, 0x81, 0xC3, 0x80, 0x14, 0xD3,
+        0x22, 0x8F, 0x28, 0x2C, 0xE0, 0x05, 0xEE, 0xD3, 0xED, 0xC3,
+        0x71, 0x3D, 0xC9, 0xB2, 0x36, 0x3A, 0x1D, 0xBF, 0xA8, 0xA1,
+        0x81, 0x9F, 0xA4, 0x81, 0x9C, 0x30, 0x81, 0x99, 0x31, 0x0B,
+        0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 0x06, 0x13, 0x02, 0x55,
+        0x53, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x08,
+        0x0C, 0x07, 0x4D, 0x6F, 0x6E, 0x74, 0x61, 0x6E, 0x61, 0x31,
+        0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x07, 0x0C, 0x07,
+        0x42, 0x6F, 0x7A, 0x65, 0x6D, 0x61, 0x6E, 0x31, 0x11, 0x30,
+        0x0F, 0x06, 0x03, 0x55, 0x04, 0x0A, 0x0C, 0x08, 0x53, 0x61,
+        0x77, 0x74, 0x6F, 0x6F, 0x74, 0x68, 0x31, 0x18, 0x30, 0x16,
+        0x06, 0x03, 0x55, 0x04, 0x0B, 0x0C, 0x0F, 0x43, 0x6F, 0x6E,
+        0x73, 0x75, 0x6C, 0x74, 0x69, 0x6E, 0x67, 0x5F, 0x31, 0x30,
+        0x32, 0x34, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 0x04,
+        0x03, 0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77, 0x6F, 0x6C,
+        0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x31, 0x1F,
+        0x30, 0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D,
+        0x01, 0x09, 0x01, 0x16, 0x10, 0x69, 0x6E, 0x66, 0x6F, 0x40,
+        0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F,
+        0x6D, 0x82, 0x09, 0x00, 0xDA, 0xFB, 0x6A, 0x0D, 0xFE, 0xCF,
+        0x9B, 0x47, 0x30, 0x0C, 0x06, 0x03, 0x55, 0x1D, 0x13, 0x04,
+        0x05, 0x30, 0x03, 0x01, 0x01, 0xFF, 0x30, 0x0D, 0x06, 0x09,
+        0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0B, 0x05,
+        0x00, 0x03, 0x81, 0x81, 0x00, 0x1D, 0x48, 0xF6, 0x40, 0x41,
+        0x04, 0x06, 0xF2, 0xE4, 0x72, 0x2F, 0xEA, 0xFF, 0xC1, 0x67,
+        0x6B, 0x15, 0xBB, 0x0A, 0x28, 0x23, 0x28, 0x07, 0xC6, 0xD7,
+        0x13, 0x2C, 0xBE, 0x00, 0x00, 0xAC, 0x1D, 0xF7, 0xF4, 0x92,
+        0xD3, 0x2B, 0xAF, 0x23, 0xEB, 0x9F, 0x1A, 0xE2, 0x11, 0x3C,
+        0x2D, 0x97, 0xF2, 0x0F, 0xAC, 0xAE, 0x97, 0x86, 0x0A, 0xFB,
+        0xA8, 0x4F, 0x74, 0x1B, 0xDE, 0x19, 0x51, 0xDB, 0xCD, 0xE2,
+        0x11, 0x38, 0xC1, 0xA4, 0x9D, 0x56, 0xAB, 0x47, 0x5C, 0xDE,
+        0xBA, 0xEB, 0x27, 0xDF, 0x6D, 0xC8, 0x7E, 0x3A, 0xBD, 0x2E,
+        0x9B, 0x2A, 0xAD, 0x22, 0x3B, 0x95, 0xA9, 0xF2, 0x28, 0x03,
+        0xBC, 0xE5, 0xEC, 0xCC, 0xF2, 0x08, 0xD4, 0xC8, 0x2F, 0xDB,
+        0xEA, 0xFB, 0x2E, 0x52, 0x16, 0x8C, 0x42, 0x02, 0xA4, 0x59,
+        0x6D, 0x4C, 0x33, 0xB4, 0x9A, 0xD2, 0x73, 0x4A, 0x1E, 0x9F,
+        0xD9, 0xC8, 0x83
 };
 static const int sizeof_ca_cert_der_1024 = sizeof(ca_cert_der_1024);
 
 /* ./certs/1024/server-key.der, 1024-bit */
 static const unsigned char server_key_der_1024[] =
 {
-	0x30, 0x82, 0x02, 0x5D, 0x02, 0x01, 0x00, 0x02, 0x81, 0x81, 
-	0x00, 0xAA, 0x3E, 0xA5, 0x9C, 0xD3, 0x17, 0x49, 0x65, 0x43, 
-	0xDE, 0xD0, 0xF3, 0x4B, 0x1C, 0xDB, 0x49, 0x0C, 0xFC, 0x7A, 
-	0x65, 0x05, 0x6D, 0xDE, 0x6A, 0xC4, 0xE4, 0x73, 0x2C, 0x8A, 
-	0x96, 0x82, 0x8F, 0x23, 0xA5, 0x06, 0x71, 0x1C, 0x06, 0x3E, 
-	0x2F, 0x92, 0x8D, 0x0B, 0x29, 0x34, 0x45, 0x59, 0xE9, 0xA9, 
-	0xBC, 0x61, 0xD7, 0x24, 0x37, 0x5D, 0xB5, 0xC4, 0x37, 0x8D, 
-	0xBA, 0x67, 0xB2, 0xEF, 0x03, 0x27, 0xFA, 0xC1, 0xB4, 0xCD, 
-	0x6B, 0x00, 0x66, 0xB4, 0xD6, 0x73, 0x70, 0x1F, 0x08, 0x3A, 
-	0xCC, 0x77, 0xAD, 0xE9, 0xF9, 0x34, 0xD4, 0xF3, 0xA0, 0x2D, 
-	0xA9, 0xE7, 0x58, 0xA9, 0xC0, 0x61, 0x84, 0xB6, 0xEC, 0x3D, 
-	0x0A, 0xAD, 0xFD, 0x5C, 0x86, 0x73, 0xAA, 0x6B, 0x47, 0xD8, 
-	0x8B, 0x2E, 0x58, 0x4B, 0x69, 0x12, 0x82, 0x26, 0x55, 0xE6, 
-	0x14, 0xBF, 0x55, 0x70, 0x88, 0xFE, 0xF9, 0x75, 0xE1, 0x02, 
-	0x03, 0x01, 0x00, 0x01, 0x02, 0x81, 0x80, 0x0A, 0x4C, 0xC1, 
-	0xFE, 0x4B, 0xF3, 0x23, 0xB8, 0xA1, 0xB3, 0x90, 0x56, 0xB7, 
-	0xDB, 0xA6, 0x14, 0xB4, 0x59, 0x6E, 0x1A, 0x40, 0x8A, 0xD6, 
-	0x23, 0x05, 0x88, 0x80, 0xC3, 0x58, 0x1B, 0x25, 0x08, 0xFD, 
-	0xF2, 0x15, 0x02, 0xB0, 0xDC, 0x5B, 0xD4, 0xCA, 0xFC, 0x07, 
-	0x89, 0xD5, 0xA4, 0xC0, 0x7C, 0xD7, 0x8D, 0x13, 0x2A, 0x4E, 
-	0x01, 0x9F, 0x84, 0xC8, 0xBB, 0x47, 0xB2, 0xD8, 0x65, 0x45, 
-	0xFA, 0x84, 0x9F, 0x88, 0xD0, 0xF4, 0xF5, 0x22, 0x35, 0x77, 
-	0x11, 0x67, 0x1C, 0xDE, 0x5F, 0x85, 0x6D, 0x55, 0xD8, 0xA7, 
-	0x07, 0x15, 0x8C, 0xE1, 0xB0, 0xA7, 0x79, 0xB4, 0x47, 0x9D, 
-	0x70, 0xB3, 0xD2, 0xF1, 0x1F, 0x41, 0x4C, 0x65, 0x72, 0x26, 
-	0xEB, 0x66, 0xC8, 0x95, 0xF6, 0x6D, 0x87, 0x35, 0x53, 0xFE, 
-	0xB1, 0x52, 0x4D, 0x76, 0x5B, 0x61, 0x53, 0x89, 0xB1, 0x20, 
-	0x1A, 0x8B, 0xE4, 0x7D, 0xF1, 0x02, 0x41, 0x00, 0xD9, 0x6E, 
-	0xE1, 0xD9, 0x06, 0x56, 0xA1, 0xF6, 0xDF, 0x54, 0x45, 0xC5, 
-	0xEC, 0x6A, 0xC8, 0x2A, 0x38, 0x4E, 0x6B, 0xC6, 0xE8, 0xEA, 
-	0xFB, 0x6F, 0x65, 0x2D, 0xBA, 0xDE, 0x27, 0x63, 0x37, 0x21, 
-	0x2E, 0xA4, 0x55, 0xAB, 0xE7, 0xDB, 0xCE, 0x71, 0xE1, 0x08, 
-	0xFC, 0xF2, 0xCA, 0x52, 0x33, 0x55, 0xE8, 0x39, 0xB3, 0xDA, 
-	0xC5, 0xB0, 0x69, 0x84, 0x6E, 0xE3, 0xCF, 0x47, 0x80, 0xA6, 
-	0xB6, 0x85, 0x02, 0x41, 0x00, 0xC8, 0x71, 0x0D, 0x37, 0x47, 
-	0xE1, 0x7B, 0x21, 0x2D, 0x11, 0x2D, 0x95, 0x2E, 0xC7, 0xD0, 
-	0xB6, 0xD3, 0x7C, 0x5C, 0x93, 0x3C, 0x5B, 0x22, 0xE5, 0xE0, 
-	0x8B, 0x6D, 0x47, 0xF9, 0x14, 0x0F, 0x9E, 0x08, 0x1B, 0x53, 
-	0xAB, 0x0A, 0xA9, 0xE4, 0x7F, 0x40, 0xD3, 0xDF, 0x62, 0x74, 
-	0x10, 0xA2, 0xFE, 0x83, 0x1F, 0xCF, 0x55, 0x66, 0xEB, 0x5D, 
-	0xC5, 0x83, 0xBA, 0xEC, 0x9F, 0xD2, 0xB5, 0x06, 0xAD, 0x02, 
-	0x41, 0x00, 0xB7, 0x68, 0x19, 0xA7, 0xC7, 0xF9, 0xF1, 0x9A, 
-	0xDD, 0x5D, 0x27, 0x91, 0xC1, 0x4F, 0x7D, 0x52, 0x67, 0xB6, 
-	0x76, 0xA1, 0x0D, 0x3D, 0x91, 0x23, 0xB0, 0xB3, 0xF7, 0x49, 
-	0x86, 0xED, 0xE0, 0xC5, 0xE3, 0xA3, 0x09, 0x04, 0xFD, 0x89, 
-	0xE2, 0xC5, 0x1A, 0x6E, 0x4B, 0x77, 0xBD, 0x03, 0xC3, 0x7B, 
-	0xB6, 0x6C, 0x5D, 0xF2, 0xAF, 0x08, 0x94, 0xA8, 0xFA, 0x24, 
-	0xBD, 0x66, 0x71, 0xF5, 0xAE, 0x45, 0x02, 0x40, 0x15, 0x52, 
-	0xD1, 0x91, 0x1B, 0xF8, 0x84, 0xDC, 0xD6, 0xAA, 0x89, 0x2A, 
-	0xE1, 0xBB, 0x28, 0x1D, 0x0B, 0x0A, 0xA3, 0xDE, 0x96, 0x01, 
-	0x2C, 0x09, 0x40, 0x86, 0x14, 0xAE, 0x1F, 0x75, 0x5E, 0xE3, 
-	0xF5, 0x00, 0xD3, 0x39, 0xD2, 0xFC, 0x97, 0xEE, 0x61, 0xBB, 
-	0x28, 0x7C, 0x94, 0xD4, 0x60, 0x42, 0xAB, 0x38, 0x6B, 0x1A, 
-	0x2E, 0xC4, 0xC3, 0x49, 0x0B, 0xE6, 0x8A, 0xDD, 0xC5, 0xD0, 
-	0xB4, 0x51, 0x02, 0x41, 0x00, 0xA9, 0x8B, 0xA7, 0xA9, 0xEE, 
-	0xAE, 0xBB, 0x17, 0xCB, 0x72, 0xF2, 0x50, 0x22, 0x9D, 0xB3, 
-	0xDF, 0xE0, 0x40, 0x37, 0x08, 0xD5, 0x7F, 0x19, 0x58, 0x80, 
-	0x70, 0x79, 0x69, 0x99, 0xDF, 0x62, 0x0D, 0x21, 0xAB, 0xDD, 
-	0xB2, 0xCE, 0x68, 0xB3, 0x9F, 0x87, 0xAF, 0x55, 0xF4, 0xAA, 
-	0xE1, 0x00, 0x72, 0xBE, 0x6E, 0xC3, 0x94, 0x49, 0xDC, 0xBB, 
-	0x8E, 0x1A, 0x78, 0xE5, 0x49, 0x1F, 0x55, 0x41, 0xA1
+        0x30, 0x82, 0x02, 0x5D, 0x02, 0x01, 0x00, 0x02, 0x81, 0x81,
+        0x00, 0xAA, 0x3E, 0xA5, 0x9C, 0xD3, 0x17, 0x49, 0x65, 0x43,
+        0xDE, 0xD0, 0xF3, 0x4B, 0x1C, 0xDB, 0x49, 0x0C, 0xFC, 0x7A,
+        0x65, 0x05, 0x6D, 0xDE, 0x6A, 0xC4, 0xE4, 0x73, 0x2C, 0x8A,
+        0x96, 0x82, 0x8F, 0x23, 0xA5, 0x06, 0x71, 0x1C, 0x06, 0x3E,
+        0x2F, 0x92, 0x8D, 0x0B, 0x29, 0x34, 0x45, 0x59, 0xE9, 0xA9,
+        0xBC, 0x61, 0xD7, 0x24, 0x37, 0x5D, 0xB5, 0xC4, 0x37, 0x8D,
+        0xBA, 0x67, 0xB2, 0xEF, 0x03, 0x27, 0xFA, 0xC1, 0xB4, 0xCD,
+        0x6B, 0x00, 0x66, 0xB4, 0xD6, 0x73, 0x70, 0x1F, 0x08, 0x3A,
+        0xCC, 0x77, 0xAD, 0xE9, 0xF9, 0x34, 0xD4, 0xF3, 0xA0, 0x2D,
+        0xA9, 0xE7, 0x58, 0xA9, 0xC0, 0x61, 0x84, 0xB6, 0xEC, 0x3D,
+        0x0A, 0xAD, 0xFD, 0x5C, 0x86, 0x73, 0xAA, 0x6B, 0x47, 0xD8,
+        0x8B, 0x2E, 0x58, 0x4B, 0x69, 0x12, 0x82, 0x26, 0x55, 0xE6,
+        0x14, 0xBF, 0x55, 0x70, 0x88, 0xFE, 0xF9, 0x75, 0xE1, 0x02,
+        0x03, 0x01, 0x00, 0x01, 0x02, 0x81, 0x80, 0x0A, 0x4C, 0xC1,
+        0xFE, 0x4B, 0xF3, 0x23, 0xB8, 0xA1, 0xB3, 0x90, 0x56, 0xB7,
+        0xDB, 0xA6, 0x14, 0xB4, 0x59, 0x6E, 0x1A, 0x40, 0x8A, 0xD6,
+        0x23, 0x05, 0x88, 0x80, 0xC3, 0x58, 0x1B, 0x25, 0x08, 0xFD,
+        0xF2, 0x15, 0x02, 0xB0, 0xDC, 0x5B, 0xD4, 0xCA, 0xFC, 0x07,
+        0x89, 0xD5, 0xA4, 0xC0, 0x7C, 0xD7, 0x8D, 0x13, 0x2A, 0x4E,
+        0x01, 0x9F, 0x84, 0xC8, 0xBB, 0x47, 0xB2, 0xD8, 0x65, 0x45,
+        0xFA, 0x84, 0x9F, 0x88, 0xD0, 0xF4, 0xF5, 0x22, 0x35, 0x77,
+        0x11, 0x67, 0x1C, 0xDE, 0x5F, 0x85, 0x6D, 0x55, 0xD8, 0xA7,
+        0x07, 0x15, 0x8C, 0xE1, 0xB0, 0xA7, 0x79, 0xB4, 0x47, 0x9D,
+        0x70, 0xB3, 0xD2, 0xF1, 0x1F, 0x41, 0x4C, 0x65, 0x72, 0x26,
+        0xEB, 0x66, 0xC8, 0x95, 0xF6, 0x6D, 0x87, 0x35, 0x53, 0xFE,
+        0xB1, 0x52, 0x4D, 0x76, 0x5B, 0x61, 0x53, 0x89, 0xB1, 0x20,
+        0x1A, 0x8B, 0xE4, 0x7D, 0xF1, 0x02, 0x41, 0x00, 0xD9, 0x6E,
+        0xE1, 0xD9, 0x06, 0x56, 0xA1, 0xF6, 0xDF, 0x54, 0x45, 0xC5,
+        0xEC, 0x6A, 0xC8, 0x2A, 0x38, 0x4E, 0x6B, 0xC6, 0xE8, 0xEA,
+        0xFB, 0x6F, 0x65, 0x2D, 0xBA, 0xDE, 0x27, 0x63, 0x37, 0x21,
+        0x2E, 0xA4, 0x55, 0xAB, 0xE7, 0xDB, 0xCE, 0x71, 0xE1, 0x08,
+        0xFC, 0xF2, 0xCA, 0x52, 0x33, 0x55, 0xE8, 0x39, 0xB3, 0xDA,
+        0xC5, 0xB0, 0x69, 0x84, 0x6E, 0xE3, 0xCF, 0x47, 0x80, 0xA6,
+        0xB6, 0x85, 0x02, 0x41, 0x00, 0xC8, 0x71, 0x0D, 0x37, 0x47,
+        0xE1, 0x7B, 0x21, 0x2D, 0x11, 0x2D, 0x95, 0x2E, 0xC7, 0xD0,
+        0xB6, 0xD3, 0x7C, 0x5C, 0x93, 0x3C, 0x5B, 0x22, 0xE5, 0xE0,
+        0x8B, 0x6D, 0x47, 0xF9, 0x14, 0x0F, 0x9E, 0x08, 0x1B, 0x53,
+        0xAB, 0x0A, 0xA9, 0xE4, 0x7F, 0x40, 0xD3, 0xDF, 0x62, 0x74,
+        0x10, 0xA2, 0xFE, 0x83, 0x1F, 0xCF, 0x55, 0x66, 0xEB, 0x5D,
+        0xC5, 0x83, 0xBA, 0xEC, 0x9F, 0xD2, 0xB5, 0x06, 0xAD, 0x02,
+        0x41, 0x00, 0xB7, 0x68, 0x19, 0xA7, 0xC7, 0xF9, 0xF1, 0x9A,
+        0xDD, 0x5D, 0x27, 0x91, 0xC1, 0x4F, 0x7D, 0x52, 0x67, 0xB6,
+        0x76, 0xA1, 0x0D, 0x3D, 0x91, 0x23, 0xB0, 0xB3, 0xF7, 0x49,
+        0x86, 0xED, 0xE0, 0xC5, 0xE3, 0xA3, 0x09, 0x04, 0xFD, 0x89,
+        0xE2, 0xC5, 0x1A, 0x6E, 0x4B, 0x77, 0xBD, 0x03, 0xC3, 0x7B,
+        0xB6, 0x6C, 0x5D, 0xF2, 0xAF, 0x08, 0x94, 0xA8, 0xFA, 0x24,
+        0xBD, 0x66, 0x71, 0xF5, 0xAE, 0x45, 0x02, 0x40, 0x15, 0x52,
+        0xD1, 0x91, 0x1B, 0xF8, 0x84, 0xDC, 0xD6, 0xAA, 0x89, 0x2A,
+        0xE1, 0xBB, 0x28, 0x1D, 0x0B, 0x0A, 0xA3, 0xDE, 0x96, 0x01,
+        0x2C, 0x09, 0x40, 0x86, 0x14, 0xAE, 0x1F, 0x75, 0x5E, 0xE3,
+        0xF5, 0x00, 0xD3, 0x39, 0xD2, 0xFC, 0x97, 0xEE, 0x61, 0xBB,
+        0x28, 0x7C, 0x94, 0xD4, 0x60, 0x42, 0xAB, 0x38, 0x6B, 0x1A,
+        0x2E, 0xC4, 0xC3, 0x49, 0x0B, 0xE6, 0x8A, 0xDD, 0xC5, 0xD0,
+        0xB4, 0x51, 0x02, 0x41, 0x00, 0xA9, 0x8B, 0xA7, 0xA9, 0xEE,
+        0xAE, 0xBB, 0x17, 0xCB, 0x72, 0xF2, 0x50, 0x22, 0x9D, 0xB3,
+        0xDF, 0xE0, 0x40, 0x37, 0x08, 0xD5, 0x7F, 0x19, 0x58, 0x80,
+        0x70, 0x79, 0x69, 0x99, 0xDF, 0x62, 0x0D, 0x21, 0xAB, 0xDD,
+        0xB2, 0xCE, 0x68, 0xB3, 0x9F, 0x87, 0xAF, 0x55, 0xF4, 0xAA,
+        0xE1, 0x00, 0x72, 0xBE, 0x6E, 0xC3, 0x94, 0x49, 0xDC, 0xBB,
+        0x8E, 0x1A, 0x78, 0xE5, 0x49, 0x1F, 0x55, 0x41, 0xA1
 };
 static const int sizeof_server_key_der_1024 = sizeof(server_key_der_1024);
 
 /* ./certs/1024/server-cert.der, 1024-bit */
 static const unsigned char server_cert_der_1024[] =
 {
-	0x30, 0x82, 0x03, 0xA9, 0x30, 0x82, 0x03, 0x12, 0xA0, 0x03, 
-	0x02, 0x01, 0x02, 0x02, 0x01, 0x01, 0x30, 0x0D, 0x06, 0x09, 
-	0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0B, 0x05, 
-	0x00, 0x30, 0x81, 0x99, 0x31, 0x0B, 0x30, 0x09, 0x06, 0x03, 
-	0x55, 0x04, 0x06, 0x13, 0x02, 0x55, 0x53, 0x31, 0x10, 0x30, 
-	0x0E, 0x06, 0x03, 0x55, 0x04, 0x08, 0x0C, 0x07, 0x4D, 0x6F, 
-	0x6E, 0x74, 0x61, 0x6E, 0x61, 0x31, 0x10, 0x30, 0x0E, 0x06, 
-	0x03, 0x55, 0x04, 0x07, 0x0C, 0x07, 0x42, 0x6F, 0x7A, 0x65, 
-	0x6D, 0x61, 0x6E, 0x31, 0x11, 0x30, 0x0F, 0x06, 0x03, 0x55, 
-	0x04, 0x0A, 0x0C, 0x08, 0x53, 0x61, 0x77, 0x74, 0x6F, 0x6F, 
-	0x74, 0x68, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 
-	0x0B, 0x0C, 0x0F, 0x43, 0x6F, 0x6E, 0x73, 0x75, 0x6C, 0x74, 
-	0x69, 0x6E, 0x67, 0x5F, 0x31, 0x30, 0x32, 0x34, 0x31, 0x18, 
-	0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 0x03, 0x0C, 0x0F, 0x77, 
-	0x77, 0x77, 0x2E, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 
-	0x2E, 0x63, 0x6F, 0x6D, 0x31, 0x1F, 0x30, 0x1D, 0x06, 0x09, 
-	0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x09, 0x01, 0x16, 
-	0x10, 0x69, 0x6E, 0x66, 0x6F, 0x40, 0x77, 0x6F, 0x6C, 0x66, 
-	0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x30, 0x1E, 0x17, 
-	0x0D, 0x31, 0x38, 0x30, 0x34, 0x31, 0x33, 0x31, 0x35, 0x32, 
-	0x33, 0x31, 0x30, 0x5A, 0x17, 0x0D, 0x32, 0x31, 0x30, 0x31, 
-	0x30, 0x37, 0x31, 0x35, 0x32, 0x33, 0x31, 0x30, 0x5A, 0x30, 
-	0x81, 0x95, 0x31, 0x0B, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 
-	0x06, 0x13, 0x02, 0x55, 0x53, 0x31, 0x10, 0x30, 0x0E, 0x06, 
-	0x03, 0x55, 0x04, 0x08, 0x0C, 0x07, 0x4D, 0x6F, 0x6E, 0x74, 
-	0x61, 0x6E, 0x61, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 
-	0x04, 0x07, 0x0C, 0x07, 0x42, 0x6F, 0x7A, 0x65, 0x6D, 0x61, 
-	0x6E, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x0A, 
-	0x0C, 0x07, 0x77, 0x6F, 0x6C, 0x66, 0x53, 0x53, 0x4C, 0x31, 
-	0x15, 0x30, 0x13, 0x06, 0x03, 0x55, 0x04, 0x0B, 0x0C, 0x0C, 
-	0x53, 0x75, 0x70, 0x70, 0x6F, 0x72, 0x74, 0x5F, 0x31, 0x30, 
-	0x32, 0x34, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 
-	0x03, 0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77, 0x6F, 0x6C, 
-	0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x31, 0x1F, 
-	0x30, 0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 
-	0x01, 0x09, 0x01, 0x16, 0x10, 0x69, 0x6E, 0x66, 0x6F, 0x40, 
-	0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 
-	0x6D, 0x30, 0x81, 0x9F, 0x30, 0x0D, 0x06, 0x09, 0x2A, 0x86, 
-	0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x01, 0x05, 0x00, 0x03, 
-	0x81, 0x8D, 0x00, 0x30, 0x81, 0x89, 0x02, 0x81, 0x81, 0x00, 
-	0xAA, 0x3E, 0xA5, 0x9C, 0xD3, 0x17, 0x49, 0x65, 0x43, 0xDE, 
-	0xD0, 0xF3, 0x4B, 0x1C, 0xDB, 0x49, 0x0C, 0xFC, 0x7A, 0x65, 
-	0x05, 0x6D, 0xDE, 0x6A, 0xC4, 0xE4, 0x73, 0x2C, 0x8A, 0x96, 
-	0x82, 0x8F, 0x23, 0xA5, 0x06, 0x71, 0x1C, 0x06, 0x3E, 0x2F, 
-	0x92, 0x8D, 0x0B, 0x29, 0x34, 0x45, 0x59, 0xE9, 0xA9, 0xBC, 
-	0x61, 0xD7, 0x24, 0x37, 0x5D, 0xB5, 0xC4, 0x37, 0x8D, 0xBA, 
-	0x67, 0xB2, 0xEF, 0x03, 0x27, 0xFA, 0xC1, 0xB4, 0xCD, 0x6B, 
-	0x00, 0x66, 0xB4, 0xD6, 0x73, 0x70, 0x1F, 0x08, 0x3A, 0xCC, 
-	0x77, 0xAD, 0xE9, 0xF9, 0x34, 0xD4, 0xF3, 0xA0, 0x2D, 0xA9, 
-	0xE7, 0x58, 0xA9, 0xC0, 0x61, 0x84, 0xB6, 0xEC, 0x3D, 0x0A, 
-	0xAD, 0xFD, 0x5C, 0x86, 0x73, 0xAA, 0x6B, 0x47, 0xD8, 0x8B, 
-	0x2E, 0x58, 0x4B, 0x69, 0x12, 0x82, 0x26, 0x55, 0xE6, 0x14, 
-	0xBF, 0x55, 0x70, 0x88, 0xFE, 0xF9, 0x75, 0xE1, 0x02, 0x03, 
-	0x01, 0x00, 0x01, 0xA3, 0x82, 0x01, 0x01, 0x30, 0x81, 0xFE, 
-	0x30, 0x1D, 0x06, 0x03, 0x55, 0x1D, 0x0E, 0x04, 0x16, 0x04, 
-	0x14, 0xD9, 0x3C, 0x35, 0xEA, 0x74, 0x0E, 0x23, 0xBE, 0x9C, 
-	0xFC, 0xFA, 0x29, 0x90, 0x09, 0xC1, 0xE7, 0x84, 0x16, 0x9F, 
-	0x7C, 0x30, 0x81, 0xCE, 0x06, 0x03, 0x55, 0x1D, 0x23, 0x04, 
-	0x81, 0xC6, 0x30, 0x81, 0xC3, 0x80, 0x14, 0xD3, 0x22, 0x8F, 
-	0x28, 0x2C, 0xE0, 0x05, 0xEE, 0xD3, 0xED, 0xC3, 0x71, 0x3D, 
-	0xC9, 0xB2, 0x36, 0x3A, 0x1D, 0xBF, 0xA8, 0xA1, 0x81, 0x9F, 
-	0xA4, 0x81, 0x9C, 0x30, 0x81, 0x99, 0x31, 0x0B, 0x30, 0x09, 
-	0x06, 0x03, 0x55, 0x04, 0x06, 0x13, 0x02, 0x55, 0x53, 0x31, 
-	0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x08, 0x0C, 0x07, 
-	0x4D, 0x6F, 0x6E, 0x74, 0x61, 0x6E, 0x61, 0x31, 0x10, 0x30, 
-	0x0E, 0x06, 0x03, 0x55, 0x04, 0x07, 0x0C, 0x07, 0x42, 0x6F, 
-	0x7A, 0x65, 0x6D, 0x61, 0x6E, 0x31, 0x11, 0x30, 0x0F, 0x06, 
-	0x03, 0x55, 0x04, 0x0A, 0x0C, 0x08, 0x53, 0x61, 0x77, 0x74, 
-	0x6F, 0x6F, 0x74, 0x68, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 
-	0x55, 0x04, 0x0B, 0x0C, 0x0F, 0x43, 0x6F, 0x6E, 0x73, 0x75, 
-	0x6C, 0x74, 0x69, 0x6E, 0x67, 0x5F, 0x31, 0x30, 0x32, 0x34, 
-	0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 0x03, 0x0C, 
-	0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77, 0x6F, 0x6C, 0x66, 0x73, 
-	0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x31, 0x1F, 0x30, 0x1D, 
-	0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x09, 
-	0x01, 0x16, 0x10, 0x69, 0x6E, 0x66, 0x6F, 0x40, 0x77, 0x6F, 
-	0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x82, 
-	0x09, 0x00, 0xDA, 0xFB, 0x6A, 0x0D, 0xFE, 0xCF, 0x9B, 0x47, 
-	0x30, 0x0C, 0x06, 0x03, 0x55, 0x1D, 0x13, 0x04, 0x05, 0x30, 
-	0x03, 0x01, 0x01, 0xFF, 0x30, 0x0D, 0x06, 0x09, 0x2A, 0x86, 
-	0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0B, 0x05, 0x00, 0x03, 
-	0x81, 0x81, 0x00, 0x0B, 0xC3, 0xAF, 0x43, 0x85, 0x64, 0x61, 
-	0xE7, 0xAB, 0x5A, 0x2A, 0x1B, 0xB2, 0x29, 0xD5, 0x66, 0x68, 
-	0x44, 0x1A, 0x6D, 0x66, 0xFC, 0x3D, 0xB1, 0x88, 0xEC, 0xA5, 
-	0x41, 0x18, 0x67, 0x62, 0x34, 0xA4, 0x5E, 0xC9, 0x69, 0xCD, 
-	0x40, 0xC8, 0x56, 0x7E, 0xBF, 0xEB, 0xBC, 0x61, 0x1F, 0x33, 
-	0x34, 0x58, 0xBE, 0x57, 0xFD, 0xE6, 0x98, 0xDD, 0x51, 0x27, 
-	0x7C, 0xB7, 0x2C, 0xBC, 0xC9, 0x39, 0xE5, 0xE5, 0x95, 0x82, 
-	0xE1, 0x3F, 0xD9, 0xB9, 0x97, 0x30, 0x4E, 0x33, 0x2C, 0xEF, 
-	0xF8, 0xDB, 0xB4, 0xEE, 0x35, 0x75, 0x9E, 0x7A, 0x3F, 0x22, 
-	0x8F, 0xA5, 0x71, 0xD4, 0x01, 0x64, 0x6C, 0xF2, 0x85, 0xF7, 
-	0x72, 0x99, 0x2C, 0x80, 0x0F, 0xA4, 0x31, 0x1D, 0xD4, 0x0B, 
-	0x1E, 0xA5, 0x0F, 0xE7, 0x53, 0x0A, 0xDE, 0x15, 0x0D, 0xB2, 
-	0xD0, 0x6B, 0xF4, 0xD6, 0x2F, 0xE2, 0x0B, 0xA3, 0x8A, 0x5A, 
-	0x6E
+        0x30, 0x82, 0x03, 0xA9, 0x30, 0x82, 0x03, 0x12, 0xA0, 0x03,
+        0x02, 0x01, 0x02, 0x02, 0x01, 0x01, 0x30, 0x0D, 0x06, 0x09,
+        0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0B, 0x05,
+        0x00, 0x30, 0x81, 0x99, 0x31, 0x0B, 0x30, 0x09, 0x06, 0x03,
+        0x55, 0x04, 0x06, 0x13, 0x02, 0x55, 0x53, 0x31, 0x10, 0x30,
+        0x0E, 0x06, 0x03, 0x55, 0x04, 0x08, 0x0C, 0x07, 0x4D, 0x6F,
+        0x6E, 0x74, 0x61, 0x6E, 0x61, 0x31, 0x10, 0x30, 0x0E, 0x06,
+        0x03, 0x55, 0x04, 0x07, 0x0C, 0x07, 0x42, 0x6F, 0x7A, 0x65,
+        0x6D, 0x61, 0x6E, 0x31, 0x11, 0x30, 0x0F, 0x06, 0x03, 0x55,
+        0x04, 0x0A, 0x0C, 0x08, 0x53, 0x61, 0x77, 0x74, 0x6F, 0x6F,
+        0x74, 0x68, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 0x04,
+        0x0B, 0x0C, 0x0F, 0x43, 0x6F, 0x6E, 0x73, 0x75, 0x6C, 0x74,
+        0x69, 0x6E, 0x67, 0x5F, 0x31, 0x30, 0x32, 0x34, 0x31, 0x18,
+        0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 0x03, 0x0C, 0x0F, 0x77,
+        0x77, 0x77, 0x2E, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C,
+        0x2E, 0x63, 0x6F, 0x6D, 0x31, 0x1F, 0x30, 0x1D, 0x06, 0x09,
+        0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x09, 0x01, 0x16,
+        0x10, 0x69, 0x6E, 0x66, 0x6F, 0x40, 0x77, 0x6F, 0x6C, 0x66,
+        0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x30, 0x1E, 0x17,
+        0x0D, 0x31, 0x38, 0x30, 0x34, 0x31, 0x33, 0x31, 0x35, 0x32,
+        0x33, 0x31, 0x30, 0x5A, 0x17, 0x0D, 0x32, 0x31, 0x30, 0x31,
+        0x30, 0x37, 0x31, 0x35, 0x32, 0x33, 0x31, 0x30, 0x5A, 0x30,
+        0x81, 0x95, 0x31, 0x0B, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04,
+        0x06, 0x13, 0x02, 0x55, 0x53, 0x31, 0x10, 0x30, 0x0E, 0x06,
+        0x03, 0x55, 0x04, 0x08, 0x0C, 0x07, 0x4D, 0x6F, 0x6E, 0x74,
+        0x61, 0x6E, 0x61, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55,
+        0x04, 0x07, 0x0C, 0x07, 0x42, 0x6F, 0x7A, 0x65, 0x6D, 0x61,
+        0x6E, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x0A,
+        0x0C, 0x07, 0x77, 0x6F, 0x6C, 0x66, 0x53, 0x53, 0x4C, 0x31,
+        0x15, 0x30, 0x13, 0x06, 0x03, 0x55, 0x04, 0x0B, 0x0C, 0x0C,
+        0x53, 0x75, 0x70, 0x70, 0x6F, 0x72, 0x74, 0x5F, 0x31, 0x30,
+        0x32, 0x34, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 0x04,
+        0x03, 0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77, 0x6F, 0x6C,
+        0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x31, 0x1F,
+        0x30, 0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D,
+        0x01, 0x09, 0x01, 0x16, 0x10, 0x69, 0x6E, 0x66, 0x6F, 0x40,
+        0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F,
+        0x6D, 0x30, 0x81, 0x9F, 0x30, 0x0D, 0x06, 0x09, 0x2A, 0x86,
+        0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x01, 0x05, 0x00, 0x03,
+        0x81, 0x8D, 0x00, 0x30, 0x81, 0x89, 0x02, 0x81, 0x81, 0x00,
+        0xAA, 0x3E, 0xA5, 0x9C, 0xD3, 0x17, 0x49, 0x65, 0x43, 0xDE,
+        0xD0, 0xF3, 0x4B, 0x1C, 0xDB, 0x49, 0x0C, 0xFC, 0x7A, 0x65,
+        0x05, 0x6D, 0xDE, 0x6A, 0xC4, 0xE4, 0x73, 0x2C, 0x8A, 0x96,
+        0x82, 0x8F, 0x23, 0xA5, 0x06, 0x71, 0x1C, 0x06, 0x3E, 0x2F,
+        0x92, 0x8D, 0x0B, 0x29, 0x34, 0x45, 0x59, 0xE9, 0xA9, 0xBC,
+        0x61, 0xD7, 0x24, 0x37, 0x5D, 0xB5, 0xC4, 0x37, 0x8D, 0xBA,
+        0x67, 0xB2, 0xEF, 0x03, 0x27, 0xFA, 0xC1, 0xB4, 0xCD, 0x6B,
+        0x00, 0x66, 0xB4, 0xD6, 0x73, 0x70, 0x1F, 0x08, 0x3A, 0xCC,
+        0x77, 0xAD, 0xE9, 0xF9, 0x34, 0xD4, 0xF3, 0xA0, 0x2D, 0xA9,
+        0xE7, 0x58, 0xA9, 0xC0, 0x61, 0x84, 0xB6, 0xEC, 0x3D, 0x0A,
+        0xAD, 0xFD, 0x5C, 0x86, 0x73, 0xAA, 0x6B, 0x47, 0xD8, 0x8B,
+        0x2E, 0x58, 0x4B, 0x69, 0x12, 0x82, 0x26, 0x55, 0xE6, 0x14,
+        0xBF, 0x55, 0x70, 0x88, 0xFE, 0xF9, 0x75, 0xE1, 0x02, 0x03,
+        0x01, 0x00, 0x01, 0xA3, 0x82, 0x01, 0x01, 0x30, 0x81, 0xFE,
+        0x30, 0x1D, 0x06, 0x03, 0x55, 0x1D, 0x0E, 0x04, 0x16, 0x04,
+        0x14, 0xD9, 0x3C, 0x35, 0xEA, 0x74, 0x0E, 0x23, 0xBE, 0x9C,
+        0xFC, 0xFA, 0x29, 0x90, 0x09, 0xC1, 0xE7, 0x84, 0x16, 0x9F,
+        0x7C, 0x30, 0x81, 0xCE, 0x06, 0x03, 0x55, 0x1D, 0x23, 0x04,
+        0x81, 0xC6, 0x30, 0x81, 0xC3, 0x80, 0x14, 0xD3, 0x22, 0x8F,
+        0x28, 0x2C, 0xE0, 0x05, 0xEE, 0xD3, 0xED, 0xC3, 0x71, 0x3D,
+        0xC9, 0xB2, 0x36, 0x3A, 0x1D, 0xBF, 0xA8, 0xA1, 0x81, 0x9F,
+        0xA4, 0x81, 0x9C, 0x30, 0x81, 0x99, 0x31, 0x0B, 0x30, 0x09,
+        0x06, 0x03, 0x55, 0x04, 0x06, 0x13, 0x02, 0x55, 0x53, 0x31,
+        0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x08, 0x0C, 0x07,
+        0x4D, 0x6F, 0x6E, 0x74, 0x61, 0x6E, 0x61, 0x31, 0x10, 0x30,
+        0x0E, 0x06, 0x03, 0x55, 0x04, 0x07, 0x0C, 0x07, 0x42, 0x6F,
+        0x7A, 0x65, 0x6D, 0x61, 0x6E, 0x31, 0x11, 0x30, 0x0F, 0x06,
+        0x03, 0x55, 0x04, 0x0A, 0x0C, 0x08, 0x53, 0x61, 0x77, 0x74,
+        0x6F, 0x6F, 0x74, 0x68, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03,
+        0x55, 0x04, 0x0B, 0x0C, 0x0F, 0x43, 0x6F, 0x6E, 0x73, 0x75,
+        0x6C, 0x74, 0x69, 0x6E, 0x67, 0x5F, 0x31, 0x30, 0x32, 0x34,
+        0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 0x03, 0x0C,
+        0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77, 0x6F, 0x6C, 0x66, 0x73,
+        0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x31, 0x1F, 0x30, 0x1D,
+        0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x09,
+        0x01, 0x16, 0x10, 0x69, 0x6E, 0x66, 0x6F, 0x40, 0x77, 0x6F,
+        0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x82,
+        0x09, 0x00, 0xDA, 0xFB, 0x6A, 0x0D, 0xFE, 0xCF, 0x9B, 0x47,
+        0x30, 0x0C, 0x06, 0x03, 0x55, 0x1D, 0x13, 0x04, 0x05, 0x30,
+        0x03, 0x01, 0x01, 0xFF, 0x30, 0x0D, 0x06, 0x09, 0x2A, 0x86,
+        0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0B, 0x05, 0x00, 0x03,
+        0x81, 0x81, 0x00, 0x0B, 0xC3, 0xAF, 0x43, 0x85, 0x64, 0x61,
+        0xE7, 0xAB, 0x5A, 0x2A, 0x1B, 0xB2, 0x29, 0xD5, 0x66, 0x68,
+        0x44, 0x1A, 0x6D, 0x66, 0xFC, 0x3D, 0xB1, 0x88, 0xEC, 0xA5,
+        0x41, 0x18, 0x67, 0x62, 0x34, 0xA4, 0x5E, 0xC9, 0x69, 0xCD,
+        0x40, 0xC8, 0x56, 0x7E, 0xBF, 0xEB, 0xBC, 0x61, 0x1F, 0x33,
+        0x34, 0x58, 0xBE, 0x57, 0xFD, 0xE6, 0x98, 0xDD, 0x51, 0x27,
+        0x7C, 0xB7, 0x2C, 0xBC, 0xC9, 0x39, 0xE5, 0xE5, 0x95, 0x82,
+        0xE1, 0x3F, 0xD9, 0xB9, 0x97, 0x30, 0x4E, 0x33, 0x2C, 0xEF,
+        0xF8, 0xDB, 0xB4, 0xEE, 0x35, 0x75, 0x9E, 0x7A, 0x3F, 0x22,
+        0x8F, 0xA5, 0x71, 0xD4, 0x01, 0x64, 0x6C, 0xF2, 0x85, 0xF7,
+        0x72, 0x99, 0x2C, 0x80, 0x0F, 0xA4, 0x31, 0x1D, 0xD4, 0x0B,
+        0x1E, 0xA5, 0x0F, 0xE7, 0x53, 0x0A, 0xDE, 0x15, 0x0D, 0xB2,
+        0xD0, 0x6B, 0xF4, 0xD6, 0x2F, 0xE2, 0x0B, 0xA3, 0x8A, 0x5A,
+        0x6E
 };
 static const int sizeof_server_cert_der_1024 = sizeof(server_cert_der_1024);
 
@@ -681,291 +681,291 @@
 /* ./certs/client-key.der, 2048-bit */
 static const unsigned char client_key_der_2048[] =
 {
-	0x30, 0x82, 0x04, 0xA4, 0x02, 0x01, 0x00, 0x02, 0x82, 0x01, 
-	0x01, 0x00, 0xC3, 0x03, 0xD1, 0x2B, 0xFE, 0x39, 0xA4, 0x32, 
-	0x45, 0x3B, 0x53, 0xC8, 0x84, 0x2B, 0x2A, 0x7C, 0x74, 0x9A, 
-	0xBD, 0xAA, 0x2A, 0x52, 0x07, 0x47, 0xD6, 0xA6, 0x36, 0xB2, 
-	0x07, 0x32, 0x8E, 0xD0, 0xBA, 0x69, 0x7B, 0xC6, 0xC3, 0x44, 
-	0x9E, 0xD4, 0x81, 0x48, 0xFD, 0x2D, 0x68, 0xA2, 0x8B, 0x67, 
-	0xBB, 0xA1, 0x75, 0xC8, 0x36, 0x2C, 0x4A, 0xD2, 0x1B, 0xF7, 
-	0x8B, 0xBA, 0xCF, 0x0D, 0xF9, 0xEF, 0xEC, 0xF1, 0x81, 0x1E, 
-	0x7B, 0x9B, 0x03, 0x47, 0x9A, 0xBF, 0x65, 0xCC, 0x7F, 0x65, 
-	0x24, 0x69, 0xA6, 0xE8, 0x14, 0x89, 0x5B, 0xE4, 0x34, 0xF7, 
-	0xC5, 0xB0, 0x14, 0x93, 0xF5, 0x67, 0x7B, 0x3A, 0x7A, 0x78, 
-	0xE1, 0x01, 0x56, 0x56, 0x91, 0xA6, 0x13, 0x42, 0x8D, 0xD2, 
-	0x3C, 0x40, 0x9C, 0x4C, 0xEF, 0xD1, 0x86, 0xDF, 0x37, 0x51, 
-	0x1B, 0x0C, 0xA1, 0x3B, 0xF5, 0xF1, 0xA3, 0x4A, 0x35, 0xE4, 
-	0xE1, 0xCE, 0x96, 0xDF, 0x1B, 0x7E, 0xBF, 0x4E, 0x97, 0xD0, 
-	0x10, 0xE8, 0xA8, 0x08, 0x30, 0x81, 0xAF, 0x20, 0x0B, 0x43, 
-	0x14, 0xC5, 0x74, 0x67, 0xB4, 0x32, 0x82, 0x6F, 0x8D, 0x86, 
-	0xC2, 0x88, 0x40, 0x99, 0x36, 0x83, 0xBA, 0x1E, 0x40, 0x72, 
-	0x22, 0x17, 0xD7, 0x52, 0x65, 0x24, 0x73, 0xB0, 0xCE, 0xEF, 
-	0x19, 0xCD, 0xAE, 0xFF, 0x78, 0x6C, 0x7B, 0xC0, 0x12, 0x03, 
-	0xD4, 0x4E, 0x72, 0x0D, 0x50, 0x6D, 0x3B, 0xA3, 0x3B, 0xA3, 
-	0x99, 0x5E, 0x9D, 0xC8, 0xD9, 0x0C, 0x85, 0xB3, 0xD9, 0x8A, 
-	0xD9, 0x54, 0x26, 0xDB, 0x6D, 0xFA, 0xAC, 0xBB, 0xFF, 0x25, 
-	0x4C, 0xC4, 0xD1, 0x79, 0xF4, 0x71, 0xD3, 0x86, 0x40, 0x18, 
-	0x13, 0xB0, 0x63, 0xB5, 0x72, 0x4E, 0x30, 0xC4, 0x97, 0x84, 
-	0x86, 0x2D, 0x56, 0x2F, 0xD7, 0x15, 0xF7, 0x7F, 0xC0, 0xAE, 
-	0xF5, 0xFC, 0x5B, 0xE5, 0xFB, 0xA1, 0xBA, 0xD3, 0x02, 0x03, 
-	0x01, 0x00, 0x01, 0x02, 0x82, 0x01, 0x01, 0x00, 0xA2, 0xE6, 
-	0xD8, 0x5F, 0x10, 0x71, 0x64, 0x08, 0x9E, 0x2E, 0x6D, 0xD1, 
-	0x6D, 0x1E, 0x85, 0xD2, 0x0A, 0xB1, 0x8C, 0x47, 0xCE, 0x2C, 
-	0x51, 0x6A, 0xA0, 0x12, 0x9E, 0x53, 0xDE, 0x91, 0x4C, 0x1D, 
-	0x6D, 0xEA, 0x59, 0x7B, 0xF2, 0x77, 0xAA, 0xD9, 0xC6, 0xD9, 
-	0x8A, 0xAB, 0xD8, 0xE1, 0x16, 0xE4, 0x63, 0x26, 0xFF, 0xB5, 
-	0x6C, 0x13, 0x59, 0xB8, 0xE3, 0xA5, 0xC8, 0x72, 0x17, 0x2E, 
-	0x0C, 0x9F, 0x6F, 0xE5, 0x59, 0x3F, 0x76, 0x6F, 0x49, 0xB1, 
-	0x11, 0xC2, 0x5A, 0x2E, 0x16, 0x29, 0x0D, 0xDE, 0xB7, 0x8E, 
-	0xDC, 0x40, 0xD5, 0xA2, 0xEE, 0xE0, 0x1E, 0xA1, 0xF4, 0xBE, 
-	0x97, 0xDB, 0x86, 0x63, 0x96, 0x14, 0xCD, 0x98, 0x09, 0x60, 
-	0x2D, 0x30, 0x76, 0x9C, 0x3C, 0xCD, 0xE6, 0x88, 0xEE, 0x47, 
-	0x92, 0x79, 0x0B, 0x5A, 0x00, 0xE2, 0x5E, 0x5F, 0x11, 0x7C, 
-	0x7D, 0xF9, 0x08, 0xB7, 0x20, 0x06, 0x89, 0x2A, 0x5D, 0xFD, 
-	0x00, 0xAB, 0x22, 0xE1, 0xF0, 0xB3, 0xBC, 0x24, 0xA9, 0x5E, 
-	0x26, 0x0E, 0x1F, 0x00, 0x2D, 0xFE, 0x21, 0x9A, 0x53, 0x5B, 
-	0x6D, 0xD3, 0x2B, 0xAB, 0x94, 0x82, 0x68, 0x43, 0x36, 0xD8, 
-	0xF6, 0x2F, 0xC6, 0x22, 0xFC, 0xB5, 0x41, 0x5D, 0x0D, 0x33, 
-	0x60, 0xEA, 0xA4, 0x7D, 0x7E, 0xE8, 0x4B, 0x55, 0x91, 0x56, 
-	0xD3, 0x5C, 0x57, 0x8F, 0x1F, 0x94, 0x17, 0x2F, 0xAA, 0xDE, 
-	0xE9, 0x9E, 0xA8, 0xF4, 0xCF, 0x8A, 0x4C, 0x8E, 0xA0, 0xE4, 
-	0x56, 0x73, 0xB2, 0xCF, 0x4F, 0x86, 0xC5, 0x69, 0x3C, 0xF3, 
-	0x24, 0x20, 0x8B, 0x5C, 0x96, 0x0C, 0xFA, 0x6B, 0x12, 0x3B, 
-	0x9A, 0x67, 0xC1, 0xDF, 0xC6, 0x96, 0xB2, 0xA5, 0xD5, 0x92, 
-	0x0D, 0x9B, 0x09, 0x42, 0x68, 0x24, 0x10, 0x45, 0xD4, 0x50, 
-	0xE4, 0x17, 0x39, 0x48, 0xD0, 0x35, 0x8B, 0x94, 0x6D, 0x11, 
-	0xDE, 0x8F, 0xCA, 0x59, 0x02, 0x81, 0x81, 0x00, 0xEA, 0x24, 
-	0xA7, 0xF9, 0x69, 0x33, 0xE9, 0x71, 0xDC, 0x52, 0x7D, 0x88, 
-	0x21, 0x28, 0x2F, 0x49, 0xDE, 0xBA, 0x72, 0x16, 0xE9, 0xCC, 
-	0x47, 0x7A, 0x88, 0x0D, 0x94, 0x57, 0x84, 0x58, 0x16, 0x3A, 
-	0x81, 0xB0, 0x3F, 0xA2, 0xCF, 0xA6, 0x6C, 0x1E, 0xB0, 0x06, 
-	0x29, 0x00, 0x8F, 0xE7, 0x77, 0x76, 0xAC, 0xDB, 0xCA, 0xC7, 
-	0xD9, 0x5E, 0x9B, 0x3F, 0x26, 0x90, 0x52, 0xAE, 0xFC, 0x38, 
-	0x90, 0x00, 0x14, 0xBB, 0xB4, 0x0F, 0x58, 0x94, 0xE7, 0x2F, 
-	0x6A, 0x7E, 0x1C, 0x4F, 0x41, 0x21, 0xD4, 0x31, 0x59, 0x1F, 
-	0x4E, 0x8A, 0x1A, 0x8D, 0xA7, 0x57, 0x6C, 0x22, 0xD8, 0xE5, 
-	0xF4, 0x7E, 0x32, 0xA6, 0x10, 0xCB, 0x64, 0xA5, 0x55, 0x03, 
-	0x87, 0xA6, 0x27, 0x05, 0x8C, 0xC3, 0xD7, 0xB6, 0x27, 0xB2, 
-	0x4D, 0xBA, 0x30, 0xDA, 0x47, 0x8F, 0x54, 0xD3, 0x3D, 0x8B, 
-	0x84, 0x8D, 0x94, 0x98, 0x58, 0xA5, 0x02, 0x81, 0x81, 0x00, 
-	0xD5, 0x38, 0x1B, 0xC3, 0x8F, 0xC5, 0x93, 0x0C, 0x47, 0x0B, 
-	0x6F, 0x35, 0x92, 0xC5, 0xB0, 0x8D, 0x46, 0xC8, 0x92, 0x18, 
-	0x8F, 0xF5, 0x80, 0x0A, 0xF7, 0xEF, 0xA1, 0xFE, 0x80, 0xB9, 
-	0xB5, 0x2A, 0xBA, 0xCA, 0x18, 0xB0, 0x5D, 0xA5, 0x07, 0xD0, 
-	0x93, 0x8D, 0xD8, 0x9C, 0x04, 0x1C, 0xD4, 0x62, 0x8E, 0xA6, 
-	0x26, 0x81, 0x01, 0xFF, 0xCE, 0x8A, 0x2A, 0x63, 0x34, 0x35, 
-	0x40, 0xAA, 0x6D, 0x80, 0xDE, 0x89, 0x23, 0x6A, 0x57, 0x4D, 
-	0x9E, 0x6E, 0xAD, 0x93, 0x4E, 0x56, 0x90, 0x0B, 0x6D, 0x9D, 
-	0x73, 0x8B, 0x0C, 0xAE, 0x27, 0x3D, 0xDE, 0x4E, 0xF0, 0xAA, 
-	0xC5, 0x6C, 0x78, 0x67, 0x6C, 0x94, 0x52, 0x9C, 0x37, 0x67, 
-	0x6C, 0x2D, 0xEF, 0xBB, 0xAF, 0xDF, 0xA6, 0x90, 0x3C, 0xC4, 
-	0x47, 0xCF, 0x8D, 0x96, 0x9E, 0x98, 0xA9, 0xB4, 0x9F, 0xC5, 
-	0xA6, 0x50, 0xDC, 0xB3, 0xF0, 0xFB, 0x74, 0x17, 0x02, 0x81, 
-	0x80, 0x5E, 0x83, 0x09, 0x62, 0xBD, 0xBA, 0x7C, 0xA2, 0xBF, 
-	0x42, 0x74, 0xF5, 0x7C, 0x1C, 0xD2, 0x69, 0xC9, 0x04, 0x0D, 
-	0x85, 0x7E, 0x3E, 0x3D, 0x24, 0x12, 0xC3, 0x18, 0x7B, 0xF3, 
-	0x29, 0xF3, 0x5F, 0x0E, 0x76, 0x6C, 0x59, 0x75, 0xE4, 0x41, 
-	0x84, 0x69, 0x9D, 0x32, 0xF3, 0xCD, 0x22, 0xAB, 0xB0, 0x35, 
-	0xBA, 0x4A, 0xB2, 0x3C, 0xE5, 0xD9, 0x58, 0xB6, 0x62, 0x4F, 
-	0x5D, 0xDE, 0xE5, 0x9E, 0x0A, 0xCA, 0x53, 0xB2, 0x2C, 0xF7, 
-	0x9E, 0xB3, 0x6B, 0x0A, 0x5B, 0x79, 0x65, 0xEC, 0x6E, 0x91, 
-	0x4E, 0x92, 0x20, 0xF6, 0xFC, 0xFC, 0x16, 0xED, 0xD3, 0x76, 
-	0x0C, 0xE2, 0xEC, 0x7F, 0xB2, 0x69, 0x13, 0x6B, 0x78, 0x0E, 
-	0x5A, 0x46, 0x64, 0xB4, 0x5E, 0xB7, 0x25, 0xA0, 0x5A, 0x75, 
-	0x3A, 0x4B, 0xEF, 0xC7, 0x3C, 0x3E, 0xF7, 0xFD, 0x26, 0xB8, 
-	0x20, 0xC4, 0x99, 0x0A, 0x9A, 0x73, 0xBE, 0xC3, 0x19, 0x02, 
-	0x81, 0x81, 0x00, 0xBA, 0x44, 0x93, 0x14, 0xAC, 0x34, 0x19, 
-	0x3B, 0x5F, 0x91, 0x60, 0xAC, 0xF7, 0xB4, 0xD6, 0x81, 0x05, 
-	0x36, 0x51, 0x53, 0x3D, 0xE8, 0x65, 0xDC, 0xAF, 0x2E, 0xDC, 
-	0x61, 0x3E, 0xC9, 0x7D, 0xB8, 0x7F, 0x87, 0xF0, 0x3B, 0x9B, 
-	0x03, 0x82, 0x29, 0x37, 0xCE, 0x72, 0x4E, 0x11, 0xD5, 0xB1, 
-	0xC1, 0x0C, 0x07, 0xA0, 0x99, 0x91, 0x4A, 0x8D, 0x7F, 0xEC, 
-	0x79, 0xCF, 0xF1, 0x39, 0xB5, 0xE9, 0x85, 0xEC, 0x62, 0xF7, 
-	0xDA, 0x7D, 0xBC, 0x64, 0x4D, 0x22, 0x3C, 0x0E, 0xF2, 0xD6, 
-	0x51, 0xF5, 0x87, 0xD8, 0x99, 0xC0, 0x11, 0x20, 0x5D, 0x0F, 
-	0x29, 0xFD, 0x5B, 0xE2, 0xAE, 0xD9, 0x1C, 0xD9, 0x21, 0x56, 
-	0x6D, 0xFC, 0x84, 0xD0, 0x5F, 0xED, 0x10, 0x15, 0x1C, 0x18, 
-	0x21, 0xE7, 0xC4, 0x3D, 0x4B, 0xD7, 0xD0, 0x9E, 0x6A, 0x95, 
-	0xCF, 0x22, 0xC9, 0x03, 0x7B, 0x9E, 0xE3, 0x60, 0x01, 0xFC, 
-	0x2F, 0x02, 0x81, 0x80, 0x11, 0xD0, 0x4B, 0xCF, 0x1B, 0x67, 
-	0xB9, 0x9F, 0x10, 0x75, 0x47, 0x86, 0x65, 0xAE, 0x31, 0xC2, 
-	0xC6, 0x30, 0xAC, 0x59, 0x06, 0x50, 0xD9, 0x0F, 0xB5, 0x70, 
-	0x06, 0xF7, 0xF0, 0xD3, 0xC8, 0x62, 0x7C, 0xA8, 0xDA, 0x6E, 
-	0xF6, 0x21, 0x3F, 0xD3, 0x7F, 0x5F, 0xEA, 0x8A, 0xAB, 0x3F, 
-	0xD9, 0x2A, 0x5E, 0xF3, 0x51, 0xD2, 0xC2, 0x30, 0x37, 0xE3, 
-	0x2D, 0xA3, 0x75, 0x0D, 0x1E, 0x4D, 0x21, 0x34, 0xD5, 0x57, 
-	0x70, 0x5C, 0x89, 0xBF, 0x72, 0xEC, 0x4A, 0x6E, 0x68, 0xD5, 
-	0xCD, 0x18, 0x74, 0x33, 0x4E, 0x8C, 0x3A, 0x45, 0x8F, 0xE6, 
-	0x96, 0x40, 0xEB, 0x63, 0xF9, 0x19, 0x86, 0x3A, 0x51, 0xDD, 
-	0x89, 0x4B, 0xB0, 0xF3, 0xF9, 0x9F, 0x5D, 0x28, 0x95, 0x38, 
-	0xBE, 0x35, 0xAB, 0xCA, 0x5C, 0xE7, 0x93, 0x53, 0x34, 0xA1, 
-	0x45, 0x5D, 0x13, 0x39, 0x65, 0x42, 0x46, 0xA1, 0x9F, 0xCD, 
-	0xF5, 0xBF
+        0x30, 0x82, 0x04, 0xA4, 0x02, 0x01, 0x00, 0x02, 0x82, 0x01,
+        0x01, 0x00, 0xC3, 0x03, 0xD1, 0x2B, 0xFE, 0x39, 0xA4, 0x32,
+        0x45, 0x3B, 0x53, 0xC8, 0x84, 0x2B, 0x2A, 0x7C, 0x74, 0x9A,
+        0xBD, 0xAA, 0x2A, 0x52, 0x07, 0x47, 0xD6, 0xA6, 0x36, 0xB2,
+        0x07, 0x32, 0x8E, 0xD0, 0xBA, 0x69, 0x7B, 0xC6, 0xC3, 0x44,
+        0x9E, 0xD4, 0x81, 0x48, 0xFD, 0x2D, 0x68, 0xA2, 0x8B, 0x67,
+        0xBB, 0xA1, 0x75, 0xC8, 0x36, 0x2C, 0x4A, 0xD2, 0x1B, 0xF7,
+        0x8B, 0xBA, 0xCF, 0x0D, 0xF9, 0xEF, 0xEC, 0xF1, 0x81, 0x1E,
+        0x7B, 0x9B, 0x03, 0x47, 0x9A, 0xBF, 0x65, 0xCC, 0x7F, 0x65,
+        0x24, 0x69, 0xA6, 0xE8, 0x14, 0x89, 0x5B, 0xE4, 0x34, 0xF7,
+        0xC5, 0xB0, 0x14, 0x93, 0xF5, 0x67, 0x7B, 0x3A, 0x7A, 0x78,
+        0xE1, 0x01, 0x56, 0x56, 0x91, 0xA6, 0x13, 0x42, 0x8D, 0xD2,
+        0x3C, 0x40, 0x9C, 0x4C, 0xEF, 0xD1, 0x86, 0xDF, 0x37, 0x51,
+        0x1B, 0x0C, 0xA1, 0x3B, 0xF5, 0xF1, 0xA3, 0x4A, 0x35, 0xE4,
+        0xE1, 0xCE, 0x96, 0xDF, 0x1B, 0x7E, 0xBF, 0x4E, 0x97, 0xD0,
+        0x10, 0xE8, 0xA8, 0x08, 0x30, 0x81, 0xAF, 0x20, 0x0B, 0x43,
+        0x14, 0xC5, 0x74, 0x67, 0xB4, 0x32, 0x82, 0x6F, 0x8D, 0x86,
+        0xC2, 0x88, 0x40, 0x99, 0x36, 0x83, 0xBA, 0x1E, 0x40, 0x72,
+        0x22, 0x17, 0xD7, 0x52, 0x65, 0x24, 0x73, 0xB0, 0xCE, 0xEF,
+        0x19, 0xCD, 0xAE, 0xFF, 0x78, 0x6C, 0x7B, 0xC0, 0x12, 0x03,
+        0xD4, 0x4E, 0x72, 0x0D, 0x50, 0x6D, 0x3B, 0xA3, 0x3B, 0xA3,
+        0x99, 0x5E, 0x9D, 0xC8, 0xD9, 0x0C, 0x85, 0xB3, 0xD9, 0x8A,
+        0xD9, 0x54, 0x26, 0xDB, 0x6D, 0xFA, 0xAC, 0xBB, 0xFF, 0x25,
+        0x4C, 0xC4, 0xD1, 0x79, 0xF4, 0x71, 0xD3, 0x86, 0x40, 0x18,
+        0x13, 0xB0, 0x63, 0xB5, 0x72, 0x4E, 0x30, 0xC4, 0x97, 0x84,
+        0x86, 0x2D, 0x56, 0x2F, 0xD7, 0x15, 0xF7, 0x7F, 0xC0, 0xAE,
+        0xF5, 0xFC, 0x5B, 0xE5, 0xFB, 0xA1, 0xBA, 0xD3, 0x02, 0x03,
+        0x01, 0x00, 0x01, 0x02, 0x82, 0x01, 0x01, 0x00, 0xA2, 0xE6,
+        0xD8, 0x5F, 0x10, 0x71, 0x64, 0x08, 0x9E, 0x2E, 0x6D, 0xD1,
+        0x6D, 0x1E, 0x85, 0xD2, 0x0A, 0xB1, 0x8C, 0x47, 0xCE, 0x2C,
+        0x51, 0x6A, 0xA0, 0x12, 0x9E, 0x53, 0xDE, 0x91, 0x4C, 0x1D,
+        0x6D, 0xEA, 0x59, 0x7B, 0xF2, 0x77, 0xAA, 0xD9, 0xC6, 0xD9,
+        0x8A, 0xAB, 0xD8, 0xE1, 0x16, 0xE4, 0x63, 0x26, 0xFF, 0xB5,
+        0x6C, 0x13, 0x59, 0xB8, 0xE3, 0xA5, 0xC8, 0x72, 0x17, 0x2E,
+        0x0C, 0x9F, 0x6F, 0xE5, 0x59, 0x3F, 0x76, 0x6F, 0x49, 0xB1,
+        0x11, 0xC2, 0x5A, 0x2E, 0x16, 0x29, 0x0D, 0xDE, 0xB7, 0x8E,
+        0xDC, 0x40, 0xD5, 0xA2, 0xEE, 0xE0, 0x1E, 0xA1, 0xF4, 0xBE,
+        0x97, 0xDB, 0x86, 0x63, 0x96, 0x14, 0xCD, 0x98, 0x09, 0x60,
+        0x2D, 0x30, 0x76, 0x9C, 0x3C, 0xCD, 0xE6, 0x88, 0xEE, 0x47,
+        0x92, 0x79, 0x0B, 0x5A, 0x00, 0xE2, 0x5E, 0x5F, 0x11, 0x7C,
+        0x7D, 0xF9, 0x08, 0xB7, 0x20, 0x06, 0x89, 0x2A, 0x5D, 0xFD,
+        0x00, 0xAB, 0x22, 0xE1, 0xF0, 0xB3, 0xBC, 0x24, 0xA9, 0x5E,
+        0x26, 0x0E, 0x1F, 0x00, 0x2D, 0xFE, 0x21, 0x9A, 0x53, 0x5B,
+        0x6D, 0xD3, 0x2B, 0xAB, 0x94, 0x82, 0x68, 0x43, 0x36, 0xD8,
+        0xF6, 0x2F, 0xC6, 0x22, 0xFC, 0xB5, 0x41, 0x5D, 0x0D, 0x33,
+        0x60, 0xEA, 0xA4, 0x7D, 0x7E, 0xE8, 0x4B, 0x55, 0x91, 0x56,
+        0xD3, 0x5C, 0x57, 0x8F, 0x1F, 0x94, 0x17, 0x2F, 0xAA, 0xDE,
+        0xE9, 0x9E, 0xA8, 0xF4, 0xCF, 0x8A, 0x4C, 0x8E, 0xA0, 0xE4,
+        0x56, 0x73, 0xB2, 0xCF, 0x4F, 0x86, 0xC5, 0x69, 0x3C, 0xF3,
+        0x24, 0x20, 0x8B, 0x5C, 0x96, 0x0C, 0xFA, 0x6B, 0x12, 0x3B,
+        0x9A, 0x67, 0xC1, 0xDF, 0xC6, 0x96, 0xB2, 0xA5, 0xD5, 0x92,
+        0x0D, 0x9B, 0x09, 0x42, 0x68, 0x24, 0x10, 0x45, 0xD4, 0x50,
+        0xE4, 0x17, 0x39, 0x48, 0xD0, 0x35, 0x8B, 0x94, 0x6D, 0x11,
+        0xDE, 0x8F, 0xCA, 0x59, 0x02, 0x81, 0x81, 0x00, 0xEA, 0x24,
+        0xA7, 0xF9, 0x69, 0x33, 0xE9, 0x71, 0xDC, 0x52, 0x7D, 0x88,
+        0x21, 0x28, 0x2F, 0x49, 0xDE, 0xBA, 0x72, 0x16, 0xE9, 0xCC,
+        0x47, 0x7A, 0x88, 0x0D, 0x94, 0x57, 0x84, 0x58, 0x16, 0x3A,
+        0x81, 0xB0, 0x3F, 0xA2, 0xCF, 0xA6, 0x6C, 0x1E, 0xB0, 0x06,
+        0x29, 0x00, 0x8F, 0xE7, 0x77, 0x76, 0xAC, 0xDB, 0xCA, 0xC7,
+        0xD9, 0x5E, 0x9B, 0x3F, 0x26, 0x90, 0x52, 0xAE, 0xFC, 0x38,
+        0x90, 0x00, 0x14, 0xBB, 0xB4, 0x0F, 0x58, 0x94, 0xE7, 0x2F,
+        0x6A, 0x7E, 0x1C, 0x4F, 0x41, 0x21, 0xD4, 0x31, 0x59, 0x1F,
+        0x4E, 0x8A, 0x1A, 0x8D, 0xA7, 0x57, 0x6C, 0x22, 0xD8, 0xE5,
+        0xF4, 0x7E, 0x32, 0xA6, 0x10, 0xCB, 0x64, 0xA5, 0x55, 0x03,
+        0x87, 0xA6, 0x27, 0x05, 0x8C, 0xC3, 0xD7, 0xB6, 0x27, 0xB2,
+        0x4D, 0xBA, 0x30, 0xDA, 0x47, 0x8F, 0x54, 0xD3, 0x3D, 0x8B,
+        0x84, 0x8D, 0x94, 0x98, 0x58, 0xA5, 0x02, 0x81, 0x81, 0x00,
+        0xD5, 0x38, 0x1B, 0xC3, 0x8F, 0xC5, 0x93, 0x0C, 0x47, 0x0B,
+        0x6F, 0x35, 0x92, 0xC5, 0xB0, 0x8D, 0x46, 0xC8, 0x92, 0x18,
+        0x8F, 0xF5, 0x80, 0x0A, 0xF7, 0xEF, 0xA1, 0xFE, 0x80, 0xB9,
+        0xB5, 0x2A, 0xBA, 0xCA, 0x18, 0xB0, 0x5D, 0xA5, 0x07, 0xD0,
+        0x93, 0x8D, 0xD8, 0x9C, 0x04, 0x1C, 0xD4, 0x62, 0x8E, 0xA6,
+        0x26, 0x81, 0x01, 0xFF, 0xCE, 0x8A, 0x2A, 0x63, 0x34, 0x35,
+        0x40, 0xAA, 0x6D, 0x80, 0xDE, 0x89, 0x23, 0x6A, 0x57, 0x4D,
+        0x9E, 0x6E, 0xAD, 0x93, 0x4E, 0x56, 0x90, 0x0B, 0x6D, 0x9D,
+        0x73, 0x8B, 0x0C, 0xAE, 0x27, 0x3D, 0xDE, 0x4E, 0xF0, 0xAA,
+        0xC5, 0x6C, 0x78, 0x67, 0x6C, 0x94, 0x52, 0x9C, 0x37, 0x67,
+        0x6C, 0x2D, 0xEF, 0xBB, 0xAF, 0xDF, 0xA6, 0x90, 0x3C, 0xC4,
+        0x47, 0xCF, 0x8D, 0x96, 0x9E, 0x98, 0xA9, 0xB4, 0x9F, 0xC5,
+        0xA6, 0x50, 0xDC, 0xB3, 0xF0, 0xFB, 0x74, 0x17, 0x02, 0x81,
+        0x80, 0x5E, 0x83, 0x09, 0x62, 0xBD, 0xBA, 0x7C, 0xA2, 0xBF,
+        0x42, 0x74, 0xF5, 0x7C, 0x1C, 0xD2, 0x69, 0xC9, 0x04, 0x0D,
+        0x85, 0x7E, 0x3E, 0x3D, 0x24, 0x12, 0xC3, 0x18, 0x7B, 0xF3,
+        0x29, 0xF3, 0x5F, 0x0E, 0x76, 0x6C, 0x59, 0x75, 0xE4, 0x41,
+        0x84, 0x69, 0x9D, 0x32, 0xF3, 0xCD, 0x22, 0xAB, 0xB0, 0x35,
+        0xBA, 0x4A, 0xB2, 0x3C, 0xE5, 0xD9, 0x58, 0xB6, 0x62, 0x4F,
+        0x5D, 0xDE, 0xE5, 0x9E, 0x0A, 0xCA, 0x53, 0xB2, 0x2C, 0xF7,
+        0x9E, 0xB3, 0x6B, 0x0A, 0x5B, 0x79, 0x65, 0xEC, 0x6E, 0x91,
+        0x4E, 0x92, 0x20, 0xF6, 0xFC, 0xFC, 0x16, 0xED, 0xD3, 0x76,
+        0x0C, 0xE2, 0xEC, 0x7F, 0xB2, 0x69, 0x13, 0x6B, 0x78, 0x0E,
+        0x5A, 0x46, 0x64, 0xB4, 0x5E, 0xB7, 0x25, 0xA0, 0x5A, 0x75,
+        0x3A, 0x4B, 0xEF, 0xC7, 0x3C, 0x3E, 0xF7, 0xFD, 0x26, 0xB8,
+        0x20, 0xC4, 0x99, 0x0A, 0x9A, 0x73, 0xBE, 0xC3, 0x19, 0x02,
+        0x81, 0x81, 0x00, 0xBA, 0x44, 0x93, 0x14, 0xAC, 0x34, 0x19,
+        0x3B, 0x5F, 0x91, 0x60, 0xAC, 0xF7, 0xB4, 0xD6, 0x81, 0x05,
+        0x36, 0x51, 0x53, 0x3D, 0xE8, 0x65, 0xDC, 0xAF, 0x2E, 0xDC,
+        0x61, 0x3E, 0xC9, 0x7D, 0xB8, 0x7F, 0x87, 0xF0, 0x3B, 0x9B,
+        0x03, 0x82, 0x29, 0x37, 0xCE, 0x72, 0x4E, 0x11, 0xD5, 0xB1,
+        0xC1, 0x0C, 0x07, 0xA0, 0x99, 0x91, 0x4A, 0x8D, 0x7F, 0xEC,
+        0x79, 0xCF, 0xF1, 0x39, 0xB5, 0xE9, 0x85, 0xEC, 0x62, 0xF7,
+        0xDA, 0x7D, 0xBC, 0x64, 0x4D, 0x22, 0x3C, 0x0E, 0xF2, 0xD6,
+        0x51, 0xF5, 0x87, 0xD8, 0x99, 0xC0, 0x11, 0x20, 0x5D, 0x0F,
+        0x29, 0xFD, 0x5B, 0xE2, 0xAE, 0xD9, 0x1C, 0xD9, 0x21, 0x56,
+        0x6D, 0xFC, 0x84, 0xD0, 0x5F, 0xED, 0x10, 0x15, 0x1C, 0x18,
+        0x21, 0xE7, 0xC4, 0x3D, 0x4B, 0xD7, 0xD0, 0x9E, 0x6A, 0x95,
+        0xCF, 0x22, 0xC9, 0x03, 0x7B, 0x9E, 0xE3, 0x60, 0x01, 0xFC,
+        0x2F, 0x02, 0x81, 0x80, 0x11, 0xD0, 0x4B, 0xCF, 0x1B, 0x67,
+        0xB9, 0x9F, 0x10, 0x75, 0x47, 0x86, 0x65, 0xAE, 0x31, 0xC2,
+        0xC6, 0x30, 0xAC, 0x59, 0x06, 0x50, 0xD9, 0x0F, 0xB5, 0x70,
+        0x06, 0xF7, 0xF0, 0xD3, 0xC8, 0x62, 0x7C, 0xA8, 0xDA, 0x6E,
+        0xF6, 0x21, 0x3F, 0xD3, 0x7F, 0x5F, 0xEA, 0x8A, 0xAB, 0x3F,
+        0xD9, 0x2A, 0x5E, 0xF3, 0x51, 0xD2, 0xC2, 0x30, 0x37, 0xE3,
+        0x2D, 0xA3, 0x75, 0x0D, 0x1E, 0x4D, 0x21, 0x34, 0xD5, 0x57,
+        0x70, 0x5C, 0x89, 0xBF, 0x72, 0xEC, 0x4A, 0x6E, 0x68, 0xD5,
+        0xCD, 0x18, 0x74, 0x33, 0x4E, 0x8C, 0x3A, 0x45, 0x8F, 0xE6,
+        0x96, 0x40, 0xEB, 0x63, 0xF9, 0x19, 0x86, 0x3A, 0x51, 0xDD,
+        0x89, 0x4B, 0xB0, 0xF3, 0xF9, 0x9F, 0x5D, 0x28, 0x95, 0x38,
+        0xBE, 0x35, 0xAB, 0xCA, 0x5C, 0xE7, 0x93, 0x53, 0x34, 0xA1,
+        0x45, 0x5D, 0x13, 0x39, 0x65, 0x42, 0x46, 0xA1, 0x9F, 0xCD,
+        0xF5, 0xBF
 };
 static const int sizeof_client_key_der_2048 = sizeof(client_key_der_2048);
 
 /* ./certs/client-keyPub.der, 2048-bit */
 static const unsigned char client_keypub_der_2048[] =
 {
-	0x30, 0x82, 0x01, 0x22, 0x30, 0x0D, 0x06, 0x09, 0x2A, 0x86, 
-	0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x01, 0x05, 0x00, 0x03, 
-	0x82, 0x01, 0x0F, 0x00, 0x30, 0x82, 0x01, 0x0A, 0x02, 0x82, 
-	0x01, 0x01, 0x00, 0xC3, 0x03, 0xD1, 0x2B, 0xFE, 0x39, 0xA4, 
-	0x32, 0x45, 0x3B, 0x53, 0xC8, 0x84, 0x2B, 0x2A, 0x7C, 0x74, 
-	0x9A, 0xBD, 0xAA, 0x2A, 0x52, 0x07, 0x47, 0xD6, 0xA6, 0x36, 
-	0xB2, 0x07, 0x32, 0x8E, 0xD0, 0xBA, 0x69, 0x7B, 0xC6, 0xC3, 
-	0x44, 0x9E, 0xD4, 0x81, 0x48, 0xFD, 0x2D, 0x68, 0xA2, 0x8B, 
-	0x67, 0xBB, 0xA1, 0x75, 0xC8, 0x36, 0x2C, 0x4A, 0xD2, 0x1B, 
-	0xF7, 0x8B, 0xBA, 0xCF, 0x0D, 0xF9, 0xEF, 0xEC, 0xF1, 0x81, 
-	0x1E, 0x7B, 0x9B, 0x03, 0x47, 0x9A, 0xBF, 0x65, 0xCC, 0x7F, 
-	0x65, 0x24, 0x69, 0xA6, 0xE8, 0x14, 0x89, 0x5B, 0xE4, 0x34, 
-	0xF7, 0xC5, 0xB0, 0x14, 0x93, 0xF5, 0x67, 0x7B, 0x3A, 0x7A, 
-	0x78, 0xE1, 0x01, 0x56, 0x56, 0x91, 0xA6, 0x13, 0x42, 0x8D, 
-	0xD2, 0x3C, 0x40, 0x9C, 0x4C, 0xEF, 0xD1, 0x86, 0xDF, 0x37, 
-	0x51, 0x1B, 0x0C, 0xA1, 0x3B, 0xF5, 0xF1, 0xA3, 0x4A, 0x35, 
-	0xE4, 0xE1, 0xCE, 0x96, 0xDF, 0x1B, 0x7E, 0xBF, 0x4E, 0x97, 
-	0xD0, 0x10, 0xE8, 0xA8, 0x08, 0x30, 0x81, 0xAF, 0x20, 0x0B, 
-	0x43, 0x14, 0xC5, 0x74, 0x67, 0xB4, 0x32, 0x82, 0x6F, 0x8D, 
-	0x86, 0xC2, 0x88, 0x40, 0x99, 0x36, 0x83, 0xBA, 0x1E, 0x40, 
-	0x72, 0x22, 0x17, 0xD7, 0x52, 0x65, 0x24, 0x73, 0xB0, 0xCE, 
-	0xEF, 0x19, 0xCD, 0xAE, 0xFF, 0x78, 0x6C, 0x7B, 0xC0, 0x12, 
-	0x03, 0xD4, 0x4E, 0x72, 0x0D, 0x50, 0x6D, 0x3B, 0xA3, 0x3B, 
-	0xA3, 0x99, 0x5E, 0x9D, 0xC8, 0xD9, 0x0C, 0x85, 0xB3, 0xD9, 
-	0x8A, 0xD9, 0x54, 0x26, 0xDB, 0x6D, 0xFA, 0xAC, 0xBB, 0xFF, 
-	0x25, 0x4C, 0xC4, 0xD1, 0x79, 0xF4, 0x71, 0xD3, 0x86, 0x40, 
-	0x18, 0x13, 0xB0, 0x63, 0xB5, 0x72, 0x4E, 0x30, 0xC4, 0x97, 
-	0x84, 0x86, 0x2D, 0x56, 0x2F, 0xD7, 0x15, 0xF7, 0x7F, 0xC0, 
-	0xAE, 0xF5, 0xFC, 0x5B, 0xE5, 0xFB, 0xA1, 0xBA, 0xD3, 0x02, 
-	0x03, 0x01, 0x00, 0x01
+        0x30, 0x82, 0x01, 0x22, 0x30, 0x0D, 0x06, 0x09, 0x2A, 0x86,
+        0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x01, 0x05, 0x00, 0x03,
+        0x82, 0x01, 0x0F, 0x00, 0x30, 0x82, 0x01, 0x0A, 0x02, 0x82,
+        0x01, 0x01, 0x00, 0xC3, 0x03, 0xD1, 0x2B, 0xFE, 0x39, 0xA4,
+        0x32, 0x45, 0x3B, 0x53, 0xC8, 0x84, 0x2B, 0x2A, 0x7C, 0x74,
+        0x9A, 0xBD, 0xAA, 0x2A, 0x52, 0x07, 0x47, 0xD6, 0xA6, 0x36,
+        0xB2, 0x07, 0x32, 0x8E, 0xD0, 0xBA, 0x69, 0x7B, 0xC6, 0xC3,
+        0x44, 0x9E, 0xD4, 0x81, 0x48, 0xFD, 0x2D, 0x68, 0xA2, 0x8B,
+        0x67, 0xBB, 0xA1, 0x75, 0xC8, 0x36, 0x2C, 0x4A, 0xD2, 0x1B,
+        0xF7, 0x8B, 0xBA, 0xCF, 0x0D, 0xF9, 0xEF, 0xEC, 0xF1, 0x81,
+        0x1E, 0x7B, 0x9B, 0x03, 0x47, 0x9A, 0xBF, 0x65, 0xCC, 0x7F,
+        0x65, 0x24, 0x69, 0xA6, 0xE8, 0x14, 0x89, 0x5B, 0xE4, 0x34,
+        0xF7, 0xC5, 0xB0, 0x14, 0x93, 0xF5, 0x67, 0x7B, 0x3A, 0x7A,
+        0x78, 0xE1, 0x01, 0x56, 0x56, 0x91, 0xA6, 0x13, 0x42, 0x8D,
+        0xD2, 0x3C, 0x40, 0x9C, 0x4C, 0xEF, 0xD1, 0x86, 0xDF, 0x37,
+        0x51, 0x1B, 0x0C, 0xA1, 0x3B, 0xF5, 0xF1, 0xA3, 0x4A, 0x35,
+        0xE4, 0xE1, 0xCE, 0x96, 0xDF, 0x1B, 0x7E, 0xBF, 0x4E, 0x97,
+        0xD0, 0x10, 0xE8, 0xA8, 0x08, 0x30, 0x81, 0xAF, 0x20, 0x0B,
+        0x43, 0x14, 0xC5, 0x74, 0x67, 0xB4, 0x32, 0x82, 0x6F, 0x8D,
+        0x86, 0xC2, 0x88, 0x40, 0x99, 0x36, 0x83, 0xBA, 0x1E, 0x40,
+        0x72, 0x22, 0x17, 0xD7, 0x52, 0x65, 0x24, 0x73, 0xB0, 0xCE,
+        0xEF, 0x19, 0xCD, 0xAE, 0xFF, 0x78, 0x6C, 0x7B, 0xC0, 0x12,
+        0x03, 0xD4, 0x4E, 0x72, 0x0D, 0x50, 0x6D, 0x3B, 0xA3, 0x3B,
+        0xA3, 0x99, 0x5E, 0x9D, 0xC8, 0xD9, 0x0C, 0x85, 0xB3, 0xD9,
+        0x8A, 0xD9, 0x54, 0x26, 0xDB, 0x6D, 0xFA, 0xAC, 0xBB, 0xFF,
+        0x25, 0x4C, 0xC4, 0xD1, 0x79, 0xF4, 0x71, 0xD3, 0x86, 0x40,
+        0x18, 0x13, 0xB0, 0x63, 0xB5, 0x72, 0x4E, 0x30, 0xC4, 0x97,
+        0x84, 0x86, 0x2D, 0x56, 0x2F, 0xD7, 0x15, 0xF7, 0x7F, 0xC0,
+        0xAE, 0xF5, 0xFC, 0x5B, 0xE5, 0xFB, 0xA1, 0xBA, 0xD3, 0x02,
+        0x03, 0x01, 0x00, 0x01
 };
 static const int sizeof_client_keypub_der_2048 = sizeof(client_keypub_der_2048);
 
 /* ./certs/client-cert.der, 2048-bit */
 static const unsigned char client_cert_der_2048[] =
 {
-	0x30, 0x82, 0x04, 0xCA, 0x30, 0x82, 0x03, 0xB2, 0xA0, 0x03, 
-	0x02, 0x01, 0x02, 0x02, 0x09, 0x00, 0xAA, 0xC4, 0xBF, 0x4C, 
-	0x50, 0xBD, 0x55, 0x77, 0x30, 0x0D, 0x06, 0x09, 0x2A, 0x86, 
-	0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0B, 0x05, 0x00, 0x30, 
-	0x81, 0x9E, 0x31, 0x0B, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 
-	0x06, 0x13, 0x02, 0x55, 0x53, 0x31, 0x10, 0x30, 0x0E, 0x06, 
-	0x03, 0x55, 0x04, 0x08, 0x0C, 0x07, 0x4D, 0x6F, 0x6E, 0x74, 
-	0x61, 0x6E, 0x61, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 
-	0x04, 0x07, 0x0C, 0x07, 0x42, 0x6F, 0x7A, 0x65, 0x6D, 0x61, 
-	0x6E, 0x31, 0x15, 0x30, 0x13, 0x06, 0x03, 0x55, 0x04, 0x0A, 
-	0x0C, 0x0C, 0x77, 0x6F, 0x6C, 0x66, 0x53, 0x53, 0x4C, 0x5F, 
-	0x32, 0x30, 0x34, 0x38, 0x31, 0x19, 0x30, 0x17, 0x06, 0x03, 
-	0x55, 0x04, 0x0B, 0x0C, 0x10, 0x50, 0x72, 0x6F, 0x67, 0x72, 
-	0x61, 0x6D, 0x6D, 0x69, 0x6E, 0x67, 0x2D, 0x32, 0x30, 0x34, 
-	0x38, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 0x03, 
-	0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77, 0x6F, 0x6C, 0x66, 
-	0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x31, 0x1F, 0x30, 
-	0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 
-	0x09, 0x01, 0x16, 0x10, 0x69, 0x6E, 0x66, 0x6F, 0x40, 0x77, 
-	0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 
-	0x30, 0x1E, 0x17, 0x0D, 0x31, 0x38, 0x30, 0x34, 0x31, 0x33, 
-	0x31, 0x35, 0x32, 0x33, 0x30, 0x39, 0x5A, 0x17, 0x0D, 0x32, 
-	0x31, 0x30, 0x31, 0x30, 0x37, 0x31, 0x35, 0x32, 0x33, 0x30, 
-	0x39, 0x5A, 0x30, 0x81, 0x9E, 0x31, 0x0B, 0x30, 0x09, 0x06, 
-	0x03, 0x55, 0x04, 0x06, 0x13, 0x02, 0x55, 0x53, 0x31, 0x10, 
-	0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x08, 0x0C, 0x07, 0x4D, 
-	0x6F, 0x6E, 0x74, 0x61, 0x6E, 0x61, 0x31, 0x10, 0x30, 0x0E, 
-	0x06, 0x03, 0x55, 0x04, 0x07, 0x0C, 0x07, 0x42, 0x6F, 0x7A, 
-	0x65, 0x6D, 0x61, 0x6E, 0x31, 0x15, 0x30, 0x13, 0x06, 0x03, 
-	0x55, 0x04, 0x0A, 0x0C, 0x0C, 0x77, 0x6F, 0x6C, 0x66, 0x53, 
-	0x53, 0x4C, 0x5F, 0x32, 0x30, 0x34, 0x38, 0x31, 0x19, 0x30, 
-	0x17, 0x06, 0x03, 0x55, 0x04, 0x0B, 0x0C, 0x10, 0x50, 0x72, 
-	0x6F, 0x67, 0x72, 0x61, 0x6D, 0x6D, 0x69, 0x6E, 0x67, 0x2D, 
-	0x32, 0x30, 0x34, 0x38, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 
-	0x55, 0x04, 0x03, 0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77, 
-	0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 
-	0x31, 0x1F, 0x30, 0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 
-	0xF7, 0x0D, 0x01, 0x09, 0x01, 0x16, 0x10, 0x69, 0x6E, 0x66, 
-	0x6F, 0x40, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 
-	0x63, 0x6F, 0x6D, 0x30, 0x82, 0x01, 0x22, 0x30, 0x0D, 0x06, 
-	0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x01, 
-	0x05, 0x00, 0x03, 0x82, 0x01, 0x0F, 0x00, 0x30, 0x82, 0x01, 
-	0x0A, 0x02, 0x82, 0x01, 0x01, 0x00, 0xC3, 0x03, 0xD1, 0x2B, 
-	0xFE, 0x39, 0xA4, 0x32, 0x45, 0x3B, 0x53, 0xC8, 0x84, 0x2B, 
-	0x2A, 0x7C, 0x74, 0x9A, 0xBD, 0xAA, 0x2A, 0x52, 0x07, 0x47, 
-	0xD6, 0xA6, 0x36, 0xB2, 0x07, 0x32, 0x8E, 0xD0, 0xBA, 0x69, 
-	0x7B, 0xC6, 0xC3, 0x44, 0x9E, 0xD4, 0x81, 0x48, 0xFD, 0x2D, 
-	0x68, 0xA2, 0x8B, 0x67, 0xBB, 0xA1, 0x75, 0xC8, 0x36, 0x2C, 
-	0x4A, 0xD2, 0x1B, 0xF7, 0x8B, 0xBA, 0xCF, 0x0D, 0xF9, 0xEF, 
-	0xEC, 0xF1, 0x81, 0x1E, 0x7B, 0x9B, 0x03, 0x47, 0x9A, 0xBF, 
-	0x65, 0xCC, 0x7F, 0x65, 0x24, 0x69, 0xA6, 0xE8, 0x14, 0x89, 
-	0x5B, 0xE4, 0x34, 0xF7, 0xC5, 0xB0, 0x14, 0x93, 0xF5, 0x67, 
-	0x7B, 0x3A, 0x7A, 0x78, 0xE1, 0x01, 0x56, 0x56, 0x91, 0xA6, 
-	0x13, 0x42, 0x8D, 0xD2, 0x3C, 0x40, 0x9C, 0x4C, 0xEF, 0xD1, 
-	0x86, 0xDF, 0x37, 0x51, 0x1B, 0x0C, 0xA1, 0x3B, 0xF5, 0xF1, 
-	0xA3, 0x4A, 0x35, 0xE4, 0xE1, 0xCE, 0x96, 0xDF, 0x1B, 0x7E, 
-	0xBF, 0x4E, 0x97, 0xD0, 0x10, 0xE8, 0xA8, 0x08, 0x30, 0x81, 
-	0xAF, 0x20, 0x0B, 0x43, 0x14, 0xC5, 0x74, 0x67, 0xB4, 0x32, 
-	0x82, 0x6F, 0x8D, 0x86, 0xC2, 0x88, 0x40, 0x99, 0x36, 0x83, 
-	0xBA, 0x1E, 0x40, 0x72, 0x22, 0x17, 0xD7, 0x52, 0x65, 0x24, 
-	0x73, 0xB0, 0xCE, 0xEF, 0x19, 0xCD, 0xAE, 0xFF, 0x78, 0x6C, 
-	0x7B, 0xC0, 0x12, 0x03, 0xD4, 0x4E, 0x72, 0x0D, 0x50, 0x6D, 
-	0x3B, 0xA3, 0x3B, 0xA3, 0x99, 0x5E, 0x9D, 0xC8, 0xD9, 0x0C, 
-	0x85, 0xB3, 0xD9, 0x8A, 0xD9, 0x54, 0x26, 0xDB, 0x6D, 0xFA, 
-	0xAC, 0xBB, 0xFF, 0x25, 0x4C, 0xC4, 0xD1, 0x79, 0xF4, 0x71, 
-	0xD3, 0x86, 0x40, 0x18, 0x13, 0xB0, 0x63, 0xB5, 0x72, 0x4E, 
-	0x30, 0xC4, 0x97, 0x84, 0x86, 0x2D, 0x56, 0x2F, 0xD7, 0x15, 
-	0xF7, 0x7F, 0xC0, 0xAE, 0xF5, 0xFC, 0x5B, 0xE5, 0xFB, 0xA1, 
-	0xBA, 0xD3, 0x02, 0x03, 0x01, 0x00, 0x01, 0xA3, 0x82, 0x01, 
-	0x07, 0x30, 0x82, 0x01, 0x03, 0x30, 0x1D, 0x06, 0x03, 0x55, 
-	0x1D, 0x0E, 0x04, 0x16, 0x04, 0x14, 0x33, 0xD8, 0x45, 0x66, 
-	0xD7, 0x68, 0x87, 0x18, 0x7E, 0x54, 0x0D, 0x70, 0x27, 0x91, 
-	0xC7, 0x26, 0xD7, 0x85, 0x65, 0xC0, 0x30, 0x81, 0xD3, 0x06, 
-	0x03, 0x55, 0x1D, 0x23, 0x04, 0x81, 0xCB, 0x30, 0x81, 0xC8, 
-	0x80, 0x14, 0x33, 0xD8, 0x45, 0x66, 0xD7, 0x68, 0x87, 0x18, 
-	0x7E, 0x54, 0x0D, 0x70, 0x27, 0x91, 0xC7, 0x26, 0xD7, 0x85, 
-	0x65, 0xC0, 0xA1, 0x81, 0xA4, 0xA4, 0x81, 0xA1, 0x30, 0x81, 
-	0x9E, 0x31, 0x0B, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 0x06, 
-	0x13, 0x02, 0x55, 0x53, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 
-	0x55, 0x04, 0x08, 0x0C, 0x07, 0x4D, 0x6F, 0x6E, 0x74, 0x61, 
-	0x6E, 0x61, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 
-	0x07, 0x0C, 0x07, 0x42, 0x6F, 0x7A, 0x65, 0x6D, 0x61, 0x6E, 
-	0x31, 0x15, 0x30, 0x13, 0x06, 0x03, 0x55, 0x04, 0x0A, 0x0C, 
-	0x0C, 0x77, 0x6F, 0x6C, 0x66, 0x53, 0x53, 0x4C, 0x5F, 0x32, 
-	0x30, 0x34, 0x38, 0x31, 0x19, 0x30, 0x17, 0x06, 0x03, 0x55, 
-	0x04, 0x0B, 0x0C, 0x10, 0x50, 0x72, 0x6F, 0x67, 0x72, 0x61, 
-	0x6D, 0x6D, 0x69, 0x6E, 0x67, 0x2D, 0x32, 0x30, 0x34, 0x38, 
-	0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 0x03, 0x0C, 
-	0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77, 0x6F, 0x6C, 0x66, 0x73, 
-	0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x31, 0x1F, 0x30, 0x1D, 
-	0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x09, 
-	0x01, 0x16, 0x10, 0x69, 0x6E, 0x66, 0x6F, 0x40, 0x77, 0x6F, 
-	0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x82, 
-	0x09, 0x00, 0xAA, 0xC4, 0xBF, 0x4C, 0x50, 0xBD, 0x55, 0x77, 
-	0x30, 0x0C, 0x06, 0x03, 0x55, 0x1D, 0x13, 0x04, 0x05, 0x30, 
-	0x03, 0x01, 0x01, 0xFF, 0x30, 0x0D, 0x06, 0x09, 0x2A, 0x86, 
-	0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0B, 0x05, 0x00, 0x03, 
-	0x82, 0x01, 0x01, 0x00, 0x80, 0x52, 0x54, 0x61, 0x2A, 0x77, 
-	0x80, 0x53, 0x44, 0xA9, 0x80, 0x6D, 0x45, 0xFF, 0x0D, 0x25, 
-	0x7D, 0x1A, 0x8F, 0x23, 0x93, 0x53, 0x74, 0x35, 0x12, 0x6F, 
-	0xF0, 0x2E, 0x20, 0xEA, 0xED, 0x80, 0x63, 0x69, 0x88, 0xE6, 
-	0x0C, 0xA1, 0x49, 0x30, 0xE0, 0x82, 0xDB, 0x68, 0x0F, 0x7E, 
-	0x84, 0xAC, 0xFF, 0xFF, 0x7B, 0x42, 0xFA, 0x7E, 0x2F, 0xB2, 
-	0x52, 0x9F, 0xD2, 0x79, 0x5E, 0x35, 0x12, 0x27, 0x36, 0xBC, 
-	0xDF, 0x96, 0x58, 0x44, 0x96, 0x55, 0xC8, 0x4A, 0x94, 0x02, 
-	0x5F, 0x4A, 0x9D, 0xDC, 0xD3, 0x3A, 0xF7, 0x6D, 0xAC, 0x8B, 
-	0x79, 0x6E, 0xFC, 0xBE, 0x8F, 0x23, 0x58, 0x6A, 0x8A, 0xF5, 
-	0x38, 0x0A, 0x42, 0xF6, 0x98, 0x74, 0x88, 0x53, 0x2E, 0x02, 
-	0xAF, 0xE1, 0x0E, 0xBE, 0x6F, 0xCC, 0x74, 0x33, 0x7C, 0xEC, 
-	0xB4, 0xCB, 0xA7, 0x49, 0x6D, 0x82, 0x42, 0x4F, 0xEB, 0x73, 
-	0x29, 0xC3, 0x32, 0x00, 0x2B, 0x15, 0xF8, 0x88, 0x7A, 0x8F, 
-	0x6D, 0x20, 0x1B, 0xAE, 0x65, 0x5F, 0xC5, 0xD0, 0x8A, 0xD1, 
-	0xE2, 0x64, 0x6D, 0xA3, 0xA8, 0xFE, 0x64, 0xE1, 0xA9, 0x5B, 
-	0xE6, 0xD0, 0x23, 0xD6, 0x02, 0x72, 0x5A, 0xEC, 0x03, 0x8E, 
-	0x87, 0x67, 0x19, 0x8D, 0xE4, 0xA8, 0x99, 0x15, 0xC1, 0x3D, 
-	0x91, 0x48, 0x99, 0x8D, 0xFE, 0xAE, 0x1C, 0xBF, 0xF6, 0x28, 
-	0x1B, 0x45, 0xBE, 0xAD, 0xEF, 0x72, 0x83, 0x9A, 0xF6, 0xC7, 
-	0x3B, 0x51, 0xA3, 0x6E, 0x7A, 0x73, 0xBD, 0x83, 0xAA, 0x97, 
-	0xFD, 0x63, 0xB4, 0xF4, 0x6B, 0x1C, 0x14, 0x81, 0x9A, 0xEF, 
-	0x14, 0x24, 0xD3, 0xE1, 0x8B, 0xF4, 0x04, 0x04, 0x84, 0x54, 
-	0x0F, 0x61, 0xA2, 0xA8, 0xF2, 0x50, 0x37, 0x0C, 0x17, 0x0C, 
-	0xBC, 0xE0, 0xC2, 0x84, 0x85, 0xF4, 0x0B, 0xAE, 0x00, 0xCA, 
-	0x9F, 0x27, 0xE2, 0x44, 0x4F, 0x15, 0x0B, 0x8B, 0x1D, 0xB4
+        0x30, 0x82, 0x04, 0xCA, 0x30, 0x82, 0x03, 0xB2, 0xA0, 0x03,
+        0x02, 0x01, 0x02, 0x02, 0x09, 0x00, 0xAA, 0xC4, 0xBF, 0x4C,
+        0x50, 0xBD, 0x55, 0x77, 0x30, 0x0D, 0x06, 0x09, 0x2A, 0x86,
+        0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0B, 0x05, 0x00, 0x30,
+        0x81, 0x9E, 0x31, 0x0B, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04,
+        0x06, 0x13, 0x02, 0x55, 0x53, 0x31, 0x10, 0x30, 0x0E, 0x06,
+        0x03, 0x55, 0x04, 0x08, 0x0C, 0x07, 0x4D, 0x6F, 0x6E, 0x74,
+        0x61, 0x6E, 0x61, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55,
+        0x04, 0x07, 0x0C, 0x07, 0x42, 0x6F, 0x7A, 0x65, 0x6D, 0x61,
+        0x6E, 0x31, 0x15, 0x30, 0x13, 0x06, 0x03, 0x55, 0x04, 0x0A,
+        0x0C, 0x0C, 0x77, 0x6F, 0x6C, 0x66, 0x53, 0x53, 0x4C, 0x5F,
+        0x32, 0x30, 0x34, 0x38, 0x31, 0x19, 0x30, 0x17, 0x06, 0x03,
+        0x55, 0x04, 0x0B, 0x0C, 0x10, 0x50, 0x72, 0x6F, 0x67, 0x72,
+        0x61, 0x6D, 0x6D, 0x69, 0x6E, 0x67, 0x2D, 0x32, 0x30, 0x34,
+        0x38, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 0x03,
+        0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77, 0x6F, 0x6C, 0x66,
+        0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x31, 0x1F, 0x30,
+        0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01,
+        0x09, 0x01, 0x16, 0x10, 0x69, 0x6E, 0x66, 0x6F, 0x40, 0x77,
+        0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D,
+        0x30, 0x1E, 0x17, 0x0D, 0x31, 0x38, 0x30, 0x34, 0x31, 0x33,
+        0x31, 0x35, 0x32, 0x33, 0x30, 0x39, 0x5A, 0x17, 0x0D, 0x32,
+        0x31, 0x30, 0x31, 0x30, 0x37, 0x31, 0x35, 0x32, 0x33, 0x30,
+        0x39, 0x5A, 0x30, 0x81, 0x9E, 0x31, 0x0B, 0x30, 0x09, 0x06,
+        0x03, 0x55, 0x04, 0x06, 0x13, 0x02, 0x55, 0x53, 0x31, 0x10,
+        0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x08, 0x0C, 0x07, 0x4D,
+        0x6F, 0x6E, 0x74, 0x61, 0x6E, 0x61, 0x31, 0x10, 0x30, 0x0E,
+        0x06, 0x03, 0x55, 0x04, 0x07, 0x0C, 0x07, 0x42, 0x6F, 0x7A,
+        0x65, 0x6D, 0x61, 0x6E, 0x31, 0x15, 0x30, 0x13, 0x06, 0x03,
+        0x55, 0x04, 0x0A, 0x0C, 0x0C, 0x77, 0x6F, 0x6C, 0x66, 0x53,
+        0x53, 0x4C, 0x5F, 0x32, 0x30, 0x34, 0x38, 0x31, 0x19, 0x30,
+        0x17, 0x06, 0x03, 0x55, 0x04, 0x0B, 0x0C, 0x10, 0x50, 0x72,
+        0x6F, 0x67, 0x72, 0x61, 0x6D, 0x6D, 0x69, 0x6E, 0x67, 0x2D,
+        0x32, 0x30, 0x34, 0x38, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03,
+        0x55, 0x04, 0x03, 0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77,
+        0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D,
+        0x31, 0x1F, 0x30, 0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86,
+        0xF7, 0x0D, 0x01, 0x09, 0x01, 0x16, 0x10, 0x69, 0x6E, 0x66,
+        0x6F, 0x40, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E,
+        0x63, 0x6F, 0x6D, 0x30, 0x82, 0x01, 0x22, 0x30, 0x0D, 0x06,
+        0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x01,
+        0x05, 0x00, 0x03, 0x82, 0x01, 0x0F, 0x00, 0x30, 0x82, 0x01,
+        0x0A, 0x02, 0x82, 0x01, 0x01, 0x00, 0xC3, 0x03, 0xD1, 0x2B,
+        0xFE, 0x39, 0xA4, 0x32, 0x45, 0x3B, 0x53, 0xC8, 0x84, 0x2B,
+        0x2A, 0x7C, 0x74, 0x9A, 0xBD, 0xAA, 0x2A, 0x52, 0x07, 0x47,
+        0xD6, 0xA6, 0x36, 0xB2, 0x07, 0x32, 0x8E, 0xD0, 0xBA, 0x69,
+        0x7B, 0xC6, 0xC3, 0x44, 0x9E, 0xD4, 0x81, 0x48, 0xFD, 0x2D,
+        0x68, 0xA2, 0x8B, 0x67, 0xBB, 0xA1, 0x75, 0xC8, 0x36, 0x2C,
+        0x4A, 0xD2, 0x1B, 0xF7, 0x8B, 0xBA, 0xCF, 0x0D, 0xF9, 0xEF,
+        0xEC, 0xF1, 0x81, 0x1E, 0x7B, 0x9B, 0x03, 0x47, 0x9A, 0xBF,
+        0x65, 0xCC, 0x7F, 0x65, 0x24, 0x69, 0xA6, 0xE8, 0x14, 0x89,
+        0x5B, 0xE4, 0x34, 0xF7, 0xC5, 0xB0, 0x14, 0x93, 0xF5, 0x67,
+        0x7B, 0x3A, 0x7A, 0x78, 0xE1, 0x01, 0x56, 0x56, 0x91, 0xA6,
+        0x13, 0x42, 0x8D, 0xD2, 0x3C, 0x40, 0x9C, 0x4C, 0xEF, 0xD1,
+        0x86, 0xDF, 0x37, 0x51, 0x1B, 0x0C, 0xA1, 0x3B, 0xF5, 0xF1,
+        0xA3, 0x4A, 0x35, 0xE4, 0xE1, 0xCE, 0x96, 0xDF, 0x1B, 0x7E,
+        0xBF, 0x4E, 0x97, 0xD0, 0x10, 0xE8, 0xA8, 0x08, 0x30, 0x81,
+        0xAF, 0x20, 0x0B, 0x43, 0x14, 0xC5, 0x74, 0x67, 0xB4, 0x32,
+        0x82, 0x6F, 0x8D, 0x86, 0xC2, 0x88, 0x40, 0x99, 0x36, 0x83,
+        0xBA, 0x1E, 0x40, 0x72, 0x22, 0x17, 0xD7, 0x52, 0x65, 0x24,
+        0x73, 0xB0, 0xCE, 0xEF, 0x19, 0xCD, 0xAE, 0xFF, 0x78, 0x6C,
+        0x7B, 0xC0, 0x12, 0x03, 0xD4, 0x4E, 0x72, 0x0D, 0x50, 0x6D,
+        0x3B, 0xA3, 0x3B, 0xA3, 0x99, 0x5E, 0x9D, 0xC8, 0xD9, 0x0C,
+        0x85, 0xB3, 0xD9, 0x8A, 0xD9, 0x54, 0x26, 0xDB, 0x6D, 0xFA,
+        0xAC, 0xBB, 0xFF, 0x25, 0x4C, 0xC4, 0xD1, 0x79, 0xF4, 0x71,
+        0xD3, 0x86, 0x40, 0x18, 0x13, 0xB0, 0x63, 0xB5, 0x72, 0x4E,
+        0x30, 0xC4, 0x97, 0x84, 0x86, 0x2D, 0x56, 0x2F, 0xD7, 0x15,
+        0xF7, 0x7F, 0xC0, 0xAE, 0xF5, 0xFC, 0x5B, 0xE5, 0xFB, 0xA1,
+        0xBA, 0xD3, 0x02, 0x03, 0x01, 0x00, 0x01, 0xA3, 0x82, 0x01,
+        0x07, 0x30, 0x82, 0x01, 0x03, 0x30, 0x1D, 0x06, 0x03, 0x55,
+        0x1D, 0x0E, 0x04, 0x16, 0x04, 0x14, 0x33, 0xD8, 0x45, 0x66,
+        0xD7, 0x68, 0x87, 0x18, 0x7E, 0x54, 0x0D, 0x70, 0x27, 0x91,
+        0xC7, 0x26, 0xD7, 0x85, 0x65, 0xC0, 0x30, 0x81, 0xD3, 0x06,
+        0x03, 0x55, 0x1D, 0x23, 0x04, 0x81, 0xCB, 0x30, 0x81, 0xC8,
+        0x80, 0x14, 0x33, 0xD8, 0x45, 0x66, 0xD7, 0x68, 0x87, 0x18,
+        0x7E, 0x54, 0x0D, 0x70, 0x27, 0x91, 0xC7, 0x26, 0xD7, 0x85,
+        0x65, 0xC0, 0xA1, 0x81, 0xA4, 0xA4, 0x81, 0xA1, 0x30, 0x81,
+        0x9E, 0x31, 0x0B, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 0x06,
+        0x13, 0x02, 0x55, 0x53, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03,
+        0x55, 0x04, 0x08, 0x0C, 0x07, 0x4D, 0x6F, 0x6E, 0x74, 0x61,
+        0x6E, 0x61, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04,
+        0x07, 0x0C, 0x07, 0x42, 0x6F, 0x7A, 0x65, 0x6D, 0x61, 0x6E,
+        0x31, 0x15, 0x30, 0x13, 0x06, 0x03, 0x55, 0x04, 0x0A, 0x0C,
+        0x0C, 0x77, 0x6F, 0x6C, 0x66, 0x53, 0x53, 0x4C, 0x5F, 0x32,
+        0x30, 0x34, 0x38, 0x31, 0x19, 0x30, 0x17, 0x06, 0x03, 0x55,
+        0x04, 0x0B, 0x0C, 0x10, 0x50, 0x72, 0x6F, 0x67, 0x72, 0x61,
+        0x6D, 0x6D, 0x69, 0x6E, 0x67, 0x2D, 0x32, 0x30, 0x34, 0x38,
+        0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 0x03, 0x0C,
+        0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77, 0x6F, 0x6C, 0x66, 0x73,
+        0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x31, 0x1F, 0x30, 0x1D,
+        0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x09,
+        0x01, 0x16, 0x10, 0x69, 0x6E, 0x66, 0x6F, 0x40, 0x77, 0x6F,
+        0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x82,
+        0x09, 0x00, 0xAA, 0xC4, 0xBF, 0x4C, 0x50, 0xBD, 0x55, 0x77,
+        0x30, 0x0C, 0x06, 0x03, 0x55, 0x1D, 0x13, 0x04, 0x05, 0x30,
+        0x03, 0x01, 0x01, 0xFF, 0x30, 0x0D, 0x06, 0x09, 0x2A, 0x86,
+        0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0B, 0x05, 0x00, 0x03,
+        0x82, 0x01, 0x01, 0x00, 0x80, 0x52, 0x54, 0x61, 0x2A, 0x77,
+        0x80, 0x53, 0x44, 0xA9, 0x80, 0x6D, 0x45, 0xFF, 0x0D, 0x25,
+        0x7D, 0x1A, 0x8F, 0x23, 0x93, 0x53, 0x74, 0x35, 0x12, 0x6F,
+        0xF0, 0x2E, 0x20, 0xEA, 0xED, 0x80, 0x63, 0x69, 0x88, 0xE6,
+        0x0C, 0xA1, 0x49, 0x30, 0xE0, 0x82, 0xDB, 0x68, 0x0F, 0x7E,
+        0x84, 0xAC, 0xFF, 0xFF, 0x7B, 0x42, 0xFA, 0x7E, 0x2F, 0xB2,
+        0x52, 0x9F, 0xD2, 0x79, 0x5E, 0x35, 0x12, 0x27, 0x36, 0xBC,
+        0xDF, 0x96, 0x58, 0x44, 0x96, 0x55, 0xC8, 0x4A, 0x94, 0x02,
+        0x5F, 0x4A, 0x9D, 0xDC, 0xD3, 0x3A, 0xF7, 0x6D, 0xAC, 0x8B,
+        0x79, 0x6E, 0xFC, 0xBE, 0x8F, 0x23, 0x58, 0x6A, 0x8A, 0xF5,
+        0x38, 0x0A, 0x42, 0xF6, 0x98, 0x74, 0x88, 0x53, 0x2E, 0x02,
+        0xAF, 0xE1, 0x0E, 0xBE, 0x6F, 0xCC, 0x74, 0x33, 0x7C, 0xEC,
+        0xB4, 0xCB, 0xA7, 0x49, 0x6D, 0x82, 0x42, 0x4F, 0xEB, 0x73,
+        0x29, 0xC3, 0x32, 0x00, 0x2B, 0x15, 0xF8, 0x88, 0x7A, 0x8F,
+        0x6D, 0x20, 0x1B, 0xAE, 0x65, 0x5F, 0xC5, 0xD0, 0x8A, 0xD1,
+        0xE2, 0x64, 0x6D, 0xA3, 0xA8, 0xFE, 0x64, 0xE1, 0xA9, 0x5B,
+        0xE6, 0xD0, 0x23, 0xD6, 0x02, 0x72, 0x5A, 0xEC, 0x03, 0x8E,
+        0x87, 0x67, 0x19, 0x8D, 0xE4, 0xA8, 0x99, 0x15, 0xC1, 0x3D,
+        0x91, 0x48, 0x99, 0x8D, 0xFE, 0xAE, 0x1C, 0xBF, 0xF6, 0x28,
+        0x1B, 0x45, 0xBE, 0xAD, 0xEF, 0x72, 0x83, 0x9A, 0xF6, 0xC7,
+        0x3B, 0x51, 0xA3, 0x6E, 0x7A, 0x73, 0xBD, 0x83, 0xAA, 0x97,
+        0xFD, 0x63, 0xB4, 0xF4, 0x6B, 0x1C, 0x14, 0x81, 0x9A, 0xEF,
+        0x14, 0x24, 0xD3, 0xE1, 0x8B, 0xF4, 0x04, 0x04, 0x84, 0x54,
+        0x0F, 0x61, 0xA2, 0xA8, 0xF2, 0x50, 0x37, 0x0C, 0x17, 0x0C,
+        0xBC, 0xE0, 0xC2, 0x84, 0x85, 0xF4, 0x0B, 0xAE, 0x00, 0xCA,
+        0x9F, 0x27, 0xE2, 0x44, 0x4F, 0x15, 0x0B, 0x8B, 0x1D, 0xB4
 
 };
 static const int sizeof_client_cert_der_2048 = sizeof(client_cert_der_2048);
@@ -973,752 +973,974 @@
 /* ./certs/dh2048.der, 2048-bit */
 static const unsigned char dh_key_der_2048[] =
 {
-	0x30, 0x82, 0x01, 0x08, 0x02, 0x82, 0x01, 0x01, 0x00, 0xB0, 
-	0xA1, 0x08, 0x06, 0x9C, 0x08, 0x13, 0xBA, 0x59, 0x06, 0x3C, 
-	0xBC, 0x30, 0xD5, 0xF5, 0x00, 0xC1, 0x4F, 0x44, 0xA7, 0xD6, 
-	0xEF, 0x4A, 0xC6, 0x25, 0x27, 0x1C, 0xE8, 0xD2, 0x96, 0x53, 
-	0x0A, 0x5C, 0x91, 0xDD, 0xA2, 0xC2, 0x94, 0x84, 0xBF, 0x7D, 
-	0xB2, 0x44, 0x9F, 0x9B, 0xD2, 0xC1, 0x8A, 0xC5, 0xBE, 0x72, 
-	0x5C, 0xA7, 0xE7, 0x91, 0xE6, 0xD4, 0x9F, 0x73, 0x07, 0x85, 
-	0x5B, 0x66, 0x48, 0xC7, 0x70, 0xFA, 0xB4, 0xEE, 0x02, 0xC9, 
-	0x3D, 0x9A, 0x4A, 0xDA, 0x3D, 0xC1, 0x46, 0x3E, 0x19, 0x69, 
-	0xD1, 0x17, 0x46, 0x07, 0xA3, 0x4D, 0x9F, 0x2B, 0x96, 0x17, 
-	0x39, 0x6D, 0x30, 0x8D, 0x2A, 0xF3, 0x94, 0xD3, 0x75, 0xCF, 
-	0xA0, 0x75, 0xE6, 0xF2, 0x92, 0x1F, 0x1A, 0x70, 0x05, 0xAA, 
-	0x04, 0x83, 0x57, 0x30, 0xFB, 0xDA, 0x76, 0x93, 0x38, 0x50, 
-	0xE8, 0x27, 0xFD, 0x63, 0xEE, 0x3C, 0xE5, 0xB7, 0xC8, 0x09, 
-	0xAE, 0x6F, 0x50, 0x35, 0x8E, 0x84, 0xCE, 0x4A, 0x00, 0xE9, 
-	0x12, 0x7E, 0x5A, 0x31, 0xD7, 0x33, 0xFC, 0x21, 0x13, 0x76, 
-	0xCC, 0x16, 0x30, 0xDB, 0x0C, 0xFC, 0xC5, 0x62, 0xA7, 0x35, 
-	0xB8, 0xEF, 0xB7, 0xB0, 0xAC, 0xC0, 0x36, 0xF6, 0xD9, 0xC9, 
-	0x46, 0x48, 0xF9, 0x40, 0x90, 0x00, 0x2B, 0x1B, 0xAA, 0x6C, 
-	0xE3, 0x1A, 0xC3, 0x0B, 0x03, 0x9E, 0x1B, 0xC2, 0x46, 0xE4, 
-	0x48, 0x4E, 0x22, 0x73, 0x6F, 0xC3, 0x5F, 0xD4, 0x9A, 0xD6, 
-	0x30, 0x07, 0x48, 0xD6, 0x8C, 0x90, 0xAB, 0xD4, 0xF6, 0xF1, 
-	0xE3, 0x48, 0xD3, 0x58, 0x4B, 0xA6, 0xB9, 0xCD, 0x29, 0xBF, 
-	0x68, 0x1F, 0x08, 0x4B, 0x63, 0x86, 0x2F, 0x5C, 0x6B, 0xD6, 
-	0xB6, 0x06, 0x65, 0xF7, 0xA6, 0xDC, 0x00, 0x67, 0x6B, 0xBB, 
-	0xC3, 0xA9, 0x41, 0x83, 0xFB, 0xC7, 0xFA, 0xC8, 0xE2, 0x1E, 
-	0x7E, 0xAF, 0x00, 0x3F, 0x93, 0x02, 0x01, 0x02
+        0x30, 0x82, 0x01, 0x08, 0x02, 0x82, 0x01, 0x01, 0x00, 0xB0,
+        0xA1, 0x08, 0x06, 0x9C, 0x08, 0x13, 0xBA, 0x59, 0x06, 0x3C,
+        0xBC, 0x30, 0xD5, 0xF5, 0x00, 0xC1, 0x4F, 0x44, 0xA7, 0xD6,
+        0xEF, 0x4A, 0xC6, 0x25, 0x27, 0x1C, 0xE8, 0xD2, 0x96, 0x53,
+        0x0A, 0x5C, 0x91, 0xDD, 0xA2, 0xC2, 0x94, 0x84, 0xBF, 0x7D,
+        0xB2, 0x44, 0x9F, 0x9B, 0xD2, 0xC1, 0x8A, 0xC5, 0xBE, 0x72,
+        0x5C, 0xA7, 0xE7, 0x91, 0xE6, 0xD4, 0x9F, 0x73, 0x07, 0x85,
+        0x5B, 0x66, 0x48, 0xC7, 0x70, 0xFA, 0xB4, 0xEE, 0x02, 0xC9,
+        0x3D, 0x9A, 0x4A, 0xDA, 0x3D, 0xC1, 0x46, 0x3E, 0x19, 0x69,
+        0xD1, 0x17, 0x46, 0x07, 0xA3, 0x4D, 0x9F, 0x2B, 0x96, 0x17,
+        0x39, 0x6D, 0x30, 0x8D, 0x2A, 0xF3, 0x94, 0xD3, 0x75, 0xCF,
+        0xA0, 0x75, 0xE6, 0xF2, 0x92, 0x1F, 0x1A, 0x70, 0x05, 0xAA,
+        0x04, 0x83, 0x57, 0x30, 0xFB, 0xDA, 0x76, 0x93, 0x38, 0x50,
+        0xE8, 0x27, 0xFD, 0x63, 0xEE, 0x3C, 0xE5, 0xB7, 0xC8, 0x09,
+        0xAE, 0x6F, 0x50, 0x35, 0x8E, 0x84, 0xCE, 0x4A, 0x00, 0xE9,
+        0x12, 0x7E, 0x5A, 0x31, 0xD7, 0x33, 0xFC, 0x21, 0x13, 0x76,
+        0xCC, 0x16, 0x30, 0xDB, 0x0C, 0xFC, 0xC5, 0x62, 0xA7, 0x35,
+        0xB8, 0xEF, 0xB7, 0xB0, 0xAC, 0xC0, 0x36, 0xF6, 0xD9, 0xC9,
+        0x46, 0x48, 0xF9, 0x40, 0x90, 0x00, 0x2B, 0x1B, 0xAA, 0x6C,
+        0xE3, 0x1A, 0xC3, 0x0B, 0x03, 0x9E, 0x1B, 0xC2, 0x46, 0xE4,
+        0x48, 0x4E, 0x22, 0x73, 0x6F, 0xC3, 0x5F, 0xD4, 0x9A, 0xD6,
+        0x30, 0x07, 0x48, 0xD6, 0x8C, 0x90, 0xAB, 0xD4, 0xF6, 0xF1,
+        0xE3, 0x48, 0xD3, 0x58, 0x4B, 0xA6, 0xB9, 0xCD, 0x29, 0xBF,
+        0x68, 0x1F, 0x08, 0x4B, 0x63, 0x86, 0x2F, 0x5C, 0x6B, 0xD6,
+        0xB6, 0x06, 0x65, 0xF7, 0xA6, 0xDC, 0x00, 0x67, 0x6B, 0xBB,
+        0xC3, 0xA9, 0x41, 0x83, 0xFB, 0xC7, 0xFA, 0xC8, 0xE2, 0x1E,
+        0x7E, 0xAF, 0x00, 0x3F, 0x93, 0x02, 0x01, 0x02
 };
 static const int sizeof_dh_key_der_2048 = sizeof(dh_key_der_2048);
 
 /* ./certs/dsa2048.der, 2048-bit */
 static const unsigned char dsa_key_der_2048[] =
 {
-	0x30, 0x82, 0x03, 0x3F, 0x02, 0x01, 0x00, 0x02, 0x82, 0x01, 
-	0x01, 0x00, 0xCC, 0x8E, 0xC9, 0xA0, 0xD5, 0x9A, 0x27, 0x1C, 
-	0xDA, 0x52, 0xDF, 0xC7, 0xC0, 0xE6, 0x06, 0xA4, 0x3E, 0x8A, 
-	0x66, 0x49, 0xD0, 0x59, 0x33, 0x51, 0x69, 0xC4, 0x9C, 0x5E, 
-	0x64, 0x85, 0xC7, 0xF1, 0xAB, 0xD5, 0xD9, 0x62, 0xAC, 0xFD, 
-	0xA1, 0xE0, 0x1B, 0x57, 0xFF, 0x96, 0xEF, 0x0C, 0x9F, 0xC8, 
-	0x44, 0x87, 0xEB, 0x5C, 0x91, 0xD0, 0x46, 0x42, 0x09, 0x50, 
-	0x6A, 0x23, 0xCB, 0x89, 0x6F, 0x55, 0xE9, 0x6A, 0x11, 0xA9, 
-	0xA8, 0x32, 0xAB, 0x33, 0x0D, 0x51, 0xB5, 0x79, 0x51, 0xB4, 
-	0xAB, 0xA2, 0x25, 0x11, 0x8D, 0xE5, 0x24, 0xBE, 0xD8, 0xF1, 
-	0x9D, 0x4E, 0x12, 0x6F, 0xAC, 0x44, 0x54, 0x80, 0xA9, 0xB4, 
-	0x81, 0x68, 0x4E, 0x44, 0x0E, 0xB8, 0x39, 0xF3, 0xBE, 0x83, 
-	0x08, 0x74, 0xA2, 0xC6, 0x7A, 0xD7, 0x6A, 0x7D, 0x0A, 0x88, 
-	0x57, 0x83, 0x48, 0xDC, 0xCF, 0x5E, 0x6F, 0xEE, 0x68, 0x0C, 
-	0xF7, 0xFF, 0x03, 0x04, 0x90, 0xAA, 0xF7, 0x07, 0x98, 0xF8, 
-	0x67, 0x5A, 0x83, 0x23, 0x66, 0x47, 0x60, 0xC3, 0x43, 0x6E, 
-	0x03, 0x91, 0xAC, 0x28, 0x66, 0xCB, 0xF0, 0xD3, 0x05, 0xC8, 
-	0x09, 0x97, 0xB5, 0xAE, 0x01, 0x5E, 0x80, 0x3B, 0x9D, 0x4F, 
-	0xDE, 0x3E, 0x94, 0xFE, 0xCB, 0x82, 0xB0, 0xB1, 0xFC, 0x91, 
-	0x8B, 0x1D, 0x8A, 0xEE, 0xC6, 0x06, 0x1F, 0x37, 0x91, 0x48, 
-	0xD2, 0xF8, 0x6C, 0x5D, 0x60, 0x13, 0x83, 0xA7, 0x81, 0xAC, 
-	0xCA, 0x8D, 0xD0, 0x6A, 0x04, 0x0A, 0xEA, 0x3E, 0x22, 0x4E, 
-	0x13, 0xF1, 0x0D, 0xBB, 0x60, 0x6B, 0xCD, 0xBC, 0x5C, 0x87, 
-	0xA3, 0x67, 0x2B, 0x42, 0xA1, 0x9F, 0xCD, 0x39, 0x58, 0xBE, 
-	0x55, 0xB1, 0x93, 0x84, 0xCE, 0xB2, 0x10, 0x4E, 0xE4, 0xC3, 
-	0x9F, 0xB2, 0x53, 0x61, 0x01, 0x29, 0xAA, 0x96, 0xCB, 0x20, 
-	0x60, 0x42, 0x1D, 0xBA, 0x75, 0x4B, 0x63, 0xC1, 0x02, 0x15, 
-	0x00, 0xE7, 0xA5, 0x39, 0xD4, 0x6A, 0x37, 0x5E, 0x95, 0x06, 
-	0x39, 0x07, 0x77, 0x0A, 0xEB, 0xA0, 0x03, 0xEB, 0x78, 0x82, 
-	0x9B, 0x02, 0x82, 0x01, 0x01, 0x00, 0x9A, 0xD4, 0x4C, 0x71, 
-	0x2F, 0xEC, 0xFA, 0x32, 0xB2, 0x80, 0x7E, 0x61, 0x4A, 0x6B, 
-	0x5F, 0x18, 0x76, 0x43, 0xC3, 0x69, 0xBA, 0x41, 0xC7, 0xA7, 
-	0x1D, 0x79, 0x01, 0xEC, 0xAF, 0x34, 0x87, 0x67, 0x4F, 0x29, 
-	0x80, 0xA8, 0x3B, 0x87, 0xF6, 0xE8, 0xA1, 0xE8, 0xCD, 0x1B, 
-	0x1C, 0x86, 0x38, 0xF6, 0xD1, 0x0C, 0x46, 0x2E, 0xC8, 0xE0, 
-	0xC9, 0x30, 0x26, 0xD5, 0x2C, 0x7F, 0xC1, 0x08, 0xBF, 0xCC, 
-	0x5A, 0x82, 0x8E, 0xD4, 0xD4, 0x49, 0xAA, 0xA2, 0xFA, 0xE6, 
-	0xC1, 0x9D, 0xF0, 0xD9, 0x96, 0xB0, 0xFF, 0x0C, 0x5B, 0x33, 
-	0x8E, 0x06, 0xDD, 0x9D, 0x28, 0xA9, 0xE9, 0x80, 0x41, 0x3B, 
-	0xD8, 0x7A, 0x94, 0x21, 0x8F, 0x56, 0xF1, 0xA2, 0xB4, 0x2B, 
-	0x89, 0x1C, 0x74, 0xFF, 0x7E, 0x91, 0xDC, 0x1F, 0x91, 0x13, 
-	0x98, 0xAF, 0xC7, 0x06, 0xD2, 0x4C, 0x90, 0xA2, 0xBD, 0xDA, 
-	0x16, 0xBA, 0x65, 0xB0, 0x2D, 0x68, 0x87, 0x3C, 0x6E, 0x25, 
-	0x8D, 0x90, 0xC7, 0xBC, 0x0D, 0xA9, 0x43, 0x03, 0xC9, 0xBE, 
-	0xCF, 0x85, 0x6F, 0xDB, 0x07, 0x7B, 0x8C, 0xF8, 0xB1, 0xC2, 
-	0x49, 0x10, 0x69, 0x63, 0x56, 0x37, 0xC5, 0x30, 0xD2, 0xFB, 
-	0x71, 0x9A, 0xE8, 0x82, 0x07, 0x2E, 0x3E, 0x95, 0x50, 0xF3, 
-	0x73, 0xCF, 0x34, 0x5B, 0xD5, 0xAB, 0x02, 0x15, 0xF2, 0xCC, 
-	0xD7, 0x52, 0xC5, 0x28, 0xD8, 0x41, 0x19, 0x55, 0x6F, 0xB8, 
-	0x5F, 0xF1, 0x99, 0xB3, 0xC7, 0xD9, 0xB3, 0x71, 0xF4, 0x2D, 
-	0xDF, 0x22, 0x59, 0x35, 0x86, 0xDB, 0x39, 0xCA, 0x1B, 0x4D, 
-	0x35, 0x90, 0x19, 0x6B, 0x31, 0xE3, 0xC8, 0xC6, 0x09, 0xBF, 
-	0x7C, 0xED, 0x01, 0xB4, 0xB2, 0xF5, 0x6E, 0xDA, 0x63, 0x41, 
-	0x3C, 0xE6, 0x3A, 0x72, 0x2D, 0x65, 0x48, 0xF6, 0x07, 0xCD, 
-	0x92, 0x84, 0x8B, 0x1D, 0xA7, 0x31, 0x6B, 0xD6, 0xF0, 0xFB, 
-	0xD9, 0xF4, 0x02, 0x82, 0x01, 0x00, 0x66, 0x4B, 0xBB, 0xB7, 
-	0xC9, 0x48, 0x95, 0x0D, 0x5A, 0xA6, 0x2D, 0xA1, 0x7F, 0xDF, 
-	0x1F, 0x67, 0x6D, 0xED, 0x52, 0x4B, 0x16, 0x6C, 0x17, 0xC6, 
-	0xAE, 0xF8, 0x6A, 0xC4, 0x57, 0xED, 0x2F, 0xB3, 0xF0, 0x2A, 
-	0x55, 0xAB, 0xBA, 0xCA, 0xEA, 0x17, 0xE8, 0x35, 0x7C, 0xE5, 
-	0x31, 0x0D, 0x4A, 0x95, 0xFC, 0x43, 0x6F, 0x97, 0x3C, 0x5C, 
-	0x67, 0xAC, 0xBE, 0x67, 0x7F, 0xE9, 0x4E, 0xAA, 0x48, 0xB3, 
-	0x92, 0xA1, 0x76, 0x75, 0xEA, 0x04, 0x34, 0x7F, 0x87, 0x33, 
-	0x2D, 0x24, 0xB6, 0x29, 0x97, 0xE3, 0x04, 0x77, 0x93, 0x89, 
-	0x13, 0xDB, 0x1B, 0x93, 0xB8, 0x2C, 0x90, 0x1A, 0x09, 0x3B, 
-	0x26, 0xD9, 0x59, 0xF3, 0x2A, 0x09, 0x58, 0xDC, 0xAC, 0x25, 
-	0xB4, 0xA9, 0x45, 0x3B, 0xA2, 0x3A, 0x6C, 0x61, 0x84, 0xBF, 
-	0x68, 0xD4, 0xEA, 0x9B, 0xC5, 0x29, 0x48, 0x60, 0x15, 0x10, 
-	0x35, 0x2C, 0x44, 0x1D, 0xB5, 0x9A, 0xEE, 0xAC, 0xC1, 0x68, 
-	0xE8, 0x47, 0xB7, 0x41, 0x34, 0x39, 0x9A, 0xF8, 0xA5, 0x20, 
-	0xE9, 0x24, 0xC4, 0x2C, 0x58, 0x3F, 0x4C, 0x41, 0x30, 0x3A, 
-	0x14, 0x6E, 0x8D, 0xEA, 0xAD, 0xBA, 0x9B, 0x43, 0xD3, 0x98, 
-	0x2F, 0x83, 0xD8, 0x14, 0x67, 0xE8, 0xF8, 0xD5, 0x4F, 0xAC, 
-	0xE0, 0x3B, 0xBF, 0xA7, 0x54, 0x16, 0x5E, 0x49, 0x64, 0x26, 
-	0x54, 0xA4, 0x6B, 0x69, 0x7C, 0xBA, 0x8A, 0x83, 0xD9, 0x2E, 
-	0x65, 0x0A, 0xA2, 0x27, 0xEF, 0x99, 0x99, 0x08, 0xD7, 0xB5, 
-	0x9F, 0xA0, 0x01, 0xEF, 0x7E, 0x17, 0xBF, 0x83, 0x6B, 0x2E, 
-	0xDD, 0xC0, 0x39, 0x38, 0x23, 0x68, 0xB4, 0x76, 0x6B, 0xE5, 
-	0xCA, 0xF7, 0x7C, 0xEE, 0xC0, 0x52, 0xE2, 0xDD, 0xAD, 0x59, 
-	0x3A, 0x42, 0x06, 0x45, 0xB0, 0xC7, 0xC1, 0x77, 0x05, 0xB2, 
-	0x0C, 0x32, 0x40, 0x46, 0xAA, 0xDA, 0x79, 0x77, 0x04, 0x71, 
-	0xDF, 0x7A, 0x02, 0x15, 0x00, 0x98, 0xEE, 0xB9, 0x51, 0x37, 
-	0x3E, 0x75, 0x13, 0x13, 0x06, 0x8F, 0x94, 0xD3, 0xE6, 0xE9, 
-	0x00, 0xCB, 0x62, 0x6D, 0x9A
+        0x30, 0x82, 0x03, 0x3F, 0x02, 0x01, 0x00, 0x02, 0x82, 0x01,
+        0x01, 0x00, 0xCC, 0x8E, 0xC9, 0xA0, 0xD5, 0x9A, 0x27, 0x1C,
+        0xDA, 0x52, 0xDF, 0xC7, 0xC0, 0xE6, 0x06, 0xA4, 0x3E, 0x8A,
+        0x66, 0x49, 0xD0, 0x59, 0x33, 0x51, 0x69, 0xC4, 0x9C, 0x5E,
+        0x64, 0x85, 0xC7, 0xF1, 0xAB, 0xD5, 0xD9, 0x62, 0xAC, 0xFD,
+        0xA1, 0xE0, 0x1B, 0x57, 0xFF, 0x96, 0xEF, 0x0C, 0x9F, 0xC8,
+        0x44, 0x87, 0xEB, 0x5C, 0x91, 0xD0, 0x46, 0x42, 0x09, 0x50,
+        0x6A, 0x23, 0xCB, 0x89, 0x6F, 0x55, 0xE9, 0x6A, 0x11, 0xA9,
+        0xA8, 0x32, 0xAB, 0x33, 0x0D, 0x51, 0xB5, 0x79, 0x51, 0xB4,
+        0xAB, 0xA2, 0x25, 0x11, 0x8D, 0xE5, 0x24, 0xBE, 0xD8, 0xF1,
+        0x9D, 0x4E, 0x12, 0x6F, 0xAC, 0x44, 0x54, 0x80, 0xA9, 0xB4,
+        0x81, 0x68, 0x4E, 0x44, 0x0E, 0xB8, 0x39, 0xF3, 0xBE, 0x83,
+        0x08, 0x74, 0xA2, 0xC6, 0x7A, 0xD7, 0x6A, 0x7D, 0x0A, 0x88,
+        0x57, 0x83, 0x48, 0xDC, 0xCF, 0x5E, 0x6F, 0xEE, 0x68, 0x0C,
+        0xF7, 0xFF, 0x03, 0x04, 0x90, 0xAA, 0xF7, 0x07, 0x98, 0xF8,
+        0x67, 0x5A, 0x83, 0x23, 0x66, 0x47, 0x60, 0xC3, 0x43, 0x6E,
+        0x03, 0x91, 0xAC, 0x28, 0x66, 0xCB, 0xF0, 0xD3, 0x05, 0xC8,
+        0x09, 0x97, 0xB5, 0xAE, 0x01, 0x5E, 0x80, 0x3B, 0x9D, 0x4F,
+        0xDE, 0x3E, 0x94, 0xFE, 0xCB, 0x82, 0xB0, 0xB1, 0xFC, 0x91,
+        0x8B, 0x1D, 0x8A, 0xEE, 0xC6, 0x06, 0x1F, 0x37, 0x91, 0x48,
+        0xD2, 0xF8, 0x6C, 0x5D, 0x60, 0x13, 0x83, 0xA7, 0x81, 0xAC,
+        0xCA, 0x8D, 0xD0, 0x6A, 0x04, 0x0A, 0xEA, 0x3E, 0x22, 0x4E,
+        0x13, 0xF1, 0x0D, 0xBB, 0x60, 0x6B, 0xCD, 0xBC, 0x5C, 0x87,
+        0xA3, 0x67, 0x2B, 0x42, 0xA1, 0x9F, 0xCD, 0x39, 0x58, 0xBE,
+        0x55, 0xB1, 0x93, 0x84, 0xCE, 0xB2, 0x10, 0x4E, 0xE4, 0xC3,
+        0x9F, 0xB2, 0x53, 0x61, 0x01, 0x29, 0xAA, 0x96, 0xCB, 0x20,
+        0x60, 0x42, 0x1D, 0xBA, 0x75, 0x4B, 0x63, 0xC1, 0x02, 0x15,
+        0x00, 0xE7, 0xA5, 0x39, 0xD4, 0x6A, 0x37, 0x5E, 0x95, 0x06,
+        0x39, 0x07, 0x77, 0x0A, 0xEB, 0xA0, 0x03, 0xEB, 0x78, 0x82,
+        0x9B, 0x02, 0x82, 0x01, 0x01, 0x00, 0x9A, 0xD4, 0x4C, 0x71,
+        0x2F, 0xEC, 0xFA, 0x32, 0xB2, 0x80, 0x7E, 0x61, 0x4A, 0x6B,
+        0x5F, 0x18, 0x76, 0x43, 0xC3, 0x69, 0xBA, 0x41, 0xC7, 0xA7,
+        0x1D, 0x79, 0x01, 0xEC, 0xAF, 0x34, 0x87, 0x67, 0x4F, 0x29,
+        0x80, 0xA8, 0x3B, 0x87, 0xF6, 0xE8, 0xA1, 0xE8, 0xCD, 0x1B,
+        0x1C, 0x86, 0x38, 0xF6, 0xD1, 0x0C, 0x46, 0x2E, 0xC8, 0xE0,
+        0xC9, 0x30, 0x26, 0xD5, 0x2C, 0x7F, 0xC1, 0x08, 0xBF, 0xCC,
+        0x5A, 0x82, 0x8E, 0xD4, 0xD4, 0x49, 0xAA, 0xA2, 0xFA, 0xE6,
+        0xC1, 0x9D, 0xF0, 0xD9, 0x96, 0xB0, 0xFF, 0x0C, 0x5B, 0x33,
+        0x8E, 0x06, 0xDD, 0x9D, 0x28, 0xA9, 0xE9, 0x80, 0x41, 0x3B,
+        0xD8, 0x7A, 0x94, 0x21, 0x8F, 0x56, 0xF1, 0xA2, 0xB4, 0x2B,
+        0x89, 0x1C, 0x74, 0xFF, 0x7E, 0x91, 0xDC, 0x1F, 0x91, 0x13,
+        0x98, 0xAF, 0xC7, 0x06, 0xD2, 0x4C, 0x90, 0xA2, 0xBD, 0xDA,
+        0x16, 0xBA, 0x65, 0xB0, 0x2D, 0x68, 0x87, 0x3C, 0x6E, 0x25,
+        0x8D, 0x90, 0xC7, 0xBC, 0x0D, 0xA9, 0x43, 0x03, 0xC9, 0xBE,
+        0xCF, 0x85, 0x6F, 0xDB, 0x07, 0x7B, 0x8C, 0xF8, 0xB1, 0xC2,
+        0x49, 0x10, 0x69, 0x63, 0x56, 0x37, 0xC5, 0x30, 0xD2, 0xFB,
+        0x71, 0x9A, 0xE8, 0x82, 0x07, 0x2E, 0x3E, 0x95, 0x50, 0xF3,
+        0x73, 0xCF, 0x34, 0x5B, 0xD5, 0xAB, 0x02, 0x15, 0xF2, 0xCC,
+        0xD7, 0x52, 0xC5, 0x28, 0xD8, 0x41, 0x19, 0x55, 0x6F, 0xB8,
+        0x5F, 0xF1, 0x99, 0xB3, 0xC7, 0xD9, 0xB3, 0x71, 0xF4, 0x2D,
+        0xDF, 0x22, 0x59, 0x35, 0x86, 0xDB, 0x39, 0xCA, 0x1B, 0x4D,
+        0x35, 0x90, 0x19, 0x6B, 0x31, 0xE3, 0xC8, 0xC6, 0x09, 0xBF,
+        0x7C, 0xED, 0x01, 0xB4, 0xB2, 0xF5, 0x6E, 0xDA, 0x63, 0x41,
+        0x3C, 0xE6, 0x3A, 0x72, 0x2D, 0x65, 0x48, 0xF6, 0x07, 0xCD,
+        0x92, 0x84, 0x8B, 0x1D, 0xA7, 0x31, 0x6B, 0xD6, 0xF0, 0xFB,
+        0xD9, 0xF4, 0x02, 0x82, 0x01, 0x00, 0x66, 0x4B, 0xBB, 0xB7,
+        0xC9, 0x48, 0x95, 0x0D, 0x5A, 0xA6, 0x2D, 0xA1, 0x7F, 0xDF,
+        0x1F, 0x67, 0x6D, 0xED, 0x52, 0x4B, 0x16, 0x6C, 0x17, 0xC6,
+        0xAE, 0xF8, 0x6A, 0xC4, 0x57, 0xED, 0x2F, 0xB3, 0xF0, 0x2A,
+        0x55, 0xAB, 0xBA, 0xCA, 0xEA, 0x17, 0xE8, 0x35, 0x7C, 0xE5,
+        0x31, 0x0D, 0x4A, 0x95, 0xFC, 0x43, 0x6F, 0x97, 0x3C, 0x5C,
+        0x67, 0xAC, 0xBE, 0x67, 0x7F, 0xE9, 0x4E, 0xAA, 0x48, 0xB3,
+        0x92, 0xA1, 0x76, 0x75, 0xEA, 0x04, 0x34, 0x7F, 0x87, 0x33,
+        0x2D, 0x24, 0xB6, 0x29, 0x97, 0xE3, 0x04, 0x77, 0x93, 0x89,
+        0x13, 0xDB, 0x1B, 0x93, 0xB8, 0x2C, 0x90, 0x1A, 0x09, 0x3B,
+        0x26, 0xD9, 0x59, 0xF3, 0x2A, 0x09, 0x58, 0xDC, 0xAC, 0x25,
+        0xB4, 0xA9, 0x45, 0x3B, 0xA2, 0x3A, 0x6C, 0x61, 0x84, 0xBF,
+        0x68, 0xD4, 0xEA, 0x9B, 0xC5, 0x29, 0x48, 0x60, 0x15, 0x10,
+        0x35, 0x2C, 0x44, 0x1D, 0xB5, 0x9A, 0xEE, 0xAC, 0xC1, 0x68,
+        0xE8, 0x47, 0xB7, 0x41, 0x34, 0x39, 0x9A, 0xF8, 0xA5, 0x20,
+        0xE9, 0x24, 0xC4, 0x2C, 0x58, 0x3F, 0x4C, 0x41, 0x30, 0x3A,
+        0x14, 0x6E, 0x8D, 0xEA, 0xAD, 0xBA, 0x9B, 0x43, 0xD3, 0x98,
+        0x2F, 0x83, 0xD8, 0x14, 0x67, 0xE8, 0xF8, 0xD5, 0x4F, 0xAC,
+        0xE0, 0x3B, 0xBF, 0xA7, 0x54, 0x16, 0x5E, 0x49, 0x64, 0x26,
+        0x54, 0xA4, 0x6B, 0x69, 0x7C, 0xBA, 0x8A, 0x83, 0xD9, 0x2E,
+        0x65, 0x0A, 0xA2, 0x27, 0xEF, 0x99, 0x99, 0x08, 0xD7, 0xB5,
+        0x9F, 0xA0, 0x01, 0xEF, 0x7E, 0x17, 0xBF, 0x83, 0x6B, 0x2E,
+        0xDD, 0xC0, 0x39, 0x38, 0x23, 0x68, 0xB4, 0x76, 0x6B, 0xE5,
+        0xCA, 0xF7, 0x7C, 0xEE, 0xC0, 0x52, 0xE2, 0xDD, 0xAD, 0x59,
+        0x3A, 0x42, 0x06, 0x45, 0xB0, 0xC7, 0xC1, 0x77, 0x05, 0xB2,
+        0x0C, 0x32, 0x40, 0x46, 0xAA, 0xDA, 0x79, 0x77, 0x04, 0x71,
+        0xDF, 0x7A, 0x02, 0x15, 0x00, 0x98, 0xEE, 0xB9, 0x51, 0x37,
+        0x3E, 0x75, 0x13, 0x13, 0x06, 0x8F, 0x94, 0xD3, 0xE6, 0xE9,
+        0x00, 0xCB, 0x62, 0x6D, 0x9A
 };
 static const int sizeof_dsa_key_der_2048 = sizeof(dsa_key_der_2048);
 
 /* ./certs/rsa2048.der, 2048-bit */
 static const unsigned char rsa_key_der_2048[] =
 {
-	0x30, 0x82, 0x04, 0xA3, 0x02, 0x01, 0x00, 0x02, 0x82, 0x01, 
-	0x01, 0x00, 0xE9, 0x8A, 0x5D, 0x15, 0xA4, 0xD4, 0x34, 0xB9, 
-	0x59, 0xA2, 0xDA, 0xAF, 0x74, 0xC8, 0xC9, 0x03, 0x26, 0x38, 
-	0xFA, 0x48, 0xFC, 0x4D, 0x30, 0x6E, 0xEA, 0x76, 0x89, 0xCE, 
-	0x4F, 0xF6, 0x87, 0xDE, 0x32, 0x3A, 0x46, 0x6E, 0x38, 0x12, 
-	0x58, 0x37, 0x22, 0x0D, 0x80, 0xAC, 0x2D, 0xAF, 0x2F, 0x12, 
-	0x3E, 0x62, 0x73, 0x60, 0x66, 0x68, 0x90, 0xB2, 0x6F, 0x47, 
-	0x17, 0x04, 0x2B, 0xCA, 0xB7, 0x26, 0xB7, 0x10, 0xC2, 0x13, 
-	0xF9, 0x7A, 0x62, 0x0A, 0x93, 0x32, 0x90, 0x42, 0x0D, 0x16, 
-	0x2E, 0xFA, 0xD7, 0x29, 0xD7, 0x9F, 0x54, 0xE4, 0xFC, 0x65, 
-	0x74, 0xF8, 0xF6, 0x43, 0x6B, 0x4E, 0x9E, 0x34, 0x7F, 0xCB, 
-	0x6B, 0x1C, 0x1A, 0xDE, 0x82, 0x81, 0xBF, 0x08, 0x5D, 0x3F, 
-	0xC0, 0xB6, 0xB1, 0xA8, 0xA5, 0x9C, 0x81, 0x70, 0xA7, 0x4E, 
-	0x32, 0x87, 0x15, 0x1C, 0x78, 0x0E, 0xF0, 0x18, 0xFE, 0xEB, 
-	0x4B, 0x37, 0x2B, 0xE9, 0xE1, 0xF7, 0xFA, 0x51, 0xC6, 0x58, 
-	0xB9, 0xD8, 0x06, 0x03, 0xED, 0xC0, 0x03, 0x18, 0x55, 0x8B, 
-	0x98, 0xFE, 0xB1, 0xF6, 0xD0, 0x3D, 0xFA, 0x63, 0xC0, 0x38, 
-	0x19, 0xC7, 0x00, 0xEF, 0x4D, 0x99, 0x60, 0xB4, 0xBA, 0xCE, 
-	0xE3, 0xCE, 0xD9, 0x6B, 0x2D, 0x76, 0x94, 0xFF, 0xFB, 0x77, 
-	0x18, 0x4A, 0xFE, 0x65, 0xF0, 0x0A, 0x91, 0x5C, 0x3B, 0x22, 
-	0x94, 0x85, 0xD0, 0x20, 0x18, 0x59, 0x2E, 0xA5, 0x33, 0x03, 
-	0xAC, 0x1B, 0x5F, 0x78, 0x32, 0x11, 0x25, 0xEE, 0x7F, 0x96, 
-	0x21, 0xA9, 0xD6, 0x76, 0x97, 0x8D, 0x66, 0x7E, 0xB2, 0x91, 
-	0xD0, 0x36, 0x2E, 0xA3, 0x1D, 0xBF, 0xF1, 0x85, 0xED, 0xC0, 
-	0x3E, 0x60, 0xB8, 0x5A, 0x9F, 0xAB, 0x80, 0xE0, 0xEA, 0x5D, 
-	0x5F, 0x75, 0x56, 0xC7, 0x4D, 0x51, 0x8E, 0xD4, 0x1F, 0x34, 
-	0xA6, 0x36, 0xF1, 0x30, 0x1F, 0x51, 0x99, 0x2F, 0x02, 0x03, 
-	0x01, 0x00, 0x01, 0x02, 0x82, 0x01, 0x00, 0x52, 0x11, 0x33, 
-	0x40, 0xC5, 0xD9, 0x64, 0x65, 0xB5, 0xE0, 0x0A, 0xA5, 0x19, 
-	0x8E, 0xED, 0x44, 0x54, 0x0C, 0x35, 0xB7, 0xAC, 0x21, 0x9B, 
-	0xE1, 0x7E, 0x37, 0x05, 0x9A, 0x20, 0x73, 0x6B, 0xAF, 0x63, 
-	0x4B, 0x23, 0x30, 0xDC, 0x37, 0x66, 0x14, 0x89, 0xBC, 0xE0, 
-	0xF8, 0xA0, 0x5D, 0x2D, 0x57, 0x65, 0xE0, 0xC6, 0xD6, 0x9B, 
-	0x66, 0x27, 0x62, 0xEC, 0xC3, 0xB8, 0x8C, 0xD8, 0xAE, 0xB5, 
-	0xC9, 0xBF, 0x0E, 0xFE, 0x84, 0x72, 0x68, 0xD5, 0x47, 0x0E, 
-	0x0E, 0xF8, 0xAE, 0x9D, 0x56, 0xAC, 0x4F, 0xAD, 0x88, 0xA0, 
-	0xA2, 0xF6, 0xFC, 0x38, 0xCD, 0x96, 0x5B, 0x5E, 0x7E, 0xB6, 
-	0x98, 0xBB, 0xF3, 0x8A, 0xEC, 0xFA, 0xC8, 0xB7, 0x90, 0x75, 
-	0xA0, 0x0E, 0x77, 0x6B, 0xFD, 0x59, 0x45, 0x5A, 0x0C, 0xFF, 
-	0x95, 0x8D, 0xCE, 0xFE, 0x9B, 0xF6, 0x19, 0x8E, 0x0B, 0xA1, 
-	0x0C, 0xEE, 0xC6, 0x79, 0xDD, 0x9D, 0x61, 0x85, 0x5C, 0x19, 
-	0x6C, 0x47, 0xCC, 0x08, 0xFF, 0xA5, 0x62, 0xDB, 0xE4, 0x2D, 
-	0x2D, 0xDD, 0x14, 0x67, 0xD6, 0x4A, 0x64, 0x2A, 0x66, 0x49, 
-	0x54, 0x9C, 0xE3, 0x85, 0x18, 0xE7, 0x31, 0x42, 0xE2, 0xD0, 
-	0x2C, 0x20, 0xA0, 0x74, 0x0F, 0x1F, 0x20, 0x89, 0xBA, 0xAB, 
-	0x80, 0xD8, 0x38, 0xD9, 0x46, 0x69, 0xBB, 0xEF, 0xCC, 0x8B, 
-	0xA1, 0x73, 0xA7, 0xF2, 0xE4, 0x38, 0x5D, 0xD6, 0x75, 0x9F, 
-	0x88, 0x0E, 0x56, 0xCD, 0xD8, 0x84, 0x59, 0x29, 0x73, 0xF5, 
-	0xA1, 0x79, 0xDA, 0x7A, 0x1F, 0xBF, 0x73, 0x83, 0xC0, 0x6D, 
-	0x9F, 0x8B, 0x34, 0x15, 0xC0, 0x6D, 0x69, 0x6A, 0x20, 0xE6, 
-	0x51, 0xCF, 0x45, 0x6E, 0xCC, 0x05, 0xC4, 0x3A, 0xC0, 0x9E, 
-	0xAA, 0xC1, 0x06, 0x2F, 0xAB, 0x99, 0x30, 0xE1, 0x6E, 0x9D, 
-	0x45, 0x7A, 0xFF, 0xA9, 0xCE, 0x70, 0xB8, 0x16, 0x1A, 0x0E, 
-	0x20, 0xFA, 0xC1, 0x02, 0x81, 0x81, 0x00, 0xFF, 0x30, 0x11, 
-	0xC2, 0x3C, 0x6B, 0xB4, 0xD6, 0x9E, 0x6B, 0xC1, 0x93, 0xD1, 
-	0x48, 0xCE, 0x80, 0x2D, 0xBE, 0xAF, 0xF7, 0xBA, 0xB2, 0xD7, 
-	0xC3, 0xC4, 0x53, 0x6E, 0x15, 0x02, 0xAA, 0x61, 0xB9, 0xEA, 
-	0x05, 0x9B, 0x79, 0x67, 0x0B, 0xCE, 0xD9, 0xFB, 0x98, 0x8C, 
-	0x1D, 0x6B, 0xF4, 0x5A, 0xA7, 0xA0, 0x5E, 0x54, 0x18, 0xE9, 
-	0x31, 0x44, 0x7C, 0xC7, 0x52, 0xD8, 0x6D, 0xA0, 0x3E, 0xD6, 
-	0x14, 0x2D, 0x7B, 0x15, 0x9D, 0x1E, 0x39, 0x87, 0x96, 0xDD, 
-	0xA8, 0x33, 0x55, 0x2A, 0x8E, 0x32, 0xC0, 0xC4, 0xE5, 0xB8, 
-	0xCB, 0xCD, 0x32, 0x8D, 0xAD, 0x7B, 0xE5, 0xC6, 0x7E, 0x4D, 
-	0x6F, 0xF3, 0xA4, 0xC5, 0xA6, 0x40, 0xBE, 0x90, 0x3A, 0x33, 
-	0x6A, 0x24, 0xB2, 0x80, 0x81, 0x12, 0xAC, 0xE3, 0x7B, 0x26, 
-	0x63, 0xCF, 0x88, 0xB9, 0xFF, 0x74, 0x23, 0x37, 0x52, 0xF0, 
-	0xC4, 0x27, 0x5D, 0x45, 0x1F, 0x02, 0x81, 0x81, 0x00, 0xEA, 
-	0x48, 0xA7, 0xDD, 0x73, 0x41, 0x56, 0x21, 0x15, 0xF7, 0x42, 
-	0x45, 0x4D, 0xA9, 0xE1, 0x66, 0x5B, 0xBD, 0x25, 0x7D, 0xF7, 
-	0xA8, 0x65, 0x13, 0xAE, 0x2D, 0x38, 0x11, 0xCD, 0x93, 0xFC, 
-	0x30, 0xA3, 0x2C, 0x44, 0xBB, 0xCF, 0xD0, 0x21, 0x8F, 0xFB, 
-	0xC1, 0xF9, 0xAD, 0x1D, 0xEE, 0x96, 0xCF, 0x97, 0x49, 0x60, 
-	0x53, 0x80, 0xA5, 0xA2, 0xF8, 0xEE, 0xB9, 0xD5, 0x77, 0x44, 
-	0xDD, 0xFD, 0x19, 0x2A, 0xF1, 0x81, 0xF4, 0xD9, 0x3C, 0xEC, 
-	0x73, 0xD0, 0x2A, 0xD8, 0x3C, 0x27, 0x87, 0x79, 0x12, 0x86, 
-	0xE7, 0x57, 0x0C, 0x59, 0xD1, 0x44, 0x55, 0xAE, 0xC3, 0x4D, 
-	0x42, 0xAD, 0xA9, 0xB3, 0x28, 0x61, 0xB4, 0x9C, 0xA6, 0x63, 
-	0xD3, 0x96, 0xB1, 0x75, 0x9F, 0x2A, 0x78, 0x99, 0xE3, 0x1E, 
-	0x71, 0x47, 0x39, 0xF4, 0x52, 0xE3, 0x66, 0xF1, 0xEB, 0x7F, 
-	0xEF, 0xC6, 0x81, 0x93, 0x4C, 0x99, 0xF1, 0x02, 0x81, 0x81, 
-	0x00, 0xC5, 0xB6, 0x20, 0x8C, 0x34, 0xF3, 0xDD, 0xF0, 0x4A, 
-	0x5D, 0x82, 0x65, 0x5C, 0x48, 0xE4, 0x75, 0x3A, 0xFB, 0xFA, 
-	0xAA, 0x1C, 0xE4, 0x63, 0x77, 0x31, 0xAC, 0xD2, 0x25, 0x45, 
-	0x23, 0x6D, 0x03, 0xF5, 0xE4, 0xD2, 0x48, 0x85, 0x26, 0x08, 
-	0xE5, 0xAA, 0xA0, 0xCE, 0x2E, 0x1D, 0x6D, 0xFC, 0xAE, 0xD2, 
-	0xF9, 0x42, 0x7E, 0xEA, 0x6D, 0x59, 0x7A, 0xB3, 0x93, 0xE4, 
-	0x4B, 0x4B, 0x54, 0x63, 0xD8, 0xCE, 0x44, 0x06, 0xC2, 0xEC, 
-	0x9F, 0xF6, 0x05, 0x55, 0x46, 0xF4, 0x3E, 0x8F, 0xF2, 0x0C, 
-	0x30, 0x7E, 0x5C, 0xDD, 0x88, 0x49, 0x3B, 0x59, 0xB9, 0x87, 
-	0xBC, 0xC6, 0xC5, 0x24, 0x8A, 0x10, 0x63, 0x21, 0x1F, 0x66, 
-	0x1A, 0x3E, 0xF4, 0x58, 0xD1, 0x6C, 0x0D, 0x40, 0xB2, 0xC0, 
-	0x1D, 0x63, 0x42, 0x0E, 0xC4, 0x56, 0x0E, 0xC0, 0xCC, 0xC2, 
-	0xD6, 0x66, 0x0E, 0xC4, 0xAB, 0xB5, 0x33, 0xF6, 0x51, 0x02, 
-	0x81, 0x80, 0x19, 0x7E, 0xE6, 0xA5, 0xB6, 0xD1, 0x39, 0x6A, 
-	0x48, 0x55, 0xAC, 0x24, 0x96, 0x9B, 0x12, 0x28, 0x6D, 0x7B, 
-	0x5C, 0x05, 0x25, 0x5A, 0x72, 0x05, 0x7E, 0x42, 0xF5, 0x83, 
-	0x1A, 0x78, 0x2C, 0x4D, 0xAE, 0xB4, 0x36, 0x96, 0xA9, 0xBA, 
-	0xE0, 0xAC, 0x26, 0x9D, 0xA9, 0x6A, 0x29, 0x83, 0xB9, 0x6D, 
-	0xC5, 0xEC, 0xFA, 0x4A, 0x9C, 0x09, 0x6A, 0x7E, 0xE4, 0x9B, 
-	0xDC, 0x9B, 0x2A, 0x27, 0x6E, 0x4F, 0xBA, 0xD8, 0xA5, 0x67, 
-	0xDB, 0xEC, 0x41, 0x5F, 0x29, 0x1C, 0x40, 0x83, 0xEB, 0x59, 
-	0x56, 0xD7, 0xA9, 0x4E, 0xAB, 0xAE, 0x70, 0x67, 0xD1, 0xA3, 
-	0xF1, 0x6C, 0xD7, 0x8F, 0x96, 0x0E, 0x8D, 0xAC, 0xAB, 0x55, 
-	0x58, 0x66, 0xD3, 0x1E, 0x47, 0x9B, 0xF0, 0x4C, 0xED, 0xF6, 
-	0x49, 0xE8, 0xE9, 0x7B, 0x32, 0x61, 0x20, 0x31, 0x95, 0x05, 
-	0xB2, 0xF6, 0x09, 0xEA, 0x32, 0x14, 0x0F, 0xCF, 0x9A, 0x41, 
-	0x02, 0x81, 0x80, 0x77, 0x3F, 0xB6, 0x14, 0x8D, 0xC5, 0x13, 
-	0x08, 0x7E, 0xC9, 0xC4, 0xEA, 0xD4, 0xBA, 0x0D, 0xA4, 0x9E, 
-	0xB3, 0x6E, 0xDE, 0x1A, 0x7A, 0xF8, 0x89, 0x88, 0xEF, 0x36, 
-	0x3C, 0x11, 0xBC, 0x83, 0xE8, 0x30, 0x6C, 0x81, 0x7C, 0x47, 
-	0xF3, 0x4D, 0xCA, 0xEA, 0x56, 0x01, 0x62, 0x55, 0x2E, 0x4B, 
-	0x89, 0xA9, 0xBD, 0x6F, 0x01, 0xF6, 0x74, 0x02, 0xAA, 0xE3, 
-	0x84, 0x66, 0x06, 0x95, 0x34, 0xA1, 0xE2, 0xCA, 0x65, 0xFE, 
-	0xA3, 0x2D, 0x43, 0x97, 0x95, 0x6C, 0x6F, 0xD5, 0xB4, 0x38, 
-	0xF6, 0xF9, 0x95, 0x30, 0xFA, 0xF8, 0x9C, 0x25, 0x2B, 0xB6, 
-	0x14, 0x51, 0xCC, 0x2E, 0xB3, 0x5B, 0xD6, 0xDC, 0x1A, 0xEC, 
-	0x2D, 0x09, 0x5B, 0x3F, 0x3A, 0xD0, 0xB8, 0x4E, 0x27, 0x1F, 
-	0xDC, 0x2A, 0xEE, 0xAC, 0xA9, 0x59, 0x5D, 0x07, 0x63, 0x11, 
-	0x83, 0x0B, 0xD4, 0x74, 0x80, 0xB6, 0x7D, 0x62, 0x45, 0xBF, 
-	0x56
+        0x30, 0x82, 0x04, 0xA3, 0x02, 0x01, 0x00, 0x02, 0x82, 0x01,
+        0x01, 0x00, 0xE9, 0x8A, 0x5D, 0x15, 0xA4, 0xD4, 0x34, 0xB9,
+        0x59, 0xA2, 0xDA, 0xAF, 0x74, 0xC8, 0xC9, 0x03, 0x26, 0x38,
+        0xFA, 0x48, 0xFC, 0x4D, 0x30, 0x6E, 0xEA, 0x76, 0x89, 0xCE,
+        0x4F, 0xF6, 0x87, 0xDE, 0x32, 0x3A, 0x46, 0x6E, 0x38, 0x12,
+        0x58, 0x37, 0x22, 0x0D, 0x80, 0xAC, 0x2D, 0xAF, 0x2F, 0x12,
+        0x3E, 0x62, 0x73, 0x60, 0x66, 0x68, 0x90, 0xB2, 0x6F, 0x47,
+        0x17, 0x04, 0x2B, 0xCA, 0xB7, 0x26, 0xB7, 0x10, 0xC2, 0x13,
+        0xF9, 0x7A, 0x62, 0x0A, 0x93, 0x32, 0x90, 0x42, 0x0D, 0x16,
+        0x2E, 0xFA, 0xD7, 0x29, 0xD7, 0x9F, 0x54, 0xE4, 0xFC, 0x65,
+        0x74, 0xF8, 0xF6, 0x43, 0x6B, 0x4E, 0x9E, 0x34, 0x7F, 0xCB,
+        0x6B, 0x1C, 0x1A, 0xDE, 0x82, 0x81, 0xBF, 0x08, 0x5D, 0x3F,
+        0xC0, 0xB6, 0xB1, 0xA8, 0xA5, 0x9C, 0x81, 0x70, 0xA7, 0x4E,
+        0x32, 0x87, 0x15, 0x1C, 0x78, 0x0E, 0xF0, 0x18, 0xFE, 0xEB,
+        0x4B, 0x37, 0x2B, 0xE9, 0xE1, 0xF7, 0xFA, 0x51, 0xC6, 0x58,
+        0xB9, 0xD8, 0x06, 0x03, 0xED, 0xC0, 0x03, 0x18, 0x55, 0x8B,
+        0x98, 0xFE, 0xB1, 0xF6, 0xD0, 0x3D, 0xFA, 0x63, 0xC0, 0x38,
+        0x19, 0xC7, 0x00, 0xEF, 0x4D, 0x99, 0x60, 0xB4, 0xBA, 0xCE,
+        0xE3, 0xCE, 0xD9, 0x6B, 0x2D, 0x76, 0x94, 0xFF, 0xFB, 0x77,
+        0x18, 0x4A, 0xFE, 0x65, 0xF0, 0x0A, 0x91, 0x5C, 0x3B, 0x22,
+        0x94, 0x85, 0xD0, 0x20, 0x18, 0x59, 0x2E, 0xA5, 0x33, 0x03,
+        0xAC, 0x1B, 0x5F, 0x78, 0x32, 0x11, 0x25, 0xEE, 0x7F, 0x96,
+        0x21, 0xA9, 0xD6, 0x76, 0x97, 0x8D, 0x66, 0x7E, 0xB2, 0x91,
+        0xD0, 0x36, 0x2E, 0xA3, 0x1D, 0xBF, 0xF1, 0x85, 0xED, 0xC0,
+        0x3E, 0x60, 0xB8, 0x5A, 0x9F, 0xAB, 0x80, 0xE0, 0xEA, 0x5D,
+        0x5F, 0x75, 0x56, 0xC7, 0x4D, 0x51, 0x8E, 0xD4, 0x1F, 0x34,
+        0xA6, 0x36, 0xF1, 0x30, 0x1F, 0x51, 0x99, 0x2F, 0x02, 0x03,
+        0x01, 0x00, 0x01, 0x02, 0x82, 0x01, 0x00, 0x52, 0x11, 0x33,
+        0x40, 0xC5, 0xD9, 0x64, 0x65, 0xB5, 0xE0, 0x0A, 0xA5, 0x19,
+        0x8E, 0xED, 0x44, 0x54, 0x0C, 0x35, 0xB7, 0xAC, 0x21, 0x9B,
+        0xE1, 0x7E, 0x37, 0x05, 0x9A, 0x20, 0x73, 0x6B, 0xAF, 0x63,
+        0x4B, 0x23, 0x30, 0xDC, 0x37, 0x66, 0x14, 0x89, 0xBC, 0xE0,
+        0xF8, 0xA0, 0x5D, 0x2D, 0x57, 0x65, 0xE0, 0xC6, 0xD6, 0x9B,
+        0x66, 0x27, 0x62, 0xEC, 0xC3, 0xB8, 0x8C, 0xD8, 0xAE, 0xB5,
+        0xC9, 0xBF, 0x0E, 0xFE, 0x84, 0x72, 0x68, 0xD5, 0x47, 0x0E,
+        0x0E, 0xF8, 0xAE, 0x9D, 0x56, 0xAC, 0x4F, 0xAD, 0x88, 0xA0,
+        0xA2, 0xF6, 0xFC, 0x38, 0xCD, 0x96, 0x5B, 0x5E, 0x7E, 0xB6,
+        0x98, 0xBB, 0xF3, 0x8A, 0xEC, 0xFA, 0xC8, 0xB7, 0x90, 0x75,
+        0xA0, 0x0E, 0x77, 0x6B, 0xFD, 0x59, 0x45, 0x5A, 0x0C, 0xFF,
+        0x95, 0x8D, 0xCE, 0xFE, 0x9B, 0xF6, 0x19, 0x8E, 0x0B, 0xA1,
+        0x0C, 0xEE, 0xC6, 0x79, 0xDD, 0x9D, 0x61, 0x85, 0x5C, 0x19,
+        0x6C, 0x47, 0xCC, 0x08, 0xFF, 0xA5, 0x62, 0xDB, 0xE4, 0x2D,
+        0x2D, 0xDD, 0x14, 0x67, 0xD6, 0x4A, 0x64, 0x2A, 0x66, 0x49,
+        0x54, 0x9C, 0xE3, 0x85, 0x18, 0xE7, 0x31, 0x42, 0xE2, 0xD0,
+        0x2C, 0x20, 0xA0, 0x74, 0x0F, 0x1F, 0x20, 0x89, 0xBA, 0xAB,
+        0x80, 0xD8, 0x38, 0xD9, 0x46, 0x69, 0xBB, 0xEF, 0xCC, 0x8B,
+        0xA1, 0x73, 0xA7, 0xF2, 0xE4, 0x38, 0x5D, 0xD6, 0x75, 0x9F,
+        0x88, 0x0E, 0x56, 0xCD, 0xD8, 0x84, 0x59, 0x29, 0x73, 0xF5,
+        0xA1, 0x79, 0xDA, 0x7A, 0x1F, 0xBF, 0x73, 0x83, 0xC0, 0x6D,
+        0x9F, 0x8B, 0x34, 0x15, 0xC0, 0x6D, 0x69, 0x6A, 0x20, 0xE6,
+        0x51, 0xCF, 0x45, 0x6E, 0xCC, 0x05, 0xC4, 0x3A, 0xC0, 0x9E,
+        0xAA, 0xC1, 0x06, 0x2F, 0xAB, 0x99, 0x30, 0xE1, 0x6E, 0x9D,
+        0x45, 0x7A, 0xFF, 0xA9, 0xCE, 0x70, 0xB8, 0x16, 0x1A, 0x0E,
+        0x20, 0xFA, 0xC1, 0x02, 0x81, 0x81, 0x00, 0xFF, 0x30, 0x11,
+        0xC2, 0x3C, 0x6B, 0xB4, 0xD6, 0x9E, 0x6B, 0xC1, 0x93, 0xD1,
+        0x48, 0xCE, 0x80, 0x2D, 0xBE, 0xAF, 0xF7, 0xBA, 0xB2, 0xD7,
+        0xC3, 0xC4, 0x53, 0x6E, 0x15, 0x02, 0xAA, 0x61, 0xB9, 0xEA,
+        0x05, 0x9B, 0x79, 0x67, 0x0B, 0xCE, 0xD9, 0xFB, 0x98, 0x8C,
+        0x1D, 0x6B, 0xF4, 0x5A, 0xA7, 0xA0, 0x5E, 0x54, 0x18, 0xE9,
+        0x31, 0x44, 0x7C, 0xC7, 0x52, 0xD8, 0x6D, 0xA0, 0x3E, 0xD6,
+        0x14, 0x2D, 0x7B, 0x15, 0x9D, 0x1E, 0x39, 0x87, 0x96, 0xDD,
+        0xA8, 0x33, 0x55, 0x2A, 0x8E, 0x32, 0xC0, 0xC4, 0xE5, 0xB8,
+        0xCB, 0xCD, 0x32, 0x8D, 0xAD, 0x7B, 0xE5, 0xC6, 0x7E, 0x4D,
+        0x6F, 0xF3, 0xA4, 0xC5, 0xA6, 0x40, 0xBE, 0x90, 0x3A, 0x33,
+        0x6A, 0x24, 0xB2, 0x80, 0x81, 0x12, 0xAC, 0xE3, 0x7B, 0x26,
+        0x63, 0xCF, 0x88, 0xB9, 0xFF, 0x74, 0x23, 0x37, 0x52, 0xF0,
+        0xC4, 0x27, 0x5D, 0x45, 0x1F, 0x02, 0x81, 0x81, 0x00, 0xEA,
+        0x48, 0xA7, 0xDD, 0x73, 0x41, 0x56, 0x21, 0x15, 0xF7, 0x42,
+        0x45, 0x4D, 0xA9, 0xE1, 0x66, 0x5B, 0xBD, 0x25, 0x7D, 0xF7,
+        0xA8, 0x65, 0x13, 0xAE, 0x2D, 0x38, 0x11, 0xCD, 0x93, 0xFC,
+        0x30, 0xA3, 0x2C, 0x44, 0xBB, 0xCF, 0xD0, 0x21, 0x8F, 0xFB,
+        0xC1, 0xF9, 0xAD, 0x1D, 0xEE, 0x96, 0xCF, 0x97, 0x49, 0x60,
+        0x53, 0x80, 0xA5, 0xA2, 0xF8, 0xEE, 0xB9, 0xD5, 0x77, 0x44,
+        0xDD, 0xFD, 0x19, 0x2A, 0xF1, 0x81, 0xF4, 0xD9, 0x3C, 0xEC,
+        0x73, 0xD0, 0x2A, 0xD8, 0x3C, 0x27, 0x87, 0x79, 0x12, 0x86,
+        0xE7, 0x57, 0x0C, 0x59, 0xD1, 0x44, 0x55, 0xAE, 0xC3, 0x4D,
+        0x42, 0xAD, 0xA9, 0xB3, 0x28, 0x61, 0xB4, 0x9C, 0xA6, 0x63,
+        0xD3, 0x96, 0xB1, 0x75, 0x9F, 0x2A, 0x78, 0x99, 0xE3, 0x1E,
+        0x71, 0x47, 0x39, 0xF4, 0x52, 0xE3, 0x66, 0xF1, 0xEB, 0x7F,
+        0xEF, 0xC6, 0x81, 0x93, 0x4C, 0x99, 0xF1, 0x02, 0x81, 0x81,
+        0x00, 0xC5, 0xB6, 0x20, 0x8C, 0x34, 0xF3, 0xDD, 0xF0, 0x4A,
+        0x5D, 0x82, 0x65, 0x5C, 0x48, 0xE4, 0x75, 0x3A, 0xFB, 0xFA,
+        0xAA, 0x1C, 0xE4, 0x63, 0x77, 0x31, 0xAC, 0xD2, 0x25, 0x45,
+        0x23, 0x6D, 0x03, 0xF5, 0xE4, 0xD2, 0x48, 0x85, 0x26, 0x08,
+        0xE5, 0xAA, 0xA0, 0xCE, 0x2E, 0x1D, 0x6D, 0xFC, 0xAE, 0xD2,
+        0xF9, 0x42, 0x7E, 0xEA, 0x6D, 0x59, 0x7A, 0xB3, 0x93, 0xE4,
+        0x4B, 0x4B, 0x54, 0x63, 0xD8, 0xCE, 0x44, 0x06, 0xC2, 0xEC,
+        0x9F, 0xF6, 0x05, 0x55, 0x46, 0xF4, 0x3E, 0x8F, 0xF2, 0x0C,
+        0x30, 0x7E, 0x5C, 0xDD, 0x88, 0x49, 0x3B, 0x59, 0xB9, 0x87,
+        0xBC, 0xC6, 0xC5, 0x24, 0x8A, 0x10, 0x63, 0x21, 0x1F, 0x66,
+        0x1A, 0x3E, 0xF4, 0x58, 0xD1, 0x6C, 0x0D, 0x40, 0xB2, 0xC0,
+        0x1D, 0x63, 0x42, 0x0E, 0xC4, 0x56, 0x0E, 0xC0, 0xCC, 0xC2,
+        0xD6, 0x66, 0x0E, 0xC4, 0xAB, 0xB5, 0x33, 0xF6, 0x51, 0x02,
+        0x81, 0x80, 0x19, 0x7E, 0xE6, 0xA5, 0xB6, 0xD1, 0x39, 0x6A,
+        0x48, 0x55, 0xAC, 0x24, 0x96, 0x9B, 0x12, 0x28, 0x6D, 0x7B,
+        0x5C, 0x05, 0x25, 0x5A, 0x72, 0x05, 0x7E, 0x42, 0xF5, 0x83,
+        0x1A, 0x78, 0x2C, 0x4D, 0xAE, 0xB4, 0x36, 0x96, 0xA9, 0xBA,
+        0xE0, 0xAC, 0x26, 0x9D, 0xA9, 0x6A, 0x29, 0x83, 0xB9, 0x6D,
+        0xC5, 0xEC, 0xFA, 0x4A, 0x9C, 0x09, 0x6A, 0x7E, 0xE4, 0x9B,
+        0xDC, 0x9B, 0x2A, 0x27, 0x6E, 0x4F, 0xBA, 0xD8, 0xA5, 0x67,
+        0xDB, 0xEC, 0x41, 0x5F, 0x29, 0x1C, 0x40, 0x83, 0xEB, 0x59,
+        0x56, 0xD7, 0xA9, 0x4E, 0xAB, 0xAE, 0x70, 0x67, 0xD1, 0xA3,
+        0xF1, 0x6C, 0xD7, 0x8F, 0x96, 0x0E, 0x8D, 0xAC, 0xAB, 0x55,
+        0x58, 0x66, 0xD3, 0x1E, 0x47, 0x9B, 0xF0, 0x4C, 0xED, 0xF6,
+        0x49, 0xE8, 0xE9, 0x7B, 0x32, 0x61, 0x20, 0x31, 0x95, 0x05,
+        0xB2, 0xF6, 0x09, 0xEA, 0x32, 0x14, 0x0F, 0xCF, 0x9A, 0x41,
+        0x02, 0x81, 0x80, 0x77, 0x3F, 0xB6, 0x14, 0x8D, 0xC5, 0x13,
+        0x08, 0x7E, 0xC9, 0xC4, 0xEA, 0xD4, 0xBA, 0x0D, 0xA4, 0x9E,
+        0xB3, 0x6E, 0xDE, 0x1A, 0x7A, 0xF8, 0x89, 0x88, 0xEF, 0x36,
+        0x3C, 0x11, 0xBC, 0x83, 0xE8, 0x30, 0x6C, 0x81, 0x7C, 0x47,
+        0xF3, 0x4D, 0xCA, 0xEA, 0x56, 0x01, 0x62, 0x55, 0x2E, 0x4B,
+        0x89, 0xA9, 0xBD, 0x6F, 0x01, 0xF6, 0x74, 0x02, 0xAA, 0xE3,
+        0x84, 0x66, 0x06, 0x95, 0x34, 0xA1, 0xE2, 0xCA, 0x65, 0xFE,
+        0xA3, 0x2D, 0x43, 0x97, 0x95, 0x6C, 0x6F, 0xD5, 0xB4, 0x38,
+        0xF6, 0xF9, 0x95, 0x30, 0xFA, 0xF8, 0x9C, 0x25, 0x2B, 0xB6,
+        0x14, 0x51, 0xCC, 0x2E, 0xB3, 0x5B, 0xD6, 0xDC, 0x1A, 0xEC,
+        0x2D, 0x09, 0x5B, 0x3F, 0x3A, 0xD0, 0xB8, 0x4E, 0x27, 0x1F,
+        0xDC, 0x2A, 0xEE, 0xAC, 0xA9, 0x59, 0x5D, 0x07, 0x63, 0x11,
+        0x83, 0x0B, 0xD4, 0x74, 0x80, 0xB6, 0x7D, 0x62, 0x45, 0xBF,
+        0x56
 };
 static const int sizeof_rsa_key_der_2048 = sizeof(rsa_key_der_2048);
 
 /* ./certs/ca-key.der, 2048-bit */
 static const unsigned char ca_key_der_2048[] =
 {
-	0x30, 0x82, 0x04, 0xA4, 0x02, 0x01, 0x00, 0x02, 0x82, 0x01, 
-	0x01, 0x00, 0xBF, 0x0C, 0xCA, 0x2D, 0x14, 0xB2, 0x1E, 0x84, 
-	0x42, 0x5B, 0xCD, 0x38, 0x1F, 0x4A, 0xF2, 0x4D, 0x75, 0x10, 
-	0xF1, 0xB6, 0x35, 0x9F, 0xDF, 0xCA, 0x7D, 0x03, 0x98, 0xD3, 
-	0xAC, 0xDE, 0x03, 0x66, 0xEE, 0x2A, 0xF1, 0xD8, 0xB0, 0x7D, 
-	0x6E, 0x07, 0x54, 0x0B, 0x10, 0x98, 0x21, 0x4D, 0x80, 0xCB, 
-	0x12, 0x20, 0xE7, 0xCC, 0x4F, 0xDE, 0x45, 0x7D, 0xC9, 0x72, 
-	0x77, 0x32, 0xEA, 0xCA, 0x90, 0xBB, 0x69, 0x52, 0x10, 0x03, 
-	0x2F, 0xA8, 0xF3, 0x95, 0xC5, 0xF1, 0x8B, 0x62, 0x56, 0x1B, 
-	0xEF, 0x67, 0x6F, 0xA4, 0x10, 0x41, 0x95, 0xAD, 0x0A, 0x9B, 
-	0xE3, 0xA5, 0xC0, 0xB0, 0xD2, 0x70, 0x76, 0x50, 0x30, 0x5B, 
-	0xA8, 0xE8, 0x08, 0x2C, 0x7C, 0xED, 0xA7, 0xA2, 0x7A, 0x8D, 
-	0x38, 0x29, 0x1C, 0xAC, 0xC7, 0xED, 0xF2, 0x7C, 0x95, 0xB0, 
-	0x95, 0x82, 0x7D, 0x49, 0x5C, 0x38, 0xCD, 0x77, 0x25, 0xEF, 
-	0xBD, 0x80, 0x75, 0x53, 0x94, 0x3C, 0x3D, 0xCA, 0x63, 0x5B, 
-	0x9F, 0x15, 0xB5, 0xD3, 0x1D, 0x13, 0x2F, 0x19, 0xD1, 0x3C, 
-	0xDB, 0x76, 0x3A, 0xCC, 0xB8, 0x7D, 0xC9, 0xE5, 0xC2, 0xD7, 
-	0xDA, 0x40, 0x6F, 0xD8, 0x21, 0xDC, 0x73, 0x1B, 0x42, 0x2D, 
-	0x53, 0x9C, 0xFE, 0x1A, 0xFC, 0x7D, 0xAB, 0x7A, 0x36, 0x3F, 
-	0x98, 0xDE, 0x84, 0x7C, 0x05, 0x67, 0xCE, 0x6A, 0x14, 0x38, 
-	0x87, 0xA9, 0xF1, 0x8C, 0xB5, 0x68, 0xCB, 0x68, 0x7F, 0x71, 
-	0x20, 0x2B, 0xF5, 0xA0, 0x63, 0xF5, 0x56, 0x2F, 0xA3, 0x26, 
-	0xD2, 0xB7, 0x6F, 0xB1, 0x5A, 0x17, 0xD7, 0x38, 0x99, 0x08, 
-	0xFE, 0x93, 0x58, 0x6F, 0xFE, 0xC3, 0x13, 0x49, 0x08, 0x16, 
-	0x0B, 0xA7, 0x4D, 0x67, 0x00, 0x52, 0x31, 0x67, 0x23, 0x4E, 
-	0x98, 0xED, 0x51, 0x45, 0x1D, 0xB9, 0x04, 0xD9, 0x0B, 0xEC, 
-	0xD8, 0x28, 0xB3, 0x4B, 0xBD, 0xED, 0x36, 0x79, 0x02, 0x03, 
-	0x01, 0x00, 0x01, 0x02, 0x82, 0x01, 0x00, 0x3D, 0x6E, 0x4E, 
-	0x60, 0x1A, 0x84, 0x7F, 0x9D, 0x85, 0x7C, 0xE1, 0x4B, 0x07, 
-	0x7C, 0xE0, 0xD6, 0x99, 0x2A, 0xDE, 0x9D, 0xF9, 0x36, 0x34, 
-	0x0E, 0x77, 0x0E, 0x3E, 0x08, 0xEA, 0x4F, 0xE5, 0x06, 0x26, 
-	0xD4, 0xF6, 0x38, 0xF7, 0xDF, 0x0D, 0x0F, 0x1C, 0x2E, 0x06, 
-	0xA2, 0xF4, 0x2A, 0x68, 0x9C, 0x63, 0x72, 0xE3, 0x35, 0xE6, 
-	0x04, 0x91, 0x91, 0xB5, 0xC1, 0xB1, 0xA4, 0x54, 0xAC, 0xD7, 
-	0xC6, 0xFB, 0x41, 0xA0, 0xD6, 0x75, 0x6F, 0xBD, 0x0B, 0x4E, 
-	0xBF, 0xB1, 0x52, 0xE8, 0x5F, 0x49, 0x26, 0x98, 0x56, 0x47, 
-	0xC7, 0xDE, 0xE9, 0xEA, 0x3C, 0x60, 0x01, 0xBF, 0x28, 0xDC, 
-	0x31, 0xBF, 0x49, 0x5F, 0x93, 0x49, 0x87, 0x7A, 0x81, 0x5B, 
-	0x96, 0x4B, 0x4D, 0xCA, 0x5C, 0x38, 0x4F, 0xB7, 0xE1, 0xB2, 
-	0xD3, 0xC7, 0x21, 0xDA, 0x3C, 0x12, 0x87, 0x07, 0xE4, 0x1B, 
-	0xDC, 0x43, 0xEC, 0xE8, 0xEC, 0x54, 0x61, 0xE7, 0xF6, 0xED, 
-	0xA6, 0x0B, 0x2E, 0xF5, 0xDF, 0x82, 0x7F, 0xC6, 0x1F, 0x61, 
-	0x19, 0x9C, 0xA4, 0x83, 0x39, 0xDF, 0x21, 0x85, 0x89, 0x6F, 
-	0x77, 0xAF, 0x86, 0x15, 0x32, 0x08, 0xA2, 0x5A, 0x0B, 0x26, 
-	0x61, 0xFB, 0x70, 0x0C, 0xCA, 0x9C, 0x38, 0x7D, 0xBC, 0x22, 
-	0xEE, 0xEB, 0xA3, 0xA8, 0x16, 0x00, 0xF9, 0x8A, 0x80, 0x1E, 
-	0x00, 0x84, 0xA8, 0x4A, 0x41, 0xF8, 0x84, 0x03, 0x67, 0x2F, 
-	0x23, 0x5B, 0x2F, 0x9B, 0x6B, 0x26, 0xC3, 0x07, 0x34, 0x94, 
-	0xA3, 0x03, 0x3B, 0x72, 0xD5, 0x9F, 0x72, 0xE0, 0xAD, 0xCC, 
-	0x34, 0xAB, 0xBD, 0xC7, 0xD5, 0xF5, 0x26, 0x30, 0x85, 0x0F, 
-	0x30, 0x23, 0x39, 0x52, 0xFF, 0x3C, 0xCB, 0x99, 0x21, 0x4D, 
-	0x88, 0xA5, 0xAB, 0xEE, 0x62, 0xB9, 0xC7, 0xE0, 0xBB, 0x47, 
-	0x87, 0xC1, 0x69, 0xCF, 0x73, 0xF3, 0x30, 0xBE, 0xCE, 0x39, 
-	0x04, 0x9C, 0xE5, 0x02, 0x81, 0x81, 0x00, 0xE1, 0x76, 0x45, 
-	0x80, 0x59, 0xB6, 0xD3, 0x49, 0xDF, 0x0A, 0xEF, 0x12, 0xD6, 
-	0x0F, 0xF0, 0xB7, 0xCB, 0x2A, 0x37, 0xBF, 0xA7, 0xF8, 0xB5, 
-	0x4D, 0xF5, 0x31, 0x35, 0xAD, 0xE4, 0xA3, 0x94, 0xA1, 0xDB, 
-	0xF1, 0x96, 0xAD, 0xB5, 0x05, 0x64, 0x85, 0x83, 0xFC, 0x1B, 
-	0x5B, 0x29, 0xAA, 0xBE, 0xF8, 0x26, 0x3F, 0x76, 0x7E, 0xAD, 
-	0x1C, 0xF0, 0xCB, 0xD7, 0x26, 0xB4, 0x1B, 0x05, 0x8E, 0x56, 
-	0x86, 0x7E, 0x08, 0x62, 0x21, 0xC1, 0x86, 0xD6, 0x47, 0x79, 
-	0x3E, 0xB7, 0x5D, 0xA4, 0xC6, 0x3A, 0xD7, 0xB1, 0x74, 0x20, 
-	0xF6, 0x50, 0x97, 0x41, 0x04, 0x53, 0xED, 0x3F, 0x26, 0xD6, 
-	0x6F, 0x91, 0xFA, 0x68, 0x26, 0xEC, 0x2A, 0xDC, 0x9A, 0xF1, 
-	0xE7, 0xDC, 0xFB, 0x73, 0xF0, 0x79, 0x43, 0x1B, 0x21, 0xA3, 
-	0x59, 0x04, 0x63, 0x52, 0x07, 0xC9, 0xD7, 0xE6, 0xD1, 0x1B, 
-	0x5D, 0x5E, 0x96, 0xFA, 0x53, 0x02, 0x81, 0x81, 0x00, 0xD8, 
-	0xED, 0x4E, 0x64, 0x61, 0x6B, 0x91, 0x0C, 0x61, 0x01, 0xB5, 
-	0x0F, 0xBB, 0x44, 0x67, 0x53, 0x1E, 0xDC, 0x07, 0xC4, 0x24, 
-	0x7E, 0x9E, 0x6C, 0x84, 0x23, 0x91, 0x0C, 0xE4, 0x12, 0x04, 
-	0x16, 0x4D, 0x78, 0x98, 0xCC, 0x96, 0x3D, 0x20, 0x4E, 0x0F, 
-	0x45, 0x9A, 0xB6, 0xF8, 0xB3, 0x93, 0x0D, 0xB2, 0xA2, 0x1B, 
-	0x29, 0xF2, 0x26, 0x79, 0xC8, 0xC5, 0xD2, 0x78, 0x7E, 0x5E, 
-	0x73, 0xF2, 0xD7, 0x70, 0x61, 0xBB, 0x40, 0xCE, 0x61, 0x05, 
-	0xFE, 0x69, 0x1E, 0x82, 0x29, 0xE6, 0x14, 0xB8, 0xA1, 0xE7, 
-	0x96, 0xD0, 0x23, 0x3F, 0x05, 0x93, 0x00, 0xF2, 0xE1, 0x4D, 
-	0x7E, 0xED, 0xB7, 0x96, 0x6C, 0xF7, 0xF0, 0xE4, 0xD1, 0xCF, 
-	0x01, 0x98, 0x4F, 0xDC, 0x74, 0x54, 0xAA, 0x6D, 0x5E, 0x5A, 
-	0x41, 0x31, 0xFE, 0xFF, 0x9A, 0xB6, 0xA0, 0x05, 0xDD, 0xA9, 
-	0x10, 0x54, 0xF8, 0x6B, 0xD0, 0xAA, 0x83, 0x02, 0x81, 0x80, 
-	0x21, 0xD3, 0x04, 0x8A, 0x44, 0xEB, 0x50, 0xB7, 0x7C, 0x66, 
-	0xBF, 0x87, 0x2B, 0xE6, 0x28, 0x4E, 0xEA, 0x83, 0xE2, 0xE9, 
-	0x35, 0xE1, 0xF2, 0x11, 0x47, 0xFF, 0xA1, 0xF5, 0xFC, 0x9F, 
-	0x2D, 0xE5, 0x3A, 0x81, 0xFC, 0x01, 0x03, 0x6F, 0x53, 0xAD, 
-	0x54, 0x27, 0xB6, 0x52, 0xEE, 0xE5, 0x56, 0xD1, 0x13, 0xAB, 
-	0xE1, 0xB3, 0x0F, 0x75, 0x90, 0x0A, 0x84, 0xB4, 0xA1, 0xC0, 
-	0x8C, 0x0C, 0xD6, 0x9E, 0x46, 0xBA, 0x2B, 0x3E, 0xB5, 0x31, 
-	0xED, 0x63, 0xBB, 0xA4, 0xD5, 0x0D, 0x8F, 0x72, 0xCD, 0xD1, 
-	0x1E, 0x26, 0x35, 0xEB, 0xBE, 0x1B, 0x72, 0xFD, 0x9B, 0x39, 
-	0xB4, 0x87, 0xB7, 0x13, 0xF5, 0xEA, 0x83, 0x45, 0x93, 0x98, 
-	0xBA, 0x8F, 0xE4, 0x4A, 0xCC, 0xB4, 0x4C, 0xA8, 0x7F, 0x08, 
-	0xBA, 0x41, 0x49, 0xA8, 0x49, 0x28, 0x3D, 0x5E, 0x3D, 0xC1, 
-	0xCE, 0x37, 0x00, 0xCB, 0xF9, 0x2C, 0xDD, 0x51, 0x02, 0x81, 
-	0x81, 0x00, 0xA1, 0x57, 0x9F, 0x3E, 0xB9, 0xD6, 0xAF, 0x83, 
-	0x6D, 0x83, 0x3F, 0x8F, 0xFB, 0xD0, 0xDC, 0xA8, 0xCE, 0x03, 
-	0x09, 0x23, 0xB1, 0xA1, 0x1B, 0x63, 0xCA, 0xC4, 0x49, 0x56, 
-	0x35, 0x2B, 0xD1, 0x2E, 0x65, 0x60, 0x95, 0x05, 0x55, 0x99, 
-	0x11, 0x35, 0xFD, 0xD5, 0xDF, 0x44, 0xC7, 0xA5, 0x88, 0x72, 
-	0x5F, 0xB2, 0x82, 0x51, 0xA8, 0x71, 0x45, 0x93, 0x36, 0xCF, 
-	0x5C, 0x1F, 0x61, 0x51, 0x0C, 0x05, 0x80, 0xE8, 0xAF, 0xC5, 
-	0x7B, 0xBA, 0x5E, 0x22, 0xE3, 0x3C, 0x75, 0xC3, 0x84, 0x05, 
-	0x55, 0x6D, 0xD6, 0x3A, 0x2D, 0x84, 0x89, 0x93, 0x33, 0xCB, 
-	0x38, 0xDA, 0xAA, 0x31, 0x05, 0xCD, 0xCE, 0x6C, 0x2D, 0xDD, 
-	0x55, 0xD3, 0x57, 0x0B, 0xF0, 0xA5, 0x35, 0x6A, 0xB0, 0xAE, 
-	0x31, 0xBA, 0x43, 0x96, 0xCA, 0x00, 0xC7, 0x4B, 0xE3, 0x19, 
-	0x12, 0x43, 0xD3, 0x42, 0xFA, 0x6F, 0xEA, 0x80, 0xC0, 0xD1, 
-	0x02, 0x81, 0x81, 0x00, 0xB9, 0xDB, 0x89, 0x20, 0x34, 0x27, 
-	0x70, 0x62, 0x34, 0xEA, 0x5F, 0x25, 0x62, 0x12, 0xF3, 0x9D, 
-	0x81, 0xBF, 0x48, 0xEE, 0x9A, 0x0E, 0xC1, 0x8D, 0x10, 0xFF, 
-	0x65, 0x9A, 0x9D, 0x2D, 0x1A, 0x8A, 0x94, 0x5A, 0xC8, 0xC0, 
-	0xA5, 0xA5, 0x84, 0x61, 0x9E, 0xD4, 0x24, 0xB9, 0xEF, 0xA9, 
-	0x9D, 0xC9, 0x77, 0x0B, 0xC7, 0x70, 0x66, 0x3D, 0xBA, 0xC8, 
-	0x54, 0xDF, 0xD2, 0x33, 0xE1, 0xF5, 0x7F, 0xF9, 0x27, 0x61, 
-	0xBE, 0x57, 0x45, 0xDD, 0xB7, 0x45, 0x17, 0x24, 0xF5, 0x23, 
-	0xE4, 0x38, 0x0E, 0x91, 0x27, 0xEE, 0xE3, 0x20, 0xD8, 0x14, 
-	0xC8, 0x94, 0x47, 0x77, 0x40, 0x77, 0x45, 0x18, 0x9E, 0x0D, 
-	0xCE, 0x79, 0x3F, 0x57, 0x31, 0x56, 0x09, 0x49, 0x67, 0xBE, 
-	0x94, 0x58, 0x4F, 0xF6, 0xC4, 0xAB, 0xE2, 0x89, 0xE3, 0xE3, 
-	0x8A, 0xC0, 0x05, 0x55, 0x2C, 0x24, 0xC0, 0x4A, 0x97, 0x04, 
-	0x27, 0x9A
+        0x30, 0x82, 0x04, 0xA4, 0x02, 0x01, 0x00, 0x02, 0x82, 0x01,
+        0x01, 0x00, 0xBF, 0x0C, 0xCA, 0x2D, 0x14, 0xB2, 0x1E, 0x84,
+        0x42, 0x5B, 0xCD, 0x38, 0x1F, 0x4A, 0xF2, 0x4D, 0x75, 0x10,
+        0xF1, 0xB6, 0x35, 0x9F, 0xDF, 0xCA, 0x7D, 0x03, 0x98, 0xD3,
+        0xAC, 0xDE, 0x03, 0x66, 0xEE, 0x2A, 0xF1, 0xD8, 0xB0, 0x7D,
+        0x6E, 0x07, 0x54, 0x0B, 0x10, 0x98, 0x21, 0x4D, 0x80, 0xCB,
+        0x12, 0x20, 0xE7, 0xCC, 0x4F, 0xDE, 0x45, 0x7D, 0xC9, 0x72,
+        0x77, 0x32, 0xEA, 0xCA, 0x90, 0xBB, 0x69, 0x52, 0x10, 0x03,
+        0x2F, 0xA8, 0xF3, 0x95, 0xC5, 0xF1, 0x8B, 0x62, 0x56, 0x1B,
+        0xEF, 0x67, 0x6F, 0xA4, 0x10, 0x41, 0x95, 0xAD, 0x0A, 0x9B,
+        0xE3, 0xA5, 0xC0, 0xB0, 0xD2, 0x70, 0x76, 0x50, 0x30, 0x5B,
+        0xA8, 0xE8, 0x08, 0x2C, 0x7C, 0xED, 0xA7, 0xA2, 0x7A, 0x8D,
+        0x38, 0x29, 0x1C, 0xAC, 0xC7, 0xED, 0xF2, 0x7C, 0x95, 0xB0,
+        0x95, 0x82, 0x7D, 0x49, 0x5C, 0x38, 0xCD, 0x77, 0x25, 0xEF,
+        0xBD, 0x80, 0x75, 0x53, 0x94, 0x3C, 0x3D, 0xCA, 0x63, 0x5B,
+        0x9F, 0x15, 0xB5, 0xD3, 0x1D, 0x13, 0x2F, 0x19, 0xD1, 0x3C,
+        0xDB, 0x76, 0x3A, 0xCC, 0xB8, 0x7D, 0xC9, 0xE5, 0xC2, 0xD7,
+        0xDA, 0x40, 0x6F, 0xD8, 0x21, 0xDC, 0x73, 0x1B, 0x42, 0x2D,
+        0x53, 0x9C, 0xFE, 0x1A, 0xFC, 0x7D, 0xAB, 0x7A, 0x36, 0x3F,
+        0x98, 0xDE, 0x84, 0x7C, 0x05, 0x67, 0xCE, 0x6A, 0x14, 0x38,
+        0x87, 0xA9, 0xF1, 0x8C, 0xB5, 0x68, 0xCB, 0x68, 0x7F, 0x71,
+        0x20, 0x2B, 0xF5, 0xA0, 0x63, 0xF5, 0x56, 0x2F, 0xA3, 0x26,
+        0xD2, 0xB7, 0x6F, 0xB1, 0x5A, 0x17, 0xD7, 0x38, 0x99, 0x08,
+        0xFE, 0x93, 0x58, 0x6F, 0xFE, 0xC3, 0x13, 0x49, 0x08, 0x16,
+        0x0B, 0xA7, 0x4D, 0x67, 0x00, 0x52, 0x31, 0x67, 0x23, 0x4E,
+        0x98, 0xED, 0x51, 0x45, 0x1D, 0xB9, 0x04, 0xD9, 0x0B, 0xEC,
+        0xD8, 0x28, 0xB3, 0x4B, 0xBD, 0xED, 0x36, 0x79, 0x02, 0x03,
+        0x01, 0x00, 0x01, 0x02, 0x82, 0x01, 0x00, 0x3D, 0x6E, 0x4E,
+        0x60, 0x1A, 0x84, 0x7F, 0x9D, 0x85, 0x7C, 0xE1, 0x4B, 0x07,
+        0x7C, 0xE0, 0xD6, 0x99, 0x2A, 0xDE, 0x9D, 0xF9, 0x36, 0x34,
+        0x0E, 0x77, 0x0E, 0x3E, 0x08, 0xEA, 0x4F, 0xE5, 0x06, 0x26,
+        0xD4, 0xF6, 0x38, 0xF7, 0xDF, 0x0D, 0x0F, 0x1C, 0x2E, 0x06,
+        0xA2, 0xF4, 0x2A, 0x68, 0x9C, 0x63, 0x72, 0xE3, 0x35, 0xE6,
+        0x04, 0x91, 0x91, 0xB5, 0xC1, 0xB1, 0xA4, 0x54, 0xAC, 0xD7,
+        0xC6, 0xFB, 0x41, 0xA0, 0xD6, 0x75, 0x6F, 0xBD, 0x0B, 0x4E,
+        0xBF, 0xB1, 0x52, 0xE8, 0x5F, 0x49, 0x26, 0x98, 0x56, 0x47,
+        0xC7, 0xDE, 0xE9, 0xEA, 0x3C, 0x60, 0x01, 0xBF, 0x28, 0xDC,
+        0x31, 0xBF, 0x49, 0x5F, 0x93, 0x49, 0x87, 0x7A, 0x81, 0x5B,
+        0x96, 0x4B, 0x4D, 0xCA, 0x5C, 0x38, 0x4F, 0xB7, 0xE1, 0xB2,
+        0xD3, 0xC7, 0x21, 0xDA, 0x3C, 0x12, 0x87, 0x07, 0xE4, 0x1B,
+        0xDC, 0x43, 0xEC, 0xE8, 0xEC, 0x54, 0x61, 0xE7, 0xF6, 0xED,
+        0xA6, 0x0B, 0x2E, 0xF5, 0xDF, 0x82, 0x7F, 0xC6, 0x1F, 0x61,
+        0x19, 0x9C, 0xA4, 0x83, 0x39, 0xDF, 0x21, 0x85, 0x89, 0x6F,
+        0x77, 0xAF, 0x86, 0x15, 0x32, 0x08, 0xA2, 0x5A, 0x0B, 0x26,
+        0x61, 0xFB, 0x70, 0x0C, 0xCA, 0x9C, 0x38, 0x7D, 0xBC, 0x22,
+        0xEE, 0xEB, 0xA3, 0xA8, 0x16, 0x00, 0xF9, 0x8A, 0x80, 0x1E,
+        0x00, 0x84, 0xA8, 0x4A, 0x41, 0xF8, 0x84, 0x03, 0x67, 0x2F,
+        0x23, 0x5B, 0x2F, 0x9B, 0x6B, 0x26, 0xC3, 0x07, 0x34, 0x94,
+        0xA3, 0x03, 0x3B, 0x72, 0xD5, 0x9F, 0x72, 0xE0, 0xAD, 0xCC,
+        0x34, 0xAB, 0xBD, 0xC7, 0xD5, 0xF5, 0x26, 0x30, 0x85, 0x0F,
+        0x30, 0x23, 0x39, 0x52, 0xFF, 0x3C, 0xCB, 0x99, 0x21, 0x4D,
+        0x88, 0xA5, 0xAB, 0xEE, 0x62, 0xB9, 0xC7, 0xE0, 0xBB, 0x47,
+        0x87, 0xC1, 0x69, 0xCF, 0x73, 0xF3, 0x30, 0xBE, 0xCE, 0x39,
+        0x04, 0x9C, 0xE5, 0x02, 0x81, 0x81, 0x00, 0xE1, 0x76, 0x45,
+        0x80, 0x59, 0xB6, 0xD3, 0x49, 0xDF, 0x0A, 0xEF, 0x12, 0xD6,
+        0x0F, 0xF0, 0xB7, 0xCB, 0x2A, 0x37, 0xBF, 0xA7, 0xF8, 0xB5,
+        0x4D, 0xF5, 0x31, 0x35, 0xAD, 0xE4, 0xA3, 0x94, 0xA1, 0xDB,
+        0xF1, 0x96, 0xAD, 0xB5, 0x05, 0x64, 0x85, 0x83, 0xFC, 0x1B,
+        0x5B, 0x29, 0xAA, 0xBE, 0xF8, 0x26, 0x3F, 0x76, 0x7E, 0xAD,
+        0x1C, 0xF0, 0xCB, 0xD7, 0x26, 0xB4, 0x1B, 0x05, 0x8E, 0x56,
+        0x86, 0x7E, 0x08, 0x62, 0x21, 0xC1, 0x86, 0xD6, 0x47, 0x79,
+        0x3E, 0xB7, 0x5D, 0xA4, 0xC6, 0x3A, 0xD7, 0xB1, 0x74, 0x20,
+        0xF6, 0x50, 0x97, 0x41, 0x04, 0x53, 0xED, 0x3F, 0x26, 0xD6,
+        0x6F, 0x91, 0xFA, 0x68, 0x26, 0xEC, 0x2A, 0xDC, 0x9A, 0xF1,
+        0xE7, 0xDC, 0xFB, 0x73, 0xF0, 0x79, 0x43, 0x1B, 0x21, 0xA3,
+        0x59, 0x04, 0x63, 0x52, 0x07, 0xC9, 0xD7, 0xE6, 0xD1, 0x1B,
+        0x5D, 0x5E, 0x96, 0xFA, 0x53, 0x02, 0x81, 0x81, 0x00, 0xD8,
+        0xED, 0x4E, 0x64, 0x61, 0x6B, 0x91, 0x0C, 0x61, 0x01, 0xB5,
+        0x0F, 0xBB, 0x44, 0x67, 0x53, 0x1E, 0xDC, 0x07, 0xC4, 0x24,
+        0x7E, 0x9E, 0x6C, 0x84, 0x23, 0x91, 0x0C, 0xE4, 0x12, 0x04,
+        0x16, 0x4D, 0x78, 0x98, 0xCC, 0x96, 0x3D, 0x20, 0x4E, 0x0F,
+        0x45, 0x9A, 0xB6, 0xF8, 0xB3, 0x93, 0x0D, 0xB2, 0xA2, 0x1B,
+        0x29, 0xF2, 0x26, 0x79, 0xC8, 0xC5, 0xD2, 0x78, 0x7E, 0x5E,
+        0x73, 0xF2, 0xD7, 0x70, 0x61, 0xBB, 0x40, 0xCE, 0x61, 0x05,
+        0xFE, 0x69, 0x1E, 0x82, 0x29, 0xE6, 0x14, 0xB8, 0xA1, 0xE7,
+        0x96, 0xD0, 0x23, 0x3F, 0x05, 0x93, 0x00, 0xF2, 0xE1, 0x4D,
+        0x7E, 0xED, 0xB7, 0x96, 0x6C, 0xF7, 0xF0, 0xE4, 0xD1, 0xCF,
+        0x01, 0x98, 0x4F, 0xDC, 0x74, 0x54, 0xAA, 0x6D, 0x5E, 0x5A,
+        0x41, 0x31, 0xFE, 0xFF, 0x9A, 0xB6, 0xA0, 0x05, 0xDD, 0xA9,
+        0x10, 0x54, 0xF8, 0x6B, 0xD0, 0xAA, 0x83, 0x02, 0x81, 0x80,
+        0x21, 0xD3, 0x04, 0x8A, 0x44, 0xEB, 0x50, 0xB7, 0x7C, 0x66,
+        0xBF, 0x87, 0x2B, 0xE6, 0x28, 0x4E, 0xEA, 0x83, 0xE2, 0xE9,
+        0x35, 0xE1, 0xF2, 0x11, 0x47, 0xFF, 0xA1, 0xF5, 0xFC, 0x9F,
+        0x2D, 0xE5, 0x3A, 0x81, 0xFC, 0x01, 0x03, 0x6F, 0x53, 0xAD,
+        0x54, 0x27, 0xB6, 0x52, 0xEE, 0xE5, 0x56, 0xD1, 0x13, 0xAB,
+        0xE1, 0xB3, 0x0F, 0x75, 0x90, 0x0A, 0x84, 0xB4, 0xA1, 0xC0,
+        0x8C, 0x0C, 0xD6, 0x9E, 0x46, 0xBA, 0x2B, 0x3E, 0xB5, 0x31,
+        0xED, 0x63, 0xBB, 0xA4, 0xD5, 0x0D, 0x8F, 0x72, 0xCD, 0xD1,
+        0x1E, 0x26, 0x35, 0xEB, 0xBE, 0x1B, 0x72, 0xFD, 0x9B, 0x39,
+        0xB4, 0x87, 0xB7, 0x13, 0xF5, 0xEA, 0x83, 0x45, 0x93, 0x98,
+        0xBA, 0x8F, 0xE4, 0x4A, 0xCC, 0xB4, 0x4C, 0xA8, 0x7F, 0x08,
+        0xBA, 0x41, 0x49, 0xA8, 0x49, 0x28, 0x3D, 0x5E, 0x3D, 0xC1,
+        0xCE, 0x37, 0x00, 0xCB, 0xF9, 0x2C, 0xDD, 0x51, 0x02, 0x81,
+        0x81, 0x00, 0xA1, 0x57, 0x9F, 0x3E, 0xB9, 0xD6, 0xAF, 0x83,
+        0x6D, 0x83, 0x3F, 0x8F, 0xFB, 0xD0, 0xDC, 0xA8, 0xCE, 0x03,
+        0x09, 0x23, 0xB1, 0xA1, 0x1B, 0x63, 0xCA, 0xC4, 0x49, 0x56,
+        0x35, 0x2B, 0xD1, 0x2E, 0x65, 0x60, 0x95, 0x05, 0x55, 0x99,
+        0x11, 0x35, 0xFD, 0xD5, 0xDF, 0x44, 0xC7, 0xA5, 0x88, 0x72,
+        0x5F, 0xB2, 0x82, 0x51, 0xA8, 0x71, 0x45, 0x93, 0x36, 0xCF,
+        0x5C, 0x1F, 0x61, 0x51, 0x0C, 0x05, 0x80, 0xE8, 0xAF, 0xC5,
+        0x7B, 0xBA, 0x5E, 0x22, 0xE3, 0x3C, 0x75, 0xC3, 0x84, 0x05,
+        0x55, 0x6D, 0xD6, 0x3A, 0x2D, 0x84, 0x89, 0x93, 0x33, 0xCB,
+        0x38, 0xDA, 0xAA, 0x31, 0x05, 0xCD, 0xCE, 0x6C, 0x2D, 0xDD,
+        0x55, 0xD3, 0x57, 0x0B, 0xF0, 0xA5, 0x35, 0x6A, 0xB0, 0xAE,
+        0x31, 0xBA, 0x43, 0x96, 0xCA, 0x00, 0xC7, 0x4B, 0xE3, 0x19,
+        0x12, 0x43, 0xD3, 0x42, 0xFA, 0x6F, 0xEA, 0x80, 0xC0, 0xD1,
+        0x02, 0x81, 0x81, 0x00, 0xB9, 0xDB, 0x89, 0x20, 0x34, 0x27,
+        0x70, 0x62, 0x34, 0xEA, 0x5F, 0x25, 0x62, 0x12, 0xF3, 0x9D,
+        0x81, 0xBF, 0x48, 0xEE, 0x9A, 0x0E, 0xC1, 0x8D, 0x10, 0xFF,
+        0x65, 0x9A, 0x9D, 0x2D, 0x1A, 0x8A, 0x94, 0x5A, 0xC8, 0xC0,
+        0xA5, 0xA5, 0x84, 0x61, 0x9E, 0xD4, 0x24, 0xB9, 0xEF, 0xA9,
+        0x9D, 0xC9, 0x77, 0x0B, 0xC7, 0x70, 0x66, 0x3D, 0xBA, 0xC8,
+        0x54, 0xDF, 0xD2, 0x33, 0xE1, 0xF5, 0x7F, 0xF9, 0x27, 0x61,
+        0xBE, 0x57, 0x45, 0xDD, 0xB7, 0x45, 0x17, 0x24, 0xF5, 0x23,
+        0xE4, 0x38, 0x0E, 0x91, 0x27, 0xEE, 0xE3, 0x20, 0xD8, 0x14,
+        0xC8, 0x94, 0x47, 0x77, 0x40, 0x77, 0x45, 0x18, 0x9E, 0x0D,
+        0xCE, 0x79, 0x3F, 0x57, 0x31, 0x56, 0x09, 0x49, 0x67, 0xBE,
+        0x94, 0x58, 0x4F, 0xF6, 0xC4, 0xAB, 0xE2, 0x89, 0xE3, 0xE3,
+        0x8A, 0xC0, 0x05, 0x55, 0x2C, 0x24, 0xC0, 0x4A, 0x97, 0x04,
+        0x27, 0x9A
 };
 static const int sizeof_ca_key_der_2048 = sizeof(ca_key_der_2048);
 
 /* ./certs/ca-cert.der, 2048-bit */
 static const unsigned char ca_cert_der_2048[] =
 {
-	0x30, 0x82, 0x04, 0xAA, 0x30, 0x82, 0x03, 0x92, 0xA0, 0x03, 
-	0x02, 0x01, 0x02, 0x02, 0x09, 0x00, 0x86, 0xFF, 0xF5, 0x8E, 
-	0x10, 0xDE, 0xB8, 0xFB, 0x30, 0x0D, 0x06, 0x09, 0x2A, 0x86, 
-	0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0B, 0x05, 0x00, 0x30, 
-	0x81, 0x94, 0x31, 0x0B, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 
-	0x06, 0x13, 0x02, 0x55, 0x53, 0x31, 0x10, 0x30, 0x0E, 0x06, 
-	0x03, 0x55, 0x04, 0x08, 0x0C, 0x07, 0x4D, 0x6F, 0x6E, 0x74, 
-	0x61, 0x6E, 0x61, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 
-	0x04, 0x07, 0x0C, 0x07, 0x42, 0x6F, 0x7A, 0x65, 0x6D, 0x61, 
-	0x6E, 0x31, 0x11, 0x30, 0x0F, 0x06, 0x03, 0x55, 0x04, 0x0A, 
-	0x0C, 0x08, 0x53, 0x61, 0x77, 0x74, 0x6F, 0x6F, 0x74, 0x68, 
-	0x31, 0x13, 0x30, 0x11, 0x06, 0x03, 0x55, 0x04, 0x0B, 0x0C, 
-	0x0A, 0x43, 0x6F, 0x6E, 0x73, 0x75, 0x6C, 0x74, 0x69, 0x6E, 
-	0x67, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 0x03, 
-	0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77, 0x6F, 0x6C, 0x66, 
-	0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x31, 0x1F, 0x30, 
-	0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 
-	0x09, 0x01, 0x16, 0x10, 0x69, 0x6E, 0x66, 0x6F, 0x40, 0x77, 
-	0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 
-	0x30, 0x1E, 0x17, 0x0D, 0x31, 0x38, 0x30, 0x34, 0x31, 0x33, 
-	0x31, 0x35, 0x32, 0x33, 0x30, 0x39, 0x5A, 0x17, 0x0D, 0x32, 
-	0x31, 0x30, 0x31, 0x30, 0x37, 0x31, 0x35, 0x32, 0x33, 0x30, 
-	0x39, 0x5A, 0x30, 0x81, 0x94, 0x31, 0x0B, 0x30, 0x09, 0x06, 
-	0x03, 0x55, 0x04, 0x06, 0x13, 0x02, 0x55, 0x53, 0x31, 0x10, 
-	0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x08, 0x0C, 0x07, 0x4D, 
-	0x6F, 0x6E, 0x74, 0x61, 0x6E, 0x61, 0x31, 0x10, 0x30, 0x0E, 
-	0x06, 0x03, 0x55, 0x04, 0x07, 0x0C, 0x07, 0x42, 0x6F, 0x7A, 
-	0x65, 0x6D, 0x61, 0x6E, 0x31, 0x11, 0x30, 0x0F, 0x06, 0x03, 
-	0x55, 0x04, 0x0A, 0x0C, 0x08, 0x53, 0x61, 0x77, 0x74, 0x6F, 
-	0x6F, 0x74, 0x68, 0x31, 0x13, 0x30, 0x11, 0x06, 0x03, 0x55, 
-	0x04, 0x0B, 0x0C, 0x0A, 0x43, 0x6F, 0x6E, 0x73, 0x75, 0x6C, 
-	0x74, 0x69, 0x6E, 0x67, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 
-	0x55, 0x04, 0x03, 0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77, 
-	0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 
-	0x31, 0x1F, 0x30, 0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 
-	0xF7, 0x0D, 0x01, 0x09, 0x01, 0x16, 0x10, 0x69, 0x6E, 0x66, 
-	0x6F, 0x40, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 
-	0x63, 0x6F, 0x6D, 0x30, 0x82, 0x01, 0x22, 0x30, 0x0D, 0x06, 
-	0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x01, 
-	0x05, 0x00, 0x03, 0x82, 0x01, 0x0F, 0x00, 0x30, 0x82, 0x01, 
-	0x0A, 0x02, 0x82, 0x01, 0x01, 0x00, 0xBF, 0x0C, 0xCA, 0x2D, 
-	0x14, 0xB2, 0x1E, 0x84, 0x42, 0x5B, 0xCD, 0x38, 0x1F, 0x4A, 
-	0xF2, 0x4D, 0x75, 0x10, 0xF1, 0xB6, 0x35, 0x9F, 0xDF, 0xCA, 
-	0x7D, 0x03, 0x98, 0xD3, 0xAC, 0xDE, 0x03, 0x66, 0xEE, 0x2A, 
-	0xF1, 0xD8, 0xB0, 0x7D, 0x6E, 0x07, 0x54, 0x0B, 0x10, 0x98, 
-	0x21, 0x4D, 0x80, 0xCB, 0x12, 0x20, 0xE7, 0xCC, 0x4F, 0xDE, 
-	0x45, 0x7D, 0xC9, 0x72, 0x77, 0x32, 0xEA, 0xCA, 0x90, 0xBB, 
-	0x69, 0x52, 0x10, 0x03, 0x2F, 0xA8, 0xF3, 0x95, 0xC5, 0xF1, 
-	0x8B, 0x62, 0x56, 0x1B, 0xEF, 0x67, 0x6F, 0xA4, 0x10, 0x41, 
-	0x95, 0xAD, 0x0A, 0x9B, 0xE3, 0xA5, 0xC0, 0xB0, 0xD2, 0x70, 
-	0x76, 0x50, 0x30, 0x5B, 0xA8, 0xE8, 0x08, 0x2C, 0x7C, 0xED, 
-	0xA7, 0xA2, 0x7A, 0x8D, 0x38, 0x29, 0x1C, 0xAC, 0xC7, 0xED, 
-	0xF2, 0x7C, 0x95, 0xB0, 0x95, 0x82, 0x7D, 0x49, 0x5C, 0x38, 
-	0xCD, 0x77, 0x25, 0xEF, 0xBD, 0x80, 0x75, 0x53, 0x94, 0x3C, 
-	0x3D, 0xCA, 0x63, 0x5B, 0x9F, 0x15, 0xB5, 0xD3, 0x1D, 0x13, 
-	0x2F, 0x19, 0xD1, 0x3C, 0xDB, 0x76, 0x3A, 0xCC, 0xB8, 0x7D, 
-	0xC9, 0xE5, 0xC2, 0xD7, 0xDA, 0x40, 0x6F, 0xD8, 0x21, 0xDC, 
-	0x73, 0x1B, 0x42, 0x2D, 0x53, 0x9C, 0xFE, 0x1A, 0xFC, 0x7D, 
-	0xAB, 0x7A, 0x36, 0x3F, 0x98, 0xDE, 0x84, 0x7C, 0x05, 0x67, 
-	0xCE, 0x6A, 0x14, 0x38, 0x87, 0xA9, 0xF1, 0x8C, 0xB5, 0x68, 
-	0xCB, 0x68, 0x7F, 0x71, 0x20, 0x2B, 0xF5, 0xA0, 0x63, 0xF5, 
-	0x56, 0x2F, 0xA3, 0x26, 0xD2, 0xB7, 0x6F, 0xB1, 0x5A, 0x17, 
-	0xD7, 0x38, 0x99, 0x08, 0xFE, 0x93, 0x58, 0x6F, 0xFE, 0xC3, 
-	0x13, 0x49, 0x08, 0x16, 0x0B, 0xA7, 0x4D, 0x67, 0x00, 0x52, 
-	0x31, 0x67, 0x23, 0x4E, 0x98, 0xED, 0x51, 0x45, 0x1D, 0xB9, 
-	0x04, 0xD9, 0x0B, 0xEC, 0xD8, 0x28, 0xB3, 0x4B, 0xBD, 0xED, 
-	0x36, 0x79, 0x02, 0x03, 0x01, 0x00, 0x01, 0xA3, 0x81, 0xFC, 
-	0x30, 0x81, 0xF9, 0x30, 0x1D, 0x06, 0x03, 0x55, 0x1D, 0x0E, 
-	0x04, 0x16, 0x04, 0x14, 0x27, 0x8E, 0x67, 0x11, 0x74, 0xC3, 
-	0x26, 0x1D, 0x3F, 0xED, 0x33, 0x63, 0xB3, 0xA4, 0xD8, 0x1D, 
-	0x30, 0xE5, 0xE8, 0xD5, 0x30, 0x81, 0xC9, 0x06, 0x03, 0x55, 
-	0x1D, 0x23, 0x04, 0x81, 0xC1, 0x30, 0x81, 0xBE, 0x80, 0x14, 
-	0x27, 0x8E, 0x67, 0x11, 0x74, 0xC3, 0x26, 0x1D, 0x3F, 0xED, 
-	0x33, 0x63, 0xB3, 0xA4, 0xD8, 0x1D, 0x30, 0xE5, 0xE8, 0xD5, 
-	0xA1, 0x81, 0x9A, 0xA4, 0x81, 0x97, 0x30, 0x81, 0x94, 0x31, 
-	0x0B, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 0x06, 0x13, 0x02, 
-	0x55, 0x53, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 
-	0x08, 0x0C, 0x07, 0x4D, 0x6F, 0x6E, 0x74, 0x61, 0x6E, 0x61, 
-	0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x07, 0x0C, 
-	0x07, 0x42, 0x6F, 0x7A, 0x65, 0x6D, 0x61, 0x6E, 0x31, 0x11, 
-	0x30, 0x0F, 0x06, 0x03, 0x55, 0x04, 0x0A, 0x0C, 0x08, 0x53, 
-	0x61, 0x77, 0x74, 0x6F, 0x6F, 0x74, 0x68, 0x31, 0x13, 0x30, 
-	0x11, 0x06, 0x03, 0x55, 0x04, 0x0B, 0x0C, 0x0A, 0x43, 0x6F, 
-	0x6E, 0x73, 0x75, 0x6C, 0x74, 0x69, 0x6E, 0x67, 0x31, 0x18, 
-	0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 0x03, 0x0C, 0x0F, 0x77, 
-	0x77, 0x77, 0x2E, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 
-	0x2E, 0x63, 0x6F, 0x6D, 0x31, 0x1F, 0x30, 0x1D, 0x06, 0x09, 
-	0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x09, 0x01, 0x16, 
-	0x10, 0x69, 0x6E, 0x66, 0x6F, 0x40, 0x77, 0x6F, 0x6C, 0x66, 
-	0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x82, 0x09, 0x00, 
-	0x86, 0xFF, 0xF5, 0x8E, 0x10, 0xDE, 0xB8, 0xFB, 0x30, 0x0C, 
-	0x06, 0x03, 0x55, 0x1D, 0x13, 0x04, 0x05, 0x30, 0x03, 0x01, 
-	0x01, 0xFF, 0x30, 0x0D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 
-	0xF7, 0x0D, 0x01, 0x01, 0x0B, 0x05, 0x00, 0x03, 0x82, 0x01, 
-	0x01, 0x00, 0x9E, 0x28, 0x88, 0x72, 0x00, 0xCA, 0xE6, 0xE7, 
-	0x97, 0xCA, 0xC1, 0xF1, 0x1F, 0x9E, 0x12, 0xB2, 0xB8, 0xC7, 
-	0x51, 0xEA, 0x28, 0xE1, 0x36, 0xB5, 0x2D, 0xE6, 0x2F, 0x08, 
-	0x23, 0xCB, 0xA9, 0x4A, 0x87, 0x25, 0xC6, 0x5D, 0x89, 0x45, 
-	0xEA, 0xF5, 0x00, 0x98, 0xAC, 0x76, 0xFB, 0x1B, 0xAF, 0xF0, 
-	0xCE, 0x64, 0x9E, 0xDA, 0x08, 0xBF, 0xB6, 0xEB, 0xB4, 0xB5, 
-	0x0C, 0xA0, 0xE7, 0xF6, 0x47, 0x59, 0x1C, 0x61, 0xCF, 0x2E, 
-	0x0E, 0x58, 0xA4, 0x82, 0xAC, 0x0F, 0x3F, 0xEC, 0xC4, 0xAE, 
-	0x80, 0xF7, 0xB0, 0x8A, 0x1E, 0x85, 0x41, 0xE8, 0xFF, 0xFE, 
-	0xFE, 0x4F, 0x1A, 0x24, 0xD5, 0x49, 0xFA, 0xFB, 0xFE, 0x5E, 
-	0xE5, 0xD3, 0x91, 0x0E, 0x4F, 0x4E, 0x0C, 0x21, 0x51, 0x71, 
-	0x83, 0x04, 0x6B, 0x62, 0x7B, 0x4F, 0x59, 0x76, 0x48, 0x81, 
-	0x1E, 0xB4, 0xF7, 0x04, 0x47, 0x8A, 0x91, 0x57, 0xA3, 0x11, 
-	0xA9, 0xF2, 0x20, 0xB4, 0x78, 0x33, 0x62, 0x3D, 0xB0, 0x5E, 
-	0x0D, 0xF9, 0x86, 0x38, 0x82, 0xDA, 0xA1, 0x98, 0x8D, 0x19, 
-	0x06, 0x87, 0x21, 0x39, 0xB7, 0x02, 0xF7, 0xDA, 0x7D, 0x58, 
-	0xBA, 0x52, 0x15, 0xD8, 0x3B, 0xC9, 0x7B, 0x58, 0x34, 0xA0, 
-	0xC7, 0xE2, 0x7C, 0xA9, 0x83, 0x13, 0xE1, 0xB6, 0xEC, 0x01, 
-	0xBF, 0x52, 0x33, 0x0B, 0xC4, 0xFE, 0x43, 0xD3, 0xC6, 0xA4, 
-	0x8E, 0x2F, 0x87, 0x7F, 0x7A, 0x44, 0xEA, 0xCA, 0x53, 0x6C, 
-	0x85, 0xED, 0x65, 0x76, 0x73, 0x31, 0x03, 0x4E, 0xEA, 0xBD, 
-	0x35, 0x54, 0x13, 0xF3, 0x64, 0x87, 0x6B, 0xDF, 0x34, 0xDD, 
-	0x34, 0xA1, 0x88, 0x3B, 0xDB, 0x4D, 0xAF, 0x1B, 0x64, 0x90, 
-	0x92, 0x71, 0x30, 0x8E, 0xC8, 0xCC, 0xE5, 0x60, 0x24, 0xAF, 
-	0x31, 0x16, 0x39, 0x33, 0x91, 0x50, 0xF9, 0xAB, 0x68, 0x42, 
-	0x74, 0x7A, 0x35, 0xD9, 0xDD, 0xC8, 0xC4, 0x52
+        0x30, 0x82, 0x04, 0xAA, 0x30, 0x82, 0x03, 0x92, 0xA0, 0x03,
+        0x02, 0x01, 0x02, 0x02, 0x09, 0x00, 0x86, 0xFF, 0xF5, 0x8E,
+        0x10, 0xDE, 0xB8, 0xFB, 0x30, 0x0D, 0x06, 0x09, 0x2A, 0x86,
+        0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0B, 0x05, 0x00, 0x30,
+        0x81, 0x94, 0x31, 0x0B, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04,
+        0x06, 0x13, 0x02, 0x55, 0x53, 0x31, 0x10, 0x30, 0x0E, 0x06,
+        0x03, 0x55, 0x04, 0x08, 0x0C, 0x07, 0x4D, 0x6F, 0x6E, 0x74,
+        0x61, 0x6E, 0x61, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55,
+        0x04, 0x07, 0x0C, 0x07, 0x42, 0x6F, 0x7A, 0x65, 0x6D, 0x61,
+        0x6E, 0x31, 0x11, 0x30, 0x0F, 0x06, 0x03, 0x55, 0x04, 0x0A,
+        0x0C, 0x08, 0x53, 0x61, 0x77, 0x74, 0x6F, 0x6F, 0x74, 0x68,
+        0x31, 0x13, 0x30, 0x11, 0x06, 0x03, 0x55, 0x04, 0x0B, 0x0C,
+        0x0A, 0x43, 0x6F, 0x6E, 0x73, 0x75, 0x6C, 0x74, 0x69, 0x6E,
+        0x67, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 0x03,
+        0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77, 0x6F, 0x6C, 0x66,
+        0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x31, 0x1F, 0x30,
+        0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01,
+        0x09, 0x01, 0x16, 0x10, 0x69, 0x6E, 0x66, 0x6F, 0x40, 0x77,
+        0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D,
+        0x30, 0x1E, 0x17, 0x0D, 0x31, 0x38, 0x30, 0x34, 0x31, 0x33,
+        0x31, 0x35, 0x32, 0x33, 0x30, 0x39, 0x5A, 0x17, 0x0D, 0x32,
+        0x31, 0x30, 0x31, 0x30, 0x37, 0x31, 0x35, 0x32, 0x33, 0x30,
+        0x39, 0x5A, 0x30, 0x81, 0x94, 0x31, 0x0B, 0x30, 0x09, 0x06,
+        0x03, 0x55, 0x04, 0x06, 0x13, 0x02, 0x55, 0x53, 0x31, 0x10,
+        0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x08, 0x0C, 0x07, 0x4D,
+        0x6F, 0x6E, 0x74, 0x61, 0x6E, 0x61, 0x31, 0x10, 0x30, 0x0E,
+        0x06, 0x03, 0x55, 0x04, 0x07, 0x0C, 0x07, 0x42, 0x6F, 0x7A,
+        0x65, 0x6D, 0x61, 0x6E, 0x31, 0x11, 0x30, 0x0F, 0x06, 0x03,
+        0x55, 0x04, 0x0A, 0x0C, 0x08, 0x53, 0x61, 0x77, 0x74, 0x6F,
+        0x6F, 0x74, 0x68, 0x31, 0x13, 0x30, 0x11, 0x06, 0x03, 0x55,
+        0x04, 0x0B, 0x0C, 0x0A, 0x43, 0x6F, 0x6E, 0x73, 0x75, 0x6C,
+        0x74, 0x69, 0x6E, 0x67, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03,
+        0x55, 0x04, 0x03, 0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77,
+        0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D,
+        0x31, 0x1F, 0x30, 0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86,
+        0xF7, 0x0D, 0x01, 0x09, 0x01, 0x16, 0x10, 0x69, 0x6E, 0x66,
+        0x6F, 0x40, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E,
+        0x63, 0x6F, 0x6D, 0x30, 0x82, 0x01, 0x22, 0x30, 0x0D, 0x06,
+        0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x01,
+        0x05, 0x00, 0x03, 0x82, 0x01, 0x0F, 0x00, 0x30, 0x82, 0x01,
+        0x0A, 0x02, 0x82, 0x01, 0x01, 0x00, 0xBF, 0x0C, 0xCA, 0x2D,
+        0x14, 0xB2, 0x1E, 0x84, 0x42, 0x5B, 0xCD, 0x38, 0x1F, 0x4A,
+        0xF2, 0x4D, 0x75, 0x10, 0xF1, 0xB6, 0x35, 0x9F, 0xDF, 0xCA,
+        0x7D, 0x03, 0x98, 0xD3, 0xAC, 0xDE, 0x03, 0x66, 0xEE, 0x2A,
+        0xF1, 0xD8, 0xB0, 0x7D, 0x6E, 0x07, 0x54, 0x0B, 0x10, 0x98,
+        0x21, 0x4D, 0x80, 0xCB, 0x12, 0x20, 0xE7, 0xCC, 0x4F, 0xDE,
+        0x45, 0x7D, 0xC9, 0x72, 0x77, 0x32, 0xEA, 0xCA, 0x90, 0xBB,
+        0x69, 0x52, 0x10, 0x03, 0x2F, 0xA8, 0xF3, 0x95, 0xC5, 0xF1,
+        0x8B, 0x62, 0x56, 0x1B, 0xEF, 0x67, 0x6F, 0xA4, 0x10, 0x41,
+        0x95, 0xAD, 0x0A, 0x9B, 0xE3, 0xA5, 0xC0, 0xB0, 0xD2, 0x70,
+        0x76, 0x50, 0x30, 0x5B, 0xA8, 0xE8, 0x08, 0x2C, 0x7C, 0xED,
+        0xA7, 0xA2, 0x7A, 0x8D, 0x38, 0x29, 0x1C, 0xAC, 0xC7, 0xED,
+        0xF2, 0x7C, 0x95, 0xB0, 0x95, 0x82, 0x7D, 0x49, 0x5C, 0x38,
+        0xCD, 0x77, 0x25, 0xEF, 0xBD, 0x80, 0x75, 0x53, 0x94, 0x3C,
+        0x3D, 0xCA, 0x63, 0x5B, 0x9F, 0x15, 0xB5, 0xD3, 0x1D, 0x13,
+        0x2F, 0x19, 0xD1, 0x3C, 0xDB, 0x76, 0x3A, 0xCC, 0xB8, 0x7D,
+        0xC9, 0xE5, 0xC2, 0xD7, 0xDA, 0x40, 0x6F, 0xD8, 0x21, 0xDC,
+        0x73, 0x1B, 0x42, 0x2D, 0x53, 0x9C, 0xFE, 0x1A, 0xFC, 0x7D,
+        0xAB, 0x7A, 0x36, 0x3F, 0x98, 0xDE, 0x84, 0x7C, 0x05, 0x67,
+        0xCE, 0x6A, 0x14, 0x38, 0x87, 0xA9, 0xF1, 0x8C, 0xB5, 0x68,
+        0xCB, 0x68, 0x7F, 0x71, 0x20, 0x2B, 0xF5, 0xA0, 0x63, 0xF5,
+        0x56, 0x2F, 0xA3, 0x26, 0xD2, 0xB7, 0x6F, 0xB1, 0x5A, 0x17,
+        0xD7, 0x38, 0x99, 0x08, 0xFE, 0x93, 0x58, 0x6F, 0xFE, 0xC3,
+        0x13, 0x49, 0x08, 0x16, 0x0B, 0xA7, 0x4D, 0x67, 0x00, 0x52,
+        0x31, 0x67, 0x23, 0x4E, 0x98, 0xED, 0x51, 0x45, 0x1D, 0xB9,
+        0x04, 0xD9, 0x0B, 0xEC, 0xD8, 0x28, 0xB3, 0x4B, 0xBD, 0xED,
+        0x36, 0x79, 0x02, 0x03, 0x01, 0x00, 0x01, 0xA3, 0x81, 0xFC,
+        0x30, 0x81, 0xF9, 0x30, 0x1D, 0x06, 0x03, 0x55, 0x1D, 0x0E,
+        0x04, 0x16, 0x04, 0x14, 0x27, 0x8E, 0x67, 0x11, 0x74, 0xC3,
+        0x26, 0x1D, 0x3F, 0xED, 0x33, 0x63, 0xB3, 0xA4, 0xD8, 0x1D,
+        0x30, 0xE5, 0xE8, 0xD5, 0x30, 0x81, 0xC9, 0x06, 0x03, 0x55,
+        0x1D, 0x23, 0x04, 0x81, 0xC1, 0x30, 0x81, 0xBE, 0x80, 0x14,
+        0x27, 0x8E, 0x67, 0x11, 0x74, 0xC3, 0x26, 0x1D, 0x3F, 0xED,
+        0x33, 0x63, 0xB3, 0xA4, 0xD8, 0x1D, 0x30, 0xE5, 0xE8, 0xD5,
+        0xA1, 0x81, 0x9A, 0xA4, 0x81, 0x97, 0x30, 0x81, 0x94, 0x31,
+        0x0B, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 0x06, 0x13, 0x02,
+        0x55, 0x53, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04,
+        0x08, 0x0C, 0x07, 0x4D, 0x6F, 0x6E, 0x74, 0x61, 0x6E, 0x61,
+        0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x07, 0x0C,
+        0x07, 0x42, 0x6F, 0x7A, 0x65, 0x6D, 0x61, 0x6E, 0x31, 0x11,
+        0x30, 0x0F, 0x06, 0x03, 0x55, 0x04, 0x0A, 0x0C, 0x08, 0x53,
+        0x61, 0x77, 0x74, 0x6F, 0x6F, 0x74, 0x68, 0x31, 0x13, 0x30,
+        0x11, 0x06, 0x03, 0x55, 0x04, 0x0B, 0x0C, 0x0A, 0x43, 0x6F,
+        0x6E, 0x73, 0x75, 0x6C, 0x74, 0x69, 0x6E, 0x67, 0x31, 0x18,
+        0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 0x03, 0x0C, 0x0F, 0x77,
+        0x77, 0x77, 0x2E, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C,
+        0x2E, 0x63, 0x6F, 0x6D, 0x31, 0x1F, 0x30, 0x1D, 0x06, 0x09,
+        0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x09, 0x01, 0x16,
+        0x10, 0x69, 0x6E, 0x66, 0x6F, 0x40, 0x77, 0x6F, 0x6C, 0x66,
+        0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x82, 0x09, 0x00,
+        0x86, 0xFF, 0xF5, 0x8E, 0x10, 0xDE, 0xB8, 0xFB, 0x30, 0x0C,
+        0x06, 0x03, 0x55, 0x1D, 0x13, 0x04, 0x05, 0x30, 0x03, 0x01,
+        0x01, 0xFF, 0x30, 0x0D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86,
+        0xF7, 0x0D, 0x01, 0x01, 0x0B, 0x05, 0x00, 0x03, 0x82, 0x01,
+        0x01, 0x00, 0x9E, 0x28, 0x88, 0x72, 0x00, 0xCA, 0xE6, 0xE7,
+        0x97, 0xCA, 0xC1, 0xF1, 0x1F, 0x9E, 0x12, 0xB2, 0xB8, 0xC7,
+        0x51, 0xEA, 0x28, 0xE1, 0x36, 0xB5, 0x2D, 0xE6, 0x2F, 0x08,
+        0x23, 0xCB, 0xA9, 0x4A, 0x87, 0x25, 0xC6, 0x5D, 0x89, 0x45,
+        0xEA, 0xF5, 0x00, 0x98, 0xAC, 0x76, 0xFB, 0x1B, 0xAF, 0xF0,
+        0xCE, 0x64, 0x9E, 0xDA, 0x08, 0xBF, 0xB6, 0xEB, 0xB4, 0xB5,
+        0x0C, 0xA0, 0xE7, 0xF6, 0x47, 0x59, 0x1C, 0x61, 0xCF, 0x2E,
+        0x0E, 0x58, 0xA4, 0x82, 0xAC, 0x0F, 0x3F, 0xEC, 0xC4, 0xAE,
+        0x80, 0xF7, 0xB0, 0x8A, 0x1E, 0x85, 0x41, 0xE8, 0xFF, 0xFE,
+        0xFE, 0x4F, 0x1A, 0x24, 0xD5, 0x49, 0xFA, 0xFB, 0xFE, 0x5E,
+        0xE5, 0xD3, 0x91, 0x0E, 0x4F, 0x4E, 0x0C, 0x21, 0x51, 0x71,
+        0x83, 0x04, 0x6B, 0x62, 0x7B, 0x4F, 0x59, 0x76, 0x48, 0x81,
+        0x1E, 0xB4, 0xF7, 0x04, 0x47, 0x8A, 0x91, 0x57, 0xA3, 0x11,
+        0xA9, 0xF2, 0x20, 0xB4, 0x78, 0x33, 0x62, 0x3D, 0xB0, 0x5E,
+        0x0D, 0xF9, 0x86, 0x38, 0x82, 0xDA, 0xA1, 0x98, 0x8D, 0x19,
+        0x06, 0x87, 0x21, 0x39, 0xB7, 0x02, 0xF7, 0xDA, 0x7D, 0x58,
+        0xBA, 0x52, 0x15, 0xD8, 0x3B, 0xC9, 0x7B, 0x58, 0x34, 0xA0,
+        0xC7, 0xE2, 0x7C, 0xA9, 0x83, 0x13, 0xE1, 0xB6, 0xEC, 0x01,
+        0xBF, 0x52, 0x33, 0x0B, 0xC4, 0xFE, 0x43, 0xD3, 0xC6, 0xA4,
+        0x8E, 0x2F, 0x87, 0x7F, 0x7A, 0x44, 0xEA, 0xCA, 0x53, 0x6C,
+        0x85, 0xED, 0x65, 0x76, 0x73, 0x31, 0x03, 0x4E, 0xEA, 0xBD,
+        0x35, 0x54, 0x13, 0xF3, 0x64, 0x87, 0x6B, 0xDF, 0x34, 0xDD,
+        0x34, 0xA1, 0x88, 0x3B, 0xDB, 0x4D, 0xAF, 0x1B, 0x64, 0x90,
+        0x92, 0x71, 0x30, 0x8E, 0xC8, 0xCC, 0xE5, 0x60, 0x24, 0xAF,
+        0x31, 0x16, 0x39, 0x33, 0x91, 0x50, 0xF9, 0xAB, 0x68, 0x42,
+        0x74, 0x7A, 0x35, 0xD9, 0xDD, 0xC8, 0xC4, 0x52
 };
 static const int sizeof_ca_cert_der_2048 = sizeof(ca_cert_der_2048);
 
+/* ./certs/ca-cert-chain.der, 2048-bit */
+static const unsigned char ca_cert_chain_der[] =
+{
+        0x30, 0x82, 0x03, 0xB5, 0x30, 0x82, 0x03, 0x1E, 0xA0, 0x03,
+        0x02, 0x01, 0x02, 0x02, 0x09, 0x00, 0xDA, 0xFB, 0x6A, 0x0D,
+        0xFE, 0xCF, 0x9B, 0x47, 0x30, 0x0D, 0x06, 0x09, 0x2A, 0x86,
+        0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0B, 0x05, 0x00, 0x30,
+        0x81, 0x99, 0x31, 0x0B, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04,
+        0x06, 0x13, 0x02, 0x55, 0x53, 0x31, 0x10, 0x30, 0x0E, 0x06,
+        0x03, 0x55, 0x04, 0x08, 0x0C, 0x07, 0x4D, 0x6F, 0x6E, 0x74,
+        0x61, 0x6E, 0x61, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55,
+        0x04, 0x07, 0x0C, 0x07, 0x42, 0x6F, 0x7A, 0x65, 0x6D, 0x61,
+        0x6E, 0x31, 0x11, 0x30, 0x0F, 0x06, 0x03, 0x55, 0x04, 0x0A,
+        0x0C, 0x08, 0x53, 0x61, 0x77, 0x74, 0x6F, 0x6F, 0x74, 0x68,
+        0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 0x0B, 0x0C,
+        0x0F, 0x43, 0x6F, 0x6E, 0x73, 0x75, 0x6C, 0x74, 0x69, 0x6E,
+        0x67, 0x5F, 0x31, 0x30, 0x32, 0x34, 0x31, 0x18, 0x30, 0x16,
+        0x06, 0x03, 0x55, 0x04, 0x03, 0x0C, 0x0F, 0x77, 0x77, 0x77,
+        0x2E, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63,
+        0x6F, 0x6D, 0x31, 0x1F, 0x30, 0x1D, 0x06, 0x09, 0x2A, 0x86,
+        0x48, 0x86, 0xF7, 0x0D, 0x01, 0x09, 0x01, 0x16, 0x10, 0x69,
+        0x6E, 0x66, 0x6F, 0x40, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73,
+        0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x30, 0x1E, 0x17, 0x0D, 0x31,
+        0x38, 0x30, 0x34, 0x31, 0x33, 0x31, 0x35, 0x32, 0x33, 0x31,
+        0x30, 0x5A, 0x17, 0x0D, 0x32, 0x31, 0x30, 0x31, 0x30, 0x37,
+        0x31, 0x35, 0x32, 0x33, 0x31, 0x30, 0x5A, 0x30, 0x81, 0x99,
+        0x31, 0x0B, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 0x06, 0x13,
+        0x02, 0x55, 0x53, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55,
+        0x04, 0x08, 0x0C, 0x07, 0x4D, 0x6F, 0x6E, 0x74, 0x61, 0x6E,
+        0x61, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x07,
+        0x0C, 0x07, 0x42, 0x6F, 0x7A, 0x65, 0x6D, 0x61, 0x6E, 0x31,
+        0x11, 0x30, 0x0F, 0x06, 0x03, 0x55, 0x04, 0x0A, 0x0C, 0x08,
+        0x53, 0x61, 0x77, 0x74, 0x6F, 0x6F, 0x74, 0x68, 0x31, 0x18,
+        0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 0x0B, 0x0C, 0x0F, 0x43,
+        0x6F, 0x6E, 0x73, 0x75, 0x6C, 0x74, 0x69, 0x6E, 0x67, 0x5F,
+        0x31, 0x30, 0x32, 0x34, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03,
+        0x55, 0x04, 0x03, 0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77,
+        0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D,
+        0x31, 0x1F, 0x30, 0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86,
+        0xF7, 0x0D, 0x01, 0x09, 0x01, 0x16, 0x10, 0x69, 0x6E, 0x66,
+        0x6F, 0x40, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E,
+        0x63, 0x6F, 0x6D, 0x30, 0x81, 0x9F, 0x30, 0x0D, 0x06, 0x09,
+        0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x01, 0x05,
+        0x00, 0x03, 0x81, 0x8D, 0x00, 0x30, 0x81, 0x89, 0x02, 0x81,
+        0x81, 0x00, 0xCD, 0xAC, 0xDD, 0x47, 0xEC, 0xBE, 0xB7, 0x24,
+        0xC3, 0x63, 0x1B, 0x54, 0x98, 0x79, 0xE1, 0xC7, 0x31, 0x16,
+        0x59, 0xD6, 0x9D, 0x77, 0x9D, 0x8D, 0xE2, 0x8B, 0xED, 0x04,
+        0x17, 0xB2, 0xC6, 0xEB, 0xE4, 0x9B, 0x91, 0xBE, 0x31, 0x50,
+        0x62, 0x97, 0x58, 0xB5, 0x7F, 0x29, 0xDE, 0xB3, 0x71, 0x24,
+        0x0B, 0xBF, 0x97, 0x09, 0x7F, 0x26, 0xDC, 0x2D, 0xEC, 0xA8,
+        0x2E, 0xB2, 0x64, 0x2B, 0x7A, 0x2B, 0x35, 0x19, 0x2D, 0xA2,
+        0x80, 0xCB, 0x99, 0xFD, 0x94, 0x71, 0x1B, 0x23, 0x8D, 0x54,
+        0xDB, 0x2E, 0x62, 0x8D, 0x81, 0x08, 0x2D, 0xF4, 0x24, 0x72,
+        0x27, 0x6C, 0xF9, 0xC9, 0x8E, 0xDB, 0x4C, 0x75, 0xBA, 0x9B,
+        0x01, 0xF8, 0x3F, 0x18, 0xF4, 0xE6, 0x7F, 0xFB, 0x57, 0x94,
+        0x92, 0xCC, 0x88, 0xC4, 0xB4, 0x00, 0xC2, 0xAA, 0xD4, 0xE5,
+        0x88, 0x18, 0xB3, 0x11, 0x2F, 0x73, 0xC0, 0xD6, 0x29, 0x09,
+        0x02, 0x03, 0x01, 0x00, 0x01, 0xA3, 0x82, 0x01, 0x01, 0x30,
+        0x81, 0xFE, 0x30, 0x1D, 0x06, 0x03, 0x55, 0x1D, 0x0E, 0x04,
+        0x16, 0x04, 0x14, 0xD3, 0x22, 0x8F, 0x28, 0x2C, 0xE0, 0x05,
+        0xEE, 0xD3, 0xED, 0xC3, 0x71, 0x3D, 0xC9, 0xB2, 0x36, 0x3A,
+        0x1D, 0xBF, 0xA8, 0x30, 0x81, 0xCE, 0x06, 0x03, 0x55, 0x1D,
+        0x23, 0x04, 0x81, 0xC6, 0x30, 0x81, 0xC3, 0x80, 0x14, 0xD3,
+        0x22, 0x8F, 0x28, 0x2C, 0xE0, 0x05, 0xEE, 0xD3, 0xED, 0xC3,
+        0x71, 0x3D, 0xC9, 0xB2, 0x36, 0x3A, 0x1D, 0xBF, 0xA8, 0xA1,
+        0x81, 0x9F, 0xA4, 0x81, 0x9C, 0x30, 0x81, 0x99, 0x31, 0x0B,
+        0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 0x06, 0x13, 0x02, 0x55,
+        0x53, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x08,
+        0x0C, 0x07, 0x4D, 0x6F, 0x6E, 0x74, 0x61, 0x6E, 0x61, 0x31,
+        0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x07, 0x0C, 0x07,
+        0x42, 0x6F, 0x7A, 0x65, 0x6D, 0x61, 0x6E, 0x31, 0x11, 0x30,
+        0x0F, 0x06, 0x03, 0x55, 0x04, 0x0A, 0x0C, 0x08, 0x53, 0x61,
+        0x77, 0x74, 0x6F, 0x6F, 0x74, 0x68, 0x31, 0x18, 0x30, 0x16,
+        0x06, 0x03, 0x55, 0x04, 0x0B, 0x0C, 0x0F, 0x43, 0x6F, 0x6E,
+        0x73, 0x75, 0x6C, 0x74, 0x69, 0x6E, 0x67, 0x5F, 0x31, 0x30,
+        0x32, 0x34, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 0x04,
+        0x03, 0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77, 0x6F, 0x6C,
+        0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x31, 0x1F,
+        0x30, 0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D,
+        0x01, 0x09, 0x01, 0x16, 0x10, 0x69, 0x6E, 0x66, 0x6F, 0x40,
+        0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F,
+        0x6D, 0x82, 0x09, 0x00, 0xDA, 0xFB, 0x6A, 0x0D, 0xFE, 0xCF,
+        0x9B, 0x47, 0x30, 0x0C, 0x06, 0x03, 0x55, 0x1D, 0x13, 0x04,
+        0x05, 0x30, 0x03, 0x01, 0x01, 0xFF, 0x30, 0x0D, 0x06, 0x09,
+        0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0B, 0x05,
+        0x00, 0x03, 0x81, 0x81, 0x00, 0x1D, 0x48, 0xF6, 0x40, 0x41,
+        0x04, 0x06, 0xF2, 0xE4, 0x72, 0x2F, 0xEA, 0xFF, 0xC1, 0x67,
+        0x6B, 0x15, 0xBB, 0x0A, 0x28, 0x23, 0x28, 0x07, 0xC6, 0xD7,
+        0x13, 0x2C, 0xBE, 0x00, 0x00, 0xAC, 0x1D, 0xF7, 0xF4, 0x92,
+        0xD3, 0x2B, 0xAF, 0x23, 0xEB, 0x9F, 0x1A, 0xE2, 0x11, 0x3C,
+        0x2D, 0x97, 0xF2, 0x0F, 0xAC, 0xAE, 0x97, 0x86, 0x0A, 0xFB,
+        0xA8, 0x4F, 0x74, 0x1B, 0xDE, 0x19, 0x51, 0xDB, 0xCD, 0xE2,
+        0x11, 0x38, 0xC1, 0xA4, 0x9D, 0x56, 0xAB, 0x47, 0x5C, 0xDE,
+        0xBA, 0xEB, 0x27, 0xDF, 0x6D, 0xC8, 0x7E, 0x3A, 0xBD, 0x2E,
+        0x9B, 0x2A, 0xAD, 0x22, 0x3B, 0x95, 0xA9, 0xF2, 0x28, 0x03,
+        0xBC, 0xE5, 0xEC, 0xCC, 0xF2, 0x08, 0xD4, 0xC8, 0x2F, 0xDB,
+        0xEA, 0xFB, 0x2E, 0x52, 0x16, 0x8C, 0x42, 0x02, 0xA4, 0x59,
+        0x6D, 0x4C, 0x33, 0xB4, 0x9A, 0xD2, 0x73, 0x4A, 0x1E, 0x9F,
+        0xD9, 0xC8, 0x83, 0x30, 0x82, 0x04, 0xAA, 0x30, 0x82, 0x03,
+        0x92, 0xA0, 0x03, 0x02, 0x01, 0x02, 0x02, 0x09, 0x00, 0x86,
+        0xFF, 0xF5, 0x8E, 0x10, 0xDE, 0xB8, 0xFB, 0x30, 0x0D, 0x06,
+        0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0B,
+        0x05, 0x00, 0x30, 0x81, 0x94, 0x31, 0x0B, 0x30, 0x09, 0x06,
+        0x03, 0x55, 0x04, 0x06, 0x13, 0x02, 0x55, 0x53, 0x31, 0x10,
+        0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x08, 0x0C, 0x07, 0x4D,
+        0x6F, 0x6E, 0x74, 0x61, 0x6E, 0x61, 0x31, 0x10, 0x30, 0x0E,
+        0x06, 0x03, 0x55, 0x04, 0x07, 0x0C, 0x07, 0x42, 0x6F, 0x7A,
+        0x65, 0x6D, 0x61, 0x6E, 0x31, 0x11, 0x30, 0x0F, 0x06, 0x03,
+        0x55, 0x04, 0x0A, 0x0C, 0x08, 0x53, 0x61, 0x77, 0x74, 0x6F,
+        0x6F, 0x74, 0x68, 0x31, 0x13, 0x30, 0x11, 0x06, 0x03, 0x55,
+        0x04, 0x0B, 0x0C, 0x0A, 0x43, 0x6F, 0x6E, 0x73, 0x75, 0x6C,
+        0x74, 0x69, 0x6E, 0x67, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03,
+        0x55, 0x04, 0x03, 0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77,
+        0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D,
+        0x31, 0x1F, 0x30, 0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86,
+        0xF7, 0x0D, 0x01, 0x09, 0x01, 0x16, 0x10, 0x69, 0x6E, 0x66,
+        0x6F, 0x40, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E,
+        0x63, 0x6F, 0x6D, 0x30, 0x1E, 0x17, 0x0D, 0x31, 0x38, 0x30,
+        0x34, 0x31, 0x33, 0x31, 0x35, 0x32, 0x33, 0x30, 0x39, 0x5A,
+        0x17, 0x0D, 0x32, 0x31, 0x30, 0x31, 0x30, 0x37, 0x31, 0x35,
+        0x32, 0x33, 0x30, 0x39, 0x5A, 0x30, 0x81, 0x94, 0x31, 0x0B,
+        0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 0x06, 0x13, 0x02, 0x55,
+        0x53, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x08,
+        0x0C, 0x07, 0x4D, 0x6F, 0x6E, 0x74, 0x61, 0x6E, 0x61, 0x31,
+        0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x07, 0x0C, 0x07,
+        0x42, 0x6F, 0x7A, 0x65, 0x6D, 0x61, 0x6E, 0x31, 0x11, 0x30,
+        0x0F, 0x06, 0x03, 0x55, 0x04, 0x0A, 0x0C, 0x08, 0x53, 0x61,
+        0x77, 0x74, 0x6F, 0x6F, 0x74, 0x68, 0x31, 0x13, 0x30, 0x11,
+        0x06, 0x03, 0x55, 0x04, 0x0B, 0x0C, 0x0A, 0x43, 0x6F, 0x6E,
+        0x73, 0x75, 0x6C, 0x74, 0x69, 0x6E, 0x67, 0x31, 0x18, 0x30,
+        0x16, 0x06, 0x03, 0x55, 0x04, 0x03, 0x0C, 0x0F, 0x77, 0x77,
+        0x77, 0x2E, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E,
+        0x63, 0x6F, 0x6D, 0x31, 0x1F, 0x30, 0x1D, 0x06, 0x09, 0x2A,
+        0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x09, 0x01, 0x16, 0x10,
+        0x69, 0x6E, 0x66, 0x6F, 0x40, 0x77, 0x6F, 0x6C, 0x66, 0x73,
+        0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x30, 0x82, 0x01, 0x22,
+        0x30, 0x0D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D,
+        0x01, 0x01, 0x01, 0x05, 0x00, 0x03, 0x82, 0x01, 0x0F, 0x00,
+        0x30, 0x82, 0x01, 0x0A, 0x02, 0x82, 0x01, 0x01, 0x00, 0xBF,
+        0x0C, 0xCA, 0x2D, 0x14, 0xB2, 0x1E, 0x84, 0x42, 0x5B, 0xCD,
+        0x38, 0x1F, 0x4A, 0xF2, 0x4D, 0x75, 0x10, 0xF1, 0xB6, 0x35,
+        0x9F, 0xDF, 0xCA, 0x7D, 0x03, 0x98, 0xD3, 0xAC, 0xDE, 0x03,
+        0x66, 0xEE, 0x2A, 0xF1, 0xD8, 0xB0, 0x7D, 0x6E, 0x07, 0x54,
+        0x0B, 0x10, 0x98, 0x21, 0x4D, 0x80, 0xCB, 0x12, 0x20, 0xE7,
+        0xCC, 0x4F, 0xDE, 0x45, 0x7D, 0xC9, 0x72, 0x77, 0x32, 0xEA,
+        0xCA, 0x90, 0xBB, 0x69, 0x52, 0x10, 0x03, 0x2F, 0xA8, 0xF3,
+        0x95, 0xC5, 0xF1, 0x8B, 0x62, 0x56, 0x1B, 0xEF, 0x67, 0x6F,
+        0xA4, 0x10, 0x41, 0x95, 0xAD, 0x0A, 0x9B, 0xE3, 0xA5, 0xC0,
+        0xB0, 0xD2, 0x70, 0x76, 0x50, 0x30, 0x5B, 0xA8, 0xE8, 0x08,
+        0x2C, 0x7C, 0xED, 0xA7, 0xA2, 0x7A, 0x8D, 0x38, 0x29, 0x1C,
+        0xAC, 0xC7, 0xED, 0xF2, 0x7C, 0x95, 0xB0, 0x95, 0x82, 0x7D,
+        0x49, 0x5C, 0x38, 0xCD, 0x77, 0x25, 0xEF, 0xBD, 0x80, 0x75,
+        0x53, 0x94, 0x3C, 0x3D, 0xCA, 0x63, 0x5B, 0x9F, 0x15, 0xB5,
+        0xD3, 0x1D, 0x13, 0x2F, 0x19, 0xD1, 0x3C, 0xDB, 0x76, 0x3A,
+        0xCC, 0xB8, 0x7D, 0xC9, 0xE5, 0xC2, 0xD7, 0xDA, 0x40, 0x6F,
+        0xD8, 0x21, 0xDC, 0x73, 0x1B, 0x42, 0x2D, 0x53, 0x9C, 0xFE,
+        0x1A, 0xFC, 0x7D, 0xAB, 0x7A, 0x36, 0x3F, 0x98, 0xDE, 0x84,
+        0x7C, 0x05, 0x67, 0xCE, 0x6A, 0x14, 0x38, 0x87, 0xA9, 0xF1,
+        0x8C, 0xB5, 0x68, 0xCB, 0x68, 0x7F, 0x71, 0x20, 0x2B, 0xF5,
+        0xA0, 0x63, 0xF5, 0x56, 0x2F, 0xA3, 0x26, 0xD2, 0xB7, 0x6F,
+        0xB1, 0x5A, 0x17, 0xD7, 0x38, 0x99, 0x08, 0xFE, 0x93, 0x58,
+        0x6F, 0xFE, 0xC3, 0x13, 0x49, 0x08, 0x16, 0x0B, 0xA7, 0x4D,
+        0x67, 0x00, 0x52, 0x31, 0x67, 0x23, 0x4E, 0x98, 0xED, 0x51,
+        0x45, 0x1D, 0xB9, 0x04, 0xD9, 0x0B, 0xEC, 0xD8, 0x28, 0xB3,
+        0x4B, 0xBD, 0xED, 0x36, 0x79, 0x02, 0x03, 0x01, 0x00, 0x01,
+        0xA3, 0x81, 0xFC, 0x30, 0x81, 0xF9, 0x30, 0x1D, 0x06, 0x03,
+        0x55, 0x1D, 0x0E, 0x04, 0x16, 0x04, 0x14, 0x27, 0x8E, 0x67,
+        0x11, 0x74, 0xC3, 0x26, 0x1D, 0x3F, 0xED, 0x33, 0x63, 0xB3,
+        0xA4, 0xD8, 0x1D, 0x30, 0xE5, 0xE8, 0xD5, 0x30, 0x81, 0xC9,
+        0x06, 0x03, 0x55, 0x1D, 0x23, 0x04, 0x81, 0xC1, 0x30, 0x81,
+        0xBE, 0x80, 0x14, 0x27, 0x8E, 0x67, 0x11, 0x74, 0xC3, 0x26,
+        0x1D, 0x3F, 0xED, 0x33, 0x63, 0xB3, 0xA4, 0xD8, 0x1D, 0x30,
+        0xE5, 0xE8, 0xD5, 0xA1, 0x81, 0x9A, 0xA4, 0x81, 0x97, 0x30,
+        0x81, 0x94, 0x31, 0x0B, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04,
+        0x06, 0x13, 0x02, 0x55, 0x53, 0x31, 0x10, 0x30, 0x0E, 0x06,
+        0x03, 0x55, 0x04, 0x08, 0x0C, 0x07, 0x4D, 0x6F, 0x6E, 0x74,
+        0x61, 0x6E, 0x61, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55,
+        0x04, 0x07, 0x0C, 0x07, 0x42, 0x6F, 0x7A, 0x65, 0x6D, 0x61,
+        0x6E, 0x31, 0x11, 0x30, 0x0F, 0x06, 0x03, 0x55, 0x04, 0x0A,
+        0x0C, 0x08, 0x53, 0x61, 0x77, 0x74, 0x6F, 0x6F, 0x74, 0x68,
+        0x31, 0x13, 0x30, 0x11, 0x06, 0x03, 0x55, 0x04, 0x0B, 0x0C,
+        0x0A, 0x43, 0x6F, 0x6E, 0x73, 0x75, 0x6C, 0x74, 0x69, 0x6E,
+        0x67, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 0x03,
+        0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77, 0x6F, 0x6C, 0x66,
+        0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x31, 0x1F, 0x30,
+        0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01,
+        0x09, 0x01, 0x16, 0x10, 0x69, 0x6E, 0x66, 0x6F, 0x40, 0x77,
+        0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D,
+        0x82, 0x09, 0x00, 0x86, 0xFF, 0xF5, 0x8E, 0x10, 0xDE, 0xB8,
+        0xFB, 0x30, 0x0C, 0x06, 0x03, 0x55, 0x1D, 0x13, 0x04, 0x05,
+        0x30, 0x03, 0x01, 0x01, 0xFF, 0x30, 0x0D, 0x06, 0x09, 0x2A,
+        0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0B, 0x05, 0x00,
+        0x03, 0x82, 0x01, 0x01, 0x00, 0x9E, 0x28, 0x88, 0x72, 0x00,
+        0xCA, 0xE6, 0xE7, 0x97, 0xCA, 0xC1, 0xF1, 0x1F, 0x9E, 0x12,
+        0xB2, 0xB8, 0xC7, 0x51, 0xEA, 0x28, 0xE1, 0x36, 0xB5, 0x2D,
+        0xE6, 0x2F, 0x08, 0x23, 0xCB, 0xA9, 0x4A, 0x87, 0x25, 0xC6,
+        0x5D, 0x89, 0x45, 0xEA, 0xF5, 0x00, 0x98, 0xAC, 0x76, 0xFB,
+        0x1B, 0xAF, 0xF0, 0xCE, 0x64, 0x9E, 0xDA, 0x08, 0xBF, 0xB6,
+        0xEB, 0xB4, 0xB5, 0x0C, 0xA0, 0xE7, 0xF6, 0x47, 0x59, 0x1C,
+        0x61, 0xCF, 0x2E, 0x0E, 0x58, 0xA4, 0x82, 0xAC, 0x0F, 0x3F,
+        0xEC, 0xC4, 0xAE, 0x80, 0xF7, 0xB0, 0x8A, 0x1E, 0x85, 0x41,
+        0xE8, 0xFF, 0xFE, 0xFE, 0x4F, 0x1A, 0x24, 0xD5, 0x49, 0xFA,
+        0xFB, 0xFE, 0x5E, 0xE5, 0xD3, 0x91, 0x0E, 0x4F, 0x4E, 0x0C,
+        0x21, 0x51, 0x71, 0x83, 0x04, 0x6B, 0x62, 0x7B, 0x4F, 0x59,
+        0x76, 0x48, 0x81, 0x1E, 0xB4, 0xF7, 0x04, 0x47, 0x8A, 0x91,
+        0x57, 0xA3, 0x11, 0xA9, 0xF2, 0x20, 0xB4, 0x78, 0x33, 0x62,
+        0x3D, 0xB0, 0x5E, 0x0D, 0xF9, 0x86, 0x38, 0x82, 0xDA, 0xA1,
+        0x98, 0x8D, 0x19, 0x06, 0x87, 0x21, 0x39, 0xB7, 0x02, 0xF7,
+        0xDA, 0x7D, 0x58, 0xBA, 0x52, 0x15, 0xD8, 0x3B, 0xC9, 0x7B,
+        0x58, 0x34, 0xA0, 0xC7, 0xE2, 0x7C, 0xA9, 0x83, 0x13, 0xE1,
+        0xB6, 0xEC, 0x01, 0xBF, 0x52, 0x33, 0x0B, 0xC4, 0xFE, 0x43,
+        0xD3, 0xC6, 0xA4, 0x8E, 0x2F, 0x87, 0x7F, 0x7A, 0x44, 0xEA,
+        0xCA, 0x53, 0x6C, 0x85, 0xED, 0x65, 0x76, 0x73, 0x31, 0x03,
+        0x4E, 0xEA, 0xBD, 0x35, 0x54, 0x13, 0xF3, 0x64, 0x87, 0x6B,
+        0xDF, 0x34, 0xDD, 0x34, 0xA1, 0x88, 0x3B, 0xDB, 0x4D, 0xAF,
+        0x1B, 0x64, 0x90, 0x92, 0x71, 0x30, 0x8E, 0xC8, 0xCC, 0xE5,
+        0x60, 0x24, 0xAF, 0x31, 0x16, 0x39, 0x33, 0x91, 0x50, 0xF9,
+        0xAB, 0x68, 0x42, 0x74, 0x7A, 0x35, 0xD9, 0xDD, 0xC8, 0xC4,
+        0x52
+};
+static const int sizeof_ca_cert_chain_der = sizeof(ca_cert_chain_der);
+
 /* ./certs/server-key.der, 2048-bit */
 static const unsigned char server_key_der_2048[] =
 {
-	0x30, 0x82, 0x04, 0xA5, 0x02, 0x01, 0x00, 0x02, 0x82, 0x01, 
-	0x01, 0x00, 0xC0, 0x95, 0x08, 0xE1, 0x57, 0x41, 0xF2, 0x71, 
-	0x6D, 0xB7, 0xD2, 0x45, 0x41, 0x27, 0x01, 0x65, 0xC6, 0x45, 
-	0xAE, 0xF2, 0xBC, 0x24, 0x30, 0xB8, 0x95, 0xCE, 0x2F, 0x4E, 
-	0xD6, 0xF6, 0x1C, 0x88, 0xBC, 0x7C, 0x9F, 0xFB, 0xA8, 0x67, 
-	0x7F, 0xFE, 0x5C, 0x9C, 0x51, 0x75, 0xF7, 0x8A, 0xCA, 0x07, 
-	0xE7, 0x35, 0x2F, 0x8F, 0xE1, 0xBD, 0x7B, 0xC0, 0x2F, 0x7C, 
-	0xAB, 0x64, 0xA8, 0x17, 0xFC, 0xCA, 0x5D, 0x7B, 0xBA, 0xE0, 
-	0x21, 0xE5, 0x72, 0x2E, 0x6F, 0x2E, 0x86, 0xD8, 0x95, 0x73, 
-	0xDA, 0xAC, 0x1B, 0x53, 0xB9, 0x5F, 0x3F, 0xD7, 0x19, 0x0D, 
-	0x25, 0x4F, 0xE1, 0x63, 0x63, 0x51, 0x8B, 0x0B, 0x64, 0x3F, 
-	0xAD, 0x43, 0xB8, 0xA5, 0x1C, 0x5C, 0x34, 0xB3, 0xAE, 0x00, 
-	0xA0, 0x63, 0xC5, 0xF6, 0x7F, 0x0B, 0x59, 0x68, 0x78, 0x73, 
-	0xA6, 0x8C, 0x18, 0xA9, 0x02, 0x6D, 0xAF, 0xC3, 0x19, 0x01, 
-	0x2E, 0xB8, 0x10, 0xE3, 0xC6, 0xCC, 0x40, 0xB4, 0x69, 0xA3, 
-	0x46, 0x33, 0x69, 0x87, 0x6E, 0xC4, 0xBB, 0x17, 0xA6, 0xF3, 
-	0xE8, 0xDD, 0xAD, 0x73, 0xBC, 0x7B, 0x2F, 0x21, 0xB5, 0xFD, 
-	0x66, 0x51, 0x0C, 0xBD, 0x54, 0xB3, 0xE1, 0x6D, 0x5F, 0x1C, 
-	0xBC, 0x23, 0x73, 0xD1, 0x09, 0x03, 0x89, 0x14, 0xD2, 0x10, 
-	0xB9, 0x64, 0xC3, 0x2A, 0xD0, 0xA1, 0x96, 0x4A, 0xBC, 0xE1, 
-	0xD4, 0x1A, 0x5B, 0xC7, 0xA0, 0xC0, 0xC1, 0x63, 0x78, 0x0F, 
-	0x44, 0x37, 0x30, 0x32, 0x96, 0x80, 0x32, 0x23, 0x95, 0xA1, 
-	0x77, 0xBA, 0x13, 0xD2, 0x97, 0x73, 0xE2, 0x5D, 0x25, 0xC9, 
-	0x6A, 0x0D, 0xC3, 0x39, 0x60, 0xA4, 0xB4, 0xB0, 0x69, 0x42, 
-	0x42, 0x09, 0xE9, 0xD8, 0x08, 0xBC, 0x33, 0x20, 0xB3, 0x58, 
-	0x22, 0xA7, 0xAA, 0xEB, 0xC4, 0xE1, 0xE6, 0x61, 0x83, 0xC5, 
-	0xD2, 0x96, 0xDF, 0xD9, 0xD0, 0x4F, 0xAD, 0xD7, 0x02, 0x03, 
-	0x01, 0x00, 0x01, 0x02, 0x82, 0x01, 0x01, 0x00, 0x9A, 0xD0, 
-	0x34, 0x0F, 0x52, 0x62, 0x05, 0x50, 0x01, 0xEF, 0x9F, 0xED, 
-	0x64, 0x6E, 0xC2, 0xC4, 0xDA, 0x1A, 0xF2, 0x84, 0xD7, 0x92, 
-	0x10, 0x48, 0x92, 0xC4, 0xE9, 0x6A, 0xEB, 0x8B, 0x75, 0x6C, 
-	0xC6, 0x79, 0x38, 0xF2, 0xC9, 0x72, 0x4A, 0x86, 0x64, 0x54, 
-	0x95, 0x77, 0xCB, 0xC3, 0x9A, 0x9D, 0xB7, 0xD4, 0x1D, 0xA4, 
-	0x00, 0xC8, 0x9E, 0x4E, 0xE4, 0xDD, 0xC7, 0xBA, 0x67, 0x16, 
-	0xC1, 0x74, 0xBC, 0xA9, 0xD6, 0x94, 0x8F, 0x2B, 0x30, 0x1A, 
-	0xFB, 0xED, 0xDF, 0x21, 0x05, 0x23, 0xD9, 0x4A, 0x39, 0xBD, 
-	0x98, 0x6B, 0x65, 0x9A, 0xB8, 0xDC, 0xC4, 0x7D, 0xEE, 0xA6, 
-	0x43, 0x15, 0x2E, 0x3D, 0xBE, 0x1D, 0x22, 0x60, 0x2A, 0x73, 
-	0x30, 0xD5, 0x3E, 0xD8, 0xA2, 0xAC, 0x86, 0x43, 0x2E, 0xC4, 
-	0xF5, 0x64, 0x5E, 0x3F, 0x89, 0x75, 0x0F, 0x11, 0xD8, 0x51, 
-	0x25, 0x4E, 0x9F, 0xD8, 0xAA, 0xA3, 0xCE, 0x60, 0xB3, 0xE2, 
-	0x8A, 0xD9, 0x7E, 0x1B, 0xF0, 0x64, 0xCA, 0x9A, 0x5B, 0x05, 
-	0x0B, 0x5B, 0xAA, 0xCB, 0xE5, 0xE3, 0x3F, 0x6E, 0x32, 0x22, 
-	0x05, 0xF3, 0xD0, 0xFA, 0xEF, 0x74, 0x52, 0x81, 0xE2, 0x5F, 
-	0x74, 0xD3, 0xBD, 0xFF, 0x31, 0x83, 0x45, 0x75, 0xFA, 0x63, 
-	0x7A, 0x97, 0x2E, 0xD6, 0xB6, 0x19, 0xC6, 0x92, 0x26, 0xE4, 
-	0x28, 0x06, 0x50, 0x50, 0x0E, 0x78, 0x2E, 0xA9, 0x78, 0x0D, 
-	0x14, 0x97, 0xB4, 0x12, 0xD8, 0x31, 0x40, 0xAB, 0xA1, 0x01, 
-	0x41, 0xC2, 0x30, 0xF8, 0x07, 0x5F, 0x16, 0xE4, 0x61, 0x77, 
-	0xD2, 0x60, 0xF2, 0x9F, 0x8D, 0xE8, 0xF4, 0xBA, 0xEB, 0x63, 
-	0xDE, 0x2A, 0x97, 0x81, 0xEF, 0x4C, 0x6C, 0xE6, 0x55, 0x34, 
-	0x51, 0x2B, 0x28, 0x34, 0xF4, 0x53, 0x1C, 0xC4, 0x58, 0x0A, 
-	0x3F, 0xBB, 0xAF, 0xB5, 0xF7, 0x4A, 0x85, 0x43, 0x2D, 0x3C, 
-	0xF1, 0x58, 0x58, 0x81, 0x02, 0x81, 0x81, 0x00, 0xF2, 0x2C, 
-	0x54, 0x76, 0x39, 0x23, 0x63, 0xC9, 0x10, 0x32, 0xB7, 0x93, 
-	0xAD, 0xAF, 0xBE, 0x19, 0x75, 0x96, 0x81, 0x64, 0xE6, 0xB5, 
-	0xB8, 0x89, 0x42, 0x41, 0xD1, 0x6D, 0xD0, 0x1C, 0x1B, 0xF8, 
-	0x1B, 0xAC, 0x69, 0xCB, 0x36, 0x3C, 0x64, 0x7D, 0xDC, 0xF4, 
-	0x19, 0xB8, 0xC3, 0x60, 0xB1, 0x57, 0x48, 0x5F, 0x52, 0x4F, 
-	0x59, 0x3A, 0x55, 0x7F, 0x32, 0xC0, 0x19, 0x43, 0x50, 0x3F, 
-	0xAE, 0xCE, 0x6F, 0x17, 0xF3, 0x0E, 0x9F, 0x40, 0xCA, 0x4E, 
-	0xAD, 0x15, 0x3B, 0xC9, 0x79, 0xE9, 0xC0, 0x59, 0x38, 0x73, 
-	0x70, 0x9C, 0x0A, 0x7C, 0xC9, 0x3A, 0x48, 0x32, 0xA7, 0xD8, 
-	0x49, 0x75, 0x0A, 0x85, 0xC2, 0xC2, 0xFD, 0x15, 0x73, 0xDA, 
-	0x99, 0x09, 0x2A, 0x69, 0x9A, 0x9F, 0x0A, 0x71, 0xBF, 0xB0, 
-	0x04, 0xA6, 0x8C, 0x7A, 0x5A, 0x6F, 0x48, 0x5A, 0x54, 0x3B, 
-	0xC6, 0xB1, 0x53, 0x17, 0xDF, 0xE7, 0x02, 0x81, 0x81, 0x00, 
-	0xCB, 0x93, 0xDE, 0x77, 0x15, 0x5D, 0xB7, 0x5C, 0x5C, 0x7C, 
-	0xD8, 0x90, 0xA9, 0x98, 0x2D, 0xD6, 0x69, 0x0E, 0x63, 0xB3, 
-	0xA3, 0xDC, 0xA6, 0xCC, 0x8B, 0x6A, 0xA4, 0xA2, 0x12, 0x8C, 
-	0x8E, 0x7B, 0x48, 0x2C, 0xB2, 0x4B, 0x37, 0xDC, 0x06, 0x18, 
-	0x7D, 0xEA, 0xFE, 0x76, 0xA1, 0xD4, 0xA1, 0xE9, 0x3F, 0x0D, 
-	0xCD, 0x1B, 0x5F, 0xAF, 0x5F, 0x9E, 0x96, 0x5B, 0x5B, 0x0F, 
-	0xA1, 0x7C, 0xAF, 0xB3, 0x9B, 0x90, 0xDB, 0x57, 0x73, 0x3A, 
-	0xED, 0xB0, 0x23, 0x44, 0xAE, 0x41, 0x4F, 0x1F, 0x07, 0x42, 
-	0x13, 0x23, 0x4C, 0xCB, 0xFA, 0xF4, 0x14, 0xA4, 0xD5, 0xF7, 
-	0x9E, 0x36, 0x7C, 0x5B, 0x9F, 0xA8, 0x3C, 0xC1, 0x85, 0x5F, 
-	0x74, 0xD2, 0x39, 0x2D, 0xFF, 0xD0, 0x84, 0xDF, 0xFB, 0xB3, 
-	0x20, 0x7A, 0x2E, 0x9B, 0x17, 0xAE, 0xE6, 0xBA, 0x0B, 0xAE, 
-	0x5F, 0x53, 0xA4, 0x52, 0xED, 0x1B, 0xC4, 0x91, 0x02, 0x81, 
-	0x81, 0x00, 0xEC, 0x98, 0xDA, 0xBB, 0xD5, 0xFE, 0xF9, 0x52, 
-	0x4A, 0x7D, 0x02, 0x55, 0x49, 0x6F, 0x55, 0x6E, 0x52, 0x2F, 
-	0x84, 0xA3, 0x2B, 0xB3, 0x86, 0x62, 0xB3, 0x54, 0xD2, 0x63, 
-	0x52, 0xDA, 0xE3, 0x88, 0x76, 0xA0, 0xEF, 0x8B, 0x15, 0xA5, 
-	0xD3, 0x18, 0x14, 0x72, 0x77, 0x5E, 0xC7, 0xA3, 0x04, 0x1F, 
-	0x9E, 0x19, 0x62, 0xB5, 0x1B, 0x1B, 0x9E, 0xC3, 0xF2, 0xB5, 
-	0x32, 0xF9, 0x4C, 0xC1, 0xAA, 0xEB, 0x0C, 0x26, 0x7D, 0xD4, 
-	0x5F, 0x4A, 0x51, 0x5C, 0xA4, 0x45, 0x06, 0x70, 0x44, 0xA7, 
-	0x56, 0xC0, 0xD4, 0x22, 0x14, 0x76, 0x9E, 0xD8, 0x63, 0x50, 
-	0x89, 0x90, 0xD3, 0xE2, 0xBF, 0x81, 0x95, 0x92, 0x31, 0x41, 
-	0x87, 0x39, 0x1A, 0x43, 0x0B, 0x18, 0xA5, 0x53, 0x1F, 0x39, 
-	0x1A, 0x5F, 0x1F, 0x43, 0xBC, 0x87, 0x6A, 0xDF, 0x6E, 0xD3, 
-	0x22, 0x00, 0xFE, 0x22, 0x98, 0x70, 0x4E, 0x1A, 0x19, 0x29, 
-	0x02, 0x81, 0x81, 0x00, 0x8A, 0x41, 0x56, 0x28, 0x51, 0x9E, 
-	0x5F, 0xD4, 0x9E, 0x0B, 0x3B, 0x98, 0xA3, 0x54, 0xF2, 0x6C, 
-	0x56, 0xD4, 0xAA, 0xE9, 0x69, 0x33, 0x85, 0x24, 0x0C, 0xDA, 
-	0xD4, 0x0C, 0x2D, 0xC4, 0xBF, 0x4F, 0x02, 0x69, 0x38, 0x7C, 
-	0xD4, 0xE6, 0xDC, 0x4C, 0xED, 0xD7, 0x16, 0x11, 0xC3, 0x3E, 
-	0x00, 0xE7, 0xC3, 0x26, 0xC0, 0x51, 0x02, 0xDE, 0xBB, 0x75, 
-	0x9C, 0x6F, 0x56, 0x9C, 0x7A, 0xF3, 0x8E, 0xEF, 0xCF, 0x8A, 
-	0xC5, 0x2B, 0xD2, 0xDA, 0x06, 0x6A, 0x44, 0xC9, 0x73, 0xFE, 
-	0x6E, 0x99, 0x87, 0xF8, 0x5B, 0xBE, 0xF1, 0x7C, 0xE6, 0x65, 
-	0xB5, 0x4F, 0x6C, 0xF0, 0xC9, 0xC5, 0xFF, 0x16, 0xCA, 0x8B, 
-	0x1B, 0x17, 0xE2, 0x58, 0x3D, 0xA2, 0x37, 0xAB, 0x01, 0xBC, 
-	0xBF, 0x40, 0xCE, 0x53, 0x8C, 0x8E, 0xED, 0xEF, 0xEE, 0x59, 
-	0x9D, 0xE0, 0x63, 0xE6, 0x7C, 0x5E, 0xF5, 0x8E, 0x4B, 0xF1, 
-	0x3B, 0xC1, 0x02, 0x81, 0x80, 0x4D, 0x45, 0xF9, 0x40, 0x8C, 
-	0xC5, 0x5B, 0xF4, 0x2A, 0x1A, 0x8A, 0xB4, 0xF2, 0x1C, 0xAC, 
-	0x6B, 0xE9, 0x0C, 0x56, 0x36, 0xB7, 0x4E, 0x72, 0x96, 0xD5, 
-	0xE5, 0x8A, 0xD2, 0xE2, 0xFF, 0xF1, 0xF1, 0x18, 0x13, 0x3D, 
-	0x86, 0x09, 0xB8, 0xD8, 0x76, 0xA7, 0xC9, 0x1C, 0x71, 0x52, 
-	0x94, 0x30, 0x43, 0xE0, 0xF1, 0x78, 0x74, 0xFD, 0x61, 0x1B, 
-	0x4C, 0x09, 0xCC, 0xE6, 0x68, 0x2A, 0x71, 0xAD, 0x1C, 0xDF, 
-	0x43, 0xBC, 0x56, 0xDB, 0xA5, 0xA4, 0xBE, 0x35, 0x70, 0xA4, 
-	0x5E, 0xCF, 0x4F, 0xFC, 0x00, 0x55, 0x99, 0x3A, 0x3D, 0x23, 
-	0xCF, 0x67, 0x5A, 0xF5, 0x22, 0xF8, 0xB5, 0x29, 0xD0, 0x44, 
-	0x11, 0xEB, 0x35, 0x2E, 0x46, 0xBE, 0xFD, 0x8E, 0x18, 0xB2, 
-	0x5F, 0xA8, 0xBF, 0x19, 0x32, 0xA1, 0xF5, 0xDC, 0x03, 0xE6, 
-	0x7C, 0x9A, 0x1F, 0x0C, 0x7C, 0xA9, 0xB0, 0x0E, 0x21, 0x37, 
-	0x3B, 0xF1, 0xB0
+        0x30, 0x82, 0x04, 0xA5, 0x02, 0x01, 0x00, 0x02, 0x82, 0x01,
+        0x01, 0x00, 0xC0, 0x95, 0x08, 0xE1, 0x57, 0x41, 0xF2, 0x71,
+        0x6D, 0xB7, 0xD2, 0x45, 0x41, 0x27, 0x01, 0x65, 0xC6, 0x45,
+        0xAE, 0xF2, 0xBC, 0x24, 0x30, 0xB8, 0x95, 0xCE, 0x2F, 0x4E,
+        0xD6, 0xF6, 0x1C, 0x88, 0xBC, 0x7C, 0x9F, 0xFB, 0xA8, 0x67,
+        0x7F, 0xFE, 0x5C, 0x9C, 0x51, 0x75, 0xF7, 0x8A, 0xCA, 0x07,
+        0xE7, 0x35, 0x2F, 0x8F, 0xE1, 0xBD, 0x7B, 0xC0, 0x2F, 0x7C,
+        0xAB, 0x64, 0xA8, 0x17, 0xFC, 0xCA, 0x5D, 0x7B, 0xBA, 0xE0,
+        0x21, 0xE5, 0x72, 0x2E, 0x6F, 0x2E, 0x86, 0xD8, 0x95, 0x73,
+        0xDA, 0xAC, 0x1B, 0x53, 0xB9, 0x5F, 0x3F, 0xD7, 0x19, 0x0D,
+        0x25, 0x4F, 0xE1, 0x63, 0x63, 0x51, 0x8B, 0x0B, 0x64, 0x3F,
+        0xAD, 0x43, 0xB8, 0xA5, 0x1C, 0x5C, 0x34, 0xB3, 0xAE, 0x00,
+        0xA0, 0x63, 0xC5, 0xF6, 0x7F, 0x0B, 0x59, 0x68, 0x78, 0x73,
+        0xA6, 0x8C, 0x18, 0xA9, 0x02, 0x6D, 0xAF, 0xC3, 0x19, 0x01,
+        0x2E, 0xB8, 0x10, 0xE3, 0xC6, 0xCC, 0x40, 0xB4, 0x69, 0xA3,
+        0x46, 0x33, 0x69, 0x87, 0x6E, 0xC4, 0xBB, 0x17, 0xA6, 0xF3,
+        0xE8, 0xDD, 0xAD, 0x73, 0xBC, 0x7B, 0x2F, 0x21, 0xB5, 0xFD,
+        0x66, 0x51, 0x0C, 0xBD, 0x54, 0xB3, 0xE1, 0x6D, 0x5F, 0x1C,
+        0xBC, 0x23, 0x73, 0xD1, 0x09, 0x03, 0x89, 0x14, 0xD2, 0x10,
+        0xB9, 0x64, 0xC3, 0x2A, 0xD0, 0xA1, 0x96, 0x4A, 0xBC, 0xE1,
+        0xD4, 0x1A, 0x5B, 0xC7, 0xA0, 0xC0, 0xC1, 0x63, 0x78, 0x0F,
+        0x44, 0x37, 0x30, 0x32, 0x96, 0x80, 0x32, 0x23, 0x95, 0xA1,
+        0x77, 0xBA, 0x13, 0xD2, 0x97, 0x73, 0xE2, 0x5D, 0x25, 0xC9,
+        0x6A, 0x0D, 0xC3, 0x39, 0x60, 0xA4, 0xB4, 0xB0, 0x69, 0x42,
+        0x42, 0x09, 0xE9, 0xD8, 0x08, 0xBC, 0x33, 0x20, 0xB3, 0x58,
+        0x22, 0xA7, 0xAA, 0xEB, 0xC4, 0xE1, 0xE6, 0x61, 0x83, 0xC5,
+        0xD2, 0x96, 0xDF, 0xD9, 0xD0, 0x4F, 0xAD, 0xD7, 0x02, 0x03,
+        0x01, 0x00, 0x01, 0x02, 0x82, 0x01, 0x01, 0x00, 0x9A, 0xD0,
+        0x34, 0x0F, 0x52, 0x62, 0x05, 0x50, 0x01, 0xEF, 0x9F, 0xED,
+        0x64, 0x6E, 0xC2, 0xC4, 0xDA, 0x1A, 0xF2, 0x84, 0xD7, 0x92,
+        0x10, 0x48, 0x92, 0xC4, 0xE9, 0x6A, 0xEB, 0x8B, 0x75, 0x6C,
+        0xC6, 0x79, 0x38, 0xF2, 0xC9, 0x72, 0x4A, 0x86, 0x64, 0x54,
+        0x95, 0x77, 0xCB, 0xC3, 0x9A, 0x9D, 0xB7, 0xD4, 0x1D, 0xA4,
+        0x00, 0xC8, 0x9E, 0x4E, 0xE4, 0xDD, 0xC7, 0xBA, 0x67, 0x16,
+        0xC1, 0x74, 0xBC, 0xA9, 0xD6, 0x94, 0x8F, 0x2B, 0x30, 0x1A,
+        0xFB, 0xED, 0xDF, 0x21, 0x05, 0x23, 0xD9, 0x4A, 0x39, 0xBD,
+        0x98, 0x6B, 0x65, 0x9A, 0xB8, 0xDC, 0xC4, 0x7D, 0xEE, 0xA6,
+        0x43, 0x15, 0x2E, 0x3D, 0xBE, 0x1D, 0x22, 0x60, 0x2A, 0x73,
+        0x30, 0xD5, 0x3E, 0xD8, 0xA2, 0xAC, 0x86, 0x43, 0x2E, 0xC4,
+        0xF5, 0x64, 0x5E, 0x3F, 0x89, 0x75, 0x0F, 0x11, 0xD8, 0x51,
+        0x25, 0x4E, 0x9F, 0xD8, 0xAA, 0xA3, 0xCE, 0x60, 0xB3, 0xE2,
+        0x8A, 0xD9, 0x7E, 0x1B, 0xF0, 0x64, 0xCA, 0x9A, 0x5B, 0x05,
+        0x0B, 0x5B, 0xAA, 0xCB, 0xE5, 0xE3, 0x3F, 0x6E, 0x32, 0x22,
+        0x05, 0xF3, 0xD0, 0xFA, 0xEF, 0x74, 0x52, 0x81, 0xE2, 0x5F,
+        0x74, 0xD3, 0xBD, 0xFF, 0x31, 0x83, 0x45, 0x75, 0xFA, 0x63,
+        0x7A, 0x97, 0x2E, 0xD6, 0xB6, 0x19, 0xC6, 0x92, 0x26, 0xE4,
+        0x28, 0x06, 0x50, 0x50, 0x0E, 0x78, 0x2E, 0xA9, 0x78, 0x0D,
+        0x14, 0x97, 0xB4, 0x12, 0xD8, 0x31, 0x40, 0xAB, 0xA1, 0x01,
+        0x41, 0xC2, 0x30, 0xF8, 0x07, 0x5F, 0x16, 0xE4, 0x61, 0x77,
+        0xD2, 0x60, 0xF2, 0x9F, 0x8D, 0xE8, 0xF4, 0xBA, 0xEB, 0x63,
+        0xDE, 0x2A, 0x97, 0x81, 0xEF, 0x4C, 0x6C, 0xE6, 0x55, 0x34,
+        0x51, 0x2B, 0x28, 0x34, 0xF4, 0x53, 0x1C, 0xC4, 0x58, 0x0A,
+        0x3F, 0xBB, 0xAF, 0xB5, 0xF7, 0x4A, 0x85, 0x43, 0x2D, 0x3C,
+        0xF1, 0x58, 0x58, 0x81, 0x02, 0x81, 0x81, 0x00, 0xF2, 0x2C,
+        0x54, 0x76, 0x39, 0x23, 0x63, 0xC9, 0x10, 0x32, 0xB7, 0x93,
+        0xAD, 0xAF, 0xBE, 0x19, 0x75, 0x96, 0x81, 0x64, 0xE6, 0xB5,
+        0xB8, 0x89, 0x42, 0x41, 0xD1, 0x6D, 0xD0, 0x1C, 0x1B, 0xF8,
+        0x1B, 0xAC, 0x69, 0xCB, 0x36, 0x3C, 0x64, 0x7D, 0xDC, 0xF4,
+        0x19, 0xB8, 0xC3, 0x60, 0xB1, 0x57, 0x48, 0x5F, 0x52, 0x4F,
+        0x59, 0x3A, 0x55, 0x7F, 0x32, 0xC0, 0x19, 0x43, 0x50, 0x3F,
+        0xAE, 0xCE, 0x6F, 0x17, 0xF3, 0x0E, 0x9F, 0x40, 0xCA, 0x4E,
+        0xAD, 0x15, 0x3B, 0xC9, 0x79, 0xE9, 0xC0, 0x59, 0x38, 0x73,
+        0x70, 0x9C, 0x0A, 0x7C, 0xC9, 0x3A, 0x48, 0x32, 0xA7, 0xD8,
+        0x49, 0x75, 0x0A, 0x85, 0xC2, 0xC2, 0xFD, 0x15, 0x73, 0xDA,
+        0x99, 0x09, 0x2A, 0x69, 0x9A, 0x9F, 0x0A, 0x71, 0xBF, 0xB0,
+        0x04, 0xA6, 0x8C, 0x7A, 0x5A, 0x6F, 0x48, 0x5A, 0x54, 0x3B,
+        0xC6, 0xB1, 0x53, 0x17, 0xDF, 0xE7, 0x02, 0x81, 0x81, 0x00,
+        0xCB, 0x93, 0xDE, 0x77, 0x15, 0x5D, 0xB7, 0x5C, 0x5C, 0x7C,
+        0xD8, 0x90, 0xA9, 0x98, 0x2D, 0xD6, 0x69, 0x0E, 0x63, 0xB3,
+        0xA3, 0xDC, 0xA6, 0xCC, 0x8B, 0x6A, 0xA4, 0xA2, 0x12, 0x8C,
+        0x8E, 0x7B, 0x48, 0x2C, 0xB2, 0x4B, 0x37, 0xDC, 0x06, 0x18,
+        0x7D, 0xEA, 0xFE, 0x76, 0xA1, 0xD4, 0xA1, 0xE9, 0x3F, 0x0D,
+        0xCD, 0x1B, 0x5F, 0xAF, 0x5F, 0x9E, 0x96, 0x5B, 0x5B, 0x0F,
+        0xA1, 0x7C, 0xAF, 0xB3, 0x9B, 0x90, 0xDB, 0x57, 0x73, 0x3A,
+        0xED, 0xB0, 0x23, 0x44, 0xAE, 0x41, 0x4F, 0x1F, 0x07, 0x42,
+        0x13, 0x23, 0x4C, 0xCB, 0xFA, 0xF4, 0x14, 0xA4, 0xD5, 0xF7,
+        0x9E, 0x36, 0x7C, 0x5B, 0x9F, 0xA8, 0x3C, 0xC1, 0x85, 0x5F,
+        0x74, 0xD2, 0x39, 0x2D, 0xFF, 0xD0, 0x84, 0xDF, 0xFB, 0xB3,
+        0x20, 0x7A, 0x2E, 0x9B, 0x17, 0xAE, 0xE6, 0xBA, 0x0B, 0xAE,
+        0x5F, 0x53, 0xA4, 0x52, 0xED, 0x1B, 0xC4, 0x91, 0x02, 0x81,
+        0x81, 0x00, 0xEC, 0x98, 0xDA, 0xBB, 0xD5, 0xFE, 0xF9, 0x52,
+        0x4A, 0x7D, 0x02, 0x55, 0x49, 0x6F, 0x55, 0x6E, 0x52, 0x2F,
+        0x84, 0xA3, 0x2B, 0xB3, 0x86, 0x62, 0xB3, 0x54, 0xD2, 0x63,
+        0x52, 0xDA, 0xE3, 0x88, 0x76, 0xA0, 0xEF, 0x8B, 0x15, 0xA5,
+        0xD3, 0x18, 0x14, 0x72, 0x77, 0x5E, 0xC7, 0xA3, 0x04, 0x1F,
+        0x9E, 0x19, 0x62, 0xB5, 0x1B, 0x1B, 0x9E, 0xC3, 0xF2, 0xB5,
+        0x32, 0xF9, 0x4C, 0xC1, 0xAA, 0xEB, 0x0C, 0x26, 0x7D, 0xD4,
+        0x5F, 0x4A, 0x51, 0x5C, 0xA4, 0x45, 0x06, 0x70, 0x44, 0xA7,
+        0x56, 0xC0, 0xD4, 0x22, 0x14, 0x76, 0x9E, 0xD8, 0x63, 0x50,
+        0x89, 0x90, 0xD3, 0xE2, 0xBF, 0x81, 0x95, 0x92, 0x31, 0x41,
+        0x87, 0x39, 0x1A, 0x43, 0x0B, 0x18, 0xA5, 0x53, 0x1F, 0x39,
+        0x1A, 0x5F, 0x1F, 0x43, 0xBC, 0x87, 0x6A, 0xDF, 0x6E, 0xD3,
+        0x22, 0x00, 0xFE, 0x22, 0x98, 0x70, 0x4E, 0x1A, 0x19, 0x29,
+        0x02, 0x81, 0x81, 0x00, 0x8A, 0x41, 0x56, 0x28, 0x51, 0x9E,
+        0x5F, 0xD4, 0x9E, 0x0B, 0x3B, 0x98, 0xA3, 0x54, 0xF2, 0x6C,
+        0x56, 0xD4, 0xAA, 0xE9, 0x69, 0x33, 0x85, 0x24, 0x0C, 0xDA,
+        0xD4, 0x0C, 0x2D, 0xC4, 0xBF, 0x4F, 0x02, 0x69, 0x38, 0x7C,
+        0xD4, 0xE6, 0xDC, 0x4C, 0xED, 0xD7, 0x16, 0x11, 0xC3, 0x3E,
+        0x00, 0xE7, 0xC3, 0x26, 0xC0, 0x51, 0x02, 0xDE, 0xBB, 0x75,
+        0x9C, 0x6F, 0x56, 0x9C, 0x7A, 0xF3, 0x8E, 0xEF, 0xCF, 0x8A,
+        0xC5, 0x2B, 0xD2, 0xDA, 0x06, 0x6A, 0x44, 0xC9, 0x73, 0xFE,
+        0x6E, 0x99, 0x87, 0xF8, 0x5B, 0xBE, 0xF1, 0x7C, 0xE6, 0x65,
+        0xB5, 0x4F, 0x6C, 0xF0, 0xC9, 0xC5, 0xFF, 0x16, 0xCA, 0x8B,
+        0x1B, 0x17, 0xE2, 0x58, 0x3D, 0xA2, 0x37, 0xAB, 0x01, 0xBC,
+        0xBF, 0x40, 0xCE, 0x53, 0x8C, 0x8E, 0xED, 0xEF, 0xEE, 0x59,
+        0x9D, 0xE0, 0x63, 0xE6, 0x7C, 0x5E, 0xF5, 0x8E, 0x4B, 0xF1,
+        0x3B, 0xC1, 0x02, 0x81, 0x80, 0x4D, 0x45, 0xF9, 0x40, 0x8C,
+        0xC5, 0x5B, 0xF4, 0x2A, 0x1A, 0x8A, 0xB4, 0xF2, 0x1C, 0xAC,
+        0x6B, 0xE9, 0x0C, 0x56, 0x36, 0xB7, 0x4E, 0x72, 0x96, 0xD5,
+        0xE5, 0x8A, 0xD2, 0xE2, 0xFF, 0xF1, 0xF1, 0x18, 0x13, 0x3D,
+        0x86, 0x09, 0xB8, 0xD8, 0x76, 0xA7, 0xC9, 0x1C, 0x71, 0x52,
+        0x94, 0x30, 0x43, 0xE0, 0xF1, 0x78, 0x74, 0xFD, 0x61, 0x1B,
+        0x4C, 0x09, 0xCC, 0xE6, 0x68, 0x2A, 0x71, 0xAD, 0x1C, 0xDF,
+        0x43, 0xBC, 0x56, 0xDB, 0xA5, 0xA4, 0xBE, 0x35, 0x70, 0xA4,
+        0x5E, 0xCF, 0x4F, 0xFC, 0x00, 0x55, 0x99, 0x3A, 0x3D, 0x23,
+        0xCF, 0x67, 0x5A, 0xF5, 0x22, 0xF8, 0xB5, 0x29, 0xD0, 0x44,
+        0x11, 0xEB, 0x35, 0x2E, 0x46, 0xBE, 0xFD, 0x8E, 0x18, 0xB2,
+        0x5F, 0xA8, 0xBF, 0x19, 0x32, 0xA1, 0xF5, 0xDC, 0x03, 0xE6,
+        0x7C, 0x9A, 0x1F, 0x0C, 0x7C, 0xA9, 0xB0, 0x0E, 0x21, 0x37,
+        0x3B, 0xF1, 0xB0
 };
 static const int sizeof_server_key_der_2048 = sizeof(server_key_der_2048);
 
 /* ./certs/server-cert.der, 2048-bit */
 static const unsigned char server_cert_der_2048[] =
 {
-	0x30, 0x82, 0x04, 0x9E, 0x30, 0x82, 0x03, 0x86, 0xA0, 0x03, 
-	0x02, 0x01, 0x02, 0x02, 0x01, 0x01, 0x30, 0x0D, 0x06, 0x09, 
-	0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0B, 0x05, 
-	0x00, 0x30, 0x81, 0x94, 0x31, 0x0B, 0x30, 0x09, 0x06, 0x03, 
-	0x55, 0x04, 0x06, 0x13, 0x02, 0x55, 0x53, 0x31, 0x10, 0x30, 
-	0x0E, 0x06, 0x03, 0x55, 0x04, 0x08, 0x0C, 0x07, 0x4D, 0x6F, 
-	0x6E, 0x74, 0x61, 0x6E, 0x61, 0x31, 0x10, 0x30, 0x0E, 0x06, 
-	0x03, 0x55, 0x04, 0x07, 0x0C, 0x07, 0x42, 0x6F, 0x7A, 0x65, 
-	0x6D, 0x61, 0x6E, 0x31, 0x11, 0x30, 0x0F, 0x06, 0x03, 0x55, 
-	0x04, 0x0A, 0x0C, 0x08, 0x53, 0x61, 0x77, 0x74, 0x6F, 0x6F, 
-	0x74, 0x68, 0x31, 0x13, 0x30, 0x11, 0x06, 0x03, 0x55, 0x04, 
-	0x0B, 0x0C, 0x0A, 0x43, 0x6F, 0x6E, 0x73, 0x75, 0x6C, 0x74, 
-	0x69, 0x6E, 0x67, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 
-	0x04, 0x03, 0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77, 0x6F, 
-	0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x31, 
-	0x1F, 0x30, 0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 
-	0x0D, 0x01, 0x09, 0x01, 0x16, 0x10, 0x69, 0x6E, 0x66, 0x6F, 
-	0x40, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 
-	0x6F, 0x6D, 0x30, 0x1E, 0x17, 0x0D, 0x31, 0x38, 0x30, 0x34, 
-	0x31, 0x33, 0x31, 0x35, 0x32, 0x33, 0x31, 0x30, 0x5A, 0x17, 
-	0x0D, 0x32, 0x31, 0x30, 0x31, 0x30, 0x37, 0x31, 0x35, 0x32, 
-	0x33, 0x31, 0x30, 0x5A, 0x30, 0x81, 0x90, 0x31, 0x0B, 0x30, 
-	0x09, 0x06, 0x03, 0x55, 0x04, 0x06, 0x13, 0x02, 0x55, 0x53, 
-	0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x08, 0x0C, 
-	0x07, 0x4D, 0x6F, 0x6E, 0x74, 0x61, 0x6E, 0x61, 0x31, 0x10, 
-	0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x07, 0x0C, 0x07, 0x42, 
-	0x6F, 0x7A, 0x65, 0x6D, 0x61, 0x6E, 0x31, 0x10, 0x30, 0x0E, 
-	0x06, 0x03, 0x55, 0x04, 0x0A, 0x0C, 0x07, 0x77, 0x6F, 0x6C, 
-	0x66, 0x53, 0x53, 0x4C, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 
-	0x55, 0x04, 0x0B, 0x0C, 0x07, 0x53, 0x75, 0x70, 0x70, 0x6F, 
-	0x72, 0x74, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 
-	0x03, 0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77, 0x6F, 0x6C, 
-	0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x31, 0x1F, 
-	0x30, 0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 
-	0x01, 0x09, 0x01, 0x16, 0x10, 0x69, 0x6E, 0x66, 0x6F, 0x40, 
-	0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 
-	0x6D, 0x30, 0x82, 0x01, 0x22, 0x30, 0x0D, 0x06, 0x09, 0x2A, 
-	0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x01, 0x05, 0x00, 
-	0x03, 0x82, 0x01, 0x0F, 0x00, 0x30, 0x82, 0x01, 0x0A, 0x02, 
-	0x82, 0x01, 0x01, 0x00, 0xC0, 0x95, 0x08, 0xE1, 0x57, 0x41, 
-	0xF2, 0x71, 0x6D, 0xB7, 0xD2, 0x45, 0x41, 0x27, 0x01, 0x65, 
-	0xC6, 0x45, 0xAE, 0xF2, 0xBC, 0x24, 0x30, 0xB8, 0x95, 0xCE, 
-	0x2F, 0x4E, 0xD6, 0xF6, 0x1C, 0x88, 0xBC, 0x7C, 0x9F, 0xFB, 
-	0xA8, 0x67, 0x7F, 0xFE, 0x5C, 0x9C, 0x51, 0x75, 0xF7, 0x8A, 
-	0xCA, 0x07, 0xE7, 0x35, 0x2F, 0x8F, 0xE1, 0xBD, 0x7B, 0xC0, 
-	0x2F, 0x7C, 0xAB, 0x64, 0xA8, 0x17, 0xFC, 0xCA, 0x5D, 0x7B, 
-	0xBA, 0xE0, 0x21, 0xE5, 0x72, 0x2E, 0x6F, 0x2E, 0x86, 0xD8, 
-	0x95, 0x73, 0xDA, 0xAC, 0x1B, 0x53, 0xB9, 0x5F, 0x3F, 0xD7, 
-	0x19, 0x0D, 0x25, 0x4F, 0xE1, 0x63, 0x63, 0x51, 0x8B, 0x0B, 
-	0x64, 0x3F, 0xAD, 0x43, 0xB8, 0xA5, 0x1C, 0x5C, 0x34, 0xB3, 
-	0xAE, 0x00, 0xA0, 0x63, 0xC5, 0xF6, 0x7F, 0x0B, 0x59, 0x68, 
-	0x78, 0x73, 0xA6, 0x8C, 0x18, 0xA9, 0x02, 0x6D, 0xAF, 0xC3, 
-	0x19, 0x01, 0x2E, 0xB8, 0x10, 0xE3, 0xC6, 0xCC, 0x40, 0xB4, 
-	0x69, 0xA3, 0x46, 0x33, 0x69, 0x87, 0x6E, 0xC4, 0xBB, 0x17, 
-	0xA6, 0xF3, 0xE8, 0xDD, 0xAD, 0x73, 0xBC, 0x7B, 0x2F, 0x21, 
-	0xB5, 0xFD, 0x66, 0x51, 0x0C, 0xBD, 0x54, 0xB3, 0xE1, 0x6D, 
-	0x5F, 0x1C, 0xBC, 0x23, 0x73, 0xD1, 0x09, 0x03, 0x89, 0x14, 
-	0xD2, 0x10, 0xB9, 0x64, 0xC3, 0x2A, 0xD0, 0xA1, 0x96, 0x4A, 
-	0xBC, 0xE1, 0xD4, 0x1A, 0x5B, 0xC7, 0xA0, 0xC0, 0xC1, 0x63, 
-	0x78, 0x0F, 0x44, 0x37, 0x30, 0x32, 0x96, 0x80, 0x32, 0x23, 
-	0x95, 0xA1, 0x77, 0xBA, 0x13, 0xD2, 0x97, 0x73, 0xE2, 0x5D, 
-	0x25, 0xC9, 0x6A, 0x0D, 0xC3, 0x39, 0x60, 0xA4, 0xB4, 0xB0, 
-	0x69, 0x42, 0x42, 0x09, 0xE9, 0xD8, 0x08, 0xBC, 0x33, 0x20, 
-	0xB3, 0x58, 0x22, 0xA7, 0xAA, 0xEB, 0xC4, 0xE1, 0xE6, 0x61, 
-	0x83, 0xC5, 0xD2, 0x96, 0xDF, 0xD9, 0xD0, 0x4F, 0xAD, 0xD7, 
-	0x02, 0x03, 0x01, 0x00, 0x01, 0xA3, 0x81, 0xFC, 0x30, 0x81, 
-	0xF9, 0x30, 0x1D, 0x06, 0x03, 0x55, 0x1D, 0x0E, 0x04, 0x16, 
-	0x04, 0x14, 0xB3, 0x11, 0x32, 0xC9, 0x92, 0x98, 0x84, 0xE2, 
-	0xC9, 0xF8, 0xD0, 0x3B, 0x6E, 0x03, 0x42, 0xCA, 0x1F, 0x0E, 
-	0x8E, 0x3C, 0x30, 0x81, 0xC9, 0x06, 0x03, 0x55, 0x1D, 0x23, 
-	0x04, 0x81, 0xC1, 0x30, 0x81, 0xBE, 0x80, 0x14, 0x27, 0x8E, 
-	0x67, 0x11, 0x74, 0xC3, 0x26, 0x1D, 0x3F, 0xED, 0x33, 0x63, 
-	0xB3, 0xA4, 0xD8, 0x1D, 0x30, 0xE5, 0xE8, 0xD5, 0xA1, 0x81, 
-	0x9A, 0xA4, 0x81, 0x97, 0x30, 0x81, 0x94, 0x31, 0x0B, 0x30, 
-	0x09, 0x06, 0x03, 0x55, 0x04, 0x06, 0x13, 0x02, 0x55, 0x53, 
-	0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x08, 0x0C, 
-	0x07, 0x4D, 0x6F, 0x6E, 0x74, 0x61, 0x6E, 0x61, 0x31, 0x10, 
-	0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x07, 0x0C, 0x07, 0x42, 
-	0x6F, 0x7A, 0x65, 0x6D, 0x61, 0x6E, 0x31, 0x11, 0x30, 0x0F, 
-	0x06, 0x03, 0x55, 0x04, 0x0A, 0x0C, 0x08, 0x53, 0x61, 0x77, 
-	0x74, 0x6F, 0x6F, 0x74, 0x68, 0x31, 0x13, 0x30, 0x11, 0x06, 
-	0x03, 0x55, 0x04, 0x0B, 0x0C, 0x0A, 0x43, 0x6F, 0x6E, 0x73, 
-	0x75, 0x6C, 0x74, 0x69, 0x6E, 0x67, 0x31, 0x18, 0x30, 0x16, 
-	0x06, 0x03, 0x55, 0x04, 0x03, 0x0C, 0x0F, 0x77, 0x77, 0x77, 
-	0x2E, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 
-	0x6F, 0x6D, 0x31, 0x1F, 0x30, 0x1D, 0x06, 0x09, 0x2A, 0x86, 
-	0x48, 0x86, 0xF7, 0x0D, 0x01, 0x09, 0x01, 0x16, 0x10, 0x69, 
-	0x6E, 0x66, 0x6F, 0x40, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 
-	0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x82, 0x09, 0x00, 0x86, 0xFF, 
-	0xF5, 0x8E, 0x10, 0xDE, 0xB8, 0xFB, 0x30, 0x0C, 0x06, 0x03, 
-	0x55, 0x1D, 0x13, 0x04, 0x05, 0x30, 0x03, 0x01, 0x01, 0xFF, 
-	0x30, 0x0D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 
-	0x01, 0x01, 0x0B, 0x05, 0x00, 0x03, 0x82, 0x01, 0x01, 0x00, 
-	0xB4, 0x54, 0x60, 0xAD, 0xA0, 0x03, 0x32, 0xDE, 0x02, 0x7F, 
-	0x21, 0x4A, 0x81, 0xC6, 0xED, 0xCD, 0xCD, 0xD8, 0x12, 0x8A, 
-	0xC0, 0xBA, 0x82, 0x5B, 0x75, 0xAD, 0x54, 0xE3, 0x7C, 0x80, 
-	0x6A, 0xAC, 0x2E, 0x6C, 0x20, 0x4E, 0xBE, 0x4D, 0x82, 0xA7, 
-	0x47, 0x13, 0x5C, 0xF4, 0xC6, 0x6A, 0x2B, 0x10, 0x99, 0x58, 
-	0xDE, 0xAB, 0x6B, 0x7C, 0x22, 0x05, 0xC1, 0x83, 0x9D, 0xCB, 
-	0xFF, 0x3C, 0xE4, 0x2D, 0x57, 0x6A, 0xA6, 0x96, 0xDF, 0xD3, 
-	0xC1, 0x68, 0xE3, 0xD2, 0xC6, 0x83, 0x4B, 0x97, 0xE2, 0xC6, 
-	0x32, 0x0E, 0xBE, 0xC4, 0x03, 0xB9, 0x07, 0x8A, 0x5B, 0xB8, 
-	0x84, 0xBA, 0xC5, 0x39, 0x3F, 0x1C, 0x58, 0xA7, 0x55, 0xD7, 
-	0xF0, 0x9B, 0xE8, 0xD2, 0x45, 0xB9, 0xE3, 0x83, 0x2E, 0xEE, 
-	0xB6, 0x71, 0x56, 0xB9, 0x3A, 0xEE, 0x3F, 0x27, 0xD8, 0x77, 
-	0xE8, 0xFB, 0x44, 0x48, 0x65, 0x27, 0x47, 0x4C, 0xFB, 0xFE, 
-	0x72, 0xC3, 0xAC, 0x05, 0x7B, 0x1D, 0xCB, 0xEB, 0x5E, 0x65, 
-	0x9A, 0xAB, 0x02, 0xE4, 0x88, 0x5B, 0x3B, 0x8B, 0x0B, 0xC7, 
-	0xCC, 0xA9, 0xA6, 0x8B, 0xE1, 0x87, 0xB0, 0x19, 0x1A, 0x0C, 
-	0x28, 0x58, 0x6F, 0x99, 0x52, 0x7E, 0xED, 0xB0, 0x3A, 0x68, 
-	0x3B, 0x8C, 0x0A, 0x08, 0x74, 0x72, 0xAB, 0xB9, 0x09, 0xC5, 
-	0xED, 0x04, 0x7E, 0x6F, 0x0B, 0x1C, 0x09, 0x21, 0xD0, 0xCD, 
-	0x7F, 0xF9, 0xC4, 0x5E, 0x27, 0x20, 0xE4, 0x85, 0x73, 0x52, 
-	0x05, 0xD2, 0xBA, 0xF8, 0xD5, 0x8F, 0x41, 0xCC, 0x23, 0x2E, 
-	0x12, 0x6D, 0xBC, 0x31, 0x98, 0xE7, 0x63, 0xA3, 0x8E, 0x26, 
-	0xCD, 0xE8, 0x2B, 0x88, 0xEE, 0xE2, 0xFE, 0x3A, 0x74, 0x52, 
-	0x34, 0x0E, 0xFD, 0x12, 0xE5, 0x5E, 0x69, 0x50, 0x20, 0x31, 
-	0x34, 0xE4, 0x31, 0xF1, 0xE7, 0xE4, 0x5B, 0x03, 0x13, 0xDA, 
-	0xAC, 0x41, 0x6C, 0xE7, 0xCF, 0x2B
+        0x30, 0x82, 0x04, 0x9E, 0x30, 0x82, 0x03, 0x86, 0xA0, 0x03,
+        0x02, 0x01, 0x02, 0x02, 0x01, 0x01, 0x30, 0x0D, 0x06, 0x09,
+        0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0B, 0x05,
+        0x00, 0x30, 0x81, 0x94, 0x31, 0x0B, 0x30, 0x09, 0x06, 0x03,
+        0x55, 0x04, 0x06, 0x13, 0x02, 0x55, 0x53, 0x31, 0x10, 0x30,
+        0x0E, 0x06, 0x03, 0x55, 0x04, 0x08, 0x0C, 0x07, 0x4D, 0x6F,
+        0x6E, 0x74, 0x61, 0x6E, 0x61, 0x31, 0x10, 0x30, 0x0E, 0x06,
+        0x03, 0x55, 0x04, 0x07, 0x0C, 0x07, 0x42, 0x6F, 0x7A, 0x65,
+        0x6D, 0x61, 0x6E, 0x31, 0x11, 0x30, 0x0F, 0x06, 0x03, 0x55,
+        0x04, 0x0A, 0x0C, 0x08, 0x53, 0x61, 0x77, 0x74, 0x6F, 0x6F,
+        0x74, 0x68, 0x31, 0x13, 0x30, 0x11, 0x06, 0x03, 0x55, 0x04,
+        0x0B, 0x0C, 0x0A, 0x43, 0x6F, 0x6E, 0x73, 0x75, 0x6C, 0x74,
+        0x69, 0x6E, 0x67, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55,
+        0x04, 0x03, 0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77, 0x6F,
+        0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x31,
+        0x1F, 0x30, 0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7,
+        0x0D, 0x01, 0x09, 0x01, 0x16, 0x10, 0x69, 0x6E, 0x66, 0x6F,
+        0x40, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63,
+        0x6F, 0x6D, 0x30, 0x1E, 0x17, 0x0D, 0x31, 0x38, 0x30, 0x34,
+        0x31, 0x33, 0x31, 0x35, 0x32, 0x33, 0x31, 0x30, 0x5A, 0x17,
+        0x0D, 0x32, 0x31, 0x30, 0x31, 0x30, 0x37, 0x31, 0x35, 0x32,
+        0x33, 0x31, 0x30, 0x5A, 0x30, 0x81, 0x90, 0x31, 0x0B, 0x30,
+        0x09, 0x06, 0x03, 0x55, 0x04, 0x06, 0x13, 0x02, 0x55, 0x53,
+        0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x08, 0x0C,
+        0x07, 0x4D, 0x6F, 0x6E, 0x74, 0x61, 0x6E, 0x61, 0x31, 0x10,
+        0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x07, 0x0C, 0x07, 0x42,
+        0x6F, 0x7A, 0x65, 0x6D, 0x61, 0x6E, 0x31, 0x10, 0x30, 0x0E,
+        0x06, 0x03, 0x55, 0x04, 0x0A, 0x0C, 0x07, 0x77, 0x6F, 0x6C,
+        0x66, 0x53, 0x53, 0x4C, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03,
+        0x55, 0x04, 0x0B, 0x0C, 0x07, 0x53, 0x75, 0x70, 0x70, 0x6F,
+        0x72, 0x74, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 0x04,
+        0x03, 0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77, 0x6F, 0x6C,
+        0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x31, 0x1F,
+        0x30, 0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D,
+        0x01, 0x09, 0x01, 0x16, 0x10, 0x69, 0x6E, 0x66, 0x6F, 0x40,
+        0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F,
+        0x6D, 0x30, 0x82, 0x01, 0x22, 0x30, 0x0D, 0x06, 0x09, 0x2A,
+        0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x01, 0x05, 0x00,
+        0x03, 0x82, 0x01, 0x0F, 0x00, 0x30, 0x82, 0x01, 0x0A, 0x02,
+        0x82, 0x01, 0x01, 0x00, 0xC0, 0x95, 0x08, 0xE1, 0x57, 0x41,
+        0xF2, 0x71, 0x6D, 0xB7, 0xD2, 0x45, 0x41, 0x27, 0x01, 0x65,
+        0xC6, 0x45, 0xAE, 0xF2, 0xBC, 0x24, 0x30, 0xB8, 0x95, 0xCE,
+        0x2F, 0x4E, 0xD6, 0xF6, 0x1C, 0x88, 0xBC, 0x7C, 0x9F, 0xFB,
+        0xA8, 0x67, 0x7F, 0xFE, 0x5C, 0x9C, 0x51, 0x75, 0xF7, 0x8A,
+        0xCA, 0x07, 0xE7, 0x35, 0x2F, 0x8F, 0xE1, 0xBD, 0x7B, 0xC0,
+        0x2F, 0x7C, 0xAB, 0x64, 0xA8, 0x17, 0xFC, 0xCA, 0x5D, 0x7B,
+        0xBA, 0xE0, 0x21, 0xE5, 0x72, 0x2E, 0x6F, 0x2E, 0x86, 0xD8,
+        0x95, 0x73, 0xDA, 0xAC, 0x1B, 0x53, 0xB9, 0x5F, 0x3F, 0xD7,
+        0x19, 0x0D, 0x25, 0x4F, 0xE1, 0x63, 0x63, 0x51, 0x8B, 0x0B,
+        0x64, 0x3F, 0xAD, 0x43, 0xB8, 0xA5, 0x1C, 0x5C, 0x34, 0xB3,
+        0xAE, 0x00, 0xA0, 0x63, 0xC5, 0xF6, 0x7F, 0x0B, 0x59, 0x68,
+        0x78, 0x73, 0xA6, 0x8C, 0x18, 0xA9, 0x02, 0x6D, 0xAF, 0xC3,
+        0x19, 0x01, 0x2E, 0xB8, 0x10, 0xE3, 0xC6, 0xCC, 0x40, 0xB4,
+        0x69, 0xA3, 0x46, 0x33, 0x69, 0x87, 0x6E, 0xC4, 0xBB, 0x17,
+        0xA6, 0xF3, 0xE8, 0xDD, 0xAD, 0x73, 0xBC, 0x7B, 0x2F, 0x21,
+        0xB5, 0xFD, 0x66, 0x51, 0x0C, 0xBD, 0x54, 0xB3, 0xE1, 0x6D,
+        0x5F, 0x1C, 0xBC, 0x23, 0x73, 0xD1, 0x09, 0x03, 0x89, 0x14,
+        0xD2, 0x10, 0xB9, 0x64, 0xC3, 0x2A, 0xD0, 0xA1, 0x96, 0x4A,
+        0xBC, 0xE1, 0xD4, 0x1A, 0x5B, 0xC7, 0xA0, 0xC0, 0xC1, 0x63,
+        0x78, 0x0F, 0x44, 0x37, 0x30, 0x32, 0x96, 0x80, 0x32, 0x23,
+        0x95, 0xA1, 0x77, 0xBA, 0x13, 0xD2, 0x97, 0x73, 0xE2, 0x5D,
+        0x25, 0xC9, 0x6A, 0x0D, 0xC3, 0x39, 0x60, 0xA4, 0xB4, 0xB0,
+        0x69, 0x42, 0x42, 0x09, 0xE9, 0xD8, 0x08, 0xBC, 0x33, 0x20,
+        0xB3, 0x58, 0x22, 0xA7, 0xAA, 0xEB, 0xC4, 0xE1, 0xE6, 0x61,
+        0x83, 0xC5, 0xD2, 0x96, 0xDF, 0xD9, 0xD0, 0x4F, 0xAD, 0xD7,
+        0x02, 0x03, 0x01, 0x00, 0x01, 0xA3, 0x81, 0xFC, 0x30, 0x81,
+        0xF9, 0x30, 0x1D, 0x06, 0x03, 0x55, 0x1D, 0x0E, 0x04, 0x16,
+        0x04, 0x14, 0xB3, 0x11, 0x32, 0xC9, 0x92, 0x98, 0x84, 0xE2,
+        0xC9, 0xF8, 0xD0, 0x3B, 0x6E, 0x03, 0x42, 0xCA, 0x1F, 0x0E,
+        0x8E, 0x3C, 0x30, 0x81, 0xC9, 0x06, 0x03, 0x55, 0x1D, 0x23,
+        0x04, 0x81, 0xC1, 0x30, 0x81, 0xBE, 0x80, 0x14, 0x27, 0x8E,
+        0x67, 0x11, 0x74, 0xC3, 0x26, 0x1D, 0x3F, 0xED, 0x33, 0x63,
+        0xB3, 0xA4, 0xD8, 0x1D, 0x30, 0xE5, 0xE8, 0xD5, 0xA1, 0x81,
+        0x9A, 0xA4, 0x81, 0x97, 0x30, 0x81, 0x94, 0x31, 0x0B, 0x30,
+        0x09, 0x06, 0x03, 0x55, 0x04, 0x06, 0x13, 0x02, 0x55, 0x53,
+        0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x08, 0x0C,
+        0x07, 0x4D, 0x6F, 0x6E, 0x74, 0x61, 0x6E, 0x61, 0x31, 0x10,
+        0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x07, 0x0C, 0x07, 0x42,
+        0x6F, 0x7A, 0x65, 0x6D, 0x61, 0x6E, 0x31, 0x11, 0x30, 0x0F,
+        0x06, 0x03, 0x55, 0x04, 0x0A, 0x0C, 0x08, 0x53, 0x61, 0x77,
+        0x74, 0x6F, 0x6F, 0x74, 0x68, 0x31, 0x13, 0x30, 0x11, 0x06,
+        0x03, 0x55, 0x04, 0x0B, 0x0C, 0x0A, 0x43, 0x6F, 0x6E, 0x73,
+        0x75, 0x6C, 0x74, 0x69, 0x6E, 0x67, 0x31, 0x18, 0x30, 0x16,
+        0x06, 0x03, 0x55, 0x04, 0x03, 0x0C, 0x0F, 0x77, 0x77, 0x77,
+        0x2E, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63,
+        0x6F, 0x6D, 0x31, 0x1F, 0x30, 0x1D, 0x06, 0x09, 0x2A, 0x86,
+        0x48, 0x86, 0xF7, 0x0D, 0x01, 0x09, 0x01, 0x16, 0x10, 0x69,
+        0x6E, 0x66, 0x6F, 0x40, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73,
+        0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x82, 0x09, 0x00, 0x86, 0xFF,
+        0xF5, 0x8E, 0x10, 0xDE, 0xB8, 0xFB, 0x30, 0x0C, 0x06, 0x03,
+        0x55, 0x1D, 0x13, 0x04, 0x05, 0x30, 0x03, 0x01, 0x01, 0xFF,
+        0x30, 0x0D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D,
+        0x01, 0x01, 0x0B, 0x05, 0x00, 0x03, 0x82, 0x01, 0x01, 0x00,
+        0xB4, 0x54, 0x60, 0xAD, 0xA0, 0x03, 0x32, 0xDE, 0x02, 0x7F,
+        0x21, 0x4A, 0x81, 0xC6, 0xED, 0xCD, 0xCD, 0xD8, 0x12, 0x8A,
+        0xC0, 0xBA, 0x82, 0x5B, 0x75, 0xAD, 0x54, 0xE3, 0x7C, 0x80,
+        0x6A, 0xAC, 0x2E, 0x6C, 0x20, 0x4E, 0xBE, 0x4D, 0x82, 0xA7,
+        0x47, 0x13, 0x5C, 0xF4, 0xC6, 0x6A, 0x2B, 0x10, 0x99, 0x58,
+        0xDE, 0xAB, 0x6B, 0x7C, 0x22, 0x05, 0xC1, 0x83, 0x9D, 0xCB,
+        0xFF, 0x3C, 0xE4, 0x2D, 0x57, 0x6A, 0xA6, 0x96, 0xDF, 0xD3,
+        0xC1, 0x68, 0xE3, 0xD2, 0xC6, 0x83, 0x4B, 0x97, 0xE2, 0xC6,
+        0x32, 0x0E, 0xBE, 0xC4, 0x03, 0xB9, 0x07, 0x8A, 0x5B, 0xB8,
+        0x84, 0xBA, 0xC5, 0x39, 0x3F, 0x1C, 0x58, 0xA7, 0x55, 0xD7,
+        0xF0, 0x9B, 0xE8, 0xD2, 0x45, 0xB9, 0xE3, 0x83, 0x2E, 0xEE,
+        0xB6, 0x71, 0x56, 0xB9, 0x3A, 0xEE, 0x3F, 0x27, 0xD8, 0x77,
+        0xE8, 0xFB, 0x44, 0x48, 0x65, 0x27, 0x47, 0x4C, 0xFB, 0xFE,
+        0x72, 0xC3, 0xAC, 0x05, 0x7B, 0x1D, 0xCB, 0xEB, 0x5E, 0x65,
+        0x9A, 0xAB, 0x02, 0xE4, 0x88, 0x5B, 0x3B, 0x8B, 0x0B, 0xC7,
+        0xCC, 0xA9, 0xA6, 0x8B, 0xE1, 0x87, 0xB0, 0x19, 0x1A, 0x0C,
+        0x28, 0x58, 0x6F, 0x99, 0x52, 0x7E, 0xED, 0xB0, 0x3A, 0x68,
+        0x3B, 0x8C, 0x0A, 0x08, 0x74, 0x72, 0xAB, 0xB9, 0x09, 0xC5,
+        0xED, 0x04, 0x7E, 0x6F, 0x0B, 0x1C, 0x09, 0x21, 0xD0, 0xCD,
+        0x7F, 0xF9, 0xC4, 0x5E, 0x27, 0x20, 0xE4, 0x85, 0x73, 0x52,
+        0x05, 0xD2, 0xBA, 0xF8, 0xD5, 0x8F, 0x41, 0xCC, 0x23, 0x2E,
+        0x12, 0x6D, 0xBC, 0x31, 0x98, 0xE7, 0x63, 0xA3, 0x8E, 0x26,
+        0xCD, 0xE8, 0x2B, 0x88, 0xEE, 0xE2, 0xFE, 0x3A, 0x74, 0x52,
+        0x34, 0x0E, 0xFD, 0x12, 0xE5, 0x5E, 0x69, 0x50, 0x20, 0x31,
+        0x34, 0xE4, 0x31, 0xF1, 0xE7, 0xE4, 0x5B, 0x03, 0x13, 0xDA,
+        0xAC, 0x41, 0x6C, 0xE7, 0xCF, 0x2B
 };
 static const int sizeof_server_cert_der_2048 = sizeof(server_cert_der_2048);
 
@@ -1729,483 +1951,1424 @@
 /* ./certs/dh3072.der, 3072-bit */
 static const unsigned char dh_key_der_3072[] =
 {
-	0x30, 0x82, 0x01, 0x88, 0x02, 0x82, 0x01, 0x81, 0x00, 0x89, 
-	0x1B, 0x75, 0x3F, 0x84, 0xB6, 0x11, 0xED, 0x21, 0xF1, 0x08, 
-	0x0F, 0xB8, 0x06, 0xC9, 0xA3, 0xC9, 0x41, 0xDB, 0x5A, 0xC8, 
-	0xF8, 0x82, 0x73, 0x0F, 0xEB, 0x89, 0x1E, 0x54, 0x18, 0xBE, 
-	0xE6, 0x48, 0x41, 0x9E, 0xFA, 0xC2, 0x0C, 0x50, 0x67, 0xC3, 
-	0x5D, 0xB5, 0xF5, 0x0F, 0x23, 0x6A, 0x43, 0x33, 0x91, 0xD9, 
-	0x40, 0xF3, 0x66, 0xC6, 0x99, 0xFF, 0x97, 0xB6, 0x7B, 0xAF, 
-	0x27, 0x72, 0x3B, 0x9F, 0x7E, 0x58, 0x18, 0x14, 0x9F, 0x91, 
-	0x6E, 0x2B, 0x11, 0xC1, 0x57, 0x49, 0x27, 0x36, 0x78, 0xE1, 
-	0x09, 0x68, 0x9C, 0x05, 0x5A, 0xAC, 0xE6, 0x00, 0x38, 0xBE, 
-	0x95, 0x74, 0x81, 0x53, 0x28, 0xF0, 0xAD, 0xDF, 0xB5, 0x87, 
-	0x1C, 0x72, 0x17, 0x4E, 0xEC, 0x00, 0x91, 0x22, 0xAA, 0xE4, 
-	0x88, 0xD7, 0xF5, 0x3D, 0x1F, 0x03, 0x13, 0x2D, 0x1C, 0xFB, 
-	0xDE, 0x59, 0x68, 0xAD, 0xE0, 0x17, 0xA1, 0xEE, 0x8D, 0xCC, 
-	0xBF, 0xFE, 0xCF, 0x24, 0x42, 0xED, 0x26, 0xDD, 0x29, 0xD0, 
-	0x4E, 0x62, 0x3C, 0x85, 0x36, 0x1B, 0x5F, 0x6A, 0x47, 0x88, 
-	0x21, 0xE5, 0x1B, 0x85, 0x0A, 0x2C, 0xE9, 0x2F, 0xE0, 0x20, 
-	0xFC, 0x1D, 0xCD, 0x55, 0x66, 0xF5, 0xAC, 0x32, 0x00, 0x8E, 
-	0xA3, 0xE9, 0xED, 0xFB, 0x35, 0xA7, 0xE6, 0x76, 0x53, 0x42, 
-	0xC6, 0x77, 0x77, 0xAB, 0x90, 0x99, 0x7C, 0xC2, 0xEC, 0xC9, 
-	0x18, 0x4A, 0x3C, 0xF4, 0x11, 0x75, 0x27, 0x83, 0xBD, 0x9E, 
-	0xC2, 0x8F, 0x23, 0xAB, 0x52, 0x46, 0xE2, 0x52, 0x5D, 0x9A, 
-	0x04, 0xC3, 0x15, 0x1F, 0x69, 0x9C, 0x72, 0x69, 0x59, 0x52, 
-	0xD4, 0x69, 0x3D, 0x19, 0x77, 0x36, 0x25, 0xAF, 0x07, 0x71, 
-	0x82, 0xDE, 0xB7, 0x24, 0x60, 0x82, 0x6A, 0x72, 0xBB, 0xED, 
-	0xB6, 0x76, 0xAE, 0x7E, 0xBC, 0x7D, 0x2F, 0x73, 0x4B, 0x04, 
-	0x16, 0xD5, 0xA4, 0xF3, 0x03, 0x26, 0xFB, 0xF3, 0xCD, 0x7B, 
-	0x77, 0x7E, 0x7C, 0x8D, 0x65, 0xAE, 0xA5, 0xDC, 0x6C, 0xE3, 
-	0x70, 0xD2, 0x29, 0x6B, 0xF2, 0xEB, 0x76, 0xC9, 0xE5, 0x46, 
-	0x18, 0x12, 0x57, 0xB0, 0x55, 0xA5, 0x7C, 0xCD, 0x41, 0x93, 
-	0x26, 0x99, 0xF7, 0xA5, 0xC5, 0x34, 0xBE, 0x59, 0x79, 0xDE, 
-	0x0A, 0x57, 0x5F, 0x21, 0xF8, 0x98, 0x52, 0xF0, 0x2F, 0x7B, 
-	0x57, 0xB6, 0x9D, 0xFC, 0x40, 0xA6, 0x55, 0xFB, 0xAF, 0xD9, 
-	0x16, 0x9B, 0x20, 0x4F, 0xA8, 0xA3, 0x0B, 0x04, 0x48, 0xE3, 
-	0x77, 0x22, 0xC4, 0xCC, 0x57, 0x14, 0x33, 0xA2, 0xF0, 0x9A, 
-	0xE3, 0x12, 0xBD, 0xFF, 0x72, 0x8B, 0xEE, 0x52, 0xF3, 0xC9, 
-	0x59, 0xC2, 0xA2, 0x6B, 0xA5, 0x75, 0x48, 0x51, 0x82, 0x0E, 
-	0x7A, 0xFF, 0xFE, 0x41, 0xCD, 0x7C, 0x63, 0xD2, 0x53, 0xA8, 
-	0x11, 0x03, 0xB9, 0x03, 0x07, 0xFE, 0x66, 0x38, 0x5F, 0xA2, 
-	0x3E, 0x9C, 0x1B, 0x02, 0x01, 0x02
+        0x30, 0x82, 0x01, 0x88, 0x02, 0x82, 0x01, 0x81, 0x00, 0x89,
+        0x1B, 0x75, 0x3F, 0x84, 0xB6, 0x11, 0xED, 0x21, 0xF1, 0x08,
+        0x0F, 0xB8, 0x06, 0xC9, 0xA3, 0xC9, 0x41, 0xDB, 0x5A, 0xC8,
+        0xF8, 0x82, 0x73, 0x0F, 0xEB, 0x89, 0x1E, 0x54, 0x18, 0xBE,
+        0xE6, 0x48, 0x41, 0x9E, 0xFA, 0xC2, 0x0C, 0x50, 0x67, 0xC3,
+        0x5D, 0xB5, 0xF5, 0x0F, 0x23, 0x6A, 0x43, 0x33, 0x91, 0xD9,
+        0x40, 0xF3, 0x66, 0xC6, 0x99, 0xFF, 0x97, 0xB6, 0x7B, 0xAF,
+        0x27, 0x72, 0x3B, 0x9F, 0x7E, 0x58, 0x18, 0x14, 0x9F, 0x91,
+        0x6E, 0x2B, 0x11, 0xC1, 0x57, 0x49, 0x27, 0x36, 0x78, 0xE1,
+        0x09, 0x68, 0x9C, 0x05, 0x5A, 0xAC, 0xE6, 0x00, 0x38, 0xBE,
+        0x95, 0x74, 0x81, 0x53, 0x28, 0xF0, 0xAD, 0xDF, 0xB5, 0x87,
+        0x1C, 0x72, 0x17, 0x4E, 0xEC, 0x00, 0x91, 0x22, 0xAA, 0xE4,
+        0x88, 0xD7, 0xF5, 0x3D, 0x1F, 0x03, 0x13, 0x2D, 0x1C, 0xFB,
+        0xDE, 0x59, 0x68, 0xAD, 0xE0, 0x17, 0xA1, 0xEE, 0x8D, 0xCC,
+        0xBF, 0xFE, 0xCF, 0x24, 0x42, 0xED, 0x26, 0xDD, 0x29, 0xD0,
+        0x4E, 0x62, 0x3C, 0x85, 0x36, 0x1B, 0x5F, 0x6A, 0x47, 0x88,
+        0x21, 0xE5, 0x1B, 0x85, 0x0A, 0x2C, 0xE9, 0x2F, 0xE0, 0x20,
+        0xFC, 0x1D, 0xCD, 0x55, 0x66, 0xF5, 0xAC, 0x32, 0x00, 0x8E,
+        0xA3, 0xE9, 0xED, 0xFB, 0x35, 0xA7, 0xE6, 0x76, 0x53, 0x42,
+        0xC6, 0x77, 0x77, 0xAB, 0x90, 0x99, 0x7C, 0xC2, 0xEC, 0xC9,
+        0x18, 0x4A, 0x3C, 0xF4, 0x11, 0x75, 0x27, 0x83, 0xBD, 0x9E,
+        0xC2, 0x8F, 0x23, 0xAB, 0x52, 0x46, 0xE2, 0x52, 0x5D, 0x9A,
+        0x04, 0xC3, 0x15, 0x1F, 0x69, 0x9C, 0x72, 0x69, 0x59, 0x52,
+        0xD4, 0x69, 0x3D, 0x19, 0x77, 0x36, 0x25, 0xAF, 0x07, 0x71,
+        0x82, 0xDE, 0xB7, 0x24, 0x60, 0x82, 0x6A, 0x72, 0xBB, 0xED,
+        0xB6, 0x76, 0xAE, 0x7E, 0xBC, 0x7D, 0x2F, 0x73, 0x4B, 0x04,
+        0x16, 0xD5, 0xA4, 0xF3, 0x03, 0x26, 0xFB, 0xF3, 0xCD, 0x7B,
+        0x77, 0x7E, 0x7C, 0x8D, 0x65, 0xAE, 0xA5, 0xDC, 0x6C, 0xE3,
+        0x70, 0xD2, 0x29, 0x6B, 0xF2, 0xEB, 0x76, 0xC9, 0xE5, 0x46,
+        0x18, 0x12, 0x57, 0xB0, 0x55, 0xA5, 0x7C, 0xCD, 0x41, 0x93,
+        0x26, 0x99, 0xF7, 0xA5, 0xC5, 0x34, 0xBE, 0x59, 0x79, 0xDE,
+        0x0A, 0x57, 0x5F, 0x21, 0xF8, 0x98, 0x52, 0xF0, 0x2F, 0x7B,
+        0x57, 0xB6, 0x9D, 0xFC, 0x40, 0xA6, 0x55, 0xFB, 0xAF, 0xD9,
+        0x16, 0x9B, 0x20, 0x4F, 0xA8, 0xA3, 0x0B, 0x04, 0x48, 0xE3,
+        0x77, 0x22, 0xC4, 0xCC, 0x57, 0x14, 0x33, 0xA2, 0xF0, 0x9A,
+        0xE3, 0x12, 0xBD, 0xFF, 0x72, 0x8B, 0xEE, 0x52, 0xF3, 0xC9,
+        0x59, 0xC2, 0xA2, 0x6B, 0xA5, 0x75, 0x48, 0x51, 0x82, 0x0E,
+        0x7A, 0xFF, 0xFE, 0x41, 0xCD, 0x7C, 0x63, 0xD2, 0x53, 0xA8,
+        0x11, 0x03, 0xB9, 0x03, 0x07, 0xFE, 0x66, 0x38, 0x5F, 0xA2,
+        0x3E, 0x9C, 0x1B, 0x02, 0x01, 0x02
 };
 static const int sizeof_dh_key_der_3072 = sizeof(dh_key_der_3072);
 
 /* ./certs/dsa3072.der, 3072-bit */
 static const unsigned char dsa_key_der_3072[] =
 {
-	0x30, 0x82, 0x04, 0xD7, 0x02, 0x01, 0x00, 0x02, 0x82, 0x01, 
-	0x81, 0x00, 0xB5, 0xD0, 0x2F, 0x55, 0xC1, 0x27, 0x4C, 0x5B, 
-	0x28, 0x81, 0x4E, 0xA4, 0x32, 0x0D, 0x73, 0x54, 0x68, 0x4F, 
-	0x0A, 0x36, 0x68, 0x4A, 0x51, 0xBE, 0xDE, 0x49, 0xD4, 0x9D, 
-	0xCE, 0xC6, 0xF7, 0x01, 0x70, 0xD2, 0x88, 0x90, 0x1D, 0x60, 
-	0x30, 0x9B, 0x0A, 0x9C, 0x23, 0xDA, 0xE0, 0x74, 0x46, 0x5B, 
-	0xC7, 0x41, 0x40, 0x5C, 0xD9, 0x7A, 0xBE, 0x78, 0xCA, 0x49, 
-	0xF5, 0x2D, 0x7B, 0xD7, 0xBF, 0x67, 0x0D, 0x84, 0x28, 0xBB, 
-	0x9D, 0xC2, 0xAB, 0x23, 0x06, 0x28, 0x0C, 0x98, 0x46, 0x43, 
-	0xCE, 0x6F, 0x9E, 0xD0, 0xE9, 0x0E, 0xF3, 0x7E, 0x30, 0x5D, 
-	0xD3, 0x45, 0x44, 0x7B, 0x0C, 0x7A, 0x73, 0xA6, 0x95, 0x65, 
-	0xAA, 0x8B, 0xD8, 0x75, 0x6A, 0x11, 0xB3, 0x10, 0x7C, 0x57, 
-	0xAF, 0xCE, 0xBE, 0x5B, 0xF7, 0xC8, 0xFE, 0x42, 0xA3, 0x77, 
-	0xB7, 0x0B, 0x3D, 0x66, 0xB5, 0x08, 0x74, 0x22, 0x74, 0x26, 
-	0xE6, 0xDB, 0x8E, 0xEF, 0xA3, 0x99, 0xAE, 0x0B, 0x42, 0x8C, 
-	0x5F, 0x7E, 0x48, 0xE9, 0x19, 0x90, 0xA8, 0x35, 0xA9, 0xFC, 
-	0x48, 0x0D, 0xC8, 0xB8, 0xE4, 0x1A, 0x0C, 0x26, 0xC7, 0x1A, 
-	0x20, 0x02, 0xEB, 0x72, 0x2E, 0x94, 0xD6, 0x19, 0x34, 0x39, 
-	0x55, 0x4E, 0xFC, 0x53, 0x48, 0xD8, 0x10, 0x89, 0xA1, 0x6E, 
-	0x22, 0x39, 0x71, 0x15, 0xA6, 0x13, 0xBC, 0x77, 0x49, 0x53, 
-	0xCB, 0x16, 0x4B, 0x56, 0x3D, 0x08, 0xA2, 0x71, 0x0E, 0x06, 
-	0x0C, 0x3A, 0xDE, 0x82, 0xC0, 0xDF, 0xE7, 0x96, 0x57, 0xD7, 
-	0x3F, 0x6B, 0xF0, 0xAE, 0xD1, 0x38, 0xB8, 0x5B, 0x83, 0x77, 
-	0x8B, 0xEB, 0x2B, 0xDA, 0x38, 0xC8, 0x4C, 0xA9, 0x48, 0x52, 
-	0xD8, 0x41, 0x03, 0xD3, 0x11, 0x1C, 0x66, 0x9E, 0xDE, 0xC9, 
-	0x78, 0x5A, 0xE1, 0x7B, 0xEA, 0x6F, 0xD6, 0xCA, 0x6A, 0x2F, 
-	0x01, 0xB2, 0x83, 0x37, 0x25, 0xD9, 0x9C, 0xD4, 0xB0, 0x21, 
-	0xD9, 0x8F, 0xA6, 0xF8, 0xD6, 0x21, 0x82, 0xBB, 0x08, 0x64, 
-	0x28, 0x0E, 0x0C, 0x26, 0xE6, 0xA5, 0x69, 0xE0, 0x23, 0xE9, 
-	0xB3, 0xC4, 0xF9, 0xDE, 0xC6, 0xD6, 0x32, 0x00, 0x66, 0x9B, 
-	0x8A, 0x0B, 0x6F, 0xDE, 0xB8, 0xDD, 0x68, 0x7F, 0x9D, 0x68, 
-	0x59, 0x6B, 0x55, 0xD9, 0x53, 0x01, 0x7B, 0x1A, 0x1C, 0x8D, 
-	0xBF, 0xAF, 0xC0, 0xB1, 0x14, 0x9E, 0xC1, 0x8D, 0x3E, 0x1E, 
-	0xFB, 0x40, 0xF9, 0x6D, 0x48, 0x43, 0xCD, 0x6C, 0xE8, 0xBC, 
-	0x3C, 0x7C, 0x35, 0x3C, 0x65, 0x6D, 0xA0, 0x25, 0x87, 0xBF, 
-	0xEC, 0x9B, 0x12, 0x74, 0x48, 0xC8, 0xE4, 0xBF, 0x53, 0x53, 
-	0x47, 0x78, 0xD9, 0x9B, 0x1A, 0xA5, 0x07, 0x46, 0x15, 0x16, 
-	0xD2, 0x33, 0x93, 0xCC, 0x41, 0x9B, 0xB7, 0x22, 0xDF, 0x07, 
-	0xDD, 0x72, 0xC6, 0x1A, 0x9B, 0x92, 0xE7, 0x32, 0x04, 0xAB, 
-	0x94, 0x80, 0xBD, 0x58, 0xF2, 0x35, 0x02, 0x21, 0x00, 0x9A, 
-	0xDD, 0x98, 0x1A, 0x6F, 0xEA, 0xB5, 0x8B, 0xC9, 0x68, 0x18, 
-	0x81, 0xE4, 0x4C, 0xFD, 0x8E, 0x45, 0xCF, 0x5F, 0x0E, 0x62, 
-	0x1E, 0x7D, 0x2D, 0x4A, 0x4C, 0x5D, 0x7F, 0xF8, 0xD8, 0x52, 
-	0xD7, 0x02, 0x82, 0x01, 0x81, 0x00, 0x84, 0xDF, 0xAB, 0x91, 
-	0x61, 0xE4, 0x2B, 0x07, 0x0A, 0x1C, 0xC7, 0x9C, 0xD7, 0xAC, 
-	0x8D, 0xA5, 0xAA, 0x41, 0x65, 0x9E, 0x4A, 0xED, 0x21, 0x45, 
-	0x96, 0xF7, 0xF7, 0xCB, 0x7A, 0x88, 0x19, 0x0F, 0x36, 0x80, 
-	0x25, 0x2F, 0x23, 0x0D, 0xFF, 0x6C, 0x0D, 0x02, 0xBB, 0x6A, 
-	0x79, 0x6A, 0xCB, 0x05, 0x00, 0x9B, 0x77, 0xED, 0x6B, 0xF3, 
-	0xC2, 0xEA, 0x1A, 0xDF, 0xB8, 0x15, 0xA8, 0x92, 0x19, 0x5A, 
-	0x51, 0x3B, 0x76, 0x06, 0x98, 0x47, 0xC7, 0x6F, 0x76, 0x99, 
-	0xAD, 0x50, 0xC5, 0x98, 0xE7, 0xFF, 0x88, 0xBC, 0x49, 0x77, 
-	0xEF, 0x96, 0x75, 0xE2, 0x36, 0x66, 0x1F, 0x0C, 0xFA, 0x57, 
-	0x1E, 0x11, 0xFF, 0x8F, 0x3C, 0xD0, 0xEA, 0x97, 0x25, 0x3F, 
-	0xFA, 0xD1, 0x4F, 0xBA, 0xDF, 0xE3, 0x35, 0xFB, 0x6E, 0x5C, 
-	0x65, 0xF9, 0xA2, 0x26, 0x43, 0xF2, 0xF4, 0xE0, 0x05, 0x3D, 
-	0xC6, 0x5B, 0xC4, 0x21, 0xE7, 0xB1, 0x02, 0xEB, 0xF2, 0xA9, 
-	0x06, 0x5E, 0xB7, 0x1B, 0xC1, 0xD8, 0x86, 0x34, 0xED, 0x84, 
-	0x89, 0xCE, 0xCE, 0xC2, 0x63, 0x78, 0x67, 0xF8, 0xC3, 0xAA, 
-	0x7C, 0x1C, 0x59, 0x32, 0xE4, 0x77, 0xA2, 0x36, 0x31, 0xFE, 
-	0x4B, 0x9C, 0x98, 0xCE, 0x01, 0x55, 0x61, 0xCE, 0x23, 0xAE, 
-	0x0F, 0x7E, 0x24, 0x8B, 0x54, 0x8A, 0xE4, 0xCB, 0x8E, 0xDC, 
-	0x7A, 0x94, 0x4C, 0xF9, 0x3C, 0xF8, 0x67, 0x68, 0x9D, 0x7A, 
-	0x82, 0xA1, 0xA0, 0x01, 0xC7, 0x1B, 0x8D, 0xA0, 0xC0, 0x53, 
-	0x1E, 0x93, 0xC7, 0x86, 0x12, 0xD3, 0x16, 0xDC, 0x28, 0xA0, 
-	0xD1, 0x0D, 0x1E, 0x42, 0x9A, 0xCB, 0x55, 0x8C, 0x22, 0x7F, 
-	0x41, 0xC3, 0xC9, 0x14, 0xF2, 0xB0, 0x73, 0xA1, 0x4D, 0x72, 
-	0xFD, 0x88, 0xB6, 0xDE, 0xE5, 0xF0, 0x3C, 0x3A, 0x7E, 0x68, 
-	0x3E, 0x82, 0x58, 0x60, 0xCD, 0xB4, 0x08, 0x64, 0x18, 0xB2, 
-	0x24, 0x97, 0x13, 0xA6, 0x07, 0x75, 0xBE, 0xE0, 0x14, 0x92, 
-	0x9A, 0x98, 0x6C, 0x08, 0x94, 0xD1, 0x0D, 0x48, 0x44, 0xC3, 
-	0xE3, 0xD5, 0xC0, 0x93, 0x49, 0x79, 0x2F, 0x67, 0x15, 0x76, 
-	0xD8, 0x90, 0x11, 0xDB, 0xEC, 0xA7, 0xE2, 0xDB, 0xD4, 0x4F, 
-	0x49, 0x5E, 0xEF, 0xC5, 0xB9, 0x77, 0x69, 0xDA, 0x02, 0xB7, 
-	0x23, 0xBC, 0xEA, 0xDC, 0x84, 0xD4, 0xA5, 0x5C, 0xA2, 0x6C, 
-	0xAD, 0x4A, 0x9F, 0xF0, 0x65, 0x48, 0xE9, 0xBF, 0xDF, 0xA5, 
-	0xB3, 0x99, 0xD6, 0x76, 0x08, 0x87, 0x2C, 0xF2, 0x29, 0x79, 
-	0xB2, 0x20, 0x7C, 0x6F, 0xC1, 0xC5, 0x3C, 0xB0, 0x50, 0x3F, 
-	0x72, 0xA5, 0x57, 0xE3, 0xB0, 0x62, 0x18, 0x80, 0x71, 0xB9, 
-	0x3F, 0x4D, 0x4E, 0x7C, 0xF6, 0x29, 0xDB, 0xB8, 0xAD, 0xF6, 
-	0x41, 0x69, 0x06, 0x90, 0x45, 0x7B, 0x95, 0x03, 0xE1, 0x9E, 
-	0xA5, 0xA1, 0x5A, 0xE3, 0x08, 0x26, 0x73, 0xFC, 0x2B, 0x20, 
-	0x02, 0x82, 0x01, 0x81, 0x00, 0xA5, 0x52, 0x8F, 0x53, 0xF0, 
-	0xB9, 0x4F, 0x06, 0xB9, 0xC8, 0xB4, 0x50, 0xA4, 0x39, 0xBA, 
-	0x12, 0x92, 0x75, 0x27, 0x43, 0xA8, 0x30, 0xA9, 0xF2, 0x2A, 
-	0xC6, 0x93, 0x26, 0x3C, 0x8C, 0x9F, 0xA2, 0x6F, 0x53, 0xD9, 
-	0x14, 0xAB, 0x3F, 0x00, 0xC6, 0x11, 0x13, 0x90, 0x6A, 0x42, 
-	0xF2, 0x9D, 0xA3, 0x8F, 0x31, 0x32, 0x46, 0x73, 0xA3, 0x93, 
-	0x57, 0x5D, 0x76, 0x45, 0x49, 0x6C, 0xBD, 0xEA, 0xAF, 0xAA, 
-	0xB3, 0x55, 0x25, 0x11, 0x8E, 0xA5, 0x2A, 0xB1, 0xBA, 0xA5, 
-	0x06, 0x4A, 0x66, 0xAA, 0x78, 0x9E, 0xF6, 0x5C, 0x1E, 0xB1, 
-	0x4A, 0xCA, 0x5C, 0x3F, 0x1D, 0x33, 0x75, 0x91, 0xF2, 0xF9, 
-	0x53, 0x14, 0x2F, 0xDC, 0xF0, 0x4C, 0xA4, 0xF4, 0x50, 0x04, 
-	0x1F, 0xFF, 0xC9, 0x0C, 0xC6, 0x8A, 0x04, 0x8B, 0x80, 0x87, 
-	0xA7, 0x70, 0x49, 0xD7, 0xE4, 0xE7, 0x83, 0xF1, 0x86, 0x1A, 
-	0xB0, 0x85, 0x3C, 0x59, 0x04, 0x96, 0xD1, 0x85, 0x47, 0xA1, 
-	0x57, 0x7D, 0xC6, 0x8E, 0x60, 0x7D, 0xC6, 0xE8, 0x18, 0xB3, 
-	0x1F, 0xB8, 0x99, 0xF0, 0xC4, 0xE5, 0x1E, 0xBC, 0x34, 0x07, 
-	0x8E, 0x40, 0x57, 0xA5, 0x8D, 0x3A, 0xA3, 0x88, 0x96, 0xF1, 
-	0xB3, 0x61, 0xF1, 0x1C, 0x96, 0x8A, 0xA4, 0x9E, 0xCD, 0x21, 
-	0xA2, 0x94, 0xAE, 0x5E, 0x1F, 0xCD, 0x5B, 0x5B, 0xE3, 0x88, 
-	0x1E, 0x17, 0x4A, 0x46, 0xAB, 0x9C, 0xE0, 0x59, 0x03, 0x4A, 
-	0xB8, 0xC8, 0x83, 0xE7, 0xFF, 0x39, 0x27, 0x68, 0x80, 0xA0, 
-	0x8E, 0xB3, 0xA2, 0x00, 0xC6, 0x2D, 0x2C, 0x76, 0xBA, 0x90, 
-	0x7C, 0x03, 0x1B, 0x19, 0xC8, 0x33, 0xB2, 0x12, 0x3A, 0xC8, 
-	0x8D, 0x32, 0xFE, 0xC0, 0xF9, 0xA5, 0x6A, 0x63, 0xE2, 0xA4, 
-	0x12, 0x43, 0x19, 0xF5, 0x14, 0xF2, 0x27, 0xF8, 0x0B, 0xBD, 
-	0x1A, 0x22, 0x64, 0x2D, 0xC9, 0x05, 0xFA, 0xD8, 0xDD, 0x11, 
-	0x1A, 0xD3, 0xF2, 0xBC, 0x99, 0x3A, 0xCD, 0x21, 0xCF, 0x10, 
-	0x14, 0x36, 0xDF, 0xED, 0x66, 0x02, 0x03, 0x4A, 0x42, 0x70, 
-	0x71, 0x22, 0xAD, 0xE7, 0x53, 0x91, 0xF4, 0x40, 0x8F, 0x72, 
-	0x7E, 0x54, 0xA0, 0x5D, 0x58, 0x93, 0xD6, 0xF6, 0xBC, 0x87, 
-	0x1A, 0x68, 0x0F, 0xAB, 0x94, 0x20, 0x70, 0xC2, 0x11, 0xA1, 
-	0x14, 0xBC, 0x06, 0xA8, 0x44, 0xB9, 0x1F, 0x04, 0x49, 0x7E, 
-	0xB3, 0x9A, 0x53, 0x46, 0x05, 0x75, 0x5D, 0x29, 0x77, 0x28, 
-	0xA9, 0xB1, 0xDC, 0xF1, 0x0D, 0x8A, 0x1C, 0x5E, 0xCD, 0xD7, 
-	0x4C, 0x16, 0x6F, 0x88, 0xBF, 0xB3, 0x34, 0xE2, 0xAD, 0x9A, 
-	0xC4, 0x89, 0xE2, 0x2E, 0x5C, 0x20, 0xE1, 0x5F, 0x39, 0xBF, 
-	0xB7, 0x45, 0xD3, 0x0F, 0x98, 0xB0, 0xD8, 0xC9, 0x18, 0x91, 
-	0x17, 0x25, 0xBC, 0x53, 0x62, 0xFF, 0x27, 0x85, 0xBD, 0xE2, 
-	0xE3, 0x9C, 0xA8, 0x06, 0x7A, 0x54, 0xEA, 0xFD, 0xEA, 0x02, 
-	0x20, 0x4C, 0xAC, 0x69, 0x62, 0x08, 0xE5, 0xCD, 0x14, 0xC8, 
-	0x2D, 0x4E, 0xDF, 0x1F, 0x60, 0x1D, 0x93, 0x44, 0x86, 0x5D, 
-	0x73, 0x99, 0x40, 0x1B, 0xDC, 0xA9, 0xBA, 0xC4, 0x1B, 0x12, 
-	0x6C, 0xFF, 0x53
+        0x30, 0x82, 0x04, 0xD7, 0x02, 0x01, 0x00, 0x02, 0x82, 0x01,
+        0x81, 0x00, 0xB5, 0xD0, 0x2F, 0x55, 0xC1, 0x27, 0x4C, 0x5B,
+        0x28, 0x81, 0x4E, 0xA4, 0x32, 0x0D, 0x73, 0x54, 0x68, 0x4F,
+        0x0A, 0x36, 0x68, 0x4A, 0x51, 0xBE, 0xDE, 0x49, 0xD4, 0x9D,
+        0xCE, 0xC6, 0xF7, 0x01, 0x70, 0xD2, 0x88, 0x90, 0x1D, 0x60,
+        0x30, 0x9B, 0x0A, 0x9C, 0x23, 0xDA, 0xE0, 0x74, 0x46, 0x5B,
+        0xC7, 0x41, 0x40, 0x5C, 0xD9, 0x7A, 0xBE, 0x78, 0xCA, 0x49,
+        0xF5, 0x2D, 0x7B, 0xD7, 0xBF, 0x67, 0x0D, 0x84, 0x28, 0xBB,
+        0x9D, 0xC2, 0xAB, 0x23, 0x06, 0x28, 0x0C, 0x98, 0x46, 0x43,
+        0xCE, 0x6F, 0x9E, 0xD0, 0xE9, 0x0E, 0xF3, 0x7E, 0x30, 0x5D,
+        0xD3, 0x45, 0x44, 0x7B, 0x0C, 0x7A, 0x73, 0xA6, 0x95, 0x65,
+        0xAA, 0x8B, 0xD8, 0x75, 0x6A, 0x11, 0xB3, 0x10, 0x7C, 0x57,
+        0xAF, 0xCE, 0xBE, 0x5B, 0xF7, 0xC8, 0xFE, 0x42, 0xA3, 0x77,
+        0xB7, 0x0B, 0x3D, 0x66, 0xB5, 0x08, 0x74, 0x22, 0x74, 0x26,
+        0xE6, 0xDB, 0x8E, 0xEF, 0xA3, 0x99, 0xAE, 0x0B, 0x42, 0x8C,
+        0x5F, 0x7E, 0x48, 0xE9, 0x19, 0x90, 0xA8, 0x35, 0xA9, 0xFC,
+        0x48, 0x0D, 0xC8, 0xB8, 0xE4, 0x1A, 0x0C, 0x26, 0xC7, 0x1A,
+        0x20, 0x02, 0xEB, 0x72, 0x2E, 0x94, 0xD6, 0x19, 0x34, 0x39,
+        0x55, 0x4E, 0xFC, 0x53, 0x48, 0xD8, 0x10, 0x89, 0xA1, 0x6E,
+        0x22, 0x39, 0x71, 0x15, 0xA6, 0x13, 0xBC, 0x77, 0x49, 0x53,
+        0xCB, 0x16, 0x4B, 0x56, 0x3D, 0x08, 0xA2, 0x71, 0x0E, 0x06,
+        0x0C, 0x3A, 0xDE, 0x82, 0xC0, 0xDF, 0xE7, 0x96, 0x57, 0xD7,
+        0x3F, 0x6B, 0xF0, 0xAE, 0xD1, 0x38, 0xB8, 0x5B, 0x83, 0x77,
+        0x8B, 0xEB, 0x2B, 0xDA, 0x38, 0xC8, 0x4C, 0xA9, 0x48, 0x52,
+        0xD8, 0x41, 0x03, 0xD3, 0x11, 0x1C, 0x66, 0x9E, 0xDE, 0xC9,
+        0x78, 0x5A, 0xE1, 0x7B, 0xEA, 0x6F, 0xD6, 0xCA, 0x6A, 0x2F,
+        0x01, 0xB2, 0x83, 0x37, 0x25, 0xD9, 0x9C, 0xD4, 0xB0, 0x21,
+        0xD9, 0x8F, 0xA6, 0xF8, 0xD6, 0x21, 0x82, 0xBB, 0x08, 0x64,
+        0x28, 0x0E, 0x0C, 0x26, 0xE6, 0xA5, 0x69, 0xE0, 0x23, 0xE9,
+        0xB3, 0xC4, 0xF9, 0xDE, 0xC6, 0xD6, 0x32, 0x00, 0x66, 0x9B,
+        0x8A, 0x0B, 0x6F, 0xDE, 0xB8, 0xDD, 0x68, 0x7F, 0x9D, 0x68,
+        0x59, 0x6B, 0x55, 0xD9, 0x53, 0x01, 0x7B, 0x1A, 0x1C, 0x8D,
+        0xBF, 0xAF, 0xC0, 0xB1, 0x14, 0x9E, 0xC1, 0x8D, 0x3E, 0x1E,
+        0xFB, 0x40, 0xF9, 0x6D, 0x48, 0x43, 0xCD, 0x6C, 0xE8, 0xBC,
+        0x3C, 0x7C, 0x35, 0x3C, 0x65, 0x6D, 0xA0, 0x25, 0x87, 0xBF,
+        0xEC, 0x9B, 0x12, 0x74, 0x48, 0xC8, 0xE4, 0xBF, 0x53, 0x53,
+        0x47, 0x78, 0xD9, 0x9B, 0x1A, 0xA5, 0x07, 0x46, 0x15, 0x16,
+        0xD2, 0x33, 0x93, 0xCC, 0x41, 0x9B, 0xB7, 0x22, 0xDF, 0x07,
+        0xDD, 0x72, 0xC6, 0x1A, 0x9B, 0x92, 0xE7, 0x32, 0x04, 0xAB,
+        0x94, 0x80, 0xBD, 0x58, 0xF2, 0x35, 0x02, 0x21, 0x00, 0x9A,
+        0xDD, 0x98, 0x1A, 0x6F, 0xEA, 0xB5, 0x8B, 0xC9, 0x68, 0x18,
+        0x81, 0xE4, 0x4C, 0xFD, 0x8E, 0x45, 0xCF, 0x5F, 0x0E, 0x62,
+        0x1E, 0x7D, 0x2D, 0x4A, 0x4C, 0x5D, 0x7F, 0xF8, 0xD8, 0x52,
+        0xD7, 0x02, 0x82, 0x01, 0x81, 0x00, 0x84, 0xDF, 0xAB, 0x91,
+        0x61, 0xE4, 0x2B, 0x07, 0x0A, 0x1C, 0xC7, 0x9C, 0xD7, 0xAC,
+        0x8D, 0xA5, 0xAA, 0x41, 0x65, 0x9E, 0x4A, 0xED, 0x21, 0x45,
+        0x96, 0xF7, 0xF7, 0xCB, 0x7A, 0x88, 0x19, 0x0F, 0x36, 0x80,
+        0x25, 0x2F, 0x23, 0x0D, 0xFF, 0x6C, 0x0D, 0x02, 0xBB, 0x6A,
+        0x79, 0x6A, 0xCB, 0x05, 0x00, 0x9B, 0x77, 0xED, 0x6B, 0xF3,
+        0xC2, 0xEA, 0x1A, 0xDF, 0xB8, 0x15, 0xA8, 0x92, 0x19, 0x5A,
+        0x51, 0x3B, 0x76, 0x06, 0x98, 0x47, 0xC7, 0x6F, 0x76, 0x99,
+        0xAD, 0x50, 0xC5, 0x98, 0xE7, 0xFF, 0x88, 0xBC, 0x49, 0x77,
+        0xEF, 0x96, 0x75, 0xE2, 0x36, 0x66, 0x1F, 0x0C, 0xFA, 0x57,
+        0x1E, 0x11, 0xFF, 0x8F, 0x3C, 0xD0, 0xEA, 0x97, 0x25, 0x3F,
+        0xFA, 0xD1, 0x4F, 0xBA, 0xDF, 0xE3, 0x35, 0xFB, 0x6E, 0x5C,
+        0x65, 0xF9, 0xA2, 0x26, 0x43, 0xF2, 0xF4, 0xE0, 0x05, 0x3D,
+        0xC6, 0x5B, 0xC4, 0x21, 0xE7, 0xB1, 0x02, 0xEB, 0xF2, 0xA9,
+        0x06, 0x5E, 0xB7, 0x1B, 0xC1, 0xD8, 0x86, 0x34, 0xED, 0x84,
+        0x89, 0xCE, 0xCE, 0xC2, 0x63, 0x78, 0x67, 0xF8, 0xC3, 0xAA,
+        0x7C, 0x1C, 0x59, 0x32, 0xE4, 0x77, 0xA2, 0x36, 0x31, 0xFE,
+        0x4B, 0x9C, 0x98, 0xCE, 0x01, 0x55, 0x61, 0xCE, 0x23, 0xAE,
+        0x0F, 0x7E, 0x24, 0x8B, 0x54, 0x8A, 0xE4, 0xCB, 0x8E, 0xDC,
+        0x7A, 0x94, 0x4C, 0xF9, 0x3C, 0xF8, 0x67, 0x68, 0x9D, 0x7A,
+        0x82, 0xA1, 0xA0, 0x01, 0xC7, 0x1B, 0x8D, 0xA0, 0xC0, 0x53,
+        0x1E, 0x93, 0xC7, 0x86, 0x12, 0xD3, 0x16, 0xDC, 0x28, 0xA0,
+        0xD1, 0x0D, 0x1E, 0x42, 0x9A, 0xCB, 0x55, 0x8C, 0x22, 0x7F,
+        0x41, 0xC3, 0xC9, 0x14, 0xF2, 0xB0, 0x73, 0xA1, 0x4D, 0x72,
+        0xFD, 0x88, 0xB6, 0xDE, 0xE5, 0xF0, 0x3C, 0x3A, 0x7E, 0x68,
+        0x3E, 0x82, 0x58, 0x60, 0xCD, 0xB4, 0x08, 0x64, 0x18, 0xB2,
+        0x24, 0x97, 0x13, 0xA6, 0x07, 0x75, 0xBE, 0xE0, 0x14, 0x92,
+        0x9A, 0x98, 0x6C, 0x08, 0x94, 0xD1, 0x0D, 0x48, 0x44, 0xC3,
+        0xE3, 0xD5, 0xC0, 0x93, 0x49, 0x79, 0x2F, 0x67, 0x15, 0x76,
+        0xD8, 0x90, 0x11, 0xDB, 0xEC, 0xA7, 0xE2, 0xDB, 0xD4, 0x4F,
+        0x49, 0x5E, 0xEF, 0xC5, 0xB9, 0x77, 0x69, 0xDA, 0x02, 0xB7,
+        0x23, 0xBC, 0xEA, 0xDC, 0x84, 0xD4, 0xA5, 0x5C, 0xA2, 0x6C,
+        0xAD, 0x4A, 0x9F, 0xF0, 0x65, 0x48, 0xE9, 0xBF, 0xDF, 0xA5,
+        0xB3, 0x99, 0xD6, 0x76, 0x08, 0x87, 0x2C, 0xF2, 0x29, 0x79,
+        0xB2, 0x20, 0x7C, 0x6F, 0xC1, 0xC5, 0x3C, 0xB0, 0x50, 0x3F,
+        0x72, 0xA5, 0x57, 0xE3, 0xB0, 0x62, 0x18, 0x80, 0x71, 0xB9,
+        0x3F, 0x4D, 0x4E, 0x7C, 0xF6, 0x29, 0xDB, 0xB8, 0xAD, 0xF6,
+        0x41, 0x69, 0x06, 0x90, 0x45, 0x7B, 0x95, 0x03, 0xE1, 0x9E,
+        0xA5, 0xA1, 0x5A, 0xE3, 0x08, 0x26, 0x73, 0xFC, 0x2B, 0x20,
+        0x02, 0x82, 0x01, 0x81, 0x00, 0xA5, 0x52, 0x8F, 0x53, 0xF0,
+        0xB9, 0x4F, 0x06, 0xB9, 0xC8, 0xB4, 0x50, 0xA4, 0x39, 0xBA,
+        0x12, 0x92, 0x75, 0x27, 0x43, 0xA8, 0x30, 0xA9, 0xF2, 0x2A,
+        0xC6, 0x93, 0x26, 0x3C, 0x8C, 0x9F, 0xA2, 0x6F, 0x53, 0xD9,
+        0x14, 0xAB, 0x3F, 0x00, 0xC6, 0x11, 0x13, 0x90, 0x6A, 0x42,
+        0xF2, 0x9D, 0xA3, 0x8F, 0x31, 0x32, 0x46, 0x73, 0xA3, 0x93,
+        0x57, 0x5D, 0x76, 0x45, 0x49, 0x6C, 0xBD, 0xEA, 0xAF, 0xAA,
+        0xB3, 0x55, 0x25, 0x11, 0x8E, 0xA5, 0x2A, 0xB1, 0xBA, 0xA5,
+        0x06, 0x4A, 0x66, 0xAA, 0x78, 0x9E, 0xF6, 0x5C, 0x1E, 0xB1,
+        0x4A, 0xCA, 0x5C, 0x3F, 0x1D, 0x33, 0x75, 0x91, 0xF2, 0xF9,
+        0x53, 0x14, 0x2F, 0xDC, 0xF0, 0x4C, 0xA4, 0xF4, 0x50, 0x04,
+        0x1F, 0xFF, 0xC9, 0x0C, 0xC6, 0x8A, 0x04, 0x8B, 0x80, 0x87,
+        0xA7, 0x70, 0x49, 0xD7, 0xE4, 0xE7, 0x83, 0xF1, 0x86, 0x1A,
+        0xB0, 0x85, 0x3C, 0x59, 0x04, 0x96, 0xD1, 0x85, 0x47, 0xA1,
+        0x57, 0x7D, 0xC6, 0x8E, 0x60, 0x7D, 0xC6, 0xE8, 0x18, 0xB3,
+        0x1F, 0xB8, 0x99, 0xF0, 0xC4, 0xE5, 0x1E, 0xBC, 0x34, 0x07,
+        0x8E, 0x40, 0x57, 0xA5, 0x8D, 0x3A, 0xA3, 0x88, 0x96, 0xF1,
+        0xB3, 0x61, 0xF1, 0x1C, 0x96, 0x8A, 0xA4, 0x9E, 0xCD, 0x21,
+        0xA2, 0x94, 0xAE, 0x5E, 0x1F, 0xCD, 0x5B, 0x5B, 0xE3, 0x88,
+        0x1E, 0x17, 0x4A, 0x46, 0xAB, 0x9C, 0xE0, 0x59, 0x03, 0x4A,
+        0xB8, 0xC8, 0x83, 0xE7, 0xFF, 0x39, 0x27, 0x68, 0x80, 0xA0,
+        0x8E, 0xB3, 0xA2, 0x00, 0xC6, 0x2D, 0x2C, 0x76, 0xBA, 0x90,
+        0x7C, 0x03, 0x1B, 0x19, 0xC8, 0x33, 0xB2, 0x12, 0x3A, 0xC8,
+        0x8D, 0x32, 0xFE, 0xC0, 0xF9, 0xA5, 0x6A, 0x63, 0xE2, 0xA4,
+        0x12, 0x43, 0x19, 0xF5, 0x14, 0xF2, 0x27, 0xF8, 0x0B, 0xBD,
+        0x1A, 0x22, 0x64, 0x2D, 0xC9, 0x05, 0xFA, 0xD8, 0xDD, 0x11,
+        0x1A, 0xD3, 0xF2, 0xBC, 0x99, 0x3A, 0xCD, 0x21, 0xCF, 0x10,
+        0x14, 0x36, 0xDF, 0xED, 0x66, 0x02, 0x03, 0x4A, 0x42, 0x70,
+        0x71, 0x22, 0xAD, 0xE7, 0x53, 0x91, 0xF4, 0x40, 0x8F, 0x72,
+        0x7E, 0x54, 0xA0, 0x5D, 0x58, 0x93, 0xD6, 0xF6, 0xBC, 0x87,
+        0x1A, 0x68, 0x0F, 0xAB, 0x94, 0x20, 0x70, 0xC2, 0x11, 0xA1,
+        0x14, 0xBC, 0x06, 0xA8, 0x44, 0xB9, 0x1F, 0x04, 0x49, 0x7E,
+        0xB3, 0x9A, 0x53, 0x46, 0x05, 0x75, 0x5D, 0x29, 0x77, 0x28,
+        0xA9, 0xB1, 0xDC, 0xF1, 0x0D, 0x8A, 0x1C, 0x5E, 0xCD, 0xD7,
+        0x4C, 0x16, 0x6F, 0x88, 0xBF, 0xB3, 0x34, 0xE2, 0xAD, 0x9A,
+        0xC4, 0x89, 0xE2, 0x2E, 0x5C, 0x20, 0xE1, 0x5F, 0x39, 0xBF,
+        0xB7, 0x45, 0xD3, 0x0F, 0x98, 0xB0, 0xD8, 0xC9, 0x18, 0x91,
+        0x17, 0x25, 0xBC, 0x53, 0x62, 0xFF, 0x27, 0x85, 0xBD, 0xE2,
+        0xE3, 0x9C, 0xA8, 0x06, 0x7A, 0x54, 0xEA, 0xFD, 0xEA, 0x02,
+        0x20, 0x4C, 0xAC, 0x69, 0x62, 0x08, 0xE5, 0xCD, 0x14, 0xC8,
+        0x2D, 0x4E, 0xDF, 0x1F, 0x60, 0x1D, 0x93, 0x44, 0x86, 0x5D,
+        0x73, 0x99, 0x40, 0x1B, 0xDC, 0xA9, 0xBA, 0xC4, 0x1B, 0x12,
+        0x6C, 0xFF, 0x53
 };
 static const int sizeof_dsa_key_der_3072 = sizeof(dsa_key_der_3072);
 
 /* ./certs/rsa3072.der, 3072-bit */
 static const unsigned char rsa_key_der_3072[] =
 {
-	0x30, 0x82, 0x06, 0xE4, 0x02, 0x01, 0x00, 0x02, 0x82, 0x01, 
-	0x81, 0x00, 0xBC, 0x6D, 0x68, 0xFF, 0xC0, 0x07, 0x0E, 0x0C, 
-	0x4A, 0xE6, 0x76, 0x1F, 0x7A, 0x25, 0x3A, 0x75, 0xA7, 0xE2, 
-	0xF1, 0x17, 0x00, 0xF8, 0x85, 0xE6, 0x8F, 0x59, 0x14, 0xA7, 
-	0xDE, 0x8C, 0x74, 0x4B, 0xEB, 0x85, 0xEC, 0x49, 0x9B, 0xFF, 
-	0x4B, 0x43, 0x0A, 0x08, 0xA1, 0xEC, 0x64, 0x58, 0x47, 0x28, 
-	0xD5, 0xCE, 0x48, 0xE9, 0xCF, 0x34, 0xDF, 0x15, 0x20, 0x37, 
-	0x99, 0x0E, 0x3C, 0x81, 0xBE, 0x2E, 0xE4, 0x6C, 0xBB, 0xDE, 
-	0xD1, 0x93, 0xC5, 0xEC, 0x6C, 0xCC, 0x40, 0x0B, 0x46, 0xA1, 
-	0xE6, 0xCA, 0xA0, 0xD5, 0x3B, 0x44, 0x48, 0x79, 0x67, 0x52, 
-	0x6F, 0xDA, 0xED, 0x73, 0x8B, 0x7C, 0x33, 0xDA, 0x17, 0x96, 
-	0xE8, 0xA2, 0x91, 0x3C, 0x57, 0xDD, 0xC9, 0x2E, 0x01, 0x74, 
-	0x87, 0x33, 0xA0, 0x12, 0x7C, 0xBB, 0xF9, 0x53, 0xF4, 0xC4, 
-	0x31, 0x48, 0x53, 0xCB, 0xBB, 0x3C, 0x42, 0x43, 0x0C, 0x7A, 
-	0x7B, 0xB8, 0x2A, 0xFC, 0xDC, 0x70, 0xD5, 0x64, 0x16, 0x74, 
-	0xA8, 0x80, 0xDE, 0x16, 0xE0, 0xB2, 0x6C, 0x04, 0x47, 0x6C, 
-	0x25, 0xA6, 0x7F, 0xB4, 0x73, 0x49, 0xBC, 0xF3, 0xAE, 0xE3, 
-	0x93, 0x36, 0x87, 0x2B, 0xB7, 0x8F, 0xB5, 0x88, 0x88, 0x22, 
-	0x47, 0xDF, 0xBF, 0x4D, 0x3C, 0x2A, 0xBD, 0x3F, 0x2F, 0x11, 
-	0x29, 0xCC, 0x1C, 0x33, 0x40, 0x4E, 0x23, 0xF6, 0x25, 0xF0, 
-	0xAF, 0x02, 0x16, 0x48, 0xED, 0x1C, 0xD8, 0xC9, 0x92, 0x2F, 
-	0x5B, 0xAF, 0xBA, 0xDB, 0x60, 0x1E, 0x0E, 0xE1, 0x65, 0x91, 
-	0x96, 0xF8, 0x7D, 0x73, 0x4C, 0x72, 0x23, 0x33, 0xD5, 0x32, 
-	0x2B, 0x0F, 0x4F, 0xBC, 0x81, 0x45, 0x9E, 0x31, 0x76, 0xEF, 
-	0xE1, 0x76, 0x2D, 0x3F, 0x8F, 0xC4, 0x19, 0x8F, 0x27, 0x2A, 
-	0x8F, 0x6E, 0x76, 0xCC, 0xE0, 0x5D, 0xB0, 0x86, 0x66, 0xFE, 
-	0x72, 0xD9, 0x06, 0x40, 0xB6, 0xCE, 0x85, 0xC6, 0x2D, 0x34, 
-	0x33, 0xAA, 0x8E, 0xE5, 0x54, 0x8E, 0xB8, 0xBA, 0xEE, 0x92, 
-	0x07, 0x5D, 0xB5, 0xF1, 0x67, 0xBF, 0xCA, 0xE4, 0xCA, 0xCB, 
-	0xD9, 0x01, 0x73, 0x22, 0x01, 0x32, 0x39, 0xF4, 0x0A, 0xEC, 
-	0x5F, 0x4A, 0x00, 0x10, 0x3F, 0x01, 0x3D, 0x15, 0xBB, 0x55, 
-	0x91, 0x80, 0xBE, 0xD8, 0xD3, 0x59, 0xCC, 0xB0, 0x7C, 0x56, 
-	0xF7, 0xFF, 0xE0, 0x28, 0x40, 0x02, 0xB3, 0x98, 0x8A, 0x54, 
-	0x52, 0x60, 0xA5, 0x0B, 0x95, 0x53, 0x86, 0x6B, 0xA4, 0x35, 
-	0xCA, 0x04, 0xC7, 0xFB, 0x0A, 0xC8, 0x9D, 0x5A, 0x11, 0x40, 
-	0xF7, 0x60, 0x07, 0xB1, 0xB3, 0x42, 0xB6, 0x80, 0x8F, 0xE4, 
-	0x25, 0xC9, 0xE8, 0xBC, 0x8E, 0x21, 0x0D, 0x47, 0xCF, 0xB8, 
-	0x37, 0x09, 0xAF, 0xBF, 0x2C, 0x34, 0x09, 0x22, 0xC2, 0x6E, 
-	0x0D, 0x06, 0x30, 0x80, 0x1E, 0xA5, 0x8A, 0x46, 0x2D, 0xDC, 
-	0x57, 0xD4, 0x57, 0x82, 0x6A, 0x11, 0x02, 0x03, 0x01, 0x00, 
-	0x01, 0x02, 0x82, 0x01, 0x81, 0x00, 0xAD, 0x99, 0xAF, 0xCF, 
-	0x51, 0x40, 0x2E, 0xB5, 0x2C, 0x9C, 0xBF, 0xDF, 0xA8, 0x4D, 
-	0x7C, 0x5A, 0xC1, 0xDE, 0xD8, 0x78, 0x75, 0x30, 0x83, 0x4D, 
-	0x34, 0x6C, 0xC2, 0x17, 0x17, 0x77, 0x17, 0xFE, 0x8A, 0x73, 
-	0xCC, 0x8A, 0xD4, 0xEA, 0x94, 0x90, 0xA3, 0x41, 0xE8, 0xCD, 
-	0x3E, 0x76, 0x06, 0xB9, 0x9C, 0xA2, 0x7D, 0x92, 0xCC, 0x90, 
-	0xCD, 0xA7, 0x4D, 0x13, 0x6C, 0x34, 0x2D, 0x92, 0xEB, 0x81, 
-	0x90, 0x7A, 0x8D, 0x6C, 0x70, 0x72, 0x51, 0x3B, 0xCD, 0xD1, 
-	0x30, 0x80, 0x33, 0x07, 0x1E, 0xF7, 0x38, 0xCE, 0xBB, 0xD7, 
-	0xE1, 0x5D, 0xD8, 0xCF, 0x9E, 0xB6, 0x79, 0x66, 0xA6, 0xF0, 
-	0x3B, 0x65, 0x87, 0xAE, 0x45, 0x8E, 0xE1, 0x78, 0x53, 0x0B, 
-	0xC7, 0x3A, 0x57, 0xA4, 0xE0, 0x9B, 0xB3, 0xB2, 0xD4, 0xB0, 
-	0xEA, 0xB9, 0x6B, 0x1D, 0x06, 0xBA, 0xB8, 0x59, 0x4F, 0x9B, 
-	0xE9, 0x00, 0x95, 0x12, 0x93, 0xC1, 0xCD, 0xF9, 0x41, 0xAF, 
-	0xC3, 0x2A, 0x7F, 0x75, 0xE3, 0x79, 0x37, 0x24, 0xA4, 0xC8, 
-	0x3D, 0xB4, 0x83, 0x89, 0x23, 0xF7, 0x0E, 0x59, 0x56, 0x8E, 
-	0x6D, 0x43, 0xA5, 0xB1, 0x8E, 0x04, 0x02, 0xED, 0x48, 0x25, 
-	0x62, 0xFE, 0xF3, 0x4D, 0x82, 0x22, 0xA6, 0xC1, 0xA5, 0xD9, 
-	0x4A, 0x9A, 0x57, 0xE6, 0xDC, 0x37, 0x6D, 0x13, 0xDA, 0xFF, 
-	0x23, 0x2A, 0xB9, 0x31, 0xD2, 0x4B, 0x7D, 0xF3, 0x02, 0x90, 
-	0xF6, 0x28, 0x3D, 0x98, 0x3C, 0xF6, 0x43, 0x45, 0xAE, 0xAB, 
-	0x91, 0x15, 0xC7, 0xC4, 0x90, 0x9C, 0x3E, 0xDA, 0xD4, 0x20, 
-	0x12, 0xB2, 0xE1, 0x2B, 0x56, 0xE2, 0x38, 0x32, 0x9C, 0xE6, 
-	0xA9, 0x1D, 0xFE, 0xA5, 0xEE, 0xD7, 0x52, 0xB4, 0xE3, 0xE4, 
-	0x65, 0xEA, 0x41, 0x9D, 0xD4, 0x91, 0x83, 0x5D, 0xFF, 0x52, 
-	0xA7, 0xC3, 0x42, 0x9F, 0x14, 0x70, 0x9F, 0x98, 0x14, 0xB2, 
-	0x33, 0xEE, 0x4C, 0x5A, 0xC9, 0x5F, 0x16, 0xF6, 0x06, 0xE9, 
-	0xF3, 0x39, 0xD2, 0xC5, 0x31, 0x53, 0x2A, 0x39, 0xED, 0x3A, 
-	0x4D, 0x2A, 0xC1, 0x4C, 0x87, 0x82, 0xC6, 0xCA, 0xCF, 0xF5, 
-	0x9A, 0x71, 0x27, 0xAE, 0xFB, 0xFE, 0xD0, 0x66, 0xDB, 0xAA, 
-	0x03, 0x16, 0x4B, 0xEF, 0xB4, 0x28, 0xAB, 0xCF, 0xBE, 0x9B, 
-	0x58, 0xCF, 0xA4, 0x58, 0x82, 0xD2, 0x37, 0x8C, 0xEA, 0x3D, 
-	0x75, 0x4D, 0x0B, 0x46, 0x7A, 0x04, 0xDE, 0xF1, 0x6E, 0xBB, 
-	0x03, 0xBF, 0xF7, 0x8E, 0xE6, 0xF4, 0x9A, 0xE1, 0xCA, 0x26, 
-	0x2C, 0x41, 0x08, 0xAD, 0x21, 0xA7, 0xC2, 0x40, 0xF5, 0x9C, 
-	0xDD, 0xAB, 0xC5, 0x5A, 0x4C, 0xF4, 0xE6, 0x9A, 0x50, 0xFD, 
-	0xAA, 0x47, 0xD6, 0xA6, 0x07, 0x25, 0xB2, 0x4B, 0x9C, 0x1D, 
-	0x90, 0xA2, 0x4A, 0x98, 0xE0, 0x05, 0x8A, 0x5C, 0xD1, 0x2C, 
-	0xC0, 0x28, 0xD1, 0x84, 0x3C, 0x72, 0xFF, 0x83, 0xEA, 0xB1, 
-	0x02, 0x81, 0xC1, 0x00, 0xF8, 0xA0, 0x5F, 0x25, 0x2E, 0x23, 
-	0x73, 0x30, 0xB6, 0x97, 0xAF, 0x08, 0xE7, 0xD2, 0xD8, 0xC3, 
-	0x95, 0xEA, 0x9D, 0x8E, 0x9F, 0xF1, 0x36, 0x81, 0xD7, 0x7A, 
-	0x21, 0x2B, 0x90, 0x38, 0x9C, 0xA6, 0x08, 0x40, 0xEA, 0xD2, 
-	0x6E, 0x29, 0xB5, 0x0B, 0x3E, 0x91, 0xB2, 0x04, 0x92, 0xCF, 
-	0x94, 0xFF, 0xA6, 0xA7, 0x1A, 0x5F, 0x93, 0x0C, 0x86, 0xE6, 
-	0x4B, 0x61, 0xD4, 0x5E, 0xD7, 0xE3, 0x66, 0x0B, 0x83, 0xDB, 
-	0x16, 0x49, 0x27, 0xD5, 0xA3, 0xB3, 0xF5, 0x5D, 0x8F, 0xC9, 
-	0x48, 0x10, 0xD7, 0x77, 0x1E, 0x7B, 0x01, 0xC4, 0xFD, 0x14, 
-	0x0C, 0xAB, 0x40, 0xF7, 0x9B, 0x07, 0xDE, 0x55, 0xEF, 0x36, 
-	0x4C, 0x22, 0x37, 0x37, 0x09, 0x9D, 0x2A, 0x73, 0xA6, 0xA5, 
-	0xF4, 0xAF, 0x39, 0x2B, 0x87, 0xB4, 0xB2, 0x28, 0x9E, 0x08, 
-	0xA6, 0xCA, 0xB4, 0x39, 0x5A, 0x3A, 0xFB, 0x41, 0x93, 0xEC, 
-	0x44, 0xBB, 0xD2, 0x7C, 0x3B, 0x27, 0x3E, 0x26, 0xFD, 0x7B, 
-	0x20, 0xFC, 0x44, 0x67, 0xC0, 0x84, 0xD1, 0xA0, 0xCC, 0xBB, 
-	0x26, 0xC7, 0x32, 0x0E, 0x01, 0x9B, 0x2B, 0x1F, 0x58, 0x85, 
-	0x5A, 0x6C, 0xD0, 0xC1, 0xAC, 0x14, 0x5E, 0x06, 0x07, 0xCA, 
-	0x69, 0x52, 0xF5, 0xA6, 0x16, 0x75, 0x42, 0x8A, 0xE1, 0xBA, 
-	0x8B, 0x46, 0x38, 0x17, 0x7B, 0xF1, 0x7D, 0x79, 0x1F, 0x7E, 
-	0x4C, 0x6A, 0x75, 0xDC, 0xA8, 0x3B, 0x02, 0x81, 0xC1, 0x00, 
-	0xC2, 0x03, 0xFE, 0x57, 0xDF, 0x26, 0xD8, 0x79, 0xDC, 0x2C, 
-	0x47, 0x9B, 0x92, 0x9B, 0x53, 0x40, 0x82, 0xEC, 0xBD, 0x0B, 
-	0xC0, 0x96, 0x89, 0x21, 0xC5, 0x26, 0x7E, 0x7A, 0x59, 0xA7, 
-	0x85, 0x11, 0xCC, 0x39, 0x33, 0xA7, 0xE6, 0x42, 0x9C, 0x12, 
-	0x81, 0xA0, 0x87, 0xBC, 0x57, 0x07, 0xC4, 0x51, 0x93, 0x59, 
-	0xC6, 0xAB, 0x11, 0xCC, 0xCB, 0xC8, 0xC1, 0x40, 0xDF, 0xCB, 
-	0xE8, 0x45, 0x31, 0x20, 0x91, 0x88, 0x5F, 0x76, 0x76, 0xEE, 
-	0x30, 0x37, 0xFA, 0xA7, 0x22, 0x72, 0x82, 0x50, 0x31, 0xE9, 
-	0xA0, 0x44, 0xCA, 0xDD, 0xD6, 0xAC, 0xEC, 0x82, 0xE8, 0x62, 
-	0xD8, 0x43, 0xFD, 0x77, 0x0F, 0x1C, 0x23, 0x12, 0x91, 0x1C, 
-	0xFE, 0x93, 0x2C, 0x87, 0x52, 0xBF, 0x96, 0x79, 0x5E, 0x3A, 
-	0x5A, 0x33, 0x28, 0x27, 0x3F, 0x20, 0x2C, 0xB3, 0x26, 0xE2, 
-	0x0D, 0x44, 0xA9, 0x2F, 0x39, 0x7B, 0x7B, 0xAD, 0xA3, 0x21, 
-	0xD2, 0x7F, 0x3C, 0x89, 0x63, 0xDD, 0x13, 0xB1, 0x2E, 0xD6, 
-	0x34, 0xFB, 0x2A, 0x83, 0x29, 0xE7, 0x8A, 0x88, 0xD7, 0xA3, 
-	0x38, 0x3C, 0x43, 0x62, 0x8F, 0x69, 0xFA, 0x4B, 0x15, 0xB5, 
-	0xF6, 0x59, 0x90, 0x62, 0x7D, 0xCF, 0x1D, 0xDD, 0x49, 0x43, 
-	0x33, 0x96, 0xA9, 0xF7, 0x76, 0x9F, 0xE4, 0x0D, 0x6E, 0x1C, 
-	0xEA, 0x18, 0x5B, 0xBD, 0x5C, 0x98, 0x90, 0x09, 0xCA, 0x59, 
-	0x9E, 0x23, 0x02, 0x81, 0xC0, 0x66, 0xFF, 0x99, 0x2A, 0xFF, 
-	0xF8, 0x33, 0xAA, 0x44, 0x9A, 0x86, 0x2A, 0xBC, 0x4F, 0x3E, 
-	0xF9, 0x97, 0xCB, 0xC0, 0x45, 0xEB, 0xC0, 0xB4, 0x02, 0x0A, 
-	0x50, 0x50, 0x19, 0x89, 0xFF, 0xC9, 0xF5, 0x86, 0x89, 0xCE, 
-	0x3E, 0x2A, 0xE1, 0x20, 0x5D, 0x6E, 0x28, 0x51, 0x85, 0x4F, 
-	0x84, 0xAB, 0x87, 0x55, 0x74, 0xF8, 0x9A, 0x0B, 0x83, 0x2F, 
-	0x07, 0x8C, 0xC7, 0x14, 0x81, 0xCE, 0x12, 0x28, 0x9E, 0x30, 
-	0x9B, 0xBC, 0x99, 0xC5, 0xE4, 0xDD, 0x92, 0x99, 0xDD, 0x8E, 
-	0xC9, 0xA6, 0x0F, 0x44, 0x13, 0xD7, 0x0E, 0xC2, 0x66, 0xE7, 
-	0x29, 0x3D, 0x2E, 0x5D, 0x15, 0xB6, 0xA6, 0x05, 0xD7, 0xB7, 
-	0xE7, 0xD8, 0x96, 0x7C, 0x25, 0x52, 0xD8, 0x47, 0x53, 0xED, 
-	0xFF, 0xE6, 0x64, 0x08, 0xDD, 0x1D, 0xB5, 0x1F, 0xF1, 0x6F, 
-	0xB6, 0xC9, 0xD2, 0x43, 0xE3, 0x56, 0x9C, 0x04, 0xA6, 0xE0, 
-	0x2F, 0x0B, 0x32, 0x7C, 0x3A, 0x77, 0x0F, 0x04, 0xD2, 0x86, 
-	0x44, 0x52, 0x1F, 0xEF, 0xFE, 0xC3, 0x64, 0xC2, 0xAB, 0x48, 
-	0xE5, 0x67, 0x65, 0x32, 0x39, 0x57, 0x34, 0xFF, 0x22, 0x57, 
-	0x3B, 0xB7, 0x80, 0x48, 0xE3, 0x52, 0xF4, 0x85, 0x17, 0x1E, 
-	0x77, 0x1E, 0x36, 0xFE, 0x09, 0x36, 0x58, 0x91, 0x9E, 0x93, 
-	0x71, 0x02, 0x6D, 0xAE, 0xA3, 0x1B, 0xF7, 0xA9, 0x31, 0x5A, 
-	0x78, 0xAA, 0x13, 0x98, 0x8C, 0x37, 0x2D, 0x02, 0x81, 0xC1, 
-	0x00, 0xBE, 0x01, 0xD9, 0x3A, 0xC7, 0x81, 0xAC, 0xAA, 0x13, 
-	0x75, 0x8E, 0x1F, 0x8F, 0x41, 0xED, 0x13, 0x95, 0xE5, 0x31, 
-	0xF3, 0x6B, 0x86, 0x42, 0x00, 0xBF, 0xAA, 0xC6, 0x5D, 0x1E, 
-	0xA6, 0x90, 0x0C, 0xF1, 0x1B, 0xE8, 0x39, 0xFB, 0xA8, 0xAA, 
-	0x5E, 0xF9, 0x72, 0x74, 0xDC, 0x7F, 0xC3, 0x4C, 0x81, 0xB3, 
-	0xB4, 0x4D, 0x7B, 0xC6, 0x2F, 0xF2, 0x37, 0xC7, 0x03, 0xB8, 
-	0xE9, 0x62, 0xAD, 0x38, 0xC2, 0xB3, 0xA4, 0x82, 0x11, 0x6B, 
-	0xC2, 0x33, 0x98, 0xEF, 0x32, 0x75, 0xEA, 0xFD, 0x32, 0x7A, 
-	0xDF, 0x59, 0xA5, 0x65, 0xA4, 0x42, 0x95, 0x11, 0xFF, 0xD6, 
-	0x84, 0xCF, 0x56, 0x2E, 0xCA, 0x46, 0x13, 0x01, 0x4A, 0x32, 
-	0xB1, 0xD9, 0xA3, 0xDB, 0x0D, 0x20, 0x7E, 0x1F, 0x68, 0xF7, 
-	0x5E, 0x60, 0x6E, 0x0F, 0x59, 0xF8, 0x59, 0x93, 0x4D, 0x54, 
-	0xBC, 0x37, 0xD0, 0x51, 0x7C, 0xBD, 0x67, 0xF0, 0xA5, 0x09, 
-	0xC9, 0x9A, 0xF4, 0x1F, 0x1E, 0x52, 0x9D, 0xF5, 0xA6, 0x25, 
-	0xBF, 0x85, 0x1D, 0xA1, 0xF1, 0xD8, 0xBD, 0x39, 0x10, 0x71, 
-	0x57, 0x19, 0x40, 0xF3, 0xA1, 0x77, 0xE0, 0x8B, 0x4E, 0xB3, 
-	0x91, 0x84, 0x15, 0x0C, 0xF1, 0x58, 0x52, 0xD9, 0xE5, 0x98, 
-	0xD5, 0x66, 0x95, 0x9C, 0x19, 0x8D, 0xA4, 0x63, 0x5C, 0xBF, 
-	0xC5, 0x33, 0x81, 0xED, 0x7E, 0x93, 0x4B, 0x9A, 0x6C, 0xEC, 
-	0x2E, 0x3E, 0x4F, 0x02, 0x81, 0xC0, 0x34, 0xF8, 0xDF, 0x74, 
-	0xC6, 0xC1, 0xD9, 0x03, 0x9B, 0x3B, 0x53, 0x19, 0xEB, 0x43, 
-	0xC4, 0xAA, 0x1E, 0x73, 0xE3, 0x13, 0x25, 0x32, 0x04, 0x22, 
-	0x79, 0x4A, 0x07, 0xF0, 0x06, 0x38, 0xBD, 0x57, 0xE6, 0x01, 
-	0x33, 0x8C, 0xF1, 0x02, 0xCC, 0x34, 0x2C, 0x60, 0x32, 0xA4, 
-	0x22, 0x1D, 0x0E, 0x39, 0x6B, 0xAB, 0xF7, 0xCE, 0xDB, 0xA7, 
-	0xC3, 0xD8, 0xA2, 0x3B, 0x70, 0x31, 0x91, 0x68, 0xB9, 0xBF, 
-	0xE0, 0xA1, 0x39, 0x80, 0xFE, 0x47, 0x99, 0x56, 0x6D, 0x76, 
-	0x90, 0x17, 0xF5, 0x67, 0x41, 0x44, 0x27, 0x10, 0x07, 0x98, 
-	0x4D, 0x4C, 0x53, 0xD4, 0x15, 0xDC, 0x0A, 0x2F, 0xE0, 0x83, 
-	0x28, 0x22, 0x8D, 0x61, 0x3B, 0xE4, 0x8E, 0xE5, 0xE7, 0x24, 
-	0x98, 0x19, 0xA8, 0xA3, 0xED, 0x70, 0x59, 0x06, 0x86, 0x76, 
-	0xC2, 0x4B, 0xCB, 0x17, 0xC5, 0x77, 0x12, 0x07, 0xB8, 0xAB, 
-	0x1A, 0x91, 0xFC, 0x72, 0x8E, 0xB7, 0xB1, 0xE6, 0x74, 0xDD, 
-	0x3D, 0x92, 0xA7, 0xDE, 0x6C, 0x6E, 0xCB, 0x50, 0x44, 0x2F, 
-	0xAC, 0x99, 0xF7, 0x36, 0x4D, 0x62, 0xC7, 0xAC, 0xCE, 0x7D, 
-	0x26, 0xC9, 0xD2, 0x4E, 0x49, 0xD7, 0x8E, 0x66, 0x6C, 0xC1, 
-	0x53, 0xDF, 0x31, 0xAB, 0x25, 0x35, 0xCA, 0xD6, 0xC4, 0xA3, 
-	0xA6, 0x9F, 0x7E, 0x3D, 0x2D, 0x1A, 0x44, 0x31, 0x3D, 0x81, 
-	0x91, 0xB8, 0x36, 0x08, 0x27, 0x42, 0x9E, 0x08
+        0x30, 0x82, 0x06, 0xE4, 0x02, 0x01, 0x00, 0x02, 0x82, 0x01,
+        0x81, 0x00, 0xBC, 0x6D, 0x68, 0xFF, 0xC0, 0x07, 0x0E, 0x0C,
+        0x4A, 0xE6, 0x76, 0x1F, 0x7A, 0x25, 0x3A, 0x75, 0xA7, 0xE2,
+        0xF1, 0x17, 0x00, 0xF8, 0x85, 0xE6, 0x8F, 0x59, 0x14, 0xA7,
+        0xDE, 0x8C, 0x74, 0x4B, 0xEB, 0x85, 0xEC, 0x49, 0x9B, 0xFF,
+        0x4B, 0x43, 0x0A, 0x08, 0xA1, 0xEC, 0x64, 0x58, 0x47, 0x28,
+        0xD5, 0xCE, 0x48, 0xE9, 0xCF, 0x34, 0xDF, 0x15, 0x20, 0x37,
+        0x99, 0x0E, 0x3C, 0x81, 0xBE, 0x2E, 0xE4, 0x6C, 0xBB, 0xDE,
+        0xD1, 0x93, 0xC5, 0xEC, 0x6C, 0xCC, 0x40, 0x0B, 0x46, 0xA1,
+        0xE6, 0xCA, 0xA0, 0xD5, 0x3B, 0x44, 0x48, 0x79, 0x67, 0x52,
+        0x6F, 0xDA, 0xED, 0x73, 0x8B, 0x7C, 0x33, 0xDA, 0x17, 0x96,
+        0xE8, 0xA2, 0x91, 0x3C, 0x57, 0xDD, 0xC9, 0x2E, 0x01, 0x74,
+        0x87, 0x33, 0xA0, 0x12, 0x7C, 0xBB, 0xF9, 0x53, 0xF4, 0xC4,
+        0x31, 0x48, 0x53, 0xCB, 0xBB, 0x3C, 0x42, 0x43, 0x0C, 0x7A,
+        0x7B, 0xB8, 0x2A, 0xFC, 0xDC, 0x70, 0xD5, 0x64, 0x16, 0x74,
+        0xA8, 0x80, 0xDE, 0x16, 0xE0, 0xB2, 0x6C, 0x04, 0x47, 0x6C,
+        0x25, 0xA6, 0x7F, 0xB4, 0x73, 0x49, 0xBC, 0xF3, 0xAE, 0xE3,
+        0x93, 0x36, 0x87, 0x2B, 0xB7, 0x8F, 0xB5, 0x88, 0x88, 0x22,
+        0x47, 0xDF, 0xBF, 0x4D, 0x3C, 0x2A, 0xBD, 0x3F, 0x2F, 0x11,
+        0x29, 0xCC, 0x1C, 0x33, 0x40, 0x4E, 0x23, 0xF6, 0x25, 0xF0,
+        0xAF, 0x02, 0x16, 0x48, 0xED, 0x1C, 0xD8, 0xC9, 0x92, 0x2F,
+        0x5B, 0xAF, 0xBA, 0xDB, 0x60, 0x1E, 0x0E, 0xE1, 0x65, 0x91,
+        0x96, 0xF8, 0x7D, 0x73, 0x4C, 0x72, 0x23, 0x33, 0xD5, 0x32,
+        0x2B, 0x0F, 0x4F, 0xBC, 0x81, 0x45, 0x9E, 0x31, 0x76, 0xEF,
+        0xE1, 0x76, 0x2D, 0x3F, 0x8F, 0xC4, 0x19, 0x8F, 0x27, 0x2A,
+        0x8F, 0x6E, 0x76, 0xCC, 0xE0, 0x5D, 0xB0, 0x86, 0x66, 0xFE,
+        0x72, 0xD9, 0x06, 0x40, 0xB6, 0xCE, 0x85, 0xC6, 0x2D, 0x34,
+        0x33, 0xAA, 0x8E, 0xE5, 0x54, 0x8E, 0xB8, 0xBA, 0xEE, 0x92,
+        0x07, 0x5D, 0xB5, 0xF1, 0x67, 0xBF, 0xCA, 0xE4, 0xCA, 0xCB,
+        0xD9, 0x01, 0x73, 0x22, 0x01, 0x32, 0x39, 0xF4, 0x0A, 0xEC,
+        0x5F, 0x4A, 0x00, 0x10, 0x3F, 0x01, 0x3D, 0x15, 0xBB, 0x55,
+        0x91, 0x80, 0xBE, 0xD8, 0xD3, 0x59, 0xCC, 0xB0, 0x7C, 0x56,
+        0xF7, 0xFF, 0xE0, 0x28, 0x40, 0x02, 0xB3, 0x98, 0x8A, 0x54,
+        0x52, 0x60, 0xA5, 0x0B, 0x95, 0x53, 0x86, 0x6B, 0xA4, 0x35,
+        0xCA, 0x04, 0xC7, 0xFB, 0x0A, 0xC8, 0x9D, 0x5A, 0x11, 0x40,
+        0xF7, 0x60, 0x07, 0xB1, 0xB3, 0x42, 0xB6, 0x80, 0x8F, 0xE4,
+        0x25, 0xC9, 0xE8, 0xBC, 0x8E, 0x21, 0x0D, 0x47, 0xCF, 0xB8,
+        0x37, 0x09, 0xAF, 0xBF, 0x2C, 0x34, 0x09, 0x22, 0xC2, 0x6E,
+        0x0D, 0x06, 0x30, 0x80, 0x1E, 0xA5, 0x8A, 0x46, 0x2D, 0xDC,
+        0x57, 0xD4, 0x57, 0x82, 0x6A, 0x11, 0x02, 0x03, 0x01, 0x00,
+        0x01, 0x02, 0x82, 0x01, 0x81, 0x00, 0xAD, 0x99, 0xAF, 0xCF,
+        0x51, 0x40, 0x2E, 0xB5, 0x2C, 0x9C, 0xBF, 0xDF, 0xA8, 0x4D,
+        0x7C, 0x5A, 0xC1, 0xDE, 0xD8, 0x78, 0x75, 0x30, 0x83, 0x4D,
+        0x34, 0x6C, 0xC2, 0x17, 0x17, 0x77, 0x17, 0xFE, 0x8A, 0x73,
+        0xCC, 0x8A, 0xD4, 0xEA, 0x94, 0x90, 0xA3, 0x41, 0xE8, 0xCD,
+        0x3E, 0x76, 0x06, 0xB9, 0x9C, 0xA2, 0x7D, 0x92, 0xCC, 0x90,
+        0xCD, 0xA7, 0x4D, 0x13, 0x6C, 0x34, 0x2D, 0x92, 0xEB, 0x81,
+        0x90, 0x7A, 0x8D, 0x6C, 0x70, 0x72, 0x51, 0x3B, 0xCD, 0xD1,
+        0x30, 0x80, 0x33, 0x07, 0x1E, 0xF7, 0x38, 0xCE, 0xBB, 0xD7,
+        0xE1, 0x5D, 0xD8, 0xCF, 0x9E, 0xB6, 0x79, 0x66, 0xA6, 0xF0,
+        0x3B, 0x65, 0x87, 0xAE, 0x45, 0x8E, 0xE1, 0x78, 0x53, 0x0B,
+        0xC7, 0x3A, 0x57, 0xA4, 0xE0, 0x9B, 0xB3, 0xB2, 0xD4, 0xB0,
+        0xEA, 0xB9, 0x6B, 0x1D, 0x06, 0xBA, 0xB8, 0x59, 0x4F, 0x9B,
+        0xE9, 0x00, 0x95, 0x12, 0x93, 0xC1, 0xCD, 0xF9, 0x41, 0xAF,
+        0xC3, 0x2A, 0x7F, 0x75, 0xE3, 0x79, 0x37, 0x24, 0xA4, 0xC8,
+        0x3D, 0xB4, 0x83, 0x89, 0x23, 0xF7, 0x0E, 0x59, 0x56, 0x8E,
+        0x6D, 0x43, 0xA5, 0xB1, 0x8E, 0x04, 0x02, 0xED, 0x48, 0x25,
+        0x62, 0xFE, 0xF3, 0x4D, 0x82, 0x22, 0xA6, 0xC1, 0xA5, 0xD9,
+        0x4A, 0x9A, 0x57, 0xE6, 0xDC, 0x37, 0x6D, 0x13, 0xDA, 0xFF,
+        0x23, 0x2A, 0xB9, 0x31, 0xD2, 0x4B, 0x7D, 0xF3, 0x02, 0x90,
+        0xF6, 0x28, 0x3D, 0x98, 0x3C, 0xF6, 0x43, 0x45, 0xAE, 0xAB,
+        0x91, 0x15, 0xC7, 0xC4, 0x90, 0x9C, 0x3E, 0xDA, 0xD4, 0x20,
+        0x12, 0xB2, 0xE1, 0x2B, 0x56, 0xE2, 0x38, 0x32, 0x9C, 0xE6,
+        0xA9, 0x1D, 0xFE, 0xA5, 0xEE, 0xD7, 0x52, 0xB4, 0xE3, 0xE4,
+        0x65, 0xEA, 0x41, 0x9D, 0xD4, 0x91, 0x83, 0x5D, 0xFF, 0x52,
+        0xA7, 0xC3, 0x42, 0x9F, 0x14, 0x70, 0x9F, 0x98, 0x14, 0xB2,
+        0x33, 0xEE, 0x4C, 0x5A, 0xC9, 0x5F, 0x16, 0xF6, 0x06, 0xE9,
+        0xF3, 0x39, 0xD2, 0xC5, 0x31, 0x53, 0x2A, 0x39, 0xED, 0x3A,
+        0x4D, 0x2A, 0xC1, 0x4C, 0x87, 0x82, 0xC6, 0xCA, 0xCF, 0xF5,
+        0x9A, 0x71, 0x27, 0xAE, 0xFB, 0xFE, 0xD0, 0x66, 0xDB, 0xAA,
+        0x03, 0x16, 0x4B, 0xEF, 0xB4, 0x28, 0xAB, 0xCF, 0xBE, 0x9B,
+        0x58, 0xCF, 0xA4, 0x58, 0x82, 0xD2, 0x37, 0x8C, 0xEA, 0x3D,
+        0x75, 0x4D, 0x0B, 0x46, 0x7A, 0x04, 0xDE, 0xF1, 0x6E, 0xBB,
+        0x03, 0xBF, 0xF7, 0x8E, 0xE6, 0xF4, 0x9A, 0xE1, 0xCA, 0x26,
+        0x2C, 0x41, 0x08, 0xAD, 0x21, 0xA7, 0xC2, 0x40, 0xF5, 0x9C,
+        0xDD, 0xAB, 0xC5, 0x5A, 0x4C, 0xF4, 0xE6, 0x9A, 0x50, 0xFD,
+        0xAA, 0x47, 0xD6, 0xA6, 0x07, 0x25, 0xB2, 0x4B, 0x9C, 0x1D,
+        0x90, 0xA2, 0x4A, 0x98, 0xE0, 0x05, 0x8A, 0x5C, 0xD1, 0x2C,
+        0xC0, 0x28, 0xD1, 0x84, 0x3C, 0x72, 0xFF, 0x83, 0xEA, 0xB1,
+        0x02, 0x81, 0xC1, 0x00, 0xF8, 0xA0, 0x5F, 0x25, 0x2E, 0x23,
+        0x73, 0x30, 0xB6, 0x97, 0xAF, 0x08, 0xE7, 0xD2, 0xD8, 0xC3,
+        0x95, 0xEA, 0x9D, 0x8E, 0x9F, 0xF1, 0x36, 0x81, 0xD7, 0x7A,
+        0x21, 0x2B, 0x90, 0x38, 0x9C, 0xA6, 0x08, 0x40, 0xEA, 0xD2,
+        0x6E, 0x29, 0xB5, 0x0B, 0x3E, 0x91, 0xB2, 0x04, 0x92, 0xCF,
+        0x94, 0xFF, 0xA6, 0xA7, 0x1A, 0x5F, 0x93, 0x0C, 0x86, 0xE6,
+        0x4B, 0x61, 0xD4, 0x5E, 0xD7, 0xE3, 0x66, 0x0B, 0x83, 0xDB,
+        0x16, 0x49, 0x27, 0xD5, 0xA3, 0xB3, 0xF5, 0x5D, 0x8F, 0xC9,
+        0x48, 0x10, 0xD7, 0x77, 0x1E, 0x7B, 0x01, 0xC4, 0xFD, 0x14,
+        0x0C, 0xAB, 0x40, 0xF7, 0x9B, 0x07, 0xDE, 0x55, 0xEF, 0x36,
+        0x4C, 0x22, 0x37, 0x37, 0x09, 0x9D, 0x2A, 0x73, 0xA6, 0xA5,
+        0xF4, 0xAF, 0x39, 0x2B, 0x87, 0xB4, 0xB2, 0x28, 0x9E, 0x08,
+        0xA6, 0xCA, 0xB4, 0x39, 0x5A, 0x3A, 0xFB, 0x41, 0x93, 0xEC,
+        0x44, 0xBB, 0xD2, 0x7C, 0x3B, 0x27, 0x3E, 0x26, 0xFD, 0x7B,
+        0x20, 0xFC, 0x44, 0x67, 0xC0, 0x84, 0xD1, 0xA0, 0xCC, 0xBB,
+        0x26, 0xC7, 0x32, 0x0E, 0x01, 0x9B, 0x2B, 0x1F, 0x58, 0x85,
+        0x5A, 0x6C, 0xD0, 0xC1, 0xAC, 0x14, 0x5E, 0x06, 0x07, 0xCA,
+        0x69, 0x52, 0xF5, 0xA6, 0x16, 0x75, 0x42, 0x8A, 0xE1, 0xBA,
+        0x8B, 0x46, 0x38, 0x17, 0x7B, 0xF1, 0x7D, 0x79, 0x1F, 0x7E,
+        0x4C, 0x6A, 0x75, 0xDC, 0xA8, 0x3B, 0x02, 0x81, 0xC1, 0x00,
+        0xC2, 0x03, 0xFE, 0x57, 0xDF, 0x26, 0xD8, 0x79, 0xDC, 0x2C,
+        0x47, 0x9B, 0x92, 0x9B, 0x53, 0x40, 0x82, 0xEC, 0xBD, 0x0B,
+        0xC0, 0x96, 0x89, 0x21, 0xC5, 0x26, 0x7E, 0x7A, 0x59, 0xA7,
+        0x85, 0x11, 0xCC, 0x39, 0x33, 0xA7, 0xE6, 0x42, 0x9C, 0x12,
+        0x81, 0xA0, 0x87, 0xBC, 0x57, 0x07, 0xC4, 0x51, 0x93, 0x59,
+        0xC6, 0xAB, 0x11, 0xCC, 0xCB, 0xC8, 0xC1, 0x40, 0xDF, 0xCB,
+        0xE8, 0x45, 0x31, 0x20, 0x91, 0x88, 0x5F, 0x76, 0x76, 0xEE,
+        0x30, 0x37, 0xFA, 0xA7, 0x22, 0x72, 0x82, 0x50, 0x31, 0xE9,
+        0xA0, 0x44, 0xCA, 0xDD, 0xD6, 0xAC, 0xEC, 0x82, 0xE8, 0x62,
+        0xD8, 0x43, 0xFD, 0x77, 0x0F, 0x1C, 0x23, 0x12, 0x91, 0x1C,
+        0xFE, 0x93, 0x2C, 0x87, 0x52, 0xBF, 0x96, 0x79, 0x5E, 0x3A,
+        0x5A, 0x33, 0x28, 0x27, 0x3F, 0x20, 0x2C, 0xB3, 0x26, 0xE2,
+        0x0D, 0x44, 0xA9, 0x2F, 0x39, 0x7B, 0x7B, 0xAD, 0xA3, 0x21,
+        0xD2, 0x7F, 0x3C, 0x89, 0x63, 0xDD, 0x13, 0xB1, 0x2E, 0xD6,
+        0x34, 0xFB, 0x2A, 0x83, 0x29, 0xE7, 0x8A, 0x88, 0xD7, 0xA3,
+        0x38, 0x3C, 0x43, 0x62, 0x8F, 0x69, 0xFA, 0x4B, 0x15, 0xB5,
+        0xF6, 0x59, 0x90, 0x62, 0x7D, 0xCF, 0x1D, 0xDD, 0x49, 0x43,
+        0x33, 0x96, 0xA9, 0xF7, 0x76, 0x9F, 0xE4, 0x0D, 0x6E, 0x1C,
+        0xEA, 0x18, 0x5B, 0xBD, 0x5C, 0x98, 0x90, 0x09, 0xCA, 0x59,
+        0x9E, 0x23, 0x02, 0x81, 0xC0, 0x66, 0xFF, 0x99, 0x2A, 0xFF,
+        0xF8, 0x33, 0xAA, 0x44, 0x9A, 0x86, 0x2A, 0xBC, 0x4F, 0x3E,
+        0xF9, 0x97, 0xCB, 0xC0, 0x45, 0xEB, 0xC0, 0xB4, 0x02, 0x0A,
+        0x50, 0x50, 0x19, 0x89, 0xFF, 0xC9, 0xF5, 0x86, 0x89, 0xCE,
+        0x3E, 0x2A, 0xE1, 0x20, 0x5D, 0x6E, 0x28, 0x51, 0x85, 0x4F,
+        0x84, 0xAB, 0x87, 0x55, 0x74, 0xF8, 0x9A, 0x0B, 0x83, 0x2F,
+        0x07, 0x8C, 0xC7, 0x14, 0x81, 0xCE, 0x12, 0x28, 0x9E, 0x30,
+        0x9B, 0xBC, 0x99, 0xC5, 0xE4, 0xDD, 0x92, 0x99, 0xDD, 0x8E,
+        0xC9, 0xA6, 0x0F, 0x44, 0x13, 0xD7, 0x0E, 0xC2, 0x66, 0xE7,
+        0x29, 0x3D, 0x2E, 0x5D, 0x15, 0xB6, 0xA6, 0x05, 0xD7, 0xB7,
+        0xE7, 0xD8, 0x96, 0x7C, 0x25, 0x52, 0xD8, 0x47, 0x53, 0xED,
+        0xFF, 0xE6, 0x64, 0x08, 0xDD, 0x1D, 0xB5, 0x1F, 0xF1, 0x6F,
+        0xB6, 0xC9, 0xD2, 0x43, 0xE3, 0x56, 0x9C, 0x04, 0xA6, 0xE0,
+        0x2F, 0x0B, 0x32, 0x7C, 0x3A, 0x77, 0x0F, 0x04, 0xD2, 0x86,
+        0x44, 0x52, 0x1F, 0xEF, 0xFE, 0xC3, 0x64, 0xC2, 0xAB, 0x48,
+        0xE5, 0x67, 0x65, 0x32, 0x39, 0x57, 0x34, 0xFF, 0x22, 0x57,
+        0x3B, 0xB7, 0x80, 0x48, 0xE3, 0x52, 0xF4, 0x85, 0x17, 0x1E,
+        0x77, 0x1E, 0x36, 0xFE, 0x09, 0x36, 0x58, 0x91, 0x9E, 0x93,
+        0x71, 0x02, 0x6D, 0xAE, 0xA3, 0x1B, 0xF7, 0xA9, 0x31, 0x5A,
+        0x78, 0xAA, 0x13, 0x98, 0x8C, 0x37, 0x2D, 0x02, 0x81, 0xC1,
+        0x00, 0xBE, 0x01, 0xD9, 0x3A, 0xC7, 0x81, 0xAC, 0xAA, 0x13,
+        0x75, 0x8E, 0x1F, 0x8F, 0x41, 0xED, 0x13, 0x95, 0xE5, 0x31,
+        0xF3, 0x6B, 0x86, 0x42, 0x00, 0xBF, 0xAA, 0xC6, 0x5D, 0x1E,
+        0xA6, 0x90, 0x0C, 0xF1, 0x1B, 0xE8, 0x39, 0xFB, 0xA8, 0xAA,
+        0x5E, 0xF9, 0x72, 0x74, 0xDC, 0x7F, 0xC3, 0x4C, 0x81, 0xB3,
+        0xB4, 0x4D, 0x7B, 0xC6, 0x2F, 0xF2, 0x37, 0xC7, 0x03, 0xB8,
+        0xE9, 0x62, 0xAD, 0x38, 0xC2, 0xB3, 0xA4, 0x82, 0x11, 0x6B,
+        0xC2, 0x33, 0x98, 0xEF, 0x32, 0x75, 0xEA, 0xFD, 0x32, 0x7A,
+        0xDF, 0x59, 0xA5, 0x65, 0xA4, 0x42, 0x95, 0x11, 0xFF, 0xD6,
+        0x84, 0xCF, 0x56, 0x2E, 0xCA, 0x46, 0x13, 0x01, 0x4A, 0x32,
+        0xB1, 0xD9, 0xA3, 0xDB, 0x0D, 0x20, 0x7E, 0x1F, 0x68, 0xF7,
+        0x5E, 0x60, 0x6E, 0x0F, 0x59, 0xF8, 0x59, 0x93, 0x4D, 0x54,
+        0xBC, 0x37, 0xD0, 0x51, 0x7C, 0xBD, 0x67, 0xF0, 0xA5, 0x09,
+        0xC9, 0x9A, 0xF4, 0x1F, 0x1E, 0x52, 0x9D, 0xF5, 0xA6, 0x25,
+        0xBF, 0x85, 0x1D, 0xA1, 0xF1, 0xD8, 0xBD, 0x39, 0x10, 0x71,
+        0x57, 0x19, 0x40, 0xF3, 0xA1, 0x77, 0xE0, 0x8B, 0x4E, 0xB3,
+        0x91, 0x84, 0x15, 0x0C, 0xF1, 0x58, 0x52, 0xD9, 0xE5, 0x98,
+        0xD5, 0x66, 0x95, 0x9C, 0x19, 0x8D, 0xA4, 0x63, 0x5C, 0xBF,
+        0xC5, 0x33, 0x81, 0xED, 0x7E, 0x93, 0x4B, 0x9A, 0x6C, 0xEC,
+        0x2E, 0x3E, 0x4F, 0x02, 0x81, 0xC0, 0x34, 0xF8, 0xDF, 0x74,
+        0xC6, 0xC1, 0xD9, 0x03, 0x9B, 0x3B, 0x53, 0x19, 0xEB, 0x43,
+        0xC4, 0xAA, 0x1E, 0x73, 0xE3, 0x13, 0x25, 0x32, 0x04, 0x22,
+        0x79, 0x4A, 0x07, 0xF0, 0x06, 0x38, 0xBD, 0x57, 0xE6, 0x01,
+        0x33, 0x8C, 0xF1, 0x02, 0xCC, 0x34, 0x2C, 0x60, 0x32, 0xA4,
+        0x22, 0x1D, 0x0E, 0x39, 0x6B, 0xAB, 0xF7, 0xCE, 0xDB, 0xA7,
+        0xC3, 0xD8, 0xA2, 0x3B, 0x70, 0x31, 0x91, 0x68, 0xB9, 0xBF,
+        0xE0, 0xA1, 0x39, 0x80, 0xFE, 0x47, 0x99, 0x56, 0x6D, 0x76,
+        0x90, 0x17, 0xF5, 0x67, 0x41, 0x44, 0x27, 0x10, 0x07, 0x98,
+        0x4D, 0x4C, 0x53, 0xD4, 0x15, 0xDC, 0x0A, 0x2F, 0xE0, 0x83,
+        0x28, 0x22, 0x8D, 0x61, 0x3B, 0xE4, 0x8E, 0xE5, 0xE7, 0x24,
+        0x98, 0x19, 0xA8, 0xA3, 0xED, 0x70, 0x59, 0x06, 0x86, 0x76,
+        0xC2, 0x4B, 0xCB, 0x17, 0xC5, 0x77, 0x12, 0x07, 0xB8, 0xAB,
+        0x1A, 0x91, 0xFC, 0x72, 0x8E, 0xB7, 0xB1, 0xE6, 0x74, 0xDD,
+        0x3D, 0x92, 0xA7, 0xDE, 0x6C, 0x6E, 0xCB, 0x50, 0x44, 0x2F,
+        0xAC, 0x99, 0xF7, 0x36, 0x4D, 0x62, 0xC7, 0xAC, 0xCE, 0x7D,
+        0x26, 0xC9, 0xD2, 0x4E, 0x49, 0xD7, 0x8E, 0x66, 0x6C, 0xC1,
+        0x53, 0xDF, 0x31, 0xAB, 0x25, 0x35, 0xCA, 0xD6, 0xC4, 0xA3,
+        0xA6, 0x9F, 0x7E, 0x3D, 0x2D, 0x1A, 0x44, 0x31, 0x3D, 0x81,
+        0x91, 0xB8, 0x36, 0x08, 0x27, 0x42, 0x9E, 0x08
 };
 static const int sizeof_rsa_key_der_3072 = sizeof(rsa_key_der_3072);
 
+/* ./certs/3072/client-key.der, 3072-bit */
+static const unsigned char client_key_der_3072[] =
+{
+        0x30, 0x82, 0x06, 0xE4, 0x02, 0x01, 0x00, 0x02, 0x82, 0x01,
+        0x81, 0x00, 0xAC, 0x39, 0x50, 0x68, 0x8F, 0x78, 0xF8, 0x10,
+        0x9B, 0x68, 0x96, 0xD3, 0xE1, 0x9C, 0x56, 0x68, 0x5A, 0x41,
+        0x62, 0xE3, 0xB3, 0x41, 0xB0, 0x55, 0x80, 0x17, 0xB0, 0x88,
+        0x16, 0x9B, 0xE0, 0x97, 0x74, 0x5F, 0x42, 0x79, 0x73, 0x42,
+        0xDF, 0x93, 0xF3, 0xAA, 0x9D, 0xEE, 0x2D, 0x6F, 0xAA, 0xBC,
+        0x27, 0x90, 0x84, 0xC0, 0x5D, 0xC7, 0xEC, 0x49, 0xEA, 0x5C,
+        0x66, 0x1D, 0x70, 0x9C, 0x53, 0x5C, 0xBA, 0xA1, 0xB3, 0x58,
+        0xC9, 0x3E, 0x8E, 0x9B, 0x72, 0x3D, 0x6E, 0x02, 0x02, 0x00,
+        0x9C, 0x65, 0x56, 0x82, 0xA3, 0x22, 0xB4, 0x08, 0x5F, 0x2A,
+        0xEF, 0xDF, 0x9A, 0xD0, 0xE7, 0x31, 0x59, 0x26, 0x5B, 0x0B,
+        0x1C, 0x63, 0x61, 0xFF, 0xD5, 0x69, 0x32, 0x19, 0x06, 0x7E,
+        0x0F, 0x40, 0x3C, 0x7A, 0x1E, 0xC8, 0xFC, 0x58, 0x6C, 0x64,
+        0xAE, 0x10, 0x3D, 0xA8, 0x23, 0xFF, 0x8E, 0x1A, 0xCA, 0x6A,
+        0x82, 0xE2, 0xF9, 0x01, 0x64, 0x2C, 0x97, 0xA0, 0x1A, 0x89,
+        0xA0, 0x74, 0xD3, 0xB6, 0x05, 0x11, 0xF2, 0x62, 0x06, 0x48,
+        0x2A, 0xF7, 0x66, 0xCE, 0xC1, 0x85, 0xE1, 0xD2, 0x27, 0xEA,
+        0xCA, 0x12, 0xA5, 0x91, 0x97, 0x3E, 0xFC, 0x94, 0x06, 0x59,
+        0x51, 0xC0, 0xE7, 0x13, 0xB6, 0x87, 0x7B, 0x5F, 0xD2, 0xC0,
+        0x56, 0x2F, 0x5E, 0x1D, 0x02, 0xC3, 0x11, 0x2C, 0xDF, 0xF7,
+        0x01, 0xDA, 0xBD, 0x85, 0x54, 0x35, 0x32, 0x5F, 0xC5, 0xC8,
+        0xF9, 0x7A, 0x9F, 0x89, 0xF7, 0x03, 0x0E, 0x7E, 0x79, 0x5D,
+        0x04, 0x82, 0x35, 0x10, 0xFE, 0x6D, 0x9B, 0xBF, 0xB8, 0xEE,
+        0xE2, 0x62, 0x87, 0x26, 0x5E, 0x2F, 0x50, 0x2F, 0x78, 0x0C,
+        0xE8, 0x73, 0x4F, 0x88, 0x6A, 0xD6, 0x26, 0xA4, 0xC9, 0xFC,
+        0xFA, 0x1E, 0x8A, 0xB0, 0xF4, 0x32, 0xCF, 0x57, 0xCD, 0xA1,
+        0x58, 0x8A, 0x49, 0x0F, 0xBB, 0xA9, 0x1D, 0x86, 0xAB, 0xB9,
+        0x8F, 0x8D, 0x57, 0x19, 0xB2, 0x5A, 0x7E, 0xA4, 0xEA, 0xCC,
+        0xB7, 0x96, 0x7A, 0x3B, 0x38, 0xCD, 0xDE, 0xE0, 0x61, 0xFC,
+        0xC9, 0x06, 0x8F, 0x93, 0x5A, 0xCE, 0xAD, 0x2A, 0xE3, 0x2D,
+        0x3E, 0x39, 0x5D, 0x41, 0x83, 0x01, 0x1F, 0x0F, 0xE1, 0x7F,
+        0x76, 0xC7, 0x28, 0xDA, 0x56, 0xEF, 0xBF, 0xDC, 0x26, 0x35,
+        0x40, 0xBE, 0xAD, 0xC7, 0x38, 0xAD, 0xA4, 0x06, 0xAC, 0xCA,
+        0xE8, 0x51, 0xEB, 0xC0, 0xF8, 0x68, 0x02, 0x2C, 0x9B, 0xA1,
+        0x14, 0xBC, 0xF8, 0x61, 0x86, 0xD7, 0x56, 0xD7, 0x73, 0xF4,
+        0xAB, 0xBB, 0x6A, 0x21, 0xD3, 0x88, 0x22, 0xB4, 0xE7, 0x6F,
+        0x7F, 0x91, 0xE5, 0x0E, 0xC6, 0x08, 0x49, 0xDE, 0xEA, 0x13,
+        0x58, 0x72, 0xA0, 0xAA, 0x3A, 0xF9, 0x36, 0x03, 0x45, 0x57,
+        0x5E, 0x87, 0xD2, 0x73, 0x65, 0xC4, 0x8C, 0xA3, 0xEE, 0xC9,
+        0xD6, 0x73, 0x7C, 0x96, 0x41, 0x93, 0x02, 0x03, 0x01, 0x00,
+        0x01, 0x02, 0x82, 0x01, 0x80, 0x40, 0x19, 0x74, 0xDB, 0xF5,
+        0xCA, 0x48, 0x49, 0xA6, 0x0D, 0xDF, 0x55, 0x2C, 0xFB, 0x4B,
+        0x0D, 0xBB, 0xC9, 0xEA, 0x4C, 0x65, 0x43, 0x65, 0xA5, 0xEC,
+        0xEE, 0xE4, 0x3D, 0x42, 0x6C, 0xF1, 0xC2, 0x6D, 0x05, 0xA7,
+        0x70, 0x1C, 0x7E, 0x1F, 0x48, 0xA9, 0xC0, 0x2E, 0xD7, 0x9F,
+        0x01, 0x98, 0xC2, 0x3E, 0xD7, 0x83, 0x11, 0x35, 0xD6, 0x5B,
+        0x13, 0x87, 0xAE, 0xAC, 0x32, 0xF8, 0xDE, 0xB6, 0x08, 0x25,
+        0x4E, 0x59, 0xBA, 0x09, 0xEC, 0xC6, 0x97, 0x04, 0x85, 0xE8,
+        0x93, 0xC6, 0xBB, 0x03, 0x7A, 0x94, 0x20, 0x3B, 0x27, 0x87,
+        0x6A, 0x36, 0x41, 0x7C, 0xD5, 0xF4, 0x81, 0x1C, 0x0B, 0x39,
+        0xEB, 0x14, 0xA7, 0xA6, 0x01, 0x37, 0x50, 0x48, 0xD5, 0xC6,
+        0x57, 0x9A, 0x1B, 0x01, 0x02, 0x1F, 0x80, 0x34, 0x45, 0x09,
+        0xE6, 0xBF, 0x31, 0x19, 0xB7, 0xE1, 0xBA, 0xDA, 0xEB, 0x1A,
+        0xB0, 0xCD, 0xF5, 0xA6, 0x91, 0x63, 0xAC, 0x28, 0xE4, 0x8F,
+        0xEA, 0x7E, 0xF6, 0x0A, 0x4A, 0x71, 0x21, 0xA5, 0xF1, 0x70,
+        0x0D, 0x1B, 0xD9, 0x70, 0x64, 0x74, 0x57, 0x2F, 0x9F, 0xEC,
+        0xD4, 0x93, 0x16, 0xC7, 0xEE, 0xF8, 0xC0, 0x9F, 0x52, 0x4A,
+        0x1F, 0xAD, 0xDD, 0x40, 0x98, 0x53, 0x68, 0xFA, 0xDE, 0xA2,
+        0x04, 0xA0, 0x24, 0x05, 0xEF, 0xCB, 0x4F, 0x70, 0xDF, 0xB9,
+        0x5C, 0xC2, 0x5E, 0xE4, 0xC9, 0xCD, 0x0F, 0x5E, 0x4B, 0x77,
+        0xBB, 0x84, 0x69, 0x54, 0x98, 0x41, 0xB7, 0x9C, 0x0E, 0x38,
+        0xD8, 0xF7, 0xF3, 0x9F, 0xEF, 0xE5, 0x9B, 0xB6, 0x4B, 0xD6,
+        0x7A, 0x65, 0xF5, 0x69, 0xFA, 0xC2, 0x13, 0x70, 0x6C, 0x28,
+        0xA4, 0x29, 0xAC, 0xD9, 0xBF, 0xEC, 0x6A, 0x2E, 0xED, 0xE4,
+        0xBA, 0xDF, 0xD0, 0xF1, 0xF3, 0x3C, 0x6C, 0x84, 0xDF, 0xB7,
+        0x5A, 0x94, 0xCF, 0xD9, 0x2D, 0xEA, 0xEA, 0xB4, 0xD0, 0x91,
+        0x2E, 0x77, 0x15, 0x18, 0x0D, 0x6B, 0xBA, 0x2A, 0x0C, 0xF1,
+        0x92, 0x9D, 0xD6, 0x04, 0x05, 0xB6, 0x38, 0xC2, 0xE0, 0xA7,
+        0x2D, 0x64, 0xF8, 0xDF, 0x0C, 0x3A, 0x93, 0x83, 0xE1, 0x88,
+        0x83, 0x5F, 0x67, 0x90, 0x9F, 0x2B, 0xE0, 0x60, 0x8E, 0xCA,
+        0x30, 0x13, 0xCA, 0x9F, 0xCF, 0x7B, 0x6D, 0xD8, 0xCD, 0xEE,
+        0xF9, 0x96, 0xDD, 0x5E, 0xF4, 0x47, 0xC9, 0x4C, 0xE6, 0x8F,
+        0x7F, 0x33, 0x2A, 0x38, 0x30, 0xAF, 0xD5, 0x4A, 0x79, 0x47,
+        0x06, 0xCC, 0x96, 0x44, 0x29, 0x8C, 0x60, 0x2B, 0x08, 0xC7,
+        0xD0, 0xD3, 0xC3, 0xC5, 0x2C, 0x63, 0x6C, 0x87, 0xD2, 0xAE,
+        0x2A, 0xA4, 0x86, 0xE7, 0x76, 0x74, 0x90, 0xD1, 0x04, 0x37,
+        0x64, 0x1A, 0xED, 0x08, 0xD9, 0x98, 0x07, 0x1A, 0x98, 0x0B,
+        0x89, 0x99, 0xA4, 0xB0, 0x8C, 0x1A, 0x10, 0xEB, 0xEC, 0xF4,
+        0xEE, 0x3C, 0xC4, 0x00, 0xCC, 0x30, 0x9C, 0x43, 0x01, 0x02,
+        0x81, 0xC1, 0x00, 0xD9, 0x43, 0xF6, 0x2C, 0x78, 0x26, 0xD2,
+        0xE7, 0x15, 0xA7, 0x0A, 0x88, 0x5E, 0xDB, 0x2D, 0xAF, 0xC6,
+        0xA9, 0x6F, 0x73, 0x88, 0x3B, 0x6A, 0x08, 0x1F, 0xF5, 0x80,
+        0xB5, 0x2E, 0x29, 0x8B, 0x72, 0xF8, 0x35, 0xC8, 0x23, 0x18,
+        0x1C, 0x0D, 0x0E, 0x38, 0x82, 0xBB, 0x5B, 0x2F, 0xB4, 0x5C,
+        0x4E, 0x24, 0x05, 0xA7, 0x4C, 0x79, 0x48, 0x89, 0x8D, 0x1C,
+        0x1D, 0x0A, 0x2C, 0xFE, 0xD9, 0x99, 0xDF, 0x25, 0x8A, 0x2D,
+        0xF8, 0xEB, 0x2F, 0xDA, 0x1B, 0x63, 0xE1, 0xCD, 0x09, 0x97,
+        0x64, 0x14, 0xAB, 0xEA, 0x0B, 0xD8, 0xE2, 0xA8, 0x2A, 0x63,
+        0x35, 0x90, 0xEE, 0x7F, 0xEA, 0xCE, 0xA5, 0xEF, 0x7F, 0xAB,
+        0x87, 0x47, 0x9B, 0x45, 0x35, 0x9A, 0xDA, 0x8C, 0xF4, 0xD3,
+        0x8A, 0x0B, 0x9B, 0xE6, 0xEA, 0x92, 0xBB, 0x05, 0xE1, 0xAC,
+        0x3E, 0x35, 0xDB, 0xED, 0x65, 0x1D, 0xB6, 0x92, 0xEB, 0x29,
+        0x79, 0xF8, 0x3F, 0xC2, 0x58, 0x40, 0x32, 0x66, 0x87, 0x56,
+        0x50, 0xFF, 0xBF, 0x3E, 0xBD, 0xE9, 0x94, 0xBF, 0x31, 0xBE,
+        0x87, 0x2D, 0xEF, 0x64, 0x1E, 0x0E, 0x67, 0x3A, 0x9C, 0x94,
+        0xDA, 0x5B, 0x0C, 0x8C, 0x3D, 0xEE, 0x9D, 0xCD, 0x92, 0xDE,
+        0x40, 0x02, 0x65, 0x36, 0xC9, 0x1B, 0xF5, 0x7E, 0x4E, 0x07,
+        0xB4, 0x7F, 0x14, 0x0E, 0x03, 0x2E, 0x86, 0xF0, 0x45, 0x5F,
+        0xDC, 0xA2, 0xE8, 0xC7, 0x83, 0x02, 0x81, 0xC1, 0x00, 0xCA,
+        0xED, 0xA5, 0x3F, 0x59, 0xAC, 0x4C, 0xAD, 0xAB, 0x23, 0x02,
+        0x95, 0x80, 0xA0, 0xAF, 0x35, 0x17, 0xDB, 0xE7, 0x7F, 0x72,
+        0x41, 0x2C, 0x5C, 0xB4, 0x43, 0x85, 0x46, 0x73, 0x9F, 0x58,
+        0xE9, 0x40, 0x8B, 0xEC, 0xB0, 0xEF, 0x86, 0x4C, 0x31, 0xDE,
+        0xC8, 0x6C, 0x74, 0x75, 0xA2, 0xDB, 0x65, 0xF4, 0x50, 0xC6,
+        0x99, 0xA2, 0x70, 0xDE, 0xB6, 0x22, 0xC2, 0x01, 0x15, 0x49,
+        0x13, 0xA0, 0xE2, 0x20, 0x78, 0x44, 0xEC, 0x1F, 0x42, 0xB3,
+        0x25, 0x09, 0xCE, 0x75, 0x13, 0x75, 0x36, 0x11, 0x47, 0x2C,
+        0x3C, 0x15, 0x1F, 0xF0, 0x54, 0xD5, 0x18, 0xAE, 0x61, 0x07,
+        0xAC, 0x3D, 0x83, 0x46, 0x03, 0x8C, 0xBF, 0x63, 0x26, 0xA8,
+        0x19, 0x7C, 0xFF, 0xDE, 0x20, 0x78, 0xD0, 0xDA, 0x70, 0x2E,
+        0xBD, 0xFA, 0x96, 0xDD, 0x15, 0x78, 0x9B, 0xEF, 0xED, 0x17,
+        0x90, 0x6F, 0x14, 0x35, 0x50, 0x8E, 0x1D, 0x78, 0xB0, 0x8A,
+        0xA0, 0x53, 0x10, 0x15, 0x64, 0xCC, 0x47, 0x05, 0xB6, 0xC6,
+        0x48, 0xC0, 0x5D, 0xB4, 0x4B, 0x1A, 0x5F, 0xB8, 0x9E, 0x75,
+        0xCD, 0xC3, 0x64, 0x66, 0x88, 0x10, 0x9C, 0x8B, 0x87, 0x14,
+        0x34, 0xE6, 0x60, 0x3C, 0xA5, 0xB7, 0x81, 0x1D, 0x0B, 0x79,
+        0x93, 0x5D, 0x4A, 0x42, 0x7A, 0x7F, 0x33, 0xF0, 0x3E, 0x9E,
+        0x63, 0xBD, 0xB6, 0x5F, 0xF9, 0x47, 0xA7, 0x0A, 0x49, 0x70,
+        0xB1, 0x02, 0x81, 0xC0, 0x6F, 0xC6, 0xF4, 0x3E, 0xDA, 0xAD,
+        0xF6, 0xB1, 0x66, 0xC5, 0x62, 0xB8, 0xD8, 0x3C, 0x61, 0x1B,
+        0xDE, 0xD4, 0x4A, 0xFF, 0xA0, 0x66, 0x18, 0xDE, 0x07, 0x3B,
+        0x32, 0x35, 0x84, 0x83, 0x61, 0x38, 0x0C, 0x14, 0xF7, 0x5B,
+        0x7E, 0xCA, 0xE7, 0xB8, 0x9A, 0x40, 0x40, 0x0D, 0xE0, 0xD4,
+        0x24, 0xED, 0x1A, 0xC1, 0x41, 0xDA, 0x29, 0x47, 0xB5, 0x64,
+        0xC0, 0xC2, 0xFB, 0xFA, 0x3C, 0x3F, 0x4D, 0x57, 0xAD, 0xA3,
+        0x92, 0x95, 0x4E, 0xC2, 0x76, 0xAE, 0xC2, 0xCB, 0x67, 0xC6,
+        0x78, 0x79, 0xC7, 0xDC, 0xCE, 0x73, 0xBB, 0xE8, 0x98, 0x65,
+        0xFE, 0x56, 0x8F, 0xB2, 0xF4, 0x62, 0xA4, 0x60, 0x60, 0x80,
+        0x49, 0x8A, 0x36, 0xBF, 0xDE, 0x72, 0x7E, 0xB1, 0xD3, 0xF5,
+        0x1D, 0x64, 0x17, 0x26, 0xE5, 0x3D, 0x67, 0xB2, 0x0A, 0x8B,
+        0x99, 0x27, 0x04, 0x64, 0x9A, 0x94, 0xFC, 0x1D, 0x73, 0x26,
+        0xC3, 0x56, 0xF9, 0xEE, 0x2B, 0x99, 0x65, 0xA5, 0xC8, 0x73,
+        0xF6, 0x67, 0x83, 0xBC, 0x2B, 0x96, 0x5F, 0x36, 0xE4, 0xCA,
+        0xBD, 0xE0, 0x24, 0x34, 0xD6, 0x48, 0x54, 0x56, 0xAD, 0xA3,
+        0xE3, 0x3D, 0x17, 0xBC, 0xB3, 0xE6, 0x24, 0xFE, 0x50, 0xC6,
+        0x2F, 0xCB, 0xB4, 0xAF, 0xC7, 0xE8, 0xDD, 0x96, 0x86, 0x9D,
+        0xB4, 0x7F, 0x1B, 0x26, 0x01, 0x33, 0x87, 0xDB, 0x6A, 0x7F,
+        0xF6, 0x9A, 0xB7, 0xC1, 0x94, 0xEB, 0x02, 0x81, 0xC1, 0x00,
+        0xB0, 0x6D, 0x20, 0x68, 0x0D, 0x7C, 0x81, 0x45, 0xD4, 0x2E,
+        0x22, 0x06, 0xFC, 0xC7, 0xB6, 0xCC, 0x40, 0x2C, 0x0D, 0xFE,
+        0x7D, 0xC5, 0x2F, 0xDE, 0x81, 0x52, 0xDA, 0xC2, 0x3F, 0xAF,
+        0xE0, 0x4B, 0x1A, 0xB5, 0x0C, 0x59, 0x60, 0x45, 0xB0, 0x65,
+        0x03, 0x3D, 0xD9, 0x1C, 0xFF, 0x51, 0x51, 0xD2, 0x38, 0x31,
+        0x2A, 0x19, 0x54, 0x63, 0x31, 0x1D, 0xC4, 0xE6, 0x4A, 0xAE,
+        0xC8, 0xD3, 0xE9, 0xE1, 0xEF, 0x3C, 0xE1, 0x1F, 0x30, 0xA6,
+        0x7A, 0xBD, 0xCE, 0xE2, 0xD2, 0x62, 0xD2, 0x5A, 0xE9, 0x76,
+        0xA9, 0x7C, 0xAB, 0x19, 0x13, 0x87, 0x8D, 0xA5, 0x61, 0xA6,
+        0x36, 0x57, 0x87, 0x3B, 0x64, 0x59, 0x9D, 0xBA, 0x9F, 0x67,
+        0x72, 0x6A, 0x86, 0x84, 0xA6, 0x08, 0x31, 0x41, 0xD3, 0x48,
+        0x09, 0x3B, 0x5E, 0x6C, 0x5F, 0x56, 0x55, 0x7F, 0xAD, 0x7E,
+        0xC2, 0x27, 0xEE, 0x8A, 0xF1, 0x37, 0x51, 0xF7, 0x49, 0x80,
+        0xA3, 0x65, 0x74, 0x11, 0xDD, 0xA7, 0xBE, 0xFA, 0x58, 0x7B,
+        0x69, 0xB4, 0xC2, 0x9A, 0x35, 0x2F, 0xBE, 0x84, 0x4E, 0x2C,
+        0x66, 0x5B, 0x38, 0x6F, 0x47, 0xBD, 0x30, 0x44, 0x0A, 0x02,
+        0xAC, 0x8C, 0xB9, 0x66, 0x1E, 0x14, 0x2D, 0x90, 0x71, 0x42,
+        0x12, 0xB7, 0x0E, 0x3A, 0x8B, 0xC5, 0x98, 0x65, 0xFD, 0x8F,
+        0x53, 0x81, 0x7F, 0xE4, 0xD9, 0x58, 0x0E, 0xF5, 0xA9, 0x39,
+        0xE4, 0x61, 0x02, 0x81, 0xC1, 0x00, 0xB3, 0x94, 0x8F, 0x2B,
+        0xFD, 0x84, 0x2E, 0x83, 0x42, 0x86, 0x56, 0x7E, 0xB5, 0xF8,
+        0x3C, 0xC5, 0x0C, 0xCB, 0xBD, 0x32, 0x0C, 0xD7, 0xAA, 0xA7,
+        0xB0, 0xE9, 0xA4, 0x6A, 0xD1, 0x01, 0xDB, 0x87, 0x2A, 0xF7,
+        0xDF, 0xEC, 0xC2, 0x03, 0x5D, 0x55, 0xA8, 0x66, 0x73, 0x79,
+        0xA9, 0xAB, 0xBD, 0xAF, 0x69, 0x37, 0xFE, 0x41, 0xB5, 0x53,
+        0xB3, 0xB2, 0xC0, 0xB1, 0x80, 0x34, 0xE6, 0xE1, 0x7B, 0xAE,
+        0x67, 0xC7, 0xF3, 0x57, 0xFE, 0x12, 0xBC, 0x78, 0xAA, 0x75,
+        0x0D, 0xAC, 0x79, 0x90, 0x14, 0x49, 0xFE, 0x6B, 0x51, 0xE3,
+        0xE4, 0x46, 0xB2, 0x10, 0x4D, 0x05, 0x6A, 0x12, 0x80, 0x2A,
+        0x8F, 0x39, 0x42, 0x0E, 0x3B, 0x24, 0x2B, 0x50, 0x5D, 0xF3,
+        0xA7, 0x7F, 0x2F, 0x82, 0x89, 0x87, 0x9F, 0xF8, 0x7B, 0x1E,
+        0x05, 0x6E, 0x75, 0x83, 0x04, 0x35, 0x66, 0x4A, 0x06, 0x57,
+        0x39, 0xAB, 0x21, 0x0B, 0x94, 0x41, 0x6A, 0x2A, 0xC7, 0xDE,
+        0x98, 0x45, 0x8F, 0x96, 0x1C, 0xF2, 0xD8, 0xFB, 0x9C, 0x10,
+        0x8E, 0x41, 0x7A, 0xDD, 0xDD, 0x1D, 0xEF, 0xA5, 0x67, 0xEC,
+        0xFE, 0xA3, 0x2D, 0xA9, 0xFD, 0xF3, 0xEE, 0x35, 0xF4, 0xA7,
+        0xBC, 0xF9, 0x71, 0xCC, 0xB9, 0xC0, 0x5F, 0x58, 0x5B, 0xBD,
+        0x1A, 0x9E, 0xC7, 0x08, 0x67, 0x7C, 0xC7, 0x51, 0x5B, 0xBE,
+        0xE3, 0xF8, 0xBE, 0x1E, 0xC7, 0xD2, 0x28, 0x97
+};
+static const int sizeof_client_key_der_3072 = sizeof(client_key_der_3072);
+
+/* ./certs/3072/client-keyPub.der, 3072-bit */
+static const unsigned char client_keypub_der_3072[] =
+{
+        0x30, 0x82, 0x01, 0xA2, 0x30, 0x0D, 0x06, 0x09, 0x2A, 0x86,
+        0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x01, 0x05, 0x00, 0x03,
+        0x82, 0x01, 0x8F, 0x00, 0x30, 0x82, 0x01, 0x8A, 0x02, 0x82,
+        0x01, 0x81, 0x00, 0xAC, 0x39, 0x50, 0x68, 0x8F, 0x78, 0xF8,
+        0x10, 0x9B, 0x68, 0x96, 0xD3, 0xE1, 0x9C, 0x56, 0x68, 0x5A,
+        0x41, 0x62, 0xE3, 0xB3, 0x41, 0xB0, 0x55, 0x80, 0x17, 0xB0,
+        0x88, 0x16, 0x9B, 0xE0, 0x97, 0x74, 0x5F, 0x42, 0x79, 0x73,
+        0x42, 0xDF, 0x93, 0xF3, 0xAA, 0x9D, 0xEE, 0x2D, 0x6F, 0xAA,
+        0xBC, 0x27, 0x90, 0x84, 0xC0, 0x5D, 0xC7, 0xEC, 0x49, 0xEA,
+        0x5C, 0x66, 0x1D, 0x70, 0x9C, 0x53, 0x5C, 0xBA, 0xA1, 0xB3,
+        0x58, 0xC9, 0x3E, 0x8E, 0x9B, 0x72, 0x3D, 0x6E, 0x02, 0x02,
+        0x00, 0x9C, 0x65, 0x56, 0x82, 0xA3, 0x22, 0xB4, 0x08, 0x5F,
+        0x2A, 0xEF, 0xDF, 0x9A, 0xD0, 0xE7, 0x31, 0x59, 0x26, 0x5B,
+        0x0B, 0x1C, 0x63, 0x61, 0xFF, 0xD5, 0x69, 0x32, 0x19, 0x06,
+        0x7E, 0x0F, 0x40, 0x3C, 0x7A, 0x1E, 0xC8, 0xFC, 0x58, 0x6C,
+        0x64, 0xAE, 0x10, 0x3D, 0xA8, 0x23, 0xFF, 0x8E, 0x1A, 0xCA,
+        0x6A, 0x82, 0xE2, 0xF9, 0x01, 0x64, 0x2C, 0x97, 0xA0, 0x1A,
+        0x89, 0xA0, 0x74, 0xD3, 0xB6, 0x05, 0x11, 0xF2, 0x62, 0x06,
+        0x48, 0x2A, 0xF7, 0x66, 0xCE, 0xC1, 0x85, 0xE1, 0xD2, 0x27,
+        0xEA, 0xCA, 0x12, 0xA5, 0x91, 0x97, 0x3E, 0xFC, 0x94, 0x06,
+        0x59, 0x51, 0xC0, 0xE7, 0x13, 0xB6, 0x87, 0x7B, 0x5F, 0xD2,
+        0xC0, 0x56, 0x2F, 0x5E, 0x1D, 0x02, 0xC3, 0x11, 0x2C, 0xDF,
+        0xF7, 0x01, 0xDA, 0xBD, 0x85, 0x54, 0x35, 0x32, 0x5F, 0xC5,
+        0xC8, 0xF9, 0x7A, 0x9F, 0x89, 0xF7, 0x03, 0x0E, 0x7E, 0x79,
+        0x5D, 0x04, 0x82, 0x35, 0x10, 0xFE, 0x6D, 0x9B, 0xBF, 0xB8,
+        0xEE, 0xE2, 0x62, 0x87, 0x26, 0x5E, 0x2F, 0x50, 0x2F, 0x78,
+        0x0C, 0xE8, 0x73, 0x4F, 0x88, 0x6A, 0xD6, 0x26, 0xA4, 0xC9,
+        0xFC, 0xFA, 0x1E, 0x8A, 0xB0, 0xF4, 0x32, 0xCF, 0x57, 0xCD,
+        0xA1, 0x58, 0x8A, 0x49, 0x0F, 0xBB, 0xA9, 0x1D, 0x86, 0xAB,
+        0xB9, 0x8F, 0x8D, 0x57, 0x19, 0xB2, 0x5A, 0x7E, 0xA4, 0xEA,
+        0xCC, 0xB7, 0x96, 0x7A, 0x3B, 0x38, 0xCD, 0xDE, 0xE0, 0x61,
+        0xFC, 0xC9, 0x06, 0x8F, 0x93, 0x5A, 0xCE, 0xAD, 0x2A, 0xE3,
+        0x2D, 0x3E, 0x39, 0x5D, 0x41, 0x83, 0x01, 0x1F, 0x0F, 0xE1,
+        0x7F, 0x76, 0xC7, 0x28, 0xDA, 0x56, 0xEF, 0xBF, 0xDC, 0x26,
+        0x35, 0x40, 0xBE, 0xAD, 0xC7, 0x38, 0xAD, 0xA4, 0x06, 0xAC,
+        0xCA, 0xE8, 0x51, 0xEB, 0xC0, 0xF8, 0x68, 0x02, 0x2C, 0x9B,
+        0xA1, 0x14, 0xBC, 0xF8, 0x61, 0x86, 0xD7, 0x56, 0xD7, 0x73,
+        0xF4, 0xAB, 0xBB, 0x6A, 0x21, 0xD3, 0x88, 0x22, 0xB4, 0xE7,
+        0x6F, 0x7F, 0x91, 0xE5, 0x0E, 0xC6, 0x08, 0x49, 0xDE, 0xEA,
+        0x13, 0x58, 0x72, 0xA0, 0xAA, 0x3A, 0xF9, 0x36, 0x03, 0x45,
+        0x57, 0x5E, 0x87, 0xD2, 0x73, 0x65, 0xC4, 0x8C, 0xA3, 0xEE,
+        0xC9, 0xD6, 0x73, 0x7C, 0x96, 0x41, 0x93, 0x02, 0x03, 0x01,
+        0x00, 0x01
+};
+static const int sizeof_client_keypub_der_3072 = sizeof(client_keypub_der_3072);
+
+/* ./certs/3072/client-cert.der, 3072-bit */
+static const unsigned char client_cert_der_3072[] =
+{
+        0x30, 0x82, 0x05, 0xF8, 0x30, 0x82, 0x04, 0x60, 0xA0, 0x03,
+        0x02, 0x01, 0x02, 0x02, 0x14, 0x2F, 0x06, 0x07, 0xA8, 0xB6,
+        0xF4, 0xEE, 0x10, 0x91, 0x43, 0xDE, 0xE1, 0x46, 0x99, 0xC4,
+        0x90, 0x79, 0xE6, 0xF1, 0xD1, 0x30, 0x0D, 0x06, 0x09, 0x2A,
+        0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0B, 0x05, 0x00,
+        0x30, 0x81, 0x9E, 0x31, 0x0B, 0x30, 0x09, 0x06, 0x03, 0x55,
+        0x04, 0x06, 0x13, 0x02, 0x55, 0x53, 0x31, 0x10, 0x30, 0x0E,
+        0x06, 0x03, 0x55, 0x04, 0x08, 0x0C, 0x07, 0x4D, 0x6F, 0x6E,
+        0x74, 0x61, 0x6E, 0x61, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03,
+        0x55, 0x04, 0x07, 0x0C, 0x07, 0x42, 0x6F, 0x7A, 0x65, 0x6D,
+        0x61, 0x6E, 0x31, 0x15, 0x30, 0x13, 0x06, 0x03, 0x55, 0x04,
+        0x0A, 0x0C, 0x0C, 0x77, 0x6F, 0x6C, 0x66, 0x53, 0x53, 0x4C,
+        0x5F, 0x33, 0x30, 0x37, 0x32, 0x31, 0x19, 0x30, 0x17, 0x06,
+        0x03, 0x55, 0x04, 0x0B, 0x0C, 0x10, 0x50, 0x72, 0x6F, 0x67,
+        0x72, 0x61, 0x6D, 0x6D, 0x69, 0x6E, 0x67, 0x2D, 0x33, 0x30,
+        0x37, 0x32, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 0x04,
+        0x03, 0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77, 0x6F, 0x6C,
+        0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x31, 0x1F,
+        0x30, 0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D,
+        0x01, 0x09, 0x01, 0x16, 0x10, 0x69, 0x6E, 0x66, 0x6F, 0x40,
+        0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F,
+        0x6D, 0x30, 0x1E, 0x17, 0x0D, 0x32, 0x30, 0x30, 0x31, 0x32,
+        0x32, 0x30, 0x35, 0x35, 0x31, 0x34, 0x38, 0x5A, 0x17, 0x0D,
+        0x32, 0x32, 0x31, 0x30, 0x31, 0x38, 0x30, 0x35, 0x35, 0x31,
+        0x34, 0x38, 0x5A, 0x30, 0x81, 0x9E, 0x31, 0x0B, 0x30, 0x09,
+        0x06, 0x03, 0x55, 0x04, 0x06, 0x13, 0x02, 0x55, 0x53, 0x31,
+        0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x08, 0x0C, 0x07,
+        0x4D, 0x6F, 0x6E, 0x74, 0x61, 0x6E, 0x61, 0x31, 0x10, 0x30,
+        0x0E, 0x06, 0x03, 0x55, 0x04, 0x07, 0x0C, 0x07, 0x42, 0x6F,
+        0x7A, 0x65, 0x6D, 0x61, 0x6E, 0x31, 0x15, 0x30, 0x13, 0x06,
+        0x03, 0x55, 0x04, 0x0A, 0x0C, 0x0C, 0x77, 0x6F, 0x6C, 0x66,
+        0x53, 0x53, 0x4C, 0x5F, 0x33, 0x30, 0x37, 0x32, 0x31, 0x19,
+        0x30, 0x17, 0x06, 0x03, 0x55, 0x04, 0x0B, 0x0C, 0x10, 0x50,
+        0x72, 0x6F, 0x67, 0x72, 0x61, 0x6D, 0x6D, 0x69, 0x6E, 0x67,
+        0x2D, 0x33, 0x30, 0x37, 0x32, 0x31, 0x18, 0x30, 0x16, 0x06,
+        0x03, 0x55, 0x04, 0x03, 0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E,
+        0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F,
+        0x6D, 0x31, 0x1F, 0x30, 0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48,
+        0x86, 0xF7, 0x0D, 0x01, 0x09, 0x01, 0x16, 0x10, 0x69, 0x6E,
+        0x66, 0x6F, 0x40, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C,
+        0x2E, 0x63, 0x6F, 0x6D, 0x30, 0x82, 0x01, 0xA2, 0x30, 0x0D,
+        0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01,
+        0x01, 0x05, 0x00, 0x03, 0x82, 0x01, 0x8F, 0x00, 0x30, 0x82,
+        0x01, 0x8A, 0x02, 0x82, 0x01, 0x81, 0x00, 0xAC, 0x39, 0x50,
+        0x68, 0x8F, 0x78, 0xF8, 0x10, 0x9B, 0x68, 0x96, 0xD3, 0xE1,
+        0x9C, 0x56, 0x68, 0x5A, 0x41, 0x62, 0xE3, 0xB3, 0x41, 0xB0,
+        0x55, 0x80, 0x17, 0xB0, 0x88, 0x16, 0x9B, 0xE0, 0x97, 0x74,
+        0x5F, 0x42, 0x79, 0x73, 0x42, 0xDF, 0x93, 0xF3, 0xAA, 0x9D,
+        0xEE, 0x2D, 0x6F, 0xAA, 0xBC, 0x27, 0x90, 0x84, 0xC0, 0x5D,
+        0xC7, 0xEC, 0x49, 0xEA, 0x5C, 0x66, 0x1D, 0x70, 0x9C, 0x53,
+        0x5C, 0xBA, 0xA1, 0xB3, 0x58, 0xC9, 0x3E, 0x8E, 0x9B, 0x72,
+        0x3D, 0x6E, 0x02, 0x02, 0x00, 0x9C, 0x65, 0x56, 0x82, 0xA3,
+        0x22, 0xB4, 0x08, 0x5F, 0x2A, 0xEF, 0xDF, 0x9A, 0xD0, 0xE7,
+        0x31, 0x59, 0x26, 0x5B, 0x0B, 0x1C, 0x63, 0x61, 0xFF, 0xD5,
+        0x69, 0x32, 0x19, 0x06, 0x7E, 0x0F, 0x40, 0x3C, 0x7A, 0x1E,
+        0xC8, 0xFC, 0x58, 0x6C, 0x64, 0xAE, 0x10, 0x3D, 0xA8, 0x23,
+        0xFF, 0x8E, 0x1A, 0xCA, 0x6A, 0x82, 0xE2, 0xF9, 0x01, 0x64,
+        0x2C, 0x97, 0xA0, 0x1A, 0x89, 0xA0, 0x74, 0xD3, 0xB6, 0x05,
+        0x11, 0xF2, 0x62, 0x06, 0x48, 0x2A, 0xF7, 0x66, 0xCE, 0xC1,
+        0x85, 0xE1, 0xD2, 0x27, 0xEA, 0xCA, 0x12, 0xA5, 0x91, 0x97,
+        0x3E, 0xFC, 0x94, 0x06, 0x59, 0x51, 0xC0, 0xE7, 0x13, 0xB6,
+        0x87, 0x7B, 0x5F, 0xD2, 0xC0, 0x56, 0x2F, 0x5E, 0x1D, 0x02,
+        0xC3, 0x11, 0x2C, 0xDF, 0xF7, 0x01, 0xDA, 0xBD, 0x85, 0x54,
+        0x35, 0x32, 0x5F, 0xC5, 0xC8, 0xF9, 0x7A, 0x9F, 0x89, 0xF7,
+        0x03, 0x0E, 0x7E, 0x79, 0x5D, 0x04, 0x82, 0x35, 0x10, 0xFE,
+        0x6D, 0x9B, 0xBF, 0xB8, 0xEE, 0xE2, 0x62, 0x87, 0x26, 0x5E,
+        0x2F, 0x50, 0x2F, 0x78, 0x0C, 0xE8, 0x73, 0x4F, 0x88, 0x6A,
+        0xD6, 0x26, 0xA4, 0xC9, 0xFC, 0xFA, 0x1E, 0x8A, 0xB0, 0xF4,
+        0x32, 0xCF, 0x57, 0xCD, 0xA1, 0x58, 0x8A, 0x49, 0x0F, 0xBB,
+        0xA9, 0x1D, 0x86, 0xAB, 0xB9, 0x8F, 0x8D, 0x57, 0x19, 0xB2,
+        0x5A, 0x7E, 0xA4, 0xEA, 0xCC, 0xB7, 0x96, 0x7A, 0x3B, 0x38,
+        0xCD, 0xDE, 0xE0, 0x61, 0xFC, 0xC9, 0x06, 0x8F, 0x93, 0x5A,
+        0xCE, 0xAD, 0x2A, 0xE3, 0x2D, 0x3E, 0x39, 0x5D, 0x41, 0x83,
+        0x01, 0x1F, 0x0F, 0xE1, 0x7F, 0x76, 0xC7, 0x28, 0xDA, 0x56,
+        0xEF, 0xBF, 0xDC, 0x26, 0x35, 0x40, 0xBE, 0xAD, 0xC7, 0x38,
+        0xAD, 0xA4, 0x06, 0xAC, 0xCA, 0xE8, 0x51, 0xEB, 0xC0, 0xF8,
+        0x68, 0x02, 0x2C, 0x9B, 0xA1, 0x14, 0xBC, 0xF8, 0x61, 0x86,
+        0xD7, 0x56, 0xD7, 0x73, 0xF4, 0xAB, 0xBB, 0x6A, 0x21, 0xD3,
+        0x88, 0x22, 0xB4, 0xE7, 0x6F, 0x7F, 0x91, 0xE5, 0x0E, 0xC6,
+        0x08, 0x49, 0xDE, 0xEA, 0x13, 0x58, 0x72, 0xA0, 0xAA, 0x3A,
+        0xF9, 0x36, 0x03, 0x45, 0x57, 0x5E, 0x87, 0xD2, 0x73, 0x65,
+        0xC4, 0x8C, 0xA3, 0xEE, 0xC9, 0xD6, 0x73, 0x7C, 0x96, 0x41,
+        0x93, 0x02, 0x03, 0x01, 0x00, 0x01, 0xA3, 0x82, 0x01, 0x2A,
+        0x30, 0x82, 0x01, 0x26, 0x30, 0x1D, 0x06, 0x03, 0x55, 0x1D,
+        0x0E, 0x04, 0x16, 0x04, 0x14, 0x3D, 0xD1, 0x84, 0xC2, 0xAF,
+        0xB0, 0x20, 0x49, 0xBC, 0x74, 0x87, 0x41, 0x38, 0xAB, 0xBA,
+        0xD2, 0xD4, 0x0C, 0xA3, 0xA8, 0x30, 0x81, 0xDE, 0x06, 0x03,
+        0x55, 0x1D, 0x23, 0x04, 0x81, 0xD6, 0x30, 0x81, 0xD3, 0x80,
+        0x14, 0x3D, 0xD1, 0x84, 0xC2, 0xAF, 0xB0, 0x20, 0x49, 0xBC,
+        0x74, 0x87, 0x41, 0x38, 0xAB, 0xBA, 0xD2, 0xD4, 0x0C, 0xA3,
+        0xA8, 0xA1, 0x81, 0xA4, 0xA4, 0x81, 0xA1, 0x30, 0x81, 0x9E,
+        0x31, 0x0B, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 0x06, 0x13,
+        0x02, 0x55, 0x53, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55,
+        0x04, 0x08, 0x0C, 0x07, 0x4D, 0x6F, 0x6E, 0x74, 0x61, 0x6E,
+        0x61, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x07,
+        0x0C, 0x07, 0x42, 0x6F, 0x7A, 0x65, 0x6D, 0x61, 0x6E, 0x31,
+        0x15, 0x30, 0x13, 0x06, 0x03, 0x55, 0x04, 0x0A, 0x0C, 0x0C,
+        0x77, 0x6F, 0x6C, 0x66, 0x53, 0x53, 0x4C, 0x5F, 0x33, 0x30,
+        0x37, 0x32, 0x31, 0x19, 0x30, 0x17, 0x06, 0x03, 0x55, 0x04,
+        0x0B, 0x0C, 0x10, 0x50, 0x72, 0x6F, 0x67, 0x72, 0x61, 0x6D,
+        0x6D, 0x69, 0x6E, 0x67, 0x2D, 0x33, 0x30, 0x37, 0x32, 0x31,
+        0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 0x03, 0x0C, 0x0F,
+        0x77, 0x77, 0x77, 0x2E, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73,
+        0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x31, 0x1F, 0x30, 0x1D, 0x06,
+        0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x09, 0x01,
+        0x16, 0x10, 0x69, 0x6E, 0x66, 0x6F, 0x40, 0x77, 0x6F, 0x6C,
+        0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x82, 0x14,
+        0x2F, 0x06, 0x07, 0xA8, 0xB6, 0xF4, 0xEE, 0x10, 0x91, 0x43,
+        0xDE, 0xE1, 0x46, 0x99, 0xC4, 0x90, 0x79, 0xE6, 0xF1, 0xD1,
+        0x30, 0x0C, 0x06, 0x03, 0x55, 0x1D, 0x13, 0x04, 0x05, 0x30,
+        0x03, 0x01, 0x01, 0xFF, 0x30, 0x16, 0x06, 0x03, 0x55, 0x1D,
+        0x11, 0x04, 0x0F, 0x30, 0x0D, 0x82, 0x0B, 0x65, 0x78, 0x61,
+        0x6D, 0x70, 0x6C, 0x65, 0x2E, 0x63, 0x6F, 0x6D, 0x30, 0x0D,
+        0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01,
+        0x0B, 0x05, 0x00, 0x03, 0x82, 0x01, 0x81, 0x00, 0x04, 0xF5,
+        0xE0, 0xE5, 0x75, 0x6B, 0xCF, 0xEE, 0x19, 0xEF, 0x3C, 0xB5,
+        0xB6, 0x78, 0xCE, 0xB2, 0xC3, 0xF2, 0x3E, 0x0D, 0x3F, 0xB7,
+        0x6D, 0x59, 0x7D, 0xB6, 0x7E, 0x6A, 0x91, 0x0F, 0x85, 0xAC,
+        0xCA, 0x56, 0x30, 0x3C, 0x3F, 0x5D, 0x30, 0x10, 0x7C, 0x5E,
+        0x7F, 0x98, 0xB2, 0x9D, 0x95, 0x04, 0xE1, 0xEE, 0xC0, 0x9E,
+        0x1B, 0x01, 0x39, 0xCB, 0x02, 0x05, 0xB9, 0x9B, 0x02, 0x88,
+        0xEB, 0xD0, 0xAD, 0x06, 0xD5, 0x39, 0x2D, 0x24, 0xE6, 0xDC,
+        0x4E, 0xCE, 0x8C, 0x36, 0x7D, 0xB6, 0x8E, 0x1D, 0xE8, 0xB7,
+        0xEF, 0xFF, 0xB4, 0x17, 0xC1, 0xA5, 0xD8, 0xFA, 0x34, 0xDD,
+        0x99, 0x3D, 0x30, 0x4B, 0x45, 0xA2, 0x14, 0x6A, 0x88, 0x93,
+        0xCA, 0x25, 0xE2, 0x5C, 0xD5, 0xBA, 0xE8, 0x9D, 0xEF, 0xD7,
+        0x68, 0x76, 0x05, 0x92, 0x48, 0x19, 0x92, 0x11, 0x79, 0xC2,
+        0xFE, 0x11, 0x49, 0x4D, 0xD6, 0xD1, 0x8F, 0x32, 0x1C, 0x5F,
+        0x3B, 0x41, 0x2C, 0x08, 0xB2, 0x72, 0x65, 0x1C, 0xE5, 0x86,
+        0x02, 0x94, 0xB3, 0x9D, 0x30, 0xDA, 0x59, 0x42, 0xA1, 0xB0,
+        0x1E, 0x00, 0x92, 0x93, 0x6E, 0x0D, 0x27, 0xCF, 0xDF, 0xD8,
+        0xCF, 0x2B, 0xCD, 0xCB, 0x8C, 0xFF, 0xB5, 0x6F, 0x83, 0x92,
+        0x27, 0x00, 0x58, 0x59, 0xA2, 0x0B, 0x91, 0xB0, 0x39, 0xCA,
+        0xA8, 0x78, 0xFD, 0x83, 0x56, 0x4F, 0xA1, 0x6E, 0xC3, 0xE0,
+        0x2B, 0xAE, 0xEF, 0x3C, 0x09, 0x04, 0xF0, 0x9B, 0x5B, 0x00,
+        0xD3, 0xED, 0xB6, 0x06, 0xF6, 0x9C, 0xDA, 0xAF, 0x61, 0x68,
+        0x8F, 0xE3, 0x2A, 0xC3, 0x85, 0x20, 0x66, 0x2C, 0xAC, 0xDD,
+        0x65, 0x37, 0x36, 0xC2, 0x2F, 0xBA, 0xB8, 0x90, 0x66, 0x6E,
+        0x9E, 0x58, 0xA2, 0x4B, 0xD6, 0xA7, 0x30, 0xC8, 0xC9, 0x6C,
+        0xBD, 0x13, 0x40, 0xA0, 0xCA, 0x59, 0x7C, 0xC5, 0x86, 0x9C,
+        0x55, 0xC5, 0x68, 0xC7, 0x0C, 0x7F, 0x94, 0x73, 0xA5, 0x4B,
+        0xEB, 0xF1, 0x27, 0x96, 0xB5, 0xF9, 0x69, 0x6B, 0x2B, 0xB6,
+        0x62, 0xD7, 0x3A, 0x0D, 0x40, 0x65, 0xF0, 0x0D, 0xDE, 0x91,
+        0x77, 0xD4, 0xF6, 0xBB, 0x13, 0x7B, 0x4A, 0x55, 0x8F, 0x7E,
+        0x49, 0x65, 0x89, 0x37, 0x46, 0x05, 0x2F, 0x90, 0x14, 0x73,
+        0x0D, 0x2D, 0x1E, 0xA4, 0xD4, 0xBB, 0x4E, 0x6D, 0x29, 0xDA,
+        0x79, 0x6E, 0x73, 0x08, 0xDE, 0x5F, 0x27, 0xDC, 0x23, 0x14,
+        0xDB, 0x7B, 0xE7, 0x02, 0x13, 0x2E, 0xC7, 0x94, 0x19, 0xF3,
+        0x7D, 0x2E, 0xC4, 0x8A, 0x69, 0xBA, 0xF5, 0xBA, 0x62, 0xC2,
+        0x88, 0xB5, 0xCB, 0xC7, 0x92, 0xA2, 0x8A, 0xE3, 0x69, 0x10,
+        0x6E, 0xC5, 0xB8, 0xB2, 0x10, 0x7E, 0xB6, 0x0C, 0x71, 0x2A,
+        0xC3, 0xE9, 0x71, 0x0C, 0xA2, 0x8B, 0x9A, 0x1D, 0x2C, 0x4E,
+        0x21, 0x68, 0x53, 0x51, 0x6D, 0x0C, 0xD2, 0xB4, 0x4B, 0x50,
+        0x4B, 0x0A
+};
+static const int sizeof_client_cert_der_3072 = sizeof(client_cert_der_3072);
+
 #endif /* USE_CERT_BUFFERS_3072 */
 
+#ifdef USE_CERT_BUFFERS_4096
+
+/* ./certs/4096/client-key.der, 4096-bit */
+static const unsigned char client_key_der_4096[] =
+{
+        0x30, 0x82, 0x09, 0x28, 0x02, 0x01, 0x00, 0x02, 0x82, 0x02,
+        0x01, 0x00, 0xF5, 0xD0, 0x31, 0xE4, 0x71, 0x59, 0x58, 0xB3,
+        0x07, 0x50, 0xDD, 0x16, 0x79, 0xFC, 0xC6, 0x95, 0x50, 0xFC,
+        0x46, 0x0E, 0x57, 0x12, 0x86, 0x71, 0x8D, 0xE3, 0x9B, 0x4A,
+        0x33, 0xEA, 0x4F, 0xD9, 0x17, 0x13, 0x6D, 0x48, 0x69, 0xDF,
+        0x59, 0x11, 0x08, 0x02, 0x9D, 0xAF, 0x2B, 0xC7, 0x30, 0xBE,
+        0x0C, 0xDC, 0x87, 0xD4, 0x5A, 0x12, 0x09, 0x23, 0x5D, 0xE1,
+        0x76, 0x5A, 0x62, 0x37, 0x46, 0x74, 0xEF, 0x03, 0x05, 0xBB,
+        0x1E, 0x6D, 0x29, 0x75, 0x6C, 0x2E, 0x9D, 0x87, 0x0D, 0x8F,
+        0x87, 0xCB, 0x14, 0x95, 0x9B, 0xBE, 0x17, 0x6B, 0x51, 0xD1,
+        0x4C, 0xDA, 0xD7, 0x91, 0x66, 0xC5, 0x36, 0xEB, 0xE0, 0x07,
+        0x1A, 0x76, 0x4D, 0xB0, 0xFB, 0xC1, 0xF5, 0x5E, 0x05, 0xDB,
+        0xBA, 0xCB, 0x25, 0xD9, 0x99, 0x13, 0x1C, 0xC0, 0x35, 0xDC,
+        0x40, 0xE9, 0x36, 0xCD, 0xC4, 0xD5, 0x7A, 0x41, 0x70, 0x0F,
+        0x36, 0xEB, 0xA5, 0x4E, 0x17, 0x05, 0xD5, 0x75, 0x1B, 0x64,
+        0x62, 0x7A, 0x3F, 0x0D, 0x28, 0x48, 0x6A, 0xE3, 0xAC, 0x9C,
+        0xA8, 0x8F, 0xE9, 0xED, 0xF7, 0xCD, 0x24, 0xA0, 0xB1, 0xA0,
+        0x03, 0xAC, 0xE3, 0x03, 0xF5, 0x3F, 0xD1, 0x96, 0xFF, 0x2A,
+        0x7E, 0x08, 0xB1, 0xD3, 0xE0, 0x18, 0x14, 0xEC, 0x65, 0x37,
+        0x50, 0x43, 0xC2, 0x6A, 0x8C, 0xF4, 0x5B, 0xFE, 0xC4, 0xCB,
+        0x8D, 0x3F, 0x81, 0x02, 0xF7, 0xC2, 0xDD, 0xE4, 0xC1, 0x8E,
+        0x80, 0x0C, 0x04, 0x25, 0x2D, 0x80, 0x5A, 0x2E, 0x0F, 0x22,
+        0x35, 0x4A, 0xF4, 0x85, 0xED, 0x51, 0xD8, 0xAB, 0x6D, 0x8F,
+        0xA2, 0x3B, 0x24, 0x00, 0x6E, 0x81, 0xE2, 0x1E, 0x76, 0xD6,
+        0xAC, 0x31, 0x12, 0xDB, 0xF3, 0x8E, 0x07, 0xA1, 0xDE, 0x89,
+        0x4A, 0x39, 0x60, 0x77, 0xC5, 0xAA, 0xF1, 0x51, 0xE6, 0x06,
+        0xF1, 0x95, 0x56, 0x2A, 0xE1, 0x8E, 0x92, 0x30, 0x9F, 0xFE,
+        0x58, 0x44, 0xAC, 0x46, 0xF2, 0xFD, 0x9A, 0xFC, 0xA8, 0x1D,
+        0xA1, 0xD3, 0x55, 0x37, 0x4A, 0x8B, 0xFC, 0x9C, 0x33, 0xF8,
+        0xA7, 0x61, 0x48, 0x41, 0x7C, 0x9C, 0x77, 0x3F, 0xF5, 0x80,
+        0x23, 0x7D, 0x43, 0xB4, 0xD5, 0x88, 0x0A, 0xC9, 0x75, 0xD7,
+        0x44, 0x19, 0x4D, 0x77, 0x6C, 0x0B, 0x0A, 0x49, 0xAA, 0x1C,
+        0x2F, 0xD6, 0x5A, 0x44, 0xA6, 0x47, 0x4D, 0xE5, 0x36, 0x96,
+        0x40, 0x99, 0x2C, 0x56, 0x26, 0xB1, 0xF2, 0x92, 0x31, 0x59,
+        0xD7, 0x2C, 0xD4, 0xB4, 0x21, 0xD6, 0x65, 0x13, 0x0B, 0x3E,
+        0xFB, 0xFF, 0x04, 0xEB, 0xB9, 0x85, 0xB9, 0xD8, 0xD8, 0x28,
+        0x4F, 0x5C, 0x17, 0x96, 0xA3, 0x51, 0xBE, 0xFE, 0x7D, 0x0B,
+        0x1B, 0x48, 0x40, 0x25, 0x76, 0x94, 0xDC, 0x41, 0xFB, 0xBF,
+        0x73, 0x76, 0xDA, 0xEB, 0xB3, 0x62, 0xE7, 0xC1, 0xC8, 0x54,
+        0x6A, 0x93, 0xE1, 0x8D, 0x31, 0xE8, 0x3E, 0x3E, 0xDF, 0xBC,
+        0x87, 0x02, 0x30, 0x22, 0x57, 0xC4, 0xE0, 0x18, 0x7A, 0xD3,
+        0xAE, 0xE4, 0x02, 0x9B, 0xAA, 0xBD, 0x4E, 0x49, 0x47, 0x72,
+        0xE9, 0x8D, 0x13, 0x2D, 0x54, 0x9B, 0x00, 0xA7, 0x91, 0x61,
+        0x71, 0xC9, 0xCC, 0x48, 0x4F, 0xEE, 0xDF, 0x5E, 0x1B, 0x1A,
+        0xDF, 0x67, 0xD3, 0x20, 0xE6, 0x44, 0x45, 0x98, 0x7E, 0xE7,
+        0x0E, 0x63, 0x16, 0x83, 0xC9, 0x26, 0x5D, 0x90, 0xC1, 0xE5,
+        0x2A, 0x5C, 0x45, 0x54, 0x13, 0xB2, 0x81, 0x18, 0x06, 0x20,
+        0x2E, 0x2E, 0x66, 0x5A, 0xB5, 0x7B, 0x6E, 0xD6, 0x0C, 0x4E,
+        0x89, 0x01, 0x56, 0x70, 0xBB, 0xAE, 0xDE, 0xE9, 0x99, 0x5E,
+        0xD1, 0xB9, 0x3A, 0xB7, 0x6C, 0x17, 0xB6, 0x03, 0xA9, 0x08,
+        0xDD, 0x9C, 0xF4, 0x14, 0xC9, 0xC9, 0x59, 0x39, 0x72, 0xD4,
+        0x7E, 0x02, 0x37, 0x31, 0xCD, 0x0E, 0xA7, 0x3D, 0xF8, 0xF2,
+        0xCF, 0x6B, 0x15, 0xAB, 0x02, 0x03, 0x01, 0x00, 0x01, 0x02,
+        0x82, 0x02, 0x01, 0x00, 0xC5, 0x76, 0x57, 0x7D, 0xF1, 0x68,
+        0x1A, 0x8E, 0xC6, 0x63, 0xB9, 0x16, 0xA3, 0x2B, 0xE1, 0xC2,
+        0x74, 0xEA, 0x12, 0xC4, 0xD6, 0x41, 0x75, 0x6A, 0xA6, 0xD6,
+        0x9E, 0x1A, 0x7F, 0x95, 0xCC, 0x4A, 0xD1, 0xF4, 0xB3, 0x27,
+        0x26, 0x95, 0x5A, 0x91, 0x09, 0xE4, 0x40, 0x13, 0x45, 0x91,
+        0x9F, 0xA0, 0x2B, 0xE8, 0xC3, 0xDC, 0x5B, 0xF6, 0x7D, 0x0C,
+        0xC2, 0x0F, 0xA9, 0xE9, 0x75, 0x58, 0x7D, 0xEA, 0xD5, 0x4D,
+        0x92, 0x3E, 0xFC, 0x74, 0x28, 0x87, 0xC1, 0x3D, 0xB9, 0x21,
+        0x92, 0x4D, 0x28, 0x82, 0x84, 0xA8, 0xA2, 0x11, 0x93, 0xF2,
+        0x8C, 0x29, 0x1C, 0x19, 0xF8, 0x6D, 0x3F, 0x27, 0x51, 0xB5,
+        0x2D, 0xA3, 0xC7, 0x28, 0x1D, 0xC4, 0xFC, 0x98, 0x94, 0xA8,
+        0xD0, 0xFF, 0xF0, 0x0F, 0xDC, 0xF9, 0xED, 0xB3, 0xA2, 0xB6,
+        0xED, 0x0D, 0x5F, 0xBF, 0x78, 0x5C, 0xD7, 0xAF, 0xBD, 0xA3,
+        0xEF, 0x86, 0xE9, 0x51, 0x66, 0xDB, 0x52, 0x37, 0x47, 0x7F,
+        0xE9, 0x5F, 0x3C, 0x94, 0x83, 0x2D, 0xE8, 0x9C, 0x33, 0xF1,
+        0x6C, 0xE9, 0xF3, 0xA6, 0x97, 0xFE, 0xA7, 0xBF, 0x4D, 0x9B,
+        0x20, 0xD5, 0x2F, 0xDE, 0xA4, 0x06, 0xBB, 0xEE, 0x66, 0x49,
+        0x6B, 0xF5, 0x10, 0x85, 0x9F, 0x84, 0x5A, 0x52, 0x3E, 0x0C,
+        0xA0, 0x4A, 0x4C, 0xDA, 0x01, 0xC5, 0x62, 0x31, 0xB1, 0xEC,
+        0xF8, 0xDD, 0xA3, 0x3B, 0xCE, 0x41, 0x3A, 0x12, 0x79, 0xF9,
+        0x97, 0x5B, 0x07, 0x95, 0x9F, 0x86, 0xD6, 0x04, 0x73, 0x6C,
+        0xE8, 0x8F, 0x4C, 0x4C, 0x48, 0x1D, 0x85, 0xC4, 0xE7, 0xCE,
+        0xDE, 0x16, 0x31, 0xF6, 0x5C, 0x37, 0x54, 0x8E, 0x55, 0xBC,
+        0xAF, 0x2E, 0x47, 0xE8, 0xAC, 0x03, 0xB0, 0xA4, 0xF9, 0x90,
+        0x98, 0x99, 0xA4, 0xDC, 0x6E, 0x98, 0x08, 0x5C, 0x07, 0xBB,
+        0x08, 0x93, 0xAF, 0x61, 0x8D, 0x74, 0xA8, 0xF8, 0xC4, 0x89,
+        0x64, 0x10, 0xE1, 0xE6, 0xC0, 0xCD, 0x1D, 0x39, 0x20, 0xD6,
+        0x5A, 0x89, 0x83, 0xFC, 0x37, 0xE2, 0x12, 0x66, 0xA8, 0x12,
+        0xCC, 0x72, 0xBB, 0x1E, 0xFB, 0x6A, 0xE3, 0x7C, 0x71, 0x7E,
+        0xB9, 0x2E, 0x8E, 0x84, 0x66, 0xE1, 0xB9, 0xD0, 0x25, 0x9A,
+        0x6F, 0x9D, 0x19, 0xE6, 0x7E, 0xE8, 0xD8, 0xF0, 0xC5, 0x23,
+        0x16, 0x9A, 0x68, 0x2C, 0x1D, 0x55, 0xAE, 0x8E, 0x90, 0xEE,
+        0x8E, 0xEC, 0x5E, 0x46, 0x9D, 0x60, 0x52, 0x32, 0x17, 0x28,
+        0x59, 0xC4, 0x49, 0x2A, 0x20, 0x3E, 0x95, 0xC5, 0xDF, 0xF6,
+        0x3D, 0xF7, 0xC5, 0xCF, 0xB1, 0xC2, 0xC9, 0x76, 0xF8, 0x3D,
+        0xBE, 0xF4, 0x63, 0xFC, 0x2A, 0x00, 0x6F, 0x99, 0xA6, 0xB6,
+        0xAD, 0x35, 0xEE, 0xDE, 0xC5, 0xE0, 0x97, 0xC6, 0x73, 0xEE,
+        0x33, 0xA0, 0xA8, 0xFC, 0x4C, 0x8F, 0xF2, 0x8C, 0x61, 0xFB,
+        0x03, 0x19, 0xA1, 0xE8, 0x17, 0x4E, 0xE3, 0x21, 0x58, 0xCE,
+        0xFE, 0xF2, 0x5F, 0xBB, 0xDD, 0x4F, 0xF7, 0x18, 0xCB, 0x35,
+        0x57, 0xDD, 0xE5, 0x50, 0x2A, 0x7B, 0x1A, 0xE9, 0x12, 0xF2,
+        0x7A, 0x11, 0xB1, 0x43, 0xB9, 0x70, 0x07, 0x0C, 0x8F, 0x69,
+        0xB9, 0xE5, 0xA5, 0xC9, 0xE2, 0x1B, 0x96, 0x74, 0x11, 0xF5,
+        0x95, 0xB9, 0x58, 0xC0, 0xBD, 0x37, 0xFB, 0x28, 0x2A, 0xBD,
+        0x84, 0xB1, 0x2B, 0x67, 0x42, 0x82, 0xC3, 0x95, 0x55, 0x45,
+        0xD5, 0xEA, 0xC3, 0x8A, 0x42, 0x3A, 0x43, 0x17, 0x5E, 0xCD,
+        0xD2, 0xEA, 0xFC, 0xDF, 0x67, 0xEC, 0xE1, 0x6C, 0xA8, 0x03,
+        0x19, 0xB2, 0x1D, 0x4A, 0x5F, 0x4F, 0xE7, 0xD3, 0xE0, 0x86,
+        0xC5, 0x1A, 0x10, 0xC3, 0x08, 0xD2, 0xED, 0x85, 0x93, 0x08,
+        0x51, 0x05, 0xA6, 0x37, 0x15, 0x32, 0xBD, 0x6C, 0x73, 0x63,
+        0x01, 0x5D, 0x5B, 0x4F, 0x6A, 0xDC, 0x6D, 0x1D, 0x55, 0x91,
+        0x21, 0xE4, 0x8E, 0xB7, 0xF0, 0x81, 0x02, 0x82, 0x01, 0x01,
+        0x00, 0xFD, 0x27, 0xC8, 0xFE, 0x76, 0x5C, 0x89, 0x32, 0xCB,
+        0x8A, 0x22, 0x87, 0x61, 0x48, 0x91, 0x4A, 0x05, 0xAD, 0xA4,
+        0x5C, 0x8A, 0xCA, 0x5C, 0x02, 0x88, 0x7E, 0x51, 0xC5, 0x66,
+        0x90, 0x2C, 0xA3, 0xED, 0xA7, 0x43, 0x19, 0x0B, 0xA2, 0x42,
+        0xB4, 0xE0, 0xE0, 0x45, 0xBF, 0xFE, 0xA0, 0xF2, 0x75, 0x0B,
+        0x8E, 0x7D, 0x9D, 0x73, 0x67, 0xD3, 0x10, 0x09, 0xC5, 0xD9,
+        0x8C, 0xAD, 0x3A, 0x64, 0x72, 0xAD, 0x96, 0x35, 0x91, 0x0F,
+        0x4B, 0xC9, 0xBD, 0x4F, 0x65, 0x47, 0xA6, 0x2D, 0xEB, 0x3F,
+        0xE2, 0x99, 0x72, 0x66, 0x12, 0xED, 0xEB, 0xD2, 0x7C, 0xFF,
+        0x3A, 0x20, 0x37, 0x2A, 0xD3, 0x65, 0x51, 0x9B, 0xC3, 0xAA,
+        0x18, 0xB1, 0x1F, 0x6E, 0x9D, 0x40, 0x47, 0xA4, 0x1F, 0x82,
+        0x9B, 0xDB, 0x50, 0x6B, 0x86, 0x2F, 0xFB, 0x3F, 0x31, 0xB9,
+        0x81, 0x11, 0x04, 0x14, 0x63, 0x86, 0x4F, 0x40, 0x2A, 0xF5,
+        0xF9, 0x7C, 0xA1, 0x78, 0x19, 0x13, 0xD0, 0x51, 0x51, 0x0F,
+        0x79, 0x88, 0x8D, 0x14, 0xA3, 0xDE, 0xB6, 0x33, 0x29, 0x42,
+        0xB9, 0xE8, 0x59, 0x76, 0xF7, 0x43, 0x1A, 0xB6, 0xA6, 0xDF,
+        0x0A, 0xC1, 0x42, 0xC7, 0x3F, 0x1C, 0x7E, 0x5C, 0x2C, 0x91,
+        0x4B, 0x1E, 0xF8, 0x46, 0x91, 0x1F, 0xEE, 0x56, 0xB3, 0x0E,
+        0xC8, 0xD0, 0x31, 0xD3, 0x3D, 0xED, 0x3D, 0xD9, 0xC5, 0x30,
+        0x0C, 0x58, 0xD8, 0xB7, 0xB5, 0xEC, 0x14, 0xAC, 0x41, 0x64,
+        0x6D, 0xE4, 0xC6, 0x59, 0xFD, 0x14, 0x05, 0x60, 0x65, 0xD8,
+        0xC4, 0x84, 0x44, 0x7E, 0x1B, 0xB4, 0xA4, 0x16, 0x75, 0xC1,
+        0x27, 0x96, 0xB2, 0x19, 0xD6, 0x39, 0x54, 0xC0, 0x93, 0xF3,
+        0xD7, 0x1F, 0xCD, 0x1B, 0xDF, 0xF8, 0x12, 0x88, 0x14, 0x9F,
+        0x98, 0x05, 0x47, 0x46, 0x71, 0x81, 0x6C, 0xDF, 0x91, 0xEF,
+        0x53, 0xE3, 0xC5, 0xB1, 0x89, 0x2F, 0xE1, 0x02, 0x82, 0x01,
+        0x01, 0x00, 0xF8, 0x93, 0x4A, 0x28, 0x77, 0x94, 0xEF, 0xE9,
+        0xC4, 0x0A, 0xC3, 0xE8, 0x52, 0x59, 0xB6, 0x1D, 0x8D, 0xCE,
+        0x14, 0xE7, 0x43, 0xC6, 0xED, 0x09, 0x27, 0x5D, 0xF3, 0x8E,
+        0x08, 0x6A, 0x19, 0x6B, 0x2C, 0x97, 0x9B, 0x88, 0x53, 0x2B,
+        0xDA, 0xFE, 0x4B, 0x94, 0x66, 0x84, 0xD5, 0xA9, 0xCE, 0xA5,
+        0x43, 0x70, 0xFB, 0x01, 0x5A, 0x6F, 0xCD, 0xF7, 0xD1, 0x9D,
+        0x51, 0xEE, 0xA0, 0xDC, 0x46, 0xF5, 0x7D, 0xA7, 0xEE, 0xA0,
+        0x86, 0xB7, 0x83, 0xFF, 0x21, 0x8B, 0x76, 0x05, 0x7D, 0xDE,
+        0xC4, 0x26, 0x36, 0xBC, 0xB4, 0x8A, 0x48, 0xC3, 0x06, 0x90,
+        0x97, 0xE5, 0xA6, 0x38, 0xC3, 0xE6, 0x7C, 0xD0, 0xF8, 0x23,
+        0xD2, 0x33, 0x1F, 0x81, 0xC3, 0xE3, 0x7D, 0x85, 0x5A, 0x38,
+        0x10, 0x03, 0xE6, 0x88, 0xDB, 0xC8, 0x4C, 0xD0, 0xF7, 0xB2,
+        0x4D, 0x27, 0x33, 0x85, 0xCD, 0x3A, 0x74, 0x83, 0x6B, 0x82,
+        0x58, 0xD9, 0xDF, 0xEE, 0xF5, 0xD3, 0xE9, 0xFE, 0x1C, 0xEF,
+        0x06, 0x12, 0x16, 0xD1, 0x4C, 0xAE, 0x54, 0x4B, 0x0D, 0x1A,
+        0xBD, 0xE2, 0xCF, 0x56, 0xB3, 0x74, 0xBE, 0x44, 0x4F, 0xA4,
+        0x73, 0x0A, 0x98, 0x8D, 0x61, 0x84, 0x38, 0x46, 0xDC, 0x95,
+        0xCF, 0x3F, 0x6B, 0xE7, 0x65, 0x87, 0x02, 0xBF, 0x4B, 0x57,
+        0xE2, 0x3D, 0xC4, 0x2B, 0x1C, 0x82, 0x1D, 0xCC, 0x13, 0x7F,
+        0xC0, 0x06, 0x12, 0x8C, 0x6F, 0x97, 0x50, 0x7B, 0x8C, 0x81,
+        0xC3, 0x23, 0x15, 0xEB, 0x70, 0x07, 0x8E, 0xA1, 0x07, 0x1E,
+        0x59, 0xFA, 0x10, 0xCA, 0x7E, 0x0F, 0xE2, 0xBB, 0xEE, 0x86,
+        0x26, 0x1E, 0x55, 0xB9, 0x98, 0x66, 0x85, 0xEC, 0x27, 0xC5,
+        0xD9, 0x63, 0x8D, 0x51, 0x77, 0xAA, 0xA0, 0x36, 0x55, 0x33,
+        0x10, 0x21, 0x5E, 0xEC, 0x47, 0x67, 0x71, 0xD1, 0xAF, 0xFC,
+        0x3E, 0x50, 0xF5, 0xBE, 0xD6, 0x92, 0xE7, 0x0B, 0x02, 0x82,
+        0x01, 0x00, 0x21, 0x7C, 0x8A, 0xC4, 0xC6, 0x29, 0x55, 0x68,
+        0xA7, 0xAD, 0xDD, 0x05, 0x65, 0x63, 0xF0, 0xFC, 0x06, 0xA6,
+        0x42, 0x70, 0x8F, 0x57, 0x57, 0x36, 0x6A, 0x91, 0xB3, 0x05,
+        0x56, 0x9C, 0xC9, 0x9A, 0xE1, 0x8B, 0xD7, 0x7F, 0x4F, 0x9F,
+        0xA6, 0x0D, 0x41, 0x15, 0xC9, 0x84, 0x2D, 0x0D, 0x63, 0x25,
+        0x02, 0x63, 0x55, 0xD0, 0x66, 0xFC, 0x9B, 0xD9, 0xAA, 0x41,
+        0x46, 0x96, 0xAA, 0x2F, 0x68, 0x2C, 0x17, 0x34, 0x20, 0x5F,
+        0xD0, 0xD3, 0x28, 0x9B, 0x67, 0x0E, 0x31, 0x9D, 0x14, 0xC3,
+        0xE2, 0x8E, 0x79, 0xD7, 0xBD, 0x12, 0xD1, 0xEF, 0xF8, 0xC6,
+        0xDA, 0x07, 0xF9, 0x4C, 0xF2, 0xD8, 0x45, 0xB5, 0xB6, 0xD1,
+        0xFA, 0x05, 0x0C, 0x20, 0xE9, 0x43, 0xD9, 0xC5, 0xE0, 0x3A,
+        0xDE, 0xCE, 0xF9, 0x02, 0xB9, 0x46, 0x65, 0xC0, 0x69, 0x4A,
+        0x8D, 0x8C, 0x3A, 0x10, 0xFD, 0x15, 0x71, 0x25, 0xB8, 0x8A,
+        0x36, 0x41, 0x4B, 0x30, 0x1C, 0xAF, 0xCC, 0x84, 0x28, 0xCD,
+        0x7D, 0x2B, 0x89, 0x59, 0x88, 0x1A, 0x69, 0x12, 0x56, 0xD0,
+        0x25, 0x68, 0x6C, 0x08, 0xB1, 0x88, 0xE1, 0x92, 0x7E, 0x08,
+        0xB2, 0xC6, 0x3C, 0x6C, 0x35, 0xE8, 0xEE, 0x3E, 0xF4, 0xB8,
+        0x5C, 0x7B, 0xC0, 0x5B, 0xFD, 0x11, 0xA3, 0x54, 0xA6, 0x99,
+        0x46, 0xE2, 0x5F, 0x4F, 0xC7, 0xEE, 0x90, 0x1C, 0x37, 0x5B,
+        0x33, 0x10, 0xDF, 0x0B, 0xC3, 0xB9, 0x47, 0xC2, 0x30, 0x4A,
+        0xF2, 0x1A, 0xEB, 0x41, 0x25, 0x94, 0x29, 0x7A, 0xD0, 0x96,
+        0x88, 0x46, 0xEE, 0x6C, 0x14, 0xF6, 0x5B, 0x3D, 0xBD, 0x4E,
+        0xD4, 0x3F, 0x05, 0x5B, 0x07, 0xB9, 0xE3, 0x99, 0x87, 0x63,
+        0xCA, 0xC4, 0x71, 0x0B, 0x73, 0x9D, 0x7B, 0xB6, 0x0F, 0xD4,
+        0x12, 0x8C, 0x4C, 0x5E, 0x72, 0x3D, 0xFF, 0x6D, 0xC4, 0x61,
+        0x0C, 0x74, 0x5F, 0x53, 0xBE, 0x39, 0x34, 0x61, 0x02, 0x82,
+        0x01, 0x00, 0x5F, 0xF2, 0xF2, 0xB0, 0x16, 0x20, 0x8E, 0x4E,
+        0xCC, 0x96, 0x5F, 0x32, 0x80, 0xFF, 0x11, 0xF5, 0xEC, 0x73,
+        0xBC, 0xCB, 0xDB, 0xF4, 0xA0, 0x30, 0x65, 0x5A, 0xB5, 0x95,
+        0x80, 0x97, 0xFB, 0xC1, 0xCB, 0xCF, 0xA5, 0x80, 0x84, 0xA2,
+        0x2C, 0x00, 0xF6, 0x89, 0x8C, 0xDC, 0xFF, 0x60, 0x71, 0x5C,
+        0x87, 0x60, 0xC7, 0xF2, 0xA8, 0xC6, 0xF9, 0x59, 0x0C, 0x37,
+        0x4E, 0x95, 0xEE, 0xCF, 0xB8, 0x30, 0x30, 0x55, 0xAF, 0x1D,
+        0x95, 0x82, 0xA6, 0xD7, 0xC7, 0x49, 0xFE, 0xBF, 0x75, 0xEB,
+        0x94, 0x09, 0x30, 0x1D, 0xBD, 0x0E, 0x97, 0xB1, 0x78, 0x0A,
+        0x3E, 0x27, 0xAD, 0xF6, 0xC1, 0x5F, 0x69, 0x94, 0x7C, 0x03,
+        0xCF, 0xB2, 0x5E, 0x1A, 0x07, 0xD3, 0xFA, 0xF2, 0x8B, 0x75,
+        0x92, 0x70, 0xFE, 0xFE, 0x9A, 0xDF, 0x81, 0x0F, 0x34, 0x5D,
+        0x45, 0xBC, 0xB8, 0xFD, 0x8F, 0xCF, 0x5D, 0x84, 0x10, 0xEE,
+        0x9A, 0x7F, 0x57, 0x19, 0xF5, 0x17, 0xDC, 0x7D, 0x73, 0x0B,
+        0xAC, 0x6B, 0x35, 0x15, 0x8B, 0x24, 0xCB, 0x72, 0xC0, 0xD7,
+        0x2E, 0xAE, 0xAA, 0xDB, 0xCB, 0x9F, 0x67, 0x86, 0x14, 0xBB,
+        0xE4, 0x90, 0x15, 0x7C, 0x95, 0x44, 0xA5, 0x38, 0x6D, 0x13,
+        0x02, 0x91, 0x77, 0x84, 0x35, 0x43, 0x5D, 0x03, 0x1C, 0x01,
+        0x0B, 0x5A, 0x4E, 0x2B, 0x59, 0xF0, 0xBB, 0xB1, 0xB7, 0x61,
+        0x1B, 0x6C, 0xFC, 0xA1, 0xEA, 0xBD, 0x1C, 0x9A, 0xE4, 0x0C,
+        0x7E, 0x97, 0x3F, 0x71, 0xC6, 0xA7, 0x94, 0x1D, 0x82, 0x12,
+        0xEC, 0x26, 0x43, 0x6E, 0xF6, 0x24, 0x09, 0xA0, 0x03, 0x1D,
+        0x12, 0xFF, 0xA8, 0x95, 0x60, 0x47, 0x4A, 0xB0, 0x72, 0x55,
+        0xC3, 0x68, 0xD2, 0xF6, 0xBC, 0x5B, 0x47, 0x46, 0x51, 0xB2,
+        0xC9, 0x2A, 0x28, 0x6A, 0xC9, 0xD1, 0x1B, 0x35, 0x16, 0x5A,
+        0x26, 0x6F, 0xB7, 0xBB, 0xF7, 0x35, 0x73, 0x2B, 0x02, 0x82,
+        0x01, 0x00, 0x56, 0xBA, 0xD8, 0x02, 0xD7, 0x4B, 0x30, 0x5E,
+        0x1B, 0x1E, 0x2F, 0xF3, 0x0D, 0xBC, 0xF1, 0x05, 0x6A, 0x68,
+        0x4A, 0xE1, 0xEA, 0xB3, 0xDE, 0x61, 0x8C, 0x89, 0x44, 0xBA,
+        0x63, 0x5E, 0xDF, 0x05, 0x24, 0x32, 0x71, 0x65, 0x1A, 0x36,
+        0x2F, 0xBC, 0x07, 0x75, 0xA3, 0xCE, 0x9E, 0x52, 0x92, 0x95,
+        0x4D, 0x3F, 0xC9, 0x06, 0xBC, 0xA1, 0x14, 0x33, 0x37, 0x95,
+        0xAB, 0x9A, 0xEB, 0x04, 0xF6, 0x15, 0xC3, 0x9B, 0x10, 0x56,
+        0x53, 0xA2, 0x28, 0xF2, 0x68, 0xDA, 0x7D, 0x97, 0x52, 0x63,
+        0xAC, 0x9B, 0x56, 0xA9, 0xAB, 0x2E, 0x1E, 0x9E, 0x01, 0x70,
+        0xFF, 0x2B, 0x6D, 0x0C, 0x4B, 0xA6, 0xC3, 0x3A, 0xB3, 0xD1,
+        0xA7, 0x4B, 0x5E, 0x49, 0x2E, 0x95, 0xD6, 0x6A, 0xAE, 0x58,
+        0x13, 0x66, 0x8F, 0x2F, 0x93, 0xE4, 0x6E, 0x8B, 0xFA, 0x94,
+        0x30, 0x3E, 0xEC, 0x96, 0xAB, 0x46, 0x20, 0x3E, 0xC5, 0x30,
+        0xB4, 0xEB, 0x41, 0x00, 0x39, 0x60, 0x1D, 0xE1, 0x20, 0xCE,
+        0x31, 0x70, 0x17, 0x39, 0xCB, 0x76, 0x56, 0x6C, 0x55, 0x7B,
+        0x90, 0x20, 0xBC, 0x39, 0xB2, 0x5B, 0xD1, 0x28, 0x6F, 0x0C,
+        0x4F, 0x45, 0x6B, 0x82, 0xC4, 0x57, 0x23, 0x0C, 0x3F, 0x3F,
+        0x2D, 0x83, 0xB3, 0x3D, 0x8E, 0xF9, 0x1A, 0xDA, 0x77, 0x54,
+        0x2E, 0xFE, 0x16, 0x2E, 0xBA, 0x99, 0xDD, 0xCA, 0xB3, 0xD1,
+        0xD8, 0xBB, 0x87, 0xE1, 0xD0, 0xA9, 0xD4, 0xE6, 0x8F, 0xE8,
+        0x00, 0x3E, 0x49, 0x8A, 0xDD, 0xA6, 0x32, 0x91, 0x00, 0x31,
+        0x31, 0x21, 0x98, 0x18, 0x94, 0xC9, 0x2D, 0x27, 0x05, 0xB7,
+        0x9B, 0x09, 0x2E, 0xBB, 0x5D, 0xBF, 0x67, 0xE8, 0x0E, 0xD1,
+        0x44, 0x75, 0x80, 0x1D, 0x0A, 0x21, 0x8F, 0x95, 0x76, 0xB0,
+        0xFC, 0x19, 0x3C, 0xFF, 0x92, 0xEA, 0x01, 0x45, 0x89, 0xD1,
+        0x4E, 0xFE, 0x4D, 0x2B, 0x4B, 0x18, 0xE6, 0xCE
+};
+static const int sizeof_client_key_der_4096 = sizeof(client_key_der_4096);
+
+/* ./certs/4096/client-keyPub.der, 4096-bit */
+static const unsigned char client_keypub_der_4096[] =
+{
+        0x30, 0x82, 0x02, 0x22, 0x30, 0x0D, 0x06, 0x09, 0x2A, 0x86,
+        0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x01, 0x05, 0x00, 0x03,
+        0x82, 0x02, 0x0F, 0x00, 0x30, 0x82, 0x02, 0x0A, 0x02, 0x82,
+        0x02, 0x01, 0x00, 0xF5, 0xD0, 0x31, 0xE4, 0x71, 0x59, 0x58,
+        0xB3, 0x07, 0x50, 0xDD, 0x16, 0x79, 0xFC, 0xC6, 0x95, 0x50,
+        0xFC, 0x46, 0x0E, 0x57, 0x12, 0x86, 0x71, 0x8D, 0xE3, 0x9B,
+        0x4A, 0x33, 0xEA, 0x4F, 0xD9, 0x17, 0x13, 0x6D, 0x48, 0x69,
+        0xDF, 0x59, 0x11, 0x08, 0x02, 0x9D, 0xAF, 0x2B, 0xC7, 0x30,
+        0xBE, 0x0C, 0xDC, 0x87, 0xD4, 0x5A, 0x12, 0x09, 0x23, 0x5D,
+        0xE1, 0x76, 0x5A, 0x62, 0x37, 0x46, 0x74, 0xEF, 0x03, 0x05,
+        0xBB, 0x1E, 0x6D, 0x29, 0x75, 0x6C, 0x2E, 0x9D, 0x87, 0x0D,
+        0x8F, 0x87, 0xCB, 0x14, 0x95, 0x9B, 0xBE, 0x17, 0x6B, 0x51,
+        0xD1, 0x4C, 0xDA, 0xD7, 0x91, 0x66, 0xC5, 0x36, 0xEB, 0xE0,
+        0x07, 0x1A, 0x76, 0x4D, 0xB0, 0xFB, 0xC1, 0xF5, 0x5E, 0x05,
+        0xDB, 0xBA, 0xCB, 0x25, 0xD9, 0x99, 0x13, 0x1C, 0xC0, 0x35,
+        0xDC, 0x40, 0xE9, 0x36, 0xCD, 0xC4, 0xD5, 0x7A, 0x41, 0x70,
+        0x0F, 0x36, 0xEB, 0xA5, 0x4E, 0x17, 0x05, 0xD5, 0x75, 0x1B,
+        0x64, 0x62, 0x7A, 0x3F, 0x0D, 0x28, 0x48, 0x6A, 0xE3, 0xAC,
+        0x9C, 0xA8, 0x8F, 0xE9, 0xED, 0xF7, 0xCD, 0x24, 0xA0, 0xB1,
+        0xA0, 0x03, 0xAC, 0xE3, 0x03, 0xF5, 0x3F, 0xD1, 0x96, 0xFF,
+        0x2A, 0x7E, 0x08, 0xB1, 0xD3, 0xE0, 0x18, 0x14, 0xEC, 0x65,
+        0x37, 0x50, 0x43, 0xC2, 0x6A, 0x8C, 0xF4, 0x5B, 0xFE, 0xC4,
+        0xCB, 0x8D, 0x3F, 0x81, 0x02, 0xF7, 0xC2, 0xDD, 0xE4, 0xC1,
+        0x8E, 0x80, 0x0C, 0x04, 0x25, 0x2D, 0x80, 0x5A, 0x2E, 0x0F,
+        0x22, 0x35, 0x4A, 0xF4, 0x85, 0xED, 0x51, 0xD8, 0xAB, 0x6D,
+        0x8F, 0xA2, 0x3B, 0x24, 0x00, 0x6E, 0x81, 0xE2, 0x1E, 0x76,
+        0xD6, 0xAC, 0x31, 0x12, 0xDB, 0xF3, 0x8E, 0x07, 0xA1, 0xDE,
+        0x89, 0x4A, 0x39, 0x60, 0x77, 0xC5, 0xAA, 0xF1, 0x51, 0xE6,
+        0x06, 0xF1, 0x95, 0x56, 0x2A, 0xE1, 0x8E, 0x92, 0x30, 0x9F,
+        0xFE, 0x58, 0x44, 0xAC, 0x46, 0xF2, 0xFD, 0x9A, 0xFC, 0xA8,
+        0x1D, 0xA1, 0xD3, 0x55, 0x37, 0x4A, 0x8B, 0xFC, 0x9C, 0x33,
+        0xF8, 0xA7, 0x61, 0x48, 0x41, 0x7C, 0x9C, 0x77, 0x3F, 0xF5,
+        0x80, 0x23, 0x7D, 0x43, 0xB4, 0xD5, 0x88, 0x0A, 0xC9, 0x75,
+        0xD7, 0x44, 0x19, 0x4D, 0x77, 0x6C, 0x0B, 0x0A, 0x49, 0xAA,
+        0x1C, 0x2F, 0xD6, 0x5A, 0x44, 0xA6, 0x47, 0x4D, 0xE5, 0x36,
+        0x96, 0x40, 0x99, 0x2C, 0x56, 0x26, 0xB1, 0xF2, 0x92, 0x31,
+        0x59, 0xD7, 0x2C, 0xD4, 0xB4, 0x21, 0xD6, 0x65, 0x13, 0x0B,
+        0x3E, 0xFB, 0xFF, 0x04, 0xEB, 0xB9, 0x85, 0xB9, 0xD8, 0xD8,
+        0x28, 0x4F, 0x5C, 0x17, 0x96, 0xA3, 0x51, 0xBE, 0xFE, 0x7D,
+        0x0B, 0x1B, 0x48, 0x40, 0x25, 0x76, 0x94, 0xDC, 0x41, 0xFB,
+        0xBF, 0x73, 0x76, 0xDA, 0xEB, 0xB3, 0x62, 0xE7, 0xC1, 0xC8,
+        0x54, 0x6A, 0x93, 0xE1, 0x8D, 0x31, 0xE8, 0x3E, 0x3E, 0xDF,
+        0xBC, 0x87, 0x02, 0x30, 0x22, 0x57, 0xC4, 0xE0, 0x18, 0x7A,
+        0xD3, 0xAE, 0xE4, 0x02, 0x9B, 0xAA, 0xBD, 0x4E, 0x49, 0x47,
+        0x72, 0xE9, 0x8D, 0x13, 0x2D, 0x54, 0x9B, 0x00, 0xA7, 0x91,
+        0x61, 0x71, 0xC9, 0xCC, 0x48, 0x4F, 0xEE, 0xDF, 0x5E, 0x1B,
+        0x1A, 0xDF, 0x67, 0xD3, 0x20, 0xE6, 0x44, 0x45, 0x98, 0x7E,
+        0xE7, 0x0E, 0x63, 0x16, 0x83, 0xC9, 0x26, 0x5D, 0x90, 0xC1,
+        0xE5, 0x2A, 0x5C, 0x45, 0x54, 0x13, 0xB2, 0x81, 0x18, 0x06,
+        0x20, 0x2E, 0x2E, 0x66, 0x5A, 0xB5, 0x7B, 0x6E, 0xD6, 0x0C,
+        0x4E, 0x89, 0x01, 0x56, 0x70, 0xBB, 0xAE, 0xDE, 0xE9, 0x99,
+        0x5E, 0xD1, 0xB9, 0x3A, 0xB7, 0x6C, 0x17, 0xB6, 0x03, 0xA9,
+        0x08, 0xDD, 0x9C, 0xF4, 0x14, 0xC9, 0xC9, 0x59, 0x39, 0x72,
+        0xD4, 0x7E, 0x02, 0x37, 0x31, 0xCD, 0x0E, 0xA7, 0x3D, 0xF8,
+        0xF2, 0xCF, 0x6B, 0x15, 0xAB, 0x02, 0x03, 0x01, 0x00, 0x01
+
+};
+static const int sizeof_client_keypub_der_4096 = sizeof(client_keypub_der_4096);
+
+/* ./certs/4096/client-cert.der, 4096-bit */
+static const unsigned char client_cert_der_4096[] =
+{
+        0x30, 0x82, 0x06, 0xE0, 0x30, 0x82, 0x04, 0xC8, 0xA0, 0x03,
+        0x02, 0x01, 0x02, 0x02, 0x14, 0x2F, 0x0F, 0xAB, 0x23, 0xBC,
+        0xA3, 0x14, 0x07, 0x91, 0x06, 0x55, 0x35, 0x01, 0x63, 0x7F,
+        0x42, 0xBD, 0xFB, 0xF2, 0x43, 0x30, 0x0D, 0x06, 0x09, 0x2A,
+        0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0B, 0x05, 0x00,
+        0x30, 0x81, 0x9E, 0x31, 0x0B, 0x30, 0x09, 0x06, 0x03, 0x55,
+        0x04, 0x06, 0x13, 0x02, 0x55, 0x53, 0x31, 0x10, 0x30, 0x0E,
+        0x06, 0x03, 0x55, 0x04, 0x08, 0x0C, 0x07, 0x4D, 0x6F, 0x6E,
+        0x74, 0x61, 0x6E, 0x61, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03,
+        0x55, 0x04, 0x07, 0x0C, 0x07, 0x42, 0x6F, 0x7A, 0x65, 0x6D,
+        0x61, 0x6E, 0x31, 0x15, 0x30, 0x13, 0x06, 0x03, 0x55, 0x04,
+        0x0A, 0x0C, 0x0C, 0x77, 0x6F, 0x6C, 0x66, 0x53, 0x53, 0x4C,
+        0x5F, 0x34, 0x30, 0x39, 0x36, 0x31, 0x19, 0x30, 0x17, 0x06,
+        0x03, 0x55, 0x04, 0x0B, 0x0C, 0x10, 0x50, 0x72, 0x6F, 0x67,
+        0x72, 0x61, 0x6D, 0x6D, 0x69, 0x6E, 0x67, 0x2D, 0x34, 0x30,
+        0x39, 0x36, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 0x04,
+        0x03, 0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77, 0x6F, 0x6C,
+        0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x31, 0x1F,
+        0x30, 0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D,
+        0x01, 0x09, 0x01, 0x16, 0x10, 0x69, 0x6E, 0x66, 0x6F, 0x40,
+        0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F,
+        0x6D, 0x30, 0x1E, 0x17, 0x0D, 0x31, 0x39, 0x30, 0x37, 0x30,
+        0x39, 0x30, 0x33, 0x30, 0x36, 0x30, 0x32, 0x5A, 0x17, 0x0D,
+        0x32, 0x32, 0x30, 0x34, 0x30, 0x34, 0x30, 0x33, 0x30, 0x36,
+        0x30, 0x32, 0x5A, 0x30, 0x81, 0x9E, 0x31, 0x0B, 0x30, 0x09,
+        0x06, 0x03, 0x55, 0x04, 0x06, 0x13, 0x02, 0x55, 0x53, 0x31,
+        0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x08, 0x0C, 0x07,
+        0x4D, 0x6F, 0x6E, 0x74, 0x61, 0x6E, 0x61, 0x31, 0x10, 0x30,
+        0x0E, 0x06, 0x03, 0x55, 0x04, 0x07, 0x0C, 0x07, 0x42, 0x6F,
+        0x7A, 0x65, 0x6D, 0x61, 0x6E, 0x31, 0x15, 0x30, 0x13, 0x06,
+        0x03, 0x55, 0x04, 0x0A, 0x0C, 0x0C, 0x77, 0x6F, 0x6C, 0x66,
+        0x53, 0x53, 0x4C, 0x5F, 0x34, 0x30, 0x39, 0x36, 0x31, 0x19,
+        0x30, 0x17, 0x06, 0x03, 0x55, 0x04, 0x0B, 0x0C, 0x10, 0x50,
+        0x72, 0x6F, 0x67, 0x72, 0x61, 0x6D, 0x6D, 0x69, 0x6E, 0x67,
+        0x2D, 0x34, 0x30, 0x39, 0x36, 0x31, 0x18, 0x30, 0x16, 0x06,
+        0x03, 0x55, 0x04, 0x03, 0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E,
+        0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F,
+        0x6D, 0x31, 0x1F, 0x30, 0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48,
+        0x86, 0xF7, 0x0D, 0x01, 0x09, 0x01, 0x16, 0x10, 0x69, 0x6E,
+        0x66, 0x6F, 0x40, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C,
+        0x2E, 0x63, 0x6F, 0x6D, 0x30, 0x82, 0x02, 0x22, 0x30, 0x0D,
+        0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01,
+        0x01, 0x05, 0x00, 0x03, 0x82, 0x02, 0x0F, 0x00, 0x30, 0x82,
+        0x02, 0x0A, 0x02, 0x82, 0x02, 0x01, 0x00, 0xF5, 0xD0, 0x31,
+        0xE4, 0x71, 0x59, 0x58, 0xB3, 0x07, 0x50, 0xDD, 0x16, 0x79,
+        0xFC, 0xC6, 0x95, 0x50, 0xFC, 0x46, 0x0E, 0x57, 0x12, 0x86,
+        0x71, 0x8D, 0xE3, 0x9B, 0x4A, 0x33, 0xEA, 0x4F, 0xD9, 0x17,
+        0x13, 0x6D, 0x48, 0x69, 0xDF, 0x59, 0x11, 0x08, 0x02, 0x9D,
+        0xAF, 0x2B, 0xC7, 0x30, 0xBE, 0x0C, 0xDC, 0x87, 0xD4, 0x5A,
+        0x12, 0x09, 0x23, 0x5D, 0xE1, 0x76, 0x5A, 0x62, 0x37, 0x46,
+        0x74, 0xEF, 0x03, 0x05, 0xBB, 0x1E, 0x6D, 0x29, 0x75, 0x6C,
+        0x2E, 0x9D, 0x87, 0x0D, 0x8F, 0x87, 0xCB, 0x14, 0x95, 0x9B,
+        0xBE, 0x17, 0x6B, 0x51, 0xD1, 0x4C, 0xDA, 0xD7, 0x91, 0x66,
+        0xC5, 0x36, 0xEB, 0xE0, 0x07, 0x1A, 0x76, 0x4D, 0xB0, 0xFB,
+        0xC1, 0xF5, 0x5E, 0x05, 0xDB, 0xBA, 0xCB, 0x25, 0xD9, 0x99,
+        0x13, 0x1C, 0xC0, 0x35, 0xDC, 0x40, 0xE9, 0x36, 0xCD, 0xC4,
+        0xD5, 0x7A, 0x41, 0x70, 0x0F, 0x36, 0xEB, 0xA5, 0x4E, 0x17,
+        0x05, 0xD5, 0x75, 0x1B, 0x64, 0x62, 0x7A, 0x3F, 0x0D, 0x28,
+        0x48, 0x6A, 0xE3, 0xAC, 0x9C, 0xA8, 0x8F, 0xE9, 0xED, 0xF7,
+        0xCD, 0x24, 0xA0, 0xB1, 0xA0, 0x03, 0xAC, 0xE3, 0x03, 0xF5,
+        0x3F, 0xD1, 0x96, 0xFF, 0x2A, 0x7E, 0x08, 0xB1, 0xD3, 0xE0,
+        0x18, 0x14, 0xEC, 0x65, 0x37, 0x50, 0x43, 0xC2, 0x6A, 0x8C,
+        0xF4, 0x5B, 0xFE, 0xC4, 0xCB, 0x8D, 0x3F, 0x81, 0x02, 0xF7,
+        0xC2, 0xDD, 0xE4, 0xC1, 0x8E, 0x80, 0x0C, 0x04, 0x25, 0x2D,
+        0x80, 0x5A, 0x2E, 0x0F, 0x22, 0x35, 0x4A, 0xF4, 0x85, 0xED,
+        0x51, 0xD8, 0xAB, 0x6D, 0x8F, 0xA2, 0x3B, 0x24, 0x00, 0x6E,
+        0x81, 0xE2, 0x1E, 0x76, 0xD6, 0xAC, 0x31, 0x12, 0xDB, 0xF3,
+        0x8E, 0x07, 0xA1, 0xDE, 0x89, 0x4A, 0x39, 0x60, 0x77, 0xC5,
+        0xAA, 0xF1, 0x51, 0xE6, 0x06, 0xF1, 0x95, 0x56, 0x2A, 0xE1,
+        0x8E, 0x92, 0x30, 0x9F, 0xFE, 0x58, 0x44, 0xAC, 0x46, 0xF2,
+        0xFD, 0x9A, 0xFC, 0xA8, 0x1D, 0xA1, 0xD3, 0x55, 0x37, 0x4A,
+        0x8B, 0xFC, 0x9C, 0x33, 0xF8, 0xA7, 0x61, 0x48, 0x41, 0x7C,
+        0x9C, 0x77, 0x3F, 0xF5, 0x80, 0x23, 0x7D, 0x43, 0xB4, 0xD5,
+        0x88, 0x0A, 0xC9, 0x75, 0xD7, 0x44, 0x19, 0x4D, 0x77, 0x6C,
+        0x0B, 0x0A, 0x49, 0xAA, 0x1C, 0x2F, 0xD6, 0x5A, 0x44, 0xA6,
+        0x47, 0x4D, 0xE5, 0x36, 0x96, 0x40, 0x99, 0x2C, 0x56, 0x26,
+        0xB1, 0xF2, 0x92, 0x31, 0x59, 0xD7, 0x2C, 0xD4, 0xB4, 0x21,
+        0xD6, 0x65, 0x13, 0x0B, 0x3E, 0xFB, 0xFF, 0x04, 0xEB, 0xB9,
+        0x85, 0xB9, 0xD8, 0xD8, 0x28, 0x4F, 0x5C, 0x17, 0x96, 0xA3,
+        0x51, 0xBE, 0xFE, 0x7D, 0x0B, 0x1B, 0x48, 0x40, 0x25, 0x76,
+        0x94, 0xDC, 0x41, 0xFB, 0xBF, 0x73, 0x76, 0xDA, 0xEB, 0xB3,
+        0x62, 0xE7, 0xC1, 0xC8, 0x54, 0x6A, 0x93, 0xE1, 0x8D, 0x31,
+        0xE8, 0x3E, 0x3E, 0xDF, 0xBC, 0x87, 0x02, 0x30, 0x22, 0x57,
+        0xC4, 0xE0, 0x18, 0x7A, 0xD3, 0xAE, 0xE4, 0x02, 0x9B, 0xAA,
+        0xBD, 0x4E, 0x49, 0x47, 0x72, 0xE9, 0x8D, 0x13, 0x2D, 0x54,
+        0x9B, 0x00, 0xA7, 0x91, 0x61, 0x71, 0xC9, 0xCC, 0x48, 0x4F,
+        0xEE, 0xDF, 0x5E, 0x1B, 0x1A, 0xDF, 0x67, 0xD3, 0x20, 0xE6,
+        0x44, 0x45, 0x98, 0x7E, 0xE7, 0x0E, 0x63, 0x16, 0x83, 0xC9,
+        0x26, 0x5D, 0x90, 0xC1, 0xE5, 0x2A, 0x5C, 0x45, 0x54, 0x13,
+        0xB2, 0x81, 0x18, 0x06, 0x20, 0x2E, 0x2E, 0x66, 0x5A, 0xB5,
+        0x7B, 0x6E, 0xD6, 0x0C, 0x4E, 0x89, 0x01, 0x56, 0x70, 0xBB,
+        0xAE, 0xDE, 0xE9, 0x99, 0x5E, 0xD1, 0xB9, 0x3A, 0xB7, 0x6C,
+        0x17, 0xB6, 0x03, 0xA9, 0x08, 0xDD, 0x9C, 0xF4, 0x14, 0xC9,
+        0xC9, 0x59, 0x39, 0x72, 0xD4, 0x7E, 0x02, 0x37, 0x31, 0xCD,
+        0x0E, 0xA7, 0x3D, 0xF8, 0xF2, 0xCF, 0x6B, 0x15, 0xAB, 0x02,
+        0x03, 0x01, 0x00, 0x01, 0xA3, 0x82, 0x01, 0x12, 0x30, 0x82,
+        0x01, 0x0E, 0x30, 0x1D, 0x06, 0x03, 0x55, 0x1D, 0x0E, 0x04,
+        0x16, 0x04, 0x14, 0xFA, 0x54, 0x89, 0x67, 0xE5, 0x5F, 0xB7,
+        0x31, 0x40, 0xEA, 0xFD, 0xE7, 0xF6, 0xA3, 0xC6, 0x5A, 0x56,
+        0x16, 0xA5, 0x6E, 0x30, 0x81, 0xDE, 0x06, 0x03, 0x55, 0x1D,
+        0x23, 0x04, 0x81, 0xD6, 0x30, 0x81, 0xD3, 0x80, 0x14, 0xFA,
+        0x54, 0x89, 0x67, 0xE5, 0x5F, 0xB7, 0x31, 0x40, 0xEA, 0xFD,
+        0xE7, 0xF6, 0xA3, 0xC6, 0x5A, 0x56, 0x16, 0xA5, 0x6E, 0xA1,
+        0x81, 0xA4, 0xA4, 0x81, 0xA1, 0x30, 0x81, 0x9E, 0x31, 0x0B,
+        0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 0x06, 0x13, 0x02, 0x55,
+        0x53, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x08,
+        0x0C, 0x07, 0x4D, 0x6F, 0x6E, 0x74, 0x61, 0x6E, 0x61, 0x31,
+        0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x07, 0x0C, 0x07,
+        0x42, 0x6F, 0x7A, 0x65, 0x6D, 0x61, 0x6E, 0x31, 0x15, 0x30,
+        0x13, 0x06, 0x03, 0x55, 0x04, 0x0A, 0x0C, 0x0C, 0x77, 0x6F,
+        0x6C, 0x66, 0x53, 0x53, 0x4C, 0x5F, 0x34, 0x30, 0x39, 0x36,
+        0x31, 0x19, 0x30, 0x17, 0x06, 0x03, 0x55, 0x04, 0x0B, 0x0C,
+        0x10, 0x50, 0x72, 0x6F, 0x67, 0x72, 0x61, 0x6D, 0x6D, 0x69,
+        0x6E, 0x67, 0x2D, 0x34, 0x30, 0x39, 0x36, 0x31, 0x18, 0x30,
+        0x16, 0x06, 0x03, 0x55, 0x04, 0x03, 0x0C, 0x0F, 0x77, 0x77,
+        0x77, 0x2E, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E,
+        0x63, 0x6F, 0x6D, 0x31, 0x1F, 0x30, 0x1D, 0x06, 0x09, 0x2A,
+        0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x09, 0x01, 0x16, 0x10,
+        0x69, 0x6E, 0x66, 0x6F, 0x40, 0x77, 0x6F, 0x6C, 0x66, 0x73,
+        0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x82, 0x14, 0x2F, 0x0F,
+        0xAB, 0x23, 0xBC, 0xA3, 0x14, 0x07, 0x91, 0x06, 0x55, 0x35,
+        0x01, 0x63, 0x7F, 0x42, 0xBD, 0xFB, 0xF2, 0x43, 0x30, 0x0C,
+        0x06, 0x03, 0x55, 0x1D, 0x13, 0x04, 0x05, 0x30, 0x03, 0x01,
+        0x01, 0xFF, 0x30, 0x0D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86,
+        0xF7, 0x0D, 0x01, 0x01, 0x0B, 0x05, 0x00, 0x03, 0x82, 0x02,
+        0x01, 0x00, 0x57, 0x0D, 0x97, 0x98, 0x78, 0xBF, 0x2A, 0x31,
+        0x9A, 0x39, 0x41, 0x38, 0x33, 0x46, 0xD5, 0x50, 0x47, 0xE8,
+        0x19, 0x62, 0xA8, 0x36, 0x1E, 0xB7, 0xFD, 0xD1, 0xBC, 0x50,
+        0x5C, 0x3A, 0xEB, 0x96, 0x1A, 0x9B, 0x43, 0xB0, 0x67, 0x5D,
+        0xF4, 0x51, 0x77, 0x87, 0x33, 0x0B, 0x90, 0x6F, 0xE8, 0xD3,
+        0x82, 0x4D, 0x1A, 0xAA, 0x93, 0x5F, 0x7D, 0x78, 0xB1, 0xE0,
+        0x7B, 0xEE, 0x88, 0x01, 0xE7, 0xB3, 0xFA, 0x7E, 0x0B, 0x76,
+        0x9C, 0x9E, 0x81, 0x36, 0xE4, 0xA3, 0xC1, 0x41, 0x62, 0xA4,
+        0x0A, 0x7E, 0x24, 0xD0, 0xAB, 0x9F, 0xBA, 0xD8, 0x1E, 0x38,
+        0xAD, 0xF1, 0x12, 0x52, 0x0D, 0xF2, 0x96, 0x8A, 0x0B, 0x25,
+        0xA2, 0x49, 0x3F, 0x88, 0x5B, 0xEA, 0x23, 0x87, 0x26, 0x22,
+        0x7A, 0xB9, 0x60, 0x6B, 0xD6, 0x7A, 0x88, 0x37, 0xAC, 0x64,
+        0x9B, 0x18, 0x51, 0x07, 0xEA, 0xDF, 0x00, 0x96, 0x70, 0x95,
+        0x88, 0x9D, 0x8F, 0xAF, 0xBE, 0x3C, 0x4E, 0xC7, 0x5E, 0x55,
+        0x15, 0x3D, 0x1F, 0xE4, 0x2D, 0xDC, 0xC9, 0xA3, 0xAE, 0xAF,
+        0xFA, 0x44, 0xA8, 0xE2, 0xF4, 0xDF, 0x8E, 0xCD, 0xF9, 0x10,
+        0x7F, 0x8B, 0x86, 0xCC, 0x6D, 0x45, 0x91, 0x91, 0x4F, 0xE3,
+        0xD0, 0xA7, 0xD2, 0xD9, 0x8E, 0x09, 0xC6, 0xF8, 0xEB, 0xE7,
+        0xBD, 0x17, 0x19, 0xD6, 0xE7, 0x1A, 0xB8, 0xCA, 0x4D, 0xEC,
+        0x34, 0x07, 0x7D, 0x2D, 0xE8, 0x23, 0x9D, 0x82, 0xE9, 0xF7,
+        0x47, 0x03, 0xAB, 0x5F, 0x7C, 0xF5, 0x41, 0x6F, 0x70, 0x11,
+        0xCB, 0x24, 0xD8, 0x23, 0xC2, 0x65, 0x31, 0xB7, 0x0B, 0x8F,
+        0x0A, 0x26, 0x5B, 0x0F, 0xF6, 0x9B, 0x11, 0x7F, 0x9A, 0x8D,
+        0x94, 0x6D, 0x5A, 0x9C, 0x5E, 0x73, 0x35, 0x15, 0x7B, 0xE3,
+        0x09, 0xE8, 0x08, 0xD0, 0x3F, 0xB4, 0xE5, 0x29, 0x2C, 0xF6,
+        0x3E, 0x71, 0x6E, 0xF4, 0x1B, 0x20, 0x55, 0x34, 0x40, 0x2F,
+        0xB0, 0x9B, 0xDD, 0xF1, 0xDC, 0xBF, 0x17, 0x1D, 0xA7, 0x2D,
+        0x85, 0x01, 0xD6, 0xD2, 0xB2, 0x56, 0x56, 0x98, 0x33, 0x85,
+        0xED, 0xF6, 0xA3, 0xF6, 0x3E, 0x7B, 0xF4, 0x03, 0xA4, 0x58,
+        0x8E, 0xC5, 0x5B, 0xAB, 0x66, 0xE8, 0x0F, 0x34, 0x17, 0x2D,
+        0x33, 0x36, 0x71, 0x0C, 0xB8, 0xD9, 0x78, 0xE7, 0x06, 0xFC,
+        0xDA, 0x4F, 0xA1, 0xFA, 0xDB, 0x74, 0xCE, 0xEA, 0x85, 0x27,
+        0xF9, 0x75, 0xA9, 0xAD, 0x50, 0x86, 0x6E, 0xEA, 0x01, 0x01,
+        0x19, 0x0D, 0x28, 0x4A, 0xED, 0x06, 0xBE, 0x65, 0x70, 0xB2,
+        0x06, 0x46, 0x2E, 0x16, 0x57, 0xDF, 0x55, 0xC7, 0x8E, 0xCD,
+        0x5B, 0xAD, 0x66, 0x28, 0xB8, 0x74, 0x87, 0xBF, 0xC4, 0xC7,
+        0x08, 0x3F, 0x37, 0xA3, 0x23, 0x84, 0x9F, 0x4E, 0xE8, 0x48,
+        0x6C, 0x8D, 0x54, 0x9F, 0xFB, 0xE0, 0xFB, 0x53, 0xA3, 0x41,
+        0xE1, 0x68, 0x8A, 0x94, 0xC9, 0xF5, 0xEE, 0x3E, 0x15, 0x46,
+        0xD2, 0x62, 0x33, 0x86, 0x86, 0x06, 0x34, 0xB4, 0xE4, 0x2F,
+        0xDA, 0x28, 0x2E, 0x2F, 0xC0, 0xBD, 0x75, 0xE8, 0x2C, 0x3F,
+        0xE2, 0xA5, 0x43, 0x7D, 0x02, 0xEB, 0x25, 0xB9, 0xEF, 0x87,
+        0x8A, 0xD7, 0x57, 0x61, 0x16, 0xE8, 0x9E, 0x83, 0x65, 0xF9,
+        0x10, 0xF4, 0x5E, 0x5F, 0x1C, 0x7A, 0x25, 0xD6, 0x47, 0xBD,
+        0x29, 0xC5, 0x4F, 0x8B, 0xB9, 0x6A, 0x48, 0x7A, 0x9B, 0x1E,
+        0x6D, 0x77, 0x8E, 0x72, 0x6C, 0x0C, 0x07, 0xFE, 0x4C, 0xC5,
+        0xCF, 0x55, 0x0E, 0xCB, 0x4B, 0xAD, 0x16, 0xE1, 0xE2, 0x54,
+        0xB8, 0x9D, 0x34, 0x03, 0xD1, 0x8D, 0xB7, 0x37, 0x9B, 0xE3,
+        0x5A, 0x32, 0x60, 0x03, 0x7F, 0x61, 0x0F, 0x50, 0x0B, 0x72,
+        0x54, 0x8B, 0x0D, 0xC7, 0x97, 0x7E, 0xBB, 0x9B, 0xB2, 0xF7,
+        0x73, 0x47, 0x71, 0x7B, 0x78, 0x65, 0x36, 0xDF, 0x57, 0x72,
+        0x9E, 0x42, 0x9C, 0x8A
+};
+static const int sizeof_client_cert_der_4096 = sizeof(client_cert_der_4096);
+
+/* ./certs/dh4096.der, 4096-bit */
+static const unsigned char dh_key_der_4096[] =
+{
+        0x30, 0x82, 0x02, 0x08, 0x02, 0x82, 0x02, 0x01, 0x00, 0xE9,
+        0x0E, 0x3E, 0x79, 0x4F, 0xC9, 0xB2, 0xA0, 0xB1, 0xDB, 0x2F,
+        0x1E, 0x24, 0x21, 0x90, 0x5C, 0x50, 0xA4, 0x34, 0xDB, 0x99,
+        0x90, 0xAC, 0xF7, 0xBF, 0x2F, 0x01, 0x4B, 0xAC, 0x87, 0x70,
+        0xBA, 0xEC, 0xD1, 0x64, 0xDE, 0x04, 0xCA, 0xFC, 0xF9, 0x51,
+        0x69, 0x1E, 0xB7, 0x99, 0xE2, 0xB4, 0x0D, 0xDB, 0x5D, 0x78,
+        0x38, 0x38, 0x41, 0x05, 0xE8, 0x67, 0x48, 0x65, 0x54, 0x71,
+        0xCC, 0xC9, 0xAA, 0x95, 0x1E, 0xD4, 0xBF, 0xBC, 0xCA, 0x5D,
+        0xC2, 0x9C, 0x9E, 0x7E, 0x5E, 0x94, 0x5B, 0x2F, 0x60, 0x72,
+        0xED, 0xEB, 0x54, 0x0C, 0x48, 0x2B, 0x21, 0x74, 0x4D, 0x37,
+        0x04, 0x5A, 0x2F, 0x8B, 0x24, 0x4A, 0xDB, 0xEE, 0xFA, 0xA9,
+        0x94, 0x13, 0x8F, 0x52, 0x4A, 0x1B, 0xAE, 0xE6, 0xC8, 0x7F,
+        0x99, 0x09, 0x23, 0x84, 0x89, 0xE9, 0xA6, 0x53, 0x82, 0xB6,
+        0x03, 0x6D, 0x38, 0x5D, 0x2E, 0xEB, 0x0B, 0xF0, 0xE6, 0xAA,
+        0xB1, 0x8B, 0x51, 0xFC, 0xD6, 0x13, 0xFB, 0x20, 0xCB, 0xDF,
+        0x79, 0x97, 0xDB, 0x55, 0x74, 0xC2, 0x21, 0xE8, 0xDB, 0x8C,
+        0x6A, 0x95, 0x2D, 0x51, 0x91, 0xA7, 0xA1, 0x3C, 0x9B, 0xEF,
+        0xF5, 0x43, 0xAC, 0xA6, 0x69, 0xCE, 0x66, 0x5C, 0xD5, 0xB1,
+        0xF8, 0xBA, 0xD4, 0x86, 0x25, 0x29, 0x2E, 0x0E, 0x23, 0x05,
+        0xDA, 0x7C, 0x7C, 0xC2, 0x7B, 0xC8, 0xB5, 0x79, 0x84, 0x6D,
+        0x68, 0x2D, 0x82, 0x4A, 0x35, 0x9F, 0xDC, 0x0E, 0x63, 0x2B,
+        0x58, 0x5F, 0x34, 0x7E, 0xA8, 0x73, 0xCE, 0x44, 0x53, 0x11,
+        0xE3, 0xDB, 0x46, 0xFA, 0x3A, 0xC3, 0xDA, 0x63, 0xA5, 0x65,
+        0x56, 0x99, 0xA5, 0x91, 0x27, 0xD6, 0xE7, 0xDF, 0x2D, 0xEF,
+        0xA0, 0x81, 0xB6, 0x07, 0x3A, 0xC6, 0xC1, 0x2B, 0xA1, 0x3A,
+        0x74, 0xB4, 0xE9, 0xE1, 0x2F, 0x6B, 0x2B, 0xE4, 0xF0, 0x98,
+        0xBE, 0x6F, 0xCB, 0xBB, 0xAE, 0x8D, 0xD2, 0x7E, 0x1B, 0x6F,
+        0xBA, 0xF2, 0xB2, 0xB8, 0xB1, 0x5D, 0x9E, 0x79, 0x19, 0xF7,
+        0x94, 0xB2, 0xC1, 0x17, 0x5E, 0x9B, 0xB3, 0x05, 0x67, 0x6D,
+        0x5C, 0x62, 0x64, 0xA8, 0x2B, 0xB0, 0x36, 0x3D, 0xF9, 0x4C,
+        0x65, 0x53, 0xEE, 0x2E, 0x55, 0x69, 0xCC, 0x1C, 0xF5, 0x96,
+        0xDC, 0xBE, 0x60, 0x5E, 0x37, 0xEE, 0xD4, 0x63, 0x96, 0x51,
+        0x97, 0x96, 0x14, 0x3C, 0x61, 0xBF, 0x53, 0xAA, 0x24, 0xB5,
+        0x24, 0x5B, 0x26, 0x67, 0xAD, 0x02, 0x67, 0xB8, 0xD3, 0x05,
+        0x6E, 0xA4, 0x8F, 0x46, 0x91, 0x9D, 0x84, 0xA6, 0x2C, 0x44,
+        0x9F, 0x2D, 0x18, 0x2F, 0x73, 0xA5, 0xE5, 0xC4, 0xD9, 0x4F,
+        0xD9, 0x9F, 0xF5, 0xC0, 0xC5, 0x48, 0xE8, 0x23, 0x32, 0xC4,
+        0x4A, 0xCE, 0xFF, 0x3B, 0x16, 0x87, 0x85, 0xA5, 0x1F, 0x22,
+        0xA8, 0x0B, 0x91, 0x97, 0x24, 0x95, 0x07, 0xC8, 0x73, 0xD2,
+        0xB0, 0x01, 0xF8, 0x20, 0xA9, 0xAB, 0x6B, 0x71, 0x79, 0x24,
+        0xF3, 0x79, 0xB5, 0x9B, 0x00, 0xF5, 0xF9, 0xAE, 0x23, 0xAC,
+        0xEA, 0xE1, 0x48, 0x88, 0x28, 0x53, 0xE0, 0xC8, 0x76, 0x29,
+        0xAE, 0x3E, 0x25, 0x9F, 0x1C, 0xC5, 0x8A, 0x86, 0x33, 0x02,
+        0x21, 0xAB, 0xA5, 0x10, 0xF0, 0x07, 0x1B, 0x56, 0x8F, 0xCD,
+        0xFC, 0x87, 0x9E, 0x2E, 0xD0, 0x44, 0x98, 0x44, 0x99, 0xB3,
+        0xC2, 0x14, 0xCE, 0xD8, 0x93, 0xEA, 0xD1, 0x82, 0x3C, 0x1B,
+        0x49, 0xE8, 0x6F, 0x04, 0xB2, 0xF5, 0xAF, 0x9B, 0x37, 0x7D,
+        0xE5, 0xE0, 0x56, 0xE9, 0xEE, 0x00, 0x58, 0x25, 0x16, 0x23,
+        0xC3, 0x8E, 0xF0, 0xB9, 0xE2, 0x98, 0x5D, 0xF2, 0x4F, 0x5C,
+        0xC3, 0x27, 0x2A, 0x67, 0x7D, 0x43, 0xF6, 0x36, 0x76, 0xD4,
+        0x2C, 0x7E, 0x16, 0x80, 0xCB, 0xF1, 0x07, 0xDC, 0xB9, 0xF5,
+        0xF3, 0x56, 0xBD, 0xF0, 0xFC, 0x00, 0x78, 0x00, 0x56, 0xB4,
+        0x3B, 0x02, 0x01, 0x02
+};
+static const int sizeof_dh_key_der_4096 = sizeof(dh_key_der_4096);
+
+#endif /* USE_CERT_BUFFERS_4096 */
+
 #if defined(HAVE_ECC) && defined(USE_CERT_BUFFERS_256)
 
 /* ./certs/ecc-client-key.der, ECC */
 static const unsigned char ecc_clikey_der_256[] =
 {
-	0x30, 0x77, 0x02, 0x01, 0x01, 0x04, 0x20, 0xF8, 0xCF, 0x92, 
-	0x6B, 0xBD, 0x1E, 0x28, 0xF1, 0xA8, 0xAB, 0xA1, 0x23, 0x4F, 
-	0x32, 0x74, 0x18, 0x88, 0x50, 0xAD, 0x7E, 0xC7, 0xEC, 0x92, 
-	0xF8, 0x8F, 0x97, 0x4D, 0xAF, 0x56, 0x89, 0x65, 0xC7, 0xA0, 
-	0x0A, 0x06, 0x08, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x01, 
-	0x07, 0xA1, 0x44, 0x03, 0x42, 0x00, 0x04, 0x55, 0xBF, 0xF4, 
-	0x0F, 0x44, 0x50, 0x9A, 0x3D, 0xCE, 0x9B, 0xB7, 0xF0, 0xC5, 
-	0x4D, 0xF5, 0x70, 0x7B, 0xD4, 0xEC, 0x24, 0x8E, 0x19, 0x80, 
-	0xEC, 0x5A, 0x4C, 0xA2, 0x24, 0x03, 0x62, 0x2C, 0x9B, 0xDA, 
-	0xEF, 0xA2, 0x35, 0x12, 0x43, 0x84, 0x76, 0x16, 0xC6, 0x56, 
-	0x95, 0x06, 0xCC, 0x01, 0xA9, 0xBD, 0xF6, 0x75, 0x1A, 0x42, 
-	0xF7, 0xBD, 0xA9, 0xB2, 0x36, 0x22, 0x5F, 0xC7, 0x5D, 0x7F, 
-	0xB4
+        0x30, 0x77, 0x02, 0x01, 0x01, 0x04, 0x20, 0xF8, 0xCF, 0x92,
+        0x6B, 0xBD, 0x1E, 0x28, 0xF1, 0xA8, 0xAB, 0xA1, 0x23, 0x4F,
+        0x32, 0x74, 0x18, 0x88, 0x50, 0xAD, 0x7E, 0xC7, 0xEC, 0x92,
+        0xF8, 0x8F, 0x97, 0x4D, 0xAF, 0x56, 0x89, 0x65, 0xC7, 0xA0,
+        0x0A, 0x06, 0x08, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x01,
+        0x07, 0xA1, 0x44, 0x03, 0x42, 0x00, 0x04, 0x55, 0xBF, 0xF4,
+        0x0F, 0x44, 0x50, 0x9A, 0x3D, 0xCE, 0x9B, 0xB7, 0xF0, 0xC5,
+        0x4D, 0xF5, 0x70, 0x7B, 0xD4, 0xEC, 0x24, 0x8E, 0x19, 0x80,
+        0xEC, 0x5A, 0x4C, 0xA2, 0x24, 0x03, 0x62, 0x2C, 0x9B, 0xDA,
+        0xEF, 0xA2, 0x35, 0x12, 0x43, 0x84, 0x76, 0x16, 0xC6, 0x56,
+        0x95, 0x06, 0xCC, 0x01, 0xA9, 0xBD, 0xF6, 0x75, 0x1A, 0x42,
+        0xF7, 0xBD, 0xA9, 0xB2, 0x36, 0x22, 0x5F, 0xC7, 0x5D, 0x7F,
+        0xB4
 };
 static const int sizeof_ecc_clikey_der_256 = sizeof(ecc_clikey_der_256);
 
 /* ./certs/ecc-client-keyPub.der, ECC */
 static const unsigned char ecc_clikeypub_der_256[] =
 {
-	0x30, 0x59, 0x30, 0x13, 0x06, 0x07, 0x2A, 0x86, 0x48, 0xCE, 
-	0x3D, 0x02, 0x01, 0x06, 0x08, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 
-	0x03, 0x01, 0x07, 0x03, 0x42, 0x00, 0x04, 0x55, 0xBF, 0xF4, 
-	0x0F, 0x44, 0x50, 0x9A, 0x3D, 0xCE, 0x9B, 0xB7, 0xF0, 0xC5, 
-	0x4D, 0xF5, 0x70, 0x7B, 0xD4, 0xEC, 0x24, 0x8E, 0x19, 0x80, 
-	0xEC, 0x5A, 0x4C, 0xA2, 0x24, 0x03, 0x62, 0x2C, 0x9B, 0xDA, 
-	0xEF, 0xA2, 0x35, 0x12, 0x43, 0x84, 0x76, 0x16, 0xC6, 0x56, 
-	0x95, 0x06, 0xCC, 0x01, 0xA9, 0xBD, 0xF6, 0x75, 0x1A, 0x42, 
-	0xF7, 0xBD, 0xA9, 0xB2, 0x36, 0x22, 0x5F, 0xC7, 0x5D, 0x7F, 
-	0xB4
+        0x30, 0x59, 0x30, 0x13, 0x06, 0x07, 0x2A, 0x86, 0x48, 0xCE,
+        0x3D, 0x02, 0x01, 0x06, 0x08, 0x2A, 0x86, 0x48, 0xCE, 0x3D,
+        0x03, 0x01, 0x07, 0x03, 0x42, 0x00, 0x04, 0x55, 0xBF, 0xF4,
+        0x0F, 0x44, 0x50, 0x9A, 0x3D, 0xCE, 0x9B, 0xB7, 0xF0, 0xC5,
+        0x4D, 0xF5, 0x70, 0x7B, 0xD4, 0xEC, 0x24, 0x8E, 0x19, 0x80,
+        0xEC, 0x5A, 0x4C, 0xA2, 0x24, 0x03, 0x62, 0x2C, 0x9B, 0xDA,
+        0xEF, 0xA2, 0x35, 0x12, 0x43, 0x84, 0x76, 0x16, 0xC6, 0x56,
+        0x95, 0x06, 0xCC, 0x01, 0xA9, 0xBD, 0xF6, 0x75, 0x1A, 0x42,
+        0xF7, 0xBD, 0xA9, 0xB2, 0x36, 0x22, 0x5F, 0xC7, 0x5D, 0x7F,
+        0xB4
 };
 static const int sizeof_ecc_clikeypub_der_256 = sizeof(ecc_clikeypub_der_256);
 
 /* ./certs/client-ecc-cert.der, ECC */
 static const unsigned char cliecc_cert_der_256[] =
 {
-	0x30, 0x82, 0x03, 0x08, 0x30, 0x82, 0x02, 0xAF, 0xA0, 0x03, 
-	0x02, 0x01, 0x02, 0x02, 0x09, 0x00, 0x93, 0xBF, 0x6A, 0xDE, 
-	0x9B, 0x41, 0x9D, 0xAD, 0x30, 0x0A, 0x06, 0x08, 0x2A, 0x86, 
-	0x48, 0xCE, 0x3D, 0x04, 0x03, 0x02, 0x30, 0x81, 0x8D, 0x31, 
-	0x0B, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 0x06, 0x13, 0x02, 
-	0x55, 0x53, 0x31, 0x0F, 0x30, 0x0D, 0x06, 0x03, 0x55, 0x04, 
-	0x08, 0x0C, 0x06, 0x4F, 0x72, 0x65, 0x67, 0x6F, 0x6E, 0x31, 
-	0x0E, 0x30, 0x0C, 0x06, 0x03, 0x55, 0x04, 0x07, 0x0C, 0x05, 
-	0x53, 0x61, 0x6C, 0x65, 0x6D, 0x31, 0x13, 0x30, 0x11, 0x06, 
-	0x03, 0x55, 0x04, 0x0A, 0x0C, 0x0A, 0x43, 0x6C, 0x69, 0x65, 
-	0x6E, 0x74, 0x20, 0x45, 0x43, 0x43, 0x31, 0x0D, 0x30, 0x0B, 
-	0x06, 0x03, 0x55, 0x04, 0x0B, 0x0C, 0x04, 0x46, 0x61, 0x73, 
-	0x74, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 0x03, 
-	0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77, 0x6F, 0x6C, 0x66, 
-	0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x31, 0x1F, 0x30, 
-	0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 
-	0x09, 0x01, 0x16, 0x10, 0x69, 0x6E, 0x66, 0x6F, 0x40, 0x77, 
-	0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 
-	0x30, 0x1E, 0x17, 0x0D, 0x31, 0x38, 0x30, 0x34, 0x31, 0x33, 
-	0x31, 0x35, 0x32, 0x33, 0x31, 0x30, 0x5A, 0x17, 0x0D, 0x32, 
-	0x31, 0x30, 0x31, 0x30, 0x37, 0x31, 0x35, 0x32, 0x33, 0x31, 
-	0x30, 0x5A, 0x30, 0x81, 0x8D, 0x31, 0x0B, 0x30, 0x09, 0x06, 
-	0x03, 0x55, 0x04, 0x06, 0x13, 0x02, 0x55, 0x53, 0x31, 0x0F, 
-	0x30, 0x0D, 0x06, 0x03, 0x55, 0x04, 0x08, 0x0C, 0x06, 0x4F, 
-	0x72, 0x65, 0x67, 0x6F, 0x6E, 0x31, 0x0E, 0x30, 0x0C, 0x06, 
-	0x03, 0x55, 0x04, 0x07, 0x0C, 0x05, 0x53, 0x61, 0x6C, 0x65, 
-	0x6D, 0x31, 0x13, 0x30, 0x11, 0x06, 0x03, 0x55, 0x04, 0x0A, 
-	0x0C, 0x0A, 0x43, 0x6C, 0x69, 0x65, 0x6E, 0x74, 0x20, 0x45, 
-	0x43, 0x43, 0x31, 0x0D, 0x30, 0x0B, 0x06, 0x03, 0x55, 0x04, 
-	0x0B, 0x0C, 0x04, 0x46, 0x61, 0x73, 0x74, 0x31, 0x18, 0x30, 
-	0x16, 0x06, 0x03, 0x55, 0x04, 0x03, 0x0C, 0x0F, 0x77, 0x77, 
-	0x77, 0x2E, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 
-	0x63, 0x6F, 0x6D, 0x31, 0x1F, 0x30, 0x1D, 0x06, 0x09, 0x2A, 
-	0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x09, 0x01, 0x16, 0x10, 
-	0x69, 0x6E, 0x66, 0x6F, 0x40, 0x77, 0x6F, 0x6C, 0x66, 0x73, 
-	0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x30, 0x59, 0x30, 0x13, 
-	0x06, 0x07, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x02, 0x01, 0x06, 
-	0x08, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x01, 0x07, 0x03, 
-	0x42, 0x00, 0x04, 0x55, 0xBF, 0xF4, 0x0F, 0x44, 0x50, 0x9A, 
-	0x3D, 0xCE, 0x9B, 0xB7, 0xF0, 0xC5, 0x4D, 0xF5, 0x70, 0x7B, 
-	0xD4, 0xEC, 0x24, 0x8E, 0x19, 0x80, 0xEC, 0x5A, 0x4C, 0xA2, 
-	0x24, 0x03, 0x62, 0x2C, 0x9B, 0xDA, 0xEF, 0xA2, 0x35, 0x12, 
-	0x43, 0x84, 0x76, 0x16, 0xC6, 0x56, 0x95, 0x06, 0xCC, 0x01, 
-	0xA9, 0xBD, 0xF6, 0x75, 0x1A, 0x42, 0xF7, 0xBD, 0xA9, 0xB2, 
-	0x36, 0x22, 0x5F, 0xC7, 0x5D, 0x7F, 0xB4, 0xA3, 0x81, 0xF5, 
-	0x30, 0x81, 0xF2, 0x30, 0x1D, 0x06, 0x03, 0x55, 0x1D, 0x0E, 
-	0x04, 0x16, 0x04, 0x14, 0xEB, 0xD4, 0x4B, 0x59, 0x6B, 0x95, 
-	0x61, 0x3F, 0x51, 0x57, 0xB6, 0x04, 0x4D, 0x89, 0x41, 0x88, 
-	0x44, 0x5C, 0xAB, 0xF2, 0x30, 0x81, 0xC2, 0x06, 0x03, 0x55, 
-	0x1D, 0x23, 0x04, 0x81, 0xBA, 0x30, 0x81, 0xB7, 0x80, 0x14, 
-	0xEB, 0xD4, 0x4B, 0x59, 0x6B, 0x95, 0x61, 0x3F, 0x51, 0x57, 
-	0xB6, 0x04, 0x4D, 0x89, 0x41, 0x88, 0x44, 0x5C, 0xAB, 0xF2, 
-	0xA1, 0x81, 0x93, 0xA4, 0x81, 0x90, 0x30, 0x81, 0x8D, 0x31, 
-	0x0B, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 0x06, 0x13, 0x02, 
-	0x55, 0x53, 0x31, 0x0F, 0x30, 0x0D, 0x06, 0x03, 0x55, 0x04, 
-	0x08, 0x0C, 0x06, 0x4F, 0x72, 0x65, 0x67, 0x6F, 0x6E, 0x31, 
-	0x0E, 0x30, 0x0C, 0x06, 0x03, 0x55, 0x04, 0x07, 0x0C, 0x05, 
-	0x53, 0x61, 0x6C, 0x65, 0x6D, 0x31, 0x13, 0x30, 0x11, 0x06, 
-	0x03, 0x55, 0x04, 0x0A, 0x0C, 0x0A, 0x43, 0x6C, 0x69, 0x65, 
-	0x6E, 0x74, 0x20, 0x45, 0x43, 0x43, 0x31, 0x0D, 0x30, 0x0B, 
-	0x06, 0x03, 0x55, 0x04, 0x0B, 0x0C, 0x04, 0x46, 0x61, 0x73, 
-	0x74, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 0x03, 
-	0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77, 0x6F, 0x6C, 0x66, 
-	0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x31, 0x1F, 0x30, 
-	0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 
-	0x09, 0x01, 0x16, 0x10, 0x69, 0x6E, 0x66, 0x6F, 0x40, 0x77, 
-	0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 
-	0x82, 0x09, 0x00, 0x93, 0xBF, 0x6A, 0xDE, 0x9B, 0x41, 0x9D, 
-	0xAD, 0x30, 0x0C, 0x06, 0x03, 0x55, 0x1D, 0x13, 0x04, 0x05, 
-	0x30, 0x03, 0x01, 0x01, 0xFF, 0x30, 0x0A, 0x06, 0x08, 0x2A, 
-	0x86, 0x48, 0xCE, 0x3D, 0x04, 0x03, 0x02, 0x03, 0x47, 0x00, 
-	0x30, 0x44, 0x02, 0x20, 0x61, 0xBC, 0x9D, 0x4D, 0x88, 0x64, 
-	0x86, 0xB8, 0x71, 0xAA, 0x35, 0x59, 0x68, 0xB8, 0xEE, 0x2C, 
-	0xF3, 0x23, 0xB5, 0x1A, 0xB9, 0xBA, 0x41, 0x50, 0xA8, 0xC6, 
-	0xC3, 0x58, 0xEB, 0x58, 0xBD, 0x60, 0x02, 0x20, 0x61, 0xAA, 
-	0xEB, 0xB5, 0x73, 0x0D, 0x01, 0xDB, 0x69, 0x8F, 0x52, 0xF5, 
-	0x72, 0x6D, 0x37, 0x42, 0xB5, 0xFD, 0x94, 0xB6, 0x6E, 0xB1, 
-	0xC4, 0x25, 0x2E, 0x96, 0x96, 0xF3, 0x39, 0xB2, 0x5D, 0xEA
+        0x30, 0x82, 0x03, 0x08, 0x30, 0x82, 0x02, 0xAF, 0xA0, 0x03,
+        0x02, 0x01, 0x02, 0x02, 0x09, 0x00, 0x93, 0xBF, 0x6A, 0xDE,
+        0x9B, 0x41, 0x9D, 0xAD, 0x30, 0x0A, 0x06, 0x08, 0x2A, 0x86,
+        0x48, 0xCE, 0x3D, 0x04, 0x03, 0x02, 0x30, 0x81, 0x8D, 0x31,
+        0x0B, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 0x06, 0x13, 0x02,
+        0x55, 0x53, 0x31, 0x0F, 0x30, 0x0D, 0x06, 0x03, 0x55, 0x04,
+        0x08, 0x0C, 0x06, 0x4F, 0x72, 0x65, 0x67, 0x6F, 0x6E, 0x31,
+        0x0E, 0x30, 0x0C, 0x06, 0x03, 0x55, 0x04, 0x07, 0x0C, 0x05,
+        0x53, 0x61, 0x6C, 0x65, 0x6D, 0x31, 0x13, 0x30, 0x11, 0x06,
+        0x03, 0x55, 0x04, 0x0A, 0x0C, 0x0A, 0x43, 0x6C, 0x69, 0x65,
+        0x6E, 0x74, 0x20, 0x45, 0x43, 0x43, 0x31, 0x0D, 0x30, 0x0B,
+        0x06, 0x03, 0x55, 0x04, 0x0B, 0x0C, 0x04, 0x46, 0x61, 0x73,
+        0x74, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 0x03,
+        0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77, 0x6F, 0x6C, 0x66,
+        0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x31, 0x1F, 0x30,
+        0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01,
+        0x09, 0x01, 0x16, 0x10, 0x69, 0x6E, 0x66, 0x6F, 0x40, 0x77,
+        0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D,
+        0x30, 0x1E, 0x17, 0x0D, 0x31, 0x38, 0x30, 0x34, 0x31, 0x33,
+        0x31, 0x35, 0x32, 0x33, 0x31, 0x30, 0x5A, 0x17, 0x0D, 0x32,
+        0x31, 0x30, 0x31, 0x30, 0x37, 0x31, 0x35, 0x32, 0x33, 0x31,
+        0x30, 0x5A, 0x30, 0x81, 0x8D, 0x31, 0x0B, 0x30, 0x09, 0x06,
+        0x03, 0x55, 0x04, 0x06, 0x13, 0x02, 0x55, 0x53, 0x31, 0x0F,
+        0x30, 0x0D, 0x06, 0x03, 0x55, 0x04, 0x08, 0x0C, 0x06, 0x4F,
+        0x72, 0x65, 0x67, 0x6F, 0x6E, 0x31, 0x0E, 0x30, 0x0C, 0x06,
+        0x03, 0x55, 0x04, 0x07, 0x0C, 0x05, 0x53, 0x61, 0x6C, 0x65,
+        0x6D, 0x31, 0x13, 0x30, 0x11, 0x06, 0x03, 0x55, 0x04, 0x0A,
+        0x0C, 0x0A, 0x43, 0x6C, 0x69, 0x65, 0x6E, 0x74, 0x20, 0x45,
+        0x43, 0x43, 0x31, 0x0D, 0x30, 0x0B, 0x06, 0x03, 0x55, 0x04,
+        0x0B, 0x0C, 0x04, 0x46, 0x61, 0x73, 0x74, 0x31, 0x18, 0x30,
+        0x16, 0x06, 0x03, 0x55, 0x04, 0x03, 0x0C, 0x0F, 0x77, 0x77,
+        0x77, 0x2E, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E,
+        0x63, 0x6F, 0x6D, 0x31, 0x1F, 0x30, 0x1D, 0x06, 0x09, 0x2A,
+        0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x09, 0x01, 0x16, 0x10,
+        0x69, 0x6E, 0x66, 0x6F, 0x40, 0x77, 0x6F, 0x6C, 0x66, 0x73,
+        0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x30, 0x59, 0x30, 0x13,
+        0x06, 0x07, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x02, 0x01, 0x06,
+        0x08, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x01, 0x07, 0x03,
+        0x42, 0x00, 0x04, 0x55, 0xBF, 0xF4, 0x0F, 0x44, 0x50, 0x9A,
+        0x3D, 0xCE, 0x9B, 0xB7, 0xF0, 0xC5, 0x4D, 0xF5, 0x70, 0x7B,
+        0xD4, 0xEC, 0x24, 0x8E, 0x19, 0x80, 0xEC, 0x5A, 0x4C, 0xA2,
+        0x24, 0x03, 0x62, 0x2C, 0x9B, 0xDA, 0xEF, 0xA2, 0x35, 0x12,
+        0x43, 0x84, 0x76, 0x16, 0xC6, 0x56, 0x95, 0x06, 0xCC, 0x01,
+        0xA9, 0xBD, 0xF6, 0x75, 0x1A, 0x42, 0xF7, 0xBD, 0xA9, 0xB2,
+        0x36, 0x22, 0x5F, 0xC7, 0x5D, 0x7F, 0xB4, 0xA3, 0x81, 0xF5,
+        0x30, 0x81, 0xF2, 0x30, 0x1D, 0x06, 0x03, 0x55, 0x1D, 0x0E,
+        0x04, 0x16, 0x04, 0x14, 0xEB, 0xD4, 0x4B, 0x59, 0x6B, 0x95,
+        0x61, 0x3F, 0x51, 0x57, 0xB6, 0x04, 0x4D, 0x89, 0x41, 0x88,
+        0x44, 0x5C, 0xAB, 0xF2, 0x30, 0x81, 0xC2, 0x06, 0x03, 0x55,
+        0x1D, 0x23, 0x04, 0x81, 0xBA, 0x30, 0x81, 0xB7, 0x80, 0x14,
+        0xEB, 0xD4, 0x4B, 0x59, 0x6B, 0x95, 0x61, 0x3F, 0x51, 0x57,
+        0xB6, 0x04, 0x4D, 0x89, 0x41, 0x88, 0x44, 0x5C, 0xAB, 0xF2,
+        0xA1, 0x81, 0x93, 0xA4, 0x81, 0x90, 0x30, 0x81, 0x8D, 0x31,
+        0x0B, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 0x06, 0x13, 0x02,
+        0x55, 0x53, 0x31, 0x0F, 0x30, 0x0D, 0x06, 0x03, 0x55, 0x04,
+        0x08, 0x0C, 0x06, 0x4F, 0x72, 0x65, 0x67, 0x6F, 0x6E, 0x31,
+        0x0E, 0x30, 0x0C, 0x06, 0x03, 0x55, 0x04, 0x07, 0x0C, 0x05,
+        0x53, 0x61, 0x6C, 0x65, 0x6D, 0x31, 0x13, 0x30, 0x11, 0x06,
+        0x03, 0x55, 0x04, 0x0A, 0x0C, 0x0A, 0x43, 0x6C, 0x69, 0x65,
+        0x6E, 0x74, 0x20, 0x45, 0x43, 0x43, 0x31, 0x0D, 0x30, 0x0B,
+        0x06, 0x03, 0x55, 0x04, 0x0B, 0x0C, 0x04, 0x46, 0x61, 0x73,
+        0x74, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 0x03,
+        0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77, 0x6F, 0x6C, 0x66,
+        0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x31, 0x1F, 0x30,
+        0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01,
+        0x09, 0x01, 0x16, 0x10, 0x69, 0x6E, 0x66, 0x6F, 0x40, 0x77,
+        0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D,
+        0x82, 0x09, 0x00, 0x93, 0xBF, 0x6A, 0xDE, 0x9B, 0x41, 0x9D,
+        0xAD, 0x30, 0x0C, 0x06, 0x03, 0x55, 0x1D, 0x13, 0x04, 0x05,
+        0x30, 0x03, 0x01, 0x01, 0xFF, 0x30, 0x0A, 0x06, 0x08, 0x2A,
+        0x86, 0x48, 0xCE, 0x3D, 0x04, 0x03, 0x02, 0x03, 0x47, 0x00,
+        0x30, 0x44, 0x02, 0x20, 0x61, 0xBC, 0x9D, 0x4D, 0x88, 0x64,
+        0x86, 0xB8, 0x71, 0xAA, 0x35, 0x59, 0x68, 0xB8, 0xEE, 0x2C,
+        0xF3, 0x23, 0xB5, 0x1A, 0xB9, 0xBA, 0x41, 0x50, 0xA8, 0xC6,
+        0xC3, 0x58, 0xEB, 0x58, 0xBD, 0x60, 0x02, 0x20, 0x61, 0xAA,
+        0xEB, 0xB5, 0x73, 0x0D, 0x01, 0xDB, 0x69, 0x8F, 0x52, 0xF5,
+        0x72, 0x6D, 0x37, 0x42, 0xB5, 0xFD, 0x94, 0xB6, 0x6E, 0xB1,
+        0xC4, 0x25, 0x2E, 0x96, 0x96, 0xF3, 0x39, 0xB2, 0x5D, 0xEA
 
 };
 static const int sizeof_cliecc_cert_der_256 = sizeof(cliecc_cert_der_256);
@@ -2213,512 +3376,512 @@
 /* ./certs/ecc-key.der, ECC */
 static const unsigned char ecc_key_der_256[] =
 {
-	0x30, 0x77, 0x02, 0x01, 0x01, 0x04, 0x20, 0x45, 0xB6, 0x69, 
-	0x02, 0x73, 0x9C, 0x6C, 0x85, 0xA1, 0x38, 0x5B, 0x72, 0xE8, 
-	0xE8, 0xC7, 0xAC, 0xC4, 0x03, 0x8D, 0x53, 0x35, 0x04, 0xFA, 
-	0x6C, 0x28, 0xDC, 0x34, 0x8D, 0xE1, 0xA8, 0x09, 0x8C, 0xA0, 
-	0x0A, 0x06, 0x08, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x01, 
-	0x07, 0xA1, 0x44, 0x03, 0x42, 0x00, 0x04, 0xBB, 0x33, 0xAC, 
-	0x4C, 0x27, 0x50, 0x4A, 0xC6, 0x4A, 0xA5, 0x04, 0xC3, 0x3C, 
-	0xDE, 0x9F, 0x36, 0xDB, 0x72, 0x2D, 0xCE, 0x94, 0xEA, 0x2B, 
-	0xFA, 0xCB, 0x20, 0x09, 0x39, 0x2C, 0x16, 0xE8, 0x61, 0x02, 
-	0xE9, 0xAF, 0x4D, 0xD3, 0x02, 0x93, 0x9A, 0x31, 0x5B, 0x97, 
-	0x92, 0x21, 0x7F, 0xF0, 0xCF, 0x18, 0xDA, 0x91, 0x11, 0x02, 
-	0x34, 0x86, 0xE8, 0x20, 0x58, 0x33, 0x0B, 0x80, 0x34, 0x89, 
-	0xD8
+        0x30, 0x77, 0x02, 0x01, 0x01, 0x04, 0x20, 0x45, 0xB6, 0x69,
+        0x02, 0x73, 0x9C, 0x6C, 0x85, 0xA1, 0x38, 0x5B, 0x72, 0xE8,
+        0xE8, 0xC7, 0xAC, 0xC4, 0x03, 0x8D, 0x53, 0x35, 0x04, 0xFA,
+        0x6C, 0x28, 0xDC, 0x34, 0x8D, 0xE1, 0xA8, 0x09, 0x8C, 0xA0,
+        0x0A, 0x06, 0x08, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x01,
+        0x07, 0xA1, 0x44, 0x03, 0x42, 0x00, 0x04, 0xBB, 0x33, 0xAC,
+        0x4C, 0x27, 0x50, 0x4A, 0xC6, 0x4A, 0xA5, 0x04, 0xC3, 0x3C,
+        0xDE, 0x9F, 0x36, 0xDB, 0x72, 0x2D, 0xCE, 0x94, 0xEA, 0x2B,
+        0xFA, 0xCB, 0x20, 0x09, 0x39, 0x2C, 0x16, 0xE8, 0x61, 0x02,
+        0xE9, 0xAF, 0x4D, 0xD3, 0x02, 0x93, 0x9A, 0x31, 0x5B, 0x97,
+        0x92, 0x21, 0x7F, 0xF0, 0xCF, 0x18, 0xDA, 0x91, 0x11, 0x02,
+        0x34, 0x86, 0xE8, 0x20, 0x58, 0x33, 0x0B, 0x80, 0x34, 0x89,
+        0xD8
 };
 static const int sizeof_ecc_key_der_256 = sizeof(ecc_key_der_256);
 
 /* ./certs/ecc-keyPub.der, ECC */
 static const unsigned char ecc_key_pub_der_256[] =
 {
-	0x30, 0x59, 0x30, 0x13, 0x06, 0x07, 0x2A, 0x86, 0x48, 0xCE, 
-	0x3D, 0x02, 0x01, 0x06, 0x08, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 
-	0x03, 0x01, 0x07, 0x03, 0x42, 0x00, 0x04, 0xBB, 0x33, 0xAC, 
-	0x4C, 0x27, 0x50, 0x4A, 0xC6, 0x4A, 0xA5, 0x04, 0xC3, 0x3C, 
-	0xDE, 0x9F, 0x36, 0xDB, 0x72, 0x2D, 0xCE, 0x94, 0xEA, 0x2B, 
-	0xFA, 0xCB, 0x20, 0x09, 0x39, 0x2C, 0x16, 0xE8, 0x61, 0x02, 
-	0xE9, 0xAF, 0x4D, 0xD3, 0x02, 0x93, 0x9A, 0x31, 0x5B, 0x97, 
-	0x92, 0x21, 0x7F, 0xF0, 0xCF, 0x18, 0xDA, 0x91, 0x11, 0x02, 
-	0x34, 0x86, 0xE8, 0x20, 0x58, 0x33, 0x0B, 0x80, 0x34, 0x89, 
-	0xD8
+        0x30, 0x59, 0x30, 0x13, 0x06, 0x07, 0x2A, 0x86, 0x48, 0xCE,
+        0x3D, 0x02, 0x01, 0x06, 0x08, 0x2A, 0x86, 0x48, 0xCE, 0x3D,
+        0x03, 0x01, 0x07, 0x03, 0x42, 0x00, 0x04, 0xBB, 0x33, 0xAC,
+        0x4C, 0x27, 0x50, 0x4A, 0xC6, 0x4A, 0xA5, 0x04, 0xC3, 0x3C,
+        0xDE, 0x9F, 0x36, 0xDB, 0x72, 0x2D, 0xCE, 0x94, 0xEA, 0x2B,
+        0xFA, 0xCB, 0x20, 0x09, 0x39, 0x2C, 0x16, 0xE8, 0x61, 0x02,
+        0xE9, 0xAF, 0x4D, 0xD3, 0x02, 0x93, 0x9A, 0x31, 0x5B, 0x97,
+        0x92, 0x21, 0x7F, 0xF0, 0xCF, 0x18, 0xDA, 0x91, 0x11, 0x02,
+        0x34, 0x86, 0xE8, 0x20, 0x58, 0x33, 0x0B, 0x80, 0x34, 0x89,
+        0xD8
 };
 static const int sizeof_ecc_key_pub_der_256 = sizeof(ecc_key_pub_der_256);
 
 /* ./certs/server-ecc-comp.der, ECC */
 static const unsigned char serv_ecc_comp_der_256[] =
 {
-	0x30, 0x82, 0x03, 0x23, 0x30, 0x82, 0x02, 0xCA, 0xA0, 0x03, 
-	0x02, 0x01, 0x02, 0x02, 0x09, 0x00, 0x80, 0x78, 0xC9, 0xB7, 
-	0x06, 0x5A, 0xC5, 0x83, 0x30, 0x0A, 0x06, 0x08, 0x2A, 0x86, 
-	0x48, 0xCE, 0x3D, 0x04, 0x03, 0x02, 0x30, 0x81, 0xA0, 0x31, 
-	0x0B, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 0x06, 0x13, 0x02, 
-	0x55, 0x53, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 
-	0x08, 0x0C, 0x07, 0x4D, 0x6F, 0x6E, 0x74, 0x61, 0x6E, 0x61, 
-	0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x07, 0x0C, 
-	0x07, 0x42, 0x6F, 0x7A, 0x65, 0x6D, 0x61, 0x6E, 0x31, 0x18, 
-	0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 0x0A, 0x0C, 0x0F, 0x45, 
-	0x6C, 0x6C, 0x69, 0x70, 0x74, 0x69, 0x63, 0x20, 0x2D, 0x20, 
-	0x63, 0x6F, 0x6D, 0x70, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 
-	0x55, 0x04, 0x0B, 0x0C, 0x0F, 0x53, 0x65, 0x72, 0x76, 0x65, 
-	0x72, 0x20, 0x45, 0x43, 0x43, 0x2D, 0x63, 0x6F, 0x6D, 0x70, 
-	0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 0x03, 0x0C, 
-	0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77, 0x6F, 0x6C, 0x66, 0x73, 
-	0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x31, 0x1F, 0x30, 0x1D, 
-	0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x09, 
-	0x01, 0x16, 0x10, 0x69, 0x6E, 0x66, 0x6F, 0x40, 0x77, 0x6F, 
-	0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x30, 
-	0x1E, 0x17, 0x0D, 0x31, 0x38, 0x30, 0x34, 0x31, 0x33, 0x31, 
-	0x35, 0x32, 0x33, 0x31, 0x30, 0x5A, 0x17, 0x0D, 0x32, 0x31, 
-	0x30, 0x31, 0x30, 0x37, 0x31, 0x35, 0x32, 0x33, 0x31, 0x30, 
-	0x5A, 0x30, 0x81, 0xA0, 0x31, 0x0B, 0x30, 0x09, 0x06, 0x03, 
-	0x55, 0x04, 0x06, 0x13, 0x02, 0x55, 0x53, 0x31, 0x10, 0x30, 
-	0x0E, 0x06, 0x03, 0x55, 0x04, 0x08, 0x0C, 0x07, 0x4D, 0x6F, 
-	0x6E, 0x74, 0x61, 0x6E, 0x61, 0x31, 0x10, 0x30, 0x0E, 0x06, 
-	0x03, 0x55, 0x04, 0x07, 0x0C, 0x07, 0x42, 0x6F, 0x7A, 0x65, 
-	0x6D, 0x61, 0x6E, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 
-	0x04, 0x0A, 0x0C, 0x0F, 0x45, 0x6C, 0x6C, 0x69, 0x70, 0x74, 
-	0x69, 0x63, 0x20, 0x2D, 0x20, 0x63, 0x6F, 0x6D, 0x70, 0x31, 
-	0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 0x0B, 0x0C, 0x0F, 
-	0x53, 0x65, 0x72, 0x76, 0x65, 0x72, 0x20, 0x45, 0x43, 0x43, 
-	0x2D, 0x63, 0x6F, 0x6D, 0x70, 0x31, 0x18, 0x30, 0x16, 0x06, 
-	0x03, 0x55, 0x04, 0x03, 0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E, 
-	0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 
-	0x6D, 0x31, 0x1F, 0x30, 0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48, 
-	0x86, 0xF7, 0x0D, 0x01, 0x09, 0x01, 0x16, 0x10, 0x69, 0x6E, 
-	0x66, 0x6F, 0x40, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 
-	0x2E, 0x63, 0x6F, 0x6D, 0x30, 0x39, 0x30, 0x13, 0x06, 0x07, 
-	0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x02, 0x01, 0x06, 0x08, 0x2A, 
-	0x86, 0x48, 0xCE, 0x3D, 0x03, 0x01, 0x07, 0x03, 0x22, 0x00, 
-	0x02, 0xBB, 0x33, 0xAC, 0x4C, 0x27, 0x50, 0x4A, 0xC6, 0x4A, 
-	0xA5, 0x04, 0xC3, 0x3C, 0xDE, 0x9F, 0x36, 0xDB, 0x72, 0x2D, 
-	0xCE, 0x94, 0xEA, 0x2B, 0xFA, 0xCB, 0x20, 0x09, 0x39, 0x2C, 
-	0x16, 0xE8, 0x61, 0xA3, 0x82, 0x01, 0x09, 0x30, 0x82, 0x01, 
-	0x05, 0x30, 0x1D, 0x06, 0x03, 0x55, 0x1D, 0x0E, 0x04, 0x16, 
-	0x04, 0x14, 0x8C, 0x38, 0x3A, 0x6B, 0xB8, 0x24, 0xB7, 0xDF, 
-	0x6E, 0xF4, 0x59, 0xAC, 0x56, 0x4E, 0xAA, 0xE2, 0x58, 0xA6, 
-	0x5A, 0x18, 0x30, 0x81, 0xD5, 0x06, 0x03, 0x55, 0x1D, 0x23, 
-	0x04, 0x81, 0xCD, 0x30, 0x81, 0xCA, 0x80, 0x14, 0x8C, 0x38, 
-	0x3A, 0x6B, 0xB8, 0x24, 0xB7, 0xDF, 0x6E, 0xF4, 0x59, 0xAC, 
-	0x56, 0x4E, 0xAA, 0xE2, 0x58, 0xA6, 0x5A, 0x18, 0xA1, 0x81, 
-	0xA6, 0xA4, 0x81, 0xA3, 0x30, 0x81, 0xA0, 0x31, 0x0B, 0x30, 
-	0x09, 0x06, 0x03, 0x55, 0x04, 0x06, 0x13, 0x02, 0x55, 0x53, 
-	0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x08, 0x0C, 
-	0x07, 0x4D, 0x6F, 0x6E, 0x74, 0x61, 0x6E, 0x61, 0x31, 0x10, 
-	0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x07, 0x0C, 0x07, 0x42, 
-	0x6F, 0x7A, 0x65, 0x6D, 0x61, 0x6E, 0x31, 0x18, 0x30, 0x16, 
-	0x06, 0x03, 0x55, 0x04, 0x0A, 0x0C, 0x0F, 0x45, 0x6C, 0x6C, 
-	0x69, 0x70, 0x74, 0x69, 0x63, 0x20, 0x2D, 0x20, 0x63, 0x6F, 
-	0x6D, 0x70, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 
-	0x0B, 0x0C, 0x0F, 0x53, 0x65, 0x72, 0x76, 0x65, 0x72, 0x20, 
-	0x45, 0x43, 0x43, 0x2D, 0x63, 0x6F, 0x6D, 0x70, 0x31, 0x18, 
-	0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 0x03, 0x0C, 0x0F, 0x77, 
-	0x77, 0x77, 0x2E, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 
-	0x2E, 0x63, 0x6F, 0x6D, 0x31, 0x1F, 0x30, 0x1D, 0x06, 0x09, 
-	0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x09, 0x01, 0x16, 
-	0x10, 0x69, 0x6E, 0x66, 0x6F, 0x40, 0x77, 0x6F, 0x6C, 0x66, 
-	0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x82, 0x09, 0x00, 
-	0x80, 0x78, 0xC9, 0xB7, 0x06, 0x5A, 0xC5, 0x83, 0x30, 0x0C, 
-	0x06, 0x03, 0x55, 0x1D, 0x13, 0x04, 0x05, 0x30, 0x03, 0x01, 
-	0x01, 0xFF, 0x30, 0x0A, 0x06, 0x08, 0x2A, 0x86, 0x48, 0xCE, 
-	0x3D, 0x04, 0x03, 0x02, 0x03, 0x47, 0x00, 0x30, 0x44, 0x02, 
-	0x20, 0x31, 0x44, 0xD0, 0x4E, 0xD7, 0xC4, 0xB4, 0x96, 0xA3, 
-	0xE6, 0x25, 0xFD, 0xFA, 0xD6, 0x28, 0xA8, 0x67, 0x51, 0x72, 
-	0x90, 0x95, 0x31, 0xF9, 0xCD, 0x10, 0xBF, 0x11, 0xE4, 0xEC, 
-	0xB7, 0x42, 0x5B, 0x02, 0x20, 0x45, 0xDB, 0x45, 0x0A, 0x24, 
-	0x58, 0x8E, 0x2E, 0xE6, 0xEA, 0x0C, 0x6C, 0xBC, 0x72, 0x4F, 
-	0x0A, 0x1B, 0xF3, 0x2D, 0x97, 0xE9, 0xC2, 0x19, 0xF9, 0x97, 
-	0x3A, 0x60, 0xDD, 0x08, 0xD3, 0x52, 0x3E
+        0x30, 0x82, 0x03, 0x23, 0x30, 0x82, 0x02, 0xCA, 0xA0, 0x03,
+        0x02, 0x01, 0x02, 0x02, 0x09, 0x00, 0x80, 0x78, 0xC9, 0xB7,
+        0x06, 0x5A, 0xC5, 0x83, 0x30, 0x0A, 0x06, 0x08, 0x2A, 0x86,
+        0x48, 0xCE, 0x3D, 0x04, 0x03, 0x02, 0x30, 0x81, 0xA0, 0x31,
+        0x0B, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 0x06, 0x13, 0x02,
+        0x55, 0x53, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04,
+        0x08, 0x0C, 0x07, 0x4D, 0x6F, 0x6E, 0x74, 0x61, 0x6E, 0x61,
+        0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x07, 0x0C,
+        0x07, 0x42, 0x6F, 0x7A, 0x65, 0x6D, 0x61, 0x6E, 0x31, 0x18,
+        0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 0x0A, 0x0C, 0x0F, 0x45,
+        0x6C, 0x6C, 0x69, 0x70, 0x74, 0x69, 0x63, 0x20, 0x2D, 0x20,
+        0x63, 0x6F, 0x6D, 0x70, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03,
+        0x55, 0x04, 0x0B, 0x0C, 0x0F, 0x53, 0x65, 0x72, 0x76, 0x65,
+        0x72, 0x20, 0x45, 0x43, 0x43, 0x2D, 0x63, 0x6F, 0x6D, 0x70,
+        0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 0x03, 0x0C,
+        0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77, 0x6F, 0x6C, 0x66, 0x73,
+        0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x31, 0x1F, 0x30, 0x1D,
+        0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x09,
+        0x01, 0x16, 0x10, 0x69, 0x6E, 0x66, 0x6F, 0x40, 0x77, 0x6F,
+        0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x30,
+        0x1E, 0x17, 0x0D, 0x31, 0x38, 0x30, 0x34, 0x31, 0x33, 0x31,
+        0x35, 0x32, 0x33, 0x31, 0x30, 0x5A, 0x17, 0x0D, 0x32, 0x31,
+        0x30, 0x31, 0x30, 0x37, 0x31, 0x35, 0x32, 0x33, 0x31, 0x30,
+        0x5A, 0x30, 0x81, 0xA0, 0x31, 0x0B, 0x30, 0x09, 0x06, 0x03,
+        0x55, 0x04, 0x06, 0x13, 0x02, 0x55, 0x53, 0x31, 0x10, 0x30,
+        0x0E, 0x06, 0x03, 0x55, 0x04, 0x08, 0x0C, 0x07, 0x4D, 0x6F,
+        0x6E, 0x74, 0x61, 0x6E, 0x61, 0x31, 0x10, 0x30, 0x0E, 0x06,
+        0x03, 0x55, 0x04, 0x07, 0x0C, 0x07, 0x42, 0x6F, 0x7A, 0x65,
+        0x6D, 0x61, 0x6E, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55,
+        0x04, 0x0A, 0x0C, 0x0F, 0x45, 0x6C, 0x6C, 0x69, 0x70, 0x74,
+        0x69, 0x63, 0x20, 0x2D, 0x20, 0x63, 0x6F, 0x6D, 0x70, 0x31,
+        0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 0x0B, 0x0C, 0x0F,
+        0x53, 0x65, 0x72, 0x76, 0x65, 0x72, 0x20, 0x45, 0x43, 0x43,
+        0x2D, 0x63, 0x6F, 0x6D, 0x70, 0x31, 0x18, 0x30, 0x16, 0x06,
+        0x03, 0x55, 0x04, 0x03, 0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E,
+        0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F,
+        0x6D, 0x31, 0x1F, 0x30, 0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48,
+        0x86, 0xF7, 0x0D, 0x01, 0x09, 0x01, 0x16, 0x10, 0x69, 0x6E,
+        0x66, 0x6F, 0x40, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C,
+        0x2E, 0x63, 0x6F, 0x6D, 0x30, 0x39, 0x30, 0x13, 0x06, 0x07,
+        0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x02, 0x01, 0x06, 0x08, 0x2A,
+        0x86, 0x48, 0xCE, 0x3D, 0x03, 0x01, 0x07, 0x03, 0x22, 0x00,
+        0x02, 0xBB, 0x33, 0xAC, 0x4C, 0x27, 0x50, 0x4A, 0xC6, 0x4A,
+        0xA5, 0x04, 0xC3, 0x3C, 0xDE, 0x9F, 0x36, 0xDB, 0x72, 0x2D,
+        0xCE, 0x94, 0xEA, 0x2B, 0xFA, 0xCB, 0x20, 0x09, 0x39, 0x2C,
+        0x16, 0xE8, 0x61, 0xA3, 0x82, 0x01, 0x09, 0x30, 0x82, 0x01,
+        0x05, 0x30, 0x1D, 0x06, 0x03, 0x55, 0x1D, 0x0E, 0x04, 0x16,
+        0x04, 0x14, 0x8C, 0x38, 0x3A, 0x6B, 0xB8, 0x24, 0xB7, 0xDF,
+        0x6E, 0xF4, 0x59, 0xAC, 0x56, 0x4E, 0xAA, 0xE2, 0x58, 0xA6,
+        0x5A, 0x18, 0x30, 0x81, 0xD5, 0x06, 0x03, 0x55, 0x1D, 0x23,
+        0x04, 0x81, 0xCD, 0x30, 0x81, 0xCA, 0x80, 0x14, 0x8C, 0x38,
+        0x3A, 0x6B, 0xB8, 0x24, 0xB7, 0xDF, 0x6E, 0xF4, 0x59, 0xAC,
+        0x56, 0x4E, 0xAA, 0xE2, 0x58, 0xA6, 0x5A, 0x18, 0xA1, 0x81,
+        0xA6, 0xA4, 0x81, 0xA3, 0x30, 0x81, 0xA0, 0x31, 0x0B, 0x30,
+        0x09, 0x06, 0x03, 0x55, 0x04, 0x06, 0x13, 0x02, 0x55, 0x53,
+        0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x08, 0x0C,
+        0x07, 0x4D, 0x6F, 0x6E, 0x74, 0x61, 0x6E, 0x61, 0x31, 0x10,
+        0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x07, 0x0C, 0x07, 0x42,
+        0x6F, 0x7A, 0x65, 0x6D, 0x61, 0x6E, 0x31, 0x18, 0x30, 0x16,
+        0x06, 0x03, 0x55, 0x04, 0x0A, 0x0C, 0x0F, 0x45, 0x6C, 0x6C,
+        0x69, 0x70, 0x74, 0x69, 0x63, 0x20, 0x2D, 0x20, 0x63, 0x6F,
+        0x6D, 0x70, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 0x04,
+        0x0B, 0x0C, 0x0F, 0x53, 0x65, 0x72, 0x76, 0x65, 0x72, 0x20,
+        0x45, 0x43, 0x43, 0x2D, 0x63, 0x6F, 0x6D, 0x70, 0x31, 0x18,
+        0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 0x03, 0x0C, 0x0F, 0x77,
+        0x77, 0x77, 0x2E, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C,
+        0x2E, 0x63, 0x6F, 0x6D, 0x31, 0x1F, 0x30, 0x1D, 0x06, 0x09,
+        0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x09, 0x01, 0x16,
+        0x10, 0x69, 0x6E, 0x66, 0x6F, 0x40, 0x77, 0x6F, 0x6C, 0x66,
+        0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x82, 0x09, 0x00,
+        0x80, 0x78, 0xC9, 0xB7, 0x06, 0x5A, 0xC5, 0x83, 0x30, 0x0C,
+        0x06, 0x03, 0x55, 0x1D, 0x13, 0x04, 0x05, 0x30, 0x03, 0x01,
+        0x01, 0xFF, 0x30, 0x0A, 0x06, 0x08, 0x2A, 0x86, 0x48, 0xCE,
+        0x3D, 0x04, 0x03, 0x02, 0x03, 0x47, 0x00, 0x30, 0x44, 0x02,
+        0x20, 0x31, 0x44, 0xD0, 0x4E, 0xD7, 0xC4, 0xB4, 0x96, 0xA3,
+        0xE6, 0x25, 0xFD, 0xFA, 0xD6, 0x28, 0xA8, 0x67, 0x51, 0x72,
+        0x90, 0x95, 0x31, 0xF9, 0xCD, 0x10, 0xBF, 0x11, 0xE4, 0xEC,
+        0xB7, 0x42, 0x5B, 0x02, 0x20, 0x45, 0xDB, 0x45, 0x0A, 0x24,
+        0x58, 0x8E, 0x2E, 0xE6, 0xEA, 0x0C, 0x6C, 0xBC, 0x72, 0x4F,
+        0x0A, 0x1B, 0xF3, 0x2D, 0x97, 0xE9, 0xC2, 0x19, 0xF9, 0x97,
+        0x3A, 0x60, 0xDD, 0x08, 0xD3, 0x52, 0x3E
 };
 static const int sizeof_serv_ecc_comp_der_256 = sizeof(serv_ecc_comp_der_256);
 
 /* ./certs/server-ecc-rsa.der, ECC */
 static const unsigned char serv_ecc_rsa_der_256[] =
 {
-	0x30, 0x82, 0x03, 0xE0, 0x30, 0x82, 0x02, 0xC8, 0xA0, 0x03, 
-	0x02, 0x01, 0x02, 0x02, 0x01, 0x01, 0x30, 0x0D, 0x06, 0x09, 
-	0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0B, 0x05, 
-	0x00, 0x30, 0x81, 0x94, 0x31, 0x0B, 0x30, 0x09, 0x06, 0x03, 
-	0x55, 0x04, 0x06, 0x13, 0x02, 0x55, 0x53, 0x31, 0x10, 0x30, 
-	0x0E, 0x06, 0x03, 0x55, 0x04, 0x08, 0x0C, 0x07, 0x4D, 0x6F, 
-	0x6E, 0x74, 0x61, 0x6E, 0x61, 0x31, 0x10, 0x30, 0x0E, 0x06, 
-	0x03, 0x55, 0x04, 0x07, 0x0C, 0x07, 0x42, 0x6F, 0x7A, 0x65, 
-	0x6D, 0x61, 0x6E, 0x31, 0x11, 0x30, 0x0F, 0x06, 0x03, 0x55, 
-	0x04, 0x0A, 0x0C, 0x08, 0x53, 0x61, 0x77, 0x74, 0x6F, 0x6F, 
-	0x74, 0x68, 0x31, 0x13, 0x30, 0x11, 0x06, 0x03, 0x55, 0x04, 
-	0x0B, 0x0C, 0x0A, 0x43, 0x6F, 0x6E, 0x73, 0x75, 0x6C, 0x74, 
-	0x69, 0x6E, 0x67, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 
-	0x04, 0x03, 0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77, 0x6F, 
-	0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x31, 
-	0x1F, 0x30, 0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 
-	0x0D, 0x01, 0x09, 0x01, 0x16, 0x10, 0x69, 0x6E, 0x66, 0x6F, 
-	0x40, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 
-	0x6F, 0x6D, 0x30, 0x1E, 0x17, 0x0D, 0x31, 0x38, 0x30, 0x34, 
-	0x31, 0x33, 0x31, 0x35, 0x32, 0x33, 0x31, 0x30, 0x5A, 0x17, 
-	0x0D, 0x32, 0x31, 0x30, 0x31, 0x30, 0x37, 0x31, 0x35, 0x32, 
-	0x33, 0x31, 0x30, 0x5A, 0x30, 0x81, 0x9D, 0x31, 0x0B, 0x30, 
-	0x09, 0x06, 0x03, 0x55, 0x04, 0x06, 0x13, 0x02, 0x55, 0x53, 
-	0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x08, 0x0C, 
-	0x07, 0x4D, 0x6F, 0x6E, 0x74, 0x61, 0x6E, 0x61, 0x31, 0x10, 
-	0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x07, 0x0C, 0x07, 0x42, 
-	0x6F, 0x7A, 0x65, 0x6D, 0x61, 0x6E, 0x31, 0x1A, 0x30, 0x18, 
-	0x06, 0x03, 0x55, 0x04, 0x0A, 0x0C, 0x11, 0x45, 0x6C, 0x6C, 
-	0x69, 0x70, 0x74, 0x69, 0x63, 0x20, 0x2D, 0x20, 0x52, 0x53, 
-	0x41, 0x73, 0x69, 0x67, 0x31, 0x13, 0x30, 0x11, 0x06, 0x03, 
-	0x55, 0x04, 0x0B, 0x0C, 0x0A, 0x45, 0x43, 0x43, 0x2D, 0x52, 
-	0x53, 0x41, 0x73, 0x69, 0x67, 0x31, 0x18, 0x30, 0x16, 0x06, 
-	0x03, 0x55, 0x04, 0x03, 0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E, 
-	0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 
-	0x6D, 0x31, 0x1F, 0x30, 0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48, 
-	0x86, 0xF7, 0x0D, 0x01, 0x09, 0x01, 0x16, 0x10, 0x69, 0x6E, 
-	0x66, 0x6F, 0x40, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 
-	0x2E, 0x63, 0x6F, 0x6D, 0x30, 0x59, 0x30, 0x13, 0x06, 0x07, 
-	0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x02, 0x01, 0x06, 0x08, 0x2A, 
-	0x86, 0x48, 0xCE, 0x3D, 0x03, 0x01, 0x07, 0x03, 0x42, 0x00, 
-	0x04, 0xBB, 0x33, 0xAC, 0x4C, 0x27, 0x50, 0x4A, 0xC6, 0x4A, 
-	0xA5, 0x04, 0xC3, 0x3C, 0xDE, 0x9F, 0x36, 0xDB, 0x72, 0x2D, 
-	0xCE, 0x94, 0xEA, 0x2B, 0xFA, 0xCB, 0x20, 0x09, 0x39, 0x2C, 
-	0x16, 0xE8, 0x61, 0x02, 0xE9, 0xAF, 0x4D, 0xD3, 0x02, 0x93, 
-	0x9A, 0x31, 0x5B, 0x97, 0x92, 0x21, 0x7F, 0xF0, 0xCF, 0x18, 
-	0xDA, 0x91, 0x11, 0x02, 0x34, 0x86, 0xE8, 0x20, 0x58, 0x33, 
-	0x0B, 0x80, 0x34, 0x89, 0xD8, 0xA3, 0x81, 0xFC, 0x30, 0x81, 
-	0xF9, 0x30, 0x1D, 0x06, 0x03, 0x55, 0x1D, 0x0E, 0x04, 0x16, 
-	0x04, 0x14, 0x5D, 0x5D, 0x26, 0xEF, 0xAC, 0x7E, 0x36, 0xF9, 
-	0x9B, 0x76, 0x15, 0x2B, 0x4A, 0x25, 0x02, 0x23, 0xEF, 0xB2, 
-	0x89, 0x30, 0x30, 0x81, 0xC9, 0x06, 0x03, 0x55, 0x1D, 0x23, 
-	0x04, 0x81, 0xC1, 0x30, 0x81, 0xBE, 0x80, 0x14, 0x27, 0x8E, 
-	0x67, 0x11, 0x74, 0xC3, 0x26, 0x1D, 0x3F, 0xED, 0x33, 0x63, 
-	0xB3, 0xA4, 0xD8, 0x1D, 0x30, 0xE5, 0xE8, 0xD5, 0xA1, 0x81, 
-	0x9A, 0xA4, 0x81, 0x97, 0x30, 0x81, 0x94, 0x31, 0x0B, 0x30, 
-	0x09, 0x06, 0x03, 0x55, 0x04, 0x06, 0x13, 0x02, 0x55, 0x53, 
-	0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x08, 0x0C, 
-	0x07, 0x4D, 0x6F, 0x6E, 0x74, 0x61, 0x6E, 0x61, 0x31, 0x10, 
-	0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x07, 0x0C, 0x07, 0x42, 
-	0x6F, 0x7A, 0x65, 0x6D, 0x61, 0x6E, 0x31, 0x11, 0x30, 0x0F, 
-	0x06, 0x03, 0x55, 0x04, 0x0A, 0x0C, 0x08, 0x53, 0x61, 0x77, 
-	0x74, 0x6F, 0x6F, 0x74, 0x68, 0x31, 0x13, 0x30, 0x11, 0x06, 
-	0x03, 0x55, 0x04, 0x0B, 0x0C, 0x0A, 0x43, 0x6F, 0x6E, 0x73, 
-	0x75, 0x6C, 0x74, 0x69, 0x6E, 0x67, 0x31, 0x18, 0x30, 0x16, 
-	0x06, 0x03, 0x55, 0x04, 0x03, 0x0C, 0x0F, 0x77, 0x77, 0x77, 
-	0x2E, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 
-	0x6F, 0x6D, 0x31, 0x1F, 0x30, 0x1D, 0x06, 0x09, 0x2A, 0x86, 
-	0x48, 0x86, 0xF7, 0x0D, 0x01, 0x09, 0x01, 0x16, 0x10, 0x69, 
-	0x6E, 0x66, 0x6F, 0x40, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 
-	0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x82, 0x09, 0x00, 0x86, 0xFF, 
-	0xF5, 0x8E, 0x10, 0xDE, 0xB8, 0xFB, 0x30, 0x0C, 0x06, 0x03, 
-	0x55, 0x1D, 0x13, 0x04, 0x05, 0x30, 0x03, 0x01, 0x01, 0xFF, 
-	0x30, 0x0D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 
-	0x01, 0x01, 0x0B, 0x05, 0x00, 0x03, 0x82, 0x01, 0x01, 0x00, 
-	0x0C, 0xBB, 0x67, 0xBD, 0xFC, 0xCD, 0x53, 0x6C, 0xFB, 0x4E, 
-	0x58, 0xC8, 0xEA, 0x52, 0x92, 0xEB, 0xE4, 0xC8, 0xBC, 0x57, 
-	0x0F, 0x08, 0x20, 0xC8, 0x83, 0xB0, 0xD5, 0xEA, 0x57, 0x27, 
-	0xBD, 0x68, 0x91, 0xFB, 0x99, 0x84, 0x8D, 0x15, 0x9E, 0x4F, 
-	0x8F, 0xC4, 0xCB, 0x34, 0x61, 0xC0, 0x59, 0x12, 0x9B, 0xC8, 
-	0x82, 0x17, 0x38, 0x4F, 0x9E, 0x53, 0x08, 0xA3, 0x69, 0x2E, 
-	0x2F, 0xC0, 0xB4, 0x2F, 0xA2, 0x4E, 0x10, 0x64, 0xB0, 0x07, 
-	0xA1, 0x51, 0x08, 0x1D, 0x91, 0x53, 0xA2, 0x79, 0x55, 0x20, 
-	0x41, 0x65, 0x35, 0x3E, 0x0B, 0x38, 0x01, 0x57, 0x02, 0x8C, 
-	0x25, 0xE7, 0xAB, 0x4F, 0x8B, 0x59, 0xF0, 0xED, 0x8E, 0x4A, 
-	0x15, 0x0B, 0x32, 0xFB, 0x7A, 0x8B, 0x02, 0xEA, 0x9D, 0xE1, 
-	0xAB, 0xC4, 0x07, 0xCC, 0xDA, 0x0F, 0xA3, 0x16, 0xDB, 0x8E, 
-	0x5B, 0xBC, 0x96, 0xAB, 0x10, 0xB8, 0xDE, 0x09, 0x8B, 0xF7, 
-	0xCB, 0xA7, 0x78, 0x66, 0x17, 0xE3, 0x25, 0x6E, 0x57, 0x9D, 
-	0x13, 0x61, 0x7B, 0x55, 0x1A, 0xDF, 0x8F, 0x39, 0x15, 0x4E, 
-	0x42, 0x22, 0x00, 0x85, 0xC4, 0x51, 0x0B, 0x6B, 0xA6, 0x67, 
-	0xC0, 0xFB, 0xEA, 0x22, 0x77, 0x7D, 0x48, 0x76, 0xAB, 0x39, 
-	0x20, 0x09, 0xD5, 0x52, 0x89, 0x3E, 0x6B, 0x30, 0x7B, 0x50, 
-	0x18, 0xE8, 0x62, 0x05, 0xBE, 0xBB, 0x7F, 0x16, 0x77, 0x9C, 
-	0xBB, 0x5A, 0x22, 0x96, 0x99, 0xB0, 0x96, 0x83, 0xB7, 0x43, 
-	0x31, 0x97, 0xCF, 0xFD, 0x85, 0x52, 0xD8, 0x52, 0xC8, 0x67, 
-	0x5C, 0xF8, 0x22, 0x72, 0x35, 0x93, 0x92, 0x6C, 0xEC, 0x3C, 
-	0x6A, 0xC6, 0x81, 0x20, 0xA5, 0xCD, 0x50, 0xF9, 0x21, 0x7A, 
-	0xA6, 0x7A, 0x1E, 0xE7, 0x59, 0x22, 0x5D, 0x8A, 0x93, 0x51, 
-	0x8E, 0xFB, 0x29, 0x56, 0xFB, 0xBE, 0x9B, 0x87, 0x48, 0x5F, 
-	0xA5, 0x72, 0xE7, 0x4E, 0xFE, 0x5E
+        0x30, 0x82, 0x03, 0xE0, 0x30, 0x82, 0x02, 0xC8, 0xA0, 0x03,
+        0x02, 0x01, 0x02, 0x02, 0x01, 0x01, 0x30, 0x0D, 0x06, 0x09,
+        0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0B, 0x05,
+        0x00, 0x30, 0x81, 0x94, 0x31, 0x0B, 0x30, 0x09, 0x06, 0x03,
+        0x55, 0x04, 0x06, 0x13, 0x02, 0x55, 0x53, 0x31, 0x10, 0x30,
+        0x0E, 0x06, 0x03, 0x55, 0x04, 0x08, 0x0C, 0x07, 0x4D, 0x6F,
+        0x6E, 0x74, 0x61, 0x6E, 0x61, 0x31, 0x10, 0x30, 0x0E, 0x06,
+        0x03, 0x55, 0x04, 0x07, 0x0C, 0x07, 0x42, 0x6F, 0x7A, 0x65,
+        0x6D, 0x61, 0x6E, 0x31, 0x11, 0x30, 0x0F, 0x06, 0x03, 0x55,
+        0x04, 0x0A, 0x0C, 0x08, 0x53, 0x61, 0x77, 0x74, 0x6F, 0x6F,
+        0x74, 0x68, 0x31, 0x13, 0x30, 0x11, 0x06, 0x03, 0x55, 0x04,
+        0x0B, 0x0C, 0x0A, 0x43, 0x6F, 0x6E, 0x73, 0x75, 0x6C, 0x74,
+        0x69, 0x6E, 0x67, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55,
+        0x04, 0x03, 0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77, 0x6F,
+        0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x31,
+        0x1F, 0x30, 0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7,
+        0x0D, 0x01, 0x09, 0x01, 0x16, 0x10, 0x69, 0x6E, 0x66, 0x6F,
+        0x40, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63,
+        0x6F, 0x6D, 0x30, 0x1E, 0x17, 0x0D, 0x31, 0x38, 0x30, 0x34,
+        0x31, 0x33, 0x31, 0x35, 0x32, 0x33, 0x31, 0x30, 0x5A, 0x17,
+        0x0D, 0x32, 0x31, 0x30, 0x31, 0x30, 0x37, 0x31, 0x35, 0x32,
+        0x33, 0x31, 0x30, 0x5A, 0x30, 0x81, 0x9D, 0x31, 0x0B, 0x30,
+        0x09, 0x06, 0x03, 0x55, 0x04, 0x06, 0x13, 0x02, 0x55, 0x53,
+        0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x08, 0x0C,
+        0x07, 0x4D, 0x6F, 0x6E, 0x74, 0x61, 0x6E, 0x61, 0x31, 0x10,
+        0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x07, 0x0C, 0x07, 0x42,
+        0x6F, 0x7A, 0x65, 0x6D, 0x61, 0x6E, 0x31, 0x1A, 0x30, 0x18,
+        0x06, 0x03, 0x55, 0x04, 0x0A, 0x0C, 0x11, 0x45, 0x6C, 0x6C,
+        0x69, 0x70, 0x74, 0x69, 0x63, 0x20, 0x2D, 0x20, 0x52, 0x53,
+        0x41, 0x73, 0x69, 0x67, 0x31, 0x13, 0x30, 0x11, 0x06, 0x03,
+        0x55, 0x04, 0x0B, 0x0C, 0x0A, 0x45, 0x43, 0x43, 0x2D, 0x52,
+        0x53, 0x41, 0x73, 0x69, 0x67, 0x31, 0x18, 0x30, 0x16, 0x06,
+        0x03, 0x55, 0x04, 0x03, 0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E,
+        0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F,
+        0x6D, 0x31, 0x1F, 0x30, 0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48,
+        0x86, 0xF7, 0x0D, 0x01, 0x09, 0x01, 0x16, 0x10, 0x69, 0x6E,
+        0x66, 0x6F, 0x40, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C,
+        0x2E, 0x63, 0x6F, 0x6D, 0x30, 0x59, 0x30, 0x13, 0x06, 0x07,
+        0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x02, 0x01, 0x06, 0x08, 0x2A,
+        0x86, 0x48, 0xCE, 0x3D, 0x03, 0x01, 0x07, 0x03, 0x42, 0x00,
+        0x04, 0xBB, 0x33, 0xAC, 0x4C, 0x27, 0x50, 0x4A, 0xC6, 0x4A,
+        0xA5, 0x04, 0xC3, 0x3C, 0xDE, 0x9F, 0x36, 0xDB, 0x72, 0x2D,
+        0xCE, 0x94, 0xEA, 0x2B, 0xFA, 0xCB, 0x20, 0x09, 0x39, 0x2C,
+        0x16, 0xE8, 0x61, 0x02, 0xE9, 0xAF, 0x4D, 0xD3, 0x02, 0x93,
+        0x9A, 0x31, 0x5B, 0x97, 0x92, 0x21, 0x7F, 0xF0, 0xCF, 0x18,
+        0xDA, 0x91, 0x11, 0x02, 0x34, 0x86, 0xE8, 0x20, 0x58, 0x33,
+        0x0B, 0x80, 0x34, 0x89, 0xD8, 0xA3, 0x81, 0xFC, 0x30, 0x81,
+        0xF9, 0x30, 0x1D, 0x06, 0x03, 0x55, 0x1D, 0x0E, 0x04, 0x16,
+        0x04, 0x14, 0x5D, 0x5D, 0x26, 0xEF, 0xAC, 0x7E, 0x36, 0xF9,
+        0x9B, 0x76, 0x15, 0x2B, 0x4A, 0x25, 0x02, 0x23, 0xEF, 0xB2,
+        0x89, 0x30, 0x30, 0x81, 0xC9, 0x06, 0x03, 0x55, 0x1D, 0x23,
+        0x04, 0x81, 0xC1, 0x30, 0x81, 0xBE, 0x80, 0x14, 0x27, 0x8E,
+        0x67, 0x11, 0x74, 0xC3, 0x26, 0x1D, 0x3F, 0xED, 0x33, 0x63,
+        0xB3, 0xA4, 0xD8, 0x1D, 0x30, 0xE5, 0xE8, 0xD5, 0xA1, 0x81,
+        0x9A, 0xA4, 0x81, 0x97, 0x30, 0x81, 0x94, 0x31, 0x0B, 0x30,
+        0x09, 0x06, 0x03, 0x55, 0x04, 0x06, 0x13, 0x02, 0x55, 0x53,
+        0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x08, 0x0C,
+        0x07, 0x4D, 0x6F, 0x6E, 0x74, 0x61, 0x6E, 0x61, 0x31, 0x10,
+        0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x07, 0x0C, 0x07, 0x42,
+        0x6F, 0x7A, 0x65, 0x6D, 0x61, 0x6E, 0x31, 0x11, 0x30, 0x0F,
+        0x06, 0x03, 0x55, 0x04, 0x0A, 0x0C, 0x08, 0x53, 0x61, 0x77,
+        0x74, 0x6F, 0x6F, 0x74, 0x68, 0x31, 0x13, 0x30, 0x11, 0x06,
+        0x03, 0x55, 0x04, 0x0B, 0x0C, 0x0A, 0x43, 0x6F, 0x6E, 0x73,
+        0x75, 0x6C, 0x74, 0x69, 0x6E, 0x67, 0x31, 0x18, 0x30, 0x16,
+        0x06, 0x03, 0x55, 0x04, 0x03, 0x0C, 0x0F, 0x77, 0x77, 0x77,
+        0x2E, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63,
+        0x6F, 0x6D, 0x31, 0x1F, 0x30, 0x1D, 0x06, 0x09, 0x2A, 0x86,
+        0x48, 0x86, 0xF7, 0x0D, 0x01, 0x09, 0x01, 0x16, 0x10, 0x69,
+        0x6E, 0x66, 0x6F, 0x40, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73,
+        0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x82, 0x09, 0x00, 0x86, 0xFF,
+        0xF5, 0x8E, 0x10, 0xDE, 0xB8, 0xFB, 0x30, 0x0C, 0x06, 0x03,
+        0x55, 0x1D, 0x13, 0x04, 0x05, 0x30, 0x03, 0x01, 0x01, 0xFF,
+        0x30, 0x0D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D,
+        0x01, 0x01, 0x0B, 0x05, 0x00, 0x03, 0x82, 0x01, 0x01, 0x00,
+        0x0C, 0xBB, 0x67, 0xBD, 0xFC, 0xCD, 0x53, 0x6C, 0xFB, 0x4E,
+        0x58, 0xC8, 0xEA, 0x52, 0x92, 0xEB, 0xE4, 0xC8, 0xBC, 0x57,
+        0x0F, 0x08, 0x20, 0xC8, 0x83, 0xB0, 0xD5, 0xEA, 0x57, 0x27,
+        0xBD, 0x68, 0x91, 0xFB, 0x99, 0x84, 0x8D, 0x15, 0x9E, 0x4F,
+        0x8F, 0xC4, 0xCB, 0x34, 0x61, 0xC0, 0x59, 0x12, 0x9B, 0xC8,
+        0x82, 0x17, 0x38, 0x4F, 0x9E, 0x53, 0x08, 0xA3, 0x69, 0x2E,
+        0x2F, 0xC0, 0xB4, 0x2F, 0xA2, 0x4E, 0x10, 0x64, 0xB0, 0x07,
+        0xA1, 0x51, 0x08, 0x1D, 0x91, 0x53, 0xA2, 0x79, 0x55, 0x20,
+        0x41, 0x65, 0x35, 0x3E, 0x0B, 0x38, 0x01, 0x57, 0x02, 0x8C,
+        0x25, 0xE7, 0xAB, 0x4F, 0x8B, 0x59, 0xF0, 0xED, 0x8E, 0x4A,
+        0x15, 0x0B, 0x32, 0xFB, 0x7A, 0x8B, 0x02, 0xEA, 0x9D, 0xE1,
+        0xAB, 0xC4, 0x07, 0xCC, 0xDA, 0x0F, 0xA3, 0x16, 0xDB, 0x8E,
+        0x5B, 0xBC, 0x96, 0xAB, 0x10, 0xB8, 0xDE, 0x09, 0x8B, 0xF7,
+        0xCB, 0xA7, 0x78, 0x66, 0x17, 0xE3, 0x25, 0x6E, 0x57, 0x9D,
+        0x13, 0x61, 0x7B, 0x55, 0x1A, 0xDF, 0x8F, 0x39, 0x15, 0x4E,
+        0x42, 0x22, 0x00, 0x85, 0xC4, 0x51, 0x0B, 0x6B, 0xA6, 0x67,
+        0xC0, 0xFB, 0xEA, 0x22, 0x77, 0x7D, 0x48, 0x76, 0xAB, 0x39,
+        0x20, 0x09, 0xD5, 0x52, 0x89, 0x3E, 0x6B, 0x30, 0x7B, 0x50,
+        0x18, 0xE8, 0x62, 0x05, 0xBE, 0xBB, 0x7F, 0x16, 0x77, 0x9C,
+        0xBB, 0x5A, 0x22, 0x96, 0x99, 0xB0, 0x96, 0x83, 0xB7, 0x43,
+        0x31, 0x97, 0xCF, 0xFD, 0x85, 0x52, 0xD8, 0x52, 0xC8, 0x67,
+        0x5C, 0xF8, 0x22, 0x72, 0x35, 0x93, 0x92, 0x6C, 0xEC, 0x3C,
+        0x6A, 0xC6, 0x81, 0x20, 0xA5, 0xCD, 0x50, 0xF9, 0x21, 0x7A,
+        0xA6, 0x7A, 0x1E, 0xE7, 0x59, 0x22, 0x5D, 0x8A, 0x93, 0x51,
+        0x8E, 0xFB, 0x29, 0x56, 0xFB, 0xBE, 0x9B, 0x87, 0x48, 0x5F,
+        0xA5, 0x72, 0xE7, 0x4E, 0xFE, 0x5E
 };
 static const int sizeof_serv_ecc_rsa_der_256 = sizeof(serv_ecc_rsa_der_256);
 
 /* ./certs/server-ecc.der, ECC */
 static const unsigned char serv_ecc_der_256[] =
 {
-	0x30, 0x82, 0x03, 0x50, 0x30, 0x82, 0x02, 0xF5, 0xA0, 0x03, 
-	0x02, 0x01, 0x02, 0x02, 0x02, 0x10, 0x00, 0x30, 0x0A, 0x06, 
-	0x08, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x04, 0x03, 0x02, 0x30, 
-	0x81, 0x97, 0x31, 0x0B, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 
-	0x06, 0x13, 0x02, 0x55, 0x53, 0x31, 0x13, 0x30, 0x11, 0x06, 
-	0x03, 0x55, 0x04, 0x08, 0x0C, 0x0A, 0x57, 0x61, 0x73, 0x68, 
-	0x69, 0x6E, 0x67, 0x74, 0x6F, 0x6E, 0x31, 0x10, 0x30, 0x0E, 
-	0x06, 0x03, 0x55, 0x04, 0x07, 0x0C, 0x07, 0x53, 0x65, 0x61, 
-	0x74, 0x74, 0x6C, 0x65, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 
-	0x55, 0x04, 0x0A, 0x0C, 0x07, 0x77, 0x6F, 0x6C, 0x66, 0x53, 
-	0x53, 0x4C, 0x31, 0x14, 0x30, 0x12, 0x06, 0x03, 0x55, 0x04, 
-	0x0B, 0x0C, 0x0B, 0x44, 0x65, 0x76, 0x65, 0x6C, 0x6F, 0x70, 
-	0x6D, 0x65, 0x6E, 0x74, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 
-	0x55, 0x04, 0x03, 0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77, 
-	0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 
-	0x31, 0x1F, 0x30, 0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 
-	0xF7, 0x0D, 0x01, 0x09, 0x01, 0x16, 0x10, 0x69, 0x6E, 0x66, 
-	0x6F, 0x40, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 
-	0x63, 0x6F, 0x6D, 0x30, 0x1E, 0x17, 0x0D, 0x31, 0x37, 0x31, 
-	0x30, 0x32, 0x30, 0x31, 0x38, 0x31, 0x39, 0x30, 0x36, 0x5A, 
-	0x17, 0x0D, 0x32, 0x37, 0x31, 0x30, 0x31, 0x38, 0x31, 0x38, 
-	0x31, 0x39, 0x30, 0x36, 0x5A, 0x30, 0x81, 0x8F, 0x31, 0x0B, 
-	0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 0x06, 0x13, 0x02, 0x55, 
-	0x53, 0x31, 0x13, 0x30, 0x11, 0x06, 0x03, 0x55, 0x04, 0x08, 
-	0x0C, 0x0A, 0x57, 0x61, 0x73, 0x68, 0x69, 0x6E, 0x67, 0x74, 
-	0x6F, 0x6E, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 
-	0x07, 0x0C, 0x07, 0x53, 0x65, 0x61, 0x74, 0x74, 0x6C, 0x65, 
-	0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x0A, 0x0C, 
-	0x07, 0x45, 0x6C, 0x69, 0x70, 0x74, 0x69, 0x63, 0x31, 0x0C, 
-	0x30, 0x0A, 0x06, 0x03, 0x55, 0x04, 0x0B, 0x0C, 0x03, 0x45, 
-	0x43, 0x43, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 
-	0x03, 0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77, 0x6F, 0x6C, 
-	0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x31, 0x1F, 
-	0x30, 0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 
-	0x01, 0x09, 0x01, 0x16, 0x10, 0x69, 0x6E, 0x66, 0x6F, 0x40, 
-	0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 
-	0x6D, 0x30, 0x59, 0x30, 0x13, 0x06, 0x07, 0x2A, 0x86, 0x48, 
-	0xCE, 0x3D, 0x02, 0x01, 0x06, 0x08, 0x2A, 0x86, 0x48, 0xCE, 
-	0x3D, 0x03, 0x01, 0x07, 0x03, 0x42, 0x00, 0x04, 0xBB, 0x33, 
-	0xAC, 0x4C, 0x27, 0x50, 0x4A, 0xC6, 0x4A, 0xA5, 0x04, 0xC3, 
-	0x3C, 0xDE, 0x9F, 0x36, 0xDB, 0x72, 0x2D, 0xCE, 0x94, 0xEA, 
-	0x2B, 0xFA, 0xCB, 0x20, 0x09, 0x39, 0x2C, 0x16, 0xE8, 0x61, 
-	0x02, 0xE9, 0xAF, 0x4D, 0xD3, 0x02, 0x93, 0x9A, 0x31, 0x5B, 
-	0x97, 0x92, 0x21, 0x7F, 0xF0, 0xCF, 0x18, 0xDA, 0x91, 0x11, 
-	0x02, 0x34, 0x86, 0xE8, 0x20, 0x58, 0x33, 0x0B, 0x80, 0x34, 
-	0x89, 0xD8, 0xA3, 0x82, 0x01, 0x35, 0x30, 0x82, 0x01, 0x31, 
-	0x30, 0x09, 0x06, 0x03, 0x55, 0x1D, 0x13, 0x04, 0x02, 0x30, 
-	0x00, 0x30, 0x11, 0x06, 0x09, 0x60, 0x86, 0x48, 0x01, 0x86, 
-	0xF8, 0x42, 0x01, 0x01, 0x04, 0x04, 0x03, 0x02, 0x06, 0x40, 
-	0x30, 0x1D, 0x06, 0x03, 0x55, 0x1D, 0x0E, 0x04, 0x16, 0x04, 
-	0x14, 0x5D, 0x5D, 0x26, 0xEF, 0xAC, 0x7E, 0x36, 0xF9, 0x9B, 
-	0x76, 0x15, 0x2B, 0x4A, 0x25, 0x02, 0x23, 0xEF, 0xB2, 0x89, 
-	0x30, 0x30, 0x81, 0xCC, 0x06, 0x03, 0x55, 0x1D, 0x23, 0x04, 
-	0x81, 0xC4, 0x30, 0x81, 0xC1, 0x80, 0x14, 0x56, 0x8E, 0x9A, 
-	0xC3, 0xF0, 0x42, 0xDE, 0x18, 0xB9, 0x45, 0x55, 0x6E, 0xF9, 
-	0x93, 0xCF, 0xEA, 0xC3, 0xF3, 0xA5, 0x21, 0xA1, 0x81, 0x9D, 
-	0xA4, 0x81, 0x9A, 0x30, 0x81, 0x97, 0x31, 0x0B, 0x30, 0x09, 
-	0x06, 0x03, 0x55, 0x04, 0x06, 0x13, 0x02, 0x55, 0x53, 0x31, 
-	0x13, 0x30, 0x11, 0x06, 0x03, 0x55, 0x04, 0x08, 0x0C, 0x0A, 
-	0x57, 0x61, 0x73, 0x68, 0x69, 0x6E, 0x67, 0x74, 0x6F, 0x6E, 
-	0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x07, 0x0C, 
-	0x07, 0x53, 0x65, 0x61, 0x74, 0x74, 0x6C, 0x65, 0x31, 0x10, 
-	0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x0A, 0x0C, 0x07, 0x77, 
-	0x6F, 0x6C, 0x66, 0x53, 0x53, 0x4C, 0x31, 0x14, 0x30, 0x12, 
-	0x06, 0x03, 0x55, 0x04, 0x0B, 0x0C, 0x0B, 0x44, 0x65, 0x76, 
-	0x65, 0x6C, 0x6F, 0x70, 0x6D, 0x65, 0x6E, 0x74, 0x31, 0x18, 
-	0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 0x03, 0x0C, 0x0F, 0x77, 
-	0x77, 0x77, 0x2E, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 
-	0x2E, 0x63, 0x6F, 0x6D, 0x31, 0x1F, 0x30, 0x1D, 0x06, 0x09, 
-	0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x09, 0x01, 0x16, 
-	0x10, 0x69, 0x6E, 0x66, 0x6F, 0x40, 0x77, 0x6F, 0x6C, 0x66, 
-	0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x82, 0x09, 0x00, 
-	0x97, 0xB4, 0xBD, 0x16, 0x78, 0xF8, 0x47, 0xF2, 0x30, 0x0E, 
-	0x06, 0x03, 0x55, 0x1D, 0x0F, 0x01, 0x01, 0xFF, 0x04, 0x04, 
-	0x03, 0x02, 0x03, 0xA8, 0x30, 0x13, 0x06, 0x03, 0x55, 0x1D, 
-	0x25, 0x04, 0x0C, 0x30, 0x0A, 0x06, 0x08, 0x2B, 0x06, 0x01, 
-	0x05, 0x05, 0x07, 0x03, 0x01, 0x30, 0x0A, 0x06, 0x08, 0x2A, 
-	0x86, 0x48, 0xCE, 0x3D, 0x04, 0x03, 0x02, 0x03, 0x49, 0x00, 
-	0x30, 0x46, 0x02, 0x21, 0x00, 0xBE, 0xB8, 0x58, 0xF0, 0xE4, 
-	0x15, 0x01, 0x1F, 0xDF, 0x70, 0x54, 0x73, 0x4A, 0x6C, 0x40, 
-	0x1F, 0x77, 0xA8, 0xB4, 0xEB, 0x52, 0x1E, 0xBF, 0xF5, 0x0D, 
-	0xB1, 0x33, 0xCA, 0x6A, 0xC4, 0x76, 0xB9, 0x02, 0x21, 0x00, 
-	0x97, 0x08, 0xDE, 0x2C, 0x28, 0xC1, 0x45, 0x71, 0xB6, 0x2C, 
-	0x54, 0x87, 0x98, 0x63, 0x76, 0xA8, 0x21, 0x34, 0x90, 0xA8, 
-	0xF7, 0x9E, 0x3F, 0xFC, 0x02, 0xB0, 0xE7, 0xD3, 0x09, 0x31, 
-	0x27, 0xE4
+        0x30, 0x82, 0x03, 0x50, 0x30, 0x82, 0x02, 0xF5, 0xA0, 0x03,
+        0x02, 0x01, 0x02, 0x02, 0x02, 0x10, 0x00, 0x30, 0x0A, 0x06,
+        0x08, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x04, 0x03, 0x02, 0x30,
+        0x81, 0x97, 0x31, 0x0B, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04,
+        0x06, 0x13, 0x02, 0x55, 0x53, 0x31, 0x13, 0x30, 0x11, 0x06,
+        0x03, 0x55, 0x04, 0x08, 0x0C, 0x0A, 0x57, 0x61, 0x73, 0x68,
+        0x69, 0x6E, 0x67, 0x74, 0x6F, 0x6E, 0x31, 0x10, 0x30, 0x0E,
+        0x06, 0x03, 0x55, 0x04, 0x07, 0x0C, 0x07, 0x53, 0x65, 0x61,
+        0x74, 0x74, 0x6C, 0x65, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03,
+        0x55, 0x04, 0x0A, 0x0C, 0x07, 0x77, 0x6F, 0x6C, 0x66, 0x53,
+        0x53, 0x4C, 0x31, 0x14, 0x30, 0x12, 0x06, 0x03, 0x55, 0x04,
+        0x0B, 0x0C, 0x0B, 0x44, 0x65, 0x76, 0x65, 0x6C, 0x6F, 0x70,
+        0x6D, 0x65, 0x6E, 0x74, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03,
+        0x55, 0x04, 0x03, 0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77,
+        0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D,
+        0x31, 0x1F, 0x30, 0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86,
+        0xF7, 0x0D, 0x01, 0x09, 0x01, 0x16, 0x10, 0x69, 0x6E, 0x66,
+        0x6F, 0x40, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E,
+        0x63, 0x6F, 0x6D, 0x30, 0x1E, 0x17, 0x0D, 0x31, 0x37, 0x31,
+        0x30, 0x32, 0x30, 0x31, 0x38, 0x31, 0x39, 0x30, 0x36, 0x5A,
+        0x17, 0x0D, 0x32, 0x37, 0x31, 0x30, 0x31, 0x38, 0x31, 0x38,
+        0x31, 0x39, 0x30, 0x36, 0x5A, 0x30, 0x81, 0x8F, 0x31, 0x0B,
+        0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 0x06, 0x13, 0x02, 0x55,
+        0x53, 0x31, 0x13, 0x30, 0x11, 0x06, 0x03, 0x55, 0x04, 0x08,
+        0x0C, 0x0A, 0x57, 0x61, 0x73, 0x68, 0x69, 0x6E, 0x67, 0x74,
+        0x6F, 0x6E, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04,
+        0x07, 0x0C, 0x07, 0x53, 0x65, 0x61, 0x74, 0x74, 0x6C, 0x65,
+        0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x0A, 0x0C,
+        0x07, 0x45, 0x6C, 0x69, 0x70, 0x74, 0x69, 0x63, 0x31, 0x0C,
+        0x30, 0x0A, 0x06, 0x03, 0x55, 0x04, 0x0B, 0x0C, 0x03, 0x45,
+        0x43, 0x43, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 0x04,
+        0x03, 0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77, 0x6F, 0x6C,
+        0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x31, 0x1F,
+        0x30, 0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D,
+        0x01, 0x09, 0x01, 0x16, 0x10, 0x69, 0x6E, 0x66, 0x6F, 0x40,
+        0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F,
+        0x6D, 0x30, 0x59, 0x30, 0x13, 0x06, 0x07, 0x2A, 0x86, 0x48,
+        0xCE, 0x3D, 0x02, 0x01, 0x06, 0x08, 0x2A, 0x86, 0x48, 0xCE,
+        0x3D, 0x03, 0x01, 0x07, 0x03, 0x42, 0x00, 0x04, 0xBB, 0x33,
+        0xAC, 0x4C, 0x27, 0x50, 0x4A, 0xC6, 0x4A, 0xA5, 0x04, 0xC3,
+        0x3C, 0xDE, 0x9F, 0x36, 0xDB, 0x72, 0x2D, 0xCE, 0x94, 0xEA,
+        0x2B, 0xFA, 0xCB, 0x20, 0x09, 0x39, 0x2C, 0x16, 0xE8, 0x61,
+        0x02, 0xE9, 0xAF, 0x4D, 0xD3, 0x02, 0x93, 0x9A, 0x31, 0x5B,
+        0x97, 0x92, 0x21, 0x7F, 0xF0, 0xCF, 0x18, 0xDA, 0x91, 0x11,
+        0x02, 0x34, 0x86, 0xE8, 0x20, 0x58, 0x33, 0x0B, 0x80, 0x34,
+        0x89, 0xD8, 0xA3, 0x82, 0x01, 0x35, 0x30, 0x82, 0x01, 0x31,
+        0x30, 0x09, 0x06, 0x03, 0x55, 0x1D, 0x13, 0x04, 0x02, 0x30,
+        0x00, 0x30, 0x11, 0x06, 0x09, 0x60, 0x86, 0x48, 0x01, 0x86,
+        0xF8, 0x42, 0x01, 0x01, 0x04, 0x04, 0x03, 0x02, 0x06, 0x40,
+        0x30, 0x1D, 0x06, 0x03, 0x55, 0x1D, 0x0E, 0x04, 0x16, 0x04,
+        0x14, 0x5D, 0x5D, 0x26, 0xEF, 0xAC, 0x7E, 0x36, 0xF9, 0x9B,
+        0x76, 0x15, 0x2B, 0x4A, 0x25, 0x02, 0x23, 0xEF, 0xB2, 0x89,
+        0x30, 0x30, 0x81, 0xCC, 0x06, 0x03, 0x55, 0x1D, 0x23, 0x04,
+        0x81, 0xC4, 0x30, 0x81, 0xC1, 0x80, 0x14, 0x56, 0x8E, 0x9A,
+        0xC3, 0xF0, 0x42, 0xDE, 0x18, 0xB9, 0x45, 0x55, 0x6E, 0xF9,
+        0x93, 0xCF, 0xEA, 0xC3, 0xF3, 0xA5, 0x21, 0xA1, 0x81, 0x9D,
+        0xA4, 0x81, 0x9A, 0x30, 0x81, 0x97, 0x31, 0x0B, 0x30, 0x09,
+        0x06, 0x03, 0x55, 0x04, 0x06, 0x13, 0x02, 0x55, 0x53, 0x31,
+        0x13, 0x30, 0x11, 0x06, 0x03, 0x55, 0x04, 0x08, 0x0C, 0x0A,
+        0x57, 0x61, 0x73, 0x68, 0x69, 0x6E, 0x67, 0x74, 0x6F, 0x6E,
+        0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x07, 0x0C,
+        0x07, 0x53, 0x65, 0x61, 0x74, 0x74, 0x6C, 0x65, 0x31, 0x10,
+        0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x0A, 0x0C, 0x07, 0x77,
+        0x6F, 0x6C, 0x66, 0x53, 0x53, 0x4C, 0x31, 0x14, 0x30, 0x12,
+        0x06, 0x03, 0x55, 0x04, 0x0B, 0x0C, 0x0B, 0x44, 0x65, 0x76,
+        0x65, 0x6C, 0x6F, 0x70, 0x6D, 0x65, 0x6E, 0x74, 0x31, 0x18,
+        0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 0x03, 0x0C, 0x0F, 0x77,
+        0x77, 0x77, 0x2E, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C,
+        0x2E, 0x63, 0x6F, 0x6D, 0x31, 0x1F, 0x30, 0x1D, 0x06, 0x09,
+        0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x09, 0x01, 0x16,
+        0x10, 0x69, 0x6E, 0x66, 0x6F, 0x40, 0x77, 0x6F, 0x6C, 0x66,
+        0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x82, 0x09, 0x00,
+        0x97, 0xB4, 0xBD, 0x16, 0x78, 0xF8, 0x47, 0xF2, 0x30, 0x0E,
+        0x06, 0x03, 0x55, 0x1D, 0x0F, 0x01, 0x01, 0xFF, 0x04, 0x04,
+        0x03, 0x02, 0x03, 0xA8, 0x30, 0x13, 0x06, 0x03, 0x55, 0x1D,
+        0x25, 0x04, 0x0C, 0x30, 0x0A, 0x06, 0x08, 0x2B, 0x06, 0x01,
+        0x05, 0x05, 0x07, 0x03, 0x01, 0x30, 0x0A, 0x06, 0x08, 0x2A,
+        0x86, 0x48, 0xCE, 0x3D, 0x04, 0x03, 0x02, 0x03, 0x49, 0x00,
+        0x30, 0x46, 0x02, 0x21, 0x00, 0xBE, 0xB8, 0x58, 0xF0, 0xE4,
+        0x15, 0x01, 0x1F, 0xDF, 0x70, 0x54, 0x73, 0x4A, 0x6C, 0x40,
+        0x1F, 0x77, 0xA8, 0xB4, 0xEB, 0x52, 0x1E, 0xBF, 0xF5, 0x0D,
+        0xB1, 0x33, 0xCA, 0x6A, 0xC4, 0x76, 0xB9, 0x02, 0x21, 0x00,
+        0x97, 0x08, 0xDE, 0x2C, 0x28, 0xC1, 0x45, 0x71, 0xB6, 0x2C,
+        0x54, 0x87, 0x98, 0x63, 0x76, 0xA8, 0x21, 0x34, 0x90, 0xA8,
+        0xF7, 0x9E, 0x3F, 0xFC, 0x02, 0xB0, 0xE7, 0xD3, 0x09, 0x31,
+        0x27, 0xE4
 };
 static const int sizeof_serv_ecc_der_256 = sizeof(serv_ecc_der_256);
 
 /* ./certs/ca-ecc-key.der, ECC */
 static const unsigned char ca_ecc_key_der_256[] =
 {
-	0x30, 0x77, 0x02, 0x01, 0x01, 0x04, 0x20, 0x02, 0xE1, 0x33, 
-	0x98, 0x77, 0x97, 0xAC, 0x4A, 0x59, 0x6D, 0x28, 0x9B, 0x6E, 
-	0xA0, 0x93, 0x9B, 0x07, 0x71, 0x8B, 0x4D, 0x60, 0x63, 0x85, 
-	0x99, 0xE6, 0xBB, 0x16, 0x70, 0xE9, 0x0A, 0xF6, 0x80, 0xA0, 
-	0x0A, 0x06, 0x08, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x01, 
-	0x07, 0xA1, 0x44, 0x03, 0x42, 0x00, 0x04, 0x02, 0xD3, 0xD9, 
-	0x6E, 0xD6, 0x01, 0x8E, 0x45, 0xC8, 0xB9, 0x90, 0x31, 0xE5, 
-	0xC0, 0x4C, 0xE3, 0x9E, 0xAD, 0x29, 0x38, 0x98, 0xBA, 0x10, 
-	0xD6, 0xE9, 0x09, 0x2A, 0x80, 0xA9, 0x2E, 0x17, 0x2A, 0xB9, 
-	0x8A, 0xBF, 0x33, 0x83, 0x46, 0xE3, 0x95, 0x0B, 0xE4, 0x77, 
-	0x40, 0xB5, 0x3B, 0x43, 0x45, 0x33, 0x0F, 0x61, 0x53, 0x7C, 
-	0x37, 0x44, 0xC1, 0xCB, 0xFC, 0x80, 0xCA, 0xE8, 0x43, 0xEA, 
-	0xA7
+        0x30, 0x77, 0x02, 0x01, 0x01, 0x04, 0x20, 0x02, 0xE1, 0x33,
+        0x98, 0x77, 0x97, 0xAC, 0x4A, 0x59, 0x6D, 0x28, 0x9B, 0x6E,
+        0xA0, 0x93, 0x9B, 0x07, 0x71, 0x8B, 0x4D, 0x60, 0x63, 0x85,
+        0x99, 0xE6, 0xBB, 0x16, 0x70, 0xE9, 0x0A, 0xF6, 0x80, 0xA0,
+        0x0A, 0x06, 0x08, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x01,
+        0x07, 0xA1, 0x44, 0x03, 0x42, 0x00, 0x04, 0x02, 0xD3, 0xD9,
+        0x6E, 0xD6, 0x01, 0x8E, 0x45, 0xC8, 0xB9, 0x90, 0x31, 0xE5,
+        0xC0, 0x4C, 0xE3, 0x9E, 0xAD, 0x29, 0x38, 0x98, 0xBA, 0x10,
+        0xD6, 0xE9, 0x09, 0x2A, 0x80, 0xA9, 0x2E, 0x17, 0x2A, 0xB9,
+        0x8A, 0xBF, 0x33, 0x83, 0x46, 0xE3, 0x95, 0x0B, 0xE4, 0x77,
+        0x40, 0xB5, 0x3B, 0x43, 0x45, 0x33, 0x0F, 0x61, 0x53, 0x7C,
+        0x37, 0x44, 0xC1, 0xCB, 0xFC, 0x80, 0xCA, 0xE8, 0x43, 0xEA,
+        0xA7
 };
 static const int sizeof_ca_ecc_key_der_256 = sizeof(ca_ecc_key_der_256);
 
 /* ./certs/ca-ecc-cert.der, ECC */
 static const unsigned char ca_ecc_cert_der_256[] =
 {
-	0x30, 0x82, 0x02, 0x8B, 0x30, 0x82, 0x02, 0x30, 0xA0, 0x03, 
-	0x02, 0x01, 0x02, 0x02, 0x09, 0x00, 0xFD, 0x0E, 0x29, 0x21, 
-	0x66, 0xCB, 0x48, 0xA3, 0x30, 0x0A, 0x06, 0x08, 0x2A, 0x86, 
-	0x48, 0xCE, 0x3D, 0x04, 0x03, 0x02, 0x30, 0x81, 0x97, 0x31, 
-	0x0B, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 0x06, 0x13, 0x02, 
-	0x55, 0x53, 0x31, 0x13, 0x30, 0x11, 0x06, 0x03, 0x55, 0x04, 
-	0x08, 0x0C, 0x0A, 0x57, 0x61, 0x73, 0x68, 0x69, 0x6E, 0x67, 
-	0x74, 0x6F, 0x6E, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 
-	0x04, 0x07, 0x0C, 0x07, 0x53, 0x65, 0x61, 0x74, 0x74, 0x6C, 
-	0x65, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x0A, 
-	0x0C, 0x07, 0x77, 0x6F, 0x6C, 0x66, 0x53, 0x53, 0x4C, 0x31, 
-	0x14, 0x30, 0x12, 0x06, 0x03, 0x55, 0x04, 0x0B, 0x0C, 0x0B, 
-	0x44, 0x65, 0x76, 0x65, 0x6C, 0x6F, 0x70, 0x6D, 0x65, 0x6E, 
-	0x74, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 0x03, 
-	0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77, 0x6F, 0x6C, 0x66, 
-	0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x31, 0x1F, 0x30, 
-	0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 
-	0x09, 0x01, 0x16, 0x10, 0x69, 0x6E, 0x66, 0x6F, 0x40, 0x77, 
-	0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 
-	0x30, 0x1E, 0x17, 0x0D, 0x31, 0x38, 0x30, 0x34, 0x31, 0x33, 
-	0x31, 0x35, 0x32, 0x33, 0x31, 0x30, 0x5A, 0x17, 0x0D, 0x32, 
-	0x31, 0x30, 0x31, 0x30, 0x37, 0x31, 0x35, 0x32, 0x33, 0x31, 
-	0x30, 0x5A, 0x30, 0x81, 0x97, 0x31, 0x0B, 0x30, 0x09, 0x06, 
-	0x03, 0x55, 0x04, 0x06, 0x13, 0x02, 0x55, 0x53, 0x31, 0x13, 
-	0x30, 0x11, 0x06, 0x03, 0x55, 0x04, 0x08, 0x0C, 0x0A, 0x57, 
-	0x61, 0x73, 0x68, 0x69, 0x6E, 0x67, 0x74, 0x6F, 0x6E, 0x31, 
-	0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x07, 0x0C, 0x07, 
-	0x53, 0x65, 0x61, 0x74, 0x74, 0x6C, 0x65, 0x31, 0x10, 0x30, 
-	0x0E, 0x06, 0x03, 0x55, 0x04, 0x0A, 0x0C, 0x07, 0x77, 0x6F, 
-	0x6C, 0x66, 0x53, 0x53, 0x4C, 0x31, 0x14, 0x30, 0x12, 0x06, 
-	0x03, 0x55, 0x04, 0x0B, 0x0C, 0x0B, 0x44, 0x65, 0x76, 0x65, 
-	0x6C, 0x6F, 0x70, 0x6D, 0x65, 0x6E, 0x74, 0x31, 0x18, 0x30, 
-	0x16, 0x06, 0x03, 0x55, 0x04, 0x03, 0x0C, 0x0F, 0x77, 0x77, 
-	0x77, 0x2E, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 
-	0x63, 0x6F, 0x6D, 0x31, 0x1F, 0x30, 0x1D, 0x06, 0x09, 0x2A, 
-	0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x09, 0x01, 0x16, 0x10, 
-	0x69, 0x6E, 0x66, 0x6F, 0x40, 0x77, 0x6F, 0x6C, 0x66, 0x73, 
-	0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x30, 0x59, 0x30, 0x13, 
-	0x06, 0x07, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x02, 0x01, 0x06, 
-	0x08, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x01, 0x07, 0x03, 
-	0x42, 0x00, 0x04, 0x02, 0xD3, 0xD9, 0x6E, 0xD6, 0x01, 0x8E, 
-	0x45, 0xC8, 0xB9, 0x90, 0x31, 0xE5, 0xC0, 0x4C, 0xE3, 0x9E, 
-	0xAD, 0x29, 0x38, 0x98, 0xBA, 0x10, 0xD6, 0xE9, 0x09, 0x2A, 
-	0x80, 0xA9, 0x2E, 0x17, 0x2A, 0xB9, 0x8A, 0xBF, 0x33, 0x83, 
-	0x46, 0xE3, 0x95, 0x0B, 0xE4, 0x77, 0x40, 0xB5, 0x3B, 0x43, 
-	0x45, 0x33, 0x0F, 0x61, 0x53, 0x7C, 0x37, 0x44, 0xC1, 0xCB, 
-	0xFC, 0x80, 0xCA, 0xE8, 0x43, 0xEA, 0xA7, 0xA3, 0x63, 0x30, 
-	0x61, 0x30, 0x1D, 0x06, 0x03, 0x55, 0x1D, 0x0E, 0x04, 0x16, 
-	0x04, 0x14, 0x56, 0x8E, 0x9A, 0xC3, 0xF0, 0x42, 0xDE, 0x18, 
-	0xB9, 0x45, 0x55, 0x6E, 0xF9, 0x93, 0xCF, 0xEA, 0xC3, 0xF3, 
-	0xA5, 0x21, 0x30, 0x1F, 0x06, 0x03, 0x55, 0x1D, 0x23, 0x04, 
-	0x18, 0x30, 0x16, 0x80, 0x14, 0x56, 0x8E, 0x9A, 0xC3, 0xF0, 
-	0x42, 0xDE, 0x18, 0xB9, 0x45, 0x55, 0x6E, 0xF9, 0x93, 0xCF, 
-	0xEA, 0xC3, 0xF3, 0xA5, 0x21, 0x30, 0x0F, 0x06, 0x03, 0x55, 
-	0x1D, 0x13, 0x01, 0x01, 0xFF, 0x04, 0x05, 0x30, 0x03, 0x01, 
-	0x01, 0xFF, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x1D, 0x0F, 0x01, 
-	0x01, 0xFF, 0x04, 0x04, 0x03, 0x02, 0x01, 0x86, 0x30, 0x0A, 
-	0x06, 0x08, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x04, 0x03, 0x02, 
-	0x03, 0x49, 0x00, 0x30, 0x46, 0x02, 0x21, 0x00, 0xF0, 0x7B, 
-	0xCC, 0x24, 0x73, 0x19, 0x3F, 0x61, 0x68, 0xED, 0xC8, 0x0A, 
-	0x54, 0x4A, 0xB8, 0xAC, 0x79, 0xEF, 0x10, 0x32, 0x91, 0x52, 
-	0x2C, 0x3E, 0xBF, 0x50, 0xAA, 0x5F, 0x18, 0xC1, 0x97, 0xF5, 
-	0x02, 0x21, 0x00, 0xD9, 0x4B, 0x63, 0x67, 0x6F, 0x9B, 0x29, 
-	0xA9, 0xD7, 0x6B, 0x63, 0x9B, 0x98, 0x9F, 0x32, 0x82, 0x36, 
-	0xDA, 0xF0, 0xA9, 0xF7, 0x51, 0xB4, 0x97, 0xAA, 0xFA, 0xFA, 
-	0xDD, 0xEF, 0xEF, 0x4A, 0xAE
+        0x30, 0x82, 0x02, 0x8B, 0x30, 0x82, 0x02, 0x30, 0xA0, 0x03,
+        0x02, 0x01, 0x02, 0x02, 0x09, 0x00, 0xFD, 0x0E, 0x29, 0x21,
+        0x66, 0xCB, 0x48, 0xA3, 0x30, 0x0A, 0x06, 0x08, 0x2A, 0x86,
+        0x48, 0xCE, 0x3D, 0x04, 0x03, 0x02, 0x30, 0x81, 0x97, 0x31,
+        0x0B, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 0x06, 0x13, 0x02,
+        0x55, 0x53, 0x31, 0x13, 0x30, 0x11, 0x06, 0x03, 0x55, 0x04,
+        0x08, 0x0C, 0x0A, 0x57, 0x61, 0x73, 0x68, 0x69, 0x6E, 0x67,
+        0x74, 0x6F, 0x6E, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55,
+        0x04, 0x07, 0x0C, 0x07, 0x53, 0x65, 0x61, 0x74, 0x74, 0x6C,
+        0x65, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x0A,
+        0x0C, 0x07, 0x77, 0x6F, 0x6C, 0x66, 0x53, 0x53, 0x4C, 0x31,
+        0x14, 0x30, 0x12, 0x06, 0x03, 0x55, 0x04, 0x0B, 0x0C, 0x0B,
+        0x44, 0x65, 0x76, 0x65, 0x6C, 0x6F, 0x70, 0x6D, 0x65, 0x6E,
+        0x74, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 0x03,
+        0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77, 0x6F, 0x6C, 0x66,
+        0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x31, 0x1F, 0x30,
+        0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01,
+        0x09, 0x01, 0x16, 0x10, 0x69, 0x6E, 0x66, 0x6F, 0x40, 0x77,
+        0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D,
+        0x30, 0x1E, 0x17, 0x0D, 0x31, 0x38, 0x30, 0x34, 0x31, 0x33,
+        0x31, 0x35, 0x32, 0x33, 0x31, 0x30, 0x5A, 0x17, 0x0D, 0x32,
+        0x31, 0x30, 0x31, 0x30, 0x37, 0x31, 0x35, 0x32, 0x33, 0x31,
+        0x30, 0x5A, 0x30, 0x81, 0x97, 0x31, 0x0B, 0x30, 0x09, 0x06,
+        0x03, 0x55, 0x04, 0x06, 0x13, 0x02, 0x55, 0x53, 0x31, 0x13,
+        0x30, 0x11, 0x06, 0x03, 0x55, 0x04, 0x08, 0x0C, 0x0A, 0x57,
+        0x61, 0x73, 0x68, 0x69, 0x6E, 0x67, 0x74, 0x6F, 0x6E, 0x31,
+        0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x07, 0x0C, 0x07,
+        0x53, 0x65, 0x61, 0x74, 0x74, 0x6C, 0x65, 0x31, 0x10, 0x30,
+        0x0E, 0x06, 0x03, 0x55, 0x04, 0x0A, 0x0C, 0x07, 0x77, 0x6F,
+        0x6C, 0x66, 0x53, 0x53, 0x4C, 0x31, 0x14, 0x30, 0x12, 0x06,
+        0x03, 0x55, 0x04, 0x0B, 0x0C, 0x0B, 0x44, 0x65, 0x76, 0x65,
+        0x6C, 0x6F, 0x70, 0x6D, 0x65, 0x6E, 0x74, 0x31, 0x18, 0x30,
+        0x16, 0x06, 0x03, 0x55, 0x04, 0x03, 0x0C, 0x0F, 0x77, 0x77,
+        0x77, 0x2E, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E,
+        0x63, 0x6F, 0x6D, 0x31, 0x1F, 0x30, 0x1D, 0x06, 0x09, 0x2A,
+        0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x09, 0x01, 0x16, 0x10,
+        0x69, 0x6E, 0x66, 0x6F, 0x40, 0x77, 0x6F, 0x6C, 0x66, 0x73,
+        0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x30, 0x59, 0x30, 0x13,
+        0x06, 0x07, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x02, 0x01, 0x06,
+        0x08, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x01, 0x07, 0x03,
+        0x42, 0x00, 0x04, 0x02, 0xD3, 0xD9, 0x6E, 0xD6, 0x01, 0x8E,
+        0x45, 0xC8, 0xB9, 0x90, 0x31, 0xE5, 0xC0, 0x4C, 0xE3, 0x9E,
+        0xAD, 0x29, 0x38, 0x98, 0xBA, 0x10, 0xD6, 0xE9, 0x09, 0x2A,
+        0x80, 0xA9, 0x2E, 0x17, 0x2A, 0xB9, 0x8A, 0xBF, 0x33, 0x83,
+        0x46, 0xE3, 0x95, 0x0B, 0xE4, 0x77, 0x40, 0xB5, 0x3B, 0x43,
+        0x45, 0x33, 0x0F, 0x61, 0x53, 0x7C, 0x37, 0x44, 0xC1, 0xCB,
+        0xFC, 0x80, 0xCA, 0xE8, 0x43, 0xEA, 0xA7, 0xA3, 0x63, 0x30,
+        0x61, 0x30, 0x1D, 0x06, 0x03, 0x55, 0x1D, 0x0E, 0x04, 0x16,
+        0x04, 0x14, 0x56, 0x8E, 0x9A, 0xC3, 0xF0, 0x42, 0xDE, 0x18,
+        0xB9, 0x45, 0x55, 0x6E, 0xF9, 0x93, 0xCF, 0xEA, 0xC3, 0xF3,
+        0xA5, 0x21, 0x30, 0x1F, 0x06, 0x03, 0x55, 0x1D, 0x23, 0x04,
+        0x18, 0x30, 0x16, 0x80, 0x14, 0x56, 0x8E, 0x9A, 0xC3, 0xF0,
+        0x42, 0xDE, 0x18, 0xB9, 0x45, 0x55, 0x6E, 0xF9, 0x93, 0xCF,
+        0xEA, 0xC3, 0xF3, 0xA5, 0x21, 0x30, 0x0F, 0x06, 0x03, 0x55,
+        0x1D, 0x13, 0x01, 0x01, 0xFF, 0x04, 0x05, 0x30, 0x03, 0x01,
+        0x01, 0xFF, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x1D, 0x0F, 0x01,
+        0x01, 0xFF, 0x04, 0x04, 0x03, 0x02, 0x01, 0x86, 0x30, 0x0A,
+        0x06, 0x08, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x04, 0x03, 0x02,
+        0x03, 0x49, 0x00, 0x30, 0x46, 0x02, 0x21, 0x00, 0xF0, 0x7B,
+        0xCC, 0x24, 0x73, 0x19, 0x3F, 0x61, 0x68, 0xED, 0xC8, 0x0A,
+        0x54, 0x4A, 0xB8, 0xAC, 0x79, 0xEF, 0x10, 0x32, 0x91, 0x52,
+        0x2C, 0x3E, 0xBF, 0x50, 0xAA, 0x5F, 0x18, 0xC1, 0x97, 0xF5,
+        0x02, 0x21, 0x00, 0xD9, 0x4B, 0x63, 0x67, 0x6F, 0x9B, 0x29,
+        0xA9, 0xD7, 0x6B, 0x63, 0x9B, 0x98, 0x9F, 0x32, 0x82, 0x36,
+        0xDA, 0xF0, 0xA9, 0xF7, 0x51, 0xB4, 0x97, 0xAA, 0xFA, 0xFA,
+        0xDD, 0xEF, 0xEF, 0x4A, 0xAE
 };
 static const int sizeof_ca_ecc_cert_der_256 = sizeof(ca_ecc_cert_der_256);
 
 /* ./certs/ca-ecc384-key.der, ECC */
 static const unsigned char ca_ecc_key_der_384[] =
 {
-	0x30, 0x81, 0xA4, 0x02, 0x01, 0x01, 0x04, 0x30, 0x7B, 0x16, 
-	0xE3, 0xD6, 0xD2, 0x81, 0x94, 0x6C, 0x8A, 0xDD, 0xA8, 0x78, 
-	0xEE, 0xC7, 0x7E, 0xB3, 0xC5, 0xD1, 0xDB, 0x2E, 0xF3, 0xED, 
-	0x0E, 0x48, 0x85, 0xB1, 0xF2, 0xE1, 0x7A, 0x39, 0x56, 0xC0, 
-	0xF1, 0x62, 0x12, 0x0F, 0x35, 0xB7, 0x39, 0xBC, 0x9C, 0x25, 
-	0xC0, 0x76, 0xEB, 0xFE, 0x55, 0x70, 0xA0, 0x07, 0x06, 0x05, 
-	0x2B, 0x81, 0x04, 0x00, 0x22, 0xA1, 0x64, 0x03, 0x62, 0x00, 
-	0x04, 0xEE, 0x82, 0xD4, 0x39, 0x9A, 0xB1, 0x27, 0x82, 0xF4, 
-	0xD7, 0xEA, 0xC6, 0xBC, 0x03, 0x1D, 0x4D, 0x83, 0x61, 0xF4, 
-	0x03, 0xAE, 0x7E, 0xBD, 0xD8, 0x5A, 0xA5, 0xB9, 0xF0, 0x8E, 
-	0xA2, 0xA5, 0xDA, 0xCE, 0x87, 0x3B, 0x5A, 0xAB, 0x44, 0x16, 
-	0x9C, 0xF5, 0x9F, 0x62, 0xDD, 0xF6, 0x20, 0xCD, 0x9C, 0x76, 
-	0x3C, 0x40, 0xB1, 0x3F, 0x97, 0x17, 0xDF, 0x59, 0xF6, 0xCD, 
-	0xDE, 0xCD, 0x46, 0x35, 0xC0, 0xED, 0x5E, 0x2E, 0x48, 0xB6, 
-	0x66, 0x91, 0x71, 0x74, 0xB7, 0x0C, 0x3F, 0xB9, 0x9A, 0xB7, 
-	0x83, 0xBD, 0x93, 0x3F, 0x5F, 0x50, 0x2D, 0x70, 0x3F, 0xDE, 
-	0x35, 0x25, 0xE1, 0x90, 0x3B, 0x86, 0xE0
+        0x30, 0x81, 0xA4, 0x02, 0x01, 0x01, 0x04, 0x30, 0x7B, 0x16,
+        0xE3, 0xD6, 0xD2, 0x81, 0x94, 0x6C, 0x8A, 0xDD, 0xA8, 0x78,
+        0xEE, 0xC7, 0x7E, 0xB3, 0xC5, 0xD1, 0xDB, 0x2E, 0xF3, 0xED,
+        0x0E, 0x48, 0x85, 0xB1, 0xF2, 0xE1, 0x7A, 0x39, 0x56, 0xC0,
+        0xF1, 0x62, 0x12, 0x0F, 0x35, 0xB7, 0x39, 0xBC, 0x9C, 0x25,
+        0xC0, 0x76, 0xEB, 0xFE, 0x55, 0x70, 0xA0, 0x07, 0x06, 0x05,
+        0x2B, 0x81, 0x04, 0x00, 0x22, 0xA1, 0x64, 0x03, 0x62, 0x00,
+        0x04, 0xEE, 0x82, 0xD4, 0x39, 0x9A, 0xB1, 0x27, 0x82, 0xF4,
+        0xD7, 0xEA, 0xC6, 0xBC, 0x03, 0x1D, 0x4D, 0x83, 0x61, 0xF4,
+        0x03, 0xAE, 0x7E, 0xBD, 0xD8, 0x5A, 0xA5, 0xB9, 0xF0, 0x8E,
+        0xA2, 0xA5, 0xDA, 0xCE, 0x87, 0x3B, 0x5A, 0xAB, 0x44, 0x16,
+        0x9C, 0xF5, 0x9F, 0x62, 0xDD, 0xF6, 0x20, 0xCD, 0x9C, 0x76,
+        0x3C, 0x40, 0xB1, 0x3F, 0x97, 0x17, 0xDF, 0x59, 0xF6, 0xCD,
+        0xDE, 0xCD, 0x46, 0x35, 0xC0, 0xED, 0x5E, 0x2E, 0x48, 0xB6,
+        0x66, 0x91, 0x71, 0x74, 0xB7, 0x0C, 0x3F, 0xB9, 0x9A, 0xB7,
+        0x83, 0xBD, 0x93, 0x3F, 0x5F, 0x50, 0x2D, 0x70, 0x3F, 0xDE,
+        0x35, 0x25, 0xE1, 0x90, 0x3B, 0x86, 0xE0
 };
 static const int sizeof_ca_ecc_key_der_384 = sizeof(ca_ecc_key_der_384);
 
 /* ./certs/ca-ecc384-cert.der, ECC */
 static const unsigned char ca_ecc_cert_der_384[] =
 {
-	0x30, 0x82, 0x02, 0xC7, 0x30, 0x82, 0x02, 0x4D, 0xA0, 0x03, 
-	0x02, 0x01, 0x02, 0x02, 0x09, 0x00, 0xFC, 0x39, 0x04, 0xA4, 
-	0x0E, 0xA5, 0x6C, 0x87, 0x30, 0x0A, 0x06, 0x08, 0x2A, 0x86, 
-	0x48, 0xCE, 0x3D, 0x04, 0x03, 0x03, 0x30, 0x81, 0x97, 0x31, 
-	0x0B, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 0x06, 0x13, 0x02, 
-	0x55, 0x53, 0x31, 0x13, 0x30, 0x11, 0x06, 0x03, 0x55, 0x04, 
-	0x08, 0x0C, 0x0A, 0x57, 0x61, 0x73, 0x68, 0x69, 0x6E, 0x67, 
-	0x74, 0x6F, 0x6E, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 
-	0x04, 0x07, 0x0C, 0x07, 0x53, 0x65, 0x61, 0x74, 0x74, 0x6C, 
-	0x65, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x0A, 
-	0x0C, 0x07, 0x77, 0x6F, 0x6C, 0x66, 0x53, 0x53, 0x4C, 0x31, 
-	0x14, 0x30, 0x12, 0x06, 0x03, 0x55, 0x04, 0x0B, 0x0C, 0x0B, 
-	0x44, 0x65, 0x76, 0x65, 0x6C, 0x6F, 0x70, 0x6D, 0x65, 0x6E, 
-	0x74, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 0x03, 
-	0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77, 0x6F, 0x6C, 0x66, 
-	0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x31, 0x1F, 0x30, 
-	0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 
-	0x09, 0x01, 0x16, 0x10, 0x69, 0x6E, 0x66, 0x6F, 0x40, 0x77, 
-	0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 
-	0x30, 0x1E, 0x17, 0x0D, 0x31, 0x38, 0x30, 0x34, 0x31, 0x33, 
-	0x31, 0x35, 0x32, 0x33, 0x31, 0x30, 0x5A, 0x17, 0x0D, 0x32, 
-	0x31, 0x30, 0x31, 0x30, 0x37, 0x31, 0x35, 0x32, 0x33, 0x31, 
-	0x30, 0x5A, 0x30, 0x81, 0x97, 0x31, 0x0B, 0x30, 0x09, 0x06, 
-	0x03, 0x55, 0x04, 0x06, 0x13, 0x02, 0x55, 0x53, 0x31, 0x13, 
-	0x30, 0x11, 0x06, 0x03, 0x55, 0x04, 0x08, 0x0C, 0x0A, 0x57, 
-	0x61, 0x73, 0x68, 0x69, 0x6E, 0x67, 0x74, 0x6F, 0x6E, 0x31, 
-	0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x07, 0x0C, 0x07, 
-	0x53, 0x65, 0x61, 0x74, 0x74, 0x6C, 0x65, 0x31, 0x10, 0x30, 
-	0x0E, 0x06, 0x03, 0x55, 0x04, 0x0A, 0x0C, 0x07, 0x77, 0x6F, 
-	0x6C, 0x66, 0x53, 0x53, 0x4C, 0x31, 0x14, 0x30, 0x12, 0x06, 
-	0x03, 0x55, 0x04, 0x0B, 0x0C, 0x0B, 0x44, 0x65, 0x76, 0x65, 
-	0x6C, 0x6F, 0x70, 0x6D, 0x65, 0x6E, 0x74, 0x31, 0x18, 0x30, 
-	0x16, 0x06, 0x03, 0x55, 0x04, 0x03, 0x0C, 0x0F, 0x77, 0x77, 
-	0x77, 0x2E, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 
-	0x63, 0x6F, 0x6D, 0x31, 0x1F, 0x30, 0x1D, 0x06, 0x09, 0x2A, 
-	0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x09, 0x01, 0x16, 0x10, 
-	0x69, 0x6E, 0x66, 0x6F, 0x40, 0x77, 0x6F, 0x6C, 0x66, 0x73, 
-	0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x30, 0x76, 0x30, 0x10, 
-	0x06, 0x07, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x02, 0x01, 0x06, 
-	0x05, 0x2B, 0x81, 0x04, 0x00, 0x22, 0x03, 0x62, 0x00, 0x04, 
-	0xEE, 0x82, 0xD4, 0x39, 0x9A, 0xB1, 0x27, 0x82, 0xF4, 0xD7, 
-	0xEA, 0xC6, 0xBC, 0x03, 0x1D, 0x4D, 0x83, 0x61, 0xF4, 0x03, 
-	0xAE, 0x7E, 0xBD, 0xD8, 0x5A, 0xA5, 0xB9, 0xF0, 0x8E, 0xA2, 
-	0xA5, 0xDA, 0xCE, 0x87, 0x3B, 0x5A, 0xAB, 0x44, 0x16, 0x9C, 
-	0xF5, 0x9F, 0x62, 0xDD, 0xF6, 0x20, 0xCD, 0x9C, 0x76, 0x3C, 
-	0x40, 0xB1, 0x3F, 0x97, 0x17, 0xDF, 0x59, 0xF6, 0xCD, 0xDE, 
-	0xCD, 0x46, 0x35, 0xC0, 0xED, 0x5E, 0x2E, 0x48, 0xB6, 0x66, 
-	0x91, 0x71, 0x74, 0xB7, 0x0C, 0x3F, 0xB9, 0x9A, 0xB7, 0x83, 
-	0xBD, 0x93, 0x3F, 0x5F, 0x50, 0x2D, 0x70, 0x3F, 0xDE, 0x35, 
-	0x25, 0xE1, 0x90, 0x3B, 0x86, 0xE0, 0xA3, 0x63, 0x30, 0x61, 
-	0x30, 0x1D, 0x06, 0x03, 0x55, 0x1D, 0x0E, 0x04, 0x16, 0x04, 
-	0x14, 0xAB, 0xE0, 0xC3, 0x26, 0x4C, 0x18, 0xD4, 0x72, 0xBB, 
-	0xD2, 0x84, 0x8C, 0x9C, 0x0A, 0x05, 0x92, 0x80, 0x12, 0x53, 
-	0x52, 0x30, 0x1F, 0x06, 0x03, 0x55, 0x1D, 0x23, 0x04, 0x18, 
-	0x30, 0x16, 0x80, 0x14, 0xAB, 0xE0, 0xC3, 0x26, 0x4C, 0x18, 
-	0xD4, 0x72, 0xBB, 0xD2, 0x84, 0x8C, 0x9C, 0x0A, 0x05, 0x92, 
-	0x80, 0x12, 0x53, 0x52, 0x30, 0x0F, 0x06, 0x03, 0x55, 0x1D, 
-	0x13, 0x01, 0x01, 0xFF, 0x04, 0x05, 0x30, 0x03, 0x01, 0x01, 
-	0xFF, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x1D, 0x0F, 0x01, 0x01, 
-	0xFF, 0x04, 0x04, 0x03, 0x02, 0x01, 0x86, 0x30, 0x0A, 0x06, 
-	0x08, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x04, 0x03, 0x03, 0x03, 
-	0x68, 0x00, 0x30, 0x65, 0x02, 0x30, 0x0D, 0x0A, 0x62, 0xFB, 
-	0xE6, 0x3A, 0xFE, 0x71, 0xD8, 0x2B, 0x44, 0xE5, 0x97, 0x34, 
-	0x04, 0xA9, 0x8C, 0x0A, 0x99, 0x88, 0xA0, 0xBD, 0x1F, 0xB0, 
-	0xDF, 0x94, 0x59, 0x27, 0xBB, 0x2B, 0xC6, 0x2A, 0xBE, 0xA4, 
-	0x69, 0x1B, 0xCF, 0x97, 0x78, 0x2A, 0x28, 0x96, 0xEE, 0xBA, 
-	0xD4, 0x87, 0x45, 0xFD, 0x02, 0x31, 0x00, 0xC0, 0x73, 0x19, 
-	0x66, 0x76, 0x5E, 0x9F, 0xA3, 0x65, 0x85, 0x41, 0xEF, 0xB7, 
-	0x7B, 0x3D, 0x63, 0x6D, 0x98, 0x71, 0x99, 0x6F, 0x9C, 0xDB, 
-	0xA8, 0x5E, 0x53, 0x6E, 0xA0, 0x68, 0x11, 0x65, 0xBC, 0x78, 
-	0x74, 0x28, 0x69, 0xC7, 0x64, 0x9D, 0x88, 0xF2, 0xD8, 0xC2, 
-	0x3D, 0x29, 0x03, 0x83, 0x23
+        0x30, 0x82, 0x02, 0xC7, 0x30, 0x82, 0x02, 0x4D, 0xA0, 0x03,
+        0x02, 0x01, 0x02, 0x02, 0x09, 0x00, 0xFC, 0x39, 0x04, 0xA4,
+        0x0E, 0xA5, 0x6C, 0x87, 0x30, 0x0A, 0x06, 0x08, 0x2A, 0x86,
+        0x48, 0xCE, 0x3D, 0x04, 0x03, 0x03, 0x30, 0x81, 0x97, 0x31,
+        0x0B, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 0x06, 0x13, 0x02,
+        0x55, 0x53, 0x31, 0x13, 0x30, 0x11, 0x06, 0x03, 0x55, 0x04,
+        0x08, 0x0C, 0x0A, 0x57, 0x61, 0x73, 0x68, 0x69, 0x6E, 0x67,
+        0x74, 0x6F, 0x6E, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55,
+        0x04, 0x07, 0x0C, 0x07, 0x53, 0x65, 0x61, 0x74, 0x74, 0x6C,
+        0x65, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x0A,
+        0x0C, 0x07, 0x77, 0x6F, 0x6C, 0x66, 0x53, 0x53, 0x4C, 0x31,
+        0x14, 0x30, 0x12, 0x06, 0x03, 0x55, 0x04, 0x0B, 0x0C, 0x0B,
+        0x44, 0x65, 0x76, 0x65, 0x6C, 0x6F, 0x70, 0x6D, 0x65, 0x6E,
+        0x74, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 0x03,
+        0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77, 0x6F, 0x6C, 0x66,
+        0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x31, 0x1F, 0x30,
+        0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01,
+        0x09, 0x01, 0x16, 0x10, 0x69, 0x6E, 0x66, 0x6F, 0x40, 0x77,
+        0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D,
+        0x30, 0x1E, 0x17, 0x0D, 0x31, 0x38, 0x30, 0x34, 0x31, 0x33,
+        0x31, 0x35, 0x32, 0x33, 0x31, 0x30, 0x5A, 0x17, 0x0D, 0x32,
+        0x31, 0x30, 0x31, 0x30, 0x37, 0x31, 0x35, 0x32, 0x33, 0x31,
+        0x30, 0x5A, 0x30, 0x81, 0x97, 0x31, 0x0B, 0x30, 0x09, 0x06,
+        0x03, 0x55, 0x04, 0x06, 0x13, 0x02, 0x55, 0x53, 0x31, 0x13,
+        0x30, 0x11, 0x06, 0x03, 0x55, 0x04, 0x08, 0x0C, 0x0A, 0x57,
+        0x61, 0x73, 0x68, 0x69, 0x6E, 0x67, 0x74, 0x6F, 0x6E, 0x31,
+        0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x07, 0x0C, 0x07,
+        0x53, 0x65, 0x61, 0x74, 0x74, 0x6C, 0x65, 0x31, 0x10, 0x30,
+        0x0E, 0x06, 0x03, 0x55, 0x04, 0x0A, 0x0C, 0x07, 0x77, 0x6F,
+        0x6C, 0x66, 0x53, 0x53, 0x4C, 0x31, 0x14, 0x30, 0x12, 0x06,
+        0x03, 0x55, 0x04, 0x0B, 0x0C, 0x0B, 0x44, 0x65, 0x76, 0x65,
+        0x6C, 0x6F, 0x70, 0x6D, 0x65, 0x6E, 0x74, 0x31, 0x18, 0x30,
+        0x16, 0x06, 0x03, 0x55, 0x04, 0x03, 0x0C, 0x0F, 0x77, 0x77,
+        0x77, 0x2E, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E,
+        0x63, 0x6F, 0x6D, 0x31, 0x1F, 0x30, 0x1D, 0x06, 0x09, 0x2A,
+        0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x09, 0x01, 0x16, 0x10,
+        0x69, 0x6E, 0x66, 0x6F, 0x40, 0x77, 0x6F, 0x6C, 0x66, 0x73,
+        0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x30, 0x76, 0x30, 0x10,
+        0x06, 0x07, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x02, 0x01, 0x06,
+        0x05, 0x2B, 0x81, 0x04, 0x00, 0x22, 0x03, 0x62, 0x00, 0x04,
+        0xEE, 0x82, 0xD4, 0x39, 0x9A, 0xB1, 0x27, 0x82, 0xF4, 0xD7,
+        0xEA, 0xC6, 0xBC, 0x03, 0x1D, 0x4D, 0x83, 0x61, 0xF4, 0x03,
+        0xAE, 0x7E, 0xBD, 0xD8, 0x5A, 0xA5, 0xB9, 0xF0, 0x8E, 0xA2,
+        0xA5, 0xDA, 0xCE, 0x87, 0x3B, 0x5A, 0xAB, 0x44, 0x16, 0x9C,
+        0xF5, 0x9F, 0x62, 0xDD, 0xF6, 0x20, 0xCD, 0x9C, 0x76, 0x3C,
+        0x40, 0xB1, 0x3F, 0x97, 0x17, 0xDF, 0x59, 0xF6, 0xCD, 0xDE,
+        0xCD, 0x46, 0x35, 0xC0, 0xED, 0x5E, 0x2E, 0x48, 0xB6, 0x66,
+        0x91, 0x71, 0x74, 0xB7, 0x0C, 0x3F, 0xB9, 0x9A, 0xB7, 0x83,
+        0xBD, 0x93, 0x3F, 0x5F, 0x50, 0x2D, 0x70, 0x3F, 0xDE, 0x35,
+        0x25, 0xE1, 0x90, 0x3B, 0x86, 0xE0, 0xA3, 0x63, 0x30, 0x61,
+        0x30, 0x1D, 0x06, 0x03, 0x55, 0x1D, 0x0E, 0x04, 0x16, 0x04,
+        0x14, 0xAB, 0xE0, 0xC3, 0x26, 0x4C, 0x18, 0xD4, 0x72, 0xBB,
+        0xD2, 0x84, 0x8C, 0x9C, 0x0A, 0x05, 0x92, 0x80, 0x12, 0x53,
+        0x52, 0x30, 0x1F, 0x06, 0x03, 0x55, 0x1D, 0x23, 0x04, 0x18,
+        0x30, 0x16, 0x80, 0x14, 0xAB, 0xE0, 0xC3, 0x26, 0x4C, 0x18,
+        0xD4, 0x72, 0xBB, 0xD2, 0x84, 0x8C, 0x9C, 0x0A, 0x05, 0x92,
+        0x80, 0x12, 0x53, 0x52, 0x30, 0x0F, 0x06, 0x03, 0x55, 0x1D,
+        0x13, 0x01, 0x01, 0xFF, 0x04, 0x05, 0x30, 0x03, 0x01, 0x01,
+        0xFF, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x1D, 0x0F, 0x01, 0x01,
+        0xFF, 0x04, 0x04, 0x03, 0x02, 0x01, 0x86, 0x30, 0x0A, 0x06,
+        0x08, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x04, 0x03, 0x03, 0x03,
+        0x68, 0x00, 0x30, 0x65, 0x02, 0x30, 0x0D, 0x0A, 0x62, 0xFB,
+        0xE6, 0x3A, 0xFE, 0x71, 0xD8, 0x2B, 0x44, 0xE5, 0x97, 0x34,
+        0x04, 0xA9, 0x8C, 0x0A, 0x99, 0x88, 0xA0, 0xBD, 0x1F, 0xB0,
+        0xDF, 0x94, 0x59, 0x27, 0xBB, 0x2B, 0xC6, 0x2A, 0xBE, 0xA4,
+        0x69, 0x1B, 0xCF, 0x97, 0x78, 0x2A, 0x28, 0x96, 0xEE, 0xBA,
+        0xD4, 0x87, 0x45, 0xFD, 0x02, 0x31, 0x00, 0xC0, 0x73, 0x19,
+        0x66, 0x76, 0x5E, 0x9F, 0xA3, 0x65, 0x85, 0x41, 0xEF, 0xB7,
+        0x7B, 0x3D, 0x63, 0x6D, 0x98, 0x71, 0x99, 0x6F, 0x9C, 0xDB,
+        0xA8, 0x5E, 0x53, 0x6E, 0xA0, 0x68, 0x11, 0x65, 0xBC, 0x78,
+        0x74, 0x28, 0x69, 0xC7, 0x64, 0x9D, 0x88, 0xF2, 0xD8, 0xC2,
+        0x3D, 0x29, 0x03, 0x83, 0x23
 };
 static const int sizeof_ca_ecc_cert_der_384 = sizeof(ca_ecc_cert_der_384);
 
@@ -2751,137 +3914,234 @@
 /* ./certs/ed25519/server-ed25519.der, ED25519 */
 static const unsigned char server_ed25519_cert[] =
 {
-	0x30, 0x82, 0x02, 0x52, 0x30, 0x82, 0x02, 0x04, 0xA0, 0x03, 
-	0x02, 0x01, 0x02, 0x02, 0x10, 0x00, 0xCD, 0xF2, 0x2F, 0xBE, 
-	0xDC, 0x07, 0xFA, 0xBB, 0x65, 0x03, 0xE2, 0xFF, 0xEA, 0x6A, 
-	0x99, 0x30, 0x05, 0x06, 0x03, 0x2B, 0x65, 0x70, 0x30, 0x81, 
-	0x9D, 0x31, 0x0B, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 0x06, 
-	0x13, 0x02, 0x55, 0x53, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 
-	0x55, 0x04, 0x08, 0x0C, 0x07, 0x4D, 0x6F, 0x6E, 0x74, 0x61, 
-	0x6E, 0x61, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 
-	0x07, 0x0C, 0x07, 0x42, 0x6F, 0x7A, 0x65, 0x6D, 0x61, 0x6E, 
-	0x31, 0x0B, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 0x04, 0x0C, 
-	0x02, 0x43, 0x41, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 
-	0x04, 0x0A, 0x0C, 0x07, 0x77, 0x6F, 0x6C, 0x66, 0x53, 0x53, 
-	0x4C, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x0B, 
-	0x0C, 0x07, 0x45, 0x44, 0x32, 0x35, 0x35, 0x31, 0x39, 0x31, 
-	0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 0x03, 0x0C, 0x0F, 
-	0x77, 0x77, 0x77, 0x2E, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 
-	0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x31, 0x1F, 0x30, 0x1D, 0x06, 
-	0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x09, 0x01, 
-	0x16, 0x10, 0x69, 0x6E, 0x66, 0x6F, 0x40, 0x77, 0x6F, 0x6C, 
-	0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x30, 0x22, 
-	0x18, 0x0F, 0x32, 0x30, 0x31, 0x38, 0x30, 0x34, 0x31, 0x32, 
-	0x31, 0x36, 0x32, 0x32, 0x31, 0x37, 0x5A, 0x18, 0x0F, 0x32, 
-	0x30, 0x32, 0x31, 0x30, 0x31, 0x30, 0x37, 0x31, 0x35, 0x32, 
-	0x32, 0x31, 0x37, 0x5A, 0x30, 0x81, 0x9F, 0x31, 0x0B, 0x30, 
-	0x09, 0x06, 0x03, 0x55, 0x04, 0x06, 0x13, 0x02, 0x55, 0x53, 
-	0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x08, 0x0C, 
-	0x07, 0x4D, 0x6F, 0x6E, 0x74, 0x61, 0x6E, 0x61, 0x31, 0x10, 
-	0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x07, 0x0C, 0x07, 0x42, 
-	0x6F, 0x7A, 0x65, 0x6D, 0x61, 0x6E, 0x31, 0x0D, 0x30, 0x0B, 
-	0x06, 0x03, 0x55, 0x04, 0x04, 0x0C, 0x04, 0x4C, 0x65, 0x61, 
-	0x66, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x0A, 
-	0x0C, 0x07, 0x77, 0x6F, 0x6C, 0x66, 0x53, 0x53, 0x4C, 0x31, 
-	0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x0B, 0x0C, 0x07, 
-	0x45, 0x44, 0x32, 0x35, 0x35, 0x31, 0x39, 0x31, 0x18, 0x30, 
-	0x16, 0x06, 0x03, 0x55, 0x04, 0x03, 0x0C, 0x0F, 0x77, 0x77, 
-	0x77, 0x2E, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 
-	0x63, 0x6F, 0x6D, 0x31, 0x1F, 0x30, 0x1D, 0x06, 0x09, 0x2A, 
-	0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x09, 0x01, 0x16, 0x10, 
-	0x69, 0x6E, 0x66, 0x6F, 0x40, 0x77, 0x6F, 0x6C, 0x66, 0x73, 
-	0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x30, 0x2A, 0x30, 0x05, 
-	0x06, 0x03, 0x2B, 0x65, 0x70, 0x03, 0x21, 0x00, 0x61, 0x5D, 
-	0xEC, 0xB7, 0x45, 0x93, 0xC9, 0x84, 0x7B, 0x68, 0x21, 0x4A, 
-	0x4D, 0xF4, 0x04, 0x8B, 0xBD, 0xCD, 0x6C, 0x5D, 0x3D, 0xB7, 
-	0x62, 0x2C, 0x2D, 0x25, 0xC3, 0x22, 0x49, 0xC8, 0x86, 0xF2, 
-	0xA3, 0x52, 0x30, 0x50, 0x30, 0x1D, 0x06, 0x03, 0x55, 0x1D, 
-	0x0E, 0x04, 0x16, 0x04, 0x14, 0x33, 0xC8, 0x28, 0x63, 0x8C, 
-	0xF4, 0x57, 0xEE, 0x1E, 0xB0, 0xC7, 0x12, 0x12, 0x76, 0x8A, 
-	0x80, 0x30, 0x3A, 0xCB, 0x10, 0x30, 0x1F, 0x06, 0x03, 0x55, 
-	0x1D, 0x23, 0x04, 0x18, 0x30, 0x16, 0x80, 0x14, 0x92, 0x3F, 
-	0x96, 0x72, 0x02, 0xFA, 0x61, 0x1C, 0x21, 0x6D, 0x88, 0xDD, 
-	0xEB, 0xDD, 0x3C, 0x9B, 0x17, 0xC4, 0x9F, 0xB7, 0x30, 0x0E, 
-	0x06, 0x03, 0x55, 0x1D, 0x0F, 0x01, 0x01, 0xFF, 0x04, 0x04, 
-	0x03, 0x02, 0x06, 0xC0, 0x30, 0x05, 0x06, 0x03, 0x2B, 0x65, 
-	0x70, 0x03, 0x41, 0x00, 0x15, 0x88, 0x86, 0xFC, 0x66, 0xD1, 
-	0xE0, 0xF6, 0xCF, 0xC9, 0x09, 0x46, 0xD0, 0x50, 0xE2, 0x01, 
-	0x5D, 0xF7, 0xCF, 0x57, 0xB8, 0xBA, 0x90, 0x84, 0xCB, 0xF1, 
-	0x24, 0x4B, 0xEF, 0xA5, 0x95, 0x7D, 0x69, 0x92, 0x88, 0xA8, 
-	0x89, 0x63, 0xCC, 0x90, 0x40, 0xC2, 0x41, 0x3A, 0x40, 0x76, 
-	0xB1, 0x2D, 0xA8, 0xA8, 0x97, 0xC9, 0x73, 0xC7, 0x82, 0x30, 
-	0x24, 0x61, 0xB0, 0xAA, 0xCA, 0xAA, 0x68, 0x00
+        0x30, 0x82, 0x02, 0x52, 0x30, 0x82, 0x02, 0x04, 0xA0, 0x03,
+        0x02, 0x01, 0x02, 0x02, 0x10, 0x00, 0xCD, 0xF2, 0x2F, 0xBE,
+        0xDC, 0x07, 0xFA, 0xBB, 0x65, 0x03, 0xE2, 0xFF, 0xEA, 0x6A,
+        0x99, 0x30, 0x05, 0x06, 0x03, 0x2B, 0x65, 0x70, 0x30, 0x81,
+        0x9D, 0x31, 0x0B, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 0x06,
+        0x13, 0x02, 0x55, 0x53, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03,
+        0x55, 0x04, 0x08, 0x0C, 0x07, 0x4D, 0x6F, 0x6E, 0x74, 0x61,
+        0x6E, 0x61, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04,
+        0x07, 0x0C, 0x07, 0x42, 0x6F, 0x7A, 0x65, 0x6D, 0x61, 0x6E,
+        0x31, 0x0B, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 0x04, 0x0C,
+        0x02, 0x43, 0x41, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55,
+        0x04, 0x0A, 0x0C, 0x07, 0x77, 0x6F, 0x6C, 0x66, 0x53, 0x53,
+        0x4C, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x0B,
+        0x0C, 0x07, 0x45, 0x44, 0x32, 0x35, 0x35, 0x31, 0x39, 0x31,
+        0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 0x03, 0x0C, 0x0F,
+        0x77, 0x77, 0x77, 0x2E, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73,
+        0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x31, 0x1F, 0x30, 0x1D, 0x06,
+        0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x09, 0x01,
+        0x16, 0x10, 0x69, 0x6E, 0x66, 0x6F, 0x40, 0x77, 0x6F, 0x6C,
+        0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x30, 0x22,
+        0x18, 0x0F, 0x32, 0x30, 0x31, 0x38, 0x30, 0x34, 0x31, 0x32,
+        0x31, 0x36, 0x32, 0x32, 0x31, 0x37, 0x5A, 0x18, 0x0F, 0x32,
+        0x30, 0x32, 0x31, 0x30, 0x31, 0x30, 0x37, 0x31, 0x35, 0x32,
+        0x32, 0x31, 0x37, 0x5A, 0x30, 0x81, 0x9F, 0x31, 0x0B, 0x30,
+        0x09, 0x06, 0x03, 0x55, 0x04, 0x06, 0x13, 0x02, 0x55, 0x53,
+        0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x08, 0x0C,
+        0x07, 0x4D, 0x6F, 0x6E, 0x74, 0x61, 0x6E, 0x61, 0x31, 0x10,
+        0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x07, 0x0C, 0x07, 0x42,
+        0x6F, 0x7A, 0x65, 0x6D, 0x61, 0x6E, 0x31, 0x0D, 0x30, 0x0B,
+        0x06, 0x03, 0x55, 0x04, 0x04, 0x0C, 0x04, 0x4C, 0x65, 0x61,
+        0x66, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x0A,
+        0x0C, 0x07, 0x77, 0x6F, 0x6C, 0x66, 0x53, 0x53, 0x4C, 0x31,
+        0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x0B, 0x0C, 0x07,
+        0x45, 0x44, 0x32, 0x35, 0x35, 0x31, 0x39, 0x31, 0x18, 0x30,
+        0x16, 0x06, 0x03, 0x55, 0x04, 0x03, 0x0C, 0x0F, 0x77, 0x77,
+        0x77, 0x2E, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E,
+        0x63, 0x6F, 0x6D, 0x31, 0x1F, 0x30, 0x1D, 0x06, 0x09, 0x2A,
+        0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x09, 0x01, 0x16, 0x10,
+        0x69, 0x6E, 0x66, 0x6F, 0x40, 0x77, 0x6F, 0x6C, 0x66, 0x73,
+        0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x30, 0x2A, 0x30, 0x05,
+        0x06, 0x03, 0x2B, 0x65, 0x70, 0x03, 0x21, 0x00, 0x61, 0x5D,
+        0xEC, 0xB7, 0x45, 0x93, 0xC9, 0x84, 0x7B, 0x68, 0x21, 0x4A,
+        0x4D, 0xF4, 0x04, 0x8B, 0xBD, 0xCD, 0x6C, 0x5D, 0x3D, 0xB7,
+        0x62, 0x2C, 0x2D, 0x25, 0xC3, 0x22, 0x49, 0xC8, 0x86, 0xF2,
+        0xA3, 0x52, 0x30, 0x50, 0x30, 0x1D, 0x06, 0x03, 0x55, 0x1D,
+        0x0E, 0x04, 0x16, 0x04, 0x14, 0x33, 0xC8, 0x28, 0x63, 0x8C,
+        0xF4, 0x57, 0xEE, 0x1E, 0xB0, 0xC7, 0x12, 0x12, 0x76, 0x8A,
+        0x80, 0x30, 0x3A, 0xCB, 0x10, 0x30, 0x1F, 0x06, 0x03, 0x55,
+        0x1D, 0x23, 0x04, 0x18, 0x30, 0x16, 0x80, 0x14, 0x92, 0x3F,
+        0x96, 0x72, 0x02, 0xFA, 0x61, 0x1C, 0x21, 0x6D, 0x88, 0xDD,
+        0xEB, 0xDD, 0x3C, 0x9B, 0x17, 0xC4, 0x9F, 0xB7, 0x30, 0x0E,
+        0x06, 0x03, 0x55, 0x1D, 0x0F, 0x01, 0x01, 0xFF, 0x04, 0x04,
+        0x03, 0x02, 0x06, 0xC0, 0x30, 0x05, 0x06, 0x03, 0x2B, 0x65,
+        0x70, 0x03, 0x41, 0x00, 0x15, 0x88, 0x86, 0xFC, 0x66, 0xD1,
+        0xE0, 0xF6, 0xCF, 0xC9, 0x09, 0x46, 0xD0, 0x50, 0xE2, 0x01,
+        0x5D, 0xF7, 0xCF, 0x57, 0xB8, 0xBA, 0x90, 0x84, 0xCB, 0xF1,
+        0x24, 0x4B, 0xEF, 0xA5, 0x95, 0x7D, 0x69, 0x92, 0x88, 0xA8,
+        0x89, 0x63, 0xCC, 0x90, 0x40, 0xC2, 0x41, 0x3A, 0x40, 0x76,
+        0xB1, 0x2D, 0xA8, 0xA8, 0x97, 0xC9, 0x73, 0xC7, 0x82, 0x30,
+        0x24, 0x61, 0xB0, 0xAA, 0xCA, 0xAA, 0x68, 0x00
 };
 static const int sizeof_server_ed25519_cert = sizeof(server_ed25519_cert);
 
+/* ./certs/ed25519/server-ed25519-key.der, ED25519 */
+static const unsigned char server_ed25519_key[] =
+{
+        0x30, 0x52, 0x02, 0x01, 0x00, 0x30, 0x05, 0x06, 0x03, 0x2B,
+        0x65, 0x70, 0x04, 0x22, 0x04, 0x20, 0x02, 0x2F, 0xC5, 0xFF,
+        0xBA, 0x8E, 0xD0, 0xD2, 0xBF, 0x03, 0x8E, 0x76, 0x8F, 0xC8,
+        0x86, 0x80, 0x71, 0x87, 0x97, 0x31, 0xE2, 0x40, 0xAC, 0xDF,
+        0xBB, 0x90, 0x15, 0x52, 0x6E, 0x24, 0xA1, 0x39, 0xA1, 0x22,
+        0x04, 0x20, 0x61, 0x5D, 0xEC, 0xB7, 0x45, 0x93, 0xC9, 0x84,
+        0x7B, 0x68, 0x21, 0x4A, 0x4D, 0xF4, 0x04, 0x8B, 0xBD, 0xCD,
+        0x6C, 0x5D, 0x3D, 0xB7, 0x62, 0x2C, 0x2D, 0x25, 0xC3, 0x22,
+        0x49, 0xC8, 0x86, 0xF2
+};
+static const int sizeof_server_ed25519_key = sizeof(server_ed25519_key);
+
 /* ./certs/ed25519/ca-ed25519.der, ED25519 */
 static const unsigned char ca_ed25519_cert[] =
 {
-	0x30, 0x82, 0x02, 0x60, 0x30, 0x82, 0x02, 0x12, 0xA0, 0x03, 
-	0x02, 0x01, 0x02, 0x02, 0x10, 0x00, 0x80, 0xBA, 0x68, 0x77, 
-	0xEF, 0xA5, 0xE5, 0x42, 0x7D, 0xC6, 0x73, 0x2C, 0x54, 0x85, 
-	0xB8, 0x30, 0x05, 0x06, 0x03, 0x2B, 0x65, 0x70, 0x30, 0x81, 
-	0x9F, 0x31, 0x0B, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 0x06, 
-	0x13, 0x02, 0x55, 0x53, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 
-	0x55, 0x04, 0x08, 0x0C, 0x07, 0x4D, 0x6F, 0x6E, 0x74, 0x61, 
-	0x6E, 0x61, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 
-	0x07, 0x0C, 0x07, 0x42, 0x6F, 0x7A, 0x65, 0x6D, 0x61, 0x6E, 
-	0x31, 0x0D, 0x30, 0x0B, 0x06, 0x03, 0x55, 0x04, 0x04, 0x0C, 
-	0x04, 0x52, 0x6F, 0x6F, 0x74, 0x31, 0x10, 0x30, 0x0E, 0x06, 
-	0x03, 0x55, 0x04, 0x0A, 0x0C, 0x07, 0x77, 0x6F, 0x6C, 0x66, 
-	0x53, 0x53, 0x4C, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 
-	0x04, 0x0B, 0x0C, 0x07, 0x45, 0x44, 0x32, 0x35, 0x35, 0x31, 
-	0x39, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 0x03, 
-	0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77, 0x6F, 0x6C, 0x66, 
-	0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x31, 0x1F, 0x30, 
-	0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 
-	0x09, 0x01, 0x16, 0x10, 0x69, 0x6E, 0x66, 0x6F, 0x40, 0x77, 
-	0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 
-	0x30, 0x22, 0x18, 0x0F, 0x32, 0x30, 0x31, 0x38, 0x30, 0x34, 
-	0x31, 0x32, 0x31, 0x36, 0x32, 0x32, 0x31, 0x37, 0x5A, 0x18, 
-	0x0F, 0x32, 0x30, 0x32, 0x31, 0x30, 0x31, 0x30, 0x37, 0x31, 
-	0x35, 0x32, 0x32, 0x31, 0x37, 0x5A, 0x30, 0x81, 0x9D, 0x31, 
-	0x0B, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 0x06, 0x13, 0x02, 
-	0x55, 0x53, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 
-	0x08, 0x0C, 0x07, 0x4D, 0x6F, 0x6E, 0x74, 0x61, 0x6E, 0x61, 
-	0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x07, 0x0C, 
-	0x07, 0x42, 0x6F, 0x7A, 0x65, 0x6D, 0x61, 0x6E, 0x31, 0x0B, 
-	0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 0x04, 0x0C, 0x02, 0x43, 
-	0x41, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x0A, 
-	0x0C, 0x07, 0x77, 0x6F, 0x6C, 0x66, 0x53, 0x53, 0x4C, 0x31, 
-	0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x0B, 0x0C, 0x07, 
-	0x45, 0x44, 0x32, 0x35, 0x35, 0x31, 0x39, 0x31, 0x18, 0x30, 
-	0x16, 0x06, 0x03, 0x55, 0x04, 0x03, 0x0C, 0x0F, 0x77, 0x77, 
-	0x77, 0x2E, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 
-	0x63, 0x6F, 0x6D, 0x31, 0x1F, 0x30, 0x1D, 0x06, 0x09, 0x2A, 
-	0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x09, 0x01, 0x16, 0x10, 
-	0x69, 0x6E, 0x66, 0x6F, 0x40, 0x77, 0x6F, 0x6C, 0x66, 0x73, 
-	0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x30, 0x2A, 0x30, 0x05, 
-	0x06, 0x03, 0x2B, 0x65, 0x70, 0x03, 0x21, 0x00, 0x65, 0xAA, 
-	0x7F, 0x05, 0xA4, 0x04, 0x34, 0xA0, 0xEA, 0xAD, 0x1F, 0xA9, 
-	0x86, 0xF0, 0xD8, 0x7F, 0x72, 0xDF, 0xA9, 0x0E, 0x13, 0xA0, 
-	0x38, 0x66, 0x26, 0x5E, 0xEB, 0x48, 0x30, 0x80, 0x48, 0x49, 
-	0xA3, 0x60, 0x30, 0x5E, 0x30, 0x0C, 0x06, 0x03, 0x55, 0x1D, 
-	0x13, 0x04, 0x05, 0x30, 0x03, 0x01, 0x01, 0xFF, 0x30, 0x1D, 
-	0x06, 0x03, 0x55, 0x1D, 0x0E, 0x04, 0x16, 0x04, 0x14, 0x92, 
-	0x3F, 0x96, 0x72, 0x02, 0xFA, 0x61, 0x1C, 0x21, 0x6D, 0x88, 
-	0xDD, 0xEB, 0xDD, 0x3C, 0x9B, 0x17, 0xC4, 0x9F, 0xB7, 0x30, 
-	0x1F, 0x06, 0x03, 0x55, 0x1D, 0x23, 0x04, 0x18, 0x30, 0x16, 
-	0x80, 0x14, 0xFE, 0x01, 0x46, 0x7F, 0x6F, 0x2B, 0x3E, 0x1C, 
-	0xB0, 0x6F, 0xE1, 0xCC, 0x4D, 0x02, 0x25, 0xF7, 0x4D, 0x0A, 
-	0x95, 0xB8, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x1D, 0x0F, 0x01, 
-	0x01, 0xFF, 0x04, 0x04, 0x03, 0x02, 0x01, 0xC6, 0x30, 0x05, 
-	0x06, 0x03, 0x2B, 0x65, 0x70, 0x03, 0x41, 0x00, 0x4C, 0x40, 
-	0xD0, 0x7F, 0xBC, 0xFB, 0xF4, 0xA2, 0x1A, 0x58, 0xF6, 0x72, 
-	0xE3, 0xE8, 0xDA, 0x18, 0x0D, 0x94, 0xDC, 0x0E, 0xFD, 0xC1, 
-	0xE7, 0x02, 0xA5, 0x7A, 0xEE, 0xCB, 0xC2, 0x7E, 0xFA, 0xA1, 
-	0xFC, 0x15, 0x9A, 0xFE, 0x1E, 0xE0, 0x37, 0xDF, 0x7F, 0xAB, 
-	0x76, 0x50, 0x06, 0xD4, 0x3D, 0x1A, 0x65, 0x73, 0x3F, 0x92, 
-	0xD4, 0x44, 0x62, 0xA7, 0x4C, 0xB3, 0x2A, 0x01, 0x87, 0xE3, 
-	0x06, 0x06
+        0x30, 0x82, 0x02, 0x60, 0x30, 0x82, 0x02, 0x12, 0xA0, 0x03,
+        0x02, 0x01, 0x02, 0x02, 0x10, 0x00, 0x80, 0xBA, 0x68, 0x77,
+        0xEF, 0xA5, 0xE5, 0x42, 0x7D, 0xC6, 0x73, 0x2C, 0x54, 0x85,
+        0xB8, 0x30, 0x05, 0x06, 0x03, 0x2B, 0x65, 0x70, 0x30, 0x81,
+        0x9F, 0x31, 0x0B, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 0x06,
+        0x13, 0x02, 0x55, 0x53, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03,
+        0x55, 0x04, 0x08, 0x0C, 0x07, 0x4D, 0x6F, 0x6E, 0x74, 0x61,
+        0x6E, 0x61, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04,
+        0x07, 0x0C, 0x07, 0x42, 0x6F, 0x7A, 0x65, 0x6D, 0x61, 0x6E,
+        0x31, 0x0D, 0x30, 0x0B, 0x06, 0x03, 0x55, 0x04, 0x04, 0x0C,
+        0x04, 0x52, 0x6F, 0x6F, 0x74, 0x31, 0x10, 0x30, 0x0E, 0x06,
+        0x03, 0x55, 0x04, 0x0A, 0x0C, 0x07, 0x77, 0x6F, 0x6C, 0x66,
+        0x53, 0x53, 0x4C, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55,
+        0x04, 0x0B, 0x0C, 0x07, 0x45, 0x44, 0x32, 0x35, 0x35, 0x31,
+        0x39, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55, 0x04, 0x03,
+        0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77, 0x6F, 0x6C, 0x66,
+        0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x31, 0x1F, 0x30,
+        0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01,
+        0x09, 0x01, 0x16, 0x10, 0x69, 0x6E, 0x66, 0x6F, 0x40, 0x77,
+        0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D,
+        0x30, 0x22, 0x18, 0x0F, 0x32, 0x30, 0x31, 0x38, 0x30, 0x34,
+        0x31, 0x32, 0x31, 0x36, 0x32, 0x32, 0x31, 0x37, 0x5A, 0x18,
+        0x0F, 0x32, 0x30, 0x32, 0x31, 0x30, 0x31, 0x30, 0x37, 0x31,
+        0x35, 0x32, 0x32, 0x31, 0x37, 0x5A, 0x30, 0x81, 0x9D, 0x31,
+        0x0B, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 0x06, 0x13, 0x02,
+        0x55, 0x53, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04,
+        0x08, 0x0C, 0x07, 0x4D, 0x6F, 0x6E, 0x74, 0x61, 0x6E, 0x61,
+        0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x07, 0x0C,
+        0x07, 0x42, 0x6F, 0x7A, 0x65, 0x6D, 0x61, 0x6E, 0x31, 0x0B,
+        0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 0x04, 0x0C, 0x02, 0x43,
+        0x41, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x0A,
+        0x0C, 0x07, 0x77, 0x6F, 0x6C, 0x66, 0x53, 0x53, 0x4C, 0x31,
+        0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04, 0x0B, 0x0C, 0x07,
+        0x45, 0x44, 0x32, 0x35, 0x35, 0x31, 0x39, 0x31, 0x18, 0x30,
+        0x16, 0x06, 0x03, 0x55, 0x04, 0x03, 0x0C, 0x0F, 0x77, 0x77,
+        0x77, 0x2E, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E,
+        0x63, 0x6F, 0x6D, 0x31, 0x1F, 0x30, 0x1D, 0x06, 0x09, 0x2A,
+        0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x09, 0x01, 0x16, 0x10,
+        0x69, 0x6E, 0x66, 0x6F, 0x40, 0x77, 0x6F, 0x6C, 0x66, 0x73,
+        0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x30, 0x2A, 0x30, 0x05,
+        0x06, 0x03, 0x2B, 0x65, 0x70, 0x03, 0x21, 0x00, 0x65, 0xAA,
+        0x7F, 0x05, 0xA4, 0x04, 0x34, 0xA0, 0xEA, 0xAD, 0x1F, 0xA9,
+        0x86, 0xF0, 0xD8, 0x7F, 0x72, 0xDF, 0xA9, 0x0E, 0x13, 0xA0,
+        0x38, 0x66, 0x26, 0x5E, 0xEB, 0x48, 0x30, 0x80, 0x48, 0x49,
+        0xA3, 0x60, 0x30, 0x5E, 0x30, 0x0C, 0x06, 0x03, 0x55, 0x1D,
+        0x13, 0x04, 0x05, 0x30, 0x03, 0x01, 0x01, 0xFF, 0x30, 0x1D,
+        0x06, 0x03, 0x55, 0x1D, 0x0E, 0x04, 0x16, 0x04, 0x14, 0x92,
+        0x3F, 0x96, 0x72, 0x02, 0xFA, 0x61, 0x1C, 0x21, 0x6D, 0x88,
+        0xDD, 0xEB, 0xDD, 0x3C, 0x9B, 0x17, 0xC4, 0x9F, 0xB7, 0x30,
+        0x1F, 0x06, 0x03, 0x55, 0x1D, 0x23, 0x04, 0x18, 0x30, 0x16,
+        0x80, 0x14, 0xFE, 0x01, 0x46, 0x7F, 0x6F, 0x2B, 0x3E, 0x1C,
+        0xB0, 0x6F, 0xE1, 0xCC, 0x4D, 0x02, 0x25, 0xF7, 0x4D, 0x0A,
+        0x95, 0xB8, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x1D, 0x0F, 0x01,
+        0x01, 0xFF, 0x04, 0x04, 0x03, 0x02, 0x01, 0xC6, 0x30, 0x05,
+        0x06, 0x03, 0x2B, 0x65, 0x70, 0x03, 0x41, 0x00, 0x4C, 0x40,
+        0xD0, 0x7F, 0xBC, 0xFB, 0xF4, 0xA2, 0x1A, 0x58, 0xF6, 0x72,
+        0xE3, 0xE8, 0xDA, 0x18, 0x0D, 0x94, 0xDC, 0x0E, 0xFD, 0xC1,
+        0xE7, 0x02, 0xA5, 0x7A, 0xEE, 0xCB, 0xC2, 0x7E, 0xFA, 0xA1,
+        0xFC, 0x15, 0x9A, 0xFE, 0x1E, 0xE0, 0x37, 0xDF, 0x7F, 0xAB,
+        0x76, 0x50, 0x06, 0xD4, 0x3D, 0x1A, 0x65, 0x73, 0x3F, 0x92,
+        0xD4, 0x44, 0x62, 0xA7, 0x4C, 0xB3, 0x2A, 0x01, 0x87, 0xE3,
+        0x06, 0x06
 };
 static const int sizeof_ca_ed25519_cert = sizeof(ca_ed25519_cert);
 
+/* ./certs/ed25519/client-ed25519.der, ED25519 */
+static const unsigned char client_ed25519_cert[] =
+{
+        0x30, 0x82, 0x02, 0x58, 0x30, 0x82, 0x02, 0x0A, 0xA0, 0x03,
+        0x02, 0x01, 0x02, 0x02, 0x10, 0x00, 0x8F, 0x2F, 0x35, 0xB2,
+        0x53, 0xBD, 0x4F, 0x92, 0xD1, 0xFF, 0x1D, 0x4B, 0x40, 0xA5,
+        0x49, 0x30, 0x05, 0x06, 0x03, 0x2B, 0x65, 0x70, 0x30, 0x81,
+        0xA1, 0x31, 0x0B, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 0x06,
+        0x13, 0x02, 0x55, 0x53, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03,
+        0x55, 0x04, 0x08, 0x0C, 0x07, 0x4D, 0x6F, 0x6E, 0x74, 0x61,
+        0x6E, 0x61, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04,
+        0x07, 0x0C, 0x07, 0x42, 0x6F, 0x7A, 0x65, 0x6D, 0x61, 0x6E,
+        0x31, 0x0F, 0x30, 0x0D, 0x06, 0x03, 0x55, 0x04, 0x04, 0x0C,
+        0x06, 0x63, 0x6C, 0x69, 0x65, 0x6E, 0x74, 0x31, 0x10, 0x30,
+        0x0E, 0x06, 0x03, 0x55, 0x04, 0x0A, 0x0C, 0x07, 0x77, 0x6F,
+        0x6C, 0x66, 0x53, 0x53, 0x4C, 0x31, 0x10, 0x30, 0x0E, 0x06,
+        0x03, 0x55, 0x04, 0x0B, 0x0C, 0x07, 0x45, 0x44, 0x32, 0x35,
+        0x35, 0x31, 0x39, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55,
+        0x04, 0x03, 0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77, 0x6F,
+        0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x31,
+        0x1F, 0x30, 0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7,
+        0x0D, 0x01, 0x09, 0x01, 0x16, 0x10, 0x69, 0x6E, 0x66, 0x6F,
+        0x40, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63,
+        0x6F, 0x6D, 0x30, 0x22, 0x18, 0x0F, 0x32, 0x30, 0x31, 0x38,
+        0x30, 0x34, 0x31, 0x32, 0x31, 0x36, 0x32, 0x32, 0x31, 0x37,
+        0x5A, 0x18, 0x0F, 0x32, 0x30, 0x32, 0x31, 0x30, 0x31, 0x30,
+        0x37, 0x31, 0x35, 0x32, 0x32, 0x31, 0x37, 0x5A, 0x30, 0x81,
+        0xA1, 0x31, 0x0B, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 0x06,
+        0x13, 0x02, 0x55, 0x53, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03,
+        0x55, 0x04, 0x08, 0x0C, 0x07, 0x4D, 0x6F, 0x6E, 0x74, 0x61,
+        0x6E, 0x61, 0x31, 0x10, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x04,
+        0x07, 0x0C, 0x07, 0x42, 0x6F, 0x7A, 0x65, 0x6D, 0x61, 0x6E,
+        0x31, 0x0F, 0x30, 0x0D, 0x06, 0x03, 0x55, 0x04, 0x04, 0x0C,
+        0x06, 0x63, 0x6C, 0x69, 0x65, 0x6E, 0x74, 0x31, 0x10, 0x30,
+        0x0E, 0x06, 0x03, 0x55, 0x04, 0x0A, 0x0C, 0x07, 0x77, 0x6F,
+        0x6C, 0x66, 0x53, 0x53, 0x4C, 0x31, 0x10, 0x30, 0x0E, 0x06,
+        0x03, 0x55, 0x04, 0x0B, 0x0C, 0x07, 0x45, 0x44, 0x32, 0x35,
+        0x35, 0x31, 0x39, 0x31, 0x18, 0x30, 0x16, 0x06, 0x03, 0x55,
+        0x04, 0x03, 0x0C, 0x0F, 0x77, 0x77, 0x77, 0x2E, 0x77, 0x6F,
+        0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63, 0x6F, 0x6D, 0x31,
+        0x1F, 0x30, 0x1D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7,
+        0x0D, 0x01, 0x09, 0x01, 0x16, 0x10, 0x69, 0x6E, 0x66, 0x6F,
+        0x40, 0x77, 0x6F, 0x6C, 0x66, 0x73, 0x73, 0x6C, 0x2E, 0x63,
+        0x6F, 0x6D, 0x30, 0x2A, 0x30, 0x05, 0x06, 0x03, 0x2B, 0x65,
+        0x70, 0x03, 0x21, 0x00, 0xA2, 0xF1, 0x26, 0x40, 0x9B, 0xA2,
+        0x59, 0xDA, 0xDB, 0xE6, 0x15, 0x7F, 0x9A, 0x11, 0xB5, 0x48,
+        0x5F, 0x55, 0xBA, 0x5E, 0xED, 0x46, 0xF7, 0x98, 0x67, 0xBE,
+        0x0C, 0x93, 0xE3, 0xA4, 0x8E, 0x18, 0xA3, 0x52, 0x30, 0x50,
+        0x30, 0x1D, 0x06, 0x03, 0x55, 0x1D, 0x0E, 0x04, 0x16, 0x04,
+        0x14, 0xFE, 0x01, 0x46, 0x7F, 0x6F, 0x2B, 0x3E, 0x1C, 0xB0,
+        0x6F, 0xE1, 0xCC, 0x4D, 0x02, 0x25, 0xF7, 0x4D, 0x0A, 0x95,
+        0xB8, 0x30, 0x1F, 0x06, 0x03, 0x55, 0x1D, 0x23, 0x04, 0x18,
+        0x30, 0x16, 0x80, 0x14, 0xFE, 0x01, 0x46, 0x7F, 0x6F, 0x2B,
+        0x3E, 0x1C, 0xB0, 0x6F, 0xE1, 0xCC, 0x4D, 0x02, 0x25, 0xF7,
+        0x4D, 0x0A, 0x95, 0xB8, 0x30, 0x0E, 0x06, 0x03, 0x55, 0x1D,
+        0x0F, 0x01, 0x01, 0xFF, 0x04, 0x04, 0x03, 0x02, 0x06, 0xC0,
+        0x30, 0x05, 0x06, 0x03, 0x2B, 0x65, 0x70, 0x03, 0x41, 0x00,
+        0x29, 0xF6, 0x69, 0xE2, 0xB9, 0x73, 0x12, 0xD1, 0x64, 0xEB,
+        0x8F, 0xE9, 0x6B, 0x61, 0xDB, 0x5F, 0xE9, 0xA7, 0x62, 0x6C,
+        0x10, 0x89, 0x41, 0x80, 0xE3, 0xE8, 0xFD, 0x1F, 0xD0, 0x13,
+        0xAE, 0x95, 0x00, 0xAF, 0xF7, 0x77, 0xE1, 0x22, 0x32, 0xAD,
+        0x46, 0x4F, 0xDC, 0x7E, 0xFE, 0xAE, 0xBC, 0x8A, 0x1F, 0x96,
+        0x0A, 0xDA, 0x9F, 0xC9, 0x93, 0x52, 0x27, 0x18, 0xB0, 0x8B,
+        0xDA, 0xBE, 0x81, 0x09
+};
+static const int sizeof_client_ed25519_cert = sizeof(client_ed25519_cert);
+
+/* ./certs/ed25519/client-ed25519-key.der, ED25519 */
+static const unsigned char client_ed25519_key[] =
+{
+        0x30, 0x52, 0x02, 0x01, 0x00, 0x30, 0x05, 0x06, 0x03, 0x2B,
+        0x65, 0x70, 0x04, 0x22, 0x04, 0x20, 0x27, 0xA3, 0x34, 0x2A,
+        0x35, 0xD4, 0xBB, 0xB8, 0xE1, 0xDC, 0xD8, 0xEC, 0x0F, 0xC1,
+        0xA0, 0xD1, 0xA2, 0x5C, 0xF9, 0x06, 0xF0, 0x44, 0x5D, 0x3B,
+        0x97, 0x4D, 0xBD, 0xDF, 0x4A, 0x3B, 0xA3, 0x4E, 0xA1, 0x22,
+        0x04, 0x20, 0xA2, 0xF1, 0x26, 0x40, 0x9B, 0xA2, 0x59, 0xDA,
+        0xDB, 0xE6, 0x15, 0x7F, 0x9A, 0x11, 0xB5, 0x48, 0x5F, 0x55,
+        0xBA, 0x5E, 0xED, 0x46, 0xF7, 0x98, 0x67, 0xBE, 0x0C, 0x93,
+        0xE3, 0xA4, 0x8E, 0x18
+};
+static const int sizeof_client_ed25519_key = sizeof(client_ed25519_key);
+
 #endif /* HAVE_ED25519 */
 
 #endif /* WOLFSSL_CERTS_TEST_H */
--- a/wolfssl/crl.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/crl.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* crl.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
--- a/wolfssl/error-ssl.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/error-ssl.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* error-ssl.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -57,7 +57,7 @@
     DOMAIN_NAME_MISMATCH         = -322,   /* peer subject name mismatch */
     WANT_READ                    = -323,   /* want read, call again    */
     NOT_READY_ERROR              = -324,   /* handshake layer not ready */
-
+    IPADDR_MISMATCH              = -325,   /* peer ip address mismatch */
     VERSION_ERROR                = -326,   /* record layer version error */
     WANT_WRITE                   = -327,   /* want write, call again   */
     BUFFER_ERROR                 = -328,   /* malformed buffer input   */
@@ -66,9 +66,6 @@
     CLIENT_ID_ERROR              = -331,   /* psk client identity error  */
     SERVER_HINT_ERROR            = -332,   /* psk server hint error  */
     PSK_KEY_ERROR                = -333,   /* psk key error  */
-    ZLIB_INIT_ERROR              = -334,   /* zlib init error  */
-    ZLIB_COMPRESS_ERROR          = -335,   /* zlib compression error  */
-    ZLIB_DECOMPRESS_ERROR        = -336,   /* zlib decompression error  */
 
     GETTIME_ERROR                = -337,   /* gettimeofday failed ??? */
     GETITIMER_ERROR              = -338,   /* getitimer failed ??? */
@@ -108,11 +105,11 @@
     OUT_OF_ORDER_E               = -373,   /* out of order message */
     BAD_KEA_TYPE_E               = -374,   /* bad KEA type found */
     SANITY_CIPHER_E              = -375,   /* sanity check on cipher error */
-    RECV_OVERFLOW_E              = -376,   /* RXCB returned more than rqed */
+    RECV_OVERFLOW_E              = -376,   /* RXCB returned more than read */
     GEN_COOKIE_E                 = -377,   /* Generate Cookie Error */
     NO_PEER_VERIFY               = -378,   /* Need peer cert verify Error */
     FWRITE_ERROR                 = -379,   /* fwrite problem */
-    CACHE_MATCH_ERROR            = -380,   /* chache hdr match error */
+    CACHE_MATCH_ERROR            = -380,   /* Cache hdr match error */
     UNKNOWN_SNI_HOST_NAME_E      = -381,   /* Unrecognized host name Error */
     UNKNOWN_MAX_FRAG_LEN_E       = -382,   /* Unrecognized max frag len Error */
     KEYUSE_SIGNATURE_E           = -383,   /* KeyUse digSignature error */
@@ -129,23 +126,19 @@
     DUPLICATE_MSG_E              = -395,   /* Duplicate message error */
     SNI_UNSUPPORTED              = -396,   /* SSL 3.0 does not support SNI */
     SOCKET_PEER_CLOSED_E         = -397,   /* Underlying transport closed */
-
     BAD_TICKET_KEY_CB_SZ         = -398,   /* Bad session ticket key cb size */
     BAD_TICKET_MSG_SZ            = -399,   /* Bad session ticket msg size    */
     BAD_TICKET_ENCRYPT           = -400,   /* Bad user ticket encrypt        */
-
     DH_KEY_SIZE_E                = -401,   /* DH Key too small */
     SNI_ABSENT_ERROR             = -402,   /* No SNI request. */
     RSA_SIGN_FAULT               = -403,   /* RSA Sign fault */
     HANDSHAKE_SIZE_ERROR         = -404,   /* Handshake message too large */
-
     UNKNOWN_ALPN_PROTOCOL_NAME_E = -405,   /* Unrecognized protocol name Error*/
     BAD_CERTIFICATE_STATUS_ERROR = -406,   /* Bad certificate status message */
     OCSP_INVALID_STATUS          = -407,   /* Invalid OCSP Status */
     OCSP_WANT_READ               = -408,   /* OCSP callback response WOLFSSL_CBIO_ERR_WANT_READ */
     RSA_KEY_SIZE_E               = -409,   /* RSA key too small */
     ECC_KEY_SIZE_E               = -410,   /* ECC key too small */
-
     DTLS_EXPORT_VER_E            = -411,   /* export version error */
     INPUT_SIZE_E                 = -412,   /* input size too big error */
     CTX_INIT_MUTEX_E             = -413,   /* initialize ctx mutex error */
@@ -165,6 +158,16 @@
     ALERT_COUNT_E                = -427,   /* Alert Count exceeded err */
     EXT_MISSING                  = -428,   /* Required extension not found */
     UNSUPPORTED_EXTENSION        = -429,   /* TLSX not requested by client */
+    PRF_MISSING                  = -430,   /* PRF not compiled in */
+    DTLS_RETX_OVER_TX            = -431,   /* Retransmit DTLS flight over */
+    DH_PARAMS_NOT_FFDHE_E        = -432,   /* DH params from server not FFDHE */
+    TCA_INVALID_ID_TYPE          = -433,   /* TLSX TCA ID type invalid */
+    TCA_ABSENT_ERROR             = -434,   /* TLSX TCA ID no response */
+    TSIP_MAC_DIGSZ_E             = -435,   /* Invalid MAC size for TSIP */
+    CLIENT_CERT_CB_ERROR         = -436,   /* Client cert callback error */
+    SSL_SHUTDOWN_ALREADY_DONE_E  = -437,   /* Shutdown called redundantly */
+    TLS13_SECRET_CB_E            = -438,   /* TLS1.3 secret Cb fcn failure */
+
     /* add strings to wolfSSL_ERR_reason_error_string in internal.c !!!!! */
 
     /* begin negotiation parameter errors */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wolfssl/include.am	Thu Jun 04 23:57:22 2020 +0000
@@ -0,0 +1,33 @@
+# vim:ft=automake
+# All paths should be given relative to the root
+#
+
+include wolfssl/wolfcrypt/include.am
+include wolfssl/openssl/include.am
+
+EXTRA_DIST+= wolfssl/sniffer_error.rc
+
+nobase_include_HEADERS+= \
+                         wolfssl/error-ssl.h \
+                         wolfssl/ssl.h \
+                         wolfssl/sniffer_error.h \
+                         wolfssl/sniffer.h \
+                         wolfssl/callbacks.h \
+                         wolfssl/certs_test.h \
+                         wolfssl/test.h \
+                         wolfssl/version.h \
+                         wolfssl/ocsp.h \
+                         wolfssl/crl.h \
+                         wolfssl/wolfio.h
+
+noinst_HEADERS+= \
+                         wolfssl/internal.h
+
+# For distro build don't install options.h.
+# It depends on the architecture and conflicts with Multi-Arch.
+if BUILD_DISTRO
+noinst_HEADERS+=         wolfssl/options.h
+else
+nobase_include_HEADERS+= wolfssl/options.h
+endif
+
--- a/wolfssl/internal.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/internal.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* internal.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -105,6 +105,12 @@
 #ifdef HAVE_CURVE25519
     #include <wolfssl/wolfcrypt/curve25519.h>
 #endif
+#ifdef HAVE_ED448
+    #include <wolfssl/wolfcrypt/ed448.h>
+#endif
+#ifdef HAVE_CURVE448
+    #include <wolfssl/wolfcrypt/curve448.h>
+#endif
 
 #include <wolfssl/wolfcrypt/wc_encrypt.h>
 #include <wolfssl/wolfcrypt/hash.h>
@@ -130,6 +136,9 @@
     #ifndef SINGLE_THREADED
         #include "tx_api.h"
     #endif
+
+#elif defined(WOLFSSL_DEOS)
+    /* do nothing, just don't pick Unix */
 #elif defined(MICRIUM)
     /* do nothing, just don't pick Unix */
 #elif defined(FREERTOS) || defined(FREERTOS_TCP) || defined(WOLFSSL_SAFERTOS)
@@ -146,14 +155,14 @@
         /* do nothing */
 #elif defined(WOLFSSL_CMSIS_RTOS)
     #include "cmsis_os.h"
+#elif defined(WOLFSSL_CMSIS_RTOSv2)
+    #include "cmsis_os2.h"
 #elif defined(WOLFSSL_MDK_ARM)
     #if defined(WOLFSSL_MDK5)
-         #include "cmsis_os.h"
+        #include "cmsis_os.h"
     #else
         #include <rtl.h>
     #endif
-#elif defined(WOLFSSL_CMSIS_RTOS)
-    #include "cmsis_os.h"
 #elif defined(MBED)
 #elif defined(WOLFSSL_TIRTOS)
     /* do nothing */
@@ -161,12 +170,23 @@
     #include <rt.h>
 #elif defined(WOLFSSL_NUCLEUS_1_2)
     /* do nothing */
+#elif defined(WOLFSSL_APACHE_MYNEWT)
+    #if !defined(WOLFSSL_LWIP)
+        void mynewt_ctx_clear(void *ctx);
+        void* mynewt_ctx_new();
+    #endif
+#elif defined(WOLFSSL_ZEPHYR)
+    #ifndef SINGLE_THREADED
+        #include <kernel.h>
+    #endif
+#elif defined(WOLFSSL_TELIT_M2MB)
+    /* do nothing */
 #else
     #ifndef SINGLE_THREADED
         #define WOLFSSL_PTHREADS
         #include <pthread.h>
     #endif
-    #ifdef OPENSSL_EXTRA
+    #if defined(OPENSSL_EXTRA) && !defined(NO_FILESYSTEM)
         #include <unistd.h>      /* for close of BIO */
     #endif
 #endif
@@ -242,8 +262,16 @@
     #define BUILD_TLS_QSH
 #endif
 
+#ifndef WOLFSSL_NO_TLS12
+
 #ifndef WOLFSSL_MAX_STRENGTH
 
+#ifdef WOLFSSL_AEAD_ONLY
+    /* AES CBC ciphers are not allowed in AEAD only mode */
+    #undef HAVE_AES_CBC
+#endif
+
+#ifndef WOLFSSL_AEAD_ONLY
     #if !defined(NO_RSA) && !defined(NO_RC4)
         #if defined(WOLFSSL_STATIC_RSA)
             #if !defined(NO_SHA)
@@ -276,9 +304,10 @@
             #define BUILD_SSL_RSA_WITH_IDEA_CBC_SHA
         #endif
     #endif
+#endif /* !WOLFSSL_AEAD_ONLY */
 
     #if !defined(NO_RSA) && !defined(NO_AES) && !defined(NO_TLS)
-        #if !defined(NO_SHA)
+        #if !defined(NO_SHA) && defined(HAVE_AES_CBC)
             #if defined(WOLFSSL_STATIC_RSA)
                 #ifdef WOLFSSL_AES_128
                     #define BUILD_TLS_RSA_WITH_AES_128_CBC_SHA
@@ -297,7 +326,7 @@
             #endif
         #endif
         #if defined(WOLFSSL_STATIC_RSA)
-            #if !defined (NO_SHA256)
+            #if !defined (NO_SHA256) && defined(HAVE_AES_CBC)
                 #ifdef WOLFSSL_AES_128
                     #define BUILD_TLS_RSA_WITH_AES_128_CBC_SHA256
                 #endif
@@ -321,18 +350,10 @@
                     #define BUILD_TLS_RSA_WITH_AES_256_CCM_8
                 #endif
             #endif
-            #if defined(HAVE_BLAKE2)
-                #ifdef WOLFSSL_AES_128
-                    #define BUILD_TLS_RSA_WITH_AES_128_CBC_B2B256
-                #endif
-                #ifdef WOLFSSL_AES_256
-                    #define BUILD_TLS_RSA_WITH_AES_256_CBC_B2B256
-                #endif
-            #endif
         #endif
     #endif
 
-    #if defined(HAVE_CAMELLIA) && !defined(NO_TLS)
+    #if defined(HAVE_CAMELLIA) && !defined(NO_TLS) && !defined(NO_CAMELLIA_CBC)
         #ifndef NO_RSA
           #if defined(WOLFSSL_STATIC_RSA)
             #if !defined(NO_SHA)
@@ -369,10 +390,12 @@
         #endif
         #ifndef NO_SHA256
             #ifdef WOLFSSL_AES_128
-            #define BUILD_TLS_PSK_WITH_AES_128_CBC_SHA256
-            #ifdef HAVE_AESGCM
-                #define BUILD_TLS_PSK_WITH_AES_128_GCM_SHA256
-            #endif
+                #ifdef HAVE_AES_CBC
+                    #define BUILD_TLS_PSK_WITH_AES_128_CBC_SHA256
+                #endif
+                #ifdef HAVE_AESGCM
+                    #define BUILD_TLS_PSK_WITH_AES_128_GCM_SHA256
+                #endif
             #endif /* WOLFSSL_AES_128 */
             #ifdef HAVE_AESCCM
                 #ifdef WOLFSSL_AES_128
@@ -386,7 +409,9 @@
             #endif
         #endif
         #if defined(WOLFSSL_SHA384) && defined(WOLFSSL_AES_256)
-            #define BUILD_TLS_PSK_WITH_AES_256_CBC_SHA384
+            #ifdef HAVE_AES_CBC
+                #define BUILD_TLS_PSK_WITH_AES_256_CBC_SHA384
+            #endif
             #ifdef HAVE_AESGCM
                 #define BUILD_TLS_PSK_WITH_AES_256_GCM_SHA384
             #endif
@@ -397,6 +422,9 @@
     #if !defined(NO_TLS) && defined(HAVE_NULL_CIPHER)
         #if !defined(NO_RSA)
             #if defined(WOLFSSL_STATIC_RSA)
+                #ifndef NO_MD5
+                    #define BUILD_TLS_RSA_WITH_NULL_MD5
+                #endif
                 #if !defined(NO_SHA)
                     #define BUILD_TLS_RSA_WITH_NULL_SHA
                 #endif
@@ -426,9 +454,6 @@
         #if !defined(NO_SHA)
             #define BUILD_TLS_RSA_WITH_HC_128_SHA
         #endif
-        #if defined(HAVE_BLAKE2)
-            #define BUILD_TLS_RSA_WITH_HC_128_B2B256
-        #endif
     #endif
 
     #if !defined(NO_RABBIT) && !defined(NO_TLS) && !defined(NO_RSA)
@@ -442,10 +467,10 @@
         !defined(NO_RSA)
 
         #if !defined(NO_SHA)
-            #ifdef WOLFSSL_AES_128
+            #if defined(WOLFSSL_AES_128) && defined(HAVE_AES_CBC)
                 #define BUILD_TLS_DHE_RSA_WITH_AES_128_CBC_SHA
             #endif
-            #ifdef WOLFSSL_AES_256
+            #if defined(WOLFSSL_AES_256) && defined(HAVE_AES_CBC)
                 #define BUILD_TLS_DHE_RSA_WITH_AES_256_CBC_SHA
             #endif
             #if !defined(NO_DES3)
@@ -464,7 +489,9 @@
 
     #if defined(HAVE_ANON) && !defined(NO_TLS) && !defined(NO_DH) && \
         !defined(NO_AES) && !defined(NO_SHA) && defined(WOLFSSL_AES_128)
-        #define BUILD_TLS_DH_anon_WITH_AES_128_CBC_SHA
+        #ifdef HAVE_AES_CBC
+            #define BUILD_TLS_DH_anon_WITH_AES_128_CBC_SHA
+        #endif
 
         #if defined(WOLFSSL_SHA384) && defined(HAVE_AESGCM)
             #define BUILD_TLS_DH_anon_WITH_AES_256_GCM_SHA384
@@ -473,7 +500,8 @@
 
     #if !defined(NO_DH) && !defined(NO_PSK) && !defined(NO_TLS)
         #ifndef NO_SHA256
-            #if !defined(NO_AES) && defined(WOLFSSL_AES_128)
+            #if !defined(NO_AES) && defined(WOLFSSL_AES_128) && \
+                                                           defined(HAVE_AES_CBC)
                 #define BUILD_TLS_DHE_PSK_WITH_AES_128_CBC_SHA256
             #endif
             #ifdef HAVE_NULL_CIPHER
@@ -481,7 +509,8 @@
             #endif
         #endif
         #ifdef WOLFSSL_SHA384
-            #if !defined(NO_AES) && defined(WOLFSSL_AES_256)
+            #if !defined(NO_AES) && defined(WOLFSSL_AES_256) && \
+                                                           defined(HAVE_AES_CBC)
                 #define BUILD_TLS_DHE_PSK_WITH_AES_256_CBC_SHA384
             #endif
             #ifdef HAVE_NULL_CIPHER
@@ -490,9 +519,10 @@
         #endif
     #endif
 
-    #if (defined(HAVE_ECC) || defined(HAVE_CURVE25519)) && !defined(NO_TLS)
+    #if (defined(HAVE_ECC) || defined(HAVE_CURVE25519) || \
+                                     defined(HAVE_CURVE448)) && !defined(NO_TLS)
         #if !defined(NO_AES)
-            #if !defined(NO_SHA)
+            #if !defined(NO_SHA) && defined(HAVE_AES_CBC)
                 #if !defined(NO_RSA)
                     #ifdef WOLFSSL_AES_128
                         #define BUILD_TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA
@@ -510,8 +540,9 @@
                     #endif
                 #endif
 
-                #if defined(HAVE_ECC) || (defined(HAVE_CURVE25519) && \
-                                                          defined(HAVE_ED25519))
+                #if defined(HAVE_ECC) || \
+                        (defined(HAVE_CURVE25519) && defined(HAVE_ED25519)) || \
+                        (defined(HAVE_CURVE448) && defined(HAVE_ED448))
                     #ifdef WOLFSSL_AES_128
                         #define BUILD_TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA
                     #endif
@@ -529,15 +560,17 @@
                     #endif
                 #endif
             #endif /* NO_SHA */
-            #if !defined(NO_SHA256) && defined(WOLFSSL_AES_128)
+            #if !defined(NO_SHA256) && defined(WOLFSSL_AES_128) && \
+                                                           defined(HAVE_AES_CBC)
                 #if !defined(NO_RSA)
                     #define BUILD_TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256
                     #if defined(WOLFSSL_STATIC_DH) && defined(HAVE_ECC)
                         #define BUILD_TLS_ECDH_RSA_WITH_AES_128_CBC_SHA256
                     #endif
                 #endif
-                #if defined(HAVE_ECC) || (defined(HAVE_CURVE25519) && \
-                                                          defined(HAVE_ED25519))
+                #if defined(HAVE_ECC) || \
+                        (defined(HAVE_CURVE25519) && defined(HAVE_ED25519)) || \
+                        (defined(HAVE_CURVE448) && defined(HAVE_ED448))
                     #define BUILD_TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256
                 #endif
                 #if defined(WOLFSSL_STATIC_DH) && defined(HAVE_ECC)
@@ -545,15 +578,17 @@
                 #endif
             #endif
 
-            #if defined(WOLFSSL_SHA384) && defined(WOLFSSL_AES_256)
+            #if defined(WOLFSSL_SHA384) && defined(WOLFSSL_AES_256) && \
+                                                           defined(HAVE_AES_CBC)
                 #if !defined(NO_RSA)
                     #define BUILD_TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384
                     #if defined(WOLFSSL_STATIC_DH) && defined(HAVE_ECC)
                         #define BUILD_TLS_ECDH_RSA_WITH_AES_256_CBC_SHA384
                     #endif
                 #endif
-                #if defined(HAVE_ECC) || (defined(HAVE_CURVE25519) && \
-                                                          defined(HAVE_ED25519))
+                #if defined(HAVE_ECC) || \
+                        (defined(HAVE_CURVE25519) && defined(HAVE_ED25519)) || \
+                        (defined(HAVE_CURVE448) && defined(HAVE_ED448))
                     #define BUILD_TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384
                 #endif
                 #if defined(WOLFSSL_STATIC_DH) && defined(HAVE_ECC)
@@ -593,15 +628,20 @@
         #if !defined(NO_RC4)
             #if !defined(NO_SHA)
                 #if !defined(NO_RSA)
-                    #define BUILD_TLS_ECDHE_RSA_WITH_RC4_128_SHA
+                    #ifndef WOLFSSL_AEAD_ONLY
+                        #define BUILD_TLS_ECDHE_RSA_WITH_RC4_128_SHA
+                    #endif
                     #if defined(WOLFSSL_STATIC_DH) && defined(HAVE_ECC)
                         #define BUILD_TLS_ECDH_RSA_WITH_RC4_128_SHA
                     #endif
                 #endif
 
-                #if defined(HAVE_ECC) || (defined(HAVE_CURVE25519) && \
-                                                          defined(HAVE_ED25519))
-                    #define BUILD_TLS_ECDHE_ECDSA_WITH_RC4_128_SHA
+                #if defined(HAVE_ECC) || \
+                        (defined(HAVE_CURVE25519) && defined(HAVE_ED25519)) || \
+                        (defined(HAVE_CURVE448) && defined(HAVE_ED448))
+                    #ifndef WOLFSSL_AEAD_ONLY
+                        #define BUILD_TLS_ECDHE_ECDSA_WITH_RC4_128_SHA
+                    #endif
                 #endif
                 #if defined(WOLFSSL_STATIC_DH) && defined(HAVE_ECC)
                     #define BUILD_TLS_ECDH_ECDSA_WITH_RC4_128_SHA
@@ -617,8 +657,9 @@
                     #endif
                 #endif
 
-                #if defined(HAVE_ECC) || (defined(HAVE_CURVE25519) && \
-                                                          defined(HAVE_ED25519))
+                #if defined(HAVE_ECC) || \
+                        (defined(HAVE_CURVE25519) && defined(HAVE_ED25519)) || \
+                        (defined(HAVE_CURVE448) && defined(HAVE_ED448))
                     #define BUILD_TLS_ECDHE_ECDSA_WITH_3DES_EDE_CBC_SHA
                 #endif
                 #if defined(WOLFSSL_STATIC_DH) && defined(HAVE_ECC)
@@ -628,8 +669,9 @@
         #endif
         #if defined(HAVE_NULL_CIPHER)
             #if !defined(NO_SHA)
-                #if defined(HAVE_ECC) || (defined(HAVE_CURVE25519) && \
-                                                          defined(HAVE_ED25519))
+                #if defined(HAVE_ECC) || \
+                        (defined(HAVE_CURVE25519) && defined(HAVE_ED25519)) || \
+                        (defined(HAVE_CURVE448) && defined(HAVE_ED448))
                     #define BUILD_TLS_ECDHE_ECDSA_WITH_NULL_SHA
                 #endif
             #endif
@@ -638,17 +680,18 @@
             #endif
         #endif
         #if !defined(NO_PSK) && !defined(NO_SHA256) && !defined(NO_AES) && \
-            defined(WOLFSSL_AES_128)
+            defined(WOLFSSL_AES_128) && defined(HAVE_AES_CBC)
             #define BUILD_TLS_ECDHE_PSK_WITH_AES_128_CBC_SHA256
         #endif
     #endif
     #if defined(HAVE_CHACHA) && defined(HAVE_POLY1305) && !defined(NO_SHA256)
         #if !defined(NO_OLD_POLY1305)
-        #if defined(HAVE_ECC) || (defined(HAVE_CURVE25519) && \
-                                                          defined(HAVE_ED25519))
+        #if defined(HAVE_ECC) || \
+                        (defined(HAVE_CURVE25519) && defined(HAVE_ED25519)) || \
+                        (defined(HAVE_CURVE448) && defined(HAVE_ED448))
             #define BUILD_TLS_ECDHE_ECDSA_WITH_CHACHA20_OLD_POLY1305_SHA256
         #endif
-        #ifndef NO_RSA
+        #if !defined(NO_RSA) && defined(HAVE_ECC)
             #define BUILD_TLS_ECDHE_RSA_WITH_CHACHA20_OLD_POLY1305_SHA256
         #endif
         #if !defined(NO_DH) && !defined(NO_RSA)
@@ -657,7 +700,8 @@
         #endif /* NO_OLD_POLY1305 */
         #if !defined(NO_PSK)
             #define BUILD_TLS_PSK_WITH_CHACHA20_POLY1305_SHA256
-            #if defined(HAVE_ECC) || defined(HAVE_ED25519)
+            #if defined(HAVE_ECC) || defined(HAVE_ED25519) || \
+                                                             defined(HAVE_ED448)
                 #define BUILD_TLS_ECDHE_PSK_WITH_CHACHA20_POLY1305_SHA256
             #endif
             #ifndef NO_DH
@@ -700,12 +744,13 @@
     #endif
 #endif
 
-#if (defined(HAVE_ECC) || defined(HAVE_CURVE25519)) && !defined(NO_TLS) && \
-                                                                !defined(NO_AES)
+#if (defined(HAVE_ECC) || defined(HAVE_CURVE25519) || defined(HAVE_CURVE448)) \
+                                         && !defined(NO_TLS) && !defined(NO_AES)
     #ifdef HAVE_AESGCM
         #if !defined(NO_SHA256) && defined(WOLFSSL_AES_128)
-            #if defined(HAVE_ECC) || (defined(HAVE_CURVE25519) && \
-                                                          defined(HAVE_ED25519))
+            #if defined(HAVE_ECC) || \
+                        (defined(HAVE_CURVE25519) && defined(HAVE_ED25519)) || \
+                        (defined(HAVE_CURVE448) && defined(HAVE_ED448))
                 #define BUILD_TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256
             #endif
             #ifndef NO_RSA
@@ -713,8 +758,9 @@
             #endif
         #endif
         #if defined(WOLFSSL_SHA384) && defined(WOLFSSL_AES_256)
-            #if defined(HAVE_ECC) || (defined(HAVE_CURVE25519) && \
-                                                          defined(HAVE_ED25519))
+            #if defined(HAVE_ECC) || \
+                        (defined(HAVE_CURVE25519) && defined(HAVE_ED25519)) || \
+                        (defined(HAVE_CURVE448) && defined(HAVE_ED448))
                 #define BUILD_TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384
             #endif
             #ifndef NO_RSA
@@ -723,8 +769,9 @@
         #endif
     #endif
     #if defined(HAVE_AESCCM) && !defined(NO_SHA256)
-        #if defined(HAVE_ECC) || (defined(HAVE_CURVE25519) && \
-                                                          defined(HAVE_ED25519))
+        #if defined(HAVE_ECC) || \
+                        (defined(HAVE_CURVE25519) && defined(HAVE_ED25519)) || \
+                        (defined(HAVE_CURVE448) && defined(HAVE_ED448))
             #ifdef WOLFSSL_AES_128
                 #define BUILD_TLS_ECDHE_ECDSA_WITH_AES_128_CCM
                 #define BUILD_TLS_ECDHE_ECDSA_WITH_AES_128_CCM_8
@@ -737,9 +784,10 @@
 #endif
 
 #if defined(HAVE_CHACHA) && defined(HAVE_POLY1305) && !defined(NO_SHA256)
-    #if defined(HAVE_ECC) || defined(HAVE_CURVE25519)
-        #if defined(HAVE_ECC) || (defined(HAVE_CURVE25519) && \
-                                                          defined(HAVE_ED25519))
+    #if defined(HAVE_ECC) || defined(HAVE_CURVE25519) || defined(HAVE_CURVE448)
+        #if defined(HAVE_ECC) || \
+                        (defined(HAVE_CURVE25519) && defined(HAVE_ED25519)) || \
+                        (defined(HAVE_CURVE448) && defined(HAVE_ED448))
             #define BUILD_TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256
         #endif
         #ifndef NO_RSA
@@ -751,6 +799,8 @@
     #endif
 #endif
 
+#endif
+
 #if defined(WOLFSSL_TLS13)
     #ifdef HAVE_AESGCM
         #if !defined(NO_SHA256) && defined(WOLFSSL_AES_128)
@@ -773,6 +823,14 @@
             #define BUILD_TLS_AES_128_CCM_8_SHA256
         #endif
     #endif
+    #ifdef HAVE_NULL_CIPHER
+        #ifndef NO_SHA256
+            #define BUILD_TLS_SHA256_SHA256
+        #endif
+        #ifdef WOLFSSL_SHA384
+            #define BUILD_TLS_SHA384_SHA384
+        #endif
+    #endif
 #endif
 
 #ifdef WOLFSSL_MULTICAST
@@ -807,7 +865,9 @@
     defined(BUILD_TLS_DHE_RSA_WITH_AES_256_GCM_SHA384) || \
     defined(BUILD_TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384) || \
     defined(BUILD_TLS_PSK_WITH_AES_256_GCM_SHA384) || \
-    defined(BUILD_TLS_DHE_PSK_WITH_AES_256_GCM_SHA384)
+    defined(BUILD_TLS_DHE_PSK_WITH_AES_256_GCM_SHA384) || \
+    defined(BUILD_TLS_AES_128_GCM_SHA256) || \
+    defined(BUILD_TLS_AES_256_GCM_SHA384)
     #define BUILD_AESGCM
 #else
     /* No AES-GCM cipher suites available with build */
@@ -831,8 +891,7 @@
 #endif
 
 #if defined(BUILD_TLS_RSA_WITH_HC_128_SHA) || \
-    defined(BUILD_TLS_RSA_WITH_HC_128_MD5) || \
-    defined(BUILD_TLS_RSA_WITH_HC_128_B2B256)
+    defined(BUILD_TLS_RSA_WITH_HC_128_MD5)
     #define BUILD_HC128
 #endif
 
@@ -867,7 +926,9 @@
 #if defined(WOLFSSL_MAX_STRENGTH) || \
     (defined(HAVE_AESGCM) && !defined(NO_AESGCM_AEAD)) || \
      defined(HAVE_AESCCM) || \
-    (defined(HAVE_CHACHA) && defined(HAVE_POLY1305) && !defined(NO_CHAPOL_AEAD))
+    (defined(HAVE_CHACHA) && defined(HAVE_POLY1305) && \
+     !defined(NO_CHAPOL_AEAD)) || \
+    (defined(WOLFSSL_TLS13) && defined(HAVE_NULL_CIPHER))
 
     #define HAVE_AEAD
 #endif
@@ -890,6 +951,7 @@
     TLS_DH_anon_WITH_AES_128_CBC_SHA  = 0x34,
     TLS_RSA_WITH_AES_256_CBC_SHA      = 0x35,
     TLS_RSA_WITH_AES_128_CBC_SHA      = 0x2F,
+    TLS_RSA_WITH_NULL_MD5             = 0x01,
     TLS_RSA_WITH_NULL_SHA             = 0x02,
     TLS_PSK_WITH_AES_256_CBC_SHA      = 0x8d,
     TLS_PSK_WITH_AES_128_CBC_SHA256   = 0xae,
@@ -940,11 +1002,6 @@
     TLS_RSA_WITH_RABBIT_SHA       = 0xFD,
     WDM_WITH_NULL_SHA256          = 0xFE, /* wolfSSL DTLS Multicast */
 
-    /* wolfSSL extension - Blake2b 256 */
-    TLS_RSA_WITH_AES_128_CBC_B2B256   = 0xF8,
-    TLS_RSA_WITH_AES_256_CBC_B2B256   = 0xF9,
-    TLS_RSA_WITH_HC_128_B2B256        = 0xFA,   /* eSTREAM too */
-
     /* wolfSSL extension - NTRU */
     TLS_NTRU_RSA_WITH_RC4_128_SHA      = 0xe5,
     TLS_NTRU_RSA_WITH_3DES_EDE_CBC_SHA = 0xe6,
@@ -1035,6 +1092,12 @@
     TLS_AES_128_CCM_SHA256       = 0x04,
     TLS_AES_128_CCM_8_SHA256     = 0x05,
 
+    /* TLS v1.3 Integity only cipher suites - 0xC0 (ECC) first byte */
+    TLS_SHA256_SHA256            = 0xB4,
+    TLS_SHA384_SHA384            = 0xB5,
+
+    /* Fallback SCSV (Signaling Cipher Suite Value) */
+    TLS_FALLBACK_SCSV                        = 0x56,
     /* Renegotiation Indication Extension Special Suite */
     TLS_EMPTY_RENEGOTIATION_INFO_SCSV        = 0xff
 };
@@ -1085,7 +1148,11 @@
 #define MIN_DHKEY_SZ (WOLFSSL_MIN_DHKEY_BITS / 8)
 /* set maximum DH key size allowed */
 #ifndef WOLFSSL_MAX_DHKEY_BITS
-    #define WOLFSSL_MAX_DHKEY_BITS 4096
+    #if (defined(USE_FAST_MATH) && defined(FP_MAX_BITS) && FP_MAX_BITS >= 16384)
+        #define WOLFSSL_MAX_DHKEY_BITS 8192
+    #else
+        #define WOLFSSL_MAX_DHKEY_BITS 4096
+    #endif
 #endif
 #if (WOLFSSL_MAX_DHKEY_BITS % 8)
     #error DH maximum bit size must be multiple of 8
@@ -1095,7 +1162,19 @@
 #endif
 #define MAX_DHKEY_SZ (WOLFSSL_MAX_DHKEY_BITS / 8)
 
-
+#ifndef MAX_PSK_ID_LEN
+    /* max psk identity/hint supported */
+    #if defined(WOLFSSL_TLS13)
+        #define MAX_PSK_ID_LEN 256
+    #else
+        #define MAX_PSK_ID_LEN 128
+    #endif
+#endif
+
+#ifndef MAX_EARLY_DATA_SZ
+    /* maximum early data size */
+    #define MAX_EARLY_DATA_SZ  4096
+#endif
 
 enum Misc {
     CIPHER_BYTE = 0x00,            /* Default ciphers */
@@ -1116,8 +1195,8 @@
     TLSv1_1_MINOR   = 2,        /* TLSv1_1 minor version number */
     TLSv1_2_MINOR   = 3,        /* TLSv1_2 minor version number */
     TLSv1_3_MINOR   = 4,        /* TLSv1_3 minor version number */
-#ifndef WOLFSSL_TLS13_FINAL
     TLS_DRAFT_MAJOR = 0x7f,     /* Draft TLS major version number */
+#ifdef WOLFSSL_TLS13_DRAFT
 #ifdef WOLFSSL_TLS13_DRAFT_18
     TLS_DRAFT_MINOR = 0x12,     /* Minor version number of TLS draft */
 #elif defined(WOLFSSL_TLS13_DRAFT_22)
@@ -1138,14 +1217,29 @@
     HELLO_EXT_EXTMS = 0x0017,   /* ID for the extended master secret ext */
     SECRET_LEN      = WOLFSSL_MAX_MASTER_KEY_LENGTH,
                                 /* pre RSA and all master */
-#if defined(WOLFSSL_MYSQL_COMPATIBLE)
-    ENCRYPT_LEN     = 1024,     /* allow larger static buffer with mysql */
+#if defined(WOLFSSL_MYSQL_COMPATIBLE) || \
+    (defined(USE_FAST_MATH) && defined(FP_MAX_BITS) && FP_MAX_BITS > 8192)
+#ifndef NO_PSK
+    ENCRYPT_LEN     = 1024 + MAX_PSK_ID_LEN + 2,   /* 8192 bit static buffer */
+#else
+    ENCRYPT_LEN     = 1024,     /* allow 8192 bit static buffer */
+#endif
+#else
+#ifndef NO_PSK
+    ENCRYPT_LEN     = 512 + MAX_PSK_ID_LEN + 2,    /* 4096 bit static buffer */
 #else
     ENCRYPT_LEN     = 512,      /* allow 4096 bit static buffer */
 #endif
+#endif
     SIZEOF_SENDER   =  4,       /* clnt or srvr           */
     FINISHED_SZ     = 36,       /* WC_MD5_DIGEST_SIZE + WC_SHA_DIGEST_SIZE */
     MAX_RECORD_SIZE = 16384,    /* 2^14, max size by standard */
+    MAX_PLAINTEXT_SZ   = (1 << 14),        /* Max plaintext sz   */
+    MAX_TLS_CIPHER_SZ  = (1 << 14) + 2048, /* Max TLS encrypted data sz */
+#ifdef WOLFSSL_TLS13
+    MAX_TLS13_PLAIN_SZ = (1 << 14) + 1,    /* Max unencrypted data sz */
+    MAX_TLS13_ENC_SZ   = (1 << 14) + 256,  /* Max encrypted data sz   */
+#endif
     MAX_MSG_EXTRA   = 38 + WC_MAX_DIGEST_SIZE,
                                 /* max added to msg, mac + pad  from */
                                 /* RECORD_HEADER_SZ + BLOCK_SZ (pad) + Max
@@ -1153,7 +1247,7 @@
     MAX_COMP_EXTRA  = 1024,     /* max compression extra */
     MAX_MTU         = WOLFSSL_MAX_MTU,     /* max expected MTU */
     MAX_UDP_SIZE    = 8192 - 100, /* was MAX_MTU - 100 */
-    MAX_DH_SZ       = (MAX_DHKEY_SZ * 2) + 12,
+    MAX_DH_SZ       = (MAX_DHKEY_SZ * 3) + 12, /* DH_P, DH_G and DH_Pub */
                                 /* 4096 p, pub, g + 2 byte size for each */
     MAX_STR_VERSION = 8,        /* string rep of protocol version */
 
@@ -1171,13 +1265,14 @@
     MAX_DH_SIZE    = MAX_DHKEY_SZ+1,
                                /* Max size plus possible leading 0 */
     NAMED_DH_MASK  = 0x100,    /* Named group mask for DH parameters  */
+    MIN_FFHDE_GROUP = 0x100,   /* Named group minimum for FFDHE parameters  */
+    MAX_FFHDE_GROUP = 0x1FF,   /* Named group maximum for FFDHE parameters  */
     SESSION_HINT_SZ = 4,       /* session timeout hint */
     SESSION_ADD_SZ = 4,        /* session age add */
     TICKET_NONCE_LEN_SZ = 1,   /* Ticket nonce length size */
     DEF_TICKET_NONCE_SZ = 1,   /* Default ticket nonce size */
-    MAX_TICKET_NONCE_SZ = 4,   /* maximum ticket nonce size */
+    MAX_TICKET_NONCE_SZ = 8,   /* maximum ticket nonce size */
     MAX_LIFETIME   = 604800,   /* maximum ticket lifetime */
-    MAX_EARLY_DATA_SZ = 4096,  /* maximum early data size */
 
     RAN_LEN      = 32,         /* random length           */
     SEED_LEN     = RAN_LEN * 2, /* tls prf seed length    */
@@ -1216,26 +1311,26 @@
     DTLS_HANDSHAKE_FRAG_SZ   = 3,  /* fragment offset and length are 24 bit */
     DTLS_POOL_SZ             = 255,/* allowed number of list items in TX pool */
     DTLS_EXPORT_PRO          = 165,/* wolfSSL protocol for serialized session */
+    DTLS_EXPORT_STATE_PRO    = 166,/* wolfSSL protocol for serialized state */
     DTLS_EXPORT_VERSION      = 4,  /* wolfSSL version for serialized session */
     DTLS_EXPORT_OPT_SZ       = 60, /* amount of bytes used from Options */
     DTLS_EXPORT_VERSION_3    = 3,  /* wolfSSL version before TLS 1.3 addition */
     DTLS_EXPORT_OPT_SZ_3     = 59, /* amount of bytes used from Options */
     DTLS_EXPORT_KEY_SZ       = 325 + (DTLS_SEQ_SZ * 2),
                                    /* max amount of bytes used from Keys */
-    DTLS_EXPORT_MIN_KEY_SZ   = 78 + (DTLS_SEQ_SZ * 2),
+    DTLS_EXPORT_MIN_KEY_SZ   = 85 + (DTLS_SEQ_SZ * 2),
                                    /* min amount of bytes used from Keys */
     DTLS_EXPORT_SPC_SZ       = 16, /* amount of bytes used from CipherSpecs */
     DTLS_EXPORT_LEN          = 2,  /* 2 bytes for length and protocol */
     DTLS_EXPORT_IP           = 46, /* max ip size IPv4 mapped IPv6 */
     MAX_EXPORT_BUFFER        = 514, /* max size of buffer for exporting */
+    MAX_EXPORT_STATE_BUFFER  = (DTLS_EXPORT_MIN_KEY_SZ) + (3 * DTLS_EXPORT_LEN),
+                                    /* max size of buffer for exporting state */
     FINISHED_LABEL_SZ   = 15,  /* TLS finished label size */
     TLS_FINISHED_SZ     = 12,  /* TLS has a shorter size  */
     EXT_MASTER_LABEL_SZ = 22,  /* TLS extended master secret label sz */
     MASTER_LABEL_SZ     = 13,  /* TLS master secret label sz */
     KEY_LABEL_SZ        = 13,  /* TLS key block expansion sz */
-    MAX_PRF_HALF        = 256, /* Maximum half secret len */
-    MAX_PRF_LABSEED     = 128, /* Maximum label + seed len */
-    MAX_PRF_DIG         = 224, /* Maximum digest len      */
     PROTOCOL_LABEL_SZ   = 9,   /* Length of the protocol label */
     MAX_LABEL_SZ        = 34,  /* Maximum length of a label */
     MAX_HKDF_LABEL_SZ   = OPAQUE16_LEN +
@@ -1253,9 +1348,13 @@
 #endif
 
 #ifdef HAVE_SELFTEST
-    AES_256_KEY_SIZE    = 32,
+    #ifndef WOLFSSL_AES_KEY_SIZE_ENUM
+    #define WOLFSSL_AES_KEY_SIZE_ENUM
     AES_IV_SIZE         = 16,
     AES_128_KEY_SIZE    = 16,
+    AES_192_KEY_SIZE    = 24,
+    AES_256_KEY_SIZE    = 32,
+    #endif
 #endif
 
     MAX_IV_SZ           = AES_BLOCK_SIZE,
@@ -1293,6 +1392,8 @@
 
     POLY1305_AUTH_SZ    = 16,  /* 128 bits                */
 
+    HMAC_NONCE_SZ       = 12,  /* Size of HMAC nonce */
+
     HC_128_KEY_SIZE     = 16,  /* 128 bits                */
     HC_128_IV_SIZE      = 16,  /* also 128 bits           */
 
@@ -1301,11 +1402,13 @@
 
     EVP_SALT_SIZE       =  8,  /* evp salt size 64 bits   */
 
+#ifndef ECDHE_SIZE /* allow this to be overridden at compile-time */
     ECDHE_SIZE          = 32,  /* ECHDE server size defaults to 256 bit */
+#endif
     MAX_EXPORT_ECC_SZ   = 256, /* Export ANS X9.62 max future size */
     MAX_CURVE_NAME_SZ   = 16,  /* Maximum size of curve name string */
 
-    NEW_SA_MAJOR        = 8,   /* Most signicant byte used with new sig algos */
+    NEW_SA_MAJOR        = 8,   /* Most significant byte used with new sig algos */
     ED25519_SA_MAJOR    = 8,   /* Most significant byte for ED25519 */
     ED25519_SA_MINOR    = 7,   /* Least significant byte for ED25519 */
     ED448_SA_MAJOR      = 8,   /* Most significant byte for ED448 */
@@ -1314,7 +1417,17 @@
     MIN_RSA_SHA512_PSS_BITS = 512 * 2 + 8 * 8, /* Min key size */
     MIN_RSA_SHA384_PSS_BITS = 384 * 2 + 8 * 8, /* Min key size */
 
-    MAX_CERT_VERIFY_SZ = 1024, /* max   */
+#ifndef NO_RSA
+    MAX_CERT_VERIFY_SZ = 4096 / 8, /* max RSA - default 4096-bits */
+#elif defined(HAVE_ECC)
+    MAX_CERT_VERIFY_SZ = ECC_MAX_SIG_SIZE, /* max ECC  */
+#elif defined(HAVE_ED448)
+    MAX_CERT_VERIFY_SZ = ED448_SIG_SIZE,   /* max Ed448  */
+#elif defined(HAVE_ED25519)
+    MAX_CERT_VERIFY_SZ = ED25519_SIG_SIZE, /* max Ed25519  */
+#else
+    MAX_CERT_VERIFY_SZ = 1024, /* max default  */
+#endif
     CLIENT_HELLO_FIRST =  35,  /* Protocol + RAN_LEN + sizeof(id_len) */
     MAX_SUITE_NAME     =  48,  /* maximum length of cipher suite string */
 
@@ -1322,17 +1435,14 @@
     DTLS_TIMEOUT_MAX        = 64, /* default max timeout for DTLS receive */
     DTLS_TIMEOUT_MULTIPLIER =  2, /* default timeout multiplier for DTLS recv */
 
-    MAX_PSK_ID_LEN     = 128,  /* max psk identity/hint supported */
     NULL_TERM_LEN      =   1,  /* length of null '\0' termination character */
     MAX_PSK_KEY_LEN    =  64,  /* max psk key supported */
     MIN_PSK_ID_LEN     =   6,  /* min length of identities */
     MIN_PSK_BINDERS_LEN=  33,  /* min length of binders */
     MAX_TICKET_AGE_SECS=  10,  /* maximum ticket age in seconds */
 
-    MAX_WOLFSSL_FILE_SIZE = 1024 * 1024 * 4,  /* 4 mb file size alloc limit */
-
-#if defined(HAVE_EX_DATA) || defined(FORTRESS)
-    MAX_EX_DATA        =   5,  /* allow for five items of ex_data */
+#ifndef MAX_WOLFSSL_FILE_SIZE
+    MAX_WOLFSSL_FILE_SIZE = 1024ul * 1024ul * 4,  /* 4 mb file size alloc limit */
 #endif
 
     MAX_X509_SIZE      = 2048, /* max static x509 buffer size */
@@ -1394,9 +1504,9 @@
     #endif
 #endif /* WOLFSSL_MIN_ECC_BITS */
 #if (WOLFSSL_MIN_ECC_BITS % 8)
-    /* Some ECC keys are not divisable by 8 such as prime239v1 or sect131r1.
-       In these cases round down to the nearest value divisable by 8. The
-       restriction of being divisable by 8 is in place to match wc_ecc_size
+    /* Some ECC keys are not divisible by 8 such as prime239v1 or sect131r1.
+       In these cases round down to the nearest value divisible by 8. The
+       restriction of being divisible by 8 is in place to match wc_ecc_size
        function from wolfSSL.
      */
     #error ECC minimum bit size must be a multiple of 8
@@ -1413,7 +1523,7 @@
 #endif /* WOLFSSL_MIN_RSA_BITS */
 #if (WOLFSSL_MIN_RSA_BITS % 8)
     /* This is to account for the example case of a min size of 2050 bits but
-       still allows 2049 bit key. So we need the measurment to be in bytes. */
+       still allows 2049 bit key. So we need the measurement to be in bytes. */
     #error RSA minimum bit size must be a multiple of 8
 #endif
 #define MIN_RSAKEY_SZ (WOLFSSL_MIN_RSA_BITS / 8)
@@ -1474,6 +1584,7 @@
 	SERVER_CHANGECIPHERSPEC_COMPLETE,
     SERVER_FINISHED_COMPLETE,
 
+    CLIENT_HELLO_RETRY,
     CLIENT_HELLO_COMPLETE,
     CLIENT_KEYEXCHANGE_COMPLETE,
 	CLIENT_CHANGECIPHERSPEC_COMPLETE,
@@ -1504,42 +1615,15 @@
                                                                      word32 sz);
     WOLFSSL_LOCAL int wolfSSL_dtls_export_internal(WOLFSSL* ssl, byte* buf,
                                                                      word32 sz);
+    WOLFSSL_LOCAL int wolfSSL_dtls_export_state_internal(WOLFSSL* ssl,
+                                                          byte* buf, word32 sz);
+    WOLFSSL_LOCAL int wolfSSL_dtls_import_state_internal(WOLFSSL* ssl,
+                                                          byte* buf, word32 sz);
     WOLFSSL_LOCAL int wolfSSL_send_session(WOLFSSL* ssl);
     #endif
 #endif
 
 
-/* wolfSSL BIO_METHOD type */
-struct WOLFSSL_BIO_METHOD {
-    byte type;               /* method type */
-};
-
-
-/* wolfSSL BIO type */
-struct WOLFSSL_BIO {
-    WOLFSSL_BUF_MEM* mem_buf;
-    WOLFSSL*     ssl;           /* possible associated ssl */
-#ifndef NO_FILESYSTEM
-    XFILE        file;
-#endif
-    WOLFSSL_BIO* prev;          /* previous in chain */
-    WOLFSSL_BIO* next;          /* next in chain */
-    WOLFSSL_BIO* pair;          /* BIO paired with */
-    void*        heap;          /* user heap hint */
-    byte*        mem;           /* memory buffer */
-    int         wrSz;          /* write buffer size (mem) */
-    int         wrIdx;         /* current index for write buffer */
-    int         rdIdx;         /* current read index */
-    int         readRq;        /* read request */
-    int         memLen;        /* memory buffer length */
-    int         fd;            /* possible file descriptor */
-    int         eof;           /* eof flag */
-    int         flags;
-    byte        type;          /* method type */
-    byte        close;         /* close flag */
-};
-
-
 /* wolfSSL method type */
 struct WOLFSSL_METHOD {
     ProtocolVersion version;
@@ -1556,6 +1640,9 @@
 /* defaults to client */
 WOLFSSL_LOCAL void InitSSL_Method(WOLFSSL_METHOD*, ProtocolVersion);
 
+WOLFSSL_LOCAL int InitSSL_Suites(WOLFSSL* ssl);
+WOLFSSL_LOCAL int InitSSL_Side(WOLFSSL* ssl, word16 side);
+
 /* for sniffer */
 WOLFSSL_LOCAL int DoFinished(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                             word32 size, word32 totalSz, int sniff);
@@ -1563,6 +1650,9 @@
 /* TLS v1.3 needs these */
 WOLFSSL_LOCAL int  HandleTlsResumption(WOLFSSL* ssl, int bogusID,
                                        Suites* clSuites);
+#ifdef WOLFSSL_TLS13
+WOLFSSL_LOCAL int FindSuite(Suites* suites, byte first, byte second);
+#endif
 WOLFSSL_LOCAL int  DoClientHello(WOLFSSL* ssl, const byte* input, word32*,
                                  word32);
 #ifdef WOLFSSL_TLS13
@@ -1573,7 +1663,7 @@
                                  word32);
 WOLFSSL_LOCAL int  CompleteServerHello(WOLFSSL *ssl);
 WOLFSSL_LOCAL int  CheckVersion(WOLFSSL *ssl, ProtocolVersion pv);
-WOLFSSL_LOCAL void PickHashSigAlgo(WOLFSSL* ssl, const byte* hashSigAlgo,
+WOLFSSL_LOCAL int  PickHashSigAlgo(WOLFSSL* ssl, const byte* hashSigAlgo,
                                    word32 hashSigAlgoSz);
 WOLFSSL_LOCAL int  DecodePrivateKey(WOLFSSL *ssl, word16* length);
 #ifdef HAVE_PK_CALLBACKS
@@ -1583,10 +1673,14 @@
 #endif
 #endif
 WOLFSSL_LOCAL void FreeKeyExchange(WOLFSSL* ssl);
+WOLFSSL_LOCAL void FreeSuites(WOLFSSL* ssl);
 WOLFSSL_LOCAL int  ProcessPeerCerts(WOLFSSL* ssl, byte* input, word32* inOutIdx, word32 size);
 WOLFSSL_LOCAL int  MatchDomainName(const char* pattern, int len, const char* str);
 #ifndef NO_CERTS
 WOLFSSL_LOCAL int  CheckAltNames(DecodedCert* dCert, char* domain);
+#ifdef OPENSSL_EXTRA
+WOLFSSL_LOCAL int  CheckIPAddr(DecodedCert* dCert, char* ipasc);
+#endif
 #endif
 WOLFSSL_LOCAL int  CreateTicket(WOLFSSL* ssl);
 WOLFSSL_LOCAL int  HashOutputRaw(WOLFSSL* ssl, const byte* output, int sz);
@@ -1697,6 +1791,9 @@
     byte   setSuites;               /* user set suites from default */
     byte   hashAlgo;                /* selected hash algorithm */
     byte   sigAlgo;                 /* selected sig algorithm */
+#if defined(OPENSSL_ALL) || defined(WOLFSSL_NGINX) || defined(WOLFSSL_HAPROXY)
+    WOLF_STACK_OF(WOLFSSL_CIPHER)* stack; /* stack of available cipher suites */
+#endif
 };
 
 
@@ -1713,6 +1810,13 @@
                           unsigned int, unsigned char*, unsigned int);
     typedef unsigned int (*wc_psk_server_callback)(WOLFSSL*, const char*,
                           unsigned char*, unsigned int);
+#ifdef WOLFSSL_TLS13
+    typedef unsigned int (*wc_psk_client_tls13_callback)(WOLFSSL*, const char*,
+                          char*, unsigned int, unsigned char*, unsigned int,
+                          const char**);
+    typedef unsigned int (*wc_psk_server_tls13_callback)(WOLFSSL*, const char*,
+                          unsigned char*, unsigned int, const char**);
+#endif
 #endif /* PSK_TYPES_DEFINED */
 #if defined(WOLFSSL_DTLS) && defined(WOLFSSL_SESSION_EXPORT) && \
    !defined(WOLFSSL_DTLS_EXPORT_TYPES)
@@ -1722,48 +1826,45 @@
 #endif /* WOLFSSL_DTLS_EXPORT_TYPES */
 
 
+#if defined(OPENSSL_ALL) || defined(WOLFSSL_QT)
+#define MAX_DESCRIPTION_SZ 255
+#endif
 /* wolfSSL Cipher type just points back to SSL */
 struct WOLFSSL_CIPHER {
+    byte cipherSuite0;
+    byte cipherSuite;
     WOLFSSL* ssl;
+#if defined(OPENSSL_ALL) || defined(WOLFSSL_QT)
+    char description[MAX_DESCRIPTION_SZ];
+    unsigned long offset;
+    unsigned int in_stack; /* TRUE if added to stack in wolfSSL_get_ciphers_compat */
+    int bits;
+#endif
 };
 
 
-typedef struct OcspEntry OcspEntry;
-
-#ifdef NO_SHA
-    #define OCSP_DIGEST_SIZE WC_SHA256_DIGEST_SIZE
-#else
-    #define OCSP_DIGEST_SIZE WC_SHA_DIGEST_SIZE
-#endif
-
 #ifdef NO_ASN
     /* no_asn won't have */
     typedef struct CertStatus CertStatus;
 #endif
 
-struct OcspEntry {
-    OcspEntry*  next;                            /* next entry             */
-    byte        issuerHash[OCSP_DIGEST_SIZE];    /* issuer hash            */
-    byte        issuerKeyHash[OCSP_DIGEST_SIZE]; /* issuer public key hash */
-    CertStatus* status;                          /* OCSP response list     */
-    int         totalStatus;                     /* number on list         */
-};
-
-
 #ifndef HAVE_OCSP
     typedef struct WOLFSSL_OCSP WOLFSSL_OCSP;
 #endif
 
 /* wolfSSL OCSP controller */
+#ifdef HAVE_OCSP
 struct WOLFSSL_OCSP {
     WOLFSSL_CERT_MANAGER* cm;            /* pointer back to cert manager */
     OcspEntry*            ocspList;      /* OCSP response list */
     wolfSSL_Mutex         ocspLock;      /* OCSP list lock */
+    int                   error;
 #if defined(OPENSSL_ALL) || defined(OPENSSL_EXTRA) || \
     defined(WOLFSSL_NGINX) || defined(WOLFSSL_HAPROXY)
     int(*statusCb)(WOLFSSL*, void*);
 #endif
 };
+#endif
 
 #ifndef MAX_DATE_SIZE
 #define MAX_DATE_SIZE 32
@@ -1799,7 +1900,7 @@
     byte*   signature;
     word32  signatureSz;
     word32  signatureOID;
-#if !defined(NO_SKID) && defined(CRL_SKID_READY)
+#if !defined(NO_SKID) && !defined(NO_ASN)
     byte    extAuthKeyIdSet;
     byte    extAuthKeyId[KEYID_SIZE];
 #endif
@@ -1869,6 +1970,9 @@
 #endif
     char*           ocspOverrideURL;     /* use this responder */
     void*           ocspIOCtx;           /* I/O callback CTX */
+#ifndef NO_WOLFSSL_CM_VERIFY
+    VerifyCallback  verifyCallback;      /* Verify callback */
+#endif
     CallbackCACache caCacheCallback;     /* CA cache addition callback */
     CbMissingCRL    cbMissingCRL;        /* notify through cb of missing crl */
     CbOCSPIO        ocspIOCb;            /* I/O callback for OCSP lookup */
@@ -1885,7 +1989,7 @@
 #ifndef NO_RSA
     short           minRsaKeySz;         /* minimum allowed RSA key size */
 #endif
-#if defined(HAVE_ECC) || defined(HAVE_ED25519)
+#if defined(HAVE_ECC) || defined(HAVE_ED25519) || defined(HAVE_ED448)
     short           minEccKeySz;         /* minimum allowed ECC key size */
 #endif
 };
@@ -1895,6 +1999,42 @@
 WOLFSSL_LOCAL int CM_MemSaveCertCache(WOLFSSL_CERT_MANAGER*, void*, int, int*);
 WOLFSSL_LOCAL int CM_MemRestoreCertCache(WOLFSSL_CERT_MANAGER*, const void*, int);
 WOLFSSL_LOCAL int CM_GetCertCacheMemSize(WOLFSSL_CERT_MANAGER*);
+WOLFSSL_LOCAL int CM_VerifyBuffer_ex(WOLFSSL_CERT_MANAGER* cm, const byte* buff,
+                                    long sz, int format, int err_val);
+
+
+#ifndef NO_CERTS
+#if !defined NOCERTS &&\
+    (!defined(NO_WOLFSSL_CLIENT) || !defined(WOLFSSL_NO_CLIENT_AUTH))
+typedef struct ProcPeerCertArgs {
+    buffer*      certs;
+#ifdef WOLFSSL_TLS13
+    buffer*      exts; /* extensions */
+#endif
+    DecodedCert* dCert;
+    word32 idx;
+    word32 begin;
+    int    totalCerts; /* number of certs in certs buffer */
+    int    count;
+    int    certIdx;
+    int    lastErr;
+#ifdef WOLFSSL_TLS13
+    byte   ctxSz;
+#endif
+#if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
+    char   untrustedDepth;
+#endif
+    word16 fatal:1;
+    word16 verifyErr:1;
+    word16 dCertInit:1;
+#ifdef WOLFSSL_TRUST_PEER_CERT
+    word16 haveTrustPeer:1; /* was cert verified by loaded trusted peer cert */
+#endif
+} ProcPeerCertArgs;
+WOLFSSL_LOCAL int DoVerifyCallback(WOLFSSL_CERT_MANAGER* cm, WOLFSSL* ssl,
+        int ret, ProcPeerCertArgs* args);
+#endif /* !defined(NO_WOLFSSL_CLIENT) || !defined(WOLFSSL_NO_CLIENT_AUTH) */
+#endif /* !defined NO_CERTS */
 
 /* wolfSSL Sock Addr */
 struct WOLFSSL_SOCKADDR {
@@ -1933,8 +2073,10 @@
 /* keys and secrets
  * keep as a constant size (no additional ifdefs) for session export */
 typedef struct Keys {
+#if !defined(WOLFSSL_AEAD_ONLY) || defined(WOLFSSL_TLS13)
     byte client_write_MAC_secret[WC_MAX_DIGEST_SIZE];   /* max sizes */
     byte server_write_MAC_secret[WC_MAX_DIGEST_SIZE];
+#endif
     byte client_write_key[MAX_SYM_KEY_SIZE];         /* max sizes */
     byte server_write_key[MAX_SYM_KEY_SIZE];
     byte client_write_IV[MAX_WRITE_IV_SZ];               /* max sizes */
@@ -1978,6 +2120,10 @@
     byte   updateResponseReq:1;   /* KeyUpdate response from peer required. */
     byte   keyUpdateRespond:1;    /* KeyUpdate is to be responded to. */
 #endif
+#ifdef WOLFSSL_RENESAS_TSIP_TLS
+    byte tsip_client_write_MAC_secret[TSIP_TLS_HMAC_KEY_INDEX_WORDSIZE];
+    byte tsip_server_write_MAC_secret[TSIP_TLS_HMAC_KEY_INDEX_WORDSIZE];
+#endif
 } Keys;
 
 
@@ -1988,13 +2134,19 @@
 typedef enum {
     TLSX_SERVER_NAME                = 0x0000, /* a.k.a. SNI  */
     TLSX_MAX_FRAGMENT_LENGTH        = 0x0001,
+    TLSX_TRUSTED_CA_KEYS            = 0x0003,
     TLSX_TRUNCATED_HMAC             = 0x0004,
     TLSX_STATUS_REQUEST             = 0x0005, /* a.k.a. OCSP stapling   */
     TLSX_SUPPORTED_GROUPS           = 0x000a, /* a.k.a. Supported Curves */
     TLSX_EC_POINT_FORMATS           = 0x000b,
+#if !defined(WOLFSSL_NO_SIGALG)
     TLSX_SIGNATURE_ALGORITHMS       = 0x000d,
+#endif
     TLSX_APPLICATION_LAYER_PROTOCOL = 0x0010, /* a.k.a. ALPN */
     TLSX_STATUS_REQUEST_V2          = 0x0011, /* a.k.a. OCSP stapling v2 */
+#if defined(HAVE_ENCRYPT_THEN_MAC) && !defined(WOLFSSL_AEAD_ONLY)
+    TLSX_ENCRYPT_THEN_MAC           = 0x0016, /* RFC 7366 */
+#endif
     TLSX_QUANTUM_SAFE_HYBRID        = 0x0018, /* a.k.a. QSH  */
     TLSX_SESSION_TICKET             = 0x0023,
 #ifdef WOLFSSL_TLS13
@@ -2037,7 +2189,7 @@
 WOLFSSL_LOCAL int   TLSX_PopulateExtensions(WOLFSSL* ssl, byte isRequest);
 
 #if defined(WOLFSSL_TLS13) || !defined(NO_WOLFSSL_CLIENT)
-WOLFSSL_LOCAL int   TLSX_GetRequestSize(WOLFSSL* ssl, byte msgType, 
+WOLFSSL_LOCAL int   TLSX_GetRequestSize(WOLFSSL* ssl, byte msgType,
                                          word16* pLength);
 WOLFSSL_LOCAL int   TLSX_WriteRequest(WOLFSSL* ssl, byte* output,
                                        byte msgType, word16* pOffset);
@@ -2045,17 +2197,20 @@
 
 #if defined(WOLFSSL_TLS13) || !defined(NO_WOLFSSL_SERVER)
 /* TLS 1.3 Certificate messages have extensions. */
-WOLFSSL_LOCAL int   TLSX_GetResponseSize(WOLFSSL* ssl, byte msgType, 
+WOLFSSL_LOCAL int   TLSX_GetResponseSize(WOLFSSL* ssl, byte msgType,
                                           word16* pLength);
-WOLFSSL_LOCAL int   TLSX_WriteResponse(WOLFSSL *ssl, byte* output, byte msgType, 
+WOLFSSL_LOCAL int   TLSX_WriteResponse(WOLFSSL *ssl, byte* output, byte msgType,
                                         word16* pOffset);
 #endif
 
+WOLFSSL_LOCAL int   TLSX_ParseVersion(WOLFSSL* ssl, byte* input, word16 length,
+                                      byte msgType, int* found);
 WOLFSSL_LOCAL int   TLSX_Parse(WOLFSSL* ssl, byte* input, word16 length,
                                byte msgType, Suites *suites);
 
 #elif defined(HAVE_SNI)                           \
    || defined(HAVE_MAX_FRAGMENT)                  \
+   || defined(HAVE_TRUSTED_CA)                    \
    || defined(HAVE_TRUNCATED_HMAC)                \
    || defined(HAVE_CERTIFICATE_STATUS_REQUEST)    \
    || defined(HAVE_CERTIFICATE_STATUS_REQUEST_V2) \
@@ -2098,6 +2253,21 @@
 
 #endif /* HAVE_SNI */
 
+/* Trusted CA Key Indication - RFC 6066 (section 6) */
+#ifdef HAVE_TRUSTED_CA
+
+typedef struct TCA {
+    byte                       type;    /* TCA Type            */
+    byte*                      id;      /* TCA identifier      */
+    word16                     idSz;    /* TCA identifier size */
+    struct TCA*                next;    /* List Behavior       */
+} TCA;
+
+WOLFSSL_LOCAL int TLSX_UseTrustedCA(TLSX** extensions, byte type,
+                    const byte* id, word16 idSz, void* heap);
+
+#endif /* HAVE_TRUSTED_CA */
+
 /* Application-Layer Protocol Negotiation - RFC 7301 */
 #ifdef HAVE_ALPN
 typedef struct ALPN {
@@ -2205,6 +2375,7 @@
 WOLFSSL_LOCAL int TLSX_ValidateSupportedCurves(WOLFSSL* ssl, byte first,
                                                                    byte second);
 WOLFSSL_LOCAL int TLSX_SupportedCurve_CheckPriority(WOLFSSL* ssl);
+WOLFSSL_LOCAL int TLSX_SupportedFFDHE_Set(WOLFSSL* ssl);
 #endif
 WOLFSSL_LOCAL int TLSX_SupportedCurve_Preferred(WOLFSSL* ssl,
                                                             int checkSupported);
@@ -2226,11 +2397,13 @@
 /* Additional Connection State according to rfc5746 section 3.1 */
 typedef struct SecureRenegotiation {
    byte                 enabled;  /* secure_renegotiation flag in rfc */
+   byte                 verifySet;
    byte                 startScr; /* server requested client to start scr */
    enum key_cache_state cache_status;  /* track key cache state */
    byte                 client_verify_data[TLS_FINISHED_SZ];  /* cached */
    byte                 server_verify_data[TLS_FINISHED_SZ];  /* cached */
-   byte                 subject_hash[WC_SHA_DIGEST_SIZE];  /* peer cert hash */
+   byte                 subject_hash_set; /* if peer cert hash is set */
+   byte                 subject_hash[KEYID_SIZE];  /* peer cert hash */
    Keys                 tmp_keys;  /* can't overwrite real keys yet */
 } SecureRenegotiation;
 
@@ -2356,17 +2529,18 @@
     byte                 cipherSuite0;            /* Cipher Suite       */
     byte                 cipherSuite;             /* Cipher Suite       */
     word32               binderLen;               /* Length of HMAC     */
-    byte                 binder[WC_MAX_DIGEST_SIZE]; /* HMAC of hanshake   */
+    byte                 binder[WC_MAX_DIGEST_SIZE]; /* HMAC of handshake */
     byte                 hmac;                    /* HMAC algorithm     */
     byte                 resumption:1;            /* Resumption PSK     */
     byte                 chosen:1;                /* Server's choice    */
     struct PreSharedKey* next;                    /* List pointer       */
 } PreSharedKey;
 
-WOLFSSL_LOCAL word16 TLSX_PreSharedKey_WriteBinders(PreSharedKey* list,
-                                                    byte* output, byte msgType);
-WOLFSSL_LOCAL word16 TLSX_PreSharedKey_GetSizeBinders(PreSharedKey* list,
-                                                      byte msgType);
+WOLFSSL_LOCAL int TLSX_PreSharedKey_WriteBinders(PreSharedKey* list,
+                                                 byte* output, byte msgType,
+                                                 word16* pSz);
+WOLFSSL_LOCAL int TLSX_PreSharedKey_GetSizeBinders(PreSharedKey* list,
+                                                   byte msgType, word16* pSz);
 WOLFSSL_LOCAL int TLSX_PreSharedKey_Use(WOLFSSL* ssl, byte* identity,
                                         word16 len, word32 age, byte hmac,
                                         byte cipherSuite0, byte cipherSuite,
@@ -2413,7 +2587,7 @@
 enum SetCBIO {
     WOLFSSL_CBIO_NONE = 0,
     WOLFSSL_CBIO_RECV = 0x1,
-    WOLFSSL_CBIO_SEND = 0x2, 
+    WOLFSSL_CBIO_SEND = 0x2,
 };
 #endif
 
@@ -2434,19 +2608,22 @@
     DerBuffer*  certificate;
     DerBuffer*  certChain;
                  /* chain after self, in DER, with leading size for each cert */
-    #ifdef OPENSSL_EXTRA
+    #if defined(OPENSSL_EXTRA) || defined(WOLFSSL_EXTRA)
     WOLF_STACK_OF(WOLFSSL_X509_NAME)* ca_names;
     #endif
     #if defined(OPENSSL_ALL) || defined(OPENSSL_EXTRA) || \
         defined(WOLFSSL_NGINX) || defined (WOLFSSL_HAPROXY)
     WOLF_STACK_OF(WOLFSSL_X509)* x509Chain;
+    client_cert_cb CBClientCert;  /* client certificate callback */
     #endif
 #ifdef WOLFSSL_TLS13
     int         certChainCnt;
 #endif
     DerBuffer*  privateKey;
-    byte        privateKeyType;
+    byte        privateKeyType:7;
+    byte        privateKeyId:1;
     int         privateKeySz;
+    int         privateKeyDevId;
     WOLFSSL_CERT_MANAGER* cm;      /* our cert manager, ctx owns SSL will use */
 #endif
 #ifdef KEEP_OUR_CERT
@@ -2465,7 +2642,7 @@
 #ifdef HAVE_EXT_CACHE
     byte        internalCacheOff:1;
 #endif
-    byte        sendVerify;       /* for client side (can not be single bit) */
+    byte        sendVerify:2;     /* for client side (can not be single bit) */
     byte        haveRSA:1;        /* RSA available */
     byte        haveECC:1;        /* ECC available */
     byte        haveDH:1;         /* server DH parms set by user */
@@ -2482,15 +2659,34 @@
     byte        noTicketTls13:1;  /* Server won't create new Ticket */
     byte        noPskDheKe:1;     /* Don't use (EC)DHE with PSK */
 #endif
+    byte        mutualAuth:1;     /* Mutual authentication required */
 #if defined(WOLFSSL_TLS13) && defined(WOLFSSL_POST_HANDSHAKE_AUTH)
     byte        postHandshakeAuth:1;  /* Post-handshake auth supported. */
 #endif
+#ifndef NO_DH
+    #if !defined(WOLFSSL_OLD_PRIME_CHECK) && !defined(HAVE_FIPS) && \
+        !defined(HAVE_SELFTEST)
+    byte        dhKeyTested:1;   /* Set when key has been tested. */
+    #endif
+#endif
+#ifdef HAVE_SECURE_RENEGOTIATION
+    byte        useSecureReneg:1; /* when set will set WOLFSSL objects generated to enable */
+#endif
+#ifdef HAVE_ENCRYPT_THEN_MAC
+    byte        disallowEncThenMac:1;  /* Don't do Encrypt-Then-MAC */
+#endif
+#ifdef WOLFSSL_STATIC_MEMORY
+    byte        onHeap:1; /* whether the ctx/method is put on heap hint */
+#endif
 #ifdef WOLFSSL_MULTICAST
     byte        haveMcast;        /* multicast requested */
     byte        mcastID;          /* multicast group ID */
 #endif
 #if defined(WOLFSSL_SCTP) && defined(WOLFSSL_DTLS)
     byte        dtlsSctp;         /* DTLS-over-SCTP mode */
+#endif
+#if (defined(WOLFSSL_SCTP) || defined(WOLFSSL_DTLS_MTU)) && \
+                                                           defined(WOLFSSL_DTLS)
     word16      dtlsMtuSz;        /* DTLS MTU size */
 #endif
 #ifndef NO_DH
@@ -2500,13 +2696,15 @@
 #ifndef NO_RSA
     short       minRsaKeySz;      /* minimum RSA key size */
 #endif
-#if defined(HAVE_ECC) || defined(HAVE_ED25519)
+#if defined(HAVE_ECC) || defined(HAVE_ED25519) || defined(HAVE_ED448)
     short       minEccKeySz;      /* minimum ECC key size */
 #endif
+#if defined(OPENSSL_EXTRA) || defined(HAVE_WEBSERVER)
+    unsigned long     mask;             /* store SSL_OP_ flags */
+#endif
 #ifdef OPENSSL_EXTRA
     byte              sessionCtx[ID_LEN]; /* app session context ID */
     word32            disabledCurves;   /* curves disabled by user */
-    unsigned long     mask;             /* store SSL_OP_ flags */
     const unsigned char *alpn_cli_protos;/* ALPN client protocol list */
     unsigned int         alpn_cli_protos_len;
     byte              sessionCtxSz;
@@ -2524,20 +2722,28 @@
 #endif
 #endif /* WOLFSSL_DTLS */
     VerifyCallback  verifyCallback;     /* cert verification callback */
+#ifdef OPENSSL_ALL
+    CertVerifyCallback verifyCertCb;
+    void*              verifyCertCbArg;
+#endif /* OPENSSL_ALL */
     word32          timeout;            /* session timeout */
-#if defined(HAVE_ECC) || defined(HAVE_CURVE25519)
+#if defined(HAVE_ECC) || defined(HAVE_CURVE25519) || defined(HAVE_ED448)
     word32          ecdhCurveOID;       /* curve Ecc_Sum */
 #endif
 #ifdef HAVE_ECC
     word16          eccTempKeySz;       /* in octets 20 - 66 */
 #endif
-#if defined(HAVE_ECC) || defined(HAVE_ED25519)
+#if defined(HAVE_ECC) || defined(HAVE_ED25519) || defined(HAVE_ED448)
     word32          pkCurveOID;         /* curve Ecc_Sum */
 #endif
 #if defined(HAVE_SESSION_TICKET) || !defined(NO_PSK)
     byte        havePSK;                /* psk key set by user */
     wc_psk_client_callback client_psk_cb;  /* client callback */
     wc_psk_server_callback server_psk_cb;  /* server callback */
+#ifdef WOLFSSL_TLS13
+    wc_psk_client_tls13_callback client_psk_tls13_cb;  /* client callback */
+    wc_psk_server_tls13_callback server_psk_tls13_cb;  /* server callback */
+#endif
     char        server_hint[MAX_PSK_ID_LEN + NULL_TERM_LEN];
 #endif /* HAVE_SESSION_TICKET || !NO_PSK */
 #ifdef WOLFSSL_TLS13
@@ -2561,20 +2767,22 @@
     void*           userPRFArg; /* passed to prf callback */
 #endif
 #ifdef HAVE_EX_DATA
-    void*           ex_data[MAX_EX_DATA];
+    WOLFSSL_CRYPTO_EX_DATA ex_data;
 #endif
 #if defined(HAVE_ALPN) && (defined(OPENSSL_ALL) || defined(WOLFSSL_NGINX) || defined(WOLFSSL_HAPROXY))
     CallbackALPNSelect alpnSelect;
     void*              alpnSelectArg;
 #endif
-#if defined(OPENSSL_ALL) || (defined(OPENSSL_EXTRA) && (defined(HAVE_STUNNEL) || defined(WOLFSSL_NGINX) || defined(HAVE_LIGHTY)))
+#if defined(OPENSSL_ALL) || (defined(OPENSSL_EXTRA) && (defined(HAVE_STUNNEL) || \
+                             defined(WOLFSSL_NGINX) || defined(HAVE_LIGHTY) || \
+                             defined(WOLFSSL_HAPROXY) || defined(WOLFSSL_OPENSSH) ))
     CallbackSniRecv sniRecvCb;
     void*           sniRecvCbArg;
 #endif
 #if defined(WOLFSSL_MULTICAST) && defined(WOLFSSL_DTLS)
     CallbackMcastHighwater mcastHwCb; /* Sequence number highwater callback */
     word32      mcastFirstSeq;    /* first trigger level */
-    word32      mcastSecondSeq;   /* second tigger level */
+    word32      mcastSecondSeq;   /* second trigger level */
     word32      mcastMaxSeq;      /* max level */
 #endif
 #ifdef HAVE_OCSP
@@ -2604,6 +2812,10 @@
 #ifdef ATOMIC_USER
     CallbackMacEncrypt    MacEncryptCb;    /* Atomic User Mac/Encrypt Cb */
     CallbackDecryptVerify DecryptVerifyCb; /* Atomic User Decrypt/Verify Cb */
+    #ifdef HAVE_ENCRYPT_THEN_MAC
+        CallbackEncryptMac    EncryptMacCb;    /* Atomic User Mac/Enc Cb */
+        CallbackVerifyDecrypt VerifyDecryptCb; /* Atomic User Dec/Verify Cb */
+    #endif
 #endif
 #ifdef HAVE_PK_CALLBACKS
     #ifdef HAVE_ECC
@@ -2623,6 +2835,18 @@
             /* User X25519 SharedSecret Callback handler */
             CallbackX25519SharedSecret X25519SharedSecretCb;
         #endif
+        #ifdef HAVE_ED448
+            /* User Ed448Sign   Callback handler */
+            CallbackEd448Sign   Ed448SignCb;
+            /* User Ed448Verify Callback handler */
+            CallbackEd448Verify Ed448VerifyCb;
+        #endif
+        #ifdef HAVE_CURVE448
+            /* User X448 KeyGen Callback Handler */
+            CallbackX448KeyGen X448KeyGenCb;
+            /* User X448 SharedSecret Callback handler */
+            CallbackX448SharedSecret X448SharedSecretCb;
+        #endif
     #endif /* HAVE_ECC */
     #ifndef NO_DH
         CallbackDhAgree DhAgreeCb;      /* User DH Agree Callback handler */
@@ -2655,8 +2879,6 @@
 };
 
 WOLFSSL_LOCAL
-WOLFSSL_CTX* wolfSSL_CTX_new_ex(WOLFSSL_METHOD* method, void* heap);
-WOLFSSL_LOCAL
 int InitSSL_Ctx(WOLFSSL_CTX*, WOLFSSL_METHOD*, void* heap);
 WOLFSSL_LOCAL
 void FreeSSL_Ctx(WOLFSSL_CTX*);
@@ -2703,20 +2925,6 @@
 void InitCipherSpecs(CipherSpecs* cs);
 
 
-/* Supported Message Authentication Codes from page 43 */
-enum MACAlgorithm {
-    no_mac,
-    md5_mac,
-    sha_mac,
-    sha224_mac,
-    sha256_mac,     /* needs to match external KDF_MacAlgorithm */
-    sha384_mac,
-    sha512_mac,
-    rmd_mac,
-    blake2b_mac
-};
-
-
 /* Supported Key Exchange Protocols */
 enum KeyExchangeAlgorithm {
     no_kea,
@@ -2739,7 +2947,21 @@
     dsa_sa_algo       = 2,
     ecc_dsa_sa_algo   = 3,
     rsa_pss_sa_algo   = 8,
-    ed25519_sa_algo   = 9
+    ed25519_sa_algo   = 9,
+    rsa_pss_pss_algo  = 10,
+    ed448_sa_algo     = 11
+};
+
+#define PSS_RSAE_TO_PSS_PSS(macAlgo) \
+    (macAlgo + (pss_sha256 - sha256_mac))
+
+#define PSS_PSS_HASH_TO_MAC(macAlgo) \
+    (macAlgo - (pss_sha256 - sha256_mac))
+
+enum SigAlgRsaPss {
+    pss_sha256  = 0x09,
+    pss_sha384  = 0x0a,
+    pss_sha512  = 0x0b,
 };
 
 
@@ -2764,11 +2986,17 @@
 };
 
 
+#ifndef WOLFSSL_AEAD_ONLY
 enum CipherType { stream, block, aead };
-
-
-
-
+#else
+enum CipherType { aead };
+#endif
+
+
+#if defined(BUILD_AES) || defined(BUILD_AESGCM) || (defined(HAVE_CHACHA) && \
+                               defined(HAVE_POLY1305)) || defined(WOLFSSL_TLS13)
+    #define CIPHER_NONCE
+#endif
 
 
 /* cipher for now */
@@ -2781,11 +3009,14 @@
 #endif
 #if defined(BUILD_AES) || defined(BUILD_AESGCM)
     Aes*    aes;
-    #if defined(BUILD_AESGCM) || defined(HAVE_AESCCM) || defined(WOLFSSL_TLS13)
+    #if (defined(BUILD_AESGCM) || defined(HAVE_AESCCM)) && \
+                                                      !defined(WOLFSSL_NO_TLS12)
         byte* additional;
-        byte* nonce;
     #endif
 #endif
+#ifdef CIPHER_NONCE
+    byte* nonce;
+#endif
 #ifdef HAVE_CAMELLIA
     Camellia* cam;
 #endif
@@ -2801,6 +3032,9 @@
 #ifdef HAVE_IDEA
     Idea* idea;
 #endif
+#if defined(WOLFSSL_TLS13) && defined(HAVE_NULL_CIPHER)
+    Hmac* hmac;
+#endif
     byte    state;
     byte    setup;       /* have we set it up flag for detection */
 } Ciphers;
@@ -2880,6 +3114,9 @@
     byte               masterSecret[SECRET_LEN];  /* stored secret            */
     word16             haveEMS;                   /* ext master secret flag   */
 #ifdef SESSION_CERTS
+#ifdef OPENSSL_EXTRA
+    WOLFSSL_X509*      peer;                      /* peer cert */
+#endif
     WOLFSSL_X509_CHAIN chain;                     /* peer cert chain, static  */
     #ifdef WOLFSSL_ALT_CERT_CHAINS
     WOLFSSL_X509_CHAIN altChain;                  /* peer alt cert chain, static */
@@ -2888,6 +3125,9 @@
 #if defined(SESSION_CERTS) || (defined(WOLFSSL_TLS13) && \
                                defined(HAVE_SESSION_TICKET))
     ProtocolVersion    version;                   /* which version was used   */
+#endif
+#if defined(SESSION_CERTS) || !defined(NO_RESUME_SUITE_CHECK) || \
+                        (defined(WOLFSSL_TLS13) && defined(HAVE_SESSION_TICKET))
     byte               cipherSuite0;              /* first byte, normally 0   */
     byte               cipherSuite;               /* 2nd byte, actual suite   */
 #endif
@@ -2924,7 +3164,7 @@
     byte               isAlloced;
 #endif
 #ifdef HAVE_EX_DATA
-    void*              ex_data[MAX_EX_DATA];
+    WOLFSSL_CRYPTO_EX_DATA ex_data;
 #endif
 };
 
@@ -2959,6 +3199,7 @@
 /* server accept state for nonblocking restart */
 enum AcceptState {
     ACCEPT_BEGIN = 0,
+    ACCEPT_BEGIN_RENEG,
     ACCEPT_CLIENT_HELLO_DONE,
     ACCEPT_HELLO_RETRY_REQUEST_DONE,
     ACCEPT_FIRST_REPLY_DONE,
@@ -2980,6 +3221,7 @@
 /* TLS 1.3 server accept state for nonblocking restart */
 enum AcceptStateTls13 {
     TLS13_ACCEPT_BEGIN = 0,
+    TLS13_ACCEPT_BEGIN_RENEG,
     TLS13_ACCEPT_CLIENT_HELLO_DONE,
     TLS13_ACCEPT_HELLO_RETRY_REQUEST_DONE,
     TLS13_ACCEPT_FIRST_REPLY_DONE,
@@ -3022,8 +3264,10 @@
 #ifndef NO_CERTS
     DerBuffer*      certificate;           /* WOLFSSL_CTX owns, unless we own */
     DerBuffer*      key;                   /* WOLFSSL_CTX owns, unless we own */
-    byte            keyType;               /* Type of key: RSA, ECC, Ed25519 */
+    byte            keyType:7;             /* Type of key: RSA, ECC, Ed25519 */
+    byte            keyId:1;               /* Key data is an id not data */
     int             keySz;                 /* Size of RSA key */
+    int             keyDevId;              /* Device Id for key */
     DerBuffer*      certChain;             /* WOLFSSL_CTX owns, unless we own */
                  /* chain after self, in DER, with leading size for each cert */
 #ifdef WOLFSSL_TLS13
@@ -3047,6 +3291,9 @@
     #ifdef HAVE_ED25519
         buffer peerEd25519Key;             /* for Ed25519 Verify Callbacks */
     #endif /* HAVE_ED25519 */
+    #ifdef HAVE_ED448
+        buffer peerEd448Key;             /* for Ed448 Verify Callbacks */
+    #endif /* HAVE_ED448 */
     #ifndef NO_RSA
         buffer peerRsaKey;                 /* we own for Rsa Verify Callbacks */
     #endif /* NO_RSA */
@@ -3070,6 +3317,7 @@
     BUILD_MSG_HASH,
     BUILD_MSG_VERIFY_MAC,
     BUILD_MSG_ENCRYPT,
+    BUILD_MSG_ENCRYPTED_VERIFY_MAC,
 };
 
 /* sub-states for cipher operations */
@@ -3083,8 +3331,12 @@
 #ifndef NO_PSK
     wc_psk_client_callback client_psk_cb;
     wc_psk_server_callback server_psk_cb;
+#ifdef WOLFSSL_TLS13
+    wc_psk_client_tls13_callback client_psk_tls13_cb;  /* client callback */
+    wc_psk_server_tls13_callback server_psk_tls13_cb;  /* server callback */
+#endif
 #endif /* NO_PSK */
-#ifdef OPENSSL_EXTRA
+#if defined(OPENSSL_EXTRA) || defined(HAVE_WEBSERVER)
     unsigned long     mask; /* store SSL_OP_ flags */
 #endif
 
@@ -3098,7 +3350,7 @@
 #ifdef HAVE_EXT_CACHE
     word16            internalCacheOff:1;
 #endif
-    word16            side:1;             /* client or server end */
+    word16            side:2;             /* client, server or neither end */
     word16            verifyPeer:1;
     word16            verifyNone:1;
     word16            failNoCert:1;
@@ -3163,9 +3415,10 @@
 #endif
     word16            keepResources:1;    /* Keep resources after handshake */
     word16            useClientOrder:1;   /* Use client's cipher order */
+    word16            mutualAuth:1;       /* Mutual authentication is rquired */
 #if defined(WOLFSSL_TLS13) && defined(WOLFSSL_POST_HANDSHAKE_AUTH)
     word16            postHandshakeAuth:1;/* Client send post_handshake_auth
-                                           * extendion. */
+                                           * extension */
 #endif
 #if defined(WOLFSSL_TLS13) && !defined(NO_WOLFSSL_SERVER)
     word16            sendCookie:1;       /* Server creates a Cookie in HRR */
@@ -3176,10 +3429,27 @@
 #if defined(WOLFSSL_TLS13) && defined(WOLFSSL_TLS13_MIDDLEBOX_COMPAT)
     word16            sentChangeCipher:1; /* Change Cipher Spec sent */
 #endif
-#if !defined(WOLFSSL_NO_CLIENT_AUTH) && defined(HAVE_ED25519) && \
-                                                !defined(NO_ED25519_CLIENT_AUTH)
+#if !defined(WOLFSSL_NO_CLIENT_AUTH) && \
+               ((defined(HAVE_ED25519) && !defined(NO_ED25519_CLIENT_AUTH)) || \
+                (defined(HAVE_ED448) && !defined(NO_ED448_CLIENT_AUTH)))
     word16            cacheMessages:1;    /* Cache messages for sign/verify */
 #endif
+#ifndef NO_DH
+    #if !defined(WOLFSSL_OLD_PRIME_CHECK) && \
+        !defined(HAVE_FIPS) && !defined(HAVE_SELFTEST)
+        word16        dhDoKeyTest:1;      /* Need to do the DH Key prime test */
+        word16        dhKeyTested:1;      /* Set when key has been tested. */
+    #endif
+#endif
+#ifdef SINGLE_THREADED
+    word16            ownSuites:1;        /* if suites are malloced in ssl object */
+#endif
+#ifdef HAVE_ENCRYPT_THEN_MAC
+    word16            disallowEncThenMac:1;   /* Don't do Encrypt-Then-MAC */
+    word16            encThenMac:1;           /* Doing Encrypt-Then-MAC */
+    word16            startedETMRead:1;       /* Doing Encrypt-Then-MAC read */
+    word16            startedETMWrite:1;      /* Doing Encrypt-Then-MAC write */
+#endif
 
     /* need full byte values for this section */
     byte            processReply;           /* nonblocking resume */
@@ -3206,10 +3476,10 @@
 #ifndef NO_RSA
     short           minRsaKeySz;      /* minimum RSA key size */
 #endif
-#if defined(HAVE_ECC) || defined(HAVE_ED25519)
+#if defined(HAVE_ECC) || defined(HAVE_ED25519) || defined(HAVE_ED448)
     short           minEccKeySz;      /* minimum ECC key size */
 #endif
-#ifdef OPENSSL_EXTRA
+#if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
     byte            verifyDepth;      /* maximum verification depth */
 #endif
 #ifdef WOLFSSL_EARLY_DATA
@@ -3238,11 +3508,13 @@
     byte            sessionID[ID_LEN];
     byte            sessionIDSz;
 #ifdef WOLFSSL_TLS13
-    byte            clientSecret[SECRET_LEN];
-    byte            serverSecret[SECRET_LEN];
     byte            secret[SECRET_LEN];
 #endif
     byte            masterSecret[SECRET_LEN];
+#if defined(WOLFSSL_RENESAS_TSIP_TLS) && \
+   !defined(NO_WOLFSSL_RENESAS_TSIP_TLS_SESSION)
+    byte            tsip_masterSecret[TSIP_TLS_MASTERSECRET_SIZE];
+#endif
 #ifdef WOLFSSL_DTLS
     byte            cookie[MAX_COOKIE_LEN];
     byte            cookieSz;
@@ -3258,20 +3530,45 @@
 #define MAX_DATE_SZ 32
 #endif
 
+#define STACK_TYPE_X509               0
+#define STACK_TYPE_GEN_NAME           1
+#define STACK_TYPE_BIO                2
+#define STACK_TYPE_OBJ                3
+#define STACK_TYPE_STRING             4
+#define STACK_TYPE_CIPHER             5
+#define STACK_TYPE_ACCESS_DESCRIPTION 6
+#define STACK_TYPE_X509_EXT           7
+#define STACK_TYPE_NULL               8
+#define STACK_TYPE_X509_NAME          9
+#define STACK_TYPE_CONF_VALUE         10
+#define STACK_TYPE_X509_INFO          11
+
 struct WOLFSSL_STACK {
     unsigned long num; /* number of nodes in stack
-                        * (saftey measure for freeing and shortcut for count) */
+                        * (safety measure for freeing and shortcut for count) */
+    #if defined(OPENSSL_ALL)
+    wolf_sk_compare_cb comp;
+    #endif
+
     union {
-        WOLFSSL_X509*        x509;
-        WOLFSSL_X509_NAME*   name;
-        WOLFSSL_BIO*         bio;
-        WOLFSSL_ASN1_OBJECT* obj;
-        char*                string;
+        WOLFSSL_X509*          x509;
+        WOLFSSL_X509_NAME*     name;
+        WOLFSSL_X509_INFO*     info;
+        WOLFSSL_BIO*           bio;
+        WOLFSSL_ASN1_OBJECT*   obj;
+        WOLFSSL_CIPHER         cipher;
+        WOLFSSL_ACCESS_DESCRIPTION* access;
+        WOLFSSL_X509_EXTENSION* ext;
+        WOLFSSL_CONF_VALUE*    conf;
+        void*                  generic;
+        char*                  string;
+        WOLFSSL_GENERAL_NAME*  gn;
     } data;
+    void* heap; /* memory heap hint */
     WOLFSSL_STACK* next;
+    byte type;     /* Identifies type of stack. */
 };
 
-
 struct WOLFSSL_X509_NAME {
     char  *name;
     int   dynamicName;
@@ -3308,27 +3605,33 @@
     byte             hwType[EXTERNAL_SERIAL_SIZE];
     int              hwSerialNumSz;
     byte             hwSerialNum[EXTERNAL_SERIAL_SIZE];
-    #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
-        byte             certPolicySet;
-        byte             certPolicyCrit;
-    #endif /* OPENSSL_EXTRA */
-#endif
-    int              notBeforeSz;
-    int              notAfterSz;
-    byte             notBefore[MAX_DATE_SZ];
-    byte             notAfter[MAX_DATE_SZ];
+#endif /* WOLFSSL_SEP */
+#if (defined(WOLFSSL_SEP) || defined(WOLFSSL_QT) || defined (OPENSSL_ALL)) && \
+    (defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL))
+    byte             certPolicySet;
+    byte             certPolicyCrit;
+#endif /* (WOLFSSL_SEP || WOLFSSL_QT) && (OPENSSL_EXTRA || OPENSSL_EXTRA_X509_SMALL) */
+#if defined(WOLFSSL_QT) || defined(OPENSSL_ALL)
+    WOLFSSL_STACK* ext_sk; /* Store X509_EXTENSIONS from wolfSSL_X509_get_ext */
+    WOLFSSL_STACK* ext_d2i;/* Store d2i extensions from wolfSSL_X509_get_ext_d2i */
+#endif /* WOLFSSL_QT || OPENSSL_ALL */
+#ifdef OPENSSL_EXTRA
+    WOLFSSL_ASN1_INTEGER* serialNumber; /* Stores SN from wolfSSL_X509_get_serialNumber */
+#endif
+    WOLFSSL_ASN1_TIME notBefore;
+    WOLFSSL_ASN1_TIME notAfter;
     buffer           sig;
     int              sigOID;
     DNS_entry*       altNames;                       /* alt names list */
     buffer           pubKey;
     int              pubKeyOID;
     DNS_entry*       altNamesNext;                   /* hint for retrieval */
-    #if defined(HAVE_ECC) || defined(HAVE_ED25519)
-        word32       pkCurveOID;
-    #endif /* HAVE_ECC */
-    #ifndef NO_CERTS
-        DerBuffer*   derCert;                        /* may need  */
-    #endif
+#if defined(HAVE_ECC) || defined(HAVE_ED25519) || defined(HAVE_ED448)
+    word32       pkCurveOID;
+#endif /* HAVE_ECC */
+#ifndef NO_CERTS
+    DerBuffer*   derCert;                            /* may need  */
+#endif
     void*            heap;                           /* heap hint */
     byte             dynamicMemory;                  /* dynamic memory flag */
     byte             isCa:1;
@@ -3336,15 +3639,23 @@
     char             certPolicies[MAX_CERTPOL_NB][MAX_CERTPOL_SZ];
     int              certPoliciesNb;
 #endif /* WOLFSSL_CERT_EXT */
+#if defined(OPENSSL_EXTRA) || defined(OPENSSL_ALL)
+    wolfSSL_Mutex    refMutex;                       /* ref count mutex */
+    int              refCount;                       /* reference count */
+#endif
 #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
 #ifdef HAVE_EX_DATA
-    void*            ex_data[MAX_EX_DATA];
+    WOLFSSL_CRYPTO_EX_DATA ex_data;
 #endif
     byte*            authKeyId;
     byte*            subjKeyId;
     byte*            extKeyUsageSrc;
-    byte*            CRLInfo;
+    const byte*      CRLInfo;
     byte*            authInfo;
+#if defined(OPENSSL_ALL) || defined(WOLFSSL_QT)
+    byte*            authInfoCaIssuer;
+    int              authInfoCaIssuerSz;
+#endif
     word32           pathLength;
     word16           keyUsage;
     int              CRLInfoSz;
@@ -3379,6 +3690,11 @@
 #endif
     WOLFSSL_X509_NAME issuer;
     WOLFSSL_X509_NAME subject;
+#if defined(OPENSSL_ALL) || defined(WOLFSSL_HAPROXY) || defined(WOLFSSL_WPAS)
+    WOLFSSL_X509_ALGOR algor;
+    WOLFSSL_X509_PUBKEY key;
+#endif
+    byte issuerSet:1;
 };
 
 
@@ -3415,7 +3731,7 @@
     DtlsFrag*       fragList;
     word32          fragSz;    /* Length of fragments received */
     word32          seq;       /* Handshake sequence number    */
-    word32          sz;        /* Length of whole mesage       */
+    word32          sz;        /* Length of whole message      */
     byte            type;
 } DtlsMsg;
 
@@ -3432,7 +3748,6 @@
 
 #endif
 
-
 /* Handshake messages received from peer (plus change cipher */
 typedef struct MsgsReceived {
     word16 got_hello_request:1;
@@ -3475,9 +3790,10 @@
 #ifdef WOLFSSL_SHA512
     wc_Sha512       hashSha512;         /* sha512 hash of handshake msgs */
 #endif
-#if defined(HAVE_ED25519) && !defined(WOLFSSL_NO_CLIENT_AUTH)
+#if (defined(HAVE_ED25519) || defined(HAVE_ED448)) && \
+                                                !defined(WOLFSSL_NO_CLIENT_AUTH)
     byte*           messages;           /* handshake messages */
-    int             length;             /* length of handhsake messages' data */
+    int             length;             /* length of handshake messages' data */
     int             prevLen;            /* length of messages but last */
 #endif
 } HS_Hashes;
@@ -3522,6 +3838,7 @@
 #ifdef WOLFSSL_EARLY_DATA
 typedef enum EarlyDataState {
     no_early_data,
+    early_data_ext,
     expecting_early_data,
     process_early_data,
     done_early_data
@@ -3533,6 +3850,10 @@
     WOLFSSL_CTX*    ctx;
     Suites*         suites;             /* only need during handshake */
     Arrays*         arrays;
+#ifdef WOLFSSL_TLS13
+    byte            clientSecret[SECRET_LEN];
+    byte            serverSecret[SECRET_LEN];
+#endif
     HS_Hashes*      hsHashes;
     void*           IOCB_ReadCtx;
     void*           IOCB_WriteCtx;
@@ -3565,7 +3886,9 @@
     void*           hsKey;              /* Handshake key (RsaKey or ecc_key) allocated from heap */
     word32          hsType;             /* Type of Handshake key (hsKey) */
     WOLFSSL_CIPHER  cipher;
+#ifndef WOLFSSL_AEAD_ONLY
     hmacfp          hmac;
+#endif
     Ciphers         encrypt;
     Ciphers         decrypt;
     Buffers         buffers;
@@ -3597,7 +3920,12 @@
     WOLFSSL_BIO*     biord;              /* socket bio read  to free/close */
     WOLFSSL_BIO*     biowr;              /* socket bio write to free/close */
     byte             sessionCtx[ID_LEN]; /* app session context ID */
+    WOLFSSL_X509_VERIFY_PARAM* param;    /* verification parameters*/
+#endif
+#if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
     unsigned long    peerVerifyRet;
+#endif
+#ifdef OPENSSL_EXTRA
     byte             readAhead;
     byte             sessionCtxSz;       /* size of sessionCtx stored */
 #ifdef HAVE_PK_CALLBACKS
@@ -3606,6 +3934,9 @@
 #endif /* OPENSSL_EXTRA */
 #ifndef NO_RSA
     RsaKey*         peerRsaKey;
+#ifdef WOLFSSL_RENESAS_TSIP_TLS
+    byte            *peerTsipEncRsaKeyIndex;
+#endif
     byte            peerRsaKeyPresent;
 #endif
 #ifdef HAVE_QSH
@@ -3620,12 +3951,14 @@
     byte            maxRequest;
     byte            user_set_QSHSchemes;
 #endif
+#if defined(WOLFSSL_TLS13) || defined(HAVE_FFDHE)
+    word16          namedGroup;
+#endif
 #ifdef WOLFSSL_TLS13
-    word16          namedGroup;
     word16          group[WOLFSSL_MAX_GROUP_COUNT];
     byte            numGroups;
 #endif
-    byte            pssAlgo;
+    word16          pssAlgo;
 #ifdef WOLFSSL_TLS13
     #if !defined(WOLFSSL_TLS13_DRAFT_18) && !defined(WOLFSSL_TLS13_DRAFT_22)
     word16          certHashSigAlgoSz;  /* SigAlgoCert ext length in bytes */
@@ -3638,10 +3971,10 @@
     byte            peerNtruKey[MAX_NTRU_PUB_KEY_SZ];
     byte            peerNtruKeyPresent;
 #endif
-#if defined(HAVE_ECC) || defined(HAVE_ED25519)
+#if defined(HAVE_ECC) || defined(HAVE_ED25519) || defined(HAVE_ED448)
     int             eccVerifyRes;
 #endif
-#if defined(HAVE_ECC) || defined(HAVE_CURVE25519)
+#if defined(HAVE_ECC) || defined(HAVE_CURVE25519) || defined(HAVE_CURVE448)
     word32          ecdhCurveOID;            /* curve Ecc_Sum     */
     ecc_key*        eccTempKey;              /* private ECDHE key */
     byte            eccTempKeyPresent;       /* also holds type */
@@ -3653,7 +3986,7 @@
     word16          eccTempKeySz;            /* in octets 20 - 66 */
     byte            peerEccDsaKeyPresent;
 #endif
-#if defined(HAVE_ECC) || defined(HAVE_ED25519)
+#if defined(HAVE_ECC) || defined(HAVE_ED25519) || defined(HAVE_CURVE448)
     word32          pkCurveOID;              /* curve Ecc_Sum     */
 #endif
 #ifdef HAVE_ED25519
@@ -3664,6 +3997,14 @@
     curve25519_key* peerX25519Key;
     byte            peerX25519KeyPresent;
 #endif
+#ifdef HAVE_ED448
+    ed448_key*      peerEd448Key;
+    byte            peerEd448KeyPresent;
+#endif
+#ifdef HAVE_CURVE448
+    curve448_key*   peerX448Key;
+    byte            peerX448KeyPresent;
+#endif
 #ifdef HAVE_LIBZ
     z_stream        c_stream;           /* compression   stream */
     z_stream        d_stream;           /* decompression stream */
@@ -3676,15 +4017,16 @@
     word32          dtls_tx_msg_list_sz;
     word32          dtls_rx_msg_list_sz;
     DtlsMsg*        dtls_tx_msg_list;
+    DtlsMsg*        dtls_tx_msg;
     DtlsMsg*        dtls_rx_msg_list;
     void*           IOCB_CookieCtx;     /* gen cookie ctx */
     word32          dtls_expected_rx;
 #ifdef WOLFSSL_SESSION_EXPORT
     wc_dtls_export  dtls_export;        /* export function for session */
 #endif
-#ifdef WOLFSSL_SCTP
+#if defined(WOLFSSL_SCTP) || defined(WOLFSSL_DTLS_MTU)
     word16          dtlsMtuSz;
-#endif /* WOLFSSL_SCTP */
+#endif /* WOLFSSL_SCTP || WOLFSSL_DTLS_MTU */
 #ifdef WOLFSSL_MULTICAST
     void*           mcastHwCbCtx;       /* Multicast highwater callback ctx */
 #endif /* WOLFSSL_MULTICAST */
@@ -3722,7 +4064,7 @@
 #endif
     byte             keepCert;           /* keep certificate after handshake */
 #if defined(HAVE_EX_DATA) || defined(FORTRESS)
-    void*            ex_data[MAX_EX_DATA]; /* external data, for Fortress */
+    WOLFSSL_CRYPTO_EX_DATA ex_data; /* external data, for Fortress */
 #endif
     int              devId;             /* async device id to use */
 #ifdef HAVE_ONE_TIME_AUTH
@@ -3744,6 +4086,7 @@
     #endif
     #if defined(HAVE_SECURE_RENEGOTIATION) \
         || defined(HAVE_SERVER_RENEGOTIATION_INFO)
+        int                  secure_rene_count;    /* how many times */
         SecureRenegotiation* secure_renegotiation; /* valid pointer indicates */
     #endif                                         /* user turned on */
     #ifdef HAVE_ALPN
@@ -3772,12 +4115,22 @@
 #ifdef HAVE_NETX
     NetX_Ctx        nxCtx;             /* NetX IO Context */
 #endif
+#if defined(WOLFSSL_APACHE_MYNEWT) && !defined(WOLFSSL_LWIP)
+    void*           mnCtx;             /* mynewt mn_socket IO Context */
+#endif /* defined(WOLFSSL_APACHE_MYNEWT) && !defined(WOLFSSL_LWIP) */
+#ifdef WOLFSSL_GNRC
+    struct gnrc_wolfssl_ctx *gnrcCtx;  /* Riot-OS GNRC UDP/IP context */
+#endif
 #ifdef SESSION_INDEX
     int sessionIndex;                  /* Session's location in the cache. */
 #endif
 #ifdef ATOMIC_USER
     void*    MacEncryptCtx;    /* Atomic User Mac/Encrypt Callback Context */
     void*    DecryptVerifyCtx; /* Atomic User Decrypt/Verify Callback Context */
+    #ifdef HAVE_ENCRYPT_THEN_MAC
+        void*    EncryptMacCtx;    /* Atomic User Encrypt/Mac Callback Ctx */
+        void*    VerifyDecryptCtx; /* Atomic User Verify/Decrypt Callback Ctx */
+    #endif
 #endif
 #ifdef HAVE_PK_CALLBACKS
     #ifdef HAVE_ECC
@@ -3793,6 +4146,14 @@
             void* X25519KeyGenCtx;       /* X25519 KeyGen Callback Context */
             void* X25519SharedSecretCtx; /* X25519 Pms    Callback Context */
         #endif
+        #ifdef HAVE_ED448
+            void* Ed448SignCtx;          /* ED448 Sign   Callback Context */
+            void* Ed448VerifyCtx;        /* ED448 Verify Callback Context */
+        #endif
+        #ifdef HAVE_CURVE448
+            void* X448KeyGenCtx;         /* X448 KeyGen Callback Context */
+            void* X448SharedSecretCtx;   /* X448 Pms    Callback Context */
+        #endif
     #endif /* HAVE_ECC */
     #ifndef NO_DH
         void* DhAgreeCtx; /* DH Pms Callback Context */
@@ -3811,6 +4172,10 @@
 #ifdef HAVE_SECRET_CALLBACK
         SessionSecretCb sessionSecretCb;
         void*           sessionSecretCtx;
+    #ifdef WOLFSSL_TLS13
+        Tls13SecretCb   tls13SecretCb;
+        void*           tls13SecretCtx;
+    #endif
 #endif /* HAVE_SECRET_CALLBACK */
 #ifdef WOLFSSL_JNI
         void* jObjectRef;     /* reference to WolfSSLSession in JNI wrapper */
@@ -3819,27 +4184,31 @@
     EarlyDataState earlyData;
     word32 earlyDataSz;
 #endif
+#ifdef OPENSSL_ALL
+    long verifyCallbackResult;
+#endif
+#if defined(OPENSSL_ALL) || defined(WOLFSSL_QT)
+    WOLFSSL_STACK* supportedCiphers; /* Used in wolfSSL_get_ciphers_compat */
+    WOLFSSL_STACK* peerCertChain;    /* Used in wolfSSL_get_peer_cert_chain */
+#endif
 };
 
 
-WOLFSSL_LOCAL
-int  SetSSL_CTX(WOLFSSL*, WOLFSSL_CTX*, int);
-WOLFSSL_LOCAL
-int  InitSSL(WOLFSSL*, WOLFSSL_CTX*, int);
-WOLFSSL_LOCAL
-void FreeSSL(WOLFSSL*, void* heap);
-WOLFSSL_API void SSL_ResourceFree(WOLFSSL*);   /* Micrium uses */
-
+WOLFSSL_LOCAL int  SSL_CTX_RefCount(WOLFSSL_CTX* ctx, int incr);
+WOLFSSL_LOCAL int  SetSSL_CTX(WOLFSSL*, WOLFSSL_CTX*, int);
+WOLFSSL_LOCAL int  InitSSL(WOLFSSL*, WOLFSSL_CTX*, int);
+WOLFSSL_LOCAL void FreeSSL(WOLFSSL*, void* heap);
+WOLFSSL_API   void SSL_ResourceFree(WOLFSSL*);   /* Micrium uses */
 
 
 #ifndef NO_CERTS
 
     WOLFSSL_LOCAL int ProcessBuffer(WOLFSSL_CTX* ctx, const unsigned char* buff,
                                     long sz, int format, int type, WOLFSSL* ssl,
-                                    long* used, int userChain);
+                                    long* used, int userChain, int verify);
     WOLFSSL_LOCAL int ProcessFile(WOLFSSL_CTX* ctx, const char* fname, int format,
                                  int type, WOLFSSL* ssl, int userChain,
-                                WOLFSSL_CRL* crl);
+                                WOLFSSL_CRL* crl, int verify);
 
     #ifdef OPENSSL_EXTRA
     WOLFSSL_LOCAL int CheckHostName(DecodedCert* dCert, char *domainName,
@@ -3935,6 +4304,17 @@
 static const byte tls_client[FINISHED_LABEL_SZ + 1] = "client finished";
 static const byte tls_server[FINISHED_LABEL_SZ + 1] = "server finished";
 
+#ifdef OPENSSL_EXTRA
+typedef struct {
+    int name_len;
+    const char *name;
+    int nid;
+} WOLF_EC_NIST_NAME;
+extern const WOLF_EC_NIST_NAME kNistCurves[];
+/* This is the longest and shortest curve name in the kNistCurves list */
+#define kNistCurves_MIN_NAME_LEN 5
+#define kNistCurves_MAX_NAME_LEN 7
+#endif
 
 /* internal functions */
 WOLFSSL_LOCAL int SendChangeCipher(WOLFSSL*);
@@ -3954,6 +4334,10 @@
  || defined(HAVE_CERTIFICATE_STATUS_REQUEST_V2)
 WOLFSSL_LOCAL int CreateOcspResponse(WOLFSSL*, OcspRequest**, buffer*);
 #endif
+#if defined(HAVE_SECURE_RENEGOTIATION) && \
+    defined(HAVE_SERVER_RENEGOTIATION_INFO)
+WOLFSSL_LOCAL int SendHelloRequest(WOLFSSL*);
+#endif
 WOLFSSL_LOCAL int SendCertificateStatus(WOLFSSL*);
 WOLFSSL_LOCAL int SendServerKeyExchange(WOLFSSL*);
 WOLFSSL_LOCAL int SendBuffered(WOLFSSL*);
@@ -3982,12 +4366,20 @@
 WOLFSSL_LOCAL int SetTicket(WOLFSSL*, const byte*, word32);
 WOLFSSL_LOCAL int wolfSSL_GetMaxRecordSize(WOLFSSL* ssl, int maxFragment);
 
+#if defined(OPENSSL_EXTRA) && defined(HAVE_ECC)
+WOLFSSL_LOCAL int SetECKeyInternal(WOLFSSL_EC_KEY* eckey);
+WOLFSSL_LOCAL int SetECKeyExternal(WOLFSSL_EC_KEY* eckey);
+#endif
+
+WOLFSSL_LOCAL WC_RNG* WOLFSSL_RSA_GetRNG(WOLFSSL_RSA *rsa, WC_RNG **tmpRNG,
+                                         int *initTmpRng);
+
 #ifndef NO_CERTS
     #ifndef NO_RSA
         #ifdef WC_RSA_PSS
             WOLFSSL_LOCAL int CheckRsaPssPadding(const byte* plain, word32 plainSz,
                 byte* out, word32 sigSz, enum wc_HashType hashType);
-            WOLFSSL_LOCAL int ConvertHashPss(int hashAlgo, 
+            WOLFSSL_LOCAL int ConvertHashPss(int hashAlgo,
                 enum wc_HashType* hashType, int* mgf);
         #endif
         WOLFSSL_LOCAL int VerifyRsaSign(WOLFSSL* ssl, byte* verifySig,
@@ -4022,6 +4414,14 @@
             word32 inSz, const byte* msg, word32 msgSz, ed25519_key* key,
             buffer* keyBufInfo);
     #endif /* HAVE_ED25519 */
+    #ifdef HAVE_ED448
+        WOLFSSL_LOCAL int Ed448CheckPubKey(WOLFSSL* ssl);
+        WOLFSSL_LOCAL int Ed448Sign(WOLFSSL* ssl, const byte* in, word32 inSz,
+            byte* out, word32* outSz, ed448_key* key, DerBuffer* keyBufInfo);
+        WOLFSSL_LOCAL int Ed448Verify(WOLFSSL* ssl, const byte* in,
+            word32 inSz, const byte* msg, word32 msgSz, ed448_key* key,
+            buffer* keyBufInfo);
+    #endif /* HAVE_ED448 */
 
 
     #ifdef WOLFSSL_TRUST_PEER_CERT
@@ -4051,9 +4451,11 @@
 
 #ifndef NO_TLS
     WOLFSSL_LOCAL int  MakeTlsMasterSecret(WOLFSSL*);
+#ifndef WOLFSSL_AEAD_ONLY
     WOLFSSL_LOCAL int  TLS_hmac(WOLFSSL* ssl, byte* digest, const byte* in,
                                 word32 sz, int padSz, int content, int verify);
 #endif
+#endif
 
 #ifndef NO_WOLFSSL_CLIENT
     WOLFSSL_LOCAL int SendClientHello(WOLFSSL*);
@@ -4105,21 +4507,49 @@
     WOLFSSL_LOCAL int  CopyDecodedToX509(WOLFSSL_X509*, DecodedCert*);
 #endif
 
+#ifndef MAX_CIPHER_NAME
+#define MAX_CIPHER_NAME 50
+#endif
+
+#ifdef WOLFSSL_NAMES_STATIC
+typedef char cipher_name[MAX_CIPHER_NAME];
+#else
+typedef const char* cipher_name;
+#endif
+
 typedef struct CipherSuiteInfo {
-    const char* name;
+    cipher_name name;
 #ifndef NO_ERROR_STRINGS
-    const char* name_iana;
+    cipher_name name_iana;
 #endif
     byte cipherSuite0;
     byte cipherSuite;
+#if defined(OPENSSL_ALL) || defined(WOLFSSL_QT)
+    byte minor;
+    byte major;
+#endif
 } CipherSuiteInfo;
 
 WOLFSSL_LOCAL const CipherSuiteInfo* GetCipherNames(void);
 WOLFSSL_LOCAL int GetCipherNamesSize(void);
 WOLFSSL_LOCAL const char* GetCipherNameInternal(const byte cipherSuite0, const byte cipherSuite);
+#if defined(OPENSSL_ALL) || defined(WOLFSSL_QT)
+/* used in wolfSSL_sk_CIPHER_description */
+#define MAX_SEGMENTS    5
+#define MAX_SEGMENT_SZ 20
+WOLFSSL_LOCAL int wolfSSL_sk_CIPHER_description(WOLFSSL_CIPHER*);
+WOLFSSL_LOCAL const char* GetCipherProtocol(const byte minor);
+WOLFSSL_LOCAL const char* GetCipherKeaStr(char n[][MAX_SEGMENT_SZ]);
+WOLFSSL_LOCAL const char* GetCipherAuthStr(char n[][MAX_SEGMENT_SZ]);
+WOLFSSL_LOCAL const char* GetCipherEncStr(char n[][MAX_SEGMENT_SZ]);
+WOLFSSL_LOCAL const char* GetCipherMacStr(char n[][MAX_SEGMENT_SZ]);
+WOLFSSL_LOCAL int SetCipherBits(const char* enc);
+#endif
 WOLFSSL_LOCAL const char* GetCipherNameIana(const byte cipherSuite0, const byte cipherSuite);
 WOLFSSL_LOCAL const char* wolfSSL_get_cipher_name_internal(WOLFSSL* ssl);
 WOLFSSL_LOCAL const char* wolfSSL_get_cipher_name_iana(WOLFSSL* ssl);
+WOLFSSL_LOCAL int GetCipherSuiteFromName(const char* name, byte* cipherSuite0,
+                                         byte* cipherSuite);
 
 enum encrypt_side {
     ENCRYPT_SIDE_ONLY = 1,
@@ -4129,6 +4559,15 @@
 
 WOLFSSL_LOCAL int SetKeysSide(WOLFSSL*, enum encrypt_side);
 
+/* Set*Internal and Set*External functions */
+WOLFSSL_LOCAL int SetDsaInternal(WOLFSSL_DSA* dsa);
+WOLFSSL_LOCAL int SetDsaExternal(WOLFSSL_DSA* dsa);
+#ifndef HAVE_USER_RSA
+WOLFSSL_LOCAL int SetRsaExternal(WOLFSSL_RSA* rsa);
+WOLFSSL_LOCAL int SetRsaInternal(WOLFSSL_RSA* rsa);
+#endif
+WOLFSSL_LOCAL int SetDhInternal(WOLFSSL_DH* dh);
+WOLFSSL_LOCAL int SetDhExternal(WOLFSSL_DH *dh);
 
 #ifndef NO_DH
     WOLFSSL_LOCAL int DhGenKeyPair(WOLFSSL* ssl, DhKey* dhKey,
@@ -4142,6 +4581,7 @@
 
 #ifdef HAVE_ECC
     WOLFSSL_LOCAL int EccMakeKey(WOLFSSL* ssl, ecc_key* key, ecc_key* peer);
+    WOLFSSL_LOCAL word16 GetCurveByOID(int oidSum);
 #endif
 
 WOLFSSL_LOCAL int InitHandshakeHashes(WOLFSSL* ssl);
--- a/wolfssl/ocsp.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/ocsp.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* ocsp.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -37,12 +37,15 @@
 
 typedef struct WOLFSSL_OCSP WOLFSSL_OCSP;
 
-#if defined(OPENSSL_ALL) || defined(WOLFSSL_NGINX) || defined(WOLFSSL_HAPROXY)
+#if defined(OPENSSL_ALL) || defined(OPENSSL_EXTRA) || defined(WOLFSSL_NGINX) ||\
+    defined(WOLFSSL_HAPROXY)
 typedef struct OcspResponse WOLFSSL_OCSP_BASICRESP;
 
 typedef struct OcspRequest WOLFSSL_OCSP_CERTID;
 
 typedef struct OcspRequest WOLFSSL_OCSP_ONEREQ;
+
+typedef struct OcspRequest WOLFSSL_OCSP_REQUEST;
 #endif
 
 WOLFSSL_LOCAL int  InitOCSP(WOLFSSL_OCSP*, WOLFSSL_CERT_MANAGER*);
@@ -54,14 +57,17 @@
                              WOLFSSL_BUFFER_INFO* responseBuffer, WOLFSSL* ssl);
 WOLFSSL_LOCAL int  CheckOcspRequest(WOLFSSL_OCSP* ocsp,
                  OcspRequest* ocspRequest, WOLFSSL_BUFFER_INFO* responseBuffer);
-
-
-#if defined(OPENSSL_ALL) || defined(WOLFSSL_NGINX) || defined(WOLFSSL_HAPROXY)
+WOLFSSL_LOCAL int CheckOcspResponse(WOLFSSL_OCSP *ocsp, byte *response, int responseSz,
+                                    WOLFSSL_BUFFER_INFO *responseBuffer, CertStatus *status,
+                                    OcspEntry *entry, OcspRequest *ocspRequest);
 
-WOLFSSL_API int wolfSSL_OCSP_resp_find_status(WOLFSSL_OCSP_BASICRESP *bs,
-    WOLFSSL_OCSP_CERTID* id, int* status, int* reason,
-    WOLFSSL_ASN1_TIME** revtime, WOLFSSL_ASN1_TIME** thisupd,
-    WOLFSSL_ASN1_TIME** nextupd);
+#if defined(OPENSSL_ALL) || defined(WOLFSSL_NGINX) || defined(WOLFSSL_HAPROXY) || \
+    defined(WOLFSSL_APACHE_HTTPD)
+
+    WOLFSSL_API int wolfSSL_OCSP_resp_find_status(WOLFSSL_OCSP_BASICRESP *bs,
+                                                  WOLFSSL_OCSP_CERTID *id, int *status, int *reason,
+                                                  WOLFSSL_ASN1_TIME **revtime, WOLFSSL_ASN1_TIME **thisupd,
+                                                  WOLFSSL_ASN1_TIME **nextupd);
 WOLFSSL_API const char *wolfSSL_OCSP_cert_status_str(long s);
 WOLFSSL_API int wolfSSL_OCSP_check_validity(WOLFSSL_ASN1_TIME* thisupd,
     WOLFSSL_ASN1_TIME* nextupd, long sec, long maxsec);
@@ -93,8 +99,27 @@
     unsigned char** data);
 WOLFSSL_API WOLFSSL_OCSP_ONEREQ* wolfSSL_OCSP_request_add0_id(OcspRequest *req,
     WOLFSSL_OCSP_CERTID *cid);
+WOLFSSL_API WOLFSSL_OCSP_CERTID* wolfSSL_OCSP_CERTID_dup(WOLFSSL_OCSP_CERTID*);
+WOLFSSL_API int wolfSSL_i2d_OCSP_REQUEST_bio(WOLFSSL_BIO* out,
+    WOLFSSL_OCSP_REQUEST *req);
 
 #endif
+#ifdef OPENSSL_EXTRA
+WOLFSSL_API int wolfSSL_OCSP_REQUEST_add_ext(OcspRequest* req,
+        WOLFSSL_X509_EXTENSION* ext, int idx);
+WOLFSSL_API OcspResponse* wolfSSL_OCSP_response_create(int status,
+    WOLFSSL_OCSP_BASICRESP* bs);
+WOLFSSL_API const char* wolfSSL_OCSP_crl_reason_str(long s);
+
+WOLFSSL_API int wolfSSL_OCSP_id_get0_info(WOLFSSL_ASN1_STRING**,
+  WOLFSSL_ASN1_OBJECT**, WOLFSSL_ASN1_STRING**,
+  WOLFSSL_ASN1_INTEGER**, WOLFSSL_OCSP_CERTID*);
+
+WOLFSSL_API int wolfSSL_OCSP_request_add1_nonce(OcspRequest* req,
+        unsigned char* val, int sz);
+WOLFSSL_API int wolfSSL_OCSP_check_nonce(OcspRequest* req,
+        WOLFSSL_OCSP_BASICRESP* bs);
+#endif
 
 
 #ifdef __cplusplus
--- a/wolfssl/openssl/aes.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/openssl/aes.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* aes.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -43,11 +43,25 @@
  * to need the size of the structure. */
 typedef struct WOLFSSL_AES_KEY {
     /* aligned and big enough for Aes from wolfssl/wolfcrypt/aes.h */
-    ALIGN16 void* holder[(360 + WC_ASYNC_DEV_SIZE)/ sizeof(void*)];
+    ALIGN16 void* holder[(376 + WC_ASYNC_DEV_SIZE)/ sizeof(void*)];
     #ifdef GCM_TABLE
     /* key-based fast multiplication table. */
     ALIGN16 void* M0[4096 / sizeof(void*)];
     #endif /* GCM_TABLE */
+    #if defined(WOLFSSL_DEVCRYPTO) && \
+        (defined(WOLFSSL_DEVCRYPTO_AES) || defined(WOLFSSL_DEVCRYPTO_CBC))
+    /* large enough for additional devcrypto information */
+    void* devKey[288 / sizeof(void*)];
+    #endif
+    #ifdef WOLFSSL_AFALG
+    void* afalg_holder[288 / sizeof(void*)];
+    #endif
+    #ifdef HAVE_PKCS11
+    void* pkcs11_holder[(AES_MAX_ID_LEN + sizeof(int)) / sizeof(void*)];
+    #endif
+    #if defined(WOLFSSL_ASYNC_CRYPT) || defined(WOLF_CRYPTO_CB)
+    void* async_holder[128 / sizeof(void*)];
+    #endif
 } WOLFSSL_AES_KEY;
 typedef WOLFSSL_AES_KEY AES_KEY;
 
--- a/wolfssl/openssl/asn1.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/openssl/asn1.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* asn1.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -26,11 +26,13 @@
 
 #include <wolfssl/openssl/ssl.h>
 
-#define ASN1_STRING_new      wolfSSL_ASN1_STRING_type_new
+#define ASN1_STRING_new      wolfSSL_ASN1_STRING_new
 #define ASN1_STRING_type_new wolfSSL_ASN1_STRING_type_new
+#define ASN1_STRING_type     wolfSSL_ASN1_STRING_type
 #define ASN1_STRING_set      wolfSSL_ASN1_STRING_set
 #define ASN1_STRING_free     wolfSSL_ASN1_STRING_free
 
+#define V_ASN1_INTEGER                   0x02
 #define V_ASN1_OCTET_STRING              0x04 /* tag for ASN1_OCTET_STRING */
 #define V_ASN1_NEG                       0x100
 #define V_ASN1_NEG_INTEGER               (2 | V_ASN1_NEG)
@@ -53,5 +55,99 @@
                                           ASN1_STRFLGS_UTF8_CONVERT | \
                                           ASN1_STRFLGS_DUMP_UNKNOWN | \
                                           ASN1_STRFLGS_DUMP_DER)
+
+#define MBSTRING_UTF8                    0x1000
+#define MBSTRING_ASC                     0x1001
+#define MBSTRING_BMP                     0x1002
+#define MBSTRING_UNIV                    0x1004
+
+#define ASN1_UTCTIME_print              wolfSSL_ASN1_UTCTIME_print
+#define ASN1_TIME_check                 wolfSSL_ASN1_TIME_check
+#define ASN1_TIME_diff                  wolfSSL_ASN1_TIME_diff
+#define ASN1_TIME_set                   wolfSSL_ASN1_TIME_set
+
+#define V_ASN1_OBJECT                   6
+#define V_ASN1_UTCTIME                  23
+#define V_ASN1_GENERALIZEDTIME          24
+
+#define ASN1_STRING_FLAG_BITS_LEFT       0x008
+#define ASN1_STRING_FLAG_NDEF            0x010
+#define ASN1_STRING_FLAG_CONT            0x020
+#define ASN1_STRING_FLAG_MSTRING         0x040
+#define ASN1_STRING_FLAG_EMBED           0x080
+
+
+WOLFSSL_API WOLFSSL_ASN1_INTEGER *wolfSSL_BN_to_ASN1_INTEGER(
+    const WOLFSSL_BIGNUM*, WOLFSSL_ASN1_INTEGER*);
+
+WOLFSSL_API void wolfSSL_ASN1_TYPE_set(WOLFSSL_ASN1_TYPE *a, int type, void *value);
+
+#ifdef OPENSSL_ALL
+/* IMPLEMENT_ASN1_FUNCTIONS is strictly for external use only. Internally
+ * we don't use this. Some projects use OpenSSL to implement ASN1 types and
+ * this section is only to provide those projects with ASN1 functionality. */
+typedef struct {
+    size_t offset;              /* Offset of this field in structure */
+    byte type;                  /* The type of the member as defined in
+                                 * WOLFSSL_ASN1_TYPES */
+} WOLFSSL_ASN1_TEMPLATE;
+
+typedef struct {
+    byte type;                              /* One of the ASN_Tags types */
+    const WOLFSSL_ASN1_TEMPLATE *members;   /* If SEQUENCE or CHOICE this
+                                             * contains the contents */
+    size_t mcount;                          /* Number of members if SEQUENCE
+                                             * or CHOICE */
+    size_t size;                            /* Structure size */
+} WOLFSSL_ASN1_ITEM;
+
+typedef enum {
+    WOLFSSL_X509_ALGOR_ASN1 = 0,
+    WOLFSSL_ASN1_BIT_STRING_ASN1,
+} WOLFSSL_ASN1_TYPES;
+
+#define ASN1_SEQUENCE(type) \
+    static const type __##type##_dummy_struct;\
+    static const WOLFSSL_ASN1_TEMPLATE type##_member_data[]
+
+#define ASN1_SIMPLE(type, member, member_type) \
+    { (char*)&__##type##_dummy_struct.member - (char*)&__##type##_dummy_struct, \
+        WOLFSSL_##member_type##_ASN1 }
+
+#define ASN1_SEQUENCE_END(type) \
+    ; \
+    const WOLFSSL_ASN1_ITEM type##_template_data = { \
+            ASN_SEQUENCE, \
+            type##_member_data, \
+            sizeof(type##_member_data) / sizeof(WOLFSSL_ASN1_TEMPLATE), \
+            sizeof(type) \
+    };
+
+WOLFSSL_API void *wolfSSL_ASN1_item_new(const WOLFSSL_ASN1_ITEM *template);
+WOLFSSL_API void wolfSSL_ASN1_item_free(void *val, const WOLFSSL_ASN1_ITEM *template);
+WOLFSSL_API int wolfSSL_ASN1_item_i2d(const void *src, byte **dest,
+                                      const WOLFSSL_ASN1_ITEM *template);
+
+/* Need function declaration otherwise compiler complains */
+#define IMPLEMENT_ASN1_FUNCTIONS(type) \
+    type *type##_new(void); \
+    type *type##_new(void){ \
+        return (type*)wolfSSL_ASN1_item_new(&type##_template_data); \
+    } \
+    void type##_free(type *t); \
+    void type##_free(type *t){ \
+        wolfSSL_ASN1_item_free(t, &type##_template_data); \
+    } \
+    int i2d_##type(type *src, byte **dest); \
+    int i2d_##type(type *src, byte **dest) \
+    { \
+        return wolfSSL_ASN1_item_i2d(src, dest, &type##_template_data);\
+    }
+
+#endif /* OPENSSL_ALL */
+
+#define BN_to_ASN1_INTEGER          wolfSSL_BN_to_ASN1_INTEGER
+#define ASN1_TYPE_set               wolfSSL_ASN1_TYPE_set
+
 #endif /* WOLFSSL_ASN1_H_ */
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wolfssl/openssl/asn1t.h	Thu Jun 04 23:57:22 2020 +0000
@@ -0,0 +1,31 @@
+/* asn1t.h
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/* asn1t.h for openssl */
+
+#ifndef WOLFSSL_ASN1T_H_
+#define WOLFSSL_ASN1T_H_
+
+#include <wolfssl/wolfcrypt/asn.h>
+#include <wolfssl/openssl/asn1.h>
+
+#endif /* WOLFSSL_ASN1T_H_ */
+
--- a/wolfssl/openssl/bio.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/openssl/bio.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* bio.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -32,25 +32,131 @@
     extern "C" {
 #endif
 
+
 #define BIO_FLAG_BASE64_NO_NL WOLFSSL_BIO_FLAG_BASE64_NO_NL
 #define BIO_FLAG_READ         WOLFSSL_BIO_FLAG_READ
 #define BIO_FLAG_WRITE        WOLFSSL_BIO_FLAG_WRITE
 #define BIO_FLAG_IO_SPECIAL   WOLFSSL_BIO_FLAG_IO_SPECIAL
 #define BIO_FLAG_RETRY        WOLFSSL_BIO_FLAG_RETRY
 
+#define BIO_new_fp                      wolfSSL_BIO_new_fp
+#define BIO_new_file                    wolfSSL_BIO_new_file
+#define BIO_new_fp                      wolfSSL_BIO_new_fp
+#define BIO_ctrl                        wolfSSL_BIO_ctrl
+#define BIO_ctrl_pending                wolfSSL_BIO_ctrl_pending
+#define BIO_wpending                    wolfSSL_BIO_wpending
+#define BIO_get_mem_ptr                 wolfSSL_BIO_get_mem_ptr
+#define BIO_int_ctrl                    wolfSSL_BIO_int_ctrl
+#define BIO_reset                       wolfSSL_BIO_reset
+#define BIO_s_file                      wolfSSL_BIO_s_file
+#define BIO_s_bio                       wolfSSL_BIO_s_bio
+#define BIO_s_socket                    wolfSSL_BIO_s_socket
+#define BIO_set_fd                      wolfSSL_BIO_set_fd
+#define BIO_set_close                   wolfSSL_BIO_set_close
+#define BIO_ctrl_reset_read_request     wolfSSL_BIO_ctrl_reset_read_request
+#define BIO_set_write_buf_size          wolfSSL_BIO_set_write_buf_size
+#define BIO_make_bio_pair               wolfSSL_BIO_make_bio_pair
+
+#define BIO_set_fp                      wolfSSL_BIO_set_fp
+#define BIO_get_fp                      wolfSSL_BIO_get_fp
+#define BIO_seek                        wolfSSL_BIO_seek
+#define BIO_write_filename              wolfSSL_BIO_write_filename
+#define BIO_set_mem_eof_return          wolfSSL_BIO_set_mem_eof_return
+
 #define BIO_find_type wolfSSL_BIO_find_type
 #define BIO_next      wolfSSL_BIO_next
 #define BIO_gets      wolfSSL_BIO_gets
+#define BIO_puts      wolfSSL_BIO_puts
 
+#define BIO_should_retry                wolfSSL_BIO_should_retry
 
 #define BIO_TYPE_FILE WOLFSSL_BIO_FILE
 #define BIO_TYPE_BIO  WOLFSSL_BIO_BIO
 #define BIO_TYPE_MEM  WOLFSSL_BIO_MEMORY
 #define BIO_TYPE_BASE64 WOLFSSL_BIO_BASE64
 
+#define BIO_vprintf wolfSSL_BIO_vprintf
+#define BIO_printf  wolfSSL_BIO_printf
+#define BIO_dump    wolfSSL_BIO_dump
+
+/* BIO info callback */
+#define BIO_CB_FREE   WOLFSSL_BIO_CB_FREE
+#define BIO_CB_READ   WOLFSSL_BIO_CB_READ
+#define BIO_CB_WRITE  WOLFSSL_BIO_CB_WRITE
+#define BIO_CB_PUTS   WOLFSSL_BIO_CB_PUTS
+#define BIO_CB_GETS   WOLFSSL_BIO_CB_GETS
+#define BIO_CB_CTRL   WOLFSSL_BIO_CB_CTRL
+#define BIO_CB_RETURN WOLFSSL_BIO_CB_RETURN
+
+#define BIO_set_callback         wolfSSL_BIO_set_callback
+#define BIO_get_callback         wolfSSL_BIO_get_callback
+#define BIO_set_callback_arg     wolfSSL_BIO_set_callback_arg
+#define BIO_get_callback_arg     wolfSSL_BIO_get_callback_arg
+
+/* BIO for 1.1.0 or later */
+#define BIO_set_init               wolfSSL_BIO_set_init
+#define BIO_get_data               wolfSSL_BIO_get_data
+#define BIO_set_data               wolfSSL_BIO_set_data
+#define BIO_get_shutdown           wolfSSL_BIO_get_shutdown
+#define BIO_set_shutdown           wolfSSL_BIO_set_shutdown
+
+#define BIO_clear_flags            wolfSSL_BIO_clear_flags
+#define BIO_set_ex_data            wolfSSL_BIO_set_ex_data
+#define BIO_get_ex_data            wolfSSL_BIO_get_ex_data
+
+/* helper to set specific retry/read flags */
+#define BIO_set_retry_read(bio)\
+    wolfSSL_BIO_set_flags((bio), WOLFSSL_BIO_FLAG_RETRY | WOLFSSL_BIO_FLAG_READ)
+#define BIO_set_retry_write(bio)\
+    wolfSSL_BIO_set_flags((bio), WOLFSSL_BIO_FLAG_RETRY | WOLFSSL_BIO_FLAG_WRITE)
+
+#define BIO_clear_retry_flags      wolfSSL_BIO_clear_retry_flags
+
+#define BIO_meth_new               wolfSSL_BIO_meth_new
+#define BIO_meth_set_write         wolfSSL_BIO_meth_set_write
+#define BIO_meth_free              wolfSSL_BIO_meth_free
+#define BIO_meth_set_write         wolfSSL_BIO_meth_set_write
+#define BIO_meth_set_read          wolfSSL_BIO_meth_set_read
+#define BIO_meth_set_puts          wolfSSL_BIO_meth_set_puts
+#define BIO_meth_set_gets          wolfSSL_BIO_meth_set_gets
+#define BIO_meth_set_ctrl          wolfSSL_BIO_meth_set_ctrl
+#define BIO_meth_set_create        wolfSSL_BIO_meth_set_create
+#define BIO_meth_set_destroy       wolfSSL_BIO_meth_set_destroy
+
+
+/* BIO CTRL */
+#define BIO_CTRL_RESET             1
+#define BIO_CTRL_EOF               2
+#define BIO_CTRL_INFO              3
+#define BIO_CTRL_PUSH              6
+#define BIO_CTRL_POP               7
+#define BIO_CTRL_GET_CLOSE         8
+#define BIO_CTRL_SET_CLOSE         9
+#define BIO_CTRL_PENDING           10
+#define BIO_CTRL_FLUSH             11
+#define BIO_CTRL_DUP               12
+#define BIO_CTRL_WPENDING          13
+
+#define BIO_C_SET_FILE_PTR              106
+#define BIO_C_GET_FILE_PTR              107
+#define BIO_C_SET_FILENAME              108
+#define BIO_C_SET_BUF_MEM               114
+#define BIO_C_GET_BUF_MEM_PTR           115
+#define BIO_C_FILE_SEEK                 128
+#define BIO_C_SET_BUF_MEM_EOF_RETURN    130
+#define BIO_C_SET_WRITE_BUF_SIZE        136
+#define BIO_C_MAKE_BIO_PAIR             138
+
+#define BIO_CTRL_DGRAM_QUERY_MTU   40
+
+#define BIO_NOCLOSE                0x00
+#define BIO_CLOSE                  0x01
+
+#define BIO_FP_WRITE               0x04
+
 
 #ifdef __cplusplus
-    }  /* extern "C" */ 
+    }  /* extern "C" */
 #endif
 
 
--- a/wolfssl/openssl/bn.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/openssl/bn.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* bn.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -38,11 +38,17 @@
 #endif
 
 typedef struct WOLFSSL_BIGNUM {
-    int   neg;              /* openssh deference */
-    void* internal;         /* our big num */
+    int neg;        /* openssh deference */
+    void *internal; /* our big num */
+#ifdef WOLFSSL_SP_MATH
+    sp_int fp;
+#elif defined(USE_FAST_MATH) && !defined(HAVE_WOLF_BIGINT)
+    fp_int fp;
+#endif
 } WOLFSSL_BIGNUM;
 
 
+#define BN_ULONG WOLFSSL_BN_ULONG
 #define WOLFSSL_BN_ULONG mp_digit
 
 typedef struct WOLFSSL_BN_CTX WOLFSSL_BN_CTX;
@@ -53,8 +59,12 @@
 WOLFSSL_API void           wolfSSL_BN_CTX_free(WOLFSSL_BN_CTX*);
 
 WOLFSSL_API WOLFSSL_BIGNUM* wolfSSL_BN_new(void);
+#if defined(USE_FAST_MATH) && !defined(HAVE_WOLF_BIGINT)
+WOLFSSL_API void           wolfSSL_BN_init(WOLFSSL_BIGNUM *);
+#endif
 WOLFSSL_API void           wolfSSL_BN_free(WOLFSSL_BIGNUM*);
 WOLFSSL_API void           wolfSSL_BN_clear_free(WOLFSSL_BIGNUM*);
+WOLFSSL_API void           wolfSSL_BN_clear(WOLFSSL_BIGNUM*);
 
 
 WOLFSSL_API int wolfSSL_BN_sub(WOLFSSL_BIGNUM*, const WOLFSSL_BIGNUM*,
@@ -74,6 +84,8 @@
 WOLFSSL_API int wolfSSL_BN_is_zero(const WOLFSSL_BIGNUM*);
 WOLFSSL_API int wolfSSL_BN_is_one(const WOLFSSL_BIGNUM*);
 WOLFSSL_API int wolfSSL_BN_is_odd(const WOLFSSL_BIGNUM*);
+WOLFSSL_API int wolfSSL_BN_is_negative(const WOLFSSL_BIGNUM*);
+WOLFSSL_API int wolfSSL_BN_is_word(const WOLFSSL_BIGNUM*, WOLFSSL_BN_ULONG);
 
 WOLFSSL_API int wolfSSL_BN_cmp(const WOLFSSL_BIGNUM*, const WOLFSSL_BIGNUM*);
 
@@ -99,11 +111,15 @@
 WOLFSSL_API int wolfSSL_BN_lshift(WOLFSSL_BIGNUM*, const WOLFSSL_BIGNUM*, int);
 WOLFSSL_API int wolfSSL_BN_add_word(WOLFSSL_BIGNUM*, WOLFSSL_BN_ULONG);
 WOLFSSL_API int wolfSSL_BN_set_bit(WOLFSSL_BIGNUM*, int);
+WOLFSSL_API int wolfSSL_BN_clear_bit(WOLFSSL_BIGNUM*, int);
 WOLFSSL_API int wolfSSL_BN_set_word(WOLFSSL_BIGNUM*, WOLFSSL_BN_ULONG);
 WOLFSSL_API unsigned long wolfSSL_BN_get_word(const WOLFSSL_BIGNUM*);
 
 WOLFSSL_API int wolfSSL_BN_add(WOLFSSL_BIGNUM*, WOLFSSL_BIGNUM*,
                                WOLFSSL_BIGNUM*);
+WOLFSSL_API int wolfSSL_BN_mod_add(WOLFSSL_BIGNUM *r, const WOLFSSL_BIGNUM *a,
+                                   const WOLFSSL_BIGNUM *b, const WOLFSSL_BIGNUM *m,
+                                   WOLFSSL_BN_CTX *ctx);
 WOLFSSL_API char *wolfSSL_BN_bn2hex(const WOLFSSL_BIGNUM*);
 WOLFSSL_API int wolfSSL_BN_is_prime_ex(const WOLFSSL_BIGNUM*, int,
                                        WOLFSSL_BN_CTX*, WOLFSSL_BN_GENCB*);
@@ -117,6 +133,7 @@
 WOLFSSL_API void wolfSSL_BN_CTX_start(WOLFSSL_BN_CTX *ctx);
 WOLFSSL_API WOLFSSL_BIGNUM *wolfSSL_BN_mod_inverse(WOLFSSL_BIGNUM*, WOLFSSL_BIGNUM*,
                                         const WOLFSSL_BIGNUM*, WOLFSSL_BN_CTX *ctx);
+
 typedef WOLFSSL_BIGNUM BIGNUM;
 typedef WOLFSSL_BN_CTX BN_CTX;
 typedef WOLFSSL_BN_GENCB BN_GENCB;
@@ -126,8 +143,10 @@
 #define BN_CTX_free       wolfSSL_BN_CTX_free
 
 #define BN_new        wolfSSL_BN_new
+#define BN_init       wolfSSL_BN_init
 #define BN_free       wolfSSL_BN_free
 #define BN_clear_free wolfSSL_BN_clear_free
+#define BN_clear      wolfSSL_BN_clear
 
 #define BN_num_bytes wolfSSL_BN_num_bytes
 #define BN_num_bits  wolfSSL_BN_num_bits
@@ -135,6 +154,8 @@
 #define BN_is_zero  wolfSSL_BN_is_zero
 #define BN_is_one   wolfSSL_BN_is_one
 #define BN_is_odd   wolfSSL_BN_is_odd
+#define BN_is_negative wolfSSL_BN_is_negative
+#define BN_is_word  wolfSSL_BN_is_word
 
 #define BN_cmp    wolfSSL_BN_cmp
 
@@ -167,8 +188,10 @@
 #define BN_lshift wolfSSL_BN_lshift
 #define BN_add_word wolfSSL_BN_add_word
 #define BN_add wolfSSL_BN_add
+#define BN_mod_add wolfSSL_BN_mod_add
 #define BN_set_word wolfSSL_BN_set_word
 #define BN_set_bit wolfSSL_BN_set_bit
+#define BN_clear_bit wolfSSL_BN_clear_bit
 
 
 #define BN_is_prime_ex wolfSSL_BN_is_prime_ex
@@ -181,6 +204,20 @@
 
 #define BN_mod_inverse wolfSSL_BN_mod_inverse
 
+#define BN_set_flags(x1, x2)
+
+#if defined(OPENSSL_VERSION_NUMBER) && OPENSSL_VERSION_NUMBER >= 0x10100000L
+#define BN_get_rfc2409_prime_768   wolfSSL_DH_768_prime
+#define BN_get_rfc2409_prime_1024  wolfSSL_DH_1024_prime
+#define BN_get_rfc3526_prime_1536  wolfSSL_DH_1536_prime
+#define BN_get_rfc3526_prime_2048  wolfSSL_DH_2048_prime
+#define BN_get_rfc3526_prime_3072  wolfSSL_DH_3072_prime
+#define BN_get_rfc3526_prime_4096  wolfSSL_DH_4096_prime
+#define BN_get_rfc3526_prime_6144  wolfSSL_DH_6144_prime
+#define BN_get_rfc3526_prime_8192  wolfSSL_DH_8192_prime
+#endif
+
+
 #ifdef __cplusplus
     }  /* extern "C" */
 #endif
--- a/wolfssl/openssl/buffer.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/openssl/buffer.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* buffer.h
  *
- * Copyright (C) 2006-2016 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -40,10 +40,6 @@
 #define BUF_MEM_grow wolfSSL_BUF_MEM_grow
 #define BUF_MEM_free wolfSSL_BUF_MEM_free
 
-/* error codes */
-#define ERR_R_MALLOC_FAILURE  MEMORY_E
-
-
 #ifdef __cplusplus
     }  /* extern "C" */
 #endif
--- a/wolfssl/openssl/conf.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/openssl/conf.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,3 +1,49 @@
-/* conf.h for openssl */
+/* conf.h
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/* conf.h for openSSL */
 
+#ifndef WOLFSSL_conf_H_
+#define WOLFSSL_conf_H_
+
+#ifdef __cplusplus
+    extern "C" {
+#endif
+
+struct WOLFSSL_CONF_VALUE {
+    char *section;
+    char *name;
+    char *value;
+};
+
+struct WOLFSSL_INIT_SETTINGS {
+    char* appname;
+};
+
+typedef struct WOLFSSL_CONF_VALUE CONF_VALUE;
+typedef struct WOLFSSL_INIT_SETTINGS OPENSSL_INIT_SETTINGS;
+
+#ifdef  __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* WOLFSSL_conf_H_ */
 
--- a/wolfssl/openssl/crypto.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/openssl/crypto.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* crypto.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -35,6 +35,7 @@
 
 WOLFSSL_API const char*   wolfSSLeay_version(int type);
 WOLFSSL_API unsigned long wolfSSLeay(void);
+WOLFSSL_API unsigned long wolfSSL_OpenSSL_version_num(void);
 
 #ifdef OPENSSL_EXTRA
 WOLFSSL_API void wolfSSL_OPENSSL_free(void*);
@@ -45,18 +46,28 @@
 
 #define SSLeay_version wolfSSLeay_version
 #define SSLeay wolfSSLeay
-
+#define OpenSSL_version_num wolfSSL_OpenSSL_version_num
 
-#define SSLEAY_VERSION 0x0090600fL
+#ifdef WOLFSSL_QT
+    #define SSLEAY_VERSION 0x10001000L
+#else
+    #define SSLEAY_VERSION 0x0090600fL
+#endif
 #define SSLEAY_VERSION_NUMBER SSLEAY_VERSION
 #define CRYPTO_lock wc_LockMutex_ex
 
 /* this function was used to set the default malloc, free, and realloc */
-#define CRYPTO_malloc_init() /* CRYPTO_malloc_init is not needed */
+#define CRYPTO_malloc_init() 0 /* CRYPTO_malloc_init is not needed */
 
 #define OPENSSL_free wolfSSL_OPENSSL_free
 #define OPENSSL_malloc wolfSSL_OPENSSL_malloc
 
+#ifdef WOLFSSL_QT
+    #define OPENSSL_INIT_ADD_ALL_CIPHERS    0x00000004L
+    #define OPENSSL_INIT_ADD_ALL_DIGESTS    0x00000008L
+    #define OPENSSL_INIT_LOAD_CONFIG        0x00000040L
+#endif
+
 #if defined(OPENSSL_ALL) || defined(HAVE_STUNNEL) || defined(WOLFSSL_NGINX) || \
     defined(WOLFSSL_HAPROXY) || defined(OPENSSL_EXTRA)
 #define CRYPTO_set_mem_ex_functions      wolfSSL_CRYPTO_set_mem_ex_functions
--- a/wolfssl/openssl/des.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/openssl/des.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* des.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -84,6 +84,7 @@
 WOLFSSL_API void wolfSSL_DES_set_odd_parity(WOLFSSL_DES_cblock*);
 WOLFSSL_API void wolfSSL_DES_ecb_encrypt(WOLFSSL_DES_cblock*, WOLFSSL_DES_cblock*,
                                        WOLFSSL_DES_key_schedule*, int);
+WOLFSSL_API int wolfSSL_DES_check_key_parity(WOLFSSL_DES_cblock*);
 
 
 typedef WOLFSSL_DES_cblock DES_cblock;
@@ -103,6 +104,7 @@
 #define DES_ecb_encrypt       wolfSSL_DES_ecb_encrypt
 #define DES_ede3_cbc_encrypt  wolfSSL_DES_ede3_cbc_encrypt
 #define DES_cbc_cksum         wolfSSL_DES_cbc_cksum
+#define DES_check_key_parity  wolfSSL_DES_check_key_parity
 
 #ifdef __cplusplus
     } /* extern "C" */
--- a/wolfssl/openssl/dh.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/openssl/dh.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* dh.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -25,13 +25,19 @@
 #ifndef WOLFSSL_DH_H_
 #define WOLFSSL_DH_H_
 
-#include <wolfssl/openssl/ssl.h>
 #include <wolfssl/openssl/bn.h>
 
 #ifdef __cplusplus
     extern "C" {
 #endif
 
+#ifndef WOLFSSL_DH_TYPE_DEFINED /* guard on redeclaration */
+typedef struct WOLFSSL_DH            WOLFSSL_DH;
+#define WOLFSSL_DH_TYPE_DEFINED
+#endif
+
+typedef WOLFSSL_DH                   DH;
+
 struct WOLFSSL_DH {
     WOLFSSL_BIGNUM* p;
     WOLFSSL_BIGNUM* g;
@@ -47,25 +53,55 @@
      int length;
 };
 
-
+WOLFSSL_API WOLFSSL_DH *wolfSSL_d2i_DHparams(WOLFSSL_DH **dh,
+                                         const unsigned char **pp, long length);
+WOLFSSL_API int wolfSSL_i2d_DHparams(const WOLFSSL_DH *dh, unsigned char **out);
 WOLFSSL_API WOLFSSL_DH* wolfSSL_DH_new(void);
-WOLFSSL_API void       wolfSSL_DH_free(WOLFSSL_DH*);
+WOLFSSL_API void        wolfSSL_DH_free(WOLFSSL_DH*);
 
+WOLFSSL_API int wolfSSL_DH_check(const WOLFSSL_DH *dh, int *codes);
 WOLFSSL_API int wolfSSL_DH_size(WOLFSSL_DH*);
 WOLFSSL_API int wolfSSL_DH_generate_key(WOLFSSL_DH*);
 WOLFSSL_API int wolfSSL_DH_compute_key(unsigned char* key, WOLFSSL_BIGNUM* pub,
                                      WOLFSSL_DH*);
-
-typedef WOLFSSL_DH DH;
+WOLFSSL_API int wolfSSL_DH_LoadDer(WOLFSSL_DH*, const unsigned char*, int sz);
+WOLFSSL_API int wolfSSL_DH_set0_pqg(WOLFSSL_DH*, WOLFSSL_BIGNUM*,
+    WOLFSSL_BIGNUM*, WOLFSSL_BIGNUM*);
 
 #define DH_new  wolfSSL_DH_new
 #define DH_free wolfSSL_DH_free
 
+#define d2i_DHparams    wolfSSL_d2i_DHparams
+#define i2d_DHparams    wolfSSL_i2d_DHparams
+#define DH_check        wolfSSL_DH_check
+
 #define DH_size         wolfSSL_DH_size
 #define DH_generate_key wolfSSL_DH_generate_key
 #define DH_compute_key  wolfSSL_DH_compute_key
-#define get_rfc3526_prime_1536 wolfSSL_DH_1536_prime
+#if defined(OPENSSL_VERSION_NUMBER) && OPENSSL_VERSION_NUMBER >= 0x10100000L
+#define DH_set0_pqg     wolfSSL_DH_set0_pqg
+#endif
+#define DH_bits(x)      (BN_num_bits(x->p))
+
+#define DH_GENERATOR_2                  2
+#define DH_CHECK_P_NOT_PRIME            0x01
+#define DH_CHECK_P_NOT_SAFE_PRIME       0x02
+#define DH_NOT_SUITABLE_GENERATOR       0x08
 
+/* Temporary values for wolfSSL_DH_Check*/
+#define DH_CHECK_INVALID_Q_VALUE        0x10
+#define DH_CHECK_Q_NOT_PRIME            0x11
+/* end temp */
+
+/* for pre 1.1.0 */
+#define get_rfc2409_prime_768      wolfSSL_DH_768_prime
+#define get_rfc2409_prime_1024     wolfSSL_DH_1024_prime
+#define get_rfc3526_prime_1536     wolfSSL_DH_1536_prime
+#define get_rfc3526_prime_2048     wolfSSL_DH_2048_prime
+#define get_rfc3526_prime_3072     wolfSSL_DH_3072_prime
+#define get_rfc3526_prime_4096     wolfSSL_DH_4096_prime
+#define get_rfc3526_prime_6144     wolfSSL_DH_6144_prime
+#define get_rfc3526_prime_8192     wolfSSL_DH_8192_prime
 
 #ifdef __cplusplus
     }  /* extern "C" */
--- a/wolfssl/openssl/dsa.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/openssl/dsa.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* dsa.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -31,6 +31,11 @@
     extern "C" {
 #endif
 
+typedef struct WOLFSSL_DSA_SIG {
+    WOLFSSL_BIGNUM *r;
+    WOLFSSL_BIGNUM *s;
+} WOLFSSL_DSA_SIG;
+
 #ifndef WOLFSSL_DSA_TYPE_DEFINED /* guard on redeclaration */
 typedef struct WOLFSSL_DSA            WOLFSSL_DSA;
 #define WOLFSSL_DSA_TYPE_DEFINED
@@ -65,6 +70,9 @@
 
 WOLFSSL_API int wolfSSL_DSA_LoadDer(WOLFSSL_DSA*, const unsigned char*, int sz);
 
+WOLFSSL_API int wolfSSL_DSA_LoadDer_ex(WOLFSSL_DSA*, const unsigned char*,
+                                       int sz, int opt);
+
 WOLFSSL_API int wolfSSL_DSA_do_sign(const unsigned char* d,
                                     unsigned char* sigRet, WOLFSSL_DSA* dsa);
 
@@ -72,13 +80,31 @@
                                       unsigned char* sig,
                                       WOLFSSL_DSA* dsa, int *dsacheck);
 
+WOLFSSL_API WOLFSSL_DSA_SIG* wolfSSL_DSA_SIG_new(void);
+WOLFSSL_API void wolfSSL_DSA_SIG_free(WOLFSSL_DSA_SIG *sig);
+WOLFSSL_API WOLFSSL_DSA_SIG* wolfSSL_DSA_do_sign_ex(const unsigned char* digest,
+                                                    int outLen, WOLFSSL_DSA* dsa);
+WOLFSSL_API int wolfSSL_DSA_do_verify_ex(const unsigned char* digest, int digest_len,
+                                         WOLFSSL_DSA_SIG* sig, WOLFSSL_DSA* dsa);
+
+#define WOLFSSL_DSA_LOAD_PRIVATE 1
+#define WOLFSSL_DSA_LOAD_PUBLIC  2
+
 #define DSA_new wolfSSL_DSA_new
 #define DSA_free wolfSSL_DSA_free
 
+#define DSA_LoadDer                wolfSSL_DSA_LoadDer
 #define DSA_generate_key           wolfSSL_DSA_generate_key
 #define DSA_generate_parameters    wolfSSL_DSA_generate_parameters
 #define DSA_generate_parameters_ex wolfSSL_DSA_generate_parameters_ex
 
+#define DSA_SIG_new                wolfSSL_DSA_SIG_new
+#define DSA_SIG_free               wolfSSL_DSA_SIG_free
+#define DSA_do_sign                wolfSSL_DSA_do_sign_ex
+#define DSA_do_verify              wolfSSL_DSA_do_verify_ex
+
+
+#define DSA_SIG                    WOLFSSL_DSA_SIG
 
 #ifdef __cplusplus
     }  /* extern "C" */ 
--- a/wolfssl/openssl/ec.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/openssl/ec.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* ec.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -33,32 +33,39 @@
 
 /* Map OpenSSL NID value */
 enum {
+    POINT_CONVERSION_COMPRESSED = 2,
     POINT_CONVERSION_UNCOMPRESSED = 4,
 
 #ifdef HAVE_ECC
-    /* Use ecc_curve_type enum values for NID */
-    NID_X9_62_prime192v1 = ECC_SECP192R1,
-    NID_X9_62_prime256v1 = ECC_SECP256R1,
-    NID_secp112r1 = ECC_SECP112R1,
-    NID_secp112r2 = ECC_SECP112R2,
-    NID_secp128r1 = ECC_SECP128R1,
-    NID_secp128r2 = ECC_SECP128R2,
-    NID_secp160r1 = ECC_SECP160R1,
-    NID_secp160r2 = ECC_SECP160R2,
-    NID_secp224r1 = ECC_SECP224R1,
-    NID_secp384r1 = ECC_SECP384R1,
-    NID_secp521r1 = ECC_SECP521R1,
-    NID_secp160k1 = ECC_SECP160K1,
-    NID_secp192k1 = ECC_SECP192K1,
-    NID_secp224k1 = ECC_SECP224K1,
-    NID_secp256k1 = ECC_SECP256K1,
-    NID_brainpoolP160r1 = ECC_BRAINPOOLP160R1,
-    NID_brainpoolP192r1 = ECC_BRAINPOOLP192R1,
-    NID_brainpoolP224r1 = ECC_BRAINPOOLP224R1,
-    NID_brainpoolP256r1 = ECC_BRAINPOOLP256R1,
-    NID_brainpoolP320r1 = ECC_BRAINPOOLP320R1,
-    NID_brainpoolP384r1 = ECC_BRAINPOOLP384R1,
-    NID_brainpoolP512r1 = ECC_BRAINPOOLP512R1,
+    /* Use OpenSSL NIDs. NIDs can be mapped to ecc_curve_id enum values by
+        calling NIDToEccEnum() in ssl.c */
+    NID_X9_62_prime192v1 = 409,
+    NID_X9_62_prime192v2 = 410,
+    NID_X9_62_prime192v3 = 411,
+    NID_X9_62_prime239v1 = 412,
+    NID_X9_62_prime239v2 = 413,
+    NID_X9_62_prime239v3 = 414,
+    NID_X9_62_prime256v1 = 415,
+    NID_secp112r1 = 704,
+    NID_secp112r2 = 705,
+    NID_secp128r1 = 706,
+    NID_secp128r2 = 707,
+    NID_secp160r1 = 709,
+    NID_secp160r2 = 710,
+    NID_secp224r1 = 713,
+    NID_secp384r1 = 715,
+    NID_secp521r1 = 716,
+    NID_secp160k1 = 708,
+    NID_secp192k1 = 711,
+    NID_secp224k1 = 712,
+    NID_secp256k1 = 714,
+    NID_brainpoolP160r1 = 921,
+    NID_brainpoolP192r1 = 923,
+    NID_brainpoolP224r1 = 925,
+    NID_brainpoolP256r1 = 927,
+    NID_brainpoolP320r1 = 929,
+    NID_brainpoolP384r1 = 931,
+    NID_brainpoolP512r1 = 933,
 #endif
 
     OPENSSL_EC_NAMED_CURVE  = 0x001
@@ -68,12 +75,17 @@
 typedef struct WOLFSSL_EC_KEY         WOLFSSL_EC_KEY;
 typedef struct WOLFSSL_EC_POINT       WOLFSSL_EC_POINT;
 typedef struct WOLFSSL_EC_GROUP       WOLFSSL_EC_GROUP;
+typedef struct WOLFSSL_EC_BUILTIN_CURVE WOLFSSL_EC_BUILTIN_CURVE;
+/* WOLFSSL_EC_METHOD is just an alias of WOLFSSL_EC_GROUP for now */
+typedef struct WOLFSSL_EC_GROUP       WOLFSSL_EC_METHOD;
 #define WOLFSSL_EC_TYPE_DEFINED
 #endif
 
 typedef WOLFSSL_EC_KEY                EC_KEY;
 typedef WOLFSSL_EC_GROUP              EC_GROUP;
+typedef WOLFSSL_EC_GROUP              EC_METHOD;
 typedef WOLFSSL_EC_POINT              EC_POINT;
+typedef WOLFSSL_EC_BUILTIN_CURVE      EC_builtin_curve;
 
 struct WOLFSSL_EC_POINT {
     WOLFSSL_BIGNUM *X;
@@ -99,8 +111,24 @@
     void*          internal;     /* our ECC Key */
     char           inSet;        /* internal set from external ? */
     char           exSet;        /* external set from internal ? */
+    char           form;         /* Either POINT_CONVERSION_UNCOMPRESSED or
+                                  * POINT_CONVERSION_COMPRESSED */
 };
 
+struct WOLFSSL_EC_BUILTIN_CURVE {
+    int nid;
+    const char *comment;
+};
+
+#define WOLFSSL_EC_KEY_LOAD_PRIVATE 1
+#define WOLFSSL_EC_KEY_LOAD_PUBLIC  2
+
+WOLFSSL_API
+size_t wolfSSL_EC_get_builtin_curves(WOLFSSL_EC_BUILTIN_CURVE *r,size_t nitems);
+
+WOLFSSL_API
+WOLFSSL_EC_KEY *wolfSSL_EC_KEY_dup(const WOLFSSL_EC_KEY *src);
+
 WOLFSSL_API
 int wolfSSL_ECPoint_i2d(const WOLFSSL_EC_GROUP *curve,
                         const WOLFSSL_EC_POINT *p,
@@ -109,9 +137,31 @@
 int wolfSSL_ECPoint_d2i(unsigned char *in, unsigned int len,
                         const WOLFSSL_EC_GROUP *curve, WOLFSSL_EC_POINT *p);
 WOLFSSL_API
+size_t wolfSSL_EC_POINT_point2oct(const WOLFSSL_EC_GROUP *group,
+                                  const WOLFSSL_EC_POINT *p,
+                                  char form,
+                                  byte *buf, size_t len, WOLFSSL_BN_CTX *ctx);
+WOLFSSL_API
+int wolfSSL_EC_POINT_oct2point(const WOLFSSL_EC_GROUP *group,
+                               WOLFSSL_EC_POINT *p, const unsigned char *buf,
+                               size_t len, WOLFSSL_BN_CTX *ctx);
+WOLFSSL_API
+int wolfSSL_i2o_ECPublicKey(const WOLFSSL_EC_KEY *in, unsigned char **out);
+WOLFSSL_API
+void wolfSSL_EC_KEY_set_conv_form(WOLFSSL_EC_KEY *eckey, char form);
+WOLFSSL_API
+WOLFSSL_BIGNUM *wolfSSL_EC_POINT_point2bn(const WOLFSSL_EC_GROUP *group,
+                                          const WOLFSSL_EC_POINT *p,
+                                          char form,
+                                          WOLFSSL_BIGNUM *in, WOLFSSL_BN_CTX *ctx);
+
+WOLFSSL_API
 int wolfSSL_EC_KEY_LoadDer(WOLFSSL_EC_KEY* key,
                            const unsigned char* der, int derSz);
 WOLFSSL_API
+int wolfSSL_EC_KEY_LoadDer_ex(WOLFSSL_EC_KEY* key,
+                              const unsigned char* der, int derSz, int opt);
+WOLFSSL_API
 void wolfSSL_EC_KEY_free(WOLFSSL_EC_KEY *key);
 WOLFSSL_API
 WOLFSSL_EC_POINT *wolfSSL_EC_KEY_get0_public_key(const WOLFSSL_EC_KEY *key);
@@ -124,6 +174,7 @@
 WOLFSSL_BIGNUM *wolfSSL_EC_KEY_get0_private_key(const WOLFSSL_EC_KEY *key);
 WOLFSSL_API
 WOLFSSL_EC_KEY *wolfSSL_EC_KEY_new_by_curve_name(int nid);
+WOLFSSL_API const char* wolfSSL_EC_curve_nid2nist(int nid);
 WOLFSSL_API
 WOLFSSL_EC_KEY *wolfSSL_EC_KEY_new(void);
 WOLFSSL_API
@@ -135,6 +186,10 @@
 WOLFSSL_API
 int wolfSSL_EC_KEY_set_public_key(WOLFSSL_EC_KEY *key,
                                   const WOLFSSL_EC_POINT *pub);
+WOLFSSL_API int wolfSSL_ECDSA_size(const WOLFSSL_EC_KEY *key);
+WOLFSSL_API int wolfSSL_ECDSA_sign(int type, const unsigned char *digest,
+                                   int digestSz, unsigned char *sig,
+                                   unsigned int *sigSz, WOLFSSL_EC_KEY *key);
 WOLFSSL_API
 void wolfSSL_EC_GROUP_set_asn1_flag(WOLFSSL_EC_GROUP *group, int flag);
 WOLFSSL_API
@@ -150,8 +205,15 @@
 int wolfSSL_EC_GROUP_get_order(const WOLFSSL_EC_GROUP *group,
                                WOLFSSL_BIGNUM *order, WOLFSSL_BN_CTX *ctx);
 WOLFSSL_API
+int wolfSSL_EC_GROUP_order_bits(const WOLFSSL_EC_GROUP *group);
+WOLFSSL_API
 void wolfSSL_EC_GROUP_free(WOLFSSL_EC_GROUP *group);
 WOLFSSL_API
+const WOLFSSL_EC_METHOD* wolfSSL_EC_GROUP_method_of(
+                                                const WOLFSSL_EC_GROUP *group);
+WOLFSSL_API
+int wolfSSL_EC_METHOD_get_field_type(const WOLFSSL_EC_METHOD *meth);
+WOLFSSL_API
 WOLFSSL_EC_POINT *wolfSSL_EC_POINT_new(const WOLFSSL_EC_GROUP *group);
 WOLFSSL_API
 int wolfSSL_EC_POINT_get_affine_coordinates_GFp(const WOLFSSL_EC_GROUP *group,
@@ -160,6 +222,12 @@
                                                 WOLFSSL_BIGNUM *y,
                                                 WOLFSSL_BN_CTX *ctx);
 WOLFSSL_API
+int wolfSSL_EC_POINT_set_affine_coordinates_GFp(const WOLFSSL_EC_GROUP *group,
+                                                WOLFSSL_EC_POINT *point,
+                                                const WOLFSSL_BIGNUM *x,
+                                                const WOLFSSL_BIGNUM *y,
+                                                WOLFSSL_BN_CTX *ctx);
+WOLFSSL_API
 int wolfSSL_EC_POINT_mul(const WOLFSSL_EC_GROUP *group, WOLFSSL_EC_POINT *r,
                          const WOLFSSL_BIGNUM *n,
                          const WOLFSSL_EC_POINT *q, const WOLFSSL_BIGNUM *m,
@@ -170,42 +238,83 @@
 int wolfSSL_EC_POINT_cmp(const WOLFSSL_EC_GROUP *group,
                          const WOLFSSL_EC_POINT *a, const WOLFSSL_EC_POINT *b,
                          WOLFSSL_BN_CTX *ctx);
+WOLFSSL_API int wolfSSL_EC_POINT_copy(WOLFSSL_EC_POINT *dest,
+                                      const WOLFSSL_EC_POINT *src);
 WOLFSSL_API
 void wolfSSL_EC_POINT_free(WOLFSSL_EC_POINT *point);
 WOLFSSL_API
 int wolfSSL_EC_POINT_is_at_infinity(const WOLFSSL_EC_GROUP *group,
                                     const WOLFSSL_EC_POINT *a);
 
-#define EC_KEY_free wolfSSL_EC_KEY_free
-#define EC_KEY_get0_public_key wolfSSL_EC_KEY_get0_public_key
-#define EC_KEY_get0_group wolfSSL_EC_KEY_get0_group
-#define EC_KEY_set_private_key wolfSSL_EC_KEY_set_private_key
-#define EC_KEY_get0_private_key wolfSSL_EC_KEY_get0_private_key
-#define EC_KEY_new_by_curve_name wolfSSL_EC_KEY_new_by_curve_name
-#define EC_KEY_set_group wolfSSL_EC_KEY_set_group
-#define EC_KEY_generate_key wolfSSL_EC_KEY_generate_key
-#define EC_KEY_set_asn1_flag wolfSSL_EC_KEY_set_asn1_flag
-#define EC_KEY_set_public_key wolfSSL_EC_KEY_set_public_key
-#define EC_KEY_new wolfSSL_EC_KEY_new
+#ifndef HAVE_SELFTEST
+WOLFSSL_API
+char* wolfSSL_EC_POINT_point2hex(const WOLFSSL_EC_GROUP* group,
+                                 const WOLFSSL_EC_POINT* point, int form,
+                                 WOLFSSL_BN_CTX* ctx);
+#endif
+
+#ifndef HAVE_ECC
+#define OPENSSL_NO_EC
+#endif
+
+#define EC_KEY_new                      wolfSSL_EC_KEY_new
+#define EC_KEY_free                     wolfSSL_EC_KEY_free
+#define EC_KEY_dup                      wolfSSL_EC_KEY_dup
+#define EC_KEY_get0_public_key          wolfSSL_EC_KEY_get0_public_key
+#define EC_KEY_get0_group               wolfSSL_EC_KEY_get0_group
+#define EC_KEY_set_private_key          wolfSSL_EC_KEY_set_private_key
+#define EC_KEY_get0_private_key         wolfSSL_EC_KEY_get0_private_key
+#define EC_KEY_new_by_curve_name        wolfSSL_EC_KEY_new_by_curve_name
+#define EC_KEY_set_group                wolfSSL_EC_KEY_set_group
+#define EC_KEY_generate_key             wolfSSL_EC_KEY_generate_key
+#define EC_KEY_set_asn1_flag            wolfSSL_EC_KEY_set_asn1_flag
+#define EC_KEY_set_public_key           wolfSSL_EC_KEY_set_public_key
+
+#define ECDSA_size                      wolfSSL_ECDSA_size
+#define ECDSA_sign                      wolfSSL_ECDSA_sign
 
-#define EC_GROUP_set_asn1_flag wolfSSL_EC_GROUP_set_asn1_flag
-#define EC_GROUP_new_by_curve_name wolfSSL_EC_GROUP_new_by_curve_name
-#define EC_GROUP_cmp wolfSSL_EC_GROUP_cmp
-#define EC_GROUP_get_curve_name wolfSSL_EC_GROUP_get_curve_name
-#define EC_GROUP_get_degree wolfSSL_EC_GROUP_get_degree
-#define EC_GROUP_get_order wolfSSL_EC_GROUP_get_order
-#define EC_GROUP_free wolfSSL_EC_GROUP_free
+#define EC_GROUP_free                   wolfSSL_EC_GROUP_free
+#define EC_GROUP_set_asn1_flag          wolfSSL_EC_GROUP_set_asn1_flag
+#define EC_GROUP_new_by_curve_name      wolfSSL_EC_GROUP_new_by_curve_name
+#define EC_GROUP_cmp                    wolfSSL_EC_GROUP_cmp
+#define EC_GROUP_get_curve_name         wolfSSL_EC_GROUP_get_curve_name
+#define EC_GROUP_get_degree             wolfSSL_EC_GROUP_get_degree
+#define EC_GROUP_get_order              wolfSSL_EC_GROUP_get_order
+#define EC_GROUP_order_bits             wolfSSL_EC_GROUP_order_bits
+#define EC_GROUP_method_of              wolfSSL_EC_GROUP_method_of
+
+#define EC_METHOD_get_field_type        wolfSSL_EC_METHOD_get_field_type
 
-#define EC_POINT_new wolfSSL_EC_POINT_new
+#define EC_POINT_new                    wolfSSL_EC_POINT_new
+#define EC_POINT_free                   wolfSSL_EC_POINT_free
 #define EC_POINT_get_affine_coordinates_GFp \
-            wolfSSL_EC_POINT_get_affine_coordinates_GFp
-#define EC_POINT_mul wolfSSL_EC_POINT_mul
-#define EC_POINT_clear_free wolfSSL_EC_POINT_clear_free
-#define EC_POINT_cmp wolfSSL_EC_POINT_cmp
-#define EC_POINT_free wolfSSL_EC_POINT_free
-#define EC_POINT_is_at_infinity wolfSSL_EC_POINT_is_at_infinity
+                                     wolfSSL_EC_POINT_get_affine_coordinates_GFp
+#define EC_POINT_set_affine_coordinates_GFp \
+                                     wolfSSL_EC_POINT_set_affine_coordinates_GFp
+#define EC_POINT_mul                    wolfSSL_EC_POINT_mul
+#define EC_POINT_clear_free             wolfSSL_EC_POINT_clear_free
+#define EC_POINT_cmp                    wolfSSL_EC_POINT_cmp
+#define EC_POINT_copy                   wolfSSL_EC_POINT_copy
+#define EC_POINT_is_at_infinity         wolfSSL_EC_POINT_is_at_infinity
+
+#define EC_get_builtin_curves           wolfSSL_EC_get_builtin_curves
 
-#define EC_POINT_dump wolfSSL_EC_POINT_dump
+#define ECPoint_i2d                     wolfSSL_ECPoint_i2d
+#define ECPoint_d2i                     wolfSSL_ECPoint_d2i
+#define EC_POINT_point2oct              wolfSSL_EC_POINT_point2oct
+#define EC_POINT_oct2point              wolfSSL_EC_POINT_oct2point
+#define EC_POINT_point2bn               wolfSSL_EC_POINT_point2bn
+#define i2o_ECPublicKey                 wolfSSL_i2o_ECPublicKey
+#define EC_KEY_set_conv_form            wolfSSL_EC_KEY_set_conv_form
+
+#ifndef HAVE_SELFTEST
+    #define EC_POINT_point2hex          wolfSSL_EC_POINT_point2hex
+#endif
+
+#define EC_POINT_dump                   wolfSSL_EC_POINT_dump
+#define EC_get_builtin_curves           wolfSSL_EC_get_builtin_curves
+
+#define EC_curve_nid2nist               wolfSSL_EC_curve_nid2nist
 
 #ifdef __cplusplus
 }  /* extern "C" */
--- a/wolfssl/openssl/ec25519.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/openssl/ec25519.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* ec25519.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wolfssl/openssl/ec448.h	Thu Jun 04 23:57:22 2020 +0000
@@ -0,0 +1,45 @@
+/* ec448.h
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/* ec448.h */
+
+#ifndef WOLFSSL_EC448_H_
+#define WOLFSSL_EC448_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+WOLFSSL_API
+int wolfSSL_EC448_generate_key(unsigned char *priv, unsigned int *privSz,
+                               unsigned char *pub, unsigned int *pubSz);
+
+WOLFSSL_API
+int wolfSSL_EC448_shared_key(unsigned char *shared, unsigned int *sharedSz,
+                             const unsigned char *priv, unsigned int privSz,
+                             const unsigned char *pub, unsigned int pubSz);
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif
+
+#endif /* header */
+
--- a/wolfssl/openssl/ecdh.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/openssl/ecdh.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* ecdh.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
--- a/wolfssl/openssl/ecdsa.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/openssl/ecdsa.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* ecdsa.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -25,6 +25,7 @@
 #define WOLFSSL_ECDSA_H_
 
 #include <wolfssl/openssl/bn.h>
+#include <wolfssl/openssl/ec.h>
 
 
 #ifdef __cplusplus
@@ -53,10 +54,18 @@
                                         const WOLFSSL_ECDSA_SIG *sig,
                                         WOLFSSL_EC_KEY *eckey);
 
-#define ECDSA_SIG_free wolfSSL_ECDSA_SIG_free
-#define ECDSA_SIG_new wolfSSL_ECDSA_SIG_new
-#define ECDSA_do_sign wolfSSL_ECDSA_do_sign
-#define ECDSA_do_verify wolfSSL_ECDSA_do_verify
+WOLFSSL_API WOLFSSL_ECDSA_SIG *wolfSSL_d2i_ECDSA_SIG(WOLFSSL_ECDSA_SIG **sig,
+                                                     const unsigned char **pp,
+                                                     long len);
+WOLFSSL_API int wolfSSL_i2d_ECDSA_SIG(const WOLFSSL_ECDSA_SIG *sig,
+                                      unsigned char **pp);
+
+#define ECDSA_SIG_free         wolfSSL_ECDSA_SIG_free
+#define ECDSA_SIG_new          wolfSSL_ECDSA_SIG_new
+#define ECDSA_do_sign          wolfSSL_ECDSA_do_sign
+#define ECDSA_do_verify        wolfSSL_ECDSA_do_verify
+#define d2i_ECDSA_SIG          wolfSSL_d2i_ECDSA_SIG
+#define i2d_ECDSA_SIG          wolfSSL_i2d_ECDSA_SIG
 
 #ifdef __cplusplus
 }  /* extern "C" */
--- a/wolfssl/openssl/ed25519.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/openssl/ed25519.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* ed25519.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wolfssl/openssl/ed448.h	Thu Jun 04 23:57:22 2020 +0000
@@ -0,0 +1,48 @@
+/* ed448.h
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/* ed448.h */
+
+#ifndef WOLFSSL_ED448_H_
+#define WOLFSSL_ED448_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+WOLFSSL_API
+int wolfSSL_ED448_generate_key(unsigned char *priv, unsigned int *privSz,
+                               unsigned char *pub, unsigned int *pubSz);
+WOLFSSL_API
+int wolfSSL_ED448_sign(const unsigned char *msg, unsigned int msgSz,
+                       const unsigned char *priv, unsigned int privSz,
+                       unsigned char *sig, unsigned int *sigSz);
+WOLFSSL_API
+int wolfSSL_ED448_verify(const unsigned char *msg, unsigned int msgSz,
+                         const unsigned char *pub, unsigned int pubSz,
+                         const unsigned char *sig, unsigned int sigSz);
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif
+
+#endif /* header */
+
--- a/wolfssl/openssl/engine.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/openssl/engine.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,5 +1,7 @@
 /* engine.h for libcurl */
 
+#include <wolfssl/openssl/err.h>
+
 #undef HAVE_OPENSSL_ENGINE_H
 
 #define ENGINE_load_builtin_engines() /*ENGINE_load_builtin_engines not needed*/
--- a/wolfssl/openssl/err.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/openssl/err.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* err.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -22,12 +22,35 @@
 #ifndef WOLFSSL_OPENSSL_ERR_
 #define WOLFSSL_OPENSSL_ERR_
 
-#include <wolfssl/openssl/ssl.h>
+#include <wolfssl/wolfcrypt/logging.h>
 
 /* err.h for openssl */
 #define ERR_load_crypto_strings          wolfSSL_ERR_load_crypto_strings
+#define ERR_load_CRYPTO_strings          wolfSSL_ERR_load_crypto_strings
 #define ERR_peek_last_error              wolfSSL_ERR_peek_last_error
 
+/* fatal error */
+#define ERR_R_MALLOC_FAILURE                    MEMORY_E
+#define ERR_R_PASSED_NULL_PARAMETER             BAD_FUNC_ARG
+#define ERR_R_DISABLED                          NOT_COMPILED_IN
+#define ERR_R_PASSED_INVALID_ARGUMENT           BAD_FUNC_ARG
+#define RSA_R_UNKNOWN_PADDING_TYPE              RSA_PAD_E
+#define EC_R_BUFFER_TOO_SMALL                   BUFFER_E
+
+/* SSL function codes */
+#define RSA_F_RSA_OSSL_PRIVATE_ENCRYPT          1
+#define SSL_F_SSL_CTX_USE_CERTIFICATE_FILE      2
+#define SSL_F_SSL_USE_PRIVATEKEY                3
+#define EC_F_EC_GFP_SIMPLE_POINT2OCT            4
+
+/* reasons */
+#define ERR_R_SYS_LIB                           1
+#define PKCS12_R_MAC_VERIFY_FAILURE             2
+
+#define RSAerr(f,r)  ERR_put_error(0,(f),(r),__FILE__,__LINE__)
+#define SSLerr(f,r)  ERR_put_error(0,(f),(r),__FILE__,__LINE__)
+#define ECerr(f,r)   ERR_put_error(0,(f),(r),__FILE__,__LINE__)
+
 #endif /* WOLFSSL_OPENSSL_ERR_ */
 
 
--- a/wolfssl/openssl/evp.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/openssl/evp.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* evp.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -43,10 +43,12 @@
     #include <wolfssl/openssl/md5.h>
 #endif
 #include <wolfssl/openssl/sha.h>
+#include <wolfssl/openssl/sha3.h>
 #include <wolfssl/openssl/ripemd.h>
 #include <wolfssl/openssl/rsa.h>
 #include <wolfssl/openssl/dsa.h>
 #include <wolfssl/openssl/ec.h>
+#include <wolfssl/openssl/dh.h>
 
 #include <wolfssl/wolfcrypt/aes.h>
 #include <wolfssl/wolfcrypt/des3.h>
@@ -65,7 +67,8 @@
 typedef char WOLFSSL_EVP_CIPHER;
 #ifndef WOLFSSL_EVP_TYPE_DEFINED /* guard on redeclaration */
 typedef char   WOLFSSL_EVP_MD;
-typedef struct WOLFSSL_EVP_PKEY WOLFSSL_EVP_PKEY;
+typedef struct WOLFSSL_EVP_PKEY     WOLFSSL_EVP_PKEY;
+typedef struct WOLFSSL_EVP_MD_CTX   WOLFSSL_EVP_MD_CTX;
 #define WOLFSSL_EVP_TYPE_DEFINED
 #endif
 
@@ -78,6 +81,7 @@
 #ifndef NO_MD5
     WOLFSSL_API const WOLFSSL_EVP_MD* wolfSSL_EVP_md5(void);
 #endif
+WOLFSSL_API const WOLFSSL_EVP_MD* wolfSSL_EVP_mdc2(void);
 WOLFSSL_API const WOLFSSL_EVP_MD* wolfSSL_EVP_sha1(void);
 WOLFSSL_API const WOLFSSL_EVP_MD* wolfSSL_EVP_sha224(void);
 WOLFSSL_API const WOLFSSL_EVP_MD* wolfSSL_EVP_sha256(void);
@@ -85,12 +89,46 @@
 WOLFSSL_API const WOLFSSL_EVP_MD* wolfSSL_EVP_sha512(void);
 WOLFSSL_API const WOLFSSL_EVP_MD* wolfSSL_EVP_ripemd160(void);
 
+WOLFSSL_API const WOLFSSL_EVP_MD* wolfSSL_EVP_sha3_224(void);
+WOLFSSL_API const WOLFSSL_EVP_MD* wolfSSL_EVP_sha3_256(void);
+WOLFSSL_API const WOLFSSL_EVP_MD* wolfSSL_EVP_sha3_384(void);
+WOLFSSL_API const WOLFSSL_EVP_MD* wolfSSL_EVP_sha3_512(void);
+
 WOLFSSL_API const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_128_ecb(void);
 WOLFSSL_API const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_192_ecb(void);
 WOLFSSL_API const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_256_ecb(void);
 WOLFSSL_API const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_128_cbc(void);
+#if !defined(NO_AES) && defined(HAVE_AES_CBC)
 WOLFSSL_API const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_192_cbc(void);
 WOLFSSL_API const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_256_cbc(void);
+#endif
+#ifndef NO_AES
+#ifdef WOLFSSL_AES_CFB
+WOLFSSL_API const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_128_cfb1(void);
+WOLFSSL_API const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_192_cfb1(void);
+WOLFSSL_API const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_256_cfb1(void);
+WOLFSSL_API const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_128_cfb8(void);
+WOLFSSL_API const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_192_cfb8(void);
+WOLFSSL_API const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_256_cfb8(void);
+WOLFSSL_API const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_128_cfb128(void);
+WOLFSSL_API const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_192_cfb128(void);
+WOLFSSL_API const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_256_cfb128(void);
+#endif
+#ifdef WOLFSSL_AES_OFB
+WOLFSSL_API const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_128_ofb(void);
+WOLFSSL_API const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_192_ofb(void);
+WOLFSSL_API const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_256_ofb(void);
+#endif
+#ifdef WOLFSSL_AES_XTS
+WOLFSSL_API const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_128_xts(void);
+WOLFSSL_API const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_256_xts(void);
+#endif
+#endif /* NO_AES */
+#if !defined(NO_AES) && defined(HAVE_AESGCM)
+WOLFSSL_API const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_128_gcm(void);
+WOLFSSL_API const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_192_gcm(void);
+WOLFSSL_API const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_256_gcm(void);
+#endif
 WOLFSSL_API const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_128_ctr(void);
 WOLFSSL_API const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_192_ctr(void);
 WOLFSSL_API const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_256_ctr(void);
@@ -101,6 +139,7 @@
 WOLFSSL_API const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_rc4(void);
 WOLFSSL_API const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_idea_cbc(void);
 WOLFSSL_API const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_enc_null(void);
+WOLFSSL_API const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_rc2_cbc(void);
 
 
 typedef union {
@@ -124,21 +163,39 @@
     #ifdef WOLFSSL_RIPEMD
         WOLFSSL_RIPEMD_CTX ripemd;
     #endif
+    #ifndef WOLFSSL_NOSHA3_224
+        WOLFSSL_SHA3_224_CTX sha3_224;
+    #endif
+    #ifndef WOLFSSL_NOSHA3_256
+        WOLFSSL_SHA3_256_CTX sha3_256;
+    #endif
+        WOLFSSL_SHA3_384_CTX sha3_384;
+    #ifndef WOLFSSL_NOSHA3_512
+        WOLFSSL_SHA3_512_CTX sha3_512;
+    #endif
 } WOLFSSL_Hasher;
 
+typedef struct WOLFSSL_EVP_PKEY_CTX WOLFSSL_EVP_PKEY_CTX;
+typedef struct WOLFSSL_EVP_CIPHER_CTX WOLFSSL_EVP_CIPHER_CTX;
 
-typedef struct WOLFSSL_EVP_MD_CTX {
+struct WOLFSSL_EVP_MD_CTX {
     union {
         WOLFSSL_Hasher digest;
+    #ifndef NO_HMAC
         Hmac hmac;
+    #endif
     } hash;
-    unsigned char macType;
-} WOLFSSL_EVP_MD_CTX;
+    int macType;
+    WOLFSSL_EVP_PKEY_CTX *pctx;
+};
 
 
 typedef union {
 #ifndef NO_AES
     Aes  aes;
+#ifdef WOLFSSL_AES_XTS
+    XtsAes xts;
+#endif
 #endif
 #ifndef NO_DES3
     Des  des;
@@ -148,6 +205,9 @@
 #ifdef HAVE_IDEA
     Idea idea;
 #endif
+#ifdef WOLFSSL_QT
+    int (*ctrl) (WOLFSSL_EVP_CIPHER_CTX *, int type, int arg, void *ptr);
+#endif
 } WOLFSSL_Cipher;
 
 
@@ -173,18 +233,65 @@
 #ifdef HAVE_IDEA
     IDEA_CBC_TYPE     = 19,
 #endif
+    AES_128_GCM_TYPE  = 21,
+    AES_192_GCM_TYPE  = 22,
+    AES_256_GCM_TYPE  = 23,
     NID_sha1          = 64,
     NID_sha224        = 65,
     NID_md2           = 77,
+    NID_md4           = 257,
     NID_md5           =  4,
     NID_hmac          = 855,
-    EVP_PKEY_HMAC     = NID_hmac
+    NID_dhKeyAgreement= 28,
+    EVP_PKEY_DH       = NID_dhKeyAgreement,
+    EVP_PKEY_HMAC     = NID_hmac,
+    AES_128_CFB1_TYPE = 24,
+    AES_192_CFB1_TYPE = 25,
+    AES_256_CFB1_TYPE = 26,
+    AES_128_CFB8_TYPE = 27,
+    AES_192_CFB8_TYPE = 28,
+    AES_256_CFB8_TYPE = 29,
+    AES_128_CFB128_TYPE = 30,
+    AES_192_CFB128_TYPE = 31,
+    AES_256_CFB128_TYPE = 32,
+    AES_128_OFB_TYPE = 33,
+    AES_192_OFB_TYPE = 34,
+    AES_256_OFB_TYPE = 35,
+    AES_128_XTS_TYPE = 36,
+    AES_256_XTS_TYPE = 37
+};
+
+enum {
+    NID_md5WithRSA    = 104,
+    NID_md5WithRSAEncryption = 8,
+    NID_dsaWithSHA1   = 113,
+    NID_dsaWithSHA1_2 = 70,
+    NID_sha1WithRSA   = 115,
+    NID_sha1WithRSAEncryption = 65,
+    NID_sha224WithRSAEncryption = 671,
+    NID_sha256WithRSAEncryption = 668,
+    NID_sha384WithRSAEncryption = 669,
+    NID_sha512WithRSAEncryption = 670,
+    NID_ecdsa_with_SHA1 = 416,
+    NID_ecdsa_with_SHA224 = 793,
+    NID_ecdsa_with_SHA256 = 794,
+    NID_ecdsa_with_SHA384 = 795,
+    NID_ecdsa_with_SHA512 = 796,
+    NID_dsa_with_SHA224 = 802,
+    NID_dsa_with_SHA256 = 803,
+    NID_sha3_224        = 1096,
+    NID_sha3_256        = 1097,
+    NID_sha3_384        = 1098,
+    NID_sha3_512        = 1099,
 };
 
 enum {
     NID_aes_128_cbc = 419,
     NID_aes_192_cbc = 423,
     NID_aes_256_cbc = 427,
+    NID_aes_128_gcm = 895,
+    NID_aes_192_gcm = 898,
+    NID_aes_256_gcm = 901,
     NID_aes_128_ctr = 904,
     NID_aes_192_ctr = 905,
     NID_aes_256_ctr = 906,
@@ -196,10 +303,29 @@
     NID_des_ede3_cbc=  44,
     NID_des_ede3_ecb=  33,
     NID_idea_cbc    =  34,
+    NID_aes_128_cfb1= 650,
+    NID_aes_192_cfb1= 651,
+    NID_aes_256_cfb1= 652,
+    NID_aes_128_cfb8= 653,
+    NID_aes_192_cfb8= 654,
+    NID_aes_256_cfb8= 655,
+    NID_aes_128_cfb128 = 421,
+    NID_aes_192_cfb128 = 425,
+    NID_aes_256_cfb128 = 429,
+    NID_aes_128_ofb = 420,
+    NID_aes_192_ofb = 424,
+    NID_aes_256_ofb = 428,
+    NID_aes_128_xts = 913,
+    NID_aes_256_xts = 914
 };
 
+#define NID_X9_62_id_ecPublicKey EVP_PKEY_EC
+#define NID_dhKeyAgreement       EVP_PKEY_DH
+#define NID_rsaEncryption        EVP_PKEY_RSA
+#define NID_dsa                  EVP_PKEY_DSA
+
 #define WOLFSSL_EVP_BUF_SIZE 16
-typedef struct WOLFSSL_EVP_CIPHER_CTX {
+struct WOLFSSL_EVP_CIPHER_CTX {
     int            keyLen;         /* user may set for variable */
     int            block_size;
     unsigned long  flags;
@@ -211,30 +337,45 @@
 #elif !defined(NO_DES3)
     /* working iv pointer into cipher */
     ALIGN16 unsigned char  iv[DES_BLOCK_SIZE];
+#elif defined(HAVE_IDEA)
+    /* working iv pointer into cipher */
+    ALIGN16 unsigned char  iv[IDEA_BLOCK_SIZE];
 #endif
     WOLFSSL_Cipher  cipher;
     ALIGN16 byte buf[WOLFSSL_EVP_BUF_SIZE];
     int  bufUsed;
     ALIGN16 byte lastBlock[WOLFSSL_EVP_BUF_SIZE];
     int  lastUsed;
-} WOLFSSL_EVP_CIPHER_CTX;
+#if !defined(NO_AES) || !defined(NO_DES3) || defined(HAVE_IDEA) || \
+    defined(HAVE_AESGCM) || defined (WOLFSSL_AES_XTS)
+#define HAVE_WOLFSSL_EVP_CIPHER_CTX_IV
+    int    ivSz;
+    ALIGN16 unsigned char authTag[AES_BLOCK_SIZE];
+    int     authTagSz;
+#endif
+};
 
-typedef struct  WOLFSSL_EVP_PKEY_CTX {
+struct WOLFSSL_EVP_PKEY_CTX {
     WOLFSSL_EVP_PKEY *pkey;
+    WOLFSSL_EVP_PKEY *peerKey;
     int op; /* operation */
     int padding;
-} WOLFSSL_EVP_PKEY_CTX;
+    int nbits;
+};
 
 typedef int WOLFSSL_ENGINE  ;
 typedef WOLFSSL_ENGINE ENGINE;
 typedef WOLFSSL_EVP_PKEY_CTX EVP_PKEY_CTX;
 
+#define EVP_PKEY_OP_SIGN    (1 << 3)
 #define EVP_PKEY_OP_ENCRYPT (1 << 6)
 #define EVP_PKEY_OP_DECRYPT (1 << 7)
+#define EVP_PKEY_OP_DERIVE  (1 << 8)
 
 WOLFSSL_API void wolfSSL_EVP_init(void);
 WOLFSSL_API int  wolfSSL_EVP_MD_size(const WOLFSSL_EVP_MD* md);
 WOLFSSL_API int  wolfSSL_EVP_MD_type(const WOLFSSL_EVP_MD *md);
+WOLFSSL_API int  wolfSSL_EVP_MD_block_size(const WOLFSSL_EVP_MD *md);
 
 WOLFSSL_API WOLFSSL_EVP_MD_CTX *wolfSSL_EVP_MD_CTX_new (void);
 WOLFSSL_API void                wolfSSL_EVP_MD_CTX_free(WOLFSSL_EVP_MD_CTX* ctx);
@@ -243,9 +384,12 @@
 WOLFSSL_API int  wolfSSL_EVP_MD_CTX_copy(WOLFSSL_EVP_MD_CTX *out, const WOLFSSL_EVP_MD_CTX *in);
 WOLFSSL_API int  wolfSSL_EVP_MD_CTX_copy_ex(WOLFSSL_EVP_MD_CTX *out, const WOLFSSL_EVP_MD_CTX *in);
 WOLFSSL_API int  wolfSSL_EVP_MD_CTX_type(const WOLFSSL_EVP_MD_CTX *ctx);
+WOLFSSL_API int  wolfSSL_EVP_MD_CTX_size(const WOLFSSL_EVP_MD_CTX *ctx);
+WOLFSSL_API int  wolfSSL_EVP_MD_CTX_block_size(const WOLFSSL_EVP_MD_CTX *ctx);
 WOLFSSL_API const WOLFSSL_EVP_MD *wolfSSL_EVP_MD_CTX_md(const WOLFSSL_EVP_MD_CTX *ctx);
 WOLFSSL_API const WOLFSSL_EVP_CIPHER *wolfSSL_EVP_get_cipherbyname(const char *name);
 WOLFSSL_API const WOLFSSL_EVP_MD     *wolfSSL_EVP_get_digestbyname(const char *name);
+WOLFSSL_API int wolfSSL_EVP_CIPHER_nid(const WOLFSSL_EVP_CIPHER *cipher);
 
 WOLFSSL_API int wolfSSL_EVP_DigestInit(WOLFSSL_EVP_MD_CTX* ctx,
                                      const WOLFSSL_EVP_MD* type);
@@ -269,6 +413,21 @@
 WOLFSSL_API int wolfSSL_EVP_DigestSignFinal(WOLFSSL_EVP_MD_CTX *ctx,
                                             unsigned char *sig, size_t *siglen);
 
+WOLFSSL_API int wolfSSL_EVP_DigestVerifyInit(WOLFSSL_EVP_MD_CTX *ctx,
+                                             WOLFSSL_EVP_PKEY_CTX **pctx,
+                                             const WOLFSSL_EVP_MD *type,
+                                             WOLFSSL_ENGINE *e,
+                                             WOLFSSL_EVP_PKEY *pkey);
+WOLFSSL_API int wolfSSL_EVP_DigestVerifyUpdate(WOLFSSL_EVP_MD_CTX *ctx,
+                                               const void *d, size_t cnt);
+WOLFSSL_API int wolfSSL_EVP_DigestVerifyFinal(WOLFSSL_EVP_MD_CTX *ctx,
+                                              const unsigned char *sig,
+                                              size_t siglen);
+WOLFSSL_API int wolfSSL_EVP_Digest(const unsigned char* in, int inSz, unsigned char* out,
+                              unsigned int* outSz, const WOLFSSL_EVP_MD* evp,
+                              WOLFSSL_ENGINE* eng);
+
+
 WOLFSSL_API int wolfSSL_EVP_BytesToKey(const WOLFSSL_EVP_CIPHER*,
                               const WOLFSSL_EVP_MD*, const unsigned char*,
                               const unsigned char*, int, int, unsigned char*,
@@ -276,7 +435,8 @@
 
 WOLFSSL_API void wolfSSL_EVP_CIPHER_CTX_init(WOLFSSL_EVP_CIPHER_CTX* ctx);
 WOLFSSL_API int  wolfSSL_EVP_CIPHER_CTX_cleanup(WOLFSSL_EVP_CIPHER_CTX* ctx);
-
+WOLFSSL_API int  wolfSSL_EVP_CIPHER_CTX_ctrl(WOLFSSL_EVP_CIPHER_CTX *ctx, \
+                                             int type, int arg, void *ptr);
 WOLFSSL_API int  wolfSSL_EVP_CIPHER_CTX_iv_length(const WOLFSSL_EVP_CIPHER_CTX*);
 WOLFSSL_API int  wolfSSL_EVP_CIPHER_iv_length(const WOLFSSL_EVP_CIPHER*);
 WOLFSSL_API int wolfSSL_EVP_Cipher_key_length(const WOLFSSL_EVP_CIPHER* c);
@@ -326,32 +486,67 @@
                                    unsigned char *out, int *outl);
 WOLFSSL_API int  wolfSSL_EVP_DecryptFinal_ex(WOLFSSL_EVP_CIPHER_CTX *ctx,
                                    unsigned char *out, int *outl);
+WOLFSSL_API int  wolfSSL_EVP_DecryptFinal_legacy(WOLFSSL_EVP_CIPHER_CTX *ctx,
+                                   unsigned char *out, int *outl);
 
 WOLFSSL_API WOLFSSL_EVP_CIPHER_CTX *wolfSSL_EVP_CIPHER_CTX_new(void);
 WOLFSSL_API void wolfSSL_EVP_CIPHER_CTX_free(WOLFSSL_EVP_CIPHER_CTX *ctx);
+WOLFSSL_API int  wolfSSL_EVP_CIPHER_CTX_reset(WOLFSSL_EVP_CIPHER_CTX *ctx);
 WOLFSSL_API int  wolfSSL_EVP_CIPHER_CTX_key_length(WOLFSSL_EVP_CIPHER_CTX* ctx);
 WOLFSSL_API int  wolfSSL_EVP_CIPHER_CTX_set_key_length(WOLFSSL_EVP_CIPHER_CTX* ctx,
                                                      int keylen);
+WOLFSSL_API int  wolfSSL_EVP_CIPHER_CTX_set_iv_length(WOLFSSL_EVP_CIPHER_CTX* ctx,
+                                                     int ivLen);
+WOLFSSL_API int  wolfSSL_EVP_CIPHER_CTX_set_iv(WOLFSSL_EVP_CIPHER_CTX* ctx, byte* iv,
+                                                     int ivLen);
 WOLFSSL_API int  wolfSSL_EVP_Cipher(WOLFSSL_EVP_CIPHER_CTX* ctx,
                           unsigned char* dst, unsigned char* src,
                           unsigned int len);
 
 WOLFSSL_API const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_get_cipherbynid(int);
 WOLFSSL_API const WOLFSSL_EVP_MD* wolfSSL_EVP_get_digestbynid(int);
+WOLFSSL_API const WOLFSSL_EVP_CIPHER *wolfSSL_EVP_CIPHER_CTX_cipher(const WOLFSSL_EVP_CIPHER_CTX *ctx);
 
+WOLFSSL_API int wolfSSL_EVP_PKEY_assign_RSA(WOLFSSL_EVP_PKEY* pkey,
+                                            WOLFSSL_RSA* key);
+WOLFSSL_API int wolfSSL_EVP_PKEY_assign_EC_KEY(WOLFSSL_EVP_PKEY* pkey,
+                                               WOLFSSL_EC_KEY* key);
+WOLFSSL_API int wolfSSL_EVP_PKEY_assign_DSA(EVP_PKEY* pkey, WOLFSSL_DSA* key);
+WOLFSSL_API int wolfSSL_EVP_PKEY_assign_DH(EVP_PKEY* pkey, WOLFSSL_DH* key);
+WOLFSSL_API WOLFSSL_RSA* wolfSSL_EVP_PKEY_get0_RSA(struct WOLFSSL_EVP_PKEY *pkey);
 WOLFSSL_API WOLFSSL_RSA* wolfSSL_EVP_PKEY_get1_RSA(WOLFSSL_EVP_PKEY*);
 WOLFSSL_API WOLFSSL_DSA* wolfSSL_EVP_PKEY_get1_DSA(WOLFSSL_EVP_PKEY*);
+WOLFSSL_API WOLFSSL_EC_KEY *wolfSSL_EVP_PKEY_get0_EC_KEY(WOLFSSL_EVP_PKEY *pkey);
 WOLFSSL_API WOLFSSL_EC_KEY *wolfSSL_EVP_PKEY_get1_EC_KEY(WOLFSSL_EVP_PKEY *key);
+WOLFSSL_API WOLFSSL_DH* wolfSSL_EVP_PKEY_get0_DH(WOLFSSL_EVP_PKEY* key);
+WOLFSSL_API WOLFSSL_DH* wolfSSL_EVP_PKEY_get1_DH(WOLFSSL_EVP_PKEY* key);
 WOLFSSL_API int wolfSSL_EVP_PKEY_set1_RSA(WOLFSSL_EVP_PKEY *pkey, WOLFSSL_RSA *key);
+WOLFSSL_API int wolfSSL_EVP_PKEY_set1_DSA(WOLFSSL_EVP_PKEY *pkey, WOLFSSL_DSA *key);
+WOLFSSL_API int wolfSSL_EVP_PKEY_set1_DH(WOLFSSL_EVP_PKEY *pkey, WOLFSSL_DH *key);
+WOLFSSL_API int wolfSSL_EVP_PKEY_set1_EC_KEY(WOLFSSL_EVP_PKEY *pkey, WOLFSSL_EC_KEY *key);
+WOLFSSL_API int wolfSSL_EVP_PKEY_assign(WOLFSSL_EVP_PKEY *pkey, int type, void *key);
 
 WOLFSSL_API WOLFSSL_EVP_PKEY* wolfSSL_EVP_PKEY_new_mac_key(int type, ENGINE* e,
                                           const unsigned char* key, int keylen);
 WOLFSSL_API const unsigned char* wolfSSL_EVP_PKEY_get0_hmac(const WOLFSSL_EVP_PKEY* pkey,
         size_t* len);
+WOLFSSL_API int wolfSSL_EVP_PKEY_sign_init(WOLFSSL_EVP_PKEY_CTX *ctx);
+WOLFSSL_API int wolfSSL_EVP_PKEY_sign(WOLFSSL_EVP_PKEY_CTX *ctx,
+  unsigned char *sig, size_t *siglen, const unsigned char *tbs, size_t tbslen);
+WOLFSSL_API int wolfSSL_EVP_PKEY_keygen_init(WOLFSSL_EVP_PKEY_CTX *ctx);
+WOLFSSL_API int wolfSSL_EVP_PKEY_keygen(WOLFSSL_EVP_PKEY_CTX *ctx,
+  WOLFSSL_EVP_PKEY **ppkey);
 WOLFSSL_API int wolfSSL_EVP_PKEY_bits(const WOLFSSL_EVP_PKEY *pkey);
 WOLFSSL_API int wolfSSL_EVP_PKEY_CTX_free(WOLFSSL_EVP_PKEY_CTX *ctx);
 WOLFSSL_API WOLFSSL_EVP_PKEY_CTX *wolfSSL_EVP_PKEY_CTX_new(WOLFSSL_EVP_PKEY *pkey, WOLFSSL_ENGINE *e);
 WOLFSSL_API int wolfSSL_EVP_PKEY_CTX_set_rsa_padding(WOLFSSL_EVP_PKEY_CTX *ctx, int padding);
+WOLFSSL_API WOLFSSL_EVP_PKEY_CTX *wolfSSL_EVP_PKEY_CTX_new_id(int id, WOLFSSL_ENGINE *e);
+WOLFSSL_API int wolfSSL_EVP_PKEY_CTX_set_rsa_keygen_bits(WOLFSSL_EVP_PKEY_CTX *ctx, int bits);
+
+WOLFSSL_API int wolfSSL_EVP_PKEY_derive_init(WOLFSSL_EVP_PKEY_CTX *ctx);
+WOLFSSL_API int wolfSSL_EVP_PKEY_derive_set_peer(WOLFSSL_EVP_PKEY_CTX *ctx, WOLFSSL_EVP_PKEY *peer);
+WOLFSSL_API int wolfSSL_EVP_PKEY_derive(WOLFSSL_EVP_PKEY_CTX *ctx, unsigned char *key, size_t *keylen);
+
 WOLFSSL_API int wolfSSL_EVP_PKEY_decrypt(WOLFSSL_EVP_PKEY_CTX *ctx,
                      unsigned char *out, size_t *outlen,
                      const unsigned char *in, size_t inlen);
@@ -361,13 +556,20 @@
                      const unsigned char *in, size_t inlen);
 WOLFSSL_API int wolfSSL_EVP_PKEY_encrypt_init(WOLFSSL_EVP_PKEY_CTX *ctx);
 WOLFSSL_API WOLFSSL_EVP_PKEY *wolfSSL_EVP_PKEY_new(void);
+WOLFSSL_API WOLFSSL_EVP_PKEY* wolfSSL_EVP_PKEY_new_ex(void* heap);
 WOLFSSL_API void wolfSSL_EVP_PKEY_free(WOLFSSL_EVP_PKEY*);
 WOLFSSL_API int wolfSSL_EVP_PKEY_size(WOLFSSL_EVP_PKEY *pkey);
+WOLFSSL_API int wolfSSL_EVP_PKEY_missing_parameters(WOLFSSL_EVP_PKEY *pkey);
+WOLFSSL_API int wolfSSL_EVP_PKEY_cmp(const WOLFSSL_EVP_PKEY *a, const WOLFSSL_EVP_PKEY *b);
 WOLFSSL_API int wolfSSL_EVP_PKEY_type(int type);
+WOLFSSL_API int wolfSSL_EVP_PKEY_id(const EVP_PKEY *pkey);
 WOLFSSL_API int wolfSSL_EVP_PKEY_base_id(const EVP_PKEY *pkey);
 WOLFSSL_API int wolfSSL_EVP_SignFinal(WOLFSSL_EVP_MD_CTX *ctx, unsigned char *sigret,
                   unsigned int *siglen, WOLFSSL_EVP_PKEY *pkey);
 WOLFSSL_API int wolfSSL_EVP_SignInit(WOLFSSL_EVP_MD_CTX *ctx, const WOLFSSL_EVP_MD *type);
+WOLFSSL_API int wolfSSL_EVP_SignInit_ex(WOLFSSL_EVP_MD_CTX* ctx,
+                                     const WOLFSSL_EVP_MD* type,
+                                     WOLFSSL_ENGINE *impl);
 WOLFSSL_API int wolfSSL_EVP_SignUpdate(WOLFSSL_EVP_MD_CTX *ctx, const void *data, size_t len);
 WOLFSSL_API int wolfSSL_EVP_VerifyFinal(WOLFSSL_EVP_MD_CTX *ctx,
         unsigned char* sig, unsigned int sig_len, WOLFSSL_EVP_PKEY *pkey);
@@ -393,22 +595,31 @@
 WOLFSSL_API unsigned long WOLFSSL_CIPHER_mode(const WOLFSSL_EVP_CIPHER *cipher);
 WOLFSSL_API unsigned long wolfSSL_EVP_CIPHER_flags(const WOLFSSL_EVP_CIPHER *cipher);
 WOLFSSL_API void wolfSSL_EVP_CIPHER_CTX_set_flags(WOLFSSL_EVP_CIPHER_CTX *ctx, int flags);
+WOLFSSL_API void wolfSSL_EVP_CIPHER_CTX_clear_flags(WOLFSSL_EVP_CIPHER_CTX *ctx, int flags);
 WOLFSSL_API unsigned long wolfSSL_EVP_CIPHER_CTX_mode(const WOLFSSL_EVP_CIPHER_CTX *ctx);
 WOLFSSL_API int  wolfSSL_EVP_CIPHER_CTX_set_padding(WOLFSSL_EVP_CIPHER_CTX *c, int pad);
 WOLFSSL_API int  wolfSSL_EVP_add_digest(const WOLFSSL_EVP_MD *digest);
 WOLFSSL_API int  wolfSSL_EVP_add_cipher(const WOLFSSL_EVP_CIPHER *cipher);
 WOLFSSL_API void wolfSSL_EVP_cleanup(void);
 WOLFSSL_API int  wolfSSL_add_all_algorithms(void);
-
-#ifdef OPENSSL_EXTRA
-WOLFSSL_API int  wolfSSL_OPENSSL_add_all_algorithms_noconf(void);
-#endif
+WOLFSSL_API int  wolfSSL_OpenSSL_add_all_algorithms_conf(void);
+WOLFSSL_API int  wolfSSL_OpenSSL_add_all_algorithms_noconf(void);
+WOLFSSL_API int wolfSSL_EVP_read_pw_string(char*, int, const char*, int);
 
 WOLFSSL_API int wolfSSL_PKCS5_PBKDF2_HMAC_SHA1(const char * pass, int passlen,
                                                const unsigned char * salt,
                                                int saltlen, int iter,
                                                int keylen, unsigned char *out);
 
+WOLFSSL_API int wolfSSL_PKCS5_PBKDF2_HMAC(const char *pass, int passlen,
+                                           const unsigned char *salt,
+                                           int saltlen, int iter,
+                                           const WOLFSSL_EVP_MD *digest,
+                                           int keylen, unsigned char *out);
+
+WOLFSSL_LOCAL int wolfSSL_EVP_get_hashinfo(const WOLFSSL_EVP_MD* evp,
+                                           int* pHash, int* pHashSz);
+
 #define EVP_CIPH_STREAM_CIPHER WOLFSSL_EVP_CIPH_STREAM_CIPHER
 #define EVP_CIPH_ECB_MODE WOLFSSL_EVP_CIPH_ECB_MODE
 #define EVP_CIPH_CBC_MODE WOLFSSL_EVP_CIPH_CBC_MODE
@@ -417,6 +628,7 @@
 #define EVP_CIPH_CTR_MODE WOLFSSL_EVP_CIPH_CTR_MODE
 #define EVP_CIPH_GCM_MODE WOLFSSL_EVP_CIPH_GCM_MODE
 #define EVP_CIPH_CCM_MODE WOLFSSL_EVP_CIPH_CCM_MODE
+#define EVP_CIPH_XTS_MODE WOLFSSL_EVP_CIPH_XTS_MODE
 
 #define WOLFSSL_EVP_CIPH_MODE           0x0007
 #define WOLFSSL_EVP_CIPH_STREAM_CIPHER      0x0
@@ -427,7 +639,9 @@
 #define WOLFSSL_EVP_CIPH_CTR_MODE           0x5
 #define WOLFSSL_EVP_CIPH_GCM_MODE           0x6
 #define WOLFSSL_EVP_CIPH_CCM_MODE           0x7
+#define WOLFSSL_EVP_CIPH_XTS_MODE          0x10
 #define WOLFSSL_EVP_CIPH_NO_PADDING       0x100
+#define EVP_CIPH_VARIABLE_LENGTH          0x200
 #define WOLFSSL_EVP_CIPH_TYPE_INIT         0xff
 
 /* end OpenSSH compat */
@@ -444,6 +658,7 @@
     #define EVP_md5       wolfSSL_EVP_md5
 #endif
 #define EVP_sha1      wolfSSL_EVP_sha1
+#define EVP_mdc2      wolfSSL_EVP_mdc2
 #define EVP_dds1      wolfSSL_EVP_sha1
 #define EVP_sha224    wolfSSL_EVP_sha224
 #define EVP_sha256    wolfSSL_EVP_sha256
@@ -451,43 +666,72 @@
 #define EVP_sha512    wolfSSL_EVP_sha512
 #define EVP_ripemd160 wolfSSL_EVP_ripemd160
 
-#define EVP_aes_128_cbc  wolfSSL_EVP_aes_128_cbc
-#define EVP_aes_192_cbc  wolfSSL_EVP_aes_192_cbc
-#define EVP_aes_256_cbc  wolfSSL_EVP_aes_256_cbc
-#define EVP_aes_128_ecb  wolfSSL_EVP_aes_128_ecb
-#define EVP_aes_192_ecb  wolfSSL_EVP_aes_192_ecb
-#define EVP_aes_256_ecb  wolfSSL_EVP_aes_256_ecb
-#define EVP_aes_128_ctr  wolfSSL_EVP_aes_128_ctr
-#define EVP_aes_192_ctr  wolfSSL_EVP_aes_192_ctr
-#define EVP_aes_256_ctr  wolfSSL_EVP_aes_256_ctr
-#define EVP_des_cbc      wolfSSL_EVP_des_cbc
-#define EVP_des_ecb      wolfSSL_EVP_des_ecb
-#define EVP_des_ede3_cbc wolfSSL_EVP_des_ede3_cbc
-#define EVP_des_ede3_ecb wolfSSL_EVP_des_ede3_ecb
-#define EVP_rc4          wolfSSL_EVP_rc4
-#define EVP_idea_cbc     wolfSSL_EVP_idea_cbc
-#define EVP_enc_null     wolfSSL_EVP_enc_null
+#define EVP_sha3_224    wolfSSL_EVP_sha3_224
+#define EVP_sha3_256    wolfSSL_EVP_sha3_256
+#define EVP_sha3_384    wolfSSL_EVP_sha3_384
+#define EVP_sha3_512    wolfSSL_EVP_sha3_512
 
-#define EVP_MD_size        wolfSSL_EVP_MD_size
-#define EVP_MD_CTX_new     wolfSSL_EVP_MD_CTX_new
-#define EVP_MD_CTX_create  wolfSSL_EVP_MD_CTX_new
-#define EVP_MD_CTX_free    wolfSSL_EVP_MD_CTX_free
-#define EVP_MD_CTX_destroy wolfSSL_EVP_MD_CTX_free
-#define EVP_MD_CTX_init    wolfSSL_EVP_MD_CTX_init
-#define EVP_MD_CTX_cleanup wolfSSL_EVP_MD_CTX_cleanup
-#define EVP_MD_CTX_md      wolfSSL_EVP_MD_CTX_md
-#define EVP_MD_CTX_type    wolfSSL_EVP_MD_CTX_type
-#define EVP_MD_type        wolfSSL_EVP_MD_type
+#define EVP_aes_128_cbc    wolfSSL_EVP_aes_128_cbc
+#define EVP_aes_192_cbc    wolfSSL_EVP_aes_192_cbc
+#define EVP_aes_256_cbc    wolfSSL_EVP_aes_256_cbc
+#define EVP_aes_128_cfb1   wolfSSL_EVP_aes_128_cfb1
+#define EVP_aes_192_cfb1   wolfSSL_EVP_aes_192_cfb1
+#define EVP_aes_256_cfb1   wolfSSL_EVP_aes_256_cfb1
+#define EVP_aes_128_cfb8   wolfSSL_EVP_aes_128_cfb8
+#define EVP_aes_192_cfb8   wolfSSL_EVP_aes_192_cfb8
+#define EVP_aes_256_cfb8   wolfSSL_EVP_aes_256_cfb8
+#define EVP_aes_128_cfb128 wolfSSL_EVP_aes_128_cfb128
+#define EVP_aes_192_cfb128 wolfSSL_EVP_aes_192_cfb128
+#define EVP_aes_256_cfb128 wolfSSL_EVP_aes_256_cfb128
+#define EVP_aes_128_ofb    wolfSSL_EVP_aes_128_ofb
+#define EVP_aes_192_ofb    wolfSSL_EVP_aes_192_ofb
+#define EVP_aes_256_ofb    wolfSSL_EVP_aes_256_ofb
+#define EVP_aes_128_xts    wolfSSL_EVP_aes_128_xts
+#define EVP_aes_256_xts    wolfSSL_EVP_aes_256_xts
+#define EVP_aes_128_gcm    wolfSSL_EVP_aes_128_gcm
+#define EVP_aes_192_gcm    wolfSSL_EVP_aes_192_gcm
+#define EVP_aes_256_gcm    wolfSSL_EVP_aes_256_gcm
+#define EVP_aes_128_ecb    wolfSSL_EVP_aes_128_ecb
+#define EVP_aes_192_ecb    wolfSSL_EVP_aes_192_ecb
+#define EVP_aes_256_ecb    wolfSSL_EVP_aes_256_ecb
+#define EVP_aes_128_ctr    wolfSSL_EVP_aes_128_ctr
+#define EVP_aes_192_ctr    wolfSSL_EVP_aes_192_ctr
+#define EVP_aes_256_ctr    wolfSSL_EVP_aes_256_ctr
+#define EVP_des_cbc        wolfSSL_EVP_des_cbc
+#define EVP_des_ecb        wolfSSL_EVP_des_ecb
+#define EVP_des_ede3_cbc   wolfSSL_EVP_des_ede3_cbc
+#define EVP_des_ede3_ecb   wolfSSL_EVP_des_ede3_ecb
+#define EVP_rc4            wolfSSL_EVP_rc4
+#define EVP_idea_cbc       wolfSSL_EVP_idea_cbc
+#define EVP_enc_null       wolfSSL_EVP_enc_null
 
-#define EVP_DigestInit     wolfSSL_EVP_DigestInit
-#define EVP_DigestInit_ex  wolfSSL_EVP_DigestInit_ex
-#define EVP_DigestUpdate   wolfSSL_EVP_DigestUpdate
-#define EVP_DigestFinal    wolfSSL_EVP_DigestFinal
-#define EVP_DigestFinal_ex wolfSSL_EVP_DigestFinal_ex
-#define EVP_DigestSignInit   wolfSSL_EVP_DigestSignInit
-#define EVP_DigestSignUpdate wolfSSL_EVP_DigestSignUpdate
-#define EVP_DigestSignFinal  wolfSSL_EVP_DigestSignFinal
-#define EVP_BytesToKey     wolfSSL_EVP_BytesToKey
+#define EVP_MD_size             wolfSSL_EVP_MD_size
+#define EVP_MD_CTX_new          wolfSSL_EVP_MD_CTX_new
+#define EVP_MD_CTX_create       wolfSSL_EVP_MD_CTX_new
+#define EVP_MD_CTX_free         wolfSSL_EVP_MD_CTX_free
+#define EVP_MD_CTX_destroy      wolfSSL_EVP_MD_CTX_free
+#define EVP_MD_CTX_init         wolfSSL_EVP_MD_CTX_init
+#define EVP_MD_CTX_cleanup      wolfSSL_EVP_MD_CTX_cleanup
+#define EVP_MD_CTX_reset        wolfSSL_EVP_MD_CTX_cleanup
+#define EVP_MD_CTX_md           wolfSSL_EVP_MD_CTX_md
+#define EVP_MD_CTX_type         wolfSSL_EVP_MD_CTX_type
+#define EVP_MD_CTX_size         wolfSSL_EVP_MD_CTX_size
+#define EVP_MD_CTX_block_size   wolfSSL_EVP_MD_CTX_block_size
+#define EVP_MD_type             wolfSSL_EVP_MD_type
+
+#define EVP_Digest             wolfSSL_EVP_Digest
+#define EVP_DigestInit         wolfSSL_EVP_DigestInit
+#define EVP_DigestInit_ex      wolfSSL_EVP_DigestInit_ex
+#define EVP_DigestUpdate       wolfSSL_EVP_DigestUpdate
+#define EVP_DigestFinal        wolfSSL_EVP_DigestFinal
+#define EVP_DigestFinal_ex     wolfSSL_EVP_DigestFinal_ex
+#define EVP_DigestSignInit     wolfSSL_EVP_DigestSignInit
+#define EVP_DigestSignUpdate   wolfSSL_EVP_DigestSignUpdate
+#define EVP_DigestSignFinal    wolfSSL_EVP_DigestSignFinal
+#define EVP_DigestVerifyInit   wolfSSL_EVP_DigestVerifyInit
+#define EVP_DigestVerifyUpdate wolfSSL_EVP_DigestVerifyUpdate
+#define EVP_DigestVerifyFinal  wolfSSL_EVP_DigestVerifyFinal
+#define EVP_BytesToKey         wolfSSL_EVP_BytesToKey
 
 #define EVP_get_cipherbyname wolfSSL_EVP_get_cipherbyname
 #define EVP_get_digestbyname wolfSSL_EVP_get_digestbyname
@@ -498,6 +742,7 @@
 #define EVP_CIPHER_CTX_key_length     wolfSSL_EVP_CIPHER_CTX_key_length
 #define EVP_CIPHER_CTX_set_key_length wolfSSL_EVP_CIPHER_CTX_set_key_length
 #define EVP_CIPHER_CTX_mode           wolfSSL_EVP_CIPHER_CTX_mode
+#define EVP_CIPHER_CTX_cipher         wolfSSL_EVP_CIPHER_CTX_cipher
 
 #define EVP_CIPHER_iv_length          wolfSSL_EVP_CIPHER_iv_length
 #define EVP_CIPHER_key_length         wolfSSL_EVP_Cipher_key_length
@@ -521,6 +766,7 @@
 #define EVP_DecryptFinal_ex           wolfSSL_EVP_CipherFinal
 
 #define EVP_CIPHER_CTX_free           wolfSSL_EVP_CIPHER_CTX_free
+#define EVP_CIPHER_CTX_reset          wolfSSL_EVP_CIPHER_CTX_reset
 #define EVP_CIPHER_CTX_new            wolfSSL_EVP_CIPHER_CTX_new
 
 #define EVP_get_cipherbynid           wolfSSL_EVP_get_cipherbynid
@@ -528,66 +774,158 @@
 #define EVP_get_cipherbyname          wolfSSL_EVP_get_cipherbyname
 #define EVP_get_digestbyname          wolfSSL_EVP_get_digestbyname
 
-#define EVP_PKEY_get1_RSA   wolfSSL_EVP_PKEY_get1_RSA
-#define EVP_PKEY_get1_DSA   wolfSSL_EVP_PKEY_get1_DSA
-#define EVP_PKEY_set1_RSA   wolfSSL_EVP_PKEY_set1_RSA
-#define EVP_PKEY_get1_EC_KEY wolfSSL_EVP_PKEY_get1_EC_KEY
-#define EVP_PKEY_get0_hmac   wolfSSL_EVP_PKEY_get0_hmac
-#define EVP_PKEY_new_mac_key wolfSSL_EVP_PKEY_new_mac_key
-#define EVP_MD_CTX_copy     wolfSSL_EVP_MD_CTX_copy
-#define EVP_MD_CTX_copy_ex  wolfSSL_EVP_MD_CTX_copy_ex
-#define EVP_PKEY_bits       wolfSSL_EVP_PKEY_bits
-#define EVP_PKEY_CTX_free   wolfSSL_EVP_PKEY_CTX_free
-#define EVP_PKEY_CTX_new    wolfSSL_EVP_PKEY_CTX_new
-#define EVP_PKEY_CTX_set_rsa_padding wolfSSL_EVP_PKEY_CTX_set_rsa_padding
-#define EVP_PKEY_decrypt    wolfSSL_EVP_PKEY_decrypt
-#define EVP_PKEY_decrypt_init wolfSSL_EVP_PKEY_decrypt_init
-#define EVP_PKEY_encrypt    wolfSSL_EVP_PKEY_encrypt
-#define EVP_PKEY_encrypt_init wolfSSL_EVP_PKEY_encrypt_init
-#define EVP_PKEY_new        wolfSSL_PKEY_new
-#define EVP_PKEY_free       wolfSSL_EVP_PKEY_free
-#define EVP_PKEY_size       wolfSSL_EVP_PKEY_size
-#define EVP_PKEY_type       wolfSSL_EVP_PKEY_type
-#define EVP_PKEY_base_id    wolfSSL_EVP_PKEY_base_id
-#define EVP_SignFinal       wolfSSL_EVP_SignFinal
-#define EVP_SignInit        wolfSSL_EVP_SignInit
-#define EVP_SignUpdate      wolfSSL_EVP_SignUpdate
-#define EVP_VerifyFinal     wolfSSL_EVP_VerifyFinal
-#define EVP_VerifyInit      wolfSSL_EVP_VerifyInit
-#define EVP_VerifyUpdate    wolfSSL_EVP_VerifyUpdate
+#define EVP_PKEY_assign                wolfSSL_EVP_PKEY_assign
+#define EVP_PKEY_assign_RSA            wolfSSL_EVP_PKEY_assign_RSA
+#define EVP_PKEY_assign_DSA            wolfSSL_EVP_PKEY_assign_DSA
+#define EVP_PKEY_assign_DH             wolfSSL_EVP_PKEY_assign_DH
+#define EVP_PKEY_assign_EC_KEY         wolfSSL_EVP_PKEY_assign_EC_KEY
+#define EVP_PKEY_get1_DSA              wolfSSL_EVP_PKEY_get1_DSA
+#define EVP_PKEY_set1_DSA              wolfSSL_EVP_PKEY_set1_DSA
+#define EVP_PKEY_get0_RSA              wolfSSL_EVP_PKEY_get0_RSA
+#define EVP_PKEY_get1_RSA              wolfSSL_EVP_PKEY_get1_RSA
+#define EVP_PKEY_set1_RSA              wolfSSL_EVP_PKEY_set1_RSA
+#define EVP_PKEY_set1_EC_KEY           wolfSSL_EVP_PKEY_set1_EC_KEY
+#define EVP_PKEY_get1_EC_KEY           wolfSSL_EVP_PKEY_get1_EC_KEY
+#define EVP_PKEY_set1_DH               wolfSSL_EVP_PKEY_set1_DH
+#define EVP_PKEY_get0_DH               wolfSSL_EVP_PKEY_get0_DH
+#define EVP_PKEY_get1_DH               wolfSSL_EVP_PKEY_get1_DH
+#define EVP_PKEY_get0_EC_KEY           wolfSSL_EVP_PKEY_get0_EC_KEY
+#define EVP_PKEY_get0_hmac             wolfSSL_EVP_PKEY_get0_hmac
+#define EVP_PKEY_new_mac_key           wolfSSL_EVP_PKEY_new_mac_key
+#define EVP_MD_CTX_copy                wolfSSL_EVP_MD_CTX_copy
+#define EVP_MD_CTX_copy_ex             wolfSSL_EVP_MD_CTX_copy_ex
+#define EVP_PKEY_sign_init             wolfSSL_EVP_PKEY_sign_init
+#define EVP_PKEY_sign                  wolfSSL_EVP_PKEY_sign
+#define EVP_PKEY_keygen                wolfSSL_EVP_PKEY_keygen
+#define EVP_PKEY_keygen_init           wolfSSL_EVP_PKEY_keygen_init
+#define EVP_PKEY_bits                  wolfSSL_EVP_PKEY_bits
+#define EVP_PKEY_CTX_free              wolfSSL_EVP_PKEY_CTX_free
+#define EVP_PKEY_CTX_new               wolfSSL_EVP_PKEY_CTX_new
+#define EVP_PKEY_CTX_set_rsa_padding   wolfSSL_EVP_PKEY_CTX_set_rsa_padding
+#define EVP_PKEY_CTX_new_id            wolfSSL_EVP_PKEY_CTX_new_id
+#define EVP_PKEY_CTX_set_rsa_keygen_bits wolfSSL_EVP_PKEY_CTX_set_rsa_keygen_bits
+#define EVP_PKEY_derive_init           wolfSSL_EVP_PKEY_derive_init
+#define EVP_PKEY_derive_set_peer       wolfSSL_EVP_PKEY_derive_set_peer
+#define EVP_PKEY_derive                wolfSSL_EVP_PKEY_derive
+#define EVP_PKEY_decrypt               wolfSSL_EVP_PKEY_decrypt
+#define EVP_PKEY_decrypt_init          wolfSSL_EVP_PKEY_decrypt_init
+#define EVP_PKEY_encrypt               wolfSSL_EVP_PKEY_encrypt
+#define EVP_PKEY_encrypt_init          wolfSSL_EVP_PKEY_encrypt_init
+#define EVP_PKEY_new                   wolfSSL_EVP_PKEY_new
+#define EVP_PKEY_free                  wolfSSL_EVP_PKEY_free
+#define EVP_PKEY_up_ref                wolfSSL_EVP_PKEY_up_ref
+#define EVP_PKEY_size                  wolfSSL_EVP_PKEY_size
+#define EVP_PKEY_missing_parameters    wolfSSL_EVP_PKEY_missing_parameters
+#define EVP_PKEY_cmp                   wolfSSL_EVP_PKEY_cmp
+#define EVP_PKEY_type                  wolfSSL_EVP_PKEY_type
+#define EVP_PKEY_base_id               wolfSSL_EVP_PKEY_base_id
+#define EVP_PKEY_id                    wolfSSL_EVP_PKEY_id
+#define EVP_SignFinal                  wolfSSL_EVP_SignFinal
+#define EVP_SignInit                   wolfSSL_EVP_SignInit
+#define EVP_SignInit_ex                wolfSSL_EVP_SignInit_ex
+#define EVP_SignUpdate                 wolfSSL_EVP_SignUpdate
+#define EVP_VerifyFinal                wolfSSL_EVP_VerifyFinal
+#define EVP_VerifyInit                 wolfSSL_EVP_VerifyInit
+#define EVP_VerifyUpdate               wolfSSL_EVP_VerifyUpdate
 
+#define EVP_CIPHER_CTX_ctrl        wolfSSL_EVP_CIPHER_CTX_ctrl
 #define EVP_CIPHER_CTX_block_size  wolfSSL_EVP_CIPHER_CTX_block_size
 #define EVP_CIPHER_block_size      wolfSSL_EVP_CIPHER_block_size
 #define EVP_CIPHER_flags           wolfSSL_EVP_CIPHER_flags
 #define EVP_CIPHER_CTX_set_flags   wolfSSL_EVP_CIPHER_CTX_set_flags
+#define EVP_CIPHER_CTX_clear_flags wolfSSL_EVP_CIPHER_CTX_clear_flags
 #define EVP_CIPHER_CTX_set_padding wolfSSL_EVP_CIPHER_CTX_set_padding
 #define EVP_CIPHER_CTX_flags       wolfSSL_EVP_CIPHER_CTX_flags
+#define EVP_CIPHER_CTX_set_iv      wolfSSL_EVP_CIPHER_CTX_set_iv
 #define EVP_add_digest             wolfSSL_EVP_add_digest
 #define EVP_add_cipher             wolfSSL_EVP_add_cipher
 #define EVP_cleanup                wolfSSL_EVP_cleanup
+#define EVP_read_pw_string         wolfSSL_EVP_read_pw_string
+#define EVP_rc2_cbc                wolfSSL_EVP_rc2_cbc
 
-#define OpenSSL_add_all_digests()  wolfCrypt_Init()
-#define OpenSSL_add_all_ciphers()  wolfCrypt_Init()
+#define OpenSSL_add_all_digests()  wolfSSL_EVP_init()
+#define OpenSSL_add_all_ciphers()  wolfSSL_EVP_init()
 #define OpenSSL_add_all_algorithms wolfSSL_add_all_algorithms
-#define OPENSSL_add_all_algorithms_noconf wolfSSL_OPENSSL_add_all_algorithms_noconf
+#define OpenSSL_add_all_algorithms_noconf wolfSSL_OpenSSL_add_all_algorithms_noconf
+#define OpenSSL_add_all_algorithms_conf   wolfSSL_OpenSSL_add_all_algorithms_conf
+
+#define wolfSSL_OPENSSL_add_all_algorithms_noconf wolfSSL_OpenSSL_add_all_algorithms_noconf
+#define wolfSSL_OPENSSL_add_all_algorithms_conf   wolfSSL_OpenSSL_add_all_algorithms_conf
+
+/* provides older OpenSSL API compatibility  */
+#define OPENSSL_add_all_algorithms        OpenSSL_add_all_algorithms
+#define OPENSSL_add_all_algorithms_noconf OpenSSL_add_all_algorithms_noconf
+#define OPENSSL_add_all_algorithms_conf   OpenSSL_add_all_algorithms_conf
+
+#define NO_PADDING_BLOCK_SIZE      1
 
 #define PKCS5_PBKDF2_HMAC_SHA1     wolfSSL_PKCS5_PBKDF2_HMAC_SHA1
+#define PKCS5_PBKDF2_HMAC          wolfSSL_PKCS5_PBKDF2_HMAC
+
+/* OpenSSL compat. ctrl values */
+#define EVP_CTRL_INIT                  0x0
+#define EVP_CTRL_SET_KEY_LENGTH        0x1
+#define EVP_CTRL_SET_RC2_KEY_BITS      0x3  /* needed for qt compilation */
+
+#define EVP_CTRL_AEAD_SET_IVLEN        0x9
+#define EVP_CTRL_AEAD_GET_TAG          0x10
+#define EVP_CTRL_AEAD_SET_TAG          0x11
+#define EVP_CTRL_AEAD_SET_IV_FIXED     0x12
+#define EVP_CTRL_GCM_IV_GEN            0x13
+#define EVP_CTRL_GCM_SET_IVLEN         EVP_CTRL_AEAD_SET_IVLEN
+#define EVP_CTRL_GCM_GET_TAG           EVP_CTRL_AEAD_GET_TAG
+#define EVP_CTRL_GCM_SET_TAG           EVP_CTRL_AEAD_SET_TAG
+#define EVP_CTRL_GCM_SET_IV_FIXED      EVP_CTRL_AEAD_SET_IV_FIXED
+
+#define EVP_PKEY_print_private(arg1, arg2, arg3, arg4)
 
 #ifndef EVP_MAX_MD_SIZE
     #define EVP_MAX_MD_SIZE   64     /* sha512 */
 #endif
 
+#ifndef EVP_MAX_KEY_LENGTH
+#define EVP_MAX_KEY_LENGTH    64
+#endif
+
+#ifndef EVP_MAX_IV_LENGTH
+#define EVP_MAX_IV_LENGTH     16
+#endif
+
 #ifndef EVP_MAX_BLOCK_LENGTH
     #define EVP_MAX_BLOCK_LENGTH   32  /* 2 * blocklen(AES)? */
     /* They define this as 32. Using the same value here. */
 #endif
 
+#ifndef EVP_MAX_IV_LENGTH
+    #define EVP_MAX_IV_LENGTH       16
+#endif
+
+
+#define EVP_R_BAD_DECRYPT               (-MIN_CODE_E + 100 + 1)
+#define EVP_R_BN_DECODE_ERROR           (-MIN_CODE_E + 100 + 2)
+#define EVP_R_DECODE_ERROR              (-MIN_CODE_E + 100 + 3)
+#define EVP_R_PRIVATE_KEY_DECODE_ERROR  (-MIN_CODE_E + 100 + 4)
+
+#define EVP_PKEY_NONE                   NID_undef
+#define EVP_PKEY_RSA                    6
+#define EVP_PKEY_RSA2                   19
+#define EVP_PKEY_DH                     28
+#define EVP_CIPHER_mode                 WOLFSSL_CIPHER_mode
+/* WOLFSSL_EVP_CIPHER is just the string name of the cipher */
+#define EVP_CIPHER_name(x)              x
+#define EVP_MD_CTX_reset                wolfSSL_EVP_MD_CTX_cleanup
+/* WOLFSSL_EVP_MD is just the string name of the digest */
+#define EVP_MD_name(x)                  x
+#define EVP_CIPHER_nid                  wolfSSL_EVP_CIPHER_nid
+
+
 WOLFSSL_API void printPKEY(WOLFSSL_EVP_PKEY *k);
 
 #ifdef __cplusplus
     } /* extern "C" */
 #endif
 
+#include <wolfssl/openssl/objects.h>
 
 #endif /* WOLFSSL_EVP_H_ */
 
--- a/wolfssl/openssl/hmac.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/openssl/hmac.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* hmac.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -58,9 +58,11 @@
 } WOLFSSL_HMAC_CTX;
 
 
+WOLFSSL_API WOLFSSL_HMAC_CTX* wolfSSL_HMAC_CTX_new(void);
 WOLFSSL_API int wolfSSL_HMAC_CTX_Init(WOLFSSL_HMAC_CTX* ctx);
 WOLFSSL_API int wolfSSL_HMAC_CTX_copy(WOLFSSL_HMAC_CTX* des,
                                        WOLFSSL_HMAC_CTX* src);
+WOLFSSL_LOCAL int wolfSSL_HmacCopy(Hmac* des, Hmac* src);
 WOLFSSL_API int wolfSSL_HMAC_Init(WOLFSSL_HMAC_CTX* ctx, const void* key,
                                  int keylen, const EVP_MD* type);
 WOLFSSL_API int wolfSSL_HMAC_Init_ex(WOLFSSL_HMAC_CTX* ctx, const void* key,
@@ -70,18 +72,24 @@
 WOLFSSL_API int wolfSSL_HMAC_Final(WOLFSSL_HMAC_CTX* ctx, unsigned char* hash,
                                   unsigned int* len);
 WOLFSSL_API int wolfSSL_HMAC_cleanup(WOLFSSL_HMAC_CTX* ctx);
+WOLFSSL_API void wolfSSL_HMAC_CTX_free(WOLFSSL_HMAC_CTX* ctx);
+WOLFSSL_API size_t wolfSSL_HMAC_size(const WOLFSSL_HMAC_CTX *ctx);
 
 typedef struct WOLFSSL_HMAC_CTX HMAC_CTX;
 
 #define HMAC(a,b,c,d,e,f,g) wolfSSL_HMAC((a),(b),(c),(d),(e),(f),(g))
 
+#define HMAC_CTX_new wolfSSL_HMAC_CTX_new
 #define HMAC_CTX_init wolfSSL_HMAC_CTX_Init
 #define HMAC_CTX_copy wolfSSL_HMAC_CTX_copy
+#define HMAC_CTX_free wolfSSL_HMAC_CTX_free
+#define HMAC_CTX_reset wolfSSL_HMAC_cleanup
 #define HMAC_Init_ex  wolfSSL_HMAC_Init_ex
 #define HMAC_Init     wolfSSL_HMAC_Init
 #define HMAC_Update   wolfSSL_HMAC_Update
 #define HMAC_Final    wolfSSL_HMAC_Final
 #define HMAC_cleanup  wolfSSL_HMAC_cleanup
+#define HMAC_size     wolfSSL_HMAC_size
 
 
 #ifdef __cplusplus
--- a/wolfssl/openssl/include.am	Sat Aug 18 22:20:43 2018 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,45 +0,0 @@
-# vim:ft=automake
-# All paths should be given relative to the root
-
-nobase_include_HEADERS+= \
-                         wolfssl/openssl/asn1.h \
-                         wolfssl/openssl/aes.h\
-                         wolfssl/openssl/bio.h \
-                         wolfssl/openssl/bn.h \
-                         wolfssl/openssl/buffer.h \
-                         wolfssl/openssl/conf.h \
-                         wolfssl/openssl/crypto.h \
-                         wolfssl/openssl/des.h \
-                         wolfssl/openssl/dh.h \
-                         wolfssl/openssl/dsa.h \
-                         wolfssl/openssl/ecdsa.h \
-                         wolfssl/openssl/ecdh.h \
-                         wolfssl/openssl/ec.h \
-                         wolfssl/openssl/ec25519.h \
-                         wolfssl/openssl/ed25519.h \
-                         wolfssl/openssl/engine.h \
-                         wolfssl/openssl/err.h \
-                         wolfssl/openssl/evp.h \
-                         wolfssl/openssl/hmac.h \
-                         wolfssl/openssl/lhash.h \
-                         wolfssl/openssl/md4.h \
-                         wolfssl/openssl/md5.h \
-                         wolfssl/openssl/ripemd.h \
-                         wolfssl/openssl/objects.h \
-                         wolfssl/openssl/ocsp.h \
-                         wolfssl/openssl/opensslconf.h \
-                         wolfssl/openssl/opensslv.h \
-                         wolfssl/openssl/ossl_typ.h \
-                         wolfssl/openssl/pem.h \
-                         wolfssl/openssl/pkcs12.h \
-                         wolfssl/openssl/rand.h \
-                         wolfssl/openssl/rsa.h \
-                         wolfssl/openssl/sha.h \
-                         wolfssl/openssl/ssl23.h \
-                         wolfssl/openssl/ssl.h \
-                         wolfssl/openssl/stack.h \
-                         wolfssl/openssl/ui.h \
-                         wolfssl/openssl/x509.h \
-                         wolfssl/openssl/x509v3.h \
-                         wolfssl/openssl/rc4.h
-
--- a/wolfssl/openssl/md4.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/openssl/md4.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* md4.h
  *
- * Copyright (C) 2006-2016 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
--- a/wolfssl/openssl/md5.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/openssl/md5.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* md5.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -41,7 +41,12 @@
 
 
 typedef struct WOLFSSL_MD5_CTX {
-    void* holder[(112 + WC_ASYNC_DEV_SIZE) / sizeof(void*)];   /* big enough to hold wolfcrypt md5, but check on init */
+    /* big enough to hold wolfcrypt md5, but check on init */
+#ifdef STM32_HASH
+    void* holder[(112 + WC_ASYNC_DEV_SIZE + sizeof(STM32_HASH_Context)) / sizeof(void*)];
+#else
+    void* holder[(112 + WC_ASYNC_DEV_SIZE) / sizeof(void*)];
+#endif
 } WOLFSSL_MD5_CTX;
 
 WOLFSSL_API int wolfSSL_MD5_Init(WOLFSSL_MD5_CTX*);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wolfssl/openssl/obj_mac.h	Thu Jun 04 23:57:22 2020 +0000
@@ -0,0 +1,51 @@
+/* obj_mac.h
+ *
+ * Copyright (C) 2006-2017 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/* obj_mac.h for openSSL */
+
+#ifndef WOLFSSL_OBJ_MAC_H_
+#define WOLFSSL_OBJ_MAC_H_
+#ifdef __cplusplus
+    extern "C" {
+#endif
+
+#define NID_sect163k1 721
+#define NID_sect163r1 722
+#define NID_sect163r2 723
+#define NID_sect193r1 724
+#define NID_sect193r2 725
+#define NID_sect233k1 726
+#define NID_sect233r1 727
+#define NID_sect239k1 728
+#define NID_sect283k1 729
+#define NID_sect283r1 730
+#define NID_sect409k1 731
+#define NID_sect409r1 732
+#define NID_sect571k1 733
+#define NID_sect571r1 734
+
+#ifdef __cplusplus
+    } /* extern "C" */
+#endif
+
+#endif /* WOLFSSL_OBJ_MAC_H_ */
+
+
--- a/wolfssl/openssl/objects.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/openssl/objects.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* objects.h
  *
- * Copyright (C) 2006-2016 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -24,7 +24,11 @@
 #define WOLFSSL_OBJECTS_H_
 
 #include <wolfssl/wolfcrypt/settings.h>
-#include <wolfssl/openssl/ssl.h>
+//#include <wolfssl/openssl/ssl.h>
+#ifndef OPENSSL_EXTRA_SSL_GUARD
+#define OPENSSL_EXTRA_SSL_GUARD
+#include <wolfssl/ssl.h>
+#endif /* OPENSSL_EXTRA_SSL_GUARD */
 
 #ifdef __cplusplus
     extern "C" {
@@ -34,12 +38,23 @@
 #define OBJ_obj2nid wolfSSL_OBJ_obj2nid
 #define OBJ_sn2nid  wolfSSL_OBJ_sn2nid
 #define OBJ_nid2ln  wolfSSL_OBJ_nid2ln
+#define OBJ_ln2nid  wolfSSL_OBJ_ln2nid
 #define OBJ_txt2nid wolfSSL_OBJ_txt2nid
+#define OBJ_txt2obj wolfSSL_OBJ_txt2obj
 #define OBJ_nid2obj wolfSSL_OBJ_nid2obj
 #define OBJ_obj2txt wolfSSL_OBJ_obj2txt
 #define OBJ_cleanup wolfSSL_OBJ_cleanup
+#define OBJ_cmp     wolfSSL_OBJ_cmp
+#define OBJ_create  wolfSSL_OBJ_create
 #define ASN1_OBJECT_free wolfSSL_ASN1_OBJECT_free
 
+/* not required for wolfSSL */
+#define OPENSSL_load_builtin_modules()
+
+
+#define NID_ad_OCSP                     178
+#define NID_ad_ca_issuers               179
+
 
 #ifdef __cplusplus
     }  /* extern "C" */
--- a/wolfssl/openssl/ocsp.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/openssl/ocsp.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* ocsp.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -33,8 +33,15 @@
 #define OCSP_CERTID               WOLFSSL_OCSP_CERTID
 #define OCSP_ONEREQ               WOLFSSL_OCSP_ONEREQ
 
+#define OCSP_REVOKED_STATUS_NOSTATUS     -1
+
+
 #define OCSP_RESPONSE_STATUS_SUCCESSFUL  0
+#define OCSP_RESPONSE_STATUS_TRYLATER    3
+
 #define V_OCSP_CERTSTATUS_GOOD           0
+#define V_OCSP_CERTSTATUS_REVOKED        1
+#define V_OCSP_CERTSTATUS_UNKNOWN        2
 
 #define OCSP_resp_find_status     wolfSSL_OCSP_resp_find_status
 #define OCSP_cert_status_str      wolfSSL_OCSP_cert_status_str
@@ -53,11 +60,21 @@
 #define OCSP_response_status      wolfSSL_OCSP_response_status
 #define OCSP_response_status_str  wolfSSL_OCSP_response_status_str
 #define OCSP_response_get1_basic  wolfSSL_OCSP_response_get1_basic
+#define OCSP_response_create      wolfSSL_OCSP_response_create
 
 #define OCSP_REQUEST_new          wolfSSL_OCSP_REQUEST_new
 #define OCSP_REQUEST_free         wolfSSL_OCSP_REQUEST_free
 #define i2d_OCSP_REQUEST          wolfSSL_i2d_OCSP_REQUEST
 #define OCSP_request_add0_id      wolfSSL_OCSP_request_add0_id
+#define OCSP_request_add1_nonce   wolfSSL_OCSP_request_add1_nonce
+#define OCSP_check_nonce          wolfSSL_OCSP_check_nonce
+#define OCSP_id_get0_info         wolfSSL_OCSP_id_get0_info
+#define OCSP_crl_reason_str       wolfSSL_OCSP_crl_reason_str
+#define OCSP_REQUEST_add_ext      wolfSSL_OCSP_REQUEST_add_ext
+
+#define OCSP_CERTID_dup           wolfSSL_OCSP_CERTID_dup
+
+#define i2d_OCSP_REQUEST_bio      wolfSSL_i2d_OCSP_REQUEST_bio
 
 #endif /* HAVE_OCSP */
 
--- a/wolfssl/openssl/opensslv.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/openssl/opensslv.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* opensslv.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -26,17 +26,21 @@
 
 
 /* api version compatibility */
-#if defined(OPENSSL_ALL) || defined(HAVE_STUNNEL) || defined(HAVE_LIGHTY) || \
-    defined(WOLFSSL_NGINX) || defined(WOLFSSL_HAPROXY)
+#if defined(WOLFSSL_APACHE_HTTPD)
+    /* For Apache httpd, Use 1.1.0 compatibility */
+     #define OPENSSL_VERSION_NUMBER 0x10100000L
+#elif defined(OPENSSL_ALL) || defined(HAVE_STUNNEL) || defined(HAVE_LIGHTY) || \
+    defined(WOLFSSL_NGINX) || defined(WOLFSSL_HAPROXY) || \
+    defined(WOLFSSL_OPENSSH) || defined(WOLFSSL_QT)
      /* version number can be increased for Lighty after compatibility for ECDH
         is added */
-     #define OPENSSL_VERSION_NUMBER 0x10001000L
+     #define OPENSSL_VERSION_NUMBER 0x1000100fL
 #else
      #define OPENSSL_VERSION_NUMBER 0x0090810fL
 #endif
 
 #define OPENSSL_VERSION_TEXT             LIBWOLFSSL_VERSION_STRING
-
+#define OPENSSL_VERSION                  0
 
 #endif /* header */
 
--- a/wolfssl/openssl/ossl_typ.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/openssl/ossl_typ.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,3 +1,33 @@
-/* ossl_typ.h for openssl */
+/* ossl_typ.h
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
 
+/*!
+    \file wolfssl/openssl/ossl_typ.h
+*/
+
+
+#ifndef WOLFSSL_OSSL_TYP_H_
+#define WOLFSSL_OSSL_TYP_H_
+
+#include <wolfssl/openssl/ssl.h>
+
+#endif /* !WOLFSSL_OSSL_TYP_H_ */
 
--- a/wolfssl/openssl/pem.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/openssl/pem.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* pem.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -33,6 +33,7 @@
 #include <wolfssl/openssl/bio.h>
 #include <wolfssl/openssl/rsa.h>
 #include <wolfssl/openssl/dsa.h>
+#include <wolfssl/ssl.h>
 
 #ifdef __cplusplus
     extern "C" {
@@ -49,24 +50,38 @@
                                                   WOLFSSL_RSA**,
                                                   pem_password_cb* cb,
                                                   void* arg);
+
+WOLFSSL_API
+int wolfSSL_PEM_write_bio_RSA_PUBKEY(WOLFSSL_BIO* bio, WOLFSSL_RSA* rsa);
+
+WOLFSSL_API
+WOLFSSL_RSA *wolfSSL_PEM_read_bio_RSA_PUBKEY(WOLFSSL_BIO* bio,
+                                             WOLFSSL_RSA** rsa,
+                                             pem_password_cb* cb, void *u);
+
+WOLFSSL_API
+WOLFSSL_EC_GROUP* wolfSSL_PEM_read_bio_ECPKParameters(WOLFSSL_BIO* bio,
+                                                      WOLFSSL_EC_GROUP** group,
+                                                      pem_password_cb* cb,
+                                                      void* pass);
 WOLFSSL_API
 int wolfSSL_PEM_write_mem_RSAPrivateKey(RSA* rsa, const EVP_CIPHER* cipher,
                                         unsigned char* passwd, int len,
                                         unsigned char **pem, int *plen);
 #if !defined(NO_FILESYSTEM)
 WOLFSSL_API
-int wolfSSL_PEM_write_RSAPrivateKey(FILE *fp, WOLFSSL_RSA *rsa,
+int wolfSSL_PEM_write_RSAPrivateKey(XFILE fp, WOLFSSL_RSA *rsa,
                                     const EVP_CIPHER *enc,
                                     unsigned char *kstr, int klen,
                                     pem_password_cb *cb, void *u);
 WOLFSSL_API
-WOLFSSL_RSA *wolfSSL_PEM_read_RSAPublicKey(FILE *fp, WOLFSSL_RSA **x,
+WOLFSSL_RSA *wolfSSL_PEM_read_RSAPublicKey(XFILE fp, WOLFSSL_RSA **x,
                                            pem_password_cb *cb, void *u);
 WOLFSSL_API
-int wolfSSL_PEM_write_RSAPublicKey(FILE *fp, WOLFSSL_RSA *x);
+int wolfSSL_PEM_write_RSAPublicKey(XFILE fp, WOLFSSL_RSA *x);
 
 WOLFSSL_API
-int wolfSSL_PEM_write_RSA_PUBKEY(FILE *fp, WOLFSSL_RSA *x);
+int wolfSSL_PEM_write_RSA_PUBKEY(XFILE fp, WOLFSSL_RSA *x);
 #endif /* NO_FILESYSTEM */
 
 /* DSA */
@@ -76,6 +91,19 @@
                                         const EVP_CIPHER* cipher,
                                         unsigned char* passwd, int len,
                                         pem_password_cb* cb, void* arg);
+
+WOLFSSL_API
+WOLFSSL_DSA* wolfSSL_PEM_read_bio_DSAPrivateKey(WOLFSSL_BIO* bio,
+                                                WOLFSSL_DSA** dsa,
+                                                pem_password_cb* cb,void *pass);
+
+WOLFSSL_API
+WOLFSSL_DSA *wolfSSL_PEM_read_bio_DSA_PUBKEY(WOLFSSL_BIO* bio,WOLFSSL_DSA** dsa,
+                                               pem_password_cb* cb, void *pass);
+
+WOLFSSL_API
+int wolfSSL_PEM_write_bio_DSA_PUBKEY(WOLFSSL_BIO* bio, WOLFSSL_DSA* dsa);
+
 WOLFSSL_API
 int wolfSSL_PEM_write_mem_DSAPrivateKey(WOLFSSL_DSA* dsa,
                                         const EVP_CIPHER* cipher,
@@ -83,12 +111,12 @@
                                         unsigned char **pem, int *plen);
 #if !defined(NO_FILESYSTEM)
 WOLFSSL_API
-int wolfSSL_PEM_write_DSAPrivateKey(FILE *fp, WOLFSSL_DSA *dsa,
+int wolfSSL_PEM_write_DSAPrivateKey(XFILE fp, WOLFSSL_DSA *dsa,
                                     const EVP_CIPHER *enc,
                                     unsigned char *kstr, int klen,
                                     pem_password_cb *cb, void *u);
 WOLFSSL_API
-int wolfSSL_PEM_write_DSA_PUBKEY(FILE *fp, WOLFSSL_DSA *x);
+int wolfSSL_PEM_write_DSA_PUBKEY(XFILE fp, WOLFSSL_DSA *x);
 #endif /* NO_FILESYSTEM */
 
 /* ECC */
@@ -98,18 +126,31 @@
                                        unsigned char* passwd, int len,
                                        pem_password_cb* cb, void* arg);
 WOLFSSL_API
+WOLFSSL_EC_KEY* wolfSSL_PEM_read_bio_ECPrivateKey(WOLFSSL_BIO* bio,
+                                                  WOLFSSL_EC_KEY** ec,
+                                                  pem_password_cb* cb,
+                                                  void *pass);
+WOLFSSL_API
+int wolfSSL_PEM_write_bio_EC_PUBKEY(WOLFSSL_BIO* bio, WOLFSSL_EC_KEY* ec);
+
+WOLFSSL_API
 int wolfSSL_PEM_write_mem_ECPrivateKey(WOLFSSL_EC_KEY* key,
                                        const EVP_CIPHER* cipher,
                                        unsigned char* passwd, int len,
                                        unsigned char **pem, int *plen);
 #if !defined(NO_FILESYSTEM)
 WOLFSSL_API
-int wolfSSL_PEM_write_ECPrivateKey(FILE *fp, WOLFSSL_EC_KEY *key,
+int wolfSSL_PEM_write_ECPrivateKey(XFILE fp, WOLFSSL_EC_KEY *key,
                                    const EVP_CIPHER *enc,
                                    unsigned char *kstr, int klen,
                                    pem_password_cb *cb, void *u);
 WOLFSSL_API
-int wolfSSL_PEM_write_EC_PUBKEY(FILE *fp, WOLFSSL_EC_KEY *key);
+int wolfSSL_PEM_write_EC_PUBKEY(XFILE fp, WOLFSSL_EC_KEY *key);
+
+WOLFSSL_API
+WOLFSSL_EC_KEY* wolfSSL_PEM_read_bio_EC_PUBKEY(WOLFSSL_BIO* bio,
+                                               WOLFSSL_EC_KEY** ec,
+                                               pem_password_cb* cb, void *pass);
 #endif /* NO_FILESYSTEM */
 
 /* EVP_KEY */
@@ -119,44 +160,93 @@
                                                   pem_password_cb* cb,
                                                   void* arg);
 WOLFSSL_API
+WOLFSSL_EVP_PKEY *wolfSSL_PEM_read_bio_PUBKEY(WOLFSSL_BIO* bio,
+                                              WOLFSSL_EVP_PKEY **key,
+                                              pem_password_cb *cb, void *pass);
+WOLFSSL_API
 int wolfSSL_PEM_write_bio_PrivateKey(WOLFSSL_BIO* bio, WOLFSSL_EVP_PKEY* key,
                                         const WOLFSSL_EVP_CIPHER* cipher,
                                         unsigned char* passwd, int len,
                                         pem_password_cb* cb, void* arg);
+WOLFSSL_API
+int wolfSSL_PEM_write_bio_PUBKEY(WOLFSSL_BIO* bio, WOLFSSL_EVP_PKEY* key);
+
+
+WOLFSSL_API
+int wolfSSL_PEM_read_bio(WOLFSSL_BIO* bio, char **name, char **header,
+                         unsigned char **data, long *len);
+WOLFSSL_API
+int wolfSSL_PEM_write_bio(WOLFSSL_BIO *bio, const char *name,
+                          const char *header, const unsigned char *data,
+                          long len);
+#if !defined(NO_FILESYSTEM)
+WOLFSSL_API
+int wolfSSL_PEM_read(XFILE fp, char **name, char **header, unsigned char **data,
+                     long *len);
+WOLFSSL_API
+int wolfSSL_PEM_write(XFILE fp, const char *name, const char *header,
+                      const unsigned char *data, long len);
+#endif
 
 #if !defined(NO_FILESYSTEM)
 WOLFSSL_API
-WOLFSSL_EVP_PKEY *wolfSSL_PEM_read_PUBKEY(FILE *fp, EVP_PKEY **x,
-										  pem_password_cb *cb, void *u);
+WOLFSSL_EVP_PKEY *wolfSSL_PEM_read_PUBKEY(XFILE fp, EVP_PKEY **x,
+                                          pem_password_cb *cb, void *u);
 WOLFSSL_API
-WOLFSSL_X509 *wolfSSL_PEM_read_X509(FILE *fp, WOLFSSL_X509 **x,
+WOLFSSL_X509 *wolfSSL_PEM_read_X509(XFILE fp, WOLFSSL_X509 **x,
                                           pem_password_cb *cb, void *u);
 WOLFSSL_API
-WOLFSSL_EVP_PKEY *wolfSSL_PEM_read_PrivateKey(FILE *fp, WOLFSSL_EVP_PKEY **x,
+WOLFSSL_EVP_PKEY *wolfSSL_PEM_read_PrivateKey(XFILE fp, WOLFSSL_EVP_PKEY **x,
                                           pem_password_cb *cb, void *u);
+
+WOLFSSL_API
+int wolfSSL_PEM_write_X509(XFILE fp, WOLFSSL_X509 *x);
+WOLFSSL_API
+int wolfSSL_PEM_write_DHparams(XFILE fp, WOLFSSL_DH* dh);
 #endif /* NO_FILESYSTEM */
 
-#define PEM_read_X509               wolfSSL_PEM_read_X509
-#define PEM_read_PrivateKey         wolfSSL_PEM_read_PrivateKey
-#define PEM_write_bio_PrivateKey    wolfSSL_PEM_write_bio_PrivateKey
+#define PEM_read                        wolfSSL_PEM_read
+#define PEM_read_bio                    wolfSSL_PEM_read_bio
+#define PEM_write                       wolfSSL_PEM_write
+#define PEM_write_bio                   wolfSSL_PEM_write_bio
+
+#define PEM_read_X509                   wolfSSL_PEM_read_X509
+#define PEM_read_PrivateKey             wolfSSL_PEM_read_PrivateKey
+#define PEM_write_X509                  wolfSSL_PEM_write_X509
+#define PEM_write_bio_PrivateKey        wolfSSL_PEM_write_bio_PrivateKey
+#define PEM_write_bio_PKCS8PrivateKey   wolfSSL_PEM_write_bio_PKCS8PrivateKey
+
+/* DH */
+#define PEM_write_DHparams              wolfSSL_PEM_write_DHparams
 /* RSA */
-#define PEM_write_bio_RSAPrivateKey wolfSSL_PEM_write_bio_RSAPrivateKey
-#define PEM_read_bio_RSAPrivateKey  wolfSSL_PEM_read_bio_RSAPrivateKey
-#define PEM_write_RSAPrivateKey     wolfSSL_PEM_write_RSAPrivateKey
-#define PEM_write_RSA_PUBKEY        wolfSSL_PEM_write_RSA_PUBKEY
-#define PEM_write_RSAPublicKey      wolfSSL_PEM_write_RSAPublicKey
-#define PEM_read_RSAPublicKey       wolfSSL_PEM_read_RSAPublicKey
+#define PEM_write_bio_RSAPrivateKey     wolfSSL_PEM_write_bio_RSAPrivateKey
+#define PEM_read_bio_RSAPrivateKey      wolfSSL_PEM_read_bio_RSAPrivateKey
+#define PEM_write_bio_RSA_PUBKEY        wolfSSL_PEM_write_bio_RSA_PUBKEY
+#define PEM_read_bio_RSA_PUBKEY         wolfSSL_PEM_read_bio_RSA_PUBKEY
+#define PEM_read_bio_ECPKParameters     wolfSSL_PEM_read_bio_ECPKParameters
+#define PEM_write_RSAPrivateKey         wolfSSL_PEM_write_RSAPrivateKey
+#define PEM_write_RSA_PUBKEY            wolfSSL_PEM_write_RSA_PUBKEY
+#define PEM_write_RSAPublicKey          wolfSSL_PEM_write_RSAPublicKey
+#define PEM_read_RSAPublicKey           wolfSSL_PEM_read_RSAPublicKey
 /* DSA */
-#define PEM_write_bio_DSAPrivateKey wolfSSL_PEM_write_bio_DSAPrivateKey
-#define PEM_write_DSAPrivateKey     wolfSSL_PEM_write_DSAPrivateKey
-#define PEM_write_DSA_PUBKEY        wolfSSL_PEM_write_DSA_PUBKEY
+#define PEM_write_bio_DSAPrivateKey     wolfSSL_PEM_write_bio_DSAPrivateKey
+#define PEM_write_DSAPrivateKey         wolfSSL_PEM_write_DSAPrivateKey
+#define PEM_write_bio_DSA_PUBKEY        wolfSSL_PEM_write_bio_DSA_PUBKEY
+#define PEM_write_DSA_PUBKEY            wolfSSL_PEM_write_DSA_PUBKEY
+#define PEM_read_bio_DSAPrivateKey      wolfSSL_PEM_read_bio_DSAPrivateKey
+#define PEM_read_bio_DSA_PUBKEY         wolfSSL_PEM_read_bio_DSA_PUBKEY
 /* ECC */
-#define PEM_write_bio_ECPrivateKey wolfSSL_PEM_write_bio_ECPrivateKey
-#define PEM_write_EC_PUBKEY        wolfSSL_PEM_write_EC_PUBKEY
-#define PEM_write_ECPrivateKey     wolfSSL_PEM_write_ECPrivateKey
+#define PEM_write_bio_ECPrivateKey      wolfSSL_PEM_write_bio_ECPrivateKey
+#define PEM_write_bio_EC_PUBKEY         wolfSSL_PEM_write_bio_EC_PUBKEY
+#define PEM_write_EC_PUBKEY             wolfSSL_PEM_write_EC_PUBKEY
+#define PEM_write_ECPrivateKey          wolfSSL_PEM_write_ECPrivateKey
+#define PEM_read_bio_ECPrivateKey       wolfSSL_PEM_read_bio_ECPrivateKey
+#define PEM_read_bio_EC_PUBKEY          wolfSSL_PEM_read_bio_EC_PUBKEY
 /* EVP_KEY */
-#define PEM_read_bio_PrivateKey wolfSSL_PEM_read_bio_PrivateKey
-#define PEM_read_PUBKEY         wolfSSL_PEM_read_PUBKEY
+#define PEM_read_bio_PrivateKey         wolfSSL_PEM_read_bio_PrivateKey
+#define PEM_read_PUBKEY                 wolfSSL_PEM_read_PUBKEY
+#define PEM_read_bio_PUBKEY             wolfSSL_PEM_read_bio_PUBKEY
+#define PEM_write_bio_PUBKEY            wolfSSL_PEM_write_bio_PUBKEY
 
 #ifdef __cplusplus
     }  /* extern "C" */ 
--- a/wolfssl/openssl/pkcs12.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/openssl/pkcs12.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* pkcs12.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wolfssl/openssl/pkcs7.h	Thu Jun 04 23:57:22 2020 +0000
@@ -0,0 +1,80 @@
+/* pkcs7.h
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/* pkcs7.h for openSSL */
+
+
+#ifndef WOLFSSL_PKCS7_H_
+#define WOLFSSL_PKCS7_H_
+
+#include <wolfssl/openssl/ssl.h>
+#include <wolfssl/wolfcrypt/pkcs7.h>
+
+#ifdef __cplusplus
+    extern "C" {
+#endif
+
+#if defined(OPENSSL_ALL) && defined(HAVE_PKCS7)
+
+#define PKCS7_NOINTERN         0x0010
+#define PKCS7_NOVERIFY         0x0020
+
+
+typedef struct WOLFSSL_PKCS7
+{
+    PKCS7 pkcs7;
+    unsigned char* data;
+    int len;
+} WOLFSSL_PKCS7;
+
+
+WOLFSSL_API PKCS7* wolfSSL_PKCS7_new(void);
+WOLFSSL_API PKCS7_SIGNED* wolfSSL_PKCS7_SIGNED_new(void);
+WOLFSSL_API void wolfSSL_PKCS7_free(PKCS7* p7);
+WOLFSSL_API void wolfSSL_PKCS7_SIGNED_free(PKCS7_SIGNED* p7);
+WOLFSSL_API PKCS7* wolfSSL_d2i_PKCS7(PKCS7** p7, const unsigned char** in,
+    int len);
+WOLFSSL_API PKCS7* wolfSSL_d2i_PKCS7_bio(WOLFSSL_BIO* bio, PKCS7** p7);
+WOLFSSL_API int wolfSSL_PKCS7_verify(PKCS7* p7, WOLFSSL_STACK* certs,
+    WOLFSSL_X509_STORE* store, WOLFSSL_BIO* in, WOLFSSL_BIO* out, int flags);
+WOLFSSL_API WOLFSSL_STACK* wolfSSL_PKCS7_get0_signers(PKCS7* p7,
+    WOLFSSL_STACK* certs, int flags);
+WOLFSSL_API int wolfSSL_PEM_write_bio_PKCS7(WOLFSSL_BIO* bio, PKCS7* p7);
+
+#define PKCS7_new                      wolfSSL_PKCS7_new
+#define PKCS7_SIGNED_new               wolfSSL_PKCS7_SIGNED_new
+#define PKCS7_free                     wolfSSL_PKCS7_free
+#define PKCS7_SIGNED_free              wolfSSL_PKCS7_SIGNED_free
+#define d2i_PKCS7                      wolfSSL_d2i_PKCS7
+#define d2i_PKCS7_bio                  wolfSSL_d2i_PKCS7_bio
+#define PKCS7_verify                   wolfSSL_PKCS7_verify
+#define PKCS7_get0_signers             wolfSSL_PKCS7_get0_signers
+#define PEM_write_bio_PKCS7            wolfSSL_PEM_write_bio_PKCS7
+
+#endif /* OPENSSL_ALL && HAVE_PKCS7 */
+
+#ifdef __cplusplus
+    }  /* extern "C" */
+#endif
+
+#endif /* WOLFSSL_PKCS7_H_ */
+
+
--- a/wolfssl/openssl/rand.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/openssl/rand.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* rand.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
--- a/wolfssl/openssl/rc4.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/openssl/rc4.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* rc4.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
--- a/wolfssl/openssl/ripemd.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/openssl/ripemd.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* ripemd.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
--- a/wolfssl/openssl/rsa.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/openssl/rsa.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* rsa.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -26,7 +26,8 @@
 #define WOLFSSL_RSA_H_
 
 #include <wolfssl/openssl/bn.h>
-
+#include <wolfssl/openssl/err.h>
+#include <wolfssl/wolfcrypt/types.h>
 
 #ifdef __cplusplus
     extern "C" {
@@ -35,33 +36,65 @@
 /* Padding types */
 #define RSA_PKCS1_PADDING      0
 #define RSA_PKCS1_OAEP_PADDING 1
+#define RSA_PKCS1_PSS_PADDING  2
+#define RSA_NO_PADDING         3
+
+/* Emulate OpenSSL flags */
+#define RSA_METHOD_FLAG_NO_CHECK        (1 << 1)
+#define RSA_FLAG_CACHE_PUBLIC           (1 << 2)
+#define RSA_FLAG_CACHE_PRIVATE          (1 << 3)
+#define RSA_FLAG_BLINDING               (1 << 4)
+#define RSA_FLAG_THREAD_SAFE            (1 << 5)
+#define RSA_FLAG_EXT_PKEY               (1 << 6)
+#define RSA_FLAG_NO_BLINDING            (1 << 7)
+#define RSA_FLAG_NO_CONSTTIME           (1 << 8)
+
+/* Salt length same as digest length */
+#define RSA_PSS_SALTLEN_DIGEST   -1
+/* Old max salt length */
+#define RSA_PSS_SALTLEN_MAX_SIGN -2
+/* Max salt length */
+#define RSA_PSS_SALTLEN_MAX      -3
+
+typedef struct WOLFSSL_RSA_METHOD {
+    int flags;
+    char *name;
+} WOLFSSL_RSA_METHOD;
 
 #ifndef WOLFSSL_RSA_TYPE_DEFINED /* guard on redeclaration */
-typedef struct WOLFSSL_RSA            WOLFSSL_RSA;
 #define WOLFSSL_RSA_TYPE_DEFINED
-#endif
-
-typedef WOLFSSL_RSA                   RSA;
-
-struct WOLFSSL_RSA {
+typedef struct WOLFSSL_RSA {
 #ifdef WC_RSA_BLINDING
     WC_RNG* rng;              /* for PrivateDecrypt blinding */
 #endif
-	WOLFSSL_BIGNUM* n;
-	WOLFSSL_BIGNUM* e;
-	WOLFSSL_BIGNUM* d;
-	WOLFSSL_BIGNUM* p;
-	WOLFSSL_BIGNUM* q;
-	WOLFSSL_BIGNUM* dmp1;      /* dP */
-	WOLFSSL_BIGNUM* dmq1;      /* dQ */
-	WOLFSSL_BIGNUM* iqmp;      /* u */
+    WOLFSSL_BIGNUM* n;
+    WOLFSSL_BIGNUM* e;
+    WOLFSSL_BIGNUM* d;
+    WOLFSSL_BIGNUM* p;
+    WOLFSSL_BIGNUM* q;
+    WOLFSSL_BIGNUM* dmp1;      /* dP */
+    WOLFSSL_BIGNUM* dmq1;      /* dQ */
+    WOLFSSL_BIGNUM* iqmp;      /* u */
     void*          heap;
     void*          internal;  /* our RSA */
     char           inSet;     /* internal set from external ? */
     char           exSet;     /* external set from internal ? */
     char           ownRng;    /* flag for if the rng should be free'd */
-};
+#if defined(OPENSSL_EXTRA)
+    WOLFSSL_RSA_METHOD* meth;
+#endif
+#if defined(HAVE_EX_DATA)
+    WOLFSSL_CRYPTO_EX_DATA ex_data;  /* external data */
+#endif
+#if defined(OPENSSL_EXTRA) || defined(OPENSSL_ALL)
+    wolfSSL_Mutex    refMutex;                       /* ref count mutex */
+    int              refCount;                       /* reference count */
+#endif
+} WOLFSSL_RSA;
+#endif
 
+typedef WOLFSSL_RSA                   RSA;
+typedef WOLFSSL_RSA_METHOD            RSA_METHOD;
 
 WOLFSSL_API WOLFSSL_RSA* wolfSSL_RSA_new(void);
 WOLFSSL_API void        wolfSSL_RSA_free(WOLFSSL_RSA*);
@@ -93,6 +126,26 @@
 WOLFSSL_API int wolfSSL_RSA_LoadDer(WOLFSSL_RSA*, const unsigned char*, int sz);
 WOLFSSL_API int wolfSSL_RSA_LoadDer_ex(WOLFSSL_RSA*, const unsigned char*, int sz, int opt);
 
+WOLFSSL_API WOLFSSL_RSA_METHOD *wolfSSL_RSA_meth_new(const char *name, int flags);
+WOLFSSL_API void wolfSSL_RSA_meth_free(WOLFSSL_RSA_METHOD *meth);
+WOLFSSL_API int wolfSSL_RSA_meth_set(WOLFSSL_RSA_METHOD *rsa, void* p);
+WOLFSSL_API int wolfSSL_RSA_set_method(WOLFSSL_RSA *rsa, WOLFSSL_RSA_METHOD *meth);
+WOLFSSL_API const WOLFSSL_RSA_METHOD* wolfSSL_RSA_get_method(const WOLFSSL_RSA *rsa);
+WOLFSSL_API const WOLFSSL_RSA_METHOD* wolfSSL_RSA_get_default_method(void);
+
+WOLFSSL_API void wolfSSL_RSA_get0_key(const WOLFSSL_RSA *r, const WOLFSSL_BIGNUM **n,
+                                      const WOLFSSL_BIGNUM **e, const WOLFSSL_BIGNUM **d);
+WOLFSSL_API int wolfSSL_RSA_set0_key(WOLFSSL_RSA *r, WOLFSSL_BIGNUM *n, WOLFSSL_BIGNUM *e,
+                                     WOLFSSL_BIGNUM *d);
+WOLFSSL_API int wolfSSL_RSA_flags(const WOLFSSL_RSA *r);
+WOLFSSL_API void wolfSSL_RSA_set_flags(WOLFSSL_RSA *r, int flags);
+
+WOLFSSL_API WOLFSSL_RSA* wolfSSL_RSAPublicKey_dup(WOLFSSL_RSA *rsa);
+
+WOLFSSL_API void* wolfSSL_RSA_get_ex_data(const WOLFSSL_RSA *rsa, int idx);
+WOLFSSL_API int wolfSSL_RSA_set_ex_data(WOLFSSL_RSA *rsa, int idx, void *data);
+
+
 #define WOLFSSL_RSA_LOAD_PRIVATE 1
 #define WOLFSSL_RSA_LOAD_PUBLIC  2
 #define WOLFSSL_RSA_F4           0x10001L
@@ -109,9 +162,32 @@
 
 #define RSA_size           wolfSSL_RSA_size
 #define RSA_sign           wolfSSL_RSA_sign
-#define RSA_verify          wolfSSL_RSA_verify
+#define RSA_verify         wolfSSL_RSA_verify
 #define RSA_public_decrypt wolfSSL_RSA_public_decrypt
 
+#define RSA_meth_new            wolfSSL_RSA_meth_new
+#define RSA_meth_free           wolfSSL_RSA_meth_free
+#define RSA_meth_set_pub_enc    wolfSSL_RSA_meth_set
+#define RSA_meth_set_pub_dec    wolfSSL_RSA_meth_set
+#define RSA_meth_set_priv_enc   wolfSSL_RSA_meth_set
+#define RSA_meth_set_priv_dec   wolfSSL_RSA_meth_set
+#define RSA_meth_set_init       wolfSSL_RSA_meth_set
+#define RSA_meth_set_finish     wolfSSL_RSA_meth_set
+#define RSA_meth_set0_app_data  wolfSSL_RSA_meth_set
+#define RSA_get_default_method  wolfSSL_RSA_get_default_method
+#define RSA_get_method          wolfSSL_RSA_get_method
+#define RSA_set_method          wolfSSL_RSA_set_method
+#define RSA_get0_key            wolfSSL_RSA_get0_key
+#define RSA_set0_key            wolfSSL_RSA_set0_key
+#define RSA_flags               wolfSSL_RSA_flags
+#define RSA_set_flags           wolfSSL_RSA_set_flags
+
+#define RSAPublicKey_dup        wolfSSL_RSAPublicKey_dup
+#define RSA_get_ex_data        wolfSSL_RSA_get_ex_data
+#define RSA_set_ex_data        wolfSSL_RSA_set_ex_data
+
+#define RSA_get0_key       wolfSSL_RSA_get0_key
+
 #define RSA_F4             WOLFSSL_RSA_F4
 
 #ifdef __cplusplus
--- a/wolfssl/openssl/sha.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/openssl/sha.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* sha.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -39,7 +39,14 @@
 
 typedef struct WOLFSSL_SHA_CTX {
     /* big enough to hold wolfcrypt Sha, but check on init */
+#if defined(STM32_HASH)
+    void* holder[(112 + WC_ASYNC_DEV_SIZE + sizeof(STM32_HASH_Context)) / sizeof(void*)];
+#else
     void* holder[(112 + WC_ASYNC_DEV_SIZE) / sizeof(void*)];
+#endif
+    #ifdef WOLF_CRYPTO_CB
+    void* cryptocb_holder[(sizeof(int) + sizeof(void*) + 4) / sizeof(void*)];
+    #endif
 } WOLFSSL_SHA_CTX;
 
 WOLFSSL_API int wolfSSL_SHA_Init(WOLFSSL_SHA_CTX*);
@@ -70,8 +77,8 @@
 #ifdef WOLFSSL_SHA224
 
 /* Using ALIGN16 because when AES-NI is enabled digest and buffer in Sha256
- * struct are 16 byte aligned. Any derefrence to those elements after casting to
- * Sha224, is expected to also be 16 byte aligned addresses.  */
+ * struct are 16 byte aligned. Any dereference to those elements after casting
+ * to Sha224, is expected to also be 16 byte aligned addresses.  */
 typedef struct WOLFSSL_SHA224_CTX {
     /* big enough to hold wolfcrypt Sha224, but check on init */
     ALIGN16 void* holder[(272 + WC_ASYNC_DEV_SIZE) / sizeof(void*)];
@@ -97,8 +104,8 @@
 
 
 /* Using ALIGN16 because when AES-NI is enabled digest and buffer in Sha256
- * struct are 16 byte aligned. Any derefrence to those elements after casting to
- * Sha256, is expected to also be 16 byte aligned addresses.  */
+ * struct are 16 byte aligned. Any dereference to those elements after casting
+ * to Sha256, is expected to also be 16 byte aligned addresses.  */
 typedef struct WOLFSSL_SHA256_CTX {
     /* big enough to hold wolfcrypt Sha256, but check on init */
     ALIGN16 void* holder[(272 + WC_ASYNC_DEV_SIZE) / sizeof(void*)];
@@ -119,7 +126,7 @@
 #define SHA256_Init   wolfSSL_SHA256_Init
 #define SHA256_Update wolfSSL_SHA256_Update
 #define SHA256_Final  wolfSSL_SHA256_Final
-#if defined(NO_OLD_SHA_NAMES) && !defined(HAVE_FIPS)
+#if defined(NO_OLD_SHA_NAMES) && !defined(HAVE_FIPS) && !defined(HAVE_SELFTEST)
     /* SHA256 is only available in non-fips mode because of SHA256 enum in FIPS
      * build. */
     #define SHA256 wolfSSL_SHA256
@@ -148,7 +155,7 @@
 #define SHA384_Init   wolfSSL_SHA384_Init
 #define SHA384_Update wolfSSL_SHA384_Update
 #define SHA384_Final  wolfSSL_SHA384_Final
-#if defined(NO_OLD_SHA_NAMES) && !defined(HAVE_FIPS)
+#if defined(NO_OLD_SHA_NAMES) && !defined(HAVE_FIPS) && !defined(HAVE_SELFTEST)
     /* SHA384 is only available in non-fips mode because of SHA384 enum in FIPS
      * build. */
     #define SHA384 wolfSSL_SHA384
@@ -177,7 +184,7 @@
 #define SHA512_Init   wolfSSL_SHA512_Init
 #define SHA512_Update wolfSSL_SHA512_Update
 #define SHA512_Final  wolfSSL_SHA512_Final
-#if defined(NO_OLD_SHA_NAMES) && !defined(HAVE_FIPS)
+#if defined(NO_OLD_SHA_NAMES) && !defined(HAVE_FIPS) && !defined(HAVE_SELFTEST)
     /* SHA512 is only available in non-fips mode because of SHA512 enum in FIPS
      * build. */
     #define SHA512 wolfSSL_SHA512
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wolfssl/openssl/sha3.h	Thu Jun 04 23:57:22 2020 +0000
@@ -0,0 +1,151 @@
+/* sha3.h
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/* sha3.h for openssl */
+
+
+#ifndef WOLFSSL_SHA3_H_
+#define WOLFSSL_SHA3_H_
+
+#include <wolfssl/wolfcrypt/settings.h>
+#include <wolfssl/wolfcrypt/types.h>
+
+#ifdef WOLFSSL_PREFIX
+#include "prefix_sha.h"
+#endif
+
+#ifdef __cplusplus
+    extern "C" {
+#endif
+
+
+/* Using ALIGN16 because when AES-NI is enabled digest and buffer in Sha3
+ * struct are 16 byte aligned. Any dereference to those elements after casting
+ * to Sha3 is expected to also be 16 byte aligned addresses.  */
+struct WOLFSSL_SHA3_CTX {
+    /* big enough to hold wolfcrypt Sha3, but check on init */
+    ALIGN16 void* holder[(424 + WC_ASYNC_DEV_SIZE) / sizeof(void*)];
+};
+
+#ifndef WOLFSSL_NOSHA3_224
+typedef struct WOLFSSL_SHA3_CTX WOLFSSL_SHA3_224_CTX;
+
+WOLFSSL_API int wolfSSL_SHA3_224_Init(WOLFSSL_SHA3_224_CTX*);
+WOLFSSL_API int wolfSSL_SHA3_224_Update(WOLFSSL_SHA3_224_CTX*, const void*,
+                                     unsigned long);
+WOLFSSL_API int wolfSSL_SHA3_224_Final(unsigned char*, WOLFSSL_SHA3_224_CTX*);
+
+enum {
+    SHA3_224_DIGEST_LENGTH = 28
+};
+
+typedef WOLFSSL_SHA3_224_CTX SHA3_224_CTX;
+
+#define SHA3_224_Init   wolfSSL_SHA3_224_Init
+#define SHA3_224_Update wolfSSL_SHA3_224_Update
+#define SHA3_224_Final  wolfSSL_SHA3_224_Final
+#if defined(NO_OLD_WC_NAMES) && !defined(HAVE_FIPS) && !defined(HAVE_SELFTEST)
+    #define SHA3_224 wolfSSL_SHA3_224
+#endif
+#endif /* WOLFSSL_NOSHA3_224 */
+
+
+#ifndef WOLFSSL_NOSHA3_256
+typedef struct WOLFSSL_SHA3_CTX WOLFSSL_SHA3_256_CTX;
+
+
+WOLFSSL_API int wolfSSL_SHA3_256_Init(WOLFSSL_SHA3_256_CTX*);
+WOLFSSL_API int wolfSSL_SHA3_256_Update(WOLFSSL_SHA3_256_CTX*, const void*,
+                                     unsigned long);
+WOLFSSL_API int wolfSSL_SHA3_256_Final(unsigned char*, WOLFSSL_SHA3_256_CTX*);
+
+enum {
+    SHA3_256_DIGEST_LENGTH = 32
+};
+
+
+typedef WOLFSSL_SHA3_256_CTX SHA3_256_CTX;
+
+#define SHA3_256_Init   wolfSSL_SHA3_256_Init
+#define SHA3_256_Update wolfSSL_SHA3_256_Update
+#define SHA3_256_Final  wolfSSL_SHA3_256_Final
+#if defined(NO_OLD_WC_NAMES) && !defined(HAVE_FIPS) && !defined(HAVE_SELFTEST)
+    #define SHA3_256 wolfSSL_SHA3_256
+#endif
+#endif /* WOLFSSL_NOSHA3_256 */
+
+
+typedef struct WOLFSSL_SHA3_CTX WOLFSSL_SHA3_384_CTX;
+
+WOLFSSL_API int wolfSSL_SHA3_384_Init(WOLFSSL_SHA3_384_CTX*);
+WOLFSSL_API int wolfSSL_SHA3_384_Update(WOLFSSL_SHA3_384_CTX*, const void*,
+	                                 unsigned long);
+WOLFSSL_API int wolfSSL_SHA3_384_Final(unsigned char*, WOLFSSL_SHA3_384_CTX*);
+
+enum {
+    SHA3_384_DIGEST_LENGTH = 48
+};
+
+typedef WOLFSSL_SHA3_384_CTX SHA3_384_CTX;
+
+#define SHA3_384_Init   wolfSSL_SHA3_384_Init
+#define SHA3_384_Update wolfSSL_SHA3_384_Update
+#define SHA3_384_Final  wolfSSL_SHA3_384_Final
+#if defined(NO_OLD_WC_NAMES) && !defined(HAVE_FIPS) && !defined(HAVE_SELFTEST)
+    #define SHA3_384 wolfSSL_SHA3_384
+#endif
+
+
+#ifndef WOLFSSL_NOSHA3_512
+
+typedef struct WOLFSSL_SHA3_CTX WOLFSSL_SHA3_512_CTX;
+
+WOLFSSL_API int wolfSSL_SHA3_512_Init(WOLFSSL_SHA3_512_CTX*);
+WOLFSSL_API int wolfSSL_SHA3_512_Update(WOLFSSL_SHA3_512_CTX*, const void*,
+	                                 unsigned long);
+WOLFSSL_API int wolfSSL_SHA3_512_Final(unsigned char*, WOLFSSL_SHA3_512_CTX*);
+
+enum {
+    SHA3_512_DIGEST_LENGTH = 64
+};
+
+
+typedef WOLFSSL_SHA3_512_CTX SHA3_512_CTX;
+
+#define SHA3_512_Init   wolfSSL_SHA3_512_Init
+#define SHA3_512_Update wolfSSL_SHA3_512_Update
+#define SHA3_512_Final  wolfSSL_SHA3_512_Final
+#if defined(NO_OLD_WC_NAMES) && !defined(HAVE_FIPS) && !defined(HAVE_SELFTEST)
+    #define SHA3_512 wolfSSL_SHA3_512
+#endif
+#endif /* WOLFSSL_NOSHA3_512 */
+
+
+
+
+#ifdef __cplusplus
+    }  /* extern "C" */
+#endif
+
+
+#endif /* WOLFSSL_SHA3_H_ */
+
+
--- a/wolfssl/openssl/ssl.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/openssl/ssl.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* ssl.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -35,11 +35,24 @@
 #include <wolfssl/ssl.h>
 #endif /* OPENSSL_EXTRA_SSL_GUARD */
 
+#include <wolfssl/openssl/tls1.h>
 #include <wolfssl/openssl/evp.h>
+#include <wolfssl/openssl/bio.h>
 #ifdef OPENSSL_EXTRA
 #include <wolfssl/openssl/crypto.h>
 #endif
 
+#if defined(WOLFSSL_QT) || defined(OPENSSL_ALL)
+#include <wolfssl/openssl/dh.h>
+#include <wolfssl/openssl/objects.h>
+#endif
+
+/* need MIN_CODE_E to determine wolfSSL error range */
+#include <wolfssl/wolfcrypt/error-crypt.h>
+
+/* all NID_* values are in asn.h */
+#include <wolfssl/wolfcrypt/asn.h>
+
 #ifdef __cplusplus
     extern "C" {
 #endif
@@ -63,8 +76,11 @@
 typedef WOLFSSL_X509       X509;
 typedef WOLFSSL_X509       X509_REQ;
 typedef WOLFSSL_X509_NAME  X509_NAME;
+typedef WOLFSSL_X509_INFO  X509_INFO;
 typedef WOLFSSL_X509_CHAIN X509_CHAIN;
 
+typedef WOLFSSL_STACK      EXTENDED_KEY_USAGE;
+
 
 /* redeclare guard */
 #define WOLFSSL_TYPES_DEFINED
@@ -76,37 +92,78 @@
 typedef WOLFSSL_X509_LOOKUP_METHOD X509_LOOKUP_METHOD;
 typedef WOLFSSL_X509_CRL       X509_CRL;
 typedef WOLFSSL_X509_EXTENSION X509_EXTENSION;
+typedef WOLFSSL_X509_PUBKEY    X509_PUBKEY;
+typedef WOLFSSL_X509_ALGOR     X509_ALGOR;
 typedef WOLFSSL_ASN1_TIME      ASN1_TIME;
 typedef WOLFSSL_ASN1_INTEGER   ASN1_INTEGER;
 typedef WOLFSSL_ASN1_OBJECT    ASN1_OBJECT;
 typedef WOLFSSL_ASN1_STRING    ASN1_STRING;
+typedef WOLFSSL_ASN1_TYPE      ASN1_TYPE;
+typedef WOLFSSL_ASN1_BIT_STRING ASN1_BIT_STRING;
 typedef WOLFSSL_dynlock_value  CRYPTO_dynlock_value;
 typedef WOLFSSL_BUF_MEM        BUF_MEM;
-
-/* GENERAL_NAME and BASIC_CONSTRAINTS structs may need implemented as
- * compatibility layer expands. For now treating them as an ASN1_OBJECT */
-typedef WOLFSSL_ASN1_OBJECT GENERAL_NAME;
-typedef WOLFSSL_ASN1_OBJECT BASIC_CONSTRAINTS;
+typedef WOLFSSL_GENERAL_NAMES  GENERAL_NAMES;
+typedef WOLFSSL_GENERAL_NAME GENERAL_NAME;
 
 #define ASN1_UTCTIME         WOLFSSL_ASN1_TIME
 #define ASN1_GENERALIZEDTIME WOLFSSL_ASN1_TIME
 
 typedef WOLFSSL_COMP_METHOD    COMP_METHOD;
+typedef WOLFSSL_COMP           SSL_COMP;
 typedef WOLFSSL_X509_REVOKED   X509_REVOKED;
 typedef WOLFSSL_X509_OBJECT    X509_OBJECT;
 typedef WOLFSSL_X509_STORE     X509_STORE;
 typedef WOLFSSL_X509_STORE_CTX X509_STORE_CTX;
+typedef WOLFSSL_X509_VERIFY_PARAM X509_VERIFY_PARAM;
 
-#define CRYPTO_free   XFREE
-#define CRYPTO_malloc XMALLOC
-#define CRYPTO_EX_new  WOLFSSL_CRYPTO_EX_new
-#define CRYPTO_EX_dup  WOLFSSL_CRYPTO_EX_dup
-#define CRYPTO_EX_free WOLFSSL_CRYPTO_EX_free
+#define EVP_CIPHER_INFO        EncryptedInfo
 
 #define STACK_OF(x) WOLFSSL_STACK
+#define OPENSSL_STACK WOLFSSL_STACK
+#define _STACK OPENSSL_STACK
+
+#define CONF_get1_default_config_file   wolfSSL_CONF_get1_default_config_file
+typedef STACK_OF(ACCESS_DESCRIPTION) AUTHORITY_INFO_ACCESS;
+
+#ifdef WOLFSSL_QT
+    #if defined(NO_WOLFSSL_MEMORY)
+        #define CRYPTO_free(xp)         XFREE(xp, NULL, NULL);
+    #else
+        #define CRYPTO_free(xp) { if((xp)) wolfSSL_Free((xp));}
+    #endif
+#else
+  #define CRYPTO_free                     XFREE
+#endif
+
+#define CRYPTO_malloc                   XMALLOC
+#define CRYPTO_EX_new                   WOLFSSL_CRYPTO_EX_new
+#define CRYPTO_EX_dup                   WOLFSSL_CRYPTO_EX_dup
+#define CRYPTO_EX_free                  WOLFSSL_CRYPTO_EX_free
+#define CRYPTO_EX_DATA                  WOLFSSL_CRYPTO_EX_DATA
+
+/* depreciated */
+#define CRYPTO_thread_id                wolfSSL_thread_id
+#define CRYPTO_set_id_callback          wolfSSL_set_id_callback
+
+#define CRYPTO_LOCK             0x01
+#define CRYPTO_UNLOCK           0x02
+#define CRYPTO_READ             0x04
+#define CRYPTO_WRITE            0x08
+
+#define CRYPTO_set_locking_callback     wolfSSL_set_locking_callback
+#define CRYPTO_set_dynlock_create_callback  wolfSSL_set_dynlock_create_callback
+#define CRYPTO_set_dynlock_lock_callback wolfSSL_set_dynlock_lock_callback
+#define CRYPTO_set_dynlock_destroy_callback wolfSSL_set_dynlock_destroy_callback
+#define CRYPTO_num_locks                wolfSSL_num_locks
+#define CRYPTO_dynlock_value            WOLFSSL_dynlock_value
+
+#define CRYPTO_cleanup_all_ex_data      wolfSSL_cleanup_all_ex_data
+#define set_ex_data                     wolfSSL_CRYPTO_set_ex_data
+#define get_ex_data                     wolfSSL_CRYPTO_get_ex_data
 
 /* this function was used to set the default malloc, free, and realloc */
-#define CRYPTO_malloc_init() /* CRYPTO_malloc_init is not needed */
+#define CRYPTO_malloc_init() 0 /* CRYPTO_malloc_init is not needed */
+#define OPENSSL_malloc_init() 0 /* OPENSSL_malloc_init is not needed */
 
 #define SSL_get_client_random(ssl,out,outSz) \
                                   wolfSSL_get_client_random((ssl),(out),(outSz))
@@ -115,337 +172,582 @@
 #define SSL_get_shared_ciphers(ctx,buf,len) \
                                    wolfSSL_get_shared_ciphers((ctx),(buf),(len))
 
-#define ERR_print_errors_fp(file) wolfSSL_ERR_dump_errors_fp((file))
-
 /* at the moment only returns ok */
-#define SSL_get_verify_result         wolfSSL_get_verify_result
-#define SSL_get_verify_mode           wolfSSL_SSL_get_mode
-#define SSL_get_verify_depth          wolfSSL_get_verify_depth
-#define SSL_CTX_get_verify_mode       wolfSSL_CTX_get_verify_mode
-#define SSL_CTX_get_verify_depth      wolfSSL_CTX_get_verify_depth
-#define SSL_get_certificate           wolfSSL_get_certificate
-#define SSL_use_certificate           wolfSSL_use_certificate
-#define SSL_use_certificate_ASN1      wolfSSL_use_certificate_ASN1
-#define d2i_PKCS8_PRIV_KEY_INFO_bio   wolfSSL_d2i_PKCS8_PKEY_bio
-#define PKCS8_PRIV_KEY_INFO_free      wolfSSL_EVP_PKEY_free
-#define d2i_PKCS12_fp                 wolfSSL_d2i_PKCS12_fp
+#define SSL_get_verify_result           wolfSSL_get_verify_result
+#define SSL_get_verify_mode             wolfSSL_SSL_get_mode
+#define SSL_get_verify_depth            wolfSSL_get_verify_depth
+#define SSL_CTX_get_verify_mode         wolfSSL_CTX_get_verify_mode
+#define SSL_CTX_get_verify_depth        wolfSSL_CTX_get_verify_depth
+#define SSL_get_certificate             wolfSSL_get_certificate
+#define SSL_use_certificate             wolfSSL_use_certificate
+#define SSL_use_certificate_ASN1        wolfSSL_use_certificate_ASN1
+#define d2i_PKCS8_PRIV_KEY_INFO_bio     wolfSSL_d2i_PKCS8_PKEY_bio
+#define d2i_PKCS8PrivateKey_bio         wolfSSL_d2i_PKCS8PrivateKey_bio
+#define PKCS8_PRIV_KEY_INFO_free        wolfSSL_EVP_PKEY_free
+#define d2i_PKCS12_fp                   wolfSSL_d2i_PKCS12_fp
 
-#define d2i_PUBKEY_bio             wolfSSL_d2i_PUBKEY_bio
-#define d2i_PrivateKey             wolfSSL_d2i_PrivateKey
-#define SSL_use_PrivateKey         wolfSSL_use_PrivateKey
-#define SSL_use_PrivateKey_ASN1    wolfSSL_use_PrivateKey_ASN1
-#define SSL_use_RSAPrivateKey_ASN1 wolfSSL_use_RSAPrivateKey_ASN1
-#define SSL_get_privatekey         wolfSSL_get_privatekey
+#define d2i_PUBKEY                      wolfSSL_d2i_PUBKEY
+#define d2i_PUBKEY_bio                  wolfSSL_d2i_PUBKEY_bio
+#define d2i_PrivateKey                  wolfSSL_d2i_PrivateKey
+#define d2i_AutoPrivateKey              wolfSSL_d2i_AutoPrivateKey
+#define i2d_PrivateKey                  wolfSSL_i2d_PrivateKey
+#define SSL_use_PrivateKey              wolfSSL_use_PrivateKey
+#define SSL_use_PrivateKey_ASN1         wolfSSL_use_PrivateKey_ASN1
+#define SSL_use_RSAPrivateKey_ASN1      wolfSSL_use_RSAPrivateKey_ASN1
+#define SSL_get_privatekey              wolfSSL_get_privatekey
+#define SSL_CTX_use_PrivateKey_ASN1     wolfSSL_CTX_use_PrivateKey_ASN1
 
-#define SSLv23_method       wolfSSLv23_method
-#define SSLv3_server_method wolfSSLv3_server_method
-#define SSLv3_client_method wolfSSLv3_client_method
-#define TLSv1_method        wolfTLSv1_method
-#define TLSv1_server_method wolfTLSv1_server_method
-#define TLSv1_client_method wolfTLSv1_client_method
-#define TLSv1_1_method        wolfTLSv1_1_method
-#define TLSv1_1_server_method wolfTLSv1_1_server_method
-#define TLSv1_1_client_method wolfTLSv1_1_client_method
-#define TLSv1_2_method        wolfTLSv1_2_method
-#define TLSv1_2_server_method wolfTLSv1_2_server_method
-#define TLSv1_2_client_method wolfTLSv1_2_client_method
+#define SSLv23_method                   wolfSSLv23_method
+#define SSLv23_client_method            wolfSSLv23_client_method
+#define SSLv2_client_method             wolfSSLv2_client_method
+#define SSLv2_server_method             wolfSSLv2_server_method
+#define SSLv3_server_method             wolfSSLv3_server_method
+#define SSLv3_client_method             wolfSSLv3_client_method
+#define TLS_client_method               wolfTLS_client_method
+#define TLS_server_method               wolfTLS_server_method
+#define TLSv1_method                    wolfTLSv1_method
+#define TLSv1_server_method             wolfTLSv1_server_method
+#define TLSv1_client_method             wolfTLSv1_client_method
+#define TLSv1_1_method                  wolfTLSv1_1_method
+#define TLSv1_1_server_method           wolfTLSv1_1_server_method
+#define TLSv1_1_client_method           wolfTLSv1_1_client_method
+#define TLSv1_2_method                  wolfTLSv1_2_method
+#define TLSv1_2_server_method           wolfTLSv1_2_server_method
+#define TLSv1_2_client_method           wolfTLSv1_2_client_method
+#define TLSv1_3_method                  wolfTLSv1_3_method
+#define TLSv1_3_server_method           wolfTLSv1_3_server_method
+#define TLSv1_3_client_method           wolfTLSv1_3_client_method
+#define TLS_method                      wolfSSLv23_method
 
 #define X509_FILETYPE_ASN1 SSL_FILETYPE_ASN1
 
+#define X509_F_X509_CHECK_PRIVATE_KEY   128
+
 #ifdef WOLFSSL_DTLS
-    #define DTLSv1_client_method wolfDTLSv1_client_method
-    #define DTLSv1_server_method wolfDTLSv1_server_method
-    #define DTLSv1_2_client_method wolfDTLSv1_2_client_method
-    #define DTLSv1_2_server_method wolfDTLSv1_2_server_method
+    #define DTLSv1_client_method        wolfDTLSv1_client_method
+    #define DTLSv1_server_method        wolfDTLSv1_server_method
+    #define DTLSv1_2_client_method      wolfDTLSv1_2_client_method
+    #define DTLSv1_2_server_method      wolfDTLSv1_2_server_method
+    #define DTLS_method                 wolfDTLS_method
 #endif
 
 
 #ifndef NO_FILESYSTEM
-    #define SSL_CTX_use_certificate_file wolfSSL_CTX_use_certificate_file
-    #define SSL_CTX_use_PrivateKey_file wolfSSL_CTX_use_PrivateKey_file
-    #define SSL_CTX_load_verify_locations wolfSSL_CTX_load_verify_locations
+    #define SSL_CTX_use_certificate_file      wolfSSL_CTX_use_certificate_file
+    #define SSL_CTX_use_PrivateKey_file       wolfSSL_CTX_use_PrivateKey_file
+#ifdef WOLFSSL_APACHE_HTTPD
+    #define SSL_CTX_load_verify_locations(ctx,file,path) \
+        wolfSSL_CTX_load_verify_locations_ex(ctx,file,path,\
+                                                   WOLFSSL_LOAD_FLAG_IGNORE_ERR)
+#else
+    #define SSL_CTX_load_verify_locations     wolfSSL_CTX_load_verify_locations
+#endif
     #define SSL_CTX_use_certificate_chain_file wolfSSL_CTX_use_certificate_chain_file
-    #define SSL_CTX_use_RSAPrivateKey_file wolfSSL_CTX_use_RSAPrivateKey_file
+    #define SSL_CTX_use_RSAPrivateKey_file    wolfSSL_CTX_use_RSAPrivateKey_file
 
-    #define SSL_use_certificate_file wolfSSL_use_certificate_file
-    #define SSL_use_PrivateKey_file wolfSSL_use_PrivateKey_file
-    #define SSL_use_certificate_chain_file wolfSSL_use_certificate_chain_file
-    #define SSL_use_RSAPrivateKey_file wolfSSL_use_RSAPrivateKey_file
+    #define SSL_use_certificate_file          wolfSSL_use_certificate_file
+    #define SSL_use_PrivateKey_file           wolfSSL_use_PrivateKey_file
+    #define SSL_use_certificate_chain_file    wolfSSL_use_certificate_chain_file
+    #define SSL_use_RSAPrivateKey_file        wolfSSL_use_RSAPrivateKey_file
 #endif
 
-#define SSL_CTX_new wolfSSL_CTX_new
-#define SSL_new     wolfSSL_new
-#define SSL_set_fd  wolfSSL_set_fd
-#define SSL_get_fd  wolfSSL_get_fd
-#define SSL_connect wolfSSL_connect
-#define SSL_clear   wolfSSL_clear
-#define SSL_state   wolfSSL_state
+#define SSL_CTX_new(method)             wolfSSL_CTX_new((WOLFSSL_METHOD*)(method))
+#ifdef OPENSSL_EXTRA
+#define SSL_CTX_up_ref                  wolfSSL_CTX_up_ref
+#endif
+#define SSL_new                         wolfSSL_new
+#define SSL_set_fd                      wolfSSL_set_fd
+#define SSL_get_fd                      wolfSSL_get_fd
+#define SSL_connect                     wolfSSL_connect
+#define SSL_clear                       wolfSSL_clear
+#define SSL_state                       wolfSSL_state
 
-#define SSL_write    wolfSSL_write
-#define SSL_read     wolfSSL_read
-#define SSL_peek     wolfSSL_peek
-#define SSL_accept   wolfSSL_accept
-#define SSL_CTX_free wolfSSL_CTX_free
-#define SSL_free     wolfSSL_free
-#define SSL_shutdown wolfSSL_shutdown
+#define SSL_write                       wolfSSL_write
+#define SSL_read                        wolfSSL_read
+#define SSL_peek                        wolfSSL_peek
+#define SSL_accept                      wolfSSL_accept
+#define SSL_CTX_free                    wolfSSL_CTX_free
+#define SSL_free                        wolfSSL_free
+#define SSL_shutdown                    wolfSSL_shutdown
+#define SSL_set_timeout                 wolfSSL_set_timeout
 
-#define SSL_CTX_set_quiet_shutdown wolfSSL_CTX_set_quiet_shutdown
-#define SSL_set_quiet_shutdown wolfSSL_set_quiet_shutdown
-#define SSL_get_error wolfSSL_get_error
-#define SSL_set_session wolfSSL_set_session
-#define SSL_get_session wolfSSL_get_session
-#define SSL_flush_sessions wolfSSL_flush_sessions
+#define SSL_CTX_set_quiet_shutdown      wolfSSL_CTX_set_quiet_shutdown
+#define SSL_set_quiet_shutdown          wolfSSL_set_quiet_shutdown
+#define SSL_get_error                   wolfSSL_get_error
+#define SSL_set_session                 wolfSSL_set_session
+#define SSL_get_session(x)              wolfSSL_get_session((WOLFSSL*) (x))
+#define SSL_SESSION_get0_peer           wolfSSL_SESSION_get0_peer
+#define SSL_flush_sessions              wolfSSL_flush_sessions
 /* assume unlimited temporarily */
 #define SSL_CTX_get_session_cache_mode(ctx) 0
 
-#define SSL_CTX_set_verify wolfSSL_CTX_set_verify
-#define SSL_set_verify wolfSSL_set_verify
-#define SSL_pending wolfSSL_pending
-#define SSL_load_error_strings wolfSSL_load_error_strings
-#define SSL_library_init wolfSSL_library_init
-#define SSL_CTX_set_session_cache_mode wolfSSL_CTX_set_session_cache_mode
-#define SSL_CTX_set_cipher_list wolfSSL_CTX_set_cipher_list
-#define SSL_set_cipher_list     wolfSSL_set_cipher_list
-
-#define ERR_error_string wolfSSL_ERR_error_string
-#define ERR_error_string_n wolfSSL_ERR_error_string_n
-#define ERR_reason_error_string wolfSSL_ERR_reason_error_string
+#define SSL_CTX_set_verify              wolfSSL_CTX_set_verify
+#define SSL_CTX_set_cert_verify_callback wolfSSL_CTX_set_cert_verify_callback
+#define SSL_set_verify                  wolfSSL_set_verify
+#define SSL_set_verify_result           wolfSSL_set_verify_result
+#define SSL_pending                     wolfSSL_pending
+#define SSL_load_error_strings          wolfSSL_load_error_strings
+#define SSL_library_init                wolfSSL_library_init
+#define OpenSSL_add_ssl_algorithms      wolfSSL_library_init
+#define SSL_CTX_set_session_cache_mode  wolfSSL_CTX_set_session_cache_mode
+#define SSL_CTX_set_cipher_list         wolfSSL_CTX_set_cipher_list
+#define SSL_CTX_set_ciphersuites        wolfSSL_CTX_set_cipher_list
+#define SSL_set_cipher_list             wolfSSL_set_cipher_list
+/* wolfSSL does not support security levels */
+#define SSL_CTX_set_security_level(...)
+/* wolfSSL does not support exporting keying material */
+#define SSL_export_keying_material(...) 0
 
-#define SSL_set_ex_data wolfSSL_set_ex_data
-#define SSL_get_shutdown wolfSSL_get_shutdown
-#define SSL_set_rfd wolfSSL_set_rfd
-#define SSL_set_wfd wolfSSL_set_wfd
-#define SSL_set_shutdown wolfSSL_set_shutdown
-#define SSL_set_session_id_context wolfSSL_set_session_id_context
-#define SSL_set_connect_state wolfSSL_set_connect_state
-#define SSL_set_accept_state wolfSSL_set_accept_state
-#define SSL_session_reused wolfSSL_session_reused
-#define SSL_SESSION_free wolfSSL_SESSION_free
-#define SSL_is_init_finished wolfSSL_is_init_finished
+#define SSL_CTX_set1_groups_list        wolfSSL_CTX_set1_groups_list
+#define SSL_set1_groups_list            wolfSSL_set1_groups_list
 
-#define SSL_get_version        wolfSSL_get_version
-#define SSL_get_current_cipher wolfSSL_get_current_cipher
+#define SSL_set_ex_data                 wolfSSL_set_ex_data
+#define SSL_get_shutdown                wolfSSL_get_shutdown
+#define SSL_set_rfd                     wolfSSL_set_rfd
+#define SSL_set_wfd                     wolfSSL_set_wfd
+#define SSL_set_shutdown                wolfSSL_set_shutdown
+#define SSL_set_session_id_context      wolfSSL_set_session_id_context
+#define SSL_set_connect_state           wolfSSL_set_connect_state
+#define SSL_set_accept_state            wolfSSL_set_accept_state
+#define SSL_session_reused              wolfSSL_session_reused
+#define SSL_SESSION_dup                 wolfSSL_SESSION_dup
+#define SSL_SESSION_free                wolfSSL_SESSION_free
+#define SSL_is_init_finished            wolfSSL_is_init_finished
+
+#define SSL_get_version                 wolfSSL_get_version
+#define SSL_get_current_cipher          wolfSSL_get_current_cipher
 
 /* use wolfSSL_get_cipher_name for its return format */
-#define SSL_get_cipher         wolfSSL_get_cipher_name
-#define SSL_CIPHER_description wolfSSL_CIPHER_description
-#define SSL_CIPHER_get_name    wolfSSL_CIPHER_get_name
-#define SSL_get1_session       wolfSSL_get1_session
+#define SSL_get_cipher                  wolfSSL_get_cipher_name
+#define SSL_CIPHER_description          wolfSSL_CIPHER_description
+#define SSL_CIPHER_get_name             wolfSSL_CIPHER_get_name
+#define SSL_CIPHER_get_version          wolfSSL_CIPHER_get_version
+#define SSL_CIPHER_get_id               wolfSSL_CIPHER_get_id
+#define SSL_CIPHER_get_rfc_name         wolfSSL_CIPHER_get_name
+#define SSL_CIPHER_standard_name        wolfSSL_CIPHER_get_name
+#define SSL_get_cipher_by_value         wolfSSL_get_cipher_by_value
 
-#define SSL_get_keyblock_size wolfSSL_get_keyblock_size
-#define SSL_get_keys          wolfSSL_get_keys
-#define SSL_SESSION_get_master_key        wolfSSL_SESSION_get_master_key
+#define SSL_get1_session                wolfSSL_get1_session
+
+#define SSL_get_keyblock_size           wolfSSL_get_keyblock_size
+#define SSL_get_keys                    wolfSSL_get_keys
+#define SSL_SESSION_get_master_key      wolfSSL_SESSION_get_master_key
 #define SSL_SESSION_get_master_key_length wolfSSL_SESSION_get_master_key_length
 
-#define DSA_dup_DH            wolfSSL_DSA_dup_DH
+#if defined(WOLFSSL_QT) || defined(OPENSSL_ALL)
+    #define SSL_MODE_RELEASE_BUFFERS    0x00000010U
+    #define ASN1_BOOLEAN                WOLFSSL_ASN1_BOOLEAN
+    #define X509_get_ext                wolfSSL_X509_get_ext
+    #define X509_cmp                    wolfSSL_X509_cmp
+    #define X509_EXTENSION_get_object   wolfSSL_X509_EXTENSION_get_object
+    #define X509_EXTENSION_get_critical wolfSSL_X509_EXTENSION_get_critical
+    #define X509_EXTENSION_get_data     wolfSSL_X509_EXTENSION_get_data
+    #define X509_EXTENSION_new          wolfSSL_X509_EXTENSION_new
+    #define X509_EXTENSION_free         wolfSSL_X509_EXTENSION_free
+    #define X509_gmtime_adj             wolfSSL_X509_gmtime_adj
+#endif
+
+#define DSA_dup_DH                      wolfSSL_DSA_dup_DH
+/* wolfSSL does not support DSA as the cert public key */
+#define EVP_PKEY_get0_DSA(...)          NULL
+#define DSA_bits(...)                   0
 
-#define X509_load_certificate_file wolfSSL_X509_load_certificate_file
-#define X509_NAME_get_text_by_NID wolfSSL_X509_NAME_get_text_by_NID
-#define X509_get_ext_d2i wolfSSL_X509_get_ext_d2i
-#define X509_digest wolfSSL_X509_digest
-#define X509_free wolfSSL_X509_free
-#define X509_new  wolfSSL_X509_new
+#define i2d_X509_bio                    wolfSSL_i2d_X509_bio
+#define d2i_X509_bio                    wolfSSL_d2i_X509_bio
+#define d2i_X509_fp                     wolfSSL_d2i_X509_fp
+#define i2d_X509                        wolfSSL_i2d_X509
+#define d2i_X509                        wolfSSL_d2i_X509
+#define PEM_read_bio_X509               wolfSSL_PEM_read_bio_X509
+#define PEM_read_bio_X509_CRL           wolfSSL_PEM_read_bio_X509_CRL
+#define PEM_read_bio_X509_AUX           wolfSSL_PEM_read_bio_X509_AUX
+#define PEM_read_X509                   wolfSSL_PEM_read_X509
+#define PEM_X509_INFO_read_bio          wolfSSL_PEM_X509_INFO_read_bio
+#define PEM_write_bio_X509              wolfSSL_PEM_write_bio_X509
+#define PEM_write_bio_X509_AUX          wolfSSL_PEM_write_bio_X509_AUX
+#define PEM_X509_INFO_read_bio          wolfSSL_PEM_X509_INFO_read_bio
+#define i2d_PrivateKey                  wolfSSL_i2d_PrivateKey
+
+#define i2d_X509_REQ                    wolfSSL_i2d_X509_REQ
+#define X509_REQ_new                    wolfSSL_X509_REQ_new
+#define X509_REQ_free                   wolfSSL_X509_REQ_free
+#define X509_REQ_sign                   wolfSSL_X509_REQ_sign
+#define X509_REQ_add_extensions         wolfSSL_X509_REQ_add_extensions
+#define X509_REQ_set_subject_name       wolfSSL_X509_REQ_set_subject_name
+#define X509_REQ_set_pubkey             wolfSSL_X509_REQ_set_pubkey
+#define PEM_write_bio_X509_REQ          wolfSSL_PEM_write_bio_X509_REQ
 
-#define OCSP_parse_url wolfSSL_OCSP_parse_url
-#define SSLv23_client_method wolfSSLv23_client_method
-#define SSLv2_client_method wolfSSLv2_client_method
-#define SSLv2_server_method wolfSSLv2_server_method
+#define X509_new                        wolfSSL_X509_new
+#define X509_up_ref                     wolfSSL_X509_up_ref
+#define X509_free                       wolfSSL_X509_free
+#define X509_load_certificate_file      wolfSSL_X509_load_certificate_file
+#define X509_digest                     wolfSSL_X509_digest
+#define X509_get_ext_count              wolfSSL_X509_get_ext_count
+#define X509_get_ext_d2i                wolfSSL_X509_get_ext_d2i
+#define X509_get_ext                    wolfSSL_X509_get_ext
+#define X509_get_ext_by_NID             wolfSSL_X509_get_ext_by_NID
+#define X509_get_issuer_name            wolfSSL_X509_get_issuer_name
+#define X509_get_subject_name           wolfSSL_X509_get_subject_name
+#define X509_get_pubkey                 wolfSSL_X509_get_pubkey
+#define X509_get0_pubkey                wolfSSL_X509_get_pubkey
+#define X509_get_notBefore              wolfSSL_X509_get_notBefore
+#define X509_get_notAfter               wolfSSL_X509_get_notAfter
+#define X509_get_serialNumber           wolfSSL_X509_get_serialNumber
+#define X509_get0_pubkey_bitstr         wolfSSL_X509_get0_pubkey_bitstr
+#define X509_get_ex_new_index           wolfSSL_X509_get_ex_new_index
+#define X509_get_ex_data                wolfSSL_X509_get_ex_data
+#define X509_set_ex_data                wolfSSL_X509_set_ex_data
+#define X509_get1_ocsp                  wolfSSL_X509_get1_ocsp
+#ifndef WOLFSSL_HAPROXY
+#define X509_get_version                wolfSSL_X509_get_version
+#endif
+#define X509_get_signature_nid          wolfSSL_X509_get_signature_nid
+#define X509_set_subject_name           wolfSSL_X509_set_subject_name
+#define X509_set_issuer_name            wolfSSL_X509_set_issuer_name
+#define X509_set_pubkey                 wolfSSL_X509_set_pubkey
+#define X509_set_notAfter               wolfSSL_X509_set_notAfter
+#define X509_set_notBefore              wolfSSL_X509_set_notBefore
+#define X509_set_serialNumber           wolfSSL_X509_set_serialNumber
+#define X509_set_version                wolfSSL_X509_set_version
+#define X509_sign                       wolfSSL_X509_sign
+#define X509_print                      wolfSSL_X509_print
+#define X509_print_ex                   wolfSSL_X509_print_ex
+#define X509_verify_cert_error_string   wolfSSL_X509_verify_cert_error_string
+#define X509_verify_cert                wolfSSL_X509_verify_cert
+#define X509_check_private_key          wolfSSL_X509_check_private_key
+#define X509_check_ca                   wolfSSL_X509_check_ca
+#define X509_check_host                 wolfSSL_X509_check_host
+#define X509_email_free                 wolfSSL_X509_email_free
+#define X509_check_issued               wolfSSL_X509_check_issued
+#define X509_dup                        wolfSSL_X509_dup
+
+#define X509_EXTENSION_get_object       wolfSSL_X509_EXTENSION_get_object
+#define X509_EXTENSION_get_data         wolfSSL_X509_EXTENSION_get_data
 
-#define MD4_Init   wolfSSL_MD4_Init
-#define MD4_Update wolfSSL_MD4_Update
-#define MD4_Final  wolfSSL_MD4_Final
+#define sk_X509_new                     wolfSSL_sk_X509_new
+#define sk_X509_new_null                wolfSSL_sk_X509_new
+#define sk_X509_num                     wolfSSL_sk_X509_num
+#define sk_X509_value                   wolfSSL_sk_X509_value
+#define sk_X509_shift                   wolfSSL_sk_X509_shift
+#define sk_X509_push                    wolfSSL_sk_X509_push
+#define sk_X509_pop                     wolfSSL_sk_X509_pop
+#define sk_X509_pop_free                wolfSSL_sk_X509_pop_free
+#define sk_X509_dup                     wolfSSL_sk_X509_dup
+#define sk_X509_free                    wolfSSL_sk_X509_free
+
+#define sk_X509_EXTENSION_num           wolfSSL_sk_X509_EXTENSION_num
+#define sk_X509_EXTENSION_value         wolfSSL_sk_X509_EXTENSION_value
+#define sk_X509_EXTENSION_new_null      wolfSSL_sk_X509_EXTENSION_new_null
+#define sk_X509_EXTENSION_pop_free      wolfSSL_sk_X509_EXTENSION_pop_free
+#define sk_X509_EXTENSION_push          wolfSSL_sk_X509_EXTENSION_push
+#define X509_EXTENSION_free             wolfSSL_X509_EXTENSION_free
+
+#define X509_INFO_new                   wolfSSL_X509_INFO_new
+#define X509_INFO_free                  wolfSSL_X509_INFO_free
+
+#define sk_X509_INFO_new_null           wolfSSL_sk_X509_INFO_new_null
+#define sk_X509_INFO_num                wolfSSL_sk_X509_INFO_num
+#define sk_X509_INFO_value              wolfSSL_sk_X509_INFO_value
+#define sk_X509_INFO_push               wolfSSL_sk_X509_INFO_push
+#define sk_X509_INFO_pop                wolfSSL_sk_X509_INFO_pop
+#define sk_X509_INFO_pop_free           wolfSSL_sk_X509_INFO_pop_free
+#define sk_X509_INFO_free               wolfSSL_sk_X509_INFO_free
+
+#define i2d_X509_NAME                   wolfSSL_i2d_X509_NAME
+#define X509_NAME_new                   wolfSSL_X509_NAME_new
+#define X509_NAME_free                  wolfSSL_X509_NAME_free
+#define X509_NAME_dup                   wolfSSL_X509_NAME_dup
+#define X509_NAME_get_text_by_NID       wolfSSL_X509_NAME_get_text_by_NID
+#define X509_NAME_get_index_by_OBJ      wolfSSL_X509_NAME_get_index_by_OBJ
+#define X509_NAME_cmp                   wolfSSL_X509_NAME_cmp
+#define X509_NAME_ENTRY_new             wolfSSL_X509_NAME_ENTRY_new
+#define X509_NAME_ENTRY_free            wolfSSL_X509_NAME_ENTRY_free
+#define X509_NAME_ENTRY_create_by_NID   wolfSSL_X509_NAME_ENTRY_create_by_NID
+#define X509_NAME_ENTRY_create_by_txt   wolfSSL_X509_NAME_ENTRY_create_by_txt
+#define X509_NAME_add_entry             wolfSSL_X509_NAME_add_entry
+#define X509_NAME_add_entry_by_txt      wolfSSL_X509_NAME_add_entry_by_txt
+#define X509_NAME_add_entry_by_NID      wolfSSL_X509_NAME_add_entry_by_NID
+#define X509_NAME_oneline               wolfSSL_X509_NAME_oneline
+#define X509_NAME_get_index_by_NID      wolfSSL_X509_NAME_get_index_by_NID
+#define X509_NAME_print_ex              wolfSSL_X509_NAME_print_ex
+#define X509_NAME_digest                wolfSSL_X509_NAME_digest
+#define X509_cmp_current_time           wolfSSL_X509_cmp_current_time
+#define X509_cmp_time                   wolfSSL_X509_cmp_time
+#define X509_time_adj                   wolfSSL_X509_time_adj
+#define X509_time_adj_ex                wolfSSL_X509_time_adj_ex
+
+#define sk_ACCESS_DESCRIPTION_num       wolfSSL_sk_ACCESS_DESCRIPTION_num
+#define sk_ACCESS_DESCRIPTION_value     wolfSSL_sk_ACCESS_DESCRIPTION_value
 
-#define BIO_new      wolfSSL_BIO_new
-#define BIO_free     wolfSSL_BIO_free
-#define BIO_free_all wolfSSL_BIO_free_all
-#define BIO_nread0   wolfSSL_BIO_nread0
-#define BIO_nread    wolfSSL_BIO_nread
-#define BIO_read     wolfSSL_BIO_read
-#define BIO_nwrite0  wolfSSL_BIO_nwrite0
-#define BIO_nwrite   wolfSSL_BIO_nwrite
-#define BIO_write    wolfSSL_BIO_write
-#define BIO_push     wolfSSL_BIO_push
-#define BIO_pop      wolfSSL_BIO_pop
-#define BIO_flush    wolfSSL_BIO_flush
-#define BIO_pending  wolfSSL_BIO_pending
+#define sk_X509_NAME_new                wolfSSL_sk_X509_NAME_new
+#define sk_X509_NAME_push               wolfSSL_sk_X509_NAME_push
+#define sk_X509_NAME_find               wolfSSL_sk_X509_NAME_find
+#define sk_X509_NAME_set_cmp_func       wolfSSL_sk_X509_NAME_set_cmp_func
+#define sk_X509_NAME_num                wolfSSL_sk_X509_NAME_num
+#define sk_X509_NAME_value              wolfSSL_sk_X509_NAME_value
+#define sk_X509_NAME_pop                wolfSSL_sk_X509_NAME_pop
+#define sk_X509_NAME_pop_free           wolfSSL_sk_X509_NAME_pop_free
+#define sk_X509_NAME_free               wolfSSL_sk_X509_NAME_free
+
+typedef WOLFSSL_X509_NAME_ENTRY X509_NAME_ENTRY;
+
+#define X509_NAME_entry_count           wolfSSL_X509_NAME_entry_count
+#define X509_NAME_ENTRY_get_object      wolfSSL_X509_NAME_ENTRY_get_object
+#define X509_NAME_get_entry             wolfSSL_X509_NAME_get_entry
+#define X509_NAME_ENTRY_get_data        wolfSSL_X509_NAME_ENTRY_get_data
+#define X509_NAME_ENTRY_get_object      wolfSSL_X509_NAME_ENTRY_get_object
+
+#define X509_V_FLAG_CRL_CHECK     WOLFSSL_CRL_CHECK
+#define X509_V_FLAG_CRL_CHECK_ALL WOLFSSL_CRL_CHECKALL
+
+#define X509_V_FLAG_USE_CHECK_TIME WOLFSSL_USE_CHECK_TIME
+#define X509_V_FLAG_NO_CHECK_TIME  WOLFSSL_NO_CHECK_TIME
+#define X509_CHECK_FLAG_NO_WILDCARDS WOLFSSL_NO_WILDCARDS
 
-#define BIO_get_mem_data wolfSSL_BIO_get_mem_data
-#define BIO_new_mem_buf  wolfSSL_BIO_new_mem_buf
+#define X509_STORE_CTX_get_current_cert wolfSSL_X509_STORE_CTX_get_current_cert
+#define X509_STORE_CTX_set_verify_cb    wolfSSL_X509_STORE_CTX_set_verify_cb
+#define X509_STORE_CTX_new              wolfSSL_X509_STORE_CTX_new
+#define X509_STORE_CTX_free             wolfSSL_X509_STORE_CTX_free
+#define X509_STORE_CTX_get_chain        wolfSSL_X509_STORE_CTX_get_chain
+#define X509_STORE_CTX_get1_chain       wolfSSL_X509_STORE_CTX_get1_chain
+#define X509_STORE_CTX_get_error        wolfSSL_X509_STORE_CTX_get_error
+#define X509_STORE_CTX_get_error_depth  wolfSSL_X509_STORE_CTX_get_error_depth
+#define X509_STORE_CTX_init             wolfSSL_X509_STORE_CTX_init
+#define X509_STORE_CTX_cleanup          wolfSSL_X509_STORE_CTX_cleanup
+#define X509_STORE_CTX_set_error        wolfSSL_X509_STORE_CTX_set_error
+#define X509_STORE_CTX_set_error_depth  wolfSSL_X509_STORE_CTX_set_error_depth
+#define X509_STORE_CTX_get_ex_data      wolfSSL_X509_STORE_CTX_get_ex_data
+#define X509_STORE_CTX_set_ex_data      wolfSSL_X509_STORE_CTX_set_ex_data
+#define X509_STORE_CTX_set_depth        wolfSSL_X509_STORE_CTX_set_depth
+#define X509_STORE_CTX_verify_cb        WOLFSSL_X509_STORE_CTX_verify_cb
+#define X509_STORE_CTX_get0_current_issuer \
+                                      wolfSSL_X509_STORE_CTX_get0_current_issuer
+#define X509_STORE_CTX_get0_store       wolfSSL_X509_STORE_CTX_get0_store
+#define X509_STORE_CTX_get0_cert        wolfSSL_X509_STORE_CTX_get0_cert
+
+#define X509_STORE_set_verify_cb(s, c) \
+wolfSSL_X509_STORE_set_verify_cb((WOLFSSL_X509_STORE *)(s), (WOLFSSL_X509_STORE_CTX_verify_cb)(c))
+#define X509_STORE_set_verify_cb_func(s, c) \
+wolfSSL_X509_STORE_set_verify_cb((WOLFSSL_X509_STORE *)(s), (WOLFSSL_X509_STORE_CTX_verify_cb)(c))
+
+
+#define X509_STORE_new                  wolfSSL_X509_STORE_new
+#define X509_STORE_free                 wolfSSL_X509_STORE_free
+#define X509_STORE_add_lookup           wolfSSL_X509_STORE_add_lookup
+#define X509_STORE_add_cert             wolfSSL_X509_STORE_add_cert
+#define X509_STORE_add_crl              wolfSSL_X509_STORE_add_crl
+#define X509_STORE_set_flags            wolfSSL_X509_STORE_set_flags
+#define X509_STORE_get1_certs           wolfSSL_X509_STORE_get1_certs
+#define X509_STORE_get_by_subject       wolfSSL_X509_STORE_get_by_subject
+#define X509_STORE_CTX_get1_issuer      wolfSSL_X509_STORE_CTX_get1_issuer
+#define X509_STORE_CTX_set_time         wolfSSL_X509_STORE_CTX_set_time
+#define X509_VERIFY_PARAM_set_hostflags wolfSSL_X509_VERIFY_PARAM_set_hostflags
+#define X509_VERIFY_PARAM_set1_host     wolfSSL_X509_VERIFY_PARAM_set1_host
+#define X509_VERIFY_PARAM_set1_ip_asc   wolfSSL_X509_VERIFY_PARAM_set1_ip_asc
+#define X509_STORE_load_locations       wolfSSL_X509_STORE_load_locations
+
+#define X509_LOOKUP_add_dir             wolfSSL_X509_LOOKUP_add_dir
+#define X509_LOOKUP_load_file           wolfSSL_X509_LOOKUP_load_file
+#define X509_LOOKUP_hash_dir            wolfSSL_X509_LOOKUP_hash_dir
+#define X509_LOOKUP_file                wolfSSL_X509_LOOKUP_file
+
+#define d2i_X509_CRL                    wolfSSL_d2i_X509_CRL
+#define d2i_X509_CRL_fp                 wolfSSL_d2i_X509_CRL_fp
+#define PEM_read_X509_CRL               wolfSSL_PEM_read_X509_CRL
 
-#define BIO_f_buffer              wolfSSL_BIO_f_buffer
-#define BIO_set_write_buffer_size wolfSSL_BIO_set_write_buffer_size
-#define BIO_f_ssl                 wolfSSL_BIO_f_ssl
-#define BIO_new_socket            wolfSSL_BIO_new_socket
-#define SSL_set_bio               wolfSSL_set_bio
-#define BIO_eof                   wolfSSL_BIO_eof
-#define BIO_set_ss                wolfSSL_BIO_set_ss
+#define X509_CRL_free                   wolfSSL_X509_CRL_free
+#define X509_CRL_get_lastUpdate         wolfSSL_X509_CRL_get_lastUpdate
+#define X509_CRL_get_nextUpdate         wolfSSL_X509_CRL_get_nextUpdate
+#define X509_CRL_verify                 wolfSSL_X509_CRL_verify
+#define X509_CRL_get_REVOKED            wolfSSL_X509_CRL_get_REVOKED
+
+#define X509_get_X509_PUBKEY            wolfSSL_X509_get_X509_PUBKEY
+#define X509_get0_tbs_sigalg            wolfSSL_X509_get0_tbs_sigalg
+#define X509_PUBKEY_get0_param          wolfSSL_X509_PUBKEY_get0_param
+#define X509_PUBKEY_get                 wolfSSL_X509_PUBKEY_get
+#define X509_PUBKEY_set                 wolfSSL_X509_PUBKEY_set
+#define X509_ALGOR_get0                 wolfSSL_X509_ALGOR_get0
+#define X509_ALGOR_set0                 wolfSSL_X509_ALGOR_set0
 
-#define BIO_s_mem     wolfSSL_BIO_s_mem
-#define BIO_f_base64  wolfSSL_BIO_f_base64
-#define BIO_set_flags wolfSSL_BIO_set_flags
+#define X509_ALGOR_new                  wolfSSL_X509_ALGOR_new
+#define X509_ALGOR_free                 wolfSSL_X509_ALGOR_free
+#define X509_PUBKEY_new                 wolfSSL_X509_PUBKEY_new
+#define X509_PUBKEY_free                wolfSSL_X509_PUBKEY_free
+
+#define sk_X509_REVOKED_num             wolfSSL_sk_X509_REVOKED_num
+#define sk_X509_REVOKED_value           wolfSSL_sk_X509_REVOKED_value
+
+#define X509_OBJECT_free_contents       wolfSSL_X509_OBJECT_free_contents
+#define X509_subject_name_hash          wolfSSL_X509_subject_name_hash
+
+#define X509_check_purpose(...)         0
+
+#define OCSP_parse_url                  wolfSSL_OCSP_parse_url
+
+#define MD4_Init                        wolfSSL_MD4_Init
+#define MD4_Update                      wolfSSL_MD4_Update
+#define MD4_Final                       wolfSSL_MD4_Final
 
-#define SSLeay_add_ssl_algorithms  wolfSSL_add_all_algorithms
-#define SSLeay_add_all_algorithms  wolfSSL_add_all_algorithms
+#define BIO_new                         wolfSSL_BIO_new
+#define BIO_free                        wolfSSL_BIO_free
+#define BIO_vfree                       wolfSSL_BIO_vfree
+#define BIO_free_all                    wolfSSL_BIO_free_all
+#define BIO_nread0                      wolfSSL_BIO_nread0
+#define BIO_nread                       wolfSSL_BIO_nread
+#define BIO_read                        wolfSSL_BIO_read
+#define BIO_nwrite0                     wolfSSL_BIO_nwrite0
+#define BIO_nwrite                      wolfSSL_BIO_nwrite
+#define BIO_write                       wolfSSL_BIO_write
+#define BIO_push                        wolfSSL_BIO_push
+#define BIO_pop                         wolfSSL_BIO_pop
+#define BIO_flush                       wolfSSL_BIO_flush
+#define BIO_pending                     wolfSSL_BIO_pending
+
+#define BIO_get_mem_data                wolfSSL_BIO_get_mem_data
+#define BIO_new_mem_buf                 wolfSSL_BIO_new_mem_buf
 
-#define RAND_screen     wolfSSL_RAND_screen
-#define RAND_file_name  wolfSSL_RAND_file_name
-#define RAND_write_file wolfSSL_RAND_write_file
-#define RAND_load_file  wolfSSL_RAND_load_file
-#define RAND_egd        wolfSSL_RAND_egd
-#define RAND_seed       wolfSSL_RAND_seed
-#define RAND_cleanup    wolfSSL_RAND_Cleanup
-#define RAND_add        wolfSSL_RAND_add
-#define RAND_poll       wolfSSL_RAND_poll
+#define BIO_f_buffer                    wolfSSL_BIO_f_buffer
+#define BIO_set_write_buffer_size       wolfSSL_BIO_set_write_buffer_size
+#define BIO_f_ssl                       wolfSSL_BIO_f_ssl
+#define BIO_new_socket                  wolfSSL_BIO_new_socket
+#define SSL_set_bio                     wolfSSL_set_bio
+#define BIO_set_ssl                     wolfSSL_BIO_set_ssl
+#define BIO_eof                         wolfSSL_BIO_eof
+#define BIO_set_ss                      wolfSSL_BIO_set_ss
+
+#define BIO_f_md                        wolfSSL_BIO_f_md
+#define BIO_get_md_ctx                  wolfSSL_BIO_get_md_ctx
+#define BIO_s_mem                       wolfSSL_BIO_s_mem
+#define BIO_f_base64                    wolfSSL_BIO_f_base64
+#define BIO_set_flags                   wolfSSL_BIO_set_flags
+#define BIO_set_nbio                    wolfSSL_BIO_set_nbio
+
+#define SSLeay_add_ssl_algorithms       wolfSSL_add_all_algorithms
+#define SSLeay_add_all_algorithms       wolfSSL_add_all_algorithms
+
+#define RAND_screen                     wolfSSL_RAND_screen
+#define RAND_file_name                  wolfSSL_RAND_file_name
+#define RAND_write_file                 wolfSSL_RAND_write_file
+#define RAND_load_file                  wolfSSL_RAND_load_file
+#define RAND_egd                        wolfSSL_RAND_egd
+#define RAND_seed                       wolfSSL_RAND_seed
+#define RAND_cleanup                    wolfSSL_RAND_Cleanup
+#define RAND_add                        wolfSSL_RAND_add
+#define RAND_poll                       wolfSSL_RAND_poll
+#define RAND_status                     wolfSSL_RAND_status
+#define RAND_bytes                      wolfSSL_RAND_bytes
+#define RAND_pseudo_bytes               wolfSSL_RAND_pseudo_bytes
 
 #define COMP_zlib                       wolfSSL_COMP_zlib
 #define COMP_rle                        wolfSSL_COMP_rle
 #define SSL_COMP_add_compression_method wolfSSL_COMP_add_compression_method
 
-#define SSL_get_ex_new_index wolfSSL_get_ex_new_index
-
-/* depreciated */
-#define CRYPTO_thread_id       wolfSSL_thread_id
-#define CRYPTO_set_id_callback wolfSSL_set_id_callback
+#define SSL_get_ex_new_index            wolfSSL_get_ex_new_index
+#define RSA_get_ex_new_index            wolfSSL_get_ex_new_index
 
-#define CRYPTO_set_locking_callback wolfSSL_set_locking_callback
-#define CRYPTO_set_dynlock_create_callback wolfSSL_set_dynlock_create_callback
-#define CRYPTO_set_dynlock_lock_callback wolfSSL_set_dynlock_lock_callback
-#define CRYPTO_set_dynlock_destroy_callback wolfSSL_set_dynlock_destroy_callback
-#define CRYPTO_num_locks wolfSSL_num_locks
+#define ASN1_BIT_STRING_new             wolfSSL_ASN1_BIT_STRING_new
+#define ASN1_BIT_STRING_free            wolfSSL_ASN1_BIT_STRING_free
+#define ASN1_BIT_STRING_get_bit         wolfSSL_ASN1_BIT_STRING_get_bit
+#define ASN1_BIT_STRING_set_bit         wolfSSL_ASN1_BIT_STRING_set_bit
 
-
-#define CRYPTO_LOCK             1
-#define CRYPTO_UNLOCK           2
-#define CRYPTO_READ             4
-#define CRYPTO_WRITE            8
+#define sk_ASN1_OBJECT_free             wolfSSL_sk_ASN1_OBJECT_free
 
-#define X509_STORE_CTX_get_current_cert wolfSSL_X509_STORE_CTX_get_current_cert
-#define X509_STORE_add_cert             wolfSSL_X509_STORE_add_cert
-#define X509_STORE_add_crl              wolfSSL_X509_STORE_add_crl
-#define X509_STORE_set_flags            wolfSSL_X509_STORE_set_flags
-#define X509_STORE_CTX_set_verify_cb    wolfSSL_X509_STORE_CTX_set_verify_cb
-#define X509_STORE_CTX_free             wolfSSL_X509_STORE_CTX_free
-#define X509_STORE_CTX_new              wolfSSL_X509_STORE_CTX_new
-#define X509_STORE_CTX_get_chain        wolfSSL_X509_STORE_CTX_get_chain
-#define X509_STORE_CTX_get_error wolfSSL_X509_STORE_CTX_get_error
-#define X509_STORE_CTX_get_error_depth wolfSSL_X509_STORE_CTX_get_error_depth
+#define ASN1_TIME_free                  wolfSSL_ASN1_TIME_free
+#define ASN1_TIME_adj                   wolfSSL_ASN1_TIME_adj
+#define ASN1_TIME_print                 wolfSSL_ASN1_TIME_print
+#define ASN1_TIME_to_generalizedtime    wolfSSL_ASN1_TIME_to_generalizedtime
+#define ASN1_GENERALIZEDTIME_print      wolfSSL_ASN1_GENERALIZEDTIME_print
+#define ASN1_GENERALIZEDTIME_free       wolfSSL_ASN1_GENERALIZEDTIME_free
+
+#define ASN1_tag2str                    wolfSSL_ASN1_tag2str
 
-#define X509_print                    wolfSSL_X509_print
-#define X509_NAME_cmp                 wolfSSL_X509_NAME_cmp
-#define i2d_X509_NAME                 wolfSSL_i2d_X509_NAME
-#define X509_NAME_ENTRY_free          wolfSSL_X509_NAME_ENTRY_free
-#define X509_NAME_ENTRY_create_by_NID wolfSSL_X509_NAME_ENTRY_create_by_NID
-#define X509_NAME_add_entry           wolfSSL_X509_NAME_add_entry
-#define X509_NAME_oneline             wolfSSL_X509_NAME_oneline
-#define X509_get_issuer_name          wolfSSL_X509_get_issuer_name
-#define X509_get_subject_name         wolfSSL_X509_get_subject_name
-#define X509_verify_cert_error_string wolfSSL_X509_verify_cert_error_string
-#define X509_verify_cert              wolfSSL_X509_verify_cert
-
-#define X509_LOOKUP_add_dir wolfSSL_X509_LOOKUP_add_dir
-#define X509_LOOKUP_load_file wolfSSL_X509_LOOKUP_load_file
-#define X509_LOOKUP_hash_dir wolfSSL_X509_LOOKUP_hash_dir
-#define X509_LOOKUP_file wolfSSL_X509_LOOKUP_file
+#define i2a_ASN1_INTEGER                wolfSSL_i2a_ASN1_INTEGER
+#define i2c_ASN1_INTEGER                wolfSSL_i2c_ASN1_INTEGER
+#define ASN1_INTEGER_new                wolfSSL_ASN1_INTEGER_new
+#define ASN1_INTEGER_free               wolfSSL_ASN1_INTEGER_free
+#define ASN1_INTEGER_cmp                wolfSSL_ASN1_INTEGER_cmp
+#define ASN1_INTEGER_get                wolfSSL_ASN1_INTEGER_get
+#define ASN1_INTEGER_set                wolfSSL_ASN1_INTEGER_set
+#define ASN1_INTEGER_to_BN              wolfSSL_ASN1_INTEGER_to_BN
 
-#define X509_STORE_add_lookup wolfSSL_X509_STORE_add_lookup
-#define X509_STORE_new        wolfSSL_X509_STORE_new
-#define X509_STORE_free       wolfSSL_X509_STORE_free
-#define X509_STORE_get_by_subject wolfSSL_X509_STORE_get_by_subject
-#define X509_STORE_CTX_init wolfSSL_X509_STORE_CTX_init
-#define X509_STORE_CTX_cleanup wolfSSL_X509_STORE_CTX_cleanup
-
-#define X509_CRL_get_lastUpdate wolfSSL_X509_CRL_get_lastUpdate
-#define X509_CRL_get_nextUpdate wolfSSL_X509_CRL_get_nextUpdate
+#define i2a_ASN1_OBJECT                 wolfSSL_i2a_ASN1_OBJECT
 
-#define X509_get_pubkey           wolfSSL_X509_get_pubkey
-#define X509_CRL_verify           wolfSSL_X509_CRL_verify
-#define X509_STORE_CTX_set_error  wolfSSL_X509_STORE_CTX_set_error
-#define X509_OBJECT_free_contents wolfSSL_X509_OBJECT_free_contents
-#define d2i_PUBKEY                wolfSSL_d2i_PUBKEY
-#define X509_cmp_current_time     wolfSSL_X509_cmp_current_time
-#define sk_X509_REVOKED_num       wolfSSL_sk_X509_REVOKED_num
-#define X509_CRL_get_REVOKED      wolfSSL_X509_CRL_get_REVOKED
-#define sk_X509_REVOKED_value     wolfSSL_sk_X509_REVOKED_value
-#define X509_get_notBefore(cert)  (ASN1_TIME*)wolfSSL_X509_notBefore((cert))
-#define X509_get_notAfter(cert)   (ASN1_TIME*)wolfSSL_X509_notAfter((cert))
+#define ASN1_STRING_data                wolfSSL_ASN1_STRING_data
+#define ASN1_STRING_get0_data           wolfSSL_ASN1_STRING_data
+#define ASN1_STRING_length              wolfSSL_ASN1_STRING_length
+#define ASN1_STRING_to_UTF8             wolfSSL_ASN1_STRING_to_UTF8
+#define ASN1_STRING_print_ex            wolfSSL_ASN1_STRING_print_ex
+#define ASN1_STRING_print(x, y)         wolfSSL_ASN1_STRING_print ((WOLFSSL_BIO*)(x), (WOLFSSL_ASN1_STRING*)(y))
+#define d2i_DISPLAYTEXT                 wolfSSL_d2i_DISPLAYTEXT
 
+#define ASN1_UTCTIME_pr                 wolfSSL_ASN1_UTCTIME_pr
 
-#define X509_get_serialNumber wolfSSL_X509_get_serialNumber
+#define ASN1_IA5STRING                  WOLFSSL_ASN1_STRING
 
-#define ASN1_TIME_print              wolfSSL_ASN1_TIME_print
-#define ASN1_GENERALIZEDTIME_print   wolfSSL_ASN1_GENERALIZEDTIME_print
-#define ASN1_TIME_adj                wolfSSL_ASN1_TIME_adj
-#define ASN1_GENERALIZEDTIME_free    wolfSSL_ASN1_GENERALIZEDTIME_free
-#define ASN1_STRING_print_ex         wolfSSL_ASN1_STRING_print_ex
-#define ASN1_tag2str                 wolfSSL_ASN1_tag2str
-#define ASN1_TIME_to_generalizedtime wolfSSL_ASN1_TIME_to_generalizedtime
+#define ASN1_OCTET_STRING               WOLFSSL_ASN1_STRING
+#define ASN1_BOOLEAN                    WOLFSSL_ASN1_BOOLEAN
+
+#define SSL_load_client_CA_file         wolfSSL_load_client_CA_file
 
-#define ASN1_INTEGER_new wolfSSL_ASN1_INTEGER_new
-#define ASN1_INTEGER_free wolfSSL_ASN1_INTEGER_free
-#define ASN1_INTEGER_cmp wolfSSL_ASN1_INTEGER_cmp
-#define ASN1_INTEGER_get wolfSSL_ASN1_INTEGER_get
-#define ASN1_INTEGER_to_BN wolfSSL_ASN1_INTEGER_to_BN
-#define ASN1_STRING_to_UTF8 wolfSSL_ASN1_STRING_to_UTF8
-
-#define SSL_load_client_CA_file wolfSSL_load_client_CA_file
-
-#define SSL_CTX_get_client_CA_list         wolfSSL_SSL_CTX_get_client_CA_list
-#define SSL_CTX_set_client_CA_list         wolfSSL_CTX_set_client_CA_list
-#define SSL_CTX_set_cert_store             wolfSSL_CTX_set_cert_store
-#define SSL_CTX_get_cert_store             wolfSSL_CTX_get_cert_store
-#define X509_STORE_CTX_get_ex_data         wolfSSL_X509_STORE_CTX_get_ex_data
+#define SSL_CTX_get_client_CA_list      wolfSSL_CTX_get_client_CA_list
+#define SSL_CTX_set_client_CA_list      wolfSSL_CTX_set_client_CA_list
+#define SSL_CTX_set_client_cert_cb      wolfSSL_CTX_set_client_cert_cb
+#define SSL_CTX_set_cert_store          wolfSSL_CTX_set_cert_store
+#define SSL_CTX_get_cert_store(x)       wolfSSL_CTX_get_cert_store ((WOLFSSL_CTX*) (x))
+#define SSL_get_client_CA_list          wolfSSL_get_client_CA_list
 #define SSL_get_ex_data_X509_STORE_CTX_idx wolfSSL_get_ex_data_X509_STORE_CTX_idx
-#define SSL_get_ex_data wolfSSL_get_ex_data
+#define SSL_get_ex_data                 wolfSSL_get_ex_data
 
 #define SSL_CTX_set_default_passwd_cb_userdata wolfSSL_CTX_set_default_passwd_cb_userdata
-#define SSL_CTX_set_default_passwd_cb wolfSSL_CTX_set_default_passwd_cb
+#define SSL_CTX_set_default_passwd_cb   wolfSSL_CTX_set_default_passwd_cb
 
-#define SSL_CTX_set_timeout(ctx, to) wolfSSL_CTX_set_timeout(ctx, (unsigned int) to)
-#define SSL_CTX_set_info_callback wolfSSL_CTX_set_info_callback
-#define SSL_CTX_set_alpn_protos   wolfSSL_CTX_set_alpn_protos
-#define ERR_peek_error wolfSSL_ERR_peek_error
-#define ERR_peek_last_error_line  wolfSSL_ERR_peek_last_error_line
-#define ERR_peek_errors_fp         wolfSSL_ERR_peek_errors_fp
-#define ERR_GET_REASON wolfSSL_ERR_GET_REASON
+#define SSL_CTX_set_timeout(ctx, to)    \
+                                 wolfSSL_CTX_set_timeout(ctx, (unsigned int) to)
+#define SSL_CTX_set_info_callback       wolfSSL_CTX_set_info_callback
+#define SSL_CTX_set_alpn_protos         wolfSSL_CTX_set_alpn_protos
+
+#define SSL_alert_type_string           wolfSSL_alert_type_string
+#define SSL_alert_desc_string           wolfSSL_alert_desc_string
+#define SSL_state_string                wolfSSL_state_string
 
-#define SSL_alert_type_string wolfSSL_alert_type_string
-#define SSL_alert_desc_string wolfSSL_alert_desc_string
-#define SSL_state_string wolfSSL_state_string
+#define RSA_free                        wolfSSL_RSA_free
+#define RSA_generate_key                wolfSSL_RSA_generate_key
+#define SSL_CTX_set_tmp_rsa_callback    wolfSSL_CTX_set_tmp_rsa_callback
+#define RSA_print                       wolfSSL_RSA_print
+#define RSA_bits                        wolfSSL_RSA_size
+#define RSA_up_ref                      wolfSSL_RSA_up_ref
+#define RSA_padding_add_PKCS1_PSS       wolfSSL_RSA_padding_add_PKCS1_PSS
+#define RSA_verify_PKCS1_PSS            wolfSSL_RSA_verify_PKCS1_PSS
 
-#define RSA_free wolfSSL_RSA_free
-#define RSA_generate_key wolfSSL_RSA_generate_key
-#define SSL_CTX_set_tmp_rsa_callback wolfSSL_CTX_set_tmp_rsa_callback
+#define PEM_def_callback                wolfSSL_PEM_def_callback
 
-#define PEM_def_callback wolfSSL_PEM_def_callback
-
-#define SSL_CTX_sess_accept wolfSSL_CTX_sess_accept
-#define SSL_CTX_sess_connect wolfSSL_CTX_sess_connect
-#define SSL_CTX_sess_accept_good wolfSSL_CTX_sess_accept_good
-#define SSL_CTX_sess_connect_good wolfSSL_CTX_sess_connect_good
+#define SSL_CTX_sess_accept             wolfSSL_CTX_sess_accept
+#define SSL_CTX_sess_connect            wolfSSL_CTX_sess_connect
+#define SSL_CTX_sess_accept_good        wolfSSL_CTX_sess_accept_good
+#define SSL_CTX_sess_connect_good       wolfSSL_CTX_sess_connect_good
 #define SSL_CTX_sess_accept_renegotiate wolfSSL_CTX_sess_accept_renegotiate
 #define SSL_CTX_sess_connect_renegotiate wolfSSL_CTX_sess_connect_renegotiate
-#define SSL_CTX_sess_hits wolfSSL_CTX_sess_hits
-#define SSL_CTX_sess_cb_hits wolfSSL_CTX_sess_cb_hits
-#define SSL_CTX_sess_cache_full wolfSSL_CTX_sess_cache_full
-#define SSL_CTX_sess_misses wolfSSL_CTX_sess_misses
-#define SSL_CTX_sess_timeouts wolfSSL_CTX_sess_timeouts
-#define SSL_CTX_sess_number wolfSSL_CTX_sess_number
-#define SSL_CTX_sess_get_cache_size wolfSSL_CTX_sess_get_cache_size
+#define SSL_CTX_sess_hits               wolfSSL_CTX_sess_hits
+#define SSL_CTX_sess_cb_hits            wolfSSL_CTX_sess_cb_hits
+#define SSL_CTX_sess_cache_full         wolfSSL_CTX_sess_cache_full
+#define SSL_CTX_sess_misses             wolfSSL_CTX_sess_misses
+#define SSL_CTX_sess_timeouts           wolfSSL_CTX_sess_timeouts
+#define SSL_CTX_sess_number             wolfSSL_CTX_sess_number
+#define SSL_CTX_sess_get_cache_size     wolfSSL_CTX_sess_get_cache_size
 
 
 #define SSL_DEFAULT_CIPHER_LIST WOLFSSL_DEFAULT_CIPHER_LIST
 
 #define SSL_CTX_set_psk_client_callback wolfSSL_CTX_set_psk_client_callback
-#define SSL_set_psk_client_callback wolfSSL_set_psk_client_callback
+#define SSL_set_psk_client_callback     wolfSSL_set_psk_client_callback
 
-#define SSL_get_psk_identity_hint wolfSSL_get_psk_identity_hint
-#define SSL_get_psk_identity wolfSSL_get_psk_identity
+#define SSL_get_psk_identity_hint       wolfSSL_get_psk_identity_hint
+#define SSL_get_psk_identity            wolfSSL_get_psk_identity
 
-#define SSL_CTX_use_psk_identity_hint wolfSSL_CTX_use_psk_identity_hint
-#define SSL_use_psk_identity_hint wolfSSL_use_psk_identity_hint
+#define SSL_CTX_use_psk_identity_hint   wolfSSL_CTX_use_psk_identity_hint
+#define SSL_use_psk_identity_hint       wolfSSL_use_psk_identity_hint
 
 #define SSL_CTX_set_psk_server_callback wolfSSL_CTX_set_psk_server_callback
-#define SSL_set_psk_server_callback wolfSSL_set_psk_server_callback
+#define SSL_set_psk_server_callback     wolfSSL_set_psk_server_callback
 
 /* system file ints for ERR_put_error */
 #define SYS_F_ACCEPT      WOLFSSL_SYS_ACCEPT
@@ -465,265 +767,235 @@
 #define SYS_F_IOCTLSOCKET    WOLFSSL_SYS_IOCTLSOCKET
 #define SYS_F_LISTEN         WOLFSSL_SYS_LISTEN
 
-#define ERR_put_error           wolfSSL_ERR_put_error
-#define ERR_get_error_line      wolfSSL_ERR_get_error_line
-#define ERR_get_error_line_data wolfSSL_ERR_get_error_line_data
-
-#define ERR_get_error wolfSSL_ERR_get_error
-#define ERR_clear_error wolfSSL_ERR_clear_error
+#define ERR_GET_LIB                     wolfSSL_ERR_GET_LIB
+#define ERR_GET_REASON                  wolfSSL_ERR_GET_REASON
 
-#define RAND_status wolfSSL_RAND_status
-#define RAND_bytes wolfSSL_RAND_bytes
-#define RAND_pseudo_bytes wolfSSL_RAND_pseudo_bytes
-#define SSLv23_server_method  wolfSSLv23_server_method
-#define SSL_CTX_set_options   wolfSSL_CTX_set_options
-#define SSL_CTX_get_options   wolfSSL_CTX_get_options
-#define SSL_CTX_clear_options wolfSSL_CTX_clear_options
+#define ERR_put_error                   wolfSSL_ERR_put_error
+#define ERR_peek_error                  wolfSSL_ERR_peek_error
+#define ERR_peek_errors_fp              wolfSSL_ERR_peek_errors_fp
+#define ERR_peek_error_line_data        wolfSSL_ERR_peek_error_line_data
+#define ERR_peek_last_error             wolfSSL_ERR_peek_last_error
+#define ERR_peek_last_error_line        wolfSSL_ERR_peek_last_error_line
+#define ERR_get_error_line              wolfSSL_ERR_get_error_line
+#define ERR_get_error_line_data         wolfSSL_ERR_get_error_line_data
+#define ERR_get_error                   wolfSSL_ERR_get_error
+#define ERR_print_errors_fp(file)       wolfSSL_ERR_dump_errors_fp((file))
+#define ERR_print_errors_cb             wolfSSL_ERR_print_errors_cb
+#define ERR_print_errors                wolfSSL_ERR_print_errors
+#define ERR_clear_error                 wolfSSL_ERR_clear_error
+#define ERR_free_strings                wolfSSL_ERR_free_strings
+#define ERR_remove_state                wolfSSL_ERR_remove_state
+#define ERR_remove_thread_state         wolfSSL_ERR_remove_thread_state
+#define ERR_error_string                wolfSSL_ERR_error_string
+#define ERR_error_string_n              wolfSSL_ERR_error_string_n
+#define ERR_reason_error_string         wolfSSL_ERR_reason_error_string
+#define ERR_load_BIO_strings            wolfSSL_ERR_load_BIO_strings
 
-#define SSL_CTX_check_private_key wolfSSL_CTX_check_private_key
-#define SSL_check_private_key     wolfSSL_check_private_key
-
-#define ERR_free_strings wolfSSL_ERR_free_strings
-#define ERR_remove_state wolfSSL_ERR_remove_state
+#ifndef WOLFCRYPT_ONLY
+#define PEMerr(func, reason)            wolfSSL_ERR_put_error(ERR_LIB_PEM, \
+                                        (func), (reason), __FILE__, __LINE__)
+#else
+#define PEMerr(func, reason)            WOLFSSL_ERROR_LINE((reason), \
+                                        NULL, __LINE__, __FILE__, NULL)
+#endif
 
-#define CRYPTO_cleanup_all_ex_data wolfSSL_cleanup_all_ex_data
-#define SSL_CTX_set_mode wolfSSL_CTX_set_mode
-#define SSL_CTX_get_mode wolfSSL_CTX_get_mode
-#define SSL_CTX_set_default_read_ahead wolfSSL_CTX_set_default_read_ahead
+#define SSLv23_server_method            wolfSSLv23_server_method
+#define SSL_CTX_set_options             wolfSSL_CTX_set_options
+#define SSL_CTX_get_options             wolfSSL_CTX_get_options
+#define SSL_CTX_clear_options           wolfSSL_CTX_clear_options
 
-#define SSL_CTX_sess_set_cache_size wolfSSL_CTX_sess_set_cache_size
+#define SSL_CTX_check_private_key       wolfSSL_CTX_check_private_key
+#define SSL_check_private_key           wolfSSL_check_private_key
+
+#define SSL_CTX_set_mode                wolfSSL_CTX_set_mode
+#define SSL_CTX_get_mode                wolfSSL_CTX_get_mode
+#define SSL_CTX_set_default_read_ahead  wolfSSL_CTX_set_default_read_ahead
+
+#define SSL_CTX_sess_set_cache_size     wolfSSL_CTX_sess_set_cache_size
 #define SSL_CTX_set_default_verify_paths wolfSSL_CTX_set_default_verify_paths
 
-#define SSL_CTX_set_session_id_context wolfSSL_CTX_set_session_id_context
-#define SSL_get_peer_certificate wolfSSL_get_peer_certificate
-#define SSL_get_peer_cert_chain  wolfSSL_get_peer_cert_chain
+#define SSL_CTX_set_session_id_context  wolfSSL_CTX_set_session_id_context
+#define SSL_get_peer_certificate        wolfSSL_get_peer_certificate
+#define SSL_get_peer_cert_chain         wolfSSL_get_peer_cert_chain
+
+#define SSL_want                        wolfSSL_want
+#define SSL_want_read                   wolfSSL_want_read
+#define SSL_want_write                  wolfSSL_want_write
 
-#define SSL_want_read wolfSSL_want_read
-#define SSL_want_write wolfSSL_want_write
+#define BIO_prf                         wolfSSL_BIO_prf
 
-#define BIO_prf wolfSSL_BIO_prf
-#define ASN1_UTCTIME_pr wolfSSL_ASN1_UTCTIME_pr
+#define sk_num                          wolfSSL_sk_num
+#define sk_ASN1_OBJECT_num              wolfSSL_sk_num
+#define sk_value                        wolfSSL_sk_value
+#define sk_ASN1_OBJECT_value            wolfSSL_sk_value
+
+#define d2i_PKCS12_bio                  wolfSSL_d2i_PKCS12_bio
+#define d2i_PKCS12_fp                   wolfSSL_d2i_PKCS12_fp
+#define i2d_PKCS12_bio                  wolfSSL_i2d_PKCS12_bio
 
-#define sk_num wolfSSL_sk_num
-#define sk_value wolfSSL_sk_value
-#define sk_X509_pop  wolfSSL_sk_X509_pop
-#define sk_X509_free wolfSSL_sk_X509_free
-#define i2d_X509_bio wolfSSL_i2d_X509_bio
-#define d2i_X509_bio wolfSSL_d2i_X509_bio
-#define d2i_X509_fp wolfSSL_d2i_X509_fp
-#define i2d_X509     wolfSSL_i2d_X509
-#define d2i_X509     wolfSSL_d2i_X509
-#define d2i_PKCS12_bio   wolfSSL_d2i_PKCS12_bio
-#define d2i_PKCS12_fp   wolfSSL_d2i_PKCS12_fp
-#define d2i_RSAPublicKey wolfSSL_d2i_RSAPublicKey
-#define d2i_RSAPrivateKey wolfSSL_d2i_RSAPrivateKey
-#define i2d_RSAPrivateKey wolfSSL_i2d_RSAPrivateKey
-#define i2d_RSAPublicKey wolfSSL_i2d_RSAPublicKey
-#define d2i_X509_CRL wolfSSL_d2i_X509_CRL
-#define d2i_X509_CRL_fp wolfSSL_d2i_X509_CRL_fp
-#define X509_CRL_free wolfSSL_X509_CRL_free
+#define d2i_RSAPublicKey                wolfSSL_d2i_RSAPublicKey
+#define d2i_RSAPrivateKey               wolfSSL_d2i_RSAPrivateKey
+#define i2d_RSAPrivateKey               wolfSSL_i2d_RSAPrivateKey
+#define i2d_RSAPublicKey                wolfSSL_i2d_RSAPublicKey
+
+#define SSL_CTX_get_ex_data             wolfSSL_CTX_get_ex_data
+#define SSL_CTX_set_ex_data             wolfSSL_CTX_set_ex_data
+#define SSL_CTX_sess_set_get_cb         wolfSSL_CTX_sess_set_get_cb
+#define SSL_CTX_sess_set_new_cb         wolfSSL_CTX_sess_set_new_cb
+#define SSL_CTX_sess_set_remove_cb      wolfSSL_CTX_sess_set_remove_cb
 
-#define SSL_CTX_get_ex_data wolfSSL_CTX_get_ex_data
-#define SSL_CTX_set_ex_data wolfSSL_CTX_set_ex_data
-#define SSL_CTX_sess_set_get_cb wolfSSL_CTX_sess_set_get_cb
-#define SSL_CTX_sess_set_new_cb wolfSSL_CTX_sess_set_new_cb
-#define SSL_CTX_sess_set_remove_cb wolfSSL_CTX_sess_set_remove_cb
+#define i2d_SSL_SESSION                 wolfSSL_i2d_SSL_SESSION
+#define d2i_SSL_SESSION                 wolfSSL_d2i_SSL_SESSION
+#define SSL_SESSION_set_timeout         wolfSSL_SSL_SESSION_set_timeout
+#define SSL_SESSION_get_timeout         wolfSSL_SESSION_get_timeout
+#define SSL_SESSION_get_time            wolfSSL_SESSION_get_time
 
-#define i2d_SSL_SESSION wolfSSL_i2d_SSL_SESSION
-#define d2i_SSL_SESSION wolfSSL_d2i_SSL_SESSION
-#define SSL_SESSION_set_timeout wolfSSL_SSL_SESSION_set_timeout
-#define SSL_SESSION_get_timeout wolfSSL_SESSION_get_timeout
-#define SSL_SESSION_get_time wolfSSL_SESSION_get_time
-#define SSL_CTX_get_ex_new_index wolfSSL_CTX_get_ex_new_index
-#define PEM_read_bio_X509 wolfSSL_PEM_read_bio_X509
-#define PEM_read_bio_X509_AUX wolfSSL_PEM_read_bio_X509_AUX
-#define PEM_read_X509_CRL wolfSSL_PEM_read_X509_CRL
+#define SSL_CTX_get_ex_new_index        wolfSSL_CTX_get_ex_new_index
+#define PEM_read                        wolfSSL_PEM_read
+#define PEM_write                       wolfSSL_PEM_write
+#define PEM_get_EVP_CIPHER_INFO         wolfSSL_PEM_get_EVP_CIPHER_INFO
+#define PEM_do_header                   wolfSSL_PEM_do_header
 
 /*#if OPENSSL_API_COMPAT < 0x10100000L*/
 #define CONF_modules_free()
 #define ENGINE_cleanup()
-#define HMAC_CTX_cleanup wolfSSL_HMAC_cleanup
-#define SSL_CTX_need_tmp_RSA(ctx)            0
-#define SSL_CTX_set_tmp_rsa(ctx,rsa)         1
-#define SSL_need_tmp_RSA(ssl)                0
-#define SSL_set_tmp_rsa(ssl,rsa)             1
+#define HMAC_CTX_cleanup                wolfSSL_HMAC_cleanup
+#define SSL_CTX_need_tmp_RSA(ctx)       0
+#define SSL_CTX_set_tmp_rsa(ctx,rsa)    1
+#define SSL_need_tmp_RSA(ssl)           0
+#define SSL_set_tmp_rsa(ssl,rsa)        1
 /*#endif*/
 
 #define CONF_modules_unload(a)
+#define CONF_get1_default_config_file wolfSSL_CONF_get1_default_config_file
 
-#define SSL_get_hit wolfSSL_session_reused
+#define SSL_get_hit                     wolfSSL_session_reused
 
 /* yassl had set the default to be 500 */
-#define SSL_get_default_timeout(ctx) 500
+#define SSL_get_default_timeout(ctx)    500
 
-#define X509_NAME_free wolfSSL_X509_NAME_free
-#define X509_NAME_new  wolfSSL_X509_NAME_new
+#define DTLSv1_get_timeout(ssl, timeleft)   wolfSSL_DTLSv1_get_timeout((ssl), (WOLFSSL_TIMEVAL*)(timeleft))
+#define DTLSv1_handle_timeout               wolfSSL_DTLSv1_handle_timeout
+#define DTLSv1_set_initial_timeout_duration wolfSSL_DTLSv1_set_initial_timeout_duration
 
-    typedef WOLFSSL_X509_NAME_ENTRY X509_NAME_ENTRY;
+#ifndef NO_WOLFSSL_STUB
+#define SSL_CTX_set_current_time_cb(ssl, cb) ({ (void)ssl; (void)cb; })
+#endif
 
-#define SSL_CTX_use_certificate wolfSSL_CTX_use_certificate
-#define SSL_CTX_use_PrivateKey wolfSSL_CTX_use_PrivateKey
-#define BIO_read_filename wolfSSL_BIO_read_filename
-#define BIO_s_file wolfSSL_BIO_s_file
-#define SSL_CTX_set_verify_depth wolfSSL_CTX_set_verify_depth
-#define SSL_set_verify_depth wolfSSL_set_verify_depth
-#define SSL_get_app_data wolfSSL_get_app_data
-#define SSL_set_app_data wolfSSL_set_app_data
-#define X509_NAME_entry_count wolfSSL_X509_NAME_entry_count
-#define X509_NAME_ENTRY_get_object wolfSSL_X509_NAME_ENTRY_get_object
-#define X509_NAME_get_entry wolfSSL_X509_NAME_get_entry
-#define ASN1_STRING_data wolfSSL_ASN1_STRING_data
-#define ASN1_STRING_length wolfSSL_ASN1_STRING_length
-#define X509_NAME_get_index_by_NID wolfSSL_X509_NAME_get_index_by_NID
-#define X509_NAME_ENTRY_get_data wolfSSL_X509_NAME_ENTRY_get_data
-#define sk_X509_NAME_pop_free  wolfSSL_sk_X509_NAME_pop_free
-#define SHA1 wolfSSL_SHA1
+#define SSL_CTX_use_certificate         wolfSSL_CTX_use_certificate
+#define SSL_CTX_add1_chain_cert         wolfSSL_CTX_add1_chain_cert
+#define SSL_CTX_use_PrivateKey          wolfSSL_CTX_use_PrivateKey
+#define BIO_read_filename               wolfSSL_BIO_read_filename
+#define SSL_CTX_set_verify_depth        wolfSSL_CTX_set_verify_depth
+#define SSL_set_verify_depth            wolfSSL_set_verify_depth
+#define SSL_get_app_data                wolfSSL_get_app_data
+#define SSL_set_app_data                wolfSSL_set_app_data
+#define SHA1                            wolfSSL_SHA1
 
-#define X509_check_private_key wolfSSL_X509_check_private_key
-#define SSL_dup_CA_list wolfSSL_dup_CA_list
-#define X509_check_ca   wolfSSL_X509_check_ca
-
+#define SSL_dup_CA_list                 wolfSSL_dup_CA_list
 
-/* NIDs */
-enum {
-    NID_des    = 66,
-    NID_des3   = 67,
-    NID_sha256 = 672,
-    NID_sha384 = 673,
-    NID_sha512 = 674,
-    NID_hw_name_oid = 73,
-    NID_id_pkix_OCSP_basic = 74,
-    NID_any_policy = 75,
-    NID_anyExtendedKeyUsage = 76,
-};
+#define sk_X509_NAME_find               wolfSSL_sk_X509_NAME_find
 
 enum {
     GEN_DNS   = 0x02, /* ASN_DNS_TYPE */
     GEN_EMAIL = 0x01, /* ASN_RFC822_TYPE */
-    GEN_URI   = 0x06  /* ASN_URI_TYPE */
+    GEN_URI   = 0x06, /* ASN_URI_TYPE */
+    GEN_IPADD = 0x07,
+    GEN_RID   = 0x08, /* Registered ID, not supported */
 };
 
-#define PEM_write_bio_X509_REQ wolfSSL_PEM_write_bio_X509_REQ
-#define PEM_write_bio_X509_AUX wolfSSL_PEM_write_bio_X509_AUX
-
-#define PEM_read_bio_DHparams wolfSSL_PEM_read_bio_DHparams
-#define PEM_read_bio_DSAparams wolfSSL_PEM_read_bio_DSAparams
-#define PEM_write_bio_X509     wolfSSL_PEM_write_bio_X509
-#define PEM_write_bio_X509_REQ wolfSSL_PEM_write_bio_X509_REQ
-#define PEM_write_bio_X509_AUX wolfSSL_PEM_write_bio_X509_AUX
+#define PEM_read_bio_DHparams           wolfSSL_PEM_read_bio_DHparams
+#define PEM_read_bio_DSAparams          wolfSSL_PEM_read_bio_DSAparams
 
 #if defined(OPENSSL_ALL) || defined(WOLFSSL_HAPROXY)
-#define SSL_get_rbio                      wolfSSL_SSL_get_rbio
-#define SSL_get_wbio                      wolfSSL_SSL_get_wbio
-#define SSL_do_handshake                  wolfSSL_SSL_do_handshake
-#define SSL_get_ciphers(x)                wolfSSL_get_ciphers_compat(x)
-#define SSL_SESSION_get_id                wolfSSL_SESSION_get_id
-#define ASN1_STRING_get0_data             wolfSSL_ASN1_STRING_data
-#define SSL_get_cipher_bits(s,np)         wolfSSL_CIPHER_get_bits(SSL_get_current_cipher(s),np)
-#define sk_SSL_CIPHER_num                 wolfSSL_sk_SSL_CIPHER_num
-#define sk_SSL_COMP_zero                  wolfSSL_sk_SSL_COMP_zero
-#define sk_SSL_CIPHER_value               wolfSSL_sk_SSL_CIPHER_value
+#define SSL_get_rbio                    wolfSSL_SSL_get_rbio
+#define SSL_get_wbio                    wolfSSL_SSL_get_wbio
+#define SSL_do_handshake                wolfSSL_SSL_do_handshake
+#define SSL_get_ciphers(x)              wolfSSL_get_ciphers_compat(x)
+#define SSL_SESSION_get_id              wolfSSL_SESSION_get_id
+#define SSL_get_cipher_bits(s,np)       \
+                          wolfSSL_CIPHER_get_bits(SSL_get_current_cipher(s),np)
+#define sk_SSL_CIPHER_num               wolfSSL_sk_SSL_CIPHER_num
+#define sk_SSL_COMP_zero                wolfSSL_sk_SSL_COMP_zero
+#define sk_SSL_CIPHER_value             wolfSSL_sk_SSL_CIPHER_value
 #endif /* OPENSSL_ALL || WOLFSSL_HAPROXY */
+#define sk_SSL_CIPHER_dup               wolfSSL_sk_SSL_CIPHER_dup
+#define sk_SSL_CIPHER_free              wolfSSL_sk_SSL_CIPHER_free
+#define sk_SSL_CIPHER_find              wolfSSL_sk_SSL_CIPHER_find
 
-#if defined(OPENSSL_ALL) || defined(WOLFSSL_ASIO)
+#if defined(OPENSSL_ALL) || defined(WOLFSSL_ASIO) || defined(WOLFSSL_HAPROXY) \
+    || defined(WOLFSSL_NGINX)
 #include <wolfssl/openssl/pem.h>
 
-typedef STACK_OF(WOLFSSL_ASN1_OBJECT) GENERAL_NAMES;
 #define SSL_CTRL_CHAIN       88
-#define BIO_CTRL_WPENDING    13
 #define GEN_IPADD            7
 #define ERR_LIB_SSL          20
 #define SSL_R_SHORT_READ     10
 #define ERR_R_PEM_LIB        9
 #define V_ASN1_IA5STRING     22
-#define SSL_CTRL_MODE        33       
+#define V_ASN1_UTF8STRING    12
+#define SSL_CTRL_MODE        33
+
+#define SSL_CTRL_CLEAR_EXTRA_CHAIN_CERTS        83
 
-#define SSL_CTX_clear_chain_certs(ctx)   SSL_CTX_set0_chain(ctx,NULL)
-#define d2i_RSAPrivateKey_bio            wolfSSL_d2i_RSAPrivateKey_bio
-#define SSL_CTX_use_RSAPrivateKey        wolfSSL_CTX_use_RSAPrivateKey
-#define d2i_PrivateKey_bio               wolfSSL_d2i_PrivateKey_bio
-#define ASN1_IA5STRING                   WOLFSSL_ASN1_STRING
-#define ASN1_OCTET_STRING                WOLFSSL_ASN1_STRING
-#define BIO_new_bio_pair                 wolfSSL_BIO_new_bio_pair
-#define SSL_get_verify_callback          wolfSSL_get_verify_callback
-#define GENERAL_NAMES_free(GENERAL_NAMES)NULL
-
-#define SSL_set_mode(ssl,op) wolfSSL_ctrl((ssl),SSL_CTRL_MODE,(op),NULL)
-#define BIO_wpending(b)      wolfSSL_BIO_ctrl(b,BIO_CTRL_WPENDING,0,NULL)
-#define SSL_CTX_use_certificate_ASN1 wolfSSL_CTX_use_certificate_ASN1
-#define SSL_CTX_set0_chain(ctx,sk) \
-        wolfSSL_CTX_ctrl(ctx,SSL_CTRL_CHAIN,0,(char *)(sk))
-#define SSL_CTX_get_app_data(ctx)        wolfSSL_CTX_get_ex_data(ctx,0)
-#define SSL_CTX_set_app_data(ctx,arg)    wolfSSL_CTX_set_ex_data(ctx,0, \
-                                                              (char *)(arg))
-#endif /* OPENSSL_ALL || WOLFSSL_ASIO */
+#define SSL_CTX_clear_chain_certs(ctx) SSL_CTX_set0_chain(ctx,NULL)
+#define d2i_RSAPrivateKey_bio           wolfSSL_d2i_RSAPrivateKey_bio
+#define SSL_CTX_use_RSAPrivateKey       wolfSSL_CTX_use_RSAPrivateKey
+#define d2i_PrivateKey_bio              wolfSSL_d2i_PrivateKey_bio
+#define BIO_new_bio_pair                wolfSSL_BIO_new_bio_pair
+#define SSL_get_verify_callback         wolfSSL_get_verify_callback
 
-#define SSL_CTX_set_tmp_dh wolfSSL_CTX_set_tmp_dh
+#define SSL_set_mode(ssl,op)         wolfSSL_ctrl((ssl),SSL_CTRL_MODE,(op),NULL)
 
-#define BIO_new_file        wolfSSL_BIO_new_file
-#define BIO_ctrl            wolfSSL_BIO_ctrl
-#define BIO_ctrl_pending    wolfSSL_BIO_ctrl_pending
-#define BIO_get_mem_ptr     wolfSSL_BIO_get_mem_ptr
-#define BIO_int_ctrl        wolfSSL_BIO_int_ctrl
-#define BIO_reset           wolfSSL_BIO_reset
-#define BIO_s_file          wolfSSL_BIO_s_file
-#define BIO_s_bio           wolfSSL_BIO_s_bio
-#define BIO_s_socket        wolfSSL_BIO_s_socket
-#define BIO_set_fd          wolfSSL_BIO_set_fd
-#define BIO_ctrl_reset_read_request wolfSSL_BIO_ctrl_reset_read_request
+#define SSL_CTX_use_certificate_ASN1    wolfSSL_CTX_use_certificate_ASN1
+#define SSL_CTX_set0_chain(ctx,sk) \
+                             wolfSSL_CTX_ctrl(ctx,SSL_CTRL_CHAIN,0,(char *)(sk))
+#define SSL_CTX_get_app_data(ctx)       wolfSSL_CTX_get_ex_data(ctx,0)
+#define SSL_CTX_set_app_data(ctx,arg)   wolfSSL_CTX_set_ex_data(ctx,0, \
+                                                                  (char *)(arg))
+#endif /* OPENSSL_ALL || WOLFSSL_ASIO || WOLFSSL_HAPROXY */
 
-#define BIO_set_write_buf_size wolfSSL_BIO_set_write_buf_size
-#define BIO_make_bio_pair   wolfSSL_BIO_make_bio_pair
-
-#define BIO_set_fp          wolfSSL_BIO_set_fp
-#define BIO_get_fp          wolfSSL_BIO_get_fp
-#define BIO_seek            wolfSSL_BIO_seek
-#define BIO_write_filename  wolfSSL_BIO_write_filename
-#define BIO_set_mem_eof_return wolfSSL_BIO_set_mem_eof_return
+#define SSL_CTX_set_tmp_dh              wolfSSL_CTX_set_tmp_dh
 
 #define TLSEXT_STATUSTYPE_ocsp  1
 
-#define SSL_set_options      wolfSSL_set_options
-#define SSL_get_options      wolfSSL_get_options
-#define SSL_clear_options    wolfSSL_clear_options
-#define SSL_set_tmp_dh       wolfSSL_set_tmp_dh
+#define SSL_set_options                 wolfSSL_set_options
+#define SSL_get_options                 wolfSSL_get_options
+#define SSL_clear_options               wolfSSL_clear_options
+#define SSL_set_tmp_dh                  wolfSSL_set_tmp_dh
 #define SSL_clear_num_renegotiations    wolfSSL_clear_num_renegotiations
-#define SSL_total_renegotiations       wolfSSL_total_renegotiations
+#define SSL_total_renegotiations        wolfSSL_total_renegotiations
+#define SSL_num_renegotiations          wolfSSL_num_renegotiations
+#define SSL_renegotiate                 wolfSSL_Rehandshake
+#define SSL_get_secure_renegotiation_support wolfSSL_SSL_get_secure_renegotiation_support
 #define SSL_set_tlsext_debug_arg        wolfSSL_set_tlsext_debug_arg
 #define SSL_set_tlsext_status_type      wolfSSL_set_tlsext_status_type
 #define SSL_set_tlsext_status_exts      wolfSSL_set_tlsext_status_exts
 #define SSL_get_tlsext_status_ids       wolfSSL_get_tlsext_status_ids
 #define SSL_set_tlsext_status_ids       wolfSSL_set_tlsext_status_ids
-#define SSL_get_tlsext_status_ocsp_resp wolfSSL_get_tlsext_status_ocsp_resp
-#define SSL_set_tlsext_status_ocsp_resp wolfSSL_set_tlsext_status_ocsp_resp
-
-#define SSL_CTX_add_extra_chain_cert wolfSSL_CTX_add_extra_chain_cert
-#define SSL_CTX_get_read_ahead wolfSSL_CTX_get_read_ahead
-#define SSL_CTX_set_read_ahead wolfSSL_CTX_set_read_ahead
-#define SSL_CTX_set_tlsext_status_arg wolfSSL_CTX_set_tlsext_status_arg
-#define SSL_CTX_set_tlsext_opaque_prf_input_callback_arg \
-                   wolfSSL_CTX_set_tlsext_opaque_prf_input_callback_arg
-#define SSL_get_server_random wolfSSL_get_server_random
-
-#define SSL_get_tlsext_status_exts wolfSSL_get_tlsext_status_exts
+#define SSL_get_tlsext_status_ocsp_res  wolfSSL_get_tlsext_status_ocsp_resp
+#define SSL_set_tlsext_status_ocsp_res  wolfSSL_set_tlsext_status_ocsp_resp
+#define SSL_set_tlsext_status_ocsp_resp  wolfSSL_set_tlsext_status_ocsp_resp
+#define SSL_get_tlsext_status_ocsp_resp  wolfSSL_get_tlsext_status_ocsp_resp
 
-#define BIO_C_SET_FILE_PTR                      106
-#define BIO_C_GET_FILE_PTR                      107
-#define BIO_C_SET_FILENAME                      108
-#define BIO_C_FILE_SEEK                         128
-#define BIO_C_SET_BUF_MEM_EOF_RETURN            130
-#define BIO_C_SET_WRITE_BUF_SIZE                136
-#define BIO_C_MAKE_BIO_PAIR                     138
+#define SSL_CTX_add_extra_chain_cert    wolfSSL_CTX_add_extra_chain_cert
+#define SSL_CTX_get_read_ahead          wolfSSL_CTX_get_read_ahead
+#define SSL_CTX_set_read_ahead          wolfSSL_CTX_set_read_ahead
+#define SSL_CTX_set_tlsext_status_arg   wolfSSL_CTX_set_tlsext_status_arg
+#define SSL_CTX_set_tlsext_opaque_prf_input_callback_arg \
+                            wolfSSL_CTX_set_tlsext_opaque_prf_input_callback_arg
+#define SSL_get_server_random           wolfSSL_get_server_random
+#define SSL_get_server_tmp_key          wolfSSL_get_server_tmp_key
 
-#define BIO_CTRL_RESET          1
-#define BIO_CTRL_INFO           3
-#define BIO_CTRL_FLUSH          11
-#define BIO_CLOSE               0x01
-#define BIO_FP_WRITE            0x04
+#define SSL_CTX_set_min_proto_version   wolfSSL_CTX_set_min_proto_version
+#define SSL_CTX_set_max_proto_version   wolfSSL_CTX_set_max_proto_version
+
+#define SSL_get_tlsext_status_exts      wolfSSL_get_tlsext_status_exts
 
 #define SSL_CTRL_CLEAR_NUM_RENEGOTIATIONS         11
 #define SSL_CTRL_GET_TOTAL_RENEGOTIATIONS         12
 #define SSL_CTRL_SET_TMP_DH                       3
+#define SSL_CTRL_SET_TMP_ECDH                     4
 #define SSL_CTRL_SET_TLSEXT_DEBUG_ARG             57
 #define SSL_CTRL_SET_TLSEXT_STATUS_REQ_TYPE       65
 #define SSL_CTRL_GET_TLSEXT_STATUS_REQ_EXTS       66
@@ -733,8 +1005,8 @@
 #define SSL_CTRL_GET_TLSEXT_STATUS_REQ_OCSP_RESP  70
 #define SSL_CTRL_SET_TLSEXT_STATUS_REQ_OCSP_RESP  71
 
-#define SSL_CTRL_SET_TMP_DH                     3
 #define SSL_CTRL_EXTRA_CHAIN_CERT               14
+#define SSL_CTRL_OPTIONS                        32
 
 #define SSL_CTRL_SET_SESS_CACHE_SIZE            42
 #define SSL_CTRL_GET_READ_AHEAD                 40
@@ -744,131 +1016,117 @@
 #define SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB_ARG   64
 
 #define SSL_CTRL_GET_EXTRA_CHAIN_CERTS          82
-
-#define SSL_ctrl     wolfSSL_ctrl
-#define SSL_CTX_ctrl wolfSSL_CTX_ctrl
-
-#define X509_V_FLAG_CRL_CHECK     WOLFSSL_CRL_CHECK
-#define X509_V_FLAG_CRL_CHECK_ALL WOLFSSL_CRL_CHECKALL
+#define SSL_CTRL_GET_SESSION_REUSED             0
 
-#define X509_V_FLAG_USE_CHECK_TIME WOLFSSL_USE_CHECK_TIME
-#define X509_V_FLAG_NO_CHECK_TIME  WOLFSSL_NO_CHECK_TIME
-#define X509_CHECK_FLAG_NO_WILDCARDS WOLFSSL_NO_WILDCARDS
+#define SSL_ctrl                        wolfSSL_ctrl
+#define SSL_CTX_ctrl                    wolfSSL_CTX_ctrl
+#define SSL_CTX_callback_ctrl           wolfSSL_CTX_callback_ctrl
 
-#define SSL3_RANDOM_SIZE                 32 /* same as RAN_LEN in internal.h */
-#if defined(HAVE_STUNNEL) || defined(WOLFSSL_NGINX) || defined(OPENSSL_EXTRA) \
-                                                         || defined(OPENSSL_ALL)
-#include <wolfssl/openssl/asn1.h>
+#define SSL3_RANDOM_SIZE                32 /* same as RAN_LEN in internal.h */
 
 #define SSL2_VERSION                     0x0002
 #define SSL3_VERSION                     0x0300
 #define TLS1_VERSION                     0x0301
+#define TLS1_1_VERSION                   0x0302
+#define TLS1_2_VERSION                   0x0303
+#define TLS1_3_VERSION                   0x0304
 #define DTLS1_VERSION                    0xFEFF
+#define DTLS1_2_VERSION                  0xFEFD
+
+#if defined(HAVE_STUNNEL) || defined(WOLFSSL_NGINX) || defined(OPENSSL_EXTRA) \
+                                                         || defined(OPENSSL_ALL)
+#include <wolfssl/openssl/asn1.h>
+
 #define SSL23_ST_SR_CLNT_HELLO_A        (0x210|0x2000)
 #define SSL3_ST_SR_CLNT_HELLO_A         (0x110|0x2000)
+
+#define SSL3_AD_BAD_CERTIFICATE          bad_certificate
+#define SSL_AD_BAD_CERTIFICATE           SSL3_AD_BAD_CERTIFICATE
+
 #define ASN1_STRFLGS_ESC_MSB             4
 
-#define SSL_MAX_MASTER_KEY_LENGTH        WOLFSSL_MAX_MASTER_KEY_LENGTH
+#define SSL_MAX_MASTER_KEY_LENGTH       WOLFSSL_MAX_MASTER_KEY_LENGTH
 
-#define SSL_alert_desc_string_long       wolfSSL_alert_desc_string_long
-#define SSL_alert_type_string_long       wolfSSL_alert_type_string_long
-#define SSL_CIPHER_get_bits              wolfSSL_CIPHER_get_bits
-#define sk_X509_NAME_num                 wolfSSL_sk_X509_NAME_num
-#define sk_GENERAL_NAME_num              wolfSSL_sk_GENERAL_NAME_num
-#define sk_X509_num                      wolfSSL_sk_X509_num
-#define X509_NAME_print_ex               wolfSSL_X509_NAME_print_ex
-#define X509_get0_pubkey_bitstr          wolfSSL_X509_get0_pubkey_bitstr
-#define SSL_CTX_get_options              wolfSSL_CTX_get_options
+#define SSL_alert_desc_string_long      wolfSSL_alert_desc_string_long
+#define SSL_alert_type_string_long      wolfSSL_alert_type_string_long
+#define SSL_CIPHER_get_bits             wolfSSL_CIPHER_get_bits
+#define sk_GENERAL_NAME_num             wolfSSL_sk_GENERAL_NAME_num
+#define SSL_CTX_get_options             wolfSSL_CTX_get_options
 
-#define SSL_CTX_flush_sessions           wolfSSL_flush_sessions
-#define SSL_CTX_add_session              wolfSSL_CTX_add_session
-#define SSL_get_SSL_CTX                  wolfSSL_get_SSL_CTX
-#define SSL_version                      wolfSSL_version
-#define SSL_get_state                    wolfSSL_get_state
-#define SSL_state_string_long            wolfSSL_state_string_long
+#define SSL_CTX_flush_sessions          wolfSSL_flush_sessions
+#define SSL_CTX_add_session             wolfSSL_CTX_add_session
+#define SSL_version(x)                  wolfSSL_version ((WOLFSSL*) (x))
+#define SSL_get_state                   wolfSSL_get_state
+#define SSL_state_string_long           wolfSSL_state_string_long
 
-#define sk_X509_NAME_value               wolfSSL_sk_X509_NAME_value
-#define sk_X509_value                    wolfSSL_sk_X509_value
-#define sk_GENERAL_NAME_value            wolfSSL_sk_GENERAL_NAME_value
-#define SSL_SESSION_get_ex_data          wolfSSL_SESSION_get_ex_data
-#define SSL_SESSION_set_ex_data          wolfSSL_SESSION_set_ex_data
-#define SSL_SESSION_get_ex_new_index     wolfSSL_SESSION_get_ex_new_index
-#define SSL_SESSION_get_id               wolfSSL_SESSION_get_id
-#define CRYPTO_dynlock_value             WOLFSSL_dynlock_value
-typedef WOLFSSL_ASN1_BIT_STRING    ASN1_BIT_STRING;
-#define X509_STORE_get1_certs            wolfSSL_X509_STORE_get1_certs
-#define sk_X509_pop_free                 wolfSSL_sk_X509_pop_free
-#define sk_GENERAL_NAME_pop_free         wolfSSL_sk_GENERAL_NAME_pop_free
-#define GENERAL_NAME_free                NULL
+#define GENERAL_NAME_new                wolfSSL_GENERAL_NAME_new
+#define GENERAL_NAME_free               wolfSSL_GENERAL_NAME_free
+#define sk_GENERAL_NAME_push            wolfSSL_sk_GENERAL_NAME_push
+#define sk_GENERAL_NAME_value           wolfSSL_sk_GENERAL_NAME_value
+#define SSL_SESSION_get_ex_data         wolfSSL_SESSION_get_ex_data
+#define SSL_SESSION_set_ex_data         wolfSSL_SESSION_set_ex_data
+#define SSL_SESSION_get_ex_new_index    wolfSSL_SESSION_get_ex_new_index
+#define SSL_SESSION_get_id              wolfSSL_SESSION_get_id
+#define SSL_SESSION_print               wolfSSL_SESSION_print
+#define sk_GENERAL_NAME_pop_free        wolfSSL_sk_GENERAL_NAME_pop_free
+#define sk_GENERAL_NAME_free            wolfSSL_sk_GENERAL_NAME_free
+#define sk_ASN1_OBJECT_pop_free         wolfSSL_sk_ASN1_OBJECT_pop_free
+#define GENERAL_NAME_free               wolfSSL_GENERAL_NAME_free
+#define GENERAL_NAMES_free              wolfSSL_GENERAL_NAMES_free
 
-#define SSL3_AL_FATAL                        2
-#define SSL_TLSEXT_ERR_OK                    0
-#define SSL_TLSEXT_ERR_ALERT_FATAL           alert_fatal
-#define SSL_TLSEXT_ERR_NOACK                 alert_warning
-#define TLSEXT_NAMETYPE_host_name            WOLFSSL_SNI_HOST_NAME
+#define AUTHORITY_INFO_ACCESS_free      wolfSSL_AUTHORITY_INFO_ACCESS_free
+#define sk_ACCESS_DESCRIPTION_pop_free  wolfSSL_sk_ACCESS_DESCRIPTION_pop_free
+#define sk_ACCESS_DESCRIPTION_free      wolfSSL_sk_ACCESS_DESCRIPTION_free
+#define ACCESS_DESCRIPTION_free         wolfSSL_ACCESS_DESCRIPTION_free
+
+#define SSL3_AL_FATAL                   2
+#define SSL_TLSEXT_ERR_OK               0
+#define SSL_TLSEXT_ERR_ALERT_FATAL      alert_fatal
+#define SSL_TLSEXT_ERR_NOACK            alert_warning
+#define TLSEXT_NAMETYPE_host_name       WOLFSSL_SNI_HOST_NAME
 
-#define SSL_set_tlsext_host_name wolfSSL_set_tlsext_host_name
-#define SSL_get_servername wolfSSL_get_servername
-#define SSL_set_SSL_CTX                  wolfSSL_set_SSL_CTX
-#define SSL_CTX_get_verify_callback      wolfSSL_CTX_get_verify_callback
+#define SSL_set_tlsext_host_name        wolfSSL_set_tlsext_host_name
+#define SSL_get_servername              wolfSSL_get_servername
+#define SSL_set_SSL_CTX                 wolfSSL_set_SSL_CTX
+#define SSL_CTX_get_verify_callback     wolfSSL_CTX_get_verify_callback
 #define SSL_CTX_set_tlsext_servername_callback wolfSSL_CTX_set_tlsext_servername_callback
-#define SSL_CTX_set_tlsext_servername_arg      wolfSSL_CTX_set_servername_arg
+#define SSL_CTX_set_tlsext_servername_arg wolfSSL_CTX_set_servername_arg
 
-#define PSK_MAX_PSK_LEN                      256
-#define PSK_MAX_IDENTITY_LEN                 128
-#define ERR_remove_thread_state wolfSSL_ERR_remove_thread_state
-#define SSL_CTX_clear_options wolfSSL_CTX_clear_options
+#define PSK_MAX_PSK_LEN                 256
+#define PSK_MAX_IDENTITY_LEN            128
+#define SSL_CTX_clear_options           wolfSSL_CTX_clear_options
 
 
 #endif /* HAVE_STUNNEL || WOLFSSL_NGINX */
-#define SSL_CTX_get_default_passwd_cb          wolfSSL_CTX_get_default_passwd_cb
+#define SSL_CTX_get_default_passwd_cb   wolfSSL_CTX_get_default_passwd_cb
 #define SSL_CTX_get_default_passwd_cb_userdata wolfSSL_CTX_get_default_passwd_cb_userdata
 
-/* certificate extension NIDs */
-#define NID_basic_constraints         133
-#define NID_key_usage                 129  /* 2.5.29.15 */
-#define NID_ext_key_usage             151  /* 2.5.29.37 */
-#define NID_subject_key_identifier    128
-#define NID_authority_key_identifier  149
-#define NID_private_key_usage_period  130  /* 2.5.29.16 */
-#define NID_subject_alt_name          131
-#define NID_issuer_alt_name           132
-#define NID_info_access               69
-#define NID_sinfo_access              79  /* id-pe 11 */
-#define NID_name_constraints          144 /* 2.5.29.30 */
-#define NID_certificate_policies      146
-#define NID_policy_mappings           147
-#define NID_policy_constraints        150
-#define NID_inhibit_any_policy        168 /* 2.5.29.54 */
-#define NID_tlsfeature                92  /* id-pe 24 */
-#define NID_commonName 0x03 /* matchs ASN_COMMON_NAME in asn.h */
-#define NID_domainComponent 0x19
-                            /* matchs ASN_DOMAIN_COMPONENT in asn.h */
-
- /* matchs ASN_..._NAME in asn.h */
-#define NID_surname      0x04,   /* SN */
-#define NID_serialNumber 0x05,   /* serialNumber */
-#define NID_countryName  0x06,   /* C  */
-#define NID_localityName 0x07,   /* L  */
-#define NID_stateOrProvinceName    0x08,   /* ST */
-#define NID_organizationName       0x0a,   /* O  */
-#define NID_organizationalUnitName 0x0b,   /* OU */
-
-
 #define SSL_CTX_set_msg_callback        wolfSSL_CTX_set_msg_callback
 #define SSL_set_msg_callback            wolfSSL_set_msg_callback
 #define SSL_CTX_set_msg_callback_arg    wolfSSL_CTX_set_msg_callback_arg
 #define SSL_set_msg_callback_arg        wolfSSL_set_msg_callback_arg
 
+#define SSL_CTX_clear_extra_chain_certs wolfSSL_CTX_clear_extra_chain_certs
+
 
 /* Nginx uses this to determine if reached end of certs in file.
  * PEM_read_bio_X509 is called and the return error is lost.
  * The error that needs to be detected is: SSL_NO_PEM_HEADER.
  */
-#define ERR_GET_LIB(l)  (int)((((unsigned long)l)>>24L)&0xffL)
-#define PEM_R_NO_START_LINE     108
+#define ERR_GET_FUNC(l) (int)((((unsigned long)l) >> 12L) & 0xfffL)
+
+#define PEM_F_PEM_DEF_CALLBACK  100
+
+/* Avoid wolfSSL error code range */
+#define PEM_R_NO_START_LINE             (-MIN_CODE_E + 1)
+#define PEM_R_PROBLEMS_GETTING_PASSWORD (-MIN_CODE_E + 2)
+#define PEM_R_BAD_PASSWORD_READ         (-MIN_CODE_E + 3)
+#define PEM_R_BAD_DECRYPT               (-MIN_CODE_E + 4)
+
 #define ERR_LIB_PEM             9
 #define ERR_LIB_X509            10
+#define ERR_LIB_EVP             11
+#define ERR_LIB_ASN1            12
 
 #if defined(WOLFSSL_NGINX) || defined(WOLFSSL_HAPROXY) || \
     defined(WOLFSSL_MYSQL_COMPATIBLE) || defined(OPENSSL_ALL) || \
@@ -901,58 +1159,84 @@
 #define SSL_R_UNKNOWN_PROTOCOL                     VERSION_ERROR
 #define SSL_R_WRONG_VERSION_NUMBER                 VERSION_ERROR
 #define SSL_R_DECRYPTION_FAILED_OR_BAD_RECORD_MAC  ENCRYPT_ERROR
+#define SSL_R_HTTPS_PROXY_REQUEST                  PARSE_ERROR
+#define SSL_R_HTTP_REQUEST                         PARSE_ERROR
+#define SSL_R_UNSUPPORTED_PROTOCOL                 VERSION_ERROR
+
 
 #ifdef HAVE_SESSION_TICKET
 #define SSL_OP_NO_TICKET                  SSL_OP_NO_TICKET
 #define SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB 72
 #endif
 
-#define OPENSSL_config	                  wolfSSL_OPENSSL_config
-#define OPENSSL_memdup                    wolfSSL_OPENSSL_memdup
-#define X509_get_ex_new_index             wolfSSL_X509_get_ex_new_index
-#define X509_get_ex_data                  wolfSSL_X509_get_ex_data
-#define X509_set_ex_data                  wolfSSL_X509_set_ex_data
-#define X509_NAME_digest                  wolfSSL_X509_NAME_digest
-#define SSL_CTX_get_timeout               wolfSSL_SSL_CTX_get_timeout
-#define SSL_CTX_set_tmp_ecdh              wolfSSL_SSL_CTX_set_tmp_ecdh
-#define SSL_CTX_remove_session            wolfSSL_SSL_CTX_remove_session
-#define SSL_get_rbio                      wolfSSL_SSL_get_rbio
-#define SSL_get_wbio                      wolfSSL_SSL_get_wbio
-#define SSL_do_handshake                  wolfSSL_SSL_do_handshake
-#define SSL_in_init                       wolfSSL_SSL_in_init
-#define SSL_get0_session                  wolfSSL_SSL_get0_session
-#define X509_check_host                   wolfSSL_X509_check_host
-#define i2a_ASN1_INTEGER                  wolfSSL_i2a_ASN1_INTEGER
-#define i2c_ASN1_INTEGER                  wolfSSL_i2c_ASN1_INTEGER
-#define ERR_peek_error_line_data          wolfSSL_ERR_peek_error_line_data
-#define ERR_load_BIO_strings              wolfSSL_ERR_load_BIO_strings
-#define SSL_CTX_set_tlsext_ticket_key_cb  wolfSSL_CTX_set_tlsext_ticket_key_cb
-#define X509_email_free                   wolfSSL_X509_email_free
-#define X509_get1_ocsp                    wolfSSL_X509_get1_ocsp
-#define SSL_CTX_set_tlsext_status_cb      wolfSSL_CTX_set_tlsext_status_cb
-#define X509_check_issued                 wolfSSL_X509_check_issued
-#define X509_dup                          wolfSSL_X509_dup
-#define X509_STORE_CTX_new                wolfSSL_X509_STORE_CTX_new
-#define X509_STORE_CTX_free               wolfSSL_X509_STORE_CTX_free
-#define SSL_CTX_get_extra_chain_certs     wolfSSL_CTX_get_extra_chain_certs
-#define X509_STORE_CTX_get1_issuer        wolfSSL_X509_STORE_CTX_get1_issuer
-#define sk_OPENSSL_STRING_value           wolfSSL_sk_WOLFSSL_STRING_value
-#define SSL_get0_alpn_selected            wolfSSL_get0_alpn_selected
-#define SSL_select_next_proto             wolfSSL_select_next_proto
-#define SSL_CTX_set_alpn_select_cb        wolfSSL_CTX_set_alpn_select_cb
-#define SSL_CTX_set_next_protos_advertised_cb wolfSSL_CTX_set_next_protos_advertised_cb
-#define SSL_CTX_set_next_proto_select_cb  wolfSSL_CTX_set_next_proto_select_cb
-#define SSL_get0_next_proto_negotiated    wolfSSL_get0_next_proto_negotiated
-#define SSL_is_server                     wolfSSL_is_server
-#define SSL_CTX_set1_curves_list          wolfSSL_CTX_set1_curves_list
+#define OPENSSL_config	                wolfSSL_OPENSSL_config
+#define OPENSSL_memdup                  wolfSSL_OPENSSL_memdup
+#define SSL_CTX_get_timeout             wolfSSL_SSL_CTX_get_timeout
+#define SSL_CTX_set_tmp_ecdh            wolfSSL_SSL_CTX_set_tmp_ecdh
+#define SSL_CTX_remove_session          wolfSSL_SSL_CTX_remove_session
+#define SSL_get_rbio                    wolfSSL_SSL_get_rbio
+#define SSL_get_wbio                    wolfSSL_SSL_get_wbio
+#define SSL_do_handshake                wolfSSL_SSL_do_handshake
+#define SSL_in_init                     wolfSSL_SSL_in_init
+#define SSL_in_connect_init             wolfSSL_SSL_in_connect_init
+#define SSL_get0_session                wolfSSL_SSL_get0_session
+#define SSL_CTX_set_tlsext_ticket_key_cb wolfSSL_CTX_set_tlsext_ticket_key_cb
+#define SSL_CTX_set_tlsext_status_cb    wolfSSL_CTX_set_tlsext_status_cb
+#define SSL_CTX_get_extra_chain_certs   wolfSSL_CTX_get_extra_chain_certs
+#define sk_OPENSSL_STRING_value         wolfSSL_sk_WOLFSSL_STRING_value
+#define SSL_get0_alpn_selected          wolfSSL_get0_alpn_selected
+#define SSL_select_next_proto           wolfSSL_select_next_proto
+#define SSL_CTX_set_alpn_select_cb      wolfSSL_CTX_set_alpn_select_cb
+#define SSL_CTX_set_next_protos_advertised_cb  wolfSSL_CTX_set_next_protos_advertised_cb
+#define SSL_CTX_set_next_proto_select_cb wolfSSL_CTX_set_next_proto_select_cb
+#define SSL_set_alpn_protos             wolfSSL_set_alpn_protos
+#define SSL_get0_next_proto_negotiated  wolfSSL_get0_next_proto_negotiated
+#define SSL_is_server                   wolfSSL_is_server
 
-#endif /* WOLFSSL_NGINX || WOLFSSL_HAPROXY || WOLFSSL_MYSQL_COMPATIBLE || 
+#endif /* WOLFSSL_NGINX || WOLFSSL_HAPROXY || WOLFSSL_MYSQL_COMPATIBLE ||
           OPENSSL_ALL || HAVE_LIGHTY */
 
-#define X509_STORE_CTX_set_time           wolfSSL_X509_STORE_CTX_set_time
-#define SSL_CTX_add_client_CA             wolfSSL_CTX_add_client_CA
-#define SSL_CTX_set_srp_password          wolfSSL_CTX_set_srp_password
-#define SSL_CTX_set_srp_username          wolfSSL_CTX_set_srp_username
+#if defined(OPENSSL_EXTRA) && defined(HAVE_ECC)
+#define SSL_CTX_set1_curves_list        wolfSSL_CTX_set1_curves_list
+#define SSL_set1_curves_list            wolfSSL_set1_curves_list
+#endif
+
+#ifdef OPENSSL_EXTRA
+#define SSL_CTX_add_client_CA           wolfSSL_CTX_add_client_CA
+#define SSL_CTX_set_srp_password        wolfSSL_CTX_set_srp_password
+#define SSL_CTX_set_srp_username        wolfSSL_CTX_set_srp_username
+#define SSL_get_SSL_CTX                 wolfSSL_get_SSL_CTX
+#define SSL_get0_param                  wolfSSL_get0_param
+
+#define ERR_NUM_ERRORS                  16
+#define SN_pkcs9_emailAddress           "Email"
+#define LN_pkcs9_emailAddress           "emailAddress"
+#define NID_pkcs9_emailAddress          48
+#define OBJ_pkcs9_emailAddress          1L,2L,840L,113539L,1L,9L,1L
+
+#define SSL_get_rbio                    wolfSSL_SSL_get_rbio
+#define SSL_get_wbio                    wolfSSL_SSL_get_wbio
+#define SSL_do_handshake                wolfSSL_SSL_do_handshake
+#endif  /* OPENSSL_EXTRA */
+
+/* cipher suites for compatibility */
+#define TLS1_CK_ECDHE_RSA_WITH_AES_128_CBC_SHA            (0xc013)
+#define TLS1_CK_ECDHE_RSA_WITH_AES_256_CBC_SHA            (0xc014)
+#define TLS1_CK_ECDHE_RSA_WITH_AES_128_GCM_SHA256         (0xc02f)
+#define TLS1_CK_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256   (0xcca8)
+#define TLS1_CK_ECDHE_ECDSA_WITH_AES_128_CBC_SHA          (0xc009)
+#define TLS1_CK_ECDHE_ECDSA_WITH_AES_256_CBC_SHA          (0xc00a)
+#define TLS1_CK_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256       (0xc02b)
+#define TLS1_CK_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256 (0xcca9)
+
+#define X509_STORE_get0_objects         wolfSSL_X509_STORE_get0_objects
+#define sk_X509_OBJECT_num              wolfSSL_sk_X509_OBJECT_num
+#define sk_X509_OBJECT_value            wolfSSL_sk_X509_OBJECT_value
+#define sk_X509_OBJECT_delete           wolfSSL_sk_X509_OBJECT_delete
+#define X509_OBJECT_free                wolfSSL_X509_OBJECT_free
+#define X509_OBJECT_get_type(x)         0
+
+#define OpenSSL_version(x)              wolfSSL_lib_version()
 
 #ifdef __cplusplus
     } /* extern "C" */
--- a/wolfssl/openssl/stack.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/openssl/stack.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,3 +1,60 @@
-/* stack.h for openssl */
+/* stack.h
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/* stack.h for openSSL */
+
+#ifndef WOLFSSL_STACK_H_
+#define WOLFSSL_STACK_H_
+
+#ifdef __cplusplus
+    extern "C" {
+#endif
 
+typedef void (*wolfSSL_sk_freefunc)(void *);
+
+WOLFSSL_API void wolfSSL_sk_GENERIC_pop_free(WOLFSSL_STACK* sk, wolfSSL_sk_freefunc);
+WOLFSSL_API void wolfSSL_sk_GENERIC_free(WOLFSSL_STACK *);
+WOLFSSL_API int wolfSSL_sk_GENERIC_push(WOLFSSL_STACK *sk, void *data);
+WOLFSSL_API void wolfSSL_sk_pop_free(WOLFSSL_STACK *st, void (*func) (void *));
+WOLFSSL_API void wolfSSL_sk_CONF_VALUE_free(WOLF_STACK_OF(WOLFSSL_CONF_VALUE)* sk);
+WOLFSSL_API WOLFSSL_STACK *wolfSSL_sk_new_null(void);
+
+WOLFSSL_API int wolfSSL_sk_CIPHER_push(WOLFSSL_STACK *st,WOLFSSL_CIPHER *cipher);
+WOLFSSL_API WOLFSSL_CIPHER* wolfSSL_sk_CIPHER_pop(WOLF_STACK_OF(WOLFSSL_CIPHER)* sk);
+WOLFSSL_API WOLFSSL_STACK* wolfSSL_sk_new_cipher(void);
+
+#define OPENSSL_sk_free       wolfSSL_sk_free
+#define OPENSSL_sk_pop_free   wolfSSL_sk_pop_free
+#define OPENSSL_sk_new_null   wolfSSL_sk_new_null
+#define OPENSSL_sk_push       wolfSSL_sk_push
+
+/* provides older OpenSSL API compatibility  */
+#define sk_free         OPENSSL_sk_free
+#define sk_pop_free     OPENSSL_sk_pop_free
+#define sk_new_null     OPENSSL_sk_new_null
+#define sk_push         OPENSSL_sk_push
+
+#ifdef  __cplusplus
+}
+#endif
+
+#endif
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wolfssl/openssl/tls1.h	Thu Jun 04 23:57:22 2020 +0000
@@ -0,0 +1,47 @@
+/* tls1.h
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+#ifndef WOLFSSL_OPENSSL_TLS1_H_
+#define WOLFSSL_OPENSSL_TLS1_H_
+
+#ifndef TLS1_VERSION
+#define TLS1_VERSION                    0x0301
+#endif
+
+#ifndef TLS1_1_VERSION
+#define TLS1_1_VERSION                  0x0302
+#endif
+
+#ifndef TLS1_2_VERSION
+#define TLS1_2_VERSION                  0x0303
+#endif
+
+#ifndef TLS1_3_VERSION
+#define TLS1_3_VERSION                  0x0304
+#endif
+
+#ifndef TLS_MAX_VERSION
+#define TLS_MAX_VERSION                 TLS1_3_VERSION
+#endif
+
+#endif /* WOLFSSL_OPENSSL_TLS1_H_ */
+
--- a/wolfssl/openssl/x509.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/openssl/x509.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,4 +1,27 @@
 /* x509.h for openssl */
 
 #include <wolfssl/openssl/ssl.h>
+#include <wolfssl/openssl/crypto.h>
+#include <wolfssl/openssl/dh.h>
+#include <wolfssl/openssl/ec.h>
+#include <wolfssl/openssl/ecdsa.h>
+
+/* wolfSSL_X509_print_ex flags */
+#define X509_FLAG_COMPAT        (0UL)
+#define X509_FLAG_NO_HEADER     (1UL << 0)
+#define X509_FLAG_NO_VERSION    (1UL << 1)
+#define X509_FLAG_NO_SERIAL     (1UL << 2)
+#define X509_FLAG_NO_SIGNAME    (1UL << 3)
+#define X509_FLAG_NO_ISSUER     (1UL << 4)
+#define X509_FLAG_NO_VALIDITY   (1UL << 5)
+#define X509_FLAG_NO_SUBJECT    (1UL << 6)
+#define X509_FLAG_NO_PUBKEY     (1UL << 7)
+#define X509_FLAG_NO_EXTENSIONS (1UL << 8)
+#define X509_FLAG_NO_SIGDUMP    (1UL << 9)
+#define X509_FLAG_NO_AUX        (1UL << 10)
+#define X509_FLAG_NO_ATTRIBUTES (1UL << 11)
+#define X509_FLAG_NO_IDS        (1UL << 12)
+
+#define XN_FLAG_FN_SN           0
+#define XN_FLAG_SEP_CPLUS_SPC   2
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wolfssl/openssl/x509_vfy.h	Thu Jun 04 23:57:22 2020 +0000
@@ -0,0 +1,46 @@
+/* x509_vfy.h
+ *
+ * Copyright (C) 2006-2017 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/* x509_vfy.h for openSSL */
+
+#ifndef WOLFSSL_x509_vfy_H_
+#define WOLFSSL_x509_vfy_H_
+
+#include <wolfssl/openssl/x509v3.h>
+
+#ifdef __cplusplus
+    extern "C" {
+#endif
+
+#if defined(WOLFSSL_QT) || defined(OPENSSL_ALL)
+    WOLFSSL_API int wolfSSL_X509_STORE_CTX_set_purpose(WOLFSSL_X509_STORE_CTX *ctx, int purpose);
+#endif
+
+#ifdef WOLFSSL_QT
+    #define X509_STORE_CTX_set_purpose  wolfSSL_X509_STORE_CTX_set_purpose
+#endif
+
+#ifdef  __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* WOLFSSL_x509_vfy_H_ */
+
--- a/wolfssl/openssl/x509v3.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/openssl/x509v3.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,3 +1,117 @@
-/* x509v3.h for openssl */
+/* x509v3.h
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/* x509v3.h for openSSL */
+
+#ifndef WOLFSSL_x509v3_H
+#define WOLFSSL_x509v3_H
+
+#include <wolfssl/openssl/conf.h>
+#include <wolfssl/openssl/bio.h>
+
+#ifdef __cplusplus
+    extern "C" {
+#endif
+
+#define X509_PURPOSE_SSL_CLIENT       0
+#define X509_PURPOSE_SSL_SERVER       1
+
+#define NS_SSL_CLIENT                 0
+#define NS_SSL_SERVER                 1
+
+/* Forward reference */
+
+typedef void *(*X509V3_EXT_D2I)(void *, const unsigned char **, long);
+typedef STACK_OF(CONF_VALUE) *(*X509V3_EXT_I2V) (
+                                struct WOLFSSL_v3_ext_method *method,
+                                void *ext, STACK_OF(CONF_VALUE) *extlist);
+typedef char *(*X509V3_EXT_I2S)(struct WOLFSSL_v3_ext_method *method, void *ext);
+typedef int (*X509V3_EXT_I2R) (struct WOLFSSL_v3_ext_method *method,
+                               void *ext, BIO *out, int indent);
+typedef struct WOLFSSL_v3_ext_method X509V3_EXT_METHOD;
 
+struct WOLFSSL_v3_ext_method {
+    int ext_nid;
+    int ext_flags;
+    void *usr_data;
+    X509V3_EXT_D2I d2i;
+    X509V3_EXT_I2V i2v;
+    X509V3_EXT_I2S i2s;
+    X509V3_EXT_I2R i2r;
+};
+
+struct WOLFSSL_X509_EXTENSION {
+    WOLFSSL_ASN1_OBJECT *obj;
+    WOLFSSL_ASN1_BOOLEAN crit;
+    WOLFSSL_ASN1_STRING value;
+    WOLFSSL_v3_ext_method ext_method;
+    WOLFSSL_STACK* ext_sk; /* For extension specific data */
+};
+
+#define WOLFSSL_ASN1_BOOLEAN int
+#define GEN_OTHERNAME   0
+#define GEN_EMAIL       1
+#define GEN_DNS         2
+#define GEN_X400        3
+#define GEN_DIRNAME     4
+#define GEN_EDIPARTY    5
+#define GEN_URI         6
+#define GEN_IPADD       7
+#define GEN_RID         8
+
+#define GENERAL_NAME       WOLFSSL_GENERAL_NAME
+
+#define X509V3_CTX         WOLFSSL_X509V3_CTX
+
+typedef struct WOLFSSL_AUTHORITY_KEYID AUTHORITY_KEYID;
+typedef struct WOLFSSL_BASIC_CONSTRAINTS BASIC_CONSTRAINTS;
+typedef struct WOLFSSL_ACCESS_DESCRIPTION ACCESS_DESCRIPTION;
+typedef WOLF_STACK_OF(WOLFSSL_ACCESS_DESCRIPTION) WOLFSSL_AUTHORITY_INFO_ACCESS;
+
+WOLFSSL_API void wolfSSL_BASIC_CONSTRAINTS_free(WOLFSSL_BASIC_CONSTRAINTS *bc);
+WOLFSSL_API void wolfSSL_AUTHORITY_KEYID_free(WOLFSSL_AUTHORITY_KEYID *id);
+WOLFSSL_API const WOLFSSL_v3_ext_method* wolfSSL_X509V3_EXT_get(
+                                                    WOLFSSL_X509_EXTENSION* ex);
+WOLFSSL_API void* wolfSSL_X509V3_EXT_d2i(WOLFSSL_X509_EXTENSION* ex);
+WOLFSSL_API char* wolfSSL_i2s_ASN1_STRING(WOLFSSL_v3_ext_method *method,
+                                          const WOLFSSL_ASN1_STRING *s);
+WOLFSSL_API int wolfSSL_X509V3_EXT_print(WOLFSSL_BIO *out,
+        WOLFSSL_X509_EXTENSION *ext, unsigned long flag, int indent);
+
+#define BASIC_CONSTRAINTS_free    wolfSSL_BASIC_CONSTRAINTS_free
+#define AUTHORITY_KEYID_free      wolfSSL_AUTHORITY_KEYID_free
+#define SSL_CTX_get_cert_store(x) wolfSSL_CTX_get_cert_store ((WOLFSSL_CTX*) (x))
+#define ASN1_INTEGER              WOLFSSL_ASN1_INTEGER
+#define ASN1_OCTET_STRING         WOLFSSL_ASN1_STRING
+#define X509V3_EXT_get            wolfSSL_X509V3_EXT_get
+#define X509V3_EXT_d2i            wolfSSL_X509V3_EXT_d2i
+#define i2s_ASN1_OCTET_STRING     wolfSSL_i2s_ASN1_STRING
+#define X509V3_EXT_print          wolfSSL_X509V3_EXT_print
+#define X509V3_EXT_conf_nid wolfSSL_X509V3_EXT_conf_nid
+#define X509V3_set_ctx      wolfSSL_X509V3_set_ctx
+#define X509V3_set_ctx_nodb wolfSSL_X509V3_set_ctx_nodb
+
+#ifdef  __cplusplus
+}
+#endif
+
+#endif
 
--- a/wolfssl/options.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/options.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,7 +1,7 @@
 /* wolfssl options.h
  * generated from configure options
  *
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL. (formerly known as CyaSSL)
  *
@@ -15,8 +15,8 @@
 extern "C" {
 #endif
 
-#undef  WOLFSSL_PUBLIC_MP
-#define WOLFSSL_PUBLIC_MP
+#undef  HAVE_FFDHE_2048
+#define HAVE_FFDHE_2048
 
 #ifndef WOLFSSL_OPTIONS_IGNORE_SYS
 #undef  _POSIX_THREADS
@@ -74,6 +74,9 @@
 #undef  WOLFSSL_SHA3
 #define WOLFSSL_SHA3
 
+#undef  WOLFSSL_SHAKE256
+#define WOLFSSL_SHAKE256
+
 #undef  HAVE_POLY1305
 #define HAVE_POLY1305
 
@@ -95,6 +98,12 @@
 #undef  HAVE_EXTENDED_MASTER
 #define HAVE_EXTENDED_MASTER
 
+#undef  NO_RC4
+#define NO_RC4
+
+#undef  HAVE_ENCRYPT_THEN_MAC
+#define HAVE_ENCRYPT_THEN_MAC
+
 #undef  NO_PSK
 #define NO_PSK
 
@@ -113,11 +122,14 @@
 #undef  WC_NO_ASYNC_THREADING
 #define WC_NO_ASYNC_THREADING
 
+#undef  HAVE_DH_DEFAULT_PARAMS
+#define HAVE_DH_DEFAULT_PARAMS
+
 #undef  NO_DES3
 #define NO_DES3
 
 #undef  HAVE___UINT128_T
-#define HAVE___UINT128_T
+#define HAVE___UINT128_T 1
 
 
 #ifdef __cplusplus
--- a/wolfssl/options.h.in	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/options.h.in	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* options.h.in
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
--- a/wolfssl/sniffer.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/sniffer.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* sniffer.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -93,6 +93,126 @@
 };
 
 
+/*
+ * New Sniffer API that provides read-only access to the TLS and cipher
+ * information associated with the SSL session.
+ */
+
+typedef struct SSLInfo
+{
+    unsigned char  isValid;
+            /* indicates if the info in this struct is valid: 0 = no, 1 = yes */
+    unsigned char  protocolVersionMajor;    /* SSL Version: major */
+    unsigned char  protocolVersionMinor;    /* SSL Version: minor */
+    unsigned char  serverCipherSuite0;      /* first byte, normally 0 */
+    unsigned char  serverCipherSuite;       /* second byte, actual suite */
+    unsigned char  serverCipherSuiteName[256];
+            /* cipher name, e.g., "TLS_RSA_..." */
+    unsigned char  serverNameIndication[128];
+    unsigned int   keySize;
+} SSLInfo;
+
+
+WOLFSSL_API
+SSL_SNIFFER_API int ssl_DecodePacketWithSessionInfo(
+                        const unsigned char* packet, int length,
+                        unsigned char** data, SSLInfo* sslInfo, char* error);
+
+typedef void (*SSLConnCb)(const void* session, SSLInfo* info, void* ctx);
+
+WOLFSSL_API
+SSL_SNIFFER_API int ssl_SetConnectionCb(SSLConnCb cb);
+
+WOLFSSL_API
+SSL_SNIFFER_API int ssl_SetConnectionCtx(void* ctx);
+
+
+typedef struct SSLStats
+{
+    unsigned long int sslStandardConns;
+    unsigned long int sslClientAuthConns;
+    unsigned long int sslResumedConns;
+    unsigned long int sslEphemeralMisses;
+    unsigned long int sslResumeMisses;
+    unsigned long int sslCiphersUnsupported;
+    unsigned long int sslKeysUnmatched;
+    unsigned long int sslKeyFails;
+    unsigned long int sslDecodeFails;
+    unsigned long int sslAlerts;
+    unsigned long int sslDecryptedBytes;
+    unsigned long int sslEncryptedBytes;
+    unsigned long int sslEncryptedPackets;
+    unsigned long int sslDecryptedPackets;
+    unsigned long int sslKeyMatches;
+    unsigned long int sslEncryptedConns;
+
+    unsigned long int sslResumptionValid;
+    unsigned long int sslResumptionInserts;
+} SSLStats;
+
+
+WOLFSSL_API
+SSL_SNIFFER_API int ssl_ResetStatistics(void);
+
+
+WOLFSSL_API
+SSL_SNIFFER_API int ssl_ReadStatistics(SSLStats* stats);
+
+
+WOLFSSL_API
+SSL_SNIFFER_API int ssl_ReadResetStatistics(SSLStats* stats);
+
+
+typedef int (*SSLWatchCb)(void* vSniffer,
+                        const unsigned char* certHash,
+                        unsigned int certHashSz,
+                        const unsigned char* certChain,
+                        unsigned int certChainSz,
+                        void* ctx, char* error);
+
+WOLFSSL_API
+SSL_SNIFFER_API int ssl_SetWatchKeyCallback(SSLWatchCb cb, char* error);
+
+WOLFSSL_API
+SSL_SNIFFER_API int ssl_SetWatchKeyCallback_ex(SSLWatchCb cb, int devId,
+                        char* error);
+
+WOLFSSL_API
+SSL_SNIFFER_API int ssl_SetWatchKeyCtx(void* ctx, char* error);
+
+WOLFSSL_API
+SSL_SNIFFER_API int ssl_SetWatchKey_buffer(void* vSniffer,
+                        const unsigned char* key, unsigned int keySz,
+                        int keyType, char* error);
+
+WOLFSSL_API
+SSL_SNIFFER_API int ssl_SetWatchKey_file(void* vSniffer,
+                        const char* keyFile, int keyType,
+                        const char* password, char* error);
+
+
+typedef int (*SSLStoreDataCb)(const unsigned char* decryptBuf,
+        unsigned int decryptBufSz, unsigned int decryptBufOffset, void* ctx);
+
+WOLFSSL_API
+SSL_SNIFFER_API int ssl_SetStoreDataCallback(SSLStoreDataCb cb);
+
+WOLFSSL_API
+SSL_SNIFFER_API int ssl_DecodePacketWithSessionInfoStoreData(
+        const unsigned char* packet, int length, void* ctx,
+        SSLInfo* sslInfo, char* error);
+
+
+WOLFSSL_API
+SSL_SNIFFER_API int ssl_DecodePacketWithChain(void* vChain,
+        unsigned int chainSz, unsigned char** data, char* error);
+
+
+WOLFSSL_API
+SSL_SNIFFER_API int ssl_DecodePacketWithChainSessionInfoStoreData(
+        void* vChain, unsigned int chainSz, void* ctx, SSLInfo* sslInfo,
+        char* error);
+
 #ifdef __cplusplus
     }  /* extern "C" */
 #endif
--- a/wolfssl/sniffer_error.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/sniffer_error.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* sniffer_error.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -118,6 +118,18 @@
 
 #define BAD_DECRYPT_SIZE 81
 #define EXTENDED_MASTER_HASH_STR 82
+#define SPLIT_HANDSHAKE_MSG_STR 83
+#define ECC_DECODE_STR 84
+#define ECC_PUB_DECODE_STR 85
+#define WATCH_CB_MISSING_STR 86
+#define WATCH_HASH_STR 87
+#define WATCH_FAIL_STR 88
+#define BAD_CERT_MSG_STR 89
+#define STORE_DATA_CB_MISSING_STR 90
+
+#define NO_DATA_DEST_STR 91
+#define STORE_DATA_FAIL_STR 92
+#define CHAIN_INPUT_STR 93
 /* !!!! also add to msgTable in sniffer.c and .rc file !!!! */
 
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wolfssl/sniffer_error.rc	Thu Jun 04 23:57:22 2020 +0000
@@ -0,0 +1,117 @@
+
+STRINGTABLE 
+{
+    1, "Out of Memory"
+    2, "New SSL Sniffer Server Registered"
+    3, "Checking IP Header"
+    4, "SSL Sniffer Server Not Registered"
+    5, "Checking TCP Header"
+
+    6, "SSL Sniffer Server Port Not Registered"
+    7, "RSA Private Decrypt Error"
+    8, "RSA Private Decode Error"
+    9, "Set Cipher Spec Error"
+    10, "Server Hello Input Malformed"
+
+    11, "Couldn't Resume Session Error"
+    12, "Server Did Resumption"
+    13, "Client Hello Input Malformed"
+    14, "Client Trying to Resume"
+    15, "Handshake Input Malformed"
+
+    16, "Got Hello Verify msg"
+    17, "Got Server Hello msg"
+    18, "Got Cert Request msg"
+    19, "Got Server Key Exchange msg"
+    20, "Got Cert msg"
+
+    21, "Got Server Hello Done msg"
+    22, "Got Finished msg"
+    23, "Got Client Hello msg"
+    24, "Got Client Key Exchange msg"
+    25, "Got Cert Verify msg"
+
+    26, "Got Unknown Handshake msg"
+    27, "New SSL Sniffer Session created"
+    28, "Couldn't create new SSL"
+    29, "Got a Packet to decode"
+    30, "No data present"
+
+    31, "Session Not Found"
+    32, "Got an Old Client Hello msg"
+    33, "Old Client Hello Input Malformed"
+    34, "Old Client Hello OK"
+    35, "Bad Old Client Hello"
+
+    36, "Bad Record Header"
+    37, "Record Header Input Malformed"
+    38, "Got a HandShake msg"
+    39, "Bad HandShake msg"
+    40, "Got a Change Cipher Spec msg"
+
+    41, "Got Application Data msg"
+    42, "Bad Application Data"
+    43, "Got an Alert msg"
+    44, "Another msg to Process"
+    45, "Removing Session From Table"
+
+    46, "Bad Key File"
+    47, "Wrong IP Version"
+    48, "Wrong Protocol type"
+    49, "Packet Short for header processing"
+    50, "Got Unknown Record Type"
+    
+    51, "Can't Open Trace File"
+    52, "Session in Fatal Error State"
+    53, "Partial SSL record received"
+    54, "Buffer Error, malformed input"
+    55, "Added to Partial Input"
+
+    56, "Received a Duplicate Packet"
+    57, "Received an Out of Order Packet"
+    58, "Received an Overlap Duplicate Packet"
+    59, "Received an Overlap Reassembly Begin Duplicate Packet"
+    60, "Received an Overlap Reassembly End Duplicate Packet"
+    
+    61, "Missed the Client Hello Entirely"
+    62, "Got Hello Request msg"
+    63, "Got Session Ticket msg"
+    64, "Bad Input"
+    65, "Bad Decrypt Type"
+
+    66, "Bad Finished Message Processing"
+    67, "Bad Compression Type"
+    68, "Bad DeriveKeys Error"
+    69, "Saw ACK for Missing Packet Error"
+    70, "Bad Decrypt Operation"
+
+    71, "Decrypt Keys Not Set Up"
+    72, "Late Key Load Error"
+    73, "Got Certificate Status msg"
+    74, "RSA Key Missing Error"
+    75, "Secure Renegotiation Not Supported"
+
+    76, "Get Session Stats Failure"
+    77, "Reassembly Buffer Size Exceeded"
+    78, "Dropping Lost Fragment"
+    79, "Dropping Partial Record"
+    80, "Clear ACK Fault"
+
+    81, "Bad Decrypt Size"
+    82, "Extended Master Secret Hash Error"
+    83, "Handshake Message Split Across TLS Records"
+    84, "ECC Private Decode Error"
+    85, "ECC Public Decode Error"
+
+    86, "Watch callback not set"
+    87, "Watch hash failed"
+    88, "Watch callback failed"
+    89, "Bad Certificate Message"
+    90, "Store data callback not set"
+
+    91, "No data destination Error"
+    92, "Store Data callback failed"
+    93, "Loading chain input"
+}
+
+
--- a/wolfssl/ssl.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/ssl.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* ssl.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -34,20 +34,14 @@
 #include <wolfssl/version.h>
 #include <wolfssl/wolfcrypt/logging.h>
 #include <wolfssl/wolfcrypt/asn_public.h>
+#include <wolfssl/wolfcrypt/types.h>
 
 #ifdef HAVE_WOLF_EVENT
     #include <wolfssl/wolfcrypt/wolfevent.h>
 #endif
 
-#ifndef NO_FILESYSTEM
-    #if defined(FREESCALE_MQX) || defined(FREESCALE_KSDK_MQX)
-        #if MQX_USE_IO_OLD
-            #include <fio.h>
-        #else
-            #include <nio.h>
-        #endif
-    #endif
-#endif
+/* used internally by wolfSSL while OpenSSL types aren't */
+#include <wolfssl/callbacks.h>
 
 #ifdef WOLFSSL_PREFIX
     #include "prefix_ssl.h"
@@ -100,7 +94,6 @@
     #endif
 #endif
 
-
 #ifdef __cplusplus
     extern "C" {
 #endif
@@ -120,14 +113,18 @@
 typedef struct WOLFSSL_X509       WOLFSSL_X509;
 typedef struct WOLFSSL_X509_NAME  WOLFSSL_X509_NAME;
 typedef struct WOLFSSL_X509_NAME_ENTRY  WOLFSSL_X509_NAME_ENTRY;
+typedef struct WOLFSSL_X509_PUBKEY WOLFSSL_X509_PUBKEY;
+typedef struct WOLFSSL_X509_ALGOR WOLFSSL_X509_ALGOR;
 typedef struct WOLFSSL_X509_CHAIN WOLFSSL_X509_CHAIN;
 typedef struct WC_PKCS12          WOLFSSL_X509_PKCS12;
+typedef struct WOLFSSL_X509_INFO  WOLFSSL_X509_INFO;
 
 typedef struct WOLFSSL_CERT_MANAGER WOLFSSL_CERT_MANAGER;
 typedef struct WOLFSSL_SOCKADDR     WOLFSSL_SOCKADDR;
 typedef struct WOLFSSL_CRL          WOLFSSL_CRL;
-
-typedef void  *WOLFSSL_X509_STORE_CTX_verify_cb; /* verify callback */
+typedef struct WOLFSSL_X509_STORE_CTX WOLFSSL_X509_STORE_CTX;
+
+typedef int (*WOLFSSL_X509_STORE_CTX_verify_cb)(int, WOLFSSL_X509_STORE_CTX *);
 
 /* redeclare guard */
 #define WOLFSSL_TYPES_DEFINED
@@ -154,6 +151,9 @@
 typedef struct WOLFSSL_EC_KEY         WOLFSSL_EC_KEY;
 typedef struct WOLFSSL_EC_POINT       WOLFSSL_EC_POINT;
 typedef struct WOLFSSL_EC_GROUP       WOLFSSL_EC_GROUP;
+typedef struct WOLFSSL_EC_BUILTIN_CURVE WOLFSSL_EC_BUILTIN_CURVE;
+/* WOLFSSL_EC_METHOD is just an alias of WOLFSSL_EC_GROUP for now */
+typedef struct WOLFSSL_EC_GROUP       WOLFSSL_EC_METHOD;
 #define WOLFSSL_EC_TYPE_DEFINED
 #endif
 
@@ -167,71 +167,160 @@
 typedef struct WOLFSSL_X509_LOOKUP_METHOD WOLFSSL_X509_LOOKUP_METHOD;
 typedef struct WOLFSSL_CRL            WOLFSSL_X509_CRL;
 typedef struct WOLFSSL_X509_STORE     WOLFSSL_X509_STORE;
-typedef struct WOLFSSL_X509_VERIFY_PARAM  WOLFSSL_X509_VERIFY_PARAM;
+typedef struct WOLFSSL_X509_VERIFY_PARAM WOLFSSL_X509_VERIFY_PARAM;
 typedef struct WOLFSSL_BIO            WOLFSSL_BIO;
 typedef struct WOLFSSL_BIO_METHOD     WOLFSSL_BIO_METHOD;
 typedef struct WOLFSSL_X509_EXTENSION WOLFSSL_X509_EXTENSION;
-typedef struct WOLFSSL_ASN1_TIME      WOLFSSL_ASN1_TIME;
-typedef struct WOLFSSL_ASN1_INTEGER   WOLFSSL_ASN1_INTEGER;
+typedef struct WOLFSSL_CONF_VALUE     WOLFSSL_CONF_VALUE;
 typedef struct WOLFSSL_ASN1_OBJECT    WOLFSSL_ASN1_OBJECT;
+typedef struct WOLFSSL_ASN1_OTHERNAME WOLFSSL_ASN1_OTHERNAME;
+typedef struct WOLFSSL_X509V3_CTX     WOLFSSL_X509V3_CTX;
+typedef struct WOLFSSL_v3_ext_method  WOLFSSL_v3_ext_method;
 
 typedef struct WOLFSSL_ASN1_STRING      WOLFSSL_ASN1_STRING;
 typedef struct WOLFSSL_dynlock_value    WOLFSSL_dynlock_value;
+#ifndef WOLFSSL_DH_TYPE_DEFINED /* guard on redeclaration */
 typedef struct WOLFSSL_DH               WOLFSSL_DH;
+#define WOLFSSL_DH_TYPE_DEFINED /* guard on redeclaration */
+#endif
 typedef struct WOLFSSL_ASN1_BIT_STRING  WOLFSSL_ASN1_BIT_STRING;
+typedef struct WOLFSSL_ASN1_TYPE        WOLFSSL_ASN1_TYPE;
+
+typedef struct WOLFSSL_GENERAL_NAME WOLFSSL_GENERAL_NAME;
+typedef struct WOLFSSL_AUTHORITY_KEYID  WOLFSSL_AUTHORITY_KEYID;
+typedef struct WOLFSSL_BASIC_CONSTRAINTS WOLFSSL_BASIC_CONSTRAINTS;
+typedef struct WOLFSSL_ACCESS_DESCRIPTION WOLFSSL_ACCESS_DESCRIPTION;
+
+#if defined(OPENSSL_ALL) || defined(OPENSSL_EXTRA)
+
+struct WOLFSSL_AUTHORITY_KEYID {
+    WOLFSSL_ASN1_STRING *keyid;
+    WOLFSSL_ASN1_OBJECT *issuer;
+    WOLFSSL_ASN1_INTEGER *serial;
+};
+
+struct WOLFSSL_BASIC_CONSTRAINTS {
+    int ca;
+    WOLFSSL_ASN1_INTEGER *pathlen;
+};
+
+#endif /* OPENSSL_ALL || OPENSSL_EXTRA*/
 
 #define WOLFSSL_ASN1_UTCTIME          WOLFSSL_ASN1_TIME
 #define WOLFSSL_ASN1_GENERALIZEDTIME  WOLFSSL_ASN1_TIME
 
-#define WOLFSSL_ASN1_INTEGER_MAX 20
-struct WOLFSSL_ASN1_INTEGER {
-    /* size can be increased set at 20 for tag, length then to hold at least 16
-     * byte type */
-    unsigned char  intData[WOLFSSL_ASN1_INTEGER_MAX];
-    /* ASN_INTEGER | LENGTH | hex of number */
-    unsigned char  negative;   /* negative number flag */
-
-    unsigned char* data;
-    unsigned int   dataMax;   /* max size of data buffer */
-    unsigned int   isDynamic:1; /* flag for if data pointer dynamic (1 is yes 0 is no) */
-};
-
-struct WOLFSSL_ASN1_TIME {
-    /* MAX_DATA_SIZE is 32 */
-    unsigned char data[32 + 2];
-    /* ASN_TIME | LENGTH | date bytes */
-};
-
 struct WOLFSSL_ASN1_STRING {
+    char strData[CTC_NAME_SIZE];
     int length;
     int type; /* type of string i.e. CTC_UTF8 */
     char* data;
     long flags;
+    unsigned int   isDynamic:1; /* flag for if data pointer dynamic (1 is yes 0 is no) */
 };
 
 #define WOLFSSL_MAX_SNAME 40
+
+
+#define WOLFSSL_ASN1_DYNAMIC 0x1
+#define WOLFSSL_ASN1_DYNAMIC_DATA 0x2
+
+struct WOLFSSL_ASN1_OTHERNAME {
+    WOLFSSL_ASN1_OBJECT* type_id;
+    WOLFSSL_ASN1_TYPE*   value;
+};
+
+struct WOLFSSL_GENERAL_NAME {
+    int type;
+    union {
+        char* ptr;
+        WOLFSSL_ASN1_OTHERNAME* otherName;
+        WOLFSSL_ASN1_STRING* rfc822Name;
+        WOLFSSL_ASN1_STRING* dNSName;
+        WOLFSSL_ASN1_TYPE* x400Address;
+        WOLFSSL_X509_NAME* directoryName;
+        WOLFSSL_ASN1_STRING* uniformResourceIdentifier;
+        WOLFSSL_ASN1_STRING* iPAddress;
+        WOLFSSL_ASN1_OBJECT* registeredID;
+
+        WOLFSSL_ASN1_STRING* ip;
+        WOLFSSL_X509_NAME* dirn;
+        WOLFSSL_ASN1_STRING* ia5;
+        WOLFSSL_ASN1_OBJECT* rid;
+        WOLFSSL_ASN1_TYPE* other;
+    } d; /* dereference */
+};
+
+struct WOLFSSL_ACCESS_DESCRIPTION {
+    WOLFSSL_ASN1_OBJECT*  method;
+    WOLFSSL_GENERAL_NAME* location;
+};
+
+struct WOLFSSL_X509V3_CTX {
+    WOLFSSL_X509* x509;
+};
+
+
+
 struct WOLFSSL_ASN1_OBJECT {
     void*  heap;
-    unsigned char* obj;
+    const unsigned char* obj;
     /* sName is short name i.e sha256 rather than oid (null terminated) */
     char   sName[WOLFSSL_MAX_SNAME];
     int    type; /* oid */
     int    grp;  /* type of OID, i.e. oidCertPolicyType */
+    int    nid;
     unsigned int  objSz;
-    unsigned char dynamic; /* if 1 then obj was dynamiclly created, 0 otherwise */
+#if defined(OPENSSL_ALL) || defined(WOLFSSL_QT) || defined(WOLFSSL_APACHE_HTTPD)
+    int ca;
+    WOLFSSL_ASN1_INTEGER *pathlen;
+#endif
+    unsigned char dynamic; /* if 1 then obj was dynamically created, 0 otherwise */
+
+#if defined(WOLFSSL_APACHE_HTTPD)
+    WOLFSSL_GENERAL_NAME* gn;
+#endif
+
     struct d { /* derefrenced */
+        WOLFSSL_ASN1_STRING* dNSName;
         WOLFSSL_ASN1_STRING  ia5_internal;
         WOLFSSL_ASN1_STRING* ia5; /* points to ia5_internal */
-        WOLFSSL_ASN1_STRING* dNSName;
-        WOLFSSL_ASN1_STRING* iPAddress;
+#if defined(WOLFSSL_QT) || defined(OPENSSL_ALL)
+        WOLFSSL_ASN1_STRING* uniformResourceIdentifier;
+        WOLFSSL_ASN1_STRING  iPAddress_internal;
+        WOLFSSL_ASN1_OTHERNAME* otherName; /* added for Apache httpd */
+#endif
+        WOLFSSL_ASN1_STRING* iPAddress; /* points to iPAddress_internal */
     } d;
 };
 
+/* wrap ASN1 types */
+struct WOLFSSL_ASN1_TYPE {
+    int type;
+    union {
+        char *ptr;
+        WOLFSSL_ASN1_STRING*     asn1_string;
+        WOLFSSL_ASN1_OBJECT*     object;
+        WOLFSSL_ASN1_INTEGER*    integer;
+        WOLFSSL_ASN1_BIT_STRING* bit_string;
+        WOLFSSL_ASN1_STRING*     octet_string;
+        WOLFSSL_ASN1_STRING*     printablestring;
+        WOLFSSL_ASN1_STRING*     ia5string;
+        WOLFSSL_ASN1_UTCTIME*    utctime;
+        WOLFSSL_ASN1_GENERALIZEDTIME* generalizedtime;
+        WOLFSSL_ASN1_STRING*     utf8string;
+        WOLFSSL_ASN1_STRING*     set;
+        WOLFSSL_ASN1_STRING*     sequence;
+    } value;
+};
+
 struct WOLFSSL_EVP_PKEY {
     void* heap;
     int type;         /* openssh dereference */
     int save_type;    /* openssh dereference */
     int pkey_sz;
+    int references;  /*number of times free should be called for complete free*/
+    wolfSSL_Mutex    refMutex; /* ref count mutex */
+
     union {
         char* ptr; /* der format of key / or raw for NTRU */
     } pkey;
@@ -240,10 +329,18 @@
         WOLFSSL_RSA* rsa;
         byte      ownRsa; /* if struct owns RSA and should free it */
     #endif
+    #ifndef NO_DSA
+        WOLFSSL_DSA* dsa;
+        byte      ownDsa; /* if struct owns DSA and should free it */
+    #endif
     #ifdef HAVE_ECC
         WOLFSSL_EC_KEY* ecc;
         byte      ownEcc; /* if struct owns ECC and should free it */
     #endif
+    #ifndef NO_DH
+        WOLFSSL_DH* dh;
+        byte      ownDh; /* if struct owns DH and should free it */
+    #endif
     WC_RNG rng;
     #endif /* OPENSSL_EXTRA || OPENSSL_EXTRA_X509_SMALL */
     #ifdef HAVE_ECC
@@ -251,15 +348,47 @@
     #endif
 };
 typedef struct WOLFSSL_EVP_PKEY WOLFSSL_PKCS8_PRIV_KEY_INFO;
-
 #ifndef WOLFSSL_EVP_TYPE_DEFINED /* guard on redeclaration */
 typedef struct WOLFSSL_EVP_PKEY     WOLFSSL_EVP_PKEY;
+typedef struct WOLFSSL_EVP_MD_CTX   WOLFSSL_EVP_MD_CTX;
 typedef char   WOLFSSL_EVP_MD;
 #define WOLFSSL_EVP_TYPE_DEFINED
 #endif
 
+struct WOLFSSL_X509_PKEY {
+    WOLFSSL_EVP_PKEY* dec_pkey; /* dereferenced by Apache */
+    void* heap;
+};
+typedef struct WOLFSSL_X509_PKEY WOLFSSL_X509_PKEY;
+
+struct WOLFSSL_X509_INFO {
+    WOLFSSL_X509      *x509;
+    WOLFSSL_X509_CRL  *crl;
+    WOLFSSL_X509_PKEY  *x_pkey; /* dereferenced by Apache */
+    EncryptedInfo     enc_cipher;
+    int               enc_len;
+    char              *enc_data;
+    int               num;
+};
+
 #define WOLFSSL_EVP_PKEY_DEFAULT EVP_PKEY_RSA /* default key type */
 
+#if defined(OPENSSL_ALL) || defined(WOLFSSL_QT)
+    #define wolfSSL_SSL_MODE_RELEASE_BUFFERS    0x00000010U
+    #define wolfSSL_SSL_CTRL_SET_TMP_ECDH       4
+#endif
+
+struct WOLFSSL_X509_ALGOR {
+    WOLFSSL_ASN1_OBJECT* algorithm;
+    WOLFSSL_ASN1_TYPE* parameter;
+};
+
+struct WOLFSSL_X509_PUBKEY {
+    WOLFSSL_X509_ALGOR* algor;
+    WOLFSSL_EVP_PKEY* pkey;
+    int pubKeyOID;
+};
+
 
 enum BIO_TYPE {
     WOLFSSL_BIO_BUFFER = 1,
@@ -268,7 +397,8 @@
     WOLFSSL_BIO_MEMORY = 4,
     WOLFSSL_BIO_BIO    = 5,
     WOLFSSL_BIO_FILE   = 6,
-    WOLFSSL_BIO_BASE64 = 7
+    WOLFSSL_BIO_BASE64 = 7,
+    WOLFSSL_BIO_MD     = 8
 };
 
 enum BIO_FLAGS {
@@ -279,16 +409,91 @@
     WOLFSSL_BIO_FLAG_RETRY        = 0x10
 };
 
+enum BIO_CB_OPS {
+    WOLFSSL_BIO_CB_FREE   = 0x01,
+    WOLFSSL_BIO_CB_READ   = 0x02,
+    WOLFSSL_BIO_CB_WRITE  = 0x03,
+    WOLFSSL_BIO_CB_PUTS   = 0x04,
+    WOLFSSL_BIO_CB_GETS   = 0x05,
+    WOLFSSL_BIO_CB_CTRL   = 0x06,
+    WOLFSSL_BIO_CB_RETURN = 0x80
+};
+
 typedef struct WOLFSSL_BUF_MEM {
     char*  data;   /* dereferenced */
     size_t length; /* current length */
     size_t max;    /* maximum length */
 } WOLFSSL_BUF_MEM;
 
+/* custom method with user set callbacks */
+typedef int  (*wolfSSL_BIO_meth_write_cb)(WOLFSSL_BIO*, const char*, int);
+typedef int  (*wolfSSL_BIO_meth_read_cb)(WOLFSSL_BIO *, char *, int);
+typedef int  (*wolfSSL_BIO_meth_puts_cb)(WOLFSSL_BIO*, const char*);
+typedef int  (*wolfSSL_BIO_meth_gets_cb)(WOLFSSL_BIO*, char*, int);
+typedef long (*wolfSSL_BIO_meth_ctrl_get_cb)(WOLFSSL_BIO*, int, long, void*);
+typedef int  (*wolfSSL_BIO_meth_create_cb)(WOLFSSL_BIO*);
+typedef int  (*wolfSSL_BIO_meth_destroy_cb)(WOLFSSL_BIO*);
+
+typedef int wolfSSL_BIO_info_cb(WOLFSSL_BIO *, int, int);
+typedef long (*wolfssl_BIO_meth_ctrl_info_cb)(WOLFSSL_BIO*, int, wolfSSL_BIO_info_cb*);
+
+/* wolfSSL BIO_METHOD type */
+#ifndef MAX_BIO_METHOD_NAME
+#define MAX_BIO_METHOD_NAME 256
+#endif
+struct WOLFSSL_BIO_METHOD {
+    byte type;               /* method type */
+    char name[MAX_BIO_METHOD_NAME];
+    wolfSSL_BIO_meth_write_cb writeCb;
+    wolfSSL_BIO_meth_read_cb readCb;
+    wolfSSL_BIO_meth_puts_cb putsCb;
+    wolfSSL_BIO_meth_gets_cb getsCb;
+    wolfSSL_BIO_meth_ctrl_get_cb ctrlCb;
+    wolfSSL_BIO_meth_create_cb createCb;
+    wolfSSL_BIO_meth_destroy_cb freeCb;
+    wolfssl_BIO_meth_ctrl_info_cb ctrlInfoCb;
+};
+
+/* wolfSSL BIO type */
+typedef long (*wolf_bio_info_cb)(WOLFSSL_BIO *bio, int event, const char *parg,
+                                 int iarg, long larg, long return_value);
+
+struct WOLFSSL_BIO {
+    WOLFSSL_BUF_MEM* mem_buf;
+    WOLFSSL_BIO_METHOD* method;
+    WOLFSSL_BIO* prev;          /* previous in chain */
+    WOLFSSL_BIO* next;          /* next in chain */
+    WOLFSSL_BIO* pair;          /* BIO paired with */
+    void*        heap;          /* user heap hint */
+    void*        ptr;           /* WOLFSSL, file descriptor, MD, or mem buf */
+    void*        usrCtx;        /* user set pointer */
+    char*        infoArg;       /* BIO callback argument */
+    wolf_bio_info_cb infoCb;    /* BIO callback */
+    int          wrSz;          /* write buffer size (mem) */
+    int          wrIdx;         /* current index for write buffer */
+    int          rdIdx;         /* current read index */
+    int          readRq;        /* read request */
+    int          num;           /* socket num or length */
+    int          eof;           /* eof flag */
+    int          flags;
+    byte         type;          /* method type */
+    byte         init:1;        /* bio has been initialized */
+    byte         shutdown:1;    /* close flag */
+#ifdef HAVE_EX_DATA
+    WOLFSSL_CRYPTO_EX_DATA ex_data;
+#endif
+};
+
 typedef struct WOLFSSL_COMP_METHOD {
     int type;            /* stunnel dereference */
 } WOLFSSL_COMP_METHOD;
 
+typedef struct WOLFSSL_COMP {
+    int id;
+    const char *name;
+    WOLFSSL_COMP_METHOD *method;
+} WOLFSSL_COMP;
+
 struct WOLFSSL_X509_LOOKUP_METHOD {
     int type;
 };
@@ -303,6 +508,13 @@
     WOLFSSL_X509_LOOKUP   lookup;
 #ifdef OPENSSL_EXTRA
     int                   isDynamic;
+    WOLFSSL_X509_VERIFY_PARAM* param;    /* certificate validation parameter */
+#endif
+#if defined(OPENSSL_ALL) || defined(WOLFSSL_QT)
+    WOLFSSL_X509_STORE_CTX_verify_cb verify_cb;
+#endif
+#ifdef HAVE_EX_DATA
+    WOLFSSL_CRYPTO_EX_DATA ex_data;
 #endif
 #if defined(OPENSSL_EXTRA) && defined(HAVE_CRL)
     WOLFSSL_X509_CRL *crl;
@@ -313,9 +525,14 @@
 #define WOLFSSL_USE_CHECK_TIME 0x2
 #define WOLFSSL_NO_CHECK_TIME  0x200000
 #define WOLFSSL_NO_WILDCARDS   0x4
+#define WOLFSSL_HOST_NAME_MAX  256
+#define WOLFSSL_MAX_IPSTR 46 /* max ip size IPv4 mapped IPv6 */
 struct WOLFSSL_X509_VERIFY_PARAM {
-    time_t  check_time;
-    unsigned long flags;
+    time_t         check_time;
+    unsigned long  flags;
+    char           hostName[WOLFSSL_HOST_NAME_MAX];
+    unsigned int  hostFlags;
+    char ipasc[WOLFSSL_MAX_IPSTR];
 };
 #endif
 
@@ -342,22 +559,31 @@
     } data;
 } WOLFSSL_X509_OBJECT;
 
+#define WOLFSSL_ASN1_BOOLEAN                int
+
 typedef struct WOLFSSL_BUFFER_INFO {
     unsigned char* buffer;
     unsigned int length;
 } WOLFSSL_BUFFER_INFO;
 
-typedef struct WOLFSSL_X509_STORE_CTX {
+struct WOLFSSL_X509_STORE_CTX {
     WOLFSSL_X509_STORE* store;    /* Store full of a CA cert chain */
-    WOLFSSL_X509* current_cert;   /* stunnel dereference */
+    WOLFSSL_X509* current_cert;   /* current X509 (OPENSSL_EXTRA) */
+#ifdef WOLFSSL_ASIO
     WOLFSSL_X509* current_issuer; /* asio dereference */
+#endif
     WOLFSSL_X509_CHAIN* sesChain; /* pointer to WOLFSSL_SESSION peer chain */
     WOLFSSL_STACK* chain;
 #ifdef OPENSSL_EXTRA
     WOLFSSL_X509_VERIFY_PARAM* param; /* certificate validation parameter */
 #endif
     char* domain;                /* subject CN domain name */
-    void* ex_data;               /* external data, for fortress build */
+#if defined(HAVE_EX_DATA) || defined(FORTRESS)
+    WOLFSSL_CRYPTO_EX_DATA ex_data;  /* external data */
+#endif
+#if defined(WOLFSSL_APACHE_HTTPD) || defined(OPENSSL_EXTRA)
+    int depth;                   /* used in X509_STORE_CTX_*_depth */
+#endif
     void* userCtx;               /* user ctx */
     int   error;                 /* current error */
     int   error_depth;           /* index of cert depth for this error */
@@ -365,7 +591,7 @@
     int   totalCerts;            /* number of peer cert buffers */
     WOLFSSL_BUFFER_INFO* certs;  /* peer certs */
     WOLFSSL_X509_STORE_CTX_verify_cb verify_cb; /* verify callback */
-} WOLFSSL_X509_STORE_CTX;
+};
 
 typedef char* WOLFSSL_STRING;
 
@@ -386,6 +612,7 @@
     certificate_expired             =  45,
     certificate_unknown             =  46,
     illegal_parameter               =  47,
+    unknown_ca                      =  48,
     decode_error                    =  50,
     decrypt_error                   =  51,
     #ifdef WOLFSSL_MYSQL_COMPATIBLE
@@ -394,11 +621,14 @@
     #else
     protocol_version                =  70,
     #endif
+    inappropriate_fallback          =  86,
     no_renegotiation                = 100,
+    missing_extension               = 109,
     unsupported_extension           = 110, /**< RFC 5246, section 7.2.2 */
     unrecognized_name               = 112, /**< RFC 6066, section 3 */
     bad_certificate_status_response = 113, /**< RFC 6066, section 8 */
     unknown_psk_identity            = 115, /**< RFC 4279, section 2 */
+    certificate_required            = 116, /**< RFC 8446, section 8.2 */
     no_application_protocol         = 120
 };
 
@@ -413,32 +643,66 @@
 /* Maximum number of groups that can be set */
 #define WOLFSSL_MAX_GROUP_COUNT       10
 
+#if defined(HAVE_SECRET_CALLBACK) && defined(WOLFSSL_TLS13)
+enum Tls13Secret {
+    CLIENT_EARLY_TRAFFIC_SECRET,
+    CLIENT_HANDSHAKE_TRAFFIC_SECRET,
+    SERVER_HANDSHAKE_TRAFFIC_SECRET,
+    CLIENT_TRAFFIC_SECRET,
+    SERVER_TRAFFIC_SECRET,
+    EARLY_EXPORTER_SECRET,
+    EXPORTER_SECRET
+};
+#endif
+
+
 typedef WOLFSSL_METHOD* (*wolfSSL_method_func)(void* heap);
+
+/* CTX Method EX Constructor Functions */
+WOLFSSL_API WOLFSSL_METHOD *wolfTLS_client_method_ex(void* heap);
+WOLFSSL_API WOLFSSL_METHOD *wolfTLS_server_method_ex(void* heap);
+WOLFSSL_API WOLFSSL_METHOD *wolfSSLv3_method_ex(void* heap);
 WOLFSSL_API WOLFSSL_METHOD *wolfSSLv3_server_method_ex(void* heap);
 WOLFSSL_API WOLFSSL_METHOD *wolfSSLv3_client_method_ex(void* heap);
+WOLFSSL_API WOLFSSL_METHOD *wolfTLSv1_method_ex(void* heap);
 WOLFSSL_API WOLFSSL_METHOD *wolfTLSv1_server_method_ex(void* heap);
 WOLFSSL_API WOLFSSL_METHOD *wolfTLSv1_client_method_ex(void* heap);
+WOLFSSL_API WOLFSSL_METHOD *wolfTLSv1_1_method_ex(void* heap);
 WOLFSSL_API WOLFSSL_METHOD *wolfTLSv1_1_server_method_ex(void* heap);
 WOLFSSL_API WOLFSSL_METHOD *wolfTLSv1_1_client_method_ex(void* heap);
+WOLFSSL_API WOLFSSL_METHOD *wolfTLSv1_2_method_ex(void* heap);
 WOLFSSL_API WOLFSSL_METHOD *wolfTLSv1_2_server_method_ex(void* heap);
 WOLFSSL_API WOLFSSL_METHOD *wolfTLSv1_2_client_method_ex(void* heap);
 #ifdef WOLFSSL_TLS13
+    WOLFSSL_API WOLFSSL_METHOD *wolfTLSv1_3_method_ex(void* heap);
     WOLFSSL_API WOLFSSL_METHOD *wolfTLSv1_3_server_method_ex(void* heap);
     WOLFSSL_API WOLFSSL_METHOD *wolfTLSv1_3_client_method_ex(void* heap);
 #endif
+
+WOLFSSL_API WOLFSSL_METHOD *wolfSSLv23_method_ex(void* heap);
 WOLFSSL_API WOLFSSL_METHOD *wolfSSLv23_server_method_ex(void* heap);
 WOLFSSL_API WOLFSSL_METHOD *wolfSSLv23_client_method_ex(void* heap);
 
 #ifdef WOLFSSL_DTLS
+    WOLFSSL_API WOLFSSL_METHOD *wolfDTLS_method_ex(void* heap);
+    WOLFSSL_API WOLFSSL_METHOD *wolfDTLS_client_method_ex(void* heap);
+    WOLFSSL_API WOLFSSL_METHOD *wolfDTLS_server_method_ex(void* heap);
+    WOLFSSL_API WOLFSSL_METHOD *wolfDTLSv1_method_ex(void* heap);
     WOLFSSL_API WOLFSSL_METHOD *wolfDTLSv1_client_method_ex(void* heap);
     WOLFSSL_API WOLFSSL_METHOD *wolfDTLSv1_server_method_ex(void* heap);
+    WOLFSSL_API WOLFSSL_METHOD *wolfDTLSv1_2_method_ex(void* heap);
     WOLFSSL_API WOLFSSL_METHOD *wolfDTLSv1_2_client_method_ex(void* heap);
     WOLFSSL_API WOLFSSL_METHOD *wolfDTLSv1_2_server_method_ex(void* heap);
 #endif
+
+/* CTX Method Constructor Functions */
+WOLFSSL_API WOLFSSL_METHOD *wolfTLS_client_method(void);
+WOLFSSL_API WOLFSSL_METHOD *wolfTLS_server_method(void);
+WOLFSSL_API WOLFSSL_METHOD *wolfSSLv3_method(void);
 WOLFSSL_API WOLFSSL_METHOD *wolfSSLv23_method(void);
 WOLFSSL_API WOLFSSL_METHOD *wolfSSLv3_server_method(void);
 WOLFSSL_API WOLFSSL_METHOD *wolfSSLv3_client_method(void);
-WOLFSSL_API WOLFSSL_METHOD* wolfTLSv1_method(void);
+WOLFSSL_API WOLFSSL_METHOD *wolfTLSv1_method(void);
 WOLFSSL_API WOLFSSL_METHOD *wolfTLSv1_server_method(void);
 WOLFSSL_API WOLFSSL_METHOD *wolfTLSv1_client_method(void);
 WOLFSSL_API WOLFSSL_METHOD *wolfTLSv1_1_method(void);
@@ -446,15 +710,21 @@
 WOLFSSL_API WOLFSSL_METHOD *wolfTLSv1_1_client_method(void);
 WOLFSSL_API WOLFSSL_METHOD *wolfTLSv1_2_method(void);
 WOLFSSL_API WOLFSSL_METHOD *wolfTLSv1_2_server_method(void);
-WOLFSSL_API WOLFSSL_METHOD *wolfTLSv1_2_client_method(void);
+WOLFSSL_ABI WOLFSSL_API WOLFSSL_METHOD *wolfTLSv1_2_client_method(void);
 #ifdef WOLFSSL_TLS13
+    WOLFSSL_API WOLFSSL_METHOD *wolfTLSv1_3_method(void);
     WOLFSSL_API WOLFSSL_METHOD *wolfTLSv1_3_server_method(void);
-    WOLFSSL_API WOLFSSL_METHOD *wolfTLSv1_3_client_method(void);
+    WOLFSSL_ABI WOLFSSL_API WOLFSSL_METHOD *wolfTLSv1_3_client_method(void);
 #endif
 
 #ifdef WOLFSSL_DTLS
+    WOLFSSL_API WOLFSSL_METHOD *wolfDTLS_method(void);
+    WOLFSSL_API WOLFSSL_METHOD *wolfDTLS_server_method(void);
+    WOLFSSL_API WOLFSSL_METHOD *wolfDTLS_client_method(void);
+    WOLFSSL_API WOLFSSL_METHOD *wolfDTLSv1_method(void);
     WOLFSSL_API WOLFSSL_METHOD *wolfDTLSv1_client_method(void);
     WOLFSSL_API WOLFSSL_METHOD *wolfDTLSv1_server_method(void);
+    WOLFSSL_API WOLFSSL_METHOD *wolfDTLSv1_2_method(void);
     WOLFSSL_API WOLFSSL_METHOD *wolfDTLSv1_2_client_method(void);
     WOLFSSL_API WOLFSSL_METHOD *wolfDTLSv1_2_server_method(void);
 #endif
@@ -479,6 +749,8 @@
 WOLFSSL_API int wolfSSL_dtls_set_export(WOLFSSL* ssl, wc_dtls_export func);
 WOLFSSL_API int wolfSSL_dtls_export(WOLFSSL* ssl, unsigned char* buf,
                                                               unsigned int* sz);
+WOLFSSL_API int wolfSSL_dtls_export_state_only(WOLFSSL* ssl, unsigned char* buf,
+                                                              unsigned int* sz);
 #endif /* WOLFSSL_DTLS */
 #endif /* WOLFSSL_SESSION_EXPORT */
 
@@ -500,15 +772,35 @@
 
 #if !defined(NO_FILESYSTEM) && !defined(NO_CERTS)
 
-WOLFSSL_API int wolfSSL_CTX_use_certificate_file(WOLFSSL_CTX*, const char*, int);
-WOLFSSL_API int wolfSSL_CTX_use_PrivateKey_file(WOLFSSL_CTX*, const char*, int);
-WOLFSSL_API int wolfSSL_CTX_load_verify_locations(WOLFSSL_CTX*, const char*,
-                                                const char*);
+WOLFSSL_ABI WOLFSSL_API int wolfSSL_CTX_use_certificate_file(WOLFSSL_CTX*,
+                                                              const char*, int);
+WOLFSSL_ABI WOLFSSL_API int wolfSSL_CTX_use_PrivateKey_file(WOLFSSL_CTX*,
+                                                              const char*, int);
+
+#endif
+
+#ifndef NO_CERTS
+#define WOLFSSL_LOAD_FLAG_NONE          0x00000000
+#define WOLFSSL_LOAD_FLAG_IGNORE_ERR    0x00000001
+#define WOLFSSL_LOAD_FLAG_DATE_ERR_OKAY 0x00000002
+#define WOLFSSL_LOAD_FLAG_PEM_CA_ONLY   0x00000004
+
+#ifndef WOLFSSL_LOAD_VERIFY_DEFAULT_FLAGS
+#define WOLFSSL_LOAD_VERIFY_DEFAULT_FLAGS WOLFSSL_LOAD_FLAG_NONE
+#endif
+#endif /* !NO_CERTS */
+
+#if !defined(NO_FILESYSTEM) && !defined(NO_CERTS)
+
+WOLFSSL_API int wolfSSL_CTX_load_verify_locations_ex(WOLFSSL_CTX*, const char*,
+                                                const char*, unsigned int);
+WOLFSSL_ABI WOLFSSL_API int wolfSSL_CTX_load_verify_locations(WOLFSSL_CTX*,
+                                                      const char*, const char*);
 #ifdef WOLFSSL_TRUST_PEER_CERT
 WOLFSSL_API int wolfSSL_CTX_trust_peer_cert(WOLFSSL_CTX*, const char*, int);
 #endif
-WOLFSSL_API int wolfSSL_CTX_use_certificate_chain_file(WOLFSSL_CTX *,
-                                                     const char *file);
+WOLFSSL_ABI WOLFSSL_API int wolfSSL_CTX_use_certificate_chain_file(
+                                                     WOLFSSL_CTX*, const char*);
 WOLFSSL_API int wolfSSL_CTX_use_certificate_chain_file_format(WOLFSSL_CTX *,
                                                   const char *file, int format);
 WOLFSSL_API int wolfSSL_CTX_use_RSAPrivateKey_file(WOLFSSL_CTX*, const char*, int);
@@ -516,9 +808,12 @@
 WOLFSSL_API long wolfSSL_get_verify_depth(WOLFSSL* ssl);
 WOLFSSL_API long wolfSSL_CTX_get_verify_depth(WOLFSSL_CTX* ctx);
 WOLFSSL_API void wolfSSL_CTX_set_verify_depth(WOLFSSL_CTX *ctx,int depth);
-WOLFSSL_API int wolfSSL_use_certificate_file(WOLFSSL*, const char*, int);
-WOLFSSL_API int wolfSSL_use_PrivateKey_file(WOLFSSL*, const char*, int);
-WOLFSSL_API int wolfSSL_use_certificate_chain_file(WOLFSSL*, const char *file);
+WOLFSSL_ABI WOLFSSL_API int wolfSSL_use_certificate_file(WOLFSSL*, const char*,
+                                                                           int);
+WOLFSSL_ABI WOLFSSL_API int wolfSSL_use_PrivateKey_file(WOLFSSL*, const char*,
+                                                                           int);
+WOLFSSL_ABI WOLFSSL_API int wolfSSL_use_certificate_chain_file(WOLFSSL*,
+                                                                   const char*);
 WOLFSSL_API int wolfSSL_use_certificate_chain_file_format(WOLFSSL*,
                                                   const char *file, int format);
 WOLFSSL_API int wolfSSL_use_RSAPrivateKey_file(WOLFSSL*, const char*, int);
@@ -535,29 +830,40 @@
 
 #endif /* !NO_FILESYSTEM && !NO_CERTS */
 
-WOLFSSL_API WOLFSSL_CTX* wolfSSL_CTX_new(WOLFSSL_METHOD*);
-WOLFSSL_API WOLFSSL* wolfSSL_new(WOLFSSL_CTX*);
+WOLFSSL_API WOLFSSL_CTX* wolfSSL_CTX_new_ex(WOLFSSL_METHOD* method, void* heap);
+WOLFSSL_ABI WOLFSSL_API WOLFSSL_CTX* wolfSSL_CTX_new(WOLFSSL_METHOD*);
+#ifdef OPENSSL_EXTRA
+WOLFSSL_API int wolfSSL_CTX_up_ref(WOLFSSL_CTX*);
+#endif
+WOLFSSL_ABI WOLFSSL_API WOLFSSL* wolfSSL_new(WOLFSSL_CTX*);
+WOLFSSL_API WOLFSSL_CTX* wolfSSL_get_SSL_CTX(WOLFSSL* ssl);
+WOLFSSL_API WOLFSSL_X509_VERIFY_PARAM* wolfSSL_get0_param(WOLFSSL* ssl);
 WOLFSSL_API int  wolfSSL_is_server(WOLFSSL*);
 WOLFSSL_API WOLFSSL* wolfSSL_write_dup(WOLFSSL*);
-WOLFSSL_API int  wolfSSL_set_fd (WOLFSSL*, int);
+WOLFSSL_ABI WOLFSSL_API int  wolfSSL_set_fd (WOLFSSL*, int);
 WOLFSSL_API int  wolfSSL_set_write_fd (WOLFSSL*, int);
 WOLFSSL_API int  wolfSSL_set_read_fd (WOLFSSL*, int);
 WOLFSSL_API char* wolfSSL_get_cipher_list(int priority);
 WOLFSSL_API char* wolfSSL_get_cipher_list_ex(WOLFSSL* ssl, int priority);
 WOLFSSL_API int  wolfSSL_get_ciphers(char*, int);
+WOLFSSL_API int wolfSSL_get_ciphers_iana(char*, int);
 WOLFSSL_API const char* wolfSSL_get_cipher_name(WOLFSSL* ssl);
-WOLFSSL_API const char* wolfSSL_get_cipher_name_from_suite(const unsigned char, 
+WOLFSSL_API const char* wolfSSL_get_cipher_name_from_suite(const unsigned char,
     const unsigned char);
+WOLFSSL_API const char* wolfSSL_get_cipher_name_iana_from_suite(
+    const unsigned char, const unsigned char);
 WOLFSSL_API const char* wolfSSL_get_shared_ciphers(WOLFSSL* ssl, char* buf,
     int len);
 WOLFSSL_API const char* wolfSSL_get_curve_name(WOLFSSL* ssl);
 WOLFSSL_API int  wolfSSL_get_fd(const WOLFSSL*);
 /* please see note at top of README if you get an error from connect */
-WOLFSSL_API int  wolfSSL_connect(WOLFSSL*);
-WOLFSSL_API int  wolfSSL_write(WOLFSSL*, const void*, int);
-WOLFSSL_API int  wolfSSL_read(WOLFSSL*, void*, int);
+WOLFSSL_ABI WOLFSSL_API int  wolfSSL_connect(WOLFSSL*);
+WOLFSSL_ABI WOLFSSL_API int  wolfSSL_write(WOLFSSL*, const void*, int);
+WOLFSSL_ABI WOLFSSL_API int  wolfSSL_read(WOLFSSL*, void*, int);
 WOLFSSL_API int  wolfSSL_peek(WOLFSSL*, void*, int);
 WOLFSSL_API int  wolfSSL_accept(WOLFSSL*);
+WOLFSSL_API int  wolfSSL_CTX_mutual_auth(WOLFSSL_CTX* ctx, int req);
+WOLFSSL_API int  wolfSSL_mutual_auth(WOLFSSL* ssl, int req);
 #ifdef WOLFSSL_TLS13
 WOLFSSL_API int  wolfSSL_send_hrr_cookie(WOLFSSL* ssl,
     const unsigned char* secret, unsigned int secretSz);
@@ -570,6 +876,9 @@
 WOLFSSL_API int  wolfSSL_allow_post_handshake_auth(WOLFSSL* ssl);
 WOLFSSL_API int  wolfSSL_request_certificate(WOLFSSL* ssl);
 
+WOLFSSL_API int  wolfSSL_CTX_set1_groups_list(WOLFSSL_CTX *ctx, char *list);
+WOLFSSL_API int  wolfSSL_set1_groups_list(WOLFSSL *ssl, char *list);
+
 WOLFSSL_API int  wolfSSL_preferred_group(WOLFSSL* ssl);
 WOLFSSL_API int  wolfSSL_CTX_set_groups(WOLFSSL_CTX* ctx, int* groups,
                                         int count);
@@ -586,30 +895,37 @@
 WOLFSSL_API int  wolfSSL_read_early_data(WOLFSSL*, void*, int, int*);
 #endif
 #endif
-WOLFSSL_API void wolfSSL_CTX_free(WOLFSSL_CTX*);
-WOLFSSL_API void wolfSSL_free(WOLFSSL*);
-WOLFSSL_API int  wolfSSL_shutdown(WOLFSSL*);
+WOLFSSL_ABI WOLFSSL_API void wolfSSL_CTX_free(WOLFSSL_CTX*);
+WOLFSSL_ABI WOLFSSL_API void wolfSSL_free(WOLFSSL*);
+WOLFSSL_ABI WOLFSSL_API int  wolfSSL_shutdown(WOLFSSL*);
 WOLFSSL_API int  wolfSSL_send(WOLFSSL*, const void*, int sz, int flags);
 WOLFSSL_API int  wolfSSL_recv(WOLFSSL*, void*, int sz, int flags);
 
 WOLFSSL_API void wolfSSL_CTX_set_quiet_shutdown(WOLFSSL_CTX*, int);
 WOLFSSL_API void wolfSSL_set_quiet_shutdown(WOLFSSL*, int);
 
-WOLFSSL_API int  wolfSSL_get_error(WOLFSSL*, int);
+WOLFSSL_ABI WOLFSSL_API int  wolfSSL_get_error(WOLFSSL*, int);
 WOLFSSL_API int  wolfSSL_get_alert_history(WOLFSSL*, WOLFSSL_ALERT_HISTORY *);
 
-WOLFSSL_API int  wolfSSL_set_session(WOLFSSL*, WOLFSSL_SESSION*);
+WOLFSSL_ABI WOLFSSL_API int  wolfSSL_set_session(WOLFSSL*, WOLFSSL_SESSION*);
 WOLFSSL_API long wolfSSL_SSL_SESSION_set_timeout(WOLFSSL_SESSION*, long);
-WOLFSSL_API WOLFSSL_SESSION* wolfSSL_get_session(WOLFSSL*);
-WOLFSSL_API void wolfSSL_flush_sessions(WOLFSSL_CTX*, long);
+WOLFSSL_ABI WOLFSSL_API WOLFSSL_SESSION* wolfSSL_get_session(WOLFSSL*);
+WOLFSSL_ABI WOLFSSL_API void wolfSSL_flush_sessions(WOLFSSL_CTX*, long);
 WOLFSSL_API int  wolfSSL_SetServerID(WOLFSSL*, const unsigned char*, int, int);
 
-#if defined(OPENSSL_ALL) || defined(WOLFSSL_ASIO)
+#if defined(OPENSSL_ALL) || defined(WOLFSSL_ASIO) || defined(WOLFSSL_HAPROXY) \
+    || defined(WOLFSSL_NGINX)
 WOLFSSL_API int  wolfSSL_BIO_new_bio_pair(WOLFSSL_BIO**, size_t,
                      WOLFSSL_BIO**, size_t);
 
+WOLFSSL_API int wolfSSL_RSA_padding_add_PKCS1_PSS(WOLFSSL_RSA *rsa, unsigned char *EM,
+                                                  const unsigned char *mHash,
+                                                  const WOLFSSL_EVP_MD *Hash, int saltLen);
+WOLFSSL_API int wolfSSL_RSA_verify_PKCS1_PSS(WOLFSSL_RSA *rsa, const unsigned char *mHash,
+                                          const WOLFSSL_EVP_MD *hashAlg,
+                                          const unsigned char *EM, int saltLen);
 WOLFSSL_API WOLFSSL_RSA* wolfSSL_d2i_RSAPrivateKey_bio(WOLFSSL_BIO*, WOLFSSL_RSA**);
-WOLFSSL_API int wolfSSL_CTX_use_certificate_ASN1(WOLFSSL_CTX*, 
+WOLFSSL_API int wolfSSL_CTX_use_certificate_ASN1(WOLFSSL_CTX*,
                                            int, const unsigned char*);
 WOLFSSL_API int wolfSSL_CTX_use_RSAPrivateKey(WOLFSSL_CTX*, WOLFSSL_RSA*);
 WOLFSSL_API WOLFSSL_EVP_PKEY* wolfSSL_d2i_PrivateKey_bio(WOLFSSL_BIO*, WOLFSSL_EVP_PKEY**);
@@ -620,19 +936,16 @@
 WOLFSSL_API int wolfSSL_GetSessionAtIndex(int index, WOLFSSL_SESSION* session);
 #endif /* SESSION_INDEX */
 
-#if defined(SESSION_INDEX) && defined(SESSION_CERTS)
+#if defined(SESSION_CERTS)
 WOLFSSL_API
     WOLFSSL_X509_CHAIN* wolfSSL_SESSION_get_peer_chain(WOLFSSL_SESSION* session);
+WOLFSSL_API WOLFSSL_X509* wolfSSL_SESSION_get0_peer(WOLFSSL_SESSION* session);
 #endif /* SESSION_INDEX && SESSION_CERTS */
 
 typedef int (*VerifyCallback)(int, WOLFSSL_X509_STORE_CTX*);
-#ifdef OPENSSL_EXTRA
 typedef void (CallbackInfoState)(const WOLFSSL*, int, int);
 
-typedef struct WOLFSSL_CRYPTO_EX_DATA {
-    WOLFSSL_STACK* data;
-} WOLFSSL_CRYPTO_EX_DATA;
-
+#if defined(HAVE_EX_DATA) || defined(FORTRESS)
 typedef int  (WOLFSSL_CRYPTO_EX_new)(void* p, void* ptr,
         WOLFSSL_CRYPTO_EX_DATA* a, int idx, long argValue, void* arg);
 typedef int  (WOLFSSL_CRYPTO_EX_dup)(WOLFSSL_CRYPTO_EX_DATA* out,
@@ -647,19 +960,34 @@
 
 WOLFSSL_API void wolfSSL_CTX_set_verify(WOLFSSL_CTX*, int,
                                       VerifyCallback verify_callback);
+
+#ifdef OPENSSL_ALL
+typedef int (*CertVerifyCallback)(WOLFSSL_X509_STORE_CTX* store, void* arg);
+WOLFSSL_API void wolfSSL_CTX_set_cert_verify_callback(WOLFSSL_CTX* ctx,
+    CertVerifyCallback cb, void* arg);
+#endif
+
 WOLFSSL_API void wolfSSL_set_verify(WOLFSSL*, int, VerifyCallback verify_callback);
+WOLFSSL_API void wolfSSL_set_verify_result(WOLFSSL*, long);
 WOLFSSL_API void wolfSSL_SetCertCbCtx(WOLFSSL*, void*);
 
-WOLFSSL_API int  wolfSSL_pending(WOLFSSL*);
+WOLFSSL_ABI WOLFSSL_API int  wolfSSL_pending(WOLFSSL*);
 
 WOLFSSL_API void wolfSSL_load_error_strings(void);
 WOLFSSL_API int  wolfSSL_library_init(void);
-WOLFSSL_API long wolfSSL_CTX_set_session_cache_mode(WOLFSSL_CTX*, long);
+WOLFSSL_ABI WOLFSSL_API long wolfSSL_CTX_set_session_cache_mode(WOLFSSL_CTX*,
+                                                                          long);
 
 #ifdef HAVE_SECRET_CALLBACK
-typedef int (*SessionSecretCb)(WOLFSSL* ssl,
-                                        void* secret, int* secretSz, void* ctx);
-WOLFSSL_API int  wolfSSL_set_session_secret_cb(WOLFSSL*, SessionSecretCb, void*);
+typedef int (*SessionSecretCb)(WOLFSSL* ssl, void* secret, int* secretSz,
+                               void* ctx);
+WOLFSSL_API int  wolfSSL_set_session_secret_cb(WOLFSSL*, SessionSecretCb,
+                                               void*);
+#ifdef WOLFSSL_TLS13
+typedef int (*Tls13SecretCb)(WOLFSSL* ssl, int id, const unsigned char* secret,
+                             int secretSz, void* ctx);
+WOLFSSL_API int  wolfSSL_set_tls13_secret_cb(WOLFSSL*, Tls13SecretCb, void*);
+#endif
 #endif /* HAVE_SECRET_CALLBACK */
 
 /* session cache persistence */
@@ -687,9 +1015,16 @@
 #define wolfSSL_get_using_nonblock wolfSSL_dtls_get_using_nonblock
     /* The old names are deprecated. */
 WOLFSSL_API int  wolfSSL_dtls_get_current_timeout(WOLFSSL* ssl);
+WOLFSSL_API int  wolfSSL_DTLSv1_get_timeout(WOLFSSL* ssl,
+        WOLFSSL_TIMEVAL* timeleft);
+WOLFSSL_API void wolfSSL_DTLSv1_set_initial_timeout_duration(WOLFSSL* ssl,
+    word32 duration_ms);
+WOLFSSL_API int  wolfSSL_DTLSv1_handle_timeout(WOLFSSL* ssl);
+
 WOLFSSL_API int  wolfSSL_dtls_set_timeout_init(WOLFSSL* ssl, int);
 WOLFSSL_API int  wolfSSL_dtls_set_timeout_max(WOLFSSL* ssl, int);
 WOLFSSL_API int  wolfSSL_dtls_got_timeout(WOLFSSL* ssl);
+WOLFSSL_API int  wolfSSL_dtls_retransmit(WOLFSSL*);
 WOLFSSL_API int  wolfSSL_dtls(WOLFSSL* ssl);
 
 WOLFSSL_API int  wolfSSL_dtls_set_peer(WOLFSSL*, void*, unsigned int);
@@ -721,6 +1056,7 @@
                                                     CallbackMcastHighwater);
 WOLFSSL_API int  wolfSSL_mcast_set_highwater_ctx(WOLFSSL*, void*);
 
+WOLFSSL_API int   wolfSSL_ERR_GET_LIB(unsigned long err);
 WOLFSSL_API int   wolfSSL_ERR_GET_REASON(unsigned long err);
 WOLFSSL_API char* wolfSSL_ERR_error_string(unsigned long,char*);
 WOLFSSL_API void  wolfSSL_ERR_error_string_n(unsigned long e, char* buf,
@@ -729,6 +1065,13 @@
 
 /* extras */
 
+
+/* for now LHASH is not implemented */
+typedef int WOLFSSL_LHASH;
+#ifndef WOLF_LHASH_OF
+    #define WOLF_LHASH_OF(x) WOLFSSL_LHASH
+#endif
+
 #ifndef WOLF_STACK_OF
     #define WOLF_STACK_OF(x) WOLFSSL_STACK
 #endif
@@ -736,24 +1079,70 @@
     #define DECLARE_STACK_OF(x) WOLF_STACK_OF(x);
 #endif
 
+WOLFSSL_API WOLFSSL_STACK* wolfSSL_sk_new_node(void* heap);
+WOLFSSL_API void wolfSSL_sk_free(WOLFSSL_STACK* sk);
+WOLFSSL_API void wolfSSL_sk_free_node(WOLFSSL_STACK* in);
+WOLFSSL_API int wolfSSL_sk_push_node(WOLFSSL_STACK** stack, WOLFSSL_STACK* in);
+WOLFSSL_API WOLFSSL_STACK* wolfSSL_sk_get_node(WOLFSSL_STACK* sk, int idx);
+WOLFSSL_API int wolfSSL_sk_push(WOLFSSL_STACK *st, const void *data);
+
+#if defined(HAVE_OCSP)
+#include "wolfssl/ocsp.h"
+#include "wolfssl/wolfcrypt/asn.h"
+#endif
+
+#if defined(OPENSSL_ALL) || defined(WOLFSSL_QT)
+WOLFSSL_API int wolfSSL_sk_ACCESS_DESCRIPTION_push(
+                                       WOLF_STACK_OF(ACCESS_DESCRIPTION)* sk,
+                                       WOLFSSL_ACCESS_DESCRIPTION* access);
+#endif /* defined(OPENSSL_ALL) || defined(WOLFSSL_QT) */
+
+typedef WOLF_STACK_OF(WOLFSSL_GENERAL_NAME) WOLFSSL_GENERAL_NAMES;
+
 WOLFSSL_API int wolfSSL_sk_X509_push(WOLF_STACK_OF(WOLFSSL_X509_NAME)* sk,
                                                             WOLFSSL_X509* x509);
 WOLFSSL_API WOLFSSL_X509* wolfSSL_sk_X509_pop(WOLF_STACK_OF(WOLFSSL_X509_NAME)* sk);
+WOLFSSL_API WOLFSSL_STACK* wolfSSL_sk_X509_dup(WOLFSSL_STACK* sk);
 WOLFSSL_API void wolfSSL_sk_X509_free(WOLF_STACK_OF(WOLFSSL_X509_NAME)* sk);
-WOLFSSL_API WOLFSSL_ASN1_OBJECT* wolfSSL_sk_GENERAL_NAME_value(
+WOLFSSL_API WOLFSSL_GENERAL_NAME* wolfSSL_GENERAL_NAME_new(void);
+WOLFSSL_API void wolfSSL_GENERAL_NAME_free(WOLFSSL_GENERAL_NAME* gn);
+WOLFSSL_API int wolfSSL_sk_GENERAL_NAME_push(WOLF_STACK_OF(WOLFSSL_GENERAL_NAME)* sk,
+                                                      WOLFSSL_GENERAL_NAME* gn);
+WOLFSSL_API WOLFSSL_GENERAL_NAME* wolfSSL_sk_GENERAL_NAME_value(
         WOLFSSL_STACK* sk, int i);
 WOLFSSL_API int wolfSSL_sk_GENERAL_NAME_num(WOLFSSL_STACK* sk);
 WOLFSSL_API void wolfSSL_sk_GENERAL_NAME_pop_free(WOLFSSL_STACK* sk,
-        void f (WOLFSSL_ASN1_OBJECT*));
+                                       void (*f) (WOLFSSL_GENERAL_NAME*));
+WOLFSSL_API void wolfSSL_sk_GENERAL_NAME_free(WOLFSSL_STACK* sk);
+WOLFSSL_API void wolfSSL_GENERAL_NAMES_free(WOLFSSL_GENERAL_NAMES* name);
+WOLFSSL_API int wolfSSL_sk_ACCESS_DESCRIPTION_num(WOLFSSL_STACK* sk);
+WOLFSSL_API void wolfSSL_AUTHORITY_INFO_ACCESS_free(
+        WOLF_STACK_OF(WOLFSSL_ACCESS_DESCRIPTION)* sk);
+WOLFSSL_API WOLFSSL_ACCESS_DESCRIPTION* wolfSSL_sk_ACCESS_DESCRIPTION_value(
+        WOLFSSL_STACK* sk, int idx);
+WOLFSSL_API void wolfSSL_sk_ACCESS_DESCRIPTION_free(WOLFSSL_STACK* sk);
+WOLFSSL_API void wolfSSL_sk_ACCESS_DESCRIPTION_pop_free(WOLFSSL_STACK* sk,
+        void (*f) (WOLFSSL_ACCESS_DESCRIPTION*));
+WOLFSSL_API void wolfSSL_ACCESS_DESCRIPTION_free(WOLFSSL_ACCESS_DESCRIPTION* access);
+WOLFSSL_API void wolfSSL_sk_X509_EXTENSION_pop_free(
+        WOLF_STACK_OF(WOLFSSL_X509_EXTENSION)* sk,
+        void (*f) (WOLFSSL_X509_EXTENSION*));
+WOLFSSL_API WOLF_STACK_OF(WOLFSSL_X509_EXTENSION)* wolfSSL_sk_X509_EXTENSION_new_null(void);
 WOLFSSL_API WOLFSSL_ASN1_OBJECT* wolfSSL_ASN1_OBJECT_new(void);
 WOLFSSL_API void wolfSSL_ASN1_OBJECT_free(WOLFSSL_ASN1_OBJECT* obj);
+WOLFSSL_API WOLFSSL_STACK* wolfSSL_sk_new_asn1_obj(void);
 WOLFSSL_API int wolfSSL_sk_ASN1_OBJECT_push(WOLF_STACK_OF(WOLFSSL_ASN1_OBJEXT)* sk,
                                                       WOLFSSL_ASN1_OBJECT* obj);
-WOLFSSL_API WOLFSSL_ASN1_OBJECT* wolfSSL_sk_ASN1_OBJCET_pop(
+WOLFSSL_API WOLFSSL_ASN1_OBJECT* wolfSSL_sk_ASN1_OBJECT_pop(
                                             WOLF_STACK_OF(WOLFSSL_ASN1_OBJECT)* sk);
 WOLFSSL_API void wolfSSL_sk_ASN1_OBJECT_free(WOLF_STACK_OF(WOLFSSL_ASN1_OBJECT)* sk);
+WOLFSSL_API void wolfSSL_sk_ASN1_OBJECT_pop_free(
+                WOLF_STACK_OF(WOLFSSL_ASN1_OBJECT)* sk,
+                void (*f)(WOLFSSL_ASN1_OBJECT*));
 WOLFSSL_API int wolfSSL_ASN1_STRING_to_UTF8(unsigned char **out, WOLFSSL_ASN1_STRING *in);
-
+WOLFSSL_API int wolfSSL_sk_X509_EXTENSION_num(WOLF_STACK_OF(WOLFSSL_X509_EXTENSION)* sk);
+WOLFSSL_API WOLFSSL_X509_EXTENSION* wolfSSL_sk_X509_EXTENSION_value(
+                            WOLF_STACK_OF(WOLFSSL_X509_EXTENSION)* sk, int idx);
 WOLFSSL_API int  wolfSSL_set_ex_data(WOLFSSL*, int, void*);
 WOLFSSL_API int  wolfSSL_get_shutdown(const WOLFSSL*);
 WOLFSSL_API int  wolfSSL_set_rfd(WOLFSSL*, int);
@@ -764,6 +1153,7 @@
 WOLFSSL_API void wolfSSL_set_connect_state(WOLFSSL*);
 WOLFSSL_API void wolfSSL_set_accept_state(WOLFSSL*);
 WOLFSSL_API int  wolfSSL_session_reused(WOLFSSL*);
+WOLFSSL_API WOLFSSL_SESSION* wolfSSL_SESSION_dup(WOLFSSL_SESSION* session);
 WOLFSSL_API void wolfSSL_SESSION_free(WOLFSSL_SESSION* session);
 WOLFSSL_API int  wolfSSL_is_init_finished(WOLFSSL*);
 
@@ -772,12 +1162,21 @@
 WOLFSSL_API WOLFSSL_CIPHER*  wolfSSL_get_current_cipher(WOLFSSL*);
 WOLFSSL_API char* wolfSSL_CIPHER_description(const WOLFSSL_CIPHER*, char*, int);
 WOLFSSL_API const char*  wolfSSL_CIPHER_get_name(const WOLFSSL_CIPHER* cipher);
+WOLFSSL_API const char*  wolfSSL_CIPHER_get_version(const WOLFSSL_CIPHER* cipher);
+WOLFSSL_API word32       wolfSSL_CIPHER_get_id(const WOLFSSL_CIPHER* cipher);
+WOLFSSL_API const WOLFSSL_CIPHER* wolfSSL_get_cipher_by_value(word16 value);
 WOLFSSL_API const char*  wolfSSL_SESSION_CIPHER_get_name(WOLFSSL_SESSION* session);
 WOLFSSL_API const char*  wolfSSL_get_cipher(WOLFSSL*);
+WOLFSSL_API void wolfSSL_sk_CIPHER_free(WOLF_STACK_OF(WOLFSSL_CIPHER)* sk);
 WOLFSSL_API WOLFSSL_SESSION* wolfSSL_get1_session(WOLFSSL* ssl);
                            /* what's ref count */
 
 WOLFSSL_API WOLFSSL_X509* wolfSSL_X509_new(void);
+#if defined(OPENSSL_EXTRA_X509_SMALL) || defined(OPENSSL_ALL)
+WOLFSSL_API int wolfSSL_RSA_up_ref(WOLFSSL_RSA* rsa);
+WOLFSSL_API int wolfSSL_X509_up_ref(WOLFSSL_X509* x509);
+WOLFSSL_API int wolfSSL_EVP_PKEY_up_ref(WOLFSSL_EVP_PKEY* pkey);
+#endif
 
 WOLFSSL_API int wolfSSL_OCSP_parse_url(char* url, char** host, char** port,
                                      char** path, int* ssl);
@@ -788,8 +1187,10 @@
 
 WOLFSSL_API WOLFSSL_BIO* wolfSSL_BIO_new(WOLFSSL_BIO_METHOD*);
 WOLFSSL_API int  wolfSSL_BIO_free(WOLFSSL_BIO*);
+WOLFSSL_API void wolfSSL_BIO_vfree(WOLFSSL_BIO*);
 WOLFSSL_API int  wolfSSL_BIO_free_all(WOLFSSL_BIO*);
 WOLFSSL_API int wolfSSL_BIO_gets(WOLFSSL_BIO* bio, char* buf, int sz);
+WOLFSSL_API int wolfSSL_BIO_puts(WOLFSSL_BIO* bio, const char* buf);
 WOLFSSL_API WOLFSSL_BIO* wolfSSL_BIO_next(WOLFSSL_BIO* bio);
 WOLFSSL_API WOLFSSL_BIO* wolfSSL_BIO_find_type(WOLFSSL_BIO* bio, int type);
 WOLFSSL_API int  wolfSSL_BIO_read(WOLFSSL_BIO*, void*, int);
@@ -798,6 +1199,15 @@
 WOLFSSL_API WOLFSSL_BIO* wolfSSL_BIO_pop(WOLFSSL_BIO*);
 WOLFSSL_API int  wolfSSL_BIO_flush(WOLFSSL_BIO*);
 WOLFSSL_API int  wolfSSL_BIO_pending(WOLFSSL_BIO*);
+WOLFSSL_API void wolfSSL_BIO_set_callback(WOLFSSL_BIO *bio,
+                                          wolf_bio_info_cb callback_func);
+WOLFSSL_API wolf_bio_info_cb wolfSSL_BIO_get_callback(WOLFSSL_BIO *bio);
+WOLFSSL_API void  wolfSSL_BIO_set_callback_arg(WOLFSSL_BIO *bio, char *arg);
+WOLFSSL_API char* wolfSSL_BIO_get_callback_arg(const WOLFSSL_BIO *bio);
+
+WOLFSSL_API WOLFSSL_BIO_METHOD* wolfSSL_BIO_f_md(void);
+WOLFSSL_API int wolfSSL_BIO_get_md_ctx(WOLFSSL_BIO *bio,
+                                                WOLFSSL_EVP_MD_CTX **mdcp);
 
 WOLFSSL_API WOLFSSL_BIO_METHOD* wolfSSL_BIO_f_buffer(void);
 WOLFSSL_API long wolfSSL_BIO_set_write_buffer_size(WOLFSSL_BIO*, long size);
@@ -808,13 +1218,37 @@
 WOLFSSL_API WOLFSSL_BIO_METHOD* wolfSSL_BIO_s_mem(void);
 WOLFSSL_API WOLFSSL_BIO_METHOD* wolfSSL_BIO_f_base64(void);
 WOLFSSL_API void wolfSSL_BIO_set_flags(WOLFSSL_BIO*, int);
+WOLFSSL_API void wolfSSL_BIO_clear_flags(WOLFSSL_BIO *bio, int flags);
+WOLFSSL_API int wolfSSL_BIO_set_ex_data(WOLFSSL_BIO *bio, int idx, void *data);
+WOLFSSL_API void *wolfSSL_BIO_get_ex_data(WOLFSSL_BIO *bio, int idx);
+WOLFSSL_API long wolfSSL_BIO_set_nbio(WOLFSSL_BIO*, long);
 
 WOLFSSL_API int wolfSSL_BIO_get_mem_data(WOLFSSL_BIO* bio,void* p);
-WOLFSSL_API WOLFSSL_BIO* wolfSSL_BIO_new_mem_buf(void* buf, int len);
-
+
+WOLFSSL_API void wolfSSL_BIO_set_init(WOLFSSL_BIO*, int);
+WOLFSSL_API void wolfSSL_BIO_set_data(WOLFSSL_BIO*, void*);
+WOLFSSL_API void* wolfSSL_BIO_get_data(WOLFSSL_BIO*);
+WOLFSSL_API void wolfSSL_BIO_set_shutdown(WOLFSSL_BIO*, int);
+WOLFSSL_API int wolfSSL_BIO_get_shutdown(WOLFSSL_BIO*);
+WOLFSSL_API void wolfSSL_BIO_clear_retry_flags(WOLFSSL_BIO*);
+WOLFSSL_API int wolfSSL_BIO_should_retry(WOLFSSL_BIO *bio);
+
+WOLFSSL_API WOLFSSL_BIO_METHOD *wolfSSL_BIO_meth_new(int, const char*);
+WOLFSSL_API void wolfSSL_BIO_meth_free(WOLFSSL_BIO_METHOD*);
+WOLFSSL_API int wolfSSL_BIO_meth_set_write(WOLFSSL_BIO_METHOD*, wolfSSL_BIO_meth_write_cb);
+WOLFSSL_API int wolfSSL_BIO_meth_set_read(WOLFSSL_BIO_METHOD*, wolfSSL_BIO_meth_read_cb);
+WOLFSSL_API int wolfSSL_BIO_meth_set_puts(WOLFSSL_BIO_METHOD*, wolfSSL_BIO_meth_puts_cb);
+WOLFSSL_API int wolfSSL_BIO_meth_set_gets(WOLFSSL_BIO_METHOD*, wolfSSL_BIO_meth_gets_cb);
+WOLFSSL_API int wolfSSL_BIO_meth_set_ctrl(WOLFSSL_BIO_METHOD*, wolfSSL_BIO_meth_ctrl_get_cb);
+WOLFSSL_API int wolfSSL_BIO_meth_set_create(WOLFSSL_BIO_METHOD*, wolfSSL_BIO_meth_create_cb);
+WOLFSSL_API int wolfSSL_BIO_meth_set_destroy(WOLFSSL_BIO_METHOD*, wolfSSL_BIO_meth_destroy_cb);
+WOLFSSL_API WOLFSSL_BIO* wolfSSL_BIO_new_mem_buf(const void* buf, int len);
 
 WOLFSSL_API long wolfSSL_BIO_set_ssl(WOLFSSL_BIO*, WOLFSSL*, int flag);
+#ifndef NO_FILESYSTEM
 WOLFSSL_API long wolfSSL_BIO_set_fd(WOLFSSL_BIO* b, int fd, int flag);
+#endif
+WOLFSSL_API int wolfSSL_BIO_set_close(WOLFSSL_BIO *b, long flag);
 WOLFSSL_API void wolfSSL_set_bio(WOLFSSL*, WOLFSSL_BIO* rd, WOLFSSL_BIO* wr);
 
 #ifndef NO_FILESYSTEM
@@ -839,6 +1273,7 @@
 WOLFSSL_API int  wolfSSL_BIO_write_filename(WOLFSSL_BIO *bio, char *name);
 WOLFSSL_API long wolfSSL_BIO_set_mem_eof_return(WOLFSSL_BIO *bio, int v);
 WOLFSSL_API long wolfSSL_BIO_get_mem_ptr(WOLFSSL_BIO *bio, WOLFSSL_BUF_MEM **m);
+WOLFSSL_API int wolfSSL_BIO_get_len(WOLFSSL_BIO *bio);
 
 WOLFSSL_API void        wolfSSL_RAND_screen(void);
 WOLFSSL_API const char* wolfSSL_RAND_file_name(char*, unsigned long);
@@ -873,12 +1308,25 @@
 
 WOLFSSL_API void  wolfSSL_X509_STORE_CTX_set_verify_cb(WOLFSSL_X509_STORE_CTX *ctx,
                                   WOLFSSL_X509_STORE_CTX_verify_cb verify_cb);
+WOLFSSL_API void wolfSSL_X509_STORE_set_verify_cb(WOLFSSL_X509_STORE *st,
+                                 WOLFSSL_X509_STORE_CTX_verify_cb verify_cb);
 WOLFSSL_API int wolfSSL_i2d_X509_NAME(WOLFSSL_X509_NAME* n,
                                                            unsigned char** out);
+#ifndef NO_RSA
+WOLFSSL_API int wolfSSL_RSA_print(WOLFSSL_BIO* bio, WOLFSSL_RSA* rsa, int offset);
+#endif
+WOLFSSL_API int wolfSSL_X509_print_ex(WOLFSSL_BIO* bio, WOLFSSL_X509* x509,
+    unsigned long nmflags, unsigned long cflag);
 WOLFSSL_API int wolfSSL_X509_print(WOLFSSL_BIO* bio, WOLFSSL_X509* x509);
-WOLFSSL_API char*       wolfSSL_X509_NAME_oneline(WOLFSSL_X509_NAME*, char*, int);
-WOLFSSL_API WOLFSSL_X509_NAME*  wolfSSL_X509_get_issuer_name(WOLFSSL_X509*);
-WOLFSSL_API WOLFSSL_X509_NAME*  wolfSSL_X509_get_subject_name(WOLFSSL_X509*);
+WOLFSSL_ABI WOLFSSL_API char* wolfSSL_X509_NAME_oneline(WOLFSSL_X509_NAME*,
+                                                                    char*, int);
+#if defined(OPENSSL_EXTRA) && defined(XSNPRINTF)
+WOLFSSL_API char* wolfSSL_X509_get_name_oneline(WOLFSSL_X509_NAME*, char*, int);
+#endif
+WOLFSSL_ABI WOLFSSL_API WOLFSSL_X509_NAME* wolfSSL_X509_get_issuer_name(
+                                                                 WOLFSSL_X509*);
+WOLFSSL_ABI WOLFSSL_API WOLFSSL_X509_NAME* wolfSSL_X509_get_subject_name(
+                                                                 WOLFSSL_X509*);
 WOLFSSL_API int  wolfSSL_X509_ext_isSet_by_NID(WOLFSSL_X509*, int);
 WOLFSSL_API int  wolfSSL_X509_ext_get_critical_by_NID(WOLFSSL_X509*, int);
 WOLFSSL_API int  wolfSSL_X509_get_isCA(WOLFSSL_X509*);
@@ -889,14 +1337,37 @@
                                             WOLFSSL_X509*, unsigned char*, int*);
 WOLFSSL_API unsigned char* wolfSSL_X509_get_subjectKeyID(
                                             WOLFSSL_X509*, unsigned char*, int*);
+
+WOLFSSL_API int wolfSSL_X509_verify(WOLFSSL_X509* x509, WOLFSSL_EVP_PKEY* pkey);
+WOLFSSL_API int wolfSSL_X509_set_subject_name(WOLFSSL_X509*,
+                                              WOLFSSL_X509_NAME*);
+WOLFSSL_API int wolfSSL_X509_set_issuer_name(WOLFSSL_X509*,
+                                              WOLFSSL_X509_NAME*);
+WOLFSSL_API int wolfSSL_X509_set_pubkey(WOLFSSL_X509*, WOLFSSL_EVP_PKEY*);
+WOLFSSL_API int wolfSSL_X509_set_notAfter(WOLFSSL_X509* x509,
+        const WOLFSSL_ASN1_TIME* t);
+WOLFSSL_API int wolfSSL_X509_set_notBefore(WOLFSSL_X509* x509,
+        const WOLFSSL_ASN1_TIME* t);
+WOLFSSL_API WOLFSSL_ASN1_TIME* wolfSSL_X509_get_notBefore(const WOLFSSL_X509* x509);
+WOLFSSL_API WOLFSSL_ASN1_TIME* wolfSSL_X509_get_notAfter(const WOLFSSL_X509* x509);
+WOLFSSL_API int wolfSSL_X509_set_serialNumber(WOLFSSL_X509* x509,
+        WOLFSSL_ASN1_INTEGER* s);
+WOLFSSL_API int wolfSSL_X509_set_version(WOLFSSL_X509* x509, long v);
+WOLFSSL_API int wolfSSL_X509_sign(WOLFSSL_X509* x509, WOLFSSL_EVP_PKEY* pkey,
+        const WOLFSSL_EVP_MD* md);
+
+
 WOLFSSL_API int wolfSSL_X509_NAME_entry_count(WOLFSSL_X509_NAME*);
 WOLFSSL_API int wolfSSL_X509_NAME_get_text_by_NID(
                                             WOLFSSL_X509_NAME*, int, char*, int);
 WOLFSSL_API int wolfSSL_X509_NAME_get_index_by_NID(
                                            WOLFSSL_X509_NAME*, int, int);
 WOLFSSL_API WOLFSSL_ASN1_STRING* wolfSSL_X509_NAME_ENTRY_get_data(WOLFSSL_X509_NAME_ENTRY*);
+
 WOLFSSL_API WOLFSSL_ASN1_STRING* wolfSSL_ASN1_STRING_new(void);
 WOLFSSL_API WOLFSSL_ASN1_STRING* wolfSSL_ASN1_STRING_type_new(int type);
+WOLFSSL_API int wolfSSL_ASN1_STRING_type(const WOLFSSL_ASN1_STRING* asn1);
+WOLFSSL_API WOLFSSL_ASN1_STRING* wolfSSL_d2i_DISPLAYTEXT(WOLFSSL_ASN1_STRING **asn, const unsigned char **in, long len);
 WOLFSSL_API void wolfSSL_ASN1_STRING_free(WOLFSSL_ASN1_STRING* asn1);
 WOLFSSL_API int wolfSSL_ASN1_STRING_set(WOLFSSL_ASN1_STRING* asn1,
                                                   const void* data, int dataSz);
@@ -906,6 +1377,9 @@
 WOLFSSL_API const char* wolfSSL_X509_verify_cert_error_string(long);
 WOLFSSL_API int wolfSSL_X509_get_signature_type(WOLFSSL_X509*);
 WOLFSSL_API int wolfSSL_X509_get_signature(WOLFSSL_X509*, unsigned char*, int*);
+WOLFSSL_API int wolfSSL_X509_get_pubkey_buffer(WOLFSSL_X509*, unsigned char*,
+        int*);
+WOLFSSL_API int wolfSSL_X509_get_pubkey_type(WOLFSSL_X509* x509);
 
 WOLFSSL_API int wolfSSL_X509_LOOKUP_add_dir(WOLFSSL_X509_LOOKUP*,const char*,long);
 WOLFSSL_API int wolfSSL_X509_LOOKUP_load_file(WOLFSSL_X509_LOOKUP*, const char*,
@@ -921,6 +1395,8 @@
                                               WOLFSSL_X509_STORE*, WOLFSSL_X509*);
 WOLFSSL_API WOLFSSL_STACK* wolfSSL_X509_STORE_CTX_get_chain(
                                                    WOLFSSL_X509_STORE_CTX* ctx);
+WOLFSSL_API WOLFSSL_STACK* wolfSSL_X509_STORE_CTX_get1_chain(
+                                                   WOLFSSL_X509_STORE_CTX* ctx);
 WOLFSSL_API int wolfSSL_X509_STORE_set_flags(WOLFSSL_X509_STORE* store,
                                                             unsigned long flag);
 WOLFSSL_API int          wolfSSL_X509_STORE_set_default_paths(WOLFSSL_X509_STORE*);
@@ -934,6 +1410,7 @@
 
 WOLFSSL_API WOLFSSL_ASN1_TIME* wolfSSL_X509_CRL_get_lastUpdate(WOLFSSL_X509_CRL*);
 WOLFSSL_API WOLFSSL_ASN1_TIME* wolfSSL_X509_CRL_get_nextUpdate(WOLFSSL_X509_CRL*);
+WOLFSSL_ASN1_TIME* wolfSSL_X509_gmtime_adj(WOLFSSL_ASN1_TIME *s, long adj);
 
 WOLFSSL_API WOLFSSL_EVP_PKEY* wolfSSL_X509_get_pubkey(WOLFSSL_X509*);
 WOLFSSL_API int       wolfSSL_X509_CRL_verify(WOLFSSL_X509_CRL*, WOLFSSL_EVP_PKEY*);
@@ -943,19 +1420,32 @@
 WOLFSSL_API WOLFSSL_EVP_PKEY* wolfSSL_d2i_PUBKEY_bio(WOLFSSL_BIO* bio,
                                          WOLFSSL_EVP_PKEY** out);
 WOLFSSL_API WOLFSSL_EVP_PKEY* wolfSSL_d2i_PUBKEY(WOLFSSL_EVP_PKEY** key,
-        unsigned char** in, long inSz);
+        const unsigned char** in, long inSz);
 WOLFSSL_API WOLFSSL_EVP_PKEY* wolfSSL_d2i_PrivateKey(int type,
         WOLFSSL_EVP_PKEY** out, const unsigned char **in, long inSz);
 WOLFSSL_API WOLFSSL_EVP_PKEY* wolfSSL_d2i_PrivateKey_EVP(WOLFSSL_EVP_PKEY** key,
         unsigned char** in, long inSz);
-WOLFSSL_API WOLFSSL_EVP_PKEY* wolfSSL_PKEY_new_ex(void* heap);
-WOLFSSL_API WOLFSSL_EVP_PKEY* wolfSSL_PKEY_new(void);
+WOLFSSL_API int wolfSSL_i2d_PrivateKey(WOLFSSL_EVP_PKEY* key,
+        unsigned char** der);
 WOLFSSL_API int       wolfSSL_X509_cmp_current_time(const WOLFSSL_ASN1_TIME*);
+#ifdef OPENSSL_EXTRA
+WOLFSSL_API int wolfSSL_X509_cmp_time(const WOLFSSL_ASN1_TIME* asnTime,
+        time_t *cmpTime);
+WOLFSSL_API WOLFSSL_ASN1_TIME *wolfSSL_X509_time_adj_ex(WOLFSSL_ASN1_TIME *asnTime,
+    int offset_day, long offset_sec, time_t *in_tm);
+WOLFSSL_API WOLFSSL_ASN1_TIME *wolfSSL_X509_time_adj(WOLFSSL_ASN1_TIME *asnTime,
+    long offset_sec, time_t *in_tm);
 WOLFSSL_API int       wolfSSL_sk_X509_REVOKED_num(WOLFSSL_X509_REVOKED*);
-#ifdef OPENSSL_EXTRA
 WOLFSSL_API void      wolfSSL_X509_STORE_CTX_set_time(WOLFSSL_X509_STORE_CTX*,
                                                       unsigned long flags,
                                                       time_t t);
+WOLFSSL_API void wolfSSL_X509_VERIFY_PARAM_set_hostflags(
+                WOLFSSL_X509_VERIFY_PARAM* param, unsigned int flags);
+WOLFSSL_API int wolfSSL_X509_VERIFY_PARAM_set1_host(WOLFSSL_X509_VERIFY_PARAM* pParam,
+                                                    const char* name,
+                                                    unsigned int nameSz);
+WOLFSSL_API int wolfSSL_X509_VERIFY_PARAM_set1_ip_asc(
+        WOLFSSL_X509_VERIFY_PARAM *param, const char *ipasc);
 #endif
 WOLFSSL_API WOLFSSL_X509_REVOKED* wolfSSL_X509_CRL_get_REVOKED(WOLFSSL_X509_CRL*);
 WOLFSSL_API WOLFSSL_X509_REVOKED* wolfSSL_sk_X509_REVOKED_value(
@@ -963,6 +1453,9 @@
 WOLFSSL_API WOLFSSL_ASN1_INTEGER* wolfSSL_X509_get_serialNumber(WOLFSSL_X509*);
 WOLFSSL_API void wolfSSL_ASN1_INTEGER_free(WOLFSSL_ASN1_INTEGER*);
 WOLFSSL_API WOLFSSL_ASN1_INTEGER* wolfSSL_ASN1_INTEGER_new(void);
+WOLFSSL_API WOLFSSL_ASN1_INTEGER* wolfSSL_ASN1_INTEGER_dup(
+                                              const WOLFSSL_ASN1_INTEGER* src);
+WOLFSSL_API int wolfSSL_ASN1_INTEGER_set(WOLFSSL_ASN1_INTEGER *a, long v);
 
 WOLFSSL_API int wolfSSL_ASN1_TIME_print(WOLFSSL_BIO*, const WOLFSSL_ASN1_TIME*);
 
@@ -975,19 +1468,44 @@
 #ifdef OPENSSL_EXTRA
 WOLFSSL_API WOLFSSL_BIGNUM *wolfSSL_ASN1_INTEGER_to_BN(const WOLFSSL_ASN1_INTEGER *ai,
                                        WOLFSSL_BIGNUM *bn);
-WOLFSSL_API WOLF_STACK_OF(WOLFSSL_X509_NAME)* wolfSSL_load_client_CA_file(const char*);
 WOLFSSL_API WOLFSSL_ASN1_TIME* wolfSSL_ASN1_TIME_adj(WOLFSSL_ASN1_TIME*, time_t,
                                                      int, long);
+WOLFSSL_API WOLFSSL_ASN1_TIME* wolfSSL_ASN1_TIME_new(void);
+WOLFSSL_API void wolfSSL_ASN1_TIME_free(WOLFSSL_ASN1_TIME* t);
 #endif
 
-WOLFSSL_API WOLF_STACK_OF(WOLFSSL_X509_NAME)* wolfSSL_SSL_CTX_get_client_CA_list(
+WOLFSSL_API WOLF_STACK_OF(WOLFSSL_X509_NAME)* wolfSSL_load_client_CA_file(const char*);
+WOLFSSL_API WOLF_STACK_OF(WOLFSSL_X509_NAME)* wolfSSL_CTX_get_client_CA_list(
         const WOLFSSL_CTX *s);
+/* deprecated function name */
+#define wolfSSL_SSL_CTX_get_client_CA_list wolfSSL_CTX_get_client_CA_list
+
 WOLFSSL_API void  wolfSSL_CTX_set_client_CA_list(WOLFSSL_CTX*,
                                                WOLF_STACK_OF(WOLFSSL_X509_NAME)*);
-WOLFSSL_API void* wolfSSL_X509_STORE_CTX_get_ex_data(WOLFSSL_X509_STORE_CTX*, int);
-WOLFSSL_API int   wolfSSL_get_ex_data_X509_STORE_CTX_idx(void);
+WOLFSSL_API WOLF_STACK_OF(WOLFSSL_X509_NAME)* wolfSSL_get_client_CA_list(
+            const WOLFSSL* ssl);
+
+typedef int (*client_cert_cb)(WOLFSSL *ssl, WOLFSSL_X509 **x509,
+                              WOLFSSL_EVP_PKEY **pkey);
+WOLFSSL_API void wolfSSL_CTX_set_client_cert_cb(WOLFSSL_CTX *ctx, client_cert_cb);
+
+WOLFSSL_API void* wolfSSL_X509_STORE_CTX_get_ex_data(
+        WOLFSSL_X509_STORE_CTX* ctx, int idx);
+WOLFSSL_API int  wolfSSL_X509_STORE_CTX_set_ex_data(WOLFSSL_X509_STORE_CTX* ctx,
+        int idx, void *data);
+WOLFSSL_API void wolfSSL_X509_STORE_CTX_set_depth(WOLFSSL_X509_STORE_CTX* ctx,
+        int depth);
+WOLFSSL_API WOLFSSL_X509* wolfSSL_X509_STORE_CTX_get0_current_issuer(
+        WOLFSSL_X509_STORE_CTX* ctx);
+WOLFSSL_API WOLFSSL_X509_STORE* wolfSSL_X509_STORE_CTX_get0_store(
+        WOLFSSL_X509_STORE_CTX* ctx);
+WOLFSSL_API WOLFSSL_X509* wolfSSL_X509_STORE_CTX_get0_cert(
+        WOLFSSL_X509_STORE_CTX*);
+WOLFSSL_API int  wolfSSL_get_ex_data_X509_STORE_CTX_idx(void);
 WOLFSSL_API void wolfSSL_X509_STORE_CTX_set_error(
                                            WOLFSSL_X509_STORE_CTX* ctx, int er);
+void wolfSSL_X509_STORE_CTX_set_error_depth(WOLFSSL_X509_STORE_CTX* ctx,
+                                                                     int depth);
 WOLFSSL_API void* wolfSSL_get_ex_data(const WOLFSSL*, int);
 
 WOLFSSL_API void wolfSSL_CTX_set_default_passwd_cb_userdata(WOLFSSL_CTX*,
@@ -1009,9 +1527,9 @@
 
 WOLFSSL_API WOLFSSL_RSA* wolfSSL_RSA_generate_key(int, unsigned long,
                                                void(*)(int, int, void*), void*);
-WOLFSSL_API WOLFSSL_RSA *wolfSSL_d2i_RSAPublicKey(WOLFSSL_RSA **r, 
+WOLFSSL_API WOLFSSL_RSA *wolfSSL_d2i_RSAPublicKey(WOLFSSL_RSA **r,
                                             const unsigned char **pp, long len);
-WOLFSSL_API WOLFSSL_RSA *wolfSSL_d2i_RSAPrivateKey(WOLFSSL_RSA**, 
+WOLFSSL_API WOLFSSL_RSA *wolfSSL_d2i_RSAPrivateKey(WOLFSSL_RSA**,
                                             const unsigned char**, long);
 WOLFSSL_API int wolfSSL_i2d_RSAPublicKey(WOLFSSL_RSA *r, const unsigned char **pp);
 WOLFSSL_API int wolfSSL_i2d_RSAPrivateKey(WOLFSSL_RSA *r, unsigned char **pp);
@@ -1052,6 +1570,7 @@
 WOLFSSL_API long wolfSSL_clear_options(WOLFSSL *s,  long op);
 WOLFSSL_API long wolfSSL_clear_num_renegotiations(WOLFSSL *s);
 WOLFSSL_API long wolfSSL_total_renegotiations(WOLFSSL *s);
+WOLFSSL_API long wolfSSL_num_renegotiations(WOLFSSL* s);
 WOLFSSL_API long wolfSSL_set_tmp_dh(WOLFSSL *s, WOLFSSL_DH *dh);
 WOLFSSL_API long wolfSSL_set_tlsext_debug_arg(WOLFSSL *s, void *arg);
 WOLFSSL_API long wolfSSL_set_tlsext_status_type(WOLFSSL *s, int type);
@@ -1062,22 +1581,25 @@
 WOLFSSL_API long wolfSSL_set_tlsext_status_ocsp_resp(WOLFSSL *s, unsigned char *resp, int len);
 
 WOLFSSL_API void wolfSSL_CONF_modules_unload(int all);
+WOLFSSL_API char* wolfSSL_CONF_get1_default_config_file(void);
 WOLFSSL_API long wolfSSL_get_tlsext_status_exts(WOLFSSL *s, void *arg);
 WOLFSSL_API long wolfSSL_get_verify_result(const WOLFSSL *ssl);
 
 #define WOLFSSL_DEFAULT_CIPHER_LIST ""   /* default all */
 
+/* These are bit-masks */
 enum {
     WOLFSSL_OCSP_URL_OVERRIDE = 1,
     WOLFSSL_OCSP_NO_NONCE     = 2,
     WOLFSSL_OCSP_CHECKALL     = 4,
 
     WOLFSSL_CRL_CHECKALL = 1,
-    WOLFSSL_CRL_CHECK    = 27,
+    WOLFSSL_CRL_CHECK    = 2,
 };
 
-#ifdef OPENSSL_EXTRA
-/* seperated out from other enums because of size */
+#if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL) || \
+    defined(HAVE_WEBSERVER)
+/* Separated out from other enums because of size */
 enum {
     SSL_OP_MICROSOFT_SESS_ID_BUG                  = 0x00000001,
     SSL_OP_NETSCAPE_CHALLENGE_BUG                 = 0x00000002,
@@ -1089,10 +1611,9 @@
     SSL_OP_TLS_D5_BUG                             = 0x00000080,
     SSL_OP_TLS_BLOCK_PADDING_BUG                  = 0x00000100,
     SSL_OP_TLS_ROLLBACK_BUG                       = 0x00000200,
-    SSL_OP_ALL                                    = 0x00000400,
     SSL_OP_EPHEMERAL_RSA                          = 0x00000800,
-    SSL_OP_NO_SSLv3                               = 0x00001000,
-    SSL_OP_NO_TLSv1                               = 0x00002000,
+    WOLFSSL_OP_NO_SSLv3                           = 0x00001000,
+    WOLFSSL_OP_NO_TLSv1                           = 0x00002000,
     SSL_OP_PKCS1_CHECK_1                          = 0x00004000,
     SSL_OP_PKCS1_CHECK_2                          = 0x00008000,
     SSL_OP_NETSCAPE_CA_DN_BUG                     = 0x00010000,
@@ -1105,12 +1626,42 @@
     SSL_OP_NO_SESSION_RESUMPTION_ON_RENEGOTIATION = 0x00800000,
     SSL_OP_SINGLE_ECDH_USE                        = 0x01000000,
     SSL_OP_CIPHER_SERVER_PREFERENCE               = 0x02000000,
-    SSL_OP_NO_TLSv1_1                             = 0x04000000,
-    SSL_OP_NO_TLSv1_2                             = 0x08000000,
+    WOLFSSL_OP_NO_TLSv1_1                         = 0x04000000,
+    WOLFSSL_OP_NO_TLSv1_2                         = 0x08000000,
     SSL_OP_NO_COMPRESSION                         = 0x10000000,
-    SSL_OP_NO_TLSv1_3                             = 0x20000000,
+    WOLFSSL_OP_NO_TLSv1_3                         = 0x20000000,
+    WOLFSSL_OP_NO_SSLv2                           = 0x40000000,
+    SSL_OP_ALL   =
+                    (SSL_OP_MICROSOFT_SESS_ID_BUG
+                  | SSL_OP_NETSCAPE_CHALLENGE_BUG
+                  | SSL_OP_NETSCAPE_REUSE_CIPHER_CHANGE_BUG
+                  | SSL_OP_SSLREF2_REUSE_CERT_TYPE_BUG
+                  | SSL_OP_MICROSOFT_BIG_SSLV3_BUFFER
+                  | SSL_OP_MSIE_SSLV2_RSA_PADDING
+                  | SSL_OP_SSLEAY_080_CLIENT_DH_BUG
+                  | SSL_OP_TLS_D5_BUG
+                  | SSL_OP_TLS_BLOCK_PADDING_BUG
+                  | SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS
+                  | SSL_OP_TLS_ROLLBACK_BUG),
 };
 
+/* for compatibility these must be macros */
+#define SSL_OP_NO_SSLv2   WOLFSSL_OP_NO_SSLv2
+#define SSL_OP_NO_SSLv3   WOLFSSL_OP_NO_SSLv3
+#define SSL_OP_NO_TLSv1   WOLFSSL_OP_NO_TLSv1
+#define SSL_OP_NO_TLSv1_1 WOLFSSL_OP_NO_TLSv1_1
+#define SSL_OP_NO_TLSv1_2 WOLFSSL_OP_NO_TLSv1_2
+#if !(!defined(WOLFSSL_TLS13) && defined(WOLFSSL_APACHE_HTTPD)) /* apache uses this to determine if TLS 1.3 is enabled */
+#define SSL_OP_NO_TLSv1_3 WOLFSSL_OP_NO_TLSv1_3
+#endif
+
+#define SSL_OP_NO_SSL_MASK (SSL_OP_NO_SSLv3 | SSL_OP_NO_TLSv1 | \
+    SSL_OP_NO_TLSv1_1 | SSL_OP_NO_TLSv1_2 | SSL_OP_NO_TLSv1_3)
+
+#define SSL_NOTHING 1
+#define SSL_WRITING 2
+#define SSL_READING 3
+
 enum {
 #ifdef HAVE_OCSP
     /* OCSP Flags */
@@ -1137,8 +1688,6 @@
     ASN1_GENERALIZEDTIME = 4,
     SSL_MAX_SSL_SESSION_ID_LENGTH = 32,
 
-    EVP_R_BAD_DECRYPT = 2,
-
     SSL_ST_CONNECT = 0x1000,
     SSL_ST_ACCEPT  = 0x2000,
     SSL_ST_MASK    = 0x0FFF,
@@ -1156,17 +1705,18 @@
     SSL_CB_ACCEPT_EXIT = (SSL_ST_ACCEPT | SSL_CB_EXIT),
     SSL_CB_CONNECT_LOOP = (SSL_ST_CONNECT | SSL_CB_LOOP),
     SSL_CB_CONNECT_EXIT = (SSL_ST_CONNECT | SSL_CB_EXIT),
-	SSL_CB_MODE_READ = 1,
-	SSL_CB_MODE_WRITE = 2,
+    SSL_CB_MODE_READ = 1,
+    SSL_CB_MODE_WRITE = 2,
 
     SSL_MODE_ENABLE_PARTIAL_WRITE = 2,
+    SSL_MODE_AUTO_RETRY = 3, /* wolfSSL default is to block with blocking io
+                              * and auto retry */
+    SSL_MODE_RELEASE_BUFFERS = -1, /* For libwebsockets build. No current use. */
 
     BIO_FLAGS_BASE64_NO_NL = 1,
     BIO_CLOSE   = 1,
     BIO_NOCLOSE = 0,
 
-    NID_undef = 0,
-
     X509_FILETYPE_PEM = 8,
     X509_LU_X509      = 9,
     X509_LU_CRL       = 12,
@@ -1217,12 +1767,15 @@
     X509_V_ERR_INVALID_POLICY_EXTENSION,
     X509_V_ERR_NO_EXPLICIT_POLICY,
     X509_V_ERR_UNNESTED_RESOURCE,
+    X509_V_ERR_APPLICATION_VERIFICATION,
 
     X509_R_CERT_ALREADY_IN_HASH_TABLE,
 
     XN_FLAG_SPC_EQ  = (1 << 23),
+    XN_FLAG_SEP_CPLUS_SPC = (2 << 16),
     XN_FLAG_ONELINE = 0,
     XN_FLAG_RFC2253 = 1,
+    XN_FLAG_DN_REV = (1 << 20),
 
     CRYPTO_LOCK = 1,
     CRYPTO_NUM_LOCKS = 10,
@@ -1240,8 +1793,11 @@
 WOLFSSL_API void  wolfSSL_ERR_print_errors_fp(XFILE, int err);
 #if defined(OPENSSL_EXTRA) || defined(DEBUG_WOLFSSL_VERBOSE)
 WOLFSSL_API void wolfSSL_ERR_dump_errors_fp(XFILE fp);
+WOLFSSL_API void wolfSSL_ERR_print_errors_cb(int (*cb)(const char *str,
+                                                size_t len, void *u), void *u);
 #endif
 #endif
+WOLFSSL_API void wolfSSL_ERR_print_errors(WOLFSSL_BIO *bio);
 
 
 #ifndef NO_OLD_SSL_NAMES
@@ -1292,7 +1848,6 @@
     #define SSL_SENT_SHUTDOWN WOLFSSL_SENT_SHUTDOWN
     #define SSL_RECEIVED_SHUTDOWN WOLFSSL_RECEIVED_SHUTDOWN
     #define SSL_MODE_ACCEPT_MOVING_WRITE_BUFFER WOLFSSL_MODE_ACCEPT_MOVING_WRITE_BUFFER
-    #define SSL_OP_NO_SSLv2 WOLFSSL_OP_NO_SSLv2
 
     #define SSL_R_SSL_HANDSHAKE_FAILURE WOLFSSL_R_SSL_HANDSHAKE_FAILURE
     #define SSL_R_TLSV1_ALERT_UNKNOWN_CA WOLFSSL_R_TLSV1_ALERT_UNKNOWN_CA
@@ -1350,7 +1905,6 @@
     WOLFSSL_SENT_SHUTDOWN     = 1,
     WOLFSSL_RECEIVED_SHUTDOWN = 2,
     WOLFSSL_MODE_ACCEPT_MOVING_WRITE_BUFFER = 4,
-    WOLFSSL_OP_NO_SSLv2       = 8,
 
     WOLFSSL_R_SSL_HANDSHAKE_FAILURE           = 101,
     WOLFSSL_R_TLSV1_ALERT_UNKNOWN_CA          = 102,
@@ -1367,6 +1921,14 @@
                                                     wc_psk_client_callback);
     WOLFSSL_API void wolfSSL_set_psk_client_callback(WOLFSSL*,
                                                     wc_psk_client_callback);
+#ifdef WOLFSSL_TLS13
+    typedef unsigned int (*wc_psk_client_tls13_callback)(WOLFSSL*, const char*,
+               char*, unsigned int, unsigned char*, unsigned int, const char**);
+    WOLFSSL_API void wolfSSL_CTX_set_psk_client_tls13_callback(WOLFSSL_CTX*,
+                                                  wc_psk_client_tls13_callback);
+    WOLFSSL_API void wolfSSL_set_psk_client_tls13_callback(WOLFSSL*,
+                                                  wc_psk_client_tls13_callback);
+#endif
 
     WOLFSSL_API const char* wolfSSL_get_psk_identity_hint(const WOLFSSL*);
     WOLFSSL_API const char* wolfSSL_get_psk_identity(const WOLFSSL*);
@@ -1380,6 +1942,14 @@
                                                     wc_psk_server_callback);
     WOLFSSL_API void wolfSSL_set_psk_server_callback(WOLFSSL*,
                                                     wc_psk_server_callback);
+#ifdef WOLFSSL_TLS13
+    typedef unsigned int (*wc_psk_server_tls13_callback)(WOLFSSL*, const char*,
+                          unsigned char*, unsigned int, const char**);
+    WOLFSSL_API void wolfSSL_CTX_set_psk_server_tls13_callback(WOLFSSL_CTX*,
+                                                  wc_psk_server_tls13_callback);
+    WOLFSSL_API void wolfSSL_set_psk_server_tls13_callback(WOLFSSL*,
+                                                  wc_psk_server_tls13_callback);
+#endif
 
     #define PSK_TYPES_DEFINED
 #endif /* NO_PSK */
@@ -1441,20 +2011,46 @@
 WOLFSSL_API int  wolfSSL_CTX_set_default_verify_paths(WOLFSSL_CTX*);
 WOLFSSL_API int  wolfSSL_CTX_set_session_id_context(WOLFSSL_CTX*,
                                             const unsigned char*, unsigned int);
-WOLFSSL_API WOLFSSL_X509* wolfSSL_get_peer_certificate(WOLFSSL* ssl);
+WOLFSSL_ABI WOLFSSL_API WOLFSSL_X509* wolfSSL_get_peer_certificate(WOLFSSL*);
 WOLFSSL_API WOLF_STACK_OF(WOLFSSL_X509)* wolfSSL_get_peer_cert_chain(const WOLFSSL*);
-
+#if defined(WOLFSSL_QT) || defined(OPENSSL_ALL)
+WOLFSSL_API WOLF_STACK_OF(WOLFSSL_X509)* wolfSSL_set_peer_cert_chain(WOLFSSL* ssl);
+#endif
+
+#ifdef OPENSSL_EXTRA
+WOLFSSL_API int wolfSSL_want(WOLFSSL*);
+#endif
 WOLFSSL_API int wolfSSL_want_read(WOLFSSL*);
 WOLFSSL_API int wolfSSL_want_write(WOLFSSL*);
 
+#if !defined(NO_FILESYSTEM) && defined (OPENSSL_EXTRA)
+#include <stdarg.h> /* var_arg */
+WOLFSSL_API int wolfSSL_BIO_vprintf(WOLFSSL_BIO* bio, const char* format,
+                                                            va_list args);
+#endif
 WOLFSSL_API int wolfSSL_BIO_printf(WOLFSSL_BIO*, const char*, ...);
+WOLFSSL_API int wolfSSL_BIO_dump(WOLFSSL_BIO *bio, const char*, int);
 WOLFSSL_API int wolfSSL_ASN1_UTCTIME_print(WOLFSSL_BIO*,
                                          const WOLFSSL_ASN1_UTCTIME*);
 WOLFSSL_API int wolfSSL_ASN1_GENERALIZEDTIME_print(WOLFSSL_BIO*,
                                          const WOLFSSL_ASN1_GENERALIZEDTIME*);
 WOLFSSL_API void wolfSSL_ASN1_GENERALIZEDTIME_free(WOLFSSL_ASN1_GENERALIZEDTIME*);
-WOLFSSL_API int   wolfSSL_sk_num(WOLF_STACK_OF(WOLFSSL_ASN1_OBJECT)*);
-WOLFSSL_API void* wolfSSL_sk_value(WOLF_STACK_OF(WOLFSSL_ASN1_OBJECT)*, int);
+WOLFSSL_API int wolfSSL_ASN1_TIME_check(const WOLFSSL_ASN1_TIME*);
+WOLFSSL_API int wolfSSL_ASN1_TIME_diff(int *pday, int *psec,
+                   const WOLFSSL_ASN1_TIME *from, const WOLFSSL_ASN1_TIME *to);
+#ifdef OPENSSL_EXTRA
+WOLFSSL_API WOLFSSL_ASN1_TIME *wolfSSL_ASN1_TIME_set(WOLFSSL_ASN1_TIME *s, time_t t);
+#endif
+
+WOLFSSL_API int wolfSSL_sk_num(WOLFSSL_STACK* sk);
+WOLFSSL_API void* wolfSSL_sk_value(WOLFSSL_STACK* sk, int i);
+
+#if defined(HAVE_EX_DATA) || defined(FORTRESS)
+WOLFSSL_API void* wolfSSL_CRYPTO_get_ex_data(const WOLFSSL_CRYPTO_EX_DATA* ex_data,
+                                            int idx);
+WOLFSSL_API int wolfSSL_CRYPTO_set_ex_data(WOLFSSL_CRYPTO_EX_DATA* ex_data, int idx,
+                                            void *data);
+#endif
 
 /* stunnel 4.28 needs */
 WOLFSSL_API void* wolfSSL_CTX_get_ex_data(const WOLFSSL_CTX*, int);
@@ -1474,6 +2070,7 @@
 WOLFSSL_API long wolfSSL_SESSION_get_time(const WOLFSSL_SESSION*);
 WOLFSSL_API int  wolfSSL_CTX_get_ex_new_index(long, void*, void*, void*, void*);
 
+
 /* extra ends */
 
 
@@ -1481,25 +2078,27 @@
 
 /* call before SSL_connect, if verifying will add name check to
    date check and signature check */
-WOLFSSL_API int wolfSSL_check_domain_name(WOLFSSL* ssl, const char* dn);
+WOLFSSL_ABI WOLFSSL_API int wolfSSL_check_domain_name(WOLFSSL*, const char*);
 
 /* need to call once to load library (session cache) */
-WOLFSSL_API int wolfSSL_Init(void);
+WOLFSSL_ABI WOLFSSL_API int wolfSSL_Init(void);
 /* call when done to cleanup/free session cache mutex / resources  */
-WOLFSSL_API int wolfSSL_Cleanup(void);
+WOLFSSL_ABI WOLFSSL_API int wolfSSL_Cleanup(void);
 
 /* which library version do we have */
 WOLFSSL_API const char* wolfSSL_lib_version(void);
 /* which library version do we have in hex */
-WOLFSSL_API unsigned int wolfSSL_lib_version_hex(void);
+WOLFSSL_API word32 wolfSSL_lib_version_hex(void);
 
 /* do accept or connect depedning on side */
 WOLFSSL_API int wolfSSL_negotiate(WOLFSSL* ssl);
 /* turn on wolfSSL data compression */
 WOLFSSL_API int wolfSSL_set_compression(WOLFSSL* ssl);
 
-WOLFSSL_API int wolfSSL_set_timeout(WOLFSSL*, unsigned int);
-WOLFSSL_API int wolfSSL_CTX_set_timeout(WOLFSSL_CTX*, unsigned int);
+WOLFSSL_ABI WOLFSSL_API int wolfSSL_set_timeout(WOLFSSL*, unsigned int);
+WOLFSSL_ABI WOLFSSL_API int wolfSSL_CTX_set_timeout(WOLFSSL_CTX*, unsigned int);
+WOLFSSL_API void wolfSSL_CTX_set_current_time_cb(WOLFSSL_CTX* ctx,
+    void (*cb)(const WOLFSSL* ssl, WOLFSSL_TIMEVAL* out_clock));
 
 /* get wolfSSL peer X509_CHAIN */
 WOLFSSL_API WOLFSSL_X509_CHAIN* wolfSSL_get_peer_chain(WOLFSSL* ssl);
@@ -1518,21 +2117,24 @@
 WOLFSSL_API WOLFSSL_X509* wolfSSL_get_chain_X509(WOLFSSL_X509_CHAIN*, int idx);
 /* free X509 */
 #define wolfSSL_FreeX509(x509) wolfSSL_X509_free((x509))
-WOLFSSL_API void wolfSSL_X509_free(WOLFSSL_X509*);
+WOLFSSL_ABI WOLFSSL_API void wolfSSL_X509_free(WOLFSSL_X509*);
 /* get index cert in PEM */
 WOLFSSL_API int  wolfSSL_get_chain_cert_pem(WOLFSSL_X509_CHAIN*, int idx,
                                 unsigned char* buf, int inLen, int* outLen);
-WOLFSSL_API const unsigned char* wolfSSL_get_sessionID(const WOLFSSL_SESSION* s);
+WOLFSSL_ABI WOLFSSL_API const unsigned char* wolfSSL_get_sessionID(
+                                                      const WOLFSSL_SESSION* s);
 WOLFSSL_API int  wolfSSL_X509_get_serial_number(WOLFSSL_X509*,unsigned char*,int*);
 WOLFSSL_API char*  wolfSSL_X509_get_subjectCN(WOLFSSL_X509*);
 WOLFSSL_API const unsigned char* wolfSSL_X509_get_der(WOLFSSL_X509*, int*);
-WOLFSSL_API const unsigned char* wolfSSL_X509_notBefore(WOLFSSL_X509*);
-WOLFSSL_API const unsigned char* wolfSSL_X509_notAfter(WOLFSSL_X509*);
+WOLFSSL_API const unsigned char* wolfSSL_X509_get_tbs(WOLFSSL_X509*, int*);
+WOLFSSL_ABI WOLFSSL_API const byte* wolfSSL_X509_notBefore(WOLFSSL_X509*);
+WOLFSSL_ABI WOLFSSL_API const byte* wolfSSL_X509_notAfter(WOLFSSL_X509*);
 WOLFSSL_API int wolfSSL_X509_version(WOLFSSL_X509*);
 
 WOLFSSL_API int wolfSSL_cmp_peer_cert_to_file(WOLFSSL*, const char*);
 
-WOLFSSL_API char* wolfSSL_X509_get_next_altname(WOLFSSL_X509*);
+WOLFSSL_ABI WOLFSSL_API char* wolfSSL_X509_get_next_altname(WOLFSSL_X509*);
+WOLFSSL_API int wolfSSL_X509_add_altname(WOLFSSL_X509*, const char*, int);
 
 WOLFSSL_API WOLFSSL_X509* wolfSSL_d2i_X509(WOLFSSL_X509** x509,
         const unsigned char** in, int len);
@@ -1551,7 +2153,7 @@
     WOLFSSL_API WOLFSSL_X509*
         wolfSSL_X509_d2i_fp(WOLFSSL_X509** x509, XFILE file);
     #endif
-WOLFSSL_API WOLFSSL_X509*
+WOLFSSL_ABI WOLFSSL_API WOLFSSL_X509*
     wolfSSL_X509_load_certificate_file(const char* fname, int format);
 #endif
 WOLFSSL_API WOLFSSL_X509* wolfSSL_X509_load_certificate_buffer(
@@ -1575,6 +2177,7 @@
 typedef struct WC_PKCS12 WC_PKCS12;
 WOLFSSL_API WC_PKCS12* wolfSSL_d2i_PKCS12_bio(WOLFSSL_BIO* bio,
                                        WC_PKCS12** pkcs12);
+WOLFSSL_API int wolfSSL_i2d_PKCS12_bio(WOLFSSL_BIO *bio, WC_PKCS12 *pkcs12);
 #ifndef NO_FILESYSTEM
 WOLFSSL_API WOLFSSL_X509_PKCS12* wolfSSL_d2i_PKCS12_fp(XFILE fp,
                                        WOLFSSL_X509_PKCS12** pkcs12);
@@ -1596,6 +2199,7 @@
                                 const unsigned char* g, int gSz);
 WOLFSSL_API int  wolfSSL_SetTmpDH_buffer(WOLFSSL*, const unsigned char* b, long sz,
                                        int format);
+WOLFSSL_API int wolfSSL_SetEnableDhKeyTest(WOLFSSL*, int);
 #ifndef NO_FILESYSTEM
     WOLFSSL_API int  wolfSSL_SetTmpDH_file(WOLFSSL*, const char* f, int format);
 #endif
@@ -1611,10 +2215,10 @@
                                              int format);
 #endif
 
-WOLFSSL_API int wolfSSL_CTX_SetMinDhKey_Sz(WOLFSSL_CTX*, unsigned short);
-WOLFSSL_API int wolfSSL_SetMinDhKey_Sz(WOLFSSL*, unsigned short);
-WOLFSSL_API int wolfSSL_CTX_SetMaxDhKey_Sz(WOLFSSL_CTX*, unsigned short);
-WOLFSSL_API int wolfSSL_SetMaxDhKey_Sz(WOLFSSL*, unsigned short);
+WOLFSSL_API int wolfSSL_CTX_SetMinDhKey_Sz(WOLFSSL_CTX*, word16);
+WOLFSSL_API int wolfSSL_SetMinDhKey_Sz(WOLFSSL*, word16);
+WOLFSSL_API int wolfSSL_CTX_SetMaxDhKey_Sz(WOLFSSL_CTX*, word16);
+WOLFSSL_API int wolfSSL_SetMaxDhKey_Sz(WOLFSSL*, word16);
 WOLFSSL_API int wolfSSL_GetDhKey_Sz(WOLFSSL*);
 #endif /* NO_DH */
 
@@ -1628,8 +2232,8 @@
 WOLFSSL_API int wolfSSL_SetMinEccKey_Sz(WOLFSSL*, short);
 #endif /* NO_RSA */
 
-WOLFSSL_API int  wolfSSL_SetTmpEC_DHE_Sz(WOLFSSL*, unsigned short);
-WOLFSSL_API int  wolfSSL_CTX_SetTmpEC_DHE_Sz(WOLFSSL_CTX*, unsigned short);
+WOLFSSL_API int  wolfSSL_SetTmpEC_DHE_Sz(WOLFSSL*, word16);
+WOLFSSL_API int  wolfSSL_CTX_SetTmpEC_DHE_Sz(WOLFSSL_CTX*, word16);
 
 /* keyblock size in bytes or -1 */
 /* need to call wolfSSL_KeepArrays before handshake to save keys */
@@ -1651,7 +2255,8 @@
         #elif !defined(WOLFSSL_MDK_ARM) && !defined(WOLFSSL_IAR_ARM) && \
               !defined(WOLFSSL_PICOTCP) && !defined(WOLFSSL_ROWLEY_ARM) && \
               !defined(WOLFSSL_EMBOS) && !defined(WOLFSSL_FROSTED) && \
-              !defined(WOLFSSL_CHIBIOS)
+              !defined(WOLFSSL_CHIBIOS) && !defined(WOLFSSL_CONTIKI) && \
+              !defined(WOLFSSL_ZEPHYR)
             #include <sys/uio.h>
         #endif
         /* allow writev style writing */
@@ -1669,12 +2274,19 @@
     WOLFSSL_API int wolfSSL_CTX_trust_peer_buffer(WOLFSSL_CTX*,
                                                const unsigned char*, long, int);
 #endif
+    WOLFSSL_API int wolfSSL_CTX_load_verify_buffer_ex(WOLFSSL_CTX*,
+                                               const unsigned char*, long, int,
+                                               int, word32);
     WOLFSSL_API int wolfSSL_CTX_load_verify_buffer(WOLFSSL_CTX*,
                                                const unsigned char*, long, int);
+    WOLFSSL_API int wolfSSL_CTX_load_verify_chain_buffer_format(WOLFSSL_CTX*,
+                                               const unsigned char*, long, int);
     WOLFSSL_API int wolfSSL_CTX_use_certificate_buffer(WOLFSSL_CTX*,
                                                const unsigned char*, long, int);
     WOLFSSL_API int wolfSSL_CTX_use_PrivateKey_buffer(WOLFSSL_CTX*,
                                                const unsigned char*, long, int);
+    WOLFSSL_API int wolfSSL_CTX_use_PrivateKey_id(WOLFSSL_CTX*,
+                                         const unsigned char*, long, int, long);
     WOLFSSL_API int wolfSSL_CTX_use_certificate_chain_buffer_format(WOLFSSL_CTX*,
                                                const unsigned char*, long, int);
     WOLFSSL_API int wolfSSL_CTX_use_certificate_chain_buffer(WOLFSSL_CTX*,
@@ -1683,15 +2295,20 @@
     /* SSL versions */
     WOLFSSL_API int wolfSSL_use_certificate_buffer(WOLFSSL*, const unsigned char*,
                                                long, int);
+    WOLFSSL_API int wolfSSL_use_certificate_ASN1(WOLFSSL* ssl,
+                                           const unsigned char* der, int derSz);
     WOLFSSL_API int wolfSSL_use_PrivateKey_buffer(WOLFSSL*, const unsigned char*,
                                                long, int);
+    WOLFSSL_API int wolfSSL_use_PrivateKey_id(WOLFSSL*, const unsigned char*,
+                                                               long, int, long);
     WOLFSSL_API int wolfSSL_use_certificate_chain_buffer_format(WOLFSSL*,
                                                const unsigned char*, long, int);
     WOLFSSL_API int wolfSSL_use_certificate_chain_buffer(WOLFSSL*,
                                                const unsigned char*, long);
     WOLFSSL_API int wolfSSL_UnloadCertsKeys(WOLFSSL*);
 
-    #if defined(OPENSSL_EXTRA) && defined(KEEP_OUR_CERT)
+    #if (defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)) && \
+        defined(KEEP_OUR_CERT)
         WOLFSSL_API WOLFSSL_X509* wolfSSL_get_certificate(WOLFSSL* ssl);
     #endif
 #endif
@@ -1716,9 +2333,7 @@
 #endif
 
 
-WOLFSSL_API int   wolfSSL_DTLS_SetCookieSecret(WOLFSSL*,
-                                               const unsigned char*,
-                                               unsigned int);
+WOLFSSL_API int   wolfSSL_DTLS_SetCookieSecret(WOLFSSL*, const byte*, word32);
 
 
 /* I/O Callback default errors */
@@ -1744,10 +2359,10 @@
     WOLFSSL_CHAIN_CA = 2           /* added to cache from trusted chain */
 };
 
-WOLFSSL_API WC_RNG* wolfSSL_GetRNG(WOLFSSL*);
-
-WOLFSSL_API int wolfSSL_CTX_SetMinVersion(WOLFSSL_CTX* ctx, int version);
-WOLFSSL_API int wolfSSL_SetMinVersion(WOLFSSL* ssl, int version);
+WOLFSSL_ABI WOLFSSL_API WC_RNG* wolfSSL_GetRNG(WOLFSSL*);
+
+WOLFSSL_ABI WOLFSSL_API int wolfSSL_CTX_SetMinVersion(WOLFSSL_CTX*, int);
+WOLFSSL_API int wolfSSL_SetMinVersion(WOLFSSL*, int);
 WOLFSSL_API int wolfSSL_GetObjectSize(void);  /* object size based on build */
 WOLFSSL_API int wolfSSL_CTX_GetObjectSize(void);
 WOLFSSL_API int wolfSSL_METHOD_GetObjectSize(void);
@@ -1756,7 +2371,7 @@
 WOLFSSL_API int wolfSSL_GetVersion(WOLFSSL* ssl);
 WOLFSSL_API int wolfSSL_SetVersion(WOLFSSL* ssl, int version);
 
-/* moved to asn.c, old names kept for backwards compatability */
+/* moved to asn.c, old names kept for backwards compatibility */
 #define wolfSSL_KeyPemToDer    wc_KeyPemToDer
 #define wolfSSL_CertPemToDer   wc_CertPemToDer
 #define wolfSSL_PemPubKeyToDer wc_PemPubKeyToDer
@@ -1788,10 +2403,26 @@
        unsigned int decSz, int content, int verify, unsigned int* padSz,
        void* ctx);
 WOLFSSL_API void  wolfSSL_CTX_SetDecryptVerifyCb(WOLFSSL_CTX*,
-                                               CallbackDecryptVerify);
+                                                 CallbackDecryptVerify);
 WOLFSSL_API void  wolfSSL_SetDecryptVerifyCtx(WOLFSSL* ssl, void *ctx);
 WOLFSSL_API void* wolfSSL_GetDecryptVerifyCtx(WOLFSSL* ssl);
 
+typedef int (*CallbackEncryptMac)(WOLFSSL* ssl, unsigned char* macOut,
+       int content, int macVerify, unsigned char* encOut,
+       const unsigned char* encIn, unsigned int encSz, void* ctx);
+WOLFSSL_API void  wolfSSL_CTX_SetEncryptMacCb(WOLFSSL_CTX*, CallbackEncryptMac);
+WOLFSSL_API void  wolfSSL_SetEncryptMacCtx(WOLFSSL* ssl, void *ctx);
+WOLFSSL_API void* wolfSSL_GetEncryptMacCtx(WOLFSSL* ssl);
+
+typedef int (*CallbackVerifyDecrypt)(WOLFSSL* ssl,
+       unsigned char* decOut, const unsigned char* decIn,
+       unsigned int decSz, int content, int verify, unsigned int* padSz,
+       void* ctx);
+WOLFSSL_API void  wolfSSL_CTX_SetVerifyDecryptCb(WOLFSSL_CTX*,
+                                                 CallbackVerifyDecrypt);
+WOLFSSL_API void  wolfSSL_SetVerifyDecryptCtx(WOLFSSL* ssl, void *ctx);
+WOLFSSL_API void* wolfSSL_GetVerifyDecryptCtx(WOLFSSL* ssl);
+
 WOLFSSL_API const unsigned char* wolfSSL_GetMacSecret(WOLFSSL*, int);
 WOLFSSL_API const unsigned char* wolfSSL_GetClientWriteKey(WOLFSSL*);
 WOLFSSL_API const unsigned char* wolfSSL_GetClientWriteIV(WOLFSSL*);
@@ -1808,7 +2439,7 @@
 WOLFSSL_API int                  wolfSSL_GetHmacType(WOLFSSL*);
 WOLFSSL_API int                  wolfSSL_GetCipherType(WOLFSSL*);
 WOLFSSL_API int                  wolfSSL_SetTlsHmacInner(WOLFSSL*, unsigned char*,
-                                                       unsigned int, int, int);
+                                                       word32, int, int);
 
 /* Atomic User Needs */
 enum {
@@ -1844,7 +2475,7 @@
 
 /* for KDF TLS 1.2 mac types */
 enum KDF_MacAlgorithm {
-    wolfssl_sha256 = 4,     /* needs to match internal MACAlgorithm */
+    wolfssl_sha256 = 4,     /* needs to match hash.h wc_MACAlgorithm */
     wolfssl_sha384,
     wolfssl_sha512
 };
@@ -1856,7 +2487,7 @@
 
 struct ecc_key;
 
-typedef int (*CallbackEccKeyGen)(WOLFSSL* ssl, struct ecc_key* key, 
+typedef int (*CallbackEccKeyGen)(WOLFSSL* ssl, struct ecc_key* key,
     unsigned int keySz, int ecc_curve, void* ctx);
 WOLFSSL_API void  wolfSSL_CTX_SetEccKeyGenCb(WOLFSSL_CTX*, CallbackEccKeyGen);
 WOLFSSL_API void  wolfSSL_SetEccKeyGenCtx(WOLFSSL* ssl, void *ctx);
@@ -1864,10 +2495,11 @@
 
 typedef int (*CallbackEccSign)(WOLFSSL* ssl,
        const unsigned char* in, unsigned int inSz,
-       unsigned char* out, unsigned int* outSz,
+       unsigned char* out, word32* outSz,
        const unsigned char* keyDer, unsigned int keySz,
        void* ctx);
-WOLFSSL_API void  wolfSSL_CTX_SetEccSignCb(WOLFSSL_CTX*, CallbackEccSign);
+WOLFSSL_ABI WOLFSSL_API void  wolfSSL_CTX_SetEccSignCb(WOLFSSL_CTX*,
+                                                               CallbackEccSign);
 WOLFSSL_API void  wolfSSL_SetEccSignCtx(WOLFSSL* ssl, void *ctx);
 WOLFSSL_API void* wolfSSL_GetEccSignCtx(WOLFSSL* ssl);
 
@@ -1881,8 +2513,8 @@
 WOLFSSL_API void* wolfSSL_GetEccVerifyCtx(WOLFSSL* ssl);
 
 typedef int (*CallbackEccSharedSecret)(WOLFSSL* ssl, struct ecc_key* otherKey,
-        unsigned char* pubKeyDer, unsigned int* pubKeySz,
-        unsigned char* out, unsigned int* outlen,
+        unsigned char* pubKeyDer, word32* pubKeySz,
+        unsigned char* out, word32* outlen,
         int side, void* ctx); /* side is WOLFSSL_CLIENT_END or WOLFSSL_SERVER_END */
 WOLFSSL_API void  wolfSSL_CTX_SetEccSharedSecretCb(WOLFSSL_CTX*, CallbackEccSharedSecret);
 WOLFSSL_API void  wolfSSL_SetEccSharedSecretCtx(WOLFSSL* ssl, void *ctx);
@@ -1928,7 +2560,7 @@
 #ifdef HAVE_CURVE25519
 struct curve25519_key;
 
-typedef int (*CallbackX25519KeyGen)(WOLFSSL* ssl, struct curve25519_key* key, 
+typedef int (*CallbackX25519KeyGen)(WOLFSSL* ssl, struct curve25519_key* key,
     unsigned int keySz, void* ctx);
 WOLFSSL_API void  wolfSSL_CTX_SetX25519KeyGenCb(WOLFSSL_CTX*, CallbackX25519KeyGen);
 WOLFSSL_API void  wolfSSL_SetX25519KeyGenCtx(WOLFSSL* ssl, void *ctx);
@@ -1946,6 +2578,50 @@
 WOLFSSL_API void* wolfSSL_GetX25519SharedSecretCtx(WOLFSSL* ssl);
 #endif
 
+#ifdef HAVE_ED448
+struct ed448_key;
+typedef int (*CallbackEd448Sign)(WOLFSSL* ssl,
+       const unsigned char* in, unsigned int inSz,
+       unsigned char* out, unsigned int* outSz,
+       const unsigned char* keyDer, unsigned int keySz,
+       void* ctx);
+WOLFSSL_API void  wolfSSL_CTX_SetEd448SignCb(WOLFSSL_CTX*,
+                                               CallbackEd448Sign);
+WOLFSSL_API void  wolfSSL_SetEd448SignCtx(WOLFSSL* ssl, void *ctx);
+WOLFSSL_API void* wolfSSL_GetEd448SignCtx(WOLFSSL* ssl);
+
+typedef int (*CallbackEd448Verify)(WOLFSSL* ssl,
+       const unsigned char* sig, unsigned int sigSz,
+       const unsigned char* msg, unsigned int msgSz,
+       const unsigned char* keyDer, unsigned int keySz,
+       int* result, void* ctx);
+WOLFSSL_API void  wolfSSL_CTX_SetEd448VerifyCb(WOLFSSL_CTX*,
+                                                 CallbackEd448Verify);
+WOLFSSL_API void  wolfSSL_SetEd448VerifyCtx(WOLFSSL* ssl, void *ctx);
+WOLFSSL_API void* wolfSSL_GetEd448VerifyCtx(WOLFSSL* ssl);
+#endif
+
+#ifdef HAVE_CURVE448
+struct curve448_key;
+
+typedef int (*CallbackX448KeyGen)(WOLFSSL* ssl, struct curve448_key* key,
+    unsigned int keySz, void* ctx);
+WOLFSSL_API void  wolfSSL_CTX_SetX448KeyGenCb(WOLFSSL_CTX*, CallbackX448KeyGen);
+WOLFSSL_API void  wolfSSL_SetX448KeyGenCtx(WOLFSSL* ssl, void *ctx);
+WOLFSSL_API void* wolfSSL_GetX448KeyGenCtx(WOLFSSL* ssl);
+
+typedef int (*CallbackX448SharedSecret)(WOLFSSL* ssl,
+        struct curve448_key* otherKey,
+        unsigned char* pubKeyDer, unsigned int* pubKeySz,
+        unsigned char* out, unsigned int* outlen,
+        int side, void* ctx);
+        /* side is WOLFSSL_CLIENT_END or WOLFSSL_SERVER_END */
+WOLFSSL_API void  wolfSSL_CTX_SetX448SharedSecretCb(WOLFSSL_CTX*,
+        CallbackX448SharedSecret);
+WOLFSSL_API void  wolfSSL_SetX448SharedSecretCtx(WOLFSSL* ssl, void *ctx);
+WOLFSSL_API void* wolfSSL_GetX448SharedSecretCtx(WOLFSSL* ssl);
+#endif
+
 #ifndef NO_RSA
 typedef int (*CallbackRsaSign)(WOLFSSL* ssl,
        const unsigned char* in, unsigned int inSz,
@@ -2039,16 +2715,24 @@
     WOLFSSL_API int wolfSSL_CertManagerEnableCRL(WOLFSSL_CERT_MANAGER*,
                                                                    int options);
     WOLFSSL_API int wolfSSL_CertManagerDisableCRL(WOLFSSL_CERT_MANAGER*);
+    WOLFSSL_API void wolfSSL_CertManagerSetVerify(WOLFSSL_CERT_MANAGER* cm,
+            VerifyCallback vc);
     WOLFSSL_API int wolfSSL_CertManagerLoadCRL(WOLFSSL_CERT_MANAGER*,
                                                          const char*, int, int);
     WOLFSSL_API int wolfSSL_CertManagerLoadCRLBuffer(WOLFSSL_CERT_MANAGER*,
                                             const unsigned char*, long sz, int);
     WOLFSSL_API int wolfSSL_CertManagerSetCRL_Cb(WOLFSSL_CERT_MANAGER*,
                                                                   CbMissingCRL);
+    WOLFSSL_API int wolfSSL_CertManagerFreeCRL(WOLFSSL_CERT_MANAGER *);
 #ifdef HAVE_CRL_IO
     WOLFSSL_API int wolfSSL_CertManagerSetCRL_IOCb(WOLFSSL_CERT_MANAGER*,
                                                                        CbCrlIO);
 #endif
+#if defined(HAVE_OCSP)
+    WOLFSSL_API int wolfSSL_CertManagerCheckOCSPResponse(WOLFSSL_CERT_MANAGER *,
+        byte *response, int responseSz, WOLFSSL_BUFFER_INFO *responseBuffer,
+        CertStatus *status, OcspEntry *entry, OcspRequest *ocspRequest);
+#endif
     WOLFSSL_API int wolfSSL_CertManagerCheckOCSP(WOLFSSL_CERT_MANAGER*,
                                                         unsigned char*, int sz);
     WOLFSSL_API int wolfSSL_CertManagerEnableOCSP(WOLFSSL_CERT_MANAGER*,
@@ -2063,7 +2747,9 @@
                                                       WOLFSSL_CERT_MANAGER* cm);
     WOLFSSL_API int wolfSSL_CertManagerDisableOCSPStapling(
                                                       WOLFSSL_CERT_MANAGER* cm);
-
+#if defined(OPENSSL_EXTRA) && defined(WOLFSSL_SIGNER_DER_CERT) && !defined(NO_FILESYSTEM)
+WOLFSSL_API WOLFSSL_STACK* wolfSSL_CertManagerGetCerts(WOLFSSL_CERT_MANAGER* cm);
+#endif
     WOLFSSL_API int wolfSSL_EnableCRL(WOLFSSL* ssl, int options);
     WOLFSSL_API int wolfSSL_DisableCRL(WOLFSSL* ssl);
     WOLFSSL_API int wolfSSL_LoadCRL(WOLFSSL*, const char*, int, int);
@@ -2119,11 +2805,11 @@
 /* async additions */
 #define wolfSSL_UseAsync wolfSSL_SetDevId
 #define wolfSSL_CTX_UseAsync wolfSSL_CTX_SetDevId
-WOLFSSL_API int wolfSSL_SetDevId(WOLFSSL*, int devId);
-WOLFSSL_API int wolfSSL_CTX_SetDevId(WOLFSSL_CTX*, int devId);
+WOLFSSL_ABI WOLFSSL_API int wolfSSL_SetDevId(WOLFSSL*, int devId);
+WOLFSSL_ABI WOLFSSL_API int wolfSSL_CTX_SetDevId(WOLFSSL_CTX*, int devId);
 
 /* helpers to get device id and heap */
-WOLFSSL_API int   wolfSSL_CTX_GetDevId(WOLFSSL_CTX* ctx, WOLFSSL* ssl);
+WOLFSSL_ABI WOLFSSL_API int wolfSSL_CTX_GetDevId(WOLFSSL_CTX*, WOLFSSL*);
 WOLFSSL_API void* wolfSSL_CTX_GetHeap(WOLFSSL_CTX* ctx, WOLFSSL* ssl);
 
 /* TLS Extensions */
@@ -2136,10 +2822,10 @@
     WOLFSSL_SNI_HOST_NAME = 0
 };
 
-WOLFSSL_API int wolfSSL_UseSNI(WOLFSSL* ssl, unsigned char type,
-                                         const void* data, unsigned short size);
-WOLFSSL_API int wolfSSL_CTX_UseSNI(WOLFSSL_CTX* ctx, unsigned char type,
-                                         const void* data, unsigned short size);
+WOLFSSL_ABI WOLFSSL_API int wolfSSL_UseSNI(WOLFSSL*, unsigned char,
+                                                   const void*, unsigned short);
+WOLFSSL_ABI WOLFSSL_API int wolfSSL_CTX_UseSNI(WOLFSSL_CTX*, unsigned char,
+                                                   const void*, unsigned short);
 
 #ifndef NO_WOLFSSL_SERVER
 
@@ -2181,6 +2867,21 @@
 
 #endif /* HAVE_SNI */
 
+/* Trusted CA Key Indication - RFC 6066 (Section 6) */
+#ifdef HAVE_TRUSTED_CA
+
+/* TCA Identifier Type */
+enum {
+    WOLFSSL_TRUSTED_CA_PRE_AGREED = 0,
+    WOLFSSL_TRUSTED_CA_KEY_SHA1 = 1,
+    WOLFSSL_TRUSTED_CA_X509_NAME = 2,
+    WOLFSSL_TRUSTED_CA_CERT_SHA1 = 3
+};
+
+WOLFSSL_API int wolfSSL_UseTrustedCA(WOLFSSL* ssl, unsigned char type,
+            const unsigned char* certId, unsigned int certIdSz);
+#endif /* HAVE_TRUSTED_CA */
+
 /* Application-Layer Protocol Negotiation */
 #ifdef HAVE_ALPN
 
@@ -2203,7 +2904,8 @@
     void *arg);
 #endif
 
-WOLFSSL_API int wolfSSL_UseALPN(WOLFSSL* ssl, char *protocol_name_list,
+WOLFSSL_ABI WOLFSSL_API int wolfSSL_UseALPN(WOLFSSL* ssl,
+                                char *protocol_name_list,
                                 unsigned int protocol_name_listSz,
                                 unsigned char options);
 
@@ -2224,7 +2926,10 @@
     WOLFSSL_MFL_2_10 = 2, /* 1024 bytes */
     WOLFSSL_MFL_2_11 = 3, /* 2048 bytes */
     WOLFSSL_MFL_2_12 = 4, /* 4096 bytes */
-    WOLFSSL_MFL_2_13 = 5  /* 8192 bytes *//* wolfSSL ONLY!!! */
+    WOLFSSL_MFL_2_13 = 5, /* 8192 bytes *//* wolfSSL ONLY!!! */
+    WOLFSSL_MFL_2_8  = 6, /*  256 bytes *//* wolfSSL ONLY!!! */
+    WOLFSSL_MFL_MIN  = WOLFSSL_MFL_2_9,
+    WOLFSSL_MFL_MAX  = WOLFSSL_MFL_2_8,
 };
 
 #ifndef NO_WOLFSSL_CLIENT
@@ -2233,7 +2938,7 @@
 WOLFSSL_API int wolfSSL_CTX_UseMaxFragment(WOLFSSL_CTX* ctx, unsigned char mfl);
 
 #endif
-#endif
+#endif /* HAVE_MAX_FRAGMENT */
 
 /* Truncated HMAC */
 #ifdef HAVE_TRUNCATED_HMAC
@@ -2325,8 +3030,6 @@
     WOLFSSL_ECC_BRAINPOOLP384R1 = 27,
     WOLFSSL_ECC_BRAINPOOLP512R1 = 28,
     WOLFSSL_ECC_X25519    = 29,
-#ifdef WOLFSSL_TLS13
-    /* Not implemented. */
     WOLFSSL_ECC_X448      = 30,
 
     WOLFSSL_FFDHE_2048    = 256,
@@ -2334,7 +3037,6 @@
     WOLFSSL_FFDHE_4096    = 258,
     WOLFSSL_FFDHE_6144    = 259,
     WOLFSSL_FFDHE_8192    = 260,
-#endif
 };
 
 enum {
@@ -2348,15 +3050,15 @@
 #ifdef HAVE_SUPPORTED_CURVES
 #ifndef NO_WOLFSSL_CLIENT
 
-WOLFSSL_API int wolfSSL_UseSupportedCurve(WOLFSSL* ssl, unsigned short name);
+WOLFSSL_API int wolfSSL_UseSupportedCurve(WOLFSSL* ssl, word16 name);
 WOLFSSL_API int wolfSSL_CTX_UseSupportedCurve(WOLFSSL_CTX* ctx,
-                                                           unsigned short name);
+                                                           word16 name);
 
 #endif
 #endif
 
 #ifdef WOLFSSL_TLS13
-WOLFSSL_API int wolfSSL_UseKeyShare(WOLFSSL* ssl, unsigned short group);
+WOLFSSL_API int wolfSSL_UseKeyShare(WOLFSSL* ssl, word16 group);
 WOLFSSL_API int wolfSSL_NoKeyShares(WOLFSSL* ssl);
 #endif
 
@@ -2365,7 +3067,11 @@
 #ifdef HAVE_SECURE_RENEGOTIATION
 
 WOLFSSL_API int wolfSSL_UseSecureRenegotiation(WOLFSSL* ssl);
+WOLFSSL_API int wolfSSL_CTX_UseSecureRenegotiation(WOLFSSL_CTX* ctx);
+WOLFSSL_API int wolfSSL_StartSecureRenegotiation(WOLFSSL* ssl, int resume);
 WOLFSSL_API int wolfSSL_Rehandshake(WOLFSSL* ssl);
+WOLFSSL_API int wolfSSL_SecureResume(WOLFSSL* ssl);
+WOLFSSL_API long wolfSSL_SSL_get_secure_renegotiation_support(WOLFSSL* ssl);
 
 #endif
 
@@ -2375,8 +3081,8 @@
 #ifndef NO_WOLFSSL_CLIENT
 WOLFSSL_API int wolfSSL_UseSessionTicket(WOLFSSL* ssl);
 WOLFSSL_API int wolfSSL_CTX_UseSessionTicket(WOLFSSL_CTX* ctx);
-WOLFSSL_API int wolfSSL_get_SessionTicket(WOLFSSL*, unsigned char*, unsigned int*);
-WOLFSSL_API int wolfSSL_set_SessionTicket(WOLFSSL*, const unsigned char*, unsigned int);
+WOLFSSL_API int wolfSSL_get_SessionTicket(WOLFSSL*, unsigned char*, word32*);
+WOLFSSL_API int wolfSSL_set_SessionTicket(WOLFSSL*, const unsigned char*, word32);
 typedef int (*CallbackSessionTicket)(WOLFSSL*, const unsigned char*, int, void*);
 WOLFSSL_API int wolfSSL_set_SessionTicket_cb(WOLFSSL*,
                                                   CallbackSessionTicket, void*);
@@ -2455,37 +3161,34 @@
                                           unsigned int* maxSessions);
 /* External facing KDF */
 WOLFSSL_API
-int wolfSSL_MakeTlsMasterSecret(unsigned char* ms, unsigned int msLen,
-                               const unsigned char* pms, unsigned int pmsLen,
+int wolfSSL_MakeTlsMasterSecret(unsigned char* ms, word32 msLen,
+                               const unsigned char* pms, word32 pmsLen,
                                const unsigned char* cr, const unsigned char* sr,
                                int tls1_2, int hash_type);
 
 WOLFSSL_API
-int wolfSSL_MakeTlsExtendedMasterSecret(unsigned char* ms, unsigned int msLen,
-                              const unsigned char* pms, unsigned int pmsLen,
-                              const unsigned char* sHash, unsigned int sHashLen,
+int wolfSSL_MakeTlsExtendedMasterSecret(unsigned char* ms, word32 msLen,
+                              const unsigned char* pms, word32 pmsLen,
+                              const unsigned char* sHash, word32 sHashLen,
                               int tls1_2, int hash_type);
 
 WOLFSSL_API
-int wolfSSL_DeriveTlsKeys(unsigned char* key_data, unsigned int keyLen,
-                               const unsigned char* ms, unsigned int msLen,
+int wolfSSL_DeriveTlsKeys(unsigned char* key_data, word32 keyLen,
+                               const unsigned char* ms, word32 msLen,
                                const unsigned char* sr, const unsigned char* cr,
                                int tls1_2, int hash_type);
 
 #ifdef WOLFSSL_CALLBACKS
 
-/* used internally by wolfSSL while OpenSSL types aren't */
-#include <wolfssl/callbacks.h>
-
 typedef int (*HandShakeCallBack)(HandShakeInfo*);
 typedef int (*TimeoutCallBack)(TimeoutInfo*);
 
 /* wolfSSL connect extension allowing HandShakeCallBack and/or TimeoutCallBack
    for diagnostics */
 WOLFSSL_API int wolfSSL_connect_ex(WOLFSSL*, HandShakeCallBack, TimeoutCallBack,
-                                 Timeval);
+                                 WOLFSSL_TIMEVAL);
 WOLFSSL_API int wolfSSL_accept_ex(WOLFSSL*, HandShakeCallBack, TimeoutCallBack,
-                                Timeval);
+                                WOLFSSL_TIMEVAL);
 
 #endif /* WOLFSSL_CALLBACKS */
 
@@ -2504,13 +3207,18 @@
 
 #include <wolfssl/openssl/asn1.h>
 struct WOLFSSL_X509_NAME_ENTRY {
-    WOLFSSL_ASN1_OBJECT* object; /* not defined yet */
+    WOLFSSL_ASN1_OBJECT  object;  /* static object just for keeping grp, type */
     WOLFSSL_ASN1_STRING  data;
     WOLFSSL_ASN1_STRING* value;  /* points to data, for lighttpd port */
     int nid; /* i.e. ASN_COMMON_NAME */
     int set;
     int size;
 };
+
+WOLFSSL_API int wolfSSL_X509_NAME_get_index_by_OBJ(WOLFSSL_X509_NAME *name,
+                                                   const WOLFSSL_ASN1_OBJECT *obj,
+                                                   int idx);
+
 #endif /* OPENSSL_ALL || OPENSSL_EXTRA || OPENSSL_EXTRA_X509_SMALL */
 
 
@@ -2536,48 +3244,97 @@
 };
 
 /* Object functions */
-WOLFSSL_API const char *  wolfSSL_OBJ_nid2sn(int n);
+WOLFSSL_API const char* wolfSSL_OBJ_nid2sn(int n);
 WOLFSSL_API int wolfSSL_OBJ_obj2nid(const WOLFSSL_ASN1_OBJECT *o);
+WOLFSSL_API int wolfSSL_OBJ_get_type(const WOLFSSL_ASN1_OBJECT *o);
 WOLFSSL_API int wolfSSL_OBJ_sn2nid(const char *sn);
 
-WOLFSSL_API char* wolfSSL_OBJ_nid2ln(int n);
+WOLFSSL_API const char* wolfSSL_OBJ_nid2ln(int n);
+WOLFSSL_API int wolfSSL_OBJ_ln2nid(const char *ln);
+WOLFSSL_API int wolfSSL_OBJ_cmp(const WOLFSSL_ASN1_OBJECT* a,
+            const WOLFSSL_ASN1_OBJECT* b);
 WOLFSSL_API int wolfSSL_OBJ_txt2nid(const char *sn);
+WOLFSSL_API WOLFSSL_ASN1_OBJECT* wolfSSL_OBJ_txt2obj(const char* s, int no_name);
 
 WOLFSSL_API WOLFSSL_ASN1_OBJECT* wolfSSL_OBJ_nid2obj(int n);
+WOLFSSL_LOCAL WOLFSSL_ASN1_OBJECT* wolfSSL_OBJ_nid2obj_ex(int n, WOLFSSL_ASN1_OBJECT *arg_obj);
 WOLFSSL_API int wolfSSL_OBJ_obj2txt(char *buf, int buf_len, WOLFSSL_ASN1_OBJECT *a, int no_name);
 
 WOLFSSL_API void wolfSSL_OBJ_cleanup(void);
+WOLFSSL_API int wolfSSL_OBJ_create(const char *oid, const char *sn, const char *ln);
+#ifdef HAVE_ECC
+WOLFSSL_LOCAL int NIDToEccEnum(int n);
+#endif
 /* end of object functions */
 
 WOLFSSL_API unsigned long wolfSSL_ERR_peek_last_error_line(const char **file, int *line);
 WOLFSSL_API long wolfSSL_ctrl(WOLFSSL* ssl, int cmd, long opt, void* pt);
 WOLFSSL_API long wolfSSL_CTX_ctrl(WOLFSSL_CTX* ctx, int cmd, long opt,void* pt);
+WOLFSSL_API long wolfSSL_CTX_callback_ctrl(WOLFSSL_CTX* ctx, int cmd, void (*fp)(void));
+WOLFSSL_API long wolfSSL_CTX_clear_extra_chain_certs(WOLFSSL_CTX* ctx);
 
 #ifndef NO_CERTS
 WOLFSSL_API WOLFSSL_X509_NAME_ENTRY* wolfSSL_X509_NAME_ENTRY_create_by_NID(
             WOLFSSL_X509_NAME_ENTRY** out, int nid, int type,
-            unsigned char* data, int dataSz);
+            const unsigned char* data, int dataSz);
+WOLFSSL_API WOLFSSL_X509_NAME_ENTRY* wolfSSL_X509_NAME_ENTRY_create_by_txt(
+            WOLFSSL_X509_NAME_ENTRY **neIn, const char *txt, int format,
+            const unsigned char *data, int dataSz);
 WOLFSSL_API int wolfSSL_X509_NAME_add_entry(WOLFSSL_X509_NAME* name,
                               WOLFSSL_X509_NAME_ENTRY* entry, int idx, int set);
+WOLFSSL_API int wolfSSL_X509_NAME_add_entry_by_txt(WOLFSSL_X509_NAME *name,
+    const char *field, int type, const unsigned char *bytes, int len, int loc,
+    int set);
+WOLFSSL_API int wolfSSL_X509_NAME_add_entry_by_NID(WOLFSSL_X509_NAME *name, int nid,
+                                           int type, const unsigned char *bytes,
+                                           int len, int loc, int set);
 WOLFSSL_API int wolfSSL_X509_NAME_cmp(const WOLFSSL_X509_NAME* x,
             const WOLFSSL_X509_NAME* y);
 WOLFSSL_API WOLFSSL_X509_NAME* wolfSSL_X509_NAME_new(void);
+WOLFSSL_API WOLFSSL_X509* wolfSSL_X509_dup(WOLFSSL_X509*);
+WOLFSSL_API WOLFSSL_X509_NAME* wolfSSL_X509_NAME_dup(WOLFSSL_X509_NAME*);
 WOLFSSL_API int wolfSSL_check_private_key(const WOLFSSL* ssl);
 WOLFSSL_API void* wolfSSL_X509_get_ext_d2i(const WOLFSSL_X509* x509,
                                                      int nid, int* c, int* idx);
+WOLFSSL_API int wolfSSL_X509_get_ext_count(const WOLFSSL_X509* passedCert);
+WOLFSSL_API int wolfSSL_X509_get_ext_by_NID(const WOLFSSL_X509 *x, int nid, int lastpos);
+WOLFSSL_API int wolfSSL_X509_add_ext(WOLFSSL_X509 *x, WOLFSSL_X509_EXTENSION *ex, int loc);
+WOLFSSL_API WOLFSSL_X509_EXTENSION* wolfSSL_X509V3_EXT_conf_nid(
+        WOLF_LHASH_OF(CONF_VALUE)* conf, WOLFSSL_X509V3_CTX* ctx, int nid,
+        char* value);
+WOLFSSL_API void wolfSSL_X509V3_set_ctx(WOLFSSL_X509V3_CTX* ctx,
+        WOLFSSL_X509* issuer, WOLFSSL_X509* subject, WOLFSSL_X509* req,
+        WOLFSSL_X509_CRL* crl, int flag);
+WOLFSSL_API void wolfSSL_X509V3_set_ctx_nodb(WOLFSSL_X509V3_CTX* ctx);
 WOLFSSL_API int wolfSSL_X509_digest(const WOLFSSL_X509* x509,
         const WOLFSSL_EVP_MD* digest, unsigned char* buf, unsigned int* len);
 WOLFSSL_API int wolfSSL_use_certificate(WOLFSSL* ssl, WOLFSSL_X509* x509);
-WOLFSSL_API int wolfSSL_use_certificate_ASN1(WOLFSSL* ssl, unsigned char* der,
-                                                                     int derSz);
 WOLFSSL_API int wolfSSL_use_PrivateKey(WOLFSSL* ssl, WOLFSSL_EVP_PKEY* pkey);
 WOLFSSL_API int wolfSSL_use_PrivateKey_ASN1(int pri, WOLFSSL* ssl,
-                                            unsigned char* der, long derSz);
+                                            const unsigned char* der, long derSz);
 WOLFSSL_API WOLFSSL_EVP_PKEY *wolfSSL_get_privatekey(const WOLFSSL *ssl);
 #ifndef NO_RSA
 WOLFSSL_API int wolfSSL_use_RSAPrivateKey_ASN1(WOLFSSL* ssl, unsigned char* der,
                                                                 long derSz);
 #endif
+WOLFSSL_API int wolfSSL_CTX_use_PrivateKey_ASN1(int pri, WOLFSSL_CTX* ctx,
+                                            unsigned char* der, long derSz);
+
+#if defined(WOLFSSL_QT) || defined(OPENSSL_ALL)
+WOLFSSL_API int wolfSSL_X509_cmp(const WOLFSSL_X509* a, const WOLFSSL_X509* b);
+WOLFSSL_API WOLFSSL_X509_EXTENSION* wolfSSL_X509_get_ext(const WOLFSSL_X509* x, int loc);
+WOLFSSL_API WOLFSSL_X509_EXTENSION* wolfSSL_X509_set_ext(WOLFSSL_X509* x, int loc);
+WOLFSSL_API int wolfSSL_X509_EXTENSION_get_critical(const WOLFSSL_X509_EXTENSION* ex);
+WOLFSSL_API WOLFSSL_X509_EXTENSION* wolfSSL_X509_EXTENSION_new(void);
+WOLFSSL_API int wolfSSL_sk_X509_EXTENSION_push(WOLFSSL_STACK* sk,
+                                       WOLFSSL_X509_EXTENSION* ext);
+WOLFSSL_API void wolfSSL_sk_X509_EXTENSION_free(WOLFSSL_STACK* sk);
+WOLFSSL_API void wolfSSL_X509_EXTENSION_free(WOLFSSL_X509_EXTENSION* ext_to_free);
+WOLFSSL_API WOLFSSL_STACK* wolfSSL_sk_new_x509_ext(void);
+#endif
+
+WOLFSSL_API WOLFSSL_ASN1_OBJECT* wolfSSL_X509_EXTENSION_get_object(WOLFSSL_X509_EXTENSION* ext);
+WOLFSSL_API WOLFSSL_ASN1_STRING* wolfSSL_X509_EXTENSION_get_data(WOLFSSL_X509_EXTENSION* ext);
 #endif /* NO_CERTS */
 
 WOLFSSL_API WOLFSSL_DH *wolfSSL_DSA_dup_DH(const WOLFSSL_DSA *r);
@@ -2592,31 +3349,49 @@
 #if !defined(NO_FILESYSTEM)
 WOLFSSL_API WOLFSSL_X509* wolfSSL_d2i_X509_fp(XFILE fp,
                                                WOLFSSL_X509** x509);
+WOLFSSL_API WOLFSSL_STACK* wolfSSL_X509_STORE_GetCerts(WOLFSSL_X509_STORE_CTX* s);
 #endif
 WOLFSSL_API WOLFSSL_X509* wolfSSL_d2i_X509_bio(WOLFSSL_BIO* bio,
                                                WOLFSSL_X509** x509);
 WOLFSSL_API WOLFSSL_X509_STORE* wolfSSL_CTX_get_cert_store(WOLFSSL_CTX* ctx);
 
+WOLFSSL_API size_t wolfSSL_BIO_wpending(const WOLFSSL_BIO *bio);
 WOLFSSL_API size_t wolfSSL_BIO_ctrl_pending(WOLFSSL_BIO *b);
+
 WOLFSSL_API size_t wolfSSL_get_server_random(const WOLFSSL *ssl,
                                              unsigned char *out, size_t outlen);
+WOLFSSL_API int wolfSSL_get_server_tmp_key(const WOLFSSL*, WOLFSSL_EVP_PKEY**);
+
+WOLFSSL_API int wolfSSL_CTX_set_min_proto_version(WOLFSSL_CTX*, int);
+WOLFSSL_API int wolfSSL_CTX_set_max_proto_version(WOLFSSL_CTX*, int);
+
 WOLFSSL_API size_t wolfSSL_get_client_random(const WOLFSSL* ssl,
                                               unsigned char* out, size_t outSz);
 WOLFSSL_API int wolfSSL_CTX_use_PrivateKey(WOLFSSL_CTX *ctx, WOLFSSL_EVP_PKEY *pkey);
 WOLFSSL_API WOLFSSL_X509 *wolfSSL_PEM_read_bio_X509(WOLFSSL_BIO *bp, WOLFSSL_X509 **x, pem_password_cb *cb, void *u);
+WOLFSSL_API WOLFSSL_X509_CRL *wolfSSL_PEM_read_bio_X509_CRL(WOLFSSL_BIO *bp,
+        WOLFSSL_X509_CRL **x, pem_password_cb *cb, void *u);
 WOLFSSL_API WOLFSSL_X509 *wolfSSL_PEM_read_bio_X509_AUX
         (WOLFSSL_BIO *bp, WOLFSSL_X509 **x, pem_password_cb *cb, void *u);
+WOLFSSL_API WOLF_STACK_OF(WOLFSSL_X509_INFO)* wolfSSL_PEM_X509_INFO_read_bio(
+        WOLFSSL_BIO* bio, WOLF_STACK_OF(WOLFSSL_X509_INFO)* sk,
+        pem_password_cb* cb, void* u);
 #ifndef NO_FILESYSTEM
-WOLFSSL_API WOLFSSL_X509_CRL *wolfSSL_PEM_read_X509_CRL(XFILE fp, WOLFSSL_X509_CRL **x,
-                                                    pem_password_cb *cb, void *u);
+WOLFSSL_API WOLFSSL_X509_CRL *wolfSSL_PEM_read_X509_CRL(XFILE fp,
+        WOLFSSL_X509_CRL **x, pem_password_cb *cb, void *u);
 #endif
+WOLFSSL_API int wolfSSL_PEM_get_EVP_CIPHER_INFO(char* header,
+                                                EncryptedInfo* cipher);
+WOLFSSL_API int wolfSSL_PEM_do_header(EncryptedInfo* cipher,
+                                      unsigned char* data, long* len,
+                                      pem_password_cb* callback, void* ctx);
 
 /*lighttp compatibility */
 
 struct WOLFSSL_ASN1_BIT_STRING {
     int length;
     int type;
-    char* data;
+    byte* data;
     long flags;
 };
 
@@ -2631,7 +3406,8 @@
 WOLFSSL_API void wolfSSL_X509_NAME_ENTRY_free(WOLFSSL_X509_NAME_ENTRY* ne);
 WOLFSSL_API WOLFSSL_X509_NAME_ENTRY* wolfSSL_X509_NAME_ENTRY_new(void);
 WOLFSSL_API void wolfSSL_X509_NAME_free(WOLFSSL_X509_NAME* name);
-WOLFSSL_API char wolfSSL_CTX_use_certificate(WOLFSSL_CTX *ctx, WOLFSSL_X509 *x);
+WOLFSSL_API char wolfSSL_CTX_use_certificate(WOLFSSL_CTX*, WOLFSSL_X509*);
+WOLFSSL_API int wolfSSL_CTX_add1_chain_cert(WOLFSSL_CTX*, WOLFSSL_X509*);
 WOLFSSL_API int wolfSSL_BIO_read_filename(WOLFSSL_BIO *b, const char *name);
 /* These are to be merged shortly */
 WOLFSSL_API void wolfSSL_set_verify_depth(WOLFSSL *ssl,int depth);
@@ -2639,7 +3415,6 @@
 WOLFSSL_API int wolfSSL_set_app_data(WOLFSSL *ssl, void *arg);
 WOLFSSL_API WOLFSSL_ASN1_OBJECT * wolfSSL_X509_NAME_ENTRY_get_object(WOLFSSL_X509_NAME_ENTRY *ne);
 WOLFSSL_API WOLFSSL_X509_NAME_ENTRY *wolfSSL_X509_NAME_get_entry(WOLFSSL_X509_NAME *name, int loc);
-WOLFSSL_API void wolfSSL_sk_X509_NAME_pop_free(WOLF_STACK_OF(WOLFSSL_X509_NAME)* sk, void f (WOLFSSL_X509_NAME*));
 WOLFSSL_API unsigned char *wolfSSL_SHA1(const unsigned char *d, size_t n, unsigned char *md);
 WOLFSSL_API unsigned char *wolfSSL_SHA256(const unsigned char *d, size_t n, unsigned char *md);
 WOLFSSL_API unsigned char *wolfSSL_SHA384(const unsigned char *d, size_t n, unsigned char *md);
@@ -2651,6 +3426,7 @@
 #ifndef NO_FILESYSTEM
 WOLFSSL_API long wolfSSL_BIO_set_fp(WOLFSSL_BIO *bio, XFILE fp, int c);
 WOLFSSL_API long wolfSSL_BIO_get_fp(WOLFSSL_BIO *bio, XFILE* fp);
+WOLFSSL_API WOLFSSL_BIO* wolfSSL_BIO_new_fp(XFILE fp, int c);
 #endif
 
 #endif /* OPENSSL_EXTRA || OPENSSL_ALL || HAVE_LIGHTY || WOLFSSL_MYSQL_COMPATIBLE || HAVE_STUNNEL || WOLFSSL_NGINX || WOLFSSL_HAPROXY */
@@ -2674,9 +3450,23 @@
 WOLFSSL_API int wolfSSL_PEM_write_bio_X509_REQ(WOLFSSL_BIO *bp,WOLFSSL_X509 *x);
 WOLFSSL_API int wolfSSL_PEM_write_bio_X509_AUX(WOLFSSL_BIO *bp,WOLFSSL_X509 *x);
 WOLFSSL_API int wolfSSL_PEM_write_bio_X509(WOLFSSL_BIO *bp, WOLFSSL_X509 *x);
-
 #endif /* HAVE_STUNNEL || HAVE_LIGHTY */
 
+#if defined(OPENSSL_EXTRA) && !defined(NO_CERTS) && defined(WOLFSSL_CERT_GEN) && \
+                                                       defined(WOLFSSL_CERT_REQ)
+WOLFSSL_API int wolfSSL_i2d_X509_REQ(WOLFSSL_X509* req, unsigned char** out);
+WOLFSSL_API WOLFSSL_X509* wolfSSL_X509_REQ_new(void);
+WOLFSSL_API void wolfSSL_X509_REQ_free(WOLFSSL_X509* req);
+WOLFSSL_API int wolfSSL_X509_REQ_sign(WOLFSSL_X509 *req, WOLFSSL_EVP_PKEY *pkey,
+                                      const WOLFSSL_EVP_MD *md);
+WOLFSSL_API int wolfSSL_X509_REQ_add_extensions(WOLFSSL_X509* req,
+        WOLF_STACK_OF(WOLFSSL_X509_EXTENSION)* ext);
+WOLFSSL_API int wolfSSL_X509_REQ_set_subject_name(WOLFSSL_X509 *req,
+                                                  WOLFSSL_X509_NAME *name);
+WOLFSSL_API int wolfSSL_X509_REQ_set_pubkey(WOLFSSL_X509 *req,
+                                            WOLFSSL_EVP_PKEY *pkey);
+#endif
+
 
 #if defined(OPENSSL_ALL) \
     || defined(HAVE_STUNNEL) \
@@ -2695,7 +3485,15 @@
 
 WOLFSSL_API void wolfSSL_CRYPTO_cleanup_all_ex_data(void);
 
+WOLFSSL_API WOLFSSL_BIGNUM* wolfSSL_DH_768_prime(WOLFSSL_BIGNUM* bn);
+WOLFSSL_API WOLFSSL_BIGNUM* wolfSSL_DH_1024_prime(WOLFSSL_BIGNUM* bn);
 WOLFSSL_API WOLFSSL_BIGNUM* wolfSSL_DH_1536_prime(WOLFSSL_BIGNUM* bn);
+WOLFSSL_API WOLFSSL_BIGNUM* wolfSSL_DH_2048_prime(WOLFSSL_BIGNUM* bn);
+WOLFSSL_API WOLFSSL_BIGNUM* wolfSSL_DH_3072_prime(WOLFSSL_BIGNUM* bn);
+WOLFSSL_API WOLFSSL_BIGNUM* wolfSSL_DH_4096_prime(WOLFSSL_BIGNUM* bn);
+WOLFSSL_API WOLFSSL_BIGNUM* wolfSSL_DH_6144_prime(WOLFSSL_BIGNUM* bn);
+WOLFSSL_API WOLFSSL_BIGNUM* wolfSSL_DH_8192_prime(WOLFSSL_BIGNUM* bn);
+
 WOLFSSL_API WOLFSSL_DH *wolfSSL_DH_generate_parameters(int prime_len, int generator,
     void (*callback) (int, int, void *), void *cb_arg);
 
@@ -2714,27 +3512,65 @@
 
 WOLFSSL_API int wolfSSL_CIPHER_get_bits(const WOLFSSL_CIPHER *c, int *alg_bits);
 
-WOLFSSL_API int wolfSSL_sk_X509_NAME_num(const WOLF_STACK_OF(WOLFSSL_X509_NAME) *s);
-
+WOLFSSL_API WOLFSSL_STACK* wolfSSL_sk_X509_new(void);
 WOLFSSL_API int wolfSSL_sk_X509_num(const WOLF_STACK_OF(WOLFSSL_X509) *s);
 
+WOLFSSL_API WOLFSSL_X509_INFO *wolfSSL_X509_INFO_new(void);
+WOLFSSL_API void wolfSSL_X509_INFO_free(WOLFSSL_X509_INFO* info);
+
+WOLFSSL_API WOLFSSL_STACK* wolfSSL_sk_X509_INFO_new_null(void);
+WOLFSSL_API int wolfSSL_sk_X509_INFO_num(const WOLF_STACK_OF(WOLFSSL_X509_INFO)*);
+WOLFSSL_API WOLFSSL_X509_INFO* wolfSSL_sk_X509_INFO_value(
+    const WOLF_STACK_OF(WOLFSSL_X509_INFO)*, int);
+WOLFSSL_API int wolfSSL_sk_X509_INFO_push(WOLF_STACK_OF(WOLFSSL_X509_INFO)*,
+    WOLFSSL_X509_INFO*);
+WOLFSSL_API WOLFSSL_X509_INFO* wolfSSL_sk_X509_INFO_pop(WOLF_STACK_OF(WOLFSSL_X509_INFO)*);
+WOLFSSL_API void wolfSSL_sk_X509_INFO_pop_free(WOLF_STACK_OF(WOLFSSL_X509_INFO)*,
+    void (*f) (WOLFSSL_X509_INFO*));
+WOLFSSL_API void wolfSSL_sk_X509_INFO_free(WOLF_STACK_OF(WOLFSSL_X509_INFO)*);
+
+typedef int (*wolf_sk_compare_cb)(const void* const *a,
+                                  const void* const *b);
+WOLFSSL_API WOLF_STACK_OF(WOLFSSL_X509_NAME)* wolfSSL_sk_X509_NAME_new(
+    wolf_sk_compare_cb);
+WOLFSSL_API int wolfSSL_sk_X509_NAME_push(WOLF_STACK_OF(WOLFSSL_X509_NAME)*,
+    WOLFSSL_X509_NAME*);
+WOLFSSL_API int wolfSSL_sk_X509_NAME_find(const WOLF_STACK_OF(WOLFSSL_X509_NAME)*,
+    WOLFSSL_X509_NAME*);
+WOLFSSL_API int wolfSSL_sk_X509_NAME_set_cmp_func(
+    WOLF_STACK_OF(WOLFSSL_X509_NAME)*, wolf_sk_compare_cb);
+WOLFSSL_API WOLFSSL_X509_NAME* wolfSSL_sk_X509_NAME_value(const WOLF_STACK_OF(WOLFSSL_X509_NAME)*, int);
+WOLFSSL_API int wolfSSL_sk_X509_NAME_num(const WOLF_STACK_OF(WOLFSSL_X509_NAME)*);
+WOLFSSL_API WOLFSSL_X509_NAME* wolfSSL_sk_X509_NAME_pop(WOLF_STACK_OF(WOLFSSL_X509_NAME)*);
+WOLFSSL_API void wolfSSL_sk_X509_NAME_pop_free(WOLF_STACK_OF(WOLFSSL_X509_NAME)*,
+    void (*f) (WOLFSSL_X509_NAME*));
+WOLFSSL_API void wolfSSL_sk_X509_NAME_free(WOLF_STACK_OF(WOLFSSL_X509_NAME) *);
+
+WOLFSSL_API int wolfSSL_sk_X509_OBJECT_num(const WOLF_STACK_OF(WOLFSSL_X509_OBJECT) *s);
+
 WOLFSSL_API int wolfSSL_X509_NAME_print_ex(WOLFSSL_BIO*,WOLFSSL_X509_NAME*,int,
         unsigned long);
 
+WOLFSSL_API WOLFSSL_ASN1_BIT_STRING* wolfSSL_ASN1_BIT_STRING_new(void);
+WOLFSSL_API void wolfSSL_ASN1_BIT_STRING_free(WOLFSSL_ASN1_BIT_STRING*);
 WOLFSSL_API WOLFSSL_ASN1_BIT_STRING* wolfSSL_X509_get0_pubkey_bitstr(
                             const WOLFSSL_X509*);
+WOLFSSL_API int wolfSSL_ASN1_BIT_STRING_get_bit(
+                            const WOLFSSL_ASN1_BIT_STRING*, int);
+WOLFSSL_API int wolfSSL_ASN1_BIT_STRING_set_bit(
+                            WOLFSSL_ASN1_BIT_STRING*, int, int);
 
 WOLFSSL_API int        wolfSSL_CTX_add_session(WOLFSSL_CTX*, WOLFSSL_SESSION*);
 
-WOLFSSL_API WOLFSSL_CTX* wolfSSL_get_SSL_CTX(WOLFSSL* ssl);
-
 WOLFSSL_API int  wolfSSL_version(WOLFSSL*);
 
 WOLFSSL_API int wolfSSL_get_state(const WOLFSSL*);
 
-WOLFSSL_API void* wolfSSL_sk_X509_NAME_value(const WOLF_STACK_OF(WOLFSSL_X509_NAME)*, int);
-
-WOLFSSL_API void* wolfSSL_sk_X509_value(WOLF_STACK_OF(WOLFSSL_X509)*, int);
+WOLFSSL_API WOLFSSL_X509* wolfSSL_sk_X509_value(WOLF_STACK_OF(WOLFSSL_X509)*, int);
+
+WOLFSSL_API WOLFSSL_X509* wolfSSL_sk_X509_shift(WOLF_STACK_OF(WOLFSSL_X509)*);
+
+WOLFSSL_API void* wolfSSL_sk_X509_OBJECT_value(WOLF_STACK_OF(WOLFSSL_X509_OBJECT)*, int);
 
 WOLFSSL_API void* wolfSSL_SESSION_get_ex_data(const WOLFSSL_SESSION*, int);
 
@@ -2749,6 +3585,8 @@
 WOLFSSL_API const unsigned char* wolfSSL_SESSION_get_id(WOLFSSL_SESSION*,
         unsigned int*);
 
+WOLFSSL_API int wolfSSL_SESSION_print(WOLFSSL_BIO*, const WOLFSSL_SESSION*);
+
 WOLFSSL_API int wolfSSL_set_tlsext_host_name(WOLFSSL *, const char *);
 
 WOLFSSL_API const char* wolfSSL_get_servername(WOLFSSL *, unsigned char);
@@ -2764,15 +3602,15 @@
 WOLFSSL_API int wolfSSL_CTX_set_tlsext_servername_callback(WOLFSSL_CTX *,
         CallbackSniRecv);
 
-WOLFSSL_API void wolfSSL_CTX_set_servername_arg(WOLFSSL_CTX *, void*);
+WOLFSSL_API int  wolfSSL_CTX_set_servername_arg(WOLFSSL_CTX *, void*);
 
 WOLFSSL_API void wolfSSL_ERR_remove_thread_state(void*);
 
-/* support for depricated old name */
+/* support for deprecated old name */
 #define WOLFSSL_ERR_remove_thread_state wolfSSL_ERR_remove_thread_state
 
 #ifndef NO_FILESYSTEM
-WOLFSSL_API void wolfSSL_print_all_errors_fp(XFILE *fp);
+WOLFSSL_API void wolfSSL_print_all_errors_fp(XFILE fp);
 #endif
 
 WOLFSSL_API void wolfSSL_THREADID_set_callback(void (*threadid_func)(void*));
@@ -2781,10 +3619,20 @@
 
 WOLFSSL_API WOLF_STACK_OF(WOLFSSL_X509)* wolfSSL_X509_STORE_get1_certs(
                                WOLFSSL_X509_STORE_CTX*, WOLFSSL_X509_NAME*);
-
-WOLFSSL_API void wolfSSL_sk_X509_pop_free(WOLF_STACK_OF(WOLFSSL_X509)* sk, void f (WOLFSSL_X509*));
+WOLFSSL_API WOLF_STACK_OF(WOLFSSL_X509_OBJECT)*
+        wolfSSL_X509_STORE_get0_objects(WOLFSSL_X509_STORE *);
+WOLFSSL_API WOLFSSL_X509_OBJECT*
+        wolfSSL_sk_X509_OBJECT_delete(WOLF_STACK_OF(WOLFSSL_X509_OBJECT)* sk, int i);
+WOLFSSL_API void wolfSSL_X509_OBJECT_free(WOLFSSL_X509_OBJECT *a);
+
+WOLFSSL_API void wolfSSL_sk_X509_pop_free(WOLF_STACK_OF(WOLFSSL_X509)* sk, void (*f) (WOLFSSL_X509*));
 #endif /* OPENSSL_ALL || HAVE_STUNNEL || WOLFSSL_NGINX || WOLFSSL_HAPROXY || HAVE_LIGHTY */
 
+#if defined(OPENSSL_EXTRA) && defined(HAVE_ECC)
+WOLFSSL_API int wolfSSL_CTX_set1_curves_list(WOLFSSL_CTX* ctx, const char* names);
+WOLFSSL_API int wolfSSL_set1_curves_list(WOLFSSL* ssl, const char* names);
+#endif /* OPENSSL_EXTRA && HAVE_ECC */
+
 #if defined(OPENSSL_ALL) || \
     defined(HAVE_STUNNEL) || defined(WOLFSSL_MYSQL_COMPATIBLE) || \
     defined(WOLFSSL_NGINX) || defined(WOLFSSL_HAPROXY)
@@ -2806,8 +3654,6 @@
 #endif /* WOLFSSL_ASYNC_CRYPT */
 
 #ifdef OPENSSL_EXTRA
-WOLFSSL_API int wolfSSL_CTX_set1_curves_list(WOLFSSL_CTX* ctx, char* names);
-
 typedef void (*SSL_Msg_Cb)(int write_p, int version, int content_type,
     const void *buf, size_t len, WOLFSSL *ssl, void *arg);
 
@@ -2819,6 +3665,8 @@
     int *line, const char **data, int *flags);
 WOLFSSL_API int wolfSSL_CTX_set_alpn_protos(WOLFSSL_CTX *ctx,
     const unsigned char *protos, unsigned int protos_len);
+WOLFSSL_API int wolfSSL_set_alpn_protos(WOLFSSL* ssl,
+        const unsigned char* protos, unsigned int protos_len);
 WOLFSSL_API void *wolfSSL_OPENSSL_memdup(const void *data,
     size_t siz, const char* file, int line);
 WOLFSSL_API void wolfSSL_ERR_load_BIO_strings(void);
@@ -2848,11 +3696,11 @@
 WOLFSSL_API void *wolfSSL_X509_get_ex_data(WOLFSSL_X509 *x509, int idx);
 WOLFSSL_API int wolfSSL_X509_set_ex_data(WOLFSSL_X509 *x509, int idx,
     void *data);
-
 WOLFSSL_API int wolfSSL_X509_NAME_digest(const WOLFSSL_X509_NAME *data,
     const WOLFSSL_EVP_MD *type, unsigned char *md, unsigned int *len);
 
 WOLFSSL_API long wolfSSL_SSL_CTX_get_timeout(const WOLFSSL_CTX *ctx);
+WOLFSSL_API long wolfSSL_get_timeout(WOLFSSL* ssl);
 WOLFSSL_API int wolfSSL_SSL_CTX_set_tmp_ecdh(WOLFSSL_CTX *ctx,
     WOLFSSL_EC_KEY *ecdh);
 WOLFSSL_API int wolfSSL_SSL_CTX_remove_session(WOLFSSL_CTX *,
@@ -2861,7 +3709,9 @@
 WOLFSSL_API WOLFSSL_BIO *wolfSSL_SSL_get_rbio(const WOLFSSL *s);
 WOLFSSL_API WOLFSSL_BIO *wolfSSL_SSL_get_wbio(const WOLFSSL *s);
 WOLFSSL_API int wolfSSL_SSL_do_handshake(WOLFSSL *s);
-WOLFSSL_API int wolfSSL_SSL_in_init(WOLFSSL *a); /* #define in OpenSSL */
+WOLFSSL_API int wolfSSL_SSL_in_init(WOLFSSL*);
+WOLFSSL_API int wolfSSL_SSL_in_connect_init(WOLFSSL*);
+
 #ifndef NO_SESSION_CACHE
     WOLFSSL_API WOLFSSL_SESSION *wolfSSL_SSL_get0_session(const WOLFSSL *s);
 #endif
@@ -2893,8 +3743,6 @@
 WOLFSSL_API int wolfSSL_X509_check_issued(WOLFSSL_X509 *issuer,
     WOLFSSL_X509 *subject);
 
-WOLFSSL_API WOLFSSL_X509* wolfSSL_X509_dup(WOLFSSL_X509 *x);
-
 WOLFSSL_API char* wolfSSL_sk_WOLFSSL_STRING_value(
     WOLF_STACK_OF(WOLFSSL_STRING)* strings, int idx);
 #endif /* HAVE_OCSP */
@@ -2902,7 +3750,7 @@
 WOLFSSL_API int PEM_write_bio_WOLFSSL_X509(WOLFSSL_BIO *bio,
     WOLFSSL_X509 *cert);
 
-#endif /* OPENSSL_ALL || WOLFSSL_NGINX || WOLFSSL_HAPROXY || 
+#endif /* OPENSSL_ALL || WOLFSSL_NGINX || WOLFSSL_HAPROXY ||
     OPENSSL_EXTRA || HAVE_LIGHTY*/
 
 WOLFSSL_API void wolfSSL_get0_alpn_selected(const WOLFSSL *ssl,
@@ -2945,32 +3793,71 @@
 
 WOLFSSL_API int SSL_SESSION_set1_id(WOLFSSL_SESSION *s, const unsigned char *sid, unsigned int sid_len);
 WOLFSSL_API int SSL_SESSION_set1_id_context(WOLFSSL_SESSION *s, const unsigned char *sid_ctx, unsigned int sid_ctx_len);
-WOLFSSL_API void *X509_get0_tbs_sigalg(const WOLFSSL_X509 *x);
-WOLFSSL_API void X509_ALGOR_get0(WOLFSSL_ASN1_OBJECT **paobj, int *pptype, const void **ppval, const void *algor);
-WOLFSSL_API void *X509_get_X509_PUBKEY(void * x);
-WOLFSSL_API int X509_PUBKEY_get0_param(WOLFSSL_ASN1_OBJECT **ppkalg, const unsigned char **pk, int *ppklen, void **pa, WOLFSSL_EVP_PKEY *pub);
+WOLFSSL_API WOLFSSL_X509_ALGOR* wolfSSL_X509_ALGOR_new(void);
+WOLFSSL_API void wolfSSL_X509_ALGOR_free(WOLFSSL_X509_ALGOR *alg);
+WOLFSSL_API const WOLFSSL_X509_ALGOR* wolfSSL_X509_get0_tbs_sigalg(const WOLFSSL_X509 *x);
+WOLFSSL_API void wolfSSL_X509_ALGOR_get0(const WOLFSSL_ASN1_OBJECT **paobj, int *pptype, const void **ppval, const WOLFSSL_X509_ALGOR *algor);
+WOLFSSL_API int wolfSSL_X509_ALGOR_set0(WOLFSSL_X509_ALGOR *algor, WOLFSSL_ASN1_OBJECT *aobj, int ptype, void *pval);
+WOLFSSL_API WOLFSSL_ASN1_TYPE* wolfSSL_ASN1_TYPE_new(void);
+WOLFSSL_API void wolfSSL_ASN1_TYPE_free(WOLFSSL_ASN1_TYPE* at);
+WOLFSSL_API WOLFSSL_X509_PUBKEY *wolfSSL_X509_PUBKEY_new(void);
+WOLFSSL_API void wolfSSL_X509_PUBKEY_free(WOLFSSL_X509_PUBKEY *x);
+WOLFSSL_API WOLFSSL_X509_PUBKEY *wolfSSL_X509_get_X509_PUBKEY(const WOLFSSL_X509* x509);
+WOLFSSL_API int wolfSSL_X509_PUBKEY_get0_param(WOLFSSL_ASN1_OBJECT **ppkalg, const unsigned char **pk, int *ppklen, WOLFSSL_X509_ALGOR **pa, WOLFSSL_X509_PUBKEY *pub);
+WOLFSSL_API WOLFSSL_EVP_PKEY* wolfSSL_X509_PUBKEY_get(WOLFSSL_X509_PUBKEY* key);
+WOLFSSL_API int wolfSSL_X509_PUBKEY_set(WOLFSSL_X509_PUBKEY **x, WOLFSSL_EVP_PKEY *key);
 WOLFSSL_API int i2t_ASN1_OBJECT(char *buf, int buf_len, WOLFSSL_ASN1_OBJECT *a);
+WOLFSSL_API int wolfSSL_i2a_ASN1_OBJECT(WOLFSSL_BIO *bp, WOLFSSL_ASN1_OBJECT *a);
 WOLFSSL_API void SSL_CTX_set_tmp_dh_callback(WOLFSSL_CTX *ctx, WOLFSSL_DH *(*dh) (WOLFSSL *ssl, int is_export, int keylength));
 WOLFSSL_API WOLF_STACK_OF(SSL_COMP) *SSL_COMP_get_compression_methods(void);
-WOLFSSL_API int X509_STORE_load_locations(WOLFSSL_X509_STORE *ctx, const char *file, const char *dir);
+WOLFSSL_API int wolfSSL_X509_STORE_load_locations(WOLFSSL_X509_STORE *str, const char *file, const char *dir);
 WOLFSSL_API int wolfSSL_X509_STORE_add_crl(WOLFSSL_X509_STORE *ctx, WOLFSSL_X509_CRL *x);
-WOLFSSL_API int wolfSSL_sk_SSL_CIPHER_num(const void * p);
+WOLFSSL_API int wolfSSL_sk_SSL_CIPHER_num(const WOLF_STACK_OF(WOLFSSL_CIPHER)* p);
+WOLFSSL_API int wolfSSL_sk_SSL_CIPHER_find(
+        WOLF_STACK_OF(WOLFSSL_CIPHER)* sk, const WOLFSSL_CIPHER* toFind);
+WOLFSSL_API WOLF_STACK_OF(WOLFSSL_CIPHER)* wolfSSL_sk_SSL_CIPHER_dup(
+        WOLF_STACK_OF(WOLFSSL_CIPHER)* in);
+WOLFSSL_API void wolfSSL_sk_SSL_CIPHER_free(WOLF_STACK_OF(WOLFSSL_CIPHER)* sk);
 WOLFSSL_API int wolfSSL_sk_SSL_COMP_zero(WOLFSSL_STACK* st);
+WOLFSSL_API int wolfSSL_sk_SSL_COMP_num(WOLF_STACK_OF(WOLFSSL_COMP)* sk);
 WOLFSSL_API WOLFSSL_CIPHER* wolfSSL_sk_SSL_CIPHER_value(void *ciphers, int idx);
 WOLFSSL_API void ERR_load_SSL_strings(void);
 WOLFSSL_API void wolfSSL_EC_POINT_dump(const char *msg, const WOLFSSL_EC_POINT *p);
 
 WOLFSSL_API const char *wolfSSL_ASN1_tag2str(int tag);
 WOLFSSL_API int wolfSSL_ASN1_STRING_print_ex(WOLFSSL_BIO *out, WOLFSSL_ASN1_STRING *str, unsigned long flags);
+WOLFSSL_API int wolfSSL_ASN1_STRING_print(WOLFSSL_BIO *out, WOLFSSL_ASN1_STRING *str);
+WOLFSSL_API int wolfSSL_ASN1_TIME_get_length(WOLFSSL_ASN1_TIME *t);
+WOLFSSL_API unsigned char* wolfSSL_ASN1_TIME_get_data(WOLFSSL_ASN1_TIME *t);
 WOLFSSL_API WOLFSSL_ASN1_TIME *wolfSSL_ASN1_TIME_to_generalizedtime(WOLFSSL_ASN1_TIME *t,
                                                                 WOLFSSL_ASN1_TIME **out);
 WOLFSSL_API int wolfSSL_i2c_ASN1_INTEGER(WOLFSSL_ASN1_INTEGER *a, unsigned char **pp);
+WOLFSSL_API int wolfSSL_X509_CA_num(WOLFSSL_X509_STORE *store);
+WOLFSSL_API long wolfSSL_X509_get_version(const WOLFSSL_X509 *x);
+WOLFSSL_API int wolfSSL_X509_get_signature_nid(const WOLFSSL_X509* x);
+
+WOLFSSL_API int wolfSSL_PEM_write_bio_PKCS8PrivateKey(WOLFSSL_BIO* bio,
+    WOLFSSL_EVP_PKEY* pkey, const WOLFSSL_EVP_CIPHER* enc, char* passwd,
+    int passwdSz, pem_password_cb* cb, void* ctx);
+WOLFSSL_API WOLFSSL_EVP_PKEY* wolfSSL_d2i_PKCS8PrivateKey_bio(WOLFSSL_BIO* bio,
+    WOLFSSL_EVP_PKEY** pkey, pem_password_cb* cb, void* u);
+WOLFSSL_API WOLFSSL_EVP_PKEY* wolfSSL_d2i_AutoPrivateKey(
+    WOLFSSL_EVP_PKEY** pkey, const unsigned char** data, long length);
+WOLFSSL_API unsigned long  wolfSSL_X509_subject_name_hash(const WOLFSSL_X509* x509);
+
+
 #endif /* OPENSSL_EXTRA */
 
 #ifdef HAVE_PK_CALLBACKS
+WOLFSSL_API int wolfSSL_IsPrivatePkSet(WOLFSSL* ssl);
 WOLFSSL_API int wolfSSL_CTX_IsPrivatePkSet(WOLFSSL_CTX* ctx);
 #endif
 
+#ifdef HAVE_ENCRYPT_THEN_MAC
+WOLFSSL_API int wolfSSL_CTX_AllowEncryptThenMac(WOLFSSL_CTX *, int);
+WOLFSSL_API int wolfSSL_AllowEncryptThenMac(WOLFSSL *s, int);
+#endif
+
 #ifdef __cplusplus
     }  /* extern "C" */
 #endif
--- a/wolfssl/test.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/test.h	Thu Jun 04 23:57:22 2020 +0000
@@ -11,8 +11,9 @@
 #include <wolfssl/wolfcrypt/error-crypt.h>
 #include <wolfssl/wolfcrypt/random.h>
 #include <wolfssl/wolfcrypt/mem_track.h>
-#if defined(OPENSSL_EXTRA) && defined(SHOW_CERTS)
-    #include <wolfssl/openssl/ssl.h> /* for domain component NID value */
+#if defined(SHOW_CERTS) && \
+    (defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL))
+    #include <wolfssl/wolfcrypt/asn.h> /* for domain component NID value */
 #endif
 
 #ifdef ATOMIC_USER
@@ -37,6 +38,12 @@
     #ifdef HAVE_CURVE25519
         #include <wolfssl/wolfcrypt/curve25519.h>
     #endif /* HAVE_ECC */
+    #ifdef HAVE_ED448
+        #include <wolfssl/wolfcrypt/ed448.h>
+    #endif /* HAVE_ED448 */
+    #ifdef HAVE_CURVE448
+        #include <wolfssl/wolfcrypt/curve448.h>
+    #endif /* HAVE_ECC */
 #endif /*HAVE_PK_CALLBACKS */
 
 #ifdef USE_WINDOWS_API
@@ -52,18 +59,19 @@
     #include <string.h>
     #include "rl_net.h"
     #define SOCKET_T int
-        typedef int socklen_t ;
-        static unsigned long inet_addr(const char *cp)
+    typedef int socklen_t ;
+    #define inet_addr wolfSSL_inet_addr
+    static unsigned long wolfSSL_inet_addr(const char *cp)
     {
         unsigned int a[4] ; unsigned long ret ;
         sscanf(cp, "%d.%d.%d.%d", &a[0], &a[1], &a[2], &a[3]) ;
         ret = ((a[3]<<24) + (a[2]<<16) + (a[1]<<8) + a[0]) ;
         return(ret) ;
     }
-        #if defined(HAVE_KEIL_RTX)
-        #define sleep(t) os_dly_wait(t/1000+1) ;
-    #elif defined (WOLFSSL_CMSIS_RTOS)
-        #define sleep(t)  osDelay(t/1000+1) ;
+    #if defined(HAVE_KEIL_RTX)
+        #define sleep(t) os_dly_wait(t/1000+1);
+    #elif defined(WOLFSSL_CMSIS_RTOS) || defined(WOLFSSL_CMSIS_RTOSv2)
+        #define sleep(t) osDelay(t/1000+1);
     #endif
 #elif defined(WOLFSSL_TIRTOS)
     #include <string.h>
@@ -93,6 +101,30 @@
     #include <netdb.h>
     #include <pthread.h>
     #define SOCKET_T int
+#elif defined(WOLFSSL_ZEPHYR)
+    #include <string.h>
+    #include <sys/types.h>
+    #include <net/socket.h>
+    #define SOCKET_T int
+    #define SOL_SOCKET 1
+    #define SO_REUSEADDR 201
+    #define WOLFSSL_USE_GETADDRINFO
+
+    static unsigned long inet_addr(const char *cp)
+    {
+        unsigned int a[4]; unsigned long ret;
+        int i, j;
+        for (i=0, j=0; i<4; i++) {
+            a[i] = 0;
+            while (cp[j] != '.' && cp[j] != '\0') {
+                a[i] *= 10;
+                a[i] += cp[j] - '0';
+                j++;
+            }
+        }
+        ret = ((a[3]<<24) + (a[2]<<16) + (a[1]<<8) + a[0]) ;
+        return(ret) ;
+    }
 #else
     #include <string.h>
     #include <sys/types.h>
@@ -130,9 +162,12 @@
     #pragma warning(disable:4244 4996)
 #endif
 
+#ifndef WOLFSSL_CIPHER_LIST_MAX_SIZE
+    #define WOLFSSL_CIPHER_LIST_MAX_SIZE 4096
+#endif
 /* Buffer for benchmark tests */
 #ifndef TEST_BUFFER_SIZE
-#define TEST_BUFFER_SIZE 16384
+    #define TEST_BUFFER_SIZE 16384
 #endif
 
 #ifndef WOLFSSL_HAVE_MIN
@@ -203,6 +238,10 @@
         typedef void          THREAD_RETURN;
         typedef Task_Handle   THREAD_TYPE;
         #define WOLFSSL_THREAD
+    #elif defined(WOLFSSL_ZEPHYR)
+        typedef void            THREAD_RETURN;
+        typedef struct k_thread THREAD_TYPE;
+        #define WOLFSSL_THREAD
     #else
         typedef unsigned int  THREAD_RETURN;
         typedef intptr_t      THREAD_TYPE;
@@ -236,6 +275,7 @@
 #define CLIENT_DTLS_DEFAULT_VERSION (-2)
 #define CLIENT_INVALID_VERSION (-99)
 #define CLIENT_DOWNGRADE_VERSION (-98)
+#define EITHER_DOWNGRADE_VERSION (-97)
 #if !defined(NO_FILESYSTEM) && defined(WOLFSSL_MAX_STRENGTH)
     #define DEFAULT_MIN_DHKEY_BITS 2048
     #define DEFAULT_MAX_DHKEY_BITS 3072
@@ -256,56 +296,70 @@
 
 /* all certs relative to wolfSSL home directory now */
 #if defined(WOLFSSL_NO_CURRDIR) || defined(WOLFSSL_MDK_SHELL)
-#define caCertFile     "certs/ca-cert.pem"
-#define eccCertFile    "certs/server-ecc.pem"
-#define eccKeyFile     "certs/ecc-key.pem"
-#define eccRsaCertFile "certs/server-ecc-rsa.pem"
-#define svrCertFile    "certs/server-cert.pem"
-#define svrKeyFile     "certs/server-key.pem"
-#define cliCertFile    "certs/client-cert.pem"
-#define cliCertDerFile "certs/client-cert.der"
-#define cliKeyFile     "certs/client-key.pem"
-#define ntruCertFile   "certs/ntru-cert.pem"
-#define ntruKeyFile    "certs/ntru-key.raw"
-#define dhParamFile    "certs/dh2048.pem"
-#define cliEccKeyFile  "certs/ecc-client-key.pem"
-#define cliEccCertFile "certs/client-ecc-cert.pem"
-#define caEccCertFile  "certs/ca-ecc-cert/pem"
-#define crlPemDir      "certs/crl"
-#define edCertFile     "certs/ed25519/server-ed25519-cert.pem"
-#define edKeyFile      "certs/ed25519/server-ed25519-priv.pem"
-#define cliEdCertFile  "certs/ed25519/client-ed25519.pem"
-#define cliEdKeyFile   "certs/ed25519/client-ed25519-priv.pem"
-#define caEdCertFile   "certs/ed25519/ca-ed25519.pem"
+#define caCertFile        "certs/ca-cert.pem"
+#define eccCertFile       "certs/server-ecc.pem"
+#define eccKeyFile        "certs/ecc-key.pem"
+#define eccRsaCertFile    "certs/server-ecc-rsa.pem"
+#define svrCertFile       "certs/server-cert.pem"
+#define svrKeyFile        "certs/server-key.pem"
+#define cliCertFile       "certs/client-cert.pem"
+#define cliCertDerFile    "certs/client-cert.der"
+#define cliCertFileExt    "certs/client-cert-ext.pem"
+#define cliCertDerFileExt "certs/client-cert-ext.der"
+#define cliKeyFile        "certs/client-key.pem"
+#define ntruCertFile      "certs/ntru-cert.pem"
+#define ntruKeyFile       "certs/ntru-key.raw"
+#define dhParamFile       "certs/dh2048.pem"
+#define cliEccKeyFile     "certs/ecc-client-key.pem"
+#define cliEccCertFile    "certs/client-ecc-cert.pem"
+#define caEccCertFile     "certs/ca-ecc-cert.pem"
+#define crlPemDir         "certs/crl"
+#define edCertFile        "certs/ed25519/server-ed25519-cert.pem"
+#define edKeyFile         "certs/ed25519/server-ed25519-priv.pem"
+#define cliEdCertFile     "certs/ed25519/client-ed25519.pem"
+#define cliEdKeyFile      "certs/ed25519/client-ed25519-priv.pem"
+#define caEdCertFile      "certs/ed25519/ca-ed25519.pem"
+#define ed448CertFile     "certs/ed448/server-ed448-cert.pem"
+#define ed448KeyFile      "certs/ed448/server-ed448-priv.pem"
+#define cliEd448CertFile  "certs/ed448/client-ed448.pem"
+#define cliEd448KeyFile   "certs/ed448/client-ed448-priv.pem"
+#define caEd448CertFile   "certs/ed448/ca-ed448.pem"
 #ifdef HAVE_WNR
     /* Whitewood netRandom default config file */
-    #define wnrConfig  "wnr-example.conf"
+    #define wnrConfig     "wnr-example.conf"
 #endif
 #else
-#define caCertFile     "./certs/ca-cert.pem"
-#define eccCertFile    "./certs/server-ecc.pem"
-#define eccKeyFile     "./certs/ecc-key.pem"
-#define eccRsaCertFile "./certs/server-ecc-rsa.pem"
-#define svrCertFile    "./certs/server-cert.pem"
-#define svrKeyFile     "./certs/server-key.pem"
-#define cliCertFile    "./certs/client-cert.pem"
-#define cliCertDerFile "./certs/client-cert.der"
-#define cliKeyFile     "./certs/client-key.pem"
-#define ntruCertFile   "./certs/ntru-cert.pem"
-#define ntruKeyFile    "./certs/ntru-key.raw"
-#define dhParamFile    "./certs/dh2048.pem"
-#define cliEccKeyFile  "./certs/ecc-client-key.pem"
-#define cliEccCertFile "./certs/client-ecc-cert.pem"
-#define caEccCertFile  "./certs/ca-ecc-cert.pem"
-#define crlPemDir      "./certs/crl"
-#define edCertFile     "./certs/ed25519/server-ed25519.pem"
-#define edKeyFile      "./certs/ed25519/server-ed25519-priv.pem"
-#define cliEdCertFile  "./certs/ed25519/client-ed25519.pem"
-#define cliEdKeyFile   "./certs/ed25519/client-ed25519-priv.pem"
-#define caEdCertFile   "./certs/ed25519/root-ed25519.pem"
+#define caCertFile        "./certs/ca-cert.pem"
+#define eccCertFile       "./certs/server-ecc.pem"
+#define eccKeyFile        "./certs/ecc-key.pem"
+#define eccRsaCertFile    "./certs/server-ecc-rsa.pem"
+#define svrCertFile       "./certs/server-cert.pem"
+#define svrKeyFile        "./certs/server-key.pem"
+#define cliCertFile       "./certs/client-cert.pem"
+#define cliCertDerFile    "./certs/client-cert.der"
+#define cliCertFileExt    "./certs/client-cert-ext.pem"
+#define cliCertDerFileExt "./certs/client-cert-ext.der"
+#define cliKeyFile        "./certs/client-key.pem"
+#define ntruCertFile      "./certs/ntru-cert.pem"
+#define ntruKeyFile       "./certs/ntru-key.raw"
+#define dhParamFile       "./certs/dh2048.pem"
+#define cliEccKeyFile     "./certs/ecc-client-key.pem"
+#define cliEccCertFile    "./certs/client-ecc-cert.pem"
+#define caEccCertFile     "./certs/ca-ecc-cert.pem"
+#define crlPemDir         "./certs/crl"
+#define edCertFile        "./certs/ed25519/server-ed25519-cert.pem"
+#define edKeyFile         "./certs/ed25519/server-ed25519-priv.pem"
+#define cliEdCertFile     "./certs/ed25519/client-ed25519.pem"
+#define cliEdKeyFile      "./certs/ed25519/client-ed25519-priv.pem"
+#define caEdCertFile      "./certs/ed25519/ca-ed25519.pem"
+#define ed448CertFile     "./certs/ed448/server-ed448-cert.pem"
+#define ed448KeyFile      "./certs/ed448/server-ed448-priv.pem"
+#define cliEd448CertFile  "./certs/ed448/client-ed448.pem"
+#define cliEd448KeyFile   "./certs/ed448/client-ed448-priv.pem"
+#define caEd448CertFile   "./certs/ed448/ca-ed448.pem"
 #ifdef HAVE_WNR
     /* Whitewood netRandom default config file */
-    #define wnrConfig  "./wnr-example.conf"
+    #define wnrConfig     "./wnr-example.conf"
 #endif
 #endif
 
@@ -354,6 +408,7 @@
     ctx_callback ctx_ready;
     ssl_callback ssl_ready;
     ssl_callback on_result;
+    WOLFSSL_CTX* ctx;
 } callback_functions;
 
 typedef struct func_args {
@@ -369,7 +424,11 @@
 
 void wait_tcp_ready(func_args*);
 
+#ifdef WOLFSSL_ZEPHYR
+typedef void THREAD_FUNC(void*, void*, void*);
+#else
 typedef THREAD_RETURN WOLFSSL_THREAD THREAD_FUNC(void*);
+#endif
 
 void start_thread(THREAD_FUNC, func_args*, THREAD_TYPE*);
 void join_thread(THREAD_TYPE);
@@ -383,7 +442,34 @@
 static const word16      wolfSSLPort = 11111;
 
 
-static WC_INLINE WC_NORETURN void err_sys(const char* msg)
+
+#ifndef MY_EX_USAGE
+#define MY_EX_USAGE 2
+#endif
+
+#ifndef EXIT_FAILURE
+#define EXIT_FAILURE 1
+#endif
+
+#if defined(WOLFSSL_FORCE_MALLOC_FAIL_TEST) || defined(WOLFSSL_ZEPHYR)
+    #ifndef EXIT_SUCCESS
+        #define EXIT_SUCCESS   0
+    #endif
+    #define XEXIT(rc)   return rc
+    #define XEXIT_T(rc) return (THREAD_RETURN)rc
+#else
+    #define XEXIT(rc)   exit((int)(rc))
+    #define XEXIT_T(rc) exit((int)(rc))
+#endif
+
+
+static WC_INLINE
+#if defined(WOLFSSL_FORCE_MALLOC_FAIL_TEST) || defined(WOLFSSL_ZEPHYR)
+THREAD_RETURN
+#else
+WC_NORETURN void
+#endif
+err_sys(const char* msg)
 {
     printf("wolfSSL error: %s\n", msg);
 
@@ -397,13 +483,11 @@
     if (msg)
 #endif
     {
-        exit(EXIT_FAILURE);
+        XEXIT_T(EXIT_FAILURE);
     }
 }
 
 
-#define MY_EX_USAGE 2
-
 extern int   myoptind;
 extern char* myoptarg;
 
@@ -414,6 +498,13 @@
     char  c;
     char* cp;
 
+    /* Added sanity check becuase scan-build complains argv[myoptind] access 
+     * results in a null pointer dereference. */
+    if (argv == NULL)  {
+        myoptarg = NULL;
+        return -1;
+    }
+
     if (myoptind == 0)
         next = NULL;   /* we're starting new/over */
 
@@ -421,8 +512,8 @@
         if (myoptind == 0)
             myoptind++;
 
-        if (myoptind >= argc || argv[myoptind][0] != '-' ||
-                                argv[myoptind][1] == '\0') {
+        if (myoptind >= argc || argv[myoptind] == NULL ||
+                argv[myoptind][0] != '-' || argv[myoptind][1] == '\0') {
             myoptarg = NULL;
             if (myoptind < argc)
                 myoptarg = argv[myoptind];
@@ -489,10 +580,59 @@
 
 #endif
 
-
-#if defined(KEEP_PEER_CERT) || defined(SESSION_CERTS)
-
-static WC_INLINE void ShowX509(WOLFSSL_X509* x509, const char* hdr)
+static const char* client_showpeer_msg[][8] = {
+    /* English */
+    {
+        "SSL version is",
+        "SSL cipher suite is",
+        "SSL curve name is",
+        "SSL DH size is",
+        "SSL reused session",
+        "Alternate cert chain used",
+        "peer's cert info:",
+        NULL
+    },
+#ifndef NO_MULTIBYTE_PRINT
+    /* Japanese */
+    {
+        "SSL バージョンは",
+        "SSL 暗号スイートは",
+        "SSL 曲線名は",
+        "SSL DH サイズは",
+        "SSL 再利用セッション",
+        "代替証明チェーンを使用",
+        "相手方証明書情報",
+        NULL
+    },
+#endif
+};
+
+#if defined(KEEP_PEER_CERT) || defined(KEEP_OUR_CERT) || defined(SESSION_CERTS)
+static const char* client_showx509_msg[][5] = {
+    /* English */
+    {
+        "issuer",
+        "subject",
+        "altname",
+        "serial number",
+        NULL
+    },
+#ifndef NO_MULTIBYTE_PRINT
+    /* Japanese */
+    {
+        "発行者",
+        "サブジェクト",
+        "代替名",
+        "シリアル番号",
+        NULL
+    },
+#endif
+};
+
+/* lng_index is to specify the language for displaying message.              */
+/* 0:English, 1:Japanese                                                     */
+static WC_INLINE void ShowX509Ex(WOLFSSL_X509* x509, const char* hdr,
+                                                                 int lng_index)
 {
     char* altName;
     char* issuer;
@@ -500,6 +640,7 @@
     byte  serial[32];
     int   ret;
     int   sz = sizeof(serial);
+    const char** words = client_showx509_msg[lng_index];
 
     if (x509 == NULL) {
         printf("%s No Cert\n", hdr);
@@ -511,10 +652,10 @@
     subject = wolfSSL_X509_NAME_oneline(
                                      wolfSSL_X509_get_subject_name(x509), 0, 0);
 
-    printf("%s\n issuer : %s\n subject: %s\n", hdr, issuer, subject);
+    printf("%s\n %s : %s\n %s: %s\n", hdr, words[0], issuer, words[1], subject);
 
     while ( (altName = wolfSSL_X509_get_next_altname(x509)) != NULL)
-        printf(" altname = %s\n", altName);
+        printf(" %s = %s\n", words[2], altName);
 
     ret = wolfSSL_X509_get_serial_number(x509, serial, &sz);
     if (ret == WOLFSSL_SUCCESS) {
@@ -524,7 +665,7 @@
 
         /* testsuite has multiple threads writing to stdout, get output
            message ready to write once */
-        strLen = sprintf(serialMsg, " serial number");
+        strLen = sprintf(serialMsg, " %s", words[3]);
         for (i = 0; i < sz; i++)
             sprintf(serialMsg + strLen + (i*3), ":%02x ", serial[i]);
         printf("%s\n", serialMsg);
@@ -533,13 +674,12 @@
     XFREE(subject, 0, DYNAMIC_TYPE_OPENSSL);
     XFREE(issuer,  0, DYNAMIC_TYPE_OPENSSL);
 
-#if defined(OPENSSL_EXTRA) && defined(SHOW_CERTS)
+#if defined(SHOW_CERTS) && defined(OPENSSL_EXTRA)
     {
         WOLFSSL_BIO* bio;
         char buf[256]; /* should be size of ASN_NAME_MAX */
         int  textSz;
 
-
         /* print out domain component if certificate has it */
         textSz = wolfSSL_X509_NAME_get_text_by_NID(
                 wolfSSL_X509_get_subject_name(x509), NID_domainComponent,
@@ -555,12 +695,18 @@
             wolfSSL_BIO_free(bio);
         }
     }
-#endif
+#endif /* SHOW_CERTS && OPENSSL_EXTRA */
 }
-
-#endif /* KEEP_PEER_CERT || SESSION_CERTS */
-
-#if defined(SESSION_CERTS) && defined(SHOW_CERTS)
+/* original ShowX509 to maintain compatibility */
+static WC_INLINE void ShowX509(WOLFSSL_X509* x509, const char* hdr)
+{
+    ShowX509Ex(x509, hdr, 0);
+}
+
+#endif /* KEEP_PEER_CERT || KEEP_OUR_CERT || SESSION_CERTS */
+
+#if defined(SHOW_CERTS) && defined(SESSION_CERTS) && \
+    (defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL))
 static WC_INLINE void ShowX509Chain(WOLFSSL_X509_CHAIN* chain, int count,
     const char* hdr)
 {
@@ -582,12 +728,17 @@
         wolfSSL_FreeX509(chainX509);
     }
 }
-#endif
-
-static WC_INLINE void showPeer(WOLFSSL* ssl)
+#endif /* SHOW_CERTS && SESSION_CERTS */
+
+/* lng_index is to specify the language for displaying message.              */
+/* 0:English, 1:Japanese                                                     */
+static WC_INLINE void showPeerEx(WOLFSSL* ssl, int lng_index)
 {
     WOLFSSL_CIPHER* cipher;
-#ifdef HAVE_ECC
+    const char** words = client_showpeer_msg[lng_index];
+
+#if defined(HAVE_ECC) || defined(HAVE_CURVE25519) || defined(HAVE_CURVE448) || \
+                                                                 !defined(NO_DH)
     const char *name;
 #endif
 #ifndef NO_DH
@@ -596,40 +747,43 @@
 #ifdef KEEP_PEER_CERT
     WOLFSSL_X509* peer = wolfSSL_get_peer_certificate(ssl);
     if (peer)
-        ShowX509(peer, "peer's cert info:");
+        ShowX509Ex(peer, words[6], lng_index);
     else
         printf("peer has no cert!\n");
     wolfSSL_FreeX509(peer);
 #endif
-#if defined(SHOW_CERTS) && defined(OPENSSL_EXTRA) && defined(KEEP_OUR_CERT)
+#if defined(SHOW_CERTS) && defined(KEEP_OUR_CERT) && \
+    (defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL))
     ShowX509(wolfSSL_get_certificate(ssl), "our cert info:");
     printf("Peer verify result = %lu\n", wolfSSL_get_verify_result(ssl));
-#endif /* SHOW_CERTS */
-    printf("SSL version is %s\n", wolfSSL_get_version(ssl));
+#endif /* SHOW_CERTS && KEEP_OUR_CERT */
+    printf("%s %s\n", words[0], wolfSSL_get_version(ssl));
 
     cipher = wolfSSL_get_current_cipher(ssl);
 #ifdef HAVE_QSH
-    printf("SSL cipher suite is %s%s\n", (wolfSSL_isQSH(ssl))? "QSH:": "",
+    printf("%s %s%s\n", words[1], (wolfSSL_isQSH(ssl))? "QSH:": "",
             wolfSSL_CIPHER_get_name(cipher));
 #else
-    printf("SSL cipher suite is %s\n", wolfSSL_CIPHER_get_name(cipher));
+    printf("%s %s\n", words[1], wolfSSL_CIPHER_get_name(cipher));
 #endif
-#ifdef HAVE_ECC
+#if defined(HAVE_ECC) || defined(HAVE_CURVE25519) || defined(HAVE_CURVE448) || \
+                                                                 !defined(NO_DH)
     if ((name = wolfSSL_get_curve_name(ssl)) != NULL)
-        printf("SSL curve name is %s\n", name);
+        printf("%s %s\n", words[2], name);
 #endif
 #ifndef NO_DH
-    if ((bits = wolfSSL_GetDhKey_Sz(ssl)) > 0)
-        printf("SSL DH size is %d bits\n", bits);
+    else if ((bits = wolfSSL_GetDhKey_Sz(ssl)) > 0)
+        printf("%s %d bits\n", words[3], bits);
 #endif
     if (wolfSSL_session_reused(ssl))
-        printf("SSL reused session\n");
+        printf("%s\n", words[4]);
 #ifdef WOLFSSL_ALT_CERT_CHAINS
     if (wolfSSL_is_peer_alt_cert_chain(ssl))
-        printf("Alternate cert chain used\n");
+        printf("%s\n", words[5]);
 #endif
 
-#if defined(SESSION_CERTS) && defined(SHOW_CERTS)
+#if defined(SHOW_CERTS) && defined(SESSION_CERTS) && \
+    (defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL))
     {
         WOLFSSL_X509_CHAIN* chain;
 
@@ -643,10 +797,14 @@
         }
     #endif
     }
-#endif /* SESSION_CERTS && SHOW_CERTS */
+#endif /* SHOW_CERTS && SESSION_CERTS */
   (void)ssl;
 }
-
+/* original showPeer to maintain compatibility */
+static WC_INLINE void showPeer(WOLFSSL* ssl)
+{
+    showPeerEx(ssl, 0);
+}
 
 static WC_INLINE void build_addr(SOCKADDR_IN_T* addr, const char* peer,
                               word16 port, int udp, int sctp)
@@ -664,6 +822,7 @@
 #ifndef TEST_IPV6
     /* peer could be in human readable form */
     if ( ((size_t)peer != INADDR_ANY) && isalpha((int)peer[0])) {
+    #ifndef WOLFSSL_USE_GETADDRINFO
         #if defined(WOLFSSL_MDK_ARM) || defined(WOLFSSL_KEIL_TCP_NET)
             int err;
             struct hostent* entry = gethostbyname(peer, &err);
@@ -680,6 +839,19 @@
                    entry->h_length);
             useLookup = 1;
         }
+    #else
+        struct zsock_addrinfo hints, *addrInfo;
+        char portStr[6];
+        XSNPRINTF(portStr, sizeof(portStr), "%d", port);
+        memset(&hints, 0, sizeof(hints));
+        hints.ai_family = AF_UNSPEC;
+        hints.ai_socktype = udp ? SOCK_DGRAM : SOCK_STREAM;
+        hints.ai_protocol = udp ? IPPROTO_UDP : IPPROTO_TCP;
+        if (getaddrinfo((char*)peer, portStr, &hints, &addrInfo) == 0) {
+            XMEMCPY(addr, addrInfo->ai_addr, sizeof(*addr));
+            useLookup = 1;
+        }
+    #endif
         else
             err_sys("no entry for host");
     }
@@ -706,7 +878,7 @@
         addr->sin6_addr = in6addr_any;
     }
     else {
-        #ifdef HAVE_GETADDRINFO
+        #if defined(HAVE_GETADDRINFO) || defined(WOLF_C99)
             struct addrinfo  hints;
             struct addrinfo* answer = NULL;
             int    ret;
@@ -775,7 +947,7 @@
             err_sys("setsockopt SO_NOSIGPIPE failed\n");
     }
 #elif defined(WOLFSSL_MDK_ARM) || defined (WOLFSSL_TIRTOS) ||\
-                                          defined(WOLFSSL_KEIL_TCP_NET)
+                        defined(WOLFSSL_KEIL_TCP_NET) || defined(WOLFSSL_ZEPHYR)
     /* nothing to define */
 #else  /* no S_NOSIGPIPE */
     signal(SIGPIPE, SIG_IGN);
@@ -822,15 +994,18 @@
     TEST_SELECT_FAIL,
     TEST_TIMEOUT,
     TEST_RECV_READY,
+    TEST_SEND_READY,
     TEST_ERROR_READY
 };
 
 
 #if !defined(WOLFSSL_MDK_ARM) && !defined(WOLFSSL_KEIL_TCP_NET) && \
                                  !defined(WOLFSSL_TIRTOS)
-static WC_INLINE int tcp_select(SOCKET_T socketfd, int to_sec)
+static WC_INLINE int tcp_select_ex(SOCKET_T socketfd, int to_sec, int rx)
 {
-    fd_set recvfds, errfds;
+    fd_set fds, errfds;
+    fd_set* recvfds = NULL;
+    fd_set* sendfds = NULL;
     SOCKET_T nfds = socketfd + 1;
 #if !defined(__INTEGRITY)
     struct timeval timeout = {(to_sec > 0) ? to_sec : 0, 0};
@@ -839,32 +1014,56 @@
 #endif
     int result;
 
-    FD_ZERO(&recvfds);
-    FD_SET(socketfd, &recvfds);
+    FD_ZERO(&fds);
+    FD_SET(socketfd, &fds);
     FD_ZERO(&errfds);
     FD_SET(socketfd, &errfds);
 
+    if (rx)
+        recvfds = &fds;
+    else
+        sendfds = &fds;
+
 #if defined(__INTEGRITY)
     timeout.tv_sec = (long long)(to_sec > 0) ? to_sec : 0, 0;
 #endif
-    result = select(nfds, &recvfds, NULL, &errfds, &timeout);
+    result = select(nfds, recvfds, sendfds, &errfds, &timeout);
 
     if (result == 0)
         return TEST_TIMEOUT;
     else if (result > 0) {
-        if (FD_ISSET(socketfd, &recvfds))
-            return TEST_RECV_READY;
+        if (FD_ISSET(socketfd, &fds)) {
+            if (rx)
+                return TEST_RECV_READY;
+            else
+                return TEST_SEND_READY;
+        }
         else if(FD_ISSET(socketfd, &errfds))
             return TEST_ERROR_READY;
     }
 
     return TEST_SELECT_FAIL;
 }
+
+static WC_INLINE int tcp_select(SOCKET_T socketfd, int to_sec)
+{
+    return tcp_select_ex(socketfd, to_sec, 1);
+}
+
+static WC_INLINE int tcp_select_tx(SOCKET_T socketfd, int to_sec)
+{
+    return tcp_select_ex(socketfd, to_sec, 0);
+}
+
 #elif defined(WOLFSSL_TIRTOS) || defined(WOLFSSL_KEIL_TCP_NET)
 static WC_INLINE int tcp_select(SOCKET_T socketfd, int to_sec)
 {
     return TEST_RECV_READY;
 }
+static WC_INLINE int tcp_select_tx(SOCKET_T socketfd, int to_sec)
+{
+    return TEST_SEND_READY;
+}
 #endif /* !WOLFSSL_MDK_ARM */
 
 
@@ -879,7 +1078,7 @@
     tcp_socket(sockfd, udp, sctp);
 
 #if !defined(USE_WINDOWS_API) && !defined(WOLFSSL_MDK_ARM)\
-                              && !defined(WOLFSSL_KEIL_TCP_NET)
+                   && !defined(WOLFSSL_KEIL_TCP_NET) && !defined(WOLFSSL_ZEPHYR)
     {
         int       res, on  = 1;
         socklen_t len = sizeof(on);
@@ -900,7 +1099,8 @@
         if (listen(*sockfd, SOCK_LISTEN_MAX_QUEUE) != 0)
                 err_sys("tcp listen failed");
     }
-    #if !defined(USE_WINDOWS_API) && !defined(WOLFSSL_TIRTOS)
+    #if !defined(USE_WINDOWS_API) && !defined(WOLFSSL_TIRTOS) \
+                                                     && !defined(WOLFSSL_ZEPHYR)
         if (*port == 0) {
             socklen_t len = sizeof(addr);
             if (getsockname(*sockfd, (struct sockaddr*)&addr, &len) == 0) {
@@ -948,7 +1148,7 @@
 
 
 #if !defined(USE_WINDOWS_API) && !defined(WOLFSSL_MDK_ARM) \
-                              && !defined(WOLFSSL_KEIL_TCP_NET)
+                   && !defined(WOLFSSL_KEIL_TCP_NET) && !defined(WOLFSSL_ZEPHYR)
     {
         int       res, on  = 1;
         socklen_t len = sizeof(on);
@@ -989,6 +1189,8 @@
     tcp_ready* ready = args->signal;
     ready->ready = 1;
     ready->port = port;
+#else
+    (void)port;
 #endif
 
     *clientfd = *sockfd;
@@ -1035,12 +1237,12 @@
 
         if (ready_file) {
         #if !defined(NO_FILESYSTEM) || defined(FORCE_BUFFER_TEST)
-            FILE* srf = NULL;
+            XFILE srf = NULL;
             if (args)
                 ready = args->signal;
 
             if (ready) {
-                srf = fopen(ready->srfName, "w");
+                srf = XFOPEN(ready->srfName, "w");
 
                 if (srf) {
                     /* let's write port sever is listening on to ready file
@@ -1072,7 +1274,8 @@
         if (ret == SOCKET_ERROR)
             err_sys("ioctlsocket failed");
     #elif defined(WOLFSSL_MDK_ARM) || defined(WOLFSSL_KEIL_TCP_NET) \
-        || defined (WOLFSSL_TIRTOS)|| defined(WOLFSSL_VXWORKS)
+        || defined (WOLFSSL_TIRTOS)|| defined(WOLFSSL_VXWORKS) \
+        || defined(WOLFSSL_ZEPHYR)
          /* non blocking not supported, for now */
     #else
         int flags = fcntl(*sockfd, F_GETFL, 0);
@@ -1109,7 +1312,6 @@
         key[2] = 0x3c;
         key[3] = 0x4d;
 
-
         return 4;   /* length of key in octets or 0 for error */
     }
     else {
@@ -1145,7 +1347,6 @@
         key[2] = 0x3c;
         key[3] = 0x4d;
 
-
         return 4;   /* length of key in octets or 0 for error */
     }
     else {
@@ -1162,6 +1363,58 @@
     }
 }
 
+
+static WC_INLINE unsigned int my_psk_client_tls13_cb(WOLFSSL* ssl,
+        const char* hint, char* identity, unsigned int id_max_len,
+        unsigned char* key, unsigned int key_max_len, const char** ciphersuite)
+{
+    int i;
+    int b = 0x01;
+
+    (void)ssl;
+    (void)hint;
+    (void)key_max_len;
+
+    /* see internal.h MAX_PSK_ID_LEN for PSK identity limit */
+    strncpy(identity, kIdentityStr, id_max_len);
+
+    for (i = 0; i < 32; i++, b += 0x22) {
+        if (b >= 0x100)
+            b = 0x01;
+        key[i] = b;
+    }
+
+    *ciphersuite = "TLS13-AES128-GCM-SHA256";
+
+    return 32;   /* length of key in octets or 0 for error */
+}
+
+
+static WC_INLINE unsigned int my_psk_server_tls13_cb(WOLFSSL* ssl,
+        const char* identity, unsigned char* key, unsigned int key_max_len,
+        const char** ciphersuite)
+{
+    int i;
+    int b = 0x01;
+
+    (void)ssl;
+    (void)key_max_len;
+
+    /* see internal.h MAX_PSK_ID_LEN for PSK identity limit */
+    if (strncmp(identity, kIdentityStr, strlen(kIdentityStr)) != 0)
+        return 0;
+
+    for (i = 0; i < 32; i++, b += 0x22) {
+        if (b >= 0x100)
+            b = 0x01;
+        key[i] = b;
+    }
+
+    *ciphersuite = "TLS13-AES128-GCM-SHA256";
+
+    return 32;   /* length of key in octets or 0 for error */
+}
+
 #endif /* NO_PSK */
 
 
@@ -1193,6 +1446,8 @@
 
 #elif defined(WOLFSSL_TIRTOS)
     extern double current_time();
+#elif defined(WOLFSSL_ZEPHYR)
+    extern double current_time();
 #else
 
 #if !defined(WOLFSSL_MDK_ARM) && !defined(WOLFSSL_KEIL_TCP_NET) && !defined(WOLFSSL_CHIBIOS)
@@ -1257,7 +1512,7 @@
     {
         int ret;
         long int fileSz;
-        FILE* file;
+        XFILE file;
 
         if (fname == NULL || buf == NULL || bufLen == NULL)
             return BAD_FUNC_ARG;
@@ -1267,7 +1522,7 @@
         *bufLen = 0;
 
         /* open file (read-only binary) */
-        file = fopen(fname, "rb");
+        file = XFOPEN(fname, "rb");
         if (!file) {
             printf("Error loading %s\n", fname);
             return BAD_PATH_ERROR;
@@ -1345,6 +1600,48 @@
             free(buff);
     }
 
+    static WC_INLINE void load_ssl_buffer(WOLFSSL* ssl, const char* fname, int type)
+    {
+        int format = WOLFSSL_FILETYPE_PEM;
+        byte* buff = NULL;
+        size_t sz = 0;
+
+        if (load_file(fname, &buff, &sz) != 0) {
+            err_sys("can't open file for buffer load "
+                    "Please run from wolfSSL home directory if not");
+        }
+
+        /* determine format */
+        if (strstr(fname, ".der"))
+            format = WOLFSSL_FILETYPE_ASN1;
+
+        if (type == WOLFSSL_CA) {
+            /* verify certs (CA's) use the shared ctx->cm (WOLFSSL_CERT_MANAGER) */
+            WOLFSSL_CTX* ctx = wolfSSL_get_SSL_CTX(ssl);
+            if (wolfSSL_CTX_load_verify_buffer(ctx, buff, (long)sz, format)
+                                              != WOLFSSL_SUCCESS)
+                err_sys("can't load buffer ca file");
+        }
+        else if (type == WOLFSSL_CERT) {
+            if (wolfSSL_use_certificate_buffer(ssl, buff, (long)sz,
+                        format) != WOLFSSL_SUCCESS)
+                err_sys("can't load buffer cert file");
+        }
+        else if (type == WOLFSSL_KEY) {
+            if (wolfSSL_use_PrivateKey_buffer(ssl, buff, (long)sz,
+                        format) != WOLFSSL_SUCCESS)
+                err_sys("can't load buffer key file");
+        }
+        else if (type == WOLFSSL_CERT_CHAIN) {
+            if (wolfSSL_use_certificate_chain_buffer_format(ssl, buff,
+                    (long)sz, format) != WOLFSSL_SUCCESS)
+                err_sys("can't load cert chain buffer");
+        }
+
+        if (buff)
+            free(buff);
+    }
+
     #ifdef TEST_PK_PRIVKEY
     static WC_INLINE int load_key_file(const char* fname, byte** derBuf, word32* derLen)
     {
@@ -1378,17 +1675,47 @@
     #endif /* !NO_FILESYSTEM || (NO_FILESYSTEM && FORCE_BUFFER_TEST) */
 #endif /* !NO_CERTS */
 
+static int myVerifyFail = 0;
+
+/* The verify callback is called for every certificate only when
+ * --enable-opensslextra is defined because it sets WOLFSSL_ALWAYS_VERIFY_CB and
+ * WOLFSSL_VERIFY_CB_ALL_CERTS.
+ * Normal cases of the verify callback only occur on certificate failures when the
+ * wolfSSL_set_verify(ssl, SSL_VERIFY_PEER, myVerifyCb); is called
+*/
+
 static WC_INLINE int myVerify(int preverify, WOLFSSL_X509_STORE_CTX* store)
 {
     char buffer[WOLFSSL_MAX_ERROR_SZ];
-#ifdef OPENSSL_EXTRA
+#if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
     WOLFSSL_X509* peer;
+#if defined(SHOW_CERTS) && !defined(NO_FILESYSTEM)
+    WOLFSSL_BIO* bio = NULL;
+    WOLFSSL_STACK* sk = NULL;
+    X509* x509 = NULL;
+    int i = 0;
+#endif
 #endif
     (void)preverify;
 
+    /* Verify Callback Arguments:
+     * preverify:           1=Verify Okay, 0=Failure
+     * store->error:        Failure error code (0 indicates no failure)
+     * store->current_cert: Current WOLFSSL_X509 object (only with OPENSSL_EXTRA)
+     * store->error_depth:  Current Index
+     * store->domain:       Subject CN as string (null term)
+     * store->totalCerts:   Number of certs presented by peer
+     * store->certs[i]:     A `WOLFSSL_BUFFER_INFO` with plain DER for each cert
+     * store->store:        WOLFSSL_X509_STORE with CA cert chain
+     * store->store->cm:    WOLFSSL_CERT_MANAGER
+     * store->ex_data:      The WOLFSSL object pointer
+     * store->discardSessionCerts: When set to non-zero value session certs
+        will be discarded (only with SESSION_CERTS)
+     */
+
     printf("In verification callback, error = %d, %s\n", store->error,
                                  wolfSSL_ERR_error_string(store->error, buffer));
-#ifdef OPENSSL_EXTRA
+#if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
     peer = store->current_cert;
     if (peer) {
         char* issuer  = wolfSSL_X509_NAME_oneline(
@@ -1399,6 +1726,24 @@
                                                                   subject);
         XFREE(subject, 0, DYNAMIC_TYPE_OPENSSL);
         XFREE(issuer,  0, DYNAMIC_TYPE_OPENSSL);
+#if defined(SHOW_CERTS) && !defined(NO_FILESYSTEM)
+/* avoid printing duplicate certs */
+        if (store->depth == 1) {
+            /* retrieve x509 certs and display them on stdout */
+            sk = wolfSSL_X509_STORE_GetCerts(store);
+
+            for (i = 0; i < wolfSSL_sk_X509_num(sk); i++) {
+                x509 = wolfSSL_sk_X509_value(sk, i);
+                bio = wolfSSL_BIO_new(wolfSSL_BIO_s_file());
+                if (bio != NULL) {
+                    wolfSSL_BIO_set_fp(bio, stdout, BIO_NOCLOSE);
+                    wolfSSL_X509_print(bio, x509);
+                    wolfSSL_BIO_free(bio);
+                }
+            }
+            wolfSSL_sk_X509_free(sk);
+        }
+#endif
     }
     else
         printf("\tPeer has no cert!\n");
@@ -1411,12 +1756,23 @@
             printf("\t\tCert %d: Ptr %p, Len %u\n", i, cert->buffer, cert->length);
         }
     }
-    #endif
-#endif
-
-    printf("\tSubject's domain name is %s\n", store->domain);
-
-    printf("\tAllowing to continue anyway (shouldn't do this, EVER!!!)\n");
+    #endif /* SHOW_CERTS */
+#endif /* OPENSSL_EXTRA || OPENSSL_EXTRA_X509_SMALL */
+
+    printf("\tSubject's domain name at %d is %s\n", store->error_depth, store->domain);
+
+    /* Testing forced fail case by return zero */
+    if (myVerifyFail) {
+        return 0; /* test failure case */
+    }
+
+    /* If error indicate we are overriding it for testing purposes */
+    if (store->error != 0) {
+        printf("\tAllowing failed certificate check, testing only "
+            "(shouldn't do this in production)\n");
+    }
+
+    /* A non-zero return code indicates failure override */
     return 1;
 }
 
@@ -1560,9 +1916,9 @@
     {
         #if !defined(NO_FILESYSTEM) || defined(FORCE_BUFFER_TEST)
             int depth, res;
-            FILE* file;
+            XFILE file;
             for(depth = 0; depth <= MAX_WOLF_ROOT_DEPTH; depth++) {
-                file = fopen(ntruKeyFile, "rb");
+                file = XFOPEN(ntruKeyFile, "rb");
                 if (file != NULL) {
                     fclose(file);
                     return depth;
@@ -1596,7 +1952,7 @@
     int            ret, i, used;
     void*          status;
     unsigned char* myStack = NULL;
-    int            stackSize = 1024*128;
+    int            stackSize = 1024*152;
     pthread_attr_t myAttr;
     pthread_t      threadId;
 
@@ -1682,7 +2038,7 @@
 #endif /* STACK_TRAP */
 
 
-#ifdef ATOMIC_USER
+#if defined(ATOMIC_USER) && !defined(WOLFSSL_AEAD_ONLY)
 
 /* Atomic Encrypt Context example */
 typedef struct AtomicEncCtx {
@@ -1723,6 +2079,9 @@
     /* hmac, not needed if aead mode */
     wolfSSL_SetTlsHmacInner(ssl, myInner, macInSz, macContent, macVerify);
 
+    ret = wc_HmacInit(&hmac, NULL, INVALID_DEVID);
+    if (ret != 0)
+        return ret;
     ret = wc_HmacSetKey(&hmac, wolfSSL_GetHmacType(ssl),
                wolfSSL_GetMacSecret(ssl, macVerify), wolfSSL_GetHmacSize(ssl));
     if (ret != 0)
@@ -1840,6 +2199,9 @@
 
     wolfSSL_SetTlsHmacInner(ssl, myInner, macInSz, macContent, macVerify);
 
+    ret = wc_HmacInit(&hmac, NULL, INVALID_DEVID);
+    if (ret != 0)
+        return ret;
     ret = wc_HmacSetKey(&hmac, wolfSSL_GetHmacType(ssl),
                wolfSSL_GetMacSecret(ssl, macVerify), digestSz);
     if (ret != 0)
@@ -1863,6 +2225,162 @@
     return ret;
 }
 
+#if defined(HAVE_ENCRYPT_THEN_MAC)
+
+static WC_INLINE int myEncryptMacCb(WOLFSSL* ssl, unsigned char* macOut,
+       int content, int macVerify, unsigned char* encOut,
+       const unsigned char* encIn, unsigned int encSz, void* ctx)
+{
+    int  ret;
+    Hmac hmac;
+    AtomicEncCtx* encCtx = (AtomicEncCtx*)ctx;
+    byte myInner[WOLFSSL_TLS_HMAC_INNER_SZ];
+    const char* tlsStr = "TLS";
+
+    /* example supports (d)tls aes */
+    if (wolfSSL_GetBulkCipher(ssl) != wolfssl_aes) {
+        printf("myMacEncryptCb not using AES\n");
+        return -1;
+    }
+
+    if (strstr(wolfSSL_get_version(ssl), tlsStr) == NULL) {
+        printf("myMacEncryptCb not using (D)TLS\n");
+        return -1;
+    }
+
+    /* encrypt setup on first time */
+    if (encCtx->keySetup == 0) {
+        int   keyLen = wolfSSL_GetKeySize(ssl);
+        const byte* key;
+        const byte* iv;
+
+        if (wolfSSL_GetSide(ssl) == WOLFSSL_CLIENT_END) {
+            key = wolfSSL_GetClientWriteKey(ssl);
+            iv  = wolfSSL_GetClientWriteIV(ssl);
+        }
+        else {
+            key = wolfSSL_GetServerWriteKey(ssl);
+            iv  = wolfSSL_GetServerWriteIV(ssl);
+        }
+
+        ret = wc_AesSetKey(&encCtx->aes, key, keyLen, iv, AES_ENCRYPTION);
+        if (ret != 0) {
+            printf("AesSetKey failed in myMacEncryptCb\n");
+            return ret;
+        }
+        encCtx->keySetup = 1;
+    }
+
+    /* encrypt */
+    ret = wc_AesCbcEncrypt(&encCtx->aes, encOut, encIn, encSz);
+    if (ret != 0)
+        return ret;
+
+    /* Reconstruct record header. */
+    wolfSSL_SetTlsHmacInner(ssl, myInner, encSz, content, macVerify);
+
+    ret = wc_HmacInit(&hmac, NULL, INVALID_DEVID);
+    if (ret != 0)
+        return ret;
+    ret = wc_HmacSetKey(&hmac, wolfSSL_GetHmacType(ssl),
+               wolfSSL_GetMacSecret(ssl, macVerify), wolfSSL_GetHmacSize(ssl));
+    if (ret != 0)
+        return ret;
+    ret = wc_HmacUpdate(&hmac, myInner, sizeof(myInner));
+    if (ret != 0)
+        return ret;
+    ret = wc_HmacUpdate(&hmac, encOut, encSz);
+    if (ret != 0)
+        return ret;
+    return wc_HmacFinal(&hmac, macOut);
+}
+
+
+static WC_INLINE int myVerifyDecryptCb(WOLFSSL* ssl,
+       unsigned char* decOut, const unsigned char* decIn,
+       unsigned int decSz, int content, int macVerify,
+       unsigned int* padSz, void* ctx)
+{
+    AtomicDecCtx* decCtx = (AtomicDecCtx*)ctx;
+    int ret      = 0;
+    int digestSz = wolfSSL_GetHmacSize(ssl);
+    Hmac hmac;
+    byte myInner[WOLFSSL_TLS_HMAC_INNER_SZ];
+    byte verify[WC_MAX_DIGEST_SIZE];
+    const char* tlsStr = "TLS";
+
+    /* example supports (d)tls aes */
+    if (wolfSSL_GetBulkCipher(ssl) != wolfssl_aes) {
+        printf("myMacEncryptCb not using AES\n");
+        return -1;
+    }
+
+    if (strstr(wolfSSL_get_version(ssl), tlsStr) == NULL) {
+        printf("myMacEncryptCb not using (D)TLS\n");
+        return -1;
+    }
+
+    /* Reconstruct record header. */
+    wolfSSL_SetTlsHmacInner(ssl, myInner, decSz, content, macVerify);
+
+    ret = wc_HmacInit(&hmac, NULL, INVALID_DEVID);
+    if (ret != 0)
+        return ret;
+    ret = wc_HmacSetKey(&hmac, wolfSSL_GetHmacType(ssl),
+               wolfSSL_GetMacSecret(ssl, macVerify), digestSz);
+    if (ret != 0)
+        return ret;
+    ret = wc_HmacUpdate(&hmac, myInner, sizeof(myInner));
+    if (ret != 0)
+        return ret;
+    ret = wc_HmacUpdate(&hmac, decIn, decSz);
+    if (ret != 0)
+        return ret;
+    ret = wc_HmacFinal(&hmac, verify);
+    if (ret != 0)
+        return ret;
+
+    if (XMEMCMP(verify, decOut + decSz, digestSz) != 0) {
+        printf("myDecryptVerify verify failed\n");
+        return -1;
+    }
+
+    /* decrypt */
+    if (decCtx->keySetup == 0) {
+        int   keyLen = wolfSSL_GetKeySize(ssl);
+        const byte* key;
+        const byte* iv;
+
+        /* decrypt is from other side (peer) */
+        if (wolfSSL_GetSide(ssl) == WOLFSSL_SERVER_END) {
+            key = wolfSSL_GetClientWriteKey(ssl);
+            iv  = wolfSSL_GetClientWriteIV(ssl);
+        }
+        else {
+            key = wolfSSL_GetServerWriteKey(ssl);
+            iv  = wolfSSL_GetServerWriteIV(ssl);
+        }
+
+        ret = wc_AesSetKey(&decCtx->aes, key, keyLen, iv, AES_DECRYPTION);
+        if (ret != 0) {
+            printf("AesSetKey failed in myDecryptVerifyCb\n");
+            return ret;
+        }
+        decCtx->keySetup = 1;
+    }
+
+    /* decrypt */
+    ret = wc_AesCbcDecrypt(&decCtx->aes, decOut, decIn, decSz);
+    if (ret != 0)
+        return ret;
+
+    *padSz  = *(decOut + decSz - 1) + 1;
+
+    return 0;
+}
+
+#endif
+
 
 static WC_INLINE void SetupAtomicUser(WOLFSSL_CTX* ctx, WOLFSSL* ssl)
 {
@@ -1886,6 +2404,14 @@
 
     wolfSSL_CTX_SetDecryptVerifyCb(ctx, myDecryptVerifyCb);
     wolfSSL_SetDecryptVerifyCtx(ssl, decCtx);
+
+#if defined(HAVE_ENCRYPT_THEN_MAC)
+    wolfSSL_CTX_SetEncryptMacCb(ctx, myEncryptMacCb);
+    wolfSSL_SetEncryptMacCtx(ssl, encCtx);
+
+    wolfSSL_CTX_SetVerifyDecryptCb(ctx, myVerifyDecryptCb);
+    wolfSSL_SetVerifyDecryptCtx(ssl, decCtx);
+#endif
 }
 
 
@@ -1894,6 +2420,8 @@
     AtomicEncCtx* encCtx = (AtomicEncCtx*)wolfSSL_GetMacEncryptCtx(ssl);
     AtomicDecCtx* decCtx = (AtomicDecCtx*)wolfSSL_GetDecryptVerifyCtx(ssl);
 
+    /* Encrypt-Then-MAC callbacks use same contexts. */
+
     free(decCtx);
     free(encCtx);
 }
@@ -1938,28 +2466,42 @@
     #ifdef HAVE_CURVE25519
         curve25519_key curve;
     #endif
+    #ifdef HAVE_CURVE448
+        curve448_key curve;
+    #endif
     } keyGen;
 #endif
 } PkCbInfo;
 
+#if defined(DEBUG_PK_CB) || defined(TEST_PK_PRIVKEY)
+    #define WOLFSSL_PKMSG(_f_, ...) printf(_f_, ##__VA_ARGS__)
+#else
+    #define WOLFSSL_PKMSG(_f_, ...)
+#endif
+
 #ifdef HAVE_ECC
 
-static WC_INLINE int myEccKeyGen(WOLFSSL* ssl, ecc_key* key, word32 keySz, 
+static WC_INLINE int myEccKeyGen(WOLFSSL* ssl, ecc_key* key, word32 keySz,
     int ecc_curve, void* ctx)
 {
     int       ret;
     WC_RNG    rng;
     PkCbInfo* cbInfo = (PkCbInfo*)ctx;
-    ecc_key*  new_key = key;
+    ecc_key*  new_key;
 #ifdef TEST_PK_PRIVKEY
     byte qx[MAX_ECC_BYTES], qy[MAX_ECC_BYTES];
     word32 qxLen = sizeof(qx), qyLen = sizeof(qy);
+
     new_key = &cbInfo->keyGen.ecc;
+#else
+    new_key = key;
 #endif
 
     (void)ssl;
     (void)cbInfo;
 
+    WOLFSSL_PKMSG("PK ECC KeyGen: keySz %d, Curve ID %d\n", keySz, ecc_curve);
+
     ret = wc_InitRng(&rng);
     if (ret != 0)
         return ret;
@@ -1983,6 +2525,8 @@
     #endif
     }
 
+    WOLFSSL_PKMSG("PK ECC KeyGen: ret %d\n", ret);
+
     wc_FreeRng(&rng);
 
     return ret;
@@ -2001,6 +2545,8 @@
     (void)ssl;
     (void)cbInfo;
 
+    WOLFSSL_PKMSG("PK ECC Sign: inSz %d, keySz %d\n", inSz, keySz);
+
 #ifdef TEST_PK_PRIVKEY
     ret = load_key_file(cbInfo->ourKey, &keyBuf, &keySz);
     if (ret != 0)
@@ -2014,8 +2560,10 @@
     ret = wc_ecc_init(&myKey);
     if (ret == 0) {
         ret = wc_EccPrivateKeyDecode(keyBuf, &idx, &myKey, keySz);
-        if (ret == 0)
+        if (ret == 0) {
+            WOLFSSL_PKMSG("PK ECC Sign: Curve ID %d\n", myKey.dp->id);
             ret = wc_ecc_sign_hash(in, inSz, out, outSz, &rng, &myKey);
+        }
         wc_ecc_free(&myKey);
     }
     wc_FreeRng(&rng);
@@ -2024,6 +2572,8 @@
     free(keyBuf);
 #endif
 
+    WOLFSSL_PKMSG("PK ECC Sign: ret %d outSz %d\n", ret, *outSz);
+
     return ret;
 }
 
@@ -2040,6 +2590,8 @@
     (void)ssl;
     (void)cbInfo;
 
+    WOLFSSL_PKMSG("PK ECC Verify: sigSz %d, hashSz %d, keySz %d\n", sigSz, hashSz, keySz);
+
     ret = wc_ecc_init(&myKey);
     if (ret == 0) {
         ret = wc_EccPublicKeyDecode(key, &idx, &myKey, keySz);
@@ -2048,6 +2600,8 @@
         wc_ecc_free(&myKey);
     }
 
+    WOLFSSL_PKMSG("PK ECC Verify: ret %d, result %d\n", ret, *result);
+
     return ret;
 }
 
@@ -2065,6 +2619,9 @@
     (void)ssl;
     (void)cbInfo;
 
+    WOLFSSL_PKMSG("PK ECC PMS: Side %s, Peer Curve %d\n",
+        side == WOLFSSL_CLIENT_END ? "client" : "server", otherKey->dp->id);
+
     ret = wc_ecc_init(&tmpKey);
     if (ret != 0) {
         return ret;
@@ -2126,6 +2683,8 @@
 
     wc_ecc_free(&tmpKey);
 
+    WOLFSSL_PKMSG("PK ECC PMS: ret %d, PubKeySz %d, OutLen %d\n", ret, *pubKeySz, *outlen);
+
     return ret;
 }
 
@@ -2142,6 +2701,8 @@
     (void)ssl;
     (void)cbInfo;
 
+    WOLFSSL_PKMSG("PK 25519 Sign: inSz %d, keySz %d\n", inSz, keySz);
+
 #ifdef TEST_PK_PRIVKEY
     ret = load_key_file(cbInfo->ourKey, &keyBuf, &keySz);
     if (ret != 0)
@@ -2160,6 +2721,8 @@
     free(keyBuf);
 #endif
 
+    WOLFSSL_PKMSG("PK 25519 Sign: ret %d, outSz %d\n", ret, *outSz);
+
     return ret;
 }
 
@@ -2175,6 +2738,8 @@
     (void)ssl;
     (void)cbInfo;
 
+    WOLFSSL_PKMSG("PK 25519 Verify: sigSz %d, msgSz %d, keySz %d\n", sigSz, msgSz, keySz);
+
     ret = wc_ed25519_init(&myKey);
     if (ret == 0) {
         ret = wc_ed25519_import_public(key, keySz, &myKey);
@@ -2184,12 +2749,14 @@
         wc_ed25519_free(&myKey);
     }
 
+    WOLFSSL_PKMSG("PK 25519 Verify: ret %d, result %d\n", ret, *result);
+
     return ret;
 }
 #endif /* HAVE_ED25519 */
 
 #ifdef HAVE_CURVE25519
-static WC_INLINE int myX25519KeyGen(WOLFSSL* ssl, curve25519_key* key, 
+static WC_INLINE int myX25519KeyGen(WOLFSSL* ssl, curve25519_key* key,
     unsigned int keySz, void* ctx)
 {
     int       ret;
@@ -2199,6 +2766,8 @@
     (void)ssl;
     (void)cbInfo;
 
+    WOLFSSL_PKMSG("PK 25519 KeyGen: keySz %d\n", keySz);
+
     ret = wc_InitRng(&rng);
     if (ret != 0)
         return ret;
@@ -2207,6 +2776,8 @@
 
     wc_FreeRng(&rng);
 
+    WOLFSSL_PKMSG("PK 25519 KeyGen: ret %d\n", ret);
+
     return ret;
 }
 
@@ -2224,6 +2795,9 @@
     (void)ssl;
     (void)cbInfo;
 
+    WOLFSSL_PKMSG("PK 25519 PMS: side %s\n",
+        side == WOLFSSL_CLIENT_END ? "client" : "server");
+
     ret = wc_curve25519_init(&tmpKey);
     if (ret != 0) {
         return ret;
@@ -2267,10 +2841,174 @@
 
     wc_curve25519_free(&tmpKey);
 
+    WOLFSSL_PKMSG("PK 25519 PMS: ret %d, pubKeySz %d, outLen %d\n",
+        ret, *pubKeySz, *outlen);
+
     return ret;
 }
 #endif /* HAVE_CURVE25519 */
 
+#ifdef HAVE_ED448
+static WC_INLINE int myEd448Sign(WOLFSSL* ssl, const byte* in, word32 inSz,
+        byte* out, word32* outSz, const byte* key, word32 keySz, void* ctx)
+{
+    int         ret;
+    word32      idx = 0;
+    ed448_key   myKey;
+    byte*       keyBuf = (byte*)key;
+    PkCbInfo*   cbInfo = (PkCbInfo*)ctx;
+
+    (void)ssl;
+    (void)cbInfo;
+
+    WOLFSSL_PKMSG("PK 448 Sign: inSz %d, keySz %d\n", inSz, keySz);
+
+#ifdef TEST_PK_PRIVKEY
+    ret = load_key_file(cbInfo->ourKey, &keyBuf, &keySz);
+    if (ret != 0)
+        return ret;
+#endif
+
+    ret = wc_ed448_init(&myKey);
+    if (ret == 0) {
+        ret = wc_Ed448PrivateKeyDecode(keyBuf, &idx, &myKey, keySz);
+        if (ret == 0)
+            ret = wc_ed448_sign_msg(in, inSz, out, outSz, &myKey);
+        wc_ed448_free(&myKey);
+    }
+
+#ifdef TEST_PK_PRIVKEY
+    free(keyBuf);
+#endif
+
+    WOLFSSL_PKMSG("PK 448 Sign: ret %d, outSz %d\n", ret, *outSz);
+
+    return ret;
+}
+
+
+static WC_INLINE int myEd448Verify(WOLFSSL* ssl, const byte* sig, word32 sigSz,
+        const byte* msg, word32 msgSz, const byte* key, word32 keySz,
+        int* result, void* ctx)
+{
+    int         ret;
+    ed448_key   myKey;
+    PkCbInfo*   cbInfo = (PkCbInfo*)ctx;
+
+    (void)ssl;
+    (void)cbInfo;
+
+    WOLFSSL_PKMSG("PK 448 Verify: sigSz %d, msgSz %d, keySz %d\n", sigSz, msgSz,
+                  keySz);
+
+    ret = wc_ed448_init(&myKey);
+    if (ret == 0) {
+        ret = wc_ed448_import_public(key, keySz, &myKey);
+        if (ret == 0) {
+            ret = wc_ed448_verify_msg(sig, sigSz, msg, msgSz, result, &myKey);
+        }
+        wc_ed448_free(&myKey);
+    }
+
+    WOLFSSL_PKMSG("PK 448 Verify: ret %d, result %d\n", ret, *result);
+
+    return ret;
+}
+#endif /* HAVE_ED448 */
+
+#ifdef HAVE_CURVE448
+static WC_INLINE int myX448KeyGen(WOLFSSL* ssl, curve448_key* key,
+    unsigned int keySz, void* ctx)
+{
+    int       ret;
+    WC_RNG    rng;
+    PkCbInfo* cbInfo = (PkCbInfo*)ctx;
+
+    (void)ssl;
+    (void)cbInfo;
+
+    WOLFSSL_PKMSG("PK 448 KeyGen: keySz %d\n", keySz);
+
+    ret = wc_InitRng(&rng);
+    if (ret != 0)
+        return ret;
+
+    ret = wc_curve448_make_key(&rng, keySz, key);
+
+    wc_FreeRng(&rng);
+
+    WOLFSSL_PKMSG("PK 448 KeyGen: ret %d\n", ret);
+
+    return ret;
+}
+
+static WC_INLINE int myX448SharedSecret(WOLFSSL* ssl, curve448_key* otherKey,
+        unsigned char* pubKeyDer, unsigned int* pubKeySz,
+        unsigned char* out, unsigned int* outlen,
+        int side, void* ctx)
+{
+    int           ret;
+    curve448_key* privKey = NULL;
+    curve448_key* pubKey = NULL;
+    curve448_key  tmpKey;
+    PkCbInfo*     cbInfo = (PkCbInfo*)ctx;
+
+    (void)ssl;
+    (void)cbInfo;
+
+    WOLFSSL_PKMSG("PK 448 PMS: side %s\n",
+        side == WOLFSSL_CLIENT_END ? "client" : "server");
+
+    ret = wc_curve448_init(&tmpKey);
+    if (ret != 0) {
+        return ret;
+    }
+
+    /* for client: create and export public key */
+    if (side == WOLFSSL_CLIENT_END) {
+        WC_RNG rng;
+
+        privKey = &tmpKey;
+        pubKey = otherKey;
+
+        ret = wc_InitRng(&rng);
+        if (ret == 0) {
+            ret = wc_curve448_make_key(&rng, CURVE448_KEY_SIZE, privKey);
+            if (ret == 0) {
+                ret = wc_curve448_export_public_ex(privKey, pubKeyDer,
+                    pubKeySz, EC448_LITTLE_ENDIAN);
+            }
+            wc_FreeRng(&rng);
+        }
+    }
+
+    /* for server: import public key */
+    else if (side == WOLFSSL_SERVER_END) {
+        privKey = otherKey;
+        pubKey = &tmpKey;
+
+        ret = wc_curve448_import_public_ex(pubKeyDer, *pubKeySz, pubKey,
+            EC448_LITTLE_ENDIAN);
+    }
+    else {
+        ret = BAD_FUNC_ARG;
+    }
+
+    /* generate shared secret and return it */
+    if (ret == 0) {
+        ret = wc_curve448_shared_secret_ex(privKey, pubKey, out, outlen,
+            EC448_LITTLE_ENDIAN);
+    }
+
+    wc_curve448_free(&tmpKey);
+
+    WOLFSSL_PKMSG("PK 448 PMS: ret %d, pubKeySz %d, outLen %d\n",
+        ret, *pubKeySz, *outlen);
+
+    return ret;
+}
+#endif /* HAVE_CURVE448 */
+
 #endif /* HAVE_ECC */
 
 #ifndef NO_DH
@@ -2280,13 +3018,19 @@
         unsigned char* out, unsigned int* outlen,
         void* ctx)
 {
+    int ret;
     PkCbInfo* cbInfo = (PkCbInfo*)ctx;
 
     (void)ssl;
     (void)cbInfo;
 
     /* return 0 on success */
-    return wc_DhAgree(key, out, outlen, priv, privSz, pubKeyDer, pubKeySz);
+    ret = wc_DhAgree(key, out, outlen, priv, privSz, pubKeyDer, pubKeySz);
+
+    WOLFSSL_PKMSG("PK ED Agree: ret %d, privSz %d, pubKeySz %d, outlen %d\n",
+        ret, privSz, pubKeySz, *outlen);
+
+    return ret;
 };
 
 #endif /* !NO_DH */
@@ -2306,6 +3050,8 @@
     (void)ssl;
     (void)cbInfo;
 
+    WOLFSSL_PKMSG("PK RSA Sign: inSz %d, keySz %d\n", inSz, keySz);
+
 #ifdef TEST_PK_PRIVKEY
     ret = load_key_file(cbInfo->ourKey, &keyBuf, &keySz);
     if (ret != 0)
@@ -2333,6 +3079,8 @@
     free(keyBuf);
 #endif
 
+    WOLFSSL_PKMSG("PK RSA Sign: ret %d, outSz %d\n", ret, *outSz);
+
     return ret;
 }
 
@@ -2348,6 +3096,8 @@
     (void)ssl;
     (void)cbInfo;
 
+    WOLFSSL_PKMSG("PK RSA Verify: sigSz %d, keySz %d\n", sigSz, keySz);
+
     ret = wc_InitRsaKey(&myKey, NULL);
     if (ret == 0) {
         ret = wc_RsaPublicKeyDecode(key, &idx, &myKey, keySz);
@@ -2356,6 +3106,8 @@
         wc_FreeRsaKey(&myKey);
     }
 
+    WOLFSSL_PKMSG("PK RSA Verify: ret %d\n", ret);
+
     return ret;
 }
 
@@ -2371,6 +3123,8 @@
     (void)ssl;
     (void)cbInfo;
 
+    WOLFSSL_PKMSG("PK RSA SignCheck: sigSz %d, keySz %d\n", sigSz, keySz);
+
 #ifdef TEST_PK_PRIVKEY
     ret = load_key_file(cbInfo->ourKey, &keyBuf, &keySz);
     if (ret != 0)
@@ -2388,6 +3142,8 @@
     free(keyBuf);
 #endif
 
+    WOLFSSL_PKMSG("PK RSA SignCheck: ret %d\n", ret);
+
     return ret;
 }
 
@@ -2407,6 +3163,9 @@
     (void)ssl;
     (void)cbInfo;
 
+    WOLFSSL_PKMSG("PK RSA PSS Sign: inSz %d, hash %d, mgf %d, keySz %d\n",
+        inSz, hash, mgf, keySz);
+
 #ifdef TEST_PK_PRIVKEY
     ret = load_key_file(cbInfo->ourKey, &keyBuf, &keySz);
     if (ret != 0)
@@ -2454,6 +3213,8 @@
     free(keyBuf);
 #endif
 
+    WOLFSSL_PKMSG("PK RSA PSS Sign: ret %d, outSz %d\n", ret, *outSz);
+
     return ret;
 }
 
@@ -2470,6 +3231,9 @@
     (void)ssl;
     (void)cbInfo;
 
+    WOLFSSL_PKMSG("PK RSA PSS Verify: sigSz %d, hash %d, mgf %d, keySz %d\n",
+        sigSz, hash, mgf, keySz);
+
     switch (hash) {
 #ifndef NO_SHA256
         case SHA256h:
@@ -2498,6 +3262,8 @@
         wc_FreeRsaKey(&myKey);
     }
 
+    WOLFSSL_PKMSG("PK RSA PSS Verify: ret %d\n", ret);
+
     return ret;
 }
 
@@ -2514,6 +3280,9 @@
     (void)ssl;
     (void)cbInfo;
 
+    WOLFSSL_PKMSG("PK RSA PSS SignCheck: sigSz %d, hash %d, mgf %d, keySz %d\n",
+        sigSz, hash, mgf, keySz);
+
 #ifdef TEST_PK_PRIVKEY
     ret = load_key_file(cbInfo->ourKey, &keyBuf, &keySz);
     if (ret != 0)
@@ -2552,6 +3321,8 @@
     free(keyBuf);
 #endif
 
+    WOLFSSL_PKMSG("PK RSA PSS SignCheck: ret %d\n", ret);
+
     return ret;
 }
 #endif
@@ -2570,6 +3341,8 @@
     (void)ssl;
     (void)cbInfo;
 
+    WOLFSSL_PKMSG("PK RSA Enc: inSz %d, keySz %d\n", inSz, keySz);
+
     ret = wc_InitRng(&rng);
     if (ret != 0)
         return ret;
@@ -2588,6 +3361,8 @@
     }
     wc_FreeRng(&rng);
 
+    WOLFSSL_PKMSG("PK RSA Enc: ret %d, outSz %d\n", ret, *outSz);
+
     return ret;
 }
 
@@ -2604,6 +3379,8 @@
     (void)ssl;
     (void)cbInfo;
 
+    WOLFSSL_PKMSG("PK RSA Dec: inSz %d, keySz %d\n", inSz, keySz);
+
 #ifdef TEST_PK_PRIVKEY
     ret = load_key_file(cbInfo->ourKey, &keyBuf, &keySz);
     if (ret != 0)
@@ -2630,6 +3407,8 @@
     free(keyBuf);
 #endif
 
+    WOLFSSL_PKMSG("PK RSA Dec: ret %d\n", ret);
+
     return ret;
 }
 
@@ -2656,6 +3435,14 @@
         wolfSSL_CTX_SetX25519KeyGenCb(ctx, myX25519KeyGen);
         wolfSSL_CTX_SetX25519SharedSecretCb(ctx, myX25519SharedSecret);
     #endif
+    #ifdef HAVE_ED448
+        wolfSSL_CTX_SetEd448SignCb(ctx, myEd448Sign);
+        wolfSSL_CTX_SetEd448VerifyCb(ctx, myEd448Verify);
+    #endif
+    #ifdef HAVE_CURVE448
+        wolfSSL_CTX_SetX448KeyGenCb(ctx, myX448KeyGen);
+        wolfSSL_CTX_SetX448SharedSecretCb(ctx, myX448SharedSecret);
+    #endif
     #ifndef NO_RSA
         wolfSSL_CTX_SetRsaSignCb(ctx, myRsaSign);
         wolfSSL_CTX_SetRsaVerifyCb(ctx, myRsaVerify);
@@ -2689,6 +3476,14 @@
         wolfSSL_SetX25519KeyGenCtx(ssl, myCtx);
         wolfSSL_SetX25519SharedSecretCtx(ssl, myCtx);
     #endif
+    #ifdef HAVE_ED448
+        wolfSSL_SetEd448SignCtx(ssl, myCtx);
+        wolfSSL_SetEd448VerifyCtx(ssl, myCtx);
+    #endif
+    #ifdef HAVE_CURVE448
+        wolfSSL_SetX448KeyGenCtx(ssl, myCtx);
+        wolfSSL_SetX448SharedSecretCtx(ssl, myCtx);
+    #endif
     #ifndef NO_RSA
         wolfSSL_SetRsaSignCtx(ssl, myCtx);
         wolfSSL_SetRsaVerifyCtx(ssl, myCtx);
@@ -2764,6 +3559,7 @@
     tempfn[len] = '\0';
 
     wc_FreeRng(&rng);
+    (void)rng; /* for WC_NO_RNG case */
 
     return tempfn;
 }
@@ -2780,8 +3576,8 @@
         byte key[CHACHA20_POLY1305_AEAD_KEYSIZE]; /* cipher key */
     } key_ctx;
 
-    static key_ctx myKey_ctx;
-    static WC_RNG myKey_rng;
+    static THREAD_LS_T key_ctx myKey_ctx;
+    static THREAD_LS_T WC_RNG myKey_rng;
 
     static WC_INLINE int TicketInit(void)
     {
@@ -2875,10 +3671,12 @@
     /* Generate random port for testing */
     WC_RNG rng;
     if (wc_InitRng(&rng) == 0) {
-        wc_RNG_GenerateBlock(&rng, (byte*)&port, sizeof(port));
-        port |= 0xC000; /* Make sure its in the 49152 - 65535 range */
+        if (wc_RNG_GenerateBlock(&rng, (byte*)&port, sizeof(port)) == 0) {
+            port |= 0xC000; /* Make sure its in the 49152 - 65535 range */
+        }
         wc_FreeRng(&rng);
     }
+    (void)rng; /* for WC_NO_RNG case */
     return port;
 }
 
--- a/wolfssl/version.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/version.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* wolfssl_version.h.in
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -28,8 +28,8 @@
 extern "C" {
 #endif
 
-#define LIBWOLFSSL_VERSION_STRING "3.15.3"
-#define LIBWOLFSSL_VERSION_HEX 0x03015003
+#define LIBWOLFSSL_VERSION_STRING "4.4.0"
+#define LIBWOLFSSL_VERSION_HEX 0x04004000
 
 #ifdef __cplusplus
 }
--- a/wolfssl/wolfcrypt/aes.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/aes.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* aes.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -48,6 +48,12 @@
 #endif
 #endif
 
+#ifndef WC_NO_RNG
+    #include <wolfssl/wolfcrypt/random.h>
+#endif
+#ifdef STM32_CRYPTO
+    #include <wolfssl/wolfcrypt/port/st/stm32.h>
+#endif
 
 #ifdef WOLFSSL_AESNI
 
@@ -62,15 +68,34 @@
 #include "xsecure_aes.h"
 #endif
 
+#if defined(WOLFSSL_AFALG) || defined(WOLFSSL_AFALG_XILINX_AES)
+/* included for struct msghdr */
+#include <wolfssl/wolfcrypt/port/af_alg/wc_afalg.h>
+#endif
+
+#if defined(WOLFSSL_DEVCRYPTO_AES) || defined(WOLFSSL_DEVCRYPTO_CBC)
+#include <wolfssl/wolfcrypt/port/devcrypto/wc_devcrypto.h>
+#endif
+
 #if defined(HAVE_AESGCM) && !defined(WC_NO_RNG)
     #include <wolfssl/wolfcrypt/random.h>
 #endif
 
+#if defined(WOLFSSL_CRYPTOCELL)
+    #include <wolfssl/wolfcrypt/port/arm/cryptoCell.h>
+#endif
+
+#if defined(WOLFSSL_RENESAS_TSIP_TLS) && \
+    defined(WOLFSSL_RENESAS_TSIP_TLS_AES_CRYPT)
+    #include <wolfssl/wolfcrypt/port/Renesas/renesas-tsip-crypt.h>
+#endif
 
 #ifdef __cplusplus
     extern "C" {
 #endif
 
+#ifndef WOLFSSL_AES_KEY_SIZE_ENUM
+#define WOLFSSL_AES_KEY_SIZE_ENUM
 /* these are required for FIPS and non-FIPS */
 enum {
     AES_128_KEY_SIZE    = 16,  /* for 128 bit             */
@@ -79,7 +104,7 @@
 
     AES_IV_SIZE         = 16,  /* always block size       */
 };
-
+#endif
 
 /* avoid redefinition of structs */
 #if !defined(HAVE_FIPS) || \
@@ -104,11 +129,24 @@
     CCM_NONCE_MIN_SZ = 7,
     CCM_NONCE_MAX_SZ = 13,
     CTR_SZ   = 4,
-    AES_IV_FIXED_SZ = 4
+    AES_IV_FIXED_SZ = 4,
+#ifdef WOLFSSL_AES_CFB
+    AES_CFB_MODE = 1,
+#endif
+#ifdef WOLFSSL_AES_OFB
+    AES_OFB_MODE = 2,
+#endif
+#ifdef WOLFSSL_AES_XTS
+    AES_XTS_MODE = 3,
+#endif
+
+#ifdef HAVE_PKCS11
+    AES_MAX_ID_LEN   = 32,
+#endif
 };
 
 
-typedef struct Aes {
+struct Aes {
     /* AESNI needs key first, rounds 2nd, not sure why yet */
     ALIGN16 word32 key[60];
     word32  rounds;
@@ -123,20 +161,35 @@
 #endif
 #ifdef HAVE_AESGCM
     ALIGN16 byte H[AES_BLOCK_SIZE];
+#ifdef OPENSSL_EXTRA
+    word32 aadH[4]; /* additional authenticated data GHASH */
+    word32 aadLen;  /* additional authenticated data len */
+#endif
+
 #ifdef GCM_TABLE
     /* key-based fast multiplication table. */
     ALIGN16 byte M0[256][AES_BLOCK_SIZE];
 #endif /* GCM_TABLE */
+#ifdef HAVE_CAVIUM_OCTEON_SYNC
+    word32 y0;
+#endif
 #endif /* HAVE_AESGCM */
 #ifdef WOLFSSL_AESNI
     byte use_aesni;
 #endif /* WOLFSSL_AESNI */
+#ifdef WOLF_CRYPTO_CB
+    int    devId;
+    void*  devCtx;
+#endif
+#ifdef HAVE_PKCS11
+    byte id[AES_MAX_ID_LEN];
+    int  idLen;
+#endif
 #ifdef WOLFSSL_ASYNC_CRYPT
-    word32 asyncKey[AES_MAX_KEY_SIZE/8/sizeof(word32)]; /* raw key */
-    word32 asyncIv[AES_BLOCK_SIZE/sizeof(word32)]; /* raw IV */
     WC_ASYNC_DEV asyncDev;
 #endif /* WOLFSSL_ASYNC_CRYPT */
-#if defined(WOLFSSL_AES_COUNTER) || defined(WOLFSSL_AES_CFB)
+#if defined(WOLFSSL_AES_COUNTER) || defined(WOLFSSL_AES_CFB) || \
+    defined(WOLFSSL_AES_OFB) || defined(WOLFSSL_AES_XTS)
     word32  left;            /* unused bytes left from last call */
 #endif
 #ifdef WOLFSSL_XILINX_CRYPT
@@ -145,8 +198,42 @@
     word32      key_init[8];
     word32      kup;
 #endif
+#if defined(WOLFSSL_AFALG) || defined(WOLFSSL_AFALG_XILINX_AES)
+    int alFd; /* server socket to bind to */
+    int rdFd; /* socket to read from */
+    struct msghdr msg;
+    int dir;  /* flag for encrpyt or decrypt */
+#ifdef WOLFSSL_AFALG_XILINX_AES
+    word32 msgBuf[CMSG_SPACE(4) + CMSG_SPACE(sizeof(struct af_alg_iv) +
+                  GCM_NONCE_MID_SZ)];
+#endif
+#endif
+#if defined(WOLF_CRYPTO_CB) || (defined(WOLFSSL_DEVCRYPTO) && \
+    (defined(WOLFSSL_DEVCRYPTO_AES) || defined(WOLFSSL_DEVCRYPTO_CBC))) || \
+    (defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES))
+    word32 devKey[AES_MAX_KEY_SIZE/WOLFSSL_BIT_SIZE/sizeof(word32)]; /* raw key */
+#ifdef HAVE_CAVIUM_OCTEON_SYNC
+    int    keySet;
+#endif
+#endif
+#if defined(WOLFSSL_DEVCRYPTO) && \
+    (defined(WOLFSSL_DEVCRYPTO_AES) || defined(WOLFSSL_DEVCRYPTO_CBC))
+    WC_CRYPTODEV ctx;
+#endif
+#if defined(WOLFSSL_CRYPTOCELL)
+    aes_context_t ctx;
+#endif
+#if defined(WOLFSSL_RENESAS_TSIP_TLS) && \
+    defined(WOLFSSL_RENESAS_TSIP_TLS_AES_CRYPT)
+    TSIP_AES_CTX ctx;
+#endif
     void*  heap; /* memory hint to use */
-} Aes;
+};
+
+#ifndef WC_AES_TYPE_DEFINED
+    typedef struct Aes Aes;
+    #define WC_AES_TYPE_DEFINED
+#endif
 
 #ifdef WOLFSSL_AES_XTS
 typedef struct XtsAes {
@@ -179,20 +266,40 @@
 WOLFSSL_API int  wc_AesSetKey(Aes* aes, const byte* key, word32 len,
                               const byte* iv, int dir);
 WOLFSSL_API int  wc_AesSetIV(Aes* aes, const byte* iv);
+
+#ifdef HAVE_AES_CBC
 WOLFSSL_API int  wc_AesCbcEncrypt(Aes* aes, byte* out,
                                   const byte* in, word32 sz);
 WOLFSSL_API int  wc_AesCbcDecrypt(Aes* aes, byte* out,
                                   const byte* in, word32 sz);
+#endif
 
 #ifdef WOLFSSL_AES_CFB
 WOLFSSL_API int wc_AesCfbEncrypt(Aes* aes, byte* out,
                                     const byte* in, word32 sz);
+WOLFSSL_API int wc_AesCfb1Encrypt(Aes* aes, byte* out,
+                                    const byte* in, word32 sz);
+WOLFSSL_API int wc_AesCfb8Encrypt(Aes* aes, byte* out,
+                                    const byte* in, word32 sz);
 #ifdef HAVE_AES_DECRYPT
 WOLFSSL_API int wc_AesCfbDecrypt(Aes* aes, byte* out,
                                     const byte* in, word32 sz);
+WOLFSSL_API int wc_AesCfb1Decrypt(Aes* aes, byte* out,
+                                    const byte* in, word32 sz);
+WOLFSSL_API int wc_AesCfb8Decrypt(Aes* aes, byte* out,
+                                    const byte* in, word32 sz);
 #endif /* HAVE_AES_DECRYPT */
 #endif /* WOLFSSL_AES_CFB */
 
+#ifdef WOLFSSL_AES_OFB
+WOLFSSL_API int wc_AesOfbEncrypt(Aes* aes, byte* out,
+                                    const byte* in, word32 sz);
+#ifdef HAVE_AES_DECRYPT
+WOLFSSL_API int wc_AesOfbDecrypt(Aes* aes, byte* out,
+                                    const byte* in, word32 sz);
+#endif /* HAVE_AES_DECRYPT */
+#endif /* WOLFSSL_AES_OFB */
+
 #ifdef HAVE_AES_ECB
 WOLFSSL_API int wc_AesEcbEncrypt(Aes* aes, byte* out,
                                   const byte* in, word32 sz);
@@ -217,6 +324,9 @@
 #ifdef WOLFSSL_XILINX_CRYPT
  WOLFSSL_API int  wc_AesGcmSetKey_ex(Aes* aes, const byte* key, word32 len,
          word32 kup);
+#elif defined(WOLFSSL_AFALG_XILINX_AES)
+ WOLFSSL_LOCAL int  wc_AesGcmSetKey_ex(Aes* aes, const byte* key, word32 len,
+         word32 kup);
 #endif
  WOLFSSL_API int  wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len);
  WOLFSSL_API int  wc_AesGcmEncrypt(Aes* aes, byte* out,
@@ -311,8 +421,12 @@
 
 WOLFSSL_API int wc_AesGetKeySize(Aes* aes, word32* keySize);
 
-WOLFSSL_API int  wc_AesInit(Aes*, void*, int);
-WOLFSSL_API void wc_AesFree(Aes*);
+WOLFSSL_API int  wc_AesInit(Aes* aes, void* heap, int devId);
+#ifdef HAVE_PKCS11
+WOLFSSL_API int  wc_AesInit_Id(Aes* aes, unsigned char* id, int len, void* heap,
+        int devId);
+#endif
+WOLFSSL_API void wc_AesFree(Aes* aes);
 
 #ifdef __cplusplus
     } /* extern "C" */
--- a/wolfssl/wolfcrypt/arc4.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/arc4.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* arc4.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
--- a/wolfssl/wolfcrypt/asn.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/asn.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* asn.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -58,23 +58,29 @@
 #include <wolfssl/wolfcrypt/sha256.h>
 #include <wolfssl/wolfcrypt/asn_public.h>   /* public interface */
 
+#if defined(NO_SHA) && defined(NO_SHA256)
+    #define WC_SHA256_DIGEST_SIZE 32
+#endif
 
 #ifdef __cplusplus
     extern "C" {
 #endif
 
+#ifndef EXTERNAL_SERIAL_SIZE
+    #define EXTERNAL_SERIAL_SIZE 32
+#endif
+
 enum {
     ISSUER  = 0,
     SUBJECT = 1,
 
-    EXTERNAL_SERIAL_SIZE = 32,
-
     BEFORE  = 0,
     AFTER   = 1
 };
 
 /* ASN Tags   */
 enum ASN_Tags {
+    ASN_EOC               = 0x00,
     ASN_BOOLEAN           = 0x01,
     ASN_INTEGER           = 0x02,
     ASN_BIT_STRING        = 0x03,
@@ -85,12 +91,14 @@
     ASN_UTF8STRING        = 0x0c,
     ASN_SEQUENCE          = 0x10,
     ASN_SET               = 0x11,
+    ASN_PRINTABLE_STRING  = 0x13,
     ASN_UTC_TIME          = 0x17,
     ASN_OTHER_TYPE        = 0x00,
     ASN_RFC822_TYPE       = 0x01,
     ASN_DNS_TYPE          = 0x02,
     ASN_DIR_TYPE          = 0x04,
     ASN_URI_TYPE          = 0x06, /* the value 6 is from GeneralName OID */
+    ASN_IP_TYPE           = 0x07, /* the value 7 is from GeneralName OID */
     ASN_GENERALIZED_TIME  = 0x18,
     CRL_EXTENSIONS        = 0xa0,
     ASN_EXTENSIONS        = 0xa3,
@@ -99,13 +107,16 @@
 
     /* ASN_Flags - Bitmask */
     ASN_CONSTRUCTED       = 0x20,
+    ASN_APPLICATION       = 0x40,
     ASN_CONTEXT_SPECIFIC  = 0x80,
 };
 
 #define ASN_UTC_TIME_SIZE 14
 #define ASN_GENERALIZED_TIME_SIZE 16
+#define ASN_GENERALIZED_TIME_MAX 68
 
 enum DN_Tags {
+    ASN_DN_NULL       = 0x00,
     ASN_COMMON_NAME   = 0x03,   /* CN */
     ASN_SUR_NAME      = 0x04,   /* SN */
     ASN_SERIAL_NUMBER = 0x05,   /* serialNumber */
@@ -113,8 +124,9 @@
     ASN_LOCALITY_NAME = 0x07,   /* L  */
     ASN_STATE_NAME    = 0x08,   /* ST */
     ASN_ORG_NAME      = 0x0a,   /* O  */
-    ASN_ORGUNIT_NAME  = 0x0b,    /* OU */
-    ASN_EMAIL_NAME    = 0x98,    /* not oid number there is 97 in 2.5.4.0-97 */
+    ASN_ORGUNIT_NAME  = 0x0b,   /* OU */
+    ASN_BUS_CAT       = 0x0f,   /* businessCategory */
+    ASN_EMAIL_NAME    = 0x98,   /* not oid number there is 97 in 2.5.4.0-97 */
 
     /* pilot attribute types
      * OID values of 0.9.2342.19200300.100.1.* */
@@ -122,24 +134,151 @@
     ASN_DOMAIN_COMPONENT = 0x19  /* DC */
 };
 
+#if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
+typedef struct WOLFSSL_ObjectInfo {
+    int nid;
+    int id;
+    word32 type;
+    const char* sName;
+    const char* lName;
+} WOLFSSL_ObjectInfo;
+extern const size_t wolfssl_object_info_sz;
+extern const WOLFSSL_ObjectInfo wolfssl_object_info[];
+#endif /* defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL) */
+
 /* DN Tag Strings */
 #define WOLFSSL_COMMON_NAME      "/CN="
+#define WOLFSSL_LN_COMMON_NAME   "/commonName="
 #define WOLFSSL_SUR_NAME         "/SN="
 #define WOLFSSL_SERIAL_NUMBER    "/serialNumber="
 #define WOLFSSL_COUNTRY_NAME     "/C="
+#define WOLFSSL_LN_COUNTRY_NAME  "/countryName="
 #define WOLFSSL_LOCALITY_NAME    "/L="
+#define WOLFSSL_LN_LOCALITY_NAME "/localityName="
 #define WOLFSSL_STATE_NAME       "/ST="
+#define WOLFSSL_LN_STATE_NAME    "/stateOrProvinceName="
 #define WOLFSSL_ORG_NAME         "/O="
+#define WOLFSSL_LN_ORG_NAME      "/organizationName="
 #define WOLFSSL_ORGUNIT_NAME     "/OU="
+#define WOLFSSL_LN_ORGUNIT_NAME  "/organizationalUnitName="
+#define WOLFSSL_DOMAIN_COMPONENT "/DC="
+#define WOLFSSL_LN_DOMAIN_COMPONENT "/domainComponent="
+#define WOLFSSL_BUS_CAT          "/businessCategory="
+#define WOLFSSL_JOI_C            "/jurisdictionC="
+#define WOLFSSL_JOI_ST           "/jurisdictionST="
+#define WOLFSSL_EMAIL_ADDR       "/emailAddress="
+
+#define WOLFSSL_USER_ID          "/UID="
 #define WOLFSSL_DOMAIN_COMPONENT "/DC="
 
-enum ECC_TYPES {
+#if defined(WOLFSSL_APACHE_HTTPD)
+    /* otherName strings */
+    #define WOLFSSL_SN_MS_UPN       "msUPN"
+    #define WOLFSSL_LN_MS_UPN       "Microsoft User Principal Name"
+    #define WOLFSSL_MS_UPN_SUM 265
+    #define WOLFSSL_SN_DNS_SRV      "id-on-dnsSRV"
+    #define WOLFSSL_LN_DNS_SRV      "SRVName"
+    /* TLS features extension strings */
+    #define WOLFSSL_SN_TLS_FEATURE  "tlsfeature"
+    #define WOLFSSL_LN_TLS_FEATURE  "TLS Feature"
+    #define WOLFSSL_TLS_FEATURE_SUM 92
+#endif
+
+/* NIDs */
+enum
+{
+    NID_undef = 0,
+    NID_netscape_cert_type = NID_undef,
+    NID_des = 66,
+    NID_des3 = 67,
+    NID_sha256 = 672,
+    NID_sha384 = 673,
+    NID_sha512 = 674,
+    NID_hw_name_oid = 73,
+    NID_id_pkix_OCSP_basic = 74,
+    NID_any_policy = 75,
+    NID_anyExtendedKeyUsage = 76,
+    NID_givenName = 99,
+    NID_initials = 101,
+    NID_title = 106,
+    NID_description = 107,
+    NID_basic_constraints = 133,
+    NID_key_usage = 129,     /* 2.5.29.15 */
+    NID_ext_key_usage = 151, /* 2.5.29.37 */
+    NID_subject_key_identifier = 128,
+    NID_authority_key_identifier = 149,
+    NID_private_key_usage_period = 130, /* 2.5.29.16 */
+    NID_subject_alt_name = 131,
+    NID_issuer_alt_name = 132,
+    NID_info_access = 69,
+    NID_sinfo_access = 79,      /* id-pe 11 */
+    NID_name_constraints = 144, /* 2.5.29.30 */
+    NID_crl_distribution_points = 145, /* 2.5.29.31 */
+    NID_certificate_policies = 146,
+    NID_policy_mappings = 147,
+    NID_policy_constraints = 150,
+    NID_inhibit_any_policy = 168,      /* 2.5.29.54 */
+    NID_tlsfeature = 1020,             /* id-pe 24 */
+    NID_commonName = 0x03,             /* matches ASN_COMMON_NAME in asn.h */
+
+
+    NID_surname = 0x04,                /* SN */
+    NID_serialNumber = 0x05,           /* serialNumber */
+    NID_countryName = 0x06,            /* C  */
+    NID_localityName = 0x07,           /* L  */
+    NID_stateOrProvinceName = 0x08,    /* ST */
+    NID_organizationName = 0x0a,       /* O  */
+    NID_organizationalUnitName = 0x0b, /* OU */
+    NID_jurisdictionCountryName = 0xc,
+    NID_jurisdictionStateOrProvinceName = 0xd,
+    NID_businessCategory = ASN_BUS_CAT,
+    NID_domainComponent = ASN_DOMAIN_COMPONENT,
+    NID_emailAddress = 0x30,           /* emailAddress */
+    NID_id_on_dnsSRV = 82,             /* 1.3.6.1.5.5.7.8.7 */
+    NID_ms_upn = 265,                  /* 1.3.6.1.4.1.311.20.2.3 */
+
+    NID_X9_62_prime_field = 406        /* 1.2.840.10045.1.1 */
+};
+
+enum ECC_TYPES
+{
     ECC_PREFIX_0 = 160,
     ECC_PREFIX_1 = 161
 };
 
+#ifdef WOLFSSL_CERT_PIV
+    enum PIV_Tags {
+        ASN_PIV_CERT          = 0x0A,
+        ASN_PIV_NONCE         = 0x0B,
+        ASN_PIV_SIGNED_NONCE  = 0x0C,
+
+        ASN_PIV_TAG_CERT      = 0x70,
+        ASN_PIV_TAG_CERT_INFO = 0x71,
+        ASN_PIV_TAG_MSCUID    = 0x72,
+        ASN_PIV_TAG_ERR_DET   = 0xFE,
+
+        /* certificate info masks */
+        ASN_PIV_CERT_INFO_COMPRESSED = 0x03,
+        ASN_PIV_CERT_INFO_ISX509     = 0x04,
+    };
+#endif /* WOLFSSL_CERT_PIV */
+
+
+#define ASN_JOI_PREFIX_SZ       10
+#define ASN_JOI_PREFIX          "\x2b\x06\x01\x04\x01\x82\x37\x3c\x02\x01"
+#define ASN_JOI_C               0x3
+#define ASN_JOI_ST              0x2
+
+#ifndef WC_ASN_NAME_MAX
+    #ifdef OPENSSL_EXTRA
+        #define WC_ASN_NAME_MAX 300
+    #else
+        #define WC_ASN_NAME_MAX 256
+    #endif
+#endif
+#define ASN_NAME_MAX WC_ASN_NAME_MAX
+
 enum Misc_ASN {
-    ASN_NAME_MAX        = 256,
     MAX_SALT_SIZE       =  64,     /* MAX PKCS Salt length */
     MAX_IV_SIZE         =  64,     /* MAX PKCS Iv length */
     ASN_BOOL_SIZE       =   2,     /* including type */
@@ -155,7 +294,15 @@
     MIN_DATE_SIZE       =  13,
     MAX_DATE_SIZE       =  32,
     ASN_GEN_TIME_SZ     =  15,     /* 7 numbers * 2 + Zulu tag */
+#ifndef NO_RSA
     MAX_ENCODED_SIG_SZ  = 512,
+#elif defined(HAVE_ECC)
+    MAX_ENCODED_SIG_SZ  = 140,
+#elif defined(HAVE_CURVE448)
+    MAX_ENCODED_SIG_SZ  = 114,
+#else
+    MAX_ENCODED_SIG_SZ  =  64,
+#endif
     MAX_SIG_SZ          = 256,
     MAX_ALGO_SZ         =  20,
     MAX_SHORT_SZ        =   6,     /* asn int + byte len + 4 byte length */
@@ -165,15 +312,20 @@
     MAX_EXP_SZ          =   5,     /* enum(contextspec|con|exp) + length(4) */
     MAX_PRSTR_SZ        =   5,     /* enum(prstr) + length(4) */
     MAX_VERSION_SZ      =   5,     /* enum + id + version(byte) + (header(2))*/
-    MAX_ENCODED_DIG_SZ  =  73,     /* sha512 + enum(bit or octet) + length(4) */
+    MAX_ENCODED_DIG_ASN_SZ= 9,     /* enum(bit or octet) + length(4) */
+    MAX_ENCODED_DIG_SZ  =  64 + MAX_ENCODED_DIG_ASN_SZ, /* asn header + sha512 */
     MAX_RSA_INT_SZ      = 517,     /* RSA raw sz 4096 for bits + tag + len(4) */
+    MAX_DSA_INT_SZ      = 261,     /* DSA raw sz 2048 for bits + tag + len(4) */
     MAX_NTRU_KEY_SZ     = 610,     /* NTRU 112 bit public key */
     MAX_NTRU_ENC_SZ     = 628,     /* NTRU 112 bit DER public encoding */
     MAX_LENGTH_SZ       =   4,     /* Max length size for DER encoding */
     MAX_RSA_E_SZ        =  16,     /* Max RSA public e size */
     MAX_CA_SZ           =  32,     /* Max encoded CA basic constraint length */
     MAX_SN_SZ           =  35,     /* Max encoded serial number (INT) length */
-    MAX_DER_DIGEST_SZ   = MAX_ENCODED_DIG_SZ + MAX_ALGO_SZ + MAX_SEQ_SZ, /* Maximum DER digest size */
+    MAX_DER_DIGEST_SZ     = MAX_ENCODED_DIG_SZ + MAX_ALGO_SZ + MAX_SEQ_SZ,
+                            /* Maximum DER digest size */
+    MAX_DER_DIGEST_ASN_SZ = MAX_ENCODED_DIG_ASN_SZ + MAX_ALGO_SZ + MAX_SEQ_SZ,
+                            /* Maximum DER digest ASN header size */
 #ifdef WOLFSSL_CERT_GEN
     #ifdef WOLFSSL_CERT_REQ
                           /* Max encoded cert req attributes length */
@@ -187,19 +339,21 @@
     #endif
                                    /* Max total extensions, id + len + others */
 #endif
-#if defined(WOLFSSL_CERT_EXT) || defined(OPENSSL_EXTRA)
+#if defined(WOLFSSL_CERT_EXT) || defined(OPENSSL_EXTRA) || defined(HAVE_PKCS7)
     MAX_OID_SZ          = 32,      /* Max DER length of OID*/
     MAX_OID_STRING_SZ   = 64,      /* Max string length representation of OID*/
 #endif
 #ifdef WOLFSSL_CERT_EXT
-    MAX_KID_SZ			= 45,	   /* Max encoded KID length (SHA-256 case) */
+    MAX_KID_SZ          = 45,      /* Max encoded KID length (SHA-256 case) */
     MAX_KEYUSAGE_SZ     = 18,      /* Max encoded Key Usage length */
     MAX_EXTKEYUSAGE_SZ  = 12 + (6 * (8 + 2)) +
                           CTC_MAX_EKU_OID_SZ, /* Max encoded ExtKeyUsage
-                        (SEQ/LEN + OBJID + OCTSTR/LEN + SEQ + (6 * (SEQ + OID))) */
+                          (SEQ/LEN + OBJID + OCTSTR/LEN + SEQ +
+                          (6 * (SEQ + OID))) */
     MAX_CERTPOL_NB      = CTC_MAX_CERTPOL_NB,/* Max number of Cert Policy */
     MAX_CERTPOL_SZ      = CTC_MAX_CERTPOL_SZ,
 #endif
+    MAX_AIA_SZ          = 2,       /* Max Authority Info Access extension size*/
     MAX_NAME_ENTRIES    = 5,       /* extra entries added to x509 name struct */
     OCSP_NONCE_EXT_SZ   = 35,      /* OCSP Nonce Extension size */
     MAX_OCSP_EXT_SZ     = 58,      /* Max OCSP Extension length */
@@ -213,9 +367,11 @@
     HEADER_ENCRYPTED_KEY_SIZE = 0,
 #endif
     TRAILING_ZERO       = 1,       /* Used for size of zero pad */
+    ASN_TAG_SZ          = 1,       /* single byte ASN.1 tag */
     MIN_VERSION_SZ      = 3,       /* Min bytes needed for GetMyVersion */
-#if defined(OPENSSL_ALL)  || defined(WOLFSSL_MYSQL_COMPATIBLE) || defined(WOLFSSL_NGINX) || \
-    defined(WOLFSSL_HAPROXY) || defined(OPENSSL_EXTRA)
+#if defined(OPENSSL_ALL)  || defined(WOLFSSL_MYSQL_COMPATIBLE) || \
+    defined(WOLFSSL_NGINX) || defined(WOLFSSL_HAPROXY) || \
+    defined(OPENSSL_EXTRA) || defined(HAVE_PKCS7)
     MAX_TIME_STRING_SZ  = 25,      /* Max length of formatted time string */
 #endif
 
@@ -242,18 +398,26 @@
     oidCmsKeyAgreeType  = 13,
     oidPBEType          = 14,
     oidHmacType         = 15,
+    oidCompressType     = 16,
+    oidCertNameType     = 17,
+    oidTlsExtType       = 18,
+    oidCrlExtType       = 19,
     oidIgnoreType
 };
 
 
 enum Hash_Sum  {
-    MD2h    = 646,
-    MD5h    = 649,
-    SHAh    =  88,
-    SHA224h = 417,
-    SHA256h = 414,
-    SHA384h = 415,
-    SHA512h = 416
+    MD2h      = 646,
+    MD5h      = 649,
+    SHAh      =  88,
+    SHA224h   = 417,
+    SHA256h   = 414,
+    SHA384h   = 415,
+    SHA512h   = 416,
+    SHA3_224h = 420,
+    SHA3_256h = 421,
+    SHA3_384h = 422,
+    SHA3_512h = 423
 };
 
 
@@ -261,12 +425,18 @@
 enum Block_Sum {
 #ifdef WOLFSSL_AES_128
     AES128CBCb = 414,
+    AES128GCMb = 418,
+    AES128CCMb = 419,
 #endif
 #ifdef WOLFSSL_AES_192
     AES192CBCb = 434,
+    AES192GCMb = 438,
+    AES192CCMb = 439,
 #endif
 #ifdef WOLFSSL_AES_256
     AES256CBCb = 454,
+    AES256GCMb = 458,
+    AES256CCMb = 459,
 #endif
 #ifndef NO_DES3
     DESb       = 69,
@@ -281,23 +451,27 @@
     RSAk     = 645,
     NTRUk    = 274,
     ECDSAk   = 518,
-    ED25519k = 256
+    ED25519k = 256,
+    ED448k   = 257,
+    DHk      = 647, /* dhKeyAgreement OID: 1.2.840.113549.1.3.1 */
 };
 
-
-#ifndef NO_AES
+#if !defined(NO_AES) || defined(HAVE_PKCS7)
 enum KeyWrap_Sum {
 #ifdef WOLFSSL_AES_128
-    AES128_WRAP = 417,
+    AES128_WRAP  = 417,
 #endif
 #ifdef WOLFSSL_AES_192
-    AES192_WRAP = 437,
+    AES192_WRAP  = 437,
 #endif
 #ifdef WOLFSSL_AES_256
-    AES256_WRAP = 457
+    AES256_WRAP  = 457,
+#endif
+#ifdef HAVE_PKCS7
+    PWRI_KEK_WRAP = 680  /*id-alg-PWRI-KEK, 1.2.840.113549.1.9.16.3.9 */
 #endif
 };
-#endif /* !NO_AES */
+#endif /* !NO_AES || PKCS7 */
 
 enum Key_Agree {
     dhSinglePass_stdDH_sha1kdf_scheme   = 464,
@@ -308,38 +482,6 @@
 };
 
 
-enum Ecc_Sum {
-    ECC_SECP112R1_OID = 182,
-    ECC_SECP112R2_OID = 183,
-    ECC_SECP128R1_OID = 204,
-    ECC_SECP128R2_OID = 205,
-    ECC_SECP160R1_OID = 184,
-    ECC_SECP160R2_OID = 206,
-    ECC_SECP160K1_OID = 185,
-    ECC_BRAINPOOLP160R1_OID = 98,
-    ECC_SECP192R1_OID = 520,
-    ECC_PRIME192V2_OID = 521,
-    ECC_PRIME192V3_OID = 522,
-    ECC_SECP192K1_OID = 207,
-    ECC_BRAINPOOLP192R1_OID = 100,
-    ECC_SECP224R1_OID = 209,
-    ECC_SECP224K1_OID = 208,
-    ECC_BRAINPOOLP224R1_OID = 102,
-    ECC_PRIME239V1_OID = 523,
-    ECC_PRIME239V2_OID = 524,
-    ECC_PRIME239V3_OID = 525,
-    ECC_SECP256R1_OID = 526,
-    ECC_SECP256K1_OID = 186,
-    ECC_BRAINPOOLP256R1_OID = 104,
-    ECC_X25519_OID = 365,
-    ECC_ED25519_OID = 256,
-    ECC_BRAINPOOLP320R1_OID = 106,
-    ECC_SECP384R1_OID = 210,
-    ECC_BRAINPOOLP384R1_OID = 108,
-    ECC_BRAINPOOLP512R1_OID = 110,
-    ECC_SECP521R1_OID = 211,
-};
-
 
 enum KDF_Sum {
     PBKDF2_OID = 660
@@ -347,10 +489,14 @@
 
 
 enum HMAC_Sum {
-    HMAC_SHA224_OID = 652,
-    HMAC_SHA256_OID = 653,
-    HMAC_SHA384_OID = 654,
-    HMAC_SHA512_OID = 655
+    HMAC_SHA224_OID   = 652,
+    HMAC_SHA256_OID   = 653,
+    HMAC_SHA384_OID   = 654,
+    HMAC_SHA512_OID   = 655,
+    HMAC_SHA3_224_OID = 426,
+    HMAC_SHA3_256_OID = 427,
+    HMAC_SHA3_384_OID = 428,
+    HMAC_SHA3_512_OID = 429
 };
 
 
@@ -371,7 +517,8 @@
     POLICY_MAP_OID            = 147,
     POLICY_CONST_OID          = 150,
     ISSUE_ALT_NAMES_OID       = 132,
-    TLS_FEATURE_OID           = 92   /* id-pe 24 */
+    TLS_FEATURE_OID           = 92,  /* id-pe 24 */
+    NETSCAPE_CT_OID           = 753  /* 2.16.840.1.113730.1.1 */
 };
 
 enum CertificatePolicy_Sum {
@@ -397,12 +544,19 @@
     EKU_OCSP_SIGN_OID   = 79   /* 1.3.6.1.5.5.7.3.9, id-kp-OCSPSigning     */
 };
 
+#ifdef HAVE_LIBZ
+enum CompressAlg_Sum {
+    ZLIBc = 679  /* 1.2.840.113549.1.9.16.3.8, id-alg-zlibCompress */
+};
+#endif
 
 enum VerifyType {
     NO_VERIFY   = 0,
     VERIFY      = 1,
     VERIFY_CRL  = 2,
-    VERIFY_OCSP = 3
+    VERIFY_OCSP = 3,
+    VERIFY_NAME = 4,
+    VERIFY_SKIP_DATE = 5,
 };
 
 #ifdef WOLFSSL_CERT_EXT
@@ -453,6 +607,7 @@
 };
 
 #define DOMAIN_COMPONENT_MAX 10
+#define DN_NAMES_MAX 9
 
 struct DecodedName {
     char*   fullName;
@@ -460,28 +615,54 @@
     int     entryCount;
     int     cnIdx;
     int     cnLen;
+    int     cnNid;
     int     snIdx;
     int     snLen;
+    int     snNid;
     int     cIdx;
     int     cLen;
+    int     cNid;
     int     lIdx;
     int     lLen;
+    int     lNid;
     int     stIdx;
     int     stLen;
+    int     stNid;
     int     oIdx;
     int     oLen;
+    int     oNid;
     int     ouIdx;
     int     ouLen;
+#ifdef WOLFSSL_CERT_EXT
+    int     bcIdx;
+    int     bcLen;
+    int     jcIdx;
+    int     jcLen;
+    int     jsIdx;
+    int     jsLen;
+#endif
+    int     ouNid;
     int     emailIdx;
     int     emailLen;
+    int     emailNid;
     int     uidIdx;
     int     uidLen;
+    int     uidNid;
     int     serialIdx;
     int     serialLen;
+    int     serialNid;
     int     dcIdx[DOMAIN_COMPONENT_MAX];
     int     dcLen[DOMAIN_COMPONENT_MAX];
     int     dcNum;
     int     dcMode;
+#if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
+    /* hold the location / order with which each of the DN tags was found
+     *
+     * example of ASN_DOMAIN_COMPONENT at index 0 if first found and so on.
+     */
+    int     loc[DOMAIN_COMPONENT_MAX + DN_NAMES_MAX];
+    int     locSz;
+#endif
 };
 
 enum SignatureState {
@@ -517,7 +698,7 @@
     byte* out;
     byte* plain;
 #endif
-#if defined(HAVE_ECC) || defined(HAVE_ED25519)
+#if defined(HAVE_ECC) || defined(HAVE_ED25519) || defined(HAVE_ED448)
     int verify;
 #endif
     union {
@@ -530,6 +711,9 @@
     #ifdef HAVE_ED25519
         struct ed25519_key* ed25519;
     #endif
+    #ifdef HAVE_ED448
+        struct ed448_key* ed448;
+    #endif
         void* ptr;
     } key;
     int devId;
@@ -552,6 +736,16 @@
     void* pkCtxRsa;
 #endif
 #endif /* HAVE_PK_CALLBACKS */
+#ifndef NO_RSA
+#ifdef WOLFSSL_RENESAS_TSIP_TLS
+    byte verifyByTSIP;
+    word32 certBegin;
+    word32 pubkey_n_start;
+    word32 pubkey_n_len;
+    word32 pubkey_e_start;
+    word32 pubkey_e_len;
+#endif
+#endif
 };
 
 enum CertSignState {
@@ -571,6 +765,20 @@
     int state; /* enum CertSignState */
 };
 
+#ifndef WOLFSSL_MAX_PATH_LEN
+    /* RFC 5280 Section 6.1.2. "Initialization" - item (k) defines
+     *     (k)  max_path_length:  this integer is initialized to "n", is
+     *     decremented for each non-self-issued certificate in the path,
+     *     and may be reduced to the value in the path length constraint
+     *     field within the basic constraints extension of a CA
+     *     certificate.
+     *
+     * wolfSSL has arbitrarily selected the value 127 for "n" in the above
+     * description. Users can modify the maximum path length by setting
+     * WOLFSSL_MAX_PATH_LEN to a preferred value at build time
+     */
+    #define WOLFSSL_MAX_PATH_LEN 127
+#endif
 
 typedef struct DecodedCert DecodedCert;
 typedef struct DecodedName DecodedName;
@@ -583,7 +791,7 @@
 
 
 struct DecodedCert {
-    byte*   publicKey;
+    const byte* publicKey;
     word32  pubKeySize;
     int     pubKeyStored;
     word32  certBegin;               /* offset to start of cert          */
@@ -601,60 +809,68 @@
     byte    subjectHash[KEYID_SIZE]; /* hash of all Names                */
     byte    issuerHash[KEYID_SIZE];  /* hash of all Names                */
 #ifdef HAVE_OCSP
+    byte    subjectKeyHash[KEYID_SIZE]; /* hash of the public Key         */
     byte    issuerKeyHash[KEYID_SIZE]; /* hash of the public Key         */
 #endif /* HAVE_OCSP */
-    byte*   signature;               /* not owned, points into raw cert  */
+    const byte* signature;           /* not owned, points into raw cert  */
     char*   subjectCN;               /* CommonName                       */
     int     subjectCNLen;            /* CommonName Length                */
     char    subjectCNEnc;            /* CommonName Encoding              */
     char    issuer[ASN_NAME_MAX];    /* full name including common name  */
     char    subject[ASN_NAME_MAX];   /* full name including common name  */
     int     verify;                  /* Default to yes, but could be off */
-    byte*   source;                  /* byte buffer holder cert, NOT owner */
+    const byte* source;              /* byte buffer holder cert, NOT owner */
     word32  srcIdx;                  /* current offset into buffer       */
     word32  maxIdx;                  /* max offset based on init size    */
     void*   heap;                    /* for user memory overrides        */
     byte    serial[EXTERNAL_SERIAL_SIZE];  /* raw serial number          */
     int     serialSz;                /* raw serial bytes stored */
-    byte*   extensions;              /* not owned, points into raw cert  */
+    const byte* extensions;          /* not owned, points into raw cert  */
     int     extensionsSz;            /* length of cert extensions */
     word32  extensionsIdx;           /* if want to go back and parse later */
-    byte*   extAuthInfo;             /* Authority Information Access URI */
+    const byte* extAuthInfo;         /* Authority Information Access URI */
     int     extAuthInfoSz;           /* length of the URI                */
-    byte*   extCrlInfo;              /* CRL Distribution Points          */
+#if defined(OPENSSL_ALL) || defined(WOLFSSL_QT)
+    const byte* extAuthInfoCaIssuer; /* Authority Info Access caIssuer URI */
+    int     extAuthInfoCaIssuerSz;   /* length of the caIssuer URI         */
+#endif
+    const byte* extCrlInfo;          /* CRL Distribution Points          */
     int     extCrlInfoSz;            /* length of the URI                */
     byte    extSubjKeyId[KEYID_SIZE]; /* Subject Key ID                  */
     byte    extAuthKeyId[KEYID_SIZE]; /* Authority Key ID                */
     byte    pathLength;              /* CA basic constraint path length  */
+    byte    maxPathLen;              /* max_path_len see RFC 5280 section
+                                      * 6.1.2 "Initialization" - (k) for
+                                      * description of max_path_len */
     word16  extKeyUsage;             /* Key usage bitfield               */
     byte    extExtKeyUsage;          /* Extended Key usage bitfield      */
 
 #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
-    byte*   extExtKeyUsageSrc;
+    const byte* extExtKeyUsageSrc;
     word32  extExtKeyUsageSz;
     word32  extExtKeyUsageCount;
-    byte*   extAuthKeyIdSrc;
+    const byte* extAuthKeyIdSrc;
     word32  extAuthKeyIdSz;
-    byte*   extSubjKeyIdSrc;
+    const byte* extSubjKeyIdSrc;
     word32  extSubjKeyIdSz;
 #endif
 
-#if defined(HAVE_ECC) || defined(HAVE_ED25519)
+#if defined(HAVE_ECC) || defined(HAVE_ED25519) || defined(HAVE_ED448)
     word32  pkCurveOID;           /* Public Key's curve OID */
 #endif /* HAVE_ECC */
-    byte*   beforeDate;
+    const byte* beforeDate;
     int     beforeDateLen;
-    byte*   afterDate;
+    const byte* afterDate;
     int     afterDateLen;
-#ifdef HAVE_PKCS7
-    byte*   issuerRaw;               /* pointer to issuer inside source */
+#if defined(HAVE_PKCS7) || defined(WOLFSSL_CERT_EXT)
+    const byte* issuerRaw;           /* pointer to issuer inside source */
     int     issuerRawLen;
 #endif
-#ifndef IGNORE_NAME_CONSTRAINT
-    byte*   subjectRaw;               /* pointer to subject inside source */
+#if !defined(IGNORE_NAME_CONSTRAINTS) || defined(WOLFSSL_CERT_EXT)
+    const byte* subjectRaw;          /* pointer to subject inside source */
     int     subjectRawLen;
 #endif
-#ifdef WOLFSSL_CERT_GEN
+#if defined(WOLFSSL_CERT_GEN) || defined(WOLFSSL_CERT_EXT)
     /* easy access to subject info for other sign */
     char*   subjectSN;
     int     subjectSNLen;
@@ -674,6 +890,20 @@
     char*   subjectOU;
     int     subjectOULen;
     char    subjectOUEnc;
+    char*   subjectSND;
+    int     subjectSNDLen;
+    char    subjectSNDEnc;
+#ifdef WOLFSSL_CERT_EXT
+    char*   subjectBC;
+    int     subjectBCLen;
+    char    subjectBCEnc;
+    char*   subjectJC;
+    int     subjectJCLen;
+    char    subjectJCEnc;
+    char*   subjectJS;
+    int     subjectJSLen;
+    char    subjectJSEnc;
+#endif
     char*   subjectEmail;
     int     subjectEmailLen;
 #endif /* WOLFSSL_CERT_GEN */
@@ -692,10 +922,18 @@
 #ifdef WOLFSSL_CERT_EXT
     char    extCertPolicies[MAX_CERTPOL_NB][MAX_CERTPOL_SZ];
     int     extCertPoliciesNb;
-#endif /* WOLFSSL_CERT_EXT */
+#endif /* defined(WOLFSSL_CERT_GEN) || defined(WOLFSSL_CERT_EXT) */
 
     Signer* ca;
+#ifndef NO_CERTS
     SignatureCtx sigCtx;
+#endif
+#ifdef WOLFSSL_RENESAS_TSIP
+    byte*  tsip_encRsaKeyIdx;
+#endif
+
+    int badDate;
+    int criticalExt;
 
     /* Option Bits */
     byte subjectCNStored : 1;      /* have we saved a copy we own */
@@ -714,7 +952,8 @@
     byte extBasicConstSet : 1;
     byte extSubjAltNameSet : 1;
     byte inhibitAnyOidSet : 1;
-#ifdef WOLFSSL_SEP
+    byte selfSigned : 1;           /* Indicates subject and issuer are same */
+#if defined(WOLFSSL_SEP) || defined(WOLFSSL_QT)
     byte extCertPolicySet : 1;
 #endif
 #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
@@ -730,7 +969,7 @@
     byte extKeyUsageCrit : 1;
     byte extExtKeyUsageCrit : 1;
 #endif /* OPENSSL_EXTRA */
-#ifdef WOLFSSL_SEP
+#if defined(WOLFSSL_SEP) || defined(WOLFSSL_QT)
     byte extCertPolicyCrit : 1;
 #endif
 
@@ -749,9 +988,11 @@
     word32  pubKeySize;
     word32  keyOID;                  /* key type */
     word16  keyUsage;
+    byte    maxPathLen;
     byte    pathLength;
-    byte    pathLengthSet;
-    byte*   publicKey;
+    byte    pathLengthSet : 1;
+    byte    selfSigned : 1;
+    const byte* publicKey;
     int     nameLen;
     char*   name;                    /* common name */
 #ifndef IGNORE_NAME_CONSTRAINTS
@@ -764,9 +1005,15 @@
         byte    subjectKeyIdHash[SIGNER_DIGEST_SIZE];
                                      /* sha hash of names in certificate */
     #endif
+    #ifdef HAVE_OCSP
+        byte subjectKeyHash[KEYID_SIZE];
+    #endif
 #ifdef WOLFSSL_SIGNER_DER_CERT
     DerBuffer* derCert;
 #endif
+#ifdef WOLFSSL_RENESAS_TSIP_TLS
+    word32 cm_idx;
+#endif
     Signer* next;
 };
 
@@ -801,6 +1048,8 @@
     #define WOLFSSL_ASN_API WOLFSSL_LOCAL
 #endif
 
+WOLFSSL_LOCAL int CalcHashId(const byte* data, word32 len, byte* hash);
+
 WOLFSSL_ASN_API int wc_BerToDer(const byte* ber, word32 berSz, byte* der,
                                 word32* derSz);
 
@@ -808,13 +1057,20 @@
 #ifndef IGNORE_NAME_CONSTRAINTS
     WOLFSSL_ASN_API void FreeNameSubtrees(Base_entry*, void*);
 #endif /* IGNORE_NAME_CONSTRAINTS */
-WOLFSSL_ASN_API void InitDecodedCert(DecodedCert*, byte*, word32, void*);
+WOLFSSL_ASN_API void InitDecodedCert(DecodedCert*, const byte*, word32, void*);
 WOLFSSL_ASN_API void FreeDecodedCert(DecodedCert*);
 WOLFSSL_ASN_API int  ParseCert(DecodedCert*, int type, int verify, void* cm);
 
-WOLFSSL_LOCAL int DecodePolicyOID(char *o, word32 oSz, byte *in, word32 inSz);
+WOLFSSL_LOCAL int DecodePolicyOID(char *o, word32 oSz,
+                                  const byte *in, word32 inSz);
+WOLFSSL_LOCAL int EncodePolicyOID(byte *out, word32 *outSz,
+                                  const char *in, void* heap);
+WOLFSSL_API int CheckCertSignature(const byte*,word32,void*,void* cm);
+WOLFSSL_LOCAL int CheckCertSignaturePubKey(const byte* cert, word32 certSz,
+        void* heap, const byte* pubKey, word32 pubKeySz, int pubKeyOID);
 WOLFSSL_LOCAL int ParseCertRelative(DecodedCert*,int type,int verify,void* cm);
 WOLFSSL_LOCAL int DecodeToKey(DecodedCert*, int verify);
+WOLFSSL_LOCAL int wc_GetPubX509(DecodedCert* cert, int verify, int* badDate);
 
 WOLFSSL_LOCAL const byte* OidFromId(word32 id, word32 type, word32* oidSz);
 WOLFSSL_LOCAL Signer* MakeSigner(void*);
@@ -826,12 +1082,21 @@
 #endif /* WOLFSSL_TRUST_PEER_CERT */
 
 WOLFSSL_ASN_API int ToTraditional(byte* buffer, word32 length);
+WOLFSSL_ASN_API int ToTraditional_ex(byte* buffer, word32 length,
+                                     word32* algId);
 WOLFSSL_LOCAL int ToTraditionalInline(const byte* input, word32* inOutIdx,
                                       word32 length);
-WOLFSSL_LOCAL int ToTraditionalEnc(byte* buffer, word32 length,const char*,int);
+WOLFSSL_LOCAL int ToTraditionalInline_ex(const byte* input, word32* inOutIdx,
+                                         word32 length, word32* algId);
+WOLFSSL_LOCAL int ToTraditionalEnc(byte* buffer, word32 length,const char*,int,
+                                   word32* algId);
 WOLFSSL_ASN_API int UnTraditionalEnc(byte* key, word32 keySz, byte* out,
         word32* outSz, const char* password, int passwordSz, int vPKCS,
         int vAlgo, byte* salt, word32 saltSz, int itt, WC_RNG* rng, void* heap);
+WOLFSSL_ASN_API int TraditionalEnc(byte* key, word32 keySz, byte* out,
+        word32* outSz, const char* password, int passwordSz, int vPKCS,
+        int vAlgo, int encAlgId, byte* salt, word32 saltSz, int itt,
+        WC_RNG* rng, void* heap);
 WOLFSSL_LOCAL int DecryptContent(byte* input, word32 sz,const char* psw,int pswSz);
 WOLFSSL_LOCAL int EncryptContent(byte* input, word32 sz, byte* out, word32* outSz,
         const char* password,int passwordSz, int vPKCS, int vAlgo,
@@ -844,9 +1109,14 @@
     defined(WOLFSSL_NGINX) || defined(WOLFSSL_HAPROXY)
 WOLFSSL_LOCAL int GetTimeString(byte* date, int format, char* buf, int len);
 #endif
+#if !defined(NO_ASN_TIME) && defined(HAVE_PKCS7)
+WOLFSSL_LOCAL int GetAsnTimeString(void* currTime, byte* buf, word32 len);
+#endif
 WOLFSSL_LOCAL int ExtractDate(const unsigned char* date, unsigned char format,
                                                  wolfssl_tm* certTime, int* idx);
+WOLFSSL_LOCAL int DateGreaterThan(const struct tm* a, const struct tm* b);
 WOLFSSL_LOCAL int ValidateDate(const byte* date, byte format, int dateType);
+WOLFSSL_LOCAL int wc_OBJ_sn2nid(const char *sn);
 
 /* ASN.1 helper functions */
 #ifdef WOLFSSL_CERT_GEN
@@ -854,13 +1124,24 @@
 #endif
 WOLFSSL_LOCAL int GetShortInt(const byte* input, word32* inOutIdx, int* number,
                               word32 maxIdx);
-WOLFSSL_LOCAL char* GetSigName(int oid);
+WOLFSSL_LOCAL int SetShortInt(byte* input, word32* inOutIdx, word32 number,
+                              word32 maxIdx);
+
+WOLFSSL_LOCAL const char* GetSigName(int oid);
 WOLFSSL_LOCAL int GetLength(const byte* input, word32* inOutIdx, int* len,
                            word32 maxIdx);
+WOLFSSL_LOCAL int GetLength_ex(const byte* input, word32* inOutIdx, int* len,
+                           word32 maxIdx, int check);
 WOLFSSL_LOCAL int GetSequence(const byte* input, word32* inOutIdx, int* len,
                              word32 maxIdx);
+WOLFSSL_LOCAL int GetSequence_ex(const byte* input, word32* inOutIdx, int* len,
+                           word32 maxIdx, int check);
+WOLFSSL_LOCAL int GetOctetString(const byte* input, word32* inOutIdx, int* len,
+                         word32 maxIdx);
 WOLFSSL_LOCAL int GetSet(const byte* input, word32* inOutIdx, int* len,
                         word32 maxIdx);
+WOLFSSL_LOCAL int GetSet_ex(const byte* input, word32* inOutIdx, int* len,
+                        word32 maxIdx, int check);
 WOLFSSL_LOCAL int GetMyVersion(const byte* input, word32* inOutIdx,
                               int* version, word32 maxIdx);
 WOLFSSL_LOCAL int GetInt(mp_int* mpi, const byte* input, word32* inOutIdx,
@@ -873,26 +1154,39 @@
     WOLFSSL_LOCAL int DecodeObjectId(const byte* in, word32 inSz,
         word16* out, word32* outSz);
 #endif
+WOLFSSL_LOCAL int GetASNObjectId(const byte* input, word32* inOutIdx, int* len,
+                                 word32 maxIdx);
+WOLFSSL_LOCAL int SetObjectId(int len, byte* output);
 WOLFSSL_LOCAL int GetObjectId(const byte* input, word32* inOutIdx, word32* oid,
                               word32 oidType, word32 maxIdx);
 WOLFSSL_LOCAL int GetAlgoId(const byte* input, word32* inOutIdx, word32* oid,
                            word32 oidType, word32 maxIdx);
+WOLFSSL_LOCAL int GetASNTag(const byte* input, word32* idx, byte* tag,
+                            word32 inputSz);
 WOLFSSL_LOCAL word32 SetLength(word32 length, byte* output);
 WOLFSSL_LOCAL word32 SetSequence(word32 len, byte* output);
 WOLFSSL_LOCAL word32 SetOctetString(word32 len, byte* output);
+#if (defined(WOLFSSL_QT) || defined(OPENSSL_ALL)) && !defined(NO_DH) \
+    || defined(WOLFSSL_OPENSSH)
+WOLFSSL_LOCAL int wc_DhParamsToDer(DhKey* key, byte* out, word32* outSz);
+WOLFSSL_LOCAL int wc_DhPubKeyToDer(DhKey* key, byte* out, word32* outSz);
+WOLFSSL_LOCAL int wc_DhPrivKeyToDer(DhKey* key, byte* out, word32* outSz);
+#endif
+WOLFSSL_LOCAL word32 SetBitString(word32 len, byte unusedBits, byte* output);
 WOLFSSL_LOCAL word32 SetImplicit(byte tag,byte number,word32 len,byte* output);
 WOLFSSL_LOCAL word32 SetExplicit(byte number, word32 len, byte* output);
 WOLFSSL_LOCAL word32 SetSet(word32 len, byte* output);
 WOLFSSL_LOCAL word32 SetAlgoID(int algoOID,byte* output,int type,int curveSz);
 WOLFSSL_LOCAL int SetMyVersion(word32 version, byte* output, int header);
 WOLFSSL_LOCAL int SetSerialNumber(const byte* sn, word32 snSz, byte* output,
-    int maxSnSz);
+    word32 outputSz, int maxSnSz);
 WOLFSSL_LOCAL int GetSerialNumber(const byte* input, word32* inOutIdx,
     byte* serial, int* serialSz, word32 maxIdx);
 WOLFSSL_LOCAL int GetNameHash(const byte* source, word32* idx, byte* hash,
                              int maxIdx);
 WOLFSSL_LOCAL int wc_CheckPrivateKey(byte* key, word32 keySz, DecodedCert* der);
-WOLFSSL_LOCAL int RsaPublicKeyDerSize(RsaKey* key, int with_header);
+WOLFSSL_LOCAL int StoreDHparams(byte* out, word32* outLen, mp_int* p, mp_int* g);
+WOLFSSL_LOCAL int FlattenAltNames( byte*, word32, const DNS_entry*);
 
 #ifdef HAVE_ECC
     /* ASN sig helpers */
@@ -901,12 +1195,18 @@
     WOLFSSL_LOCAL int DecodeECC_DSA_Sig(const byte* sig, word32 sigLen,
                                        mp_int* r, mp_int* s);
 #endif
+#if defined HAVE_ECC && (defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL))
+WOLFSSL_API int EccEnumToNID(int n);
+#endif
 
 WOLFSSL_LOCAL void InitSignatureCtx(SignatureCtx* sigCtx, void* heap, int devId);
 WOLFSSL_LOCAL void FreeSignatureCtx(SignatureCtx* sigCtx);
 
 #ifndef NO_CERTS
 
+WOLFSSL_LOCAL int wc_EncryptedInfoParse(EncryptedInfo* info, char** pBuffer,
+                                        size_t bufSz);
+
 WOLFSSL_LOCAL int PemToDer(const unsigned char* buff, long sz, int type,
                           DerBuffer** pDer, void* heap, EncryptedInfo* info,
                           int* eccKey);
@@ -918,14 +1218,19 @@
 #ifdef WOLFSSL_CERT_GEN
 
 enum cert_enums {
-    NAME_ENTRIES    =  8,
+#ifdef WOLFSSL_CERT_EXT
+    NAME_ENTRIES    =  10,
+#else
+    NAME_ENTRIES    =  9,
+#endif
     JOINT_LEN       =  2,
     EMAIL_JOINT_LEN =  9,
     PILOT_JOINT_LEN =  10,
     RSA_KEY         = 10,
     NTRU_KEY        = 11,
     ECC_KEY         = 12,
-    ED25519_KEY     = 13
+    ED25519_KEY     = 13,
+    ED448_KEY       = 14
 };
 
 #endif /* WOLFSSL_CERT_GEN */
@@ -984,6 +1289,8 @@
     byte thisDateFormat;
     byte nextDateFormat;
 #if defined(OPENSSL_ALL) || defined(WOLFSSL_NGINX) || defined(WOLFSSL_HAPROXY)
+    WOLFSSL_ASN1_TIME thisDateParsed;
+    WOLFSSL_ASN1_TIME nextDateParsed;
     byte* thisDateAsn;
     byte* nextDateAsn;
 #endif
@@ -1031,6 +1338,9 @@
     byte   issuerKeyHash[KEYID_SIZE];
     byte*  serial;   /* copy of the serial number in source cert */
     int    serialSz;
+#ifdef OPENSSL_EXTRA
+    WOLFSSL_ASN1_INTEGER* serialInt;
+#endif
     byte*  url;      /* copy of the extAuthInfo in source cert */
     int    urlSz;
 
@@ -1040,6 +1350,22 @@
     void*  ssl;
 };
 
+typedef struct OcspEntry OcspEntry;
+
+#ifdef NO_SHA
+#define OCSP_DIGEST_SIZE WC_SHA256_DIGEST_SIZE
+#else
+#define OCSP_DIGEST_SIZE WC_SHA_DIGEST_SIZE
+#endif
+
+struct OcspEntry
+{
+    OcspEntry *next;                      /* next entry             */
+    byte issuerHash[OCSP_DIGEST_SIZE];    /* issuer hash            */
+    byte issuerKeyHash[OCSP_DIGEST_SIZE]; /* issuer public key hash */
+    CertStatus *status;                   /* OCSP response list     */
+    int totalStatus;                      /* number on list         */
+};
 
 WOLFSSL_LOCAL void InitOcspResponse(OcspResponse*, CertStatus*, byte*, word32);
 WOLFSSL_LOCAL int  OcspResponseDecode(OcspResponse*, void*, void* heap, int);
@@ -1075,7 +1401,7 @@
     word32  sigLength;               /* length of signature              */
     word32  signatureOID;            /* sum of algorithm object id       */
     byte*   signature;               /* pointer into raw source, not owned */
-    byte    issuerHash[SIGNER_DIGEST_SIZE]; /* issuer hash               */
+    byte    issuerHash[SIGNER_DIGEST_SIZE]; /* issuer name hash          */
     byte    crlHash[SIGNER_DIGEST_SIZE]; /* raw crl data hash            */
     byte    lastDate[MAX_DATE_SIZE]; /* last date updated  */
     byte    nextDate[MAX_DATE_SIZE]; /* next update date   */
@@ -1084,6 +1410,10 @@
     RevokedCert* certs;              /* revoked cert list  */
     int          totalCerts;         /* number on list     */
     void*   heap;
+#ifndef NO_SKID
+    byte    extAuthKeyIdSet;
+    byte    extAuthKeyId[SIGNER_DIGEST_SIZE]; /* Authority Key ID        */
+#endif
 };
 
 WOLFSSL_LOCAL void InitDecodedCRL(DecodedCRL*, void* heap);
@@ -1121,6 +1451,7 @@
     PBE_SHA1_DES     = 2,
     PBE_SHA1_DES3    = 3,
     PBE_AES256_CBC   = 4,
+    PBE_AES128_CBC   = 5,
 
     PBE_SHA1_RC4_128_SUM = 657,
     PBE_SHA1_DES3_SUM    = 659,
--- a/wolfssl/wolfcrypt/asn_public.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/asn_public.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* asn_public.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -41,6 +41,10 @@
     typedef struct ed25519_key ed25519_key;
     #define WC_ED25519KEY_TYPE_DEFINED
 #endif
+#ifndef WC_ED448KEY_TYPE_DEFINED
+    typedef struct ed448_key ed448_key;
+    #define WC_ED448KEY_TYPE_DEFINED
+#endif
 #ifndef WC_RSAKEY_TYPE_DEFINED
     typedef struct RsaKey RsaKey;
     #define WC_RSAKEY_TYPE_DEFINED
@@ -50,6 +54,40 @@
     #define WC_RNG_TYPE_DEFINED
 #endif
 
+enum Ecc_Sum {
+    ECC_SECP112R1_OID = 182,
+    ECC_SECP112R2_OID = 183,
+    ECC_SECP128R1_OID = 204,
+    ECC_SECP128R2_OID = 205,
+    ECC_SECP160R1_OID = 184,
+    ECC_SECP160R2_OID = 206,
+    ECC_SECP160K1_OID = 185,
+    ECC_BRAINPOOLP160R1_OID = 98,
+    ECC_SECP192R1_OID = 520,
+    ECC_PRIME192V2_OID = 521,
+    ECC_PRIME192V3_OID = 522,
+    ECC_SECP192K1_OID = 207,
+    ECC_BRAINPOOLP192R1_OID = 100,
+    ECC_SECP224R1_OID = 209,
+    ECC_SECP224K1_OID = 208,
+    ECC_BRAINPOOLP224R1_OID = 102,
+    ECC_PRIME239V1_OID = 523,
+    ECC_PRIME239V2_OID = 524,
+    ECC_PRIME239V3_OID = 525,
+    ECC_SECP256R1_OID = 526,
+    ECC_SECP256K1_OID = 186,
+    ECC_BRAINPOOLP256R1_OID = 104,
+    ECC_X25519_OID = 365,
+    ECC_ED25519_OID = 256,
+    ECC_BRAINPOOLP320R1_OID = 106,
+    ECC_X448_OID = 362,
+    ECC_ED448_OID = 257,
+    ECC_SECP384R1_OID = 210,
+    ECC_BRAINPOOLP384R1_OID = 108,
+    ECC_BRAINPOOLP512R1_OID = 110,
+    ECC_SECP521R1_OID = 211,
+};
+
 
 /* Certificate file Type */
 enum CertType {
@@ -71,7 +109,12 @@
     TRUSTED_PEER_TYPE,
     EDDSA_PRIVATEKEY_TYPE,
     ED25519_TYPE,
-    PKCS12_TYPE
+    ED448_TYPE,
+    PKCS12_TYPE,
+    PKCS8_PRIVATEKEY_TYPE,
+    PKCS8_ENC_PRIVATEKEY_TYPE,
+    DETECT_CERT_TYPE,
+    DH_PRIVATEKEY_TYPE,
 };
 
 
@@ -90,7 +133,8 @@
     CTC_SHA384wECDSA = 525,
     CTC_SHA512wRSA   = 657,
     CTC_SHA512wECDSA = 526,
-    CTC_ED25519      = 256
+    CTC_ED25519      = 256,
+    CTC_ED448        = 257
 };
 
 enum Ctc_Encoding {
@@ -98,16 +142,20 @@
     CTC_PRINTABLE  = 0x13  /* printable */
 };
 
+#ifndef WC_CTC_NAME_SIZE
+    #define WC_CTC_NAME_SIZE 64
+#endif
 #ifndef WC_CTC_MAX_ALT_SIZE
     #define WC_CTC_MAX_ALT_SIZE 16384
 #endif
 
 enum Ctc_Misc {
     CTC_COUNTRY_SIZE  =     2,
-    CTC_NAME_SIZE     =    64,
+    CTC_NAME_SIZE     = WC_CTC_NAME_SIZE,
     CTC_DATE_SIZE     =    32,
     CTC_MAX_ALT_SIZE  = WC_CTC_MAX_ALT_SIZE, /* may be huge, default: 16384 */
-    CTC_SERIAL_SIZE   =    16,
+    CTC_SERIAL_SIZE   =    20,
+    CTC_GEN_SERIAL_SZ =    16,
 #ifdef WOLFSSL_CERT_EXT
     /* AKID could contains: hash + (Option) AuthCertIssuer,AuthCertSerialNum
      * We support only hash */
@@ -127,6 +175,12 @@
     int    dynType; /* DYNAMIC_TYPE_* */
 } DerBuffer;
 
+typedef struct WOLFSSL_ASN1_TIME {
+    unsigned char data[CTC_DATE_SIZE]; /* date bytes */
+    int length;
+    int type;
+} WOLFSSL_ASN1_TIME;
+
 enum {
     IV_SZ   = 32,                   /* max iv sz */
     NAME_SZ = 80,                   /* max one line */
@@ -151,12 +205,28 @@
     char     name[NAME_SZ];    /* cipher name, such as "DES-CBC" */
     byte     iv[IV_SZ];        /* salt or encrypted IV */
 
-    int      set:1;            /* if encryption set */
+    word16   set:1;            /* if encryption set */
 } EncryptedInfo;
 
 
-#ifdef WOLFSSL_CERT_GEN
+#define WOLFSSL_ASN1_INTEGER_MAX 20
+typedef struct WOLFSSL_ASN1_INTEGER {
+    /* size can be increased set at 20 for tag, length then to hold at least 16
+     * byte type */
+    unsigned char  intData[WOLFSSL_ASN1_INTEGER_MAX];
+    /* ASN_INTEGER | LENGTH | hex of number */
+    unsigned char  negative;   /* negative number flag */
 
+    unsigned char* data;
+    unsigned int   dataMax;   /* max size of data buffer */
+    unsigned int   isDynamic:1; /* flag for if data pointer dynamic (1 is yes 0 is no) */
+
+    int length;
+    int type;
+} WOLFSSL_ASN1_INTEGER;
+
+
+#if defined(WOLFSSL_CERT_GEN) || defined(WOLFSSL_CERT_EXT)
 #ifdef WOLFSSL_EKU_OID
     #ifndef CTC_MAX_EKU_NB
         #define CTC_MAX_EKU_NB 1
@@ -168,7 +238,9 @@
     #undef CTC_MAX_EKU_OID_SZ
     #define CTC_MAX_EKU_OID_SZ 0
 #endif
+#endif /* WOLFSSL_CERT_GEN || WOLFSSL_CERT_EXT */
 
+#ifdef WOLFSSL_CERT_GEN
 
 #ifdef WOLFSSL_MULTI_ATTRIB
 #ifndef CTC_MAX_ATTRIB
@@ -200,6 +272,16 @@
     char unitEnc;
     char commonName[CTC_NAME_SIZE];
     char commonNameEnc;
+    char serialDev[CTC_NAME_SIZE];
+    char serialDevEnc;
+#ifdef WOLFSSL_CERT_EXT
+    char busCat[CTC_NAME_SIZE];
+    char busCatEnc;
+    char joiC[CTC_NAME_SIZE];
+    char joiCEnc;
+    char joiSt[CTC_NAME_SIZE];
+    char joiStEnc;
+#endif
     char email[CTC_NAME_SIZE];  /* !!!! email has to be last !!!! */
 #ifdef WOLFSSL_MULTI_ATTRIB
     NameAttrib name[CTC_MAX_ATTRIB];
@@ -243,15 +325,19 @@
 #endif
     char    certPolicies[CTC_MAX_CERTPOL_NB][CTC_MAX_CERTPOL_SZ];
     word16  certPoliciesNb;              /* Number of Cert Policy */
+    byte     issRaw[sizeof(CertName)];   /* raw issuer info */
+    byte     sbjRaw[sizeof(CertName)];   /* raw subject info */
 #endif
 #ifdef WOLFSSL_CERT_REQ
     char     challengePw[CTC_NAME_SIZE];
+    int      challengePwPrintableString; /* encode as PrintableString */
 #endif
-    void*   heap; /* heap hint */
+    void*   decodedCert;    /* internal DecodedCert allocated from heap */
+    byte*   der;            /* Pointer to buffer of current DecodedCert cache */
+    void*   heap;           /* heap hint */
 } Cert;
 
 
-
 /* Initialize and Set Certificate defaults:
    version    = 3 (0x2)
    serial     = 0 (Will be randomly generated)
@@ -286,13 +372,18 @@
 #ifdef WOLFSSL_ALT_NAMES
     WOLFSSL_API int wc_SetAltNames(Cert*, const char*);
 #endif
+
+#ifdef WOLFSSL_CERT_GEN_CACHE
+WOLFSSL_API void wc_SetCert_Free(Cert* cert);
+#endif
+
 WOLFSSL_API int wc_SetIssuerBuffer(Cert*, const byte*, int);
 WOLFSSL_API int wc_SetSubjectBuffer(Cert*, const byte*, int);
 WOLFSSL_API int wc_SetAltNamesBuffer(Cert*, const byte*, int);
 WOLFSSL_API int wc_SetDatesBuffer(Cert*, const byte*, int);
 
 #ifndef NO_ASN_TIME
-WOLFSSL_API int wc_GetCertDates(Cert* cert, struct tm* before, 
+WOLFSSL_API int wc_GetCertDates(Cert* cert, struct tm* before,
     struct tm* after);
 #endif
 
@@ -308,6 +399,9 @@
 WOLFSSL_API int wc_SetSubjectKeyIdFromPublicKey(Cert *cert, RsaKey *rsakey,
                                                 ecc_key *eckey);
 WOLFSSL_API int wc_SetSubjectKeyId(Cert *cert, const char* file);
+WOLFSSL_API int wc_GetSubjectRaw(byte **subjectRaw, Cert *cert);
+WOLFSSL_API int wc_SetSubjectRaw(Cert* cert, const byte* der, int derSz);
+WOLFSSL_API int wc_SetIssuerRaw(Cert* cert, const byte* der, int derSz);
 
 #ifdef HAVE_NTRU
 WOLFSSL_API int wc_SetSubjectKeyIdFromNtruPublicKey(Cert *cert, byte *ntruKey,
@@ -347,7 +441,7 @@
 
 #endif /* WOLFSSL_CERT_GEN */
 
-WOLFSSL_API int wc_GetDateInfo(const byte* certDate, int certDateSz, 
+WOLFSSL_API int wc_GetDateInfo(const byte* certDate, int certDateSz,
     const byte** date, byte* format, int* length);
 #ifndef NO_ASN_TIME
 WOLFSSL_API int wc_GetDateAsCalendarTime(const byte* date, int length,
@@ -356,11 +450,14 @@
 
 #if defined(WOLFSSL_PEM_TO_DER) || defined(WOLFSSL_DER_TO_PEM)
 
-    WOLFSSL_API int wc_PemGetHeaderFooter(int type, const char** header, 
+    WOLFSSL_API int wc_PemGetHeaderFooter(int type, const char** header,
         const char** footer);
 
 #endif
 
+WOLFSSL_API  int wc_AllocDer(DerBuffer** pDer, word32 length, int type, void* heap);
+WOLFSSL_API void wc_FreeDer(DerBuffer** pDer);
+
 #ifdef WOLFSSL_PEM_TO_DER
     WOLFSSL_API int wc_PemToDer(const unsigned char* buff, long longSz, int type,
               DerBuffer** pDer, void* heap, EncryptedInfo* info, int* eccKey);
@@ -395,6 +492,14 @@
                                 word32 outputSz, byte *cipherIno, int type);
 #endif
 
+#ifndef NO_RSA
+    #if !defined(HAVE_USER_RSA)
+    WOLFSSL_API int wc_RsaPublicKeyDecode_ex(const byte* input, word32* inOutIdx,
+        word32 inSz, const byte** n, word32* nSz, const byte** e, word32* eSz);
+    #endif
+    WOLFSSL_API int wc_RsaPublicKeyDerSize(RsaKey* key, int with_header);
+#endif
+
 #ifdef HAVE_ECC
     /* private key helpers */
     WOLFSSL_API int wc_EccPrivateKeyDecode(const byte*, word32*,
@@ -410,6 +515,7 @@
                                               ecc_key*, word32);
     WOLFSSL_API int wc_EccPublicKeyToDer(ecc_key*, byte* output,
                                          word32 inLen, int with_AlgCurve);
+    WOLFSSL_API int wc_EccPublicKeyDerSize(ecc_key*, int with_AlgCurve);
 #endif
 
 #ifdef HAVE_ED25519
@@ -430,6 +536,24 @@
     #endif
 #endif
 
+#ifdef HAVE_ED448
+    /* private key helpers */
+    WOLFSSL_API int wc_Ed448PrivateKeyDecode(const byte*, word32*,
+                                             ed448_key*, word32);
+    WOLFSSL_API int wc_Ed448KeyToDer(ed448_key* key, byte* output,
+                                     word32 inLen);
+    WOLFSSL_API int wc_Ed448PrivateKeyToDer(ed448_key* key, byte* output,
+                                            word32 inLen);
+
+    /* public key helper */
+    WOLFSSL_API int wc_Ed448PublicKeyDecode(const byte*, word32*,
+                                            ed448_key*, word32);
+    #if (defined(WOLFSSL_CERT_GEN) || defined(WOLFSSL_KEY_GEN))
+        WOLFSSL_API int wc_Ed448PublicKeyToDer(ed448_key*, byte* output,
+                                               word32 inLen, int with_AlgCurve);
+    #endif
+#endif
+
 /* DER encode signature */
 WOLFSSL_API word32 wc_EncodeSignature(byte* out, const byte* digest,
                                       word32 digSz, int hashOID);
@@ -457,10 +581,31 @@
 #endif
 
 
+#ifdef WOLFSSL_CERT_PIV
+
+typedef struct _wc_CertPIV {
+    const byte*  cert;
+    word32       certSz;
+    const byte*  certErrDet;
+    word32       certErrDetSz;
+    const byte*  nonce;         /* Identiv Only */
+    word32       nonceSz;       /* Identiv Only */
+    const byte*  signedNonce;   /* Identiv Only */
+    word32       signedNonceSz; /* Identiv Only */
+
+    /* flags */
+    word16       compression:2;
+    word16       isX509:1;
+    word16       isIdentiv:1;
+} wc_CertPIV;
+
+WOLFSSL_API int wc_ParseCertPIV(wc_CertPIV* cert, const byte* buf, word32 totalSz);
+#endif /* WOLFSSL_CERT_PIV */
+
+
 #ifdef __cplusplus
     } /* extern "C" */
 #endif
 
 #endif /* WOLF_CRYPT_ASN_PUBLIC_H */
-
 
--- a/wolfssl/wolfcrypt/blake2-impl.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/blake2-impl.h	Thu Jun 04 23:57:22 2020 +0000
@@ -12,7 +12,7 @@
 */
 /* blake2-impl.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
--- a/wolfssl/wolfcrypt/blake2-int.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/blake2-int.h	Thu Jun 04 23:57:22 2020 +0000
@@ -12,7 +12,7 @@
 */
 /* blake2-int.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -42,7 +42,7 @@
 
 #if defined(_MSC_VER)
     #define ALIGN(x) __declspec(align(x))
-#elif defined(__GNUC__)
+#elif defined(__IAR_SYSTEMS_ICC__) || defined(__GNUC__)
     #define ALIGN(x) __attribute__((aligned(x)))
 #else
     #define ALIGN(x)
@@ -87,13 +87,13 @@
     byte  personal[BLAKE2S_PERSONALBYTES];  /* 32 */
   } blake2s_param;
 
-  ALIGN( 64 ) typedef struct __blake2s_state
+  ALIGN( 32 ) typedef struct __blake2s_state
   {
     word32 h[8];
     word32 t[2];
     word32 f[2];
     byte  buf[2 * BLAKE2S_BLOCKBYTES];
-    word64 buflen;
+    word32 buflen;
     byte  last_node;
   } blake2s_state ;
 
@@ -127,7 +127,7 @@
     blake2s_state S[8][1];
     blake2s_state R[1];
     byte buf[8 * BLAKE2S_BLOCKBYTES];
-    word64 buflen;
+    word32 buflen;
   } blake2sp_state;
 
   typedef struct __blake2bp_state
@@ -143,7 +143,7 @@
   int blake2s_init( blake2s_state *S, const byte outlen );
   int blake2s_init_key( blake2s_state *S, const byte outlen, const void *key, const byte keylen );
   int blake2s_init_param( blake2s_state *S, const blake2s_param *P );
-  int blake2s_update( blake2s_state *S, const byte *in, word64 inlen );
+  int blake2s_update( blake2s_state *S, const byte *in, word32 inlen );
   int blake2s_final( blake2s_state *S, byte *out, byte outlen );
 
   int blake2b_init( blake2b_state *S, const byte outlen );
@@ -154,7 +154,7 @@
 
   int blake2sp_init( blake2sp_state *S, const byte outlen );
   int blake2sp_init_key( blake2sp_state *S, const byte outlen, const void *key, const byte keylen );
-  int blake2sp_update( blake2sp_state *S, const byte *in, word64 inlen );
+  int blake2sp_update( blake2sp_state *S, const byte *in, word32 inlen );
   int blake2sp_final( blake2sp_state *S, byte *out, byte outlen );
 
   int blake2bp_init( blake2bp_state *S, const byte outlen );
@@ -163,10 +163,10 @@
   int blake2bp_final( blake2bp_state *S, byte *out, byte outlen );
 
   /* Simple API */
-  int blake2s( byte *out, const void *in, const void *key, const byte outlen, const word64 inlen, byte keylen );
+  int blake2s( byte *out, const void *in, const void *key, const byte outlen, const word32 inlen, byte keylen );
   int blake2b( byte *out, const void *in, const void *key, const byte outlen, const word64 inlen, byte keylen );
 
-  int blake2sp( byte *out, const void *in, const void *key, const byte outlen, const word64 inlen, byte keylen );
+  int blake2sp( byte *out, const void *in, const void *key, const byte outlen, const word32 inlen, byte keylen );
   int blake2bp( byte *out, const void *in, const void *key, const byte outlen, const word64 inlen, byte keylen );
 
   static WC_INLINE int blake2( byte *out, const void *in, const void *key, const byte outlen, const word64 inlen, byte keylen )
--- a/wolfssl/wolfcrypt/blake2.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/blake2.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* blake2.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -28,7 +28,7 @@
 
 #include <wolfssl/wolfcrypt/settings.h>
 
-#ifdef HAVE_BLAKE2
+#if defined(HAVE_BLAKE2) || defined(HAVE_BLAKE2S)
 
 #include <wolfssl/wolfcrypt/blake2-int.h>
 
@@ -46,29 +46,52 @@
 
 /* in bytes, variable digest size up to 512 bits (64 bytes) */
 enum {
+#ifdef HAVE_BLAKE2B
     BLAKE2B_ID  = WC_HASH_TYPE_BLAKE2B,
-    BLAKE2B_256 = 32   /* 256 bit type, SSL default */
+    BLAKE2B_256 = 32,  /* 256 bit type, SSL default */
+#endif
+#ifdef HAVE_BLAKE2S
+    BLAKE2S_ID  = WC_HASH_TYPE_BLAKE2S,
+    BLAKE2S_256 = 32   /* 256 bit type */
+#endif
 };
 
 
+#ifdef HAVE_BLAKE2B
 /* BLAKE2b digest */
 typedef struct Blake2b {
     blake2b_state S[1];         /* our state */
     word32        digestSz;     /* digest size used on init */
 } Blake2b;
+#endif
+
+#ifdef HAVE_BLAKE2S
+/* BLAKE2s digest */
+typedef struct Blake2s {
+    blake2s_state S[1];         /* our state */
+    word32        digestSz;     /* digest size used on init */
+} Blake2s;
+#endif
 
 
+#ifdef HAVE_BLAKE2B
 WOLFSSL_API int wc_InitBlake2b(Blake2b*, word32);
 WOLFSSL_API int wc_Blake2bUpdate(Blake2b*, const byte*, word32);
 WOLFSSL_API int wc_Blake2bFinal(Blake2b*, byte*, word32);
+#endif
 
+#ifdef HAVE_BLAKE2S
+WOLFSSL_API int wc_InitBlake2s(Blake2s*, word32);
+WOLFSSL_API int wc_Blake2sUpdate(Blake2s*, const byte*, word32);
+WOLFSSL_API int wc_Blake2sFinal(Blake2s*, byte*, word32);
+#endif
 
 
 #ifdef __cplusplus
     }
 #endif
 
-#endif  /* HAVE_BLAKE2 */
+#endif  /* HAVE_BLAKE2 || HAVE_BLAKE2S */
 #endif  /* WOLF_CRYPT_BLAKE2_H */
 
 
--- a/wolfssl/wolfcrypt/camellia.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/camellia.h	Thu Jun 04 23:57:22 2020 +0000
@@ -27,7 +27,7 @@
 
 /* camellia.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
--- a/wolfssl/wolfcrypt/chacha.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/chacha.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* chacha.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -57,6 +57,7 @@
 
 typedef struct ChaCha {
     word32 X[CHACHA_CHUNK_WORDS];           /* state of cipher */
+    word32 left;                            /* number of bytes leftover */
 #ifdef HAVE_INTEL_AVX1
     /* vpshufd reads 16 bytes but we only use bottom 4. */
     byte extra[12];
--- a/wolfssl/wolfcrypt/chacha20_poly1305.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/chacha20_poly1305.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* chacha20_poly1305.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -33,6 +33,8 @@
 #define WOLF_CRYPT_CHACHA20_POLY1305_H
 
 #include <wolfssl/wolfcrypt/types.h>
+#include <wolfssl/wolfcrypt/chacha.h>
+#include <wolfssl/wolfcrypt/poly1305.h>
 
 #if defined(HAVE_CHACHA) && defined(HAVE_POLY1305)
 
@@ -45,18 +47,40 @@
 #define CHACHA20_POLY1305_AEAD_AUTHTAG_SIZE 16
 
 enum {
-    CHACHA20_POLY_1305_ENC_TYPE = 8    /* cipher unique type */
+    CHACHA20_POLY_1305_ENC_TYPE = 8,    /* cipher unique type */
+
+    /* AEAD Cipher Direction */
+    CHACHA20_POLY1305_AEAD_DECRYPT = 0,
+    CHACHA20_POLY1305_AEAD_ENCRYPT = 1,
+
+    /* AEAD State */
+    CHACHA20_POLY1305_STATE_INIT = 0,
+    CHACHA20_POLY1305_STATE_READY = 1,
+    CHACHA20_POLY1305_STATE_AAD = 2,
+    CHACHA20_POLY1305_STATE_DATA = 3,
 };
 
-    /*
-     * The IV for this implementation is 96 bits to give the most flexibility.
-     *
-     * Some protocols may have unique per-invocation inputs that are not
-     * 96-bit in length. For example, IPsec may specify a 64-bit nonce. In
-     * such a case, it is up to the protocol document to define how to
-     * transform the protocol nonce into a 96-bit nonce, for example by
-     * concatenating a constant value.
-     */
+typedef struct ChaChaPoly_Aead {
+    ChaCha   chacha;
+    Poly1305 poly;
+
+    word32   aadLen;
+    word32   dataLen;
+
+    byte     state;
+    byte     isEncrypt:1;
+} ChaChaPoly_Aead;
+
+
+/*
+ * The IV for this implementation is 96 bits to give the most flexibility.
+ *
+ * Some protocols may have unique per-invocation inputs that are not
+ * 96-bit in length. For example, IPsec may specify a 64-bit nonce. In
+ * such a case, it is up to the protocol document to define how to
+ * transform the protocol nonce into a 96-bit nonce, for example by
+ * concatenating a constant value.
+ */
 
 WOLFSSL_API
 int wc_ChaCha20Poly1305_Encrypt(
@@ -76,6 +100,27 @@
                 const byte inAuthTag[CHACHA20_POLY1305_AEAD_AUTHTAG_SIZE],
                 byte* outPlaintext);
 
+WOLFSSL_API
+int wc_ChaCha20Poly1305_CheckTag(
+    const byte authTag[CHACHA20_POLY1305_AEAD_AUTHTAG_SIZE],
+    const byte authTagChk[CHACHA20_POLY1305_AEAD_AUTHTAG_SIZE]);
+
+
+
+/* Implementation of AEAD, which includes support for adding
+    data, then final calculation of authentication tag */
+WOLFSSL_API int wc_ChaCha20Poly1305_Init(ChaChaPoly_Aead* aead,
+    const byte inKey[CHACHA20_POLY1305_AEAD_KEYSIZE],
+    const byte inIV[CHACHA20_POLY1305_AEAD_IV_SIZE],
+    int isEncrypt);
+WOLFSSL_API int wc_ChaCha20Poly1305_UpdateAad(ChaChaPoly_Aead* aead,
+    const byte* inAAD, word32 inAADLen);
+WOLFSSL_API int wc_ChaCha20Poly1305_UpdateData(ChaChaPoly_Aead* aead,
+    const byte* inData, byte* outData, word32 dataLen);
+WOLFSSL_API int wc_ChaCha20Poly1305_Final(ChaChaPoly_Aead* aead,
+    byte outAuthTag[CHACHA20_POLY1305_AEAD_AUTHTAG_SIZE]);
+
+
 #ifdef __cplusplus
     } /* extern "C" */
 #endif
--- a/wolfssl/wolfcrypt/cmac.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/cmac.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* cmac.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -41,7 +41,11 @@
 #if !defined(HAVE_FIPS) || \
     (defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2))
 
-typedef struct Cmac {
+#ifndef WC_CMAC_TYPE_DEFINED
+    typedef struct Cmac Cmac;
+    #define WC_CMAC_TYPE_DEFINED
+#endif
+struct Cmac {
     Aes aes;
     byte buffer[AES_BLOCK_SIZE]; /* partially stored block */
     byte digest[AES_BLOCK_SIZE]; /* running digest */
@@ -49,7 +53,8 @@
     byte k2[AES_BLOCK_SIZE];
     word32 bufferSz;
     word32 totalSz;
-} Cmac;
+};
+
 
 
 typedef enum CmacType {
--- a/wolfssl/wolfcrypt/coding.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/coding.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* coding.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
--- a/wolfssl/wolfcrypt/compress.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/compress.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* compress.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -38,10 +38,15 @@
 
 #define COMPRESS_FIXED 1
 
+#define LIBZ_WINBITS_GZIP 16
+
 
 WOLFSSL_API int wc_Compress(byte*, word32, const byte*, word32, word32);
+WOLFSSL_API int wc_Compress_ex(byte* out, word32 outSz, const byte* in,
+    word32 inSz, word32 flags, word32 windowBits);
 WOLFSSL_API int wc_DeCompress(byte*, word32, const byte*, word32);
-
+WOLFSSL_API int wc_DeCompress_ex(byte* out, word32 outSz, const byte* in,
+    word32 inSz, int windowBits);
 
 #ifdef __cplusplus
     } /* extern "C" */
--- a/wolfssl/wolfcrypt/cpuid.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/cpuid.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* cpuid.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wolfssl/wolfcrypt/cryptocb.h	Thu Jun 04 23:57:22 2020 +0000
@@ -0,0 +1,300 @@
+/* cryptocb.h
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _WOLF_CRYPTO_CB_H_
+#define _WOLF_CRYPTO_CB_H_
+
+#include <wolfssl/wolfcrypt/types.h>
+
+#ifdef __cplusplus
+    extern "C" {
+#endif
+
+/* Defines the Crypto Callback interface version, for compatibility */
+/* Increment this when Crypto Callback interface changes are made */
+#define CRYPTO_CB_VER   2
+
+
+#ifdef WOLF_CRYPTO_CB
+
+#ifndef NO_RSA
+    #include <wolfssl/wolfcrypt/rsa.h>
+#endif
+#ifdef HAVE_ECC
+    #include <wolfssl/wolfcrypt/ecc.h>
+#endif
+#ifndef NO_AES
+    #include <wolfssl/wolfcrypt/aes.h>
+#endif
+#ifndef NO_SHA
+    #include <wolfssl/wolfcrypt/sha.h>
+#endif
+#ifndef NO_SHA256
+    #include <wolfssl/wolfcrypt/sha256.h>
+#endif
+#ifndef NO_HMAC
+    #include <wolfssl/wolfcrypt/hmac.h>
+#endif
+#ifndef WC_NO_RNG
+    #include <wolfssl/wolfcrypt/random.h>
+#endif
+#ifndef NO_DES3
+    #include <wolfssl/wolfcrypt/des3.h>
+#endif
+
+
+/* Crypto Information Structure for callbacks */
+typedef struct wc_CryptoInfo {
+    int algo_type; /* enum wc_AlgoType */
+#if !defined(NO_RSA) || defined(HAVE_ECC)
+    struct {
+        int type; /* enum wc_PkType */
+        union {
+        #ifndef NO_RSA
+            struct {
+                const byte* in;
+                word32      inLen;
+                byte*       out;
+                word32*     outLen;
+                int         type;
+                RsaKey*     key;
+                WC_RNG*     rng;
+            } rsa;
+        #ifdef WOLFSSL_KEY_GEN
+            struct {
+                RsaKey* key;
+                int     size;
+                long    e;
+                WC_RNG* rng;
+            } rsakg;
+        #endif
+        #endif
+        #ifdef HAVE_ECC
+            struct {
+                WC_RNG*  rng;
+                int      size;
+                ecc_key* key;
+                int      curveId;
+            } eckg;
+            struct {
+                ecc_key* private_key;
+                ecc_key* public_key;
+                byte*    out;
+                word32*  outlen;
+            } ecdh;
+            struct {
+                const byte* in;
+                word32      inlen;
+                byte*       out;
+                word32*     outlen;
+                WC_RNG*     rng;
+                ecc_key*    key;
+            } eccsign;
+            struct {
+                const byte* sig;
+                word32      siglen;
+                const byte* hash;
+                word32      hashlen;
+                int*        res;
+                ecc_key*    key;
+            } eccverify;
+        #endif
+        };
+    } pk;
+#endif /* !NO_RSA || HAVE_ECC */
+#if !defined(NO_AES) || !defined(NO_DES3)
+    struct {
+        int type; /* enum wc_CipherType */
+        int enc;
+        union {
+        #ifdef HAVE_AESGCM
+            struct {
+                Aes*        aes;
+                byte*       out;
+                const byte* in;
+                word32      sz;
+                const byte* iv;
+                word32      ivSz;
+                byte*       authTag;
+                word32      authTagSz;
+                const byte* authIn;
+                word32      authInSz;
+            } aesgcm_enc;
+            struct {
+                Aes*        aes;
+                byte*       out;
+                const byte* in;
+                word32      sz;
+                const byte* iv;
+                word32      ivSz;
+                const byte* authTag;
+                word32      authTagSz;
+                const byte* authIn;
+                word32      authInSz;
+            } aesgcm_dec;
+        #endif /* HAVE_AESGCM */
+        #ifdef HAVE_AES_CBC
+            struct {
+                Aes*        aes;
+                byte*       out;
+                const byte* in;
+                word32      sz;
+            } aescbc;
+        #endif /* HAVE_AES_CBC */
+        #ifndef NO_DES3
+            struct {
+                Des3*       des;
+                byte*       out;
+                const byte* in;
+                word32      sz;
+            } des3;
+        #endif
+        };
+    } cipher;
+#endif /* !NO_AES || !NO_DES3 */
+#if !defined(NO_SHA) || !defined(NO_SHA256)
+    struct {
+        int type; /* enum wc_HashType */
+        const byte* in;
+        word32 inSz;
+        byte* digest;
+        union {
+        #ifndef NO_SHA
+            wc_Sha* sha1;
+        #endif
+        #ifndef NO_SHA256
+            wc_Sha256* sha256;
+        #endif
+        };
+    } hash;
+#endif /* !NO_SHA || !NO_SHA256 */
+#ifndef NO_HMAC
+    struct {
+        int macType; /* enum wc_HashType */
+        const byte* in;
+        word32 inSz;
+        byte* digest;
+        Hmac* hmac;
+    } hmac;
+#endif
+#ifndef WC_NO_RNG
+    struct {
+        WC_RNG* rng;
+        byte* out;
+        word32 sz;
+    } rng;
+    struct {
+        OS_Seed* os;
+        byte* seed;
+        word32 sz;
+    } seed;
+#endif
+} wc_CryptoInfo;
+
+
+typedef int (*CryptoDevCallbackFunc)(int devId, wc_CryptoInfo* info, void* ctx);
+
+WOLFSSL_LOCAL void wc_CryptoCb_Init(void);
+
+WOLFSSL_API int  wc_CryptoCb_RegisterDevice(int devId, CryptoDevCallbackFunc cb, void* ctx);
+WOLFSSL_API void wc_CryptoCb_UnRegisterDevice(int devId);
+
+/* old function names */
+#define wc_CryptoDev_RegisterDevice   wc_CryptoCb_RegisterDevice
+#define wc_CryptoDev_UnRegisterDevice wc_CryptoCb_UnRegisterDevice
+
+
+#ifndef NO_RSA
+WOLFSSL_LOCAL int wc_CryptoCb_Rsa(const byte* in, word32 inLen, byte* out,
+    word32* outLen, int type, RsaKey* key, WC_RNG* rng);
+
+#ifdef WOLFSSL_KEY_GEN
+WOLFSSL_LOCAL int wc_CryptoCb_MakeRsaKey(RsaKey* key, int size, long e,
+    WC_RNG* rng);
+#endif /* WOLFSSL_KEY_GEN */
+#endif /* !NO_RSA */
+
+#ifdef HAVE_ECC
+WOLFSSL_LOCAL int wc_CryptoCb_MakeEccKey(WC_RNG* rng, int keySize,
+    ecc_key* key, int curveId);
+
+WOLFSSL_LOCAL int wc_CryptoCb_Ecdh(ecc_key* private_key, ecc_key* public_key,
+    byte* out, word32* outlen);
+
+WOLFSSL_LOCAL int wc_CryptoCb_EccSign(const byte* in, word32 inlen, byte* out,
+    word32 *outlen, WC_RNG* rng, ecc_key* key);
+
+WOLFSSL_LOCAL int wc_CryptoCb_EccVerify(const byte* sig, word32 siglen,
+    const byte* hash, word32 hashlen, int* res, ecc_key* key);
+#endif /* HAVE_ECC */
+
+#ifndef NO_AES
+#ifdef HAVE_AESGCM
+WOLFSSL_LOCAL int wc_CryptoCb_AesGcmEncrypt(Aes* aes, byte* out,
+     const byte* in, word32 sz, const byte* iv, word32 ivSz,
+     byte* authTag, word32 authTagSz, const byte* authIn, word32 authInSz);
+
+WOLFSSL_LOCAL int wc_CryptoCb_AesGcmDecrypt(Aes* aes, byte* out,
+     const byte* in, word32 sz, const byte* iv, word32 ivSz,
+     const byte* authTag, word32 authTagSz,
+     const byte* authIn, word32 authInSz);
+#endif /* HAVE_AESGCM */
+#ifdef HAVE_AES_CBC
+WOLFSSL_LOCAL int wc_CryptoCb_AesCbcEncrypt(Aes* aes, byte* out,
+                               const byte* in, word32 sz);
+WOLFSSL_LOCAL int wc_CryptoCb_AesCbcDecrypt(Aes* aes, byte* out,
+                               const byte* in, word32 sz);
+#endif /* HAVE_AES_CBC */
+#endif /* !NO_AES */
+
+#ifndef NO_DES3
+WOLFSSL_LOCAL int wc_CryptoCb_Des3Encrypt(Des3* des3, byte* out,
+                               const byte* in, word32 sz);
+WOLFSSL_LOCAL int wc_CryptoCb_Des3Decrypt(Des3* des3, byte* out,
+                               const byte* in, word32 sz);
+#endif /* !NO_DES3 */
+
+#ifndef NO_SHA
+WOLFSSL_LOCAL int wc_CryptoCb_ShaHash(wc_Sha* sha, const byte* in,
+    word32 inSz, byte* digest);
+#endif /* !NO_SHA */
+
+#ifndef NO_SHA256
+WOLFSSL_LOCAL int wc_CryptoCb_Sha256Hash(wc_Sha256* sha256, const byte* in,
+    word32 inSz, byte* digest);
+#endif /* !NO_SHA256 */
+#ifndef NO_HMAC
+WOLFSSL_LOCAL int wc_CryptoCb_Hmac(Hmac* hmac, int macType, const byte* in,
+    word32 inSz, byte* digest);
+#endif /* !NO_HMAC */
+
+#ifndef WC_NO_RNG
+WOLFSSL_LOCAL int wc_CryptoCb_RandomBlock(WC_RNG* rng, byte* out, word32 sz);
+WOLFSSL_LOCAL int wc_CryptoCb_RandomSeed(OS_Seed* os, byte* seed, word32 sz);
+#endif
+
+#endif /* WOLF_CRYPTO_CB */
+
+#ifdef __cplusplus
+    } /* extern "C" */
+#endif
+
+#endif /* _WOLF_CRYPTO_CB_H_ */
+
--- a/wolfssl/wolfcrypt/cryptodev.h	Sat Aug 18 22:20:43 2018 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,115 +0,0 @@
-/* cryptodev.h
- *
- * Copyright (C) 2006-2018 wolfSSL Inc.
- *
- * This file is part of wolfSSL.
- *
- * wolfSSL is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3 of the License, or
- * (at your option) any later version.
- *
- * wolfSSL is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _WOLF_CRYPTO_DEV_H_
-#define _WOLF_CRYPTO_DEV_H_
-
-#include <wolfssl/wolfcrypt/types.h>
-
-#ifdef __cplusplus
-    extern "C" {
-#endif
-
-#ifdef WOLF_CRYPTO_DEV
-
-#ifndef NO_RSA
-    #include <wolfssl/wolfcrypt/rsa.h>
-#endif
-#ifdef HAVE_ECC
-    #include <wolfssl/wolfcrypt/ecc.h>
-#endif
-
-/* Crypto Information Structure for callbacks */
-typedef struct wc_CryptoInfo {
-    int algo_type; /* enum wc_AlgoType */
-    struct {
-        int type; /* enum wc_PkType */
-        union {
-        #ifndef NO_RSA
-            struct {
-                const byte* in;
-                word32 inLen;
-                byte* out;
-                word32* outLen;
-                int type;
-                RsaKey* key;
-                WC_RNG* rng;
-            } rsa;
-        #endif
-        #ifdef HAVE_ECC
-            struct {
-                ecc_key* private_key;
-                ecc_key* public_key;
-                byte* out;
-                word32* outlen;
-            } ecdh;
-            struct {
-                const byte* in;
-                word32 inlen;
-                byte* out;
-                word32 *outlen;
-                WC_RNG* rng;
-                ecc_key* key;
-            } eccsign;
-            struct {
-                const byte* sig;
-                word32 siglen;
-                const byte* hash;
-                word32 hashlen;
-                int* res;
-                ecc_key* key;
-            } eccverify;
-        #endif
-        };
-    } pk;
-} wc_CryptoInfo;
-
-typedef int (*CryptoDevCallbackFunc)(int devId, wc_CryptoInfo* info, void* ctx);
-
-WOLFSSL_LOCAL void wc_CryptoDev_Init(void);
-
-WOLFSSL_API int  wc_CryptoDev_RegisterDevice(int devId, CryptoDevCallbackFunc cb, void* ctx);
-WOLFSSL_API void wc_CryptoDev_UnRegisterDevice(int devId);
-
-
-#ifndef NO_RSA
-WOLFSSL_LOCAL int wc_CryptoDev_Rsa(const byte* in, word32 inLen, byte* out,
-    word32* outLen, int type, RsaKey* key, WC_RNG* rng);
-#endif /* !NO_RSA */
-
-#ifdef HAVE_ECC
-WOLFSSL_LOCAL int wc_CryptoDev_Ecdh(ecc_key* private_key, ecc_key* public_key,
-    byte* out, word32* outlen);
-
-WOLFSSL_LOCAL int wc_CryptoDev_EccSign(const byte* in, word32 inlen, byte* out,
-    word32 *outlen, WC_RNG* rng, ecc_key* key);
-
-WOLFSSL_LOCAL int wc_CryptoDev_EccVerify(const byte* sig, word32 siglen,
-    const byte* hash, word32 hashlen, int* res, ecc_key* key);
-#endif /* HAVE_ECC */
-
-#endif /* WOLF_CRYPTO_DEV */
-
-#ifdef __cplusplus
-    } /* extern "C" */
-#endif
-
-#endif /* _WOLF_CRYPTO_DEV_H_ */
-
--- a/wolfssl/wolfcrypt/curve25519.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/curve25519.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* curve25519.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -44,15 +44,21 @@
 
 #define CURVE25519_KEYSIZE 32
 
+#ifdef WOLFSSL_NAMES_STATIC
+typedef char curve25519_str[12];
+#else
+typedef const char* curve25519_str;
+#endif
+
 /* curve25519 set type */
 typedef struct {
-    int size;       /* The size of the curve in octets */
-    const char* name;     /* name of this curve */
+    int size;             /* The size of the curve in octets */
+    curve25519_str name; /* name of this curve */
 } curve25519_set_type;
 
 
 /* ECC point, the internal structure is Little endian
- * the mathematical functions used the endianess */
+ * the mathematical functions used the endianness */
 typedef struct {
     byte point[CURVE25519_KEYSIZE];
     #ifdef FREESCALE_LTC_ECC
@@ -128,6 +134,8 @@
 WOLFSSL_API
 int wc_curve25519_import_public_ex(const byte* in, word32 inLen,
                                    curve25519_key* key, int endian);
+WOLFSSL_API
+int wc_curve25519_check_public(const byte* pub, word32 pubSz, int endian);
 
 WOLFSSL_API
 int wc_curve25519_export_public(curve25519_key* key, byte* out, word32* outLen);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wolfssl/wolfcrypt/curve448.h	Thu Jun 04 23:57:22 2020 +0000
@@ -0,0 +1,140 @@
+/* curve448.h
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/* Implemented to: RFC 7748 */
+
+
+#ifndef WOLF_CRYPT_CURVE448_H
+#define WOLF_CRYPT_CURVE448_H
+
+#include <wolfssl/wolfcrypt/types.h>
+
+#ifdef HAVE_CURVE448
+
+#include <wolfssl/wolfcrypt/fe_448.h>
+#include <wolfssl/wolfcrypt/random.h>
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+    #include <wolfssl/wolfcrypt/async.h>
+#endif
+
+#ifdef __cplusplus
+    extern "C" {
+#endif
+
+#define CURVE448_KEY_SIZE        56
+#define CURVE448_PUB_KEY_SIZE    56
+
+
+/* A CURVE448 Key */
+typedef struct curve448_key {
+    byte p[CURVE448_PUB_KEY_SIZE];  /* public key  */
+    byte k[CURVE448_KEY_SIZE];      /* private key */
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+    WC_ASYNC_DEV asyncDev;
+#endif
+} curve448_key;
+
+enum {
+    EC448_LITTLE_ENDIAN = 0,
+    EC448_BIG_ENDIAN    = 1
+};
+
+WOLFSSL_API
+int wc_curve448_make_key(WC_RNG* rng, int keysize, curve448_key* key);
+
+WOLFSSL_API
+int wc_curve448_shared_secret(curve448_key* private_key,
+                              curve448_key* public_key,
+                              byte* out, word32* outlen);
+
+WOLFSSL_API
+int wc_curve448_shared_secret_ex(curve448_key* private_key,
+                                 curve448_key* public_key,
+                                 byte* out, word32* outlen, int endian);
+
+WOLFSSL_API
+int wc_curve448_init(curve448_key* key);
+
+WOLFSSL_API
+void wc_curve448_free(curve448_key* key);
+
+
+/* raw key helpers */
+WOLFSSL_API
+int wc_curve448_import_private(const byte* priv, word32 privSz,
+                               curve448_key* key);
+WOLFSSL_API
+int wc_curve448_import_private_ex(const byte* priv, word32 privSz,
+                                  curve448_key* key, int endian);
+
+WOLFSSL_API
+int wc_curve448_import_private_raw(const byte* priv, word32 privSz,
+                                   const byte* pub, word32 pubSz,
+                                   curve448_key* key);
+WOLFSSL_API
+int wc_curve448_import_private_raw_ex(const byte* priv, word32 privSz,
+                                      const byte* pub, word32 pubSz,
+                                      curve448_key* key, int endian);
+WOLFSSL_API
+int wc_curve448_export_private_raw(curve448_key* key, byte* out,
+                                   word32* outLen);
+WOLFSSL_API
+int wc_curve448_export_private_raw_ex(curve448_key* key, byte* out,
+                                      word32* outLen, int endian);
+
+WOLFSSL_API
+int wc_curve448_import_public(const byte* in, word32 inLen,
+                              curve448_key* key);
+WOLFSSL_API
+int wc_curve448_import_public_ex(const byte* in, word32 inLen,
+                                 curve448_key* key, int endian);
+WOLFSSL_API
+int wc_curve448_check_public(const byte* pub, word32 pubSz, int endian);
+
+WOLFSSL_API
+int wc_curve448_export_public(curve448_key* key, byte* out, word32* outLen);
+WOLFSSL_API
+int wc_curve448_export_public_ex(curve448_key* key, byte* out,
+                                 word32* outLen, int endian);
+
+WOLFSSL_API
+int wc_curve448_export_key_raw(curve448_key* key,
+                               byte* priv, word32 *privSz,
+                               byte* pub, word32 *pubSz);
+WOLFSSL_API
+int wc_curve448_export_key_raw_ex(curve448_key* key,
+                                  byte* priv, word32 *privSz,
+                                  byte* pub, word32 *pubSz,
+                                  int endian);
+/* size helper */
+WOLFSSL_API
+int wc_curve448_size(curve448_key* key);
+
+#ifdef __cplusplus
+    }    /* extern "C" */
+#endif
+
+#endif /* HAVE_CURVE448 */
+#endif /* WOLF_CRYPT_CURVE448_H */
+
+
--- a/wolfssl/wolfcrypt/des3.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/des3.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* des3.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -49,7 +49,7 @@
 enum {
     DES_KEY_SIZE        =  8,  /* des                     */
     DES3_KEY_SIZE       = 24,  /* 3 des ede               */
-    DES_IV_SIZE         = 16,
+    DES_IV_SIZE         =  8,  /* should be the same as DES_BLOCK_SIZE */
 };
 
 
@@ -95,17 +95,28 @@
 
 
 /* DES3 encryption and decryption */
-typedef struct Des3 {
+struct Des3 {
     word32 key[3][DES_KS_SIZE];
     word32 reg[DES_BLOCK_SIZE / sizeof(word32)];      /* for CBC mode */
     word32 tmp[DES_BLOCK_SIZE / sizeof(word32)];      /* same         */
 #ifdef WOLFSSL_ASYNC_CRYPT
-    const byte* key_raw;
-    const byte* iv_raw;
     WC_ASYNC_DEV asyncDev;
 #endif
+#if defined(WOLF_CRYPTO_CB) || \
+    (defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_3DES))
+    word32 devKey[DES3_KEYLEN/sizeof(word32)]; /* raw key */
+#endif
+#ifdef WOLF_CRYPTO_CB
+    int    devId;
+    void*  devCtx;
+#endif
     void* heap;
-} Des3;
+};
+
+#ifndef WC_DES3_TYPE_DEFINED
+    typedef struct Des3 Des3;
+    #define WC_DES3_TYPE_DEFINED
+#endif
 #endif /* HAVE_FIPS */
 
 
--- a/wolfssl/wolfcrypt/dh.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/dh.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* dh.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -57,14 +57,22 @@
 } DhParams;
 
 /* Diffie-Hellman Key */
-typedef struct DhKey {
+struct DhKey {
     mp_int p, g, q;                         /* group parameters  */
+#if defined(WOLFSSL_QT) || defined(OPENSSL_ALL) || defined(WOLFSSL_OPENSSH)
+    mp_int pub;
+    mp_int priv;
+#endif
     void* heap;
 #ifdef WOLFSSL_ASYNC_CRYPT
     WC_ASYNC_DEV asyncDev;
 #endif
-} DhKey;
+};
 
+#ifndef WC_DH_TYPE_DEFINED
+    typedef struct DhKey DhKey;
+    #define WC_DH_TYPE_DEFINED
+#endif
 
 #ifdef HAVE_FFDHE_2048
 WOLFSSL_API const DhParams* wc_Dh_ffdhe2048_Get(void);
@@ -98,11 +106,20 @@
                         word32 gSz);
 WOLFSSL_API int wc_DhSetKey_ex(DhKey* key, const byte* p, word32 pSz,
                         const byte* g, word32 gSz, const byte* q, word32 qSz);
+#if defined(WOLFSSL_QT) || defined(OPENSSL_ALL)
+WOLFSSL_LOCAL int wc_DhSetFullKeys(DhKey* key,const byte* priv_key,word32 privSz,
+                                   const byte* pub_key, word32 pubSz);
+#endif
+WOLFSSL_API int wc_DhSetCheckKey(DhKey* key, const byte* p, word32 pSz,
+                        const byte* g, word32 gSz, const byte* q, word32 qSz,
+                        int trusted, WC_RNG* rng);
 WOLFSSL_API int wc_DhParamsLoad(const byte* input, word32 inSz, byte* p,
                             word32* pInOutSz, byte* g, word32* gInOutSz);
 WOLFSSL_API int wc_DhCheckPubKey(DhKey* key, const byte* pub, word32 pubSz);
 WOLFSSL_API int wc_DhCheckPubKey_ex(DhKey* key, const byte* pub, word32 pubSz,
                             const byte* prime, word32 primeSz);
+WOLFSSL_API int wc_DhCheckPubValue(const byte* prime, word32 primeSz,
+                                   const byte* pub, word32 pubSz);
 WOLFSSL_API int wc_DhCheckPrivKey(DhKey* key, const byte* priv, word32 pubSz);
 WOLFSSL_API int wc_DhCheckPrivKey_ex(DhKey* key, const byte* priv, word32 pubSz,
                             const byte* prime, word32 primeSz);
--- a/wolfssl/wolfcrypt/dsa.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/dsa.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* dsa.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -52,6 +52,11 @@
     DSA_PRIVATE  = 1
 };
 
+enum {
+    DSA_HALF_SIZE = 20,   /* r and s size  */
+    DSA_SIG_SIZE  = 40    /* signature size */
+};
+
 /* DSA */
 typedef struct DsaKey {
     mp_int p, q, g, y, x;
@@ -71,6 +76,9 @@
 WOLFSSL_API int wc_DsaPrivateKeyDecode(const byte* input, word32* inOutIdx,
                                        DsaKey*, word32);
 WOLFSSL_API int wc_DsaKeyToDer(DsaKey* key, byte* output, word32 inLen);
+WOLFSSL_API int wc_SetDsaPublicKey(byte* output, DsaKey* key,
+                                   int outLen, int with_header);
+WOLFSSL_API int wc_DsaKeyToPublicDer(DsaKey* key, byte* output, word32 inLen);
 
 #ifdef WOLFSSL_KEY_GEN
 WOLFSSL_API int wc_MakeDsaKey(WC_RNG *rng, DsaKey *dsa);
@@ -80,6 +88,9 @@
 /* raw export functions */
 WOLFSSL_API int wc_DsaImportParamsRaw(DsaKey* dsa, const char* p,
                                       const char* q, const char* g);
+WOLFSSL_API int wc_DsaImportParamsRawCheck(DsaKey* dsa, const char* p,
+                                      const char* q, const char* g,
+                                      int trusted, WC_RNG* rng);
 WOLFSSL_API int wc_DsaExportParamsRaw(DsaKey* dsa, byte* p, word32* pSz,
                                       byte* q, word32* qSz, byte* g,
                                       word32* gSz);
--- a/wolfssl/wolfcrypt/ecc.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/ecc.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* ecc.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -31,7 +31,8 @@
 
 #ifdef HAVE_ECC
 
-#if defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)
+#if defined(HAVE_FIPS) && \
+    defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)
     #include <wolfssl/wolfcrypt/fips.h>
 #endif /* HAVE_FIPS_VERSION >= 2 */
 
@@ -53,6 +54,9 @@
     #include <wolfssl/wolfcrypt/port/atmel/atmel.h>
 #endif /* WOLFSSL_ATECC508A */
 
+#if defined(WOLFSSL_CRYPTOCELL)
+    #include <wolfssl/wolfcrypt/port/arm/cryptoCell.h>
+#endif
 
 #ifdef __cplusplus
     extern "C" {
@@ -104,18 +108,21 @@
     #define MAX_ECC_BYTES     ((MAX_ECC_BITS / 8) + 1)
 #endif
 
+#ifndef ECC_MAX_PAD_SZ
+    /* ECC maximum padding size (when MSB is set extra byte required for R and S) */
+    #define ECC_MAX_PAD_SZ 2
+#endif
 
 enum {
     ECC_PUBLICKEY       = 1,
     ECC_PRIVATEKEY      = 2,
     ECC_PRIVATEKEY_ONLY = 3,
     ECC_MAXNAME     = 16,   /* MAX CURVE NAME LENGTH */
-    SIG_HEADER_SZ   =  6,   /* ECC signature header size */
+    SIG_HEADER_SZ   =  7,   /* ECC signature header size (30 81 87 02 42 [R] 02 42 [S]) */
     ECC_BUFSIZE     = 256,  /* for exported keys temp buffer */
     ECC_MINSIZE     = 20,   /* MIN Private Key size */
     ECC_MAXSIZE     = 66,   /* MAX Private Key size */
     ECC_MAXSIZE_GEN = 74,   /* MAX Buffer size required when generating ECC keys*/
-    ECC_MAX_PAD_SZ  = 4,    /* ECC maximum padding size */
     ECC_MAX_OID_LEN = 16,
     ECC_MAX_SIG_SIZE= ((MAX_ECC_BYTES * 2) + ECC_MAX_PAD_SZ + SIG_HEADER_SZ),
 
@@ -125,12 +132,13 @@
     ECC_MAX_CRYPTO_HW_PUBKEY_SIZE = (ATECC_KEY_SIZE*2),
 #elif defined(PLUTON_CRYPTO_ECC)
     ECC_MAX_CRYPTO_HW_SIZE = 32,
+#elif defined(WOLFSSL_CRYPTOCELL)
+    #ifndef CRYPTOCELL_KEY_SIZE
+        CRYPTOCELL_KEY_SIZE = ECC_MAXSIZE,
+    #endif
+    ECC_MAX_CRYPTO_HW_SIZE = CRYPTOCELL_KEY_SIZE,
 #endif
 
-    /* point encoding type */
-    ECC_TYPE_HEX_STR = 1,
-    ECC_TYPE_UNSIGNED_BIN = 2,
-
     /* point compression type */
     ECC_POINT_COMP_EVEN = 0x02,
     ECC_POINT_COMP_ODD = 0x03,
@@ -138,6 +146,10 @@
 
     /* Shamir's dual add constants */
     SHAMIR_PRECOMP_SZ = 16,
+
+#ifdef HAVE_PKCS11
+    ECC_MAX_ID_LEN    = 32,
+#endif
 };
 
 /* Curve Types */
@@ -184,7 +196,7 @@
 #ifdef HAVE_CURVE25519
     ECC_X25519,
 #endif
-#ifdef HAVE_X448
+#ifdef HAVE_CURVE448
     ECC_X448,
 #endif
 
@@ -202,8 +214,17 @@
         if any element > 127 then MSB 0x80 indicates additional byte */
 #endif
 
+
+#if !defined(WOLFSSL_ECC_CURVE_STATIC) && defined(USE_WINDOWS_API)
+    /* MSC does something different with the pointers to the arrays than GCC,
+     * and it causes the FIPS checksum to fail. In the case of windows builds,
+     * store everything as arrays instead of pointers to strings. */
+
+    #define WOLFSSL_ECC_CURVE_STATIC
+#endif
+
 /* ECC set type defined a GF(p) curve */
-#ifndef USE_WINDOWS_API
+#ifndef WOLFSSL_ECC_CURVE_STATIC
 typedef struct ecc_set_type {
     int size;             /* The size of the curve in octets */
     int id;               /* id of this curve */
@@ -220,10 +241,6 @@
     int         cofactor;
 } ecc_set_type;
 #else
-/* MSC does something different with the pointers to the arrays than GCC,
- * and it causes the FIPS checksum to fail. In the case of windows builds,
- * store everything as arrays instead of pointers to strings. */
-
 #define MAX_ECC_NAME 16
 #define MAX_ECC_STRING ((MAX_ECC_BYTES * 2) + 1)
     /* The values are stored as text strings. */
@@ -300,7 +317,7 @@
  * fp_digit array will be shorter. */
 typedef struct alt_fp_int {
     int used, sign, size;
-    fp_digit dp[FP_SIZE_ECC];
+    mp_digit dp[FP_SIZE_ECC];
 } alt_fp_int;
 #endif /* ALT_ECC_SIZE */
 
@@ -310,7 +327,7 @@
 #endif
 
 
-/* A point on an ECC curve, stored in Jacbobian format such that (x,y,z) =>
+/* A point on an ECC curve, stored in Jacobian format such that (x,y,z) =>
    (x/z^2, y/z^3, 1) when interpreted as affine */
 typedef struct {
 #ifndef ALT_ECC_SIZE
@@ -356,7 +373,7 @@
     int  slot;        /* Key Slot Number (-1 unknown) */
     byte pubkey_raw[ECC_MAX_CRYPTO_HW_PUBKEY_SIZE];
 #endif
-#if defined(PLUTON_CRYPTO_ECC) || defined(WOLF_CRYPTO_DEV)
+#if defined(PLUTON_CRYPTO_ECC) || defined(WOLF_CRYPTO_CB)
     int devId;
 #endif
 #ifdef WOLFSSL_ASYNC_CRYPT
@@ -371,6 +388,18 @@
         CertSignCtx certSignCtx; /* context info for cert sign (MakeSignature) */
     #endif
 #endif /* WOLFSSL_ASYNC_CRYPT */
+#ifdef HAVE_PKCS11
+    byte id[ECC_MAX_ID_LEN];
+    int  idLen;
+#endif
+#if defined(WOLFSSL_CRYPTOCELL)
+    ecc_context_t ctx;
+#endif
+
+#ifdef WOLFSSL_ECDSA_SET_K
+    mp_int *sign_k;
+#endif
+
 #ifdef WOLFSSL_SMALL_STACK_CACHE
     mp_int* t1;
     mp_int* t2;
@@ -380,11 +409,20 @@
     mp_int* z;
 #endif
 #endif
+
+#ifdef WOLFSSL_DSP
+    remote_handle64 handle;
+#endif
 };
 
 
+WOLFSSL_ABI WOLFSSL_API ecc_key* wc_ecc_key_new(void*);
+WOLFSSL_ABI WOLFSSL_API void wc_ecc_key_free(ecc_key*);
+
+
 /* ECC predefined curve sets  */
 extern const ecc_set_type ecc_sets[];
+extern const size_t ecc_sets_count;
 
 WOLFSSL_API
 const char* wc_ecc_get_name(int curve_id);
@@ -402,6 +440,7 @@
                 ecc_point* C, mp_int* a, mp_int* modulus, void* heap);
 
 ECC_API int ecc_map(ecc_point*, mp_int*, mp_digit);
+ECC_API int ecc_map_ex(ecc_point*, mp_int*, mp_digit, int ct);
 ECC_API int ecc_projective_add_point(ecc_point* P, ecc_point* Q, ecc_point* R,
                                      mp_int* a, mp_int* modulus, mp_digit mp);
 ECC_API int ecc_projective_dbl_point(ecc_point* P, ecc_point* R, mp_int* a,
@@ -411,15 +450,16 @@
 
 WOLFSSL_API
 int wc_ecc_make_key(WC_RNG* rng, int keysize, ecc_key* key);
-WOLFSSL_API
-int wc_ecc_make_key_ex(WC_RNG* rng, int keysize, ecc_key* key,
-    int curve_id);
+WOLFSSL_ABI WOLFSSL_API
+int wc_ecc_make_key_ex(WC_RNG* rng, int keysize, ecc_key* key, int curve_id);
 WOLFSSL_API
 int wc_ecc_make_pub(ecc_key* key, ecc_point* pubOut);
 WOLFSSL_API
 int wc_ecc_check_key(ecc_key* key);
 WOLFSSL_API
 int wc_ecc_is_point(ecc_point* ecp, mp_int* a, mp_int* b, mp_int* prime);
+WOLFSSL_API
+int wc_ecc_get_generator(ecc_point* ecp, int curve_idx);
 
 #ifdef HAVE_ECC_DHE
 WOLFSSL_API
@@ -431,16 +471,26 @@
 WOLFSSL_API
 int wc_ecc_shared_secret_ex(ecc_key* private_key, ecc_point* point,
                              byte* out, word32 *outlen);
+
+#if defined(WOLFSSL_ATECC508A) || defined(PLUTON_CRYPTO_ECC) || defined(WOLFSSL_CRYPTOCELL)
+#define wc_ecc_shared_secret_ssh wc_ecc_shared_secret
+#else
 #define wc_ecc_shared_secret_ssh wc_ecc_shared_secret_ex /* For backwards compat */
+#endif
+
 #endif /* HAVE_ECC_DHE */
 
 #ifdef HAVE_ECC_SIGN
-WOLFSSL_API
+WOLFSSL_ABI WOLFSSL_API
 int wc_ecc_sign_hash(const byte* in, word32 inlen, byte* out, word32 *outlen,
                      WC_RNG* rng, ecc_key* key);
 WOLFSSL_API
 int wc_ecc_sign_hash_ex(const byte* in, word32 inlen, WC_RNG* rng,
                         ecc_key* key, mp_int *r, mp_int *s);
+#ifdef WOLFSSL_ECDSA_SET_K
+WOLFSSL_API
+int wc_ecc_sign_set_k(const byte* k, word32 klen, ecc_key* key);
+#endif
 #endif /* HAVE_ECC_SIGN */
 
 #ifdef HAVE_ECC_VERIFY
@@ -454,13 +504,18 @@
 
 WOLFSSL_API
 int wc_ecc_init(ecc_key* key);
+WOLFSSL_ABI WOLFSSL_API
+int wc_ecc_init_ex(ecc_key* key, void* heap, int devId);
+#ifdef HAVE_PKCS11
 WOLFSSL_API
-int wc_ecc_init_ex(ecc_key* key, void* heap, int devId);
+int wc_ecc_init_id(ecc_key* key, unsigned char* id, int len, void* heap,
+                   int devId);
+#endif
 #ifdef WOLFSSL_CUSTOM_CURVES
 WOLFSSL_LOCAL
 void wc_ecc_free_curve(const ecc_set_type* curve, void* heap);
 #endif
-WOLFSSL_API
+WOLFSSL_ABI WOLFSSL_API
 int wc_ecc_free(ecc_key* key);
 WOLFSSL_API
 int wc_ecc_set_flags(ecc_key* key, word32 flags);
@@ -491,7 +546,13 @@
         const byte* prime, word32 primeSz, const byte* Af, word32 AfSz,
         const byte* Bf, word32 BfSz, const byte* order, word32 orderSz,
         const byte* Gx, word32 GxSz, const byte* Gy, word32 GySz, int cofactor);
+WOLFSSL_API
+int wc_ecc_get_curve_id_from_dp_params(const ecc_set_type* dp);
 
+WOLFSSL_API
+int wc_ecc_get_curve_id_from_oid(const byte* oid, word32 len);
+
+WOLFSSL_API const ecc_set_type* wc_ecc_get_curve_params(int curve_idx);
 
 WOLFSSL_API
 ecc_point* wc_ecc_new_point(void);
@@ -528,7 +589,7 @@
 #endif /* HAVE_ECC_KEY_EXPORT */
 
 #ifdef HAVE_ECC_KEY_IMPORT
-WOLFSSL_API
+WOLFSSL_ABI WOLFSSL_API
 int wc_ecc_import_x963(const byte* in, word32 inLen, ecc_key* key);
 WOLFSSL_API
 int wc_ecc_import_x963_ex(const byte* in, word32 inLen, ecc_key* key,
@@ -560,25 +621,37 @@
 
 #ifdef HAVE_ECC_KEY_EXPORT
 WOLFSSL_API
+int wc_ecc_export_ex(ecc_key* key, byte* qx, word32* qxLen,
+                     byte* qy, word32* qyLen, byte* d, word32* dLen,
+                     int encType);
+WOLFSSL_API
 int wc_ecc_export_private_only(ecc_key* key, byte* out, word32* outLen);
 WOLFSSL_API
 int wc_ecc_export_public_raw(ecc_key* key, byte* qx, word32* qxLen,
                              byte* qy, word32* qyLen);
 WOLFSSL_API
 int wc_ecc_export_private_raw(ecc_key* key, byte* qx, word32* qxLen,
-                            byte* qy, word32* qyLen, byte* d, word32* dLen);
+                              byte* qy, word32* qyLen, byte* d, word32* dLen);
 #endif /* HAVE_ECC_KEY_EXPORT */
 
 #ifdef HAVE_ECC_KEY_EXPORT
-
+WOLFSSL_API
+int wc_ecc_export_point_der_ex(const int curve_idx, ecc_point* point, byte* out,
+                               word32* outLen, int compressed);
 WOLFSSL_API
 int wc_ecc_export_point_der(const int curve_idx, ecc_point* point,
                             byte* out, word32* outLen);
+WOLFSSL_LOCAL
+int wc_ecc_export_point_der_compressed(const int curve_idx, ecc_point* point,
+                                       byte* out, word32* outLen);
 #endif /* HAVE_ECC_KEY_EXPORT */
 
 
 #ifdef HAVE_ECC_KEY_IMPORT
 WOLFSSL_API
+int wc_ecc_import_point_der_ex(byte* in, word32 inLen, const int curve_idx,
+                               ecc_point* point, int shortKeySize);
+WOLFSSL_API
 int wc_ecc_import_point_der(byte* in, word32 inLen, const int curve_idx,
                             ecc_point* point);
 #endif /* HAVE_ECC_KEY_IMPORT */
@@ -670,6 +743,16 @@
 WOLFSSL_API void wc_ecc_curve_cache_free(void);
 #endif
 
+WOLFSSL_API
+int wc_ecc_gen_k(WC_RNG* rng, int size, mp_int* k, mp_int* order);
+
+#ifdef WOLFSSL_DSP
+WOLFSSL_API
+int wc_ecc_set_handle(ecc_key* key, remote_handle64 handle);
+WOLFSSL_LOCAL
+int sp_dsp_ecc_verify_256(remote_handle64 handle, const byte* hash, word32 hashLen, mp_int* pX,
+    mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap);
+#endif
 
 #ifdef __cplusplus
     }    /* extern "C" */
--- a/wolfssl/wolfcrypt/ed25519.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/ed25519.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* ed25519.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -63,6 +63,12 @@
 #define ED25519_PRV_KEY_SIZE (ED25519_PUB_KEY_SIZE+ED25519_KEY_SIZE)
 
 
+enum {
+    Ed25519    = -1,
+    Ed25519ctx = 0,
+    Ed25519ph  = 1,
+};
+
 #ifndef WC_ED25519KEY_TYPE_DEFINED
     typedef struct ed25519_key ed25519_key;
     #define WC_ED25519KEY_TYPE_DEFINED
@@ -77,7 +83,7 @@
     byte pointX[ED25519_KEY_SIZE]; /* recovered X coordinate */
     byte pointY[ED25519_KEY_SIZE]; /* Y coordinate is the public key with The most significant bit of the final octet always zero. */
 #endif
-    int pubKeySet:1;
+    word16 pubKeySet:1;
 #ifdef WOLFSSL_ASYNC_CRYPT
     WC_ASYNC_DEV asyncDev;
 #endif
@@ -85,13 +91,40 @@
 
 
 WOLFSSL_API
+int wc_ed25519_make_public(ed25519_key* key, unsigned char* pubKey,
+                           word32 pubKeySz);
+WOLFSSL_API
 int wc_ed25519_make_key(WC_RNG* rng, int keysize, ed25519_key* key);
 WOLFSSL_API
-int wc_ed25519_sign_msg(const byte* in, word32 inlen, byte* out,
-                        word32 *outlen, ed25519_key* key);
+int wc_ed25519_sign_msg(const byte* in, word32 inLen, byte* out,
+                        word32 *outLen, ed25519_key* key);
+WOLFSSL_API
+int wc_ed25519ctx_sign_msg(const byte* in, word32 inLen, byte* out,
+                           word32 *outLen, ed25519_key* key,
+                           const byte* context, byte contextLen);
+WOLFSSL_API
+int wc_ed25519ph_sign_hash(const byte* hash, word32 hashLen, byte* out,
+                           word32 *outLen, ed25519_key* key,
+                           const byte* context, byte contextLen);
+WOLFSSL_API
+int wc_ed25519ph_sign_msg(const byte* in, word32 inLen, byte* out,
+                          word32 *outLen, ed25519_key* key, const byte* context,
+                          byte contextLen);
 WOLFSSL_API
-int wc_ed25519_verify_msg(const byte* sig, word32 siglen, const byte* msg,
-                          word32 msglen, int* stat, ed25519_key* key);
+int wc_ed25519_verify_msg(const byte* sig, word32 sigLen, const byte* msg,
+                          word32 msgLen, int* stat, ed25519_key* key);
+WOLFSSL_API
+int wc_ed25519ctx_verify_msg(const byte* sig, word32 sigLen, const byte* msg,
+                             word32 msgLen, int* stat, ed25519_key* key,
+                             const byte* context, byte contextLen);
+WOLFSSL_API
+int wc_ed25519ph_verify_hash(const byte* sig, word32 sigLen, const byte* hash,
+                             word32 hashLen, int* stat, ed25519_key* key,
+                             const byte* context, byte contextLen);
+WOLFSSL_API
+int wc_ed25519ph_verify_msg(const byte* sig, word32 sigLen, const byte* msg,
+                            word32 msgLen, int* stat, ed25519_key* key,
+                            const byte* context, byte contextLen);
 WOLFSSL_API
 int wc_ed25519_init(ed25519_key* key);
 WOLFSSL_API
@@ -115,6 +148,7 @@
                           byte* priv, word32 *privSz,
                           byte* pub, word32 *pubSz);
 
+WOLFSSL_API
 int wc_ed25519_check_key(ed25519_key* key);
 
 /* size helper */
@@ -133,5 +167,4 @@
 
 #endif /* HAVE_ED25519 */
 #endif /* WOLF_CRYPT_ED25519_H */
-
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wolfssl/wolfcrypt/ed448.h	Thu Jun 04 23:57:22 2020 +0000
@@ -0,0 +1,161 @@
+/* ed448.h
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/*!
+    \file wolfssl/wolfcrypt/ed448.h
+*/
+
+
+#ifndef WOLF_CRYPT_ED448_H
+#define WOLF_CRYPT_ED448_H
+
+#include <wolfssl/wolfcrypt/types.h>
+
+#ifdef HAVE_ED448
+
+#include <wolfssl/wolfcrypt/fe_448.h>
+#include <wolfssl/wolfcrypt/ge_448.h>
+#include <wolfssl/wolfcrypt/random.h>
+#include <wolfssl/wolfcrypt/sha3.h>
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+    #include <wolfssl/wolfcrypt/async.h>
+#endif
+
+#ifdef __cplusplus
+    extern "C" {
+#endif
+
+
+/* info about EdDSA curve specifically ed448, defined as an elliptic curve
+ * over GF(p)
+ *
+ *  56                 key size
+ *  "ED448"            curve name
+ *  "2^448-2^224-1"    prime number
+ *  "-39081"           value of d
+ *  "SHAKE256"         hash function
+ */
+
+#define ED448_KEY_SIZE     57   /* private key only */
+#define ED448_SIG_SIZE     114  /* two elements */
+
+#define ED448_PUB_KEY_SIZE 57   /* compressed */
+/* both private and public key */
+#define ED448_PRV_KEY_SIZE (ED448_PUB_KEY_SIZE+ED448_KEY_SIZE)
+
+
+enum {
+    Ed448    = 0,
+    Ed448ph  = 1,
+};
+
+#ifndef WC_ED448KEY_TYPE_DEFINED
+    typedef struct ed448_key ed448_key;
+    #define WC_ED448KEY_TYPE_DEFINED
+#endif
+
+/* An ED448 Key */
+struct ed448_key {
+    byte    p[ED448_PUB_KEY_SIZE]; /* compressed public key */
+    byte    k[ED448_PRV_KEY_SIZE]; /* private key : 56 secret -- 56 public */
+#ifdef FREESCALE_LTC_ECC
+    /* uncompressed point coordinates */
+    byte pointX[ED448_KEY_SIZE]; /* recovered X coordinate */
+    byte pointY[ED448_KEY_SIZE]; /* Y coordinate is the public key with The most significant bit of the final octet always zero. */
+#endif
+    word16 pubKeySet:1;
+#ifdef WOLFSSL_ASYNC_CRYPT
+    WC_ASYNC_DEV asyncDev;
+#endif
+};
+
+
+WOLFSSL_API
+int wc_ed448_make_public(ed448_key* key, unsigned char* pubKey,
+                         word32 pubKeySz);
+WOLFSSL_API
+int wc_ed448_make_key(WC_RNG* rng, int keysize, ed448_key* key);
+WOLFSSL_API
+int wc_ed448_sign_msg(const byte* in, word32 inLen, byte* out, word32 *outLen,
+                      ed448_key* key, const byte* context, byte contextLen);
+WOLFSSL_API
+int wc_ed448ph_sign_hash(const byte* hash, word32 hashLen, byte* out,
+                         word32 *outLen, ed448_key* key,
+                         const byte* context, byte contextLen);
+WOLFSSL_API
+int wc_ed448ph_sign_msg(const byte* in, word32 inLen, byte* out,
+                        word32 *outLen, ed448_key* key, const byte* context,
+                        byte contextLen);
+WOLFSSL_API
+int wc_ed448_verify_msg(const byte* sig, word32 sigLen, const byte* msg,
+                        word32 msgLen, int* stat, ed448_key* key,
+                        const byte* context, byte contextLen);
+WOLFSSL_API
+int wc_ed448ph_verify_hash(const byte* sig, word32 sigLen, const byte* hash,
+                           word32 hashLen, int* stat, ed448_key* key,
+                           const byte* context, byte contextLen);
+WOLFSSL_API
+int wc_ed448ph_verify_msg(const byte* sig, word32 sigLen, const byte* msg,
+                          word32 msgLen, int* stat, ed448_key* key,
+                          const byte* context, byte contextLen);
+WOLFSSL_API
+int wc_ed448_init(ed448_key* key);
+WOLFSSL_API
+void wc_ed448_free(ed448_key* key);
+WOLFSSL_API
+int wc_ed448_import_public(const byte* in, word32 inLen, ed448_key* key);
+WOLFSSL_API
+int wc_ed448_import_private_only(const byte* priv, word32 privSz,
+                                 ed448_key* key);
+WOLFSSL_API
+int wc_ed448_import_private_key(const byte* priv, word32 privSz,
+                                const byte* pub, word32 pubSz, ed448_key* key);
+WOLFSSL_API
+int wc_ed448_export_public(ed448_key*, byte* out, word32* outLen);
+WOLFSSL_API
+int wc_ed448_export_private_only(ed448_key* key, byte* out, word32* outLen);
+WOLFSSL_API
+int wc_ed448_export_private(ed448_key* key, byte* out, word32* outLen);
+WOLFSSL_API
+int wc_ed448_export_key(ed448_key* key, byte* priv, word32 *privSz,
+                        byte* pub, word32 *pubSz);
+
+WOLFSSL_API
+int wc_ed448_check_key(ed448_key* key);
+
+/* size helper */
+WOLFSSL_API
+int wc_ed448_size(ed448_key* key);
+WOLFSSL_API
+int wc_ed448_priv_size(ed448_key* key);
+WOLFSSL_API
+int wc_ed448_pub_size(ed448_key* key);
+WOLFSSL_API
+int wc_ed448_sig_size(ed448_key* key);
+
+#ifdef __cplusplus
+    }    /* extern "C" */
+#endif
+
+#endif /* HAVE_ED448 */
+#endif /* WOLF_CRYPT_ED448_H */
+
--- a/wolfssl/wolfcrypt/error-crypt.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/error-crypt.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* error-crypt.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -28,9 +28,10 @@
 
 #include <wolfssl/wolfcrypt/types.h>
 
-#ifdef HAVE_FIPS
+#if defined(HAVE_FIPS) && \
+    (!defined(HAVE_FIPS_VERSION) || (HAVE_FIPS_VERSION < 2))
 	#include <cyassl/ctaocrypt/error-crypt.h>
-#endif /* HAVE_FIPS */
+#endif /* HAVE_FIPS V1 */
 
 #ifdef __cplusplus
     extern "C" {
@@ -198,7 +199,7 @@
     WC_HW_E             = -248,  /* Error with hardware crypto use */
     WC_HW_WAIT_E        = -249,  /* Hardware waiting on resource */
 
-    PSS_SALTLEN_E       = -250,  /* PSS length of salt is to long for hash */
+    PSS_SALTLEN_E       = -250,  /* PSS length of salt is too long for hash */
     PRIME_GEN_E         = -251,  /* Failure finding a prime. */
     BER_INDEF_E         = -252,  /* Cannot decode indefinite length BER. */
     RSA_OUT_OF_RANGE_E  = -253,  /* Ciphertext to decrypt out of range. */
@@ -213,7 +214,23 @@
     RSA_KEY_PAIR_E      = -262,  /* RSA Key Pair-Wise Consistency check fail. */
     DH_CHECK_PRIV_E     = -263,  /* DH Check Priv Key error */
 
-    WC_LAST_E           = -263,  /* Update this to indicate last error */
+    WC_AFALG_SOCK_E     = -264,  /* AF_ALG socket error */
+    WC_DEVCRYPTO_E      = -265,  /* /dev/crypto error */
+
+    ZLIB_INIT_ERROR     = -266,   /* zlib init error  */
+    ZLIB_COMPRESS_ERROR = -267,   /* zlib compression error  */
+    ZLIB_DECOMPRESS_ERROR = -268,  /* zlib decompression error  */
+
+    PKCS7_NO_SIGNER_E   = -269,  /* No signer in PKCS#7 signed data msg */
+    WC_PKCS7_WANT_READ_E= -270,  /* PKCS7 operations wants more input */
+
+    CRYPTOCB_UNAVAILABLE= -271,  /* Crypto callback unavailable */
+    PKCS7_SIGNEEDS_CHECK= -272,  /* signature needs verified by caller */
+    PSS_SALTLEN_RECOVER_E=-273,  /* PSS slat length not recoverable */
+
+    ASN_SELF_SIGNED_E   = -274, /* ASN self-signed certificate error */
+
+    WC_LAST_E           = -274,  /* Update this to indicate last error */
     MIN_CODE_E          = -300   /* errors -101 - -299 */
 
     /* add new companion error id strings for any new error codes
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wolfssl/wolfcrypt/fe_448.h	Thu Jun 04 23:57:22 2020 +0000
@@ -0,0 +1,117 @@
+/* fe448_448.h
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+#ifndef WOLF_CRYPT_FE_448_H
+#define WOLF_CRYPT_FE_448_H
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#if defined(HAVE_CURVE448) || defined(HAVE_ED448)
+
+#include <stdint.h>
+
+#include <wolfssl/wolfcrypt/types.h>
+
+#if defined(HAVE___UINT128_T) && !defined(NO_CURVED448_128BIT)
+    #define CURVED448_128BIT
+#endif
+
+#ifdef __cplusplus
+    extern "C" {
+#endif
+
+/* default to be faster but take more memory */
+#if !defined(CURVE448_SMALL) || !defined(ED448_SMALL)
+
+#if defined(CURVED448_128BIT)
+    typedef int64_t fe448;
+    #ifdef __SIZEOF_INT128__
+        typedef __uint128_t uint128_t;
+        typedef __int128_t int128_t;
+    #else
+        typedef unsigned long uint128_t __attribute__ ((mode(TI)));
+        typedef long int128_t __attribute__ ((mode(TI)));
+    #endif
+#else
+    typedef int32_t fe448;
+#endif
+
+WOLFSSL_LOCAL void fe448_init(void);
+WOLFSSL_LOCAL int  curve448(byte* r, const byte* n, const byte* a);
+
+#if !defined(CURVED448_128BIT)
+WOLFSSL_LOCAL void fe448_reduce(fe448*);
+#else
+#define fe448_reduce(a)
+#endif
+WOLFSSL_LOCAL void fe448_neg(fe448*,const fe448*);
+WOLFSSL_LOCAL void fe448_add(fe448*, const fe448*, const fe448*);
+WOLFSSL_LOCAL void fe448_sub(fe448*, const fe448*, const fe448*);
+WOLFSSL_LOCAL void fe448_mul(fe448*,const fe448*,const fe448*);
+WOLFSSL_LOCAL void fe448_sqr(fe448*, const fe448*);
+WOLFSSL_LOCAL void fe448_mul39081(fe448*, const fe448*);
+WOLFSSL_LOCAL void fe448_invert(fe448*, const fe448*);
+
+WOLFSSL_LOCAL void fe448_0(fe448*);
+WOLFSSL_LOCAL void fe448_1(fe448*);
+WOLFSSL_LOCAL void fe448_copy(fe448*, const fe448*);
+WOLFSSL_LOCAL int  fe448_isnonzero(const fe448*);
+WOLFSSL_LOCAL int  fe448_isnegative(const fe448*);
+
+WOLFSSL_LOCAL void fe448_from_bytes(fe448*,const unsigned char *);
+WOLFSSL_LOCAL void fe448_to_bytes(unsigned char *, const fe448*);
+
+WOLFSSL_LOCAL void fe448_cmov(fe448*,const fe448*, int);
+WOLFSSL_LOCAL void fe448_pow_2_446_222_1(fe448*,const fe448*);
+
+#else
+
+WOLFSSL_LOCAL void fe448_init(void);
+WOLFSSL_LOCAL int  curve448(byte* r, const byte* n, const byte* a);
+
+#define fe448_reduce(a)
+WOLFSSL_LOCAL void fe448_neg(uint8_t*,const uint8_t*);
+WOLFSSL_LOCAL void fe448_add(uint8_t*, const uint8_t*, const uint8_t*);
+WOLFSSL_LOCAL void fe448_sub(uint8_t*, const uint8_t*, const uint8_t*);
+WOLFSSL_LOCAL void fe448_mul(uint8_t*,const uint8_t*,const uint8_t*);
+WOLFSSL_LOCAL void fe448_sqr(uint8_t*, const uint8_t*);
+WOLFSSL_LOCAL void fe448_mul39081(uint8_t*, const uint8_t*);
+WOLFSSL_LOCAL void fe448_invert(uint8_t*, const uint8_t*);
+
+WOLFSSL_LOCAL void fe448_copy(uint8_t*, const uint8_t*);
+WOLFSSL_LOCAL int  fe448_isnonzero(const uint8_t*);
+
+WOLFSSL_LOCAL void fe448_norm(byte *a);
+
+WOLFSSL_LOCAL void fe448_cmov(uint8_t*,const uint8_t*, int);
+WOLFSSL_LOCAL void fe448_pow_2_446_222_1(uint8_t*,const uint8_t*);
+
+#endif /* !CURVE448_SMALL || !ED448_SMALL */
+
+#ifdef __cplusplus
+    } /* extern "C" */
+#endif
+
+#endif /* HAVE_CURVE448 || HAVE_ED448 */
+
+#endif /* WOLF_CRYPT_FE_448_H */
+
--- a/wolfssl/wolfcrypt/fe_operations.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/fe_operations.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* fe_operations.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -39,6 +39,19 @@
     #define CURVED25519_128BIT
 #endif
 
+#if defined(CURVED25519_X64)
+    #define CURVED25519_ASM_64BIT
+    #define CURVED25519_ASM
+#endif
+#if defined(WOLFSSL_ARMASM)
+    #ifdef __aarch64__
+        #define CURVED25519_ASM_64BIT
+    #else
+        #define CURVED25519_ASM_32BIT
+    #endif
+    #define CURVED25519_ASM
+#endif
+
 /*
 fe means field element.
 Here the field is \Z/(2^255-19).
@@ -47,6 +60,10 @@
 Bounds on each t[i] vary depending on context.
 */
 
+#ifdef __cplusplus
+    extern "C" {
+#endif
+
 #if defined(CURVE25519_SMALL) || defined(ED25519_SMALL)
     #define F25519_SIZE 32
 
@@ -68,8 +85,10 @@
 /* default to be faster but take more memory */
 #if !defined(CURVE25519_SMALL) || !defined(ED25519_SMALL)
 
-#ifdef CURVED25519_X64
+#ifdef CURVED25519_ASM_64BIT
     typedef int64_t  fe[4];
+#elif defined(CURVED25519_ASM_32BIT)
+    typedef int32_t  fe[8];
 #elif defined(CURVED25519_128BIT)
     typedef int64_t  fe[5];
 #else
@@ -104,7 +123,7 @@
 WOLFSSL_LOCAL uint64_t load_3(const unsigned char *in);
 WOLFSSL_LOCAL uint64_t load_4(const unsigned char *in);
 
-#ifdef CURVED25519_X64
+#ifdef CURVED25519_ASM
 WOLFSSL_LOCAL void fe_ge_to_p2(fe rx, fe ry, fe rz, const fe px, const fe py,
                                const fe pz, const fe pt);
 WOLFSSL_LOCAL void fe_ge_to_p3(fe rx, fe ry, fe rz, fe rt, const fe px,
@@ -128,7 +147,7 @@
                              const fe qt2d, const fe qyplusx,
                              const fe qyminusx);
 WOLFSSL_LOCAL void fe_cmov_table(fe* r, fe* base, signed char b);
-#endif /* CURVED25519_X64 */
+#endif /* CURVED25519_ASM */
 #endif /* !CURVE25519_SMALL || !ED25519_SMALL */
 
 /* Use less memory and only 32bit types or less, but is slower
@@ -181,6 +200,11 @@
 WOLFSSL_LOCAL void fprime_copy(byte *x, const byte *a);
 
 #endif /* CURVE25519_SMALL || ED25519_SMALL */
+
+#ifdef __cplusplus
+    } /* extern "C" */
+#endif
+
 #endif /* HAVE_CURVE25519 || HAVE_ED25519 */
 
 #endif /* WOLF_CRYPT_FE_OPERATIONS_H */
--- a/wolfssl/wolfcrypt/fips_test.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/fips_test.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* fips_test.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wolfssl/wolfcrypt/ge_448.h	Thu Jun 04 23:57:22 2020 +0000
@@ -0,0 +1,87 @@
+/* ge_448.h
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+#ifndef WOLF_CRYPT_GE_448_H
+#define WOLF_CRYPT_GE_448_H
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#ifdef HAVE_ED448
+
+#include <wolfssl/wolfcrypt/fe_448.h>
+
+/*
+ge448 means group element.
+
+Here the group is the set of pairs (x,y) of field elements (see fe.h)
+satisfying -x^2 + y^2 = 1 + d x^2y^2
+where d = -39081.
+
+Representations:
+  ge448_p2 (projective) : (X:Y:Z) satisfying x=X/Z, y=Y/Z
+  ge448_precomp (affine): (x,y)
+*/
+
+#ifdef ED448_SMALL
+    typedef byte     ge448;
+    #define GE448_WORDS    56
+#elif defined(CURVED448_128BIT)
+    typedef int64_t  ge448;
+    #define GE448_WORDS    8
+#else
+    typedef int32_t  ge448;
+    #define GE448_WORDS    16
+#endif
+
+typedef struct {
+  ge448 X[GE448_WORDS];
+  ge448 Y[GE448_WORDS];
+  ge448 Z[GE448_WORDS];
+} ge448_p2;
+
+
+WOLFSSL_LOCAL int  ge448_compress_key(byte*, const byte*, const byte*);
+WOLFSSL_LOCAL int  ge448_from_bytes_negate_vartime(ge448_p2 *,
+                                                   const unsigned char *);
+
+WOLFSSL_LOCAL int  ge448_double_scalarmult_vartime(ge448_p2 *,
+                                                   const unsigned char *,
+                                                   const ge448_p2 *,
+                                                   const unsigned char *);
+WOLFSSL_LOCAL void ge448_scalarmult_base(ge448_p2 *, const unsigned char *);
+WOLFSSL_LOCAL void sc448_reduce(byte*);
+WOLFSSL_LOCAL void sc448_muladd(byte*, const byte*, const byte*, const byte*);
+WOLFSSL_LOCAL void ge448_to_bytes(unsigned char *, const ge448_p2 *);
+
+
+#ifndef ED448_SMALL
+typedef struct {
+  ge448 x[GE448_WORDS];
+  ge448 y[GE448_WORDS];
+} ge448_precomp;
+
+#endif /* !ED448_SMALL */
+
+#endif /* HAVE_ED448 */
+
+#endif /* WOLF_CRYPT_GE_448_H */
+
--- a/wolfssl/wolfcrypt/ge_operations.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/ge_operations.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* ge_operations.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -47,8 +47,10 @@
 
 #ifdef ED25519_SMALL
   typedef byte     ge[F25519_SIZE];
-#elif defined(CURVED25519_X64)
+#elif defined(CURVED25519_ASM_64BIT)
   typedef int64_t  ge[4];
+#elif defined(CURVED25519_ASM_32BIT)
+  typedef int32_t  ge[8];
 #elif defined(CURVED25519_128BIT)
   typedef int64_t  ge[5];
 #else
--- a/wolfssl/wolfcrypt/hash.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/hash.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* hash.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -52,6 +52,9 @@
 #ifdef WOLFSSL_MD2
     #include <wolfssl/wolfcrypt/md2.h>
 #endif
+#if defined(HAVE_BLAKE2) || defined(HAVE_BLAKE2S)
+    #include <wolfssl/wolfcrypt/blake2.h>
+#endif
 
 
 #ifdef __cplusplus
@@ -63,6 +66,29 @@
 #endif
 
 
+/* Supported Message Authentication Codes from page 43 */
+enum wc_MACAlgorithm {
+    no_mac,
+    md5_mac,
+    sha_mac,
+    sha224_mac,
+    sha256_mac,     /* needs to match external KDF_MacAlgorithm */
+    sha384_mac,
+    sha512_mac,
+    rmd_mac,
+    blake2b_mac
+};
+
+enum wc_HashFlags {
+    WC_HASH_FLAG_NONE =     0x00000000,
+    WC_HASH_FLAG_WILLCOPY = 0x00000001, /* flag to indicate hash will be copied */
+    WC_HASH_FLAG_ISCOPY =   0x00000002, /* hash is copy */
+#ifdef WOLFSSL_SHA3
+    WC_HASH_SHA3_KECCAK256 =0x00010000, /* Older KECCAK256 */
+#endif
+};
+
+
 typedef union {
     #ifndef NO_MD5
         wc_Md5 md5;
@@ -82,6 +108,9 @@
     #ifdef WOLFSSL_SHA512
         wc_Sha512 sha512;
     #endif
+    #ifdef WOLFSSL_SHA3
+        wc_Sha3 sha3;
+    #endif
 } wc_HashAlg;
 
 /* Find largest possible digest size
@@ -129,12 +158,21 @@
     byte* hash, word32 hash_len);
 
 /* generic hash operation wrappers */
+WOLFSSL_API int wc_HashInit_ex(wc_HashAlg* hash, enum wc_HashType type,
+    void* heap, int devId);
 WOLFSSL_API int wc_HashInit(wc_HashAlg* hash, enum wc_HashType type);
 WOLFSSL_API int wc_HashUpdate(wc_HashAlg* hash, enum wc_HashType type,
     const byte* data, word32 dataSz);
 WOLFSSL_API int wc_HashFinal(wc_HashAlg* hash, enum wc_HashType type,
     byte* out);
+WOLFSSL_API int wc_HashFree(wc_HashAlg* hash, enum wc_HashType type);
 
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+    WOLFSSL_API int wc_HashSetFlags(wc_HashAlg* hash, enum wc_HashType type,
+        word32 flags);
+    WOLFSSL_API int wc_HashGetFlags(wc_HashAlg* hash, enum wc_HashType type,
+        word32* flags);
+#endif
 
 #ifndef NO_MD5
 #include <wolfssl/wolfcrypt/md5.h>
@@ -166,6 +204,42 @@
 WOLFSSL_API int wc_Sha512Hash(const byte*, word32, byte*);
 #endif /* WOLFSSL_SHA512 */
 
+#ifdef WOLFSSL_SHA3
+#include <wolfssl/wolfcrypt/sha3.h>
+WOLFSSL_API int wc_Sha3_224Hash(const byte*, word32, byte*);
+WOLFSSL_API int wc_Sha3_256Hash(const byte*, word32, byte*);
+WOLFSSL_API int wc_Sha3_384Hash(const byte*, word32, byte*);
+WOLFSSL_API int wc_Sha3_512Hash(const byte*, word32, byte*);
+#ifdef WOLFSSL_SHAKE256
+WOLFSSL_API int wc_Shake256Hash(const byte*, word32, byte*, word32);
+#endif
+#endif /* WOLFSSL_SHA3 */
+
+enum max_prf {
+#ifdef HAVE_FFDHE_8192
+    MAX_PRF_HALF        = 516, /* Maximum half secret len */
+#elif defined(HAVE_FFDHE_6144)
+    MAX_PRF_HALF        = 388, /* Maximum half secret len */
+#else
+    MAX_PRF_HALF        = 260, /* Maximum half secret len */
+#endif
+    MAX_PRF_LABSEED     = 128, /* Maximum label + seed len */
+    MAX_PRF_DIG         = 224  /* Maximum digest len      */
+};
+
+#ifdef WOLFSSL_HAVE_PRF
+WOLFSSL_API int wc_PRF(byte* result, word32 resLen, const byte* secret,
+                    word32 secLen, const byte* seed, word32 seedLen, int hash,
+                    void* heap, int devId);
+WOLFSSL_API int wc_PRF_TLSv1(byte* digest, word32 digLen, const byte* secret,
+                    word32 secLen, const byte* label, word32 labLen,
+                    const byte* seed, word32 seedLen, void* heap, int devId);
+WOLFSSL_API int wc_PRF_TLS(byte* digest, word32 digLen, const byte* secret,
+                    word32 secLen, const byte* label, word32 labLen,
+                    const byte* seed, word32 seedLen, int useAtLeastSha256,
+                    int hash_type, void* heap, int devId);
+#endif /* WOLFSSL_HAVE_PRF */
+
 #ifdef __cplusplus
     } /* extern "C" */
 #endif
--- a/wolfssl/wolfcrypt/hc128.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/hc128.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* hc128.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
--- a/wolfssl/wolfcrypt/hmac.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/hmac.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* hmac.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -59,6 +59,9 @@
     #define HMAC_BLOCK_SIZE WC_HMAC_BLOCK_SIZE
 #endif
 
+#define WC_HMAC_INNER_HASH_KEYED_SW     1
+#define WC_HMAC_INNER_HASH_KEYED_DEV    2
+
 enum {
     HMAC_FIPS_MIN_KEY = 14,   /* 112 bit key length minimum */
 
@@ -81,9 +84,6 @@
 #ifndef WOLFSSL_SHA384
     WC_SHA384  = WC_HASH_TYPE_SHA384,
 #endif
-#ifndef HAVE_BLAKE2
-    BLAKE2B_ID = WC_HASH_TYPE_BLAKE2B,
-#endif
 #ifndef WOLFSSL_SHA224
     WC_SHA224  = WC_HASH_TYPE_SHA224,
 #endif
@@ -93,14 +93,17 @@
     WC_SHA3_384 = WC_HASH_TYPE_SHA3_384,
     WC_SHA3_512 = WC_HASH_TYPE_SHA3_512,
 #endif
+#ifdef HAVE_PKCS11
+    HMAC_MAX_ID_LEN = 32,
+#endif
 };
 
 /* Select the largest available hash for the buffer size. */
 #define WC_HMAC_BLOCK_SIZE WC_MAX_BLOCK_SIZE
 
-#if !defined(WOLFSSL_SHA3) && !defined(WOLFSSL_SHA512) && !defined(HAVE_BLAKE2) && \
-    !defined(WOLFSSL_SHA384) && defined(NO_SHA256) && defined(WOLFSSL_SHA224) && \
-     defined(NO_SHA) && defined(NO_MD5)
+#if !defined(WOLFSSL_SHA3) && !defined(WOLFSSL_SHA512) && \
+    !defined(WOLFSSL_SHA384) && defined(NO_SHA256) && \
+    defined(WOLFSSL_SHA224) && defined(NO_SHA) && defined(NO_MD5)
     #error "You have to have some kind of hash if you want to use HMAC."
 #endif
 
@@ -125,16 +128,13 @@
 #ifdef WOLFSSL_SHA512
     wc_Sha512 sha512;
 #endif
-#ifdef HAVE_BLAKE2
-    Blake2b blake2b;
-#endif
 #ifdef WOLFSSL_SHA3
     wc_Sha3 sha3;
 #endif
 } Hash;
 
 /* Hmac digest */
-typedef struct Hmac {
+struct Hmac {
     Hash    hash;
     word32  ipad[WC_HMAC_BLOCK_SIZE  / sizeof(word32)];  /* same block size all*/
     word32  opad[WC_HMAC_BLOCK_SIZE  / sizeof(word32)];
@@ -142,12 +142,28 @@
     void*   heap;                 /* heap hint */
     byte    macType;              /* md5 sha or sha256 */
     byte    innerHashKeyed;       /* keyed flag */
-
 #ifdef WOLFSSL_ASYNC_CRYPT
     WC_ASYNC_DEV asyncDev;
-    word16       keyLen;          /* hmac key length (key in ipad) */
 #endif /* WOLFSSL_ASYNC_CRYPT */
-} Hmac;
+#ifdef WOLF_CRYPTO_CB
+    int     devId;
+    void*   devCtx;
+    const byte* keyRaw;
+#endif
+#ifdef HAVE_PKCS11
+    byte    id[HMAC_MAX_ID_LEN];
+    int     idLen;
+#endif
+#if defined(WOLFSSL_ASYNC_CRYPT) || defined(WOLF_CRYPTO_CB)
+    word16  keyLen;          /* hmac key length (key in ipad) */
+#endif
+};
+
+#ifndef WC_HMAC_TYPE_DEFINED
+    typedef struct Hmac Hmac;
+    #define WC_HMAC_TYPE_DEFINED
+#endif
+
 
 #endif /* HAVE_FIPS */
 
@@ -158,6 +174,8 @@
 WOLFSSL_API int wc_HmacSizeByType(int type);
 
 WOLFSSL_API int wc_HmacInit(Hmac* hmac, void* heap, int devId);
+WOLFSSL_API int wc_HmacInit_Id(Hmac* hmac, byte* id, int len, void* heap,
+                               int devId);
 WOLFSSL_API void wc_HmacFree(Hmac*);
 
 WOLFSSL_API int wolfSSL_GetHmacMaxSize(void);
--- a/wolfssl/wolfcrypt/idea.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/idea.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* idea.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
--- a/wolfssl/wolfcrypt/integer.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/integer.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* integer.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -47,23 +47,6 @@
 
 #include <wolfssl/wolfcrypt/mpi_class.h>
 
-/* wolf big int and common functions */
-#include <wolfssl/wolfcrypt/wolfmath.h>
-
-
-#ifdef WOLFSSL_PUBLIC_MP
-    #define MP_API   WOLFSSL_API
-#else
-    #define MP_API
-#endif
-
-#ifndef MIN
-   #define MIN(x,y) ((x)<(y)?(x):(y))
-#endif
-
-#ifndef MAX
-   #define MAX(x,y) ((x)>(y)?(x):(y))
-#endif
 
 #ifdef __cplusplus
 extern "C" {
@@ -85,7 +68,7 @@
 
 
 /* detect 64-bit mode if possible */
-#if defined(__x86_64__)
+#if defined(__x86_64__) && !(defined (_MSC_VER) && defined(__clang__))
    #if !(defined(MP_64BIT) && defined(MP_16BIT) && defined(MP_8BIT))
       #define MP_64BIT
    #endif
@@ -108,20 +91,28 @@
  * [any size beyond that is ok provided it doesn't overflow the data type]
  */
 #ifdef MP_8BIT
+   /* 8-bit */
    typedef unsigned char      mp_digit;
    typedef unsigned short     mp_word;
-#elif defined(MP_16BIT) || defined(NO_64BIT)
+   /* don't define DIGIT_BIT, so its calculated below */
+#elif defined(MP_16BIT)
+   /* 16-bit */
+   typedef unsigned int       mp_digit;
+   typedef unsigned long      mp_word;
+   /* don't define DIGIT_BIT, so its calculated below */
+#elif defined(NO_64BIT)
+   /* 32-bit forced to 16-bit */
    typedef unsigned short     mp_digit;
    typedef unsigned int       mp_word;
    #define DIGIT_BIT          12
 #elif defined(MP_64BIT)
+   /* 64-bit */
    /* for GCC only on supported platforms */
    typedef unsigned long long mp_digit;  /* 64 bit type, 128 uses mode(TI) */
    typedef unsigned long      mp_word __attribute__ ((mode(TI)));
-
    #define DIGIT_BIT          60
 #else
-   /* this is the default case, 28-bit digits */
+   /* 32-bit default case */
 
    #if defined(_MSC_VER) || defined(__BORLANDC__)
       typedef unsigned __int64   ulong64;
@@ -132,14 +123,14 @@
    typedef unsigned int       mp_digit;  /* long could be 64 now, changed TAO */
    typedef ulong64            mp_word;
 
-#ifdef MP_31BIT
-   /* this is an extension that uses 31-bit digits */
-   #define DIGIT_BIT          31
-#else
-   /* default case is 28-bit digits, defines MP_28BIT as a handy test macro */
-   #define DIGIT_BIT          28
-   #define MP_28BIT
-#endif
+   #ifdef MP_31BIT
+      /* this is an extension that uses 31-bit digits */
+      #define DIGIT_BIT          31
+   #else
+      /* default case is 28-bit digits, defines MP_28BIT as a handy test macro */
+      #define DIGIT_BIT          28
+      #define MP_28BIT
+   #endif
 #endif
 
 #endif /* WOLFSSL_BIGINT_TYPES */
@@ -193,10 +184,16 @@
 
 /* size of comba arrays, should be at least 2 * 2**(BITS_PER_WORD -
    BITS_PER_DIGIT*2) */
-#define MP_WARRAY  (1 << (sizeof(mp_word) * CHAR_BIT - 2 * DIGIT_BIT + 1))
+#define MP_WARRAY  ((mp_word)1 << (sizeof(mp_word) * CHAR_BIT - 2 * DIGIT_BIT + 1))
 
 #ifdef HAVE_WOLF_BIGINT
-    struct WC_BIGINT;
+    /* raw big integer */
+    typedef struct WC_BIGINT {
+        byte*   buf;
+        word32  len;
+        void*   heap;
+    } WC_BIGINT;
+    #define WOLF_BIGINT_DEFINED
 #endif
 
 /* the mp_int structure */
@@ -208,7 +205,10 @@
     struct WC_BIGINT raw; /* unsigned binary (big endian) */
 #endif
 } mp_int;
-#define MP_INT_DEFINED
+
+/* wolf big int and common functions */
+#include <wolfssl/wolfcrypt/wolfmath.h>
+
 
 /* callback for mp_prime_random, should fill dst with random bytes and return
    how many read [up to len] */
@@ -229,6 +229,9 @@
 #define mp_isodd(a) \
     (((a)->used > 0 && (((a)->dp[0] & 1u) == 1u)) ? MP_YES : MP_NO)
 #define mp_isneg(a)  (((a)->sign != MP_ZPOS) ? MP_YES : MP_NO)
+#define mp_isword(a, w) \
+    ((((a)->used == 1) && ((a)->dp[0] == w)) || ((w == 0) && ((a)->used == 0)) \
+                                                               ? MP_YES : MP_NO)
 
 /* number of primes */
 #ifdef MP_8BIT
@@ -282,7 +285,10 @@
 MP_API int  mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c);
 MP_API int  mp_to_unsigned_bin_at_pos(int x, mp_int *t, unsigned char *b);
 MP_API int  mp_to_unsigned_bin (mp_int * a, unsigned char *b);
+MP_API int  mp_to_unsigned_bin_len(mp_int * a, unsigned char *b, int c);
 MP_API int  mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y);
+MP_API int  mp_exptmod_ex (mp_int * G, mp_int * X, int digits, mp_int * P,
+                           mp_int * Y);
 /* end functions needed by Rsa */
 
 /* functions added to support above needed, removed TOOM and KARATSUBA */
@@ -321,6 +327,8 @@
 MP_API int  mp_dr_is_modulus(mp_int *a);
 MP_API int  mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y,
                              int);
+MP_API int  mp_exptmod_base_2 (mp_int * X, mp_int * P, mp_int * Y);
+#define mp_exptmod_nct(G,X,P,Y)    mp_exptmod_fast(G,X,P,Y,0)
 MP_API int  mp_montgomery_setup (mp_int * n, mp_digit * rho);
 int  fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho);
 MP_API int  mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho);
@@ -368,15 +376,19 @@
     #define mp_dump(desc, a, verbose)
 #endif
 
-#if defined(HAVE_ECC) || defined(WOLFSSL_KEY_GEN)
+#if defined(HAVE_ECC) || defined(WOLFSSL_KEY_GEN) || !defined(NO_RSA) || \
+    !defined(NO_DSA) || !defined(NO_DH)
     MP_API int mp_sqrmod(mp_int* a, mp_int* b, mp_int* c);
 #endif
 #if !defined(NO_DSA) || defined(HAVE_ECC)
     MP_API int mp_read_radix(mp_int* a, const char* str, int radix);
 #endif
 
+#if defined(WOLFSSL_KEY_GEN) || !defined(NO_RSA) || !defined(NO_DSA) || !defined(NO_DH)
+    MP_API int mp_prime_is_prime (mp_int * a, int t, int *result);
+    MP_API int mp_prime_is_prime_ex (mp_int * a, int t, int *result, WC_RNG*);
+#endif /* WOLFSSL_KEY_GEN NO_RSA NO_DSA NO_DH */
 #ifdef WOLFSSL_KEY_GEN
-    MP_API int mp_prime_is_prime (mp_int * a, int t, int *result);
     MP_API int mp_gcd (mp_int * a, mp_int * b, mp_int * c);
     MP_API int mp_lcm (mp_int * a, mp_int * b, mp_int * c);
     MP_API int mp_rand_prime(mp_int* N, int len, WC_RNG* rng, void* heap);
@@ -386,10 +398,6 @@
 MP_API int mp_mod_d(mp_int* a, mp_digit b, mp_digit* c);
 
 
-/* wolf big int and common functions */
-#include <wolfssl/wolfcrypt/wolfmath.h>
-
-
 #ifdef __cplusplus
    }
 #endif
--- a/wolfssl/wolfcrypt/logging.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/logging.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* logging.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -51,7 +51,9 @@
  *          Do not use WOLFSSL_FUNC_TIME in production code.
  */
 enum wc_FuncNum {
-    WC_FUNC_CLIENT_HELLO_SEND = 0,
+    WC_FUNC_HELLO_REQUEST_SEND = 0,
+    WC_FUNC_HELLO_REQUEST_DO,
+    WC_FUNC_CLIENT_HELLO_SEND,
     WC_FUNC_CLIENT_HELLO_DO,
     WC_FUNC_SERVER_HELLO_SEND,
     WC_FUNC_SERVER_HELLO_DO,
@@ -91,6 +93,7 @@
                                    const char *const logMessage);
 
 WOLFSSL_API int wolfSSL_SetLoggingCb(wolfSSL_Logging_cb log_function);
+WOLFSSL_API wolfSSL_Logging_cb wolfSSL_GetLoggingCb(void);
 
 /* turn logging on, only if compiled in */
 WOLFSSL_API int  wolfSSL_Debugging_ON(void);
@@ -112,7 +115,9 @@
     WOLFSSL_API   int wc_SetLoggingHeap(void* h);
     WOLFSSL_API   int wc_ERR_remove_state(void);
     #if !defined(NO_FILESYSTEM) && !defined(NO_STDIO_FILESYSTEM)
-        WOLFSSL_API   void wc_ERR_print_errors_fp(XFILE fp);
+        WOLFSSL_API void wc_ERR_print_errors_fp(XFILE fp);
+        WOLFSSL_API void wc_ERR_print_errors_cb(int (*cb)(const char *str,
+                                                size_t len, void *u), void *u);
     #endif
 #endif /* OPENSSL_EXTRA || DEBUG_WOLFSSL_VERBOSE */
 
@@ -146,6 +151,7 @@
     WOLFSSL_API void WOLFSSL_LEAVE(const char* msg, int ret);
     #define WOLFSSL_STUB(m) \
         WOLFSSL_MSG(WOLFSSL_LOG_CAT(wolfSSL Stub, m, not implemented))
+    WOLFSSL_API int WOLFSSL_IS_DEBUG_ON(void);
 
     WOLFSSL_API void WOLFSSL_MSG(const char* msg);
     WOLFSSL_API void WOLFSSL_BUFFER(const byte* buffer, word32 length);
@@ -155,15 +161,18 @@
     #define WOLFSSL_ENTER(m)
     #define WOLFSSL_LEAVE(m, r)
     #define WOLFSSL_STUB(m)
+    #define WOLFSSL_IS_DEBUG_ON() 0
 
     #define WOLFSSL_MSG(m)
     #define WOLFSSL_BUFFER(b, l)
 
 #endif /* DEBUG_WOLFSSL && !WOLFSSL_DEBUG_ERRORS_ONLY */
 
-#if defined(DEBUG_WOLFSSL) || defined(OPENSSL_ALL) || defined(WOLFSSL_NGINX) || defined(WOLFSSL_HAPROXY)
+#if defined(DEBUG_WOLFSSL) || defined(OPENSSL_ALL) || defined(WOLFSSL_NGINX) ||\
+    defined(WOLFSSL_HAPROXY) || defined(OPENSSL_EXTRA)
 
-    #if defined(OPENSSL_EXTRA) || defined(DEBUG_WOLFSSL_VERBOSE)
+    #if (!defined(NO_ERROR_QUEUE) && defined(OPENSSL_EXTRA) && !defined(_WIN32))\
+        || defined(DEBUG_WOLFSSL_VERBOSE)
         WOLFSSL_API void WOLFSSL_ERROR_LINE(int err, const char* func, unsigned int line,
             const char* file, void* ctx);
         #define WOLFSSL_ERROR(x) \
--- a/wolfssl/wolfcrypt/md2.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/md2.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* md2.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
--- a/wolfssl/wolfcrypt/md4.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/md4.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* md4.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
--- a/wolfssl/wolfcrypt/md5.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/md5.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* md5.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -97,6 +97,9 @@
 #ifdef WOLFSSL_ASYNC_CRYPT
     WC_ASYNC_DEV asyncDev;
 #endif /* WOLFSSL_ASYNC_CRYPT */
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+    word32 flags; /* enum wc_HashFlags in hash.h */
+#endif
 } wc_Md5;
 
 #endif /* WOLFSSL_TI_HASH */
@@ -114,6 +117,11 @@
 WOLFSSL_API void wc_Md5SizeSet(wc_Md5* md5, word32 len);
 #endif
 
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+    WOLFSSL_API int wc_Md5SetFlags(wc_Md5* md5, word32 flags);
+    WOLFSSL_API int wc_Md5GetFlags(wc_Md5* md5, word32* flags);
+#endif
+
 #ifdef __cplusplus
     } /* extern "C" */
 #endif
--- a/wolfssl/wolfcrypt/mem_track.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/mem_track.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* mem_track.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -62,34 +62,66 @@
 
     #include "wolfssl/wolfcrypt/logging.h"
 
+    #if defined(WOLFSSL_TRACK_MEMORY)
+        #define DO_MEM_STATS
+        #if defined(__linux__) || defined(__MACH__)
+            #define DO_MEM_LIST
+        #endif
+    #endif
+
+
     typedef struct memoryStats {
-        size_t totalAllocs;     /* number of allocations */
-        size_t totalDeallocs;   /* number of deallocations */
-        size_t totalBytes;      /* total number of bytes allocated */
-        size_t peakBytes;       /* concurrent max bytes */
-        size_t currentBytes;    /* total current bytes in use */
+        long totalAllocs;     /* number of allocations */
+        long totalDeallocs;   /* number of deallocations */
+        long totalBytes;      /* total number of bytes allocated */
+        long peakBytes;       /* concurrent max bytes */
+        long currentBytes;    /* total current bytes in use */
     } memoryStats;
 
     typedef struct memHint {
         size_t thisSize;      /* size of this memory */
+
+    #ifdef DO_MEM_LIST
+        struct memHint* next;
+        struct memHint* prev;
+        #ifdef WOLFSSL_DEBUG_MEMORY
+            const char* func;
+            unsigned int line;
+        #endif
+    #endif
         void*  thisMemory;    /* actual memory for user */
     } memHint;
 
     typedef struct memoryTrack {
         union {
             memHint hint;
-            byte    alignit[16];   /* make sure we have strong alignment */
+            byte    alignit[sizeof(memHint) + ((16-1) & ~(16-1))]; /* make sure we have strong alignment */
         } u;
     } memoryTrack;
 
-    #if defined(WOLFSSL_TRACK_MEMORY)
-        #define DO_MEM_STATS
-        static memoryStats ourMemStats;
+#ifdef DO_MEM_LIST
+    /* track allocations and report at end */
+    typedef struct memoryList {
+        memHint* head;
+        memHint* tail;
+        word32   count;
+    } memoryList;
+#endif
+
+#if defined(WOLFSSL_TRACK_MEMORY)
+    static memoryStats ourMemStats;
+
+    #ifdef DO_MEM_LIST
+        #include <pthread.h>
+        static memoryList ourMemList;
+        static pthread_mutex_t memLock = PTHREAD_MUTEX_INITIALIZER;
     #endif
+#endif
+
 
     /* if defined to not using inline then declare function prototypes */
     #ifdef NO_INLINE
-        #define STATIC
+        #define WC_STATIC
 		#ifdef WOLFSSL_DEBUG_MEMORY
 			WOLFSSL_LOCAL void* TrackMalloc(size_t sz, const char* func, unsigned int line);
 			WOLFSSL_LOCAL void TrackFree(void* ptr, const char* func, unsigned int line);
@@ -102,16 +134,17 @@
         WOLFSSL_LOCAL int InitMemoryTracker(void);
         WOLFSSL_LOCAL void ShowMemoryTracker(void);
     #else
-        #define STATIC static
+        #define WC_STATIC static
     #endif
 
 #ifdef WOLFSSL_DEBUG_MEMORY
-    STATIC WC_INLINE void* TrackMalloc(size_t sz, const char* func, unsigned int line)
+    WC_STATIC WC_INLINE void* TrackMalloc(size_t sz, const char* func, unsigned int line)
 #else
-    STATIC WC_INLINE void* TrackMalloc(size_t sz)
+    WC_STATIC WC_INLINE void* TrackMalloc(size_t sz)
 #endif
     {
         memoryTrack* mt;
+        memHint* header;
 
         if (sz == 0)
             return NULL;
@@ -120,57 +153,127 @@
         if (mt == NULL)
             return NULL;
 
-        mt->u.hint.thisSize   = sz;
-        mt->u.hint.thisMemory = (byte*)mt + sizeof(memoryTrack);
+        header = &mt->u.hint;
+        header->thisSize   = sz;
+        header->thisMemory = (byte*)mt + sizeof(memoryTrack);
 
-#ifdef WOLFSSL_DEBUG_MEMORY
-        printf("Alloc: %p -> %u at %s:%d\n", mt->u.hint.thisMemory, (word32)sz, func, line);
-#endif
+    #ifdef WOLFSSL_DEBUG_MEMORY
+    #ifdef WOLFSSL_DEBUG_MEMORY_PRINT
+        printf("Alloc: %p -> %u at %s:%d\n", header->thisMemory, (word32)sz, func, line);
+    #else
+        (void)func;
+        (void)line;
+    #endif
+    #endif
 
-#ifdef DO_MEM_STATS
+    #ifdef DO_MEM_STATS
         ourMemStats.totalAllocs++;
         ourMemStats.totalBytes   += sz;
         ourMemStats.currentBytes += sz;
         if (ourMemStats.currentBytes > ourMemStats.peakBytes)
             ourMemStats.peakBytes = ourMemStats.currentBytes;
-#endif
+    #endif
+    #ifdef DO_MEM_LIST
+        if (pthread_mutex_lock(&memLock) == 0) {
+        #ifdef WOLFSSL_DEBUG_MEMORY
+            header->func = func;
+            header->line = line;
+        #endif
 
-        return mt->u.hint.thisMemory;
+            /* Setup event */
+            header->next = NULL;
+            if (ourMemList.tail == NULL)  {
+                ourMemList.head = header;
+                header->prev = NULL;
+            }
+            else {
+                ourMemList.tail->next = header;
+                header->prev = ourMemList.tail;
+            }
+            ourMemList.tail = header;      /* add to the end either way */
+            ourMemList.count++;
+
+            pthread_mutex_unlock(&memLock);
+        }
+    #endif
+
+        return header->thisMemory;
     }
 
 
 #ifdef WOLFSSL_DEBUG_MEMORY
-    STATIC WC_INLINE void TrackFree(void* ptr, const char* func, unsigned int line)
+    WC_STATIC WC_INLINE void TrackFree(void* ptr, const char* func, unsigned int line)
 #else
-    STATIC WC_INLINE void TrackFree(void* ptr)
+    WC_STATIC WC_INLINE void TrackFree(void* ptr)
 #endif
     {
         memoryTrack* mt;
+        memHint* header;
+        size_t sz;
 
         if (ptr == NULL) {
             return;
         }
 
-        mt = (memoryTrack*)ptr;
-        --mt;   /* same as minus sizeof(memoryTrack), removes header */
+        mt = (memoryTrack*)((byte*)ptr - sizeof(memoryTrack));
+        header = &mt->u.hint;
+        sz = header->thisSize;
+
+    #ifdef DO_MEM_LIST
+        if (pthread_mutex_lock(&memLock) == 0) 
+        {
+    #endif
+
+    #ifdef DO_MEM_STATS
+            ourMemStats.currentBytes -= header->thisSize;
+            ourMemStats.totalDeallocs++;
+    #endif
 
-#ifdef DO_MEM_STATS
-        ourMemStats.currentBytes -= mt->u.hint.thisSize;
-        ourMemStats.totalDeallocs++;
-#endif
+    #ifdef DO_MEM_LIST
+            if (header == ourMemList.head && header == ourMemList.tail) {
+                ourMemList.head = NULL;
+                ourMemList.tail = NULL;
+            }
+            else if (header == ourMemList.head) {
+                ourMemList.head = header->next;
+                ourMemList.head->prev = NULL;
+            }
+            else if (header == ourMemList.tail) {
+                ourMemList.tail = header->prev;
+                ourMemList.tail->next = NULL;
+            }
+            else {
+                memHint* next = header->next;
+                memHint* prev = header->prev;
+                if (next)
+                    next->prev = prev;
+                if (prev)
+                    prev->next = next;
+            }
+            ourMemList.count--;
+
+            pthread_mutex_unlock(&memLock);
+        }
+    #endif
 
 #ifdef WOLFSSL_DEBUG_MEMORY
-        printf("Free: %p -> %u at %s:%d\n", ptr, (word32)mt->u.hint.thisSize, func, line);
+#ifdef WOLFSSL_DEBUG_MEMORY_PRINT
+        printf("Free: %p -> %u at %s:%d\n", ptr, (word32)sz, func, line);
+#else
+        (void)func;
+        (void)line;
 #endif
+#endif
+        (void)sz;
 
         free(mt);
     }
 
 
 #ifdef WOLFSSL_DEBUG_MEMORY
-    STATIC WC_INLINE void* TrackRealloc(void* ptr, size_t sz, const char* func, unsigned int line)
+    WC_STATIC WC_INLINE void* TrackRealloc(void* ptr, size_t sz, const char* func, unsigned int line)
 #else
-    STATIC WC_INLINE void* TrackRealloc(void* ptr, size_t sz)
+    WC_STATIC WC_INLINE void* TrackRealloc(void* ptr, size_t sz)
 #endif
     {
     #ifdef WOLFSSL_DEBUG_MEMORY
@@ -181,11 +284,14 @@
 
         if (ptr) {
             /* if realloc is bigger, don't overread old ptr */
-            memoryTrack* mt = (memoryTrack*)ptr;
-            --mt;  /* same as minus sizeof(memoryTrack), removes header */
+            memoryTrack* mt;
+            memHint* header;
 
-            if (mt->u.hint.thisSize < sz)
-                sz = mt->u.hint.thisSize;
+            mt = (memoryTrack*)((byte*)ptr - sizeof(memoryTrack));
+            header = &mt->u.hint;
+
+            if (header->thisSize < sz)
+                sz = header->thisSize;
         }
 
         if (ret && ptr)
@@ -203,14 +309,29 @@
     }
 
 #ifdef WOLFSSL_TRACK_MEMORY
-    STATIC WC_INLINE int InitMemoryTracker(void)
+    static wolfSSL_Malloc_cb mfDefault = NULL;
+    static wolfSSL_Free_cb ffDefault = NULL;
+    static wolfSSL_Realloc_cb rfDefault = NULL;
+
+    WC_STATIC WC_INLINE int InitMemoryTracker(void)
     {
-        int ret = wolfSSL_SetAllocators(TrackMalloc, TrackFree, TrackRealloc);
+        int ret;
+
+        ret = wolfSSL_GetAllocators(&mfDefault, &ffDefault, &rfDefault);
+        if (ret < 0) {
+            printf("wolfSSL GetAllocators failed to get the defaults\n");
+        }
+        ret = wolfSSL_SetAllocators(TrackMalloc, TrackFree, TrackRealloc);
         if (ret < 0) {
             printf("wolfSSL SetAllocators failed for track memory\n");
             return ret;
         }
 
+    #ifdef DO_MEM_LIST
+        if (pthread_mutex_lock(&memLock) == 0)
+        {
+    #endif
+
     #ifdef DO_MEM_STATS
         ourMemStats.totalAllocs  = 0;
         ourMemStats.totalDeallocs = 0;
@@ -218,30 +339,59 @@
         ourMemStats.peakBytes    = 0;
         ourMemStats.currentBytes = 0;
     #endif
+    
+    #ifdef DO_MEM_LIST
+        XMEMSET(&ourMemList, 0, sizeof(ourMemList));
+
+        pthread_mutex_unlock(&memLock);
+        }
+    #endif
 
         return ret;
     }
 
-    STATIC WC_INLINE void ShowMemoryTracker(void)
+    WC_STATIC WC_INLINE void ShowMemoryTracker(void)
     {
+    #ifdef DO_MEM_LIST
+        if (pthread_mutex_lock(&memLock) == 0)
+        {
+    #endif
+
     #ifdef DO_MEM_STATS
-        printf("total   Allocs   = %9lu\n",
-                                       (unsigned long)ourMemStats.totalAllocs);
-        printf("total   Deallocs = %9lu\n",
-                                       (unsigned long)ourMemStats.totalDeallocs);
-        printf("total   Bytes    = %9lu\n",
-                                       (unsigned long)ourMemStats.totalBytes);
-        printf("peak    Bytes    = %9lu\n",
-                                       (unsigned long)ourMemStats.peakBytes);
-        printf("current Bytes    = %9lu\n",
-                                       (unsigned long)ourMemStats.currentBytes);
+        printf("total   Allocs   = %9ld\n", ourMemStats.totalAllocs);
+        printf("total   Deallocs = %9ld\n", ourMemStats.totalDeallocs);
+        printf("total   Bytes    = %9ld\n", ourMemStats.totalBytes);
+        printf("peak    Bytes    = %9ld\n", ourMemStats.peakBytes);
+        printf("current Bytes    = %9ld\n", ourMemStats.currentBytes);
+    #endif
+
+    #ifdef DO_MEM_LIST
+        if (ourMemList.count > 0) {
+            /* print list of allocations */
+            memHint* header;
+            for (header = ourMemList.head; header != NULL; header = header->next) {
+                printf("Leak: Ptr %p, Size %u"
+                #ifdef WOLFSSL_DEBUG_MEMORY
+                    ", Func %s, Line %d"
+                #endif
+                    "\n",
+                    (byte*)header + sizeof(memHint), (unsigned int)header->thisSize
+                #ifdef WOLFSSL_DEBUG_MEMORY
+                    , header->func, header->line
+                #endif
+                );
+            }
+        }
+
+        pthread_mutex_unlock(&memLock);
+        }
     #endif
     }
 
-    STATIC WC_INLINE int CleanupMemoryTracker(void)
+    WC_STATIC WC_INLINE int CleanupMemoryTracker(void)
     {
         /* restore default allocators */
-        return wolfSSL_ResetAllocators();
+        return wolfSSL_SetAllocators(mfDefault, ffDefault, rfDefault);
     }
 #endif
 
--- a/wolfssl/wolfcrypt/memory.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/memory.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* memory.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -29,13 +29,19 @@
 #ifndef WOLFSSL_MEMORY_H
 #define WOLFSSL_MEMORY_H
 
+#ifndef STRING_USER
 #include <stdlib.h>
+#endif
 #include <wolfssl/wolfcrypt/types.h>
 
 #ifdef __cplusplus
     extern "C" {
 #endif
 
+#ifdef WOLFSSL_FORCE_MALLOC_FAIL_TEST
+    WOLFSSL_API void wolfSSL_SetMemFailCount(int memFailCount);
+#endif
+
 #ifdef WOLFSSL_STATIC_MEMORY
     #ifdef WOLFSSL_DEBUG_MEMORY
         typedef void *(*wolfSSL_Malloc_cb)(size_t size, void* heap, int type, const char* func, unsigned int line);
@@ -77,7 +83,6 @@
 WOLFSSL_API int wolfSSL_SetAllocators(wolfSSL_Malloc_cb,
                                       wolfSSL_Free_cb,
                                       wolfSSL_Realloc_cb);
-WOLFSSL_API int wolfSSL_ResetAllocators(void);
 WOLFSSL_API int wolfSSL_GetAllocators(wolfSSL_Malloc_cb*,
                                       wolfSSL_Free_cb*,
                                       wolfSSL_Realloc_cb*);
@@ -95,16 +100,43 @@
         #define WOLFMEM_IO_SZ        16992 /* 16 byte aligned */
     #endif
     #ifndef WOLFMEM_BUCKETS
-        /* default size of chunks of memory to seperate into
-         * having session certs enabled makes a 21k SSL struct */
         #ifndef SESSION_CERTS
-            #define WOLFMEM_BUCKETS 64,128,256,512,1024,2432,3456,4544,16128
+            /* default size of chunks of memory to separate into */
+            #ifndef LARGEST_MEM_BUCKET
+                #define LARGEST_MEM_BUCKET 16128
+            #endif
+            #define WOLFMEM_BUCKETS 64,128,256,512,1024,2432,3456,4544,\
+                                    LARGEST_MEM_BUCKET
+        #elif defined (OPENSSL_EXTRA)
+            /* extra storage in structs for multiple attributes and order */
+            #ifndef LARGEST_MEM_BUCKET
+                #define LARGEST_MEM_BUCKET 25600
+            #endif
+            #define WOLFMEM_BUCKETS 64,128,256,512,1024,2432,3360,4480,\
+                                    LARGEST_MEM_BUCKET
+        #elif defined (WOLFSSL_CERT_EXT)
+            /* certificate extensions requires 24k for the SSL struct */
+            #ifndef LARGEST_MEM_BUCKET
+                #define LARGEST_MEM_BUCKET 24576
+            #endif
+            #define WOLFMEM_BUCKETS 64,128,256,512,1024,2432,3456,4544,\
+                                    LARGEST_MEM_BUCKET
         #else
-            #define WOLFMEM_BUCKETS 64,128,256,512,1024,2432,3456,4544,21920
+            /* increase 23k for object member of WOLFSSL_X509_NAME_ENTRY */
+            #ifndef LARGEST_MEM_BUCKET
+                #define LARGEST_MEM_BUCKET 23440
+            #endif
+            #define WOLFMEM_BUCKETS 64,128,256,512,1024,2432,3456,4544,\
+                                    LARGEST_MEM_BUCKET
         #endif
     #endif
     #ifndef WOLFMEM_DIST
-        #define WOLFMEM_DIST    8,4,4,12,4,5,8,1,1
+        #ifndef WOLFSSL_STATIC_MEMORY_SMALL
+            #define WOLFMEM_DIST    49,10,6,14,5,6,9,1,1
+        #else
+            /* Low resource and not RSA */
+            #define WOLFMEM_DIST    29, 7,6, 9,4,4,0,0,0
+        #endif
     #endif
 
     /* flags for loading static memory (one hot bit) */
@@ -134,8 +166,8 @@
         word32 totalFr;   /* total frees for lifetime */
         word32 totalUse;  /* total amount of memory used in blocks */
         word32 avaIO;     /* available IO specific pools */
-        word32 maxHa;     /* max number of concurent handshakes allowed */
-        word32 maxIO;     /* max number of concurent IO connections allowed */
+        word32 maxHa;     /* max number of concurrent handshakes allowed */
+        word32 maxIO;     /* max number of concurrent IO connections allowed */
         word32 blockSz[WOLFMEM_MAX_BUCKETS]; /* block sizes in stacks */
         word32 avaBlock[WOLFMEM_MAX_BUCKETS];/* ava block sizes */
         word32 usedBlock[WOLFMEM_MAX_BUCKETS];
@@ -146,7 +178,7 @@
     typedef struct WOLFSSL_HEAP {
         wc_Memory* ava[WOLFMEM_MAX_BUCKETS];
         wc_Memory* io;                  /* list of buffers to use for IO */
-        word32     maxHa;               /* max concurent handshakes */
+        word32     maxHa;               /* max concurrent handshakes */
         word32     curHa;
         word32     maxIO;               /* max concurrent IO connections */
         word32     curIO;
@@ -186,6 +218,13 @@
     WOLFSSL_API int wolfSSL_MemoryPaddingSz(void);
 #endif /* WOLFSSL_STATIC_MEMORY */
 
+#ifdef WOLFSSL_STACK_LOG
+    WOLFSSL_API void __attribute__((no_instrument_function))
+            __cyg_profile_func_enter(void *func,  void *caller);
+    WOLFSSL_API void __attribute__((no_instrument_function))
+            __cyg_profile_func_exit(void *func, void *caller);
+#endif /* WOLFSSL_STACK_LOG */
+
 #ifdef __cplusplus
     }  /* extern "C" */
 #endif
--- a/wolfssl/wolfcrypt/misc.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/misc.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* misc.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -94,10 +94,16 @@
 
 WOLFSSL_LOCAL byte ctMaskGT(int a, int b);
 WOLFSSL_LOCAL byte ctMaskGTE(int a, int b);
+WOLFSSL_LOCAL int  ctMaskIntGTE(int a, int b);
 WOLFSSL_LOCAL byte ctMaskLT(int a, int b);
 WOLFSSL_LOCAL byte ctMaskLTE(int a, int b);
 WOLFSSL_LOCAL byte ctMaskEq(int a, int b);
+WOLFSSL_LOCAL word16 ctMask16GT(int a, int b);
+WOLFSSL_LOCAL word16 ctMask16LT(int a, int b);
+WOLFSSL_LOCAL word16 ctMask16Eq(int a, int b);
+WOLFSSL_LOCAL byte ctMaskNotEq(int a, int b);
 WOLFSSL_LOCAL byte ctMaskSel(byte m, byte a, byte b);
+WOLFSSL_LOCAL int  ctMaskSelInt(byte m, int a, int b);
 WOLFSSL_LOCAL byte ctSetLTE(int a, int b);
 
 #endif /* NO_INLINE */
--- a/wolfssl/wolfcrypt/mpi_class.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/mpi_class.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* mpi_class.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -62,6 +62,7 @@
 #define BN_MP_DR_SETUP_C
 #define BN_MP_EXCH_C
 #define BN_MP_EXPT_D_C
+#define BN_MP_EXPTMOD_BASE_2
 #define BN_MP_EXPTMOD_C
 #define BN_MP_EXPTMOD_FAST_C
 #define BN_MP_EXTEUCLID_C
@@ -358,6 +359,7 @@
    #define BN_MP_REDUCE_IS_2K_C
    #define BN_MP_ISODD_C
    #define BN_MP_EXPTMOD_FAST_C
+   #define BN_MP_EXPTMOD_BASE_2
 #endif
 
 #if defined(BN_MP_EXPTMOD_FAST_C)
--- a/wolfssl/wolfcrypt/mpi_superclass.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/mpi_superclass.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* mpi_superclass.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wolfssl/wolfcrypt/pkcs11.h	Thu Jun 04 23:57:22 2020 +0000
@@ -0,0 +1,538 @@
+/* pkcs11.h
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+#ifndef _PKCS11_H_
+#define _PKCS11_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef NULL_PTR
+#define NULL_PTR        0
+#endif
+#define CK_TRUE         1
+#define CK_FALSE        0
+
+
+#define CK_INVALID_HANDLE                     0UL
+
+#define CKN_SURRENDER                         0UL
+
+#define CKF_TOKEN_PRESENT                     0x00000001UL
+#define CKF_REMOVABLE_DEVICE                  0x00000002UL
+#define CKF_HW_SLOT                           0x00000004UL
+
+#define CKF_HW                                0x00000001UL
+#define CKF_ENCRYPT                           0x00000100UL
+#define CKF_DECRYPT                           0x00000200UL
+#define CKF_DIGEST                            0x00000400UL
+#define CKF_SIGN                              0x00000800UL
+#define CKF_SIGN_RECOVER                      0x00001000UL
+#define CKF_VERIFY                            0x00002000UL
+#define CKF_VERIFY_RECOVER                    0x00004000UL
+#define CKF_GENERATE                          0x00008000UL
+#define CKF_GENERATE_KEY_PAIR                 0x00010000UL
+#define CKF_WRAP                              0x00020000UL
+#define CKF_UNWRAP                            0x00040000UL
+#define CKF_DERIVE                            0x00080000UL
+#define CKF_EC_F_P                            0x00100000UL
+#define CKF_EC_F_2M                           0x00200000UL
+#define CKF_EC_ECPARAMETERS                   0x00400000UL
+#define CKF_EC_NAMEDCURVE                     0x00800000UL
+#define CKF_EC_UNCOMPRESS                     0x01000000UL
+#define CKF_EC_COMPRESS                       0x02000000UL
+
+#define CKF_LIBRARY_CANT_CREATE_OS_THREADS    0x00000001UL
+#define CKF_OS_LOCKING_OK                     0x00000002UL
+
+#define CKU_SO                                0UL
+#define CKU_USER                              1UL
+#define CKU_CONTEXT_SPECIFIC                  2UL
+
+#define CKF_RW_SESSION                        0x00000002UL
+#define CKF_SERIAL_SESSION                    0x00000004UL
+
+#define CKO_PUBLIC_KEY                        0x00000002UL
+#define CKO_PRIVATE_KEY                       0x00000003UL
+#define CKO_SECRET_KEY                        0x00000004UL
+
+#define CKK_RSA                               0x00000000UL
+#define CKK_DH                                0x00000002UL
+#define CKK_EC                                0x00000003UL
+#define CKK_GENERIC_SECRET                    0x00000010UL
+#define CKK_AES                               0x0000001FUL
+#define CKK_MD5_HMAC                          0x00000027UL
+#define CKK_SHA_1_HMAC                        0x00000028UL
+#define CKK_SHA256_HMAC                       0x0000002bUL
+#define CKK_SHA384_HMAC                       0x0000002cUL
+#define CKK_SHA512_HMAC                       0x0000002dUL
+#define CKK_SHA224_HMAC                       0x0000002eUL
+
+#define CKA_CLASS                             0x00000000UL
+#define CKA_TOKEN                             0x00000001UL
+#define CKA_PRIVATE                           0x00000002UL
+#define CKA_LABEL                             0x00000003UL
+#define CKA_VALUE                             0x00000011UL
+#define CKA_OBJECT_ID                         0x00000012UL
+#define CKA_OWNER                             0x00000084UL
+#define CKA_TRUSTED                           0x00000086UL
+#define CKA_KEY_TYPE                          0x00000100UL
+#define CKA_ID                                0x00000102UL
+#define CKA_SENSITIVE                         0x00000103UL
+#define CKA_ENCRYPT                           0x00000104UL
+#define CKA_DECRYPT                           0x00000105UL
+#define CKA_WRAP                              0x00000106UL
+#define CKA_UNWRAP                            0x00000107UL
+#define CKA_SIGN                              0x00000108UL
+#define CKA_SIGN_RECOVER                      0x00000109UL
+#define CKA_VERIFY                            0x0000010AUL
+#define CKA_VERIFY_RECOVER                    0x0000010BUL
+#define CKA_DERIVE                            0x0000010CUL
+#define CKA_MODULUS                           0x00000120UL
+#define CKA_MODULUS_BITS                      0x00000121UL
+#define CKA_PUBLIC_EXPONENT                   0x00000122UL
+#define CKA_PRIVATE_EXPONENT                  0x00000123UL
+#define CKA_PRIME_1                           0x00000124UL
+#define CKA_PRIME_2                           0x00000125UL
+#define CKA_EXPONENT_1                        0x00000126UL
+#define CKA_EXPONENT_2                        0x00000127UL
+#define CKA_COEFFICIENT                       0x00000128UL
+#define CKA_PUBLIC_KEY_INFO                   0x00000129UL
+#define CKA_PRIME                             0x00000130UL
+#define CKA_BASE                              0x00000132UL
+#define CKA_PRIME_BITS                        0x00000133UL
+#define CKA_VALUE_BITS                        0x00000160UL
+#define CKA_VALUE_LEN                         0x00000161UL
+#define CKA_EXTRACTABLE                       0x00000162UL
+#define CKA_LOCAL                             0x00000163UL
+#define CKA_NEVER_EXTRACTABLE                 0x00000164UL
+#define CKA_ALWAYS_SENSITIVE                  0x00000165UL
+#define CKA_KEY_GEN_MECHANISM                 0x00000166UL
+#define CKA_MODIFIABLE                        0x00000170UL
+#define CKA_COPYABLE                          0x00000171UL
+#define CKA_DESTROYABLE                       0x00000172UL
+#define CKA_EC_PARAMS                         0x00000180UL
+#define CKA_EC_POINT                          0x00000181UL
+#define CKA_ALWAYS_AUTHENTICATE               0x00000202UL
+#define CKA_HW_FEATURE_TYPE                   0x00000300UL
+#define CKA_RESET_ON_INIT                     0x00000301UL
+#define CKA_HAS_RESET                         0x00000302UL
+
+#define CKM_RSA_PKCS_KEY_PAIR_GEN             0x00000000UL
+#define CKM_RSA_X_509                         0x00000003UL
+#define CKM_DH_PKCS_KEY_PAIR_GEN              0x00000020UL
+#define CKM_DH_PKCS_DERIVE                    0x00000021UL
+#define CKM_MD5_HMAC                          0x00000211UL
+#define CKM_SHA_1_HMAC                        0x00000221UL
+#define CKM_SHA256_HMAC                       0x00000251UL
+#define CKM_SHA224_HMAC                       0x00000256UL
+#define CKM_SHA384_HMAC                       0x00000261UL
+#define CKM_SHA512_HMAC                       0x00000271UL
+#define CKM_GENERIC_SECRET_KEY_GEN            0x00000350UL
+#define CKM_EC_KEY_PAIR_GEN                   0x00001040UL
+#define CKM_ECDSA                             0x00001041UL
+#define CKM_ECDH1_DERIVE                      0x00001050UL
+#define CKM_ECDH1_COFACTOR_DERIVE             0x00001051UL
+#define CKM_AES_KEY_GEN                       0x00001080UL
+#define CKM_AES_CBC                           0x00001082UL
+#define CKM_AES_GCM                           0x00001087UL
+
+#define CKR_OK                                0x00000000UL
+#define CKR_MECHANISM_INVALID                 0x00000070UL
+#define CKR_SIGNATURE_INVALID                 0x000000C0UL
+
+#define CKD_NULL                              0x00000001UL
+
+
+typedef unsigned char     CK_BYTE;
+typedef CK_BYTE           CK_CHAR;
+typedef CK_BYTE           CK_UTF8CHAR;
+typedef CK_BYTE           CK_BBOOL;
+typedef unsigned long int CK_ULONG;
+typedef long int          CK_LONG;
+typedef CK_ULONG          CK_FLAGS;
+typedef CK_BYTE*          CK_BYTE_PTR;
+typedef CK_CHAR*          CK_CHAR_PTR;
+typedef CK_UTF8CHAR*      CK_UTF8CHAR_PTR;
+typedef CK_ULONG*         CK_ULONG_PTR;
+typedef void*             CK_VOID_PTR;
+typedef CK_VOID_PTR*      CK_VOID_PTR_PTR;
+
+
+typedef CK_ULONG          CK_RV;
+
+
+typedef struct CK_VERSION {
+    CK_BYTE major;
+    CK_BYTE minor;
+} CK_VERSION;
+typedef CK_VERSION* CK_VERSION_PTR;
+
+
+/* Info Types */
+typedef struct CK_INFO {
+    CK_VERSION  cryptokiVersion;
+    CK_UTF8CHAR manufacturerID[32];
+    CK_FLAGS    flags;
+    CK_UTF8CHAR libraryDescription[32];
+    CK_VERSION  libraryVersion;
+} CK_INFO;
+typedef CK_INFO* CK_INFO_PTR;
+
+
+/* Slot Types */
+typedef CK_ULONG    CK_SLOT_ID;
+typedef CK_SLOT_ID* CK_SLOT_ID_PTR;
+
+typedef struct CK_SLOT_INFO {
+    CK_UTF8CHAR   slotDescription[64];
+    CK_UTF8CHAR   manufacturerID[32];
+    CK_FLAGS      flags;
+
+    CK_VERSION    hardwareVersion;
+    CK_VERSION    firmwareVersion;
+} CK_SLOT_INFO;
+typedef CK_SLOT_INFO* CK_SLOT_INFO_PTR;
+
+
+/* Token Types */
+typedef struct CK_TOKEN_INFO {
+    CK_UTF8CHAR   label[32];
+    CK_UTF8CHAR   manufacturerID[32];
+    CK_UTF8CHAR   model[16];
+    CK_CHAR       serialNumber[16];
+    CK_FLAGS      flags;
+    CK_ULONG      ulMaxSessionCount;
+    CK_ULONG      ulSessionCount;
+    CK_ULONG      ulMaxRwSessionCount;
+    CK_ULONG      ulRwSessionCount;
+    CK_ULONG      ulMaxPinLen;
+    CK_ULONG      ulMinPinLen;
+    CK_ULONG      ulTotalPublicMemory;
+    CK_ULONG      ulFreePublicMemory;
+    CK_ULONG      ulTotalPrivateMemory;
+    CK_ULONG      ulFreePrivateMemory;
+    CK_VERSION    hardwareVersion;
+    CK_VERSION    firmwareVersion;
+    CK_CHAR       utcTime[16];
+} CK_TOKEN_INFO;
+typedef CK_TOKEN_INFO* CK_TOKEN_INFO_PTR;
+
+
+/* Session Types */
+typedef CK_ULONG           CK_SESSION_HANDLE;
+typedef CK_SESSION_HANDLE* CK_SESSION_HANDLE_PTR;
+
+typedef CK_ULONG          CK_USER_TYPE;
+
+typedef CK_ULONG          CK_STATE;
+
+typedef struct CK_SESSION_INFO {
+    CK_SLOT_ID    slotID;
+    CK_STATE      state;
+    CK_FLAGS      flags;
+    CK_ULONG      ulDeviceError;
+} CK_SESSION_INFO;
+typedef CK_SESSION_INFO* CK_SESSION_INFO_PTR;
+
+
+/* Object Types */
+typedef CK_ULONG          CK_OBJECT_HANDLE;
+typedef CK_OBJECT_HANDLE* CK_OBJECT_HANDLE_PTR;
+
+typedef CK_ULONG         CK_OBJECT_CLASS;
+typedef CK_OBJECT_CLASS* CK_OBJECT_CLASS_PTR;
+
+typedef CK_ULONG          CK_KEY_TYPE;
+
+typedef CK_ULONG          CK_ATTRIBUTE_TYPE;
+
+typedef struct CK_ATTRIBUTE {
+    CK_ATTRIBUTE_TYPE type;
+    CK_VOID_PTR       pValue;
+    CK_ULONG          ulValueLen;
+} CK_ATTRIBUTE;
+typedef CK_ATTRIBUTE* CK_ATTRIBUTE_PTR;
+
+
+/* Mechanism Types */
+typedef CK_ULONG           CK_MECHANISM_TYPE;
+typedef CK_MECHANISM_TYPE* CK_MECHANISM_TYPE_PTR;
+
+typedef struct CK_MECHANISM {
+    CK_MECHANISM_TYPE mechanism;
+    CK_VOID_PTR       pParameter;
+    CK_ULONG          ulParameterLen;
+} CK_MECHANISM;
+typedef CK_MECHANISM* CK_MECHANISM_PTR;
+
+typedef struct CK_MECHANISM_INFO {
+    CK_ULONG ulMinKeySize;
+    CK_ULONG ulMaxKeySize;
+    CK_FLAGS flags;
+} CK_MECHANISM_INFO;
+typedef CK_MECHANISM_INFO * CK_MECHANISM_INFO_PTR;
+
+
+typedef CK_ULONG CK_NOTIFICATION;
+
+typedef CK_RV (*CK_NOTIFY)(CK_SESSION_HANDLE hSession, CK_NOTIFICATION event,
+                           CK_VOID_PTR pApplication);
+
+
+/* Threading types. */
+typedef CK_RV (*CK_CREATEMUTEX)(CK_VOID_PTR_PTR ppMutex);
+typedef CK_RV (*CK_DESTROYMUTEX)(CK_VOID_PTR pMutex);
+typedef CK_RV (*CK_LOCKMUTEX)(CK_VOID_PTR pMutex);
+typedef CK_RV (*CK_UNLOCKMUTEX)(CK_VOID_PTR pMutex);
+
+typedef struct CK_C_INITIALIZE_ARGS {
+    CK_CREATEMUTEX CreateMutex;
+    CK_DESTROYMUTEX DestroyMutex;
+    CK_LOCKMUTEX LockMutex;
+    CK_UNLOCKMUTEX UnlockMutex;
+    CK_FLAGS flags;
+    CK_VOID_PTR pReserved;
+} CK_C_INITIALIZE_ARGS;
+typedef CK_C_INITIALIZE_ARGS* CK_C_INITIALIZE_ARGS_PTR;
+
+
+/* Cryptographic algorithm types. */
+typedef CK_ULONG CK_EC_KDF_TYPE;
+
+typedef struct CK_ECDH1_DERIVE_PARAMS {
+    CK_EC_KDF_TYPE kdf;
+    CK_ULONG ulSharedDataLen;
+    CK_BYTE_PTR pSharedData;
+    CK_ULONG ulPublicDataLen;
+    CK_BYTE_PTR pPublicData;
+} CK_ECDH1_DERIVE_PARAMS;
+typedef CK_ECDH1_DERIVE_PARAMS* CK_ECDH1_DERIVE_PARAMS_PTR;
+
+
+typedef struct CK_GCM_PARAMS {
+    CK_BYTE_PTR       pIv;
+    CK_ULONG          ulIvLen;
+    CK_ULONG          ulIvBits;
+    CK_BYTE_PTR       pAAD;
+    CK_ULONG          ulAADLen;
+    CK_ULONG          ulTagBits;
+} CK_GCM_PARAMS;
+typedef CK_GCM_PARAMS* CK_GCM_PARAMS_PTR;
+
+/* Function list types. */
+typedef struct CK_FUNCTION_LIST CK_FUNCTION_LIST;
+typedef CK_FUNCTION_LIST* CK_FUNCTION_LIST_PTR;
+typedef CK_FUNCTION_LIST_PTR* CK_FUNCTION_LIST_PTR_PTR;
+
+typedef CK_RV (*CK_C_GetFunctionList)(CK_FUNCTION_LIST_PTR_PTR ppFunctionList);
+
+struct CK_FUNCTION_LIST {
+    CK_VERSION version;
+
+    CK_RV (*C_Initialize)(CK_VOID_PTR pInitArgs);
+    CK_RV (*C_Finalize)(CK_VOID_PTR pReserved);
+    CK_RV (*C_GetInfo)(CK_INFO_PTR pInfo);
+    CK_RV (*C_GetFunctionList)(CK_FUNCTION_LIST_PTR_PTR ppFunctionList);
+    CK_RV (*C_GetSlotList)(CK_BBOOL tokenPresent, CK_SLOT_ID_PTR pSlotList,
+                           CK_ULONG_PTR pulCount);
+    CK_RV (*C_GetSlotInfo)(CK_SLOT_ID slotID, CK_SLOT_INFO_PTR pInfo);
+    CK_RV (*C_GetTokenInfo)(CK_SLOT_ID slotID, CK_TOKEN_INFO_PTR pInfo);
+    CK_RV (*C_GetMechanismList)(CK_SLOT_ID slotID,
+                                CK_MECHANISM_TYPE_PTR pMechanismList,
+                                CK_ULONG_PTR pulCount);
+    CK_RV (*C_GetMechanismInfo)(CK_SLOT_ID slotID, CK_MECHANISM_TYPE type,
+                                CK_MECHANISM_INFO_PTR pInfo);
+    CK_RV (*C_InitToken)(CK_SLOT_ID slotID, CK_UTF8CHAR_PTR pPin,
+                         CK_ULONG ulPinLen, CK_UTF8CHAR_PTR pLabel);
+    CK_RV (*C_InitPIN)(CK_SESSION_HANDLE hSession, CK_UTF8CHAR_PTR pPin, 
+                       CK_ULONG ulPinLen);
+    CK_RV (*C_SetPIN)(CK_SESSION_HANDLE hSession, CK_UTF8CHAR_PTR pOldPin,
+                      CK_ULONG ulOldLen, CK_UTF8CHAR_PTR pNewPin,
+                      CK_ULONG ulNewLen);
+    CK_RV (*C_OpenSession)(CK_SLOT_ID slotID, CK_FLAGS flags,
+                           CK_VOID_PTR pApplication, CK_NOTIFY Notify,
+                           CK_SESSION_HANDLE_PTR phSession);
+    CK_RV (*C_CloseSession)(CK_SESSION_HANDLE hSession);
+    CK_RV (*C_CloseAllSessions)(CK_SLOT_ID slotID);
+    CK_RV (*C_GetSessionInfo)(CK_SESSION_HANDLE hSession,
+                              CK_SESSION_INFO_PTR pInfo);
+    CK_RV (*C_GetOperationState)(CK_SESSION_HANDLE hSession,
+                                 CK_BYTE_PTR pOperationState,
+                                 CK_ULONG_PTR pulOperationStateLen);
+    CK_RV (*C_SetOperationState)(CK_SESSION_HANDLE hSession,
+                                 CK_BYTE_PTR pOperationState,
+                                 CK_ULONG ulOperationStateLen,
+                                 CK_OBJECT_HANDLE hEncryptionKey,
+                                 CK_OBJECT_HANDLE hAuthenticationKey);
+    CK_RV (*C_Login)(CK_SESSION_HANDLE hSession, CK_USER_TYPE userType,
+                     CK_UTF8CHAR_PTR pPin, CK_ULONG ulPinLen);
+    CK_RV (*C_Logout)(CK_SESSION_HANDLE hSession);
+    CK_RV (*C_CreateObject)(CK_SESSION_HANDLE hSession,
+                            CK_ATTRIBUTE_PTR pTemplate, CK_ULONG ulCount,
+                            CK_OBJECT_HANDLE_PTR phObject);
+    CK_RV (*C_CopyObject)(CK_SESSION_HANDLE hSession, CK_OBJECT_HANDLE hObject,
+                          CK_ATTRIBUTE_PTR pTemplate, CK_ULONG ulCount,
+                          CK_OBJECT_HANDLE_PTR phNewObject);
+    CK_RV (*C_DestroyObject)(CK_SESSION_HANDLE hSession,
+                             CK_OBJECT_HANDLE hObject);
+    CK_RV (*C_GetObjectSize)(CK_SESSION_HANDLE hSession,
+                             CK_OBJECT_HANDLE hObject, CK_ULONG_PTR pulSize);
+    CK_RV (*C_GetAttributeValue)(CK_SESSION_HANDLE hSession,
+                                 CK_OBJECT_HANDLE hObject,
+                                 CK_ATTRIBUTE_PTR pTemplate, CK_ULONG ulCount);
+    CK_RV (*C_SetAttributeValue)(CK_SESSION_HANDLE hSession,
+                                 CK_OBJECT_HANDLE hObject,
+                                 CK_ATTRIBUTE_PTR pTemplate, CK_ULONG ulCount);
+    CK_RV (*C_FindObjectsInit)(CK_SESSION_HANDLE hSession,
+                               CK_ATTRIBUTE_PTR pTemplate, CK_ULONG ulCount);
+    CK_RV (*C_FindObjects)(CK_SESSION_HANDLE hSession,
+                           CK_OBJECT_HANDLE_PTR phObject,
+                           CK_ULONG ulMaxObjectCount,
+                           CK_ULONG_PTR pulObjectCount);
+    CK_RV (*C_FindObjectsFinal)(CK_SESSION_HANDLE hSession);
+    CK_RV (*C_EncryptInit)(CK_SESSION_HANDLE hSession,
+                           CK_MECHANISM_PTR pMechanism, CK_OBJECT_HANDLE hKey);
+    CK_RV (*C_Encrypt)(CK_SESSION_HANDLE hSession, CK_BYTE_PTR pData,
+                       CK_ULONG ulDataLen, CK_BYTE_PTR pEncryptedData,
+                       CK_ULONG_PTR pulEncryptedDataLen);
+    CK_RV (*C_EncryptUpdate)(CK_SESSION_HANDLE hSession, CK_BYTE_PTR pPart,
+                             CK_ULONG ulPartLen, CK_BYTE_PTR pEncryptedPart,
+                             CK_ULONG_PTR pulEncryptedPartLen);
+    CK_RV (*C_EncryptFinal)(CK_SESSION_HANDLE hSession,
+                            CK_BYTE_PTR pLastEncryptedPart,
+                            CK_ULONG_PTR pulLastEncryptedPartLen);
+    CK_RV (*C_DecryptInit)(CK_SESSION_HANDLE hSession,
+                           CK_MECHANISM_PTR pMechanism, CK_OBJECT_HANDLE hKey);
+    CK_RV (*C_Decrypt)(CK_SESSION_HANDLE hSession, CK_BYTE_PTR pEncryptedData,
+                       CK_ULONG ulEncryptedDataLen, CK_BYTE_PTR pData,
+                       CK_ULONG_PTR pulDataLen);
+    CK_RV (*C_DecryptUpdate)(CK_SESSION_HANDLE hSession,
+                             CK_BYTE_PTR pEncryptedPart,
+                             CK_ULONG ulEncryptedPartLen, CK_BYTE_PTR pPart,
+                             CK_ULONG_PTR pulPartLen);
+    CK_RV (*C_DecryptFinal)(CK_SESSION_HANDLE hSession, CK_BYTE_PTR pLastPart,
+                            CK_ULONG_PTR pulLastPartLen);
+    CK_RV (*C_DigestInit)(CK_SESSION_HANDLE hSession,
+                          CK_MECHANISM_PTR pMechanism);
+    CK_RV (*C_Digest)(CK_SESSION_HANDLE hSession, CK_BYTE_PTR pData,
+                      CK_ULONG ulDataLen, CK_BYTE_PTR pDigest,
+                      CK_ULONG_PTR pulDigestLen);
+    CK_RV (*C_DigestUpdate)(CK_SESSION_HANDLE hSession, CK_BYTE_PTR pPart,
+                            CK_ULONG ulPartLen);
+    CK_RV (*C_DigestKey)(CK_SESSION_HANDLE hSession, CK_OBJECT_HANDLE hKey);
+    CK_RV (*C_DigestFinal)(CK_SESSION_HANDLE hSession, CK_BYTE_PTR pDigest,
+                           CK_ULONG_PTR pulDigestLen);
+    CK_RV (*C_SignInit)(CK_SESSION_HANDLE hSession, CK_MECHANISM_PTR pMechanism,
+                        CK_OBJECT_HANDLE hKey);
+    CK_RV (*C_Sign)(CK_SESSION_HANDLE hSession, CK_BYTE_PTR pData,
+                    CK_ULONG ulDataLen, CK_BYTE_PTR pSignature,
+                    CK_ULONG_PTR pulSignatureLen);
+    CK_RV (*C_SignUpdate)(CK_SESSION_HANDLE hSession, CK_BYTE_PTR pPart,
+                          CK_ULONG ulPartLen);
+    CK_RV (*C_SignFinal)(CK_SESSION_HANDLE hSession, CK_BYTE_PTR pSignature,
+                         CK_ULONG_PTR pulSignatureLen);
+    CK_RV (*C_SignRecoverInit)(CK_SESSION_HANDLE hSession,
+                               CK_MECHANISM_PTR pMechanism,
+                               CK_OBJECT_HANDLE hKey);
+    CK_RV (*C_SignRecover)(CK_SESSION_HANDLE hSession, CK_BYTE_PTR pData,
+                           CK_ULONG ulDataLen, CK_BYTE_PTR pSignature,
+                           CK_ULONG_PTR pulSignatureLen);
+    CK_RV (*C_VerifyInit)(CK_SESSION_HANDLE hSession,
+                          CK_MECHANISM_PTR pMechanism, CK_OBJECT_HANDLE hKey);
+    CK_RV (*C_Verify)(CK_SESSION_HANDLE hSession, CK_BYTE_PTR pData,
+                      CK_ULONG ulDataLen, CK_BYTE_PTR pSignature,
+                      CK_ULONG ulSignatureLen);
+    CK_RV (*C_VerifyUpdate)(CK_SESSION_HANDLE hSession, CK_BYTE_PTR pPart,
+                            CK_ULONG ulPartLen);
+    CK_RV (*C_VerifyFinal)(CK_SESSION_HANDLE hSession,
+                           CK_BYTE_PTR pSignature, CK_ULONG ulSignatureLen);
+    CK_RV (*C_VerifyRecoverInit)(CK_SESSION_HANDLE hSession,
+                                 CK_MECHANISM_PTR pMechanism,
+                                 CK_OBJECT_HANDLE hKey);
+    CK_RV (*C_VerifyRecover)(CK_SESSION_HANDLE hSession,
+                             CK_BYTE_PTR pSignature, CK_ULONG ulSignatureLen,
+                             CK_BYTE_PTR pData, CK_ULONG_PTR pulDataLen);
+    CK_RV (*C_DigestEncryptUpdate)(CK_SESSION_HANDLE hSession,
+                                   CK_BYTE_PTR pPart, CK_ULONG ulPartLen,
+                                   CK_BYTE_PTR pEncryptedPart,
+                                   CK_ULONG_PTR pulEncryptedPartLen);
+    CK_RV (*C_DecryptDigestUpdate)(CK_SESSION_HANDLE hSession,
+                                   CK_BYTE_PTR pEncryptedPart,
+                                   CK_ULONG ulEncryptedPartLen,
+                                   CK_BYTE_PTR pPart, CK_ULONG_PTR pulPartLen);
+    CK_RV (*C_SignEncryptUpdate)(CK_SESSION_HANDLE hSession,
+                                 CK_BYTE_PTR pPart, CK_ULONG ulPartLen,
+                                 CK_BYTE_PTR pEncryptedPart,
+                                 CK_ULONG_PTR pulEncryptedPartLen);
+    CK_RV (*C_DecryptVerifyUpdate)(CK_SESSION_HANDLE hSession,
+                                   CK_BYTE_PTR pEncryptedPart,
+                                   CK_ULONG ulEncryptedPartLen,
+                                   CK_BYTE_PTR pPart, CK_ULONG_PTR pulPartLen);
+    CK_RV (*C_GenerateKey)(CK_SESSION_HANDLE hSession,
+                           CK_MECHANISM_PTR pMechanism,
+                           CK_ATTRIBUTE_PTR pTemplate, CK_ULONG ulCount,
+                           CK_OBJECT_HANDLE_PTR phKey);
+    CK_RV (*C_GenerateKeyPair)(CK_SESSION_HANDLE hSession,
+                               CK_MECHANISM_PTR pMechanism,
+                               CK_ATTRIBUTE_PTR pPublicKeyTemplate,
+                               CK_ULONG ulPublicKeyAttributeCount,
+                               CK_ATTRIBUTE_PTR pPrivateKeyTemplate,
+                               CK_ULONG ulPrivateKeyAttributeCount,
+                               CK_OBJECT_HANDLE_PTR phPublicKey,
+                               CK_OBJECT_HANDLE_PTR phPrivateKey);
+    CK_RV (*C_WrapKey)(CK_SESSION_HANDLE hSession,
+                       CK_MECHANISM_PTR pMechanism,
+                       CK_OBJECT_HANDLE hWrappingKey, CK_OBJECT_HANDLE hKey,
+                       CK_BYTE_PTR pWrappedKey,
+                       CK_ULONG_PTR pulWrappedKeyLen);
+    CK_RV (*C_UnwrapKey)(CK_SESSION_HANDLE hSession,
+                         CK_MECHANISM_PTR pMechanism,
+                         CK_OBJECT_HANDLE hUnwrappingKey,
+                         CK_BYTE_PTR pWrappedKey, CK_ULONG ulWrappedKeyLen,
+                         CK_ATTRIBUTE_PTR pTemplate,
+                         CK_ULONG ulAttributeCount,
+                         CK_OBJECT_HANDLE_PTR phKey);
+    CK_RV (*C_DeriveKey)(CK_SESSION_HANDLE hSession,
+                         CK_MECHANISM_PTR pMechanism,
+                         CK_OBJECT_HANDLE hBaseKey,
+                         CK_ATTRIBUTE_PTR pTemplate,
+                         CK_ULONG ulAttributeCount,
+                         CK_OBJECT_HANDLE_PTR phKey);
+    CK_RV (*C_SeedRandom)(CK_SESSION_HANDLE hSession, CK_BYTE_PTR pSeed,
+                          CK_ULONG ulSeedLen);
+    CK_RV (*C_GenerateRandom)(CK_SESSION_HANDLE hSession,
+                              CK_BYTE_PTR RandomData, CK_ULONG ulRandomLen);
+    CK_RV (*C_GetFunctionStatus)(CK_SESSION_HANDLE hSession);
+    CK_RV (*C_CancelFunction)(CK_SESSION_HANDLE hSession);
+    CK_RV (*C_WaitForSlotEvent)(CK_FLAGS flags, CK_SLOT_ID_PTR pSlot,
+                                CK_VOID_PTR pRserved);
+
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _PKCS11_H_ */
+
+
--- a/wolfssl/wolfcrypt/pkcs12.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/pkcs12.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* pkcs12.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -42,12 +42,14 @@
 /* default values for creating PKCS12 */
 enum {
     WC_PKCS12_ITT_DEFAULT = 2048,
+    WC_PKCS12_VERSION_DEFAULT = 3,
     WC_PKCS12_MAC_DEFAULT = 1,
 };
 
 WOLFSSL_API WC_PKCS12* wc_PKCS12_new(void);
 WOLFSSL_API void wc_PKCS12_free(WC_PKCS12* pkcs12);
 WOLFSSL_API int wc_d2i_PKCS12(const byte* der, word32 derSz, WC_PKCS12* pkcs12);
+WOLFSSL_API int wc_i2d_PKCS12(WC_PKCS12* pkcs12, byte** der, int* derSz);
 WOLFSSL_API int wc_PKCS12_parse(WC_PKCS12* pkcs12, const char* psw,
         byte** pkey, word32* pkeySz, byte** cert, word32* certSz,
         WC_DerCertList** ca);
--- a/wolfssl/wolfcrypt/pkcs7.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/pkcs7.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* pkcs7.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -48,18 +48,96 @@
 
 /* Max number of certificates that PKCS7 structure can parse */
 #ifndef MAX_PKCS7_CERTS
-#define MAX_PKCS7_CERTS 4
+    #define MAX_PKCS7_CERTS 4
+#endif
+
+#ifndef MAX_ORI_TYPE_SZ
+    #define MAX_ORI_TYPE_SZ  MAX_OID_SZ
+#endif
+#ifndef MAX_ORI_VALUE_SZ
+    #define MAX_ORI_VALUE_SZ 512
+#endif
+
+#ifndef MAX_SIGNED_ATTRIBS_SZ
+    #define MAX_SIGNED_ATTRIBS_SZ 7
+#endif
+
+#ifndef MAX_AUTH_ATTRIBS_SZ
+    #define MAX_AUTH_ATTRIBS_SZ 7
+#endif
+
+#ifndef MAX_UNAUTH_ATTRIBS_SZ
+    #define MAX_UNAUTH_ATTRIBS_SZ 7
 #endif
 
 /* PKCS#7 content types, ref RFC 2315 (Section 14) */
 enum PKCS7_TYPES {
-    PKCS7_MSG                 = 650,   /* 1.2.840.113549.1.7   */
-    DATA                      = 651,   /* 1.2.840.113549.1.7.1 */
-    SIGNED_DATA               = 652,   /* 1.2.840.113549.1.7.2 */
-    ENVELOPED_DATA            = 653,   /* 1.2.840.113549.1.7.3 */
-    SIGNED_AND_ENVELOPED_DATA = 654,   /* 1.2.840.113549.1.7.4 */
-    DIGESTED_DATA             = 655,   /* 1.2.840.113549.1.7.5 */
-    ENCRYPTED_DATA            = 656    /* 1.2.840.113549.1.7.6 */
+    PKCS7_MSG                 = 650,  /* 1.2.840.113549.1.7   */
+    DATA                      = 651,  /* 1.2.840.113549.1.7.1 */
+    SIGNED_DATA               = 652,  /* 1.2.840.113549.1.7.2 */
+    ENVELOPED_DATA            = 653,  /* 1.2.840.113549.1.7.3 */
+    SIGNED_AND_ENVELOPED_DATA = 654,  /* 1.2.840.113549.1.7.4 */
+    DIGESTED_DATA             = 655,  /* 1.2.840.113549.1.7.5 */
+    ENCRYPTED_DATA            = 656,  /* 1.2.840.113549.1.7.6 */
+#if defined(HAVE_LIBZ) && !defined(NO_PKCS7_COMPRESSED_DATA)
+    COMPRESSED_DATA           = 678,  /* 1.2.840.113549.1.9.16.1.9,  RFC 3274 */
+#endif
+    FIRMWARE_PKG_DATA         = 685,  /* 1.2.840.113549.1.9.16.1.16, RFC 4108 */
+    AUTH_ENVELOPED_DATA       = 692   /* 1.2.840.113549.1.9.16.1.23, RFC 5083 */
+};
+
+enum PKCS7_STATE {
+    WC_PKCS7_START = 0,
+
+    /* decode encrypted */
+    WC_PKCS7_STAGE2,
+    WC_PKCS7_STAGE3,
+    WC_PKCS7_STAGE4,
+    WC_PKCS7_STAGE5,
+    WC_PKCS7_STAGE6,
+
+    WC_PKCS7_VERIFY_STAGE2,
+    WC_PKCS7_VERIFY_STAGE3,
+    WC_PKCS7_VERIFY_STAGE4,
+    WC_PKCS7_VERIFY_STAGE5,
+    WC_PKCS7_VERIFY_STAGE6,
+
+    /* parse info set */
+    WC_PKCS7_INFOSET_START,
+    WC_PKCS7_INFOSET_BER,
+    WC_PKCS7_INFOSET_STAGE1,
+    WC_PKCS7_INFOSET_STAGE2,
+    WC_PKCS7_INFOSET_END,
+
+    /* decode enveloped data */
+    WC_PKCS7_ENV_2,
+    WC_PKCS7_ENV_3,
+    WC_PKCS7_ENV_4,
+    WC_PKCS7_ENV_5,
+
+    /* decode auth enveloped */
+    WC_PKCS7_AUTHENV_2,
+    WC_PKCS7_AUTHENV_3,
+    WC_PKCS7_AUTHENV_4,
+    WC_PKCS7_AUTHENV_5,
+    WC_PKCS7_AUTHENV_6,
+    WC_PKCS7_AUTHENV_ATRB,
+    WC_PKCS7_AUTHENV_ATRBEND,
+    WC_PKCS7_AUTHENV_7,
+
+    /* decryption state types */
+    WC_PKCS7_DECRYPT_KTRI,
+    WC_PKCS7_DECRYPT_KTRI_2,
+    WC_PKCS7_DECRYPT_KTRI_3,
+
+
+    WC_PKCS7_DECRYPT_KARI,
+    WC_PKCS7_DECRYPT_KEKRI,
+    WC_PKCS7_DECRYPT_PWRI,
+    WC_PKCS7_DECRYPT_ORI,
+
+    WC_PKCS7_DECRYPT_DONE,
+
 };
 
 enum Pkcs7_Misc {
@@ -74,14 +152,36 @@
 #endif
     MAX_RECIP_SZ          = MAX_VERSION_SZ +
                             MAX_SEQ_SZ + ASN_NAME_MAX + MAX_SN_SZ +
-                            MAX_SEQ_SZ + MAX_ALGO_SZ + 1 + MAX_ENCRYPTED_KEY_SZ
+                            MAX_SEQ_SZ + MAX_ALGO_SZ + 1 + MAX_ENCRYPTED_KEY_SZ,
+#if (defined(HAVE_FIPS) && defined(HAVE_FIPS_VERSION) && \
+     (HAVE_FIPS_VERSION >= 2)) || defined(HAVE_SELFTEST)
+    /* In the event of fips cert 3389 or CAVP selftest build, these enums are
+     * not in aes.h for use with pkcs7 so enumerate it here outside the fips
+     * boundary */
+    GCM_NONCE_MID_SZ = 12, /* The usual default nonce size for AES-GCM. */
+    CCM_NONCE_MIN_SZ = 7,
+#endif
 };
 
+enum Cms_Options {
+    CMS_SKID = 1,
+    CMS_ISSUER_AND_SERIAL_NUMBER = 2,
+};
+#define DEGENERATE_SID 3
+
+/* CMS/PKCS#7 RecipientInfo types, RFC 5652, Section 6.2 */
+enum Pkcs7_RecipientInfo_Types {
+    PKCS7_KTRI  = 0,
+    PKCS7_KARI  = 1,
+    PKCS7_KEKRI = 2,
+    PKCS7_PWRI  = 3,
+    PKCS7_ORI   = 4
+};
 
 typedef struct PKCS7Attrib {
-    byte* oid;
+    const byte* oid;
     word32 oidSz;
-    byte* value;
+    const byte* value;
     word32 valueSz;
 } PKCS7Attrib;
 
@@ -94,20 +194,55 @@
     word32 valueSz;
 } PKCS7DecodedAttrib;
 
+typedef struct PKCS7State PKCS7State;
+typedef struct Pkcs7Cert Pkcs7Cert;
+typedef struct Pkcs7EncodedRecip Pkcs7EncodedRecip;
+typedef struct PKCS7 PKCS7;
+typedef struct PKCS7 PKCS7_SIGNED;
+typedef struct PKCS7SignerInfo PKCS7SignerInfo;
+
+/* OtherRecipientInfo decrypt callback prototype */
+typedef int (*CallbackOriDecrypt)(PKCS7* pkcs7, byte* oriType, word32 oriTypeSz,
+                                  byte* oriValue, word32 oriValueSz,
+                                  byte* decryptedKey, word32* decryptedKeySz,
+                                  void* ctx);
+typedef int (*CallbackOriEncrypt)(PKCS7* pkcs7, byte* cek, word32 cekSz,
+                                  byte* oriType, word32* oriTypeSz,
+                                  byte* oriValue, word32* oriValueSz,
+                                  void* ctx);
+typedef int (*CallbackDecryptContent)(PKCS7* pkcs7, int encryptOID,
+                                   byte* iv, int ivSz, byte* aad, word32 aadSz,
+                                   byte* authTag, word32 authTagSz, byte* in,
+                                   int inSz, byte* out, void* ctx);
+typedef int (*CallbackWrapCEK)(PKCS7* pkcs7, byte* cek, word32 cekSz,
+                                  byte* keyId, word32 keyIdSz,
+                                  byte* originKey, word32 originKeySz,
+                                  byte* out, word32 outSz,
+                                  int keyWrapAlgo, int type, int dir);
+
+#if defined(HAVE_PKCS7_RSA_RAW_SIGN_CALLBACK) && !defined(NO_RSA)
+/* RSA sign raw digest callback, user builds DigestInfo */
+typedef int (*CallbackRsaSignRawDigest)(PKCS7* pkcs7, byte* digest,
+                                   word32 digestSz, byte* out, word32 outSz,
+                                   byte* privateKey, word32 privateKeySz,
+                                   int devId, int hashOID);
+#endif
 
 /* Public Structure Warning:
- * Existing members must not be changed to maintain backwards compatibility! 
+ * Existing members must not be changed to maintain backwards compatibility!
  */
-typedef struct PKCS7 {
+struct PKCS7 {
     WC_RNG* rng;
     PKCS7Attrib* signedAttribs;
     byte*  content;               /* inner content, not owner             */
+    byte*  contentDynamic;        /* content if constructed OCTET_STRING  */
     byte*  singleCert;            /* recipient cert, DER, not owner       */
-    byte*  issuer;                /* issuer name of singleCert            */
+    const byte* issuer;           /* issuer name of singleCert            */
     byte*  privateKey;            /* private key, DER, not owner          */
     void*  heap;                  /* heap hint for dynamic memory         */
 #ifdef ASN_BER_TO_DER
     byte*  der;                   /* DER encoded version of message       */
+    word32 derSz;
 #endif
     byte*  cert[MAX_PKCS7_CERTS];
 
@@ -141,45 +276,225 @@
     byte issuerSn[MAX_SN_SZ];     /* singleCert's serial number           */
     byte publicKey[MAX_RSA_INT_SZ + MAX_RSA_E_SZ]; /* MAX RSA key size (m + e)*/
     word32 certSz[MAX_PKCS7_CERTS];
-    
+
      /* flags - up to 16-bits */
     word16 isDynamic:1;
+    word16 noDegenerate:1; /* allow degenerate case in verify function */
+    word16 detached:1;     /* generate detached SignedData signature bundles */
 
+    byte contentType[MAX_OID_SZ]; /* custom contentType byte array */
+    word32 contentTypeSz;         /* size of contentType, bytes */
+
+    int sidType;                  /* SignerIdentifier type to use, of type
+                                     Pkcs7_SignerIdentifier_Types, default to
+                                     SID_ISSUER_AND_SERIAL_NUMBER */
+    byte issuerSubjKeyId[KEYID_SIZE];  /* SubjectKeyIdentifier of singleCert  */
+    Pkcs7Cert* certList;          /* certificates list for SignedData set */
+    Pkcs7EncodedRecip* recipList; /* recipients list */
+    byte* cek;                    /* content encryption key, random, dynamic */
+    word32 cekSz;                 /* size of cek, bytes */
+    byte* pass;                   /* password, for PWRI decryption */
+    word32 passSz;                /* size of pass, bytes */
+    int kekEncryptOID;            /* KEK encryption algorithm OID */
+
+    CallbackOriEncrypt oriEncryptCb;  /* ORI encrypt callback */
+    CallbackOriDecrypt oriDecryptCb;  /* ORI decrypt callback */
+    void* oriEncryptCtx;              /* ORI encrypt user context ptr */
+    void* oriDecryptCtx;              /* ORI decrypt user context ptr */
+
+    PKCS7Attrib* authAttribs;     /* authenticated attribs */
+    word32 authAttribsSz;
+    PKCS7Attrib* unauthAttribs;   /* unauthenticated attribs */
+    word32 unauthAttribsSz;
+
+#ifndef NO_PKCS7_STREAM
+    PKCS7State* stream;
+#endif
+    word32 state;
+
+    word16 skipDefaultSignedAttribs:1; /* skip adding default signed attribs */
+
+    byte version; /* 1 for RFC 2315 and 3 for RFC 4108 */
+    PKCS7SignerInfo* signerInfo;
+    CallbackDecryptContent decryptionCb;
+    CallbackWrapCEK        wrapCEKCb;
+    void*            decryptionCtx;
+
+    byte* signature;
+    byte* plainDigest;
+    byte* pkcs7Digest;
+    word32 signatureSz;
+    word32 plainDigestSz;
+    word32 pkcs7DigestSz;
+
+#if defined(HAVE_PKCS7_RSA_RAW_SIGN_CALLBACK) && !defined(NO_RSA)
+    CallbackRsaSignRawDigest rsaSignRawDigestCb;
+#endif
+
+    /* used by DecodeEnvelopedData with multiple encrypted contents */
+    byte*  cachedEncryptedContent;
+    word32 cachedEncryptedContentSz;
     /* !! NEW DATA MEMBERS MUST BE ADDED AT END !! */
-} PKCS7;
-
+};
 
 WOLFSSL_API PKCS7* wc_PKCS7_New(void* heap, int devId);
 WOLFSSL_API int  wc_PKCS7_Init(PKCS7* pkcs7, void* heap, int devId);
-WOLFSSL_API int  wc_PKCS7_InitWithCert(PKCS7* pkcs7, byte* cert, word32 certSz);
+WOLFSSL_API int  wc_PKCS7_InitWithCert(PKCS7* pkcs7, byte* der, word32 derSz);
+WOLFSSL_API int  wc_PKCS7_AddCertificate(PKCS7* pkcs7, byte* der, word32 derSz);
 WOLFSSL_API void wc_PKCS7_Free(PKCS7* pkcs7);
 
 WOLFSSL_API int wc_PKCS7_GetAttributeValue(PKCS7* pkcs7, const byte* oid,
         word32 oidSz, byte* out, word32* outSz);
+
+WOLFSSL_API int wc_PKCS7_SetSignerIdentifierType(PKCS7* pkcs7, int type);
+WOLFSSL_API int wc_PKCS7_SetContentType(PKCS7* pkcs7, byte* contentType,
+                                        word32 sz);
+WOLFSSL_API int wc_PKCS7_GetPadSize(word32 inputSz, word32 blockSz);
+WOLFSSL_API int wc_PKCS7_PadData(byte* in, word32 inSz, byte* out, word32 outSz,
+                                 word32 blockSz);
+
+/* CMS/PKCS#7 Data */
 WOLFSSL_API int  wc_PKCS7_EncodeData(PKCS7* pkcs7, byte* output,
                                        word32 outputSz);
+
+/* CMS/PKCS#7 SignedData */
+WOLFSSL_API int  wc_PKCS7_SetDetached(PKCS7* pkcs7, word16 flag);
+WOLFSSL_API int  wc_PKCS7_NoDefaultSignedAttribs(PKCS7* pkcs7);
 WOLFSSL_API int  wc_PKCS7_EncodeSignedData(PKCS7* pkcs7,
-                                       byte* output, word32 outputSz);
+                                          byte* output, word32 outputSz);
+WOLFSSL_API int  wc_PKCS7_EncodeSignedData_ex(PKCS7* pkcs7, const byte* hashBuf,
+                                          word32 hashSz, byte* outputHead,
+                                          word32* outputHeadSz,
+                                          byte* outputFoot,
+                                          word32* outputFootSz);
+WOLFSSL_API void wc_PKCS7_AllowDegenerate(PKCS7* pkcs7, word16 flag);
 WOLFSSL_API int  wc_PKCS7_VerifySignedData(PKCS7* pkcs7,
-                                       byte* pkiMsg, word32 pkiMsgSz);
+                                          byte* pkiMsg, word32 pkiMsgSz);
+WOLFSSL_API int  wc_PKCS7_VerifySignedData_ex(PKCS7* pkcs7, const byte* hashBuf,
+                                          word32 hashSz, byte* pkiMsgHead,
+                                          word32 pkiMsgHeadSz, byte* pkiMsgFoot,
+                                          word32 pkiMsgFootSz);
+
+WOLFSSL_API int  wc_PKCS7_GetSignerSID(PKCS7* pkcs7, byte* out, word32* outSz);
+
+/* CMS single-shot API for Signed FirmwarePkgData */
+WOLFSSL_API int  wc_PKCS7_EncodeSignedFPD(PKCS7* pkcs7, byte* privateKey,
+                                          word32 privateKeySz, int signOID,
+                                          int hashOID, byte* content,
+                                          word32 contentSz,
+                                          PKCS7Attrib* signedAttribs,
+                                          word32 signedAttribsSz, byte* output,
+                                          word32 outputSz);
+#ifndef NO_PKCS7_ENCRYPTED_DATA
+/* CMS single-shot API for Signed Encrypted FirmwarePkgData */
+WOLFSSL_API int  wc_PKCS7_EncodeSignedEncryptedFPD(PKCS7* pkcs7,
+                                          byte* encryptKey, word32 encryptKeySz,
+                                          byte* privateKey, word32 privateKeySz,
+                                          int encryptOID, int signOID,
+                                          int hashOID, byte* content,
+                                          word32 contentSz,
+                                          PKCS7Attrib* unprotectedAttribs,
+                                          word32 unprotectedAttribsSz,
+                                          PKCS7Attrib* signedAttribs,
+                                          word32 signedAttribsSz,
+                                          byte* output, word32 outputSz);
+#endif /* NO_PKCS7_ENCRYPTED_DATA */
+#if defined(HAVE_LIBZ) && !defined(NO_PKCS7_COMPRESSED_DATA)
+/* CMS single-shot API for Signed Compressed FirmwarePkgData */
+WOLFSSL_API int  wc_PKCS7_EncodeSignedCompressedFPD(PKCS7* pkcs7,
+                                          byte* privateKey, word32 privateKeySz,
+                                          int signOID, int hashOID,
+                                          byte* content, word32 contentSz,
+                                          PKCS7Attrib* signedAttribs,
+                                          word32 signedAttribsSz, byte* output,
+                                          word32 outputSz);
+
+#ifndef NO_PKCS7_ENCRYPTED_DATA
+/* CMS single-shot API for Signed Encrypted Compressed FirmwarePkgData */
+WOLFSSL_API int  wc_PKCS7_EncodeSignedEncryptedCompressedFPD(PKCS7* pkcs7,
+                                          byte* encryptKey, word32 encryptKeySz,
+                                          byte* privateKey, word32 privateKeySz,
+                                          int encryptOID, int signOID,
+                                          int hashOID, byte* content,
+                                          word32 contentSz,
+                                          PKCS7Attrib* unprotectedAttribs,
+                                          word32 unprotectedAttribsSz,
+                                          PKCS7Attrib* signedAttribs,
+                                          word32 signedAttribsSz,
+                                          byte* output, word32 outputSz);
+#endif /* !NO_PKCS7_ENCRYPTED_DATA */
+#endif /* HAVE_LIBZ && !NO_PKCS7_COMPRESSED_DATA */
+
+/* EnvelopedData and AuthEnvelopedData RecipientInfo functions */
+WOLFSSL_API int  wc_PKCS7_AddRecipient_KTRI(PKCS7* pkcs7, const byte* cert,
+                                          word32 certSz, int options);
+WOLFSSL_API int  wc_PKCS7_AddRecipient_KARI(PKCS7* pkcs7, const byte* cert,
+                                          word32 certSz, int keyWrapOID,
+                                          int keyAgreeOID, byte* ukm,
+                                          word32 ukmSz, int options);
+
+WOLFSSL_API int  wc_PKCS7_SetKey(PKCS7* pkcs7, byte* key, word32 keySz);
+WOLFSSL_API int  wc_PKCS7_AddRecipient_KEKRI(PKCS7* pkcs7, int keyWrapOID,
+                                          byte* kek, word32 kekSz,
+                                          byte* keyID, word32 keyIdSz,
+                                          void* timePtr, byte* otherOID,
+                                          word32 otherOIDSz, byte* other,
+                                          word32 otherSz, int options);
+
+WOLFSSL_API int  wc_PKCS7_SetPassword(PKCS7* pkcs7, byte* passwd, word32 pLen);
+WOLFSSL_API int  wc_PKCS7_AddRecipient_PWRI(PKCS7* pkcs7, byte* passwd,
+                                          word32 pLen, byte* salt,
+                                          word32 saltSz, int kdfOID,
+                                          int prfOID, int iterations,
+                                          int kekEncryptOID, int options);
+WOLFSSL_API int  wc_PKCS7_SetOriEncryptCtx(PKCS7* pkcs7, void* ctx);
+WOLFSSL_API int  wc_PKCS7_SetOriDecryptCtx(PKCS7* pkcs7, void* ctx);
+WOLFSSL_API int  wc_PKCS7_SetOriDecryptCb(PKCS7* pkcs7, CallbackOriDecrypt cb);
+WOLFSSL_API int  wc_PKCS7_AddRecipient_ORI(PKCS7* pkcs7, CallbackOriEncrypt cb,
+                                           int options);
+WOLFSSL_API int  wc_PKCS7_SetWrapCEKCb(PKCS7* pkcs7,
+        CallbackWrapCEK wrapCEKCb);
+
+#if defined(HAVE_PKCS7_RSA_RAW_SIGN_CALLBACK) && !defined(NO_RSA)
+WOLFSSL_API int  wc_PKCS7_SetRsaSignRawDigestCb(PKCS7* pkcs7,
+        CallbackRsaSignRawDigest cb);
+#endif
+
+/* CMS/PKCS#7 EnvelopedData */
 WOLFSSL_API int  wc_PKCS7_EncodeEnvelopedData(PKCS7* pkcs7,
                                           byte* output, word32 outputSz);
 WOLFSSL_API int  wc_PKCS7_DecodeEnvelopedData(PKCS7* pkcs7, byte* pkiMsg,
                                           word32 pkiMsgSz, byte* output,
                                           word32 outputSz);
 
-WOLFSSL_API int wc_PKCS7_GetPadSize(word32 inputSz, word32 blockSz);
-WOLFSSL_API int wc_PKCS7_PadData(byte* in, word32 inSz, byte* out, word32 outSz,
-                                 word32 blockSz);
+/* CMS/PKCS#7 AuthEnvelopedData */
+WOLFSSL_API int  wc_PKCS7_EncodeAuthEnvelopedData(PKCS7* pkcs7,
+                                          byte* output, word32 outputSz);
+WOLFSSL_API int  wc_PKCS7_DecodeAuthEnvelopedData(PKCS7* pkcs7, byte* pkiMsg,
+                                          word32 pkiMsgSz, byte* output,
+                                          word32 outputSz);
 
+/* CMS/PKCS#7 EncryptedData */
 #ifndef NO_PKCS7_ENCRYPTED_DATA
 WOLFSSL_API int  wc_PKCS7_EncodeEncryptedData(PKCS7* pkcs7,
                                           byte* output, word32 outputSz);
 WOLFSSL_API int  wc_PKCS7_DecodeEncryptedData(PKCS7* pkcs7, byte* pkiMsg,
                                           word32 pkiMsgSz, byte* output,
                                           word32 outputSz);
+WOLFSSL_API int  wc_PKCS7_SetDecodeEncryptedCb(PKCS7* pkcs7,
+        CallbackDecryptContent decryptionCb);
+WOLFSSL_API int  wc_PKCS7_SetDecodeEncryptedCtx(PKCS7* pkcs7, void* ctx);
 #endif /* NO_PKCS7_ENCRYPTED_DATA */
 
+/* CMS/PKCS#7 CompressedData */
+#if defined(HAVE_LIBZ) && !defined(NO_PKCS7_COMPRESSED_DATA)
+WOLFSSL_API int wc_PKCS7_EncodeCompressedData(PKCS7* pkcs7, byte* output,
+                                              word32 outputSz);
+WOLFSSL_API int wc_PKCS7_DecodeCompressedData(PKCS7* pkcs7, byte* pkiMsg,
+                                              word32 pkiMsgSz, byte* output,
+                                              word32 outputSz);
+#endif /* HAVE_LIBZ && !NO_PKCS7_COMPRESSED_DATA */
+
 #ifdef __cplusplus
     } /* extern "C" */
 #endif
--- a/wolfssl/wolfcrypt/poly1305.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/poly1305.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* poly1305.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -82,6 +82,14 @@
     unsigned char finished;
     unsigned char started;
 #else
+#if defined(WOLFSSL_ARMASM) && defined(__aarch64__)
+    ALIGN128 word32 r[5];
+    ALIGN128 word32 r_2[5]; // r^2
+    ALIGN128 word32 r_4[5]; // r^4
+    ALIGN128 word32 h[5];
+    word32 pad[4];
+    word64 leftover;
+#else
 #if defined(POLY130564)
     word64 r[3];
     word64 h[3];
@@ -92,6 +100,7 @@
     word32 pad[4];
 #endif
     size_t leftover;
+#endif /* WOLFSSL_ARMASM */
     unsigned char buffer[POLY1305_BLOCK_SIZE];
     unsigned char finished;
 #endif
@@ -103,13 +112,20 @@
                                   word32 kySz);
 WOLFSSL_API int wc_Poly1305Update(Poly1305* poly1305, const byte*, word32);
 WOLFSSL_API int wc_Poly1305Final(Poly1305* poly1305, byte* tag);
+
+/* AEAD Functions */
+WOLFSSL_API int wc_Poly1305_Pad(Poly1305* ctx, word32 lenToPad);
+WOLFSSL_API int wc_Poly1305_EncodeSizes(Poly1305* ctx, word32 aadSz, word32 dataSz);
 WOLFSSL_API int wc_Poly1305_MAC(Poly1305* ctx, byte* additional, word32 addSz,
                                byte* input, word32 sz, byte* tag, word32 tagSz);
+
+void poly1305_block(Poly1305* ctx, const unsigned char *m);
+void poly1305_blocks(Poly1305* ctx, const unsigned char *m,
+                            size_t bytes);
 #ifdef __cplusplus
     } /* extern "C" */
 #endif
 
 #endif /* HAVE_POLY1305 */
 #endif /* WOLF_CRYPT_POLY1305_H */
-
 
--- a/wolfssl/wolfcrypt/pwdbased.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/pwdbased.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* pwdbased.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -46,6 +46,9 @@
 WOLFSSL_API int wc_PBKDF1(byte* output, const byte* passwd, int pLen,
                       const byte* salt, int sLen, int iterations, int kLen,
                       int typeH);
+WOLFSSL_API int wc_PBKDF2_ex(byte* output, const byte* passwd, int pLen,
+                    const byte* salt, int sLen, int iterations, int kLen,
+                    int typeH, void* heap, int devId);
 WOLFSSL_API int wc_PBKDF2(byte* output, const byte* passwd, int pLen,
                       const byte* salt, int sLen, int iterations, int kLen,
                       int typeH);
@@ -60,6 +63,9 @@
 WOLFSSL_API int wc_scrypt(byte* output, const byte* passwd, int passLen,
                           const byte* salt, int saltLen, int cost,
                           int blockSize, int parallel, int dkLen);
+WOLFSSL_API int wc_scrypt_ex(byte* output, const byte* passwd, int passLen,
+                             const byte* salt, int saltLen, word32 iterations,
+                             int blockSize, int parallel, int dkLen);
 #endif
 
 
--- a/wolfssl/wolfcrypt/rabbit.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/rabbit.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* rabbit.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
--- a/wolfssl/wolfcrypt/random.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/random.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* random.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -48,9 +48,9 @@
  /* Maximum generate block length */
 #ifndef RNG_MAX_BLOCK_LEN
     #ifdef HAVE_INTEL_QA
-        #define RNG_MAX_BLOCK_LEN (0xFFFF)
+        #define RNG_MAX_BLOCK_LEN (0xFFFFl)
     #else
-        #define RNG_MAX_BLOCK_LEN (0x10000)
+        #define RNG_MAX_BLOCK_LEN (0x10000l)
     #endif
 #endif
 
@@ -66,8 +66,8 @@
 #endif
 
 /* make sure Hash DRBG is enabled, unless WC_NO_HASHDRBG is defined
-    or CUSTOM_RAND_GENERATE_BLOCK is defined*/
-#if !defined(WC_NO_HASHDRBG) || !defined(CUSTOM_RAND_GENERATE_BLOCK)
+    or CUSTOM_RAND_GENERATE_BLOCK is defined */
+#if !defined(WC_NO_HASHDRBG) && !defined(CUSTOM_RAND_GENERATE_BLOCK)
     #undef  HAVE_HASHDRBG
     #define HAVE_HASHDRBG
     #ifndef WC_RESEED_INTERVAL
@@ -88,7 +88,7 @@
  *     seeded via wc_GenerateSeed. This is the default source.
  */
 
- /* Seed source can be overriden by defining one of these:
+ /* Seed source can be overridden by defining one of these:
       CUSTOM_RAND_GENERATE_SEED
       CUSTOM_RAND_GENERATE_SEED_OS
       CUSTOM_RAND_GENERATE */
@@ -106,7 +106,9 @@
     #include <wolfssl/wolfcrypt/sha256.h>
 #elif defined(HAVE_WNR)
      /* allow whitewood as direct RNG source using wc_GenerateSeed directly */
-#else
+#elif defined(HAVE_INTEL_RDRAND)
+    /* Intel RDRAND or RDSEED */
+#elif !defined(WC_NO_RNG)
     #error No RNG source defined!
 #endif
 
@@ -136,6 +138,9 @@
     #else
         int fd;
     #endif
+    #if defined(WOLF_CRYPTO_CB)
+        int devId;
+    #endif
 } OS_Seed;
 
 
@@ -151,10 +156,26 @@
 #ifdef HAVE_HASHDRBG
     /* Hash-based Deterministic Random Bit Generator */
     struct DRBG* drbg;
+#if defined(WOLFSSL_NO_MALLOC) && !defined(WOLFSSL_STATIC_MEMORY)
+    #define DRBG_STRUCT_SZ ((sizeof(word32)*3) + (DRBG_SEED_LEN*2))
+    #ifdef WOLFSSL_SMALL_STACK_CACHE
+        #define DRBG_STRUCT_SZ_SHA256 (sizeof(wc_Sha256))
+    #else
+        #define DRBG_STRUCT_SZ_SHA256 0
+    #endif
+    #if defined(WOLFSSL_ASYNC_CRYPT) || defined(WOLF_CRYPTO_CB)
+        #define DRBG_STRUCT_SZ_ASYNC (sizeof(void*) + sizeof(int))
+    #else
+        #define DRBG_STRUCT_SZ_ASYNC 0
+    #endif
+    byte drbg_data[DRBG_STRUCT_SZ + DRBG_STRUCT_SZ_SHA256 + DRBG_STRUCT_SZ_ASYNC];
+#endif
     byte status;
 #endif
 #ifdef WOLFSSL_ASYNC_CRYPT
     WC_ASYNC_DEV asyncDev;
+#endif
+#if defined(WOLFSSL_ASYNC_CRYPT) || defined(WOLF_CRYPTO_CB)
     int devId;
 #endif
 };
@@ -179,19 +200,36 @@
 #endif /* HAVE_WNR */
 
 
+WOLFSSL_ABI WOLFSSL_API WC_RNG* wc_rng_new(byte*, word32, void*);
+WOLFSSL_ABI WOLFSSL_API void wc_rng_free(WC_RNG*);
+
+
+#ifndef WC_NO_RNG
 WOLFSSL_API int  wc_InitRng(WC_RNG*);
 WOLFSSL_API int  wc_InitRng_ex(WC_RNG* rng, void* heap, int devId);
 WOLFSSL_API int  wc_InitRngNonce(WC_RNG* rng, byte* nonce, word32 nonceSz);
 WOLFSSL_API int  wc_InitRngNonce_ex(WC_RNG* rng, byte* nonce, word32 nonceSz,
                                     void* heap, int devId);
-WOLFSSL_API int  wc_RNG_GenerateBlock(WC_RNG*, byte*, word32 sz);
+WOLFSSL_ABI WOLFSSL_API int wc_RNG_GenerateBlock(WC_RNG*, byte*, word32 sz);
 WOLFSSL_API int  wc_RNG_GenerateByte(WC_RNG*, byte*);
 WOLFSSL_API int  wc_FreeRng(WC_RNG*);
+#else
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#define wc_InitRng(rng) NOT_COMPILED_IN
+#define wc_InitRng_ex(rng, h, d) NOT_COMPILED_IN
+#define wc_InitRngNonce(rng, n, s) NOT_COMPILED_IN
+#define wc_InitRngNonce_ex(rng, n, s, h, d) NOT_COMPILED_IN
+#define wc_RNG_GenerateBlock(rng, b, s) NOT_COMPILED_IN
+#define wc_RNG_GenerateByte(rng, b) NOT_COMPILED_IN
+#define wc_FreeRng(rng) (void)NOT_COMPILED_IN
+#endif
+
 
 
 #ifdef HAVE_HASHDRBG
     WOLFSSL_LOCAL int wc_RNG_DRBG_Reseed(WC_RNG* rng, const byte* entropy,
                                         word32 entropySz);
+    WOLFSSL_API int wc_RNG_TestSeed(const byte* seed, word32 seedSz);
     WOLFSSL_API int wc_RNG_HealthTest(int reseed,
                                         const byte* entropyA, word32 entropyASz,
                                         const byte* entropyB, word32 entropyBSz,
--- a/wolfssl/wolfcrypt/ripemd.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/ripemd.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* ripemd.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
--- a/wolfssl/wolfcrypt/rsa.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/rsa.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* rsa.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -37,6 +37,20 @@
     #define WC_RSA_EXPONENT 65537L
 #endif
 
+#if defined(WC_RSA_NONBLOCK)
+    /* enable support for fast math based non-blocking exptmod */
+    /* this splits the RSA function into many smaller operations */
+    #ifndef USE_FAST_MATH
+        #error RSA non-blocking mode only supported using fast math
+    #endif
+    #ifndef TFM_TIMING_RESISTANT
+      #error RSA non-blocking mode only supported with timing resistance enabled
+    #endif
+
+    /* RSA bounds check is not supported with RSA non-blocking mode */
+    #undef  NO_RSA_BOUNDS_CHECK
+    #define NO_RSA_BOUNDS_CHECK
+#endif
 
 /* allow for user to plug in own crypto */
 #if !defined(HAVE_FIPS) && (defined(HAVE_USER_RSA) || defined(HAVE_FAST_RSA))
@@ -66,10 +80,19 @@
 #include "xsecure_rsa.h"
 #endif
 
+#if defined(WOLFSSL_CRYPTOCELL)
+    #include <wolfssl/wolfcrypt/port/arm/cryptoCell.h>
+#endif
+
 #ifdef __cplusplus
     extern "C" {
 #endif
 
+enum {
+    RSA_MIN_SIZE = 512,
+    RSA_MAX_SIZE = 4096,
+};
+
 /* avoid redefinition of structs */
 #if !defined(HAVE_FIPS) || \
     (defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2))
@@ -94,9 +117,6 @@
     RSA_BLOCK_TYPE_1 = 1,
     RSA_BLOCK_TYPE_2 = 2,
 
-    RSA_MIN_SIZE = 512,
-    RSA_MAX_SIZE = 4096,
-
     RSA_MIN_PAD_SZ   = 11,     /* separator + 0 + pad value + 8 pads */
 
     RSA_PSS_PAD_SZ = 8,
@@ -109,14 +129,33 @@
 #ifdef WC_RSA_PSS
     RSA_PSS_PAD_TERM = 0xBC,
 #endif
+
+    RSA_PSS_SALT_LEN_DEFAULT  = -1,
+#ifdef WOLFSSL_PSS_SALT_LEN_DISCOVER
+    RSA_PSS_SALT_LEN_DISCOVER = -2,
+#endif
+
+#ifdef HAVE_PKCS11
+    RSA_MAX_ID_LEN      = 32,
+#endif
 };
 
+#ifdef WC_RSA_NONBLOCK
+typedef struct RsaNb {
+    exptModNb_t exptmod; /* non-block expt_mod */
+    mp_int tmp;
+} RsaNb;
+#endif
+
 /* RSA */
 struct RsaKey {
-    mp_int n, e, d, p, q;
+    mp_int n, e;
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+    mp_int d, p, q;
 #if defined(WOLFSSL_KEY_GEN) || defined(OPENSSL_EXTRA) || !defined(RSA_LOW_MEM)
     mp_int dP, dQ, u;
 #endif
+#endif
     void* heap;                               /* for user memory overrides */
     byte* data;                               /* temp buffer for async RSA */
     int   type;                               /* public or private */
@@ -125,7 +164,7 @@
 #ifdef WC_RSA_BLINDING
     WC_RNG* rng;                              /* for PrivateDecrypt blinding */
 #endif
-#ifdef WOLF_CRYPTO_DEV
+#ifdef WOLF_CRYPTO_CB
     int   devId;
 #endif
 #ifdef WOLFSSL_ASYNC_CRYPT
@@ -139,7 +178,23 @@
     byte*  mod;
     XSecure_Rsa xRsa;
 #endif
+#ifdef HAVE_PKCS11
+    byte id[RSA_MAX_ID_LEN];
+    int  idLen;
+#endif
+#if defined(WOLFSSL_ASYNC_CRYPT) || !defined(WOLFSSL_RSA_VERIFY_INLINE)
     byte   dataIsAlloc;
+#endif
+#ifdef WC_RSA_NONBLOCK
+    RsaNb* nb;
+#endif
+#ifdef WOLFSSL_AFALG_XILINX_RSA
+    int alFd;
+    int rdFd;
+#endif
+#if defined(WOLFSSL_CRYPTOCELL)
+    rsa_context_t ctx;
+#endif
 };
 
 #ifndef WC_RSAKEY_TYPE_DEFINED
@@ -152,6 +207,10 @@
 WOLFSSL_API int  wc_InitRsaKey(RsaKey* key, void* heap);
 WOLFSSL_API int  wc_InitRsaKey_ex(RsaKey* key, void* heap, int devId);
 WOLFSSL_API int  wc_FreeRsaKey(RsaKey* key);
+#ifdef HAVE_PKCS11
+WOLFSSL_API int wc_InitRsaKey_Id(RsaKey* key, unsigned char* id, int len,
+                                 void* heap, int devId);
+#endif
 WOLFSSL_API int  wc_CheckRsaKey(RsaKey* key);
 #ifdef WOLFSSL_XILINX_CRYPT
 WOLFSSL_LOCAL int wc_InitRsaHw(RsaKey* key);
@@ -179,6 +238,8 @@
                                     RsaKey* key);
 WOLFSSL_API int  wc_RsaSSL_Verify(const byte* in, word32 inLen, byte* out,
                               word32 outLen, RsaKey* key);
+WOLFSSL_API int  wc_RsaSSL_Verify_ex(const byte* in, word32 inLen, byte* out,
+                              word32 outLen, RsaKey* key, int pad_type);
 WOLFSSL_API int  wc_RsaPSS_VerifyInline(byte* in, word32 inLen, byte** out,
                                         enum wc_HashType hash, int mgf,
                                         RsaKey* key);
@@ -219,11 +280,19 @@
                                                                RsaKey*, word32);
 WOLFSSL_API int  wc_RsaPublicKeyDecodeRaw(const byte* n, word32 nSz,
                                         const byte* e, word32 eSz, RsaKey* key);
-#ifdef WOLFSSL_KEY_GEN
-    WOLFSSL_API int wc_RsaKeyToDer(RsaKey*, byte* output, word32 inLen);
+WOLFSSL_API int wc_RsaKeyToDer(RsaKey*, byte* output, word32 inLen);
+
+
+#ifdef WC_RSA_BLINDING
+    WOLFSSL_API int wc_RsaSetRNG(RsaKey* key, WC_RNG* rng);
 #endif
-
-WOLFSSL_API int wc_RsaSetRNG(RsaKey* key, WC_RNG* rng);
+#ifdef WC_RSA_NONBLOCK
+    WOLFSSL_API int wc_RsaSetNonBlock(RsaKey* key, RsaNb* nb);
+    #ifdef WC_RSA_NONBLOCK_TIME
+    WOLFSSL_API int wc_RsaSetNonBlockTime(RsaKey* key, word32 maxBlockUs,
+                                          word32 cpuMHz);
+    #endif
+#endif
 
 /*
    choice of padding added after fips, so not available when using fips RSA
@@ -257,7 +326,7 @@
                    RsaKey* key, int type, WC_RNG* rng);
 #endif
 
-#endif /* HAVE_FIPS*/
+#endif /* HAVE_FIPS */
 
 WOLFSSL_API int  wc_RsaFlattenPublicKey(RsaKey*, byte*, word32*, byte*,
                                                                        word32*);
@@ -272,12 +341,25 @@
 
 #ifdef WOLFSSL_KEY_GEN
     WOLFSSL_API int wc_MakeRsaKey(RsaKey* key, int size, long e, WC_RNG* rng);
+    WOLFSSL_API int wc_CheckProbablePrime_ex(const byte* p, word32 pSz,
+                                          const byte* q, word32 qSz,
+                                          const byte* e, word32 eSz,
+                                          int nlen, int* isPrime, WC_RNG* rng);
     WOLFSSL_API int wc_CheckProbablePrime(const byte* p, word32 pSz,
                                           const byte* q, word32 qSz,
                                           const byte* e, word32 eSz,
                                           int nlen, int* isPrime);
 #endif
 
+WOLFSSL_LOCAL int wc_RsaPad_ex(const byte* input, word32 inputLen, byte* pkcsBlock,
+        word32 pkcsBlockLen, byte padValue, WC_RNG* rng, int padType,
+        enum wc_HashType hType, int mgf, byte* optLabel, word32 labelLen,
+        int saltLen, int bits, void* heap);
+WOLFSSL_LOCAL int wc_RsaUnPad_ex(byte* pkcsBlock, word32 pkcsBlockLen, byte** out,
+                                   byte padValue, int padType, enum wc_HashType hType,
+                                   int mgf, byte* optLabel, word32 labelLen, int saltLen,
+                                   int bits, void* heap);
+
 #endif /* HAVE_USER_RSA */
 
 #ifdef __cplusplus
--- a/wolfssl/wolfcrypt/selftest.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/selftest.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* selftest.h
  *
- * Copyright (C) 2006-2018 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -32,6 +32,9 @@
 #endif
 
 #ifdef HAVE_SELFTEST
+    /* Get wolfCrypt CAVP version */
+    WOLFSSL_API const char* wolfCrypt_GetVersion_CAVP_selftest(void);
+
     /* wolfCrypt self test, runs CAVP KATs */
     WOLFSSL_API int wolfCrypt_SelfTest(void);
 #endif
--- a/wolfssl/wolfcrypt/settings.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/settings.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* settings.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -31,6 +31,13 @@
     extern "C" {
 #endif
 
+/* This flag allows wolfSSL to include options.h instead of having client
+ * projects do it themselves. This should *NEVER* be defined when building
+ * wolfSSL as it can cause hard to debug problems. */
+#ifdef EXTERNAL_OPTS_OPENVPN
+#include <wolfssl/options.h>
+#endif
+
 /* Uncomment next line if using IPHONE */
 /* #define IPHONE */
 
@@ -40,6 +47,9 @@
 /* Uncomment next line if using Micrium uC/OS-III */
 /* #define MICRIUM */
 
+/* Uncomment next line if using Deos RTOS*/
+/* #define WOLFSSL_DEOS*/
+
 /* Uncomment next line if using Mbed */
 /* #define MBED */
 
@@ -76,6 +86,9 @@
 /* Uncomment next line if building wolfSSL for LSR */
 /* #define WOLFSSL_LSR */
 
+/* Uncomment next line if building for Freescale Classic MQX version 5.0 */
+/* #define FREESCALE_MQX_5_0 */
+
 /* Uncomment next line if building for Freescale Classic MQX version 4.0 */
 /* #define FREESCALE_MQX_4_0 */
 
@@ -98,6 +111,9 @@
 /* Uncomment next line if using STM32F4 */
 /* #define WOLFSSL_STM32F4 */
 
+/* Uncomment next line if using STM32FL */
+/* #define WOLFSSL_STM32FL */
+
 /* Uncomment next line if using STM32F7 */
 /* #define WOLFSSL_STM32F7 */
 
@@ -134,7 +150,7 @@
 /* Uncomment next line if building for VxWorks */
 /* #define WOLFSSL_VXWORKS */
 
-/* Uncomment next line if building for Nordic nRF5x platofrm */
+/* Uncomment next line if building for Nordic nRF5x platform */
 /* #define WOLFSSL_NRF5x */
 
 /* Uncomment next line to enable deprecated less secure static DH suites */
@@ -166,10 +182,35 @@
 /* Uncomment next line if building for using XILINX */
 /* #define WOLFSSL_XILINX */
 
+/* Uncomment next line if building for WICED Studio. */
+/* #define WOLFSSL_WICED  */
+
 /* Uncomment next line if building for Nucleus 1.2 */
 /* #define WOLFSSL_NUCLEUS_1_2 */
 
+/* Uncomment next line if building for using Apache mynewt */
+/* #define WOLFSSL_APACHE_MYNEWT */
+
+/* Uncomment next line if building for using ESP-IDF */
+/* #define WOLFSSL_ESPIDF */
+
+/* Uncomment next line if using Espressif ESP32-WROOM-32 */
+/* #define WOLFSSL_ESPWROOM32 */
+
+/* Uncomment next line if using Espressif ESP32-WROOM-32SE */
+/* #define WOLFSSL_ESPWROOM32SE */
+
+/* Uncomment next line if using ARM CRYPTOCELL*/
+/* #define WOLFSSL_CRYPTOCELL */
+
+/* Uncomment next line if using RENESAS TSIP */
+/* #define WOLFSSL_RENESAS_TSIP */
+
+/* Uncomment next line if using RENESAS RX64N */
+/* #define WOLFSSL_RENESAS_RX65N */
+
 #include <wolfssl/wolfcrypt/visibility.h>
+
 #define WOLFSSL_USER_SETTINGS
 #ifdef WOLFSSL_USER_SETTINGS
     #include "user_settings.h"
@@ -178,7 +219,13 @@
 
 /* make sure old RNG name is used with CTaoCrypt FIPS */
 #ifdef HAVE_FIPS
-    #define WC_RNG RNG
+    #if !defined(HAVE_FIPS_VERSION) || (HAVE_FIPS_VERSION < 2)
+        #define WC_RNG RNG
+    #else
+        #ifndef WOLFSSL_STM32L4
+            #define RNG WC_RNG
+        #endif
+    #endif
     /* blinding adds API not available yet in FIPS mode */
     #undef WC_RSA_BLINDING
 #endif
@@ -210,6 +257,46 @@
     #include <nx_api.h>
 #endif
 
+#if defined(WOLFSSL_ESPIDF)
+    #define FREERTOS
+    #define WOLFSSL_LWIP
+    #define NO_WRITEV
+    #define SIZEOF_LONG_LONG 8
+    #define NO_WOLFSSL_DIR
+    #define WOLFSSL_NO_CURRDIR
+
+    #define TFM_TIMING_RESISTANT
+    #define ECC_TIMING_RESISTANT
+    #define WC_RSA_BLINDING
+
+#if defined(WOLFSSL_ESPWROOM32) || defined(WOLFSSL_ESPWROOM32SE)
+   #ifndef NO_ESP32WROOM32_CRYPT
+        #define WOLFSSL_ESP32WROOM32_CRYPT
+        #if defined(ESP32_USE_RSA_PRIMITIVE) && \
+            !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI)
+            #define WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI
+            #define USE_FAST_MATH
+            #define WOLFSSL_SMALL_STACK
+        #endif
+   #endif
+#endif
+#endif /* WOLFSSL_ESPIDF */
+
+#if defined(WOLFSSL_RENESAS_TSIP)
+    #define TSIP_TLS_HMAC_KEY_INDEX_WORDSIZE 64
+    #define TSIP_TLS_MASTERSECRET_SIZE       80   /* 20 words */
+    #define TSIP_TLS_ENCPUBKEY_SZ_BY_CERTVRFY 560 /* in byte  */
+    #if !defined(NO_RENESAS_TSIP_CRYPT) && defined(WOLFSSL_RENESAS_RX65N)
+        #define WOLFSSL_RENESAS_TSIP_CRYPT
+        #define WOLFSSL_RENESAS_TSIP_TLS
+        #define WOLFSSL_RENESAS_TSIP_TLS_AES_CRYPT
+    #endif
+#endif
+
+#if defined(WOLFSSL_RENESAS_RA6M3G)
+    /* settings in user_settings.h */
+#endif
+
 #if defined(HAVE_LWIP_NATIVE) /* using LwIP native TCP socket */
     #define WOLFSSL_LWIP
     #define NO_WRITEV
@@ -218,6 +305,22 @@
     #define NO_FILESYSTEM
 #endif
 
+#if defined(WOLFSSL_CONTIKI)
+    #include <contiki.h>
+    #define WOLFSSL_UIP
+    #define NO_WOLFSSL_MEMORY
+    #define NO_WRITEV
+    #define SINGLE_THREADED
+    #define WOLFSSL_USER_IO
+    #define NO_FILESYSTEM
+    #define CUSTOM_RAND_TYPE uint16_t
+    #define CUSTOM_RAND_GENERATE random_rand
+    static inline word32 LowResTimer(void)
+    {
+        return clock_seconds();
+    }
+#endif
+
 #if defined(WOLFSSL_IAR_ARM) || defined(WOLFSSL_ROWLEY_ARM)
     #define NO_MAIN_DRIVER
     #define SINGLE_THREADED
@@ -247,9 +350,15 @@
 #endif
 
 #ifdef WOLFSSL_MICROCHIP_PIC32MZ
-    #define WOLFSSL_PIC32MZ_CRYPT
-    #define WOLFSSL_PIC32MZ_RNG
-    #define WOLFSSL_PIC32MZ_HASH
+    #ifndef NO_PIC32MZ_CRYPT
+        #define WOLFSSL_PIC32MZ_CRYPT
+    #endif
+    #ifndef NO_PIC32MZ_RNG
+        #define WOLFSSL_PIC32MZ_RNG
+    #endif
+    #ifndef NO_PIC32MZ_HASH
+        #define WOLFSSL_PIC32MZ_HASH
+    #endif
 #endif
 
 #ifdef MICROCHIP_TCPIP_V5
@@ -270,7 +379,7 @@
 #ifdef MBED
     #define WOLFSSL_USER_IO
     #define NO_FILESYSTEM
-    #define NO_CERT
+    #define NO_CERTS
     #if !defined(USE_CERT_BUFFERS_2048) && !defined(USE_CERT_BUFFERS_4096)
         #define USE_CERT_BUFFERS_1024
     #endif
@@ -348,7 +457,8 @@
     #ifdef VXWORKS_SIM
         #define TFM_NO_ASM
     #endif
-    #define WOLFSSL_PTHREADS
+    /* For VxWorks pthreads wrappers for mutexes uncomment the next line. */
+    /* #define WOLFSSL_PTHREADS */
     #define WOLFSSL_HAVE_MIN
     #define WOLFSSL_HAVE_MAX
     #define USE_FAST_MATH
@@ -356,6 +466,7 @@
     #define NO_MAIN_DRIVER
     #define NO_DEV_RANDOM
     #define NO_WRITEV
+    #define HAVE_STRINGS_H
 #endif
 
 
@@ -365,15 +476,20 @@
     #define SINGLE_THREADED
     #define NO_DEV_RANDOM
     #ifndef INTEL_GALILEO /* Galileo has time.h compatibility */
-        #define TIME_OVERRIDES /* must define XTIME and XGMTIME externally */
+        #define TIME_OVERRIDES
+        #ifndef XTIME
+            #error "Must define XTIME externally see porting guide"
+            #error "https://www.wolfssl.com/docs/porting-guide/"
+        #endif
+        #ifndef XGMTIME
+            #error "Must define XGMTIME externally see porting guide"
+            #error "https://www.wolfssl.com/docs/porting-guide/"
+        #endif
     #endif
     #define WOLFSSL_USER_IO
     #define HAVE_ECC
     #define NO_DH
     #define NO_SESSION_CACHE
-    #define USE_SLOW_SHA
-    #define NO_WOLFSSL_SERVER
-    #define NO_ERROR_STRINGS
 #endif
 
 
@@ -428,7 +544,8 @@
     #define XSTRNCMP(s1,s2,n)      strncmp((s1),(s2),(n))
     #define XSTRNCAT(s1,s2,n)      strncat((s1),(s2),(n))
     #define XSTRNCASECMP(s1,s2,n)  _strnicmp((s1),(s2),(n))
-    #if defined(WOLFSSL_CERT_EXT) || defined(HAVE_ALPN)
+    #if defined(WOLFSSL_CERT_EXT) || defined(OPENSSL_EXTRA) \
+            || defined(HAVE_ALPN)
         #define XSTRTOK            strtok_r
     #endif
 #endif
@@ -444,9 +561,11 @@
 #ifdef WOLFSSL_RIOT_OS
     #define NO_WRITEV
     #define TFM_NO_ASM
-    #define USE_FAST_MATH
     #define NO_FILESYSTEM
     #define USE_CERT_BUFFERS_2048
+    #if defined(WOLFSSL_GNRC) && !defined(WOLFSSL_DTLS)
+        #define WOLFSSL_DTLS
+    #endif
 #endif
 
 #ifdef WOLFSSL_CHIBIOS
@@ -474,9 +593,9 @@
                                      int type);
         extern void  nucleus_free(void* ptr, void* heap, int type);
 
-        #define XMALLOC(s, h, type)  nucleus_malloc
-        #define XREALLOC(p, n, h, t) nucleus_realloc
-        #define XFREE(p, h, type)    nucleus_free
+        #define XMALLOC(s, h, type)  nucleus_malloc((s), (h), (type))
+        #define XREALLOC(p, n, h, t) nucleus_realloc((p), (n), (h), (t))
+        #define XFREE(p, h, type)    nucleus_free((p), (h), (type))
     #endif
 #endif
 
@@ -536,7 +655,7 @@
     #include "tm/tmonitor.h"
 
     /* static char* gets(char *buff); */
-    static char* fgets(char *buff, int sz, FILE *fp) {
+    static char* fgets(char *buff, int sz, XFILE fp) {
         char * p = buff;
         *p = '\0';
         while (1) {
@@ -555,7 +674,8 @@
 #endif
 
 
-#if defined(WOLFSSL_LEANPSK) && !defined(XMALLOC_USER)
+#if defined(WOLFSSL_LEANPSK) && !defined(XMALLOC_USER) && \
+        !defined(NO_WOLFSSL_MEMORY)
     #include <stdlib.h>
     #define XMALLOC(s, h, type)  malloc((s))
     #define XFREE(p, h, type)    free((p))
@@ -575,13 +695,22 @@
 #ifdef FREERTOS
     #include "FreeRTOS.h"
 
-    /* FreeRTOS pvPortRealloc() only in AVR32_UC3 port */
     #if !defined(XMALLOC_USER) && !defined(NO_WOLFSSL_MEMORY) && \
         !defined(WOLFSSL_STATIC_MEMORY)
         #define XMALLOC(s, h, type)  pvPortMalloc((s))
         #define XFREE(p, h, type)    vPortFree((p))
     #endif
-
+    /* FreeRTOS pvPortRealloc() implementation can be found here:
+        https://github.com/wolfSSL/wolfssl-freertos/pull/3/files */
+    #if !defined(USE_FAST_MATH) || defined(HAVE_ED25519) || defined(HAVE_ED448)
+        #if defined(WOLFSSL_ESPIDF)
+            /*In IDF, realloc(p, n) is equivalent to 
+            heap_caps_realloc(p, s, MALLOC_CAP_8BIT) */
+            #define XREALLOC(p, n, h, t) realloc((p), (n))
+        #else
+            #define XREALLOC(p, n, h, t) pvPortRealloc((p), (n))
+        #endif
+    #endif
     #ifndef NO_WRITEV
         #define NO_WRITEV
     #endif
@@ -635,12 +764,18 @@
     #define NO_FILESYSTEM
     #define USE_CERT_BUFFERS_2048
     #define NO_ERROR_STRINGS
-    #define USER_TIME
+    /* Uncomment this setting if your toolchain does not offer time.h header */
+    /* #define USER_TIME */
     #define HAVE_ECC
     #define HAVE_ALPN
     #define USE_WOLF_STRTOK /* use with HAVE_ALPN */
     #define HAVE_TLS_EXTENSIONS
     #define HAVE_AESGCM
+    #ifdef WOLFSSL_TI_CRYPT
+        #define NO_GCM_ENCRYPT_EXTRA
+        #define NO_PUBLIC_GCM_SET_IV
+        #define NO_PUBLIC_CCM_SET_NONCE
+    #endif
     #define HAVE_SUPPORTED_CURVES
     #define ALT_ECC_SIZE
 
@@ -678,12 +813,28 @@
         #undef SIZEOF_LONG
         #define SIZEOF_LONG_LONG 8
     #else
-        #sslpro: settings.h - please implement SIZEOF_LONG and SIZEOF_LONG_LONG
+        #error settings.h - please implement SIZEOF_LONG and SIZEOF_LONG_LONG
     #endif
 
     #define XMALLOC(s, h, type) ((void *)rtp_malloc((s), SSL_PRO_MALLOC))
     #define XFREE(p, h, type) (rtp_free(p))
-    #define XREALLOC(p, n, h, t) realloc((p), (n))
+    #define XREALLOC(p, n, h, t) (rtp_realloc((p), (n)))
+
+    #if (WINMSP3)
+        #define XSTRNCASECMP(s1,s2,n)  _strnicmp((s1),(s2),(n))
+    #else
+        #sslpro: settings.h - please implement XSTRNCASECMP - needed for HAVE_ECC
+    #endif
+
+    #define WOLFSSL_HAVE_MAX
+    #define WOLFSSL_HAVE_MIN
+
+    #define USE_FAST_MATH
+    #define TFM_TIMING_RESISTANT
+    #define WC_RSA_BLINDING
+    #define ECC_TIMING_RESISTANT
+
+    #define HAVE_ECC
 
 #endif /* EBSNET */
 
@@ -726,11 +877,19 @@
     #ifndef SINGLE_THREADED
         #include "SafeRTOS/semphr.h"
     #endif
-
-    #include "SafeRTOS/heap.h"
-    #define XMALLOC(s, h, type)  pvPortMalloc((s))
-    #define XFREE(p, h, type)    vPortFree((p))
-    #define XREALLOC(p, n, h, t) pvPortRealloc((p), (n))
+    #ifndef WOLFSSL_NO_MALLOC
+        #include "SafeRTOS/heap.h"
+    #endif
+    #if !defined(XMALLOC_USER) && !defined(NO_WOLFSSL_MEMORY) && \
+        !defined(WOLFSSL_STATIC_MEMORY)
+        #define XMALLOC(s, h, type)  pvPortMalloc((s))
+        #define XFREE(p, h, type)    vPortFree((p))
+    #endif
+    /* FreeRTOS pvPortRealloc() implementation can be found here:
+        https://github.com/wolfSSL/wolfssl-freertos/pull/3/files */
+    #if !defined(USE_FAST_MATH) || defined(HAVE_ED25519) || defined(HAVE_ED448)
+        #define XREALLOC(p, n, h, t) pvPortRealloc((p), (n))
+    #endif
 #endif
 
 #ifdef WOLFSSL_LOW_MEMORY
@@ -742,6 +901,11 @@
     #define TFM_TIMING_RESISTANT
 #endif
 
+#ifdef FREESCALE_MQX_5_0
+    /* use normal Freescale MQX port, but with minor changes for 5.0 */
+    #define FREESCALE_MQX
+#endif
+
 #ifdef FREESCALE_MQX_4_0
     /* use normal Freescale MQX port, but with minor changes for 4.0 */
     #define FREESCALE_MQX
@@ -752,7 +916,8 @@
     #include "mqx.h"
     #ifndef NO_FILESYSTEM
         #include "mfs.h"
-        #if MQX_USE_IO_OLD
+        #if (defined(MQX_USE_IO_OLD) && MQX_USE_IO_OLD) || \
+            defined(FREESCALE_MQX_5_0)
             #include "fio.h"
             #define NO_STDIO_FILESYSTEM
         #else
@@ -775,7 +940,8 @@
     #define FREESCALE_COMMON
     #include <mqx.h>
     #ifndef NO_FILESYSTEM
-        #if MQX_USE_IO_OLD
+        #if (defined(MQX_USE_IO_OLD) && MQX_USE_IO_OLD) || \
+            defined(FREESCALE_MQX_5_0)
             #include <fio.h>
         #else
             #include <stdio.h>
@@ -796,12 +962,6 @@
 #endif /* FREESCALE_KSDK_MQX */
 
 #if defined(FREESCALE_FREE_RTOS) || defined(FREESCALE_KSDK_FREERTOS)
-    /* Allows use of DH with fixed points if uncommented and NO_DH is removed */
-    /* WOLFSSL_DH_CONST */
-    /* Allows use of DH with fixed points if uncommented and NO_DH is removed */
-    /* WOLFSSL_DH_CONST */
-    /* Allows use of DH with fixed points if uncommented and NO_DH is removed */
-    /* WOLFSSL_DH_CONST */
     #define NO_FILESYSTEM
     #define WOLFSSL_CRYPT_HW_MUTEX 1
 
@@ -995,14 +1155,6 @@
                     #undef  NO_ECC256
                     #define HAVE_ECC384
                 #endif
-
-                /* enable features */
-                #undef  HAVE_CURVE25519
-                #define HAVE_CURVE25519
-                #undef  HAVE_ED25519
-                #define HAVE_ED25519
-                #undef  WOLFSSL_SHA512
-                #define WOLFSSL_SHA512
             #endif
         #endif
     #endif
@@ -1030,6 +1182,9 @@
     defined(WOLFSSL_STM32L4)
 
     #define SIZEOF_LONG_LONG 8
+    #ifndef CHAR_BIT
+      #define CHAR_BIT 8
+    #endif
     #define NO_DEV_RANDOM
     #define NO_WOLFSSL_DIR
     #undef  NO_RABBIT
@@ -1044,6 +1199,10 @@
     #ifndef NO_STM32_CRYPTO
         #undef  STM32_CRYPTO
         #define STM32_CRYPTO
+
+        #ifdef WOLFSSL_STM32L4
+            #define NO_AES_192 /* hardware does not support 192-bit */
+        #endif
     #endif
     #ifndef NO_STM32_HASH
         #undef  STM32_HASH
@@ -1065,6 +1224,9 @@
         #elif defined(WOLFSSL_STM32F1)
             #include "stm32f1xx_hal.h"
         #endif
+        #if defined(WOLFSSL_CUBEMX_USE_LL) && defined(WOLFSSL_STM32L4)
+            #include "stm32l4xx_ll_rng.h"
+        #endif
 
         #ifndef STM32_HAL_TIMEOUT
             #define STM32_HAL_TIMEOUT   0xFF
@@ -1100,7 +1262,56 @@
             #include "stm32f1xx.h"
         #endif
     #endif /* WOLFSSL_STM32_CUBEMX */
-#endif /* WOLFSSL_STM32F2 || WOLFSSL_STM32F4 || WOLFSSL_STM32F7 */
+#endif /* WOLFSSL_STM32F2 || WOLFSSL_STM32F4 || WOLFSSL_STM32L4 || WOLFSSL_STM32F7 */
+#ifdef WOLFSSL_DEOS
+    #include <deos.h>
+    #include <timeout.h>
+    #include <socketapi.h>
+    #include <lwip-socket.h>
+    #include <mem.h>
+    #include <string.h>
+    #include <stdlib.h> /* for rand_r: pseudo-random number generator */
+    #include <stdio.h>  /* for snprintf */
+
+    /* use external memory XMALLOC, XFREE and XREALLOC functions */
+    #define XMALLOC_USER
+
+    /* disable fall-back case, malloc, realloc and free are unavailable */
+    #define WOLFSSL_NO_MALLOC
+
+    /* file system has not been ported since it is a separate product. */
+
+    #define NO_FILESYSTEM
+
+    #ifdef NO_FILESYSTEM
+        #define NO_WOLFSSL_DIR
+        #define NO_WRITEV
+    #endif
+
+    #define USE_FAST_MATH
+    #define TFM_TIMING_RESISTANT
+    #define ECC_TIMING_RESISTANT
+    #define WC_RSA_BLINDING
+
+    #define HAVE_ECC
+    #define ALT_ECC_SIZE
+    #define TFM_ECC192
+    #define TFM_ECC224
+    #define TFM_ECC256
+    #define TFM_ECC384
+    #define TFM_ECC521
+
+    #define HAVE_TLS_EXTENSIONS
+    #define HAVE_SUPPORTED_CURVES
+    #define HAVE_EXTENDED_MASTER
+
+    #if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+        #define BIG_ENDIAN_ORDER
+    #else
+        #undef  BIG_ENDIAN_ORDER
+        #define LITTLE_ENDIAN_ORDER
+    #endif
+#endif /* WOLFSSL_DEOS*/
 
 #ifdef MICRIUM
     #include <stdlib.h>
@@ -1137,12 +1348,6 @@
         #define CUSTOM_RAND_TYPE     RAND_NBR
         #define CUSTOM_RAND_GENERATE Math_Rand
     #endif
-
-    #define WOLFSSL_TYPES
-    typedef CPU_INT08U byte;
-    typedef CPU_INT16U word16;
-    typedef CPU_INT32U word32;
-
     #define STRING_USER
     #define XSTRLEN(pstr) ((CPU_SIZE_T)Str_Len((CPU_CHAR *)(pstr)))
     #define XSTRNCPY(pstr_dest, pstr_src, len_max) \
@@ -1164,12 +1369,14 @@
                     ((CPU_CHAR *)Str_Cat_N((CPU_CHAR *)(pstr_dest), \
                      (const CPU_CHAR *)(pstr_cat),(CPU_SIZE_T)(len_max)))
     #define XMEMSET(pmem, data_val, size) \
-                    ((void)Mem_Set((void *)(pmem), (CPU_INT08U) (data_val), \
+                    ((void)Mem_Set((void *)(pmem), \
+                    (CPU_INT08U) (data_val), \
                     (CPU_SIZE_T)(size)))
     #define XMEMCPY(pdest, psrc, size) ((void)Mem_Copy((void *)(pdest), \
                      (void *)(psrc), (CPU_SIZE_T)(size)))
     #define XMEMCMP(pmem_1, pmem_2, size) \
-                   (((CPU_BOOLEAN)Mem_Cmp((void *)(pmem_1), (void *)(pmem_2), \
+                   (((CPU_BOOLEAN)Mem_Cmp((void *)(pmem_1), \
+                                          (void *)(pmem_2), \
                      (CPU_SIZE_T)(size))) ? DEF_NO : DEF_YES)
     #define XMEMMOVE XMEMCPY
 
@@ -1185,6 +1392,15 @@
     #endif
 #endif /* MICRIUM */
 
+#ifdef WOLFSSL_MCF5441X
+    #define BIG_ENDIAN_ORDER
+    #ifndef SIZEOF_LONG
+        #define SIZEOF_LONG 4
+    #endif
+    #ifndef SIZEOF_LONG_LONG
+        #define SIZEOF_LONG_LONG 8
+    #endif
+#endif
 
 #ifdef WOLFSSL_QL
     #ifndef WOLFSSL_SEP
@@ -1218,13 +1434,12 @@
 
 
 #if defined(WOLFSSL_XILINX)
-    #define USER_TIME /* XTIME in asn.c */
     #define NO_WOLFSSL_DIR
     #define NO_DEV_RANDOM
     #define HAVE_AESGCM
 #endif
 
-#if defined(WOLFSSL_XILINX_CRYPT)
+#if defined(WOLFSSL_XILINX_CRYPT) || defined(WOLFSSL_AFALG_XILINX)
     #if defined(WOLFSSL_ARMASM)
         #error can not use both ARMv8 instructions and XILINX hardened crypto
     #endif
@@ -1237,8 +1452,69 @@
         #define WOLFSSL_NOSHA3_256
         #define WOLFSSL_NOSHA3_512
     #endif
+    #ifdef WOLFSSL_AFALG_XILINX_AES
+        #undef  WOLFSSL_AES_DIRECT
+        #define WOLFSSL_AES_DIRECT
+    #endif
 #endif /*(WOLFSSL_XILINX_CRYPT)*/
 
+#if defined(WOLFSSL_APACHE_MYNEWT)
+    #include "os/os_malloc.h"
+    #if !defined(WOLFSSL_LWIP)
+        #include <mn_socket/mn_socket.h>
+    #endif
+
+    #if !defined(SIZEOF_LONG)
+        #define SIZEOF_LONG 4
+    #endif
+    #if !defined(SIZEOF_LONG_LONG)
+        #define SIZEOF_LONG_LONG 8
+    #endif
+    #if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+        #define BIG_ENDIAN_ORDER
+    #else
+        #undef  BIG_ENDIAN_ORDER
+        #define LITTLE_ENDIAN_ORDER
+    #endif
+    #define NO_WRITEV
+    #define WOLFSSL_USER_IO
+    #define SINGLE_THREADED
+    #define NO_DEV_RANDOM
+    #define NO_DH
+    #define NO_WOLFSSL_DIR
+    #define NO_ERROR_STRINGS
+    #define HAVE_ECC
+    #define NO_SESSION_CACHE
+    #define NO_ERROR_STRINGS
+    #define XMALLOC_USER
+    #define XMALLOC(sz, heap, type)     os_malloc(sz)
+    #define XREALLOC(p, sz, heap, type) os_realloc(p, sz)
+    #define XFREE(p, heap, type)        os_free(p)
+
+#endif /*(WOLFSSL_APACHE_MYNEWT)*/
+
+#ifdef WOLFSSL_ZEPHYR
+    #include <zephyr.h>
+    #include <misc/printk.h>
+    #include <misc/util.h>
+    #include <stdlib.h>
+
+    #define WOLFSSL_DH_CONST
+    #define WOLFSSL_HAVE_MIN
+    #define WOLFSSL_HAVE_MAX
+    #define NO_WRITEV
+
+    #define USE_FLAT_BENCHMARK_H
+    #define USE_FLAT_TEST_H
+    #define EXIT_FAILURE 1
+    #define MAIN_NO_ARGS
+
+    void *z_realloc(void *ptr, size_t size);
+    #define realloc   z_realloc
+
+    #define CONFIG_NET_SOCKETS_POSIX_NAMES
+#endif
+
 #ifdef WOLFSSL_IMX6
     #ifndef SIZEOF_LONG_LONG
         #define SIZEOF_LONG_LONG 8
@@ -1315,7 +1591,7 @@
     #if !defined(HAVE_FIPS) && !defined(NO_RSA)
         #define WC_RSA_BLINDING
     #endif
-	
+
     #define NO_FILESYSTEM
     #define ECC_TIMING_RESISTANT
     #define TFM_TIMING_RESISTANT
@@ -1364,12 +1640,6 @@
     #define XGEN_ALIGN
 #endif
 
-#ifdef HAVE_CRL
-    /* not widely supported yet */
-    #undef NO_SKID
-    #define NO_SKID
-#endif
-
 
 #ifdef __INTEL_COMPILER
     #pragma warning(disable:2259) /* explicit casts to smaller sizes, disable */
@@ -1412,7 +1682,7 @@
     #endif
 #endif /* HAVE_ECC */
 
-/* Curve255519 Configs */
+/* Curve25519 Configs */
 #ifdef HAVE_CURVE25519
     /* By default enable shared secret, key export and import */
     #ifndef NO_CURVE25519_SHARED_SECRET
@@ -1429,7 +1699,7 @@
     #endif
 #endif /* HAVE_CURVE25519 */
 
-/* Ed255519 Configs */
+/* Ed25519 Configs */
 #ifdef HAVE_ED25519
     /* By default enable sign, verify, key export and import */
     #ifndef NO_ED25519_SIGN
@@ -1450,6 +1720,44 @@
     #endif
 #endif /* HAVE_ED25519 */
 
+/* Curve448 Configs */
+#ifdef HAVE_CURVE448
+    /* By default enable shared secret, key export and import */
+    #ifndef NO_CURVE448_SHARED_SECRET
+        #undef HAVE_CURVE448_SHARED_SECRET
+        #define HAVE_CURVE448_SHARED_SECRET
+    #endif
+    #ifndef NO_CURVE448_KEY_EXPORT
+        #undef HAVE_CURVE448_KEY_EXPORT
+        #define HAVE_CURVE448_KEY_EXPORT
+    #endif
+    #ifndef NO_CURVE448_KEY_IMPORT
+        #undef HAVE_CURVE448_KEY_IMPORT
+        #define HAVE_CURVE448_KEY_IMPORT
+    #endif
+#endif /* HAVE_CURVE448 */
+
+/* Ed448 Configs */
+#ifdef HAVE_ED448
+    /* By default enable sign, verify, key export and import */
+    #ifndef NO_ED448_SIGN
+        #undef HAVE_ED448_SIGN
+        #define HAVE_ED448_SIGN
+    #endif
+    #ifndef NO_ED448_VERIFY
+        #undef HAVE_ED448_VERIFY
+        #define HAVE_ED448_VERIFY
+    #endif
+    #ifndef NO_ED448_KEY_EXPORT
+        #undef HAVE_ED448_KEY_EXPORT
+        #define HAVE_ED448_KEY_EXPORT
+    #endif
+    #ifndef NO_ED448_KEY_IMPORT
+        #undef HAVE_ED448_KEY_IMPORT
+        #define HAVE_ED448_KEY_IMPORT
+    #endif
+#endif /* HAVE_ED448 */
+
 /* AES Config */
 #ifndef NO_AES
     /* By default enable all AES key sizes, decryption and CBC */
@@ -1481,10 +1789,6 @@
     #ifndef NO_AES_CBC
         #undef  HAVE_AES_CBC
         #define HAVE_AES_CBC
-    #else
-        #ifndef WOLFCRYPT_ONLY
-            #error "AES CBC is required for TLS and can only be disabled for WOLFCRYPT_ONLY builds"
-        #endif
     #endif
     #ifdef WOLFSSL_AES_XTS
         /* AES-XTS makes calls to AES direct functions */
@@ -1500,11 +1804,46 @@
     #endif
 #endif
 
+#if (defined(WOLFSSL_TLS13) && defined(WOLFSSL_NO_TLS12)) || \
+    (!defined(HAVE_AES_CBC) && defined(NO_DES3) && defined(NO_RC4) && \
+     !defined(HAVE_CAMELLIA) && !defined(HAVE_IDEA) && \
+     !defined(HAVE_NULL_CIPHER) && !defined(HAVE_HC128))
+    #define WOLFSSL_AEAD_ONLY
+#endif
+
+#if !defined(NO_DH) && !defined(HAVE_FFDHE)
+    #if defined(HAVE_FFDHE_2048) || defined(HAVE_FFDHE_3072) || \
+            defined(HAVE_FFDHE_4096) || defined(HAVE_FFDHE_6144) || \
+            defined(HAVE_FFDHE_8192)
+        #define HAVE_FFDHE
+    #endif
+#endif
+#if defined(HAVE_FFDHE_8192)
+    #define MIN_FFDHE_FP_MAX_BITS 16384
+#elif defined(HAVE_FFDHE_6144)
+    #define MIN_FFDHE_FP_MAX_BITS 12288
+#elif defined(HAVE_FFDHE_4096)
+    #define MIN_FFDHE_FP_MAX_BITS 8192
+#elif defined(HAVE_FFDHE_3072)
+    #define MIN_FFDHE_FP_MAX_BITS 6144
+#elif defined(HAVE_FFDHE_2048)
+    #define MIN_FFDHE_FP_MAX_BITS 4096
+#else
+    #define MIN_FFDHE_FP_MAX_BITS 0
+#endif
+#if defined(HAVE_FFDHE) && defined(FP_MAX_BITS)
+    #if MIN_FFDHE_FP_MAX_BITS > FP_MAX_BITS
+        #error "FFDHE parameters are too large for FP_MAX_BIT as set"
+    #endif
+#endif
+
 /* if desktop type system and fastmath increase default max bits */
 #ifdef WOLFSSL_X86_64_BUILD
-    #ifdef USE_FAST_MATH
-        #ifndef FP_MAX_BITS
+    #if defined(USE_FAST_MATH) && !defined(FP_MAX_BITS)
+        #if MIN_FFDHE_FP_MAX_BITS <= 8192
             #define FP_MAX_BITS 8192
+        #else
+            #define FP_MAX_BITS MIN_FFDHE_FP_MAX_BITS
         #endif
     #endif
 #endif
@@ -1530,11 +1869,17 @@
     #ifndef WOLFSSL_STATIC_RSA
         #define WOLFSSL_STATIC_RSA
     #endif
-    #ifndef WOLFSSL_SESSION_STATS
-        #define WOLFSSL_SESSION_STATS
+    #ifndef WOLFSSL_STATIC_DH
+        #define WOLFSSL_STATIC_DH
     #endif
-    #ifndef WOLFSSL_PEAK_SESSIONS
-        #define WOLFSSL_PEAK_SESSIONS
+    /* Allow option to be disabled. */
+    #ifndef WOLFSSL_NO_SESSION_STATS
+        #ifndef WOLFSSL_SESSION_STATS
+            #define WOLFSSL_SESSION_STATS
+        #endif
+        #ifndef WOLFSSL_PEAK_SESSIONS
+            #define WOLFSSL_PEAK_SESSIONS
+        #endif
     #endif
 #endif
 
@@ -1566,9 +1911,9 @@
     #define HAVE_WOLF_EVENT
 
     #ifdef WOLFSSL_ASYNC_CRYPT_TEST
-        #define WC_ASYNC_DEV_SIZE 320+24
+        #define WC_ASYNC_DEV_SIZE 168
     #else
-        #define WC_ASYNC_DEV_SIZE 320
+        #define WC_ASYNC_DEV_SIZE 336
     #endif
 
     #if !defined(HAVE_CAVIUM) && !defined(HAVE_INTEL_QA) && \
@@ -1623,6 +1968,27 @@
     #endif
 #endif
 
+#ifndef NO_PKCS12
+    #undef  HAVE_PKCS12
+    #define HAVE_PKCS12
+#endif
+
+#ifndef NO_PKCS8
+    #undef  HAVE_PKCS8
+    #define HAVE_PKCS8
+#endif
+
+#if !defined(NO_PBKDF1) || defined(WOLFSSL_ENCRYPTED_KEYS) || defined(HAVE_PKCS8) || defined(HAVE_PKCS12)
+    #undef  HAVE_PBKDF1
+    #define HAVE_PBKDF1
+#endif
+
+#if !defined(NO_PBKDF2) || defined(HAVE_PKCS7) || defined(HAVE_SCRYPT)
+    #undef  HAVE_PBKDF2
+    #define HAVE_PBKDF2
+#endif
+
+
 #if !defined(WOLFCRYPT_ONLY) && !defined(NO_OLD_TLS) && \
         (defined(NO_SHA) || defined(NO_MD5))
     #error old TLS requires MD5 and SHA
@@ -1662,16 +2028,23 @@
     #endif
 #endif
 
-#if defined(WOLFSSL_NGINX)
-    #define SSL_CTRL_SET_TLSEXT_HOSTNAME
+#if defined(WOLFSSL_NGINX) || defined(WOLFSSL_QT) || defined(OPENSSL_ALL)
+    #define SSL_CTRL_SET_TLSEXT_HOSTNAME 55
 #endif
 
+
 /* both CURVE and ED small math should be enabled */
 #ifdef CURVED25519_SMALL
         #define CURVE25519_SMALL
         #define ED25519_SMALL
 #endif
 
+/* both CURVE and ED small math should be enabled */
+#ifdef CURVED448_SMALL
+        #define CURVE448_SMALL
+        #define ED448_SMALL
+#endif
+
 
 #ifndef WOLFSSL_ALERT_COUNT_MAX
     #define WOLFSSL_ALERT_COUNT_MAX 5
@@ -1681,7 +2054,8 @@
 #ifndef WC_NO_HARDEN
     #if (defined(USE_FAST_MATH) && !defined(TFM_TIMING_RESISTANT)) || \
         (defined(HAVE_ECC) && !defined(ECC_TIMING_RESISTANT)) || \
-        (!defined(NO_RSA) && !defined(WC_RSA_BLINDING) && !defined(HAVE_FIPS))
+        (!defined(NO_RSA) && !defined(WC_RSA_BLINDING) && !defined(HAVE_FIPS) && \
+            !defined(WC_NO_RNG))
 
         #ifndef _MSC_VER
             #warning "For timing resistance / side-channel attack prevention consider using harden options"
@@ -1706,8 +2080,8 @@
 #endif /* OPENSSL_EXTRA */
 
 /* support for converting DER to PEM */
-#if defined(WOLFSSL_KEY_GEN) || defined(WOLFSSL_CERT_GEN) || \
-        defined(OPENSSL_EXTRA)
+#if (defined(WOLFSSL_KEY_GEN) && !defined(WOLFSSL_NO_DER_TO_PEM)) || \
+    defined(WOLFSSL_CERT_GEN) || defined(OPENSSL_EXTRA)
     #undef  WOLFSSL_DER_TO_PEM
     #define WOLFSSL_DER_TO_PEM
 #endif
@@ -1738,6 +2112,83 @@
     #define WOLFSSL_NO_HASH_RAW
 #endif
 
+#if !defined(WOLFSSL_SHA384) && !defined(WOLFSSL_SHA512) && defined(NO_AES) && \
+                                                          !defined(WOLFSSL_SHA3)
+    #undef  WOLFSSL_NO_WORD64_OPS
+    #define WOLFSSL_NO_WORD64_OPS
+#endif
+
+#if !defined(WOLFCRYPT_ONLY) && !defined(WOLFSSL_NO_TLS12)
+    #undef  WOLFSSL_HAVE_PRF
+    #define WOLFSSL_HAVE_PRF
+#endif
+
+#if defined(NO_AES) && defined(NO_DES3) && !defined(HAVE_CAMELLIA) && \
+       !defined(WOLFSSL_HAVE_PRF) && defined(NO_PWDBASED) && !defined(HAVE_IDEA)
+    #undef  WOLFSSL_NO_XOR_OPS
+    #define WOLFSSL_NO_XOR_OPS
+#endif
+
+#if defined(NO_ASN) && defined(WOLFCRYPT_ONLY)
+    #undef  WOLFSSL_NO_INT_ENCODE
+    #define WOLFSSL_NO_INT_ENCODE
+    #undef  WOLFSSL_NO_INT_DECODE
+    #define WOLFSSL_NO_INT_DECODE
+#endif
+
+#if defined(WOLFCRYPT_ONLY) && defined(WOLFSSL_RSA_VERIFY_ONLY) && \
+    defined(WC_NO_RSA_OAEP)
+    #undef  WOLFSSL_NO_CT_OPS
+    #define WOLFSSL_NO_CT_OPS
+#endif
+
+#if defined(WOLFCRYPT_ONLY) && defined(NO_AES) && !defined(HAVE_CURVE25519) && \
+        !defined(HAVE_CURVE448) && defined(WC_NO_RNG) && defined(WC_NO_RSA_OAEP)
+    #undef  WOLFSSL_NO_CONST_CMP
+    #define WOLFSSL_NO_CONST_CMP
+#endif
+
+#if defined(WOLFCRYPT_ONLY) && defined(NO_AES) && !defined(WOLFSSL_SHA384) && \
+    !defined(WOLFSSL_SHA512) && defined(WC_NO_RNG) && \
+                    defined(WOLFSSL_SP_MATH) && defined(WOLFSSL_RSA_PUBLIC_ONLY)
+    #undef  WOLFSSL_NO_FORCE_ZERO
+    #define WOLFSSL_NO_FORCE_ZERO
+#endif
+
+/* Detect old cryptodev name */
+#if defined(WOLF_CRYPTO_DEV) && !defined(WOLF_CRYPTO_CB)
+    #define WOLF_CRYPTO_CB
+#endif
+
+#if defined(WOLFSSL_TLS13) && defined(WOLFSSL_NO_SIGALG)
+    #error TLS 1.3 requires the Signature Algorithms extension to be enabled
+#endif
+
+#ifndef NO_WOLFSSL_BASE64_DECODE
+    #define WOLFSSL_BASE64_DECODE
+#endif
+
+#if defined(HAVE_EX_DATA) || defined(FORTRESS)
+    #define MAX_EX_DATA 5  /* allow for five items of ex_data */
+#endif
+
+#ifdef NO_WOLFSSL_SMALL_STACK
+    #undef WOLFSSL_SMALL_STACK
+#endif
+
+/* The client session cache requires time for timeout */
+#if defined(NO_ASN_TIME) && !defined(NO_SESSION_CACHE)
+    #define NO_SESSION_CACHE
+#endif
+
+/* Use static ECC structs for Position Independant Code (PIC) */
+#if defined(__IAR_SYSTEMS_ICC__) && defined(__ROPI__)
+    #define WOLFSSL_ECC_CURVE_STATIC
+    #define WOLFSSL_NAMES_STATIC
+    #define WOLFSSL_NO_CONSTCHARCONST
+#endif
+
+
 #ifdef __cplusplus
     }   /* extern "C" */
 #endif
--- a/wolfssl/wolfcrypt/sha.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/sha.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* sha.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -69,6 +69,9 @@
 #ifdef WOLFSSL_ASYNC_CRYPT
     #include <wolfssl/wolfcrypt/async.h>
 #endif
+#ifdef WOLFSSL_ESP32WROOM32_CRYPT
+    #include <wolfssl/wolfcrypt/port/Espressif/esp32-crypt.h>
+#endif
 
 #if !defined(NO_OLD_SHA_NAMES)
     #define SHA             WC_SHA
@@ -95,10 +98,13 @@
 
 #elif defined(WOLFSSL_IMX6_CAAM)
     #include "wolfssl/wolfcrypt/port/caam/wolfcaam_sha.h"
-
+#elif defined(WOLFSSL_RENESAS_TSIP_CRYPT) && \
+   !defined(NO_WOLFSSL_RENESAS_TSIP_CRYPT_HASH)
+    #include "wolfssl/wolfcrypt/port/Renesas/renesas-tsip-crypt.h"
 #else
+
 /* Sha digest */
-typedef struct wc_Sha {
+struct wc_Sha {
 #ifdef FREESCALE_LTC_SHA
         ltc_hash_ctx_t ctx;
 #elif defined(STM32_HASH)
@@ -120,8 +126,24 @@
     #ifdef WOLFSSL_ASYNC_CRYPT
         WC_ASYNC_DEV asyncDev;
     #endif /* WOLFSSL_ASYNC_CRYPT */
+    #ifdef WOLF_CRYPTO_CB
+        int    devId;
+        void*  devCtx; /* generic crypto callback context */
+    #endif
 #endif
-} wc_Sha;
+#if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+   !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+    WC_ESP32SHA ctx;
+#endif
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+    word32 flags; /* enum wc_HashFlags in hash.h */
+#endif
+};
+
+#ifndef WC_SHA_TYPE_DEFINED
+    typedef struct wc_Sha wc_Sha;
+    #define WC_SHA_TYPE_DEFINED
+#endif
 
 #endif /* WOLFSSL_TI_HASH */
 
@@ -142,6 +164,11 @@
 WOLFSSL_API void wc_ShaSizeSet(wc_Sha* sha, word32 len);
 #endif
 
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+    WOLFSSL_API int wc_ShaSetFlags(wc_Sha* sha, word32 flags);
+    WOLFSSL_API int wc_ShaGetFlags(wc_Sha* sha, word32* flags);
+#endif
+
 #ifdef __cplusplus
     } /* extern "C" */
 #endif
--- a/wolfssl/wolfcrypt/sha256.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/sha256.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* sha256.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -24,7 +24,6 @@
 */
 
 
-/* code submitted by raphael.huck@efixo.com */
 
 #ifndef WOLF_CRYPT_SHA256_H
 #define WOLF_CRYPT_SHA256_H
@@ -80,10 +79,19 @@
 #ifdef WOLFSSL_ASYNC_CRYPT
     #include <wolfssl/wolfcrypt/async.h>
 #endif
+#if defined(WOLFSSL_DEVCRYPTO) && defined(WOLFSSL_DEVCRYPTO_HASH)
+    #include <wolfssl/wolfcrypt/port/devcrypto/wc_devcrypto.h>
+#endif
+#if defined(WOLFSSL_ESP32WROOM32_CRYPT)
+    #include "wolfssl/wolfcrypt/port/Espressif/esp32-crypt.h"
+#endif
+#if defined(WOLFSSL_CRYPTOCELL)
+    #include <wolfssl/wolfcrypt/port/arm/cryptoCell.h>
+#endif
 
 #if defined(_MSC_VER)
     #define SHA256_NOINLINE __declspec(noinline)
-#elif defined(__GNUC__)
+#elif defined(__IAR_SYSTEMS_ICC__) || defined(__GNUC__)
     #define SHA256_NOINLINE __attribute__((noinline))
 #else
     #define SHA256_NOINLINE
@@ -113,12 +121,18 @@
     #include "wolfssl/wolfcrypt/port/ti/ti-hash.h"
 #elif defined(WOLFSSL_IMX6_CAAM)
     #include "wolfssl/wolfcrypt/port/caam/wolfcaam_sha.h"
+#elif defined(WOLFSSL_AFALG_HASH)
+    #include "wolfssl/wolfcrypt/port/af_alg/afalg_hash.h"
+#elif defined(WOLFSSL_RENESAS_TSIP_CRYPT) && \
+   !defined(NO_WOLFSSL_RENESAS_TSIP_CRYPT_HASH)
+    #include "wolfssl/wolfcrypt/port/Renesas/renesas-tsip-crypt.h"
 #else
+
 /* wc_Sha256 digest */
-typedef struct wc_Sha256 {
+struct wc_Sha256 {
 #ifdef FREESCALE_LTC_SHA
     ltc_hash_ctx_t ctx;
-#elif defined(STM32_HASH)
+#elif defined(STM32_HASH_SHA2)
     STM32_HASH_Context stmCtx;
 #else
     /* alignment on digest and buffer speeds up ARMv8 crypto operations */
@@ -128,9 +142,6 @@
     word32  loLen;     /* length in bytes   */
     word32  hiLen;     /* length in bytes   */
     void*   heap;
-#ifdef USE_INTEL_SPEEDUP
-    const byte* data;
-#endif
 #ifdef WOLFSSL_PIC32MZ_HASH
     hashUpdCache cache; /* cache for updates */
 #endif
@@ -140,8 +151,33 @@
 #ifdef WOLFSSL_SMALL_STACK_CACHE
     word32* W;
 #endif
+#ifdef WOLFSSL_DEVCRYPTO_HASH
+    WC_CRYPTODEV ctx;
+    byte*  msg;
+    word32 used;
+    word32 len;
 #endif
-} wc_Sha256;
+#if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+   !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+    WC_ESP32SHA ctx;
+#endif
+#ifdef WOLFSSL_CRYPTOCELL
+    CRYS_HASHUserContext_t ctx;
+#endif
+#ifdef WOLF_CRYPTO_CB
+    int    devId;
+    void*  devCtx; /* generic crypto callback context */
+#endif
+#endif
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+    word32 flags; /* enum wc_HashFlags in hash.h */
+#endif
+};
+
+#ifndef WC_SHA256_TYPE_DEFINED
+    typedef struct wc_Sha256 wc_Sha256;
+    #define WC_SHA256_TYPE_DEFINED
+#endif
 
 #endif
 
@@ -161,6 +197,11 @@
 WOLFSSL_API void wc_Sha256SizeSet(wc_Sha256*, word32);
 #endif
 
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+    WOLFSSL_API int wc_Sha256SetFlags(wc_Sha256* sha256, word32 flags);
+    WOLFSSL_API int wc_Sha256GetFlags(wc_Sha256* sha256, word32* flags);
+#endif
+
 #ifdef WOLFSSL_SHA224
 /* avoid redefinition of structs */
 #if !defined(HAVE_FIPS) || \
@@ -183,7 +224,10 @@
 };
 
 
-typedef wc_Sha256 wc_Sha224;
+#ifndef WC_SHA224_TYPE_DEFINED
+    typedef struct wc_Sha256 wc_Sha224;
+    #define WC_SHA224_TYPE_DEFINED
+#endif
 #endif /* HAVE_FIPS */
 
 WOLFSSL_API int wc_InitSha224(wc_Sha224*);
@@ -195,6 +239,11 @@
 WOLFSSL_API int wc_Sha224GetHash(wc_Sha224*, byte*);
 WOLFSSL_API int wc_Sha224Copy(wc_Sha224* src, wc_Sha224* dst);
 
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+    WOLFSSL_API int wc_Sha224SetFlags(wc_Sha224* sha224, word32 flags);
+    WOLFSSL_API int wc_Sha224GetFlags(wc_Sha224* sha224, word32* flags);
+#endif
+
 #endif /* WOLFSSL_SHA224 */
 
 #ifdef __cplusplus
--- a/wolfssl/wolfcrypt/sha3.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/sha3.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* sha3.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -81,11 +81,15 @@
 #endif
 
 
+
 #ifdef WOLFSSL_XILINX_CRYPT
     #include "wolfssl/wolfcrypt/port/xilinx/xil-sha3.h"
+#elif defined(WOLFSSL_AFALG_XILINX_SHA3)
+    #include <wolfssl/wolfcrypt/port/af_alg/afalg_hash.h>
 #else
+
 /* Sha3 digest */
-typedef struct Sha3 {
+struct Sha3 {
     /* State data that is processed for each block. */
     word64 s[25];
     /* Unprocessed message data. */
@@ -98,8 +102,19 @@
 #ifdef WOLFSSL_ASYNC_CRYPT
     WC_ASYNC_DEV asyncDev;
 #endif /* WOLFSSL_ASYNC_CRYPT */
-} wc_Sha3;
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+    word32 flags; /* enum wc_HashFlags in hash.h */
 #endif
+};
+
+#ifndef WC_SHA3_TYPE_DEFINED
+    typedef struct Sha3 wc_Sha3;
+    #define WC_SHA3_TYPE_DEFINED
+#endif
+
+#endif
+
+typedef wc_Sha3 wc_Shake;
 
 
 WOLFSSL_API int wc_InitSha3_224(wc_Sha3*, void*, int);
@@ -130,6 +145,17 @@
 WOLFSSL_API int wc_Sha3_512_GetHash(wc_Sha3*, byte*);
 WOLFSSL_API int wc_Sha3_512_Copy(wc_Sha3* src, wc_Sha3* dst);
 
+WOLFSSL_API int wc_InitShake256(wc_Shake*, void*, int);
+WOLFSSL_API int wc_Shake256_Update(wc_Shake*, const byte*, word32);
+WOLFSSL_API int wc_Shake256_Final(wc_Shake*, byte*, word32);
+WOLFSSL_API void wc_Shake256_Free(wc_Shake*);
+WOLFSSL_API int wc_Shake256_Copy(wc_Shake* src, wc_Sha3* dst);
+
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+    WOLFSSL_API int wc_Sha3_SetFlags(wc_Sha3* sha3, word32 flags);
+    WOLFSSL_API int wc_Sha3_GetFlags(wc_Sha3* sha3, word32* flags);
+#endif
+
 #ifdef __cplusplus
     } /* extern "C" */
 #endif
--- a/wolfssl/wolfcrypt/sha512.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/sha512.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* sha512.h
  *
- * Copyright (C) 2006-2018 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -72,10 +72,12 @@
 #ifdef WOLFSSL_ASYNC_CRYPT
     #include <wolfssl/wolfcrypt/async.h>
 #endif
-
+#ifdef WOLFSSL_ESP32WROOM32_CRYPT
+    #include <wolfssl/wolfcrypt/port/Espressif/esp32-crypt.h>
+#endif
 #if defined(_MSC_VER)
     #define SHA512_NOINLINE __declspec(noinline)
-#elif defined(__GNUC__)
+#elif defined(__IAR_SYSTEMS_ICC__) || defined(__GNUC__)
     #define SHA512_NOINLINE __attribute__((noinline))
 #else
     #define SHA512_NOINLINE
@@ -111,7 +113,7 @@
     #include "wolfssl/wolfcrypt/port/caam/wolfcaam_sha.h"
 #else
 /* wc_Sha512 digest */
-typedef struct wc_Sha512 {
+struct wc_Sha512 {
     word64  digest[WC_SHA512_DIGEST_SIZE / sizeof(word64)];
     word64  buffer[WC_SHA512_BLOCK_SIZE  / sizeof(word64)];
     word32  buffLen;   /* in bytes          */
@@ -127,11 +129,28 @@
 #ifdef WOLFSSL_SMALL_STACK_CACHE
     word64* W;
 #endif
-} wc_Sha512;
+#if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+   !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+    WC_ESP32SHA ctx;
+#endif
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+    word32 flags; /* enum wc_HashFlags in hash.h */
+#endif
+};
+
+#ifndef WC_SHA512_TYPE_DEFINED
+    typedef struct wc_Sha512 wc_Sha512;
+    #define WC_SHA512_TYPE_DEFINED
+#endif
 #endif
 
 #endif /* HAVE_FIPS */
 
+#ifdef WOLFSSL_ARMASM
+WOLFSSL_LOCAL void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data,
+                                        word32 len);
+#endif
+
 #ifdef WOLFSSL_SHA512
 
 WOLFSSL_API int wc_InitSha512(wc_Sha512*);
@@ -144,6 +163,11 @@
 WOLFSSL_API int wc_Sha512GetHash(wc_Sha512*, byte*);
 WOLFSSL_API int wc_Sha512Copy(wc_Sha512* src, wc_Sha512* dst);
 
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+    WOLFSSL_API int wc_Sha512SetFlags(wc_Sha512* sha512, word32 flags);
+    WOLFSSL_API int wc_Sha512GetFlags(wc_Sha512* sha512, word32* flags);
+#endif
+
 #endif /* WOLFSSL_SHA512 */
 
 #if defined(WOLFSSL_SHA384)
@@ -172,7 +196,10 @@
 };
 
 
-typedef wc_Sha512 wc_Sha384;
+#ifndef WC_SHA384_TYPE_DEFINED
+    typedef struct wc_Sha512 wc_Sha384;
+    #define WC_SHA384_TYPE_DEFINED
+#endif
 #endif /* HAVE_FIPS */
 
 WOLFSSL_API int wc_InitSha384(wc_Sha384*);
@@ -185,6 +212,11 @@
 WOLFSSL_API int wc_Sha384GetHash(wc_Sha384*, byte*);
 WOLFSSL_API int wc_Sha384Copy(wc_Sha384* src, wc_Sha384* dst);
 
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+    WOLFSSL_API int wc_Sha384SetFlags(wc_Sha384* sha384, word32 flags);
+    WOLFSSL_API int wc_Sha384GetFlags(wc_Sha384* sha384, word32* flags);
+#endif
+
 #endif /* WOLFSSL_SHA384 */
 
 #ifdef __cplusplus
--- a/wolfssl/wolfcrypt/signature.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/signature.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* signature.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -62,12 +62,23 @@
     const byte* hash_data, word32 hash_len,
     byte* sig, word32 *sig_len,
     const void* key, word32 key_len, WC_RNG* rng);
+WOLFSSL_API int wc_SignatureGenerateHash_ex(
+    enum wc_HashType hash_type, enum wc_SignatureType sig_type,
+    const byte* hash_data, word32 hash_len,
+    byte* sig, word32 *sig_len,
+    const void* key, word32 key_len, WC_RNG* rng, int verify);
 WOLFSSL_API int wc_SignatureGenerate(
     enum wc_HashType hash_type, enum wc_SignatureType sig_type,
     const byte* data, word32 data_len,
     byte* sig, word32 *sig_len,
     const void* key, word32 key_len,
     WC_RNG* rng);
+WOLFSSL_API int wc_SignatureGenerate_ex(
+    enum wc_HashType hash_type, enum wc_SignatureType sig_type,
+    const byte* data, word32 data_len,
+    byte* sig, word32 *sig_len,
+    const void* key, word32 key_len,
+    WC_RNG* rng, int verify);
 
 #ifdef __cplusplus
     } /* extern "C" */
--- a/wolfssl/wolfcrypt/sp.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/sp.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* sp.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -37,10 +37,10 @@
 
 #if defined(_MSC_VER)
     #define SP_NOINLINE __declspec(noinline)
-#elif defined(__GNUC__)
+#elif defined(__IAR_SYSTEMS_ICC__) || defined(__GNUC__) || defined(__KEIL__)
     #define SP_NOINLINE __attribute__((noinline))
 #else
-    #define 5P_NOINLINE
+    #define SP_NOINLINE
 #endif
 
 
@@ -62,19 +62,37 @@
     mp_int* dm, mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim,
     mp_int* mm, byte* out, word32* outLen);
 
+WOLFSSL_LOCAL int sp_RsaPublic_4096(const byte* in, word32 inLen,
+    mp_int* em, mp_int* mm, byte* out, word32* outLen);
+WOLFSSL_LOCAL int sp_RsaPrivate_4096(const byte* in, word32 inLen,
+    mp_int* dm, mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim,
+    mp_int* mm, byte* out, word32* outLen);
+
 #endif /* WOLFSSL_HAVE_SP_RSA */
 
-#ifdef WOLFSSL_HAVE_SP_DH
+#if defined(WOLFSSL_HAVE_SP_DH) || defined(WOLFSSL_HAVE_SP_RSA)
 
+WOLFSSL_LOCAL int sp_ModExp_1024(mp_int* base, mp_int* exp, mp_int* mod,
+    mp_int* res);
+WOLFSSL_LOCAL int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod,
+    mp_int* res);
 WOLFSSL_LOCAL int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod,
     mp_int* res);
 WOLFSSL_LOCAL int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod,
     mp_int* res);
+WOLFSSL_LOCAL int sp_ModExp_4096(mp_int* base, mp_int* exp, mp_int* mod,
+    mp_int* res);
+
+#endif
+
+#ifdef WOLFSSL_HAVE_SP_DH
 
 WOLFSSL_LOCAL int sp_DhExp_2048(mp_int* base, const byte* exp, word32 expLen,
     mp_int* mod, byte* out, word32* outLen);
 WOLFSSL_LOCAL int sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen,
     mp_int* mod, byte* out, word32* outLen);
+WOLFSSL_LOCAL int sp_DhExp_4096(mp_int* base, const byte* exp, word32 expLen,
+    mp_int* mod, byte* out, word32* outLen);
 
 #endif /* WOLFSSL_HAVE_SP_DH */
 
@@ -88,7 +106,7 @@
 int sp_ecc_secret_gen_256(mp_int* priv, ecc_point* pub, byte* out,
                           word32* outlen, void* heap);
 int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
-                    mp_int* rm, mp_int* sm, void* heap);
+                    mp_int* rm, mp_int* sm, mp_int* km, void* heap);
 int sp_ecc_verify_256(const byte* hash, word32 hashLen, mp_int* pX, mp_int* pY,
                       mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap);
 int sp_ecc_is_point_256(mp_int* pX, mp_int* pY);
@@ -101,6 +119,28 @@
 int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ);
 int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym);
 
+
+int sp_ecc_mulmod_384(mp_int* km, ecc_point* gm, ecc_point* rm, int map,
+                      void* heap);
+int sp_ecc_mulmod_base_384(mp_int* km, ecc_point* rm, int map, void* heap);
+
+int sp_ecc_make_key_384(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap);
+int sp_ecc_secret_gen_384(mp_int* priv, ecc_point* pub, byte* out,
+                          word32* outlen, void* heap);
+int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
+                    mp_int* rm, mp_int* sm, mp_int* km, void* heap);
+int sp_ecc_verify_384(const byte* hash, word32 hashLen, mp_int* pX, mp_int* pY,
+                      mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap);
+int sp_ecc_is_point_384(mp_int* pX, mp_int* pY);
+int sp_ecc_check_key_384(mp_int* pX, mp_int* pY, mp_int* privm, void* heap);
+int sp_ecc_proj_add_point_384(mp_int* pX, mp_int* pY, mp_int* pZ,
+                              mp_int* qX, mp_int* qY, mp_int* qZ,
+                              mp_int* rX, mp_int* rY, mp_int* rZ);
+int sp_ecc_proj_dbl_point_384(mp_int* pX, mp_int* pY, mp_int* pZ,
+                              mp_int* rX, mp_int* rY, mp_int* rZ);
+int sp_ecc_map_384(mp_int* pX, mp_int* pY, mp_int* pZ);
+int sp_ecc_uncompress_384(mp_int* xm, int odd, mp_int* ym);
+
 #endif /*ifdef WOLFSSL_HAVE_SP_ECC */
 
 
--- a/wolfssl/wolfcrypt/sp_int.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/sp_int.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* sp_int.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -26,6 +26,15 @@
 #include <stdint.h>
 #include <limits.h>
 
+/* Make sure WOLFSSL_SP_ASM build option defined when requested */
+#if !defined(WOLFSSL_SP_ASM) && ( \
+      defined(WOLFSSL_SP_X86_64_ASM) || defined(WOLFSSL_SP_ARM32_ASM) || \
+      defined(WOLFSSL_SP_ARM64_ASM)  || defined(WOLFSSL_SP_ARM_THUMB_ASM) || \
+      defined(WOLFSSL_SP_ARM_CORTEX_M_ASM))
+    #define WOLFSSL_SP_ASM
+#endif
+
+
 #ifdef WOLFSSL_SP_X86_64_ASM
     #define SP_WORD_SIZE 64
 
@@ -35,6 +44,8 @@
     #define SP_WORD_SIZE 64
 #elif defined(WOLFSSL_SP_ARM32_ASM)
     #define SP_WORD_SIZE 32
+#elif defined(WOLFSSL_SP_ARM_THUMB_ASM)
+    #define SP_WORD_SIZE 32
 #endif
 
 #ifndef SP_WORD_SIZE
@@ -45,15 +56,28 @@
     #endif
 #endif
 
-#ifndef WOLFSSL_SP_ASM
+#ifdef WOLFSSL_DSP_BUILD
+    typedef int32 sp_digit;
+    typedef uint32 sp_int_digit;
+    typedef uint64 sp_int_word;
+    #undef SP_WORD_SIZE
+    #define SP_WORD_SIZE 32
+#elif !defined(WOLFSSL_SP_ASM)
   #if SP_WORD_SIZE == 32
     typedef int32_t sp_digit;
     typedef uint32_t sp_int_digit;
+    typedef uint64_t sp_int_word;
   #elif SP_WORD_SIZE == 64
     typedef int64_t sp_digit;
     typedef uint64_t sp_int_digit;
-    typedef unsigned long uint128_t __attribute__ ((mode(TI)));
-    typedef long int128_t __attribute__ ((mode(TI)));
+    #ifdef __SIZEOF_INT128__
+      typedef __uint128_t uint128_t;
+      typedef __int128_t int128_t;
+    #else
+      typedef unsigned long uint128_t __attribute__ ((mode(TI)));
+      typedef long int128_t __attribute__ ((mode(TI)));
+    #endif
+    typedef uint128_t sp_int_word;
   #else
     #error Word size not defined
   #endif
@@ -61,82 +85,131 @@
   #if SP_WORD_SIZE == 32
     typedef uint32_t sp_digit;
     typedef uint32_t sp_int_digit;
+    typedef uint64_t sp_int_word;
   #elif SP_WORD_SIZE == 64
     typedef uint64_t sp_digit;
     typedef uint64_t sp_int_digit;
-    typedef unsigned long uint128_t __attribute__ ((mode(TI)));
-    typedef long int128_t __attribute__ ((mode(TI)));
+    #ifdef __SIZEOF_INT128__
+      typedef __uint128_t uint128_t;
+      typedef __int128_t int128_t;
+    #else
+      typedef unsigned long uint128_t __attribute__ ((mode(TI)));
+      typedef long int128_t __attribute__ ((mode(TI)));
+    #endif
+    typedef uint128_t sp_int_word;
   #else
     #error Word size not defined
   #endif
 #endif
 
+#define SP_MASK    (sp_digit)(-1)
+
 #ifdef WOLFSSL_SP_MATH
 #include <wolfssl/wolfcrypt/random.h>
 
-#ifndef MIN
-   #define MIN(x,y) ((x)<(y)?(x):(y))
-#endif
-
-#ifndef MAX
-   #define MAX(x,y) ((x)>(y)?(x):(y))
-#endif
-
-#ifdef WOLFSSL_PUBLIC_MP
-    #define MP_API   WOLFSSL_API
-#else
-    #define MP_API   WOLFSSL_LOCAL
-#endif
-
 #if !defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_HAVE_SP_DH)
     #if !defined(NO_PWDBASED) && defined(WOLFSSL_SHA512)
         #define SP_INT_DIGITS        ((512 + SP_WORD_SIZE) / SP_WORD_SIZE)
+    #elif defined(WOLFSSL_SP_384)
+        #define SP_INT_DIGITS        ((384 + SP_WORD_SIZE) / SP_WORD_SIZE)
     #else
         #define SP_INT_DIGITS        ((256 + SP_WORD_SIZE) / SP_WORD_SIZE)
     #endif
+#elif defined(WOLFSSL_SP_4096)
+    #if defined(WOLFSSL_HAVE_SP_DH)
+        #define SP_INT_DIGITS        ((8192 + SP_WORD_SIZE) / SP_WORD_SIZE)
+    #else
+        #define SP_INT_DIGITS        ((4096 + SP_WORD_SIZE) / SP_WORD_SIZE)
+    #endif
 #elif !defined(WOLFSSL_SP_NO_3072)
-    #define SP_INT_DIGITS        ((2048 + SP_WORD_SIZE) / SP_WORD_SIZE)
+    #if defined(WOLFSSL_HAVE_SP_DH)
+        #define SP_INT_DIGITS        ((6144 + SP_WORD_SIZE) / SP_WORD_SIZE)
+    #else
+        #define SP_INT_DIGITS        ((3072 + SP_WORD_SIZE) / SP_WORD_SIZE)
+    #endif
 #else
-    #define SP_INT_DIGITS        ((3072 + SP_WORD_SIZE) / SP_WORD_SIZE)
+    #if defined(WOLFSSL_HAVE_SP_DH)
+        #define SP_INT_DIGITS        ((4096 + SP_WORD_SIZE) / SP_WORD_SIZE)
+    #else
+        #define SP_INT_DIGITS        ((2048 + SP_WORD_SIZE) / SP_WORD_SIZE)
+    #endif
 #endif
 
-#define sp_isodd(a) (a->used != 0 && (a->dp[0] & 1))
+#define sp_isodd(a)  ((a)->used != 0 && ((a)->dp[0] & 1))
+#define sp_iseven(a) ((a)->used != 0 && ((a)->dp[0] & 1) == 0)
+#define sp_iszero(a) ((a)->used == 0)
+#define sp_isone(a)  ((a)->used == 1 && (a)->dp[0] == 1)
+#define sp_abs(a, b)  sp_copy(a, b)
+
+#ifdef HAVE_WOLF_BIGINT
+    /* raw big integer */
+    typedef struct WC_BIGINT {
+        byte*   buf;
+        word32  len;
+        void*   heap;
+    } WC_BIGINT;
+    #define WOLF_BIGINT_DEFINED
+#endif
 
 typedef struct sp_int {
-    sp_int_digit dp[SP_INT_DIGITS];
+    int used;
     int size;
-    int used;
+    sp_int_digit dp[SP_INT_DIGITS];
+#ifdef HAVE_WOLF_BIGINT
+    struct WC_BIGINT raw; /* unsigned binary (big endian) */
+#endif
 } sp_int;
 
+typedef sp_int       mp_int;
+typedef sp_int_digit mp_digit;
+
+#include <wolfssl/wolfcrypt/wolfmath.h>
+
 
 MP_API int sp_init(sp_int* a);
 MP_API int sp_init_multi(sp_int* a, sp_int* b, sp_int* c, sp_int* d,
                          sp_int* e, sp_int* f);
 MP_API void sp_clear(sp_int* a);
 MP_API int sp_unsigned_bin_size(sp_int* a);
-MP_API int sp_read_unsigned_bin(sp_int* a, const byte* in, word32 inSz);
+MP_API int sp_read_unsigned_bin(sp_int* a, const byte* in, int inSz);
 MP_API int sp_read_radix(sp_int* a, const char* in, int radix);
 MP_API int sp_cmp(sp_int* a, sp_int* b);
 MP_API int sp_count_bits(sp_int* a);
 MP_API int sp_leading_bit(sp_int* a);
-MP_API int sp_to_unsigned_bin(sp_int* a, byte* in);
+MP_API int sp_to_unsigned_bin(sp_int* a, byte* out);
+MP_API int sp_to_unsigned_bin_len(sp_int* a, byte* out, int outSz);
 MP_API void sp_forcezero(sp_int* a);
-MP_API int sp_copy(sp_int* a, sp_int* b);
+MP_API int sp_copy(sp_int* a, sp_int* r);
 MP_API int sp_set(sp_int* a, sp_int_digit d);
-MP_API int sp_iszero(sp_int* a);
 MP_API void sp_clamp(sp_int* a);
 MP_API int sp_grow(sp_int* a, int l);
 MP_API int sp_sub_d(sp_int* a, sp_int_digit d, sp_int* r);
 MP_API int sp_cmp_d(sp_int* a, sp_int_digit d);
+MP_API int sp_sub(sp_int* a, sp_int* b, sp_int* r);
 MP_API int sp_mod(sp_int* a, sp_int* m, sp_int* r);
 MP_API void sp_zero(sp_int* a);
 MP_API int sp_add_d(sp_int* a, sp_int_digit d, sp_int* r);
 MP_API int sp_lshd(sp_int* a, int s);
 MP_API int sp_add(sp_int* a, sp_int* b, sp_int* r);
 MP_API int sp_set_int(sp_int* a, unsigned long b);
+MP_API int sp_tohex(sp_int* a, char* str);
+MP_API int sp_set_bit(sp_int* a, int i);
+MP_API int sp_2expt(sp_int* a, int e);
+MP_API int sp_rand_prime(sp_int* r, int len, WC_RNG* rng, void* heap);
+MP_API int sp_mul(sp_int* a, sp_int* b, sp_int* r);
+MP_API int sp_mulmod(sp_int* a, sp_int* b, sp_int* m, sp_int* r);
+MP_API int sp_gcd(sp_int* a, sp_int* b, sp_int* r);
+MP_API int sp_invmod(sp_int* a, sp_int* m, sp_int* r);
+MP_API int sp_lcm(sp_int* a, sp_int* b, sp_int* r);
+MP_API int sp_exptmod(sp_int* b, sp_int* e, sp_int* m, sp_int* r);
+MP_API int sp_prime_is_prime(mp_int* a, int t, int* result);
+MP_API int sp_prime_is_prime_ex(mp_int* a, int t, int* result, WC_RNG* rng);
+MP_API int sp_exch(sp_int* a, sp_int* b);
+MP_API int sp_get_digit_count(sp_int *a);
+MP_API int sp_init_copy (sp_int * a, sp_int * b);
+MP_API void sp_rshb(sp_int* a, int n, sp_int* r);
+MP_API int sp_mul_d(sp_int* a, sp_int_digit n, sp_int* r);
 
-typedef sp_int mp_int;
-typedef sp_digit mp_digit;
 
 #define MP_OKAY    0
 #define MP_NO      0
@@ -152,40 +225,62 @@
 #define MP_VAL   -3
 
 #define DIGIT_BIT  SP_WORD_SIZE
+#define MP_MASK    SP_MASK
 
 #define CheckFastMathSettings() 1
 
 #define mp_free(a)
 
-#define mp_init                 sp_init
-#define mp_init_multi           sp_init_multi
-#define mp_clear                sp_clear
-#define mp_read_unsigned_bin    sp_read_unsigned_bin
-#define mp_unsigned_bin_size    sp_unsigned_bin_size
-#define mp_read_radix           sp_read_radix
-#define mp_cmp                  sp_cmp
-#define mp_count_bits           sp_count_bits
-#define mp_leading_bit          sp_leading_bit
-#define mp_to_unsigned_bin      sp_to_unsigned_bin
-#define mp_forcezero            sp_forcezero
-#define mp_copy                 sp_copy
-#define mp_set                  sp_set
-#define mp_iszero               sp_iszero
-#define mp_clamp                sp_clamp
-#define mp_grow                 sp_grow
-#define mp_sub_d                sp_sub_d
-#define mp_cmp_d                sp_cmp_d
-#define mp_mod                  sp_mod
-#define mp_zero                 sp_zero
-#define mp_add_d                sp_add_d
-#define mp_lshd                 sp_lshd
-#define mp_add                  sp_add
-#define mp_isodd                sp_isodd
-#define mp_set_int              sp_set_int
+#define mp_isodd                    sp_isodd
+#define mp_iseven                   sp_iseven
+#define mp_iszero                   sp_iszero
+#define mp_isone                    sp_isone
+#define mp_abs                      sp_abs
 
-#define MP_INT_DEFINED
+#define mp_init                     sp_init
+#define mp_init_multi               sp_init_multi
+#define mp_clear                    sp_clear
+#define mp_read_unsigned_bin        sp_read_unsigned_bin
+#define mp_unsigned_bin_size        sp_unsigned_bin_size
+#define mp_read_radix               sp_read_radix
+#define mp_cmp                      sp_cmp
+#define mp_count_bits               sp_count_bits
+#define mp_leading_bit              sp_leading_bit
+#define mp_to_unsigned_bin          sp_to_unsigned_bin
+#define mp_to_unsigned_bin_len      sp_to_unsigned_bin_len
+#define mp_forcezero                sp_forcezero
+#define mp_copy                     sp_copy
+#define mp_set                      sp_set
+#define mp_clamp                    sp_clamp
+#define mp_grow                     sp_grow
+#define mp_sub_d                    sp_sub_d
+#define mp_cmp_d                    sp_cmp_d
+#define mp_sub                      sp_sub
+#define mp_mod                      sp_mod
+#define mp_zero                     sp_zero
+#define mp_add_d                    sp_add_d
+#define mp_lshd                     sp_lshd
+#define mp_add                      sp_add
+#define mp_set_int                  sp_set_int
+#define mp_tohex                    sp_tohex
+#define mp_set_bit                  sp_set_bit
+#define mp_2expt                    sp_2expt
+#define mp_rand_prime               sp_rand_prime
+#define mp_mul                      sp_mul
+#define mp_mulmod                   sp_mulmod
+#define mp_gcd                      sp_gcd
+#define mp_invmod                   sp_invmod
+#define mp_lcm                      sp_lcm
+#define mp_exptmod                  sp_exptmod
+#define mp_exptmod_nct              sp_exptmod
+#define mp_prime_is_prime           sp_prime_is_prime
+#define mp_prime_is_prime_ex        sp_prime_is_prime_ex
+#define mp_exch                     sp_exch
+#define get_digit_count             sp_get_digit_count
+#define mp_init_copy                sp_init_copy
+#define mp_rshb(A,x)                sp_rshb(A,x,A)
+#define mp_mul_d                    sp_mul_d
 
-#include <wolfssl/wolfcrypt/wolfmath.h>
 #endif
 
 #endif /* WOLF_CRYPT_SP_H */
--- a/wolfssl/wolfcrypt/srp.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/srp.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* srp.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
--- a/wolfssl/wolfcrypt/tfm.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/tfm.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* tfm.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -46,33 +46,21 @@
 
 #include <wolfssl/wolfcrypt/random.h>
 
-/* wolf big int and common functions */
-#include <wolfssl/wolfcrypt/wolfmath.h>
-
 #ifdef __cplusplus
     extern "C" {
 #endif
 
-#ifdef WOLFSSL_PUBLIC_MP
-    #define MP_API   WOLFSSL_API
-#else
-    #define MP_API
-#endif
-
-#ifndef MIN
-   #define MIN(x,y) ((x)<(y)?(x):(y))
-#endif
-
-#ifndef MAX
-   #define MAX(x,y) ((x)>(y)?(x):(y))
-#endif
-
 #ifdef WOLFSSL_NO_ASM
    #undef  TFM_NO_ASM
    #define TFM_NO_ASM
 #endif
 
-#ifndef NO_64BIT
+#ifdef NO_64BIT
+   #undef  NO_TFM_64BIT
+   #define NO_TFM_64BIT
+#endif
+
+#ifndef NO_TFM_64BIT
 /* autodetect x86-64 and make sure we are using 64-bit digits with x86-64 asm */
 #if defined(__x86_64__)
    #if defined(TFM_X86) || defined(TFM_SSE2) || defined(TFM_ARM)
@@ -96,7 +84,7 @@
     #undef FP_64BIT
     #undef TFM_X86_64
 #endif
-#endif /* NO_64BIT */
+#endif /* NO_TFM_64BIT */
 
 /* try to detect x86-32 */
 #if defined(__i386__) && !defined(TFM_SSE2)
@@ -224,14 +212,18 @@
 
 /* some default configurations.
  */
-#if defined(FP_64BIT)
+#if defined(WC_16BIT_CPU)
+   typedef unsigned int    fp_digit;
+   #define SIZEOF_FP_DIGIT 2
+   typedef unsigned long   fp_word;
+#elif defined(FP_64BIT)
    /* for GCC only on supported platforms */
    typedef unsigned long long fp_digit;   /* 64bit, 128 uses mode(TI) below */
    #define SIZEOF_FP_DIGIT 8
    typedef unsigned long      fp_word __attribute__ ((mode(TI)));
 #else
 
-   #ifndef NO_64BIT
+   #ifndef NO_TFM_64BIT
       #if defined(_MSC_VER) || defined(__BORLANDC__)
          typedef unsigned __int64   ulong64;
       #else
@@ -252,6 +244,7 @@
 
 #endif /* WOLFSSL_BIGINT_TYPES */
 
+
 /* # of digits this is */
 #define DIGIT_BIT   ((CHAR_BIT) * SIZEOF_FP_DIGIT)
 
@@ -265,6 +258,11 @@
 #ifndef FP_MAX_BITS
     #define FP_MAX_BITS           4096
 #endif
+#ifdef WOLFSSL_OPENSSH
+    /* OpenSSH uses some BIG primes so we need to accommodate for that */
+    #undef FP_MAX_BITS
+    #define FP_MAX_BITS 16384
+#endif
 #define FP_MAX_SIZE           (FP_MAX_BITS+(8*DIGIT_BIT))
 
 /* will this lib work? */
@@ -279,6 +277,12 @@
 #define FP_DIGIT_MAX FP_MASK
 #define FP_SIZE    (FP_MAX_SIZE/DIGIT_BIT)
 
+#define FP_MAX_PRIME_SIZE (FP_MAX_BITS/(2*CHAR_BIT))
+/* In terms of FP_MAX_BITS, it is double the size possible for a number
+ * to allow for multiplication, divide that 2 out. Also divide by CHAR_BIT
+ * to convert from bits to bytes. (Note, FP_PRIME_SIZE is the number of
+ * values in the canned prime number list.) */
+
 /* signs */
 #define FP_ZPOS     0
 #define FP_NEG      1
@@ -288,6 +292,7 @@
 #define FP_VAL      -1
 #define FP_MEM      -2
 #define FP_NOT_INF	-3
+#define FP_WOULDBLOCK -4
 
 /* equalities */
 #define FP_LT        -1   /* less than */
@@ -299,7 +304,13 @@
 #define FP_NO         0   /* no response */
 
 #ifdef HAVE_WOLF_BIGINT
-    struct WC_BIGINT;
+    /* raw big integer */
+    typedef struct WC_BIGINT {
+        byte*   buf;
+        word32  len;
+        void*   heap;
+    } WC_BIGINT;
+    #define WOLF_BIGINT_DEFINED
 #endif
 
 /* a FP type */
@@ -316,6 +327,16 @@
 #endif
 } fp_int;
 
+/* Types */
+typedef fp_digit mp_digit;
+typedef fp_word  mp_word;
+typedef fp_int   mp_int;
+
+
+/* wolf big int and common functions */
+#include <wolfssl/wolfcrypt/wolfmath.h>
+
+
 /* externally define this symbol to ignore the default settings, useful for changing the build from the make process */
 #ifndef TFM_ALREADY_SET
 
@@ -398,13 +419,16 @@
 MP_API void fp_forcezero (fp_int * a);
 MP_API void fp_free(fp_int* a);
 
-/* zero/even/odd ? */
+/* zero/one/even/odd/neg/word ? */
 #define fp_iszero(a) (((a)->used == 0) ? FP_YES : FP_NO)
 #define fp_isone(a) \
     ((((a)->used == 1) && ((a)->dp[0] == 1)) ? FP_YES : FP_NO)
 #define fp_iseven(a) (((a)->used > 0 && (((a)->dp[0] & 1) == 0)) ? FP_YES : FP_NO)
 #define fp_isodd(a)  (((a)->used > 0  && (((a)->dp[0] & 1) == 1)) ? FP_YES : FP_NO)
 #define fp_isneg(a)  (((a)->sign != 0) ? FP_YES : FP_NO)
+#define fp_isword(a, w) \
+    ((((a)->used == 1) && ((a)->dp[0] == w)) || ((w == 0) && ((a)->used == 0)) \
+                                                               ? FP_YES : FP_NO)
 
 /* set to a small digit */
 void fp_set(fp_int *a, fp_digit b);
@@ -461,10 +485,10 @@
 void fp_sub(fp_int *a, fp_int *b, fp_int *c);
 
 /* c = a * b */
-void fp_mul(fp_int *a, fp_int *b, fp_int *c);
+int fp_mul(fp_int *a, fp_int *b, fp_int *c);
 
 /* b = a*a  */
-void fp_sqr(fp_int *a, fp_int *b);
+int fp_sqr(fp_int *a, fp_int *b);
 
 /* a/b => cb + d == a */
 int fp_div(fp_int *a, fp_int *b, fp_int *c, fp_int *d);
@@ -479,7 +503,7 @@
 void fp_add_d(fp_int *a, fp_digit b, fp_int *c);
 
 /* c = a - b */
-void fp_sub_d(fp_int *a, fp_digit b, fp_int *c);
+int fp_sub_d(fp_int *a, fp_digit b, fp_int *c);
 
 /* c = a * b */
 void fp_mul_d(fp_int *a, fp_digit b, fp_int *c);
@@ -511,12 +535,13 @@
 
 /* c = 1/a (mod b) */
 int fp_invmod(fp_int *a, fp_int *b, fp_int *c);
+int fp_invmod_mont_ct(fp_int *a, fp_int *b, fp_int *c, fp_digit mp);
 
 /* c = (a, b) */
-/*void fp_gcd(fp_int *a, fp_int *b, fp_int *c);*/
+/*int fp_gcd(fp_int *a, fp_int *b, fp_int *c);*/
 
 /* c = [a, b] */
-/*void fp_lcm(fp_int *a, fp_int *b, fp_int *c);*/
+/*int fp_lcm(fp_int *a, fp_int *b, fp_int *c);*/
 
 /* setups the montgomery reduction */
 int fp_montgomery_setup(fp_int *a, fp_digit *mp);
@@ -527,10 +552,61 @@
 void fp_montgomery_calc_normalization(fp_int *a, fp_int *b);
 
 /* computes x/R == x (mod N) via Montgomery Reduction */
-void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp);
+int fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp);
 
 /* d = a**b (mod c) */
 int fp_exptmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d);
+int fp_exptmod_ex(fp_int *a, fp_int *b, int minDigits, fp_int *c, fp_int *d);
+int fp_exptmod_nct(fp_int *a, fp_int *b, fp_int *c, fp_int *d);
+
+#ifdef WC_RSA_NONBLOCK
+
+enum tfmExptModNbState {
+  TFM_EXPTMOD_NB_INIT = 0,
+  TFM_EXPTMOD_NB_MONT,
+  TFM_EXPTMOD_NB_MONT_RED,
+  TFM_EXPTMOD_NB_MONT_MUL,
+  TFM_EXPTMOD_NB_MONT_MOD,
+  TFM_EXPTMOD_NB_MONT_MODCHK,
+  TFM_EXPTMOD_NB_NEXT,
+  TFM_EXPTMOD_NB_MUL,
+  TFM_EXPTMOD_NB_MUL_RED,
+  TFM_EXPTMOD_NB_SQR,
+  TFM_EXPTMOD_NB_SQR_RED,
+  TFM_EXPTMOD_NB_RED,
+  TFM_EXPTMOD_NB_COUNT /* last item for total state count only */
+};
+
+typedef struct {
+#ifndef WC_NO_CACHE_RESISTANT
+  fp_int   R[3];
+#else
+  fp_int   R[2];
+#endif
+  fp_digit buf;
+  fp_digit mp;
+  int bitcnt;
+  int digidx;
+  int y;
+  int state; /* tfmExptModNbState */
+#ifdef WC_RSA_NONBLOCK_TIME
+  word32 maxBlockInst; /* maximum instructions to block */
+  word32 totalInst;    /* tracks total instructions */
+#endif
+} exptModNb_t;
+
+#ifdef WC_RSA_NONBLOCK_TIME
+enum {
+  TFM_EXPTMOD_NB_STOP = 0,     /* stop and return FP_WOULDBLOCK */
+  TFM_EXPTMOD_NB_CONTINUE = 1, /* keep blocking */
+};
+#endif
+
+/* non-blocking version of timing resistant fp_exptmod function */
+/* supports cache resistance */
+int fp_exptmod_nb(exptModNb_t* nb, fp_int* G, fp_int* X, fp_int* P, fp_int* Y);
+
+#endif /* WC_RSA_NONBLOCK */
 
 /* primality stuff */
 
@@ -541,7 +617,7 @@
 /* 256 trial divisions + 8 Miller-Rabins, returns FP_YES if probable prime  */
 /*int fp_isprime(fp_int *a);*/
 /* extended version of fp_isprime, do 't' Miller-Rabins instead of only 8 */
-/*int fp_isprime_ex(fp_int *a, int t);*/
+/*int fp_isprime_ex(fp_int *a, int t, int* result);*/
 
 /* Primality generation flags */
 /*#define TFM_PRIME_BBS      0x0001 */ /* BBS style prime */
@@ -562,7 +638,8 @@
 
 int fp_unsigned_bin_size(fp_int *a);
 void fp_read_unsigned_bin(fp_int *a, const unsigned char *b, int c);
-void fp_to_unsigned_bin(fp_int *a, unsigned char *b);
+int fp_to_unsigned_bin(fp_int *a, unsigned char *b);
+int fp_to_unsigned_bin_len(fp_int *a, unsigned char *b, int c);
 int fp_to_unsigned_bin_at_pos(int x, fp_int *t, unsigned char *b);
 
 /*int fp_signed_bin_size(fp_int *a);*/
@@ -579,51 +656,45 @@
 void s_fp_sub(fp_int *a, fp_int *b, fp_int *c);
 void fp_reverse(unsigned char *s, int len);
 
-void fp_mul_comba(fp_int *a, fp_int *b, fp_int *c);
+int  fp_mul_comba(fp_int *a, fp_int *b, fp_int *c);
 
-void fp_mul_comba_small(fp_int *a, fp_int *b, fp_int *c);
-void fp_mul_comba3(fp_int *a, fp_int *b, fp_int *c);
-void fp_mul_comba4(fp_int *a, fp_int *b, fp_int *c);
-void fp_mul_comba6(fp_int *a, fp_int *b, fp_int *c);
-void fp_mul_comba7(fp_int *a, fp_int *b, fp_int *c);
-void fp_mul_comba8(fp_int *a, fp_int *b, fp_int *c);
-void fp_mul_comba9(fp_int *a, fp_int *b, fp_int *c);
-void fp_mul_comba12(fp_int *a, fp_int *b, fp_int *c);
-void fp_mul_comba17(fp_int *a, fp_int *b, fp_int *c);
-void fp_mul_comba20(fp_int *a, fp_int *b, fp_int *c);
-void fp_mul_comba24(fp_int *a, fp_int *b, fp_int *c);
-void fp_mul_comba28(fp_int *a, fp_int *b, fp_int *c);
-void fp_mul_comba32(fp_int *a, fp_int *b, fp_int *c);
-void fp_mul_comba48(fp_int *a, fp_int *b, fp_int *c);
-void fp_mul_comba64(fp_int *a, fp_int *b, fp_int *c);
-void fp_sqr_comba(fp_int *a, fp_int *b);
-void fp_sqr_comba_small(fp_int *a, fp_int *b);
-void fp_sqr_comba3(fp_int *a, fp_int *b);
-void fp_sqr_comba4(fp_int *a, fp_int *b);
-void fp_sqr_comba6(fp_int *a, fp_int *b);
-void fp_sqr_comba7(fp_int *a, fp_int *b);
-void fp_sqr_comba8(fp_int *a, fp_int *b);
-void fp_sqr_comba9(fp_int *a, fp_int *b);
-void fp_sqr_comba12(fp_int *a, fp_int *b);
-void fp_sqr_comba17(fp_int *a, fp_int *b);
-void fp_sqr_comba20(fp_int *a, fp_int *b);
-void fp_sqr_comba24(fp_int *a, fp_int *b);
-void fp_sqr_comba28(fp_int *a, fp_int *b);
-void fp_sqr_comba32(fp_int *a, fp_int *b);
-void fp_sqr_comba48(fp_int *a, fp_int *b);
-void fp_sqr_comba64(fp_int *a, fp_int *b);
+int  fp_mul_comba_small(fp_int *a, fp_int *b, fp_int *c);
+int  fp_mul_comba3(fp_int *a, fp_int *b, fp_int *c);
+int  fp_mul_comba4(fp_int *a, fp_int *b, fp_int *c);
+int  fp_mul_comba6(fp_int *a, fp_int *b, fp_int *c);
+int  fp_mul_comba7(fp_int *a, fp_int *b, fp_int *c);
+int  fp_mul_comba8(fp_int *a, fp_int *b, fp_int *c);
+int  fp_mul_comba9(fp_int *a, fp_int *b, fp_int *c);
+int  fp_mul_comba12(fp_int *a, fp_int *b, fp_int *c);
+int  fp_mul_comba17(fp_int *a, fp_int *b, fp_int *c);
+int  fp_mul_comba20(fp_int *a, fp_int *b, fp_int *c);
+int  fp_mul_comba24(fp_int *a, fp_int *b, fp_int *c);
+int  fp_mul_comba28(fp_int *a, fp_int *b, fp_int *c);
+int  fp_mul_comba32(fp_int *a, fp_int *b, fp_int *c);
+int  fp_mul_comba48(fp_int *a, fp_int *b, fp_int *c);
+int  fp_mul_comba64(fp_int *a, fp_int *b, fp_int *c);
+int  fp_sqr_comba(fp_int *a, fp_int *b);
+int  fp_sqr_comba_small(fp_int *a, fp_int *b);
+int  fp_sqr_comba3(fp_int *a, fp_int *b);
+int  fp_sqr_comba4(fp_int *a, fp_int *b);
+int  fp_sqr_comba6(fp_int *a, fp_int *b);
+int  fp_sqr_comba7(fp_int *a, fp_int *b);
+int  fp_sqr_comba8(fp_int *a, fp_int *b);
+int  fp_sqr_comba9(fp_int *a, fp_int *b);
+int  fp_sqr_comba12(fp_int *a, fp_int *b);
+int  fp_sqr_comba17(fp_int *a, fp_int *b);
+int  fp_sqr_comba20(fp_int *a, fp_int *b);
+int  fp_sqr_comba24(fp_int *a, fp_int *b);
+int  fp_sqr_comba28(fp_int *a, fp_int *b);
+int  fp_sqr_comba32(fp_int *a, fp_int *b);
+int  fp_sqr_comba48(fp_int *a, fp_int *b);
+int  fp_sqr_comba64(fp_int *a, fp_int *b);
 
 
 /**
  * Used by wolfSSL
  */
 
-/* Types */
-typedef fp_digit mp_digit;
-typedef fp_word  mp_word;
-typedef fp_int mp_int;
-#define MP_INT_DEFINED
-
 /* Constants */
 #define MP_LT   FP_LT   /* less than    */
 #define MP_EQ   FP_EQ   /* equal to     */
@@ -639,10 +710,11 @@
 #define MP_MASK FP_MASK
 
 /* Prototypes */
-#define mp_zero(a)   fp_zero(a)
-#define mp_isone(a)  fp_isone(a)
-#define mp_iseven(a) fp_iseven(a)
-#define mp_isneg(a)  fp_isneg(a)
+#define mp_zero(a)      fp_zero(a)
+#define mp_isone(a)     fp_isone(a)
+#define mp_iseven(a)    fp_iseven(a)
+#define mp_isneg(a)     fp_isneg(a)
+#define mp_isword(a, w) fp_isword(a, w)
 
 #define MP_RADIX_BIN  2
 #define MP_RADIX_OCT  8
@@ -673,7 +745,11 @@
 MP_API int  mp_addmod (mp_int* a, mp_int* b, mp_int* c, mp_int* d);
 MP_API int  mp_mod(mp_int *a, mp_int *b, mp_int *c);
 MP_API int  mp_invmod(mp_int *a, mp_int *b, mp_int *c);
+MP_API int  mp_invmod_mont_ct(mp_int *a, mp_int *b, mp_int *c, fp_digit mp);
 MP_API int  mp_exptmod (mp_int * g, mp_int * x, mp_int * p, mp_int * y);
+MP_API int  mp_exptmod_ex (mp_int * g, mp_int * x, int minDigits, mp_int * p,
+                           mp_int * y);
+MP_API int  mp_exptmod_nct (mp_int * g, mp_int * x, mp_int * p, mp_int * y);
 MP_API int  mp_mul_2d(mp_int *a, int b, mp_int *c);
 MP_API int  mp_2expt(mp_int* a, int b);
 
@@ -686,6 +762,7 @@
 MP_API int  mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c);
 MP_API int  mp_to_unsigned_bin_at_pos(int x, mp_int *t, unsigned char *b);
 MP_API int  mp_to_unsigned_bin (mp_int * a, unsigned char *b);
+MP_API int  mp_to_unsigned_bin_len(mp_int * a, unsigned char *b, int c);
 
 MP_API int  mp_sub_d(fp_int *a, fp_digit b, fp_int *c);
 MP_API int  mp_copy(fp_int* a, fp_int* b);
@@ -719,19 +796,24 @@
     MP_API int mp_init_copy(fp_int * a, fp_int * b);
 #endif
 
-#if defined(HAVE_ECC) || !defined(NO_RSA) || !defined(NO_DSA)
+#if defined(HAVE_ECC) || !defined(NO_RSA) || !defined(NO_DSA) || \
+    defined(WOLFSSL_KEY_GEN)
     MP_API int mp_set(fp_int *a, fp_digit b);
 #endif
 
-#if defined(HAVE_ECC) || defined(WOLFSSL_KEY_GEN)
+#if defined(HAVE_ECC) || defined(WOLFSSL_KEY_GEN) || !defined(NO_RSA) || \
+    !defined(NO_DSA) || !defined(NO_DH)
     MP_API int mp_sqrmod(mp_int* a, mp_int* b, mp_int* c);
     MP_API int mp_montgomery_calc_normalization(mp_int *a, mp_int *b);
 #endif
 
+#if !defined(NO_DH) || !defined(NO_DSA) || !defined(NO_RSA) || defined(WOLFSSL_KEY_GEN)
+MP_API int  mp_prime_is_prime(mp_int* a, int t, int* result);
+MP_API int  mp_prime_is_prime_ex(mp_int* a, int t, int* result, WC_RNG* rng);
+#endif /* !NO_DH || !NO_DSA || !NO_RSA || WOLFSSL_KEY_GEN */
 #ifdef WOLFSSL_KEY_GEN
 MP_API int  mp_gcd(fp_int *a, fp_int *b, fp_int *c);
 MP_API int  mp_lcm(fp_int *a, fp_int *b, fp_int *c);
-MP_API int  mp_prime_is_prime(mp_int* a, int t, int* result);
 MP_API int  mp_rand_prime(mp_int* N, int len, WC_RNG* rng, void* heap);
 MP_API int  mp_exch(mp_int *a, mp_int *b);
 #endif /* WOLFSSL_KEY_GEN */
@@ -749,10 +831,6 @@
 #define CheckFastMathSettings() (FP_SIZE == CheckRunTimeFastMath())
 
 
-/* wolf big int and common functions */
-#include <wolfssl/wolfcrypt/wolfmath.h>
-
-
 #ifdef __cplusplus
    }
 #endif
--- a/wolfssl/wolfcrypt/types.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/types.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* types.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -26,92 +26,137 @@
 #ifndef WOLF_CRYPT_TYPES_H
 #define WOLF_CRYPT_TYPES_H
 
-	#include <wolfssl/wolfcrypt/settings.h>
-	#include <wolfssl/wolfcrypt/wc_port.h>
+    #include <wolfssl/wolfcrypt/settings.h>
+    #include <wolfssl/wolfcrypt/wc_port.h>
 
-	#ifdef __cplusplus
-	    extern "C" {
-	#endif
+    #ifdef __cplusplus
+        extern "C" {
+    #endif
 
 
-	#if defined(WORDS_BIGENDIAN)
-	    #define BIG_ENDIAN_ORDER
-	#endif
+    #define WOLFSSL_ABI
+            /* Tag for all the APIs that are a part of the fixed ABI. */
 
-	#ifndef BIG_ENDIAN_ORDER
-	    #define LITTLE_ENDIAN_ORDER
-	#endif
+    /*
+     * This struct is used multiple time by other structs and
+     * needs to be defined somwhere that all structs can import
+     * (with minimal depencencies).
+     */
+    #if defined(HAVE_EX_DATA) || defined(FORTRESS)
+    typedef struct WOLFSSL_CRYPTO_EX_DATA {
+        void* ex_data[MAX_EX_DATA];
+    } WOLFSSL_CRYPTO_EX_DATA;
+    #endif
+
+    #if defined(WORDS_BIGENDIAN)
+        #define BIG_ENDIAN_ORDER
+    #endif
+
+    #ifndef BIG_ENDIAN_ORDER
+        #define LITTLE_ENDIAN_ORDER
+    #endif
 
-	#ifndef WOLFSSL_TYPES
-	    #ifndef byte
-	        typedef unsigned char  byte;
-	    #endif
-	    typedef unsigned short word16;
-	    typedef unsigned int   word32;
-	    typedef byte           word24[3];
-	#endif
+    #ifndef WOLFSSL_TYPES
+        #ifndef byte
+            typedef unsigned char  byte;
+        #endif
+        #ifdef WC_16BIT_CPU
+            typedef unsigned int   word16;
+            typedef unsigned long  word32;
+        #else
+            typedef unsigned short word16;
+            typedef unsigned int   word32;
+        #endif
+        typedef byte           word24[3];
+    #endif
+
+
+    /* constant pointer to a constant char */
+    #ifdef WOLFSSL_NO_CONSTCHARCONST
+        typedef const char*       wcchar;
+    #else
+        typedef const char* const wcchar;
+    #endif
 
 
-	/* try to set SIZEOF_LONG or LONG_LONG if user didn't */
-	#if !defined(_MSC_VER) && !defined(__BCPLUSPLUS__) && !defined(__EMSCRIPTEN__)
-	    #if !defined(SIZEOF_LONG_LONG) && !defined(SIZEOF_LONG)
-	        #if (defined(__alpha__) || defined(__ia64__) || \
-	            defined(_ARCH_PPC64) || defined(__mips64) || \
-	            defined(__x86_64__) || \
-	            ((defined(sun) || defined(__sun)) && \
-	             (defined(LP64) || defined(_LP64))))
-	            /* long should be 64bit */
-	            #define SIZEOF_LONG 8
-	        #elif defined(__i386__) || defined(__CORTEX_M3__)
-	            /* long long should be 64bit */
-	            #define SIZEOF_LONG_LONG 8
-	        #endif
- 	    #endif
-	#endif
+    /* try to set SIZEOF_LONG or SIZEOF_LONG_LONG if user didn't */
+    #if defined(_MSC_VER) || defined(HAVE_LIMITS_H)
+        #if !defined(SIZEOF_LONG_LONG) && !defined(SIZEOF_LONG)
+            #include <limits.h>
+            #if defined(ULONG_MAX) && (ULONG_MAX == 0xffffffffUL)
+                #define SIZEOF_LONG 4
+            #endif
+            #if defined(ULLONG_MAX) && (ULLONG_MAX == 0xffffffffffffffffULL)
+                #define SIZEOF_LONG_LONG 8
+            #endif
+        #endif
+    #elif !defined(__BCPLUSPLUS__) && !defined(__EMSCRIPTEN__)
+        #if !defined(SIZEOF_LONG_LONG) && !defined(SIZEOF_LONG)
+            #if (defined(__alpha__) || defined(__ia64__) || \
+                defined(_ARCH_PPC64) || defined(__mips64) || \
+                defined(__x86_64__)  || defined(__s390x__ ) || \
+                ((defined(sun) || defined(__sun)) && \
+                 (defined(LP64) || defined(_LP64))))
+                /* long should be 64bit */
+                #define SIZEOF_LONG 8
+            #elif defined(__i386__) || defined(__CORTEX_M3__)
+                /* long long should be 64bit */
+                #define SIZEOF_LONG_LONG 8
+            #endif
+         #endif
+    #endif
 
-	#if defined(_MSC_VER) || defined(__BCPLUSPLUS__)
-	    #define WORD64_AVAILABLE
-	    #define W64LIT(x) x##ui64
-	    typedef unsigned __int64 word64;
-	#elif defined(__EMSCRIPTEN__)
-	    #define WORD64_AVAILABLE
-	    #define W64LIT(x) x##ull
-	    typedef unsigned long long word64;
-	#elif defined(SIZEOF_LONG) && SIZEOF_LONG == 8
-	    #define WORD64_AVAILABLE
-	    #define W64LIT(x) x##LL
-	    typedef unsigned long word64;
-	#elif defined(SIZEOF_LONG_LONG) && SIZEOF_LONG_LONG == 8
-	    #define WORD64_AVAILABLE
-	    #define W64LIT(x) x##LL
-	    typedef unsigned long long word64;
-	#elif defined(__SIZEOF_LONG_LONG__) && __SIZEOF_LONG_LONG__ == 8
-	    #define WORD64_AVAILABLE
-	    #define W64LIT(x) x##LL
-	    typedef unsigned long long word64;
-	#endif
+    #if defined(_MSC_VER) || defined(__BCPLUSPLUS__)
+        #define WORD64_AVAILABLE
+        #define W64LIT(x) x##ui64
+        typedef unsigned __int64 word64;
+    #elif defined(__EMSCRIPTEN__)
+        #define WORD64_AVAILABLE
+        #define W64LIT(x) x##ull
+        typedef unsigned long long word64;
+    #elif defined(SIZEOF_LONG) && SIZEOF_LONG == 8
+        #define WORD64_AVAILABLE
+        #define W64LIT(x) x##LL
+        typedef unsigned long word64;
+    #elif defined(SIZEOF_LONG_LONG) && SIZEOF_LONG_LONG == 8
+        #define WORD64_AVAILABLE
+        #define W64LIT(x) x##LL
+        typedef unsigned long long word64;
+    #elif defined(__SIZEOF_LONG_LONG__) && __SIZEOF_LONG_LONG__ == 8
+        #define WORD64_AVAILABLE
+        #define W64LIT(x) x##LL
+        typedef unsigned long long word64;
+    #endif
 
-#if !defined(NO_64BIT) && defined(WORD64_AVAILABLE)
-	/* These platforms have 64-bit CPU registers.  */
-	#if (defined(__alpha__) || defined(__ia64__) || defined(_ARCH_PPC64) || \
-	     defined(__mips64)  || defined(__x86_64__) || defined(_M_X64)) || \
-         defined(__aarch64__) || defined(__sparc64__)
-	    typedef word64 wolfssl_word;
+#if !defined(NO_64BIT) && defined(WORD64_AVAILABLE) && !defined(WC_16BIT_CPU)
+    /* These platforms have 64-bit CPU registers.  */
+    #if (defined(__alpha__) || defined(__ia64__) || defined(_ARCH_PPC64) || \
+         defined(__mips64)  || defined(__x86_64__) || defined(_M_X64)) || \
+         defined(__aarch64__) || defined(__sparc64__) || defined(__s390x__ ) || \
+        (defined(__riscv_xlen) && (__riscv_xlen == 64))
+        typedef word64 wolfssl_word;
         #define WC_64BIT_CPU
-	#elif (defined(sun) || defined(__sun)) && \
+    #elif (defined(sun) || defined(__sun)) && \
           (defined(LP64) || defined(_LP64))
         /* LP64 with GNU GCC compiler is reserved for when long int is 64 bits
          * and int uses 32 bits. When using Solaris Studio sparc and __sparc are
          * available for 32 bit detection but __sparc64__ could be missed. This
          * uses LP64 for checking 64 bit CPU arch. */
-	    typedef word64 wolfssl_word;
+        typedef word64 wolfssl_word;
         #define WC_64BIT_CPU
     #else
-	    typedef word32 wolfssl_word;
-	    #ifdef WORD64_AVAILABLE
-	        #define WOLFCRYPT_SLOW_WORD64
-	    #endif
-	#endif
+        typedef word32 wolfssl_word;
+        #ifdef WORD64_AVAILABLE
+            #define WOLFCRYPT_SLOW_WORD64
+        #endif
+    #endif
+
+#elif defined(WC_16BIT_CPU)
+        #undef WORD64_AVAILABLE
+        typedef word16 wolfssl_word;
+        #define MP_16BIT  /* for mp_int, mp_word needs to be twice as big as
+                             mp_digit, no 64 bit type so make mp_digit 16 bit */
+
 #else
         #undef WORD64_AVAILABLE
         typedef word32 wolfssl_word;
@@ -119,36 +164,42 @@
                              mp_digit, no 64 bit type so make mp_digit 16 bit */
 #endif
 
-	enum {
-	    WOLFSSL_WORD_SIZE  = sizeof(wolfssl_word),
-	    WOLFSSL_BIT_SIZE   = 8,
-	    WOLFSSL_WORD_BITS  = WOLFSSL_WORD_SIZE * WOLFSSL_BIT_SIZE
-	};
+    enum {
+        WOLFSSL_WORD_SIZE  = sizeof(wolfssl_word),
+        WOLFSSL_BIT_SIZE   = 8,
+        WOLFSSL_WORD_BITS  = WOLFSSL_WORD_SIZE * WOLFSSL_BIT_SIZE
+    };
 
-	#define WOLFSSL_MAX_16BIT 0xffffU
+    #define WOLFSSL_MAX_16BIT 0xffffU
 
-	/* use inlining if compiler allows */
-	#ifndef WC_INLINE
-	#ifndef NO_INLINE
-	    #ifdef _MSC_VER
-	        #define WC_INLINE __inline
-	    #elif defined(__GNUC__)
+    /* use inlining if compiler allows */
+    #ifndef WC_INLINE
+    #ifndef NO_INLINE
+        #ifdef _MSC_VER
+            #define WC_INLINE __inline
+        #elif defined(__GNUC__)
                #ifdef WOLFSSL_VXWORKS
                    #define WC_INLINE __inline__
                #else
                    #define WC_INLINE inline
                #endif
-	    #elif defined(__IAR_SYSTEMS_ICC__)
-	        #define WC_INLINE inline
-	    #elif defined(THREADX)
-	        #define WC_INLINE _Inline
-	    #else
-	        #define WC_INLINE
-	    #endif
-	#else
-	    #define WC_INLINE
-	#endif
-	#endif
+        #elif defined(__IAR_SYSTEMS_ICC__)
+            #define WC_INLINE inline
+        #elif defined(THREADX)
+            #define WC_INLINE _Inline
+        #elif defined(__ghc__)
+            #ifndef __cplusplus
+                #define WC_INLINE __inline
+            #else
+                #define WC_INLINE inline
+            #endif
+        #else
+            #define WC_INLINE
+        #endif
+    #else
+        #define WC_INLINE
+    #endif
+    #endif
 
     #if defined(HAVE_FIPS) || defined(HAVE_SELFTEST)
         #define INLINE WC_INLINE
@@ -170,108 +221,167 @@
     #endif
 
 
-	/* set up thread local storage if available */
-	#ifdef HAVE_THREAD_LS
-	    #if defined(_MSC_VER)
-	        #define THREAD_LS_T __declspec(thread)
-	    /* Thread local storage only in FreeRTOS v8.2.1 and higher */
-	    #elif defined(FREERTOS) || defined(FREERTOS_TCP)
-	        #define THREAD_LS_T
-	    #else
-	        #define THREAD_LS_T __thread
-	    #endif
-	#else
-	    #define THREAD_LS_T
-	#endif
+    /* set up thread local storage if available */
+    #ifdef HAVE_THREAD_LS
+        #if defined(_MSC_VER)
+            #define THREAD_LS_T __declspec(thread)
+        /* Thread local storage only in FreeRTOS v8.2.1 and higher */
+        #elif defined(FREERTOS) || defined(FREERTOS_TCP) || \
+                                                         defined(WOLFSSL_ZEPHYR)
+            #define THREAD_LS_T
+        #else
+            #define THREAD_LS_T __thread
+        #endif
+    #else
+        #define THREAD_LS_T
+    #endif
 
     /* GCC 7 has new switch() fall-through detection */
+    /* default to FALL_THROUGH stub */
+    #ifndef FALL_THROUGH
+    #define FALL_THROUGH
+
     #if defined(__GNUC__)
         #if ((__GNUC__ > 7) || ((__GNUC__ == 7) && (__GNUC_MINOR__ >= 1)))
+            #undef  FALL_THROUGH
             #define FALL_THROUGH __attribute__ ((fallthrough));
         #endif
     #endif
-    #ifndef FALL_THROUGH
-        #define FALL_THROUGH
+    #endif /* FALL_THROUGH */
+
+    /* Micrium will use Visual Studio for compilation but not the Win32 API */
+    #if defined(_WIN32) && !defined(MICRIUM) && !defined(FREERTOS) && \
+        !defined(FREERTOS_TCP) && !defined(EBSNET) && \
+        !defined(WOLFSSL_UTASKER) && !defined(INTIME_RTOS)
+        #define USE_WINDOWS_API
     #endif
 
-	/* Micrium will use Visual Studio for compilation but not the Win32 API */
-	#if defined(_WIN32) && !defined(MICRIUM) && !defined(FREERTOS) && \
-		!defined(FREERTOS_TCP) && !defined(EBSNET) && \
-        !defined(WOLFSSL_UTASKER) && !defined(INTIME_RTOS)
-	    #define USE_WINDOWS_API
-	#endif
-
+    #define XSTR_SIZEOF(x) (sizeof(x) - 1) /* -1 to not count the null char */
 
-	/* idea to add global alloc override by Moises Guimaraes  */
-	/* default to libc stuff */
-	/* XREALLOC is used once in normal math lib, not in fast math lib */
-	/* XFREE on some embedded systems doesn't like free(0) so test  */
-	#if defined(HAVE_IO_POOL)
-		WOLFSSL_API void* XMALLOC(size_t n, void* heap, int type);
-		WOLFSSL_API void* XREALLOC(void *p, size_t n, void* heap, int type);
-		WOLFSSL_API void XFREE(void *p, void* heap, int type);
-	#elif defined(WOLFSSL_ASYNC_CRYPT) && defined(HAVE_INTEL_QA)
-        #include <wolfssl/wolfcrypt/port/intel/quickassist_mem.h>
-        #undef USE_WOLFSSL_MEMORY
-        #ifdef WOLFSSL_DEBUG_MEMORY
-            #define XMALLOC(s, h, t)     IntelQaMalloc((s), (h), (t), __func__, __LINE__)
-            #define XFREE(p, h, t)       IntelQaFree((p), (h), (t), __func__, __LINE__)
-            #define XREALLOC(p, n, h, t) IntelQaRealloc((p), (n), (h), (t), __func__, __LINE__)
+    /* idea to add global alloc override by Moises Guimaraes  */
+    /* default to libc stuff */
+    /* XREALLOC is used once in normal math lib, not in fast math lib */
+    /* XFREE on some embedded systems doesn't like free(0) so test  */
+    #if defined(HAVE_IO_POOL)
+        WOLFSSL_API void* XMALLOC(size_t n, void* heap, int type);
+        WOLFSSL_API void* XREALLOC(void *p, size_t n, void* heap, int type);
+        WOLFSSL_API void XFREE(void *p, void* heap, int type);
+    #elif (defined(WOLFSSL_ASYNC_CRYPT) && defined(HAVE_INTEL_QA)) || \
+          defined(HAVE_INTEL_QA_SYNC)
+        #ifndef HAVE_INTEL_QA_SYNC
+            #include <wolfssl/wolfcrypt/port/intel/quickassist_mem.h>
+            #undef USE_WOLFSSL_MEMORY
+            #ifdef WOLFSSL_DEBUG_MEMORY
+                #define XMALLOC(s, h, t)     IntelQaMalloc((s), (h), (t), __func__, __LINE__)
+                #define XFREE(p, h, t)       IntelQaFree((p), (h), (t), __func__, __LINE__)
+                #define XREALLOC(p, n, h, t) IntelQaRealloc((p), (n), (h), (t), __func__, __LINE__)
+            #else
+                #define XMALLOC(s, h, t)     IntelQaMalloc((s), (h), (t))
+                #define XFREE(p, h, t)       IntelQaFree((p), (h), (t))
+                #define XREALLOC(p, n, h, t) IntelQaRealloc((p), (n), (h), (t))
+            #endif /* WOLFSSL_DEBUG_MEMORY */
         #else
-            #define XMALLOC(s, h, t)     IntelQaMalloc((s), (h), (t))
-            #define XFREE(p, h, t)       IntelQaFree((p), (h), (t))
-            #define XREALLOC(p, n, h, t) IntelQaRealloc((p), (n), (h), (t))
-        #endif /* WOLFSSL_DEBUG_MEMORY */
+            #include <wolfssl/wolfcrypt/port/intel/quickassist_sync.h>
+            #undef USE_WOLFSSL_MEMORY
+            #ifdef WOLFSSL_DEBUG_MEMORY
+                #define XMALLOC(s, h, t)     wc_CryptoCb_IntelQaMalloc((s), (h), (t), __func__, __LINE__)
+                #define XFREE(p, h, t)       wc_CryptoCb_IntelQaFree((p), (h), (t), __func__, __LINE__)
+                #define XREALLOC(p, n, h, t) wc_CryptoCb_IntelQaRealloc((p), (n), (h), (t), __func__, __LINE__)
+            #else
+                #define XMALLOC(s, h, t)     wc_CryptoCb_IntelQaMalloc((s), (h), (t))
+                #define XFREE(p, h, t)       wc_CryptoCb_IntelQaFree((p), (h), (t))
+                #define XREALLOC(p, n, h, t) wc_CryptoCb_IntelQaRealloc((p), (n), (h), (t))
+            #endif /* WOLFSSL_DEBUG_MEMORY */
+        #endif
     #elif defined(XMALLOC_USER)
-	    /* prototypes for user heap override functions */
-	    #include <stddef.h>  /* for size_t */
-	    extern void *XMALLOC(size_t n, void* heap, int type);
-	    extern void *XREALLOC(void *p, size_t n, void* heap, int type);
-	    extern void XFREE(void *p, void* heap, int type);
+        /* prototypes for user heap override functions */
+        #include <stddef.h>  /* for size_t */
+        extern void *XMALLOC(size_t n, void* heap, int type);
+        extern void *XREALLOC(void *p, size_t n, void* heap, int type);
+        extern void XFREE(void *p, void* heap, int type);
+    #elif defined(WOLFSSL_MEMORY_LOG)
+        #define XMALLOC(n, h, t)        xmalloc(n, h, t, __func__, __FILE__, __LINE__)
+        #define XREALLOC(p, n, h, t)    xrealloc(p, n, h, t, __func__,  __FILE__, __LINE__)
+        #define XFREE(p, h, t)          xfree(p, h, t, __func__, __FILE__, __LINE__)
+
+        /* prototypes for user heap override functions */
+        #include <stddef.h>  /* for size_t */
+        #include <stdlib.h>
+        WOLFSSL_API void *xmalloc(size_t n, void* heap, int type,
+                const char* func, const char* file, unsigned int line);
+        WOLFSSL_API void *xrealloc(void *p, size_t n, void* heap, int type,
+                const char* func, const char* file, unsigned int line);
+        WOLFSSL_API void xfree(void *p, void* heap, int type, const char* func,
+                const char* file, unsigned int line);
     #elif defined(XMALLOC_OVERRIDE)
         /* override the XMALLOC, XFREE and XREALLOC macros */
-	#elif defined(NO_WOLFSSL_MEMORY)
-	    /* just use plain C stdlib stuff if desired */
-	    #include <stdlib.h>
-	    #define XMALLOC(s, h, t)     ((void)h, (void)t, malloc((s)))
-	    #define XFREE(p, h, t)       {void* xp = (p); if((xp)) free((xp));}
-	    #define XREALLOC(p, n, h, t) realloc((p), (n))
-	#elif !defined(MICRIUM_MALLOC) && !defined(EBSNET) \
-	        && !defined(WOLFSSL_SAFERTOS) && !defined(FREESCALE_MQX) \
-	        && !defined(FREESCALE_KSDK_MQX) && !defined(FREESCALE_FREE_RTOS) \
+    #elif defined(WOLFSSL_TELIT_M2MB)
+        /* Telit M2MB SDK requires use m2mb_os API's, not std malloc/free */
+        /* Use of malloc/free will cause CPU reboot */
+        #define XMALLOC(s, h, t)     ((void)h, (void)t, m2mb_os_malloc((s)))
+        #define XFREE(p, h, t)       {void* xp = (p); if((xp)) m2mb_os_free((xp));}
+        #define XREALLOC(p, n, h, t) m2mb_os_realloc((p), (n))
+
+    #elif defined(NO_WOLFSSL_MEMORY)
+        #ifdef WOLFSSL_NO_MALLOC
+            /* this platform does not support heap use */
+            #ifdef WOLFSSL_MALLOC_CHECK
+                #include <stdio.h>
+                static inline void* malloc_check(size_t sz) {
+                    printf("wolfSSL_malloc failed");
+                    return NULL;
+                };
+                #define XMALLOC(s, h, t)     malloc_check((s))
+                #define XFREE(p, h, t)
+                #define XREALLOC(p, n, h, t) (NULL)
+            #else
+                #define XMALLOC(s, h, t)     (NULL)
+                #define XFREE(p, h, t)
+                #define XREALLOC(p, n, h, t) (NULL)
+            #endif
+        #else
+        /* just use plain C stdlib stuff if desired */
+        #include <stdlib.h>
+        #define XMALLOC(s, h, t)     malloc((s))
+        #define XFREE(p, h, t)       {void* xp = (p); if((xp)) free((xp));}
+        #define XREALLOC(p, n, h, t) realloc((p), (n))
+        #endif
+    #elif !defined(MICRIUM_MALLOC) && !defined(EBSNET) \
+            && !defined(WOLFSSL_SAFERTOS) && !defined(FREESCALE_MQX) \
+            && !defined(FREESCALE_KSDK_MQX) && !defined(FREESCALE_FREE_RTOS) \
             && !defined(WOLFSSL_LEANPSK) && !defined(WOLFSSL_uITRON4)
-	    /* default C runtime, can install different routines at runtime via cbs */
-	    #include <wolfssl/wolfcrypt/memory.h>
+        /* default C runtime, can install different routines at runtime via cbs */
+        #include <wolfssl/wolfcrypt/memory.h>
         #ifdef WOLFSSL_STATIC_MEMORY
             #ifdef WOLFSSL_DEBUG_MEMORY
-				#define XMALLOC(s, h, t)     wolfSSL_Malloc((s), (h), (t), __func__, __LINE__)
-				#define XFREE(p, h, t)       {void* xp = (p); if((xp)) wolfSSL_Free((xp), (h), (t), __func__, __LINE__);}
-				#define XREALLOC(p, n, h, t) wolfSSL_Realloc((p), (n), (h), (t), __func__, __LINE__)
+                #define XMALLOC(s, h, t)     wolfSSL_Malloc((s), (h), (t), __func__, __LINE__)
+                #define XFREE(p, h, t)       {void* xp = (p); if((xp)) wolfSSL_Free((xp), (h), (t), __func__, __LINE__);}
+                #define XREALLOC(p, n, h, t) wolfSSL_Realloc((p), (n), (h), (t), __func__, __LINE__)
             #else
-	            #define XMALLOC(s, h, t)     wolfSSL_Malloc((s), (h), (t))
-				#define XFREE(p, h, t)       {void* xp = (p); if((xp)) wolfSSL_Free((xp), (h), (t));}
-				#define XREALLOC(p, n, h, t) wolfSSL_Realloc((p), (n), (h), (t))
+                #define XMALLOC(s, h, t)     wolfSSL_Malloc((s), (h), (t))
+                #define XFREE(p, h, t)       {void* xp = (p); if((xp)) wolfSSL_Free((xp), (h), (t));}
+                #define XREALLOC(p, n, h, t) wolfSSL_Realloc((p), (n), (h), (t))
             #endif /* WOLFSSL_DEBUG_MEMORY */
         #elif !defined(FREERTOS) && !defined(FREERTOS_TCP)
             #ifdef WOLFSSL_DEBUG_MEMORY
-				#define XMALLOC(s, h, t)     ((void)h, (void)t, wolfSSL_Malloc((s), __func__, __LINE__))
-				#define XFREE(p, h, t)       {void* xp = (p); if((xp)) wolfSSL_Free((xp), __func__, __LINE__);}
-				#define XREALLOC(p, n, h, t) wolfSSL_Realloc((p), (n), __func__, __LINE__)
+                #define XMALLOC(s, h, t)     ((void)h, (void)t, wolfSSL_Malloc((s), __func__, __LINE__))
+                #define XFREE(p, h, t)       {void* xp = (p); if((xp)) wolfSSL_Free((xp), __func__, __LINE__);}
+                #define XREALLOC(p, n, h, t) wolfSSL_Realloc((p), (n), __func__, __LINE__)
             #else
-	            #define XMALLOC(s, h, t)     ((void)h, (void)t, wolfSSL_Malloc((s)))
-	            #define XFREE(p, h, t)       {void* xp = (p); if((xp)) wolfSSL_Free((xp));}
-	            #define XREALLOC(p, n, h, t) wolfSSL_Realloc((p), (n))
+                #define XMALLOC(s, h, t)     ((void)h, (void)t, wolfSSL_Malloc((s)))
+                #define XFREE(p, h, t)       {void* xp = (p); if((xp)) wolfSSL_Free((xp));}
+                #define XREALLOC(p, n, h, t) wolfSSL_Realloc((p), (n))
             #endif /* WOLFSSL_DEBUG_MEMORY */
         #endif /* WOLFSSL_STATIC_MEMORY */
-	#endif
+    #endif
 
     /* declare/free variable handling for async */
     #ifdef WOLFSSL_ASYNC_CRYPT
         #define DECLARE_VAR(VAR_NAME, VAR_TYPE, VAR_SIZE, HEAP) \
-            VAR_TYPE* VAR_NAME = (VAR_TYPE*)XMALLOC(sizeof(VAR_TYPE) * VAR_SIZE, HEAP, DYNAMIC_TYPE_WOLF_BIGINT);
+            VAR_TYPE* VAR_NAME = (VAR_TYPE*)XMALLOC(sizeof(VAR_TYPE) * VAR_SIZE, (HEAP), DYNAMIC_TYPE_WOLF_BIGINT);
         #define DECLARE_VAR_INIT(VAR_NAME, VAR_TYPE, VAR_SIZE, INIT_VALUE, HEAP) \
             VAR_TYPE* VAR_NAME = ({ \
-                VAR_TYPE* ptr = (VAR_TYPE*)XMALLOC(sizeof(VAR_TYPE) * VAR_SIZE, HEAP, DYNAMIC_TYPE_WOLF_BIGINT); \
+                VAR_TYPE* ptr = (VAR_TYPE*)XMALLOC(sizeof(VAR_TYPE) * VAR_SIZE, (HEAP), DYNAMIC_TYPE_WOLF_BIGINT); \
                 if (ptr && INIT_VALUE) { \
                     XMEMCPY(ptr, INIT_VALUE, sizeof(VAR_TYPE) * VAR_SIZE); \
                 } \
@@ -281,14 +391,20 @@
             VAR_TYPE* VAR_NAME[VAR_ITEMS]; \
             int idx##VAR_NAME; \
             for (idx##VAR_NAME=0; idx##VAR_NAME<VAR_ITEMS; idx##VAR_NAME++) { \
-                VAR_NAME[idx##VAR_NAME] = (VAR_TYPE*)XMALLOC(VAR_SIZE, HEAP, DYNAMIC_TYPE_WOLF_BIGINT); \
+                VAR_NAME[idx##VAR_NAME] = (VAR_TYPE*)XMALLOC(VAR_SIZE, (HEAP), DYNAMIC_TYPE_WOLF_BIGINT); \
             }
         #define FREE_VAR(VAR_NAME, HEAP) \
-            XFREE(VAR_NAME, HEAP, DYNAMIC_TYPE_WOLF_BIGINT);
+            XFREE(VAR_NAME, (HEAP), DYNAMIC_TYPE_WOLF_BIGINT);
         #define FREE_ARRAY(VAR_NAME, VAR_ITEMS, HEAP) \
             for (idx##VAR_NAME=0; idx##VAR_NAME<VAR_ITEMS; idx##VAR_NAME++) { \
-                XFREE(VAR_NAME[idx##VAR_NAME], HEAP, DYNAMIC_TYPE_WOLF_BIGINT); \
+                XFREE(VAR_NAME[idx##VAR_NAME], (HEAP), DYNAMIC_TYPE_WOLF_BIGINT); \
             }
+
+        #define DECLARE_ARRAY_DYNAMIC_DEC(VAR_NAME, VAR_TYPE, VAR_ITEMS, VAR_SIZE, HEAP) \
+            DECLARE_ARRAY(VAR_NAME, VAR_TYPE, VAR_ITEMS, VAR_SIZE, HEAP)
+        #define DECLARE_ARRAY_DYNAMIC_EXE(VAR_NAME, VAR_TYPE, VAR_ITEMS, VAR_SIZE, HEAP)
+        #define FREE_ARRAY_DYNAMIC(VAR_NAME, VAR_ITEMS, HEAP) \
+            FREE_ARRAY(VAR_NAME, VAR_ITEMS, HEAP)
     #else
         #define DECLARE_VAR(VAR_NAME, VAR_TYPE, VAR_SIZE, HEAP) \
             VAR_TYPE VAR_NAME[VAR_SIZE]
@@ -298,31 +414,44 @@
             VAR_TYPE VAR_NAME[VAR_ITEMS][VAR_SIZE]
         #define FREE_VAR(VAR_NAME, HEAP) /* nothing to free, its stack */
         #define FREE_ARRAY(VAR_NAME, VAR_ITEMS, HEAP)  /* nothing to free, its stack */
+
+        #define DECLARE_ARRAY_DYNAMIC_DEC(VAR_NAME, VAR_TYPE, VAR_ITEMS, VAR_SIZE, HEAP) \
+            VAR_TYPE* VAR_NAME[VAR_ITEMS]; \
+            int idx##VAR_NAME;
+        #define DECLARE_ARRAY_DYNAMIC_EXE(VAR_NAME, VAR_TYPE, VAR_ITEMS, VAR_SIZE, HEAP) \
+            for (idx##VAR_NAME=0; idx##VAR_NAME<VAR_ITEMS; idx##VAR_NAME++) { \
+                VAR_NAME[idx##VAR_NAME] = (VAR_TYPE*)XMALLOC(VAR_SIZE, (HEAP), DYNAMIC_TYPE_TMP_BUFFER); \
+            }
+        #define FREE_ARRAY_DYNAMIC(VAR_NAME, VAR_ITEMS, HEAP) \
+            for (idx##VAR_NAME=0; idx##VAR_NAME<VAR_ITEMS; idx##VAR_NAME++) { \
+                XFREE(VAR_NAME[idx##VAR_NAME], (HEAP), DYNAMIC_TYPE_TMP_BUFFER); \
+            }
     #endif
 
     #if !defined(USE_WOLF_STRTOK) && \
-            (defined(__MINGW32__) || defined(WOLFSSL_TIRTOS) || defined(WOLF_C99))
+            ((defined(__MINGW32__) && !defined(__MINGW64_VERSION_MAJOR)) || \
+             defined(WOLFSSL_TIRTOS) || defined(WOLF_C99))
         #define USE_WOLF_STRTOK
     #endif
     #if !defined(USE_WOLF_STRSEP) && (defined(WOLF_C99))
         #define USE_WOLF_STRSEP
     #endif
 
-	#ifndef STRING_USER
-	    #include <string.h>
-	    #define XMEMCPY(d,s,l)    memcpy((d),(s),(l))
-	    #define XMEMSET(b,c,l)    memset((b),(c),(l))
-	    #define XMEMCMP(s1,s2,n)  memcmp((s1),(s2),(n))
-	    #define XMEMMOVE(d,s,l)   memmove((d),(s),(l))
+    #ifndef STRING_USER
+        #include <string.h>
+        #define XMEMCPY(d,s,l)    memcpy((d),(s),(l))
+        #define XMEMSET(b,c,l)    memset((b),(c),(l))
+        #define XMEMCMP(s1,s2,n)  memcmp((s1),(s2),(n))
+        #define XMEMMOVE(d,s,l)   memmove((d),(s),(l))
 
-	    #define XSTRLEN(s1)       strlen((s1))
-	    #define XSTRNCPY(s1,s2,n) strncpy((s1),(s2),(n))
-	    /* strstr, strncmp, and strncat only used by wolfSSL proper,
+        #define XSTRLEN(s1)       strlen((s1))
+        #define XSTRNCPY(s1,s2,n) strncpy((s1),(s2),(n))
+        /* strstr, strncmp, and strncat only used by wolfSSL proper,
          * not required for wolfCrypt only */
-	    #define XSTRSTR(s1,s2)    strstr((s1),(s2))
-	    #define XSTRNSTR(s1,s2,n) mystrnstr((s1),(s2),(n))
-	    #define XSTRNCMP(s1,s2,n) strncmp((s1),(s2),(n))
-	    #define XSTRNCAT(s1,s2,n) strncat((s1),(s2),(n))
+        #define XSTRSTR(s1,s2)    strstr((s1),(s2))
+        #define XSTRNSTR(s1,s2,n) mystrnstr((s1),(s2),(n))
+        #define XSTRNCMP(s1,s2,n) strncmp((s1),(s2),(n))
+        #define XSTRNCAT(s1,s2,n) strncat((s1),(s2),(n))
 
         #ifdef USE_WOLF_STRSEP
             #define XSTRSEP(s1,d) wc_strsep((s1),(d))
@@ -330,34 +459,76 @@
             #define XSTRSEP(s1,d) strsep((s1),(d))
         #endif
 
-        #if defined(MICROCHIP_PIC32) || defined(WOLFSSL_TIRTOS)
+        #ifndef XSTRNCASECMP
+        #if defined(MICROCHIP_PIC32) || defined(WOLFSSL_TIRTOS) || \
+                defined(WOLFSSL_ZEPHYR)
             /* XC32 does not support strncasecmp, so use case sensitive one */
             #define XSTRNCASECMP(s1,s2,n) strncmp((s1),(s2),(n))
         #elif defined(USE_WINDOWS_API) || defined(FREERTOS_TCP_WINSIM)
-	        #define XSTRNCASECMP(s1,s2,n) _strnicmp((s1),(s2),(n))
+            #define XSTRNCASECMP(s1,s2,n) _strnicmp((s1),(s2),(n))
         #else
             #if defined(HAVE_STRINGS_H) && defined(WOLF_C99) && \
                 !defined(WOLFSSL_SGX)
                 #include <strings.h>
             #endif
-	        #define XSTRNCASECMP(s1,s2,n) strncasecmp((s1),(s2),(n))
-	    #endif
+            #if defined(WOLFSSL_DEOS)
+                #define XSTRNCASECMP(s1,s2,n) strnicmp((s1),(s2),(n))
+            #else
+                #define XSTRNCASECMP(s1,s2,n) strncasecmp((s1),(s2),(n))
+            #endif
+        #endif
+        #endif /* !XSTRNCASECMP */
 
         /* snprintf is used in asn.c for GetTimeString, PKCS7 test, and when
            debugging is turned on */
         #ifndef USE_WINDOWS_API
-            #if defined(NO_FILESYSTEM) && defined(OPENSSL_EXTRA) && \
-               !defined(NO_STDIO_FILESYSTEM)
-                /* case where stdio is not included else where but is needed for
-                 * snprintf */
+            #ifndef XSNPRINTF
+            #if defined(NO_FILESYSTEM) && (defined(OPENSSL_EXTRA) || \
+                   defined(HAVE_PKCS7)) && !defined(NO_STDIO_FILESYSTEM)
+                /* case where stdio is not included else where but is needed
+                   for snprintf */
                 #include <stdio.h>
             #endif
             #define XSNPRINTF snprintf
+            #endif
         #else
-            #define XSNPRINTF _snprintf
-        #endif
+            #if defined(_MSC_VER) || defined(__CYGWIN__) || defined(__MINGW32__)
+                #if defined(_MSC_VER) && (_MSC_VER >= 1900)
+                    /* Beginning with the UCRT in Visual Studio 2015 and
+                       Windows 10, snprintf is no longer identical to
+                       _snprintf. The snprintf function behavior is now
+                       C99 standard compliant. */
+                    #include <stdio.h>
+                    #define XSNPRINTF snprintf
+                #else
+                    /* 4996 warning to use MS extensions e.g., _sprintf_s
+                       instead of _snprintf */
+                    #if !defined(__MINGW32__)
+                    #pragma warning(disable: 4996)
+                    #endif
+                    static WC_INLINE
+                    int xsnprintf(char *buffer, size_t bufsize,
+                            const char *format, ...) {
+                        va_list ap;
+                        int ret;
 
-        #if defined(WOLFSSL_CERT_EXT) || defined(HAVE_ALPN)
+                        if ((int)bufsize <= 0) return -1;
+                        va_start(ap, format);
+                        ret = vsnprintf(buffer, bufsize, format, ap);
+                        if (ret >= (int)bufsize)
+                            ret = -1;
+                        va_end(ap);
+                        return ret;
+                    }
+                    #define XSNPRINTF xsnprintf
+                #endif /* (_MSC_VER >= 1900) */
+            #else
+                #define XSNPRINTF snprintf
+            #endif /* _MSC_VER */
+        #endif /* USE_WINDOWS_API */
+
+        #if defined(WOLFSSL_CERT_EXT) || defined(OPENSSL_EXTRA) \
+                    || defined(HAVE_ALPN)
             /* use only Thread Safe version of strtok */
             #if defined(USE_WOLF_STRTOK)
                 #define XSTRTOK(s1,d,ptr) wc_strtok((s1),(d),(ptr))
@@ -367,7 +538,17 @@
                 #define XSTRTOK(s1,d,ptr) strtok_r((s1),(d),(ptr))
             #endif
         #endif
-	#endif
+
+        #if defined(WOLFSSL_CERT_EXT) || defined(HAVE_OCSP) || \
+            defined(HAVE_CRL_IO) || defined(HAVE_HTTP_CLIENT) || \
+            !defined(NO_CRYPT_BENCHMARK)
+
+            #ifndef XATOI /* if custom XATOI is not already defined */
+                #include <stdlib.h>
+                #define XATOI(s)          atoi((s))
+            #endif
+        #endif
+    #endif
 
     #ifdef USE_WOLF_STRTOK
         WOLFSSL_API char* wc_strtok(char *str, const char *delim, char **nextp);
@@ -384,20 +565,20 @@
         #endif
     #endif /* OPENSSL_EXTRA */
 
-	#ifndef CTYPE_USER
-	    #include <ctype.h>
-	    #if defined(HAVE_ECC) || defined(HAVE_OCSP) || \
+    #ifndef CTYPE_USER
+        #include <ctype.h>
+        #if defined(HAVE_ECC) || defined(HAVE_OCSP) || \
             defined(WOLFSSL_KEY_GEN) || !defined(NO_DSA)
-	        #define XTOUPPER(c)     toupper((c))
-	        #define XISALPHA(c)     isalpha((c))
-	    #endif
-	    /* needed by wolfSSL_check_domain_name() */
-	    #define XTOLOWER(c)      tolower((c))
-	#endif
+            #define XTOUPPER(c)     toupper((c))
+            #define XISALPHA(c)     isalpha((c))
+        #endif
+        /* needed by wolfSSL_check_domain_name() */
+        #define XTOLOWER(c)      tolower((c))
+    #endif
 
 
-	/* memory allocation types for user hints */
-	enum {
+    /* memory allocation types for user hints */
+    enum {
         DYNAMIC_TYPE_CA           = 1,
         DYNAMIC_TYPE_CERT         = 2,
         DYNAMIC_TYPE_KEY          = 3,
@@ -481,24 +662,32 @@
         DYNAMIC_TYPE_DIRCTX       = 81,
         DYNAMIC_TYPE_HASHCTX      = 82,
         DYNAMIC_TYPE_SEED         = 83,
-        DYNAMIC_TYPE_SYMETRIC_KEY = 84,
+        DYNAMIC_TYPE_SYMMETRIC_KEY= 84,
         DYNAMIC_TYPE_ECC_BUFFER   = 85,
         DYNAMIC_TYPE_QSH          = 86,
         DYNAMIC_TYPE_SALT         = 87,
         DYNAMIC_TYPE_HASH_TMP     = 88,
         DYNAMIC_TYPE_BLOB         = 89,
         DYNAMIC_TYPE_NAME_ENTRY   = 90,
-	};
+        DYNAMIC_TYPE_CURVE448     = 91,
+        DYNAMIC_TYPE_ED448        = 92,
+        DYNAMIC_TYPE_SNIFFER_SERVER     = 1000,
+        DYNAMIC_TYPE_SNIFFER_SESSION    = 1001,
+        DYNAMIC_TYPE_SNIFFER_PB         = 1002,
+        DYNAMIC_TYPE_SNIFFER_PB_BUFFER  = 1003,
+        DYNAMIC_TYPE_SNIFFER_TICKET_ID  = 1004,
+        DYNAMIC_TYPE_SNIFFER_NAMED_KEY  = 1005,
+    };
 
-	/* max error buffer string size */
+    /* max error buffer string size */
     #ifndef WOLFSSL_MAX_ERROR_SZ
-	    #define WOLFSSL_MAX_ERROR_SZ 80
+        #define WOLFSSL_MAX_ERROR_SZ 80
     #endif
 
-	/* stack protection */
-	enum {
-	    MIN_STACK_BUFFER = 8
-	};
+    /* stack protection */
+    enum {
+        MIN_STACK_BUFFER = 8
+    };
 
 
     /* Algorithm Types */
@@ -507,12 +696,38 @@
         WC_ALGO_TYPE_HASH = 1,
         WC_ALGO_TYPE_CIPHER = 2,
         WC_ALGO_TYPE_PK = 3,
+        WC_ALGO_TYPE_RNG = 4,
+        WC_ALGO_TYPE_SEED = 5,
+        WC_ALGO_TYPE_HMAC = 6,
 
-        WC_ALGO_TYPE_MAX = WC_ALGO_TYPE_PK
+        WC_ALGO_TYPE_MAX = WC_ALGO_TYPE_HMAC
     };
 
     /* hash types */
     enum wc_HashType {
+    #if defined(HAVE_SELFTEST) || defined(HAVE_FIPS)
+        /* In selftest build, WC_* types are not mapped to WC_HASH_TYPE types.
+         * Values here are based on old selftest hmac.h enum, with additions.
+         * These values are fixed for backwards FIPS compatibility */
+        WC_HASH_TYPE_NONE = 15,
+        WC_HASH_TYPE_MD2 = 16,
+        WC_HASH_TYPE_MD4 = 17,
+        WC_HASH_TYPE_MD5 = 0,
+        WC_HASH_TYPE_SHA = 1, /* SHA-1 (not old SHA-0) */
+        WC_HASH_TYPE_SHA224 = 8,
+        WC_HASH_TYPE_SHA256 = 2,
+        WC_HASH_TYPE_SHA384 = 5,
+        WC_HASH_TYPE_SHA512 = 4,
+        WC_HASH_TYPE_MD5_SHA = 18,
+        WC_HASH_TYPE_SHA3_224 = 10,
+        WC_HASH_TYPE_SHA3_256 = 11,
+        WC_HASH_TYPE_SHA3_384 = 12,
+        WC_HASH_TYPE_SHA3_512 = 13,
+        WC_HASH_TYPE_BLAKE2B = 14,
+        WC_HASH_TYPE_BLAKE2S = 19,
+
+        WC_HASH_TYPE_MAX = WC_HASH_TYPE_BLAKE2S
+    #else
         WC_HASH_TYPE_NONE = 0,
         WC_HASH_TYPE_MD2 = 1,
         WC_HASH_TYPE_MD4 = 2,
@@ -528,8 +743,10 @@
         WC_HASH_TYPE_SHA3_384 = 12,
         WC_HASH_TYPE_SHA3_512 = 13,
         WC_HASH_TYPE_BLAKE2B = 14,
+        WC_HASH_TYPE_BLAKE2S = 15,
 
-        WC_HASH_TYPE_MAX = WC_HASH_TYPE_BLAKE2B
+        WC_HASH_TYPE_MAX = WC_HASH_TYPE_BLAKE2S
+    #endif /* HAVE_SELFTEST */
     };
 
     /* cipher types */
@@ -560,50 +777,54 @@
         WC_PK_TYPE_ECDSA_VERIFY = 5,
         WC_PK_TYPE_ED25519 = 6,
         WC_PK_TYPE_CURVE25519 = 7,
+        WC_PK_TYPE_RSA_KEYGEN = 8,
+        WC_PK_TYPE_EC_KEYGEN = 9,
 
-        WC_PK_TYPE_MAX = WC_PK_TYPE_CURVE25519
+        WC_PK_TYPE_MAX = WC_PK_TYPE_EC_KEYGEN
     };
 
 
-	/* settings detection for compile vs runtime math incompatibilities */
-	enum {
-	#if !defined(USE_FAST_MATH) && !defined(SIZEOF_LONG) && !defined(SIZEOF_LONG_LONG)
-	    CTC_SETTINGS = 0x0
-	#elif !defined(USE_FAST_MATH) && defined(SIZEOF_LONG) && (SIZEOF_LONG == 8)
-	    CTC_SETTINGS = 0x1
-	#elif !defined(USE_FAST_MATH) && defined(SIZEOF_LONG_LONG) && (SIZEOF_LONG_LONG == 8)
-	    CTC_SETTINGS = 0x2
-	#elif !defined(USE_FAST_MATH) && defined(SIZEOF_LONG_LONG) && (SIZEOF_LONG_LONG == 4)
-	    CTC_SETTINGS = 0x4
-	#elif defined(USE_FAST_MATH) && !defined(SIZEOF_LONG) && !defined(SIZEOF_LONG_LONG)
-	    CTC_SETTINGS = 0x8
-	#elif defined(USE_FAST_MATH) && defined(SIZEOF_LONG) && (SIZEOF_LONG == 8)
-	    CTC_SETTINGS = 0x10
-	#elif defined(USE_FAST_MATH) && defined(SIZEOF_LONG_LONG) && (SIZEOF_LONG_LONG == 8)
-	    CTC_SETTINGS = 0x20
-	#elif defined(USE_FAST_MATH) && defined(SIZEOF_LONG_LONG) && (SIZEOF_LONG_LONG == 4)
-	    CTC_SETTINGS = 0x40
-	#else
-	    #error "bad math long / long long settings"
-	#endif
-	};
+    /* settings detection for compile vs runtime math incompatibilities */
+    enum {
+    #if !defined(USE_FAST_MATH) && !defined(SIZEOF_LONG) && !defined(SIZEOF_LONG_LONG)
+        CTC_SETTINGS = 0x0
+    #elif !defined(USE_FAST_MATH) && defined(SIZEOF_LONG) && (SIZEOF_LONG == 8)
+        CTC_SETTINGS = 0x1
+    #elif !defined(USE_FAST_MATH) && defined(SIZEOF_LONG_LONG) && (SIZEOF_LONG_LONG == 8)
+        CTC_SETTINGS = 0x2
+    #elif !defined(USE_FAST_MATH) && defined(SIZEOF_LONG_LONG) && (SIZEOF_LONG_LONG == 4)
+        CTC_SETTINGS = 0x4
+    #elif defined(USE_FAST_MATH) && !defined(SIZEOF_LONG) && !defined(SIZEOF_LONG_LONG)
+        CTC_SETTINGS = 0x8
+    #elif defined(USE_FAST_MATH) && defined(SIZEOF_LONG) && (SIZEOF_LONG == 8)
+        CTC_SETTINGS = 0x10
+    #elif defined(USE_FAST_MATH) && defined(SIZEOF_LONG_LONG) && (SIZEOF_LONG_LONG == 8)
+        CTC_SETTINGS = 0x20
+    #elif defined(USE_FAST_MATH) && defined(SIZEOF_LONG_LONG) && (SIZEOF_LONG_LONG == 4)
+        CTC_SETTINGS = 0x40
+    #else
+        #error "bad math long / long long settings"
+    #endif
+    };
 
 
-	WOLFSSL_API word32 CheckRunTimeSettings(void);
+    WOLFSSL_API word32 CheckRunTimeSettings(void);
 
-	/* If user uses RSA, DH, DSA, or ECC math lib directly then fast math and long
-	   types need to match at compile time and run time, CheckCtcSettings will
-	   return 1 if a match otherwise 0 */
-	#define CheckCtcSettings() (CTC_SETTINGS == CheckRunTimeSettings())
+    /* If user uses RSA, DH, DSA, or ECC math lib directly then fast math and long
+       types need to match at compile time and run time, CheckCtcSettings will
+       return 1 if a match otherwise 0 */
+    #define CheckCtcSettings() (CTC_SETTINGS == CheckRunTimeSettings())
 
-	/* invalid device id */
-	#define INVALID_DEVID    -2
+    /* invalid device id */
+    #define INVALID_DEVID    -2
 
 
-    /* AESNI requires alignment and ARMASM gains some performance from it */
-    #if defined(WOLFSSL_AESNI) || defined(WOLFSSL_ARMASM) || defined(USE_INTEL_SPEEDUP)
+    /* AESNI requires alignment and ARMASM gains some performance from it
+     * Xilinx RSA operations require alignment */
+    #if defined(WOLFSSL_AESNI) || defined(WOLFSSL_ARMASM) || \
+        defined(USE_INTEL_SPEEDUP) || defined(WOLFSSL_AFALG_XILINX)
         #if !defined(ALIGN16)
-            #if defined(__GNUC__)
+            #if defined(__IAR_SYSTEMS_ICC__) || defined(__GNUC__)
                 #define ALIGN16 __attribute__ ( (aligned (16)))
             #elif defined(_MSC_VER)
                 /* disable align warning, we want alignment ! */
@@ -615,19 +836,7 @@
         #endif /* !ALIGN16 */
 
         #if !defined (ALIGN32)
-            #if defined (__GNUC__)
-                #define ALIGN32 __attribute__ ( (aligned (32)))
-            #elif defined(_MSC_VER)
-                /* disable align warning, we want alignment ! */
-                #pragma warning(disable: 4324)
-                #define ALIGN32 __declspec (align (32))
-            #else
-                #define ALIGN32
-            #endif
-        #endif
-
-        #if !defined(ALIGN32)
-            #if defined(__GNUC__)
+            #if defined(__IAR_SYSTEMS_ICC__) || defined(__GNUC__)
                 #define ALIGN32 __attribute__ ( (aligned (32)))
             #elif defined(_MSC_VER)
                 /* disable align warning, we want alignment ! */
@@ -638,7 +847,19 @@
             #endif
         #endif /* !ALIGN32 */
 
-        #if defined(__GNUC__)
+        #if !defined(ALIGN64)
+            #if defined(__IAR_SYSTEMS_ICC__) || defined(__GNUC__)
+                #define ALIGN64 __attribute__ ( (aligned (64)))
+            #elif defined(_MSC_VER)
+                /* disable align warning, we want alignment ! */
+                #pragma warning(disable: 4324)
+                #define ALIGN64 __declspec (align (64))
+            #else
+                #define ALIGN64
+            #endif
+        #endif /* !ALIGN64 */
+
+        #if defined(__IAR_SYSTEMS_ICC__) || defined(__GNUC__)
             #define ALIGN128 __attribute__ ( (aligned (128)))
         #elif defined(_MSC_VER)
             /* disable align warning, we want alignment ! */
@@ -648,7 +869,7 @@
             #define ALIGN128
         #endif
 
-        #if defined(__GNUC__)
+        #if defined(__IAR_SYSTEMS_ICC__) || defined(__GNUC__)
             #define ALIGN256 __attribute__ ( (aligned (256)))
         #elif defined(_MSC_VER)
             /* disable align warning, we want alignment ! */
@@ -665,6 +886,9 @@
         #ifndef ALIGN32
             #define ALIGN32
         #endif
+        #ifndef ALIGN64
+            #define ALIGN64
+        #endif
         #ifndef ALIGN128
             #define ALIGN128
         #endif
@@ -682,16 +906,15 @@
     #endif
 
 
-    #ifdef WOLFSSL_RIOT_OS
-        #define EXIT_TEST(ret) exit(ret)
-    #elif defined(HAVE_STACK_SIZE)
+    #if defined(HAVE_STACK_SIZE)
         #define EXIT_TEST(ret) return (void*)((size_t)(ret))
     #else
         #define EXIT_TEST(ret) return ret
     #endif
 
 
-    #if defined(__GNUC__)
+    #if (defined(__IAR_SYSTEMS_ICC__) && (__IAR_SYSTEMS_ICC__ > 8)) || \
+         defined(__GNUC__)
         #define WOLFSSL_PACK __attribute__ ((packed))
     #else
         #define WOLFSSL_PACK
@@ -706,16 +929,23 @@
         #endif
     #endif
 
-    #if defined(__GNUC__)
+    #if defined(__IAR_SYSTEMS_ICC__) || defined(__GNUC__)
         #define WC_NORETURN __attribute__((noreturn))
     #else
         #define WC_NORETURN
     #endif
 
+    #if defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || \
+        defined(WOLFSSL_DEBUG_MATH) || defined(DEBUG_WOLFSSL) || \
+        defined(WOLFSSL_PUBLIC_MP) || defined(OPENSSL_EXTRA) || \
+            (defined(HAVE_ECC) && defined(HAVE_ECC_KEY_EXPORT))
+        #undef  WC_MP_TO_RADIX
+        #define WC_MP_TO_RADIX
+    #endif
 
-	#ifdef __cplusplus
-	    }   /* extern "C" */
-	#endif
+    #ifdef __cplusplus
+        }   /* extern "C" */
+    #endif
 
 #endif /* WOLF_CRYPT_TYPES_H */
 
--- a/wolfssl/wolfcrypt/visibility.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/visibility.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* visibility.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -43,25 +43,25 @@
 */
 
 #if defined(BUILDING_WOLFSSL)
-    #if defined(HAVE_VISIBILITY) && HAVE_VISIBILITY
-        #define WOLFSSL_API   __attribute__ ((visibility("default")))
-        #define WOLFSSL_LOCAL __attribute__ ((visibility("hidden")))
-    #elif defined(__SUNPRO_C) && (__SUNPRO_C >= 0x550)
-        #define WOLFSSL_API   __global
-        #define WOLFSSL_LOCAL __hidden
-    #elif defined(_MSC_VER) || defined(__MINGW32__)
+    #if defined(_MSC_VER) || defined(__MINGW32__) || defined(__CYGWIN__)
         #if defined(WOLFSSL_DLL)
             #define WOLFSSL_API __declspec(dllexport)
         #else
             #define WOLFSSL_API
         #endif
         #define WOLFSSL_LOCAL
+    #elif defined(HAVE_VISIBILITY) && HAVE_VISIBILITY
+        #define WOLFSSL_API   __attribute__ ((visibility("default")))
+        #define WOLFSSL_LOCAL __attribute__ ((visibility("hidden")))
+    #elif defined(__SUNPRO_C) && (__SUNPRO_C >= 0x550)
+        #define WOLFSSL_API   __global
+        #define WOLFSSL_LOCAL __hidden
     #else
         #define WOLFSSL_API
         #define WOLFSSL_LOCAL
     #endif /* HAVE_VISIBILITY */
 #else /* BUILDING_WOLFSSL */
-    #if defined(_MSC_VER) || defined(__MINGW32__)
+    #if defined(_MSC_VER) || defined(__MINGW32__) || defined(__CYGWIN__)
         #if defined(WOLFSSL_DLL)
             #define WOLFSSL_API __declspec(dllimport)
         #else
--- a/wolfssl/wolfcrypt/wc_encrypt.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/wc_encrypt.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* wc_encrypt.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -51,7 +51,7 @@
 #endif
 
 
-#ifndef NO_AES
+#if !defined(NO_AES) && defined(HAVE_AES_CBC)
 WOLFSSL_API int wc_AesCbcEncryptWithKey(byte* out, const byte* in, word32 inSz,
                                         const byte* key, word32 keySz,
                                         const byte* iv);
@@ -90,7 +90,7 @@
 #ifndef NO_PWDBASED
     WOLFSSL_LOCAL int wc_CryptKey(const char* password, int passwordSz, 
         byte* salt, int saltSz, int iterations, int id, byte* input, int length,
-        int version, byte* cbcIv, int enc);
+        int version, byte* cbcIv, int enc, int shaOid);
 #endif
 
 #ifdef __cplusplus
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wolfssl/wolfcrypt/wc_pkcs11.h	Thu Jun 04 23:57:22 2020 +0000
@@ -0,0 +1,95 @@
+/* wc_pkcs11.h
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+#ifndef _WOLFPKCS11_H_
+#define _WOLFPKCS11_H_
+
+#include <wolfssl/wolfcrypt/types.h>
+
+#ifdef HAVE_PKCS11
+
+#ifndef WOLF_CRYPTO_CB
+    #error PKCS11 support requires ./configure --enable-cryptocb or WOLF_CRYPTO_CB to be defined
+#endif
+
+#include <wolfssl/wolfcrypt/cryptocb.h>
+#include <wolfssl/wolfcrypt/pkcs11.h>
+
+#ifdef __cplusplus
+    extern "C" {
+#endif
+
+
+typedef struct Pkcs11Dev {
+    void*             dlHandle;         /* Handle to library  */
+    CK_FUNCTION_LIST* func;             /* Array of functions */
+    void*             heap;
+} Pkcs11Dev;
+
+typedef struct Pkcs11Token {
+    CK_FUNCTION_LIST* func;             /* Table of PKCS#11 function from lib */
+    CK_SLOT_ID        slotId;           /* Id of slot to use                  */
+    CK_SESSION_HANDLE handle;           /* Handle to active session           */
+    CK_UTF8CHAR_PTR   userPin;          /* User's PIN to login with           */
+    CK_ULONG          userPinSz;        /* Size of user's PIN in bytes        */
+} Pkcs11Token;
+
+typedef struct Pkcs11Session {
+    CK_FUNCTION_LIST* func;             /* Table of PKCS#11 function from lib */
+    CK_SLOT_ID        slotId;           /* Id of slot to use                  */
+    CK_SESSION_HANDLE handle;           /* Handle to active session           */
+} Pkcs11Session;
+
+/* Types of keys that can be stored. */
+enum Pkcs11KeyType {
+    PKCS11_KEY_TYPE_AES_GCM,
+    PKCS11_KEY_TYPE_AES_CBC,
+    PKCS11_KEY_TYPE_HMAC,
+    PKCS11_KEY_TYPE_RSA,
+    PKCS11_KEY_TYPE_EC,
+};
+
+
+WOLFSSL_API int wc_Pkcs11_Initialize(Pkcs11Dev* dev, const char* library,
+                                     void* heap);
+WOLFSSL_API void wc_Pkcs11_Finalize(Pkcs11Dev* dev);
+
+WOLFSSL_API int wc_Pkcs11Token_Init(Pkcs11Token* token, Pkcs11Dev* dev,
+    int slotId, const char* tokenName, const unsigned char *userPin,
+    int userPinSz);
+WOLFSSL_API void wc_Pkcs11Token_Final(Pkcs11Token* token);
+WOLFSSL_API int wc_Pkcs11Token_Open(Pkcs11Token* token, int readWrite);
+WOLFSSL_API void wc_Pkcs11Token_Close(Pkcs11Token* token);
+
+WOLFSSL_API int wc_Pkcs11StoreKey(Pkcs11Token* token, int type, int clear,
+    void* key);
+
+WOLFSSL_API int wc_Pkcs11_CryptoDevCb(int devId, wc_CryptoInfo* info,
+    void* ctx);
+
+#ifdef __cplusplus
+    } /* extern "C" */
+#endif
+
+#endif /* HAVE_PKCS11 */
+
+#endif /* _WOLFPKCS11_H_ */
+
--- a/wolfssl/wolfcrypt/wc_port.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/wc_port.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,538 +1,781 @@
-/* wc_port.h
- *
- * Copyright (C) 2006-2017 wolfSSL Inc.
- *
- * This file is part of wolfSSL.
- *
- * wolfSSL is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * wolfSSL is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
- */
-
-/*!
-    \file wolfssl/wolfcrypt/wc_port.h
-*/
-
-#ifndef WOLF_CRYPT_PORT_H
-#define WOLF_CRYPT_PORT_H
-
-#include <wolfssl/wolfcrypt/settings.h>
-#include <wolfssl/wolfcrypt/visibility.h>
-
-#ifdef __cplusplus
-    extern "C" {
-#endif
-
-/* Detect if compiler supports C99. "NO_WOLF_C99" can be defined in
- * user_settings.h to disable checking for C99 support. */
-#if !defined(WOLF_C99) && defined(__STDC_VERSION__) && \
-    !defined(WOLFSSL_ARDUINO) && !defined(NO_WOLF_C99)
-    #if __STDC_VERSION__ >= 199901L
-        #define WOLF_C99
-    #endif
-#endif
-
-#ifdef USE_WINDOWS_API
-    #ifdef WOLFSSL_GAME_BUILD
-        #include "system/xtl.h"
-    #else
-        #ifndef WIN32_LEAN_AND_MEAN
-            #define WIN32_LEAN_AND_MEAN
-        #endif
-        #ifndef WOLFSSL_SGX
-            #if defined(_WIN32_WCE) || defined(WIN32_LEAN_AND_MEAN)
-                /* On WinCE winsock2.h must be included before windows.h */
-                #include <winsock2.h>
-            #endif
-            #include <windows.h>
-        #endif /* WOLFSSL_SGX */
-    #endif
-#elif defined(THREADX)
-    #ifndef SINGLE_THREADED
-        #ifdef NEED_THREADX_TYPES
-            #include <types.h>
-        #endif
-        #include <tx_api.h>
-    #endif
-#elif defined(MICRIUM)
-    /* do nothing, just don't pick Unix */
-#elif defined(FREERTOS) || defined(FREERTOS_TCP) || defined(WOLFSSL_SAFERTOS)
-    /* do nothing */
-#elif defined(EBSNET)
-    /* do nothing */
-#elif defined(FREESCALE_MQX) || defined(FREESCALE_KSDK_MQX)
-    /* do nothing */
-#elif defined(FREESCALE_FREE_RTOS)
-    #include "fsl_os_abstraction.h"
-#elif defined(WOLFSSL_uITRON4)
-    #include "stddef.h"
-    #include "kernel.h"
-#elif  defined(WOLFSSL_uTKERNEL2)
-    #include "tk/tkernel.h"
-#elif defined(WOLFSSL_CMSIS_RTOS)
-    #include "cmsis_os.h"
-#elif defined(WOLFSSL_MDK_ARM)
-    #if defined(WOLFSSL_MDK5)
-        #include "cmsis_os.h"
-    #else
-        #include <rtl.h>
-    #endif
-#elif defined(WOLFSSL_CMSIS_RTOS)
-    #include "cmsis_os.h"
-#elif defined(WOLFSSL_TIRTOS)
-    #include <ti/sysbios/BIOS.h>
-    #include <ti/sysbios/knl/Semaphore.h>
-#elif defined(WOLFSSL_FROSTED)
-    #include <semaphore.h>
-#elif defined(INTIME_RTOS)
-    #include <rt.h>
-    #include <io.h>
-#elif defined(WOLFSSL_NUCLEUS_1_2)
-    /* NU_DEBUG needed struct access in nucleus_realloc */
-    #define NU_DEBUG
-    #include "plus/nucleus.h"
-    #include "nucleus.h"
-#else
-    #ifndef SINGLE_THREADED
-        #define WOLFSSL_PTHREADS
-        #include <pthread.h>
-    #endif
-    #if defined(OPENSSL_EXTRA) || defined(GOAHEAD_WS)
-        #include <unistd.h>      /* for close of BIO */
-    #endif
-#endif
-
-/* For FIPS keep the function names the same */
-#ifdef HAVE_FIPS
-#define wc_InitMutex   InitMutex
-#define wc_FreeMutex   FreeMutex
-#define wc_LockMutex   LockMutex
-#define wc_UnLockMutex UnLockMutex
-#endif /* HAVE_FIPS */
-
-#ifdef SINGLE_THREADED
-    typedef int wolfSSL_Mutex;
-#else /* MULTI_THREADED */
-    /* FREERTOS comes first to enable use of FreeRTOS Windows simulator only */
-    #if defined(FREERTOS)
-        typedef xSemaphoreHandle wolfSSL_Mutex;
-    #elif defined(FREERTOS_TCP)
-        #include "FreeRTOS.h"
-        #include "semphr.h"
-		typedef SemaphoreHandle_t  wolfSSL_Mutex;
-    #elif defined(WOLFSSL_SAFERTOS)
-        typedef struct wolfSSL_Mutex {
-            signed char mutexBuffer[portQUEUE_OVERHEAD_BYTES];
-            xSemaphoreHandle mutex;
-        } wolfSSL_Mutex;
-    #elif defined(USE_WINDOWS_API)
-        typedef CRITICAL_SECTION wolfSSL_Mutex;
-    #elif defined(WOLFSSL_PTHREADS)
-        typedef pthread_mutex_t wolfSSL_Mutex;
-    #elif defined(THREADX)
-        typedef TX_MUTEX wolfSSL_Mutex;
-    #elif defined(MICRIUM)
-        typedef OS_MUTEX wolfSSL_Mutex;
-    #elif defined(EBSNET)
-        typedef RTP_MUTEX wolfSSL_Mutex;
-    #elif defined(FREESCALE_MQX) || defined(FREESCALE_KSDK_MQX)
-        typedef MUTEX_STRUCT wolfSSL_Mutex;
-    #elif defined(FREESCALE_FREE_RTOS)
-        typedef mutex_t wolfSSL_Mutex;
-    #elif defined(WOLFSSL_uITRON4)
-        typedef struct wolfSSL_Mutex {
-            T_CSEM sem ;
-            ID     id ;
-        } wolfSSL_Mutex;
-    #elif defined(WOLFSSL_uTKERNEL2)
-        typedef struct wolfSSL_Mutex {
-            T_CSEM sem ;
-            ID     id ;
-        } wolfSSL_Mutex;
-    #elif defined(WOLFSSL_MDK_ARM)
-        #if defined(WOLFSSL_CMSIS_RTOS)
-            typedef osMutexId wolfSSL_Mutex;
-        #else
-            typedef OS_MUT wolfSSL_Mutex;
-        #endif
-    #elif defined(WOLFSSL_CMSIS_RTOS)
-        typedef osMutexId wolfSSL_Mutex;
-    #elif defined(WOLFSSL_TIRTOS)
-        typedef ti_sysbios_knl_Semaphore_Handle wolfSSL_Mutex;
-    #elif defined(WOLFSSL_FROSTED)
-        typedef mutex_t * wolfSSL_Mutex;
-    #elif defined(INTIME_RTOS)
-        typedef RTHANDLE wolfSSL_Mutex;
-    #elif defined(WOLFSSL_NUCLEUS_1_2)
-        typedef NU_SEMAPHORE wolfSSL_Mutex;
-    #else
-        #error Need a mutex type in multithreaded mode
-    #endif /* USE_WINDOWS_API */
-#endif /* SINGLE_THREADED */
-
-/* Enable crypt HW mutex for Freescale MMCAU or PIC32MZ */
-#if defined(FREESCALE_MMCAU) || defined(WOLFSSL_MICROCHIP_PIC32MZ)
-    #ifndef WOLFSSL_CRYPT_HW_MUTEX
-        #define WOLFSSL_CRYPT_HW_MUTEX  1
-    #endif
-#endif /* FREESCALE_MMCAU */
-
-#ifndef WOLFSSL_CRYPT_HW_MUTEX
-    #define WOLFSSL_CRYPT_HW_MUTEX  0
-#endif
-
-#if WOLFSSL_CRYPT_HW_MUTEX
-    /* wolfSSL_CryptHwMutexInit is called on first wolfSSL_CryptHwMutexLock,
-       however it's recommended to call this directly on Hw init to avoid possible
-       race condition where two calls to wolfSSL_CryptHwMutexLock are made at
-       the same time. */
-    int wolfSSL_CryptHwMutexInit(void);
-    int wolfSSL_CryptHwMutexLock(void);
-    int wolfSSL_CryptHwMutexUnLock(void);
-#else
-    /* Define stubs, since HW mutex is disabled */
-    #define wolfSSL_CryptHwMutexInit()      0 /* Success */
-    #define wolfSSL_CryptHwMutexLock()      0 /* Success */
-    #define wolfSSL_CryptHwMutexUnLock()    (void)0 /* Success */
-#endif /* WOLFSSL_CRYPT_HW_MUTEX */
-
-/* Mutex functions */
-WOLFSSL_API int wc_InitMutex(wolfSSL_Mutex*);
-WOLFSSL_API wolfSSL_Mutex* wc_InitAndAllocMutex(void);
-WOLFSSL_API int wc_FreeMutex(wolfSSL_Mutex*);
-WOLFSSL_API int wc_LockMutex(wolfSSL_Mutex*);
-WOLFSSL_API int wc_UnLockMutex(wolfSSL_Mutex*);
-#if defined(OPENSSL_EXTRA) || defined(HAVE_WEBSERVER)
-/* dynamiclly set which mutex to use. unlock / lock is controlled by flag */
-typedef void (mutex_cb)(int flag, int type, const char* file, int line);
-
-WOLFSSL_API int wc_LockMutex_ex(int flag, int type, const char* file, int line);
-WOLFSSL_API int wc_SetMutexCb(mutex_cb* cb);
-#endif
-
-/* main crypto initialization function */
-WOLFSSL_API int wolfCrypt_Init(void);
-WOLFSSL_API int wolfCrypt_Cleanup(void);
-
-/* filesystem abstraction layer, used by ssl.c */
-#ifndef NO_FILESYSTEM
-
-#if defined(EBSNET)
-    #include "vfapi.h"
-    #include "vfile.h"
+/* wc_port.h
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/*!
+    \file wolfssl/wolfcrypt/wc_port.h
+*/
+
+#ifndef WOLF_CRYPT_PORT_H
+#define WOLF_CRYPT_PORT_H
+
+#include <wolfssl/wolfcrypt/settings.h>
+#include <wolfssl/wolfcrypt/visibility.h>
+
+#ifdef __cplusplus
+    extern "C" {
+#endif
+
+/* Detect if compiler supports C99. "NO_WOLF_C99" can be defined in
+ * user_settings.h to disable checking for C99 support. */
+#if !defined(WOLF_C99) && defined(__STDC_VERSION__) && \
+    !defined(WOLFSSL_ARDUINO) && !defined(NO_WOLF_C99)
+    #if __STDC_VERSION__ >= 199901L
+        #define WOLF_C99
+    #endif
+#endif
+
+
+/* GENERIC INCLUDE SECTION */
+#if defined(FREESCALE_MQX) || defined(FREESCALE_KSDK_MQX)
+    #include <mqx.h>
+    #if (defined(MQX_USE_IO_OLD) && MQX_USE_IO_OLD) || \
+        defined(FREESCALE_MQX_5_0)
+        #include <fio.h>
+    #else
+        #include <nio.h>
+    #endif
+#endif
+
+
+/* THREADING/MUTEX SECTION */
+#ifdef USE_WINDOWS_API
+    #ifdef WOLFSSL_GAME_BUILD
+        #include "system/xtl.h"
+    #else
+        #ifndef WIN32_LEAN_AND_MEAN
+            #define WIN32_LEAN_AND_MEAN
+        #endif
+        #ifndef WOLFSSL_SGX
+            #if defined(_WIN32_WCE) || defined(WIN32_LEAN_AND_MEAN)
+                /* On WinCE winsock2.h must be included before windows.h */
+                #include <winsock2.h>
+            #endif
+            #include <windows.h>
+        #endif /* WOLFSSL_SGX */
+    #endif
+#elif defined(THREADX)
+    #ifndef SINGLE_THREADED
+        #ifdef NEED_THREADX_TYPES
+            #include <types.h>
+        #endif
+        #include <tx_api.h>
+    #endif
+#elif defined(WOLFSSL_DEOS)
+    #include "mutexapi.h"
+#elif defined(MICRIUM)
+    /* do nothing, just don't pick Unix */
+#elif defined(FREERTOS) || defined(FREERTOS_TCP) || defined(WOLFSSL_SAFERTOS)
+    /* do nothing */
+#elif defined(EBSNET)
+    /* do nothing */
+#elif defined(FREESCALE_MQX) || defined(FREESCALE_KSDK_MQX)
+    /* do nothing */
+#elif defined(FREESCALE_FREE_RTOS)
+    #include "fsl_os_abstraction.h"
+#elif defined(WOLFSSL_VXWORKS)
+    #include <semLib.h>
+#elif defined(WOLFSSL_uITRON4)
+    #include "stddef.h"
+    #include "kernel.h"
+#elif  defined(WOLFSSL_uTKERNEL2)
+    #include "tk/tkernel.h"
+#elif defined(WOLFSSL_CMSIS_RTOS)
+    #include "cmsis_os.h"
+#elif defined(WOLFSSL_CMSIS_RTOSv2)
+    #include "cmsis_os2.h"
+#elif defined(WOLFSSL_MDK_ARM)
+    #if defined(WOLFSSL_MDK5)
+        #include "cmsis_os.h"
+    #else
+        #include <rtl.h>
+    #endif
+#elif defined(WOLFSSL_CMSIS_RTOS)
+    #include "cmsis_os.h"
+#elif defined(WOLFSSL_TIRTOS)
+    #include <ti/sysbios/BIOS.h>
+    #include <ti/sysbios/knl/Semaphore.h>
+#elif defined(WOLFSSL_FROSTED)
+    #include <semaphore.h>
+#elif defined(INTIME_RTOS)
+    #include <rt.h>
+    #include <io.h>
+#elif defined(WOLFSSL_NUCLEUS_1_2)
+    /* NU_DEBUG needed struct access in nucleus_realloc */
+    #define NU_DEBUG
+    #include "plus/nucleus.h"
+    #include "nucleus.h"
+#elif defined(WOLFSSL_APACHE_MYNEWT)
+    /* do nothing */
+#elif defined(WOLFSSL_ZEPHYR)
+    #ifndef SINGLE_THREADED
+        #include <kernel.h>
+    #endif
+#elif defined(WOLFSSL_TELIT_M2MB)
+
+    /* Telit SDK uses C++ compile option (--cpp), which causes link issue
+        to API's if wrapped in extern "C" */
+    #ifdef __cplusplus
+        }  /* extern "C" */
+    #endif
+
+    #include "m2mb_types.h"
+    #include "m2mb_os_types.h"
+    #include "m2mb_os_api.h"
+    #include "m2mb_os.h"
+    #include "m2mb_os_mtx.h"
+    #ifndef NO_ASN_TIME
+    #include "m2mb_rtc.h"
+    #endif
+    #ifndef NO_FILESYSTEM
+    #include "m2mb_fs_posix.h"
+    #endif
+
+    #undef kB /* eliminate conflict in asn.h */
+
+    #ifdef __cplusplus
+        extern "C" {
+    #endif
+
+#else
+    #ifndef SINGLE_THREADED
+        #define WOLFSSL_PTHREADS
+        #include <pthread.h>
+    #endif
+    #if (defined(OPENSSL_EXTRA) || defined(GOAHEAD_WS)) && \
+        !defined(NO_FILESYSTEM)
+        #include <unistd.h>      /* for close of BIO */
+    #endif
+#endif
+
+/* For FIPS keep the function names the same */
+#ifdef HAVE_FIPS
+#define wc_InitMutex   InitMutex
+#define wc_FreeMutex   FreeMutex
+#define wc_LockMutex   LockMutex
+#define wc_UnLockMutex UnLockMutex
+#endif /* HAVE_FIPS */
+
+#ifdef SINGLE_THREADED
+    typedef int wolfSSL_Mutex;
+#else /* MULTI_THREADED */
+    /* FREERTOS comes first to enable use of FreeRTOS Windows simulator only */
+    #if defined(FREERTOS)
+        typedef xSemaphoreHandle wolfSSL_Mutex;
+    #elif defined(FREERTOS_TCP)
+        #include "FreeRTOS.h"
+        #include "semphr.h"
+		typedef SemaphoreHandle_t  wolfSSL_Mutex;
+    #elif defined(WOLFSSL_SAFERTOS)
+        typedef struct wolfSSL_Mutex {
+            signed char mutexBuffer[portQUEUE_OVERHEAD_BYTES];
+            xSemaphoreHandle mutex;
+        } wolfSSL_Mutex;
+    #elif defined(USE_WINDOWS_API)
+        typedef CRITICAL_SECTION wolfSSL_Mutex;
+    #elif defined(WOLFSSL_PTHREADS)
+        typedef pthread_mutex_t wolfSSL_Mutex;
+    #elif defined(THREADX)
+        typedef TX_MUTEX wolfSSL_Mutex;
+    #elif defined(WOLFSSL_DEOS)
+        typedef mutex_handle_t wolfSSL_Mutex;
+    #elif defined(MICRIUM)
+        typedef OS_MUTEX wolfSSL_Mutex;
+    #elif defined(EBSNET)
+        typedef RTP_MUTEX wolfSSL_Mutex;
+    #elif defined(FREESCALE_MQX) || defined(FREESCALE_KSDK_MQX)
+        typedef MUTEX_STRUCT wolfSSL_Mutex;
+    #elif defined(FREESCALE_FREE_RTOS)
+        typedef mutex_t wolfSSL_Mutex;
+    #elif defined(WOLFSSL_VXWORKS)
+        typedef SEM_ID wolfSSL_Mutex;
+    #elif defined(WOLFSSL_uITRON4)
+        typedef struct wolfSSL_Mutex {
+            T_CSEM sem ;
+            ID     id ;
+        } wolfSSL_Mutex;
+    #elif defined(WOLFSSL_uTKERNEL2)
+        typedef struct wolfSSL_Mutex {
+            T_CSEM sem ;
+            ID     id ;
+        } wolfSSL_Mutex;
+    #elif defined(WOLFSSL_MDK_ARM)
+        #if defined(WOLFSSL_CMSIS_RTOS)
+            typedef osMutexId wolfSSL_Mutex;
+        #else
+            typedef OS_MUT wolfSSL_Mutex;
+        #endif
+    #elif defined(WOLFSSL_CMSIS_RTOS)
+        typedef osMutexId wolfSSL_Mutex;
+    #elif defined(WOLFSSL_CMSIS_RTOSv2)
+        typedef osMutexId_t wolfSSL_Mutex;
+    #elif defined(WOLFSSL_TIRTOS)
+        typedef ti_sysbios_knl_Semaphore_Handle wolfSSL_Mutex;
+    #elif defined(WOLFSSL_FROSTED)
+        typedef mutex_t * wolfSSL_Mutex;
+    #elif defined(INTIME_RTOS)
+        typedef RTHANDLE wolfSSL_Mutex;
+    #elif defined(WOLFSSL_NUCLEUS_1_2)
+        typedef NU_SEMAPHORE wolfSSL_Mutex;
+    #elif defined(WOLFSSL_ZEPHYR)
+        typedef struct k_mutex wolfSSL_Mutex;
+    #elif defined(WOLFSSL_TELIT_M2MB)
+        typedef M2MB_OS_MTX_HANDLE wolfSSL_Mutex;
+    #else
+        #error Need a mutex type in multithreaded mode
+    #endif /* USE_WINDOWS_API */
+#endif /* SINGLE_THREADED */
+
+/* Enable crypt HW mutex for Freescale MMCAU, PIC32MZ or STM32 */
+#if defined(FREESCALE_MMCAU) || defined(WOLFSSL_MICROCHIP_PIC32MZ) || \
+    defined(STM32_CRYPTO)
+    #ifndef WOLFSSL_CRYPT_HW_MUTEX
+        #define WOLFSSL_CRYPT_HW_MUTEX  1
+    #endif
+#endif /* FREESCALE_MMCAU */
+
+#ifndef WOLFSSL_CRYPT_HW_MUTEX
+    #define WOLFSSL_CRYPT_HW_MUTEX  0
+#endif
+
+#if WOLFSSL_CRYPT_HW_MUTEX
+    /* wolfSSL_CryptHwMutexInit is called on first wolfSSL_CryptHwMutexLock,
+       however it's recommended to call this directly on Hw init to avoid possible
+       race condition where two calls to wolfSSL_CryptHwMutexLock are made at
+       the same time. */
+    int wolfSSL_CryptHwMutexInit(void);
+    int wolfSSL_CryptHwMutexLock(void);
+    int wolfSSL_CryptHwMutexUnLock(void);
+#else
+    /* Define stubs, since HW mutex is disabled */
+    #define wolfSSL_CryptHwMutexInit()      0 /* Success */
+    #define wolfSSL_CryptHwMutexLock()      0 /* Success */
+    #define wolfSSL_CryptHwMutexUnLock()    (void)0 /* Success */
+#endif /* WOLFSSL_CRYPT_HW_MUTEX */
+
+/* Mutex functions */
+WOLFSSL_API int wc_InitMutex(wolfSSL_Mutex*);
+WOLFSSL_API wolfSSL_Mutex* wc_InitAndAllocMutex(void);
+WOLFSSL_API int wc_FreeMutex(wolfSSL_Mutex*);
+WOLFSSL_API int wc_LockMutex(wolfSSL_Mutex*);
+WOLFSSL_API int wc_UnLockMutex(wolfSSL_Mutex*);
+#if defined(OPENSSL_EXTRA) || defined(HAVE_WEBSERVER)
+/* dynamically set which mutex to use. unlock / lock is controlled by flag */
+typedef void (mutex_cb)(int flag, int type, const char* file, int line);
+
+WOLFSSL_API int wc_LockMutex_ex(int flag, int type, const char* file, int line);
+WOLFSSL_API int wc_SetMutexCb(mutex_cb* cb);
+#endif
+
+/* main crypto initialization function */
+WOLFSSL_API int wolfCrypt_Init(void);
+WOLFSSL_API int wolfCrypt_Cleanup(void);
+
+
+/* FILESYSTEM SECTION */
+/* filesystem abstraction layer, used by ssl.c */
+#ifndef NO_FILESYSTEM
+
+#if defined(EBSNET)
+    #include "vfapi.h"
+    #include "vfile.h"
+
+    int ebsnet_fseek(int a, long b, int c); /* Not prototyped in vfile.h per
+                                             * EBSnet feedback */
+
+    #define XFILE                    int
+    #define XFOPEN(NAME, MODE)       vf_open((const char *)NAME, VO_RDONLY, 0);
+    #define XFSEEK                   ebsnet_fseek
+    #define XFTELL                   vf_tell
+    #define XREWIND                  vf_rewind
+    #define XFREAD(BUF, SZ, AMT, FD) vf_read(FD, BUF, SZ*AMT)
+    #define XFWRITE(BUF, SZ, AMT, FD) vf_write(FD, BUF, SZ*AMT)
+    #define XFCLOSE                  vf_close
+    #define XSEEK_END                VSEEK_END
+    #define XBADFILE                 -1
+    #define XFGETS(b,s,f)            -2 /* Not ported yet */
+#elif defined(LSR_FS)
+    #include <fs.h>
+    #define XFILE                   struct fs_file*
+    #define XFOPEN(NAME, MODE)      fs_open((char*)NAME);
+    #define XFSEEK(F, O, W)         (void)F
+    #define XFTELL(F)               (F)->len
+    #define XREWIND(F)              (void)F
+    #define XFREAD(BUF, SZ, AMT, F) fs_read(F, (char*)BUF, SZ*AMT)
+    #define XFWRITE(BUF, SZ, AMT, F) fs_write(F, (char*)BUF, SZ*AMT)
+    #define XFCLOSE                 fs_close
+    #define XSEEK_END               0
+    #define XBADFILE                NULL
+    #define XFGETS(b,s,f)            -2 /* Not ported yet */
+#elif defined(FREESCALE_MQX) || defined(FREESCALE_KSDK_MQX)
+    #define XFILE                   MQX_FILE_PTR
+    #define XFOPEN                  fopen
+    #define XFSEEK                  fseek
+    #define XFTELL                  ftell
+    #define XREWIND(F)              fseek(F, 0, IO_SEEK_SET)
+    #define XFREAD                  fread
+    #define XFWRITE                 fwrite
+    #define XFCLOSE                 fclose
+    #define XSEEK_END               IO_SEEK_END
+    #define XBADFILE                NULL
+    #define XFGETS                  fgets
+#elif defined(WOLFSSL_DEOS)
+    #define NO_FILESYSTEM
+    #warning "TODO - DDC-I Certifiable Fast File System for Deos is not integrated"
+    //#define XFILE      bfd *
+
+#elif defined(MICRIUM)
+    #include <fs_api.h>
+    #define XFILE      FS_FILE*
+    #define XFOPEN     fs_fopen
+    #define XFSEEK     fs_fseek
+    #define XFTELL     fs_ftell
+    #define XREWIND    fs_rewind
+    #define XFREAD     fs_fread
+    #define XFWRITE    fs_fwrite
+    #define XFCLOSE    fs_fclose
+    #define XSEEK_END  FS_SEEK_END
+    #define XBADFILE   NULL
+    #define XFGETS(b,s,f) -2 /* Not ported yet */
+#elif defined(WOLFSSL_NUCLEUS_1_2)
+    #include "fal/inc/fal.h"
+    #define XFILE      FILE*
+    #define XFOPEN     fopen
+    #define XFSEEK     fseek
+    #define XFTELL     ftell
+    #define XREWIND    rewind
+    #define XFREAD     fread
+    #define XFWRITE    fwrite
+    #define XFCLOSE    fclose
+    #define XSEEK_END  PSEEK_END
+    #define XBADFILE   NULL
+#elif defined(WOLFSSL_APACHE_MYNEWT)
+    #include <fs/fs.h>
+    #define XFILE  struct fs_file*
+
+    #define XFOPEN     mynewt_fopen
+    #define XFSEEK     mynewt_fseek
+    #define XFTELL     mynewt_ftell
+    #define XREWIND    mynewt_rewind
+    #define XFREAD     mynewt_fread
+    #define XFWRITE    mynewt_fwrite
+    #define XFCLOSE    mynewt_fclose
+    #define XSEEK_END  2
+    #define XBADFILE   NULL
+    #define XFGETS(b,s,f) -2 /* Not ported yet */
+#elif defined(WOLFSSL_ZEPHYR)
+    #include <fs.h>
+
+    #define XFILE      struct fs_file_t*
+    #define STAT       struct fs_dirent
+
+    XFILE z_fs_open(const char* filename, const char* perm);
+    int z_fs_close(XFILE file);
+
+    #define XFOPEN              z_fs_open
+    #define XFCLOSE             z_fs_close
+    #define XFSEEK              fs_seek
+    #define XFTELL              fs_tell
+    #define XFREWIND            fs_rewind
+    #define XREWIND(F)          fs_seek(F, 0, FS_SEEK_SET)
+    #define XFREAD(P,S,N,F)     fs_read(F, P, S*N)
+    #define XFWRITE(P,S,N,F)    fs_write(F, P, S*N)
+    #define XSEEK_END           FS_SEEK_END
+    #define XBADFILE            NULL
+    #define XFGETS(b,s,f)       -2 /* Not ported yet */
+
+#elif defined(WOLFSSL_TELIT_M2MB)
+    #define XFILE                    INT32
+    #define XFOPEN(NAME, MODE)       m2mb_fs_open((NAME), 0, (MODE))
+    #define XFSEEK(F, O, W)          m2mb_fs_lseek((F), (O), (W))
+    #define XFTELL(F)                m2mb_fs_lseek((F), 0, M2MB_SEEK_END)
+    #define XREWIND(F)               (void)F
+    #define XFREAD(BUF, SZ, AMT, F)  m2mb_fs_read((F), (BUF), (SZ)*(AMT))
+    #define XFWRITE(BUF, SZ, AMT, F) m2mb_fs_write((F), (BUF), (SZ)*(AMT))
+    #define XFCLOSE                  m2mb_fs_close
+    #define XSEEK_END                M2MB_SEEK_END
+    #define XBADFILE                 -1
+    #define XFGETS(b,s,f)            -2 /* Not ported yet */
+
+#elif defined(WOLFSSL_USER_FILESYSTEM)
+    /* To be defined in user_settings.h */
+#else
+    /* stdio, default case */
+    #include <stdio.h>
+    #define XFILE      FILE*
+    #if defined(WOLFSSL_MDK_ARM)
+        extern FILE * wolfSSL_fopen(const char *name, const char *mode) ;
+        #define XFOPEN     wolfSSL_fopen
+    #else
+        #define XFOPEN     fopen
+    #endif
+    #define XFSEEK     fseek
+    #define XFTELL     ftell
+    #define XREWIND    rewind
+    #define XFREAD     fread
+    #define XFWRITE    fwrite
+    #define XFCLOSE    fclose
+    #define XSEEK_END  SEEK_END
+    #define XBADFILE   NULL
+    #define XFGETS     fgets
+
+    #if !defined(USE_WINDOWS_API) && !defined(NO_WOLFSSL_DIR)\
+        && !defined(WOLFSSL_NUCLEUS) && !defined(WOLFSSL_NUCLEUS_1_2)
+        #include <dirent.h>
+        #include <unistd.h>
+        #include <sys/stat.h>
+    #endif
+#endif
+
+    #ifndef MAX_FILENAME_SZ
+        #define MAX_FILENAME_SZ  256 /* max file name length */
+    #endif
+    #ifndef MAX_PATH
+        #define MAX_PATH 256
+    #endif
+
+#if !defined(NO_WOLFSSL_DIR) && !defined(WOLFSSL_NUCLEUS) && \
+    !defined(WOLFSSL_NUCLEUS_1_2)
+    typedef struct ReadDirCtx {
+    #ifdef USE_WINDOWS_API
+        WIN32_FIND_DATAA FindFileData;
+        HANDLE hFind;
+    #elif defined(WOLFSSL_ZEPHYR)
+        struct fs_dirent entry;
+        struct fs_dir_t  dir;
+        struct fs_dirent s;
+        struct fs_dir_t* dirp;
+
+    #elif defined(WOLFSSL_TELIT_M2MB)
+        M2MB_DIR_T* dir;
+        struct M2MB_DIRENT* entry;
+        struct M2MB_STAT s;
+    #else
+        struct dirent* entry;
+        DIR*   dir;
+        struct stat s;
+    #endif
+        char name[MAX_FILENAME_SZ];
+    } ReadDirCtx;
+
+    #define WC_READDIR_NOFILE -1
+
+    WOLFSSL_API int wc_ReadDirFirst(ReadDirCtx* ctx, const char* path, char** name);
+    WOLFSSL_API int wc_ReadDirNext(ReadDirCtx* ctx, const char* path, char** name);
+    WOLFSSL_API void wc_ReadDirClose(ReadDirCtx* ctx);
+#endif /* !NO_WOLFSSL_DIR */
+
+#endif /* !NO_FILESYSTEM */
+
+
+/* MIN/MAX MACRO SECTION */
+/* Windows API defines its own min() macro. */
+#if defined(USE_WINDOWS_API)
+    #if defined(min) || defined(WOLFSSL_MYSQL_COMPATIBLE)
+        #define WOLFSSL_HAVE_MIN
+    #endif /* min */
+    #if defined(max) || defined(WOLFSSL_MYSQL_COMPATIBLE)
+        #define WOLFSSL_HAVE_MAX
+    #endif /* max */
+#endif /* USE_WINDOWS_API */
+
+
+/* TIME SECTION */
+/* Time functions */
+#ifndef NO_ASN_TIME
+#if defined(USER_TIME)
+    /* Use our gmtime and time_t/struct tm types.
+       Only needs seconds since EPOCH using XTIME function.
+       time_t XTIME(time_t * timer) {}
+    */
+    #define WOLFSSL_GMTIME
+    #ifndef HAVE_TM_TYPE
+        #define USE_WOLF_TM
+    #endif
+    #ifndef HAVE_TIME_T_TYPE
+        #define USE_WOLF_TIME_T
+    #endif
+
+#elif defined(TIME_OVERRIDES)
+    /* Override XTIME() and XGMTIME() functionality.
+       Requires user to provide these functions:
+        time_t XTIME(time_t * timer) {}
+        struct tm* XGMTIME(const time_t* timer, struct tm* tmp) {}
+    */
+    #ifndef HAVE_TIME_T_TYPE
+        #define USE_WOLF_TIME_T
+    #endif
+    #ifndef HAVE_TM_TYPE
+        #define USE_WOLF_TM
+    #endif
+    #define NEED_TMP_TIME
+
+#elif defined(WOLFSSL_XILINX)
+    #define USER_TIME
+    #include <time.h>
+
+#elif defined(HAVE_RTP_SYS)
+    #include "os.h"           /* dc_rtc_api needs    */
+    #include "dc_rtc_api.h"   /* to get current time */
+
+    /* uses parital <time.h> structures */
+    #define XTIME(tl)       (0)
+    #define XGMTIME(c, t)   rtpsys_gmtime((c))
+
+#elif defined(WOLFSSL_DEOS)
+    #define XTIME(t1)       deos_time((t1))
+    #define WOLFSSL_GMTIME
+    #define USE_WOLF_TM
+    #define USE_WOLF_TIME_T
+
+#elif defined(MICRIUM)
+    #include <clk.h>
+    #include <time.h>
+    #define XTIME(t1)       micrium_time((t1))
+    #define WOLFSSL_GMTIME
+
+#elif defined(MICROCHIP_TCPIP_V5) || defined(MICROCHIP_TCPIP)
+    #include <time.h>
+    #define XTIME(t1)       pic32_time((t1))
+    #define XGMTIME(c, t)   gmtime((c))
+
+#elif defined(FREESCALE_MQX) || defined(FREESCALE_KSDK_MQX)
+    #ifdef FREESCALE_MQX_4_0
+        #include <time.h>
+        extern time_t mqx_time(time_t* timer);
+    #else
+        #define HAVE_GMTIME_R
+    #endif
+    #define XTIME(t1)       mqx_time((t1))
+
+#elif defined(FREESCALE_KSDK_BM) || defined(FREESCALE_FREE_RTOS) || defined(FREESCALE_KSDK_FREERTOS)
+    #include <time.h>
+    #ifndef XTIME
+        /*extern time_t ksdk_time(time_t* timer);*/
+        #define XTIME(t1)   ksdk_time((t1))
+    #endif
+    #define XGMTIME(c, t)   gmtime((c))
+
+#elif defined(WOLFSSL_ATMEL) && defined(WOLFSSL_ATMEL_TIME)
+    #define XTIME(t1)       atmel_get_curr_time_and_date((t1))
+    #define WOLFSSL_GMTIME
+    #define USE_WOLF_TM
+    #define USE_WOLF_TIME_T
+
+#elif defined(WOLFSSL_WICED)
+    #include <time.h>
+    time_t wiced_pseudo_unix_epoch_time(time_t * timer);
+    #define XTIME(t1)       wiced_pseudo_unix_epoch_time((t1))
+    #define HAVE_GMTIME_R
+
+#elif defined(IDIRECT_DEV_TIME)
+    /*Gets the timestamp from cloak software owned by VT iDirect
+    in place of time() from <time.h> */
+    #include <time.h>
+    #define XTIME(t1)       idirect_time((t1))
+    #define XGMTIME(c, t)   gmtime((c))
+
+#elif defined(_WIN32_WCE)
+    #include <windows.h>
+    #define XTIME(t1)       windows_time((t1))
+    #define WOLFSSL_GMTIME
+
+#elif defined(WOLFSSL_APACHE_MYNEWT)
+    #include "os/os_time.h"
+    #define XTIME(t1)       mynewt_time((t1))
+    #define WOLFSSL_GMTIME
+    #define USE_WOLF_TM
+    #define USE_WOLF_TIME_T
+
+#elif defined(WOLFSSL_ZEPHYR)
+    #ifndef _POSIX_C_SOURCE
+        #include <posix/time.h>
+    #else
+        #include <sys/time.h>
+    #endif
+
+    typedef signed int time_t;
+
+    time_t z_time(time_t *timer);
+
+    #define XTIME(tl)       z_time((tl))
+    #define XGMTIME(c, t)   gmtime((c))
+    #define WOLFSSL_GMTIME
+
+    #define USE_WOLF_TM
+
+#elif defined(WOLFSSL_TELIT_M2MB)
+    typedef long time_t;
+    extern time_t m2mb_xtime(time_t * timer);
+    #define XTIME(tl)       m2mb_xtime((tl))
+    #ifdef WOLFSSL_TLS13
+        extern time_t m2mb_xtime_ms(time_t * timer);
+        #define XTIME_MS(tl)    m2mb_xtime_ms((tl))
+    #endif
+    #ifndef NO_CRYPT_BENCHMARK
+        extern double m2mb_xtime_bench(int reset);
+        #define WOLFSSL_CURRTIME_REMAP m2mb_xtime_bench
+    #endif
+    #define XGMTIME(c, t)   gmtime((c))
+    #define WOLFSSL_GMTIME
+    #define USE_WOLF_TM
+
+#else
+    /* default */
+    /* uses complete <time.h> facility */
+    #include <time.h>
+    #if defined(HAVE_SYS_TIME_H)
+        #include <sys/time.h>
+    #endif
+
+    /* PowerPC time_t is int */
+    #ifdef __PPC__
+        #define TIME_T_NOT_64BIT
+    #endif
+#endif
+
+#ifdef SIZEOF_TIME_T
+    /* check if size of time_t from autoconf is less than 8 bytes (64bits) */
+    #if SIZEOF_TIME_T < 8
+        #undef  TIME_T_NOT_64BIT
+        #define TIME_T_NOT_64BIT
+    #endif
+#endif
+#ifdef TIME_T_NOT_LONG
+    /* one old reference to TIME_T_NOT_LONG in GCC-ARM example README
+     * this keeps support for the old macro name */
+    #undef  TIME_T_NOT_64BIT
+    #define TIME_T_NOT_64BIT
+#endif
+
+/* Map default time functions */
+#if !defined(XTIME) && !defined(TIME_OVERRIDES) && !defined(USER_TIME)
+    #ifdef TEST_BEFORE_DATE
+    #define XTIME(tl)       (946681200UL) /* Jan 1, 2000 */
+    #else
+    #define XTIME(tl)       time((tl))
+    #endif
+#endif
+#if !defined(XGMTIME) && !defined(TIME_OVERRIDES)
+    #if defined(WOLFSSL_GMTIME) || !defined(HAVE_GMTIME_R) || defined(WOLF_C99)
+        #define XGMTIME(c, t)   gmtime((c))
+    #else
+        #define XGMTIME(c, t)   gmtime_r((c), (t))
+        #define NEED_TMP_TIME
+    #endif
+#endif
+#if !defined(XVALIDATE_DATE) && !defined(HAVE_VALIDATE_DATE)
+    #define USE_WOLF_VALIDDATE
+    #define XVALIDATE_DATE(d, f, t) ValidateDate((d), (f), (t))
+#endif
+
+/* wolf struct tm and time_t */
+#if defined(USE_WOLF_TM)
+    struct tm {
+        int  tm_sec;     /* seconds after the minute [0-60] */
+        int  tm_min;     /* minutes after the hour [0-59] */
+        int  tm_hour;    /* hours since midnight [0-23] */
+        int  tm_mday;    /* day of the month [1-31] */
+        int  tm_mon;     /* months since January [0-11] */
+        int  tm_year;    /* years since 1900 */
+        int  tm_wday;    /* days since Sunday [0-6] */
+        int  tm_yday;    /* days since January 1 [0-365] */
+        int  tm_isdst;   /* Daylight Savings Time flag */
+        long tm_gmtoff;  /* offset from CUT in seconds */
+        char *tm_zone;   /* timezone abbreviation */
+    };
+#endif /* USE_WOLF_TM */
+#if defined(USE_WOLF_TIME_T)
+    typedef long time_t;
+#endif
+#if defined(USE_WOLF_SUSECONDS_T)
+    typedef long suseconds_t;
+#endif
+#if defined(USE_WOLF_TIMEVAL_T)
+    struct timeval
+    {
+        time_t tv_sec;
+        suseconds_t tv_usec;
+    };
+#endif
+
+    /* forward declarations */
+#if defined(USER_TIME)
+    struct tm* gmtime(const time_t* timer);
+    extern time_t XTIME(time_t * timer);
+
+    #ifdef STACK_TRAP
+        /* for stack trap tracking, don't call os gmtime on OS X/linux,
+           uses a lot of stack spce */
+        extern time_t time(time_t * timer);
+        #define XTIME(tl)  time((tl))
+    #endif /* STACK_TRAP */
+
+#elif defined(TIME_OVERRIDES)
+    extern time_t XTIME(time_t * timer);
+    extern struct tm* XGMTIME(const time_t* timer, struct tm* tmp);
+#elif defined(WOLFSSL_GMTIME)
+    struct tm* gmtime(const time_t* timer);
+#endif
+#endif /* NO_ASN_TIME */
+
+
+#ifndef WOLFSSL_LEANPSK
+    char* mystrnstr(const char* s1, const char* s2, unsigned int n);
+#endif
+
+#ifndef FILE_BUFFER_SIZE
+    #define FILE_BUFFER_SIZE 1024     /* default static file buffer size for input,
+                                    will use dynamic buffer if not big enough */
+#endif
+
+#ifdef HAVE_CAVIUM_OCTEON_SYNC
+    /* By default, the OCTEON's global variables are all thread local. This
+     * tag allows them to be shared between threads. */
+    #include "cvmx-platform.h"
+    #define WOLFSSL_GLOBAL CVMX_SHARED
+#else
+    #define WOLFSSL_GLOBAL
+#endif
+
+#ifdef WOLFSSL_DSP
+    #include "wolfssl_dsp.h"
+
+    /* callbacks for setting handle */
+    typedef int (*wolfSSL_DSP_Handle_cb)(remote_handle64 *handle, int finished,
+                                         void *ctx);
+    WOLFSSL_API int wolfSSL_GetHandleCbSet();
+    WOLFSSL_API int wolfSSL_SetHandleCb(wolfSSL_DSP_Handle_cb in);
+    WOLFSSL_LOCAL int wolfSSL_InitHandle();
+    WOLFSSL_LOCAL void wolfSSL_CleanupHandle();
+#endif
+
+#ifdef WOLFSSL_SCE
+    #ifndef WOLFSSL_SCE_GSCE_HANDLE
+        #define WOLFSSL_SCE_GSCE_HANDLE g_sce
+    #endif
+#endif
+
+#ifdef __cplusplus
+    }  /* extern "C" */
+#endif
+
+#endif /* WOLF_CRYPT_PORT_H */
 
-    #define XFILE                    int
-    #define XFOPEN(NAME, MODE)       vf_open((const char *)NAME, VO_RDONLY, 0);
-    #define XFSEEK                   vf_lseek
-    #define XFTELL                   vf_tell
-    #define XREWIND                  vf_rewind
-    #define XFREAD(BUF, SZ, AMT, FD) vf_read(FD, BUF, SZ*AMT)
-    #define XFWRITE(BUF, SZ, AMT, FD) vf_write(FD, BUF, SZ*AMT)
-    #define XFCLOSE                  vf_close
-    #define XSEEK_END                VSEEK_END
-    #define XBADFILE                 -1
-    #define XFGETS(b,s,f)            -2 /* Not ported yet */
-#elif defined(LSR_FS)
-    #include <fs.h>
-    #define XFILE                   struct fs_file*
-    #define XFOPEN(NAME, MODE)      fs_open((char*)NAME);
-    #define XFSEEK(F, O, W)         (void)F
-    #define XFTELL(F)               (F)->len
-    #define XREWIND(F)              (void)F
-    #define XFREAD(BUF, SZ, AMT, F) fs_read(F, (char*)BUF, SZ*AMT)
-    #define XFWRITE(BUF, SZ, AMT, F) fs_write(F, (char*)BUF, SZ*AMT)
-    #define XFCLOSE                 fs_close
-    #define XSEEK_END               0
-    #define XBADFILE                NULL
-    #define XFGETS(b,s,f)            -2 /* Not ported yet */
-#elif defined(FREESCALE_MQX) || defined(FREESCALE_KSDK_MQX)
-    #define XFILE                   MQX_FILE_PTR
-    #define XFOPEN                  fopen
-    #define XFSEEK                  fseek
-    #define XFTELL                  ftell
-    #define XREWIND(F)              fseek(F, 0, IO_SEEK_SET)
-    #define XFREAD                  fread
-    #define XFWRITE                 fwrite
-    #define XFCLOSE                 fclose
-    #define XSEEK_END               IO_SEEK_END
-    #define XBADFILE                NULL
-    #define XFGETS                  fgets
-#elif defined(MICRIUM)
-    #include <fs_api.h>
-    #define XFILE      FS_FILE*
-    #define XFOPEN     fs_fopen
-    #define XFSEEK     fs_fseek
-    #define XFTELL     fs_ftell
-    #define XREWIND    fs_rewind
-    #define XFREAD     fs_fread
-    #define XFWRITE    fs_fwrite
-    #define XFCLOSE    fs_fclose
-    #define XSEEK_END  FS_SEEK_END
-    #define XBADFILE   NULL
-    #define XFGETS(b,s,f) -2 /* Not ported yet */
-#elif defined(WOLFSSL_NUCLEUS_1_2)
-    #include "fal/inc/fal.h"
-    #define XFILE      FILE*
-    #define XFOPEN     fopen
-    #define XFSEEK     fseek
-    #define XFTELL     ftell
-    #define XREWIND    rewind
-    #define XFREAD     fread
-    #define XFWRITE    fwrite
-    #define XFCLOSE    fclose
-    #define XSEEK_END  PSEEK_END
-    #define XBADFILE   NULL
-#else
-    /* stdio, default case */
-    #include <stdio.h>
-    #define XFILE      FILE*
-    #if defined(WOLFSSL_MDK_ARM)
-        extern FILE * wolfSSL_fopen(const char *name, const char *mode) ;
-        #define XFOPEN     wolfSSL_fopen
-    #else
-        #define XFOPEN     fopen
-    #endif
-    #define XFSEEK     fseek
-    #define XFTELL     ftell
-    #define XREWIND    rewind
-    #define XFREAD     fread
-    #define XFWRITE    fwrite
-    #define XFCLOSE    fclose
-    #define XSEEK_END  SEEK_END
-    #define XBADFILE   NULL
-    #define XFGETS     fgets
-
-    #if !defined(USE_WINDOWS_API) && !defined(NO_WOLFSSL_DIR)\
-        && !defined(WOLFSSL_NUCLEUS) && !defined(WOLFSSL_NUCLEUS_1_2)
-        #include <dirent.h>
-        #include <unistd.h>
-        #include <sys/stat.h>
-    #endif
-#endif
-
-    #ifndef MAX_FILENAME_SZ
-        #define MAX_FILENAME_SZ  256 /* max file name length */
-    #endif
-    #ifndef MAX_PATH
-        #define MAX_PATH 256
-    #endif
-
-#if !defined(NO_WOLFSSL_DIR) && !defined(WOLFSSL_NUCLEUS) && \
-    !defined(WOLFSSL_NUCLEUS_1_2)
-    typedef struct ReadDirCtx {
-    #ifdef USE_WINDOWS_API
-        WIN32_FIND_DATAA FindFileData;
-        HANDLE hFind;
-    #else
-        struct dirent* entry;
-        DIR*   dir;
-        struct stat s;
-    #endif
-        char name[MAX_FILENAME_SZ];
-    } ReadDirCtx;
-
-    WOLFSSL_API int wc_ReadDirFirst(ReadDirCtx* ctx, const char* path, char** name);
-    WOLFSSL_API int wc_ReadDirNext(ReadDirCtx* ctx, const char* path, char** name);
-    WOLFSSL_API void wc_ReadDirClose(ReadDirCtx* ctx);
-#endif /* !NO_WOLFSSL_DIR */
-
-#endif /* !NO_FILESYSTEM */
-
-/* Windows API defines its own min() macro. */
-#if defined(USE_WINDOWS_API)
-    #if defined(min) || defined(WOLFSSL_MYSQL_COMPATIBLE)
-        #define WOLFSSL_HAVE_MIN
-    #endif /* min */
-    #if defined(max) || defined(WOLFSSL_MYSQL_COMPATIBLE)
-        #define WOLFSSL_HAVE_MAX
-    #endif /* max */
-#endif /* USE_WINDOWS_API */
-
-/* Time functions */
-#ifndef NO_ASN_TIME
-#if defined(USER_TIME)
-    /* Use our gmtime and time_t/struct tm types.
-       Only needs seconds since EPOCH using XTIME function.
-       time_t XTIME(time_t * timer) {}
-    */
-    #define WOLFSSL_GMTIME
-    #define USE_WOLF_TM
-    #define USE_WOLF_TIME_T
-
-#elif defined(TIME_OVERRIDES)
-    /* Override XTIME() and XGMTIME() functionality.
-       Requires user to provide these functions:
-        time_t XTIME(time_t * timer) {}
-        struct tm* XGMTIME(const time_t* timer, struct tm* tmp) {}
-    */
-    #ifndef HAVE_TIME_T_TYPE
-        #define USE_WOLF_TIME_T
-    #endif
-    #ifndef HAVE_TM_TYPE
-        #define USE_WOLF_TM
-    #endif
-    #define NEED_TMP_TIME
-
-#elif defined(HAVE_RTP_SYS)
-    #include "os.h"           /* dc_rtc_api needs    */
-    #include "dc_rtc_api.h"   /* to get current time */
-
-    /* uses parital <time.h> structures */
-    #define XTIME(tl)       (0)
-    #define XGMTIME(c, t)   rtpsys_gmtime((c))
-
-#elif defined(MICRIUM)
-    #include <clk.h>
-    #include <time.h>
-    #define XTIME(t1)       micrium_time((t1))
-    #define WOLFSSL_GMTIME
-
-#elif defined(MICROCHIP_TCPIP_V5) || defined(MICROCHIP_TCPIP)
-    #include <time.h>
-    #define XTIME(t1)       pic32_time((t1))
-    #define XGMTIME(c, t)   gmtime((c))
-
-#elif defined(FREESCALE_MQX) || defined(FREESCALE_KSDK_MQX)
-    #ifdef FREESCALE_MQX_4_0
-        #include <time.h>
-        extern time_t mqx_time(time_t* timer);
-    #else
-        #define HAVE_GMTIME_R
-    #endif
-    #define XTIME(t1)       mqx_time((t1))
-
-#elif defined(FREESCALE_KSDK_BM) || defined(FREESCALE_FREE_RTOS) || defined(FREESCALE_KSDK_FREERTOS)
-    #include <time.h>
-    #ifndef XTIME
-        /*extern time_t ksdk_time(time_t* timer);*/
-        #define XTIME(t1)   ksdk_time((t1))
-    #endif
-    #define XGMTIME(c, t)   gmtime((c))
-
-#elif defined(WOLFSSL_ATMEL)
-    #define XTIME(t1)       atmel_get_curr_time_and_date((t1))
-    #define WOLFSSL_GMTIME
-    #define USE_WOLF_TM
-    #define USE_WOLF_TIME_T
-
-#elif defined(IDIRECT_DEV_TIME)
-    /*Gets the timestamp from cloak software owned by VT iDirect
-    in place of time() from <time.h> */
-    #include <time.h>
-    #define XTIME(t1)       idirect_time((t1))
-    #define XGMTIME(c, t)   gmtime((c))
-
-#elif defined(_WIN32_WCE)
-    #include <windows.h>
-    #define XTIME(t1)       windows_time((t1))
-    #define WOLFSSL_GMTIME
-
-#else
-    /* default */
-    /* uses complete <time.h> facility */
-    #include <time.h>
-    #if defined(HAVE_SYS_TIME_H)
-        #include <sys/time.h>
-    #endif
-
-    /* PowerPC time_t is int */
-    #ifdef __PPC__
-        #define TIME_T_NOT_LONG
-    #endif
-#endif
-
-
-/* Map default time functions */
-#if !defined(XTIME) && !defined(TIME_OVERRIDES) && !defined(USER_TIME)
-    #define XTIME(tl)       time((tl))
-#endif
-#if !defined(XGMTIME) && !defined(TIME_OVERRIDES)
-    #if defined(WOLFSSL_GMTIME) || !defined(HAVE_GMTIME_R) || defined(WOLF_C99)
-        #define XGMTIME(c, t)   gmtime((c))
-    #else
-        #define XGMTIME(c, t)   gmtime_r((c), (t))
-        #define NEED_TMP_TIME
-    #endif
-#endif
-#if !defined(XVALIDATE_DATE) && !defined(HAVE_VALIDATE_DATE)
-    #define USE_WOLF_VALIDDATE
-    #define XVALIDATE_DATE(d, f, t) ValidateDate((d), (f), (t))
-#endif
-
-/* wolf struct tm and time_t */
-#if defined(USE_WOLF_TM)
-    struct tm {
-        int  tm_sec;     /* seconds after the minute [0-60] */
-        int  tm_min;     /* minutes after the hour [0-59] */
-        int  tm_hour;    /* hours since midnight [0-23] */
-        int  tm_mday;    /* day of the month [1-31] */
-        int  tm_mon;     /* months since January [0-11] */
-        int  tm_year;    /* years since 1900 */
-        int  tm_wday;    /* days since Sunday [0-6] */
-        int  tm_yday;    /* days since January 1 [0-365] */
-        int  tm_isdst;   /* Daylight Savings Time flag */
-        long tm_gmtoff;  /* offset from CUT in seconds */
-        char *tm_zone;   /* timezone abbreviation */
-    };
-#endif /* USE_WOLF_TM */
-#if defined(USE_WOLF_TIME_T)
-    typedef long time_t;
-#endif
-#if defined(USE_WOLF_SUSECONDS_T)
-    typedef long suseconds_t;
-#endif
-#if defined(USE_WOLF_TIMEVAL_T)
-    struct timeval
-    {
-        time_t tv_sec;
-        suseconds_t tv_usec;
-    };
-#endif
-
-    /* forward declarations */
-#if defined(USER_TIME)
-    struct tm* gmtime(const time_t* timer);
-    extern time_t XTIME(time_t * timer);
-
-    #ifdef STACK_TRAP
-        /* for stack trap tracking, don't call os gmtime on OS X/linux,
-           uses a lot of stack spce */
-        extern time_t time(time_t * timer);
-        #define XTIME(tl)  time((tl))
-    #endif /* STACK_TRAP */
-
-#elif defined(TIME_OVERRIDES)
-    extern time_t XTIME(time_t * timer);
-    extern struct tm* XGMTIME(const time_t* timer, struct tm* tmp);
-#elif defined(WOLFSSL_GMTIME)
-    struct tm* gmtime(const time_t* timer);
-#endif
-#endif /* NO_ASN_TIME */
-
-#ifndef WOLFSSL_LEANPSK
-    char* mystrnstr(const char* s1, const char* s2, unsigned int n);
-#endif
-
-#ifndef FILE_BUFFER_SIZE
-    #define FILE_BUFFER_SIZE 1024     /* default static file buffer size for input,
-                                    will use dynamic buffer if not big enough */
-#endif
-
-
-#ifdef __cplusplus
-    }  /* extern "C" */
-#endif
-
-#endif /* WOLF_CRYPT_PORT_H */
-
-
--- a/wolfssl/wolfcrypt/wolfevent.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/wolfevent.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* wolfevent.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
--- a/wolfssl/wolfcrypt/wolfmath.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfcrypt/wolfmath.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* wolfmath.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -19,53 +19,80 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
  */
 
-#if defined(HAVE_WOLF_BIGINT) && !defined(WOLF_BIGINT_DEFINED)
-    /* raw big integer */
-    typedef struct WC_BIGINT {
-        byte*   buf;
-        word32  len;
-        void*   heap;
-    } WC_BIGINT;
-
-    #define WOLF_BIGINT_DEFINED
-#endif
-
-
-/* only define functions if mp_int has been declared */
-#ifdef MP_INT_DEFINED
-
 #ifndef __WOLFMATH_H__
 #define __WOLFMATH_H__
 
-    /* timing resistance array */
-    #if !defined(WC_NO_CACHE_RESISTANT) && \
-        ((defined(HAVE_ECC) && defined(ECC_TIMING_RESISTANT)) || \
-         (defined(USE_FAST_MATH) && defined(TFM_TIMING_RESISTANT)))
+#ifdef __cplusplus
+    extern "C" {
+#endif
+
+#ifdef WOLFSSL_PUBLIC_MP
+    #define MP_API   WOLFSSL_API
+#else
+    #define MP_API   WOLFSSL_LOCAL
+#endif
 
-        extern const wolfssl_word wc_off_on_addr[2];
-    #endif
+#ifndef MIN
+   #define MIN(x,y) ((x)<(y)?(x):(y))
+#endif
+
+#ifndef MAX
+   #define MAX(x,y) ((x)>(y)?(x):(y))
+#endif
 
-    /* common math functions */
-    int get_digit_count(mp_int* a);
-    mp_digit get_digit(mp_int* a, int n);
-    int get_rand_digit(WC_RNG* rng, mp_digit* d);
-    int mp_rand(mp_int* a, int digits, WC_RNG* rng);
+/* timing resistance array */
+#if !defined(WC_NO_CACHE_RESISTANT) && \
+    ((defined(HAVE_ECC) && defined(ECC_TIMING_RESISTANT)) || \
+     (defined(USE_FAST_MATH) && defined(TFM_TIMING_RESISTANT)))
+
+    extern const wolfssl_word wc_off_on_addr[2];
+#endif
 
 
-    #ifdef HAVE_WOLF_BIGINT
-        void wc_bigint_init(WC_BIGINT* a);
-        int wc_bigint_alloc(WC_BIGINT* a, word32 sz);
-        int wc_bigint_from_unsigned_bin(WC_BIGINT* a, const byte* in, word32 inlen);
-        int wc_bigint_to_unsigned_bin(WC_BIGINT* a, byte* out, word32* outlen);
-        void wc_bigint_zero(WC_BIGINT* a);
-        void wc_bigint_free(WC_BIGINT* a);
+/* common math functions */
+MP_API int get_digit_count(mp_int* a);
+MP_API mp_digit get_digit(mp_int* a, int n);
+MP_API int get_rand_digit(WC_RNG* rng, mp_digit* d);
+
+WOLFSSL_API int mp_cond_copy(mp_int* a, int copy, mp_int* b);
+WOLFSSL_API int mp_rand(mp_int* a, int digits, WC_RNG* rng);
+
+enum {
+    /* format type */
+    WC_TYPE_HEX_STR = 1,
+    WC_TYPE_UNSIGNED_BIN = 2,
+};
+
+WOLFSSL_API int wc_export_int(mp_int* mp, byte* buf, word32* len,
+    word32 keySz, int encType);
 
-        int wc_mp_to_bigint(mp_int* src, WC_BIGINT* dst);
-        int wc_mp_to_bigint_sz(mp_int* src, WC_BIGINT* dst, word32 sz);
-        int wc_bigint_to_mp(WC_BIGINT* src, mp_int* dst);
-    #endif /* HAVE_WOLF_BIGINT */
+#ifdef HAVE_WOLF_BIGINT
+    #if !defined(WOLF_BIGINT_DEFINED)
+        /* raw big integer */
+        typedef struct WC_BIGINT {
+            byte*   buf;
+            word32  len;
+            void*   heap;
+        } WC_BIGINT;
+        #define WOLF_BIGINT_DEFINED
+    #endif
+
+    WOLFSSL_LOCAL void wc_bigint_init(WC_BIGINT* a);
+    WOLFSSL_LOCAL int wc_bigint_alloc(WC_BIGINT* a, word32 sz);
+    WOLFSSL_LOCAL int wc_bigint_from_unsigned_bin(WC_BIGINT* a, const byte* in, word32 inlen);
+    WOLFSSL_LOCAL int wc_bigint_to_unsigned_bin(WC_BIGINT* a, byte* out, word32* outlen);
+    WOLFSSL_LOCAL void wc_bigint_zero(WC_BIGINT* a);
+    WOLFSSL_LOCAL void wc_bigint_free(WC_BIGINT* a);
+
+    WOLFSSL_LOCAL int wc_mp_to_bigint(mp_int* src, WC_BIGINT* dst);
+    WOLFSSL_LOCAL int wc_mp_to_bigint_sz(mp_int* src, WC_BIGINT* dst, word32 sz);
+    WOLFSSL_LOCAL int wc_bigint_to_mp(WC_BIGINT* src, mp_int* dst);
+#endif /* HAVE_WOLF_BIGINT */
+
+
+#ifdef __cplusplus
+    } /* extern "C" */
+#endif
 
 #endif /* __WOLFMATH_H__ */
-
-#endif /* MP_INT_DEFINED */
 
--- a/wolfssl/wolfio.h	Sat Aug 18 22:20:43 2018 +0000
+++ b/wolfssl/wolfio.h	Thu Jun 04 23:57:22 2020 +0000
@@ -1,6 +1,6 @@
 /* io.h
  *
- * Copyright (C) 2006-2017 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -20,7 +20,7 @@
  */
 
 /*!
-    \file wolfssl/wolfio.h   
+    \file wolfssl/wolfio.h
 */
 
 #ifndef WOLFSSL_IO_H
@@ -30,16 +30,20 @@
     extern "C" {
 #endif
 
-/* OCSP and CRL_IO require HTTP client */
-#if defined(HAVE_OCSP) || defined(HAVE_CRL_IO)
-    #ifndef HAVE_HTTP_CLIENT
-        #define HAVE_HTTP_CLIENT
+/* Micrium uses NetSock I/O callbacks in wolfio.c */
+#if !defined(WOLFSSL_USER_IO)
+    /* OCSP and CRL_IO require HTTP client */
+    #if defined(HAVE_OCSP) || defined(HAVE_CRL_IO)
+        #ifndef HAVE_HTTP_CLIENT
+            #define HAVE_HTTP_CLIENT
+        #endif
     #endif
 #endif
 
 #if !defined(WOLFSSL_USER_IO)
     /* Micrium uses NetSock I/O callbacks in wolfio.c */
-    #if !defined(USE_WOLFSSL_IO) && !defined(MICRIUM)
+    #if !defined(USE_WOLFSSL_IO) && !defined(MICRIUM) && \
+        !defined(WOLFSSL_CONTIKI) && !defined(WOLFSSL_NO_SOCK)
         #define USE_WOLFSSL_IO
     #endif
 #endif
@@ -52,7 +56,7 @@
 #endif
 
 #ifndef USE_WINDOWS_API
-    #ifdef WOLFSSL_LWIP
+    #if defined(WOLFSSL_LWIP) && !defined(WOLFSSL_APACHE_MYNEWT)
         /* lwIP needs to be configured to use sockets API in this mode */
         /* LWIP_SOCKET 1 in lwip/opt.h or in build */
         #include "lwip/sockets.h"
@@ -66,17 +70,24 @@
     #elif defined(FREESCALE_KSDK_MQX)
         #include <rtcs.h>
     #elif (defined(WOLFSSL_MDK_ARM) || defined(WOLFSSL_KEIL_TCP_NET))
-        #include "cmsis_os.h"
         #include "rl_net.h"
         #include "errno.h"
     #elif defined(WOLFSSL_CMSIS_RTOS)
         #include "cmsis_os.h"
+    #elif defined(WOLFSSL_CMSIS_RTOSv2)
+        #include "cmsis_os2.h"
     #elif defined(WOLFSSL_TIRTOS)
         #include <sys/socket.h>
     #elif defined(FREERTOS_TCP)
         #include "FreeRTOS_Sockets.h"
     #elif defined(WOLFSSL_IAR_ARM)
         /* nothing */
+    #elif defined(HAVE_NETX_BSD)
+        #ifdef NETX_DUO
+            #include "nxd_bsd.h"
+        #else
+            #include "nx_bsd.h"
+        #endif
     #elif defined(WOLFSSL_VXWORKS)
         #include <sockLib.h>
         #include <errno.h>
@@ -107,6 +118,17 @@
         #include <sys/ioctl.h>
     #elif defined(WOLFSSL_SGX)
         #include <errno.h>
+    #elif defined(WOLFSSL_APACHE_MYNEWT) && !defined(WOLFSSL_LWIP)
+        #include <mn_socket/mn_socket.h>
+    #elif defined(WOLFSSL_DEOS)
+        #include <socketapi.h>
+        #include <lwip-socket.h>
+        #include <errno.h>
+    #elif defined(WOLFSSL_ZEPHYR)
+        #include <net/socket.h>
+    #elif defined(HAVE_NETX)
+        #include "nx_api.h"
+        #include "errno.h"
     #elif !defined(WOLFSSL_NO_SOCK)
         #include <sys/types.h>
         #include <errno.h>
@@ -114,13 +136,16 @@
             #include <unistd.h>
         #endif
         #include <fcntl.h>
+        #define XFCNTL(fd, flag, block) fcntl((fd), (flag), (block))
 
         #if defined(HAVE_RTP_SYS)
             #include <socket.h>
         #elif defined(EBSNET)
             #include "rtipapi.h"  /* errno */
             #include "socket.h"
-        #elif !defined(DEVKITPRO) && !defined(WOLFSSL_PICOTCP)
+        #elif !defined(DEVKITPRO) && !defined(WOLFSSL_PICOTCP) \
+                && !defined(WOLFSSL_CONTIKI) && !defined(WOLFSSL_WICED) \
+                && !defined(WOLFSSL_GNRC) && !defined(WOLFSSL_RIOT_OS)
             #include <sys/socket.h>
             #include <arpa/inet.h>
             #include <netinet/in.h>
@@ -132,6 +157,11 @@
             #endif
         #endif
     #endif
+
+    #if defined(WOLFSSL_RENESAS_RA6M3G) /* Uses FREERTOS_TCP */
+        #include <errno.h>
+    #endif
+
 #endif /* USE_WINDOWS_API */
 
 #ifdef __sun
@@ -209,6 +239,22 @@
     #define SOCKET_EPIPE        NU_NOT_CONNECTED
     #define SOCKET_ECONNREFUSED NU_CONNECTION_REFUSED
     #define SOCKET_ECONNABORTED NU_NOT_CONNECTED
+#elif defined(WOLFSSL_DEOS)
+     #define SOCKET_EWOULDBLOCK EAGAIN
+     #define SOCKET_EAGAIN      EAGAIN
+     #define SOCKET_ECONNRESET  EINTR
+     #define SOCKET_EINTR       EINTR
+     #define SOCKET_EPIPE       EPIPE
+     #define SOCKET_ECONNREFUSED SOCKET_ERROR
+     #define SOCKET_ECONNABORTED SOCKET_ERROR
+#elif defined(HAVE_NETX)
+    #define SOCKET_EWOULDBLOCK NX_NOT_CONNECTED
+    #define SOCKET_EAGAIN      NX_NOT_CONNECTED
+    #define SOCKET_ECONNRESET  NX_NOT_CONNECTED
+    #define SOCKET_EINTR       NX_NOT_CONNECTED
+    #define SOCKET_EPIPE       NX_NOT_CONNECTED
+    #define SOCKET_ECONNREFUSED NX_NOT_CONNECTED
+    #define SOCKET_ECONNABORTED NX_NOT_CONNECTED
 #else
     #define SOCKET_EWOULDBLOCK EWOULDBLOCK
     #define SOCKET_EAGAIN      EAGAIN
@@ -219,31 +265,13 @@
     #define SOCKET_ECONNABORTED ECONNABORTED
 #endif /* USE_WINDOWS_API */
 
-
-#ifdef USE_WINDOWS_API
-    #define CloseSocket(s) closesocket(s)
-    #define StartTCP() { WSADATA wsd; WSAStartup(0x0002, &wsd); }
-#elif defined(WOLFSSL_MDK_ARM) || defined(WOLFSSL_KEIL_TCP_NET)
-    extern int closesocket(int);
-    #define CloseSocket(s) closesocket(s)
-    #define StartTCP()
-#else
-    #define CloseSocket(s) close(s)
-    #define StartTCP()
-    #ifdef FREERTOS_TCP_WINSIM
-        extern int close(int);
-    #endif
-#endif
-
-
-
 #ifdef DEVKITPRO
     /* from network.h */
     int net_send(int, const void*, int, unsigned int);
     int net_recv(int, void*, int, unsigned int);
     #define SEND_FUNCTION net_send
     #define RECV_FUNCTION net_recv
-#elif defined(WOLFSSL_LWIP)
+#elif defined(WOLFSSL_LWIP) && !defined(WOLFSSL_APACHE_MYNEWT)
     #define SEND_FUNCTION lwip_send
     #define RECV_FUNCTION lwip_recv
 #elif defined(WOLFSSL_PICOTCP)
@@ -258,6 +286,13 @@
 #elif defined(WOLFSSL_NUCLEUS_1_2)
     #define SEND_FUNCTION NU_Send
     #define RECV_FUNCTION NU_Recv
+#elif defined(WOLFSSL_ZEPHYR)
+    #ifndef WOLFSSL_MAX_SEND_SZ
+        #define WOLFSSL_MAX_SEND_SZ       256
+    #endif
+
+    #define SEND_FUNCTION send
+    #define RECV_FUNCTION recv
 #else
     #define SEND_FUNCTION send
     #define RECV_FUNCTION recv
@@ -293,11 +328,7 @@
     #endif /* HAVE_SOCKADDR */
 
     /* use gethostbyname for c99 */
-    #ifdef WOLF_C99
-        #undef HAVE_GETADDRINFO
-    #endif
-
-    #ifdef HAVE_GETADDRINFO
+    #if defined(HAVE_GETADDRINFO) && !defined(WOLF_C99)
         typedef struct addrinfo         ADDRINFO;
     #endif
 #endif /* WOLFSSL_NO_SOCK */
@@ -306,7 +337,7 @@
 /* IO API's */
 #ifdef HAVE_IO_TIMEOUT
     WOLFSSL_API  int wolfIO_SetBlockingMode(SOCKET_T sockfd, int non_blocking);
-    WOLFSSL_API void wolfIO_SetTimeout(int to_sec);;
+    WOLFSSL_API void wolfIO_SetTimeout(int to_sec);
     WOLFSSL_API  int wolfIO_Select(SOCKET_T sockfd, int to_sec);
 #endif
 WOLFSSL_API  int wolfIO_TcpConnect(SOCKET_T* sockfd, const char* ip,
@@ -316,6 +347,29 @@
 
 #endif /* USE_WOLFSSL_IO || HAVE_HTTP_CLIENT */
 
+#ifndef WOLFSSL_NO_SOCK
+#ifdef USE_WINDOWS_API
+    #ifndef CloseSocket
+        #define CloseSocket(s) closesocket(s)
+    #endif
+    #define StartTCP() { WSADATA wsd; WSAStartup(0x0002, &wsd); }
+#elif defined(WOLFSSL_MDK_ARM) || defined(WOLFSSL_KEIL_TCP_NET)
+    #ifndef CloseSocket
+        extern int closesocket(int);
+        #define CloseSocket(s) closesocket(s)
+    #endif
+    #define StartTCP()
+#else
+    #ifndef CloseSocket
+        #define CloseSocket(s) close(s)
+    #endif
+    #define StartTCP()
+    #ifdef FREERTOS_TCP_WINSIM
+        extern int close(int);
+    #endif
+#endif
+#endif /* WOLFSSL_NO_SOCK */
+
 
 WOLFSSL_API int BioSend(WOLFSSL* ssl, char *buf, int sz, void *ctx);
 WOLFSSL_API int BioReceive(WOLFSSL* ssl, char* buf, int sz, void* ctx);
@@ -372,6 +426,9 @@
     WOLFSSL_API  int wolfIO_HttpBuildRequest(const char* reqType,
         const char* domainName, const char* path, int pathLen, int reqSz,
         const char* contentType, unsigned char* buf, int bufSize);
+    WOLFSSL_LOCAL int wolfIO_HttpBuildRequest_ex(const char* reqType,
+        const char* domainName, const char* path, int pathLen, int reqSz,
+        const char* contentType, const char *exHdrs, unsigned char* buf, int bufSize);
     WOLFSSL_API  int wolfIO_HttpProcessResponse(int sfd, const char** appStrList,
         unsigned char** respBuf, unsigned char* httpBuf, int httpBufSz,
         int dynType, void* heap);
@@ -383,6 +440,8 @@
 typedef int (*CallbackIOSend)(WOLFSSL *ssl, char *buf, int sz, void *ctx);
 WOLFSSL_API void wolfSSL_CTX_SetIORecv(WOLFSSL_CTX*, CallbackIORecv);
 WOLFSSL_API void wolfSSL_CTX_SetIOSend(WOLFSSL_CTX*, CallbackIOSend);
+WOLFSSL_API void wolfSSL_SSLSetIORecv(WOLFSSL*, CallbackIORecv);
+WOLFSSL_API void wolfSSL_SSLSetIOSend(WOLFSSL*, CallbackIOSend);
 /* deprecated old name */
 #define wolfSSL_SetIORecv wolfSSL_CTX_SetIORecv
 #define wolfSSL_SetIOSend wolfSSL_CTX_SetIOSend
@@ -414,6 +473,77 @@
     WOLFSSL_LOCAL int MicriumSendTo(WOLFSSL* ssl, char* buf, int sz, void* ctx);
 #endif /* MICRIUM */
 
+#if defined(WOLFSSL_APACHE_MYNEWT) && !defined(WOLFSSL_LWIP)
+    WOLFSSL_LOCAL int Mynewt_Receive(WOLFSSL *ssl, char *buf, int sz, void *ctx);
+    WOLFSSL_LOCAL int Mynewt_Send(WOLFSSL* ssl, char *buf, int sz, void *ctx);
+    WOLFSSL_API void wolfSSL_SetIO_Mynewt(WOLFSSL* ssl, struct mn_socket* mnSocket,
+                                          struct mn_sockaddr_in* mnSockAddrIn);
+#endif /* defined(WOLFSSL_APACHE_MYNEWT) && !defined(WOLFSSL_LWIP) */
+
+#ifdef WOLFSSL_UIP
+
+    struct uip_wolfssl_ctx {
+        union socket_connector {
+            struct tcp_socket tcp;
+            struct udp_socket udp;
+        } conn;
+        WOLFSSL_CTX *ctx;
+        WOLFSSL *ssl;
+        uint8_t *input_databuf;
+        uint8_t *output_databuf;
+        uint8_t *ssl_rx_databuf;
+        int ssl_rb_len;
+        int ssl_rb_off;
+        struct process *process;
+        tcp_socket_data_callback_t input_callback;
+        tcp_socket_event_callback_t event_callback;
+        int closing;
+        uip_ipaddr_t peer_addr;
+        uint16_t peer_port;
+    };
+
+    typedef struct uip_wolfssl_ctx uip_wolfssl_ctx;
+
+    WOLFSSL_LOCAL int uIPSend(WOLFSSL* ssl, char* buf, int sz, void* ctx);
+    WOLFSSL_LOCAL int uIPReceive(WOLFSSL* ssl, char* buf, int sz,
+                                     void* ctx);
+    WOLFSSL_LOCAL int uIPReceiveFrom(WOLFSSL* ssl, char* buf, int sz,
+                                         void* ctx);
+    WOLFSSL_LOCAL int uIPSendTo(WOLFSSL* ssl, char* buf, int sz, void* ctx);
+
+#endif
+
+#ifdef WOLFSSL_GNRC
+    #include <sock_types.h>
+    #include <net/gnrc.h>
+    #include <net/af.h>
+    #include <net/sock.h>
+    #include <net/gnrc/tcp.h>
+    #include <net/gnrc/udp.h>
+
+    struct gnrc_wolfssl_ctx {
+        union socket_connector {
+        #ifdef MODULE_SOCK_TCP
+            sock_tcp_t tcp;
+        #endif
+            sock_udp_t udp;
+        } conn;
+        WOLFSSL_CTX *ctx;
+        WOLFSSL *ssl;
+
+        int closing;
+        struct _sock_tl_ep peer_addr;
+    };
+
+    typedef struct gnrc_wolfssl_ctx sock_tls_t;
+
+    WOLFSSL_LOCAL int GNRC_ReceiveFrom(WOLFSSL* ssl, char* buf, int sz,
+                                     void* ctx);
+    WOLFSSL_LOCAL int GNRC_SendTo(WOLFSSL* ssl, char* buf, int sz, void* ctx);
+
+#endif
+
+
 #ifdef WOLFSSL_DTLS
     typedef int (*CallbackGenCookie)(WOLFSSL* ssl, unsigned char* buf, int sz,
                                      void* ctx);